diff --git a/.gitignore b/.gitignore index 10da50d4a86..f5fbba37f6a 100644 --- a/.gitignore +++ b/.gitignore @@ -140,9 +140,12 @@ ompi/debuggers/predefined_pad_test ompi/include/mpi.h ompi/include/mpif-config.h ompi/include/mpif.h +ompi/include/mpif-c-constants-decl.h +ompi/include/mpif-c-constants.h ompi/include/mpif-common.h ompi/include/mpi-ext.h ompi/include/mpif-ext.h +ompi/include/mpif-f08-types.h ompi/include/mpif-handles.h ompi/include/mpif-io-constants.h ompi/include/mpif-constants.h @@ -231,6 +234,9 @@ ompi/mpiext/example/tests/progress_mpifh ompi/mpiext/example/tests/progress_usempi ompi/mpiext/example/tests/progress_usempif08 +ompi/mpiext/cuda/c/MPIX_Query_cuda_support.3 +ompi/mpiext/cuda/c/mpiext_cuda_c.h + ompi/tools/mpisync/mpisync ompi/tools/mpisync/mpirun_prof ompi/tools/mpisync/ompi_timing_post @@ -294,6 +300,14 @@ opal/mca/hwloc/hwloc*/hwloc/include/private/autogen/config.h opal/mca/installdirs/config/install_dirs.h +opal/mca/pmix/pmix112/pmix/include/pmix/autogen/config.h +opal/mca/pmix/pmix112/pmix/include/pmix/autogen/config.h.in +opal/mca/pmix/pmix112/pmix/include/pmix/pmix_common.h +opal/mca/pmix/pmix112/pmix/include/private/autogen/config.h +opal/mca/pmix/pmix112/pmix/include/private/autogen/config.h.in +opal/mca/pmix/pmix112/pmix/include/pmix/autogen/config.h.in +opal/mca/pmix/pmix112/pmix/include/pmix_version.h + opal/tools/opal-checkpoint/opal-checkpoint opal/tools/opal-checkpoint/opal-checkpoint.1 opal/tools/opal-restart/opal-restart @@ -382,6 +396,7 @@ orte/test/mpi/pconnect orte/test/mpi/thread_init orte/test/mpi/memcached-dummy orte/test/mpi/coll_test +orte/test/mpi/badcoll orte/test/system/radix orte/test/system/sigusr_trap @@ -503,9 +518,27 @@ oshmem/shmem/java/java/shmem oshmem/shmem/java/java/shmem_Addr.h oshmem/shmem/java/java/shmem_PSync.h +oshmem/shmem/man/man3/shmem_*.3 +oshmem/shmem/man/man3/OpenSHMEM.3 +oshmem/shmem/man/man3/intro_shmem.3 +oshmem/shmem/man/man3/_my_pe.3 +oshmem/shmem/man/man3/_num_pes.3 
+oshmem/shmem/man/man3/shfree.3 +oshmem/shmem/man/man3/shmalloc.3 +oshmem/shmem/man/man3/shmemalign.3 +oshmem/shmem/man/man3/shrealloc.3 +oshmem/shmem/man/man3/start_pes.3 +oshmem/shmem/man/man3/.dir-stamp + oshmem/tools/oshmem_info/oshmem_info oshmem/tools/oshmem_info/oshmem_info.1 +oshmem/tools/wrappers/oshcc.1 +oshmem/tools/wrappers/oshfort.1 +oshmem/tools/wrappers/oshrun.1 +oshmem/tools/wrappers/shmemcc.1 +oshmem/tools/wrappers/shmemfort.1 +oshmem/tools/wrappers/shmemrun.1 oshmem/tools/wrappers/shmemcc-wrapper-data.txt oshmem/tools/wrappers/shmemfort-wrapper-data.txt diff --git a/.mailmap b/.mailmap index 43559a70ce2..7bb958c34a9 100644 --- a/.mailmap +++ b/.mailmap @@ -1,8 +1,19 @@ -# This file exists to help map usernames to proper names and email addresses -# in the Open MPI github mirror of the canonical SVN repository. The github -# mirror can be found here: +# This file exists to help consolidate names and email addresses +# (e.g., when people accidentally commit with an incorrect or local +# email address). Two common use cases: # -# https://github.com/open-mpi/ompi-svn-mirror +# 1. Consolidate multiple email addresses from a single person. +# Example: one commit from John Smith is from +# and another is from +# , and a third is from +# . But they're all from +# the same John Smith person. +# +# 2. Consolidate misspellings / alternate names from a single person. +# Example: one commit is from "John Smith" and another is from +# "John Smith, CONTRACTOR", and a third is from "RocketMan 9982". But +# these are all really the same person, who can be listed once in +# AUTHORS as "John Smith". # # The format of this file is documented in git-shortlog(1). Specifically, # a line like this: @@ -12,121 +23,82 @@ # means that when git sees "commit@email.xx" it will display # "Proper Name " instead in certain circumstances.
Those # circumstances include: +# # - git shortlog # - git blame # - git log --format=tformat:"%aN <%aE>" (and similar) # -# A copy of this file should be present on each branch in SVN which is being -# tracked in the Git mirror. - -Dave Goodell -Jeff Squyres -Reese Faucette -Bill D'Amico -Adrian Reber - -# fix Manjunath's name, it was inadvertently truncated in commit c4e17f1 -Manjunath Gorentla Venkata - -George Bosilca -Aurelien Bouteiller -Thomas Herault - -# If you want your name to be rendered properly in these situations, please -# remove it from the list below, place it above, uncomment it, and change -# "proper.name@proper.domain" to the appropriate values. This template list -# comes from the AUTHORS file. -# -#Abhishek Kulkarni -#Adrian Knoth -#Abhishek Kulkarni -#Andrew Friedley -#Aleksey Senin -#Alex Margolin -#Alex Mikheev -#Thara Angskun -#Anya Tatashina -#Avneesh Pant -#Brad Benton -#Brian Barrett -#Alex Brick -#Laura Casswell -#Camille Coti -#Christian Bell -#Chris Yeoh -#David Daniel -#Nadia Derby -#Denis Dimick -#Donald Kerr -#Dan Lacher -#Doron Shoham -#Edgar Gabriel -#Ethan Mallove -#Eugene Loh -#Graham Fagg -#Ginger Young -#Gleb Natapov -#Galen Shipman -#Greg Watson -#Nathan Hjelm -#Sven Stork -#Torsten Hoefler -#Iain Bason -#Igor Usarov -#Jon Mason -#Josh Hursey -#Joshua Ladd -#Nysal Jan -#Jose E. Roman -#Matthias Jurenz -#Karen Norteman -#Yevgeny Kliteynik -#Karl Mroz -#Andreas Knuepfer -#Greg Koenig -#Pierre Lemarinier -#Lenny Verkhovsky -#Andrew Lumsdaine -#Manjunath Gorentla Venkata -#Ken Matney -#Mike Dubman -#Mitch Sukalski -#Mohamad Chaarawi -#Mark Taylor -#Tom Naughton -#Li-Ta Lo -#Oscar Vega-Gisbert -#Pak Lui -#Pavel Shamis -#Patrick Geoffray -#Brad Penoff -#Jelena Pjesivac-Grbovic -#Prabhanjan Kambadur -#Craig Rasmussen -#Ron Brightwell -#Ralph Castain -#Rich Graham -#Rolf Vandevaart -#Rob Awles -#Rainer Keller -#Sami Ayyorgun -#Samuel K. 
Gutierrez -#Gopal Santhanaraman -#Swen Boehm -#Sharon Melamed -#Shiqing Fan -#Sylvain Jeaugey -#Sayantan Sur -#Sushant Sharma -#Steve Wise -#Terry Dontje -#Tim Mattox -#Tim Prins -#Tim Woodall -#Vasily Filipov -#Vishal Sahay -#Vishwanath Venkatesan -#Wesley Bland -#Yael Dalen -#Yossi Etigin -#Weikuan Yu + +Jeff Squyres +Jeff Squyres --quiet <--quiet> +Jeff Squyres + +George Bosilca + +Howard Pritchard +Howard Pritchard + +Andrew Friedley + +Devendar Bureddy + +Edgar Gabriel +Edgar Gabriel + +Gilles Gouaillardet + +Matias A Cabral +Matias A Cabral + +Pavel Shamis +Pavel Shamis +Pavel Shamis + +Todd Kordenbrock + +Yohann Burette +Yohann Burette + +MPI Team (bot) +MPI Team (bot) + +Yossi Itigin + +Josh Hursey +Josh Hursey + +Adrian Reber + +Elena Elkina +Elena Elkina + +Igor Ivanov +Igor Ivanov + +Mangala Jyothi Bhaskar +Mangala Jyothi Bhaskar + +Ralph Castain +Ralph Castain + +Rolf vandeVaart + +Karol Mroz + +Nadezhda Kogteva + +Thananon Patinyasakdikul + +Nysal Jan K A +Nysal Jan K A + +Zhi Ming Wang + +Annapurna Dasari + +L. R. Rajeshnarayanan + +Aurelien Bouteiller +Aurelien Bouteiller + +Alex Mikheev diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000000..4036ab3cea1 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,83 @@ +# Use "required" for sudo, because we want to use the "trusty" Debian +# distro, which is (currently) only available in the legacy Travis +# infrastructure (i.e., if we put "sudo: false" to use the new container- +# based Travis infrastructure, then "trusty" is not available). We +# need the "trusty" distro because it has more recent versions of the +# GNU Autotools (i.e., autogen.pl will fail if you use the regular +# distro because the GNU Autotools are too old). 
+sudo: required +dist: trusty +language: c + +# Iterate over 2 different compilers +compiler: + - gcc + - clang + +# Iterate over 2 different OSs +os: + - linux + - osx + +addons: + # For Linux, make sure we have some extra packages that we like to + # build with + apt: + packages: + - autoconf + - automake + - libtool + - libnl-3-200 + - libnl-3-dev + - libnl-route-3-200 + - libnl-route-3-dev + - libibverbs-dev + - librdmacm-dev + sources: + - ubuntu-toolchain-r-test + +env: + global: + - AM_MAKEFLAGS="-j4" + - CPPFLAGS="-I$HOME/bogus/include" + - LDFLAGS="-L$HOME/bogus/lib" + - LD_LIBRARY_PATH="$HOME/bogus/lib" + matrix: + - GCC_VERSION=default + - GCC_VERSION=6 + +# Install dependencies for the verbs and usnic providers. Open MPI is +# not currently using the verbs provider in Libfabric, so we might as +# well not build it. +before_install: + - if [[ "$GCC_VERSION" == "6" ]]; then COMPILERS="CC=gcc-6 CXX=g++-6 FC=gfortran-6"; fi + - export CONFIGURE_ARGS="--prefix=$HOME/bogus $COMPILERS" DISTCHECK_CONFIGURE_FLAGS="$CONFIGURE_ARGS" + - export DISTCHECK_CONFIGURE_FLAGS="$CONFIGURE_ARGS" + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then git clone https://github.com/ofiwg/libfabric.git ; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$GCC_VERSION" == "6" ]] ; then sudo apt-get --assume-yes install gcc-6 g++-6 gfortran-6; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then cd libfabric && ./autogen.sh && ./configure --prefix=$HOME/bogus --enable-usnic --disable-verbs $COMPILERS && make install && cd .. ; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; brew upgrade automake || true; brew upgrade libtool || true; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$GCC_VERSION" == "6" ]] ; then brew install gcc || true; brew upgrade gcc || true ; fi + - mkdir -p $HOME/bogus/include $HOME/bogus/lib + +# Note that we use "make -k" to do the entire build, even if there was a +# build error in there somewhere.
This prevents us from needing to submit +# to Travis, see the first error, fix that first error, submit again, ...etc. +install: + - m4 --version + - autoconf --version + - automake --version + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then libtool --version; else glibtool --version; fi + - ./autogen.pl + - if [[ "$TRAVIS_OS_NAME" == "linux" && "$CC" == "gcc" ]]; then ./configure $CONFIGURE_ARGS --with-libfabric=$HOME/bogus --with-usnic --with-verbs; else ./configure $CONFIGURE_ARGS; fi + - make -k + +# We only need to distcheck on one OS / compiler combination (this is just +# a minor optimization to make the overall set of builds faster). +script: + - if [[ "$TRAVIS_OS_NAME" == "linux" && "$CC" == "gcc" ]]; then make distcheck; else make check; fi + +matrix: + exclude: + - env: GCC_VERSION=6 + compiler: clang diff --git a/AUTHORS b/AUTHORS index 0ef08dcf5df..d70d4127bc7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,152 +1,330 @@ Open MPI Authors ================ -The following cumulative list contains the names and Subversion IDs of -all individuals who have committed code to the Open MPI repository. +The following cumulative list contains the names and email addresses +of all individuals who have committed code to the Open MPI repository +(either directly or through a third party, such as through a +Github.com pull request). Note that these email addresses are not +guaranteed to be current; they are simply a unique indicator of the +individual who committed them. 
-Email Name Affiliation(s) -------------------------------- --------------------------- ------------------- -Nadia.Derbey@bull.net Nadia Derby Bull -adi@minet.uni-jena.de Adrian Knoth UJ -adkulkar@cs.indiana.edu Abhishek Kulkarni IU -adrian@lisas.de Adrian Reber HE -afriedle@osl.iu.edu Andrew Friedley IU, SNL -alekseys@mellanox.com Aleksey Senin Mellanox -alex.margolin@mail.huji.ac.il Alex Margolin Mellanox -alexm@mellanox.com Alex Mikheev Mellanox -alinas@mellanox.com Alina Sklarevich Mellanox -andreas.knuepfer@tu-dresden.de Andreas Knuepfer ZIH -angskun@cs.utk.edu Thara Angskun UTK -anya.tatashina@sun.com Anya Tatashina Sun -artpol84@gmail.com Artem Polyakov Individual -avneesh.pant@qlogic.com Avneesh Pant QLogic -bdamico@cisco.com Bill D'Amico Cisco -bert.wesarg@tu-dresden.de Bert Wesarg ZIH -bosilca@icl.utk.edu George Bosilca UTK -bouteill@icl.utk.edu Aurelien Bouteiller UTK -brad.benton@us.ibm.com Brad Benton IBM, AMD -brbarret@open-mpi.org Brian Barrett IU, LANL, SNL -ccoti@icl.utk.edu Camille Coti UTK, INRIA -christian.bell@qlogic.com Christian Bell QLogic -cyeoh@au1.ibm.com Chris Yeoh IBM -dan.lacher@sun.com Dan Lacher Sun -ddd@lanl.gov David Daniel LANL -devendar@mellanox.com Devendar Bureddy Mellanox -dgdimick@lnal.gov Denis Dimick LANL -dgoodell@cisco.com Dave Goodell Cisco -donald.kerr@oracle.com Donald Kerr Sun, Oracle -dorons@mellanox.com Doron Shoham Mellanox -ethan.mallove@oracle.com Ethan Mallove Sun, Oracle -eugene.loh@oracle.com Eugene Loh Sun, Oracle -gabriel@cs.uh.edu Edgar Gabriel HLRS, UH, UTK -gef@icl.utk.edu Graham Fagg UTK -gilles.gouaillardet@iferc.org Gilles Gouaillardet RIST -gingery@lanl.gov Ginger Young LANL -gleb@voltaire.com Gleb Natapov Voltaire -gshipman@lanl.gov Galen Shipman LANL -gwatson@lanl.gov Greg Watson LANL -herault@icl.utk.edu Thomas Herault INRIA -hjelmn@lanl.gov Nathan Hjelm LANL -hmontakhabi@uh.edu Hadi Montakhabi UH -howardp@lanl.gov Howard Pritchard LANL -hppritcha@gmail.com Howard Pritchard LANL -htor@osl.iu.edu 
Torsten Hoefler IU, TUC -iain.bason@oracle.com Iain Bason Sun, Oracle -igoru@mellanox.com Igor Usarov Mellanox -jdmason@opengridcomputing.com Jon Mason Chelsio -jjhursey@open-mpi.org Josh Hursey IU, ORNL, LANL, LBNL, UWL -jnysal@in.ibm.com Nysal Jan IBM -joshual@mellanox.com Joshua Ladd Mellanox -jroman@dsic.upv.es Jose E. Roman UPV -jsquyres@cisco.com Jeff Squyres Cisco, IU -karen.norteman@sun.com Karen Norteman Sun -kliteyn@mellanox.co.il Yevgeny Kliteynik Mellanox -koenig@acm.org Greg Koenig ORNL -lcasswell@lanl.gov Laura Casswell LANL -lemarini@icl.utk.edu Pierre Lemarinier UTK -lennyb@voltaire.com Lenny Verkhovsky Mellanox -lums@cs.indiana.edu Andrew Lumsdaine IU -manjugv@ornl.gov Manjunath Gorentla Venkata ORNL -matneykdsr@ornl.gov Ken Matney ORNL -matthias.jurenz@tu-dresden.de Matthias Jurenz ZIH -miked@mellanox.com Mike Dubman Mellanox -mjbhaskar@uh.edu Mangala Jyothi Bhaskar UH -mroz.karol@gmail.com Karl Mroz UBC -mschaara@cs.uh.edu Mohamad Chaarawi UH -mt@lanl.gov Mark Taylor LANL -mwsukal@ca.sandia.gov Mitch Sukalski SNL -naughtont@ornl.gov Tom Naughton ORNL -niethammer@hlrs.de Christoph Niethammer HLRS -ollie@lanl.gov Li-Ta Lo LANL -ovega@dsic.upv.es Oscar Vega-Gisbert UPV -pak.lui@sun.com Pak Lui Sun -patrick@myri.com Patrick Geoffray Myricom -penoff@cs.ubc.ca Brad Penoff UBC -pjesa@icl.iu.edu Jelena Pjesivac-Grbovic UTK -pkambadu@osl.iu.edu Prabhanjan Kambadur IU -rainer.keller@hlrs.de Rainer Keller HLRS, ORNL -rasmus@cas.uoregon.edu Craig Rasmussen LANL, UO -rbbrigh@sandia.gov Ron Brightwell SNL -regrant@sandia.gov Ryan Grant SNL -rfaucett@cisco.com Reese Faucette Cisco -rhc@open-mpi.org Ralph Castain LANL, Cisco, Intel -richardg@mellanox.com Rich Graham ORNL, LANL, Mellanox -rta@lanl.gov Rob Awles LANL -rvandevaart@nvidia.com Rolf Vandevaart Sun, Oracle, NVIDIA -sami@lanl.gov Sami Ayyorgun LANL -samuel@lanl.gov Samuel K. 
Gutierrez LANL -santhana@osu.edu Gopal Santhanaraman OSU -sboehm@ornl.gov Swen Boehm ORNL -shamisp@ornl.gov Pavel Shamis Mellanox, ORNL -sharonm@voltaire.com Sharon Melamed Voltaire -shiqing@hlrs.de Shiqing Fan HLRS -stork@hlrs.de Sven Stork HLRS -surs@osu.edu Sayantan Sur OSU -sushant@lanl.gov Sushant Sharma LANL -swise@opengridcomputing.com Steve Wise Chelsio -sylvain.jeaugey@bull.net Sylvain Jeaugey Bull -terry.dontje@oracle.com Terry Dontje Sun, Oracle -thkorde@sandia.gov Todd Kordenbrock SNL -tmattox@gmail.com Tim Mattox IU, Cisco -tprins@lanl.gov Tim Prins IU, LANL -twoodall@lanl.gov Tim Woodall LANL -vasily@mellanox.com Vasily Filipov Mellanox -vsahay@osl.iu.edu Vishal Sahay IU -vvenkates@gmail.com Vishwanath Venkatesan UH, Intel -wbland@icl.utk.edu Wesley Bland UTK -yaeld@mellanox.com Yael Dalen Mellanox -yosefe@mellanox.com Yossi Etigin Mellanox -yuw@lanl.gov Weikuan Yu LANL, OSU -------------------------------- --------------------------- ------------------- +----- -Affiliation abbreviations: --------------------------- - -AMD = Advanced Micro Devices, Inc. -Chelsio = Chelsio Communications, Inc. -Cisco = Cisco Systems, Inc. -HE = Hochschule Esslingen -HLRS = High Performance Computing Center, Stuttgart -IU = Indiana University -LANL = Los Alamos National Laboratory -LBNL = Lawrence Berkeley National Laboratory -Voltaire = Voltaire -Myricom = Myricom, Inc. -NU = Northeastern University -Oracle = Oracle -ORNL = Oak Ridge National Laboratory -OSU = The Ohio State University -QLogic = QLogic -RIST = Research Organization for Information Science and Technology -SNL = Sandia National Laboratories -Sun = Sun Microsystems, Inc. 
-TUC = Technische Universtaet Chemnitz -UBC = University of British Columbia -UJ = Friedrich-Schiller-Universitat Jena -UH = University of Houston -OU = University of Oregon -UPV = Universitat Politecnica de Valencia -UTK = University of Tennessee, Knoxville -UWL = University of Wisconsin-La Crosse -Mellanox = Mellanox -ZIH = Technische Universitaet Dresden +Abhishek Joshi, Broadcom + abhishek.joshi@broadcom.com +Abhishek Kulkarni, Indiana University + adkulkar@cs.indiana.edu +Aboorva Devarajan, IBM + abodevar@in.ibm.com +Adrian Knoth, Friedrich-Schiller-Universitat Jena + adi@minet.uni-jena.de +Adrian Reber, Hochschule Esslingen + adrian@lisas.de +Aleksey Senin, Mellanox + alekseys@mellanox.com +Alex Margolin, Mellanox + alex.margolin@mail.huji.ac.il +Alex Mikheev, Mellanox + alexm@mellanox.com +Alina Sklarevich, Mellanox + alinas@mellanox.com +Andreas Knüpfer, Technische Universitaet Dresden + andreas.knuepfer@tu-dresden.de +Andrew Friedley, Indiana University, Sandia National Laboratory, Intel + afriedle@osl.iu.edu + andrew.friedley@intel.com +Andrew Lumsdaine, Indiana University + lums@cs.indiana.edu +Annapurna Dasari, Intel + annapurna.dasari@intel.com +Anya Tatashina, Sun + anya.tatashina@sun.com +Artem Polyakov, Individual, Mellanox + artpol84@gmail.com +Aurelien Bouteiller, University of Tennessee-Knoxville + bouteill@icl.utk.edu +Avneesh Pant, QLogic + avneesh.pant@qlogic.com +Bert Wesarg, Technische Universitaet Dresden + bert.wesarg@tu-dresden.de +Bill D'Amico, Cisco + bdamico@cisco.com +Boris Karasev, Mellanox + karasev.b@gmail.com +Brad Benton, IBM, AMD + brad.benton@us.ibm.com +Brad Penoff, University of British Columbia + penoff@cs.ubc.ca +Brian Barrett, Indiana University, Los Alamos National Laboratory, Sandia National Laboratory + brbarret@open-mpi.org +Brice Goglin, INRIA + brice.goglin@inria.fr +Camille Coti, University of Tennessee-Knoxville, INRIA + ccoti@icl.utk.edu +Christian Bell, QLogic + christian.bell@qlogic.com +Christoph Niethammer, 
High Performance Computing Center, Stuttgart + niethammer@hlrs.de +Christopher Yeoh, IBM + cyeoh@au1.ibm.com +Craig E Rasmussen, Los Alamos National Laboratory, University of Oregon + rasmus@cas.uoregon.edu +Dan Lacher, Sun + dan.lacher@sun.com +Dave Goodell, Cisco + davidjgoodell@gmail.com + dgoodell@cisco.com +David Daniel, Los Alamos National Laboratory + ddd@lanl.gov +Denis Dimick, Los Alamos National Laboratory + dgdimick@lnal.gov +Devendar Bureddy, Mellanox + devendar@mellanox.com +Dimitar Pashov, Individual + d.pashov@gmail.com +Donald Kerr, Sun, Oracle + donald.kerr@oracle.com +Doron Shoham, Mellanox + dorons@mellanox.com +Edgar Gabriel, High Performance Computing Center, Stuttgart, University of Tennessee-Knoxville, University of Houston + egabriel@central.uh.edu + gabriel@cs.uh.edu +Elena Elkina, Mellanox + elena.elkina@itseez.com +Ethan Mallove, Sun, Oracle + ethan.mallove@oracle.com +Eugene Loh, Sun, Oracle + eugene.loh@oracle.com +Francois WELLENREITER, Individual + francois.wellenreiter@atos.net + wellen@free.fr +Gabriel Pichot, Individual + gabriel.pichot@gmail.com +Galen Shipman, Los Alamos National Laboratory + gshipman@lanl.gov +Geoffrey Paulsen, IBM + gpaulsen@us.ibm.com +George Bosilca, University of Tennessee-Knoxville + bosilca@eecs.utk.edu + bosilca@icl.utk.edu +Gilles Gouaillardet, Research Organization for Information Science and Technology + gilles.gouaillardet@iferc.org + gilles@rist.or.jp +Ginger Young, Los Alamos National Laboratory + gingery@lanl.gov +Gleb Natapov, Voltaire + gleb@voltaire.com +Gopal Santhanaraman, The Ohio State University + santhana@osu.edu +Graham Fagg, University of Tennessee-Knoxville + gef@icl.utk.edu +Greg Koenig, Oak Ridge National Laboratory + koenig@acm.org +Greg Watson, Los Alamos National Laboratory + gwatson@lanl.gov +Hadi Montakhabi, University of Houston + hmontakhabi@uh.edu +Howard Pritchard, Los Alamos National Laboratory + howardp@lanl.gov + hppritcha@gmail.com +Iain Bason, Sun, Oracle + 
iain.bason@oracle.com +Igor Ivanov, Mellanox + igor.ivanov.va@gmail.com + igor.ivanov@itseez.com +Igor Usarov, Mellanox + igoru@mellanox.com +Jeff Squyres, Indiana University, Cisco + jeff@squyres.com + jsquyres@cisco.com +Jelena Pjesivac-Grbovic, University of Tennessee-Knoxville + pjesa@icl.iu.edu +Jithin Jose, Intel + jithin.jose@intel.com +Jon Mason, OpenGrid Computing + jdmason@opengridcomputing.com +Jose Roman, Universitat Politecnica de Valencia + jroman@dsic.upv.es +Josh Hursey, Indiana University, Oak Ridge National Laboratory, Los Alamos National Laboratory, Lawrence Berkeley National Laboratory, University of Wisconsin-La Crosse, IBM + jhursey@us.ibm.com + jjhursey@open-mpi.org +Joshua Gerrard, Individual + enquiries@joshuagerrard.com + joshuagerrard+ompi-commit@protonmail.com +Joshua Ladd, Mellanox + jladd.mlnx@gmail.com + joshual@mellanox.com +KAWASHIMA Takahiro, Fujitsu + rivis.kawashima@nifty.com + t-kawashima@jp.fujitsu.com +Karen Norteman, Sun + karen.norteman@sun.com +Karol Mroz, University of British Columbia + mroz.karol@gmail.com +Kenneth Matney, Oak Ridge National Laboratory + matneykdsr@ornl.gov +L. R.
Rajeshnarayanan, Intel + l.r.rajeshnarayanan@intel.com +LANL OMPI Bot, Los Alamos National Laboratory + ompigithub@lanl.gov +Laura Casswell, Los Alamos National Laboratory + lcasswell@lanl.gov +Lenny Verkhovsky, Voltaire + lennyb@voltaire.com +Li-Ta Lo, Los Alamos National Laboratory + ollie@lanl.gov +MPI Team (bot), self + mpiteam@open-mpi.org +Mangala Jyothi Bhaskar, University of Houston + mjbhaskar@salmon.cs.uh.edu + mjbhaskar@uh.edu +Manjunath Gorentla Venkata, Oak Ridge National Laboratory + manjugv@ornl.gov +Mark Allen, IBM + markalle@us.ibm.com +Mark Santcroos, Rutgers University + mas781@scarletmail.rutgers.edu +Mark Taylor, Los Alamos National Laboratory + mt@lanl.gov +Matias A Cabral, Intel + matias.a.cabral@intel.com +Matthias Jurenz, Technische Universitaet Dresden + matthias.jurenz@tu-dresden.de +Maximilien Levesque, Individual + maximilien.levesque@gmail.com +Mike Dubman, Mellanox + miked@mellanox.com +Mitch Sukalski, Sandia National Laboratory + mwsukal@ca.sandia.gov +Mohamad Chaarawi, University of Houston + mschaara@cs.uh.edu +Nadezhda Kogteva, Mellanox + nadezhda.kogteva@itseez.com +Nadia Derbey, Bull + nadia.derbey@bull.net +Nathan Hjelm, Los Alamos National Laboratory + hjelmn@cs.unm.edu + hjelmn@lanl.gov + hjelmn@me.com +Nathaniel Graham, Los Alamos National Laboratory + ngraham@lanl.gov + nrgraham23@gmail.com +Nick Papior Andersen, Individual + nickpapior@gmail.com +Nicolas Chevalier, Bull + nicolas.chevalier@bull.net +Nysal Jan K A, IBM + jnysal@in.ibm.com +Orion Poplawski, Individual + orion@cora.nwra.com +Oscar Vega-Gisbert, Universitat Politecnica de Valencia + ovega@dsic.upv.es +Pak Lui, Sun + pak.lui@sun.com +Pascal Deveze, Bull + pascal.deveze@atos.net +Patrick Geoffray, Myricom + patrick@myri.com +Pavel Shamis, Mellanox, Oak Ridge National Laboratory + pasharesearch@gmail.com + shamisp@ornl.gov +Pierre Lemarinier, University of Tennessee-Knoxville + lemarini@icl.utk.edu +Piotr Lesnicki, Bull + piotr.lesnicki@ext.bull.net +Potnuri 
Bharat Teja, Chelsio + bharat@chelsio.com +Prabhanjan Kambadur, Indiana University + pkambadu@osl.iu.edu +Rainer Keller, High Performance Computing Center, Stuttgart, Oak Ridge National Laboratory, Hochschule fuer Technik Stuttgart + rainer.keller@hlrs.de +Ralph Castain, Los Alamos National Laboratory, Cisco, Greenplum, Intel + rhc@open-mpi.org +Reese Faucette, Cisco + rfaucett@cisco.com +Rich Graham, Los Alamos National Laboratory, Oak Ridge National Laboratory, Mellanox + richardg@mellanox.com +Rob Awles, Los Alamos National Laboratory + rta@lanl.gov +Rob Latham, Argonne National Laboratory + robl@mcs.anl.gov +Rolf vandeVaart, Sun, Oracle, NVIDIA + rvandevaart@nvidia.com +Ron Brightwell, Sandia National Laboratory + rbbrigh@sandia.gov +Ryan Grant, Sandia National Laboratory + regrant233@gmail.com + regrant@sandia.gov +Sameh S. Sharkawi, IBM + sssharka@us.ibm.com +Sami Ayyorgun, Los Alamos National Laboratory + sami@lanl.gov +Samuel Gutierrez, Los Alamos National Laboratory + samuel@lanl.gov +Sayantan Sur, The Ohio State University + surs@osu.edu +Sharon Melamed, Voltaire + sharonm@voltaire.com +Shiqing Fan, High Performance Computing Center, Stuttgart + shiqing@hlrs.de +Steve Wise, OpenGrid Computing + swise@opengridcomputing.com +Sushant Sharma, Los Alamos National Laboratory + sushant@lanl.gov +Sven Stork, High Performance Computing Center, Stuttgart + stork@hlrs.de +Swen Boehm, Oak Ridge National Laboratory + sboehm@ornl.gov +Sylvain Jeaugey, Bull, NVIDIA + sjeaugey@nvidia.com + sylvain.jeaugey@bull.net +Teng Lin, Individual + teng.lin@gmail.com +Terry Dontje, Sun, Oracle + terry.dontje@oracle.com +Thananon Patinyasakdikul, Cisco, University of Tennessee-Knoxville + apatinya@cisco.com + tpatinya@utk.edu +Thara Angskun, University of Tennessee-Knoxville + angskun@cs.utk.edu +Thomas Herault, University of Tennessee-Knoxville + herault@icl.utk.edu +Tim Mattox, Indiana University, Cisco, Individual + tmattox@gmail.com +Tim Prins, Indiana University, Los Alamos 
National Laboratory + tprins@lanl.gov +Tim Woodall, Los Alamos National Laboratory + twoodall@lanl.gov +Todd Kordenbrock, Sandia National Laboratory + thkgcode@gmail.com + thkorde@sandia.gov +Tom Naughton, Oak Ridge National Laboratory + naughtont@ornl.gov +Tomislav Janjusic, Mellanox + tomislavj@mngx-apl-01.mtl.labs.mlnx +Torsten Hoefler, Indiana University, Technische Universtaet Chemnitz + htor@osl.iu.edu +Valentin Petrov, Mellanox + valentinp@mellanox.com +Vasily Filipov, Mellanox + vasily@mellanox.com +Vishal Sahay, Indiana University + vsahay@osl.iu.edu +Vishwanath Venkatesan, University of Houston, Intel + vvenkates@gmail.com +Weikuan Yu, Los Alamos National Laboratory + yuw@lanl.gov +Wesley Bland, University of Tennessee-Knoxville + wbland@icl.utk.edu +William Throwe, Individual + wtt6@cornell.edu +Yael Dayan, Mellanox + yaeld@mellanox.com +Yevgeny Kliteynik, Mellanox + kliteyn@mellanox.co.il +Yohann Burette, Intel + yohann.burette@intel.com +Yossi Itigin, Mellanox + yosefe@mellanox.com +Zhi Ming Wang, IBM + wangzm@cn.ibm.com diff --git a/HACKING b/HACKING index 3ed56a98cdc..473500aa5c3 100644 --- a/HACKING +++ b/HACKING @@ -8,7 +8,7 @@ Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. -Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2008-2016 Cisco Systems, Inc. All rights reserved. Copyright (c) 2013 Intel, Inc. All rights reserved. $COPYRIGHT$ @@ -24,60 +24,34 @@ source code form, most likely through a developer's tree (i.e., a Git clone). -Debugging vs. Optimized Builds -============================== - -If you are building Open MPI from a Git clone, the default build -includes a lot of debugging features. 
This happens automatically when -when configure detects the hidden ".git" Git meta directory (that is -present in all Git clones) in your source tree, and therefore -activates a number of developer-only debugging features in the Open -MPI code base. - -By definition, debugging builds will perform [much] slower than -optimized builds of Open MPI. You should *NOT* conduct timing tests -or try to run production performance numbers with debugging builds. - -If you wish to build an optimized version of Open MPI from a -developer's checkout, you have three main options: - -1. Use the "--with-platform=optimized" switch to configure. This is - the preferred (and probably easiest) method. For example: - - shell$ git clone git@github.com:open-mpi/ompi.git - shell$ cd ompi - shell$ ./autogen.pl - shell$ mkdir build - shell$ cd build - shell$ ../configure --with-platform=optimized ... - [...lots of output...] - shell$ make all install - -2. Use a VPATH build. Simply build Open MPI from a different - directory than the source tree -- one where the .git subdirectory - is not present. For example: - - shell$ git clone git@github.com:open-mpi/ompi.git - shell$ cd ompi - shell$ ./autogen.pl - shell$ mkdir build - shell$ cd build - shell$ ../configure ... - [...lots of output...] - shell$ make all install - -3. Manually specify configure options to disable all the debugging - options (note that this is exactly what "--with-platform=optimized" - does behind the scenes). You'll need to carefully examine the - output of "./configure --help" to see which options to disable. - They are all listed, but some are less obvious than others (they - are not listed here because it is a changing set of flags; by - Murphy's Law, listing them here will pretty much guarantee that - this file will get out of date): - - shell$ ./configure --disable-debug ... - [...lots of output...] 
- shell$ make all install +Developer Builds: Compiler Pickyness by Default +=============================================== + +If you are building Open MPI from a Git clone (i.e., there is a ".git" +directory in your build tree), the default build includes extra +compiler pickyness, which will result in more compiler warnings than +in non-developer builds. Getting these extra compiler warnings is +helpful to Open MPI developers in making the code base as clean as +possible. + +Developers can disable this picky-by-default behavior by using the +--disable-picky configure option. Also note that extra-picky compiles +do *not* happen automatically when you do a VPATH build (e.g., if +".git" is in your source tree, but not in your build tree). + +Prior versions of Open MPI would automatically activate a lot of +(performance-reducing) debugging code by default if ".git" was found +in your build tree. This is no longer true. You can manually enable +these (performance-reducing) debugging features in the Open MPI code +base with these configure options: + + --enable-debug + --enable-mem-debug + --enable-mem-profile + +NOTE: These options are really only relevant to those who are +developing Open MPI itself. They are not generally helpful for +debugging general MPI applications. Use of GNU Autoconf, Automake, and Libtool (and m4) diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md new file mode 100644 index 00000000000..475fb36c2a2 --- /dev/null +++ b/ISSUE_TEMPLATE.md @@ -0,0 +1,6 @@ +Thanks very much for your interest in opening issues against Open MPI. +This helps us make a better product! + +However, do not open issues on the open-mpi/ompi-release repo. +Rather, please open issues on the [open-mpi/ompi](https://github.com/open-mpi/ompi/issues) repo. + diff --git a/LICENSE b/LICENSE index 469eedd6de4..c835765b580 100644 --- a/LICENSE +++ b/LICENSE @@ -8,25 +8,25 @@ corresponding files. 
Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved. -Copyright (c) 2004-2010 The University of Tennessee and The University +Copyright (c) 2004-2017 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. -Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, +Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2008 The Regents of the University of California. All rights reserved. -Copyright (c) 2006-2010 Los Alamos National Security, LLC. All rights - reserved. -Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights + reserved. +Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006-2010 Voltaire, Inc. All rights reserved. -Copyright (c) 2006-2011 Sandia National Laboratories. All rights reserved. +Copyright (c) 2006-2017 Sandia National Laboratories. All rights reserved. Copyright (c) 2006-2010 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. -Copyright (c) 2006-2010 The University of Houston. All rights reserved. +Copyright (c) 2006-2017 The University of Houston. All rights reserved. Copyright (c) 2006-2009 Myricom, Inc. All rights reserved. -Copyright (c) 2007-2008 UT-Battelle, LLC. All rights reserved. -Copyright (c) 2007-2010 IBM Corporation. All rights reserved. -Copyright (c) 1998-2005 Forschungszentrum Juelich, Juelich Supercomputing +Copyright (c) 2007-2017 UT-Battelle, LLC. All rights reserved. +Copyright (c) 2007-2017 IBM Corporation. All rights reserved. +Copyright (c) 1998-2005 Forschungszentrum Juelich, Juelich Supercomputing Centre, Federal Republic of Germany Copyright (c) 2005-2008 ZIH, TU Dresden, Federal Republic of Germany Copyright (c) 2007 Evergrid, Inc. All rights reserved. 
@@ -35,17 +35,26 @@ Copyright (c) 2008-2009 Institut National de Recherche en Informatique. All rights reserved. Copyright (c) 2007 Lawrence Livermore National Security, LLC. All rights reserved. -Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved. +Copyright (c) 2007-2017 Mellanox Technologies. All rights reserved. Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. -Copyright (c) 2008-2010 Oak Ridge National Labs. All rights reserved. -Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved. -Copyright (c) 2009 Bull SAS. All rights reserved. +Copyright (c) 2008-2017 Oak Ridge National Labs. All rights reserved. +Copyright (c) 2006-2012 Oracle and/or its affiliates. All rights reserved. +Copyright (c) 2009-2015 Bull SAS. All rights reserved. Copyright (c) 2010 ARM ltd. All rights reserved. +Copyright (c) 2016 ARM, Inc. All rights reserved. Copyright (c) 2010-2011 Alex Brick . All rights reserved. Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights reserved. -Copyright (c) 2013-2014 Intel, Inc. All rights reserved. -Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. +Copyright (c) 2013-2016 Intel, Inc. All rights reserved. +Copyright (c) 2011-2017 NVIDIA Corporation. All rights reserved. +Copyright (c) 2016 Broadcom Limited. All rights reserved. +Copyright (c) 2011-2017 Fujitsu Limited. All rights reserved. +Copyright (c) 2014-2015 Hewlett-Packard Development Company, LP. All + rights reserved. +Copyright (c) 2013-2017 Research Organization for Information Science (RIST). + All rights reserved. +Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights + reserved. $COPYRIGHT$ diff --git a/Makefile.am b/Makefile.am index 259330077e2..a4eba0a207f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,17 +5,17 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -30,10 +30,12 @@ dist-hook: # Check for common symbols. Use a "-hook" to increase the odds that a # developer will see it at the end of their installation process. install-exec-hook: - -@$(top_srcdir)/config/find_common_syms \ - --brief \ - --top_builddir=$(top_builddir) \ - --top_srcdir=$(top_srcdir) \ - --objext=$(OBJEXT) + -@if test -d "$(top_srcdir)/.git"; then \ + $(top_srcdir)/config/find_common_syms \ + --brief \ + --top_builddir=$(top_builddir) \ + --top_srcdir=$(top_srcdir) \ + --objext=$(OBJEXT); \ + fi ACLOCAL_AMFLAGS = -I config diff --git a/NEWS b/NEWS index fb927d5fd9f..912c7268ddf 100644 --- a/NEWS +++ b/NEWS @@ -8,18 +8,18 @@ Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2006 The Regents of the University of California. All rights reserved. -Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006 Voltaire, Inc. All rights reserved. Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. -Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights +Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights reserved. Copyright (c) 2010-2012 IBM Corporation. All rights reserved. 
Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. -Copyright (c) 2012 Sandia National Laboratories. All rights reserved. +Copyright (c) 2012-2017 Sandia National Laboratories. All rights reserved. Copyright (c) 2012 University of Houston. All rights reserved. Copyright (c) 2013 NVIDIA Corporation. All rights reserved. -Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +Copyright (c) 2013-2016 Intel, Inc. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -53,24 +53,876 @@ included in the vX.Y.Z section and be denoted as: (** also appeared: A.B.C) -- indicating that this item was previously included in release version vA.B.C. -Master (not on release branches yet) ------------------------------------- +2.1.0 -- March, 2017 +-------------------- + +Major new features: + +- The main focus of the Open MPI v2.1.0 release was to update to PMIx + v1.2.1. When using PMIx (e.g., via mpirun-based launches, or via + direct launches with recent versions of popular resource managers), + launch time scalability is improved, and the run time memory + footprint is greatly decreased when launching large numbers of MPI / + OpenSHMEM processes. +- Update OpenSHMEM API conformance to v1.3. +- The usnic BTL now supports MPI_THREAD_MULTIPLE. +- General/overall performance improvements to MPI_THREAD_MULTIPLE. +- Add a summary message at the bottom of configure that tells you many + of the configuration options specified and/or discovered by Open + MPI. + +Changes in behavior compared to prior versions: + +- None. + +Removed legacy support: + +- The ptmalloc2 hooks have been removed from the Open MPI code base. + This is not really a user-noticeable change; it is only mentioned + here because there was much rejoicing in the Open MPI developer + community. + +Bug fixes/minor improvements: + +- New MCA parameters: + - iof_base_redirect_app_stderr_to_stdout: as its name implies, it + combines MPI / OpenSHMEM applications' stderr into its stdout + stream.
+ - opal_event_include: allow the user to specify which FD selection + mechanism is used by the underlying event engine. + - opal_stacktrace_output: indicate where stacktraces should be sent + upon MPI / OpenSHMEM process crashes ("none", "stdout", "stderr", + "file:filename"). + - orte_timeout_for_stack_trace: number of seconds to wait for stack + traces to be reported (or <=0 to wait forever). + - mtl_ofi_control_prog_type/mtl_ofi_data_prog_type: specify libfabric + progress model to be used for control and data. +- Fix MPI_WTICK regression where the time reported may be inaccurate + on systems with processor frequency scaling enabled. +- Fix regression that lowered the memory maximum message bandwidth for + large messages on some BTL network transports, such as openib, sm, + and vader. +- Fix a name collision in the shared file pointer MPI IO file locking + scheme. Thanks to Nicolas Joly for reporting the issue. +- Fix datatype extent/offset errors in MPI_PUT and MPI_RACCUMULATE + when using the Portals 4 one-sided component. +- Add support for non-contiguous datatypes to the Portals 4 one-sided + component. +- Various updates for the UCX PML. +- Updates to the following man pages: + - mpirun(1) + - MPI_COMM_CONNECT(3) + - MPI_WIN_GET_NAME(3). Thanks to Nicolas Joly for reporting the + typo. + - MPI_INFO_GET_[NKEYS|NTHKEY](3). Thanks to Nicolas Joly for + reporting the typo. +- Fixed a problem in the TCP BTL when using MPI_THREAD_MULTIPLE. + Thanks to Evgueni Petrov for reporting. +- Fixed external32 representation in the romio314 module. Note that + for now, external32 representation is not correctly supported by the + ompio module. Thanks to Thomas Gastine for bringing this to our + attention. +- Add note how to disable a warning message about when a high-speed + MPI transport is not found. Thanks to Susan Schwarz for reporting + the issue. +- Ensure that sending SIGINT when using the rsh/ssh launcher does not + orphan children nodes in the launch tree.
+- Fix the help message when showing deprecated MCA param names to show + the correct (i.e., deprecated) name. +- Enable support for the openib BTL to use multiple different + InfiniBand subnets. +- Fix a minor error in MPI_AINT_DIFF. +- Fix bugs with MPI_IN_PLACE handling in: + - MPI_ALLGATHER[V] + - MPI_[I][GATHER|SCATTER][V] + - MPI_IREDUCE[_SCATTER] + - Thanks to all the users who helped diagnose these issues. +- Allow qrsh to tree spawn (if the back-end system supports it). +- Fix MPI_T_PVAR_GET_INDEX to return the correct index. +- Correctly position the shared file pointer in append mode in the + OMPIO component. +- Add some deprecated names into shmem.h for backwards compatibility + with legacy codes. +- Fix MPI_MODE_NOCHECK support. +- Fix a regression in PowerPC atomics support. Thanks to Orion + Poplawski for reporting the issue. +- Fixes for assembly code with aggressively-optimized compilers on + x86_64/AMD64 platforms. +- Fix one more place where configure was mangling custom CFLAGS. + Thanks to Phil Tooley (@Telemin) for reporting the issue. +- Better handle builds with external installations of hwloc. +- Fixed a hang with MPI_PUT and MPI_WIN_LOCK_ALL. +- Fixed a bug when using MPI_GET on non-contiguous datatypes and + MPI_LOCK/MPI_UNLOCK. +- Fixed a bug when using POST/START/COMPLETE/WAIT after a fence. +- Fix configure portability by cleaning up a few uses of "==" with + "test". Thanks to Kevin Buckley for pointing out the issue. +- Fix bug when using darrays with lib and extent of darray datatypes. +- Updates to make Open MPI binary builds more bit-for-bit + reproducible. Thanks to Alastair McKinstry for the suggestion. +- Fix issues regarding persistent request handling. +- Ensure that shmemx.h is a standalone OpenSHMEM header file. Thanks + to Nick Park (@nspark) for the report. +- Ensure that we always send SIGTERM prior to SIGKILL. Thanks to Noel + Rycroft for the report. +- Added ConnectX-5 and Chelsio T6 device defaults for the openib BTL.
+- OpenSHMEM no longer supports MXM less than v2.0. +- Plug a memory leak in ompi_osc_sm_free. Thanks to Joseph Schuchart + for the report. +- The "self" BTL now uses less memory. +- The vader BTL is now more efficient in terms of memory usage when + using XPMEM. +- Removed the --enable-openib-failover configure option. This is not + considered backwards-incompatible because this option was stale and + had long-since stopped working, anyway. +- Allow jobs launched under Cray aprun to use hyperthreads if + opal_hwloc_base_hwthreads_as_cpus MCA parameter is set. +- Add support for 32-bit and floating point Cray Aries atomic + operations. +- Add support for network AMOs for MPI_ACCUMULATE, MPI_FETCH_AND_OP, + and MPI_COMPARE_AND_SWAP if the "ompi_single_intrinsic" info key is + set on the window or the "acc_single_intrinsic" MCA param is set. +- Automatically disqualify RDMA CM support in the openib BTL if + MPI_THREAD_MULTIPLE is used. +- Make configure smarter/better about auto-detecting Linux CMA + support. +- Improve the scalability of MPI_COMM_SPLIT_TYPE. +- Fix the mixing of C99 and C++ header files with the MPI C++ + bindings. Thanks to Alastair McKinstry for the bug report. +- Add support for ARM v8. +- Several MCA parameters now directly support MPI_T enumerator + semantics (i.e., they accept a limited set of values -- e.g., MCA + parameters that accept boolean values). +- Added --with-libmpi-name=STRING configure option for vendor releases + of Open MPI. See the README for more detail. +- Fix a problem with Open MPI's internal memory checker. Thanks to Yvan + Fournier for reporting. +- Fix a multi-threaded issue with MPI_WAIT. Thanks to Pascal Deveze for + reporting. 
+ +Known issues (to be addressed in v2.1.1): + +- See the list of fixes slated for v2.1.1 here: + https://github.com/open-mpi/ompi/milestone/26 + + +2.0.2 -- 26 January 2017 +------------------------- + +Bug fixes/minor improvements: + +- Fix a problem with MPI_FILE_WRITE_SHARED when using MPI_MODE_APPEND and + Open MPI's native MPI-IO implementation. Thanks to Nicolas Joly for + reporting. +- Fix a typo in the MPI_WIN_GET_NAME man page. Thanks to Nicolas Joly + for reporting. +- Fix a race condition with ORTE's session directory setup. Thanks to + @tbj900 for reporting this issue. +- Fix a deadlock issue arising from Open MPI's approach to catching calls to + munmap. Thanks to Paul Hargrove for reporting and helping to analyze this + problem. +- Fix a problem with PPC atomics which caused make check to fail unless builtin + atomics configure option was enabled. Thanks to Orion Poplawski for reporting. +- Fix a problem with use of x86_64 cpuid instruction which led to segmentation + faults when Open MPI was configured with -O3 optimization. Thanks to Mark + Santcroos for reporting this problem. +- Fix a problem when using built in atomics configure options on PPC platforms + when building 32 bit applications. Thanks to Paul Hargrove for reporting. +- Fix a problem with building Open MPI against an external hwloc installation. + Thanks to Orion Poplawski for reporting this issue. +- Remove use of DATE in the message queue version string reported to debuggers to + ensure bit-wise reproducibility of binaries. Thanks to Alastair McKinstry + for help in fixing this problem. +- Fix a problem with early exit of a MPI process without calling MPI_FINALIZE + or MPI_ABORT that could lead to job hangs. Thanks to Christof Koehler for + reporting. +- Fix a problem with forwarding of SIGTERM signal from mpirun to MPI processes + in a job. Thanks to Noel Rycroft for reporting this problem. +- Plug some memory leaks in MPI_WIN_FREE discovered using Valgrind.
Thanks + to Joseph Schuchart for reporting. +- Fix a problem with MPI_NEIGHBOR_ALLTOALL when using a communicator with an empty topology + graph. Thanks to Daniel Ibanez for reporting. +- Fix a typo in a PMIx component help file. Thanks to @njoly for reporting this. +- Fix a problem with Valgrind false positives when using Open MPI's internal memchecker. + Thanks to Yvan Fournier for reporting. +- Fix a problem with MPI_FILE_DELETE returning MPI_SUCCESS when + deleting a non-existent file. Thanks to Wei-keng Liao for reporting. +- Fix a problem with MPI_IMPROBE that could lead to hangs in subsequent MPI + point to point or collective calls. Thanks to Chris Pattison for reporting. +- Fix a problem when configuring Open MPI for powerpc with --enable-mpi-cxx + enabled. Thanks to Alastair McKinstry for reporting. +- Fix a problem using MPI_IALLTOALL with MPI_IN_PLACE argument. Thanks to + Chris Ward for reporting. +- Fix a problem using MPI_RACCUMULATE with the Portals4 transport. Thanks to + @PDeveze for reporting. +- Fix an issue with static linking and duplicate symbols arising from PMIx + Slurm components. Thanks to Limin Gu for reporting. +- Fix a problem when using MPI dynamic memory windows. Thanks to + Christoph Niethammer for reporting. +- Fix a problem with Open MPI's pkgconfig files. Thanks to Alastair McKinstry + for reporting. +- Fix a problem with MPI_IREDUCE when the same buffer is supplied for the + send and recv buffer arguments. Thanks to Valentin Petrov for reporting. +- Fix a problem with atomic operations on PowerPC. Thanks to Paul + Hargrove for reporting. + +Known issues (to be addressed in v2.0.3): + +- See the list of fixes slated for v2.0.3 here: + https://github.com/open-mpi/ompi/milestone/23 + +2.0.1 -- 2 September 2016 +----------------------- + +Bug fixes/minor improvements: + +- Short message latency and message rate performance improvements for + all transports. +- Fix shared memory performance when using RDMA-capable networks.
+ Thanks to Tetsuya Mishima and Christoph Niethammer for reporting. +- Fix bandwidth performance degradation in the yalla (MXM) PML. Thanks + to Andreas Kempf for reporting the issue. +- Fix OpenSHMEM crash when running on non-Mellanox MXM-based networks. + Thanks to Debendra Das for reporting the issue. +- Fix a crash occurring after repeated calls to MPI_FILE_SET_VIEW with + predefined datatypes. Thanks to Eric Chamberland and Matthew + Knepley for reporting and helping chase down this issue. +- Fix stdin propagation to MPI processes. Thanks to Jingchao Zhang + for reporting the issue. +- Fix various runtime and portability issues by updating the PMIx + internal component to v1.1.5. +- Fix process startup failures on Intel MIC platforms due to very + large entries in /proc/mounts. +- Fix a problem with use of relative path for specifying executables to + mpirun/oshrun. Thanks to David Schneider for reporting. +- Various improvements when running over portals-based networks. +- Fix thread-based race conditions with GNI-based networks. +- Fix a problem with MPI_FILE_CLOSE and MPI_FILE_SET_SIZE. Thanks + to Cihan Altinay for reporting. +- Remove all use of rand(3) from within Open MPI so as not to perturb + applications' use of it. Thanks to Matias Cabral and Noel Rycroft + for reporting. +- Fix crash in MPI_COMM_SPAWN. +- Fix types for MPI_UNWEIGHTED and MPI_WEIGHTS_EMPTY. Thanks to + Lisandro Dalcin for reporting. +- Correctly report the name of MPI_INTEGER16. +- Add some missing MPI constants to the Fortran bindings. +- Fixed compile error when configuring Open MPI with --enable-timing. +- Correctly set the shared library version of libompitrace.so. Thanks + to Alastair McKinstry for reporting. +- Fix errors in the MPI_RPUT, MPI_RGET, MPI_RACCUMULATE, and + MPI_RGET_ACCUMULATE Fortran bindings. Thanks to Alfio Lazzaro and + Joost VandeVondele for tracking this down. +- Fix problems with use of derived datatypes in non-blocking + collectives.
Thanks to Yuki Matsumoto for reporting. +- Fix problems with OpenSHMEM header files when using CMake. Thanks to + Paul Kapinos for reporting the issue. +- Fix problem with use of non-zero lower bound datatypes in + collectives. Thanks to Hristo Iliev for reporting. +- Fix a problem with memory allocation within MPI_GROUP_INTERSECTION. + Thanks to Lisandro Dalcin for reporting. +- Fix an issue with MPI_ALLGATHER for communicators that don't consist + of two ranks. Thanks to David Love for reporting. +- Various fixes for collectives when used with esoteric MPI datatypes. +- Fixed corner cases of handling DARRAY and HINDEXED_BLOCK datatypes. +- Fix a problem with filesystem type check for OpenBSD. + Thanks to Paul Hargrove for reporting. +- Fix some debug input within Open MPI internal functions. Thanks to + Durga Choudhury for reporting. +- Fix a typo in a configury help message. Thanks to Paul Hargrove for + reporting. +- Correctly support MPI_IN_PLACE in MPI_[I]ALLTOALL[V|W] and + MPI_[I]EXSCAN. +- Fix alignment issues on SPARC platforms. + +Known issues (to be addressed in v2.0.2): + +- See the list of fixes slated for v2.0.2 here: + https://github.com/open-mpi/ompi/milestone/20, and + https://github.com/open-mpi/ompi-release/milestone/19 + (note that the "ompi-release" Github repo will be folded/absorbed + into the "ompi" Github repo at some point in the future) + + +2.0.0 -- 12 July 2016 +--------------------- -- ompi_info parsable output now escapes double quotes in values, and - also quotes values can contains colons. Thanks to Lev Givon for the - suggestion. -- CUDA-aware support can now handle GPUs within a node that do not - support CUDA IPC. Earlier versions would get error and abort. -- Do not build the MPI C++ bindings by default. They must be enabled - via --enable-mpi-cxx. -- Remove embedded VampirTrace. It is in maintenance mode since 2013. - Please consider Score-P (score-p.org) as an external replacement. -- usNIC BTL updated to use libfabric.
-- OFI MTL. + ********************************************************************** + * Open MPI is now fully MPI-3.1 compliant + ********************************************************************** + +Major new features: + +- Many enhancements to MPI RMA. Open MPI now maps MPI RMA operations + on to native RMA operations for those networks which support this + capability. +- Greatly improved support for MPI_THREAD_MULTIPLE (when configured + with --enable-mpi-thread-multiple). +- Enhancements to reduce the memory footprint for jobs at scale. A + new MCA parameter, "mpi_add_procs_cutoff", is available to set the + threshold for using this feature. +- Completely revamped support for memory registration hooks when using + OS-bypass network transports. +- Significant OMPIO performance improvements and many bug fixes. +- Add support for PMIx - Process Management Interface for Exascale. + Version 1.1.2 of PMIx is included internally in this release. +- Add support for PLFS file systems in Open MPI I/O. +- Add support for UCX transport. +- Simplify build process for Cray XC systems. Add support for + using native SLURM. +- Add a --tune mpirun command line option to simplify setting many + environment variables and MCA parameters. +- Add a new MCA parameter "orte_default_dash_host" to offer an analogue + to the existing "orte_default_hostfile" MCA parameter. +- Add the ability to specify the number of desired slots in the mpirun + --host option. + +Changes in behavior compared to prior versions: + +- In environments where mpirun cannot automatically determine the + number of slots available (e.g., when using a hostfile that does not + specify "slots", or when using --host without specifying a ":N" + suffix to hostnames), mpirun now requires the use of "-np N" to + specify how many MPI processes to launch. 
+- The MPI C++ bindings -- which were removed from the MPI standard in + v3.0 -- are no longer built by default and will be removed in some + future version of Open MPI. Use the --enable-mpi-cxx-bindings + configure option to build the deprecated/removed MPI C++ bindings. +- ompi_info now shows all components, even if they do not have MCA + parameters. The prettyprint output now separates groups with a + dashed line. +- OMPIO is now the default implementation of parallel I/O, with the + exception for Lustre parallel filesystems (where ROMIO is still the + default). The default selection of OMPI vs. ROMIO can be controlled + via the "--mca io ompi|romio" command line switch to mpirun. +- Per Open MPI's versioning scheme (see the README), increasing the + major version number to 2 indicates that this version is not + ABI-compatible with prior versions of Open MPI. You will need to + recompile MPI and OpenSHMEM applications to work with this version + of Open MPI. +- Removed checkpoint/restart code due to loss of maintainer. :-( +- Change the behavior for handling certain signals when using PSM and + PSM2 libraries. Previously, the PSM and PSM2 libraries would trap + certain signals in order to generate tracebacks. The mechanism was + found to cause issues with Open MPI's own error reporting mechanism. + If not already set, Open MPI now sets the IPATH_NO_BACKTRACE and + HFI_NO_BACKTRACE environment variables to disable PSM/PSM2's + handling these signals. + +Removed legacy support: + +- Removed support for OS X Leopard. +- Removed support for Cray XT systems. +- Removed VampirTrace. +- Removed support for Myrinet/MX. +- Removed legacy collective module:ML. +- Removed support for Alpha processors. +- Removed --enable-mpi-profiling configure option. 
+ +Known issues (to be addressed in v2.0.1): + +- See the list of fixes slated for v2.0.1 here: + https://github.com/open-mpi/ompi/milestone/16, and + https://github.com/open-mpi/ompi-release/milestone/16 + (note that the "ompi-release" Github repo will be folded/absorbed + into the "ompi" Github repo at some point in the future) + +- ompi-release#986: Fix data size counter for large ops with fcoll/static +- ompi-release#987: Fix OMPIO performance on Lustre +- ompi-release#1013: Fix potential inconsistency in btl/openib default settings +- ompi-release#1014: Do not return MPI_ERR_PENDING from collectives +- ompi-release#1056: Remove dead profile code from oshmem +- ompi-release#1081: Fix MPI_IN_PLACE checking for IALLTOALL{V|W} +- ompi-release#1081: Fix memchecker in MPI_IALLTOALLW +- ompi-release#1081: Support MPI_IN_PLACE in MPI_(I)ALLTOALLW and MPI_(I)EXSCAN +- ompi-release#1107: Allow future PMIx support for RM spawn limits +- ompi-release#1108: Fix sparse group process reference counting +- ompi-release#1109: If specified to be oversubscribed, disable binding +- ompi-release#1122: Allow NULL arrays for empty datatypes +- ompi-release#1123: Fix signed vs.
unsigned compiler warnings +- ompi-release#1123: Make max hostname length uniform across code base +- ompi-release#1127: Fix MPI_Compare_and_swap +- ompi-release#1127: Fix MPI_Win_lock when used with MPI_Win_fence +- ompi-release#1132: Fix typo in help message for --enable-mca-no-build +- ompi-release#1154: Ensure pairwise coll algorithms disqualify themselves properly +- ompi-release#1165: Fix typos in debugging/verbose message output +- ompi-release#1178: Fix ROMIO filesystem check on OpenBSD 5.7 +- ompi-release#1197: Fix Fortran pthread configure check +- ompi-release#1205: Allow using external PMIx 1.1.4 and 2.0 +- ompi-release#1215: Fix configure to support the NAG Fortran compiler +- ompi-release#1220: Fix combiner args for MPI_HINDEXED_BLOCK +- ompi-release#1225: Fix combiner args for MPI_DARRAY +- ompi-release#1226: Disable old memory hooks with recent gcc versions +- ompi-release#1231: Fix new "patcher" support for some XLC platforms +- ompi-release#1244: Fix Java error handling +- ompi-release#1250: Ensure TCP is not selected for RDMA operations +- ompi-release#1252: Fix verbose output in coll selection +- ompi-release#1253: Set a default name for user-defined MPI_Op +- ompi-release#1254: Add count==0 checks in some non-blocking colls +- ompi-release#1258: Fix "make distclean" when using external pmix/hwloc/libevent +- ompi-release#1260: Clean up/uniform mca/coll/base memory management +- ompi-release#1261: Remove "patcher" warning message for static builds +- ompi-release#1263: Fix IO MPI_Request for 0-size read/write +- ompi-release#1264: Add blocking fence for SLURM operations + +Bug fixes / minor enhancements: + +- Updated internal/embedded copies of third-party software: + - Update the internal copy of ROMIO to that which shipped in MPICH + 3.1.4. + - Update internal copy of libevent to v2.0.22. + - Update internal copy of hwloc to v1.11.2. 
+- Notable new MCA parameters: + - opal_progress_lp_call_ration: Control how often low-priority + callbacks are made during Open MPI's main progress loop. + - opal_common_verbs_want_fork_support: This replaces the + btl_openib_want_fork_support parameter. +- Add --with-platform-patches-dir configure option. +- Add --with-pmi-libdir configure option for environments that install + PMI libs in a non-default location. +- Various configure-related compatibility updates for newer versions + of libibverbs and OFED. +- Numerous fixes/improvements to orte-dvm. Special thanks to Mark + Santcroos for his help. +- Fix a problem with timer code on ia32 platforms. Thanks to + Paul Hargrove for reporting this and providing a patch. +- Fix a problem with use of a 64 bit atomic counter. Thanks to + Paul Hargrove for reporting. +- Fix a problem with singleton job launching. Thanks to Lisandro + Dalcin for reporting. +- Fix a problem with use of MPI_UNDEFINED with MPI_COMM_SPLIT_TYPE. + Thanks to Lisandro Dalcin for reporting. +- Silence a compiler warning in PSM MTL. Thanks to Adrian Reber for + reporting this. +- Properly detect Intel TrueScale and OmniPath devices in the ACTIVE + state. Thanks to Durga Choudhury for reporting the issue. +- Fix detection and use of Solaris Studio 12.5 (beta) compilers. + Thanks to Paul Hargrove for reporting and debugging. +- Fix various small memory leaks. +- Allow NULL arrays when creating empty MPI datatypes. +- Replace use of alloca with malloc for certain datatype creation + functions. Thanks to Bogdan Sataric for reporting this. +- Fix use of MPI_LB and MPI_UB in creation of certain MPI datatypes. + Thanks to Gus Correa for helping to fix this. +- Implement a workaround for a GNU Libtool problem. Thanks to Eric + Schnetter for reporting and fixing. +- Improve hcoll library detection in configure. Thanks to David + Shrader and Ake Sandgren for reporting this. +- Miscellaneous minor bug fixes in the hcoll component.
+- Miscellaneous minor bug fixes in the ugni component. +- Fix problems with XRC detection in OFED 3.12 and older releases. + Thanks to Paul Hargrove for his analysis of this problem. +- Update (non-standard/experimental) Java MPI interfaces to support + MPI-3.1 functionality. +- Fix an issue with MCA parameters for Java bindings. Thanks to + Takahiro Kawashima and Siegmar Gross for reporting this issue. +- Fix a problem when using persistent requests in the Java bindings. + Thanks to Nate Chambers for reporting. +- Fix problem with Java bindings on OS X 10.11. Thanks to Alexander + Daryin for reporting this issue. +- Fix a performance problem for large messages for Cray XC systems. + Thanks to Jerome Vienne for reporting this. +- Fix an issue with MPI_WIN_LOCK_ALL. Thanks to Thomas Jahns for + reporting. +- Fix an issue with passing a parameter to configure multiple times. + Thanks to QuesarVII for reporting and supplying a fix. +- Add support for ALPS resource allocation system on Cray CLE 5.2 and + later. Thanks to Mark Santcroos. +- Corrections to the HACKING file. Thanks to Maximilien Levesque. +- Fix an issue with user supplied reduction operator functions. + Thanks to Rupert Nash for reporting this. +- Fix an issue with an internal list management function. Thanks to + Adrian Reber for reporting this. +- Fix a problem with MPI-RMA PSCW epochs. Thanks to Berk Hess for + reporting this. +- Fix a problem in neighborhood collectives. Thanks to Lisandro + Dalcin for reporting. +- Fix MPI_IREDUCE_SCATTER_BLOCK for a one-process communicator. Thanks + to Lisandro Dalcin for reporting. +- Add (Open MPI-specific) additional flavors to MPI_COMM_SPLIT_TYPE. + See MPI_Comm_split_type(3) for details. Thanks to Nick Andersen for + supplying this enhancement. +- Improve closing of file descriptors during the job launch phase. + Thanks to Piotr Lesnicki for reporting and providing this + enhancement.
+- Fix a problem in MPI_GET_ACCUMULATE and MPI_RGET_ACCUMULATE when + using Portals4. Thanks to Nicolas Chevalier for reporting. +- Use correct include file for lstat prototype in ROMIO. Thanks to + William Throwe for finding and providing a fix. +- Add missing Fortran bindings for MPI_WIN_ALLOCATE. Thanks to Christoph + Niethammer for reporting and fixing. +- Fortran related fixes to handle Intel 2016 compiler. Thanks to + Fabrice Roy for reporting this. +- Fix a Fortran linkage issue. Thanks to Marco Atzeri for finding and + suggesting a fix. +- Fix problem with using BIND(C) for Fortran bindings with logical + parameters. Thanks to Paul Romano for reporting. +- Fix an issue with use of DL-related macros in opal library. Thanks to + Scott Atchley for finding this. +- Fix an issue with parsing mpirun command line options which contain + colons. Thanks to Lev Givon for reporting. +- Fix a problem with Open MPI's package configury files. Thanks to + Christoph Junghans for reporting. +- Fix a typo in the MPI_INTERCOMM_MERGE man page. Thanks to Harald + Servat for reporting and correcting. +- Update man pages for non-blocking sends per MPI 3.1 standard. + Thanks to Alexander Pozdneev for reporting. +- Fix problem when compiling against PVFS2. Thanks to Dave Love for + reporting. +- Fix problems with MPI_NEIGHBOR_ALLTOALL{V,W}. Thanks to Willem + Vermin for reporting this issue. +- Fix various compilation problems on Cygwin. Thanks to Marco Atzeri + for supplying these fixes. +- Fix problem with resizing of subarray and darray data types. Thanks + to Keith Bennett and Dan Garmann for reporting. +- Fix a problem with MPI_COMBINER_RESIZED. Thanks to James Ramsey for + the report. +- Fix an hwloc binding issue. Thanks to Ben Menadue for reporting. +- Fix a problem with the shared memory (sm) BTL. Thanks to Peter Wind + for the report. +- Fixes for heterogeneous support. Thanks to Siegmar Gross for reporting. +- Fix a problem with memchecker.
Thanks to Clinton Stimpson for reporting. +- Fix a problem with MPI_UNWEIGHTED in topology functions. Thanks to + Jun Kudo for reporting. +- Fix problem with a MCA parameter base filesystem types. Thanks to + Siegmar Gross for reporting. +- Fix a problem with some windows info argument types. Thanks to + Alastair McKinstry for reporting. + + +1.10.6 - 17 Feb 2017 +------ +- Fix bug in timer code that caused problems at optimization settings + greater than 2 +- OSHMEM: make mmap allocator the default instead of sysv or verbs +- Support MPI_Dims_create with dimension zero +- Update USNIC support +- Prevent 64-bit overflow on timer counter +- Add support for forwarding signals +- Fix bug that caused truncated messages on large sends over TCP BTL +- Fix potential infinite loop when printing a stacktrace + + +1.10.5 - 19 Dec 2016 +------ +- Update UCX APIs +- Fix bug in darray that caused MPI/IO failures +- Use a MPI_Get_library_version() like string to tag the debugger DLL. + Thanks to Alastair McKinstry for the report +- Fix multi-threaded race condition in coll/libnbc +- Several fixes to OSHMEM +- Fix bug in UCX support due to uninitialized field +- Fix MPI_Ialltoallv with MPI_IN_PLACE and without MPI param check +- Correctly reset receive request type before init. Thanks Chris Pattison + for the report and test case. +- Fix bug in iallgather[v] +- Fix concurrency issue with MPI_Comm_accept. Thanks to Pieter Noordhuis + for the patch +- Fix ompi_coll_base_{gather,scatter}_intra_binomial +- Fixed an issue with MPI_Type_get_extent returning the wrong extent + for distributed array datatypes. +- Re-enable use of rdtsc instruction as a monotonic clock source if + the processor has a core-invariant tsc. This is a partial fix for a + performance regression introduced in Open MPI v1.10.3.
+ + +1.10.4 - 01 Sept 2016 +------ + +- Fix assembler support for MIPS +- Improve memory handling for temp buffers in collectives +- Fix [all]reduce with non-zero lower bound datatypes + Thanks Hristo Iliev for the report +- Fix non-standard ddt handling. Thanks Yuki Matsumoto for the report +- Various libnbc fixes. Thanks Yuki Matsumoto for the report +- Fix typos in request RMA bindings for Fortran. Thanks to @alazzaro + and @vondele for the assist +- Various bug fixes and enhancements to collective support +- Fix predefined types mapping in hcoll +- Revive the coll/sync component to resolve unexpected message issues + during tight loops across collectives +- Fix typo in wrapper compiler for Fortran static builds + + +1.10.3 - 15 June 2016 +------ + +- Fix zero-length datatypes. Thanks to Wei-keng Liao for reporting + the issue. +- Minor manpage cleanups +- Implement atomic support in OSHMEM/UCX +- Fix support of MPI_COMBINER_RESIZED. Thanks to James Ramsey + for the report +- Fix computation of #cpus when --use-hwthread-cpus is used +- Add entry points for Allgatherv, iAllgatherv, Reduce, and iReduce + for the HCOLL library +- Fix an HCOLL integration bug that could signal completion of request + while still being worked +- Fix computation of cores when SMT is enabled. Thanks to Ben Menadue + for the report +- Various USNIC fixes +- Create a datafile in the per-proc directory in order to make it + unique per communicator. Thanks to Peter Wind for the report +- Fix zero-size malloc in one-sided pt-to-pt code. Thanks to Lisandro + Dalcin for the report +- Fix MPI_Get_address when passed MPI_BOTTOM to not return an error. + Thanks to Lisandro Dalcin for the report +- Fix MPI_TYPE_SET_ATTR with NULL value. Thanks to Lisandro Dalcin for + the report +- Fix various Fortran08 binding issues +- Fix memchecker no-data case. 
Thanks to Clinton Stimpson for the report +- Fix CUDA support under OS-X +- Fix various OFI/MTL integration issues +- Add MPI_T man pages +- Fix one-sided pt-to-pt issue by preventing communication from happening + before a target enters a fence, even in the no-precede case +- Fix a bug that disabled Totalview for MPMD use-case +- Correctly support MPI_UNWEIGHTED in topo-graph-neighbors. Thanks to + Jun Kudo for the report +- Fix singleton operations under SLURM when PMI2 is enabled +- Do not use MPI_IN_PLACE in neighborhood collectives for non-blocking + collectives (libnbc). Thanks to Jun Kudo for the report +- Silence autogen deprecation warnings for newer versions of Perl +- Do not return MPI_ERR_PENDING from collectives +- Use type int* for MPI_WIN_DISP_UNIT, MPI_WIN_CREATE_FLAVOR, and MPI_WIN_MODEL. + Thanks to Alastair McKinstry for the report +- Fix register_datarep stub function in IO/OMPIO. Thanks to Eric + Chamberland for the report +- Fix a bus error on MPI_WIN_[POST,START] in the shared memory one-sided component +- Add several missing MPI_WIN_FLAVOR constants to the Fortran support +- Enable connecting processes from different subnets using the openib BTL +- Fix bug in basic/barrier algorithm in OSHMEM +- Correct process binding for the --map-by node case +- Include support for subnet-to-subnet routing over InfiniBand networks +- Fix usnic resource check +- AUTHORS: Fix an errant reference to Subversion IDs +- Fix affinity for MPMD jobs running under LSF +- Fix many Fortran binding bugs +- Fix `MPI_IN_PLACE`-related bugs +- Fix PSM/PSM2 support for singleton operations +- Ensure MPI transports continue to progress during RTE barriers +- Update HWLOC to 1.9.1 end-of-series +- Fix a bug in the Java command line parser when the + -Djava.library.path option was given by the user +- Update the MTL/OFI provider selection behavior +- Add support for clock_gettime on Linux.
+- Correctly detect and configure for Solaris Studio 12.5 + beta compilers +- Correctly compute #slots when -host is used for MPMD case +- Fix a bug in the hcoll collectives due to an uninitialized field +- Do not set a binding policy when oversubscribing a node +- Fix hang in intercommunicator operations when oversubscribed +- Speed up process termination during MPI_Abort +- Disable backtrace support by default in the PSM/PSM2 libraries to + prevent unintentional conflicting behavior. + + + +1.10.2: 26 Jan 2016 +------------------- + ********************************************************************** + * OSHMEM is now 1.2 compliant + ********************************************************************** + +- Fix NBC_Copy for legitimate zero-size messages +- Fix multiple bugs in OSHMEM +- Correctly handle mpirun --host <user>@<ip-address> +- Centralize two MCA params to avoid duplication between OMPI and + OSHMEM layers: opal_abort_delay and opal_abort_print_stack +- Add support for Fujitsu compilers +- Add UCX support for OMPI and OSHMEM +- Correctly handle oversubscription when not given directives + to permit it. Thanks to @ammore1 for reporting it +- Fix rpm spec file to not include the /usr directory +- Add Intel HFI1 default parameters for the openib BTL +- Resolve symbol conflicts in the PSM2 library +- Add ability to empty the rgpusm cache when full if requested +- Fix another libtool bug when -L requires a space between it + and the path. Thanks to Eric Schnetter for the patch. +- Add support for OSHMEM v1.2 APIs +- Improve efficiency of oshmem_preconnect_all algorithm +- Fix bug in buffered sends support +- Fix double free in edge case of mpirun. Thanks to @jsharpe for + the patch +- Multiple one-sided support fixes +- Fix integer overflow in the tuned "reduce" collective when + using buffers larger than INT_MAX in size +- Fix parse of user environment variables in mpirun.
Thanks to + Stefano Garzarella for the patch +- Performance improvements in PSM2 support +- Fix NBC iBarrier for inter-communicators +- Fix bug in vader BTL during finalize +- Improved configure support for Fortran compilers +- Fix rank_file mapper to support default --slot-set. Thanks + to Matt Thompson for reporting it +- Update MPI_Testsome man page. Thanks to Eric Schnetter for + the suggestion +- Fix missing resize of the returned type for subarray and + darray types. Thanks to Keith Bennett and Dan Garmann for + reporting it +- Fix Java support on OSX 10.11. Thanks to Alexander Daryin + for reporting the problem +- Fix some compilation issues on Solaris 11.2. Thanks to + Paul Hargrove for his continued help in such areas -1.8.6 ------ + +1.10.1: 4 Nov 2015 +------------------ + +- Workaround an optimization problem with gcc compilers >= 4.9.2 that + causes problems with memory registration, and forced + mpi_leave_pinned to default to 0 (i.e., off). Thanks to @oere for + the fix. +- Fix use of MPI_LB and MPI_UB in subarray and darray datatypes. + Thanks to Gus Correa and Dimitar Pashov for pointing out the issue. +- Minor updates to mpi_show_mpi_alloc_mem_leaks and + ompi_debug_show_handle_leaks functionality. +- Fix segv when invoking non-blocking reductions with a user-defined + operation. Thanks to Rupert Nash and Georg Geiser for identifying + the issue. +- No longer probe for PCI topology on Solaris (unless running as root). +- Fix for Intel Parallel Studio 2016 ifort partial support of the + !GCC$ pragma. Thanks to Fabrice Roy for reporting the problem. +- Bunches of Coverity / static analysis fixes. +- Fixed ROMIO to look for lstat in <sys/stat.h>. Thanks to William + Throwe for submitting the patch both upstream and to Open MPI. +- Fixed minor memory leak when attempting to open plugins. +- Fixed type in MPI_IBARRIER C prototype. Thanks to Harald Servat for + reporting the issue.
+- Add missing man pages for MPI_WIN_CREATE_DYNAMIC, MPI_WIN_ATTACH, + MPI_WIN_DETACH, MPI_WIN_ALLOCATE, MPI_WIN_ALLOCATE_SHARED. +- When mpirun-launching new applications, only close file descriptors + that are actually open (resulting in a faster launch in some + environments). +- Fix "test ==" issues in Open MPI's configure script. Thanks to Kevin + Buckley for pointing out the issue. +- Fix performance issue in usnic BTL: ensure progress thread is + throttled back to not aggressively steal CPU cycles. +- Fix cache line size detection on POWER architectures. +- Add missing #include in a few places. Thanks to Orion Poplawski for + supplying the patch. +- When OpenSHMEM building is disabled, no longer install its header + files, help files, or man pages. Add man pages for oshrun, oshcc, + and oshfort. +- Fix mpi_f08 implementations of MPI_COMM_SET_INFO, and profiling + versions of MPI_BUFFER_DETACH, MPI_WIN_ALLOCATE, + MPI_WIN_ALLOCATE_SHARED, MPI_WTICK, and MPI_WTIME. +- Add orte_rmaps_dist_device MCA param, allowing users to map near a + specific device. +- Various updates/fixes to the openib BTL. +- Add missing defaults for the Mellanox ConnectX 3 card to the openib BTL. +- Minor bug fixes in the OFI MTL. +- Various updates to Mellanox's MXM, hcoll, and FCA components. +- Add OpenSHMEM man pages. Thanks to Tony Curtis for sharing the man + pages files from openshmem.org. +- Add missing "const" attributes to MPI_COMPARE_AND_SWAP, + MPI_FETCH_AND_OP, MPI_RACCUMULATE, and MPI_WIN_DETACH prototypes. + Thanks to Michael Knobloch and Takahiro Kawashima for bringing this + to our attention. +- Fix linking issues on some platforms (e.g., SLES 12). +- Fix hang on some corner cases when MPI applications abort. +- Add missing options to mpirun man page. Thanks to Daniel Letai + for bringing this to our attention.
+- Add new --with-platform-patches-dir configure option +- Adjust relative selection priorities to ensure that MTL + support is favored over BTL support when both are available +- Use CUDA IPC for all sized messages for performance + + +1.10.0: 25 Aug 2015 +------------------- + +** NOTE: The v1.10.0 release marks the transition to Open MPI's new +** version numbering scheme. The v1.10.x release series is based on +** the v1.8.x series, but with a few new features. v2.x will be the +** next series after the v1.10.x series, and complete the transition +** to the new version numbering scheme. See README for more details +** on the new versioning scheme. +** +** NOTE: In accordance with OMPI version numbering, the v1.10 is *not* +** API compatible with the v1.8 release series. + +- Added libfabric support (see README for more details): + - usNIC BTL updated to use libfabric. + - Added OFI MTL (usable with PSM in libfabric v1.1.0). +- Added Intel Omni-Path support via new PSM2 MTL. +- Added "yalla" PML for faster MXM support. +- Removed support for MX +- Added persistent distributed virtual machine (pDVM) support for fast + workflow executions. +- Fixed typo in GCC inline assembly introduced in Open MPI v1.8.8. + Thanks to Paul Hargrove for pointing out the issue. +- Add missing man pages for MPI_Win_get|set_info(3). +- Ensure that session directories are cleaned up at the end of a run. +- Fixed linking issues on some OSs where symbols of dependent + libraries are not automatically publicly available. +- Improve hcoll and fca configury library detection. Thanks to David + Shrader for helping track down the issue. +- Removed the LAMA mapper (for use in setting affinity). Its + functionality has been largely superseded by other mpirun CLI + options. +- CUDA: Made the asynchronous copy mode be the default. +- Fix a malloc(0) warning in MPI_IREDUCE_SCATTER_BLOCK. Thanks to + Lisandro Dalcin for reporting the issue. +- Fix typo in MPI_Scatter(3) man page. 
Thanks to Akshay Venkatesh for + noticing the mistake. +- Add rudimentary protection from TCP port scanners. +- Fix typo in Open MPI error handling. Thanks to Åke Sandgren for + pointing out the error. +- Increased the performance of the CM PML (i.e., the Portals, PSM, + PSM2, MXM, and OFI transports). +- Restored visibility of blocking send requests in message queue + debuggers (e.g., TotalView, DDT). +- Fixed obscure IPv6-related bug in the TCP BTL. +- Add support for the "no_locks" MPI_Info key for one-sided + functionality. +- Fixed ibv_fork support for verbs-based networks. +- Fixed a variety of small bugs in OpenSHMEM. +- Fixed MXM configure with additional CPPFLAGS and LDFLAGS. Thanks to + David Shrader for the patch. +- Fixed incorrect memalign threshold in the openib BTL. Thanks to + Xavier Besseron for pointing out the issue. + + +1.8.8: 5 Aug 2015 +----------------- + +- Fix a segfault in MPI_FINALIZE with the PSM MTL. +- Fix mpi_f08 sentinels (e.g., MPI_STATUS_IGNORE) handling. +- Set some additional MXM default values for OSHMEM. +- Fix an invalid memory access in MPI_MRECV and MPI_IMRECV. +- Include two fixes that were mistakenly left out of the official + v1.8.7 tarball: + - Fixed MPI_WIN_POST and MPI_WIN_START for zero-size messages + - Protect the OOB TCP ports from segfaulting when accessed by port + scanners + + +1.8.7: 15 Jul 2015 +------------------ + +** NOTE: v1.8.7 technically breaks ABI with prior versions +** in the 1.8 series because it repairs two incorrect API +** signatures. However, users will only need to recompile +** if they were using those functions - which they couldn't +** have been, because the signatures were wrong :-) + +- Plugged a memory leak that impacted blocking sends +- Fixed incorrect declaration for MPI_T_pvar_get_index and added + missing return code MPI_T_INVALID_NAME.
+- Fixed an uninitialized variable in PMI2 support +- Added new vendor part id for Mellanox ConnectX4-LX +- Fixed NBC_Copy for legitimate zero-size messages +- Fixed MPI_Win_post and MPI_Win_start for zero-size messages +- Protect the OOB ports from segfaulting when accessed by port scanners +- Fixed several Fortran typos +- Fixed configure detection of XRC support +- Fixed support for highly heterogeneous systems to avoid + memory corruption when printing out the bindings + +1.8.6: 17 Jun 2015 +------------------ - Fixed memory leak on Mac OS-X exposed by TCP keepalive - Fixed keepalive support to ensure that daemon/node failure @@ -82,10 +934,15 @@ Master (not on release branches yet) - Fixed trivial typo in MPI_Neighbor_allgather manpage - Fixed tree-spawn support for sh and ksh shells - Several data type fixes +- Fixed IPv6 support bug +- Cleaned up an unlikely build issue +- Fixed PMI2 process map parsing for cyclic mappings +- Fixed memalign threshold in openib BTL +- Fixed debugger access to message queues for blocking send/recv -1.8.5 ------ +1.8.5: 5 May 2015 +----------------- - Fixed configure problems in some cases when using an external hwloc installation. Thanks to Erick Schnetter for reporting the error and @@ -170,8 +1027,8 @@ Master (not on release branches yet) enabling of MPI_THREAD_MULTIPLE support. -1.8.4 ------ +1.8.4: 19 Dec 2014 +------------------ - Fix MPI_SIZEOF; now available in mpif.h for modern Fortran compilers (see README for more details). 
Also fixed various compiler/linker @@ -227,10 +1084,11 @@ Master (not on release branches yet) output extra bytes if the system was very heavily loaded - Fix a bug where specifying mca_component_show_load_errors=0 could cause ompi_info to segfault +- Updated valgrind suppression file -1.8.3 ------ +1.8.3: 26 Sep 2014 +------------------ - Fixed application abort bug to ensure that MPI_Abort exits appropriately and returns the provided exit status @@ -252,9 +1110,13 @@ Master (not on release branches yet) patches. -1.8.2 ------ +1.8.2: 25 Aug 2014 +------------------ +- Fix auto-wireup of OOB, allowing ORTE to automatically + test all available NICs +- "Un-deprecate" pernode, npernode, and npersocket options + by popular demand - Add missing Fortran bindings for MPI_WIN_LOCK_ALL, MPI_WIN_UNLOCK_ALL, and MPI_WIN_SYNC. - Fix cascading/over-quoting in some cases with the rsh/ssh-based @@ -312,8 +1174,8 @@ Master (not on release branches yet) exits one step of that pipe before completing output -1.8.1 ------ +1.8.1: 23 Apr 2014 +------------------ - Fix for critical bug: mpirun removed files (but not directories) from / when run as root. Thanks to Jay Fenlason and Orion Poplawski @@ -321,8 +1183,8 @@ Master (not on release branches yet) fix. -1.8 ---- +1.8: 31 Mar 2014 +---------------- - Commit upstream ROMIO fix for mixed NFS+local filesystem environments. - Several fixes for MPI-3 one-sided support. For example, @@ -339,12 +1201,13 @@ Master (not on release branches yet) for identifying the problem and providing a patch -1.7.5 ------ +1.7.5: 20 Mar 2014 +------------------ ********************************************************************** * Open MPI is now fully MPI-3.0 compliant ********************************************************************** + - Add Linux OpenSHMEM support built on top of Open MPI's MPI layer. Thanks to Mellanox for contributing this new feature.
- Allow restricting ORTE daemons to specific cores using the @@ -400,8 +1263,9 @@ Master (not on release branches yet) to enable this mode. -1.7.4 ------ +1.7.4: 5 Feb 2014 +----------------- + ********************************************************************** * CRITICAL CHANGE * @@ -423,6 +1287,7 @@ Master (not on release branches yet) * in particular may want to override at least the binding default * to allow threads to use multiple cores. ********************************************************************** + - Restore version number output in "ompi_info --all". - Various bug fixes for the mpi_f08 Fortran bindings. - Fix ROMIO compile error with Lustre 2.4. Thanks to Adam Moody for @@ -558,8 +1423,9 @@ Master (not on release branches yet) - MPI-3: Added support for non-collective communicator creation. -1.7.3 ------ +1.7.3: 17 Oct 2013 +------------------ + - Make CUDA-aware support dynamically load libcuda.so so CUDA-aware MPI library can run on systems without CUDA software. - Fix various issues with dynamic processes and intercommunicator @@ -614,8 +1480,9 @@ Master (not on release branches yet) be leveraged by any resource manager that implements PMI2; e.g. SLURM, versions 2.6 and higher. -1.7.2 ------ +1.7.2: 26 Jun 2013 +------------------ + - Major VampirTrace update to 5.14.4.2. (** also appeared: 1.6.5) - Fix to set flag==1 when MPI_IPROBE is called with MPI_PROC_NULL. @@ -665,18 +1532,20 @@ Master (not on release branches yet) formats. - Added Location Aware Mapping Algorithm (LAMA) mapping component. - Fixes for MPI_STATUS handling in corner cases. +- Add a distance-based mapping component to find the socket "closest" + to the PCI bus. -1.7.1 ------ +1.7.1: 16 Apr 2013 +------------------ - Fixed compile error when --without-memory-manager was specified on Linux - Fixed XRC compile issue in Open Fabrics support. 
-1.7 ---- +1.7: 1 Apr 2013 +--------------- - Added MPI-3 functionality: - MPI_GET_LIBRARY_VERSION @@ -694,6 +1563,7 @@ Master (not on release branches yet) - Added better "use mpi" support (for compilers that support it) - Removed incorrect MPI_SCATTERV interface from "mpi" module that was added in the 1.5.x series for ABI reasons. +- Lots of VampirTrace upgrades and fixes; upgrade to v5.14.3. - Modified process affinity system to provide warning when bindings result in being "bound to all", which is equivalent to not being bound. @@ -747,8 +1617,8 @@ Master (not on release branches yet) for chasing it down. -1.6.6 ------ +1.6.6: Not released +------------------- - Prevent integer overflow in datatype creation. Thanks to Gilles Gouaillardet for identifying the problem and providing a preliminary @@ -778,8 +1648,8 @@ Master (not on release branches yet) - Add Gentoo "sandbox" memory hooks override. -1.6.5 ------ +1.6.5: 26 Jun 2013 +------------------ - Updated default SRQ parameters for the openib BTL. (** also to appear: 1.7.2) @@ -822,8 +1692,8 @@ Master (not on release branches yet) (** also to appear: 1.7.2) -1.6.4 ------ +1.6.4: 21 Feb 2013 +------------------ - Fix Cygwin shared memory and debugger plugin support. Thanks to Marco Atzeri for reporting the issue and providing initial patches. @@ -865,8 +1735,8 @@ Master (not on release branches yet) for chasing it down. -1.6.3 ------ +1.6.3: 30 Oct 2012 +------------------ - Fix mpirun --launch-agent behavior when a prefix is specified. Thanks to Reuti for identifying the issue. @@ -889,8 +1759,8 @@ Master (not on release branches yet) the issue. -1.6.2 ------ +1.6.2: 25 Sep 2012 +------------------ - Fix issue with MX MTL. Thanks to Doug Eadline for raising the issue. - Fix singleton MPI_COMM_SPAWN when the result job spans multiple nodes. @@ -910,8 +1780,8 @@ Master (not on release branches yet) - Fix VampirTrace compilation issue with the PGI compiler suite. 
-1.6.1 ------ +1.6.1: 22 Aug 2012 +------------------ - A bunch of changes to eliminate hangs on OpenFabrics-based networks. Users with Mellanox hardware are ***STRONGLY ENCOURAGED*** to check @@ -967,8 +1837,8 @@ Master (not on release branches yet) - Improve several error messages. -1.6 ---- +1.6: 14 May 2012 +---------------- - Fix some process affinity issues. When binding a process, Open MPI will now bind to all available hyperthreads in a core (or socket, @@ -1006,8 +1876,8 @@ Master (not on release branches yet) MPI_CLOSE_PORT (it's an IN parameter). -1.5.5 ------ +1.5.5: 27 Mar 2012 +------------------ - Many, many portability configure/build fixes courtesy of Paul Hargrove. Thanks, Paul! @@ -1067,8 +1937,8 @@ Master (not on release branches yet) - Many fixes to the Mellanox MXM transport. -1.5.4 ------ +1.5.4: 18 Aug 2011 +------------------ - Add support for the (as yet unreleased) Mellanox MXM transport. - Add support for dynamic service levels (SLs) in the openib BTL. @@ -1148,15 +2018,15 @@ Master (not on release branches yet) MPI_INIT. -1.5.3 ------ +1.5.3: 16 Mar 2011 +------------------ - Add missing "affinity" MPI extension (i.e., the OMPI_Affinity_str() API) that was accidentally left out of the 1.5.2 release. -1.5.2 ------ +1.5.2: 9 Mar 2011 +----------------- - Replaced all custom topology / affinity code with initial support for hwloc v1.1.1 (PLPA has been removed -- long live hwloc!). Note @@ -1213,8 +2083,8 @@ Master (not on release branches yet) README for more details. -1.5.1 ------ +1.5.1: 15 Dec 2010 +------------------ - Fixes for the Oracle Studio 12.2 Fortran compiler. - Fix SPARC and SPARCv9 atomics. Thanks to Nicola Stange for the @@ -1239,8 +2109,8 @@ Master (not on release branches yet) - Various VT fixes and updates. -1.5 ---- +1.5: 10 Oct 2010 +---------------- - Added "knem" support: direct process-to-process copying for shared memory message passing. 
See http://runtime.bordeaux.inria.fr/knem/ @@ -1323,8 +2193,8 @@ Master (not on release branches yet) - hnp to send the output to mpirun. - smtp (requires libesmtp) to send an email. -1.4.5 ------ +1.4.5: 12 Feb 2012 +------------------ - Fixed the --disable-memory-manager configure switch. (** also to appear in 1.5.5) @@ -1364,8 +2234,8 @@ Master (not on release branches yet) (** also to appear in 1.5.5) -1.4.4 ------ +1.4.4: 11 Oct 2011 +------------------ - Modified a memcpy() call in the openib btl connection setup to use memmove() instead because of the possibility of an overlapping @@ -1464,8 +2334,8 @@ Master (not on release branches yet) directories that are remotely mounted. -1.4.3 ------ +1.4.3: 6 Sep 2010 +----------------- - Fixed handling of the array_of_argv parameter in the Fortran binding of MPI_COMM_SPAWN_MULTIPLE (** also to appear: 1.5). @@ -1519,8 +2389,8 @@ Master (not on release branches yet) - Various man page updates -1.4.2 ------ +1.4.2: 4 May 2010 +----------------- - Fixed problem when running in heterogeneous environments. Thanks to Timur Magomedov for helping to track down this issue. @@ -1585,8 +2455,8 @@ Master (not on release branches yet) tarballs. -1.4.1 ------ +1.4.1: 15 Jan 2010 +------------------ - Update to PLPA v1.3.2, addressing a licensing issue identified by the Fedora project. See @@ -1610,8 +2480,8 @@ Master (not on release branches yet) release). -1.4 ---- +1.4: 8 Dec 2009 +--------------- The *only* change in the Open MPI v1.4 release (as compared to v1.3.4) was to update the embedded version of Libtool's libltdl to address a @@ -1621,8 +2491,8 @@ Libtool 2.2.6b. There are no other changes between Open MPI v1.3.4 and v1.4. -1.3.4 ------ +1.3.4: 13 Feb 2010 +------------------ - Fix some issues in OMPI's SRPM with regard to shell_scripts_basename and its use with mpi-selector. Thanks to Bill Johnstone for @@ -1668,8 +2538,8 @@ and v1.4. libltdl from that used to build Open MPI. 
-1.3.3 ------ +1.3.3: 14 Jul 2009 +------------------ - Fix a number of issues with the openib BTL (OpenFabrics) RDMA CM, including a memory corruption bug, a shutdown deadlock, and a route @@ -1714,8 +2584,8 @@ and v1.4. details. -1.3.2 ------ +1.3.2: 22 Apr 2009 +------------------ - Fixed a potential infinite loop in the openib BTL that could occur in senders in some frequent-communication scenarios. Thanks to Don @@ -1776,8 +2646,8 @@ and v1.4. reporting the problem. -1.3.1 ------ +1.3.1: 19 Mar 2009 +------------------ - Added "sync" coll component to allow users to synchronize every N collective operations on a given communicator. @@ -1838,8 +2708,8 @@ and v1.4. the problem. -1.3 ---- +1.3: 19 Jan 2009 +---------------- - Extended the OS X 10.5.x (Leopard) workaround for a problem when assembly code is compiled with -g[0-9]. Thanks to Barry Smith for @@ -1918,7 +2788,7 @@ and v1.4. in cross-compile environments. -1.2.9 (unreleased) +1.2.9: 14 Feb 2009 ------------------ - Fix a segfault when using one-sided communications on some forms of derived @@ -1945,8 +2815,8 @@ and v1.4. See ticket #1580. -1.2.8 ------ +1.2.8: 14 Oct 2008 +------------------ - Tweaked one memory barrier in the openib component to be more conservative. May fix a problem observed on PPC machines. See ticket #1532. @@ -1961,8 +2831,8 @@ and v1.4. - Fix a regression introduced in 1.2.6 for the IBM eHCA. See ticket #1526. -1.2.7 ------ +1.2.7: 28 Aug 2008 +------------------ - Add some Sun HCA vendor IDs. See ticket #1461. - Fixed a memory leak in MPI_Alltoallw when called from Fortran. @@ -1995,8 +2865,8 @@ and v1.4. Thanks to Martin Audet for the bug report. See ticket #1268. -1.2.6 ------ +1.2.6: 7 Apr 2008 +----------------- - Fix a bug in the inter-allgather for asymmetric inter-communicators. Thanks to Martin Audet for the bug report. See ticket #1247. @@ -2025,8 +2895,8 @@ and v1.4. Thanks to Peter Breitenlohner for the patch. 
-1.2.5 ------ +1.2.5: 8 Jan 2008 +----------------- - Fixed compile issue with open() on Fedora 8 (and newer) platforms. Thanks to Sebastian Schmitzdorff for noticing the problem. @@ -2070,8 +2940,8 @@ and v1.4. #1164. -1.2.4 ------ +1.2.4: 26 Sep 2007 +------------------ - Really added support for TotalView/DDT parallel debugger message queue debugging (it was mistakenly listed as "added" in the 1.2 release). @@ -2124,8 +2994,8 @@ and v1.4. libibverbs >=v1.1 (i.e., OFED 1.2 and beyond). -1.2.3 ------ +1.2.3: 20 Jun 2007 +------------------ - Fix a regression in comm_spawn functionality that inadvertently caused the mapping of child processes to always start at the same @@ -2155,8 +3025,8 @@ and v1.4. anonymous unions. Thanks to Luis Kornblueh for reporting the bug. -1.2.2 ------ +1.2.2: 16 May 2007 +------------------ - Fix regression in 1.2.1 regarding the handling of $CC with both absolute and relative path names. @@ -2170,8 +3040,8 @@ and v1.4. - Fixed a deadlock in orterun when the rsh PLS encounters some errors. -1.2.1 ------ +1.2.1: 25 Apr 2007 +------------------ - Fixed a number of connection establishment errors in the TCP out- of-band messaging system. @@ -2208,8 +3078,8 @@ and v1.4. - Support for setting specific limits on registered memory. -1.2 ---- +1.2: 15 Mar 2007 +---------------- - Fixed race condition in the shared memory fifo's, which led to orphaned messages. @@ -2281,8 +3151,8 @@ and v1.4. for 127.0.0.0/8, rather than just 127.0.0.1. -1.1.5 ------ +1.1.5: 19 Mar 2007 +------------------ - Implement MPI_TYPE_CREATE_DARRAY function. - Fix race condition in shared memory BTL startup that could cause MPI @@ -2303,8 +3173,8 @@ and v1.4. compilers. -1.1.4 ------ +1.1.4: 30 Jan 2007 +------------------ - Fixed 64-bit alignment issues with TCP interface detection on intel-based OS X machines. @@ -2329,8 +3199,8 @@ and v1.4. MX BTL. 
-1.1.3 ------ +1.1.3: 26 Jan 2007 +------------------ - Remove the "hierarch" coll component; it was not intended to be included in stable releases yet. @@ -2368,8 +3238,8 @@ and v1.4. problems. -1.1.2 ------ +1.1.2: 18 Oct 2006 +------------------ - Really fix Fortran status handling in MPI_WAITSOME and MPI_TESTSOME. - Various datatype fixes, reported by several users as causing @@ -2401,8 +3271,8 @@ and v1.4. - Add some missing Fortran MPI-2 IO constants. -1.1.1 ------ +1.1.1: 28 Aug 2006 +------------------ - Fix for Fortran string handling in various MPI API functions. - Fix for Fortran status handling in MPI_WAITSOME and MPI_TESTSOME. @@ -2460,8 +3330,8 @@ and v1.4. - Add missing MPI::Is_finalized() function. -1.1 ---- +1.1: 23 Jun 2006 +---------------- - Various MPI datatype fixes, optimizations. - Fixed various problems on the SPARC architecture (e.g., not @@ -2507,8 +3377,8 @@ and v1.4. - Add --debug option to mpirun to generically invoke a parallel debugger. -1.0.3 (unreleased; all fixes included in 1.1) ---------------------------------------------- +1.0.3: Not released (all fixes included in 1.1) +----------------------------------------------- - Fix a problem noted by Chris Hennes where MPI_INFO_SET incorrectly disallowed long values. @@ -2560,8 +3430,8 @@ and v1.4. recent versions of GCC. -1.0.2 ------ +1.0.2: 7 Apr 2006 +----------------- - Fixed assembly race condition on AMD64 platforms. - Fixed residual .TRUE. issue with copying MPI attributes set from @@ -2665,8 +3535,8 @@ and v1.4. for OS X). -1.0.1 ------ +1.0.1: 12 Dec 2005 +------------------ - Fixed assembly on Solaris AMD platforms. Thanks to Pierre Valiron for bringing this to our attention. @@ -2706,7 +3576,7 @@ and v1.4. pointing this out to us. -1.0 ---- +1.0: 17 Nov 2005 +---------------- Initial public release. 
diff --git a/README b/README index bf3d3001ed1..50e87f50078 100644 --- a/README +++ b/README @@ -8,15 +8,18 @@ Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2007 The Regents of the University of California. All rights reserved. -Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006-2011 Mellanox Technologies. All rights reserved. Copyright (c) 2006-2012 Oracle and/or its affiliates. All rights reserved. Copyright (c) 2007 Myricom, Inc. All rights reserved. -Copyright (c) 2008 IBM Corporation. All rights reserved. +Copyright (c) 2008-2016 IBM Corporation. All rights reserved. Copyright (c) 2010 Oak Ridge National Labs. All rights reserved. Copyright (c) 2011 University of Houston. All rights reserved. Copyright (c) 2013-2015 Intel, Inc. All rights reserved Copyright (c) 2015 NVIDIA Corporation. All rights reserved. +Copyright (c) 2017 Los Alamos National Security, LLC. All rights + reserved. + $COPYRIGHT$ Additional copyrights may follow @@ -36,8 +39,8 @@ sign up on the user's and/or developer's mailing list (for user-level and developer-level questions; when in doubt, send to the user's list): - users@open-mpi.org - devel@open-mpi.org + users@lists.open-mpi.org + devel@lists.open-mpi.org Because of spam, only subscribers are allowed to post to these lists (ensure that you subscribe with and post from exactly the same e-mail @@ -45,8 +48,8 @@ address -- joe@example.com is considered different than joe@mycomputer.example.com!). Visit these pages to subscribe to the lists: - http://www.open-mpi.org/mailman/listinfo.cgi/users - http://www.open-mpi.org/mailman/listinfo.cgi/devel + http://lists.open-mpi.org/mailman/listinfo/users + http://lists.open-mpi.org/mailman/listinfo/devel Thanks for your time. @@ -54,12 +57,12 @@ Thanks for your time. 
Much, much more information is also available in the Open MPI FAQ: - http://www.open-mpi.org/faq/ + https://www.open-mpi.org/faq/ =========================================================================== The following abbreviated list of release notes applies to this code -base as of this writing (April 2015): +base as of this writing (March 2017): General notes ------------- @@ -67,8 +70,8 @@ General notes - Open MPI now includes two public software layers: MPI and OpenSHMEM. Throughout this document, references to Open MPI implicitly include both of these layers. When distinction between these two layers is - necessary, we will reference them as the "MPI" and "OSHMEM" layers - respectively. + necessary, we will reference them as the "MPI" and "OpenSHMEM" + layers respectively. - OpenSHMEM is a collaborative effort between academia, industry, and the U.S. Government to create a specification for a standardized API @@ -78,17 +81,8 @@ General notes http://openshmem.org/ - This OpenSHMEM implementation is provided on an experimental basis; - it has been lightly tested and will only work in Linux environments. - Although this implementation attempts to be portable to multiple - different environments and networks, it is still new and will likely - experience growing pains typical of any new software package. - End-user feedback is greatly appreciated. - - This implementation will currently most likely provide optimal - performance on Mellanox hardware and software stacks. Overall - performance is expected to improve as other network vendors and/or - institutions contribute platform specific optimizations. + This OpenSHMEM implementation will only work in Linux environments + with a restricted set of supported networks. See below for details on how to enable the OpenSHMEM implementation. @@ -108,7 +102,7 @@ General notes - The majority of Open MPI's documentation is here in this file, the included man pages, and on the web site FAQ - (http://www.open-mpi.org/). 
+ (https://www.open-mpi.org/). - Note that Open MPI documentation uses the word "component" frequently; the word "plugin" is probably more familiar to most @@ -125,14 +119,16 @@ General notes - Platform LSF (v7.0.2 and later) - SLURM - Cray XE, XC, and XK - - Oracle Grid Engine (OGE) 6.1, 6.2 and open source Grid Engine - Systems that have been tested are: - Linux (various flavors/distros), 32 bit, with gcc - Linux (various flavors/distros), 64 bit (x86), with gcc, Absoft, Intel, and Portland (*) - - OS X (10.6, 10.7, 10.8, 10.9, 10.10), 32 and 64 bit (x86_64), with + - OS X (10.8, 10.9, 10.10, 10.11), 32 and 64 bit (x86_64), with XCode and Absoft compilers (*) + - MacOS (10.12), 64 bit (x86_64) with XCode and Absoft compilers (*) + - OpenBSD. Requires configure option --enable-mca-no-build=patcher + with this release. (*) Be sure to read the Compiler Notes, below. @@ -141,7 +137,7 @@ General notes - ARMv4, ARMv5, ARMv6, ARMv7, ARMv8 - Other 64 bit platforms (e.g., Linux on PPC64) - Oracle Solaris 10 and 11, 32 and 64 bit (SPARC, i386, x86_64), - with Oracle Solaris Studio 12.2, 12.3, and 12.4 + with Oracle Solaris Studio 12.5 Compiler Notes -------------- @@ -174,16 +170,17 @@ Compiler Notes pgi-9 : 9.0-4 known GOOD pgi-10: 10.0-0 known GOOD pgi-11: NO known good version with --enable-debug - pgi-12: 12.10 known GOOD (and 12.8 and 12.9 both known BAD with - --enable-debug) - pgi-13: 13.10 known GOOD + pgi-12: 12.10 known BAD with -m32, but known GOOD without -m32 + (and 12.8 and 12.9 both known BAD with --enable-debug) + pgi-13: 13.9 known BAD with -m32, 13.10 known GOOD without -m32 + pgi-15: 15.10 known BAD with -m32 - Similarly, there is a known Fortran PGI compiler issue with long source directory path names that was resolved in 9.0-4 (9.0-3 is known to be broken in this regard). - IBM's xlf compilers: NO known good version that can build/link - the MPI f08 bindings or build/link the OSHMEM Fortran bindings. 
+ the MPI f08 bindings or build/link the OpenSHMEM Fortran bindings. - On NetBSD-6 (at least AMD64 and i386), and possibly on OpenBSD, libtool misidentifies properties of f95/g95, leading to obscure @@ -194,9 +191,14 @@ Compiler Notes f95/g95), or by disabling the Fortran MPI bindings with --disable-mpi-fortran. +- On OpenBSD/i386, if you configure with + --enable-mca-no-build=patcher, you will also need to add + --disable-dlopen. Otherwise, odd crashes can occur + nondeterministically. + - Absoft 11.5.2 plus a service pack from September 2012 (which Absoft says is available upon request), or a version later than 11.5.2 - (e.g., 11.5.3), is required to compile the new Fortran mpi_f08 + (e.g., 11.5.3), is required to compile the Fortran mpi_f08 module. - Open MPI does not support the Sparc v8 CPU target. However, @@ -252,6 +254,9 @@ Compiler Notes version of the Intel 12.1 Linux compiler suite, the problem will go away. +- It has been reported that Pathscale 5.0.5 and 6.0.527 compilers + give an internal compiler error when trying to compile Open MPI. + - Early versions of the Portland Group 6.0 compiler have problems creating the C++ MPI bindings as a shared library (e.g., v6.0-1). Tests with later versions show that this has been fixed (e.g., @@ -287,6 +292,9 @@ Compiler Notes still using GCC 3.x). Contact Pathscale support if you continue to have problems with Open MPI's C++ bindings. + Note the MPI C++ bindings have been deprecated by the MPI Forum and + may not be supported in future releases. + - Using the Absoft compiler to build the MPI Fortran bindings on Suse 9.3 is known to fail due to a Libtool compatibility issue. 
@@ -296,7 +304,7 @@ Compiler Notes ******************************************************************** ******************************************************************** *** There is now only a single Fortran MPI wrapper compiler and a - *** single Fortran OSHMEM wrapper compiler: mpifort and oshfort, + *** single Fortran OpenSHMEM wrapper compiler: mpifort and oshfort, *** respectively. mpif77 and mpif90 still exist, but they are *** symbolic links to mpifort. ******************************************************************** @@ -347,12 +355,12 @@ Compiler Notes is provided, allowing mpi_f08 to be used in new subroutines in legacy MPI applications. - Per the OSHMEM specification, there is only one Fortran OSHMEM binding - provided: + Per the OpenSHMEM specification, there is only one Fortran OpenSHMEM + binding provided: - - shmem.fh: All Fortran OpenSHMEM programs **should** include 'shmem.fh', - and Fortran OSHMEM programs that use constants defined by OpenSHMEM - **MUST** include 'shmem.fh'. + - shmem.fh: All Fortran OpenSHMEM programs **should** include + 'shmem.fh', and Fortran OpenSHMEM programs that use constants + defined by OpenSHMEM **MUST** include 'shmem.fh'. The following notes apply to the above-listed Fortran bindings: @@ -384,10 +392,9 @@ Compiler Notes Similar to the mpif.h interface, MPI_SIZEOF is only supported on Fortran compilers that support INTERFACE and ISO_FORTRAN_ENV. - - The mpi_f08 module is new and has been tested with the Intel - Fortran compiler and gfortran >= 4.9. Other modern Fortran - compilers may also work (but are, as yet, only lightly tested). - It is expected that this support will mature over time. + - The mpi_f08 module has been tested with the Intel Fortran compiler + and gfortran >= 4.9. Other modern Fortran compilers likely also + work. Many older Fortran compilers do not provide enough modern Fortran features to support the mpi_f08 module. 
For example, gfortran < @@ -408,10 +415,10 @@ General Run-Time Support Notes is a shared library), unless using the --prefix or --enable-mpirun-prefix-by-default functionality (see below). -- Open MPI's run-time behavior can be customized via MCA ("MPI - Component Architecture") parameters (see below for more information - on how to get/set MCA parameter values). Some MCA parameters can be - set in a way that renders Open MPI inoperable (see notes about MCA +- Open MPI's run-time behavior can be customized via MPI Component + Architecture (MCA) parameters (see below for more information on how + to get/set MCA parameter values). Some MCA parameters can be set in + a way that renders Open MPI inoperable (see notes about MCA parameters later in this file). In particular, some parameters have required options that must be included. @@ -419,7 +426,7 @@ General Run-Time Support Notes component, or Open MPI will not be able to deliver messages to the same rank as the sender. For example: "mpirun --mca btl tcp,self ..." - - If specified, the "btl_tcp_if_exclude" paramater must include the + - If specified, the "btl_tcp_if_exclude" parameter must include the loopback device ("lo" on many Linux platforms), or Open MPI will not be able to route MPI messages using the TCP BTL. For example: "mpirun --mca btl_tcp_if_exclude lo,eth1 ..." @@ -450,22 +457,37 @@ MPI Functionality and Features deprecated_example.c:4: warning: 'MPI_Type_struct' is deprecated (declared at /opt/openmpi/include/mpi.h:1522) shell$ -- MPI_THREAD_MULTIPLE support is included, but is only lightly tested. - It likely does not work for thread-intensive applications. Note - that *only* the MPI point-to-point communication functions for the - BTL's listed here are considered thread safe. Other support - functions (e.g., MPI attributes) have not been certified as safe - when simultaneously used by multiple threads. 
- - tcp - - sm - - self - - Note that Open MPI's thread support is in a fairly early stage; the - above devices may *work*, but the latency is likely to be fairly - high. Specifically, efforts so far have concentrated on - *correctness*, not *performance* (yet). - - YMMV. +- MPI_THREAD_MULTIPLE is supported with some exceptions. Note that + Open MPI must be configured with --enable-mpi-thread-multiple to get + this level of thread safety support. + + The following PMLs support MPI_THREAD_MULTIPLE: + - cm (see list (1) of supported MTLs, below) + - ob1 (see list (2) of supported BTLs, below) + - ucx + - yalla + + (1) The cm PML and the following MTLs support MPI_THREAD_MULTIPLE: + - MXM + - ofi (Libfabric) + - portals4 + + (2) The ob1 PML and the following BTLs support MPI_THREAD_MULTIPLE: + - openib (see exception below) + - self + - sm + - smcuda + - tcp + - ugni + - usnic + - vader (shared memory) + + The openib BTL's RDMACM based connection setup mechanism is also not + thread safe. The default UDCM method should be used for + applications requiring MPI_THREAD_MULTIPLE support. + + Currently, MPI File operations are not thread safe even if MPI is + initialized for MPI_THREAD_MULTIPLE support. - MPI_REAL16 and MPI_COMPLEX32 are only supported on platforms where a portable C datatype can be found that matches the Fortran type @@ -475,11 +497,12 @@ MPI Functionality and Features by default (it can be disabled via the --disable-libompitrace flag). This library provides a simplistic tracing of select MPI function calls via the MPI profiling interface. 
Linking it in to - your appliation via (e.g., via -lompitrace) will automatically + your application (e.g., via -lompitrace) will automatically output to stderr when some MPI functions are invoked: - shell$ mpicc hello_world.c -o hello_world -lompitrace - shell$ mpirun -np 1 hello_world.c + shell$ cd examples/ + shell$ mpicc hello_c.c -o hello_c -lompitrace + shell$ mpirun -np 1 hello_c MPI_INIT: argc 1 Hello, world, I am 0 of 1 MPI_BARRIER[0]: comm MPI_COMM_WORLD @@ -492,17 +515,17 @@ MPI Functionality and Features This library is being offered as a "proof of concept" / convenience from Open MPI. If there is interest, it is trivially easy to extend - it to printf for other MPI functions. Patches and/or suggestions - would be greatfully appreciated on the Open MPI developer's list. + it to printf for other MPI functions. Pull requests on github.com + would be greatly appreciated. -OSHMEM Functionality and Features ------------------------------- +OpenSHMEM Functionality and Features +------------------------------------ -- All OpenSHMEM-1.0 functionality is supported. +- All OpenSHMEM-1.3 functionality is supported. MPI Collectives ------------ +--------------- - The "hierarch" coll component (i.e., an implementation of MPI collective operations) attempts to discover network layers of @@ -526,39 +549,6 @@ MPI Collectives (FCA) is a solution for offloading collective operations from the MPI process onto Mellanox QDR InfiniBand switch CPUs and HCAs. -- The "ML" coll component is an implementation of MPI collective - operations that takes advantage of communication hierarchies - in modern systems. A ML collective operation is implemented by - combining multiple independently progressing collective primitives - implemented over different communication hierarchies, hence a ML - collective operation is also referred to as a hierarchical collective - operation. 
The number of collective primitives that are included in a - ML collective operation is a function of subgroups(hierarchies). - Typically, MPI processes in a single communication hierarchy such as - CPU socket, node, or subnet are grouped together into a single subgroup - (hierarchy). The number of subgroups are configurable at runtime, - and each different collective operation could be configured to have - a different of number of subgroups. - - The component frameworks and components used by/required for a - "ML" collective operation. - - Frameworks: - * "sbgp" - Provides functionality for grouping processes into subgroups - * "bcol" - Provides collective primitives optimized for a particular - communication hierarchy - - Components: - * sbgp components - Provides grouping functionality over a CPU socket - ("basesocket"), shared memory ("basesmuma"), - Mellanox's ConnectX HCA ("ibnet"), and other - interconnects supported by PML ("p2p") - - * BCOL components - Provides optimized collective primitives for - shared memory ("basesmuma"), Mellanox's ConnectX - HCA ("iboffload"), and other interconnects supported - by PML ("ptpcoll") - - The "cuda" coll component provides CUDA-aware support for the reduction type collectives with GPU buffers. This component is only compiled into the library when the library has been configured with @@ -566,30 +556,29 @@ MPI Collectives collectives, copies the data to staging buffers if GPU buffers, then calls underlying collectives to do the work. -OSHMEM Collectives ------------ +OpenSHMEM Collectives +--------------------- -- The "fca" scoll component: the Mellanox Fabric Collective Accelerator - (FCA) is a solution for offloading collective operations from the - MPI process onto Mellanox QDR InfiniBand switch CPUs and HCAs. +- The "fca" scoll component: the Mellanox Fabric Collective + Accelerator (FCA) is a solution for offloading collective operations + from the MPI process onto Mellanox QDR InfiniBand switch CPUs and + HCAs. 
-- The "basic" scoll component: Reference implementation of all OSHMEM - collective operations. +- The "basic" scoll component: Reference implementation of all + OpenSHMEM collective operations. Network Support --------------- -- There are three main MPI network models available: "ob1", "cm", and - "yalla". "ob1" uses BTL ("Byte Transfer Layer") components for each - supported network. "cm" uses MTL ("Matching Tranport Layer") - components for each supported network. "yalla" uses the Mellanox - MXM transport. +- There are four main MPI network models available: "ob1", "cm", + "yalla", and "ucx". "ob1" uses BTL ("Byte Transfer Layer") + components for each supported network. "cm" uses MTL ("Matching + Transport Layer") components for each supported network. "yalla" + uses the Mellanox MXM transport. "ucx" uses the OpenUCX transport. - "ob1" supports a variety of networks that can be used in - combination with each other (per OS constraints; e.g., there are - reports that the GM and OpenFabrics kernel drivers do not operate - well together): + combination with each other: - OpenFabrics: InfiniBand, iWARP, and RoCE - Loopback (send-to-self) @@ -599,29 +588,31 @@ Network Support - SMCUDA - Cisco usNIC - uGNI (Cray Gemini, Aries) - - vader (XPMEM, Linux CMA, Linux KNEM, and general shared memory) + - vader (XPMEM, Linux CMA, Linux KNEM, and copy-in/copy-out shared + memory) - "cm" supports a smaller number of networks (and they cannot be used together), but may provide better overall MPI performance: - - InfiniPath PSM - - Mellanox MXM - - Portals4 - - OpenFabrics Interfaces ("libfabric") + - Intel Omni-Path PSM2 + - Intel True Scale PSM (QLogic InfiniPath) + - OpenFabrics Interfaces ("libfabric" tag matching) + - Portals 4 - Open MPI will, by default, choose to use "cm" when the InfiniPath - PSM or the Mellanox MXM MTL can be used. Otherwise, "ob1" will be - used and the corresponding BTLs will be selected. 
Users can force - the use of ob1 or cm if desired by setting the "pml" MCA parameter - at run-time: + Open MPI will, by default, choose to use "cm" when one of the + above transports can be used, unless OpenUCX or MXM support is + detected, in which case the "ucx" or "yalla" PML will be used + by default. Otherwise, "ob1" will be used and the corresponding + BTLs will be selected. Users can force the use of ob1 or cm if + desired by setting the "pml" MCA parameter at run-time: shell$ mpirun --mca pml ob1 ... or shell$ mpirun --mca pml cm ... -- Similarly, there are two OSHMEM network models available: "yoda", - and "ikrit". "yoda" also uses the BTL components for many supported - network. "ikrit" interfaces directly with Mellanox MXM. +- Similarly, there are two OpenSHMEM network models available: "yoda", + and "ikrit". "yoda" also uses the BTL components for supported + networks. "ikrit" interfaces directly with Mellanox MXM. - "yoda" supports a variety of networks that can be used: @@ -629,12 +620,13 @@ Network Support - Loopback (send-to-self) - Shared memory - TCP + - usNIC - "ikrit" only supports Mellanox MXM. - MXM is the Mellanox Messaging Accelerator library utilizing a full range of IB transports to provide the following messaging services - to the upper level MPI/OSHMEM libraries: + to the upper level MPI/OpenSHMEM libraries: - Usage of all available IB transports - Native RDMA support @@ -645,7 +637,7 @@ Network Support - The usnic BTL is support for Cisco's usNIC device ("userspace NIC") on Cisco UCS servers with the Virtualized Interface Card (VIC). Although the usNIC is accessed via the OpenFabrics Libfabric API - stack, this BTL is specific to the Cisco usNIC device. + stack, this BTL is specific to Cisco usNIC devices. - uGNI is a Cray library for communicating over the Gemini and Aries interconnects. @@ -655,25 +647,6 @@ Network Support Mellanox InfiniBand plugin driver is created. The problem is fixed OFED v1.1 (and later). 
-- Better memory management support is available for OFED-based - transports using the "ummunotify" Linux kernel module. OFED memory - managers are necessary for better bandwidth when re-using the same - buffers for large messages (e.g., benchmarks and some applications). - - Unfortunately, the ummunotify module was not accepted by the Linux - kernel community (and is still not distributed by OFED). But it - still remains the best memory management solution for MPI - applications that used the OFED network transports. If Open MPI is - able to find the header file, it will build - support for ummunotify and include it by default. If MPI processes - then find the ummunotify kernel module loaded and active, then their - memory managers (which have been shown to be problematic in some - cases) will be disabled and ummunotify will be used. Otherwise, the - same memory managers from prior versions of Open MPI will be used. - The ummunotify Linux kernel module can be downloaded from: - - http://lwn.net/Articles/343351/ - - The use of fork() with OpenFabrics-based networks (i.e., the openib BTL) is only partially supported, and only on Linux kernels >= v2.6.15 with libibverbs v1.1 or later (first released as part of @@ -696,9 +669,9 @@ Network Support Open MPI Extensions ------------------- -- An MPI "extensions" framework has been added (but is not enabled by - default). See the "Open MPI API Extensions" section below for more - information on compiling and using MPI extensions. +- An MPI "extensions" framework is included in Open MPI, but is not + enabled by default. See the "Open MPI API Extensions" section below + for more information on compiling and using MPI extensions. - The following extensions are included in this version of Open MPI: @@ -706,8 +679,12 @@ Open MPI Extensions a string that contains what resources a process is bound to. See its man page for more details. - cr: Provides routines to access to checkpoint restart routines. 
- See ompi/mpiext/cr/mpiext_cr_c.h for a listing of availble + See ompi/mpiext/cr/mpiext_cr_c.h for a listing of available functions. + - cuda: When the library is compiled with CUDA-aware support, it + provides two things. First, a macro + MPIX_CUDA_AWARE_SUPPORT. Secondly, the function + MPIX_Query_cuda_support that can be used to query for support. - example: A non-functional extension; its only purpose is to provide an example for how to create other extensions. @@ -719,10 +696,9 @@ Building Open MPI Open MPI uses a traditional configure script paired with "make" to build. Typical installs can be of the pattern: ---------------------------------------------------------------------------- shell$ ./configure [...options...] -shell$ make all install ---------------------------------------------------------------------------- +shell$ make [-j N] all install + (use an integer value of N for parallel builds) There are many available configure options (see "./configure --help" for a full list); a summary of the more commonly used ones is included @@ -745,16 +721,16 @@ INSTALLATION OPTIONS files in /include, its libraries in /lib, etc. --disable-shared - By default, libmpi and libshmem are built as a shared library, and - all components are built as dynamic shared objects (DSOs). This - switch disables this default; it is really only useful when used with + By default, Open MPI and OpenSHMEM build shared libraries, and all + components are built as dynamic shared objects (DSOs). This switch + disables this default; it is really only useful when used with --enable-static. Specifically, this option does *not* imply --enable-static; enabling static libraries and disabling shared libraries are two independent options. --enable-static - Build libmpi and libshmem as static libraries, and statically link in all - components. Note that this option does *not* imply + Build MPI and OpenSHMEM as static libraries, and statically link in + all components. 
Note that this option does *not* imply --disable-shared; enabling static libraries and disabling shared libraries are two independent options. @@ -775,14 +751,15 @@ INSTALLATION OPTIONS is an important difference between the two: "rpath": the location of the Open MPI libraries is hard-coded into - the MPI/OSHMEM application and cannot be overridden at run-time. + the MPI/OpenSHMEM application and cannot be overridden at + run-time. "runpath": the location of the Open MPI libraries is hard-coded into - the MPI/OSHMEM application, but can be overridden at run-time by - setting the LD_LIBRARY_PATH environment variable. + the MPI/OpenSHMEM application, but can be overridden at run-time + by setting the LD_LIBRARY_PATH environment variable. For example, consider that you install Open MPI vA.B.0 and - compile/link your MPI/OSHMEM application against it. Later, you install - Open MPI vA.B.1 to a different installation prefix (e.g., + compile/link your MPI/OpenSHMEM application against it. Later, you + install Open MPI vA.B.1 to a different installation prefix (e.g., /opt/openmpi/A.B.1 vs. /opt/openmpi/A.B.0), and you leave the old installation intact. @@ -820,13 +797,26 @@ INSTALLATION OPTIONS command line that are not in FILE are also used. Options on the command line and in FILE are replaced by what is in FILE. +--with-libmpi-name=STRING + Replace libmpi.* and libmpi_FOO.* (where FOO is one of the fortran + supporting libraries installed in lib) with libSTRING.* and + libSTRING_FOO.*. This is provided as a convenience mechanism for + third-party packagers of Open MPI that might want to rename these + libraries for their own purposes. This option is *not* intended for + typical users of Open MPI. + +--enable-mca-no-build=LIST + Comma-separated list of - pairs that will not be + built. For example, "--enable-mca-no-build=btl-portals,oob-ud" will + disable building the portals BTL and the ud OOB component. 
+ NETWORKING SUPPORT / OPTIONS --with-fca= Specify the directory where the Mellanox FCA library and header files are located. - FCA is the support library for Mellanox QDR switches and HCAs. + FCA is the support library for Mellanox switches and HCAs. --with-hcoll= Specify the directory where the Mellanox hcoll library and header @@ -848,6 +838,22 @@ NETWORKING SUPPORT / OPTIONS same server. See http://runtime.bordeaux.inria.fr/knem/ for details. +--with-libfabric= + Specify the directory where the OpenFabrics Interfaces libfabric + library and header files are located. This option is generally only + necessary if the libfabric headers and libraries are not in default + compiler/linker search paths. + + Libfabric is the support library for OpenFabrics Interfaces-based + network adapters, such as Cisco usNIC, Intel True Scale PSM, Cray + uGNI, etc. + +--with-libfabric-libdir= + Look in directory for the libfabric libraries. By default, Open MPI + will look in /lib and /lib64, which covers most cases. This option is only + needed for special configurations. + --with-mxm= Specify the directory where the Mellanox MXM library and header files are located. This option is generally only necessary if the @@ -861,24 +867,6 @@ NETWORKING SUPPORT / OPTIONS look in /lib and /lib64, which covers most cases. This option is only needed for special configurations. ---with-usnic - Abort configure if Cisco usNIC support cannot be built. - ---with-verbs= - Specify the directory where the verbs (also know as OpenFabrics, and - previously known as OpenIB) libraries and header files are located. - This option is generally only necessary if the verbs headers and - libraries are not in default compiler/linker search paths. - - "OpenFabrics" refers to operating system bypass networks, such as - InfiniBand, usNIC, iWARP, and RoCE (aka "IBoIP"). - ---with-verbs-libdir= - Look in directory for the verbs libraries. By default, Open MPI - will look in /lib and /lib64, - which covers most cases. 
This option is only needed for special - configurations. - --with-portals4= Specify the directory where the Portals4 libraries and header files are located. This option is generally only necessary if the Portals4 @@ -899,9 +887,9 @@ NETWORKING SUPPORT / OPTIONS Set configuration values for Portals 4 --with-psm= - Specify the directory where the InfiniPath PSM library and - header files are located. This option is generally only necessary - if the InfiniPath headers and libraries are not in default + Specify the directory where the QLogic InfiniPath / Intel True Scale + PSM library and header files are located. This option is generally + only necessary if the PSM headers and libraries are not in default compiler/linker search paths. PSM is the support library for QLogic InfiniPath and Intel TrueScale @@ -912,21 +900,53 @@ NETWORKING SUPPORT / OPTIONS look in /lib and /lib64, which covers most cases. This option is only needed for special configurations. ---with-sctp= - Specify the directory where the SCTP libraries and header files are - located. This option is generally only necessary if the SCTP headers - and libraries are not in default compiler/linker search paths. +--with-psm2= + Specify the directory where the Intel Omni-Path PSM2 library and + header files are located. This option is generally only necessary + if the PSM2 headers and libraries are not in default compiler/linker + search paths. - SCTP is a special network stack over Ethernet networks. + PSM2 is the support library for Intel Omni-Path network adapters. ---with-sctp-libdir= - Look in directory for the SCTP libraries. By default, Open MPI will - look in /lib and /lib64, which covers - most cases. This option is only needed for special configurations. +--with-psm2-libdir= + Look in directory for the PSM2 libraries. By default, Open MPI will + look in /lib and /lib64, which + covers most cases. This option is only needed for special + configurations. 
--with-scif= Look in directory for Intel SCIF support libraries +--with-verbs= + Specify the directory where the verbs (also known as OpenFabrics + verbs, or Linux verbs, and previously known as OpenIB) libraries and + header files are located. This option is generally only necessary + if the verbs headers and libraries are not in default + compiler/linker search paths. + + The Verbs library usually implies operating system bypass networks, + such as InfiniBand, usNIC, iWARP, and RoCE (aka "IBoIP"). + +--with-verbs-libdir= + Look in directory for the verbs libraries. By default, Open MPI + will look in /lib and /lib64, + which covers most cases. This option is only needed for special + configurations. + +--with-verbs-usnic + This option will activate support in Open MPI for disabling a + dire-sounding warning message from libibverbs that Cisco usNIC + devices are not supported (because Cisco usNIC devices are supported + through libfabric, not libibverbs). This libibverbs warning can + also be suppressed by installing the "no op" libusnic_verbs plugin + for libibverbs (see https://github.com/cisco/libusnic_verbs, or + download binaries from cisco.com). This option is disabled by + default because it causes libopen-pal.so to depend on libibverbs.so, + which is undesirable to many downstream packagers. + +--with-usnic + Abort configure if Cisco usNIC support cannot be built. + RUN-TIME SYSTEM SUPPORT --enable-mpirun-prefix-by-default @@ -941,9 +961,6 @@ RUN-TIME SYSTEM SUPPORT path names. --enable-orterun-prefix-by-default is a synonym for this option. ---enable-sensors - Enable internal sensors (default: disabled). - --enable-orte-static-ports Enable orte static ports for tcp oob (default: enabled). @@ -976,10 +993,11 @@ RUN-TIME SYSTEM SUPPORT most cases. This option is only needed for special configurations. --with-pmi - Build PMI support (by default on non-Cray XE/XC systems, it is not built). 
- On Cray XE/XC systems, the location of pmi is detected automatically as - part of the configure process. For non-Cray systems, if the pmi2.h header - is found in addition to pmi.h, then support for PMI2 will be built. + Build PMI support (by default on non-Cray XE/XC systems, it is not + built). On Cray XE/XC systems, the location of pmi is detected + automatically as part of the configure process. For non-Cray + systems, if the pmi2.h header is found in addition to pmi.h, then + support for PMI2 will be built. --with-slurm Force the building of SLURM scheduler support. @@ -1004,36 +1022,6 @@ RUN-TIME SYSTEM SUPPORT MISCELLANEOUS SUPPORT LIBRARIES ---with-blcr= - Specify the directory where the Berkeley Labs Checkpoint / Restart - (BLCR) libraries and header files are located. This option is - generally only necessary if the BLCR headers and libraries are not - in default compiler/linker search paths. - - This option is only meaningful if the --with-ft option is also used - to active Open MPI's fault tolerance behavior. - ---with-blcr-libdir= - Look in directory for the BLCR libraries. By default, Open MPI will - look in /lib and /lib64, which - covers most cases. This option is only needed for special - configurations. - ---with-dmtcp= - Specify the directory where the Distributed MultiThreaded - Checkpointing (DMTCP) libraries and header files are located. This - option is generally only necessary if the DMTCP headers and - libraries are not in default compiler/linker search paths. - - This option is only meaningful if the --with-ft option is also used - to active Open MPI's fault tolerance behavior. - ---with-dmtcp-libdir= - Look in directory for the DMTCP libraries. By default, Open MPI - will look in /lib and /lib64, - which covers most cases. This option is only needed for special - configurations. - --with-libevent(=value) This option specifies where to find the libevent support headers and library. 
The following VALUEs are permitted: @@ -1046,7 +1034,7 @@ MISCELLANEOUS SUPPORT LIBRARIES installation to use By default (or if --with-libevent is specified with no VALUE), Open - MPI will build and use the copy of libeveny that it has in its + MPI will build and use the copy of libevent that it has in its source tree. However, if the VALUE is "external", Open MPI will look for the relevant libevent header file and library in default compiler / linker locations. Or, VALUE can be a directory tree @@ -1152,12 +1140,6 @@ MPI FUNCTIONALITY --enable-mpi-thread-multiple Allows the MPI thread level MPI_THREAD_MULTIPLE. - This is currently disabled by default. Enabling - this feature will automatically --enable-opal-multi-threads. - ---enable-opal-multi-threads - Enables thread lock support in the OPAL and ORTE layers. Does - not enable MPI_THREAD_MULTIPLE - see above option for that feature. This is currently disabled by default. --enable-mpi-cxx @@ -1193,7 +1175,7 @@ MPI FUNCTIONALITY none: Synonym for "no". no: Do not build any MPI Fortran support (same as --disable-mpi-fortran). This is mutually exclusive - with building the OSHMEM Fortran interface. + with building the OpenSHMEM Fortran interface. --enable-mpi-ext(=) Enable Open MPI's non-portable API extensions. If no is @@ -1201,25 +1183,38 @@ MPI FUNCTIONALITY See "Open MPI API Extensions", below, for more details. +--disable-mpi-io + Disable built-in support for MPI-2 I/O, likely because an + externally-provided MPI I/O package will be used. Default is to use + the internal framework system that uses the ompio component and a + specially modified version of ROMIO that fits inside the romio + component + +--disable-io-romio + Disable the ROMIO MPI-IO component + --with-io-romio-flags=flags Pass flags to the ROMIO distribution configuration script. This option is usually only necessary to pass parallel-filesystem-specific preprocessor/compiler/linker flags back to the ROMIO system. 
+--disable-io-ompio + Disable the ompio MPI-IO component + --enable-sparse-groups Enable the usage of sparse groups. This would save memory significantly especially if you are creating large communicators. (Disabled by default) -OSHMEM FUNCTIONALITY +OPENSHMEM FUNCTIONALITY --disable-oshmem Disable building the OpenSHMEM implementation (by default, it is enabled). --disable-oshmem-fortran - Disable building only the Fortran OSHMEM bindings. Please see + Disable building only the Fortran OpenSHMEM bindings. Please see the "Compiler Notes" section herein which contains further details on known issues with various Fortran compilers. @@ -1235,11 +1230,6 @@ MISCELLANEOUS FUNCTIONALITY However, it may be necessary to disable the memory manager in order to build Open MPI statically. ---with-ft=TYPE - Specify the type of fault tolerance to enable. Options: LAM - (LAM/MPI-like), cr (Checkpoint/Restart). Fault tolerance support is - disabled unless this option is specified. - --enable-peruse Enable the PERUSE MPI data analysis interface. @@ -1383,24 +1373,38 @@ Backwards Compatibility Open MPI version Y is backwards compatible with Open MPI version X (where Y>X) if users can: - * Compile an MPI/OSHMEM application with version X, mpirun/oshrun it - with version Y, and get the same user-observable behavior. + * Compile an MPI/OpenSHMEM application with version X, mpirun/oshrun + it with version Y, and get the same user-observable behavior. * Invoke ompi_info with the same CLI options in versions X and Y and get the same user-observable behavior. 
Note that this definition encompasses several things: * Application Binary Interface (ABI) - * MPI / OSHMEM run time system + * MPI / OpenSHMEM run time system * mpirun / oshrun command line options * MCA parameter names / values / meanings However, this definition only applies when the same version of Open -MPI is used with all instances of the runtime and MPI / OSHMEM +MPI is used with all instances of the runtime and MPI / OpenSHMEM processes in a single MPI job. If the versions are not exactly the same everywhere, Open MPI is not guaranteed to work properly in any scenario. +Backwards compatibility tends to work best when user applications are +dynamically linked to one version of the Open MPI / OSHMEM libraries, +and can be updated at run time to link to a new version of the Open +MPI / OSHMEM libraries. + +For example, if an MPI / OSHMEM application links statically against +the libraries from Open MPI vX, then attempting to launch that +application with mpirun / oshrun from Open MPI vY is not guaranteed to +work (because it is mixing vX and vY of Open MPI in a single job). + +Similarly, if using a container technology that internally bundles all +the libraries from Open MPI vX, attempting to launch that container +with mpirun / oshrun from Open MPI vY is not guaranteed to work. + Software Version Number ----------------------- @@ -1412,7 +1416,7 @@ format. Each of the three numbers has a specific meaning: change in the code base and/or end-user functionality, and also indicate a break from backwards compatibility. Specifically: Open MPI releases with different major version numbers are not - backwards compatibile with each other. + backwards compatible with each other. CAVEAT: This rule does not extend to versions prior to v1.10.0.
Specifically: v1.10.x is not guaranteed to be backwards @@ -1451,25 +1455,14 @@ The "A.B.C" version number may optionally be followed by a Quantifier: Nightly development snapshot tarballs use a different version number scheme; they contain three distinct values: - * The most recent Git tag name on the branch from which the tarball - was created. - * An integer indicating how many Git commits have occurred since - that Git tag. - * The Git hash of the tip of the branch. + * The git branch name from which the tarball was created. + * The date/timestamp, in YYYYMMDDHHMM format. + * The hash of the git commit from which the tarball was created. For example, a snapshot tarball filename of -"openmpi-v1.8.2-57-gb9f1fd9.tar.bz2" indicates that this tarball was -created from the v1.8 branch, 57 Git commits after the "v1.8.2" tag, -specifically at Git hash gb9f1fd9. - -Open MPI's Git master branch contains a single "dev" tag. For -example, "openmpi-dev-8-gf21c349.tar.bz2" represents a snapshot -tarball created from the master branch, 8 Git commits after the "dev" -tag, specifically at Git hash gf21c349. - -The exact value of the "number of Git commits past a tag" integer is -fairly meaningless; its sole purpose is to provide an easy, -human-recognizable ordering for snapshot tarballs. +"openmpi-v2.x-201703070235-e4798fb.tar.gz" indicates that this tarball +was created from the v2.x branch, on March 7, 2017, at 2:35am GMT, +from git hash e4798fb. Shared Library Version Number ----------------------------- @@ -1519,11 +1512,11 @@ Here's how we apply those rules specifically to Open MPI: above rules: rules 4, 5, and 6 only apply to the official MPI and OpenSHMEM interfaces (functions, global variables). The rationale for this decision is that the vast majority of our users only care - about the official/public MPI/OSHMEM interfaces; we therefore want - the .so version number to reflect only changes to the official - MPI/OSHMEM APIs. 
Put simply: non-MPI/OSHMEM API / internal - changes to the MPI-application-facing libraries are irrelevant to - pure MPI/OSHMEM applications. + about the official/public MPI/OpenSHMEM interfaces; we therefore + want the .so version number to reflect only changes to the + official MPI/OpenSHMEM APIs. Put simply: non-MPI/OpenSHMEM API / + internal changes to the MPI-application-facing libraries are + irrelevant to pure MPI/OpenSHMEM applications. * libmpi * libmpi_mpifh @@ -1533,7 +1526,6 @@ Here's how we apply those rules specifically to Open MPI: * libmpi_cxx * libmpi_java * liboshmem - * liboshmem_java =========================================================================== @@ -1591,15 +1583,16 @@ tests: receives a few MPI messages (e.g., the ring_c program in the examples/ directory in the Open MPI distribution). -4. Use "oshrun" to launch a non-OSHMEM program across multiple nodes. +4. Use "oshrun" to launch a non-OpenSHMEM program across multiple + nodes. -5. Use "oshrun" to launch a trivial MPI program that does no OSHMEM - communication (e.g., hello_shmem.c program in the examples/ directory - in the Open MPI distribution.) +5. Use "oshrun" to launch a trivial MPI program that does no OpenSHMEM + communication (e.g., hello_shmem.c program in the examples/ + directory in the Open MPI distribution.) -6. Use "oshrun" to launch a trivial OSHMEM program that puts and gets - a few messages. (e.g., the ring_shmem.c in the examples/ directory - in the Open MPI distribution.) +6. Use "oshrun" to launch a trivial OpenSHMEM program that puts and + gets a few messages. (e.g., the ring_shmem.c in the examples/ + directory in the Open MPI distribution.) If you can run all six of these tests successfully, that is a good indication that Open MPI built and installed properly. @@ -1610,9 +1603,9 @@ Open MPI API Extensions ----------------------- Open MPI contains a framework for extending the MPI API that is -available to applications. 
Each extension is usually a standalone set of -functionality that is distinct from other extensions (similar to how -Open MPI's plugins are usually unrelated to each other). These +available to applications. Each extension is usually a standalone set +of functionality that is distinct from other extensions (similar to +how Open MPI's plugins are usually unrelated to each other). These extensions provide new functions and/or constants that are available to MPI applications. @@ -1622,10 +1615,8 @@ MPI implementations! Compiling the extensions ------------------------ -Open MPI extensions are not enabled by default; they must be enabled -by Open MPI's configure script. The --enable-mpi-ext command line -switch accepts a comma-delimited list of extensions to enable, or, if -it is specified without a list, all extensions are enabled. +Open MPI extensions are all enabled by default; they can be disabled +via the --disable-mpi-ext command line switch. Since extensions are meant to be used by advanced users only, this file does not document which extensions are available or what they @@ -1677,7 +1668,7 @@ Compiling Open MPI Applications ------------------------------- Open MPI provides "wrapper" compilers that should be used for -compiling MPI and OSHMEM applications: +compiling MPI and OpenSHMEM applications: C: mpicc, oshcc C++: mpiCC, oshCC (or mpic++ if your filesystem is case-insensitive) @@ -1688,7 +1679,7 @@ For example: shell$ mpicc hello_world_mpi.c -o hello_world_mpi -g shell$ -For OSHMEM applications: +For OpenSHMEM applications: shell$ oshcc hello_shmem.c -o hello_shmem -g shell$ @@ -1730,7 +1721,7 @@ configure script. They are not necessary for MPI applications, but may be used by applications that use Open MPI's lower layer support libraries. 
-orte: Open MPI Run-Time Environment applicaions +orte: Open MPI Run-Time Environment applications opal: Open Portable Access Layer applications =========================================================================== @@ -1788,17 +1779,18 @@ Note that the values of component parameters can be changed on the mpirun / mpiexec command line. This is explained in the section below, "The Modular Component Architecture (MCA)". -Open MPI supports oshrun to launch OSHMEM applications. For example: +Open MPI supports oshrun to launch OpenSHMEM applications. For +example: shell$ oshrun -np 2 hello_world_oshmem -OSHMEM applications may also be launched directly by resource managers -such as SLURM. For example, when OMPI is configured --with-pmi and ---with-slurm one may launch OSHMEM applications via srun: +OpenSHMEM applications may also be launched directly by resource +managers such as SLURM. For example, when OMPI is configured +--with-pmi and --with-slurm, one may launch OpenSHMEM applications via +srun: shell$ srun -N 2 hello_world_oshmem - =========================================================================== The Modular Component Architecture (MCA) @@ -1812,43 +1804,33 @@ component frameworks in Open MPI: MPI component frameworks: ------------------------- -allocator - Memory allocator -bcol - Base collective operations bml - BTL management layer -btl - MPI point-to-point Byte Transfer Layer, used for MPI - point-to-point messages on some types of networks coll - MPI collective algorithms -crcp - Checkpoint/restart coordination protocol -dpm - MPI dynamic process management fbtl - file byte transfer layer: abstraction for individual read/write operations for OMPIO fcoll - collective read and write operations for MPI I/O fs - file system functions for MPI I/O io - MPI I/O -mpool - Memory pooling mtl - Matching transport layer, used for MPI point-to-point messages on some types of networks op - Back end computations for intrinsic MPI_Op operators osc - MPI 
one-sided communications pml - MPI point-to-point management layer -pubsub - MPI publish/subscribe management -rcache - Memory registration cache rte - Run-time environment operations -sbgp - Collective operation sub-group sharedfp - shared file pointer operations for MPI I/O topo - MPI topology routines vprotocol - Protocols for the "v" PML -OSHMEM component frameworks: +OpenSHMEM component frameworks: ------------------------- -atomic - OSHMEM atomic operations -memheap - OSHMEM memory allocators that support the +atomic - OpenSHMEM atomic operations +memheap - OpenSHMEM memory allocators that support the PGAS memory model -scoll - OSHMEM collective operations -spml - OSHMEM "pml-like" layer: supports one-sided, +scoll - OpenSHMEM collective operations +spml - OpenSHMEM "pml-like" layer: supports one-sided, point-to-point operations -sshmem - OSHMEM shared memory backing facility +sshmem - OpenSHMEM shared memory backing facility Back-end run-time environment (RTE) component frameworks: @@ -1856,10 +1838,11 @@ Back-end run-time environment (RTE) component frameworks: dfs - Distributed file system errmgr - RTE error manager -ess - RTE environment-specfic services +ess - RTE environment-specific services filem - Remote file management grpcomm - RTE group communications iof - I/O forwarding +notifier - System-level notification support odls - OpenRTE daemon local launch subsystem oob - Out of band messaging plm - Process lifecycle management @@ -1867,28 +1850,31 @@ ras - Resource allocation system rmaps - Resource mapping system rml - RTE message layer routed - Routing table for the RML -sensor - Software and hardware health monitoring -snapc - Snapshot coordination -sstore - Distributed scalable storage +rtc - Run-time control framework +schizo - OpenRTE personality framework state - RTE state machine Miscellaneous frameworks: ------------------------- +allocator - Memory allocator backtrace - Debugging call stack backtrace support -compress - Compression 
algorithms -crs - Checkpoint and restart service -db - Internal database support +btl - Point-to-point Byte Transfer Layer dl - Dynamic loading library interface event - Event library (libevent) versioning support hwloc - Hardware locality (hwloc) versioning support if - OS IP interface support installdirs - Installation directory relocation services memchecker - Run-time memory checking -memcpy - Memopy copy support +memcpy - Memory copy support memory - Memory management hooks +mpool - Memory pooling +patcher - Symbol patcher hooks +pmix - Process management interface (exascale) pstat - Process status -shmem - Shared memory support (NOT related to OSHMEM) +rcache - Memory registration cache +sec - Security framework +shmem - Shared memory support (NOT related to OpenSHMEM) timer - High-resolution timers --------------------------------------------------------------------------- @@ -1905,8 +1891,8 @@ to see what its tunable parameters are. For example: shell$ ompi_info --param btl tcp -shows a some of parameters (and default values) for the tcp btl -component. +shows some of the parameters (and default values) for the tcp btl +component (use "--level 9" to show *all* the parameters; see below). Note that ompi_info only shows a small number a component's MCA parameters by default. Each MCA parameter has a "level" value from 1 @@ -1921,20 +1907,20 @@ MPI, we have interpreted these nine levels as three groups of three: 5. Application tuner / detailed 6. Application tuner / all - 7. MPI/OSHMEM developer / basic - 8. MPI/OSHMEM developer / detailed - 9. MPI/OSHMEM developer / all + 7. MPI/OpenSHMEM developer / basic + 8. MPI/OpenSHMEM developer / detailed + 9. MPI/OpenSHMEM developer / all Here's how the three sub-groups are defined: 1. End user: Generally, these are parameters that are required for correctness, meaning that someone may need to set these just to - get their MPI/OSHMEM application to run correctly. + get their MPI/OpenSHMEM application to run correctly. 
2. Application tuner: Generally, these are parameters that can be used to tweak MPI application performance. - 3. MPI/OSHMEM developer: Parameters that either don't fit in the other two, - or are specifically intended for debugging / development of Open - MPI itself. + 3. MPI/OpenSHMEM developer: Parameters that either don't fit in the + other two, or are specifically intended for debugging / + development of Open MPI itself. Each sub-group is broken down into three classifications: @@ -1988,11 +1974,11 @@ passed on the mpirun command line will override an environment variable; an environment variable will override the system-wide defaults. -Each component typically activates itself when relavant. For example, -the MX component will detect that MX devices are present and will -automatically be used for MPI communications. The SLURM component -will automatically detect when running inside a SLURM job and activate -itself. And so on. +Each component typically activates itself when relevant. For example, +the usNIC component will detect that usNIC devices are present and +will automatically be used for MPI communications. The SLURM +component will automatically detect when running inside a SLURM job +and activate itself. And so on. Components can be manually activated or deactivated if necessary, of course. 
The most common components that are manually activated, @@ -2006,10 +1992,14 @@ comma-delimited list to the "btl" MCA parameter: shell$ mpirun --mca btl tcp,self hello_world_mpi -To add shared memory support, add "sm" into the command-delimited list -(list order does not matter): +To add shared memory support, add "vader" into the comma-delimited +list (list order does not matter): - shell$ mpirun --mca btl tcp,sm,self hello_world_mpi + shell$ mpirun --mca btl tcp,vader,self hello_world_mpi + +(there is an "sm" shared memory BTL, too, but "vader" is a newer +generation of shared memory support; by default, "vader" will be used +instead of "sm") To specifically deactivate a specific component, the comma-delimited list can be prepended with a "^" to negate it: @@ -2027,7 +2017,7 @@ Common Questions Many common questions about building and using Open MPI are answered on the FAQ: - http://www.open-mpi.org/faq/ + https://www.open-mpi.org/faq/ =========================================================================== @@ -2041,24 +2031,24 @@ When submitting questions and problems, be sure to include as much extra information as possible. This web page details all the information that we request in order to provide assistance: - http://www.open-mpi.org/community/help/ + https://www.open-mpi.org/community/help/ User-level questions and comments should generally be sent to the -user's mailing list (users@open-mpi.org). Because of spam, only +user's mailing list (users@lists.open-mpi.org). Because of spam, only subscribers are allowed to post to this list (ensure that you subscribe with and post from *exactly* the same e-mail address -- joe@example.com is considered different than joe@mycomputer.example.com!).
Visit this page to subscribe to the user's list: - http://www.open-mpi.org/mailman/listinfo.cgi/users + http://lists.open-mpi.org/mailman/listinfo/users Developer-level bug reports, questions, and comments should generally -be sent to the developer's mailing list (devel@open-mpi.org). Please -do not post the same question to both lists. As with the user's list, -only subscribers are allowed to post to the developer's list. Visit -the following web page to subscribe: +be sent to the developer's mailing list (devel@lists.open-mpi.org). +Please do not post the same question to both lists. As with the +user's list, only subscribers are allowed to post to the developer's +list. Visit the following web page to subscribe: - http://www.open-mpi.org/mailman/listinfo.cgi/devel + http://lists.open-mpi.org/mailman/listinfo/devel Make today an Open MPI day! diff --git a/VERSION b/VERSION index 1402b171e47..66d803be730 100644 --- a/VERSION +++ b/VERSION @@ -3,6 +3,7 @@ # Copyright (c) 2011 NVIDIA Corporation. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # This is the VERSION file for Open MPI, describing the precise # version of Open MPI in this distribution. The various components of @@ -13,8 +14,8 @@ # ... major=2 -minor=0 -release=0 +minor=1 +release=1 # greek is generally used for alpha or beta release tags. If it is # non-empty, it will be appended to the version number. It does not @@ -81,17 +82,18 @@ date="Unreleased developer copy" # Version numbers are described in the Libtool current:revision:age # format. 
-libmpi_so_version=0:0:0 -libmpi_cxx_so_version=0:0:0 -libmpi_mpifh_so_version=0:0:0 -libmpi_usempi_tkr_so_version=0:0:0 -libmpi_usempi_ignore_tkr_so_version=0:0:0 -libmpi_usempif08_so_version=0:0:0 -libopen_rte_so_version=0:0:0 -libopen_pal_so_version=0:0:0 -libmpi_java_so_version=0:0:0 -liboshmem_so_version=0:0:0 -liboshmem_java_so_version=0:0:0 +libmpi_so_version=30:0:10 +libmpi_cxx_so_version=30:0:10 +libmpi_mpifh_so_version=30:0:10 +libmpi_usempi_tkr_so_version=30:0:10 +libmpi_usempi_ignore_tkr_so_version=30:0:10 +libmpi_usempif08_so_version=30:0:10 + +libopen_rte_so_version=30:0:10 +libopen_pal_so_version=30:0:10 +libmpi_java_so_version=30:0:10 +liboshmem_so_version=30:0:10 +libompitrace_so_version=30:0:10 # "Common" components install standalone libraries that are run-time # linked by one or more components. So they need to be versioned as @@ -99,14 +101,13 @@ liboshmem_java_so_version=0:0:0 # components-don't-affect-the-build-system abstraction. # OMPI layer -libmca_common_cuda_so_version=0:0:0 -libmca_common_ofacm_so_version=0:0:0 -libmca_common_sm_so_version=0:0:0 -libmca_common_ugni_so_version=0:0:0 -libmca_common_verbs_so_version=0:0:0 - -# OPAL layer -libmca_opal_common_libfabric_so_version=0:0:0 # ORTE layer -libmca_common_alps_so_version=0:0:0 +libmca_orte_common_alps_so_version=30:0:10 + +# OPAL layer +libmca_opal_common_cuda_so_version=30:0:10 +libmca_opal_common_libfabric_so_version=30:0:10 +libmca_opal_common_sm_so_version=30:0:10 +libmca_opal_common_ugni_so_version=30:0:10 +libmca_opal_common_verbs_so_version=30:0:10 diff --git a/autogen.pl b/autogen.pl index 9694250aafa..42016f709bb 100755 --- a/autogen.pl +++ b/autogen.pl @@ -1,14 +1,18 @@ #!/usr/bin/env perl # -# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved # Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. 
# Copyright (c) 2013-2014 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2015 IBM Corporation. All rights reserved. +# # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -50,6 +54,7 @@ my $platform_arg = 0; my $include_arg = 0; my $exclude_arg = 0; +my $force_arg = 0; # Include/exclude lists my $include_list; @@ -74,7 +79,7 @@ my $patch_prog = "patch"; # Solaris "patch" doesn't understand unified diffs, and will cause # autogen.pl to hang with a "File to patch:" prompt. Default to Linux -# "patch", but use "gpatch" on Solaris. +# "patch", but use "gpatch" on Solaris. if ($^O eq "solaris") { $patch_prog = "gpatch"; } @@ -180,7 +185,6 @@ sub process_subdir { print "--- Found configure.in|ac; running autoreconf...\n"; safe_system("autoreconf -ivf"); print "--- Patching autotools output... :-(\n"; - patch_autotools_output($start); } else { my_die "Found subdir, but no autogen.sh or configure.in|ac to do anything"; } @@ -189,6 +193,9 @@ sub process_subdir { my_die "Did not generate a \"configure\" executable in $dir.\n" if (! -x "configure"); + # Fix known issues in Autotools output + patch_autotools_output($start); + # Chdir back to where we came from chdir($start); } @@ -247,7 +254,7 @@ sub mca_process_component { $found_component->{"name"} = $component; # Push the results onto the $mca_found hash array - push(@{$mca_found->{$pname}->{$framework}->{"components"}}, + push(@{$mca_found->{$pname}->{$framework}->{"components"}}, $found_component); # Is there an autogen.subdirs in here? 
@@ -274,7 +281,7 @@ sub ignored { $unignore .= $_ while (); close(UNIGNORE); - + $ignored = 0 if ($unignore =~ /^$username$/m || $unignore =~ /^$username\@$hostname$/m || @@ -307,13 +314,13 @@ sub mca_process_framework { # Look for component directories in this framework if (-d $dir) { $mca_found->{$pname}->{$framework}->{found} = 1; - opendir(DIR, $dir) || + opendir(DIR, $dir) || my_die "Can't open $dir directory"; foreach my $d (readdir(DIR)) { # Skip any non-directory, "base", or any dir that # begins with "." next - if (! -d "$dir/$d" || $d eq "base" || + if (! -d "$dir/$d" || $d eq "base" || substr($d, 0, 1) eq "."); # Skip any component that doesn't have a configure.m4 @@ -328,7 +335,7 @@ sub mca_process_framework { verbose "--- Found $pname / $framework / $d component\n"; - # Skip if specifically excluded + # Skip if specifically excluded if (exists($exclude_list->{$framework}) && $exclude_list->{$framework}[0] eq $d) { verbose " => Excluded\n"; @@ -415,7 +422,7 @@ sub mca_process_project { # Look for framework directories in this project my $dir = "$topdir/$pdir/mca"; if (-d $dir) { - opendir(DIR, $dir) || + opendir(DIR, $dir) || my_die "Can't open $dir directory"; my @my_dirs = readdir(DIR); @my_dirs = sort(@my_dirs); @@ -425,11 +432,28 @@ sub mca_process_project { next if (! -d "$dir/$d" || $d eq "base" || substr($d, 0, 1) eq "."); - # If this directory has a $dir.h file and a base/ + my $framework_header = "$dir/$d/$d.h"; + + # If there's a $dir/$d/autogen.options file, read it + my $ao_file = "$dir/$d/autogen.options"; + if (-r $ao_file) { + verbose "\n>>> Found $dir/$d/autogen.options file\n"; + open(IN, $ao_file) || + die "$ao_file present, but cannot open it"; + while () { + if (m/\s*framework_header\s*=\s*(.+?)\s*$/) { + verbose " Framework header entry: $1\n"; + $framework_header = "$dir/$d/$1"; + } + } + close(IN); + } + + # If this directory has a framework header and a base/ # subdirectory, or its name is "common", then it's a # framework. 
if ("common" eq $d || !$project->{need_base} || - (-f "$dir/$d/$d.h" && -d "$dir/$d/base")) { + (-f $framework_header && -d "$dir/$d/base")) { verbose "\n=== Found $pname / $d framework\n"; mca_process_framework($topdir, $project, $d); } @@ -493,7 +517,7 @@ sub mca_run_global { # Does this project have a configure.m4 file? push(@includes, "$pdir/configure.m4") if (exists($mca_found->{$p}->{"configure.m4"})); - + # Print out project-level info my @mykeys = keys(%{$mca_found->{$pname}}); @mykeys = sort(@mykeys); @@ -553,7 +577,7 @@ sub mca_run_global { } $m4_config_component_list =~ s/^, //; $no_config_component_list =~ s/^, //; - + $m4 .= "dnl Components in the $pname / $f framework m4_define([mca_${pname}_${f}_m4_config_component_list], [$m4_config_component_list]) m4_define([mca_${pname}_${f}_no_config_component_list], [$no_config_component_list]) @@ -576,7 +600,7 @@ sub mca_run_global { sub mpiext_process_extension { my ($topdir, $ext_prefix, $extdir) = @_; - + my $edir = "$topdir/$ext_prefix/$extdir"; return if (! -d $edir); @@ -602,13 +626,13 @@ sub mpiext_run_global { my $topdir = Cwd::cwd(); my $dir = "$topdir/$ext_prefix"; - opendir(DIR, $dir) || + opendir(DIR, $dir) || my_die "Can't open $dir directory"; foreach my $d (readdir(DIR)) { # Skip any non-directory, "base", or any dir that begins with "." next if (! -d "$dir/$d" || $d eq "base" || substr($d, 0, 1) eq "."); - + # If this directory has a configure.m4, then it's an # extension. if (-f "$dir/$d/configure.m4") { @@ -663,7 +687,7 @@ sub mpiext_run_global { sub mpicontrib_process { my ($topdir, $contrib_prefix, $contribdir) = @_; - + my $cdir = "$topdir/$contrib_prefix/$contribdir"; return if (! -d $cdir); @@ -689,13 +713,13 @@ sub mpicontrib_run_global { my $topdir = Cwd::cwd(); my $dir = "$topdir/$contrib_prefix"; - opendir(DIR, $dir) || + opendir(DIR, $dir) || my_die "Can't open $dir directory"; foreach my $d (readdir(DIR)) { # Skip any non-directory, "base", or any dir that begins with "." 
next if (! -d "$dir/$d" || $d eq "base" || substr($d, 0, 1) eq "."); - + # If this directory has a configure.m4, then it's an # extension. if (-f "$dir/$d/configure.m4") { @@ -821,7 +845,7 @@ sub find_and_check { if ($pn > $mn) { verbose " ==> ACCEPTED\n"; return; - } + } # If the version is lower, we're done. elsif ($pn < $mn || ($pn == $mn && $pa lt $ma)) { @@ -900,6 +924,11 @@ sub patch_autotools_output { unlink("config/ltmain.sh.rej"); } + # If there's no configure script, there's nothing else to do. + return + if (! -f "configure"); + my @verbose_out; + # Total ugh. We have to patch the configure script itself. See below # for explainations why. open(IN, "configure") || my_die "Can't open configure"; @@ -907,6 +936,7 @@ sub patch_autotools_output { $c .= $_ while(); close(IN); + my $c_orig = $c; # LT <=2.2.6b need to be patched for the PGI 10.0 fortran compiler # name (pgfortran). The following comes from the upstream LT patches: @@ -915,7 +945,7 @@ sub patch_autotools_output { # Note that that patch is part of Libtool (which is not in this OMPI # source tree); we can't fix it. So all we can do is patch the # resulting configure script. :-( - verbose "$indent_str"."Patching configure for Libtool PGI 10 fortran compiler name\n"; + push(@verbose_out, $indent_str . "Patching configure for Libtool PGI 10 fortran compiler name\n"); $c =~ s/gfortran g95 xlf95 f95 fort ifort ifc efc pgf95 lf95 ftn/gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn/g; $c =~ s/pgcc\* \| pgf77\* \| pgf90\* \| pgf95\*\)/pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)/g; $c =~ s/pgf77\* \| pgf90\* \| pgf95\*\)/pgf77* | pgf90* | pgf95* | pgfortran*)/g; @@ -925,19 +955,19 @@ sub patch_autotools_output { # Libtool install; all we can do is patch the resulting configure # script. 
:-( The following comes from the upstream patch: # http://lists.gnu.org/archive/html/libtool-patches/2009-11/msg00016.html - verbose "$indent_str"."Patching configure for Libtool PGI version number regexps\n"; + push(@verbose_out, $indent_str . "Patching configure for Libtool PGI version number regexps\n"); $c =~ s/\*pgCC\\ \[1-5\]\* \| \*pgcpp\\ \[1-5\]\*/*pgCC\\ [1-5]\.* | *pgcpp\\ [1-5]\.*/g; # Similar issue as above -- fix the case statements that handle the Sun # Fortran version strings. # - # Note: we have to use octal escapes to match '*Sun\ F*) and the + # Note: we have to use octal escapes to match '*Sun\ F*) and the # four succeeding lines in the bourne shell switch statement. # \ = 134 # ) = 051 # * = 052 # - # Below is essentially an upstream patch for Libtool which we want + # Below is essentially an upstream patch for Libtool which we want # made available to Open MPI users running older versions of Libtool foreach my $tag (("", "_FC")) { @@ -960,16 +990,52 @@ sub patch_autotools_output { ;; "; - verbose "$indent_str"."Patching configure for Sun Studio Fortran version strings ($tag)\n"; + push(@verbose_out, $indent_str . "Patching configure for Sun Studio Fortran version strings ($tag)\n"); $c =~ s/$search_string/$replace_string/; } + # Oracle has apparently begun (as of 12.5-beta) removing the "Sun" branding. + # So this patch (cumulative over the previous one) is required. + push(@verbose_out, $indent_str . "Patching configure for Oracle Studio Fortran version strings\n"); + $c =~ s/\*Sun\*Fortran\*\)/*Sun*Fortran* | *Studio*Fortran*)/g; + $c =~ s/\*Sun\\ F\*\)(.*\n\s+tmp_sharedflag=)/*Sun\\ F* | *Studio*Fortran*)$1/g; + # See http://git.savannah.gnu.org/cgit/libtool.git/commit/?id=v2.2.6-201-g519bf91 for details # Note that this issue was fixed in LT 2.2.8, however most distros are still using 2.2.6b - verbose "$indent_str"."Patching configure for IBM xlf libtool bug\n"; + push(@verbose_out, $indent_str . 
"Patching configure for IBM xlf libtool bug\n"); $c =~ s/(\$LD -shared \$libobjs \$deplibs \$)compiler_flags( -soname \$soname)/$1linker_flags$2/g; + #Check if we are using a recent enough libtool that supports PowerPC little endian + if(index($c, 'powerpc64le-*linux*)') == -1) { + push(@verbose_out, $indent_str . "Patching configure for PowerPC little endian support\n"); + my $replace_string = "x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*|"; + $c =~ s/x86_64-\*kfreebsd\*-gnu\|x86_64-\*linux\*\|ppc\*-\*linux\*\|powerpc\*-\*linux\*\|/$replace_string/g; + $replace_string = + "powerpc64le-*linux*)\n\t LD=\"\${LD-ld} -m elf32lppclinux\"\n\t ;;\n\t powerpc64-*linux*)"; + $c =~ s/ppc64-\*linux\*\|powerpc64-\*linux\*\)/$replace_string/g; + $replace_string = + "powerpcle-*linux*)\n\t LD=\"\${LD-ld} -m elf64lppc\"\n\t ;;\n\t powerpc-*linux*)"; + $c =~ s/ppc\*-\*linux\*\|powerpc\*-\*linux\*\)/$replace_string/g; + } + + # Fix consequence of broken libtool.m4 + # see http://lists.gnu.org/archive/html/bug-libtool/2015-07/msg00002.html and + # https://github.com/open-mpi/ompi/issues/751 + push(@verbose_out, $indent_str . 
"Patching configure for libtool.m4 bug\n"); + # patch for libtool < 2.4.3 + $c =~ s/# Some compilers place space between "-\{L,R\}" and the path.\n # Remove the space.\n if test \$p = \"-L\" \|\|/# Some compilers place space between "-\{L,-l,R\}" and the path.\n # Remove the spaces.\n if test \$p = \"-L\" \|\|\n test \$p = \"-l\" \|\|/g; + # patch for libtool >= 2.4.3 + $c =~ s/# Some compilers place space between "-\{L,R\}" and the path.\n # Remove the space.\n if test x-L = \"\$p\" \|\|\n test x-R = \"\$p\"\; then/# Some compilers place space between "-\{L,-l,R\}" and the path.\n # Remove the spaces.\n if test x-L = \"x\$p\" \|\|\n test x-l = \"x\$p\" \|\|\n test x-R = \"x\$p\"\; then/g; + + # Only write out verbose statements and a new configure if the + # configure content actually changed + return + if ($c eq $c_orig); + foreach my $str (@verbose_out) { + verbose($str); + } + open(OUT, ">configure.patched") || my_die "Can't open configure.patched"; print OUT $c; close(OUT); @@ -978,6 +1044,24 @@ sub patch_autotools_output { unlink("configure.patched"); } +sub in_tarball { + my $tarball = 0; + open(IN, "VERSION") || my_die "Can't open VERSION"; + # If repo_rev is not an empty string, we are in a tarball + while () { + my $line = $_; + my @fields = split(/=/,$line); + if ($fields[0] eq "repo_rev") { + if ($fields[1] ne "\n") { + $tarball = 1; + last; + } + } + } + close(IN); + return $tarball; +} + ############################################################################## ############################################################################## ## main - do the real work... 
@@ -995,6 +1079,7 @@ sub patch_autotools_output { "platform=s" => \$platform_arg, "include=s" => \$include_arg, "exclude=s" => \$exclude_arg, + "force|f" => \$force_arg, ); if (!$ok || $help_arg) { @@ -1014,7 +1099,9 @@ sub patch_autotools_output { will be ignored and only those specified will be marked to build --exclude | -e Comma-separated list of framework or framework-component - to be excluded from the build\n"; + to be excluded from the build + --force | -f Run even if invoked from the source tree of an expanded + distribution tarball\n"; my_exit($ok ? 0 : 1); } @@ -1066,7 +1153,7 @@ sub patch_autotools_output { $dnl_line dnl This file is automatically created by autogen.pl; it should not dnl be edited by hand!! -dnl +dnl dnl Generated by $username at " . localtime(time) . " dnl on $full_hostname. $dnl_line\n\n"; @@ -1078,6 +1165,11 @@ sub patch_autotools_output { my_die "Not at the root directory of an OMPI source tree" if (! -f "config/opal_try_assemble.m4"); +my_die "autogen.pl has been invoked in the source tree of an Open MPI distribution tarball; aborting... +You likely do not need to invoke \"autogen.pl\" -- you can probably run \"configure\" directly. +If you really know what you are doing, and really need to run autogen.pl, use the \"--force\" flag." + if (!$force_arg && in_tarball()); + # Now that we've verified that we're in the top-level OMPI directory, # set the sentinel file to remove if we abort. $sentinel = Cwd::cwd() . 
"/configure"; @@ -1269,7 +1361,7 @@ sub patch_autotools_output { # Remove the old m4 file and write the new one verbose "==> Writing m4 file with autogen.pl results\n"; unlink($m4_output_file); -open(M4, ">$m4_output_file") || +open(M4, ">$m4_output_file") || my_die "Can't open $m4_output_file"; print M4 $m4; close(M4); @@ -1289,6 +1381,8 @@ sub patch_autotools_output { } safe_system($cmd); +patch_autotools_output("."); + #--------------------------------------------------------------------------- verbose " diff --git a/config/Makefile.options b/config/Makefile.options index 4de360600ea..7f42e967402 100644 --- a/config/Makefile.options +++ b/config/Makefile.options @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/config/c_get_alignment.m4 b/config/c_get_alignment.m4 index 89ee21f603f..fa41da89647 100644 --- a/config/c_get_alignment.m4 +++ b/config/c_get_alignment.m4 @@ -6,22 +6,22 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl # OPAL_C_GET_ALIGN(type, config_var) # ---------------------------------- -# Determine datatype alignment. +# Determine datatype alignment. # First arg is type, 2nd arg is config var to define. AC_DEFUN([OPAL_C_GET_ALIGNMENT],[ AC_CACHE_CHECK([alignment of $1], @@ -39,7 +39,7 @@ AC_DEFUN([OPAL_C_GET_ALIGNMENT],[ [AC_MSG_WARN([*** Problem running configure test!]) AC_MSG_WARN([*** See config.log for details.]) AC_MSG_ERROR([*** Cannot continue.])], - [ # cross compile - do a non-executable test. Trick + [ # cross compile - do a non-executable test. Trick # taken from the Autoconf 2.59c. Switch to using # AC_CHECK_ALIGNOF when we can require Autoconf 2.60. _AC_COMPUTE_INT([(long int) offsetof (opal__type_alignof_, y)], diff --git a/config/ompi_check_libfca.m4 b/config/ompi_check_libfca.m4 index 058464b420f..5605739199c 100644 --- a/config/ompi_check_libfca.m4 +++ b/config/ompi_check_libfca.m4 @@ -13,53 +13,47 @@ dnl # OMPI_CHECK_FCA(prefix, [action-if-found], [action-if-not-found]) # -------------------------------------------------------- -# check if fca support can be found. sets prefix_{CPPFLAGS, +# check if fca support can be found. 
sets prefix_{CPPFLAGS, # LDFLAGS, LIBS} as needed and runs action-if-found if there is # support, otherwise executes action-if-not-found AC_DEFUN([OMPI_CHECK_FCA],[ - OPAL_VAR_SCOPE_PUSH([ompi_check_fca_libdir ompi_check_fca_incdir ompi_check_fca_libs ompi_check_fca_happy CPPFLAGS_save LDFLAGS_save LIBS_save]) + OPAL_VAR_SCOPE_PUSH([ompi_check_fca_libs ompi_check_fca_happy CPPFLAGS_save LDFLAGS_save LIBS_save]) AC_ARG_WITH([fca], [AC_HELP_STRING([--with-fca(=DIR)], - [Build fca (Mellanox Fabric Collective Accelerator) support, searching for libraries in DIR])]) - OPAL_CHECK_WITHDIR([fca], [$with_fca], [lib/libfca.so]) + [Build fca (Mellanox Fabric Collective Accelerator) support, optionally adding + DIR/include and DIR/lib or DIR/lib64 to the search path for headers and libraries])]) AS_IF([test "$with_fca" != "no"], - [AS_IF([test ! -z "$with_fca" && test "$with_fca" != "yes"], - [ompi_check_fca_dir=$with_fca - ompi_check_fca_libdir="$ompi_check_fca_dir/lib" - ompi_check_fca_incdir="$ompi_check_fca_dir/include" - ompi_check_fca_libs=fca + [ompi_check_fca_libs=fca + AS_IF([test ! 
-z "$with_fca" && test "$with_fca" != "yes"], + [ompi_check_fca_dir=$with_fca + AC_SUBST([coll_fca_HOME], "$ompi_check_fca_dir")], + [AC_SUBST([coll_fca_HOME], "/")]) - coll_fca_extra_CPPFLAGS="-I$ompi_check_fca_incdir/fca -I$ompi_check_fca_incdir/fca_core" - AC_SUBST([coll_fca_extra_CPPFLAGS]) - AC_SUBST([coll_fca_HOME], "$ompi_check_fca_dir") + CPPFLAGS_save=$CPPFLAGS + LDFLAGS_save=$LDFLAGS + LIBS_save=$LIBS - CPPFLAGS_save=$CPPFLAGS - LDFLAGS_save=$LDFLAGS - LIBS_save=$LIBS - CPPFLAGS="$CPPFLAGS $coll_fca_extra_CPPFLAGS" + OPAL_LOG_MSG([$1_CPPFLAGS : $$1_CPPFLAGS], 1) + OPAL_LOG_MSG([$1_LDFLAGS : $$1_LDFLAGS], 1) + OPAL_LOG_MSG([$1_LIBS : $$1_LIBS], 1) - OPAL_LOG_MSG([$1_CPPFLAGS : $$1_CPPFLAGS], 1) - OPAL_LOG_MSG([$1_LDFLAGS : $$1_LDFLAGS], 1) - OPAL_LOG_MSG([$1_LIBS : $$1_LIBS], 1) + OPAL_CHECK_PACKAGE([$1], + [fca/fca_api.h], + [$ompi_check_fca_libs], + [fca_get_version], + [], + [$ompi_check_fca_dir], + [], + [ompi_check_fca_happy="yes"], + [ompi_check_fca_happy="no"]) - OPAL_CHECK_PACKAGE([$1], - [fca_api.h], - [$ompi_check_fca_libs], - [fca_get_version], - [], - [$ompi_check_fca_dir], - [$ompi_check_fca_libdir], - [ompi_check_fca_happy="yes"], - [ompi_check_fca_happy="no"]) - - CPPFLAGS=$CPPFLAGS_save - LDFLAGS=$LDFLAGS_save - LIBS=$LIBS_save], - [ompi_check_fca_happy="no"]) - ]) + CPPFLAGS=$CPPFLAGS_save + LDFLAGS=$LDFLAGS_save + LIBS=$LIBS_save], + [ompi_check_fca_happy="no"]) AS_IF([test "$ompi_check_fca_happy" = "yes" && test "$enable_progress_threads" = "yes"], [AC_MSG_WARN([fca driver does not currently support progress threads. Disabling FCA.]) diff --git a/config/ompi_check_libhcoll.m4 b/config/ompi_check_libhcoll.m4 index 165e5177629..65e6e9de1b4 100644 --- a/config/ompi_check_libhcoll.m4 +++ b/config/ompi_check_libhcoll.m4 @@ -13,56 +13,53 @@ dnl # OMPI_CHECK_HCOLL(prefix, [action-if-found], [action-if-not-found]) # -------------------------------------------------------- -# check if hcoll support can be found. 
sets prefix_{CPPFLAGS, +# check if hcoll support can be found. sets prefix_{CPPFLAGS, # LDFLAGS, LIBS} as needed and runs action-if-found if there is # support, otherwise executes action-if-not-found AC_DEFUN([OMPI_CHECK_HCOLL],[ - OPAL_VAR_SCOPE_PUSH([ompi_check_hcoll_dir ompi_hcoll_libdir ompi_check_hcoll_incdir ompi_check_hcoll_libs ompi_check_hcoll_happy CPPFLAGS_save LDFLAGS_save LIBS_save]) + OPAL_VAR_SCOPE_PUSH([ompi_check_hcoll_dir ompi_check_hcoll_libs ompi_check_hcoll_happy CPPFLAGS_save LDFLAGS_save LIBS_save]) AC_ARG_WITH([hcoll], [AC_HELP_STRING([--with-hcoll(=DIR)], - [Build hcoll (Mellanox Hierarchical Collectives) support, searching for libraries in DIR])]) - OPAL_CHECK_WITHDIR([hcoll], [$with_hcoll], [lib/libhcoll.so]) + [Build hcoll (Mellanox Hierarchical Collectives) support, optionally adding + DIR/include and DIR/lib or DIR/lib64 to the search path for headers and libraries])]) AS_IF([test "$with_hcoll" != "no"], - [AS_IF([test ! -z "$with_hcoll" && test "$with_hcoll" != "yes"], - [ompi_check_hcoll_dir=$with_hcoll - ompi_check_hcoll_libdir="$ompi_check_hcoll_dir/lib" - ompi_check_hcoll_incdir="$ompi_check_hcoll_dir/include" - ompi_check_hcoll_libs=hcoll + [ompi_check_hcoll_libs=hcoll + AS_IF([test ! 
-z "$with_hcoll" && test "$with_hcoll" != "yes"], + [ompi_check_hcoll_dir=$with_hcoll]) - coll_hcoll_extra_CPPFLAGS="-I$ompi_check_hcoll_incdir/hcoll -I$ompi_check_hcoll_incdir/hcoll/api" + CPPFLAGS_save=$CPPFLAGS + LDFLAGS_save=$LDFLAGS + LIBS_save=$LIBS - AC_SUBST([coll_hcoll_extra_CPPFLAGS]) - AC_SUBST([coll_hcoll_HOME], "$ompi_check_hcoll_dir") + OPAL_LOG_MSG([$1_CPPFLAGS : $$1_CPPFLAGS], 1) + OPAL_LOG_MSG([$1_LDFLAGS : $$1_LDFLAGS], 1) + OPAL_LOG_MSG([$1_LIBS : $$1_LIBS], 1) + OPAL_CHECK_PACKAGE([$1], + [hcoll/api/hcoll_api.h], + [$ompi_check_hcoll_libs], + [hcoll_get_version], + [], + [$ompi_check_hcoll_dir], + [], + [ompi_check_hcoll_happy="yes"], + [ompi_check_hcoll_happy="no"]) - CPPFLAGS_save=$CPPFLAGS - LDFLAGS_save=$LDFLAGS - LIBS_save=$LIBS - CPPFLAGS="$CPPFLAGS $coll_hcoll_extra_CPPFLAGS" - - OPAL_LOG_MSG([$1_CPPFLAGS : $$1_CPPFLAGS], 1) - OPAL_LOG_MSG([$1_LDFLAGS : $$1_LDFLAGS], 1) - OPAL_LOG_MSG([$1_LIBS : $$1_LIBS], 1) - - OPAL_CHECK_PACKAGE([$1], - [hcoll_api.h], - [$ompi_check_hcoll_libs], - [hcoll_get_version], - [], - [$ompi_check_hcoll_dir], - [$ompi_check_hcoll_libdir], - [ompi_check_hcoll_happy="yes"], - [ompi_check_hcoll_happy="no"]) - - CPPFLAGS=$CPPFLAGS_save - LDFLAGS=$LDFLAGS_save - LIBS=$LIBS_save], - [ompi_check_hcoll_happy="no"]) - ]) - + AS_IF([test "$ompi_check_hcoll_happy" = "yes"], + [ + CPPFLAGS=$coll_hcoll_CPPFLAGS + LDFLAGS=$coll_hcoll_LDFLAGS + LIBS=$coll_hcoll_LIBS + AC_CHECK_FUNCS(hcoll_context_free, [], []) + ], + []) + CPPFLAGS=$CPPFLAGS_save + LDFLAGS=$LDFLAGS_save + LIBS=$LIBS_save], + [ompi_check_hcoll_happy=no]) AS_IF([test "$ompi_check_hcoll_happy" = "yes" && test "$enable_progress_threads" = "yes"], [AC_MSG_WARN([hcoll driver does not currently support progress threads. 
Disabling HCOLL.]) diff --git a/config/ompi_check_lustre.m4 b/config/ompi_check_lustre.m4 index d089b9007ac..d27fe3bf390 100644 --- a/config/ompi_check_lustre.m4 +++ b/config/ompi_check_lustre.m4 @@ -23,7 +23,7 @@ dnl # OMPI_CHECK_LUSTRE(prefix, [action-if-found], [action-if-not-found]) # -------------------------------------------------------- -# check if LUSTRE support can be found. sets prefix_{CPPFLAGS, +# check if LUSTRE support can be found. sets prefix_{CPPFLAGS, # LDFLAGS, LIBS} as needed and runs action-if-found if there is # support, otherwise executes action-if-not-found AC_DEFUN([OMPI_CHECK_LUSTRE],[ @@ -32,7 +32,7 @@ AC_DEFUN([OMPI_CHECK_LUSTRE],[ check_lustre_LDFLAGS= check_lustre_LIBS= - check_lustre_save_LIBS="$LIBS" + check_lustre_save_LIBS="$LIBS" check_lustre_save_LDFLAGS="$LDFLAGS" check_lustre_save_CPPFLAGS="$CPPFLAGS" @@ -61,6 +61,30 @@ AC_DEFUN([OMPI_CHECK_LUSTRE],[ [$ompi_check_lustre_dir], [$ompi_check_lustre_libdir], [ompi_check_lustre_happy="yes"], [ompi_check_lustre_happy="no"]) + AC_MSG_CHECKING([for required lustre data structures]) + cat > conftest.c <]], - [[ + [[ #ifndef MXM_VERSION #error "MXM Version is less than 2.1, please upgrade" #endif @@ -75,13 +74,19 @@ AC_DEFUN([OMPI_CHECK_MXM],[ [ompi_mxm_version_ok="yes"], [ompi_mxm_version_ok="no"]) - AC_MSG_RESULT([$ompi_mxm_version_ok]) - CFLAGS=$old_CFLAGS + AC_MSG_RESULT([$ompi_mxm_version_ok]) + CFLAGS=$old_CFLAGS + + AS_IF([test "$ompi_mxm_version_ok" = "no"], [ompi_check_mxm_happy="no"]) - AS_IF([test "$ompi_mxm_version_ok" = "no"], [ompi_check_mxm_happy="no"]) + OPAL_SUMMARY_ADD([[Transports]],[[Mellanox MXM]],[$1],[$ompi_check_mxm_happy]) + fi AS_IF([test "$ompi_check_mxm_happy" = "yes"], - [$2], + [$1_LDFLAGS="[$]$1_LDFLAGS $ompi_check_mxm_LDFLAGS" + $1_LIBS="[$]$1_LIBS $ompi_check_mxm_LIBS" + $1_CPPFLAGS="[$]$1_CPPFLAGS $ompi_check_mxm_CPPFLAGS" + $2], [AS_IF([test ! -z "$with_mxm" && test "$with_mxm" != "no"], [AC_MSG_ERROR([MXM support requested but not found. 
Aborting])]) $3]) diff --git a/config/ompi_check_plfs.m4 b/config/ompi_check_plfs.m4 index aced3f4c363..71eff914ccd 100644 --- a/config/ompi_check_plfs.m4 +++ b/config/ompi_check_plfs.m4 @@ -23,7 +23,7 @@ dnl # OMPI_CHECK_PLFS(prefix, [action-if-found], [action-if-not-found]) # -------------------------------------------------------- -# check if PLFS support can be found. sets prefix_{CPPFLAGS, +# check if PLFS support can be found. sets prefix_{CPPFLAGS, # LDFLAGS, LIBS} as needed and runs action-if-found if there is # support, otherwise executes action-if-not-found AC_DEFUN([OMPI_CHECK_PLFS],[ @@ -32,7 +32,7 @@ AC_DEFUN([OMPI_CHECK_PLFS],[ check_plfs_LDFLAGS= check_plfs_LIBS= - check_plfs_save_LIBS="$LIBS" + check_plfs_save_LIBS="$LIBS" check_plfs_save_LDFLAGS="$LDFLAGS" check_plfs_save_CPPFLAGS="$CPPFLAGS" @@ -46,7 +46,7 @@ AC_DEFUN([OMPI_CHECK_PLFS],[ [Build Plfs support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) OPAL_CHECK_WITHDIR([plfs], [$with_plfs], [include/plfs.h]) - AC_ARG_WITH([plfs-libs], + AC_ARG_WITH([plfs-libs], [AC_HELP_STRING([--with-plfs-libs=LIBS], [Libraries to link with for plfs])]) @@ -57,29 +57,29 @@ AC_DEFUN([OMPI_CHECK_PLFS],[ temp_with_plfs_libs="$with_plfs_libs" AS_IF([test -z "$with_plfs_libs"], [with_plfs_libs="plfs pthread"]) - + # Add correct -I and -L flags AS_IF([test -d "$with_plfs/include"], [check_plfs_CPPFLAGS="-I$with_plfs/include" $1_CPPFLAGS="$check_plfs_CPPFLAGS" CPPFLAGS="$CPPFLAGS $check_plfs_CPPFLAGS"], [ompi_check_plfs_happy="no"]) - + AS_IF([test "$ompi_check_plfs_happy" = "yes"], [AS_IF([test -d "$with_plfs/lib"], [check_plfs_LDFLAGS="-L$with_plfs/lib" $1_LDFLAGS="$check_plfs_LDFLAGS" LDFLAGS="$LDFLAGS $check_plfs_LDFLAGS"], - [ompi_check_plfs_happy="no"]) + [ompi_check_plfs_happy="no"]) ],[]) - + # Try to find all the plfs libraries AS_IF([test "$ompi_check_plfs_happy" = "yes"], [ AS_IF([test -n "$with_plfs_libs"] [for lib in $with_plfs_libs ; do 
check_plfs_LIBS="$check_plfs_LIBS -l$lib" - done]) - + done]) + $1_LIBS="$check_plfs_LIBS" LIBS="$LIBS $check_plfs_LIBS" diff --git a/config/ompi_check_psm.m4 b/config/ompi_check_psm.m4 index 7259354b6c9..12954f40630 100644 --- a/config/ompi_check_psm.m4 +++ b/config/ompi_check_psm.m4 @@ -11,9 +11,13 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2006 QLogic Corp. All rights reserved. -dnl Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2016 Los Alamos National Security, LLC. All rights +dnl reserved. +dnl Copyright (c) 2016 Intel Corporation. All rights reserved. +dnl dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -23,50 +27,64 @@ dnl # OMPI_CHECK_PSM(prefix, [action-if-found], [action-if-not-found]) # -------------------------------------------------------- -# check if PSM support can be found. sets prefix_{CPPFLAGS, +# check if PSM support can be found. 
sets prefix_{CPPFLAGS, # LDFLAGS, LIBS} as needed and runs action-if-found if there is # support, otherwise executes action-if-not-found AC_DEFUN([OMPI_CHECK_PSM],[ - AC_ARG_WITH([psm], - [AC_HELP_STRING([--with-psm(=DIR)], - [Build PSM (Qlogic InfiniPath) support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) - OPAL_CHECK_WITHDIR([psm], [$with_psm], [include/psm.h]) - AC_ARG_WITH([psm-libdir], - [AC_HELP_STRING([--with-psm-libdir=DIR], - [Search for PSM (QLogic InfiniPath PSM) libraries in DIR])]) - OPAL_CHECK_WITHDIR([psm-libdir], [$with_psm_libdir], [libpsm_infinipath.*]) + if test -z "$ompi_check_psm_happy" ; then + AC_ARG_WITH([psm], + [AC_HELP_STRING([--with-psm(=DIR)], + [Build PSM (Qlogic InfiniPath) support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) + OPAL_CHECK_WITHDIR([psm], [$with_psm], [include/psm.h]) + AC_ARG_WITH([psm-libdir], + [AC_HELP_STRING([--with-psm-libdir=DIR], + [Search for PSM (QLogic InfiniPath PSM) libraries in DIR])]) + OPAL_CHECK_WITHDIR([psm-libdir], [$with_psm_libdir], [libpsm_infinipath.*]) + + ompi_check_psm_$1_save_CPPFLAGS="$CPPFLAGS" + ompi_check_psm_$1_save_LDFLAGS="$LDFLAGS" + ompi_check_psm_$1_save_LIBS="$LIBS" + + AS_IF([test "$with_psm" != "no"], + [AS_IF([test ! -z "$with_psm" && test "$with_psm" != "yes"], + [ompi_check_psm_dir="$with_psm"]) + AS_IF([test ! -z "$with_psm_libdir" && test "$with_psm_libdir" != "yes"], + [ompi_check_psm_libdir="$with_psm_libdir"]) - ompi_check_psm_$1_save_CPPFLAGS="$CPPFLAGS" - ompi_check_psm_$1_save_LDFLAGS="$LDFLAGS" - ompi_check_psm_$1_save_LIBS="$LIBS" + OPAL_CHECK_PACKAGE([ompi_check_psm], + [psm.h], + [psm_infinipath], + [psm_finalize], + [], + [$ompi_check_psm_dir], + [$ompi_check_psm_libdir], + [ompi_check_psm_happy="yes"], + [ompi_check_psm_happy="no"])], + [ompi_check_psm_happy="no"]) - AS_IF([test "$with_psm" != "no"], - [AS_IF([test ! 
-z "$with_psm" && test "$with_psm" != "yes"], - [ompi_check_psm_dir="$with_psm"]) - AS_IF([test ! -z "$with_psm_libdir" && test "$with_psm_libdir" != "yes"], - [ompi_check_psm_libdir="$with_psm_libdir"]) + CPPFLAGS="$ompi_check_psm_$1_save_CPPFLAGS" + LDFLAGS="$ompi_check_psm_$1_save_LDFLAGS" + LIBS="$ompi_check_psm_$1_save_LIBS" - OPAL_CHECK_PACKAGE([$1], - [psm.h], - [psm_infinipath], - [psm_finalize], - [], - [$ompi_check_psm_dir], - [$ompi_check_psm_libdir], - [ompi_check_psm_happy="yes"], - [ompi_check_psm_happy="no"])], - [ompi_check_psm_happy="no"]) + AS_IF([test "$ompi_check_psm_happy" = "yes" && test "$enable_progress_threads" = "yes"], + [AC_MSG_WARN([PSM driver does not currently support progress threads. Disabling BTL.]) + ompi_check_psm_happy="no"]) - CPPFLAGS="$ompi_check_psm_$1_save_CPPFLAGS" - LDFLAGS="$ompi_check_psm_$1_save_LDFLAGS" - LIBS="$ompi_check_psm_$1_save_LIBS" + AS_IF([test "$ompi_check_psm_happy" = "yes"], + [AC_CHECK_HEADERS( + glob.h, + [], + [AC_MSG_WARN([glob.h not found. Can not build component.]) + ompi_check_psm_happy="no"])]) - AS_IF([test "$ompi_check_psm_happy" = "yes" && test "$enable_progress_threads" = "yes"], - [AC_MSG_WARN([PSM driver does not currently support progress threads. Disabling BTL.]) - ompi_check_psm_happy="no"]) + OPAL_SUMMARY_ADD([[Transports]],[[Intel TrueScale (PSM)]],[$1],[$ompi_check_psm_happy]) + fi AS_IF([test "$ompi_check_psm_happy" = "yes"], - [$2], + [$1_LDFLAGS="[$]$1_LDFLAGS $ompi_check_psm_LDFLAGS" + $1_CPPFLAGS="[$]$1_CPPFLAGS $ompi_check_psm_CPPFLAGS" + $1_LIBS="[$]$1_LIBS $ompi_check_psm_LIBS" + $2], [AS_IF([test ! -z "$with_psm" && test "$with_psm" != "no"], [AC_MSG_ERROR([PSM support requested but not found. 
Aborting])]) $3]) diff --git a/config/ompi_check_psm2.m4 b/config/ompi_check_psm2.m4 new file mode 100644 index 00000000000..ff00cb54d77 --- /dev/null +++ b/config/ompi_check_psm2.m4 @@ -0,0 +1,90 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006 QLogic Corp. All rights reserved. +# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014 Intel Corporation. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# OMPI_CHECK_PSM2(prefix, [action-if-found], [action-if-not-found]) +# -------------------------------------------------------- +# check if PSM2 support can be found. 
sets prefix_{CPPFLAGS, +# LDFLAGS, LIBS} as needed and runs action-if-found if there is +# support, otherwise executes action-if-not-found +AC_DEFUN([OMPI_CHECK_PSM2],[ + if test -z "$ompi_check_psm2_happy" ; then + AC_ARG_WITH([psm2], + [AC_HELP_STRING([--with-psm2(=DIR)], + [Build PSM2 (Intel PSM2) support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) + OPAL_CHECK_WITHDIR([psm2], [$with_psm2], [include/psm2.h]) + AC_ARG_WITH([psm2-libdir], + [AC_HELP_STRING([--with-psm2-libdir=DIR], + [Search for PSM (Intel PSM2) libraries in DIR])]) + OPAL_CHECK_WITHDIR([psm2-libdir], [$with_psm2_libdir], [libpsm2.*]) + + ompi_check_psm2_$1_save_CPPFLAGS="$CPPFLAGS" + ompi_check_psm2_$1_save_LDFLAGS="$LDFLAGS" + ompi_check_psm2_$1_save_LIBS="$LIBS" + + AS_IF([test "$with_psm2" != "no"], + [AS_IF([test ! -z "$with_psm2" && test "$with_psm2" != "yes"], + [ompi_check_psm2_dir="$with_psm2"]) + AS_IF([test ! -z "$with_psm2_libdir" && test "$with_psm2_libdir" != "yes"], + [ompi_check_psm2_libdir="$with_psm2_libdir"]) + + OPAL_CHECK_PACKAGE([ompi_check_psm2], + [psm2.h], + [psm2], + [psm2_mq_irecv2], + [], + [$ompi_check_psm2_dir], + [$ompi_check_psm2_libdir], + [ompi_check_psm2_happy="yes"], + [ompi_check_psm2_happy="no"])], + [ompi_check_psm2_happy="no"]) + + CPPFLAGS="$ompi_check_psm2_$1_save_CPPFLAGS" + LDFLAGS="$ompi_check_psm2_$1_save_LDFLAGS" + LIBS="$ompi_check_psm2_$1_save_LIBS" + + AS_IF([test "$ompi_check_psm2_happy" = "yes" && test "$enable_progress_threads" = "yes"], + [AC_MSG_WARN([PSM2 driver does not currently support progress threads. Disabling MTL.]) + ompi_check_psm2_happy="no"]) + + AS_IF([test "$ompi_check_psm2_happy" = "yes"], + [AC_CHECK_HEADERS( + glob.h, + [], + [AC_MSG_WARN([glob.h not found. 
Can not build component.]) + ompi_check_psm2_happy="no"])]) + + OPAL_SUMMARY_ADD([[Transports]],[[Intel Omnipath (PSM2)]],[$1],[$ompi_check_psm2_happy]) + fi + + AS_IF([test "$ompi_check_psm2_happy" = "yes"], + [$1_LDFLAGS="[$]$1_LDFLAGS $ompi_check_psm2_LDFLAGS" + $1_CPPFLAGS="[$]$1_CPPFLAGS $ompi_check_psm2_CPPFLAGS" + $1_LIBS="[$]$1_LIBS $ompi_check_psm2_LIBS" + $2], + [AS_IF([test ! -z "$with_psm2" && test "$with_psm2" != "no"], + [AC_MSG_ERROR([PSM2 support requested but not found. Aborting])]) + $3]) +]) diff --git a/config/ompi_check_pvfs2.m4 b/config/ompi_check_pvfs2.m4 index 618e145ddef..07176f6652a 100644 --- a/config/ompi_check_pvfs2.m4 +++ b/config/ompi_check_pvfs2.m4 @@ -11,7 +11,7 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2008-2012 University of Houston. All rights reserved. +dnl Copyright (c) 2008-2016 University of Houston. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ @@ -23,7 +23,7 @@ dnl # OMPI_CHECK_PVFS2(prefix, [action-if-found], [action-if-not-found]) # -------------------------------------------------------- -# check if PVFS2 support can be found. sets prefix_{CPPFLAGS, +# check if PVFS2 support can be found. 
sets prefix_{CPPFLAGS, # LDFLAGS, LIBS} as needed and runs action-if-found if there is # support, otherwise executes action-if-not-found AC_DEFUN([OMPI_CHECK_PVFS2],[ @@ -32,10 +32,6 @@ AC_DEFUN([OMPI_CHECK_PVFS2],[ check_pvfs2_LDFLAGS= check_pvfs2_LIBS= - check_pvfs2_save_LIBS="$LIBS" - check_pvfs2_save_LDFLAGS="$LDFLAGS" - check_pvfs2_save_CPPFLAGS="$CPPFLAGS" - check_pvfs2_configuration="none" ompi_check_pvfs2_happy="yes" @@ -46,70 +42,27 @@ AC_DEFUN([OMPI_CHECK_PVFS2],[ [Build Pvfs2 support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) OPAL_CHECK_WITHDIR([pvfs2], [$with_pvfs2], [include/pvfs2.h]) - AC_ARG_WITH([pvfs2-libs], - [AC_HELP_STRING([--with-pvfs2-libs=LIBS], - [Libraries to link with for pvfs2])]) - - temp_with_pvfs2="$with_pvfs2" AS_IF([test -z "$with_pvfs2"], - [with_pvfs2="/usr/local"]) + [ompi_check_pvfs2_dir="/usr/local"], + [ompi_check_pvfs2_dir=$with_pvfs2]) - temp_with_pvfs2_libs="$with_pvfs2_libs" - AS_IF([test -z "$with_pvfs2_libs"], - [with_pvfs2_libs="pvfs2 pthread"]) - - # Add correct -I and -L flags - AS_IF([test -d "$with_pvfs2/include"], - [check_pvfs2_CPPFLAGS="-I$with_pvfs2/include" - $1_CPPFLAGS="$check_pvfs2_CPPFLAGS" - CPPFLAGS="$CPPFLAGS $check_pvfs2_CPPFLAGS"], - [ompi_check_pvfs2_happy="no"]) - - AS_IF([test "$ompi_check_pvfs2_happy" = "yes"], - [AS_IF([test -d "$with_pvfs2/lib"], - [check_pvfs2_LDFLAGS="-L$with_pvfs2/lib" - $1_LDFLAGS="$check_pvfs2_LDFLAGS" - LDFLAGS="$LDFLAGS $check_pvfs2_LDFLAGS"], - [ompi_check_pvfs2_happy="no"]) - ],[]) - - # Try to find all the pvfs2 libraries - AS_IF([test "$ompi_check_pvfs2_happy" = "yes"], - [ AS_IF([test -n "$with_pvfs2_libs"] - [for lib in $with_pvfs2_libs ; do - check_pvfs2_LIBS="$check_pvfs2_LIBS -l$lib" - done]) - - $1_LIBS="$check_pvfs2_LIBS" - LIBS="$LIBS $check_pvfs2_LIBS" + if test -e "$ompi_check_pvfs2_dir/lib64" ; then + ompi_check_pvfs2_libdir="$ompi_check_pvfs2_dir/lib64" + else + 
ompi_check_pvfs2_libdir="$ompi_check_pvfs2_dir/lib" + fi - # check for pvfs2 - AC_CHECK_HEADERS([pvfs2.h], - [AC_MSG_CHECKING([if possible to link PVFS2]) - AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[#include - #include ]], - [[PVFS_util_resolve(NULL,NULL,NULL,0);]])], - [AC_MSG_RESULT([yes]) - ompi_check_pvfs2_happy="yes"], - [AC_MSG_RESULT([no]) - ompi_check_pvfs2_happy="no"])], - [ompi_check_pvfs2_happy="no"]) - ]) + # Add correct -I and -L flags + OPAL_CHECK_PACKAGE([$1], [pvfs2.h], [pvfs2], [PVFS_util_resolve], [], + [$ompi_check_pvfs2_dir], [$ompi_check_pvfs2_libdir], [ompi_check_pvfs2_happy="yes"], + [ompi_check_pvfs2_happy="no"]) - LDFLAGS="$check_pvfs2_save_LDFLAGS" - CPPFLAGS="$check_pvfs2_save_CPPFLAGS" - LIBS="$check_pvfs2_save_LIBS" AS_IF([test "$ompi_check_pvfs2_happy" = "yes"], [$2], [AS_IF([test ! -z "$with_pvfs2" && test "$with_pvfs2" != "no"], [echo PVFS2 support not found]) $3]) - with_pvfs2="$temp_with_pvfs2" - with_pvfs2_libs="$temp_with_pvfs2_libs" - ]) diff --git a/config/ompi_check_ucx.m4 b/config/ompi_check_ucx.m4 new file mode 100644 index 00000000000..93aceebca3e --- /dev/null +++ b/config/ompi_check_ucx.m4 @@ -0,0 +1,93 @@ +# -*- shell-script -*- +# +# Copyright (C) 2015 Mellanox Technologies Ltd. ALL RIGHTS RESERVED. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# OMPI_CHECK_UCX(prefix, [action-if-found], [action-if-not-found]) +# -------------------------------------------------------- +# check if UCX support can be found. 
sets prefix_{CPPFLAGS, +# LDFLAGS, LIBS} as needed and runs action-if-found if there is +# support, otherwise executes action-if-not-found +AC_DEFUN([OMPI_CHECK_UCX],[ + if test -z "$ompi_check_ucx_happy" ; then + AC_ARG_WITH([ucx], + [AC_HELP_STRING([--with-ucx(=DIR)], + [Build with Unified Communication X library support])]) + OPAL_CHECK_WITHDIR([ucx], [$with_ucx], [include/ucp/api/ucp.h]) + AC_ARG_WITH([ucx-libdir], + [AC_HELP_STRING([--with-ucx-libdir=DIR], + [Search for Unified Communication X libraries in DIR])]) + OPAL_CHECK_WITHDIR([ucx-libdir], [$with_ucx_libdir], [libucp.*]) + + ompi_check_ucx_$1_save_CPPFLAGS="$CPPFLAGS" + ompi_check_ucx_$1_save_LDFLAGS="$LDFLAGS" + ompi_check_ucx_$1_save_LIBS="$LIBS" + + AS_IF([test "$with_ucx" != "no"], + [AS_IF([test ! -z "$with_ucx" && test "$with_ucx" != "yes"], + [ + ompi_check_ucx_dir="$with_ucx" + ompi_check_ucx_libdir="$with_ucx/lib" + ]) + AS_IF([test ! -z "$with_ucx_libdir" && test "$with_ucx_libdir" != "yes"], + [ompi_check_ucx_libdir="$with_ucx_libdir"]) + + ompi_check_ucx_extra_libs="-L$ompi_check_ucx_libdir" + + OPAL_CHECK_PACKAGE([ompi_check_ucx], + [ucp/api/ucp.h], + [ucp], + [ucp_cleanup], + [$ompi_check_ucx_extra_libs], + [$ompi_check_ucx_dir], + [$ompi_check_ucx_libdir], + [ompi_check_ucx_happy="yes"], + [ompi_check_ucx_happy="no"])], + [ompi_check_ucx_happy="no"]) + + + + CPPFLAGS="$ompi_check_ucx_$1_save_CPPFLAGS" + LDFLAGS="$ompi_check_ucx_$1_save_LDFLAGS" + LIBS="$ompi_check_ucx_$1_save_LIBS" + + AC_MSG_CHECKING(for UCX version compatibility) + AC_REQUIRE_CPP + old_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$ompi_check_ucx_dir/include" + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ + ]])], + [ompi_ucx_version_ok="yes"], + [ompi_ucx_version_ok="no"]) + + AC_MSG_RESULT([$ompi_ucx_version_ok]) + CPPFLAGS=$old_CPPFLAGS + + AS_IF([test "$ompi_ucx_version_ok" = "no"], [ompi_check_ucx_happy="no"]) + + OPAL_SUMMARY_ADD([[Transports]],[[Open UCX]],[$1],[$ompi_check_ucx_happy]) + fi + + + 
AS_IF([test "$ompi_check_ucx_happy" = "yes"], + [$1_CPPFLAGS="[$]$1_CPPFLAGS $ompi_check_ucx_CPPFLAGS" + $1_LDFLAGS="[$]$1_LDFLAGS $ompi_check_ucx_LDFLAGS" + $1_LIBS="[$]$1_LIBS $ompi_check_ucx_LIBS" + $2], + [AS_IF([test ! -z "$with_ucx" && test "$with_ucx" != "no"], + [AC_MSG_ERROR([UCX support requested but not found. Aborting])]) + $3]) +]) + diff --git a/config/ompi_check_udapl.m4 b/config/ompi_check_udapl.m4 index 02dac4649d5..d45a709af67 100644 --- a/config/ompi_check_udapl.m4 +++ b/config/ompi_check_udapl.m4 @@ -22,7 +22,7 @@ dnl # OMPI_CHECK_UDAPL(prefix, [action-if-found], [action-if-not-found]) # -------------------------------------------------------- -# check if uDAPL support can be found. sets prefix_{CPPFLAGS, +# check if uDAPL support can be found. sets prefix_{CPPFLAGS, # LDFLAGS, LIBS} as needed and runs action-if-found if there is # support, otherwise executes action-if-not-found AC_DEFUN([OMPI_CHECK_UDAPL],[ @@ -44,7 +44,7 @@ AC_DEFUN([OMPI_CHECK_UDAPL],[ # Linux/OFED, you'll get a bunch of warning messages about the # providers that don't work. However, on Linux/OFED, you don't # really want to use udapl anyway; you likely really want to use - # the openib BTL (i.e., native verbs, not udapl). + # the openib BTL (i.e., native verbs, not udapl). # So after exploring many different scenarios, the least evil # solution seemed to be to disable building the udapl BTL on @@ -53,7 +53,7 @@ AC_DEFUN([OMPI_CHECK_UDAPL],[ # --with-udapl(=DIR), the udapl BTL will not be built. AS_IF([test -z "$with_udapl"], [case $host in - *linux*) + *linux*) AC_MSG_WARN([On Linux and --with-udapl was not specified]) AC_MSG_WARN([Not building the udapl BTL]) with_udapl=no @@ -84,7 +84,7 @@ dnl out we need -ldapl to link (looks like udapl over GM). 
ompi_check_package_$1_orig_LIBS="$$1_LIBS" AS_IF([test "$ompi_check_udapl_happy" = "yes"], - [_OPAL_CHECK_PACKAGE_HEADER([$1], + [_OPAL_CHECK_PACKAGE_HEADER([$1], [dat/udat.h], [$ompi_check_udapl_dir], [ompi_check_udapl_happy="yes"], diff --git a/config/ompi_config_files.m4 b/config/ompi_config_files.m4 index c30b485a6a7..b20ca13400e 100644 --- a/config/ompi_config_files.m4 +++ b/config/ompi_config_files.m4 @@ -2,9 +2,9 @@ # # Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -15,10 +15,10 @@ AC_DEFUN([OMPI_CONFIG_FILES],[ ompi/include/Makefile ompi/include/mpif.h ompi/include/mpif-config.h - + ompi/datatype/Makefile ompi/debuggers/Makefile - + ompi/mpi/c/Makefile ompi/mpi/c/profile/Makefile ompi/mpi/cxx/Makefile @@ -36,7 +36,7 @@ AC_DEFUN([OMPI_CONFIG_FILES],[ ompi/mpi/fortran/mpiext/Makefile ompi/mpi/tool/Makefile ompi/mpi/tool/profile/Makefile - + ompi/tools/ompi_info/Makefile ompi/tools/wrappers/Makefile ompi/tools/wrappers/mpicc-wrapper-data.txt diff --git a/config/ompi_config_threads.m4 b/config/ompi_config_threads.m4 index 761fc858742..4431cad7d9e 100644 --- a/config/ompi_config_threads.m4 +++ b/config/ompi_config_threads.m4 @@ -5,15 +5,15 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -35,7 +35,7 @@ AC_DEFUN([OMPI_CONFIG_THREADS],[ # --enable-mpi-thread-multiple # #if OMPI_ENABLE_THREAD_MULTIPLE == 0 /* Not available */ # #if OMPI_ENABLE_THREAD_MULTIPLE == 1 /* Available */ -# +# AC_MSG_CHECKING([if want MPI_THREAD_MULTIPLE support]) AC_ARG_ENABLE([mpi_thread_multiple], [AC_HELP_STRING([--enable-mpi-thread-multiple], diff --git a/config/ompi_configure_options.m4 b/config/ompi_configure_options.m4 index 4c0cdd44b67..d48005ab223 100644 --- a/config/ompi_configure_options.m4 +++ b/config/ompi_configure_options.m4 @@ -140,7 +140,7 @@ case "x$enable_mpi_fortran" in OMPI_MIN_REQUIRED_FORTRAN_BINDINGS=$OMPI_FORTRAN_USEMPIF08_BINDINGS OMPI_TRY_FORTRAN_BINDINGS=$OMPI_FORTRAN_USEMPIF08_BINDINGS ;; - + xno|xnone) AC_MSG_RESULT([no (none)]) OMPI_MIN_REQUIRED_FORTRAN_BINDINGS=$OMPI_FORTRAN_NO_BINDINGS @@ -166,27 +166,6 @@ case "x$enable_mpi_fortran" in ;; esac -# -# MPI profiling -# - -AC_MSG_CHECKING([if want PMPI]) -AC_ARG_ENABLE(mpi-profile, - AC_HELP_STRING([--enable-mpi-profile], - [enable MPI profiling (default: enabled)])) -if test "$enable_mpi_profile" != "no"; then - AC_MSG_RESULT([yes]) - WANT_MPI_PROFILING=1 - MPIF_H_PMPI_W_FUNCS=", PMPI_WTICK, PMPI_WTIME" -else - AC_MSG_RESULT([no]) - WANT_MPI_PROFILING=0 - MPIF_H_PMPI_W_FUNCS= -fi -AC_SUBST(MPIF_H_PMPI_W_FUNCS) -AM_CONDITIONAL(WANT_MPI_PROFILING, test "$WANT_MPI_PROFILING" = 1) - - # # C++ # @@ -276,5 +255,9 @@ AC_DEFINE_UNQUOTED([OMPI_BUILD_FORTRAN_F08_SUBARRAYS], [$OMPI_BUILD_FORTRAN_F08_SUBARRAYS], [Whether we built the 'use mpi_f08' prototype subarray-based implementation or not (i.e., whether to build the use-mpi-f08-desc prototype or the regular use-mpi-f08 implementation)]) +AC_ARG_ENABLE([io-ompio], + [AC_HELP_STRING([--disable-io-ompio], + [Disable the ompio MPI-IO component])]) + ])dnl diff --git a/config/ompi_contrib.m4 b/config/ompi_contrib.m4 index e0d503a5257..d2b19753d8a 100644 --- 
a/config/ompi_contrib.m4 +++ b/config/ompi_contrib.m4 @@ -6,7 +6,7 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. @@ -15,9 +15,9 @@ dnl Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -69,7 +69,7 @@ AC_DEFUN([OMPI_CONTRIB],[ OMPI_MPI_CONTRIBS= # Cycle through each of the software packages and - # configure them if not disabled. + # configure them if not disabled. m4_foreach(software, [ompi_mpicontrib_list], [_OMPI_CONTRIB_CONFIGURE(software)]) diff --git a/config/ompi_cxx_find_exception_flags.m4 b/config/ompi_cxx_find_exception_flags.m4 index fd543a2fc6e..d6c45855357 100644 --- a/config/ompi_cxx_find_exception_flags.m4 +++ b/config/ompi_cxx_find_exception_flags.m4 @@ -6,15 +6,15 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -37,7 +37,7 @@ AC_DEFUN([OMPI_CXX_FIND_EXCEPTION_FLAGS],[ AC_ARG_WITH(exflags, AC_HELP_STRING([--with-exflags], - [Specify flags necessary to enable C++ exceptions]), + [Specify flags necessary to enable C++ exceptions]), ompi_force_exflags="$withval") ompi_CXXFLAGS_SAVE="$CXXFLAGS" diff --git a/config/ompi_cxx_find_template_parameters.m4 b/config/ompi_cxx_find_template_parameters.m4 index ed0d6adbf45..bfa4677a15e 100644 --- a/config/ompi_cxx_find_template_parameters.m4 +++ b/config/ompi_cxx_find_template_parameters.m4 @@ -6,15 +6,15 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -29,9 +29,9 @@ AC_DEFUN([OMPI_CXX_FIND_TEMPLATE_PARAMETERS],[ # Adds to CXXFLAGS AC_MSG_CHECKING([for C++ compiler template parameters]) -if test "$BASECXX" = "KCC"; then +if test "$BASECXX" = "KCC"; then new_flags="--one_instantiation_per_object" - CXXFLAGS="$CXXFLAGS $new_flags" + CXXFLAGS="$CXXFLAGS $new_flags" else new_flags="none needed" fi diff --git a/config/ompi_cxx_find_template_repository.m4 b/config/ompi_cxx_find_template_repository.m4 index cef8911e194..47d4cf2a26a 100644 --- a/config/ompi_cxx_find_template_repository.m4 +++ b/config/ompi_cxx_find_template_repository.m4 @@ -6,16 +6,16 @@ dnl Corporation. All rights reserved. 
dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -133,11 +133,11 @@ else # Is it a directory? if test -d "$ompi_file"; then ompi_template_dir="$ompi_file $ompi_template_dir" - + # Or is it a file? else name="`echo $ompi_file | cut -d. -f1`" - + temp_mask= if test "$name" = "main" || test "$name" = "other"; then temp_mask="`echo $ompi_file | cut -d. -f2`" diff --git a/config/ompi_cxx_have_exceptions.m4 b/config/ompi_cxx_have_exceptions.m4 index 2049e926590..2bd886e675f 100644 --- a/config/ompi_cxx_have_exceptions.m4 +++ b/config/ompi_cxx_have_exceptions.m4 @@ -6,15 +6,15 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -24,7 +24,7 @@ AC_DEFUN([OMPI_CXX_HAVE_EXCEPTIONS],[ # # Depdencies: None # -# Check to see if the C++ compiler can handle exceptions +# Check to see if the C++ compiler can handle exceptions # # Sets OMPI_CXX_EXCEPTIONS to 1 if compiler has exceptions, 0 if not # @@ -32,7 +32,7 @@ AC_DEFUN([OMPI_CXX_HAVE_EXCEPTIONS],[ AC_MSG_CHECKING([for throw/catch]) AC_LANG_SAVE AC_LANG_CPLUSPLUS -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[int i=1; throw(i);]])], +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[int i=1; throw(i);]])], OMPI_CXX_EXCEPTIONS=1, OMPI_CXX_EXCPTIONS=0) if test "$OMPI_CXX_EXCEPTIONS" = "1"; then AC_MSG_RESULT([yes]) diff --git a/config/ompi_ext.m4 b/config/ompi_ext.m4 index aa798ab3f5e..40be85af98c 100644 --- a/config/ompi_ext.m4 +++ b/config/ompi_ext.m4 @@ -38,15 +38,18 @@ AC_DEFUN([OMPI_EXT],[ # AC_ARG_ENABLE(mpi-ext, AC_HELP_STRING([--enable-mpi-ext[=LIST]], - [Comma-separated list of extensions that should be built. Possible values: ompi_mpiext_list. Example: "--enable-mpi-ext=foo,bar" will enable building the MPI extensions "foo" and "bar". If LIST is empty or the special value "all", then all available MPI extensions will be built (default: none).])) + [Comma-separated list of extensions that should be built. Possible values: ompi_mpiext_list. Example: "--enable-mpi-ext=foo,bar" will enable building the MPI extensions "foo" and "bar". If LIST is empty or the special value "all", then all available MPI extensions will be built (default: all).])) # print some nice messages about what we're about to do... 
AC_MSG_CHECKING([for available MPI Extensions]) AC_MSG_RESULT([ompi_mpiext_list]) AC_MSG_CHECKING([which MPI extension should be enabled]) - if test "$enable_mpi_ext" = "yes" || test "$enable_mpi_ext" = "all"; then - msg="All Extensions" + if test "$enable_mpi_ext" = "" || \ + test "$enable_mpi_ext" = "yes" || \ + test "$enable_mpi_ext" = "all"; then + enable_mpi_ext=all + msg="All Available Extensions" str="`echo ENABLE_EXT_ALL=1`" eval $str else @@ -173,7 +176,7 @@ EOF # Make an AM conditional to see whether we're building the mpi_ext # module. Note that we only build it if we support the ignore-tkr # mpi module. - AS_IF([test $OMPI_BUILD_FORTRAN_USEMPI_BINDINGS -eq 1 && \ + AS_IF([test $OMPI_BUILD_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPI_BINDINGS && \ test $OMPI_FORTRAN_HAVE_IGNORE_TKR -eq 1], [OMPI_BUILD_FORTRAN_USEMPI_EXT=1], [OMPI_BUILD_FORTRAN_USEMPI_EXT=0]) @@ -210,7 +213,7 @@ EOF # Only build this mpi_f08_ext module if we're building the "use # mpi_f08" module *and* it's the non-descriptor one. - AS_IF([test $OMPI_BUILD_FORTRAN_USEMPIF08_BINDINGS -eq 1 && \ + AS_IF([test $OMPI_BUILD_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPIF08_BINDINGS && \ test $OMPI_BUILD_FORTRAN_F08_SUBARRAYS -eq 0], [OMPI_BUILD_FORTRAN_USEMPIF08_EXT=1], [OMPI_BUILD_FORTRAN_USEMPIF08_EXT=0]) @@ -453,7 +456,7 @@ AC_DEFUN([EXT_PROCESS_COMPONENT],[ AC_MSG_CHECKING([if MPI Extension $component has C bindings]) - AS_IF([test ! -e "$test_header"], + AS_IF([test ! -e "$test_header" && test ! -e "$test_header.in"], [ # There *must* be C bindings AC_MSG_RESULT([no]) AC_MSG_WARN([C bindings for MPI extensions are required]) diff --git a/config/ompi_find_mpi_aint_count_offset.m4 b/config/ompi_find_mpi_aint_count_offset.m4 index b84dad355c6..d16bcc79766 100644 --- a/config/ompi_find_mpi_aint_count_offset.m4 +++ b/config/ompi_find_mpi_aint_count_offset.m4 @@ -17,6 +17,7 @@ # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. 
# Copyright (c) 2014 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -29,7 +30,7 @@ AC_DEFUN([OMPI_FIND_MPI_AINT_COUNT_OFFSET],[ _OMPI_FIND_MPI_COUNT_TYPE _OMPI_FIND_MPI_OFFSET_TYPE - if test "$ompi_fortran_happy" == "1" && \ + if test "$ompi_fortran_happy" = "1" && \ test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS"; then _OMPI_FIND_MPI_INTEGER_KIND _OMPI_FIND_MPI_ADDRESS_KIND diff --git a/config/ompi_fortran_check.m4 b/config/ompi_fortran_check.m4 index f96f1707bc3..46ecf24d0ee 100644 --- a/config/ompi_fortran_check.m4 +++ b/config/ompi_fortran_check.m4 @@ -6,7 +6,7 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. @@ -14,14 +14,14 @@ dnl Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl -# OMPI_FORTRAN_CHECK(Fortran type, c type required, types to search, +# OMPI_FORTRAN_CHECK(Fortran type, c type required, types to search, # expected size, define ompi_fortran__t or not)) #---------------------------------------------------------- # Check Fortran type, including: @@ -55,7 +55,7 @@ AC_DEFUN([OMPI_FORTRAN_CHECK], [ fi if test "$ofc_have_type" = "1"; then - # What is the size of this type? + # What is the size of this type? 
# NOTE: Some Fortran compilers actually will return that a # type exists even if it doesn't support it -- the compiler @@ -166,20 +166,20 @@ AC_DEFUN([OMPI_FORTRAN_CHECK], [ # AC_DEFINE_UNQUOTED), autoheader won't put them in the # AC_CONFIG_HEADER (or AM_CONFIG_HEADER, in our case). AC_DEFINE_UNQUOTED([OMPI_HAVE_FORTRAN_]m4_translit(m4_bpatsubst(m4_bpatsubst([$1], [*], []), [[^a-zA-Z0-9_]], [_]), [a-z], [A-Z]), - [$ofc_have_type], + [$ofc_have_type], [Whether we have Fortran $1 or not]) AC_DEFINE_UNQUOTED([OMPI_SIZEOF_FORTRAN_]m4_translit(m4_bpatsubst(m4_bpatsubst([$1], [*], []), [[^a-zA-Z0-9_]], [_]), [a-z], [A-Z]), - [$ofc_type_size], + [$ofc_type_size], [Size of Fortran $1]) AC_DEFINE_UNQUOTED([OMPI_ALIGNMENT_FORTRAN_]m4_translit(m4_bpatsubst(m4_bpatsubst([$1], [*], []), [[^a-zA-Z0-9_]], [_]), [a-z], [A-Z]), - [$ofc_type_alignment], + [$ofc_type_alignment], [Alignment of Fortran $1]) AC_DEFINE_UNQUOTED([OMPI_KIND_FORTRAN_]m4_translit(m4_bpatsubst(m4_bpatsubst([$1], [*], []), [[^a-zA-Z0-9_]], [_]), [a-z], [A-Z]), - [$ofc_type_kind], + [$ofc_type_kind], [Fortrn KIND number for $1]) if test "$3" != "" && test "$ofc_define_type" = "yes"; then AC_DEFINE_UNQUOTED([ompi_fortran_]m4_translit(m4_bpatsubst(m4_bpatsubst([$1], [*], []), [[^a-zA-Z0-9_]], [_]), [A-Z], [a-z])[_t], - [$ofc_c_type], + [$ofc_c_type], [C type corresponding to Fortran $1]) fi diff --git a/config/ompi_fortran_check_abstract.m4 b/config/ompi_fortran_check_abstract.m4 index 95fcb444e0c..422ce35b431 100644 --- a/config/ompi_fortran_check_abstract.m4 +++ b/config/ompi_fortran_check_abstract.m4 @@ -6,22 +6,22 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. 
dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl # Check whether or not the Fortran compiler supports the "abstract" # keyword in derived types or not. -# OMPI_FORTRAN_CHECK_ABSTRACT([action if found], +# OMPI_FORTRAN_CHECK_ABSTRACT([action if found], # [action if not found]) # ---------------------------------------------------- AC_DEFUN([OMPI_FORTRAN_CHECK_ABSTRACT],[ diff --git a/config/ompi_fortran_check_asynchronous.m4 b/config/ompi_fortran_check_asynchronous.m4 index 92247360140..0cc3c84bfe5 100644 --- a/config/ompi_fortran_check_asynchronous.m4 +++ b/config/ompi_fortran_check_asynchronous.m4 @@ -6,22 +6,22 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl # Check whether or not the Fortran compiler supports the "asynchronous" # keyword in derived types or not. 
-# OMPI_FORTRAN_CHECK_ASYNCHRONOUS([action if found], +# OMPI_FORTRAN_CHECK_ASYNCHRONOUS([action if found], # [action if not found]) # ---------------------------------------------------- AC_DEFUN([OMPI_FORTRAN_CHECK_ASYNCHRONOUS],[ diff --git a/config/ompi_fortran_check_bind_c.m4 b/config/ompi_fortran_check_bind_c.m4 index 7551ae1a677..bcdf6e31872 100644 --- a/config/ompi_fortran_check_bind_c.m4 +++ b/config/ompi_fortran_check_bind_c.m4 @@ -6,15 +6,15 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/config/ompi_fortran_check_f08_assumed_rank.m4 b/config/ompi_fortran_check_f08_assumed_rank.m4 index 4bf942f9c6f..023569a22b2 100644 --- a/config/ompi_fortran_check_f08_assumed_rank.m4 +++ b/config/ompi_fortran_check_f08_assumed_rank.m4 @@ -6,27 +6,27 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2007 Los Alamos National Security, LLC. All rights -dnl reserved. +dnl reserved. dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. 
dnl Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ # Does this compiler support the Fortran 2008 assumed rank syntax? -# OMPI_FORTRAN_CHECK_F08_ASSUMED_RANK([action if found], +# OMPI_FORTRAN_CHECK_F08_ASSUMED_RANK([action if found], # [action if not found]) # ---------------------------------------------------------------- AC_DEFUN([OMPI_FORTRAN_CHECK_F08_ASSUMED_RANK], [ - AS_VAR_PUSHDEF([fortran_f08_assumed_rank], + AS_VAR_PUSHDEF([fortran_f08_assumed_rank], [ompi_cv_fortran_f08_assumed_rank]) AC_CACHE_CHECK([Fortran compiler F08 assumed rank syntax], diff --git a/config/ompi_fortran_check_ignore_tkr.m4 b/config/ompi_fortran_check_ignore_tkr.m4 index 76493fa33a0..ee69e49eb5e 100644 --- a/config/ompi_fortran_check_ignore_tkr.m4 +++ b/config/ompi_fortran_check_ignore_tkr.m4 @@ -6,18 +6,18 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2007 Los Alamos National Security, LLC. All rights -dnl reserved. +dnl reserved. dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. -dnl Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ # Does this compiler support (void*)-like functionality for MPI choice @@ -28,7 +28,7 @@ AC_DEFUN([OMPI_FORTRAN_CHECK_IGNORE_TKR], [ OMPI_FORTRAN_IGNORE_TKR_PREDECL= OMPI_FORTRAN_IGNORE_TKR_TYPE= - AS_VAR_PUSHDEF([fortran_ignore_tkr_data], + AS_VAR_PUSHDEF([fortran_ignore_tkr_data], [ompi_cv_fortran_ignore_tkr_data]) # Note that we can only cache 1 value at a time, but this test @@ -89,7 +89,7 @@ AC_DEFUN([_OMPI_FORTRAN_CHECK_IGNORE_TKR], [ [!DEC\$ ATTRIBUTES NO_ARG_CHECK], [happy=1], [happy=0])]) # Solaris Studio compilers - # Note that due to a compiler bug, we have been advised by Oracle to + # Note that due to a compiler bug, we have been advised by Oracle to # use the "character(*)" type AS_IF([test $happy -eq 0], [OMPI_FORTRAN_CHECK_IGNORE_TKR_SUB( @@ -109,7 +109,7 @@ AC_DEFUN([_OMPI_FORTRAN_CHECK_IGNORE_TKR], [ [!IBM* IGNORE_TKR], [happy=1], [happy=0])]) - AS_VAR_SET(fortran_ignore_tkr_data, + AS_VAR_SET(fortran_ignore_tkr_data, [${happy}:${ompi_fortran_ignore_tkr_type}:${ompi_fortran_ignore_tkr_predecl}]) # Now put the orignal CACHE_CHECK MSG_CHECKING back so that it can @@ -156,7 +156,8 @@ AC_DEFUN([OMPI_FORTRAN_CHECK_IGNORE_TKR_SUB], [ complex :: buffer3(4,4) complex, pointer, dimension(:,:) :: ptr target :: buffer3 - ptr => buffer3 + integer :: buffer4 + ptr => buffer3 ! Set some known values (somewhat irrelevant for this test, but just be ! sure that the values are initialized) @@ -173,7 +174,12 @@ AC_DEFUN([OMPI_FORTRAN_CHECK_IGNORE_TKR_SUB], [ ! Force us through an assumed shape call force_assumed_shape(buffer3, count) ! Force a pointer call through an assumed shape (!) - ptr => buffer3 + ptr => buffer3 +! Also try with a simple scalar integer +! (Intel 2016 compiler suite only partially supports GCC pragmas; +! they work with all the above buffer types, but fail with a +! 
simple scalar integer) + call foo(buffer4, count) end program @@ -183,7 +189,7 @@ AC_DEFUN([OMPI_FORTRAN_CHECK_IGNORE_TKR_SUB], [ call foo(a, count) end subroutine force_assumed_shape -! Autoconf puts "end" after the last line +! Autoconf puts "end" after the last line subroutine bogus ]]), [msg=yes diff --git a/config/ompi_fortran_check_logical_array.m4 b/config/ompi_fortran_check_logical_array.m4 index a358cbbb794..7a6a6291548 100644 --- a/config/ompi_fortran_check_logical_array.m4 +++ b/config/ompi_fortran_check_logical_array.m4 @@ -4,7 +4,7 @@ dnl Copyright (c) 2004-2005 The Trustees of Indiana University. dnl All rights reserved. dnl Copyright (c) 2004-2005 The Trustees of the University of Tennessee. dnl All rights reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. @@ -12,14 +12,14 @@ dnl Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl AC_DEFUN([OMPI_FORTRAN_CHECK_LOGICAL_ARRAY],[ - AS_VAR_PUSHDEF([logical_array_var], + AS_VAR_PUSHDEF([logical_array_var], [ompi_cv_fortran_logical_array_correct]) AC_CACHE_CHECK([for correct handling of Fortran logical arrays], @@ -41,7 +41,7 @@ AC_DEFUN([OMPI_FORTRAN_CHECK_LOGICAL_ARRAY],[ EOF # C module - # We really need the confdefs.h Header file for + # We really need the confdefs.h Header file for # the ompi_fortran_logical_t definition if test \! 
-f confdefs.h ; then AC_MSG_WARN([*** Problem running configure test!]) @@ -97,7 +97,7 @@ EOF AC_MSG_ERROR([Error determining if arrays of logical values work properly.]) fi - AS_IF([test "$cross_compiling" = "yes"], + AS_IF([test "$cross_compiling" = "yes"], [ # assume we're ok value=yes], [OPAL_LOG_COMMAND([./conftest], @@ -105,7 +105,7 @@ EOF value=yes else value=no - fi], + fi], [value=no])]) fi AS_VAR_SET(logical_array_var, [$value]) diff --git a/config/ompi_fortran_check_private.m4 b/config/ompi_fortran_check_private.m4 index 2dd6b5fc46d..b9789b1db14 100644 --- a/config/ompi_fortran_check_private.m4 +++ b/config/ompi_fortran_check_private.m4 @@ -6,22 +6,22 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl # Check whether or not the Fortran compiler supports the "private" # keyword in derived types or not. -# OMPI_FORTRAN_CHECK_PRIVATE([action if found], +# OMPI_FORTRAN_CHECK_PRIVATE([action if found], # [action if not found]) # ---------------------------------------------------- AC_DEFUN([OMPI_FORTRAN_CHECK_PRIVATE],[ diff --git a/config/ompi_fortran_check_procedure.m4 b/config/ompi_fortran_check_procedure.m4 index ef09dccc709..5ab68714492 100644 --- a/config/ompi_fortran_check_procedure.m4 +++ b/config/ompi_fortran_check_procedure.m4 @@ -6,22 +6,22 @@ dnl Corporation. All rights reserved. 
dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl # Check whether or not the Fortran compiler supports the "procedure" # keyword in derived types or not. -# OMPI_FORTRAN_CHECK_PROCEDURE([action if found], +# OMPI_FORTRAN_CHECK_PROCEDURE([action if found], # [action if not found]) # ---------------------------------------------------- AC_DEFUN([OMPI_FORTRAN_CHECK_PROCEDURE],[ diff --git a/config/ompi_fortran_check_protected.m4 b/config/ompi_fortran_check_protected.m4 index aacd54072f7..effdcd90579 100644 --- a/config/ompi_fortran_check_protected.m4 +++ b/config/ompi_fortran_check_protected.m4 @@ -6,22 +6,22 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl # Check whether or not the Fortran compiler supports the "protected" # keyword in derived types or not. 
-# OMPI_FORTRAN_CHECK_PROTECTED([action if found], +# OMPI_FORTRAN_CHECK_PROTECTED([action if found], # [action if not found]) # ---------------------------------------------------- AC_DEFUN([OMPI_FORTRAN_CHECK_PROTECTED],[ diff --git a/config/ompi_fortran_check_real16_c_equiv.m4 b/config/ompi_fortran_check_real16_c_equiv.m4 index 2a40bcae04d..12e427d754b 100644 --- a/config/ompi_fortran_check_real16_c_equiv.m4 +++ b/config/ompi_fortran_check_real16_c_equiv.m4 @@ -6,18 +6,18 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2008-2016 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -29,7 +29,7 @@ AC_DEFUN([OMPI_FORTRAN_CHECK_REAL16_C_EQUIV],[ AS_VAR_PUSHDEF([real16_matches_c_var], [ompi_cv_real16_c_equiv]) # We have to do this as a cache check for cross-compilation platforms - AC_CACHE_CHECK([for C type matching bit representation of REAL*16], + AC_CACHE_CHECK([for C type matching bit representation of REAL*16], real16_matches_c_var, [AS_IF([test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS" && \ test "$OMPI_HAVE_FORTRAN_REAL16" = "1"], @@ -49,7 +49,7 @@ AC_DEFUN([OMPI_FORTRAN_CHECK_REAL16_C_EQUIV],[ [AC_MSG_CHECKING([if intel compiler _Quad == REAL*16]) CFLAGS_save="$CFLAGS" CFLAGS="$CFLAGS -Qoption,cpp,--extended_float_types" - OPAL_UNIQ([CFLAGS]) + OPAL_FLAGS_UNIQ([CFLAGS]) OMPI_FORTRAN_CHECK_REAL16_EQUIV_TYPE([_Quad], [q]) AS_IF([test "$happy" = "yes"], [OMPI_FORTRAN_REAL16_C_TYPE="_Quad" @@ -57,9 +57,9 @@ AC_DEFUN([OMPI_FORTRAN_CHECK_REAL16_C_EQUIV],[ [CFLAGS="$CFLAGS_save" AC_MSG_RESULT([does not work])]) ]) - AS_IF([test "$opal_cv_c_compiler_vendor" = "gnu" -a "$ac_cv_type___float128" = "yes"], + AS_IF([test "$opal_cv_c_compiler_vendor" = "gnu" && test "$ac_cv_type___float128" = "yes"], [AC_MSG_CHECKING([if gnu compiler __float128 == REAL*16]) - OPAL_UNIQ([CFLAGS]) + OPAL_FLAGS_UNIQ([CFLAGS]) OMPI_FORTRAN_CHECK_REAL16_EQUIV_TYPE([__float128], [q]) AS_IF([test "$happy" = "yes"], [OMPI_FORTRAN_REAL16_C_TYPE="__float128" @@ -84,7 +84,7 @@ AC_DEFUN([OMPI_FORTRAN_CHECK_REAL16_C_EQUIV],[ AS_VAR_POPDEF([real16_matches_c_var]) AS_IF([test "$ompi_real16_matches_c" = "yes"], - [define_value=1], + [define_value=1], [define_value=0 AC_MSG_WARN([MPI_REAL16 and MPI_COMPLEX32 support have been disabled])]) AC_DEFINE_UNQUOTED([OMPI_REAL16_MATCHES_C], [$define_value], diff --git a/config/ompi_fortran_check_type.m4 b/config/ompi_fortran_check_type.m4 index 6ce7e24391b..eac3eeabbc8 100644 --- a/config/ompi_fortran_check_type.m4 +++ 
b/config/ompi_fortran_check_type.m4 @@ -6,15 +6,15 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/config/ompi_fortran_check_use_only.m4 b/config/ompi_fortran_check_use_only.m4 new file mode 100644 index 00000000000..c147167391d --- /dev/null +++ b/config/ompi_fortran_check_use_only.m4 @@ -0,0 +1,75 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +dnl +dnl Check whether Fortran compiler supports the "only" clause properly +dnl when using modules. 
Specifically, if we "use a :: only foo" and "use +dnl b :: only bar", and modules a and b have a conflicting "yow" +dnl definition, it *should* be ignored because of the "only" clauses. PGI +dnl 15.7 (and probably prior versions) does not -- but only when +dnl compiling with -g (!). +dnl + +dnl OMPI_FORTRAN_CHECK_USE_ONLY([action if supported], +dnl [action if not supported]) +dnl ---------------------------------------------------- +AC_DEFUN([OMPI_FORTRAN_CHECK_USE_ONLY],[ + AS_VAR_PUSHDEF([use_only_var], [ompi_cv_fortran_use_only]) + OPAL_VAR_SCOPE_PUSH([FCFLAGS_save]) + FCFLAGS_save=$FCFLAGS + FCFLAGS="-I. $FCFLAGS" + + AC_CACHE_CHECK([if Fortran compiler supports USE...ONLY], use_only_var, + [AC_LANG_PUSH([Fortran]) + cat > aaa.f90 << EOF +MODULE aaa +INTEGER :: CMON(1) +COMMON/CMMON/CMON +INTEGER :: global_aaa +END MODULE aaa +EOF + cat > bbb.f90 << EOF +MODULE bbb +integer, bind(C, name="cmmon_") :: CMON +INTEGER :: global_bbb +END MODULE bbb +EOF + OPAL_LOG_COMMAND([$FC $FCFLAGS -c aaa.f90], + [OPAL_LOG_COMMAND([$FC $FCFLAGS -c bbb.f90], + [AC_COMPILE_IFELSE([AC_LANG_SOURCE([[PROGRAM test +USE aaa, ONLY : global_aaa +USE bbb, ONLY : global_bbb +implicit none +END PROGRAM]])], + [AS_VAR_SET(use_only_var, yes)], + [AS_VAR_SET(use_only_var, no)])], + [AS_VAR_SET(use_only_var, no)])], + [AS_VAR_SET(use_only_var, no)]) + rm -rf aaa.f90 aaa.o bbb.f90 bbb.o *.mod 2>/dev/null + AC_LANG_POP([Fortran]) + ]) + + AS_VAR_IF(use_only_var, [yes], [$1], [$2]) + FCFLAGS=$FCFLAGS_save + OPAL_VAR_SCOPE_POP + AS_VAR_POPDEF([use_only_var])dnl +]) diff --git a/config/ompi_fortran_find_ext_symbol_convention.m4 b/config/ompi_fortran_find_ext_symbol_convention.m4 index 80a8d1f907c..c073f8448f6 100644 --- a/config/ompi_fortran_find_ext_symbol_convention.m4 +++ b/config/ompi_fortran_find_ext_symbol_convention.m4 @@ -6,7 +6,7 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. 
All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. @@ -15,9 +15,9 @@ dnl Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/config/ompi_fortran_find_module_include_flag.m4 b/config/ompi_fortran_find_module_include_flag.m4 index 132aeba1622..5b6e0debf0c 100644 --- a/config/ompi_fortran_find_module_include_flag.m4 +++ b/config/ompi_fortran_find_module_include_flag.m4 @@ -6,15 +6,15 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -24,9 +24,9 @@ dnl # OMPI_FORTRAN_FIND_MODULE_INCLUDE_FLAG([action if found], [action if not found] AC_DEFUN([OMPI_FORTRAN_FIND_MODULE_INCLUDE_FLAG],[ AS_VAR_PUSHDEF([fortran_inc_var], [ompi_cv_fortran_module_include_flag]) - + OMPI_FC_MODULE_FLAG= - AC_CACHE_CHECK([for Fortran compiler module include flag], + AC_CACHE_CHECK([for Fortran compiler module include flag], fortran_inc_var, [ofi_possible_flags="-I -p -M" mkdir conftest.$$ @@ -43,7 +43,7 @@ module OMPI_MOD_FLAG end module OMPI_MOD_FLAG EOF - OPAL_LOG_COMMAND([$FC $FCFLAGS $FCFLAGS_f90 -c conftest-module.f90 $LDFLAGS $LIBS], , + OPAL_LOG_COMMAND([$FC $FCFLAGS $FCFLAGS_f90 -c conftest-module.f90 $LDFLAGS $LIBS], , [cd .. rm -rf conftest.$$ AC_MSG_RESULT([Whoops!]) diff --git a/config/ompi_fortran_get_alignment.m4 b/config/ompi_fortran_get_alignment.m4 index c8b6cf8751c..141e062d5d7 100644 --- a/config/ompi_fortran_get_alignment.m4 +++ b/config/ompi_fortran_get_alignment.m4 @@ -6,15 +6,15 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -102,7 +102,7 @@ void $ompi_ac_align_fn(char *w, char *x, char *y, char *z) else if (! ((aw%8)||(ax%8)||(ay%8)||(az%8))) fprintf(f, "%d\n", 8); else if (! ((aw%4)||(ax%4)||(ay%4)||(az%4))) fprintf(f, "%d\n", 4); else if (! 
((aw%2)||(ax%2)||(ay%2)||(az%2))) fprintf(f, "%d\n", 2); - else fprintf(f, "%d\n", 1); + else fprintf(f, "%d\n", 1); fclose(f); } #ifdef __cplusplus @@ -132,3 +132,48 @@ EOF AS_VAR_POPDEF([type_var])dnl OPAL_VAR_SCOPE_POP ]) + +# OMPI_FORTRAN_F08_GET_HANDLE_ALIGNMENT(type, variable to set) +# ------------------------------------------ +AC_DEFUN([OMPI_FORTRAN_F08_GET_HANDLE_ALIGNMENT],[ + # Use of m4_translit suggested by Eric Blake: + # http://lists.gnu.org/archive/html/bug-autoconf/2010-10/msg00016.html + AS_VAR_PUSHDEF([type_var], + m4_translit([[ompi_cv_fortran_alignment_$1]], [*], [p])) + + AC_CACHE_CHECK([alignment of Fortran $1], type_var, + [AC_LANG_PUSH([Fortran]) + AC_LINK_IFELSE([AC_LANG_SOURCE([[module alignment_mod +type, BIND(C) :: test_mpi_handle + integer :: MPI_VAL +end type test_mpi_handle +type(test_mpi_handle) :: t1 +type(test_mpi_handle) :: t2 +end module + +program falignment + use alignment_mod + OPEN(UNIT=10, FILE="conftestval") + if (LOC(t1) > LOC(t2)) then + write (10,'(I5)') LOC(t1)-LOC(t2) + else + write (10,'(I5)') LOC(t2)-LOC(t1) + endif + CLOSE(10) + +end program]])], + [AS_IF([test "$cross_compiling" = "yes"], + [AC_MSG_ERROR([Can not determine alignment of $1 when cross-compiling])], + [OPAL_LOG_COMMAND([./conftest], + [AS_VAR_SET(type_var, [`cat conftestval`])], + [AC_MSG_ERROR([Could not determine alignment of $1])])])], + + [AC_MSG_WARN([Could not determine alignment of $1]) + AC_MSG_WARN([See config.log for details]) + AC_MSG_ERROR([Cannot continue])]) + rm -rf conftest* *.mod 2> /dev/null + AC_LANG_POP([Fortran])]) + + AS_VAR_COPY([$2], [type_var]) + AS_VAR_POPDEF([type_var])dnl +])dnl diff --git a/config/ompi_fortran_get_handle_max.m4 b/config/ompi_fortran_get_handle_max.m4 index 8d4981ba007..819d40a3651 100644 --- a/config/ompi_fortran_get_handle_max.m4 +++ b/config/ompi_fortran_get_handle_max.m4 @@ -6,15 +6,15 @@ dnl Corporation. All rights reserved. 
dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -24,13 +24,13 @@ dnl # min(INT_MAX, max fortran INTEGER). This represents the maximum # number of fortran MPI handle index. AC_DEFUN([OMPI_FORTRAN_GET_HANDLE_MAX],[ - AS_VAR_PUSHDEF([fortran_handle_max_var], + AS_VAR_PUSHDEF([fortran_handle_max_var], [ompi_cv_fortran_handle_max]) AC_CACHE_CHECK([for max Fortran MPI handle index], fortran_handle_max_var, [ # Find max fortran INTEGER value. Set to sentinel value if we don't - # have a Fortran compiler (e.g., if --disable-fortran was given). + # have a Fortran compiler (e.g., if --disable-fortran was given). if test $ompi_fortran_happy -eq 0; then ompi_fint_max=0 else @@ -46,8 +46,8 @@ AC_DEFUN([OMPI_FORTRAN_GET_HANDLE_MAX],[ ]],[[FILE *fp = fopen("conftest.out", "w"); long cint = INT_MAX; fprintf(fp, "%ld", cint); -fclose(fp);]])], - [ompi_cint_max=`cat conftest.out`], +fclose(fp);]])], + [ompi_cint_max=`cat conftest.out`], [ompi_cint_max=0], [ #cross compiling is fun. 
compute INT_MAX same as INTEGER max OPAL_COMPUTE_MAX_VALUE([$ac_cv_sizeof_int], [ompi_cint_max])]) @@ -71,7 +71,7 @@ fclose(fp);]])], fi fi AS_VAR_SET(fortran_handle_max_var, [$value]) - rm -f conftest.out > /dev/null 2>&1 + rm -f conftest.out > /dev/null 2>&1 unset value]) AS_VAR_COPY([ompi_fortran_handle_max], [fortran_handle_max_var]) diff --git a/config/ompi_fortran_get_kind_value.m4 b/config/ompi_fortran_get_kind_value.m4 index 54e22ac0211..fb476596ee9 100644 --- a/config/ompi_fortran_get_kind_value.m4 +++ b/config/ompi_fortran_get_kind_value.m4 @@ -6,7 +6,7 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. @@ -14,9 +14,9 @@ dnl Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -28,7 +28,7 @@ AC_DEFUN([OMPI_FORTRAN_GET_KIND_VALUE],[ AS_VAR_PUSHDEF([kind_value_var], m4_translit([[ompi_cv_fortran_kind_value_$1]], [*], [p])) - rm -f conftest.out + rm -f conftest.out AC_CACHE_CHECK([KIND value of Fortran $1], kind_value_var, [if test $OMPI_TRY_FORTRAN_BINDINGS -eq $OMPI_FORTRAN_NO_BINDINGS || \ test $ompi_fortran_happy -eq 0; then diff --git a/config/ompi_fortran_get_sizeof.m4 b/config/ompi_fortran_get_sizeof.m4 index defd35e31fa..e25d982c58f 100644 --- a/config/ompi_fortran_get_sizeof.m4 +++ b/config/ompi_fortran_get_sizeof.m4 @@ -6,15 +6,15 @@ dnl Corporation. All rights reserved. 
dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -25,7 +25,7 @@ AC_DEFUN([OMPI_FORTRAN_GET_SIZEOF],[ # http://lists.gnu.org/archive/html/bug-autoconf/2010-10/msg00016.html AS_VAR_PUSHDEF([type_var], m4_translit([[ompi_cv_fortran_sizeof_$2]], [*], [p])) - + AC_CACHE_CHECK([size of Fortran $2], type_var, [OMPI_FORTRAN_MAKE_C_FUNCTION([ompi_ac_size_fn], [size]) # Fortran module diff --git a/config/ompi_fortran_get_value_true.m4 b/config/ompi_fortran_get_value_true.m4 index f14b4b8b04c..180d62c820f 100644 --- a/config/ompi_fortran_get_value_true.m4 +++ b/config/ompi_fortran_get_value_true.m4 @@ -4,7 +4,7 @@ dnl Copyright (c) 2004-2005 The Trustees of Indiana University. dnl All rights reserved. dnl Copyright (c) 2004-2005 The Trustees of the University of Tennessee. dnl All rights reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. @@ -12,9 +12,9 @@ dnl Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -28,7 +28,7 @@ AC_DEFUN([OMPI_FORTRAN_GET_VALUE_TRUE],[ unset ompi_cv_fortran_true_value fi - AS_VAR_PUSHDEF([fortran_true_var], + AS_VAR_PUSHDEF([fortran_true_var], [ompi_cv_fortran_true_value]) AC_CACHE_CHECK([Fortran value for .TRUE. logical type], @@ -124,7 +124,7 @@ EOF ]) AS_VAR_COPY([ompi_fortran_true_value], [fortran_true_var]) - AC_DEFINE_UNQUOTED([OMPI_FORTRAN_VALUE_TRUE], + AC_DEFINE_UNQUOTED([OMPI_FORTRAN_VALUE_TRUE], [$ompi_fortran_true_value], [Fortran value for LOGICAL .TRUE. value]) AS_VAR_POPDEF([fortran_true_var]) diff --git a/config/ompi_interix.m4 b/config/ompi_interix.m4 index c745aad517e..7e4339a497b 100644 --- a/config/ompi_interix.m4 +++ b/config/ompi_interix.m4 @@ -4,9 +4,9 @@ dnl Copyright (c) 2008 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/config/ompi_microsoft.m4 b/config/ompi_microsoft.m4 index 52be48d2785..b50db392c89 100644 --- a/config/ompi_microsoft.m4 +++ b/config/ompi_microsoft.m4 @@ -4,9 +4,9 @@ dnl Copyright (c) 2004-2007 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/config/ompi_setup_contrib.m4 b/config/ompi_setup_contrib.m4 index eb143850657..58c470ef131 100644 --- a/config/ompi_setup_contrib.m4 +++ b/config/ompi_setup_contrib.m4 @@ -6,19 +6,19 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2006-2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights -# reserved. +# reserved. # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/config/ompi_setup_cxx.m4 b/config/ompi_setup_cxx.m4 index eef5f1adaca..059a172aa85 100644 --- a/config/ompi_setup_cxx.m4 +++ b/config/ompi_setup_cxx.m4 @@ -11,21 +11,21 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2006 Los Alamos National Security, LLC. All rights -dnl reserved. +dnl reserved. dnl Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl # This macro is necessary to get the title to be displayed first. 
:-) AC_DEFUN([OMPI_SETUP_CXX_BANNER],[ - opal_show_subtitle "C++ compiler and preprocessor" + opal_show_subtitle "C++ compiler and preprocessor" ]) # This macro is necessary because PROG_CXX* is REQUIREd by multiple @@ -116,7 +116,7 @@ AC_DEFUN([_OMPI_SETUP_CXX_COMPILER],[ [AS_IF([test "$ompi_cv_cxx_compiler_vendor" = "microsoft" ], [ompi_cxx_compiler_works=yes], [OPAL_CHECK_COMPILER_WORKS([C++], [#include -], +], [std::string foo = "Hello, world"], [ompi_cxx_compiler_works=yes], [ompi_cxx_compiler_works=no])])]) @@ -131,7 +131,7 @@ AC_DEFUN([_OMPI_SETUP_CXX_COMPILER],[ AC_MSG_CHECKING([if able to build the MPI C++ bindings]) AS_IF([test "$WANT_MPI_CXX_SUPPORT" = "1"], - [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) AS_IF([test "$enable_mpi_cxx" = "yes"], [AC_MSG_WARN([MPI C++ binding support requested but not delivered]) @@ -178,10 +178,10 @@ AC_DEFUN([_OMPI_SETUP_CXX_COMPILER_BACKEND],[ CXXFLAGS="$CXXFLAGS $add -Wno-long-double -fstrict-prototype" AC_CACHE_CHECK([if $CXX supports -Wno-long-double], [ompi_cv_cxx_wno_long_double], - [AC_TRY_COMPILE([], [], + [AC_TRY_COMPILE([], [], [dnl Alright, the -Wno-long-double did not produce any errors... dnl Well well, try to extract a warning regarding unrecognized or ignored options - AC_TRY_COMPILE([], [long double test;], + AC_TRY_COMPILE([], [long double test;], [ ompi_cv_cxx_wno_long_double="yes" if test -s conftest.err ; then @@ -243,7 +243,7 @@ AC_DEFUN([_OMPI_SETUP_CXX_COMPILER_BACKEND],[ * files created by your C compiler. This generally indicates either * a conflict between the options specified in CFLAGS and CXXFLAGS * or a problem with the local compiler installation. More -* information (including exactly what command was given to the +* information (including exactly what command was given to the * compilers and what error resulted when the commands were executed) is * available in the config.log file in this directory. 
********************************************************************** @@ -296,7 +296,7 @@ AC_DEFUN([_OMPI_CXX_CHECK_EXCEPTIONS],[ # Check for special things due to C++ exceptions ENABLE_CXX_EXCEPTIONS=no HAVE_CXX_EXCEPTIONS=0 - AC_ARG_ENABLE([cxx-exceptions], + AC_ARG_ENABLE([cxx-exceptions], [AC_HELP_STRING([--enable-cxx-exceptions], [enable support for C++ exceptions (default: disabled)])], [ENABLE_CXX_EXCEPTIONS="$enableval"]) @@ -366,10 +366,10 @@ AC_DEFUN([_OMPI_CXX_CHECK_BUILTIN],[ AS_IF([test "$WANT_MPI_CXX_SUPPORT" = "1"], [_OMPI_CXX_CHECK_BUILTIN_BACKEND]) - AC_DEFINE_UNQUOTED([OMPI_CXX_HAVE_BUILTIN_EXPECT], + AC_DEFINE_UNQUOTED([OMPI_CXX_HAVE_BUILTIN_EXPECT], [$have_cxx_builtin_expect], [Whether C++ compiler supports __builtin_expect]) - AC_DEFINE_UNQUOTED([OMPI_CXX_HAVE_BUILTIN_PREFETCH], + AC_DEFINE_UNQUOTED([OMPI_CXX_HAVE_BUILTIN_PREFETCH], [$have_cxx_builtin_prefetch], [Whether C++ compiler supports __builtin_prefetch]) @@ -442,7 +442,7 @@ AC_DEFUN([_OMPI_CXX_CHECK_2D_CONST_CAST_BACKEND],[ [ompi_cv_cxx_supports_2d_const_cast], [AC_TRY_COMPILE([int non_const_func(int ranges[][3]); int cast_test(const int ranges[][3]) { - return non_const_func(const_cast(ranges)); + return non_const_func(const_cast(ranges)); }], [], [ompi_cv_cxx_supports_2d_const_cast="yes"], diff --git a/config/ompi_setup_fc.m4 b/config/ompi_setup_fc.m4 index 69d3b5d0802..0312ef54e34 100644 --- a/config/ompi_setup_fc.m4 +++ b/config/ompi_setup_fc.m4 @@ -6,20 +6,20 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. 
dnl Copyright (c) 2007 Los Alamos National Security, LLC. All rights -dnl reserved. +dnl reserved. dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl dnl OMPI_SETUP_FC @@ -27,7 +27,7 @@ dnl # This is REQUIREd, below. AC_DEFUN_ONCE([_OMPI_SETUP_FC_BANNER],[ - opal_show_subtitle "Fortran compiler" + opal_show_subtitle "Fortran compiler" ]) ############################################################################# @@ -78,7 +78,7 @@ AC_DEFUN([OMPI_SETUP_FC],[ AS_IF([test $ompi_fc_happy -eq 1], [OPAL_CHECK_COMPILER_WORKS([Fortran], [], [], [], [AC_MSG_ERROR([Could not run a simple Fortran program. Aborting.])])]) - + # OS X before 10.3 (deployment target) does not allow undefined common # symbols in shared libraries. Because we can't figure out how to # implement MPI_STATUSES_IGNORE and friends wihtout common symbols, on @@ -125,7 +125,7 @@ AC_DEFUN([OMPI_SETUP_FC],[ AC_MSG_RESULT([none]) ;; esac - + # If we're still good, then save the extra file types. Do this last # because it implies tests that should be invoked by the above tests # (e.g., running the fortran compiler). @@ -204,7 +204,7 @@ EOF [AC_MSG_RESULT([skipped (no C++ exceptions flags)])], [FCFLAGS="$FCFLAGS $OMPI_CXX_EXCEPTIONS_CXXFLAGS" AC_LANG_PUSH([Fortran]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [[ + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [[ INTEGER I I = 3]])], [AC_MSG_RESULT([yes])], diff --git a/config/ompi_setup_java.m4 b/config/ompi_setup_java.m4 index 5fb6e5f8e7c..aeacf40907b 100644 --- a/config/ompi_setup_java.m4 +++ b/config/ompi_setup_java.m4 @@ -11,21 +11,21 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. 
dnl All rights reserved. dnl Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights -dnl reserved. +dnl reserved. dnl Copyright (c) 2007-2012 Oracle and/or its affiliates. All rights reserved. dnl Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl # This macro is necessary to get the title to be displayed first. :-) AC_DEFUN([OMPI_SETUP_JAVA_BINDINGS_BANNER],[ - opal_show_subtitle "Java MPI bindings" + opal_show_subtitle "Java MPI bindings" ]) # OMPI_SETUP_JAVA_BINDINGS() @@ -74,6 +74,9 @@ AC_DEFUN([OMPI_SETUP_JAVA_BINDINGS],[ # header file needs this file, so we need to check for # it/include it in our sources when compiling on Mac). AC_CHECK_HEADERS([TargetConditionals.h]) + + # dladdr and Dl_info are required to build the full path to libmpi on OS X 10.11 aka El Capitan + AC_CHECK_TYPES([Dl_info], [], [], [[#include ]]) else AC_MSG_RESULT([no]) WANT_MPI_JAVA_SUPPORT=0 diff --git a/config/ompi_setup_mpi_ext.m4 b/config/ompi_setup_mpi_ext.m4 index 9abfd9f60e4..a22ef1aa766 100644 --- a/config/ompi_setup_mpi_ext.m4 +++ b/config/ompi_setup_mpi_ext.m4 @@ -6,24 +6,24 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2006-2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights -# reserved. +# reserved. 
# Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # AC_DEFUN([OMPI_SETUP_MPI_EXT],[ - opal_show_title "Extended MPI interfaces setup" + opal_show_title "Extended MPI interfaces setup" OMPI_EXT ]) diff --git a/config/ompi_setup_mpi_fortran.m4 b/config/ompi_setup_mpi_fortran.m4 index 2f8150e241a..089f7c5b934 100644 --- a/config/ompi_setup_mpi_fortran.m4 +++ b/config/ompi_setup_mpi_fortran.m4 @@ -15,8 +15,9 @@ dnl Copyright (c) 2006-2008 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. -dnl Copyright (c) 2014-2015 Research Organization for Information Science +dnl Copyright (c) 2014-2016 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2016 IBM Corporation. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -101,17 +102,25 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ # AC_DEFINE these results, even in the --disable-mpi-fortran case, # for ompi_info. 
AC_DEFINE_UNQUOTED([OMPI_FORTRAN_DOUBLE_UNDERSCORE], - [$ompi_fortran_double_underscore], + [$ompi_fortran_double_underscore], [Whether fortran symbols have a trailing double underscore or not]) - AC_DEFINE_UNQUOTED([OMPI_FORTRAN_SINGLE_UNDERSCORE], + OMPI_FORTRAN_DOUBLE_UNDERSCORE=$ompi_fortran_double_underscore + AC_SUBST(OMPI_FORTRAN_DOUBLE_UNDERSCORE) + AC_DEFINE_UNQUOTED([OMPI_FORTRAN_SINGLE_UNDERSCORE], [$ompi_fortran_single_underscore], [Whether fortran symbols have a trailing underscore or not]) + OMPI_FORTRAN_SINGLE_UNDERSCORE=$ompi_fortran_single_underscore + AC_SUBST(OMPI_FORTRAN_SINGLE_UNDERSCORE) AC_DEFINE_UNQUOTED([OMPI_FORTRAN_CAPS], [$ompi_fortran_caps], [Whether fortran symbols are all caps or not]) - AC_DEFINE_UNQUOTED([OMPI_FORTRAN_PLAIN], + OMPI_FORTRAN_CAPS=$ompi_fortran_caps + AC_SUBST(OMPI_FORTRAN_CAPS) + AC_DEFINE_UNQUOTED([OMPI_FORTRAN_PLAIN], [$ompi_fortran_plain], [Whether fortran symbols have no trailing underscore or not]) + OMPI_FORTRAN_PLAIN=$ompi_fortran_plain + AC_SUBST(OMPI_FORTRAN_PLAIN) # Check to see if any of the MPI Fortran bindings were # specifically requested. If so, and we weren't able to setup the @@ -135,7 +144,7 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ # regardless of whether we have F77 support or not. 
OMPI_FORTRAN_CHECK([CHARACTER], [yes], [char, int32_t, int, int64_t, long long, long], [-1], [yes]) - + OMPI_FORTRAN_CHECK([LOGICAL], [yes], [char, int32_t, int, int64_t, long long, long], [-1], [yes]) OMPI_FORTRAN_CHECK([LOGICAL*1], [yes], @@ -146,7 +155,7 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ [int32_t, int, int64_t, long long, long], [4], [yes]) OMPI_FORTRAN_CHECK([LOGICAL*8], [yes], [int, int64_t, long long, long], [8], [yes]) - + OMPI_FORTRAN_CHECK([INTEGER], [yes], [int32_t, int, int64_t, long long, long], [-1], [yes]) OMPI_FORTRAN_CHECK([INTEGER*1], [no], @@ -159,7 +168,7 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ [int, int64_t, long long, long], [8], [yes]) OMPI_FORTRAN_CHECK([INTEGER*16], [no], [int, int64_t, long long, long], [16], [yes]) - + OMPI_FORTRAN_CHECK([REAL], [yes], [float, double, long double], [-1], [yes]) OMPI_FORTRAN_CHECK([REAL*2], [no], @@ -170,16 +179,16 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ [float, double, long double], [8], [yes]) OMPI_FORTRAN_CHECK([REAL*16], [no], [float, double, long double], [16], [yes]) - + # In some compilers, the bit representation of REAL*16 is not the same # as the C counterpart that we found. If this is the case, then we # want to disable reduction support for MPI_REAL16 (per ticket #1603). 
OMPI_FORTRAN_CHECK_REAL16_C_EQUIV - + OMPI_FORTRAN_CHECK([DOUBLE PRECISION], [yes], [float, double, long double], [-1], [yes]) - - OMPI_FORTRAN_CHECK([COMPLEX], [yes], [float _Complex], [-1], [no]) + + OMPI_FORTRAN_CHECK([COMPLEX], [yes], [float _Complex, double _Complex], [-1], [no]) # The complex*N tests are a bit different (note: the complex tests are # the same as all the rest, because complex is a composite of two @@ -191,39 +200,39 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ # a) we must support real*(N/2) (i.e., compiler supports it and we # have a back-end C type for it) # b) compiler supports complex*N - + OMPI_FORTRAN_CHECK([COMPLEX*4], [no], [float _Complex], [4], [no]) - OMPI_FORTRAN_CHECK([COMPLEX*8], [no], + OMPI_FORTRAN_CHECK([COMPLEX*8], [no], [float _Complex, double _Complex, long double _Complex], [8], [no]) - OMPI_FORTRAN_CHECK([COMPLEX*16], [no], - [float _Complex, double _Complex, long double _Complex], + OMPI_FORTRAN_CHECK([COMPLEX*16], [no], + [float _Complex, double _Complex, long double _Complex], [16], [no]) - OMPI_FORTRAN_CHECK([COMPLEX*32], [no], + OMPI_FORTRAN_CHECK([COMPLEX*32], [no], [float _Complex, double _Complex, long double _Complex], [32], [no]) # Double precision complex types are not standard, but many # compilers support it. Code should be wrapped with #ifdef # OMPI_HAVE_FORTRAN_DOUBLE_COMPLEX OMPI_FORTRAN_CHECK([DOUBLE COMPLEX], [no], - [float _Complex, double _Complex, long double _Complex], + [float _Complex, double _Complex, long double _Complex], [-1], [no]) - + # Regardless of whether we have fortran bindings, or even a # fortran compiler, get the max value for a fortran MPI handle # (this macro handles the case where we don't have a fortran - # compiler). + # compiler). OMPI_FORTRAN_GET_HANDLE_MAX # Check for Fortran compilers value of TRUE and for the correct # assumption on LOGICAL for conversion into what C considers to be - # a true value. + # a true value. 
OMPI_FORTRAN_GET_VALUE_TRUE OMPI_FORTRAN_CHECK_LOGICAL_ARRAY # Find out how many array ranks this compiler supports. OMPI_FORTRAN_CHECK_MAX_ARRAY_RANK - + # How big should MPI_STATUS_SIZE be? (i.e., the size of # MPI_STATUS, expressed in units of Fortran INTEGERs). The C # equivalent of MPI_Status contains 4 C ints and a size_t. @@ -301,7 +310,8 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ # We need INTERFACE, ISO_FORTRAN_ENV, and STORAGE_SIZE() support # to build MPI_SIZEOF support - AS_IF([test $OMPI_FORTRAN_HAVE_INTERFACE -eq 1 && \ + AS_IF([test $ompi_fortran_happy -eq 1 && \ + test $OMPI_FORTRAN_HAVE_INTERFACE -eq 1 && \ test $OMPI_FORTRAN_HAVE_ISO_FORTRAN_ENV -eq 1 && \ test $OMPI_FORTRAN_HAVE_STORAGE_SIZE -eq 1], [OMPI_FORTRAN_BUILD_SIZEOF=1], @@ -315,7 +325,7 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ AS_IF([test $ompi_fortran_happy -eq 1 && \ test $OMPI_TRY_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPI_BINDINGS], [ # Look for the fortran module compiler flag - OMPI_FORTRAN_FIND_MODULE_INCLUDE_FLAG([], + OMPI_FORTRAN_FIND_MODULE_INCLUDE_FLAG([], [AC_MSG_WARN([*** Could not determine the fortran compiler flag to indicate where modules reside]) AC_MSG_ERROR([*** Cannot continue])]) @@ -331,9 +341,9 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ [OMPI_BUILD_FORTRAN_BINDINGS=$OMPI_FORTRAN_USEMPI_BINDINGS AS_IF([test $OMPI_FORTRAN_HAVE_IGNORE_TKR -eq 1], [OMPI_FORTRAN_USEMPI_DIR=mpi/fortran/use-mpi-ignore-tkr - OMPI_FORTRAN_USEMPI_LIB=-lmpi_usempi_ignore_tkr], + OMPI_FORTRAN_USEMPI_LIB=-l${with_libmpi_name}_usempi_ignore_tkr], [OMPI_FORTRAN_USEMPI_DIR=mpi/fortran/use-mpi-tkr - OMPI_FORTRAN_USEMPI_LIB=-lmpi_usempi]) + OMPI_FORTRAN_USEMPI_LIB=-l${with_libmpi_name}_usempi]) ]) OMPI_FORTRAN_HAVE_ISO_C_BINDING=0 @@ -348,7 +358,7 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ [AC_MSG_RESULT([yes])], [OMPI_TRY_FORTRAN_BINDINGS=$OMPI_FORTRAN_MPIFH_BINDINGS AC_MSG_RESULT([no])]) - + #--------------------------------- # Fortran use mpi_f08 MPI bindings #--------------------------------- @@ -358,7 
+368,7 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ # We need to have ignore TKR functionality to build the mpi_f08 # module - AS_IF([test $OMPI_TRY_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPIF08_BINDINGS && + AS_IF([test $OMPI_TRY_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPIF08_BINDINGS && \ test $OMPI_FORTRAN_HAVE_IGNORE_TKR -eq 1], [OMPI_BUILD_FORTRAN_BINDINGS=$OMPI_FORTRAN_USEMPIF08_BINDINGS OMPI_FORTRAN_F08_PREDECL=$OMPI_FORTRAN_IGNORE_TKR_PREDECL @@ -429,6 +439,18 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ [OMPI_FORTRAN_HAVE_PROCEDURE=0 OMPI_BUILD_FORTRAN_BINDINGS=$OMPI_FORTRAN_USEMPI_BINDINGS])]) + # Per https://github.com/open-mpi/ompi/issues/857, if the Fortran + # compiler doesn't properly support "USE ... ONLY" notation, + # disable the mpi_f08 module. + OMPI_FORTRAN_HAVE_USE_ONLY=0 + AS_IF([test $OMPI_TRY_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPIF08_BINDINGS && \ + test $OMPI_BUILD_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPIF08_BINDINGS], + [ # Does the compiler support "USE ... ONLY" + OMPI_FORTRAN_CHECK_USE_ONLY( + [OMPI_FORTRAN_HAVE_USE_ONLY=1], + [OMPI_FORTRAN_HAVE_USE_ONLY=0 + OMPI_BUILD_FORTRAN_BINDINGS=$OMPI_FORTRAN_USEMPI_BINDINGS])]) + OMPI_FORTRAN_HAVE_OPTIONAL_ARGS=0 AS_IF([test $OMPI_TRY_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPIF08_BINDINGS && \ test $OMPI_BUILD_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPIF08_BINDINGS], @@ -481,14 +503,18 @@ AC_DEFUN([OMPI_SETUP_MPI_FORTRAN],[ [OMPI_FORTRAN_HAVE_ASYNCHRONOUS=0])]) OMPI_FORTRAN_F08_HANDLE_SIZE=4 + OMPI_FORTRAN_F08_HANDLE_ALIGNMENT=4 AS_IF([test $OMPI_TRY_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPIF08_BINDINGS && \ test $OMPI_BUILD_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPIF08_BINDINGS], [ # How big are derived types with a single INTEGER? 
OMPI_FORTRAN_GET_SIZEOF([type, BIND(C) :: test_mpi_handle integer :: MPI_VAL -end type test_mpi_handle], - [type(test_mpi_handle)], +end type test_mpi_handle], + [type(test_mpi_handle)], [OMPI_FORTRAN_F08_HANDLE_SIZE]) + OMPI_FORTRAN_F08_GET_HANDLE_ALIGNMENT( + [type(test_mpi_handle)], + [OMPI_FORTRAN_F08_HANDLE_ALIGNMENT]) ]) OMPI_FORTRAN_NEED_WRAPPER_ROUTINES=1 @@ -512,7 +538,7 @@ end type test_mpi_handle], # ("good" compilers) # c) compiler that does not support the items listed # in b) ("bad" compilers) - + AC_MSG_CHECKING([which mpi_f08 implementation to build]) AS_IF([test $OMPI_BUILD_FORTRAN_F08_SUBARRAYS -eq 1], [ # Case a) partial/prototype implementation @@ -549,7 +575,7 @@ end type test_mpi_handle], AC_MSG_CHECKING([if building Fortran 'use mpi_f08' bindings]) AS_IF([test $OMPI_BUILD_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPIF08_BINDINGS], - [OMPI_FORTRAN_USEMPIF08_LIB=-lmpi_usempif08 + [OMPI_FORTRAN_USEMPIF08_LIB=-l${with_libmpi_name}_usempif08 AC_MSG_RESULT([yes])], [OMPI_TRY_FORTRAN_BINDIGS=$OMPI_FORTRAN_USEMPI_BINDINGS AC_MSG_RESULT([no])]) @@ -567,7 +593,7 @@ end type test_mpi_handle], # and this header file must be usable in .F90 files. 
:-( AC_CONFIG_FILES([ompi/mpi/fortran/configure-fortran-output.h]) - # Values for wrapper compilers + # Values for wrapper compilers OMPI_FC=$FC set dummy $OMPI_FC OMPI_FC_ARGV0=[$]2 @@ -622,12 +648,11 @@ end type test_mpi_handle], # these layers need to be built or NOT AM_CONDITIONAL(BUILD_MPI_FORTRAN_MPIFH_BINDINGS_LAYER, - [( test $WANT_MPI_PROFILING -eq 0 || test $OMPI_PROFILING_COMPILE_SEPARATELY -eq 1 ) && \ + [test $OMPI_PROFILING_COMPILE_SEPARATELY -eq 1 && \ test $OMPI_BUILD_FORTRAN_BINDINGS -gt $OMPI_FORTRAN_NO_BINDINGS]) AM_CONDITIONAL(BUILD_PMPI_FORTRAN_MPIFH_BINDINGS_LAYER, - [test $OMPI_BUILD_FORTRAN_BINDINGS -gt $OMPI_FORTRAN_NO_BINDINGS && \ - test $WANT_MPI_PROFILING -eq 1]) - AM_CONDITIONAL(OMPI_BUILD_FORTRAN_MPIFH_BINDINGS, + [test $OMPI_BUILD_FORTRAN_BINDINGS -gt $OMPI_FORTRAN_NO_BINDINGS]) + AM_CONDITIONAL(OMPI_BUILD_FORTRAN_MPIFH_BINDINGS, [test $OMPI_BUILD_FORTRAN_BINDINGS -gt $OMPI_FORTRAN_NO_BINDINGS]) # ------------------- @@ -658,15 +683,15 @@ end type test_mpi_handle], # the unused "use mpi" directory, but we might as well have the # ompi/mpi/fortran/use-mpi*/Makefile.ams be safe, too. 
# True if we're building either "use mpi" bindings - AM_CONDITIONAL(OMPI_BUILD_FORTRAN_USEMPI_BINDINGS, + AM_CONDITIONAL(OMPI_BUILD_FORTRAN_USEMPI_BINDINGS, [test $OMPI_BUILD_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPI_BINDINGS || \ test $OMPI_FORTRAN_HAVE_IGNORE_TKR -eq 1]) # True if we're building the old TKR-style bindings - AM_CONDITIONAL(OMPI_BUILD_FORTRAN_USEMPI_TKR_BINDINGS, + AM_CONDITIONAL(OMPI_BUILD_FORTRAN_USEMPI_TKR_BINDINGS, [test $OMPI_BUILD_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPI_BINDINGS && \ test $OMPI_FORTRAN_HAVE_IGNORE_TKR -eq 0]) # True if we're building the new ignore-TKR-style bindings - AM_CONDITIONAL(OMPI_BUILD_FORTRAN_USEMPI_IGNORE_TKR_BINDINGS, + AM_CONDITIONAL(OMPI_BUILD_FORTRAN_USEMPI_IGNORE_TKR_BINDINGS, [test $OMPI_BUILD_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPI_BINDINGS && \ test $OMPI_FORTRAN_HAVE_IGNORE_TKR -eq 1]) @@ -703,10 +728,14 @@ end type test_mpi_handle], [$OMPI_FORTRAN_NEED_WRAPPER_ROUTINES], [Whether the mpi_f08 implementation is using wrapper routines ("bad" Fortran compiler) or weak symbols ("good" Fortran compiler) for the F08 interface definition implementations]) - AC_DEFINE_UNQUOTED(OMPI_FORTRAN_F08_HANDLE_SIZE, + AC_DEFINE_UNQUOTED(OMPI_FORTRAN_F08_HANDLE_SIZE, $OMPI_FORTRAN_F08_HANDLE_SIZE, [How many bytes the mpi_f08 TYPE(MPI_) handles will be]) + AC_DEFINE_UNQUOTED(OMPI_FORTRAN_F08_HANDLE_ALIGNMENT, + $OMPI_FORTRAN_F08_HANDLE_ALIGNMENT, + [How many bytes the mpi_f08 TYPE(MPI_) handles will be aligned to]) + # These go into ompi/info/param.c AC_DEFINE_UNQUOTED(OMPI_FORTRAN_HAVE_F08_ASSUMED_RANK, [$OMPI_FORTRAN_HAVE_F08_ASSUMED_RANK], @@ -727,47 +756,54 @@ end type test_mpi_handle], AC_DEFINE_UNQUOTED(OMPI_FORTRAN_HAVE_BIND_C_TYPE_NAME, [$OMPI_FORTRAN_HAVE_BIND_C_TYPE_NAME], [For ompi_info: Whether the compiler supports TYPE, BIND(C, NAME="name") or not]) - AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_OPTIONAL_ARGS], + AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_OPTIONAL_ARGS], [$OMPI_FORTRAN_HAVE_OPTIONAL_ARGS], [For 
ompi_info: whether the Fortran compiler supports optional arguments or not]) # For configure-fortran-output.h, mpi-f08-types.F90 (and ompi_info) AC_SUBST([OMPI_FORTRAN_HAVE_PRIVATE]) - AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_PRIVATE], + AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_PRIVATE], [$OMPI_FORTRAN_HAVE_PRIVATE], [For mpi-f08-types.f90 and ompi_info: whether the compiler supports the "private" keyword or not (used in MPI_Status)]) # For configure-fortran-output.h, mpi-f08-types.F90 (and ompi_info) AC_SUBST([OMPI_FORTRAN_HAVE_PROTECTED]) - AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_PROTECTED], + AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_PROTECTED], [$OMPI_FORTRAN_HAVE_PROTECTED], [For mpi-f08-types.f90 and .F90 and ompi_info: whether the compiler supports the "protected" keyword or not]) # For configure-fortran-output.h, mpi-f08-interfaces-callbacks.F90 # (and ompi_info) AC_SUBST([OMPI_FORTRAN_HAVE_ABSTRACT]) - AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_ABSTRACT], + AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_ABSTRACT], [$OMPI_FORTRAN_HAVE_ABSTRACT], [For mpi-f08-interfaces-callbacks.f90 and ompi_info: whether the compiler supports the "abstract" keyword or not]) # For configure-fortran-output.h, various files in # ompi/mpi/fortran/use-mpi-f08/*.F90 and *.h files (and ompi_info) AC_SUBST([OMPI_FORTRAN_HAVE_ASYNCHRONOUS]) - AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_ASYNCHRONOUS], + AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_ASYNCHRONOUS], [$OMPI_FORTRAN_HAVE_ASYNCHRONOUS], [For ompi/mpi/fortran/use-mpi-f08/blah.F90 and blah.h and ompi_info: whether the compiler supports the "asynchronous" keyword or not]) # For configure-fortran-output.h, various files in # ompi/mpi/fortran/use-mpi-f08/*.F90 and *.h files (and ompi_info) AC_SUBST([OMPI_FORTRAN_HAVE_PROCEDURE]) - AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_PROCEDURE], + AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_PROCEDURE], [$OMPI_FORTRAN_HAVE_PROCEDURE], [For ompi/mpi/fortran/use-mpi-f08/blah.F90 and blah.h and ompi_info: whether the compiler supports the 
"procedure" keyword or not]) + # For configure-fortran-output.h, various files in + # ompi/mpi/fortran/use-mpi-f08/*.F90 and *.h files (and ompi_info) + AC_SUBST([OMPI_FORTRAN_HAVE_USE_ONLY]) + AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_USE_ONLY], + [$OMPI_FORTRAN_HAVE_USE_ONLY], + [For ompi/mpi/fortran/use-mpi-f08/blah.F90 and blah.h and ompi_info: whether the compiler supports "USE ... ONLY" notation properly or not]) + # For configure-fortran-output.h, various files in # ompi/mpi/fortran/use-mpi-f08/*.F90 and *.h files (and ompi_info) AC_SUBST([OMPI_FORTRAN_HAVE_C_FUNLOC]) - AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_C_FUNLOC], + AC_DEFINE_UNQUOTED([OMPI_FORTRAN_HAVE_C_FUNLOC], [$OMPI_FORTRAN_HAVE_C_FUNLOC], [For ompi/mpi/fortran/use-mpi-f08/blah.F90 and blah.h and ompi_info: whether the compiler supports c_funloc or not]) @@ -778,7 +814,7 @@ end type test_mpi_handle], # ompi/mpi/fortran/use-mpi-f08 if it's not to be built, but we # might as well have ompi/mpi/fortran/use-mpi-f08/Makefile.am be # safe, too. - AM_CONDITIONAL(OMPI_BUILD_FORTRAN_USEMPIF08_BINDINGS, + AM_CONDITIONAL(OMPI_BUILD_FORTRAN_USEMPIF08_BINDINGS, [test $OMPI_BUILD_FORTRAN_BINDINGS -ge $OMPI_FORTRAN_USEMPIF08_BINDINGS]) AC_DEFINE_UNQUOTED(OMPI_BUILD_FORTRAN_BINDINGS, diff --git a/config/ompi_setup_mpi_profiling.m4 b/config/ompi_setup_mpi_profiling.m4 index 810bed73808..2fc2ba1f0f7 100644 --- a/config/ompi_setup_mpi_profiling.m4 +++ b/config/ompi_setup_mpi_profiling.m4 @@ -6,19 +6,21 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. 
# Copyright (c) 2006-2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights -# reserved. +# reserved. # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -37,16 +39,12 @@ AC_DEFUN([OMPI_SETUP_MPI_PROFILING],[ # define 2 conditionals which tell us whether each of these layers # need to be built or NOT # - + AM_CONDITIONAL(BUILD_MPI_BINDINGS_LAYER, - test "$WANT_MPI_PROFILING" = 0 -o "$OMPI_PROFILING_COMPILE_SEPARATELY" = 1) - - AM_CONDITIONAL(BUILD_PMPI_BINDINGS_LAYER, - test "$WANT_MPI_PROFILING" = 1) + test "$OMPI_PROFILING_COMPILE_SEPARATELY" = 1) + AM_CONDITIONAL(COMPILE_PROFILING_SEPARATELY, test "$OMPI_PROFILING_COMPILE_SEPARATELY" = 1) - AC_DEFINE_UNQUOTED(OMPI_ENABLE_MPI_PROFILING, $WANT_MPI_PROFILING, - [Whether we want MPI profiling or not]) AC_DEFINE_UNQUOTED(OPAL_HAVE_WEAK_SYMBOLS, $OPAL_C_HAVE_WEAK_SYMBOLS, [Whether we have weak symbols or not]) ]) diff --git a/config/opal_case_sensitive_fs_setup.m4 b/config/opal_case_sensitive_fs_setup.m4 index ff1ed65372a..d6592e10478 100644 --- a/config/opal_case_sensitive_fs_setup.m4 +++ b/config/opal_case_sensitive_fs_setup.m4 @@ -6,14 +6,14 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -59,7 +59,7 @@ rm -f conf_fs_test.$$ CONF_FS_TEST.$$ # Now see what the user wants to do... # AC_MSG_CHECKING([if configuring for case sensitive filesystem]) -AC_ARG_WITH(cs_fs, +AC_ARG_WITH(cs_fs, AC_HELP_STRING([--with-cs-fs], [Destination FS is case sensitive (default: set to value of the build FS's case sensitivity)])) diff --git a/config/opal_check_attributes.m4 b/config/opal_check_attributes.m4 index 88788f26777..064a59aea6d 100644 --- a/config/opal_check_attributes.m4 +++ b/config/opal_check_attributes.m4 @@ -53,7 +53,7 @@ AC_DEFUN([_OPAL_ATTRIBUTE_FAIL_SEARCH],[ # regarding unused function in main file) # static int usage (int * argument); # -# The last argument is for specific CFLAGS, that need to be set +# The last argument is for specific CFLAGS, that need to be set # for the compiler to generate a warning on the cross-check. # This may need adaption for future compilers / CFLAG-settings. # @@ -138,7 +138,7 @@ AC_DEFUN([_OPAL_CHECK_SPECIFIC_ATTRIBUTE], [ # attribute most often fail with a warning (when the warning # level is set). # The compilers output is parsed in _OPAL_ATTRIBUTE_FAIL_SEARCH -# +# # To add a new attributes __NAME__ add the # opal_cv___attribute__NAME # add a new check with _OPAL_CHECK_SPECIFIC_ATTRIBUTE (possibly with a cross-check) diff --git a/config/opal_check_broken_qsort.m4 b/config/opal_check_broken_qsort.m4 index ac12bf6c59e..7d70c6a0ae7 100644 --- a/config/opal_check_broken_qsort.m4 +++ b/config/opal_check_broken_qsort.m4 @@ -6,16 +6,16 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. 
dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl dnl There was some mentioning of broken qsort happened for Solaris that could @@ -29,7 +29,7 @@ dnl 5.9_sparc #112874-20 or later dnl 5.9_x86 #114432-07 or later dnl dnl For users who could not patch their systems or are convinced that their -dnl native qsort is broken, they could specify this configure flag to use +dnl native qsort is broken, they could specify this configure flag to use dnl the opal_qsort instead. # check for broken qsort diff --git a/config/opal_check_cma.m4 b/config/opal_check_cma.m4 index cfb7639c13b..2930debf911 100644 --- a/config/opal_check_cma.m4 +++ b/config/opal_check_cma.m4 @@ -3,9 +3,9 @@ # Copyright (c) 2009 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010-2012 IBM Corporation. All rights reserved. -# Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights +# Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights # reserved. # $COPYRIGHT$ # @@ -18,29 +18,115 @@ # -------------------------------------------------------- # check if cma support is wanted. 
AC_DEFUN([OPAL_CHECK_CMA],[ - OPAL_VAR_SCOPE_PUSH([ompi_check_cma_happy ompi_check_cma_need_defs]) - - ompi_check_cma_happy="no" AC_ARG_WITH([cma], [AC_HELP_STRING([--with-cma], - [Build Cross Memory Attach support (default: no)])]) - - AC_MSG_CHECKING([if user requested CMA build]) - if test "$with_cma" = "yes" ; then - ompi_check_cma_happy="yes" - AC_MSG_RESULT([yes]) - AC_CHECK_FUNC(process_vm_readv, [ompi_check_cma_need_defs=0], - [ompi_check_cma_need_defs=1]) - AC_DEFINE_UNQUOTED([OPAL_CMA_NEED_SYSCALL_DEFS], - [$ompi_check_cma_need_defs], - [Need CMA syscalls defined]) - AC_CHECK_HEADERS([sys/prctl.h]) + [Build Cross Memory Attach support (default: autodetect)])]) + + # We only need to do the back-end test once + if test -z "$opal_check_cma_happy" ; then + OPAL_CHECK_CMA_BACKEND + fi + + AS_IF([test $opal_check_cma_happy -eq 1], + [$2], + [if test "$with_cma" = "yes"; then + AC_MSG_WARN([--with-cma support requested, but not available]) + AC_MSG_ERROR([Cannot continue]) + fi + $3]) +]) + +AC_DEFUN([OPAL_CHECK_CMA_BACKEND], +[ + OPAL_VAR_SCOPE_PUSH([opal_check_cma_need_defs opal_check_cma_kernel_version opal_check_cma_CFLAGS opal_check_cma_msg]) + + # Some systems have process_cm_readv() in libc, which means CMA is + # supported. Other systems do not have process_cm_readv() in + # libc, but have support for it in the kernel if we invoke it + # directly. Check for both. 
+ AC_CHECK_HEADERS([sys/prctl.h]) + + AC_CHECK_FUNC([process_vm_readv], [opal_check_cma_need_defs=0], + [opal_check_cma_need_defs=1]) + AC_DEFINE_UNQUOTED([OPAL_CMA_NEED_SYSCALL_DEFS], + [$opal_check_cma_need_defs], + [Need CMA syscalls defined]) + if test $opal_check_cma_need_defs -eq 1 ; then + opal_check_cma_CFLAGS=$CFLAGS + # Need some extra include paths to locate the appropriate headers + CFLAGS="$CFLAGS -I${srcdir} -I${srcdir}/opal/include" + AC_MSG_CHECKING([if internal syscall numbers for Linux CMA work]) + AC_RUN_IFELSE([AC_LANG_PROGRAM([[ +#include +#include +#include +#include + +#include +#include +#include + +#include "opal/include/opal/sys/cma.h" + +static void do_check (pid_t pid, int *in, int *out) +{ + int check[4] = {0, 0, 0, 0}, i; + struct iovec rem_iov = {out, sizeof (check)}; + struct iovec loc_iov = {check, sizeof (check)}; + ssize_t rc; + + rc = process_vm_readv (pid, &loc_iov, 1, &rem_iov, 1, 0); + if (sizeof (check) != rc) { + exit (1); + } + + for (i = 0 ; i < 4 ; ++i) { + if (check[i] != i) { + exit (1); + } + + check[i] = i * 2; + } + + rem_iov.iov_base = in; + rc = process_vm_writev (pid, &loc_iov, 1, &rem_iov, 1, 0); + if (sizeof (check) != rc) { + exit (1); + } + + exit (0); +} +]],[[ + int i, in[4] = {-1, -1, -1, -1}, out[4] = {0, 1, 2, 3}; + + do_check (getpid (), in, out); + + for (i = 0 ; i < 4 ; ++i) { + if (in[i] != 2 * i) { + return 1; + } + } + + /* all good */ + return 0; +]])], + [AC_MSG_RESULT([yes]) + opal_check_cma_happy=1], + [AC_MSG_RESULT([no]) + opal_check_cma_happy=0], + [AC_MSG_RESULT([no (cross-compiling)]) + opal_check_cma_happy=0]) + CFLAGS=$opal_check_cma_CFLAGS else - AC_MSG_RESULT([no]) + # If we didn't need the defs, then we have process_vm_readv(), + # and CMA is happy. 
+ opal_check_cma_happy=1 fi - AS_IF([test "$ompi_check_cma_happy" = "yes"], - [$2], - [$3]) OPAL_VAR_SCOPE_POP -])dnl + + AS_IF([test $opal_check_cma_happy -eq 1], + [opal_check_cma_msg=yes], + [opal_check_cma_msg=no]) + OPAL_SUMMARY_ADD([[Transports]],[[Shared memory/Linux CMA]],[$1],[$opal_check_cma_msg]) +]) diff --git a/config/opal_check_compiler_works.m4 b/config/opal_check_compiler_works.m4 index 7c5fcbe3244..f632c0c81f6 100644 --- a/config/opal_check_compiler_works.m4 +++ b/config/opal_check_compiler_works.m4 @@ -4,13 +4,13 @@ dnl Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana dnl University Research and Technology dnl Corporation. All rights reserved. dnl Copyright (c) 2006 Los Alamos National Security, LLC. All rights -dnl reserved. +dnl reserved. dnl Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -39,14 +39,14 @@ AC_DEFUN([OPAL_CHECK_COMPILER_WORKS], [AS_VAR_SET(lang_var, ["links (cross compiling)"])], [AS_VAR_SET(lang_var, ["no"])])]) AC_LANG_POP($1)]) - AS_VAR_IF(lang_var, [no], + AS_VAR_IF(lang_var, [no], [cat <&2 ********************************************************************** * It appears that your $1 compiler is unable to produce working * executables. A simple test application failed to properly * execute. Note that this is likely not a problem with Open MPI, * but a problem with the local compiler installation. More -* information (including exactly what command was given to the +* information (including exactly what command was given to the * compiler and what error resulted when the command was executed) is * available in the config.log file in the Open MPI build directory. 
********************************************************************** diff --git a/config/opal_check_cray_pmi.m4 b/config/opal_check_cray_pmi.m4 index 9789aba050a..8e3dfee58f3 100644 --- a/config/opal_check_cray_pmi.m4 +++ b/config/opal_check_cray_pmi.m4 @@ -13,7 +13,7 @@ dnl All rights reserved. dnl Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights dnl reserved. -dnl Copyright (c) 2014 Intel, Inc. All rights reserved. +dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. dnl Copyright (c) 2014-2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ @@ -50,7 +50,7 @@ AC_DEFUN([OPAL_CHECK_CRAY_PMI_EXPLICIT],[ # this logic assumes knowledge about all the dependencies of the Cray PMI library, # something that Cray doesn't generally document # - AS_IF([test "$enable_static" == "yes"], + AS_IF([test "$enable_static" = "yes"], [AS_IF([test -d /usr/lib/alps], [AC_MSG_RESULT([Detected presense of /usr/lib/alps]) CRAY_PMI_LDFLAGS="$CRAY_PMI_LDFLAGS -L/usr/lib/alps -lalpslli -lalpsutil" diff --git a/config/opal_check_cray_xpmem.m4 b/config/opal_check_cray_xpmem.m4 deleted file mode 100644 index 4dca2f62a98..00000000000 --- a/config/opal_check_cray_xpmem.m4 +++ /dev/null @@ -1,64 +0,0 @@ -# -*- shell-script ; indent-tabs-mode:nil -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. 
-# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# Copyright (c) 2014 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# -# special check for cray xpmem, uses macro(s) from pkg.m4 -# -# OPAL_CHECK_CRAY_XPMEM(prefix, [action-if-found], [action-if-not-found]) -# -------------------------------------------------------- -AC_DEFUN([OPAL_CHECK_CRAY_XPMEM],[ - AC_ARG_WITH([cray_xpmem], - [AC_HELP_STRING([--with-cray-xpmem(=yes/no)], - [Build Cray XPMEM support(default: auto)])], - [], with_cray_xpmem=auto) - - AC_MSG_CHECKING([for Cray XPMEM support]) - AS_IF([test "$with_cray_xpmem" = "no"], - [AC_MSG_RESULT([no]) - $3], - [AS_IF([test "$with_cray_xpmem" = "auto" -o "$with_cray_xpmem" = "yes"], - [PKG_CHECK_MODULES_STATIC([CRAY_XPMEM], [cray-xpmem], - [opal_check_cray_xpmem_happy="yes"], - [opal_check_cray_xpmem_happy="no"] - [AS_IF([test "$with_cray_xpmem" = "yes"], - [AC_MSG_WARN([Cray XPMEM support requested but pkg-config failed.]) - AC_MSG_ERROR([Aborting])],[])] - )], - []) - ]) - - AS_IF([test "$opal_check_cray_xpmem_happy" = "yes" -a "$enable_static" = "yes"], - [CRAY_XPMEM_LIBS = $CRAY_XPMEM_STATIC_LIBS],[]) - - AS_IF([test "$opal_check_cray_xpmem_happy" = "yes"], - [$1_LDFLAGS="$CRAY_XPMEM_LIBS" - $1_CPPFLAGS="$CRAY_XPMEM_CFLAGS" - $1_LIBS="$CRAY_XPMEM_LIBS" - AC_DEFINE_UNQUOTED([HAVE_XPMEM_H], [1],[is xpmem.h available]) - $2], [$3]) -]) - - - diff --git a/config/opal_check_cuda.m4 b/config/opal_check_cuda.m4 index 7040f5c515b..fd7816e3ea7 100644 --- a/config/opal_check_cuda.m4 +++ b/config/opal_check_cuda.m4 @@ -10,13 +10,13 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. 
-dnl Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2009 IBM Corporation. All rights reserved. dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. -dnl Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. +dnl Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl @@ -79,10 +79,13 @@ dnl common framework, and likely configured first). So we have to dnl defer this check until later (see the OPAL_CHECK_CUDA_AFTER_OPAL_DL m4 dnl macro, below). :-( -# If we have CUDA support, check to see if we have CUDA 4.1 support -AS_IF([test "$opal_check_cuda_happy"="yes"], - AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved], [CUDA_SUPPORT_41=1], [CUDA_SUPPORT_41=0], - [#include <$opal_cuda_incdir/cuda.h>]), +# We require CUDA IPC support which started in CUDA 4.1. Error +# out if the support is not there. 
+AS_IF([test "$opal_check_cuda_happy" = "yes"], + [AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved], + [], + [AC_MSG_ERROR([Cannot continue because CUDA 4.1 or later is required])], + [#include <$opal_cuda_incdir/cuda.h>])], []) # If we have CUDA support, check to see if we have support for SYNC_MEMOPS @@ -121,14 +124,12 @@ else CUDA_SUPPORT=0 fi +OPAL_SUMMARY_ADD([[Miscellaneous]],[[CUDA support]],[opal_cuda], [$opal_check_cuda_happy]) + AM_CONDITIONAL([OPAL_cuda_support], [test "x$CUDA_SUPPORT" = "x1"]) AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT],$CUDA_SUPPORT, [Whether we want cuda device pointer support]) -AM_CONDITIONAL([OPAL_cuda_support_41], [test "x$CUDA_SUPPORT_41" = "x1"]) -AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT_41],$CUDA_SUPPORT_41, - [Whether we have CUDA 4.1 support available]) - AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"]) AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS, [Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available]) diff --git a/config/opal_check_icc.m4 b/config/opal_check_icc.m4 index 08502ea3d18..9c60fa29701 100644 --- a/config/opal_check_icc.m4 +++ b/config/opal_check_icc.m4 @@ -6,21 +6,21 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl AC_DEFUN([OPAL_CHECK_ICC_VARARGS],[ dnl -dnl On EM64T, icc-8.1 before version 8.1.027 segfaulted, since +dnl On EM64T, icc-8.1 before version 8.1.027 segfaulted, since dnl va_start was miscompiled... dnl AC_MSG_CHECKING([whether icc-8.1 for EM64T works with variable arguments]) diff --git a/config/opal_check_knem.m4 b/config/opal_check_knem.m4 index 14d1eec1748..2e999f1fd42 100644 --- a/config/opal_check_knem.m4 +++ b/config/opal_check_knem.m4 @@ -3,9 +3,9 @@ dnl dnl Copyright (c) 2009 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2010-2012 IBM Corporation. All rights reserved. -dnl Copyright (c) 2014 Los Alamos National Security, LLC. All rights +dnl Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. 
@@ -22,48 +22,53 @@ dnl # LDFLAGS, LIBS} as needed and runs action-if-found if there is # support, otherwise executes action-if-not-found AC_DEFUN([OPAL_CHECK_KNEM],[ - OPAL_VAR_SCOPE_PUSH([opal_check_knem_happy opal_check_knem_$1_save_CPPFLAGS opal_check_knem_dir]) - AC_ARG_WITH([knem], - [AC_HELP_STRING([--with-knem(=DIR)], - [Build knem Linux kernel module support, searching for headers in DIR])]) + if test -z "$opal_check_knem_happy" ; then + OPAL_VAR_SCOPE_PUSH([opal_check_knem_$1_save_CPPFLAGS opal_check_knem_dir]) + AC_ARG_WITH([knem], + [AC_HELP_STRING([--with-knem(=DIR)], + [Build knem Linux kernel module support, searching for headers in DIR/include])]) - OPAL_CHECK_WITHDIR([knem], [$with_knem], [include/knem_io.h]) - opal_check_knem_$1_save_CPPFLAGS="$CPPFLAGS" + OPAL_CHECK_WITHDIR([knem], [$with_knem], [include/knem_io.h]) + opal_check_knem_$1_save_CPPFLAGS="$CPPFLAGS" - AS_IF([test "$with_knem" != "no"], - [AS_IF([test ! -z "$with_knem" && test "$with_knem" != "yes"], - [opal_check_knem_dir="$with_knem"]) + opal_check_knem_happy=no - _OPAL_CHECK_PACKAGE_HEADER([$1], - [knem_io.h], - [$opal_check_knem_dir], - [opal_check_knem_happy="yes"], - [opal_check_knem_happy="no"])], - [opal_check_knem_happy="no"]) + AS_IF([test "$with_knem" != "no"], + [AS_IF([test ! 
-z "$with_knem" && test "$with_knem" != "yes"], + [opal_check_knem_dir="$with_knem"]) - CPPFLAGS="$CPPFLAGS $$1_CPPFLAGS" + _OPAL_CHECK_PACKAGE_HEADER([ompi_check_knem], + [knem_io.h], + [$opal_check_knem_dir], + [opal_check_knem_happy="yes"], + [])], + []) - # need at least version 0x0000000b - AS_IF([test "$opal_check_knem_happy" = "yes"], - [AC_CACHE_CHECK([for knem ABI version 0xb or later], - [opal_cv_knem_version_ok], - [AC_PREPROC_IFELSE( - [AC_LANG_PROGRAM([ + CPPFLAGS="$CPPFLAGS $ompi_check_knem_CPPFLAGS" + + # need at least version 0x0000000b + if test "$opal_check_knem_happy" = "yes" ; then + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ #include - ],[ + ],[ #if KNEM_ABI_VERSION < 0xc #error "Version less than 0xc" #endif - ])], - [opal_cv_knem_version_ok=yes], - [opal_cv_knem_version_ok=no])])]) + ])], + [opal_check_knem_happy=yes], + [opal_check_knem_happy=no]) + fi - CPPFLAGS="$opal_check_knem_$1_save_CPPFLAGS" + CPPFLAGS="$opal_check_knem_$1_save_CPPFLAGS" - AS_IF([test "$opal_check_knem_happy" = "yes" && test "$opal_cv_knem_version_ok" = "yes"], - [$2], + OPAL_SUMMARY_ADD([[Transports]],[[Shared memory/Linux KNEM]],[$1],[$opal_check_knem_happy]) + OPAL_VAR_SCOPE_POP + fi + + AS_IF([test "$opal_check_knem_happy" = "yes"], + [$1_CPPFLAGS="[$]$1_CPPFLAGS $ompi_check_knem_CPPFLAGS" + $2], [AS_IF([test ! -z "$with_knem" && test "$with_knem" != "no"], [AC_MSG_ERROR([KNEM support requested but not found. Aborting])]) $3]) - OPAL_VAR_SCOPE_POP ])dnl diff --git a/config/opal_check_libfabric.m4 b/config/opal_check_libfabric.m4 index 5cf314d0ec6..142c7c61008 100644 --- a/config/opal_check_libfabric.m4 +++ b/config/opal_check_libfabric.m4 @@ -1,6 +1,8 @@ dnl -*- shell-script -*- dnl -dnl Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2015-2016 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2016 Los Alamos National Security, LLC. All rights +dnl reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -18,66 +20,76 @@ dnl # action-if-not-found. # AC_DEFUN([OPAL_CHECK_LIBFABRIC],[ - OPAL_VAR_SCOPE_PUSH([opal_check_libfabric_$1_save_CPPFLAGS opal_check_libfabric_$1_save_LDFLAGS opal_check_libfabric_$1_save_LIBS]) + if test -z "$opal_check_libfabric_happy" ; then + OPAL_VAR_SCOPE_PUSH([opal_check_libfabric_$1_save_CPPFLAGS opal_check_libfabric_$1_save_LDFLAGS opal_check_libfabric_$1_save_LIBS]) - # Add --with options - AC_ARG_WITH([libfabric], - [AC_HELP_STRING([--with-libfabric=DIR], - [Specify location of libfabric installation, adding DIR/include to the default search location for libfabric headers, and DIR/lib or DIR/lib64 to the default search location for libfabric libraries. Error if libfabric support cannot be found.])]) - AC_ARG_WITH([libfabric-libdir], - [AC_HELP_STRING([--with-libfabric-libdir=DIR], - [Search for libfabric libraries in DIR])]) + # Add --with options + AC_ARG_WITH([libfabric], + [AC_HELP_STRING([--with-libfabric=DIR], + [Specify location of libfabric installation, adding DIR/include to the default search location for libfabric headers, and DIR/lib or DIR/lib64 to the default search location for libfabric libraries. 
Error if libfabric support cannot be found.])]) + AC_ARG_WITH([libfabric-libdir], + [AC_HELP_STRING([--with-libfabric-libdir=DIR], + [Search for libfabric libraries in DIR])]) - # Sanity check the --with values - OPAL_CHECK_WITHDIR([libfabric], [$with_libfabric], - [include/rdma/fabric.h]) - OPAL_CHECK_WITHDIR([libfabric-libdir], [$with_libfabric_libdir], - [libfabric.*]) + # Sanity check the --with values + OPAL_CHECK_WITHDIR([libfabric], [$with_libfabric], + [include/rdma/fabric.h]) + OPAL_CHECK_WITHDIR([libfabric-libdir], [$with_libfabric_libdir], + [libfabric.*]) - opal_check_libfabric_$1_save_CPPFLAGS=$CPPFLAGS - opal_check_libfabric_$1_save_LDFLAGS=$LDFLAGS - opal_check_libfabric_$1_save_LIBS=$LIBS + opal_check_libfabric_$1_save_CPPFLAGS=$CPPFLAGS + opal_check_libfabric_$1_save_LDFLAGS=$LDFLAGS + opal_check_libfabric_$1_save_LIBS=$LIBS - opal_check_libfabric_happy=1 - AS_IF([test "$with_libfabric" = "no"], - [opal_check_libfabric_happy=0]) + opal_check_libfabric_happy=yes + AS_IF([test "$with_libfabric" = "no"], + [opal_check_libfabric_happy=no]) - AS_IF([test $opal_check_libfabric_happy -eq 1], - [AC_MSG_CHECKING([looking for libfabric in]) - AS_IF([test "$with_libfabric" != "yes"], - [opal_libfabric_dir=$with_libfabric - AC_MSG_RESULT([($opal_libfabric_dir)])], - [AC_MSG_RESULT([(default search paths)])]) - AS_IF([test ! -z "$with_libfabric_libdir" && \ - test "$with_libfabric_libdir" != "yes"], - [opal_libfabric_libdir=$with_libfabric_libdir]) - ]) + AS_IF([test $opal_check_libfabric_happy = yes], + [AC_MSG_CHECKING([looking for libfabric in]) + AS_IF([test "$with_libfabric" != "yes"], + [opal_libfabric_dir=$with_libfabric + AC_MSG_RESULT([($opal_libfabric_dir)])], + [AC_MSG_RESULT([(default search paths)])]) + AS_IF([test ! 
-z "$with_libfabric_libdir" && \ + test "$with_libfabric_libdir" != "yes"], + [opal_libfabric_libdir=$with_libfabric_libdir]) + ]) - AS_IF([test $opal_check_libfabric_happy -eq 1], - [OPAL_CHECK_PACKAGE([$1], - [rdma/fabric.h], - [fabric], - [fi_getinfo], - [], - [$opal_libfabric_dir], - [$opal_libfabric_libdir], - [opal_check_libfabric_happy=1], - [opal_check_libfabric_happy=0])]) + AS_IF([test $opal_check_libfabric_happy = yes], + [OPAL_CHECK_PACKAGE([opal_check_libfabric], + [rdma/fabric.h], + [fabric], + [fi_getinfo], + [], + [$opal_libfabric_dir], + [$opal_libfabric_libdir], + [], + [opal_check_libfabric_happy=no])]) - CPPFLAGS=$opal_check_libfabric_$1_save_CPPFLAGS - LDFLAGS=$opal_check_libfabric_$1_save_LDFLAGS - LIBS=$opal_check_libfabric_$1_save_LIBS + CPPFLAGS=$opal_check_libfabric_$1_save_CPPFLAGS + LDFLAGS=$opal_check_libfabric_$1_save_LDFLAGS + LIBS=$opal_check_libfabric_$1_save_LIBS - AC_SUBST($1_CPPFLAGS) - AC_SUBST($1_LDFLAGS) - AC_SUBST($1_LIBS) + OPAL_SUMMARY_ADD([[Transports]],[[OpenFabrics Libfabric]],[$1],[$opal_check_libfabric_happy]) - AS_IF([test $opal_check_libfabric_happy -eq 1], + OPAL_VAR_SCOPE_POP + fi + + if test $opal_check_libfabric_happy = yes ; then + $1_CPPFLAGS="[$]$1_CPPFLAGS $opal_check_libfabric_CPPFLAGS" + $1_LIBS="[$]$1_LIBS $opal_check_libfabric_LIBS" + $1_LDFLAGS="[$]$1_LDFLAGS $opal_check_libfabric_LDFLAGS" + + AC_SUBST($1_CPPFLAGS) + AC_SUBST($1_LDFLAGS) + AC_SUBST($1_LIBS) + fi + + AS_IF([test $opal_check_libfabric_happy = yes], [$2], - [AS_IF([test "$opal_want_libfabric" = "yes"], + [AS_IF([test -n "$with_libfabric" && test "$with_libfabric" != "no"], [AC_MSG_WARN([libfabric support requested (via --with-libfabric), but not found.]) AC_MSG_ERROR([Cannot continue.])]) $3]) - - OPAL_VAR_SCOPE_POP ])dnl diff --git a/config/opal_check_offsetof.m4 b/config/opal_check_offsetof.m4 index cf644813f68..fc8c0caeabc 100644 --- a/config/opal_check_offsetof.m4 +++ b/config/opal_check_offsetof.m4 @@ -3,9 +3,9 @@ # Copyright (c) 2009 
IBM Corporation. All rights reserved. # Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/config/opal_check_openfabrics.m4 b/config/opal_check_openfabrics.m4 index 9a2b4cfcc2b..0ead428eb56 100644 --- a/config/opal_check_openfabrics.m4 +++ b/config/opal_check_openfabrics.m4 @@ -6,12 +6,12 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights +# Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. # Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. @@ -20,15 +20,15 @@ # Copyright (c) 2014-2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # OPAL_CHECK_OPENFABRICS(prefix, [action-if-found], [action-if-not-found]) # -------------------------------------------------------- -# check if OPENIB support can be found. sets prefix_{CPPFLAGS, +# check if OPENIB support can be found. sets prefix_{CPPFLAGS, # LDFLAGS, LIBS} as needed and runs action-if-found if there is # support, otherwise executes action-if-not-found AC_DEFUN([OPAL_CHECK_OPENFABRICS],[ @@ -38,232 +38,265 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS],[ # verbs stuff lives. 
AC_REQUIRE([OPAL_CHECK_VERBS_DIR]) - # - # Add padding to OpenIB header - # - AC_ARG_ENABLE([openib-control-hdr-padding], - [AC_HELP_STRING([--enable-openib-control-hdr-padding], - [Add padding bytes to the openib BTL control header (default:disabled)])]) - AC_MSG_CHECKING([if want to add padding to the openib BTL control header]) - if test "$enable_openib_control_hdr_padding" = "yes"; then - AC_MSG_RESULT([yes]) - ompi_openib_pad_hdr=1 - elif test "$enable_openib_control_hdr_padding" = "no"; then - AC_MSG_RESULT([no]) - ompi_openib_pad_hdr=0 - else - # - # Enable padding for SPARC platforms by default because the - # btl will segv otherwise. Keep padding disabled for other - # platforms since there are some performance implications with - # padding on for those plaforms. - # - case "${host}" in - sparc*) - AC_MSG_RESULT([yes (enabled by default on SPARC)]) + if test -z "$opal_check_openib_happy" ; then + # + # Add padding to OpenIB header + # + AC_ARG_ENABLE([openib-control-hdr-padding], + [AC_HELP_STRING([--enable-openib-control-hdr-padding], + [Add padding bytes to the openib BTL control header (default:disabled)])]) + AC_MSG_CHECKING([if want to add padding to the openib BTL control header]) + if test "$enable_openib_control_hdr_padding" = "yes"; then + AC_MSG_RESULT([yes]) ompi_openib_pad_hdr=1 - ;; - *) + elif test "$enable_openib_control_hdr_padding" = "no"; then AC_MSG_RESULT([no]) ompi_openib_pad_hdr=0 - ;; - esac - fi - AC_DEFINE_UNQUOTED([OPAL_OPENIB_PAD_HDR], [$ompi_openib_pad_hdr], - [Add padding bytes to the openib BTL control header]) - - AS_IF([test "$opal_want_verbs" = "no"], - [ompi_check_openib_happy="no"], - [ompi_check_openib_happy="yes"]) - - ompi_check_openib_$1_save_CPPFLAGS="$CPPFLAGS" - ompi_check_openib_$1_save_LDFLAGS="$LDFLAGS" - ompi_check_openib_$1_save_LIBS="$LIBS" - - AS_IF([test "$ompi_check_openib_happy" = "yes"], - [AC_CHECK_HEADERS( - fcntl.h sys/poll.h, - [], - [AC_MSG_WARN([fcntl.h sys/poll.h not found. 
Can not build component.]) - ompi_check_openib_happy="no"])]) - - AS_IF([test "$ompi_check_openib_happy" = "yes"], - [OPAL_CHECK_PACKAGE([$1], - [infiniband/verbs.h], - [ibverbs], - [ibv_open_device], - [], - [$opal_verbs_dir], - [$opal_verbs_libdir], - [ompi_check_openib_happy="yes"], - [ompi_check_openib_happy="no"])]) - - CPPFLAGS="$CPPFLAGS $$1_CPPFLAGS" - LDFLAGS="$LDFLAGS $$1_LDFLAGS" - LIBS="$LIBS $$1_LIBS" - - AS_IF([test "$ompi_check_openib_happy" = "yes"], - [AC_CACHE_CHECK( - [number of arguments to ibv_create_cq], - [ompi_cv_func_ibv_create_cq_args], - [AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[#include ]], - [[ibv_create_cq(NULL, 0, NULL, NULL, 0);]])], - [ompi_cv_func_ibv_create_cq_args=5], - [AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[#include ]], - [[ibv_create_cq(NULL, 0, NULL);]])], - [ompi_cv_func_ibv_create_cq_args=3], - [ompi_cv_func_ibv_create_cq_args="unknown"])])]) - AS_IF([test "$ompi_cv_func_ibv_create_cq_args" = "unknown"], - [AC_MSG_WARN([Can not determine number of args to ibv_create_cq.]) - AC_MSG_WARN([Not building component.]) - ompi_check_openib_happy="no"], - [AC_DEFINE_UNQUOTED([OPAL_IBV_CREATE_CQ_ARGS], - [$ompi_cv_func_ibv_create_cq_args], - [Number of arguments to ibv_create_cq])])]) - - # - # OpenIB dynamic SL - # - AC_ARG_ENABLE([openib-dynamic-sl], - [AC_HELP_STRING([--enable-openib-dynamic-sl], - [Enable openib BTL to query Subnet Manager for IB SL (default: enabled)])]) - - # Set these up so that we can do an AC_DEFINE below - # (unconditionally) - $1_have_xrc=0 - $1_have_xrc_domains=0 - $1_have_opensm_devel=0 - - # If we have the openib stuff available, find out what we've got - AS_IF([test "$ompi_check_openib_happy" = "yes"], - [AC_CHECK_DECLS([IBV_EVENT_CLIENT_REREGISTER, IBV_ACCESS_SO, IBV_ATOMIC_HCA], [], [], - [#include ]) - AC_CHECK_FUNCS([ibv_get_device_list ibv_resize_cq]) - - # struct ibv_device.transport_type was added in OFED v1.2 - AC_CHECK_MEMBERS([struct ibv_device.transport_type], [], [], - [#include ]) - - # 
ibv_create_xrc_rcv_qp was added in OFED 1.3 - # ibv_cmd_open_xrcd (aka XRC Domains) was added in OFED 3.12 - if test "$enable_connectx_xrc" = "yes"; then - $1_have_xrc=1 - AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp ibv_cmd_open_xrcd], - [], [$1_have_xrc=0]) - AC_CHECK_DECLS([IBV_SRQT_XRC], - [], [$1_have_xrc=0]) + else + # + # Enable padding for SPARC platforms by default because the + # btl will segv otherwise. Keep padding disabled for other + # platforms since there are some performance implications with + # padding on for those plaforms. + # + case "${host}" in + sparc*) + AC_MSG_RESULT([yes (enabled by default on SPARC)]) + ompi_openib_pad_hdr=1 + ;; + *) + AC_MSG_RESULT([no]) + ompi_openib_pad_hdr=0 + ;; + esac + fi + AC_DEFINE_UNQUOTED([OPAL_OPENIB_PAD_HDR], [$ompi_openib_pad_hdr], + [Add padding bytes to the openib BTL control header]) + + AS_IF([test "$opal_want_verbs" = "no"], + [opal_check_openib_happy="no"], + [opal_check_openib_happy="yes"]) + + ompi_check_openib_$1_save_CPPFLAGS="$CPPFLAGS" + ompi_check_openib_$1_save_LDFLAGS="$LDFLAGS" + ompi_check_openib_$1_save_LIBS="$LIBS" + + AS_IF([test "$opal_check_openib_happy" = "yes"], + [AC_CHECK_HEADERS( + fcntl.h sys/poll.h, + [], + [AC_MSG_WARN([fcntl.h sys/poll.h not found. 
Can not build component.]) + opal_check_openib_happy="no"])]) + + AS_IF([test "$opal_check_openib_happy" = "yes"], + [OPAL_CHECK_PACKAGE([opal_check_openib], + [infiniband/verbs.h], + [ibverbs], + [ibv_open_device], + [], + [$opal_verbs_dir], + [$opal_verbs_libdir], + [opal_check_openib_happy="yes"], + [opal_check_openib_happy="no"])]) + + CPPFLAGS="$CPPFLAGS $opal_check_openib_CPPFLAGS" + LDFLAGS="$LDFLAGS $opal_check_openib_LDFLAGS" + LIBS="$LIBS $opal_check_openib_LIBS" + + AS_IF([test "$opal_check_openib_happy" = "yes"], + [AC_CACHE_CHECK( + [number of arguments to ibv_create_cq], + [ompi_cv_func_ibv_create_cq_args], + [AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[#include ]], + [[ibv_create_cq(NULL, 0, NULL, NULL, 0);]])], + [ompi_cv_func_ibv_create_cq_args=5], + [AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[#include ]], + [[ibv_create_cq(NULL, 0, NULL);]])], + [ompi_cv_func_ibv_create_cq_args=3], + [ompi_cv_func_ibv_create_cq_args="unknown"])])]) + AS_IF([test "$ompi_cv_func_ibv_create_cq_args" = "unknown"], + [AC_MSG_WARN([Can not determine number of args to ibv_create_cq.]) + AC_MSG_WARN([Not building component.]) + opal_check_openib_happy="no"], + [AC_DEFINE_UNQUOTED([OPAL_IBV_CREATE_CQ_ARGS], + [$ompi_cv_func_ibv_create_cq_args], + [Number of arguments to ibv_create_cq])])]) + + # + # OpenIB dynamic SL + # + AC_ARG_ENABLE([openib-dynamic-sl], + [AC_HELP_STRING([--enable-openib-dynamic-sl], + [Enable openib BTL to query Subnet Manager for IB SL (default: enabled)])]) + + # Set these up so that we can do an AC_DEFINE below + # (unconditionally) + opal_check_openib_have_xrc=0 + opal_check_openib_have_xrc_domains=0 + opal_check_openib_have_opensm_devel=0 + + # If we have the openib stuff available, find out what we've got + AS_IF([test "$opal_check_openib_happy" = "yes"], + [AC_CHECK_DECLS([IBV_EVENT_CLIENT_REREGISTER, IBV_ACCESS_SO, IBV_ATOMIC_HCA], [], [], + [#include ]) + AC_CHECK_FUNCS([ibv_get_device_list ibv_resize_cq]) + + # struct ibv_device.transport_type was 
added in OFED v1.2 + AC_CHECK_MEMBERS([struct ibv_device.transport_type], [], [], + [#include ]) + + # We have to check functions both exist *and* are declared + # since some distros ship broken ibverbs devel headers + # IBV_DEVICE_XRC is common to all OFED versions + # ibv_create_xrc_rcv_qp was added in OFED 1.3 + # ibv_cmd_open_xrcd (aka XRC Domains) was added in OFED 3.12 + if test "$enable_connectx_xrc" = "yes"; then + AC_CHECK_DECLS([IBV_DEVICE_XRC], + [opal_check_openib_have_xrc=1 + opal_check_openib_have_xrc_domains=1], + [], + [#include ]) + fi + if test "$enable_connectx_xrc" = "yes" \ + && test $opal_check_openib_have_xrc -eq 1; then + AC_CHECK_DECLS([ibv_create_xrc_rcv_qp], + [AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp], + [], [opal_check_openib_have_xrc=0])], + [opal_check_openib_have_xrc=0], + [#include ]) + fi + if test "$enable_connectx_xrc" = "yes" \ + && test $opal_check_openib_have_xrc_domains -eq 1; then + AC_CHECK_DECLS([ibv_cmd_open_xrcd], + [AC_CHECK_DECLS([IBV_SRQT_XRC], + [AC_CHECK_FUNCS([ibv_cmd_open_xrcd], + [], [opal_check_openib_have_xrc_domains=0])], + [opal_check_openib_have_xrc_domains=0], + [#include ])], + [opal_check_openib_have_xrc_domains=0], + [#include ]) + # XRC and XRC Domains should be considered as exclusive + if test "$opal_check_openib_have_xrc" -eq 1 && \ + test "$opal_check_openib_have_xrc_domains" -eq 1; then + opal_check_openib_have_xrc=0 + fi + fi + + if test "no" != "$enable_openib_dynamic_sl"; then + # We need ib_types.h file, which is installed with opensm-devel + # package. However, ib_types.h has a bad include directive, + # which will cause AC_CHECK_HEADER to fail. + # So instead, we will look for another file that is also + # installed as part of opensm-devel package and included in + # ib_types.h, but it doesn't include any other IB-related files. 
+ AC_CHECK_HEADER([infiniband/complib/cl_types_osd.h], + [AC_CHECK_LIB([osmcomp], [cl_map_init], + [opal_check_openib_have_opensm_devel=1],[])], + [], + []) + # Abort if dynamic SL support was explicitly requested but opensm-devel + # package wasn't found. Otherwise, OMPI will be built w/o dynamic SL. + AC_MSG_CHECKING([if can use dynamic SL support]) + AS_IF([test "$opal_check_openib_have_opensm_devel" = "1"], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + AS_IF([test "$enable_openib_dynamic_sl" = "yes"], + [AC_MSG_WARN([--enable-openib-dynamic-sl was specified but the]) + AC_MSG_WARN([appropriate header/library files could not be found]) + AC_MSG_WARN([Please install opensm-devel if you need dynamic SL support]) + AC_MSG_ERROR([Cannot continue])])]) + fi + + + # Check support for RDMAoE devices + $1_have_rdmaoe=0 + AC_CHECK_DECLS([IBV_LINK_LAYER_ETHERNET], + [$1_have_rdmaoe=1], [], [#include ]) - fi - if test "$enable_connectx_xrc" = "yes" \ - && test $$1_have_xrc -eq 1; then - AC_CHECK_FUNCS([ibv_cmd_open_xrcd], [$1_have_xrc_domains=1]) - fi - - if test "no" != "$enable_openib_dynamic_sl"; then - # We need ib_types.h file, which is installed with opensm-devel - # package. However, ib_types.h has a bad include directive, - # which will cause AC_CHECK_HEADER to fail. - # So instead, we will look for another file that is also - # installed as part of opensm-devel package and included in - # ib_types.h, but it doesn't include any other IB-related files. - AC_CHECK_HEADER([infiniband/complib/cl_types_osd.h], - [AC_CHECK_LIB([osmcomp], [cl_map_init], - [$1_have_opensm_devel=1],[])], - [], - []) - # Abort if dynamic SL support was explicitly requested but opensm-devel - # package wasn't found. Otherwise, OMPI will be built w/o dynamic SL. 
- AC_MSG_CHECKING([if can use dynamic SL support]) - AS_IF([test "$$1_have_opensm_devel" = "1"], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - AS_IF([test "$enable_openib_dynamic_sl" = "yes"], - [AC_MSG_WARN([--enable-openib-dynamic-sl was specified but the]) - AC_MSG_WARN([appropriate header/library files could not be found]) - AC_MSG_WARN([Please install opensm-devel if you need dynamic SL support]) - AC_MSG_ERROR([Cannot continue])])]) - fi + AC_MSG_CHECKING([if RDMAoE support is enabled]) + AC_DEFINE_UNQUOTED([OPAL_HAVE_RDMAOE], [$$1_have_rdmaoe], [Enable RDMAoE support]) + if test "1" = "$$1_have_rdmaoe"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + + ]) + + # Check to see if works. It is known to + # create problems on some platforms with some compilers (e.g., + # RHEL4U3 with the PGI 32 bit compiler). Use undocumented (in AC + # 2.63) feature of AC_CHECK_HEADERS: if you explicitly pass in + # AC_INCLUDES_DEFAULT as the 4th arg to AC_CHECK_HEADERS, the test + # will fail if the header is present but not compilable, *but it + # will not print the big scary warning*. See + # http://lists.gnu.org/archive/html/autoconf/2008-10/msg00143.html. 
+ AS_IF([test "$opal_check_openib_happy" = "yes"], + [AC_CHECK_HEADERS([infiniband/driver.h], [], [], + [AC_INCLUDES_DEFAULT])]) + + AC_MSG_CHECKING([if ConnectX XRC support is enabled]) + AC_DEFINE_UNQUOTED([OPAL_HAVE_CONNECTX_XRC], [$opal_check_openib_have_xrc], + [Enable features required for ConnectX XRC support]) + if test "1" = "$opal_check_openib_have_xrc"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + + AC_MSG_CHECKING([if ConnectIB XRC support is enabled]) + AC_DEFINE_UNQUOTED([OPAL_HAVE_CONNECTX_XRC_DOMAINS], [$opal_check_openib_have_xrc_domains], + [Enable features required for XRC domains support]) + if test "1" = "$opal_check_openib_have_xrc_domains"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + + AC_MSG_CHECKING([if dynamic SL is enabled]) + AC_DEFINE_UNQUOTED([OPAL_ENABLE_DYNAMIC_SL], [$opal_check_openib_have_opensm_devel], + [Enable features required for dynamic SL support]) + if test "1" = "$opal_check_openib_have_opensm_devel"; then + AC_MSG_RESULT([yes]) + $1_LIBS="-losmcomp $$1_LIBS" + else + AC_MSG_RESULT([no]) + fi + AS_IF([test -z "$opal_verbs_dir"], + [openib_include_dir="/usr/include"], + [openib_include_dir="$opal_verbs_dir/include"]) + opal_check_openib_CPPFLAGS="$opal_check_openib_CPPFLAGS -I$openib_include_dir/infiniband" - # Check support for RDMAoE devices - $1_have_rdmaoe=0 - AC_CHECK_DECLS([IBV_LINK_LAYER_ETHERNET], - [$1_have_rdmaoe=1], [], - [#include ]) + CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS" + LDFLAGS="$ompi_check_openib_$1_save_LDFLAGS" + LIBS="$ompi_check_openib_$1_save_LIBS" - AC_MSG_CHECKING([if RDMAoE support is enabled]) - AC_DEFINE_UNQUOTED([OPAL_HAVE_RDMAOE], [$$1_have_rdmaoe], [Enable RDMAoE support]) - if test "1" = "$$1_have_rdmaoe"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi + OPAL_SUMMARY_ADD([[Transports]],[[OpenFabrics Verbs]],[$1],[$opal_check_openib_happy]) - ]) - - # Check to see if works. 
It is known to - # create problems on some platforms with some compilers (e.g., - # RHEL4U3 with the PGI 32 bit compiler). Use undocumented (in AC - # 2.63) feature of AC_CHECK_HEADERS: if you explicitly pass in - # AC_INCLUDES_DEFAULT as the 4th arg to AC_CHECK_HEADERS, the test - # will fail if the header is present but not compilable, *but it - # will not print the big scary warning*. See - # http://lists.gnu.org/archive/html/autoconf/2008-10/msg00143.html. - AS_IF([test "$ompi_check_openib_happy" = "yes"], - [AC_CHECK_HEADERS([infiniband/driver.h], [], [], - [AC_INCLUDES_DEFAULT])]) - - AC_MSG_CHECKING([if ConnectX XRC support is enabled]) - AC_DEFINE_UNQUOTED([OPAL_HAVE_CONNECTX_XRC], [$$1_have_xrc], - [Enable features required for ConnectX XRC support]) - if test "1" = "$$1_have_xrc"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) + OPAL_VAR_SCOPE_POP fi - AC_MSG_CHECKING([if ConnectIB XRC support is enabled]) - AC_DEFINE_UNQUOTED([OPAL_HAVE_CONNECTX_XRC_DOMAINS], [$$1_have_xrc_domains], - [Enable features required for XRC domains support]) - if test "1" = "$$1_have_xrc_domains"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi + $1_have_xrc=$opal_check_openib_have_xrc + $1_have_xrc_domains=$opal_check_openib_have_xrc_domains + $1_have_opensm_devel=$opal_check_openib_have_opensm_devel - AC_MSG_CHECKING([if dynamic SL is enabled]) - AC_DEFINE_UNQUOTED([OPAL_ENABLE_DYNAMIC_SL], [$$1_have_opensm_devel], - [Enable features required for dynamic SL support]) - if test "1" = "$$1_have_opensm_devel"; then - AC_MSG_RESULT([yes]) - $1_LIBS="-losmcomp $$1_LIBS" - else - AC_MSG_RESULT([no]) - fi - - AS_IF([test -z "$opal_verbs_dir"], - [openib_include_dir="/usr/include"], - [openib_include_dir="$opal_verbs_dir/include"]) - $1_CPPFLAGS="$$1_CPPFLAGS -I$openib_include_dir/infiniband" - - CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS" - LDFLAGS="$ompi_check_openib_$1_save_LDFLAGS" - LIBS="$ompi_check_openib_$1_save_LIBS" - - AS_IF([test 
"$ompi_check_openib_happy" = "yes"], - [$2], + AS_IF([test "$opal_check_openib_happy" = "yes"], + [$1_CPPFLAGS="[$]$1_CPPFLAGS $opal_check_openib_CPPFLAGS" + $1_LDFLAGS="[$]$1_LDFLAGS $opal_check_openib_LDFLAGS" + $1_LIBS="[$]$1_LIBS $opal_check_openib_LIBS" + $2], [AS_IF([test "$opal_want_verbs" = "yes"], [AC_MSG_WARN([Verbs support requested (via --with-verbs) but not found.]) AC_MSG_WARN([If you are using libibverbs v1.0 (i.e., OFED v1.0 or v1.1), you *MUST* have both the libsysfs headers and libraries installed. Later versions of libibverbs do not require libsysfs.]) AC_MSG_ERROR([Aborting.])]) $3]) - OPAL_VAR_SCOPE_POP ]) AC_DEFUN([OPAL_CHECK_OPENFABRICS_CM_ARGS],[ @@ -279,7 +312,7 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS_CM_ARGS],[ # AC_ARG_ENABLE([openib-udcm], [AC_HELP_STRING([--enable-openib-udcm], - [Enable datagram connection support in openib BTL (default: enabled)])], + [Enable datagram connection support in openib BTL (default: enabled)])], [enable_openib_udcm="$enableval"], [enable_openib_udcm="yes"]) # Per discussion with Ralph and Nathan, disable UDCM for now. # It's borked and needs some surgery to get back on its feet. @@ -311,7 +344,7 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS_CM],[ LDFLAGS="$LDFLAGS $$1_LDFLAGS" LIBS="$LIBS $$1_LIBS" - AS_IF([test "$ompi_check_openib_happy" = "yes"], + AS_IF([test "$opal_check_openib_happy" = "yes"], [# Do we have a recent enough RDMA CM? Need to have the # rdma_get_peer_addr (inline) function (originally appeared # in OFED v1.3). 
@@ -322,7 +355,7 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS_CM],[ $1_msg=no AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include "rdma/rdma_cma.h" ]], [[void *ret = (void*) rdma_get_peer_addr((struct rdma_cm_id*)0);]])], - [$1_have_rdmacm=1 + [$1_have_rdmacm=1 $1_msg=yes]) AC_MSG_RESULT([$$1_msg])])]) @@ -366,11 +399,30 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS_CM],[ fi ])dnl +AC_DEFUN([OPAL_CHECK_EXP_VERBS],[ + OPAL_VAR_SCOPE_PUSH([have_struct_ibv_exp_send_wr]) + + AC_MSG_CHECKING([whether expanded verbs are available]) + AC_TRY_COMPILE([#include <infiniband/verbs_exp.h>], [struct ibv_exp_send_wr;], + [have_struct_ibv_exp_send_wr=1 + AC_MSG_RESULT([yes])], + [have_struct_ibv_exp_send_wr=0 + AC_MSG_RESULT([no])]) + + AC_DEFINE_UNQUOTED([HAVE_EXP_VERBS], [$have_struct_ibv_exp_send_wr], [Experimental verbs]) + AC_CHECK_DECLS([IBV_EXP_ATOMIC_HCA_REPLY_BE, IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY, ibv_exp_create_qp, ibv_exp_query_device, IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG], + [], [], [#include <infiniband/verbs_exp.h>]) + AC_CHECK_MEMBERS([struct ibv_exp_device_attr.ext_atom, struct ibv_exp_device_attr.ext_atomic_cap], [], [], + [[#include <infiniband/verbs_exp.h>]]) +AS_IF([test "$have_struct_ibv_exp_send_wr" = 1], [$1], [$2]) + OPAL_VAR_SCOPE_POP +])dnl + AC_DEFUN([OPAL_CHECK_MLNX_OPENFABRICS],[ $1_have_mverbs=0 $1_have_mqe=0 - AS_IF([test "$ompi_check_openib_happy" = "yes"], + AS_IF([test "$opal_check_openib_happy" = "yes"], [OPAL_CHECK_PACKAGE([$1], [infiniband/mverbs.h], [mverbs], @@ -381,7 +433,7 @@ AC_DEFUN([OPAL_CHECK_MLNX_OPENFABRICS],[ [$1_have_mverbs=1], [])]) - AS_IF([test "$ompi_check_openib_happy" = "yes"], + AS_IF([test "$opal_check_openib_happy" = "yes"], [OPAL_CHECK_PACKAGE([$1], [infiniband/mqe.h], [mqe], @@ -410,11 +462,11 @@ AC_DEFUN([OPAL_CHECK_MLNX_OPENFABRICS],[ fi AS_IF([test "1" = "$$1_have_mverbs"], - [AC_CHECK_DECLS([IBV_M_WR_CALC_RDMA_WRITE_WITH_IMM], + [AC_CHECK_DECLS([IBV_M_WR_CALC_RDMA_WRITE_WITH_IMM], [AC_DEFINE_UNQUOTED([OPAL_HAVE_IBOFFLOAD_CALC_RDMA], [1], - [Whether IBV_M_WR_CALC_SEND is defined or not])], + [Whether IBV_M_WR_CALC_SEND 
is defined or not])], [AC_DEFINE_UNQUOTED([OPAL_HAVE_IBOFFLOAD_CALC_RDMA], [0], - [Whether IBV_M_WR_CALC_SEND is defined or not])], + [Whether IBV_M_WR_CALC_SEND is defined or not])], [#include ])]) # restoring the CPPFLAGS diff --git a/config/opal_check_os_flavors.m4 b/config/opal_check_os_flavors.m4 index d1d124d7eb8..e8eaba112e9 100644 --- a/config/opal_check_os_flavors.m4 +++ b/config/opal_check_os_flavors.m4 @@ -59,9 +59,9 @@ AC_DEFUN([OPAL_CHECK_OS_FLAVORS], # check for sockaddr_in (a good sign we have TCP) AC_CHECK_HEADERS([netdb.h netinet/in.h netinet/tcp.h]) - AC_CHECK_TYPES([struct sockaddr_in], + AC_CHECK_TYPES([struct sockaddr_in], [opal_found_sockaddr=yes], - [opal_found_sockaddr=no], + [opal_found_sockaddr=no], [AC_INCLUDES_DEFAULT #ifdef HAVE_NETINET_IN_H #include diff --git a/config/opal_check_package.m4 b/config/opal_check_package.m4 index 8e3f83d46b2..9bbe0a6d00a 100644 --- a/config/opal_check_package.m4 +++ b/config/opal_check_package.m4 @@ -13,6 +13,8 @@ dnl All rights reserved. dnl Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. +dnl Copyright (c) 2016 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -29,15 +31,18 @@ AC_DEFUN([_OPAL_CHECK_PACKAGE_HEADER], [ # cache variable for the library check. one should not copy this # code into other places unless you want much pain and suffering AS_VAR_PUSHDEF([opal_Header], [ac_cv_header_$2]) + OPAL_VAR_SCOPE_PUSH([dir_prefix]) # so this sucks, but there's no way to get through the progression # of header includes without killing off the cache variable and trying # again... 
unset opal_Header + # get rid of the trailing slash(es) + dir_prefix=$(echo $3 | sed -e 'sX/*$XXg') opal_check_package_header_happy="no" - AS_IF([test "$3" = "/usr" || \ - test "$3" = "/usr/local"], + AS_IF([test "$dir_prefix" = "/usr" || \ + test "$dir_prefix" = "/usr/local"], [ # try as is... AC_VERBOSE([looking for header without includes]) AC_CHECK_HEADERS([$2], [opal_check_package_header_happy="yes"], []) @@ -46,14 +51,15 @@ AC_DEFUN([_OPAL_CHECK_PACKAGE_HEADER], [ unset opal_Header])]) AS_IF([test "$opal_check_package_header_happy" = "no"], - [AS_IF([test "$3" != ""], - [$1_CPPFLAGS="$$1_CPPFLAGS -I$3/include" - CPPFLAGS="$CPPFLAGS -I$3/include"]) + [AS_IF([test "$dir_prefix" != ""], + [$1_CPPFLAGS="$$1_CPPFLAGS -I$dir_prefix/include" + CPPFLAGS="$CPPFLAGS -I$dir_prefix/include"]) AC_CHECK_HEADERS([$2], [opal_check_package_header_happy="yes"], [], [$6]) AS_IF([test "$opal_check_package_header_happy" = "yes"], [$4], [$5])], [$4]) unset opal_check_package_header_happy + OPAL_VAR_SCOPE_POP([dir_prefix]) AS_VAR_POPDEF([opal_Header])dnl ]) diff --git a/config/opal_check_pmi.m4 b/config/opal_check_pmi.m4 index 9112eaf9b07..bb1df6453cf 100644 --- a/config/opal_check_pmi.m4 +++ b/config/opal_check_pmi.m4 @@ -13,9 +13,10 @@ # Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# Copyright (c) 2014 Research Organization for Information Science +# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -41,7 +42,7 @@ AC_DEFUN([OPAL_CHECK_PMI_LIB], AC_MSG_CHECKING([for $3.h in $1/include]) AS_IF([test -f $1/include/$3.h], [AC_MSG_RESULT([found]) - opal_check_$3_mycppflags="-I$3/include"], + opal_check_$3_mycppflags="-I$1/include"], [AC_MSG_RESULT([not found]) AC_MSG_CHECKING([for $3.h in $1/include/slurm]) AS_IF([test -f $1/include/slurm/$3.h], @@ -76,7 +77,7 @@ AC_DEFUN([OPAL_CHECK_PMI_LIB], [opal_check_$3_lib_happy=no])], [opal_check_$3_lib_happy=no AC_MSG_RESULT([not found])]) - + # check for presence of lib64 directory - if found, see if the # desired library is present and matches our build requirements files=`ls $2/lib64/lib$3.* 2> /dev/null | wc -l` @@ -195,12 +196,12 @@ AC_DEFUN([OPAL_CHECK_PMI],[ AS_IF([test "$opal_enable_pmi2" = "yes"], [AS_IF([test "$default_pmi_loc" = "no" || test "$slurm_pmi_found" = "yes"], - [opal_pmi2_CPPFLAGS="$pmi_CPPFLAGS" + [opal_pmi2_CPPFLAGS="$pmi2_CPPFLAGS" AC_SUBST(opal_pmi2_CPPFLAGS)]) AS_IF([test "$default_pmi_libloc" = "no" || test "$slurm_pmi_found" = "yes"], - [opal_pmi2_LDFLAGS="$pmi_LDFLAGS" + [opal_pmi2_LDFLAGS="$pmi2_LDFLAGS" AC_SUBST(opal_pmi2_LDFLAGS) - opal_pmi2_rpath="$pmi_rpath" + opal_pmi2_rpath="$pmi2_rpath" AC_SUBST(opal_pmi2_rpath)])]) # since support was explicitly requested, then we should error out @@ -224,3 +225,110 @@ AC_DEFUN([OPAL_CHECK_PMI],[ OPAL_VAR_SCOPE_POP ]) +AC_DEFUN([OPAL_CHECK_PMIX],[ + + OPAL_VAR_SCOPE_PUSH([opal_external_pmix_save_CPPFLAGS opal_external_pmix_save_LDFLAGS opal_external_pmix_save_LIBS]) + + AC_ARG_WITH([pmix], + [AC_HELP_STRING([--with-pmix(=DIR)], + [Build PMIx support. DIR can take one of three values: "internal", "external", or a valid directory name. "internal" (or no DIR value) forces Open MPI to use its internal copy of PMIx. "external" forces Open MPI to use an external installation of PMIx. 
Supplying a valid directory name also forces Open MPI to use an external installation of PMIx, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. Note that Open MPI does not support --without-pmix.])]) + + AS_IF([test "$with_pmix" = "no"], + [AC_MSG_WARN([Open MPI requires PMIx support. It can be built]) + AC_MSG_WARN([with either its own internal copy of PMIx, or with]) + AC_MSG_WARN([an external copy that you supply.]) + AC_MSG_ERROR([Cannot continue])]) + + AC_MSG_CHECKING([if user requested external PMIx support($with_pmix)]) + AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "internal"], + [AC_MSG_RESULT([no]) + opal_external_pmix_happy=no], + + [AC_MSG_RESULT([yes]) + # check for external pmix lib */ + AS_IF([test "$with_pmix" = "external"], + [pmix_ext_install_dir=/usr], + [pmix_ext_install_dir=$with_pmix]) + + # Make sure we have the headers and libs in the correct location + OPAL_CHECK_WITHDIR([external-pmix], [$pmix_ext_install_dir/include], [pmix.h]) + OPAL_CHECK_WITHDIR([external-libpmix], [$pmix_ext_install_dir/lib], [libpmix.*]) + + # check the version + opal_external_pmix_save_CPPFLAGS=$CPPFLAGS + opal_external_pmix_save_LDFLAGS=$LDFLAGS + opal_external_pmix_save_LIBS=$LIBS + + # if the pmix_version.h file does not exist, then + # this must be from a pre-1.1.5 version + AC_MSG_CHECKING([PMIx version]) + CPPFLAGS="-I$pmix_ext_install_dir/include $CPPFLAGS" + AS_IF([test "x`ls $pmix_ext_install_dir/include/pmix_version.h 2> /dev/null`" = "x"], + [AC_MSG_RESULT([version file not found - assuming v1.1.4]) + opal_external_pmix_version_found=1 + opal_external_pmix_version=114], + [AC_MSG_RESULT([version file found]) + opal_external_pmix_version_found=0]) + + # if it does exist, then we need to parse it to find + # the actual release series + AS_IF([test "$opal_external_pmix_version_found" = "0"], + [AC_MSG_CHECKING([version 3x]) + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #include + #if 
(PMIX_VERSION_MAJOR != 3L) + #error "not version 3" + #endif + ], [])], + [AC_MSG_RESULT([found]) + opal_external_pmix_version=3X + opal_external_pmix_version_found=1 + AC_MSG_WARN([This version of Open MPI does not support PMIx version 2.x and later]) + AC_MSG_ERROR([Cannot continue])], + [AC_MSG_RESULT([not found])])]) + + AS_IF([test "$opal_external_pmix_version_found" = "0"], + [AC_MSG_CHECKING([version 2x]) + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #include + #if (PMIX_VERSION_MAJOR != 2L) + #error "not version 2" + #endif + ], [])], + [AC_MSG_RESULT([found]) + opal_external_pmix_version=2X + opal_external_pmix_version_found=1 + AC_MSG_WARN([This version of Open MPI does not support PMIx version 2.x and later]) + AC_MSG_ERROR([Cannot continue])], + [AC_MSG_RESULT([not found])])]) + + AS_IF([test "$opal_external_pmix_version_found" = "0"], + [AC_MSG_CHECKING([version 1x]) + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #include + #if (PMIX_VERSION_MAJOR != 1L) + #error "not version 1" + #endif + ], [])], + [AC_MSG_RESULT([found]) + opal_external_pmix_version=1X + opal_external_pmix_version_found=1], + [AC_MSG_RESULT([not found])])]) + + AS_IF([test "x$opal_external_pmix_version" = "x"], + [AC_MSG_WARN([External PMIx support requested, but version]) + AC_MSG_WARN([information of the external lib could not]) + AC_MSG_WARN([be detected]) + AC_MSG_ERROR([cannot continue])]) + + CPPFLAGS=$opal_external_pmix_save_CPPFLAGS + LDFLAGS=$opal_external_pmix_save_LDFLAGS + LIBS=$opal_external_pmix_save_LIBS + + opal_external_pmix_CPPFLAGS="-I$pmix_ext_install_dir/include" + opal_external_pmix_LDFLAGS=-L$pmix_ext_install_dir/lib + opal_external_pmix_LIBS=-lpmix + opal_external_pmix_happy=yes]) + + OPAL_VAR_SCOPE_POP +]) diff --git a/config/opal_check_portals4.m4 b/config/opal_check_portals4.m4 index d8a27ffe8ef..086bb3f665d 100644 --- a/config/opal_check_portals4.m4 +++ b/config/opal_check_portals4.m4 @@ -11,9 +11,11 @@ dnl University of Stuttgart. All rights reserved. 
dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2006 QLogic Corp. All rights reserved. -dnl Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2016 Los Alamos National Security, LLC. All rights +dnl reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -27,72 +29,79 @@ dnl # LDFLAGS, LIBS} as needed and runs action-if-found if there is # support, otherwise executes action-if-not-found AC_DEFUN([OPAL_CHECK_PORTALS4],[ - AC_ARG_WITH([portals4], - [AC_HELP_STRING([--with-portals4(=DIR)], - [Build Portals4 support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) - OPAL_CHECK_WITHDIR([portals4], [$with_portals4], [include/portals4.h]) - AC_ARG_WITH([portals4-libdir], - [AC_HELP_STRING([--with-portals4-libdir=DIR], - [Search for Portals4 libraries in DIR])]) - OPAL_CHECK_WITHDIR([portals4-libdir], [$with_portals4_libdir], [libportals.*]) + if test -z "$ompi_check_portals4_happy" ; then + AC_ARG_WITH([portals4], + [AC_HELP_STRING([--with-portals4(=DIR)], + [Build Portals4 support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) + OPAL_CHECK_WITHDIR([portals4], [$with_portals4], [include/portals4.h]) + AC_ARG_WITH([portals4-libdir], + [AC_HELP_STRING([--with-portals4-libdir=DIR], + [Search for Portals4 libraries in DIR])]) + OPAL_CHECK_WITHDIR([portals4-libdir], [$with_portals4_libdir], [libportals.*]) - ompi_check_portals4_$1_save_CPPFLAGS="$CPPFLAGS" - ompi_check_portals4_$1_save_LDFLAGS="$LDFLAGS" - ompi_check_portals4_$1_save_LIBS="$LIBS" + ompi_check_portals4_$1_save_CPPFLAGS="$CPPFLAGS" + ompi_check_portals4_$1_save_LDFLAGS="$LDFLAGS" + 
ompi_check_portals4_$1_save_LIBS="$LIBS" - AS_IF([test "$with_portals4" != "no"], - [AS_IF([test ! -z "$with_portals4" && test "$with_portals4" != "yes"], - [ompi_check_portals4_dir="$with_portals4"]) - AS_IF([test ! -z "$with_portals4_libdir" && test "$with_portals4_libdir" != "yes"], - [ompi_check_portals4_libdir="$with_portals4_libdir"]) + AS_IF([test "$with_portals4" != "no"], + [AS_IF([test ! -z "$with_portals4" && test "$with_portals4" != "yes"], + [ompi_check_portals4_dir="$with_portals4"]) + AS_IF([test ! -z "$with_portals4_libdir" && test "$with_portals4_libdir" != "yes"], + [ompi_check_portals4_libdir="$with_portals4_libdir"]) - OPAL_CHECK_PACKAGE([$1], - [portals4.h], - [portals], - [PtlLEAppend], - [], - [$ompi_check_portals4_dir], - [$ompi_check_portals4_libdir], - [ompi_check_portals4_happy="yes"], - [ompi_check_portals4_happy="no"])], - [ompi_check_portals4_happy="no"]) + OPAL_CHECK_PACKAGE([opal_check_portals4], + [portals4.h], + [portals], + [PtlLEAppend], + [], + [$ompi_check_portals4_dir], + [$ompi_check_portals4_libdir], + [ompi_check_portals4_happy="yes"], + [ompi_check_portals4_happy="no"])], + [ompi_check_portals4_happy="no"]) - CPPFLAGS="$ompi_check_portals4_$1_save_CPPFLAGS" - LDFLAGS="$ompi_check_portals4_$1_save_LDFLAGS" - LIBS="$ompi_check_portals4_$1_save_LIBS" + CPPFLAGS="$ompi_check_portals4_$1_save_CPPFLAGS" + LDFLAGS="$ompi_check_portals4_$1_save_LDFLAGS" + LIBS="$ompi_check_portals4_$1_save_LIBS" - max_md_size=0 - AC_ARG_WITH([portals4-max-md-size], - [AC_HELP_STRING([--with-portals4-max-md-size=SIZE], - [Log base 2 of the maximum size in bytes of a memory descriptor. 
Should only be set for implementations which do not support binding all of virtual address space.])]) - AS_IF([test "$with_portals4_max_md_size" = "yes" || test "$with_portals4_max_md_size" = "no"], - [AC_MSG_ERROR([--with-portals4-max-md-size requires an integer argument])], - [AS_IF([test -n "$with_portals4_max_md_size"], - [max_md_size="$with_portals4_max_md_size"])]) - AC_DEFINE_UNQUOTED([OPAL_PORTALS4_MAX_MD_SIZE], [$max_md_size], - [Log base 2 of the maximum size in bytes of a memory descriptor. Set to 0 if MD can bind all of memory.]) + max_md_size=0 + AC_ARG_WITH([portals4-max-md-size], + [AC_HELP_STRING([--with-portals4-max-md-size=SIZE], + [Log base 2 of the maximum size in bytes of a memory descriptor. Should only be set for implementations which do not support binding all of virtual address space.])]) + AS_IF([test "$with_portals4_max_md_size" = "yes" || test "$with_portals4_max_md_size" = "no"], + [AC_MSG_ERROR([--with-portals4-max-md-size requires an integer argument])], + [AS_IF([test -n "$with_portals4_max_md_size"], + [max_md_size="$with_portals4_max_md_size"])]) + AC_DEFINE_UNQUOTED([OPAL_PORTALS4_MAX_MD_SIZE], [$max_md_size], + [Log base 2 of the maximum size in bytes of a memory descriptor. Set to 0 if MD can bind all of memory.]) - max_va_size=0 - AC_ARG_WITH([portals4-max-va-size], - [AC_HELP_STRING([--with-portals4-max-va-size=SIZE], - [Log base 2 of the maximum size in bytes of the user virtual address space. Should only be set for implementations which do not support binding all of virtual address space.])]) - AS_IF([test "$with_portals4_max_va_size" = "yes" || test "$with_portals4_max_va_size" = "no"], - [AC_MSG_ERROR([--with-portals4-max-va-size requires an integer argument])], - [AS_IF([test -n "$with_portals4_max_va_size"], - [max_va_size="$with_portals4_max_va_size"])]) - AC_DEFINE_UNQUOTED([OPAL_PORTALS4_MAX_VA_SIZE], [$max_va_size], - [Log base 2 of the maximum size in bytes of the user virtual address space. 
Set to 0 if MD can bind all of memory.]) + max_va_size=0 + AC_ARG_WITH([portals4-max-va-size], + [AC_HELP_STRING([--with-portals4-max-va-size=SIZE], + [Log base 2 of the maximum size in bytes of the user virtual address space. Should only be set for implementations which do not support binding all of virtual address space.])]) + AS_IF([test "$with_portals4_max_va_size" = "yes" || test "$with_portals4_max_va_size" = "no"], + [AC_MSG_ERROR([--with-portals4-max-va-size requires an integer argument])], + [AS_IF([test -n "$with_portals4_max_va_size"], + [max_va_size="$with_portals4_max_va_size"])]) + AC_DEFINE_UNQUOTED([OPAL_PORTALS4_MAX_VA_SIZE], [$max_va_size], + [Log base 2 of the maximum size in bytes of the user virtual address space. Set to 0 if MD can bind all of memory.]) - AS_IF([(test $max_md_size -eq 0 && test $max_va_size -ne 0 ) || (test $max_md_size -ne 0 && test $max_va_size -eq 0 )], + AS_IF([(test $max_md_size -eq 0 && test $max_va_size -ne 0 ) || (test $max_md_size -ne 0 && test $max_va_size -eq 0 )], [AC_ERROR([If either --with-portals4-max-md-size or --with-portals4-max-va-size is set, both must be set.])]) - AS_IF([test $max_md_size -ge $max_va_size], - [max_md_size=0 - max_va_size=0]) - AS_IF([test $max_md_size -ne 0 && test $max_va_size -ne 0], - [AC_MSG_NOTICE([Portals 4 address space size: $max_md_size, $max_va_size])]) + AS_IF([test $max_md_size -ge $max_va_size], + [max_md_size=0 + max_va_size=0]) + AS_IF([test $max_md_size -ne 0 && test $max_va_size -ne 0], + [AC_MSG_NOTICE([Portals 4 address space size: $max_md_size, $max_va_size])]) + + OPAL_SUMMARY_ADD([[Transports]],[[Portals4]],[$1],[$ompi_check_portals4_happy]) + fi AS_IF([test "$ompi_check_portals4_happy" = "yes"], - [$2], + [$1_LDFLAGS="[$]$1_LDFLAGS $opal_check_portals4_LDFLAGS" + $1_CPPFLAGS="[$]$1_CPPFLAGS $opal_check_portals4_CPPFLAGS" + $1_LIBS="[$]$1_LIBS $opal_check_portals4_LIBS" + $2], [AS_IF([test ! 
-z "$with_portals4" && test "$with_portals4" != "no"], [AC_MSG_ERROR([Portals4 support requested but not found. Aborting])]) $3]) diff --git a/config/opal_check_ps.m4 b/config/opal_check_ps.m4 index 014cdfec9d2..8a47b1c8343 100644 --- a/config/opal_check_ps.m4 +++ b/config/opal_check_ps.m4 @@ -6,17 +6,18 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. +dnl Copyright (c) 2017 UT-Battelle, LLC. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -30,11 +31,11 @@ PS_FLAVOR="unknown" ps -A -o fname > /dev/null 2>&1 if test "$?" = "0"; then - PS_FLAVOR="ps -A -o fname,pid,user" + PS_FLAVOR="ps -A -o fname,pid,uid" else ps -A -o command > /dev/null 2>&1 if test "$?" = "0"; then - PS_FLAVOR="ps -A -o command,pid,user" + PS_FLAVOR="ps -A -o command,pid,uid" fi fi AC_MSG_RESULT([$PS_FLAVOR]) diff --git a/config/opal_check_pthread_pids.m4 b/config/opal_check_pthread_pids.m4 index 513e2cd6a71..cb3b20a85e5 100644 --- a/config/opal_check_pthread_pids.m4 +++ b/config/opal_check_pthread_pids.m4 @@ -5,15 +5,15 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. 
-dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -65,8 +65,8 @@ void *checkpid(void *arg) { else ret = 1; pthread_exit((void *) &ret); -}])], -[MSG=no OPAL_THREADS_HAVE_DIFFERENT_PIDS=0], +}])], +[MSG=no OPAL_THREADS_HAVE_DIFFERENT_PIDS=0], [MSG=yes OPAL_THREADS_HAVE_DIFFERENT_PIDS=1], [ # If we're cross compiling, we can't do another AC_* function here beause @@ -99,7 +99,7 @@ AS_IF([test "$OPAL_THREADS_HAVE_DIFFERENT_PIDS" = "1"], [AC_MSG_WARN([This version of Open MPI only supports environments where]) AC_MSG_WARN([threads have the same PID. Please use an older version of]) AC_MSG_WARN([Open MPI if you need support on systems with different]) - AC_MSG_WARN([PIDs for threads in the same process. Open MPI 1.4.x]) + AC_MSG_WARN([PIDs for threads in the same process. Open MPI 1.4.x]) AC_MSG_WARN([supports such systems, as does at least some versions the]) AC_MSG_WARN([Open MPI 1.5.x series.]) AC_MSG_ERROR([Cannot continue]) diff --git a/config/opal_check_ugni.m4 b/config/opal_check_ugni.m4 index 6acc7fa74b3..6ae8bc25d6e 100644 --- a/config/opal_check_ugni.m4 +++ b/config/opal_check_ugni.m4 @@ -1,4 +1,4 @@ -dnl -*- Mode: Shell ; indent-tabs-mode:nil -*- +dnl -*- Mode: Shell-script ; indent-tabs-mode:nil -*- dnl dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana dnl University Research and Technology @@ -11,9 +11,9 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2006 QLogic Corp. All rights reserved. 
-dnl Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2011-2014 Los Alamos National Security, LLC. -dnl All rights reserved. +dnl Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights +dnl reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. @@ -32,25 +32,24 @@ dnl # AC_DEFUN([OPAL_CHECK_UGNI], [ - AC_ARG_WITH([ugni], [AC_HELP_STRING([--with-ugni], - [Build support for Cray GNI. Set PKG_CONFIG_PATH env. variable to specify alternate path.])]) + if test -z "$opal_check_ugni_happy" ; then + AC_ARG_WITH([ugni], [AC_HELP_STRING([--with-ugni], + [Build support for Cray GNI. Set PKG_CONFIG_PATH env. variable to specify alternate path.])]) - opal_check_ugni_happy="no" + opal_check_ugni_happy="no" - AS_IF([test "$with_ugni" = "no"], - [opal_check_ugni_happy="no"], - [PKG_CHECK_MODULES([CRAY_UGNI], [cray-ugni], - [$1_LDFLAGS="$CRAY_UGNI_LIBS" - $1_CPPFLAGS="$CRAY_UGNI_CFLAGS" - opal_check_ugni_happy="yes"], - [ opal_check_ugni_happy="no"])]) + AS_IF([test "$with_ugni" = "no"], + [opal_check_ugni_happy="no"], + [PKG_CHECK_MODULES([CRAY_UGNI], [cray-ugni], + [opal_check_ugni_happy="yes"], + [opal_check_ugni_happy="no"])]) - opal_check_ugni_$1_save_CPPFLAGS="$CPPFLAGS" - opal_check_ugni_$1_save_LDFLAGS="$LDFLAGS" - opal_check_ugni_$1_save_LIBS="$LIBS" + opal_check_ugni_$1_save_CPPFLAGS="$CPPFLAGS" + opal_check_ugni_$1_save_LIBS="$LIBS" - CPPFLAGS="$CPPFLAGS $$1_CPPFLAGS" - LDFLAGS="$LDFLAGS $$1_LDFLAGS" + if test "$opal_check_ugni_happy" = "yes" ; then + CPPFLAGS="$CPPFLAGS $CRAY_UGNI_CFLAGS" + LIBS="$LIBS $CRAY_UGNI_LIBS" # echo "+++++++++++++++++++++++CPPFLAGS",$CPPFLAGS # echo "+++++++++++++++++++++++LDFLAGSS",$LDFLAGS # echo "+++++++++++++++++++++++1_CPPFLAGS",$$1_CPPFLAGS @@ -58,23 +57,24 @@ AC_DEFUN([OPAL_CHECK_UGNI], [ # sanity 
checks - AS_IF([test "$opal_check_ugni_happy" = "yes"], - [AC_CHECK_HEADER([gni_pub.h],[],AC_MSG_ERROR(['gni_pub.h not found.'])) - AC_CHECK_FUNCS([GNI_GetJobResInfo])]) + AC_CHECK_HEADER([gni_pub.h],[],AC_MSG_ERROR(['gni_pub.h not found.'])) + AC_CHECK_FUNCS([GNI_GetJobResInfo]) -# AS_IF([test "$opal_check_ugni_happy" = "yes"], -# [AC_CHECK_FUNCS([GNI_GetJobResInfo])]) + CPPFLAGS="$opal_check_ugni_$1_save_CPPFLAGS" + LIBS="$opal_check_ugni_$1_save_LIBS" + fi - CPPFLAGS="$opal_check_ugni_$1_save_CPPFLAGS" - LDFLAGS="$opal_check_ugni_$1_save_LDFLAGS" - LIBS="$opal_check_ugni_$1_save_LIBS" + AS_IF([test "$opal_check_ugni_happy" = "yes" && test "$enable_progress_threads" = "yes"], + [AC_MSG_WARN([GNI driver does not currently support progress threads. Disabling.]) + opal_check_ugni_happy="no"]) - AS_IF([test "$opal_check_ugni_happy" = "yes" && test "$enable_progress_threads" = "yes"], - [AC_MSG_WARN([GNI driver does not currently support progress threads. Disabling.]) - opal_check_ugni_happy="no"]) + OPAL_SUMMARY_ADD([[Transports]],[[Cray uGNI (Gemini/Aries)]],[$1],[$opal_check_ugni_happy]) + fi AS_IF([test "$opal_check_ugni_happy" = "yes"], - [$2], + [$1_CPPFLAGS="[$]$1_CPPFLAGS $CRAY_UGNI_CFLAGS" + $1_LIBS="[$]$1_LIBS $CRAY_UGNI_LIBS" + $2], [AS_IF([test ! -z "$with_ugni" && test "$with_ugni" != "no"], [AC_MSG_ERROR([GNI support requested but not found. Cannot continue.])]) $3]) diff --git a/config/opal_check_vendor.m4 b/config/opal_check_vendor.m4 index 95998e676a0..c227c2a347f 100644 --- a/config/opal_check_vendor.m4 +++ b/config/opal_check_vendor.m4 @@ -6,16 +6,16 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. 
dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -63,7 +63,7 @@ AC_DEFUN([OPAL_CXX_COMPILER_VENDOR], [ m4_ifndef([AC_LANG_DEFINES_PROVIDED], [m4_define([AC_LANG_DEFINES_PROVIDED])]) -# OPAL_IFDEF_IFELSE(symbol, [action-if-defined], +# OPAL_IFDEF_IFELSE(symbol, [action-if-defined], # [action-if-not-defined]) # ---------------------------------------------- # Run compiler to determine if preprocessor symbol "symbol" is @@ -76,7 +76,7 @@ choke me #endif], [$2], [$3])]) -# OPAL_IF_IFELSE(symbol, [action-if-defined], +# OPAL_IF_IFELSE(symbol, [action-if-defined], # [action-if-not-defined]) # ---------------------------------------------- # Run compiler to determine if preprocessor symbol "symbol" is @@ -106,24 +106,24 @@ AC_DEFUN([_OPAL_CHECK_COMPILER_VENDOR], [ # Intel AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [OPAL_IF_IFELSE([defined(__INTEL_COMPILER) || defined(__ICC)], + [OPAL_IF_IFELSE([defined(__INTEL_COMPILER) || defined(__ICC)], [opal_check_compiler_vendor_result="intel"])]) # Fujitsu AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [OPAL_IF_IFELSE([defined(__FUJITSU)], + [OPAL_IF_IFELSE([defined(__FUJITSU)], [opal_check_compiler_vendor_result="fujitsu"])]) # GNU AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [OPAL_IFDEF_IFELSE([__GNUC__], + [OPAL_IFDEF_IFELSE([__GNUC__], [opal_check_compiler_vendor_result="gnu" - # We do not support gccfss as a compiler so die if - # someone tries to use said compiler. gccfss (gcc - # for SPARC Systems) is a compiler that is no longer + # We do not support gccfss as a compiler so die if + # someone tries to use said compiler. 
gccfss (gcc + # for SPARC Systems) is a compiler that is no longer # supported by Oracle and it has some major flaws - # that prevents it from actually compiling OMPI code. + # that prevents it from actually compiling OMPI code. # So if we detect it we automatically bail. if ($CC --version | grep gccfss) >/dev/null 2>&1; then @@ -139,17 +139,17 @@ AC_DEFUN([_OPAL_CHECK_COMPILER_VENDOR], [ # Borland Turbo C AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [OPAL_IFDEF_IFELSE([__TURBOC__], + [OPAL_IFDEF_IFELSE([__TURBOC__], [opal_check_compiler_vendor_result="borland"])]) # Borland C++ AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [OPAL_IFDEF_IFELSE([__BORLANDC__], + [OPAL_IFDEF_IFELSE([__BORLANDC__], [opal_check_compiler_vendor_result="borland"])]) # Comeau C++ AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [OPAL_IFDEF_IFELSE([__COMO__], + [OPAL_IFDEF_IFELSE([__COMO__], [opal_check_compiler_vendor_result="comeau"])]) # Compaq C/C++ @@ -163,12 +163,12 @@ AC_DEFUN([_OPAL_CHECK_COMPILER_VENDOR], [ # Cray C/C++ AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [OPAL_IFDEF_IFELSE([_CRAYC], + [OPAL_IFDEF_IFELSE([_CRAYC], [opal_check_compiler_vendor_result="cray"])]) # Diab C/C++ AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [OPAL_IFDEF_IFELSE([__DCC__], + [OPAL_IFDEF_IFELSE([__DCC__], [opal_check_compiler_vendor_result="diab"])]) # Digital Mars @@ -210,20 +210,20 @@ AC_DEFUN([_OPAL_CHECK_COMPILER_VENDOR], [ # MIPSpro (SGI) AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [OPAL_IF_IFELSE([defined(sgi) || defined(__sgi)], + [OPAL_IF_IFELSE([defined(sgi) || defined(__sgi)], [opal_check_compiler_vendor_result="sgi"])]) # MPW C++ AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [OPAL_IF_IFELSE([defined(__MRC__) || defined(MPW_C) || defined(MPW_CPLUS)], + [OPAL_IF_IFELSE([defined(__MRC__) || defined(MPW_C) || defined(MPW_CPLUS)], 
[opal_check_compiler_vendor_result="mpw"])]) # Microsoft AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [# Always use C compiler when checking for Microsoft, as + [# Always use C compiler when checking for Microsoft, as # Visual C++ doesn't recognize .cc as a C++ file. AC_LANG_PUSH(C) - OPAL_IF_IFELSE([defined(_MSC_VER) || defined(__MSC_VER)], + OPAL_IF_IFELSE([defined(_MSC_VER) || defined(__MSC_VER)], [opal_check_compiler_vendor_result="microsoft"]) AC_LANG_POP(C)]) @@ -239,7 +239,7 @@ AC_DEFUN([_OPAL_CHECK_COMPILER_VENDOR], [ # Portland Group AS_IF([test "$opal_check_compiler_vendor_result" = "unknown"], - [OPAL_IFDEF_IFELSE([__PGI], + [OPAL_IFDEF_IFELSE([__PGI], [opal_check_compiler_vendor_result="portland group"])]) # SAS/C diff --git a/config/opal_check_verbs.m4 b/config/opal_check_verbs.m4 index 9456ff29905..f5eea2c7718 100644 --- a/config/opal_check_verbs.m4 +++ b/config/opal_check_verbs.m4 @@ -69,9 +69,9 @@ AC_DEFUN([OPAL_CHECK_VERBS_DIR],[ [Search for verbs libraries in DIR])]) # Sanity check the --with values - OPAL_CHECK_WITHDIR([verbs], [$with_verbs], + OPAL_CHECK_WITHDIR([verbs], [$with_verbs], [include/infiniband/verbs.h]) - OPAL_CHECK_WITHDIR([verbs-libdir], [$with_verbs_libdir], + OPAL_CHECK_WITHDIR([verbs-libdir], [$with_verbs_libdir], [libibverbs.*]) # Set standardized shell variables for OFED lovin' components to diff --git a/config/opal_check_visibility.m4 b/config/opal_check_visibility.m4 index b8ec53c0618..bb9097633a9 100644 --- a/config/opal_check_visibility.m4 +++ b/config/opal_check_visibility.m4 @@ -6,16 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. 
# All rights reserved. # Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,7 +26,7 @@ AC_DEFUN([OPAL_CHECK_VISIBILITY],[ # Check if the compiler has support for visibility, like some # versions of gcc, icc Sun Studio cc. - AC_ARG_ENABLE(visibility, + AC_ARG_ENABLE(visibility, AC_HELP_STRING([--enable-visibility], [enable visibility feature of certain compilers/linkers (default: enabled)])) @@ -35,7 +35,7 @@ AC_DEFUN([OPAL_CHECK_VISIBILITY],[ if test "$enable_visibility" = "no"; then AC_MSG_CHECKING([$opal_msg]) - AC_MSG_RESULT([no (disabled)]) + AC_MSG_RESULT([no (disabled)]) else CFLAGS_orig=$CFLAGS diff --git a/config/opal_check_withdir.m4 b/config/opal_check_withdir.m4 index 3116c9f9f6e..7c0ffa84ffd 100644 --- a/config/opal_check_withdir.m4 +++ b/config/opal_check_withdir.m4 @@ -4,12 +4,14 @@ dnl Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana dnl University Research and Technology dnl Corporation. All rights reserved. dnl Copyright (c) 2006 Los Alamos National Security, LLC. All rights -dnl reserved. +dnl reserved. dnl Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -17,7 +19,7 @@ dnl # ---------------------------------------------------- AC_DEFUN([OPAL_CHECK_WITHDIR],[ AC_MSG_CHECKING([--with-$1 value]) - AS_IF([test "$2" = "yes" -o "$2" = "no" -o "x$2" = "x"], + AS_IF([test "$2" = "yes" || test "$2" = "no" || test "x$2" = "x"], [AC_MSG_RESULT([simple ok (unspecified)])], [AS_IF([test ! 
-d "$2"], [AC_MSG_RESULT([not found]) diff --git a/config/opal_check_xpmem.m4 b/config/opal_check_xpmem.m4 new file mode 100644 index 00000000000..ee1fd42027a --- /dev/null +++ b/config/opal_check_xpmem.m4 @@ -0,0 +1,112 @@ +# -*- shell-script ; indent-tabs-mode:nil -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2014 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Special check for Cray XPMEM; uses the pkg-config macros from pkg.m4.
+# On success: appends the CRAY_XPMEM flags to prefix_{CPPFLAGS, LDFLAGS, LIBS},
+# defines HAVE_XPMEM_H and runs action-if-found; else runs action-if-not-found.
+# OPAL_CHECK_CRAY_XPMEM(prefix, [action-if-found], [action-if-not-found])
+# --------------------------------------------------------
+AC_DEFUN([OPAL_CHECK_CRAY_XPMEM],[
+    if test -z "$opal_check_cray_xpmem_happy" ; then
+        AC_ARG_WITH([cray_xpmem],
+                    [AC_HELP_STRING([--with-cray-xpmem(=yes/no)],
+                                    [Build Cray XPMEM support(default: auto)])],
+                    [], with_cray_xpmem=auto)
+
+        AC_MSG_CHECKING([for Cray XPMEM support])
+        AS_IF([test "$with_cray_xpmem" = "no"],
+              [AC_MSG_RESULT([no])
+               $3],
+              [AS_IF([test "$with_cray_xpmem" = "auto" || test "$with_cray_xpmem" = "yes"],
+                     [PKG_CHECK_MODULES_STATIC([CRAY_XPMEM], [cray-xpmem],
+                                               [opal_check_cray_xpmem_happy="yes"],
+                                               [opal_check_cray_xpmem_happy="no"]
+                                               [AS_IF([test "$with_cray_xpmem" = "yes"],
+                                                      [AC_MSG_WARN([Cray XPMEM support requested but pkg-config failed.])
+                                                       AC_MSG_ERROR([Aborting])],[])]
+                                               )],
+                     [])
+              ])
+        # with enable_static=yes use the static library list (a shell assignment takes no spaces around the equals sign)
+        AS_IF([test "$opal_check_cray_xpmem_happy" = "yes" && test "$enable_static" = "yes"],
+              [CRAY_XPMEM_LIBS=$CRAY_XPMEM_STATIC_LIBS],[])
+    fi
+
+    AS_IF([test "$opal_check_cray_xpmem_happy" = "yes"],
+          [$1_LDFLAGS="[$]$1_LDFLAGS $CRAY_XPMEM_LIBS"
+           $1_CPPFLAGS="[$]$1_CPPFLAGS $CRAY_XPMEM_CFLAGS"
+           $1_LIBS="[$]$1_LIBS $CRAY_XPMEM_LIBS"
+           AC_DEFINE_UNQUOTED([HAVE_XPMEM_H], [1],[is xpmem.h available])
+           $2], [$3])
+])
+
+# OPAL_CHECK_XPMEM(prefix, [action-if-found], [action-if-not-found])
+# --------------------------------------------------------
+# check if XPMEM support can be found.
sets prefix_{CPPFLAGS,
+# LDFLAGS, LIBS} as needed and runs action-if-found if there is
+# support, otherwise executes action-if-not-found
+AC_DEFUN([OPAL_CHECK_XPMEM], [
+    if test -z "$opal_check_xpmem_happy" ; then
+        # check for a cray installed xpmem first
+        OPAL_CHECK_CRAY_XPMEM([opal_check_xpmem],[opal_check_xpmem_happy=yes],[opal_check_xpmem_happy=no])
+
+        if test "$opal_check_xpmem_happy" = no ; then
+            AC_ARG_WITH([xpmem],
+                        [AC_HELP_STRING([--with-xpmem(=DIR)],
+                                        [Build with XPMEM kernel module support, searching for headers in DIR])])
+            OPAL_CHECK_WITHDIR([xpmem], [$with_xpmem], [include/xpmem.h])
+
+            AC_ARG_WITH([xpmem-libdir],
+                        [AC_HELP_STRING([--with-xpmem-libdir=DIR],
+                                        [Search for XPMEM library in DIR])])
+            OPAL_CHECK_WITHDIR([xpmem-libdir], [$with_xpmem_libdir], [libxpmem.*])
+
+            if test ! "$with_xpmem" = "no" ; then
+                if test ! -z "$with_xpmem" && test "$with_xpmem" != "yes" ; then
+                    opal_check_xpmem_dir="$with_xpmem"
+                fi
+
+                if test ! -z "$with_xpmem_libdir" && test "$with_xpmem_libdir" != "yes" ; then
+                    opal_check_xpmem_libdir="$with_xpmem_libdir"
+                fi
+
+                OPAL_CHECK_PACKAGE([opal_check_xpmem],[xpmem.h],[xpmem],[xpmem_make],[],
+                                   [$opal_check_xpmem_dir],[$opal_check_xpmem_libdir], [opal_check_xpmem_happy="yes"], [])
+
+                if test "$opal_check_xpmem_happy" = "no" && test -n "$with_xpmem" && test "$with_xpmem" != "yes" ; then
+                    AC_MSG_ERROR([XPMEM support requested but not found. Aborting])
+                fi
+            fi
+        fi
+        # summarize the overall result: it is yes for either the Cray or the generic XPMEM
+        OPAL_SUMMARY_ADD([[Transports]],[[Shared memory/XPMEM]],[$1],[$opal_check_xpmem_happy])
+    fi
+
+    AS_IF([test "$opal_check_xpmem_happy" = "yes"], [
+        $1_CPPFLAGS="[$]$1_CPPFLAGS $opal_check_xpmem_CPPFLAGS"
+        $1_LDFLAGS="[$]$1_LDFLAGS $opal_check_xpmem_LDFLAGS"
+        $1_LIBS="[$]$1_LIBS $opal_check_xpmem_LIBS"
+        $2], [$3])
+])dnl
diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4
index 29d9c2d8da0..3cc4033aeea 100644
--- a/config/opal_config_asm.m4
+++ b/config/opal_config_asm.m4
@@ -5,18 +5,22 @@ dnl Corporation.
All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. -dnl Copyright (c) 2015 Research Organization for Information Science +dnl Copyright (c) 2015-2017 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights +dnl reserved. +dnl Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights +dnl reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -86,11 +90,119 @@ AC_DEFUN([OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128], [ AC_DEFUN([OPAL_CHECK_SYNC_BUILTINS], [ AC_MSG_CHECKING([for __sync builtin atomics]) - AC_TRY_COMPILE([], [__sync_synchronize()], + AC_TRY_LINK([long tmp;], [__sync_synchronize(); +__sync_bool_compare_and_swap(&tmp, 0, 1); +__sync_add_and_fetch(&tmp, 1);], + [AC_MSG_RESULT([yes]) + $1], + [AC_MSG_RESULT([no]) + $2]) + + AC_MSG_CHECKING([for 64-bit __sync builtin atomics]) + + AC_TRY_LINK([ +#include +uint64_t tmp;], [ +__sync_bool_compare_and_swap(&tmp, 0, 1); +__sync_add_and_fetch(&tmp, 1);], + [AC_MSG_RESULT([yes]) + opal_asm_sync_have_64bit=1], + [AC_MSG_RESULT([no]) + opal_asm_sync_have_64bit=0]) + + AC_DEFINE_UNQUOTED([OPAL_ASM_SYNC_HAVE_64BIT],[$opal_asm_sync_have_64bit], + [Whether 64-bit is supported by the __sync builtin atomics]) + + # Check for 128-bit support + OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128 +]) + + 
+AC_DEFUN([OPAL_CHECK_GCC_BUILTIN_CSWAP_INT128], [ + + OPAL_VAR_SCOPE_PUSH([atomic_compare_exchange_n_128_result CFLAGS_save]) + + AC_ARG_ENABLE([cross-cmpset128],[AC_HELP_STRING([--enable-cross-cmpset128], + [enable the use of the __sync builtin atomic compare-and-swap 128 when cross compiling])]) + + atomic_compare_exchange_n_128_result=0 + + if test ! "$enable_cross_cmpset128" = "yes" ; then + AC_MSG_CHECKING([for processor support of __atomic builtin atomic compare-and-swap on 128-bit values]) + + AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);])], + [AC_MSG_RESULT([yes]) + atomic_compare_exchange_n_128_result=1], + [AC_MSG_RESULT([no])], + [AC_MSG_RESULT([no (cross compiling)])]) + + if test $atomic_compare_exchange_n_128_result = 0 ; then + CFLAGS_save=$CFLAGS + CFLAGS="$CFLAGS -mcx16" + + AC_MSG_CHECKING([for __atomic builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) + AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);])], + [AC_MSG_RESULT([yes]) + atomic_compare_exchange_n_128_result=1 + CFLAGS_save="$CFLAGS"], + [AC_MSG_RESULT([no])], + [AC_MSG_RESULT([no (cross compiling)])]) + + CFLAGS=$CFLAGS_save + fi + + if test $atomic_compare_exchange_n_128_result = 1 ; then + AC_MSG_CHECKING([if __int128 atomic compare-and-swap is always lock-free]) + AC_RUN_IFELSE([AC_LANG_PROGRAM([], [if (!__atomic_always_lock_free(16, 0)) { return 1; }])], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128 + atomic_compare_exchange_n_128_result=0], + [AC_MSG_RESULT([no (cross compiling)])]) + fi + else + AC_MSG_CHECKING([for compiler support of __atomic builtin atomic compare-and-swap on 128-bit values]) + + # Check if the compiler supports the __atomic builtin + AC_TRY_LINK([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, 
__ATOMIC_RELAXED, __ATOMIC_RELAXED);], + [AC_MSG_RESULT([yes]) + atomic_compare_exchange_n_128_result=1], + [AC_MSG_RESULT([no])]) + + if test $atomic_compare_exchange_n_128_result = 0 ; then + CFLAGS_save=$CFLAGS + CFLAGS="$CFLAGS -mcx16" + + AC_MSG_CHECKING([for __atomic builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) + AC_TRY_LINK([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);], + [AC_MSG_RESULT([yes]) + atomic_compare_exchange_n_128_result=1 + CFLAGS_save="$CFLAGS"], + [AC_MSG_RESULT([no])]) + + CFLAGS=$CFLAGS_save + fi + fi + + AC_DEFINE_UNQUOTED([OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128], [$atomic_compare_exchange_n_128_result], + [Whether the __atomic builtin atomic compare and swap is lock-free on 128-bit values]) + + OPAL_VAR_SCOPE_POP +]) + +AC_DEFUN([OPAL_CHECK_GCC_ATOMIC_BUILTINS], [ + AC_MSG_CHECKING([for __atomic builtin atomics]) + + AC_TRY_LINK([long tmp, old = 0;], [__atomic_thread_fence(__ATOMIC_SEQ_CST); +__atomic_compare_exchange_n(&tmp, &old, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); +__atomic_add_fetch(&tmp, 1, __ATOMIC_RELAXED);], [AC_MSG_RESULT([yes]) $1], [AC_MSG_RESULT([no]) $2]) + + # Check for 128-bit support + OPAL_CHECK_GCC_BUILTIN_CSWAP_INT128 ]) @@ -181,7 +293,7 @@ AC_DEFUN([_OPAL_CHECK_ASM_LSYM],[ echo "configure: trying $sym" >&AC_FD_CC OPAL_TRY_ASSEMBLE([foobar$opal_cv_asm_label_suffix ${sym}mytestlabel$opal_cv_asm_label_suffix], - [# ok, we succeeded at assembling. see if we can nm, + [# ok, we succeeded at assembling. 
see if we can nm, # throwing the results in a file if $NM conftest.$OBJEXT > conftest.out 2>&AC_FD_CC ; then if test "`$GREP mytestlabel conftest.out`" = "" ; then @@ -326,7 +438,7 @@ $opal_cv_asm_endproc ${sym}gsym_test_func echo "configure: failed C program was: " >&AC_FD_CC cat conftest.c >&AC_FD_CC asm_result=0 - fi], + fi], [asm_result=0]) if test "$asm_result" = "1" ; then opal_cv_asm_gsym="$sym" @@ -367,7 +479,7 @@ dnl ################################################################# dnl dnl OPAL_CHECK_ASM_ALIGN_LOG dnl -dnl Sets OPAL_ASM_ALIGN_LOG to 1 if align is specified +dnl Sets OPAL_ASM_ALIGN_LOG to 1 if align is specified dnl logarithmically, 0 otherwise dnl dnl ################################################################# @@ -383,7 +495,7 @@ AC_DEFUN([OPAL_CHECK_ASM_ALIGN_LOG],[ .byte 1 .align 4 foo$opal_cv_asm_label_suffix - .byte 2], + .byte 2], [opal_asm_addr=[`$NM conftest.$OBJEXT | $GREP foo | sed -e 's/.*\([0-9a-fA-F][0-9a-fA-F]\).*foo.*/\1/'`]], [opal_asm_addr=""]) # test for both 16 and 10 (decimal and hex notations) @@ -412,7 +524,7 @@ dnl ################################################################# dnl dnl OPAL_CHECK_ASM_TYPE dnl -dnl Sets OPAL_ASM_TYPE to the prefix for the function type to +dnl Sets OPAL_ASM_TYPE to the prefix for the function type to dnl set a symbol's type as function (needed on ELF for shared dnl libaries). If no .type directive is needed, sets OPAL_ASM_TYPE dnl to an empty string @@ -715,8 +827,8 @@ dnl assembly. Some compilers emit a warning and ignore the inline dnl assembly (xlc on OS X) and compile without error. Therefore, dnl the test attempts to run the emited code to check that the dnl assembly is actually run. To run this test, one argument to -dnl the macro must be an assembly instruction in gcc format to move -dnl the value 0 into the register containing the variable ret. 
+dnl the macro must be an assembly instruction in gcc format to move +dnl the value 0 into the register containing the variable ret. dnl For PowerPC, this would be: dnl dnl "li %0,0" : "=&r"(ret) @@ -738,34 +850,28 @@ AC_DEFUN([OPAL_CHECK_INLINE_C_GCC],[ AC_MSG_CHECKING([if $CC supports GCC inline assembly]) - if test "$opal_cv_c_compiler_vendor" = "portland group" ; then - # PGI seems to have some issues with our inline assembly. - # Disable for now. - asm_result="no (Portland Group)" - else - if test ! "$assembly" = "" ; then - AC_RUN_IFELSE([AC_LANG_PROGRAM([ -AC_INCLUDES_DEFAULT], -[[int ret = 1; + if test ! "$assembly" = "" ; then + AC_RUN_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[ +int ret = 1; int negone = -1; __asm__ __volatile__ ($assembly); -return ret;]])], - [asm_result="yes"], [asm_result="no"], - [asm_result="unknown"]) - else - assembly="test skipped - assuming no" - fi +return ret; + ]])], + [asm_result="yes"], [asm_result="no"], + [asm_result="unknown"]) + else + assembly="test skipped - assuming no" + fi - # if we're cross compiling, just try to compile and figure good enough - if test "$asm_result" = "unknown" ; then - AC_LINK_IFELSE([AC_LANG_PROGRAM([ -AC_INCLUDES_DEFAULT], -[[int ret = 1; + # if we're cross compiling, just try to compile and figure good enough + if test "$asm_result" = "unknown" ; then + AC_LINK_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[ +int ret = 1; int negone = -1; __asm__ __volatile__ ($assembly); -return ret;]])], - [asm_result="yes"], [asm_result="no"]) - fi +return ret; + ]])], + [asm_result="yes"], [asm_result="no"]) fi AC_MSG_RESULT([$asm_result]) @@ -870,17 +976,16 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ [AC_HELP_STRING([--enable-osx-builtin-atomics], [Enable use of OSX builtin atomics (default: disabled)])]) - if test "$enable_builtin_atomics" = "yes" ; then - OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], - [AC_MSG_ERROR([__sync builtin atomics requested but not found.])]) - 
AC_DEFINE([OPAL_C_GCC_INLINE_ASSEMBLY], [1], - [Whether C compiler supports GCC style inline assembly]) - OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128 - elif test "$enable_osx_builtin_atomics" = "yes" ; then - AC_CHECK_HEADER([libkern/OSAtomic.h],[opal_cv_asm_builtin="BUILTIN_OSX"], - [AC_MSG_ERROR([OSX builtin atomics requested but not found.])]) - else - opal_cv_asm_builtin="BUILTIN_NO" + opal_cv_asm_builtin="BUILTIN_NO" + if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes" ; then + OPAL_CHECK_GCC_ATOMIC_BUILTINS([opal_cv_asm_builtin="BUILTIN_GCC"], []) + fi + if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes" ; then + OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], []) + fi + if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_osx_builtin_atomics" = "yes" ; then + AC_CHECK_HEADER([libkern/OSAtomic.h], + [opal_cv_asm_builtin="BUILTIN_OSX"]) fi OPAL_CHECK_ASM_PROC @@ -899,11 +1004,11 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ OPAL_GCC_INLINE_ASSIGN="" OPAL_ASM_SUPPORT_64BIT=0 case "${host}" in - i?86-*|x86_64*) + i?86-*|x86_64*|amd64*) if test "$ac_cv_sizeof_long" = "4" ; then opal_cv_asm_arch="IA32" else - opal_cv_asm_arch="AMD64" + opal_cv_asm_arch="X86_64" fi OPAL_ASM_SUPPORT_64BIT=1 OPAL_GCC_INLINE_ASSIGN='"xaddl %1,%0" : "=m"(ret), "+r"(negone) : "m"(ret)' @@ -915,11 +1020,13 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ OPAL_ASM_SUPPORT_64BIT=1 OPAL_GCC_INLINE_ASSIGN='"mov %0=r0\n;;\n" : "=&r"(ret)' ;; - - alpha-*|alphaev[[4-8]]-*|alphaev56-*|alphaev6[[78]]-*) - opal_cv_asm_arch="ALPHA" + aarch64*) + opal_cv_asm_arch="ARM64" OPAL_ASM_SUPPORT_64BIT=1 - OPAL_GCC_INLINE_ASSIGN='"bis [$]31,[$]31,%0" : "=&r"(ret)' + OPAL_ASM_ARM_VERSION=8 + AC_DEFINE_UNQUOTED([OPAL_ASM_ARM_VERSION], [$OPAL_ASM_ARM_VERSION], + [What ARM assembly version to use]) + OPAL_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)' ;; armv7*) @@ -982,7 +1089,7 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ sparc*-*) # SPARC v9 (and above) are the 
only ones with 64bit support # if compiling 32 bit, see if we are v9 (aka v8plus) or - # earlier (casa is v8+/v9). + # earlier (casa is v8+/v9). if test "$ac_cv_sizeof_long" = "4" ; then have_v8plus=0 OPAL_CHECK_SPARCV8PLUS([have_v8plus=1]) @@ -1013,7 +1120,13 @@ AC_MSG_ERROR([Can not continue.]) ;; esac - if test "$opal_cv_asm_builtin" = "BUILTIN_SYNC" ; then + if test "x$OPAL_ASM_SUPPORT_64BIT" = "x1" && test "$opal_cv_asm_builtin" = "BUILTIN_SYNC" && + test "$opal_asm_sync_have_64bit" = "0" ; then + # __sync builtins exist but do not implement 64-bit support. Fall back on inline asm. + opal_cv_asm_builtin="BUILTIN_NO" + fi + + if test "$opal_cv_asm_builtin" = "BUILTIN_SYNC" || test "$opal_cv_asm_builtin" = "BUILTIN_GCC" ; then AC_DEFINE([OPAL_C_GCC_INLINE_ASSEMBLY], [1], [Whether C compiler supports GCC style inline assembly]) else @@ -1076,7 +1189,7 @@ AC_MSG_ERROR([Can not continue.]) # Check for RDTSCP support result=0 - AS_IF([test "$opal_cv_asm_arch" = "OPAL_AMD64" || test "$opal_cv_asm_arch" = "OPAL_IA32"], + AS_IF([test "$opal_cv_asm_arch" = "OPAL_X86_64" || test "$opal_cv_asm_arch" = "OPAL_IA32"], [AC_MSG_CHECKING([for RDTSCP assembly support]) AC_LANG_PUSH([C]) AC_TRY_RUN([[ @@ -1122,9 +1235,7 @@ AC_DEFUN([OPAL_ASM_FIND_FILE], [ AC_REQUIRE([AC_PROG_GREP]) AC_REQUIRE([AC_PROG_FGREP]) -if test "$opal_cv_asm_arch" != "WINDOWS" && test "$opal_cv_asm_builtin" != "BUILTIN_SYNC" && test "$opal_cv_asm_builtin" != "BUILTIN_OSX" ; then - AC_CHECK_PROG([PERL], [perl], [perl]) - +if test "$opal_cv_asm_arch" != "WINDOWS" && test "$opal_cv_asm_builtin" != "BUILTIN_SYNC" && test "$opal_cv_asm_builtin" != "BUILTIN_GCC" && test "$opal_cv_asm_builtin" != "BUILTIN_OSX" ; then # see if we have a pre-built one already AC_MSG_CHECKING([for pre-built assembly file]) opal_cv_asm_file="" @@ -1145,27 +1256,21 @@ if test "$opal_cv_asm_arch" != "WINDOWS" && test "$opal_cv_asm_builtin" != "BUIL rm -rf conftest.* if test "$opal_cv_asm_file" = "" ; then - if test ! 
"$PERL" = "" ; then - # we have perl... Can we generate a file? - AC_MSG_CHECKING([whether possible to generate assembly file]) - mkdir -p opal/asm/generated - opal_cv_asm_file="atomic-local.s" - opal_try='$PERL $OPAL_TOP_SRCDIR/opal/asm/generate-asm.pl $opal_cv_asm_arch "$opal_cv_asm_format" $OPAL_TOP_SRCDIR/opal/asm/base $OPAL_TOP_BUILDDIR/opal/asm/generated/$opal_cv_asm_file >conftest.out 2>&1' - if AC_TRY_EVAL(opal_try) ; then - # save the warnings - cat conftest.out >&AC_FD_CC - AC_MSG_RESULT([yes]) - else - # save output - cat conftest.out >&AC_FD_CC - opal_cv_asm_file="" - AC_MSG_RESULT([failed]) - AC_MSG_WARN([Could not build atomic operations assembly file.]) - AC_MSG_WARN([There will be no atomic operations for this build.]) - fi + # Can we generate a file? + AC_MSG_CHECKING([whether possible to generate assembly file]) + mkdir -p opal/asm/generated + opal_cv_asm_file="atomic-local.s" + opal_try='$PERL $OPAL_TOP_SRCDIR/opal/asm/generate-asm.pl $opal_cv_asm_arch "$opal_cv_asm_format" $OPAL_TOP_SRCDIR/opal/asm/base $OPAL_TOP_BUILDDIR/opal/asm/generated/$opal_cv_asm_file >conftest.out 2>&1' + if AC_TRY_EVAL(opal_try) ; then + # save the warnings + cat conftest.out >&AC_FD_CC + AC_MSG_RESULT([yes]) else - AC_MSG_WARN([Could not find prebuilt atomic operations file and could not]) - AC_MSG_WARN([find perl to attempt to generate a custom assembly file.]) + # save output + cat conftest.out >&AC_FD_CC + opal_cv_asm_file="" + AC_MSG_RESULT([failed]) + AC_MSG_WARN([Could not build atomic operations assembly file.]) AC_MSG_WARN([There will be no atomic operations for this build.]) fi fi diff --git a/config/opal_config_files.m4 b/config/opal_config_files.m4 index 46509ebacd4..95e71637107 100644 --- a/config/opal_config_files.m4 +++ b/config/opal_config_files.m4 @@ -2,9 +2,9 @@ # # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -24,7 +24,5 @@ AC_DEFUN([OPAL_CONFIG_FILES],[ opal/tools/wrappers/opalcc-wrapper-data.txt opal/tools/wrappers/opalc++-wrapper-data.txt opal/tools/wrappers/opal.pc - opal/tools/opal-checkpoint/Makefile - opal/tools/opal-restart/Makefile ]) ]) diff --git a/config/opal_config_pthreads.m4 b/config/opal_config_pthreads.m4 index db12c6ba56f..b2d9c7aaece 100644 --- a/config/opal_config_pthreads.m4 +++ b/config/opal_config_pthreads.m4 @@ -11,7 +11,7 @@ dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. -dnl Copyright (c) 2014-2015 Research Organization for Information Science +dnl Copyright (c) 2014-2016 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ dnl @@ -57,13 +57,13 @@ int main(int argc, char* argv[]) { pthread_attr_t attr; - me = pthread_self(); + me = pthread_self(); pthread_atfork(NULL, NULL, NULL); - pthread_attr_init(&attr); + pthread_attr_init(&attr); pthread_cleanup_push(cleanup_routine, 0); - pthread_create(&newthread, &attr, thread_main, 0); + pthread_create(&newthread, &attr, thread_main, 0); pthread_join(newthread, 0); - pthread_cleanup_pop(0); + pthread_cleanup_pop(0); return 0; }]])], @@ -114,13 +114,13 @@ void pthreadtest_f(void) { pthread_attr_t attr; - me = pthread_self(); + me = pthread_self(); pthread_atfork(NULL, NULL, NULL); - pthread_attr_init(&attr); + pthread_attr_init(&attr); pthread_cleanup_push(cleanup_routine, 0); - pthread_create(&newthread, &attr, thread_main, 0); - pthread_join(&newthread, 0); - pthread_cleanup_pop(0); + pthread_create(&newthread, &attr, thread_main, 0); + pthread_join(newthread, 0); + pthread_cleanup_pop(0); } void pthreadtest(void) @@ -202,7 +202,7 @@ if test "$opal_pthread_cxx_success" = "0"; then AC_MSG_CHECKING([if 
C++ compiler and POSIX threads work as is]) AC_LANG_PUSH(C++) - OPAL_INTL_PTHREAD_TRY_LINK(opal_pthread_cxx_success=1, + OPAL_INTL_PTHREAD_TRY_LINK(opal_pthread_cxx_success=1, opal_pthread_cxx_success=0) AC_LANG_POP(C++) if test "$opal_pthread_cxx_success" = "1"; then @@ -224,7 +224,7 @@ if test "$opal_pthread_fortran_success" = "0" && \ AC_MSG_CHECKING([if Fortran compiler and POSIX threads work as is]) AC_LANG_PUSH(C) - OPAL_INTL_PTHREAD_TRY_LINK_FORTRAN(opal_pthread_fortran_success=1, + OPAL_INTL_PTHREAD_TRY_LINK_FORTRAN(opal_pthread_fortran_success=1, opal_pthread_fortran_success=0) AC_LANG_POP(C) if test "$opal_pthread_fortran_success" = "1"; then @@ -251,11 +251,11 @@ AC_PROVIDE_IFELSE([AC_PROG_CC], [OPAL_INTL_POSIX_THREADS_PLAIN_C], [opal_pthread_c_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_CXX], - [OPAL_INTL_POSIX_THREADS_PLAIN_CXX], +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [OPAL_INTL_POSIX_THREADS_PLAIN_CXX], [opal_pthread_cxx_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_FC], +AC_PROVIDE_IFELSE([AC_PROG_FC], [OPAL_INTL_POSIX_THREADS_PLAIN_FC], [opal_pthread_fortran_success=1]) @@ -331,7 +331,7 @@ if test "$opal_pthread_fortran_success" = "0" && \ AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $pf]) FCFLAGS="$orig_FCFLAGS $pf" AC_LANG_PUSH(C) - OPAL_INTL_PTHREAD_TRY_LINK_FORTRAN(opal_pthread_fortran_success=1, + OPAL_INTL_PTHREAD_TRY_LINK_FORTRAN(opal_pthread_fortran_success=1, opal_pthread_fortran_success=0) AC_LANG_POP(C) if test "$opal_pthread_fortran_success" = "1"; then @@ -351,7 +351,7 @@ fi AC_DEFUN([OPAL_INTL_POSIX_THREADS_SPECIAL_FLAGS],[ # Begin: OPAL_INTL_POSIX_THREADS_SPECIAL_FLAGS # -# If above didn't work, try some super-special compiler flags +# If above didn't work, try some super-special compiler flags # that get evaluated to the "right" things. 
# # -Kthread: @@ -375,15 +375,15 @@ case "${host_cpu}-${host_os}" in esac # Only run C++ and Fortran if those compilers already configured -AC_PROVIDE_IFELSE([AC_PROG_CC], +AC_PROVIDE_IFELSE([AC_PROG_CC], [OPAL_INTL_POSIX_THREADS_SPECIAL_FLAGS_C], [opal_pthread_c_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_CXX], - [OPAL_INTL_POSIX_THREADS_SPECIAL_FLAGS_CXX], +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [OPAL_INTL_POSIX_THREADS_SPECIAL_FLAGS_CXX], [opal_pthread_cxx_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_FC], +AC_PROVIDE_IFELSE([AC_PROG_FC], [OPAL_INTL_POSIX_THREADS_SPECIAL_FLAGS_FC], [opal_pthread_fortran_success=1]) @@ -459,7 +459,7 @@ if test "$opal_pthread_cxx_success" = "0"; then esac LIBS="$orig_LIBS $PTHREAD_LIBS" AC_LANG_PUSH(C++) - OPAL_INTL_PTHREAD_TRY_LINK(opal_pthread_cxx_success=1, + OPAL_INTL_PTHREAD_TRY_LINK(opal_pthread_cxx_success=1, opal_pthread_cxx_success=0) AC_LANG_POP(C++) if test "$opal_pthread_cxx_success" = "1"; then @@ -470,7 +470,7 @@ if test "$opal_pthread_cxx_success" = "0"; then AC_MSG_RESULT([no]) AC_MSG_ERROR([Can not find working threads configuration. 
aborting]) fi - else + else for pl in $plibs; do AC_MSG_CHECKING([if C++ compiler and POSIX threads work with $pl]) case "${host_cpu}-${host-_os}" in @@ -489,7 +489,7 @@ if test "$opal_pthread_cxx_success" = "0"; then esac LIBS="$orig_LIBS $pl" AC_LANG_PUSH(C++) - OPAL_INTL_PTHREAD_TRY_LINK(opal_pthread_cxx_success=1, + OPAL_INTL_PTHREAD_TRY_LINK(opal_pthread_cxx_success=1, opal_pthread_cxx_success=0) AC_LANG_POP(C++) if test "$opal_pthread_cxx_success" = "1"; then @@ -518,7 +518,7 @@ if test "$opal_pthread_fortran_success" = "0" && \ AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $PTHREAD_LIBS]) LIBS="$orig_LIBS $PTHREAD_LIBS" AC_LANG_PUSH(C) - OPAL_INTL_PTHREAD_TRY_LINK_FORTRAN(opal_pthread_fortran_success=1, + OPAL_INTL_PTHREAD_TRY_LINK_FORTRAN(opal_pthread_fortran_success=1, opal_pthread_fortran_success=0) AC_LANG_POP(C) if test "$opal_pthread_fortran_success" = "1"; then @@ -533,7 +533,7 @@ if test "$opal_pthread_fortran_success" = "0" && \ AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $pl]) LIBS="$orig_LIBS $pl" AC_LANG_PUSH(C) - OPAL_INTL_PTHREAD_TRY_LINK_FORTRAN(opal_pthread_fortran_success=1, + OPAL_INTL_PTHREAD_TRY_LINK_FORTRAN(opal_pthread_fortran_success=1, opal_pthread_fortran_success=0) AC_LANG_POP(C) if test "$opal_pthread_fortran_success" = "1"; then @@ -564,15 +564,15 @@ AC_DEFUN([OPAL_INTL_POSIX_THREADS_LIBS],[ plibs="-lpthreads -llthread -lpthread" # Only run C++ and Fortran if those compilers already configured -AC_PROVIDE_IFELSE([AC_PROG_CC], - [OPAL_INTL_POSIX_THREADS_LIBS_C], +AC_PROVIDE_IFELSE([AC_PROG_CC], + [OPAL_INTL_POSIX_THREADS_LIBS_C], [opal_pthread_c_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_CXX], - [OPAL_INTL_POSIX_THREADS_LIBS_CXX], +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [OPAL_INTL_POSIX_THREADS_LIBS_CXX], [opal_pthread_cxx_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_FC], +AC_PROVIDE_IFELSE([AC_PROG_FC], [OPAL_INTL_POSIX_THREADS_LIBS_FC], [opal_pthread_fortran_success=1]) diff --git 
a/config/opal_config_subdir.m4 b/config/opal_config_subdir.m4 index d2ad1fc2bb1..b6e20c802b3 100644 --- a/config/opal_config_subdir.m4 +++ b/config/opal_config_subdir.m4 @@ -6,7 +6,7 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. @@ -15,9 +15,9 @@ dnl Copyright (c) 2014 Intel, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -51,9 +51,9 @@ if test "$subdir_dir" != ":" && test -d $srcdir/$subdir_dir; then # case $srcdir in - .) + .) ;; - *) + *) { case $subdir_dir in [[\\/]]* | ?:[[\\/]]* ) total_dir=;; *) total_dir=.;; @@ -113,7 +113,7 @@ if test "$subdir_dir" != ":" && test -d $srcdir/$subdir_dir; then # Construct the --cache-file argument # - # BWB - subdir caching is a pain since we change CFLAGS and all that. + # BWB - subdir caching is a pain since we change CFLAGS and all that. # Just disable it for now subdir_cache_file="/dev/null" diff --git a/config/opal_config_subdir_args.m4 b/config/opal_config_subdir_args.m4 index 21caaa1703e..3b7a35580f9 100644 --- a/config/opal_config_subdir_args.m4 +++ b/config/opal_config_subdir_args.m4 @@ -6,16 +6,16 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. 
-dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2014 Intel, Inc. All rights reserved. -dnl Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -30,7 +30,9 @@ OPAL_VAR_SCOPE_PUSH([subdirs_str subdirs_skip subdirs_args subdirs_arg]) # # Make a list of command line args --eliminate the --srcdir and # --cache-file args, because we need to replace them with our own -# values when invoking the sub-configure script. +# values when invoking the sub-configure script. Also eliminate +# the --with-platform as this will confuse any subdir with +# similar options # subdirs_args= @@ -56,11 +58,13 @@ do ;; -srcdir=* | --srcdir=*) ;; - *) + -with-platform=* | --with-platform=*) + ;; + *) case $subdir_arg in *\'*) subdir_arg=`echo "$subdir_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac - subdirs_args="$subdirs_args '$subdirs_arg'" + subdirs_args="$subdirs_args '$subdirs_arg'" ;; esac fi diff --git a/config/opal_config_threads.m4 b/config/opal_config_threads.m4 index c07257a265b..66aa874a200 100644 --- a/config/opal_config_threads.m4 +++ b/config/opal_config_threads.m4 @@ -5,7 +5,7 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. 
dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. @@ -15,9 +15,9 @@ dnl Copyright (c) 2014 Intel, Inc. All rights reserved dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/config/opal_configure_options.m4 b/config/opal_configure_options.m4 index 4177ee99747..42b02b13dfe 100644 --- a/config/opal_configure_options.m4 +++ b/config/opal_configure_options.m4 @@ -6,11 +6,11 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2009 IBM Corporation. All rights reserved. dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights @@ -22,9 +22,9 @@ dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -52,17 +52,17 @@ AC_ARG_ENABLE(coverage, AC_HELP_STRING([--enable-coverage], [enable code coverage files to be generated])) if test "$enable_coverage" = "yes"; then - if test "$enable_shared" = "yes"; then - AC_MSG_WARN([Code coverage can run only with static libraries. Please -run configure with --enable-static --disable-shared if -you want code coverage. 
Also ensure that you execute -make clean too ensure removal of all leftover shared + if test "$enable_shared" = "yes"; then + AC_MSG_WARN([Code coverage can run only with static libraries. Please +run configure with --enable-static --disable-shared if +you want code coverage. Also ensure that you execute +make clean too ensure removal of all leftover shared mpi libraries]) AC_MSG_ERROR([Cannot continue processing]) fi AC_MSG_RESULT([yes]) WANT_COVERAGE=1 -else +else AC_MSG_RESULT([no]) WANT_COVERAGE=0 fi @@ -79,7 +79,7 @@ AC_ARG_ENABLE(branch-probabilities, if test "$enable_branch_probabilities" = "yes"; then AC_MSG_RESULT([yes]) WANT_BRANCH_PROBABILITIES=1 -else +else AC_MSG_RESULT([no]) WANT_BRANCH_PROBABILITIES=0 fi @@ -90,9 +90,9 @@ fi # AC_MSG_CHECKING([if want to debug memory usage]) -AC_ARG_ENABLE(mem-debug, +AC_ARG_ENABLE(mem-debug, AC_HELP_STRING([--enable-mem-debug], - [enable memory debugging (debugging only) (default: disabled)])) + [enable memory debugging (not for general MPI users!) (default: disabled)])) if test "$enable_mem_debug" = "yes"; then AC_MSG_RESULT([yes]) WANT_MEM_DEBUG=1 @@ -100,12 +100,6 @@ else AC_MSG_RESULT([no]) WANT_MEM_DEBUG=0 fi -#################### Early development override #################### -if test "$WANT_MEM_DEBUG" = "0" && test -z "$enable_mem_debug" && test "$OPAL_DEVEL" = 1; then - WANT_MEM_DEBUG=1 - echo "--> developer override: enable mem profiling by default" -fi -#################### Early development override #################### AC_DEFINE_UNQUOTED(OPAL_ENABLE_MEM_DEBUG, $WANT_MEM_DEBUG, [Whether we want the memory profiling or not]) @@ -114,9 +108,9 @@ AC_DEFINE_UNQUOTED(OPAL_ENABLE_MEM_DEBUG, $WANT_MEM_DEBUG, # AC_MSG_CHECKING([if want to profile memory usage]) -AC_ARG_ENABLE(mem-profile, +AC_ARG_ENABLE(mem-profile, AC_HELP_STRING([--enable-mem-profile], - [enable memory profiling (debugging only) (default: disabled)])) + [enable memory profiling (not for general MPI users!) 
(default: disabled)])) if test "$enable_mem_profile" = "yes"; then AC_MSG_RESULT([yes]) WANT_MEM_PROFILE=1 @@ -124,12 +118,6 @@ else AC_MSG_RESULT([no]) WANT_MEM_PROFILE=0 fi -#################### Early development override #################### -if test "$WANT_MEM_PROFILE" = "0" && test -z "$enable_mem_profile" && test "$OPAL_DEVEL" = 1; then - WANT_MEM_PROFILE=1 - echo "--> developer override: enable mem profiling by default" -fi -#################### Early development override #################### AC_DEFINE_UNQUOTED(OPAL_ENABLE_MEM_PROFILE, $WANT_MEM_PROFILE, [Whether we want the memory profiling or not]) @@ -138,9 +126,9 @@ AC_DEFINE_UNQUOTED(OPAL_ENABLE_MEM_PROFILE, $WANT_MEM_PROFILE, # AC_MSG_CHECKING([if want developer-level compiler pickyness]) -AC_ARG_ENABLE(picky, +AC_ARG_ENABLE(picky, AC_HELP_STRING([--enable-picky], - [enable developer-level compiler pickyness when building Open MPI (default: disabled)])) + [enable developer-level compiler pickyness when building Open MPI (default: disabled, unless a .git directory is found in the build tree)])) if test "$enable_picky" = "yes"; then AC_MSG_RESULT([yes]) WANT_PICKY_COMPILER=1 @@ -148,19 +136,19 @@ else AC_MSG_RESULT([no]) WANT_PICKY_COMPILER=0 fi -#################### Early development override #################### +#################### Developer default override #################### if test "$WANT_PICKY_COMPILER" = "0" && test -z "$enable_picky" && test "$OPAL_DEVEL" = 1; then WANT_PICKY_COMPILER=1 echo "--> developer override: enable picky compiler by default" fi -#################### Early development override #################### +#################### Developer default override #################### # # Developer debugging # AC_MSG_CHECKING([if want developer-level debugging code]) -AC_ARG_ENABLE(debug, +AC_ARG_ENABLE(debug, AC_HELP_STRING([--enable-debug], [enable developer-level debugging code (not for general MPI users!) 
(default: disabled)])) if test "$enable_debug" = "yes"; then @@ -173,7 +161,7 @@ fi AC_MSG_CHECKING([if want to developer-level timing framework]) -AC_ARG_ENABLE(timing, +AC_ARG_ENABLE(timing, AC_HELP_STRING([--enable-timing], [enable developer-level timing code (not for general MPI users!) (default: disabled)])) if test "$enable_timing" = "yes"; then @@ -190,13 +178,6 @@ AC_DEFINE_UNQUOTED(OPAL_ENABLE_TIMING, $WANT_TIMING, AM_CONDITIONAL([OPAL_COMPILE_TIMING], [test "$WANT_TIMING" = "1"]) AM_CONDITIONAL([OPAL_INSTALL_TIMING_BINARIES], [test "$WANT_TIMING" = "1" && test "$enable_binaries" != "no"]) - -#################### Early development override #################### -if test "$WANT_DEBUG" = "0" && test -z "$enable_debug" && test "$OPAL_DEVEL" = 1; then - WANT_DEBUG=1 - echo "--> developer override: enable debugging code by default" -fi -#################### Early development override #################### if test "$WANT_DEBUG" = "0"; then CFLAGS="-DNDEBUG $CFLAGS" CXXFLAGS="-DNDEBUG $CXXFLAGS" @@ -302,7 +283,8 @@ else OPAL_ENABLE_DLOPEN_SUPPORT=1 AC_MSG_RESULT([yes]) fi - +AC_DEFINE_UNQUOTED(OPAL_ENABLE_DLOPEN_SUPPORT, $OPAL_ENABLE_DLOPEN_SUPPORT, + [Whether we want to enable dlopen support]) # # Heterogeneous support @@ -320,8 +302,8 @@ else AC_MSG_RESULT([no]) opal_want_heterogeneous=0 fi -AC_DEFINE_UNQUOTED([OPAL_ENABLE_HETEROGENEOUS_SUPPORT], - [$opal_want_heterogeneous], +AC_DEFINE_UNQUOTED([OPAL_ENABLE_HETEROGENEOUS_SUPPORT], + [$opal_want_heterogeneous], [Enable features required for heterogeneous support]) @@ -502,11 +484,6 @@ OPAL_WITH_OPTION_MIN_MAX_VALUE(port_name, 1024, 255, 2048) # Min length accroding to MPI-2.1, p. 418 OPAL_WITH_OPTION_MIN_MAX_VALUE(datarep_string, 128, 64, 256) -# How to build libltdl -AC_ARG_WITH([libltdl], - [AC_HELP_STRING([--with-libltdl(=DIR)], - [Where to find libltdl (this option is ignored if --disable-dlopen is used). DIR can take one of three values: "internal", "external", or a valid directory name. 
"internal" (or no DIR value) forces Open MPI to use its internal copy of libltdl. "external" forces Open MPI to use an external installation of libltdl. Supplying a valid directory name also forces Open MPI to use an external installation of libltdl, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries.])]) - AC_DEFINE_UNQUOTED([OPAL_ENABLE_CRDEBUG], [0], [Whether we want checkpoint/restart enabled debugging functionality or not]) diff --git a/config/opal_ensure_contains_optflags.m4 b/config/opal_ensure_contains_optflags.m4 index 78a1e5d458d..b9d96de1299 100644 --- a/config/opal_ensure_contains_optflags.m4 +++ b/config/opal_ensure_contains_optflags.m4 @@ -6,16 +6,16 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/config/opal_find_type.m4 b/config/opal_find_type.m4 index 46bc138a55d..bc98dd681c5 100644 --- a/config/opal_find_type.m4 +++ b/config/opal_find_type.m4 @@ -6,16 +6,16 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. 
dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -32,7 +32,7 @@ AC_DEFUN([OPAL_FIND_TYPE],[ [ # Loop over all the types handed to us oft_real_type= AS_IF([test "$oft_target_size" != ""], - [m4_foreach(oft_type, [$2], + [m4_foreach(oft_type, [$2], [if test -z "$oft_real_type"; then if test "[$ac_cv_sizeof_]m4_bpatsubst(oft_type, [[^a-zA-Z0-9_]], [_])" = "$oft_target_size" ; then oft_real_type="oft_type" diff --git a/config/opal_get_version.m4sh b/config/opal_get_version.m4sh index b1041f4c65e..d9d45d972a2 100644 --- a/config/opal_get_version.m4sh +++ b/config/opal_get_version.m4sh @@ -10,15 +10,15 @@ m4_divert_push([SCRIPT])dnl # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/config/opal_lang_link_with_c.m4 b/config/opal_lang_link_with_c.m4 index f5451a6a35e..496081f4b0a 100644 --- a/config/opal_lang_link_with_c.m4 +++ b/config/opal_lang_link_with_c.m4 @@ -1,12 +1,12 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2006 Los Alamos National Security, LLC. All rights -dnl reserved. +dnl reserved. dnl Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -20,7 +20,7 @@ AC_DEFUN([OPAL_LANG_LINK_WITH_C], [ AC_CACHE_CHECK([if C and $1 are link compatible], lang_var, - [m4_if([$1], [Fortran], + [m4_if([$1], [Fortran], [m4_define([ompi_lang_link_with_c_fortran], 1) OMPI_FORTRAN_MAKE_C_FUNCTION([testfunc_name], [testfunc])], [testfunc_name="testfunc"]) @@ -38,7 +38,7 @@ EOF [AC_LANG_PUSH($1) ompi_lang_link_with_c_libs="$LIBS" LIBS="conftest_c.o $LIBS" - m4_if(ompi_lang_link_with_c_fortran, 1, + m4_if(ompi_lang_link_with_c_fortran, 1, [AC_LINK_IFELSE([AC_LANG_PROGRAM([], [ external testfunc call testfunc(1) @@ -50,7 +50,7 @@ extern "C" int testfunc(int); #else extern int testfunc(int); #endif -], +], [return testfunc(0);])], [AS_VAR_SET(lang_var, ["yes"])], [AS_VAR_SET(lang_var, ["no"])])]) LIBS="$ompi_lang_link_with_c_libs" diff --git a/config/opal_load_platform.m4 b/config/opal_load_platform.m4 index c0b3a23d34d..2079d761488 100644 --- a/config/opal_load_platform.m4 +++ b/config/opal_load_platform.m4 @@ -23,6 +23,10 @@ dnl # OPAL_LOAD_PLATFORM() # -------------------- AC_DEFUN([OPAL_LOAD_PLATFORM], [ + AC_ARG_WITH([platform-patches-dir], + [AC_HELP_STRING([--with-platform-patches-dir=DIR], + [Location of the platform patches directory. If you use this option, you must also use --with-platform.])]) + AC_ARG_WITH([platform], [AC_HELP_STRING([--with-platform=FILE], [Load options for build from FILE. 
Options on the @@ -97,6 +101,87 @@ AC_DEFUN([OPAL_LOAD_PLATFORM], [ AC_SUBST(OPAL_PARAM_FROM_PLATFORM, "no") fi + patch_dir="${with_platform}.patches" + if test -n "$with_platform_patches_dir"; then + if test "$with_platform_patches_dir" = "yes"; then + patch_dir="${with_platform}.patches" + elif test "$with_platform_patches_dir" = "no"; then + AC_MSG_NOTICE([Disabling platform patches on user request]) + patch_dir="" + elif test -d "$with_platform_patches_dir"; then + patch_dir=$with_platform_patches_dir + else + AC_MSG_ERROR([User provided patches directory: $with_platform_patches_dir not found]) + fi + fi + + patch_done="${srcdir}/.platform_patches" + patch_found=no + + if test -d "${patch_dir}"; then + if test ! -f "${patch_done}"; then + + AC_MSG_NOTICE([Checking patches from ${patch_dir}/ directory ]) + for one_patch in $patch_dir/*.patch ; do + + AC_MSG_CHECKING([patch: $one_patch for errors ]) + patch -d ${srcdir} -p1 -t -s --dry-run < ${one_patch} + if test "$?" != "0"; then + AC_MSG_RESULT([fail]) + AC_MSG_ERROR([Platform patches failed to apply]) + else + AC_MSG_RESULT([ok]) + fi + + AC_MSG_CHECKING([patch: $one_patch for unsupported configury changes ]) + has_configury_items=$(patch -d ${srcdir} -p1 -t --dry-run < ${one_patch} 2>&1 | egrep "^patching" | egrep '*\.(am|m4)$' | wc -l) + + if test $has_configury_items -ne 0; then + AC_MSG_RESULT([fail]) + AC_MSG_ERROR([Platform patches should not change configury files]) + else + AC_MSG_RESULT([ok]) + fi + done + + + for one_patch in $patch_dir/*.patch ; do + AC_MSG_NOTICE([Applying patch ${one_patch}]) + patch -d ${srcdir} -p1 -t -s < ${one_patch} + if test "$?" 
!= "0"; then + AC_MSG_ERROR([Failed to apply patch ${one_patch}]) + fi + patch_found=yes + done + + if test "$patch_found" = "yes"; then + + platform_root_short="$(basename $platform_base)" + + # If platform file resides under platform/ root folder - use filename as ident + if [ test "$platform_root_short" = "platform" ]; then + platform_ident="$platform_file" + else + platform_ident="$(basename $platform_base)" + fi + + # Set custom ident for platform patched OMPI + if [ test -z "$with_ident_string" ]; then + with_ident_string="Platform: $platform_ident" + fi + + AC_MSG_NOTICE([Platform patches applied, created stamp file ${patch_done}]) + touch ${patch_done} + else + AC_MSG_NOTICE([No platform patches in ${patch_dir}]) + fi + + else + AC_MSG_WARN([Platform patches already applied, skipping. ${patch_done} can be removed to re-apply ]) + fi + elif test -n "${patch_dir}"; then + AC_MSG_NOTICE([No platform patches in ${patch_dir}]) + fi else AC_SUBST(OPAL_DEFAULT_MCA_PARAM_CONF, [openmpi-mca-params.conf]) fi diff --git a/config/opal_mca.m4 b/config/opal_mca.m4 index 1ec342c4ecb..a1f94bce404 100644 --- a/config/opal_mca.m4 +++ b/config/opal_mca.m4 @@ -10,7 +10,7 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2013-2014 Intel, Inc. All rights reserved. dnl $COPYRIGHT$ dnl @@ -56,7 +56,10 @@ AC_DEFUN([OPAL_MCA],[ AC_ARG_ENABLE([mca-no-build], [AC_HELP_STRING([--enable-mca-no-build=LIST], [Comma-separated list of - pairs - that will not be built. Example: "--enable-mca-no-build=maffinity,btl-portals" will disable building all maffinity components and the "portals" btl components.])]) + that will not be built. 
Example: + "--enable-mca-no-build=btl-portals,oob-ud" will + disable building the "portals" btl and the "ud" + oob components.])]) AC_ARG_ENABLE(mca-dso, AC_HELP_STRING([--enable-mca-dso=LIST], [Comma-separated list of types and/or @@ -86,7 +89,7 @@ AC_DEFUN([OPAL_MCA],[ if test "$enable_mca_no_build" = "yes"; then AC_MSG_RESULT([yes]) AC_MSG_ERROR([*** The enable-mca-no-build flag requires an explicit list -*** of type-component pairs. For example, --enable-mca-direct=pml-ob1]) +*** of type-component pairs. For example, --enable-mca-no-build=pml-ob1]) else ifs_save="$IFS" IFS="${IFS}$PATH_SEPARATOR," diff --git a/config/opal_mca_priority_sort.pl b/config/opal_mca_priority_sort.pl index e54a0b70ce3..4026e990edc 100755 --- a/config/opal_mca_priority_sort.pl +++ b/config/opal_mca_priority_sort.pl @@ -3,9 +3,9 @@ # Copyright (c) 2010 Sandia National Laboratories. All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/config/opal_set_lib_prefix.m4 b/config/opal_set_lib_prefix.m4 index b8a0dfa85fa..1aec310294f 100644 --- a/config/opal_set_lib_prefix.m4 +++ b/config/opal_set_lib_prefix.m4 @@ -1,6 +1,7 @@ # -*- shell-script -*- # # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -38,3 +39,24 @@ AC_DEFUN([ORTE_SET_LIB_PREFIX],[ orte_lib_prefix_set=yes AC_SUBST(ORTE_LIB_PREFIX) ])dnl + +# +# Rename 'libmpi' and 'libmpi_FOO' with a configure time option. 
+# +AC_DEFUN([OMPI_SET_LIB_NAME],[ + AC_MSG_CHECKING([if want custom libmpi(_FOO) name]) + AC_ARG_WITH([libmpi-name], + [AC_HELP_STRING([--with-libmpi-name=STRING], + ["Replace \"libmpi(_FOO)\" with \"libSTRING(_FOO)\" (default=mpi)"])]) + + AS_IF([test "$with_libmpi_name" = "no"], + [AC_MSG_RESULT([Error]) + AC_MSG_WARN([Invalid to specify --without-libmpi-name]) + AC_MSG_ERROR([Cannot continue])]) + + AS_IF([test "$with_libmpi_name" = "" || test "$with_libmpi_name" = "yes"], + [with_libmpi_name="mpi"]) + + AC_MSG_RESULT([$with_libmpi_name]) + AC_SUBST(OMPI_LIBMPI_NAME, $with_libmpi_name) +])dnl diff --git a/config/opal_setup_component_package.m4 b/config/opal_setup_component_package.m4 index 7a939e207be..e8956493440 100644 --- a/config/opal_setup_component_package.m4 +++ b/config/opal_setup_component_package.m4 @@ -21,16 +21,16 @@ dnl dnl $HEADER$ dnl -# OPAL_SETUP_COMPONENT_PACKAGE(1: framework_name, +# OPAL_SETUP_COMPONENT_PACKAGE(1: framework_name, # 2: component_name, # 3: option_name, # 4: withdir_dir_check_file, # 5: withdir_libdir_check_file, -# 6: header, -# 7: library, -# 8: function, -# 9: extra-libraries, -# 10: [action-if-found], +# 6: header, +# 7: library, +# 8: function, +# 9: extra-libraries, +# 10: [action-if-found], # 11: [action-if-not-found]) # ------------------------------------------------ # Many components need to just check for one package, and if it's all diff --git a/config/opal_setup_cxx.m4 b/config/opal_setup_cxx.m4 index acf1b0c97c9..daf2ffe8957 100644 --- a/config/opal_setup_cxx.m4 +++ b/config/opal_setup_cxx.m4 @@ -11,21 +11,21 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2006 Los Alamos National Security, LLC. All rights -dnl reserved. +dnl reserved. dnl Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. 
dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl # This macro is necessary to get the title to be displayed first. :-) AC_DEFUN([OPAL_SETUP_CXX_BANNER],[ - opal_show_subtitle "C++ compiler and preprocessor" + opal_show_subtitle "C++ compiler and preprocessor" ]) # This macro is necessary because PROG_CXX* is REQUIREd by multiple @@ -81,7 +81,7 @@ AC_DEFUN([_OPAL_SETUP_CXX_COMPILER],[ # Back end of _OPAL_SETUP_CXX_COMPILER_BACKEND() AC_DEFUN([_OPAL_SETUP_CXX_COMPILER_BACKEND],[ # Do we want code coverage - if test "$WANT_COVERAGE" = "1"; then + if test "$WANT_COVERAGE" = "1"; then if test "$opal_cxx_vendor" = "gnu" ; then AC_MSG_WARN([$OPAL_COVERAGE_FLAGS has been added to CFLAGS (--enable-coverage)]) WANT_DEBUG=1 @@ -112,12 +112,12 @@ AC_DEFUN([_OPAL_SETUP_CXX_COMPILER_BACKEND],[ CXXFLAGS="$CXXFLAGS $add -Wno-long-double -fstrict-prototype" AC_CACHE_CHECK([if $CXX supports -Wno-long-double], [opal_cv_cxx_wno_long_double], - [AC_TRY_COMPILE([], [], + [AC_TRY_COMPILE([], [], [ dnl So -Wno-long-double did not produce any errors... - dnl We will try to extract a warning regarding + dnl We will try to extract a warning regarding dnl unrecognized or ignored options - AC_TRY_COMPILE([], [long double test;], + AC_TRY_COMPILE([], [long double test;], [ opal_cv_cxx_wno_long_double="yes" if test -s conftest.err ; then @@ -181,7 +181,7 @@ AC_DEFUN([_OPAL_SETUP_CXX_COMPILER_BACKEND],[ * files created by your C compiler. This generally indicates either * a conflict between the options specified in CFLAGS and CXXFLAGS * or a problem with the local compiler installation. More -* information (including exactly what command was given to the +* information (including exactly what command was given to the * compilers and what error resulted when the commands were executed) is * available in the config.log file in this directory. 
********************************************************************** diff --git a/config/opal_setup_ft.m4 b/config/opal_setup_ft.m4 deleted file mode 100644 index b54eea0c13c..00000000000 --- a/config/opal_setup_ft.m4 +++ /dev/null @@ -1,178 +0,0 @@ -dnl -dnl Copyright (c) 2013-2014 Intel, Inc. All rights reserved. -dnl Copyright (c) 2015 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. -dnl $COPYRIGHT$ -dnl -dnl Additional copyrights may follow -dnl -dnl $HEADER$ -dnl - -# -# --with-ft=TYPE -# TYPE: -# - LAM (synonym for 'cr' currently) -# - cr -# /* General FT sections */ -# #if OPAL_ENABLE_FT == 0 /* FT Disabled globaly */ -# #if OPAL_ENABLE_FT == 1 /* FT Enabled globaly */ -# /* CR Specific sections */ -# #if OPAL_ENABLE_FT_CR == 0 /* FT Ckpt/Restart Disabled */ -# #if OPAL_ENABLE_FT_CR == 1 /* FT Ckpt/Restart Enabled */ -# - -# This macro is necessary to get the title to be displayed first. :-) -AC_DEFUN([OPAL_SETUP_FT_BANNER],[ - opal_show_subtitle "Fault tolerance" -]) - -AC_DEFUN([OPAL_SETUP_FT_OPTIONS],[ - AC_REQUIRE([OPAL_SETUP_FT_BANNER]) - # define a variable that tells us that these options were enabled - opal_setup_ft_options="yes" - AC_ARG_WITH(ft, - [AC_HELP_STRING([--with-ft=TYPE], - [Specify the type of fault tolerance to enable. Options: LAM (LAM/MPI-like), cr (Checkpoint/Restart), (default: disabled)])], - [opal_want_ft=1], - [opal_want_ft=0]) - - # - # Checkpoint/restart enabled debugging - # - AC_ARG_ENABLE([crdebug], - [AC_HELP_STRING([--enable-crdebug], - [enable checkpoint/restart debugging functionality (default: disabled)])]) - - # - # Fault Tolerance Thread - # - # --enable-ft-thread - # #if OPAL_ENABLE_FT_THREAD == 0 /* Disabled */ - # #if OPAL_ENABLE_FT_THREAD == 1 /* Enabled */ - # - AC_ARG_ENABLE([ft_thread], - [AC_HELP_STRING([--disable-ft-thread], - [Disable fault tolerance thread running inside all processes. 
Requires OPAL thread support (default: enabled)])], - [enable_ft_thread="$enableval"], - [enable_ft_thread="undef"]) - -]) - -AC_DEFUN([OPAL_SETUP_FT],[ - if test "$opal_setup_ft_options" = "yes"; then - AC_MSG_CHECKING([if want fault tolerance]) - fi - if test "x$with_ft" != "x" || test "$opal_want_ft" = "1"; then - opal_want_ft=1 - opal_want_ft_cr=0 - opal_want_ft_type=none - - as_save_IFS=$IFS - IFS="," - for opt in $with_ft; do - IFS=$as_save_IFS - - # Default value - if test "$opt" = "" || test "$opt" = "yes"; then - opal_want_ft_cr=1 - elif test "$opt" = "LAM"; then - opal_want_ft_cr=1 - elif test "$opt" = "lam"; then - opal_want_ft_cr=1 - elif test "$opt" = "CR"; then - opal_want_ft_cr=1 - elif test "$opt" = "cr"; then - opal_want_ft_cr=1 - else - AC_MSG_RESULT([Unrecognized FT TYPE: $opt]) - AC_MSG_ERROR([Cannot continue]) - fi - done - if test "$opal_want_ft_cr" = 1; then - opal_want_ft_type="cr" - fi - - AC_MSG_RESULT([Enabled $opal_want_ft_type (Specified $with_ft)]) - AC_MSG_WARN([**************************************************]) - AC_MSG_WARN([*** Fault Tolerance Integration into Open MPI is *]) - AC_MSG_WARN([*** a research quality implementation, and care *]) - AC_MSG_WARN([*** should be used when choosing to enable it. 
*]) - AC_MSG_WARN([**************************************************]) - else - opal_want_ft=0 - opal_want_ft_cr=0 - if test "$opal_setup_ft_options" = "yes"; then - AC_MSG_RESULT([Disabled fault tolerance]) - fi - fi - AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT], [$opal_want_ft], - [Enable fault tolerance general components and logic]) - AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_CR], [$opal_want_ft_cr], - [Enable fault tolerance checkpoint/restart components and logic]) - AM_CONDITIONAL(WANT_FT, test "$opal_want_ft" = "1") - AM_CONDITIONAL(WANT_FT_CR, test "$opal_want_ft_cr" = "1") - - if test "$opal_setup_ft_options" = "yes"; then - AC_MSG_CHECKING([if want checkpoint/restart enabled debugging option]) - fi - if test "$opal_want_ft" = "0"; then - opal_want_prd=0 - if test "$opal_setup_ft_options" = "yes"; then - AC_MSG_RESULT([Disabled (fault tolerance disabled --without-ft)]) - fi - elif test "$enable_crdebug" = "yes"; then - opal_want_prd=1 - AC_MSG_RESULT([Enabled]) - else - opal_want_prd=0 - if test "$opal_setup_ft_options" = "yes"; then - AC_MSG_RESULT([Disabled]) - fi - fi - AC_DEFINE_UNQUOTED([OPAL_ENABLE_CRDEBUG], [$opal_want_prd], - [Whether we want checkpoint/restart enabled debugging functionality or not]) - - if test "$opal_setup_ft_options" = "yes"; then - AC_MSG_CHECKING([if want fault tolerance thread]) - fi - # if they do not want FT support, then they do not want this thread either - if test "$opal_want_ft" = "0"; then - opal_want_ft_thread=0 - if test "$opal_setup_ft_options" = "yes"; then - AC_MSG_RESULT([Disabled (fault tolerance disabled --without-ft)]) - fi - # if --disable-ft-thread - elif test "$enable_ft_thread" = "no"; then - opal_want_ft_thread=0 - AC_MSG_RESULT([Disabled]) - # if default, and no progress or MPI threads - elif test "$enable_ft_thread" = "undef" && test "$enable_opal_multi_threads" = "no" ; then - opal_want_ft_thread=0 - AC_MSG_RESULT([Disabled (OPAL Thread Support Disabled)]) - # if default, and MPI threads enabled for C/R only - 
elif test "$opal_want_ft_cr" = 1; then - # Default: Enable - # Make sure we have OPAL Threads enabled - if test "$enable_opal_multi_threads" = "no"; then - AC_MSG_RESULT([Must enable OPAL basic thread support to use this option]) - AC_MSG_ERROR([Cannot continue]) - else - AC_MSG_RESULT([yes]) - opal_want_ft_thread=1 - AC_MSG_WARN([**************************************************]) - AC_MSG_WARN([*** Fault Tolerance with a thread in Open MPI *]) - AC_MSG_WARN([*** is an experimental, research quality option. *]) - AC_MSG_WARN([*** It requires OPAL thread support and care *]) - AC_MSG_WARN([*** should be used when enabling these options. *]) - AC_MSG_WARN([**************************************************]) - fi - # Otherwise disabled - else - opal_want_ft_thread=0 - AC_MSG_RESULT([Disabled (Non-C/R Fault Tolerance enabled)]) - fi - AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_THREAD], [$opal_want_ft_thread], - [Enable fault tolerance thread in Open PAL]) - AM_CONDITIONAL(WANT_FT_THREAD, test "$opal_want_ft_thread" = "1") -]) diff --git a/config/opal_setup_java.m4 b/config/opal_setup_java.m4 index 9eec3b21349..ace116026ac 100644 --- a/config/opal_setup_java.m4 +++ b/config/opal_setup_java.m4 @@ -11,22 +11,22 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights -dnl reserved. +dnl reserved. dnl Copyright (c) 2007-2012 Oracle and/or its affiliates. All rights reserved. dnl Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2013 Intel, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl # This macro is necessary to get the title to be displayed first. 
:-) AC_DEFUN([OPAL_SETUP_JAVA_BANNER],[ - opal_show_subtitle "Java compiler" + opal_show_subtitle "Java compiler" ]) # OPAL_SETUP_JAVA() @@ -100,7 +100,7 @@ AC_DEFUN([OPAL_SETUP_JAVA],[ AS_IF([test -d $opal_java_dir], [AC_MSG_RESULT([found ($opal_java_dir)]) opal_java_found=1 - with_jdk_headers=$opal_java_dir + with_jdk_headers=$opal_java_dir with_jdk_bindir=/usr/bin], [AC_MSG_RESULT([not found])]) @@ -113,7 +113,7 @@ AC_DEFUN([OPAL_SETUP_JAVA],[ fi opal_java_jnih=`ls $opal_java_dir/jni.h 2>/dev/null | head -n 1` AC_MSG_CHECKING([Linux locations]) - AS_IF([test -r "$opal_java_jnih"], + AS_IF([test -r "$opal_java_jnih"], [with_jdk_headers=`dirname $opal_java_jnih` OPAL_WHICH([javac], [with_jdk_bindir]) AS_IF([test -n "$with_jdk_bindir"], @@ -123,7 +123,7 @@ AC_DEFUN([OPAL_SETUP_JAVA],[ [with_jdk_headers=])], [opal_java_dir='/usr/lib/jvm/default-java/include/' opal_java_jnih=`ls $opal_java_dir/jni.h 2>/dev/null | head -n 1` - AS_IF([test -r "$opal_java_jnih"], + AS_IF([test -r "$opal_java_jnih"], [with_jdk_headers=`dirname $opal_java_jnih` OPAL_WHICH([javac], [with_jdk_bindir]) AS_IF([test -n "$with_jdk_bindir"], @@ -186,9 +186,14 @@ AC_DEFUN([OPAL_SETUP_JAVA],[ # too. Ugh. AS_IF([test -d "$with_jdk_headers/solaris"], [OPAL_JDK_CPPFLAGS="$OPAL_JDK_CPPFLAGS -I$with_jdk_headers/solaris"]) + # Darwin JDK also requires -I$with_jdk_headers/darwin. + # See if that's there, and if so, add a -I for that, + # too. Ugh. + AS_IF([test -d "$with_jdk_headers/darwin"], + [OPAL_JDK_CPPFLAGS="$OPAL_JDK_CPPFLAGS -I$with_jdk_headers/darwin"]) CPPFLAGS="$CPPFLAGS $OPAL_JDK_CPPFLAGS"]) - AC_CHECK_HEADER([jni.h], [], + AC_CHECK_HEADER([jni.h], [], [opal_java_happy=no]) CPPFLAGS=$opal_java_CPPFLAGS_save ]) diff --git a/config/opal_setup_wrappers.m4 b/config/opal_setup_wrappers.m4 index 4fe3f18b15d..8a1f5837f61 100644 --- a/config/opal_setup_wrappers.m4 +++ b/config/opal_setup_wrappers.m4 @@ -14,6 +14,7 @@ dnl Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved.
dnl Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2016 IBM Corporation. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -428,7 +429,7 @@ AC_DEFUN([OPAL_SETUP_WRAPPER_FINAL],[ # support. However, we do want it to fail if there is no C++ # compiler. if test "$WANT_MPI_CXX_SUPPORT" = "1" ; then - OMPI_WRAPPER_CXX_LIB="-lmpi_cxx" + OMPI_WRAPPER_CXX_LIB="-l${with_libmpi_name}_cxx" OMPI_WRAPPER_CXX_REQUIRED_FILE="" elif test "$CXX" = "none"; then OMPI_WRAPPER_CXX_LIB="" diff --git a/config/opal_strip_optflags.m4 b/config/opal_strip_optflags.m4 index 8ae61423f95..eebc908664f 100644 --- a/config/opal_strip_optflags.m4 +++ b/config/opal_strip_optflags.m4 @@ -6,7 +6,7 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. @@ -14,9 +14,9 @@ dnl Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/config/opal_summary.m4 b/config/opal_summary.m4 new file mode 100644 index 00000000000..084896df125 --- /dev/null +++ b/config/opal_summary.m4 @@ -0,0 +1,115 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2016 Los Alamos National Security, LLC. All rights +dnl reserved. +dnl Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
+dnl Copyright (c) 2016 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl +AC_DEFUN([OPAL_SUMMARY_ADD],[ + OPAL_VAR_SCOPE_PUSH([ompi_summary_section ompi_summary_line ompi_summary_section_current]) + + dnl need to replace spaces in the section name with something else. _ seems like a reasonable + dnl choice. if this changes remember to change OPAL_SUMMARY_PRINT as well. + ompi_summary_section=$(echo $1 | tr ' ' '_') + ompi_summary_line="$2: $4" + ompi_summary_section_current=$(eval echo \$ompi_summary_values_$ompi_summary_section) + + if test -z "$ompi_summary_section_current" ; then + if test -z "$ompi_summary_sections" ; then + ompi_summary_sections=$ompi_summary_section + else + ompi_summary_sections="$ompi_summary_sections $ompi_summary_section" + fi + eval ompi_summary_values_$ompi_summary_section=\"$ompi_summary_line\" + else + eval ompi_summary_values_$ompi_summary_section=\"$ompi_summary_section_current,$ompi_summary_line\" + fi + + OPAL_VAR_SCOPE_POP +]) + +AC_DEFUN([OPAL_SUMMARY_PRINT],[ + OPAL_VAR_SCOPE_PUSH([ompi_summary_section ompi_summary_section_name]) + cat </dev/null 2>&1], - [orte_check_tm_pbs_config="pbs-config"])]) - AC_MSG_RESULT([$orte_check_tm_pbs_config])]) - - # If we have pbs-config, get the flags we need from there and then - # do simplistic tests looking for the tm headers and symbols - - AS_IF([test "$orte_check_tm_happy" = "yes" && test "$orte_check_tm_pbs_config" != "not found"], - [$1_CPPFLAGS=`$orte_check_tm_pbs_config --cflags` - OPAL_LOG_MSG([$1_CPPFLAGS from pbs-config: $$1_CPPFLAGS], 1) - - ORTE_CHECK_TM_LIBS_FLAGS([$1], [LDFLAGS]) - OPAL_LOG_MSG([$1_LDFLAGS from pbs-config: $$1_LDFLAGS], 1) - - ORTE_CHECK_TM_LIBS_FLAGS([$1], [LIBS]) - OPAL_LOG_MSG([$1_LIBS from pbs-config: $$1_LIBS], 1) - - # Now that we supposedly have the right flags, try them out.
- - CPPFLAGS_save="$CPPFLAGS" - LDFLAGS_save="$LDFLAGS" - LIBS_save="$LIBS" - - CPPFLAGS="$CPPFLAGS $$1_CPPFLAGS" - LIBS="$LIBS $$1_LIBS" - LDFLAGS="$LDFLAGS $$1_LDFLAGS" - - AC_CHECK_HEADER([tm.h], - [AC_CHECK_FUNC([tm_finalize], - [orte_check_tm_found="yes"])]) - - CPPFLAGS="$CPPFLAGS_save" - LDFLAGS="$LDFLAGS_save" - LIBS="$LIBS_save"]) - - # If we don't have pbs-config, then we have to look around - # manually. - - # Note that Torque 2.1.0 changed the name of their back-end - # library to "libtorque". So we have to check for both libpbs and - # libtorque. First, check for libpbs. - - orte_check_package_$1_save_CPPFLAGS="$CPPFLAGS" - orte_check_package_$1_save_LDFLAGS="$LDFLAGS" - orte_check_package_$1_save_LIBS="$LIBS" - - orte_check_package_$1_orig_CPPFLAGS="$$1_CPPFLAGS" - orte_check_package_$1_orig_LDFLAGS="$$1_LDFLAGS" - orte_check_package_$1_orig_LIBS="$$1_LIBS" - - AS_IF([test "$orte_check_tm_found" = "no"], - [AS_IF([test "$orte_check_tm_happy" = "yes"], - [_OPAL_CHECK_PACKAGE_HEADER([$1], - [tm.h], - [$orte_check_tm_dir], - [orte_check_tm_found="yes"], - [orte_check_tm_found="no"])]) - - AS_IF([test "$orte_check_tm_found" = "yes"], - [_OPAL_CHECK_PACKAGE_LIB([$1], - [pbs], - [tm_init], - [], - [$orte_check_tm_dir], - [$orte_check_tm_libdir], - [orte_check_tm_found="yes"], - [_OPAL_CHECK_PACKAGE_LIB([$1], - [torque], - [tm_init], - [], - [$orte_check_tm_dir], - [$orte_check_tm_libdir], - [orte_check_tm_found="yes"], - [orte_check_tm_found="no"])])])]) - - CPPFLAGS="$orte_check_package_$1_save_CPPFLAGS" - LDFLAGS="$orte_check_package_$1_save_LDFLAGS" - LIBS="$orte_check_package_$1_save_LIBS" - - # add the TM libraries to static builds as they are required - $1_WRAPPER_EXTRA_LDFLAGS=[$]$1_LDFLAGS - $1_WRAPPER_EXTRA_LIBS=[$]$1_LIBS + if test -z $orte_check_tm_happy ; then + OPAL_VAR_SCOPE_PUSH([orte_check_tm_found orte_check_tm_dir orte_check_tm_pbs_config orte_check_tm_LDFLAGS_save orte_check_tm_CPPFLAGS_save orte_check_tm_LIBS_save]) + + AC_ARG_WITH([tm], 
+ [AC_HELP_STRING([--with-tm(=DIR)], + [Build TM (Torque, PBSPro, and compatible) support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) + OPAL_CHECK_WITHDIR([tm], [$with_tm], [include/tm.h]) + + orte_check_tm_found=no + AS_IF([test "$with_tm" = "no"], + [orte_check_tm_happy="no"], + [orte_check_tm_happy="yes" + AS_IF([test ! -z "$with_tm" && test "$with_tm" != "yes"], + [orte_check_tm_dir="$with_tm"], + [orte_check_tm_dir=""])]) + + AS_IF([test "$orte_check_tm_happy" = "yes"], + [AC_MSG_CHECKING([for pbs-config]) + orte_check_tm_pbs_config="not found" + AS_IF([test "$orte_check_tm_dir" != "" && test -d "$orte_check_tm_dir" && test -x "$orte_check_tm_dir/bin/pbs-config"], + [orte_check_tm_pbs_config="$orte_check_tm_dir/bin/pbs-config"], + [AS_IF([pbs-config --prefix >/dev/null 2>&1], + [orte_check_tm_pbs_config="pbs-config"])]) + AC_MSG_RESULT([$orte_check_tm_pbs_config])]) + + # If we have pbs-config, get the flags we need from there and then + # do simplistic tests looking for the tm headers and symbols + + AS_IF([test "$orte_check_tm_happy" = "yes" && test "$orte_check_tm_pbs_config" != "not found"], + [orte_check_tm_CPPFLAGS=`$orte_check_tm_pbs_config --cflags` + OPAL_LOG_MSG([orte_check_tm_CPPFLAGS from pbs-config: $orte_check_tm_CPPFLAGS], 1) + + ORTE_CHECK_TM_LIBS_FLAGS([orte_check_tm], [LDFLAGS]) + OPAL_LOG_MSG([orte_check_tm_LDFLAGS from pbs-config: $orte_check_tm_LDFLAGS], 1) + + ORTE_CHECK_TM_LIBS_FLAGS([orte_check_tm], [LIBS]) + OPAL_LOG_MSG([orte_check_tm_LIBS from pbs-config: $orte_check_tm_LIBS], 1) + + # Now that we supposedly have the right flags, try them out. 
+ + orte_check_tm_CPPFLAGS_save="$CPPFLAGS" + orte_check_tm_LDFLAGS_save="$LDFLAGS" + orte_check_tm_LIBS_save="$LIBS" + + CPPFLAGS="$CPPFLAGS $orte_check_tm_CPPFLAGS" + LIBS="$LIBS $orte_check_tm_LIBS" + LDFLAGS="$LDFLAGS $orte_check_tm_LDFLAGS" + + AC_CHECK_HEADER([tm.h], + [AC_CHECK_FUNC([tm_finalize], + [orte_check_tm_found="yes"])]) + + CPPFLAGS="$orte_check_tm_CPPFLAGS_save" + LDFLAGS="$orte_check_tm_LDFLAGS_save" + LIBS="$orte_check_tm_LIBS_save"]) + + # If we don't have pbs-config, then we have to look around + # manually. + + # Note that Torque 2.1.0 changed the name of their back-end + # library to "libtorque". So we have to check for both libpbs and + # libtorque. First, check for libpbs. + + orte_check_package_$1_save_CPPFLAGS="$CPPFLAGS" + orte_check_package_$1_save_LDFLAGS="$LDFLAGS" + orte_check_package_$1_save_LIBS="$LIBS" + + AS_IF([test "$orte_check_tm_found" = "no"], + [AS_IF([test "$orte_check_tm_happy" = "yes"], + [_OPAL_CHECK_PACKAGE_HEADER([orte_check_tm], + [tm.h], + [$orte_check_tm_dir], + [orte_check_tm_found="yes"], + [orte_check_tm_found="no"])]) + + AS_IF([test "$orte_check_tm_found" = "yes"], + [_OPAL_CHECK_PACKAGE_LIB([orte_check_tm], + [pbs], + [tm_init], + [], + [$orte_check_tm_dir], + [$orte_check_tm_libdir], + [orte_check_tm_found="yes"], + [_OPAL_CHECK_PACKAGE_LIB([orte_check_tm], + [torque], + [tm_init], + [], + [$orte_check_tm_dir], + [$orte_check_tm_libdir], + [orte_check_tm_found="yes"], + [orte_check_tm_found="no"])])])]) + + CPPFLAGS="$orte_check_package_$1_save_CPPFLAGS" + LDFLAGS="$orte_check_package_$1_save_LDFLAGS" + LIBS="$orte_check_package_$1_save_LIBS" + + if test "$orte_check_tm_found" = "no" ; then + orte_check_tm_happy=no + fi + + OPAL_SUMMARY_ADD([[Resource Managers]],[[Torque]],[$1],[$orte_check_tm_happy]) + + OPAL_VAR_SCOPE_POP + fi # Did we find the right stuff? 
- AS_IF([test "$orte_check_tm_happy" = "yes" && test "$orte_check_tm_found" = "yes"], - [$2], + AS_IF([test "$orte_check_tm_happy" = "yes"], + [$1_LIBS="[$]$1_LIBS $orte_check_tm_LIBS" + $1_LDFLAGS="[$]$1_LDFLAGS $orte_check_tm_LDFLAGS" + $1_CPPFLAGS="[$]$1_CPPFLAGS $orte_check_tm_CPPFLAGS" + # add the TM libraries to static builds as they are required + $1_WRAPPER_EXTRA_LDFLAGS=[$]$1_LDFLAGS + $1_WRAPPER_EXTRA_LIBS=[$]$1_LIBS + $2], [AS_IF([test ! -z "$with_tm" && test "$with_tm" != "no"], [AC_MSG_ERROR([TM support requested but not found. Aborting])]) - $3]) - - OPAL_VAR_SCOPE_POP + orte_check_tm_happy="no" + $3]) ]) diff --git a/config/orte_config_files.m4 b/config/orte_config_files.m4 index dc87637d34b..a426b08d4fa 100644 --- a/config/orte_config_files.m4 +++ b/config/orte_config_files.m4 @@ -8,9 +8,9 @@ # reserved. # Copyright (c) 2015 Intel, Inc. All rights reserved # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -19,18 +19,15 @@ AC_DEFUN([ORTE_CONFIG_FILES],[ orte/Makefile orte/include/Makefile orte/etc/Makefile - + orte/tools/orted/Makefile orte/tools/orterun/Makefile orte/tools/wrappers/Makefile orte/tools/wrappers/ortecc-wrapper-data.txt orte/tools/wrappers/orte.pc - orte/tools/orte-checkpoint/Makefile - orte/tools/orte-restart/Makefile orte/tools/orte-ps/Makefile orte/tools/orte-clean/Makefile orte/tools/orte-top/Makefile - orte/tools/orte-migrate/Makefile orte/tools/orte-info/Makefile orte/tools/orte-server/Makefile orte/tools/orte-submit/Makefile diff --git a/config/orte_configure_options.m4 b/config/orte_configure_options.m4 index b97a549380e..2c7db0ae62c 100644 --- a/config/orte_configure_options.m4 +++ b/config/orte_configure_options.m4 @@ -18,9 +18,9 @@ dnl reserved. dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. 
dnl dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/config/orte_setup_debugger_flags.m4 b/config/orte_setup_debugger_flags.m4 index 4ed96b9becc..39ac77defef 100644 --- a/config/orte_setup_debugger_flags.m4 +++ b/config/orte_setup_debugger_flags.m4 @@ -50,7 +50,7 @@ AC_DEFUN([ORTE_SETUP_DEBUGGER_FLAGS],[ AC_MSG_RESULT([$CFLAGS_WITHOUT_OPTFLAGS]) AC_MSG_CHECKING([for debugger extra CFLAGS]) AC_MSG_RESULT([$DEBUGGER_CFLAGS]) - + AC_SUBST(CFLAGS_WITHOUT_OPTFLAGS) AC_SUBST(DEBUGGER_CFLAGS) ]) diff --git a/config/orte_setup_java.m4 b/config/orte_setup_java.m4 index ab7d6b5fc62..e8c9789afdb 100644 --- a/config/orte_setup_java.m4 +++ b/config/orte_setup_java.m4 @@ -11,21 +11,21 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights -dnl reserved. +dnl reserved. dnl Copyright (c) 2007-2012 Oracle and/or its affiliates. All rights reserved. dnl Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl # This macro is necessary to get the title to be displayed first. 
:-) AC_DEFUN([ORTE_SETUP_JAVA_BANNER],[ - opal_show_subtitle "Java compiler" + opal_show_subtitle "Java compiler" ]) # ORTE_SETUP_JAVA() @@ -99,7 +99,7 @@ AC_DEFUN([ORTE_SETUP_JAVA],[ AS_IF([test -d $dir], [AC_MSG_RESULT([found]) found=1 - with_jdk_headers=$dir + with_jdk_headers=$dir with_jdk_bindir=/usr/bin], [AC_MSG_RESULT([not found])]) @@ -112,7 +112,7 @@ AC_DEFUN([ORTE_SETUP_JAVA],[ fi jnih=`ls $dir/jni.h 2>/dev/null | head -n 1` AC_MSG_CHECKING([Linux locations]) - AS_IF([test -r "$jnih"], + AS_IF([test -r "$jnih"], [with_jdk_headers=`dirname $jnih` OPAL_WHICH([javac], [with_jdk_bindir]) AS_IF([test -n "$with_jdk_bindir"], @@ -122,7 +122,7 @@ AC_DEFUN([ORTE_SETUP_JAVA],[ [with_jdk_headers=])], [dir='/usr/lib/jvm/default-java/include/' jnih=`ls $dir/jni.h 2>/dev/null | head -n 1` - AS_IF([test -r "$jnih"], + AS_IF([test -r "$jnih"], [with_jdk_headers=`dirname $jnih` OPAL_WHICH([javac], [with_jdk_bindir]) AS_IF([test -n "$with_jdk_bindir"], @@ -187,7 +187,7 @@ AC_DEFUN([ORTE_SETUP_JAVA],[ [ORTE_JDK_CPPFLAGS="$ORTE_JDK_CPPFLAGS -I$with_jdk_headers/solaris"]) CPPFLAGS="$CPPFLAGS $ORTE_JDK_CPPFLAGS"]) - AC_CHECK_HEADER([jni.h], [], + AC_CHECK_HEADER([jni.h], [], [orte_java_happy=no]) CPPFLAGS=$CPPFLAGS_save ]) diff --git a/config/oshmem_setup_java.m4 b/config/oshmem_setup_java.m4 deleted file mode 100644 index b8796134299..00000000000 --- a/config/oshmem_setup_java.m4 +++ /dev/null @@ -1,101 +0,0 @@ -dnl -*- shell-script -*- -dnl -dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -dnl University Research and Technology -dnl Corporation. All rights reserved. -dnl Copyright (c) 2004-2006 The University of Tennessee and The University -dnl of Tennessee Research Foundation. All rights -dnl reserved. -dnl Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, -dnl University of Stuttgart. All rights reserved. -dnl Copyright (c) 2004-2006 The Regents of the University of California. -dnl All rights reserved. 
-dnl Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights -dnl reserved. -dnl Copyright (c) 2007-2012 Oracle and/or its affiliates. All rights reserved. -dnl Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2015 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. -dnl $COPYRIGHT$ -dnl -dnl Additional copyrights may follow -dnl -dnl $HEADER$ -dnl - -# This macro is necessary to get the title to be displayed first. :-) -AC_DEFUN([OSHMEM_SETUP_JAVA_BINDINGS_BANNER],[ - opal_show_subtitle "Java OSHMEM bindings" -]) - -# OSHMEM_SETUP_JAVA_BINDINGS() -# ---------------- -# Do everything required to setup the Java OSHMEM bindings. Safe to AC_REQUIRE -# this macro. -AC_DEFUN([OSHMEM_SETUP_JAVA_BINDINGS],[ - # must have Java setup - AC_REQUIRE([OPAL_SETUP_JAVA]) - - AC_REQUIRE([OSHMEM_SETUP_JAVA_BINDINGS_BANNER]) - - AC_MSG_CHECKING([if want Java bindings]) - AC_ARG_ENABLE(oshmem-java, - AC_HELP_STRING([--enable-oshmem-java], - [enable Java OSHMEM bindings (default: disabled)])) - - # check for required support - if test "$opal_java_happy" = "no" && test "$enable_oshmem_java" = "yes"; then - AC_MSG_RESULT([yes]) - AC_MSG_WARN([Java bindings requested but no Java support found]) - AC_MSG_ERROR([cannot continue]) - fi - - # Only build the Java bindings if requested - if test "$opal_java_happy" = "yes" && test "$enable_oshmem_java" = "yes"; then - AC_MSG_RESULT([yes]) - WANT_OSHMEM_JAVA_SUPPORT=1 - AC_MSG_CHECKING([if shared libraries are enabled]) - AS_IF([test "$enable_shared" != "yes"], - [AC_MSG_RESULT([no]) - AC_MSG_WARN([Java bindings cannot be built without shared libraries]) - AC_MSG_WARN([Please reconfigure with --enable-shared]) - AC_MSG_ERROR([Cannot continue])], - [AC_MSG_RESULT([yes])]) - # must have Java support - AC_MSG_CHECKING([if Java support was found]) - AS_IF([test "$opal_java_happy" = "yes"], - [AC_MSG_RESULT([yes])], - [AC_MSG_WARN([Java OSHMEM bindings 
requested, but Java support was not found]) - AC_MSG_WARN([Please reconfigure the --with-jdk options to where Java]) - AC_MSG_WARN([support can be found]) - AC_MSG_ERROR([Cannot continue])]) - - # Mac Java requires this file (i.e., some other Java-related - # header file needs this file, so we need to check for - # it/include it in our sources when compiling on Mac). - AC_CHECK_HEADERS([TargetConditionals.h]) - else - AC_MSG_RESULT([no]) - WANT_OSHMEM_JAVA_SUPPORT=0 - fi - AC_DEFINE_UNQUOTED([OSHMEM_WANT_JAVA_BINDINGS], [$WANT_OSHMEM_JAVA_SUPPORT], - [do we want java oshmem bindings]) - AM_CONDITIONAL(OSHMEM_WANT_JAVA_BINDINGS, test "$WANT_OSHMEM_JAVA_SUPPORT" = "1") - - # Are we happy? - AS_IF([test "$WANT_OSHMEM_JAVA_SUPPORT" = "1"], - [AC_MSG_WARN([******************************************************]) - AC_MSG_WARN([*** Java OSHMEM bindings are provided on a provisional]) - AC_MSG_WARN([*** basis. They are NOT part of the current or]) - AC_MSG_WARN([*** proposed OSHMEM standard. Continued inclusion of]) - AC_MSG_WARN([*** the Java OSHMEM bindings OSHMEM is contingent]) - AC_MSG_WARN([*** upon user interest and developer support.]) - AC_MSG_WARN([******************************************************]) - ]) - - AC_CONFIG_FILES([ - oshmem/shmem/java/Makefile - oshmem/shmem/java/java/Makefile - oshmem/shmem/java/c/Makefile - ]) -]) diff --git a/config/pkg.m4 b/config/pkg.m4 index 5f67c354609..b0bab42dfa9 100644 --- a/config/pkg.m4 +++ b/config/pkg.m4 @@ -1,6 +1,6 @@ # pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- # serial 1 (pkg-config-0.24) -# +# # Copyright © 2004 Scott James Remnant . 
# # This program is free software; you can redistribute it and/or modify @@ -123,7 +123,7 @@ if test $pkg_failed = yes; then _PKG_SHORT_ERRORS_SUPPORTED if test $_pkg_short_errors_supported = yes; then $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` - else + else $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` fi # Put the nasty error message in config.log where it belongs @@ -266,7 +266,7 @@ if test $pkg_failed = yes; then _PKG_SHORT_ERRORS_SUPPORTED if test $_pkg_short_errors_supported = yes; then $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` - else + else $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` fi # Put the nasty error message in config.log where it belongs diff --git a/configure.ac b/configure.ac index a46a7c6d1ed..eefecef2b1a 100644 --- a/configure.ac +++ b/configure.ac @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2006-2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2006-2011 Los Alamos National Security, LLC. All rights # reserved. @@ -20,8 +20,9 @@ # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. # Copyright (c) 2013-2014 Intel, Inc. All rights reserved. -# Copyright (c) 2014-2015 Research Organization for Information Science +# Copyright (c) 2014-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -62,6 +63,10 @@ OPAL_LOAD_PLATFORM # Start it up # +AC_CHECK_PROG([PERL],[perl],[perl],[no]) +AS_IF([test "X$PERL" = "Xno"], + [AC_MSG_ERROR(["Open MPI requires perl. 
Aborting"])]) + OPAL_CONFIGURE_SETUP opal_show_title "Configuring project_name_long" @@ -132,12 +137,12 @@ m4_ifdef([project_ompi], AC_SUBST(libmpi_usempi_tkr_so_version) AC_SUBST(libmpi_usempi_ignore_tkr_so_version) AC_SUBST(libmpi_usempif08_so_version) - AC_SUBST(libmpi_java_so_version)]) + AC_SUBST(libmpi_java_so_version) + AC_SUBST(libompitrace_so_version)]) m4_ifdef([project_orte], [AC_SUBST(libopen_rte_so_version)]) m4_ifdef([project_oshmem], - [AC_SUBST(liboshmem_so_version) - AC_SUBST(liboshmem_java_so_version)]) + [AC_SUBST(liboshmem_so_version)]) AC_SUBST(libopen_pal_so_version) # It's icky that we have to hard-code the names of the # common components here. :-( This could probably be done @@ -145,12 +150,11 @@ AC_SUBST(libopen_pal_so_version) # and/or opal_mca.m4, but I don't have the cycles to do this # right now. AC_SUBST(libmca_opal_common_libfabric_so_version) -AC_SUBST(libmca_common_cuda_so_version) -AC_SUBST(libmca_common_ofacm_so_version) -AC_SUBST(libmca_common_sm_so_version) -AC_SUBST(libmca_common_ugni_so_version) -AC_SUBST(libmca_common_verbs_so_version) -AC_SUBST(libmca_common_alps_so_version) +AC_SUBST(libmca_opal_common_cuda_so_version) +AC_SUBST(libmca_opal_common_sm_so_version) +AC_SUBST(libmca_opal_common_ugni_so_version) +AC_SUBST(libmca_opal_common_verbs_so_version) +AC_SUBST(libmca_orte_common_alps_so_version) # # Get the versions of the autotools that were used to bootstrap us @@ -249,6 +253,7 @@ m4_ifdef([project_oshmem], OPAL_CONFIGURE_OPTIONS OPAL_CHECK_OS_FLAVORS OPAL_CHECK_CUDA +OPAL_CHECK_PMIX m4_ifdef([project_orte], [ORTE_CONFIGURE_OPTIONS]) m4_ifdef([project_ompi], [OMPI_CONFIGURE_OPTIONS]) m4_ifdef([project_oshmem], [OSHMEM_CONFIGURE_OPTIONS]) @@ -276,6 +281,8 @@ fi OPAL_SET_LIB_PREFIX([]) m4_ifdef([project_orte], [ORTE_SET_LIB_PREFIX([])]) +m4_ifdef([project_ompi], + [OMPI_SET_LIB_NAME([])]) ############################################################################ # Libtool: part one @@ -496,14 +503,10 @@ fi if test 
"$WANT_WEAK_SYMBOLS" = "0"; then OPAL_C_HAVE_WEAK_SYMBOLS=0 fi -if test "$WANT_MPI_PROFILING" = "1"; then - if test "$OPAL_C_HAVE_WEAK_SYMBOLS" = "1"; then - OMPI_PROFILING_COMPILE_SEPARATELY=0 - else - OMPI_PROFILING_COMPILE_SEPARATELY=1 - fi -else +if test "$OPAL_C_HAVE_WEAK_SYMBOLS" = "1"; then OMPI_PROFILING_COMPILE_SEPARATELY=0 +else + OMPI_PROFILING_COMPILE_SEPARATELY=1 fi # Check if we support the offsetof compiler directive @@ -541,7 +544,6 @@ OPAL_CHECK_COMPILER_VERSION_ID ################################## # Only needed for OMPI m4_ifdef([project_ompi], [OMPI_SETUP_JAVA_BINDINGS]) -m4_ifdef([project_oshmem], [OSHMEM_SETUP_JAVA_BINDINGS]) ################################## @@ -549,13 +551,7 @@ m4_ifdef([project_oshmem], [OSHMEM_SETUP_JAVA_BINDINGS]) ################################## # Setup profiling bindings (if we're building the relevant projects). -# Note that opal_wrapper.c has a hard-coded use of the -# OMPI_ENABLE_MPI_PROFILING macro, so we need to define it (to 0) even -# if we're not building the OMPI project. 
- -m4_ifdef([project_ompi], [OMPI_SETUP_MPI_PROFILING], - [AC_DEFINE([OMPI_ENABLE_MPI_PROFILING], [0], - [We are not building OMPI, so no profiling])]) +m4_ifdef([project_ompi], [OMPI_SETUP_MPI_PROFILING]) m4_ifdef([project_oshmem], [OSHMEM_SETUP_PROFILING]) @@ -592,18 +588,17 @@ AC_CACHE_SAVE opal_show_title "Header file tests" AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \ - dlfcn.h execinfo.h err.h fcntl.h grp.h inttypes.h libgen.h \ + dlfcn.h execinfo.h err.h fcntl.h grp.h libgen.h \ libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \ - poll.h pthread.h pty.h pwd.h sched.h stdint.h stddef.h \ - stdlib.h string.h strings.h stropts.h sys/fcntl.h sys/ipc.h sys/shm.h \ + poll.h pthread.h pty.h pwd.h sched.h \ + strings.h stropts.h sys/fcntl.h sys/ipc.h sys/shm.h \ sys/ioctl.h sys/mman.h sys/param.h sys/queue.h \ sys/resource.h sys/select.h sys/socket.h sys/sockio.h \ - stdarg.h sys/stat.h sys/statfs.h sys/statvfs.h sys/time.h sys/tree.h \ + sys/stat.h sys/statfs.h sys/statvfs.h sys/time.h sys/tree.h \ sys/types.h sys/uio.h sys/un.h net/uio.h sys/utsname.h sys/vfs.h sys/wait.h syslog.h \ - time.h termios.h ulimit.h unistd.h util.h utmp.h malloc.h \ - ifaddrs.h crt_externs.h regex.h signal.h \ - mntent.h paths.h \ - ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h limits.h db.h ndbm.h]) + termios.h ulimit.h unistd.h util.h utmp.h malloc.h \ + ifaddrs.h crt_externs.h regex.h mntent.h paths.h \ + ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h db.h ndbm.h]) AC_CHECK_HEADERS([sys/mount.h], [], [], [AC_INCLUDES_DEFAULT @@ -852,7 +847,7 @@ OPAL_SEARCH_LIBS_CORE([ceil], [m]) # -lrt might be needed for clock_gettime OPAL_SEARCH_LIBS_CORE([clock_gettime], [rt]) -AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf openpty isatty getpwuid fork waitpid execve pipe ptsname setsid mmap tcgetpgrp posix_memalign strsignal sysconf syslog vsyslog regcmp regexec regfree _NSGetEnviron socketpair strncpy_s usleep mkfifo dbopen dbm_open statfs statvfs setpgid setenv]) 
+AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf openpty isatty getpwuid fork waitpid execve pipe ptsname setsid mmap tcgetpgrp posix_memalign strsignal sysconf syslog vsyslog regcmp regexec regfree _NSGetEnviron socketpair strncpy_s usleep mkfifo dbopen dbm_open statfs statvfs setpgid setenv __malloc_initialize_hook]) # Sanity check: ensure that we got at least one of statfs or statvfs. if test $ac_cv_func_statfs = no && test $ac_cv_func_statvfs = no; then @@ -1082,30 +1077,6 @@ AC_INCLUDES_DEFAULT # checkpoint results AC_CACHE_SAVE -########################################################### -# Fault Tolerance -# -# The FT code in the OMPI trunk is currently broken. We don't -# have an active maintainer for it at this time, and it isn't -# clear if/when we will return to it. We have therefore removed -# the configure options supporting it until such time as it -# can be fixed. -# -# However, we recognize that there are researchers who use this -# option on their independent branches. In such cases, simply -# uncomment the line below to render the FT configure options -# visible again -# -########################################################### -OPAL_SETUP_FT_OPTIONS -########################################################### -# The following line is always required as it contains the -# AC_DEFINE and AM_CONDITIONAL calls that set variables used -# throughout the build system. If the above line is commented -# out, then those variables will be set to "off". 
Otherwise, -# they are controlled by the options -OPAL_SETUP_FT - ################################## # MCA ################################## @@ -1438,3 +1409,5 @@ m4_ifdef([project_ompi], [OMPI_CONFIG_FILES]) m4_ifdef([project_oshmem], [OSHMEM_CONFIG_FILES]) AC_OUTPUT + +OPAL_SUMMARY_PRINT diff --git a/contrib/Makefile.am b/contrib/Makefile.am index 8a984ac2ed2..384e07c3b56 100644 --- a/contrib/Makefile.am +++ b/contrib/Makefile.am @@ -5,31 +5,26 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010 IBM Corporation. All rights reserved. # Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2016 Los Alamos National Security, Inc. All rights +# reserved. # Copyright (c) 2013 Intel Corporation. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # amca_paramdir = $(AMCA_PARAM_SETS_DIR) dist_amca_param_DATA = amca-param-sets/example.conf -if WANT_FT_CR -dist_amca_param_DATA += \ - amca-param-sets/ft-enable-cr \ - amca-param-sets/ft-enable-cr-recovery -endif # WANT_FT_CR - EXTRA_DIST = \ completion/mpirun.sh \ completion/mpirun.zsh \ @@ -59,16 +54,22 @@ EXTRA_DIST = \ platform/lanl/cray_xe6/optimized-common \ platform/lanl/cray_xe6/optimized-lustre \ platform/lanl/cray_xe6/optimized-lustre.conf \ + platform/lanl/cray_xc_cle5.2/debug-common \ + platform/lanl/cray_xc_cle5.2/debug-lustre \ + platform/lanl/cray_xc_cle5.2/debug-lustre.conf \ + platform/lanl/cray_xc_cle5.2/optimized-common \ + platform/lanl/cray_xc_cle5.2/optimized-lustre \ + platform/lanl/cray_xc_cle5.2/optimized-lustre.conf \ platform/lanl/toss/debug-common \ - platform/lanl/toss/debug-nopanasas \ - platform/lanl/toss/debug-nopanasas.conf \ - platform/lanl/toss/debug-panasas \ - platform/lanl/toss/debug-panasas.conf \ + platform/lanl/toss/debug \ + platform/lanl/toss/debug.conf \ + platform/lanl/toss/debug-mlx \ + platform/lanl/toss/debug-mlx.conf \ platform/lanl/toss/optimized-common \ - platform/lanl/toss/optimized-nopanasas \ - platform/lanl/toss/optimized-nopanasas.conf \ - platform/lanl/toss/optimized-panasas \ - platform/lanl/toss/optimized-panasas.conf \ + platform/lanl/toss/optimized \ + platform/lanl/toss/optimized.conf \ + platform/lanl/toss/optimized-mlx \ + platform/lanl/toss/optimized-mlx.conf \ platform/lanl/toss/toss-common \ platform/lanl/darwin/darwin-common \ platform/lanl/darwin/debug-common \ diff --git a/contrib/amca-param-sets/ft-enable-cr b/contrib/amca-param-sets/ft-enable-cr index ea125f66a42..1cec86dde48 100644 --- a/contrib/amca-param-sets/ft-enable-cr +++ b/contrib/amca-param-sets/ft-enable-cr @@ -4,9 +4,9 @@ # Corporation. All rights reserved. 
# # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # An Aggregate MCA Parameter Set to enable checkpoint/restart capabilities diff --git a/contrib/amca-param-sets/ft-enable-cr-recovery b/contrib/amca-param-sets/ft-enable-cr-recovery index 62100984f40..5b175ab018f 100644 --- a/contrib/amca-param-sets/ft-enable-cr-recovery +++ b/contrib/amca-param-sets/ft-enable-cr-recovery @@ -3,9 +3,9 @@ # University Research and Technology # Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # An Aggregate MCA Parameter Set to enable checkpoint/restart capabilities diff --git a/contrib/build-mca-comps-outside-of-tree/Makefile.am b/contrib/build-mca-comps-outside-of-tree/Makefile.am index d2474b67e88..33b598d1bd5 100644 --- a/contrib/build-mca-comps-outside-of-tree/Makefile.am +++ b/contrib/build-mca-comps-outside-of-tree/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/contrib/build-mca-comps-outside-of-tree/autogen.sh b/contrib/build-mca-comps-outside-of-tree/autogen.sh index 4720d5576b3..554b595f2d3 100755 --- a/contrib/build-mca-comps-outside-of-tree/autogen.sh +++ b/contrib/build-mca-comps-outside-of-tree/autogen.sh @@ -1,9 +1,9 @@ #!/bin/sh # Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2.addr.h b/contrib/build-mca-comps-outside-of-tree/btl_tcp2.addr.h index c75bb3bc98e..cebfda0967a 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2.addr.h +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2.addr.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -54,13 +54,13 @@ struct mca_btl_tcp2_addr_t { uint32_t _pad[3]; } _addr__inet; } _union_inet; - } addr_inet; + } addr_inet; #endif in_port_t addr_port; /**< listen port */ uint16_t addr_ifkindex; /**< remote interface index assigned with this address */ unsigned short addr_inuse; /**< local meaning only */ - uint8_t addr_family; /**< AF_INET or AF_INET6 */ + uint8_t addr_family; /**< AF_INET or AF_INET6 */ }; typedef struct mca_btl_tcp2_addr_t mca_btl_tcp2_addr_t; diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2.c b/contrib/build-mca-comps-outside-of-tree/btl_tcp2.c index 86c2629d57d..e2188d81383 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2.c +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,13 +26,16 @@ #include "ompi/mca/btl/btl.h" #include "btl_tcp2.h" -#include "btl_tcp2_frag.h" +#include "btl_tcp2_frag.h" #include "btl_tcp2_proc.h" #include "btl_tcp2_endpoint.h" -#include "opal/datatype/opal_convertor.h" -#include "ompi/mca/mpool/base/base.h" -#include "ompi/mca/mpool/mpool.h" -#include "ompi/proc/proc.h" +#include "opal/datatype/opal_convertor.h" +#include "ompi/mca/mpool/base/base.h" +#include "ompi/mca/mpool/mpool.h" +#include "btl_tcp.h" +#include "btl_tcp_frag.h" +#include "btl_tcp_proc.h" +#include "btl_tcp_endpoint.h" mca_btl_tcp2_module_t mca_btl_tcp2_module = { { @@ -49,17 +52,17 @@ mca_btl_tcp2_module_t mca_btl_tcp2_module = { 0, /* flags */ mca_btl_tcp2_add_procs, mca_btl_tcp2_del_procs, - NULL, + NULL, mca_btl_tcp2_finalize, - mca_btl_tcp2_alloc, - mca_btl_tcp2_free, + mca_btl_tcp2_alloc, + mca_btl_tcp2_free, mca_btl_tcp2_prepare_src, mca_btl_tcp2_prepare_dst, mca_btl_tcp2_send, NULL, /* send immediate */ - mca_btl_tcp2_put, - NULL, /* get */ - mca_btl_base_dump, + mca_btl_tcp_put, + NULL, /* get */ + mca_btl_tcp_dump, NULL, /* mpool */ NULL, /* register error */ mca_btl_tcp2_ft_event @@ -70,10 +73,10 @@ mca_btl_tcp2_module_t mca_btl_tcp2_module = { * */ -int mca_btl_tcp2_add_procs( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t **ompi_procs, - struct mca_btl_base_endpoint_t** peers, +int mca_btl_tcp2_add_procs( struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t 
**ompi_procs, + struct mca_btl_base_endpoint_t** peers, opal_bitmap_t* reachable ) { mca_btl_tcp2_module_t* tcp_btl = (mca_btl_tcp2_module_t*)btl; @@ -101,15 +104,15 @@ int mca_btl_tcp2_add_procs( struct mca_btl_base_module_t* btl, } /* - * Check to make sure that the peer has at least as many interface - * addresses exported as we are trying to use. If not, then + * Check to make sure that the peer has at least as many interface + * addresses exported as we are trying to use. If not, then * don't bind this BTL instance to the proc. */ OPAL_THREAD_LOCK(&tcp_proc->proc_lock); /* The btl_proc datastructure is shared by all TCP BTL - * instances that are trying to reach this destination. + * instances that are trying to reach this destination. * Cache the peer instance on the btl_proc. */ tcp_endpoint = OBJ_NEW(mca_btl_tcp2_endpoint_t); @@ -134,15 +137,17 @@ int mca_btl_tcp2_add_procs( struct mca_btl_base_module_t* btl, /* we increase the count of MPI users of the event library once per peer, so that we are used until we aren't connected to a peer */ +#if !MCA_BTL_TCP_USES_PROGRESS_THREAD opal_progress_event_users_increment(); +#endif /* !MCA_BTL_TCP_USES_PROGRESS_THREAD */ } return OMPI_SUCCESS; } -int mca_btl_tcp2_del_procs(struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t **procs, +int mca_btl_tcp2_del_procs(struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t **procs, struct mca_btl_base_endpoint_t ** endpoints) { mca_btl_tcp2_module_t* tcp_btl = (mca_btl_tcp2_module_t*)btl; @@ -153,7 +158,9 @@ int mca_btl_tcp2_del_procs(struct mca_btl_base_module_t* btl, opal_list_remove_item(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint); OBJ_RELEASE(tcp_endpoint); } +#if !MCA_BTL_TCP_USES_PROGRESS_THREAD opal_progress_event_users_decrement(); +#endif /* !MCA_BTL_TCP_USES_PROGRESS_THREAD */ } return OMPI_SUCCESS; } @@ -174,16 +181,16 @@ mca_btl_base_descriptor_t* mca_btl_tcp2_alloc( uint32_t flags) { mca_btl_tcp2_frag_t* frag 
= NULL; - - if(size <= btl->btl_eager_limit) { - MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag); - } else if (size <= btl->btl_max_send_size) { - MCA_BTL_TCP_FRAG_ALLOC_MAX(frag); + + if(size <= btl->btl_eager_limit) { + MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag); + } else if (size <= btl->btl_max_send_size) { + MCA_BTL_TCP_FRAG_ALLOC_MAX(frag); } if( OPAL_UNLIKELY(NULL == frag) ) { return NULL; } - + frag->segments[0].seg_len = size; frag->segments[0].seg_addr.pval = frag+1; @@ -191,9 +198,10 @@ mca_btl_base_descriptor_t* mca_btl_tcp2_alloc( frag->base.des_src_cnt = 1; frag->base.des_dst = NULL; frag->base.des_dst_cnt = 0; - frag->base.des_flags = flags; + frag->base.des_flags = flags; frag->base.order = MCA_BTL_NO_ORDER; - frag->btl = (mca_btl_tcp2_module_t*)btl; + frag->btl = (mca_btl_tcp_module_t*)btl; + frag->endpoint = endpoint; return (mca_btl_base_descriptor_t*)frag; } @@ -203,12 +211,12 @@ mca_btl_base_descriptor_t* mca_btl_tcp2_alloc( */ int mca_btl_tcp2_free( - struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* des) + struct mca_btl_base_module_t* btl, + mca_btl_base_descriptor_t* des) { - mca_btl_tcp2_frag_t* frag = (mca_btl_tcp2_frag_t*)des; - MCA_BTL_TCP_FRAG_RETURN(frag); - return OMPI_SUCCESS; + mca_btl_tcp2_frag_t* frag = (mca_btl_tcp2_frag_t*)des; + MCA_BTL_TCP_FRAG_RETURN(frag); + return OMPI_SUCCESS; } /** @@ -244,7 +252,7 @@ mca_btl_base_descriptor_t* mca_btl_tcp2_prepare_src( if (max_data+reserve <= btl->btl_eager_limit) { MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag); } else { - /* + /* * otherwise pack as much data as we can into a fragment * that is the max send size. 
*/ @@ -265,13 +273,13 @@ mca_btl_base_descriptor_t* mca_btl_tcp2_prepare_src( } iov.iov_len = max_data; iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)(frag->segments[0].seg_addr.pval)) + reserve); - + rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); if( OPAL_UNLIKELY(rc < 0) ) { mca_btl_tcp2_free(btl, &frag->base); return NULL; } - + frag->segments[0].seg_len += max_data; } else { @@ -358,11 +366,11 @@ mca_btl_base_descriptor_t* mca_btl_tcp2_prepare_dst( int mca_btl_tcp2_send( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, + struct mca_btl_base_descriptor_t* descriptor, mca_btl_base_tag_t tag ) { - mca_btl_tcp2_module_t* tcp_btl = (mca_btl_tcp2_module_t*) btl; - mca_btl_tcp2_frag_t* frag = (mca_btl_tcp2_frag_t*)descriptor; + mca_btl_tcp2_module_t* tcp_btl = (mca_btl_tcp2_module_t*) btl; + mca_btl_tcp2_frag_t* frag = (mca_btl_tcp2_frag_t*)descriptor; int i; frag->btl = tcp_btl; @@ -384,7 +392,7 @@ int mca_btl_tcp2_send( struct mca_btl_base_module_t* btl, frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_SEND; frag->hdr.count = 0; if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr); - return mca_btl_tcp2_endpoint_send(endpoint,frag); + return mca_btl_tcp_endpoint_send(endpoint,frag); } @@ -400,8 +408,8 @@ int mca_btl_tcp2_put( mca_btl_base_module_t* btl, mca_btl_base_endpoint_t* endpoint, mca_btl_base_descriptor_t* descriptor ) { - mca_btl_tcp2_module_t* tcp_btl = (mca_btl_tcp2_module_t*) btl; - mca_btl_tcp2_frag_t* frag = (mca_btl_tcp2_frag_t*)descriptor; + mca_btl_tcp2_module_t* tcp_btl = (mca_btl_tcp2_module_t*) btl; + mca_btl_tcp2_frag_t* frag = (mca_btl_tcp2_frag_t*)descriptor; int i; frag->btl = tcp_btl; @@ -425,7 +433,7 @@ int mca_btl_tcp2_put( mca_btl_base_module_t* btl, frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_PUT; frag->hdr.count = frag->base.des_dst_cnt; if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr); - return ((i = 
mca_btl_tcp2_endpoint_send(endpoint,frag)) >= 0 ? OMPI_SUCCESS : i); + return ((i = mca_btl_tcp_endpoint_send(endpoint,frag)) >= 0 ? OMPI_SUCCESS : i); } @@ -438,13 +446,13 @@ int mca_btl_tcp2_put( mca_btl_base_module_t* btl, * */ -int mca_btl_tcp2_get( +int mca_btl_tcp2_get( mca_btl_base_module_t* btl, mca_btl_base_endpoint_t* endpoint, mca_btl_base_descriptor_t* descriptor) { - mca_btl_tcp2_module_t* tcp_btl = (mca_btl_tcp2_module_t*) btl; - mca_btl_tcp2_frag_t* frag = (mca_btl_tcp2_frag_t*)descriptor; + mca_btl_tcp2_module_t* tcp_btl = (mca_btl_tcp2_module_t*) btl; + mca_btl_tcp2_frag_t* frag = (mca_btl_tcp2_frag_t*)descriptor; int rc; frag->btl = tcp_btl; @@ -462,25 +470,60 @@ int mca_btl_tcp2_get( frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_GET; frag->hdr.count = frag->base.des_src_cnt; if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr); - return ((rc = mca_btl_tcp2_endpoint_send(endpoint,frag)) >= 0 ? OMPI_SUCCESS : rc); + return ((rc = mca_btl_tcp_endpoint_send(endpoint,frag)) >= 0 ? OMPI_SUCCESS : rc); } /* - * Cleanup/release module resources. + * Cleanup/release module resources. This function should only be called once, + * there is no need to protect it. 
*/ int mca_btl_tcp2_finalize(struct mca_btl_base_module_t* btl) { - mca_btl_tcp2_module_t* tcp_btl = (mca_btl_tcp2_module_t*) btl; + mca_btl_tcp2_module_t* tcp_btl = (mca_btl_tcp2_module_t*) btl; opal_list_item_t* item; for( item = opal_list_remove_first(&tcp_btl->tcp_endpoints); item != NULL; item = opal_list_remove_first(&tcp_btl->tcp_endpoints)) { mca_btl_tcp2_endpoint_t *endpoint = (mca_btl_tcp2_endpoint_t*)item; OBJ_RELEASE(endpoint); +#if !MCA_BTL_TCP_USES_PROGRESS_THREAD opal_progress_event_users_decrement(); +#endif /* !MCA_BTL_TCP_USES_PROGRESS_THREAD */ } free(tcp_btl); return OMPI_SUCCESS; } + +/** + * + */ +void mca_btl_tcp_dump(struct mca_btl_base_module_t* base_btl, + struct mca_btl_base_endpoint_t* endpoint, + int verbose) +{ + mca_btl_tcp_module_t* btl = (mca_btl_tcp_module_t*)base_btl; + mca_btl_base_err("%s TCP %p kernel_id %d\n" +#if MCA_BTL_TCP_STATISTICS + " | statistics: sent %lu recv %lu\n" +#endif /* MCA_BTL_TCP_STATISTICS */ + " | latency %u bandwidth %u\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void*)btl, btl->tcp_ifkindex, +#if MCA_BTL_TCP_STATISTICS + btl->tcp_bytes_sent, btl->btl_bytes_recv, +#endif /* MCA_BTL_TCP_STATISTICS */ + btl->super.btl_latency, btl->super.btl_bandwidth); + if( NULL != endpoint ) { + mca_btl_tcp_endpoint_dump( endpoint, "TCP" ); + } else if( verbose ) { + opal_list_item_t *item; + + for(item = opal_list_get_first(&btl->tcp_endpoints); + item != opal_list_get_end(&btl->tcp_endpoints); + item = opal_list_get_next(item)) { + mca_btl_tcp_endpoint_dump( (mca_btl_base_endpoint_t*)item, "TCP" ); + } + } +} + diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2.h b/contrib/build-mca-comps-outside-of-tree/btl_tcp2.h index 9173ff9ff36..581f0871874 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2.h +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2.h @@ -6,15 +6,15 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -39,7 +39,7 @@ #include "ompi/class/ompi_free_list.h" #include "ompi/mca/btl/btl.h" #include "ompi/mca/btl/base/base.h" -#include "ompi/mca/mpool/mpool.h" +#include "ompi/mca/mpool/mpool.h" #include "ompi/mca/btl/btl.h" #include "opal/class/opal_hash_table.h" @@ -52,7 +52,7 @@ BEGIN_C_DECLS */ struct mca_btl_tcp2_component_t { - mca_btl_base_component_2_0_0_t super; /**< base BTL component */ + mca_btl_base_component_2_0_0_t super; /**< base BTL component */ uint32_t tcp_addr_count; /**< total number of addresses */ uint32_t tcp_num_btls; /**< number of hcas available to the TCP component */ uint32_t tcp_num_links; /**< number of logical links per physical device */ @@ -93,7 +93,7 @@ struct mca_btl_tcp2_component_t { /* Do we want to use TCP_NODELAY? */ int tcp_use_nodelay; -}; +}; typedef struct mca_btl_tcp2_component_t mca_btl_tcp2_component_t; OMPI_MODULE_DECLSPEC extern mca_btl_tcp2_component_t mca_btl_tcp2_component; @@ -115,7 +115,7 @@ struct mca_btl_tcp2_module_t { size_t tcp_bytes_recv; size_t tcp_send_handler; #endif -}; +}; typedef struct mca_btl_tcp2_module_t mca_btl_tcp2_module_t; extern mca_btl_tcp2_module_t mca_btl_tcp2_module; @@ -133,13 +133,13 @@ extern int mca_btl_tcp2_component_close(void); /** * TCP component initialization. - * + * * @param num_btl_modules (OUT) Number of BTLs returned in BTL array. 
* @param allow_multi_user_threads (OUT) Flag indicating wether BTL supports user threads (TRUE) * @param have_hidden_threads (OUT) Flag indicating wether BTL uses threads (TRUE) */ extern mca_btl_base_module_t** mca_btl_tcp2_component_init( - int *num_btl_modules, + int *num_btl_modules, bool allow_multi_user_threads, bool have_hidden_threads ); @@ -148,8 +148,8 @@ extern mca_btl_base_module_t** mca_btl_tcp2_component_init( * TCP component control. */ int mca_btl_tcp2_component_control( - int param, - void* value, + int param, + void* value, size_t size ); @@ -163,7 +163,7 @@ extern int mca_btl_tcp2_component_progress(void); /** * Cleanup any resources held by the BTL. - * + * * @param btl BTL instance. * @return OMPI_SUCCESS or error status on failure. */ @@ -175,14 +175,14 @@ extern int mca_btl_tcp2_finalize( /** * PML->BTL notification of change in the process list. - * + * * @param btl (IN) * @param nprocs (IN) Number of processes * @param procs (IN) Set of processes * @param peers (OUT) Set of (optional) peer addressing info. * @param peers (IN/OUT) Set of processes that are reachable via this BTL. * @return OMPI_SUCCESS or error status on failure. 
- * + * */ extern int mca_btl_tcp2_add_procs( @@ -272,7 +272,7 @@ extern mca_btl_base_descriptor_t* mca_btl_tcp2_alloc( struct mca_btl_base_endpoint_t* endpoint, uint8_t order, size_t size, - uint32_t flags); + uint32_t flags); /** @@ -283,9 +283,9 @@ extern mca_btl_base_descriptor_t* mca_btl_tcp2_alloc( */ extern int mca_btl_tcp2_free( - struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* des); - + struct mca_btl_base_module_t* btl, + mca_btl_base_descriptor_t* des); + /** * Prepare a descriptor for send/rdma using the supplied @@ -298,7 +298,7 @@ extern int mca_btl_tcp2_free( * @param endpoint (IN) BTL peer addressing * @param convertor (IN) Data type convertor * @param reserve (IN) Additional bytes requested by upper layer to precede user data - * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) + * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) */ mca_btl_base_descriptor_t* mca_btl_tcp2_prepare_src( @@ -312,15 +312,15 @@ mca_btl_base_descriptor_t* mca_btl_tcp2_prepare_src( uint32_t flags ); -extern mca_btl_base_descriptor_t* mca_btl_tcp2_prepare_dst( - struct mca_btl_base_module_t* btl, +extern mca_btl_base_descriptor_t* mca_btl_tcp2_prepare_dst( + struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* peer, struct mca_mpool_base_registration_t*, struct opal_convertor_t* convertor, uint8_t order, size_t reserve, size_t* size, - uint32_t flags); + uint32_t flags); /** diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_addr.h b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_addr.h index c75bb3bc98e..cebfda0967a 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_addr.h +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_addr.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -54,13 +54,13 @@ struct mca_btl_tcp2_addr_t { uint32_t _pad[3]; } _addr__inet; } _union_inet; - } addr_inet; + } addr_inet; #endif in_port_t addr_port; /**< listen port */ uint16_t addr_ifkindex; /**< remote interface index assigned with this address */ unsigned short addr_inuse; /**< local meaning only */ - uint8_t addr_family; /**< AF_INET or AF_INET6 */ + uint8_t addr_family; /**< AF_INET or AF_INET6 */ }; typedef struct mca_btl_tcp2_addr_t mca_btl_tcp2_addr_t; diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_component.c b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_component.c index 37879b2fc8f..d79aac4b32a 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_component.c +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_component.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. 
* Copyright (c) 2009 Oak Ridge National Laboratory * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -62,14 +62,14 @@ #include "ompi/mca/btl/btl.h" #include "opal/mca/base/mca_base_param.h" #include "ompi/runtime/ompi_module_exchange.h" -#include "ompi/mca/mpool/base/base.h" +#include "ompi/mca/mpool/base/base.h" #include "ompi/mca/btl/base/btl_base_error.h" #include "btl_tcp2.h" #include "btl_tcp2_addr.h" #include "btl_tcp2_proc.h" #include "btl_tcp2_frag.h" -#include "btl_tcp2_endpoint.h" -#include "ompi/mca/btl/base/base.h" +#include "btl_tcp2_endpoint.h" +#include "ompi/mca/btl/base/base.h" mca_btl_tcp2_component_t mca_btl_tcp2_component = { @@ -92,7 +92,7 @@ mca_btl_tcp2_component_t mca_btl_tcp2_component = { MCA_BASE_METADATA_PARAM_CHECKPOINT }, - mca_btl_tcp2_component_init, + mca_btl_tcp2_component_init, NULL, } }; @@ -102,7 +102,7 @@ mca_btl_tcp2_component_t mca_btl_tcp2_component = { */ static inline char* mca_btl_tcp2_param_register_string( - const char* param_name, + const char* param_name, const char* help_string, const char* default_value) { @@ -114,13 +114,13 @@ static inline char* mca_btl_tcp2_param_register_string( } static inline int mca_btl_tcp2_param_register_int( - const char* param_name, + const char* param_name, const char* help_string, int default_value) { int value; mca_base_param_reg_int(&mca_btl_tcp2_component.super.btl_version, - param_name, help_string, false, false, + param_name, help_string, false, false, default_value, &value); return value; } @@ -181,8 +181,8 @@ int mca_btl_tcp2_component_open(void) mca_btl_tcp2_component.tcp_num_btls=0; mca_btl_tcp2_component.tcp_addr_count = 0; mca_btl_tcp2_component.tcp_btls=NULL; - - /* initialize objects */ + + /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_tcp2_component.tcp_lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_tcp2_component.tcp_procs, opal_hash_table_t); OBJ_CONSTRUCT(&mca_btl_tcp2_component.tcp_events, opal_list_t); @@ -225,7 +225,7 @@ int 
mca_btl_tcp2_component_open(void) mca_btl_tcp2_component.tcp_port_min ); mca_btl_tcp2_component.tcp_port_min = 1024; } - asprintf( &message, + asprintf( &message, "The number of ports where the TCP BTL will try to bind (default %d)." " This parameter together with the port min, define a range of ports" " where Open MPI will open sockets.", @@ -244,7 +244,7 @@ int mca_btl_tcp2_component_open(void) mca_btl_tcp2_component.tcp6_port_min ); mca_btl_tcp2_component.tcp6_port_min = 1024; } - asprintf( &message, + asprintf( &message, "The number of ports where the TCP BTL will try to bind (default %d)." " This parameter together with the port min, define a range of ports" " where Open MPI will open sockets.", @@ -298,7 +298,7 @@ int mca_btl_tcp2_component_close(void) if (NULL != mca_btl_tcp2_component.tcp_btls) free(mca_btl_tcp2_component.tcp_btls); - + if (mca_btl_tcp2_component.tcp_listen_sd >= 0) { opal_event_del(&mca_btl_tcp2_component.tcp_recv_event); CLOSE_THE_SOCKET(mca_btl_tcp2_component.tcp_listen_sd); @@ -316,7 +316,7 @@ int mca_btl_tcp2_component_close(void) /* cleanup any pending events */ OPAL_THREAD_LOCK(&mca_btl_tcp2_component.tcp_lock); for(item = opal_list_get_first(&mca_btl_tcp2_component.tcp_events); - item != opal_list_get_end(&mca_btl_tcp2_component.tcp_events); + item != opal_list_get_end(&mca_btl_tcp2_component.tcp_events); item = next) { mca_btl_tcp2_event_t* event = (mca_btl_tcp2_event_t*)item; next = opal_list_get_next(item); @@ -425,7 +425,7 @@ static char **split_and_resolve(char **orig_str, char *name) str = strchr(argv[i], '/'); if (NULL == str) { orte_show_help("help-mpi-btl-tcp2.txt", "invalid if_inexclude", - true, name, orte_process_info.nodename, + true, name, orte_process_info.nodename, tmp, "Invalid specification (missing \"/\")"); free(argv[i]); free(tmp); @@ -436,7 +436,7 @@ static char **split_and_resolve(char **orig_str, char *name) /* Now convert the IPv4 address */ ((struct sockaddr*) &argv_inaddr)->sa_family = AF_INET; - ret = 
inet_pton(AF_INET, argv[i], + ret = inet_pton(AF_INET, argv[i], &((struct sockaddr_in*) &argv_inaddr)->sin_addr); free(argv[i]); @@ -447,16 +447,16 @@ static char **split_and_resolve(char **orig_str, char *name) free(tmp); continue; } - opal_output_verbose(20, mca_btl_base_output, + opal_output_verbose(20, mca_btl_base_output, "btl: tcp: Searching for %s address+prefix: %s / %u", name, opal_net_get_hostname((struct sockaddr*) &argv_inaddr), argv_prefix); - + /* Go through all interfaces and see if we can find a match */ - for (if_index = opal_ifbegin(); if_index >= 0; + for (if_index = opal_ifbegin(); if_index >= 0; if_index = opal_ifnext(if_index)) { - opal_ifindextoaddr(if_index, + opal_ifindextoaddr(if_index, (struct sockaddr*) &if_inaddr, sizeof(if_inaddr)); if (opal_net_samenetwork((struct sockaddr*) &argv_inaddr, @@ -465,7 +465,7 @@ static char **split_and_resolve(char **orig_str, char *name) break; } } - + /* If we didn't find a match, keep trying */ if (if_index < 0) { orte_show_help("help-mpi-btl-tcp2.txt", "invalid if_inexclude", @@ -478,7 +478,7 @@ static char **split_and_resolve(char **orig_str, char *name) /* We found a match; get the name and replace it in the argv */ opal_ifindextoname(if_index, if_name, sizeof(if_name)); - opal_output_verbose(20, mca_btl_base_output, + opal_output_verbose(20, mca_btl_base_output, "btl: tcp: Found match: %s (%s)", opal_net_get_hostname((struct sockaddr*) &if_inaddr), if_name); @@ -498,7 +498,7 @@ static char **split_and_resolve(char **orig_str, char *name) /* * Create a TCP BTL instance for either: * (1) all interfaces specified by the user - * (2) all available interfaces + * (2) all available interfaces * (3) all available interfaces except for those excluded by the user */ @@ -583,7 +583,7 @@ static int mca_btl_tcp2_component_create_instances(void) goto cleanup; } - /* if the interface list was not specified by the user, create + /* if the interface list was not specified by the user, create * a BTL for each 
interface that was not excluded. */ exclude = split_and_resolve(&mca_btl_tcp2_component.tcp_if_exclude, @@ -695,7 +695,7 @@ static int mca_btl_tcp2_component_create_listen(uint16_t af_family) { int index, range, port; - + range = mca_btl_tcp2_component.tcp_port_range; port = mca_btl_tcp2_component.tcp_port_min; #if OPAL_ENABLE_IPV6 @@ -759,7 +759,7 @@ static int mca_btl_tcp2_component_create_listen(uint16_t af_family) /* setup listen backlog to maximum allowed by kernel */ if(listen(sd, SOMAXCONN) < 0) { - BTL_ERROR(("listen() failed: %s (%d)", + BTL_ERROR(("listen() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); CLOSE_THE_SOCKET(sd); return OMPI_ERROR; @@ -805,14 +805,14 @@ static int mca_btl_tcp2_component_create_listen(uint16_t af_family) /* * Register TCP module addressing information. The MCA framework - * will make this available to all peers. + * will make this available to all peers. */ static int mca_btl_tcp2_component_exchange(void) { int rc = 0, index; size_t i = 0; - size_t size = mca_btl_tcp2_component.tcp_addr_count * + size_t size = mca_btl_tcp2_component.tcp_addr_count * mca_btl_tcp2_component.tcp_num_links * sizeof(mca_btl_tcp2_addr_t); /* adi@2007-04-12: * @@ -842,10 +842,10 @@ static int mca_btl_tcp2_component_exchange(void) continue; } - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != opal_ifindextoaddr(index, (struct sockaddr*) &my_ss, sizeof (my_ss))) { - opal_output (0, + opal_output (0, "btl_tcp2_component: problems getting address for index %i (kernel index %i)\n", index, opal_ifindextokindex (index)); continue; @@ -853,10 +853,10 @@ static int mca_btl_tcp2_component_exchange(void) if ((AF_INET == my_ss.ss_family) && (4 != mca_btl_tcp2_component.tcp_disable_family)) { - memcpy(&addrs[current_addr].addr_inet, + memcpy(&addrs[current_addr].addr_inet, &((struct sockaddr_in*)&my_ss)->sin_addr, sizeof(addrs[0].addr_inet)); - addrs[current_addr].addr_port = + addrs[current_addr].addr_port = mca_btl_tcp2_component.tcp_listen_port; 
addrs[current_addr].addr_family = MCA_BTL_TCP_AF_INET; xfer_size += sizeof (mca_btl_tcp2_addr_t); @@ -871,7 +871,7 @@ static int mca_btl_tcp2_component_exchange(void) memcpy(&addrs[current_addr].addr_inet, &((struct sockaddr_in6*)&my_ss)->sin6_addr, sizeof(addrs[0].addr_inet)); - addrs[current_addr].addr_port = + addrs[current_addr].addr_port = mca_btl_tcp2_component.tcp6_listen_port; addrs[current_addr].addr_family = MCA_BTL_TCP_AF_INET6; xfer_size += sizeof (mca_btl_tcp2_addr_t); @@ -883,7 +883,7 @@ static int mca_btl_tcp2_component_exchange(void) #endif } /* end of for opal_ifbegin() */ } /* end of for tcp_num_btls */ - rc = ompi_modex_send(&mca_btl_tcp2_component.super.btl_version, + rc = ompi_modex_send(&mca_btl_tcp2_component.super.btl_version, addrs, xfer_size); free(addrs); } /* end if */ @@ -897,7 +897,7 @@ static int mca_btl_tcp2_component_exchange(void) * (2) setup TCP listen socket for incoming connection attempts * (3) register BTL parameters with the MCA */ -mca_btl_base_module_t** mca_btl_tcp2_component_init(int *num_btl_modules, +mca_btl_base_module_t** mca_btl_tcp2_component_init(int *num_btl_modules, bool enable_progress_threads, bool enable_mpi_threads) { @@ -907,7 +907,7 @@ mca_btl_base_module_t** mca_btl_tcp2_component_init(int *num_btl_modules, /* initialize free lists */ ompi_free_list_init_new( &mca_btl_tcp2_component.tcp_frag_eager, - sizeof (mca_btl_tcp2_frag_eager_t) + + sizeof (mca_btl_tcp2_frag_eager_t) + mca_btl_tcp2_module.super.btl_eager_limit, opal_cache_line_size, OBJ_CLASS (mca_btl_tcp2_frag_eager_t), @@ -916,9 +916,9 @@ mca_btl_base_module_t** mca_btl_tcp2_component_init(int *num_btl_modules, mca_btl_tcp2_component.tcp_free_list_max, mca_btl_tcp2_component.tcp_free_list_inc, NULL ); - + ompi_free_list_init_new( &mca_btl_tcp2_component.tcp_frag_max, - sizeof (mca_btl_tcp2_frag_max_t) + + sizeof (mca_btl_tcp2_frag_max_t) + mca_btl_tcp2_module.super.btl_max_send_size, opal_cache_line_size, OBJ_CLASS (mca_btl_tcp2_frag_max_t), @@ 
-927,7 +927,7 @@ mca_btl_base_module_t** mca_btl_tcp2_component_init(int *num_btl_modules, mca_btl_tcp2_component.tcp_free_list_max, mca_btl_tcp2_component.tcp_free_list_inc, NULL ); - + ompi_free_list_init_new( &mca_btl_tcp2_component.tcp_frag_user, sizeof (mca_btl_tcp2_frag_user_t), opal_cache_line_size, @@ -937,7 +937,7 @@ mca_btl_base_module_t** mca_btl_tcp2_component_init(int *num_btl_modules, mca_btl_tcp2_component.tcp_free_list_max, mca_btl_tcp2_component.tcp_free_list_inc, NULL ); - + /* create a BTL TCP module for selected interfaces */ if(OMPI_SUCCESS != (ret = mca_btl_tcp2_component_create_instances() )) { return 0; @@ -962,7 +962,7 @@ mca_btl_base_module_t** mca_btl_tcp2_component_init(int *num_btl_modules, return 0; } - btls = (mca_btl_base_module_t **)malloc(mca_btl_tcp2_component.tcp_num_btls * + btls = (mca_btl_base_module_t **)malloc(mca_btl_tcp2_component.tcp_num_btls * sizeof(mca_btl_base_module_t*)); if(NULL == btls) { return NULL; @@ -1006,14 +1006,14 @@ static void mca_btl_tcp2_component_accept_handler( int incoming_sd, if(opal_socket_errno == EINTR) continue; if(opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) - BTL_ERROR(("accept() failed: %s (%d).", + BTL_ERROR(("accept() failed: %s (%d).", strerror(opal_socket_errno), opal_socket_errno)); return; } mca_btl_tcp2_set_socket_options(sd); /* wait for receipt of peers process identifier to complete this connection */ - + event = OBJ_NEW(mca_btl_tcp2_event_t); opal_event_set(opal_event_base, &event->event, sd, OPAL_EV_READ, mca_btl_tcp2_component_recv_handler, event); opal_event_add(&event->event, 0); @@ -1022,7 +1022,7 @@ static void mca_btl_tcp2_component_accept_handler( int incoming_sd, /** - * Event callback when there is data available on the registered + * Event callback when there is data available on the registered * socket to recv. This callback is triggered only once per lifetime * for any socket, in the beginning when we setup the handshake * protocol. 
@@ -1057,7 +1057,7 @@ static void mca_btl_tcp2_component_recv_handler(int sd, short flags, void* user) strerror(opal_socket_errno), opal_socket_errno)); } } - + /* lookup the corresponding process */ btl_proc = mca_btl_tcp2_proc_lookup(&guid); if(NULL == btl_proc) { @@ -1067,7 +1067,7 @@ static void mca_btl_tcp2_component_recv_handler(int sd, short flags, void* user) /* lookup peer address */ if(getpeername(sd, (struct sockaddr*)&addr, &addr_len) != 0) { - BTL_ERROR(("getpeername() failed: %s (%d)", + BTL_ERROR(("getpeername() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); CLOSE_THE_SOCKET(sd); return; diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_endpoint.c b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_endpoint.c index 5299ef7b1ae..a1998de71af 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_endpoint.c +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_endpoint.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -49,18 +49,15 @@ #include #endif /* HAVE_TIME_H */ -#include "opal/mca/event/event.h" - -#include "ompi/types.h" -#include "ompi/mca/btl/base/btl_base_error.h" #include "opal/util/net.h" +#include "opal/util/fd.h" +#include "opal/util/show_help.h" +#include "ompi/mca/btl/base/btl_base_error.h" +#include "ompi/mca/rte/rte.h" -#include "btl_tcp2.h" -#include "btl_tcp2_endpoint.h" -#include "btl_tcp2_proc.h" -#include "btl_tcp2_frag.h" -#include "btl_tcp2_addr.h" - +#include "btl_tcp_endpoint.h" +#include "btl_tcp_proc.h" +#include "btl_tcp_frag.h" /* * Initialize state of the endpoint instance. @@ -101,9 +98,9 @@ static void mca_btl_tcp2_endpoint_destruct(mca_btl_tcp2_endpoint_t* endpoint) } OBJ_CLASS_INSTANCE( - mca_btl_tcp2_endpoint_t, - opal_list_item_t, - mca_btl_tcp2_endpoint_construct, + mca_btl_tcp2_endpoint_t, + opal_list_item_t, + mca_btl_tcp2_endpoint_construct, mca_btl_tcp2_endpoint_destruct); @@ -123,12 +120,10 @@ static void mca_btl_tcp2_endpoint_send_handler(int sd, short flags, void* user); * diagnostics */ -#if WANT_PEER_DUMP -static void mca_btl_tcp2_endpoint_dump(mca_btl_base_endpoint_t* btl_endpoint, const char* msg) +void mca_btl_tcp_endpoint_dump(mca_btl_base_endpoint_t* btl_endpoint, const char* msg) { - char src[64]; - char dst[64]; - int sndbuf,rcvbuf,nodelay,flags; + char src[64], dst[64], *status; + int sndbuf, rcvbuf, nodelay, flags = -1; #if OPAL_ENABLE_IPV6 struct sockaddr_storage inaddr; #else @@ -136,69 +131,102 @@ static void mca_btl_tcp2_endpoint_dump(mca_btl_base_endpoint_t* btl_endpoint, co #endif opal_socklen_t obtlen; opal_socklen_t addrlen = sizeof(inaddr); + opal_list_item_t *item; - getsockname(btl_endpoint->endpoint_sd, (struct sockaddr*)&inaddr, &addrlen); + if( -1 != btl_endpoint->endpoint_sd ) { + getsockname(btl_endpoint->endpoint_sd, (struct sockaddr*)&inaddr, &addrlen); #if OPAL_ENABLE_IPV6 - { - char *address; - address = (char *) 
opal_net_get_hostname((struct sockaddr*) &inaddr); - if (NULL != address) { - sprintf(src, "%s", address); + { + char *address; + address = (char *) opal_net_get_hostname((struct sockaddr*) &inaddr); + if (NULL != address) { + sprintf(src, "%s", address); + } } - } #else - sprintf(src, "%s", inet_ntoa(inaddr.sin_addr)); + sprintf(src, "%s", inet_ntoa(inaddr.sin_addr)); #endif - getpeername(btl_endpoint->endpoint_sd, (struct sockaddr*)&inaddr, &addrlen); + getpeername(btl_endpoint->endpoint_sd, (struct sockaddr*)&inaddr, &addrlen); #if OPAL_ENABLE_IPV6 - { - char *address; - address = (char *) opal_net_get_hostname ((struct sockaddr*) &inaddr); - if (NULL != address) { - sprintf(dst, "%s", address); + { + char *address; + address = (char *) opal_net_get_hostname ((struct sockaddr*) &inaddr); + if (NULL != address) { + sprintf(dst, "%s", address); + } } - } #else - sprintf(dst, "%s", inet_ntoa(inaddr.sin_addr)); + sprintf(dst, "%s", inet_ntoa(inaddr.sin_addr)); #endif - if((flags = fcntl(btl_endpoint->endpoint_sd, F_GETFL, 0)) < 0) { - BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)", - strerror(opal_socket_errno), opal_socket_errno)); - } + if((flags = fcntl(btl_endpoint->endpoint_sd, F_GETFL, 0)) < 0) { + BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)", + strerror(opal_socket_errno), opal_socket_errno)); + } #if defined(SO_SNDBUF) - obtlen = sizeof(sndbuf); - if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &obtlen) < 0) { - BTL_ERROR(("SO_SNDBUF option: %s (%d)", - strerror(opal_socket_errno), opal_socket_errno)); - } + obtlen = sizeof(sndbuf); + if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &obtlen) < 0) { + BTL_ERROR(("SO_SNDBUF option: %s (%d)", + strerror(opal_socket_errno), opal_socket_errno)); + } #else - sndbuf = -1; + sndbuf = -1; #endif #if defined(SO_RCVBUF) - obtlen = sizeof(rcvbuf); - if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_RCVBUF, (char *)&rcvbuf, &obtlen) < 0) { - 
BTL_ERROR(("SO_RCVBUF option: %s (%d)", - strerror(opal_socket_errno), opal_socket_errno)); - } + obtlen = sizeof(rcvbuf); + if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_RCVBUF, (char *)&rcvbuf, &obtlen) < 0) { + BTL_ERROR(("SO_RCVBUF option: %s (%d)", + strerror(opal_socket_errno), opal_socket_errno)); + } #else - rcvbuf = -1; + rcvbuf = -1; #endif #if defined(TCP_NODELAY) - obtlen = sizeof(nodelay); - if(getsockopt(btl_endpoint->endpoint_sd, IPPROTO_TCP, TCP_NODELAY, (char *)&nodelay, &obtlen) < 0) { - BTL_ERROR(("TCP_NODELAY option: %s (%d)", - strerror(opal_socket_errno), opal_socket_errno)); - } + obtlen = sizeof(nodelay); + if(getsockopt(btl_endpoint->endpoint_sd, IPPROTO_TCP, TCP_NODELAY, (char *)&nodelay, &obtlen) < 0) { + BTL_ERROR(("TCP_NODELAY option: %s (%d)", + strerror(opal_socket_errno), opal_socket_errno)); + } #else - nodelay = 0; + nodelay = 0; #endif + } + + mca_btl_base_err("%s %s: endpoint %p src %s - dst %s nodelay %d sndbuf %d rcvbuf %d flags %08x\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg, (void*)btl_endpoint, src, dst, nodelay, sndbuf, rcvbuf, flags); - BTL_VERBOSE(("%s: %s - %s nodelay %d sndbuf %d rcvbuf %d flags %08x", - msg, src, dst, nodelay, sndbuf, rcvbuf, flags)); + switch(btl_endpoint->endpoint_state) { + case MCA_BTL_TCP_CONNECTING: + status = "connecting"; break; + case MCA_BTL_TCP_CONNECT_ACK: + status = "connect ack"; break; + case MCA_BTL_TCP_CLOSED: + status = "closed"; break; + case MCA_BTL_TCP_FAILED: + status = "failed"; break; + case MCA_BTL_TCP_CONNECTED: + status = "connected"; break; + default: + status = "undefined"; break; + } + mca_btl_base_err("%s | [socket %d] [state %s] (nbo %s) (retries %u)\n" +#if MCA_BTL_TCP_ENDPOINT_CACHE + "\tcache %p length %lu pos %ld\n" +#endif /* MCA_BTL_TCP_ENDPOINT_CACHE */ + "\tpending: send %p recv %p\n", + msg, btl_endpoint->endpoint_sd, status, + (btl_endpoint->endpoint_nbo ? 
"true" : "false"), btl_endpoint->endpoint_retries, +#if MCA_BTL_TCP_ENDPOINT_CACHE + btl_endpoint->endpoint_cache, btl_endpoint->endpoint_cache_length, btl_endpoint->endpoint_cache_pos - btl_endpoint->endpoint_cache, +#endif /* MCA_BTL_TCP_ENDPOINT_CACHE */ + (void*)btl_endpoint->endpoint_send_frag, (void*)btl_endpoint->endpoint_recv_frag ); + for(item = opal_list_get_first(&btl_endpoint->endpoint_frags); + item != opal_list_get_end(&btl_endpoint->endpoint_frags); + item = opal_list_get_next(item)) { + mca_btl_tcp_dump_frag( (mca_btl_tcp_frag_t*)item, " | send" ); + } } -#endif /* * Initialize events to be used by the endpoint instance for TCP select/poll callbacks. @@ -211,22 +239,22 @@ static inline void mca_btl_tcp2_endpoint_event_init(mca_btl_base_endpoint_t* btl btl_endpoint->endpoint_cache_pos = btl_endpoint->endpoint_cache; #endif /* MCA_BTL_TCP_ENDPOINT_CACHE */ - opal_event_set(opal_event_base, &btl_endpoint->endpoint_recv_event, - btl_endpoint->endpoint_sd, - OPAL_EV_READ|OPAL_EV_PERSIST, - mca_btl_tcp2_endpoint_recv_handler, - btl_endpoint ); + opal_event_set(mca_btl_tcp_event_base, &btl_endpoint->endpoint_recv_event, + btl_endpoint->endpoint_sd, + OPAL_EV_READ|OPAL_EV_PERSIST, + mca_btl_tcp_endpoint_recv_handler, + btl_endpoint ); /** * The send event should be non persistent until the endpoint is * completely connected. This means, when the event is created it - * will be fired only once, and when the endpoint is marked as + * will be fired only once, and when the endpoint is marked as * CONNECTED the event should be recreated with the correct flags. 
*/ - opal_event_set(opal_event_base, &btl_endpoint->endpoint_send_event, - btl_endpoint->endpoint_sd, - OPAL_EV_WRITE, - mca_btl_tcp2_endpoint_send_handler, - btl_endpoint); + opal_event_set(mca_btl_tcp_event_base, &btl_endpoint->endpoint_send_event, + btl_endpoint->endpoint_sd, + OPAL_EV_WRITE, + mca_btl_tcp_endpoint_send_handler, + btl_endpoint); } @@ -239,7 +267,7 @@ int mca_btl_tcp2_endpoint_send(mca_btl_base_endpoint_t* btl_endpoint, mca_btl_tc { int rc = OMPI_SUCCESS; - OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock); + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&btl_endpoint->endpoint_send_lock); switch(btl_endpoint->endpoint_state) { case MCA_BTL_TCP_CONNECTING: case MCA_BTL_TCP_CONNECT_ACK: @@ -257,19 +285,13 @@ int mca_btl_tcp2_endpoint_send(mca_btl_base_endpoint_t* btl_endpoint, mca_btl_tc if(frag->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY && mca_btl_tcp2_frag_send(frag, btl_endpoint->endpoint_sd)) { int btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - - OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock); - if( frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK ) { - frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, frag->rc); - } - if( btl_ownership ) { - MCA_BTL_TCP_FRAG_RETURN(frag); - } + opal_mutex_atomic_unlock(&btl_endpoint->endpoint_send_lock); + MCA_BTL_TCP_COMPLETE_FRAG_SEND(frag); return 1; } else { btl_endpoint->endpoint_send_frag = frag; - opal_event_add(&btl_endpoint->endpoint_send_event, 0); frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + MCA_BTL_TCP_ACTIVATE_EVENT(&btl_endpoint->endpoint_send_event, 0); } } else { frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; @@ -277,7 +299,7 @@ int mca_btl_tcp2_endpoint_send(mca_btl_base_endpoint_t* btl_endpoint, mca_btl_tc } break; } - OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock); + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&btl_endpoint->endpoint_send_lock); return rc; } @@ -308,7 +330,7 @@ static int 
mca_btl_tcp2_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endp /* - * Send the globally unique identifier for this process to a endpoint on + * Send the globally unique identifier for this process to a endpoint on * a newly connected socket. */ @@ -319,7 +341,7 @@ static int mca_btl_tcp2_endpoint_send_connect_ack(mca_btl_base_endpoint_t* btl_e orte_process_name_t guid = btl_proc->proc_ompi->proc_name; ORTE_PROCESS_NAME_HTON(guid); - if(mca_btl_tcp2_endpoint_send_blocking(btl_endpoint, &guid, sizeof(guid)) != + if(mca_btl_tcp2_endpoint_send_blocking(btl_endpoint, &guid, sizeof(guid)) != sizeof(guid)) { return OMPI_ERR_UNREACH; } @@ -332,28 +354,26 @@ static int mca_btl_tcp2_endpoint_send_connect_ack(mca_btl_base_endpoint_t* btl_e * (1) if a connection has not been attempted, accept the connection * (2) if a connection has not been established, and the endpoints process identifier * is less than the local process, accept the connection - * otherwise, reject the connection and continue with the current connection + * otherwise, reject the connection and continue with the current connection */ bool mca_btl_tcp2_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, struct sockaddr* addr, int sd) { - mca_btl_tcp2_proc_t* this_proc = mca_btl_tcp2_proc_local(); - mca_btl_tcp2_proc_t *endpoint_proc = btl_endpoint->endpoint_proc; + mca_btl_tcp_proc_t *endpoint_proc = btl_endpoint->endpoint_proc; + const orte_process_name_t *this_proc = &(ompi_proc_local()->proc_name); int cmpval; - OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock); - OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock); - if(NULL == btl_endpoint->endpoint_addr) { - OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock); - OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock); return false; } - cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock); + OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock); + + cmpval = 
ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, &endpoint_proc->proc_ompi->proc_name, - &this_proc->proc_ompi->proc_name); + this_proc); if((btl_endpoint->endpoint_sd < 0) || (btl_endpoint->endpoint_state != MCA_BTL_TCP_CONNECTED && cmpval < 0)) { @@ -365,9 +385,12 @@ bool mca_btl_tcp2_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock); return false; } - mca_btl_tcp2_endpoint_event_init(btl_endpoint); - opal_event_add(&btl_endpoint->endpoint_recv_event, 0); - mca_btl_tcp2_endpoint_connected(btl_endpoint); + mca_btl_tcp_endpoint_event_init(btl_endpoint); + /* NOT NEEDED if we remove the PERSISTENT flag when we create the + * first recv_event. + */ + opal_event_add(&btl_endpoint->endpoint_recv_event, 0); /* TODO */ + mca_btl_tcp_endpoint_connected(btl_endpoint); #if OPAL_ENABLE_DEBUG && WANT_PEER_DUMP mca_btl_tcp2_endpoint_dump(btl_endpoint, "accepted"); #endif @@ -388,16 +411,19 @@ bool mca_btl_tcp2_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, */ void mca_btl_tcp2_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint) { - if(btl_endpoint->endpoint_sd < 0) - return; - btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED; + int sd = btl_endpoint->endpoint_sd; + + do { + if( sd < 0 ) return; + } while ( opal_atomic_cmpset( &(btl_endpoint->endpoint_sd), sd, -1 ) ); + + CLOSE_THE_SOCKET(sd); btl_endpoint->endpoint_retries++; opal_event_del(&btl_endpoint->endpoint_recv_event); opal_event_del(&btl_endpoint->endpoint_send_event); - CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd); - btl_endpoint->endpoint_sd = -1; #if MCA_BTL_TCP_ENDPOINT_CACHE - free( btl_endpoint->endpoint_cache ); + if( NULL != btl_endpoint->endpoint_cache ) + free( btl_endpoint->endpoint_cache ); btl_endpoint->endpoint_cache = NULL; btl_endpoint->endpoint_cache_pos = NULL; btl_endpoint->endpoint_cache_length = 0; @@ -417,16 +443,17 @@ static void mca_btl_tcp2_endpoint_connected(mca_btl_base_endpoint_t* btl_endpoin btl_endpoint->endpoint_retries = 
0; /* Create the send event in a persistent manner. */ - opal_event_set(opal_event_base, &btl_endpoint->endpoint_send_event, - btl_endpoint->endpoint_sd, - OPAL_EV_WRITE | OPAL_EV_PERSIST, - mca_btl_tcp2_endpoint_send_handler, - btl_endpoint ); + opal_event_set(mca_btl_tcp_event_base, &btl_endpoint->endpoint_send_event, + btl_endpoint->endpoint_sd, + OPAL_EV_WRITE | OPAL_EV_PERSIST, + mca_btl_tcp_endpoint_send_handler, + btl_endpoint ); if(opal_list_get_size(&btl_endpoint->endpoint_frags) > 0) { - if(NULL == btl_endpoint->endpoint_send_frag) - btl_endpoint->endpoint_send_frag = (mca_btl_tcp2_frag_t*) + if(NULL == btl_endpoint->endpoint_send_frag) { + btl_endpoint->endpoint_send_frag = (mca_btl_tcp_frag_t*) opal_list_remove_first(&btl_endpoint->endpoint_frags); + } opal_event_add(&btl_endpoint->endpoint_send_event, 0); } } @@ -483,7 +510,7 @@ static int mca_btl_tcp2_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_e if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &btl_proc->proc_ompi->proc_name, &guid)) { - BTL_ERROR(("received unexpected process identifier %s", + BTL_ERROR(("received unexpected process identifier %s", ORTE_NAME_PRINT(&guid))); mca_btl_tcp2_endpoint_close(btl_endpoint); return OMPI_ERR_UNREACH; @@ -499,21 +526,21 @@ void mca_btl_tcp2_set_socket_options(int sd) #if defined(TCP_NODELAY) optval = mca_btl_tcp2_component.tcp_use_nodelay; if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) { - BTL_ERROR(("setsockopt(TCP_NODELAY) failed: %s (%d)", + BTL_ERROR(("setsockopt(TCP_NODELAY) failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); } #endif #if defined(SO_SNDBUF) if(mca_btl_tcp2_component.tcp_sndbuf > 0 && setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_btl_tcp2_component.tcp_sndbuf, sizeof(int)) < 0) { - BTL_ERROR(("setsockopt(SO_SNDBUF) failed: %s (%d)", + BTL_ERROR(("setsockopt(SO_SNDBUF) failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); } #endif #if 
defined(SO_RCVBUF) if(mca_btl_tcp2_component.tcp_rcvbuf > 0 && setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_btl_tcp2_component.tcp_rcvbuf, sizeof(int)) < 0) { - BTL_ERROR(("setsockopt(SO_RCVBUF) failed: %s (%d)", + BTL_ERROR(("setsockopt(SO_RCVBUF) failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); } #endif @@ -535,14 +562,14 @@ static int mca_btl_tcp2_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endp /* By default consider a IPv4 connection */ uint16_t af_family = AF_INET; opal_socklen_t addrlen = sizeof(struct sockaddr_in); - + #if OPAL_ENABLE_IPV6 if (AF_INET6 == btl_endpoint->endpoint_addr->addr_family) { af_family = AF_INET6; addrlen = sizeof (struct sockaddr_in6); } #endif - + btl_endpoint->endpoint_sd = socket(af_family, SOCK_STREAM, 0); if (btl_endpoint->endpoint_sd < 0) { btl_endpoint->endpoint_retries++; @@ -557,19 +584,19 @@ static int mca_btl_tcp2_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endp /* setup the socket as non-blocking */ if((flags = fcntl(btl_endpoint->endpoint_sd, F_GETFL, 0)) < 0) { - BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)", + BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); } else { flags |= O_NONBLOCK; if(fcntl(btl_endpoint->endpoint_sd, F_SETFL, flags) < 0) - BTL_ERROR(("fcntl(F_SETFL) failed: %s (%d)", + BTL_ERROR(("fcntl(F_SETFL) failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); } /* start the connect - will likely fail with EINPROGRESS */ mca_btl_tcp2_proc_tosocks(btl_endpoint->endpoint_addr, &endpoint_addr); - opal_output_verbose(20, mca_btl_base_output, + opal_output_verbose(20, mca_btl_base_output, "btl: tcp: attempting to connect() to address %s on port %d", opal_net_get_hostname((struct sockaddr*) &endpoint_addr), btl_endpoint->endpoint_addr->addr_port); @@ -578,7 +605,7 @@ static int mca_btl_tcp2_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endp /* non-blocking so wait for completion */ if(opal_socket_errno == 
EINPROGRESS || opal_socket_errno == EWOULDBLOCK) { btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECTING; - opal_event_add(&btl_endpoint->endpoint_send_event, 0); + MCA_BTL_TCP_ACTIVATE_EVENT(&btl_endpoint->endpoint_send_event, 0); return OMPI_SUCCESS; } { @@ -597,7 +624,7 @@ static int mca_btl_tcp2_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endp /* send our globally unique process identifier to the endpoint */ if((rc = mca_btl_tcp2_endpoint_send_connect_ack(btl_endpoint)) == OMPI_SUCCESS) { btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK; - opal_event_add(&btl_endpoint->endpoint_recv_event, 0); + MCA_BTL_TCP_ACTIVATE_EVENT(&btl_endpoint->endpoint_recv_event, 0); } else { mca_btl_tcp2_endpoint_close(btl_endpoint); } @@ -607,7 +634,7 @@ static int mca_btl_tcp2_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endp /* * Check the status of the connection. If the connection failed, will retry - * later. Otherwise, send this processes identifier to the endpoint on the + * later. Otherwise, send this processes identifier to the endpoint on the * newly connected socket. 
*/ static void mca_btl_tcp2_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_endpoint) @@ -623,7 +650,7 @@ static void mca_btl_tcp2_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_ /* check connect completion status */ if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) { - BTL_ERROR(("getsockopt() to %s failed: %s (%d)", + BTL_ERROR(("getsockopt() to %s failed: %s (%d)", opal_net_get_hostname((struct sockaddr*) &endpoint_addr), strerror(opal_socket_errno), opal_socket_errno)); mca_btl_tcp2_endpoint_close(btl_endpoint); @@ -634,7 +661,7 @@ static void mca_btl_tcp2_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_ return; } if(so_error != 0) { - BTL_ERROR(("connect() to %s failed: %s (%d)", + BTL_ERROR(("connect() to %s failed: %s (%d)", opal_net_get_hostname((struct sockaddr*) &endpoint_addr), strerror(so_error), so_error)); mca_btl_tcp2_endpoint_close(btl_endpoint); @@ -651,7 +678,7 @@ static void mca_btl_tcp2_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_ /* - * A file descriptor is available/ready for recv. Check the state + * A file descriptor is available/ready for recv. Check the state * of the socket and take the appropriate action. 
*/ @@ -688,14 +715,17 @@ static void mca_btl_tcp2_endpoint_recv_handler(int sd, short flags, void* user) mca_btl_tcp2_frag_t* frag; frag = btl_endpoint->endpoint_recv_frag; + + data_still_pending_on_endpoint: if(NULL == frag) { - if(mca_btl_tcp2_module.super.btl_max_send_size > - mca_btl_tcp2_module.super.btl_eager_limit) { + + if(mca_btl_tcp_module.super.btl_max_send_size > + mca_btl_tcp_module.super.btl_eager_limit) { MCA_BTL_TCP_FRAG_ALLOC_MAX(frag); - } else { + } else { MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag); } - + if(NULL == frag) { OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock); return; @@ -703,30 +733,32 @@ static void mca_btl_tcp2_endpoint_recv_handler(int sd, short flags, void* user) MCA_BTL_TCP_FRAG_INIT_DST(frag, btl_endpoint); } -#if MCA_BTL_TCP_ENDPOINT_CACHE - assert( 0 == btl_endpoint->endpoint_cache_length ); - data_still_pending_on_endpoint: -#endif /* MCA_BTL_TCP_ENDPOINT_CACHE */ /* check for completion of non-blocking recv on the current fragment */ - if(mca_btl_tcp2_frag_recv(frag, btl_endpoint->endpoint_sd) == false) { + if( mca_btl_tcp_frag_recv(frag, btl_endpoint->endpoint_sd) == false ) { btl_endpoint->endpoint_recv_frag = frag; } else { btl_endpoint->endpoint_recv_frag = NULL; - if( MCA_BTL_TCP_HDR_TYPE_SEND == frag->hdr.type ) { - mca_btl_active_message_callback_t* reg; - reg = mca_btl_base_active_message_trigger + frag->hdr.base.tag; - reg->cbfunc(&frag->btl->super, frag->hdr.base.tag, &frag->base, reg->cbdata); - } + + TODO_MCA_BTL_TCP_RECV_TRIGGER_CB(frag); + #if MCA_BTL_TCP_ENDPOINT_CACHE if( 0 != btl_endpoint->endpoint_cache_length ) { +#if MCA_BTL_TCP_USES_PROGRESS_THREAD + /* Get a new fragment and try again */ + frag = NULL; +#else /* If the cache still contain some data we can reuse the same fragment * until we flush it completly. 
*/ MCA_BTL_TCP_FRAG_INIT_DST(frag, btl_endpoint); +#endif /* MCA_BTL_TCP_USES_PROGRESS_THREAD */ goto data_still_pending_on_endpoint; } #endif /* MCA_BTL_TCP_ENDPOINT_CACHE */ + +#if !MCA_BTL_TCP_USES_PROGRESS_THREAD MCA_BTL_TCP_FRAG_RETURN(frag); +#endif /* MCA_BTL_TCP_USES_PROGRESS_THREAD */ } #if MCA_BTL_TCP_ENDPOINT_CACHE assert( 0 == btl_endpoint->endpoint_cache_length ); @@ -741,12 +773,13 @@ static void mca_btl_tcp2_endpoint_recv_handler(int sd, short flags, void* user) * of the MPI_Finalize. The first one will close the connections, * and all others will complain. */ - OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock); break; default: - OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock); + OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock); BTL_ERROR(("invalid socket state(%d)", btl_endpoint->endpoint_state)); - mca_btl_tcp2_endpoint_close(btl_endpoint); + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; + mca_btl_tcp_endpoint_close(btl_endpoint); + OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock); break; } } @@ -759,8 +792,8 @@ static void mca_btl_tcp2_endpoint_recv_handler(int sd, short flags, void* user) static void mca_btl_tcp2_endpoint_send_handler(int sd, short flags, void* user) { - mca_btl_tcp2_endpoint_t* btl_endpoint = (mca_btl_tcp2_endpoint_t *)user; - OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock); + mca_btl_tcp_endpoint_t* btl_endpoint = (mca_btl_tcp_endpoint_t *)user; + opal_mutex_atomic_lock(&btl_endpoint->endpoint_send_lock); switch(btl_endpoint->endpoint_state) { case MCA_BTL_TCP_CONNECTING: mca_btl_tcp2_endpoint_complete_connect(btl_endpoint); @@ -779,17 +812,13 @@ static void mca_btl_tcp2_endpoint_send_handler(int sd, short flags, void* user) opal_list_remove_first(&btl_endpoint->endpoint_frags); /* if required - update request status and release fragment */ - OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock); + opal_mutex_atomic_unlock(&btl_endpoint->endpoint_send_lock); assert( frag->base.des_flags & 
MCA_BTL_DES_SEND_ALWAYS_CALLBACK ); - frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, frag->rc); - if( btl_ownership ) { - MCA_BTL_TCP_FRAG_RETURN(frag); - } - OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock); - + TODO_MCA_BTL_TCP_COMPLETE_FRAG_SEND(frag); + opal_mutex_atomic_lock(&btl_endpoint->endpoint_send_lock); } - /* if nothing else to do unregister for send event notifications */ + /* if no more data to send unregister the send notifications */ if(NULL == btl_endpoint->endpoint_send_frag) { opal_event_del(&btl_endpoint->endpoint_send_event); } @@ -799,7 +828,7 @@ static void mca_btl_tcp2_endpoint_send_handler(int sd, short flags, void* user) opal_event_del(&btl_endpoint->endpoint_send_event); break; } - OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock); + opal_mutex_atomic_unlock(&btl_endpoint->endpoint_send_lock); } diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_endpoint.h b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_endpoint.h index bfa01a14359..d9f66c5945b 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_endpoint.h +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_endpoint.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_frag.c b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_frag.c index b65f22831d3..c46b0127c60 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_frag.c +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_frag.c @@ -5,22 +5,22 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * In windows, many of the socket functions return an EWOULDBLOCK * instead of \ things like EAGAIN, EINPROGRESS, etc. 
It has been * verified that this will \ not conflict with other error codes that - * are returned by these functions \ under UNIX/Linux environments + * are returned by these functions \ under UNIX/Linux environments */ #include "ompi_config.h" @@ -40,62 +40,62 @@ #include "opal/opal_socket_errno.h" #include "ompi/mca/btl/base/btl_base_error.h" -#include "btl_tcp2_frag.h" +#include "btl_tcp2_frag.h" #include "btl_tcp2_endpoint.h" -static void mca_btl_tcp2_frag_common_constructor(mca_btl_tcp2_frag_t* frag) -{ +static void mca_btl_tcp2_frag_common_constructor(mca_btl_tcp2_frag_t* frag) +{ frag->base.des_src = NULL; frag->base.des_src_cnt = 0; frag->base.des_dst = NULL; frag->base.des_dst_cnt = 0; } -static void mca_btl_tcp2_frag_eager_constructor(mca_btl_tcp2_frag_t* frag) -{ - frag->size = mca_btl_tcp2_module.super.btl_eager_limit; +static void mca_btl_tcp2_frag_eager_constructor(mca_btl_tcp2_frag_t* frag) +{ + frag->size = mca_btl_tcp2_module.super.btl_eager_limit; frag->my_list = &mca_btl_tcp2_component.tcp_frag_eager; - mca_btl_tcp2_frag_common_constructor(frag); + mca_btl_tcp2_frag_common_constructor(frag); } -static void mca_btl_tcp2_frag_max_constructor(mca_btl_tcp2_frag_t* frag) -{ - frag->size = mca_btl_tcp2_module.super.btl_max_send_size; +static void mca_btl_tcp2_frag_max_constructor(mca_btl_tcp2_frag_t* frag) +{ + frag->size = mca_btl_tcp2_module.super.btl_max_send_size; frag->my_list = &mca_btl_tcp2_component.tcp_frag_max; - mca_btl_tcp2_frag_common_constructor(frag); + mca_btl_tcp2_frag_common_constructor(frag); } -static void mca_btl_tcp2_frag_user_constructor(mca_btl_tcp2_frag_t* frag) -{ - frag->size = 0; +static void mca_btl_tcp2_frag_user_constructor(mca_btl_tcp2_frag_t* frag) +{ + frag->size = 0; frag->my_list = &mca_btl_tcp2_component.tcp_frag_user; - mca_btl_tcp2_frag_common_constructor(frag); + mca_btl_tcp2_frag_common_constructor(frag); } OBJ_CLASS_INSTANCE( - mca_btl_tcp2_frag_t, - mca_btl_base_descriptor_t, - NULL, - NULL); + 
mca_btl_tcp2_frag_t, + mca_btl_base_descriptor_t, + NULL, + NULL); OBJ_CLASS_INSTANCE( - mca_btl_tcp2_frag_eager_t, - mca_btl_base_descriptor_t, - mca_btl_tcp2_frag_eager_constructor, - NULL); + mca_btl_tcp2_frag_eager_t, + mca_btl_base_descriptor_t, + mca_btl_tcp2_frag_eager_constructor, + NULL); OBJ_CLASS_INSTANCE( - mca_btl_tcp2_frag_max_t, - mca_btl_base_descriptor_t, - mca_btl_tcp2_frag_max_constructor, - NULL); + mca_btl_tcp2_frag_max_t, + mca_btl_base_descriptor_t, + mca_btl_tcp2_frag_max_constructor, + NULL); OBJ_CLASS_INSTANCE( - mca_btl_tcp2_frag_user_t, - mca_btl_base_descriptor_t, - mca_btl_tcp2_frag_user_constructor, - NULL); + mca_btl_tcp2_frag_user_t, + mca_btl_base_descriptor_t, + mca_btl_tcp2_frag_user_constructor, + NULL); bool mca_btl_tcp2_frag_send(mca_btl_tcp2_frag_t* frag, int sd) @@ -119,7 +119,7 @@ bool mca_btl_tcp2_frag_send(mca_btl_tcp2_frag_t* frag, int sd) mca_btl_tcp2_endpoint_close(frag->endpoint); return false; default: - BTL_ERROR(("mca_btl_tcp2_frag_send: writev failed: %s (%d)", + BTL_ERROR(("mca_btl_tcp2_frag_send: writev failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); mca_btl_tcp2_endpoint_close(frag->endpoint); @@ -185,7 +185,7 @@ bool mca_btl_tcp2_frag_recv(mca_btl_tcp2_frag_t* frag, int sd) * iovec for the caching in the fragment structure (the +1). 
*/ frag->iov_ptr[num_vecs].iov_base = btl_endpoint->endpoint_cache_pos; - frag->iov_ptr[num_vecs].iov_len = + frag->iov_ptr[num_vecs].iov_len = mca_btl_tcp2_component.tcp_endpoint_cache - btl_endpoint->endpoint_cache_length; num_vecs++; #endif /* MCA_BTL_TCP_ENDPOINT_CACHE */ @@ -211,7 +211,7 @@ bool mca_btl_tcp2_frag_recv(mca_btl_tcp2_frag_t* frag, int sd) mca_btl_tcp2_endpoint_close(btl_endpoint); return false; default: - BTL_ERROR(("mca_btl_tcp2_frag_recv: readv failed: %s (%d)", + BTL_ERROR(("mca_btl_tcp2_frag_recv: readv failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); mca_btl_tcp2_endpoint_close(btl_endpoint); @@ -251,7 +251,7 @@ bool mca_btl_tcp2_frag_recv(mca_btl_tcp2_frag_t* frag, int sd) frag->iov[1].iov_len = frag->hdr.size; frag->iov_cnt++; #ifndef __sparc - /* The following cannot be done for sparc code + /* The following cannot be done for sparc code * because it causes alignment errors when accessing * structures later on in the btl and pml code. */ diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_frag.h b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_frag.h index 8d1d692b16c..661e271683b 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_frag.h +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_frag.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,44 +34,52 @@ #include #endif -#include "btl_tcp2.h" +#include "btl_tcp2.h" #include "btl_tcp2_hdr.h" BEGIN_C_DECLS #define MCA_BTL_TCP_FRAG_IOVEC_NUMBER 4 +/** + * Commands for the threaded version when the fragments must be completed + * by one of the MPI bounded threads. + */ +#define MCA_BTL_TCP_FRAG_STEP_UNDEFINED ((uint16_t)0x0000) +#define MCA_BTL_TCP_FRAG_STEP_SEND_COMPLETE ((uint16_t)0x0001) +#define MCA_BTL_TCP_FRAG_STEP_RECV_COMPLETE ((uint16_t)0x0002) + /** * TCP fragment derived type. */ struct mca_btl_tcp2_frag_t { - mca_btl_base_descriptor_t base; - mca_btl_base_segment_t segments[2]; - struct mca_btl_base_endpoint_t *endpoint; + mca_btl_base_descriptor_t base; + mca_btl_base_segment_t segments[2]; + struct mca_btl_base_endpoint_t *endpoint; struct mca_btl_tcp2_module_t* btl; mca_btl_tcp2_hdr_t hdr; struct iovec iov[MCA_BTL_TCP_FRAG_IOVEC_NUMBER + 1]; struct iovec *iov_ptr; size_t iov_cnt; size_t iov_idx; - size_t size; + size_t size; int rc; ompi_free_list_t* my_list; -}; -typedef struct mca_btl_tcp2_frag_t mca_btl_tcp2_frag_t; -OBJ_CLASS_DECLARATION(mca_btl_tcp2_frag_t); +}; +typedef struct mca_btl_tcp2_frag_t mca_btl_tcp2_frag_t; +OBJ_CLASS_DECLARATION(mca_btl_tcp2_frag_t); + +typedef struct mca_btl_tcp2_frag_t mca_btl_tcp2_frag_eager_t; + +OBJ_CLASS_DECLARATION(mca_btl_tcp2_frag_eager_t); -typedef struct mca_btl_tcp2_frag_t mca_btl_tcp2_frag_eager_t; - -OBJ_CLASS_DECLARATION(mca_btl_tcp2_frag_eager_t); +typedef struct mca_btl_tcp2_frag_t mca_btl_tcp2_frag_max_t; -typedef struct mca_btl_tcp2_frag_t mca_btl_tcp2_frag_max_t; - -OBJ_CLASS_DECLARATION(mca_btl_tcp2_frag_max_t); +OBJ_CLASS_DECLARATION(mca_btl_tcp2_frag_max_t); -typedef struct mca_btl_tcp2_frag_t mca_btl_tcp2_frag_user_t; - -OBJ_CLASS_DECLARATION(mca_btl_tcp2_frag_user_t); +typedef struct mca_btl_tcp2_frag_t mca_btl_tcp2_frag_user_t; + +OBJ_CLASS_DECLARATION(mca_btl_tcp2_frag_user_t); /* @@ -82,49 +90,77 
@@ OBJ_CLASS_DECLARATION(mca_btl_tcp2_frag_user_t); #define MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag) \ { \ ompi_free_list_item_t *item; \ - OMPI_FREE_LIST_GET(&mca_btl_tcp2_component.tcp_frag_eager, item); \ - frag = (mca_btl_tcp2_frag_t*) item; \ + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_frag_eager_mutex); \ + OMPI_FREE_LIST_GET_MT(&mca_btl_tcp_component.tcp_frag_eager, item); \ + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_frag_eager_mutex); \ + frag = (mca_btl_tcp_frag_t*) item; \ } #define MCA_BTL_TCP_FRAG_ALLOC_MAX(frag) \ { \ ompi_free_list_item_t *item; \ - OMPI_FREE_LIST_GET(&mca_btl_tcp2_component.tcp_frag_max, item); \ - frag = (mca_btl_tcp2_frag_t*) item; \ + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_frag_max_mutex); \ + OMPI_FREE_LIST_GET_MT(&mca_btl_tcp_component.tcp_frag_max, item); \ + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_frag_max_mutex); \ + frag = (mca_btl_tcp_frag_t*) item; \ } #define MCA_BTL_TCP_FRAG_ALLOC_USER(frag) \ { \ ompi_free_list_item_t *item; \ - OMPI_FREE_LIST_GET(&mca_btl_tcp2_component.tcp_frag_user, item); \ - frag = (mca_btl_tcp2_frag_t*) item; \ + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_frag_user_mutex); \ + OMPI_FREE_LIST_GET_MT(&mca_btl_tcp_component.tcp_frag_user, item); \ + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_frag_user_mutex); \ + frag = (mca_btl_tcp_frag_t*) item; \ } +#if MCA_BTL_TCP_USES_PROGRESS_THREAD +#define MCA_BTL_TCP_FRAG_RETURN(frag) \ +{ \ + (frag)->next_step = MCA_BTL_TCP_FRAG_STEP_UNDEFINED; \ + if( frag->my_list == &mca_btl_tcp_component.tcp_frag_eager ) { \ + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_frag_eager_mutex); \ + OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t*)(frag)); \ + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_frag_eager_mutex); \ + } else if( frag->my_list == &mca_btl_tcp_component.tcp_frag_max ) { \ + 
MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_frag_max_mutex); \ + OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t*)(frag)); \ + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_frag_max_mutex); \ + } else { \ + assert( frag->my_list == &mca_btl_tcp_component.tcp_frag_user ); \ + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_frag_user_mutex); \ + OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t*)(frag)); \ + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_frag_user_mutex); \ + } \ +} +#else #define MCA_BTL_TCP_FRAG_RETURN(frag) \ { \ - OMPI_FREE_LIST_RETURN(frag->my_list, (ompi_free_list_item_t*)(frag)); \ + (frag)->next_step = MCA_BTL_TCP_FRAG_STEP_UNDEFINED; \ + OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t*)(frag)); \ } +#endif /* MCA_BTL_TCP_USES_PROGRESS_THREAD */ #define MCA_BTL_TCP_FRAG_INIT_DST(frag,ep) \ do { \ - frag->rc = 0; \ - frag->btl = ep->endpoint_btl; \ + frag->base.des_src = NULL; \ + frag->base.des_src_cnt = 0; \ + frag->base.des_dst = frag->segments; \ + frag->base.des_dst_cnt = 1; \ frag->endpoint = ep; \ frag->iov[0].iov_len = sizeof(frag->hdr); \ frag->iov[0].iov_base = (IOVBASE_TYPE*)&frag->hdr; \ frag->iov_cnt = 1; \ frag->iov_idx = 0; \ frag->iov_ptr = frag->iov; \ - frag->base.des_src = NULL; \ - frag->base.des_dst_cnt = 0; \ - frag->base.des_dst = frag->segments; \ - frag->base.des_dst_cnt = 1; \ + frag->rc = 0; \ } while(0) bool mca_btl_tcp2_frag_send(mca_btl_tcp2_frag_t*, int sd); bool mca_btl_tcp2_frag_recv(mca_btl_tcp2_frag_t*, int sd); +void mca_btl_tcp_dump_frag( mca_btl_tcp_frag_t* frag, char* msg ); END_C_DECLS #endif diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_ft.c b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_ft.c index 4fa38e3103a..9eba7e62057 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_ft.c +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_ft.c @@ -5,18 +5,18 @@ * Copyright 
(c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,7 +25,7 @@ #include "btl_tcp2.h" -#include "btl_tcp2_frag.h" +#include "btl_tcp2_frag.h" #include "btl_tcp2_proc.h" #include "btl_tcp2_endpoint.h" diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_ft.h b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_ft.h index 38bff62636b..1156e379fba 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_ft.h +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_ft.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_hdr.h b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_hdr.h index 3036765055e..95110ecff77 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_hdr.h +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_hdr.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +23,7 @@ #include "ompi_config.h" #include "ompi/mca/btl/base/base.h" -#include "btl_tcp2.h" +#include "btl_tcp2.h" BEGIN_C_DECLS @@ -39,9 +39,9 @@ struct mca_btl_tcp2_hdr_t { mca_btl_base_header_t base; uint8_t type; uint16_t count; - uint32_t size; -}; -typedef struct mca_btl_tcp2_hdr_t mca_btl_tcp2_hdr_t; + uint32_t size; +}; +typedef struct mca_btl_tcp2_hdr_t mca_btl_tcp2_hdr_t; #define MCA_BTL_TCP_HDR_HTON(hdr) \ do { \ diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_proc.c b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_proc.c index 61fc2b70219..e81818e81ee 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_proc.c +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_proc.c @@ -3,9 +3,10 @@ * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2010 The University of Tennessee and The University + * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +15,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -55,9 +56,9 @@ static int max_assignment_cardinality; static enum mca_btl_tcp2_connection_quality **weights; static struct mca_btl_tcp2_addr_t ***best_addr; -OBJ_CLASS_INSTANCE( mca_btl_tcp2_proc_t, - opal_list_item_t, - mca_btl_tcp2_proc_construct, +OBJ_CLASS_INSTANCE( mca_btl_tcp2_proc_t, + opal_list_item_t, + mca_btl_tcp2_proc_construct, mca_btl_tcp2_proc_destruct ); void mca_btl_tcp2_proc_construct(mca_btl_tcp2_proc_t* tcp_proc) @@ -77,10 +78,10 @@ void mca_btl_tcp2_proc_construct(mca_btl_tcp2_proc_t* tcp_proc) void mca_btl_tcp2_proc_destruct(mca_btl_tcp2_proc_t* tcp_proc) { /* remove from list of all proc instances */ - OPAL_THREAD_LOCK(&mca_btl_tcp2_component.tcp_lock); - opal_proc_table_remove_value(&mca_btl_tcp2_component.tcp_procs, - tcp_proc->proc_ompi->proc_name); - OPAL_THREAD_UNLOCK(&mca_btl_tcp2_component.tcp_lock); + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_lock); + opal_hash_table_remove_value_uint64(&mca_btl_tcp_component.tcp_procs, + ompi_rte_hash_name(&tcp_proc->proc_ompi->proc_name)); + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_lock); /* release resources */ if(NULL != tcp_proc->proc_endpoints) { @@ -92,7 +93,7 @@ void mca_btl_tcp2_proc_destruct(mca_btl_tcp2_proc_t* tcp_proc) /* * Create a TCP process structure. There is a one-to-one correspondence * between a ompi_proc_t and a mca_btl_tcp2_proc_t instance. 
We cache - * additional data (specifically the list of mca_btl_tcp2_endpoint_t instances, + * additional data (specifically the list of mca_btl_tcp2_endpoint_t instances, * and published addresses) associated w/ a given destination on this * datastructure. */ @@ -103,11 +104,11 @@ mca_btl_tcp2_proc_t* mca_btl_tcp2_proc_create(ompi_proc_t* ompi_proc) size_t size; mca_btl_tcp2_proc_t* btl_proc; - OPAL_THREAD_LOCK(&mca_btl_tcp2_component.tcp_lock); - rc = opal_proc_table_get_value(&mca_btl_tcp2_component.tcp_procs, - ompi_proc->proc_name, (void**)&btl_proc); + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_lock); + rc = opal_hash_table_get_value_uint64(&mca_btl_tcp_component.tcp_procs, + hash, (void**)&btl_proc); if(OMPI_SUCCESS == rc) { - OPAL_THREAD_UNLOCK(&mca_btl_tcp2_component.tcp_lock); + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_lock); return btl_proc; } @@ -115,11 +116,11 @@ mca_btl_tcp2_proc_t* mca_btl_tcp2_proc_create(ompi_proc_t* ompi_proc) if(NULL == btl_proc) return NULL; btl_proc->proc_ompi = ompi_proc; - + /* add to hash table of all proc instance */ - opal_proc_table_set_value(&mca_btl_tcp2_component.tcp_procs, - ompi_proc->proc_name, btl_proc); - OPAL_THREAD_UNLOCK(&mca_btl_tcp2_component.tcp_lock); + opal_hash_table_set_value_uint64(&mca_btl_tcp_component.tcp_procs, + hash, btl_proc); + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_lock); /* lookup tcp parameters exported by this proc */ rc = ompi_modex_recv( &mca_btl_tcp2_component.super.btl_version, @@ -333,12 +334,12 @@ static mca_btl_tcp2_interface_t** mca_btl_tcp2_retrieve_local_interfaces(void) continue; } - local_interfaces[local_kindex_to_index[kindex]]->ipv4_address = + local_interfaces[local_kindex_to_index[kindex]]->ipv4_address = (struct sockaddr_storage*) malloc(sizeof(local_addr)); - memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv4_address, + memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv4_address, &local_addr, 
sizeof(local_addr)); - opal_ifindextomask(idx, - &local_interfaces[local_kindex_to_index[kindex]]->ipv4_netmask, + opal_ifindextomask(idx, + &local_interfaces[local_kindex_to_index[kindex]]->ipv4_netmask, sizeof(int)); break; case AF_INET6: @@ -347,12 +348,12 @@ static mca_btl_tcp2_interface_t** mca_btl_tcp2_retrieve_local_interfaces(void) continue; } - local_interfaces[local_kindex_to_index[kindex]]->ipv6_address + local_interfaces[local_kindex_to_index[kindex]]->ipv6_address = (struct sockaddr_storage*) malloc(sizeof(local_addr)); - memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv6_address, + memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv6_address, &local_addr, sizeof(local_addr)); - opal_ifindextomask(idx, - &local_interfaces[local_kindex_to_index[kindex]]->ipv6_netmask, + opal_ifindextomask(idx, + &local_interfaces[local_kindex_to_index[kindex]]->ipv6_netmask, sizeof(int)); break; default: @@ -367,10 +368,10 @@ static mca_btl_tcp2_interface_t** mca_btl_tcp2_retrieve_local_interfaces(void) } /* * Note that this routine must be called with the lock on the process - * already held. Insert a btl instance into the proc array and assign + * already held. Insert a btl instance into the proc array and assign * it an address. 
*/ -int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc, +int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc, mca_btl_base_endpoint_t* btl_endpoint ) { struct sockaddr_storage endpoint_addr_ss; @@ -436,10 +437,10 @@ int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc, return OMPI_ERR_OUT_OF_RESOURCE; } peer_interfaces[index] = (mca_btl_tcp2_interface_t *) malloc(sizeof(mca_btl_tcp2_interface_t)); - mca_btl_tcp2_initialise_interface(peer_interfaces[index], + mca_btl_tcp2_initialise_interface(peer_interfaces[index], endpoint_addr->addr_ifkindex, index); - } - + } + /* * in case one of the peer addresses is already in use, * mark the complete peer interface as 'not available' @@ -452,13 +453,13 @@ int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc, case AF_INET: peer_interfaces[index]->ipv4_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss)); peer_interfaces[index]->ipv4_endpoint_addr = endpoint_addr; - memcpy(peer_interfaces[index]->ipv4_address, + memcpy(peer_interfaces[index]->ipv4_address, &endpoint_addr_ss, sizeof(endpoint_addr_ss)); break; case AF_INET6: peer_interfaces[index]->ipv6_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss)); peer_interfaces[index]->ipv6_endpoint_addr = endpoint_addr; - memcpy(peer_interfaces[index]->ipv6_address, + memcpy(peer_interfaces[index]->ipv6_address, &endpoint_addr_ss, sizeof(endpoint_addr_ss)); break; default: @@ -472,7 +473,7 @@ int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc, } /* - * assign weights to each possible pair of interfaces + * assign weights to each possible pair of interfaces */ perm_size = num_local_interfaces; @@ -482,7 +483,7 @@ int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc, weights = (enum mca_btl_tcp2_connection_quality**) malloc(perm_size * sizeof(enum mca_btl_tcp2_connection_quality*)); - + best_addr = (mca_btl_tcp2_addr_t ***) malloc(perm_size * sizeof(mca_btl_tcp2_addr_t **)); for(i = 0; i < perm_size; ++i) 
{ @@ -494,7 +495,7 @@ int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc, sizeof(mca_btl_tcp2_addr_t *)); memset(best_addr[i], 0, perm_size * sizeof(mca_btl_tcp2_addr_t *)); } - + for(i=0; iproc_ompi->proc_hostname))) { /* No connection is possible on these interfaces */ - + /* check for RFC1918 */ } else if(opal_net_addr_isipv4public((struct sockaddr*) local_interfaces[i]->ipv4_address) - && opal_net_addr_isipv4public((struct sockaddr*) + && opal_net_addr_isipv4public((struct sockaddr*) peer_interfaces[j]->ipv4_address)) { if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address, (struct sockaddr*) peer_interfaces[j]->ipv4_address, @@ -565,7 +566,7 @@ int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc, weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK; } best_addr[i][j] = peer_interfaces[j]->ipv6_endpoint_addr; - } + } } /* for each peer interface */ } /* for each local interface */ @@ -584,8 +585,8 @@ int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc, /* Can only find the best set of connections when the number of * interfaces is not too big. When it gets larger, we fall back - * to a simpler and faster (and not as optimal) algorithm. - * See ticket https://svn.open-mpi.org/trac/ompi/ticket/2031 + * to a simpler and faster (and not as optimal) algorithm. + * See ticket https://svn.open-mpi.org/trac/ompi/ticket/2031 * for more details about this issue. 
*/ if (perm_size <= MAX_PERMUTATION_INTERFACES) { memset(a, 0, perm_size * sizeof(int)); @@ -597,10 +598,10 @@ int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc, for(i = 0; i < perm_size; ++i) { if(best_assignment[i] > num_peer_interfaces || weights[i][best_assignment[i]] == CQ_NO_CONNECTION - || peer_interfaces[best_assignment[i]]->inuse + || peer_interfaces[best_assignment[i]]->inuse || NULL == peer_interfaces[best_assignment[i]]) { continue; - } + } peer_interfaces[best_assignment[i]]->inuse++; btl_endpoint->endpoint_addr = best_addr[i][best_assignment[i]]; btl_endpoint->endpoint_addr->addr_inuse++; @@ -677,12 +678,12 @@ int mca_btl_tcp2_proc_insert( mca_btl_tcp2_proc_t* btl_proc, * Remove an endpoint from the proc array and indicate the address is * no longer in use. */ - + int mca_btl_tcp2_proc_remove(mca_btl_tcp2_proc_t* btl_proc, mca_btl_base_endpoint_t* btl_endpoint) { size_t i; - OPAL_THREAD_LOCK(&btl_proc->proc_lock); - for(i=0; iproc_endpoint_count; i++) { + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&btl_proc->proc_lock); + for( i = 0; i < btl_proc->proc_endpoint_count; i++ ) { if(btl_proc->proc_endpoints[i] == btl_endpoint) { memmove(btl_proc->proc_endpoints+i, btl_proc->proc_endpoints+i+1, (btl_proc->proc_endpoint_count-i-1)*sizeof(mca_btl_base_endpoint_t*)); @@ -700,7 +701,7 @@ int mca_btl_tcp2_proc_remove(mca_btl_tcp2_proc_t* btl_proc, mca_btl_base_endpoin break; } } - OPAL_THREAD_UNLOCK(&btl_proc->proc_lock); + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&btl_proc->proc_lock); return OMPI_SUCCESS; } @@ -710,11 +711,11 @@ int mca_btl_tcp2_proc_remove(mca_btl_tcp2_proc_t* btl_proc, mca_btl_base_endpoin */ mca_btl_tcp2_proc_t* mca_btl_tcp2_proc_lookup(const orte_process_name_t *name) { - mca_btl_tcp2_proc_t* proc = NULL; - OPAL_THREAD_LOCK(&mca_btl_tcp2_component.tcp_lock); - opal_proc_table_get_value(&mca_btl_tcp2_component.tcp_procs, - name->proc_name, (void**)&proc); - OPAL_THREAD_UNLOCK(&mca_btl_tcp2_component.tcp_lock); + mca_btl_tcp_proc_t* proc = 
NULL; + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_lock); + opal_hash_table_get_value_uint64(&mca_btl_tcp_component.tcp_procs, + ompi_rte_hash_name(name), (void**)&proc); + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_lock); return proc; } @@ -725,7 +726,7 @@ mca_btl_tcp2_proc_t* mca_btl_tcp2_proc_lookup(const orte_process_name_t *name) bool mca_btl_tcp2_proc_accept(mca_btl_tcp2_proc_t* btl_proc, struct sockaddr* addr, int sd) { size_t i; - OPAL_THREAD_LOCK(&btl_proc->proc_lock); + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&btl_proc->proc_lock); for( i = 0; i < btl_proc->proc_endpoint_count; i++ ) { mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i]; /* Check all conditions before going to try to accept the connection. */ @@ -754,12 +755,12 @@ bool mca_btl_tcp2_proc_accept(mca_btl_tcp2_proc_t* btl_proc, struct sockaddr* ad ; } - if(mca_btl_tcp2_endpoint_accept(btl_endpoint, addr, sd)) { - OPAL_THREAD_UNLOCK(&btl_proc->proc_lock); + if(mca_btl_tcp_endpoint_accept(btl_endpoint, addr, sd)) { + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&btl_proc->proc_lock); return true; } } - OPAL_THREAD_UNLOCK(&btl_proc->proc_lock); + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&btl_proc->proc_lock); return false; } @@ -795,7 +796,7 @@ bool mca_btl_tcp2_proc_tosocks(mca_btl_tcp2_addr_t* proc_addr, opal_output( 0, "mca_btl_tcp2_proc: unknown af_family received: %d\n", proc_addr->addr_family ); return false; - } + } return true; } diff --git a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_proc.h b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_proc.h index 98f460841e8..be2f6b7a9c1 100644 --- a/contrib/build-mca-comps-outside-of-tree/btl_tcp2_proc.h +++ b/contrib/build-mca-comps-outside-of-tree/btl_tcp2_proc.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,25 +37,25 @@ BEGIN_C_DECLS * BTL instance that attempts to open a connection to the process. */ struct mca_btl_tcp2_proc_t { - opal_list_item_t super; + opal_list_item_t super; /**< allow proc to be placed on a list */ - ompi_proc_t *proc_ompi; + ompi_proc_t *proc_ompi; /**< pointer to corresponding ompi_proc_t */ struct mca_btl_tcp2_addr_t* proc_addrs; /**< array of addresses exported by peer */ - size_t proc_addr_count; + size_t proc_addr_count; /**< number of addresses published by endpoint */ - struct mca_btl_base_endpoint_t **proc_endpoints; - /**< array of endpoints that have been created to access this proc */ + struct mca_btl_base_endpoint_t **proc_endpoints; + /**< array of endpoints that have been created to access this proc */ - size_t proc_endpoint_count; + size_t proc_endpoint_count; /**< number of endpoints */ - opal_mutex_t proc_lock; + opal_mutex_t proc_lock; /**< lock to protect against concurrent access to proc state */ }; typedef struct mca_btl_tcp2_proc_t mca_btl_tcp2_proc_t; @@ -97,7 +97,7 @@ typedef struct mca_btl_tcp2_interface_t mca_btl_tcp2_interface_t; * describes the quality of a possible connection between a local and * a remote network interface */ -enum mca_btl_tcp2_connection_quality { +enum mca_btl_tcp2_connection_quality { CQ_NO_CONNECTION, CQ_PRIVATE_DIFFERENT_NETWORK, CQ_PRIVATE_SAME_NETWORK, diff --git a/contrib/build-mca-comps-outside-of-tree/configure.ac b/contrib/build-mca-comps-outside-of-tree/configure.ac index 
55f6005bee6..af7ad22f6f5 100644 --- a/contrib/build-mca-comps-outside-of-tree/configure.ac +++ b/contrib/build-mca-comps-outside-of-tree/configure.ac @@ -6,21 +6,22 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # Normal Autotools setup stuff -AC_INIT([openmpi_btl_tcp2], [1.0.0], +AC_INIT([openmpi_btl_tcp2], [1.0.0], [http://example.com/help], [openmpi_btl_tcp2]) AC_CONFIG_AUX_DIR(config) AC_CONFIG_MACRO_DIR(config) @@ -154,7 +155,7 @@ EOF # Open MPI v1.7 libraries LDFLAGS="$LDFLAGS -L$withval/ompi/.libs" # For OMPI v1.7 and later - LIBS="$LIBS -lmpi" + LIBS="$LIBS -l@OMPI_LIBMPI_NAME@" CPPFLAGS="$CPPFLAGS $source_CPPFLAGS" CFLAGS="$CFLAGS $source_CFLAGS" @@ -178,9 +179,9 @@ AC_CHECK_FUNC([orte_show_help], [], # Check for types we need for this component AC_CHECK_HEADERS([netinet/in.h]) -AC_CHECK_TYPES([struct sockaddr_in], [], +AC_CHECK_TYPES([struct sockaddr_in], [], [AC_MSG_WARN([No struct sockaddr_in]) - AC_MSG_ERROR([Cannot continue])], + AC_MSG_ERROR([Cannot continue])], [AC_INCLUDES_DEFAULT #ifdef HAVE_NETINET_IN_H #include diff --git a/contrib/build-mca-comps-outside-of-tree/help-mpi-btl-tcp2.txt b/contrib/build-mca-comps-outside-of-tree/help-mpi-btl-tcp2.txt index a92b7fdf373..d7faf6a10e2 100644 --- a/contrib/build-mca-comps-outside-of-tree/help-mpi-btl-tcp2.txt +++ b/contrib/build-mca-comps-outside-of-tree/help-mpi-btl-tcp2.txt @@ -2,9 +2,9 @@ # # Copyright (c) 2009 Cisco Systems, Inc. 
All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI's TCP support diff --git a/contrib/code_counter.pl b/contrib/code_counter.pl index 3ad124674ce..e0518d9cfa8 100755 --- a/contrib/code_counter.pl +++ b/contrib/code_counter.pl @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # Primitive script to give approximate code counts in the Open MPI tree @@ -97,7 +97,7 @@ sub wanted { # Do we want this dir? for (my $i = 0; $i <= $#skip_dirs; ++$i) { - if ($skip_dirs[$i] eq $dir || + if ($skip_dirs[$i] eq $dir || ($is_dir && $skip_dirs[$i] eq $file)) { print("Skipping dir: $File::Find::dir / $file\n") if ($verbose); diff --git a/contrib/dist/linux/README b/contrib/dist/linux/README index b3b79157f8b..5e08c84ead8 100644 --- a/contrib/dist/linux/README +++ b/contrib/dist/linux/README @@ -4,7 +4,7 @@ Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana Copyright (c) 2004-2006 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. -Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, +Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2006 The Regents of the University of California. All rights reserved. 
diff --git a/contrib/dist/linux/README.ompi-spec-generator b/contrib/dist/linux/README.ompi-spec-generator index d9d78e6ac8f..4ccdcd12576 100644 --- a/contrib/dist/linux/README.ompi-spec-generator +++ b/contrib/dist/linux/README.ompi-spec-generator @@ -30,7 +30,7 @@ following format: e.g. openmpi-btl-mvapi-1.2a1r10877Mmvapi_4.1.0-1.x86_64.rpm -Package Description File +Package Description File ======================== Package description files a simple INI files with the suffix @@ -57,7 +57,7 @@ This summary will be the summary of the RPM. -- description The description will become the description of the RPM. --- type +-- type Specified the type of the package -- license @@ -66,7 +66,7 @@ The license of the RPM. (default : BSD) -- group The group where this package belongs to. (default = Development/Libraries) --- version +-- version The version field specifies a command that is executed during the RPM build process to obtain the version number of the package. This command is executed after the source code has been configured, diff --git a/contrib/dist/linux/buildrpm.sh b/contrib/dist/linux/buildrpm.sh index c3ab6ac2abd..b84732fc1fb 100755 --- a/contrib/dist/linux/buildrpm.sh +++ b/contrib/dist/linux/buildrpm.sh @@ -4,7 +4,7 @@ # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. -# +# # # General config vars @@ -95,7 +95,7 @@ echo "--> Found specfile: $specfile" rpmtopdir=${rpmtopdir:-"`grep %_topdir $HOME/.rpmmacros | awk '{ print $2 }'`"} if test "$rpmtopdir" != ""; then - rpmbuild_options="$rpmbuild_options --define '_topdir $rpmtopdir'" + rpmbuild_options="$rpmbuild_options --define '_topdir $rpmtopdir'" if test ! 
-d "$rpmtopdir"; then mkdir -p "$rpmtopdir" mkdir -p "$rpmtopdir/BUILD" diff --git a/contrib/dist/linux/ompi-spec-generator.py b/contrib/dist/linux/ompi-spec-generator.py index 7c1ba781c0e..5e34cc2914e 100755 --- a/contrib/dist/linux/ompi-spec-generator.py +++ b/contrib/dist/linux/ompi-spec-generator.py @@ -6,14 +6,14 @@ import optparse import ConfigParser - + ###################################################################### # global stuff ###################################################################### configext = ".package" # the name of the configurations files configfiles = [] # list with all found config files params = 0 # contains the cmd line options -packages = {} # directory for packages +packages = {} # directory for packages options = ["name", "type", "license", "summary", "files", "version", "description", "group", "vendor", "requires"] shell_cmds = {} compilers = { "default" : {"compiler":"default", @@ -69,7 +69,7 @@ fi done done -if [ $BUILD_PACKAGE == 1 ] ; then +if [ $BUILD_PACKAGE == 1 ] ; then eval export OMPI_PACKAGE_VERSION=`%(version)s` rpmbuild %(mode)s --define \'_topdir %%_topdir\' --define \'build_%(name)s 1\' --define \'build_default 0\' --define \"ompi_package_version $OMPI_PACKAGE_VERSION\" %%{ompi_specfile} fi @@ -95,7 +95,7 @@ # -# fix configure +# fix configure # %%define _prefix %%{ompi_prefix} %%define _sysconfdir %%{_prefix}/etc @@ -154,10 +154,10 @@ %%clean -%%files +%%files %%defattr(-,root,root,-) -%%endif +%%endif """ @@ -169,7 +169,7 @@ # ###################################################################### %%if %%{build_install} -Summary: Install a already compiled tree +Summary: Install a already compiled tree Name: %%{ompi_name_prefix}%%{ompi_name} Version: %%{ompi_version} Release: %%{ompi_release} @@ -196,7 +196,7 @@ # # create a module file on request # -if [ %(modulefile_condition)s ] ; then +if [ %(modulefile_condition)s ] ; then %%{__mkdir_p} $RPM_BUILD_ROOT/%(modulefile_path)s/%%{ompi_name}/ cat 
<$RPM_BUILD_ROOT/%(modulefile_path)s/%%{ompi_name}/%%{ompi_version} #%%Module @@ -270,10 +270,10 @@ %%clean -%%files +%%files %%defattr(-,root,root,-) -%%endif +%%endif """ @@ -329,11 +329,11 @@ default_template = """ ###################################################################### # -# default +# default # ###################################################################### %%if %%{build_default} -Summary: Open MPI +Summary: Open MPI Name: %%{ompi_name_prefix}%%{ompi_name} Version: %%{ompi_version}%%{ompi_extra_version} Release: %%{ompi_release} @@ -454,13 +454,13 @@ def __init__(self, name): name_prefix = params.ompi_name_prefix self.options[option] = name_prefix + params.ompi_name + " >= " + params.ompi_version else: - self.options[option] = None + self.options[option] = None def getOption(self, option): if option in self.options.keys(): return self.options[option] else: - return None + return None def setOption(self, option, value): if ( option == "files" ): @@ -497,11 +497,11 @@ def get_package(name): if not (name in packages.keys()): packages[name] = Package(name) return packages[name] - + ###################################################################### # -# verbose output +# verbose output # ###################################################################### def verbose(msg): @@ -511,7 +511,7 @@ def verbose(msg): ###################################################################### # -# debug output +# debug output # ###################################################################### def debug(msg): @@ -521,7 +521,7 @@ def debug(msg): ###################################################################### # -# error output +# error output # ###################################################################### def error(msg): @@ -530,7 +530,7 @@ def error(msg): ###################################################################### # -# error output +# error output # ###################################################################### 
def get_compiler(name): @@ -667,7 +667,7 @@ def write_specfile(build_packages): specfile.write(package_template % package_params); # create build command build_cmds += build_command_template % {"files":package_params["installed_files"], "default":"0", "name":package_params["name"], "mode":"-bb", "version":package_params["version"]} - + verbose(" Create build section") specfile.write(build_template % {"ompi_prefix":params.ompi_prefix}) @@ -686,11 +686,11 @@ def write_specfile(build_packages): else: inst_params["profile_condition"] = "1 == 0" specfile.write(install_template % inst_params) - + verbose(" Create default section") default_params = { "build_cmds": build_cmds, "version":params.ompi_version} specfile.write(default_template % default_params) - + verbose(" Write changelog") specfile.write(changelog) @@ -777,11 +777,11 @@ def main(): if params.interactive: shell() return - + # filter packages print "--> Select packages" build_packages = [] - # filter packages + # filter packages if params.packages != None: verbose(" Apply user profided packages list : " + params.packages); user_packages = params.packages.split(',') @@ -807,7 +807,7 @@ def main(): # done print "--> Finished." - + if ("__main__" == __name__): main() diff --git a/contrib/dist/linux/openmpi-switcher-modulefile.spec b/contrib/dist/linux/openmpi-switcher-modulefile.spec index 04a4574effe..5e5a50a41db 100644 --- a/contrib/dist/linux/openmpi-switcher-modulefile.spec +++ b/contrib/dist/linux/openmpi-switcher-modulefile.spec @@ -1,12 +1,12 @@ -# +# # Copyright (c) 2006 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ ############################################################################# @@ -57,7 +57,7 @@ Requires: %__rm %__mkdir %__sed %__mv %__chmod %__chown Requires: modules-oscar Requires: env-switcher -%description +%description This RPM is used to install a Open MPI-installation-agnostic modulefile that is used to automatically select which Open MPI diff --git a/contrib/dist/linux/openmpi-switcher-modulefile.tcl b/contrib/dist/linux/openmpi-switcher-modulefile.tcl index df65db6c479..3a029ad9c18 100644 --- a/contrib/dist/linux/openmpi-switcher-modulefile.tcl +++ b/contrib/dist/linux/openmpi-switcher-modulefile.tcl @@ -5,9 +5,9 @@ # Corporation. All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # This modulefile is a dispatcher for other Open MPI modulefiles. It diff --git a/contrib/dist/linux/openmpi.spec b/contrib/dist/linux/openmpi.spec index cc6707c63ce..2a80af296b8 100644 --- a/contrib/dist/linux/openmpi.spec +++ b/contrib/dist/linux/openmpi.spec @@ -5,17 +5,19 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # ############################################################################ @@ -161,6 +163,8 @@ # bets are off. So feel free to install it anywhere in your tree. He # suggests $prefix/doc. %define _defaultdocdir /opt/%{name}/%{version}/doc +# Also put the modulefile in /opt. +%define modulefile_path /opt/%{name}/%{version}/share/openmpi/modulefiles %endif %if !%{build_debuginfo_rpm} @@ -434,9 +438,8 @@ fi CFLAGS="%{?cflags:%{cflags}}%{!?cflags:$RPM_OPT_FLAGS}" CXXFLAGS="%{?cxxflags:%{cxxflags}}%{!?cxxflags:$RPM_OPT_FLAGS}" -FFLAGS="%{?f77flags:%{f77flags}}%{!?f7flags:$RPM_OPT_FLAGS}" FCFLAGS="%{?fcflags:%{fcflags}}%{!?fcflags:$RPM_OPT_FLAGS}" -export CFLAGS CXXFLAGS F77FLAGS FCFLAGS +export CFLAGS CXXFLAGS FCFLAGS %configure %{configure_options} %{__make} %{?mflags} @@ -625,7 +628,7 @@ mv tmp.files docs.files cd /tmp # Remove installed driver after rpm build finished -rm -rf $RPM_BUILD_DIR/%{name}-%{version} +rm -rf $RPM_BUILD_DIR/%{name}-%{version} test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT @@ -662,7 +665,7 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT %if %{build_all_in_one_rpm} # -# All in one RPM +# All in one RPM # # Easy; just list the prefix and then specifically call out the doc # files. @@ -670,7 +673,14 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT %files %defattr(-, root, root, -) +%if %(test "%{_prefix}" = "/usr" && echo 1 || echo 0) +%{_bindir}/* +%{_includedir}/* +%{_libdir}/* +%{_datadir} +%else %{_prefix} +%endif # If the sysconfdir is not under the prefix, then list it explicitly. 
%if !%{sysconfdir_in_prefix} %{_sysconfdir} @@ -706,7 +716,13 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT %files runtime -f runtime.files %defattr(-, root, root, -) -%dir %{_prefix} +%if %(test "%{_prefix}" = "/usr" && echo 1 || echo 0) +%{_bindir}/* +%{_libdir}/* +%{_datadir} +%else +%{_prefix} +%endif # If the sysconfdir is not under the prefix, then list it explicitly. %if !%{sysconfdir_in_prefix} %{_sysconfdir} @@ -729,9 +745,6 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT %{shell_scripts_path}/%{shell_scripts_basename}.sh %{shell_scripts_path}/%{shell_scripts_basename}.csh %endif -%dir %{_bindir} -%dir %{_libdir} -%dir %{_libdir}/openmpi %doc README INSTALL LICENSE %{_pkgdatadir} @@ -756,6 +769,13 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT # ############################################################################# %changelog +* Tue Mar 28 2017 Jeff Squyres +- Reverting a decision from a prior changelog entry: if + install_in_opt==1, then even put the modulefile under /opt. + +* Thu Nov 12 2015 Gilles Gouaillardet +- Revamp packaging when prefix is /usr + * Tue Jan 20 2015 Bert Wesarg - Remove VampirTrace wrapper from package. 
diff --git a/contrib/dist/macosx-pkg/ReadMe.rtf b/contrib/dist/macosx-pkg/ReadMe.rtf index 91b8d0d2c61..82969cc7528 100644 --- a/contrib/dist/macosx-pkg/ReadMe.rtf +++ b/contrib/dist/macosx-pkg/ReadMe.rtf @@ -31,4 +31,4 @@ Please see the Open MPI web page for help with Open MPI, especially the frequent \cf0 http://www.open-mpi.org/faq/\ \pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\ql\qnatural \cf0 \ -If this does not answer your question, further help is available via our mailing list at users@open-mpi.org} \ No newline at end of file +If this does not answer your question, further help is available via our mailing list at users@open-mpi.org} diff --git a/contrib/dist/macosx-pkg/buildpackage.sh b/contrib/dist/macosx-pkg/buildpackage.sh index 7dbd66c38c0..dc0f0b01bc7 100755 --- a/contrib/dist/macosx-pkg/buildpackage.sh +++ b/contrib/dist/macosx-pkg/buildpackage.sh @@ -1,10 +1,10 @@ #!/bin/bash -# Copyright (c) 2001-2006 The Trustees of Indiana University. +# Copyright (c) 2001-2006 The Trustees of Indiana University. # All rights reserved. # Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights -# reserved. -# +# reserved. +# # This file is part of the Open MPI software package. For license # information, see the LICENSE file in the top level directory of the # Open MPI source distribution. @@ -32,7 +32,7 @@ OMPI_PACKAGE="openmpi" OMPI_PREFIX="/usr/local/" OMPI_OPTIONS="--disable-mpi-f77 --without-cs-fs --enable-mca-no-build=ras-slurm,pls-slurm,gpr-null,sds-pipe,sds-slurm,pml-cm NM=\"nm -p\"" OMPI_OSX_README="ReadMe.rtf" -# note - if want XGrid support, make sure that a cocoa-supported +# note - if want XGrid support, make sure that a cocoa-supported # architecture appears first on the list. 
Otherwise, we won't # lipo that component and it will be dropped OPAL_ARCH_LIST="ppc ppc64 i386 x86_64" @@ -186,7 +186,7 @@ for arch in $OPAL_ARCH_LIST ; do # # Run configure - # + # cd $builddir config="$srcdir/configure CFLAGS=\"-arch $arch -isysroot $OMPI_SDK\" CXXFLAGS=\"-arch $arch -isysroot $OMPI_SDK\" OBJCFLAGS=\"-arch $arch -isysroot $OMPI_SDK\" --prefix=$OMPI_PREFIX $OMPI_OPTIONS --build=$build_arch --host=$host_arch" echo "--> Running configure: $config" @@ -274,7 +274,7 @@ print_arch_if() { exit 1 ;; esac -} +} # Set arch to the first arch in the list. Go through the for loop, # although we'll break out at the end of the first time through. Look diff --git a/contrib/dist/make-authors.pl b/contrib/dist/make-authors.pl index 1084ad9a169..0ff69a0e3b5 100755 --- a/contrib/dist/make-authors.pl +++ b/contrib/dist/make-authors.pl @@ -1,9 +1,10 @@ #!/usr/bin/env perl # -# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2008-2016 Cisco Systems, Inc. All rights reserved. 
# use strict; + use Data::Dumper; # Ensure that we're in the root of a writeable Git clone @@ -14,149 +15,213 @@ ###################################################################### +my $header_sep = "-----"; +my $unknown_org = "********* NO ORGANIZATION SET ********"; + +my $people; + +###################################################################### + # Run git log to get a list of committers -my $committers; -open (GIT, "git log --pretty=format:%ae|") || die "Can't run 'git log'."; +open (GIT, "git log --format=tformat:'%aN <%aE>'|") || die "Can't run 'git log'."; while () { chomp; - m/^\s*([\S]+)\s*$/; + m/^\s*(.+)\s+<(.+)>\s*$/; + + if (!exists($people->{$1})) { + # The person doesn't exist, so save a new entry + $people->{$1} = { + name => $1, + org => $unknown_org, + emails => { + lc($2) => 1, + } + }; - if (!exists($committers->{$1})) { - $committers->{$1} = { }; - print "Found Git commit email: $1\n"; + + print "Found Git committer: $1 <$2>\n"; + } else { + # The person already exists, so just add (or overwrite) this + # email address + $people->{$1}->{emails}->{$2} = 1; } } close(GIT); -# Read the existing AUTHORS file to get the header, footer, and Git -# email ID -> (gecos, affiliation) mappings. 
+###################################################################### + +# Read the existing AUTHORS file my $header; -my $footer; print "Matching Git emails to existing names/affiliations...\n"; +sub save { + my $current = shift; + + print "Saving person from AUTHORS: $current->{name}\n"; + + # We may overwrite an entry written from the git log, but that's + # ok + $people->{$current->{name}} = $current; +} + open (AUTHORS, "AUTHORS") || die "Can't open AUTHORS file"; my $in_header = 1; -my $in_footer = 0; +my $current = undef; while () { chomp; my $line = $_; - # Slurp down header lines until we hit a line that begins with an - # Git email + # Slurp down header lines until we hit a line that begins with + # $header_sep if ($in_header) { - foreach my $git_email (keys(%{$committers})) { - if ($line =~ /$git_email\s+/) { - $in_header = 0; - } - } - if ($in_header) { - $header .= "$_\n"; + $header .= "$line\n"; + + if ($_ =~ /^$header_sep/) { + $in_header = 0; + + # There should be a blank line after this, too + $header .= "\n"; } + next; } - # If we're in the body, parse to get the existing Git emails, gecos, - # and affiliations - if (!$in_header && !$in_footer) { - - # Make sure we have a line that begins with an Git email; - # otherwise, fall through to the footer. - my $found = undef; - my $git_email; - foreach $git_email (keys(%{$committers})) { - if ($line =~ /$git_email\s+/) { - $found = $git_email; - last; - } - } - if (!$found) { - $in_footer = 1; + # Skip blank lines + next + if ($line =~ /^\s*$/); + + # Format of body: + # + # NAME, Affiliation 1[, Affiliation 2[...]] + # Email address 1 + # [Email address 2] + # [...] + # NAME, Affiliation 1[, Affiliation 2[...]] + # Email address 1 + # [Email address 2] + # [...] 
+ + # Found a new email address for an existing person + if ($line =~ /^ /) { + m/^ (.+)$/; + $current->{emails}->{lc($1)} = 1; + + next; + } else { + # Found a new person; save the old entry + save($current) + if (defined($current)); + + $current = undef; + $current->{org} = $unknown_org; + if ($line =~ m/^(.+?),\s+(.+)$/) { + $current->{name} = $1; + $current->{org} = $2; } else { - $line =~ m/^$found\s+(.+?)\s{2,}(.+)$/; - my $gecos = $1; - my $aff = $2; - - if ($gecos =~ /^\s+$/) { - $gecos = ""; - } else { - $committers->{$found}->{gecos} = $gecos; - } - if ($aff =~ /^\s+$/) { - $aff = ""; - } else { - $committers->{$found}->{affiliation} = $aff; - } - print "Git email $found matches: $gecos / $aff\n"; + $current->{name} = $line; } - } - # If we're in the footer, just save all the lines - if ($in_footer) { - $footer .= "$_\n"; + next; } } + +save($current) + if (defined($current)); + close(AUTHORS); -# Figure out the 3 column widths. The last line of the header -# contains -'s for each of the columns. 
+###################################################################### -$header =~ m/\n([\-\s]+?)$/m; -my $div_line = $1; -my @divs = split(/ /, $div_line); -my $id_col = length($divs[0]); -my $gecos_col = length($divs[1]); -my $aff_col = length($divs[2]); +# Output a new AUTHORS file -# Print out a new AUTHORS file open (AUTHORS, ">AUTHORS.new") || die "Can't write to AUTHORS file"; + print AUTHORS $header; -my $i; -my $have_unknowns = 0; -foreach my $git_email (sort(keys(%${committers}))) { - # Skip the automated accounts - next - if ($git_email eq "no-author\@open-mpi.org" || - $git_email eq "mpiteam\@open-mpi.org"); - - print AUTHORS $git_email; - $i = length($git_email); - while ($i <= $id_col) { - print AUTHORS ' '; - ++$i; + +my @people_with_unknown_orgs; +my $email_dups; + +my @sorted_people = sort(keys(%{$people})); +foreach my $p (@sorted_people) { + print AUTHORS $p; + if (exists($people->{$p}->{org})) { + print AUTHORS ", $people->{$p}->{org}"; + + # Record this so that we can warn about it + push(@people_with_unknown_orgs, $p) + if ($people->{$p}->{org} eq $unknown_org); } + print AUTHORS "\n"; + + foreach my $e (sort(keys(%{$people->{$p}->{emails}}))) { + # Sanity check: make sure this email address does not show up + # with any other person/name + my $dup; + foreach my $p2 (@sorted_people) { + next + if ($p eq $p2); + + foreach my $e2 (keys(%{$people->{$p2}->{emails}})) { + if ($e eq $e2) { + $dup = $p2; + + # Record this so that we can warn about it + if ($p le $p2) { + $email_dups->{$p} = $p2; + } else { + $email_dups->{$p2} = $p; + } + last; + } + } - # if we have gecos/affiliation, print them. 
Otherwise, just end - # the line here - if ((exists($committers->{$git_email}->{gecos}) && - $committers->{$git_email}->{gecos} !~ /^\s+$/) || - (exists($committers->{$git_email}->{affiliation}) && - $committers->{$git_email}->{affiliation} !~ /^\s+$/)) { - print AUTHORS $committers->{$git_email}->{gecos}; - $i = length($committers->{$git_email}->{gecos}); - while ($i <= $gecos_col) { - print AUTHORS ' '; - ++$i; + last + if (defined($dup)); } - print AUTHORS $committers->{$git_email}->{affiliation} - if (exists($committers->{$git_email}->{affiliation})); - } else { - $have_unknowns = 1; + print AUTHORS " $e"; + print AUTHORS " (**** DUPLICATE EMAIL ADDRESS WITH $dup ***)" + if (defined($dup)); + print AUTHORS "\n"; } - print AUTHORS "\n"; } -print AUTHORS $footer; close(AUTHORS); +# We have a new AUTHORS file! Replace the old one. unlink("AUTHORS"); rename("AUTHORS.new", "AUTHORS"); print "New AUTHORS file written.\n"; -if ($have_unknowns) { - print "*** WARNING: There were Git committers with unknown real names and/or\n*** affiliations. You *MUST* edit the AUTHORS file to fill them in!\n"; -} else { - print "All Git emails were matched! No need to hand-edit the AUTHORS file.\n"; + +###################################################################### + +# Output any relevant warnings + +my $warned = 0; +if ($#people_with_unknown_orgs >= 0) { + $warned = 1; + print "\n*** WARNING: The following people have unspecified organiations:\n"; + foreach my $p (@people_with_unknown_orgs) { + print "*** $p\n"; + } +} + +my @k = sort(keys(%{$email_dups})); +if ($#k >= 0) { + $warned = 1; + print "\n*** WARNING: The following people had the same email address:\n"; + foreach my $p (@k) { + print "*** $p, $email_dups->{$p}\n"; + } +} + +if ($warned) { + print " +******************************************************************************* +*** YOU SHOULD EDIT THE .mailmap AND/OR AUTHORS FILE TO RESOLVE THESE WARNINGS! 
+*******************************************************************************\n"; } +exit($warned); diff --git a/contrib/dist/make-html-man-pages.pl b/contrib/dist/make-html-man-pages.pl index 27c6aaf3713..31de66ed6a6 100755 --- a/contrib/dist/make-html-man-pages.pl +++ b/contrib/dist/make-html-man-pages.pl @@ -204,7 +204,7 @@ sub doit { if (3 == $section && $name =~ /^MPI_/) { $meta_name = uc($name); } - + # Now we're left with what we want. Output the PHP page. # Write the output PHP file with our own header and footer, # suitable for www.open-mpi.org. @@ -269,7 +269,7 @@ sub doit { # Print the top-level engine file for this version (it will use the # data-.inc file). open(FILE, ">$outdir_base/index.php") || die "Can't open $outdir_base/index.php"; -print FILE '&1 && touch success) | tee auto.out + (./autogen.pl --force $autogen_args 2>&1 && touch success) | tee auto.out else - (./autogen.pl --no-ompi $autogen_args 2>&1 && touch success) | tee auto.out + (./autogen.pl --force --no-ompi $autogen_args 2>&1 && touch success) | tee auto.out fi if test ! -f success; then echo "Autogen failed. Aborting" diff --git a/contrib/find_occurence.pl b/contrib/find_occurence.pl index 84297fd1c98..0e9e1ce1bd0 100755 --- a/contrib/find_occurence.pl +++ b/contrib/find_occurence.pl @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -27,13 +27,13 @@ open (SOURCE_FILES, "find $source_path -name *.c |") || print "could not open the pipe\n"; while () { - + #open the file and delete the occurence - + $file_name = $_; - + open (FILE, "$file_name") || print "Could not open $file_name for reading\n"; - + while () { if (/$search_string/) { print $file_name; diff --git a/contrib/find_offenders.pl b/contrib/find_offenders.pl index 839e1a6361e..ec21ade0897 100755 --- a/contrib/find_offenders.pl +++ b/contrib/find_offenders.pl @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -37,7 +37,7 @@ $file_name = $_; open(FILE, "find . -name $file_name |") || print "find failed\n"; while() { - #file is found + #file is found print DANGER_FILES "#include <$file_name>\n"; } close (FILE); @@ -61,14 +61,14 @@ open(C_FILE, "$c_file") || print "Could not open $_\n"; while () { if (/$header/) { - print OFFENSIVE $header ." --> ". $c_file ; + print OFFENSIVE $header ." --> ". $c_file ; } } close (C_FILE); } close (C_FILES); - + open(H_FILES, "find . -name *.h |") || print "Could not complete find command\n"; while () { @@ -76,7 +76,7 @@ open(H_FILE, "$h_file") || print "Could not open $_\n"; while () { if (/$header/) { - print OFFENSIVE $header ." --> ". $h_file ; + print OFFENSIVE $header ." --> ". 
$h_file ; } } close (H_FILE); diff --git a/contrib/fix_headers.pl b/contrib/fix_headers.pl index 33c4b8b14fc..c2527678f5d 100755 --- a/contrib/fix_headers.pl +++ b/contrib/fix_headers.pl @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -48,7 +48,7 @@ $protection = "HAVE_" . $protection; print $protection; - + $string_to_replace = "#ifdef $protection$_#endif\n"; print $string_to_replace; @@ -70,7 +70,7 @@ } } close (C_FILE); - if ($protected == 0) { + if ($protected == 0) { #this file is not yet protected open(C_FILE, "$c_file") || print "Open failed on $c_file\n"; open(TEMP, "> $temp_file") || print "Open failed on temp.c \n"; @@ -113,7 +113,7 @@ } } close (H_FILE); - if ($protected == 0) { + if ($protected == 0) { #this file is not yet protected open(H_FILE, "$h_file") || print "Open failed on $h_file\n"; open(TEMP, "> $temp_file") || print "Open failed on temp.c \n"; diff --git a/contrib/fix_indent.pl b/contrib/fix_indent.pl index 895aa5e7569..d0c13586783 100755 --- a/contrib/fix_indent.pl +++ b/contrib/fix_indent.pl @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # #To keep brian happy diff --git a/contrib/gen_stats.pl b/contrib/gen_stats.pl index 17485ecec8a..82252196633 100755 --- a/contrib/gen_stats.pl +++ b/contrib/gen_stats.pl @@ -3,16 +3,16 @@ # Currently, we implement only the directory option # The following features have been requested for # -# 1. Get the name of the directory and produce a +# 1. Get the name of the directory and produce a # statistic for all the files which have been touched -# in all the subdirectories starting from that +# in all the subdirectories starting from that # directory # # 2. Since gcov spits out a statistic for all the header -# files included by all the source files, we need to +# files included by all the source files, we need to # aggregate them somehow into a single file. This might # have to be done manually by going through all the lines -# which have been executed in that file since a header +# which have been executed in that file since a header # file may have multiple inclusions meaning that they # might have multiple .gcov files in different directories # @@ -54,25 +54,25 @@ # process the arguments while($num_args > 0) { switch ($ARGV[$index]) { - case "-d" { + case "-d" { print DIR_FILE $ARGV[$index+1]; $index += 2; - $num_args -= 2; + $num_args -= 2; $dir_list_given = 1; } - case "-f" { + case "-f" { my $filename = `find . 
-name $ARGV[$index+1]`; print REQ_FILE $filename; $index += 2; - $num_args -= 2; + $num_args -= 2; $req_list_given = 1; } - case "-p" { + case "-p" { $percentage = $ARGV[$index]; $index += 2; - $num_args -= 2; + $num_args -= 2; } - else { + else { print "ERROR: Incorrect command line option\n"; exit(3); } @@ -95,14 +95,14 @@ get_file_list("./dir_list.txt", "touched_files.txt", "untouched_files.txt"); - + generate_stats("touched_files.txt", # file_list "coverage_stats.txt",# generic coverage numbers "percent_stats.txt", # files below a certain % $percentage, # percentage below which we report 1); # 1 to report } - + if (1 == $req_list_given) { generate_stats("req_list.txt", # file_list "req_stats.txt",# generic coverage @@ -118,7 +118,7 @@ sub get_file_list { print "ERROR: could not open directory listing\n"; exit(3); } - + while() { chomp(); my $c_files = `find $_ -name \"*.c\"`; @@ -128,7 +128,7 @@ sub get_file_list { $c_files =~ s/\.c//g; $c_files = $c_files . $cc_files; my @C_FILES = split(/\n/, $c_files); - + my $da_files = `find $_ -name \"*.da\" -o -name \"*.gcda\"`; $da_files =~ s/\.gcda//g; $da_files =~ s/\.da//g; @@ -140,12 +140,12 @@ sub get_file_list { print TEMP2 $da_files; close(TEMP1); close(TEMP2); - - # Now do the manual diff + + # Now do the manual diff open(TEMP1, "< temp1"); open(UNTOUCHED_FILES, ">> $untouched"); open(TOUCHED_FILES, ">> $touched"); - + while() { my $c_file = $_; my $found = 0; @@ -183,7 +183,7 @@ sub get_file_list { system("sort $touched -o temp; uniq temp $touched"); system("sort $untouched -o temp; uniq temp $untouched"); } - + # This is the function which generates the statistics and dumps it out # to a file. 
Details are pretty straightforward at this point @@ -192,7 +192,7 @@ sub generate_stats { my $k = 0; my $l = 0; - open (INPUT, "< $input_file"); + open (INPUT, "< $input_file"); open (COVERAGE, "> $coverage_file"); if ($calculate == 1) { open (PERCENT, "> percent_coverage.txt"); @@ -200,7 +200,7 @@ sub generate_stats { print COVERAGE "#Index Directory Filename Usage(%)\n"; print COVERAGE "#======================================================================================================\n"; - + if ($calculate == 1) { print PERCENT "#Index Directory Filename Usage(%)\n"; print PERCENT "#======================================================================================================\n"; @@ -216,7 +216,7 @@ sub generate_stats { #1. Get the directory name and filename seperately #2. Invoke gcov on the file #3. Print the statistic onto a file - + chomp(); my $full_name = $_; my $dir_name; @@ -229,7 +229,7 @@ sub generate_stats { open(RESULT, "cd $dir_name; gcov $file_gcda -o .libs 2> /dev/null | "); while () { - if (/Creating/) { $found_file = 0; } + if (/Creating/) { $found_file = 0; } else { # print "check: ", $_; # Do not check including the file_extension; might be .c or .cc or .C @@ -242,12 +242,12 @@ sub generate_stats { if (/^Lines/ && $found_file == 1) { # print "Found Lines:\n", $_; s/([\s,0-9]*\.[0-9]+\%)\.*/$1/; - my $val = $1; + my $val = $1; $average += $val; $k++; my $print_string = sprintf("%4d %40s %40s %3.2f\n", $k, $dir_name, $file_name, $val); if ($calculate == 1) { - if ($val <= $percentage) { + if ($val <= $percentage) { $l++; my $zero_string = sprintf("%4d %40s %40s %3.2f\n", $l, $dir_name, $file_name, $val); print PERCENT $zero_string; @@ -258,7 +258,7 @@ sub generate_stats { # Need to detect the next round $found_file = 0; } - } + } } close(RESULT); } diff --git a/contrib/generate_file_list.pl b/contrib/generate_file_list.pl index be166f8c74e..4737f87205e 100755 --- a/contrib/generate_file_list.pl +++ b/contrib/generate_file_list.pl @@ 
-6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/contrib/git/build-gitignore.pl b/contrib/git/build-gitignore.pl index 57371b37252..3f7a0f86e45 100755 --- a/contrib/git/build-gitignore.pl +++ b/contrib/git/build-gitignore.pl @@ -5,7 +5,7 @@ # $COPYRIGHT$ # # Dumb script to run through all the svn:ignore's in the tree and -# build build .gitignore files for Git. +# build build .gitignore files for Git. use strict; @@ -62,7 +62,7 @@ close(IN); } } - + foreach my $val (@git) { print OUT "$val\n"; } @@ -92,7 +92,7 @@ static-components.h *\\\#/; unshift(@globals, "# Automatically generated by build-gitignore.pl; edits may be lost!"); - + # add the globals */ foreach my $val (@globals) { @@ -142,7 +142,7 @@ sub process { chomp; push(@git, $_); } - + close(IN); } @@ -166,7 +166,7 @@ sub process { last; } } - next + next if ($skip); push(@git, "$line"); @@ -182,7 +182,7 @@ sub process { } } - + # Now find subdirectories in this directory my @entries; opendir(DIR, $dir) || die "Cannot open directory \"$dir\" for reading: $!"; diff --git a/contrib/git/update-git-svn.sh b/contrib/git/update-git-svn.sh index 845398f75ce..c8d84ca9693 100755 --- a/contrib/git/update-git-svn.sh +++ b/contrib/git/update-git-svn.sh @@ -96,7 +96,7 @@ doit $GIT18 push $force $github_url :trunk if [ -d $local_fs_git_mirror ]; then - for repo in ompi.git ompi-replication.git; do + for repo in ompi.git ompi-replication.git; do if [ -d $local_fs_git_mirror/$repo ]; then doit $GIT18 push $force $local_fs_git_mirror/$repo '*:*' doit $GIT18 push 
$force $local_fs_git_mirror/$repo :trunk diff --git a/contrib/header_replacement.sh b/contrib/header_replacement.sh index 7cebe5ae5bc..1bf873143fa 100755 --- a/contrib/header_replacement.sh +++ b/contrib/header_replacement.sh @@ -12,7 +12,7 @@ # All rights reserved. # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. # -# orte_show_help_replacement.sh Script to detect occurences of +# orte_show_help_replacement.sh Script to detect occurences of # #include "orte/util/show_help.h", where actually either # 1. #include "opal/util/output.h" # 2. #include "orte/mca/rml/rml_types.h" diff --git a/contrib/infrastructure/README-setup-github-diff-emails.txt b/contrib/infrastructure/README-setup-github-diff-emails.txt index 738de6b1d48..c5ad447722f 100644 --- a/contrib/infrastructure/README-setup-github-diff-emails.txt +++ b/contrib/infrastructure/README-setup-github-diff-emails.txt @@ -31,13 +31,13 @@ Still in that same config file, add a line in the remote "email" section: Now exit the editor and remove the tracking branch of the origin: - git branch -d -r origin/HEAD + git branch -d -r origin/HEAD IF YOU SCREW UP AND "push" to the email remote before deleting the origin/HEAD tracking branch, then run the "git branch -d ..." 
and the following (both in the original repo, not the bare repo): - git push email :refs/heads/HEAD + git push email :refs/heads/HEAD Copy the hooks/post-receive script from any of the other bare repos to the hooks subdir in this bare repo (e.g., from diff --git a/contrib/infrastructure/crontab-mpiteam-at-lion.crest.iu.edu b/contrib/infrastructure/crontab-mpiteam-at-lion.crest.iu.edu index 8aef2f6692f..ce10cc1d2b0 100644 --- a/contrib/infrastructure/crontab-mpiteam-at-lion.crest.iu.edu +++ b/contrib/infrastructure/crontab-mpiteam-at-lion.crest.iu.edu @@ -1,7 +1,7 @@ ######################### # Update the OSL FT webpage # -# Everyday @ 1:00 am +# Everyday @ 1:00 am # 0 1 * * * (cd /l/osl/www/www.osl.iu.edu/research/ft && svn up ) 1> /dev/null 2> /dev/null ######################### diff --git a/contrib/nightly/create_tarball.sh b/contrib/nightly/create_tarball.sh index 31f8186a706..b29fe564755 100755 --- a/contrib/nightly/create_tarball.sh +++ b/contrib/nightly/create_tarball.sh @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -34,7 +34,7 @@ gitbranch=$5 # Set this to any value for additional output; typically only when # debugging -debug=1 +: ${debug:=} # do you want a success mail? want_success_mail=1 @@ -174,46 +174,51 @@ do_command "git clone $giturl ompi" cd ompi do_command "git checkout $gitbranch" -# Find the "git describe" string for this branch (remove a leading "ompi-" -# prefix, if there is one). 
-describe=`git describe --tags --always | sed -e s/^ompi-//` +# Nightly tarballs are named in this format: +# openmpi-${BRANCHNAME}-${YYYYMMDDHHMM}-${SHORTHASH}.tar.${COMPRESSION} +timestamp=`date '+%Y%m%d%H%M'` +githash=`git log -n 1 '--pretty=format:%h'` +version="$gitbranch-$timestamp-$githash" if test -n "$debug"; then - echo "** found $gitbranch describe: $describe" + echo "*** This snapshot version: $version" fi -version=$describe # if there's a $destdir/latest_snapshot.txt, see if anything has -# happened since the describe listed in that file +# happened since the version listed in that file if test -f "$destdir/latest_snapshot.txt"; then - snapshot_describe=`cat $destdir/latest_snapshot.txt` + snapshot_version=`cat $destdir/latest_snapshot.txt` if test -n "$debug"; then - echo "** last snapshot describe: $snapshot_describe" + echo "*** Last snapshot version: $snapshot_version" fi # Do we need a new snapshot? - if test "$describe" = "$snapshot_describe"; then - if test -n "$debug"; then - echo "** git $gitbranch describe is same as latest_snapshot -- not doing anything" - fi - # Since we didn't do anything, there's no point in leaving the clone we - # just created - cd .. - rm -rf $clone_root - - # All done... nothing to see here... - exit 0 + # Snip the timestamp out of the versions and compare just + # ${BRANCHNAME}-${SHORTHASH}. + compare_version="$gitbranch-$githash" + compare_snapshot_version=`echo $snapshot_version | perl -p -e 's/^(.+?)-(\d+)-(.*+)$/$1-$3/'` + if test "$compare_version" = "$compare_snapshot_version"; then + if test -n "$debug"; then + echo "*** Our branch/git hash is the same as the last snapshot -- not doing anything" + fi + # Since we didn't do anything, there's no point in leaving the clone we + # just created + cd .. + rm -rf $clone_root + + # All done... nothing to see here... 
+ exit 0 fi fi if test -n "$debug"; then - echo "** making snapshot for describe: $describe" + echo "*** Houston: we're a go to make snapshot $version" fi # Ensure that VERSION is set to indicate that it wants a snapshot, and # insert the actual value that we want (so that ompi_get_version.sh # will report exactly that version). -sed -e 's/^repo_rev=.*/repo_rev='$describe/ \ - -e 's/^tarball_version=.*/tarball_version='$describe/ \ +sed -e 's/^repo_rev=.*/repo_rev='$githash/ \ + -e 's/^tarball_version=.*/tarball_version='$version/ \ VERSION > VERSION.new cp -f VERSION.new VERSION rm -f VERSION.new @@ -225,7 +230,7 @@ USER="ompibuilder" export USER # autogen is our friend -do_command "./autogen.pl" +do_command "./autogen.pl --force" # do config do_command "./configure" diff --git a/contrib/nightly/reports/check_devel_headers.pl b/contrib/nightly/reports/check_devel_headers.pl index 0b14cef51cf..3e647de8d18 100755 --- a/contrib/nightly/reports/check_devel_headers.pl +++ b/contrib/nightly/reports/check_devel_headers.pl @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # Check headers in the core opal, orte, and ompi trees and ensure that @@ -52,10 +52,10 @@ } sub wanted { - # don't process directories or links, and dont' recurse down + # don't process directories or links, and dont' recurse down # "special" directories if ( -l $_ ) { return; } - if ( -d $_ ) { + if ( -d $_ ) { if ((/\.svn/) || (/\.deps/) || (/\.libs/)) { $File::Find::prune = 1; } @@ -82,7 +82,7 @@ sub wanted { } else { return; } - } + } # We don't want any of the tools headers elsif ($parts[1] eq "tools") { return; diff --git a/contrib/nightly/reports/illegal_symbols_report.pl b/contrib/nightly/reports/illegal_symbols_report.pl index 94166d9fa2b..e8c44930bdd 100755 --- a/contrib/nightly/reports/illegal_symbols_report.pl +++ b/contrib/nightly/reports/illegal_symbols_report.pl @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # Look for public symbols in Open MPI libraries and components that @@ -130,7 +130,7 @@ sub check_files { # Only look for symbols that are a) global [i.e., # uppercase scope], b) not U, V, or W - if ($scope =~ /[A-Z]/ && + if ($scope =~ /[A-Z]/ && $scope !~ /[UVW]/ && $symbol !~ /^_/) { @@ -206,7 +206,7 @@ sub mail_symbols { print $mail " --> $symbol->{symbol}\n"; } } - + } print $mail "\n"; } @@ -309,7 +309,7 @@ sub mail_symbols { if ($$bad_libsymbols) { mail_symbols($bad_libsymbols, *MAIL{IO}); } - + print MAIL "\nYour friendly server,\nCyrador\n"; close MAIL; } diff --git a/contrib/ompi_branch_check_revisions-v1.5.txt b/contrib/ompi_branch_check_revisions-v1.5.txt index ca475a328f7..2837e719e38 100644 --- a/contrib/ompi_branch_check_revisions-v1.5.txt +++ b/contrib/ompi_branch_check_revisions-v1.5.txt @@ -1,5 +1,5 @@ # -# Input file for script ompi_branch_check_revisions.sh +# Input file for script ompi_branch_check_revisions.sh # here for the v1.5 branch # # Contains optional notes for revisions, about: diff --git a/contrib/openmpi-valgrind.supp b/contrib/openmpi-valgrind.supp index 46e9bfc4e9b..0a3ba945658 100644 --- a/contrib/openmpi-valgrind.supp +++ b/contrib/openmpi-valgrind.supp @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -67,7 +67,7 @@ # ############################################################### -# inet_ntoa on linux mallocs a static buffer. We can't free +# inet_ntoa on linux mallocs a static buffer. 
We can't free # it, so we have to live with it { linux_inet_ntoa diff --git a/contrib/platform/embedded/build_embedded.sh b/contrib/platform/embedded/build_embedded.sh index c2e05249391..e07c79ff911 100755 --- a/contrib/platform/embedded/build_embedded.sh +++ b/contrib/platform/embedded/build_embedded.sh @@ -1,4 +1,4 @@ -#! /usr/bin/env bash +#! /usr/bin/env bash # if (( $# < 2 )) ; then diff --git a/contrib/platform/embedded/gen_embedded.sh b/contrib/platform/embedded/gen_embedded.sh index 45f5e913e1d..9f35cfb8d94 100755 --- a/contrib/platform/embedded/gen_embedded.sh +++ b/contrib/platform/embedded/gen_embedded.sh @@ -1,4 +1,4 @@ -#! /usr/bin/env bash +#! /usr/bin/env bash # if (( $# < 2 )) ; then diff --git a/contrib/platform/hadoop/cisco.conf b/contrib/platform/hadoop/cisco.conf index 3af96eeaddf..27a5caba7fd 100644 --- a/contrib/platform/hadoop/cisco.conf +++ b/contrib/platform/hadoop/cisco.conf @@ -1,9 +1,9 @@ # # Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -49,7 +49,7 @@ # # Basic behavior to smooth startup -orte_abort_timeout = 10 +orte_abort_timeout = 10 opal_set_max_sys_limits = 1 ## Add the interface for out-of-band communication diff --git a/contrib/platform/hadoop/linux.conf b/contrib/platform/hadoop/linux.conf index 3af96eeaddf..27a5caba7fd 100644 --- a/contrib/platform/hadoop/linux.conf +++ b/contrib/platform/hadoop/linux.conf @@ -1,9 +1,9 @@ # # Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -49,7 +49,7 @@ # # Basic behavior to smooth startup -orte_abort_timeout = 10 +orte_abort_timeout = 10 opal_set_max_sys_limits = 1 ## Add the interface for out-of-band communication diff --git a/contrib/platform/hadoop/mac.conf b/contrib/platform/hadoop/mac.conf index 1630024e29c..60d86a4ca7c 100644 --- a/contrib/platform/hadoop/mac.conf +++ b/contrib/platform/hadoop/mac.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -60,7 +60,7 @@ # Basic behavior to smooth startup mca_component_show_load_errors = 0 -orte_abort_timeout = 10 +orte_abort_timeout = 10 ## Add the interface for out-of-band communication ## and set it up diff --git a/contrib/platform/ibm/debug-power6-gcc b/contrib/platform/ibm/debug-power6-gcc index 8f5fbcdf35f..81c3651a053 100644 --- a/contrib/platform/ibm/debug-power6-gcc +++ b/contrib/platform/ibm/debug-power6-gcc @@ -4,8 +4,8 @@ enable_debug=yes enable_mem_profile=no enable_contrib_no_build=libnbc enable_ft_thread=no -with_verbs=/usr -CXXFLAGS="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" +with_verbs=/usr +CXXFLAGS="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" CCASFLAGS="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" FCFLAGS="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" CFLAGS="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" diff --git a/contrib/platform/ibm/debug-power7-gcc b/contrib/platform/ibm/debug-power7-gcc index 
20ac4c5353c..d4e0f828e12 100644 --- a/contrib/platform/ibm/debug-power7-gcc +++ b/contrib/platform/ibm/debug-power7-gcc @@ -4,8 +4,8 @@ enable_debug=yes enable_mem_profile=no enable_contrib_no_build=libnbc enable_ft_thread=no -with_verbs=/usr -CXXFLAGS="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 -ggdb" +with_verbs=/usr +CXXFLAGS="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 -ggdb" CCASFLAGS="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 -ggdb" FCFLAGS="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 -ggdb" CFLAGS="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 -ggdb" diff --git a/contrib/platform/ibm/debug-ppc32-gcc b/contrib/platform/ibm/debug-ppc32-gcc index 8b082e28af9..dc5e70e0d92 100644 --- a/contrib/platform/ibm/debug-ppc32-gcc +++ b/contrib/platform/ibm/debug-ppc32-gcc @@ -4,8 +4,8 @@ enable_debug=yes enable_mem_profile=no enable_contrib_no_build=libnbc enable_ft_thread=no -with_verbs=/usr -CXXFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" +with_verbs=/usr +CXXFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" CCASFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" FCFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" CFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" diff --git a/contrib/platform/ibm/debug-ppc64-gcc b/contrib/platform/ibm/debug-ppc64-gcc index b93d9cc35a1..07ad2acecf0 100644 --- a/contrib/platform/ibm/debug-ppc64-gcc +++ b/contrib/platform/ibm/debug-ppc64-gcc @@ -4,8 +4,8 @@ enable_debug=yes enable_mem_profile=no enable_contrib_no_build=libnbc enable_ft_thread=no -with_verbs=/usr -CXXFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" +with_verbs=/usr +CXXFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" CCASFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" FCFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" CFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" diff --git a/contrib/platform/ibm/optimized-power6-gcc b/contrib/platform/ibm/optimized-power6-gcc index 
38645a4b468..23c8643a16a 100644 --- a/contrib/platform/ibm/optimized-power6-gcc +++ b/contrib/platform/ibm/optimized-power6-gcc @@ -3,10 +3,10 @@ enable_mem_profile=no enable_debug=no enable_contrib_no_build=libnbc enable_ft_thread=no -with_verbs=/usr +with_verbs=/usr enable_shared=yes enable_static=no -CXXFLAGS="-m64 -mcpu=power6 -mtune=power6 -O3" +CXXFLAGS="-m64 -mcpu=power6 -mtune=power6 -O3" CCASFLAGS="-m64 -mcpu=power6 -mtune=power6 -O3" FCFLAGS="-m64 -mcpu=power6 -mtune=power6 -O3" CFLAGS="-m64 -mcpu=power6 -mtune=power6 -O3" diff --git a/contrib/platform/ibm/optimized-power7-gcc b/contrib/platform/ibm/optimized-power7-gcc index d554d86a30b..89615e96355 100644 --- a/contrib/platform/ibm/optimized-power7-gcc +++ b/contrib/platform/ibm/optimized-power7-gcc @@ -3,10 +3,10 @@ enable_mem_profile=no enable_debug=no enable_contrib_no_build=libnbc enable_ft_thread=no -with_verbs=/usr +with_verbs=/usr enable_shared=yes enable_static=no -CXXFLAGS="-m64 -mcpu=power7 -mtune=power7 -O3" +CXXFLAGS="-m64 -mcpu=power7 -mtune=power7 -O3" CCASFLAGS="-m64 -mcpu=power7 -mtune=power7 -O3" FCFLAGS="-m64 -mcpu=power7 -mtune=power7 -O3" CFLAGS="-m64 -mcpu=power7 -mtune=power7 -O3" diff --git a/contrib/platform/ibm/optimized-ppc32-gcc b/contrib/platform/ibm/optimized-ppc32-gcc index a41f6c74115..beabe18930a 100644 --- a/contrib/platform/ibm/optimized-ppc32-gcc +++ b/contrib/platform/ibm/optimized-ppc32-gcc @@ -3,10 +3,10 @@ enable_mem_profile=no enable_debug=no enable_contrib_no_build=libnbc enable_ft_thread=no -with_verbs=/usr +with_verbs=/usr enable_shared=yes enable_static=no -CXXFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O3" +CXXFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O3" CCASFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O3" FCFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O3" CFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O3" diff --git a/contrib/platform/ibm/optimized-ppc64-gcc b/contrib/platform/ibm/optimized-ppc64-gcc index 62e40fb773a..54aba5e0ab7 100644 --- 
a/contrib/platform/ibm/optimized-ppc64-gcc +++ b/contrib/platform/ibm/optimized-ppc64-gcc @@ -3,7 +3,7 @@ enable_mem_profile=no enable_debug=no enable_contrib_no_build=libnbc enable_ft_thread=no -with_verbs=/usr +with_verbs=/usr enable_shared=yes enable_static=no CXXFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O3" diff --git a/contrib/platform/intel/bend/linux b/contrib/platform/intel/bend/linux index e9f0de71097..da7989aac35 100644 --- a/contrib/platform/intel/bend/linux +++ b/contrib/platform/intel/bend/linux @@ -18,6 +18,7 @@ enable_cxx_exceptions=no enable_mpi_java=yes enable_io_romio=no enable_contrib_no_build=libnbc +enable_mca_no_build=btl-usnic with_memory_manager=no with_tm=no with_devel_headers=yes diff --git a/contrib/platform/intel/bend/linux-optimized.conf b/contrib/platform/intel/bend/linux-optimized.conf index 4264a402376..f3ec12ddcb8 100644 --- a/contrib/platform/intel/bend/linux-optimized.conf +++ b/contrib/platform/intel/bend/linux-optimized.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -60,18 +60,18 @@ # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 -orte_abort_timeout = 10 +orte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent ## Protect the shared file systems ## Add the interface for out-of-band communication ## and set it up -oob_tcp_peer_retries = 120 -#oob_tcp_connect_timeout=600 +oob_tcp_peer_retries = 120 +#oob_tcp_connect_timeout=600 ## Define the MPI interconnects -btl = sm,tcp,self +btl = sm,tcp,self ## Setup shared memory btl_sm_free_list_max = 768 diff --git a/contrib/platform/intel/bend/linux-orcm.conf b/contrib/platform/intel/bend/linux-orcm.conf index 1f5150c2be1..63889a7093f 100644 --- a/contrib/platform/intel/bend/linux-orcm.conf +++ b/contrib/platform/intel/bend/linux-orcm.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -65,19 +65,19 @@ # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 -mpi_param_check = 0 -orte_abort_timeout = 10 +mpi_param_check = 0 +orte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent ## Protect the shared file systems ## Add the interface for out-of-band communication ## and set it up -oob_tcp_peer_retries = 120 -#oob_tcp_connect_timeout=600 +oob_tcp_peer_retries = 120 +#oob_tcp_connect_timeout=600 ## Define the MPI interconnects -btl = sm,tcp,self +btl = sm,tcp,self ## Setup shared memory btl_sm_free_list_max = 768 diff --git a/contrib/platform/intel/bend/linux.conf b/contrib/platform/intel/bend/linux.conf index 1f5150c2be1..b8b8194e5e3 100644 --- a/contrib/platform/intel/bend/linux.conf +++ b/contrib/platform/intel/bend/linux.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -58,26 +58,8 @@ # parameters available and their default values. 
# -#default hostfile -#orte_default_hostfile = /home/common/hosts -#ras_slurm_enable_dyn_alloc = 1 -#ras_slurm_config_file = /home/common/slurm/conf/slurm.conf - # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 -mpi_param_check = 0 -orte_abort_timeout = 10 +orte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent -## Protect the shared file systems - -## Add the interface for out-of-band communication -## and set it up -oob_tcp_peer_retries = 120 -#oob_tcp_connect_timeout=600 - -## Define the MPI interconnects -btl = sm,tcp,self - -## Setup shared memory -btl_sm_free_list_max = 768 diff --git a/contrib/platform/intel/bend/mac-optimized.conf b/contrib/platform/intel/bend/mac-optimized.conf index 851342ef538..d3ed4f556d2 100644 --- a/contrib/platform/intel/bend/mac-optimized.conf +++ b/contrib/platform/intel/bend/mac-optimized.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -60,19 +60,19 @@ # Basic behavior to smooth startup mca_base_component_show_load_errors = 0 -mpi_param_check = 0 -orte_abort_timeout = 10 +mpi_param_check = 0 +orte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent ## Protect the shared file systems ## Add the interface for out-of-band communication ## and set it up -oob_tcp_peer_retries = 120 -#oob_tcp_connect_timeout=600 +oob_tcp_peer_retries = 120 +#oob_tcp_connect_timeout=600 ## Define the MPI interconnects -btl = sm,tcp,self +btl = sm,tcp,self ## Setup shared memory btl_sm_free_list_max = 768 diff --git a/contrib/platform/intel/bend/mac-orcm.conf b/contrib/platform/intel/bend/mac-orcm.conf index 3d25f512863..b8b8194e5e3 100644 --- a/contrib/platform/intel/bend/mac-orcm.conf +++ b/contrib/platform/intel/bend/mac-orcm.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -60,19 +60,6 @@ # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 -mpi_param_check = 0 -orte_abort_timeout = 10 +orte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent -## Protect the shared file systems - -## Add the interface for out-of-band communication -## and set it up -oob_tcp_peer_retries = 120 -#oob_tcp_connect_timeout=600 - -## Define the MPI interconnects -btl = sm,tcp,self - -## Setup shared memory -btl_sm_free_list_max = 768 diff --git a/contrib/platform/intel/bend/mac.conf b/contrib/platform/intel/bend/mac.conf index 3d25f512863..bec396b8324 100644 --- a/contrib/platform/intel/bend/mac.conf +++ b/contrib/platform/intel/bend/mac.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -60,19 +60,5 @@ # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 -mpi_param_check = 0 -orte_abort_timeout = 10 +orte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent - -## Protect the shared file systems - -## Add the interface for out-of-band communication -## and set it up -oob_tcp_peer_retries = 120 -#oob_tcp_connect_timeout=600 - -## Define the MPI interconnects -btl = sm,tcp,self - -## Setup shared memory -btl_sm_free_list_max = 768 diff --git a/contrib/platform/iu/odin/debug-nopmi.conf b/contrib/platform/iu/odin/debug-nopmi.conf index 2116035dff2..a509ae11214 100644 --- a/contrib/platform/iu/odin/debug-nopmi.conf +++ b/contrib/platform/iu/odin/debug-nopmi.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -60,20 +60,20 @@ # Basic behavior to smooth startup mca_component_show_load_errors = 0 -mpi_param_check = 0 -orte_abort_timeout = 10 +mpi_param_check = 0 +orte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent ## Protect the shared file systems ## Add the interface for out-of-band communication ## and set it up -oob_tcp_peer_retries = 120 -oob_tcp_disable_family = IPv6 -#oob_tcp_connect_timeout=600 +oob_tcp_peer_retries = 120 +oob_tcp_disable_family = IPv6 +#oob_tcp_connect_timeout=600 ## Define the MPI interconnects -btl = sm,tcp,self +btl = sm,tcp,self ## Setup shared memory btl_sm_free_list_max = 768 diff --git a/contrib/platform/iu/odin/debug.conf b/contrib/platform/iu/odin/debug.conf index 1d92d661018..43fbf4b825e 100644 --- a/contrib/platform/iu/odin/debug.conf +++ b/contrib/platform/iu/odin/debug.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -60,19 +60,19 @@ # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 -orte_abort_timeout = 10 +orte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent ## Protect the shared file systems ## Add the interface for out-of-band communication ## and set it up -oob_tcp_peer_retries = 120 -oob_tcp_disable_family = IPv6 -#oob_tcp_connect_timeout=600 +oob_tcp_peer_retries = 120 +oob_tcp_disable_family = IPv6 +#oob_tcp_connect_timeout=600 ## Define the MPI interconnects -btl = sm,openib,self +btl = sm,openib,self btl_openib_cpc_include = udcm ## Setup shared memory diff --git a/contrib/platform/iu/odin/optimized.conf b/contrib/platform/iu/odin/optimized.conf index 81f63b2ba2a..4b40fb0e6a1 100644 --- a/contrib/platform/iu/odin/optimized.conf +++ b/contrib/platform/iu/odin/optimized.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -60,33 +60,33 @@ # Basic behavior to smooth startup mca_base_component_show_load_errors = 0 -mpi_param_check = 0 -orte_abort_timeout = 10 +mpi_param_check = 0 +orte_abort_timeout = 10 ## Protect the shared file systems ## Add the interface for out-of-band communication ## and set it up -oob_tcp_peer_retries = 120 -oob_tcp_disable_family = IPv6 -#oob_tcp_connect_timeout=600 +oob_tcp_peer_retries = 120 +oob_tcp_disable_family = IPv6 +#oob_tcp_connect_timeout=600 ## Define the MPI interconnects -btl = sm,openib,self +btl = sm,openib,self #mpi_leave_pinned = 1 ## Setup shared memory btl_sm_free_list_max = 768 ## Setup OpenIB -btl_openib_want_fork_support = 0 -#btl_openib_receive_queues = P,128,256,64,32,32:S,2048,1024,128,32:S,12288,1024,128,32:S,65536,1024,128,32 +btl_openib_want_fork_support = 0 +#btl_openib_receive_queues = P,128,256,64,32,32:S,2048,1024,128,32:S,12288,1024,128,32:S,65536,1024,128,32 ## Setup TCP btl_tcp_if_include = ib0 ## Configure the PML pml_ob1_use_early_completion = 0 - -## Enable cpu affinity -opal_paffinity_alone = 1 + +## Enable cpu affinity +opal_paffinity_alone = 1 diff --git a/contrib/platform/iu/odin/static.conf b/contrib/platform/iu/odin/static.conf index 7a21290b253..757c5323009 100644 --- a/contrib/platform/iu/odin/static.conf +++ b/contrib/platform/iu/odin/static.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -60,33 +60,33 @@ # Basic behavior to smooth startup mca_component_show_load_errors = 0 -mpi_param_check = 0 -orte_abort_timeout = 10 +mpi_param_check = 0 +orte_abort_timeout = 10 ## Protect the shared file systems ## Add the interface for out-of-band communication ## and set it up -oob_tcp_peer_retries = 120 -oob_tcp_disable_family = IPv6 -#oob_tcp_connect_timeout=600 +oob_tcp_peer_retries = 120 +oob_tcp_disable_family = IPv6 +#oob_tcp_connect_timeout=600 ## Define the MPI interconnects -btl = sm,openib,self +btl = sm,openib,self #mpi_leave_pinned = 1 ## Setup shared memory btl_sm_free_list_max = 768 ## Setup OpenIB -btl_openib_want_fork_support = 0 -#btl_openib_receive_queues = P,128,256,64,32,32:S,2048,1024,128,32:S,12288,1024,128,32:S,65536,1024,128,32 +btl_openib_want_fork_support = 0 +#btl_openib_receive_queues = P,128,256,64,32,32:S,2048,1024,128,32:S,12288,1024,128,32:S,65536,1024,128,32 ## Setup TCP btl_tcp_if_include = ib0 ## Configure the PML pml_ob1_use_early_completion = 0 - -## Enable cpu affinity -opal_paffinity_alone = 1 + +## Enable cpu affinity +opal_paffinity_alone = 1 diff --git a/contrib/platform/lanl/cray_xc_cle5.2/debug-common b/contrib/platform/lanl/cray_xc_cle5.2/debug-common new file mode 100644 index 00000000000..4397ccf9d0f --- /dev/null +++ b/contrib/platform/lanl/cray_xc_cle5.2/debug-common @@ -0,0 +1,36 @@ +# do not use IB verbs +with_verbs=no + +enable_dlopen=no +enable_mem_profile=no +enable_binaries=yes +enable_heterogeneous=no +enable_shared=yes +enable_static=yes +enable_orte_static_ports=no +enable_ipv6=no +enable_mpi_fortran=yes + +# enable debugging +enable_mem_debug=yes +enable_debug_symbols=yes +enable_picky=yes +enable_debug=yes + +# disable c++ bindings (removed in MPI 3.0) +enable_mpi_cxx=no +enable_mpi_cxx_seek=no +enable_cxx_exceptions=no + +enable_per_user_config_files=no +enable_pty_support=no 
+enable_mca_no_build=crs,filem,routed-linear,snapc,pml-dr,pml-crcp2,pml-crcpw,pml-v,pml-example,crcp,pml-cm,ess-cnos,grpcomm-cnos,plm-rsh,btl-tcp,oob-ud,ras-simulator + +# enable direct calling for ob1 +enable_mca_direct=pml-ob1 + +# enable development headers +with_devel_headers=yes + +# disable valgrind +with_valgrind=no diff --git a/contrib/platform/lanl/cray_xc_cle5.2/debug-lustre b/contrib/platform/lanl/cray_xc_cle5.2/debug-lustre new file mode 100644 index 00000000000..2bedb1b7a33 --- /dev/null +++ b/contrib/platform/lanl/cray_xc_cle5.2/debug-lustre @@ -0,0 +1,19 @@ +# (c) 2012-2015 Los Alamos National Security, LLC. All rights reserved. +# Open MPI configuration for Cray XC v2.x GNU compiler, +# Lustre + +if test "$CC" = "cc" ; then + echo "ERROR: Open MPI should not be compiled with Cray's wrapper compilers (cc/CC/ftn)" + exit 1 +fi + +source debug-common + +if test "$CC" = "icc" ; then + source intel-common +else + source gnu-common +fi + +# enable and Lustre in romio +with_io_romio_flags="--with-file-system=ufs+nfs+lustre CFLAGS=-I/opt/cray/lustre-cray_gem_s/default/include" diff --git a/contrib/platform/lanl/cray_xc_cle5.2/debug-lustre.conf b/contrib/platform/lanl/cray_xc_cle5.2/debug-lustre.conf new file mode 100644 index 00000000000..657fc25a489 --- /dev/null +++ b/contrib/platform/lanl/cray_xc_cle5.2/debug-lustre.conf @@ -0,0 +1,104 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. 
+# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This is the default system-wide MCA parameters defaults file. +# Specifically, the MCA parameter "mca_param_files" defaults to a +# value of +# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" +# (this file is the latter of the two). So if the default value of +# mca_param_files is not changed, this file is used to set system-wide +# MCA parameters. This file can therefore be used to set system-wide +# default MCA parameters for all users. Of course, users can override +# these values if they want, but this file is an excellent location +# for setting system-specific MCA parameters for those users who don't +# know / care enough to investigate the proper values for them. + +# Note that this file is only applicable where it is visible (in a +# filesystem sense). Specifically, MPI processes each read this file +# during their startup to determine what default values for MCA +# parameters should be used. mpirun does not bundle up the values in +# this file from the node where it was run and send them to all nodes; +# the default value decisions are effectively distributed. Hence, +# these values are only applicable on nodes that "see" this file. If +# $sysconf is a directory on a local disk, it is likely that changes +# to this file will need to be propagated to other nodes. If $sysconf +# is a directory that is shared via a networked filesystem, changes to +# this file will be visible to all nodes that share this $sysconf. + +# The format is straightforward: one per line, mca_param_name = +# rvalue. Quoting is ignored (so if you use quotes or escape +# characters, they'll be included as part of the value). For example: + +# Disable run-time MPI parameter checking +# mpi_param_check = 0 + +# Note that the value "~/" will be expanded to the current user's home +# directory. 
For example: + +# Change component loading path +# component_path = /usr/local/lib/openmpi:~/my_openmpi_components + +# See "ompi_info --param all all" for a full listing of Open MPI MCA +# parameters available and their default values. +# + +# Basic behavior to smooth startup +mca_base_component_show_load_errors = 0 +orte_report_launch_progress = 1 + +# Set line buffering for stdout/stderr +ess_base_stream_buffering = 1 + +# Define timeout for daemons to report back during launch +orte_startup_timeout = 360 + +## Protect the shared file systems +orte_no_session_dirs = /lustre,/panfs,/scratch,/lscratch,/users,/usr/projects +orte_tmpdir_base = /var/tmp + +## Require an allocation to run - protects the frontend +## from inadvertent job executions +orte_allocation_required = 1 + +## Deal with the allocator +orte_strip_prefix = nid +orte_retain_aliases = 1 +# 1st alias entry is the stripped node name, +# 2nd is the unstripped one +orte_hostname_alias_index = 2 + +## Add the interface for out-of-band communication +## and set it up +oob_tcp_if_include=ipogif0 +oob_tcp_peer_retries = 1000 +oob_tcp_sndbuf = 32768 +oob_tcp_rcvbuf = 32768 + +## Define the MPI interconnects +btl = self,vader,ugni + +## Enable cpu affinity +hwloc_base_binding_policy = core + +## Setup MPI options +mpi_show_handle_leaks = 1 +mpi_warn_on_fork = 1 +#mpi_abort_print_stack = 1 diff --git a/contrib/platform/lanl/cray_xc_cle5.2/optimized-common b/contrib/platform/lanl/cray_xc_cle5.2/optimized-common new file mode 100644 index 00000000000..4380b9a0e9a --- /dev/null +++ b/contrib/platform/lanl/cray_xc_cle5.2/optimized-common @@ -0,0 +1,44 @@ +# do not use IB verbs +with_verbs=no + +enable_dlopen=no + +enable_mem_profile=no +enable_binaries=yes + +enable_heterogeneous=no +enable_shared=yes +enable_static=yes + +enable_orte_static_ports=no + +enable_ipv6=no + +# enable fortran bindings +enable_mpi_fortran=yes + +# disable debugging +enable_mem_debug=no +enable_debug_symbols=no +enable_picky=no 
+enable_debug=no + +# disable c++ bindings (removed in MPI 3.0) +enable_mpi_cxx=no +enable_mpi_cxx_seek=no +enable_cxx_exceptions=no + +enable_per_user_config_files=no +enable_pty_support=no +enable_mca_no_build=crs,filem,routed-linear,snapc,pml-dr,pml-crcp2,pml-crcpw,pml-v,pml-example,crcp,pml-cm,ess-cnos,grpcomm-cnos,plm-rsh,btl-tcp,oob-ud,ras-simulator,mpool-fake +enable_mca_static=btl:ugni,btl:self,btl:vader,pml:ob1 +#enable_contrib_no_build=libnbc + +# enable direct calling for ob1 +enable_mca_direct=pml-ob1 + +# enable development headers +with_devel_headers=yes + +# disable valgrind +with_valgrind=no diff --git a/contrib/platform/lanl/cray_xc_cle5.2/optimized-lustre b/contrib/platform/lanl/cray_xc_cle5.2/optimized-lustre new file mode 100644 index 00000000000..b0f32256b08 --- /dev/null +++ b/contrib/platform/lanl/cray_xc_cle5.2/optimized-lustre @@ -0,0 +1,20 @@ +# (c) 2012-2015 Los Alamos National Security, LLC. All rights reserved. +# Open MPI configuration for Cray XC v2.x GNU compiler, +# Lustre + +if test "$CC" = "cc" ; then + echo "ERROR: Open MPI should not be compiled with Cray's wrapper compilers (cc/CC/ftn)" + exit 1 +fi + +if test "$enable_debug" = "yes"; then + source debug-common +else + source optimized-common +fi + +# enable and Lustre in romio +with_io_romio_flags="--with-file-system=ufs+nfs+lustre CFLAGS=-I/opt/cray/lustre-cray_ari_s/default/include" + +# enable Lustre in OMPI I/O +with_lustre=/opt/cray/lustre-cray_ari_s/default diff --git a/contrib/platform/lanl/cray_xc_cle5.2/optimized-lustre.conf b/contrib/platform/lanl/cray_xc_cle5.2/optimized-lustre.conf new file mode 100644 index 00000000000..d545d6891ef --- /dev/null +++ b/contrib/platform/lanl/cray_xc_cle5.2/optimized-lustre.conf @@ -0,0 +1,107 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This is the default system-wide MCA parameters defaults file. +# Specifically, the MCA parameter "mca_param_files" defaults to a +# value of +# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" +# (this file is the latter of the two). So if the default value of +# mca_param_files is not changed, this file is used to set system-wide +# MCA parameters. This file can therefore be used to set system-wide +# default MCA parameters for all users. Of course, users can override +# these values if they want, but this file is an excellent location +# for setting system-specific MCA parameters for those users who don't +# know / care enough to investigate the proper values for them. + +# Note that this file is only applicable where it is visible (in a +# filesystem sense). Specifically, MPI processes each read this file +# during their startup to determine what default values for MCA +# parameters should be used. mpirun does not bundle up the values in +# this file from the node where it was run and send them to all nodes; +# the default value decisions are effectively distributed. Hence, +# these values are only applicable on nodes that "see" this file. If +# $sysconf is a directory on a local disk, it is likely that changes +# to this file will need to be propagated to other nodes. 
If $sysconf +# is a directory that is shared via a networked filesystem, changes to +# this file will be visible to all nodes that share this $sysconf. + +# The format is straightforward: one per line, mca_param_name = +# rvalue. Quoting is ignored (so if you use quotes or escape +# characters, they'll be included as part of the value). For example: + +# Disable run-time MPI parameter checking +# mpi_param_check = 0 + +# Note that the value "~/" will be expanded to the current user's home +# directory. For example: + +# Change component loading path +# component_path = /usr/local/lib/openmpi:~/my_openmpi_components + +# See "ompi_info --param all all" for a full listing of Open MPI MCA +# parameters available and their default values. +# + +# Basic behavior to smooth startup +mca_base_component_show_load_errors = 0 +#orte_report_launch_progress = 1 + +# Set line buffering for stdout/stderr +ess_base_stream_buffering = 1 + +# Define timeout for daemons to report back during launch +orte_startup_timeout = 360 + +## Protect the shared file systems +orte_no_session_dirs = /lustre,/panfs,/scratch,/lscratch,/users,/usr/projects +orte_tmpdir_base = /var/tmp + +## Require an allocation to run - protects the frontend +## from inadvertent job executions +orte_allocation_required = 1 + +## Deal with the allocator +orte_strip_prefix = nid +orte_retain_aliases = 1 +# 1st alias entry is the stripped node name, +# 2nd is the unstripped one +orte_hostname_alias_index = 2 + +## Add the interface for out-of-band communication +## and set it up +oob_tcp_if_include=ipogif0 +oob_tcp_peer_retries = 1000 +oob_tcp_sndbuf = 32768 +oob_tcp_rcvbuf = 32768 + +## Define the MPI interconnects +btl = self,vader,ugni + +## Setup Gemini +# TODO LANL + +## Enable cpu affinity +hwloc_base_binding_policy = core + +## Setup MPI options +mpi_show_handle_leaks = 1 +mpi_warn_on_fork = 1 +#mpi_abort_print_stack = 1 diff --git a/contrib/platform/lanl/cray_xe6/debug-common 
b/contrib/platform/lanl/cray_xe6/debug-common index e7e6bcd99de..6bb806362d5 100644 --- a/contrib/platform/lanl/cray_xe6/debug-common +++ b/contrib/platform/lanl/cray_xe6/debug-common @@ -1,4 +1,3 @@ -enable_dlopen=no enable_mem_profile=no enable_binaries=yes enable_heterogeneous=no @@ -33,8 +32,5 @@ enable_mca_direct=pml-ob1 # enable development headers with_devel_headers=yes -# enable ptmalloc (enables lazy deregistration) -with_memory_manager=linux - # disable valgrind with_valgrind=no diff --git a/contrib/platform/lanl/cray_xe6/debug-lustre.conf b/contrib/platform/lanl/cray_xe6/debug-lustre.conf index 48435536b4f..2cdc8ebc552 100644 --- a/contrib/platform/lanl/cray_xe6/debug-lustre.conf +++ b/contrib/platform/lanl/cray_xe6/debug-lustre.conf @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -13,9 +13,9 @@ # Copyright (c) 2011 Los Alamos National Security, LLC. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -95,8 +95,8 @@ oob_tcp_rcvbuf = 32768 ## Define the MPI interconnects btl = self,vader,ugni -## Enable cpu affinity -hwloc_base_binding_policy = core +## Enable cpu affinity +hwloc_base_binding_policy = core ## Setup MPI options mpi_show_handle_leaks = 1 diff --git a/contrib/platform/lanl/cray_xe6/intel-common b/contrib/platform/lanl/cray_xe6/intel-common index 0ce2f3f17f1..caf7ca0136d 100644 --- a/contrib/platform/lanl/cray_xe6/intel-common +++ b/contrib/platform/lanl/cray_xe6/intel-common @@ -12,4 +12,4 @@ CFLAGS="$CFLAGS -msse3" CXXFLAGS="$CXXFLAGS -msse3" CC=icc CXX=icpc -FTN=ifort \ No newline at end of file +FTN=ifort diff --git a/contrib/platform/lanl/cray_xe6/optimized-common b/contrib/platform/lanl/cray_xe6/optimized-common index 26877aa41bb..937bf6b5982 100644 --- a/contrib/platform/lanl/cray_xe6/optimized-common +++ b/contrib/platform/lanl/cray_xe6/optimized-common @@ -1,5 +1,3 @@ -enable_dlopen=no - enable_mem_profile=no enable_binaries=yes @@ -40,8 +38,5 @@ enable_mca_direct=pml-ob1 # enable development headers with_devel_headers=yes -# enable ptmalloc (enables lazy deregistration) -with_memory_manager=linux - # disable valgrind with_valgrind=no diff --git a/contrib/platform/lanl/cray_xe6/optimized-lustre.conf b/contrib/platform/lanl/cray_xe6/optimized-lustre.conf index 460561d0375..18c11fc99a4 100644 --- a/contrib/platform/lanl/cray_xe6/optimized-lustre.conf +++ b/contrib/platform/lanl/cray_xe6/optimized-lustre.conf @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
@@ -13,9 +13,9 @@ # Copyright (c) 2011 Los Alamos National Security, LLC. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -95,11 +95,11 @@ oob_tcp_rcvbuf = 32768 ## Define the MPI interconnects btl = self,vader,ugni -## Setup Gemini +## Setup Gemini # TODO LANL -## Enable cpu affinity -hwloc_base_binding_policy = core +## Enable cpu affinity +hwloc_base_binding_policy = core ## Setup MPI options mpi_show_handle_leaks = 1 diff --git a/contrib/platform/lanl/darwin/darwin-common b/contrib/platform/lanl/darwin/darwin-common index 1b51cf39d3c..31ee911e265 100644 --- a/contrib/platform/lanl/darwin/darwin-common +++ b/contrib/platform/lanl/darwin/darwin-common @@ -22,9 +22,6 @@ enable_cxx_exceptions=no # Disable components not needed on any TOSS platform enable_mca_no_build=carto,crs,filem,routed-linear,snapc,pml-dr,pml-crcp2,pml-crcpw,pml-v,pml-example,crcp,btl-tcp -# Enable malloc hooks for mpi_leave_pinned -with_memory_manager=linux - # TOSS2 uses slurm with_slurm=yes with_tm=no diff --git a/contrib/platform/lanl/mac.conf b/contrib/platform/lanl/mac.conf index 5f23b5e4db2..6f2c90116fc 100644 --- a/contrib/platform/lanl/mac.conf +++ b/contrib/platform/lanl/mac.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/contrib/platform/lanl/macosx-dynamic-java.conf b/contrib/platform/lanl/macosx-dynamic-java.conf index 280a6848c35..c888e678047 100644 --- a/contrib/platform/lanl/macosx-dynamic-java.conf +++ b/contrib/platform/lanl/macosx-dynamic-java.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/contrib/platform/lanl/macosx-dynamic-optimized.conf b/contrib/platform/lanl/macosx-dynamic-optimized.conf index a94ec8eb83d..8482886f157 100644 --- a/contrib/platform/lanl/macosx-dynamic-optimized.conf +++ b/contrib/platform/lanl/macosx-dynamic-optimized.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/contrib/platform/lanl/macosx-dynamic.conf b/contrib/platform/lanl/macosx-dynamic.conf index 18dd7f64438..60efd784870 100644 --- a/contrib/platform/lanl/macosx-dynamic.conf +++ b/contrib/platform/lanl/macosx-dynamic.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/contrib/platform/lanl/macosx-static-leopard.conf b/contrib/platform/lanl/macosx-static-leopard.conf index a94ec8eb83d..8482886f157 100644 --- a/contrib/platform/lanl/macosx-static-leopard.conf +++ b/contrib/platform/lanl/macosx-static-leopard.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/contrib/platform/lanl/toss/debug b/contrib/platform/lanl/toss/debug new file mode 100644 index 00000000000..f975ae98187 --- /dev/null +++ b/contrib/platform/lanl/toss/debug @@ -0,0 +1,8 @@ +# (c) 2013-2016 Los Alamos National Security, LLC. All rights reserved. 
+# Open MPI debug configuration for TOSS/TOSS2 v1.7.x/1.8.x + +source ./toss-common +source ./debug-common + +# Enable lustre support in romio +with_io_romio_flags=--with-file-system=ufs+nfs+lustre diff --git a/contrib/platform/lanl/toss/debug-mlx b/contrib/platform/lanl/toss/debug-mlx new file mode 100644 index 00000000000..2940f2135bd --- /dev/null +++ b/contrib/platform/lanl/toss/debug-mlx @@ -0,0 +1,4 @@ +# (c) 2013-2016 Los Alamos National Security, LLC. All rights reserved. +# Open MPI debug configuration for TOSS/TOSS2 v1.7.x/1.8.x + +source ./debug diff --git a/contrib/platform/lanl/toss/debug-mlx.conf b/contrib/platform/lanl/toss/debug-mlx.conf new file mode 100644 index 00000000000..03eea2b0072 --- /dev/null +++ b/contrib/platform/lanl/toss/debug-mlx.conf @@ -0,0 +1,104 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This is the default system-wide MCA parameters defaults file. +# Specifically, the MCA parameter "mca_param_files" defaults to a +# value of +# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" +# (this file is the latter of the two). So if the default value of +# mca_param_files is not changed, this file is used to set system-wide +# MCA parameters. This file can therefore be used to set system-wide +# default MCA parameters for all users.
Of course, users can override +# these values if they want, but this file is an excellent location +# for setting system-specific MCA parameters for those users who don't +# know / care enough to investigate the proper values for them. + +# Note that this file is only applicable where it is visible (in a +# filesystem sense). Specifically, MPI processes each read this file +# during their startup to determine what default values for MCA +# parameters should be used. mpirun does not bundle up the values in +# this file from the node where it was run and send them to all nodes; +# the default value decisions are effectively distributed. Hence, +# these values are only applicable on nodes that "see" this file. If +# $sysconf is a directory on a local disk, it is likely that changes +# to this file will need to be propagated to other nodes. If $sysconf +# is a directory that is shared via a networked filesystem, changes to +# this file will be visible to all nodes that share this $sysconf. + +# The format is straightforward: one per line, mca_param_name = +# rvalue. Quoting is ignored (so if you use quotes or escape +# characters, they'll be included as part of the value). For example: + +# Disable run-time MPI parameter checking +# mpi_param_check = 0 + +# Note that the value "~/" will be expanded to the current user's home +# directory. For example: + +# Change component loading path +# component_path = /usr/local/lib/openmpi:~/my_openmpi_components + +# See "ompi_info --param all all" for a full listing of Open MPI MCA +# parameters available and their default values. 
+# + +# Basic behavior to smooth startup +mca_base_component_show_load_errors = 0 +opal_set_max_sys_limits = 1 +orte_report_launch_progress = 1 + +# Define timeout for daemons to report back during launch +orte_startup_timeout = 10000 + +## Protect the shared file systems +orte_no_session_dirs = /panfs,/scratch,/users,/usr/projects +orte_tmpdir_base = /tmp + +## Require an allocation to run - protects the frontend +## from inadvertent job executions +orte_allocation_required = 1 + +## Add the interface for out-of-band communication +## and set it up +oob_tcp_if_include=ib0,eth0 +oob_tcp_peer_retries = 1000 +oob_tcp_sndbuf = 32768 +oob_tcp_rcvbuf = 32768 + +## Define the MPI interconnects +btl = vader,openib,self + +## Setup OpenIB - just in case +btl_openib_want_fork_support = 0 +btl_openib_receive_queues = X,4096,1024:X,12288,512:X,65536,512 + +## Disable MXM +pml = ob1 +coll = ^hcoll + +## Enable cpu affinity +hwloc_base_binding_policy = core + +## Setup MPI options +mpi_show_handle_leaks = 1 +mpi_warn_on_fork = 1 +#mpi_abort_print_stack = 1 + diff --git a/contrib/platform/lanl/toss/debug-nopanasas b/contrib/platform/lanl/toss/debug-nopanasas deleted file mode 100644 index 29c3d40cb6c..00000000000 --- a/contrib/platform/lanl/toss/debug-nopanasas +++ /dev/null @@ -1,5 +0,0 @@ -# (c) 2013 Los Alamos National Security, LLC. All rights reserved. 
-# Open MPI debug configuration for TOSS/TOSS2 v1.7.x/1.8.x - -source ./toss-common -source ./debug-common diff --git a/contrib/platform/lanl/toss/debug-nopanasas.conf b/contrib/platform/lanl/toss/debug-nopanasas.conf deleted file mode 120000 index 2c6787c0c40..00000000000 --- a/contrib/platform/lanl/toss/debug-nopanasas.conf +++ /dev/null @@ -1 +0,0 @@ -debug-panasas.conf \ No newline at end of file diff --git a/contrib/platform/lanl/toss/debug-panasas b/contrib/platform/lanl/toss/debug-panasas deleted file mode 100644 index fffcf61d04d..00000000000 --- a/contrib/platform/lanl/toss/debug-panasas +++ /dev/null @@ -1,13 +0,0 @@ -# (c) 2013 Los Alamos National Security, LLC. All rights reserved. -# Open MPI debug configuration for TOSS/TOSS2 v1.7.x/1.8.x - -source ./toss-common -source ./debug-common - -# Enable panasas support in romio -with_io_romio_flags=--with-file-system=ufs+nfs+panfs -CFLAGS="-I/opt/panfs/include" -CXXFLAGS="-I/opt/panfs/include" -FCFLAGS="-I/opt/panfs/include" -FFLAGS="-I/opt/panfs/include" -CCASFLAGS="-I/opt/panfs/include" diff --git a/contrib/platform/lanl/toss/debug-panasas.conf b/contrib/platform/lanl/toss/debug.conf similarity index 100% rename from contrib/platform/lanl/toss/debug-panasas.conf rename to contrib/platform/lanl/toss/debug.conf diff --git a/contrib/platform/lanl/toss/optimized b/contrib/platform/lanl/toss/optimized new file mode 100644 index 00000000000..59ef7e1add0 --- /dev/null +++ b/contrib/platform/lanl/toss/optimized @@ -0,0 +1,8 @@ +# (c) 2013-2016 Los Alamos National Security, LLC. All rights reserved. 
+# Open MPI optimized configuration for TOSS/TOSS2 v1.7.x/1.8.x + +source ./toss-common +source ./optimized-common + +# Enable lustre support in romio +with_io_romio_flags=--with-file-system=ufs+nfs+lustre diff --git a/contrib/platform/lanl/toss/optimized-mlx b/contrib/platform/lanl/toss/optimized-mlx new file mode 100644 index 00000000000..38adccf8f5c --- /dev/null +++ b/contrib/platform/lanl/toss/optimized-mlx @@ -0,0 +1,4 @@ +# (c) 2013-2016 Los Alamos National Security, LLC. All rights reserved. +# Open MPI optimized configuration for TOSS/TOSS2 v1.7.x/1.8.x + +source ./optimized diff --git a/contrib/platform/lanl/toss/optimized-mlx.conf b/contrib/platform/lanl/toss/optimized-mlx.conf new file mode 100644 index 00000000000..4bcbc3cefb7 --- /dev/null +++ b/contrib/platform/lanl/toss/optimized-mlx.conf @@ -0,0 +1,104 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This is the default system-wide MCA parameters defaults file. +# Specifically, the MCA parameter "mca_param_files" defaults to a +# value of +# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" +# (this file is the latter of the two). So if the default value of +# mca_param_files is not changed, this file is used to set system-wide +# MCA parameters.
This file can therefore be used to set system-wide +# default MCA parameters for all users. Of course, users can override +# these values if they want, but this file is an excellent location +# for setting system-specific MCA parameters for those users who don't +# know / care enough to investigate the proper values for them. + +# Note that this file is only applicable where it is visible (in a +# filesystem sense). Specifically, MPI processes each read this file +# during their startup to determine what default values for MCA +# parameters should be used. mpirun does not bundle up the values in +# this file from the node where it was run and send them to all nodes; +# the default value decisions are effectively distributed. Hence, +# these values are only applicable on nodes that "see" this file. If +# $sysconf is a directory on a local disk, it is likely that changes +# to this file will need to be propagated to other nodes. If $sysconf +# is a directory that is shared via a networked filesystem, changes to +# this file will be visible to all nodes that share this $sysconf. + +# The format is straightforward: one per line, mca_param_name = +# rvalue. Quoting is ignored (so if you use quotes or escape +# characters, they'll be included as part of the value). For example: + +# Disable run-time MPI parameter checking +# mpi_param_check = 0 + +# Note that the value "~/" will be expanded to the current user's home +# directory. For example: + +# Change component loading path +# component_path = /usr/local/lib/openmpi:~/my_openmpi_components + +# See "ompi_info --param all all" for a full listing of Open MPI MCA +# parameters available and their default values. 
+# + +# Basic behavior to smooth startup +mca_base_component_show_load_errors = 0 +opal_set_max_sys_limits = 1 +orte_report_launch_progress = 1 + +# Define timeout for daemons to report back during launch +orte_startup_timeout = 10000 + +## Protect the shared file systems +orte_no_session_dirs = /panfs,/scratch,/users,/usr/projects +orte_tmpdir_base = /tmp + +## Require an allocation to run - protects the frontend +## from inadvertent job executions +orte_allocation_required = 1 + +## Add the interface for out-of-band communication +## and set it up +oob_tcp_if_include = ib0,eth0 +oob_tcp_peer_retries = 1000 +oob_tcp_sndbuf = 32768 +oob_tcp_rcvbuf = 32768 + +## Define the MPI interconnects +btl = vader,openib,self + +## Setup OpenIB - just in case +btl_openib_want_fork_support = 0 +btl_openib_receive_queues = X,4096,1024:X,12288,512:X,65536,512 + +## Disable MXM +pml = ob1 +coll = ^hcoll + +## Enable cpu affinity +hwloc_base_binding_policy = core + +## Setup MPI options +mpi_show_handle_leaks = 0 +mpi_warn_on_fork = 1 +#mpi_abort_print_stack = 0 + diff --git a/contrib/platform/lanl/toss/optimized-nopanasas b/contrib/platform/lanl/toss/optimized-nopanasas deleted file mode 100644 index 08c80593ff0..00000000000 --- a/contrib/platform/lanl/toss/optimized-nopanasas +++ /dev/null @@ -1,5 +0,0 @@ -# (c) 2013 Los Alamos National Security, LLC. All rights reserved. 
-# Open MPI optimized configuration for TOSS/TOSS2 v1.7.x/1.8.x - -source ./toss-common -source ./optimized-common diff --git a/contrib/platform/lanl/toss/optimized-nopanasas.conf b/contrib/platform/lanl/toss/optimized-nopanasas.conf deleted file mode 120000 index 10407fce626..00000000000 --- a/contrib/platform/lanl/toss/optimized-nopanasas.conf +++ /dev/null @@ -1 +0,0 @@ -optimized-panasas.conf \ No newline at end of file diff --git a/contrib/platform/lanl/toss/optimized-panasas b/contrib/platform/lanl/toss/optimized-panasas deleted file mode 100644 index 06b5bc0287f..00000000000 --- a/contrib/platform/lanl/toss/optimized-panasas +++ /dev/null @@ -1,13 +0,0 @@ -# (c) 2013 Los Alamos National Security, LLC. All rights reserved. -# Open MPI optimized configuration for TOSS/TOSS2 v1.7.x/1.8.x - -source ./toss-common -source ./optimized-common - -# Enable panasas support in romio -with_io_romio_flags=--with-file-system=ufs+nfs+panfs -CFLAGS="-I/opt/panfs/include" -CXXFLAGS="-I/opt/panfs/include" -FCFLAGS="-I/opt/panfs/include" -FFLAGS="-I/opt/panfs/include" -CCASFLAGS="-I/opt/panfs/include" diff --git a/contrib/platform/lanl/toss/optimized-panasas.conf b/contrib/platform/lanl/toss/optimized.conf similarity index 100% rename from contrib/platform/lanl/toss/optimized-panasas.conf rename to contrib/platform/lanl/toss/optimized.conf diff --git a/contrib/platform/lanl/toss/toss-common b/contrib/platform/lanl/toss/toss-common index 639672a3a54..2927fb28168 100644 --- a/contrib/platform/lanl/toss/toss-common +++ b/contrib/platform/lanl/toss/toss-common @@ -1,7 +1,6 @@ # (c) 2013 Los Alamos National Security, LLC. All rights reserved. 
# Open MPI common configuration for TOSS/TOSS2 v1.7.x/1.8.x -enable_dlopen=no enable_binaries=yes enable_heterogeneous=no enable_shared=yes @@ -23,9 +22,6 @@ enable_cxx_exceptions=no # Disable components not needed on any TOSS platform enable_mca_no_build=carto,crs,filem,routed-linear,snapc,pml-dr,pml-crcp2,pml-crcpw,pml-v,pml-example,crcp,btl-tcp -# Enable malloc hooks for mpi_leave_pinned -with_memory_manager=linux - # TOSS2 uses slurm with_slurm=yes with_tm=no diff --git a/contrib/platform/mellanox/optimized b/contrib/platform/mellanox/optimized index a322e971ee9..ae89528ce5f 100644 --- a/contrib/platform/mellanox/optimized +++ b/contrib/platform/mellanox/optimized @@ -18,6 +18,11 @@ fi if [ "$mellanox_autodetect" == "yes" ]; then + ucx_dir=${ucx_dir:="$(pkg-config --variable=prefix ucx)"} + if [ -d $ucx_dir ]; then + with_ucx=$ucx_dir + fi + mxm_dir=${mxm_dir:="$(pkg-config --variable=prefix mxm)"} if [ -d $mxm_dir ]; then with_mxm=$mxm_dir diff --git a/contrib/platform/mellanox/optimized.conf b/contrib/platform/mellanox/optimized.conf index aa1d58c92f1..d74b6ad98b4 100644 --- a/contrib/platform/mellanox/optimized.conf +++ b/contrib/platform/mellanox/optimized.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/contrib/platform/ornl/ornl_configure.gnu b/contrib/platform/ornl/ornl_configure.gnu index c78026dce5d..2ca4852b06e 100755 --- a/contrib/platform/ornl/ornl_configure.gnu +++ b/contrib/platform/ornl/ornl_configure.gnu @@ -1,5 +1,5 @@ -#!/bin/bash -# change the following for install path, note +#!/bin/bash +# change the following for install path, note # that VER is appended to the path. VER="trunk" SW_INSTALL_ROOT=/tmp/work/gshipman/ompi/install @@ -8,8 +8,8 @@ PLATFORM=ornl/cray_xt_cnl_romio ORTED_MAKEFILE=orte/tools/orted/Makefile -if test -z "`grep "orted_LDFLAGS =.*-all-static" ${ORTED_MAKEFILE}`"; then - echo "WARNING: patching ${ORTED_MAKEFILE} to build it static" +if test -z "`grep "orted_LDFLAGS =.*-all-static" ${ORTED_MAKEFILE}`"; then + echo "WARNING: patching ${ORTED_MAKEFILE} to build it static" sed -i 's/orted_LDFLAGS =/orted_LDFLAGS = -all-static/g' ${ORTED_MAKEFILE} fi diff --git a/contrib/scaling/Makefile.include b/contrib/scaling/Makefile.include index 1e2c738bb32..6dfe24515e4 100644 --- a/contrib/scaling/Makefile.include +++ b/contrib/scaling/Makefile.include @@ -2,9 +2,9 @@ # # Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/contrib/scaling/orte_no_op.c b/contrib/scaling/orte_no_op.c index a96c94b66ff..ea85f5a0f24 100644 --- a/contrib/scaling/orte_no_op.c +++ b/contrib/scaling/orte_no_op.c @@ -15,7 +15,7 @@ int main(int argc, char* argv[]) fprintf(stderr, "Failed orte_init\n"); exit(1); } - + if (ORTE_SUCCESS != orte_finalize()) { fprintf(stderr, "Failed orte_finalize\n"); exit(1); diff --git a/contrib/search_compare.pl b/contrib/search_compare.pl index f0d9bfa8701..c4f06e24489 100755 --- a/contrib/search_compare.pl +++ b/contrib/search_compare.pl @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2013 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -42,13 +42,13 @@ my $cmd; sub construct { - # don't process directories or links, and dont' recurse down + # don't process directories or links, and dont' recurse down # "special" directories if ( -l $_ ) { return; } - if ( -d $_ ) { + if ( -d $_ ) { if ((/\.deps/) || (/\.libs/) || (/\.git/) || (/\.dSYM/) || ($_ eq "autom4te.cache") || ($_ eq "libltdl")) { $File::Find::prune = true; - } + } return; } diff --git a/contrib/search_replace.pl b/contrib/search_replace.pl index 96f07ddce0f..3d23c63848c 100755 --- a/contrib/search_replace.pl +++ b/contrib/search_replace.pl @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -31,10 +31,10 @@ print "replace: $replace_string\n"; sub replace { - # don't process directories or links, and dont' recurse down + # don't process directories or links, and dont' recurse down # "special" directories if ( -l $_ ) { return; } - if ( -d $_ ) { + if ( -d $_ ) { if ((/\.svn/) || (/\.deps/) || (/\.libs/) || (/\.hg/) || (/\.git/) || (/autom4te\.cache/)) { $File::Find::prune = true; } @@ -48,7 +48,7 @@ sub replace { print "--> $File::Find::name\n"; my $replace = 0; - open(INFILE, $process_file) || + open(INFILE, $process_file) || die "Could not open " . $File::Find::name . ": $!\n"; open(OUTFILE, "> " . $process_file . ".tmp") || die "Could not open " . $File::Find::name . ".tmp: $!\n"; diff --git a/contrib/spread/spread.conf b/contrib/spread/spread.conf index 79d7fd60929..25c1381a3d8 100644 --- a/contrib/spread/spread.conf +++ b/contrib/spread/spread.conf @@ -1,12 +1,12 @@ # Blank lines are permitted in this file. # spread.conf sample file -# +# # questions to spread@spread.org # #MINIMAL REQUIRED FILE # -# Spread should work fine on one machine with just the uncommented +# Spread should work fine on one machine with just the uncommented # lines below. The rest of the file documents all the options and # more complex network setups. 
# @@ -24,34 +24,34 @@ Spread_Segment 192.168.203.255:4803 { # Spread options #--------------------------------------------------------------------------- #--------------------------------------------------------------------------- -#Set what internal Spread events are logged to the screen or file +#Set what internal Spread events are logged to the screen or file # (see EventLogFile). -# Default setting is to enable PRINT and EXIT events only. +# Default setting is to enable PRINT and EXIT events only. #The PRINT and EXIT types should always be enabled. The names of others are: -# EXIT PRINT DEBUG DATA_LINK NETWORK PROTOCOL SESSION -# CONFIGURATION MEMBERSHIP FLOW_CONTROL STATUS EVENTS -# GROUPS MEMORY SKIPLIST ALL NONE -# ALL and NONE are special and represent either enabling every type +# EXIT PRINT DEBUG DATA_LINK NETWORK PROTOCOL SESSION +# CONFIGURATION MEMBERSHIP FLOW_CONTROL STATUS EVENTS +# GROUPS MEMORY SKIPLIST ALL NONE +# ALL and NONE are special and represent either enabling every type # or enabling none of them. -# You can also use a "!" sign to negate a type, +# You can also use a "!" sign to negate a type, # so { ALL !DATA_LINK } means log all events except data_link ones. #DebugFlags = { PRINT EXIT } DebugFlags = { ALL } # Set priority level of events to output to log file or screen -# The possible levels are: +# The possible levels are: # pDEBUG INFO WARNING ERROR CRITICAL FATAL # Once selected all events tagged with that priority or higher will -# be output. FATAL events are always output and cause the daemon to +# be output. FATAL events are always output and cause the daemon to # shut down. Some Events are tagged with a priority of PRINT which -# causes them to print out no matter what priority level is set. +# causes them to print out no matter what priority level is set. # # The default level used if nothing is set is INFO. 
- + #EventPriority = INFO -#Set whether to log to a file as opposed to stdout/stderr and what +#Set whether to log to a file as opposed to stdout/stderr and what # file to log to. # Default is to log to stdout. # @@ -59,7 +59,7 @@ DebugFlags = { ALL } #If option is set then logging is to the filename specified. # The filename can include a %h or %H escape that will be replaced at runtime # by the hostname of the machine upon which the daemon is running. -# For example "EventLogFile = spreadlog_%h.log" with 2 machines +# For example "EventLogFile = spreadlog_%h.log" with 2 machines # running Spread (machine1.mydomain.com and machine2.mydomain.com) will # cause the daemons to log to "spreadlog_machine1.mydomain.com.log" and # "spreadlog_machine2.mydomain.com.log" respectively. @@ -81,16 +81,16 @@ EventLogFile = spread_%h.log EventTimeStamp = "[%a %d %b %Y %H:%M:%S]" #Set whether to add a precise (microsecond) resolution timestamp to all logged -# events or not. This option requires that EventTimeStamp is also enabled. +# events or not. This option requires that EventTimeStamp is also enabled. # If the option is commented out then the microsecond timestamp is not added # If the option is uncommented then a microsecond time will print in addition -# to the H:M:S resolution timestamp provided by EventTimeStamp. +# to the H:M:S resolution timestamp provided by EventTimeStamp. 
#EventPreciseTimeStamp # Set to initialize daemon sequence numbers to a 'large' number for testing # this is purely a debugging capability and should never be enabled on -# production systems (note one side effect of enabling this is that +# production systems (note one side effect of enabling this is that # your system will experience an extra daemon membership every few messages # so you REALLY do not want this turned on) # If you want to change the initial value the sequence number is set to @@ -99,12 +99,12 @@ EventTimeStamp = "[%a %d %b %Y %H:%M:%S]" #DebugInitialSequence -#Set whether to allow dangerous monitor commands +#Set whether to allow dangerous monitor commands # like "partition, flow_control, or kill" # Default setting is FALSE. -#If option is set to false then only "safe" monitor commands are allowed +#If option is set to false then only "safe" monitor commands are allowed # (such as requesting a status update). -#If option is set to true then all monitor commands are enabled. +#If option is set to true then all monitor commands are enabled. # THIS IS A SECURTIY RISK IF YOUR NETWORK IS NOT PROTECTED! #DangerousMonitor = false @@ -123,9 +123,9 @@ DangerousMonitor = true #SocketPortReuse = AUTO #Set what the maximum per-session queue should be for messages before disconnecting -# a session. Spread will buffer upto that number of messages that are destined to the -# session, but that can not be delivered currently because the session is not reading fast enough. -# The compiled in default is usually 1000 if you havn't changed it in the spread_params.h file. +# a session. Spread will buffer upto that number of messages that are destined to the +# session, but that can not be delivered currently because the session is not reading fast enough. +# The compiled in default is usually 1000 if you havn't changed it in the spread_params.h file. 
#MaxSessionMessages = 5000 MaxSessionMessages = 5000 @@ -178,7 +178,7 @@ MaxSessionMessages = 5000 #Spread_Segment 127.0.0.255:4803 { -# either a name or IP address. If both are given, than the name is taken +# either a name or IP address. If both are given, than the name is taken # as-is, and the IP address is used for that name. # localhost 127.0.0.1 @@ -208,10 +208,10 @@ MaxSessionMessages = 5000 # Multi-homed host setup # -# If you run Spread on hosts with multiple interfaces you may want to +# If you run Spread on hosts with multiple interfaces you may want to # control which interfaces Spread uses for client connections and for # the daemon-to-daemon (and monitor control) messages. This can be done -# by adding an extra stanza to each configured machine. +# by adding an extra stanza to each configured machine. # #Sample: # @@ -237,6 +237,6 @@ MaxSessionMessages = 5000 # explicitly configure ALL interfaces you want as Spread removes all defaults when # you use the explicit notation. # The third multihomed3 host uses a shorthand form of omitting the D or C option and -# just listening for all types of traffic and events on both the 192.168.0 and 1.2.3 -# networks. If no letter is listed before the interface address then ALL types of +# just listening for all types of traffic and events on both the 192.168.0 and 1.2.3 +# networks. If no letter is listed before the interface address then ALL types of # events are handled on that interface. diff --git a/contrib/submit_test.pl b/contrib/submit_test.pl index e0a39e86f4c..4ba7f4ea507 100755 --- a/contrib/submit_test.pl +++ b/contrib/submit_test.pl @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -42,7 +42,7 @@ sub download { my $req = HTTP::Request->new(GET => $uri); my $res = $ua->request($req); if ($res->is_success()) { - open(FILE, ">$file") || + open(FILE, ">$file") || die("ERROR: Could not write to $file"); print FILE $res->content; close(FILE); @@ -50,7 +50,7 @@ sub download { print "Error retrieving URL: $uri\n"; die $res->message; } - + # Find the fields and prompts in the config files my @prompts; @@ -83,7 +83,7 @@ sub download { while () { my $line = $_; chomp($line); - if (length($line) > 0 && + if (length($line) > 0 && ! ($line =~ /^[ \t]+$/) && ! ($line =~ /^[ \t]*#/)) { my @vals = split(/:/, $line); @@ -142,7 +142,7 @@ sub do_menu { printf("%d. %s\n", $j + 1, $keys[$j]); } print "\n$$prompts[$i] (1-" . ($#keys + 1) . "): "; - + my $input = ; print "\n"; chomp($input); @@ -150,7 +150,7 @@ sub do_menu { print "Please enter a valid selection\n"; next; } - + push(@config, $keys[$input - 1]); $level = $level->{$keys[$input - 1]}; } @@ -189,7 +189,7 @@ sub do_question { sub do_yn { my ($q, $default) = @_; - my $input = do_question($q, $default, + my $input = do_question($q, $default, sub { my ($line) = @_; lc($line) eq "y" || lc($line) eq "n"; }); lc($input); } @@ -198,7 +198,7 @@ sub do_yn { sub do_int { my ($q, $default) = @_; - do_question($q, $default, + do_question($q, $default, sub { my ($line) = @_; $line >= 0; }); } diff --git a/contrib/test_headers_in_ompi.pl b/contrib/test_headers_in_ompi.pl index 860189c05b9..16595e1f01a 100755 --- a/contrib/test_headers_in_ompi.pl +++ b/contrib/test_headers_in_ompi.pl @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -42,9 +42,9 @@ # this file is used to extract which header files are included in a particular # source file. Kind of a neat implementation sub get_header_files { - + local($dump_file) = @_; - + open(C_FILES, "find $source_tree -name \*.c |") || print "could not find source files\n"; open(H_FILES, "find $source_tree -name \*.c |") || print "could not find header files\n"; @@ -86,7 +86,7 @@ sub get_header_files { } -#this simply constructs the header file list from dump and dump_pl and then checks whether all the +#this simply constructs the header file list from dump and dump_pl and then checks whether all the #header files are present sub parse_header_files { @@ -114,7 +114,7 @@ sub parse_header_files { s/^\s*//; #remove anything after <*.h> s/>{1,1}.*\n/>\n/; - #remove anything before #include + #remove anything before #include s/^.*#include/#include/; print TEMP $_; } @@ -141,7 +141,7 @@ sub test_for_headers { while(
) { print $_; - + #create the file for compilation chomp $_; $string = " @@ -151,15 +151,15 @@ sub test_for_headers { int main(int argc, char **argv) { return 0; } - + "; - + open(TEMP, "> $temp") || print "Could not open $temp for writing\n"; print TEMP $string; close(TEMP); - + $compiled = system("$CC $temp"); - + if ($compiled == 0) { print "$_ is present\n"; } else { diff --git a/contrib/update-my-copyright.pl b/contrib/update-my-copyright.pl index fb46bc9e951..e962184bd9d 100755 --- a/contrib/update-my-copyright.pl +++ b/contrib/update-my-copyright.pl @@ -4,7 +4,7 @@ # $COPYRIGHT$ # -# Short version: +# Short version: # # This script automates the tedious task of updating copyright notices # in the tops of OMPI/ORTE/OPAL source files before committing back to @@ -35,7 +35,7 @@ # # NOTE: this script currently doesn't handle multi-line copyright # statements, such as: -# +# # Copyright (c) 2010 University of Blabbityblah and the Trustees of # Schblitbittyboo. All rights reserved. # diff --git a/contrib/whitespace-purge.sh b/contrib/whitespace-purge.sh new file mode 100755 index 00000000000..9c9d8fe909e --- /dev/null +++ b/contrib/whitespace-purge.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. All rights +# reserved +# Copyright (c) 2015 Cisco Systems, Inc. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +for file in $(git ls-files) ; do + # check for the mime-type and do not follow symbolic links. this + # will cause file to print application/x-symlink for the mime-type + # allowing us to only have to check if the type is application to + # skip sym links, pdfs, etc. If any other file types should be + # skipped add the check here. 
+ type=$(file -b --mime-type -h $file) + if test ${type::4} == "text" ; then + # Eliminate whitespace at the end of lines + perl -pi -e 's/\s*$/\n/' $file + fi +done diff --git a/examples/Hello.java b/examples/Hello.java index eba78c23815..4f2ac4cfb15 100644 --- a/examples/Hello.java +++ b/examples/Hello.java @@ -23,17 +23,17 @@ */ import mpi.*; - + class Hello { static public void main(String[] args) throws MPIException { - + MPI.Init(args); int myrank = MPI.COMM_WORLD.getRank(); int size = MPI.COMM_WORLD.getSize() ; System.out.println("Hello world from rank " + myrank + " of " + size); - + MPI.Finalize(); } } diff --git a/examples/Hello_oshmem.java b/examples/Hello_oshmem.java deleted file mode 100644 index dc6595858a8..00000000000 --- a/examples/Hello_oshmem.java +++ /dev/null @@ -1,12 +0,0 @@ -import shmem.*; - -public class Hello_oshmem -{ - public static void main(String[] args) - { - ShMem.startPEs(0); - int nproc = ShMem.getNumPEs(); - int proc = ShMem.getMyPE(); - System.out.println("Hello, world, I am "+ proc +" of "+ nproc); - } -} diff --git a/examples/Makefile b/examples/Makefile index 83b5d2fc53a..0a7e57c408f 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -5,18 +5,18 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -53,8 +53,7 @@ EXAMPLES = \ hello_oshmem \ hello_oshmemfh \ Hello.class \ - Hello_oshmem.class \ - ring_c \ + ring_c \ ring_cxx \ ring_mpifh \ ring_usempi \ @@ -62,17 +61,12 @@ EXAMPLES = \ ring_oshmem \ ring_oshmemfh \ Ring.class \ - Ring_oshmem.class \ connectivity_c \ oshmem_shmalloc \ oshmem_circular_shift \ - oshmem_circular_shift.class \ oshmem_max_reduction \ - oshmem_max_reduction.class \ oshmem_strided_puts \ - oshmem_strided_puts.class \ - oshmem_symmetric_data \ - oshmem_symmetric_data.class + oshmem_symmetric_data # Default target. Always build the C MPI examples. Only build the @@ -90,19 +84,19 @@ all: hello_c ring_c connectivity_c # MPI examples mpi: - @ if ompi_info --parsable | grep bindings:cxx:yes >/dev/null; then \ + @ if ompi_info --parsable | grep -q bindings:cxx:yes >/dev/null; then \ $(MAKE) hello_cxx ring_cxx; \ fi - @ if ompi_info --parsable | grep bindings:mpif.h:yes >/dev/null; then \ + @ if ompi_info --parsable | grep -q bindings:mpif.h:yes >/dev/null; then \ $(MAKE) hello_mpifh ring_mpifh; \ fi - @ if ompi_info --parsable | grep bindings:use_mpi:yes >/dev/null; then \ + @ if ompi_info --parsable | egrep -q bindings:use_mpi:\"\?yes >/dev/null; then \ $(MAKE) hello_usempi ring_usempi; \ fi - @ if ompi_info --parsable | grep bindings:use_mpi_f08:yes >/dev/null; then \ + @ if ompi_info --parsable | grep -q bindings:use_mpi_f08:yes >/dev/null; then \ $(MAKE) hello_usempif08 ring_usempif08; \ fi - @ if ompi_info --parsable | grep bindings:java:yes >/dev/null; then \ + @ if ompi_info --parsable | grep -q bindings:java:yes >/dev/null; then \ $(MAKE) Hello.class Ring.class; \ fi @@ -122,14 +116,6 @@ oshmem: $(MAKE) hello_oshmemfh; \ $(MAKE) ring_oshmemfh; \ fi - @ if oshmem_info --parsable | grep bindings:java:yes >/dev/null; then \ - $(MAKE) Hello_oshmem.class; \ - $(MAKE) Ring_oshmem.class; \ - $(MAKE) oshmem_circular_shift.class; \ - $(MAKE) 
oshmem_max_reduction.class; \ - $(MAKE) oshmem_strided_puts.class; \ - $(MAKE) oshmem_symmetric_data.class; \ - fi # The usual "clean" target @@ -162,35 +148,23 @@ hello_oshmem: hello_oshmem_c.c $(SHMEMCC) $(CFLAGS) $? -o $@ hello_oshmemfh: hello_oshmemfh.f90 $(SHMEMFC) $(FCFLAGS) $? -o $@ -Hello_oshmem.class: Hello_oshmem.java - $(JAVAC) Hello_oshmem.java ring_oshmem: ring_oshmem_c.c $(SHMEMCC) $(CFLAGS) $? -o $@ ring_oshmemfh: ring_oshmemfh.f90 $(SHMEMFC) $(FCFLAGS) $? -o $@ -Ring_oshmem.class: Ring_oshmem.java - $(JAVAC) Ring_oshmem.java oshmem_shmalloc: oshmem_shmalloc.c $(SHMEMCC) $(CCFLAGS) $? -o $@ oshmem_circular_shift: oshmem_circular_shift.c $(SHMEMCC) $(CFLAGS) $? -o $@ -oshmem_circular_shift.class: oshmem_circular_shift.java - $(JAVAC) oshmem_circular_shift.java oshmem_max_reduction: oshmem_max_reduction.c $(SHMEMCC) $(CFLAGS) $? -o $@ -oshmem_max_reduction.class: oshmem_max_reduction.java - $(JAVAC) oshmem_max_reduction.java oshmem_strided_puts: oshmem_strided_puts.c $(SHMEMCC) $(CFLAGS) $? -o $@ -oshmem_strided_puts.class: oshmem_strided_puts.java - $(JAVAC) oshmem_strided_puts.java oshmem_symmetric_data: oshmem_symmetric_data.c $(SHMEMCC) $(CFLAGS) $? -o $@ -oshmem_symmetric_data.class: oshmem_symmetric_data.java - $(JAVAC) oshmem_symmetric_data.java diff --git a/examples/Makefile.include b/examples/Makefile.include index ad40e4f25e1..7707521c943 100644 --- a/examples/Makefile.include +++ b/examples/Makefile.include @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -15,9 +15,9 @@ # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. 
# Copyright (c) 2013 Mellanox Technologies, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -54,10 +54,4 @@ EXTRA_DIST += \ examples/oshmem_strided_puts.c \ examples/oshmem_symmetric_data.c \ examples/Hello.java \ - examples/Ring.java \ - examples/Hello_oshmem.java \ - examples/Ring_oshmem.java \ - examples/oshmem_circular_shift.java \ - examples/oshmem_max_reduction.java \ - examples/oshmem_strided_puts.java \ - examples/oshmem_symmetric_data.java + examples/Ring.java diff --git a/examples/Ring.java b/examples/Ring.java index 5309d3cb5cc..08cd864663e 100644 --- a/examples/Ring.java +++ b/examples/Ring.java @@ -5,16 +5,16 @@ */ import mpi.* ; - + class Ring { static public void main(String[] args) throws MPIException { - - + + MPI.Init(args) ; - + int source; // Rank of sender - int dest; // Rank of receiver - int tag=50; // Tag for messages + int dest; // Rank of receiver + int tag=50; // Tag for messages int next; int prev; int message[] = new int [1]; @@ -36,8 +36,8 @@ static public void main(String[] args) throws MPIException { if (0 == myrank) { message[0] = 10; - System.out.println("Process 0 sending " + message[0] + " to rank " + next + " (" + size + " processes in ring)"); - MPI.COMM_WORLD.send(message, 1, MPI.INT, next, tag); + System.out.println("Process 0 sending " + message[0] + " to rank " + next + " (" + size + " processes in ring)"); + MPI.COMM_WORLD.send(message, 1, MPI.INT, next, tag); } /* Pass the message around the ring. 
The exit mechanism works as @@ -69,7 +69,7 @@ static public void main(String[] args) throws MPIException { if (0 == myrank) { MPI.COMM_WORLD.recv(message, 1, MPI.INT, prev, tag); } - + MPI.Finalize(); } } diff --git a/examples/Ring_oshmem.java b/examples/Ring_oshmem.java deleted file mode 100644 index 2bc126f12da..00000000000 --- a/examples/Ring_oshmem.java +++ /dev/null @@ -1,54 +0,0 @@ -import shmem.*; - -public class Ring_oshmem -{ - public static void main(String[] args) throws ShMemException - { - ShMem.startPEs(0); - int nproc = ShMem.getNumPEs(); - int proc = ShMem.getMyPE(); - Addr rbuf = new Addr(4); // One integer value. - rbuf.putInt(-1); - int message = 10; - - // Calculate the PE number of the next process in the ring. Use the - // modulus operator so that the last process "wraps around" to PE 0. - int next = (proc + 1) % nproc; - - if(proc == 0) - { - System.out.println("Process 0 puts message "+ message +" to "+ - next +" ("+ nproc +" processes in ring)"); - - rbuf.putInt(message, next); - } - - // Pass the message around the ring. The exit mechanism works as - // follows: the message (a positive integer) is passed around the - // ring. Each time it passes PE 0, it is decremented. When each - // processes receives a message containing a 0 value, it passes the - // message on to the next process and then quits. By passing the 0 - // message first, every process gets the 0 message and can quit - // normally. 
- - while(message > 0) - { - rbuf.waitUntilInt(ShMem.CMP_EQ, message); - - if(proc == 0) - { - message--; - System.out.println("Process 0 decremented value: "+ message); - } - - rbuf.putInt(message, next); - - if(proc != 0) - message--; - } - - // All done - rbuf.free(); - System.out.println("Process "+ proc +" exiting"); - } -} diff --git a/examples/connectivity_c.c b/examples/connectivity_c.c index f52c8a301a2..14f3f546d53 100644 --- a/examples/connectivity_c.c +++ b/examples/connectivity_c.c @@ -37,7 +37,7 @@ main(int argc, char **argv) if (MPI_SUCCESS != MPI_Get_processor_name(name, &length)) { strcpy(name, "unknown"); } - + if (argc>1 && strcmp(argv[1], "-v")==0) verbose = 1; diff --git a/examples/dtrace/Makefile b/examples/dtrace/Makefile index 16156913510..bcd544f9a88 100644 --- a/examples/dtrace/Makefile +++ b/examples/dtrace/Makefile @@ -3,9 +3,9 @@ # Use is subject to license terms. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/examples/dtrace/README b/examples/dtrace/README index f023fb46ed0..8df35acca8a 100644 --- a/examples/dtrace/README +++ b/examples/dtrace/README @@ -2,17 +2,17 @@ Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. This document discusses how to use the Solaris Dynamic Tracing utility (DTrace) -with Open MPI. DTrace is a comprehensive dynamic tracing utility that you can -use to monitor the behavior of applications programs as well as the operating -system itself. You can use DTrace on live production systems to understand +with Open MPI. DTrace is a comprehensive dynamic tracing utility that you can +use to monitor the behavior of applications programs as well as the operating +system itself. You can use DTrace on live production systems to understand those systems' behavior and to track down any problems that might be occurring. 
-The D language is the programming language used to create the source code for +The D language is the programming language used to create the source code for DTrace programs. -The material in this chapter assumes knowledge of the D language and how to -use DTrace. For more information about the D language and DTrace, refer to -the Solaris Dynamic Tracing Guide (Part Number 817-6223). This guide is part +The material in this chapter assumes knowledge of the D language and how to +use DTrace. For more information about the D language and DTrace, refer to +the Solaris Dynamic Tracing Guide (Part Number 817-6223). This guide is part of the Solaris 10 OS Software Developer Collection. Solaris 10 OS documentation can be found on the web at the following location: @@ -21,7 +21,7 @@ http://www.sun.com/documentation Follow these links to the Solaris Dynamic Tracing Guide: -Solaris Operating Systems -> Solaris 10 -> Solaris 10 Software Developer +Solaris Operating Systems -> Solaris 10 -> Solaris 10 Software Developer Collection Note: The sample program mpicommleak and other sample scripts are located at: @@ -37,7 +37,7 @@ The following topics are covered in this chapter: 1. mpirun Privileges -Before you run a program under DTrace, you need to make sure that you have the +Before you run a program under DTrace, you need to make sure that you have the correct mpirun privileges. In order to run the script under mpirun, make sure that you have dtrace_proc and @@ -50,12 +50,12 @@ To determine whether you have the appropriate privileges on the entire cluster, perform the following steps: 1. Use your favorite text editor to create the following shell script. - + myppriv.sh: - #!/bin/sh - # myppriv.sh - run ppriv under a shell so you can get the privileges - # of the process that mprun creates + #!/bin/sh + # myppriv.sh - run ppriv under a shell so you can get the privileges + # of the process that mprun creates ppriv $$ 2. 
Type the following command but replace the hostnames in the example with the @@ -64,77 +64,77 @@ perform the following steps: % mpirun -np 2 --host burl-ct-v440-4,burl-ct-v440-5 myppriv.sh -If the output of ppriv shows that the E privilege set has the dtrace -privileges, then you will be able to run dtrace under mpirun (see the two +If the output of ppriv shows that the E privilege set has the dtrace +privileges, then you will be able to run dtrace under mpirun (see the two examples below). Otherwise, you will need to adjust your system to get dtrace access. This example shows the ppriv output when the privileges have not been set: -% ppriv $$ -4084: -csh -flags = - E: basic - I: basic - P: basic +% ppriv $$ +4084: -csh +flags = + E: basic + I: basic + P: basic L: all This example shows ppriv output when the privileges have been set: -% ppriv $$ -2075: tcsh -flags = - E:basic,dtrace_proc,dtrace_user - I:basic,dtrace_proc,dtrace_user - P:basic,dtrace_proc,dtrace_user +% ppriv $$ +2075: tcsh +flags = + E:basic,dtrace_proc,dtrace_user + I:basic,dtrace_proc,dtrace_user + P:basic,dtrace_proc,dtrace_user L: all -NOTE: To update your privileges, ask your system administrator to add the -dtrace_user and dtrace_proc privileges to your account in the /etc/user_attr +NOTE: To update your privileges, ask your system administrator to add the +dtrace_user and dtrace_proc privileges to your account in the /etc/user_attr file. -After the privileges have been changed, you can rerun the myppriv.sh script to +After the privileges have been changed, you can rerun the myppriv.sh script to view the changed privileges. 2. Running DTrace with MPI Programs There are two ways to use Dynamic Tracing with MPI programs: - - Run the MPI program directly under DTrace, or + - Run the MPI program directly under DTrace, or - Attach DTrace to a running MPI program 2.1 Running an MPI Program Under DTrace -For illustration purposes, assume you have a program named mpiapp. 
To trace +For illustration purposes, assume you have a program named mpiapp. To trace the program mpiapp using the mpitrace.d script, type the following command: % mpirun -np 4 dtrace -s mpitrace.d -c mpiapp -The advantage of tracing an MPI program in this way is that all the processes -in the job will be traced from the beginning. This method is probably most -useful in doing performance measurements, when you need to start at the -beginning of an application and you need all the processes in a job to +The advantage of tracing an MPI program in this way is that all the processes +in the job will be traced from the beginning. This method is probably most +useful in doing performance measurements, when you need to start at the +beginning of an application and you need all the processes in a job to participate in collecting data. This approach also has some disadvantages. One disadvantage of running a -program like the one in the above example is that all the tracing output for -all four processes is directed to standard output (stdout). +program like the one in the above example is that all the tracing output for +all four processes is directed to standard output (stdout). To trace a parallel program and get separate trace files, create a script similar to the following. -#!/bin/sh -# partrace.sh - a helper script to dtrace Open MPI jobs from the -# start of the job. +#!/bin/sh +# partrace.sh - a helper script to dtrace Open MPI jobs from the +# start of the job. dtrace -s $1 -c $2 -o $2.$OMPI_COMM_WORLD_RANK.trace Type the following command to run the partrace.sh shell script: % mpirun -np 4 partrace.sh mpitrace.d mpiapp -This will run mpiapp under dtrace using the mpitrace.d script. The script -saves the trace output for each process in a job under a separate file name, +This will run mpiapp under dtrace using the mpitrace.d script. 
The script +saves the trace output for each process in a job under a separate file name, based on the program name and rank of the process. Note that subsequent runs will append the data into the existing trace files. @@ -168,69 +168,69 @@ process. Perform the following procedure: DTrace enables you to easily trace programs. When used in conjunction with MPI and the more than 200 functions defined in the MPI standard, dtrace provides an easy way to determine which functions might be in error during the debugging -process, or those functions which might be of interest. After you determine +process, or those functions which might be of interest. After you determine the function showing the error, it is easy to locate the desired job, process, -and rank on which to run your scripts. As demonstrated above, DTrace allows +and rank on which to run your scripts. As demonstrated above, DTrace allows you to perform these determinations while the program is running. Although the MPI standard provides the MPI profiling interface, using DTrace -does provide a number of advantages. The advantages of using DTrace include +does provide a number of advantages. The advantages of using DTrace include the following: - 1. The PMPI interface requires you to restart a job every time you make + 1. The PMPI interface requires you to restart a job every time you make changes to the interposing library. - 2. DTrace allows you to define probes that let you capture tracing - information on MPI without having to code the specific details for each + 2. DTrace allows you to define probes that let you capture tracing + information on MPI without having to code the specific details for each function you want to capture. - 3. DTrace's scripting language D has several built-in functions that help + 3. DTrace's scripting language D has several built-in functions that help in debugging problematic programs. 
The following example shows a simple script that traces the entry and exit into all the MPI API calls. -mpitrace.d: -pid$target:libmpi:MPI_*:entry -{ +mpitrace.d: +pid$target:libmpi:MPI_*:entry +{ printf("Entered %s...", probefunc); } -pid$target:libmpi:MPI_*:return -{ -printf("exiting, return value = %d\n", arg1); +pid$target:libmpi:MPI_*:return +{ +printf("exiting, return value = %d\n", arg1); } When you use this example script to attach DTrace to a job that performs send and recv operations, the output looks similar to the following: -% dtrace -q -p 6391 -s mpitrace.d +% dtrace -q -p 6391 -s mpitrace.d Entered MPI_Send...exiting, return value = 0 -Entered MPI_Recv...exiting, return value = 0 -Entered MPI_Send...exiting, return value = 0 -Entered MPI_Recv...exiting, return value = 0 +Entered MPI_Recv...exiting, return value = 0 +Entered MPI_Send...exiting, return value = 0 +Entered MPI_Recv...exiting, return value = 0 Entered MPI_Send...exiting, return value = 0 ... You can easily modify the mpitrace.d script to include an argument list. The resulting output resembles truss output. For example: -mpitruss.d: +mpitruss.d: pid$target:libmpi:MPI_Send:entry, -pid$target:libmpi:MPI_*send:entry, +pid$target:libmpi:MPI_*send:entry, pid$target:libmpi:MPI_Recv:entry, -pid$target:libmpi:MPI_*recv:entry -{ -printf("%s(0x%x, %d, 0x%x, %d, %d, 0x%x)",probefunc, arg0, arg1, arg2, arg3, +pid$target:libmpi:MPI_*recv:entry +{ +printf("%s(0x%x, %d, 0x%x, %d, %d, 0x%x)",probefunc, arg0, arg1, arg2, arg3, arg4, arg5); } -pid$target:libmpi:MPI_Send:return, +pid$target:libmpi:MPI_Send:return, pid$target:libmpi:MPI_*send:return, -pid$target:libmpi:MPI_Recv:return, -pid$target:libmpi:MPI_*recv:return +pid$target:libmpi:MPI_Recv:return, +pid$target:libmpi:MPI_*recv:return { -printf("\t\t = %d\n", arg1); +printf("\t\t = %d\n", arg1); } The mpitruss.d script shows how you can specify wildcard names to match the @@ -238,37 +238,37 @@ functions. 
Both probes will match all send and receive type function calls in the MPI library. The first probe shows the usage of the built-in arg variables to print out the arglist of the function being traced. -Take care when wildcarding the entrypoint and the formatting argument output, -because you could end up printing either too many arguments, or not enough -arguments, for certain functions. For example, in the above case, the -MPI_Irecv and MPI_Isend functions will not have their Request handle +Take care when wildcarding the entrypoint and the formatting argument output, +because you could end up printing either too many arguments, or not enough +arguments, for certain functions. For example, in the above case, the +MPI_Irecv and MPI_Isend functions will not have their Request handle parameters printed out. The following example shows a sample output of the mpitruss.d script: -% dtrace -q -p 6391 -s mpitruss.d -MPI_Send(0x80470b0, 1, 0x8060f48, 0, 1,0x8060d48) = 0 +% dtrace -q -p 6391 -s mpitruss.d +MPI_Send(0x80470b0, 1, 0x8060f48, 0, 1,0x8060d48) = 0 MPI_Recv(0x80470a8, 1, 0x8060f48, 0, 0, 0x8060d48) = 0 -MPI_Send(0x80470b0, 1, 0x8060f48, 0, 1, 0x8060d48) = 0 +MPI_Send(0x80470b0, 1, 0x8060f48, 0, 1, 0x8060d48) = 0 MPI_Recv(0x80470a8, 1,0x8060f48, 0, 0, 0x8060d48) = 0 ... 4. Tracking Down Resource Leaks One of the biggest issues with programming is the unintentional leaking of -resources (such as memory). With MPI, tracking and repairing resource leaks +resources (such as memory). With MPI, tracking and repairing resource leaks can be somewhat more challenging because the objects being leaked are in the middleware, and thus are not easily detected by the use of memory checkers. -DTrace helps with debugging such problems using variables, the profile -provider, and a callstack function. The mpicommcheck.d script (shown in the +DTrace helps with debugging such problems using variables, the profile +provider, and a callstack function. 
The mpicommcheck.d script (shown in the example below) probes for all the the MPI communicator calls that allocate and -deallocate communicators, and keeps track of the stack each time the function +deallocate communicators, and keeps track of the stack each time the function is called. Every 10 seconds the script dumps out the current count of MPI communicator calls and the total calls for the allocation and deallocation of communicators. When the dtrace session ends (usually by typing Ctrl-C, if you -attached to a running MPI program), the script will print out the totals and -all the different stack traces, as well as the number of times those stack +attached to a running MPI program), the script will print out the totals and +all the different stack traces, as well as the number of times those stack traces were reached. In order to perform these tasks, the script uses DTrace features such as @@ -277,44 +277,44 @@ predefined variable probefunc. The following example shows the mpicommcheck.d script. 
-mpicommcheck.d: -BEGIN -{ - allocations = 0; - deallocations = 0; - prcnt = 0; +mpicommcheck.d: +BEGIN +{ + allocations = 0; + deallocations = 0; + prcnt = 0; } -pid$target:libmpi:MPI_Comm_create:entry, +pid$target:libmpi:MPI_Comm_create:entry, pid$target:libmpi:MPI_Comm_dup:entry, -pid$target:libmpi:MPI_Comm_split:entry -{ - ++allocations; - @counts[probefunc] = count(); - @stacks[ustack()] = count(); +pid$target:libmpi:MPI_Comm_split:entry +{ + ++allocations; + @counts[probefunc] = count(); + @stacks[ustack()] = count(); } -pid$target:libmpi:MPI_Comm_free:entry -{ - ++deallocations; - @counts[probefunc] = count(); - @stacks[ustack()] = count(); +pid$target:libmpi:MPI_Comm_free:entry +{ + ++deallocations; + @counts[probefunc] = count(); + @stacks[ustack()] = count(); } -profile:::tick-1sec -/++prcnt > 10/ +profile:::tick-1sec +/++prcnt > 10/ { printf("====================================================================="); - printa(@counts); + printa(@counts); printf("Communicator Allocations = %d \n", allocations); - printf("Communicator Deallocations = %d\n", deallocations); - prcnt = 0; + printf("Communicator Deallocations = %d\n", deallocations); + prcnt = 0; } -END -{ +END +{ printf("Communicator Allocations = %d, Communicator Deallocations = %d\n", - allocations, deallocations); + allocations, deallocations); } This script attaches dtrace to a suspect section of code in your program (that @@ -336,13 +336,13 @@ This program performs three MPI_Comm_dup operations and two MPI_Comm_free operations. The program thus "leaks" one communicator operation with each iteration of a loop. -When you attach dtrace to mpicommleak using the mpicommcheck.d script above, +When you attach dtrace to mpicommleak using the mpicommcheck.d script above, you will see a 10-second periodic output. This output shows that the count of the allocated communicators is growing faster than the count of deallocations. 
When you finally end the dtrace session by typing Ctrl-C, the session will have -output a total of five stack traces, showing the distinct three MPI_Comm_dup -and two MPI_Comm_free call stacks, as well as the number of times each call +output a total of five stack traces, showing the distinct three MPI_Comm_dup +and two MPI_Comm_free call stacks, as well as the number of times each call stack was encountered. For example: diff --git a/examples/dtrace/mpicommcheck.d b/examples/dtrace/mpicommcheck.d index 11cdc54e61a..937680c1389 100644 --- a/examples/dtrace/mpicommcheck.d +++ b/examples/dtrace/mpicommcheck.d @@ -2,9 +2,9 @@ * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -45,6 +45,6 @@ END { printf("Communicator Allocations = %d, Communicator Deallocations = %d\n", allocations, deallocations); -} +} diff --git a/examples/dtrace/mpicommleak.c b/examples/dtrace/mpicommleak.c index c2888c9be67..b2fcb8a25cf 100644 --- a/examples/dtrace/mpicommleak.c +++ b/examples/dtrace/mpicommleak.c @@ -2,9 +2,9 @@ * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/examples/dtrace/mpicount.d b/examples/dtrace/mpicount.d index df451da35ee..38924a92dcd 100644 --- a/examples/dtrace/mpicount.d +++ b/examples/dtrace/mpicount.d @@ -1,13 +1,13 @@ /* * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/* +/* * This script will count the number of times MPI APIs are called. It * will print out its results every 10 seconds and then a final count * at the end. 
This script can be used to see that an MPI application @@ -46,4 +46,4 @@ profile:::tick-1sec dtrace:::END { printa(@api); -} \ No newline at end of file +} diff --git a/examples/dtrace/mpitrace.d b/examples/dtrace/mpitrace.d index bc0b85ed228..9dfc0cd21e4 100644 --- a/examples/dtrace/mpitrace.d +++ b/examples/dtrace/mpitrace.d @@ -2,9 +2,9 @@ * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/examples/dtrace/mpitruss.d b/examples/dtrace/mpitruss.d index fc1b016f8aa..73dd4cafd3e 100644 --- a/examples/dtrace/mpitruss.d +++ b/examples/dtrace/mpitruss.d @@ -2,9 +2,9 @@ * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/examples/dtrace/myppriv.sh b/examples/dtrace/myppriv.sh index c15af1ecd66..798c24fce82 100755 --- a/examples/dtrace/myppriv.sh +++ b/examples/dtrace/myppriv.sh @@ -9,6 +9,6 @@ # $HEADER$ # -# run ppriv under a shell so you can get the privileges of the +# run ppriv under a shell so you can get the privileges of the # process that mprun creates ppriv $$ diff --git a/examples/hello_oshmem_c.c b/examples/hello_oshmem_c.c index 079e5d49704..6bb0d08c062 100644 --- a/examples/hello_oshmem_c.c +++ b/examples/hello_oshmem_c.c @@ -12,16 +12,25 @@ #include #include "shmem.h" +#if !defined(OSHMEM_SPEC_VERSION) || OSHMEM_SPEC_VERSION < 10200 +#error This application uses API 1.2 and up +#endif + int main(int argc, char* argv[]) { int proc, nproc; + char name[SHMEM_MAX_NAME_LEN]; + int major, minor; - start_pes(0); - nproc = _num_pes(); - proc = _my_pe(); + shmem_init(); + nproc = shmem_n_pes(); + proc = shmem_my_pe(); + shmem_info_get_name(name); + shmem_info_get_version(&major, &minor); - printf("Hello, world, I am %d of %d\n", - proc, nproc); + printf("Hello, world, I am %d of %d: %s 
(version: %d.%d)\n", + proc, nproc, name, major, minor); + shmem_finalize(); return 0; } diff --git a/examples/hello_oshmemfh.f90 b/examples/hello_oshmemfh.f90 index 283697bad30..09404c3bb8a 100644 --- a/examples/hello_oshmemfh.f90 +++ b/examples/hello_oshmemfh.f90 @@ -13,11 +13,17 @@ program hello_oshmem include 'shmem.fh' integer proc, nproc - integer my_pe, num_pes + integer shmem_my_pe, shmem_n_pes + integer major, minor, len + character(len=SHMEM_MAX_NAME_LEN) name - call START_PES(0) - proc = MY_PE() - nproc = NUM_PES() + call SHMEM_INIT() + proc = SHMEM_MY_PE() + nproc = SHMEM_N_PES() + call SHMEM_INFO_GET_VERSION(major, minor) + call SHMEM_INFO_GET_NAME(name) + + write(*, '("Hello, world, I am ", i2, " of ", i2, ": (version: ", i0, ".", i0, ")")') proc, nproc, major, minor + call SHMEM_FINALIZE() - write(*, '("Hello, world, I am ", i2, " of ", i2)') proc, nproc end program hello_oshmem diff --git a/examples/oshmem_circular_shift.c b/examples/oshmem_circular_shift.c index cfd6c0aa8b8..3ff2d6d0c87 100644 --- a/examples/oshmem_circular_shift.c +++ b/examples/oshmem_circular_shift.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Mellanox Technologies, Inc. + * Copyright (c) 2014-2016 Mellanox Technologies, Inc. * All rights reserved. 
* $COPYRIGHT$ * @@ -16,10 +16,10 @@ int main (void) static int aaa, bbb; int num_pes, my_pe, peer; - start_pes(0); + shmem_init(); - num_pes = _num_pes(); - my_pe = _my_pe(); + num_pes = shmem_n_pes(); + my_pe = shmem_my_pe(); peer = (my_pe + 1) % num_pes; @@ -28,6 +28,7 @@ int main (void) shmem_barrier_all(); printf("Process %d exiting\n", my_pe); + shmem_finalize(); return 0; } diff --git a/examples/oshmem_circular_shift.java b/examples/oshmem_circular_shift.java deleted file mode 100644 index 776d166d270..00000000000 --- a/examples/oshmem_circular_shift.java +++ /dev/null @@ -1,24 +0,0 @@ -import shmem.*; - -public class oshmem_circular_shift -{ - public static void main(String[] args) throws ShMemException - { - ShMem.startPEs(0); - - int numPEs = ShMem.getNumPEs(), - myPE = ShMem.getMyPE(), - peer = (myPE + 1) % numPEs; - - int[] aaa = new int[1]; - Addr bbb = new Addr(4); - - System.out.println("Process "+ myPE +" gets message from "+ - peer +" ("+ numPEs +" processes in ring)"); - - bbb.getInt(aaa, peer); - ShMem.barrierAll(); - bbb.free(); - System.out.println("Process "+ myPE +" exiting"); - } -} diff --git a/examples/oshmem_max_reduction.c b/examples/oshmem_max_reduction.c index 2f0139313a3..d0dec673af2 100644 --- a/examples/oshmem_max_reduction.c +++ b/examples/oshmem_max_reduction.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Mellanox Technologies, Inc. + * Copyright (c) 2014-2016 Mellanox Technologies, Inc. * All rights reserved. 
* $COPYRIGHT$ * @@ -32,10 +32,10 @@ int main(void) pSync[i] = _SHMEM_SYNC_VALUE; } - start_pes(0); + shmem_init(); - my_pe = _my_pe(); - num_pes = _num_pes(); + my_pe = shmem_my_pe(); + num_pes = shmem_n_pes(); for (i = 0; i < N; i += 1) { src[i] = my_pe + i; @@ -52,6 +52,7 @@ int main(void) } printf("\n"); + shmem_finalize(); return 0; } diff --git a/examples/oshmem_max_reduction.java b/examples/oshmem_max_reduction.java deleted file mode 100644 index 499427d656f..00000000000 --- a/examples/oshmem_max_reduction.java +++ /dev/null @@ -1,43 +0,0 @@ -import shmem.*; -import java.nio.*; - -public class oshmem_max_reduction -{ - private static final int N = 3; - - public static void main(String[] args) throws ShMemException - { - ShMem.startPEs(0); - - int numPEs = ShMem.getNumPEs(), - myPE = ShMem.getMyPE(); - - Addr src = new Addr(8 * N), // long is 8 bytes. - dst = new Addr(8 * N), - pWrk = new Addr(8 * ShMem.REDUCE_SYNC_SIZE); - PSync pSync = new PSync(ShMem.BCAST_SYNC_SIZE); - - LongBuffer srcBuf = src.asLongBuffer(), - dstBuf = dst.asLongBuffer(); - - for(int i = 0; i < N; i++) - srcBuf.put(i, myPE + i); - - ShMem.barrierAll(); - dst.maxToAllLong(src, N, 0, 0, numPEs, pWrk, pSync); - - StringBuilder sb = new StringBuilder(); - sb.append(myPE +"/"+ numPEs +" dst ="); - - for(int i = 0; i < N; i++) - sb.append(" "+ dstBuf.get(i)); - - sb.append("\n"); - System.out.print(sb); - - src.free(); - dst.free(); - pWrk.free(); - pSync.free(); - } -} diff --git a/examples/oshmem_shmalloc.c b/examples/oshmem_shmalloc.c index bc31d95dfaa..e5f9ab90907 100644 --- a/examples/oshmem_shmalloc.c +++ b/examples/oshmem_shmalloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Mellanox Technologies, Inc. + * Copyright (c) 2014-2016 Mellanox Technologies, Inc. * All rights reserved. 
* $COPYRIGHT$ * @@ -19,10 +19,12 @@ int main(void) { long *x; - start_pes(0); + shmem_init(); - x = (long *) shmalloc(sizeof(*x)); + x = (long *) shmem_malloc(sizeof(*x)); - shfree(x); + shmem_free(x); + + shmem_finalize(); } diff --git a/examples/oshmem_strided_puts.c b/examples/oshmem_strided_puts.c index 839417ff933..e9f31ddb749 100644 --- a/examples/oshmem_strided_puts.c +++ b/examples/oshmem_strided_puts.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Mellanox Technologies, Inc. + * Copyright (c) 2014-2016 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ * @@ -33,8 +33,8 @@ int main(void) static short target[10]; int me; - start_pes(0); - me = _my_pe(); + shmem_init(); + me = shmem_my_pe(); if (me == 0) { /* put 10 words into target on PE 1 */ @@ -49,6 +49,7 @@ int main(void) target[3], target[4] ); } shmem_barrier_all(); /* sync before exiting */ + shmem_finalize(); return 0; } diff --git a/examples/oshmem_strided_puts.java b/examples/oshmem_strided_puts.java deleted file mode 100644 index 0213213e2ef..00000000000 --- a/examples/oshmem_strided_puts.java +++ /dev/null @@ -1,33 +0,0 @@ -import shmem.*; -import java.nio.*; - -public class oshmem_strided_puts -{ - public static void main(String[] args) throws ShMemException - { - ShMem.startPEs(0); - int me = ShMem.getMyPE(); - - short[] source = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; - Addr target = new Addr(2 * 10); // short is 2 bytes. 
- - if(me == 0) - { - /* put 10 words into target on PE 1 */ - target.iPutShort(source, 1, 2, 5, 1); - } - - ShMem.barrierAll(); // sync sender and receiver - - if(me == 1) - { - ShortBuffer buf = target.asShortBuffer(); - System.out.printf("target on PE %d is %d %d %d %d %d\n", me, - buf.get(0), buf.get(1), buf.get(2), - buf.get(3), buf.get(4)); - } - - ShMem.barrierAll(); // sync before exiting - target.free(); - } -} diff --git a/examples/oshmem_symmetric_data.c b/examples/oshmem_symmetric_data.c index 2234e2e06ea..7c5a1db6b53 100644 --- a/examples/oshmem_symmetric_data.c +++ b/examples/oshmem_symmetric_data.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Mellanox Technologies, Inc. + * Copyright (c) 2014-2016 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ * @@ -20,10 +20,10 @@ int main(int argc, char* argv[]) int i; int num_pe, my_pe; - start_pes(0); + shmem_init(); - num_pe = _num_pes(); - my_pe = _my_pe(); + num_pe = shmem_n_pes(); + my_pe = shmem_my_pe(); if (my_pe == 0) { /* initialize array */ @@ -50,6 +50,7 @@ int main(int argc, char* argv[]) } shmem_barrier_all(); /* sync before exiting */ + shmem_finalize(); return 0; } diff --git a/examples/oshmem_symmetric_data.java b/examples/oshmem_symmetric_data.java deleted file mode 100644 index d21ef48955e..00000000000 --- a/examples/oshmem_symmetric_data.java +++ /dev/null @@ -1,49 +0,0 @@ -import shmem.*; -import java.nio.*; - -public class oshmem_symmetric_data -{ - private static final int SIZE = 16; - - public static void main(String[] args) throws ShMemException - { - ShMem.startPEs(0); - - int numPE = ShMem.getNumPEs(), - myPE = ShMem.getMyPE(); - - int[] source = new int[SIZE]; - Addr target = new Addr(4 * SIZE); // int is 4 bytes - - if(myPE == 0) - { - // initialize array - for(int i = 0; i < SIZE; i++) - source[i] = i; - - // local, not symmetric - // static makes it symmetric - // put "size" words into target on each PE - for(int i = 1; i < numPE; i++) - target.putInt(source, i); - } - 
- ShMem.barrierAll(); // sync sender and receiver - - if(myPE != 0) - { - StringBuilder sb = new StringBuilder(); - sb.append("Target on PE "+ myPE +" is \t"); - IntBuffer buf = target.asIntBuffer(); - - for(int i = 0; i < SIZE; i++) - sb.append(buf.get(i) +" \t"); - - sb.append('\n'); - System.out.print(sb); - } - - ShMem.barrierAll(); // sync before exiting - target.free(); - } -} diff --git a/examples/ring_oshmem_c.c b/examples/ring_oshmem_c.c index 0823b5e1fc7..5fe1c7f65dc 100644 --- a/examples/ring_oshmem_c.c +++ b/examples/ring_oshmem_c.c @@ -11,15 +11,19 @@ #include #include +#if !defined(OSHMEM_SPEC_VERSION) || OSHMEM_SPEC_VERSION < 10200 +#error This application uses API 1.2 and up +#endif + int main (int argc, char * argv[]) { static int rbuf = -1; int proc, nproc, next; int message = 10; - start_pes(0); - proc = _my_pe(); - nproc = _num_pes(); + shmem_init(); + nproc = shmem_n_pes(); + proc = shmem_my_pe(); /* Calculate the PE number of the next process in the ring. Use the modulus operator so that the last process "wraps around" to PE 0. */ @@ -51,6 +55,7 @@ int main (int argc, char * argv[]) --message; } } + shmem_finalize(); /* All done */ diff --git a/ompi/Makefile.am b/ompi/Makefile.am index 850aa6c57df..ecd9f42e50f 100644 --- a/ompi/Makefile.am +++ b/ompi/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,10 +14,14 @@ # Copyright (c) 2010-2011 Sandia National Laboratories. All rights reserved. # Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. 
+# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,13 +30,9 @@ if BUILD_MPI_BINDINGS_LAYER c_mpi_lib = mpi/c/libmpi_c_mpi.la mpi/tool/libmpi_mpit.la else -c_mpi_lib = +c_mpi_lib = endif -if BUILD_PMPI_BINDINGS_LAYER c_pmpi_lib = mpi/c/profile/libmpi_c_pmpi.la mpi/tool/profile/libmpi_pmpit.la -else -c_pmpi_lib = -endif # See if we have Fortran mpif.h MPI bindings @@ -128,9 +128,9 @@ DIST_SUBDIRS = \ # Build the main MPI library -lib_LTLIBRARIES = libmpi.la -libmpi_la_SOURCES = -libmpi_la_LIBADD = \ +lib_LTLIBRARIES = lib@OMPI_LIBMPI_NAME@.la +lib@OMPI_LIBMPI_NAME@_la_SOURCES = +lib@OMPI_LIBMPI_NAME@_la_LIBADD = \ datatype/libdatatype.la \ debuggers/libdebuggers.la \ mpi/c/libmpi_c.la \ @@ -144,13 +144,13 @@ libmpi_la_LIBADD = \ if OMPI_RTE_ORTE -libmpi_la_LIBADD += \ +lib@OMPI_LIBMPI_NAME@_la_LIBADD += \ $(OMPI_TOP_BUILDDIR)/orte/lib@ORTE_LIB_PREFIX@open-rte.la endif -libmpi_la_LIBADD += \ +lib@OMPI_LIBMPI_NAME@_la_LIBADD += \ $(OMPI_TOP_BUILDDIR)/opal/lib@OPAL_LIB_PREFIX@open-pal.la -libmpi_la_DEPENDENCIES = $(libmpi_la_LIBADD) -libmpi_la_LDFLAGS = \ +lib@OMPI_LIBMPI_NAME@_la_DEPENDENCIES = $(lib@OMPI_LIBMPI_NAME@_la_LIBADD) +lib@OMPI_LIBMPI_NAME@_la_LDFLAGS = \ -version-info $(libmpi_so_version) \ $(OMPI_LIBMPI_EXTRA_LDFLAGS) @@ -159,7 +159,7 @@ headers = noinst_LTLIBRARIES = include_HEADERS = dist_ompidata_DATA = -libmpi_la_SOURCES += $(headers) +lib@OMPI_LIBMPI_NAME@_la_SOURCES += $(headers) nodist_man_MANS = # Conditionally install the header files @@ -172,6 +172,7 @@ endif include class/Makefile.am include attribute/Makefile.am include communicator/Makefile.am +include dpm/Makefile.am include errhandler/Makefile.am include file/Makefile.am include group/Makefile.am diff --git a/ompi/attribute/Makefile.am b/ompi/attribute/Makefile.am index 
2510601863a..cb1193deb2f 100644 --- a/ompi/attribute/Makefile.am +++ b/ompi/attribute/Makefile.am @@ -5,14 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -21,6 +22,6 @@ headers += \ attribute/attribute.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ attribute/attribute.c \ attribute/attribute_predefined.c diff --git a/ompi/attribute/attribute.c b/ompi/attribute/attribute.c index 418ab80a57b..5ac0b53f069 100644 --- a/ompi/attribute/attribute.c +++ b/ompi/attribute/attribute.c @@ -5,17 +5,17 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -55,7 +55,7 @@ * will not be able to affect the value read by anyone else). 
* - The predefined attribute MPI_WIN_BASE seems to flaunt the rules * designated by the rest of the standard; it is handled - * specifically in the MPI_WIN_GET_ATTR binding functions (see the + * specifically in the MPI_WIN_GET_ATTR binding functions (see the * comments in there for an explanation). * - MPI-2 4.12.7:Example 4.13 (p58) is wrong. The C->Fortran example * should have the Fortran "val" variable equal to &I. @@ -72,19 +72,19 @@ * to an int -- but it could have been a pointer to anything, such as * a struct). These scenarios each have 2 examples: * - * Example A: int foo = 3; + * Example A: int foo = 3; * MPI_Attr_put(..., &foo); - * Example B: struct foo bar; + * Example B: struct foo bar; * MPI_Attr_put(..., &bar); - * + * * 1. C reads the attribute value. Clearly, this is a "unity" case, * and no translation occurs. A pointer is written, and that same * pointer is returned. * - * Example A: int *ret; - * MPI_Attr_get(..., &ret); + * Example A: int *ret; + * MPI_Attr_get(..., &ret); * --> *ret will equal 3 - * Example B: struct foo *ret; + * Example B: struct foo *ret; * MPI_Attr_get(..., &ret); * --> *ret will point to the instance bar that was written * @@ -106,7 +106,7 @@ * CALL MPI_COMM_GET_ATTR(..., ret, ierr) * --> ret will equal &foo * Example B: INTEGER(KIND=MPI_ADDRESS_KIND) ret - * CALL MPI_COMM_GET_ATTR(..., ret, ierr) + * CALL MPI_COMM_GET_ATTR(..., ret, ierr) * --> ret will equal &bar * * Cases where Fortran MPI-1 writes an attribute value: @@ -124,7 +124,7 @@ * this case and case 7. It is the programer's responsibility * to code accordingly. * - * Example: MPI_Fint *ret; + * Example: MPI_Fint *ret; * MPI_Attr_get(..., &ret); * -> *ret will equal 7. * @@ -163,10 +163,10 @@ * this case and case 4. It is the programer's responsibility * to code accordingly. 
* - * Example A: MPI_Aint *ret; + * Example A: MPI_Aint *ret; * MPI_Attr_get(..., &ret); * -> *ret will equal 12 - * Example B: MPI_Aint *ret; + * Example B: MPI_Aint *ret; * MPI_Attr_get(..., &ret); * -> *ret will equal 2^40 * @@ -226,7 +226,7 @@ /* Not checking for NULL_DELETE_FN here, since according to the MPI-standard it should be a valid function that returns - MPI_SUCCESS. + MPI_SUCCESS. This macro exists because we have to replicate the same code for MPI_Comm, MPI_Datatype, and MPI_Win. Ick. @@ -242,7 +242,7 @@ *not* set. 3. C-style: attribute arguments are of type (void*). This is used if OMPI_KEYVAL_F77 is not set. - + Ick. */ @@ -340,7 +340,7 @@ do { \ } while (0) -/* +/* * Cases for attribute values */ typedef enum ompi_attribute_translate_t { @@ -364,17 +364,17 @@ typedef struct attribute_value_t { } attribute_value_t; -/* +/* * Local functions */ static void attribute_value_construct(attribute_value_t *item); static void ompi_attribute_keyval_construct(ompi_attribute_keyval_t *keyval); static void ompi_attribute_keyval_destruct(ompi_attribute_keyval_t *keyval); -static int set_value(ompi_attribute_type_t type, void *object, - opal_hash_table_t **attr_hash, int key, +static int set_value(ompi_attribute_type_t type, void *object, + opal_hash_table_t **attr_hash, int key, attribute_value_t *new_attr, bool predefined); -static int get_value(opal_hash_table_t *attr_hash, int key, +static int get_value(opal_hash_table_t *attr_hash, int key, attribute_value_t **attribute, int *flag); static void *translate_to_c(attribute_value_t *val); static MPI_Fint translate_to_fortran_mpi1(attribute_value_t *val); @@ -394,14 +394,14 @@ static OBJ_CLASS_INSTANCE(attribute_value_t, /* * ompi_attribute_entry_t classes */ -static OBJ_CLASS_INSTANCE(ompi_attribute_keyval_t, +static OBJ_CLASS_INSTANCE(ompi_attribute_keyval_t, opal_object_t, ompi_attribute_keyval_construct, ompi_attribute_keyval_destruct); -/* - * Static variables +/* + * Static variables */ static 
opal_hash_table_t *keyval_hash; @@ -434,7 +434,7 @@ static void attribute_value_construct(attribute_value_t *item) * ompi_attribute_keyval_t constructor / destructor */ static void -ompi_attribute_keyval_construct(ompi_attribute_keyval_t *keyval) +ompi_attribute_keyval_construct(ompi_attribute_keyval_t *keyval) { keyval->attr_type = UNUSED_ATTR; keyval->attr_flag = 0; @@ -452,8 +452,8 @@ ompi_attribute_keyval_construct(ompi_attribute_keyval_t *keyval) } -static void -ompi_attribute_keyval_destruct(ompi_attribute_keyval_t *keyval) +static void +ompi_attribute_keyval_destruct(ompi_attribute_keyval_t *keyval) { if (-1 != keyval->key) { /* If the bindings_extra_state pointer is not NULL, free it */ @@ -467,7 +467,7 @@ ompi_attribute_keyval_destruct(ompi_attribute_keyval_t *keyval) } -/* +/* * This will initialize the main list to store key- attribute * items. This will be called one time, during MPI_INIT(). */ @@ -490,7 +490,7 @@ int ompi_attr_init(void) return OMPI_ERR_OUT_OF_RESOURCE; } - for (int_pos = 0; int_pos < (sizeof(void*) / sizeof(MPI_Fint)); + for (int_pos = 0; int_pos < (sizeof(void*) / sizeof(MPI_Fint)); ++int_pos) { if (p[int_pos] == 1) { break; @@ -506,12 +506,12 @@ int ompi_attr_init(void) if (OMPI_SUCCESS != (ret = ompi_attr_create_predefined())) { return ret; } - + return OMPI_SUCCESS; } -/* +/* * Cleanup everything during MPI_Finalize(). 
*/ int ompi_attr_finalize(void) @@ -623,7 +623,7 @@ int ompi_attr_create_keyval_aint(ompi_attribute_type_t type, /*****************************************************************************/ -int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, +int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, bool predefined) { int ret; @@ -631,9 +631,9 @@ int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, /* Find the key-value pair */ OPAL_THREAD_LOCK(&attribute_lock); - ret = opal_hash_table_get_value_uint32(keyval_hash, *key, + ret = opal_hash_table_get_value_uint32(keyval_hash, *key, (void **) &keyval); - if ((OMPI_SUCCESS != ret) || (NULL == keyval) || + if ((OMPI_SUCCESS != ret) || (NULL == keyval) || (keyval->attr_type != type) || ((!predefined) && (keyval->attr_flag & OMPI_KEYVAL_PREDEFINED))) { OPAL_THREAD_UNLOCK(&attribute_lock); @@ -660,7 +660,7 @@ int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, * Front-end function called by the C MPI API functions to set an * attribute. */ -int ompi_attr_set_c(ompi_attribute_type_t type, void *object, +int ompi_attr_set_c(ompi_attribute_type_t type, void *object, opal_hash_table_t **attr_hash, int key, void *attribute, bool predefined) { @@ -690,9 +690,9 @@ int ompi_attr_set_c(ompi_attribute_type_t type, void *object, * Front-end function called by the Fortran MPI-1 API functions to set * an attribute. */ -int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *object, +int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *object, opal_hash_table_t **attr_hash, - int key, MPI_Fint attribute, + int key, MPI_Fint attribute, bool predefined) { int ret; @@ -722,9 +722,9 @@ int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *object, * Front-end function called by the Fortran MPI-2 API functions to set * an attribute. 
*/ -int ompi_attr_set_fortran_mpi2(ompi_attribute_type_t type, void *object, +int ompi_attr_set_fortran_mpi2(ompi_attribute_type_t type, void *object, opal_hash_table_t **attr_hash, - int key, MPI_Aint attribute, + int key, MPI_Aint attribute, bool predefined) { int ret; @@ -754,7 +754,7 @@ int ompi_attr_set_fortran_mpi2(ompi_attribute_type_t type, void *object, * Front-end function called by the C MPI API functions to get * attributes. */ -int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, +int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, void **attribute, int *flag) { attribute_value_t *val = NULL; @@ -777,7 +777,7 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, * Front-end function called by the Fortran MPI-1 API functions to get * attributes. */ -int ompi_attr_get_fortran_mpi1(opal_hash_table_t *attr_hash, int key, +int ompi_attr_get_fortran_mpi1(opal_hash_table_t *attr_hash, int key, MPI_Fint *attribute, int *flag) { attribute_value_t *val = NULL; @@ -800,7 +800,7 @@ int ompi_attr_get_fortran_mpi1(opal_hash_table_t *attr_hash, int key, * Front-end function called by the Fortran MPI-2 API functions to get * attributes. */ -int ompi_attr_get_fortran_mpi2(opal_hash_table_t *attr_hash, int key, +int ompi_attr_get_fortran_mpi2(opal_hash_table_t *attr_hash, int key, MPI_Aint *attribute, int *flag) { attribute_value_t *val = NULL; @@ -825,7 +825,7 @@ int ompi_attr_get_fortran_mpi2(opal_hash_table_t *attr_hash, int key, * when MPI objects are copied (e.g., back-end actions to * MPI_COMM_DUP). 
*/ -int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, +int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, void *new_object, opal_hash_table_t *oldattr_hash, opal_hash_table_t *newattr_hash) { @@ -845,7 +845,7 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, OPAL_THREAD_LOCK(&attribute_lock); /* Get the first attribute in the object's hash */ - ret = opal_hash_table_get_first_key_uint32(oldattr_hash, &key, + ret = opal_hash_table_get_first_key_uint32(oldattr_hash, &key, (void **) &old_attr, &node); @@ -855,7 +855,7 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, /* Get the keyval in the main keyval hash - so that we know what the copy_attr_fn is */ - err = opal_hash_table_get_value_uint32(keyval_hash, key, + err = opal_hash_table_get_value_uint32(keyval_hash, key, (void **) &hash_value); if (OMPI_SUCCESS != err) { /* This should not happen! */ @@ -868,19 +868,19 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, switch (type) { case COMM_ATTR: /* Now call the copy_attr_fn */ - COPY_ATTR_CALLBACKS(communicator, old_object, hash_value, + COPY_ATTR_CALLBACKS(communicator, old_object, hash_value, old_attr, new_object, new_attr, err); break; - + case TYPE_ATTR: /* Now call the copy_attr_fn */ - COPY_ATTR_CALLBACKS(datatype, old_object, hash_value, + COPY_ATTR_CALLBACKS(datatype, old_object, hash_value, old_attr, new_object, new_attr, err); break; case WIN_ATTR: /* Now call the copy_attr_fn */ - COPY_ATTR_CALLBACKS(win, old_object, hash_value, + COPY_ATTR_CALLBACKS(win, old_object, hash_value, old_attr, new_object, new_attr, err); break; @@ -896,7 +896,7 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, } /* Hang this off the object's hash */ - + /* The COPY_ATTR_CALLBACKS macro will have converted the _flag_ callback output value from Fortran's .TRUE. value to 0/1 (if necessary). 
So we only need to check for 0/1 here @@ -911,7 +911,7 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, } else { new_attr->av_set_from = OMPI_ATTRIBUTE_C; } - ret = set_value(type, new_object, &newattr_hash, key, + ret = set_value(type, new_object, &newattr_hash, key, new_attr, true); if (MPI_SUCCESS != ret) { goto out; @@ -920,8 +920,8 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, OBJ_RELEASE(new_attr); } - ret = opal_hash_table_get_next_key_uint32(oldattr_hash, &key, - (void **) &old_attr, + ret = opal_hash_table_get_next_key_uint32(oldattr_hash, &key, + (void **) &old_attr, in_node, &node); } ret = MPI_SUCCESS; @@ -940,7 +940,7 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, * * Assumes that you DO already have the attribute_lock. */ -static int ompi_attr_delete_impl(ompi_attribute_type_t type, void *object, +static int ompi_attr_delete_impl(ompi_attribute_type_t type, void *object, opal_hash_table_t *attr_hash, int key, bool predefined) { @@ -949,7 +949,7 @@ static int ompi_attr_delete_impl(ompi_attribute_type_t type, void *object, attribute_value_t *attr; /* Check if the key is valid in the master keyval hash */ - ret = opal_hash_table_get_value_uint32(keyval_hash, key, + ret = opal_hash_table_get_value_uint32(keyval_hash, key, (void **) &keyval); if ((OMPI_SUCCESS != ret) || (NULL == keyval) || @@ -1014,7 +1014,7 @@ static int ompi_attr_delete_impl(ompi_attribute_type_t type, void *object, /* * Front end function to delete a single attribute. 
*/ -int ompi_attr_delete(ompi_attribute_type_t type, void *object, +int ompi_attr_delete(ompi_attribute_type_t type, void *object, opal_hash_table_t *attr_hash, int key, bool predefined) { @@ -1030,7 +1030,7 @@ int ompi_attr_delete(ompi_attribute_type_t type, void *object, /* * Front-end function to delete all the attributes on an MPI object */ -int ompi_attr_delete_all(ompi_attribute_type_t type, void *object, +int ompi_attr_delete_all(ompi_attribute_type_t type, void *object, opal_hash_table_t *attr_hash) { int ret, i, num_attrs; @@ -1076,7 +1076,7 @@ int ompi_attr_delete_all(ompi_attribute_type_t type, void *object, Termination, but we do it for everything -- what the heck. :-) */ for (i = num_attrs - 1; i >= 0; i--) { - ret = ompi_attr_delete_impl(type, object, attr_hash, + ret = ompi_attr_delete_impl(type, object, attr_hash, attrs[i]->av_key, true); if (OMPI_SUCCESS != ret) { break; @@ -1097,8 +1097,8 @@ int ompi_attr_delete_all(ompi_attribute_type_t type, void *object, * Back-end function to set an attribute on an MPI object. Assumes * that you already hold the attribute_lock. */ -static int set_value(ompi_attribute_type_t type, void *object, - opal_hash_table_t **attr_hash, int key, +static int set_value(ompi_attribute_type_t type, void *object, + opal_hash_table_t **attr_hash, int key, attribute_value_t *new_attr, bool predefined) { @@ -1110,11 +1110,11 @@ static int set_value(ompi_attribute_type_t type, void *object, /* Note that this function can be invoked by ompi_attr_copy_all() to set attributes on the new object (in addition to the top-level MPI_* functions that set attributes). 
*/ - ret = opal_hash_table_get_value_uint32(keyval_hash, key, + ret = opal_hash_table_get_value_uint32(keyval_hash, key, (void **) &keyval); /* If key not found */ - if ((OMPI_SUCCESS != ret ) || (NULL == keyval) || + if ((OMPI_SUCCESS != ret ) || (NULL == keyval) || (keyval->attr_type != type) || ((!predefined) && (keyval->attr_flag & OMPI_KEYVAL_PREDEFINED))) { return OMPI_ERR_BAD_PARAM; @@ -1186,7 +1186,7 @@ static int set_value(ompi_attribute_type_t type, void *object, * * Assumes that you do NOT already have the attribute lock. */ -static int get_value(opal_hash_table_t *attr_hash, int key, +static int get_value(opal_hash_table_t *attr_hash, int key, attribute_value_t **attribute, int *flag) { int ret; @@ -1199,7 +1199,7 @@ static int get_value(opal_hash_table_t *attr_hash, int key, with the key, then the call is valid and returns FALSE in the flag argument */ *flag = 0; - ret = opal_hash_table_get_value_uint32(keyval_hash, key, + ret = opal_hash_table_get_value_uint32(keyval_hash, key, (void**) &keyval); if (OMPI_ERR_NOT_FOUND == ret) { return MPI_KEYVAL_INVALID; diff --git a/ompi/attribute/attribute.h b/ompi/attribute/attribute.h index b0dc9ecab84..b762aa24f45 100644 --- a/ompi/attribute/attribute.h +++ b/ompi/attribute/attribute.h @@ -5,21 +5,21 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file * - * Implementation for taking care of the attribute that can hang off a comm, + * Implementation for taking care of the attribute that can hang off a comm, * win or datatype. */ @@ -36,8 +36,8 @@ #define ATTR_HASH_SIZE 10 -/* - * Flags for keyvals +/* + * Flags for keyvals */ #define OMPI_KEYVAL_PREDEFINED 0x0001 #define OMPI_KEYVAL_F77 0x0002 @@ -62,34 +62,34 @@ typedef enum ompi_attribute_type_t ompi_attribute_type_t; delete. These will only be used here and not in the front end functions. */ -typedef void (ompi_mpi1_fortran_copy_attr_function)(MPI_Fint *oldobj, +typedef void (ompi_mpi1_fortran_copy_attr_function)(MPI_Fint *oldobj, MPI_Fint *keyval, - MPI_Fint *extra_state, + MPI_Fint *extra_state, MPI_Fint *attr_in, MPI_Fint *attr_out, ompi_fortran_logical_t *flag, MPI_Fint *ierr); -typedef void (ompi_mpi1_fortran_delete_attr_function)(MPI_Fint *obj, +typedef void (ompi_mpi1_fortran_delete_attr_function)(MPI_Fint *obj, MPI_Fint *keyval, MPI_Fint *attr_in, - MPI_Fint *extra_state, + MPI_Fint *extra_state, MPI_Fint *ierr); /* New-style MPI-2 Fortran function pointer declarations for copy and delete. These will only be used here and not in the front end functions. 
*/ -typedef void (ompi_mpi2_fortran_copy_attr_function)(MPI_Fint *oldobj, +typedef void (ompi_mpi2_fortran_copy_attr_function)(MPI_Fint *oldobj, MPI_Fint *keyval, - void *extra_state, - void *attr_in, + void *extra_state, + void *attr_in, void *attr_out, ompi_fortran_logical_t *flag, MPI_Fint *ierr); -typedef void (ompi_mpi2_fortran_delete_attr_function)(MPI_Fint *obj, +typedef void (ompi_mpi2_fortran_delete_attr_function)(MPI_Fint *obj, MPI_Fint *keyval, void *attr_in, - void *extra_state, + void *extra_state, MPI_Fint *ierr); /* * Internally the copy function for all kinds of MPI objects has one more @@ -98,13 +98,13 @@ typedef void (ompi_mpi2_fortran_delete_attr_function)(MPI_Fint *obj, * on the main communicator. */ typedef int (MPI_Comm_internal_copy_attr_function)(MPI_Comm, int, void *, - void *, void *, int *, + void *, void *, int *, MPI_Comm); typedef int (MPI_Type_internal_copy_attr_function)(MPI_Datatype, int, void *, - void *, void *, int *, + void *, void *, int *, MPI_Datatype); typedef int (MPI_Win_internal_copy_attr_function)(MPI_Win, int, void *, - void *, void *, int *, + void *, void *, int *, MPI_Win); typedef void (ompi_attribute_keyval_destructor_fn_t)(int); @@ -123,12 +123,12 @@ union ompi_attribute_fn_ptr_union_t { MPI_Win_internal_copy_attr_function *attr_win_copy_fn; /* For Fortran old MPI-1 callback functions */ - + ompi_mpi1_fortran_delete_attr_function *attr_mpi1_fortran_delete_fn; ompi_mpi1_fortran_copy_attr_function *attr_mpi1_fortran_copy_fn; /* For Fortran new MPI-2 callback functions */ - + ompi_mpi2_fortran_delete_attr_function *attr_mpi2_fortran_delete_fn; ompi_mpi2_fortran_copy_attr_function *attr_mpi2_fortran_copy_fn; }; @@ -159,7 +159,7 @@ struct ompi_attribute_keyval_t { properly and error checking */ int attr_flag; /**< flag field: contains "OMPI_KEYVAL_PREDEFINED", "OMPI_KEYVAL_F77" */ - ompi_attribute_fn_ptr_union_t copy_attr_fn; /**< Copy function for the + ompi_attribute_fn_ptr_union_t copy_attr_fn; /**< Copy function 
for the attribute */ ompi_attribute_fn_ptr_union_t delete_attr_fn; /**< Delete function for the attribute */ @@ -173,14 +173,14 @@ struct ompi_attribute_keyval_t { }; typedef struct ompi_attribute_keyval_t ompi_attribute_keyval_t; - + /* Functions */ -/** - * Convenient way to initialize the attribute hash table per MPI-Object +/** + * Convenient way to initialize the attribute hash table per MPI-Object */ static inline @@ -194,7 +194,7 @@ int ompi_attr_hash_init(opal_hash_table_t **hash) if (OMPI_SUCCESS != opal_hash_table_init(*hash, ATTR_HASH_SIZE)) { return OMPI_ERR_OUT_OF_RESOURCE; } - + return MPI_SUCCESS; } @@ -219,7 +219,7 @@ int ompi_attr_finalize(void); * @param type Type of attribute (COMM/WIN/DTYPE) (IN) * @param copy_attr_fn Union variable containing the function pointer * to be used in order to copy the attribute (IN) - * @param delete_attr_fn Function pointer to be used for deleting the + * @param delete_attr_fn Function pointer to be used for deleting the * attribute (IN) * @param key The newly created key is returned here (OUT) * @param extra_state Extra state to hang off/do some special things (IN) @@ -234,10 +234,10 @@ int ompi_attr_finalize(void); * all MPI* calls will have OMPI_KEYVAL_PREDEFINED set as 0. MPI * implementors who will need to play with the predefined keys and * attributes would call the ompi* functions here and not the MPI* - * functions, with OMPI_KEYVAL_PREDEFINED set to 1. + * functions, with OMPI_KEYVAL_PREDEFINED set to 1. * END OF NOTE * - * NOTE: For the function pointers, you need to create a variable of the + * NOTE: For the function pointers, you need to create a variable of the * union type "ompi_attribute_fn_ptr_union_t" and assign the proper field. 
* to be passed into this function * END OF NOTE @@ -247,8 +247,8 @@ int ompi_attr_finalize(void); * */ -OMPI_DECLSPEC int ompi_attr_create_keyval(ompi_attribute_type_t type, - ompi_attribute_fn_ptr_union_t copy_attr_fn, +OMPI_DECLSPEC int ompi_attr_create_keyval(ompi_attribute_type_t type, + ompi_attribute_fn_ptr_union_t copy_attr_fn, ompi_attribute_fn_ptr_union_t delete_attr_fn, int *key, void *extra_state, int flags, void *bindings_extra_state); @@ -257,8 +257,8 @@ OMPI_DECLSPEC int ompi_attr_create_keyval(ompi_attribute_type_t type, * Same as ompi_attr_create_keyval, but extra_state is a Fortran default integer. */ -OMPI_DECLSPEC int ompi_attr_create_keyval_fint(ompi_attribute_type_t type, - ompi_attribute_fn_ptr_union_t copy_attr_fn, +OMPI_DECLSPEC int ompi_attr_create_keyval_fint(ompi_attribute_type_t type, + ompi_attribute_fn_ptr_union_t copy_attr_fn, ompi_attribute_fn_ptr_union_t delete_attr_fn, int *key, MPI_Fint extra_state, int flags, void *bindings_extra_state); @@ -267,8 +267,8 @@ OMPI_DECLSPEC int ompi_attr_create_keyval_fint(ompi_attribute_type_t type, * Same as ompi_attr_create_keyval, but extra_state is a Fortran address integer. 
*/ -OMPI_DECLSPEC int ompi_attr_create_keyval_aint(ompi_attribute_type_t type, - ompi_attribute_fn_ptr_union_t copy_attr_fn, +OMPI_DECLSPEC int ompi_attr_create_keyval_aint(ompi_attribute_type_t type, + ompi_attribute_fn_ptr_union_t copy_attr_fn, ompi_attribute_fn_ptr_union_t delete_attr_fn, int *key, MPI_Aint extra_state, int flags, void *bindings_extra_state); @@ -280,7 +280,7 @@ OMPI_DECLSPEC int ompi_attr_create_keyval_aint(ompi_attribute_type_t type, * @return OMPI error code */ -int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, +int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, bool predefined); /** @@ -303,12 +303,12 @@ int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated * situation more complicated through yet another layer of - * indirection. + * indirection. * * So yes, this is more code, but it's clearer and less error-prone * (read: better) this way. */ -int ompi_attr_set_c(ompi_attribute_type_t type, void *object, +int ompi_attr_set_c(ompi_attribute_type_t type, void *object, opal_hash_table_t **attr_hash, int key, void *attribute, bool predefined); @@ -333,14 +333,14 @@ int ompi_attr_set_c(ompi_attribute_type_t type, void *object, * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated * situation more complicated through yet another layer of - * indirection. + * indirection. * * So yes, this is more code, but it's clearer and less error-prone * (read: better) this way. 
*/ -OMPI_DECLSPEC int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *object, +OMPI_DECLSPEC int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *object, opal_hash_table_t **attr_hash, - int key, MPI_Fint attribute, + int key, MPI_Fint attribute, bool predefined); /** @@ -364,14 +364,14 @@ OMPI_DECLSPEC int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *o * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated * situation more complicated through yet another layer of - * indirection. + * indirection. * * So yes, this is more code, but it's clearer and less error-prone * (read: better) this way. */ -OMPI_DECLSPEC int ompi_attr_set_fortran_mpi2(ompi_attribute_type_t type, void *object, +OMPI_DECLSPEC int ompi_attr_set_fortran_mpi2(ompi_attribute_type_t type, void *object, opal_hash_table_t **attr_hash, - int key, MPI_Aint attribute, + int key, MPI_Aint attribute, bool predefined); /** @@ -380,7 +380,7 @@ OMPI_DECLSPEC int ompi_attr_set_fortran_mpi2(ompi_attribute_type_t type, void *o * @param attr_hash The attribute hash table hanging on the object(IN) * @param key Key val for the attribute (IN) * @param attribute The actual attribute pointer (OUT) - * @param flag Flag whether an attribute is associated + * @param flag Flag whether an attribute is associated * with the key (OUT) * @return OMPI error code * @@ -390,13 +390,13 @@ OMPI_DECLSPEC int ompi_attr_set_fortran_mpi2(ompi_attribute_type_t type, void *o * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated * situation more complicated through yet another layer of - * indirection. + * indirection. * * So yes, this is more code, but it's clearer and less error-prone * (read: better) this way. 
*/ -int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, +int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, void **attribute, int *flag); @@ -407,7 +407,7 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, * @param attr_hash The attribute hash table hanging on the object(IN) * @param key Key val for the attribute (IN) * @param attribute The actual attribute pointer (OUT) - * @param flag Flag whether an attribute is associated + * @param flag Flag whether an attribute is associated * with the key (OUT) * @return OMPI error code * @@ -417,13 +417,13 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated * situation more complicated through yet another layer of - * indirection. + * indirection. * * So yes, this is more code, but it's clearer and less error-prone * (read: better) this way. */ - OMPI_DECLSPEC int ompi_attr_get_fortran_mpi1(opal_hash_table_t *attr_hash, int key, + OMPI_DECLSPEC int ompi_attr_get_fortran_mpi1(opal_hash_table_t *attr_hash, int key, MPI_Fint *attribute, int *flag); @@ -434,7 +434,7 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, * @param attr_hash The attribute hash table hanging on the object(IN) * @param key Key val for the attribute (IN) * @param attribute The actual attribute pointer (OUT) - * @param flag Flag whether an attribute is associated + * @param flag Flag whether an attribute is associated * with the key (OUT) * @return OMPI error code * @@ -444,13 +444,13 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated * situation more complicated through yet another layer of - * indirection. + * indirection. * * So yes, this is more code, but it's clearer and less error-prone * (read: better) this way. 
*/ -OMPI_DECLSPEC int ompi_attr_get_fortran_mpi2(opal_hash_table_t *attr_hash, int key, +OMPI_DECLSPEC int ompi_attr_get_fortran_mpi2(opal_hash_table_t *attr_hash, int key, MPI_Aint *attribute, int *flag); @@ -465,15 +465,15 @@ OMPI_DECLSPEC int ompi_attr_get_fortran_mpi2(opal_hash_table_t *attr_hash, int k * */ -int ompi_attr_delete(ompi_attribute_type_t type, void *object, +int ompi_attr_delete(ompi_attribute_type_t type, void *object, opal_hash_table_t *attr_hash , int key, bool predefined); -/** +/** * This to be used from functions like MPI_*_DUP in order to copy all * the attributes from the old Comm/Win/Dtype object to a new - * object. + * object. * @param type Type of attribute (COMM/WIN/DTYPE) (IN) * @param old_object The old COMM/WIN/DTYPE object (IN) * @param new_object The new COMM/WIN/DTYPE object (IN) @@ -483,12 +483,12 @@ int ompi_attr_delete(ompi_attribute_type_t type, void *object, * */ -int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, +int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, void *new_object, opal_hash_table_t *oldattr_hash, opal_hash_table_t *newattr_hash); -/** +/** * This to be used to delete all the attributes from the Comm/Win/Dtype * object in one shot * @param type Type of attribute (COMM/WIN/DTYPE) (IN) @@ -498,7 +498,7 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, * */ -int ompi_attr_delete_all(ompi_attribute_type_t type, void *object, +int ompi_attr_delete_all(ompi_attribute_type_t type, void *object, opal_hash_table_t *attr_hash); diff --git a/ompi/attribute/attribute_predefined.c b/ompi/attribute/attribute_predefined.c index c2d3d631f1c..e9cdc1273e7 100644 --- a/ompi/attribute/attribute_predefined.c +++ b/ompi/attribute/attribute_predefined.c @@ -52,7 +52,7 @@ * MPI_APPNUM is set as the result of a GPR subscription. * * MPI_LASTUSEDCODE is set to an initial value and is reset every time - * MPI_ADD_ERROR_CLASS or MPI_ADD_ERROR_CODE is invoked. 
+ * MPI_ADD_ERROR_CLASS or MPI_ADD_ERROR_CODE is invoked. * Its copy function is set to * MPI_COMM_NULL_COPY_FN, meaning that *only* MPI_COMM_WORLD will have * this attribute value. As such, we only have to update @@ -81,9 +81,7 @@ #include "ompi_config.h" -#ifdef HAVE_STDLIB_H #include -#endif #include "mpi.h" diff --git a/ompi/class/Makefile.am b/ompi/class/Makefile.am index 2fe4523d931..7784da8ad69 100644 --- a/ompi/class/Makefile.am +++ b/ompi/class/Makefile.am @@ -6,14 +6,15 @@ # Copyright (c) 2004-2007 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -22,6 +23,6 @@ headers += \ class/ompi_seq_tracker.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ class/ompi_seq_tracker.c diff --git a/ompi/class/ompi_seq_tracker.c b/ompi/class/ompi_seq_tracker.c index 25ebf6d9e5e..b021ae18d8c 100644 --- a/ompi/class/ompi_seq_tracker.c +++ b/ompi/class/ompi_seq_tracker.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,20 +23,20 @@ -OBJ_CLASS_INSTANCE(ompi_seq_tracker_range_t, - opal_list_item_t, - NULL, +OBJ_CLASS_INSTANCE(ompi_seq_tracker_range_t, + opal_list_item_t, + NULL, NULL); -static void ompi_seq_tracker_construct(ompi_seq_tracker_t* seq_tracker) { +static void ompi_seq_tracker_construct(ompi_seq_tracker_t* seq_tracker) { OBJ_CONSTRUCT(&seq_tracker->seq_ids, opal_list_t); seq_tracker->seq_ids_current = (ompi_seq_tracker_range_t*)opal_list_get_end(&seq_tracker->seq_ids); } -static void ompi_seq_tracker_destruct(ompi_seq_tracker_t* seq_tracker) -{ +static void ompi_seq_tracker_destruct(ompi_seq_tracker_t* seq_tracker) +{ opal_list_item_t* item; while(NULL != (item = opal_list_remove_first(&seq_tracker->seq_ids))) OBJ_RELEASE(item); @@ -56,27 +56,27 @@ OBJ_CLASS_INSTANCE( */ bool ompi_seq_tracker_check_duplicate( - ompi_seq_tracker_t* seq_tracker, - uint32_t seq_id) -{ + ompi_seq_tracker_t* seq_tracker, + uint32_t seq_id) +{ ompi_seq_tracker_range_t* item; const ompi_seq_tracker_range_t* sentinel = (ompi_seq_tracker_range_t*)opal_list_get_end(&seq_tracker->seq_ids); int8_t direction = 0; /* 1 is next, -1 is previous */ item = seq_tracker->seq_ids_current; - while(true) { - if(sentinel == item) { + while(true) { + if(sentinel == item) { return false; - } else if(item->seq_id_high >= seq_id && item->seq_id_low <= seq_id) { + } else if(item->seq_id_high >= seq_id && item->seq_id_low <= seq_id) { seq_tracker->seq_ids_current = (ompi_seq_tracker_range_t*) item; - return true; - } else if(seq_id > item->seq_id_high && direction != -1) { - direction = 1; - item = (ompi_seq_tracker_range_t*) opal_list_get_next(item); - } else if(seq_id < item->seq_id_low && direction != 1) { + return true; + } else if(seq_id > item->seq_id_high && direction != -1) { + direction = 1; + item = (ompi_seq_tracker_range_t*) opal_list_get_next(item); + } else if(seq_id < item->seq_id_low && direction != 1) { direction = 
-1; - item = (ompi_seq_tracker_range_t*) opal_list_get_prev(item); - } else { + item = (ompi_seq_tracker_range_t*) opal_list_get_prev(item); + } else { return false; } } @@ -86,19 +86,19 @@ bool ompi_seq_tracker_check_duplicate( /* * insert item into sequence tracking list, * compacts continuous regions into a single entry - * GMS::: Use a free list for the items (don't do OBJ_NEW)! + * GMS::: Use a free list for the items (don't do OBJ_NEW)! */ -void ompi_seq_tracker_insert(ompi_seq_tracker_t* seq_tracker, - uint32_t seq_id) -{ - opal_list_t* seq_ids = &seq_tracker->seq_ids; +void ompi_seq_tracker_insert(ompi_seq_tracker_t* seq_tracker, + uint32_t seq_id) +{ + opal_list_t* seq_ids = &seq_tracker->seq_ids; ompi_seq_tracker_range_t* item = seq_tracker->seq_ids_current; int8_t direction = 0; /* 1 is next, -1 is previous */ ompi_seq_tracker_range_t *new_item, *next_item, *prev_item; const ompi_seq_tracker_range_t* sentinel = (ompi_seq_tracker_range_t*)opal_list_get_end(seq_ids); - while( true ) { - if( item == sentinel ) { + while( true ) { + if( item == sentinel ) { new_item = OBJ_NEW(ompi_seq_tracker_range_t); new_item->seq_id_low = new_item->seq_id_high = seq_id; if( -1 == direction ) { @@ -109,71 +109,71 @@ void ompi_seq_tracker_insert(ompi_seq_tracker_t* seq_tracker, seq_tracker->seq_ids_current = (ompi_seq_tracker_range_t*) new_item; return; - } else if(item->seq_id_high >= seq_id && item->seq_id_low <= seq_id ) { + } else if(item->seq_id_high >= seq_id && item->seq_id_low <= seq_id ) { seq_tracker->seq_ids_current = (ompi_seq_tracker_range_t*) item; - return; + return; + + } else if((item->seq_id_high + 1) == seq_id) { - } else if((item->seq_id_high + 1) == seq_id) { - - next_item = (ompi_seq_tracker_range_t*) opal_list_get_next(item); - /* try to consolidate */ - if( (sentinel != next_item) && next_item->seq_id_low == (seq_id+1)) { + next_item = (ompi_seq_tracker_range_t*) opal_list_get_next(item); + /* try to consolidate */ + if( (sentinel != next_item) && 
next_item->seq_id_low == (seq_id+1)) { item->seq_id_high = next_item->seq_id_high; opal_list_remove_item(seq_ids, (opal_list_item_t*) next_item); OBJ_RELEASE(next_item); - } else { + } else { item->seq_id_high = seq_id; - } + } seq_tracker->seq_ids_current = (ompi_seq_tracker_range_t*) item; - return; - - } else if((item->seq_id_low - 1) == seq_id) { - + return; + + } else if((item->seq_id_low - 1) == seq_id) { + prev_item = (ompi_seq_tracker_range_t*) opal_list_get_prev(item); /* try to consolidate */ - if( (sentinel != prev_item) && prev_item->seq_id_high == (seq_id-1)) { + if( (sentinel != prev_item) && prev_item->seq_id_high == (seq_id-1)) { item->seq_id_low = prev_item->seq_id_low; opal_list_remove_item(seq_ids, (opal_list_item_t*) prev_item); OBJ_RELEASE(prev_item); - } else { - item->seq_id_low = seq_id; + } else { + item->seq_id_low = seq_id; } seq_tracker->seq_ids_current = (ompi_seq_tracker_range_t*) item; - return; - - } else if(seq_id > item->seq_id_high ) { - if(direction == -1) { - /* we have gone back in the list, and we went one item too far */ + return; + + } else if(seq_id > item->seq_id_high ) { + if(direction == -1) { + /* we have gone back in the list, and we went one item too far */ new_item = OBJ_NEW(ompi_seq_tracker_range_t); - new_item->seq_id_low = new_item->seq_id_high = seq_id; + new_item->seq_id_low = new_item->seq_id_high = seq_id; next_item = (ompi_seq_tracker_range_t*) opal_list_get_next(item); /* insert new_item directly before item */ - opal_list_insert_pos(seq_ids, - (opal_list_item_t*) next_item, - (opal_list_item_t*) new_item); + opal_list_insert_pos(seq_ids, + (opal_list_item_t*) next_item, + (opal_list_item_t*) new_item); seq_tracker->seq_ids_current = (ompi_seq_tracker_range_t*) new_item; return; - } else { + } else { direction = 1; item = (ompi_seq_tracker_range_t*) opal_list_get_next(item); } - } else if(seq_id < item->seq_id_low) { - if(direction == 1) { - /* we have gone forward in the list, and we went one item too far 
*/ - new_item = OBJ_NEW(ompi_seq_tracker_range_t); - new_item->seq_id_low = new_item->seq_id_high = seq_id; - opal_list_insert_pos(seq_ids, - (opal_list_item_t*) item, - (opal_list_item_t*) new_item); - + } else if(seq_id < item->seq_id_low) { + if(direction == 1) { + /* we have gone forward in the list, and we went one item too far */ + new_item = OBJ_NEW(ompi_seq_tracker_range_t); + new_item->seq_id_low = new_item->seq_id_high = seq_id; + opal_list_insert_pos(seq_ids, + (opal_list_item_t*) item, + (opal_list_item_t*) new_item); + seq_tracker->seq_ids_current = (ompi_seq_tracker_range_t*) new_item; return; - } else { + } else { direction = -1; - item = (ompi_seq_tracker_range_t*) opal_list_get_prev(item); + item = (ompi_seq_tracker_range_t*) opal_list_get_prev(item); } - } else { + } else { return; } } diff --git a/ompi/class/ompi_seq_tracker.h b/ompi/class/ompi_seq_tracker.h index bd638967b5a..46f999820a5 100644 --- a/ompi/class/ompi_seq_tracker.h +++ b/ompi/class/ompi_seq_tracker.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,21 +25,21 @@ BEGIN_C_DECLS -struct ompi_seq_tracker_range_t{ - opal_list_item_t super; +struct ompi_seq_tracker_range_t{ + opal_list_item_t super; uint32_t seq_id_high; uint32_t seq_id_low; -}; +}; typedef struct ompi_seq_tracker_range_t ompi_seq_tracker_range_t; OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_seq_tracker_range_t); -struct ompi_seq_tracker_t{ +struct ompi_seq_tracker_t{ opal_list_t seq_ids; /**< list of seqs id's that have been seen */ ompi_seq_tracker_range_t* seq_ids_current; /**< a pointer to the last place we were in the list */ -}; -typedef struct ompi_seq_tracker_t ompi_seq_tracker_t; +}; +typedef struct ompi_seq_tracker_t ompi_seq_tracker_t; OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_seq_tracker_t); @@ -50,7 +50,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_seq_tracker_t); */ OMPI_DECLSPEC bool ompi_seq_tracker_check_duplicate( - ompi_seq_tracker_t* seq_tracker, + ompi_seq_tracker_t* seq_tracker, uint32_t seq_id); @@ -66,5 +66,5 @@ OMPI_DECLSPEC void ompi_seq_tracker_insert(ompi_seq_tracker_t* seq_tracker, uint OMPI_DECLSPEC void ompi_seq_tracker_copy(ompi_seq_tracker_t* dst, ompi_seq_tracker_t* src); END_C_DECLS -#endif +#endif diff --git a/ompi/communicator/Makefile.am b/ompi/communicator/Makefile.am index 6ff60a4c264..e7f6dc731ee 100644 --- a/ompi/communicator/Makefile.am +++ b/ompi/communicator/Makefile.am @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,10 +14,11 @@ # reserved. # Copyright (c) 2014 Research Organization for Information Science # and Technology (RIST). 
All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -28,7 +29,7 @@ headers += \ communicator/comm_request.h \ communicator/comm_helpers.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ communicator/comm_init.c \ communicator/comm.c \ communicator/comm_cid.c \ diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index f1e78e078bd..7126ca486b9 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,15 +16,16 @@ * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Universite Bordeaux 1 * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. + * Copyright (c) 2012-2016 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,8 +35,8 @@ #include "ompi/constants.h" #include "opal/mca/hwloc/base/base.h" -#include "opal/mca/dstore/dstore.h" #include "opal/dss/dss.h" +#include "opal/mca/pmix/pmix.h" #include "ompi/proc/proc.h" #include "opal/threads/mutex.h" @@ -43,7 +44,7 @@ #include "opal/util/output.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" @@ -51,13 +52,13 @@ #include "ompi/request/request.h" /* -** sort-function for MPI_Comm_split +** sort-function for MPI_Comm_split */ static int rankkeycompare(const void *, const void *); /** * to fill the rest of the stuff for the communicator - */ + */ static int ompi_comm_fill_rest (ompi_communicator_t *comm, int num_procs, ompi_proc_t **proc_pointers, @@ -75,37 +76,37 @@ typedef int ompi_comm_allgatherfct (void* inbuf, int incount, MPI_Datatype intyp mca_coll_base_module_t *data); static int ompi_comm_allgather_emulate_intra (void* inbuf, int incount, MPI_Datatype intype, - void* outbuf, int outcount, - MPI_Datatype outtype, + void* outbuf, int outcount, + MPI_Datatype outtype, ompi_communicator_t *comm, mca_coll_base_module_t *data); -static int ompi_comm_copy_topo (ompi_communicator_t *oldcomm, +static int ompi_comm_copy_topo (ompi_communicator_t *oldcomm, ompi_communicator_t *newcomm); /* idup with local group and info. 
the local group support is provided to support ompi_comm_set_nb */ -static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_info_t *info, - ompi_communicator_t **newcomm, ompi_request_t **req); +static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_group_t *remote_group, + ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req); /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ -/* +/* * This is the function setting all elements of a communicator. * All other routines are just used to determine these elements. - */ + */ -int ompi_comm_set ( ompi_communicator_t **ncomm, +int ompi_comm_set ( ompi_communicator_t **ncomm, ompi_communicator_t *oldcomm, - int local_size, + int local_size, int *local_ranks, int remote_size, int *remote_ranks, opal_hash_table_t *attr, ompi_errhandler_t *errh, - bool copy_topocomponent, - ompi_group_t *local_group, + bool copy_topocomponent, + ompi_group_t *local_group, ompi_group_t *remote_group ) { ompi_request_t *req; @@ -144,6 +145,14 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, ompi_communicator_t *newcomm = NULL; int ret; + if (NULL != local_group) { + local_size = ompi_group_size (local_group); + } + + if ( (NULL != remote_group) && (&ompi_mpi_group_null.group != remote_group) ) { + remote_size = ompi_group_size (remote_group); + } + *req = NULL; /* ompi_comm_allocate */ @@ -156,7 +165,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, if (NULL == local_group) { /* determine how the list of local_rank can be stored most efficiently */ - ret = ompi_group_incl(oldcomm->c_local_group, local_size, + ret = ompi_group_incl(oldcomm->c_local_group, local_size, local_ranks, &newcomm->c_local_group); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; @@ -164,14 +173,15 @@ int 
ompi_comm_set_nb ( ompi_communicator_t **ncomm, } else { newcomm->c_local_group = local_group; OBJ_RETAIN(newcomm->c_local_group); - ompi_group_increment_proc_count(newcomm->c_local_group); } newcomm->c_my_rank = newcomm->c_local_group->grp_my_rank; - + /* Set remote group and duplicate the local comm, if applicable */ - if (0 < remote_size) { - if (NULL == remote_group || &ompi_mpi_group_null.group == remote_group) { - ret = ompi_group_incl(oldcomm->c_remote_group, remote_size, + if ( NULL != remote_group ) { + ompi_communicator_t *old_localcomm; + + if (&ompi_mpi_group_null.group == remote_group) { + ret = ompi_group_incl(oldcomm->c_remote_group, remote_size, remote_ranks, &newcomm->c_remote_group); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; @@ -179,36 +189,31 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, } else { newcomm->c_remote_group = remote_group; OBJ_RETAIN(newcomm->c_remote_group); - ompi_group_increment_proc_count(newcomm->c_remote_group); } - } - if (0 < remote_size || &ompi_mpi_group_null.group == remote_group) { + newcomm->c_flags |= OMPI_COMM_INTER; - if ( OMPI_COMM_IS_INTRA(oldcomm) ) { - ompi_comm_idup(oldcomm, &newcomm->c_local_comm, req); - } else if (NULL == local_group) { - ompi_comm_idup(oldcomm->c_local_comm, &newcomm->c_local_comm, req); - } else { - /* NTH: use internal idup function that takes a local group argument */ - ompi_comm_idup_internal (oldcomm->c_local_comm, local_group, NULL, - &newcomm->c_local_comm, req); - } - } else { + + old_localcomm = OMPI_COMM_IS_INTRA(oldcomm) ? oldcomm : oldcomm->c_local_comm; + + /* NTH: use internal idup function that takes a local group argument */ + ompi_comm_idup_internal (old_localcomm, newcomm->c_local_group, NULL, NULL, + &newcomm->c_local_comm, req); + } else { newcomm->c_remote_group = newcomm->c_local_group; OBJ_RETAIN(newcomm->c_remote_group); } - + /* Check how many different jobids are represented in this communicator. 
Necessary for the disconnect of dynamic communicators. */ if ( 0 < local_size && (OMPI_COMM_IS_INTRA(newcomm) || 0 error_handler = errh; OBJ_RETAIN ( newcomm->error_handler ); - + /* Set Topology, if required and if available */ if ( copy_topocomponent && (NULL != oldcomm->c_topo) ) { /** @@ -220,20 +225,20 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, return ret; } } - + /* Copy attributes and call according copy functions, if required */ if (NULL != oldcomm->c_keyhash) { if (NULL != attr) { ompi_attr_hash_init(&newcomm->c_keyhash); if (OMPI_SUCCESS != (ret = ompi_attr_copy_all (COMM_ATTR, oldcomm, - newcomm, attr, + newcomm, attr, newcomm->c_keyhash))) { ompi_comm_free(&newcomm); return ret; - } + } } } - + *ncomm = newcomm; return (OMPI_SUCCESS); } @@ -250,9 +255,6 @@ int ompi_comm_group ( ompi_communicator_t* comm, ompi_group_t **group ) /* increment reference counters for the group */ OBJ_RETAIN(comm->c_local_group); - /* increase also the reference counter for the procs */ - ompi_group_increment_proc_count(comm->c_local_group); - *group = comm->c_local_group; return OMPI_SUCCESS; } @@ -263,35 +265,35 @@ int ompi_comm_group ( ompi_communicator_t* comm, ompi_group_t **group ) /* ** Counterpart to MPI_Comm_create. To be used within OMPI. */ -int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, +int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, ompi_communicator_t **newcomm ) { ompi_communicator_t *newcomp = NULL; - int rsize , lsize; + int rsize; int mode,i,j; int *allranks=NULL; int *rranks=NULL; int rc = OMPI_SUCCESS; - + ompi_group_t *remote_group = NULL; + /* silence clang warning. 
newcomm should never be NULL */ if (OPAL_UNLIKELY(NULL == newcomm)) { return OMPI_ERR_BAD_PARAM; } - lsize = group->grp_proc_count; - if ( OMPI_COMM_IS_INTER(comm) ) { int tsize; - + remote_group = &ompi_mpi_group_null.group; + tsize = ompi_comm_remote_size(comm); allranks = (int *) malloc ( tsize * sizeof(int)); if ( NULL == allranks ) { rc = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - - rc = comm->c_coll.coll_allgather ( &(group->grp_my_rank), - 1, MPI_INT, allranks, + + rc = comm->c_coll.coll_allgather ( &(group->grp_my_rank), + 1, MPI_INT, allranks, 1, MPI_INT, comm, comm->c_coll.coll_allgather_module); if ( OMPI_SUCCESS != rc ) { @@ -325,7 +327,7 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, rranks[j] = i; j++; } - } + } mode = OMPI_COMM_CID_INTER; } else { @@ -333,10 +335,10 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, rranks = NULL; mode = OMPI_COMM_CID_INTRA; } - + rc = ompi_comm_set ( &newcomp, /* new comm */ comm, /* old comm */ - lsize, /* local_size */ + 0, /* local array size */ NULL, /* local_ranks */ rsize, /* remote_size */ rranks, /* remote_ranks */ @@ -344,8 +346,8 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, comm->error_handler, /* error handler */ false, /* dont copy the topo */ group, /* local group */ - NULL /* remote group */ - ); + remote_group); /* remote group */ + if ( OMPI_SUCCESS != rc ) { goto exit; } @@ -356,29 +358,17 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, } /* Determine context id. 
It is identical to f_2_c_handle */ - rc = ompi_comm_nextcid ( newcomp, /* new communicator */ - comm, /* old comm */ - NULL, /* bridge comm */ - NULL, /* local leader */ - NULL, /* remote_leader */ - mode, /* mode */ - -1 ); /* send first */ + rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode); if ( OMPI_SUCCESS != rc ) { goto exit; } /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d CREATE FROM %d", + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d CREATE FROM %d", newcomp->c_contextid, comm->c_contextid ); /* Activate the communicator and init coll-component */ - rc = ompi_comm_activate( &newcomp, /* new communicator */ - comm, - NULL, - NULL, - NULL, - mode, - -1 ); + rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); if ( OMPI_SUCCESS != rc ) { goto exit; } @@ -393,7 +383,7 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, if ( MPI_UNDEFINED == newcomp->c_local_group->grp_my_rank ) { ompi_comm_free ( &newcomp ); } - + exit: if ( NULL != allranks ) { free ( allranks ); @@ -413,7 +403,7 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, /* ** Counterpart to MPI_Comm_split. To be used within OMPI (e.g. MPI_Cart_sub). 
*/ -int ompi_comm_split( ompi_communicator_t* comm, int color, int key, +int ompi_comm_split( ompi_communicator_t* comm, int color, int key, ompi_communicator_t **newcomm, bool pass_on_topo ) { int myinfo[2]; @@ -423,13 +413,13 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, int rsize; int i, loc; int inter; - int *results=NULL, *sorted=NULL; - int *rresults=NULL, *rsorted=NULL; + int *results=NULL, *sorted=NULL; + int *rresults=NULL, *rsorted=NULL; int rc=OMPI_SUCCESS; ompi_communicator_t *newcomp = NULL; int *lranks=NULL, *rranks=NULL; - ompi_group_t * local_group=NULL, * remote_group=NULL; - + ompi_group_t * local_group=NULL, *remote_group=NULL; + ompi_comm_allgatherfct *allgatherfct=NULL; /* Step 1: determine all the information for the local group */ @@ -456,7 +446,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, if ( OMPI_SUCCESS != rc ) { goto exit; } - + /* how many have the same color like me */ for ( my_size = 0, i=0; i < size; i++) { if ( results[(2*i)+0] == color) { @@ -475,7 +465,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, rc = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + /* ok we can now fill this info */ for( loc = 0, i = 0; i < size; i++ ) { if ( results[(2*i)+0] == color) { @@ -484,7 +474,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, loc++; } } - + /* the new array needs to be sorted so that it is in 'key' order */ /* if two keys are equal then it is sorted in original rank order! 
*/ if(my_size>1){ @@ -500,7 +490,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, for (i = 0; i < my_size; i++) { lranks[i] = sorted[i*2]; } - + /* Step 2: determine all the information for the remote group */ /* --------------------------------------------------------- */ if ( inter ) { @@ -513,7 +503,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, } /* this is an allgather on an inter-communicator */ - rc = comm->c_coll.coll_allgather( myinfo, 2, MPI_INT, rresults, 2, + rc = comm->c_coll.coll_allgather( myinfo, 2, MPI_INT, rresults, 2, MPI_INT, comm, comm->c_coll.coll_allgather_module); if ( OMPI_SUCCESS != rc ) { @@ -555,22 +545,23 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, rc = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + for (i = 0; i < my_rsize; i++) { rranks[i] = rsorted[i*2]; } } - ompi_group_incl(comm->c_local_group, my_size, lranks, &local_group); - ompi_group_increment_proc_count(local_group); + rc = ompi_group_incl(comm->c_local_group, my_size, lranks, &local_group); + if (OMPI_SUCCESS != rc) { + goto exit; + } mode = OMPI_COMM_CID_INTER; } else { rranks = NULL; mode = OMPI_COMM_CID_INTRA; } - - + /* Step 3: set up the communicator */ /* --------------------------------------------------------- */ /* Create the communicator finally */ @@ -585,8 +576,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, comm->error_handler,/* error handler */ pass_on_topo, local_group, /* local group */ - remote_group /* remote group */ - ); + remote_group); /* remote group */ if ( NULL == newcomp ) { rc = MPI_ERR_INTERN; @@ -597,7 +587,6 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, } if ( inter ) { - ompi_group_decrement_proc_count (local_group); OBJ_RELEASE(local_group); if (NULL != newcomp->c_local_comm) { snprintf(newcomp->c_local_comm->c_name, MPI_MAX_OBJECT_NAME, @@ -608,19 +597,13 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int 
key, } /* Determine context id. It is identical to f_2_c_handle */ - rc = ompi_comm_nextcid ( newcomp, /* new communicator */ - comm, /* old comm */ - NULL, /* bridge comm */ - NULL, /* local leader */ - NULL, /* remote_leader */ - mode, /* mode */ - -1 ); /* send first, doesn't matter */ + rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode); if ( OMPI_SUCCESS != rc ) { goto exit; } /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d SPLIT FROM %d", + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d SPLIT FROM %d", newcomp->c_contextid, comm->c_contextid ); /* set the rank to MPI_UNDEFINED. This prevents in comm_activate @@ -633,36 +616,15 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, /* Activate the communicator and init coll-component */ - rc = ompi_comm_activate( &newcomp, /* new communicator */ - comm, - NULL, - NULL, - NULL, - mode, - -1 ); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } + rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); exit: - if ( NULL != results ) { - free ( results ); - } - if ( NULL != sorted ) { - free ( sorted ); - } - if ( NULL != rresults) { - free ( rresults ); - } - if ( NULL != rsorted ) { - free ( rsorted ); - } - if ( NULL != lranks ) { - free ( lranks ); - } - if ( NULL != rranks ) { - free ( rranks ); - } + free ( results ); + free ( sorted ); + free ( rresults ); + free ( rsorted ); + free ( lranks ); + free ( rranks ); /* Step 4: if we are not part of the comm, free the struct */ /* --------------------------------------------------------- */ @@ -674,514 +636,330 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, } *newcomm = newcomp; - return ( rc ); + return rc; } /**********************************************************************/ /**********************************************************************/ 
/**********************************************************************/ -int -ompi_comm_split_type(ompi_communicator_t *comm, - int split_type, int key, - ompi_info_t *info, - ompi_communicator_t** newcomm) -{ - int myinfo[2]; - int size, my_size; - int my_rsize; - int mode; - int rsize; - int i, loc, found; - int inter; - int *results=NULL, *sorted=NULL; - int *rresults=NULL, *rsorted=NULL; - int rc=OMPI_SUCCESS; - ompi_communicator_t *newcomp = NULL; - int *lranks=NULL, *rranks=NULL; - - ompi_comm_allgatherfct *allgatherfct=NULL; +/* + * Produces an array of ranks that will be part of the local/remote group in the + * new communicator. The results array will be modified by this call. + */ +static int ompi_comm_split_type_get_part (ompi_group_t *group, const int split_type, int **ranks_out, int *rank_size) { + int size = ompi_group_size (group); + int my_size = 0; + int *ranks; + int ret; - /* silence clang warning. newcomm should never be NULL */ - if (OPAL_UNLIKELY(NULL == newcomm)) { - return OMPI_ERR_BAD_PARAM; + ranks = malloc (size * sizeof (int)); + if (OPAL_UNLIKELY(NULL == ranks)) { + return OMPI_ERR_OUT_OF_RESOURCE; } - /* Step 1: determine all the information for the local group */ - /* --------------------------------------------------------- */ + for (int i = 0 ; i < size ; ++i) { + ompi_proc_t *proc = ompi_group_get_proc_ptr_raw (group, i); + uint16_t locality, *u16ptr; + int include = false; - /* sort according to participation and rank. Gather information from everyone */ - /* allowed splitting types: - CLUSTER - CU - HOST - BOARD - NODE - NUMA - SOCKET - L3CACHE - L2CACHE - L1CACHE - CORE - HWTHREAD - Even though HWTHREAD/CORE etc. is overkill they are here for consistency. - They will most likely return a communicator which is equal to MPI_COMM_SELF - Unless oversubscribing. 
- */ - myinfo[0] = 0; // default to no type splitting (also if non-recognized split-type) - switch ( split_type ) { - case OMPI_COMM_TYPE_HWTHREAD: - myinfo[0] = 1; break; - case OMPI_COMM_TYPE_CORE: - myinfo[0] = 2; break; - case OMPI_COMM_TYPE_L1CACHE: - myinfo[0] = 3; break; - case OMPI_COMM_TYPE_L2CACHE: - myinfo[0] = 4; break; - case OMPI_COMM_TYPE_L3CACHE: - myinfo[0] = 5; break; - case OMPI_COMM_TYPE_SOCKET: - myinfo[0] = 6; break; - case OMPI_COMM_TYPE_NUMA: - myinfo[0] = 7; break; - //case MPI_COMM_TYPE_SHARED: // the standard implemented type - case OMPI_COMM_TYPE_NODE: - myinfo[0] = 8; break; - case OMPI_COMM_TYPE_BOARD: - myinfo[0] = 9; break; - case OMPI_COMM_TYPE_HOST: - myinfo[0] = 10; break; - case OMPI_COMM_TYPE_CU: - myinfo[0] = 11; break; - case OMPI_COMM_TYPE_CLUSTER: - myinfo[0] = 12; break; - } - myinfo[1] = key; + if (ompi_proc_is_sentinel (proc)) { + opal_process_name_t proc_name = ompi_proc_sentinel_to_name ((uintptr_t) proc); - size = ompi_comm_size ( comm ); - inter = OMPI_COMM_IS_INTER(comm); - if ( inter ) { - allgatherfct = (ompi_comm_allgatherfct *)ompi_comm_allgather_emulate_intra; - } else { - allgatherfct = (ompi_comm_allgatherfct *)comm->c_coll.coll_allgather; - } + if (split_type <= OMPI_COMM_TYPE_HOST) { + /* local ranks should never be represented by sentinel procs. ideally we + * should be able to use OPAL_MODEX_RECV_VALUE_OPTIONAL but it does have + * some overhead. update this to use the optional recv if that is ever fixed. 
*/ + continue; + } - results = (int*) malloc ( 2 * size * sizeof(int)); - if ( NULL == results ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } + u16ptr = &locality; - rc = allgatherfct( myinfo, 2, MPI_INT, results, 2, MPI_INT, comm, comm->c_coll.coll_allgather_module ); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc_name, &u16ptr, OPAL_UINT16); + if (OPAL_SUCCESS != ret) { + continue; + } + } else { + locality = proc->super.proc_flags; + } - /* check that all processors have been called with the same value */ - for ( i=0; i < size; i++) { - if ( results[2*i] != myinfo[0] ) { - rc = OMPI_ERR_BAD_PARAM; - goto exit; + switch (split_type) { + case OMPI_COMM_TYPE_HWTHREAD: + include = OPAL_PROC_ON_LOCAL_HWTHREAD(locality); + break; + case OMPI_COMM_TYPE_CORE: + include = OPAL_PROC_ON_LOCAL_CORE(locality); + break; + case OMPI_COMM_TYPE_L1CACHE: + include = OPAL_PROC_ON_LOCAL_L1CACHE(locality); + break; + case OMPI_COMM_TYPE_L2CACHE: + include = OPAL_PROC_ON_LOCAL_L2CACHE(locality); + break; + case OMPI_COMM_TYPE_L3CACHE: + include = OPAL_PROC_ON_LOCAL_L3CACHE(locality); + break; + case OMPI_COMM_TYPE_SOCKET: + include = OPAL_PROC_ON_LOCAL_SOCKET(locality); + break; + case OMPI_COMM_TYPE_NUMA: + include = OPAL_PROC_ON_LOCAL_NUMA(locality); + break; + case MPI_COMM_TYPE_SHARED: + include = OPAL_PROC_ON_LOCAL_NODE(locality); + break; + case OMPI_COMM_TYPE_BOARD: + include = OPAL_PROC_ON_LOCAL_BOARD(locality); + break; + case OMPI_COMM_TYPE_HOST: + include = OPAL_PROC_ON_LOCAL_HOST(locality); + break; + case OMPI_COMM_TYPE_CU: + include = OPAL_PROC_ON_LOCAL_CU(locality); + break; + case OMPI_COMM_TYPE_CLUSTER: + include = OPAL_PROC_ON_LOCAL_CLUSTER(locality); + break; } - } - - /* how many are participating and on my node? 
*/ - for ( my_size = 0, i=0; i < size; i++) { - if ( results[2*i] == 1 ) { - if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } - } else if ( results[2*i] == 2 ) { - if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } - } else if ( results[2*i] == 3 ) { - if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } - } else if ( results[2*i] == 4 ) { - if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } - } else if ( results[2*i] == 5 ) { - if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } - } else if ( results[2*i] == 6 ) { - if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } - } else if ( results[2*i] == 7 ) { - if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } - } else if ( results[2*i] == 8 ) { - if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } - } else if ( results[2*i] == 9 ) { - if (OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } - } else if ( results[2*i] == 10 ) { - if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } - } else if ( results[2*i] == 11 ) { - if (OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } - } else if ( results[2*i] == 12 ) { - if (OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - my_size++; - } + + if (include) { + ranks[my_size++] = i; } } - /* silence a clang warning about a 0-byte malloc. 
my_size can not be 0 here */ + *rank_size = my_size; + + /* silence a clang warning about a 0-byte malloc. my_size will never be 0 here */ if (OPAL_UNLIKELY(0 == my_size)) { - rc = OMPI_ERR_BAD_PARAM; - goto exit; + free (ranks); + return OMPI_SUCCESS; } - sorted = (int *) malloc ( sizeof( int ) * my_size * 2); - if ( NULL == sorted) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + /* shrink the rank array */ + int *tmp = realloc (ranks, my_size * sizeof (int)); + if (OPAL_LIKELY(NULL != tmp)) { + ranks = tmp; } - - /* ok we can now fill this info */ - for( loc = 0, i = 0; i < size; i++ ) { - found = 0; - if ( results[2*i] == 1 ) { - if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( results[2*i] == 2 ) { - if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( results[2*i] == 3 ) { - if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( results[2*i] == 4 ) { - if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( results[2*i] == 5 ) { - if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( results[2*i] == 6 ) { - if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( results[2*i] == 7 ) { - if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( results[2*i] == 8 ) { - if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( results[2*i] == 9 ) { - if (OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } else 
if ( results[2*i] == 10 ) { - if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( results[2*i] == 11 ) { - if (OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( results[2*i] == 12 ) { - if (OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - found = 1; - } - } - /* we have found and occupied the index (i) */ - if ( found == 1 ) { - sorted[2*loc ] = i; /* copy org rank */ - sorted[2*loc+1] = results[2*i+1]; /* copy key */ - loc++; - } - } - - /* the new array needs to be sorted so that it is in 'key' order */ - /* if two keys are equal then it is sorted in original rank order! */ - if(my_size>1){ - qsort ((int*)sorted, my_size, sizeof(int)*2, rankkeycompare); + *ranks_out = ranks; + + return OMPI_SUCCESS; +} + +static int ompi_comm_split_verify (ompi_communicator_t *comm, int split_type, int key, bool *need_split) +{ + int rank = ompi_comm_rank (comm); + int size = ompi_comm_size (comm); + int *results; + int rc; + + if (*need_split) { + return OMPI_SUCCESS; } - /* put group elements in a list */ - lranks = (int *) malloc ( my_size * sizeof(int)); - if ( NULL == lranks ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + results = malloc (2 * sizeof (int) * size); + if (OPAL_UNLIKELY(NULL == results)) { + return OMPI_ERR_OUT_OF_RESOURCE; } - for (i = 0; i < my_size; i++) { - lranks[i] = sorted[i*2]; + + *need_split = false; + + results[rank * 2] = split_type; + results[rank * 2 + 1] = key; + + rc = comm->c_coll.coll_allgather (MPI_IN_PLACE, 2, MPI_INT, results, 2, MPI_INT, comm, + comm->c_coll.coll_allgather_module); + if (OMPI_SUCCESS != rc) { + free (results); + return rc; } - - /* Step 2: determine all the information for the remote group */ - /* --------------------------------------------------------- */ - if ( inter ) { - rsize = comm->c_remote_group->grp_proc_count; - 
rresults = (int *) malloc ( rsize * 2 * sizeof(int)); - if ( NULL == rresults ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + + for (int i = 0 ; i < size ; ++i) { + if (MPI_UNDEFINED == results[i * 2] || (i > 1 && results[i * 2 + 1] < results[i * 2 - 1])) { + *need_split = true; + break; } + } - /* this is an allgather on an inter-communicator */ - rc = comm->c_coll.coll_allgather( myinfo, 2, MPI_INT, rresults, 2, - MPI_INT, comm, + free (results); + + return OMPI_SUCCESS; +} + +int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key, + ompi_info_t *info, ompi_communicator_t **newcomm) +{ + bool need_split = false, no_reorder = false, no_undefined = false; + ompi_communicator_t *newcomp = MPI_COMM_NULL; + int my_size, my_rsize = 0, mode, inter; + int *lranks = NULL, *rranks = NULL; + int global_split_type, ok, tmp[4]; + int rc; + + /* silence clang warning. newcomm should never be NULL */ + if (OPAL_UNLIKELY(NULL == newcomm)) { + return OMPI_ERR_BAD_PARAM; + } + + inter = OMPI_COMM_IS_INTER(comm); + + /* Step 1: verify all ranks have supplied the same value for split type. All split types + * must be the same or MPI_UNDEFINED (which is negative). */ + tmp[0] = split_type; + tmp[1] = -split_type; + tmp[2] = key; + tmp[3] = -key; + + rc = comm->c_coll.coll_allreduce (MPI_IN_PLACE, &tmp, 4, MPI_INT, MPI_MAX, comm, + comm->c_coll.coll_allreduce_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + global_split_type = tmp[0]; + + if (tmp[0] != -tmp[1] || inter) { + /* at least one rank supplied a different split type check if our split_type is ok */ + ok = (MPI_UNDEFINED == split_type) || global_split_type == split_type; + + rc = comm->c_coll.coll_allreduce (MPI_IN_PLACE, &ok, 1, MPI_INT, MPI_MIN, comm, comm->c_coll.coll_allgather_module); - if ( OMPI_SUCCESS != rc ) { - goto exit; + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; } - /* how many are participating and on my node? 
*/ - for ( my_rsize = 0, i=0; i < rsize; i++) { - if ( rresults[2*i] == 1 ) { - if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } - } else if ( rresults[2*i] == 2 ) { - if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } - } else if ( rresults[2*i] == 3 ) { - if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } - } else if ( rresults[2*i] == 4 ) { - if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } - } else if ( rresults[2*i] == 5 ) { - if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } - } else if ( rresults[2*i] == 6 ) { - if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } - } else if ( rresults[2*i] == 7 ) { - if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } - } else if ( rresults[2*i] == 8 ) { - if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } - } else if ( rresults[2*i] == 9 ) { - if (OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } - } else if ( rresults[2*i] == 10 ) { - if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } - } else if ( rresults[2*i] == 11 ) { - if (OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } - } else if ( rresults[2*i] == 12 ) { - if (OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - my_rsize++; - } + if (inter) { + /* need an extra allreduce to ensure that all ranks have the 
same result */ + rc = comm->c_coll.coll_allreduce (MPI_IN_PLACE, &ok, 1, MPI_INT, MPI_MIN, comm, + comm->c_coll.coll_allgather_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; } } - if (my_rsize > 0) { - rsorted = (int *) malloc ( sizeof( int ) * my_rsize * 2); - if ( NULL == rsorted) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - /* ok we can now fill this info */ - for( loc = 0, i = 0; i < rsize; i++ ) { - found = 0; - if ( rresults[2*i] == 1 ) { - if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( rresults[2*i] == 2 ) { - if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( rresults[2*i] == 3 ) { - if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( rresults[2*i] == 4 ) { - if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( rresults[2*i] == 5 ) { - if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( rresults[2*i] == 6 ) { - if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( rresults[2*i] == 7 ) { - if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( rresults[2*i] == 8 ) { - if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( rresults[2*i] == 9 ) { - if (OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( rresults[2*i] == 10 ) { - if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - 
} else if ( rresults[2*i] == 11 ) { - if (OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - } else if ( rresults[2*i] == 12 ) { - if (OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { - found = 1; - } - } - - if ( found == 1 ) { - rsorted[2*loc ] = i; /* org rank */ - rsorted[2*loc+1] = rresults[2*i+1]; /* key */ - loc++; - } - } - - /* the new array needs to be sorted so that it is in 'key' order */ - /* if two keys are equal then it is sorted in original rank order! */ - if(my_rsize > 1) { - qsort ((int*)rsorted, my_rsize, sizeof(int)*2, rankkeycompare); - } - - /* put group elements in a list */ - rranks = (int *) malloc ( my_rsize * sizeof(int)); - if ( NULL == rranks) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - for (i = 0; i < my_rsize; i++) { - rranks[i] = rsorted[i*2]; - } + if (OPAL_UNLIKELY(!ok)) { + return OMPI_ERR_BAD_PARAM; } - mode = OMPI_COMM_CID_INTER; + need_split = tmp[0] == -tmp[1]; } else { - my_rsize = 0; - rranks = NULL; - mode = OMPI_COMM_CID_INTRA; + /* intracommunicator and all ranks specified the same split type */ + no_undefined = true; + /* check if all ranks specified the same key */ + no_reorder = tmp[2] == -tmp[3]; } - - - /* Step 3: set up the communicator */ + + if (MPI_UNDEFINED == global_split_type) { + /* short-circut. every rank provided MPI_UNDEFINED */ + *newcomm = MPI_COMM_NULL; + return OMPI_SUCCESS; + } + + /* Step 2: Build potential communicator groups. If any ranks will not be part of + * the ultimate communicator we will drop them later. This saves doing an extra + * allgather on the whole communicator. By using ompi_comm_split() later only + * if needed we 1) optimized the common case (no MPI_UNDEFINED and no reorder), + * and 2) limit the allgather to a smaller set of peers in the uncommon case. 
*/ /* --------------------------------------------------------- */ - /* Create the communicator finally */ - rc = ompi_comm_set ( &newcomp, /* new comm */ - comm, /* old comm */ - my_size, /* local_size */ - lranks, /* local_ranks */ - my_rsize, /* remote_size */ - rranks, /* remote_ranks */ - NULL, /* attrs */ - comm->error_handler,/* error handler */ - false, /* don't copy the topo */ - NULL, /* local group */ - NULL ); /* remote group */ + /* allowed splitting types: + CLUSTER + CU + HOST + BOARD + NODE + NUMA + SOCKET + L3CACHE + L2CACHE + L1CACHE + CORE + HWTHREAD + Even though HWTHREAD/CORE etc. is overkill they are here for consistency. + They will most likely return a communicator which is equal to MPI_COMM_SELF + Unless oversubscribing. + */ - if ( NULL == newcomm ) { - rc = MPI_ERR_INTERN; - goto exit; - } - if ( OMPI_SUCCESS != rc ) { - goto exit; + /* how many ranks are potentially participating and on my node? */ + rc = ompi_comm_split_type_get_part (comm->c_local_group, global_split_type, &lranks, &my_size); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; } - /* Determine context id. 
It is identical to f_2_c_handle */ - rc = ompi_comm_nextcid ( newcomp, /* new communicator */ - comm, /* old comm */ - NULL, /* bridge comm */ - NULL, /* local leader */ - NULL, /* remote_leader */ - mode, /* mode */ - -1 ); /* send first, doesn't matter */ - if ( OMPI_SUCCESS != rc ) { - goto exit; + /* Step 3: determine all the information for the remote group */ + /* --------------------------------------------------------- */ + if (inter) { + rc = ompi_comm_split_type_get_part (comm->c_remote_group, global_split_type, &rranks, &my_rsize); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + free (lranks); + return rc; + } } - /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d SPLIT_TYPE FROM %d", - newcomp->c_contextid, comm->c_contextid ); + /* set the CID allgather mode to the appropriate one for the communicator */ + mode = inter ? OMPI_COMM_CID_INTER : OMPI_COMM_CID_INTRA; - /* set the rank to MPI_UNDEFINED. This prevents in comm_activate - * the collective module selection for a communicator that will - * be freed anyway. - */ - if ( MPI_UNDEFINED == split_type ) { - newcomp->c_local_group->grp_my_rank = MPI_UNDEFINED; - } + /* Step 4: set up the communicator */ + /* --------------------------------------------------------- */ + /* Create the communicator finally */ + do { + rc = ompi_comm_set (&newcomp, comm, my_size, lranks, my_rsize, + rranks, NULL, comm->error_handler, false, + NULL, NULL); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + break; + } - /* Activate the communicator and init coll-component */ - rc = ompi_comm_activate( &newcomp, /* new communicator */ - comm, - NULL, - NULL, - NULL, - mode, - -1 ); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } + /* Determine context id. 
It is identical to f_2_c_handle */ + rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + break; + } - exit: - if ( NULL != results ) { - free ( results ); - } - if ( NULL != sorted ) { - free ( sorted ); - } - if ( NULL != rresults) { - free ( rresults ); - } - if ( NULL != rsorted ) { - free ( rsorted ); - } - if ( NULL != lranks ) { - free ( lranks ); - } - if ( NULL != rranks ) { - free ( rranks ); - } + /* Activate the communicator and init coll-component */ + rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + break; + } - /* Step 4: if we are not part of the comm, free the struct */ - /* --------------------------------------------------------- */ - if ( NULL != newcomp && MPI_UNDEFINED == split_type ) { - ompi_comm_free ( &newcomp ); - } + /* Step 5: Check if we need to remove or reorder ranks in the communicator */ + if (!(no_reorder && no_undefined)) { + rc = ompi_comm_split_verify (newcomp, split_type, key, &need_split); - *newcomm = newcomp; - return ( rc ); -} + if (inter) { + /* verify that no local ranks need to be removed or reordered */ + rc = ompi_comm_split_verify (newcomp->c_local_comm, split_type, key, &need_split); + } + } + + if (!need_split) { + /* common case. no reordering and no MPI_UNDEFINED */ + *newcomm = newcomp; + /* Set name for debugging purposes */ + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d SPLIT_TYPE FROM %d", + newcomp->c_contextid, comm->c_contextid ); + break; + } + + /* TODO: there probably is better way to handle this case without throwing away the + * intermediate communicator. 
*/ + rc = ompi_comm_split (newcomp, split_type, key, newcomm, false); + /* get rid of the intermediate communicator */ + ompi_comm_free (&newcomp); + } while (0); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc && MPI_COMM_NULL != newcomp)) { + ompi_comm_free (&newcomp); + *newcomm = MPI_COMM_NULL; + } + + free (lranks); + free (rranks); + + return rc; +} /**********************************************************************/ /**********************************************************************/ @@ -1197,71 +975,64 @@ int ompi_comm_dup ( ompi_communicator_t * comm, ompi_communicator_t **newcomm ) int ompi_comm_dup_with_info ( ompi_communicator_t * comm, ompi_info_t *info, ompi_communicator_t **newcomm ) { ompi_communicator_t *newcomp = NULL; - int rsize = 0, mode = OMPI_COMM_CID_INTRA, rc = OMPI_SUCCESS; + ompi_group_t *remote_group = NULL; + int mode = OMPI_COMM_CID_INTRA, rc = OMPI_SUCCESS; if ( OMPI_COMM_IS_INTER ( comm ) ){ - rsize = comm->c_remote_group->grp_proc_count; mode = OMPI_COMM_CID_INTER; + remote_group = comm->c_remote_group; } - + *newcomm = MPI_COMM_NULL; rc = ompi_comm_set ( &newcomp, /* new comm */ comm, /* old comm */ - comm->c_local_group->grp_proc_count, /* local_size */ + 0, /* local array size */ NULL, /* local_procs*/ - rsize, /* remote_size */ + 0, /* remote array size */ NULL, /* remote_procs */ comm->c_keyhash, /* attrs */ comm->error_handler, /* error handler */ true, /* copy the topo */ comm->c_local_group, /* local group */ - comm->c_remote_group ); /* remote group */ + remote_group ); /* remote group */ if ( NULL == newcomp ) { rc = MPI_ERR_INTERN; return rc; } - if ( MPI_SUCCESS != rc) { + if ( MPI_SUCCESS != rc) { return rc; } /* Determine context id. 
It is identical to f_2_c_handle */ - rc = ompi_comm_nextcid ( newcomp, /* new communicator */ - comm, /* old comm */ - NULL, /* bridge comm */ - NULL, /* local leader */ - NULL, /* remote_leader */ - mode, /* mode */ - -1 ); /* send_first */ + rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode); if ( OMPI_SUCCESS != rc ) { return rc; } /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d DUP FROM %d", + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d DUP FROM %d", newcomp->c_contextid, comm->c_contextid ); /* activate communicator and init coll-module */ - rc = ompi_comm_activate( &newcomp, /* new communicator */ - comm, - NULL, - NULL, - NULL, - mode, - -1 ); + rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); if ( OMPI_SUCCESS != rc ) { return rc; } - + *newcomm = newcomp; return MPI_SUCCESS; } -struct ompi_comm_idup_with_info_context { +struct ompi_comm_idup_with_info_context_t { + opal_object_t super; ompi_communicator_t *comm; ompi_communicator_t *newcomp; }; +typedef struct ompi_comm_idup_with_info_context_t ompi_comm_idup_with_info_context_t; +OBJ_CLASS_INSTANCE(ompi_comm_idup_with_info_context_t, opal_object_t, NULL, NULL); + static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request); static int ompi_comm_idup_with_info_finish (ompi_comm_request_t *request); static int ompi_comm_idup_getcid (ompi_comm_request_t *request); @@ -1273,26 +1044,30 @@ int ompi_comm_idup (ompi_communicator_t *comm, ompi_communicator_t **newcomm, om int ompi_comm_idup_with_info (ompi_communicator_t *comm, ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req) { - return ompi_comm_idup_internal (comm, comm->c_local_group, info, newcomm, req); + return ompi_comm_idup_internal (comm, comm->c_local_group, comm->c_remote_group, info, newcomm, req); } /* NTH: we need a way to idup with a smaller local group so this function takes a local 
group */ -static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_info_t *info, - ompi_communicator_t **newcomm, ompi_request_t **req) +static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_group_t *remote_group, + ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req) { - struct ompi_comm_idup_with_info_context *context; + ompi_comm_idup_with_info_context_t *context; ompi_comm_request_t *request; ompi_request_t *subreq[1]; - int rsize = 0, rc; + int rc; *newcomm = MPI_COMM_NULL; + if (!OMPI_COMM_IS_INTER (comm)){ + remote_group = NULL; + } + request = ompi_comm_request_get (); if (NULL == request) { return OMPI_ERR_OUT_OF_RESOURCE; } - context = calloc (1, sizeof (*context)); + context = OBJ_NEW(ompi_comm_idup_with_info_context_t); if (NULL == context) { ompi_comm_request_return (request); return OMPI_ERR_OUT_OF_RESOURCE; @@ -1300,19 +1075,19 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro context->comm = comm; - request->context = context; + request->context = &context->super; rc = ompi_comm_set_nb (&context->newcomp, /* new comm */ comm, /* old comm */ - group->grp_proc_count, /* local_size */ + 0, /* local array size */ NULL, /* local_procs */ - rsize, /* remote_size */ + 0, /* remote array size */ NULL, /* remote_procs */ comm->c_keyhash, /* attrs */ comm->error_handler, /* error handler */ true, /* copy the topo */ group, /* local group */ - comm->c_remote_group, /* remote group */ + remote_group, /* remote group */ subreq); /* new subrequest */ if (NULL == context->newcomp) { ompi_comm_request_return (request); @@ -1333,8 +1108,8 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro static int ompi_comm_idup_getcid (ompi_comm_request_t *request) { - struct ompi_comm_idup_with_info_context *context = - (struct ompi_comm_idup_with_info_context *) request->context; + ompi_comm_idup_with_info_context_t *context = 
+ (ompi_comm_idup_with_info_context_t *) request->context; ompi_request_t *subreq[1]; int rc, mode; @@ -1345,11 +1120,8 @@ static int ompi_comm_idup_getcid (ompi_comm_request_t *request) } /* Determine context id. It is identical to f_2_c_handle */ - rc = ompi_comm_nextcid_nb (context->newcomp, /* new communicator */ - context->comm, /* old comm */ - NULL, /* bridge comm */ - mode, /* mode */ - subreq); /* new subrequest */ + rc = ompi_comm_nextcid_nb (context->newcomp, context->comm, NULL, NULL, + NULL, false, mode, subreq); if (OMPI_SUCCESS != rc) { ompi_comm_request_return (request); return rc; @@ -1362,8 +1134,8 @@ static int ompi_comm_idup_getcid (ompi_comm_request_t *request) static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request) { - struct ompi_comm_idup_with_info_context *context = - (struct ompi_comm_idup_with_info_context *) request->context; + ompi_comm_idup_with_info_context_t *context = + (ompi_comm_idup_with_info_context_t *) request->context; ompi_request_t *subreq[1]; int rc, mode; @@ -1378,7 +1150,7 @@ static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request) context->newcomp->c_contextid, context->comm->c_contextid ); /* activate communicator and init coll-module */ - rc = ompi_comm_activate_nb (&context->newcomp, context->comm, NULL, mode, subreq); + rc = ompi_comm_activate_nb (&context->newcomp, context->comm, NULL, NULL, NULL, false, mode, subreq); if ( OMPI_SUCCESS != rc ) { return rc; } @@ -1424,13 +1196,7 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int } /* Determine context id. 
It is identical to f_2_c_handle */ - rc = ompi_comm_nextcid ( newcomp, /* new communicator */ - comm, /* old comm */ - newcomp, /* bridge comm (used to pass the group into the group allreduce) */ - &tag, /* user defined tag */ - NULL, /* remote_leader */ - mode, /* mode */ - -1 ); /* send_first */ + rc = ompi_comm_nextcid (newcomp, comm, NULL, &tag, NULL, false, mode); if ( OMPI_SUCCESS != rc ) { return rc; } @@ -1440,13 +1206,7 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int newcomp->c_contextid, comm->c_contextid ); /* activate communicator and init coll-module */ - rc = ompi_comm_activate( &newcomp, /* new communicator */ - comm, - newcomp, - &tag, - NULL, - mode, - -1 ); + rc = ompi_comm_activate (&newcomp, comm, NULL, &tag, NULL, false, mode); if ( OMPI_SUCCESS != rc ) { return rc; } @@ -1488,7 +1248,7 @@ int ompi_comm_compare(ompi_communicator_t *comm1, ompi_communicator_t *comm2, in *result = MPI_UNEQUAL; return MPI_SUCCESS; } - + /* Compare local groups */ ompi_group_compare((ompi_group_t *)comp1->c_local_group, (ompi_group_t *)comp2->c_local_group, @@ -1505,7 +1265,7 @@ int ompi_comm_compare(ompi_communicator_t *comm1, ompi_communicator_t *comm2, in } - if ( rsize1 > 0 ) { + if ( rsize1 > 0 ) { /* Compare remote groups for inter-communicators */ ompi_group_compare((ompi_group_t *)comp1->c_remote_group, (ompi_group_t *)comp2->c_remote_group, @@ -1562,14 +1322,14 @@ int ompi_comm_set_name (ompi_communicator_t *comm, const char *name ) /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ -/* +/* * Implementation of MPI_Allgather for the local_group in an inter-comm. - * The algorithm consists of two steps: + * The algorithm consists of two steps: * 1. an inter-gather to rank 0 in remote group * 2. an inter-bcast from rank 0 in remote_group. 
*/ -static int ompi_comm_allgather_emulate_intra( void *inbuf, int incount, +static int ompi_comm_allgather_emulate_intra( void *inbuf, int incount, MPI_Datatype intype, void* outbuf, int outcount, MPI_Datatype outtype, ompi_communicator_t *comm, @@ -1605,30 +1365,30 @@ static int ompi_comm_allgather_emulate_intra( void *inbuf, int incount, rc = MCA_PML_CALL(irecv( &tmpbuf[outcount*i], outcount, outtype, i, OMPI_COMM_ALLGATHER_TAG, comm, &req[i] )); if ( OMPI_SUCCESS != rc ) { - goto exit; + goto exit; } } - } + } rc = MCA_PML_CALL(isend( inbuf, incount, intype, 0, OMPI_COMM_ALLGATHER_TAG, MCA_PML_BASE_SEND_STANDARD, comm, &sendreq )); if ( OMPI_SUCCESS != rc ) { - goto exit; + goto exit; } - + if ( 0 == rank ) { rc = ompi_request_wait_all( rsize, req, MPI_STATUSES_IGNORE); if ( OMPI_SUCCESS != rc ) { - goto exit; + goto exit; } } rc = ompi_request_wait( &sendreq, MPI_STATUS_IGNORE); if ( OMPI_SUCCESS != rc ) { - goto exit; + goto exit; } /* Step 2: the inter-bcast step */ - rc = MCA_PML_CALL(irecv (outbuf, size*outcount, outtype, 0, + rc = MCA_PML_CALL(irecv (outbuf, size*outcount, outtype, 0, OMPI_COMM_ALLGATHER_TAG, comm, &sendreq)); if ( OMPI_SUCCESS != rc ) { goto exit; @@ -1636,11 +1396,11 @@ static int ompi_comm_allgather_emulate_intra( void *inbuf, int incount, if ( 0 == rank ) { for ( i=0; i < rsize; i++ ){ - rc = MCA_PML_CALL(send (tmpbuf, rsize*outcount, outtype, i, - OMPI_COMM_ALLGATHER_TAG, + rc = MCA_PML_CALL(send (tmpbuf, rsize*outcount, outtype, i, + OMPI_COMM_ALLGATHER_TAG, MCA_PML_BASE_SEND_STANDARD, comm)); if ( OMPI_SUCCESS != rc ) { - goto exit; + goto exit; } } } @@ -1663,7 +1423,7 @@ static int ompi_comm_allgather_emulate_intra( void *inbuf, int incount, /* ** Counterpart to MPI_Comm_free. To be used within OMPI. ** The freeing of all attached objects (groups, errhandlers -** etc. ) has moved to the destructor. +** etc. ) has moved to the destructor. 
*/ int ompi_comm_free( ompi_communicator_t **comm ) { @@ -1747,8 +1507,8 @@ int ompi_comm_free( ompi_communicator_t **comm ) /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ -ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, - ompi_communicator_t *bridge_comm, +ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, + ompi_communicator_t *bridge_comm, int local_leader, int remote_leader, int tag, @@ -1766,8 +1526,6 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, char *recvbuf; ompi_proc_t **proc_list=NULL; int i; - opal_list_t myvals; - opal_value_t *kv; local_rank = ompi_comm_rank (local_comm); local_size = ompi_comm_size (local_comm); @@ -1780,7 +1538,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, } if(OMPI_GROUP_IS_DENSE(local_comm->c_local_group)) { rc = ompi_proc_pack(local_comm->c_local_group->grp_proc_pointers, - local_size, true, sbuf); + local_size, sbuf); } /* get the proc list for the sparse implementations */ else { @@ -1788,7 +1546,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, sizeof (ompi_proc_t *)); for(i=0 ; ic_local_group->grp_proc_count ; i++) proc_list[i] = ompi_group_peer_lookup(local_comm->c_local_group,i); - rc = ompi_proc_pack (proc_list, local_size, true, sbuf); + rc = ompi_proc_pack (proc_list, local_size, sbuf); } if ( OMPI_SUCCESS != rc ) { goto err_exit; @@ -1867,7 +1625,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, } /* decode the names into a proc-list */ - rc = ompi_proc_unpack(rbuf, rsize, &rprocs, true, NULL, NULL); + rc = ompi_proc_unpack(rbuf, rsize, &rprocs, NULL, NULL); OBJ_RELEASE(rbuf); if (OMPI_SUCCESS != rc) { OMPI_ERROR_LOG(rc); @@ -1876,22 +1634,16 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t 
*local_comm, /* set the locality of the remote procs */ for (i=0; i < rsize; i++) { - /* get the locality information - do not use modex recv for - * this request as that will automatically cause the hostname - * to be loaded as well. All RTEs are required to provide this - * information at startup for procs on our node. Thus, not - * finding the info indicates that the proc is non-local. - */ - OBJ_CONSTRUCT(&myvals, opal_list_t); - if (OMPI_SUCCESS != opal_dstore.fetch(opal_dstore_internal, - &rprocs[i]->super.proc_name, - OPAL_DSTORE_LOCALITY, &myvals)) { - rprocs[i]->super.proc_flags = OPAL_PROC_NON_LOCAL; + /* get the locality information - all RTEs are required + * to provide this information at startup */ + uint16_t *u16ptr, u16; + u16ptr = &u16; + OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_LOCALITY, &rprocs[i]->super.proc_name, &u16ptr, OPAL_UINT16); + if (OPAL_SUCCESS == rc) { + rprocs[i]->super.proc_flags = u16; } else { - kv = (opal_value_t*)opal_list_get_first(&myvals); - rprocs[i]->super.proc_flags = kv->data.uint16; + rprocs[i]->super.proc_flags = OPAL_PROC_NON_LOCAL; } - OPAL_LIST_DESTRUCT(&myvals); } /* And now add the information into the database */ @@ -1972,7 +1724,7 @@ int ompi_comm_determine_first ( ompi_communicator_t *intercomm, int high ) if (OPAL_UNLIKELY(0 == rsize)) { return OMPI_ERR_BAD_PARAM; } - + rdisps = (int *) calloc ( rsize, sizeof(int)); if ( NULL == rdisps ){ return OMPI_ERR_OUT_OF_RESOURCE; @@ -1983,12 +1735,12 @@ int ompi_comm_determine_first ( ompi_communicator_t *intercomm, int high ) free (rdisps); return OMPI_ERR_OUT_OF_RESOURCE; } - + rcounts[0] = 1; if ( 0 == rank ) { scount = 1; } - + rc = intercomm->c_coll.coll_allgatherv(&high, scount, MPI_INT, &rhigh, rcounts, rdisps, MPI_INT, intercomm, @@ -2034,10 +1786,10 @@ int ompi_comm_determine_first ( ompi_communicator_t *intercomm, int high ) int ompi_comm_dump ( ompi_communicator_t *comm ) { opal_output(0, "Dumping information for comm_cid %d\n", comm->c_contextid); - 
opal_output(0," f2c index:%d cube_dim: %d\n", comm->c_f_to_c_index, + opal_output(0," f2c index:%d cube_dim: %d\n", comm->c_f_to_c_index, comm->c_cube_dim); - opal_output(0," Local group: size = %d my_rank = %d\n", - comm->c_local_group->grp_proc_count, + opal_output(0," Local group: size = %d my_rank = %d\n", + comm->c_local_group->grp_proc_count, comm->c_local_group->grp_my_rank ); opal_output(0," Communicator is:"); @@ -2061,19 +1813,19 @@ int ompi_comm_dump ( ompi_communicator_t *comm ) /********************************************************************************/ /********************************************************************************/ /* static functions */ -/* -** rankkeygidcompare() compares a tuple of (rank,key,gid) producing -** sorted lists that match the rules needed for a MPI_Comm_split +/* +** rankkeygidcompare() compares a tuple of (rank,key,gid) producing +** sorted lists that match the rules needed for a MPI_Comm_split */ static int rankkeycompare (const void *p, const void *q) { int *a, *b; - + /* ranks at [0] key at [1] */ /* i.e. we cast and just compare the keys and then the original ranks.. */ a = (int*)p; b = (int*)q; - + /* simple tests are those where the keys are different */ if (a[1] < b[1]) { return (-1); @@ -2081,7 +1833,7 @@ static int rankkeycompare (const void *p, const void *q) if (a[1] > b[1]) { return (1); } - + /* ok, if the keys are the same then we check the original ranks */ if (a[1] == b[1]) { if (a[0] < b[0]) { @@ -2123,13 +1875,8 @@ int ompi_comm_enable(ompi_communicator_t *old_comm, int ret = OMPI_SUCCESS; /* Determine context id. 
It is identical to f_2_c_handle */ - ret = ompi_comm_nextcid ( new_comm, /* new communicator */ - old_comm, /* old comm */ - NULL, /* bridge comm */ - NULL, /* local leader */ - NULL, /* remote_leader */ - OMPI_COMM_CID_INTRA, /* mode */ - -1 ); /* send first, doesn't matter */ + ret = ompi_comm_nextcid (new_comm, old_comm, NULL, NULL, NULL, false, + OMPI_COMM_CID_INTRA); if (OMPI_SUCCESS != ret) { /* something wrong happened while setting the communicator */ goto complete_and_return; @@ -2152,15 +1899,8 @@ int ompi_comm_enable(ompi_communicator_t *old_comm, goto complete_and_return; } - ret = ompi_comm_activate( &new_comm, /* new communicator */ - old_comm, /* old comm */ - NULL, /* bridge comm */ - NULL, /* local leader */ - NULL, /* remote_leader */ - OMPI_COMM_CID_INTRA, /* mode */ - -1 ); /* send first, doesn't matter */ - - + ret = ompi_comm_activate (&new_comm, old_comm, NULL, NULL, NULL, false, + OMPI_COMM_CID_INTRA); if (OMPI_SUCCESS != ret) { /* something wrong happened while setting the communicator */ goto complete_and_return; @@ -2177,32 +1917,26 @@ static int ompi_comm_fill_rest(ompi_communicator_t *comm, ompi_errhandler_t *errh) { /* properly decrement the ref counts on the groups. - We are doing this because this function is sort of a redo - of what is done in comm.c. No need to decrement the ref - count on the proc pointers - This is just a quick fix, and will be looking for a + We are doing this because this function is sort of a redo + of what is done in comm.c. 
No need to decrement the ref + count on the proc pointers + This is just a quick fix, and will be looking for a better solution */ - OBJ_RELEASE( comm->c_local_group ); - /* silence clang warning about a NULL pointer dereference */ - assert (NULL != comm->c_local_group); - OBJ_RELEASE( comm->c_local_group ); + if (comm->c_local_group) { + OBJ_RELEASE( comm->c_local_group ); + } + + if (comm->c_remote_group) { + OBJ_RELEASE( comm->c_remote_group ); + } /* allocate a group structure for the new communicator */ - comm->c_local_group = ompi_group_allocate(num_procs); - - /* free the malloced proc pointers */ - free(comm->c_local_group->grp_proc_pointers); - - /* set the group information */ - comm->c_local_group->grp_proc_pointers = proc_pointers; + comm->c_local_group = ompi_group_allocate_plist_w_procs (proc_pointers, num_procs); /* set the remote group to be the same as local group */ comm->c_remote_group = comm->c_local_group; OBJ_RETAIN( comm->c_remote_group ); - /* retain these proc pointers */ - ompi_group_increment_proc_count(comm->c_local_group); - /* set the rank information */ comm->c_local_group->grp_my_rank = my_rank; comm->c_my_rank = my_rank; @@ -2210,7 +1944,7 @@ static int ompi_comm_fill_rest(ompi_communicator_t *comm, if( MPI_UNDEFINED != my_rank ) { /* verify whether to set the flag, that this comm contains process from more than one jobid. */ - ompi_dpm.mark_dyncomm (comm); + ompi_dpm_mark_dyncomm (comm); } /* set the error handler */ @@ -2220,7 +1954,7 @@ static int ompi_comm_fill_rest(ompi_communicator_t *comm, /* set name for debugging purposes */ /* there is no cid at this stage ... 
make this right and make edgars * code call this function and remove dupli cde - */ + */ snprintf (comm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMMUNICATOR %d", comm->c_contextid); @@ -2230,8 +1964,8 @@ static int ompi_comm_fill_rest(ompi_communicator_t *comm, return OMPI_SUCCESS; } -static int ompi_comm_copy_topo(ompi_communicator_t *oldcomm, - ompi_communicator_t *newcomm) +static int ompi_comm_copy_topo(ompi_communicator_t *oldcomm, + ompi_communicator_t *newcomm) { if( NULL == oldcomm->c_topo ) return OMPI_ERR_NOT_FOUND; diff --git a/ompi/communicator/comm_cid.c b/ompi/communicator/comm_cid.c index 3ecc8beebc6..cb203f0ecce 100644 --- a/ompi/communicator/comm_cid.c +++ b/ompi/communicator/comm_cid.c @@ -14,12 +14,13 @@ * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -44,298 +45,196 @@ #include "ompi/request/request.h" #include "ompi/runtime/mpiruntime.h" -BEGIN_C_DECLS +struct ompi_comm_cid_context_t; -/** - * These functions make sure, that we determine the global result over - * an intra communicators (simple), an inter-communicator and a - * pseudo inter-communicator described by two separate intra-comms - * and a bridge-comm (intercomm-create scenario). 
- */ +typedef int (*ompi_comm_allreduce_impl_fn_t) (int *inbuf, int *outbuf, int count, struct ompi_op_t *op, + struct ompi_comm_cid_context_t *cid_context, + ompi_request_t **req); -typedef int ompi_comm_cid_allredfct (int *inbuf, int* outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *comm, - ompi_communicator_t *bridgecomm, - void* lleader, void* rleader, - int send_first ); - -static int ompi_comm_allreduce_intra (int *inbuf, int* outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *intercomm, - ompi_communicator_t *bridgecomm, - void* local_leader, - void* remote_ledaer, - int send_first ); - -static int ompi_comm_allreduce_inter (int *inbuf, int *outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *intercomm, - ompi_communicator_t *bridgecomm, - void* local_leader, - void* remote_leader, - int send_first ); - -static int ompi_comm_allreduce_intra_bridge(int *inbuf, int* outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *intercomm, - ompi_communicator_t *bridgecomm, - void* local_leader, - void* remote_leader, - int send_first); - -static int ompi_comm_allreduce_intra_oob (int *inbuf, int* outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *intercomm, - ompi_communicator_t *bridgecomm, - void* local_leader, - void* remote_leader, - int send_first ); - -static int ompi_comm_allreduce_group (int *inbuf, int* outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *intercomm, - ompi_communicator_t *bridgecomm, - void* local_leader, - void* remote_leader, - int send_first); +struct ompi_comm_cid_context_t { + opal_object_t super; -/* non-blocking intracommunicator allreduce */ -static int ompi_comm_allreduce_intra_nb (int *inbuf, int *outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *comm, - ompi_communicator_t *bridgecomm, - ompi_request_t **req); + ompi_communicator_t *newcomm; + ompi_communicator_t **newcommp; + ompi_communicator_t *comm; + ompi_communicator_t 
*bridgecomm; -/* non-blocking intercommunicator allreduce */ -static int ompi_comm_allreduce_inter_nb (int *inbuf, int *outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *intercomm, - ompi_communicator_t *bridgecomm, - ompi_request_t **req); + ompi_comm_allreduce_impl_fn_t allreduce_fn; + int nextcid; + int nextlocal_cid; + int start; + int flag, rflag; + int local_leader; + int remote_leader; + int iter; + /** storage for activate barrier */ + int ok; + char *port_string; + bool send_first; + int pml_tag; + char *pmix_tag; +}; -static int ompi_comm_register_cid (uint32_t contextid); -static int ompi_comm_unregister_cid (uint32_t contextid); -static uint32_t ompi_comm_lowest_cid ( void ); +typedef struct ompi_comm_cid_context_t ompi_comm_cid_context_t; -struct ompi_comm_reg_t{ - opal_list_item_t super; - uint32_t cid; -}; -typedef struct ompi_comm_reg_t ompi_comm_reg_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_comm_reg_t); +static void mca_comm_cid_context_construct (ompi_comm_cid_context_t *context) +{ + memset ((void *) ((intptr_t) context + sizeof (context->super)), 0, sizeof (*context) - sizeof (context->super)); +} -static void ompi_comm_reg_constructor(ompi_comm_reg_t *regcom); -static void ompi_comm_reg_destructor(ompi_comm_reg_t *regcom); +static void mca_comm_cid_context_destruct (ompi_comm_cid_context_t *context) +{ + free (context->port_string); + free (context->pmix_tag); +} -OBJ_CLASS_INSTANCE (ompi_comm_reg_t, - opal_list_item_t, - ompi_comm_reg_constructor, - ompi_comm_reg_destructor ); +OBJ_CLASS_INSTANCE (ompi_comm_cid_context_t, opal_object_t, + mca_comm_cid_context_construct, + mca_comm_cid_context_destruct); -static opal_mutex_t ompi_cid_lock; -static opal_list_t ompi_registered_comms; +struct ompi_comm_allreduce_context_t { + opal_object_t super; + int *inbuf; + int *outbuf; + int count; + struct ompi_op_t *op; + ompi_comm_cid_context_t *cid_context; + int *tmpbuf; -/* This variable is zero (false) if all processes in 
MPI_COMM_WORLD - * did not require MPI_THREAD_MULTIPLE support, and is 1 (true) as - * soon as at least one process requested support for THREAD_MULTIPLE */ -static int ompi_comm_world_thread_level_mult=0; + /* for group allreduce */ + int peers_comm[3]; +}; +typedef struct ompi_comm_allreduce_context_t ompi_comm_allreduce_context_t; -int ompi_comm_cid_init (void) +static void ompi_comm_allreduce_context_construct (ompi_comm_allreduce_context_t *context) { -#if OMPI_ENABLE_THREAD_MULTIPLE - ompi_proc_t **procs, *thisproc; - uint8_t thread_level; - uint8_t *tlpointer; - int ret; - size_t i, size, numprocs; + memset ((void *) ((intptr_t) context + sizeof (context->super)), 0, sizeof (*context) - sizeof (context->super)); +} - /** Note that the following call only returns processes - * with the same jobid. This is on purpose, since - * we switch for the dynamic communicators anyway - * to the original (slower) cid allocation algorithm. - */ - procs = ompi_proc_world ( &numprocs ); +static void ompi_comm_allreduce_context_destruct (ompi_comm_allreduce_context_t *context) +{ + free (context->tmpbuf); +} - for ( i=0; isuper, (uint8_t**)&tlpointer, &size); - if (OMPI_SUCCESS == ret) { - thread_level = *((uint8_t *) tlpointer); - if ( OMPI_THREADLEVEL_IS_MULTIPLE (thread_level) ) { - ompi_comm_world_thread_level_mult = 1; - break; - } - } else if (OMPI_ERR_NOT_IMPLEMENTED == ret) { - if (ompi_mpi_thread_multiple) { - ompi_comm_world_thread_level_mult = 1; - } - break; - } else { - return ret; - } - } - free(procs); -#else - ompi_comm_world_thread_level_mult = 0; // silence compiler warning if not used -#endif +/** + * These functions make sure, that we determine the global result over + * an intra communicators (simple), an inter-communicator and a + * pseudo inter-communicator described by two separate intra-comms + * and a bridge-comm (intercomm-create scenario). 
+ */ - return OMPI_SUCCESS; -} +/* non-blocking intracommunicator allreduce */ +static int ompi_comm_allreduce_intra_nb (int *inbuf, int *outbuf, int count, + struct ompi_op_t *op, ompi_comm_cid_context_t *cid_context, + ompi_request_t **req); -int ompi_comm_nextcid ( ompi_communicator_t* newcomm, - ompi_communicator_t* comm, - ompi_communicator_t* bridgecomm, - void* local_leader, - void* remote_leader, - int mode, int send_first ) -{ - int ret; - int nextcid; - bool flag; - int nextlocal_cid; - int done=0; - int response, glresponse=0; - int start; - unsigned int i; +/* non-blocking intercommunicator allreduce */ +static int ompi_comm_allreduce_inter_nb (int *inbuf, int *outbuf, int count, + struct ompi_op_t *op, ompi_comm_cid_context_t *cid_context, + ompi_request_t **req); - ompi_comm_cid_allredfct* allredfnct; +static int ompi_comm_allreduce_group_nb (int *inbuf, int *outbuf, int count, + struct ompi_op_t *op, ompi_comm_cid_context_t *cid_context, + ompi_request_t **req); - /** - * Determine which implementation of allreduce we have to use - * for the current scenario - */ +static int ompi_comm_allreduce_intra_pmix_nb (int *inbuf, int *outbuf, int count, + struct ompi_op_t *op, ompi_comm_cid_context_t *cid_context, + ompi_request_t **req); - switch (mode) - { - case OMPI_COMM_CID_INTRA: - allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra; - break; - case OMPI_COMM_CID_INTER: - allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_inter; - break; - case OMPI_COMM_CID_INTRA_BRIDGE: - allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_bridge; - break; - case OMPI_COMM_CID_INTRA_OOB: - allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_oob; - break; - case OMPI_COMM_CID_GROUP: - allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_group; - break; - default: - return MPI_UNDEFINED; - break; - } +static int ompi_comm_allreduce_intra_bridge_nb (int *inbuf, int *outbuf, int count, + struct ompi_op_t *op, 
ompi_comm_cid_context_t *cid_context, + ompi_request_t **req); - ret = ompi_comm_register_cid (comm->c_contextid); - if (OMPI_SUCCESS != ret) { - return ret; - } - start = ompi_mpi_communicators.lowest_free; - - while (!done) { - /** - * This is the real algorithm described in the doc - */ - OPAL_THREAD_LOCK(&ompi_cid_lock); - if (comm->c_contextid != ompi_comm_lowest_cid() ) { - /* if not lowest cid, we do not continue, but sleep and try again */ - OPAL_THREAD_UNLOCK(&ompi_cid_lock); - continue; - } - OPAL_THREAD_UNLOCK(&ompi_cid_lock); +static opal_mutex_t ompi_cid_lock = OPAL_MUTEX_STATIC_INIT; - nextlocal_cid = mca_pml.pml_max_contextid; - flag = false; - for (i=start; i < mca_pml.pml_max_contextid ; i++) { - flag = opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, - i, comm); - if (true == flag) { - nextlocal_cid = i; - break; - } - } - ret = (allredfnct)(&nextlocal_cid, &nextcid, 1, MPI_MAX, comm, bridgecomm, - local_leader, remote_leader, send_first ); - if( OMPI_SUCCESS != ret ) { - opal_pointer_array_set_item(&ompi_mpi_communicators, nextlocal_cid, NULL); - goto release_and_return; - } +int ompi_comm_cid_init (void) +{ + return OMPI_SUCCESS; +} - if (mca_pml.pml_max_contextid == (unsigned int) nextcid) { - /* at least one peer ran out of CIDs */ - if (flag) { - opal_pointer_array_set_item(&ompi_mpi_communicators, nextlocal_cid, NULL); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto release_and_return; - } - } +static ompi_comm_cid_context_t *mca_comm_cid_context_alloc (ompi_communicator_t *newcomm, ompi_communicator_t *comm, + ompi_communicator_t *bridgecomm, const void *arg0, + const void *arg1, const char *pmix_tag, bool send_first, + int mode) +{ + ompi_comm_cid_context_t *context; - if (nextcid == nextlocal_cid) { - response = 1; /* fine with me */ - } - else { - opal_pointer_array_set_item(&ompi_mpi_communicators, - nextlocal_cid, NULL); - - flag = opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, - nextcid, comm ); - if (true == 
flag) { - response = 1; /* works as well */ - } - else { - response = 0; /* nope, not acceptable */ - } - } + context = OBJ_NEW(ompi_comm_cid_context_t); + if (OPAL_UNLIKELY(NULL == context)) { + return NULL; + } - ret = (allredfnct)(&response, &glresponse, 1, MPI_MIN, comm, bridgecomm, - local_leader, remote_leader, send_first ); - if( OMPI_SUCCESS != ret ) { - opal_pointer_array_set_item(&ompi_mpi_communicators, nextcid, NULL); - goto release_and_return; - } - if (1 == glresponse) { - done = 1; /* we are done */ - break; - } - else if ( 0 == glresponse ) { - if ( 1 == response ) { - /* we could use that, but other don't agree */ - opal_pointer_array_set_item(&ompi_mpi_communicators, - nextcid, NULL); - } - start = nextcid+1; /* that's where we can start the next round */ + context->newcomm = newcomm; + context->comm = comm; + context->bridgecomm = bridgecomm; + context->pml_tag = 0; + + /* Determine which implementation of allreduce we have to use + * for the current mode. */ + switch (mode) { + case OMPI_COMM_CID_INTRA: + context->allreduce_fn = ompi_comm_allreduce_intra_nb; + break; + case OMPI_COMM_CID_INTER: + context->allreduce_fn = ompi_comm_allreduce_inter_nb; + break; + case OMPI_COMM_CID_GROUP: + context->allreduce_fn = ompi_comm_allreduce_group_nb; + context->pml_tag = ((int *) arg0)[0]; + break; + case OMPI_COMM_CID_INTRA_PMIX: + context->allreduce_fn = ompi_comm_allreduce_intra_pmix_nb; + context->local_leader = ((int *) arg0)[0]; + if (arg1) { + context->port_string = strdup ((char *) arg1); } - } + context->pmix_tag = strdup ((char *) pmix_tag); + break; + case OMPI_COMM_CID_INTRA_BRIDGE: + context->allreduce_fn = ompi_comm_allreduce_intra_bridge_nb; + context->local_leader = ((int *) arg0)[0]; + context->remote_leader = ((int *) arg1)[0]; + break; + default: + OBJ_RELEASE(context); + return NULL; + } + + context->send_first = send_first; + context->iter = 0; + context->ok = 1; + + return context; +} - /* set the according values to the newcomm */ - 
newcomm->c_contextid = nextcid; - opal_pointer_array_set_item (&ompi_mpi_communicators, nextcid, newcomm); +static ompi_comm_allreduce_context_t *ompi_comm_allreduce_context_alloc (int *inbuf, int *outbuf, + int count, struct ompi_op_t *op, + ompi_comm_cid_context_t *cid_context) +{ + ompi_comm_allreduce_context_t *context; - release_and_return: - ompi_comm_unregister_cid (comm->c_contextid); + context = OBJ_NEW(ompi_comm_allreduce_context_t); + if (OPAL_UNLIKELY(NULL == context)) { + return NULL; + } - return ret; -} + context->inbuf = inbuf; + context->outbuf = outbuf; + context->count = count; + context->op = op; + context->cid_context = cid_context; -/* Non-blocking version of ompi_comm_nextcid */ -struct mca_comm_nextcid_context { - ompi_communicator_t* newcomm; - ompi_communicator_t* comm; - ompi_communicator_t* bridgecomm; - int mode; - int nextcid; - int nextlocal_cid; - int start; - int flag, rflag; -}; + return context; +} /* find the next available local cid and start an allreduce */ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request); @@ -344,98 +243,97 @@ static int ompi_comm_checkcid (ompi_comm_request_t *request); /* verify that the cid was available globally */ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request); -int ompi_comm_nextcid_nb (ompi_communicator_t* newcomm, - ompi_communicator_t* comm, - ompi_communicator_t* bridgecomm, - int mode, ompi_request_t **req) +static volatile int64_t ompi_comm_cid_lowest_id = INT64_MAX; + +int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *comm, + ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, + bool send_first, int mode, ompi_request_t **req) { - struct mca_comm_nextcid_context *context; + ompi_comm_cid_context_t *context; ompi_comm_request_t *request; - int ret; - /** - * Determine which implementation of allreduce we have to use - * for the current scenario - */ - if (OMPI_COMM_CID_INTRA != mode && OMPI_COMM_CID_INTER != 
mode) { - return MPI_UNDEFINED; - } - - ret = ompi_comm_register_cid (comm->c_contextid); - if (OMPI_SUCCESS != ret) { - return ret; - } - - context = calloc (1, sizeof (*context)); + context = mca_comm_cid_context_alloc (newcomm, comm, bridgecomm, arg0, arg1, + "nextcid", send_first, mode); if (NULL == context) { - ompi_comm_unregister_cid (comm->c_contextid); return OMPI_ERR_OUT_OF_RESOURCE; } + context->start = ompi_mpi_communicators.lowest_free; + request = ompi_comm_request_get (); if (NULL == request) { - ompi_comm_unregister_cid (comm->c_contextid); - free (context); + OBJ_RELEASE(context); return OMPI_ERR_OUT_OF_RESOURCE; } - context->newcomm = newcomm; - context->comm = comm; - context->bridgecomm = bridgecomm; - context->mode = mode; - context->start = ompi_mpi_communicators.lowest_free; - - request->context = context; + request->context = &context->super; ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0); ompi_comm_request_start (request); *req = &request->super; + return OMPI_SUCCESS; } +int ompi_comm_nextcid (ompi_communicator_t *newcomm, ompi_communicator_t *comm, + ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, + bool send_first, int mode) +{ + ompi_request_t *req; + int rc; + + rc = ompi_comm_nextcid_nb (newcomm, comm, bridgecomm, arg0, arg1, send_first, mode, &req); + if (OMPI_SUCCESS != rc) { + return rc; + } + + ompi_request_wait_completion (req); + rc = req->req_status.MPI_ERROR; + ompi_comm_request_return ((ompi_comm_request_t *) req); + + return rc; +} + static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request) { - struct mca_comm_nextcid_context *context = request->context; + ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context; + int64_t my_id = ((int64_t) ompi_comm_get_cid (context->comm) << 32 | context->pml_tag); ompi_request_t *subreq; - unsigned int i; bool flag; int ret; - /** - * This is the real algorithm described in the doc - */ - 
OPAL_THREAD_LOCK(&ompi_cid_lock); - if (context->comm->c_contextid != ompi_comm_lowest_cid() ) { - /* if not lowest cid, we do not continue, but sleep and try again */ - OPAL_THREAD_UNLOCK(&ompi_cid_lock); - ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0); + if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) { + return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0); + } - return OMPI_SUCCESS; + if (ompi_comm_cid_lowest_id < my_id) { + OPAL_THREAD_UNLOCK(&ompi_cid_lock); + return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0); } - OPAL_THREAD_UNLOCK(&ompi_cid_lock); + ompi_comm_cid_lowest_id = my_id; + + /** + * This is the real algorithm described in the doc + */ flag = false; context->nextlocal_cid = mca_pml.pml_max_contextid; - for (i = context->start ; i < mca_pml.pml_max_contextid ; ++i) { - flag = opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, - i, context->comm); + for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) { + flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i, + context->comm); if (true == flag) { context->nextlocal_cid = i; break; } } - if (context->mode == OMPI_COMM_CID_INTRA) { - ret = ompi_comm_allreduce_intra_nb (&context->nextlocal_cid, &context->nextcid, 1, MPI_MAX, - context->comm, context->bridgecomm, &subreq); - } else { - ret = ompi_comm_allreduce_inter_nb (&context->nextlocal_cid, &context->nextcid, 1, MPI_MAX, - context->comm, context->bridgecomm, &subreq); - } - + ret = context->allreduce_fn (&context->nextlocal_cid, &context->nextcid, 1, MPI_MAX, + context, &subreq); if (OMPI_SUCCESS != ret) { + ompi_comm_cid_lowest_id = INT64_MAX; + OPAL_THREAD_UNLOCK(&ompi_cid_lock); return ret; } @@ -444,58 +342,64 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request) if (flag) { opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, context->nextlocal_cid, 
NULL); } - + + ompi_comm_cid_lowest_id = INT64_MAX; + OPAL_THREAD_UNLOCK(&ompi_cid_lock); return OMPI_ERR_OUT_OF_RESOURCE; } + OPAL_THREAD_UNLOCK(&ompi_cid_lock); /* next we want to verify that the resulting commid is ok */ - ompi_comm_request_schedule_append (request, ompi_comm_checkcid, &subreq, 1); - - return OMPI_SUCCESS; + return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, &subreq, 1); } static int ompi_comm_checkcid (ompi_comm_request_t *request) { - struct mca_comm_nextcid_context *context = request->context; + ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context; ompi_request_t *subreq; int ret; + if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) { + return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, NULL, 0); + } + context->flag = (context->nextcid == context->nextlocal_cid); if (!context->flag) { opal_pointer_array_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL); - context->flag = opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, - context->nextcid, context->comm); + context->flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, + context->nextcid, context->comm); } - if (context->mode == OMPI_COMM_CID_INTRA) { - ret = ompi_comm_allreduce_intra_nb (&context->flag, &context->rflag, 1, MPI_MIN, context->comm, - context->bridgecomm, &subreq); - } else { - ret = ompi_comm_allreduce_inter_nb (&context->flag, &context->rflag, 1, MPI_MIN, context->comm, - context->bridgecomm, &subreq); - } + ++context->iter; - if (OMPI_SUCCESS != ret) { - return ret; + ret = context->allreduce_fn (&context->flag, &context->rflag, 1, MPI_MIN, context, &subreq); + if (OMPI_SUCCESS == ret) { + ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, &subreq, 1); } - ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, &subreq, 1); + OPAL_THREAD_UNLOCK(&ompi_cid_lock); - return OMPI_SUCCESS; + return ret; } static int 
ompi_comm_nextcid_check_flag (ompi_comm_request_t *request) { - struct mca_comm_nextcid_context *context = request->context; + ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context; + + if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) { + return ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, NULL, 0); + } if (1 == context->rflag) { /* set the according values to the newcomm */ context->newcomm->c_contextid = context->nextcid; opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, context->newcomm); - ompi_comm_unregister_cid (context->comm->c_contextid); + /* unlock the cid generator */ + ompi_comm_cid_lowest_id = INT64_MAX; + OPAL_THREAD_UNLOCK(&ompi_cid_lock); /* done! */ return OMPI_SUCCESS; @@ -503,118 +407,18 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request) if (1 == context->flag) { /* we could use this cid, but other don't agree */ - opal_pointer_array_set_item(&ompi_mpi_communicators, context->nextcid, NULL); + opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, NULL); context->start = context->nextcid + 1; /* that's where we can start the next round */ } - /* try again */ - return ompi_comm_allreduce_getnextcid (request); -} - -/**************************************************************************/ -/**************************************************************************/ -/**************************************************************************/ -static void ompi_comm_reg_constructor (ompi_comm_reg_t *regcom) -{ - regcom->cid=MPI_UNDEFINED; -} - -static void ompi_comm_reg_destructor (ompi_comm_reg_t *regcom) -{ -} - -void ompi_comm_reg_init (void) -{ - OBJ_CONSTRUCT(&ompi_registered_comms, opal_list_t); - OBJ_CONSTRUCT(&ompi_cid_lock, opal_mutex_t); -} - -void ompi_comm_reg_finalize (void) -{ - OBJ_DESTRUCT(&ompi_registered_comms); - OBJ_DESTRUCT(&ompi_cid_lock); -} - - -static int ompi_comm_register_cid (uint32_t cid) -{ - ompi_comm_reg_t 
*regcom; - ompi_comm_reg_t *newentry = OBJ_NEW(ompi_comm_reg_t); - bool registered = false; - - do { - /* Only one communicator function allowed in same time on the - * same communicator. - */ - OPAL_THREAD_LOCK(&ompi_cid_lock); - - newentry->cid = cid; - if ( !(opal_list_is_empty (&ompi_registered_comms)) ) { - bool ok = true; - - OPAL_LIST_FOREACH(regcom, &ompi_registered_comms, ompi_comm_reg_t) { - if ( regcom->cid > cid ) { - break; - } -#if OMPI_ENABLE_THREAD_MULTIPLE - if( regcom->cid == cid ) { - /** - * The MPI standard state that is the user responsability to - * schedule the global communications in order to avoid any - * kind of troubles. As, managing communicators involve several - * collective communications, we should enforce a sequential - * execution order. This test only allow one communicator - * creation function based on the same communicator. - */ - ok = false; - break; - } -#endif /* OMPI_ENABLE_THREAD_MULTIPLE */ - } - if (ok) { - opal_list_insert_pos (&ompi_registered_comms, (opal_list_item_t *) regcom, - (opal_list_item_t *)newentry); - registered = true; - } - } else { - opal_list_append (&ompi_registered_comms, (opal_list_item_t *)newentry); - registered = true; - } - - /* drop the lock before trying again */ - OPAL_THREAD_UNLOCK(&ompi_cid_lock); - } while (!registered); - - return OMPI_SUCCESS; -} - -static int ompi_comm_unregister_cid (uint32_t cid) -{ - ompi_comm_reg_t *regcom; - - OPAL_THREAD_LOCK(&ompi_cid_lock); - - OPAL_LIST_FOREACH(regcom, &ompi_registered_comms, ompi_comm_reg_t) { - if(regcom->cid == cid) { - opal_list_remove_item(&ompi_registered_comms, (opal_list_item_t *) regcom); - OBJ_RELEASE(regcom); - break; - } - } + ++context->iter; OPAL_THREAD_UNLOCK(&ompi_cid_lock); - return OMPI_SUCCESS; + /* try again */ + return ompi_comm_allreduce_getnextcid (request); } -static uint32_t ompi_comm_lowest_cid (void) -{ - ompi_comm_reg_t *regcom=NULL; - opal_list_item_t *item=opal_list_get_first (&ompi_registered_comms); - - regcom = 
(ompi_comm_reg_t *)item; - return regcom->cid; -} /**************************************************************************/ /**************************************************************************/ /**************************************************************************/ @@ -632,174 +436,41 @@ static uint32_t ompi_comm_lowest_cid (void) * comm.c is, that this file contains the allreduce implementations * which are required, and thus we avoid having duplicate code... */ -int ompi_comm_activate ( ompi_communicator_t** newcomm, - ompi_communicator_t* comm, - ompi_communicator_t* bridgecomm, - void* local_leader, - void* remote_leader, - int mode, - int send_first ) -{ - int ret = 0; - - int ok=0, gok=0; - ompi_comm_cid_allredfct* allredfnct; - - /* Step 1: the barrier, after which it is allowed to - * send messages over the new communicator - */ - switch (mode) - { - case OMPI_COMM_CID_INTRA: - allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra; - break; - case OMPI_COMM_CID_INTER: - allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_inter; - break; - case OMPI_COMM_CID_INTRA_BRIDGE: - allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_bridge; - break; - case OMPI_COMM_CID_INTRA_OOB: - allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_oob; - break; - case OMPI_COMM_CID_GROUP: - allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_group; - break; - default: - return MPI_UNDEFINED; - break; - } - - if (MPI_UNDEFINED != (*newcomm)->c_local_group->grp_my_rank) { - /* Initialize the PML stuff in the newcomm */ - if ( OMPI_SUCCESS != (ret = MCA_PML_CALL(add_comm(*newcomm))) ) { - goto bail_on_error; - } - - OMPI_COMM_SET_PML_ADDED(*newcomm); - } - - - ret = (allredfnct)(&ok, &gok, 1, MPI_MIN, comm, bridgecomm, - local_leader, remote_leader, send_first ); - if( OMPI_SUCCESS != ret ) { - goto bail_on_error; - } - - - - /** - * Check to see if this process is in the new communicator. 
- * - * Specifically, this function is invoked by all proceses in the - * old communicator, regardless of whether they are in the new - * communicator or not. This is because it is far simpler to use - * MPI collective functions on the old communicator to determine - * some data for the new communicator (e.g., remote_leader) than - * to kludge up our own pseudo-collective routines over just the - * processes in the new communicator. Hence, *all* processes in - * the old communicator need to invoke this function. - * - * That being said, only processes in the new communicator need to - * select a coll module for the new communicator. More - * specifically, proceses who are not in the new communicator - * should *not* select a coll module -- for example, - * ompi_comm_rank(newcomm) returns MPI_UNDEFINED for processes who - * are not in the new communicator. This can cause errors in the - * selection / initialization of a coll module. Plus, it's - * wasteful -- processes in the new communicator will end up - * freeing the new communicator anyway, so we might as well leave - * the coll selection as NULL (the coll base comm unselect code - * handles that case properly). - */ - if (MPI_UNDEFINED == (*newcomm)->c_local_group->grp_my_rank) { - return OMPI_SUCCESS; - } - - /* Let the collectives components fight over who will do - collective on this new comm. */ - if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(*newcomm))) { - goto bail_on_error; - } - - /* For an inter communicator, we have to deal with the potential - * problem of what is happening if the local_comm that we created - * has a lower CID than the parent comm. This is not a problem - * as long as the user calls MPI_Comm_free on the inter communicator. - * However, if the communicators are not freed by the user but released - * by Open MPI in MPI_Finalize, we walk through the list of still available - * communicators and free them one by one. 
Thus, local_comm is freed before - * the actual inter-communicator. However, the local_comm pointer in the - * inter communicator will still contain the 'previous' address of the local_comm - * and thus this will lead to a segmentation violation. In order to prevent - * that from happening, we increase the reference counter local_comm - * by one if its CID is lower than the parent. We cannot increase however - * its reference counter if the CID of local_comm is larger than - * the CID of the inter communicators, since a regular MPI_Comm_free would - * leave in that the case the local_comm hanging around and thus we would not - * recycle CID's properly, which was the reason and the cause for this trouble. - */ - if ( OMPI_COMM_IS_INTER(*newcomm)) { - if ( OMPI_COMM_CID_IS_LOWER(*newcomm, comm)) { - OMPI_COMM_SET_EXTRA_RETAIN (*newcomm); - OBJ_RETAIN (*newcomm); - } - } - - - return OMPI_SUCCESS; - - bail_on_error: - OBJ_RELEASE(*newcomm); - *newcomm = MPI_COMM_NULL; - return ret; -} /* Non-blocking version of ompi_comm_activate */ -struct ompi_comm_activate_nb_context { - ompi_communicator_t **newcomm; - ompi_communicator_t *comm; - - /* storage for activate barrier */ - int ok; -}; - static int ompi_comm_activate_nb_complete (ompi_comm_request_t *request); -int ompi_comm_activate_nb (ompi_communicator_t **newcomm, - ompi_communicator_t *comm, - ompi_communicator_t *bridgecomm, - int mode, ompi_request_t **req) +int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *comm, + ompi_communicator_t *bridgecomm, const void *arg0, + const void *arg1, bool send_first, int mode, ompi_request_t **req) { - struct ompi_comm_activate_nb_context *context; + ompi_comm_cid_context_t *context; ompi_comm_request_t *request; ompi_request_t *subreq; int ret = 0; - request = ompi_comm_request_get (); - if (NULL == request) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - context = calloc (1, sizeof (*context)); + context = mca_comm_cid_context_alloc (*newcomm, comm, 
bridgecomm, arg0, arg1, "activate", + send_first, mode); if (NULL == context) { - ompi_comm_request_return (request); return OMPI_ERR_OUT_OF_RESOURCE; } - context->newcomm = newcomm; - context->comm = comm; + /* keep track of the pointer so it can be set to MPI_COMM_NULL on failure */ + context->newcommp = newcomm; - request->context = context; - - if (OMPI_COMM_CID_INTRA != mode && OMPI_COMM_CID_INTER != mode) { - return MPI_UNDEFINED; + request = ompi_comm_request_get (); + if (NULL == request) { + OBJ_RELEASE(context); + return OMPI_ERR_OUT_OF_RESOURCE; } + request->context = &context->super; + if (MPI_UNDEFINED != (*newcomm)->c_local_group->grp_my_rank) { /* Initialize the PML stuff in the newcomm */ if ( OMPI_SUCCESS != (ret = MCA_PML_CALL(add_comm(*newcomm))) ) { OBJ_RELEASE(newcomm); + OBJ_RELEASE(context); *newcomm = MPI_COMM_NULL; return ret; } @@ -809,14 +480,8 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, /* Step 1: the barrier, after which it is allowed to * send messages over the new communicator */ - if (mode == OMPI_COMM_CID_INTRA) { - ret = ompi_comm_allreduce_intra_nb (&context->ok, &context->ok, 1, MPI_MIN, - context->comm, bridgecomm, &subreq); - } else { - ret = ompi_comm_allreduce_inter_nb (&context->ok, &context->ok, 1, MPI_MIN, - context->comm, bridgecomm, &subreq); - } - + ret = context->allreduce_fn (&context->ok, &context->ok, 1, MPI_MIN, context, + &subreq); if (OMPI_SUCCESS != ret) { ompi_comm_request_return (request); return ret; @@ -830,10 +495,28 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, return OMPI_SUCCESS; } +int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm, + ompi_communicator_t *bridgecomm, const void *arg0, + const void *arg1, bool send_first, int mode) +{ + ompi_request_t *req; + int rc; + + rc = ompi_comm_activate_nb (newcomm, comm, bridgecomm, arg0, arg1, send_first, mode, &req); + if (OMPI_SUCCESS != rc) { + return rc; + } + + ompi_request_wait_completion 
(req); + rc = req->req_status.MPI_ERROR; + ompi_comm_request_return ((ompi_comm_request_t *) req); + + return rc; +} + static int ompi_comm_activate_nb_complete (ompi_comm_request_t *request) { - struct ompi_comm_activate_nb_context *context = - (struct ompi_comm_activate_nb_context *) request->context; + ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context; int ret; /** @@ -860,15 +543,15 @@ static int ompi_comm_activate_nb_complete (ompi_comm_request_t *request) * the coll selection as NULL (the coll base comm unselect code * handles that case properly). */ - if (MPI_UNDEFINED == (*context->newcomm)->c_local_group->grp_my_rank) { + if (MPI_UNDEFINED == (context->newcomm)->c_local_group->grp_my_rank) { return OMPI_SUCCESS; } /* Let the collectives components fight over who will do collective on this new comm. */ - if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(*context->newcomm))) { - OBJ_RELEASE(*context->newcomm); - *context->newcomm = MPI_COMM_NULL; + if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(context->newcomm))) { + OBJ_RELEASE(context->newcomm); + *context->newcommp = MPI_COMM_NULL; return ret; } @@ -889,10 +572,10 @@ static int ompi_comm_activate_nb_complete (ompi_comm_request_t *request) * leave in that the case the local_comm hanging around and thus we would not * recycle CID's properly, which was the reason and the cause for this trouble. 
*/ - if (OMPI_COMM_IS_INTER(*context->newcomm)) { - if (OMPI_COMM_CID_IS_LOWER(*context->newcomm, context->comm)) { - OMPI_COMM_SET_EXTRA_RETAIN (*context->newcomm); - OBJ_RETAIN (*context->newcomm); + if (OMPI_COMM_IS_INTER(context->newcomm)) { + if (OMPI_COMM_CID_IS_LOWER(context->newcomm, context->comm)) { + OMPI_COMM_SET_EXTRA_RETAIN (context->newcomm); + OBJ_RETAIN (context->newcomm); } } @@ -903,569 +586,495 @@ static int ompi_comm_activate_nb_complete (ompi_comm_request_t *request) /**************************************************************************/ /**************************************************************************/ /**************************************************************************/ -/* Arguments not used in this implementation: - * - bridgecomm - * - local_leader - * - remote_leader - * - send_first - */ -static int ompi_comm_allreduce_intra ( int *inbuf, int *outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *comm, - ompi_communicator_t *bridgecomm, - void* local_leader, - void* remote_leader, - int send_first ) +static int ompi_comm_allreduce_intra_nb (int *inbuf, int *outbuf, int count, struct ompi_op_t *op, + ompi_comm_cid_context_t *context, ompi_request_t **req) { - return comm->c_coll.coll_allreduce ( inbuf, outbuf, count, MPI_INT, op, comm, - comm->c_coll.coll_allreduce_module ); -} + ompi_communicator_t *comm = context->comm; -static int ompi_comm_allreduce_intra_nb (int *inbuf, int *outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *comm, - ompi_communicator_t *bridgecomm, - ompi_request_t **req) -{ return comm->c_coll.coll_iallreduce (inbuf, outbuf, count, MPI_INT, op, comm, req, comm->c_coll.coll_iallreduce_module); } - -/* Arguments not used in this implementation: - * - bridgecomm - * - local_leader - * - remote_leader - * - send_first - */ -static int ompi_comm_allreduce_inter ( int *inbuf, int *outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *intercomm, - 
ompi_communicator_t *bridgecomm, - void* local_leader, - void* remote_leader, - int send_first ) -{ - int local_rank, rsize; - int rc; - int *sbuf; - int *tmpbuf=NULL; - int *rcounts=NULL, scount=0; - int *rdisps=NULL; - - if ( !OMPI_COMM_IS_INTER (intercomm)) { - return MPI_ERR_COMM; - } - - /* Allocate temporary arrays */ - rsize = ompi_comm_remote_size (intercomm); - local_rank = ompi_comm_rank ( intercomm ); - - tmpbuf = (int *) malloc ( count * sizeof(int)); - rdisps = (int *) calloc ( rsize, sizeof(int)); - rcounts = (int *) calloc ( rsize, sizeof(int) ); - if ( OPAL_UNLIKELY (NULL == tmpbuf || NULL == rdisps || NULL == rcounts)) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - /* Execute the inter-allreduce: the result of our group will - be in the buffer of the remote group */ - rc = intercomm->c_coll.coll_allreduce ( inbuf, tmpbuf, count, MPI_INT, - op, intercomm, - intercomm->c_coll.coll_allreduce_module); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - if ( 0 == local_rank ) { - MPI_Request req; - - /* for the allgatherv later */ - scount = count; - - /* local leader exchange their data and determine the overall result - for both groups */ - rc = MCA_PML_CALL(irecv (outbuf, count, MPI_INT, 0, - OMPI_COMM_ALLREDUCE_TAG, - intercomm, &req)); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - rc = MCA_PML_CALL(send (tmpbuf, count, MPI_INT, 0, - OMPI_COMM_ALLREDUCE_TAG, - MCA_PML_BASE_SEND_STANDARD, - intercomm)); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - rc = ompi_request_wait ( &req, MPI_STATUS_IGNORE ); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - ompi_op_reduce (op, tmpbuf, outbuf, count, MPI_INT); - } - - /* distribute the overall result to all processes in the other group. - Instead of using bcast, we are using here allgatherv, to avoid the - possible deadlock. Else, we need an algorithm to determine, - which group sends first in the inter-bcast and which receives - the result first. 
- */ - rcounts[0] = count; - sbuf = outbuf; - rc = intercomm->c_coll.coll_allgatherv (sbuf, scount, MPI_INT, outbuf, - rcounts, rdisps, MPI_INT, - intercomm, - intercomm->c_coll.coll_allgatherv_module); - - exit: - if ( NULL != tmpbuf ) { - free ( tmpbuf ); - } - if ( NULL != rcounts ) { - free ( rcounts ); - } - if ( NULL != rdisps ) { - free ( rdisps ); - } - - return (rc); -} - /* Non-blocking version of ompi_comm_allreduce_inter */ -struct ompi_comm_allreduce_inter_context { - int *inbuf; - int *outbuf; - int count; - struct ompi_op_t *op; - ompi_communicator_t *intercomm; - ompi_communicator_t *bridgecomm; - int *tmpbuf; - int *rcounts; - int *rdisps; -}; - -static void ompi_comm_allreduce_inter_context_free (struct ompi_comm_allreduce_inter_context *context) -{ - if (context->tmpbuf) { - free (context->tmpbuf); - } - - if (context->rdisps) { - free (context->rdisps); - } - - if (context->rcounts) { - free (context->rcounts); - } - - free (context); -} - static int ompi_comm_allreduce_inter_leader_exchange (ompi_comm_request_t *request); static int ompi_comm_allreduce_inter_leader_reduce (ompi_comm_request_t *request); -static int ompi_comm_allreduce_inter_allgather (ompi_comm_request_t *request); -static int ompi_comm_allreduce_inter_allgather_complete (ompi_comm_request_t *request); +static int ompi_comm_allreduce_inter_bcast (ompi_comm_request_t *request); -/* Arguments not used in this implementation: - * - bridgecomm - */ static int ompi_comm_allreduce_inter_nb (int *inbuf, int *outbuf, int count, struct ompi_op_t *op, - ompi_communicator_t *intercomm, - ompi_communicator_t *bridgecomm, + ompi_comm_cid_context_t *cid_context, ompi_request_t **req) { - struct ompi_comm_allreduce_inter_context *context = NULL; - ompi_comm_request_t *request = NULL; + ompi_communicator_t *intercomm = cid_context->comm; + ompi_comm_allreduce_context_t *context; + ompi_comm_request_t *request; ompi_request_t *subreq; - int local_rank, rsize, rc; + int local_rank, rc; - if 
(!OMPI_COMM_IS_INTER (intercomm)) { + if (!OMPI_COMM_IS_INTER (cid_context->comm)) { return MPI_ERR_COMM; } request = ompi_comm_request_get (); - if (NULL == request) { + if (OPAL_UNLIKELY(NULL == request)) { return OMPI_ERR_OUT_OF_RESOURCE; } - context = calloc (1, sizeof (*context)); - if (NULL == context) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + context = ompi_comm_allreduce_context_alloc (inbuf, outbuf, count, op, cid_context); + if (OPAL_UNLIKELY(NULL == context)) { + ompi_comm_request_return (request); + return OMPI_ERR_OUT_OF_RESOURCE; } - context->inbuf = inbuf; - context->outbuf = outbuf; - context->count = count; - context->op = op; - context->intercomm = intercomm; - context->bridgecomm = bridgecomm; + request->context = &context->super; /* Allocate temporary arrays */ - rsize = ompi_comm_remote_size (intercomm); local_rank = ompi_comm_rank (intercomm); - context->tmpbuf = (int *) calloc (count, sizeof(int)); - context->rdisps = (int *) calloc (rsize, sizeof(int)); - context->rcounts = (int *) calloc (rsize, sizeof(int)); - if (OPAL_UNLIKELY (NULL == context->tmpbuf || NULL == context->rdisps || NULL == context->rcounts)) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + if (0 == local_rank) { + context->tmpbuf = (int *) calloc (count, sizeof(int)); + if (OPAL_UNLIKELY (NULL == context->tmpbuf)) { + ompi_comm_request_return (request); + return OMPI_ERR_OUT_OF_RESOURCE; + } } - request->context = context; - /* Execute the inter-allreduce: the result from the local will be in the buffer of the remote group * and vise-versa. 
*/ - rc = intercomm->c_coll.coll_iallreduce (inbuf, context->tmpbuf, count, MPI_INT, op, intercomm, - &subreq, intercomm->c_coll.coll_iallreduce_module); - if (OMPI_SUCCESS != rc) { - goto exit; + rc = intercomm->c_local_comm->c_coll.coll_ireduce (inbuf, context->tmpbuf, count, MPI_INT, op, 0, + intercomm->c_local_comm, &subreq, + intercomm->c_local_comm->c_coll.coll_ireduce_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_comm_request_return (request); + return rc; } if (0 == local_rank) { ompi_comm_request_schedule_append (request, ompi_comm_allreduce_inter_leader_exchange, &subreq, 1); } else { - ompi_comm_request_schedule_append (request, ompi_comm_allreduce_inter_allgather, &subreq, 1); + ompi_comm_request_schedule_append (request, ompi_comm_allreduce_inter_bcast, &subreq, 1); } ompi_comm_request_start (request); *req = &request->super; -exit: - if (OMPI_SUCCESS != rc) { - if (context) { - ompi_comm_allreduce_inter_context_free (context); - } - - if (request) { - request->context = NULL; - ompi_comm_request_return (request); - } - } - - return rc; + return OMPI_SUCCESS; } static int ompi_comm_allreduce_inter_leader_exchange (ompi_comm_request_t *request) { - struct ompi_comm_allreduce_inter_context *context = - (struct ompi_comm_allreduce_inter_context *) request->context; + ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context; + ompi_communicator_t *intercomm = context->cid_context->comm; ompi_request_t *subreqs[2]; int rc; /* local leader exchange their data and determine the overall result for both groups */ rc = MCA_PML_CALL(irecv (context->outbuf, context->count, MPI_INT, 0, OMPI_COMM_ALLREDUCE_TAG, - context->intercomm, subreqs)); - if ( OMPI_SUCCESS != rc ) { - goto exit; + intercomm, subreqs)); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; } rc = MCA_PML_CALL(isend (context->tmpbuf, context->count, MPI_INT, 0, OMPI_COMM_ALLREDUCE_TAG, - MCA_PML_BASE_SEND_STANDARD, context->intercomm, subreqs + 
1)); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - ompi_comm_request_schedule_append (request, ompi_comm_allreduce_inter_leader_reduce, subreqs, 2); - -exit: - if (OMPI_SUCCESS != rc) { - ompi_comm_allreduce_inter_context_free (context); - request->context = NULL; + MCA_PML_BASE_SEND_STANDARD, intercomm, subreqs + 1)); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; } - return rc; + return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_inter_leader_reduce, subreqs, 2); } static int ompi_comm_allreduce_inter_leader_reduce (ompi_comm_request_t *request) { - struct ompi_comm_allreduce_inter_context *context = - (struct ompi_comm_allreduce_inter_context *) request->context; + ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context; ompi_op_reduce (context->op, context->tmpbuf, context->outbuf, context->count, MPI_INT); - return ompi_comm_allreduce_inter_allgather (request); + return ompi_comm_allreduce_inter_bcast (request); } -static int ompi_comm_allreduce_inter_allgather (ompi_comm_request_t *request) +static int ompi_comm_allreduce_inter_bcast (ompi_comm_request_t *request) { - struct ompi_comm_allreduce_inter_context *context = - (struct ompi_comm_allreduce_inter_context *) request->context; + ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context; + ompi_communicator_t *comm = context->cid_context->comm->c_local_comm; ompi_request_t *subreq; - int scount = 0, rc; - - /* distribute the overall result to all processes in the other group. - Instead of using bcast, we are using here allgatherv, to avoid the - possible deadlock. Else, we need an algorithm to determine, - which group sends first in the inter-bcast and which receives - the result first. - */ + int rc; - if (0 != ompi_comm_rank (context->intercomm)) { - context->rcounts[0] = context->count; - } else { - scount = context->count; + /* both roots have the same result. 
broadcast to the local group */ + rc = comm->c_coll.coll_ibcast (context->outbuf, context->count, MPI_INT, 0, comm, + &subreq, comm->c_coll.coll_ibcast_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; } - rc = context->intercomm->c_coll.coll_iallgatherv (context->outbuf, scount, MPI_INT, context->outbuf, - context->rcounts, context->rdisps, MPI_INT, - context->intercomm, &subreq, - context->intercomm->c_coll.coll_iallgatherv_module); - if (OMPI_SUCCESS != rc) { - ompi_comm_allreduce_inter_context_free (context); - request->context = NULL; + return ompi_comm_request_schedule_append (request, NULL, &subreq, 1); +} + +static int ompi_comm_allreduce_bridged_schedule_bcast (ompi_comm_request_t *request) +{ + ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context; + ompi_communicator_t *comm = context->cid_context->comm; + ompi_request_t *subreq; + int rc; + + rc = comm->c_coll.coll_ibcast (context->outbuf, context->count, MPI_INT, + context->cid_context->local_leader, comm, + &subreq, comm->c_coll.coll_ibcast_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { return rc; } - ompi_comm_request_schedule_append (request, ompi_comm_allreduce_inter_allgather_complete, &subreq, 1); + return ompi_comm_request_schedule_append (request, NULL, &subreq, 1); +} - return OMPI_SUCCESS; +static int ompi_comm_allreduce_bridged_xchng_complete (ompi_comm_request_t *request) +{ + ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context; + + /* step 3: reduce leader data */ + ompi_op_reduce (context->op, context->tmpbuf, context->outbuf, context->count, MPI_INT); + + /* schedule the broadcast to local peers */ + return ompi_comm_allreduce_bridged_schedule_bcast (request); } -static int ompi_comm_allreduce_inter_allgather_complete (ompi_comm_request_t *request) +static int ompi_comm_allreduce_bridged_reduce_complete (ompi_comm_request_t *request) { - /* free this request's context */ - 
ompi_comm_allreduce_inter_context_free (request->context); - /* prevent a double-free from the progress engine */ - request->context = NULL; + ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context; + ompi_communicator_t *bridgecomm = context->cid_context->bridgecomm; + ompi_request_t *subreq[2]; + int rc; - /* done */ - return OMPI_SUCCESS; + /* step 2: leader exchange */ + rc = MCA_PML_CALL(irecv (context->outbuf, context->count, MPI_INT, context->cid_context->remote_leader, + OMPI_COMM_ALLREDUCE_TAG, bridgecomm, subreq + 1)); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + rc = MCA_PML_CALL(isend (context->tmpbuf, context->count, MPI_INT, context->cid_context->remote_leader, + OMPI_COMM_ALLREDUCE_TAG, MCA_PML_BASE_SEND_STANDARD, bridgecomm, + subreq)); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_bridged_xchng_complete, subreq, 2); } -/* Arguments not used in this implementation: - * - send_first - */ -static int ompi_comm_allreduce_intra_bridge (int *inbuf, int *outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *comm, - ompi_communicator_t *bcomm, - void* lleader, void* rleader, - int send_first ) +static int ompi_comm_allreduce_intra_bridge_nb (int *inbuf, int *outbuf, + int count, struct ompi_op_t *op, + ompi_comm_cid_context_t *cid_context, + ompi_request_t **req) { - int *tmpbuf=NULL; - int local_rank; - int i; + ompi_communicator_t *comm = cid_context->comm; + ompi_comm_allreduce_context_t *context; + int local_rank = ompi_comm_rank (comm); + ompi_comm_request_t *request; + ompi_request_t *subreq; int rc; - int local_leader, remote_leader; - local_leader = (*((int*)lleader)); - remote_leader = (*((int*)rleader)); + context = ompi_comm_allreduce_context_alloc (inbuf, outbuf, count, op, cid_context); + if (OPAL_UNLIKELY(NULL == context)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if (local_rank == 
cid_context->local_leader) { + context->tmpbuf = (int *) calloc (count, sizeof (int)); + if (OPAL_UNLIKELY(NULL == context->tmpbuf)) { + OBJ_RELEASE(context); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } - if ( &ompi_mpi_op_sum.op != op && &ompi_mpi_op_prod.op != op && - &ompi_mpi_op_max.op != op && &ompi_mpi_op_min.op != op ) { - return MPI_ERR_OP; + request = ompi_comm_request_get (); + if (OPAL_UNLIKELY(NULL == request)) { + OBJ_RELEASE(context); + return OMPI_ERR_OUT_OF_RESOURCE; } - local_rank = ompi_comm_rank ( comm ); - tmpbuf = (int *) malloc ( count * sizeof(int)); - if ( NULL == tmpbuf ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + request->context = &context->super; + + if (cid_context->local_leader == local_rank) { + memcpy (context->tmpbuf, inbuf, count * sizeof (int)); } - /* Intercomm_create */ - rc = comm->c_coll.coll_allreduce ( inbuf, tmpbuf, count, MPI_INT, - op, comm, comm->c_coll.coll_allreduce_module ); + /* step 1: reduce to the local leader */ + rc = comm->c_coll.coll_ireduce (inbuf, context->tmpbuf, count, MPI_INT, op, + cid_context->local_leader, comm, &subreq, + comm->c_coll.coll_ireduce_module); if ( OMPI_SUCCESS != rc ) { - goto exit; + ompi_comm_request_return (request); + return rc; } - if (local_rank == local_leader ) { - MPI_Request req; + if (cid_context->local_leader == local_rank) { + rc = ompi_comm_request_schedule_append (request, ompi_comm_allreduce_bridged_reduce_complete, + &subreq, 1); + } else { + /* go ahead and schedule the broadcast */ + ompi_comm_request_schedule_append (request, NULL, &subreq, 1); - rc = MCA_PML_CALL(irecv ( outbuf, count, MPI_INT, remote_leader, - OMPI_COMM_ALLREDUCE_TAG, - bcomm, &req)); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - rc = MCA_PML_CALL(send (tmpbuf, count, MPI_INT, remote_leader, - OMPI_COMM_ALLREDUCE_TAG, - MCA_PML_BASE_SEND_STANDARD, bcomm)); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - rc = ompi_request_wait( &req, MPI_STATUS_IGNORE); - if ( OMPI_SUCCESS != rc ) { - goto 
exit; - } + rc = ompi_comm_allreduce_bridged_schedule_bcast (request); + } - if ( &ompi_mpi_op_max.op == op ) { - for ( i = 0 ; i < count; i++ ) { - if (tmpbuf[i] > outbuf[i]) { - outbuf[i] = tmpbuf[i]; - } - } - } - else if ( &ompi_mpi_op_min.op == op ) { - for ( i = 0 ; i < count; i++ ) { - if (tmpbuf[i] < outbuf[i]) { - outbuf[i] = tmpbuf[i]; - } - } - } - else if ( &ompi_mpi_op_sum.op == op ) { - for ( i = 0 ; i < count; i++ ) { - outbuf[i] += tmpbuf[i]; - } - } - else if ( &ompi_mpi_op_prod.op == op ) { - for ( i = 0 ; i < count; i++ ) { - outbuf[i] *= tmpbuf[i]; - } + if (OMPI_SUCCESS != rc) { + ompi_comm_request_return (request); + return rc; + } + + ompi_comm_request_start (request); + + *req = &request->super; + + return OMPI_SUCCESS; +} + +static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *request) +{ + ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context; + ompi_comm_cid_context_t *cid_context = context->cid_context; + int32_t size_count = context->count; + opal_value_t info; + opal_pmix_pdata_t pdat; + opal_buffer_t sbuf; + int rc; + int bytes_written; + const int output_id = 0; + const int verbosity_level = 1; + + OBJ_CONSTRUCT(&sbuf, opal_buffer_t); + + if (OPAL_SUCCESS != (rc = opal_dss.pack(&sbuf, context->tmpbuf, (int32_t)context->count, OPAL_INT))) { + OBJ_DESTRUCT(&sbuf); + opal_output_verbose (verbosity_level, output_id, "pack failed. rc %d\n", rc); + return rc; + } + + OBJ_CONSTRUCT(&info, opal_value_t); + OBJ_CONSTRUCT(&pdat, opal_pmix_pdata_t); + + info.type = OPAL_BYTE_OBJECT; + pdat.value.type = OPAL_BYTE_OBJECT; + + opal_dss.unload(&sbuf, (void**)&info.data.bo.bytes, &info.data.bo.size); + OBJ_DESTRUCT(&sbuf); + + bytes_written = asprintf(&info.key, + cid_context->send_first ? 
"%s:%s:send:%d" + : "%s:%s:recv:%d", + cid_context->port_string, + cid_context->pmix_tag, + cid_context->iter); + + if (bytes_written == -1) { + opal_output_verbose (verbosity_level, output_id, "writing info.key failed\n"); + } else { + bytes_written = asprintf(&pdat.value.key, + cid_context->send_first ? "%s:%s:recv:%d" + : "%s:%s:send:%d", + cid_context->port_string, + cid_context->pmix_tag, + cid_context->iter); + + if (bytes_written == -1) { + opal_output_verbose (verbosity_level, output_id, "writing pdat.value.key failed\n"); } } - rc = comm->c_coll.coll_bcast ( outbuf, count, MPI_INT, local_leader, - comm, comm->c_coll.coll_bcast_module ); + if (bytes_written == -1) { + // write with separate calls, + // just in case the args are the cause of failure + opal_output_verbose (verbosity_level, output_id, "send first: %d\n", cid_context->send_first); + opal_output_verbose (verbosity_level, output_id, "port string: %s\n", cid_context->port_string); + opal_output_verbose (verbosity_level, output_id, "pmix tag: %s\n", cid_context->pmix_tag); + opal_output_verbose (verbosity_level, output_id, "iter: %d\n", cid_context->iter); + return OMPI_ERR_OUT_OF_RESOURCE; + } - exit: - if (NULL != tmpbuf ) { - free (tmpbuf); + /* this macro is not actually non-blocking. if a non-blocking version becomes available this function + * needs to be reworked to take advantage of it. 
*/ + OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 60); + OBJ_DESTRUCT(&info); + if (OPAL_SUCCESS != rc) { + OBJ_DESTRUCT(&pdat); + return rc; } - return (rc); -} + OBJ_CONSTRUCT(&sbuf, opal_buffer_t); + opal_dss.load(&sbuf, pdat.value.data.bo.bytes, pdat.value.data.bo.size); + pdat.value.data.bo.bytes = NULL; + pdat.value.data.bo.size = 0; + OBJ_DESTRUCT(&pdat); -typedef struct { - opal_buffer_t buf; - bool active; -} comm_cid_return_t; + rc = opal_dss.unpack (&sbuf, context->outbuf, &size_count, OPAL_INT); + OBJ_DESTRUCT(&sbuf); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + return rc; + } -static void comm_cid_recv(int status, - ompi_process_name_t* peer, - opal_buffer_t* buffer, - ompi_rml_tag_t tag, - void* cbdata) -{ - comm_cid_return_t *rcid = (comm_cid_return_t*)cbdata; + ompi_op_reduce (context->op, context->tmpbuf, context->outbuf, size_count, MPI_INT); - opal_dss.copy_payload(&rcid->buf, buffer); - rcid->active = false; + return ompi_comm_allreduce_bridged_schedule_bcast (request); } -/* Arguments not used in this implementation: - * - bridgecomm - * - * lleader is the local rank of root in comm - * rleader is the OOB contact information of the - * root processes in the other world. 
- */ -static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *comm, - ompi_communicator_t *bridgecomm, - void* lleader, void* rleader, - int send_first ) +static int ompi_comm_allreduce_intra_pmix_nb (int *inbuf, int *outbuf, + int count, struct ompi_op_t *op, + ompi_comm_cid_context_t *cid_context, + ompi_request_t **req) { - int *tmpbuf=NULL; + ompi_communicator_t *comm = cid_context->comm; + ompi_comm_allreduce_context_t *context; + int local_rank = ompi_comm_rank (comm); + ompi_comm_request_t *request; + ompi_request_t *subreq; int rc; - int local_leader, local_rank; - ompi_process_name_t *remote_leader=NULL; - int32_t size_count; - comm_cid_return_t rcid; - - local_leader = (*((int*)lleader)); - remote_leader = (ompi_process_name_t*)rleader; - size_count = count; - - local_rank = ompi_comm_rank ( comm ); - tmpbuf = (int *) malloc ( count * sizeof(int)); - if ( NULL == tmpbuf ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + + context = ompi_comm_allreduce_context_alloc (inbuf, outbuf, count, op, cid_context); + if (OPAL_UNLIKELY(NULL == context)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if (cid_context->local_leader == local_rank) { + context->tmpbuf = (int *) calloc (count, sizeof(int)); + if (OPAL_UNLIKELY(NULL == context->tmpbuf)) { + OBJ_RELEASE(context); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + request = ompi_comm_request_get (); + if (NULL == request) { + OBJ_RELEASE(context); + return OMPI_ERR_OUT_OF_RESOURCE; } + request->context = &context->super; + /* comm is an intra-communicator */ - rc = comm->c_coll.coll_allreduce(inbuf,tmpbuf,count,MPI_INT,op, comm, - comm->c_coll.coll_allreduce_module); + rc = comm->c_coll.coll_ireduce (inbuf, context->tmpbuf, count, MPI_INT, op, + cid_context->local_leader, comm, + &subreq, comm->c_coll.coll_ireduce_module); if ( OMPI_SUCCESS != rc ) { - goto exit; + ompi_comm_request_return (request); + return rc; } - if (local_rank == 
local_leader ) { - opal_buffer_t *sbuf; + if (cid_context->local_leader == local_rank) { + rc = ompi_comm_request_schedule_append (request, ompi_comm_allreduce_pmix_reduce_complete, + &subreq, 1); + } else { + /* go ahead and schedule the broadcast */ + rc = ompi_comm_request_schedule_append (request, NULL, &subreq, 1); - sbuf = OBJ_NEW(opal_buffer_t); + rc = ompi_comm_allreduce_bridged_schedule_bcast (request); + } - if (OPAL_SUCCESS != (rc = opal_dss.pack(sbuf, tmpbuf, (int32_t)count, OPAL_INT))) { - goto exit; - } + if (OMPI_SUCCESS != rc) { + ompi_comm_request_return (request); + return rc; + } - if ( send_first ) { - if (0 > (rc = ompi_rte_send_buffer_nb(remote_leader, sbuf, - OMPI_RML_TAG_COMM_CID_INTRA, - ompi_rte_send_cbfunc, NULL))) { - goto exit; - } - OBJ_CONSTRUCT(&rcid.buf, opal_buffer_t); - rcid.active = true; - ompi_rte_recv_buffer_nb(remote_leader, OMPI_RML_TAG_COMM_CID_INTRA, - OMPI_RML_NON_PERSISTENT, comm_cid_recv, &rcid); - while (rcid.active) { - opal_progress(); - } - } - else { - OBJ_CONSTRUCT(&rcid.buf, opal_buffer_t); - rcid.active = true; - ompi_rte_recv_buffer_nb(remote_leader, OMPI_RML_TAG_COMM_CID_INTRA, - OMPI_RML_NON_PERSISTENT, comm_cid_recv, &rcid); - while (rcid.active) { - opal_progress(); - } - if (0 > (rc = ompi_rte_send_buffer_nb(remote_leader, sbuf, - OMPI_RML_TAG_COMM_CID_INTRA, - ompi_rte_send_cbfunc, NULL))) { - goto exit; + ompi_comm_request_start (request); + *req = (ompi_request_t *) request; + + /* use the same function as bridged to schedule the broadcast */ + return OMPI_SUCCESS; +} + +static int ompi_comm_allreduce_group_broadcast (ompi_comm_request_t *request) +{ + ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context; + ompi_comm_cid_context_t *cid_context = context->cid_context; + ompi_request_t *subreq[2]; + int subreq_count = 0; + int rc; + + for (int i = 0 ; i < 2 ; ++i) { + if (MPI_PROC_NULL != context->peers_comm[i + 1]) { + rc = MCA_PML_CALL(isend(context->outbuf, 
context->count, MPI_INT, context->peers_comm[i+1], + cid_context->pml_tag, MCA_PML_BASE_SEND_STANDARD, + cid_context->comm, subreq + subreq_count++)); + if (OMPI_SUCCESS != rc) { + return rc; } } + } - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&rcid.buf, outbuf, &size_count, OPAL_INT))) { - goto exit; - } - OBJ_DESTRUCT(&rcid.buf); - count = (int)size_count; + return ompi_comm_request_schedule_append (request, NULL, subreq, subreq_count); +} - ompi_op_reduce (op, tmpbuf, outbuf, count, MPI_INT); +static int ompi_comm_allreduce_group_recv_complete (ompi_comm_request_t *request) +{ + ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context; + ompi_comm_cid_context_t *cid_context = context->cid_context; + int *tmp = context->tmpbuf; + ompi_request_t *subreq[2]; + int rc; + + for (int i = 0 ; i < 2 ; ++i) { + if (MPI_PROC_NULL != context->peers_comm[i + 1]) { + ompi_op_reduce (context->op, tmp, context->outbuf, context->count, MPI_INT); + tmp += context->count; + } } - rc = comm->c_coll.coll_bcast (outbuf, count, MPI_INT, - local_leader, comm, - comm->c_coll.coll_bcast_module); + if (MPI_PROC_NULL != context->peers_comm[0]) { + /* interior node */ + rc = MCA_PML_CALL(isend(context->outbuf, context->count, MPI_INT, context->peers_comm[0], + cid_context->pml_tag, MCA_PML_BASE_SEND_STANDARD, + cid_context->comm, subreq)); + if (OMPI_SUCCESS != rc) { + return rc; + } + + rc = MCA_PML_CALL(irecv(context->outbuf, context->count, MPI_INT, context->peers_comm[0], + cid_context->pml_tag, cid_context->comm, subreq + 1)); + if (OMPI_SUCCESS != rc) { + return rc; + } - exit: - if (NULL != tmpbuf ) { - free (tmpbuf); + return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_group_broadcast, subreq, 2); } - return (rc); + /* root */ + return ompi_comm_allreduce_group_broadcast (request); } -static int ompi_comm_allreduce_group (int *inbuf, int* outbuf, - int count, struct ompi_op_t *op, - ompi_communicator_t *comm, - 
ompi_communicator_t *newcomm, - void* local_leader, - void* remote_leader, - int send_first) +static int ompi_comm_allreduce_group_nb (int *inbuf, int *outbuf, int count, + struct ompi_op_t *op, ompi_comm_cid_context_t *cid_context, + ompi_request_t **req) { - ompi_group_t *group = newcomm->c_local_group; - int peers_group[3], peers_comm[3]; + ompi_group_t *group = cid_context->newcomm->c_local_group; const int group_size = ompi_group_size (group); const int group_rank = ompi_group_rank (group); - int tag = *((int *) local_leader); - int *tmp1; - int i, rc=OMPI_SUCCESS; + ompi_communicator_t *comm = cid_context->comm; + int peers_group[3], *tmp, subreq_count = 0; + ompi_comm_allreduce_context_t *context; + ompi_comm_request_t *request; + ompi_request_t *subreq[3]; + + context = ompi_comm_allreduce_context_alloc (inbuf, outbuf, count, op, cid_context); + if (NULL == context) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + tmp = context->tmpbuf = calloc (sizeof (int), count * 3); + if (NULL == context->tmpbuf) { + OBJ_RELEASE(context); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + request = ompi_comm_request_get (); + if (NULL == request) { + OBJ_RELEASE(context); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + request->context = &context->super; /* basic recursive doubling allreduce on the group */ peers_group[0] = group_rank ? ((group_rank - 1) >> 1) : MPI_PROC_NULL; @@ -1473,54 +1082,28 @@ static int ompi_comm_allreduce_group (int *inbuf, int* outbuf, peers_group[2] = (group_rank * 2 + 2) < group_size ? 
group_rank * 2 + 2 : MPI_PROC_NULL; /* translate the ranks into the ranks of the parent communicator */ - ompi_group_translate_ranks (group, 3, peers_group, comm->c_local_group, peers_comm); - - tmp1 = malloc (sizeof (int) * count); + ompi_group_translate_ranks (group, 3, peers_group, comm->c_local_group, context->peers_comm); /* reduce */ memmove (outbuf, inbuf, sizeof (int) * count); - for (i = 1 ; i < 3 ; ++i) { - if (MPI_PROC_NULL != peers_comm[i]) { - rc = MCA_PML_CALL(recv(tmp1, count, MPI_INT, peers_comm[i], tag, comm, - MPI_STATUS_IGNORE)); + for (int i = 0 ; i < 2 ; ++i) { + if (MPI_PROC_NULL != context->peers_comm[i + 1]) { + int rc = MCA_PML_CALL(irecv(tmp, count, MPI_INT, context->peers_comm[i + 1], + cid_context->pml_tag, comm, subreq + subreq_count++)); if (OMPI_SUCCESS != rc) { - goto out; + ompi_comm_request_return (request); + return rc; } - /* this is integer reduction so we do not care about ordering */ - ompi_op_reduce (op, tmp1, outbuf, count, MPI_INT); - } - } - - if (MPI_PROC_NULL != peers_comm[0]) { - rc = MCA_PML_CALL(send(outbuf, count, MPI_INT, peers_comm[0], - tag, MCA_PML_BASE_SEND_STANDARD, comm)); - if (OMPI_SUCCESS != rc) { - goto out; - } - rc = MCA_PML_CALL(recv(outbuf, count, MPI_INT, peers_comm[0], - tag, comm, MPI_STATUS_IGNORE)); - if (OMPI_SUCCESS != rc) { - goto out; + tmp += count; } } - /* broadcast */ - for (i = 1 ; i < 3 ; ++i) { - if (MPI_PROC_NULL != peers_comm[i]) { - rc = MCA_PML_CALL(send(outbuf, count, MPI_INT, peers_comm[i], tag, - MCA_PML_BASE_SEND_STANDARD, comm)); - if (OMPI_SUCCESS != rc) { - goto out; - } - } - } + ompi_comm_request_schedule_append (request, ompi_comm_allreduce_group_recv_complete, subreq, subreq_count); - out: - free (tmp1); + ompi_comm_request_start (request); + *req = &request->super; - return rc; + return OMPI_SUCCESS; } - -END_C_DECLS diff --git a/ompi/communicator/comm_helpers.c b/ompi/communicator/comm_helpers.c index 247883fa091..584e80ee983 100644 --- 
a/ompi/communicator/comm_helpers.c +++ b/ompi/communicator/comm_helpers.c @@ -19,7 +19,7 @@ int ompi_comm_neighbors_count(MPI_Comm comm, int *indegree, int *outdegree, int *weighted) { int res; - if (OMPI_COMM_IS_CART(comm)) { + if (OMPI_COMM_IS_CART(comm)) { int ndims; res = MPI_Cartdim_get(comm, &ndims) ; if (MPI_SUCCESS != res) { @@ -57,7 +57,7 @@ int ompi_comm_neighbors(MPI_Comm comm, int maxindegree, int sources[], int sourc } if(indeg > maxindegree && outdeg > maxoutdegree) return MPI_ERR_TRUNCATE; /* we want to return *all* neighbors */ - if (OMPI_COMM_IS_CART(comm)) { + if (OMPI_COMM_IS_CART(comm)) { int ndims, i, rpeer, speer; res = MPI_Cartdim_get(comm, &ndims); if (MPI_SUCCESS != res) { diff --git a/ompi/communicator/comm_helpers.h b/ompi/communicator/comm_helpers.h index 0e001bee632..f589d09de84 100644 --- a/ompi/communicator/comm_helpers.h +++ b/ompi/communicator/comm_helpers.h @@ -37,5 +37,5 @@ int ompi_comm_neighbors(MPI_Comm comm, int maxindegree, int sources[], int sourc #ifdef __cplusplus } #endif - + #endif diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index d7980cb3246..f453ca1e8e1 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -6,24 +6,25 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. 
* All rights reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Universite Bordeaux 1 * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,16 +40,16 @@ #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/attribute/attribute.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" /* ** Table for Fortran <-> C communicator handle conversion ** Also used by P2P code to lookup communicator based ** on cid. -** +** */ -opal_pointer_array_t ompi_mpi_communicators = {{0}}; +opal_pointer_array_t ompi_mpi_communicators = {{0}}; opal_pointer_array_t ompi_comm_f_to_c_table = {{0}}; ompi_predefined_communicator_t ompi_mpi_comm_world = {{{0}}}; @@ -56,7 +57,7 @@ ompi_predefined_communicator_t ompi_mpi_comm_self = {{{0}}}; ompi_predefined_communicator_t ompi_mpi_comm_null = {{{0}}}; ompi_communicator_t *ompi_mpi_comm_parent = NULL; -ompi_predefined_communicator_t *ompi_mpi_comm_world_addr = +ompi_predefined_communicator_t *ompi_mpi_comm_world_addr = &ompi_mpi_comm_world; ompi_predefined_communicator_t *ompi_mpi_comm_self_addr = &ompi_mpi_comm_self; @@ -71,7 +72,7 @@ OBJ_CLASS_INSTANCE(ompi_communicator_t, opal_object_t, ompi_comm_destruct); /* This is the counter for the number of communicators, which contain - process with more than one jobid. This counter is a usefull + process with more than one jobid. This counter is a usefull shortcut for finalize and abort. 
*/ int ompi_comm_num_dyncomm=0; @@ -84,7 +85,7 @@ int ompi_comm_init(void) size_t size; /* Setup communicator array */ - OBJ_CONSTRUCT(&ompi_mpi_communicators, opal_pointer_array_t); + OBJ_CONSTRUCT(&ompi_mpi_communicators, opal_pointer_array_t); if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_mpi_communicators, 0, OMPI_FORTRAN_HANDLE_MAX, 64) ) { return OMPI_ERROR; @@ -101,12 +102,26 @@ int ompi_comm_init(void) OBJ_CONSTRUCT(&ompi_mpi_comm_world, ompi_communicator_t); assert(ompi_mpi_comm_world.comm.c_f_to_c_index == 0); group = OBJ_NEW(ompi_group_t); - group->grp_proc_pointers = ompi_proc_world(&size); - group->grp_proc_count = (int)size; + + size = ompi_process_info.num_procs; + group->grp_proc_pointers = (ompi_proc_t **) calloc (size, sizeof (ompi_proc_t *)); + group->grp_proc_count = size; + + for (size_t i = 0 ; i < size ; ++i) { + opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid}; + /* look for existing ompi_proc_t that matches this name */ + group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name); + if (NULL == group->grp_proc_pointers[i]) { + /* set sentinel value */ + group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name); + } else { + OBJ_RETAIN (group->grp_proc_pointers[i]); + } + } + OMPI_GROUP_SET_INTRINSIC (group); OMPI_GROUP_SET_DENSE (group); ompi_set_group_rank(group, ompi_proc_local()); - ompi_group_increment_proc_count (group); ompi_mpi_comm_world.comm.c_contextid = 0; ompi_mpi_comm_world.comm.c_id_start_index = 4; @@ -170,7 +185,7 @@ int ompi_comm_init(void) assert(ompi_mpi_comm_null.comm.c_f_to_c_index == 2); ompi_mpi_comm_null.comm.c_local_group = &ompi_mpi_group_null.group; ompi_mpi_comm_null.comm.c_remote_group = &ompi_mpi_group_null.group; - OBJ_RETAIN(&ompi_mpi_group_null.group); + OBJ_RETAIN(&ompi_mpi_group_null.group); OBJ_RETAIN(&ompi_mpi_group_null.group); ompi_mpi_comm_null.comm.c_contextid = 2; @@ -191,10 +206,6 @@ int ompi_comm_init(void) 
OBJ_RETAIN(&ompi_mpi_group_null.group); OBJ_RETAIN(&ompi_mpi_errors_are_fatal.eh); - /* initialize the comm_reg stuff for multi-threaded comm_cid - allocation */ - ompi_comm_reg_init(); - /* initialize communicator requests (for ompi_comm_idup) */ ompi_comm_request_init (); @@ -213,9 +224,9 @@ ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size ) new_comm->c_remote_group = ompi_group_allocate (remote_size); new_comm->c_flags |= OMPI_COMM_INTER; } else { - /* - * simplifies some operations (e.g. p2p), if - * we can always use the remote group + /* + * simplifies some operations (e.g. p2p), if + * we can always use the remote group */ new_comm->c_remote_group = new_comm->c_local_group; OBJ_RETAIN(new_comm->c_remote_group); @@ -227,7 +238,7 @@ ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size ) return new_comm; } -int ompi_comm_finalize(void) +int ompi_comm_finalize(void) { int max, i; ompi_communicator_t *comm; @@ -236,7 +247,7 @@ int ompi_comm_finalize(void) OBJ_DESTRUCT( &ompi_mpi_comm_self ); /* disconnect all dynamic communicators */ - ompi_dpm.dyn_finalize(); + ompi_dpm_dyn_finalize(); /* Free the attributes on comm world. This is not done in the * destructor as we delete attributes in ompi_comm_free (which @@ -261,22 +272,22 @@ int ompi_comm_finalize(void) OBJ_DESTRUCT (ompi_mpi_comm_parent); /* Please note, that the we did increase the reference count - for ompi_mpi_comm_null, ompi_mpi_group_null, and - ompi_mpi_errors_are_fatal in ompi_comm_init because of - ompi_mpi_comm_parent. In case a + for ompi_mpi_comm_null, ompi_mpi_group_null, and + ompi_mpi_errors_are_fatal in ompi_comm_init because of + ompi_mpi_comm_parent. In case a parent communicator is really created, the ref. counters - for these objects are decreased again by one. However, in a + for these objects are decreased again by one. However, in a static scenario, we should ideally decrease the ref. counter - for these objects by one here. 
The problem just is, that + for these objects by one here. The problem just is, that if the app had a parent_comm, and this has been freed/disconnected, - ompi_comm_parent points again to ompi_comm_null, the reference count + ompi_comm_parent points again to ompi_comm_null, the reference count for these objects has not been increased again. So the point is, if ompi_mpi_comm_parent == &ompi_mpi_comm_null we do not know whether we have to decrease the ref count for those three objects or not. Since this is a constant, non-increasing - amount of memory, we stick with the current solution for now, + amount of memory, we stick with the current solution for now, namely don't do anything. - */ + */ } /* Shut down MPI_COMM_NULL */ @@ -313,27 +324,15 @@ int ompi_comm_finalize(void) } } - OBJ_DESTRUCT (&ompi_mpi_communicators); OBJ_DESTRUCT (&ompi_comm_f_to_c_table); - /* finalize the comm_reg stuff */ - ompi_comm_reg_finalize(); - /* finalize communicator requests */ ompi_comm_request_fini (); return OMPI_SUCCESS; } -/* - * For linking only. To be checked. 
- */ -int ompi_comm_link_function(void) -{ - return OMPI_SUCCESS; -} - /********************************************************************************/ /********************************************************************************/ /********************************************************************************/ @@ -410,7 +409,6 @@ static void ompi_comm_destruct(ompi_communicator_t* comm) } if (NULL != comm->c_local_group) { - ompi_group_decrement_proc_count (comm->c_local_group); OBJ_RELEASE ( comm->c_local_group ); comm->c_local_group = NULL; if ( OMPI_COMM_IS_INTRA(comm) ) { @@ -423,7 +421,6 @@ static void ompi_comm_destruct(ompi_communicator_t* comm) } if (NULL != comm->c_remote_group) { - ompi_group_decrement_proc_count (comm->c_remote_group); OBJ_RELEASE ( comm->c_remote_group ); comm->c_remote_group = NULL; } @@ -442,7 +439,7 @@ static void ompi_comm_destruct(ompi_communicator_t* comm) } /* reset the ompi_comm_f_to_c_table entry */ - if ( MPI_UNDEFINED != comm->c_f_to_c_index && + if ( MPI_UNDEFINED != comm->c_f_to_c_index && NULL != opal_pointer_array_get_item(&ompi_comm_f_to_c_table, comm->c_f_to_c_index)) { opal_pointer_array_set_item ( &ompi_comm_f_to_c_table, diff --git a/ompi/communicator/comm_request.c b/ompi/communicator/comm_request.c index 14c57c16b58..8ad390ee56b 100644 --- a/ompi/communicator/comm_request.c +++ b/ompi/communicator/comm_request.c @@ -4,6 +4,10 @@ * reseved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2004-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -113,7 +117,7 @@ static int ompi_comm_request_progress (void) /* don't call ompi_request_test_all as it causes a recursive call into opal_progress */ while (request_item->subreq_count) { ompi_request_t *subreq = request_item->subreqs[request_item->subreq_count-1]; - if (true == subreq->req_complete) { + if( REQUEST_COMPLETE(subreq) ) { ompi_request_free (&subreq); request_item->subreq_count--; } else { @@ -203,7 +207,7 @@ static int ompi_comm_request_free (struct ompi_request_t **ompi_req) { ompi_comm_request_t *request = (ompi_comm_request_t *) *ompi_req; - if (!(*ompi_req)->req_complete) { + if( !REQUEST_COMPLETE(*ompi_req) ) { return MPI_ERR_REQUEST; } @@ -231,6 +235,7 @@ static void ompi_comm_request_destruct (ompi_comm_request_t *request) { OBJ_DESTRUCT(&request->schedule); } + OBJ_CLASS_INSTANCE(ompi_comm_request_t, ompi_request_t, ompi_comm_request_construct, ompi_comm_request_destruct); @@ -254,10 +259,11 @@ ompi_comm_request_t *ompi_comm_request_get (void) void ompi_comm_request_return (ompi_comm_request_t *request) { if (request->context) { - free (request->context); + OBJ_RELEASE (request->context); request->context = NULL; } + OMPI_REQUEST_FINI(&request->super); opal_free_list_return (&ompi_comm_requests, (opal_free_list_item_t *) request); } diff --git a/ompi/communicator/comm_request.h b/ompi/communicator/comm_request.h index 246a3010b0e..65af613f95f 100644 --- a/ompi/communicator/comm_request.h +++ b/ompi/communicator/comm_request.h @@ -22,7 +22,7 @@ typedef struct ompi_comm_request_t { ompi_request_t super; - void *context; + opal_object_t *context; opal_list_t schedule; } ompi_comm_request_t; OBJ_CLASS_DECLARATION(ompi_comm_request_t); diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index fb1df57cb61..a8d4756cffa 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of 
Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -17,7 +17,7 @@ * Copyright (c) 2011-2013 Universite Bordeaux 1 * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -93,7 +93,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_CID_INTRA 0x00000020 #define OMPI_COMM_CID_INTER 0x00000040 #define OMPI_COMM_CID_INTRA_BRIDGE 0x00000080 -#define OMPI_COMM_CID_INTRA_OOB 0x00000100 +#define OMPI_COMM_CID_INTRA_PMIX 0x00000100 #define OMPI_COMM_CID_GROUP 0x00000200 /** @@ -351,7 +351,6 @@ static inline bool ompi_comm_peer_invalid(ompi_communicator_t* comm, int peer_id * Initialise MPI_COMM_WORLD and MPI_COMM_SELF */ int ompi_comm_init(void); -OMPI_DECLSPEC int ompi_comm_link_function(void); /** * extract the local group from a communicator @@ -493,24 +492,27 @@ ompi_communicator_t* ompi_comm_allocate (int local_group_size, * @param mode: combination of input * OMPI_COMM_CID_INTRA: intra-comm * OMPI_COMM_CID_INTER: inter-comm + * OMPI_COMM_CID_GROUP: only decide CID within the ompi_group_t + * associated with the communicator. arg0 + * must point to an int which will be used + * as the pml tag for communication. * OMPI_COMM_CID_INTRA_BRIDGE: 2 intracomms connected by - * a bridge comm. local_leader - * and remote leader are in this - * case an int (rank in bridge-comm). - * OMPI_COMM_CID_INTRA_OOB: 2 intracomms, leaders talk - * through OOB. lleader and rleader - * are the required contact information. + * a bridge comm. 
arg0 and arg1 must point + * to integers representing the local and + * remote leader ranks. the remote leader rank + * is a rank in the bridgecomm. + * OMPI_COMM_CID_INTRA_PMIX: 2 intracomms, leaders talk + * through PMIx. arg0 must point to an integer + * representing the local leader rank. arg1 + * must point to a string representing the + * port of the remote leader. * @param send_first: to avoid a potential deadlock for * the OOB version. * This routine has to be thread safe in the final version. */ -OMPI_DECLSPEC int ompi_comm_nextcid ( ompi_communicator_t* newcomm, - ompi_communicator_t* oldcomm, - ompi_communicator_t* bridgecomm, - void* local_leader, - void* remote_leader, - int mode, - int send_first); +OMPI_DECLSPEC int ompi_comm_nextcid (ompi_communicator_t *newcomm, ompi_communicator_t *comm, + ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, + bool send_first, int mode); /** * allocate new communicator ID (non-blocking) @@ -522,10 +524,9 @@ OMPI_DECLSPEC int ompi_comm_nextcid ( ompi_communicator_t* newcomm, * OMPI_COMM_CID_INTER: inter-comm * This routine has to be thread safe in the final version. */ -OMPI_DECLSPEC int ompi_comm_nextcid_nb (ompi_communicator_t* newcomm, - ompi_communicator_t* comm, - ompi_communicator_t* bridgecomm, - int mode, ompi_request_t **req); +OMPI_DECLSPEC int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *comm, + ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, + bool send_first, int mode, ompi_request_t **req); /** * shut down the communicator infrastructure. @@ -535,6 +536,18 @@ int ompi_comm_finalize (void); /** * This is THE routine, where all the communicator stuff * is really set. 
+ * + * @param[out] newcomm new ompi communicator object + * @param[in] oldcomm old communicator + * @param[in] local_size size of local_ranks array + * @param[in] local_ranks local ranks (not used if local_group != NULL) + * @param[in] remote_size size of remote_ranks array + * @param[in] remote_ranks remote ranks (intercomm) (not used if remote_group != NULL) + * @param[in] attr attributes (can be NULL) + * @param[in] errh error handler + * @param[in] copy_topocomponent whether to copy the topology + * @param[in] local_group local process group (may be NULL if local_ranks array supplied) + * @param[in] remote_group remote process group (may be NULL) */ OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm, ompi_communicator_t* oldcomm, @@ -548,6 +561,23 @@ OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm, ompi_group_t *local_group, ompi_group_t *remote_group ); +/** + * This is THE routine, where all the communicator stuff + * is really set. Non-blocking version. 
+ * + * @param[out] newcomm new ompi communicator object + * @param[in] oldcomm old communicator + * @param[in] local_size size of local_ranks array + * @param[in] local_ranks local ranks (not used if local_group != NULL) + * @param[in] remote_size size of remote_ranks array + * @param[in] remote_ranks remote ranks (intercomm) (not used if remote_group != NULL) + * @param[in] attr attributes (can be NULL) + * @param[in] errh error handler + * @param[in] copy_topocomponent whether to copy the topology + * @param[in] local_group local process group (may be NULL if local_ranks array supplied) + * @param[in] remote_group remote process group (may be NULL) + * @param[out] req ompi_request_t object for tracking completion + */ OMPI_DECLSPEC int ompi_comm_set_nb ( ompi_communicator_t **ncomm, ompi_communicator_t *oldcomm, int local_size, @@ -589,18 +619,25 @@ int ompi_comm_determine_first ( ompi_communicator_t *intercomm, int high ); -OMPI_DECLSPEC int ompi_comm_activate ( ompi_communicator_t** newcomm, - ompi_communicator_t* comm, - ompi_communicator_t* bridgecomm, - void* local_leader, - void* remote_leader, - int mode, - int send_first ); +OMPI_DECLSPEC int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm, + ompi_communicator_t *bridgecomm, const void *arg0, + const void *arg1, bool send_first, int mode); -OMPI_DECLSPEC int ompi_comm_activate_nb (ompi_communicator_t **newcomm, - ompi_communicator_t *comm, - ompi_communicator_t *bridgecomm, - int mode, ompi_request_t **req); +/** + * Non-blocking variant of comm_activate. 
+ * + * @param[inout] newcomm New communicator + * @param[in] comm Parent communicator + * @param[in] bridgecomm Bridge communicator (used for PMIX and bridge modes) + * @param[in] arg0 Mode argument 0 + * @param[in] arg1 Mode argument 1 + * @param[in] send_first Send first from this process (PMIX mode only) + * @param[in] mode Collective mode + * @param[out] req New request object to track this operation + */ +OMPI_DECLSPEC int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *comm, + ompi_communicator_t *bridgecomm, const void *arg0, + const void *arg1, bool send_first, int mode, ompi_request_t **req); /** * a simple function to dump the structure @@ -610,14 +647,6 @@ int ompi_comm_dump ( ompi_communicator_t *comm ); /* setting name */ int ompi_comm_set_name (ompi_communicator_t *comm, const char *name ); -/* - * these are the init and finalize functions for the comm_reg - * stuff. These routines are necessary for handling multi-threading - * scenarious in the communicator_cid allocation - */ -void ompi_comm_reg_init(void); -void ompi_comm_reg_finalize(void); - /* global variable to save the number od dynamic communicators */ extern int ompi_comm_num_dyncomm; diff --git a/ompi/contrib/libompitrace/Makefile.am b/ompi/contrib/libompitrace/Makefile.am index e050e2cddd9..9b4cbdf5e88 100644 --- a/ompi/contrib/libompitrace/Makefile.am +++ b/ompi/contrib/libompitrace/Makefile.am @@ -6,15 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. 
+# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -41,3 +42,5 @@ libompitrace_la_SOURCES = \ request_free.c \ send.c \ sendrecv.c + +libompitrace_la_LDFLAGS = -version-info $(libompitrace_so_version) diff --git a/ompi/contrib/libompitrace/abort.c b/ompi/contrib/libompitrace/abort.c index fcb519a9310..246e4bea335 100644 --- a/ompi/contrib/libompitrace/abort.c +++ b/ompi/contrib/libompitrace/abort.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,17 +23,17 @@ #include "ompi/mpi/c/bindings.h" -int MPI_Abort(MPI_Comm comm, int errorcode) +int MPI_Abort(MPI_Comm comm, int errorcode) { char commname[MPI_MAX_OBJECT_NAME]; int len; int rank; - + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Comm_get_name(comm, commname, &len); - + fprintf(stderr, "MPI_ABORT[%d]: comm %s errorcode %d\n", rank, commname, errorcode); fflush(stderr); - + return PMPI_Abort(comm, errorcode); } diff --git a/ompi/contrib/libompitrace/accumulate.c b/ompi/contrib/libompitrace/accumulate.c index 4275ced121b..1cee619c6fd 100644 --- a/ompi/contrib/libompitrace/accumulate.c +++ b/ompi/contrib/libompitrace/accumulate.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,25 +30,25 @@ int MPI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, - MPI_Datatype target_datatype, MPI_Op op, MPI_Win win) + MPI_Datatype target_datatype, MPI_Op op, MPI_Win win) { - + char typename[MPI_MAX_OBJECT_NAME], target_dt[MPI_MAX_OBJECT_NAME]; char winname[MPI_MAX_OBJECT_NAME]; int len; int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Type_get_name(origin_datatype, typename, &len); PMPI_Type_get_name(target_datatype, target_dt, &len); PMPI_Win_get_name(win, winname, &len); - + fprintf(stderr, "MPI_ACCUMULATE[%d]: origin_addr %0" PRIxPTR " origin_count %d origin_datatype %s\n" "\ttarget_rank %d target_disp %" PRIdPTR " target_count %d target_datatype %s op %s win %s\n", rank, (uintptr_t)origin_addr, origin_count, typename, target_rank, (intptr_t) target_disp, target_count, target_dt, op->o_name, winname); fflush(stderr); - + return PMPI_Accumulate(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, op, win); diff --git a/ompi/contrib/libompitrace/add_error_class.c b/ompi/contrib/libompitrace/add_error_class.c index 030591270c0..721aa3b1fd8 100644 --- a/ompi/contrib/libompitrace/add_error_class.c +++ b/ompi/contrib/libompitrace/add_error_class.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,15 +24,15 @@ #include "opal_stdint.h" #include "ompi/mpi/c/bindings.h" -int MPI_Add_error_class(int *errorclass) +int MPI_Add_error_class(int *errorclass) { int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); fprintf(stderr, "MPI_ADD_ERROR_CLASS[%d]: errorclass %0" PRIxPTR "\n", rank, (uintptr_t)errorclass); fflush(stderr); - + return PMPI_Add_error_class(errorclass); } diff --git a/ompi/contrib/libompitrace/add_error_code.c b/ompi/contrib/libompitrace/add_error_code.c index ce8af8b87c9..f1465ab3ba8 100644 --- a/ompi/contrib/libompitrace/add_error_code.c +++ b/ompi/contrib/libompitrace/add_error_code.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,11 +27,11 @@ int MPI_Add_error_code(int errorclass, int *errorcode) { int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); - + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + fprintf(stderr, "MPI_ADD_ERROR_CODE[%d]: errorclass %d errcode %0" PRIxPTR "\n", rank, errorclass, (uintptr_t)errorcode); fflush(stderr); - + return PMPI_Add_error_code(errorclass, errorcode); } diff --git a/ompi/contrib/libompitrace/add_error_string.c b/ompi/contrib/libompitrace/add_error_string.c index 11894235c89..499607dd9f9 100644 --- a/ompi/contrib/libompitrace/add_error_string.c +++ b/ompi/contrib/libompitrace/add_error_string.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,8 +28,8 @@ int MPI_Add_error_string(int errorcode, const char *string) { int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); fprintf(stderr, "MPI_ADD_ERROR_STRING[%d]: errorcode %d string %s\n", rank, errorcode, string); diff --git a/ompi/contrib/libompitrace/address.c b/ompi/contrib/libompitrace/address.c index 8ed9543bcec..259d9f30639 100644 --- a/ompi/contrib/libompitrace/address.c +++ b/ompi/contrib/libompitrace/address.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,12 +28,12 @@ int MPI_Address(void *location, MPI_Aint *address) { int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); fprintf(stderr, "MPI_ADDRESS[%d]: location %0" PRIxPTR " address %0" PRIxPTR "\n", rank, (uintptr_t)location, (uintptr_t)address); fflush(stderr); - + return PMPI_Address(location, address); } diff --git a/ompi/contrib/libompitrace/allgather.c b/ompi/contrib/libompitrace/allgather.c index 7fe1e3a5130..400464742b0 100644 --- a/ompi/contrib/libompitrace/allgather.c +++ b/ompi/contrib/libompitrace/allgather.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,16 +36,16 @@ int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, char commname[MPI_MAX_OBJECT_NAME]; int len; int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Type_get_name(sendtype, sendtypename, &len); PMPI_Type_get_name(recvtype, recvtypename, &len); PMPI_Comm_get_name(comm, commname, &len); - + fprintf(stderr, "MPI_ALLGATHER[%d]: sendbuf %0" PRIxPTR " sendcount %d sendtype %s\n\trecvbuf %0" PRIxPTR " recvcount %d recvtype %s comm %s\n", rank, (uintptr_t) sendbuf, sendcount, sendtypename, (uintptr_t) recvbuf, recvcount, recvtypename, commname); fflush(stderr); - + return PMPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); } diff --git a/ompi/contrib/libompitrace/allgatherv.c b/ompi/contrib/libompitrace/allgatherv.c index f9f94c38618..4dbff470479 100644 --- a/ompi/contrib/libompitrace/allgatherv.c +++ b/ompi/contrib/libompitrace/allgatherv.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,16 +36,16 @@ int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, char commname[MPI_MAX_OBJECT_NAME]; int len; int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Type_get_name(sendtype, sendtypename, &len); PMPI_Type_get_name(recvtype, recvtypename, &len); PMPI_Comm_get_name(comm, commname, &len); - + fprintf(stderr, "MPI_ALLGATHERV[%d]: sendbuf %0" PRIxPTR " sendcount %d sendtype %s\n\trecvbuf %0" PRIxPTR " recvtype %s comm %s\n", rank, (uintptr_t) sendbuf, sendcount, sendtypename, (uintptr_t) recvbuf, recvtypename, commname); fflush(stderr); - + return PMPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); } diff --git a/ompi/contrib/libompitrace/alloc_mem.c b/ompi/contrib/libompitrace/alloc_mem.c index d9fa579ec3c..59c0de39a32 100644 --- a/ompi/contrib/libompitrace/alloc_mem.c +++ b/ompi/contrib/libompitrace/alloc_mem.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,12 +27,12 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr) { int rank; - + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); - + fprintf(stderr, "MPI_Alloc_mem[%d]: size %0ld\n", rank, (long)size); fflush(stderr); - + return PMPI_Alloc_mem(size, info, baseptr); } diff --git a/ompi/contrib/libompitrace/allreduce.c b/ompi/contrib/libompitrace/allreduce.c index bf3e33ad30f..5382f312b3e 100644 --- a/ompi/contrib/libompitrace/allreduce.c +++ b/ompi/contrib/libompitrace/allreduce.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,19 +29,19 @@ #include "ompi/mpi/c/bindings.h" int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { char typename[MPI_MAX_OBJECT_NAME], commname[MPI_MAX_OBJECT_NAME]; int len; int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Type_get_name(datatype, typename, &len); PMPI_Comm_get_name(comm, commname, &len); - + fprintf(stderr, "MPI_ALLREDUCE[%d]: sendbuf %0" PRIxPTR " recvbuf %0" PRIxPTR " count %d datatype %s op %s comm %s\n", rank, (uintptr_t)sendbuf, (uintptr_t)recvbuf, count, typename, op->o_name, commname); fflush(stderr); - + return PMPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm); } diff --git a/ompi/contrib/libompitrace/barrier.c b/ompi/contrib/libompitrace/barrier.c index 53ae2d0fa3f..0a1c58ed179 100644 --- a/ompi/contrib/libompitrace/barrier.c +++ b/ompi/contrib/libompitrace/barrier.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,17 +23,17 @@ #include "ompi/mpi/c/bindings.h" -int MPI_Barrier(MPI_Comm comm) +int MPI_Barrier(MPI_Comm comm) { char commname[MPI_MAX_OBJECT_NAME]; int len; int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Comm_get_name(comm, commname, &len); - + fprintf(stderr, "MPI_BARRIER[%d]: comm %s\n", rank, commname); fflush(stderr); - + return PMPI_Barrier(comm); } diff --git a/ompi/contrib/libompitrace/bcast.c b/ompi/contrib/libompitrace/bcast.c index 92351aa40c7..ef6bcdf63a5 100644 --- a/ompi/contrib/libompitrace/bcast.c +++ b/ompi/contrib/libompitrace/bcast.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,14 +32,14 @@ int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, char typename[MPI_MAX_OBJECT_NAME], commname[MPI_MAX_OBJECT_NAME]; int len; int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Type_get_name(datatype, typename, &len); PMPI_Comm_get_name(comm, commname, &len); - + fprintf(stderr, "MPI_BCAST[%d]: buffer %0" PRIxPTR " count %d datatype %s root %d comm %s\n", rank, (uintptr_t) buffer, count, typename, root, commname); fflush(stderr); - + return PMPI_Bcast(buffer, count, datatype, root, comm); } diff --git a/ompi/contrib/libompitrace/configure.m4 b/ompi/contrib/libompitrace/configure.m4 index 25488e6d0f5..a55b6590785 100644 --- a/ompi/contrib/libompitrace/configure.m4 +++ b/ompi/contrib/libompitrace/configure.m4 @@ -6,19 +6,19 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # -# OMPI_contrib_libompitrace_CONFIG([action-if-can-compile], +# OMPI_contrib_libompitrace_CONFIG([action-if-can-compile], # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([OMPI_contrib_libompitrace_CONFIG],[ diff --git a/ompi/contrib/libompitrace/finalize.c b/ompi/contrib/libompitrace/finalize.c index ee13ebaf0f6..30b4df110cd 100644 --- a/ompi/contrib/libompitrace/finalize.c +++ b/ompi/contrib/libompitrace/finalize.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,11 +26,11 @@ int MPI_Finalize(void) { int rank; - + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); fprintf(stderr, "MPI_FINALIZE[%d]\n", rank); fflush(stderr); - + return PMPI_Finalize(); } diff --git a/ompi/contrib/libompitrace/init.c b/ompi/contrib/libompitrace/init.c index 1dd6f334381..a92f384048c 100644 --- a/ompi/contrib/libompitrace/init.c +++ b/ompi/contrib/libompitrace/init.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2009 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,6 +28,6 @@ int MPI_Init(int *argc, char ***argv) { fprintf(stderr, "MPI_INIT: argc %d\n", (0 < *argc) ? *argc : 0); fflush(stderr); - + return PMPI_Init(argc, argv); } diff --git a/ompi/contrib/libompitrace/isend.c b/ompi/contrib/libompitrace/isend.c index bcb1cde88c1..0ef6e508294 100644 --- a/ompi/contrib/libompitrace/isend.c +++ b/ompi/contrib/libompitrace/isend.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,14 +34,14 @@ int MPI_Isend(const void *buf, int count, MPI_Datatype type, int dest, char typename[MPI_MAX_OBJECT_NAME], commname[MPI_MAX_OBJECT_NAME]; int len; int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Type_get_name(type, typename, &len); PMPI_Comm_get_name(comm, commname, &len); - + fprintf(stderr, "MPI_ISEND[%d]: buf %0" PRIxPTR " count %d datatype %s dest %d tag %d comm %s\n", rank, (uintptr_t) buf, count, typename, dest, tag, commname); fflush(stderr); - + return PMPI_Isend(buf, count, type, dest, tag, comm, request); } diff --git a/ompi/contrib/libompitrace/recv.c b/ompi/contrib/libompitrace/recv.c index 85e53cc4068..0fc5a9665c7 100644 --- a/ompi/contrib/libompitrace/recv.c +++ b/ompi/contrib/libompitrace/recv.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,19 +26,19 @@ #include "ompi/mpi/c/bindings.h" int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, - int tag, MPI_Comm comm, MPI_Status *status) + int tag, MPI_Comm comm, MPI_Status *status) { char typename[MPI_MAX_OBJECT_NAME], commname[MPI_MAX_OBJECT_NAME]; int len; int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Type_get_name(type, typename, &len); PMPI_Comm_get_name(comm, commname, &len); - + fprintf(stderr, "MPI_RECV[%d]: buf %0" PRIxPTR " count %d datatype %s source %d tag %d comm %s\n", rank, (uintptr_t) buf, count, typename, source, tag, commname); fflush(stderr); - + return PMPI_Recv(buf, count, type, source, tag, comm, status); } diff --git a/ompi/contrib/libompitrace/reduce.c b/ompi/contrib/libompitrace/reduce.c index 55fe60f52df..24fb60f7b85 100644 --- a/ompi/contrib/libompitrace/reduce.c +++ b/ompi/contrib/libompitrace/reduce.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,16 +30,16 @@ #include "ompi/mpi/c/bindings.h" int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) + MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) { char typename[MPI_MAX_OBJECT_NAME], commname[MPI_MAX_OBJECT_NAME]; int len; int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Type_get_name(datatype, typename, &len); PMPI_Comm_get_name(comm, commname, &len); - + fprintf(stderr,"MPI_REDUCE[%d]: sendbuf %0" PRIxPTR " recvbuf %0" PRIxPTR " count %d datatype %s op %s root %d comm %s\n", rank, (uintptr_t) sendbuf, (uintptr_t) recvbuf, count, typename, op->o_name, root, commname); fflush(stderr); diff --git a/ompi/contrib/libompitrace/request_free.c b/ompi/contrib/libompitrace/request_free.c index 83944103ade..cc740582e8e 100644 --- a/ompi/contrib/libompitrace/request_free.c +++ b/ompi/contrib/libompitrace/request_free.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,10 +23,10 @@ #include "ompi/mpi/c/bindings.h" -int MPI_Request_free(MPI_Request *request) +int MPI_Request_free(MPI_Request *request) { int rank; - + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); fprintf(stderr, "MPI_REQUEST_FREE[%d]\n", rank); diff --git a/ompi/contrib/libompitrace/send.c b/ompi/contrib/libompitrace/send.c index 6e9b9e5d928..b9eddda57f4 100644 --- a/ompi/contrib/libompitrace/send.c +++ b/ompi/contrib/libompitrace/send.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,16 +29,16 @@ #include "ompi/mpi/c/bindings.h" int MPI_Send(const void *buf, int count, MPI_Datatype type, int dest, - int tag, MPI_Comm comm) + int tag, MPI_Comm comm) { char typename[MPI_MAX_OBJECT_NAME], commname[MPI_MAX_OBJECT_NAME]; int len; int rank; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Type_get_name(type, typename, &len); PMPI_Comm_get_name(comm, commname, &len); - + fprintf(stderr, "MPI_SEND[%d]: : buf %0" PRIxPTR " count %d datatype %s dest %d tag %d comm %s\n", rank, (uintptr_t) buf, count, typename, dest, tag, commname); fflush(stderr); diff --git a/ompi/contrib/libompitrace/sendrecv.c b/ompi/contrib/libompitrace/sendrecv.c index 0b92aa5964c..5fd84cbc11b 100644 --- a/ompi/contrib/libompitrace/sendrecv.c +++ b/ompi/contrib/libompitrace/sendrecv.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,13 +39,13 @@ int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, int len; int rank; int size; - - PMPI_Comm_rank(MPI_COMM_WORLD, &rank); + + PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Type_get_name(sendtype, sendtypename, &len); PMPI_Type_get_name(sendtype, recvtypename, &len); PMPI_Comm_get_name(comm, commname, &len); PMPI_Type_size(recvtype, &size); - + fprintf(stderr, "MPI_SENDRECV[%d]: sendbuf %0" PRIxPTR " sendcount %d sendtype %s dest %d sendtag %d\n\t" "recvbuf %0" PRIxPTR " recvcount %d recvtype %s source %d recvtag %d comm %s\n", rank, (uintptr_t) sendbuf, sendcount, sendtypename, dest, sendtag, diff --git a/ompi/datatype/Makefile.am b/ompi/datatype/Makefile.am index 2897c005cae..643e8147fdd 100644 --- a/ompi/datatype/Makefile.am +++ b/ompi/datatype/Makefile.am @@ -6,17 +6,19 @@ # Copyright (c) 2004-2011 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -37,6 +39,7 @@ libdatatype_la_SOURCES = \ ompi_datatype_create_vector.c \ ompi_datatype_create_darray.c \ ompi_datatype_create_subarray.c \ + ompi_datatype_external.c \ ompi_datatype_external32.c \ ompi_datatype_match_size.c \ ompi_datatype_module.c \ diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index 55aeaa9e6d2..ff6a1b0b2f1 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -7,6 +7,8 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,15 +30,9 @@ #include "ompi_config.h" #include -#ifdef HAVE_STDINT_H #include -#endif -#ifdef HAVE_STRING_H #include -#endif -#ifdef HAVE_LIMITS_H #include -#endif #include "ompi/constants.h" #include "opal/datatype/opal_convertor.h" @@ -77,6 +73,7 @@ struct ompi_datatype_t { void* args; /**< Data description for the user */ void* packed_description; /**< Packed description of the datatype */ + uint64_t pml_data; /**< PML-specific information */ /* --- cacheline 6 boundary (384 bytes) --- */ char name[MPI_MAX_OBJECT_NAME];/**< Externally visible name */ /* --- cacheline 7 boundary (448 bytes) --- */ @@ -289,7 +286,7 @@ OMPI_DECLSPEC const ompi_datatype_t* ompi_datatype_match_size( int size, uint16_ /* * */ -OMPI_DECLSPEC int32_t ompi_datatype_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdtype, +OMPI_DECLSPEC int32_t ompi_datatype_sndrcv( const void *sbuf, int32_t scount, const ompi_datatype_t* sdtype, void *rbuf, int32_t rcount, const ompi_datatype_t* rdtype); /* @@ -366,5 +363,16 @@ OMPI_DECLSPEC int ompi_datatype_safeguard_pointer_debug_breakpoint( const void* int count ); #endif /* OPAL_ENABLE_DEBUG */ 
+OMPI_DECLSPEC int ompi_datatype_pack_external( const char datarep[], const void *inbuf, int incount, + ompi_datatype_t *datatype, void *outbuf, + MPI_Aint outsize, MPI_Aint *position); + +OMPI_DECLSPEC int ompi_datatype_unpack_external( const char datarep[], const void *inbuf, MPI_Aint insize, + MPI_Aint *position, void *outbuf, int outcount, + ompi_datatype_t *datatype); + +OMPI_DECLSPEC int ompi_datatype_pack_external_size( const char datarep[], int incount, + ompi_datatype_t *datatype, MPI_Aint *size); + END_C_DECLS #endif /* OMPI_DATATYPE_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index 946cb1d3ebd..04fc15097eb 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -11,9 +11,9 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -44,12 +44,12 @@ __ompi_datatype_create_from_args( int32_t* i, OPAL_PTRDIFF_TYPE * a, ompi_datatype_t** d, int32_t type ); typedef struct __dt_args { - int ref_count; - int create_type; + int32_t ref_count; + int32_t create_type; size_t total_pack_size; - int ci; - int ca; - int cd; + int32_t ci; + int32_t ca; + int32_t cd; int* i; OPAL_PTRDIFF_TYPE* a; ompi_datatype_t** d; @@ -71,11 +71,11 @@ typedef struct __dt_args { #endif /* OPAL_ALIGN_WORD_SIZE_INTEGERS */ /** - * Some architecture require that 64 bits pointers (to pointers) has to - * be 64 bits aligned. As in the ompi_datatype_args_t structure we have 2 such - * pointers and one to an array of ints, if we start by setting the 64 - * bits aligned one we will not have any trouble. Problem arise on - * SPARC 64. + * Some architectures require 64 bits pointers (to pointers) to + * be 64 bits aligned. As in the ompi_datatype_args_t structure we have + * 2 such array of pointers and one to an array of ints, if we start by + * setting the 64 bits aligned one we will not have any trouble. Problem + * originally reported on SPARC 64. 
*/ #define ALLOC_ARGS(PDATA, IC, AC, DC) \ do { \ @@ -100,7 +100,7 @@ typedef struct __dt_args { if( pArgs->ci == 0 ) pArgs->i = NULL; \ else pArgs->i = (int*)buf; \ pArgs->ref_count = 1; \ - pArgs->total_pack_size = (4 + (IC)) * sizeof(int) + \ + pArgs->total_pack_size = (4 + (IC) + (DC)) * sizeof(int) + \ (AC) * sizeof(OPAL_PTRDIFF_TYPE); \ (PDATA)->args = (void*)pArgs; \ (PDATA)->packed_description = NULL; \ @@ -236,9 +236,8 @@ int32_t ompi_datatype_set_args( ompi_datatype_t* pData, */ OBJ_RETAIN( d[pos] ); pArgs->total_pack_size += ((ompi_datatype_args_t*)d[pos]->args)->total_pack_size; - } else { - pArgs->total_pack_size += 2 * sizeof(int); /* _NAMED + predefined id */ } + pArgs->total_pack_size += sizeof(int); /* each data has an ID */ } return OMPI_SUCCESS; @@ -481,39 +480,69 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype, { ompi_datatype_args_t* args = (ompi_datatype_args_t*)datatype->args; int next_index = OMPI_DATATYPE_MAX_PREDEFINED; + void *packed_description = datatype->packed_description; void* recursive_buffer; - if( NULL == datatype->packed_description ) { - if( ompi_datatype_is_predefined(datatype) ) { - datatype->packed_description = malloc(2 * sizeof(int)); - } else if( NULL == args ) { - return OMPI_ERROR; + if (NULL == packed_description) { + if (opal_atomic_cmpset (&datatype->packed_description, NULL, (void *) 1)) { + if( ompi_datatype_is_predefined(datatype) ) { + packed_description = malloc(2 * sizeof(int)); + } else if( NULL == args ) { + return OMPI_ERROR; + } else { + packed_description = malloc(args->total_pack_size); + } + recursive_buffer = packed_description; + __ompi_datatype_pack_description( datatype, &recursive_buffer, &next_index ); + + if (!ompi_datatype_is_predefined(datatype)) { + /* If the precomputed size is not large enough we're already in troubles, we + * have overwritten outside of the allocated buffer. Raise the alarm ! 
+ * If not reassess the size of the packed buffer necessary for holding the + * datatype description. + */ + assert(args->total_pack_size >= (uintptr_t)((char*)recursive_buffer - (char *) packed_description)); + args->total_pack_size = (uintptr_t)((char*)recursive_buffer - (char *) packed_description); + } + + opal_atomic_wmb (); + datatype->packed_description = packed_description; } else { - datatype->packed_description = malloc(args->total_pack_size); + /* another thread beat us to it */ + packed_description = datatype->packed_description; } - recursive_buffer = datatype->packed_description; - __ompi_datatype_pack_description( datatype, &recursive_buffer, &next_index ); - if( !ompi_datatype_is_predefined(datatype) ) { - args->total_pack_size = (uintptr_t)((char*)recursive_buffer - (char*)datatype->packed_description); + } + + if ((void *) 1 == packed_description) { + struct timespec interval = {.tv_sec = 0, .tv_nsec = 1000}; + + /* wait until the packed description is updated */ + while ((void *) 1 == datatype->packed_description) { + nanosleep (&interval, NULL); } + + packed_description = datatype->packed_description; } - *packed_buffer = (const void*)datatype->packed_description; + *packed_buffer = (const void *) packed_description; return OMPI_SUCCESS; } size_t ompi_datatype_pack_description_length( ompi_datatype_t* datatype ) { + void *packed_description = datatype->packed_description; + if( ompi_datatype_is_predefined(datatype) ) { return 2 * sizeof(int); } - if( NULL == datatype->packed_description ) { + if( NULL == packed_description || (void *) 1 == packed_description) { const void* buf; int rc; rc = ompi_datatype_get_pack_description(datatype, &buf); - if( OMPI_SUCCESS != rc ) + if( OMPI_SUCCESS != rc ) { return 0; + } } assert( NULL != (ompi_datatype_args_t*)datatype->args ); assert( NULL != (ompi_datatype_args_t*)datatype->packed_description ); @@ -733,7 +762,7 @@ static ompi_datatype_t* __ompi_datatype_create_from_args( int32_t* i, MPI_Aint* { const 
int* a_i[8] = {&i[0], &i[1], &i[2], &i[3 + 0 * i[0]], &i[3 + 1 * i[0]], &i[3 + 2 * i[0]], &i[3 + 3 * i[0]], &i[3 + 4 * i[0]]}; - ompi_datatype_set_args( datatype, 4 * i[0] + 4,a_i, 0, NULL, 1, d, MPI_COMBINER_DARRAY); + ompi_datatype_set_args( datatype, 4 * i[2] + 4, a_i, 0, NULL, 1, d, MPI_COMBINER_DARRAY); } break; /******************************************************************/ @@ -749,14 +778,15 @@ static ompi_datatype_t* __ompi_datatype_create_from_args( int32_t* i, MPI_Aint* break; /******************************************************************/ case MPI_COMBINER_RESIZED: - /*ompi_datatype_set_args( datatype, 0, NULL, 2, a, 1, d, MPI_COMBINER_RESIZED );*/ + ompi_datatype_create_resized(d[0], a[0], a[1], &datatype); + ompi_datatype_set_args( datatype, 0, NULL, 2, a, 1, d, MPI_COMBINER_RESIZED ); break; /******************************************************************/ case MPI_COMBINER_HINDEXED_BLOCK: ompi_datatype_create_hindexed_block( i[0], i[1], a, d[0], &datatype ); { const int* a_i[2] = {&i[0], &i[1]}; - ompi_datatype_set_args( datatype, 2 + i[0], a_i, i[0], a, 1, d, MPI_COMBINER_HINDEXED_BLOCK ); + ompi_datatype_set_args( datatype, 2, a_i, i[0], a, 1, d, MPI_COMBINER_HINDEXED_BLOCK ); } break; /******************************************************************/ @@ -791,7 +821,7 @@ ompi_datatype_t* ompi_datatype_get_single_predefined_type_from_args( ompi_dataty ompi_datatype_t *predef = NULL, *current_type, *current_predef; ompi_datatype_args_t* args = (ompi_datatype_args_t*)type->args; int i; - + if( ompi_datatype_is_predefined(type) ) return type; diff --git a/ompi/datatype/ompi_datatype_create.c b/ompi/datatype/ompi_datatype_create.c index b73eb781cab..8c942ba4baf 100644 --- a/ompi/datatype/ompi_datatype_create.c +++ b/ompi/datatype/ompi_datatype_create.c @@ -20,9 +20,7 @@ #include "ompi_config.h" #include -#ifdef HAVE_STRING_H #include -#endif #include "opal/class/opal_pointer_array.h" #include "ompi/datatype/ompi_datatype.h" @@ -37,6 +35,7 
@@ static void __ompi_datatype_allocate( ompi_datatype_t* datatype ) datatype->d_keyhash = NULL; datatype->name[0] = '\0'; datatype->packed_description = NULL; + datatype->pml_data = 0; } static void __ompi_datatype_release(ompi_datatype_t * datatype) diff --git a/ompi/datatype/ompi_datatype_create_darray.c b/ompi/datatype/ompi_datatype_create_darray.c index 846566a8eb1..98c81f0dc29 100644 --- a/ompi/datatype/ompi_datatype_create_darray.c +++ b/ompi/datatype/ompi_datatype_create_darray.c @@ -1,9 +1,9 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -13,6 +13,8 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -169,6 +171,7 @@ int32_t ompi_datatype_create_darray(int size, ptrdiff_t orig_extent, *st_offsets = NULL; int i, start_loop, end_loop, step; int *coords = NULL, rc = OMPI_SUCCESS; + ptrdiff_t displs[2], tmp_size = 1; /* speedy corner case */ if (ndims < 1) { @@ -187,10 +190,13 @@ int32_t ompi_datatype_create_darray(int size, int tmp_rank = rank, procs = size; coords = (int *) malloc(ndims * sizeof(int)); + displs[1] = orig_extent; for (i = 0 ; i < ndims ; i++) { procs = procs / psize_array[i]; coords[i] = tmp_rank / procs; tmp_rank = tmp_rank % procs; + /* compute the upper bound of the datatype, including all dimensions */ + displs[1] *= gsize_array[i]; } } @@ -246,41 +252,34 @@ int32_t ompi_datatype_create_darray(int size, lastType = *newtype; } - - /* set displacement and UB correctly. Please read the comment in subarray */ - { - ptrdiff_t displs[3], tmp_size; - ompi_datatype_t *types[3]; - int blength[3] = { 1, 1, 1}; - - displs[1] = st_offsets[start_loop]; - tmp_size = 1; - for (i = start_loop + step ; i != end_loop ; i += step) { - tmp_size *= gsize_array[i - step]; - displs[1] += tmp_size * st_offsets[i]; - } - - displs[0] = 0; - displs[1] *= orig_extent; - displs[2] = orig_extent; - for (i = 0 ; i < ndims ; i++) { - displs[2] *= gsize_array[i]; - } - if(oldtype->super.flags & (OPAL_DATATYPE_FLAG_USER_LB | OPAL_DATATYPE_FLAG_USER_UB) ) { - types[0] = MPI_LB; types[1] = lastType; types[2] = MPI_UB; - - rc = ompi_datatype_create_struct(3, blength, displs, types, newtype); - } else { - ompi_datatype_create_resized(lastType, displs[1], displs[2], newtype); - } - ompi_datatype_destroy(&lastType); - /* need to destroy the old type even in error condition, so - don't check return code from above until after cleanup. */ - if (MPI_SUCCESS != rc) goto cleanup; + /** + * We need to shift the content (useful data) of the datatype, so + * we need to force the displacement to be moved. 
Therefore, we + * cannot use resize as it will only set the soft lb and ub + * markers without moving the data. Instead, we have to create a + * new data, and insert the last_Type with the correct + * displacement. + */ + displs[0] = st_offsets[start_loop]; + for (i = start_loop + step; i != end_loop; i += step) { + tmp_size *= gsize_array[i - step]; + displs[0] += tmp_size * st_offsets[i]; + } + displs[0] *= orig_extent; + + *newtype = ompi_datatype_create(lastType->super.desc.used); + rc = ompi_datatype_add(*newtype, lastType, 1, displs[0], displs[1]); + ompi_datatype_destroy(&lastType); + /* need to destroy the old type even in error condition, so + don't check return code from above until after cleanup. */ + if (MPI_SUCCESS != rc) { + ompi_datatype_destroy (newtype); + } else { + (void) opal_datatype_resize( &(*newtype)->super, 0, displs[1]); } cleanup: - if (NULL != st_offsets) free(st_offsets); - if (NULL != coords) free(coords); - return OMPI_SUCCESS; + free(st_offsets); + free(coords); + return rc; } diff --git a/ompi/datatype/ompi_datatype_create_indexed.c b/ompi/datatype/ompi_datatype_create_indexed.c index 9a75bca7a0b..9311eac7972 100644 --- a/ompi/datatype/ompi_datatype_create_indexed.c +++ b/ompi/datatype/ompi_datatype_create_indexed.c @@ -13,6 +13,8 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -36,9 +38,7 @@ int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const OPAL_PTRDIFF_TYPE extent; if( 0 == count ) { - *newType = ompi_datatype_create( 0 ); - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return OMPI_SUCCESS; + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } disp = pDisp[0]; @@ -113,12 +113,13 @@ int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* p ompi_datatype_type_extent( oldType, &extent ); if( (count == 0) || (bLength == 0) ) { - *newType = ompi_datatype_create(1); - if( 0 == count ) - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0 ); - else + if( 0 == count ) { + return ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newType); + } else { + *newType = ompi_datatype_create(1); ompi_datatype_add( *newType, oldType, 0, pDisp[0] * extent, extent ); - return OMPI_SUCCESS; + return OMPI_SUCCESS; + } } pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); disp = pDisp[0]; diff --git a/ompi/datatype/ompi_datatype_create_subarray.c b/ompi/datatype/ompi_datatype_create_subarray.c index 598b0ff3600..fcf44407725 100644 --- a/ompi/datatype/ompi_datatype_create_subarray.c +++ b/ompi/datatype/ompi_datatype_create_subarray.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -13,7 +13,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -36,7 +36,7 @@ int32_t ompi_datatype_create_subarray(int ndims, const ompi_datatype_t* oldtype, ompi_datatype_t** newtype) { - MPI_Datatype last_type; + ompi_datatype_t *last_type; int32_t i, step, end_loop; MPI_Aint size, displ, extent; @@ -51,7 +51,7 @@ int32_t ompi_datatype_create_subarray(int ndims, /* If the ndims is zero then return the NULL datatype */ if( ndims < 2 ) { if( 0 == ndims ) { - *newtype = &ompi_mpi_datatype_null.dt; + ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newtype); return MPI_SUCCESS; } ompi_datatype_create_contiguous( subsize_array[0], oldtype, &last_type ); @@ -91,30 +91,18 @@ int32_t ompi_datatype_create_subarray(int ndims, } replace_subarray_type: - /* - * Resized will only set the soft lb and ub markers without moving the real - * data inside. Thus, in case the original data contains the hard markers - * (MPI_LB or MPI_UB) we must force the displacement of the data upward to - * the right position AND set the hard markers LB and UB. - * - * NTH: ompi_datatype_create_resized() does not do enough for the general - * pack/unpack functions to work correctly. Until this is fixed always use - * ompi_datatype_create_struct(). Once this is fixed remove 1 || below. To - * verify that the regression is fixed run the subarray test in the Open MPI - * ibm testsuite. 
- */ - if(1 || oldtype->super.flags & (OPAL_DATATYPE_FLAG_USER_LB | OPAL_DATATYPE_FLAG_USER_UB) ) { - MPI_Aint displs[3]; - MPI_Datatype types[3]; - int blength[3] = { 1, 1, 1 }; - - displs[0] = 0; displs[1] = displ * extent; displs[2] = size * extent; - types[0] = MPI_LB; types[1] = last_type; types[2] = MPI_UB; - ompi_datatype_create_struct( 3, blength, displs, types, newtype ); - } else { - ompi_datatype_create_resized(last_type, displ * extent, size * extent, newtype); - } + /** + * We need to shift the content (useful data) of the datatype, so + * we need to force the displacement to be moved. Therefore, we + * cannot use resize as it will only set the soft lb and ub + * markers without moving the data. Instead, we have to create a + * new data, and insert the last_Type with the correct + * displacement. + */ + *newtype = ompi_datatype_create( last_type->super.desc.used ); + ompi_datatype_add( *newtype, last_type, 1, displ * extent, size * extent); ompi_datatype_destroy( &last_type ); + opal_datatype_resize( &(*newtype)->super, 0, size * extent ); return OMPI_SUCCESS; } diff --git a/ompi/datatype/ompi_datatype_external.c b/ompi/datatype/ompi_datatype_external.c new file mode 100644 index 00000000000..d47531ef29e --- /dev/null +++ b/ompi/datatype/ompi_datatype_external.c @@ -0,0 +1,135 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. 
All rights + * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include + +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/datatype/ompi_datatype.h" +#include "opal/datatype/opal_convertor.h" + +int ompi_datatype_pack_external(const char datarep[], const void *inbuf, int incount, + ompi_datatype_t *datatype, void *outbuf, + MPI_Aint outsize, MPI_Aint *position) +{ + int rc = MPI_SUCCESS; + opal_convertor_t local_convertor; + struct iovec invec; + unsigned int iov_count; + size_t size; + + OBJ_CONSTRUCT(&local_convertor, opal_convertor_t); + + /* The resulting convertor will be set to the position zero. We have to use + * CONVERTOR_SEND_CONVERSION in order to force the convertor to do anything + * more than just packing the data. + */ + opal_convertor_copy_and_prepare_for_send( ompi_mpi_external32_convertor, + &(datatype->super), incount, (void *) inbuf, + CONVERTOR_SEND_CONVERSION, + &local_convertor ); + + /* Check for truncation */ + opal_convertor_get_packed_size( &local_convertor, &size ); + if( (*position + size) > (size_t)outsize ) { /* we can cast as we already checked for < 0 */ + OBJ_DESTRUCT( &local_convertor ); + return MPI_ERR_TRUNCATE; + } + + /* Prepare the iovec with all informations */ + invec.iov_base = (char*) outbuf + (*position); + invec.iov_len = size; + + /* Do the actual packing */ + iov_count = 1; + rc = opal_convertor_pack( &local_convertor, &invec, &iov_count, &size ); + *position += size; + OBJ_DESTRUCT( &local_convertor ); + + /* All done. Note that the convertor returns 1 upon success, not + OMPI_SUCCESS. */ + return (rc == 1) ? 
OMPI_SUCCESS : MPI_ERR_UNKNOWN; +} + +int ompi_datatype_unpack_external (const char datarep[], const void *inbuf, MPI_Aint insize, + MPI_Aint *position, void *outbuf, int outcount, + ompi_datatype_t *datatype) +{ + int rc = MPI_SUCCESS; + opal_convertor_t local_convertor; + struct iovec outvec; + unsigned int iov_count; + size_t size; + + OBJ_CONSTRUCT(&local_convertor, opal_convertor_t); + + /* the resulting convertor will be set to the position ZERO */ + opal_convertor_copy_and_prepare_for_recv( ompi_mpi_external32_convertor, + &(datatype->super), outcount, outbuf, + 0, + &local_convertor ); + + /* Check for truncation */ + opal_convertor_get_packed_size( &local_convertor, &size ); + if( (*position + size) > (unsigned int)insize ) { + OBJ_DESTRUCT( &local_convertor ); + return MPI_ERR_TRUNCATE; + } + + /* Prepare the iovec with all informations */ + outvec.iov_base = (char*) inbuf + (*position); + outvec.iov_len = size; + + /* Do the actual unpacking */ + iov_count = 1; + rc = opal_convertor_unpack( &local_convertor, &outvec, &iov_count, &size ); + *position += size; + OBJ_DESTRUCT( &local_convertor ); + + /* All done. Note that the convertor returns 1 upon success, not + OMPI_SUCCESS. */ + return (rc == 1) ? 
OMPI_SUCCESS : MPI_ERR_UNKNOWN; +} + +int ompi_datatype_pack_external_size(const char datarep[], int incount, + ompi_datatype_t *datatype, MPI_Aint *size) +{ + opal_convertor_t local_convertor; + size_t length; + + OBJ_CONSTRUCT(&local_convertor, opal_convertor_t); + + /* the resulting convertor will be set to the position ZERO */ + opal_convertor_copy_and_prepare_for_recv( ompi_mpi_external32_convertor, + &(datatype->super), incount, NULL, + CONVERTOR_SEND_CONVERSION, + &local_convertor ); + + opal_convertor_get_unpacked_size( &local_convertor, &length ); + *size = (MPI_Aint)length; + OBJ_DESTRUCT( &local_convertor ); + + return OMPI_SUCCESS; +} diff --git a/ompi/datatype/ompi_datatype_external32.c b/ompi/datatype/ompi_datatype_external32.c index 501f8d5fe89..108e14258b7 100644 --- a/ompi/datatype/ompi_datatype_external32.c +++ b/ompi/datatype/ompi_datatype_external32.c @@ -63,7 +63,7 @@ * All floating point values are in big-endian IEEE format. Double extended use 16 bytes, with * 15 exponent bits (bias = 10383), 112 mantissa bits and the same encoding as double. All * integers are in two's complement big-endian format. - * + * * All data are byte aligned, regardless of type. That's exactly what we expect as we can * consider the data stored in external32 as being packed. */ @@ -77,7 +77,7 @@ opal_convertor_t* ompi_mpi_external32_convertor = NULL; opal_convertor_t* ompi_mpi_local_convertor = NULL; int32_t ompi_datatype_default_convertors_init( void ) -{ +{ /* create the extern32 convertor */ ompi_mpi_external32_convertor = opal_convertor_create( ompi_datatype_external32_arch_id, 0 ); diff --git a/ompi/datatype/ompi_datatype_get_elements.c b/ompi/datatype/ompi_datatype_get_elements.c index a350f95fb71..0c1f8a7b842 100644 --- a/ompi/datatype/ompi_datatype_get_elements.c +++ b/ompi/datatype/ompi_datatype_get_elements.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/datatype/ompi_datatype_internal.h b/ompi/datatype/ompi_datatype_internal.h index 71609345138..76485370dfa 100644 --- a/ompi/datatype/ompi_datatype_internal.h +++ b/ompi/datatype/ompi_datatype_internal.h @@ -7,8 +7,9 @@ * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -154,16 +155,16 @@ #if SIZEOF_INT == 1 #define OMPI_DATATYPE_MPI_INT OMPI_DATATYPE_MPI_INT8_T -#define OMPI_DATATYPE_MPI_UNSIGNED_INT OMPI_DATATYPE_MPI_UINT8_T +#define OMPI_DATATYPE_MPI_UNSIGNED OMPI_DATATYPE_MPI_UINT8_T #elif SIZEOF_INT == 2 #define OMPI_DATATYPE_MPI_INT OMPI_DATATYPE_MPI_INT16_T -#define OMPI_DATATYPE_MPI_UNSIGNED_INT OMPI_DATATYPE_MPI_UINT16_T +#define OMPI_DATATYPE_MPI_UNSIGNED OMPI_DATATYPE_MPI_UINT16_T #elif SIZEOF_INT == 4 #define OMPI_DATATYPE_MPI_INT OMPI_DATATYPE_MPI_INT32_T -#define OMPI_DATATYPE_MPI_UNSIGNED_INT OMPI_DATATYPE_MPI_UINT32_T +#define OMPI_DATATYPE_MPI_UNSIGNED OMPI_DATATYPE_MPI_UINT32_T #elif SIZEOF_INT == 8 #define OMPI_DATATYPE_MPI_INT OMPI_DATATYPE_MPI_INT64_T -#define OMPI_DATATYPE_MPI_UNSIGNED_INT OMPI_DATATYPE_MPI_UINT64_T +#define OMPI_DATATYPE_MPI_UNSIGNED OMPI_DATATYPE_MPI_UINT64_T #endif #if SIZEOF_LONG == 1 @@ -181,16 +182,16 @@ #endif #if SIZEOF_LONG_LONG == 1 -#define OMPI_DATATYPE_MPI_LONG_LONG OMPI_DATATYPE_MPI_INT8_T +#define OMPI_DATATYPE_MPI_LONG_LONG_INT OMPI_DATATYPE_MPI_INT8_T #define OMPI_DATATYPE_MPI_UNSIGNED_LONG_LONG OMPI_DATATYPE_MPI_UINT8_T #elif SIZEOF_LONG_LONG == 2 -#define OMPI_DATATYPE_MPI_LONG_LONG OMPI_DATATYPE_MPI_INT16_T +#define OMPI_DATATYPE_MPI_LONG_LONG_INT OMPI_DATATYPE_MPI_INT16_T #define OMPI_DATATYPE_MPI_UNSIGNED_LONG_LONG OMPI_DATATYPE_MPI_UINT16_T #elif SIZEOF_LONG_LONG == 4 -#define OMPI_DATATYPE_MPI_LONG_LONG OMPI_DATATYPE_MPI_INT32_T +#define OMPI_DATATYPE_MPI_LONG_LONG_INT OMPI_DATATYPE_MPI_INT32_T #define OMPI_DATATYPE_MPI_UNSIGNED_LONG_LONG OMPI_DATATYPE_MPI_UINT32_T #elif SIZEOF_LONG_LONG == 8 -#define OMPI_DATATYPE_MPI_LONG_LONG OMPI_DATATYPE_MPI_INT64_T +#define OMPI_DATATYPE_MPI_LONG_LONG_INT OMPI_DATATYPE_MPI_INT64_T #define OMPI_DATATYPE_MPI_UNSIGNED_LONG_LONG OMPI_DATATYPE_MPI_UINT64_T #endif @@ -377,6 +378,13 @@ # define OMPI_DATATYPE_MPI_REAL16 OMPI_DATATYPE_MPI_UNAVAILABLE #endif +/* + * C++ datatypes, these 
map to C datatypes. + */ +#define OMPI_DATATYPE_MPI_CXX_BOOL OMPI_DATATYPE_MPI_C_BOOL +#define OMPI_DATATYPE_MPI_CXX_FLOAT_COMPLEX OMPI_DATATYPE_MPI_C_FLOAT_COMPLEX +#define OMPI_DATATYPE_MPI_CXX_DOUBLE_COMPLEX OMPI_DATATYPE_MPI_C_DOUBLE_COMPLEX +#define OMPI_DATATYPE_MPI_CXX_LONG_DOUBLE_COMPLEX OMPI_DATATYPE_MPI_C_LONG_DOUBLE_COMPLEX extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX_PREDEFINED]; @@ -525,13 +533,13 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX #if SIZEOF_INT == 2 #define OMPI_DATATYPE_INITIALIZER_INT OPAL_DATATYPE_INITIALIZER_INT2 -#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_INT OPAL_DATATYPE_INITIALIZER_UINT2 +#define OMPI_DATATYPE_INITIALIZER_UNSIGNED OPAL_DATATYPE_INITIALIZER_UINT2 #elif SIZEOF_INT == 4 #define OMPI_DATATYPE_INITIALIZER_INT OPAL_DATATYPE_INITIALIZER_INT4 -#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_INT OPAL_DATATYPE_INITIALIZER_UINT4 +#define OMPI_DATATYPE_INITIALIZER_UNSIGNED OPAL_DATATYPE_INITIALIZER_UINT4 #elif SIZEOF_INT == 8 #define OMPI_DATATYPE_INITIALIZER_INT OPAL_DATATYPE_INITIALIZER_INT8 -#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_INT OPAL_DATATYPE_INITIALIZER_UINT8 +#define OMPI_DATATYPE_INITIALIZER_UNSIGNED OPAL_DATATYPE_INITIALIZER_UINT8 #endif #if SIZEOF_LONG == 4 @@ -548,19 +556,19 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX #if HAVE_LONG_LONG #if SIZEOF_LONG_LONG == 4 -#define OMPI_DATATYPE_INITIALIZER_LONG_LONG OPAL_DATATYPE_INITIALIZER_INT4 +#define OMPI_DATATYPE_INITIALIZER_LONG_LONG_INT OPAL_DATATYPE_INITIALIZER_INT4 #define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG_LONG OPAL_DATATYPE_INITIALIZER_UINT4 #elif SIZEOF_LONG_LONG == 8 -#define OMPI_DATATYPE_INITIALIZER_LONG_LONG OPAL_DATATYPE_INITIALIZER_INT8 +#define OMPI_DATATYPE_INITIALIZER_LONG_LONG_INT OPAL_DATATYPE_INITIALIZER_INT8 #define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG_LONG OPAL_DATATYPE_INITIALIZER_UINT8 #elif SIZEOF_LONG_LONG == 16 -#define 
OMPI_DATATYPE_INITIALIZER_LONG_LONG OPAL_DATATYPE_INITIALIZER_INT16 +#define OMPI_DATATYPE_INITIALIZER_LONG_LONG_INT OPAL_DATATYPE_INITIALIZER_INT16 #define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG_LONG OPAL_DATATYPE_INITIALIZER_UINT16 #endif #else /* HAVE_LONG_LONG */ -#define OMPI_DATATYPE_INITIALIZER_LONG_LONG OPAL_DATATYPE_INIT_UNAVAILABLE (LONG_LONG, OMPI_DATATYPE_FLAG_DATA_C) +#define OMPI_DATATYPE_INITIALIZER_LONG_LONG_INT OPAL_DATATYPE_INIT_UNAVAILABLE (LONG_LONG_INT, OMPI_DATATYPE_FLAG_DATA_C) #define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG_LONG OPAL_DATATYPE_INIT_UNAVAILABLE (UNSIGNED_LONG_LONG, OMPI_DATATYPE_FLAG_DATA_C) #endif /* HAVE_LONG_LONG */ diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index 85d092067dd..9643b890663 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -15,8 +15,9 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -73,19 +74,19 @@ ompi_predefined_datatype_t ompi_mpi_lb = OMPI_DATATYPE_INIT_PREDEFIN ompi_predefined_datatype_t ompi_mpi_ub = OMPI_DATATYPE_INIT_PREDEFINED (UB, 0); ompi_predefined_datatype_t ompi_mpi_char = OMPI_DATATYPE_INIT_PREDEFINED (CHAR, OMPI_DATATYPE_FLAG_DATA_C); ompi_predefined_datatype_t ompi_mpi_signed_char = OMPI_DATATYPE_INIT_PREDEFINED (SIGNED_CHAR, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); -ompi_predefined_datatype_t ompi_mpi_unsigned_char = OMPI_DATATYPE_INIT_PREDEFINED (UNSIGNED_CHAR, OMPI_DATATYPE_FLAG_DATA_C); +ompi_predefined_datatype_t ompi_mpi_unsigned_char = OMPI_DATATYPE_INIT_PREDEFINED (UNSIGNED_CHAR, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); ompi_predefined_datatype_t ompi_mpi_byte = OMPI_DATATYPE_INIT_PREDEFINED (BYTE, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); ompi_predefined_datatype_t ompi_mpi_short = OMPI_DATATYPE_INIT_PREDEFINED (SHORT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); ompi_predefined_datatype_t ompi_mpi_unsigned_short = OMPI_DATATYPE_INIT_PREDEFINED (UNSIGNED_SHORT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); ompi_predefined_datatype_t ompi_mpi_int = OMPI_DATATYPE_INIT_PREDEFINED (INT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); -ompi_predefined_datatype_t ompi_mpi_unsigned = OMPI_DATATYPE_INIT_PREDEFINED (UNSIGNED_INT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); +ompi_predefined_datatype_t ompi_mpi_unsigned = OMPI_DATATYPE_INIT_PREDEFINED (UNSIGNED, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); ompi_predefined_datatype_t ompi_mpi_long = OMPI_DATATYPE_INIT_PREDEFINED (LONG, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); ompi_predefined_datatype_t ompi_mpi_unsigned_long = OMPI_DATATYPE_INIT_PREDEFINED (UNSIGNED_LONG, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); #if HAVE_LONG_LONG -ompi_predefined_datatype_t 
ompi_mpi_long_long_int = OMPI_DATATYPE_INIT_PREDEFINED (LONG_LONG, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); +ompi_predefined_datatype_t ompi_mpi_long_long_int = OMPI_DATATYPE_INIT_PREDEFINED (LONG_LONG_INT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); ompi_predefined_datatype_t ompi_mpi_unsigned_long_long = OMPI_DATATYPE_INIT_PREDEFINED (UNSIGNED_LONG_LONG, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); #else -ompi_predefined_datatype_t ompi_mpi_long_long_int = OMPI_DATATYPE_INIT_UNAVAILABLE (LONG_LONG, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); +ompi_predefined_datatype_t ompi_mpi_long_long_int = OMPI_DATATYPE_INIT_UNAVAILABLE (LONG_LONG_INT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); ompi_predefined_datatype_t ompi_mpi_unsigned_long_long = OMPI_DATATYPE_INIT_UNAVAILABLE (UNSIGNED_LONG_LONG, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT); #endif /* HAVE_LONG_LONG */ ompi_predefined_datatype_t ompi_mpi_float = OMPI_DATATYPE_INIT_PREDEFINED (FLOAT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_FLOAT ); @@ -105,8 +106,8 @@ ompi_predefined_datatype_t ompi_mpi_packed = OMPI_DATATYPE_INIT_PREDEFIN /* * C++ / C99 datatypes */ -ompi_predefined_datatype_t ompi_mpi_c_bool = OMPI_DATATYPE_INIT_PREDEFINED (BOOL, OMPI_DATATYPE_FLAG_DATA_C); -ompi_predefined_datatype_t ompi_mpi_cxx_bool = OMPI_DATATYPE_INIT_PREDEFINED (BOOL, OMPI_DATATYPE_FLAG_DATA_CPP); +ompi_predefined_datatype_t ompi_mpi_c_bool = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (BOOL, C_BOOL, OMPI_DATATYPE_FLAG_DATA_C); +ompi_predefined_datatype_t ompi_mpi_cxx_bool = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (BOOL, CXX_BOOL, OMPI_DATATYPE_FLAG_DATA_CPP); /* * Complex datatypes for C (base types), C++, and fortran @@ -121,9 +122,9 @@ ompi_predefined_datatype_t ompi_mpi_c_long_double_complex = OMPI_DATATYPE_INIT_U #endif /* HAVE_LONG_DOUBLE */ /* The C++ complex datatypes are the same as the C datatypes */ 
-ompi_predefined_datatype_t ompi_mpi_cxx_cplex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_FLOAT_COMPLEX, C_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); -ompi_predefined_datatype_t ompi_mpi_cxx_dblcplex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_DOUBLE_COMPLEX, C_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); -ompi_predefined_datatype_t ompi_mpi_cxx_ldblcplex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_LONG_DOUBLE_COMPLEX, C_LONG_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); +ompi_predefined_datatype_t ompi_mpi_cxx_cplex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_FLOAT_COMPLEX, CXX_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); +ompi_predefined_datatype_t ompi_mpi_cxx_dblcplex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_DOUBLE_COMPLEX, CXX_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); +ompi_predefined_datatype_t ompi_mpi_cxx_ldblcplex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_LONG_DOUBLE_COMPLEX, CXX_LONG_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); #if OMPI_HAVE_FORTRAN_COMPLEX ompi_predefined_datatype_t ompi_mpi_cplex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (OMPI_KIND_FORTRAN_COMPLEX, COMPLEX, OMPI_DATATYPE_FLAG_DATA_FORTRAN | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); @@ -257,7 +258,7 @@ ompi_predefined_datatype_t ompi_mpi_integer8 = OMPI_DATATYPE_INIT_UNAVAILA #if OMPI_HAVE_FORTRAN_INTEGER16 ompi_predefined_datatype_t ompi_mpi_integer16 = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE_FORTRAN (INT, INTEGER16, OMPI_SIZEOF_FORTRAN_INTEGER16, OMPI_ALIGNMENT_FORTRAN_INTEGER16, OMPI_DATATYPE_FLAG_DATA_INT); #else -ompi_predefined_datatype_t ompi_mpi_integer16 = OMPI_DATATYPE_INIT_UNAVAILABLE (INTEGER8, OMPI_DATATYPE_FLAG_DATA_FORTRAN | OMPI_DATATYPE_FLAG_DATA_INT); +ompi_predefined_datatype_t ompi_mpi_integer16 = OMPI_DATATYPE_INIT_UNAVAILABLE (INTEGER16, 
OMPI_DATATYPE_FLAG_DATA_FORTRAN | OMPI_DATATYPE_FLAG_DATA_INT); #endif /* @@ -419,7 +420,7 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}}; ptype->super.desc.desc = NULL; \ ptype->super.opt_desc.desc = NULL; \ OBJ_RELEASE( ptype ); \ - strncpy( (PDATA)->super.name, MPIDDTNAME, MPI_MAX_OBJECT_NAME ); \ + strncpy( (PDATA)->name, MPIDDTNAME, MPI_MAX_OBJECT_NAME ); \ } while(0) #define DECLARE_MPI2_COMPOSED_BLOCK_DDT( PDATA, MPIDDT, MPIDDTNAME, MPIType, FLAGS ) \ @@ -435,7 +436,7 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}}; ptype->super.desc.desc = NULL; \ ptype->super.opt_desc.desc = NULL; \ OBJ_RELEASE( ptype ); \ - strncpy( (PDATA)->super.name, (MPIDDTNAME), MPI_MAX_OBJECT_NAME ); \ + strncpy( (PDATA)->name, (MPIDDTNAME), MPI_MAX_OBJECT_NAME ); \ } while(0) #define DECLARE_MPI_SYNONYM_DDT( PDATA, MPIDDTNAME, PORIGDDT) \ @@ -520,7 +521,7 @@ int32_t ompi_datatype_init( void ) /* The order of the data registration should be the same as the * one in the mpif.h file. Any modification here should be * reflected there !!! Do the Fortran types first so that mpif.h - * can have consecutive, dense numbers. */ + * can have consecutive, dense numbers. */ /* This macro makes everything significantly easier to read below. All hail the moog! :-) */ @@ -535,7 +536,7 @@ int32_t ompi_datatype_init( void ) } /* - * This MUST match the order of ompi/include/mpif-common.h + * This MUST match the order of ompi/include/mpif-values.pl * Any change will break binary compatibility of Fortran programs. */ MOOG(datatype_null, 0); @@ -624,6 +625,9 @@ int32_t ompi_datatype_init( void ) /* MPI 3.0 types */ MOOG(count, 72); + /* MPI 2.2 types (again) */ + MOOG(c_bool, 73); + /** * Now make sure all non-contiguous types are marked as such. 
*/ diff --git a/ompi/datatype/ompi_datatype_sndrcv.c b/ompi/datatype/ompi_datatype_sndrcv.c index 6cc0b1f7b32..967c7509271 100644 --- a/ompi/datatype/ompi_datatype_sndrcv.c +++ b/ompi/datatype/ompi_datatype_sndrcv.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -42,7 +42,7 @@ * - communicator * Returns: - MPI_SUCCESS or error code */ -int32_t ompi_datatype_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdtype, +int32_t ompi_datatype_sndrcv( const void *sbuf, int32_t scount, const ompi_datatype_t* sdtype, void *rbuf, int32_t rcount, const ompi_datatype_t* rdtype) { opal_convertor_t send_convertor, recv_convertor; diff --git a/ompi/debuggers/MPI_Handles_interface.txt b/ompi/debuggers/MPI_Handles_interface.txt index 2b3463a0a42..f81bef8528d 100644 --- a/ompi/debuggers/MPI_Handles_interface.txt +++ b/ompi/debuggers/MPI_Handles_interface.txt @@ -277,7 +277,7 @@ predefined datatypes, and applications can create their own datatypes. creally created with MPI_TYPE_STRUCT, even though they're effectively equivalent). - TYPE_HINDEXED, TYPE_INDEXED, TYPE_HVECTOR, TYPE_VECTOR, - TYPE_STRUCT, TYPE_CONTIGUOUS, + TYPE_STRUCT, TYPE_CONTIGUOUS, JMS: with the type map provided by MPI, a debugger can show "holes" in a datatype (potentially indicating missed optimizations by diff --git a/ompi/debuggers/Makefile.am b/ompi/debuggers/Makefile.am index d9b08e298bb..8dafba9c9b6 100644 --- a/ompi/debuggers/Makefile.am +++ b/ompi/debuggers/Makefile.am @@ -5,15 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -43,12 +44,14 @@ headers = \ # Simple checks to ensure that the DSOs are functional dlopen_test_SOURCES = dlopen_test.c -dlopen_test_LDADD = $(top_builddir)/ompi/libmpi.la +dlopen_test_LDADD = \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la dlopen_test_DEPENDENCIES = $(ompi_predefined_LDADD) predefined_gap_test_SOURCES = predefined_gap_test.c predefined_gap_test_LDFLAGS = $(WRAPPER_EXTRA_LDFLAGS) -predefined_gap_test_LDADD = $(top_builddir)/ompi/libmpi.la +predefined_gap_test_LDADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la predefined_gap_test_DEPENDENCIES = $(ompi_predefined_LDADD) libdebuggers_la_SOURCES = \ @@ -76,7 +79,7 @@ libompi_dbg_msgq_la_LDFLAGS = -module -avoid-version # and "make check" will *build* a test in runtime/, but it won't *run* # it. 
:-( predefined_pad_test_LDFLAGS = $(WRAPPER_EXTRA_LDFLAGS) -predefined_pad_test_LDADD = $(top_builddir)/ompi/libmpi.la +predefined_pad_test_LDADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la predefined_pad_test_DEPENDENCIES = $(ompi_predefined_LDADD) # Conditionally install the header files diff --git a/ompi/debuggers/core.lt-dlopen_test-1424445474-6373 b/ompi/debuggers/core.lt-dlopen_test-1424445474-6373 deleted file mode 100644 index dca2dc1a63f..00000000000 Binary files a/ompi/debuggers/core.lt-dlopen_test-1424445474-6373 and /dev/null differ diff --git a/ompi/debuggers/debuggers.h b/ompi/debuggers/debuggers.h index 1cd8d329461..256bd3559ff 100644 --- a/ompi/debuggers/debuggers.h +++ b/ompi/debuggers/debuggers.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/debuggers/dlopen_test.c b/ompi/debuggers/dlopen_test.c index ba6117bfa13..978ca75d1c8 100644 --- a/ompi/debuggers/dlopen_test.c +++ b/ompi/debuggers/dlopen_test.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "opal/runtime/opal.h" #include "opal/mca/dl/base/base.h" @@ -67,7 +68,7 @@ static int do_test(void) not, skip this test. 
*/ fp = fopen(full_filename, "r"); if (NULL == fp) { - fprintf(stderr, + fprintf(stderr, "File %s.la doesn't seem to exist; skipping this test\n", full_filename); exit(77); diff --git a/ompi/debuggers/mpihandles_interface.h b/ompi/debuggers/mpihandles_interface.h index 20796660f7a..66412414e5f 100644 --- a/ompi/debuggers/mpihandles_interface.h +++ b/ompi/debuggers/mpihandles_interface.h @@ -1,16 +1,16 @@ /* - * Copyright (c) 2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2013 The University of Tennessee and The University of * Tennessee Research Foundation. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * Some text copied from and references made to mpi_interface.h. - * + * * Copyright (C) 2000-2004 by Etnus, LLC * Copyright (C) 1999 by Etnus, Inc. * Copyright (C) 1997-1998 Dolphin Interconnect Solutions Inc. @@ -564,7 +564,7 @@ int mpidbg_init_once(const mqs_basic_callbacks *callbacks); /*-----------------------------------------------------------------------*/ /* Query the DLL to find out what version of the interface it - supports. + supports. Parameters: @@ -581,21 +581,21 @@ int mpidbg_interface_version_compatibility(void); /* Returns a string describing this DLL. - Parameters: + Parameters: None This function will return: A null-terminated string describing this DLL. -*/ +*/ char *mpidbg_version_string(void); /*-----------------------------------------------------------------------*/ /* Returns the address width that this DLL was compiled with. - Parameters: + Parameters: None @@ -617,7 +617,7 @@ int mpidbg_dll_taddr_width(void); the image (e.g., all of the type offsets it needs could be kept here). 
The debugger will call mqs_destroy_image_info when it no longer wants to keep information about the given executable. - + This will be called once for each executable image in the parallel job. @@ -706,7 +706,7 @@ void mpidbg_finalize_per_image(mqs_image *image, mqs_image_info *image_info); mpidbg_finalize_per_process()). MPIDBG_ERR_*: if something went wrong. */ -int mpidbg_init_per_process(mqs_process *process, +int mpidbg_init_per_process(mqs_process *process, const mqs_process_callbacks *callbacks, struct mpidbg_handle_info_t *handle_types); @@ -751,7 +751,7 @@ void mpidbg_finalize_per_process(mqs_process *process, MPIDBG_ERR_NOT_FOUND: if the handle is not valid / found. MPIDBG_ERR_UNSUPPORTED: if this function is unsupported. */ -int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, +int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t c_comm, struct mpidbg_comm_info_t **info); @@ -777,7 +777,7 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, MPIDBG_ERR_NOT_FOUND: if the handle is not valid / found. MPIDBG_ERR_UNSUPPORTED: if this function is unsupported. */ -int mpidbg_comm_f2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_comm_f2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t f77_comm, mqs_taddr_t *c_comm); @@ -809,9 +809,9 @@ int mpidbg_comm_f2c(mqs_image *image, mqs_image_info *image_info, MPIDBG_ERR_NOT_FOUND: if the handle is not valid / found. MPIDBG_ERR_UNSUPPORTED: if this function is unsupported. 
*/ -int mpidbg_comm_cxx2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_comm_cxx2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t cxx_comm, + mqs_taddr_t cxx_comm, enum mpidbg_comm_info_bitmap_t comm_type, mqs_taddr_t *c_comm); @@ -825,17 +825,17 @@ int mpidbg_comm_cxx2c(mqs_image *image, mqs_image_info *image_info, "errhandler_type" argument to the cxx2c function because MPI::Errhandler has no derived classes. */ -int mpidbg_errhandler_query(mqs_image *image, mqs_image_info *image_info, +int mpidbg_errhandler_query(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t errhandler, struct mpidbg_errhandler_info_t **info); -int mpidbg_errhandler_f2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_errhandler_f2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t f77_errhandler, + mqs_taddr_t f77_errhandler, mqs_taddr_t *c_errhandler); -int mpidbg_errhandler_cxx2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_errhandler_cxx2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t cxx_errhandler, + mqs_taddr_t cxx_errhandler, mqs_taddr_t *c_errhandler); /*----------------------------------------------------------------------- @@ -846,16 +846,16 @@ int mpidbg_errhandler_cxx2c(mqs_image *image, mqs_image_info *image_info, /* These functions are analogous to the mpidbg_comm_* functions, but for MPI_Request. 
*/ -int mpidbg_request_query(mqs_image *image, mqs_image_info *image_info, +int mpidbg_request_query(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t request, struct mpidbg_request_info_t **info); -int mpidbg_request_f2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_request_f2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t f77_request, mqs_taddr_t *c_request); -int mpidbg_request_cxx2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_request_cxx2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t cxx_request, + mqs_taddr_t cxx_request, enum mpidbg_request_info_bitmap_t request_type, mqs_taddr_t *c_request); @@ -867,16 +867,16 @@ int mpidbg_request_cxx2c(mqs_image *image, mqs_image_info *image_info, /* These functions are analogous to the mpidbg_comm_* functions, but for MPI_Status. 
*/ -int mpidbg_status_query(mqs_image *image, mqs_image_info *image_info, +int mpidbg_status_query(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t status, struct mpidbg_status_info_t **info); -int mpidbg_status_f2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_status_f2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t f77_status, mqs_taddr_t *c_status); -int mpidbg_status_cxx2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_status_cxx2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t cxx_status, + mqs_taddr_t cxx_status, mqs_taddr_t *c_status); #endif /* __MPIDBG_INTERFACE_H__ */ diff --git a/ompi/debuggers/msgq_interface.h b/ompi/debuggers/msgq_interface.h index bb34072b837..097030f64c8 100644 --- a/ompi/debuggers/msgq_interface.h +++ b/ompi/debuggers/msgq_interface.h @@ -4,9 +4,9 @@ * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -58,7 +58,7 @@ * * Aug 5 2002 CLG: Tiny fix to 64 bit taddr_t definition on sun. * Oct 6 2000 JHC: Add all of the MPI-2 relevant types and functions. - * This does need a compatibility number change to + * This does need a compatibility number change to * ensure new libraries can't get loaded into old debuggers. * New debuggers can continue to use old libraries, though. * New functions under control of FOR_MPI2 @@ -72,7 +72,7 @@ * Mar 17 2000 JHC: Add FORCE_32BIT_MPI conditional compilation flag. * Mar 3 2000 JHC: Widen the tword_t and taddr_t on AIX, now that IBM * has 64 bit machines. Increment the version compatibility - * number on AIX (only) since this is an incompatible change in + * number on AIX (only) since this is an incompatible change in * the interface. 
* Oct 1 1998 JHC: Change MQS_INVALID_PROCESS to -1, TV would never generate * the old value anyway. @@ -92,9 +92,9 @@ * * The interface is specified at the C level, to avoid C++ compiler issues. * - * The interface allows code in the DLL to + * The interface allows code in the DLL to * 1) find named types from the debugger's type system and look up fields in them - * 2) find the address of named external variables + * 2) find the address of named external variables * 3) access objects at absolute addresses in the target process. * 4) convert objects from target format to host format. * @@ -110,7 +110,7 @@ * this provides type checking while maintaining information hiding. * * All named entities in here start with the prefix "mqs_" (for - * Message Queue Support), all the debugger callbacks are made via + * Message Queue Support), all the debugger callbacks are made via * callback tables, so the real (linkage) names of the functions are * not visible to the DLL. */ @@ -127,7 +127,7 @@ BEGIN_C_DECLS /*********************************************************************** - * Version of the interface this header represents + * Version of the interface this header represents */ enum { @@ -210,10 +210,10 @@ typedef struct int bool_size; /* sizeof(bool) */ int size_t_size; /* sizeof(size_t) */ } mqs_target_type_sizes; - -/* Result codes. - * mqs_ok is returned for success. - * Anything else implies a failure of some sort. + +/* Result codes. + * mqs_ok is returned for success. + * Anything else implies a failure of some sort. * * Most of the functions actually return one of these, however to avoid * any potential issues with different compilers implementing enums as @@ -254,8 +254,8 @@ typedef enum { /* Which queue are we interested in ? 
*/ typedef enum { - mqs_pending_sends, - mqs_pending_receives, + mqs_pending_sends, + mqs_pending_receives, mqs_unexpected_messages } mqs_op_class; @@ -265,7 +265,7 @@ enum MQS_INVALID_PROCESS = -1 }; -enum mqs_status +enum mqs_status { mqs_st_pending, mqs_st_matched, mqs_st_complete }; @@ -335,7 +335,7 @@ enum { /* A structure to represent a communicator */ typedef struct { - mqs_taddr_t unique_id; /* A unique tag for the communicator */ + mqs_taddr_t unique_id; /* A unique tag for the communicator */ mqs_tword_t local_rank; /* The rank of this process Comm_rank */ mqs_tword_t size; /* Comm_size */ char name[64]; /* the name if it has one */ @@ -361,12 +361,12 @@ typedef struct /* Fields valid if status >= matched or it's a send */ mqs_tword_t actual_local_rank; /* Actual local rank */ mqs_tword_t actual_global_rank; /* As above but in COMM_WORLD */ - mqs_tword_t actual_tag; + mqs_tword_t actual_tag; mqs_tword_t actual_length; - + /* Additional strings which can be filled in if the DLL has more * info. (Uninterpreted by the debugger, simply displayed to the - * user). + * user). * * Can be used to give the name of the function causing this request, * for instance. @@ -427,10 +427,10 @@ typedef void * (*mqs_malloc_ft) (size_t); typedef void (*mqs_free_ft) (void *); /*********************************************************************** - * Type access functions + * Type access functions */ -/* Given an executable image look up a named type in it. +/* Given an executable image look up a named type in it. * Returns a type handle, or the null pointer if the type could not be * found. Since the debugger may load debug information lazily, the * MPI run time library should ensure that the type definitions @@ -441,7 +441,7 @@ typedef void (*mqs_free_ft) (void *); typedef mqs_type * (*mqs_find_type_ft)(mqs_image *, char *, mqs_lang_code); /* Given the handle for a type (assumed to be a structure) return the - * byte offset of the named field. 
If the field cannot be found + * byte offset of the named field. If the field cannot be found * the result will be -1. */ typedef int (*mqs_field_offset_ft) (mqs_type *, char *); @@ -459,7 +459,7 @@ typedef void (*mqs_get_type_sizes_ft) (mqs_process *, mqs_target_type_sizes *); */ /* Fetch data from the process into a buffer into a specified buffer. - * N.B. + * N.B. * The data is the same as that in the target process when accessed * as a byte array. You *must* use mqs_target_to_host to do any * necessary byte flipping if you want to look at it at larger @@ -484,7 +484,7 @@ typedef char * (*mqs_errorstring_ft) (int); typedef struct mqs_basic_callbacks { mqs_malloc_ft mqs_malloc_fp; - mqs_free_ft mqs_free_fp; + mqs_free_ft mqs_free_fp; mqs_dprints_ft mqs_dprints_fp; mqs_errorstring_ft mqs_errorstring_fp; mqs_put_image_info_ft mqs_put_image_info_fp; @@ -599,15 +599,15 @@ extern int mqs_destroy_job_info (mqs_job_info *); #endif /*********************************************************************** - * Calls related to a specific process. These will only be called if the - * image which this is an instance of passes the has_message_queues tests. + * Calls related to a specific process. These will only be called if the + * image which this is an instance of passes the has_message_queues tests. * * If you can't tell whether the process will have valid message queues - * just by looking at the image, then you should return mqs_ok from + * just by looking at the image, then you should return mqs_ok from * mqs_image_has_queues and let mqs_process_has_queues handle it. */ -/* Set up whatever process specific information we need. +/* Set up whatever process specific information we need. * For instance addresses of global variables should be handled here, * rather than in the image information if anything is a dynamic library * which could end up mapped differently in different processes. 
@@ -619,7 +619,7 @@ OMPI_DECLSPEC extern void mqs_destroy_process_info (mqs_process_info *); * if the image claims to have message queues. This lets you actually * delve inside the process to look at variables before deciding if * the process really can support message queue extraction. - */ + */ OMPI_DECLSPEC extern int mqs_process_has_queues (mqs_process *, char **); /*********************************************************************** @@ -627,7 +627,7 @@ OMPI_DECLSPEC extern int mqs_process_has_queues (mqs_process *, char **); * * The model here is that the debugger calls down to the library to initialise * an iteration over a specific class of things, and then keeps calling - * the "next" function until it returns mqs_false. + * the "next" function until it returns mqs_false. * * For communicators we separate stepping from extracting information, * because we want to use the state of the communicator iterator to qualify @@ -688,7 +688,7 @@ extern int mqs_next_new_process (mqs_process *, mqs_process_location *); /* Once the debugger has attached to a newly created process it will * set it up in the normal way, and then set its identity. */ -extern int mqs_set_process_identity (mqs_process *, int); +extern int mqs_set_process_identity (mqs_process *, int); #endif END_C_DECLS diff --git a/ompi/debuggers/ompi_common_dll.c b/ompi/debuggers/ompi_common_dll.c index 9188322b7dd..9395d93241a 100644 --- a/ompi/debuggers/ompi_common_dll.c +++ b/ompi/debuggers/ompi_common_dll.c @@ -6,15 +6,15 @@ * reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -44,6 +44,8 @@ #include "ompi_common_dll_defs.h" +#include + /* Basic callbacks into the debugger */ const mqs_basic_callbacks *mqs_basic_entrypoints = NULL; @@ -369,16 +371,16 @@ int ompi_fill_in_type_info(mqs_image *image, char **message) goto type_missing; } ompi_field_offset(i_info->mca_topo_base_module_t.offset.mtc_cart.ndims, - cart_type, mca_topo_base_comm_cart_2_2_0_t, + cart_type, mca_topo_base_comm_cart_2_2_0_t, ndims); ompi_field_offset(i_info->mca_topo_base_module_t.offset.mtc_cart.dims, - cart_type, mca_topo_base_comm_cart_2_2_0_t, + cart_type, mca_topo_base_comm_cart_2_2_0_t, dims); ompi_field_offset(i_info->mca_topo_base_module_t.offset.mtc_cart.periods, - cart_type, mca_topo_base_comm_cart_2_2_0_t, + cart_type, mca_topo_base_comm_cart_2_2_0_t, periods); ompi_field_offset(i_info->mca_topo_base_module_t.offset.mtc_cart.coords, - cart_type, mca_topo_base_comm_cart_2_2_0_t, + cart_type, mca_topo_base_comm_cart_2_2_0_t, coords); i_info->mca_topo_base_module_t.offset.mtc_cart.ndims += offset; i_info->mca_topo_base_module_t.offset.mtc_cart.dims += offset; @@ -392,13 +394,13 @@ int ompi_fill_in_type_info(mqs_image *image, char **message) goto type_missing; } ompi_field_offset(i_info->mca_topo_base_module_t.offset.mtc_graph.nnodes, - graph_type, mca_topo_base_comm_graph_2_2_0_t, + graph_type, mca_topo_base_comm_graph_2_2_0_t, nnodes); ompi_field_offset(i_info->mca_topo_base_module_t.offset.mtc_graph.index, - graph_type, mca_topo_base_comm_graph_2_2_0_t, + graph_type, mca_topo_base_comm_graph_2_2_0_t, index); ompi_field_offset(i_info->mca_topo_base_module_t.offset.mtc_graph.edges, - graph_type, mca_topo_base_comm_graph_2_2_0_t, + graph_type, mca_topo_base_comm_graph_2_2_0_t, edges); i_info->mca_topo_base_module_t.offset.mtc_graph.nnodes += offset; i_info->mca_topo_base_module_t.offset.mtc_graph.index += offset; @@ -482,7 +484,7 @@ int ompi_fill_in_type_info(mqs_image *image, 
char **message) /* get ompi_datatype_t super.size which requires the offset * of super and then the offset of size in opal_datatype_t. */ - { + { int super_offset = 0; ompi_field_offset(super_offset, @@ -519,7 +521,7 @@ int ompi_fill_in_type_info(mqs_image *image, char **message) * Functions to access the image memory. They are specialized based * * on the type we want to access and the debugged process architecture * ***********************************************************************/ -mqs_taddr_t ompi_fetch_pointer (mqs_process *proc, mqs_taddr_t addr, +mqs_taddr_t ompi_fetch_pointer (mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info) { int isize = p_info->sizes.pointer_size; @@ -527,15 +529,15 @@ mqs_taddr_t ompi_fetch_pointer (mqs_process *proc, mqs_taddr_t addr, mqs_taddr_t res = 0; if (mqs_ok == mqs_fetch_data (proc, addr, isize, buffer)) - mqs_target_to_host (proc, buffer, - ((char *)&res) + (host_is_big_endian ? sizeof(mqs_taddr_t)-isize : 0), + mqs_target_to_host (proc, buffer, + ((char *)&res) + (host_is_big_endian ? sizeof(mqs_taddr_t)-isize : 0), isize); return res; } /* fetch_pointer */ /***********************************************************************/ -mqs_tword_t ompi_fetch_int (mqs_process *proc, mqs_taddr_t addr, +mqs_tword_t ompi_fetch_int (mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info) { int isize = p_info->sizes.int_size; @@ -543,15 +545,15 @@ mqs_tword_t ompi_fetch_int (mqs_process *proc, mqs_taddr_t addr, mqs_tword_t res = 0; if (mqs_ok == mqs_fetch_data (proc, addr, isize, buffer)) { - mqs_target_to_host (proc, buffer, - ((char *)&res) + (host_is_big_endian ? sizeof(mqs_tword_t)-isize : 0), + mqs_target_to_host (proc, buffer, + ((char *)&res) + (host_is_big_endian ? 
sizeof(mqs_tword_t)-isize : 0), isize); } return res; } /* fetch_int */ /***********************************************************************/ -mqs_tword_t ompi_fetch_bool(mqs_process *proc, mqs_taddr_t addr, +mqs_tword_t ompi_fetch_bool(mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info) { int isize = p_info->sizes.bool_size; @@ -562,7 +564,7 @@ mqs_tword_t ompi_fetch_bool(mqs_process *proc, mqs_taddr_t addr, } /* fetch_bool */ /***********************************************************************/ -mqs_taddr_t ompi_fetch_size_t(mqs_process *proc, mqs_taddr_t addr, +mqs_taddr_t ompi_fetch_size_t(mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info) { int isize = p_info->sizes.size_t_size; @@ -570,30 +572,30 @@ mqs_taddr_t ompi_fetch_size_t(mqs_process *proc, mqs_taddr_t addr, mqs_taddr_t res = 0; if (mqs_ok == mqs_fetch_data (proc, addr, isize, buffer)) - mqs_target_to_host (proc, buffer, - ((char *)&res) + (host_is_big_endian ? sizeof(mqs_taddr_t)-isize : 0), + mqs_target_to_host (proc, buffer, + ((char *)&res) + (host_is_big_endian ? 
sizeof(mqs_taddr_t)-isize : 0), isize); - + return res; } /* fetch_size_t */ /***********************************************************************/ -int ompi_fetch_opal_pointer_array_info(mqs_process *proc, mqs_taddr_t addr, +int ompi_fetch_opal_pointer_array_info(mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info, - int *size, int *lowest_free, + int *size, int *lowest_free, int *number_free) { mqs_image *image = mqs_get_image(proc); mpi_image_info *i_info = (mpi_image_info *) mqs_get_image_info(image); - *size = ompi_fetch_int(proc, + *size = ompi_fetch_int(proc, addr + i_info->opal_pointer_array_t.offset.size, p_info); - *lowest_free = ompi_fetch_int(proc, + *lowest_free = ompi_fetch_int(proc, addr + i_info->opal_pointer_array_t.offset.lowest_free, p_info); - *number_free = ompi_fetch_int(proc, + *number_free = ompi_fetch_int(proc, addr + i_info->opal_pointer_array_t.offset.number_free, p_info); return mqs_ok; @@ -601,7 +603,7 @@ int ompi_fetch_opal_pointer_array_info(mqs_process *proc, mqs_taddr_t addr, /***********************************************************************/ -int ompi_fetch_opal_pointer_array_item(mqs_process *proc, mqs_taddr_t addr, +int ompi_fetch_opal_pointer_array_item(mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info, int index, mqs_taddr_t *item) { @@ -614,13 +616,13 @@ int ompi_fetch_opal_pointer_array_item(mqs_process *proc, mqs_taddr_t addr, return mqs_no_information; } - ompi_fetch_opal_pointer_array_info(proc, addr, p_info, &size, + ompi_fetch_opal_pointer_array_info(proc, addr, p_info, &size, &lowest_free, &number_free); if (index >= size) { return mqs_no_information; } - base = ompi_fetch_pointer(proc, + base = ompi_fetch_pointer(proc, addr + i_info->opal_pointer_array_t.offset.addr, p_info); *item = ompi_fetch_pointer(proc, @@ -629,3 +631,20 @@ int ompi_fetch_opal_pointer_array_item(mqs_process *proc, mqs_taddr_t addr, return mqs_ok; } + +int ompi_get_lib_version(char * buf, int size) { + int ret; + ret 
= snprintf(buf, size-1, "Open MPI v%d.%d.%d%s%s%s%s%s%s%s%s%s", + OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION, + (strlen(OMPI_GREEK_VERSION) > 0)?OMPI_GREEK_VERSION:"", + (strlen(OPAL_PACKAGE_STRING) > 0)?", package: ":"", + (strlen(OPAL_PACKAGE_STRING) > 0)?OPAL_PACKAGE_STRING:"", + (strlen(OPAL_IDENT_STRING)> 0)?", ident: ":"", + (strlen(OPAL_IDENT_STRING)> 0)?OMPI_IDENT_STRING:"", + (strlen(OMPI_REPO_REV) > 0)?", repo rev: ":"", + (strlen(OMPI_REPO_REV) > 0)?OMPI_REPO_REV:"", + (strlen(OMPI_RELEASE_DATE) > 0)?", ":"", + (strlen(OMPI_RELEASE_DATE) > 0)?OMPI_RELEASE_DATE:""); + buf[size-1] = '\0'; + return ret; +} diff --git a/ompi/debuggers/ompi_common_dll_defs.h b/ompi/debuggers/ompi_common_dll_defs.h index 527b7fee4c5..6f4e6b89381 100644 --- a/ompi/debuggers/ompi_common_dll_defs.h +++ b/ompi/debuggers/ompi_common_dll_defs.h @@ -7,10 +7,12 @@ * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -45,7 +47,7 @@ * Information associated with a specific executable image. Common * across all DLLs. */ -typedef struct +typedef struct { /* Functions needed here */ const struct mqs_image_callbacks * image_callbacks; @@ -258,12 +260,12 @@ typedef struct /* For the caller to hang their own stuff */ void *extra; -} mpi_image_info; +} mpi_image_info; /***********************************************************************/ /* Information for a single process. Common across all DLLs. 
*/ -typedef struct +typedef struct { const struct mqs_process_callbacks * process_callbacks; /* Functions needed here */ @@ -307,27 +309,29 @@ extern const mqs_basic_callbacks *mqs_basic_entrypoints; int ompi_fill_in_type_info(mqs_image *image, char **message); /* Fetch a pointer from the process */ -mqs_taddr_t ompi_fetch_pointer(mqs_process *proc, mqs_taddr_t addr, +mqs_taddr_t ompi_fetch_pointer(mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info); /* Fetch an int from the process */ -mqs_tword_t ompi_fetch_int(mqs_process *proc, mqs_taddr_t addr, +mqs_tword_t ompi_fetch_int(mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info); /* Fetch a bool from the process */ -mqs_tword_t ompi_fetch_bool(mqs_process *proc, mqs_taddr_t addr, +mqs_tword_t ompi_fetch_bool(mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info); /* Fetch a size_t from the process */ -mqs_taddr_t ompi_fetch_size_t(mqs_process *proc, mqs_taddr_t addr, +mqs_taddr_t ompi_fetch_size_t(mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info); /* Helpers to fetch stuff from an opal_pointer_array_t */ -int ompi_fetch_opal_pointer_array_info(mqs_process *proc, mqs_taddr_t addr, +int ompi_fetch_opal_pointer_array_info(mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info, - int *size, int *lowest_free, + int *size, int *lowest_free, int *number_free); -int ompi_fetch_opal_pointer_array_item(mqs_process *proc, mqs_taddr_t addr, +int ompi_fetch_opal_pointer_array_item(mqs_process *proc, mqs_taddr_t addr, mpi_process_info *p_info, int index, mqs_taddr_t *item); +#define OMPI_MAX_VER_SIZE 256 +int ompi_get_lib_version(char *buf, int size); #endif diff --git a/ompi/debuggers/ompi_debugger_canary.c b/ompi/debuggers/ompi_debugger_canary.c index 5c2c4dd5857..3ad4cac0a8c 100644 --- a/ompi/debuggers/ompi_debugger_canary.c +++ b/ompi/debuggers/ompi_debugger_canary.c @@ -2,9 +2,9 @@ * Copyright (c) 2008 Cisco Systems, Inc. 
All rights reserved * Copyright (c) 2008-2009 Sun Microystems, Inc. All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -12,7 +12,7 @@ /* * Include all header files for the datatypes that we care about / use - * in the DLL code + * in the DLL code */ #include "ompi/mca/topo/topo.h" #include "ompi/mca/pml/base/pml_base_request.h" diff --git a/ompi/debuggers/ompi_debuggers.c b/ompi/debuggers/ompi_debuggers.c index 8d5177c179a..5f8592eb597 100644 --- a/ompi/debuggers/ompi_debuggers.c +++ b/ompi/debuggers/ompi_debuggers.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -92,7 +92,7 @@ OMPI_DECLSPEC int MPIR_debug_typedefs_sizeof[] = { sizeof(bool), sizeof(size_t) }; - + /* * Values defined by the standardized interface; do not change these * values @@ -131,16 +131,16 @@ OMPI_DECLSPEC char *MPIR_debug_abort_string = ""; static char *ompi_debugger_dll_path = NULL; /* Check for a file in few direct ways for portability */ -static void check(char *dir, char *file, char **locations) +static void check(char *dir, char *file, char **locations) { char *str; asprintf(&str, "%s/%s.so", dir, file); - + #if defined(HAVE_SYS_STAT_H) { struct stat buf; - + /* Use stat() */ if (0 == stat(str, &buf)) { opal_argv_append_nosize(&locations, file); @@ -149,7 +149,7 @@ static void check(char *dir, char *file, char **locations) #else { FILE *fp; - + /* Just try to open the file */ if (NULL != (fp = fopen(str, "r"))) { fclose(fp); @@ -175,7 +175,7 @@ ompi_debugger_setup_dlls(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &ompi_debugger_dll_path); - + /* Search the directory for MPI debugger DLLs */ if (NULL != ompi_debugger_dll_path) { dirs = opal_argv_split(ompi_debugger_dll_path, ':'); @@ -212,10 +212,10 @@ void ompi_debugger_notify_abort(char *reason) MPIR_Breakpoint(); } -/* - * Breakpoint function for parallel debuggers. This function is also - * defined in orterun for the starter. It should never conflict with - * this +/* + * Breakpoint function for parallel debuggers. This function is also + * defined in orterun for the starter. It should never conflict with + * this */ void* MPIR_Breakpoint(void) { diff --git a/ompi/debuggers/ompi_mpihandles_dll.c b/ompi/debuggers/ompi_mpihandles_dll.c index 1f598d6b905..05a20e113f6 100644 --- a/ompi/debuggers/ompi_mpihandles_dll.c +++ b/ompi/debuggers/ompi_mpihandles_dll.c @@ -5,10 +5,12 @@ * reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. 
+ * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,12 +38,8 @@ #include "ompi_config.h" -#if defined(HAVE_STRING_H) #include -#endif /* defined(HAVE_STRING_H) */ -#if defined(HAVE_STDLIB_H) #include -#endif /* defined(HAVE_STDLIB_H) */ #include "ompi/mca/pml/base/pml_base_request.h" #include "mpihandles_interface.h" @@ -70,28 +68,24 @@ struct mpidbg_name_map_t *mpidbg_status_name_map = NULL; #if defined(__SUNPRO_C) /* * These symbols are defined here because of the different way compilers - * may handle extern definitions. The particular case that is causing - * problems is when there is an extern variable that is accessed in a - * static inline function. For example, here is the code we often see in + * may handle extern definitions. The particular case that is causing + * problems is when there is an extern variable that is accessed in a + * static inline function. For example, here is the code we often see in * a header file. - * + * * extern int request_complete; * static inline check_request(void) { * request_complete = 1; * } * - * If this code exists in a header file and gets included in a source - * file, then some compilers expect to have request_complete defined - * somewhere even if request_complete is never referenced and - * check_request is never called. Other compilers do not need them defined - * if they are never referenced in the source file. Therefore, to handle - * cases like the above with compilers that require the symbol (like + * If this code exists in a header file and gets included in a source + * file, then some compilers expect to have request_complete defined + * somewhere even if request_complete is never referenced and + * check_request is never called. Other compilers do not need them defined + * if they are never referenced in the source file. 
Therefore, to handle + * cases like the above with compilers that require the symbol (like * Sun Studio) we add in these definitions here. */ -size_t ompi_request_completed; -opal_condition_t ompi_request_cond; -size_t ompi_request_waiting; -opal_mutex_t ompi_request_lock; opal_mutex_t opal_event_lock; int opal_progress_spin_count; bool opal_mutex_check_locks; @@ -115,7 +109,7 @@ static struct mpidbg_name_map_t *alloc_map(mqs_image *image, int len) /* Small helper function: look up a symbol, and if we find it, put it in a map entry */ -static void fill_map(mqs_image *image, +static void fill_map(mqs_image *image, char *public_name, char *private_name, struct mpidbg_name_map_t *map) { @@ -135,14 +129,14 @@ static void fill_map(mqs_image *image, } } - printf("OMPI MPI handles DLL: fill_map: Unable to find symbol: %s\n", + printf("OMPI MPI handles DLL: fill_map: Unable to find symbol: %s\n", private_name); } /* Helper function to lookup MPI attributes and fill an mpidbg_attribute_pair_t array with their keys/values */ -static int fill_attributes(int *num_attrs, - struct mpidbg_attribute_pair_t **attrs, +static int fill_attributes(int *num_attrs, + struct mpidbg_attribute_pair_t **attrs, mqs_taddr_t table) { /* JMS fill me in */ @@ -168,12 +162,17 @@ int mpidbg_interface_version_compatibility(void) } +static char mpidbg_version_str[OMPI_MAX_VER_SIZE]; + /* Returns a string specific to OMPI */ char *mpidbg_version_string(void) { + int offset; printf("mpidbg_version_string\n"); - return "Open MPI handle interpretation support for parallel" - " debuggers compiled on " __DATE__; + offset = snprintf(mpidbg_version_str, OMPI_MAX_VER_SIZE-1, + "Open MPI handle interpretation support for parallel debuggers "); + ompi_get_lib_version(mpidbg_version_str+offset, OMPI_MAX_VER_SIZE-offset); + return mpidbg_version_str; } @@ -192,7 +191,7 @@ int mpidbg_init_per_image(mqs_image *image, const mqs_image_callbacks *icb, struct mpidbg_handle_info_t *handle_types) { char **message; - 
mpi_image_info *i_info = + mpi_image_info *i_info = (mpi_image_info *) mqs_malloc(sizeof(mpi_image_info)); printf("mpidbg_init_per_image\n"); @@ -225,28 +224,28 @@ int mpidbg_init_per_image(mqs_image *image, const mqs_image_callbacks *icb, /* JMS: these ompi types are just the "foo" types; but OMPI MPI types are all "foo*"'s -- is this right? If this is wrong, I *suspect* that something like the following may be right: - + handle_types->hi_c_comm = mqs_find_type(image, "ompi_communicator_t*", mqs_lang_c); Need to confirm this with the DDT guys... */ handle_types->hi_c_comm = i_info->ompi_communicator_t.type; handle_types->hi_c_datatype = i_info->ompi_datatype_t.type; - handle_types->hi_c_errhandler = + handle_types->hi_c_errhandler = mqs_find_type(image, "ompi_errhandler_t", mqs_lang_c); - handle_types->hi_c_file = + handle_types->hi_c_file = mqs_find_type(image, "ompi_file_t", mqs_lang_c); handle_types->hi_c_group = i_info->ompi_group_t.type; - handle_types->hi_c_info = + handle_types->hi_c_info = mqs_find_type(image, "ompi_info_t", mqs_lang_c); /* JMS: "MPI_Offset" is a typedef (see comment about MPI_Aint above) */ handle_types->hi_c_offset = mqs_find_type(image, "MPI_Offset", mqs_lang_c); - handle_types->hi_c_op = + handle_types->hi_c_op = mqs_find_type(image, "ompi_op_t", mqs_lang_c); handle_types->hi_c_request = i_info->ompi_request_t.type; handle_types->hi_c_status = i_info->ompi_status_public_t.type; - handle_types->hi_c_win = + handle_types->hi_c_win = mqs_find_type(image, "ompi_win_t", mqs_lang_c); /* MPI::Aint is a typedef to MPI_Aint */ @@ -287,19 +286,19 @@ int mpidbg_init_per_image(mqs_image *image, const mqs_image_callbacks *icb, mqs_find_type(image, "MPI::Win", mqs_lang_cplus); /* Tell the debuger what capabilities we have */ - mpidbg_comm_capabilities = - MPIDBG_COMM_CAP_BASIC | - MPIDBG_COMM_CAP_STRING_NAMES | - MPIDBG_COMM_CAP_FREED_HANDLE | + mpidbg_comm_capabilities = + MPIDBG_COMM_CAP_BASIC | + MPIDBG_COMM_CAP_STRING_NAMES | + 
MPIDBG_COMM_CAP_FREED_HANDLE | MPIDBG_COMM_CAP_FREED_OBJECT; mpidbg_errhandler_capabilities = - MPIDBG_ERRH_CAP_BASIC | - MPIDBG_ERRH_CAP_STRING_NAMES | - MPIDBG_ERRH_CAP_FREED_HANDLE | + MPIDBG_ERRH_CAP_BASIC | + MPIDBG_ERRH_CAP_STRING_NAMES | + MPIDBG_ERRH_CAP_FREED_HANDLE | MPIDBG_ERRH_CAP_FREED_OBJECT; - mpidbg_request_capabilities = + mpidbg_request_capabilities = MPIDBG_REQUEST_CAP_BASIC; - mpidbg_status_capabilities = + mpidbg_status_capabilities = MPIDBG_STATUS_CAP_BASIC; /* All done */ @@ -327,16 +326,16 @@ void mpidbg_finalize_per_image(mqs_image *image, mqs_image_info *info) * is attached to it, then TV will believe that this process has no * message queue information. */ -int mpidbg_init_per_process(mqs_process *process, +int mpidbg_init_per_process(mqs_process *process, const mqs_process_callbacks *pcb, struct mpidbg_handle_info_t *handle_types) -{ +{ mqs_image *image; mpi_image_info *i_info; /* Extract the addresses of the global variables we need and save them away */ - mpi_process_info *p_info = + mpi_process_info *p_info = (mpi_process_info *) mqs_malloc(sizeof(mpi_process_info)); printf("mpidbg_init_per_process\n"); @@ -437,7 +436,7 @@ void mpidbg_finalize_per_process(mqs_process *process, mqs_process_info *info) /*---------------------------------------------------------------------*/ -int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, +int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t c_comm, struct mpidbg_comm_info_t **info) { @@ -462,12 +461,12 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, MPIDBG_MAX_OBJECT_NAME, (*info)->comm_name); /* Get this process' rank in the comm */ - (*info)->comm_rank = ompi_fetch_int(process, + (*info)->comm_rank = ompi_fetch_int(process, c_comm + i_info->ompi_communicator_t.offset.c_my_rank, p_info); /* Analyze the flags on the comm */ - flags = ompi_fetch_int(process, + flags = 
ompi_fetch_int(process, c_comm + i_info->ompi_communicator_t.offset.c_flags, p_info); (*info)->comm_bitflags = 0; @@ -495,15 +494,15 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, if (0 != (flags & OMPI_COMM_INVALID)) { (*info)->comm_bitflags |= MPIDBG_COMM_INFO_FREED_OBJECT; } - + /* Look up the local group */ - group = ompi_fetch_pointer(process, + group = ompi_fetch_pointer(process, c_comm + i_info->ompi_communicator_t.offset.c_local_group, p_info); - (*info)->comm_rank = ompi_fetch_int(process, + (*info)->comm_rank = ompi_fetch_int(process, group + i_info->ompi_group_t.offset.grp_my_rank, p_info); - (*info)->comm_num_local_procs = ompi_fetch_int(process, + (*info)->comm_num_local_procs = ompi_fetch_int(process, group + i_info->ompi_group_t.offset.grp_proc_count, p_info); @@ -517,10 +516,10 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, /* Look up the remote group (if relevant) */ if (0 != (flags & OMPI_COMM_INTER)) { - group = ompi_fetch_pointer(process, + group = ompi_fetch_pointer(process, c_comm + i_info->ompi_communicator_t.offset.c_remote_group, p_info); - (*info)->comm_num_remote_procs = ompi_fetch_int(process, + (*info)->comm_num_remote_procs = ompi_fetch_int(process, group + i_info->ompi_group_t.offset.grp_proc_count, p_info); (*info)->comm_size = (*info)->comm_num_remote_procs; @@ -537,17 +536,17 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, graph data is just slightly different from each other; it's [slightly] easier (and less confusing!) to have separate retrieval code blocks. 
*/ - topo = ompi_fetch_pointer(process, + topo = ompi_fetch_pointer(process, c_comm + i_info->ompi_communicator_t.offset.c_topo, p_info); - if (0 != topo && + if (0 != topo && 0 != ((*info)->comm_bitflags & MPIDBG_COMM_INFO_CARTESIAN)) { int i, ndims, tmp; mqs_taddr_t dims, periods; /* Alloc space for copying arrays */ (*info)->comm_cart_num_dims = ndims = - ompi_fetch_int(process, + ompi_fetch_int(process, topo + i_info->mca_topo_base_module_t.offset.mtc.cart.ndims, p_info); (*info)->comm_cart_dims = mqs_malloc(ndims * sizeof(int)); @@ -568,13 +567,13 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, /* Retrieve the dimension and periodic description data from the two arrays on the image's communicator */ - dims = ompi_fetch_pointer(process, + dims = ompi_fetch_pointer(process, topo + i_info->mca_topo_base_module_t.offset.mtc.cart.dims, p_info); - periods = ompi_fetch_pointer(process, + periods = ompi_fetch_pointer(process, topo + i_info->mca_topo_base_module_t.offset.mtc.cart.periods, p_info); - coords = ompi_fetch_pointer(process, + coords = ompi_fetch_pointer(process, topo + i_info->mca_topo_base_module_t.offset.mtc.cart.coords, p_info); @@ -591,8 +590,8 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, mqs_taddr_t index, edges; /* Alloc space for copying the indexes */ - (*info)->comm_graph_num_nodes = nnodes = - ompi_fetch_int(process, + (*info)->comm_graph_num_nodes = nnodes = + ompi_fetch_int(process, topo + i_info->mca_topo_base_module_t.offset.mtc.graph.nnodes, p_info); (*info)->comm_graph_index = mqs_malloc(nnodes * sizeof(int)); @@ -601,11 +600,11 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, } /* Retrieve the index data */ - index = ompi_fetch_pointer(process, + index = ompi_fetch_pointer(process, topo + i_info->mca_topo_base_module_t.offset.mtc.graph.index, p_info); for (i = 0; i < nnodes; ++i) { - (*info)->comm_graph_index[i] = + (*info)->comm_graph_index[i] = ompi_fetch_int(process, 
index + (sizeof(int) * i), p_info); } @@ -618,13 +617,13 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, } /* Retrieve the edge data */ - edges = ompi_fetch_pointer(process, + edges = ompi_fetch_pointer(process, topo + i_info->mca_topo_base_module_t.offset.mtc.graph.edges, p_info); - for (i = 0; - i < (*info)->comm_graph_index[(*info)->comm_graph_num_nodes - 1]; + for (i = 0; + i < (*info)->comm_graph_index[(*info)->comm_graph_num_nodes - 1]; ++i) { - (*info)->comm_graph_edges[i] = + (*info)->comm_graph_edges[i] = ompi_fetch_int(process, edges + (sizeof(int) * i), p_info); } } else if (0 != topo && @@ -633,14 +632,14 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, } /* Fortran handle */ - (*info)->comm_fortran_handle = - ompi_fetch_int(process, + (*info)->comm_fortran_handle = + ompi_fetch_int(process, c_comm + i_info->ompi_communicator_t.offset.c_f_to_c_index, p_info); printf("mpdbg: comm fortran handle: %d\n", (*info)->comm_fortran_handle); /* Fill in attributes */ - keyhash = ompi_fetch_pointer(process, + keyhash = ompi_fetch_pointer(process, c_comm + i_info->ompi_communicator_t.offset.c_keyhash, p_info); fill_attributes(&((*info)->comm_num_attrs), &((*info)->comm_attrs), @@ -657,7 +656,7 @@ int mpidbg_comm_query(mqs_image *image, mqs_image_info *image_info, return MPIDBG_SUCCESS; } -int mpidbg_comm_f2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_comm_f2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t f77_comm, mqs_taddr_t *c_comm) { @@ -666,22 +665,22 @@ int mpidbg_comm_f2c(mqs_image *image, mqs_image_info *image_info, mpi_process_info *p_info = (mpi_process_info*) process_info; mqs_find_symbol(image, "ompi_mpi_communicators", &comm_list); - if (mqs_ok != ompi_fetch_opal_pointer_array_item(process, comm_list, + if (mqs_ok != ompi_fetch_opal_pointer_array_item(process, comm_list, p_info, f77_comm, c_comm) || NULL == c_comm) { - 
printf("mpidbg_comm_f2c: %lu -> not found\n", + printf("mpidbg_comm_f2c: %lu -> not found\n", (long unsigned int) f77_comm); return MPIDBG_ERR_NOT_FOUND; } - printf("mpidbg_comm_f2c: %lu -> %lu\n", + printf("mpidbg_comm_f2c: %lu -> %lu\n", (long unsigned int) f77_comm, (long unsigned int) c_comm); return MPIDBG_SUCCESS; } -int mpidbg_comm_cxx2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_comm_cxx2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t cxx_comm, + mqs_taddr_t cxx_comm, enum mpidbg_comm_info_bitmap_t comm_type, mqs_taddr_t *c_comm) { @@ -694,9 +693,9 @@ int mpidbg_comm_cxx2c(mqs_image *image, mqs_image_info *image_info, /*---------------------------------------------------------------------*/ -int mpidbg_errhandler_query(mqs_image *image, mqs_image_info *image_info, +int mpidbg_errhandler_query(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t c_errhandler, + mqs_taddr_t c_errhandler, struct mpidbg_errhandler_info_t **info) { printf("mpidbg_errhandler_query: %p\n", (void*) c_errhandler); @@ -704,7 +703,7 @@ int mpidbg_errhandler_query(mqs_image *image, mqs_image_info *image_info, return MPIDBG_ERR_NOT_FOUND; } -int mpidbg_errhandler_f2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_errhandler_f2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t f77_errhandler, mqs_taddr_t *c_errhandler) { @@ -713,9 +712,9 @@ int mpidbg_errhandler_f2c(mqs_image *image, mqs_image_info *image_info, return MPIDBG_ERR_NOT_FOUND; } -int mpidbg_errhandler_cxx2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_errhandler_cxx2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t cxx_errhandler, + mqs_taddr_t cxx_errhandler, mqs_taddr_t *c_errhandler) { printf("mpidbg_errhandler_cxx2c: %p\n", 
(void*) cxx_errhandler); @@ -725,9 +724,9 @@ int mpidbg_errhandler_cxx2c(mqs_image *image, mqs_image_info *image_info, /*---------------------------------------------------------------------*/ -int mpidbg_request_query(mqs_image *image, mqs_image_info *image_info, +int mpidbg_request_query(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t c_request, + mqs_taddr_t c_request, struct mpidbg_request_info_t **info) { printf("mpidbg_request_query: %p\n", (void*) c_request); @@ -735,7 +734,7 @@ int mpidbg_request_query(mqs_image *image, mqs_image_info *image_info, return MPIDBG_ERR_NOT_FOUND; } -int mpidbg_request_f2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_request_f2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t f77_request, mqs_taddr_t *c_request) { @@ -744,9 +743,9 @@ int mpidbg_request_f2c(mqs_image *image, mqs_image_info *image_info, return MPIDBG_ERR_NOT_FOUND; } -int mpidbg_request_cxx2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_request_cxx2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t cxx_request, + mqs_taddr_t cxx_request, enum mpidbg_request_info_bitmap_t request_type, mqs_taddr_t *c_request) { @@ -757,9 +756,9 @@ int mpidbg_request_cxx2c(mqs_image *image, mqs_image_info *image_info, /*---------------------------------------------------------------------*/ -int mpidbg_status_query(mqs_image *image, mqs_image_info *image_info, +int mpidbg_status_query(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t c_status, + mqs_taddr_t c_status, struct mpidbg_status_info_t **info) { printf("mpidbg_status_query: %p\n", (void*) c_status); @@ -767,7 +766,7 @@ int mpidbg_status_query(mqs_image *image, mqs_image_info *image_info, return MPIDBG_ERR_NOT_FOUND; } -int 
mpidbg_status_f2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_status_f2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, mqs_taddr_t f77_status, mqs_taddr_t *c_status) { @@ -776,9 +775,9 @@ int mpidbg_status_f2c(mqs_image *image, mqs_image_info *image_info, return MPIDBG_ERR_NOT_FOUND; } -int mpidbg_status_cxx2c(mqs_image *image, mqs_image_info *image_info, +int mpidbg_status_cxx2c(mqs_image *image, mqs_image_info *image_info, mqs_process *process, mqs_process_info *process_info, - mqs_taddr_t cxx_status, + mqs_taddr_t cxx_status, mqs_taddr_t *c_status) { printf("mpidbg_status_cxx2c: %p\n", (void*) cxx_status); diff --git a/ompi/debuggers/ompi_mpihandles_dll_defs.h b/ompi/debuggers/ompi_mpihandles_dll_defs.h index ff0f2240ccc..077a3a9c927 100644 --- a/ompi/debuggers/ompi_mpihandles_dll_defs.h +++ b/ompi/debuggers/ompi_mpihandles_dll_defs.h @@ -4,9 +4,9 @@ * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/debuggers/ompi_msgq_dll.c b/ompi/debuggers/ompi_msgq_dll.c index fabe6cb268a..130bf3f90a3 100644 --- a/ompi/debuggers/ompi_msgq_dll.c +++ b/ompi/debuggers/ompi_msgq_dll.c @@ -7,10 +7,13 @@ * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -61,9 +64,9 @@ * Oct 27 1997 JHC: Created by exploding db_message_state_mpich.cxx */ -/* - The following was added by William Gropp to improve the portability - to systems with non-ANSI C compilers +/* + The following was added by William Gropp to improve the portability + to systems with non-ANSI C compilers */ #include "ompi_config.h" @@ -71,20 +74,16 @@ #ifdef HAVE_NO_C_CONST #define const #endif -#if defined(HAVE_STRING_H) #include -#endif /* defined(HAVE_STRING_H) */ -#if defined(HAVE_STDLIB_H) #include -#endif /* defined(HAVE_STDLIB_H) */ /* Notice to developers!!!! - * The following include files with _dbg.h suffixes contains definitions + * The following include files with _dbg.h suffixes contains definitions * that are shared between the debuggger plugins and the OMPI code base. * This is done instead of including the non-_dbg suffixed files because - * of the different way compilers may handle extern definitions. The - * particular case that is causing problems is when there is an extern - * variable or function that is accessed in a static inline function. + * of the different way compilers may handle extern definitions. The + * particular case that is causing problems is when there is an extern + * variable or function that is accessed in a static inline function. * For example, here is the code we often see in a header file. * * extern int request_complete; @@ -107,12 +106,12 @@ * * If this code exists it actually compiles fine however an undefined symbol * is kept for foo() and in the case of some tools that load in plugins with - * RTLD_NOW this undefined symbol causes the dlopen to fail since we do not + * RTLD_NOW this undefined symbol causes the dlopen to fail since we do not * have (nor really need) the supporting library containing foo(). 
* - * Therefore, to handle cases like the above with compilers that require the - * symbols (like Sun Studio) instead of pulling in all of OMPI into the - * plugins or defining dummy symbols here we separate the definitions used by + * Therefore, to handle cases like the above with compilers that require the + * symbols (like Sun Studio) instead of pulling in all of OMPI into the + * plugins or defining dummy symbols here we separate the definitions used by * both sets of code into the _dbg.h files. * * This means if one needs to add another definition that the plugins must see @@ -127,7 +126,7 @@ #include "msgq_interface.h" #include "ompi_msgq_dll_defs.h" -/* +/* End of inclusion */ @@ -189,11 +188,16 @@ int mqs_version_compatibility (void) return MQS_INTERFACE_COMPATIBILITY; } /* mqs_version_compatibility */ +static char mqs_version_str[OMPI_MAX_VER_SIZE]; + /* This one can say what you like */ char *mqs_version_string (void) { - return "Open MPI message queue support for parallel" - " debuggers compiled on " __DATE__; + int offset; + offset = snprintf(mqs_version_str, OMPI_MAX_VER_SIZE-1, + "Open MPI message queue support for parallel debuggers "); + ompi_get_lib_version(mqs_version_str+offset, OMPI_MAX_VER_SIZE-offset); + return mqs_version_str; } /* mqs_version_string */ /* So the debugger can tell what interface width the library was compiled with */ @@ -209,12 +213,12 @@ int mqs_dll_taddr_width (void) */ /**********************************************************************/ /* Translate a process number */ -static int translate (group_t *this, int index) -{ +static int translate (group_t *this, int index) +{ if (index == MQS_INVALID_PROCESS || ((unsigned int)index) >= ((unsigned int) this->entries)) return MQS_INVALID_PROCESS; - return this->local_to_global[index]; + return this->local_to_global[index]; } /* translate */ /**********************************************************************/ @@ -242,7 +246,7 @@ static group_t * find_or_create_group( 
mqs_process *proc, DEBUG(VERBOSE_COMM, ("Get a size for the communicator = %d\n", np)); return NULL; /* Makes no sense ! */ } - is_dense = + is_dense = ompi_fetch_int( proc, group_base + i_info->ompi_group_t.offset.grp_flags, p_info ); @@ -259,7 +263,7 @@ static group_t * find_or_create_group( mqs_process *proc, } } - /* Hmm, couldn't find one, so fetch it */ + /* Hmm, couldn't find one, so fetch it */ group = (group_t *)mqs_malloc (sizeof (group_t)); tr = (int *)mqs_malloc (np*sizeof(int)); trbuffer = (char *)mqs_malloc (np*sizeof(mqs_taddr_t)); @@ -267,7 +271,7 @@ static group_t * find_or_create_group( mqs_process *proc, group->group_base = group_base; DEBUG(VERBOSE_GROUP, ("Create a new group 0x%p with %d members\n", (void*)group, np) ); - + tablep = ompi_fetch_pointer( proc, group_base + i_info->ompi_group_t.offset.grp_proc_pointers, p_info); @@ -357,7 +361,7 @@ int mqs_setup_image (mqs_image *image, const mqs_image_callbacks *icb) i_info->extra = NULL; mqs_put_image_info (image, (mqs_image_info *)i_info); - + return mqs_ok; } /* mqs_setup_image */ @@ -406,7 +410,7 @@ int mqs_image_has_queues (mqs_image *image, char **message) * has no message queue information. 
*/ int mqs_setup_process (mqs_process *process, const mqs_process_callbacks *pcb) -{ +{ /* Extract the addresses of the global variables we need and save them away */ mpi_process_info *p_info = (mpi_process_info *)mqs_malloc (sizeof (mpi_process_info)); @@ -445,46 +449,47 @@ int mqs_setup_process (mqs_process *process, const mqs_process_callbacks *pcb) { mqs_taddr_t typedefs_sizeof; - if(mqs_find_symbol (image, "MPIR_debug_typedefs_sizeof", &typedefs_sizeof) != mqs_ok) - return err_no_store; - p_info->sizes.short_size = ompi_fetch_int( process, /* sizeof (short) */ - typedefs_sizeof, - p_info ); - typedefs_sizeof += p_info->sizes.int_size; - p_info->sizes.int_size = ompi_fetch_int( process, /* sizeof (int) */ - typedefs_sizeof, - p_info ); - typedefs_sizeof += p_info->sizes.int_size; - p_info->sizes.long_size = ompi_fetch_int( process, /* sizeof (long) */ - typedefs_sizeof, - p_info ); - typedefs_sizeof += p_info->sizes.int_size; - p_info->sizes.long_long_size = ompi_fetch_int( process, /* sizeof (long long) */ - typedefs_sizeof, - p_info ); - typedefs_sizeof += p_info->sizes.int_size; - p_info->sizes.pointer_size = ompi_fetch_int( process, /* sizeof (void *) */ - typedefs_sizeof, - p_info ); - typedefs_sizeof += p_info->sizes.int_size; - p_info->sizes.bool_size = ompi_fetch_int( process, /* sizeof (bool) */ - typedefs_sizeof, - p_info ); - typedefs_sizeof += p_info->sizes.int_size; - p_info->sizes.size_t_size = ompi_fetch_int( process, /* sizeof (size_t) */ + if (mqs_find_symbol (image, "MPIR_debug_typedefs_sizeof", &typedefs_sizeof) != mqs_ok) { + return err_no_store; + } + p_info->sizes.short_size = ompi_fetch_int( process, /* sizeof (short) */ + typedefs_sizeof, + p_info ); + typedefs_sizeof += p_info->sizes.int_size; + p_info->sizes.int_size = ompi_fetch_int( process, /* sizeof (int) */ + typedefs_sizeof, + p_info ); + typedefs_sizeof += p_info->sizes.int_size; + p_info->sizes.long_size = ompi_fetch_int( process, /* sizeof (long) */ + typedefs_sizeof, + p_info 
); + typedefs_sizeof += p_info->sizes.int_size; + p_info->sizes.long_long_size = ompi_fetch_int( process, /* sizeof (long long) */ typedefs_sizeof, p_info ); - DEBUG( VERBOSE_GENERAL, - ("sizes short = %d int = %d long = %d long long = %d " - "void* = %d bool = %d size_t = %d\n", - p_info->sizes.short_size, p_info->sizes.int_size, - p_info->sizes.long_size, p_info->sizes.long_long_size, - p_info->sizes.pointer_size, p_info->sizes.bool_size, - p_info->sizes.size_t_size) ); + typedefs_sizeof += p_info->sizes.int_size; + p_info->sizes.pointer_size = ompi_fetch_int( process, /* sizeof (void *) */ + typedefs_sizeof, + p_info ); + typedefs_sizeof += p_info->sizes.int_size; + p_info->sizes.bool_size = ompi_fetch_int( process, /* sizeof (bool) */ + typedefs_sizeof, + p_info ); + typedefs_sizeof += p_info->sizes.int_size; + p_info->sizes.size_t_size = ompi_fetch_int( process, /* sizeof (size_t) */ + typedefs_sizeof, + p_info ); + DEBUG( VERBOSE_GENERAL, + ("sizes short = %d int = %d long = %d long long = %d " + "void* = %d bool = %d size_t = %d\n", + p_info->sizes.short_size, p_info->sizes.int_size, + p_info->sizes.long_size, p_info->sizes.long_long_size, + p_info->sizes.pointer_size, p_info->sizes.bool_size, + p_info->sizes.size_t_size) ); } mqs_put_process_info (process, (mqs_process_info *)p_info); - + return mqs_ok; } return err_no_store; @@ -505,10 +510,10 @@ int mqs_process_has_queues (mqs_process *proc, char **msg) DEBUG(VERBOSE_GENERAL,("checking the status of the OMPI dll\n")); if (mqs_find_symbol (image, "ompi_mpi_communicators", &extra->commlist_base) != mqs_ok) return err_all_communicators; - + if (mqs_find_symbol (image, "mca_pml_base_send_requests", &extra->send_queue_base) != mqs_ok) return err_mpid_sends; - + if (mqs_find_symbol (image, "mca_pml_base_recv_requests", &extra->recv_queue_base) != mqs_ok) return err_mpid_recvs; DEBUG(VERBOSE_GENERAL,("process_has_queues returned success\n")); @@ -516,7 +521,7 @@ int mqs_process_has_queues (mqs_process *proc, 
char **msg) } /* mqs_process_has_queues */ /*********************************************************************** - * Check if the communicators have changed by looking at the + * Check if the communicators have changed by looking at the * pointer array values for lowest_free and number_free. */ static int communicators_changed (mqs_process *proc) @@ -580,7 +585,7 @@ static int compare_comms (const void *a, const void *b) } /* compare_comms */ /*********************************************************************** - * Rebuild our list of communicators because something has changed + * Rebuild our list of communicators because something has changed */ static int rebuild_communicator_list (mqs_process *proc) { @@ -637,7 +642,7 @@ static int rebuild_communicator_list (mqs_process *proc) (long long)comm_addr_base, (int)sizeof(mqs_taddr_t))); for( i = 0; (commcount < (comm_size - number_free)) && (i < comm_size); i++ ) { /* Get the communicator pointer */ - comm_ptr = + comm_ptr = ompi_fetch_pointer( proc, comm_addr_base + i * p_info->sizes.pointer_size, p_info ); @@ -970,7 +975,7 @@ static int opal_free_list_t_init_parser( mqs_process *proc, mpi_process_info *p_ (long long)active_allocation, (long long)position->upper_bound)); } position->current_item = active_allocation; - + /*opal_free_list_t_dump_position( position );*/ return mqs_ok; } @@ -1061,10 +1066,6 @@ static void dump_request( mqs_taddr_t current_item, mqs_pending_operation *res ) printf( "+===============================================+\n\n" ); } -/** - * TODO: ompi_request_completed can be used to detect any changes in the request handles. - */ - /** * Handle the send queue as well as the receive queue. The unexpected queue * is a whole different story ... 
@@ -1137,12 +1138,12 @@ static int fetch_request( mqs_process *proc, mpi_process_info *p_info, res->desired_local_rank = ompi_fetch_int( proc, current_item + i_info->mca_pml_base_request_t.offset.req_peer, p_info ); res->desired_global_rank = translate( extra->current_communicator->group, res->desired_local_rank ); - + res->buffer = ompi_fetch_pointer( proc, current_item + i_info->mca_pml_base_request_t.offset.req_addr, p_info ); /* Set this to true if it's a buffered request */ res->system_buffer = FALSE; - + /* The pointer to the request datatype */ ompi_datatype = ompi_fetch_pointer( proc, @@ -1210,7 +1211,7 @@ static int fetch_request( mqs_process *proc, mpi_process_info *p_info, /* If the length we're looking for is the count ... */ /*res->desired_length = ompi_fetch_int( proc, current_item + i_info->mca_pml_base_request_t.offset.req_count, p_info );*/ - + if( (mqs_st_pending < res->status) && (MCA_PML_REQUEST_SEND != req_type) ) { /* The real data from the status */ res->actual_length = ompi_fetch_size_t( proc, current_item + i_info->ompi_request_t.offset.req_status + @@ -1230,7 +1231,7 @@ static int fetch_request( mqs_process *proc, mpi_process_info *p_info, } /*********************************************************************** - * Setup to iterate over pending operations + * Setup to iterate over pending operations */ int mqs_setup_operation_iterator (mqs_process *proc, int op) { @@ -1259,7 +1260,7 @@ int mqs_setup_operation_iterator (mqs_process *proc, int op) } /* mqs_setup_operation_iterator */ /*********************************************************************** - * Fetch the next valid operation. + * Fetch the next valid operation. * Since Open MPI only maintains a single queue of each type of operation, * we have to run over it and filter out the operations which * match the active communicator. 
@@ -1297,11 +1298,11 @@ void mqs_destroy_process_info (mqs_process_info *mp_info) comm = extra->communicator_list; while (comm) { communicator_t *next = comm->next; - + if( NULL != comm->group ) group_decref (comm->group); /* Group is no longer referenced from here */ mqs_free (comm); - + comm = next; } if (NULL != extra) { @@ -1327,95 +1328,95 @@ char * mqs_dll_error_string (int errcode) switch (errcode) { case err_silent_failure: return ""; - case err_no_current_communicator: + case err_no_current_communicator: return "No current communicator in the communicator iterator"; - case err_bad_request: + case err_bad_request: return "Attempting to setup to iterate over an unknown queue of operations"; - case err_no_store: + case err_no_store: return "Unable to allocate store"; - case err_failed_qhdr: + case err_failed_qhdr: return "Failed to find type MPID_QHDR"; - case err_unexpected: + case err_unexpected: return "Failed to find field 'unexpected' in MPID_QHDR"; - case err_posted: + case err_posted: return "Failed to find field 'posted' in MPID_QHDR"; - case err_failed_queue: + case err_failed_queue: return "Failed to find type MPID_QUEUE"; - case err_first: + case err_first: return "Failed to find field 'first' in MPID_QUEUE"; - case err_context_id: + case err_context_id: return "Failed to find field 'context_id' in MPID_QEL"; - case err_tag: + case err_tag: return "Failed to find field 'tag' in MPID_QEL"; - case err_tagmask: + case err_tagmask: return "Failed to find field 'tagmask' in MPID_QEL"; - case err_lsrc: + case err_lsrc: return "Failed to find field 'lsrc' in MPID_QEL"; - case err_srcmask: + case err_srcmask: return "Failed to find field 'srcmask' in MPID_QEL"; - case err_next: + case err_next: return "Failed to find field 'next' in MPID_QEL"; - case err_ptr: + case err_ptr: return "Failed to find field 'ptr' in MPID_QEL"; - case err_missing_type: + case err_missing_type: return "Failed to find some type"; - case err_missing_symbol: + case 
err_missing_symbol: return "Failed to find field the global symbol"; - case err_db_shandle: + case err_db_shandle: return "Failed to find field 'db_shandle' in MPIR_SQEL"; - case err_db_comm: + case err_db_comm: return "Failed to find field 'db_comm' in MPIR_SQEL"; - case err_db_target: + case err_db_target: return "Failed to find field 'db_target' in MPIR_SQEL"; - case err_db_tag: + case err_db_tag: return "Failed to find field 'db_tag' in MPIR_SQEL"; - case err_db_data: + case err_db_data: return "Failed to find field 'db_data' in MPIR_SQEL"; - case err_db_byte_length: + case err_db_byte_length: return "Failed to find field 'db_byte_length' in MPIR_SQEL"; - case err_db_next: + case err_db_next: return "Failed to find field 'db_next' in MPIR_SQEL"; - case err_failed_rhandle: + case err_failed_rhandle: return "Failed to find type MPIR_RHANDLE"; - case err_is_complete: + case err_is_complete: return "Failed to find field 'is_complete' in MPIR_RHANDLE"; - case err_buf: + case err_buf: return "Failed to find field 'buf' in MPIR_RHANDLE"; - case err_len: + case err_len: return "Failed to find field 'len' in MPIR_RHANDLE"; - case err_s: + case err_s: return "Failed to find field 's' in MPIR_RHANDLE"; - case err_failed_status: + case err_failed_status: return "Failed to find type MPI_Status"; - case err_count: + case err_count: return "Failed to find field 'count' in MPIR_Status"; - case err_MPI_SOURCE: + case err_MPI_SOURCE: return "Failed to find field 'MPI_SOURCE' in MPIR_Status"; - case err_MPI_TAG: + case err_MPI_TAG: return "Failed to find field 'MPI_TAG' in MPIR_Status"; - case err_failed_commlist: + case err_failed_commlist: return "Failed to find type MPIR_Comm_list"; - case err_sequence_number: + case err_sequence_number: return "Failed to find field 'sequence_number' in MPIR_Comm_list"; - case err_comm_first: + case err_comm_first: return "Failed to find field 'comm_first' in MPIR_Comm_list"; - case err_failed_communicator: + case err_failed_communicator: 
return "Failed to find type MPIR_Communicator"; - case err_lrank_to_grank: + case err_lrank_to_grank: return "Failed to find field 'lrank_to_grank' in MPIR_Communicator"; - case err_send_context: + case err_send_context: return "Failed to find field 'send_context' in MPIR_Communicator"; - case err_recv_context: + case err_recv_context: return "Failed to find field 'recv_context' in MPIR_Communicator"; - case err_comm_next: + case err_comm_next: return "Failed to find field 'comm_next' in MPIR_Communicator"; - case err_comm_name: + case err_comm_name: return "Failed to find field 'comm_name' in MPIR_Communicator"; - case err_all_communicators: + case err_all_communicators: return "Failed to find the global symbol MPIR_All_communicators"; - case err_mpid_sends: + case err_mpid_sends: return "Failed to access the global send requests list"; - case err_mpid_recvs: + case err_mpid_recvs: return "Failed to access the global receive requests list"; case err_group_corrupt: return "Could not read a communicator's group from the process (probably a store corruption)"; diff --git a/ompi/debuggers/ompi_msgq_dll_defs.h b/ompi/debuggers/ompi_msgq_dll_defs.h index d9513e81107..af2d93349c4 100644 --- a/ompi/debuggers/ompi_msgq_dll_defs.h +++ b/ompi/debuggers/ompi_msgq_dll_defs.h @@ -7,9 +7,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -95,7 +95,7 @@ typedef struct { /* Information for a single process, a list of communicators, some * useful addresses, and the state of the iterators. 
*/ -typedef struct +typedef struct { struct communicator_t *communicator_list; /* List of communicators in the process */ @@ -114,7 +114,7 @@ typedef struct int world_proc_array_entries; mqs_taddr_t* world_proc_array; - + mqs_opal_free_list_t_pos next_msg; /* And state for the message iterator */ mqs_op_class what; /* What queue are we looking on */ } mpi_process_info_extra; diff --git a/ompi/debuggers/predefined_gap_test.c b/ompi/debuggers/predefined_gap_test.c index 7a46dce3803..69eb1c1791b 100644 --- a/ompi/debuggers/predefined_gap_test.c +++ b/ompi/debuggers/predefined_gap_test.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 2009 Sun Microsystems, Inc All rights reserved. * Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 The University of Tennessee and The University @@ -6,9 +6,9 @@ * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -157,6 +157,6 @@ int main(int argc, char **argv) { GAP_CHECK("f_io_selected_component", test_file, f_io_selected_component, f_io_version, 1); GAP_CHECK("f_io_selected_module", test_file, f_io_selected_module, f_io_selected_component, 1); GAP_CHECK("f_io_selected_data", test_file, f_io_selected_data, f_io_selected_module, 1); - + return 0; } diff --git a/ompi/debuggers/predefined_pad_test.c b/ompi/debuggers/predefined_pad_test.c index b41e16ec17b..a3f724c1731 100644 --- a/ompi/debuggers/predefined_pad_test.c +++ b/ompi/debuggers/predefined_pad_test.c @@ -1,9 +1,9 @@ -/* +/* * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -51,7 +51,7 @@ static int errors = 0; } \ } while(0) -int main(int argc, char **argv) +int main(int argc, char **argv) { PAD_CHECK(communicator); PAD_CHECK(errhandler); @@ -71,6 +71,6 @@ int main(int argc, char **argv) fprintf(stderr, "NUMBER OF ERRORS: %d\n", errors); exit(1); } - + return 0; } diff --git a/ompi/debuggers/tv-debugger-attach.txt b/ompi/debuggers/tv-debugger-attach.txt index 5561019dcf9..ae5f0052410 100644 --- a/ompi/debuggers/tv-debugger-attach.txt +++ b/ompi/debuggers/tv-debugger-attach.txt @@ -76,7 +76,7 @@ typedef struct { extern MPIR_PROCDESC *MPIR_proctable; extern int MPIR_proctable_size; -/* Various global variables which a debugger can use for +/* Various global variables which a debugger can use for * 1) finding out what the state of the program is at * the time the magic breakpoint is hit. * 2) inform the process that it has been attached to and is @@ -88,9 +88,9 @@ extern char * MPIR_debug_abort_string; extern int MPIR_being_debugged; /* Cause extra info on internal state * to be maintained */ - + /* Values for the debug_state, this seems to be all we need at the moment - * but that may change... + * but that may change... */ #define MPIR_DEBUG_SPAWNED 1 #define MPIR_DEBUG_ABORTING 2 @@ -114,9 +114,9 @@ The named symbols looked for by TotalView are #define MPICH_dll_name "MPIR_dll_name" If the symbol MPIR_dll_name is present in the image, then it is -expected to be +expected to be -extern char [] MPIR_dll_name; +extern char [] MPIR_dll_name; and to contain a string which is the name of the message queue debugging library to use to debug this code. @@ -140,7 +140,7 @@ attached to a process to let it run. Totalview also needs the debug information for the MPIR_PROCDESC type, since it uses that to work out the size and fields in the procedesc -array. +array. 
If the symbol MPIR_i_am_starter appears in the program then TotalView treats it as a starter process which is not in the MPI world, @@ -170,16 +170,16 @@ in a specific communicator, or a specific rank process in COMM_WORLD). TotalView may choose to ignore this and acquire all processes, and its presence does not prevent TotalView from using the old protocol to acquire all of the processes. (Since setting the MPIR_debug_gate is -harmless). +harmless). All of the code that MPICH uses can be found in the MPICH source release, specifically in initutil.c and debugutil.c Here's a little more description of each of the variables TV -references or sets. +references or sets. MPIR_debug_state - Required. + Required. If we don't see this we won't know what the target process is trying to tell us by hitting the breakpoint, and we'll ignore it. Process acquisition will not work without this variable existing and @@ -193,7 +193,7 @@ MPIR_debug_gate attached to the process. MPIR_debug_abort_string - Not required. + Not required. Or rather, only required to get special handling of MPI_Abort. MPIR_i_am_starter @@ -210,13 +210,13 @@ MPIR_acquired_pre_main in the initially debugged process. MPIR_being_debugged - Not required. + Not required. We try to set this to (int)1 to let the target processes know that they're being debugged. If the symbol doesn't exist we won't write it and won't complain. MPIR_dll_name - Not required. + Not required. If it's not present we'll _only_ use the default name for the debug dll. (But if you don't have dlopen or message queue dumping, that certainly won't matter !) diff --git a/ompi/dpm/Makefile.am b/ompi/dpm/Makefile.am new file mode 100644 index 00000000000..8d66ff7d45d --- /dev/null +++ b/ompi/dpm/Makefile.am @@ -0,0 +1,19 @@ +# -*- makefile -*- +# +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from ompi/Makefile.am + +headers += \ + dpm/dpm.h + +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ + dpm/dpm.c + diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c new file mode 100644 index 00000000000..7619e8a219f --- /dev/null +++ b/ompi/dpm/dpm.c @@ -0,0 +1,1246 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2009 University of Houston. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include +#include +#include +#include +#if HAVE_SYS_TIME_H +#include +#endif + +#include "opal/util/alfg.h" +#include "opal/util/argv.h" +#include "opal/util/opal_getcwd.h" +#include "opal/util/proc.h" +#include "opal/dss/dss.h" +#include "opal/mca/hwloc/base/base.h" +#include "opal/mca/pmix/pmix.h" +#include "opal/util/opal_environ.h" + +#include "ompi/communicator/communicator.h" +#include "ompi/group/group.h" +#include "ompi/proc/proc.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/rte/rte.h" +#include "ompi/info/info.h" + +#include "ompi/dpm/dpm.h" + +static opal_rng_buff_t rnd; + +typedef struct { + ompi_communicator_t *comm; + int size; + struct ompi_request_t **reqs; + int buf; +} ompi_dpm_disconnect_obj; +static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs); +static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm); + +typedef struct { + opal_list_item_t super; + ompi_proc_t *p; +} ompi_dpm_proct_caddy_t; +static OBJ_CLASS_INSTANCE(ompi_dpm_proct_caddy_t, + opal_list_item_t, + NULL, NULL); + +/* + * Init the module + */ +int ompi_dpm_init(void) +{ + time_t now; + + /* seed our random number generator */ + now = time(NULL); + if (!opal_srand(&rnd, now)) { + return OMPI_ERROR; + } + return OMPI_SUCCESS; +} + +int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, + const char *port_string, bool send_first, + ompi_communicator_t **newcomm) +{ + int k, size, rsize, rank, rc, rportlen=0; + char **members = NULL, *nstring, *rport=NULL; + bool dense, isnew; + opal_process_name_t pname; + opal_list_t ilist, mlist, rlist; + opal_value_t info; + opal_pmix_pdata_t pdat; + opal_namelist_t *nm; + opal_jobid_t jobid; + + ompi_communicator_t *newcomp=MPI_COMM_NULL; + ompi_proc_t *proc; + ompi_group_t *group=comm->c_local_group; + ompi_proc_t **proc_list=NULL, **new_proc_list = 
NULL; + int32_t i; + ompi_group_t *new_group_pointer; + ompi_dpm_proct_caddy_t *cd; + + if (NULL == opal_pmix.publish || NULL == opal_pmix.connect || + NULL == opal_pmix.unpublish || + (NULL == opal_pmix.lookup && NULL == opal_pmix.lookup_nb)) { + return OMPI_ERR_NOT_SUPPORTED; + } + + /* set default error return */ + *newcomm = MPI_COMM_NULL; + + size = ompi_comm_size ( comm ); + rank = ompi_comm_rank ( comm ); + + /* the "send_first" end will append ":connect" to the port name and publish + * the list of its participating procs on that key. The receiving root proc + * will append ":accept" to the port name and publish the list of its + * participants on that key. Each proc will then block waiting for lookup + * to complete on the other's key. Once that completes, the list of remote + * procs is used to complete construction of the intercommunicator. */ + + /* everyone constructs the list of members from their communicator */ + if (MPI_COMM_WORLD == comm) { + pname.jobid = OMPI_PROC_MY_NAME->jobid; + pname.vpid = OPAL_VPID_WILDCARD; + rc = opal_convert_process_name_to_string(&nstring, &pname); + if (OPAL_SUCCESS != rc) { + return OMPI_ERROR; + } + opal_argv_append_nosize(&members, nstring); + free(nstring); + /* have to add the number of procs in the job so the remote side + * can correctly add the procs by computing their names, and our nspace + * so they can update their records */ + if (NULL == (nstring = (char*)opal_pmix.get_nspace(OMPI_PROC_MY_NAME->jobid))) { + opal_argv_free(members); + return OMPI_ERR_NOT_SUPPORTED; + } + opal_argv_append_nosize(&members, nstring); + (void)asprintf(&nstring, "%d", size); + opal_argv_append_nosize(&members, nstring); + free(nstring); + } else { + if (OMPI_GROUP_IS_DENSE(group)) { + proc_list = group->grp_proc_pointers; + dense = true; + } else { + proc_list = (ompi_proc_t**)calloc(group->grp_proc_count, + sizeof(ompi_proc_t *)); + for (i=0 ; igrp_proc_count ; i++) { + if (NULL == (proc_list[i] = 
ompi_group_peer_lookup(group,i))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + rc = ORTE_ERR_NOT_FOUND; + free(proc_list); + goto exit; + } + } + dense = false; + } + for (i=0; i < size; i++) { + opal_process_name_t proc_name; + if (ompi_proc_is_sentinel (proc_list[i])) { + proc_name = ompi_proc_sentinel_to_name ((uintptr_t) proc_list[i]); + } else { + proc_name = proc_list[i]->super.proc_name; + } + rc = opal_convert_process_name_to_string(&nstring, &proc_name); + if (OPAL_SUCCESS != rc) { + if (!dense) { + free(proc_list); + proc_list = NULL; + } + return OMPI_ERROR; + } + opal_argv_append_nosize(&members, nstring); + free(nstring); + if (NULL == (nstring = (char*)opal_pmix.get_nspace(proc_name.jobid))) { + opal_argv_free(members); + return OMPI_ERR_NOT_SUPPORTED; + } + opal_argv_append_nosize(&members, nstring); + } + if (!dense) { + free(proc_list); + proc_list = NULL; + } + } + + if (rank == root) { + /* the roots for each side exchange their list of participants */ + OBJ_CONSTRUCT(&info, opal_value_t); + OBJ_CONSTRUCT(&pdat, opal_pmix_pdata_t); + if (send_first) { + (void)asprintf(&info.key, "%s:connect", port_string); + (void)asprintf(&pdat.value.key, "%s:accept", port_string); + } else { + (void)asprintf(&info.key, "%s:accept", port_string); + (void)asprintf(&pdat.value.key, "%s:connect", port_string); + } + info.type = OPAL_STRING; + info.data.string = opal_argv_join(members, ':'); + pdat.value.type = OPAL_STRING; + + OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 60); + OBJ_DESTRUCT(&info); + if (OPAL_SUCCESS != rc) { + OBJ_DESTRUCT(&pdat); + return rc; + } + + /* save the result */ + rport = strdup(pdat.value.data.string); // need this later + rportlen = strlen(rport) + 1; // retain the NULL terminator + OBJ_DESTRUCT(&pdat); + } + + /* if we aren't in a comm_spawn, the non-root members won't have + * the port_string - so let's make sure everyone knows the other + * side's participants */ + + /* bcast the list-length to all processes in the local comm */ + rc = 
comm->c_coll.coll_bcast(&rportlen, 1, MPI_INT, root, comm, + comm->c_coll.coll_bcast_module); + if (OMPI_SUCCESS != rc) { + free(rport); + goto exit; + } + + if (rank != root) { + /* non root processes need to allocate the buffer manually */ + rport = (char*)malloc(rportlen); + if (NULL == rport) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + /* now share the list of remote participants */ + rc = comm->c_coll.coll_bcast(rport, rportlen, MPI_BYTE, root, comm, + comm->c_coll.coll_bcast_module); + if (OMPI_SUCCESS != rc) { + free(rport); + goto exit; + } + + /* initiate a list of participants for the connect, + * starting with our own members */ + OBJ_CONSTRUCT(&mlist, opal_list_t); + for (i=0; NULL != members[i]; i++) { + nm = OBJ_NEW(opal_namelist_t); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_process_name(&nm->name, members[i]))) { + OMPI_ERROR_LOG(rc); + opal_argv_free(members); + free(rport); + OPAL_LIST_DESTRUCT(&mlist); + goto exit; + } + /* step over the nspace */ + ++i; + if (NULL == members[i]) { + /* this shouldn't happen and is an error */ + OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM); + OPAL_LIST_DESTRUCT(&mlist); + opal_argv_free(members); + free(rport); + rc = OMPI_ERR_BAD_PARAM; + goto exit; + } + /* if the rank is wildcard, then we need to add all procs + * in that job to the list */ + if (OPAL_VPID_WILDCARD == nm->name.vpid) { + jobid = nm->name.jobid; + OBJ_RELEASE(nm); + for (k=0; k < size; k++) { + nm = OBJ_NEW(opal_namelist_t); + nm->name.jobid = jobid; + nm->name.vpid = k; + opal_list_append(&mlist, &nm->super); + } + /* now step over the size */ + if (NULL == members[i+1]) { + /* this shouldn't happen and is an error */ + OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM); + OPAL_LIST_DESTRUCT(&mlist); + opal_argv_free(members); + free(rport); + rc = OMPI_ERR_BAD_PARAM; + goto exit; + } + ++i; + } else { + opal_list_append(&mlist, &nm->super); + } + } + opal_argv_free(members); + members = NULL; + + /* rport contains a colon-delimited list + * of 
process names for the remote procs - convert it + * into an argv array */ + members = opal_argv_split(rport, ':'); + free(rport); + + /* add the list of remote procs to our list, and + * keep a list of them for later */ + OBJ_CONSTRUCT(&ilist, opal_list_t); + OBJ_CONSTRUCT(&rlist, opal_list_t); + + for (i=0; NULL != members[i]; i++) { + nm = OBJ_NEW(opal_namelist_t); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_process_name(&nm->name, members[i]))) { + OMPI_ERROR_LOG(rc); + opal_argv_free(members); + OPAL_LIST_DESTRUCT(&ilist); + OPAL_LIST_DESTRUCT(&rlist); + goto exit; + } + /* next entry is the nspace - register it */ + ++i; + if (NULL == members[i]) { + OMPI_ERROR_LOG(OMPI_ERR_NOT_SUPPORTED); + opal_argv_free(members); + OPAL_LIST_DESTRUCT(&ilist); + OPAL_LIST_DESTRUCT(&rlist); + goto exit; + } + opal_pmix.register_jobid(nm->name.jobid, members[i]); + if (OPAL_VPID_WILDCARD == nm->name.vpid) { + jobid = nm->name.jobid; + OBJ_RELEASE(nm); + /* if the vpid is wildcard, then we are including all ranks + * of that job, and the next entry in members should be the + * number of procs in the job */ + if (NULL == members[i+1]) { + /* just protect against the error */ + OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM); + opal_argv_free(members); + OPAL_LIST_DESTRUCT(&ilist); + OPAL_LIST_DESTRUCT(&rlist); + rc = OMPI_ERR_BAD_PARAM; + goto exit; + } + rsize = strtoul(members[i+1], NULL, 10); + ++i; + for (k=0; k < rsize; k++) { + nm = OBJ_NEW(opal_namelist_t); + nm->name.jobid = jobid; + nm->name.vpid = k; + opal_list_append(&mlist, &nm->super); + /* see if this needs to be added to our ompi_proc_t array */ + proc = ompi_proc_find_and_add(&nm->name, &isnew); + if (isnew) { + cd = OBJ_NEW(ompi_dpm_proct_caddy_t); + cd->p = proc; + opal_list_append(&ilist, &cd->super); + } + /* either way, add to the remote list */ + cd = OBJ_NEW(ompi_dpm_proct_caddy_t); + cd->p = proc; + opal_list_append(&rlist, &cd->super); + } + } else { + opal_list_append(&mlist, &nm->super); + /* see if this 
needs to be added to our ompi_proc_t array */ + proc = ompi_proc_find_and_add(&nm->name, &isnew); + if (isnew) { + cd = OBJ_NEW(ompi_dpm_proct_caddy_t); + cd->p = proc; + opal_list_append(&ilist, &cd->super); + } + /* either way, add to the remote list */ + cd = OBJ_NEW(ompi_dpm_proct_caddy_t); + cd->p = proc; + opal_list_append(&rlist, &cd->super); + } + } + opal_argv_free(members); + + /* tell the host RTE to connect us - this will download + * all known data for the nspace's of participating procs + * so that add_procs will not result in a slew of lookups */ + rc = opal_pmix.connect(&mlist); + OPAL_LIST_DESTRUCT(&mlist); + if (OPAL_SUCCESS != rc) { + OMPI_ERROR_LOG(rc); + OPAL_LIST_DESTRUCT(&ilist); + OPAL_LIST_DESTRUCT(&rlist); + goto exit; + } + if (0 < opal_list_get_size(&ilist)) { + /* convert the list of new procs to a proc_t array */ + new_proc_list = (ompi_proc_t**)calloc(opal_list_get_size(&ilist), + sizeof(ompi_proc_t *)); + i = 0; + OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) { + opal_value_t *kv; + proc = cd->p; + new_proc_list[i] = proc ; + /* ompi_proc_complete_init_single() initializes and optionally retrieves + * OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. 
since we can live without + * them, we are just fine */ + ompi_proc_complete_init_single(proc); + /* save the locality for later */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_LOCALITY); + kv->type = OPAL_UINT16; + kv->data.uint16 = proc->super.proc_flags; + opal_pmix.store_local(&proc->super.proc_name, kv); + OBJ_RELEASE(kv); // maintain accounting + ++i; + } + /* call add_procs on the new ones */ + rc = MCA_PML_CALL(add_procs(new_proc_list, opal_list_get_size(&ilist))); + free(new_proc_list); + new_proc_list = NULL; + if (OMPI_SUCCESS != rc) { + OMPI_ERROR_LOG(rc); + OPAL_LIST_DESTRUCT(&ilist); + goto exit; + } + } + OPAL_LIST_DESTRUCT(&ilist); + + /* now deal with the remote group */ + rsize = opal_list_get_size(&rlist); + new_group_pointer=ompi_group_allocate(rsize); + if (NULL == new_group_pointer) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + OPAL_LIST_DESTRUCT(&rlist); + goto exit; + } + /* assign group elements */ + i=0; + OPAL_LIST_FOREACH(cd, &rlist, ompi_dpm_proct_caddy_t) { + new_group_pointer->grp_proc_pointers[i++] = cd->p; + /* retain the proc */ + OBJ_RETAIN(cd->p); + } + OPAL_LIST_DESTRUCT(&rlist); + + /* set up communicator structure */ + rc = ompi_comm_set ( &newcomp, /* new comm */ + comm, /* old comm */ + group->grp_proc_count, /* local_size */ + NULL, /* local_procs */ + rsize, /* remote_size */ + NULL , /* remote_procs */ + NULL, /* attrs */ + comm->error_handler, /* error handler */ + NULL, /* topo component */ + group, /* local group */ + new_group_pointer /* remote group */ + ); + if ( NULL == newcomp ) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + OBJ_RELEASE(new_group_pointer); + new_group_pointer = MPI_GROUP_NULL; + + /* allocate comm_cid */ + rc = ompi_comm_nextcid ( newcomp, /* new communicator */ + comm, /* old communicator */ + NULL, /* bridge comm */ + &root, /* local leader */ + (void*)port_string, /* rendezvous point */ + send_first, /* send or recv first */ + OMPI_COMM_CID_INTRA_PMIX); /* mode */ + if 
(OMPI_SUCCESS != rc) { + goto exit; + } + + /* activate comm and init coll-component */ + rc = ompi_comm_activate ( &newcomp, /* new communicator */ + comm, /* old communicator */ + NULL, /* bridge comm */ + &root, /* local leader */ + (void*)port_string, /* rendezvous point */ + send_first, /* send or recv first */ + OMPI_COMM_CID_INTRA_PMIX); /* mode */ + if (OMPI_SUCCESS != rc) { + goto exit; + } + + /* Question: do we have to re-start some low level stuff + to enable the usage of fast communication devices + between the two worlds ? + */ + + exit: + if (OMPI_SUCCESS != rc) { + if (MPI_COMM_NULL != newcomp && NULL != newcomp) { + OBJ_RELEASE(newcomp); + newcomp = MPI_COMM_NULL; + } + } + + *newcomm = newcomp; + return rc; +} + +static int construct_peers(ompi_group_t *group, opal_list_t *peers) +{ + int i; + opal_namelist_t *nm, *n2; + ompi_proc_t *proct; + opal_process_name_t proc_name; + + for (i=0; i < group->grp_proc_count; i++) { + if (OMPI_GROUP_IS_DENSE(group)) { + proct = group->grp_proc_pointers[i]; + } else { + proct = ompi_group_peer_lookup(group, i); + } + if (NULL == proct) { + OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND); + return OMPI_ERR_NOT_FOUND; + } + if (ompi_proc_is_sentinel (proct)) { + proc_name = ompi_proc_sentinel_to_name ((uintptr_t)proct); + } else { + proc_name = proct->super.proc_name; + } + + /* add to the list of peers */ + nm = OBJ_NEW(opal_namelist_t); + nm->name = proc_name; + /* need to maintain an ordered list to ensure the tracker signatures + * match across all procs */ + OPAL_LIST_FOREACH(n2, peers, opal_namelist_t) { + if (opal_compare_proc(nm->name, n2->name) < 0) { + opal_list_insert_pos(peers, &n2->super, &nm->super); + nm = NULL; + break; + } + } + if (NULL != nm) { + /* append to the end */ + opal_list_append(peers, &nm->super); + } + } + return OMPI_SUCCESS; +} + +int ompi_dpm_disconnect(ompi_communicator_t *comm) +{ + int ret; + ompi_group_t *group; + opal_list_t coll; + + /* Note that we explicitly use an RTE-based barrier 
(vs. an MPI + barrier). See a lengthy comment in + ompi/runtime/ompi_mpi_finalize.c for a much more detailed + rationale. */ + + /* setup the collective */ + OBJ_CONSTRUCT(&coll, opal_list_t); + /* RHC: assuming for now that this must flow across all + * local and remote group members */ + group = comm->c_local_group; + if (OMPI_SUCCESS != (ret = construct_peers(group, &coll))) { + OMPI_ERROR_LOG(ret); + OPAL_LIST_DESTRUCT(&coll); + return ret; + } + /* do the same for the remote group */ + group = comm->c_remote_group; + if (OMPI_SUCCESS != (ret = construct_peers(group, &coll))) { + OMPI_ERROR_LOG(ret); + OPAL_LIST_DESTRUCT(&coll); + return ret; + } + + /* ensure we tell the host RM to disconnect us - this + * is a blocking operation that must include a fence */ + if (NULL == opal_pmix.disconnect) { + /* use the fence */ + ret = opal_pmix.fence(&coll, false); + } else { + ret = opal_pmix.disconnect(&coll); + } + OPAL_LIST_DESTRUCT(&coll); + + return ret; +} + +int ompi_dpm_spawn(int count, const char *array_of_commands[], + char **array_of_argv[], + const int array_of_maxprocs[], + const MPI_Info array_of_info[], + const char *port_name) +{ + int rc, i, j; + int have_wdir=0; + int flag=0; + char cwd[OPAL_PATH_MAX]; + char host[OPAL_MAX_INFO_VAL]; /*** should define OMPI_HOST_MAX ***/ + char prefix[OPAL_MAX_INFO_VAL]; + char stdin_target[OPAL_MAX_INFO_VAL]; + char params[OPAL_MAX_INFO_VAL]; + char mapper[OPAL_MAX_INFO_VAL]; + char slot_list[OPAL_MAX_INFO_VAL]; + uint32_t ui32; + bool personality = false; + opal_jobid_t jobid; + + opal_list_t apps; + opal_list_t job_info; + opal_pmix_app_t *app; + opal_value_t *info; + bool local_spawn, non_mpi; + char **envars; + + /* parse the info object */ + /* check potentially for: + - "host": desired host where to spawn the processes + - "hostfile": hostfile containing hosts where procs are + to be spawned + - "add-host": add the specified hosts to the known list + of available resources and spawn these + procs on them + - 
"add-hostfile": add the hosts in the hostfile to the + known list of available resources and spawn + these procs on them + - "env": a newline-delimited list of envar values to be + placed into the app's environment (of form "foo=bar") + - "ompi_prefix": the path to the root of the directory tree where ompi + executables and libraries can be found on all nodes + used to spawn these procs + - "arch": desired architecture + - "wdir": directory, where executable can be found + - "path": list of directories where to look for the executable + - "file": filename, where additional information is provided. + - "soft": see page 92 of MPI-2. + - "mapper": indicate the mapper to be used for the job + - "display_map": display the map of the spawned job + - "npernode": number of procs/node to spawn + - "pernode": spawn one proc/node + - "ppr": spawn specified number of procs per specified object + - "map_by": specify object by which the procs should be mapped + - "rank_by": specify object by which the procs should be ranked + - "bind_to": specify object to which the procs should be bound + - "ompi_preload_binary": move binaries to nodes prior to execution + - "ompi_preload_files": move specified files to nodes prior to execution + - "ompi_non_mpi": spawned job will not call MPI_Init + - "ompi_param": list of MCA params to be in the spawned job's environment + - "env": newline (\n) delimited list of envar values to be passed to spawned procs + */ + + /* setup the job object */ + OBJ_CONSTRUCT(&job_info, opal_list_t); + OBJ_CONSTRUCT(&apps, opal_list_t); + + /* Convert the list of commands to list of opal_pmix_app_t */ + for (i = 0; i < count; ++i) { + app = OBJ_NEW(opal_pmix_app_t); + if (NULL == app) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OPAL_LIST_DESTRUCT(&apps); + opal_progress_event_users_decrement(); + return ORTE_ERR_OUT_OF_RESOURCE; + } + /* add the app to the job data */ + opal_list_append(&apps, &app->super); + + /* copy over the name of the executable */ + 
app->cmd = strdup(array_of_commands[i]); + opal_argv_append(&app->argc, &app->argv, app->cmd); + + /* record the number of procs to be generated */ + app->maxprocs = array_of_maxprocs[i]; + + /* copy over the argv array */ + if (MPI_ARGVS_NULL != array_of_argv && + MPI_ARGV_NULL != array_of_argv[i]) { + for (j=0; NULL != array_of_argv[i][j]; j++) { + opal_argv_append(&app->argc, &app->argv, array_of_argv[i][j]); + } + } + + /* Add environment variable with the contact information for the + child processes. + */ + opal_setenv("OMPI_PARENT_PORT", port_name, true, &app->env); + for (j = 0; NULL != environ[j]; ++j) { + if (0 == strncmp(OPAL_MCA_PREFIX, environ[j], strlen(OPAL_MCA_PREFIX))) { + opal_argv_append_nosize(&app->env, environ[j]); + } + } + + /* Check for well-known info keys */ + have_wdir = 0; + if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) { + + /* check for personality - this is a job-level key */ + ompi_info_get (array_of_info[i], "personality", sizeof(host) - 1, host, &flag); + if ( flag ) { + personality = true; + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_PERSONALITY); + opal_value_load(info, host, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'host' */ + ompi_info_get (array_of_info[i], "host", sizeof(host) - 1, host, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_HOST); + opal_value_load(info, host, OPAL_STRING); + opal_list_append(&app->info, &info->super); + } + + /* check for 'hostfile' */ + ompi_info_get (array_of_info[i], "hostfile", sizeof(host) - 1, host, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_HOSTFILE); + opal_value_load(info, host, OPAL_STRING); + opal_list_append(&app->info, &info->super); + } + + /* check for 'add-hostfile' */ + ompi_info_get (array_of_info[i], "add-hostfile", sizeof(host) - 1, host, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = 
strdup(OPAL_PMIX_ADD_HOSTFILE); + opal_value_load(info, host, OPAL_STRING); + opal_list_append(&app->info, &info->super); + } + + /* check for 'add-host' */ + ompi_info_get (array_of_info[i], "add-host", sizeof(host) - 1, host, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_ADD_HOST); + opal_value_load(info, host, OPAL_STRING); + opal_list_append(&app->info, &info->super); + } + + /* check for env */ + ompi_info_get (array_of_info[i], "env", sizeof(host)-1, host, &flag); + if ( flag ) { + envars = opal_argv_split(host, '\n'); + for (j=0; NULL != envars[j]; j++) { + opal_argv_append_nosize(&app->env, envars[j]); + } + opal_argv_free(envars); + } + + /* 'path', 'arch', 'file', 'soft' -- to be implemented */ + + /* check for 'ompi_prefix' (OMPI-specific -- to effect the same + * behavior as --prefix option to orterun) + * + * This is a job-level key + */ + ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix) - 1, prefix, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_PREFIX); + opal_value_load(info, prefix, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'wdir' */ + ompi_info_get (array_of_info[i], "wdir", sizeof(cwd) - 1, cwd, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_WDIR); + opal_value_load(info, cwd, OPAL_STRING); + opal_list_append(&app->info, &info->super); + have_wdir = 1; + } + + /* check for 'mapper' - a job-level key */ + ompi_info_get(array_of_info[i], "mapper", sizeof(mapper) - 1, mapper, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_MAPPER); + opal_value_load(info, mapper, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'display_map' - a job-level key */ + ompi_info_get_bool(array_of_info[i], "display_map", &local_spawn, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_DISPLAY_MAP); 
+ opal_value_load(info, &local_spawn, OPAL_BOOL); + opal_list_append(&job_info, &info->super); + } + + /* check for 'npernode' and 'ppr' - job-level key */ + ompi_info_get (array_of_info[i], "npernode", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_PPR); + info->type = OPAL_STRING; + (void)asprintf(&(info->data.string), "%s:n", slot_list); + opal_list_append(&job_info, &info->super); + } + ompi_info_get (array_of_info[i], "pernode", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_PPR); + opal_value_load(info, "1:n", OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + ompi_info_get (array_of_info[i], "ppr", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_PPR); + opal_value_load(info, slot_list, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'map_by' - job-level key */ + ompi_info_get(array_of_info[i], "map_by", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_MAPBY); + opal_value_load(info, slot_list, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'rank_by' - job-level key */ + ompi_info_get(array_of_info[i], "rank_by", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_RANKBY); + opal_value_load(info, slot_list, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'bind_to' - job-level key */ + ompi_info_get(array_of_info[i], "bind_to", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_BINDTO); + opal_value_load(info, slot_list, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'preload_binary' - job-level 
key */ + ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_PRELOAD_BIN); + opal_value_load(info, &local_spawn, OPAL_BOOL); + opal_list_append(&job_info, &info->super); + } + + /* check for 'preload_files' - job-level key */ + ompi_info_get (array_of_info[i], "ompi_preload_files", sizeof(cwd) - 1, cwd, &flag); + if ( flag ) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_PRELOAD_FILES); + opal_value_load(info, cwd, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* see if this is a non-mpi job - if so, then set the flag so ORTE + * knows what to do - job-level key + */ + ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); + if (flag && non_mpi) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_NON_PMI); + opal_value_load(info, &non_mpi, OPAL_BOOL); + opal_list_append(&job_info, &info->super); + } + + /* see if this is an MCA param that the user wants applied to the child job */ + ompi_info_get (array_of_info[i], "ompi_param", sizeof(params) - 1, params, &flag); + if ( flag ) { + opal_argv_append_unique_nosize(&app->env, params, true); + } + + /* see if user specified what to do with stdin - defaults to + * not forwarding stdin to child processes - job-level key + */ + ompi_info_get (array_of_info[i], "ompi_stdin_target", sizeof(stdin_target) - 1, stdin_target, &flag); + if ( flag ) { + if (0 == strcmp(stdin_target, "all")) { + ui32 = ORTE_VPID_WILDCARD; + } else if (0 == strcmp(stdin_target, "none")) { + ui32 = ORTE_VPID_INVALID; + } else { + ui32 = strtoul(stdin_target, NULL, 10); + } + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_STDIN_TGT); + opal_value_load(info, &ui32, OPAL_UINT32); + opal_list_append(&job_info, &info->super); + } + } + + /* default value: If the user did not tell us where to look for the + * executable, we assume the current working directory + */ + 
if ( !have_wdir ) { + if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OPAL_PATH_MAX))) { + ORTE_ERROR_LOG(rc); + OPAL_LIST_DESTRUCT(&apps); + opal_progress_event_users_decrement(); + return rc; + } + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_WDIR); + opal_value_load(info, cwd, OPAL_STRING); + opal_list_append(&app->info, &info->super); + } + + /* leave the map info alone - the launcher will + * decide where to put things + */ + } /* for (i = 0 ; i < count ; ++i) */ + + /* default the personality - job-level key */ + if (!personality) { + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_PERSONALITY); + opal_value_load(info, "ompi", OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* spawn procs */ + rc = opal_pmix.spawn(&job_info, &apps, &jobid); + OPAL_LIST_DESTRUCT(&job_info); + OPAL_LIST_DESTRUCT(&apps); + + if (OPAL_SUCCESS != rc) { + opal_progress_event_users_decrement(); + return MPI_ERR_SPAWN; + } + + return OMPI_SUCCESS; +} + +/* Create a rendezvous tag consisting of our name + a random number */ +int ompi_dpm_open_port(char *port_name) +{ + uint32_t r; + char *tmp; + + r = opal_rand(&rnd); + opal_convert_process_name_to_string(&tmp, OMPI_PROC_MY_NAME); + snprintf(port_name, MPI_MAX_PORT_NAME-1, "%s:%u", tmp, r); + free(tmp); + return OMPI_SUCCESS; +} + +int ompi_dpm_close_port(const char *port_name) +{ + /* nothing to do here - user is responsible for the memory */ + return OMPI_SUCCESS; +} + +int ompi_dpm_dyn_init(void) +{ + int root=0, rc; + bool send_first = true; + ompi_communicator_t *newcomm=NULL; + char *port_name=NULL, *tmp, *ptr; + + /* check for appropriate env variable */ + tmp = getenv("OMPI_PARENT_PORT"); + if (NULL == tmp) { + /* nothing to do */ + return OMPI_SUCCESS; + } + + /* the value passed to us may have quote marks around it to protect + * the value if passed on the command line. 
We must remove those + * to have a correct string + */ + if ('"' == tmp[0]) { + /* if the first char is a quote, then so will the last one be */ + tmp[strlen(tmp)-1] = '\0'; + ptr = &tmp[1]; + } else { + ptr = &tmp[0]; + } + port_name = strdup(ptr); + + rc = ompi_dpm_connect_accept(MPI_COMM_WORLD, root, port_name, send_first, &newcomm); + free(port_name); + if (OMPI_SUCCESS != rc) { + return rc; + } + + /* originally, we set comm_parent to comm_null (in comm_init), + * now we have to decrease the reference counters to the according + * objects + */ + OBJ_RELEASE(ompi_mpi_comm_parent->c_local_group); + OBJ_RELEASE(ompi_mpi_comm_parent->error_handler); + OBJ_RELEASE(ompi_mpi_comm_parent); + + /* Set the parent communicator */ + ompi_mpi_comm_parent = newcomm; + + /* Set name for debugging purposes */ + snprintf(newcomm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMM_PARENT"); + newcomm->c_flags |= OMPI_COMM_NAMEISSET; + + return OMPI_SUCCESS; +} + + +/* + * finalize the module + */ +int ompi_dpm_finalize(void) +{ + return OMPI_SUCCESS; +} + + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +/* this routine runs through the list of communicators + and does the disconnect for all dynamic communicators */ +int ompi_dpm_dyn_finalize(void) +{ + int i,j=0, max=0; + ompi_dpm_disconnect_obj **objs=NULL; + ompi_communicator_t *comm=NULL; + + if (1 size = ompi_comm_remote_size(comm); + } else { + obj->size = ompi_comm_size(comm); + } + + obj->comm = comm; + obj->reqs = (ompi_request_t**)malloc(2*obj->size*sizeof(ompi_request_t *)); + if (NULL == obj->reqs) { + opal_output(0, "Could not allocate request array for disconnect object"); + free(obj); + return NULL; + } + + /* initiate all isend_irecvs. We use a dummy buffer stored on + the object, since we are sending zero size messages anyway. 
*/ + for (i=0; i < obj->size; i++) { + ret = MCA_PML_CALL(irecv(&(obj->buf), 0, MPI_INT, i, + OMPI_COMM_BARRIER_TAG, comm, + &(obj->reqs[2*i]))); + + if (OMPI_SUCCESS != ret) { + opal_output(0, "dpm_disconnect_init: error %d in irecv to process %d", ret, i); + free(obj->reqs); + free(obj); + return NULL; + } + ret = MCA_PML_CALL(isend(&(obj->buf), 0, MPI_INT, i, + OMPI_COMM_BARRIER_TAG, + MCA_PML_BASE_SEND_SYNCHRONOUS, + comm, &(obj->reqs[2*i+1]))); + + if (OMPI_SUCCESS != ret) { + opal_output(0, "dpm_disconnect_init: error %d in isend to process %d", ret, i); + free(obj->reqs); + free(obj); + return NULL; + } + } + + /* return handle */ + return obj; +} +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +/* - count how many requests are active + * - generate a request array large enough to hold + all active requests + * - call waitall on the overall request array + * - free the objects + */ +static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs) +{ + + ompi_request_t **reqs=NULL; + char *treq=NULL; + int totalcount = 0; + int i; + int ret; + + for (i=0; isize; + } + + reqs = (ompi_request_t**)malloc(2*totalcount*sizeof(ompi_request_t *)); + if (NULL == reqs) { + opal_output(0, "ompi_comm_disconnect_waitall: error allocating memory"); + return OMPI_ERROR; + } + + /* generate a single, large array of pending requests */ + treq = (char *)reqs; + for (i=0; ireqs, 2*objs[i]->size * sizeof(ompi_request_t *)); + treq += 2*objs[i]->size * sizeof(ompi_request_t *); + } + + /* force all non-blocking all-to-alls to finish */ + ret = ompi_request_wait_all(2*totalcount, reqs, MPI_STATUSES_IGNORE); + + /* Finally, free everything */ + for (i=0; i< count; i++ ) { + if (NULL != objs[i]->reqs ) { + free(objs[i]->reqs ); + free(objs[i]); + } + } + + free(reqs); + + return ret; +} + 
+/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +static bool ompi_dpm_group_is_dyn (ompi_group_t *group, ompi_jobid_t thisjobid) +{ + int size = group ? ompi_group_size (group) : 0; + + for (int i = 0 ; i < size ; ++i) { + opal_process_name_t name = ompi_group_get_proc_name (group, i); + + if (thisjobid != ((ompi_process_name_t *) &name)->jobid) { + /* at least one is different */ + return true; + } + } + + return false; +} + +/* All we want to do in this function is determine if the number of + * jobids in the local and/or remote group is > 1. This tells us to + * set the disconnect flag. We don't actually care what the true + * number -is-, only that it is > 1 + */ +void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm) +{ + bool found; + ompi_jobid_t thisjobid; + + /* special case for MPI_COMM_NULL */ + if (comm == MPI_COMM_NULL) { + return; + } + + thisjobid = ompi_group_get_proc_name (comm->c_local_group, 0).jobid; + + /* loop over all processes in local group and check for + * a different jobid + */ + found = ompi_dpm_group_is_dyn (comm->c_local_group, thisjobid); + if (!found) { + /* if inter-comm, loop over all processes in remote_group + * and see if any are different from thisjobid + */ + found = ompi_dpm_group_is_dyn (comm->c_remote_group, thisjobid); + } + + /* if a different jobid was found, set the disconnect flag*/ + if (found) { + ompi_comm_num_dyncomm++; + OMPI_COMM_SET_DYNAMIC(comm); + } +} diff --git a/ompi/dpm/dpm.h b/ompi/dpm/dpm.h new file mode 100644 index 00000000000..34084480f87 --- /dev/null +++ b/ompi/dpm/dpm.h @@ -0,0 +1,106 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Dynamic Process Management Interface + * + */ + +#ifndef OMPI_DPM_H +#define OMPI_DPM_H + +#include "ompi_config.h" + +#include "ompi/info/info.h" +#include "ompi/communicator/communicator.h" + +BEGIN_C_DECLS + +/* + * Initialize the DPM system + */ +int ompi_dpm_init(void); + +/* + * Connect/accept communications + */ +int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, + const char *port, bool send_first, + ompi_communicator_t **newcomm); + +/** + * Executes internally a disconnect on all dynamic communicators + * in case the user did not disconnect them. + */ +int ompi_dpm_disconnect(ompi_communicator_t *comm); + +/* + * Dynamically spawn processes + */ +int ompi_dpm_spawn(int count, char const *array_of_commands[], + char **array_of_argv[], + const int array_of_maxprocs[], + const MPI_Info array_of_info[], + const char *port_name); + +/* + * This routine checks, whether an application has been spawned + * by another MPI application, or has been independently started. + * If it has been spawned, it establishes the parent communicator. + * Since the routine has to communicate, it should be among the last + * steps in MPI_Init, to be sure that everything is already set up. 
+ */ +int ompi_dpm_dyn_init(void); + +/* + * Interface for mpi_finalize to call to ensure dynamically spawned procs + * collectively finalize + */ +int ompi_dpm_dyn_finalize(void); + +/* this routine counts the number of different jobids of the processes + given in a certain communicator. If there is more than one jobid, + we mark the communicator as 'dynamic'. This is especially relevant + for the MPI_Comm_disconnect *and* for MPI_Finalize, where we have + to wait for all still connected processes. +*/ +void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm); + +/* + * Define a rendezvous point for a dynamically spawned job + */ +int ompi_dpm_open_port(char *port_name); + +/* + * Unpublish the rendezvous point + */ +int ompi_dpm_close_port(const char *port_name); + +/* + * Finalize the DPM + */ +int ompi_dpm_finalize(void); + +END_C_DECLS + +#endif /* OMPI_DPM_H */ diff --git a/ompi/errhandler/Makefile.am b/ompi/errhandler/Makefile.am index 8d66dade4ca..6f7264135c2 100644 --- a/ompi/errhandler/Makefile.am +++ b/ompi/errhandler/Makefile.am @@ -6,15 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -28,7 +29,7 @@ headers += \ errhandler/errhandler.h \ errhandler/errhandler_predefined.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ errhandler/errhandler.c \ errhandler/errhandler_invoke.c \ errhandler/errhandler_predefined.c \ diff --git a/ompi/errhandler/errcode-internal.c b/ompi/errhandler/errcode-internal.c index 020c525fcbe..37429f6ff81 100644 --- a/ompi/errhandler/errcode-internal.c +++ b/ompi/errhandler/errcode-internal.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reseved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -55,6 +55,7 @@ static ompi_errcode_intern_t ompi_err_rma_attach_intern; static ompi_errcode_intern_t ompi_err_rma_range_intern; static ompi_errcode_intern_t ompi_err_rma_conflict_intern; static ompi_errcode_intern_t ompi_err_win_intern; +static ompi_errcode_intern_t ompi_err_rma_flavor_intern; static void ompi_errcode_intern_construct(ompi_errcode_intern_t* errcode); static void ompi_errcode_intern_destruct(ompi_errcode_intern_t* errcode); @@ -79,7 +80,7 @@ int ompi_errcode_intern_init (void) ompi_success_intern.mpi_code = MPI_SUCCESS; ompi_success_intern.index = pos++; strncpy(ompi_success_intern.errstring, "OMPI_SUCCESS", OMPI_MAX_ERROR_STRING); - opal_pointer_array_set_item(&ompi_errcodes_intern, ompi_success_intern.index, + opal_pointer_array_set_item(&ompi_errcodes_intern, ompi_success_intern.index, &ompi_success_intern); OBJ_CONSTRUCT(&ompi_error_intern, ompi_errcode_intern_t); @@ -250,6 +251,14 @@ int ompi_errcode_intern_init (void) opal_pointer_array_set_item(&ompi_errcodes_intern, ompi_err_win_intern.index, &ompi_err_win_intern); + OBJ_CONSTRUCT(&ompi_err_rma_flavor_intern, ompi_errcode_intern_t); + ompi_err_rma_flavor_intern.code = OMPI_ERR_RMA_FLAVOR; + ompi_err_rma_flavor_intern.mpi_code = MPI_ERR_RMA_FLAVOR; + ompi_err_rma_flavor_intern.index = pos++; + strncpy(ompi_err_rma_flavor_intern.errstring, "OMPI_ERR_RMA_FLAVOR", OMPI_MAX_ERROR_STRING); + opal_pointer_array_set_item(&ompi_errcodes_intern, ompi_err_rma_flavor_intern.index, + &ompi_err_rma_flavor_intern); + ompi_errcode_intern_lastused=pos; return OMPI_SUCCESS; } @@ -279,6 +288,7 @@ int ompi_errcode_intern_finalize(void) OBJ_DESTRUCT(&ompi_err_rma_range_intern); OBJ_DESTRUCT(&ompi_err_rma_conflict_intern); OBJ_DESTRUCT(&ompi_err_win_intern); + OBJ_DESTRUCT(&ompi_err_rma_flavor_intern); OBJ_DESTRUCT(&ompi_errcodes_intern); return OMPI_SUCCESS; diff --git a/ompi/errhandler/errcode-internal.h 
b/ompi/errhandler/errcode-internal.h index 183f2ec65e8..745098b5a0d 100644 --- a/ompi/errhandler/errcode-internal.h +++ b/ompi/errhandler/errcode-internal.h @@ -6,18 +6,18 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file **/ @@ -51,7 +51,7 @@ typedef struct ompi_errcode_intern_t ompi_errcode_intern_t; OMPI_DECLSPEC extern opal_pointer_array_t ompi_errcodes_intern; OMPI_DECLSPEC extern int ompi_errcode_intern_lastused; -/** +/** * Return the MPI errcode for a given internal error code. */ static inline int ompi_errcode_get_mpi_code(int errcode) { @@ -86,7 +86,7 @@ static inline int ompi_errcode_get_mpi_code(int errcode) * Invoked from ompi_mpi_init(); sets up all static MPI error codes, */ int ompi_errcode_intern_init(void); - + /** * Finalize the error codes. * @@ -95,7 +95,7 @@ int ompi_errcode_intern_init(void); * Invokes from ompi_mpi_finalize(); tears down the error code array. */ int ompi_errcode_intern_finalize(void); - + END_C_DECLS #endif /* OMPI_ERRCODE_INTERNAL_H */ diff --git a/ompi/errhandler/errcode.c b/ompi/errhandler/errcode.c index b0a3d81be8b..3a63fa45dff 100644 --- a/ompi/errhandler/errcode.c +++ b/ompi/errhandler/errcode.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -17,18 +17,16 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" #include -#ifdef HAVE_STRING_H #include -#endif #include "mpi.h" @@ -114,6 +112,7 @@ static ompi_mpi_errcode_t ompi_err_rma_attach; static ompi_mpi_errcode_t ompi_err_rma_flavor; static ompi_mpi_errcode_t ompi_err_rma_shared; static ompi_mpi_errcode_t ompi_t_err_invalid; +static ompi_mpi_errcode_t ompi_t_err_invalid_name; static void ompi_mpi_errcode_construct(ompi_mpi_errcode_t* errcode); static void ompi_mpi_errcode_destruct(ompi_mpi_errcode_t* errcode); @@ -214,6 +213,7 @@ int ompi_mpi_errcode_init (void) CONSTRUCT_ERRCODE( ompi_err_rma_flavor, MPI_ERR_RMA_FLAVOR, "MPI_ERR_RMA_FLAVOR: Invalid type of window" ); CONSTRUCT_ERRCODE( ompi_err_rma_shared, MPI_ERR_RMA_SHARED, "MPI_ERR_RMA_SHARED: Memory cannot be shared" ); CONSTRUCT_ERRCODE( ompi_t_err_invalid, MPI_T_ERR_INVALID, "MPI_T_ERR_INVALID: Invalid use of the interface or bad parameter value(s)" ); + CONSTRUCT_ERRCODE( ompi_t_err_invalid_name, MPI_T_ERR_INVALID_NAME, "MPI_T_ERR_INVALID_NAME: The variable or category name is invalid" ); /* Per MPI-3 p353:27-32, MPI_LASTUSEDCODE must be >= MPI_ERR_LASTCODE. So just start it as == MPI_ERR_LASTCODE. */ @@ -226,9 +226,9 @@ int ompi_mpi_errcode_finalize(void) { int i; ompi_mpi_errcode_t *errc; - + for (i=ompi_mpi_errcode_lastpredefined+1; i<=ompi_mpi_errcode_lastused; i++) { - /* + /* * there are some user defined error-codes, which * we have to free. 
*/ @@ -309,6 +309,7 @@ int ompi_mpi_errcode_finalize(void) OBJ_DESTRUCT(&ompi_err_rma_flavor); OBJ_DESTRUCT(&ompi_err_rma_shared); OBJ_DESTRUCT(&ompi_t_err_invalid); + OBJ_DESTRUCT(&ompi_t_err_invalid_name); OBJ_DESTRUCT(&ompi_mpi_errcodes); return OMPI_SUCCESS; @@ -322,7 +323,7 @@ int ompi_mpi_errcode_add(int errclass ) newerrcode->code = (ompi_mpi_errcode_lastused+1); newerrcode->cls = errclass; opal_pointer_array_set_item(&ompi_mpi_errcodes, newerrcode->code, newerrcode); - + ompi_mpi_errcode_lastused++; return newerrcode->code; } @@ -334,7 +335,7 @@ int ompi_mpi_errclass_add(void) newerrcode = OBJ_NEW(ompi_mpi_errcode_t); newerrcode->cls = ( ompi_mpi_errcode_lastused+1); opal_pointer_array_set_item(&ompi_mpi_errcodes, newerrcode->cls, newerrcode); - + ompi_mpi_errcode_lastused++; return newerrcode->cls; } @@ -344,14 +345,14 @@ int ompi_mpi_errnum_add_string(int errnum, const char *errstring, int len) ompi_mpi_errcode_t *errcodep; errcodep = (ompi_mpi_errcode_t *)opal_pointer_array_get_item(&ompi_mpi_errcodes, errnum); - if ( NULL == errcodep ) { + if ( NULL == errcodep ) { return OMPI_ERROR; } if ( MPI_MAX_ERROR_STRING > len ) { len = MPI_MAX_ERROR_STRING; } - + strncpy ( errcodep->errstring, errstring, len ); return OMPI_SUCCESS; } diff --git a/ompi/errhandler/errcode.h b/ompi/errhandler/errcode.h index f6111069c2a..656ddc5576d 100644 --- a/ompi/errhandler/errcode.h +++ b/ompi/errhandler/errcode.h @@ -6,18 +6,18 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file **/ @@ -34,9 +34,9 @@ BEGIN_C_DECLS /** - * Back-end type for MPI error codes. - * Please note: - * if code == MPI_UNDEFINED, than the according structure + * Back-end type for MPI error codes. + * Please note: + * if code == MPI_UNDEFINED, than the according structure * represents an error class. * For the predefined error codes and classes, code and * cls are both set to the according value. @@ -55,7 +55,7 @@ OMPI_DECLSPEC extern int ompi_mpi_errcode_lastpredefined; OMPI_DECLSPEC extern ompi_mpi_errcode_t ompi_err_unknown; -/** +/** * Check for a valid error code */ static inline bool ompi_mpi_errcode_is_invalid(int errcode) @@ -79,39 +79,39 @@ static inline int ompi_mpi_errcode_get_class (int errcode) } if (NULL != err) { - if ( err->code != MPI_UNDEFINED ) { + if ( err->code != MPI_UNDEFINED ) { return err->cls; } } return ompi_err_unknown.cls; } -static inline int ompi_mpi_errcode_is_predefined ( int errcode ) +static inline int ompi_mpi_errcode_is_predefined ( int errcode ) { - if ( errcode >= 0 && errcode <= ompi_mpi_errcode_lastpredefined ) + if ( errcode >= 0 && errcode <= ompi_mpi_errcode_lastpredefined ) return true; - + return false; } -static inline int ompi_mpi_errnum_is_class ( int errnum ) +static inline int ompi_mpi_errnum_is_class ( int errnum ) { ompi_mpi_errcode_t *err; - if (errno < 0) { + if (errnum < 0) { return false; } if ( errnum <= ompi_mpi_errcode_lastpredefined ) { - /* Predefined error values represent an error code and + /* Predefined error values represent an error code and an error class at the same time */ return true; } err = (ompi_mpi_errcode_t *)opal_pointer_array_get_item(&ompi_mpi_errcodes, errnum); if (NULL != err) { - if ( MPI_UNDEFINED == err->code) { - /* Distinction between error class and 
error code is that for the + if ( MPI_UNDEFINED == err->code) { + /* Distinction between error class and error code is that for the first one the code section is set to MPI_UNDEFINED */ return true; } @@ -119,10 +119,10 @@ static inline int ompi_mpi_errnum_is_class ( int errnum ) return false; } - - -/** - * Return the error string + + +/** + * Return the error string */ static inline char* ompi_mpi_errnum_get_string (int errnum) { @@ -151,7 +151,7 @@ static inline char* ompi_mpi_errnum_get_string (int errnum) * Invoked from ompi_mpi_init(); sets up all static MPI error codes, */ int ompi_mpi_errcode_init(void); - + /** * Finalize the error codes. * @@ -160,26 +160,26 @@ int ompi_mpi_errcode_init(void); * Invokes from ompi_mpi_finalize(); tears down the error code array. */ int ompi_mpi_errcode_finalize(void); - -/** + +/** * Add an error code * * @param: error class to which this new error code belongs to * * @returns the new error code on SUCCESS (>0) * @returns OMPI_ERROR otherwise - * + * */ int ompi_mpi_errcode_add (int errclass); -/** +/** * Add an error class * * @param: none * * @returns the new error class on SUCCESS (>0) * @returns OMPI_ERROR otherwise - * + * */ int ompi_mpi_errclass_add (void); @@ -194,7 +194,7 @@ int ompi_mpi_errclass_add (void); * @returns OMPI_ERROR on error */ int ompi_mpi_errnum_add_string (int errnum, const char* string, int len); - + END_C_DECLS #endif /* OMPI_MPI_ERRCODE_H */ diff --git a/ompi/errhandler/errhandler.c b/ompi/errhandler/errhandler.c index 54eaf5aebb5..d7fee7be54f 100644 --- a/ompi/errhandler/errhandler.c +++ b/ompi/errhandler/errhandler.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,17 +15,15 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" -#ifdef HAVE_STRING_H #include -#endif #include "ompi/communicator/communicator.h" #include "ompi/win/win.h" @@ -58,16 +56,16 @@ OBJ_CLASS_INSTANCE(ompi_errhandler_t, opal_object_t, ompi_errhandler_construct, * _addr flavors are for F03 bindings */ ompi_predefined_errhandler_t ompi_mpi_errhandler_null = {{{0}}}; -ompi_predefined_errhandler_t *ompi_mpi_errhandler_null_addr = +ompi_predefined_errhandler_t *ompi_mpi_errhandler_null_addr = &ompi_mpi_errhandler_null; ompi_predefined_errhandler_t ompi_mpi_errors_are_fatal = {{{0}}}; -ompi_predefined_errhandler_t *ompi_mpi_errors_are_fatal_addr = +ompi_predefined_errhandler_t *ompi_mpi_errors_are_fatal_addr = &ompi_mpi_errors_are_fatal; ompi_predefined_errhandler_t ompi_mpi_errors_return = {{{0}}}; -ompi_predefined_errhandler_t *ompi_mpi_errors_return_addr = +ompi_predefined_errhandler_t *ompi_mpi_errors_return_addr = &ompi_mpi_errors_return; ompi_predefined_errhandler_t ompi_mpi_errors_throw_exceptions = {{{0}}}; -ompi_predefined_errhandler_t *ompi_mpi_errors_throw_exceptions_addr = +ompi_predefined_errhandler_t *ompi_mpi_errors_throw_exceptions_addr = &ompi_mpi_errors_throw_exceptions; @@ -94,7 +92,7 @@ int ompi_errhandler_init(void) ompi_mpi_errhandler_null.eh.eh_file_fn = NULL; ompi_mpi_errhandler_null.eh.eh_win_fn = NULL ; ompi_mpi_errhandler_null.eh.eh_fort_fn = NULL; - strncpy (ompi_mpi_errhandler_null.eh.eh_name, "MPI_ERRHANDLER_NULL", + strncpy (ompi_mpi_errhandler_null.eh.eh_name, "MPI_ERRHANDLER_NULL", strlen("MPI_ERRHANDLER_NULL")+1 ); @@ -107,9 +105,9 @@ int ompi_errhandler_init(void) ompi_mpi_errors_are_fatal.eh.eh_file_fn = ompi_mpi_errors_are_fatal_file_handler; 
ompi_mpi_errors_are_fatal.eh.eh_win_fn = ompi_mpi_errors_are_fatal_win_handler ; ompi_mpi_errors_are_fatal.eh.eh_fort_fn = NULL; - strncpy (ompi_mpi_errors_are_fatal.eh.eh_name, "MPI_ERRORS_ARE_FATAL", + strncpy (ompi_mpi_errors_are_fatal.eh.eh_name, "MPI_ERRORS_ARE_FATAL", strlen("MPI_ERRORS_ARE_FATAL")+1 ); - + OBJ_CONSTRUCT( &ompi_mpi_errors_return.eh, ompi_errhandler_t ); if( ompi_mpi_errors_return.eh.eh_f_to_c_index != OMPI_ERRORS_RETURN_FORTRAN ) return OMPI_ERROR; @@ -119,7 +117,7 @@ int ompi_errhandler_init(void) ompi_mpi_errors_return.eh.eh_file_fn = ompi_mpi_errors_return_file_handler; ompi_mpi_errors_return.eh.eh_win_fn = ompi_mpi_errors_return_win_handler; ompi_mpi_errors_return.eh.eh_fort_fn = NULL; - strncpy (ompi_mpi_errors_return.eh.eh_name, "MPI_ERRORS_RETURN", + strncpy (ompi_mpi_errors_return.eh.eh_name, "MPI_ERRORS_RETURN", strlen("MPI_ERRORS_RETURN")+1 ); /* If we're going to use C++, functions will be fixed up during @@ -128,17 +126,17 @@ int ompi_errhandler_init(void) conversion from C handles to C++ handles happens in that dispatch function -- not the errhandler_invoke.c stuff here in libmpi. 
*/ OBJ_CONSTRUCT( &ompi_mpi_errors_throw_exceptions.eh, ompi_errhandler_t ); - ompi_mpi_errors_throw_exceptions.eh.eh_mpi_object_type = + ompi_mpi_errors_throw_exceptions.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; ompi_mpi_errors_throw_exceptions.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; - ompi_mpi_errors_throw_exceptions.eh.eh_comm_fn = + ompi_mpi_errors_throw_exceptions.eh.eh_comm_fn = ompi_mpi_errors_are_fatal_comm_handler; - ompi_mpi_errors_throw_exceptions.eh.eh_file_fn = + ompi_mpi_errors_throw_exceptions.eh.eh_file_fn = ompi_mpi_errors_are_fatal_file_handler; - ompi_mpi_errors_throw_exceptions.eh.eh_win_fn = + ompi_mpi_errors_throw_exceptions.eh.eh_win_fn = ompi_mpi_errors_are_fatal_win_handler ; ompi_mpi_errors_throw_exceptions.eh.eh_fort_fn = NULL; - strncpy (ompi_mpi_errors_throw_exceptions.eh.eh_name, "MPI_ERRORS_THROW_EXCEPTIONS", + strncpy (ompi_mpi_errors_throw_exceptions.eh.eh_name, "MPI_ERRORS_THROW_EXCEPTIONS", strlen("MPI_ERRORS_THROW_EXCEPTIONS")+1 ); /* All done */ @@ -189,7 +187,7 @@ ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type, function pointer types in the union; it doesn't matter which. It only matters that we dereference/use the right member when invoking the callback. 
*/ - + new_errhandler->eh_mpi_object_type = object_type; new_errhandler->eh_lang = lang; switch (object_type ) { @@ -222,11 +220,11 @@ int ompi_errhandler_runtime_callback(opal_pointer_array_t *errors) { ompi_rte_error_report_t *err; int errcode = 1; - if (NULL != errors && + if (NULL != errors && (NULL != (err = (ompi_rte_error_report_t*)opal_pointer_array_get_item(errors, 0)))) { errcode = err->errcode; } - + ompi_mpi_abort(MPI_COMM_WORLD, errcode); return OMPI_SUCCESS; } @@ -246,7 +244,7 @@ static void ompi_errhandler_construct(ompi_errhandler_t *new_errhandler) /* assign entry in fortran <-> c translation array */ - ret_val = opal_pointer_array_add(&ompi_errhandler_f_to_c_table, + ret_val = opal_pointer_array_add(&ompi_errhandler_f_to_c_table, new_errhandler); new_errhandler->eh_f_to_c_index = ret_val; diff --git a/ompi/errhandler/errhandler.h b/ompi/errhandler/errhandler.h index 8a5151fbc2b..b131869676e 100644 --- a/ompi/errhandler/errhandler.h +++ b/ompi/errhandler/errhandler.h @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,16 +6,21 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file **/ @@ -31,7 +36,6 @@ #include "opal/class/opal_object.h" #include "opal/class/opal_pointer_array.h" -#include "ompi/mca/rte/rte.h" #include "ompi/runtime/mpiruntime.h" #include "ompi/errhandler/errhandler_predefined.h" #include "ompi/errhandler/errcode-internal.h" @@ -51,7 +55,7 @@ enum { /** * Typedef for all fortran errhandler functions */ -typedef void (ompi_errhandler_fortran_handler_fn_t)(MPI_Fint *, +typedef void (ompi_errhandler_fortran_handler_fn_t)(MPI_Fint *, MPI_Fint *, ...); /** @@ -90,9 +94,8 @@ struct ompi_errhandler_t; /** * C++ invocation function signature */ -typedef void (ompi_errhandler_cxx_dispatch_fn_t)(struct ompi_errhandler_t *errhandler, - void *handle, int *err_code, - const char *message); +typedef void (ompi_errhandler_cxx_dispatch_fn_t)(void *handle, int *err_code, + const char *message, ompi_errhandler_generic_handler_fn_t *fn); /** * Back-end type for MPI_Errorhandler. @@ -102,7 +105,7 @@ struct ompi_errhandler_t { char eh_name[MPI_MAX_OBJECT_NAME]; /* Type of MPI object that this handler is for */ - + ompi_errhandler_type_t eh_mpi_object_type; /* What language was the error handler created in */ @@ -233,7 +236,6 @@ struct ompi_request_t; #define OMPI_ERRHANDLER_CHECK(rc, mpi_object, err_code, message) \ if( OPAL_UNLIKELY(rc != OMPI_SUCCESS) ) { \ int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \ - OPAL_CR_EXIT_LIBRARY() \ ompi_errhandler_invoke((mpi_object)->error_handler, \ (mpi_object), \ (int) (mpi_object)->errhandler_type, \ @@ -258,7 +260,6 @@ struct ompi_request_t; * MPI_SUCCESS. 
*/ #define OMPI_ERRHANDLER_RETURN(rc, mpi_object, err_code, message) \ - OPAL_CR_EXIT_LIBRARY() \ if ( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { \ int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \ ompi_errhandler_invoke((mpi_object)->error_handler, \ @@ -307,7 +308,7 @@ struct ompi_request_t; * comm, win, or win) * @param type The type of the MPI object. Necessary, since * you can not assign a single type to the predefined - * error handlers. This information is therefore + * error handlers. This information is therefore * stored on the MPI object itself. * @param err_code The error code * @param message Any additional message; typically the name of the @@ -323,7 +324,7 @@ struct ompi_request_t; * If this function returns, it returns the err_code. Note that it * may not return (e.g., for MPI_ERRORS_ARE_FATAL). */ - OMPI_DECLSPEC int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, + OMPI_DECLSPEC int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, int type, int err_code, const char *message); @@ -333,7 +334,7 @@ struct ompi_request_t; * is safe to invoke this function if none of the requests have an * outstanding error; MPI_SUCCESS will be returned. */ - int ompi_errhandler_request_invoke(int count, + int ompi_errhandler_request_invoke(int count, struct ompi_request_t **requests, const char *message); @@ -349,7 +350,7 @@ struct ompi_request_t; * This function is called as the back-end of all the * MPI_*_CREATE_ERRHANDLER functions. It creates a new * ompi_errhandler_t object, initializes it to the correct object - * type, and sets the callback function on it. + * type, and sets the callback function on it. * * The type of the function pointer is (arbitrarily) the fortran * function handler type. 
Since this function has to accept 4 @@ -362,7 +363,7 @@ struct ompi_request_t; OMPI_DECLSPEC ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type, ompi_errhandler_generic_handler_fn_t *func, ompi_errhandler_lang_t language); - + /** * Callback function from runtime layer to alert the MPI layer of an error at * the runtime layer. @@ -401,7 +402,7 @@ OMPI_DECLSPEC int ompi_errhandler_runtime_callback(opal_pointer_array_t *errors) */ static inline bool ompi_errhandler_is_intrinsic(ompi_errhandler_t *errhandler) { - if ( OMPI_ERRHANDLER_TYPE_PREDEFINED == errhandler->eh_mpi_object_type ) + if ( OMPI_ERRHANDLER_TYPE_PREDEFINED == errhandler->eh_mpi_object_type ) return true; return false; diff --git a/ompi/errhandler/errhandler_invoke.c b/ompi/errhandler/errhandler_invoke.c index 9d3efca5713..22462a53c18 100644 --- a/ompi/errhandler/errhandler_invoke.c +++ b/ompi/errhandler/errhandler_invoke.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,16 +6,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,23 +33,23 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" -int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, +int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, int object_type, int err_code, const char *message) { MPI_Fint fortran_handle, fortran_err_code = OMPI_INT_2_FINT(err_code); ompi_communicator_t *comm; ompi_win_t *win; ompi_file_t *file; - + /* If we got no errorhandler, then just invoke errors_abort */ if (NULL == errhandler) { ompi_mpi_errors_are_fatal_comm_handler(NULL, NULL, message); return err_code; } - + /* Figure out what kind of errhandler it is, figure out if it's fortran or C, and then invoke it */ - + switch (object_type) { case OMPI_ERRHANDLER_TYPE_COMM: comm = (ompi_communicator_t *) mpi_object; @@ -52,12 +57,12 @@ int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, case OMPI_ERRHANDLER_LANG_C: errhandler->eh_comm_fn(&comm, &err_code, message, NULL); break; - + case OMPI_ERRHANDLER_LANG_CXX: - errhandler->eh_cxx_dispatch_fn(errhandler, &comm, - &err_code, message); + errhandler->eh_cxx_dispatch_fn(&comm, &err_code, message, + (ompi_errhandler_generic_handler_fn_t *)errhandler->eh_comm_fn); break; - + case OMPI_ERRHANDLER_LANG_FORTRAN: fortran_handle = OMPI_INT_2_FINT(comm->c_f_to_c_index); errhandler->eh_fort_fn(&fortran_handle, &fortran_err_code); @@ -65,19 +70,19 @@ int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, break; } break; - + case OMPI_ERRHANDLER_TYPE_WIN: win = (ompi_win_t *) mpi_object; switch (errhandler->eh_lang) { case OMPI_ERRHANDLER_LANG_C: errhandler->eh_win_fn(&win, &err_code, message, NULL); break; - + case OMPI_ERRHANDLER_LANG_CXX: - errhandler->eh_cxx_dispatch_fn(errhandler, &win, - &err_code, message); + errhandler->eh_cxx_dispatch_fn(&win, &err_code, message, + (ompi_errhandler_generic_handler_fn_t *)errhandler->eh_win_fn); break; - 
+ case OMPI_ERRHANDLER_LANG_FORTRAN: fortran_handle = OMPI_INT_2_FINT(win->w_f_to_c_index); errhandler->eh_fort_fn(&fortran_handle, &fortran_err_code); @@ -85,19 +90,19 @@ int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, break; } break; - + case OMPI_ERRHANDLER_TYPE_FILE: file = (ompi_file_t *) mpi_object; switch (errhandler->eh_lang) { case OMPI_ERRHANDLER_LANG_C: errhandler->eh_file_fn(&file, &err_code, message, NULL); break; - + case OMPI_ERRHANDLER_LANG_CXX: - errhandler->eh_cxx_dispatch_fn(errhandler, &file, - &err_code, message); + errhandler->eh_cxx_dispatch_fn(&file, &err_code, message, + (ompi_errhandler_generic_handler_fn_t *)errhandler->eh_file_fn); break; - + case OMPI_ERRHANDLER_LANG_FORTRAN: fortran_handle = OMPI_INT_2_FINT(file->f_f_to_c_index); errhandler->eh_fort_fn(&fortran_handle, &fortran_err_code); @@ -106,13 +111,13 @@ int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object, } break; } - + /* All done */ return err_code; } -int ompi_errhandler_request_invoke(int count, +int ompi_errhandler_request_invoke(int count, struct ompi_request_t **requests, const char *message) { @@ -148,8 +153,8 @@ int ompi_errhandler_request_invoke(int count, MPI_SUCCESS != requests[i]->req_status.MPI_ERROR) { /* Ignore the error -- what are we going to do? We're already going to invoke an exception */ - ompi_request_free(&(requests[i])); - } + ompi_request_free(&(requests[i])); + } } /* Invoke the exception */ diff --git a/ompi/errhandler/errhandler_predefined.c b/ompi/errhandler/errhandler_predefined.c index 1d42afeffc7..4d50611c12f 100644 --- a/ompi/errhandler/errhandler_predefined.c +++ b/ompi/errhandler/errhandler_predefined.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,9 +16,9 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -45,7 +45,7 @@ /* * Local functions */ -static void backend_fatal(char *type, struct ompi_communicator_t *comm, +static void backend_fatal(char *type, struct ompi_communicator_t *comm, char *name, int *error_code, va_list arglist); static void out(char *str, char *arg); @@ -115,7 +115,7 @@ void ompi_mpi_errors_return_comm_handler(struct ompi_communicator_t **comm, { /* Don't need anything more -- just need this function to exist */ /* Silence some compiler warnings */ - + va_list arglist; va_start(arglist, error_code); va_end(arglist); @@ -127,7 +127,7 @@ void ompi_mpi_errors_return_file_handler(struct ompi_file_t **file, { /* Don't need anything more -- just need this function to exist */ /* Silence some compiler warnings */ - + va_list arglist; va_start(arglist, error_code); va_end(arglist); @@ -139,7 +139,7 @@ void ompi_mpi_errors_return_win_handler(struct ompi_win_t **win, { /* Don't need anything more -- just need this function to exist */ /* Silence some compiler warnings */ - + va_list arglist; va_start(arglist, error_code); va_end(arglist); @@ -165,60 +165,101 @@ static void out(char *str, char *arg) /* * Use opal_show_help() to aggregate the error messages (i.e., show it - * once rather than N times). + * once rather than N times). * * Note that this function will only be invoked for errors during the * MPI application (i.e., after MPI_INIT and before MPI_FINALIZE). So * there's no need to handle the pre-MPI_INIT and post-MPI_FINALIZE * errors here. 
*/ -static void backend_fatal_aggregate(char *type, +static void backend_fatal_aggregate(char *type, struct ompi_communicator_t *comm, - char *name, int *error_code, + char *name, int *error_code, va_list arglist) { - char *arg, *prefix, *err_msg = "Unknown error"; - bool err_msg_need_free = false; + char *arg = NULL, *prefix = NULL, *err_msg = NULL; + const char* const unknown_error_code = "Error code: %d (no associated error message)"; + const char* const unknown_error = "Unknown error"; + const char* const unknown_prefix = "[?:?]"; + + // these do not own what they point to; they're + // here to avoid repeating expressions such as + // (NULL == foo) ? unknown_foo : foo + const char* usable_prefix = unknown_prefix; + const char* usable_err_msg = unknown_error; assert(ompi_mpi_initialized && !ompi_mpi_finalized); arg = va_arg(arglist, char*); va_end(arglist); - asprintf(&prefix, "[%s:%d]", ompi_process_info.nodename, - (int) ompi_process_info.pid); + if (asprintf(&prefix, "[%s:%d]", + ompi_process_info.nodename, + (int) ompi_process_info.pid) == -1) { + prefix = NULL; + // non-fatal, we could still go on to give useful information here... + opal_output(0, "%s", "Could not write node and PID to prefix"); + opal_output(0, "Node: %s", ompi_process_info.nodename); + opal_output(0, "PID: %d", (int) ompi_process_info.pid); + } if (NULL != error_code) { err_msg = ompi_mpi_errnum_get_string(*error_code); if (NULL == err_msg) { - err_msg_need_free = true; - asprintf(&err_msg, "Error code: %d (no associated error message)", - *error_code); + if (asprintf(&err_msg, unknown_error_code, + *error_code) == -1) { + err_msg = NULL; + opal_output(0, "%s", "Could not write to err_msg"); + opal_output(0, unknown_error_code, *error_code); + } } } + usable_prefix = (NULL == prefix) ? unknown_prefix : prefix; + usable_err_msg = (NULL == err_msg) ? 
unknown_error : err_msg; + if (NULL != name) { - opal_show_help("help-mpi-errors.txt", - "mpi_errors_are_fatal", false, - prefix, (NULL == arg) ? "" : "in", + opal_show_help("help-mpi-errors.txt", + "mpi_errors_are_fatal", + false, + usable_prefix, + (NULL == arg) ? "" : "in", (NULL == arg) ? "" : arg, - prefix, OMPI_PROC_MY_NAME->jobid, OMPI_PROC_MY_NAME->vpid, - prefix, type, name, prefix, err_msg, prefix, type, prefix); + usable_prefix, + OMPI_PROC_MY_NAME->jobid, + OMPI_PROC_MY_NAME->vpid, + usable_prefix, + type, + name, + usable_prefix, + usable_err_msg, + usable_prefix, + type, + usable_prefix); } else { - opal_show_help("help-mpi-errors.txt", - "mpi_errors_are_fatal unknown handle", false, - prefix, (NULL == arg) ? "" : "in", + opal_show_help("help-mpi-errors.txt", + "mpi_errors_are_fatal unknown handle", + false, + usable_prefix, + (NULL == arg) ? "" : "in", (NULL == arg) ? "" : arg, - prefix, OMPI_PROC_MY_NAME->jobid, OMPI_PROC_MY_NAME->vpid, - prefix, type, prefix, err_msg, prefix, type, prefix); + usable_prefix, + OMPI_PROC_MY_NAME->jobid, + OMPI_PROC_MY_NAME->vpid, + usable_prefix, + type, + usable_prefix, + usable_err_msg, + usable_prefix, + type, + usable_prefix); } - if (err_msg_need_free) { - free(err_msg); - } + free(prefix); + free(err_msg); } -/* +/* * Note that this function has to handle pre-MPI_INIT and * post-MPI_FINALIZE errors, which backend_fatal_aggregate() does not * have to handle. @@ -228,9 +269,9 @@ static void backend_fatal_aggregate(char *type, * we *might* be able to get a message out if we're not further * corrupting the stack by calling malloc()... 
*/ -static void backend_fatal_no_aggregate(char *type, +static void backend_fatal_no_aggregate(char *type, struct ompi_communicator_t *comm, - char *name, int *error_code, + char *name, int *error_code, va_list arglist) { char *arg; @@ -271,7 +312,7 @@ static void backend_fatal_no_aggregate(char *type, else { int len; char str[MPI_MAX_PROCESSOR_NAME * 2]; - + /* THESE MESSAGES ARE COORDINATED WITH FIXED STRINGS IN help-mpi-errors.txt! Do not change these messages without also changing help-mpi-errors.txt! */ @@ -291,11 +332,11 @@ static void backend_fatal_no_aggregate(char *type, str[0] = '\0'; len = sizeof(str) - 1; strncat(str, type, len); - + len -= strlen(type); if (len > 0) { strncat(str, " ", len); - + --len; if (len > 0) { strncat(str, name, len); @@ -325,7 +366,7 @@ static void backend_fatal_no_aggregate(char *type, } static void backend_fatal(char *type, struct ompi_communicator_t *comm, - char *name, int *error_code, + char *name, int *error_code, va_list arglist) { /* We only want aggregation after MPI_INIT and before diff --git a/ompi/errhandler/errhandler_predefined.h b/ompi/errhandler/errhandler_predefined.h index 99aaf4ef6dd..60fe9f40758 100644 --- a/ompi/errhandler/errhandler_predefined.h +++ b/ompi/errhandler/errhandler_predefined.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/errhandler/help-mpi-errors.txt b/ompi/errhandler/help-mpi-errors.txt index 44f0cd329e3..a6dcf7172ea 100644 --- a/ompi/errhandler/help-mpi-errors.txt +++ b/ompi/errhandler/help-mpi-errors.txt @@ -6,16 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # *** THESE MESSAGES ARE COORDINATED WITH FIXED STRINGS IN diff --git a/ompi/etc/Makefile.am b/ompi/etc/Makefile.am index ef7533b9157..f5c210f9c9a 100644 --- a/ompi/etc/Makefile.am +++ b/ompi/etc/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -46,7 +46,7 @@ install-data-local: done # Only remove if exactly the same as what in our tree -# NOTE TO READER: Bourne shell if ... fi evaluates the body if +# NOTE TO READER: Bourne shell if ... 
fi evaluates the body if # the return of the evaluted command is 0 (as opposed to non-zero # as used by everyone else) uninstall-local: diff --git a/ompi/etc/openmpi-totalview.tcl b/ompi/etc/openmpi-totalview.tcl index d9b4ab9edfc..27edbc4f798 100644 --- a/ompi/etc/openmpi-totalview.tcl +++ b/ompi/etc/openmpi-totalview.tcl @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/file/Makefile.am b/ompi/file/Makefile.am index 1452b7c671c..e7d846ddde8 100644 --- a/ompi/file/Makefile.am +++ b/ompi/file/Makefile.am @@ -5,14 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -21,5 +22,5 @@ headers += \ file/file.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ file/file.c diff --git a/ompi/file/file.c b/ompi/file/file.c index 891d51a64e4..e3888578ee7 100644 --- a/ompi/file/file.c +++ b/ompi/file/file.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -66,7 +66,7 @@ int ompi_file_init(void) { /* Setup file array */ - OBJ_CONSTRUCT(&ompi_file_f_to_c_table, opal_pointer_array_t); + OBJ_CONSTRUCT(&ompi_file_f_to_c_table, opal_pointer_array_t); if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_file_f_to_c_table, 0, OMPI_FORTRAN_HANDLE_MAX, 64) ) { return OMPI_ERROR; @@ -91,7 +91,7 @@ int ompi_file_init(void) /* * Back end to MPI_FILE_OPEN */ -int ompi_file_open(struct ompi_communicator_t *comm, char *filename, +int ompi_file_open(struct ompi_communicator_t *comm, const char *filename, int amode, struct ompi_info_t *info, ompi_file_t **fh) { int ret; @@ -144,7 +144,7 @@ int ompi_file_open(struct ompi_communicator_t *comm, char *filename, /* * Back end to MPI_FILE_CLOSE. */ -int ompi_file_close(ompi_file_t **file) +int ompi_file_close(ompi_file_t **file) { (*file)->f_flags |= OMPI_FILE_ISCLOSED; OBJ_RELEASE(*file); @@ -176,18 +176,18 @@ int ompi_file_finalize(void) max = opal_pointer_array_get_size(&ompi_file_f_to_c_table); for (num_unnamed = i = 0; i < max; ++i) { file = (ompi_file_t *)opal_pointer_array_get_item(&ompi_file_f_to_c_table, i); - + /* If the file was closed but still exists because the user told us to never free handles, then do an OBJ_RELEASE it and all is well. Then get the value again and see if it's actually been freed. 
*/ - if (NULL != file && ompi_debug_no_free_handles && + if (NULL != file && ompi_debug_no_free_handles && 0 == (file->f_flags & OMPI_FILE_ISCLOSED)) { OBJ_RELEASE(file); file = (ompi_file_t *)opal_pointer_array_get_item(&ompi_file_f_to_c_table, i); - } - + } + if (NULL != file) { /* If the user wanted warnings about MPI object leaks, @@ -207,7 +207,7 @@ int ompi_file_finalize(void) opal_output(0, "WARNING: %lu unnamed MPI_File handles still allocated at MPI_FINALIZE", (unsigned long)num_unnamed); } OBJ_DESTRUCT(&ompi_file_f_to_c_table); - + /* All done */ return OMPI_SUCCESS; @@ -232,7 +232,7 @@ static void file_constructor(ompi_file_t *file) /* Initialize the fortran <--> C translation index */ - file->f_f_to_c_index = opal_pointer_array_add(&ompi_file_f_to_c_table, + file->f_f_to_c_index = opal_pointer_array_add(&ompi_file_f_to_c_table, file); /* Initialize the error handler. Per MPI-2:9.7 (p265), the @@ -252,7 +252,7 @@ static void file_constructor(ompi_file_t *file) /* Initialize the module */ file->f_io_version = MCA_IO_BASE_V_NONE; - memset(&(file->f_io_selected_module), 0, + memset(&(file->f_io_selected_module), 0, sizeof(file->f_io_selected_module)); file->f_io_selected_data = NULL; @@ -280,7 +280,7 @@ static void file_destructor(ompi_file_t *file) /* Should never get here */ break; } - + /* Finalize the data members */ if (NULL != file->f_comm) { diff --git a/ompi/file/file.h b/ompi/file/file.h index 7dcc0f0126a..902d3b0b064 100644 --- a/ompi/file/file.h +++ b/ompi/file/file.h @@ -6,16 +6,18 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. 
All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -71,7 +73,7 @@ struct ompi_file_t { /** Type of the error handler. This field does not have the "f_" prefix for the same reason as the field error_handler. */ ompi_errhandler_type_t errhandler_type; - + /** Indicate what version of the IO component we're using (this indicates what member to look at in the union, below) */ mca_io_base_version_t f_io_version; @@ -145,11 +147,11 @@ int ompi_file_init(void); * mainly calls OBJ_RELEASE() but also does some other error * handling as well. */ -int ompi_file_open(struct ompi_communicator_t *comm, char *filename, - int amode, struct ompi_info_t *info, +int ompi_file_open(struct ompi_communicator_t *comm, const char *filename, + int amode, struct ompi_info_t *info, ompi_file_t **fh); - -/** + +/** * Atomicly set a name on a file handle. * * @param file MPI_File handle to set the name on @@ -188,7 +190,7 @@ int ompi_file_close(ompi_file_t **file); * Invoked during ompi_mpi_finalize(). */ int ompi_file_finalize(void); - + /** * Check to see if an MPI_File handle is valid. * diff --git a/ompi/group/Makefile.am b/ompi/group/Makefile.am index f9f1bfc236d..f92a900da8d 100644 --- a/ompi/group/Makefile.am +++ b/ompi/group/Makefile.am @@ -6,15 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2007 University of Houston. All rights reserved. 
+# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -24,11 +25,11 @@ headers += \ group/group.h \ group/group_dbg.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ group/group.c \ group/group_init.c \ group/group_set_rank.c \ group/group_plist.c \ group/group_sporadic.c \ group/group_strided.c \ - group/group_bitmap.c + group/group_bitmap.c diff --git a/ompi/group/group.c b/ompi/group/group.c index ccd62223519..dc8c4d49e6f 100644 --- a/ompi/group/group.c +++ b/ompi/group/group.c @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* +/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,14 +14,14 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,91 +37,95 @@ int ompi_group_free ( ompi_group_t **group ) ompi_group_t *l_group; l_group = (ompi_group_t *) *group; - ompi_group_decrement_proc_count (l_group); OBJ_RELEASE(l_group); *group = MPI_GROUP_NULL; return OMPI_SUCCESS; } -int ompi_group_translate_ranks ( ompi_group_t *group1, +int ompi_group_translate_ranks ( ompi_group_t *group1, int n_ranks, const int *ranks1, - ompi_group_t *group2, - int *ranks2) + ompi_group_t *group2, + int *ranks2) { - int rank, proc, proc2; - struct ompi_proc_t *proc1_pointer, *proc2_pointer; - if ( MPI_GROUP_EMPTY == group1 || MPI_GROUP_EMPTY == group2 ) { - for (proc = 0; proc < n_ranks ; proc++) { + for (int proc = 0; proc < n_ranks ; ++proc) { ranks2[proc] = MPI_UNDEFINED; } return MPI_SUCCESS; } - /* +#if OMPI_GROUP_SPARSE + /* * If we are translating from a parent to a child that uses the sparse format - * or vice versa, we use the translate ranks function corresponding to the - * format used. Generally, all these functions require less time than the - * original method that loops over the processes of both groups till we + * or vice versa, we use the translate ranks function corresponding to the + * format used. Generally, all these functions require less time than the + * original method that loops over the processes of both groups till we * find a match. 
*/ - if( group1->grp_parent_group_ptr == group2 ) { /* from child to parent */ - if(OMPI_GROUP_IS_SPORADIC(group1)) { - return ompi_group_translate_ranks_sporadic_reverse - (group1,n_ranks,ranks1,group2,ranks2); + if( group1->grp_parent_group_ptr == group2 ) { /* from child to parent */ + if(OMPI_GROUP_IS_SPORADIC(group1)) { + return ompi_group_translate_ranks_sporadic_reverse + (group1,n_ranks,ranks1,group2,ranks2); } else if(OMPI_GROUP_IS_STRIDED(group1)) { - return ompi_group_translate_ranks_strided_reverse - (group1,n_ranks,ranks1,group2,ranks2); + return ompi_group_translate_ranks_strided_reverse + (group1,n_ranks,ranks1,group2,ranks2); } else if(OMPI_GROUP_IS_BITMAP(group1)) { - return ompi_group_translate_ranks_bmap_reverse - (group1,n_ranks,ranks1,group2,ranks2); + return ompi_group_translate_ranks_bmap_reverse + (group1,n_ranks,ranks1,group2,ranks2); } + /* unknown sparse group type */ + assert (0); } - else if( group2->grp_parent_group_ptr == group1 ) { /* from parent to child*/ - if(OMPI_GROUP_IS_SPORADIC(group2)) { - return ompi_group_translate_ranks_sporadic - (group1,n_ranks,ranks1,group2,ranks2); + + if( group2->grp_parent_group_ptr == group1 ) { /* from parent to child*/ + if(OMPI_GROUP_IS_SPORADIC(group2)) { + return ompi_group_translate_ranks_sporadic + (group1,n_ranks,ranks1,group2,ranks2); } else if(OMPI_GROUP_IS_STRIDED(group2)) { - return ompi_group_translate_ranks_strided - (group1,n_ranks,ranks1,group2,ranks2); + return ompi_group_translate_ranks_strided + (group1,n_ranks,ranks1,group2,ranks2); } else if(OMPI_GROUP_IS_BITMAP(group2)) { - return ompi_group_translate_ranks_bmap - (group1,n_ranks,ranks1,group2,ranks2); + return ompi_group_translate_ranks_bmap + (group1,n_ranks,ranks1,group2,ranks2); } - + + /* unknown sparse group type */ + assert (0); } - else { - /* loop over all ranks */ - for (proc = 0; proc < n_ranks; proc++) { - rank=ranks1[proc]; - if ( MPI_PROC_NULL == rank) { - ranks2[proc] = MPI_PROC_NULL; - } - else { - proc1_pointer 
= ompi_group_peer_lookup(group1 ,rank); - /* initialize to no "match" */ - ranks2[proc] = MPI_UNDEFINED; - for (proc2 = 0; proc2 < group2->grp_proc_count; proc2++) { - proc2_pointer= ompi_group_peer_lookup(group2, proc2); - if ( proc1_pointer == proc2_pointer) { - ranks2[proc] = proc2; - break; - } - } /* end proc2 loop */ - } /* end proc loop */ +#endif + + /* loop over all ranks */ + for (int proc = 0; proc < n_ranks; ++proc) { + struct ompi_proc_t *proc1_pointer, *proc2_pointer; + int rank = ranks1[proc]; + + if ( MPI_PROC_NULL == rank) { + ranks2[proc] = MPI_PROC_NULL; + continue; } - } + + proc1_pointer = ompi_group_get_proc_ptr_raw (group1, rank); + /* initialize to no "match" */ + ranks2[proc] = MPI_UNDEFINED; + for (int proc2 = 0; proc2 < group2->grp_proc_count; ++proc2) { + proc2_pointer = ompi_group_get_proc_ptr_raw (group2, proc2); + if ( proc1_pointer == proc2_pointer) { + ranks2[proc] = proc2; + break; + } + } /* end proc2 loop */ + } /* end proc loop */ return MPI_SUCCESS; } -int ompi_group_dump (ompi_group_t* group) +int ompi_group_dump (ompi_group_t* group) { int i; int new_rank; @@ -168,33 +172,14 @@ int ompi_group_dump (ompi_group_t* group) return OMPI_SUCCESS; } -/* - * This is the function that iterates through the sparse groups to the dense group - * to reach the process pointer - */ -ompi_proc_t* ompi_group_get_proc_ptr (ompi_group_t* group , int rank) -{ - int ranks1,ranks2; - do { - if(OMPI_GROUP_IS_DENSE(group)) { - return group->grp_proc_pointers[rank]; - } - ranks1 = rank; - ompi_group_translate_ranks( group, 1, &ranks1, - group->grp_parent_group_ptr,&ranks2); - rank = ranks2; - group = group->grp_parent_group_ptr; - } while (1); -} - -int ompi_group_minloc ( int list[] , int length ) -{ +int ompi_group_minloc ( int list[] , int length ) +{ int i,index,min; min = list[0]; index = 0; - - for (i=0 ; i list[i] && list[i] != -1) { + + for (i=0 ; i list[i] && list[i] != -1) { min = list[i]; index = i; } @@ -215,12 +200,12 @@ int 
ompi_group_incl(ompi_group_t* group, int n, const int *ranks, ompi_group_t * len[1] = ompi_group_calc_strided ( n ,ranks ); len[2] = ompi_group_calc_sporadic ( n ,ranks ); len[3] = ompi_group_calc_bmap ( n , group->grp_proc_count ,ranks ); - + /* determin minimum length */ method = ompi_group_minloc ( len, 4 ); } #endif - + switch (method) { case 0: @@ -250,19 +235,19 @@ int ompi_group_excl(ompi_group_t* group, int n, const int *ranks, ompi_group_t * if (0 < (group->grp_proc_count - n)) { ranks_included = (int *)malloc( (group->grp_proc_count-n)*(sizeof(int))); - for (i=0 ; igrp_proc_count ; i++) { - for(j=0 ; jgrp_proc_count ; i++) { + for(j=0 ; j last_rank) { /* negative stride */ index = first_rank; @@ -318,7 +303,7 @@ int ompi_group_range_incl(ompi_group_t* group, int n_triplets, int ranges[][3], } /* determine the list of included processes for the range-incl-method */ k = 0; - for(j=0 ; j last_rank) { /* negative stride */ index = first_rank; @@ -358,9 +343,9 @@ int ompi_group_range_incl(ompi_group_t* group, int n_triplets, int ranges[][3], } int ompi_group_range_excl(ompi_group_t* group, int n_triplets, int ranges[][3], - ompi_group_t **new_group) + ompi_group_t **new_group) { - + int j,k,i; int *ranks_included=NULL, *ranks_excluded=NULL; int index,first_rank,last_rank,stride,count,result; @@ -369,7 +354,7 @@ int ompi_group_range_excl(ompi_group_t* group, int n_triplets, int ranges[][3], /* determine the number of excluded processes for the range-excl-method */ k = 0; - for(j=0 ; j last_rank) { /* negative stride */ index = first_rank; @@ -401,7 +386,7 @@ int ompi_group_range_excl(ompi_group_t* group, int n_triplets, int ranges[][3], /* determine the list of included processes for the range-excl-method */ k = 0; i = 0; - for(j=0 ; j last_rank) { /* negative stride */ index = first_rank; @@ -433,11 +418,11 @@ int ompi_group_range_excl(ompi_group_t* group, int n_triplets, int ranges[][3], if (0 != (group->grp_proc_count - count)) { ranks_included = (int 
*)malloc( (group->grp_proc_count - count)*(sizeof(int))); } - for (j=0 ; jgrp_proc_count ; j++) { - for(index=0 ; indexgrp_proc_count ; j++) { + for(index=0 ; indexgrp_proc_count*(sizeof(int))); - if (NULL == ranks_included) { - return MPI_ERR_NO_MEM; + if (0 < group1_pointer->grp_proc_count) { + ranks_included = (int *)malloc(group1_pointer->grp_proc_count*(sizeof(int))); + if (NULL == ranks_included) { + return MPI_ERR_NO_MEM; + } } /* determine the list of included processes for the incl-method */ k = 0; @@ -478,7 +465,7 @@ int ompi_group_intersection(ompi_group_t* group1,ompi_group_t* group2, proc1_pointer = ompi_group_peer_lookup (group1_pointer , proc1); /* check to see if this proc is in group2 */ - + for (proc2 = 0; proc2 < group2_pointer->grp_proc_count; proc2++) { proc2_pointer = ompi_group_peer_lookup (group2_pointer ,proc2); @@ -491,7 +478,7 @@ int ompi_group_intersection(ompi_group_t* group1,ompi_group_t* group2, } /* end proc1 loop */ result = ompi_group_incl(group1, k, ranks_included, new_group); - + if (NULL != ranks_included) { free(ranks_included); } @@ -568,3 +555,23 @@ int ompi_group_compare(ompi_group_t *group1, return return_value; } + +bool ompi_group_have_remote_peers (ompi_group_t *group) +{ + for (int i = 0 ; i < group->grp_proc_count ; ++i) { + ompi_proc_t *proc = NULL; +#if OMPI_GROUP_SPARSE + proc = ompi_group_peer_lookup (group, i); +#else + if (ompi_proc_is_sentinel (group->grp_proc_pointers[i])) { + return true; + } + proc = group->grp_proc_pointers[i]; +#endif + if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) { + return true; + } + } + + return false; +} diff --git a/ompi/group/group.h b/ompi/group/group.h index 847f9624644..1d15b97fdf1 100644 --- a/ompi/group/group.h +++ b/ompi/group/group.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,16 +14,18 @@ * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** +/** * @file: * * Infrastructure for MPI group support. @@ -36,20 +38,21 @@ #include "mpi.h" #include "opal/class/opal_pointer_array.h" #include "opal/util/output.h" +#include "opal/include/opal/prefetch.h" BEGIN_C_DECLS #define BSIZE ((int)sizeof(unsigned char)*8) -struct ompi_group_sporadic_list_t -{ - int rank_first; - int length; -}; - +struct ompi_group_sporadic_list_t +{ + int rank_first; + int length; +}; + struct ompi_group_sporadic_data_t { - struct ompi_group_sporadic_list_t *grp_sporadic_list; + struct ompi_group_sporadic_list_t *grp_sporadic_list; /** list to hold the sporadic struct */ int grp_sporadic_list_len;/** length of the structure*/ }; @@ -67,7 +70,7 @@ struct ompi_group_bitmap_data_t /** * Group structure - * Currently we have four formats for storing the process pointers that are members + * Currently we have four formats for storing the process pointers that are members * of the group. * PList: a dense format that stores all the process pointers of the group. 
* Sporadic: a sparse format that stores the ranges of the ranks from the parent group, @@ -116,7 +119,7 @@ typedef struct ompi_predefined_group_t ompi_predefined_group_t; /* * The following include pulls in shared typedefs with debugger plugins. - * For more information on why we do this see the Notice to developers + * For more information on why we do this see the Notice to developers * comment at the top of the ompi_msgq_dll.c file. */ #include "group_dbg.h" @@ -153,6 +156,7 @@ OMPI_DECLSPEC extern struct ompi_predefined_group_t *ompi_mpi_group_null_addr; * @return Pointer to new group structure */ OMPI_DECLSPEC ompi_group_t *ompi_group_allocate(int group_size); +ompi_group_t *ompi_group_allocate_plist_w_procs (ompi_proc_t **procs, int group_size); ompi_group_t *ompi_group_allocate_sporadic(int group_size); ompi_group_t *ompi_group_allocate_strided(void); ompi_group_t *ompi_group_allocate_bmap(int orig_group_size, int group_size); @@ -232,7 +236,7 @@ void ompi_set_group_rank(ompi_group_t *group, struct ompi_proc_t *proc_pointer); /** * Abstracting MPI_Group_translate_ranks to an ompi function for internal use */ -OMPI_DECLSPEC int ompi_group_translate_ranks ( ompi_group_t *group1, +OMPI_DECLSPEC int ompi_group_translate_ranks ( ompi_group_t *group1, int n_ranks, const int *ranks1, ompi_group_t *group2, int *ranks2); @@ -244,7 +248,7 @@ OMPI_DECLSPEC int ompi_group_compare(ompi_group_t *group1, ompi_group_t *group2, int *result); -/** +/** * Abstracting MPI_Group_free, since it is required by some internal functions... 
*/ int ompi_group_free (ompi_group_t **group); @@ -252,46 +256,44 @@ int ompi_group_free (ompi_group_t **group); /** * Functions to handle process pointers for sparse group formats */ -OMPI_DECLSPEC ompi_proc_t* ompi_group_get_proc_ptr (ompi_group_t* group , int rank); - -int ompi_group_translate_ranks_sporadic ( ompi_group_t *group1, +int ompi_group_translate_ranks_sporadic ( ompi_group_t *group1, int n_ranks, const int *ranks1, - ompi_group_t *group2, + ompi_group_t *group2, int *ranks2); -int ompi_group_translate_ranks_sporadic_reverse ( ompi_group_t *group1, +int ompi_group_translate_ranks_sporadic_reverse ( ompi_group_t *group1, int n_ranks, const int *ranks1, - ompi_group_t *group2, + ompi_group_t *group2, int *ranks2); -int ompi_group_translate_ranks_strided ( ompi_group_t *group1, +int ompi_group_translate_ranks_strided ( ompi_group_t *group1, int n_ranks, const int *ranks1, - ompi_group_t *group2, + ompi_group_t *group2, int *ranks2); -int ompi_group_translate_ranks_strided_reverse ( ompi_group_t *group1, +int ompi_group_translate_ranks_strided_reverse ( ompi_group_t *group1, int n_ranks, const int *ranks1, - ompi_group_t *group2, + ompi_group_t *group2, int *ranks2); -int ompi_group_translate_ranks_bmap ( ompi_group_t *group1, +int ompi_group_translate_ranks_bmap ( ompi_group_t *group1, int n_ranks, const int *ranks1, - ompi_group_t *group2, + ompi_group_t *group2, int *ranks2); -int ompi_group_translate_ranks_bmap_reverse ( ompi_group_t *group1, +int ompi_group_translate_ranks_bmap_reverse ( ompi_group_t *group1, int n_ranks, const int *ranks1, - ompi_group_t *group2, + ompi_group_t *group2, int *ranks2); /** - * Prototypes for the group back-end functions. Argument lists + * Prototypes for the group back-end functions. Argument lists are similar to the according C MPI functions. 
*/ int ompi_group_incl(ompi_group_t* group, int n, const int *ranks, ompi_group_t **new_group); int ompi_group_excl(ompi_group_t* group, int n, const int *ranks, ompi_group_t **new_group); -int ompi_group_range_incl(ompi_group_t* group, int n_triplets, +int ompi_group_range_incl(ompi_group_t* group, int n_triplets, int ranges[][3],ompi_group_t **new_group); -int ompi_group_range_excl(ompi_group_t* group, int n_triplets, +int ompi_group_range_excl(ompi_group_t* group, int n_triplets, int ranges[][3],ompi_group_t **new_group); -int ompi_group_union (ompi_group_t* group1, ompi_group_t* group2, +int ompi_group_union (ompi_group_t* group1, ompi_group_t* group2, ompi_group_t **new_group); int ompi_group_intersection(ompi_group_t* group1,ompi_group_t* group2, ompi_group_t **new_group); @@ -299,7 +301,7 @@ int ompi_group_difference(ompi_group_t* group1, ompi_group_t* group2, ompi_group_t **new_group); -/** +/** * Include Functions to handle Sparse storage formats */ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks, @@ -311,7 +313,7 @@ int ompi_group_incl_strided(ompi_group_t* group, int n, const int *ranks, int ompi_group_incl_bmap(ompi_group_t* group, int n, const int *ranks, ompi_group_t **new_group); -/** +/** * Functions to calculate storage spaces */ int ompi_group_calc_plist ( int n, const int *ranks ); @@ -325,24 +327,101 @@ int ompi_group_calc_bmap ( int n, int orig_size , const int *ranks ); int ompi_group_minloc (int list[], int length); /** - * Inline function to check if sparse groups are enabled and return the direct access - * to the proc pointer, otherwise the lookup function + * @brief Helper function for retreiving the proc of a group member in a dense group + * + * This function exists to handle the translation of sentinel group members to real + * ompi_proc_t's. 
If a sentinel value is found and allocate is true then this function + * looks for an existing ompi_proc_t using ompi_proc_for_name which will allocate a + * ompi_proc_t if one does not exist. If allocate is false then sentinel values translate + * to NULL. */ -static inline struct ompi_proc_t* ompi_group_peer_lookup(ompi_group_t *group, int peer_id) +static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group, const int peer_id, const bool allocate) { + ompi_proc_t *proc; + #if OPAL_ENABLE_DEBUG if (peer_id >= group->grp_proc_count) { - opal_output(0, "ompi_group_lookup_peer: invalid peer index (%d)", peer_id); + opal_output(0, "ompi_group_dense_lookup: invalid peer index (%d)", peer_id); return (struct ompi_proc_t *) NULL; } #endif + + proc = group->grp_proc_pointers[peer_id]; + + if (OPAL_UNLIKELY(ompi_proc_is_sentinel (proc))) { + if (!allocate) { + return NULL; + } + + /* replace sentinel value with an actual ompi_proc_t */ + ompi_proc_t *real_proc = + (ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc)); + + if (opal_atomic_cmpset_ptr (group->grp_proc_pointers + peer_id, proc, real_proc)) { + OBJ_RETAIN(real_proc); + } + + proc = real_proc; + } + + return proc; +} + +/* + * This is the function that iterates through the sparse groups to the dense group + * to reach the process pointer + */ +static inline ompi_proc_t *ompi_group_get_proc_ptr (ompi_group_t *group, int rank, const bool allocate) +{ #if OMPI_GROUP_SPARSE - return ompi_group_get_proc_ptr (group, peer_id); + do { + if (OMPI_GROUP_IS_DENSE(group)) { + return ompi_group_dense_lookup (group, rank, allocate); + } + int ranks1 = rank; + ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank); + group = group->grp_parent_group_ptr; + } while (1); #else - return group->grp_proc_pointers[peer_id]; + return ompi_group_dense_lookup (group, rank, allocate); #endif } +/** + * @brief Get the raw proc pointer from the group + * + * This 
function will either return a ompi_proc_t if one exists (either stored in the group + * or cached in the proc hash table) or a sentinel value representing the proc. This + * differs from ompi_group_get_proc_ptr() which returns the ompi_proc_t or NULL. + */ +ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank); + +static inline opal_process_name_t ompi_group_get_proc_name (ompi_group_t *group, int rank) +{ + ompi_proc_t *proc = ompi_group_get_proc_ptr_raw (group, rank); + if (ompi_proc_is_sentinel (proc)) { + return ompi_proc_sentinel_to_name ((intptr_t) proc); + } + + return proc->super.proc_name; +} + +/** + * Inline function to check if sparse groups are enabled and return the direct access + * to the proc pointer, otherwise the lookup function + */ +static inline struct ompi_proc_t* ompi_group_peer_lookup(ompi_group_t *group, int peer_id) +{ + return ompi_group_get_proc_ptr (group, peer_id, true); +} + +static inline struct ompi_proc_t *ompi_group_peer_lookup_existing (ompi_group_t *group, int peer_id) +{ + return ompi_group_get_proc_ptr (group, peer_id, false); +} + +bool ompi_group_have_remote_peers (ompi_group_t *group); + /** * Function to print the group info */ diff --git a/ompi/group/group_bitmap.c b/ompi/group/group_bitmap.c index bdce1d4b008..c93184f162c 100644 --- a/ompi/group/group_bitmap.c +++ b/ompi/group/group_bitmap.c @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* +/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -30,7 +30,7 @@ static bool check_ranks (int, const int *); int ompi_group_calc_bmap ( int n, int orig_size , const int *ranks) { if (check_ranks(n,ranks)) { - return ompi_group_div_ceil(orig_size,BSIZE); + return ompi_group_div_ceil(orig_size,BSIZE); } else { return -1; @@ -38,10 +38,10 @@ int ompi_group_calc_bmap ( int n, int orig_size , const int *ranks) { } /* from parent group to child group*/ -int ompi_group_translate_ranks_bmap ( ompi_group_t *parent_group, +int ompi_group_translate_ranks_bmap ( ompi_group_t *parent_group, int n_ranks, const int *ranks1, - ompi_group_t *child_group, - int *ranks2) + ompi_group_t *child_group, + int *ranks2) { int i,count,j,k,m; unsigned char tmp, tmp1; @@ -55,17 +55,17 @@ int ompi_group_translate_ranks_bmap ( ompi_group_t *parent_group, count = 0; tmp = ( 1 << (m % BSIZE) ); /* check if the bit that correponds to the parent rank is set in the bitmap */ - if ( tmp == (child_group->sparse_data.grp_bitmap.grp_bitmap_array[(int)(m/BSIZE)] + if ( tmp == (child_group->sparse_data.grp_bitmap.grp_bitmap_array[(int)(m/BSIZE)] & (1 << (m % BSIZE)))) { - /* - * add up how many bits are set, till we get to the bit of parent + /* + * add up how many bits are set, till we get to the bit of parent * rank that we want. 
The rank in the child will be the sum of the bits - * that are set on the way till we get to the correponding bit + * that are set on the way till we get to the correponding bit */ for (i=0 ; i<=(int)(m/BSIZE) ; i++) { for (k=0 ; ksparse_data.grp_bitmap.grp_bitmap_array[i] + if ( tmp1 == ( child_group->sparse_data.grp_bitmap.grp_bitmap_array[i] & (1 << k) ) ) { count++; } @@ -82,10 +82,10 @@ int ompi_group_translate_ranks_bmap ( ompi_group_t *parent_group, return OMPI_SUCCESS; } /* from child group to parent group */ -int ompi_group_translate_ranks_bmap_reverse ( ompi_group_t *child_group, +int ompi_group_translate_ranks_bmap_reverse ( ompi_group_t *child_group, int n_ranks, const int *ranks1, - ompi_group_t *parent_group, - int *ranks2) + ompi_group_t *parent_group, + int *ranks2) { int i,j,count,m,k; unsigned char tmp; @@ -103,7 +103,7 @@ int ompi_group_translate_ranks_bmap_reverse ( ompi_group_t *child_group, for (i=0 ; isparse_data.grp_bitmap.grp_bitmap_array_len ; i++) { for (k=0 ; ksparse_data.grp_bitmap.grp_bitmap_array[i] + if ( tmp == ( child_group->sparse_data.grp_bitmap.grp_bitmap_array[i] & (1 << k) ) ) { count++; } @@ -119,7 +119,7 @@ int ompi_group_translate_ranks_bmap_reverse ( ompi_group_t *child_group, return OMPI_SUCCESS; } -int ompi_group_div_ceil (int num, int den) +int ompi_group_div_ceil (int num, int den) { if (0 == num%den) { return num/den; @@ -131,7 +131,7 @@ int ompi_group_div_ceil (int num, int den) /* * This functions is to check that all ranks in the included list of ranks * are monotonically increasing. If not, the bitmap format can not be used - * since we won't be able to translate the ranks corrently since the algorithms + * since we won't be able to translate the ranks corrently since the algorithms * assume that the ranks are in order in the bitmap list. 
*/ static bool check_ranks (int n, const int *ranks) { @@ -152,7 +152,7 @@ int ompi_group_incl_bmap(ompi_group_t* group, int n, const int *ranks, ompi_group_t *group_pointer, *new_group_pointer; group_pointer = (ompi_group_t *)group; - + if ( 0 == n ) { *new_group = MPI_GROUP_EMPTY; OBJ_RETAIN(MPI_GROUP_EMPTY); @@ -170,23 +170,23 @@ int ompi_group_incl_bmap(ompi_group_t* group, int n, const int *ranks, } /* set the bits */ - for (i=0 ; i sparse_data.grp_bitmap.grp_bitmap_array[(int)(ranks[i]/BSIZE)] |= (1 << bit_set); } - + new_group_pointer -> grp_parent_group_ptr = group_pointer; - + OBJ_RETAIN(new_group_pointer -> grp_parent_group_ptr); ompi_group_increment_proc_count(new_group_pointer -> grp_parent_group_ptr); - + ompi_group_increment_proc_count(new_group_pointer); my_group_rank=group_pointer->grp_my_rank; - + ompi_group_translate_ranks (group_pointer,1,&my_group_rank, new_group_pointer,&new_group_pointer->grp_my_rank); - + *new_group = (MPI_Group)new_group_pointer; return OMPI_SUCCESS; diff --git a/ompi/group/group_dbg.h b/ompi/group/group_dbg.h index 8983b8ac92b..fe0c1feaedf 100644 --- a/ompi/group/group_dbg.h +++ b/ompi/group/group_dbg.h @@ -2,9 +2,9 @@ /* * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -13,7 +13,7 @@ /* * This file contains definitions used by both OMPI and debugger plugins. - * For more information on why we do this see the Notice to developers + * For more information on why we do this see the Notice to developers * comment at the top of the ompi_msgq_dll.c file. */ diff --git a/ompi/group/group_init.c b/ompi/group/group_init.c index 5ef2b1f3d72..2155d262470 100644 --- a/ompi/group/group_init.c +++ b/ompi/group/group_init.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -17,9 +17,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -55,6 +55,24 @@ ompi_predefined_group_t *ompi_mpi_group_null_addr = &ompi_mpi_group_null; * Allocate a new group structure */ ompi_group_t *ompi_group_allocate(int group_size) +{ + /* local variables */ + ompi_proc_t **procs = calloc (group_size, sizeof (ompi_proc_t *)); + ompi_group_t *new_group; + + if (NULL == procs) { + return NULL; + } + + new_group = ompi_group_allocate_plist_w_procs (procs, group_size); + if (NULL == new_group) { + free (procs); + } + + return new_group; +} + +ompi_group_t *ompi_group_allocate_plist_w_procs (ompi_proc_t **procs, int group_size) { /* local variables */ ompi_group_t * new_group = NULL; @@ -65,28 +83,19 @@ ompi_group_t *ompi_group_allocate(int group_size) new_group = OBJ_NEW(ompi_group_t); if (NULL == new_group) { - goto error_exit; + return NULL; } if (0 > new_group->grp_f_to_c_index) { OBJ_RELEASE (new_group); - new_group = NULL; - goto error_exit; + return NULL; } /* * Allocate array of (ompi_proc_t *)'s, one for each * process in the group. 
*/ - new_group->grp_proc_pointers = (struct ompi_proc_t **) - malloc(sizeof(struct ompi_proc_t *) * group_size); - - if (NULL == new_group->grp_proc_pointers) { - /* grp_proc_pointers allocation failed */ - OBJ_RELEASE (new_group); - new_group = NULL; - goto error_exit; - } + new_group->grp_proc_pointers = procs; /* set the group size */ new_group->grp_proc_count = group_size; @@ -94,9 +103,9 @@ ompi_group_t *ompi_group_allocate(int group_size) /* initialize our rank to MPI_UNDEFINED */ new_group->grp_my_rank = MPI_UNDEFINED; OMPI_GROUP_SET_DENSE(new_group); - - error_exit: - /* return */ + + ompi_group_increment_proc_count (new_group); + return new_group; } @@ -119,10 +128,10 @@ ompi_group_t *ompi_group_allocate_sporadic(int group_size) } /* allocate array of (grp_sporadic_list )'s */ if (0 < group_size) { - new_group->sparse_data.grp_sporadic.grp_sporadic_list = + new_group->sparse_data.grp_sporadic.grp_sporadic_list = (struct ompi_group_sporadic_list_t *)malloc (sizeof(struct ompi_group_sporadic_list_t ) * group_size); - + /* non-empty group */ if ( NULL == new_group->sparse_data.grp_sporadic.grp_sporadic_list) { /* sporadic list allocation failed */ @@ -131,16 +140,16 @@ ompi_group_t *ompi_group_allocate_sporadic(int group_size) goto error_exit; } } - + /* set the group size */ - new_group->grp_proc_count = group_size; /* actually it's the number of + new_group->grp_proc_count = group_size; /* actually it's the number of elements in the sporadic list*/ - + /* initialize our rank to MPI_UNDEFINED */ new_group->grp_my_rank = MPI_UNDEFINED; new_group->grp_proc_pointers = NULL; - OMPI_GROUP_SET_SPORADIC(new_group); - + OMPI_GROUP_SET_SPORADIC(new_group); + error_exit: return new_group; } @@ -187,19 +196,19 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size) goto error_exit; } /* allocate the unsigned char list */ - new_group->sparse_data.grp_bitmap.grp_bitmap_array = (unsigned char *)malloc + 
new_group->sparse_data.grp_bitmap.grp_bitmap_array = (unsigned char *)malloc (sizeof(unsigned char) * ompi_group_div_ceil(orig_group_size,BSIZE)); - - new_group->sparse_data.grp_bitmap.grp_bitmap_array_len = + + new_group->sparse_data.grp_bitmap.grp_bitmap_array_len = ompi_group_div_ceil(orig_group_size,BSIZE); - + new_group->grp_proc_count = group_size; /* initialize our rank to MPI_UNDEFINED */ new_group->grp_my_rank = MPI_UNDEFINED; new_group->grp_proc_pointers = NULL; OMPI_GROUP_SET_BITMAP(new_group); - + error_exit: /* return */ return new_group; @@ -210,14 +219,13 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size) */ void ompi_group_increment_proc_count(ompi_group_t *group) { - int proc; ompi_proc_t * proc_pointer; - for (proc = 0; proc < group->grp_proc_count; proc++) { - proc_pointer = ompi_group_peer_lookup(group,proc); - OBJ_RETAIN(proc_pointer); + for (int proc = 0 ; proc < group->grp_proc_count ; ++proc) { + proc_pointer = ompi_group_peer_lookup_existing (group, proc); + if (proc_pointer) { + OBJ_RETAIN(proc_pointer); + } } - - return; } /* @@ -226,14 +234,13 @@ void ompi_group_increment_proc_count(ompi_group_t *group) void ompi_group_decrement_proc_count(ompi_group_t *group) { - int proc; ompi_proc_t * proc_pointer; - for (proc = 0; proc < group->grp_proc_count; proc++) { - proc_pointer = ompi_group_peer_lookup(group,proc); - OBJ_RELEASE(proc_pointer); + for (int proc = 0 ; proc < group->grp_proc_count ; ++proc) { + proc_pointer = ompi_group_peer_lookup_existing (group, proc); + if (proc_pointer) { + OBJ_RELEASE(proc_pointer); + } } - - return; } /* @@ -255,9 +262,6 @@ static void ompi_group_construct(ompi_group_t *new_group) /* default the sparse values for groups */ new_group->grp_parent_group_ptr = NULL; - - /* return */ - return; } @@ -271,6 +275,12 @@ static void ompi_group_destruct(ompi_group_t *group) the proc counts are not increased during the constructor, either). 
*/ +#if OMPI_GROUP_SPARSE + if (OMPI_GROUP_IS_DENSE(group)) + /* sparse groups do not increment proc reference counters */ +#endif + ompi_group_decrement_proc_count (group); + /* release thegrp_proc_pointers memory */ if (NULL != group->grp_proc_pointers) { free(group->grp_proc_pointers); @@ -289,7 +299,6 @@ static void ompi_group_destruct(ompi_group_t *group) } if (NULL != group->grp_parent_group_ptr){ - ompi_group_decrement_proc_count(group->grp_parent_group_ptr); OBJ_RELEASE(group->grp_parent_group_ptr); } @@ -300,9 +309,6 @@ static void ompi_group_destruct(ompi_group_t *group) opal_pointer_array_set_item(&ompi_group_f_to_c_table, group->grp_f_to_c_index, NULL); } - - /* return */ - return; } @@ -317,7 +323,7 @@ int ompi_group_init(void) OMPI_FORTRAN_HANDLE_MAX, 64) ) { return OMPI_ERROR; } - + /* add MPI_GROUP_NULL to table */ OBJ_CONSTRUCT(&ompi_mpi_group_null, ompi_group_t); ompi_mpi_group_null.group.grp_proc_count = 0; @@ -325,7 +331,7 @@ int ompi_group_init(void) ompi_mpi_group_null.group.grp_proc_pointers = NULL; ompi_mpi_group_null.group.grp_flags |= OMPI_GROUP_DENSE; ompi_mpi_group_null.group.grp_flags |= OMPI_GROUP_INTRINSIC; - + /* add MPI_GROUP_EMPTY to table */ OBJ_CONSTRUCT(&ompi_mpi_group_empty, ompi_group_t); ompi_mpi_group_empty.group.grp_proc_count = 0; @@ -333,7 +339,7 @@ int ompi_group_init(void) ompi_mpi_group_empty.group.grp_proc_pointers = NULL; ompi_mpi_group_empty.group.grp_flags |= OMPI_GROUP_DENSE; ompi_mpi_group_empty.group.grp_flags |= OMPI_GROUP_INTRINSIC; - + return OMPI_SUCCESS; } @@ -350,6 +356,6 @@ int ompi_group_finalize(void) OBJ_DESTRUCT(&ompi_mpi_group_empty); OBJ_DESTRUCT(&ompi_group_f_to_c_table); - + return OMPI_SUCCESS; } diff --git a/ompi/group/group_plist.c b/ompi/group/group_plist.c index e62527015df..62007154f3b 100644 --- a/ompi/group/group_plist.c +++ b/ompi/group/group_plist.c @@ -1,19 +1,21 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* +/* * Copyright (c) 2004-2005 The Trustees of Indiana 
University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,66 @@ #include +static int ompi_group_dense_overlap (ompi_group_t *group1, ompi_group_t *group2, opal_bitmap_t *bitmap) +{ + ompi_proc_t *proc1_pointer, *proc2_pointer; + int rc, overlap_count; + + overlap_count = 0; + + for (int proc1 = 0 ; proc1 < group1->grp_proc_count ; ++proc1) { + proc1_pointer = ompi_group_get_proc_ptr_raw (group1, proc1); + + /* check to see if this proc is in group2 */ + for (int proc2 = 0 ; proc2 < group2->grp_proc_count ; ++proc2) { + proc2_pointer = ompi_group_get_proc_ptr_raw (group2, proc2); + if( proc1_pointer == proc2_pointer ) { + rc = opal_bitmap_set_bit (bitmap, proc2); + if (OPAL_SUCCESS != rc) { + return rc; + } + ++overlap_count; + + break; + } + } /* end proc1 loop */ + } /* end proc loop */ + + return overlap_count; +} + +static struct ompi_proc_t *ompi_group_dense_lookup_raw (ompi_group_t *group, const int peer_id) +{ + if (OPAL_UNLIKELY(ompi_proc_is_sentinel (group->grp_proc_pointers[peer_id]))) { + ompi_proc_t *proc = + (ompi_proc_t *) ompi_proc_lookup 
(ompi_proc_sentinel_to_name ((uintptr_t) group->grp_proc_pointers[peer_id])); + if (NULL != proc) { + /* replace sentinel value with an actual ompi_proc_t */ + group->grp_proc_pointers[peer_id] = proc; + /* retain the proc */ + OBJ_RETAIN(group->grp_proc_pointers[peer_id]); + } + } + + return group->grp_proc_pointers[peer_id]; +} + +ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank) +{ +#if OMPI_GROUP_SPARSE + do { + if (OMPI_GROUP_IS_DENSE(group)) { + return ompi_group_dense_lookup_raw (group, rank); + } + int ranks1 = rank; + ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank); + group = group->grp_parent_group_ptr; + } while (1); +#else + return ompi_group_dense_lookup_raw (group, rank); +#endif +} + int ompi_group_calc_plist ( int n , const int *ranks ) { return sizeof(char *) * n ; } @@ -37,10 +99,9 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks, ompi_group_t **new_group) { /* local variables */ - int proc,my_group_rank; + int my_group_rank; ompi_group_t *group_pointer, *new_group_pointer; - ompi_proc_t *my_proc_pointer; - + group_pointer = (ompi_group_t *)group; if ( 0 == n ) { @@ -56,9 +117,9 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks, } /* put group elements in the list */ - for (proc = 0; proc < n; proc++) { - new_group_pointer->grp_proc_pointers[proc] = - ompi_group_peer_lookup(group_pointer,ranks[proc]); + for (int proc = 0; proc < n; proc++) { + new_group_pointer->grp_proc_pointers[proc] = + ompi_group_get_proc_ptr_raw (group_pointer, ranks[proc]); } /* end proc loop */ /* increment proc reference counters */ @@ -67,10 +128,8 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks, /* find my rank */ my_group_rank=group_pointer->grp_my_rank; if (MPI_UNDEFINED != my_group_rank) { - my_proc_pointer=ompi_group_peer_lookup (group_pointer,my_group_rank); - ompi_set_group_rank(new_group_pointer,my_proc_pointer); - } - else { + 
ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc); + } else { new_group_pointer->grp_my_rank = MPI_UNDEFINED; } @@ -79,223 +138,155 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks, return OMPI_SUCCESS; } -/* - * Group Union has to use the dense format since we don't support +/* + * Group Union has to use the dense format since we don't support * two parent groups in the group structure and maintain functions */ -int ompi_group_union (ompi_group_t* group1, ompi_group_t* group2, - ompi_group_t **new_group) +int ompi_group_union (ompi_group_t* group1, ompi_group_t* group2, + ompi_group_t **new_group) { /* local variables */ - int new_group_size, proc1, proc2, found_in_group; - int my_group_rank, cnt; - ompi_group_t *group1_pointer, *group2_pointer, *new_group_pointer; - ompi_proc_t *proc1_pointer, *proc2_pointer, *my_proc_pointer = NULL; - - group1_pointer = (ompi_group_t *) group1; - group2_pointer = (ompi_group_t *) group2; + int new_group_size, cnt, rc, overlap_count; + ompi_group_t *new_group_pointer; + ompi_proc_t *proc2_pointer; + opal_bitmap_t bitmap; /* * form union */ /* get new group size */ - new_group_size = group1_pointer->grp_proc_count; + OBJ_CONSTRUCT(&bitmap, opal_bitmap_t); + rc = opal_bitmap_init (&bitmap, 32); + if (OPAL_SUCCESS != rc) { + return rc; + } /* check group2 elements to see if they need to be included in the list */ - for (proc2 = 0; proc2 < group2_pointer->grp_proc_count; proc2++) { - proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2); - - /* check to see if this proc2 is alread in the group */ - found_in_group = 0; - for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) { - proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1); - - if (proc1_pointer == proc2_pointer) { - /* proc2 is in group1 - don't double count */ - found_in_group = 1; - break; - } - } /* end proc1 loop */ - - if (found_in_group) { - continue; - } - - new_group_size++; - } /* end proc loop */ + 
overlap_count = ompi_group_dense_overlap (group1, group2, &bitmap); + if (0 > overlap_count) { + OBJ_DESTRUCT(&bitmap); + return overlap_count; + } + new_group_size = group1->grp_proc_count + group2->grp_proc_count - overlap_count; if ( 0 == new_group_size ) { *new_group = MPI_GROUP_EMPTY; OBJ_RETAIN(MPI_GROUP_EMPTY); + OBJ_DESTRUCT(&bitmap); return MPI_SUCCESS; } /* get new group struct */ new_group_pointer = ompi_group_allocate(new_group_size); if (NULL == new_group_pointer) { + OBJ_DESTRUCT(&bitmap); return MPI_ERR_GROUP; } /* fill in the new group list */ /* put group1 elements in the list */ - for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) { - new_group_pointer->grp_proc_pointers[proc1] = - ompi_group_peer_lookup(group1_pointer,proc1); + for (int proc1 = 0; proc1 < group1->grp_proc_count; ++proc1) { + new_group_pointer->grp_proc_pointers[proc1] = + ompi_group_get_proc_ptr_raw (group1, proc1); } - cnt = group1_pointer->grp_proc_count; + cnt = group1->grp_proc_count; /* check group2 elements to see if they need to be included in the list */ - for (proc2 = 0; proc2 < group2_pointer->grp_proc_count; proc2++) { - proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2); - - /* check to see if this proc2 is alread in the group */ - found_in_group = 0; - for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) { - proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1); - - if (proc1_pointer == proc2_pointer) { - /* proc2 is in group1 - don't double count */ - found_in_group = 1; - break; - } - } /* end proc1 loop */ - - if (found_in_group) { + for (int proc2 = 0; proc2 < group2->grp_proc_count; ++proc2) { + if (opal_bitmap_is_set_bit (&bitmap, proc2)) { continue; } - new_group_pointer->grp_proc_pointers[cnt] = - ompi_group_peer_lookup(group2_pointer,proc2); - cnt++; + proc2_pointer = ompi_group_get_proc_ptr_raw (group2, proc2); + new_group_pointer->grp_proc_pointers[cnt++] = proc2_pointer; } /* end proc loop */ + 
OBJ_DESTRUCT(&bitmap); + /* increment proc reference counters */ ompi_group_increment_proc_count(new_group_pointer); /* find my rank */ - my_group_rank = group1_pointer->grp_my_rank; - if (MPI_UNDEFINED == my_group_rank) { - my_group_rank = group2_pointer->grp_my_rank; - if ( MPI_UNDEFINED != my_group_rank) { - my_proc_pointer = ompi_group_peer_lookup(group2_pointer,my_group_rank); - } + if (MPI_UNDEFINED != group1->grp_my_rank || MPI_UNDEFINED != group2->grp_my_rank) { + ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc); } else { - my_proc_pointer = ompi_group_peer_lookup(group1_pointer,my_group_rank); - } - - if ( MPI_UNDEFINED == my_group_rank ) { new_group_pointer->grp_my_rank = MPI_UNDEFINED; } - else { - ompi_set_group_rank(new_group_pointer, my_proc_pointer); - } *new_group = (MPI_Group) new_group_pointer; - return OMPI_SUCCESS; } -/* - * Group Difference has to use the dense format since we don't support +/* + * Group Difference has to use the dense format since we don't support * two parent groups in the group structure and maintain functions */ int ompi_group_difference(ompi_group_t* group1, ompi_group_t* group2, ompi_group_t **new_group) { /* local varibles */ - int new_group_size, proc1, proc2, found_in_group2, cnt; - int my_group_rank; - ompi_group_t *group1_pointer, *group2_pointer, *new_group_pointer; - ompi_proc_t *proc1_pointer, *proc2_pointer, *my_proc_pointer = NULL; - - - group1_pointer=(ompi_group_t *)group1; - group2_pointer=(ompi_group_t *)group2; + int new_group_size, overlap_count, rc; + ompi_group_t *new_group_pointer; + ompi_proc_t *proc1_pointer; + opal_bitmap_t bitmap; /* * form union */ /* get new group size */ - new_group_size=0; + OBJ_CONSTRUCT(&bitmap, opal_bitmap_t); + rc = opal_bitmap_init (&bitmap, 32); + if (OPAL_SUCCESS != rc) { + return rc; + } - /* loop over group1 members */ - for( proc1=0; proc1 < group1_pointer->grp_proc_count; proc1++ ) { - proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1); - /* 
check to see if this proc is in group2 */ - found_in_group2=0; - for( proc2=0 ; proc2 < group2_pointer->grp_proc_count ; proc2++ ) { - proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2); - if( proc1_pointer == proc2_pointer ) { - found_in_group2=true; - break; - } - } /* end proc1 loop */ - if(found_in_group2) { - continue; - } - new_group_size++; - } /* end proc loop */ + /* check group2 elements to see if they need to be included in the list */ + overlap_count = ompi_group_dense_overlap (group2, group1, &bitmap); + if (0 > overlap_count) { + OBJ_DESTRUCT(&bitmap); + return overlap_count; + } + new_group_size = group1->grp_proc_count - overlap_count; if ( 0 == new_group_size ) { *new_group = MPI_GROUP_EMPTY; OBJ_RETAIN(MPI_GROUP_EMPTY); + OBJ_DESTRUCT(&bitmap); return MPI_SUCCESS; } /* allocate a new ompi_group_t structure */ - new_group_pointer=ompi_group_allocate(new_group_size); + new_group_pointer = ompi_group_allocate(new_group_size); if( NULL == new_group_pointer ) { + OBJ_DESTRUCT(&bitmap); return MPI_ERR_GROUP; } /* fill in group list */ - cnt=0; /* loop over group1 members */ - for( proc1=0; proc1 < group1_pointer->grp_proc_count; proc1++ ) { - proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1); - /* check to see if this proc is in group2 */ - found_in_group2=0; - for( proc2=0 ; proc2 < group2_pointer->grp_proc_count ; proc2++ ) { - proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2); - if( proc1_pointer == proc2_pointer ) { - found_in_group2=true; - break; - } - } /* end proc1 loop */ - if(found_in_group2) { + for (int proc1 = 0, cnt = 0 ; proc1 < group1->grp_proc_count ; ++proc1) { + if (opal_bitmap_is_set_bit (&bitmap, proc1)) { continue; } - new_group_pointer->grp_proc_pointers[cnt] = - ompi_group_peer_lookup(group1_pointer,proc1); - - cnt++; + proc1_pointer = ompi_group_get_proc_ptr_raw (group1, proc1); + new_group_pointer->grp_proc_pointers[cnt++] = proc1_pointer; } /* end proc loop */ + OBJ_DESTRUCT(&bitmap); + /* 
increment proc reference counters */ ompi_group_increment_proc_count(new_group_pointer); /* find my rank */ - my_group_rank=group1_pointer->grp_my_rank; - if ( MPI_UNDEFINED != my_group_rank ) { - my_proc_pointer = ompi_group_peer_lookup(group1_pointer,my_group_rank); - } - else { - my_group_rank=group2_pointer->grp_my_rank; - if ( MPI_UNDEFINED != my_group_rank ) { - my_proc_pointer = ompi_group_peer_lookup(group2_pointer,my_group_rank); - } - } - - if ( MPI_UNDEFINED == my_group_rank ) { + if (MPI_UNDEFINED == group1->grp_my_rank || MPI_UNDEFINED != group2->grp_my_rank) { new_group_pointer->grp_my_rank = MPI_UNDEFINED; - } - else { - ompi_set_group_rank(new_group_pointer,my_proc_pointer); + } else { + ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc); } *new_group = (MPI_Group)new_group_pointer; diff --git a/ompi/group/group_set_rank.c b/ompi/group/group_set_rank.c index 27bee6faf12..16b84017433 100644 --- a/ompi/group/group_set_rank.c +++ b/ompi/group/group_set_rank.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -38,12 +41,10 @@ void ompi_set_group_rank(ompi_group_t *group, struct ompi_proc_t *proc_pointer) for (proc = 0; proc < group->grp_proc_count; proc++) { /* check and see if this proc pointer matches proc_pointer */ - if (ompi_group_peer_lookup(group,proc) == proc_pointer) { + if (ompi_group_peer_lookup_existing (group, proc) == proc_pointer) { group->grp_my_rank = proc; - } + break; + } } /* end proc loop */ } - - /* return */ - return; } diff --git a/ompi/group/group_sporadic.c b/ompi/group/group_sporadic.c index 8053ceda976..46e0d6bc9f8 100644 --- a/ompi/group/group_sporadic.c +++ b/ompi/group/group_sporadic.c @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* +/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,8 +27,8 @@ #include "mpi.h" int ompi_group_calc_sporadic ( int n , const int *ranks) -{ - int i,l=0; +{ + int i,l=0; for (i=0 ; isparse_data.grp_sporadic.grp_sporadic_list_len ; i++) { - if( child_group->sparse_data.grp_sporadic.grp_sporadic_list[i].rank_first - <= ranks1[j] && ranks1[j] <= - child_group->sparse_data.grp_sporadic.grp_sporadic_list[i].rank_first + - child_group->sparse_data.grp_sporadic.grp_sporadic_list[i].length -1 ) { - + for(i=0 ; i sparse_data.grp_sporadic.grp_sporadic_list_len ; i++) { + if( child_group->sparse_data.grp_sporadic.grp_sporadic_list[i].rank_first + <= ranks1[j] && ranks1[j] <= + child_group->sparse_data.grp_sporadic.grp_sporadic_list[i].rank_first + + child_group->sparse_data.grp_sporadic.grp_sporadic_list[i].length -1 ) { + ranks2[j] = ranks1[j] - child_group-> - sparse_data.grp_sporadic.grp_sporadic_list[i].rank_first + count; + sparse_data.grp_sporadic.grp_sporadic_list[i].rank_first + count; break; } else { @@ -80,13 +80,13 @@ int ompi_group_translate_ranks_sporadic ( ompi_group_t *parent_group, return OMPI_SUCCESS; } /* from child group to parent group*/ -int ompi_group_translate_ranks_sporadic_reverse ( ompi_group_t *child_group, +int ompi_group_translate_ranks_sporadic_reverse ( ompi_group_t *child_group, int n_ranks, const int *ranks1, - ompi_group_t *parent_group, - int *ranks2) + ompi_group_t *parent_group, + int *ranks2) { int i,j,count; - + for (j=0 ; jsparse_data.grp_sporadic.grp_sporadic_list_len ; i++) { - if ( ranks1[j] > ( count + - child_group->sparse_data.grp_sporadic.grp_sporadic_list[i].length + for (i=0 ; isparse_data.grp_sporadic.grp_sporadic_list_len ; i++) { + if ( ranks1[j] > ( count + + child_group->sparse_data.grp_sporadic.grp_sporadic_list[i].length - 1) ) { count = count + child_group->sparse_data.grp_sporadic.grp_sporadic_list[i].length; - } - else { - ranks2[j] = 
child_group->sparse_data.grp_sporadic.grp_sporadic_list[i].rank_first - + (ranks1[j] - count); + } + else { + ranks2[j] = child_group->sparse_data.grp_sporadic.grp_sporadic_list[i].rank_first + + (ranks1[j] - count); break; } } @@ -120,19 +120,19 @@ int ompi_group_incl_spor(ompi_group_t* group, int n, const int *ranks, /* local variables */ int my_group_rank,l,i,j,proc_count; ompi_group_t *group_pointer, *new_group_pointer; - + group_pointer = (ompi_group_t *)group; - + if (0 == n) { *new_group = MPI_GROUP_EMPTY; OBJ_RETAIN(MPI_GROUP_EMPTY); return OMPI_SUCCESS; } - + l=0; j=0; proc_count = 0; - + for(i=0 ; i + + new_group_pointer -> sparse_data.grp_sporadic.grp_sporadic_list[j].rank_first = ranks[0]; - new_group_pointer -> + new_group_pointer -> sparse_data.grp_sporadic.grp_sporadic_list[j].length = 1; - + for(i=1 ; i sparse_data.grp_sporadic.grp_sporadic_list[j].length ++; } else { j++; - new_group_pointer -> + new_group_pointer -> sparse_data.grp_sporadic.grp_sporadic_list[j].rank_first = ranks[i]; - new_group_pointer -> + new_group_pointer -> sparse_data.grp_sporadic.grp_sporadic_list[j].length = 1; } } - + new_group_pointer->sparse_data.grp_sporadic.grp_sporadic_list_len = j+1; new_group_pointer -> grp_parent_group_ptr = group_pointer; - + OBJ_RETAIN(new_group_pointer -> grp_parent_group_ptr); ompi_group_increment_proc_count(new_group_pointer -> grp_parent_group_ptr); - - for(i=0 ; isparse_data.grp_sporadic.grp_sporadic_list_len ; i++) { - proc_count = proc_count + new_group_pointer -> + + for(i=0 ; isparse_data.grp_sporadic.grp_sporadic_list_len ; i++) { + proc_count = proc_count + new_group_pointer -> sparse_data.grp_sporadic.grp_sporadic_list[i].length; } new_group_pointer->grp_proc_count = proc_count; - + ompi_group_increment_proc_count(new_group_pointer); my_group_rank=group_pointer->grp_my_rank; ompi_group_translate_ranks (group_pointer,1,&my_group_rank, new_group_pointer,&new_group_pointer->grp_my_rank); - + *new_group = (MPI_Group)new_group_pointer; - 
+ return OMPI_SUCCESS; } diff --git a/ompi/group/group_strided.c b/ompi/group/group_strided.c index b38a3015f4e..d5f7f2848a8 100644 --- a/ompi/group/group_strided.c +++ b/ompi/group/group_strided.c @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* +/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,15 +33,15 @@ int ompi_group_calc_strided ( int n , const int *ranks ) { return -1; } else { - return (sizeof(int)*3); + return (sizeof(int)*3); } } /* from parent group to child group*/ -int ompi_group_translate_ranks_strided (ompi_group_t *parent_group, +int ompi_group_translate_ranks_strided (ompi_group_t *parent_group, int n_ranks, const int *ranks1, - ompi_group_t *child_group, - int *ranks2) + ompi_group_t *child_group, + int *ranks2) { int s,o,l,i; s = child_group->sparse_data.grp_strided.grp_strided_stride; @@ -53,7 +53,7 @@ int ompi_group_translate_ranks_strided (ompi_group_t *parent_group, } else { ranks2[i] = MPI_UNDEFINED; - + if ( (ranks1[i]-o) >= 0 && (ranks1[i]-o)%s == 0 && ranks1[i] <= l) { ranks2[i] = (ranks1[i] - o)/s; } @@ -63,14 +63,14 @@ int ompi_group_translate_ranks_strided (ompi_group_t *parent_group, } /* from child group to parent group*/ -int ompi_group_translate_ranks_strided_reverse (ompi_group_t *child_group, +int 
ompi_group_translate_ranks_strided_reverse (ompi_group_t *child_group, int n_ranks, const int *ranks1, - ompi_group_t *parent_group, - int *ranks2) + ompi_group_t *parent_group, + int *ranks2) { int s,o,i; s = child_group->sparse_data.grp_strided.grp_strided_stride; - o = child_group->sparse_data.grp_strided.grp_strided_offset; + o = child_group->sparse_data.grp_strided.grp_strided_offset; for (i = 0; i < n_ranks; i++) { if ( MPI_PROC_NULL == ranks1[i]) { ranks2[i] = MPI_PROC_NULL; @@ -98,7 +98,7 @@ static int check_stride(const int incl[],int incllen) { return -1; } } - return s; + return s; } int ompi_group_incl_strided(ompi_group_t* group, int n, const int *ranks, @@ -107,9 +107,9 @@ int ompi_group_incl_strided(ompi_group_t* group, int n, const int *ranks, /* local variables */ int my_group_rank,stride; ompi_group_t *group_pointer, *new_group_pointer; - + group_pointer = (ompi_group_t *)group; - + if ( 0 == n ) { *new_group = MPI_GROUP_EMPTY; OBJ_RETAIN(MPI_GROUP_EMPTY); @@ -130,7 +130,7 @@ int ompi_group_incl_strided(ompi_group_t* group, int n, const int *ranks, new_group_pointer -> sparse_data.grp_strided.grp_strided_offset = ranks[0]; new_group_pointer -> sparse_data.grp_strided.grp_strided_last_element = ranks[n-1]; new_group_pointer -> grp_proc_count = n; - + ompi_group_increment_proc_count(new_group_pointer); my_group_rank = group_pointer->grp_my_rank; ompi_group_translate_ranks (new_group_pointer->grp_parent_group_ptr,1,&my_group_rank, diff --git a/ompi/include/Makefile.am b/ompi/include/Makefile.am index 62768eccc35..7a37223619d 100644 --- a/ompi/include/Makefile.am +++ b/ompi/include/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2014-2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -28,7 +28,7 @@ headers = ompi_config.h nodist_headers = # Install these in $(includedir) -include_HEADERS = +include_HEADERS = # Always install these in $(pkgincludedir) pkginclude_HEADERS = @@ -53,7 +53,8 @@ nodist_include_HEADERS = \ mpif.h \ mpif-ext.h \ mpif-sizeof.h \ - mpi_portable_platform.h + mpif-c-constants-decl.h \ + mpi_portable_platform.h if OMPI_BUILD_FORTRAN_MPIFH_BINDINGS nodist_include_HEADERS += \ @@ -62,8 +63,8 @@ endif include ompi/Makefile.am -# This is complicated, but mpif-values.pl generates -# several mpif-*.h files in this directory (during autogen.pl). +# This is complicated, but mpif-values.pl generates several +# mpif-*.h files in this directory (during autogen.pl). # Hence, if any of those files change, it's safer to just force the # user to re-autogen. #mpif.h: mpif-constants.h mpif-handles.h mpif-io-constants.h mpif-io-handles.h @@ -88,7 +89,22 @@ mpif-sizeof.h: --real16=$(OMPI_HAVE_FORTRAN_REAL16) \ --complex32=$(OMPI_HAVE_FORTRAN_COMPLEX32) -if WANT_INSTALL_HEADERS +# +# mpif-c-constants-decl.h, among other files, is generated based on some +# results from configure tests. 
+# + +mpif_mangling_pl=$(top_srcdir)/ompi/mpi/fortran/base/gen-mpi-mangling.pl +mpif-c-constants-decl.h: $(top_builddir)/config.status +mpif-c-constants-decl.h: $(mpif_mangling_pl) +mpif-c-constants-decl.h: + $(OMPI_V_GEN) $(mpif_mangling_pl) \ + --caps $(OMPI_FORTRAN_CAPS) \ + --plain $(OMPI_FORTRAN_PLAIN) \ + --single $(OMPI_FORTRAN_SINGLE_UNDERSCORE) \ + --double $(OMPI_FORTRAN_DOUBLE_UNDERSCORE) + +if WANT_INSTALL_HEADERS ompidir = $(ompiincludedir) nobase_dist_ompi_HEADERS = $(headers) nobase_nodist_ompi_HEADERS = $(nodist_headers) @@ -103,7 +119,9 @@ CLEANFILES = mpif-sizeof.f90 # Remove the auto-generated files (they are generated by configure) # Since there is no mpi-ext.h.in, autogen does not know to cleanup this file. distclean-local: - rm -f mpi-ext.h mpif-ext.h mpi_portable_platform.h mpif-sizeof.h + rm -f mpi-ext.h mpif-ext.h mpi_portable_platform.h \ + mpif-sizeof.h \ + mpif-c-constants-decl.h mpif-c-constants.h mpif-f08-types.h mpi_portable_platform.h: $(top_srcdir)/opal/include/opal/opal_portable_platform.h -@rm -f mpi_portable_platform.h diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index 31996d978b1..de12a12f0dc 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -9,13 +9,17 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009-2012 Oak Rigde National Laboratory. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011-2013 INRIA. All rights reserved. + * Copyright (c) 2015 University of Houston. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -238,7 +242,7 @@ * MPI version */ #define MPI_VERSION 3 -#define MPI_SUBVERSION 0 +#define MPI_SUBVERSION 1 /* @@ -431,8 +435,8 @@ typedef int (MPI_Grequest_cancel_function)(void *, int); /* * More constants */ -#define MPI_UNWEIGHTED ((void *) 2) /* unweighted graph */ -#define MPI_WEIGHTS_EMPTY ((void *) 3) /* empty weights */ +#define MPI_UNWEIGHTED ((int *) 2) /* unweighted graph */ +#define MPI_WEIGHTS_EMPTY ((int *) 3) /* empty weights */ #define MPI_BOTTOM ((void *) 0) /* base reference address */ #define MPI_IN_PLACE ((void *) 1) /* in place buffer */ #define MPI_BSEND_OVERHEAD 128 /* size of bsend header + ptr */ @@ -604,6 +608,7 @@ enum { #define MPI_ERR_RMA_FLAVOR 70 #define MPI_ERR_RMA_SHARED 71 #define MPI_T_ERR_INVALID 72 +#define MPI_T_ERR_INVALID_NAME 73 /* Per MPI-3 p349 47, MPI_ERR_LASTCODE must be >= the last predefined MPI_ERR_ code. 
Set the last code to allow some room for adding @@ -771,6 +776,7 @@ enum { */ #define MPI_T_PVAR_ALL_HANDLES ((MPI_T_pvar_handle) -1) #define MPI_T_PVAR_HANDLE_NULL ((MPI_T_pvar_handle) 0) +#define MPI_T_PVAR_SESSION_NULL ((MPI_T_pvar_session) 0) #define MPI_T_CVAR_HANDLE_NULL ((MPI_T_cvar_handle) 0) /* MPI-2 specifies that the name "MPI_TYPE_NULL_DELETE_FN" (and all @@ -1159,6 +1165,7 @@ OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUSES_IGNORE; #define MPI_C_LONG_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_long_double_complex) #endif #define MPI_CXX_BOOL OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_bool) +#define MPI_CXX_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_cplex) #define MPI_CXX_FLOAT_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_cplex) #define MPI_CXX_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_dblcplex) #define MPI_CXX_LONG_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_ldblcplex) @@ -1174,6 +1181,11 @@ OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUSES_IGNORE; #define MPI_TYPECLASS_REAL 2 #define MPI_TYPECLASS_COMPLEX 3 +/* Aint helper macros (MPI-3.1) */ +#define MPI_Aint_add(base, disp) ((MPI_Aint) ((char *) (base) + (disp))) +#define MPI_Aint_diff(addr1, addr2) ((MPI_Aint) ((char *) (addr1) - (char *) (addr2))) +#define PMPI_Aint_add(base, disp) MPI_Aint_add(base, disp) +#define PMPI_Aint_diff(addr1, addr2) MPI_Aint_diff(addr1, addr2) /* * MPI API @@ -1326,7 +1338,7 @@ OMPI_DECLSPEC int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], OMPI_DECLSPEC int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm); OMPI_DECLSPEC int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info, MPI_Comm *newcomm); OMPI_DECLSPEC int MPI_Comm_test_inter(MPI_Comm comm, int *flag); -OMPI_DECLSPEC int MPI_Compare_and_swap(void *origin_addr, void *compare_addr, +OMPI_DECLSPEC int MPI_Compare_and_swap(const void *origin_addr, const void 
*compare_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Win win); OMPI_DECLSPEC int MPI_Dims_create(int nnodes, int ndims, int dims[]); @@ -1344,7 +1356,7 @@ OMPI_DECLSPEC int MPI_Error_class(int errorcode, int *errorclass); OMPI_DECLSPEC int MPI_Error_string(int errorcode, char *string, int *resultlen); OMPI_DECLSPEC int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -OMPI_DECLSPEC int MPI_Fetch_and_op(void *origin_addr, void *result_addr, MPI_Datatype datatype, +OMPI_DECLSPEC int MPI_Fetch_and_op(const void *origin_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win); OMPI_DECLSPEC int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); @@ -1384,6 +1396,10 @@ OMPI_DECLSPEC int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPI_Request *request); OMPI_DECLSPEC int MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, const void *buf, int count, MPI_Datatype datatype, MPI_Request *request); +OMPI_DECLSPEC int MPI_File_iread_at_all(MPI_File fh, MPI_Offset offset, void *buf, + int count, MPI_Datatype datatype, MPI_Request *request); +OMPI_DECLSPEC int MPI_File_iwrite_at_all(MPI_File fh, MPI_Offset offset, const void *buf, + int count, MPI_Datatype datatype, MPI_Request *request); OMPI_DECLSPEC int MPI_File_read(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status); OMPI_DECLSPEC int MPI_File_read_all(MPI_File fh, void *buf, int count, @@ -1396,6 +1412,10 @@ OMPI_DECLSPEC int MPI_File_iread(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Request *request); OMPI_DECLSPEC int MPI_File_iwrite(MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Request *request); +OMPI_DECLSPEC int MPI_File_iread_all(MPI_File fh, void *buf, int count, + 
MPI_Datatype datatype, MPI_Request *request); +OMPI_DECLSPEC int MPI_File_iwrite_all(MPI_File fh, const void *buf, int count, + MPI_Datatype datatype, MPI_Request *request); OMPI_DECLSPEC int MPI_File_seek(MPI_File fh, MPI_Offset offset, int whence); OMPI_DECLSPEC int MPI_File_get_position(MPI_File fh, MPI_Offset *offset); OMPI_DECLSPEC int MPI_File_get_byte_offset(MPI_File fh, MPI_Offset offset, @@ -1612,8 +1632,8 @@ OMPI_DECLSPEC int MPI_Put(const void *origin_addr, int origin_count, MPI_Dataty int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win); OMPI_DECLSPEC int MPI_Query_thread(int *provided); -OMPI_DECLSPEC int MPI_Raccumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, - int target_rank, MPI_Aint target_disp, int target_count, +OMPI_DECLSPEC int MPI_Raccumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request); OMPI_DECLSPEC int MPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request); @@ -1643,16 +1663,16 @@ OMPI_DECLSPEC MPI_Request MPI_Request_f2c(MPI_Fint request); OMPI_DECLSPEC int MPI_Request_free(MPI_Request *request); OMPI_DECLSPEC int MPI_Request_get_status(MPI_Request request, int *flag, MPI_Status *status); -OMPI_DECLSPEC int MPI_Rget(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, +OMPI_DECLSPEC int MPI_Rget(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request); OMPI_DECLSPEC int MPI_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, - int target_rank, MPI_Aint target_disp, int target_count, + int 
target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request); OMPI_DECLSPEC int MPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, - int target_rank, MPI_Aint target_disp, int target_cout, + int target_rank, MPI_Aint target_disp, int target_cout, MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request); OMPI_DECLSPEC int MPI_Rsend(const void *ibuf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); @@ -1836,7 +1856,7 @@ OMPI_DECLSPEC int MPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_at MPI_Win_delete_attr_function *win_delete_attr_fn, int *win_keyval, void *extra_state); OMPI_DECLSPEC int MPI_Win_delete_attr(MPI_Win win, int win_keyval); -OMPI_DECLSPEC int MPI_Win_detach(MPI_Win win, void *base); +OMPI_DECLSPEC int MPI_Win_detach(MPI_Win win, const void *base); OMPI_DECLSPEC MPI_Win MPI_Win_f2c(MPI_Fint win); OMPI_DECLSPEC int MPI_Win_fence(int assert, MPI_Win win); OMPI_DECLSPEC int MPI_Win_flush(int rank, MPI_Win win); @@ -2018,7 +2038,7 @@ OMPI_DECLSPEC int PMPI_Comm_spawn_multiple(int count, char *array_of_commands[] OMPI_DECLSPEC int PMPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm); OMPI_DECLSPEC int PMPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info, MPI_Comm *newcomm); OMPI_DECLSPEC int PMPI_Comm_test_inter(MPI_Comm comm, int *flag); -OMPI_DECLSPEC int PMPI_Compare_and_swap(void *origin_addr, void *compare_addr, +OMPI_DECLSPEC int PMPI_Compare_and_swap(const void *origin_addr, const void *compare_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Win win); OMPI_DECLSPEC int PMPI_Dims_create(int nnodes, int ndims, int dims[]); @@ -2036,7 +2056,7 @@ OMPI_DECLSPEC int PMPI_Error_class(int errorcode, int *errorclass); OMPI_DECLSPEC int PMPI_Error_string(int errorcode, char *string, int *resultlen); OMPI_DECLSPEC int PMPI_Exscan(const void 
*sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -OMPI_DECLSPEC int PMPI_Fetch_and_op(void *origin_addr, void *result_addr, MPI_Datatype datatype, +OMPI_DECLSPEC int PMPI_Fetch_and_op(const void *origin_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win); OMPI_DECLSPEC int PMPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); @@ -2076,6 +2096,10 @@ OMPI_DECLSPEC int PMPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPI_Request *request); OMPI_DECLSPEC int PMPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, const void *buf, int count, MPI_Datatype datatype, MPI_Request *request); +OMPI_DECLSPEC int PMPI_File_iread_at_all(MPI_File fh, MPI_Offset offset, void *buf, + int count, MPI_Datatype datatype, MPI_Request *request); +OMPI_DECLSPEC int PMPI_File_iwrite_at_all(MPI_File fh, MPI_Offset offset, const void *buf, + int count, MPI_Datatype datatype, MPI_Request *request); OMPI_DECLSPEC int PMPI_File_read(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status); OMPI_DECLSPEC int PMPI_File_read_all(MPI_File fh, void *buf, int count, @@ -2088,6 +2112,10 @@ OMPI_DECLSPEC int PMPI_File_iread(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Request *request); OMPI_DECLSPEC int PMPI_File_iwrite(MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Request *request); +OMPI_DECLSPEC int PMPI_File_iread_all(MPI_File fh, void *buf, int count, + MPI_Datatype datatype, MPI_Request *request); +OMPI_DECLSPEC int PMPI_File_iwrite_all(MPI_File fh, const void *buf, int count, + MPI_Datatype datatype, MPI_Request *request); OMPI_DECLSPEC int PMPI_File_seek(MPI_File fh, MPI_Offset offset, int whence); OMPI_DECLSPEC int PMPI_File_get_position(MPI_File fh, MPI_Offset *offset); OMPI_DECLSPEC int 
PMPI_File_get_byte_offset(MPI_File fh, MPI_Offset offset, @@ -2306,8 +2334,8 @@ OMPI_DECLSPEC int PMPI_Put(const void *origin_addr, int origin_count, MPI_Datat int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win); OMPI_DECLSPEC int PMPI_Query_thread(int *provided); -OMPI_DECLSPEC int PMPI_Raccumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, - int target_rank, MPI_Aint target_disp, int target_count, +OMPI_DECLSPEC int PMPI_Raccumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request); OMPI_DECLSPEC int PMPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request); @@ -2337,16 +2365,16 @@ OMPI_DECLSPEC MPI_Request PMPI_Request_f2c(MPI_Fint request); OMPI_DECLSPEC int PMPI_Request_free(MPI_Request *request); OMPI_DECLSPEC int PMPI_Request_get_status(MPI_Request request, int *flag, MPI_Status *status); -OMPI_DECLSPEC int PMPI_Rget(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, +OMPI_DECLSPEC int PMPI_Rget(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request); OMPI_DECLSPEC int PMPI_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, - int target_rank, MPI_Aint target_disp, int target_count, + int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request); OMPI_DECLSPEC int PMPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, - int target_rank, MPI_Aint target_disp, int target_cout, + int target_rank, MPI_Aint target_disp, int 
target_cout, MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request); OMPI_DECLSPEC int PMPI_Rsend(const void *ibuf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); @@ -2530,7 +2558,7 @@ OMPI_DECLSPEC int PMPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_a MPI_Win_delete_attr_function *win_delete_attr_fn, int *win_keyval, void *extra_state); OMPI_DECLSPEC int PMPI_Win_delete_attr(MPI_Win win, int win_keyval); -OMPI_DECLSPEC int PMPI_Win_detach(MPI_Win win, void *base); +OMPI_DECLSPEC int PMPI_Win_detach(MPI_Win win, const void *base); OMPI_DECLSPEC MPI_Win PMPI_Win_f2c(MPI_Fint win); OMPI_DECLSPEC int PMPI_Win_fence(int assert, MPI_Win win); OMPI_DECLSPEC int PMPI_Win_flush(int rank, MPI_Win win); @@ -2589,7 +2617,7 @@ OMPI_DECLSPEC int PMPI_T_pvar_get_info(int pvar_index, char *name, int *name_le int *verbosity, int *var_class, MPI_Datatype *datatype, MPI_T_enum *enumtype, char *desc, int *desc_len, int *bind, int *readonly, int *continuous, int *atomic); -OMPI_DECLSPEC int PMPI_T_pvar_get_index (const char *name, int *pvar_index); +OMPI_DECLSPEC int PMPI_T_pvar_get_index (const char *name, int var_class, int *pvar_index); OMPI_DECLSPEC int PMPI_T_pvar_session_create(MPI_T_pvar_session *session); OMPI_DECLSPEC int PMPI_T_pvar_session_free(MPI_T_pvar_session *session); OMPI_DECLSPEC int PMPI_T_pvar_handle_alloc(MPI_T_pvar_session session, int pvar_index, @@ -2639,7 +2667,7 @@ OMPI_DECLSPEC int MPI_T_pvar_get_info(int pvar_index, char *name, int *name_len int *verbosity, int *var_class, MPI_Datatype *datatype, MPI_T_enum *enumtype, char *desc, int *desc_len, int *bind, int *readonly, int *continuous, int *atomic); -OMPI_DECLSPEC int MPI_T_pvar_get_index (const char *name, int *pvar_index); +OMPI_DECLSPEC int MPI_T_pvar_get_index (const char *name, int var_class, int *pvar_index); OMPI_DECLSPEC int MPI_T_pvar_session_create(MPI_T_pvar_session *session); OMPI_DECLSPEC int MPI_T_pvar_session_free(MPI_T_pvar_session *session); 
OMPI_DECLSPEC int MPI_T_pvar_handle_alloc(MPI_T_pvar_session session, int pvar_index, diff --git a/ompi/include/mpif-mpi-io.h b/ompi/include/mpif-mpi-io.h deleted file mode 100644 index 6ffa9986078..00000000000 --- a/ompi/include/mpif-mpi-io.h +++ /dev/null @@ -1,85 +0,0 @@ -! -*- fortran -*- -! -! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana -! University Research and Technology -! Corporation. All rights reserved. -! Copyright (c) 2004-2005 The University of Tennessee and The University -! of Tennessee Research Foundation. All rights -! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -! University of Stuttgart. All rights reserved. -! Copyright (c) 2004-2005 The Regents of the University of California. -! All rights reserved. -! Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! -! Do ***not*** copy this file to the directory where your Fortran -! fortran application is compiled unless it is absolutely necessary! Most -! modern Fortran compilers now support the -I command line flag, which -! tells the compiler where to find .h files (specifically, this one). For -! example: -! -! shell$ mpif77 foo.f -o foo -I$OMPI_HOME/include -! -! will probably do the trick (assuming that you have set OMPI_HOME -! properly). -! -! That being said, OMPI's "mpif77" wrapper compiler should -! automatically include the -I option for you. The following command -! should be equivalent to the command listed above: -! -! shell$ mpif77 foo.f -o foo -! -! You should not copy this file to your local directory because it is -! possible that this file will be changed between versions of Open MPI. -! 
Indeed, this mpif.h is incompatible with the mpif.f of other -! implementations of MPI. Using this mpif.h with other implementations -! of MPI, or with other versions of Open MPI will result in undefined -! behavior (to include incorrect results, segmentation faults, -! unexplainable "hanging" in your application, etc.). Always use the -! -I command line option instead (or let mpif77 do it for you). -! -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -! -! This file is included as a back-end file to both mpif.h (i.e., the -! standardized MPI Fortran header file) and a bunch of the MPI -! Fortran 90 subroutine implementations found in ompi/mpi/f90. -! -! This file contains the output from configure that is relevant for -! Fortran applications (both 77 and 90) and a few values that are -! necessary to compile the F90 module (e.g., MPI_STATUS_SIZE). -! 
- - integer MPI_FILE_NULL - integer MPI_SEEK_SET, MPI_SEEK_CUR, MPI_SEEK_END - integer MPI_MODE_CREATE - integer MPI_MODE_RDONLY, MPI_MODE_WRONLY, MPI_MODE_RDWR - integer MPI_MODE_DELETE_ON_CLOSE, MPI_MODE_UNIQUE_OPEN - integer MPI_MODE_EXCL, MPI_MODE_APPEND, MPI_MODE_SEQUENTIAL - integer MPI_DISPLACEMENT_CURRENT - - parameter (MPI_FILE_NULL=0) - parameter (MPI_SEEK_SET=600) - parameter (MPI_SEEK_CUR=602) - parameter (MPI_SEEK_END=604) - parameter (MPI_MODE_CREATE=1) - parameter (MPI_MODE_RDONLY=2) - parameter (MPI_MODE_WRONLY=4) - parameter (MPI_MODE_RDWR=8) - parameter (MPI_MODE_DELETE_ON_CLOSE=16) - parameter (MPI_MODE_UNIQUE_OPEN=32) - parameter (MPI_MODE_EXCL=64) - parameter (MPI_MODE_APPEND=128) - parameter (MPI_MODE_SEQUENTIAL=256) - parameter (MPI_DISPLACEMENT_CURRENT=-54278278) diff --git a/ompi/include/mpif-sentinels.h b/ompi/include/mpif-sentinels.h index 59de52e941c..082154cdbbc 100644 --- a/ompi/include/mpif-sentinels.h +++ b/ompi/include/mpif-sentinels.h @@ -26,8 +26,7 @@ ! ! - the "mpi" module bindings ! - the "mpi_f08" module bindings -! - ompi/mpi/fortran/base/constants.h -! - ompi/mpi/runtime/ompi_init.c +! - ompi/mpi/fortran/base/gen-mpi-mangling.pl ! ! MPI_BOTTOM is only used where choice buffers can be used (meaning @@ -41,7 +40,7 @@ ! don't need another interface for MPI_COMM_SPAWN. character MPI_ARGV_NULL(1) ! Ditto for MPI_ARGVS_NULL / MPI_COMM_SPAWN_MULTIPLE. - character MPI_ARGVS_NULL(1) + character MPI_ARGVS_NULL(1, 1) ! MPI_ERRCODES_IGNORE has similar rationale to MPI_ARGV_NULL. The ! F77 functions are all smart enough to check that the errcodes ! parameter is not ERRCODES_IGNORE before assigning values into it @@ -53,8 +52,10 @@ integer MPI_STATUS_IGNORE(MPI_STATUS_SIZE) ! Ditto for MPI_STATUSES_IGNORE integer MPI_STATUSES_IGNORE(MPI_STATUS_SIZE, 1) - integer MPI_UNWEIGHTED - integer MPI_WEIGHTS_EMPTY +! Ditto for MPI_UNWEIGHTED + integer MPI_UNWEIGHTED(1) +! 
Ditto for MPI_WEIGHTS_EMPTY + integer MPI_WEIGHTS_EMPTY(1) common/mpi_fortran_bottom/MPI_BOTTOM common/mpi_fortran_in_place/MPI_IN_PLACE diff --git a/ompi/include/mpif-values.pl b/ompi/include/mpif-values.pl index 3dcb40e70cf..c52532af108 100755 --- a/ompi/include/mpif-values.pl +++ b/ompi/include/mpif-values.pl @@ -1,10 +1,13 @@ #!/usr/bin/env perl # # Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -51,7 +54,7 @@ sub write_file { $need_write = 1; } } - + if ($need_write) { open(FILE_OUT, ">$filename_out") || die "Couldn't open $filename_out"; print FILE_OUT $str; @@ -83,6 +86,7 @@ sub write_file { #---------------------------------------------------------------------------- my $handles; +my $lhandles; $handles->{MPI_COMM_WORLD} = 0; $handles->{MPI_COMM_SELF} = 1; @@ -103,6 +107,7 @@ sub write_file { $handles->{MPI_MAXLOC} = 11; $handles->{MPI_MINLOC} = 12; $handles->{MPI_REPLACE} = 13; +$handles->{MPI_NO_OP} = 14; $handles->{MPI_COMM_NULL} = 2; $handles->{MPI_DATATYPE_NULL} = 0; @@ -158,20 +163,22 @@ sub write_file { $handles->{MPI_LONG} = 41; $handles->{MPI_UNSIGNED_LONG} = 42; $handles->{MPI_LONG_LONG_INT} = 43; +$handles->{MPI_LONG_LONG} = $handles->{MPI_LONG_LONG_INT}; $handles->{MPI_UNSIGNED_LONG_LONG} = 44; $handles->{MPI_FLOAT} = 45; $handles->{MPI_DOUBLE} = 46; $handles->{MPI_LONG_DOUBLE} = 47; $handles->{MPI_FLOAT_INT} = 48; $handles->{MPI_DOUBLE_INT} = 49; -$handles->{MPI_LONGDBL_INT} = 50; +$handles->{MPI_LONG_DOUBLE_INT} = 50; $handles->{MPI_LONG_INT} = 51; $handles->{MPI_2INT} = 52; $handles->{MPI_SHORT_INT} = 53; $handles->{MPI_CXX_BOOL} = 54; -$handles->{MPI_CXX_CPLEX} = 55; -$handles->{MPI_CXX_DBLCPLEX} = 56; -$handles->{MPI_CXX_LDBLCPLEX} = 57; +$handles->{MPI_CXX_FLOAT_COMPLEX} = 
55; +$handles->{MPI_CXX_COMPLEX} = $handles->{MPI_CXX_FLOAT_COMPLEX}; +$handles->{MPI_CXX_DOUBLE_COMPLEX} = 56; +$handles->{MPI_CXX_LONG_DOUBLE_COMPLEX} = 57; $handles->{MPI_INT8_T} = 58; $handles->{MPI_UINT8_T} = 59; $handles->{MPI_INT16_T} = 60; @@ -187,6 +194,7 @@ sub write_file { $handles->{MPI_C_DOUBLE_COMPLEX} = 70; $handles->{MPI_C_LONG_DOUBLE_COMPLEX} = 71; $handles->{MPI_COUNT} = 72; +$handles->{MPI_C_BOOL} = 73; $handles->{MPI_MESSAGE_NO_PROC} = 1; @@ -203,7 +211,7 @@ sub write_file { my $constants; $constants->{MPI_VERSION} = 3; -$constants->{MPI_SUBVERSION} = 0; +$constants->{MPI_SUBVERSION} = 1; $constants->{MPI_ANY_SOURCE} = -1; $constants->{MPI_ANY_TAG} = -1; @@ -227,6 +235,14 @@ sub write_file { $constants->{MPI_WIN_BASE} = 7; $constants->{MPI_WIN_SIZE} = 8; $constants->{MPI_WIN_DISP_UNIT} = 9; +$constants->{MPI_WIN_CREATE_FLAVOR} = 10; +$constants->{MPI_WIN_MODEL} = 11; +$constants->{MPI_WIN_FLAVOR_CREATE} = 1; +$constants->{MPI_WIN_FLAVOR_ALLOCATE} = 2; +$constants->{MPI_WIN_FLAVOR_DYNAMIC} = 3; +$constants->{MPI_WIN_FLAVOR_SHARED} = 4; +$constants->{MPI_WIN_UNIFIED} = 0; +$constants->{MPI_WIN_SEPARATE} = 1; $constants->{MPI_BSEND_OVERHEAD} = 128; $constants->{MPI_ORDER_C} = 0; @@ -383,7 +399,9 @@ sub write_file { $io_constants->{MPI_MODE_EXCL} = 64; $io_constants->{MPI_MODE_APPEND} = 128; $io_constants->{MPI_MODE_SEQUENTIAL} = 256; -$io_constants->{MPI_DISPLACEMENT_CURRENT} = -54278278; + +my $lio_constants; +$lio_constants->{MPI_DISPLACEMENT_CURRENT} = -54278278; #---------------------------------------------------------------------------- @@ -406,6 +424,8 @@ sub write_file { ! All rights reserved. ! Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. +! Copyright (c) 2016 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ ! ! 
Additional copyrights may follow @@ -416,26 +436,32 @@ sub write_file { '; sub write_fortran_file { - my ($header, $vals, $file) = @_; + my ($header, $vals, $lvals, $file) = @_; foreach my $key (sort(keys(%{$vals}))) { $header .= " integer $key\n"; } + foreach my $key (sort(keys(%{$lvals}))) { + $header .= " integer(KIND=MPI_OFFSET_KIND) $key\n"; + } $header .= "\n"; foreach my $key (sort(keys(%{$vals}))) { $header .= " parameter ($key=$vals->{$key})\n"; } + foreach my $key (sort(keys(%{$lvals}))) { + $header .= " parameter ($key=$lvals->{$key})\n"; + } write_file($file, $header); } -write_fortran_file($header, $handles, +write_fortran_file($header, $handles, {}, "$topdir/ompi/include/mpif-handles.h"); -write_fortran_file($header, $constants, +write_fortran_file($header, $constants, {}, "$topdir/ompi/include/mpif-constants.h"); -write_fortran_file($header, $io_handles, +write_fortran_file($header, $io_handles, {}, "$topdir/ompi/include/mpif-io-handles.h"); -write_fortran_file($header, $io_constants, +write_fortran_file($header, $io_constants, $lio_constants, "$topdir/ompi/include/mpif-io-constants.h"); #---------------------------------------------------------------------------- @@ -463,6 +489,8 @@ sub write_fortran_file { * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2009-2012 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -487,6 +515,9 @@ sub write_fortran_file { foreach my $key (sort(keys(%{$io_constants}))) { $output .= "#define OMPI_$key $io_constants->{$key}\n"; } +foreach my $key (sort(keys(%{$lio_constants}))) { + $output .= "#define OMPI_$key $lio_constants->{$key}\n"; +} $output .= "\n"; foreach my $key (sort(keys(%{$io_handles}))) { $output .= "#define OMPI_$key $io_handles->{$key}\n"; diff --git a/ompi/include/ompi/Makefile.am b/ompi/include/ompi/Makefile.am index 4be821a8234..e49a4e98e60 100644 --- a/ompi/include/ompi/Makefile.am +++ b/ompi/include/ompi/Makefile.am @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/include/ompi/constants.h b/ompi/include/ompi/constants.h index 0e33ad4ca47..dbe9001baca 100644 --- a/ompi/include/ompi/constants.h +++ b/ompi/include/ompi/constants.h @@ -71,6 +71,7 @@ enum { OMPI_ERR_RMA_RANGE = OMPI_ERR_BASE - 5, OMPI_ERR_RMA_CONFLICT = OMPI_ERR_BASE - 6, OMPI_ERR_WIN = OMPI_ERR_BASE - 7, + OMPI_ERR_RMA_FLAVOR = OMPI_ERR_BASE - 8, }; #define OMPI_ERR_MAX (OMPI_ERR_BASE - 100) diff --git a/ompi/include/ompi/memchecker.h b/ompi/include/ompi/memchecker.h index 6ba22e6016e..ef59948137c 100644 --- a/ompi/include/ompi/memchecker.h +++ b/ompi/include/ompi/memchecker.h @@ -10,6 +10,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * + * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -100,6 +101,10 @@ static inline int memchecker_call (int (*f)(void *, size_t), const void * addr, return OMPI_SUCCESS; } + if ((0 == count) || (0 == datatype->super.size)) { + return OMPI_SUCCESS; + } + if( datatype->super.size == (size_t) (datatype->super.true_ub - datatype->super.true_lb) ) { /* We have a contiguous type. */ f( (void*)addr , datatype->super.size * count ); diff --git a/ompi/include/ompi/totalview.h b/ompi/include/ompi/totalview.h index 668b3b72c44..f7e50b24fb5 100644 --- a/ompi/include/ompi/totalview.h +++ b/ompi/include/ompi/totalview.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/include/ompi/types.h b/ompi/include/ompi/types.h index c87230fa23e..2c9e387a15e 100644 --- a/ompi/include/ompi/types.h +++ b/ompi/include/ompi/types.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,8 +25,8 @@ */ typedef char* ompi_job_handle_t; -/* - * Predefine some internal types so we dont need all the include +/* + * Predefine some internal types so we dont need all the include * dependencies. */ struct ompi_communicator_t; diff --git a/ompi/include/ompi/version.h.in b/ompi/include/ompi/version.h.in index 43cd53b9107..35701029524 100644 --- a/ompi/include/ompi/version.h.in +++ b/ompi/include/ompi/version.h.in @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * This file should be included by any file that needs full diff --git a/ompi/info/Makefile.am b/ompi/info/Makefile.am index ab5931de88c..e4af170dcf8 100644 --- a/ompi/info/Makefile.am +++ b/ompi/info/Makefile.am @@ -6,14 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -22,5 +23,5 @@ headers += \ info/info.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ info/info.c diff --git a/ompi/info/info.c b/ompi/info/info.c index fded842fa76..8f56311edfb 100644 --- a/ompi/info/info.c +++ b/ompi/info/info.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -17,22 +17,18 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" #include "ompi/constants.h" -#ifdef HAVE_STRING_H #include -#endif #include -#ifdef HAVE_STDLIB_H #include -#endif #ifdef HAVE_UNISTD_H #include #endif @@ -96,10 +92,10 @@ opal_pointer_array_t ompi_info_f_to_c_table = {{0}}; * This function is called during ompi_init and initializes the * fortran to C translation table. 
It also fills in the values * for the MPI_INFO_GET_ENV object - */ -int ompi_info_init(void) + */ +int ompi_info_init(void) { - char val[MPI_MAX_INFO_VAL]; + char val[OPAL_MAXHOSTNAMELEN]; char *cptr; /* initialize table */ @@ -138,7 +134,7 @@ int ompi_info_init(void) } /* local host name */ - gethostname(val, MPI_MAX_INFO_VAL); + gethostname(val, sizeof(val)); ompi_info_set(&ompi_mpi_info_env.info, "host", val); /* architecture name */ @@ -216,7 +212,7 @@ int ompi_info_init(void) /* * Duplicate an info */ -int ompi_info_dup (ompi_info_t *info, ompi_info_t **newinfo) +int ompi_info_dup (ompi_info_t *info, ompi_info_t **newinfo) { int err; opal_list_item_t *item; @@ -241,7 +237,7 @@ int ompi_info_dup (ompi_info_t *info, ompi_info_t **newinfo) /* * Set a value on the info */ -int ompi_info_set (ompi_info_t *info, const char *key, const char *value) +int ompi_info_set (ompi_info_t *info, const char *key, const char *value) { char *new_value; ompi_info_entry_t *new_info; @@ -276,10 +272,28 @@ int ompi_info_set (ompi_info_t *info, const char *key, const char *value) } +int ompi_info_set_value_enum (ompi_info_t *info, const char *key, int value, + mca_base_var_enum_t *var_enum) +{ + char *string_value; + int ret; + + ret = var_enum->string_from_value (var_enum, value, &string_value); + if (OPAL_SUCCESS != ret) { + return ret; + } + + ret = ompi_info_set (info, key, string_value); + free (string_value); + return ret; +} + + + /* * Free an info handle and all of its keys and values. 
*/ -int ompi_info_free (ompi_info_t **info) +int ompi_info_free (ompi_info_t **info) { (*info)->i_freed = true; OBJ_RELEASE(*info); @@ -292,7 +306,7 @@ int ompi_info_free (ompi_info_t **info) * Get a value from an info */ int ompi_info_get (ompi_info_t *info, const char *key, int valuelen, - char *value, int *flag) + char *value, int *flag) { ompi_info_entry_t *search; int value_length; @@ -377,14 +391,14 @@ int ompi_info_get_bool(ompi_info_t *info, char *key, bool *value, int *flag) --ptr; } ptr = str; - while (ptr < str + sizeof(str) - 1 && *ptr != '\0' && + while (ptr < str + sizeof(str) - 1 && *ptr != '\0' && isspace(*ptr)) { ++ptr; } if ('\0' != *ptr) { if (isdigit(*ptr)) { *value = (bool) atoi(ptr); - } else if (0 == strcasecmp(ptr, "yes") || + } else if (0 == strcasecmp(ptr, "yes") || 0 == strcasecmp(ptr, "true")) { *value = true; } else if (0 != strcasecmp(ptr, "no") && @@ -427,7 +441,7 @@ int ompi_info_delete (ompi_info_t *info, const char *key) * Return the length of a value */ int ompi_info_get_valuelen (ompi_info_t *info, const char *key, int *valuelen, - int *flag) + int *flag) { ompi_info_entry_t *search; @@ -463,7 +477,7 @@ int ompi_info_get_nthkey (ompi_info_t *info, int n, char *key) n > 0; --n) { iterator = (ompi_info_entry_t *)opal_list_get_next(iterator); - if (opal_list_get_end(&(info->super)) == + if (opal_list_get_end(&(info->super)) == (opal_list_item_t *) iterator) { OPAL_THREAD_UNLOCK(info->i_lock); return MPI_ERR_ARG; @@ -483,50 +497,50 @@ int ompi_info_get_nthkey (ompi_info_t *info, int n, char *key) /* * Shut down MPI_Info handling */ -int ompi_info_finalize(void) +int ompi_info_finalize(void) { size_t i, max; ompi_info_t *info; opal_list_item_t *item; ompi_info_entry_t *entry; bool found = false; - + /* Release MPI_INFO_NULL. Do this so that we don't get a bogus leak report on it. Plus, it's statically allocated, so we don't want to call OBJ_RELEASE on it. 
*/ - + OBJ_DESTRUCT(&ompi_mpi_info_null.info); opal_pointer_array_set_item(&ompi_info_f_to_c_table, 0, NULL); - + /* ditto for MPI_INFO_GET_ENV */ OBJ_DESTRUCT(&ompi_mpi_info_env.info); opal_pointer_array_set_item(&ompi_info_f_to_c_table, 1, NULL); /* Go through the f2c table and see if anything is left. Free them all. */ - + max = opal_pointer_array_get_size(&ompi_info_f_to_c_table); for (i = 2; i < max; ++i) { info = (ompi_info_t *)opal_pointer_array_get_item(&ompi_info_f_to_c_table, i); - + /* If the info was freed but still exists because the user told us to never free handles, then do an OBJ_RELEASE it and all is well. Then get the value again and see if it's actually been freed. */ - + if (NULL != info && ompi_debug_no_free_handles && info->i_freed) { OBJ_RELEASE(info); info = (ompi_info_t *)opal_pointer_array_get_item(&ompi_info_f_to_c_table, i); - } - + } + /* If it still exists here and was never freed, then it's an orphan */ - + if (NULL != info) { - + /* If the user wanted warnings about MPI object leaks, print out a message */ - + if (!info->i_freed && ompi_debug_show_handle_leaks) { if (ompi_debug_show_handle_leaks) { opal_output(0, "WARNING: MPI_Info still allocated at MPI_FINALIZE"); @@ -534,7 +548,7 @@ int ompi_info_finalize(void) opal_list_get_end(&(info->super)) != item; item = opal_list_get_next(item)) { entry = (ompi_info_entry_t *) item; - opal_output(0, "WARNING: key=\"%s\", value=\"%s\"", + opal_output(0, "WARNING: key=\"%s\", value=\"%s\"", entry->ie_key, NULL != entry->ie_value ? 
entry->ie_value : "(null)"); found = true; @@ -542,7 +556,7 @@ int ompi_info_finalize(void) } OBJ_RELEASE(info); } - + /* Don't bother setting each element back down to NULL; it would just take a lot of thread locks / unlocks and since we're destroying everything, it isn't worth it */ @@ -552,7 +566,7 @@ int ompi_info_finalize(void) } } } - + /* All done -- destroy the table */ OBJ_DESTRUCT(&ompi_info_f_to_c_table); @@ -564,9 +578,9 @@ int ompi_info_finalize(void) * This function is invoked when OBJ_NEW() is called. Here, we add this * info pointer to the table and then store its index as the handle */ -static void info_constructor(ompi_info_t *info) +static void info_constructor(ompi_info_t *info) { - info->i_f_to_c_index = opal_pointer_array_add(&ompi_info_f_to_c_table, + info->i_f_to_c_index = opal_pointer_array_add(&ompi_info_f_to_c_table, info); info->i_lock = OBJ_NEW(opal_mutex_t); info->i_freed = false; @@ -581,17 +595,17 @@ static void info_constructor(ompi_info_t *info) /* - * This function is called during OBJ_DESTRUCT of "info". When this - * done, we need to remove the entry from the ompi fortran to C + * This function is called during OBJ_DESTRUCT of "info". 
When this + * done, we need to remove the entry from the ompi fortran to C * translation table - */ -static void info_destructor(ompi_info_t *info) + */ +static void info_destructor(ompi_info_t *info) { opal_list_item_t *item; ompi_info_entry_t *iterator; /* Remove every key in the list */ - + for (item = opal_list_remove_first(&(info->super)); NULL != item; item = opal_list_remove_first(&(info->super))) { @@ -601,11 +615,11 @@ static void info_destructor(ompi_info_t *info) /* reset the &ompi_info_f_to_c_table entry - make sure that the entry is in the table */ - + if (MPI_UNDEFINED != info->i_f_to_c_index && - NULL != opal_pointer_array_get_item(&ompi_info_f_to_c_table, + NULL != opal_pointer_array_get_item(&ompi_info_f_to_c_table, info->i_f_to_c_index)){ - opal_pointer_array_set_item(&ompi_info_f_to_c_table, + opal_pointer_array_set_item(&ompi_info_f_to_c_table, info->i_f_to_c_index, NULL); } @@ -618,14 +632,14 @@ static void info_destructor(ompi_info_t *info) /* * ompi_info_entry_t interface functions */ -static void info_entry_constructor(ompi_info_entry_t *entry) +static void info_entry_constructor(ompi_info_entry_t *entry) { memset(entry->ie_key, 0, sizeof(entry->ie_key)); entry->ie_key[MPI_MAX_INFO_KEY] = 0; } -static void info_entry_destructor(ompi_info_entry_t *entry) +static void info_entry_destructor(ompi_info_entry_t *entry) { if (NULL != entry->ie_value) { free(entry->ie_value); @@ -645,7 +659,7 @@ static ompi_info_entry_t *info_find_key (ompi_info_t *info, const char *key) /* No thread locking in here! */ - /* Iterate over all the entries. If the key is found, then + /* Iterate over all the entries. If the key is found, then * return immediately. 
Else, the loop will fall of the edge * and NULL is returned */ @@ -703,7 +717,7 @@ ompi_info_value_to_bool(char *value, bool *interp) *interp = false; } else { *interp = true; - } + } return OMPI_SUCCESS; } diff --git a/ompi/info/info.h b/ompi/info/info.h index 772d207349a..15881273522 100644 --- a/ompi/info/info.h +++ b/ompi/info/info.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -151,7 +151,7 @@ int ompi_info_finalize(void); */ int ompi_info_dup (ompi_info_t *info, ompi_info_t **newinfo); -/* +/** * Set a new key,value pair on info. * * @param info pointer to ompi_info_t object @@ -163,6 +163,21 @@ int ompi_info_dup (ompi_info_t *info, ompi_info_t **newinfo); */ OMPI_DECLSPEC int ompi_info_set (ompi_info_t *info, const char *key, const char *value); +/** + * Set a new key,value pair from a variable enumerator. + * + * @param info pointer to ompi_info_t object + * @param key pointer to the new key object + * @param value integer value of the info key (must be valid in var_enum) + * @param var_enum variable enumerator + * + * @retval MPI_SUCCESS upon success + * @retval MPI_ERR_NO_MEM if out of memory + * @retval OPAL_ERR_VALUE_OUT_OF_BOUNDS if the value is not valid in the enumerator + */ +OMPI_DECLSPEC int ompi_info_set_value_enum (ompi_info_t *info, const char *key, int value, + mca_base_var_enum_t *var_enum); + /** * ompi_info_free - Free an 'MPI_Info' object. 
* diff --git a/ompi/mca/Makefile.am b/ompi/mca/Makefile.am index 8d3fca3fb4f..89dcb0f652b 100644 --- a/ompi/mca/Makefile.am +++ b/ompi/mca/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/bcol/Makefile.am b/ompi/mca/bcol/Makefile.am deleted file mode 100644 index 9f78f939f01..00000000000 --- a/ompi/mca/bcol/Makefile.am +++ /dev/null @@ -1,35 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_bcol.la -libmca_bcol_la_SOURCES = - -# header setup -nobase_ompi_HEADERS = -nobase_nodist_ompi_HEADERS = - -# local files -headers = bcol.h -libmca_bcol_la_SOURCES += $(headers) $(nodist_headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -nobase_ompi_HEADERS += $(headers) -nobase_nodist_ompi_HEADERS += $(nodist_headers) -ompidir = $(ompiincludedir)/ompi/mca/bcol -else -ompidir = $(includedir) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/ompi/mca/bcol/base/Makefile.am b/ompi/mca/bcol/base/Makefile.am deleted file mode 100644 index 929bef0f5b8..00000000000 --- a/ompi/mca/bcol/base/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -headers += \ - base/base.h -libmca_bcol_la_SOURCES += \ - base/bcol_base_frame.c \ - base/bcol_base_init.c diff --git a/ompi/mca/bcol/base/base.h b/ompi/mca/bcol/base/base.h deleted file mode 100644 index b95bea398bb..00000000000 --- a/ompi/mca/bcol/base/base.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_BASE_H -#define MCA_BCOL_BASE_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/class/opal_list.h" -#include "ompi/mca/bcol/bcol.h" - -/* - * Global functions for BCOL - */ - -BEGIN_C_DECLS - -OMPI_DECLSPEC extern opal_list_t mca_bcol_base_components_in_use; -OMPI_DECLSPEC extern char *ompi_bcol_bcols_string; - -OMPI_DECLSPEC extern mca_base_framework_t ompi_bcol_base_framework; - -OMPI_DECLSPEC int mca_bcol_base_init(bool enable_progress_threads, bool enable_mpi_threads); - -struct mca_bcol_base_module_t; -OMPI_DECLSPEC int mca_bcol_base_bcol_fns_table_init(struct mca_bcol_base_module_t *bcol_module); - -OMPI_DECLSPEC int mca_bcol_base_fn_table_construct(struct mca_bcol_base_module_t *bcol_module); - -OMPI_DECLSPEC int mca_bcol_base_fn_table_destroy(struct mca_bcol_base_module_t *bcol_module); - -OMPI_DECLSPEC int mca_bcol_base_set_attributes(struct mca_bcol_base_module_t *bcol_module, - mca_bcol_base_coll_fn_comm_attributes_t *comm_attribs, - mca_bcol_base_coll_fn_invoke_attributes_t *inv_attribs, - mca_bcol_base_module_collective_fn_primitives_t bcol_fn, - mca_bcol_base_module_collective_fn_primitives_t progress_fn); - -END_C_DECLS - -#endif /* MCA_BCOL_BASE_H */ diff --git a/ompi/mca/bcol/base/bcol_base_frame.c b/ompi/mca/bcol/base/bcol_base_frame.c deleted file mode 100644 index 
af9c42a7494..00000000000 --- a/ompi/mca/bcol/base/bcol_base_frame.c +++ /dev/null @@ -1,374 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - - -#include "ompi_config.h" -#include - -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNIST_H */ -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/util/argv.h" - -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/include/ompi/constants.h" -#include "opal/mca/mpool/mpool.h" -#include "opal/class/opal_list.h" -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. 
- */ - -#include "ompi/mca/bcol/base/static-components.h" - -static int mca_bcol_base_open(mca_base_open_flag_t flags); -static int mca_bcol_base_close (void); -static int mca_bcol_base_register(mca_base_register_flag_t flags); - -/* -** * Global variables -** */ -MCA_BASE_FRAMEWORK_DECLARE(ompi, bcol, NULL, mca_bcol_base_register, mca_bcol_base_open, mca_bcol_base_close, - mca_bcol_base_static_components, 0); - -OMPI_DECLSPEC opal_list_t mca_bcol_base_components_in_use = {{0}}; -OMPI_DECLSPEC char *ompi_bcol_bcols_string = NULL; -OMPI_DECLSPEC int bcol_mpool_compatibility[BCOL_SIZE][BCOL_SIZE] = {{0}}; -OMPI_DECLSPEC int bcol_mpool_index[BCOL_SIZE][BCOL_SIZE] = {{0}}; - -static void bcol_base_module_constructor(mca_bcol_base_module_t *module) -{ - int fnc; - - module->bcol_component = NULL; - module->network_context = NULL; - module->context_index = -1; - module->supported_mode = 0; - module->init_module = NULL; - module->sbgp_partner_module = NULL; - module->squence_number_offset = 0; - module->n_poll_loops = 0; - - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - module->bcol_function_table[fnc] = NULL; - module->small_message_thresholds[fnc] = BCOL_THRESHOLD_UNLIMITED; - } - - module->set_small_msg_thresholds = NULL; - - module->header_size = 0; - module->bcol_memory_init = NULL; - - module->next_inorder = NULL; - - mca_bcol_base_fn_table_construct(module); -} - -static void bcol_base_module_destructor(mca_bcol_base_module_t *module) -{ - int fnc; - - module->bcol_component = NULL; - - module->context_index = -1; - module->init_module = NULL; - module->sbgp_partner_module = NULL; - module->squence_number_offset = 0; - module->n_poll_loops = 0; - - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - module->bcol_function_table[fnc] = NULL; - } - - module->bcol_memory_init = NULL; -} - -OBJ_CLASS_INSTANCE(mca_bcol_base_module_t, - opal_object_t, - bcol_base_module_constructor, - bcol_base_module_destructor); - -static void 
bcol_base_network_context_constructor(bcol_base_network_context_t *nc) -{ - nc->context_id = -1; - nc->context_data = NULL; -} - -static void bcol_base_network_context_destructor(bcol_base_network_context_t *nc) -{ - nc->context_id = -1; - nc->context_data = NULL; - nc->register_memory_fn = NULL; - nc->deregister_memory_fn = NULL; -} - -OBJ_CLASS_INSTANCE(bcol_base_network_context_t, - opal_object_t, - bcol_base_network_context_constructor, - bcol_base_network_context_destructor); - -/* get list of subgrouping coponents to use */ -static int mca_bcol_base_set_components_to_use(opal_list_t *bcol_components_avail, - opal_list_t *bcol_components_in_use) -{ - /* local variables */ - const mca_base_component_t *b_component; - - mca_base_component_list_item_t *b_cli; - mca_base_component_list_item_t *b_clj; - - char **bcols_requested; - const char *b_component_name; - - /* split the requst for the bcol modules */ - bcols_requested = opal_argv_split(ompi_bcol_bcols_string, ','); - if (NULL == bcols_requested) { - return OMPI_ERROR; - } - - /* Initialize list */ - OBJ_CONSTRUCT(bcol_components_in_use, opal_list_t); - - /* figure out basic collective modules to use */ - /* loop over list of components requested */ - for (int i = 0 ; bcols_requested[i] ; ++i) { - /* loop over discovered components */ - OPAL_LIST_FOREACH(b_cli, bcol_components_avail, mca_base_component_list_item_t) { - b_component = b_cli->cli_component; - b_component_name = b_component->mca_component_name; - - if (0 == strcmp (b_component_name, bcols_requested[i])) { - /* found selected component */ - b_clj = OBJ_NEW(mca_base_component_list_item_t); - if (NULL == b_clj) { - opal_argv_free (bcols_requested); - return OPAL_ERR_OUT_OF_RESOURCE; - } - - b_clj->cli_component = b_component; - opal_list_append(bcol_components_in_use, - (opal_list_item_t *) b_clj); - break; - } /* end check for bcol component */ - } - } - - /* Note: Need to add error checking to make sure all requested functions - ** were found */ - 
- /* - ** release resources - ** */ - - opal_argv_free (bcols_requested); - - return OMPI_SUCCESS; -} - -static int mca_bcol_base_register(mca_base_register_flag_t flags) -{ - /* figure out which bcol and sbgp components will actually be used */ - /* get list of sub-grouping functions to use */ - ompi_bcol_bcols_string = "basesmuma,basesmuma,iboffload,ptpcoll,ugni"; - (void) mca_base_var_register("ompi", "bcol", "base", "string", - "Default set of basic collective components to use", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_bcol_bcols_string); - - return OMPI_SUCCESS; -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. - */ -static int mca_bcol_base_open(mca_base_open_flag_t flags) -{ - int ret; - - /* Open up all available components */ - if (OMPI_SUCCESS != - (ret = mca_base_framework_components_open(&ompi_bcol_base_framework, flags))) { - return ret; - } - - ret = mca_bcol_base_set_components_to_use(&ompi_bcol_base_framework.framework_components, - &mca_bcol_base_components_in_use); - if (OMPI_SUCCESS != ret) { - return ret; - } - - /* memory registration compatibilities */ - bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_SHARED_MEMORY_UMA]=1; - bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_SHARED_MEMORY_SOCKET]=1; - bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_POINT_TO_POINT]=1; - bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_IB_OFFLOAD]=1; - bcol_mpool_compatibility[BCOL_SHARED_MEMORY_SOCKET][BCOL_SHARED_MEMORY_UMA]=1; - bcol_mpool_compatibility[BCOL_POINT_TO_POINT] [BCOL_SHARED_MEMORY_UMA]=1; - bcol_mpool_compatibility[BCOL_IB_OFFLOAD] [BCOL_SHARED_MEMORY_UMA]=1; - - return OMPI_SUCCESS; -} - -static int mca_bcol_base_close (void) -{ - opal_list_item_t *item; - - while (NULL != (item = opal_list_remove_first (&mca_bcol_base_components_in_use))) { - OBJ_RELEASE(item); - } - - 
OBJ_DESTRUCT(&mca_bcol_base_components_in_use); - - return mca_base_framework_components_close(&ompi_bcol_base_framework, NULL); -} - -/* - * Prototype implementation of selection logic - */ -int mca_bcol_base_fn_table_construct(struct mca_bcol_base_module_t *bcol_module){ - - int bcol_fn; - /* Call all init functions */ - - /* Create a function table */ - for (bcol_fn = 0; bcol_fn < BCOL_NUM_OF_FUNCTIONS; bcol_fn++){ - /* Create a list object for each bcol type list */ - OBJ_CONSTRUCT(&(bcol_module->bcol_fns_table[bcol_fn]), opal_list_t); - } - - return OMPI_SUCCESS; -} - -int mca_bcol_base_fn_table_destroy(struct mca_bcol_base_module_t *bcol_module){ - - int bcol_fn; - - for (bcol_fn = 0; bcol_fn < BCOL_NUM_OF_FUNCTIONS; bcol_fn++){ - /* gvm FIX: Go through the list and destroy each item */ - /* Destroy the function table object for each bcol type list */ - OBJ_DESTRUCT(&(bcol_module->bcol_fns_table[bcol_fn])); - } - - return OMPI_SUCCESS; -} - -int mca_bcol_base_set_attributes(struct mca_bcol_base_module_t *bcol_module, - mca_bcol_base_coll_fn_comm_attributes_t *arg_comm_attribs, - mca_bcol_base_coll_fn_invoke_attributes_t *arg_inv_attribs, - mca_bcol_base_module_collective_fn_primitives_t bcol_fn, - mca_bcol_base_module_collective_fn_primitives_t progress_fn - ) -{ - mca_bcol_base_coll_fn_comm_attributes_t *comm_attribs = NULL; - mca_bcol_base_coll_fn_invoke_attributes_t *inv_attribs = NULL; - struct mca_bcol_base_coll_fn_desc_t *fn_filtered = NULL; - int coll_type; - - comm_attribs = malloc(sizeof(mca_bcol_base_coll_fn_comm_attributes_t)); - if (NULL == comm_attribs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - inv_attribs = malloc(sizeof(mca_bcol_base_coll_fn_invoke_attributes_t)); - - if (NULL == inv_attribs) { - free(comm_attribs); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - coll_type = comm_attribs->bcoll_type = arg_comm_attribs->bcoll_type; - comm_attribs->comm_size_min = arg_comm_attribs->comm_size_min; - comm_attribs->comm_size_max = 
arg_comm_attribs->comm_size_max; - comm_attribs->data_src = arg_comm_attribs->data_src; - comm_attribs->waiting_semantics = arg_comm_attribs->waiting_semantics; - - inv_attribs->bcol_msg_min = arg_inv_attribs->bcol_msg_min; - inv_attribs->bcol_msg_max = arg_inv_attribs->bcol_msg_max ; - inv_attribs->datatype_bitmap = arg_inv_attribs->datatype_bitmap ; - inv_attribs->op_types_bitmap = arg_inv_attribs->op_types_bitmap; - - fn_filtered = OBJ_NEW(mca_bcol_base_coll_fn_desc_t); - - fn_filtered->coll_fn = bcol_fn; - fn_filtered->progress_fn = progress_fn; - - fn_filtered->comm_attr = comm_attribs; - fn_filtered->inv_attr = inv_attribs; - - - opal_list_append(&(bcol_module->bcol_fns_table[coll_type]),(opal_list_item_t*)fn_filtered); - - return OMPI_SUCCESS; -} - -int mca_bcol_base_bcol_fns_table_init(struct mca_bcol_base_module_t *bcol_module){ - - int ret, bcol_init_fn; - - for (bcol_init_fn =0; bcol_init_fn < BCOL_NUM_OF_FUNCTIONS; bcol_init_fn++) { - if (NULL != bcol_module->bcol_function_init_table[bcol_init_fn]) { - ret = (bcol_module->bcol_function_init_table[bcol_init_fn]) (bcol_module); - if (OMPI_SUCCESS != ret) { - return OMPI_ERROR; - } - } - } - - return OMPI_SUCCESS; -} - -static void mca_bcol_base_coll_fn_desc_constructor(mca_bcol_base_coll_fn_desc_t *fn) -{ - fn->comm_attr = NULL; - fn->inv_attr = NULL; -} - -static void mca_bcol_base_coll_fn_desc_destructor(mca_bcol_base_coll_fn_desc_t *fn) -{ - if (fn->comm_attr) { - free(fn->comm_attr); - } - - if (fn->inv_attr) { - free(fn->inv_attr); - } -} - -OBJ_CLASS_INSTANCE(mca_bcol_base_coll_fn_desc_t, - opal_list_item_t, - mca_bcol_base_coll_fn_desc_constructor, - mca_bcol_base_coll_fn_desc_destructor); - -static void lmngr_block_constructor(mca_bcol_base_lmngr_block_t *item) -{ - item->base_addr = NULL; -} - -static void lnmgr_block_destructor(mca_bcol_base_lmngr_block_t *item) -{ - /* I have nothing to do here */ -} -OBJ_CLASS_INSTANCE(mca_bcol_base_lmngr_block_t, - opal_list_item_t, - lmngr_block_constructor, 
- lnmgr_block_destructor); diff --git a/ompi/mca/bcol/base/bcol_base_init.c b/ompi/mca/bcol/base/bcol_base_init.c deleted file mode 100644 index f6f0360cd98..00000000000 --- a/ompi/mca/bcol/base/bcol_base_init.c +++ /dev/null @@ -1,45 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/include/ompi/constants.h" - -int mca_bcol_base_init(bool enable_progress_threads, bool enable_mpi_threads) -{ - mca_bcol_base_component_t *bcol_component; - mca_base_component_list_item_t *cli; - int ret; - - OPAL_LIST_FOREACH(cli, &mca_bcol_base_components_in_use, mca_base_component_list_item_t) { - bcol_component = (mca_bcol_base_component_t *) cli->cli_component; - - if (false == bcol_component->init_done) { - ret = bcol_component->collm_init_query(true, true); - if (OMPI_SUCCESS != ret) { - return ret; - } - - bcol_component->init_done = true; - } - } - - return OMPI_SUCCESS; -} - - - diff --git a/ompi/mca/bcol/base/owner.txt b/ompi/mca/bcol/base/owner.txt deleted file mode 100644 index 51ea04a5175..00000000000 --- a/ompi/mca/bcol/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL? 
-status: unmaintained diff --git a/ompi/mca/bcol/basesmuma/Makefile.am b/ompi/mca/bcol/basesmuma/Makefile.am deleted file mode 100644 index 7a2a5b8c093..00000000000 --- a/ompi/mca/bcol/basesmuma/Makefile.am +++ /dev/null @@ -1,66 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - bcol_basesmuma.h \ - bcol_basesmuma_utils.h \ - bcol_basesmuma_bcast.c \ - bcol_basesmuma_component.c \ - bcol_basesmuma_module.c \ - bcol_basesmuma_buf_mgmt.c \ - bcol_basesmuma_mem_mgmt.c \ - bcol_basesmuma_fanin.c \ - bcol_basesmuma_fanout.c \ - bcol_basesmuma_progress.c \ - bcol_basesmuma_reduce.h \ - bcol_basesmuma_reduce.c \ - bcol_basesmuma_allreduce.c \ - bcol_basesmuma_setup.c \ - bcol_basesmuma_rd_barrier.c \ - bcol_basesmuma_rd_nb_barrier.c \ - bcol_basesmuma_rk_barrier.c \ - bcol_basesmuma_utils.c \ - bcol_basesmuma_bcast_prime.c \ - bcol_basesmuma_lmsg_knomial_bcast.c \ - bcol_basesmuma_lmsg_bcast.c \ - bcol_basesmuma_gather.c \ - bcol_basesmuma_allgather.c \ - bcol_basesmuma_smcm.h \ - bcol_basesmuma_smcm.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_bcol_basesmuma_DSO -component_install += mca_bcol_basesmuma.la -else -component_noinst += libmca_bcol_basesmuma.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. 
- -AM_CPPFLAGS = $(btl_portals_CPPFLAGS) - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_bcol_basesmuma_la_SOURCES = $(sources) -mca_bcol_basesmuma_la_LDFLAGS = -module -avoid-version $(btl_portals_LDFLAGS) -mca_bcol_basesmuma_la_LIBADD = \ - $(btl_portals_LIBS) - - -noinst_LTLIBRARIES = $(component_noinst) -libmca_bcol_basesmuma_la_SOURCES =$(sources) -libmca_bcol_basesmuma_la_LDFLAGS = -module -avoid-version $(btl_portals_LDFLAGS) diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma.h b/ompi/mca/bcol/basesmuma/bcol_basesmuma.h deleted file mode 100644 index 7b6c69d2c36..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma.h +++ /dev/null @@ -1,1270 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#ifndef MCA_BCOL_basesmuma_EXPORT_H -#define MCA_BCOL_basesmuma_EXPORT_H - -#include "ompi_config.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" -#include "ompi/proc/proc.h" -#include "ompi/patterns/net/netpatterns.h" - -#include "ompi/mca/mca.h" -#include "opal/util/arch.h" -#include "opal/util/argv.h" -#include "opal/datatype/opal_datatype.h" -#include "opal/util/output.h" - -#include "bcol_basesmuma_smcm.h" -BEGIN_C_DECLS - -struct list_data_t { - opal_list_item_t super; - void *data; -}; -typedef struct list_data_t list_data_t; -OBJ_CLASS_DECLARATION(list_data_t); - -/* - * Macro's for manipulating the 64 bit shared memory control bits. - * The 64 bit field is devided into 4 bit fields - * - * | 48-63: src | 32-47: index | 16-31: flag | 0-15: sequence number | - * - * Only the low 16 bits of the sequence number will be put in the header - * space. We will use the fact that the use of the shared buffers is - * synchronous, and get the upper 48 bits from the local process space. 
- */ - -#define BASESMUMA_CACHE_LINE_SIZE 128 - -#define SHIFT_UP << -#define SHIFT_DOWN >> - -#define SEQ_WIDTH 16 -#define SEQ_BASE 0 -#define FIELD_SEQ_MASK ( ( 1 SHIFT_UP SEQ_WIDTH ) - 1 ) -#define INPLACE_SEQ_MASK ( (int64_t)FIELD_SEQ_MASK SHIFT_UP SEQ_BASE) - -#define FLAG_WIDTH 16 -#define FLAG_BASE 16 -#define FIELD_FLAG_MASK ( ( 1 SHIFT_UP FLAG_WIDTH ) - 1 ) -#define INPLACE_FLAG_MASK ( (int64_t)FIELD_FLAG_MASK SHIFT_UP FLAG_BASE) - -#define INDX_WIDTH 16 -#define INDX_BASE 32 -#define FIELD_INDX_MASK ( ( 1 SHIFT_UP INDX_WIDTH ) - 1 ) -#define INPLACE_INDX_MASK ( (int64_t)FIELD_INDX_MASK SHIFT_UP INDX_BASE) - -#define SRC_WIDTH 16 -#define SRC_BASE 48 -#define FIELD_SRC_MASK ( ( 1 SHIFT_UP SRC_WIDTH ) - 1 ) -#define INPLACE_SRC_MASK ( (int64_t)FIELD_SRC_MASK SHIFT_UP SRC_BASE) -/*int64_t INPLACE_SRC_MASK= ((int64_t)FIELD_SRC_MASK SHIFT_UP SRC_BASE); */ - - -#define EXTRACT_FLAG(INPUT, OUTPUT, OUTPUT_TYPE, FIELD_BASE, FIELD_MASK) \ - OUTPUT = (OUTPUT_TYPE) ( (INPUT SHIFT_DOWN FIELD_BASE ) & FIELD_MASK ) - -#define STORE_FLAG(INPUT, OUTPUT, INPUT_TYPE, OUTPUT_TYPE, FIELD_BASE, INPLACE_FIELD_MASK ) \ - OUTPUT = \ - ( \ - /* 3 */ \ - ( \ - /* 2 */ \ - ( \ - /* 1 - shift the input field to the proper location */ \ - (OUTPUT_TYPE)( \ - ((OUTPUT_TYPE)((INPUT_TYPE) (INPUT))) \ - SHIFT_UP FIELD_BASE ) \ - /* mask off the extra bits */ \ - & ((OUTPUT_TYPE)INPLACE_FIELD_MASK) \ - ) \ - /* store back to the OUTPUT field, w/o destroying other fields */ \ - ) | OUTPUT \ - ) - -/** - * Structure to hold the basic shared memory bcoll component. 
- */ -struct mca_bcol_basesmuma_component_t { - /** Base coll component */ - mca_bcol_base_component_2_0_0_t super; - - /* management data for collectives with no user data */ - - /** MCA parameter: number of memory banks */ - int basesmuma_num_mem_banks; - - /** MCA parameter: number of regions per memory bank */ - int basesmuma_num_regions_per_bank; - - /** MCA parameter: Number of simultaneous groups supported */ - int n_groups_supported; - - /* management data for collectives with user data (ud) - the memory - * is actually obtained at the ML level - */ - - /** MCA paramenter: number of polling loops to run while waiting - * for children or parent to complete their work - */ - int n_poll_loops; - - /* mpool size */ - size_t mpool_size; - - - /* mpool inited - will use this to test whether or not the - * shared memory has been inited - */ - bool mpool_inited; - - /* shared memory control buffer - the control structures reside - * in shared memory */ - bcol_basesmuma_smcm_mmap_t *sm_ctl_structs; - - /* shared memory payload buffer - */ - bcol_basesmuma_smcm_mmap_t *sm_payload_structs; - - /* - * list of shared memory control structures - */ - opal_list_t ctl_structures; - - - /** opal list in which the list of peers that I am "connected" to is stored - */ - opal_list_t sm_connections_list; - - /* opal list in which the list of payload peers that I am "connected" to - * is stored - */ - opal_list_t sm_payload_connections_list; - - /* - * list of non-blocking admin barriers to progress */ - opal_mutex_t nb_admin_barriers_mutex; - opal_list_t nb_admin_barriers; - - /* - * order of fan-in tree - */ - int radix_fanin; - - /* - * order of fan-out tree - */ - int radix_fanout; - - /* - * Order of read tree - */ - int radix_read_tree; - - /* - * order of reduction fan-out tree - */ - int order_reduction_tree; - - /* - * K-nomial tree radix - */ - int k_nomial_radix; - - /* - * K-ary scatter tree radix - */ - int scatter_kary_radix; - - /* - * number of polling loops - */ 
- int num_to_probe; - - /* - * Portals addressing info - * void*: because wanted to keep portal library dependencies - * as local as possible - */ - void *portals_info; - bool portals_init; - - /* - * verbosity level - */ - int verbose; - - /* - * control file name base string - */ - char *clt_base_fname; - - /* - * data file name base string - */ - char *payload_base_fname; - - /* - * shared memory scratch space. This is mapped at the end of the - * segement of memory holding the control structures. - */ - char *my_scratch_shared_memory; - - /* - * size of scratch memory - */ - size_t my_scratch_shared_memory_size; - - /* the offset will be the same for all ranks */ - size_t scratch_offset_from_base_ctl_file; -}; - -static inline int mca_bcol_basesmuma_err(const char* fmt, ...) -{ - va_list list; - int ret; - - va_start(list, fmt); - ret = vfprintf(stderr, fmt, list); - va_end(list); - return ret; -} - -#if OPAL_ENABLE_DEBUG -#define BASESMUMA_VERBOSE(level, args) \ - do { \ - if(mca_bcol_basesmuma_component.verbose >= level) { \ - mca_bcol_basesmuma_err("[%s]%s[%s:%d:%s] BCOL-BASESMUMA ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_bcol_basesmuma_err args; \ - mca_bcol_basesmuma_err("\n"); \ - } \ - } while(0) -#else -#define BASESMUMA_VERBOSE(level, args) -#endif - - -/** - * Convenience typedef */ -typedef struct mca_bcol_basesmuma_component_t mca_bcol_basesmuma_component_t; - -#if 0 -/* - * Implemented function index list - */ - -/* barrier */ -enum{ - FANIN_FAN_OUT_BARRIER_FN, - RECURSIVE_DOUBLING_BARRIER_FN, - N_BARRIER_FNS -}; - -/* reduce */ -enum{ - FANIN_REDUCE_FN, - REDUCE_SCATTER_GATHER_FN, - N_REDUCE_FNS -}; -enum{ - SHORT_DATA_FN_REDUCE, - LONG_DATA_FN_REDUCE, - N_REDUCE_FNS_USED -}; - -/* all-reduce */ -enum{ - FANIN_FANOUT_ALLREDUCE_FN, - REDUCE_SCATTER_ALLGATHER_FN, - N_ALLREDUCE_FNS -}; -enum{ - SHORT_DATA_FN_ALLREDUCE, - LONG_DATA_FN_ALLREDUCE, - N_ALLREDUCE_FNS_USED -}; - - 
-/* enum for node type */ -enum{ - ROOT_NODE, - LEAF_NODE, - INTERIOR_NODE -}; - - -/* - * N-order tree node description - */ -struct tree_node_t { - /* my rank within the group */ - int my_rank; - /* my node type - root, leaf, or interior */ - int my_node_type; - /* number of nodes in the tree */ - int tree_size; - /* number of parents (0/1) */ - int n_parents; - /* number of children */ - int n_children; - /* parent rank within the group */ - int parent_rank; - /* chidren ranks within the group */ - int *children_ranks; -}; -typedef struct tree_node_t tree_node_t; - -/* - * Pair-wise data exchange - */ -/* enum for node type */ -enum{ - EXCHANGE_NODE, - EXTRA_NODE -}; - -struct pair_exchange_node_t { - - /* my rank within the group */ - int my_rank; - - /* number of nodes this node will exchange data with */ - int n_exchanges; - - /* ranks of nodes involved in data exchnge */ - int *rank_exchanges; - - /* number of extra sources of data - outside largest power of 2 in - * this group */ - int n_extra_sources; - - /* rank of the extra source */ - int rank_extra_source; - - /* number of tags needed per stripe */ - int n_tags; - - /* log 2 of largest full power of 2 for this node set */ - int log_2; - - /* largest power of 2 that fits in this group */ - int n_largest_pow_2; - - /* node type */ - int node_type; - -}; -typedef struct pair_exchange_node_t pair_exchange_node_t; -#endif -/* - * descriptor for managing the admin nonblocking barrier routine. - * This is an sm internal routine, and assumes only 1 outstanding - * nb-barrier collective call per block. 
- */ -/* forward declarations */ -struct mca_bcol_basesmuma_module_t; -struct sm_buffer_mgmt; - -struct sm_nbbar_desc_t { - /* make sure we can put this on a list */ - opal_list_item_t super; - - /* phase of the collective operation - needed to know how to continue - * progressing the nb-barrier */ - int collective_phase; - - /* iteration to continue at */ - int recursive_dbl_iteration; - - /* pointer to the collective module this is associated with */ - struct mca_bcol_basesmuma_module_t *sm_module; - - /* pointer to payload/control structs buffers */ - struct sm_buffer_mgmt *coll_buff; - - /* pool index */ - int pool_index; - - /* pointer to the mca_bcol_base_memory_block_desc_t structure - * that is actually managing this registration. - * This is meaningful when these control structures - * are used in conjunction with the user payload - * data that is allocated at the ml level. - */ - void *ml_memory_block_descriptor; - -}; -typedef struct sm_nbbar_desc_t sm_nbbar_desc_t; - -/* - * Barrier request objects - */ - -/* shared memory data strucutures */ -struct mca_bcol_basesmuma_nb_request_process_shared_mem_t { - volatile uint64_t coll_index; - /* flag used to indicate the status of this memory region */ - volatile uint64_t flag; - volatile uint64_t index; - - /* pading */ - /* Note: need to change this so it takes less memory */ - char padding[BASESMUMA_CACHE_LINE_SIZE-3*sizeof(uint64_t)]; -}; - -typedef struct mca_bcol_basesmuma_nb_request_process_shared_mem_t -mca_bcol_basesmuma_nb_request_process_shared_mem_t; - -/* enum for phase at which the nb barrier is in */ -enum{ - NB_BARRIER_INACTIVE, - - /* fan-in/fan-out */ - NB_BARRIER_FAN_IN, - NB_BARRIER_FAN_OUT, - - /* recursive doubling */ - NB_PRE_PHASE, - NB_RECURSIVE_DOUBLING, - NB_POST_PHASE, - - /* done and not started are the same for all practicle - * purposes, as the init funtion always sets this flag - */ - NB_BARRIER_DONE -}; - - - -/* forward declartion */ -struct mca_bcol_basesmuma_module_t; - 
-struct mca_basesmuma_ctrl_4_hdl_t { - int fd; - bool status; - volatile char buf[128]; - /*volatile char buf[OPAL_PATH_MAX];*/ -}; -typedef struct mca_basesmuma_ctrl_4_hdl_t mca_basesmuma_ctrl_4_hdl_t; - -/* control segment for shared memory */ -struct mca_bcol_basesmuma_ctl_struct_t { - /* collective identifier */ - volatile int64_t sequence_number; - volatile int64_t flag; - volatile int64_t index; - volatile int64_t offset; - volatile int64_t offset_zip; - - - /* used for non-blocking algorithms */ - int status; - int active_requests; - int iteration; - - int *src_ptr; - - int start; - - /* process private data */ - int starting_flag_value; - - /* experiment for large data colls */ - int n_sends; - int length; - - /* hdl framework control structure*/ - /* no need to pad at this level anymore */ - volatile int64_t data_hdl; - volatile mca_basesmuma_ctrl_4_hdl_t hdl_ctrl; - -#ifdef __PORTALS_AVAIL__ - struct mca_bcol_basesmuma_portal_buf_addr_t portals_buf_addr; -#endif - /* padding */ - /*char padding[BASESMUMA_CACHE_LINE_SIZE-4*sizeof(uint64_t)-3*sizeof(int)];*/ - char padding[BASESMUMA_CACHE_LINE_SIZE-6*sizeof(int64_t)-5*sizeof(int)]; -}; -typedef struct mca_bcol_basesmuma_ctl_struct_t mca_bcol_basesmuma_ctl_struct_t; - - -#define SM_BCOLS_MAX 2 - -/* enum for signaling flag bank, when - * adding to this list, please keep - * it alphabetical - */ -enum { - ALLGATHER_FLAG, - ALLREDUCE_FLAG, - BARRIER_FANIN_FLAG, - BARRIER_FANOUT_FLAG, - BARRIER_RKING_FLAG, - BCAST_FLAG, - GATHER_FLAG, - REDUCE_FLAG, - NUM_SIGNAL_FLAGS -}; - - -/* control region for colls with user data - shared memory */ -struct mca_bcol_basesmuma_header_t { - /* collective identifier */ - volatile int64_t sequence_number; - volatile int8_t flags[NUM_SIGNAL_FLAGS][SM_BCOLS_MAX]; - volatile int32_t src; /* src of bcast data for unknown root, - bcol id for known root - */ - /* starting flag - hierarchies */ - int8_t starting_flag_value[SM_BCOLS_MAX]; - int8_t ready_flag; - - /* Manju: Cached 
array of receive buffer offsets - * - * This array stores the receive buffer offsets (rbuf_offsets) of data buffer. - * In general, we assume that sbuf_offset and rbuf_offset of - * processes invoking the collective primitive is same. This is - * true when the order in which processes invoke their hierarchies are - * same. - * - * For some algorithms (like broadcast, reduce) we split the ML buffer - * and use first half as - * source and second half as receive buffer. We swap these buffers for - * each change when we change levels i.e., if first half is source for - * level 1, in the level 2 of hierarchy it becomes the receive buffer. - * For reduce algorithm, each process can invoke hierarchies - * (primitives) in different order. For example, process 1 might have level 1 as SM - * and level 2 as p2p, and process 2 might have different order where its - * level 1 is p2p and level 2 SM. In this case, if in basesmuma reduce - * algorithm, if parent assumes its rbuf_offset as child's rbuf_offset - * it is wrong. So we cache rbuf_offset of each process so - * it could be accessed by processes to obtain the data. - */ - - volatile int32_t roffsets[SM_BCOLS_MAX]; - - /* Manju Start: Experimental ctl fields and should be removed later; - * This is used for lmsg reduce for testing - * during transition to HDL primitives - */ -#if 0 - int lmsg_reduce_snd_completes; - /* There can be atmost 20 ranks in the subgroup. 
Since this - * only for testing this should be good enough */ - int lmsg_reduce_peers[20]; - int lmsg_reduce_send_offsets[20]; - /* End: Experimental ctl fields */ - - - /* no need to pad at this level anymore */ - volatile int64_t data_hdl; -#endif -}; -typedef struct mca_bcol_basesmuma_header_t mca_bcol_basesmuma_header_t; - -/* data needed for large messages */ -struct mca_bcol_basesmuma_large_msg_t { - /* scatter allgather data */ - uint64_t offset; - uint64_t n_sends; - uint64_t length; - - /* portals data */ - -}; -typedef struct mca_bcol_basesmuma_large_msg_t mca_bcol_basesmuma_large_msg_t; - -/* payload struct */ -struct mca_bcol_basesmuma_payload_t { - - /* base pointer to shared memory control structure */ - mca_bcol_basesmuma_header_t *ctl_struct; - void *payload; - -}; - -typedef struct mca_bcol_basesmuma_payload_t mca_bcol_basesmuma_payload_t; - - - - -/* memory bank memory management structure */ -struct mem_bank_management_t { - - /* generation counter */ - uint64_t bank_gen_counter; - - /* descriptor for the non-blocking barrier. This is - * used to manage this bank of memory. - */ - sm_nbbar_desc_t nb_barrier_desc; - - /* the number of buffers that are not in use, and are - * available. The assumption is that the buffers are - * recycled all at once, so are available for re-use - * until all buffers have been made available for re-use. 
- */ - volatile int available_buffers; - - /* - * number of buffers freed */ - volatile int n_buffs_freed; - - /* mutex to ensure atomic recycling of resrouces */ - opal_mutex_t mutex; - - /* number of buffers being managed */ - int number_of_buffers; - - /* shared memory control structures */ - int index_shared_mem_ctl_structs; - - -}; -typedef struct mem_bank_management_t mem_bank_management_t; - -/* data structure for shared buffers */ -struct sm_buffer_mgmt { - /* number of buffers per process */ - int number_of_buffs; - - /* size of group */ - int size_of_group; - - /* number of memory banks */ - int num_mem_banks; - - /* number of buffers per memory bank */ - int num_buffs_per_mem_bank; - - /* log base 2 of num_buffs_per_mem_bank */ - int log2_num_buffs_per_mem_bank; - - /* log base 2 total number of buffers */ - int log2_number_of_buffs; - - /* mask - masks off the bits corresponding to buffer index */ - int mask; - - /* control buffers - these point to regions in shared memory */ - /* leading dimension is the group size - all pointers for a given - * set of buffers appear consecutively in this array - */ - volatile void **ctl_buffs; - - /* management data for the control structures - - * one per bank of control structures - Will be used for - * the payload buffers as well. 
- */ - mem_bank_management_t *ctl_buffs_mgmt; - - /* data buffers - these point to regions in shared memory */ - /* leading dimension is the group size - all pointers for a given - * set of buffers appear consecutively in this array - */ - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - - - -}; -typedef struct sm_buffer_mgmt sm_buffer_mgmt; - - -struct mca_bcol_basesmuma_nb_coll_buff_desc_t { - void *data_addr; - uint64_t bank_index; - uint64_t buffer_index; - int active_requests; - ompi_request_t **requests; - int data_src; - int radix_mask; - int radix_mask_pow; - int iteration; - int status; - /* this is for testing */ - int tag; - - volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer; - volatile mca_bcol_basesmuma_ctl_struct_t *extra_partner_ctl_pointer; -}; - -typedef struct mca_bcol_basesmuma_nb_coll_buff_desc_t mca_bcol_basesmuma_nb_coll_buff_desc_t; - -struct mca_bcol_basesmuma_local_mlmem_desc_t { - - uint32_t bank_index_for_release; - struct mca_bcol_base_memory_block_desc_t *ml_mem_desc; - uint32_t num_banks; - uint32_t num_buffers_per_bank; - uint32_t size_buffer; - uint32_t *bank_release_counter; - - /* - * Number of descriptors allocated is equivalent to number of ml buffers - * (number of banks * number of buffers per bank) - */ - mca_bcol_basesmuma_nb_coll_buff_desc_t *nb_coll_desc; -}; - -typedef struct mca_bcol_basesmuma_local_mlmem_desc_t mca_bcol_basesmuma_local_mlmem_desc_t; - -#ifdef __PORTALS_AVAIL__ -#define MAX_SM_GROUP_SIZE 32 - - -struct portals_scatter_allgather_nb_bcast_state_t -{ - /* local variables */ - uint64_t length; - int my_rank, src, matched; - int src_list[MAX_SM_GROUP_SIZE]; - int group_size; - int64_t ready_flag; - int pow_2, pow_2_levels; - int src_list_index; - uint64_t fragment_size; /* user buffer size */ - - /* Input argument variables */ - void *my_userbuf; - int64_t sequence_number; - - 
/* Extra source variables */ - bool secondary_root; - int partner , extra_partner; - - /* Scatter Allgather offsets */ - uint64_t local_sg_offset , global_sg_offset , partner_offset ; - - /* Portals messaging relevant variables */ - /* - * ptl_handle_eq_t allgather_eq_h; - */ - ptl_handle_eq_t read_eq; - ptl_event_t allgather_event; - bool msg_posted; - - /* OMPI module and component variables */ - mca_bcol_basesmuma_component_t *cs; - struct mca_bcol_basesmuma_module_t *bcol_module; - - /* Control structure and payload variables */ - volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer; /* scatter source */ - volatile mca_bcol_basesmuma_ctl_struct_t *extra_partner_ctl_pointer; /* scatter source */ - - int phase; -}; - - -typedef struct portals_scatter_allgather_nb_bcast_state_t sg_state_t; -#endif - -#define SM_ARRAY_INDEX(LEAD_DIM,BUF_INDEX,PROC_INDEX) \ - ((LEAD_DIM)*(BUF_INDEX)+(PROC_INDEX)) -/* debug */ -#define BARRIER_BANK_LIST_SIZE 32 -/* end debug */ - -struct mca_bcol_basesmuma_module_t { - /* base structure */ - mca_bcol_base_module_t super; - - /* free list item with the control structures used for - * the no user data collective operations - */ - list_data_t *no_userdata_ctl; - - /* free list item with the control structures used for - * the with user data collective operations - */ - list_data_t *userdata_ctl; - - /* - * information on sm control backing files for the subgroup - * associated with this module. - */ - bcol_basesmuma_smcm_proc_item_t **ctl_backing_files_info; - - /* - * information on sm payload backing files for the subgroup - * associated with this module. - */ - bcol_basesmuma_smcm_proc_item_t **payload_backing_files_info; - - /* - * buffers for the collective that do not involve user data - - * barrier, fanin, fanout. 
- */ - sm_buffer_mgmt colls_no_user_data; - - /* - * buffers for the collective with user data. - */ - sm_buffer_mgmt colls_with_user_data; - - /* recursive-doubling tree node */ - netpatterns_pair_exchange_node_t recursive_doubling_tree; - - /* k-nomial gather/allgather tree */ - netpatterns_k_exchange_node_t knomial_allgather_tree; - - /* fanin tree node - root is rank 0 */ - netpatterns_tree_node_t fanin_node; - - /* fanout tree node - root is rank 0 */ - netpatterns_tree_node_t fanout_node; - - /* index of blocking barrier memory region to use */ - int index_blocking_barrier_memory_bank; - - /* comm to shared memory map */ - int *comm_to_sm_map; - - /* reduction fanout tree */ - netpatterns_tree_node_t* reduction_tree; - - /* broadcast fanout tree */ - netpatterns_tree_node_t* fanout_read_tree; - - /* scatter - k-ary tree */ - int scatter_kary_radix; - netpatterns_tree_node_t *scatter_kary_tree; - - /* Knomial exchange tree */ - /* Currently used for only large message reduce */ - netpatterns_k_exchange_node_t knomial_exchange_tree; - - /* sequence number offset - want to make sure that we start - * id'ing collectives with id 0, so we can have simple - * resource management. - */ - int64_t squence_number_offset; - - /* basesmuma specific header size into ml buffer - * was calculated at ml level - it is the sum of - * all headers from all bcols and then aligned to - * whatever alignment was requested - */ - uint32_t total_header_size; - - /* list of possible sources */ - int *src_list; - - /* Number of possible sources */ - int src_size; - - /* smallest power of k that is smaller - * than or equal in size to the uma group - */ - int pow_k_levels; - - /* size of power-of-k group */ - int pow_k; - - /* smallest power of 2 that is smaller - * than or equal to the smuma group size - */ - int pow_2_levels; - - /* size of power-of-2 group */ - int pow_2; - - /* pointer to the shared memory scratch array of each - * process in the group. 
- */ - void **shared_memory_scratch_space; - - /* - * Caching information for re-entrant collectives - */ - mca_bcol_basesmuma_local_mlmem_desc_t ml_mem; - - /* - * Cached offsets for lmsg reduce - */ - int **reduce_offsets; - - /*XXX: - * Starting to explore the beauty of zero-copy for large message - */ - struct mca_hdl_base_module_t **hdl_module; - -#ifdef __PORTALS_AVAIL__ - /* - * Store state for NB blocking functions - */ - sg_state_t sg_state; - -#endif -}; - -typedef struct mca_bcol_basesmuma_module_t mca_bcol_basesmuma_module_t; -OBJ_CLASS_DECLARATION(mca_bcol_basesmuma_module_t); - -/* shared memory specific arguments for the bcol registration function */ -typedef struct bcol_basesmuma_registration_data_t { - char *file_name; /* filename for payload */ - void *base_addr; /* base address to be mapped */ - size_t size; /* size of memory block to be "registered" */ - size_t size_ctl_structure; - size_t data_seg_alignment; - bcol_basesmuma_smcm_mmap_t *sm_mmap; /* shared memory map struct */ - mca_bcol_base_release_buff_fn_t buff_release_cb; /* buffer release - call back */ -} bcol_basesmuma_registration_data_t; - - -enum { - BUFFER_AVAILABLE, - STARTED, - FANIN, - FANOUT -}; - -/* enum used for non-blocking large - * message bcast - */ - -enum { - INIT, - START, - NOT_STARTED, - SCATTER, - ALLGATHER, - EXTRA_RANK, - PROBE, - SCATTER_ROOT_WAIT, - SCATTER_EXTRA_ROOT_WAIT, - SCATTER_PARENT_WAIT, - FINISHED -}; - -/** - * Global component instance - */ -OMPI_MODULE_DECLSPEC extern mca_bcol_basesmuma_component_t mca_bcol_basesmuma_component; - - -/* - * coll module functions - */ - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_bcol_basesmuma_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - -/* query to see if the module is available for use on the given - * communicator, and if so, what it's priority is. 
- */ -mca_bcol_base_module_t ** -mca_bcol_basesmuma_comm_query(mca_sbgp_base_module_t *module, int *num_modules); - - - -/* shared memory specific memory registration function - this will be passed into the mpool */ -int mca_bcol_basesmuma_register_sm(void *context_data, void *base, size_t size, - void **reg); - -/* shared memory specific memory deregistration function - also needed by the mpool */ -int mca_bcol_basesmuma_deregister_sm(void *context_data, void *reg); - -/* setup the new k_nomial tree for collectives */ -int bcol_basesmuma_setup_knomial_tree(mca_bcol_base_module_t *super); - -/* allocate the memory pool for the shared memory control structures */ -int mca_bcol_basesmuma_allocate_pool_memory(mca_bcol_basesmuma_component_t - *component); - -/* initialize the internal scratch buffers and control structs that will be - used by the module */ -int base_bcol_basesmuma_setup_library_buffers( - mca_bcol_basesmuma_module_t *sm_module, - mca_bcol_basesmuma_component_t *cs); - - -/* shared memory recursive doubling initialization */ -int bcol_basesmuma_rd_barrier_init(mca_bcol_base_module_t *module); - -/* shared memory recusive double barrier */ -int bcol_basesmuma_recursive_double_barrier(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); -/* shared memory fanin */ -int bcol_basesmuma_fanin_init(mca_bcol_base_module_t *super); - -/* shared memory fanout */ -int bcol_basesmuma_fanout_init(mca_bcol_base_module_t *super); - -/* shared memory recursive k-ing non-blocking barrier */ -int bcol_basesmuma_barrier_init(mca_bcol_base_module_t *super); - -/* Shared memory broadcast */ -int bcol_basesmuma_bcast_init(mca_bcol_base_module_t *super); - -int bcol_basesmuma_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* Shared memory non-blocking broadcast */ -int bcol_basesmuma_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int 
bcol_basesmuma_bcast_k_nomial_knownroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* Shared memory non-blocking broadcast - Large message anyroot */ -int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -#if 0 -/*FIXME: having fun here*/ -int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); -#endif - -int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* - * shared memory scatter - */ -int bcol_basesmuma_scatter_init(mca_bcol_base_module_t *super); - -/* shared memory nonblocking scatter - known root */ -int bcol_basesmuma_nb_scatter_k_array_knownroot( - bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* shared memory non-blocking k-nomial barrier init */ -int bcol_basesmuma_k_nomial_barrier_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -/* shared memory non-blocking k-nomial barrier progress */ -int bcol_basesmuma_k_nomial_barrier_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -/*shared memory non-blocking k-nomial allgather init */ -int bcol_basesmuma_k_nomial_allgather_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -/* shared memory non-blocking k-nomial allgather progress */ -int 
bcol_basesmuma_k_nomial_allgather_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -/* shared memory allgather -- selection logic api */ -int bcol_basesmuma_allgather_init(mca_bcol_base_module_t *super); - -/* shared memory blocking k-nomial gather */ -int bcol_basesmuma_k_nomial_gather(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* shared memory non blocking k-nomial gather */ -int bcol_basesmuma_k_nomial_gather_init(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* shared memory non blocking k-nomial gather progress*/ -int bcol_basesmuma_k_nomial_gather_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* shared memory init */ -int bcol_basesmuma_gather_init(mca_bcol_base_module_t *super); - -/* allocate shared memory control memory */ -int mca_bcol_basesmuma_allocate_sm_ctl_memory( - mca_bcol_basesmuma_component_t *cs); - -/* Shared memory basesmuma reduce */ -int bcol_basesmuma_reduce_init(mca_bcol_base_module_t *super); -int bcol_basesmuma_reduce_intra_fanin(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); -int bcol_basesmuma_reduce_intra_fanin_old(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_reduce_intra_reducescatter_gather(void *sbuf, void *rbuf, - int count, struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* Shared memory basesmuma allreduce */ -int bcol_basesmuma_allreduce_init(mca_bcol_base_module_t *super); - -int bcol_basesmuma_allreduce_intra_fanin_fanout(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_allreduce_intra_recursive_doubling(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* initialize non-blocking barrier for recycling the memory 
buffers. - * This is not a general purpose nb_barrier, and relies on the - * fact that we will have only one outstanding nb-barrier per bank - * at a time. - */ -int bcol_basesmuma_rd_nb_barrier_init_admin(sm_nbbar_desc_t *sm_desc); - -/* admin nonblocking barrier - progress function */ -int bcol_basesmuma_rd_nb_barrier_progress_admin(sm_nbbar_desc_t *sm_desc); - -/* Memory syncronization registration function */ -int bcol_basesmuma_memsync_init(mca_bcol_base_module_t *super); - -/* smcm allgather function used to exchange file offsets. */ -int bcol_basesmuma_smcm_allgather_connection( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_sbgp_base_module_t *module, - opal_list_t *peer_list, - bcol_basesmuma_smcm_proc_item_t ***backing_files, - ompi_communicator_t *comm, - bcol_basesmuma_smcm_file_t input, char *base_fname, - bool map_all); - -/* clean up the backing files associated with a basesmuma bcol module */ -int bcol_basesmuma_smcm_release_connections (mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_sbgp_base_module_t *sbgp_module, opal_list_t *peer_list, - bcol_basesmuma_smcm_proc_item_t ***back_files); - -/* - * this function initializes the internal scratch buffers and control - * structures that will be used by the module - */ -int base_bcol_masesmuma_setup_library_buffers( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *sm_bcol_component); - -/* get the index of the shared memory buffer to be used */ -int bcol_basesmuma_get_buff_index( sm_buffer_mgmt * buff_block, - uint64_t buff_id ); - -int bcol_basesmuma_free_buff( sm_buffer_mgmt * buff_block, - uint64_t buff_id ); - -/* bank init which is used for shared memory optimization, fall back to - * the bank init above if this causes problems - */ -int bcol_basesmuma_bank_init_opti(struct mca_bcol_base_memory_block_desc_t *payload_block, - uint32_t data_offset, - mca_bcol_base_module_t *bcol_module, - void *reg_data); - -/* cleanup nb_coll_buff_desc */ -void 
cleanup_nb_coll_buff_desc(mca_bcol_basesmuma_nb_coll_buff_desc_t **desc, - uint32_t num_banks, - uint32_t num_buffers_per_bank); - - -/* used for shared memory offset exchange */ -int base_bcol_basesmuma_exchange_offsets( - mca_bcol_basesmuma_module_t *sm_bcol_module, - void **result_array, uint64_t mem_offset, int loop_limit, - int leading_dim); - - -/* the progress function to be called from the opal progress function - */ -int bcol_basesmuma_progress(void); - -/* Macro for initializing my shared memory control structure */ -#define BASESMUMA_HEADER_INIT(my_ctl_pointer,ready_flag, seqn, bcol_id) \ - do{ \ - int i,j; \ - int8_t flag_offset = 0; \ - /* setup resource recycling */ \ - if( (my_ctl_pointer)->sequence_number < (seqn) ) { \ - /* Signal arrival */ \ - for( j = 0; j < SM_BCOLS_MAX; j++){ \ - (my_ctl_pointer)->starting_flag_value[j]=0; \ - for( i = 0; i < NUM_SIGNAL_FLAGS; i++){ \ - (my_ctl_pointer)->flags[i][j] = -1; \ - } \ - } \ - } \ - /* increment the starting flag by one and return */ \ - flag_offset = (my_ctl_pointer)->starting_flag_value[(bcol_id)]; \ - (ready_flag) = flag_offset + 1; \ - opal_atomic_wmb(); \ - (my_ctl_pointer)->sequence_number = (seqn); \ - }while(0) - -/* these are all the same, am using a single macro for all collectives */ - -#define IS_PEER_READY(peer, my_flag, my_sequence_number,flag_index, bcol_id) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[flag_index][bcol_id] >= (my_flag))? true : false ) - -#if 0 -#define IS_AR_DATA_READY(peer, my_flag, my_sequence_number) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[ALLREDUCE_FLAG][bcol_id] >= (my_flag) \ - )? true : false ) - -#define IS_GDATA_READY(peer, my_flag, my_sequence_number) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[GATHER_FLAG][bcol_id] == (my_flag) \ - )? 
true : false ) - -#define IS_PEER_READY(peer, my_flag, flag_index, my_sequence_number) \ - ((((volatile int64_t)(peer)->sequence_number > (my_sequence_number)) || \ - (((volatile int64_t)(peer)->sequence_number == (my_sequence_number)) && \ - ((peer)->flags[flag_index][bcol_id] == (my_flag))) \ - )? true : false ) - -#define IS_ALLREDUCE_PEER_READY(peer, my_flag, my_sequence_number) \ - ((((volatile int64_t)(peer)->sequence_number == (my_sequence_number)) && \ - (((peer)->flags[ALLREDUCE_FLAG][bcol_id] == (my_flag))||((peer)->flags[ALLREDUCE_FLAG][bcol_id] == (my_flag) + 1)) \ - )? true : false ) -#endif - -#define IS_LAST_BCOL_FUNC(ml_args) \ - ((((ml_args)->n_of_this_type_in_collective == \ - (ml_args)->index_of_this_type_in_collective + 1 ) )? true : false) - -static inline __opal_attribute_always_inline__ -size_t bcol_basesmuma_data_offset_calc( - mca_bcol_basesmuma_module_t *basesmuma_module) -{ - uint32_t offset = basesmuma_module->super.header_size; - offset = ((offset + BCOL_HEAD_ALIGN - 1) / BCOL_HEAD_ALIGN) * BCOL_HEAD_ALIGN; - - return (size_t) offset; -} - - -END_C_DECLS - -#endif /* MCA_BCOL_basesmuma_EXPORT_H */ diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c deleted file mode 100644 index 97a857ef0c3..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c +++ /dev/null @@ -1,352 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h" -/* - #define IS_AGDATA_READY(peer, my_flag, my_sequence_number)\ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[ALLGATHER_FLAG][bcol_id] >= (my_flag) \ - )? true : false ) -*/ - -#define CALC_ACTIVE_REQUESTS(active_requests,peers, tree_order) \ - do{ \ - for( j = 0; j < (tree_order - 1); j++){ \ - if( 0 > peers[j] ) { \ - /* set the bit */ \ - *active_requests ^= (1<bcol_module; - netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree; - int bcol_id = (int) bcol_module->super.bcol_id; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - int leading_dim, buff_idx, idx; - - int64_t sequence_number = input_args->sequence_num; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - - volatile int8_t ready_flag; - - /* initialize the iteration counter */ - buff_idx = input_args->src_desc->buffer_index; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* initialize headers and ready flag */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - - /* initialize 
these */ - *iteration = -1; - *active_requests = 0; - *status = ready_flag; - - if (EXTRA_NODE == exchange_node->node_type) { - /* I am ready at this level */ - opal_atomic_wmb (); - my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag; - } - - return bcol_basesmuma_k_nomial_allgather_progress (input_args, const_args); -} - - -/* allgather progress function */ - -int bcol_basesmuma_k_nomial_allgather_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variables */ - int8_t flag_offset; - uint32_t buffer_index = input_args->buffer_index; - volatile int8_t ready_flag; - mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module; - netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree; - int group_size = bcol_module->colls_no_user_data.size_of_group; - int *list_connected = bcol_module->super.list_n_connected; /* critical for hierarchical colls */ - int bcol_id = (int) bcol_module->super.bcol_id; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - int leading_dim, idx, buff_idx; - - int i, j, probe; - int knt; - int src; - int recv_offset, recv_len; - int max_requests = 0; /* critical to set this */ - int pow_k, tree_order; - - int64_t sequence_number=input_args->sequence_num; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - - int pack_len = input_args->count * input_args->dtype->super.size; - - void *data_addr = (void*)( - (unsigned char *) input_args->sbuf + - (size_t) input_args->sbuf_offset); - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char *peer_data_pointer; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t 
*my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer; - -#if 0 - fprintf(stderr,"%d: entering sm allgather progress active requests %d iter %d ready_flag %d\n", my_rank, - *active_requests, *iteration, *status); -#endif - - buff_idx = input_args->src_desc->buffer_index; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* increment the starting flag by one and return */ - /* flag offset seems unnecessary here */ - flag_offset = my_ctl_pointer->starting_flag_value[bcol_id]; - ready_flag = *status; - my_ctl_pointer->sequence_number = sequence_number; - /* k-nomial parameters */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - - /* calculate the maximum number of requests - * at each level each rank communicates with - * at most (k - 1) peers - * so if we set k - 1 bit fields in "max_requests", then - * we have max_request == 2^(k - 1) -1 - */ - for(i = 0; i < (tree_order - 1); i++){ - max_requests ^= (1<node_type) { - /* If I'm in here, then I must be looking for data */ - ready_flag = flag_offset + 1 + pow_k + 2; - - src = exchange_node->rank_extra_sources_array[0]; - peer_data_pointer = data_buffs[src].payload; - peer_ctl_pointer = data_buffs[src].ctl_struct; - - /* calculate the count */ - for (i = 0, knt = 0 ; i < group_size ; ++i){ - knt += list_connected[i]; - } - - for (i = 0 ; i < cm->num_to_probe ; ++i) { - if (IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, ALLGATHER_FLAG, bcol_id)) { - /* we receive the entire message */ - opal_atomic_mb (); - memcpy (data_addr, (void *) peer_data_pointer, knt * pack_len); - - goto FINISHED; - } - } - - /* haven't found it, state is saved, bail out */ - return BCOL_FN_STARTED; - } else if 
(0 < exchange_node->n_extra_sources) { - /* I am a proxy for someone */ - src = exchange_node->rank_extra_sources_array[0]; - peer_data_pointer = data_buffs[src].payload; - peer_ctl_pointer = data_buffs[src].ctl_struct; - - /* calculate the offset */ - for (i = 0, knt = 0 ; i < src ; ++i){ - knt += list_connected[i]; - } - - /* probe for extra rank's arrival */ - for (i = 0 ; i < cm->num_to_probe ; ++i) { - if (IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, ALLGATHER_FLAG, bcol_id)) { - opal_atomic_mb (); - /* copy it in */ - memcpy ((void *) ((uintptr_t) data_addr + knt * pack_len), - (void *) ((uintptr_t) peer_data_pointer + knt * pack_len), - pack_len * list_connected[src]); - break; - } - } - - if (i == cm->num_to_probe) { - return BCOL_FN_STARTED; - } - } - - /* bump the ready flag to indicate extra node exchange complete */ - ++ready_flag; - *iteration = 0; - } - - /* start the recursive k - ing phase */ - for (i = *iteration ; i < pow_k ; ++i) { - /* I am ready at this level */ - opal_atomic_wmb (); - my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag; - - if (0 == *active_requests) { - /* flip some bits, if we don't have active requests from a previous visit */ - CALC_ACTIVE_REQUESTS(active_requests,exchange_node->rank_exchanges[i],tree_order); - } - - for (j = 0; j < (tree_order - 1); ++j) { - - /* recv phase */ - src = exchange_node->rank_exchanges[i][j]; - - if (src < 0) { - /* then not a valid rank, continue */ - continue; - } - - if (!(*active_requests&(1<payload_info[i][j].r_offset * pack_len; - recv_len = exchange_node->payload_info[i][j].r_len * pack_len; - - /* I am putting the probe loop as the inner most loop to achieve - * better temporal locality - */ - for (probe = 0 ; probe < cm->num_to_probe ; ++probe) { - if (IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, ALLGATHER_FLAG, bcol_id)) { - /* flip the request's bit */ - *active_requests ^= (1<flags[ALLGATHER_FLAG][bcol_id]; - *iteration = i; - return 
BCOL_FN_STARTED; - } - } - - /* bump the flag one more time for the extra rank */ - ready_flag = flag_offset + 1 + pow_k + 2; - - /* finish off the last piece, send the data back to the extra */ - if( 0 < exchange_node->n_extra_sources ) { - /* simply announce my arrival */ - opal_atomic_wmb (); - my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag; - } - -FINISHED: - /* bump this up for others to see */ - my_ctl_pointer->starting_flag_value[bcol_id]++; - return BCOL_FN_COMPLETE; -} - -/* Register allreduce functions to the BCOL function table, - * so they can be selected - */ -int bcol_basesmuma_allgather_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_ALLGATHER; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_k_nomial_allgather_init, - bcol_basesmuma_k_nomial_allgather_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c deleted file mode 100644 index 0058ec770fb..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c +++ /dev/null @@ -1,611 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "ompi/op/op.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "opal/include/opal_stdint.h" - -#include "ompi/mca/bcol/base/base.h" -#include "bcol_basesmuma.h" - -static int bcol_basesmuma_allreduce_intra_fanin_fanout_progress (bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_allreduce_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_ALLREDUCE; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1048576; - comm_attribs.data_src = DATA_SRC_KNOWN; - - /* selection logic at the ml level specifies a - * request for a non-blocking algorithm - * however, these algorithms are blocking - * following what was done at the p2p level - * we will specify non-blocking, but beware, - * these algorithms are blocking and will not make use - * of the progress engine - */ - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - /* Set attributes for fanin fanout algorithm */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_allreduce_intra_fanin_fanout, - bcol_basesmuma_allreduce_intra_fanin_fanout_progress); - - inv_attribs.bcol_msg_min = 20000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_allreduce_intra_fanin_fanout, - bcol_basesmuma_allreduce_intra_fanin_fanout_progress); - - /* Differs only in comm size */ - - comm_attribs.data_src = DATA_SRC_UNKNOWN; - comm_attribs.waiting_semantics = BLOCKING; - - 
comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 8; - - /* Set attributes for recursive doubling algorithm */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_allreduce_intra_recursive_doubling, - NULL); - - - return OMPI_SUCCESS; -} - -/* - * Small data fanin reduce - * ML buffers are used for both payload and control structures - * This functions works with hierarchical allreduce and - * progress engine - */ -static inline int reduce_children (mca_bcol_basesmuma_module_t *bcol_module, volatile void *rbuf, netpatterns_tree_node_t *my_reduction_node, - int *iteration, volatile mca_bcol_basesmuma_header_t *my_ctl_pointer, ompi_datatype_t *dtype, - volatile mca_bcol_basesmuma_payload_t *data_buffs, int count, struct ompi_op_t *op, int process_shift) -{ - volatile mca_bcol_basesmuma_header_t *child_ctl_pointer; - int bcol_id = (int) bcol_module->super.bcol_id; - int64_t sequence_number = my_ctl_pointer->sequence_number; - int8_t ready_flag = my_ctl_pointer->ready_flag; - int group_size = bcol_module->colls_no_user_data.size_of_group; - - if (LEAF_NODE != my_reduction_node->my_node_type) { - volatile char *child_data_pointer; - volatile void *child_rbuf; - - /* for each child */ - /* my_result_data = child_result_data (op) my_source_data */ - - for (int child = *iteration ; child < my_reduction_node->n_children ; ++child) { - int child_rank = my_reduction_node->children_ranks[child] + process_shift; - - if (group_size <= child_rank){ - child_rank -= group_size; - } - - child_ctl_pointer = data_buffs[child_rank].ctl_struct; - - if (!IS_PEER_READY(child_ctl_pointer, ready_flag, sequence_number, ALLREDUCE_FLAG, bcol_id)) { - *iteration = child; - return BCOL_FN_STARTED; - } - - child_data_pointer = data_buffs[child_rank].payload; - child_rbuf = child_data_pointer + child_ctl_pointer->roffsets[bcol_id]; - - ompi_op_reduce(op, (void *)child_rbuf, (void *)rbuf, count, dtype); - } /* end child loop */ - } - - if (ROOT_NODE != 
my_reduction_node->my_node_type) { - opal_atomic_wmb (); - my_ctl_pointer->flags[ALLREDUCE_FLAG][bcol_id] = ready_flag; - } - - /* done with this step. move on to fan out */ - *iteration = -1; - - return BCOL_FN_COMPLETE; -} - -static int allreduce_fanout (mca_bcol_basesmuma_module_t *bcol_module, volatile mca_bcol_basesmuma_header_t *my_ctl_pointer, - volatile void *my_data_pointer, int process_shift, volatile mca_bcol_basesmuma_payload_t *data_buffs, - int sequence_number, int group_size, int rbuf_offset, size_t pack_len) -{ - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer; - int bcol_id = (int) bcol_module->super.bcol_id; - int8_t ready_flag = my_ctl_pointer->ready_flag + 1; - netpatterns_tree_node_t *my_fanout_read_tree; - volatile void *parent_data_pointer; - int my_fanout_parent, my_rank; - void *parent_rbuf, *rbuf; - - my_rank = bcol_module->super.sbgp_partner_module->my_index; - my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_rank]); - - if (ROOT_NODE != my_fanout_read_tree->my_node_type) { - my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift; - if (group_size <= my_fanout_parent) { - my_fanout_parent -= group_size; - } - - rbuf = (void *)((char *) my_data_pointer + rbuf_offset); - - /* - * Get parent payload data and control data. - * Get the pointer to the base address of the parent's payload buffer. - * Get the parent's control buffer. - */ - parent_data_pointer = data_buffs[my_fanout_parent].payload; - parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct; - - parent_rbuf = (void *) ((char *) parent_data_pointer + rbuf_offset); - - /* Wait until parent signals that data is ready */ - /* The order of conditions checked in this loop is important, as it can - * result in a race condition. 
- */ - if (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, ALLREDUCE_FLAG, bcol_id)) { - return BCOL_FN_STARTED; - } - - assert (parent_ctl_pointer->flags[ALLREDUCE_FLAG][bcol_id] == ready_flag); - - /* Copy the rank to a shared buffer writable by the current rank */ - memcpy ((void *) rbuf, (const void*) parent_rbuf, pack_len); - } - - if (LEAF_NODE != my_fanout_read_tree->my_node_type) { - opal_atomic_wmb (); - - /* Signal to children that they may read the data from my shared buffer (bump the ready flag) */ - my_ctl_pointer->flags[ALLREDUCE_FLAG][bcol_id] = ready_flag; - } - - my_ctl_pointer->starting_flag_value[bcol_id] += 1; - - return BCOL_FN_COMPLETE; - -} - -static int bcol_basesmuma_allreduce_intra_fanin_fanout_progress (bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args) -{ - mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - int buff_idx = input_args->src_desc->buffer_index; - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration; - void *data_addr = (void *) input_args->src_desc->data_addr; - int my_node_index, my_rank, group_size, leading_dim, idx; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - int64_t sequence_number = input_args->sequence_num; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - struct ompi_datatype_t *dtype = input_args->dtype; - netpatterns_tree_node_t *my_reduction_node; - struct ompi_op_t *op = input_args->op; - volatile void *my_data_pointer; - int count = input_args->count; - int rc, process_shift; - ptrdiff_t lb, extent; - volatile void *rbuf; - - /* get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx = SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /* Align node index to around sbgp root */ - process_shift = input_args->root; - my_node_index 
= my_rank - input_args->root; - if (0 > my_node_index ) { - my_node_index += group_size; - } - - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) bcol_module->colls_with_user_data.data_buffs + idx; - /* Get control structure and payload buffer */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_data_pointer = (volatile char *) data_addr; - - my_data_pointer = (volatile char *) data_addr; - rbuf = (volatile void *)((char *) my_data_pointer + input_args->rbuf_offset); - - /*************************** - * Fan into root phase - ***************************/ - - my_reduction_node = &(bcol_module->reduction_tree[my_node_index]); - if (-1 != *iteration) { - rc = reduce_children (bcol_module, rbuf, my_reduction_node, iteration, my_ctl_pointer, - dtype, data_buffs, count, op, process_shift); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - - /* there might be non-contig dtype - so compute the length with get_extent */ - ompi_datatype_get_extent(dtype, &lb, &extent); - - /*************************** - * Fan out from root - ***************************/ - - /* all nodes will have the result after fanout */ - input_args->result_in_rbuf = true; - - /* Signal that you are ready for fanout phase */ - return allreduce_fanout (bcol_module, my_ctl_pointer, my_data_pointer, process_shift, data_buffs, - sequence_number, group_size, input_args->rbuf_offset, count * (size_t) extent); -} - -/** - * Shared memory blocking allreduce. 
- */ -int bcol_basesmuma_allreduce_intra_fanin_fanout(bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - int buff_idx = input_args->src_desc->buffer_index; - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration; - void *data_addr = (void *) input_args->src_desc->data_addr; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - struct ompi_datatype_t *dtype = input_args->dtype; - int bcol_id = (int) bcol_module->super.bcol_id; - int rc, my_rank, leading_dim, idx; - volatile void *my_data_pointer; - volatile void *sbuf, *rbuf; - int8_t ready_flag; - - /* get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx = SM_ARRAY_INDEX(leading_dim, buff_idx, 0); - - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) bcol_module->colls_with_user_data.data_buffs + idx; - /* Get control structure */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - my_data_pointer = (volatile char *) data_addr; - rbuf = (volatile void *)((char *) my_data_pointer + input_args->rbuf_offset); - sbuf = (volatile void *)((char *) my_data_pointer + input_args->sbuf_offset); - - /* Setup resource recycling */ - /* Set for multiple instances of bcols */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, input_args->sequence_num, bcol_id); - - if (sbuf != rbuf) { - rc = ompi_datatype_copy_content_same_ddt (dtype, input_args->count, (char *)rbuf, - (char *)sbuf); - if( 0 != rc ) { - return OMPI_ERROR; - } - } - - *iteration = 0; - my_ctl_pointer->ready_flag = ready_flag; - - return bcol_basesmuma_allreduce_intra_fanin_fanout_progress (input_args, c_input_args); -} - - - -/* this thing uses the old bcol private control structures */ -int 
bcol_basesmuma_allreduce_intra_recursive_doubling(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - int my_rank,group_size,my_node_index; - int pair_rank, exchange, extra_rank, payload_len; - size_t dt_size; - int read_offset, write_offset; - volatile void *my_data_pointer; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer = NULL, - *partner_ctl_pointer = NULL, - *extra_ctl_pointer = NULL; - volatile void *my_read_pointer, *my_write_pointer, *partner_read_pointer, - *extra_rank_readwrite_data_pointer,*extra_rank_read_data_pointer; - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - - int8_t ready_flag; - int sbuf_offset,rbuf_offset,flag_offset; - int root,count; - struct ompi_op_t *op; - int64_t sequence_number=input_args->sequence_num; - struct ompi_datatype_t *dtype; - int first_instance = 0; - int leading_dim,idx; - int buff_idx; - mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - /*volatile void **data_buffs;*/ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - netpatterns_pair_exchange_node_t *my_exchange_node; - - - /* - * Get addressing information - */ - buff_idx = input_args->src_desc->buffer_index; - - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx = SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /* - * Get SM control structures and payload buffers - */ - ctl_structs = (mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_with_user_data.ctl_buffs+idx; - /*data_buffs = (volatile void **) - bcol_module->colls_with_user_data.data_buffs+idx;*/ - - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - - - /* - * Get control structure and payload buffer - */ - my_ctl_pointer = ctl_structs[my_rank]; - if (my_ctl_pointer->sequence_number < sequence_number) { - 
first_instance=1; - } - my_data_pointer = data_buffs[my_rank].payload; - - /* - * Align node index to around sbgp root - */ - root = input_args->root; - my_node_index = my_rank - root; - if (0 > my_node_index) { - my_node_index += group_size; - } - - /* - * Get data from arguments - */ - sbuf_offset = input_args->sbuf_offset; - rbuf_offset = input_args->rbuf_offset; - op = input_args->op; - count = input_args->count; - dtype = input_args->dtype; - - /* - * Get my node for the reduction tree - */ - my_exchange_node = &(bcol_module->recursive_doubling_tree); - - - if (first_instance) { - my_ctl_pointer->index = 1; - my_ctl_pointer->starting_flag_value = 0; - flag_offset = 0; - my_ctl_pointer->flag = -1; - /* - for( i = 0; i < NUM_SIGNAL_FLAGS; i++){ - my_ctl_pointer->flags[ALLREDUCE_FLAG] = -1; - } - */ - } else { - my_ctl_pointer->index++; - flag_offset = my_ctl_pointer->starting_flag_value; - } - - /* signal that I have arrived */ - /* opal_atomic_wmb (); */ - my_ctl_pointer->sequence_number = sequence_number; - - /* If we use this buffer more than once by an sm module in - * a given collective, will need to distinguish between instances, so - * we pick up the right data. - */ - ready_flag = flag_offset + sequence_number + 1; - - /* - * Set up pointers for using during recursive doubling phase - */ - read_offset = sbuf_offset; - write_offset = rbuf_offset; - fprintf(stderr,"read offset %d write offset %d\n",read_offset,write_offset); - my_read_pointer = (volatile void *)((char *) my_data_pointer + read_offset); - my_write_pointer = (volatile void *)((char *) my_data_pointer + write_offset); - - /* - * When there are non-power 2 nodes, the extra nodes' data is copied and - * reduced by partner exchange nodes. 
- * Extra nodes: Nodes with rank greater nearest power of 2 - * Exchange nodes: Nodes with rank lesser than nearest power of 2 that - * partner with extras nodes during reduction - */ - - if (0 < my_exchange_node->n_extra_sources) { - /* - * Signal extra node that data is ready - */ - opal_atomic_wmb (); - - my_ctl_pointer->flag = ready_flag; - - if (EXCHANGE_NODE == my_exchange_node->node_type) { - extra_rank = my_exchange_node->rank_extra_source; - extra_ctl_pointer = ctl_structs[extra_rank]; - extra_rank_readwrite_data_pointer = (void *) ((char *) data_buffs[extra_rank].payload + - read_offset); - - /* - * Wait for data to get ready - */ - while (!((sequence_number == extra_ctl_pointer->sequence_number) && - (extra_ctl_pointer->flag >= ready_flag))){ - } - - ompi_op_reduce(op,(void *)extra_rank_readwrite_data_pointer, - (void *)my_read_pointer, count, dtype); - } - } - - - /* --Exchange node that reduces with extra node --: Signal to extra node that data is read - * --Exchange node that doesn't reduce data with extra node --: This assignment - * is used so it can sync with other nodes during exchange phase - * --Extra node--: It can pass to next phase - */ - ready_flag++; - /*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/ - my_ctl_pointer->flag = ready_flag; - - - /* - * Exchange data with all the nodes that are less than max_power_2 - */ - for (exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) { - int tmp=0; - - /*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/ - my_ctl_pointer->flag = ready_flag; - pair_rank=my_exchange_node->rank_exchanges[exchange]; - partner_ctl_pointer = ctl_structs[pair_rank]; - partner_read_pointer = (volatile void *) ((char *)data_buffs[pair_rank].payload + read_offset); - - my_read_pointer = (volatile void *)((char *) my_data_pointer + read_offset); - my_write_pointer = (volatile void *)((char *) my_data_pointer + write_offset); - - /* - * Wait for partner to be ready, so we can read - */ - /* - JSL ---- 
FIX ME !!!!! MAKE ME COMPLIANT WITH NEW BUFFERS - while (!IS_ALLREDUCE_PEER_READY(partner_ctl_pointer, - ready_flag, sequence_number)) { - } - */ - - /* - * Perform reduction operation - */ - ompi_3buff_op_reduce(op,(void *)my_read_pointer, (void *)partner_read_pointer, - (void *)my_write_pointer, count, dtype); - - - /* - * Signal that I am done reading my partner's data - */ - ready_flag++; - /*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/ - my_ctl_pointer->flag = ready_flag; - - while (ready_flag > partner_ctl_pointer->flag){ - opal_progress(); - } - - /* - * Swap read and write offsets - */ - tmp = read_offset; - read_offset = write_offset; - write_offset = tmp; - - } - - - /* - * Copy data in from the "extra" source, if need be - */ - - if (0 < my_exchange_node->n_extra_sources) { - - if (EXTRA_NODE == my_exchange_node->node_type) { - - int extra_rank_read_offset=-1,my_write_offset=-1; - - /* Offset the ready flag to sync with - * exchange node which might going through exchange phases - * unlike the extra node - */ - ready_flag = ready_flag + my_exchange_node->log_2; - - if (my_exchange_node->log_2%2) { - extra_rank_read_offset = rbuf_offset; - my_write_offset = rbuf_offset; - - } else { - extra_rank_read_offset = sbuf_offset; - my_write_offset = sbuf_offset; - - } - - my_write_pointer = (volatile void*)((char *)my_data_pointer + my_write_offset); - extra_rank = my_exchange_node->rank_extra_source; - extra_ctl_pointer = ctl_structs[extra_rank]; - - extra_rank_read_data_pointer = (volatile void *) ((char *)data_buffs[extra_rank].payload + - extra_rank_read_offset); - - /* - * Wait for the exchange node to be ready - */ - ompi_datatype_type_size(dtype, &dt_size); - payload_len = count*dt_size; -#if 0 - fix me JSL !!!!! 
- while (!IS_DATA_READY(extra_ctl_pointer, ready_flag, sequence_number)){ - } -#endif - memcpy((void *)my_write_pointer,(const void *) - extra_rank_read_data_pointer, payload_len); - - ready_flag++; - /*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/ - my_ctl_pointer->flag = ready_flag; - - - } else { - - /* - * Signal parent that data is ready - */ - opal_atomic_wmb (); - /*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/ - my_ctl_pointer->flag = ready_flag; - - /* wait until child is done to move on - this buffer will - * be reused for the next stripe, so don't want to move - * on too quick. - */ - extra_rank = my_exchange_node->rank_extra_source; - extra_ctl_pointer = ctl_structs[extra_rank]; - } - } - - input_args->result_in_rbuf = my_exchange_node->log_2 & 1; - - my_ctl_pointer->starting_flag_value += 1; - - return BCOL_FN_COMPLETE; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c deleted file mode 100644 index 340c0c4c7f1..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c +++ /dev/null @@ -1,487 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" - -#include "bcol_basesmuma.h" - -#define __TEST_BLOCKING__ 1 -#define __TEST_WAIT__ 0 -#define __TEST_TEST__ 0 - -/* debug - * #include "opal/sys/timer.h" - * - * extern uint64_t timers[7]; - * end debug */ - -/* debug */ -/* end debug */ -int bcol_basesmuma_bcast_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_BCAST; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1048576; - comm_attribs.data_src = DATA_SRC_KNOWN; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_bcast_k_nomial_knownroot, - bcol_basesmuma_bcast_k_nomial_knownroot); - - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_bcast_k_nomial_knownroot, - bcol_basesmuma_bcast_k_nomial_knownroot); - - comm_attribs.data_src = DATA_SRC_UNKNOWN; - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_bcast_k_nomial_anyroot, - bcol_basesmuma_bcast_k_nomial_anyroot); - - comm_attribs.data_src = DATA_SRC_UNKNOWN; - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - -#ifdef __PORTALS_AVAIL__ - - comm_attribs.waiting_semantics = BLOCKING; - 
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_lmsg_scatter_allgather_portals_bcast, - bcol_basesmuma_lmsg_scatter_allgather_portals_bcast); - - - comm_attribs.waiting_semantics = NON_BLOCKING; - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast, - bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast); - - comm_attribs.data_src = DATA_SRC_KNOWN; - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast, - bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast); - -#else - /* - if (super->use_hdl) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_hdl_zerocopy_bcast, - bcol_basesmuma_hdl_zerocopy_bcast); - } else { */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, NULL, NULL); - /* - bcol_basesmuma_binary_scatter_allgather_segment, - bcol_basesmuma_binary_scatter_allgather_segment); - */ - /* } */ -#endif - - return OMPI_SUCCESS; -} - -/* includes shared memory optimization */ - -/** - * Shared memory blocking Broadcast - fanin, for small data buffers. - * This routine assumes that buf (the input buffer) is a single writer - * multi reader (SWMR) shared memory buffer owned by the calling rank - * which is the only rank that can write to this buffers. - * It is also assumed that the buffers are registered and fragmented - * at the ML level and that buf is sufficiently large to hold the data. - * - * - * @param buf - SWMR shared buffer within a sbgp that the - * executing rank can write to. - * @param count - the number of elements in the shared buffer. - * @param dtype - the datatype of a shared buffer element. - * @param root - the index within the sbgp of the root. - * @param module - basesmuma module. 
- */ -int bcol_basesmuma_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int group_size, process_shift, my_node_index; - int my_rank; - int rc = OMPI_SUCCESS; - int my_fanout_parent; - int leading_dim, buff_idx, idx; - volatile int8_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - int bcol_id = (int) bcol_module->super.bcol_id; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char* parent_data_pointer; - mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer; - netpatterns_tree_node_t* my_fanout_read_tree; - size_t pack_len = 0, dt_size; - - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr ); - -#if 0 - fprintf(stderr,"Entering sm broadcast input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Align node index to around sbgp root */ - process_shift = root; - my_node_index = my_rank - root; - if(0 > my_node_index ) { - my_node_index += group_size; - } - - /* get my node for the bcast tree */ - my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_node_index]); - my_fanout_parent = 
my_fanout_read_tree->parent_rank + process_shift; - if(group_size <= my_fanout_parent){ - my_fanout_parent -= group_size; - } - - /* Set pointer to current proc ctrl region */ - /*my_ctl_pointer = ctl_structs[my_rank]; */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* setup resource recycling */ - - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - - /* - * Fan out from root - */ - if(ROOT_NODE == my_fanout_read_tree->my_node_type) { - input_args->result_in_rbuf = false; - /* Root should only signal it is ready */ - my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; - - }else if(LEAF_NODE == my_fanout_read_tree->my_node_type) { - input_args->result_in_rbuf = false; - /* - * Get parent payload data and control data. - * Get the pointer to the base address of the parent's payload buffer. - * Get the parent's control buffer. - */ - parent_data_pointer = data_buffs[my_fanout_parent].payload; - parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct; - - /* Wait until parent signals that data is ready */ - /* The order of conditions checked in this loop is important, as it can - * result in a race condition. - */ - while (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, BCAST_FLAG, bcol_id)){ - opal_progress(); - } - - /* Copy the rank to a shared buffer writable by the current rank */ - memcpy(data_addr, (void *)parent_data_pointer, pack_len); - - if( 0 != rc ) { - return OMPI_ERROR; - } - - }else{ - input_args->result_in_rbuf = false; - /* Interior node */ - - /* Get parent payload data and control data */ - parent_data_pointer = data_buffs[my_fanout_parent].payload; - parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct; - - - /* Wait until parent signals that data is ready */ - /* The order of conditions checked in this loop is important, as it can - * result in a race condition. 
- */ - while (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, BCAST_FLAG, bcol_id)){ - opal_progress(); - } - - /* Copy the rank to a shared buffer writable by the current rank */ - memcpy(data_addr, (void *)parent_data_pointer,pack_len); - - /* Signal to children that they may read the data from my shared buffer */ - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; - } - - /* if I am the last instance of a basesmuma function in this collectie, - * release the resrouces */ - - my_ctl_pointer->starting_flag_value[bcol_id]++; - - return rc; -} - - -/*zero-copy large massage communication methods*/ -#if 0 -int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int group_size, process_shift, my_node_index; - int my_rank, first_instance=0, flag_offset; - int rc = OMPI_SUCCESS; - int my_fanout_parent; - int leading_dim, buff_idx, idx; - volatile int64_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - - netpatterns_tree_node_t* my_fanout_read_tree; - size_t pack_len = 0, dt_size; - - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr); - - struct mca_hdl_base_descriptor_t *hdl_desc; - struct mca_hdl_base_segment_t *hdl_seg; - int ret, completed, ridx/*remote rank index*/; - bool status; - volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer= NULL; - volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer= NULL; - volatile mca_bcol_basesmuma_ctl_struct_t *child_ctl_pointer= NULL; - struct mca_hdl_base_module_t* hdl = bcol_module->hdl_module[0]; - - - /* we will work only on packed data - so compute the length*/ - 
ompi_datatype_type_size(dtype, &dt_size); - pack_len = count * dt_size; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - ctl_structs = (volatile mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_with_user_data.ctl_buffs+idx; - my_ctl_pointer = ctl_structs[my_rank]; - - /* Align node index to around sbgp root */ - process_shift = root; - my_node_index = my_rank - root; - if(0 > my_node_index ) { - my_node_index += group_size; - } - - /* get my node for the bcast tree */ - my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_node_index]); - my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift; - if(group_size <= my_fanout_parent){ - my_fanout_parent -= group_size; - } - - /* setup resource recycling */ - if( my_ctl_pointer->sequence_number < sequence_number ) { - first_instance = 1; - } - - if( first_instance ) { - /* Signal arrival */ - my_ctl_pointer->flag = -1; - my_ctl_pointer->index = 1; - /* this does not need to use any flag values , so only need to - * set the value for subsequent values that may need this */ - my_ctl_pointer->starting_flag_value = 0; - flag_offset = 0; - } else { - /* only one thread at a time will be making progress on this - * collective, so no need to make this atomic */ - my_ctl_pointer->index++; - } - - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - my_ctl_pointer->sequence_number = sequence_number; - - hdl_desc = (mca_hdl_base_descriptor_t *) - malloc (sizeof (mca_hdl_base_descriptor_t) * 1); - - /*prepare a hdl data segment*/ - hdl_seg = (mca_hdl_base_segment_t*) - malloc ( sizeof (mca_hdl_base_segment_t) * 1); - 
hdl_seg->seg_addr.pval = input_args->sbuf; - hdl_seg->seg_len = pack_len; - - - hdl->endpoint->ready_flag = ready_flag; - hdl->endpoint->local_ctrl = my_ctl_pointer; - hdl->endpoint->sbgp_contextid = - bcol_module->super.sbgp_partner_module->group_comm->c_contextid; - - /* - * Fan out from root - */ - if(ROOT_NODE == my_fanout_read_tree->my_node_type) { - input_args->result_in_rbuf = false; - - hdl_desc->des_src = hdl_seg; - hdl_desc->des_src_cnt = 1; - hdl_desc->isroot = true; - - /*As the general semantics, there might multiple pairs of send/recv - *on the topology tree*/ - for (ridx = 0; ridx < my_fanout_read_tree->n_children; ridx++) { - child_ctl_pointer = - ctl_structs[my_fanout_read_tree->children_ranks[ridx]]; - hdl->endpoint->remote_ctrl = child_ctl_pointer; - ret = hdl->hdl_send(hdl, hdl->endpoint, hdl_desc); - if (ret != OMPI_SUCCESS) { - BASESMUMA_VERBOSE(1, ("send eror on rank %d ........", my_rank)); - goto exit_ERROR; - } - } - }else if(LEAF_NODE == my_fanout_read_tree->my_node_type) { - input_args->result_in_rbuf = false; - /* - * Get parent payload data and control data. - * Get the pointer to the base address of the parent's payload buffer. - * Get the parent's control buffer. 
- */ - parent_ctl_pointer = ctl_structs[my_fanout_parent]; - - hdl_desc->des_dst = hdl_seg; - hdl_desc->des_dst_cnt = 1; - hdl_desc->isroot = false; - hdl->endpoint->remote_ctrl = parent_ctl_pointer; - -#if __TEST_BLOCKING__ - ret = hdl->hdl_recv(hdl, hdl->endpoint, hdl_desc); -#else - ret = hdl->hdl_recvi(hdl, hdl->endpoint, NULL, 0, 0, &hdl_desc); -#endif - -#if __TEST_WAIT__ - ret = hdl->hdl_wait(hdl, hdl->endpoint, hdl_desc); - BASESMUMA_VERBOSE(1,("wait on rank %d is done!", my_rank)); -#endif - if (OMPI_SUCCESS != ret) { - BASESMUMA_VERBOSE(1, ("recvi eror on rank %d ........", my_rank)); - goto exit_ERROR; - } - - status = false; -#if __TEST_TEST__ - while (!status) { - hdl->hdl_test(&hdl_desc, &completed, &status); - opal_progress(); - BASESMUMA_VERBOSE(1, ("test on rank %d ........", my_rank)); - } -#endif - - goto Release; - - }else{ - input_args->result_in_rbuf = false; - /* Interior node */ - - /* Get parent payload data and control data */ - parent_ctl_pointer = ctl_structs[my_fanout_parent]; - - hdl_desc->des_dst = hdl_seg; - hdl_desc->des_dst_cnt = 1; - hdl_desc->isroot = false; - - hdl->endpoint->remote_ctrl = parent_ctl_pointer; - - ret = hdl->hdl_recv(hdl, hdl->endpoint, hdl_desc); - if (OMPI_SUCCESS != ret) { - goto exit_ERROR; - } - if (OMPI_SUCCESS != ret) { - BASESMUMA_VERBOSE(1, ("recvi eror on rank %d ........", my_rank)); - goto exit_ERROR; - } - - /* Signal to children that they may read the data from my shared buffer */ - opal_atomic_wmb (); - hdl_desc->des_src = hdl_seg; - hdl_desc->des_src_cnt = 1; - for (ridx = 0; ridx < my_fanout_read_tree->n_children; ridx++) { - child_ctl_pointer = - ctl_structs[my_fanout_read_tree->children_ranks[ridx]]; - hdl->endpoint->remote_ctrl = child_ctl_pointer; - - ret = hdl->hdl_send(hdl, hdl->endpoint, hdl_desc); - if (ret != OMPI_SUCCESS) { - BASESMUMA_VERBOSE(1, ("send eror on rank %d ........", my_rank)); - goto exit_ERROR; - } - } - goto Release; - } - - Release: - /* if I am the last instance of a 
basesmuma function in this collectie, - * release the resrouces */ - if (IS_LAST_BCOL_FUNC(c_input_args)) { - rc = bcol_basesmuma_free_buff( - &(bcol_module->colls_with_user_data), - sequence_number); - } - - my_ctl_pointer->starting_flag_value += 1; - - return BCOL_FN_COMPLETE; - exit_ERROR: - return OMPI_ERROR; -} -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c deleted file mode 100644 index 4bb451f907c..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c +++ /dev/null @@ -1,895 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "bcol_basesmuma_utils.h" -#include "bcol_basesmuma.h" - -/* debug - * #include "opal/sys/timer.h" - * - * extern uint64_t timers[7]; - * end debug */ - -/* debug */ -#include -/* end debug */ - -/* includes shared memory optimization */ - -#define BCOL_BASESMUMA_SM_PROBE(src_list, n_src, my_index, matched, src) \ - do { \ - int j; \ - for( j = 0; j < n_src; j++) { \ - parent_ctl_pointer = data_buffs[src_list[j]].ctl_struct; \ - parent_data_pointer = data_buffs[src_list[j]].payload; \ - if( IS_DATA_READY(parent_ctl_pointer,ready_flag,sequence_number)) { \ - src = src_list[j]; \ - matched = 1; \ - break; \ - } \ - } \ - } while(0) - -/* - #define IS_LARGE_DATA_READY(peer, my_flag, my_sequence_number) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[BCAST_FLAG] >= 
(my_flag) \ - )? true : false ) -*/ - -/* - #define IS_KNOWN_ROOT_DATA_READY(peer, my_flag, my_sequence_number) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[BCAST_FLAG][bcol_id] >= (my_flag) \ - )? true : false ) -*/ - -#define BCOL_BASESMUMA_SM_LARGE_MSG_PROBE(src_list, n_src, my_index, matched, src, flag_index, bcol_id) \ - do { \ - int j; \ - for( j = 0; j < n_src; j++) { \ - /* fprintf(stderr,"my_rank %d and %d\n",my_rank,1); */ \ - if(src_list[j] != -1) { \ - parent_ctl_pointer = ctl_structs[src_list[j]]; \ - parent_data_pointer = (void *) data_buffs[src_list[j]].ctl_struct; \ - /*fprintf(stderr,"my_rank %d ready flag %d partner flag %d and %d\n",my_rank,ready_flag,parent_ctl_pointer->flag,2); */ \ - if( IS_PEER_READY(parent_ctl_pointer,ready_flag,sequence_number, flag_index, bcol_id)) { \ - src = src_list[j]; \ - matched = 1; \ - index = j; \ - /* fprintf(stderr,"found it from %d!\n",src);*/ \ - break; \ - } \ - } \ - } \ - } while(0) - -#define K_NOMIAL_DATA_SRC(radix, my_group_index, group_size, group_root, data_src, radix_mask) \ - do { \ - int relative_rank = (my_group_index >= group_root) ? 
my_group_index - group_root : \ - my_group_index - group_root + group_size; \ - radix_mask = 1; \ - while (radix_mask < group_size) { \ - if (relative_rank % (radix * radix_mask)) { \ - data_src = relative_rank/(radix * radix_mask) * (radix * radix_mask) + group_root; \ - if (data_src >= group_size) data_src -= group_size; \ - break; \ - } \ - radix_mask *= radix; \ - } \ - } while (0) - -int bcol_basesmuma_bcast_k_nomial_knownroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - int i, matched = 0; - int group_size; - int my_rank; - int leading_dim, - buff_idx, - idx; - int count = input_args->count; - struct ompi_datatype_t* dtype = input_args->dtype; - int64_t sequence_number = input_args->sequence_num; - int radix = - mca_bcol_basesmuma_component.k_nomial_radix; - int radix_mask; - int16_t data_src = -1; - - volatile int8_t ready_flag; - int bcol_id = (int) bcol_module->super.bcol_id; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char* parent_data_pointer; - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - - size_t pack_len = 0; - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr + - input_args->sbuf_offset); - -#if 0 - fprintf(stderr,"Entering nb-sm broadcast input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - BASESMUMA_VERBOSE(3, ("Calling bcol_basesmuma_bcast_k_nomial_knownroot")); - - pack_len = mca_bcol_base_get_buff_length(dtype, count); - /* Some hierarchical algorithms have data that is accumulated at each step - * this factor accounts for this - */ - pack_len = pack_len*input_args->hier_factor; - buff_idx = 
input_args->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx = SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* setup resource recycling */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - /* removing dependence on sequence number */ - /* I believe this is resolved now with the signaling flags */ - /* - ready_temp = 1 + (int8_t) flag_offset + (int8_t) bcol_id; - if( ready_temp >= my_ctl_pointer->flags[BCAST_FLAG][bcol_id]) { - ready_flag = ready_temp; - } else { - ready_flag = my_ctl_pointer->flags[BCAST_FLAG][bcol_id]; - } - opal_atomic_wmb (); - my_ctl_pointer->sequence_number = sequence_number; - */ - - - /* non-blocking broadcast algorithm */ - - /* If I am the root, then signal ready flag */ - if(input_args->root_flag) { - BASESMUMA_VERBOSE(10,("I am the root of the data")); - /* - * signal ready flag - */ - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; - - /* root is finished */ - goto Release; - } - - - /* Calculate source of the data */ - K_NOMIAL_DATA_SRC(radix, my_rank, group_size, - input_args->root_route->rank, data_src, radix_mask); - - - parent_ctl_pointer = data_buffs[data_src].ctl_struct; - parent_data_pointer = data_buffs[data_src].payload; - - for( i = 0; i < cs->num_to_probe && 0 == matched; i++) { - - if(IS_PEER_READY(parent_ctl_pointer,ready_flag,sequence_number, BCAST_FLAG, bcol_id)) { - matched = 1; - break; - } - } - - /* If not matched, then hop out and put me on progress list */ - if(0 == matched ) { - BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match")); - return 
BCOL_FN_NOT_STARTED; - } - - /* else, we found our root within the group ... */ - BASESMUMA_VERBOSE(10,("Shared memory probe was matched, the root is %d", data_src)); - - /* copy the data */ - memcpy(data_addr, (void *) parent_data_pointer, pack_len); - /* set the memory barrier to ensure completion */ - opal_atomic_wmb (); - /* signal that I am done */ - my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; - - - Release: - my_ctl_pointer->starting_flag_value[bcol_id]++; - return BCOL_FN_COMPLETE; -} - - -/** - * Shared memory non-blocking Broadcast - K-nomial fan-out for small data buffers. - * This routine assumes that buf (the input buffer) is a single writer - * multi reader (SWMR) shared memory buffer owned by the calling rank - * which is the only rank that can write to this buffers. - * It is also assumed that the buffers are registered and fragmented - * at the ML level and that buf is sufficiently large to hold the data. - * - * - * @param buf - SWMR shared buffer within a sbgp that the - * executing rank can write to. - * @param count - the number of elements in the shared buffer. - * @param dtype - the datatype of a shared buffer element. - * @param root - the index within the sbgp of the root. - * @param module - basesmuma module. 
- */ -int bcol_basesmuma_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - int i; - int group_size; - int my_rank; - int leading_dim, buff_idx, idx; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int64_t sequence_number=input_args->sequence_num; - int radix = cs->k_nomial_radix; - int radix_mask; - int relative_rank; - int pow_k_group_size; - - volatile int8_t ready_flag; - int bcol_id = (int) bcol_module->super.bcol_id; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile void* parent_data_pointer; - - volatile mca_bcol_basesmuma_header_t *child_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - - size_t pack_len = 0; - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr + - input_args->sbuf_offset); - -#if 0 - fprintf(stderr,"Entering nb-sm broadcast input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - - /* we will work only on packed data - so compute the length*/ - pack_len = mca_bcol_base_get_buff_length(dtype, count); - - buff_idx = input_args->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /* get pow_k_levels and pow_k_group_size */ - pow_k_group_size = bcol_module->pow_k; - - - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, 
bcol_id); - - /* non-blocking broadcast algorithm */ - - /* If I am the root, then signal ready flag */ - if(input_args->root_flag) { - - BASESMUMA_VERBOSE(10,("I am the root of the data")); - /* - * set the radix_mask */ - radix_mask = pow_k_group_size; - /* send to children */ - opal_atomic_wmb (); - BASESMUMA_K_NOMIAL_SEND_CHILDREN(radix_mask, - radix,0, - my_rank,group_size, ready_flag); - /* root is finished */ - goto Release; - } - - /* If I am not the root, then poll on possible "senders'" control structs */ - for( i = 0; i < cs->num_to_probe; i++) { - - if( ready_flag == my_ctl_pointer->flags[BCAST_FLAG][bcol_id]) { - - /* else, we found our root within the group ... */ - parent_data_pointer = data_buffs[my_ctl_pointer->src].payload; - BASESMUMA_VERBOSE(5,("%d found it from %d \n",my_rank,my_ctl_pointer->src)); - /* memcopy the data */ - memcpy(data_addr, (void *) parent_data_pointer, pack_len); - /* compute my relative rank */ - relative_rank = (my_rank - my_ctl_pointer->src) < 0 ? 
my_rank - - my_ctl_pointer->src + group_size : my_rank - my_ctl_pointer->src; - - /* compute my radix mask */ - radix_mask = 1; - while(radix_mask < group_size ){ - if( 0 != relative_rank % (radix*radix_mask)) { - /* found it */ - break; - } - radix_mask *= radix; - } - /* go one step back */ - radix_mask /= radix; - - /* send to children */ - opal_atomic_wmb (); - BASESMUMA_K_NOMIAL_SEND_CHILDREN(radix_mask, - radix, relative_rank, - my_rank, group_size, ready_flag); - /* bail */ - - goto Release; - } - - } - - - - /* If not matched, then hop out and put me on progress list */ - BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match")); - /*fprintf(stderr,"bcol_id %d Not started\n",bcol_id);*/ - return BCOL_FN_NOT_STARTED; - - - - Release: - - - my_ctl_pointer->starting_flag_value[bcol_id]++; - - return BCOL_FN_COMPLETE; -} - - -/* non-blocking binary scatter allgather anyroot algorithm for large data - * broadcast - */ - - -#if 0 -/* prototype code for shared memory scatter/allgather algorithm. Signaling scheme - * works, should be used as a reference for other types of shared memory scatter/allgather - * algorithms. 
- */ -int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - /* local variables */ - int i, j; - int length; - int start; - int my_rank, parent_rank; - int partner; - int src = -1; - int matched = 0; - int group_size; - int first_instance=0; - int leading_dim, buff_idx, idx; - int64_t sequence_number=input_args->sequence_num; - - int64_t ready_flag; - int64_t local_offset; - - int flag_offset; - int pow_2, pow_2_levels; - int index = -1; - - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - mca_bcol_basesmuma_module_t *bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - /* use the old control structs for large messages, - * otherwise we will destroy the shared memory - * optimization - */ - mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer; - mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer; /* binomial fanout */ - mca_bcol_basesmuma_ctl_struct_t *partner_ctl_pointer; /* recursive double */ - - /* for now, we use the payload buffer for single fragment */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile void *parent_data_pointer; /* binomial scatter */ - volatile void *partner_data_pointer; /* recursive double */ - - uint32_t fragment_size; /* ml buffer size for now */ - - /* we will transfer the entire buffer, - * so start at the base address of the ml buffer - */ - void *data_addr = (void *) ((unsigned char *) input_args->src_desc->base_data_addr); -#if 0 - fprintf(stderr,"AAA Entering nb-sm large msg broadcast input_args->frag_size %d \n",input_args->frag_size); - fflush(stderr); -#endif - - buff_idx = input_args->src_desc->buffer_index; - - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - - /* get the largest power of two that is smaller than - * or equal to the group size - */ - pow_2_levels = 
bcol_module->pow_2_levels; - pow_2 = bcol_module->pow_2; - - /* get the fragment size - */ - - /* still just the size of the entire buffer */ - fragment_size = input_args->buffer_size; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - my_rank = bcol_module->super.sbgp_partner_module->my_index; - - - /* grab the control structs */ - ctl_structs = (mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_with_user_data.ctl_buffs+idx; - - /* grab the data buffs */ - data_buffs = (mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - my_ctl_pointer = ctl_structs[my_rank]; - - if(my_ctl_pointer->sequence_number < sequence_number) { - first_instance = 1; - } - - if(first_instance) { - my_ctl_pointer->flag = -1; - my_ctl_pointer->index = 1; - - my_ctl_pointer->starting_flag_value = 0; - - flag_offset = 0; - - } else { - - my_ctl_pointer->index++; - } - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - - my_ctl_pointer->sequence_number = sequence_number; - - /* am I the root */ - if(input_args->root_flag) { - /* if I've already been here, then - * hop down to the allgather - */ - if(ALLGATHER == my_ctl_pointer->status) { - goto Allgather; - } - BASESMUMA_VERBOSE(10,("I am the root of the data")); - /* debug print */ - /*fprintf(stderr,"I am the root %d\n",my_rank);*/ - /* - * signal ready flag - */ - /* set the offset into the buffer */ - my_ctl_pointer->offset = 0; - /* how many children do I have */ - my_ctl_pointer->n_sends = pow_2_levels; - /* my data length */ - my_ctl_pointer->length = fragment_size; - - /* important that these be set before my children - * see the ready flag raised - */ - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - /* root is finished */ - if( my_rank < pow_2 ) { - /* if I'm in the power of two group, - * then goto the allgather - */ - my_ctl_pointer->status = ALLGATHER; - goto Allgather; - - } 
else { - - /* if I'm not, then I'm done and release */ - goto Release; - } - - } - - /* what phase am I participating in - */ - switch(my_ctl_pointer->status) { - - case SCATTER: - goto Scatter; - break; - - case ALLGATHER: - goto Allgather; - break; - - case EXTRA_RANK: - goto Extra; - break; - - default: - break; - } - - - Extra: - /* am I part of the non-power-of-2 group */ - if( my_rank >= pow_2 ) { - /* find parent to copy from */ - parent_rank = my_rank&(pow_2-1); - parent_ctl_pointer = ctl_structs[parent_rank]; - /* start at the base */ - parent_data_pointer = (void *) data_buffs[parent_rank].ctl_struct; - - /* now, I need to do some arithmetic to - * arrive at the value everyone else does - * when they have completed the algorithm - */ - - /* compute ready flag value to poll on */ - ready_flag = ready_flag + pow_2_levels; - - /* start to poll */ - for( i = 0; i< cs->num_to_probe; i++) { - if(IS_LARGE_DATA_READY(parent_ctl_pointer,ready_flag, sequence_number)) { - /* copy the data and bail */ - memcpy(data_addr,(void *)parent_data_pointer,fragment_size); - goto Release; - } - /* - else { - opal_progress(); - } - */ - } - my_ctl_pointer->status = EXTRA_RANK; - - /* hop out and put me onto a progress queue */ - return BCOL_FN_NOT_STARTED; - } - - Scatter: - - /* on first entry, compute the list of possible sources */ - if( NULL == my_ctl_pointer->src_ptr ) { - my_ctl_pointer->src_ptr = (int *) malloc(sizeof(int)*(pow_2_levels+1)); - - for( i = 0; i < pow_2_levels; i++) { - my_ctl_pointer->src_ptr[i] = my_rank ^ (1<src_ptr[i] = my_rank + pow_2; - } else { - /* no extra rank to worry about */ - my_ctl_pointer->src_ptr[i] = -1; - } - } - - /* If I am not the root, then poll on possible "senders'" control structs */ - for( i = 0; i < cs->num_to_probe && 0 == matched; i++) { - - /* Shared memory iprobe */ - BCOL_BASESMUMA_SM_LARGE_MSG_PROBE(my_ctl_pointer->src_ptr, pow_2_levels+1, - my_rank, matched, src); - } - - /* If not matched, then hop out and put me on 
progress list */ - if(0 == matched ) { - - BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match")); - - my_ctl_pointer->status = SCATTER; - return BCOL_FN_NOT_STARTED; - - } else if ( src >= pow_2 ){ - - /* If matched from an extra rank, then get the whole message from partner */ - memcpy((void *) data_addr, (void *) parent_data_pointer, - parent_ctl_pointer->length); - - /* now I am the psuedo-root in the power-of-two group */ - my_ctl_pointer->offset = 0; - my_ctl_pointer->length = parent_ctl_pointer->length; - my_ctl_pointer->n_sends = parent_ctl_pointer->n_sends; - - /* set the memory barrier */ - opal_atomic_wmb (); - - /* fire the ready flag */ - my_ctl_pointer->flag = ready_flag; - my_ctl_pointer->status = ALLGATHER; - /* go to the allgather */ - goto Allgather; - } - - - /* we need to see whether this is really - * who we are looking for - */ - for( i = 0; i < parent_ctl_pointer->n_sends; i++) { - /* debug print */ - /* - fprintf(stderr,"I am %d checking on a hit from %d with n_sends %d\n",my_rank,src,parent_ctl_pointer->n_sends); - fflush(stderr); - */ - /* end debug */ - if( my_rank == (src^(1<n_sends = i; - - if ( i > 0) { - /* compute the size of the chunk to copy */ - length = (parent_ctl_pointer->length)/ - (1<<(parent_ctl_pointer->n_sends - my_ctl_pointer->n_sends)); - my_ctl_pointer->length = length; - my_ctl_pointer->offset = - parent_ctl_pointer->offset+length; - - /*fprintf(stderr,"%d's offset %d and length %d \n",my_rank,my_ctl_pointer->offset,length);*/ - - /* now we can copy the data */ - memcpy((void *) ((uint64_t) data_addr+my_ctl_pointer->offset), - (void *) ((uint64_t) parent_data_pointer+(uint64_t) parent_ctl_pointer->offset + - (uint64_t) length), - (size_t)length); - } else { - /* this "trick" takes care of the first level - * of recurssive doubling - */ - length = parent_ctl_pointer->length/ - (1<<(parent_ctl_pointer->n_sends - 1)); - my_ctl_pointer->length = length; - my_ctl_pointer->offset = parent_ctl_pointer->offset; - - 
/*fprintf(stderr,"%d's offset %d and length %d\n",my_rank,my_ctl_pointer->offset,length);*/ - /* now we can copy the data */ - memcpy((void *) ((uint64_t) data_addr+my_ctl_pointer->offset), - (void *) ((uint64_t) parent_data_pointer+(uint64_t) my_ctl_pointer->offset), - (size_t)length); - } - /* set the memory barrier to ensure completion */ - opal_atomic_wmb (); - /* signal that I am done */ - my_ctl_pointer->flag = ready_flag; - /* set my status */ - my_ctl_pointer->status = ALLGATHER; - /* time for allgather phase */ - goto Allgather; - } - - } - - /* this is not who we are looking for, - * mark as false positive so we don't - * poll here again - */ - my_ctl_pointer->src_ptr[index] = -1; - /* probably we should jump out and put onto progress list */ - my_ctl_pointer->status = SCATTER; - return BCOL_FN_NOT_STARTED; - - Allgather: - - /* zip it back up - we have already taken care of first level */ - /* needed for non-blocking conditional */ - matched = 0; - - /* get my local_offset */ - local_offset = my_ctl_pointer->offset; - - /* bump the ready flag */ - ready_flag++; - - /* first level of zip up */ - length = 2*fragment_size/pow_2; - - /* first level of zip-up - * already includes first level of - * recursive doubling - */ - start = 1; - - /* for non-blocking, check to see if I need to reset the state */ - if(my_ctl_pointer->flag >= ready_flag) { - /* then reset the state */ - ready_flag = my_ctl_pointer->flag; - start = my_ctl_pointer->start; - /* get the local offset */ - local_offset = my_ctl_pointer->offset_zip; - /* compute the correct length */ - length = length*(1<<(start - 1)); - /* careful! 
skip over the opal_atomic_wmb () to avoid the - * cost on every re-entry - */ - goto Loop; - } - - - opal_atomic_wmb (); - /* I am ready, set the flag */ - my_ctl_pointer->flag = ready_flag; - - Loop: - - for( i = start; i < pow_2_levels; i++) { - /* get my partner for this level */ - partner = my_rank^(1<num_to_probe && matched == 0; j++) { - if(IS_LARGE_DATA_READY(partner_ctl_pointer, ready_flag, sequence_number)) { - - /* debug prints - fprintf(stderr,"666 I am %d and sequence num is %d partner is %d ready_flag %d parent ready_flag %d buff_idx %d partner_offset %d\n", - my_rank,sequence_number,partner, ready_flag,partner_ctl_pointer->flag,buff_idx,partner_ctl_pointer->offset); - */ - /* debug print */ -#if 0 - fprintf(stderr,"I am %d and sequence num is %d partner is %d ready_flag %d parent ready_flag %d buff_idx %d \n", - my_rank,sequence_number,partner, ready_flag,parent_ctl_pointer->flag,buff_idx); -#endif - /* end debug prints */ - - assert(partner_ctl_pointer->flag >= ready_flag); - /* found it */ - matched = 1; - /* only copy it, if you sit at a lower level in the tree */ - if( my_ctl_pointer->n_sends <= partner_ctl_pointer->n_sends ) { - - /* calculate the local offset based on partner's remote offset */ - if( partner_ctl_pointer->offset < my_ctl_pointer->offset ) { - /* then I'm looking "up" the tree */ - local_offset -= length; - /* debug print */ - /*fprintf(stderr,"I am %d and partner is %d partner offset %d length %d \n",my_rank,partner, local_offset,length);*/ - /* end debug */ - memcpy((void *) ((uint64_t) data_addr + (uint64_t) local_offset), - (void *) ((uint64_t) partner_data_pointer + (uint64_t) local_offset), - length); - } else { - /* I'm looking "down" the tree */ - local_offset += length; - /* debug print */ - /*fprintf(stderr,"I am %d and partner is %d partner offset %d length %d \n",my_rank,partner, local_offset,length);*/ - /* end debug */ - memcpy((void *) ((uint64_t) data_addr + (uint64_t) local_offset), - (void *) ((uint64_t) 
partner_data_pointer + (uint64_t) local_offset), - length); - /* reset my local offset */ - local_offset -= length; - } - - } - /* bump the ready flag */ - ready_flag++; - /* ensure completion */ - opal_atomic_wmb (); - - /* fire the flag for the next level */ - my_ctl_pointer->flag = ready_flag; - - /* double the length */ - length *= 2; - } - } - /* check to see what kind of progress I've made */ - if( 0 == matched ) { - /* save state, hop out and try again later */ - my_ctl_pointer->start = i; - /* save the local offset */ - my_ctl_pointer->offset_zip = local_offset; - /* put in progress queue */ - return BCOL_FN_STARTED; - } - /* else, start next level of recursive doubling */ - matched = 0; - - } - - - /* cleanup */ - if(NULL != my_ctl_pointer->src_ptr) { - free(my_ctl_pointer->src_ptr); - my_ctl_pointer->src_ptr = NULL; - } - - Release: - - - /* If I am the last instance, release the resource */ - /* - if( IS_LAST_BCOL_FUNC(c_input_args)) { - rc = bcol_basesmuma_free_buff( - &(bcol_module->colls_with_user_data), - sequence_number); - } - */ - - my_ctl_pointer->starting_flag_value++; - my_ctl_pointer->status = FINISHED; - return BCOL_FN_COMPLETE; - -} -#endif - -#if 0 -int mca_bcol_basesmuma_bcast_binomial_scatter_allgather(void *desc) -{ - /* local variables */ - int rc, n_frags_sent; - uint32_t stripe_number; - int count, count_processed; - size_t dt_size; - uint32_t n_data_segments_to_schedule; - ompi_datatype_t *dtype; - message_descriptor_t *message_descriptor; - mca_bcol_basesmuma_module_t *bcol_module; - int pipe_depth; - - - /* get the full message descriptor */ - - - /* compute the number of fragments to send */ - - - /* start to fill the pipeline */ - - - return OMPI_SUCCESS; - - - - -} -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c deleted file mode 100644 index 1a65b5cb015..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c +++ /dev/null @@ -1,485 +0,0 @@ 
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/patterns/comm/coll_ops.h" - -#include "opal/dss/dss.h" - -#include "bcol_basesmuma.h" -/* - * With support for nonblocking collectives, we don't have an upper - * limit on the number of outstanding collectives per communicator. - * Also, since we want to avoid communication to figure out which - * buffers other ranks in the group will use, we will rely on the - * fact that collective operations are called in the same order - * in each process, to assign a unique ID to each collective operation. - * We use this to create a static mapping from the index to the buffer - * that will be used. Also, because there is no limit to the number of - * outstanding collective operations, we use a generation index for each - * memory bank, so the collective will use the buffer only when the - * correct generation of the bank is ready for use. 
- */ -int bcol_basesmuma_get_buff_index( sm_buffer_mgmt *buff_block, - uint64_t buff_id ) -{ - /* local variables */ - int memory_bank; - uint64_t generation; - int index=-1; - - - /* get the bank index that will be used */ - memory_bank=buff_id& buff_block->mask; - memory_bank = memory_bank SHIFT_DOWN buff_block->log2_num_buffs_per_mem_bank; - - /* get the generation of the bank this maps to */ - generation = buff_id SHIFT_DOWN (buff_block->log2_number_of_buffs); - - /* check to see if the bank is available */ - if( generation == buff_block->ctl_buffs_mgmt[memory_bank]. - bank_gen_counter ) { - - /* get the buffer index that will be returned */ - index=buff_id & buff_block->mask; - - /* no in-use counter increment, as the mapping is static, and - * all we need to know if the number of collectives that complete */ - - } else { - /* progress communications so that resources can be freed up */ - opal_progress(); - } - - /* return */ - return index; -} - -/* release the shared memory buffers - * buf_id is the unique ID assigned to the particular buffer - */ -int bcol_basesmuma_free_buff( sm_buffer_mgmt * buff_block, - uint64_t buff_id ) -{ - /* local variables */ - int ret=OMPI_SUCCESS; - int memory_bank; - uint64_t generation; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - - /* get the bank index that will be used */ - memory_bank=buff_id& buff_block->mask; - memory_bank = memory_bank SHIFT_DOWN buff_block->log2_num_buffs_per_mem_bank; - - /* get the generation of the bank this maps to */ - generation = buff_id SHIFT_DOWN (buff_block->log2_number_of_buffs); - - /* the generation counter should not change until all resrouces - * associated with this bank have been freed. 
- */ - assert(generation == buff_block->ctl_buffs_mgmt[memory_bank].bank_gen_counter); - - /* - * increment counter of completed buffers - */ - OPAL_THREAD_ADD32(&(buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed), - 1); - - /* - * If I am the last to checkin - initiate resource recycling - */ - if( buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed == - buff_block->ctl_buffs_mgmt[memory_bank].number_of_buffers ) { - - /* Lock to ensure atomic recycling of resources */ - OPAL_THREAD_LOCK(&(buff_block->ctl_buffs_mgmt[memory_bank].mutex)); - - /* make sure someone else did not already get to this */ - if( buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed != - buff_block->ctl_buffs_mgmt[memory_bank].number_of_buffers ) { - /* release lock and exit */ - OPAL_THREAD_UNLOCK(&(buff_block->ctl_buffs_mgmt[memory_bank].mutex)); - } else { - sm_nbbar_desc_t *p_sm_nb_desc = NULL; - /* initiate the freeing of resources. Need to make sure the other - * ranks in the group are also done with their resources before this - * block is made available for use again. - * No one else will try to allocate from this block or free back to - * this block until the next genration counter has been incremented, - * so will just reset the number of freed buffers to 0, so no one else - * will try to also initialize the recycling of these resrouces - */ - buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed=0; - - /* Start the nonblocking barrier */ - p_sm_nb_desc = &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc); - p_sm_nb_desc->coll_buff = buff_block; - bcol_basesmuma_rd_nb_barrier_init_admin(p_sm_nb_desc); - - if( NB_BARRIER_DONE != - buff_block->ctl_buffs_mgmt[memory_bank]. 
- nb_barrier_desc.collective_phase) { - - opal_list_t *list=&(cs->nb_admin_barriers); - opal_list_item_t *append_item; - - /* put this onto the progression list */ - OPAL_THREAD_LOCK(&(cs->nb_admin_barriers_mutex)); - append_item=(opal_list_item_t *) - &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc); - opal_list_append(list,append_item); - OPAL_THREAD_UNLOCK(&(cs->nb_admin_barriers_mutex)); - /* progress communications so that resources can be freed up */ - opal_progress(); - } else { - /* mark the block as available */ - (buff_block->ctl_buffs_mgmt[memory_bank].bank_gen_counter)++; - } - - /* get out of here */ - OPAL_THREAD_UNLOCK(&(buff_block->ctl_buffs_mgmt[memory_bank].mutex)); - } - - } - - /* return */ - return ret; -} - -/* - * Allocate buffers for storing non-blocking collective descriptions, required - * for making code re-entrant - * - */ -static int init_nb_coll_buff_desc(mca_bcol_basesmuma_nb_coll_buff_desc_t **desc, - void *base_addr, uint32_t num_banks, - uint32_t num_buffers_per_bank, - uint32_t size_buffer, - uint32_t header_size, - int group_size, - int pow_k) -{ - uint32_t i, j, ci; - mca_bcol_basesmuma_nb_coll_buff_desc_t *tmp_desc = NULL; - int k_nomial_radix = mca_bcol_basesmuma_component.k_nomial_radix; - int pow_k_val = (0 == pow_k) ? 
1 : pow_k; - int num_to_alloc = (k_nomial_radix - 1) * pow_k_val * 2 + 1 ; - - - *desc = (mca_bcol_basesmuma_nb_coll_buff_desc_t *)calloc(num_banks * num_buffers_per_bank, sizeof(mca_bcol_basesmuma_nb_coll_buff_desc_t)); - if (NULL == *desc) { - return OMPI_ERROR; - } - - tmp_desc = *desc; - - for (i = 0; i < num_banks; i++) { - for (j = 0; j < num_buffers_per_bank; j++) { - ci = i * num_buffers_per_bank + j; - tmp_desc[ci].bank_index = i; - tmp_desc[ci].buffer_index = j; - /* *2 is for gather session +1 for extra peer */ - tmp_desc[ci].requests = (ompi_request_t **) - calloc(num_to_alloc, sizeof(ompi_request_t *)); - tmp_desc[ci].data_addr = (void *) - ((unsigned char*)base_addr + ci * size_buffer + header_size); - BASESMUMA_VERBOSE(10, ("ml memory cache setup %d %d - %p", i, j, tmp_desc[ci].data_addr)); - } - } - - return OMPI_SUCCESS; -} - - -/* - * Free buffers for storing non-blocking collective descriptions. - * - */ -void cleanup_nb_coll_buff_desc(mca_bcol_basesmuma_nb_coll_buff_desc_t **desc, - uint32_t num_banks, - uint32_t num_buffers_per_bank) -{ - uint32_t ci; - if (NULL != *desc) { - for (ci=0; ciml_mem; - - /* first, we get a pointer to the payload buffer management struct */ - pload_mgmt = &(sm_bcol->colls_with_user_data); - - /* go ahead and get the header size that is cached on the payload block - */ - sm_bcol->total_header_size = data_offset; - - /* allocate memory for pointers to mine and my peers' payload buffers - * difference here is that now we use our new data struct - */ - malloc_size = ml_block->num_banks*ml_block->num_buffers_per_bank* - pload_mgmt->size_of_group *sizeof(mca_bcol_basesmuma_payload_t); - pload_mgmt->data_buffs = (mca_bcol_basesmuma_payload_t *) malloc(malloc_size); - if( !pload_mgmt->data_buffs) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* allocate some memory to hold the offsets */ - results_array = (void **) malloc(pload_mgmt->size_of_group * sizeof (void *)); - if (NULL == results_array) { - ret = 
OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* setup the input file for the shared memory connection manager */ - input_file.file_name = sm_reg_data->file_name; - input_file.size = sm_reg_data->size; - input_file.size_ctl_structure = 0; - input_file.data_seg_alignment = BASESMUMA_CACHE_LINE_SIZE; - input_file.mpool_size = sm_reg_data->size; - - /* call the connection manager and map my shared memory peers' file - */ - ret = bcol_basesmuma_smcm_allgather_connection( - sm_bcol, - sm_bcol->super.sbgp_partner_module, - &(cs->sm_connections_list), - &(sm_bcol->payload_backing_files_info), - sm_bcol->super.sbgp_partner_module->group_comm, - input_file, cs->payload_base_fname, - false); - if( OMPI_SUCCESS != ret ) { - goto exit_ERROR; - } - - - /* now we exchange offset info - don't assume symmetric virtual memory - */ - - mem_offset = (void *) ((uintptr_t) ml_block->block->base_addr - - (uintptr_t) cs->sm_payload_structs->data_addr); - - /* call into the exchange offsets function */ - ret=comm_allgather_pml(&mem_offset, results_array, sizeof (void *), MPI_BYTE, - sm_bcol_module->super.sbgp_partner_module->my_index, - sm_bcol_module->super.sbgp_partner_module->group_size, - sm_bcol_module->super.sbgp_partner_module->group_list, - sm_bcol_module->super.sbgp_partner_module->group_comm); - if( OMPI_SUCCESS != ret ) { - goto exit_ERROR; - } - - /* convert memory offset to virtual address in current rank */ - leading_dim = pload_mgmt->size_of_group; - loop_limit = ml_block->num_banks*ml_block->num_buffers_per_bank; - for (i=0;i< sm_bcol_module->super.sbgp_partner_module->group_size;i++) { - - /* get the base pointer */ - int array_id=SM_ARRAY_INDEX(leading_dim,0,i); - if( i == sm_bcol_module->super.sbgp_partner_module->my_index) { - /* me */ - base_ptr=cs->sm_payload_structs->map_addr; - } else { - base_ptr=sm_bcol_module->payload_backing_files_info[i]-> - sm_mmap->map_addr; - } - - /* first, set the pointer to the control struct */ - 
pload_mgmt->data_buffs[array_id].ctl_struct=(mca_bcol_basesmuma_header_t *) - (uintptr_t)(((uint64_t)(uintptr_t)results_array[array_id])+(uint64_t)(uintptr_t)base_ptr); - /* second, calculate where to set the data pointer */ - pload_mgmt->data_buffs[array_id].payload=(void *) - (uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct + - (uint64_t)(uintptr_t) data_offset); - - for( buf_id = 1 ; buf_id < loop_limit ; buf_id++ ) { - int array_id_m1=SM_ARRAY_INDEX(leading_dim,(buf_id-1),i); - array_id=SM_ARRAY_INDEX(leading_dim,buf_id,i); - /* now, play the same game as above - * - * first, set the control struct's position */ - pload_mgmt->data_buffs[array_id].ctl_struct=(mca_bcol_basesmuma_header_t *) - (uintptr_t)(((uint64_t)(uintptr_t)(pload_mgmt->data_buffs[array_id_m1].ctl_struct) + - (uint64_t)(uintptr_t)ml_block->size_buffer)); - - /* second, set the payload pointer */ - pload_mgmt->data_buffs[array_id].payload =(void *) - (uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct + - (uint64_t)(uintptr_t) data_offset); - } - - } - - /* done with the index array */ - free (results_array); - results_array = NULL; - - /* initialize my control structures!! */ - my_idx = sm_bcol_module->super.sbgp_partner_module->my_index; - leading_dim = sm_bcol_module->super.sbgp_partner_module->group_size; - for( buf_id = 0; buf_id < loop_limit; buf_id++){ - array_id = SM_ARRAY_INDEX(leading_dim,buf_id,my_idx); - ctl_ptr = pload_mgmt->data_buffs[array_id].ctl_struct; - - /* initialize the data structures */ - for( j = 0; j < SM_BCOLS_MAX; j++){ - for( i = 0; i < NUM_SIGNAL_FLAGS; i++){ - ctl_ptr->flags[i][j] = -1; - } - } - ctl_ptr->sequence_number = -1; - ctl_ptr->src = -1; - } - - - - - /* setup the data structures needed for releasing the payload - * buffers back to the ml level - */ - for( i=0 ; i < (int) ml_block->num_banks ; i++ ) { - sm_bcol->colls_with_user_data. 
- ctl_buffs_mgmt[i].nb_barrier_desc.ml_memory_block_descriptor= - ml_block; - } - - ml_mem->num_banks = ml_block->num_banks; - ml_mem->bank_release_counter = calloc(ml_block->num_banks, sizeof(uint32_t)); - ml_mem->num_buffers_per_bank = ml_block->num_buffers_per_bank; - ml_mem->size_buffer = ml_block->size_buffer; - /* pointer to ml level descriptor */ - ml_mem->ml_mem_desc = ml_block; - - if (OMPI_SUCCESS != init_nb_coll_buff_desc(&ml_mem->nb_coll_desc, - ml_block->block->base_addr, - ml_mem->num_banks, - ml_mem->num_buffers_per_bank, - ml_mem->size_buffer, - data_offset, - sm_bcol_module->super.sbgp_partner_module->group_size, - sm_bcol_module->pow_k)) { - - BASESMUMA_VERBOSE(10, ("Failed to allocate memory descriptors for storing state of non-blocking collectives\n")); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; - -exit_ERROR: - if (NULL != results_array) { - free(results_array); - } - return ret; -} - -#endif - - - -/* Basesmuma interface function used for buffer release */ -#if 0 -/* gvm - * A collective operation calls this routine to release the payload buffer. - * All processes in the shared memory sub-group of a bcol should call the non-blocking - * barrier on the last payload buffer of a memory bank. On the completion - * of the non-blocking barrier, the ML callback is called which is responsible - * for recycling the memory bank. 
- */ -mca_bcol_basesmuma_module_t *sm_bcol_module -int bcol_basesmuma_free_payload_buff( - struct mca_bcol_base_memory_block_desc_t *block, - sm_buffer_mgmt *ctl_mgmt, - uint64_t buff_id) -{ - /* local variables */ - int ret = OMPI_SUCCESS; - - memory_bank = BANK_FROM_BUFFER_IDX(buff_id); - ctl_mgmt->ctl_buffs_mgmt[memory_bank].n_buffs_freed++; - - OPAL_THREAD_ADD32(&(ctl_mgmt->ctl_buffs_mgmt[memory_bank].n_buffs_freed),1); - - if (ctl_mgmt->ctl_buffs_mgmt[memory_bank].n_buffs_freed == block->size_buffers_bank){ - - /* start non-blocking barrier */ - bcol_basesmuma_rd_nb_barrier_init_admin( - &(ctl_mgmt->ctl_buffs_mgmt[memory_bank].nb_barrier_desc)); - - if (NB_BARRIER_DONE != - ctl_mgmt->ctl_buffs_mgmt[memory_bank]. - nb_barrier_desc.collective_phase){ - - /* progress the barrier */ - opal_progress(); - } - else{ - /* free the buffer - i.e. initiate callback to ml level */ - block->ml_release_cb(block,memory_bank); - } - } - return ret; -} -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_component.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_component.c deleted file mode 100644 index 1ee19b2fdb5..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_component.c +++ /dev/null @@ -1,391 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "opal/align.h" -#include "bcol_basesmuma.h" - -/* - * Public string showing the coll ompi_sm V2 component version number - */ -const char *mca_bcol_basesmuma_component_version_string = - "Open MPI bcol - basesmuma collective MCA component version " OMPI_VERSION; - -/* - * Local functions - */ - -static int basesmuma_register(void); -static int basesmuma_open(void); -static int basesmuma_close(void); -static int mca_bcol_basesmuma_deregister_ctl_sm( - mca_bcol_basesmuma_component_t *bcol_component); - - -static inline int mca_bcol_basesmuma_param_register_int( - const char* param_name, int default_value, int *storage) -{ - *storage = default_value; - return mca_base_component_var_register(&mca_bcol_basesmuma_component.super.bcol_version, param_name, - NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); -} - -static inline int mca_bcol_basesmuma_param_register_bool( - const char* param_name, bool default_value, bool *storage) -{ - *storage = default_value; - return mca_base_component_var_register(&mca_bcol_basesmuma_component.super.bcol_version, param_name, - NULL, MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); -} - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_bcol_basesmuma_component_t mca_bcol_basesmuma_component = { - - /* First, fill in the super */ - - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .bcol_version = { - MCA_BCOL_BASE_VERSION_2_0_0, - - /* Component name and version */ - - 
.mca_component_name = "basesmuma", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = basesmuma_open, - .mca_close_component = basesmuma_close, - .mca_register_component_params = basesmuma_register, - }, - - /* Initialization / querying functions */ - - .collm_init_query = mca_bcol_basesmuma_init_query, - .collm_comm_query = mca_bcol_basesmuma_comm_query, - .init_done = false, - .need_ordering = false, - .priority = 0, /* (default) priority */ - }, -}; - -/* - * Register the component - */ -static int basesmuma_register(void) -{ - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - - /* set component priority */ - mca_bcol_basesmuma_param_register_int("priority", 90, &cs->super.priority); - - /* Number of memory banks */ - mca_bcol_basesmuma_param_register_int("basesmuma_num_ctl_banks", 2, - &cs->basesmuma_num_mem_banks); - - /* Number of regions per memory bank */ - mca_bcol_basesmuma_param_register_int("basesmuma_num_buffs_per_bank", 16, - &cs->basesmuma_num_regions_per_bank); - - /* number of polling loops to allow pending resources to - * complete their work - */ - mca_bcol_basesmuma_param_register_int("n_poll_loops", 4, &cs->n_poll_loops); - - - /* Number of groups supported */ - mca_bcol_basesmuma_param_register_int("n_groups_supported", 100, - &cs->n_groups_supported); - - /* order of fanin tree */ - mca_bcol_basesmuma_param_register_int("radix_fanin", 2, &cs->radix_fanin); - - /* order of fanout tree */ - mca_bcol_basesmuma_param_register_int("radix_fanout", 2, &cs->radix_fanout); - - /* order of read tree */ - mca_bcol_basesmuma_param_register_int("radix_read_tree", 3, - &cs->radix_read_tree); - - /* order of reduction fanout tree */ - mca_bcol_basesmuma_param_register_int("order_reduction_tree", 2, - &cs->order_reduction_tree); - - /* k-nomial radix */ - mca_bcol_basesmuma_param_register_int("k_nomial_radix", 3, 
&cs->k_nomial_radix); - - /* number of polling loops for non-blocking algorithms */ - mca_bcol_basesmuma_param_register_int("num_to_probe", 10, &cs->num_to_probe); - - /* radix of the k-ary scatter tree */ - mca_bcol_basesmuma_param_register_int("scatter_kary_radix", 4, - &cs->scatter_kary_radix); - - /* register parmeters controlling message fragementation */ - mca_bcol_basesmuma_param_register_int("min_frag_size", getpagesize(), - &cs->super.min_frag_size); - mca_bcol_basesmuma_param_register_int("max_frag_size", FRAG_SIZE_NO_LIMIT, - &cs->super.max_frag_size); - - /* by default use pre-registered shared memory segments */ - /* RLG NOTE: When we have a systematic way to handle single memory - * copy semantics, we need to update this logic - */ - mca_bcol_basesmuma_param_register_bool("can_use_user_buffers", false, - &cs->super.can_use_user_buffers); - - mca_bcol_basesmuma_param_register_int("verbose", 0, &cs->verbose); - - return OMPI_SUCCESS; -} - -/* - * Open the component - */ -static int basesmuma_open(void) -{ - - /* local variables */ - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - int ret = OMPI_SUCCESS; - opal_mutex_t *mutex_ptr; - int dummy; - - /* - * Make sure that the number of banks is a power of 2 - */ - cs->basesmuma_num_mem_banks= - roundup_to_power_radix(2,cs->basesmuma_num_mem_banks, &dummy); - if ( 0 == cs->basesmuma_num_mem_banks ) { - ret=OMPI_ERROR; - goto exit_ERROR; - } - - /* - * Make sure that the the number of buffers is a power of 2 - */ - cs->basesmuma_num_regions_per_bank= - roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank, &dummy); - if ( 0 == cs->basesmuma_num_regions_per_bank ) { - ret=OMPI_ERROR; - goto exit_ERROR; - } - - /* Portals initialization */ - cs->portals_init = false; - cs->portals_info = NULL; - - /* - * initialization - */ - cs->sm_ctl_structs=NULL; - OBJ_CONSTRUCT(&(cs->sm_connections_list),opal_list_t); - OBJ_CONSTRUCT(&(cs->nb_admin_barriers),opal_list_t); - mutex_ptr= 
&(cs->nb_admin_barriers_mutex); - OBJ_CONSTRUCT(mutex_ptr, opal_mutex_t); - - /* Control structures object construct - */ - OBJ_CONSTRUCT(&(cs->ctl_structures), opal_list_t); - - /* shared memory has not been registered yet */ - cs->mpool_inited = false; - - /* initialize base file names */ - cs->clt_base_fname="sm_ctl_mem_"; - cs->payload_base_fname="sm_payload_mem_"; - - /* initialize the size of the shared memory scartch region */ - cs->my_scratch_shared_memory_size=getpagesize(); - cs->my_scratch_shared_memory=NULL; - cs->scratch_offset_from_base_ctl_file=0; - - /* - * register the progess function - */ - ret=opal_progress_register(bcol_basesmuma_progress); - if (MPI_SUCCESS != ret) { - opal_output(ompi_bcol_base_framework.framework_output, "failed to register the progress function"); - } - - return ret; - - exit_ERROR: - return ret; -} - -/* - * release the control structure backing file - */ -static int mca_bcol_basesmuma_deregister_ctl_sm(mca_bcol_basesmuma_component_t *bcol_component) -{ - if (NULL != bcol_component->sm_ctl_structs) { - OBJ_RELEASE(bcol_component->sm_ctl_structs); - } - - return OMPI_SUCCESS; -} - - -/* - * Close the component - */ -static int basesmuma_close(void) -{ - int ret; - bcol_basesmuma_registration_data_t *net_ctx; - bcol_base_network_context_t *net_reg; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - - /* gvm Leak FIX */ - while(!opal_list_is_empty(&(cs->ctl_structures))) { - opal_list_item_t *item; - item = opal_list_remove_first(&(cs->ctl_structures)); - OBJ_DESTRUCT(item); - } - OBJ_DESTRUCT(&(cs->ctl_structures)); - - /* deregister the progress function */ - ret=opal_progress_unregister(bcol_basesmuma_progress); - if (MPI_SUCCESS != ret) { - opal_output(ompi_bcol_base_framework.framework_output, "failed to unregister the progress function"); - } - - /* remove the control structure backing file */ - ret=mca_bcol_basesmuma_deregister_ctl_sm(&mca_bcol_basesmuma_component); - if (MPI_SUCCESS != ret) { - 
opal_output(ompi_bcol_base_framework.framework_output, "failed to remove control structure backing file"); - } - - /* remove the network contexts - only one network context defined for - * this component. - */ - /* file_name returne by asprintf, so need to free the resource */ - if(mca_bcol_basesmuma_component.super.network_contexts ) { - net_reg=(bcol_base_network_context_t *) - mca_bcol_basesmuma_component.super.network_contexts[0]; - if(net_reg) { - net_ctx=(bcol_basesmuma_registration_data_t *)net_reg->context_data; - if( net_ctx) { - if(net_ctx->file_name) { - free(net_ctx->file_name); - } - free(net_ctx); - } - free(net_reg); - } - free(mca_bcol_basesmuma_component.super.network_contexts); - mca_bcol_basesmuma_component.super.network_contexts=NULL; - } - - /* normal return */ - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_bcol_basesmuma_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* done */ - return OMPI_SUCCESS; -} - -/* This routine is used to allocate shared memory for the the shared - * memory control regions. 
- */ -int mca_bcol_basesmuma_allocate_sm_ctl_memory(mca_bcol_basesmuma_component_t *cs) -{ - /* local variables */ - int name_length, ret; - size_t ctl_length; - char *name; - size_t page_size = getpagesize (); - - /* set the file name */ - name_length=asprintf(&name, - "%s"OPAL_PATH_SEP"%s""%0d", - ompi_process_info.job_session_dir, - cs->clt_base_fname, - (int)getpid()); - if( 0 > name_length ) { - return OMPI_ERROR; - } - /* make sure name is not too long */ - if ( OPAL_PATH_MAX < (name_length-1) ) { - return OMPI_ERROR; - } - - /* compute segment length */ - - ctl_length=(cs->basesmuma_num_mem_banks* - cs->basesmuma_num_regions_per_bank+cs->basesmuma_num_mem_banks) - *sizeof(mca_bcol_basesmuma_ctl_struct_t)*cs->n_groups_supported; - /* need two banks of memory per group - for algorithms that have - * user payload, and those that don't - */ - ctl_length*=2; - - /* add space for internal library management purposes */ - ctl_length+=cs->my_scratch_shared_memory_size; - - /* round up to multiple of page size */ - ctl_length = OPAL_ALIGN(ctl_length, page_size, size_t); - - /* allocate the shared file */ - cs->sm_ctl_structs=bcol_basesmuma_smcm_mem_reg (NULL, ctl_length, getpagesize(), name); - if( !cs->sm_ctl_structs) { - opal_output (ompi_bcol_base_framework.framework_output, - "In mca_bcol_basesmuma_allocate_sm_ctl_memory failed to allocathe backing file %s\n", name); - ret=OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - /* free the memory allocated by asprintf for the file name - - * in mca_base_smcm_mem_reg this name is copied into a new - * memory location */ - free(name); - - /* successful return */ - return OMPI_SUCCESS; - - Error: - if(name) { - free(name); - } - return ret; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c deleted file mode 100644 index 668dde0b09d..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak 
Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* Recursive doubling blocking barrier */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/patterns/net/netpatterns.h" - -#include "opal/sys/atomic.h" - -#include "ompi/mca/bcol/base/base.h" -#include "bcol_basesmuma.h" - -/********************************************************************************/ -/********************************** New Fan-In **********************************/ -/********************************************************************************/ - -static int bcol_basesmuma_fanin_new(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int64_t sequence_number; - - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - int i, child_rank, idx, n_children, probe, - my_rank = bcol_module->super.sbgp_partner_module->my_index, - leading_dim = bcol_module->colls_no_user_data.size_of_group; - int8_t ready_flag; - int8_t bcol_id = (int8_t) bcol_module->super.bcol_id; - int buff_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buff_index].active_requests); - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - int matched = 0; - - - volatile mca_bcol_basesmuma_payload_t *ctl_structs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl; - volatile mca_bcol_basesmuma_header_t *child_ctl; - - - netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node); - - /* Figure out - what instance of the basesmuma bcol I am */ - sequence_number = input_args->sequence_num; - - idx = SM_ARRAY_INDEX(leading_dim, buff_index, 0); - ctl_structs = (volatile 
mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - my_ctl = ctl_structs[my_rank].ctl_struct; - - /* Init the header */ - BASESMUMA_HEADER_INIT(my_ctl, ready_flag, sequence_number, bcol_id); - - /* Cache num of children value in a local variable */ - n_children = my_tree_node->n_children; - - /* initialize the active requests */ - *active_requests = 0; - /* create a bit map for children */ - for( i = 0; i < n_children; i++){ - *active_requests ^= (1<children_ranks[i]; - child_ctl = ctl_structs[child_rank].ctl_struct; - /* I'm sacrificing cache for concurrency */ - for( probe = 0; probe < cm->num_to_probe && (0 == matched); probe++){ - if(IS_PEER_READY(child_ctl, ready_flag, sequence_number,BARRIER_FANIN_FLAG, bcol_id)) { - matched = 1; - /* flip the bit */ - *active_requests ^= (1<my_node_type){ - /* I have no more active requests, - signal my parent */ - my_ctl->flags[BARRIER_FANIN_FLAG][bcol_id] = ready_flag; - } - } else { - return BCOL_FN_STARTED; - } - - my_ctl->starting_flag_value[bcol_id]++; - - return BCOL_FN_COMPLETE; -} - -static int bcol_basesmuma_fanin_new_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int64_t sequence_number; - - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - int i, child_rank, flag_offset, idx, n_children, probe, - my_rank = bcol_module->super.sbgp_partner_module->my_index, - leading_dim = bcol_module->colls_no_user_data.size_of_group; - int8_t ready_flag; - int8_t bcol_id = (int8_t) bcol_module->super.bcol_id; - int buff_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buff_index].active_requests); - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - int matched = 0; - - - volatile mca_bcol_basesmuma_payload_t *ctl_structs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl; - 
volatile mca_bcol_basesmuma_header_t *child_ctl; - - - netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node); - - sequence_number = input_args->sequence_num; - - idx = SM_ARRAY_INDEX(leading_dim, buff_index, 0); - ctl_structs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - my_ctl = ctl_structs[my_rank].ctl_struct; - - - flag_offset = my_ctl->starting_flag_value[bcol_id]; - ready_flag = flag_offset + 1; - my_ctl->sequence_number = sequence_number; - - /* Cache num of children value in a local variable */ - n_children = my_tree_node->n_children; - - - /* Wait until my childeren arrive */ - for (i = 0; i < n_children; ++i) { - matched = 0; - /* Get child ctl struct */ - if ( 1 == ((*active_requests >> i)&1) ) { - child_rank = my_tree_node->children_ranks[i]; - child_ctl = ctl_structs[child_rank].ctl_struct; - /* I'm sacrificing cache for concurrency */ - for( probe = 0; probe < cm->num_to_probe && (0 == matched); probe++){ - if(IS_PEER_READY(child_ctl, ready_flag, sequence_number, BARRIER_FANIN_FLAG,bcol_id)) { - matched = 1; - /* flip the bit */ - *active_requests ^= (1<my_node_type){ - /* If I am not the root of the fanin tree, - then signal my parent */ - my_ctl->flags[BARRIER_FANIN_FLAG][bcol_id] = ready_flag; - } - } else { - return BCOL_FN_STARTED; - } - - my_ctl->starting_flag_value[bcol_id]++; - - return BCOL_FN_COMPLETE; -} - - -int bcol_basesmuma_fanin_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - BASESMUMA_VERBOSE(10, ("Basesmuma Fan-In register.\n")); - - comm_attribs.bcoll_type = BCOL_FANIN; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 
0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - bcol_basesmuma_fanin_new, - bcol_basesmuma_fanin_new_progress); - - return OMPI_SUCCESS; -} - - diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c deleted file mode 100644 index f5a1dd38b11..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* Recursive doubling blocking barrier */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/patterns/net/netpatterns.h" - -#include "opal/sys/atomic.h" - -#include "ompi/mca/bcol/base/base.h" -#include "bcol_basesmuma.h" - -/***********************************************************************************/ -/*********************************** New Fan-Out ***********************************/ -/***********************************************************************************/ - -static int bcol_basesmuma_fanout_new( - bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int64_t sequence_number; - - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - int idx, probe, - my_rank = bcol_module->super.sbgp_partner_module->my_index, - leading_dim = bcol_module->colls_no_user_data.size_of_group; - int8_t ready_flag; - int8_t bcol_id = (int8_t) bcol_module->super.bcol_id; - int buff_index = input_args->buffer_index; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - - - volatile mca_bcol_basesmuma_payload_t *ctl_structs; - - /* control 
structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl; - volatile mca_bcol_basesmuma_header_t *parent_ctl; - - - netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node); - - /* Figure out - what instance of the basesmuma bcol I am */ - sequence_number = input_args->sequence_num; - - idx = SM_ARRAY_INDEX(leading_dim, buff_index, 0); - ctl_structs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - my_ctl = ctl_structs[my_rank].ctl_struct; - - /* init the header */ - BASESMUMA_HEADER_INIT(my_ctl, ready_flag, sequence_number, bcol_id); - - /* Wait on my parent to arrive */ - if (my_tree_node->n_parents) { - parent_ctl = ctl_structs[my_tree_node->parent_rank].ctl_struct; - for( probe = 0; probe < cm->num_to_probe; probe++){ - if (IS_PEER_READY(parent_ctl, ready_flag, sequence_number, BARRIER_FANOUT_FLAG, bcol_id)) { - /* signal my children */ - my_ctl->flags[BARRIER_FANOUT_FLAG][bcol_id] = ready_flag; - /* bump the starting flag */ - my_ctl->starting_flag_value[bcol_id]++; - return BCOL_FN_COMPLETE; - - } - } - - } else { - /* I am the root of the fanout */ - my_ctl->flags[BARRIER_FANOUT_FLAG][bcol_id] = ready_flag; - /* bump the starting flag */ - my_ctl->starting_flag_value[bcol_id]++; - return BCOL_FN_COMPLETE; - } - - - - - - return BCOL_FN_STARTED; -} - -int bcol_basesmuma_fanout_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - BASESMUMA_VERBOSE(10, ("Basesmuma Fan-Out register.\n")); - - comm_attribs.bcoll_type = BCOL_FANOUT; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - 
mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - bcol_basesmuma_fanout_new, - bcol_basesmuma_fanout_new); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c deleted file mode 100644 index ef3d856b88c..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c +++ /dev/null @@ -1,1106 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h" -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -/* debug - * #include "opal/sys/timer.h" - * - * extern uint64_t timers[7]; - * end debug */ - -/* debug */ -#include -/* end debug */ - -/* non-blocking gather routines: init and progress functions */ -int bcol_basesmuma_gather_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_GATHER; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1048576; - comm_attribs.data_src = DATA_SRC_KNOWN; - comm_attribs.waiting_semantics = BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; - inv_attribs.datatype_bitmap = 0x11111111; - inv_attribs.op_types_bitmap = 0x11111111; - - /* Set attributes for fanin fanout algorithm */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_k_nomial_gather_init, - bcol_basesmuma_k_nomial_gather_progress); - - return 
OMPI_SUCCESS; -} - -int bcol_basesmuma_k_nomial_gather_init(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int leading_dim, buff_idx, idx; - int src, i, j, k_temp1, k_temp2; - int pseudo_root, proxy_root, pseudo_base_adj; - volatile int8_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int base_adj, base; - int total_peers, my_pow_k=0; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - int bcol_id = (int) bcol_module->super.bcol_id; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - netpatterns_k_exchange_node_t *exchange_node = - &bcol_module->knomial_allgather_tree; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - - int buff_offset = bcol_module->super.hier_scather_offset; - - /* "indirectors" */ - int *inv_map = exchange_node->inv_reindex_map; - int *reindex_map = exchange_node->reindex_map; - int stray = exchange_node->k_nomial_stray; - - /* tree radix */ - int tree_order = exchange_node->tree_order; - /* tree depth */ - int pow_k = exchange_node->log_tree_order; - /* largest power of k less than or equal to np */ - int cnt = exchange_node->n_largest_pow_tree_order; - - /* payload structures */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - - size_t pack_len = 0, dt_size; - -#if 0 - fprintf(stderr,"Entering sm gather input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - 
/* this is the size of my data, this is not gatherv so it's the same - * for all ranks in the communicator. - */ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - /* now set the "real" offset */ - buff_offset = buff_offset*pack_len; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* init the header */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - - /* init active requests, iteration, and status */ - *iteration = 0; - *active_requests = 0; - *status = -1; - /* calculate the number of steps necessary for this collective */ - - /* first thing we do is figure out where the root is in our new indexing */ - /* find root in new indexing */ - pseudo_root = inv_map[root]; - /* see if this is larger than the stray */ - if (pseudo_root >= stray) { - /* then we need to define the proxy root, everyone can do this */ - proxy_root = pseudo_root - cnt; - } else { - proxy_root = pseudo_root; - } - - /* do some figuring */ - if (EXCHANGE_NODE == exchange_node->node_type) { - total_peers = 0; - my_pow_k = pow_k; - k_temp1 = tree_order; - k_temp2 = 1; - for( i = 0; i < pow_k; i++) { - /* then find the base */ - FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order); - /* now find the adjusted base */ - base_adj = base + (base + proxy_root)%k_temp1; - /* ok, now find out WHO is occupying this slot */ - pseudo_base_adj = reindex_map[base_adj]; - - if(my_rank == pseudo_base_adj ) { - /* then go ahead and poll for children's data */ - for( j = 0; j < (tree_order - 1); j++ ) { - /* send phase - */ - /* get 
communication partner */ - - src = exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. Make a check - */ - if( src < 0 ){ - continue; - }else{ - - /* flip a bit to represent this request */ - *active_requests ^= (1<node_type || 0 == exchange_node->n_extra_sources) { - if (0 == my_pow_k || EXTRA_NODE == exchange_node->node_type) { - opal_atomic_rmb (); - - my_ctl_pointer->flags[GATHER_FLAG][bcol_id] = ready_flag; - } - - if ((EXTRA_NODE == exchange_node->node_type && root != my_rank) || 0 == my_pow_k) { - /* nothing more to do */ - my_ctl_pointer->starting_flag_value[bcol_id]++; - - return BCOL_FN_COMPLETE; - } - } - - return BCOL_FN_STARTED; -} - - -int bcol_basesmuma_k_nomial_gather_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int group_size; - int flag_offset; - int leading_dim, buff_idx, idx; - int src, knt, i, j, k_temp1, k_temp2; - volatile int8_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int probe; - int matched; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - int bcol_id = (int) bcol_module->super.bcol_id; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - netpatterns_k_exchange_node_t *exchange_node = - &bcol_module->knomial_allgather_tree; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - int buff_offset = bcol_module->super.hier_scather_offset; - /* "indirectors" */ - int *list_connected = 
bcol_module->super.list_n_connected; - /* tree radix */ - int tree_order = exchange_node->tree_order; - /* payload structures */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char *child_data_pointer; - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *child_ctl_pointer; - /*volatile mca_bcol_basesmuma_ctl_struct_t* parent_ctl_pointer; */ - - size_t pack_len = 0, dt_size; - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr); - - -#if 0 - fprintf(stderr,"Entering sm gather input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - /* this is the size of my data, this is not gatherv so it's the same - * for all ranks in the communicator. - */ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - /* now set the "real" offset */ - buff_offset = buff_offset*pack_len; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - /* restart the ready_flag state */ - flag_offset = my_ctl_pointer->starting_flag_value[bcol_id]; - ready_flag = flag_offset + 1; - - /* calculate the number of steps necessary for this collective */ - - /* first thing we do is figure out where the root is in our new indexing */ - /* find root in new indexing */ - if( EXTRA_NODE == exchange_node->node_type ) { - - /* poll for data from proxy */ - src = exchange_node->rank_extra_sources_array[0]; - /* get src data 
buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - /* remember to bump your flag */ - ready_flag++; - - /* in this case, you must block */ - for (i = 0 ; i < cm->num_to_probe ; ++i) { - if (IS_PEER_READY(child_ctl_pointer,ready_flag,sequence_number, GATHER_FLAG, bcol_id)){ - /* receive the data from the proxy, aka pseudo-root */ - memcpy((void *) ((unsigned char *) data_addr + buff_offset), - (void *) ((unsigned char *) child_data_pointer+buff_offset), - pack_len * group_size); - - goto FINISHED; - } - } - - return BCOL_FN_STARTED; - } - - - if (0 < exchange_node->n_extra_sources && (-1 == (*status))) { - /* am a proxy, poll for pack_len data from extra */ - src = exchange_node->rank_extra_sources_array[0]; - /* get src data buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - knt = 0; - for( i = 0; i < src; i++){ - knt += list_connected[i]; - } - /* must block here also */ - matched = 0; - for (i = 0, matched = 0 ; i < cm->num_to_probe && (0 == matched) ; ++i) { - if(IS_PEER_READY(child_ctl_pointer,ready_flag,sequence_number, GATHER_FLAG, bcol_id)){ - matched = 1; - memcpy((void *) ((unsigned char *) data_addr + buff_offset + pack_len*knt), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - pack_len*knt), pack_len*list_connected[src]); - *status = 0; - if( 0 == *active_requests ){ - goto LAST_STEP; - } - - break; - } - } - if( 0 == matched ){ - return BCOL_FN_STARTED; - } - } - - /* start the k-nomial gather phase */ - /* only "active ranks participate, once a rank has forwarded its data, it becomes inactive */ - for (probe = 0 ; probe < cm->num_to_probe ; ++probe) { - k_temp1 = tree_order; - k_temp2 = 1; - for (i = 0 ; i < *(iteration) ; ++i) { - - /* then go ahead and poll for children's data */ - for (j = 0 ; j < (tree_order - 1) ; ++j) { - /* send phase - */ - /* get communication partner */ - - src = 
exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. Make a check - */ - /* if the bit that corresponds to this child has been set to zero, - * then it has already checked in and data received - */ - if (src < 0 || 1 != ((*active_requests >> ((tree_order - 1)*i + j))&1)){ - continue; - } - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - - if(IS_PEER_READY(child_ctl_pointer,ready_flag,sequence_number, GATHER_FLAG, bcol_id)){ - /* copy the data */ - memcpy((void *) ((unsigned char *) data_addr + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - exchange_node->payload_info[i][j].r_len*pack_len); - /* flip the bit to zero */ - *active_requests ^= (1<<((tree_order - 1)*i + j)); - if(0 == (*active_requests)) { - goto LAST_STEP; - } - } - } - } - - k_temp1 = k_temp1*tree_order; - k_temp2 = k_temp2*tree_order; - } - - - return BCOL_FN_STARTED; - -LAST_STEP: - /* last step, proxies send full data back to the extra ranks */ - if( 0 < exchange_node->n_extra_sources && - root == exchange_node->rank_extra_sources_array[0]) { - /* regardless, I will bump the ready flag and set it in case someone is watching */ - /* announce that data is ready */ - ready_flag++; - } - - /* signal that data is ready */ - opal_atomic_wmb (); - my_ctl_pointer->flags[GATHER_FLAG][bcol_id] = ready_flag; - -FINISHED: - - - my_ctl_pointer->starting_flag_value[bcol_id]++; - - return BCOL_FN_COMPLETE; -} - - -/* Blocking routines, used to prototype and test signaling, - * as well as debug hierarchical algorithm - */ -#if 0 -int bcol_basesmuma_gather_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_GATHER; 
- comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 16; - comm_attribs.data_src = DATA_SRC_KNOWN; - comm_attribs.waiting_semantics = BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; - inv_attribs.datatype_bitmap = 0x11111111; - inv_attribs.op_types_bitmap = 0x11111111; - - - /* Set attributes for fanin fanout algorithm */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, bcol_basesmuma_k_nomial_gather, - bcol_basesmuma_k_nomial_gather); - - return OMPI_SUCCESS; -} -#endif - - -/* original, fully blocking, fully synchronous gather - should result in worst performance when used */ -#if 0 -int bcol_basesmuma_k_nomial_gather(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int group_size; - int first_instance=0, flag_offset; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int *group_list; - int src, comm_src, knt, i, k, j, k_temp1, k_temp2; - int pseudo_root, proxy_root, pseudo_base_adj; - volatile int64_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int base_adj, base; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - netpatterns_k_exchange_node_t *exchange_node = - &bcol_module->knomial_allgather_tree; - - int buff_offset = bcol_module->super.hier_scather_offset; - - /* "indirectors" */ - int *list_connected = bcol_module->super.list_n_connected; - int *inv_map = exchange_node->inv_reindex_map; - int *reindex_map = exchange_node->reindex_map; - /*int *reindex_map = exchange_node->reindex_map;*/ - /* stray rank == first rank in the extra set */ - int stray = exchange_node->k_nomial_stray; - - /* tree radix */ - int 
tree_order = exchange_node->tree_order; - /* tree depth */ - int pow_k = exchange_node->log_tree_order; - /* largest power of k less than or equal to np */ - int cnt = exchange_node->n_largest_pow_tree_order; - - /*fprintf(stderr,"tree order %d pow_k %d stray %d root %d\n",tree_order, pow_k, stray, root);*/ - /* payload structures */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char *child_data_pointer; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *child_ctl_pointer; - /*volatile mca_bcol_basesmuma_ctl_struct_t* parent_ctl_pointer; */ - - size_t pack_len = 0, dt_size; - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr); - - /* active in the algorithm */ - bool active = true; - -#if 0 - fprintf(stderr,"Entering sm gather input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - /* this is the size of my data, this is not gatherv so it's the same - * for all ranks in the communicator. 
- */ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - /* now set the "real" offset */ - buff_offset = buff_offset*pack_len; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - /* I have a feeling that I'll need this */ - group_list = bcol_module->super.sbgp_partner_module->group_list; - - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - /*ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_with_user_data.ctl_buffs+idx; - */ - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - /*my_ctl_pointer = ctl_structs[my_rank]; */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* setup resource recycling */ - if( my_ctl_pointer->sequence_number < sequence_number ) { - first_instance=1; - } - - if( first_instance ) { - /* Signal arrival */ - my_ctl_pointer->flag = -1; - my_ctl_pointer->gflag = -1; - my_ctl_pointer->index=1; - /* this does not need to use any flag values , so only need to - * set the value for subsequent values that may need this */ - my_ctl_pointer->starting_flag_value=0; - flag_offset=0; - - } else { - /* only one thread at a time will be making progress on this - * collective, so no need to make this atomic */ - my_ctl_pointer->index++; - } - - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - my_ctl_pointer->sequence_number = sequence_number; - -/* debug - fprintf(stderr," sequence_number %lld flag_offset %d starting flag val %d\n",sequence_number,flag_offset, my_ctl_pointer->starting_flag_value); - fflush(stderr); - end debug */ - - - /* - * Fan out from root - */ - /* don't need 
this either */ - /* root is the local leader */ - /* calculate the number of steps necessary for this collective */ - - /* first thing we do is figure out where the root is in our new indexing */ - /* find root in new indexing */ - pseudo_root = inv_map[root]; - /* see if this is larger than the stray */ - if( pseudo_root >= stray ) { - /* then we need to define the proxy root, everyone can do this */ - proxy_root = pseudo_root - cnt; - }else { - proxy_root = pseudo_root; - } - - - - if( EXTRA_NODE == exchange_node->node_type ) { - - /* signal arrival */ - my_ctl_pointer->gflag = ready_flag; - - /* send is done */ - - /* poll for data only if I am the root */ - /* bump the ready flag */ - ready_flag++; - if( root == my_rank ){ - /* poll for data from proxy */ - src = exchange_node->rank_extra_sources_array[0]; - /* get src data buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - while(!IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - opal_progress(); - } - /* receive the data from the proxy, aka pseudo-root */ - - memcpy((void *) ((unsigned char *) data_addr + buff_offset),(void *) ((unsigned char *) child_data_pointer+buff_offset) - ,pack_len*group_size); - } - goto FINISHED; - - - } else if( 0 < exchange_node->n_extra_sources ) { - - /* am a proxy, poll for pack_len data from extra */ - src = exchange_node->rank_extra_sources_array[0]; - /* get src data buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - knt = 0; - for( i = 0; i < src; i++){ - knt += list_connected[i]; - } - while(!IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - opal_progress(); - } - memcpy((void *) ((unsigned char *) data_addr + buff_offset + pack_len*knt), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - pack_len*knt), pack_len*list_connected[src]); - /*fprintf(stderr,"999 proxy received data from %d at offset %d of length 
%d\n",src, - buff_offset+pack_len*knt,pack_len*list_connected[src]); - */ - } - - /* start the k-nomial gather phase */ - /* only "active ranks participate, once a rank has forwarded its data, it becomes inactive */ - knt = 0; - while(active){ - k_temp1 = tree_order; - k_temp2 = 1; - for( i = 0; i < pow_k; i++) { - /* then find the base */ - /*FIND_BASE(base,my_rank,i+1,tree_order);*/ - FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order); - /* now find the adjusted base */ - base_adj = base + (base + proxy_root)%k_temp1; - /* ok, now find out WHO is occupying this slot */ - /*pseudo_base_adj = inv_map[base_adj];*/ - pseudo_base_adj = reindex_map[base_adj]; - - if(my_rank == pseudo_base_adj ) { - /* then go ahead and poll for children's data */ - for( j = 0; j < (tree_order - 1); j++ ) { - /* send phase - */ - /* get communication partner */ - - src = exchange_node->rank_exchanges[i][j]; - /*fprintf(stderr,"comm_src %d\n",comm_src);*/ - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. 
Make a check - */ - if( src < 0 ){ - continue; - } - - /*fprintf(stderr,"src %d\n",src);*/ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - while(!IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - opal_progress(); - } - memcpy((void *) ((unsigned char *) data_addr + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - exchange_node->payload_info[i][j].r_len*pack_len); - /* - fprintf(stderr,"999 receiving data from %d at offset %d of length %d\n", - exchange_node->rank_exchanges[i][j], buff_offset + exchange_node->payload_info[i][j].r_offset, - exchange_node->payload_info[i][j].r_len*pack_len); - */ - opal_atomic_wmb (); - knt++; - if(knt == exchange_node->n_actual_exchanges) { - /* this is the trick to break the root out, - * only the root should be able to satisfy this - */ - /* - fprintf(stderr,"hello n_actual is %d \n",knt); - fprintf(stderr,"hello n_actual_exch is %d \n", - exchange_node->n_actual_exchanges); - */ - goto LAST_STEP; - } - } - } else { - /* announce my arrival */ - my_ctl_pointer->gflag = ready_flag; - active = false; - break; - } - - k_temp1 = k_temp1*tree_order; - k_temp2 = k_temp2*tree_order; - } - } -LAST_STEP: - /* last step, proxies send full data back to the extra ranks */ - if( 0 < exchange_node->n_extra_sources && - root == exchange_node->rank_extra_sources_array[0]) { - /* regardless, I will bump the ready flag and set it in case someone is watching */ - /* announce that data is ready */ - ready_flag++; - my_ctl_pointer->gflag = ready_flag; - } - - -FINISHED: - -/* debug - fprintf(stderr," my_ctl_pointer->index %d n of this type %d %u \n", - my_ctl_pointer->index,c_input_args->n_of_this_type_in_collective,getpid()); - fflush(stderr); - end debug */ - - my_ctl_pointer->starting_flag_value+=1; - - return BCOL_FN_COMPLETE; -} - -#endif - 
- -#if 0 -/* blocking, asynchronous polling gather routine */ -int bcol_basesmuma_k_nomial_gather(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int group_size; - int first_instance=0, flag_offset; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int *group_list; - int src, comm_src, knt, i, k, j, k_temp1, k_temp2; - int pseudo_root, proxy_root, pseudo_base_adj; - volatile int64_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int base_adj, base; - int total_peers, my_pow_k; - int probe; - int matched; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - netpatterns_k_exchange_node_t *exchange_node = - &bcol_module->knomial_allgather_tree; - - int buff_offset = bcol_module->super.hier_scather_offset; - - /* "indirectors" */ - int *list_connected = bcol_module->super.list_n_connected; - int *inv_map = exchange_node->inv_reindex_map; - int *reindex_map = exchange_node->reindex_map; - /*int *reindex_map = exchange_node->reindex_map;*/ - /* stray rank == first rank in the extra set */ - int stray = exchange_node->k_nomial_stray; - - /* tree radix */ - int tree_order = exchange_node->tree_order; - /* tree depth */ - int pow_k = exchange_node->log_tree_order; - /* largest power of k less than or equal to np */ - int cnt = exchange_node->n_largest_pow_tree_order; - - /*fprintf(stderr,"tree order %d pow_k %d stray %d root %d\n",tree_order, pow_k, stray, root);*/ - /* payload structures */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char *child_data_pointer; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile 
mca_bcol_basesmuma_header_t *child_ctl_pointer; - /*volatile mca_bcol_basesmuma_ctl_struct_t* parent_ctl_pointer; */ - - size_t pack_len = 0, dt_size; - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr); - - /* active in the algorithm */ - bool active = true; - -#if 0 - fprintf(stderr,"Entering sm gather root %d \n",root); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - /* this is the size of my data, this is not gatherv so it's the same - * for all ranks in the communicator. - */ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - /* now set the "real" offset */ - buff_offset = buff_offset*pack_len; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - /* I have a feeling that I'll need this */ - group_list = bcol_module->super.sbgp_partner_module->group_list; - - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - /*ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_with_user_data.ctl_buffs+idx; - */ - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - /*my_ctl_pointer = ctl_structs[my_rank]; */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* setup resource recycling */ - if( my_ctl_pointer->sequence_number < sequence_number ) { - first_instance=1; - } - - if( first_instance ) { - /* Signal arrival */ - my_ctl_pointer->flag = -1; - my_ctl_pointer->gflag = -1; - my_ctl_pointer->index=1; - /* this does not need to use any flag values , so only need to - * set the value for subsequent values that may need this */ - my_ctl_pointer->starting_flag_value=0; - flag_offset=0; - - } else { - /* only one thread at a time will be making 
progress on this - * collective, so no need to make this atomic */ - my_ctl_pointer->index++; - } - - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - my_ctl_pointer->sequence_number = sequence_number; - -/* debug - fprintf(stderr," sequence_number %lld flag_offset %d starting flag val %d\n",sequence_number,flag_offset, my_ctl_pointer->starting_flag_value); - fflush(stderr); - end debug */ - - - /* - * Fan out from root - */ - /* don't need this either */ - /* root is the local leader */ - /* calculate the number of steps necessary for this collective */ - - /* first thing we do is figure out where the root is in our new indexing */ - /* find root in new indexing */ - pseudo_root = inv_map[root]; - /* see if this is larger than the stray */ - if( pseudo_root >= stray ) { - /* then we need to define the proxy root, everyone can do this */ - proxy_root = pseudo_root - cnt; - }else { - proxy_root = pseudo_root; - } - if( EXTRA_NODE == exchange_node->node_type ) { - - /* signal arrival */ - my_ctl_pointer->gflag = ready_flag; - - /* send is done */ - - /* poll for data only if I am the root */ - /* bump the ready flag */ - ready_flag++; - if( root == my_rank ){ - /* poll for data from proxy */ - src = exchange_node->rank_extra_sources_array[0]; - /* get src data buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - /* in this case, you must block */ - while(!IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - opal_progress(); - } - /* receive the data from the proxy, aka pseudo-root */ - - memcpy((void *) ((unsigned char *) data_addr + buff_offset), - (void *) ((unsigned char *) child_data_pointer+buff_offset) - ,pack_len*group_size); - } - goto FINISHED; - - - } else if( 0 < exchange_node->n_extra_sources ) { - - /* am a proxy, poll for pack_len data from extra */ - src = 
exchange_node->rank_extra_sources_array[0]; - /* get src data buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - knt = 0; - for( i = 0; i < src; i++){ - knt += list_connected[i]; - } - /* must block here also */ - while(!IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - opal_progress(); - } - memcpy((void *) ((unsigned char *) data_addr + buff_offset + pack_len*knt), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - pack_len*knt), pack_len*list_connected[src]); - /*fprintf(stderr,"999 proxy received data from %d at offset %d of length %d\n",src, - buff_offset+pack_len*knt,pack_len*list_connected[src]); - */ - } - /* do some figuring */ - - total_peers = 0; - my_pow_k = pow_k; - k_temp1 = tree_order; - k_temp2 = 1; - for( i = 0; i < pow_k; i++) { - /* then find the base */ - /*FIND_BASE(base,my_rank,i+1,tree_order);*/ - FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order); - /* now find the adjusted base */ - base_adj = base + (base + proxy_root)%k_temp1; - /* ok, now find out WHO is occupying this slot */ - /*pseudo_base_adj = inv_map[base_adj];*/ - pseudo_base_adj = reindex_map[base_adj]; - - if(my_rank == pseudo_base_adj ) { - /* then go ahead and poll for children's data */ - for( j = 0; j < (tree_order - 1); j++ ) { - /* send phase - */ - /* get communication partner */ - - src = exchange_node->rank_exchanges[i][j]; - /*fprintf(stderr,"comm_src %d\n",comm_src);*/ - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. 
Make a check - */ - if( src < 0 ){ - continue; - }else{ - total_peers++; - } - - - } - } else { - /* announce my arrival */ - my_pow_k = i; - break; - } - - k_temp1 = k_temp1*tree_order; - k_temp2 = k_temp2*tree_order; - } - - if( 0 == my_pow_k ){ - /* signal arrival */ - my_ctl_pointer->gflag = ready_flag; - - goto FINISHED; - } - - - - /* start the k-nomial gather phase */ - /* only "active ranks participate, once a rank has forwarded its data, it becomes inactive */ - knt = 0; - while(active){ - k_temp1 = tree_order; - k_temp2 = 1; - for( i = 0; i < my_pow_k; i++) { - - /* then go ahead and poll for children's data */ - for( j = 0; j < (tree_order - 1); j++ ) { - matched = 0; - /* send phase - */ - /* get communication partner */ - - src = exchange_node->rank_exchanges[i][j]; - /*fprintf(stderr,"comm_src %d\n",comm_src);*/ - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. Make a check - */ - if( src < 0 ){ - continue; - } - - /*fprintf(stderr,"src %d\n",src);*/ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - - /* if child has been marked, then skip */ - if( sequence_number == child_ctl_pointer->mark ){ - continue; - } - - - for( probe = 0; probe < cm->num_to_probe && (0 == matched); probe++){ - if(IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - /* mark the child's pointer */ - child_ctl_pointer->mark = sequence_number; - /* copy the data */ - - memcpy((void *) ((unsigned char *) data_addr + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - exchange_node->payload_info[i][j].r_len*pack_len); - /* - fprintf(stderr,"999 receiving data from %d at offset %d of length %d\n", - exchange_node->rank_exchanges[i][j], buff_offset + exchange_node->payload_info[i][j].r_offset, - 
exchange_node->payload_info[i][j].r_len*pack_len); - */ - knt++; - if(knt == total_peers) { - /* this is the trick to break the root out, - * only the root should be able to satisfy this - */ - /* - fprintf(stderr,"hello n_actual is %d \n",knt); - fprintf(stderr,"hello n_actual_exch is %d \n", - exchange_node->n_actual_exchanges); - */ - opal_atomic_wmb (); - my_ctl_pointer->gflag = ready_flag; - - goto LAST_STEP; - } - matched = 1; - }else{ - opal_progress(); - } - } - } - } - - k_temp1 = k_temp1*tree_order; - k_temp2 = k_temp2*tree_order; - } -LAST_STEP: - /* last step, proxies send full data back to the extra ranks */ - if( 0 < exchange_node->n_extra_sources && - root == exchange_node->rank_extra_sources_array[0]) { - /* regardless, I will bump the ready flag and set it in case someone is watching */ - /* announce that data is ready */ - ready_flag++; - my_ctl_pointer->gflag = ready_flag; - } - - -FINISHED: - -/* debug - fprintf(stderr," my_ctl_pointer->index %d n of this type %d %u \n", - my_ctl_pointer->index,c_input_args->n_of_this_type_in_collective,getpid()); - fflush(stderr); - end debug */ - - my_ctl_pointer->starting_flag_value+=1; - - return BCOL_FN_COMPLETE; -} -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c deleted file mode 100644 index c985a6889ab..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c +++ /dev/null @@ -1,1878 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#ifdef __PORTALS_AVAIL__ -#define __PORTALS_ENABLE__ - -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "bcol_basesmuma.h" -#include "bcol_basesmuma_portals.h" -#include "bcol_basesmuma_lmsg_bcast.h" -#include "bcol_basesmuma_utils.h" - - - -/* - * Scatter/Gather Broadcast algorithm - * - * Algorithm highlights: - * - * Uses portals for data transfer - * - * All processes participating in the broadcast are arranged in a - * binmoial tree. - * - * Phase1: Scatter the broadcast data to all the children - * Phase2: All processes in the tree participates in recursive doubling - * algorithm to obtain the missing data. - */ - - -static int completed_scatter = 0; -#if 0 -int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast_old(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - /* local variables */ - int i; - uint64_t length; - int my_rank, parent_rank, src =-1, matched = 0; - int *src_list = NULL; - int group_size = -1, dummy_group_size; - int first_instance=0; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int count=input_args->count; - size_t pack_len = 0, dt_size =0 ; - int64_t ready_flag; - int flag_offset; - int pow_2, pow_2_levels; - int src_list_index = -1; - uint64_t fragment_size; /* user buffer size */ - int sg_matchbits = 0; - /* Input argument variables */ - void *my_userbuf = (void*)((unsigned char*)input_args->userbuf); - int64_t sequence_number=input_args->sequence_num; - struct ompi_datatype_t* dtype=input_args->dtype; - - /* Extra source variables */ - bool secondary_root = false; - int partner = -1, extra_partner = -1; - - /* Scatter Allgather offsets */ - uint64_t local_sg_offset = 0, global_sg_offset = 0, partner_offset = 0; - - /* Portals messaging relevant variables */ - mca_bcol_basesmuma_portal_proc_info_t 
*portals_info; - ptl_handle_eq_t allgather_eq_h; - ptl_event_t allgather_event; - bool blocked_post = false; - bool msg_posted = false; - int total_msg_posts = -1, scatter_posts = -1, allgather_posts = -1, extra_src_posts = -1; - - /* OMPI module and component variables */ - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - mca_bcol_basesmuma_module_t *bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - /* Control structure and payload variables */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer = NULL; - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer = NULL; - volatile mca_bcol_basesmuma_header_t *partner_ctl_pointer = NULL; - - struct mca_bcol_basesmuma_portal_buf_addr_t *my_lmsg_ctl_pointer = NULL; - struct mca_bcol_basesmuma_portal_buf_addr_t *parent_lmsg_ctl_pointer = NULL; - struct mca_bcol_basesmuma_portal_buf_addr_t *partner_lmsg_ctl_pointer = NULL; - - /* Make sure there userbuffer is not null */ - assert(my_userbuf != NULL); - - /* Get portals info*/ - portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)cs->portals_info; - - /* Get addresing information */ - buff_idx = input_args->src_desc->buffer_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - my_rank = bcol_module->super.sbgp_partner_module->my_index; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /* calculate the largest power of two that is smaller than - * or equal to the group size - */ - pow_2_levels = pow_sm_k(2,group_size, &(dummy_group_size)); - if( group_size < (1<colls_with_user_data.data_buffs+idx; - - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*) - data_buffs[my_rank].payload; - - if(my_ctl_pointer->sequence_number < sequence_number) { - first_instance = 1; - } - - if(first_instance) { - my_ctl_pointer->flag = -1; - 
my_ctl_pointer->index = 1; - - my_ctl_pointer->starting_flag_value = 0; - flag_offset = 0; - - } else { - my_ctl_pointer->index++; - } - - assert( -1 == my_ctl_pointer->flag); - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - - my_ctl_pointer->sequence_number = sequence_number; - sg_matchbits = sequence_number ; - - /* Construct my portal buffer address and copy to payload buffer */ - mca_bcol_basesmuma_construct_portal_address(my_lmsg_ctl_pointer, - portals_info->portal_id.nid, - portals_info->portal_id.pid, - sg_matchbits, - bcol_module->super.sbgp_partner_module->group_comm->c_contextid); - - my_lmsg_ctl_pointer->userbuf = my_userbuf; - my_lmsg_ctl_pointer->userbuf_length = fragment_size; - - - /* - * If I am the root of bcast, scatter the data to my children - */ - if (input_args->root_flag) { - BASESMUMA_VERBOSE(10,("I am the root of the data")); - my_lmsg_ctl_pointer->offset = 0; - my_lmsg_ctl_pointer->n_sends = pow_2_levels; - my_lmsg_ctl_pointer->length = fragment_size; - - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - /* Compute number of posts required - * We post the data buffer for both scatter and allgather phase at once so to avoid - * posting overhead - */ - if (my_rank >= pow_2) { - /* I am root and my rank is greater than pow_2, I will hand - * over to rank (that is < pow_2) to act as secondary root - */ - total_msg_posts = 1; - } - else { - - extra_src_posts = (my_rank + pow_2 < group_size ) ? 
1: 0; - scatter_posts = my_lmsg_ctl_pointer->n_sends; - allgather_posts = pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - } - - mca_bcol_basesmuma_portals_post_msg(cs, my_lmsg_ctl_pointer, - my_userbuf, fragment_size, allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - msg_posted = true ; - /* important that these be set before my children - * see the ready flag raised - */ - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - /* Wait for my scatter partner */ - if (my_rank >= pow_2) { - int scatter_partner = -1; - volatile mca_bcol_basesmuma_header_t *scatter_partner_ctl_pointer = NULL; - - scatter_partner = my_rank - pow_2; - scatter_partner_ctl_pointer = - data_buffs[scatter_partner].ctl_struct; - - while(!IS_SG_DATA_READY(scatter_partner_ctl_pointer, ready_flag, - sequence_number)){ - opal_progress(); - } - - goto Release; - } - else { - wait_for_peers(my_rank, my_lmsg_ctl_pointer->n_sends, data_buffs, - ready_flag, sequence_number); - } - - goto Allgather; - } - - -Extra : - if( my_rank >= pow_2 ) { - parent_rank = my_rank & (pow_2-1); - parent_ctl_pointer = data_buffs[parent_rank].ctl_struct; - parent_lmsg_ctl_pointer = - (mca_bcol_basesmuma_portal_buf_addr_t*)data_buffs[parent_rank].payload; - - ready_flag = ready_flag + pow_2_levels; - - while(!IS_SG_DATA_READY(parent_ctl_pointer, ready_flag, sequence_number)) { - - opal_progress(); - - } - - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, my_lmsg_ctl_pointer, - parent_lmsg_ctl_pointer, 0, - 0, fragment_size); - - my_ctl_pointer->flag = ready_flag; - - goto Release; - } - -Scatter: - - /* I am not root of bcast compute the list of possible - * where I will receive bcast data from. 
- */ - src_list = (int *) malloc(sizeof(int) * (pow_2_levels + 1)); - for( i = 0; i < pow_2_levels; i++) { - src_list[i] = my_rank ^ (1< pow_2 */ - if ((my_rank + pow_2) < group_size) { - src_list[i] = my_rank + pow_2; - } else { - src_list[i] = -1; - } - -Probe: - - /* If I am not the root, then poll on possible "senders'" control structs */ - /* For portals we block for now */ - while (!matched) { - /* Shared memory iprobe */ - SG_LARGE_MSG_PROBE(src_list, pow_2_levels + 1, - src_list_index, matched, src, data_buffs, parent_ctl_pointer, - parent_lmsg_ctl_pointer,ready_flag, sequence_number); - } - - /* If I am a secondary root - * Secondary root acts as root of bcast data when real root of data - * is process with group rank greater than pow_2 */ - if ((matched) && (src == pow_2 + my_rank)) { - volatile mca_bcol_basesmuma_header_t *extra_src_ctl_pointer = NULL; - struct mca_bcol_basesmuma_portal_buf_addr_t *extra_src_lmsg_ctl_pointer = NULL; - - secondary_root = true; - BASESMUMA_VERBOSE(10,("I am the secondary root for the data")); - my_lmsg_ctl_pointer->offset = 0; - my_lmsg_ctl_pointer->n_sends = pow_2_levels; - my_lmsg_ctl_pointer->length = fragment_size; - - extra_src_ctl_pointer = data_buffs[src].ctl_struct; - extra_src_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*)data_buffs[src].payload; - - /* create an event queue for the incoming buffer */ - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, my_lmsg_ctl_pointer, - extra_src_lmsg_ctl_pointer, 0, - 0, fragment_size); - - - extra_src_posts = 0; - scatter_posts = my_lmsg_ctl_pointer->n_sends; - allgather_posts = pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - 
mca_bcol_basesmuma_portals_post_msg(cs, my_lmsg_ctl_pointer, - my_userbuf, fragment_size, allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - msg_posted = true ; - /* important that these be set before my children - * see the ready flag raised - */ - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - wait_for_peers(my_rank, my_lmsg_ctl_pointer->n_sends, data_buffs, - ready_flag, sequence_number); - goto Allgather; - } - - /* Verify whether we got the right - * source of the data, by computing the source's intended - * destinations - */ - for( i = 0; i < parent_lmsg_ctl_pointer->n_sends; i++) { - uint64_t local_offset = 0; - uint64_t remote_offset = 0; - - BASESMUMA_VERBOSE(5,("%d found it from %d \n",my_rank,src)); - - if( my_rank == (src^(1<n_sends = i; - - /* Am I source for other process during scatter phase */ - if ( i > 0) { - - /* compute the size of the chunk to copy */ - length = (parent_lmsg_ctl_pointer->length)/ - (1<<(parent_lmsg_ctl_pointer->n_sends - my_lmsg_ctl_pointer->n_sends)); - my_lmsg_ctl_pointer->length = length; - my_lmsg_ctl_pointer->offset = - parent_lmsg_ctl_pointer->offset + length; - - - local_offset = my_lmsg_ctl_pointer->offset; - remote_offset = parent_lmsg_ctl_pointer->offset + length; - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, my_lmsg_ctl_pointer, - parent_lmsg_ctl_pointer,local_offset, - remote_offset, length); - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, - &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - /* Now post the message for other children to read */ - extra_src_posts = (my_rank + pow_2 < group_size ) ? 
1: 0; - scatter_posts = my_lmsg_ctl_pointer->n_sends; - allgather_posts = pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - - mca_bcol_basesmuma_portals_post_msg(cs, my_lmsg_ctl_pointer, - my_userbuf, my_lmsg_ctl_pointer->userbuf_length, - allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - msg_posted = true; - /* set the memory barrier to ensure completion - * and signal I am done getting scatter data*/ - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - wait_for_peers(my_rank, my_lmsg_ctl_pointer->n_sends, data_buffs, - ready_flag, sequence_number); - - } else { - /* takes care of first level recurssive double */ - length = parent_lmsg_ctl_pointer->length/ - (1<<(parent_lmsg_ctl_pointer->n_sends - 1)); - my_lmsg_ctl_pointer->length = length; - my_lmsg_ctl_pointer->offset = parent_lmsg_ctl_pointer->offset; - - local_offset = my_lmsg_ctl_pointer->offset; - remote_offset = my_lmsg_ctl_pointer->offset; - - - while(!IS_SG_DATA_READY(parent_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, my_lmsg_ctl_pointer, - parent_lmsg_ctl_pointer,local_offset, - remote_offset, length); - - /* signal that I am done reading data from parent */ - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - } - - /* time for allgather phase */ - input_args->status = ALLGATHER; - - BASESMUMA_VERBOSE(5,("Completed %d found it from %d \n",my_rank,src)); - - while(ready_flag > parent_ctl_pointer->flag); - - goto Allgather; - } - } - - { - /* this is not who we are looking for, - * mark as false positive so we don't - * poll here again - */ - src_list[src_list_index] = -1; - matched = 0; - goto Probe; - } - -Allgather: - - BASESMUMA_VERBOSE(5,(" %d Completed Scatter %d times \n", my_rank, 
completed_scatter)); - - /* zip it back up - we have already taken care of first level */ - global_sg_offset = my_lmsg_ctl_pointer->offset; - - /* first level of zip up */ - length = 2 * fragment_size/pow_2; - - - if (!msg_posted) { - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - /* Posting for all phases of recursive doubling */ - extra_src_posts = (my_rank + pow_2 < group_size ) ? 1: 0; - allgather_posts = pow_2_levels - 1; - total_msg_posts = allgather_posts + extra_src_posts ; - - - mca_bcol_basesmuma_portals_post_msg(cs, my_lmsg_ctl_pointer, - my_userbuf, my_lmsg_ctl_pointer->userbuf_length, - allgather_eq_h, total_msg_posts , blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - msg_posted = true; - } - - - ready_flag++; - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - /* - * Recursive doubling allgather implementation - */ - for( i = 1; i < pow_2_levels; i++) { - /* get my partner for this level */ - partner = my_rank^(1<flag >= ready_flag); - - if (partner_lmsg_ctl_pointer->offset < my_lmsg_ctl_pointer->offset) { - global_sg_offset -= length; - local_sg_offset = global_sg_offset; - } else { - local_sg_offset = global_sg_offset + length; - } - - - BASESMUMA_VERBOSE(10,("Allgather Phase: Get message from process %d, length %d", partner, length)); - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, my_lmsg_ctl_pointer, - partner_lmsg_ctl_pointer,local_sg_offset, - local_sg_offset, length); - - ready_flag++; - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - /* Block until partner completed this level of recursive-doubling stage */ - while(!IS_SG_DATA_READY(partner_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - /* - * Compute length for next recursive doubling phase - */ - length *= 2; 
- } - - - /* If I am source for non-power 2 children wait for them */ - /* If I am secondary root then my partner would be real root - * so no need for exchange of data with the extra partner */ - extra_partner = my_rank + pow_2 ; - if ((extra_partner < group_size) && (!secondary_root)) { - volatile mca_bcol_basesmuma_header_t *extra_partner_ctl_pointer = NULL; - - extra_partner_ctl_pointer = data_buffs[extra_partner].ctl_struct; - /* Block until extra partner has copied data */ - while(!IS_SG_DATA_READY(extra_partner_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - } - -Release: - - /* free the event queue */ - rc = PtlEQFree(allgather_eq_h); - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,("PtlEQFree() failed: %d )\n",rc)); - } - - my_ctl_pointer->starting_flag_value++; - input_args->status = FINISHED; - - return BCOL_FN_COMPLETE; - -} -#endif - -/* - * Blocking Portals Scatter Allgather - * - * - * - * - * - */ - -int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - /* local variables */ - int i; - uint64_t length; - int my_rank, parent_rank, src =-1, matched = 0; - int *src_list = NULL; - int group_size = -1, dummy_group_size; - int first_instance=0; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int count=input_args->count; - size_t pack_len = 0, dt_size =0 ; - volatile int8_t ready_flag; - int flag_offset; - int pow_2, pow_2_levels; - int src_list_index = -1; - uint64_t fragment_size; /* user buffer size */ - int sg_matchbits; - - /* Input argument variables */ - void *my_userbuf = (void*)((unsigned char*)input_args->userbuf); - int64_t sequence_number=input_args->sequence_num; - struct ompi_datatype_t* dtype=input_args->dtype; - - /* Extra source variables */ - bool secondary_root = false; - int partner = -1, extra_partner = -1; - - /* Scatter Allgather offsets */ - uint64_t local_sg_offset = 0, global_sg_offset = 0, partner_offset = 0; - - 
/* Portals messaging relevant variables */ - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - ptl_handle_eq_t allgather_eq_h; - ptl_event_t allgather_event; - bool blocked_post = false; - bool msg_posted = false; - int total_msg_posts = -1, scatter_posts = -1, allgather_posts = -1, extra_src_posts = -1; - - /* OMPI module and component variables */ - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - mca_bcol_basesmuma_module_t *bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - /* Control structure and payload variables */ - volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer = NULL; - volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer = NULL; /* binomial fanout */ - volatile mca_bcol_basesmuma_ctl_struct_t *partner_ctl_pointer = NULL; /* recursive double */ - - /* Make sure there userbuffer is not null */ - assert(my_userbuf != NULL); - - /* Get portals info*/ - portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)cs->portals_info; - - /* Get addresing information */ - buff_idx = input_args->src_desc->buffer_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - my_rank = bcol_module->super.sbgp_partner_module->my_index; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /* calculate the largest power of two that is smaller than - * or equal to the group size - */ - pow_2_levels = pow_sm_k(2,group_size, &(dummy_group_size)); - if( group_size < (1<colls_with_user_data.ctl_buffs+idx; - - - my_ctl_pointer = ctl_structs[my_rank]; - if(my_ctl_pointer->sequence_number < sequence_number) { - first_instance = 1; - } - - if(first_instance) { - for( i = 0; i < NUM_SIGNAL_FLAGS; i++){ - my_ctl_pointer->flags[i] = -1; - } - my_ctl_pointer->index = 1; - - my_ctl_pointer->starting_flag_value = 0; - flag_offset = 0; - - } else { - my_ctl_pointer->index++; - } - - - /* increment 
the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - /*ready_flag = flag_offset + sequence_number + 1;*/ - ready_flag = flag_offset + 1; - - my_ctl_pointer->sequence_number = sequence_number; - sg_matchbits = sequence_number ; - - /* Construct my portal buffer address and copy to payload buffer */ - mca_bcol_basesmuma_construct_portal_address(&my_ctl_pointer->portals_buf_addr, - portals_info->portal_id.nid, - portals_info->portal_id.pid, - sg_matchbits, - bcol_module->super.sbgp_partner_module->group_comm->c_contextid); - - my_ctl_pointer->portals_buf_addr.userbuf = my_userbuf; - my_ctl_pointer->portals_buf_addr.userbuf_length = fragment_size; - - - if (input_args->root_flag) { - my_ctl_pointer->offset = 0; - my_ctl_pointer->n_sends = pow_2_levels; - my_ctl_pointer->length = fragment_size; - - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - /* Compute number of posts required */ - if (my_rank >= pow_2) { - /* I am root and my rank is greater than pow_2, I will hand - * over to rank (that is < pow_2) to act as secondary root - */ - total_msg_posts = 1; - } - else { - - extra_src_posts = (my_rank + pow_2 < group_size ) ? 
1: 0; - scatter_posts = my_ctl_pointer->n_sends; - allgather_posts = pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - } - - mca_bcol_basesmuma_portals_post_msg(cs, - &my_ctl_pointer->portals_buf_addr, - my_userbuf, fragment_size, allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - msg_posted = true ; - - /* important that these be set before my children - * see the ready flag raised - */ - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAGS] = ready_flag; - BASESMUMA_VERBOSE(1,("I am the root(ctl_pointer %x) of the data flag value %d",my_ctl_pointer, my_ctl_pointer->flag)); - /* Wait for my scatter partner */ - if (my_rank >= pow_2) { - int scatter_partner = -1; - volatile mca_bcol_basesmuma_ctl_struct_t *scatter_partner_ctl_pointer = NULL; - - scatter_partner = my_rank - pow_2; - scatter_partner_ctl_pointer = - ctl_structs[scatter_partner]; - - while(!IS_SG_DATA_READY(scatter_partner_ctl_pointer, ready_flag, - sequence_number)){ -SCATTER_WAIT_FOR_EXTRA: - opal_progress(); - } - - goto Release; - } - else { - - wait_for_peers_nb(my_rank, my_ctl_pointer->n_sends, ctl_structs, - ready_flag, sequence_number); - } - - goto Allgather; - } - - -Extra : - if( my_rank >= pow_2 ) { - parent_rank = my_rank & (pow_2-1); - parent_ctl_pointer = ctl_structs[parent_rank]; - - ready_flag = ready_flag + pow_2_levels; - - while(!IS_SG_DATA_READY(parent_ctl_pointer, ready_flag, sequence_number)) { - - opal_progress(); - - } - - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, &my_ctl_pointer->portals_buf_addr, - &parent_ctl_pointer->portals_buf_addr, 0, - 0, fragment_size); - - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - - goto Release; - } - -Scatter: - - /* compute the list of possible sources */ - src_list = (int *) malloc(sizeof(int) * (pow_2_levels + 1)); - for( i = 0; 
i < pow_2_levels; i++) { - src_list[i] = my_rank ^ (1< pow_2 */ - if ((my_rank + pow_2) < group_size) { - src_list[i] = my_rank + pow_2; - } else { - src_list[i] = -1; - } - -Probe: - - /* If I am not the root, then poll on possible "senders'" control structs */ - /* For portals we block for now */ - while (!matched) { - /* Shared memory iprobe */ - SG_LARGE_MSG_NB_PROBE(src_list, pow_2_levels + 1, - src_list_index, matched, src, ctl_structs, - parent_ctl_pointer, ready_flag, sequence_number); - } - - BASESMUMA_VERBOSE(1,("Scatter : Im non-root match received")); - /* If I am a secondary root */ - if ((matched) && (src == pow_2 + my_rank)) { - volatile mca_bcol_basesmuma_ctl_struct_t *extra_src_ctl_pointer = NULL; - - secondary_root = true; - BASESMUMA_VERBOSE(10,("I am the secondary root for the data")); - my_ctl_pointer->offset = 0; - my_ctl_pointer->n_sends = pow_2_levels; - my_ctl_pointer->length = fragment_size; - - extra_src_ctl_pointer = ctl_structs[src]; - - /* create an event queue for the incoming buffer */ - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, - &my_ctl_pointer->portals_buf_addr, - &extra_src_ctl_pointer->portals_buf_addr, 0, - 0, fragment_size); - - - extra_src_posts = 0; - scatter_posts = my_ctl_pointer->n_sends; - allgather_posts = pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - mca_bcol_basesmuma_portals_post_msg(cs, - &my_ctl_pointer->portals_buf_addr, - my_userbuf, fragment_size, allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET - | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - msg_posted = true ; - - /* important that these be 
set before my children - * see the ready flag raised - */ - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - - wait_for_peers_nb(my_rank, my_ctl_pointer->n_sends, ctl_structs, - ready_flag, sequence_number); - goto Allgather; - } - - /* we need to see whether this is really - * who we are looking for - */ - for( i = 0; i < parent_ctl_pointer->n_sends; i++) { - uint64_t local_offset = 0; - uint64_t remote_offset = 0; - - BASESMUMA_VERBOSE(1,("%d found it from %d \n",my_rank,src)); - - if( my_rank == (src^(1<n_sends = i; - - /* Am I source for other process during scatter phase */ - if ( i > 0) { - - /* compute the size of the chunk to copy */ - length = (parent_ctl_pointer->length)/ - (1<<(parent_ctl_pointer->n_sends - my_ctl_pointer->n_sends)); - my_ctl_pointer->length = length; - my_ctl_pointer->offset = - parent_ctl_pointer->offset + length; - - - local_offset = my_ctl_pointer->offset; - remote_offset = parent_ctl_pointer->offset + length; - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, &my_ctl_pointer->portals_buf_addr, - &parent_ctl_pointer->portals_buf_addr,local_offset, - remote_offset, length); - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, - &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - /* Now post the message for other children to read */ - extra_src_posts = (my_rank + pow_2 < group_size ) ? 
1: 0; - scatter_posts = my_ctl_pointer->n_sends; - allgather_posts = pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - - mca_bcol_basesmuma_portals_post_msg(cs, &my_ctl_pointer->portals_buf_addr, - my_userbuf, my_ctl_pointer->portals_buf_addr.userbuf_length, - allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - msg_posted = true; - /* set the memory barrier to ensure completion */ - opal_atomic_wmb (); - /* signal that I am done */ - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - - wait_for_peers_nb(my_rank, my_ctl_pointer->n_sends, ctl_structs, - ready_flag, sequence_number); - - } else { - /* takes care of first level recurssive double */ - length = parent_ctl_pointer->length/ - (1<<(parent_ctl_pointer->n_sends - 1)); - my_ctl_pointer->length = length; - my_ctl_pointer->offset = parent_ctl_pointer->offset; - - local_offset = my_ctl_pointer->offset; - remote_offset = my_ctl_pointer->offset; - - - while(!IS_SG_DATA_READY(parent_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, - &my_ctl_pointer->portals_buf_addr, - &parent_ctl_pointer->portals_buf_addr, local_offset, - remote_offset, length); - - /* signal that I am done reading data from parent */ - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - } - - /* time for allgather phase */ - input_args->status = ALLGATHER; - - BASESMUMA_VERBOSE(5,("Completed %d found it from %d \n",my_rank,src)); - - while(ready_flag > parent_ctl_pointer->flags[BCAST_FLAG]); - - goto Allgather; - } - } - - { - /* this is not who we are looking for, - * mark as false positive so we don't - * poll here again - */ - src_list[src_list_index] = -1; - matched = 0; - goto Probe; - } - -Allgather: - - BASESMUMA_VERBOSE(5,(" %d Completed Scatter 
%d times \n", my_rank, completed_scatter)); - - /* zip it back up - we have already taken care of first level */ - global_sg_offset = my_ctl_pointer->offset; - - /* first level of zip up */ - length = 2 * fragment_size/pow_2; - - - if (!msg_posted) { - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - /* Posting for all phases of recursive doubling */ - extra_src_posts = (my_rank + pow_2 < group_size ) ? 1: 0; - allgather_posts = pow_2_levels - 1; - total_msg_posts = allgather_posts + extra_src_posts ; - - - mca_bcol_basesmuma_portals_post_msg(cs, &my_ctl_pointer->portals_buf_addr, - my_userbuf, my_ctl_pointer->portals_buf_addr.userbuf_length, - allgather_eq_h, total_msg_posts , blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - msg_posted = true; - } - - ready_flag++; - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - - for( i = 1; i < pow_2_levels; i++) { - /* get my partner for this level */ - partner = my_rank^(1<flags[BCAST_FLAG] >= ready_flag); - - if (partner_ctl_pointer->offset < my_ctl_pointer->offset) { - global_sg_offset -= length; - local_sg_offset = global_sg_offset; - } else { - local_sg_offset = global_sg_offset + length; - } - - - BASESMUMA_VERBOSE(10,("Allgather Phase: Get message from process %d, length %d", partner, length)); - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, - &my_ctl_pointer->portals_buf_addr, - &partner_ctl_pointer->portals_buf_addr,local_sg_offset, - local_sg_offset, length); - - ready_flag++; - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - - /* Block until partner is at this level of recursive-doubling stage */ - while(!IS_SG_DATA_READY(partner_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - /* double the length */ - length *= 
2; - } - - - /* If I am source for non-power 2 children wait for them */ - /* If I am secondary root then my partner would be real root - * so no need for exchange of data with the extra partner */ - extra_partner = my_rank + pow_2 ; - if ((extra_partner < group_size) && (!secondary_root)) { - volatile mca_bcol_basesmuma_ctl_struct_t *extra_partner_ctl_pointer = NULL; - - extra_partner_ctl_pointer = ctl_structs[extra_partner]; - /* Block until extra partner has copied data */ - while(!IS_SG_DATA_READY(extra_partner_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - } - -Release: - - /* free the event queue */ - rc = PtlEQFree(allgather_eq_h); - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,("PtlEQFree() failed: %d )\n",rc)); - } - - my_ctl_pointer->starting_flag_value++; - input_args->status = FINISHED; - - return BCOL_FN_COMPLETE; - -} - - -/* - * static sg_state_t *sg_state = NULL; - */ - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - int i; - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - int dummy_group_size; - int rc = OMPI_SUCCESS; - int buff_idx; - int count=input_args->count; - size_t pack_len = 0, dt_size =0 ; - struct ompi_datatype_t* dtype=input_args->dtype; - int completed_posts = 0; - sg_state_t *sg_state = NULL; - mca_bcol_basesmuma_module_t *bcol_module = NULL; - int extra_src_posts = -1,allgather_posts = -1, total_msg_posts = -1; - - bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - /* - sg_state = (sg_state_t*)bcol_module->sg_state; - */ - sg_state = (sg_state_t*)&(bcol_module->sg_state); - /* Re-entering the algorithm */ - switch (sg_state->phase) { - case PROBE: - if (input_args->root_flag) { - /* I became a root for this group */ - sg_state->phase = START; - goto Start; - } - goto Probe; - break; - - case SCATTER_ROOT_WAIT: - goto Scatter_root_wait; - - case SCATTER_EXTRA_ROOT_WAIT: - goto 
Scatter_extra_root_wait; - - case SCATTER_PARENT_WAIT: - goto Scatter_parent_wait; - - default: - break; - } - - sg_state->phase = INIT; - - BASESMUMA_VERBOSE(1,("Im entering portals_nb_bcast Unknown root ")); - /* Allocate space for algorithm state */ - /* - sg_state = (sg_state_t *) malloc(sizeof(sg_state_t)); - bcol_module->sg_state = (void *)sg_state; - - assert(NULL != sg_state); - */ - - sg_state->secondary_root = false; - sg_state->msg_posted = false; - sg_state->matched = 0; - sg_state->phase = SCATTER; - /* Copy input args to local variables */ - sg_state->my_userbuf = (void*)((unsigned char*)input_args->userbuf); - assert(sg_state->my_userbuf != NULL); - sg_state->sequence_number=input_args->sequence_num; - sg_state->cs = &mca_bcol_basesmuma_component; - sg_state->bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - /* Should this be buffer index (ML) or control buffer index ? */ - buff_idx = input_args->src_desc->buffer_index; - - /* Initialize SM group info used for control signaling */ - init_sm_group_info(sg_state, buff_idx); - - /* calculate the largest power of two that is smaller than - * or equal to the group size - */ - sg_state->pow_2_levels = pow_sm_k(2, sg_state->group_size, &(dummy_group_size)); - if( sg_state->group_size < (1 << sg_state->pow_2_levels)) { - sg_state->pow_2_levels--; - } - /* power-of-two group size */ - sg_state->pow_2 = 1 << sg_state->pow_2_levels; - - - /* we will work only on packed data - so compute the length*/ - ompi_datatype_type_size(dtype, &dt_size); - sg_state->fragment_size = count*dt_size; - - - /* Init portals scatter allgather info */ - rc = init_sm_portals_sg_info(sg_state); - - if (rc != OMPI_SUCCESS) { - goto Release; - } - -Start : -Extra : - /* - * My rank > pow2 groupsize - */ - if( sg_state->my_rank >= sg_state->pow_2 ) { - - if (input_args->root_flag){ - - rc = sm_portals_extra_root_scatter(sg_state); - if (rc != OMPI_SUCCESS) { - goto Release; - } - - } else { - /* - * Wait for my 
partner to receive bcast data, and copy from it - */ - int extra_parent_rank; - volatile mca_bcol_basesmuma_ctl_struct_t *extra_parent_ctl_pointer = NULL; /* binomial fanout */ - extra_parent_rank = sg_state->my_rank & (sg_state->pow_2-1); - extra_parent_ctl_pointer = sg_state->ctl_structs[extra_parent_rank]; - - sg_state->ready_flag = sg_state->ready_flag + sg_state->pow_2_levels; - - while(!IS_SG_DATA_READY(extra_parent_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)) { - opal_progress(); - - } - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &extra_parent_ctl_pointer->portals_buf_addr, 0, - 0, sg_state->fragment_size); - - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - } - - goto Release; - } - - if (input_args->root_flag) { - - BASESMUMA_VERBOSE(1,("Scatter : Im root (bcol_module %x,ctl_pointer %x) my ready flag %d \n", - sg_state->bcol_module, sg_state->my_ctl_pointer, sg_state->ready_flag)); - rc = sm_portals_root_scatter(sg_state); - - /* gvm Fix: Redudant - opal_atomic_wmb (); - */ - - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - if (rc != OMPI_SUCCESS) { - goto Release; - } - -Scatter_root_wait: - - BASESMUMA_VERBOSE(5,("Scatter: Im root waiting for children to complete my flag %d", - sg_state->my_ctl_pointer->flag)); - - for( i = 0; i < sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, - sg_state->my_ctl_pointer->n_sends, sg_state->ctl_structs, - sg_state->ready_flag, sg_state->sequence_number); - - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_ROOT_WAIT; - return BCOL_FN_STARTED; - } - - goto Allgather; - } - - -Scatter: - - BASESMUMA_VERBOSE(1,("Scatter : Im non-root probing for data ")); - /* compute the list of possible sources */ - /* - sg_state->src_list = (int *) 
malloc(sizeof(int) * (sg_state->pow_2_levels + 1)); - */ - assert(MAX_SM_GROUP_SIZE > sg_state->pow_2_levels+1); - - for( i = 0; i < sg_state->pow_2_levels; i++) { - sg_state->src_list[i] = sg_state->my_rank ^ (1< pow_2 */ - - if ((sg_state->my_rank + sg_state->pow_2) < sg_state->group_size) { - sg_state->src_list[i] = sg_state->my_rank + sg_state->pow_2; - } else { - sg_state->src_list[i] = -1; - } - - - BASESMUMA_VERBOSE(1,("Scatter : Ready flag %d Im non-root probing for %d procs %d:%d \n", - sg_state->ready_flag,sg_state->pow_2_levels,sg_state->src_list[0],sg_state->src_list[1])); -Probe: - /* If I am not the root, then poll on possible "senders'" control structs */ - /* For portals we block for now */ - /* Shared memory iprobe */ - - - /* - SG_LARGE_MSG_NB_PROBE(sg_state->src_list, sg_state->pow_2_levels + 1, - sg_state->src_list_index, sg_state->matched, sg_state->src, - sg_state->ctl_structs, - sg_state->parent_ctl_pointer, sg_state->ready_flag, sg_state->sequence_number); - */ - - for( i = 0; i < sg_state->cs->num_to_probe && 0 == sg_state->matched; - i++) { - sg_large_msg_probe(sg_state); - } - - if (!sg_state->matched) { - sg_state->phase = PROBE; - return BCOL_FN_STARTED; - } - - BASESMUMA_VERBOSE(1,("Scatter : Im non-root match received")); - /* If I am a secondary root */ - if ((sg_state->matched) && (sg_state->src == sg_state->pow_2 + sg_state->my_rank)) { - - BASESMUMA_VERBOSE(5,("Scatter : Im secondary root \n")); - - rc = sm_portals_secondary_root_scatter(sg_state); - if (rc != OMPI_SUCCESS) { - goto Release; - } - -Scatter_extra_root_wait: - - for( i = 0; i < sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, sg_state->my_ctl_pointer->n_sends, - sg_state->ctl_structs, sg_state->ready_flag, sg_state->sequence_number); - - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_EXTRA_ROOT_WAIT; - return 
BCOL_FN_STARTED; - } - - goto Allgather; - } - - /* we need to see whether this is really - * who we are looking for - */ - for( i = 0; i < sg_state->parent_ctl_pointer->n_sends; i++) { - uint64_t local_offset = 0; - uint64_t remote_offset = 0; - - BASESMUMA_VERBOSE(5,("%d found it from %d \n",sg_state->my_rank,sg_state->src)); - - if( sg_state->my_rank == (sg_state->src^(1<parent_ctl_pointer = sg_state->ctl_structs[sg_state->src]; - - /* we found our root within the group ... */ - BASESMUMA_VERBOSE(5,("Shared memory probe was matched, the root is %d ",sg_state->src)); - - sg_state->my_ctl_pointer->n_sends = i; - - /* Am I source for other process during scatter phase */ - if ( i > 0) { - BASESMUMA_VERBOSE(1,("Scatter : Im Internal node \n")); - - rc = sm_portals_internode_scatter(sg_state); - - if (rc != OMPI_SUCCESS) { - goto Release; - } - -Scatter_parent_wait: - - for( i = 0; i < sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, - sg_state->my_ctl_pointer->n_sends, - sg_state->ctl_structs, - sg_state->ready_flag, sg_state->sequence_number); - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_PARENT_WAIT; - return BCOL_FN_STARTED; - } - - } else { - - BASESMUMA_VERBOSE(1,("Scatter : Im leaf node \n")); - - /* takes care of first level recurssive double */ - sg_state->length = sg_state->parent_ctl_pointer->length/ - (1<<(sg_state->parent_ctl_pointer->n_sends - 1)); - sg_state->my_ctl_pointer->length = sg_state->length; - sg_state->my_ctl_pointer->offset = sg_state->parent_ctl_pointer->offset; - - - while(!IS_SG_DATA_READY(sg_state->parent_ctl_pointer, - sg_state->ready_flag, sg_state->sequence_number)) { - opal_progress(); - } - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &sg_state->parent_ctl_pointer->portals_buf_addr, - 
sg_state->my_ctl_pointer->offset, - sg_state->my_ctl_pointer->offset, sg_state->length); - - /* signal that I am done reading data from parent */ - /* - opal_atomic_wmb (); - */ - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - } - - BASESMUMA_VERBOSE(1,("Completed %d found it from %d \n", - sg_state->my_rank, sg_state->src)); - - while(sg_state->ready_flag > sg_state->parent_ctl_pointer->flag); - - goto Allgather; - } - } - - { - /* this is not who we are looking for, - * mark as false positive so we don't - * poll here again - */ - sg_state->src_list[sg_state->src_list_index] = -1; - sg_state->matched = 0; - goto Probe; - } - -Allgather: - - BASESMUMA_VERBOSE(5,("Completed Scatter phase")); - - /* zip it back up - we have already taken care of first level */ - sg_state->global_sg_offset = sg_state->my_ctl_pointer->offset; - - /* first level of zip up */ - sg_state->length = 2 * sg_state->fragment_size/sg_state->pow_2; - - - /* Posting for all phases of recursive doubling */ - extra_src_posts = (sg_state->my_rank + sg_state->pow_2 < sg_state->group_size ) ? 
1: 0; - allgather_posts = sg_state->pow_2_levels - 1; - total_msg_posts = allgather_posts + extra_src_posts ; - - if ((!sg_state->msg_posted) && (total_msg_posts > 0)){ - - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->my_ctl_pointer->portals_buf_addr.userbuf_length, - PTL_EQ_NONE, total_msg_posts, blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - sg_state->msg_posted = true; - } - - BASESMUMA_VERBOSE(5,("Done with allgather phase")); - /* I reached an allgather phase */ - sg_state->ready_flag++; - opal_atomic_wmb (); - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - rc = sm_portals_bcasts_allgather_phase(sg_state); - - if (rc != OMPI_SUCCESS) { - BASESMUMA_VERBOSE(10,("Error in Bcast's allgather phase ")); - goto Release; - } - - /* If I am source for non-power 2 children wait for them */ - /* If I am secondary root then my partner would be real root - * so no need for exchange of data with the extra partner */ - sg_state->extra_partner = sg_state->my_rank + sg_state->pow_2 ; - if ((sg_state->extra_partner < sg_state->group_size) && (!sg_state->secondary_root)) { - - sg_state->extra_partner_ctl_pointer = sg_state->ctl_structs[sg_state->extra_partner]; - /* Block until extra partner has copied data */ - while(!IS_SG_DATA_READY(sg_state->extra_partner_ctl_pointer, - sg_state->ready_flag, sg_state->sequence_number)) { - opal_progress(); - } - - } - -Release: - - BASESMUMA_VERBOSE(1,("Im done ")); - - sg_state->my_ctl_pointer->starting_flag_value++; - sg_state->phase = FINISHED; - - - return BCOL_FN_COMPLETE; - -} - - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - int i; - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - int dummy_group_size; - int 
rc = OMPI_SUCCESS; - int buff_idx; - int count=input_args->count; - size_t pack_len = 0, dt_size =0 ; - struct ompi_datatype_t* dtype=input_args->dtype; - int completed_posts = 0; - sg_state_t *sg_state = NULL; - mca_bcol_basesmuma_module_t *bcol_module=NULL; - int extra_src_posts = -1,allgather_posts = -1, total_msg_posts = -1; - bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - sg_state = (sg_state_t*)(&bcol_module->sg_state); - - BASESMUMA_VERBOSE(1,("Im entering nb_knownroot_bcast bcol = %x ", - c_input_args->bcol_module)); - /* Re-entering the algorithm */ - switch (sg_state->phase) { - case PROBE: - if (input_args->root_flag) { - /* I became a root for this group */ - sg_state->phase = START; - goto Start; - } - goto Probe; - break; - - case SCATTER_ROOT_WAIT: - goto Scatter_root_wait; - - case SCATTER_EXTRA_ROOT_WAIT: - goto Scatter_extra_root_wait; - - case SCATTER_PARENT_WAIT: - goto Scatter_parent_wait; - - default: - break; - } - - /* Allocate space for algorithm state */ - /* - sg_state = (sg_state_t *) malloc(sizeof(sg_state_t)); - bcol_module->sg_state = (void*) sg_state; - */ - - /* Make sure there userbuffer is not null */ - - sg_state->phase = INIT; - sg_state->secondary_root = false; - sg_state->msg_posted = false; - sg_state->matched = 0; - /* Copy input args to local variables */ - sg_state->my_userbuf = (void*)((unsigned char*)input_args->userbuf); - assert(sg_state->my_userbuf != NULL); - sg_state->sequence_number=input_args->sequence_num; - sg_state->cs = &mca_bcol_basesmuma_component; - sg_state->bcol_module = bcol_module; - buff_idx = input_args->src_desc->buffer_index; - - /* Initialize SM group info used for control signaling */ - init_sm_group_info(sg_state, buff_idx); - - /* calculate the largest power of two that is smaller than - * or equal to the group size - */ - sg_state->pow_2_levels = pow_sm_k(2, sg_state->group_size, &(dummy_group_size)); - if( sg_state->group_size < (1 << sg_state->pow_2_levels)) { - 
sg_state->pow_2_levels--; - } - /* power-of-two group size */ - sg_state->pow_2 = 1 << sg_state->pow_2_levels; - - - /* we will work only on packed data - so compute the length*/ - ompi_datatype_type_size(dtype, &dt_size); - sg_state->fragment_size = count*dt_size; - - - /* Init portals scatter allgather info */ - rc = init_sm_portals_sg_info(sg_state); - - if (rc != OMPI_SUCCESS) { - goto Release; - } -Start: -Extra : - /* - * My rank > pow2 groupsize - */ - if( sg_state->my_rank >= sg_state->pow_2 ) { - - if (input_args->root_flag){ - - rc = sm_portals_extra_root_scatter(sg_state); - if (rc != OMPI_SUCCESS) { - goto Release; - } - - } else { - /* - * Wait for my partner to receive bcast data, and copy from it - */ - int extra_parent_rank; - volatile mca_bcol_basesmuma_ctl_struct_t *extra_parent_ctl_pointer = NULL; /* binomial fanout */ - extra_parent_rank = sg_state->my_rank & (sg_state->pow_2-1); - extra_parent_ctl_pointer = sg_state->ctl_structs[extra_parent_rank]; - - sg_state->ready_flag = sg_state->ready_flag + sg_state->pow_2_levels; - - while(!IS_SG_DATA_READY(extra_parent_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)) { - opal_progress(); - - } - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &extra_parent_ctl_pointer->portals_buf_addr, 0, - 0, sg_state->fragment_size); - - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - } - - goto Release; - } - - if (input_args->root_flag) { - - BASESMUMA_VERBOSE(1,("Scatter : Im root (bcol_module %x,ctl_pointer %x) my ready flag %d \n", - bcol_module, sg_state->my_ctl_pointer, sg_state->ready_flag)); - rc = sm_portals_root_scatter(sg_state); - - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - if (rc != OMPI_SUCCESS) { - goto Release; - } - -Scatter_root_wait: - - BASESMUMA_VERBOSE(5,("Scatter: Im root waiting for children to complete my flag %d", - sg_state->my_ctl_pointer->flag)); - for( i = 0; i < 
sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, - sg_state->my_ctl_pointer->n_sends, sg_state->ctl_structs, - sg_state->ready_flag, sg_state->sequence_number); - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_ROOT_WAIT; - return BCOL_FN_STARTED; - } - - goto Allgather; - } - - -Probe: - - sg_state->src = compute_src_from_root(input_args->root_route->rank, sg_state->my_rank, - sg_state->pow_2, sg_state->group_size); - - sg_state->parent_ctl_pointer = sg_state->ctl_structs[sg_state->src]; - - while(!IS_SG_DATA_READY(sg_state->parent_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)) { - opal_progress(); - - } - sg_state->matched = true; - - /* If I am a secondary root */ - if ((sg_state->matched) && (sg_state->src == sg_state->pow_2 + sg_state->my_rank)) { - - rc = sm_portals_secondary_root_scatter(sg_state); - if (rc != OMPI_SUCCESS) { - goto Release; - } -Scatter_extra_root_wait: - - for( i = 0; i < sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, sg_state->my_ctl_pointer->n_sends, - sg_state->ctl_structs, sg_state->ready_flag, sg_state->sequence_number); - - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_EXTRA_ROOT_WAIT; - return BCOL_FN_STARTED; - } - - goto Allgather; - } - - /* we need to see whether this is really - * who we are looking for - */ - for( i = 0; i < sg_state->parent_ctl_pointer->n_sends; i++) { - uint64_t local_offset = 0; - uint64_t remote_offset = 0; - - BASESMUMA_VERBOSE(5,("%d found it from %d \n",sg_state->my_rank,sg_state->src)); - - if( sg_state->my_rank == (sg_state->src^(1<parent_ctl_pointer = sg_state->ctl_structs[sg_state->src]; - - /* we found our root within the group ... 
*/ - BASESMUMA_VERBOSE(10,("Shared memory probe was matched, the root is %d ",sg_state->src)); - - sg_state->my_ctl_pointer->n_sends = i; - - /* Am I source for other process during scatter phase */ - if ( i > 0) { - - rc = sm_portals_internode_scatter(sg_state); - - if (rc != OMPI_SUCCESS) { - goto Release; - } -Scatter_parent_wait: - - for( i = 0; i < sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, - sg_state->my_ctl_pointer->n_sends, - sg_state->ctl_structs, - sg_state->ready_flag, sg_state->sequence_number); - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_PARENT_WAIT; - return BCOL_FN_STARTED; - } - - } else { - - /* takes care of first level recursive double */ - sg_state->length = sg_state->parent_ctl_pointer->length/ - (1<<(sg_state->parent_ctl_pointer->n_sends - 1)); - sg_state->my_ctl_pointer->length = sg_state->length; - sg_state->my_ctl_pointer->offset = sg_state->parent_ctl_pointer->offset; - - - while(!IS_SG_DATA_READY(sg_state->parent_ctl_pointer, - sg_state->ready_flag, sg_state->sequence_number)) { - opal_progress(); - } - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &sg_state->parent_ctl_pointer->portals_buf_addr, - sg_state->my_ctl_pointer->offset, - sg_state->my_ctl_pointer->offset, sg_state->length); - - /* signal that I am done reading data from parent */ - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - } - - BASESMUMA_VERBOSE(5,("Completed %d found it from %d \n", - sg_state->my_rank, sg_state->src)); - - while(sg_state->ready_flag > sg_state->parent_ctl_pointer->flag); - - goto Allgather; - } - } - - { - /* this is not who we are looking for, - * mark as false positive so we don't - * poll here again - */ - sg_state->src_list[sg_state->src_list_index] = -1; - sg_state->matched = 0; - goto 
Probe; - } - -Allgather: - - /* zip it back up - we have already taken care of first level */ - sg_state->global_sg_offset = sg_state->my_ctl_pointer->offset; - - /* first level of zip up */ - sg_state->length = 2 * sg_state->fragment_size/sg_state->pow_2; - - /* Posting for all phases of recursive doubling */ - extra_src_posts = (sg_state->my_rank + sg_state->pow_2 < sg_state->group_size ) ? 1: 0; - allgather_posts = sg_state->pow_2_levels - 1; - total_msg_posts = allgather_posts + extra_src_posts ; - - if ((!sg_state->msg_posted) && (total_msg_posts > 0)){ - - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->my_ctl_pointer->portals_buf_addr.userbuf_length, - PTL_EQ_NONE, total_msg_posts, blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - sg_state->msg_posted = true; - } - - sg_state->ready_flag++; - opal_atomic_wmb (); - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - rc = sm_portals_bcasts_allgather_phase(sg_state); - - if (rc != OMPI_SUCCESS) { - BASESMUMA_VERBOSE(10,("Error in Bcast's allgather phase ")); - goto Release; - } - - /* If I am source for non-power 2 children wait for them */ - /* If I am secondary root then my partner would be real root - * so no need for exchange of data with the extra partner */ - sg_state->extra_partner = sg_state->my_rank + sg_state->pow_2 ; - if ((sg_state->extra_partner < sg_state->group_size) && (!sg_state->secondary_root)) { - - sg_state->extra_partner_ctl_pointer = sg_state->ctl_structs[sg_state->extra_partner]; - /* Block until extra partner has copied data */ - while(!IS_SG_DATA_READY(sg_state->extra_partner_ctl_pointer, - sg_state->ready_flag, sg_state->sequence_number)) { - opal_progress(); - } - - } - -Release: - - BASESMUMA_VERBOSE(1,("Im done ")); - - sg_state->my_ctl_pointer->starting_flag_value++; - 
sg_state->phase = FINISHED; - - return BCOL_FN_COMPLETE; - -} -#endif /* __PORTALS_AVAIL__ */ diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.h b/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.h deleted file mode 100644 index d15851b036b..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.h +++ /dev/null @@ -1,626 +0,0 @@ -#ifdef __PORTALS_AVAIL__ -#define __PORTALS_ENABLE__ - -#include - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "bcol_basesmuma_utils.h" -#include "bcol_basesmuma_portals.h" -#include "bcol_basesmuma.h" - -#if 0 -struct scatter_allgather_nb_bcast_state_t -{ - /* local variables */ - uint64_t length; - int my_rank, src, matched; - int *src_list; - int group_size; - int64_t ready_flag; - int pow_2, pow_2_levels; - int src_list_index; - uint64_t fragment_size; /* user buffer size */ - - /* Input argument variables */ - void *my_userbuf; - int64_t sequence_number; - - /* Extra source variables */ - bool secondary_root; - int partner , extra_partner; - - /* Scatter Allgather offsets */ - uint64_t local_sg_offset , global_sg_offset , partner_offset ; - - /* Portals messaging relevant variables */ - ptl_handle_eq_t allgather_eq_h; - ptl_handle_eq_t read_eq; - ptl_event_t allgather_event; - bool msg_posted; - - /* OMPI module and component variables */ - mca_bcol_basesmuma_component_t *cs; - mca_bcol_basesmuma_module_t *bcol_module; - - /* Control structure and payload variables */ - volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer; /* scatter source */ - volatile mca_bcol_basesmuma_ctl_struct_t *extra_partner_ctl_pointer; /* scatter source */ - - int phase; -}; - -typedef struct scatter_allgather_nb_bcast_state_t sg_state_t; -#endif - -bool blocked_post = false; - -#define 
IS_SG_DATA_READY(peer, my_flag, my_sequence_number) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[BCAST_FLAG] >= (my_flag) \ - )? true : false ) - - - -#define SG_LARGE_MSG_PROBE(src_list, n_src, src_list_index, matched, \ - src, data_buffs, data_src_ctl_pointer, \ - data_src_lmsg_ctl_pointer, ready_flag, \ - sequence_number) \ -do { \ - int j; \ - for( j = 0; j < n_src; j++) { \ - if(src_list[j] != -1) { \ - data_src_ctl_pointer = data_buffs[src_list[j]].ctl_struct; \ - data_src_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*) \ - data_buffs[src_list[j]].payload; \ - if( IS_SG_DATA_READY(data_src_ctl_pointer,ready_flag,sequence_number)) { \ - src = src_list[j]; \ - matched = 1; \ - src_list_index = j; \ - break; \ - } \ - } \ - } \ -} while(0) - -#define SG_LARGE_MSG_NB_PROBE(src_list, n_src, src_list_index, matched, \ - src, ctl_structs, data_src_ctl_pointer, \ - ready_flag, sequence_number) \ -do { \ - int j; \ - for( j = 0; j < n_src; j++) { \ - if(src_list[j] != -1) { \ - data_src_ctl_pointer = ctl_structs[src_list[j]]; \ - if( IS_SG_DATA_READY(data_src_ctl_pointer,ready_flag,sequence_number)) { \ - src = src_list[j]; \ - matched = 1; \ - src_list_index = j; \ - break; \ - } \ - } \ - } \ -} while(0) - - - - - -static inline __opal_attribute_always_inline__ -int wait_for_peers(int my_rank, int npeers, volatile mca_bcol_basesmuma_payload_t *data_buffs, - int flag_value, int sn) -{ - int *peers_list = NULL; - int counter = 0, diter = 0; - volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer = NULL; - - peers_list = (int *)malloc(sizeof(int) * npeers); - - for (diter = 0; diter < npeers; diter++ ){ - peers_list[diter] = my_rank ^ (1<pow_2_levels+1; - - - for( j = 0; j < n_src; j++) { - if(sg_state->src_list[j] != -1) { - sg_state->parent_ctl_pointer = sg_state->ctl_structs[sg_state->src_list[j]]; - - BASESMUMA_VERBOSE(5,("Parent %d ctl pointer (parent=%x, my ctl=%x) flag %d", - 
sg_state->src_list[j],sg_state->parent_ctl_pointer, - sg_state->my_ctl_pointer, - sg_state->parent_ctl_pointer->flag)); - - if (IS_SG_DATA_READY(sg_state->parent_ctl_pointer, - sg_state->ready_flag, sg_state->sequence_number)) { - sg_state->src = sg_state->src_list[j]; - sg_state->matched = 1; - sg_state->src_list_index = j; - break; - } - } - } - - return 0; -} -/* - * I will post message for all the my children - */ -static inline __opal_attribute_always_inline__ -int sm_portals_root_scatter(sg_state_t *sg_state) -{ - int extra_src_posts = -1, scatter_posts = -1, allgather_posts = -1, - total_msg_posts = -1; - - BASESMUMA_VERBOSE(10,("I am the root of the data")); - sg_state->my_ctl_pointer->offset = 0; - sg_state->my_ctl_pointer->n_sends = sg_state->pow_2_levels; - sg_state->my_ctl_pointer->length = sg_state->fragment_size; - - - - extra_src_posts = (sg_state->my_rank + sg_state->pow_2 < sg_state->group_size ) ? 1: 0; - scatter_posts = sg_state->my_ctl_pointer->n_sends; - allgather_posts = sg_state->pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - if ( total_msg_posts <= 0) { - BASESMUMA_VERBOSE(10,("No need to post the data ")); - return OMPI_SUCCESS; - } - - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, - &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->fragment_size, - PTL_EQ_NONE, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | - PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - - /* - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, - &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->fragment_size, - sg_state->allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | - PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - */ - - sg_state->msg_posted = true ; - - /* - 
opal_atomic_wmb(); - */ - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - return OMPI_SUCCESS; -} - -/* - * Im root but my rank > pow2_groupsize, so will copy to partner who - * will act as root (secondary) - */ -static inline __opal_attribute_always_inline__ -int sm_portals_extra_root_scatter(sg_state_t *sg_state) -{ - int scatter_partner = -1; - volatile mca_bcol_basesmuma_ctl_struct_t *scatter_partner_ctl_pointer = NULL; - - int total_msg_posts = 1; - - if ( total_msg_posts <= 0) { - BASESMUMA_VERBOSE(10,("No need to post the data ")); - } - else { - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, - &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->fragment_size, - PTL_EQ_NONE, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET - | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - sg_state->msg_posted = true ; - - } - - opal_atomic_wmb(); - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - - - scatter_partner = sg_state->my_rank - sg_state->pow_2; - scatter_partner_ctl_pointer = - sg_state->ctl_structs[scatter_partner]; - - while(!IS_SG_DATA_READY(scatter_partner_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)){ - opal_progress(); - } - - return OMPI_SUCCESS; -} - -/* - * Gets msg from the partner (> pow2_groupsize) and posts the - * message acting as root - */ -static inline __opal_attribute_always_inline__ -int sm_portals_secondary_root_scatter(sg_state_t *sg_state) -{ - - volatile mca_bcol_basesmuma_ctl_struct_t *extra_src_ctl_pointer = NULL; - int scatter_posts, allgather_posts, extra_src_posts, total_msg_posts; - - sg_state->secondary_root = true; - BASESMUMA_VERBOSE(10,("I am the secondary root for the data")); - sg_state->my_ctl_pointer->offset = 0; - sg_state->my_ctl_pointer->n_sends = sg_state->pow_2_levels; - sg_state->my_ctl_pointer->length = sg_state->fragment_size; - - extra_src_ctl_pointer = 
sg_state->ctl_structs[sg_state->src]; - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &extra_src_ctl_pointer->portals_buf_addr, 0, - 0, sg_state->fragment_size); - - - extra_src_posts = 0; - scatter_posts = sg_state->my_ctl_pointer->n_sends; - allgather_posts = sg_state->pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - if (total_msg_posts > 0) { - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, - &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->fragment_size, - PTL_EQ_NONE, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET - | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - sg_state->msg_posted = true ; - } - opal_atomic_wmb(); - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - return OMPI_SUCCESS; -} - -/* - * Internode Scatter: Get data from my parent and post for my children - */ - -static inline __opal_attribute_always_inline__ -int sm_portals_internode_scatter(sg_state_t *sg_state) -{ - - int scatter_posts, allgather_posts, extra_src_posts, - total_msg_posts; - uint64_t local_offset, remote_offset; - - /* compute the size of the chunk to copy */ - sg_state->length = (sg_state->parent_ctl_pointer->length)/ - (1<<(sg_state->parent_ctl_pointer->n_sends - sg_state->my_ctl_pointer->n_sends)); - sg_state->my_ctl_pointer->length = sg_state->length; - sg_state->my_ctl_pointer->offset = - sg_state->parent_ctl_pointer->offset + sg_state->length; - - - local_offset = sg_state->my_ctl_pointer->offset; - remote_offset = sg_state->parent_ctl_pointer->offset + - sg_state->length; - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &sg_state->parent_ctl_pointer->portals_buf_addr,local_offset, - remote_offset,sg_state->length); - - /* Now post 
the message for other children to read */ - extra_src_posts = (sg_state->my_rank + sg_state->pow_2 < - sg_state->group_size ) ? 1: 0; - scatter_posts = sg_state->my_ctl_pointer->n_sends; - allgather_posts = sg_state->pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - if (total_msg_posts > 0) { - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->my_ctl_pointer->portals_buf_addr.userbuf_length, - PTL_EQ_NONE, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - - sg_state->msg_posted = true; - } - /* - opal_atomic_wmb(); - */ - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - return OMPI_SUCCESS; -} - -/* - * Bcast's Allgather Phase: - * Combines data from all processes using recursive doubling algorithm - */ -static inline __opal_attribute_always_inline__ -int sm_portals_bcasts_allgather_phase(sg_state_t *sg_state) -{ - int ag_loop, partner; - volatile mca_bcol_basesmuma_ctl_struct_t *partner_ctl_pointer = NULL; /* recursive double */ - - - for( ag_loop = 1; ag_loop < sg_state->pow_2_levels; ag_loop++) { - /* get my partner for this level */ - partner = sg_state->my_rank^(1<ctl_structs[partner]; - - - /* Block until partner is at this level of recursive-doubling stage */ - while(!IS_SG_DATA_READY(partner_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)) { - opal_progress(); - } - assert(partner_ctl_pointer->flag >= sg_state->ready_flag); - - if (partner_ctl_pointer->offset < sg_state->my_ctl_pointer->offset) { - sg_state->global_sg_offset -= sg_state->length; - sg_state->local_sg_offset = sg_state->global_sg_offset; - } else { - sg_state->local_sg_offset = sg_state->global_sg_offset + sg_state->length; - } - - - BASESMUMA_VERBOSE(10,("Allgather Phase: Get message from process %d, length 
%d", - partner, sg_state->length)); - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &partner_ctl_pointer->portals_buf_addr,sg_state->local_sg_offset, - sg_state->local_sg_offset, sg_state->length); - - sg_state->ready_flag++; - opal_atomic_wmb(); - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - /* Block until partner is at this level of recursive-doubling stage */ - while(!IS_SG_DATA_READY(partner_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)) { - opal_progress(); - } - - /* double the length */ - sg_state->length *= 2; - } - - return OMPI_SUCCESS; - -} - - -static inline __opal_attribute_always_inline__ -int init_sm_group_info(sg_state_t *sg_state, int buff_idx) -{ - int idx, leading_dim; - int first_instance=0; - int flag_offset; - - /* Get addresing information */ - sg_state->group_size = sg_state->bcol_module->colls_no_user_data.size_of_group; - leading_dim = sg_state->bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - BASESMUMA_VERBOSE(1,("My buffer idx %d group size %d, leading dim %d, idx %d", - buff_idx,sg_state->group_size,leading_dim,idx)); - /* grab the ctl buffs */ - sg_state->ctl_structs = (volatile mca_bcol_basesmuma_ctl_struct_t **) - sg_state->bcol_module->colls_with_user_data.ctl_buffs+idx; - - sg_state->my_rank = sg_state->bcol_module->super.sbgp_partner_module->my_index; - sg_state->my_ctl_pointer = sg_state->ctl_structs[sg_state->my_rank]; - - if (sg_state->my_ctl_pointer->sequence_number < sg_state->sequence_number) { - first_instance = 1; - } - - if(first_instance) { - sg_state->my_ctl_pointer->flag = -1; - sg_state->my_ctl_pointer->index = 1; - - sg_state->my_ctl_pointer->starting_flag_value = 0; - flag_offset = 0; - - } else { - sg_state->my_ctl_pointer->index++; - } - - /* For bcast we shud have only entry to this bcol - assert(sg_state->my_ctl_pointer->flag == -1); - */ - - /* 
increment the starting flag by one and return */ - flag_offset = sg_state->my_ctl_pointer->starting_flag_value; - sg_state->ready_flag = flag_offset + sg_state->sequence_number + 1; - - sg_state->my_ctl_pointer->sequence_number = sg_state->sequence_number; - - return OMPI_SUCCESS; - -} - -static inline __opal_attribute_always_inline__ -int init_sm_portals_sg_info(sg_state_t *sg_state) -{ -/* Get portals info*/ - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - int rc = OMPI_SUCCESS; - int sg_matchbits; - - portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)sg_state->cs->portals_info; - - sg_matchbits = sg_state->sequence_number ; - - /* Construct my portal buffer address and copy to payload buffer */ - mca_bcol_basesmuma_construct_portal_address(&sg_state->my_ctl_pointer->portals_buf_addr, - portals_info->portal_id.nid, - portals_info->portal_id.pid, - sg_matchbits, - sg_state->bcol_module->super.sbgp_partner_module->group_comm->c_contextid); - - sg_state->my_ctl_pointer->portals_buf_addr.userbuf = sg_state->my_userbuf; - sg_state->my_ctl_pointer->portals_buf_addr.userbuf_length = sg_state->fragment_size; - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ -int compute_src_from_root(int group_root, int my_group_rank, int pow2, int - group_size) -{ - - int root, relative_rank, src, i; - - if (group_root < pow2) { - root = group_root; - } else { - /* the source of the data is extra node, - the real root it represented by some rank from - pow2 group */ - root = group_root - pow2; - /* shortcut for the case when my rank is root for the group */ - if (my_group_rank == root) { - return group_root; - } - } - - relative_rank = (my_group_rank - root) < 0 ? 
my_group_rank - root + pow2 : - my_group_rank - root; - - for (i = 1; i < pow2; i<<=1) { - if (relative_rank & i) { - src = my_group_rank ^ i; - if (src >= pow2) - src -= pow2; - - return src; - } - } - - return -1; -} - -int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c deleted file mode 100644 index a1454102a84..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -/* #define __PORTALS_AVAIL__ */ -#ifdef __PORTALS_AVAIL__ - -#define __PORTALS_ENABLE__ -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h" -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" -#include "bcol_basesmuma_utils.h" - -#include "bcol_basesmuma_portals.h" - -/* debug */ -#include -/* end debug */ - - -/** - * Shared memory non-blocking Broadcast - K-nomial fan-out for small data buffers. - * This routine assumes that buf (the input buffer) is a single writer - * multi reader (SWMR) shared memory buffer owned by the calling rank - * which is the only rank that can write to this buffers. 
- * It is also assumed that the buffers are registered and fragmented - * at the ML level and that buf is sufficiently large to hold the data. - * - * - * @param buf - SWMR shared buffer within a sbgp that the - * executing rank can write to. - * @param count - the number of elements in the shared buffer. - * @param dtype - the datatype of a shared buffer element. - * @param root - the index within the sbgp of the root. - * @param module - basesmuma module. - */ -int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ -#if 0 - /* local variables */ - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - int i, matched = 0; - int src=-1; - int group_size; - int my_rank, first_instance=0, flag_offset; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int64_t sequence_number=input_args->sequence_num; - - volatile int64_t ready_flag; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char* parent_data_pointer; - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - void *userbuf = (void *)((unsigned char *)input_args->userbuf); - - size_t pack_len = 0, dt_size; - - struct mca_bcol_basesmuma_portal_buf_addr_t *my_lmsg_ctl_pointer = NULL; - struct mca_bcol_basesmuma_portal_buf_addr_t *parent_lmsg_ctl_pointer = NULL; - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)cs->portals_info; - - /* we will work only on packed data - so compute the length*/ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = 
bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*) data_buffs[my_rank].payload; - - /* setup resource recycling */ - if( my_ctl_pointer->sequence_number < sequence_number ) { - first_instance=1; - } - - if( first_instance ) { - /* Signal arrival */ - my_ctl_pointer->flag = -1; - my_ctl_pointer->index=1; - /* this does not need to use any flag values , so only need to - * set the value for subsequent values that may need this */ - my_ctl_pointer->starting_flag_value=0; - flag_offset=0; - - } else { - /* only one thread at a time will be making progress on this - * collective, so no need to make this atomic */ - my_ctl_pointer->index++; - } - - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - my_ctl_pointer->sequence_number = sequence_number; - - - /* Construct my portal buffer address and copy to payload buffer */ - mca_bcol_basesmuma_construct_portal_address(my_lmsg_ctl_pointer, - portals_info->portal_id.nid, - portals_info->portal_id.pid, - sequence_number, - bcol_module->super.sbgp_partner_module->group_comm->c_contextid); - - /* non-blocking broadcast algorithm */ - - /* If I am the root, then signal ready flag */ - if(input_args->root_flag) { - ptl_handle_eq_t eq_h; - ptl_event_t event; - int ret; - - BASESMUMA_VERBOSE(10,("I am the root of the data")); - - /* create an event queue for the incoming buffer */ - ret = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, 
PTL_EQ_HANDLER_NONE, &eq_h); - - if (ret != PTL_OK) { - fprintf(stderr, "PtlEQAlloc() failed: %d \n",ret); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* Post the message using portal copy */ - - mca_bcol_basesmuma_portals_post_msg_nb_nopers(cs, my_lmsg_ctl_pointer, userbuf, - pack_len, eq_h, my_lmsg_ctl_pointer->nsends); - - /* - * signal ready flag - */ - my_ctl_pointer->flag = ready_flag; - - /* wait for a response from the client */ - mca_bcol_basesmuma_portals_wait_event_nopers(eq_h, POST_MSG_EVENT, - &event, my_lmsg_ctl_pointer->nsends); - - /* free the event queue */ - ret = PtlEQFree(eq_h); - if (ret != PTL_OK) { - fprintf(stderr, "PtlEQFree() failed: %d )\n",ret); - } - - /* root is finished */ - goto Release; - } - - /* If I am not the root, then poll on possible "senders'" control structs */ - for( i = 0; i < cs->num_to_probe && 0 == matched; i++) { - - /* Shared memory iprobe */ - /* - BCOL_BASESMUMA_SM_PROBE(bcol_module->src, bcol_module->src_size, - my_rank, matched, src); - */ - do { - int j, n_src, my_index; - n_src = bcol_module->src_size; - - for( j = 0; j < n_src; j++) { - parent_ctl_pointer = data_buffs[bcol_module->src[j]].ctl_struct; - parent_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t *) - data_buffs[bcol_module->src[j]].payload; - if (IS_DATA_READY(parent_ctl_pointer,ready_flag,sequence_number)) { - - src = bcol_module->src[j]; - matched = 1; - break; - } - } - } while(0); - - } - - /* If not matched, then hop out and put me on progress list */ - if(0 == matched ) { - BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match")); - return BCOL_FN_NOT_STARTED; - } - - /* else, we found our root within the group ... 
*/ - BASESMUMA_VERBOSE(10,("Shared memory probe was matched, the root is %d", src)); - - /* receive the data from sender */ - /* get the data buff */ - /* taken care of in the macro */ - /*parent_data_pointer = data_buffs[src].payload;*/ - /* copy the data */ - mca_bcol_basesmuma_portals_get_msg(cs, parent_lmsg_ctl_pointer, userbuf, pack_len); - - /* set the memory barrier to ensure completion */ - opal_atomic_wmb (); - /* signal that I am done */ - my_ctl_pointer->flag = ready_flag; - - /* am I the last one? If so, release buffer */ - -Release: - my_ctl_pointer->starting_flag_value++; - - return BCOL_FN_COMPLETE; -#endif -} - -#if 0 - -#define BASESMUMA_K_NOMIAL_SEND_SIGNAL(radix_mask, radix, my_relative_index, \ - my_group_index, group_size,sm_data_buffs,sender_ready_flag, \ - num_pending_sends) \ -{ \ - int k, rc; \ - int dst; \ - int comm_dst; \ - volatile mca_bcol_basesmuma_header_t *recv_ctl_pointer = NULL; \ - volatile mca_bcol_basesmuma_portal_buf_addr_t *recv_lmsg_ctl_pointer = NULL; \ - \ - num_pending_sends = 0; \ - while(radix_mask > 0) { \ - /* For each level of tree, do sends */ \ - for (k = 1; \ - k < radix && my_relative_index + radix_mask * k < group_size; \ - ++k) { \ - \ - dst = my_group_index + radix_mask * k; \ - if (dst >= group_size) { \ - dst -= group_size; \ - } \ - /* Signal the children to get data */ \ - recv_ctl_pointer = data_buffs[dst].ctl; \ - recv_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t *) \ - data_buffs[dst].payload; \ - recv_lmsg_ctl_pointer->src_index = my_group_index; \ - recv_lmsg_ctl_pointer->flag = sender_ready_flag; \ - ++num_pending_sends; \ - } \ - radix_mask /= radix; \ - } \ - \ -} - - - -int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - mca_bcol_basesmuma_component_t *cs = 
&mca_bcol_basesmuma_component; - int i, matched = 0; - int src=-1; - int group_size; - int my_rank, first_instance=0, flag_offset; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int64_t sequence_number=input_args->sequence_num; - - volatile int64_t ready_flag; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char* parent_data_pointer; - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - void *userbuf = (void *)((unsigned char *)input_args->userbuf); - - size_t pack_len = 0, dt_size; - - struct mca_bcol_basesmuma_portal_buf_addr_t *my_lmsg_ctl_pointer = NULL; - struct mca_bcol_basesmuma_portal_buf_addr_t *parent_lmsg_ctl_pointer = NULL; - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)cs->portals_info; - - /* we will work only on packed data - so compute the length*/ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*) data_buffs[my_rank].payload; - - /* setup resource recycling */ - if( my_ctl_pointer->sequence_number < sequence_number ) { - first_instance=1; - } - - if( first_instance ) { - /* Signal arrival */ - my_ctl_pointer->flag = -1; - my_ctl_pointer->index=1; - /* this does not need to use any flag values , so only need to - * 
set the value for subsequent values that may need this */ - my_ctl_pointer->starting_flag_value=0; - flag_offset=0; - - } else { - /* only one thread at a time will be making progress on this - * collective, so no need to make this atomic */ - my_ctl_pointer->index++; - } - - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - my_ctl_pointer->sequence_number = sequence_number; - - - /* Construct my portal buffer address and copy to payload buffer */ - mca_bcol_basesmuma_construct_portal_address(my_lmsg_ctl_pointer, - portals_info->portal_id.nid, - portals_info->portal_id.pid, - sequence_number, - bcol_module->super.sbgp_partner_module->group_comm->c_contextid); - - my_lmsg_ctl_pointer->userbuf = userbuff; - my_lsmg_ctl_pointer->userbuf_length = fragment_length; - /* create an event queue */ - ret = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, PTL_EQ_HANDLER_NONE, &eq_h); - - /* non-blocking broadcast algorithm */ - - /* If I am the root, then signal ready flag */ - if(input_args->root_flag) { - ptl_handle_eq_t eq_h; - ptl_event_t event; - int ret; - int root_radix_mask = sm_module->pow_knum; - - BASESMUMA_VERBOSE(10,("I am the root of the data")); - - - if (ret != PTL_OK) { - fprintf(stderr, "PtlEQAlloc() failed: %d \n",ret); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - BASESMUMA_K_NOMIAL_SEND_SIGNAL(root_radix_mask, radix, 0, - my_rank, group_size, data_buffs, ready_flag, nsends) ; - - mca_bcol_basesmuma_portals_post_msg_nb_nopers(cs, my_lmsg_ctl_pointer, userbuf, - pack_len, eq_h, nsends); - - /* wait for a response from the client */ - mca_bcol_basesmuma_portals_wait_event_nopers(eq_h, POST_MSG_EVENT, - &event, nsends); - - /* root is finished */ - goto Release; - } - - /* Im not a root so wait until someone puts data and - * compute where to get data from */ - - while (my_ctl_pointer->flag != 
ready_flag) ; - - my_data_source_index = lmsg_ctl_pointer->src_index; - - parent_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t *) - data_buffs[my_data_source_index].payload; - - mca_bcol_basesmuma_portals_get_msg(cs, parent_lmsg_ctl_pointer, userbuf, pack_len); - - - - - /* I am done getting data, should I send the data to someone */ - - my_relative_index = (my_rank - my_data_source_index) < 0 ? my_rank - - my_data_source_index + group_size : my_rank - my_data_source_index; - - /* - * 2. Locate myself in the tree: - * calculate number of radix steps that we should to take - */ - radix_mask = 1; - while (radix_mask < group_size) { - if (0 != my_relative_index % (radix * radix_mask)) { - /* I found my level in tree */ - break; - } - radix_mask *= radix; - } - - /* go one step back */ - radix_mask /=radix; - - BASESMUMA_K_NOMIAL_SEND_SIGNAL(radix_mask, radix, my_relative_index, - my_rank, group_size,data_buffs,ready_flag,nsends) - - mca_bcol_basesmuma_portals_post_msg_nb_nopers(cs, my_lmsg_ctl_pointer, userbuf, - pack_len, eq_h, nsends); - - /* wait for childrens to read */ - mca_bcol_basesmuma_portals_wait_event_nopers(eq_h, POST_MSG_EVENT, - &event, nsends); - - - -Release: - /* free the event queue */ - ret = PtlEQFree(eq_h); - if (ret != PTL_OK) { - fprintf(stderr, "PtlEQFree() failed: %d )\n",ret); - } - - - my_ctl_pointer->starting_flag_value++; - - return BCOL_FN_COMPLETE; -} - -#endif -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_mem_mgmt.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_mem_mgmt.c deleted file mode 100644 index eff6697ec26..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_mem_mgmt.c +++ /dev/null @@ -1,101 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "bcol_basesmuma.h" - - -/* Shared memory registration function: Calls into the "shared memory - connection manager" (aka - smcm) and registers a chunk of memory by - opening and mmaping a file. - - @input: - - void *reg_data - shared memory specific data needed by the registration - function. - - void *base - pointer to memory address. - - size_t size - size of memory chunk to be registered with sm. - - mca_mpool_base_registration_t *reg - registration data is cached here. - - @output: - - returns OMPI_SUCCESS on successful registration. - - returns OMPI_ERROR on failure. - -*/ - -int mca_bcol_basesmuma_register_sm(void *context_data, void *base, size_t size, - void **reg_desc) -{ - - /* local variables */ - int ret = OMPI_SUCCESS; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - bcol_basesmuma_registration_data_t *sm_reg = - (bcol_basesmuma_registration_data_t*) context_data; - - /* cache some info on sm_reg aka "context_data", you'll need it later */ - sm_reg->base_addr = base; - sm_reg->size = size; - - /* call into the shared memory registration function in smcm - * we need to be sure that the memory is page aligned in order - * to "map_fixed" - */ - sm_reg->sm_mmap = bcol_basesmuma_smcm_mem_reg(base, size, - sm_reg->data_seg_alignment, - sm_reg->file_name); - if(NULL == sm_reg->sm_mmap) { - opal_output (ompi_bcol_base_framework.framework_output, "Bcol_basesmuma memory registration error"); - return OMPI_ERROR; - } - - /* don't let other communicators re-register me! */ - cs->mpool_inited = true; - /* alias back to component */ - cs->sm_payload_structs = sm_reg->sm_mmap; - - return ret; -} - -/* Shared memory deregistration function - deregisters memory by munmapping it and removing the - shared memory file. 
- - Basic steps (please let me know if this is incompatible with your notion of deregistration - or if it causes problems on cleanup): - - 1. munmap the shared memory file. - 2. set the base pointer to the mmaped memory to NULL. - 3. permanently remove the shared memory file from the directory. - -*/ - -int mca_bcol_basesmuma_deregister_sm(void *context_data, void *reg) -{ - /* local variables */ - bcol_basesmuma_registration_data_t *sm_reg = - (bcol_basesmuma_registration_data_t*) context_data; - - if (sm_reg->sm_mmap) { - OBJ_RELEASE(sm_reg->sm_mmap); - } - - /* set the pointer to NULL */ - sm_reg->base_addr = NULL; - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c deleted file mode 100644 index 8770689ed20..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c +++ /dev/null @@ -1,687 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/patterns/net/netpatterns.h" - -#include "opal/util/show_help.h" -#include "opal/align.h" - -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h" -#include "bcol_basesmuma.h" -#include "bcol_basesmuma_utils.h" - -#ifdef __PORTALS_AVAIL__ -#include "bcol_basesmuma_portals.h" -#endif - - -/* - * Local functions - */ -static int alloc_lmsg_reduce_offsets_array(mca_bcol_basesmuma_module_t *sm_module) -{ - int rc = OMPI_SUCCESS, i = 0; - netpatterns_k_exchange_node_t *k_node = &sm_module->knomial_exchange_tree; - int n_exchanges = k_node->n_exchanges; - - /* Precalculate the allreduce offsets */ - if (0 < k_node->n_exchanges) { - sm_module->reduce_offsets = (int **)malloc(n_exchanges * sizeof(int*)); - - if (!sm_module->reduce_offsets) { - rc = OMPI_ERROR; - return rc; - } - - for (i=0; i < n_exchanges ; i++) { - sm_module->reduce_offsets[i] = (int *)malloc (sizeof(int) * NOFFSETS); - - if (!sm_module->reduce_offsets[i]){ - rc = OMPI_ERROR; - return rc; - } - } - } - return rc; -} - -static int free_lmsg_reduce_offsets_array(mca_bcol_basesmuma_module_t *sm_module) -{ - int rc = OMPI_SUCCESS, i = 0; - netpatterns_k_exchange_node_t *k_node = &sm_module->knomial_exchange_tree; - int n_exchanges = k_node->n_exchanges; - - if (sm_module->reduce_offsets) { - for (i=0; i < n_exchanges; i++) { - free (sm_module->reduce_offsets[i]); - } - - free(sm_module->reduce_offsets); - } - return rc; -} - -static void -mca_bcol_basesmuma_module_construct(mca_bcol_basesmuma_module_t *module) -{ - /* initialize all values to 0 */ - memset((void*)((uintptr_t) module + sizeof (module->super)), 0, sizeof (*module) - sizeof (module->super)); - module->super.bcol_component = (mca_bcol_base_component_t 
*) &mca_bcol_basesmuma_component; - module->super.list_n_connected = NULL; - module->super.hier_scather_offset = 0; -} - -static void -mca_bcol_basesmuma_module_destruct(mca_bcol_basesmuma_module_t *sm_module) -{ - /* local variables */ - mca_sbgp_base_module_t *sbgp_module = sm_module->super.sbgp_partner_module; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - - /* - * release allocated resrouces - */ - - /* ...but not until you're sure you have no outstanding collectives */ - while(0 != opal_list_get_size(&(cs->nb_admin_barriers))) { - opal_progress(); - } - -#ifdef __PORTALS_AVAIL__ - /* Remove portals bcast specific resources */ - if ( PTL_OK != PtlEQFree(sm_module->sg_state.read_eq)) { - BASESMUMA_VERBOSE(10,("PtlEQFree() failed: )")); - } -#endif - - /* Remove Lmsg Reduce Offsets Array */ - free_lmsg_reduce_offsets_array(sm_module); - - /* collective topology data */ - if( sm_module->fanout_read_tree) { - for (int i = 0 ; i < sm_module->super.size_of_subgroup ; i++ ) { - if(0 < sm_module->fanout_read_tree[i].n_children ) { - free(sm_module->fanout_read_tree[i].children_ranks); - sm_module->fanout_read_tree[i].children_ranks=NULL; - } - } - free(sm_module->fanout_read_tree); - sm_module->fanout_read_tree=NULL; - } - - /* gvm Leak FIX Reduction_tree[].children_ranks has - * to be removed. 
I don't how to get the size (which is - * size of subgroup) of array reduction_tree - */ - if( sm_module->reduction_tree) { - for (int i = 0 ; i < sm_module->super.size_of_subgroup ; i++ ) { - if(0 < sm_module->reduction_tree[i].n_children ) { - free(sm_module->reduction_tree[i].children_ranks); - sm_module->reduction_tree[i].children_ranks=NULL; - } - } - free(sm_module->reduction_tree); - sm_module->reduction_tree=NULL; - } - - /* gvm Leak FIX */ - if (sm_module->fanout_node.children_ranks){ - free(sm_module->fanout_node.children_ranks); - sm_module->fanout_node.children_ranks = NULL; - } - - if (sm_module->fanin_node.children_ranks){ - free(sm_module->fanin_node.children_ranks); - sm_module->fanin_node.children_ranks = NULL; - } - - /* colls_no_user_data resrouces */ - if(sm_module->colls_no_user_data.ctl_buffs_mgmt){ - free(sm_module->colls_no_user_data.ctl_buffs_mgmt); - sm_module->colls_no_user_data.ctl_buffs_mgmt=NULL; - } - if(sm_module->colls_no_user_data.ctl_buffs){ - free(sm_module->colls_no_user_data.ctl_buffs); - sm_module->colls_no_user_data.ctl_buffs=NULL; - } - - /* return control */ - opal_list_append (&cs->ctl_structures, (opal_list_item_t *) sm_module->no_userdata_ctl); - - /* colls_with_user_data resrouces */ - /* - *debug print */ - /* - fprintf(stderr,"AAA colls_with_user_data.ctl_buffs %p \n", - sm_module->colls_with_user_data.ctl_buffs_mgmt); - end debug */ - - if(sm_module->colls_with_user_data.ctl_buffs_mgmt){ - free(sm_module->colls_with_user_data.ctl_buffs_mgmt); - sm_module->colls_with_user_data.ctl_buffs_mgmt=NULL; - } - if(sm_module->colls_with_user_data.ctl_buffs){ - free(sm_module->colls_with_user_data.ctl_buffs); - sm_module->colls_with_user_data.ctl_buffs=NULL; - } - - if(sm_module->shared_memory_scratch_space) { - free(sm_module->shared_memory_scratch_space); - sm_module->shared_memory_scratch_space=NULL; - } - - /* return control */ - opal_list_append (&cs->ctl_structures, (opal_list_item_t *) sm_module->userdata_ctl); - -#if 1 
- if(sm_module->scatter_kary_tree) { - for (int i = 0 ; i < sm_module->super.size_of_subgroup ; i++ ) { - if(0 < sm_module->scatter_kary_tree[i].n_children) { - free(sm_module->scatter_kary_tree[i].children_ranks); - sm_module->scatter_kary_tree[i].children_ranks=NULL; - } - } - free(sm_module->scatter_kary_tree); - } -#endif - - if(NULL != sm_module->super.list_n_connected ){ - free(sm_module->super.list_n_connected); - sm_module->super.list_n_connected = NULL; - } - - cleanup_nb_coll_buff_desc(&sm_module->ml_mem.nb_coll_desc, - sm_module->ml_mem.num_banks, - sm_module->ml_mem.num_buffers_per_bank); - - for (int i = 0; i < BCOL_NUM_OF_FUNCTIONS; i++){ - /* gvm FIX: Go through the list and destroy each item */ - /* Destroy the function table object for each bcol type list */ - OPAL_LIST_DESTRUCT((&sm_module->super.bcol_fns_table[i])); - } - - if (NULL != sm_module->payload_backing_files_info) { - bcol_basesmuma_smcm_release_connections (sm_module, sbgp_module, &cs->sm_connections_list, - &sm_module->payload_backing_files_info); - } - - if (NULL != sm_module->ctl_backing_files_info) { - bcol_basesmuma_smcm_release_connections (sm_module, sbgp_module, &cs->sm_connections_list, - &sm_module->ctl_backing_files_info); - } - - if (NULL != sm_module->ml_mem.bank_release_counter) { - free(sm_module->ml_mem.bank_release_counter); - sm_module->ml_mem.bank_release_counter = NULL; - } - - if (NULL != sm_module->colls_with_user_data.data_buffs) { - free((void *)sm_module->colls_with_user_data.data_buffs); - sm_module->colls_with_user_data.data_buffs = NULL; - } - - /* free the k-nomial allgather tree here */ - netpatterns_cleanup_recursive_knomial_allgather_tree_node(&sm_module->knomial_allgather_tree); - netpatterns_cleanup_recursive_doubling_tree_node(&sm_module->recursive_doubling_tree); - netpatterns_cleanup_recursive_knomial_tree_node(&sm_module->knomial_exchange_tree); - - /* done */ -} - -static void bcol_basesmuma_set_small_msg_thresholds(struct mca_bcol_base_module_t 
*super) -{ - mca_bcol_basesmuma_module_t *basesmuma_module = - (mca_bcol_basesmuma_module_t *) super; - - size_t basesmuma_offset = bcol_basesmuma_data_offset_calc(basesmuma_module); - - /* Set the Allreduce threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_ALLREDUCE] = - basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset; - - /* Set the Bcast threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_BCAST] = - basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset; - - /* Set the Gather threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_GATHER] = - (basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset) / - ompi_comm_size(basesmuma_module->super.sbgp_partner_module->group_comm); - - /* Set the ALLgather threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_ALLGATHER] = - (basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset) / - ompi_comm_size(basesmuma_module->super.sbgp_partner_module->group_comm); - - /* Set the Reduce threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_REDUCE] = - basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset; - - /* Set the Scatter threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_SCATTER] = - basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset; -} - -/* setup memory management and collective routines */ - -static void load_func(mca_bcol_base_module_t *super) -{ - int fnc; - - /* Loading memory management and collective functions */ - - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - super->bcol_function_table[fnc] = NULL; - } - - /*super->bcol_function_table[BCOL_BARRIER] = 
bcol_basesmuma_recursive_double_barrier;*/ - -#ifdef __PORTALS_AVAIL__ - super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_lmsg_scatter_allgather_portals_bcast; - /* super->bcol_function_table[BCOL_BCAST] = - bcol_basesmuma_lmsg_bcast_k_nomial_anyroot; */ -#endif - - /*super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_bcast;*/ - /*super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_binary_scatter_allgather_segment;*/ - /*super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_bcast_k_nomial_anyroot;*/ - super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_bcast; -#ifdef __PORTALS_AVAIL__ - super->bcol_function_table[BCOL_BCAST] = - bcol_basesmuma_lmsg_scatter_allgather_portals_bcast; -#endif - /* super->bcol_function_table[BCOL_ALLREDUCE] = bcol_basesmuma_allreduce_intra_fanin_fanout; */ - super->bcol_function_table[BCOL_ALLREDUCE] = bcol_basesmuma_allreduce_intra_recursive_doubling; - super->bcol_function_table[BCOL_REDUCE] = bcol_basesmuma_reduce_intra_fanin_old; - /* memory management */ - super->bcol_memory_init = bcol_basesmuma_bank_init_opti; - - super->k_nomial_tree = bcol_basesmuma_setup_knomial_tree; - - /* Set thresholds */ - super->set_small_msg_thresholds = bcol_basesmuma_set_small_msg_thresholds; -} - -static void load_func_with_choices(mca_bcol_base_module_t *super) -{ - int fnc; - - /* Loading memory management and collective functions */ - - for (fnc=0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - super->bcol_function_init_table[fnc] = NULL; - } - - super->bcol_function_init_table[BCOL_FANIN] = bcol_basesmuma_fanin_init; - super->bcol_function_init_table[BCOL_FANOUT] = bcol_basesmuma_fanout_init; - super->bcol_function_init_table[BCOL_BARRIER] = bcol_basesmuma_barrier_init; - - super->bcol_function_init_table[BCOL_BCAST] = bcol_basesmuma_bcast_init; - super->bcol_function_init_table[BCOL_ALLREDUCE] = bcol_basesmuma_allreduce_init; - super->bcol_function_init_table[BCOL_REDUCE] = bcol_basesmuma_reduce_init; - 
super->bcol_function_init_table[BCOL_GATHER] = bcol_basesmuma_gather_init; - super->bcol_function_init_table[BCOL_ALLGATHER] = bcol_basesmuma_allgather_init; - super->bcol_function_init_table[BCOL_SYNC] = bcol_basesmuma_memsync_init; - /* memory management */ - super->bcol_memory_init = bcol_basesmuma_bank_init_opti; - - super->k_nomial_tree = bcol_basesmuma_setup_knomial_tree; - -} - -static int load_recursive_knomial_info(mca_bcol_basesmuma_module_t - *sm_module) -{ - int rc = OMPI_SUCCESS; - rc = netpatterns_setup_recursive_knomial_tree_node(sm_module->super.sbgp_partner_module->group_size, - sm_module->super.sbgp_partner_module->my_index, - mca_bcol_basesmuma_component.k_nomial_radix, - &sm_module->knomial_exchange_tree); - return rc; -} - - -int bcol_basesmuma_setup_knomial_tree(mca_bcol_base_module_t *super) -{ - mca_bcol_basesmuma_module_t *sm_module = (mca_bcol_basesmuma_module_t *) super; - - return netpatterns_setup_recursive_knomial_allgather_tree_node(sm_module->super.sbgp_partner_module->group_size, - sm_module->super.sbgp_partner_module->my_index, - mca_bcol_basesmuma_component.k_nomial_radix, - super->list_n_connected, - &sm_module->knomial_allgather_tree); -} - - - - -/* query to see if the module is available for use on the given - * communicator, and if so, what it's priority is. This is where - * the backing shared-memory file is created. 
- */ -mca_bcol_base_module_t ** -mca_bcol_basesmuma_comm_query(mca_sbgp_base_module_t *module, int *num_modules) -{ - /* local variables */ - mca_bcol_base_module_t **sm_modules = NULL; - mca_bcol_basesmuma_module_t *sm_module; - bcol_basesmuma_registration_data_t *sm_reg_data; - int ret, my_rank, name_length; - char *name; - int i; - - int bcast_radix; - - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - /*mca_base_component_list_item_t *hdl_cli = NULL;*/ - /*int hdl_num;*/ - - /* at this point I think there is only a sinle shared - memory bcol that we need to be concerned with */ - - /* No group, no modules */ - if (OPAL_UNLIKELY(NULL == module)) { - return NULL; - } - - /* allocate and initialize an sm_bcol module */ - sm_module = OBJ_NEW(mca_bcol_basesmuma_module_t); - - /* set the subgroup */ - sm_module->super.sbgp_partner_module=module; - - (*num_modules)=1; - cs->super.n_net_contexts = *num_modules; - sm_module->reduction_tree = NULL; - sm_module->fanout_read_tree = NULL; - - ret=netpatterns_setup_recursive_doubling_tree_node( - module->group_size,module->my_index, - &(sm_module->recursive_doubling_tree)); - if(OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "Error setting up recursive_doubling_tree \n"); - return NULL; - } - - /* setup the fanin tree - this is used only as part of a hierarchical - * barrier, so will set this up with rank 0 as the root */ - my_rank=module->my_index; - ret=netpatterns_setup_narray_tree(cs->radix_fanin, - my_rank,module->group_size,&(sm_module->fanin_node)); - if(OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "Error setting up fanin tree \n"); - return NULL; - } - - /* setup the fanout tree - this is used only as part of a hierarchical - * barrier, so will set this up with rank 0 as the root */ - ret=netpatterns_setup_narray_tree(cs->radix_fanout, - my_rank,module->group_size,&(sm_module->fanout_node)); - if(OMPI_SUCCESS != ret) { - 
opal_output (ompi_bcol_base_framework.framework_output, "Error setting up fanout tree \n"); - return NULL; - } - - /* - * Setup the broadcast tree - this is used only as part of a hierarchical - * bcast, so will set this up with rank 0 as the root. - */ - - /* set the radix of the bcast tree */ - bcast_radix = cs->radix_read_tree; - - /* initialize fan-out read tree */ - sm_module->fanout_read_tree=(netpatterns_tree_node_t*) malloc( - sizeof(netpatterns_tree_node_t)*module->group_size); - if( NULL == sm_module->fanout_read_tree ) { - goto Error; - } - - for(i = 0; i < module->group_size; i++){ - ret = netpatterns_setup_narray_tree(bcast_radix, - i, module->group_size, &(sm_module->fanout_read_tree[i])); - if(OMPI_SUCCESS != ret) { - goto Error; - } - } - - ret = load_recursive_knomial_info(sm_module); - if (OMPI_SUCCESS != ret) { - BASESMUMA_VERBOSE(10, ("Failed to load recursive knomial tree")); - goto Error; - } - - /* Allocate offsets array for lmsg reduce */ - ret = alloc_lmsg_reduce_offsets_array(sm_module); - if (OMPI_SUCCESS != ret) { - BASESMUMA_VERBOSE(10, ("Failed to allocate reduce offsets array")); - goto Error; - } - - /* initialize reduction tree */ - sm_module->reduction_tree=(netpatterns_tree_node_t *) malloc( - sizeof(netpatterns_tree_node_t )*module->group_size); - if( NULL == sm_module->reduction_tree ) { - goto Error; - } - - ret=netpatterns_setup_multinomial_tree( - cs->order_reduction_tree,module->group_size, - sm_module->reduction_tree); - if( MPI_SUCCESS != ret ) { - goto Error; - } - - /* get largest power of k for given group size */ - sm_module->pow_k_levels = pow_sm_k(cs->k_nomial_radix, - sm_module->super.sbgp_partner_module->group_size, - &(sm_module->pow_k)); - - /* get largest power of 2 for a given group size - * used in scatter allgather - */ - sm_module->pow_2_levels = pow_sm_k(2, - sm_module->super.sbgp_partner_module->group_size, - &(sm_module->pow_2)); - - /* - * setup scatter data - */ - 
sm_module->scatter_kary_radix=cs->scatter_kary_radix; - sm_module->scatter_kary_tree=NULL; - ret=netpatterns_setup_narray_tree_contigous_ranks( - sm_module->scatter_kary_radix, - sm_module->super.sbgp_partner_module->group_size, - &(sm_module->scatter_kary_tree)); - if(OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "In base_bcol_masesmuma_setup_library_buffers and scatter k-ary tree setup failed \n"); - return NULL; - } - - /* setup the module shared memory management */ - ret=base_bcol_basesmuma_setup_library_buffers(sm_module, cs); - - if(OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "In base_bcol_masesmuma_setup_library_buffers and mpool was not successfully setup!\n"); - return NULL; - } - - /* setup the collectives and memory management */ - - /* check to see whether or not the mpool has been inited */ - /* allocate some space for the network contexts */ - if(!cs->mpool_inited) { - /* if it's empty, then fill it for first time */ - cs->super.network_contexts = (bcol_base_network_context_t **) - malloc((cs->super.n_net_contexts)* - sizeof(bcol_base_network_context_t *)); - /* you need to do some basic setup - define the file name, - * set data seg alignment and size of cntl structure in sm - * file. 
- */ - /* give the payload sm file a name */ - name_length=asprintf(&name, - "%s"OPAL_PATH_SEP"0%s%0d", - ompi_process_info.job_session_dir, - cs->payload_base_fname, - (int)getpid()); - if( 0 > name_length ) { - opal_output (ompi_bcol_base_framework.framework_output, "Failed to assign the shared memory payload file a name\n"); - return NULL; - } - /* make sure name is not too long */ - if ( OPAL_PATH_MAX < (name_length-1) ) { - opal_output (ompi_bcol_base_framework.framework_output, "Shared memory file name is too long!\n"); - return NULL; - } - /* set the name and alignment characteristics */ - sm_reg_data = (bcol_basesmuma_registration_data_t *) malloc( - sizeof(bcol_basesmuma_registration_data_t)); - sm_reg_data->file_name = name; - - sm_reg_data->data_seg_alignment = getpagesize(); - sm_reg_data->size_ctl_structure = 0; - cs->super.network_contexts[0] = (bcol_base_network_context_t *) - malloc(sizeof(bcol_base_network_context_t)); - cs->super.network_contexts[0]->context_data = - (void *) sm_reg_data; - cs->super.network_contexts[0]-> - register_memory_fn = mca_bcol_basesmuma_register_sm; - cs->super.network_contexts[0]-> - deregister_memory_fn = mca_bcol_basesmuma_deregister_sm; - sm_module->super.network_context = cs->super.network_contexts[0]; - } else { - - sm_module->super.network_context = cs->super.network_contexts[0]; - } - - /* Set the header size */ - sm_module->super.header_size = sizeof(mca_bcol_basesmuma_header_t); - - /*initialize the hdl module if it's to be enabled*/ -#if 0 - if (module->use_hdl) { - sm_module->super.use_hdl = module->use_hdl; - hdl_cli = (mca_base_component_list_item_t *) - opal_list_get_first(&mca_hdl_base_components_in_use); - sm_module->hdl_module = ((mca_hdl_base_component_t*) - hdl_cli->cli_component)->hdl_comm_query(sm_module, &hdl_num); - if (1 != hdl_num || sm_module->hdl_module == NULL) { - ML_ERROR(("hdl modules are not successfully initialized!\n")); - goto Error; - } - } else { - sm_module->hdl_module = NULL; - } 
-#else - sm_module->hdl_module = NULL; -#endif - - - /* collective setup */ - load_func(&(sm_module->super)); - load_func_with_choices(&(sm_module->super)); - - /* - * This initializes all collective algorithms - */ - - ret = mca_bcol_base_bcol_fns_table_init(&(sm_module->super)); - - if (OMPI_SUCCESS != ret) { - - goto Error; - } - - sm_module->super.supported_mode = 0; - - /* NTH: this is not set anywhere on the trunk as of 08/13/13 */ -#if 0 - if (module->use_hdl) { - sm_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY; - } -#endif - - /* Initializes portals library required for basesmuma large message */ -#ifdef __PORTALS_AVAIL__ - /* Enable zero copy mode */ - sm_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY; - - ret = mca_bcol_basesmuma_portals_init(cs); - if (OMPI_SUCCESS != ret) { - return NULL; - } - - sm_module->sg_state.phase = INIT; - - ret = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &sm_module->sg_state.read_eq); - - if (ret != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d",ret)); - return NULL; - } - -#endif - /* blocking recursive double barrier test */ - /* - { - opal_output (ompi_bcol_base_framework.framework_output, "BBB About to hit the barrier test\n"); - int rc; - bcol_function_args_t bogus; - rc = bcol_basesmuma_rd_barrier_init(&(sm_module->super)); - rc = bcol_basesmuma_recursive_double_barrier( - &bogus, &(sm_module->super)); - } - */ - - /* in this case we only expect a single network context. 
- in the future we should loop around this */ - sm_modules = (mca_bcol_base_module_t **) malloc(sizeof(mca_bcol_base_module_t *)); - if( !sm_modules ) { - opal_output (ompi_bcol_base_framework.framework_output, "In base_bcol_masesmuma_setup_library_buffers failed to allocate memory for sm_modules\n"); - return NULL; - } - - sm_modules[0] = &(sm_module->super); - - return sm_modules; - - Error: - - /* cleanup */ - if( sm_module->reduction_tree ) { - free(sm_module->reduction_tree); - sm_module->reduction_tree=NULL; - } - - return NULL; -} - -OBJ_CLASS_INSTANCE(mca_bcol_basesmuma_module_t, - mca_bcol_base_module_t, - mca_bcol_basesmuma_module_construct, - mca_bcol_basesmuma_module_destruct); diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_progress.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_progress.c deleted file mode 100644 index 77263f94800..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_progress.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include "bcol_basesmuma.h" - -/* the progress function to be called from the opal progress function - */ -int bcol_basesmuma_progress(void) -{ - /* local variables */ - volatile int32_t *cntr; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - - /* check to see if release of memory blocks needs to be done */ - if( opal_list_get_size(&(cs->nb_admin_barriers)) ) { - sm_nbbar_desc_t *item_ptr; - opal_list_t *list=&(cs->nb_admin_barriers); - /* process only if the list is non-empty */ - if( !OPAL_THREAD_TRYLOCK(&cs->nb_admin_barriers_mutex)) { - - for (item_ptr = (sm_nbbar_desc_t*) opal_list_get_first(list); - item_ptr != (sm_nbbar_desc_t*) opal_list_get_end(list); - item_ptr = (sm_nbbar_desc_t*) opal_list_get_next(item_ptr) ) - { - bcol_basesmuma_rd_nb_barrier_progress_admin(item_ptr); - /* check to see if an complete */ - if( NB_BARRIER_DONE == item_ptr->collective_phase ) { - /* barrier is complete - remove from the list. No need - * to put it on another list, as it is part of the memory - * bank control structure, and will be picked up - * again when needed. - */ - int index= - item_ptr->pool_index; - /* old way - ctl_struct specific */ - /* - volatile uint64_t *cntr= (volatile uint64_t *) - &(item_ptr->sm_module->colls_no_user_data. 
- ctl_buffs_mgmt[index].bank_gen_counter); - */ - - cntr= (volatile int32_t *) &(item_ptr->coll_buff-> - ctl_buffs_mgmt[index].bank_gen_counter); - item_ptr=(sm_nbbar_desc_t*)opal_list_remove_item((opal_list_t *)list, - ( opal_list_item_t *)item_ptr); - /* increment the generation number */ - OPAL_THREAD_ADD32(cntr,1); - } - } - - OPAL_THREAD_UNLOCK(&cs->nb_admin_barriers_mutex); - } - - } - return OMPI_SUCCESS; - -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c deleted file mode 100644 index 9749491e9f3..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* Recursive doubling blocking barrier */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/patterns/net/netpatterns.h" - -#include "opal/sys/atomic.h" - -#include "bcol_basesmuma.h" - -#if 0 -int bcol_basesmuma_recursive_double_barrier(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - /* local variables */ - int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange, flag_to_set; - int pair_rank, flag_offset; - mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - netpatterns_pair_exchange_node_t *my_exchange_node; - int extra_rank, my_rank, pow_2; - volatile mca_bcol_basesmuma_ctl_struct_t *partner_ctl; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl; - int64_t sequence_number; - bool found; - int buff_index, first_instance=0; - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; -#if 0 - 
fprintf(stderr,"Entering the sm rd barrier\n"); - fflush(stderr); -#endif - - /* get the pointer to the segment of control structures */ - my_exchange_node=&(bcol_module->recursive_doubling_tree); - my_rank=bcol_module->super.sbgp_partner_module->my_index; - pow_2=bcol_module->super.sbgp_partner_module->pow_2; - - /* figure out what instance of the basesmuma bcol I am */ - leading_dim=bcol_module->colls_no_user_data.size_of_group; - sequence_number=input_args->sequence_num - c_input_args->bcol_module->squence_number_offset; - - buff_index=sequence_number & (bcol_module->colls_no_user_data.mask); - - idx=SM_ARRAY_INDEX(leading_dim,buff_index,0); - ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_no_user_data.ctl_buffs+idx; - my_ctl=ctl_structs[my_rank]; - if( my_ctl->sequence_number < sequence_number ) { - first_instance=1; - } - - /* get the pool index */ - if( first_instance ) { - idx = -1; - while( idx == -1 ) { - - idx=bcol_basesmuma_get_buff_index( - &(bcol_module->colls_no_user_data),sequence_number); - } - if( -1 == idx ){ - return ORTE_ERR_TEMP_OUT_OF_RESOURCE; - } - my_ctl->index=1; - /* this does not need to use any flag values , so only need to - * set the value for subsequent values that may need this */ - my_ctl->starting_flag_value=0; - flag_offset=0; - } else { - /* only one thread at a time will be making progress on this - * collective, so no need to make this atomic */ - my_ctl->index++; - flag_offset=my_ctl->starting_flag_value; - } - - /* signal that I have arrived */ - my_ctl->flag = -1; - /* don't need to set this flag anymore */ - my_ctl->sequence_number = sequence_number; - /* opal_atomic_wmb ();*/ - - if(0 < my_exchange_node->n_extra_sources) { - if (EXCHANGE_NODE == my_exchange_node->node_type) { - volatile int64_t *partner_sn; - int cnt=0; - - /* I will participate in the exchange - wait for signal from extra - ** process */ - extra_rank = my_exchange_node->rank_extra_source; - partner_ctl=(volatile 
mca_bcol_basesmuma_ctl_struct_t *)ctl_structs[extra_rank]; - - /*partner_ctl=ctl_structs[extra_rank];*/ - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( !found ) - { - if( *partner_sn >= sequence_number ) { - found=true; - } - cnt++; - if( cnt == 1000 ) { - opal_progress(); - cnt=0; - } - } - - } else { - - /* Nothing to do, already registared that I am here */ - } - } - - for(exchange = 0; exchange < my_exchange_node->n_exchanges; exchange++) { - - volatile int64_t *partner_sn; - volatile int *partner_flag; - int cnt=0; - - /* rank of exchange partner */ - pair_rank = my_rank ^ ( 1 SHIFT_UP exchange ); - partner_ctl=ctl_structs[pair_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - partner_flag=(volatile int *)&(partner_ctl->flag); - - /* signal that I am at iteration exchange of the algorithm */ - flag_to_set=flag_offset+exchange; - my_ctl->flag = flag_to_set; - - /* check to see if the partner has arrived */ - - /* spin n iterations until partner registers */ - found=false; - while( !found ) - { - if( (*partner_sn > sequence_number) || - ( *partner_sn == sequence_number && - *partner_flag >= flag_to_set ) ) { - found=true; - } else { - cnt++; - if( cnt == 1000 ) { - opal_progress(); - cnt=0; - } - } - } - } - - if(0 < my_exchange_node->n_extra_sources) { - if ( EXTRA_NODE == my_exchange_node->node_type ) { - int cnt=0; - - /* I will not participate in the exchange - - * wait for signal from extra partner */ - extra_rank = my_exchange_node->rank_extra_source; - partner_ctl=ctl_structs[extra_rank]; - flag_to_set=flag_offset+my_exchange_node->log_2; - - /* spin n iterations until partner registers */ - found=false; - while( !found ) - { - if (IS_PEER_READY(partner_ctl, flag_to_set, sequence_number)){ - found=true; - } else { - cnt++; - if( cnt == 1000 ) { - opal_progress(); - cnt=0; - } - } - } - - } else { - - /* signal the extra rank 
that I am done with the recursive - * doubling phase. - */ - flag_to_set=flag_offset+my_exchange_node->log_2; - my_ctl->flag = flag_to_set; - - } - } - - /* if I am the last instance of a basesmuma function in this collectie, - * release the resrouces */ - if (IS_LAST_BCOL_FUNC(c_input_args)){ - idx=bcol_basesmuma_free_buff( - &(bcol_module->colls_no_user_data), - sequence_number); - } else { - /* increment flag value - so next sm collective in the hierarchy - * will not collide with the current one, as they share the - * control structure */ - my_ctl->starting_flag_value+=(my_exchange_node->log_2+1); - } - - /* return */ - return ret; -} -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c deleted file mode 100644 index 305a645870e..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Copyright (c) 2009-2012 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -/* we need make cleanup with all these includes START */ -#include -#include - -#include "ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_basesmuma.h" -#include "opal/sys/atomic.h" -#include "ompi/patterns/net/netpatterns.h" -#include "ompi/mca/bcol/base/base.h" - -/* - * Initialize nonblocking barrier. This is code specific for handling - * the recycling of data, and uses only a single set of control buffers. 
- * It also assumes that for a given process, only a single outstanding - * barrier operation will occur for a given control structure, - * with the sequence number being used for potential overlap in time - * between succesive barrier calls on different processes. - */ -int bcol_basesmuma_rd_nb_barrier_init_admin( - sm_nbbar_desc_t *sm_desc) - -{ - /* local variables */ - int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange; - int pair_rank; - mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - netpatterns_pair_exchange_node_t *my_exchange_node; - int extra_rank, my_rank; - mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl; - mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl; - int64_t bank_genaration; - bool found; - int pool_index=sm_desc->pool_index; - mca_bcol_basesmuma_module_t *bcol_module=sm_desc->sm_module; - - /* get the pointer to the segment of control structures */ - idx=sm_desc->coll_buff->number_of_buffs+pool_index; - leading_dim=sm_desc->coll_buff->size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,idx,0); - ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **) - sm_desc->coll_buff->ctl_buffs+idx; - bank_genaration= sm_desc->coll_buff->ctl_buffs_mgmt[pool_index].bank_gen_counter; - - my_exchange_node=&(bcol_module->recursive_doubling_tree); - my_rank=bcol_module->super.sbgp_partner_module->my_index; - my_ctl=ctl_structs[my_rank]; - /* debug print */ - /* - { - int ii; - for(ii = 0; ii < 6; ii++) { - fprintf(stderr,"UUU ctl_struct[%d] := %p\n",ii, - bcol_module->colls_no_user_data.ctl_buffs[ii]); - fflush(stderr); - } - } - */ - /* end debug */ - - /* signal that I have arrived */ - my_ctl->flag = -1; - - opal_atomic_wmb (); - - /* don't need to set this flag anymore */ - my_ctl->sequence_number = bank_genaration; - - if(0 < my_exchange_node->n_extra_sources) { - if (EXCHANGE_NODE == my_exchange_node->node_type) { - volatile int64_t *partner_sn; - /* I will participate in the exchange - wait for signal from extra - ** process */ - extra_rank = 
my_exchange_node->rank_extra_source; - partner_ctl=ctl_structs[extra_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - if( *partner_sn >= bank_genaration ) { - found=true; - break; - } - loop_cnt++; - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_PRE_PHASE; - return OMPI_SUCCESS; - } - - } else { - - /* Nothing to do, already registared that I am here */ - } - } - - for(exchange = 0; exchange < my_exchange_node->n_exchanges; exchange++) { - - volatile int64_t *partner_sn; - volatile int *partner_flag; - - /* rank of exchange partner */ - pair_rank = my_rank ^ ( 1 SHIFT_UP exchange ); - partner_ctl=ctl_structs[pair_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - partner_flag=(volatile int *)&(partner_ctl->flag); - - /* signal that I am at iteration exchange of the algorithm */ - my_ctl->flag = exchange; - - /* check to see if the partner has arrived */ - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - if( (*partner_sn > bank_genaration) || - ( *partner_sn == bank_genaration && - *partner_flag >= exchange ) ) { - found=true; - break; - } - - loop_cnt++; - - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_RECURSIVE_DOUBLING; - sm_desc->recursive_dbl_iteration=exchange; - return OMPI_SUCCESS; - } - - } - - if(0 < my_exchange_node->n_extra_sources) { - if ( EXTRA_NODE == my_exchange_node->node_type ) { - volatile int64_t *partner_sn; - volatile int *partner_flag; - - /* I will not participate in the exchange - - * wait for signal from extra partner */ - extra_rank = my_exchange_node->rank_extra_source; - partner_ctl=ctl_structs[extra_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - 
partner_flag=(volatile int *)&(partner_ctl->flag); - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - if( (*partner_sn > bank_genaration) || - ( (*partner_sn == bank_genaration) && - (*partner_flag == (my_exchange_node->log_2)) ) ) { - found=true; - break; - } - loop_cnt++; - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_POST_PHASE; - return OMPI_SUCCESS; - } - - } else { - - /* signal the extra rank that I am done with the recursive - * doubling phase. - */ - my_ctl->flag = my_exchange_node->n_exchanges; - - } - } - - /* set the barrier as complete */ - sm_desc->collective_phase=NB_BARRIER_DONE; - /* return */ - return ret; -} - -/* admin nonblocking barrier - progress function */ -int bcol_basesmuma_rd_nb_barrier_progress_admin( - sm_nbbar_desc_t *sm_desc) - -{ - /* local variables */ - int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange; - int pair_rank, start_index, restart_phase; - mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - netpatterns_pair_exchange_node_t *my_exchange_node; - int extra_rank, my_rank; - mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl; - mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl; - int64_t bank_genaration; - int pool_index=sm_desc->pool_index; - bool found; - mca_bcol_basesmuma_module_t *bcol_module=sm_desc->sm_module; - - /* get the pointer to the segment of control structures */ - idx = sm_desc->coll_buff->number_of_buffs+pool_index; - leading_dim = sm_desc->coll_buff->size_of_group; - idx = SM_ARRAY_INDEX(leading_dim,idx,0); - ctl_structs = (mca_bcol_basesmuma_ctl_struct_t **) - sm_desc->coll_buff->ctl_buffs+idx; - bank_genaration = sm_desc->coll_buff->ctl_buffs_mgmt[pool_index].bank_gen_counter; - - my_exchange_node=&(bcol_module->recursive_doubling_tree); - my_rank=bcol_module->super.sbgp_partner_module->my_index; - my_ctl=ctl_structs[my_rank]; - - /* check to make sure that this should be 
progressed */ - if( ( sm_desc->collective_phase == NB_BARRIER_INACTIVE ) || - ( sm_desc->collective_phase == NB_BARRIER_DONE ) ) - { - return OMPI_SUCCESS; - } - - /* set the restart up - and jump to the correct place in the algorithm */ - restart_phase=sm_desc->collective_phase; - if ( NB_PRE_PHASE == restart_phase ) { - start_index=0; - } else if ( NB_RECURSIVE_DOUBLING == restart_phase ) { - start_index=sm_desc->recursive_dbl_iteration; - goto Exchange_phase; - } else { - goto Post_phase; - } - - if(0 < my_exchange_node->n_extra_sources) { - if (EXCHANGE_NODE == my_exchange_node->node_type) { - volatile int64_t *partner_sn; - /* I will participate in the exchange - wait for signal from extra - ** process */ - extra_rank = my_exchange_node->rank_extra_source; - partner_ctl=ctl_structs[extra_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - - /* spin n iterations until partner registers */ - loop_cnt=0; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - found=false; - if( *partner_sn >= bank_genaration ) { - found=true; - break; - } - loop_cnt++; - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_PRE_PHASE; - return OMPI_SUCCESS; - } - - } else { - - /* Nothing to do, already registared that I am here */ - } - } - -Exchange_phase: - - for(exchange = start_index; - exchange < my_exchange_node->n_exchanges; exchange++) { - - volatile int64_t *partner_sn; - volatile int *partner_flag; - - /* rank of exchange partner */ - pair_rank = my_rank ^ ( 1 SHIFT_UP exchange ); - partner_ctl=ctl_structs[pair_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - partner_flag=(volatile int *)&(partner_ctl->flag); - - /* signal that I am at iteration exchange of the algorithm */ - my_ctl->flag = exchange; - - /* check to see if the partner has arrived */ - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - 
if( (*partner_sn > bank_genaration) || - ( (*partner_sn == bank_genaration) && - (*partner_flag >= exchange) ) ) { - found=true; - break; - } - loop_cnt++; - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_RECURSIVE_DOUBLING; - sm_desc->recursive_dbl_iteration=exchange; - return OMPI_SUCCESS; - } - - } - -Post_phase: - if(0 < my_exchange_node->n_extra_sources) { - if ( EXTRA_NODE == my_exchange_node->node_type ) { - volatile int64_t *partner_sn; - volatile int *partner_flag; - - /* I will not participate in the exchange - - * wait for signal from extra partner */ - extra_rank = my_exchange_node->rank_extra_source; - partner_ctl=ctl_structs[extra_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - partner_flag=(volatile int *)&(partner_ctl->flag); - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - if( (*partner_sn > bank_genaration) || - ( *partner_sn == bank_genaration && - *partner_flag == (my_exchange_node->log_2) ) ) { - found=true; - break; - } - loop_cnt++; - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_POST_PHASE; - return OMPI_SUCCESS; - } - - } else { - - /* signal the extra rank that I am done with the recursive - * doubling phase. 
- */ - my_ctl->flag = my_exchange_node->n_exchanges; - - } - } - - /* set the barrier as complete */ - sm_desc->collective_phase=NB_BARRIER_DONE; - - /* return */ - return ret; -} - -static int bcol_basesmuma_memsync(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - int rc; - int memory_bank = input_args->root; - - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - sm_buffer_mgmt *buff_block = &(bcol_module->colls_with_user_data); - sm_nbbar_desc_t *sm_desc = &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc); - - sm_desc->coll_buff = buff_block; - /* - printf("XXX SYNC call\n"); - */ - - rc = bcol_basesmuma_rd_nb_barrier_init_admin( - sm_desc); - if (OMPI_SUCCESS != rc) { - return rc; - } - - if (NB_BARRIER_DONE != sm_desc->collective_phase) { - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - opal_list_t *list=&(cs->nb_admin_barriers); - opal_list_item_t *append_item; - - /* put this onto the progression list */ - OPAL_THREAD_LOCK(&(cs->nb_admin_barriers_mutex)); - append_item=(opal_list_item_t *) - &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc); - opal_list_append(list,append_item); - OPAL_THREAD_UNLOCK(&(cs->nb_admin_barriers_mutex)); - /* progress communications so that resources can be freed up */ - return BCOL_FN_STARTED; - } - - /* Done - bump the counter */ - (buff_block->ctl_buffs_mgmt[memory_bank].bank_gen_counter)++; - /* - printf("XXX SYNC call done \n"); - */ - return BCOL_FN_COMPLETE; -} - -static int bcol_basesmuma_memsync_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - int memory_bank = input_args->root; - - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - sm_buffer_mgmt *buff_block = &(bcol_module->colls_with_user_data); - sm_nbbar_desc_t *sm_desc = &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc); - - /* I do not 
have to do anything, since the - progress done by basesmuma progress engine */ - - if (NB_BARRIER_DONE != sm_desc->collective_phase) { - return BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -int bcol_basesmuma_memsync_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_SYNC; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - bcol_basesmuma_memsync, - bcol_basesmuma_memsync_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.c deleted file mode 100644 index 570280d0842..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.c +++ /dev/null @@ -1,382 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "ompi/op/op.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/bcol/bcol.h" - -#include "opal/include/opal_stdint.h" - -#include "bcol_basesmuma.h" -#include "bcol_basesmuma_reduce.h" -/** - * gvm - Shared memory reduce - */ - -static int bcol_basesmuma_reduce_intra_fanin_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_reduce_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_REDUCE; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1048576; - comm_attribs.data_src = DATA_SRC_KNOWN; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; - inv_attribs.datatype_bitmap = 0x11111111; - inv_attribs.op_types_bitmap = 0x11111111; - - - /* Set attributes for fanin fanout algorithm */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, bcol_basesmuma_reduce_intra_fanin, - bcol_basesmuma_reduce_intra_fanin_progress); - - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, NULL, NULL); - - return OMPI_SUCCESS; -} - -/* - * Small data fanin reduce - * ML buffers are used for both payload and control structures - * This functions works with hierarchical allreduce and - * progress engine - */ -static inline int reduce_children (mca_bcol_basesmuma_module_t *bcol_module, volatile void *rbuf, netpatterns_tree_node_t *my_reduction_node, - int *iteration, volatile mca_bcol_basesmuma_header_t *my_ctl_pointer, ompi_datatype_t *dtype, - volatile 
mca_bcol_basesmuma_payload_t *data_buffs, int count, struct ompi_op_t *op, int process_shift) { - volatile mca_bcol_basesmuma_header_t * child_ctl_pointer; - int bcol_id = (int) bcol_module->super.bcol_id; - int64_t sequence_number = my_ctl_pointer->sequence_number; - int8_t ready_flag = my_ctl_pointer->ready_flag; - int group_size = bcol_module->colls_no_user_data.size_of_group; - - if (LEAF_NODE != my_reduction_node->my_node_type) { - volatile char *child_data_pointer; - volatile void *child_rbuf; - - /* for each child */ - /* my_result_data = child_result_data (op) my_source_data */ - - for (int child = *iteration ; child < my_reduction_node->n_children ; ++child) { - int child_rank = my_reduction_node->children_ranks[child] + process_shift; - - if (group_size <= child_rank){ - child_rank -= group_size; - } - - child_ctl_pointer = data_buffs[child_rank].ctl_struct; - child_data_pointer = data_buffs[child_rank].payload; - - if (!IS_PEER_READY(child_ctl_pointer, ready_flag, sequence_number, REDUCE_FLAG, bcol_id)) { - *iteration = child; - return BCOL_FN_STARTED; - } - - child_rbuf = child_data_pointer + child_ctl_pointer->roffsets[bcol_id]; - - ompi_op_reduce(op,(void *)child_rbuf,(void *)rbuf, count, dtype); - } /* end child loop */ - } - - if (ROOT_NODE != my_reduction_node->my_node_type) { - opal_atomic_wmb (); - my_ctl_pointer->flags[REDUCE_FLAG][bcol_id] = ready_flag; - } - - return BCOL_FN_COMPLETE; -} - -static int bcol_basesmuma_reduce_intra_fanin_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - - netpatterns_tree_node_t *my_reduction_node; - int my_rank, my_node_index; - struct ompi_datatype_t *dtype = input_args->dtype; - int leading_dim, idx; - - /* Buffer index */ - int buff_idx = input_args->src_desc->buffer_index; - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration; - - volatile 
mca_bcol_basesmuma_payload_t *data_buffs; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - void *data_addr = (void *)input_args->src_desc->data_addr; - volatile void *rbuf; - - /* get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx = SM_ARRAY_INDEX(leading_dim, buff_idx, 0); - - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - - /* Get control structure and payload buffer */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - my_node_index = my_rank - input_args->root; - if (0 > my_node_index) { - int group_size = bcol_module->colls_no_user_data.size_of_group; - my_node_index += group_size; - } - - my_reduction_node = bcol_module->reduction_tree + my_node_index; - rbuf = (volatile void *)((uintptr_t) data_addr + input_args->rbuf_offset); - - return reduce_children (bcol_module, rbuf, my_reduction_node, iteration, my_ctl_pointer, dtype, - data_buffs, input_args->count, input_args->op, input_args->root); -} - -int bcol_basesmuma_reduce_intra_fanin(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int rc=BCOL_FN_COMPLETE; - int my_rank,group_size,my_node_index; - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - - netpatterns_tree_node_t *my_reduction_node; - volatile int8_t ready_flag; - int bcol_id = (int) bcol_module->super.bcol_id; - volatile void *sbuf,*rbuf; - int sbuf_offset,rbuf_offset; - int root,count; - int64_t sequence_number=input_args->sequence_num; - struct ompi_datatype_t *dtype; - int leading_dim,idx; - - /* Buffer index */ - int buff_idx = input_args->src_desc->buffer_index; - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration; - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char * my_data_pointer; - volatile 
mca_bcol_basesmuma_header_t *my_ctl_pointer; - void *data_addr = (void *)input_args->src_desc->data_addr; - -#if 0 - fprintf(stderr,"777 entering sm reduce \n"); -#endif - - /* get addressing information */ - my_rank=bcol_module->super.sbgp_partner_module->my_index; - group_size=bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - /* fprintf(stderr,"AAA the devil!!\n"); */ - /* Get control structure and payload buffer */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_data_pointer = (volatile char *)data_addr; - - /* Align node index to around sbgp root */ - root = input_args->root; - my_node_index = my_rank - root; - if (0 > my_node_index) { - my_node_index += group_size; - } - - /* get arguments */ - sbuf_offset = input_args->sbuf_offset; - rbuf_offset = input_args->rbuf_offset; - sbuf = (volatile void *)(my_data_pointer + sbuf_offset); - data_buffs[my_rank].payload = (void*)sbuf; - rbuf = (volatile void *)(my_data_pointer + rbuf_offset); - count = input_args->count; - dtype = input_args->dtype; - - /* Cache my rbuf_offset */ - my_ctl_pointer->roffsets[bcol_id] = rbuf_offset; - - /* get my node for the reduction tree */ - my_reduction_node=&(bcol_module->reduction_tree[my_node_index]); - - /* init the header */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - - input_args->result_in_rbuf = (ROOT_NODE == my_reduction_node->my_node_type); - - /* set starting point for progress loop */ - *iteration = 0; - my_ctl_pointer->ready_flag = ready_flag; - - if (sbuf != rbuf) { - rc = ompi_datatype_copy_content_same_ddt(dtype, count, (char *)rbuf, - (char *)sbuf); - if( 0 != rc ) { - return OMPI_ERROR; - } - } - - rc = reduce_children (bcol_module, rbuf, my_reduction_node, iteration, my_ctl_pointer, dtype, - data_buffs, 
count, input_args->op, root); - - /* Flag value if other bcols are called */ - my_ctl_pointer->starting_flag_value[bcol_id]++; - - /* Recycle payload buffers */ - - return rc; -} - -/* Small data fanin reduce - * Uses SM buffer (backed by SM file) for both control structures and - * payload - * - * NTH: How does this differ from the new one? Can we replace this - * with a call to the new init then a call the new progress until - * complete? - */ -int bcol_basesmuma_reduce_intra_fanin_old(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int rc=OMPI_SUCCESS; - int my_rank,group_size,process_shift,my_node_index; - int n_children,child; - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - - netpatterns_tree_node_t *my_reduction_node; - volatile int8_t ready_flag; - volatile void *sbuf,*rbuf; - int sbuf_offset,rbuf_offset; - int root,count; - struct ompi_op_t *op; - int64_t sequence_number=input_args->sequence_num; - struct ompi_datatype_t *dtype; - int leading_dim,idx; - int buff_idx; - int child_rank; - int bcol_id = (int) bcol_module->super.bcol_id; - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char * my_data_pointer; - volatile char * child_data_pointer; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t * child_ctl_pointer; - -#if 0 - fprintf(stderr,"Entering fanin reduce \n"); -#endif - - /* Buffer index */ - buff_idx = input_args->src_desc->buffer_index; - /* get addressing information */ - my_rank=bcol_module->super.sbgp_partner_module->my_index; - group_size=bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /*ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_with_user_data.ctl_buffs+idx;*/ - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) - 
bcol_module->colls_with_user_data.data_buffs+idx; - - /* Get control structure and payload buffer */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_data_pointer = (volatile char *) data_buffs[my_rank].payload; - - /* Align node index to around sbgp root */ - root = input_args->root; - process_shift = root; - my_node_index = my_rank - root; - if (0 > my_node_index ) { - my_node_index += group_size; - } - - /* get arguments */ - sbuf_offset = input_args->sbuf_offset; - rbuf_offset = input_args->rbuf_offset; - sbuf = (volatile void *)(my_data_pointer + sbuf_offset); - rbuf = (volatile void *)(my_data_pointer + rbuf_offset); - op = input_args->op; - count = input_args->count; - dtype = input_args->dtype; - - /* get my node for the reduction tree */ - my_reduction_node=&(bcol_module->reduction_tree[my_node_index]); - n_children=my_reduction_node->n_children; - - /* init the header */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - - input_args->result_in_rbuf = (ROOT_NODE == my_reduction_node->my_node_type); - - rc = ompi_datatype_copy_content_same_ddt(dtype, count, (char *)rbuf, - (char *)sbuf); - if (0 != rc) { - return OMPI_ERROR; - } - - if (LEAF_NODE != my_reduction_node->my_node_type) { - volatile void *child_rbuf; - /* for each child */ - /* my_result_data = child_result_data (op) my_source_data */ - - for (child = 0 ; child < n_children ; ++child) { - child_rank = my_reduction_node->children_ranks[child]; - child_rank += process_shift; - - /* wrap around */ - if( group_size <= child_rank ){ - child_rank-=group_size; - } - - /*child_ctl_pointer = ctl_structs[child_rank];*/ - child_ctl_pointer = data_buffs[child_rank].ctl_struct; - child_data_pointer = data_buffs[child_rank].payload; - - child_rbuf = child_data_pointer + rbuf_offset; - /* wait until child child's data is ready for use */ - while (!IS_PEER_READY(child_ctl_pointer, ready_flag, sequence_number, REDUCE_FLAG, bcol_id)) { - opal_progress(); - } - - /* apply 
collective operation */ - ompi_op_reduce(op,(void *)child_rbuf,(void *)rbuf, count,dtype); - } /* end child loop */ - } - - if (ROOT_NODE != my_reduction_node->my_node_type) { - opal_atomic_wmb (); - my_ctl_pointer->flags[REDUCE_FLAG][bcol_id] = ready_flag; - } - - my_ctl_pointer->starting_flag_value[bcol_id]++; - - return rc; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h b/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h deleted file mode 100644 index 3d6f2094469..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h +++ /dev/null @@ -1,92 +0,0 @@ -#ifndef __BASESMUMA_REDUCE_H_ - -#define __BASESMUMA_REDUCE_H_ - -#include "ompi_config.h" -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h" -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" -#include "bcol_basesmuma_utils.h" -#include - -enum { - BLOCK_OFFSET = 0, - LOCAL_REDUCE_SEG_OFFSET, - BLOCK_COUNT, - SEG_SIZE, - NOFFSETS -}; - -int compute_knomial_reduce_offsets(int group_index, int count, struct - ompi_datatype_t *dtype,int k_radix,int n_exchanges, - int **offsets); - -int compute_knomial_reduce_offsets_reverse(int group_index, int count, struct - ompi_datatype_t *dtype,int k_radix,int n_exchanges, - int **offsets); - -int bcol_basesmuma_lmsg_reduce_recursivek_scatter_reduce(mca_bcol_basesmuma_module_t *sm_module, - const int buffer_index, void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte, - volatile int8_t ready_flag, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - -int bcol_basesmuma_lmsg_reduce_knomial_gather(mca_bcol_basesmuma_module_t *basesmuma_module, - const int buffer_index, - void *sbuf,void *rbuf, int count, struct - ompi_datatype_t *dtype, - const int my_group_index, - const int padded_start_byte, - volatile int8_t rflag, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - -int 
bcol_basesmuma_lmsg_reduce_extra_root(mca_bcol_basesmuma_module_t *sm_module, - const int buffer_index, void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte, - volatile int8_t rflag, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - - - -int bcol_basesmuma_lmsg_reduce_extra_non_root(mca_bcol_basesmuma_module_t *sm_module, - const int buffer_index, void *sbuf, - void *rbuf, - int root, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, - const int relative_group_index, - const int group_size, - const int padded_start_byte, - volatile int8_t rflag, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - -int bcol_basesmuma_lmsg_reduce(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_reduce_extra(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -void basesmuma_reduce_recv(int my_group_index, int peer, - void *recv_buffer, - int recv_size, - volatile int8_t ready_flag_val, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - -void basesmuma_reduce_send(int my_group_index, - int peer, - void *send_buffer, - int snd_size, - int send_offset, - volatile int8_t ready_flag_val, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c deleted file mode 100644 index 7c25b866c62..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c +++ /dev/null @@ -1,444 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h" - -/* -#define IS_BARRIER_READY(peer, my_flag, my_sequence_number)\ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[BARRIER_RKING_FLAG][bcol_id] >= (my_flag) \ - )? true : false ) -*/ - -#define CALC_ACTIVE_REQUESTS(active_requests,peers, tree_order) \ -do{ \ - for( j = 0; j < (tree_order - 1); j++){ \ - if( 0 > peers[j] ) { \ - /* set the bit */ \ - *active_requests ^= (1<bcol_module; - netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - int leading_dim, buff_idx, idx; - int bcol_id = (int) bcol_module->super.bcol_id; - - int i, j, probe; - int src; - - int pow_k, tree_order; - int max_requests = 0; /* important to initialize this */ - - int matched = 0; - int64_t sequence_number=input_args->sequence_num; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer; -#if 0 - fprintf(stderr,"entering sm barrier sn = %d buff index = %d\n",sequence_number,input_args->buffer_index); -#endif - /* initialize the iteration counter */ - buff_idx = input_args->buffer_index; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - 
data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* init the header */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - /* initialize these */ - *iteration = 0; - *active_requests = 0; - *status = 0; - - /* k-nomial parameters */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - - /* calculate the maximum number of requests - * at each level each rank communicates with - * at most (k - 1) peers - * so if we set k - 1 bit fields in "max_requests", then - * we have max_request == 2^(k - 1) -1 - */ - for(i = 0; i < (tree_order - 1); i++){ - max_requests ^= (1<node_type ) { - - /* then I will signal to my proxy rank*/ - - my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag; - ready_flag = flag_offset + 1 + pow_k + 2; - /* now, poll for completion */ - - src = exchange_node->rank_extra_sources_array[0]; - peer_ctl_pointer = data_buffs[src].ctl_struct; - - for( i = 0; i < cm->num_to_probe && (0 == matched); i++ ) { - if(IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - matched = 1; - - goto FINISHED; - } - - } - - /* cache state and bail */ - *iteration = -1; - return BCOL_FN_STARTED; - - }else if ( 0 < exchange_node->n_extra_sources ) { - - /* I am a proxy for someone */ - src = exchange_node->rank_extra_sources_array[0]; - peer_ctl_pointer = data_buffs[src].ctl_struct; - - /* probe for extra rank's arrival */ - for( i = 0; i < cm->num_to_probe && ( 0 == matched); i++) { - if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - matched = 1; - /* copy it in */ - goto MAIN_PHASE; - } - } - *status = ready_flag; - *iteration = -1; - return BCOL_FN_STARTED; - - } - -MAIN_PHASE: - /* bump the ready flag */ - ready_flag++; - - /* we start the recursive k - ing 
phase */ - for( *iteration = 0; *iteration < pow_k; (*iteration)++) { - /* announce my arrival */ - my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag; - /* calculate the number of active requests */ - CALC_ACTIVE_REQUESTS(active_requests,exchange_node->rank_exchanges[*iteration],tree_order); - /* Now post the recv's */ - for( j = 0; j < (tree_order - 1); j++ ) { - - /* recv phase */ - src = exchange_node->rank_exchanges[*iteration][j]; - if( src < 0 ) { - /* then not a valid rank, continue */ - continue; - } - - peer_ctl_pointer = data_buffs[src].ctl_struct; - if( !(*active_requests&(1<num_to_probe && (0 == matched); probe++){ - if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - matched = 1; - /* set this request's bit */ - *active_requests ^= (1<flags[BARRIER_RKING_FLAG][bcol_id]; - return BCOL_FN_STARTED; - } - } - - /* bump the flag one more time for the extra rank */ - ready_flag = flag_offset + 1 + pow_k + 2; - - /* finish off the last piece, send the data back to the extra */ - if( 0 < exchange_node->n_extra_sources ) { - /* simply announce my arrival */ - my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag; - - } - -FINISHED: - - - my_ctl_pointer->starting_flag_value[bcol_id]++; - return BCOL_FN_COMPLETE; -} - - -/* allgather progress function */ - -int bcol_basesmuma_k_nomial_barrier_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - - - /* local variables */ - int flag_offset; - volatile int8_t ready_flag; - mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module; - netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - - int *iteration = 
&bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - int *iter = iteration; /* double alias */ - int leading_dim, idx, buff_idx; - - int i, j, probe; - int src; - int max_requests = 0; /* critical to set this */ - int pow_k, tree_order; - int bcol_id = (int) bcol_module->super.bcol_id; - - int matched = 0; - int64_t sequence_number=input_args->sequence_num; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer; -#if 0 - fprintf(stderr,"%d: entering sm allgather progress active requests %d iter %d ready_flag %d\n",my_rank, - *active_requests,*iter,*status); -#endif - buff_idx = buffer_index; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value[bcol_id]; - ready_flag = *status; - /* k-nomial parameters */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - - /* calculate the maximum number of requests - * at each level each rank communicates with - * at most (k - 1) peers - * so if we set k - 1 bit fields in "max_requests", then - * we have max_request == 2^(k - 1) -1 - */ - for(i = 0; i < (tree_order - 1); i++){ - max_requests ^= (1<node_type ) { - - /* If I'm in here, then I must be looking for data */ - ready_flag = flag_offset + 1 + pow_k + 2; - - src = exchange_node->rank_extra_sources_array[0]; - peer_ctl_pointer = data_buffs[src].ctl_struct; - - for( i = 0; i < cm->num_to_probe && (0 == matched); i++ ) { - 
if(IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - matched = 1; - - goto FINISHED; - } - - } - - /* haven't found it, state is cached, bail out */ - return BCOL_FN_STARTED; - - }else if ( ( -1 == *iteration ) && (0 < exchange_node->n_extra_sources) ) { - - /* I am a proxy for someone */ - src = exchange_node->rank_extra_sources_array[0]; - peer_ctl_pointer = data_buffs[src].ctl_struct; - - /* probe for extra rank's arrival */ - for( i = 0; i < cm->num_to_probe && ( 0 == matched); i++) { - if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - matched = 1; - /* bump the flag */ - ready_flag++; - *iteration = 0; - goto MAIN_PHASE; - } - } - return BCOL_FN_STARTED; - - } - -MAIN_PHASE: - - /* start the recursive k - ing phase */ - for( *iter=*iteration; *iter < pow_k; (*iter)++) { - /* I am ready at this level */ - my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag; - if( 0 == *active_requests ) { - /* flip some bits, if we don't have active requests from a previous visit */ - CALC_ACTIVE_REQUESTS(active_requests,exchange_node->rank_exchanges[*iter],tree_order); - } - for( j = 0; j < (tree_order - 1); j++ ) { - - /* recv phase */ - src = exchange_node->rank_exchanges[*iter][j]; - if( src < 0 ) { - /* then not a valid rank, continue - */ - continue; - } - - peer_ctl_pointer = data_buffs[src].ctl_struct; - if( !(*active_requests&(1<num_to_probe && (0 == matched); probe++){ - if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - matched = 1; - /* flip the request's bit */ - *active_requests ^= (1<flags[BARRIER_RKING_FLAG][bcol_id]; - return BCOL_FN_STARTED; - } - } - /* bump the flag one more time for the extra rank */ - ready_flag = flag_offset + 1 + pow_k + 2; - - /* finish off the last piece, send the data back to the extra */ - if( 0 < exchange_node->n_extra_sources ) { - /* simply announce my arrival */ - 
my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag; - - } - -FINISHED: - - my_ctl_pointer->starting_flag_value[bcol_id]++; - return BCOL_FN_COMPLETE; -} - -/* Register k-nomial barrier functions to the BCOL function table, - * so they can be selected - */ -int bcol_basesmuma_barrier_init(mca_bcol_base_module_t *super) -{ -mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_BARRIER; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_k_nomial_barrier_init, - bcol_basesmuma_k_nomial_barrier_progress); - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c deleted file mode 100644 index 6aefc8eac87..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c +++ /dev/null @@ -1,588 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include "mpi.h" -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/patterns/comm/coll_ops.h" - -#include "opal/class/opal_object.h" -#include "opal/dss/dss.h" - -#include "bcol_basesmuma.h" - -int base_bcol_basesmuma_setup_ctl_struct( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *cs, - sm_buffer_mgmt *ctl_mgmt); - -/* this is the new one, uses the pml allgather */ -int base_bcol_basesmuma_exchange_offsets( - mca_bcol_basesmuma_module_t *sm_bcol_module, - void **result_array, uint64_t mem_offset, int loop_limit, - int leading_dim) -{ - int ret=OMPI_SUCCESS,i; - int count; - int index_in_group; - char *send_buff; - char *recv_buff; - uint64_t rem_mem_offset; - - /* malloc some memory */ - count = sizeof(uint64_t) + sizeof(int); - send_buff = (char *) malloc(count); - recv_buff = (char *) malloc(count * - sm_bcol_module->super.sbgp_partner_module->group_size); - /* exchange the base pointer for the controls structures - gather - * every one else's infromation. - */ - - - /* pack the offset of the allocated region */ - memcpy((void *) send_buff, (void *) &(sm_bcol_module->super.sbgp_partner_module->my_index), sizeof(int)); - memcpy((void *) (send_buff+ sizeof(int)), (void *) &(mem_offset), sizeof(uint64_t)); - - /* get the offsets from all procs, so can setup the control data - * structures. 
- */ - - ret=comm_allgather_pml((void *) send_buff,(void *) recv_buff,count, - MPI_BYTE, - sm_bcol_module->super.sbgp_partner_module->my_index, - sm_bcol_module->super.sbgp_partner_module->group_size, - sm_bcol_module->super.sbgp_partner_module->group_list, - sm_bcol_module->super.sbgp_partner_module->group_comm); - if( OMPI_SUCCESS != ret ) { - goto exit_ERROR; - } - - /* get the control stucture offsets within the shared memory - * region and populate the control structures - we do not assume - * any symmetry in memory layout of each process - */ - - /* loop over the procs in the group */ - for(i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++){ - int array_id; - /* get this peer's index in the group */ - memcpy((void *) &index_in_group, (void *) (recv_buff + i*count) , sizeof(int)); - - /* get the offset */ - memcpy((void *) &rem_mem_offset, (void *) (recv_buff + i*count + sizeof(int)), sizeof(uint64_t)); - - array_id=SM_ARRAY_INDEX(leading_dim,0,index_in_group); - result_array[array_id]=(void *)(uintptr_t)rem_mem_offset; - - } - -exit_ERROR: - /* clean up */ - if( NULL != send_buff ) { - free(send_buff); - send_buff = NULL; - } - if( NULL != recv_buff ) { - free(recv_buff); - recv_buff = NULL; - } - - return ret; - - -} - -#if 0 -int base_bcol_basesmuma_exchange_offsets( - mca_bcol_basesmuma_module_t *sm_bcol_module, - void **result_array, uint64_t mem_offset, int loop_limit, - int leading_dim) -{ - int ret=OMPI_SUCCESS,i,dummy; - int index_in_group, pcnt; - opal_list_t peers; - ompi_namelist_t *peer; - ompi_proc_t *proc_temp, *my_id; - opal_buffer_t *send_buffer = OBJ_NEW(opal_buffer_t); - opal_buffer_t *recv_buffer = OBJ_NEW(opal_buffer_t); - uint64_t rem_mem_offset; - - /* exchange the base pointer for the controls structures - gather - * every one else's infromation. 
- */ - /* get list of procs that will participate in the communication */ - OBJ_CONSTRUCT(&peers, opal_list_t); - for (i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++) { - /* get the proc info */ - proc_temp = ompi_comm_peer_lookup( - sm_bcol_module->super.sbgp_partner_module->group_comm, - sm_bcol_module->super.sbgp_partner_module->group_list[i]); - peer = OBJ_NEW(ompi_namelist_t); - peer->name.jobid = proc_temp->proc_name.jobid; - peer->name.vpid = proc_temp->proc_name.vpid; - opal_list_append(&peers,&peer->super); /* this is with the new field called "super" in ompi_namelist_t struct */ - } - /* pack up the data into the allgather send buffer */ - if (NULL == send_buffer || NULL == recv_buffer) { - opal_output (ompi_bcol_base_framework.framework_output, "Cannot allocate memory for sbuffer or rbuffer\n"); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - /* get my proc information */ - my_id = ompi_proc_local(); - - /* pack my information */ - ret = opal_dss.pack(send_buffer, - &(sm_bcol_module->super.sbgp_partner_module->my_index),1,OPAL_UINT32); - - if (OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "Error packing my_index!!\n"); - goto exit_ERROR; - } - - /* pack the offset of the allocated region */ - ret = opal_dss.pack(send_buffer,&(mem_offset),1,OPAL_UINT64); - if (OMPI_SUCCESS != ret) { - goto exit_ERROR; - } - - /* get the offsets from all procs, so can setup the control data - * structures. 
- */ - if (OMPI_SUCCESS != (ret = ompi_rte_allgather_list(&peers, send_buffer, recv_buffer))) { - opal_output (ompi_bcol_base_framework.framework_output, "ompi_rte_allgather_list returned error %d\n", ret); - goto exit_ERROR; - } - - /* unpack the dummy */ - pcnt=1; - ret = opal_dss.unpack(recv_buffer,&dummy, &pcnt, OPAL_INT32); - if (OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "unpack returned error %d for dummy\n",ret); - goto exit_ERROR; - } - - /* get the control stucture offsets within the shared memory - * region and populate the control structures - we do not assume - * any symmetry in memory layout of each process - */ - - /* loop over the procs in the group */ - for(i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++){ - int array_id; - pcnt=1; - ret = opal_dss.unpack(recv_buffer,&index_in_group, &pcnt, OPAL_UINT32); - if (OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "unpack returned error %d for remote index_in_group\n",ret); - goto exit_ERROR; - } - - /* get the offset */ - pcnt=1; - ret = opal_dss.unpack(recv_buffer,&rem_mem_offset, &pcnt, OPAL_UINT64); - if (OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "unpack returned error %d for remote memory offset\n",ret); - goto exit_ERROR; - } - - array_id=SM_ARRAY_INDEX(leading_dim,0,index_in_group); - result_array[array_id]=(void *)rem_mem_offset; - - } - - /* clean up */ - peer=(ompi_namelist_t *)opal_list_remove_first(&peers); - while( NULL !=peer) { - OBJ_RELEASE(peer); - peer=(ompi_namelist_t *)opal_list_remove_first(&peers); - } - OBJ_DESTRUCT(&peers); - if( send_buffer ) { - OBJ_RELEASE(send_buffer); - } - if( recv_buffer ) { - OBJ_RELEASE(recv_buffer); - } - - return ret; - -exit_ERROR: - - /* free peer list */ - peer=(ompi_namelist_t *)opal_list_remove_first(&peers); - while( NULL !=peer) { - OBJ_RELEASE(peer); - peer=(ompi_namelist_t *)opal_list_remove_first(&peers); - } - 
OBJ_DESTRUCT(&peers); - if( send_buffer ) { - OBJ_RELEASE(send_buffer); - } - if( recv_buffer ) { - OBJ_RELEASE(recv_buffer); - } - return ret; -} -#endif - - -static int base_bcol_basesmuma_exchange_ctl_params( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *cs, - sm_buffer_mgmt *ctl_mgmt, list_data_t *data_blk) -{ - int ret=OMPI_SUCCESS,i,loop_limit; - int leading_dim, buf_id; - void *mem_offset; - unsigned char *base_ptr; - mca_bcol_basesmuma_ctl_struct_t *ctl_ptr; - - /* data block base offset in the mapped file */ - mem_offset = (void *)((uintptr_t)data_blk->data - - (uintptr_t)cs->sm_ctl_structs->data_addr); - - /* number of buffers in data block */ - loop_limit=cs->basesmuma_num_mem_banks+ctl_mgmt->number_of_buffs; - leading_dim=ctl_mgmt->size_of_group; - ret=comm_allgather_pml(&mem_offset, ctl_mgmt->ctl_buffs, sizeof(void *), - MPI_BYTE, sm_bcol_module->super.sbgp_partner_module->my_index, - sm_bcol_module->super.sbgp_partner_module->group_size, - sm_bcol_module->super.sbgp_partner_module->group_list, - sm_bcol_module->super.sbgp_partner_module->group_comm); - if( OMPI_SUCCESS != ret ) { - goto exit_ERROR; - } - -#if 0 - ret=base_bcol_basesmuma_exchange_offsets( sm_bcol_module, - (void **)ctl_mgmt->ctl_buffs, mem_offset, loop_limit, leading_dim); - if( OMPI_SUCCESS != ret ) { - goto exit_ERROR; - } -#endif - - /* convert memory offset to virtual address in current rank */ - for (i=0;i< sm_bcol_module->super.sbgp_partner_module->group_size;i++) { - - /* get the base pointer */ - int array_id=SM_ARRAY_INDEX(leading_dim,0,i); - if( i == sm_bcol_module->super.sbgp_partner_module->my_index) { - /* me */ - base_ptr=cs->sm_ctl_structs->map_addr; - } else { - base_ptr=sm_bcol_module->ctl_backing_files_info[i]->sm_mmap->map_addr; - } - ctl_mgmt->ctl_buffs[array_id]=(void *) - (uintptr_t)(((uint64_t)(uintptr_t)ctl_mgmt->ctl_buffs[array_id])+(uint64_t)(uintptr_t)base_ptr); - for( buf_id = 1 ; buf_id < loop_limit ; buf_id++ ) { - int 
array_id_m1=SM_ARRAY_INDEX(leading_dim,(buf_id-1),i); - array_id=SM_ARRAY_INDEX(leading_dim,buf_id,i); - ctl_mgmt->ctl_buffs[array_id]=(void *) (uintptr_t)((uint64_t)(uintptr_t)(ctl_mgmt->ctl_buffs[array_id_m1])+ - (uint64_t)(uintptr_t)sizeof(mca_bcol_basesmuma_ctl_struct_t)); - } - } - /* initialize my control structues */ - for( buf_id = 0 ; buf_id < loop_limit ; buf_id++ ) { - - int my_idx=sm_bcol_module->super.sbgp_partner_module->my_index; - int array_id=SM_ARRAY_INDEX(leading_dim,buf_id,my_idx); - ctl_ptr = (mca_bcol_basesmuma_ctl_struct_t *) - ctl_mgmt->ctl_buffs[array_id]; - - /* initialize the data structures - RLG, this is only one data - * structure that needs to be initialized, more are missing */ - ctl_ptr->sequence_number=-1; - ctl_ptr->flag=-1; - ctl_ptr->index=0; - ctl_ptr->src_ptr = NULL; - } - - return ret; - -exit_ERROR: - - return ret; -} - -static int base_bcol_basesmuma_setup_ctl (mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *cs) -{ - const int my_index = sm_bcol_module->super.sbgp_partner_module->my_index;; - bcol_basesmuma_smcm_file_t input_file; - int ret; - - /* exchange remote addressing information if it has not already been done */ - if (NULL == sm_bcol_module->ctl_backing_files_info) { - input_file.file_name=cs->sm_ctl_structs->map_path; - input_file.size=cs->sm_ctl_structs->map_size; - input_file.size_ctl_structure=0; - input_file.data_seg_alignment=BASESMUMA_CACHE_LINE_SIZE; - input_file.mpool_size=cs->sm_ctl_structs->map_size; - ret = bcol_basesmuma_smcm_allgather_connection(sm_bcol_module, - sm_bcol_module->super.sbgp_partner_module, - &(cs->sm_connections_list), - &(sm_bcol_module->ctl_backing_files_info), - sm_bcol_module->super.sbgp_partner_module->group_comm, - input_file, cs->clt_base_fname, - false); - if (OMPI_SUCCESS != ret) { - return ret; - } - } - - /* fill in the pointer to other ranks scartch shared memory */ - if (NULL == sm_bcol_module->shared_memory_scratch_space) { - 
sm_bcol_module->shared_memory_scratch_space = - calloc (sm_bcol_module->super.sbgp_partner_module->group_size, sizeof (void *)); - if (!sm_bcol_module->shared_memory_scratch_space) { - opal_output (ompi_bcol_base_framework.framework_output, "Cannot allocate memory for shared_memory_scratch_space."); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for (int i = 0 ; i < sm_bcol_module->super.sbgp_partner_module->group_size ; ++i) { - if (i == my_index) { - /* local file data is not cached in this list */ - continue; - } - - sm_bcol_module->shared_memory_scratch_space[i] = - (void *)((intptr_t) sm_bcol_module->ctl_backing_files_info[i]->sm_mmap + - cs->scratch_offset_from_base_ctl_file); - } - - sm_bcol_module->shared_memory_scratch_space[my_index] = - (void *)((intptr_t) cs->sm_ctl_structs->map_addr + cs->scratch_offset_from_base_ctl_file); - } - - return OMPI_SUCCESS; -} - -int base_bcol_basesmuma_setup_ctl_struct( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *cs, - sm_buffer_mgmt *ctl_mgmt) -{ - int n_ctl, n_levels; - int n_ctl_structs; - size_t malloc_size; - - /* - * set my no user-data conrol structures - */ - /* number of banks and regions per bank are already a power of 2 */ - n_ctl_structs=cs->basesmuma_num_mem_banks* - cs->basesmuma_num_regions_per_bank; - - /* initialize the control structure management struct - - * for collectives without user data - *--------------------------------------------------------------- - */ - - ctl_mgmt->number_of_buffs=n_ctl_structs; - ctl_mgmt->num_mem_banks= - cs->basesmuma_num_mem_banks; - - ctl_mgmt->num_buffs_per_mem_bank= - cs->basesmuma_num_regions_per_bank; - ctl_mgmt->size_of_group= - sm_bcol_module->super.sbgp_partner_module->group_size; - roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank,&n_levels); - ctl_mgmt->log2_num_buffs_per_mem_bank=n_levels; - - roundup_to_power_radix(2,n_ctl_structs,&n_levels); - ctl_mgmt->log2_number_of_buffs=n_levels; - ctl_mgmt->mask=n_ctl_structs-1; 
- sm_bcol_module->super.n_poll_loops=cs->n_poll_loops; - - malloc_size= - (ctl_mgmt->number_of_buffs + - ctl_mgmt->num_mem_banks ) * - ctl_mgmt->size_of_group * - sizeof(void *); - ctl_mgmt->ctl_buffs = malloc(malloc_size); - if (!ctl_mgmt->ctl_buffs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* - * setup the no-data buffer managment data - */ - n_ctl = ctl_mgmt->num_mem_banks; - ctl_mgmt->ctl_buffs_mgmt = (mem_bank_management_t *) calloc (n_ctl, sizeof (mem_bank_management_t)); - if (!ctl_mgmt->ctl_buffs_mgmt) { - opal_output (ompi_bcol_base_framework.framework_output, "Cannot allocate memory for ctl_buffs_mgmt"); - free (ctl_mgmt->ctl_buffs); - ctl_mgmt->ctl_buffs = NULL; - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* initialize each individual element */ - for (int i = 0 ; i < n_ctl ; ++i) { - opal_list_item_t *item; - opal_mutex_t *mutex_ptr; - - ctl_mgmt->ctl_buffs_mgmt[i].available_buffers= - ctl_mgmt->num_buffs_per_mem_bank; - ctl_mgmt->ctl_buffs_mgmt[i].number_of_buffers= - ctl_mgmt->num_buffs_per_mem_bank; - mutex_ptr = &(ctl_mgmt->ctl_buffs_mgmt[i].mutex); - OBJ_CONSTRUCT(mutex_ptr, opal_mutex_t); - ctl_mgmt->ctl_buffs_mgmt[i].index_shared_mem_ctl_structs=i; - - item = (opal_list_item_t *)&(ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc); - OBJ_CONSTRUCT(item, opal_list_item_t); - ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.sm_module = - sm_bcol_module; - ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.pool_index = i; - /* get the sm_buffer_mgmt pointer for the control structures */ - ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.coll_buff = ctl_mgmt; - } - - return OMPI_SUCCESS; -} - -/* - * this function initializes the internal scratch buffers and control - * structures that will be used by the module. It also intitializes - * the payload buffer management structures. 
- */ -int base_bcol_basesmuma_setup_library_buffers( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *cs) -{ - int ret=OMPI_SUCCESS,i; - int n_ctl_structs; - size_t ctl_segement_size,total_memory; - int max_elements; - unsigned char *data_ptr; - - /* */ - /* setup the control struct memory */ - if(!cs->sm_ctl_structs) { - ret = mca_bcol_basesmuma_allocate_sm_ctl_memory(cs); - if(OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "In bcol_comm_query mca_bcol_basesmuma_allocate_sm_ctl_memory failed\n"); - return ret; - } - /* - * put the memory onto the free list - we have worried about - * alignment in the mpool allocation, and assume that the - * ctl structures have the approriate size to mantain alignment - */ - - /* figure out segment size */ - n_ctl_structs=cs->basesmuma_num_mem_banks* - cs->basesmuma_num_regions_per_bank; - - /* add memory for the control structure used for recycling the banks */ - n_ctl_structs+=cs->basesmuma_num_mem_banks; - - ctl_segement_size=n_ctl_structs* - sizeof(mca_bcol_basesmuma_ctl_struct_t); - - total_memory=cs->sm_ctl_structs->map_size - ( - (char *)(cs->sm_ctl_structs->data_addr)- - (char *)(cs->sm_ctl_structs->map_addr)); - total_memory-=cs->my_scratch_shared_memory_size; - max_elements=total_memory/ctl_segement_size; - - /* populate the free list */ - data_ptr=cs->sm_ctl_structs->data_addr; - - for( i=0 ; i < max_elements ; i++ ) { - list_data_t *item = OBJ_NEW(list_data_t); - if( !item ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - item->data=(void *)data_ptr; - opal_list_append(&(cs->ctl_structures),(opal_list_item_t *)item); - data_ptr+=ctl_segement_size; - } - /* set the scratch memory pointer and offset */ - cs->my_scratch_shared_memory=(char *)data_ptr; - cs->scratch_offset_from_base_ctl_file=(size_t) - ((char *)data_ptr-(char *)cs->sm_ctl_structs->map_addr); - - - /* At this stage the memory is mapped and ready to use by the local rank. 
- * However, the memory of other processes has not yet been mmaped into the - * memory of this process. - */ - } - - /* intialize no_userdata_ctl */ - sm_bcol_module->no_userdata_ctl=(list_data_t *) - opal_list_remove_last(&(cs->ctl_structures)); - if (!sm_bcol_module->no_userdata_ctl) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* intialize userdata_ctl */ - sm_bcol_module->userdata_ctl = (list_data_t *) - opal_list_remove_last(&(cs->ctl_structures)); - if (!sm_bcol_module->userdata_ctl) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - ret = base_bcol_basesmuma_setup_ctl (sm_bcol_module, cs); - if (OMPI_SUCCESS != ret) { - return ret; - } - - ret = base_bcol_basesmuma_setup_ctl_struct (sm_bcol_module, cs, &(sm_bcol_module->colls_no_user_data)); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - ret = base_bcol_basesmuma_setup_ctl_struct (sm_bcol_module, cs, &(sm_bcol_module->colls_with_user_data)); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* used for blocking recursive doubling barrier */ - sm_bcol_module->index_blocking_barrier_memory_bank=0; - - /* gather the offsets of the control structs relative to the base - * of the shared memory file, and fill in the table with the - * address of all the control structues. 
- */ - ret = base_bcol_basesmuma_exchange_ctl_params(sm_bcol_module, cs, - &(sm_bcol_module->colls_no_user_data),sm_bcol_module->no_userdata_ctl); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - ret = base_bcol_basesmuma_exchange_ctl_params(sm_bcol_module, cs, - &(sm_bcol_module->colls_with_user_data),sm_bcol_module->userdata_ctl); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - return OMPI_SUCCESS; -} - -OBJ_CLASS_INSTANCE(list_data_t, - opal_list_item_t, NULL, NULL); diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c deleted file mode 100644 index dae24098316..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c +++ /dev/null @@ -1,457 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#ifdef HAVE_STRINGS_H -#include -#endif - -#include "ompi/proc/proc.h" -#include "ompi/patterns/comm/coll_ops.h" -#include "opal/align.h" - -#include "opal/dss/dss.h" -#include "opal/util/error.h" -#include "opal/util/output.h" -#include "opal/class/opal_list.h" -#include "opal/class/opal_hash_table.h" - -#include "bcol_basesmuma.h" - - - -#define SM_BACKING_FILE_NAME_MAX_LEN 256 - -static bcol_basesmuma_smcm_mmap_t * bcol_basesmuma_smcm_reg_mmap(void *in_ptr, int fd, size_t length, - size_t addr_offset, size_t alignment, - char *file_name); - -struct file_info_t { - uint32_t vpid; - uint32_t jobid; - uint64_t file_size; - uint64_t size_ctl_structure; - uint64_t data_seg_alignment; - char file_name[SM_BACKING_FILE_NAME_MAX_LEN]; -}; - -/* need to allocate space for the peer */ -static void bcol_basesmuma_smcm_proc_item_t_construct (bcol_basesmuma_smcm_proc_item_t * item) -{ - memset ((char *) item + sizeof (item->item), 0, sizeof (*item) - sizeof (item->item)); -} - -/* need to free the space for the peer */ -static void bcol_basesmuma_smcm_proc_item_t_destruct (bcol_basesmuma_smcm_proc_item_t * item) -{ - if (item->sm_mmap) { - OBJ_RELEASE(item->sm_mmap); - } - - if (item->sm_file.file_name) { - free (item->sm_file.file_name); - item->sm_file.file_name = NULL; - } -} - -OBJ_CLASS_INSTANCE(bcol_basesmuma_smcm_proc_item_t, - opal_list_item_t, - bcol_basesmuma_smcm_proc_item_t_construct, - bcol_basesmuma_smcm_proc_item_t_destruct); - -static void bcol_basesmuma_smcm_mmap_construct (bcol_basesmuma_smcm_mmap_t *smcm_mmap) -{ - memset ((char *) smcm_mmap + sizeof (smcm_mmap->super), 0, sizeof (*smcm_mmap) - sizeof (smcm_mmap->super)); -} - -static void bcol_basesmuma_smcm_mmap_destruct (bcol_basesmuma_smcm_mmap_t *smcm_mmap) -{ - if (smcm_mmap->map_seg) { - munmap ((void *)smcm_mmap->map_seg, smcm_mmap->map_size); - 
smcm_mmap->map_seg = NULL; - } - - if (smcm_mmap->map_path) { - free (smcm_mmap->map_path); - smcm_mmap->map_path = NULL; - } -} - -OBJ_CLASS_INSTANCE(bcol_basesmuma_smcm_mmap_t, opal_list_item_t, - bcol_basesmuma_smcm_mmap_construct, - bcol_basesmuma_smcm_mmap_destruct); - - -/* smcm_allgather_connection: - This function is called when a shared memory subgroup wants to establish shared memory "connections" among - a group of processes. - - This function DOES NOT create any shared memory backing files, it only mmaps already existing files. Shared - memory files are created by the shared memory registration function - ----------------------------------------------------------------------------------------------------------- - Input params: - - - sbgp module The subgrouping module contains the list of ranks to wire up. - - - peer_list An opal list containing a list of bcol_basesmuma_smcm_proc_item_t types. This - contains a list of peers whose shared memory files I have already mapped. - Upon completion of the allgather exchange with all members of the group and depending on the - value of "map_all", my peers' shared memory files are mapped into my local virtual memory - space, with all pertinent information being stored in an bcol_basesmuma_smcm_proc_item_t which is - subsequently appended onto the "peer_list". - - - comm The ompi_communicator_t communicator. - - - input A data struct that caches the information about my shared memory file. - - - map_all Bool that determines whether or not to go ahead and map the files from all of the peers - defined in the sbgp-ing module. If map_all == true, then go ahead and mmap all of the files - obtained in the exchange and append the information to the "peer_list". If map_all == false - then make a check and only mmap those peers' files whose vpid/jobid/filename combination do - not already exist in the "peer_list". Once mapping is completed, append this peer's information - to the "peer_list". 
- ----------------------------------------------------------------------------------------------------------- - * - */ - - -int bcol_basesmuma_smcm_allgather_connection( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_sbgp_base_module_t *module, - opal_list_t *peer_list, - bcol_basesmuma_smcm_proc_item_t ***back_files, - ompi_communicator_t *comm, - bcol_basesmuma_smcm_file_t input, - char *base_fname, - bool map_all) -{ - - /* define local variables */ - - int rc, i, fd; - ptrdiff_t mem_offset; - ompi_proc_t *proc_temp, *my_id; - bcol_basesmuma_smcm_proc_item_t *temp; - bcol_basesmuma_smcm_proc_item_t *item_ptr; - bcol_basesmuma_smcm_proc_item_t **backing_files; - struct file_info_t local_file; - struct file_info_t *all_files=NULL; - - /* sanity check */ - if (strlen(input.file_name) > SM_BACKING_FILE_NAME_MAX_LEN-1) { - opal_output (ompi_bcol_base_framework.framework_output, "backing file name too long: %s len :: %d", - input.file_name, (int) strlen(input.file_name)); - return OMPI_ERR_BAD_PARAM; - } - - backing_files = (bcol_basesmuma_smcm_proc_item_t **) - calloc(module->group_size, sizeof(bcol_basesmuma_smcm_proc_item_t *)); - if (!backing_files) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* FIXME *back_files might have been already allocated - * so free it in order to avoid a memory leak */ - if (NULL != *back_files) { - free (*back_files); - } - *back_files = backing_files; - - my_id = ompi_proc_local(); - - /* Phase One: - gather a list of processes that will participate in the allgather - I'm - preparing this list from the sbgp-ing module that was passed into the function */ - - /* fill in local file information */ - local_file.vpid = ((orte_process_name_t*)&my_id->super.proc_name)->vpid; - local_file.jobid = ((orte_process_name_t*)&my_id->super.proc_name)->jobid; - local_file.file_size=input.size; - local_file.size_ctl_structure=input.size_ctl_structure; - local_file.data_seg_alignment=input.data_seg_alignment; - - strcpy (local_file.file_name, 
input.file_name); - - /* will exchange this data type as a string of characters - - * this routine is first called before MPI_init() completes - * and before error handling is setup, so can't use the - * MPI data types to send this data */ - all_files = (struct file_info_t *) calloc(module->group_size, - sizeof (struct file_info_t)); - if (!all_files) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* exchange data */ - rc = comm_allgather_pml(&local_file,all_files,sizeof(struct file_info_t), MPI_CHAR, - sm_bcol_module->super.sbgp_partner_module->my_index, - sm_bcol_module->super.sbgp_partner_module->group_size, - sm_bcol_module->super.sbgp_partner_module->group_list, - sm_bcol_module->super.sbgp_partner_module->group_comm); - if( OMPI_SUCCESS != rc ) { - opal_output (ompi_bcol_base_framework.framework_output, "failed in comm_allgather_pml. Error code: %d", rc); - goto Error; - } - - /* Phase four: - loop through the receive buffer, unpack the data recieved from remote peers */ - - for (i = 0; i < module->group_size; i++) { - struct file_info_t *rem_file = all_files + i; - - /* check if this is my index or if the file is already mapped (set above). ther - * is no reason to look through the peer list again because no two members of - * the group will have the same vpid/jobid pair. 
ignore this previously found - * mapping if map_all was requested (NTH: not sure why exactly since we re-map - * and already mapped file) */ - if (sm_bcol_module->super.sbgp_partner_module->my_index == i) { - continue; - } - - proc_temp = ompi_comm_peer_lookup(comm,module->group_list[i]); - - OPAL_LIST_FOREACH(item_ptr, peer_list, bcol_basesmuma_smcm_proc_item_t) { - /* if the vpid/jobid/filename combination already exists in the list, - then do not map this peer's file --- because you already have */ - if (0 == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - OMPI_CAST_RTE_NAME(&proc_temp->super.proc_name), - &item_ptr->peer) && - 0 == strcmp (item_ptr->sm_file.file_name, rem_file->file_name)) { - ++item_ptr->refcnt; - /* record file data */ - backing_files[i] = item_ptr; - break; - } - } - - if (!map_all && backing_files[i]) { - continue; - } - - temp = OBJ_NEW(bcol_basesmuma_smcm_proc_item_t); - if (!temp) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - temp->peer.vpid = rem_file->vpid; - temp->peer.jobid = rem_file->jobid; - - temp->sm_file.file_name = strdup (rem_file->file_name); - if (!temp->sm_file.file_name) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - OBJ_RELEASE(temp); - goto Error; - } - - temp->sm_file.size = (size_t) rem_file->file_size; - temp->sm_file.mpool_size = (size_t) rem_file->file_size; - temp->sm_file.size_ctl_structure = (size_t) rem_file->size_ctl_structure; - temp->sm_file.data_seg_alignment = (size_t) rem_file->data_seg_alignment; - temp->refcnt = 1; - - /* Phase Five: - If map_all == true, then we map every peer's file - else we check to see if I have already mapped this - vpid/jobid/filename combination and if I have, then - I do not mmap this peer's file. - * - */ - fd = open(temp->sm_file.file_name, O_RDWR, 0600); - if (0 > fd) { - opal_output (ompi_bcol_base_framework.framework_output, "SMCM Allgather failed to open sm backing file %s. 
errno = %d", - temp->sm_file.file_name, errno); - rc = OMPI_ERROR; - goto Error; - } - - /* map the file */ - temp->sm_mmap = bcol_basesmuma_smcm_reg_mmap (NULL, fd, temp->sm_file.size, - temp->sm_file.size_ctl_structure, - temp->sm_file.data_seg_alignment, - temp->sm_file.file_name); - close (fd); - if (NULL == temp->sm_mmap) { - opal_output (ompi_bcol_base_framework.framework_output, "mmapping failed to map remote peer's file"); - OBJ_RELEASE(temp); - rc = OMPI_ERROR; - goto Error; - } - - /* compute memory offset */ - mem_offset = (ptrdiff_t) temp->sm_mmap->data_addr - - (ptrdiff_t) temp->sm_mmap->map_seg; - temp->sm_mmap->map_seg->seg_offset = mem_offset; - temp->sm_mmap->map_seg->seg_size = temp->sm_file.size - mem_offset; - /* more stuff to follow */ - - /* append this peer's info, including shared memory map addr, onto the - peer_list */ - - /* record file data */ - backing_files[i] = (bcol_basesmuma_smcm_proc_item_t *) temp; - - opal_list_append(peer_list, (opal_list_item_t*) temp); - } - - rc = OMPI_SUCCESS; - - Error: - - /* error clean-up and return */ - if (NULL != all_files) { - free(all_files); - } - - return rc; -} - -int bcol_basesmuma_smcm_release_connections (mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_sbgp_base_module_t *sbgp_module, opal_list_t *peer_list, - bcol_basesmuma_smcm_proc_item_t ***back_files) -{ - bcol_basesmuma_smcm_proc_item_t **smcm_procs = *back_files; - - for (int i = 0 ; i < sbgp_module->group_size ; ++i) { - if (smcm_procs[i] && 0 == --smcm_procs[i]->refcnt) { - opal_list_remove_item (peer_list, (opal_list_item_t *) smcm_procs[i]); - OBJ_RELEASE(smcm_procs[i]); - } - } - - free (smcm_procs); - *back_files = NULL; - - return OMPI_SUCCESS; - } - - -/* - * mmap the specified file as a shared file. No information exchange with other - * processes takes place within this routine. - * This function assumes that the memory has already been allocated, and only the - * mmap needs to be done. 
- */ -bcol_basesmuma_smcm_mmap_t *bcol_basesmuma_smcm_mem_reg(void *in_ptr, - size_t length, - size_t alignment, - char* file_name) -{ - /* local variables */ - int fd = -1; - bcol_basesmuma_smcm_mmap_t *map = NULL; - int rc; - - /* if pointer is not allocated - return error. We have no clue how the user will allocate or - * free this memory. - */ - - /* open the shared memory backing file */ - - fd = open(file_name, O_CREAT|O_RDWR,0600); - if (fd < 0) { - opal_output (ompi_bcol_base_framework.framework_output, "basesmuma shared memory allocation open failed with errno: %d", - errno); - } else if (0 != ftruncate(fd,length)) { - opal_output (ompi_bcol_base_framework.framework_output, "basesmuma shared memory allocation ftruncate failed with errno: %d", - errno); - } else { - /* ensure there is enough space for the backing store */ - rc = ftruncate (fd, length); - if (0 > rc) { - opal_output (ompi_bcol_base_framework.framework_output, "failed to truncate the file to be mapped. errno: %d", errno); - close(fd); - return NULL; - } - - map = bcol_basesmuma_smcm_reg_mmap(in_ptr, fd, length, 0, alignment, file_name); - if (NULL == map) { - close(fd); - return NULL; - } - } - /* no longer need this file descriptor. 
close it */ - close (fd); - - /* takes us to the top of the control structure */ - - return map; - -} - -static bcol_basesmuma_smcm_mmap_t * bcol_basesmuma_smcm_reg_mmap(void *in_ptr, int fd, size_t length, - size_t addr_offset, size_t alignment, - char *file_name) -{ - - /* local variables */ - bcol_basesmuma_smcm_mmap_t *map; - bcol_basesmuma_smcm_file_header_t *seg; - unsigned char* myaddr = NULL; - int flags = MAP_SHARED; - - /* set up the map object */ - map = OBJ_NEW(bcol_basesmuma_smcm_mmap_t); - if (OPAL_UNLIKELY(NULL == map)) { - return NULL; - } - - /* map the file and initialize the segment state */ - if (NULL != in_ptr) { - flags |= MAP_FIXED; - } - seg = (bcol_basesmuma_smcm_file_header_t *) - mmap(in_ptr, length, PROT_READ|PROT_WRITE, flags, fd, 0); - if((void*)-1 == seg) { - OBJ_RELEASE(map); - return NULL; - } - - map->map_path = strdup (file_name); - - /* the first entry in the file is the control structure. the first entry - in the control structure is an mca_common_sm_file_header_t element */ - map->map_seg = seg; - - myaddr = (unsigned char *) seg + addr_offset; - /* if we have a data segment (i.e. if 0 != data_seg_alignement) */ - - if (alignment) { - myaddr = OPAL_ALIGN_PTR(myaddr, alignment, unsigned char*); - - /* is addr past the end of the file? 
*/ - if ((unsigned char *) seg+length < myaddr) { - opal_output (ompi_bcol_base_framework.framework_output, "mca_bcol_basesmuma_sm_alloc_mmap: memory region too small len %lu add %p", - (unsigned long) length, myaddr); - OBJ_RELEASE(map); - munmap ((void *)seg, length); - return NULL; - } - - } - - map->data_addr = (unsigned char*) myaddr; - map->map_addr = (unsigned char*) seg; - map->map_size = length; - - return map; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.h b/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.h deleted file mode 100644 index aebb93db796..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.h +++ /dev/null @@ -1,105 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef BCOL_BASESMUMA_SMCM_H -#define BCOL_BASESMUMA_SMCM_H - -#include -#include - -#include "ompi_config.h" -#include "ompi/proc/proc.h" - -#include "opal/class/opal_object.h" -#include "opal/class/opal_list.h" -#include "opal/sys/atomic.h" - - - -typedef struct bcol_basesmuma_smcm_file_header_t { - /* lock to control atomic access */ - opal_atomic_lock_t seg_lock; - - /* is the segment ready for use */ - volatile int32_t seg_inited; - - /* Offset to next available memory location available for allocation */ - size_t seg_offset; - - /* total size of the segment */ - size_t seg_size; -} bcol_basesmuma_smcm_file_header_t; - - -typedef struct bcol_basesmuma_smcm_mmap_t { - /* double link list element */ - opal_list_item_t super; - /* pointer to header imbeded in the shared memory file */ - bcol_basesmuma_smcm_file_header_t *map_seg; - /* base address of the mmap'ed file */ - unsigned char *map_addr; - /* base address of 
data segment */ - unsigned char *data_addr; - /* How big it is (in bytes) */ - size_t map_size; - /* Filename */ - char *map_path; -} bcol_basesmuma_smcm_mmap_t; - -OBJ_CLASS_DECLARATION(bcol_basesmuma_smcm_mmap_t); - - -/* Struct that characterizes a shared memory file */ -struct bcol_basesmuma_smcm_file_t { - - char *file_name; - size_t size; - size_t size_ctl_structure; - size_t data_seg_alignment; - size_t mpool_size; - -}; -typedef struct bcol_basesmuma_smcm_file_t bcol_basesmuma_smcm_file_t; - - -struct bcol_basesmuma_smcm_proc_item_t { - opal_list_item_t item; /* can put me on a free list */ - int refcnt; - ompi_process_name_t peer; - bcol_basesmuma_smcm_file_t sm_file; - bcol_basesmuma_smcm_mmap_t *sm_mmap; /* Pointer to peer's sm file */ - -}; -typedef struct bcol_basesmuma_smcm_proc_item_t bcol_basesmuma_smcm_proc_item_t; - -OBJ_CLASS_DECLARATION(bcol_basesmuma_smcm_proc_item_t); - - -/* allocate shared memory file - * in_ptr - pointer to preallocated memory (if NULL, this will be mmaped) - * alignment - region memory alignment - * file name - fully qualified backing file name -*/ - -OMPI_DECLSPEC extern bcol_basesmuma_smcm_mmap_t *bcol_basesmuma_smcm_mem_reg(void *in_ptr, - size_t length, - size_t alignment, - char* file_name); - -OMPI_DECLSPEC extern bcol_basesmuma_smcm_mmap_t* bcol_basesmuma_smcm_create_mmap(int fd, - size_t size, char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment); - -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.c deleted file mode 100644 index c5c121cbb26..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "bcol_basesmuma_utils.h" - -/* - * Return closet power of K that is either greater than - * or equal to the group size. - */ -int pow_sm_k(int k, int number, int *pow_k) -{ - int power = 0; - int n = 1; - - if( 2 == k){ - while(n <= number){ - power++; - n <<= 1; - } - *pow_k = n >> 1; - - } else { - while (n <= number) { - n *= k; - power++; - } - *pow_k = n/k; - } - - - return (power-1); -} - - - -int get_k_nomial_src_list(int group_size, - int radix, int my_index, - int *src_list) { - - /* local variables */ - int radix_power; - int offset; - int kount = 0; - int src_temp; - - radix_power = 1; - offset = 1; - while(offset < group_size) { - if( offset % (radix * radix_power) ) { - src_temp = my_index - offset; - /* wrap around */ - if ( src_temp < 0 ) { - src_temp += group_size; - } - /* don't probe ghost nodes */ - if( src_temp < group_size ) { - src_list[kount] = src_temp; - kount++; - } - offset+=radix_power; - } else { - - radix_power *= radix; - } - - } - /* return the actual number of nodes to poll on */ - return kount; -} - -int get_k_nomial_dst_size(int group_size, int radix, int my_index) -{ - int dst_count = 0; - int radix_mask; - int k; - radix_mask = 1; - while (radix_mask < group_size) { - if (0 != my_index % (radix * radix_mask)) { - /* I found my level in tree */ - break; - } - radix_mask *= radix; - } - radix_mask /= radix; - - while(radix_mask > 0) { - /* For each level of tree, do sends */ - for (k = 1; - k < radix && my_index + radix_mask * k < group_size; - ++k) { - dst_count += 1 ; - } - radix_mask /= radix; - } - - return dst_count; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.h b/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.h deleted file mode 100644 index 8389f822d25..00000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. 
All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_BASESMUMA_UTILS_H -#define MCA_BCOL_BASESMUMA_UTILS_H - -#include "ompi_config.h" - -BEGIN_C_DECLS - -#define BASESMUMA_K_NOMIAL_SEND_CHILDREN(radix_mask,radix,relative_index, \ - my_group_index, group_size, ready_flag) \ -do { \ - int k, child; \ - while(radix_mask > 0){ \ - for(k = 1; k < radix && relative_index+radix_mask*k= group_size) { \ - child -= group_size; \ - } \ - /*fprintf(stderr,"I am %d sending to child %d\n",my_group_index,child);*/ \ - child_ctl_pointer = data_buffs[child].ctl_struct; \ - child_ctl_pointer->src = my_group_index; \ - /* this can be improved to make better asynchronous progress, but it's - * fine for now. - */ \ - while(child_ctl_pointer->sequence_number != sequence_number ); \ - child_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; \ - } \ - radix_mask = radix_mask/radix; \ - } \ -} while( 0 ) - - - - -/* - * Return closet power of K that is greater than or equal to "number". - */ -int pow_sm_k(int radix_k, int group_size, int *pow_k_group_size); - -/* - * Get list of possible sources from which data may arrive based on a K-nomial tree fan-out. - */ - -int get_k_nomial_src_list(int group_size, int radix, - int my_index, int *src_list); - - -int get_k_nomial_dst_size(int group_size, int radix, int my_index); - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/bcol.h b/ompi/mca/bcol/bcol.h deleted file mode 100644 index 61b1f96591d..00000000000 --- a/ompi/mca/bcol/bcol.h +++ /dev/null @@ -1,803 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_H -#define MCA_BCOL_H - -#include "ompi_config.h" -#include "opal/class/opal_list.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/coll/coll.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/op/op.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/patterns/net/netpatterns_knomial_tree.h" - -#include "opal/util/show_help.h" - -#include - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* Forward declaration - please do not remove it */ -struct ml_buffers_t; - -struct mca_bcol_base_coll_fn_comm_attributes_t; -struct mca_bcol_base_coll_fn_invoke_attributes_t; -struct mca_bcol_base_coll_fn_desc_t; - -#define NUM_MSG_RANGES 5 -#define MSG_RANGE_INITIAL (1024)*12 -#define MSG_RANGE_INC 10 -#define BCOL_THRESHOLD_UNLIMITED (INT_MAX) -/* Maximum size of a bcol's header. This allows us to correctly calculate the message - * thresholds. If the header of any bcol exceeds this value then increase this one - * to match. 
*/ -#define BCOL_HEADER_MAX 96 - -#define BCOL_HEAD_ALIGN 32 /* will turn into an MCA parameter after debug */ - -/* - * Functions supported - */ -enum bcol_coll { - /* blocking functions */ - BCOL_ALLGATHER, - BCOL_ALLGATHERV, - BCOL_ALLREDUCE, - BCOL_ALLTOALL, - BCOL_ALLTOALLV, - BCOL_ALLTOALLW, - BCOL_BARRIER, - BCOL_BCAST, - BCOL_EXSCAN, - BCOL_GATHER, - BCOL_GATHERV, - BCOL_REDUCE, - BCOL_REDUCE_SCATTER, - BCOL_SCAN, - BCOL_SCATTER, - BCOL_SCATTERV, - BCOL_FANIN, - BCOL_FANOUT, - - /* nonblocking functions */ - BCOL_IALLGATHER, - BCOL_IALLGATHERV, - BCOL_IALLREDUCE, - BCOL_IALLTOALL, - BCOL_IALLTOALLV, - BCOL_IALLTOALLW, - BCOL_IBARRIER, - BCOL_IBCAST, - BCOL_IEXSCAN, - BCOL_IGATHER, - BCOL_IGATHERV, - BCOL_IREDUCE, - BCOL_IREDUCE_SCATTER, - BCOL_ISCAN, - BCOL_ISCATTER, - BCOL_ISCATTERV, - BCOL_IFANIN, - BCOL_IFANOUT, - - BCOL_SYNC, - /* New function - needed for intermediate steps */ - BCOL_REDUCE_TO_LEADER, - BCOL_NUM_OF_FUNCTIONS -}; -typedef enum bcol_coll bcol_coll; - -typedef enum bcol_elem_type { - BCOL_SINGLE_ELEM_TYPE, - BCOL_MULTI_ELEM_TYPE, - BCOL_NUM_OF_ELEM_TYPES -} bcol_elem_type; - -typedef int (*mca_bcol_base_module_coll_support_all_types_fn_t)(bcol_coll coll_name); -typedef int (*mca_bcol_base_module_coll_support_fn_t)(int op, int dtype, bcol_elem_type elem_num); - -/* - * Collective function status - */ -enum { - BCOL_FN_NOT_STARTED = (OMPI_ERR_MAX - 1), - BCOL_FN_STARTED = (OMPI_ERR_MAX - 2), - BCOL_FN_COMPLETE = (OMPI_ERR_MAX - 3) -}; - - - -/** - * Collective component initialization - * - * Initialize the given collective component. This function should - * initialize any component-level. data. It will be called exactly - * once during MPI_INIT. - * - * @note The component framework is not lazily opened, so attempts - * should be made to minimze the amount of memory allocated during - * this function. 
- * - * @param[in] enable_progress_threads True if the component needs to - * support progress threads - * @param[in] enable_mpi_threads True if the component needs to - * support MPI_THREAD_MULTIPLE - * - * @retval OMPI_SUCCESS Component successfully initialized - * @retval ORTE_ERROR An unspecified error occurred - */ -typedef int (*mca_bcol_base_component_init_query_fn_t) - (bool enable_progress_threads, bool enable_mpi_threads); - -/** - * Query whether a component is available for the given sub-group - * - * Query whether the component is available for the given - * sub-group. If the component is available, an array of pointers should be - * allocated and returned (with refcount at 1). The module will not - * be used for collective operations until module_enable() is called - * on the module, but may be destroyed (via OBJ_RELEASE) either before - * or after module_enable() is called. If the module needs to release - * resources obtained during query(), it should do so in the module - * destructor. - * - * A component may provide NULL to this function to indicate it does - * not wish to run or return an error during module_enable(). - * - * @note The communicator is available for point-to-point - * communication, but other functionality is not available during this - * phase of initialization. - * - * @param[in] sbgp Pointer to sub-group module. - * @param[out] priority Priority setting for component on - * this communicator - * @param[out] num_modules Number of modules that where generated - * for the sub-group module. - * - * @returns An array of pointer to an initialized modules structures if the component can - * provide a modules with the requested functionality or NULL if the - * component should not be used on the given communicator. 
- */ -typedef struct mca_bcol_base_module_t **(*mca_bcol_base_component_comm_query_fn_t) - (mca_sbgp_base_module_t *sbgp, int *num_modules); - - -typedef int (*mca_bcol_barrier_init_fn_t)(struct mca_bcol_base_module_t *bcol_module, - mca_sbgp_base_module_t *sbgp_module); - - - -/* - * Macro for use in modules that are of type btl v2.0.0 - */ -#define MCA_BCOL_BASE_VERSION_2_0_0 \ - OMPI_MCA_BASE_VERSION_2_1_0("bcol", 2, 0, 0) - - -/* This is really an abstarction violation, but is the easiest way to get - * started. For memory management we need to know what bcol components - * have compatible memory management schemes. Such compatibility can - * be used to eliminate memory copies between levels in the collective - * operation hierarchy, by having the output buffer of one level be the - * input buffer to the next level - */ - -enum { - BCOL_SHARED_MEMORY_UMA=0, - BCOL_SHARED_MEMORY_SOCKET, - BCOL_POINT_TO_POINT, - BCOL_IB_OFFLOAD, - BCOL_SIZE -}; - -OMPI_DECLSPEC extern int bcol_mpool_compatibility[BCOL_SIZE][BCOL_SIZE]; -OMPI_DECLSPEC extern int bcol_mpool_index[BCOL_SIZE][BCOL_SIZE]; - -/* what are the input parameters ? 
too many void * pointers here */ -typedef int (*bcol_register_mem_fn_t)(void *context_data, void *base, - size_t size, void **reg_desc); -/* deregistration function */ -typedef int (*bcol_deregister_mem_fn_t)(void *context_data, void *reg_desc); - -/* Bcol network context definition */ -struct bcol_base_network_context_t { - opal_object_t super; - /* Context id - defined by upper layer, ML */ - int context_id; - /* Any context information that bcol what to use */ - void *context_data; - - /* registration function */ - bcol_register_mem_fn_t register_memory_fn; - /* deregistration function */ - bcol_deregister_mem_fn_t deregister_memory_fn; -}; -typedef struct bcol_base_network_context_t bcol_base_network_context_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(bcol_base_network_context_t); - -/* - *primitive function types - */ - -/* bcast */ -enum { - /* small data function */ - BCOL_BCAST_SMALL_DATA, - - /* small data - dynamic decision making supported */ - BCOL_BCAST_SMALL_DATA_DYNAMIC, - - /* number of functions */ - BCOL_NUM_BCAST_FUNCTIONS -}; - - -/** - * BCOL instance. 
- */ - -/* no limit on fragment size - this supports using user buffers rather - * than library buffers - */ -#define FRAG_SIZE_NO_LIMIT -1 - -/* forward declaration */ -struct coll_bcol_collective_description_t; - -struct mca_bcol_base_component_2_0_0_t { - - /** Base component description */ - mca_base_component_t bcol_version; - - /** Component initialization function */ - mca_bcol_base_component_init_query_fn_t collm_init_query; - - /** Query whether component is useable for given communicator */ - mca_bcol_base_component_comm_query_fn_t collm_comm_query; - - /** If bcol supports all possible data types */ - mca_bcol_base_module_coll_support_fn_t coll_support; - - /** If bcol supports all possible data types for given collective operation */ - mca_bcol_base_module_coll_support_all_types_fn_t coll_support_all_types; - - /** Use this flag to prevent init_query multiple calls - in case we have the same bcol more than on a single level */ - bool init_done; - - /** If collective calls with bcols of this type need to be ordered */ - bool need_ordering; - - /** MCA parameter: Priority of this component */ - int priority; - - /** Bcast function pointers */ - struct coll_bcol_collective_description_t * - bcast_functions[BCOL_NUM_BCAST_FUNCTIONS]; - - /** Number of network contexts - need this for resource management */ - int n_net_contexts; - - /** List of network contexts */ - bcol_base_network_context_t **network_contexts; - - /* - * Fragmentation support - */ - - /** Minimum fragement size */ - int min_frag_size; - - /** Maximum fragment size */ - int max_frag_size; - - /** Supports direct use of user-buffers */ - bool can_use_user_buffers; -}; -typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_2_0_0_t; -typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_component_t); - -/* forward declaration */ -struct mca_coll_ml_descriptor_t; -struct mca_bcol_base_payload_buffer_desc_t; 
-struct mca_bcol_base_route_info_t; - -typedef struct { - int order_num; /* Seq num of collective fragment */ - int bcols_started; /* How many bcols need ordering have been started */ - int n_fns_need_ordering; /* The number of functions are called for bcols need ordering */ -} mca_bcol_base_order_info_t; - -/* structure that encapsultes information propagated amongst multiple - * fragments whereby completing the entire ensemble of fragments is - * necessary in order to complete the entire collective - */ -struct bcol_fragment_descriptor_t { - /* start iterator */ - int head; - /* end iterator */ - int tail; - /* current iteration */ - int start_iter; - /* number of full iterations this frag */ - int num_iter; - /* end iter */ - int end_iter; -}; -typedef struct bcol_fragment_descriptor_t bcol_fragment_descriptor_t; - -struct bcol_function_args_t { - /* full message sequence number */ - int64_t sequence_num; - /* full message descriptor - single copy of fragment invariant - * parameters */ - /* Pasha: We don need this one for new flow - remove it */ - struct mca_coll_ml_descriptor_t *full_message_descriptor; - struct mca_bcol_base_route_info_t *root_route; - /* function status */ - int function_status; - /* root, for rooted operations */ - int root; - /* input buffer */ - void *sbuf; - void *rbuf; - void *userbuf; - struct mca_bcol_base_payload_buffer_desc_t *src_desc; - struct mca_bcol_base_payload_buffer_desc_t *dst_desc; - /* ml buffer size */ - uint32_t buffer_size; - /* index of buffer in ml payload cache */ - int buffer_index; - int count; - struct ompi_datatype_t *dtype; - struct ompi_op_t *op; - int sbuf_offset; - int rbuf_offset; - /* for bcol opaque data */ - void *bcol_opaque_data; - /* An output argument that will be used by BCOL function to tell ML that the result of the BCOL is in rbuf */ - bool result_in_rbuf; - bool root_flag; /* True if the rank is root of operation */ - bool need_dt_support; /* will trigger alternate code path for some colls */ - 
int status; /* Used for non-blocking collective completion */ - uint32_t frag_size; /* fragment size for large messages */ - int hier_factor; /* factor used when bcast is invoked as a service function back down - * the tree in allgather for example, the pacl_len is not the actual - * len of the data needing bcasting - */ - mca_bcol_base_order_info_t order_info; - bcol_fragment_descriptor_t frag_info; - -}; - -struct mca_bcol_base_route_info_t { - int level; - int rank; -}; -typedef struct mca_bcol_base_route_info_t mca_bcol_base_route_info_t; - -struct mca_bcol_base_lmngr_block_t { - opal_list_item_t super; - struct mca_coll_ml_lmngr_t *lmngr; - void* base_addr; -}; -typedef struct mca_bcol_base_lmngr_block_t mca_bcol_base_lmngr_block_t; -OBJ_CLASS_DECLARATION(mca_bcol_base_lmngr_block_t); - -struct mca_bcol_base_memory_block_desc_t { - - /* memory block for payload buffers */ - struct mca_bcol_base_lmngr_block_t *block; - - /* Address offset in bytes -- Indicates free memory in the block */ - uint64_t block_addr_offset; - - /* size of the memory block */ - size_t size_block; - - /* number of memory banks */ - uint32_t num_banks; - - /* number of buffers per bank */ - uint32_t num_buffers_per_bank; - - /* size of a payload buffer */ - uint32_t size_buffer; - - /* pointer to buffer descriptors initialized */ - struct mca_bcol_base_payload_buffer_desc_t *buffer_descs; - - /* index of the next free buffer in the block */ - uint64_t next_free_buffer; - - uint32_t *bank_release_counters; - - /* Counter that defines what bank should be synchronized next - * since collectives could be completed out of order, we have to make - * sure that memory synchronization collectives started in order ! */ - int memsync_counter; - - /* This arrays of flags used to signal that the bank is ready for recycling */ - bool *ready_for_memsync; - - /* This flags monitors if bank is open for usage. 
Usually we expect that user - * will do the check only on buffer-zero allocation */ - bool *bank_is_busy; - -}; - -/* convenience typedef */ -typedef struct mca_bcol_base_memory_block_desc_t mca_bcol_base_memory_block_desc_t; - -typedef void (*mca_bcol_base_release_buff_fn_t)(struct mca_bcol_base_memory_block_desc_t *ml_memblock, uint32_t buff_id); - -struct mca_bcol_base_payload_buffer_desc_t { - void *base_data_addr; /* buffer address */ - void *data_addr; /* buffer address + header offset */ - uint64_t generation_number; /* my generation */ - uint64_t bank_index; /* my bank */ - uint64_t buffer_index; /* my buff index */ -}; -/* convenience typedef */ -typedef struct mca_bcol_base_payload_buffer_desc_t mca_bcol_base_payload_buffer_desc_t; - - - - - - -typedef struct bcol_function_args_t bcol_function_args_t; - - -/* The collective operation is defined by a series of collective operations - * invoked through a function pointer. Each function may be different, - * so will store the arguments in a struct and pass a pointer to the struct, - * and use this as a way to hide the different function signatures. 
- * - * @param[in] input_args Structure with function arguments - * @param[in] bcol_desc Component specific paremeters - * @param[out] status return status of the function - * MCA_BCOL_COMPLETE - function completed - * MCA_BCOL_IN_PROGRESS - function incomplete - * - * @retval OMPI_SUCCESS successful completion - * @retval OMPI_ERROR function returned error - */ -/* forward declaration */ -struct mca_bcol_base_module_t; - -/* collective function prototype - all functions have the same interface - * so that we can call them via a function pointer */ -struct mca_bcol_base_function_t; -typedef int (*mca_bcol_base_module_collective_fn_primitives_t) - (bcol_function_args_t *input_args, struct mca_bcol_base_function_t *const_args); - -typedef int (*mca_bcol_base_module_collective_init_fn_primitives_t) - (struct mca_bcol_base_module_t *bcol_module); - - /** - * function to query for collctive function attributes - * - * @param attribute (IN) the attribute of interest - * @param algorithm_parameters (OUT) the value of attribute for this - * function. If this attribute is not supported, - * OMPI_ERR_NOT_FOUND is returned. - */ - typedef int (*mca_bcol_get_collective_attributes)(int attribute, - void *algorithm_parameters); - -/* data structure for tracking the relevant data needed for ml level - * algorithm construction (e.g., function selection), initialization, and - * usage. 
- */ -struct coll_bcol_collective_description_t { - /* collective initiation function - first functin called */ - mca_bcol_base_module_collective_fn_primitives_t coll_fn; - - /* collective progress function - first functin called */ - mca_bcol_base_module_collective_fn_primitives_t progress_fn; - - /* collective progress function - first functin called */ - mca_bcol_get_collective_attributes get_attributes; - - /* attributes supported - bit map */ - uint64_t attribute; - -}; -typedef struct coll_bcol_collective_description_t -coll_bcol_collective_description_t; - -/* collective operation attributes */ -enum { - /* supports dynamic decisions - e.g., do not need to have the collective - * operation fully defined before it can be started - */ - BCOL_ATTRIBUTE_DYNAMIC, - - /* number of attributes */ - BCOL_NUM_ATTRIBUTES -}; - -/* For rooted collectives, - * does the algorithm knows its data source ? - */ -enum { - DATA_SRC_KNOWN=0, - DATA_SRC_UNKNOWN, - DATA_SRC_TYPES -}; - -enum { - BLOCKING, - NON_BLOCKING -}; -/* gvm For selection logic */ -struct mca_bcol_base_coll_fn_comm_attributes_t { - int bcoll_type; - int comm_size_min; - int comm_size_max; - int data_src; - int waiting_semantics; -}; - -typedef struct mca_bcol_base_coll_fn_comm_attributes_t - mca_bcol_base_coll_fn_comm_attributes_t; - -struct mca_bcol_base_coll_fn_invoke_attributes_t { - int bcol_msg_min; - int bcol_msg_max; - uint64_t datatype_bitmap; /* Max is OMPI_DATATYPE_MAX_PREDEFINED defined to be 45 */ - uint32_t op_types_bitmap; /* bit map of optypes supported */ -}; - -typedef struct mca_bcol_base_coll_fn_invoke_attributes_t - mca_bcol_base_coll_fn_invoke_attributes_t; - -struct mca_bcol_base_coll_fn_desc_t { - opal_list_item_t super; - struct mca_bcol_base_coll_fn_comm_attributes_t *comm_attr; - struct mca_bcol_base_coll_fn_invoke_attributes_t *inv_attr; - mca_bcol_base_module_collective_fn_primitives_t coll_fn; - mca_bcol_base_module_collective_fn_primitives_t progress_fn; -}; - -typedef struct 
mca_bcol_base_coll_fn_desc_t mca_bcol_base_coll_fn_desc_t; -OBJ_CLASS_DECLARATION(mca_bcol_base_coll_fn_desc_t); - -/* end selection logic */ - -typedef int (*mca_bcol_base_module_collective_init_fn_t) - (struct mca_bcol_base_module_t *bcol_module, - mca_sbgp_base_module_t *sbgp_module); - - /* per communicator memory initialization function */ -typedef int (*mca_bcol_module_mem_init)(struct ml_buffers_t *registered_buffers, - mca_bcol_base_component_t *module); - -/* Initialize memory block - ml_memory_block initialization interface function - * - * Invoked at the ml level, used to pass bcol specific registration information - * for the "ml_memory_block" - * - * @param[in] ml_memory_block Pointer to the ml_memory_block. This struct - * contains bcol specific registration information and a call back function - * used for resource recycling. - * - * @param[in] reg_data bcol specific registration data. - * - * @returns On Success: OMPI_SUCCESS - * On Failure: OMPI_ERROR - * - */ -/*typedef int (*mca_bcol_base_init_memory_fn_t) - (struct mca_bcol_base_memory_block_desc_t *ml_block, void *reg_data);*/ - -typedef int (*mca_bcol_base_init_memory_fn_t) - (struct mca_bcol_base_memory_block_desc_t *payload_block, - uint32_t data_offset, - struct mca_bcol_base_module_t *bcol, - void *reg_data); - -typedef int (*mca_common_allgather_init_fn_t) - (struct mca_bcol_base_module_t *bcol_module); - -typedef void (*mca_bcol_base_set_thresholds_fn_t) - (struct mca_bcol_base_module_t *bcol_module); - -enum { - MCA_BCOL_BASE_ZERO_COPY = 1, - MCA_BCOL_BASE_NO_ML_BUFFER_FOR_LARGE_MSG = 1 << 1, - MCA_BCOL_BASE_NO_ML_BUFFER_FOR_BARRIER = 1 << 2 -}; - -/* base module */ -struct mca_bcol_base_module_t { - /* base coll component */ - opal_object_t super; - - /* bcol component (Pasha: Do we really need cache the component?)*/ - mca_bcol_base_component_t *bcol_component; - - /* network context that is used by this bcol - only one context per bcol is allowed */ - bcol_base_network_context_t 
*network_context; - - /* We are going to use the context index a lot, - int order to decrease number of dereferences - bcol->network_context->index - we are caching the value on bcol */ - int context_index; - - /* Set of flags that describe features supported by bcol */ - uint64_t supported_mode; - - /* per communicator memory initialization function */ - mca_bcol_module_mem_init init_module; - - /* sub-grouping module partner */ - mca_sbgp_base_module_t *sbgp_partner_module; - - /* size of subgroup - cache this, so can have access when - * sbgp_partner_module no longer existes */ - int size_of_subgroup; - - /* sequence number offset - want to make sure that we start - * id'ing collectives with id 0, so we can have simple - * resource management. - */ - int64_t squence_number_offset; - - - /* number of times to poll for operation completion before - * breaking out of a non-blocking collective operation - */ - int n_poll_loops; - - /* size of header that will go in data buff, should not include - * any info regarding alignment, let the ml level handle this - */ - uint32_t header_size; - - - /* Each bcol is assigned a unique value - * see if we can get away with 16-bit id - */ - int16_t bcol_id; - - /*FIXME: - * Since mca_bcol_base_module_t is the only parameter which will be passed - * into the bcol_basesmuma_bcast_init(), add the flag to indicate whether - * the hdl-based algorithms will get enabled. 
- */ - bool use_hdl; - /* - * Collective function pointers - */ - /* changing function signature - will replace bcol_functions */ - mca_bcol_base_module_collective_fn_primitives_t bcol_function_table[BCOL_NUM_OF_FUNCTIONS]; - - /* Tables hold pointers to functions */ - mca_bcol_base_module_collective_init_fn_primitives_t bcol_function_init_table[BCOL_NUM_OF_FUNCTIONS]; - opal_list_t bcol_fns_table[BCOL_NUM_OF_FUNCTIONS]; - struct mca_bcol_base_coll_fn_desc_t* - filtered_fns_table[DATA_SRC_TYPES][2][BCOL_NUM_OF_FUNCTIONS][NUM_MSG_RANGES+1][OMPI_OP_NUM_OF_TYPES][OMPI_DATATYPE_MAX_PREDEFINED]; - - /* - * Bcol interface function to pass bcol specific - * info and memory recycling call back - */ - mca_bcol_base_init_memory_fn_t bcol_memory_init; - - /* - * netpatterns interface function, would like to invoke this on - * on the ml level - */ - mca_common_allgather_init_fn_t k_nomial_tree; - /* Each bcol caches a list which describes how many ranks - * are "below" each rank in this bcol - */ - int *list_n_connected; - - /* offsets for scatter/gather */ - int hier_scather_offset; - - /* Small message threshold for each collective */ - int small_message_thresholds[BCOL_NUM_OF_FUNCTIONS]; - - /* Set small_message_thresholds array */ - mca_bcol_base_set_thresholds_fn_t set_small_msg_thresholds; - - /* Pointer to the order counter on the upper layer, - used if the bcol needs to be ordered */ - int *next_inorder; -}; -typedef struct mca_bcol_base_module_t mca_bcol_base_module_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_module_t); - -/* function description */ -struct mca_bcol_base_function_t { - int fn_idx; - /* module */ - struct mca_bcol_base_module_t *bcol_module; - - /* - * The following two parameters are used for bcol modules - * that want to do some optimizations based on the fact that - * n functions from the same bcol module are called in a row. 
- * For example, in the iboffload case, on the first call one - * will want to initialize the MWR, and start to instantiate - * it, but only post it at the end of the last call. - * The index of this function in a sequence of consecutive - * functions from the same bcol - */ - int index_in_consecutive_same_bcol_calls; - - /* number of times functions from this bcol are - * called in order - */ - int n_of_this_type_in_a_row; - - /* - * number of times functions from this module are called in the - * collective operation. - */ - int n_of_this_type_in_collective; - int index_of_this_type_in_collective; -}; -typedef struct mca_bcol_base_function_t mca_bcol_base_function_t; - - - - -struct mca_bcol_base_descriptor_t { - opal_free_list_item_t super; -/* Vasily: will be described in the future */ -}; -typedef struct mca_bcol_base_descriptor_t mca_bcol_base_descriptor_t; - -static inline __opal_attribute_always_inline__ size_t - mca_bcol_base_get_buff_length(ompi_datatype_t *dtype, int count) -{ - ptrdiff_t lb, extent; - ompi_datatype_get_extent(dtype, &lb, &extent); - - return (size_t) (extent * count); -} - -#define MCA_BCOL_CHECK_ORDER(module, bcol_function_args) \ - do { \ - if (*((module)->next_inorder) != \ - (bcol_function_args)->order_info.order_num) { \ - return BCOL_FN_NOT_STARTED; \ - } \ - } while (0); - -#define MCA_BCOL_UPDATE_ORDER_COUNTER(module, order_info) \ - do { \ - (order_info)->bcols_started++; \ - if ((order_info)->n_fns_need_ordering == \ - (order_info)->bcols_started) { \ - ++(*((module)->next_inorder)); \ - } \ - } while (0); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* MCA_BCOL_H */ diff --git a/ompi/mca/bcol/iboffload/Makefile.am b/ompi/mca/bcol/iboffload/Makefile.am deleted file mode 100644 index 4e9dd0c966a..00000000000 --- a/ompi/mca/bcol/iboffload/Makefile.am +++ /dev/null @@ -1,66 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. 
-# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(bcol_iboffload_CPPFLAGS) $(btl_openib_CPPFLAGS) - -sources = \ - bcol_iboffload.h \ - bcol_iboffload_device.h \ - bcol_iboffload_module.c \ - bcol_iboffload_mca.h \ - bcol_iboffload_mca.c \ - bcol_iboffload_endpoint.h \ - bcol_iboffload_endpoint.c \ - bcol_iboffload_frag.h \ - bcol_iboffload_frag.c \ - bcol_iboffload_collfrag.h \ - bcol_iboffload_collfrag.c \ - bcol_iboffload_task.h \ - bcol_iboffload_task.c \ - bcol_iboffload_component.c \ - bcol_iboffload_barrier.c \ - bcol_iboffload_bcast.h \ - bcol_iboffload_bcast.c \ - bcol_iboffload_allgather.c \ - bcol_iboffload_collreq.h \ - bcol_iboffload_collreq.c \ - bcol_iboffload_qp_info.c \ - bcol_iboffload_qp_info.h \ - bcol_iboffload_fanin.c \ - bcol_iboffload_fanout.c \ - bcol_iboffload_allreduce.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_bcol_iboffload_DSO -component_install += mca_bcol_iboffload.la -else -component_noinst += libmca_bcol_iboffload.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. 
- -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_bcol_iboffload_la_SOURCES = $(sources) -mca_bcol_iboffload_la_LDFLAGS = -module -avoid-version $(btl_openib_LDFLAGS) $(bcol_iboffload_LDFLAGS) -mca_bcol_iboffload_la_LIBADD = $(btl_openib_LIBS) $(bcol_iboffload_LIBS) \ - $(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofacm/libmca_common_ofacm.la \ - $(OMPI_TOP_BUILDDIR)/ompi/mca/common/verbs/libmca_common_verbs.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_bcol_iboffload_la_SOURCES =$(sources) -libmca_bcol_iboffload_la_LDFLAGS = -module -avoid-version $(btl_openib_LDFLAGS) $(bcol_iboffload_LDFLAGS) diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload.h b/ompi/mca/bcol/iboffload/bcol_iboffload.h deleted file mode 100644 index 9566c0aff9f..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload.h +++ /dev/null @@ -1,765 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_H -#define MCA_BCOL_IBOFFLOAD_H - -#include "ompi_config.h" - -#include -#include - -#include -#include -#include - -#include "ompi/mca/mca.h" - -#include "ompi/op/op.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/datatype/ompi_datatype_internal.h" - -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" - -#include "ompi/mca/sbgp/ibnet/sbgp_ibnet.h" - -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" - -#include "ompi/mca/common/ofacm/connect.h" - -#include "bcol_iboffload_qp_info.h" - -BEGIN_C_DECLS - -#define IMM_RDMA 1 -#define INLINE 1 -#define NO_INLINE 0 - -#define MCA_IBOFFLOAD_CALC_SIZE_EXT 8 -#define MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE 8 -#define MCA_IBOFFLOAD_CACHE_LINE_SIZE 128 - -#if OPAL_HAVE_IBOFFLOAD_CALC_RDMA -#define MCA_BCOL_IBOFFLOAD_SEND_CALC IBV_M_WR_CALC_SEND -#else -#define MCA_BCOL_IBOFFLOAD_SEND_CALC IBV_M_WR_CALC -#endif - - -/* 0 - barrier rdma info - 1 - ML rdma info */ -#define MAX_REMOTE_RDMA_INFO 2 - -/* forward declarations */ -struct mca_bcol_iboffload_module_t; -struct mca_bcol_iboffload_collreq_t; -struct mca_bcol_iboffload_endpoint_t; -struct mca_bcol_iboffload_frag_t; -struct mca_bcol_iboffload_task_t; -struct mca_bcol_iboffload_qp_info_t; -struct mca_bcol_iboffload_collfrag_t; -struct mca_bcol_iboffload_algth_lst_t; -struct mca_bcol_iboffload_device_t; - -typedef int (*mca_bcol_iboffload_coll_algth_fn_t) ( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -struct mca_bcol_iboffload_rdma_info_t { - uint64_t addr; - uint32_t rkey; - uint32_t lkey; -}; -typedef struct mca_bcol_iboffload_rdma_info_t mca_bcol_iboffload_rdma_info_t; - -struct mca_bcol_iboffload_rdma_buffer_desc_t { - void *data_addr; /* buffer address */ - uint64_t generation_number; /* my generation */ - uint64_t bank_index; /* my bank */ - uint64_t 
buffer_index; /* my buff index */ -}; -typedef struct mca_bcol_iboffload_rdma_buffer_desc_t mca_bcol_iboffload_rdma_buffer_desc_t; - -struct mca_bcol_iboffload_rdma_block_desc_t { - /* number of memory banks */ - uint32_t num_banks; - /* number of buffers per bank */ - uint32_t num_buffers_per_bank; - /* size of a payload buffer */ - uint32_t size_buffer; - /* data offset from ML */ - uint32_t data_offset; - /* pointer to buffer descriptors initialized */ - mca_bcol_iboffload_rdma_buffer_desc_t *rdma_desc; -}; -typedef struct mca_bcol_iboffload_rdma_block_desc_t mca_bcol_iboffload_rdma_block_desc_t; - -/* Information that we need to keep in order to access remote - memory. For each remote peer (endpoint) we will keep this - structure */ -struct mca_bcol_iboffload_rem_rdma_block_t { - /* IB related information first */ - mca_bcol_iboffload_rdma_info_t ib_info; - - mca_bcol_iboffload_rdma_buffer_desc_t *rdma_desc; -}; -typedef struct mca_bcol_iboffload_rem_rdma_block_t mca_bcol_iboffload_rem_rdma_block_t; - -enum { - MCA_BCOL_IBOFFLOAD_BK_COUNTER_INDEX = 0, - MCA_BCOL_IBOFFLOAD_BK_SYNC_INDEX, - MCA_BCOL_IBOFFLOAD_BK_LAST -}; - -/* Information that we need to keep in order to access and - track local memory that is used as source and destinatination - for RDMA operations */ -struct mca_bcol_iboffload_local_rdma_block_t { - /* sync counter keeps next to start bank id */ - int sync_counter; - /* Counter for released ml buffers */ - int *bank_buffer_counter[MCA_BCOL_IBOFFLOAD_BK_LAST]; - /* IB related information first */ - struct mca_bcol_iboffload_rdma_info_t ib_info; - /* back pointer to original ML memory descriptor */ - struct mca_bcol_base_memory_block_desc_t *ml_mem_desc; - /* Pasha: do we really need this one ?*/ - /* caching ml memory descriptor configurations localy */ - mca_bcol_iboffload_rdma_block_desc_t bdesc; -}; -typedef struct mca_bcol_iboffload_local_rdma_block_t mca_bcol_iboffload_local_rdma_block_t; - -struct mca_bcol_iboffload_recv_wr_manager { - 
opal_mutex_t lock; - /** Array of ready to use receive work requests. - * it is 2 dimensional array since for each - * qp size we want to keep separate recv wr */ - struct ibv_recv_wr **recv_work_requests; -}; -typedef struct mca_bcol_iboffload_recv_wr_manager mca_bcol_iboffload_recv_wr_manager; - -/** - * Structure to hold the basic shared memory coll component. First it holds the - * base coll component, and then holds a bunch of - * sm-coll-component-specific stuff (e.g., current MCA param - * values). - */ -struct mca_bcol_iboffload_component_t { - /** Base coll component */ - mca_bcol_base_component_2_0_0_t super; - /** Enable disable verbose mode */ - int verbose; - int num_qps; - /** Whether we want a warning if non default GID prefix is not configured - on multiport setup */ - bool warn_default_gid_prefix; - /** Whether we want a warning if the user specifies a non-existent - device and/or port via bcol_ibofflad_if_[in|ex]clude MCA params */ - bool warn_nonexistent_if; - /** initial size of free lists */ - int free_list_num; - /** maximum size of free lists */ - int free_list_max; - /** number of elements to alloc when growing free lists */ - int free_list_inc; - /** name of ib memory pool */ - char* mpool_name; - /** max outstanding CQE on the CQ */ - int cq_size; - /** Max size of inline data */ - unsigned int max_inline_data; - /** IB partition definition */ - uint32_t pkey_val; - /** Outstanding atomic reads */ - unsigned int qp_ous_rd_atom; - /** IB MTU */ - int mtu; - /** Recv not ready timer */ - int min_rnr_timer; - /** IB timeout */ - int timeout; - /** IB retry count */ - int retry_count; - /** Recv not ready retry count */ - int rnr_retry; - /** IB maximum pending RDMA */ - int max_rdma_dst_ops; - /** IB Service level (QOS) */ - int service_level; - /** Preferred communication buffer alignment in Bytes (must be power of two) */ - int buffer_alignment; - /** Max tasks number for MQ */ - int max_mqe_tasks; - /** Max MQ size */ - int max_mq_size; - 
/** HCA/Port include exclude list */ - char *if_include; - char **if_include_list; - char *if_exclude; - char **if_exclude_list; - /** Dummy argv-style list; a copy of names from the - if_[in|ex]clude list that we use for error checking (to ensure - that they all exist) */ - char **if_list; - /** Array of ibv devices */ - struct ibv_device **ib_devs; - /** devices count */ - int num_devs; - /** MCA param bcol_iboffload_receive_queues */ - char *receive_queues; - /** Common info about all kinds of QPs on each iboffload module */ - struct mca_bcol_iboffload_qp_info_t qp_infos[MCA_BCOL_IBOFFLOAD_QP_LAST]; - /** Array of iboffload devices */ - opal_pointer_array_t devices; - /** Free lists of collfrag descriptors */ - ompi_free_list_t collfrags_free; - /** Free lists of outstanding collective operations */ - ompi_free_list_t collreqs_free; - /** Free lists for free task operations */ - ompi_free_list_t tasks_free; - /** Free lists for free calc task operations */ - ompi_free_list_t calc_tasks_free; - /** Free list of empty frags, that do not keep any - registration information */ - ompi_free_list_t ml_frags_free; - /** Recv work request mananger */ - mca_bcol_iboffload_recv_wr_manager recv_wrs; - /** We allocate some resources on the component - * with creating of the first iboffload module - * and set this flag to true */ - bool init_done; - /** Maximal number of fragments of the same colective request that can be sent in parallel */ - unsigned int max_pipeline_depth; - /** array mapping Open MPI reduction operators to MVerbs reduction operators */ - enum ibv_m_wr_calc_op map_ompi_to_ib_calcs[OMPI_OP_NUM_OF_TYPES]; - /** array mapping Open MPI data types to MVerbs data types */ - enum ibv_m_wr_data_type map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_MAX_PREDEFINED]; - /** The order of the exchange tree */ - int exchange_tree_order; - /** Knomial tree order */ - int knomial_tree_order; - /** K-nomial radix */ - int k_nomial_radix; - /** Maximum number of pulls for completion 
check */ - int max_progress_pull; - /** Barrier function selector */ - int barrier_mode; - /** MCA for selecting Bruck's alltoall algorithms */ - int use_brucks_smsg_alltoall_rdma; - int use_brucks_smsg_alltoall_sr; - /** radix of small-data alltoall Bruck-like algorithm */ - int k_alltoall_bruck_radix; - /** alltoall small data buffer alignment */ - int tmp_buf_alignment; -}; - -/** - * Convenience typedef - */ -typedef struct mca_bcol_iboffload_component_t mca_bcol_iboffload_component_t; - -/* List of all algorithms that we use */ -enum { - FANIN_ALG, - FANOUT_ALG, - RECURSIVE_DOUBLING_BARRIER_ALG, - RECURSIVE_KNOMIAL_BARRIER_ALG, - RECURSIVE_DOUBLING_ALLREDUCE_ALG, - RECURSIVE_DOUBLING_REDUCE_ALG, - RECURSIVE_DOUBLING_TREE_BCAST, - ALL_ENDPOINTS, /* connected to all peers */ - ALLGATHER_KNOMIAL_ALG, - ALLGATHER_NEIGHBOR_ALG, - REMOTE_EXCHANGE_ALG, - LAST_ALG -}; - -struct mca_bcol_iboffload_port_t { - int id; /** Port number on device: 1 or 2 */ - int stat; /** Port status - Active,Init,etc.. 
*/ - enum ibv_mtu mtu; /** MTU on this port */ - uint64_t subnet_id; /** Sunnet id for the port */ - uint16_t lid; - uint16_t lmc; -}; -typedef struct mca_bcol_iboffload_port_t mca_bcol_iboffload_port_t; - -enum { - COLL_MQ = 0, - SERVICE_MQ, - BCOL_IBOFFLOAD_MQ_NUM -}; - -struct mca_bcol_iboffload_module_t { - /* base structure */ - mca_bcol_base_module_t super; - - /* size */ - int group_size; - int log_group_size; - - /* size of each memory segment */ - size_t segment_size; - - /* collective tag */ - long long collective_tag; - - /* pointer to device */ - struct mca_bcol_iboffload_device_t *device; - - /* caching port number */ - uint32_t port; - - /* Connecting iboffload with ibnet module information */ - /* pointer to sbgp ibnet */ - mca_sbgp_ibnet_module_t *ibnet; - - /* connection group inder for the ibnet */ - int cgroup_index; - - /* array of endpoints */ - struct mca_bcol_iboffload_endpoint_t **endpoints; - - /* Size of the endpoints array */ - int num_endpoints; - - /* caching port subnet id and lid - * the same information we have on device */ - uint64_t subnet_id; - uint16_t lid; - - /* Pointer to management queue */ - struct mqe_context *mq[BCOL_IBOFFLOAD_MQ_NUM]; - int mq_credit[BCOL_IBOFFLOAD_MQ_NUM]; - - /* pending list of collfrags */ - opal_list_t collfrag_pending; - - /* recursive-doubling tree node */ - netpatterns_pair_exchange_node_t recursive_doubling_tree; - - /* N exchange tree */ - netpatterns_pair_exchange_node_t n_exchange_tree; - - /* Knomial exchange tree */ - netpatterns_k_exchange_node_t knomial_exchange_tree; - - /* Knomial exchange tree */ - netpatterns_k_exchange_node_t knomial_allgather_tree; - - /* The array will keep pre-calculated task consumption per - * algorithm - */ - uint32_t alg_task_consump[LAST_ALG]; - - /* Pointer to a func that's implementation of a barrier algorithm */ - mca_bcol_iboffload_coll_algth_fn_t barrier_algth; - - /* Pointer to a func that's implementation of a fanin algorithm */ - 
mca_bcol_iboffload_coll_algth_fn_t fanin_algth; - - /* Pointer to a func that's implementation of a fanin algorithm */ - mca_bcol_iboffload_coll_algth_fn_t fanout_algth; - - /* Pointer to a func that's implementation of a allreduce algorithm */ - mca_bcol_iboffload_coll_algth_fn_t allreduce_algth; - - /* Pointer to a func that's implementation of a non blocking memory syncronization algorithm */ - mca_bcol_iboffload_coll_algth_fn_t memsync_algth; - - /* rdma block memory information */ - mca_bcol_iboffload_local_rdma_block_t rdma_block; - - /* The largest power of two which 1 << power_of_2 - is not larger than the group size */ - int power_of_2; - - /* The largest power of two number which is not larger than the group size */ - int power_of_2_ranks; - - /* Connection status array */ - bool connection_status[LAST_ALG]; - - /* map from communicator ranks to ibsubnet */ - int *comm_to_ibnet_map; - - /* order preserving value */ - int64_t prev_sequence_num; - - /* Temp iovec to send the data fragments -- alltoall Brucks */ - struct iovec *alltoall_iovec; - struct iovec *alltoall_recv_iovec; - - /* tree radix for the knomial bruck small data alltoall */ - int k_alltoall_bruck_radix; - - /* Temp buffer alignment for knomial bruck small data alltoall */ - int tmp_buf_alignment; - - /* Free task list with sge's array */ - ompi_free_list_t iovec_tasks_free; -}; - -typedef struct mca_bcol_iboffload_module_t mca_bcol_iboffload_module_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_module_t); - -/** - * Global component instance - */ -OMPI_MODULE_DECLSPEC - extern mca_bcol_iboffload_component_t mca_bcol_iboffload_component; - -static inline int mca_bcol_iboffload_err(const char* fmt, ...) 
-{ - va_list list; - int ret; - - va_start(list, fmt); - ret = vfprintf(stderr, fmt, list); - va_end(list); - return ret; -} - -#define MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(ompi_op, c_type, l_operand, r_operand, result) \ -do { \ - switch (ompi_op) { \ - case OMPI_OP_MAX: \ - *((c_type *)&result) = ((*(c_type *)&(l_operand) > *(c_type *)&(r_operand)) ? \ - *(c_type *)&(l_operand) : *(c_type *)&(r_operand)); \ - break; \ - case OMPI_OP_MIN: \ - *((c_type *)&result) = ((*(c_type *)&(l_operand) < *(c_type *)&(r_operand)) ? \ - *(c_type *)&(l_operand) : *(c_type *)&(r_operand)); \ - break; \ - case OMPI_OP_SUM: \ - *((c_type *)&result) = (*((c_type *)&(l_operand)) + *((c_type *)&(r_operand))); \ - break; \ - default: \ - break; \ - } \ -} while (0); - -#define MCA_BCOL_IBOFFLOAD_PKEY_MASK 0x7fff -#define MCA_BCOL_IBOFFLOAD_DEFAULT_GID_PREFIX 0xfe80000000000000ll - -#define IBOFFLOAD_ERROR(args) \ - do { \ - mca_bcol_iboffload_err("[%s]%s[%s:%d:%s] IBOFFLOAD ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_bcol_iboffload_err args; \ - mca_bcol_iboffload_err("\n"); \ - } while(0) - -#if OPAL_ENABLE_DEBUG -#define IBOFFLOAD_VERBOSE(level, args) \ - do { \ - if (mca_bcol_iboffload_component.verbose >= level) { \ - mca_bcol_iboffload_err("[%s]%s[%s:%d:%s] IBOFFLOAD ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_bcol_iboffload_err args; \ - mca_bcol_iboffload_err("\n"); \ - } \ - } while(0) -#else -#define IBOFFLOAD_VERBOSE(level, args) -#endif - -#define MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(coll_req, coll_work_req) \ - do { \ - opal_list_append(&(coll_req)->work_requests, \ - (opal_list_item_t*) (coll_work_req)); \ - (coll_work_req)->coll_full_req = (coll_req); \ - } while(0) -/* Vasily: will be removed soon */ -#define APPEND_TO_TASKLIST(task_ptr_to_set, event, last_event_type) \ - do { \ - *task_ptr_to_set = &(event)->element; \ - 
last_event_type = &(event)->element; \ - task_ptr_to_set = &((event)->element.next); \ - } while(0) - -#define MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(task_ptr_to_set, task) \ - do { \ - *task_ptr_to_set = (task); \ - task_ptr_to_set = &((task)->next_task); \ - } while(0) - -#define MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(task_ptr_to_set, event) \ - do { \ - *task_ptr_to_set = &(event)->element; \ - task_ptr_to_set = &((event)->element.next); \ - } while(0) - -#define BCOL_IS_COMPLETED(req) (((req)->n_frag_mpi_complete == (req)->n_fragments) && \ - ((req)->n_fragments > 0)) - -#define BCOL_AND_NET_ARE_COMPLETED(req) (BCOL_IS_COMPLETED(req) && \ - ((req)->n_frag_net_complete == (req)->n_fragments)) - -/* Pasha: Need to add locks here */ -#define BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(module, mq_index, num_of_credits) \ - (((module)->mq_credit[mq_index] -= (num_of_credits)) < 0 ? false : true) -/* Pasha: Need to add locks here */ -#define BCOL_IBOFFLOAD_MQ_RETURN_CREDITS(module, mq_index, num_of_credits) \ - ((module)->mq_credit[mq_index] += (num_of_credits)) - -#define BCOL_IBOFFLOAD_IS_FIRST_CALL(args) (0 == (args)->index_in_consecutive_same_bcol_calls) - -#define BCOL_IBOFFLOAD_IS_LAST_CALL(args) (((args)->n_of_this_type_in_collective - 1) == \ - (args)->index_of_this_type_in_collective) - -#define BCOL_IBOFFLOAD_READY_TO_POST(args) (((args)->n_of_this_type_in_a_row - 1) == \ - (args)->index_in_consecutive_same_bcol_calls) -/* - * bcol module functions - */ - -int mca_bcol_iboffload_rec_doubling_start_connections(struct mca_bcol_iboffload_module_t *iboffload); - -/* RDMA addr exchange with rem proc */ -int mca_bcol_iboffload_exchange_rem_addr(struct mca_bcol_iboffload_endpoint_t *ep); - -/* Progress function */ -int mca_bcol_iboffload_component_progress(void); - -/* Register memory */ -int mca_bcol_iboffload_register_mr(void *reg_data, void * base, size_t size, - mca_mpool_base_registration_t *reg); - -/* Deregister memory */ -int mca_bcol_iboffload_deregister_mr(void 
*reg_data, mca_mpool_base_registration_t *reg); - -/* - * The function is used for create CQ in this module. - */ -int mca_bcol_iboffload_adjust_cq(struct mca_bcol_iboffload_device_t *device, - struct ibv_cq **ib_cq); -/* - * Query to see if the component is available for use, - * and can satisfy the thread and progress requirements - */ -int mca_bcol_iboffload_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - - -/* Interface to setup the allgather tree */ -int mca_bcol_iboffload_setup_knomial_tree(mca_bcol_base_module_t *super); - -/* - * Query to see if the module is available for use on - * the given communicator, and if so, what it's priority is. - */ -mca_bcol_base_module_t ** -mca_bcol_iboffload_comm_query(mca_sbgp_base_module_t *sbgp, int *num_modules); - -int -mca_bcol_iboffload_free_tasks_frags_resources( - struct mca_bcol_iboffload_collfrag_t *collfrag, - ompi_free_list_t *frags_free); - -/** - * Shared memory blocking barrier - */ - -int mca_bcol_iboffload_small_msg_bcast_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t - *const_args); - -int mca_bcol_iboffload_barrier_intra_recursive_doubling_start( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_barrier_intra_recursive_knomial_start( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_barrier_intra_recursive_doubling( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_nb_memory_service_barrier_start( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_fanin_register(mca_bcol_base_module_t *super); -int mca_bcol_iboffload_fanout_register(mca_bcol_base_module_t *super); -int mca_bcol_iboffload_barrier_register(mca_bcol_base_module_t *super); -int 
mca_bcol_iboffload_memsync_register(mca_bcol_base_module_t *super); -int mca_bcol_iboffload_allreduce_register(mca_bcol_base_module_t *super); - -int mca_bcol_iboffload_new_style_fanin_first_call( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_new_style_fanout_first_call( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_nb_memory_service_barrier_intra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int mca_bcol_iboffload_coll_support_all_types(bcol_coll coll_name); -int mca_bcol_iboffload_coll_supported(int op, int dtype, bcol_elem_type elem_type); - -static inline __opal_attribute_always_inline__ int - mca_bcol_iboffload_fls(int num) -{ - int i = 1; - int j = 0; - - if (0 == num) { - return 0; - } - - while (i < num) { - i <<= 1; - j++; - } - - if (i > num) { - j--; - } - - return j; -} - -#define BCOL_IBOFFLOAD_IS_EVEN(num) (!((num) & 1)) -static inline __opal_attribute_always_inline__ int - mca_bcol_iboffload_ffs(int num) -{ - int j = 0; - - if (0 == num) { - return 0; - } - - while (BCOL_IBOFFLOAD_IS_EVEN(num)) { - num >>= 1; - j++; - } - - return j; -} - -#if OPAL_ENABLE_DEBUG - -/* Post task list MQ */ -#define IS_IMM(a) (a & MQE_WR_FLAG_IMM_EXE) -#define IS_SIG(a) (a & MQE_WR_FLAG_SIGNAL) -#define IS_BLK(a) (a & MQE_WR_FLAG_BLOCK) - -int task_to_rank(mca_bcol_iboffload_module_t *iboffload, struct mqe_task *task); -int wait_to_rank(mca_bcol_iboffload_module_t *iboffload, struct mqe_task *task); - -#endif - -/* MQ posting function */ -static inline __opal_attribute_always_inline__ int - mca_bcol_iboffload_post_mqe_tasks( - mca_bcol_iboffload_module_t *iboffload, - struct mqe_task *head_mqe) -{ - int rc; - struct mqe_task *bad_mqe = NULL; - -#if OPAL_ENABLE_DEBUG /* debug code */ - - struct mqe_task *curr_mqe_task = NULL; - int send_count = 0, recv_count = 0, wait_count = 0; - - 
curr_mqe_task = head_mqe; - IBOFFLOAD_VERBOSE(10, ("Processing MQE Head with addr %p \n", - (uintptr_t) (void*) curr_mqe_task)); - - while (NULL != curr_mqe_task) { - switch(curr_mqe_task->opcode) { - case MQE_WR_SEND: - IBOFFLOAD_VERBOSE(10, ("Posting task %p id 0x%x: send on QP 0x%x\n" - "rank %d, sg_entry: addr %p LEN %d lkey %u, flag[%d-%d-%d]\n", - (void*) curr_mqe_task, (uintptr_t) curr_mqe_task->wr_id, - curr_mqe_task->post.qp->qp_num, - task_to_rank(iboffload, curr_mqe_task), - curr_mqe_task->post.send_wr->sg_list->addr, - curr_mqe_task->post.send_wr->sg_list->length, - curr_mqe_task->post.send_wr->sg_list->lkey, - IS_IMM(curr_mqe_task->flags), IS_SIG(curr_mqe_task->flags), IS_BLK(curr_mqe_task->flags))); - - ++send_count; - break; - case MQE_WR_RECV: - IBOFFLOAD_VERBOSE(10, ("Posting task %p id 0x%x: recv on QP 0x%x rank %d flag[%d-%d-%d]\n", - (void*) curr_mqe_task, (uintptr_t) curr_mqe_task->wr_id, - curr_mqe_task->post.qp->qp_num, task_to_rank(iboffload, curr_mqe_task), - IS_IMM(curr_mqe_task->flags), IS_SIG(curr_mqe_task->flags), IS_BLK(curr_mqe_task->flags))); - - ++recv_count; - break; - case MQE_WR_CQE_WAIT: - - IBOFFLOAD_VERBOSE(10, ("Posting task %p id %x: wait on CQ %p for rank %d num of waits %d flag[%d-%d-%d]\n", - (void*) curr_mqe_task, (uintptr_t) curr_mqe_task->wr_id, - (void*) curr_mqe_task->wait.cq, wait_to_rank(iboffload, curr_mqe_task), - curr_mqe_task->wait.count, - IS_IMM(curr_mqe_task->flags), IS_SIG(curr_mqe_task->flags), IS_BLK(curr_mqe_task->flags))); - - wait_count += curr_mqe_task->wait.count; - break; - default: - IBOFFLOAD_ERROR(("Fatal error, unknow packet type %d\n", - curr_mqe_task->opcode)); - return OMPI_ERROR; - } - - /* pointer to next task */ - curr_mqe_task = curr_mqe_task->next; - } - - IBOFFLOAD_VERBOSE(10, ("wait[%d] send[%d] recv[%d]\n", - wait_count, send_count, recv_count)); -#endif - - IBOFFLOAD_VERBOSE(10, ("Posting MQ %p \n", (uintptr_t) head_mqe->wr_id)); - - rc = mqe_post_task(iboffload->mq[0], head_mqe, 
&bad_mqe); - if (OPAL_UNLIKELY(0 != rc)) { - IBOFFLOAD_ERROR(("ibv_post_mqe failed, errno says: %s," - " the return code is [%d]\n", - strerror(errno), rc)); - - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ - int lognum(int n) { - int count = 1, lognum = 0; - - while (count < n) { - count = count << 1; - lognum++; - } - - return lognum; -} - -END_C_DECLS - -#endif /* MCA_BCOL_IBOFFLOAD_H */ - diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_allgather.c b/ompi/mca/bcol/iboffload/bcol_iboffload_allgather.c deleted file mode 100644 index 28140e5bb7d..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_allgather.c +++ /dev/null @@ -1,1388 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include -#include "opal_stdint.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_alltoall.h" -#include "bcol_iboffload_bcast.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -#include "opal/include/opal/types.h" - -static int mca_bcol_iboffload_allgather_init( - bcol_function_args_t *fn_arguments, - mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t **coll_request, - bool if_bcol_last, int mq_credits, - collective_message_progress_function progress_fn) -{ - int rc; - - ompi_free_list_item_t *item; - mca_bcol_iboffload_collfrag_t *coll_fragment; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - OMPI_FREE_LIST_WAIT(&cm->collreqs_free, item, rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Wait for free list failed.\n")); - return rc; - } - /* setup call request */ - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - - (*coll_request)->n_fragments = 0; - (*coll_request)->n_frags_sent = 0; - (*coll_request)->n_frag_mpi_complete = 0; - (*coll_request)->n_frag_net_complete = 0; - (*coll_request)->if_bcol_last = if_bcol_last; - (*coll_request)->ml_buffer_index = fn_arguments->buffer_index; - (*coll_request)->completion_cb_fn = NULL; - (*coll_request)->buffer_info[SBUF].buf = (void *) ( - (unsigned char *)fn_arguments->sbuf + - fn_arguments->sbuf_offset); - (*coll_request)->buffer_info[RBUF].buf = (void *) ( - (unsigned char *)fn_arguments->rbuf + - fn_arguments->rbuf_offset); - (*coll_request)->buffer_info[SBUF].offset = fn_arguments->sbuf_offset; - (*coll_request)->buffer_info[RBUF].offset = fn_arguments->rbuf_offset; - /* seems like we should initialize the memory registration pointer to NULL here */ 
- (*coll_request)->buffer_info[SBUF].iboffload_reg = NULL; - (*coll_request)->buffer_info[RBUF].iboffload_reg = NULL; - (*coll_request)->dtype = fn_arguments->dtype; - (*coll_request)->count = fn_arguments->count; - (*coll_request)->module = iboffload_module; - /* TODO Pasha: we need it for pending quque. Set it later. */ - (*coll_request)->progress_fn = progress_fn; - /* TODO Pasha: fix it later */ - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER; - - (*coll_request)->order_info = &fn_arguments->order_info; - - coll_fragment = &((*coll_request)->first_collfrag); - mca_bcol_iboffload_collfrag_init(coll_fragment); - - /** Vasily ????? */ - /* mq_credits = (*coll_request)->total_tasks_num; */ - coll_fragment->mq_credits = mq_credits; - coll_fragment->mq_index = COLL_MQ; - /* pasha: just set it to zero */ - coll_fragment->last_wait_num = 0; - coll_fragment->alg = -2; /* used only for debug */ - /* - if (my_rank == algthm_ptr->root) { - coll_fragment->last_wait_num = 0; - } else { - coll_fragment->last_wait_num = algth_lst->last_wait_num; - } - */ - /* Pasha: we have nothing to unpack */ - coll_fragment->unpack_size = 0; - /* coll_fragment->unpack_size = pack_len; */ - /* coll_fragment->alg = RECURSIVE_DOUBLING_TREE_BCAST; */ - - /* set pointers for (coll frag) <-> (coll full request) */ - (*coll_request)->user_handle_freed = false; - - fn_arguments->bcol_opaque_data = (void *) (*coll_request); - /* We don't have root.. 
- if (true == fn_arguments->root_flag) { - (*coll_request)->root = my_group_index; - } else { - (*coll_request)->root = fn_arguments->root_route->rank; - } - */ - - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS((*coll_request), coll_fragment); - return OMPI_SUCCESS; -} - -#if 1 -static inline void bcol_iboffload_setup_allgather_endpoints_connection(mca_bcol_iboffload_module_t *iboffload) -{ - int i, j; - /*Seems that we don't require this*/ - netpatterns_k_exchange_node_t *exchange_node = &iboffload->knomial_allgather_tree; - - mca_bcol_iboffload_endpoint_t *ep; - - IBOFFLOAD_VERBOSE(10, ("Open connections.\n")); -#if 0 - fprintf(stderr,"Entering Open Connections\n"); -#endif - - /* start with extras and proxy connections */ - if(exchange_node->n_extra_sources > 0) { - /* connect to endpoint */ - /*ep = iboffload->endpoints[comm_to_ibnet[exchange_node->rank_extra_sources_array[0]]];*/ - ep = iboffload->endpoints[exchange_node->rank_extra_sources_array[0]]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - /* now move through the recursive k-ing exchanges */ - if(NULL != exchange_node->rank_exchanges) { - for( i = 0; i < exchange_node->log_tree_order; i++) { - for( j = 0; j < ( exchange_node->tree_order - 1 ); j++) { - if( exchange_node->rank_exchanges[i][j] < 0 ){ - continue; - } - /* connect to endpoint */ - /*ep = iboffload->endpoints[comm_to_ibnet[exchange_node->rank_exchanges[i][j]]];*/ - ep = iboffload->endpoints[exchange_node->rank_exchanges[i][j]]; - if (iboffload->ibnet->super.my_index < ep->index) { - while(0 == (ep)->remote_zero_rdma_addr.addr) { - opal_progress(); - } - } else { - IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index)); - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - } - } - } - - /* set the connection status to connected */ - iboffload->connection_status[ALLGATHER_KNOMIAL_ALG] = true; -} -#endif - - -static inline void 
bcol_iboffload_setup_allgather_ring_endpoints_connection(mca_bcol_iboffload_module_t *iboffload) -{ - int i; - const int group_size = iboffload->ibnet->super.group_size; - mca_bcol_iboffload_endpoint_t *ep; - - IBOFFLOAD_VERBOSE(10, ("Open connections.\n")); - - /* this is algorithm specific - need to move through the algorithm here basically to set up connections, should be - * - */ - - /* I'm going to leave this alone for now, because I'm - * not sure how these endpoints map back to ibnet. Is it mapped to ibnet ids or to communicator ids? - */ - for (i = 0; i < group_size; i++) { - ep = iboffload->endpoints[i]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - /* set the connection status to connected */ - - /*JSL - change this macro */ - iboffload->connection_status[ALLGATHER_NEIGHBOR_ALG] = true; -} - -#if 0 -/* allgather neighbor exchange algorithm N/2 communication steps, 2 connections */ -static int mca_bcol_iboffload_neighbor_allgather_userbuffer_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc, - src, dst; - - uint32_t pack_len; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - int group_size = iboffload_module->group_size; - int step, roffset, soffset; - int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from; - int even_rank; - int parity; - - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - -#if 0 - fprintf(stderr,"entering large msg neighbor exchange allgather\n"); -#endif - IBOFFLOAD_VERBOSE(10,("Entering large msg iboffload allgather")); - if (OPAL_UNLIKELY(!iboffload_module->connection_status[ALLGATHER_NEIGHBOR_ALG])) { - IBOFFLOAD_VERBOSE(10,("Allgather open new connection ")); - bcol_iboffload_setup_allgather_ring_endpoints_connection(iboffload_module); - } - - pack_len = coll_request->count * 
coll_request->dtype->super.size; - IBOFFLOAD_VERBOSE(10,("My packet length %d pack_len frag_count %d dtype size %d ", - pack_len, - coll_request->count, - coll_request->dtype->super.size)); - - /* register send and receive sides */ - /* send side, only sending pack_len data */ - - /* I think that probably I will only register the rbuf */ - /* on receive side I need to register pack_len*group_size data */ - rc = mca_bcol_iboffload_prepare_buffer(coll_request->buffer_info[RBUF].buf, pack_len * group_size, - &coll_request->buffer_info[RBUF].iboffload_reg, iboffload_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Cannot register memory: " - "addr - %p, %d bytes.\n", - coll_request->buffer_info[RBUF].buf, pack_len)); - return OMPI_ERROR; - } - coll_request->buffer_info[RBUF].lkey = coll_request->buffer_info[RBUF].iboffload_reg->mr->lkey; - - /* it is estimated mq consumption... */ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - - - /* start the neighbor exchange */ - - even_rank = !(my_group_index % 2); - if (even_rank) { - neighbor[0] = (my_group_index + 1) % group_size; - neighbor[1] = (my_group_index - 1 + group_size) % group_size; - recv_data_from[0] = my_group_index; - recv_data_from[1] = my_group_index; - offset_at_step[0] = (+2); - offset_at_step[1] = (-2); - } else { - neighbor[0] = (my_group_index - 1 + group_size) % group_size; - neighbor[1] = (my_group_index + 1) % group_size; - recv_data_from[0] = neighbor[0]; - recv_data_from[1] = neighbor[0]; - offset_at_step[0] = (-2); - offset_at_step[1] = (+2); - } - - /* first step is special step, only send one block */ - roffset = neighbor[0]*pack_len; - soffset = my_group_index*pack_len; - /* send receive this */ - - dst = neighbor[0]; - src = 
neighbor[0]; - - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_send_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - /* send the data */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_recv_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, RBUF, - coll_request->buffer_info[RBUF].offset + - soffset/* offset calc */ , - pack_len, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_send_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, RBUF, - coll_request->buffer_info[RBUF].offset + - roffset, - pack_len, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_recv_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - /* now for the actual neighbor exchange algorithm */ - - - /* determine initial send location */ - if(even_rank) { - send_data_from = my_group_index; - }else { - send_data_from = recv_data_from[0]; - } - for( step = 1; step < (group_size/2); step++) { - - parity = step % 2; - recv_data_from[parity] = - (recv_data_from[parity] + offset_at_step[parity] + group_size) % group_size; - src = neighbor[parity]; - dst = src; - - roffset = recv_data_from[parity] * pack_len; - soffset = 
send_data_from * pack_len; - - /* post send rtr and recev rtr together */ - if( 1 == step ){ - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_send_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - /* send the data */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_recv_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - } - - - /* I'm using the hierarchy offset used in the k-nomial allgather */ - /* this won't work...*/ - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, RBUF, - coll_request->buffer_info[RBUF].offset + - soffset/* offset calc */ , - 2 * pack_len, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_send_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, RBUF, - coll_request->buffer_info[RBUF].offset + - roffset, - 2 * pack_len, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_recv_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - send_data_from = recv_data_from[parity]; - - } - - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - (coll_request)->n_fragments = 1; - (coll_request)->n_frags_sent = 1; - - assert(NULL != last_wait); - last_wait->flags |= 
MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - assert(MCA_COLL_ML_NO_BUFFER == coll_request->ml_buffer_index); - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Allgather, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} -#endif - -#if 0 -/* debug connection routine */ -static inline void bcol_iboffload_setup_allgather_endpoints_connection(mca_bcol_iboffload_module_t *iboffload) -{ - int i; - const int group_size = iboffload->ibnet->super.group_size; - mca_bcol_iboffload_endpoint_t *ep; - - IBOFFLOAD_VERBOSE(10, ("Open connections.\n")); - - /* this is algorithm specific - need to move through the algorithm here basically to set up connections, should be - * - */ - - /* I'm going to leave this alone for now, because I'm - * not sure how these endpoints map back to ibnet. Is it mapped to ibnet ids or to communicator ids? 
- */ - for (i = 0; i < group_size; i++) { - ep = iboffload->endpoints[i]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - /* set the connection status to connected */ - - /*JSL - change this macro */ - iboffload->connection_status[ALLGATHER_KNOMIAL_ALG] = true; -} -#endif - -static int mca_bcol_iboffload_k_nomial_allgather_userbuffer_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc, - src, dst, comm_dst, comm_src; - int tree_order, pow_k, i, j; - - uint32_t pack_len; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - int group_size = iboffload_module->group_size; - int *group_list = iboffload_module->super.sbgp_partner_module->group_list; - int my_comm_index = group_list[my_group_index]; - - netpatterns_k_exchange_node_t *exchange_node = &iboffload_module->knomial_allgather_tree; - - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - -#if 0 - fprintf(stderr,"entering large msg allgather\n"); -#endif - IBOFFLOAD_VERBOSE(10,("Entering large msg iboffload allgather")); - if (OPAL_UNLIKELY(!iboffload_module->connection_status[ALLGATHER_KNOMIAL_ALG])) { - IBOFFLOAD_VERBOSE(10,("Allgather open new connection ")); - bcol_iboffload_setup_allgather_endpoints_connection(iboffload_module); - } - - pack_len = coll_request->count * coll_request->dtype->super.size; - IBOFFLOAD_VERBOSE(10,("My packet length %d pack_len frag_count %d dtype size %d ", - pack_len, - coll_request->count, - coll_request->dtype->super.size)); - - /* register send and receive sides */ - /* send side, only sending pack_len data */ - - /* I think that probably I will only register the rbuf */ - /* on receive side I need to register pack_len*group_size data */ - - rc = mca_bcol_iboffload_prepare_buffer(coll_request->buffer_info[RBUF].buf, pack_len * group_size, - 
&coll_request->buffer_info[RBUF].iboffload_reg, iboffload_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Cannot register memory: " - "addr - %p, %d bytes.\n", - coll_request->buffer_info[RBUF].buf, pack_len)); - return OMPI_ERROR; - } - coll_request->buffer_info[RBUF].lkey = coll_request->buffer_info[RBUF].iboffload_reg->mr->lkey; - - /* it is estimated mq consumption... */ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - - /* start with the extra / proxy phase */ - if( EXTRA_NODE == exchange_node->node_type ) { - - - /* send pack_len data to proxy */ - comm_dst = exchange_node->rank_extra_sources_array[0]; - /* get ib subnet id */ - dst = comm_dst; /* comm_to_ibnet[comm_dst];*/ - /* post ready-to-receive receive on sender's side */ - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - - /* send the data */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_recv_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, RBUF, coll_request->buffer_info[RBUF].offset + my_comm_index*pack_len, - pack_len, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_send_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - /* post the receive */ - comm_src = comm_dst; - src = dst; - /* Sending this results in a race condition where if the rtr send bypasses - the large msg receive on proxy's side, then it triggers the start of 
the - recurssive k-ing phase prematurely causing random data corruption. - */ - /* - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_send_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - */ - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, - RBUF, coll_request->buffer_info[RBUF].offset, - pack_len*group_size, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_recv_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - goto FINISHED; - - - } else if( 0 < exchange_node->n_extra_sources ) { - - /* am a proxy, receive pack_len data from extra */ - comm_src = exchange_node->rank_extra_sources_array[0]; - /* get ib subnet */ - src = comm_src; /*comm_to_ibnet[comm_src];*/ - - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_send_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, - RBUF, coll_request->buffer_info[RBUF].offset + pack_len*comm_src, - pack_len, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_recv_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - } - - /* start recursive k - ing */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - for( i = 0; i < pow_k; i++) { - - - /* Post ready-to-recv messages - I am here */ - for( j = 0; j <( 
tree_order - 1); j++) { - comm_src = exchange_node->rank_exchanges[i][j]; - if( comm_src < 0 ){ - continue; - } - /* get ib subnet */ - src = comm_src; /*comm_to_ibnet[comm_src];*/ - - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_send_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - } - - /* Post receive ready-to-recev message - I can send to you */ - for( j = 0; j < (tree_order - 1); j++) { - /* recev ready-to-receive message */ - comm_dst = exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. Make a check: - */ - if( comm_dst < 0 ){ - continue; - } - - /* get ib subnet id */ - dst = comm_dst; /*comm_to_ibnet[comm_dst];*/ - /* post ready-to-receive receive on sender's side */ - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - /* send the data */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_recv_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - } - - - /* (k-1) sends */ - for( j = 0; j < (tree_order - 1); j++ ) { - - /* send phase - */ - comm_dst = exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. 
Make a check - */ - if( comm_dst < 0 ){ - continue; - } - - /* get ib subnet id */ - dst = comm_dst; /*comm_to_ibnet[comm_dst];*/ - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, RBUF, - coll_request->buffer_info[RBUF].offset + pack_len*exchange_node->payload_info[i][j].s_offset/* offset calc */ , - exchange_node->payload_info[i][j].s_len*pack_len, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_send_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - } - - /* we post receives after all sends in order to achieve concurrent - * sends as well as assuring blocking until completely receiving - * all data at level k before starting level k+1 sends - */ - /* (k-1) receives - these are blocking */ - for( j = 0; j < (tree_order - 1); j++) { - /*recv phase */ - comm_src = exchange_node->rank_exchanges[i][j]; - if( comm_src < 0 ){ - continue; - } - /* get ib subnet */ - src = comm_src; /*comm_to_ibnet[comm_src];*/ - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, RBUF, - coll_request->buffer_info[RBUF].offset + pack_len*exchange_node->payload_info[i][j].r_offset, - exchange_node->payload_info[i][j].r_len*pack_len, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_recv_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - - - } - - - } - - /* last step, just send it back to the extra if I have one */ - if( 0 < exchange_node->n_extra_sources ) { - - comm_dst = exchange_node->rank_extra_sources_array[0]; - - /* get ib subnet id */ - dst = comm_dst; /*comm_to_ibnet[comm_dst];*/ - /* - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - - // send the data - we are already 
guaranteed that extra rank is waiting - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_recv_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - */ - - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, RBUF, coll_request->buffer_info[RBUF].offset, - pack_len*group_size, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_send_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - } - -FINISHED: - - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - (coll_request)->n_fragments = 1; - (coll_request)->n_frags_sent = 1; - - assert(NULL != last_wait); - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - assert(MCA_COLL_ML_NO_BUFFER == coll_request->ml_buffer_index); - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Allgather, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? 
BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -static int mca_bcol_iboffload_k_nomial_allgather_mlbuffer_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc, - src, dst, comm_dst, comm_src, i, j; - int tree_order, pow_k, knt; - uint32_t pack_len; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - int group_size = iboffload_module->group_size; - netpatterns_k_exchange_node_t *exchange_node = - &iboffload_module->knomial_allgather_tree; - - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - int *list_connected = iboffload_module->super.list_n_connected; - - /* test test */ - int buff_offset = iboffload_module->super.hier_scather_offset; - - IBOFFLOAD_VERBOSE(10,("Entering small msg iboffload bcast")); - - - if (OPAL_UNLIKELY(!iboffload_module->connection_status[ALLGATHER_KNOMIAL_ALG])) { - IBOFFLOAD_VERBOSE(10,("Bcast open new connection ")); - bcol_iboffload_setup_allgather_endpoints_connection(iboffload_module); - } - - pack_len = coll_request->count * coll_request->dtype->super.size; - IBOFFLOAD_VERBOSE(10,("My packet length %d pack_len frag_count %d dtype size %d ", - pack_len, - coll_request->count, - coll_request->dtype->super.size)); - - /* now we calculate the actual buff_offset */ - buff_offset = buff_offset*pack_len; - - /* it is estimated mq consumption... 
*/ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - /* we put this in to propagate the lkey into this local data structure */ - coll_request->buffer_info[SBUF].lkey = iboffload_module->rdma_block.ib_info.lkey; - /* end hack */ - if( EXTRA_NODE == exchange_node->node_type ) { - /* setup the rdma "send" pack_len data to proxy rank */ - comm_dst = exchange_node->rank_extra_sources_array[0]; - /* get ib subnet id */ - dst = comm_dst; - /* now I need to calculate my own offset info */ - knt = 0; - for( i = 0; i < my_group_index; i++){ - knt += list_connected[i]; - } - - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len*list_connected[my_group_index], pack_len*knt /* source offset */, - pack_len*knt /* destination offset */, dst, - iboffload_module, coll_fragment); -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len, pack_len*group_list[my_group_index] /* source offset */, - pack_len*group_list[my_group_index] /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif - /* old flow with ml offset */ -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len, pack_len*group_list[my_group_index] /* source offset */, - coll_request->buffer_info[RBUF].offset + pack_len*group_list[my_group_index] /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_small_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - /* setup the rdma "receive" from proxy */ - comm_src = comm_dst; - src = dst; - /* more general is the number 
connected */ - knt = 0; - for( i = 0; i < group_size; i++) { - knt += list_connected[i]; - } - - - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len*knt, src, - iboffload_module, coll_fragment); - - /* - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len*group_size, src, - iboffload_module, coll_fragment); - */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - goto FINISHED; - } else if( 0 < exchange_node->n_extra_sources ) { - - /* am a proxy, receive pack_len data from extra */ - comm_src = exchange_node->rank_extra_sources_array[0]; - /* get ib subnet */ - src = comm_src; - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len*list_connected[src], src, - iboffload_module, coll_fragment); - /* - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len, src, - iboffload_module, coll_fragment); - */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - - } - - /* start recursive k - ing */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - /*fprintf(stderr,"tree order %d pow_k %d\n",tree_order,pow_k);*/ - for( i = 0; i < pow_k; i++) { - for( j = 0; j < (tree_order - 1); j++ ) { - /* send phase - */ - comm_dst = exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. 
Make a check - */ - /*fprintf(stderr,"AAA my index %d comm_dst %d\n",my_group_index,comm_dst);*/ - if( comm_dst < 0 ){ - continue; - } - - /* get ib subnet id */ - /* again, don't think we need this */ - /*dst = ibnet_map[comm_dst];*/ - dst = comm_dst; - /* - fprintf(stderr,"BBB my index %d dst %d pack len %d s_len %d src offset %d r_len %d \n",my_group_index,dst, - pack_len,exchange_node->payload_info[i][j].s_len,exchange_node->payload_info[i][j].s_offset, - exchange_node->payload_info[i][j].r_len); - */ - /* rdma "send" setup */ - - - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, exchange_node->payload_info[i][j].s_len * pack_len, - exchange_node->payload_info[i][j].s_offset * pack_len /* source offset */, - exchange_node->payload_info[i][j].s_offset * pack_len /* destination offset */, dst, - iboffload_module, coll_fragment); - -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, exchange_node->payload_info[i][j].s_len * pack_len, - exchange_node->payload_info[i][j].s_offset * exchange_node->payload_info[i][j].s_len*pack_len /* source offset */, - exchange_node->payload_info[i][j].s_offset * exchange_node->payload_info[i][j].s_len*pack_len /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif - -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, exchange_node->payload_info[i][j].s_len * pack_len, - exchange_node->payload_info[i][j].s_offset * pack_len /* source offset */, - exchange_node->payload_info[i][j].s_offset * pack_len /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, exchange_node->payload_info[i][j].s_len * pack_len, - coll_request->buffer_info[SBUF].offset + exchange_node->payload_info[i][j].s_offset * pack_len /* source offset */, - coll_request->buffer_info[SBUF].offset + exchange_node->payload_info[i][j].s_offset * pack_len /* destination offset 
*/, dst, - iboffload_module, coll_fragment); -#endif - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_small_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - /* send is done */ - } - - for( j = 0; j < (tree_order - 1); j++) { - - /* rdma "recv" phase */ - comm_src = exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. Make a check - */ - if( comm_src < 0 ){ - continue; - } - - /* get ib subnet id */ - /* shouldn't need this */ - src = comm_src; - - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - exchange_node->payload_info[i][j].r_len * pack_len, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - } - } - - /* last step, proxies send full data back to the extra ranks */ - if( 0 < exchange_node->n_extra_sources ) { - /* send pack_len data to proxy */ - comm_dst = exchange_node->rank_extra_sources_array[0]; - /* get ibnet id */ - dst = comm_dst; - - knt = 0; - for( i = 0; i < group_size; i++){ - knt += list_connected[i]; - } - - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len*knt, 0 /* source offset */, - 0 /* destination offset */, dst, - iboffload_module, coll_fragment); -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len*group_size, 0 /* source offset */, - 0 /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len*group_size, coll_request->buffer_info[RBUF].offset /* source offset */, - coll_request->buffer_info[SBUF].offset /* destination offset */, dst, - iboffload_module, coll_fragment); 
-#endif - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_small_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - fprintf(stderr,"I'm out of resources \n"); - } - return OMPI_ERROR; - } - /* send is done */ - - } - -FINISHED: - - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - (coll_request)->n_fragments = 1; - (coll_request)->n_frags_sent = 1; - - assert(NULL != last_wait); - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - assert(MCA_COLL_ML_NO_BUFFER != coll_request->ml_buffer_index); - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Allgather, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? 
BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -#if 0 -static int mca_bcol_iboffload_neighbor_allgather_userbuffer_intra( - bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *)const_args->bcol_module; - - int rc; - int mq_credits = iboffload_module->group_size * 2 * 2; /* large message protocol consumes - * twice as many mq credits - */ - - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_allgather_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_neighbor_allgather_userbuffer_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_k_nomial_allgather_userbuffer_intra was started [%d]\n", rc)); - return rc; -} -#endif - -#if 1 -static int mca_bcol_iboffload_k_nomial_allgather_userbuffer_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *)const_args->bcol_module; - - int rc; - int mq_credits = ((iboffload_module->knomial_allgather_tree.tree_order - 1)* - iboffload_module->knomial_allgather_tree.log_tree_order + 1) * 2 * 2; /* large message protocol - * consumes twice as much - */ - - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_allgather_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_k_nomial_allgather_userbuffer_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = 
coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_k_nomial_allgather_userbuffer_intra was started [%d]\n", rc)); - return rc; -} -#endif - -static int mca_bcol_iboffload_k_nomial_allgather_mlbuffer_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *)const_args->bcol_module; - - int rc; - - /* I'll add one for everyone, since nobody wants to feel left out */ - int mq_credits = ((iboffload_module->knomial_allgather_tree.tree_order - 1)* - iboffload_module->knomial_allgather_tree.log_tree_order + 1) * 2 ; - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_allgather_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_k_nomial_allgather_mlbuffer_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_small_msg_bcast_intra was started [%d]\n", rc)); - return rc; -} - - -/* these progress engines are shared between alltoall and allgather and exist in both files, - * should be moved to a common .h file - */ -static int mca_bcol_iboffload_collreq_mlbuffer_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int i; - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - IBOFFLOAD_VERBOSE(10, ("Run progress (ml buffer).\n")); - for (i = 0; i < mca_bcol_iboffload_component.max_progress_pull; i++) { - if (BCOL_IS_COMPLETED(coll_request)) { - - coll_request->user_handle_freed = true; - - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already 
done.\n")); - RELEASE_COLLREQ(coll_request); - } - IBOFFLOAD_VERBOSE(10, ("Collective finished (ml buffer).\n")); - - return BCOL_FN_COMPLETE; - } - } - IBOFFLOAD_VERBOSE(10, ("Collective not finished (ml buffer).\n")); - return BCOL_FN_STARTED; -} - - -static int mca_bcol_iboffload_collreq_userbuffer_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int i; - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - - IBOFFLOAD_VERBOSE(10, ("Run progress (user buffer)\n")); - - /* Complete the allgather - progress releases full request descriptors */ - - for (i = 0; i < mca_bcol_iboffload_component.max_progress_pull; i++) { - if (coll_request->n_frag_mpi_complete == coll_request->n_fragments && - coll_request->n_frag_net_complete == coll_request->n_fragments) { - - IBOFFLOAD_VERBOSE(10, ("Deregister user buff.\n")); - - if (NULL != coll_request->buffer_info[SBUF].iboffload_reg) { - coll_request->module->device->mpool->mpool_deregister( - coll_request->module->device->mpool, - (mca_mpool_base_registration_t *) coll_request->buffer_info[SBUF].iboffload_reg); - coll_request->buffer_info[SBUF].iboffload_reg = NULL; - } - - - if (NULL != coll_request->buffer_info[RBUF].iboffload_reg) { - coll_request->module->device->mpool->mpool_deregister( - coll_request->module->device->mpool, - (mca_mpool_base_registration_t *) coll_request->buffer_info[RBUF].iboffload_reg); - coll_request->buffer_info[RBUF].iboffload_reg = NULL; - } - - RELEASE_COLLREQ(coll_request); - IBOFFLOAD_VERBOSE(10, ("New bcast done !!!")); - return BCOL_FN_COMPLETE; - } - } - - IBOFFLOAD_VERBOSE(10, ("Collective finished (user buffer).\n")); - - /* We are not done */ - return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_allgather_register(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - 
IBOFFLOAD_VERBOSE(10, ("Register iboffload Allgather.\n")); - comm_attribs.bcoll_type = BCOL_ALLGATHER; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_k_nomial_allgather_mlbuffer_intra, - mca_bcol_iboffload_collreq_mlbuffer_progress); - - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - - /* zero-copy k-nomial algorithm */ -#if 1 - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_k_nomial_allgather_userbuffer_intra, - mca_bcol_iboffload_collreq_userbuffer_progress); -#endif - /* zero-copy neighbor exchange algorithm */ -#if 0 - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_neighbor_allgather_userbuffer_intra, - mca_bcol_iboffload_collreq_userbuffer_progress); -#endif - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_allreduce.c b/ompi/mca/bcol/iboffload/bcol_iboffload_allreduce.c deleted file mode 100644 index 406442ff7c0..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_allreduce.c +++ /dev/null @@ -1,1418 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include -#include "opal_stdint.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -#include "opal/include/opal/types.h" - -static int mca_bcol_iboffload_calc_res_to_user(void *callback_data) -{ - int rc; - uint64_t result = 0; - - uint64_t l_operand = 0; - uint64_t r_operand = 0; - - mca_bcol_iboffload_collfrag_t *coll_frag = - (mca_bcol_iboffload_collfrag_t *) callback_data; - - mca_bcol_iboffload_collreq_t *coll_request = coll_frag->coll_full_req; - - ompi_op_t *op = coll_request->op; - ompi_datatype_t *dtype = coll_request->dtype; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - struct ibv_context *ib_dev_context = coll_request->module->device->dev.ib_dev_context; - - IBOFFLOAD_VERBOSE(10, ("Start calculating.\n")); - - rc = unpack_data_from_calc(ib_dev_context, - cm->map_ompi_to_ib_calcs[op->op_type], - cm->map_ompi_to_ib_dt[dtype->id], false, - (void *) (uintptr_t) coll_request->l_operand, - NULL, (void *) &l_operand); - if (0 != rc) { - IBOFFLOAD_VERBOSE(10, ("unpack_data_from_calc for l_operand failed: op %s, type %s\n", - op->o_name, dtype->name)); - return OMPI_ERROR; - } - - rc = unpack_data_from_calc(ib_dev_context, - cm->map_ompi_to_ib_calcs[op->op_type], - cm->map_ompi_to_ib_dt[dtype->id], false, - (void *) (uintptr_t) coll_request->r_operand, - NULL, (void *) &r_operand); - if (0 != rc) { - IBOFFLOAD_VERBOSE(10, ("unpack_data_from_calc for r_operand failed: op %s, type %s\n", - op->o_name, dtype->name)); - return OMPI_ERROR; - } - - switch (op->op_type) { - case OMPI_OP_PROD: - break; /* ronni todo - ????? 
*/ - case OMPI_OP_LAND: - result = l_operand && r_operand; - break; - case OMPI_OP_BAND: - result = l_operand & r_operand; - break; - case OMPI_OP_LOR: - result = l_operand || r_operand; - break; - case OMPI_OP_BOR: - result = l_operand | r_operand; - break; - case OMPI_OP_LXOR: - result = ((l_operand && !r_operand) || (!l_operand && r_operand)); - break; - case OMPI_OP_BXOR: - result = l_operand ^ r_operand; - break; - case OMPI_OP_MAXLOC: - case OMPI_OP_MINLOC: - break; - case OMPI_OP_MAX: - case OMPI_OP_MIN: - case OMPI_OP_SUM: - switch (cm->map_ompi_to_ib_dt[dtype->id]) { - case IBV_M_DATA_TYPE_INT8: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, char, l_operand, r_operand, result); - break; - case IBV_M_DATA_TYPE_INT16: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, int16_t, l_operand, r_operand, result); - break; - case IBV_M_DATA_TYPE_INT32: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, int32_t, l_operand, r_operand, result); - break; - case IBV_M_DATA_TYPE_INT64: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, int64_t, l_operand, r_operand, result); - break; - case IBV_M_DATA_TYPE_FLOAT32: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, float, l_operand, r_operand, result); - break; - case IBV_M_DATA_TYPE_FLOAT64: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, double, l_operand, r_operand, result); - break; - default: - IBOFFLOAD_VERBOSE(10, ("Unsupported data type: %s.\n", dtype->name)); - return OMPI_ERROR; - } - - break; - - default: - IBOFFLOAD_VERBOSE(10, ("Unsupported op: %s.\n", coll_request->op->o_name)); - return OMPI_ERROR; - } - - memcpy(coll_request->buffer_info[RBUF].buf, &result, coll_frag->unpack_size); - IBOFFLOAD_VERBOSE(10, ("The output data after calc is %lf, result %lf, l_operand %lf, r_operand %lf: " - "sbuf addr %p, rbuf addr %p.\n", - *(double *) coll_request->buffer_info[RBUF].buf, *(double *) &result, - *(double *) 
&l_operand, *(double *) &r_operand, - coll_request->buffer_info[SBUF].buf, - coll_request->buffer_info[RBUF].buf)); - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_unpack_res_to_user(void *callback_data) -{ - int rc; - - mca_bcol_iboffload_collfrag_t *coll_frag = - (mca_bcol_iboffload_collfrag_t *) callback_data; - - mca_bcol_iboffload_collreq_t *coll_request = coll_frag->coll_full_req; - mca_bcol_iboffload_task_t *task = (mca_bcol_iboffload_task_t *) coll_frag->signal_task_wr_id; - - mca_bcol_iboffload_frag_t *recv_frag = task->frag; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - struct ibv_context *ib_dev_context = coll_request->module->device->dev.ib_dev_context; - - rc = unpack_data_from_calc(ib_dev_context, - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - cm->map_ompi_to_ib_dt[coll_request->dtype->id], - false, (void*) (uintptr_t) recv_frag->sg_entry.addr, - NULL, coll_request->buffer_info[RBUF].buf); - if (0 != rc) { - IBOFFLOAD_VERBOSE(10, ("unpack_data_from_calc is failed: op %s, type %s\n", - coll_request->op->o_name, coll_request->dtype->name)); - return OMPI_ERROR; - } - - IBOFFLOAD_VERBOSE(10, ("The naitive output data is %" PRId64 ".\n" - "The output data is %" PRId64 ".\n", - *(uint64_t *) recv_frag->sg_entry.addr, - *(uint64_t *) coll_request->buffer_info[RBUF].buf)); - - return OMPI_SUCCESS; -} - -static int -allreduce_extra_node(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request) -/* (EXTRA_NODE == my_exchange_node->node_type) */ -{ - /* local variables */ - int rc, extra_rank; - - mca_bcol_iboffload_frag_t *send_fragment, - *preposted_recv_frag; - - mca_bcol_iboffload_task_t *send_task, - *wait_task; - - struct mqe_task *last_wait, /* we need ask from completion on last wait */ - *last_send; - - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t 
*coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* I will NOT participate in the exchange - so just "register" as here */ - extra_rank = my_exchange_node->rank_extra_source; - - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - extra_rank, coll_request->qp_index, - MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE, 0, - SBUF, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC); - - if (OPAL_UNLIKELY(NULL == send_fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* send my operand to EXCHANGE NODE */ - send_task = mca_bcol_iboffload_get_send_task(iboffload, extra_rank, - coll_request->qp_index, send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, extra_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for final result from EXCHANGE NODE */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, extra_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto 
out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - *mqe_ptr_to_set = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - /* Pasha: need to set to true in upper layer */ - coll_request->user_handle_freed = false; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - IBOFFLOAD_VERBOSE(10, ("Post tasks.\n")); - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Allreduce: adding collfrag to collfrag_pending.\n")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -/** - * Start allreduce - */ -static int do_exchange(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request, - struct mqe_task ***mqe_ptr_to_set, - struct mqe_task **last_wait, - struct ibv_sge **l_operand, - struct ibv_sge **r_operand) -{ - int rc = OMPI_SUCCESS, exchange, pair_rank, - my_rank = ((mca_sbgp_base_module_t *) iboffload->ibnet)->my_index; - - mca_bcol_iboffload_frag_t *preposted_recv_frag; - - mca_bcol_iboffload_task_t *wait_task, - *calc_task; - - struct mqe_task *last_send; - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - mca_bcol_iboffload_collfrag_t *coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - size_t calc_size = 
MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + MCA_IBOFFLOAD_CALC_SIZE_EXT; - - pair_rank = my_exchange_node->rank_exchanges[0]; - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for send from first algorithm partner */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - (*l_operand)->length = calc_size; - for (exchange = 1; exchange < my_exchange_node->n_exchanges; ++exchange) { - pair_rank = my_exchange_node->rank_exchanges[exchange]; - - (*r_operand) = &preposted_recv_frag->sg_entry; - (*r_operand)->length = calc_size; - - /* Calc and send the result to the partner */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - pair_rank, coll_request->qp_index, NULL, - *l_operand, *r_operand, - coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - /* Calc and send the result to myself */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - *l_operand, *r_operand, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc 
task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (NULL == wait_task) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - (*l_operand) = &preposted_recv_frag->sg_entry; - (*l_operand)->length = calc_size; - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from the current algorithm partner */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - } - - (*r_operand) = &preposted_recv_frag->sg_entry; - (*r_operand)->length = calc_size; - - return 
OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -/* Power of 2 case */ -static int -pure_recursive_doubling(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request) -{ - /* local variables */ - int rc = OMPI_SUCCESS, pair_rank, - my_rank = ((mca_sbgp_base_module_t *) iboffload->ibnet)->my_index; - - struct mqe_task *last_send, - *last_wait; - - mca_bcol_iboffload_task_t *send_task, - *wait_task, - *calc_task; - - mca_bcol_iboffload_frag_t *send_fragment, - *preposted_recv_frag; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - struct ibv_sge *r_operand = NULL, - *l_operand = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - IBOFFLOAD_VERBOSE(10, ("Allreduce starting: type %d op %d, " - "n_extra_sources - %d.\n", cm->map_ompi_to_ib_dt[coll_request->dtype->id], - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - my_exchange_node->n_extra_sources)); - - pair_rank = my_exchange_node->rank_exchanges[0]; - - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - pair_rank, coll_request->qp_index, - (MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + MCA_IBOFFLOAD_CALC_SIZE_EXT), 0, - SBUF, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC); - if (OPAL_UNLIKELY(NULL == send_fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failing for 
getting and packing send frag.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } -/* Vasily: NO_INLINE ????? */ - /* send my operand to the first algorithm partner */ - send_task = mca_bcol_iboffload_get_send_task(iboffload, pair_rank, - coll_request->qp_index, send_fragment, coll_fragment, NO_INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - - l_operand = &send_fragment->sg_entry; - /* Recursive-doubling exchange */ - rc = do_exchange(iboffload, coll_request, &mqe_ptr_to_set, - &last_wait, &l_operand, &r_operand); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - if (false == coll_request->do_calc_in_cpu) { - /* Calc and send the result to myself */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - l_operand, - r_operand, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - 
APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } else { - coll_request->l_operand = l_operand->addr; - coll_request->r_operand = r_operand->addr; - } - - *mqe_ptr_to_set = NULL; -/* Vasily: TODO with MACRO */ - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - /* Pasha: need to set to true in upper layer */ - coll_request->user_handle_freed = false; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - IBOFFLOAD_VERBOSE(10, ("Post tasks.\n")); - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int rdma_do_exchange(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request, - struct mqe_task ***mqe_ptr_to_set, - struct mqe_task **last_wait, - struct ibv_sge **l_operand, - struct ibv_sge **r_operand) -{ - int rc = OMPI_SUCCESS, exchange, pair_rank, - my_rank = ((mca_sbgp_base_module_t *) iboffload->ibnet)->my_index; - - mca_bcol_iboffload_frag_t *preposted_recv_frag; - - mca_bcol_iboffload_task_t *wait_task, - *calc_task; - - struct mqe_task *last_send; - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - mca_bcol_iboffload_collfrag_t *coll_fragment = (mca_bcol_iboffload_collfrag_t *) - 
opal_list_get_last(&coll_request->work_requests); - - const size_t calc_size = MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + MCA_IBOFFLOAD_CALC_SIZE_EXT; - size_t remote_offset = calc_size; - size_t self_offset = 0; - - pair_rank = my_exchange_node->rank_exchanges[0]; - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for send from first algorithm partner */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - (*l_operand)->length = 2 * calc_size ; - for (exchange = 1; exchange < my_exchange_node->n_exchanges; ++exchange) { - pair_rank = my_exchange_node->rank_exchanges[exchange]; - /* Pasha: Not used - (*r_operand) = &preposted_recv_frag->sg_entry; - (*r_operand)->length = calc_size; - */ - - remote_offset += 2 * calc_size; - self_offset += 2 * calc_size; - - /* Calc and send the result to the partner */ - /* - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - pair_rank, coll_request->qp_index, NULL, - *l_operand, *r_operand, - coll_request, NO_INLINE); - */ - calc_task = mca_bcol_iboffload_get_rdma_calc_task(iboffload, - pair_rank, coll_request->qp_index, NULL, - *l_operand, NULL, - coll_request, remote_offset); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - 
APPEND_TO_TASKLIST((*mqe_ptr_to_set), calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - /* Calc and send the result to myself */ - /* - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - *l_operand, NULL, - coll_request, NO_INLINE); - */ - calc_task = mca_bcol_iboffload_get_rdma_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - *l_operand, NULL, - coll_request, self_offset); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (NULL == wait_task) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - /* - (*l_operand) = &preposted_recv_frag->sg_entry; - */ - - /* (*l_operand)->length = 2 * calc_size; */ - (*l_operand)->addr = (uint64_t) (uintptr_t) ((unsigned char *) (*l_operand)->addr + 2 * calc_size); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - 
IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from the current algorithm partner */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - } - /* Pasha: not used - (*r_operand) = &preposted_recv_frag->sg_entry; - (*r_operand)->length = calc_size; - */ - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -#define ALLREDUCE_BASE_OFFSET (MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + MCA_IBOFFLOAD_CALC_SIZE_EXT) - -/* RDMA Recursive doubling + cache friendly version */ -static int -rdma_pure_recursive_doubling(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request) -{ - /* local variables */ - int rc = OMPI_SUCCESS, pair_rank, - my_rank = ((mca_sbgp_base_module_t *) iboffload->ibnet)->my_index; - - struct mqe_task *last_send, - *last_wait; - - mca_bcol_iboffload_task_t *send_task, - *wait_task, - *calc_task; - - mca_bcol_iboffload_frag_t *send_fragment, - *preposted_recv_frag; - struct ibv_sge operand; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - struct ibv_sge *r_operand = NULL, - *l_operand = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - 
mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - IBOFFLOAD_VERBOSE(10, ("Allreduce starting: type %d op %d, " - "n_extra_sources - %d.\n", cm->map_ompi_to_ib_dt[coll_request->dtype->id], - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - my_exchange_node->n_extra_sources)); - - pair_rank = my_exchange_node->rank_exchanges[0]; - - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - pair_rank, coll_request->qp_index, - (MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + MCA_IBOFFLOAD_CALC_SIZE_EXT), - 0, - SBUF, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC); - if (OPAL_UNLIKELY(NULL == send_fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - /* Vasily: NO_INLINE ????? 
*/ - /* send my operand to the first algorithm partner */ - /* send_task = mca_bcol_iboffload_get_send_task(iboffload, pair_rank, - coll_request->qp_index, send_fragment, coll_fragment, NO_INLINE); */ - - send_task = mca_bcol_iboffload_get_rdma_task( - pair_rank, ALLREDUCE_BASE_OFFSET, - send_fragment, iboffload, coll_fragment); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Pasha: ugly but faster, set inline on first send */ - SENDWR(send_task)->send_flags |= IBV_SEND_INLINE; - - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - - /* l_operand = &send_fragment->sg_entry; */ - operand = send_fragment->sg_entry; - l_operand = &operand; - - /* Recursive-doubling exchange */ - rc = rdma_do_exchange(iboffload, coll_request, &mqe_ptr_to_set, - &last_wait, &l_operand, &r_operand); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - /* Pasha: This flow is broken, print error */ - if (false == coll_request->do_calc_in_cpu) { - ML_ERROR(("Calc in CPU must be enabled !!!")); - /* Calc and send the result to myself */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - l_operand, - r_operand, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ 
- wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } else { - coll_request->l_operand = (uint64_t) (uintptr_t) - ((unsigned char *)l_operand->addr); - coll_request->r_operand = (uint64_t) (uintptr_t) - ((unsigned char *) (coll_request->l_operand) + ALLREDUCE_BASE_OFFSET); - } - - *mqe_ptr_to_set = NULL; -/* Vasily: TODO with MACRO */ - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - /* Pasha: need to set to true in upper layer */ - coll_request->user_handle_freed = false; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - IBOFFLOAD_VERBOSE(10, ("Post tasks.\n")); - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} -/* - * non power of 2 & EXCHANGE_NODE case, - * need to wait for message from "extra" proc. 
- */ -static int -non_pure_recursive_doubling(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request) -{ - /* local variables */ - int rc = OMPI_SUCCESS, extra_rank, pair_rank, - my_rank = ((mca_sbgp_base_module_t *) iboffload->ibnet)->my_index; - - mca_bcol_iboffload_frag_t *calc_fragment, - *preposted_recv_frag; - - mca_bcol_iboffload_task_t *wait_task, - *calc_task; - - struct ibv_sge *r_operand = NULL, - *l_operand = NULL; - - struct mqe_task *last_wait, /* we need ask from completion on last wait */ - *last_send; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - IBOFFLOAD_VERBOSE(10, ("Allreduce starting: type %d op %d, " - "n_extra_sources - %d.\n", cm->map_ompi_to_ib_dt[coll_request->dtype->id], - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - my_exchange_node->n_extra_sources)); - - extra_rank = my_exchange_node->rank_extra_source; - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, extra_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for data from extra node */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, extra_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for 
getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - pair_rank = my_exchange_node->rank_exchanges[0]; - - calc_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - pair_rank, coll_request->qp_index, - MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + - MCA_IBOFFLOAD_CALC_SIZE_EXT, 0, - SBUF, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC); - if (OPAL_UNLIKELY(NULL == calc_fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Calc extra node operand with mine and send the result - to the first algorithm partner */ - preposted_recv_frag->sg_entry.length = MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + - MCA_IBOFFLOAD_CALC_SIZE_EXT; - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - pair_rank, coll_request->qp_index, calc_fragment, - &preposted_recv_frag->sg_entry, - &calc_fragment->sg_entry, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - /* Calc extra node operand with mine and store the result on my buff */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - &preposted_recv_frag->sg_entry, - &calc_fragment->sg_entry, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - 
mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - l_operand = &preposted_recv_frag->sg_entry; - l_operand->length = MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + - MCA_IBOFFLOAD_CALC_SIZE_EXT; - /* Recursive-doubling exchange */ - rc = do_exchange(iboffload, coll_request, &mqe_ptr_to_set, - &last_wait, &l_operand, &r_operand); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - /* Need to send message to "extra" proc => - one more final result calc for extra node */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - extra_rank, coll_request->qp_index, NULL, - l_operand, - r_operand, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - if (false == coll_request->do_calc_in_cpu) { - /* Calc and send the result to myself */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - l_operand, - r_operand, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - 
APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } else { - coll_request->l_operand = l_operand->addr; - coll_request->r_operand = r_operand->addr; - } - - *mqe_ptr_to_set = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - assert(NULL != last_wait); - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - IBOFFLOAD_VERBOSE(10, ("Post tasks.\n")); - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if(OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int mca_bcol_iboffload_allreduce_init( - bcol_function_args_t *fn_arguments, - 
mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t **coll_request, - bool if_bcol_last) -{ - int rc; - - bool exclude_case; - ompi_free_list_item_t *item; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_allreduce_init.\n")); - - OMPI_FREE_LIST_WAIT(&cm->collreqs_free, item, rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failing for coll request free list waiting.\n")); - return rc; - } - - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - (*coll_request)->progress_fn = iboffload->allreduce_algth; - - (*coll_request)->if_bcol_last = if_bcol_last; - - exclude_case = (non_pure_recursive_doubling == iboffload->allreduce_algth && - (OMPI_OP_SUM == fn_arguments->op->op_type && - OMPI_DATATYPE_MPI_DOUBLE == fn_arguments->dtype->id)); - - (*coll_request)->do_calc_in_cpu = cm->last_calc_in_cpu && !exclude_case; - - if (false == (*coll_request)->do_calc_in_cpu || - allreduce_extra_node == iboffload->allreduce_algth) { - (*coll_request)->do_calc_in_cpu = false; /* Relevant for extra node only */ - (*coll_request)->completion_cb_fn = - mca_bcol_iboffload_unpack_res_to_user; - } else { - (*coll_request)->completion_cb_fn = - mca_bcol_iboffload_calc_res_to_user; - } - - (*coll_request)->module = iboffload; - (*coll_request)->op = fn_arguments->op; - - (*coll_request)->dtype = fn_arguments->dtype; - (*coll_request)->count = fn_arguments->count; - - (*coll_request)->ml_buffer_index = fn_arguments->buffer_index; - (*coll_request)->buffer_info[SBUF].lkey = iboffload->rdma_block.ib_info.lkey; - - (*coll_request)->order_info = &fn_arguments->order_info; - - /* ML buffer was provided, no need to pack the data. 
- * It is few assumption here: - * we CAN touch and change ML buffer - */ - (*coll_request)->buffer_info[SBUF].buf = (void *) ( - (unsigned char *) fn_arguments->sbuf + - (size_t) fn_arguments->sbuf_offset); - - (*coll_request)->buffer_info[SBUF].offset = fn_arguments->sbuf_offset; - - (*coll_request)->buffer_info[RBUF].buf = (void *) ( - (unsigned char *) fn_arguments->rbuf + - (size_t) fn_arguments->rbuf_offset); - - (*coll_request)->buffer_info[RBUF].offset = fn_arguments->rbuf_offset; - - if(mca_bcol_iboffload_component.enable_rdma_calc) { - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER; - } else { - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_REGULAR; - } - - (*coll_request)->n_frag_mpi_complete = 0; - (*coll_request)->n_frag_net_complete = 0; - - fn_arguments->bcol_opaque_data = (void *) (*coll_request); - - /* - * setup collective work request - */ - - /* get collective frag */ - coll_fragment = &((*coll_request)->first_collfrag); - mca_bcol_iboffload_collfrag_init(coll_fragment); - - coll_fragment->mq_index = COLL_MQ; - coll_fragment->alg = RECURSIVE_DOUBLING_ALLREDUCE_ALG; - - coll_fragment->mq_credits = - iboffload->alg_task_consump[RECURSIVE_DOUBLING_ALLREDUCE_ALG]; - - /* set pointers for (coll frag) <-> (coll full request) */ - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(*coll_request, coll_fragment); - - coll_fragment->unpack_size = - mca_bcol_base_get_buff_length(fn_arguments->dtype, fn_arguments->count); - - IBOFFLOAD_VERBOSE(10, ("The input data is %lf", *(double *) (*coll_request)->buffer_info[SBUF].buf)); - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_allreduce_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - /* local variables */ - int rc; - - mca_bcol_iboffload_collreq_t *coll_request = NULL; - mca_bcol_iboffload_module_t *iboffload = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - /* Pasha: please do not touch this line, it used for ML buffer recycling 
barrier call */ - bool if_bcol_last = ((const_args->index_of_this_type_in_collective + 1) == - const_args->n_of_this_type_in_collective); - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - IBOFFLOAD_VERBOSE(10, ("n_of_this_type_in_a_row %d, index_in_consecutive_same_bcol_calls %d", - const_args->n_of_this_type_in_a_row, - const_args->index_in_consecutive_same_bcol_calls + 1)); - - IBOFFLOAD_VERBOSE(10, ("Allreduce started.\n")); - fn_arguments->result_in_rbuf = true; - - rc = mca_bcol_iboffload_allreduce_init(fn_arguments, iboffload, - &coll_request, if_bcol_last); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Get error from mca_bcol_iboffload_allreduce_init.\n")); - return rc; - } - - /* Allreduce starting */ - rc = iboffload->allreduce_algth(iboffload, coll_request); - if (OPAL_UNLIKELY(OMPI_ERROR == rc)) { - return BCOL_FN_NOT_STARTED; - } - - IBOFFLOAD_VERBOSE(10, ("Wait for completions.\n")); - - /* done */ - return BCOL_FN_STARTED; -} - -static int mca_bcol_iboffload_allreduce_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - - if (BCOL_IS_COMPLETED(coll_request)) { - coll_request->user_handle_freed = true; - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - - IBOFFLOAD_VERBOSE(10, ("Allreduce already done.\n")); - return BCOL_FN_COMPLETE; - } - - return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_allreduce_first_call(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request) -{ - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - int i = 0, my_rank = iboffload->ibnet->super.my_index, - n_exchanges = my_exchange_node->n_exchanges, - *exchanges = my_exchange_node->rank_exchanges, - n_extra_src = 
my_exchange_node->n_extra_sources, - rank_extra_src = my_exchange_node->rank_extra_source; - - mca_bcol_iboffload_endpoint_t *ep = iboffload->endpoints[my_rank]; - - /* Connecting to myself */ - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - - iboffload->alg_task_consump[RECURSIVE_DOUBLING_ALLREDUCE_ALG] = 0; - - if (0 < n_extra_src) { - iboffload->alg_task_consump[RECURSIVE_DOUBLING_ALLREDUCE_ALG] += 4; /* Two CALCs and two WAITs tasks */ - ep = iboffload->endpoints[rank_extra_src]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - for (i = 0; i < n_exchanges; ++i) { - iboffload->alg_task_consump[RECURSIVE_DOUBLING_ALLREDUCE_ALG] += 4; /* Two CALCs and two WAITs tasks */ - ep = iboffload->endpoints[exchanges[i]]; - - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - iboffload->alg_task_consump[RECURSIVE_DOUBLING_ALLREDUCE_ALG] += 4; /* Two CALCs and two WAITs tasks */ - - if (0 < my_exchange_node->n_extra_sources) { - iboffload->allreduce_algth = - (EXTRA_NODE == my_exchange_node->node_type)? 
- allreduce_extra_node: - non_pure_recursive_doubling; - } else { - if(mca_bcol_iboffload_component.enable_rdma_calc) { - iboffload->allreduce_algth = - rdma_pure_recursive_doubling; - } else { - iboffload->allreduce_algth = - pure_recursive_doubling; - } - } - - return iboffload->allreduce_algth(iboffload, coll_request); -} - -int mca_bcol_iboffload_allreduce_register(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register iboffload Allreduce.\n")); - - comm_attribs.bcoll_type = BCOL_ALLREDUCE; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_allreduce_intra, - mca_bcol_iboffload_allreduce_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c b/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c deleted file mode 100644 index 1eb47f5921a..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c +++ /dev/null @@ -1,934 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" - -static int mca_bcol_iboffload_barrier_init( - bcol_function_args_t *input_args, - mca_bcol_iboffload_module_t *iboffload, - collective_message_completion_callback_function cb_fn, - struct mca_bcol_iboffload_collreq_t **coll_request); - -/** - * Start barrier - */ - -int mca_bcol_iboffload_barrier_intra_recursive_doubling( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - /* local variables */ - mca_bcol_iboffload_task_t *send_task = NULL, - *wait_task = NULL; - - struct mqe_task **mqe_ptr_to_set = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = NULL; - - struct mqe_task *last_wait = NULL, /* we need ask from completion on last wait */ - *last_send = NULL; /* If it no wait, we need ask for completion on last send */ - - int rc, exchange, extra_rank, pair_rank; - - - mca_bcol_iboffload_frag_t *send_fragment = NULL, - *preposted_recv_frag = NULL; - - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_doubling.\n")); - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - /* Set mq credits */ - coll_fragment->mq_credits = iboffload->alg_task_consump[RECURSIVE_DOUBLING_BARRIER_ALG]; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - goto out_of_resources; - } - - 
coll_fragment->alg = RECURSIVE_DOUBLING_BARRIER_ALG; - - /* - * NOTE: need to generate template, if this will be a multiple fragment - * message. This way we can progress the collective w/o knowing it's - * type - actually, this is not the case for barrier, but just a note - * to remind us that we need to generalize this. - */ - - mqe_ptr_to_set = &coll_fragment->to_post; - - /* - * Fill in the communication pattern - */ - - /* - * If non power of 2, may need to wait for message from "extra" proc. - */ - - if (0 < my_exchange_node->n_extra_sources) { - if (EXCHANGE_NODE == my_exchange_node->node_type) { - /* I will participate in the exchange (of the algorithm) - - * wait for signal from extra process */ - extra_rank = my_exchange_node->rank_extra_source; - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, extra_rank, coll_request->qp_index); - - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, - extra_rank, 1, preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } else { - /* I will not participate in the exchange - so just "register" as here */ - extra_rank = my_exchange_node->rank_extra_source; - /* send - no need to send any data, in-order delivery */ - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - extra_rank, coll_request->qp_index, 0, - 0, SBUF,MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - send_task = mca_bcol_iboffload_get_send_task(iboffload, extra_rank, - coll_request->qp_index, send_fragment, coll_fragment, INLINE); - if 
(OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - } - } - - /* loop over exchange send/recv pairs */ - for (exchange = 0; exchange < my_exchange_node->n_exchanges; ++exchange) { - /* rank of exchange partner */ - pair_rank = my_exchange_node->rank_exchanges[exchange]; - /* post send */ - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - pair_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - assert(NULL != send_fragment); - - send_task = mca_bcol_iboffload_get_send_task(iboffload, pair_rank, - coll_request->qp_index, - send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - - /* post wait */ - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - preposted_recv_frag, - coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } - - /* if non power of 2, may need to send message to "extra" proc */ - if (0 < my_exchange_node->n_extra_sources) { 
- if (EXTRA_NODE == my_exchange_node->node_type) { - /* I will not participate in the exchange - - * wait for signal from exchange process */ - extra_rank = my_exchange_node->rank_extra_source; - /* post wait */ - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag(iboffload, extra_rank, - coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, extra_rank, 1, - preposted_recv_frag, - coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - } else { - /* I will participate in the exchange - - * send signal to extra process */ - extra_rank = my_exchange_node->rank_extra_source; - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - extra_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - send_task = mca_bcol_iboffload_get_send_task( - iboffload, extra_rank, - coll_request->qp_index, - send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - } - } - - /* Fill in the the rest of the coll_fragment */ - IBOFFLOAD_VERBOSE(10, ("Fill in the the rest of the coll_fragment.\n")); - /* end of list */ - *mqe_ptr_to_set = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - 
coll_request->n_frag_mpi_complete = 0; - coll_request->n_frag_net_complete = 0; - - coll_request->user_handle_freed = false; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - if (MCA_BCOL_IBOFFLOAD_QP_SYNC != coll_request->qp_index) { - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - } else { - /* Special flow for ML service barrier , only this function supposed to - post service requests */ - struct mqe_task *bad_mqe = NULL; - assert (MCA_BCOL_IBOFFLOAD_QP_SYNC == coll_request->qp_index ); - /* Post to special service MQ - 1 */ - rc = mqe_post_task(iboffload->mq[1], coll_fragment->to_post, &bad_mqe); - if (OPAL_UNLIKELY(0 != rc)) { - IBOFFLOAD_ERROR(("ibv_post_mqe failed on device (%s), errno says: %s," - " the return code is [%d]\n", - ibv_get_device_name(iboffload->device->dev.ib_dev), - strerror(errno), rc)); - return OMPI_ERROR; - } - } - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -int mca_bcol_iboffload_barrier_intra_recursive_doubling_start( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc; - - rc = mca_bcol_iboffload_rec_doubling_start_connections(iboffload); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - iboffload->barrier_algth = - mca_bcol_iboffload_barrier_intra_recursive_doubling; - return - 
mca_bcol_iboffload_barrier_intra_recursive_doubling(iboffload, coll_request); -} - -int mca_bcol_iboffload_nb_memory_service_barrier_start( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc; - - rc = mca_bcol_iboffload_rec_doubling_start_connections(iboffload); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - iboffload->memsync_algth = - mca_bcol_iboffload_barrier_intra_recursive_doubling; - - return - mca_bcol_iboffload_barrier_intra_recursive_doubling - (iboffload, coll_request); -} - -int mca_bcol_iboffload_nb_memory_service_barrier_intra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - - /* local variables */ - int rc; - mca_bcol_iboffload_collreq_t *coll_request; - mca_bcol_iboffload_module_t *iboffload = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - /* - * recursive doubling - */ - - - IBOFFLOAD_VERBOSE(10, ("Memory syncranization barrier was started\n")); - - /* init barrier collective request */ - rc = mca_bcol_iboffload_barrier_init(input_args, iboffload, NULL, &coll_request); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Get error from mca_bcol_iboffload_barrier_init")); - return rc; - } - - /* set the qp index to special qp that is used only for synchronization */ - coll_request->qp_index = MCA_BCOL_IBOFFLOAD_QP_SYNC; - /* overwrite mq index to run over service setup */ - coll_request->first_collfrag.mq_index = SERVICE_MQ; - - /* start the barrier */ - rc = iboffload->memsync_algth(iboffload, coll_request); - if (OPAL_UNLIKELY(OMPI_ERROR == rc)) { - return rc; - } - - /* complete the barrier - progress releases full request descriptors */ - IBOFFLOAD_VERBOSE(10, ("Memory syncranization barrier was started\n")); - - /* done */ - return BCOL_FN_STARTED; -} - -/* Recursive K - ing*/ -static int recursive_knomial_start_connections(struct mca_bcol_iboffload_module_t *iboffload) -{ - netpatterns_k_exchange_node_t 
*my_exchange_node = - &iboffload->knomial_exchange_tree; - int k, i, n_exchanges = my_exchange_node->n_exchanges, - **exchanges = my_exchange_node->rank_exchanges, - n_extra_src = my_exchange_node->n_extra_sources, - tree_order = my_exchange_node->tree_order - 1, - rank_extra_src; - - mca_bcol_iboffload_endpoint_t *ep; - - iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG] += 0; - - IBOFFLOAD_VERBOSE(10, ("\nMy sbgp rank (index) - %d, " - "num of endpoints = %d, iboffload module - %p" - " extra n %d, n_exchanges %d", - iboffload->ibnet->super.my_index, iboffload->num_endpoints, iboffload, - n_extra_src, n_exchanges)); - if (0 < n_extra_src) { - for (k = 0; k < n_extra_src; k++) { - iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG] += 2; /* One send task one wait */ - rank_extra_src = my_exchange_node->rank_extra_sources_array[k]; - ep = iboffload->endpoints[rank_extra_src]; - if (iboffload->ibnet->super.my_index < ep->index) { - while(0 == (ep)->remote_zero_rdma_addr.addr) { - opal_progress(); - } - } else { - IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index)); - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - } - } - - for (i = 0; i < n_exchanges; ++i) { - for (k = 0; k < tree_order; k++) { - iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG] += 2; /* One send task one wait */ - ep = iboffload->endpoints[exchanges[i][k]]; - - IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index)); - if (iboffload->ibnet->super.my_index < ep->index) { - while(0 == (ep)->remote_zero_rdma_addr.addr) { - opal_progress(); - } - } else { - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - } - } - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_barrier_intra_recursive_knomial( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - /* local variables */ - mca_bcol_iboffload_task_t *send_task = NULL, - 
*wait_task = NULL; - - struct mqe_task **mqe_ptr_to_set = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = NULL; - - struct mqe_task *last_wait = NULL, /* we need ask from completion on last wait */ - *last_send = NULL; /* If it no wait, we need ask for completion on last send */ - - int rc, exchange, extra_rank, pair_rank, k; - - - mca_bcol_iboffload_frag_t *send_fragment = NULL, - *preposted_recv_frag = NULL; - - netpatterns_k_exchange_node_t *my_exchange_node = - &iboffload->knomial_exchange_tree; - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_knomial. Node type %d\n", my_exchange_node->node_type)); - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - /* Set mq credits */ - coll_fragment->mq_credits = iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG]; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - goto out_of_resources; - } - - coll_fragment->alg = RECURSIVE_KNOMIAL_BARRIER_ALG; - - /* - * NOTE: need to generate template, if this will be a multiple fragment - * message. This way we can progress the collective w/o knowing it's - * type - actually, this is not the case for barrier, but just a note - * to remind us that we need to generalize this. - */ - - mqe_ptr_to_set = &coll_fragment->to_post; - - /* - * Fill in the communication pattern - */ - - /* - * If non power of 2, may need to wait for message from "extra" proc. 
- */ - - if (0 < my_exchange_node->n_extra_sources) { - if (EXCHANGE_NODE == my_exchange_node->node_type) { - /* I will participate in the exchange (of the algorithm) - - * wait for signal from extra process */ - for (k = 0; k < my_exchange_node->n_extra_sources; k++) { - extra_rank = my_exchange_node->rank_extra_sources_array[k]; - IBOFFLOAD_VERBOSE(10,("Exchange [ %d ] extra get %d", k, extra_rank)); - - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, extra_rank, coll_request->qp_index); - - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, - extra_rank, 1, preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } - } else { - /* I will not participate in the exchange - so just "register" as here */ - extra_rank = my_exchange_node->rank_extra_sources_array[0]; - IBOFFLOAD_VERBOSE(10,("Send to proxy %d", extra_rank)); - /* send - no need to send any data, in-order delivery */ - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - extra_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - send_task = mca_bcol_iboffload_get_send_task(iboffload, extra_rank, - coll_request->qp_index, send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - 
} - } - - /* loop over exchange send/recv pairs */ - for (exchange = 0; exchange < my_exchange_node->n_exchanges; ++exchange) { - for (k = 0; k < my_exchange_node->tree_order - 1; k++) { - /* rank of exchange partner */ - pair_rank = my_exchange_node->rank_exchanges[exchange][k]; - IBOFFLOAD_VERBOSE(10,("Exchange [ %d ,%d ] send to %d", exchange, k, pair_rank)); - /* post send */ - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - pair_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - send_task = mca_bcol_iboffload_get_send_task(iboffload, pair_rank, - coll_request->qp_index, - send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - } - - for (k = 0; k < my_exchange_node->tree_order - 1; k++) { - - pair_rank = my_exchange_node->rank_exchanges[exchange][k]; - IBOFFLOAD_VERBOSE(10,("Exchange [ %d ,%d ] recv %d", exchange, k, pair_rank)); - /* post wait */ - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } - } - - /* if non power of 2, may need to send message to "extra" proc */ - if (0 < 
my_exchange_node->n_extra_sources) { - if (EXTRA_NODE == my_exchange_node->node_type) { - /* I will not participate in the exchange - - * wait for signal from exchange process */ - extra_rank = my_exchange_node->rank_extra_sources_array[0]; - IBOFFLOAD_VERBOSE(10,("Wait from proxy %d", extra_rank)); - /* post wait */ - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag(iboffload, extra_rank, - coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, extra_rank, 1, - preposted_recv_frag, - coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - } else { - /* I will participate in the exchange - - * send signal to extra process */ - for (k = 0; k < my_exchange_node->n_extra_sources; k++) { - extra_rank = my_exchange_node->rank_extra_sources_array[k]; - IBOFFLOAD_VERBOSE(10,("Exchange [ %d ] extra release %d", k, extra_rank)); - - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - extra_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - send_task = mca_bcol_iboffload_get_send_task( - iboffload, extra_rank, - coll_request->qp_index, - send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - } - } - } - - /* Fill in the the rest of the 
coll_fragment */ - IBOFFLOAD_VERBOSE(10, ("Fill in the the rest of the coll_fragment.\n")); - /* end of list */ - *mqe_ptr_to_set = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - coll_request->n_frag_mpi_complete = 0; - coll_request->n_frag_net_complete = 0; - - coll_request->user_handle_freed = false; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - if (MCA_BCOL_IBOFFLOAD_QP_SYNC != coll_request->qp_index) { - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - } else { - /* Special flow for ML service barrier , only this function supposed to - post service requests */ - struct mqe_task *bad_mqe = NULL; - assert (MCA_BCOL_IBOFFLOAD_QP_SYNC == coll_request->qp_index ); - /* Post to special service MQ - 1 */ - rc = mqe_post_task(iboffload->mq[1], coll_fragment->to_post, &bad_mqe); - if (OPAL_UNLIKELY(0 != rc)) { - IBOFFLOAD_ERROR(("ibv_post_mqe failed on device (%s), errno says: %s," - " the return code is [%d]\n", - ibv_get_device_name(iboffload->device->dev.ib_dev), - strerror(errno), rc)); - return OMPI_ERROR; - } - } - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -int mca_bcol_iboffload_barrier_intra_recursive_knomial_start( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc; - - rc = 
recursive_knomial_start_connections(iboffload); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - iboffload->barrier_algth = - mca_bcol_iboffload_barrier_intra_recursive_knomial; - return - mca_bcol_iboffload_barrier_intra_recursive_knomial(iboffload, coll_request); -} - -int mca_bcol_iboffload_rec_doubling_start_connections(mca_bcol_iboffload_module_t *iboffload) -{ - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - int i, n_exchanges = my_exchange_node->n_exchanges, - *exchanges = my_exchange_node->rank_exchanges, - n_extra_src = my_exchange_node->n_extra_sources, - rank_extra_src = my_exchange_node->rank_extra_source; - - mca_bcol_iboffload_endpoint_t *ep; - - IBOFFLOAD_VERBOSE(10, ("\nMy sbgp rank (index) - %d, " - "num of endpoints = %d, iboffload module - %p\n", - iboffload->ibnet->super.my_index, iboffload->num_endpoints, iboffload)); - if (0 < n_extra_src) { - iboffload->alg_task_consump[RECURSIVE_DOUBLING_BARRIER_ALG] += 2; /* One send task one wait */ - ep = iboffload->endpoints[rank_extra_src]; - - if (iboffload->ibnet->super.my_index < ep->index) { - while(0 == (ep)->remote_zero_rdma_addr.addr) { - opal_progress(); - } - } else { - IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index)); - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - } - - for (i = 0; i < n_exchanges; ++i) { - iboffload->alg_task_consump[RECURSIVE_DOUBLING_BARRIER_ALG] += 2; /* One send task one wait */ - ep = iboffload->endpoints[exchanges[i]]; - - if (iboffload->ibnet->super.my_index < ep->index) { - while(0 == (ep)->remote_zero_rdma_addr.addr) { - opal_progress(); - } - } else { - IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index)); - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - } - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_barrier_init( - bcol_function_args_t *input_args, - 
mca_bcol_iboffload_module_t *iboffload, - collective_message_completion_callback_function cb_fn, - struct mca_bcol_iboffload_collreq_t **coll_request) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_init")); - - OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item); - if (OPAL_UNLIKELY(NULL == item)) { - IBOFFLOAD_VERBOSE(10, ("Failing for coll request free list waiting.\n")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - (*coll_request)->progress_fn = iboffload->barrier_algth; - - /* - * For usual barrier it is null. For memory - * service barrier we need some work to do - */ - (*coll_request)->completion_cb_fn = cb_fn; - (*coll_request)->order_info = &input_args->order_info; - - (*coll_request)->module = iboffload; - (*coll_request)->ml_buffer_index = input_args->buffer_index; - (*coll_request)->buffer_info[SBUF].offset = 0; - (*coll_request)->buffer_info[RBUF].offset = 0; - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER; - - input_args->bcol_opaque_data = (void *) (*coll_request); - - /* - * setup collective work request - */ - - /* get collective frag */ - coll_fragment = &(*coll_request)->first_collfrag; - mca_bcol_iboffload_collfrag_init(coll_fragment); - - coll_fragment->mq_index = COLL_MQ; - - /* set pointers for (coll frag) <-> (coll full request) */ - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(*coll_request, coll_fragment); - - return OMPI_SUCCESS; -} - -/************************************************************************ - ************************ New style Barrier ***************************** - ***********************************************************************/ - -static int mca_bcol_iboffload_new_style_barrier_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - 
mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - - if (BCOL_IS_COMPLETED(coll_request)) { - coll_request->user_handle_freed = true; - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - - IBOFFLOAD_VERBOSE(10, ("Barrier already done.\n")); - return BCOL_FN_COMPLETE; - } - - return BCOL_FN_STARTED; -} - -static int mca_bcol_iboffload_new_style_barrier_intra( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variables */ - int rc; - mca_bcol_iboffload_collreq_t *coll_request; - mca_bcol_iboffload_module_t *iboffload = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - /* check for ordering */ - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, input_args); - - /* - * recursive doubling - */ - - - IBOFFLOAD_VERBOSE(10, ("Barrier starts.\n")); - - /* init barrier collective reqeust */ - rc = mca_bcol_iboffload_barrier_init(input_args, iboffload, NULL, &coll_request); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Get error from mca_bcol_iboffload_barrier_init")); - return rc; - } - - /* start the barrier */ - rc = iboffload->barrier_algth(iboffload, coll_request); - if (OPAL_UNLIKELY(OMPI_ERROR == rc)) { - return BCOL_FN_NOT_STARTED; - } - - /* done */ - return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_barrier_register(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register iboffload Barrier.\n")); - - comm_attribs.bcoll_type = BCOL_BARRIER; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap 
= 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_new_style_barrier_intra, - mca_bcol_iboffload_new_style_barrier_progress); - - return OMPI_SUCCESS; -} - -int mca_bcol_iboffload_memsync_register(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register sync function\n")); - - comm_attribs.bcoll_type = BCOL_SYNC; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_nb_memory_service_barrier_intra, - mca_bcol_iboffload_new_style_barrier_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c b/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c deleted file mode 100644 index 9b5b216e65b..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c +++ /dev/null @@ -1,1065 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include -#include "opal_stdint.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_bcast.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -#include "opal/include/opal/types.h" - -static int mca_bcol_iboffload_bcast_init( - bcol_function_args_t *fn_arguments, - mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t **coll_request, - bool if_bcol_last, int mq_credits, - collective_message_progress_function progress_fn) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_collfrag_t *coll_fragment; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - - OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item); - if (OPAL_UNLIKELY(NULL == item)) { - IBOFFLOAD_ERROR(("Wait for free list failed.\n")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - /* setup call request */ - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - - (*coll_request)->n_fragments = 0; - (*coll_request)->n_frags_sent = 0; - (*coll_request)->n_frag_mpi_complete = 0; - (*coll_request)->n_frag_net_complete = 0; - (*coll_request)->if_bcol_last = if_bcol_last; - (*coll_request)->ml_buffer_index = fn_arguments->buffer_index; - (*coll_request)->completion_cb_fn = NULL; - (*coll_request)->buffer_info[SBUF].buf = (void *) ( - (unsigned char *)fn_arguments->sbuf + - fn_arguments->sbuf_offset); - (*coll_request)->buffer_info[SBUF].offset = fn_arguments->sbuf_offset; - (*coll_request)->buffer_info[RBUF].offset = fn_arguments->rbuf_offset; - - (*coll_request)->dtype = fn_arguments->dtype; - (*coll_request)->count = fn_arguments->count; - (*coll_request)->module = iboffload_module; - /* TODO Pasha: we need 
it for pending quque. Set it later. */ - (*coll_request)->progress_fn = progress_fn; - /* TODO Pasha: fix it later */ - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_REGULAR; - - (*coll_request)->order_info = &fn_arguments->order_info; - - coll_fragment = &((*coll_request)->first_collfrag); - mca_bcol_iboffload_collfrag_init(coll_fragment); - - /** Vasily ????? */ - /* mq_credits = (*coll_request)->total_tasks_num; */ - coll_fragment->mq_credits = mq_credits; - coll_fragment->mq_index = COLL_MQ; - /* Pasha: just set it to zero */ - coll_fragment->last_wait_num = 0; - coll_fragment->alg = -2; /* used only for debug */ - /* - if (my_rank == algthm_ptr->root) { - coll_fragment->last_wait_num = 0; - } else { - coll_fragment->last_wait_num = algth_lst->last_wait_num; - } - */ - /* Pasha: we have nothing to unpack */ - coll_fragment->unpack_size = 0; - /* coll_fragment->unpack_size = pack_len; */ - /* coll_fragment->alg = RECURSIVE_DOUBLING_TREE_BCAST; */ - - /* set pointers for (coll frag) <-> (coll full request) */ - (*coll_request)->user_handle_freed = false; - - fn_arguments->bcol_opaque_data = (void *) (*coll_request); - - if (true == fn_arguments->root_flag) { - (*coll_request)->root = my_group_index; - } else { - (*coll_request)->root = fn_arguments->root_route->rank; - } - - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS((*coll_request), coll_fragment); - return OMPI_SUCCESS; -} -static inline __opal_attribute_always_inline__ int -binomial_scatter_smsg( - mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collfrag_t *coll_fragment, - struct mqe_task **last_send, - int radix_mask_pow, - uint32_t my_group_index, - size_t send_size - ) -{ - int rc, dst; - int radix_mask = radix_mask_pow >= 0 ? 
1 << radix_mask_pow : 0; - - while(radix_mask > 0) { - /* For each level of tree, do sends */ - dst = my_group_index ^ radix_mask; - rc = mca_bcol_iboffload_send_small_buff_setup( - last_send, send_size, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); - return rc; - } - - radix_mask >>= 1; - } - - return OMPI_SUCCESS; -} - -#define BINOMIAL_SMALL_SCATTER( \ - iboffload_module, coll_fragment, \ - last_wait, last_send, \ - distance, \ - my_group_index, \ - segment_size \ - ) \ -do { \ - int rc = OMPI_SUCCESS; \ - int dst; \ - int send_size; \ - int dst_boundary_rank; \ - int radix_mask_pow = distance; \ - int radix_mask = (distance) >= 0 ? 1 << (distance) : 0; \ - IBOFFLOAD_VERBOSE(10, ("BCAST SCATTER %d %d", radix_mask, distance)); \ - \ - while(radix_mask > 0) { \ - /* For each level of tree, do sends */ \ - dst = my_group_index ^ radix_mask; \ - dst_boundary_rank = dst & ((~(int)0) << (radix_mask_pow)); \ - \ - IBOFFLOAD_VERBOSE(10, ("Scatter data to %d , len %d offset %d", dst, send_size, send_offset)); \ - \ - rc = mca_bcol_iboffload_send_small_buff_setup( \ - &last_send, send_size, dst, \ - iboffload_module, coll_fragment); \ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { \ - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); \ - return rc; \ - } \ - radix_mask >>= 1; \ - /* radix_mask_pow--; */ \ - } \ -} while(0) - - -int mca_bcol_iboffload_small_msg_bcast_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - - IBOFFLOAD_VERBOSE(10, ("Run progress.\n")); - - /* We should send the data to our children in the tree before - the upper layer will start with buffers recycling */ - if (BCOL_AND_NET_ARE_COMPLETED(coll_request)) { - coll_request->user_handle_freed = true; - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, 
("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - - IBOFFLOAD_VERBOSE(10, ("New bcast done !!!")); - return BCOL_FN_COMPLETE; - } - - return BCOL_FN_STARTED; -} - -static int mca_bcol_iboffload_small_msg_bcast_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - netpatterns_pair_exchange_node_t *recursive_doubling_tree = - &iboffload_module->recursive_doubling_tree; - - int rc, - distance_mask_pow , dst, - group_src, power_of_2_distance; - - uint32_t pack_len; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - - IBOFFLOAD_VERBOSE(10,("Entering small msg iboffload bcast")); - - if (OPAL_UNLIKELY(!iboffload_module->connection_status[RECURSIVE_DOUBLING_TREE_BCAST])) { - IBOFFLOAD_VERBOSE(10,("Bcast open new connection ")); - bcol_iboffload_setup_binomial_connection(iboffload_module); - } - - pack_len = coll_request->count * coll_request->dtype->super.size; - IBOFFLOAD_VERBOSE(10,("My packet length %d pack_len frag_count %d dtype size %d ", - pack_len, - coll_request->count, - coll_request->dtype->super.size)); - - /* it is estimated mq consumption... 
*/ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - coll_request->buffer_info[SBUF].lkey = iboffload_module->rdma_block.ib_info.lkey; - - if (coll_request->root == my_group_index) { - IBOFFLOAD_VERBOSE(10, ("I'm root of the data")); - - /* Send data to the extra peer */ - if (recursive_doubling_tree->n_extra_sources > 0) { - /* send the all data to your extra peer */ - dst = recursive_doubling_tree->rank_extra_source; - IBOFFLOAD_VERBOSE(10,("Sending the dat to Dst %d",dst)); - rc = mca_bcol_iboffload_send_small_buff_setup( - &last_send, pack_len, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_large_buff_setup")); - goto out_of_resources; - } - } - - distance_mask_pow = - iboffload_module->power_of_2 - 1; - - rc = binomial_scatter_smsg(iboffload_module, coll_fragment, - &last_send, distance_mask_pow, - my_group_index, pack_len); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to binomial_scatter_smsg")); - goto out_of_resources; - } - - goto finalize; - } - - /* prepare and post recv operation */ - group_src = bcol_iboffload_binomial_root_to_src(coll_request->root, - my_group_index, iboffload_module->power_of_2_ranks, - iboffload_module->group_size, &power_of_2_distance); - assert(group_src >= 0); - - if (0 > power_of_2_distance) { - /* the rank is virtual root for this group, receive the data - and scatter gather as root */ - IBOFFLOAD_VERBOSE(10,("Virtual root distance_mask_pow %d ",iboffload_module->power_of_2)); - distance_mask_pow = iboffload_module->power_of_2 - 1; - } else { - distance_mask_pow = power_of_2_distance - 1; - } - - IBOFFLOAD_VERBOSE(10, ("Bcast, receive data from %d[%d], count 
%d, offset %d", - group_src)); - - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len, group_src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - goto out_of_resources; - } - - rc = binomial_scatter_smsg(iboffload_module, coll_fragment, - &last_send, distance_mask_pow, - my_group_index, pack_len); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to binomial_scatter_smsg")); - goto out_of_resources; - } - - if (recursive_doubling_tree->n_extra_sources > 0 && - iboffload_module->power_of_2 - 1 != distance_mask_pow) { -/* - - if ((recursive_doubling_tree->n_extra_sources > 0) && - ((my_group_index + iboffload_module->power_of_2_ranks ) < - iboffload_module->group_size) ) { - */ - dst = recursive_doubling_tree->rank_extra_source; - /* - dst = my_group_index + iboffload_module->power_of_2_ranks; - */ - - rc = mca_bcol_iboffload_send_small_buff_setup( - &last_send, pack_len, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_small_buff_setup")); - goto out_of_resources; - } - } - -finalize: - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - (coll_request)->n_fragments += 1; - (coll_request)->n_frags_sent += 1; - - if (NULL != last_wait) { - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } else { - last_send->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to 
clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_small_msg_bcast_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - int rc; - int mq_credits = iboffload_module->power_of_2 + 2; - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_bcast_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_small_msg_bcast_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_small_msg_bcast_intra was started [%d]\n", rc)); - return rc; -} - -static int mca_bcol_iboffload_small_msg_bcast_extra_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - netpatterns_pair_exchange_node_t *recursive_doubling_tree = - &iboffload_module->recursive_doubling_tree; - - int rc, - dst; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - uint32_t pack_len; - - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - - IBOFFLOAD_VERBOSE(10,("Entering small msg extra iboffload bcast")); - - if 
(OPAL_UNLIKELY(!iboffload_module->connection_status[RECURSIVE_DOUBLING_TREE_BCAST])) { - IBOFFLOAD_VERBOSE(10,("Bcast open new connection ")); - bcol_iboffload_setup_binomial_connection(iboffload_module); - } - - - pack_len = coll_request->count * coll_request->dtype->super.size; - coll_request->buffer_info[SBUF].lkey = iboffload_module->rdma_block.ib_info.lkey; - - IBOFFLOAD_VERBOSE(10,("My packet length %d pack_len frag_count %d dtype size %d ", - pack_len, - coll_request->count, - coll_request->dtype->super.size)); - - /* it is estimated mq consumption... */ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, - coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - - - if (coll_request->root == my_group_index) { - IBOFFLOAD_VERBOSE(10, ("I'm root of the data %d", iboffload_module->power_of_2)); - /* send the all data to your extra peer */ - - dst = recursive_doubling_tree->rank_extra_source; - IBOFFLOAD_VERBOSE(10,("Im extra root sending data to %d \n",dst)); - rc = mca_bcol_iboffload_send_small_buff_setup( - &last_send, pack_len, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_small_buff_setup")); - goto out_of_resources; - } - } else { - /* Not root case */ - dst = recursive_doubling_tree->rank_extra_source; - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - return OMPI_ERROR; - } - } - - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - (coll_request)->n_fragments = 1; - (coll_request)->n_frags_sent = 1; - - if (NULL != last_wait) { - last_wait->flags |= 
MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } else { - last_send->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_small_msg_bcast_extra_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *)const_args->bcol_module; - - int rc; - int mq_credits = 2; - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_bcast_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_small_msg_bcast_extra_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_small_msg_bcast_extra_exec was started [%d]\n", rc)); - return rc; -} - -/* Large message scatter-allgather with zero copy */ -int 
mca_bcol_iboffload_zero_copy_progress(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - int i; - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *)fn_arguments->bcol_opaque_data; - - /* IBOFFLOAD_VERBOSE(10, ("Run general progress. %d == %d * %d == %d", - coll_request->n_frag_mpi_complete, coll_request->n_fragments, - coll_request->n_frag_net_complete, coll_request->n_fragments)); */ - - /* Complete the bcast - progress releases full request descriptors */ - for (i = 0; i < mca_bcol_iboffload_component.max_progress_pull; i++) { - if (coll_request->n_frag_mpi_complete == coll_request->n_fragments && - coll_request->n_frag_net_complete == coll_request->n_fragments) { - - IBOFFLOAD_VERBOSE(10, ("Deregister user buff.\n")); - coll_request->module->device->mpool->mpool_deregister( - coll_request->module->device->mpool, - (mca_mpool_base_registration_t *) coll_request->buffer_info[SBUF].iboffload_reg); - coll_request->buffer_info[SBUF].iboffload_reg = NULL; - - RELEASE_COLLREQ(coll_request); - IBOFFLOAD_VERBOSE(10, ("New bcast done !!!")); - return BCOL_FN_COMPLETE; - } - } - - /* IBOFFLOAD_VERBOSE(10, ("Bcast general progress done")); */ - - /* done */ - return BCOL_FN_STARTED; -} -/* Pasha: I have to move it to static inline later, it looks too ugly for macro */ -#define BINOMIAL_SCATTER( \ - iboffload_module, coll_fragment, \ - last_wait, last_send, \ - distance, \ - my_group_index, \ - segment_size, count \ - ) \ -do { \ - int rc = OMPI_SUCCESS; \ - int dst; \ - int send_size; \ - int send_offset; \ - int delta; \ - int dst_boundary_rank; \ - int radix_mask_pow = distance; \ - int radix_mask = (distance) >= 0 ? 
1 << (distance) : 0; \ - IBOFFLOAD_VERBOSE(10, ("BCAST SCATTER %d %d", radix_mask, distance)); \ - \ - while(radix_mask > 0) { \ - /* For each level of tree, do sends */ \ - dst = my_group_index ^ radix_mask; \ - dst_boundary_rank = dst & ((~(int)0) << (radix_mask_pow)); \ - send_offset = segment_size * dst_boundary_rank; \ - /* Pasha: make sure that we handle the corner cases */ \ - delta = count - send_offset; \ - if (OPAL_UNLIKELY(delta <= 0)) { \ - radix_mask >>= 1; \ - radix_mask_pow--; \ - continue; /* we have to send something, other way it will hang */ \ - } else { \ - /* the tail case */ \ - send_size = (int) \ - (delta - (int)segment_size * radix_mask) < 0 ? delta : \ - (int)segment_size * radix_mask; \ - } \ - IBOFFLOAD_VERBOSE(10, ("Scatter data to %d , len %d offset %d", dst, send_size, send_offset)); \ - rc = mca_bcol_iboffload_recv_rtr_setup( \ - &last_wait, dst, iboffload_module, coll_fragment); \ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { \ - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - rc = mca_bcol_iboffload_send_large_buff_setup( \ - &last_send, SBUF, send_offset, send_size, dst, \ - iboffload_module, coll_fragment); \ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { \ - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - radix_mask >>= 1; \ - radix_mask_pow--; \ - } \ -} while(0) - -static int mca_bcol_iboffload_bcast_scatter_allgather_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - netpatterns_pair_exchange_node_t *recursive_doubling_tree = - &iboffload_module->recursive_doubling_tree; - - int rc, - dst, - group_src, power_of_2_distance, - recv_count; - size_t offset; - int count = coll_request->count * coll_request->dtype->super.size; - int my_group_index = iboffload_module->ibnet->super.my_index; - size_t base_block_size = - (count + iboffload_module->power_of_2_ranks - 1) / - iboffload_module->power_of_2_ranks; - - struct 
mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - - if (OPAL_UNLIKELY(!iboffload_module->connection_status[RECURSIVE_DOUBLING_TREE_BCAST])) { - bcol_iboffload_setup_binomial_connection(iboffload_module); - } - - /* register memory in mpool/rcache */ - rc = mca_bcol_iboffload_prepare_buffer(coll_request->buffer_info[SBUF].buf, count, - &coll_request->buffer_info[SBUF].iboffload_reg, iboffload_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Cannot register memory: " - "addr - %p, %d bytes.\n", - coll_request->buffer_info[SBUF].buf, count)); - return OMPI_ERROR; - } - - coll_request->buffer_info[SBUF].lkey = coll_request->buffer_info[SBUF].iboffload_reg->mr->lkey; - - /* it is estimated mq consumption... */ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits) || - false == opal_list_is_empty(&iboffload_module->collfrag_pending))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - - if (coll_request->root == my_group_index) { - IBOFFLOAD_VERBOSE(10, ("I'm root of the data %d %d", - iboffload_module->power_of_2, recursive_doubling_tree->n_extra_sources )); - /* for proxy we have little bit more work to do */ - if (recursive_doubling_tree->n_extra_sources > 0) { - /* send the all data to your extra peer */ - dst = recursive_doubling_tree->rank_extra_source; - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_recv_rtr_setup")); - return OMPI_ERROR; - } - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, SBUF, 0, count, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, 
("Failed to" - " mca_bcol_iboffload_send_large_buff_setup")); - return OMPI_ERROR; - } - } - power_of_2_distance = iboffload_module->power_of_2; - - BINOMIAL_SCATTER(iboffload_module, coll_fragment, - last_wait, last_send, power_of_2_distance - 1, - my_group_index, base_block_size, count - ); - /* EXIT OR GO TO Gather */ - goto GATHER; - } - - /* prepare and post recv operation */ - group_src = bcol_iboffload_binomial_root_to_src(coll_request->root, - my_group_index, iboffload_module->power_of_2_ranks, - iboffload_module->group_size, &power_of_2_distance); - - IBOFFLOAD_VERBOSE(10, ("SRC %d DIST %d ranks %d gsize %d root %d my rank %d", - group_src, power_of_2_distance, iboffload_module->power_of_2_ranks, - iboffload_module->group_size, - coll_request->root, my_group_index)); - assert(group_src >= 0); - - if (0 > power_of_2_distance) { - /* the rank is virtual root for this group, receive the data - and scatter gather as root */ - power_of_2_distance = - iboffload_module->power_of_2; - offset = 0; - recv_count = count; - IBOFFLOAD_VERBOSE(10, ("Virtual root %d , set mask to %d", - my_group_index, power_of_2_distance)); - } else { - int my_left_boundary_rank; - int delta; - recv_count = base_block_size * (1 << power_of_2_distance); /* we may receive larger data */ - my_left_boundary_rank = my_group_index & ((~(int)0) << power_of_2_distance ); - offset = (size_t) (base_block_size * my_left_boundary_rank); - delta = count - offset; - if (OPAL_UNLIKELY(delta <= 0)) { - /* no data to recv */ - goto GATHER; - } else { - recv_count = (delta < recv_count) ? 
delta : recv_count; - } - - IBOFFLOAD_VERBOSE(10, ("Recv data set mask to %d", - power_of_2_distance)); - } - - IBOFFLOAD_VERBOSE(10, ("Bcast, receive data from %d[%d], count %d, offset %d", - group_src, recv_count, offset)); - - /* Receive data to user buffer */ - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - group_src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup send rtr")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, - SBUF, offset, recv_count, group_src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - return OMPI_ERROR; - } - - BINOMIAL_SCATTER(iboffload_module, coll_fragment, - last_wait, last_send, power_of_2_distance - 1, - my_group_index, base_block_size, count); - -GATHER: - rc = bcol_iboffload_bcast_binomial_gather(iboffload_module, - &last_send, &last_wait, coll_fragment, - count, base_block_size, power_of_2_distance); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup gather. 
Return %d", rc)); - return rc; - } - - if (recursive_doubling_tree->n_extra_sources > 0 && - iboffload_module->power_of_2 != power_of_2_distance) { - dst = recursive_doubling_tree->rank_extra_source; - - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_recv_rtr_setup")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, SBUF, 0, count, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_large_buff_setup")); - return OMPI_ERROR; - } - } - - IBOFFLOAD_VERBOSE(10, ("Fill in the the rest of the coll_fragment.\n")); - - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments += 1; - coll_request->n_frags_sent += 1; - - if (NULL != last_wait) { - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } else { - last_send->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return 
(OMPI_SUCCESS != rc) ? BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_bcast_scatter_allgather_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - int rc; - int mq_credits = iboffload_module->power_of_2 * 3 + 4; - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_bcast_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_bcast_scatter_allgather_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_bcast_scatter_allgather_intra was started [%d]\n", rc)); - return rc; -} - -static int mca_bcol_iboffload_bcast_scatter_allgather_extra_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - netpatterns_pair_exchange_node_t *recursive_doubling_tree = - &iboffload_module->recursive_doubling_tree; - - int rc, dst; - int count = coll_request->count * coll_request->dtype->super.size; - int my_group_index = iboffload_module->ibnet->super.my_index; - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - - if (OPAL_UNLIKELY(!iboffload_module->connection_status[RECURSIVE_DOUBLING_TREE_BCAST])) { - bcol_iboffload_setup_binomial_connection(iboffload_module); - } - - /* register memory in mpool/rcache */ - rc = mca_bcol_iboffload_prepare_buffer(coll_request->buffer_info[SBUF].buf, count, - &coll_request->buffer_info[SBUF].iboffload_reg, iboffload_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Cannot register memory: " - "addr - %p, %d 
bytes.\n", - coll_request->buffer_info[SBUF].buf, count)); - return OMPI_ERROR; - } - - coll_request->buffer_info[SBUF].lkey = coll_request->buffer_info[SBUF].iboffload_reg->mr->lkey; - - /* it is estimated mq consumption... */ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits) || - false == opal_list_is_empty(&iboffload_module->collfrag_pending))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - - /* send or recv the data */ - - if (coll_request->root == my_group_index) { - IBOFFLOAD_VERBOSE(10, ("I'm root of the data %d", iboffload_module->power_of_2)); - /* send the all data to your extra peer */ - dst = recursive_doubling_tree->rank_extra_source; - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_recv_rtr_setup")); - return OMPI_ERROR; - } - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, SBUF, 0, count, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_large_buff_setup")); - return OMPI_ERROR; - } - } else { - /* Not root case */ - dst = recursive_doubling_tree->rank_extra_source; - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - dst, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup send rtr")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, - SBUF, 0, count, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - return OMPI_ERROR; - } - } - - IBOFFLOAD_VERBOSE(10, ("Fill in the the rest of the coll_fragment.\n")); - 
- /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments += 1; - coll_request->n_frags_sent += 1; - - if (NULL != last_wait) { - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } else { - last_send->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? 
BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_bcast_scatter_allgather_extra_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - int rc; - int mq_credits = iboffload_module->power_of_2 * 3 + 4; - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_bcast_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_bcast_scatter_allgather_extra_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_bcast_scatter_allgather_extra_intra was started [%d]\n", rc)); - return rc; -} - -int mca_bcol_iboffload_bcast_register(mca_bcol_base_module_t *super) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) super; - - int my_group_index = iboffload_module->ibnet->super.my_index; - - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register iboffload Bcast.\n")); - - comm_attribs.bcoll_type = BCOL_BCAST; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - if (my_group_index < iboffload_module->power_of_2_ranks) { - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_small_msg_bcast_intra, - mca_bcol_iboffload_small_msg_bcast_progress); - - 
inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_bcast_scatter_allgather_intra, - mca_bcol_iboffload_zero_copy_progress); - - } else { - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_small_msg_bcast_extra_intra, - mca_bcol_iboffload_small_msg_bcast_progress); - - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_bcast_scatter_allgather_extra_intra, - mca_bcol_iboffload_zero_copy_progress); - - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h b/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h deleted file mode 100644 index f283ab65e91..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h +++ /dev/null @@ -1,606 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_BCAST_H -#define MCA_BCOL_IBOFFLOAD_BCAST_H - -#include "ompi_config.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -#include "opal/include/opal/types.h" - -BEGIN_C_DECLS - -int mca_bcol_iboffload_small_msg_bcast_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_small_msg_bcast_extra_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_small_msg_bcast_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_bcast_scatter_allgather_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_zero_copy_progress(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_bcast_scatter_allgather_extra_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_bcast_register(mca_bcol_base_module_t *super); - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_recv_rtr_setup( - struct mqe_task **last_wait, - uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - /* Wait for RTR message over credit QP */ - fragment = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, dest_rank, - MCA_BCOL_IBOFFLOAD_QP_CREDIT); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = mca_bcol_iboffload_get_wait_task( - 
iboffload, dest_rank, 1, fragment, MCA_BCOL_IBOFFLOAD_QP_CREDIT, - iboffload->endpoints[dest_rank]->qps[MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF].qp->lcl_qp); - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_send_small_buff_setup( - struct mqe_task **last_send, - size_t len, uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - mca_bcol_iboffload_collreq_t *coll_request = - coll_fragment->coll_full_req; - - IBOFFLOAD_VERBOSE(10,("Get ml frag that I will send dest rank %d, len %d, lkey %d", - dest_rank, len, iboffload->rdma_block.ib_info.lkey)); - - fragment = mca_bcol_iboffload_get_send_frag(coll_request, dest_rank, - coll_request->qp_index, len, 0, - SBUF, /* this could be problematic */ - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - IBOFFLOAD_VERBOSE(10,("Get an rdma task for dest %d for packet size %d", - dest_rank,len)); - task = mca_bcol_iboffload_get_rdma_task( - dest_rank, 0, - fragment, iboffload, coll_fragment); - - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get send task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *last_send = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_send_large_buff_setup( - struct 
mqe_task **last_send, - int buf_index, int offset, - size_t len, uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - mca_bcol_iboffload_collreq_t *coll_request = - coll_fragment->coll_full_req; - - fragment = mca_bcol_iboffload_get_send_frag(coll_request, dest_rank, - MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, - len, - offset, buf_index, MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = mca_bcol_iboffload_get_send_task( - iboffload, dest_rank, - MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, - fragment, coll_fragment, NO_INLINE); - - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get send task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *last_send = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_send_rtr_setup( - struct mqe_task **last_send, - uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - /* Recv is ready , Send RTR message */ - fragment = mca_bcol_iboffload_get_send_frag(coll_fragment->coll_full_req, - dest_rank, MCA_BCOL_IBOFFLOAD_QP_CREDIT, 0, - 0, RBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = mca_bcol_iboffload_get_send_task(iboffload, dest_rank, - MCA_BCOL_IBOFFLOAD_QP_CREDIT, - fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, 
("Failed to get send task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - IBOFFLOAD_VERBOSE(10, ("dest_rank - %d. qp index - %d.\n", - dest_rank, MCA_BCOL_IBOFFLOAD_QP_CREDIT)); - - *last_send = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_recv_small_preposted_buff_setup( - struct mqe_task **last_wait, - size_t len, uint32_t dest_rank, - int qp_index, - int nwaits, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - IBOFFLOAD_VERBOSE(10,("Get preposted recv from rank %d", dest_rank)); - - fragment = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, dest_rank, - qp_index); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = mca_bcol_iboffload_get_wait_task(iboffload, dest_rank, nwaits, - fragment, qp_index, NULL); - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *last_wait = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_recv_small_buff_setup( - struct mqe_task **last_wait, - size_t len, uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - mca_bcol_iboffload_collreq_t *coll_request = - coll_fragment->coll_full_req; - - IBOFFLOAD_VERBOSE(10, ("Get preposted recv from 
rank %d", dest_rank)); - - fragment = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, dest_rank, - coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = mca_bcol_iboffload_get_wait_task(iboffload, dest_rank, 1, - fragment, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *last_wait = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_recv_large_buff_setup( - struct mqe_task **last_wait, - int buf_index, int offset, - size_t len, uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - int num_preposted; - - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - mca_bcol_iboffload_collreq_t *coll_request = coll_fragment->coll_full_req; - - /* Post message to recv queue for large messages */ - fragment = mca_bcol_iboffload_get_ml_frag( - iboffload, MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, len, - coll_request->buffer_info[buf_index].iboffload_reg->mr->lkey, - (uint64_t)((unsigned char *)coll_request->buffer_info[buf_index].buf + offset)); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - num_preposted = mca_bcol_iboffload_prepost_ml_recv_frag( - MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, - dest_rank, fragment, iboffload); - if (0 >= num_preposted) { - IBOFFLOAD_ERROR(("Failed to prepost recv fragments " - "return code - %d; dest_rank - %d", - num_preposted, dest_rank)); - - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = 
mca_bcol_iboffload_get_wait_task(iboffload, dest_rank, 1, - fragment, MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, NULL); - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *last_wait = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ -int bcol_iboffload_binomial_root_to_src(int group_root, int my_rank, - int pow2_size, int group_size, int *distance) -{ - int root, relative_rank, src, - pow2_distance = 0, i; - - if (group_root < pow2_size) { - root = group_root; - } else { - /* the source of the data is extra node, - the real root it represented by some rank from - pow2 group */ - root = group_root - pow2_size; - /* shortcut for the case when my rank is root for the group */ - if (my_rank == root) { - *distance = -1; - return group_root; - } - } - - relative_rank = (my_rank - root) < 0 ? 
my_rank - root + pow2_size : - my_rank - root; - - for (i = 1; i < pow2_size; i<<=1, pow2_distance++) { - if (relative_rank & i) { - src = my_rank ^ i; - if (src >= pow2_size) - src -= pow2_size; - - *distance = pow2_distance; - IBOFFLOAD_VERBOSE(10, ("AAAAA d %d rel %d it %d root %d my %d", *distance, relative_rank, i, root, my_rank)); - return src; - } - } - - /* error case */ - *distance = -1; - return -1; -} - -static inline void bcol_iboffload_setup_binomial_connection(mca_bcol_iboffload_module_t *iboffload) -{ - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - int i, n_exchanges = my_exchange_node->n_exchanges, - *exchanges = my_exchange_node->rank_exchanges, - n_extra_src = my_exchange_node->n_extra_sources, - my_rank = iboffload->ibnet->super.my_index, - rank_extra_src = my_exchange_node->rank_extra_source; - - mca_bcol_iboffload_endpoint_t *ep; - - IBOFFLOAD_VERBOSE(10, ("Open connections.\n")); - - if (0 < n_extra_src) { - ep = iboffload->endpoints[rank_extra_src]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - -#if OPAL_ENABLE_DEBUG - { - int qp_index, num_qps = mca_bcol_iboffload_component.num_qps; - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - assert(NULL != ep->qps[qp_index].qp->lcl_qp); - IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, QP index - %d: qp num - %x.", - ep, qp_index, ep->qps[qp_index].qp->lcl_qp->qp_num)); - } - } -#endif - - /* Connect to all extra nodes */ - if (EXTRA_NODE == my_exchange_node->node_type) { - for (i = iboffload->power_of_2_ranks; - i < iboffload->num_endpoints; ++i) { - if (i != my_rank) { - ep = iboffload->endpoints[i]; - - IBOFFLOAD_VERBOSE(10, ("subgroup rank %d: Connect to rank %d.\n", my_rank, i)); - - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - -#if OPAL_ENABLE_DEBUG - { - int qp_index, num_qps = mca_bcol_iboffload_component.num_qps; - for (qp_index = 0; qp_index < num_qps; 
++qp_index) { - assert(NULL != ep->qps[qp_index].qp->lcl_qp); - IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, QP index - %d: qp num - %x.", - ep, qp_index, ep->qps[qp_index].qp->lcl_qp->qp_num)); - } - } -#endif - } - } - } - } - - for (i = 0; i < n_exchanges; ++i) { - ep = iboffload->endpoints[exchanges[i]]; - - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - -#if OPAL_ENABLE_DEBUG - { - int qp_index, num_qps = mca_bcol_iboffload_component.num_qps; - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - assert(NULL != ep->qps[qp_index].qp->lcl_qp); - IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, QP index - %d: qp num - %x.", - ep, qp_index, ep->qps[qp_index].qp->lcl_qp->qp_num)); - } - } -#endif - } - /* set the connection status to connected */ - iboffload->connection_status[RECURSIVE_DOUBLING_TREE_BCAST] = true; -} - -static inline __opal_attribute_always_inline__ -int bcol_iboffload_bcast_binomial_gather(mca_bcol_iboffload_module_t *iboffload_module, - struct mqe_task **last_send, struct mqe_task **last_wait, - mca_bcol_iboffload_collfrag_t *coll_fragment, - int count, int base_block_size, int radix_mask_pow) -{ - int rc; - int i; - int my_group_index = iboffload_module->ibnet->super.my_index; - int delta, rdelta; - - IBOFFLOAD_VERBOSE(10, ("bcol_iboffload_bcast_binomial_gather %d %d", - radix_mask_pow, my_group_index)); - - /* we assume the iteration #iteration already was completed with probe */ - for (i = 0; i < iboffload_module->power_of_2; i++) { - int pow2 = 1 << i; - int peer_index = my_group_index ^ pow2; - int slen, rlen, - send_offset, - recv_offset; - - if (i > radix_mask_pow) { - slen = rlen = pow2 * base_block_size; - send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i)); - recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i)); - - rdelta = count - recv_offset; - if (rdelta > 0) { - IBOFFLOAD_VERBOSE(10, ("Recv1 [ pow2 %d, radix %d ] offset %d , len %d , dest %d", - pow2, 1 << 
iboffload_module->power_of_2, - recv_offset, rlen, peer_index)); - - rc = mca_bcol_iboffload_send_rtr_setup(last_send, - peer_index, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup send rtr")); - return OMPI_ERROR; - } - } - - delta = count - send_offset; - if (delta > 0) { - if (delta < slen) { - /* recv the tail */ - slen = delta; - } - - IBOFFLOAD_VERBOSE(10, ("Send1 [ pow2 %d, radix %d ] offset %d , len %d , dest %d", - pow2, 1 << iboffload_module->power_of_2, - send_offset, slen, peer_index)); - rc = mca_bcol_iboffload_recv_rtr_setup(last_wait, peer_index, iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_send_large_buff_setup(last_send, SBUF, send_offset, slen, peer_index, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); - return OMPI_ERROR; - } - } - - if (rdelta > 0) { - if (rdelta < rlen) { - /* recv the tail */ - rlen = rdelta; - } - - rc = mca_bcol_iboffload_recv_large_buff_setup(last_wait, - SBUF, recv_offset, rlen, peer_index, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - return OMPI_ERROR; - } - } - - } else if (i == radix_mask_pow) { - /* only receive data */ - rlen = pow2 * base_block_size; - recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i)); - delta = count - recv_offset; - if (0 >= delta) { - /* we have nothing to send, skip the iteration */ - continue; - } - if (delta < rlen) { - /* recv the tail */ - rlen = delta; - } - /* receive data from the peer */ - IBOFFLOAD_VERBOSE(10, ("Recv2 [ pow2 %d, radix %d ] offset %d , len %d , dest %d", - pow2, - 1 << iboffload_module->power_of_2, - recv_offset, - rlen, peer_index)); - rc = 
mca_bcol_iboffload_send_rtr_setup(last_send, - peer_index, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup send rtr")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_recv_large_buff_setup(last_wait, - SBUF, recv_offset, rlen, peer_index, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - return OMPI_ERROR; - } - } else if (i < radix_mask_pow) { - /* Only send data */ - slen = pow2 * base_block_size; - send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i)); - delta = count - send_offset; - if (0 >= delta) { - /* we have nothing to send, skip the iteration */ - continue; - } - - if (delta < slen) { - slen = delta; - } - - IBOFFLOAD_VERBOSE(10, ("Send2 [ pow2 %d, radix %d ] offset %d , len %d , dest %d", - pow2, - 1 << iboffload_module->power_of_2, - send_offset, - slen, - peer_index)); - - rc = mca_bcol_iboffload_recv_rtr_setup(last_wait, peer_index, iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_send_large_buff_setup(last_send, SBUF, send_offset, slen, peer_index, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); - return OMPI_ERROR; - } - } - } - - return OMPI_SUCCESS; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.c b/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.c deleted file mode 100644 index dc447d879d9..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include - -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" - -static void -collfrag_constructor(struct mca_bcol_iboffload_collfrag_t *collfrag) -{ - collfrag->n_sends = 0; - collfrag->n_sends_completed = 0; - - memset(collfrag->pre_posted_recvs, 0, - sizeof(struct mca_bcol_iboffload_task_t *) * MAX_MQE_TASKS); - - collfrag->signal_task_wr_id = (uint64_t) 0; - collfrag->complete = false; - - collfrag->seq_n = -1; - collfrag->coll_full_req = NULL; - - collfrag->unpack_size = 0; - - collfrag->tasks_posted = 0; - collfrag->to_post = NULL; - collfrag->task_next = NULL; - collfrag->tasks_to_release = NULL; - - collfrag->in_pending_list = false; -} - -static void -collfrag_destruct(struct mca_bcol_iboffload_collfrag_t *collfrag) -{ -} - -OBJ_CLASS_INSTANCE(mca_bcol_iboffload_collfrag_t, - ompi_free_list_item_t, - collfrag_constructor, - collfrag_destruct); diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.h b/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.h deleted file mode 100644 index 3be53aacaac..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.h +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_COLLFRAG_H -#define MCA_BCOL_IBOFFLOAD_COLLFRAG_H - -#include "ompi_config.h" - -#include -#include -#include - -#include "bcol_iboffload.h" - -#include "opal/class/ompi_free_list.h" - -BEGIN_C_DECLS - -#define MAX_MQE_TASKS 128 /* Pasha - do we want to make it dynamic ?*/ - -struct mca_bcol_iboffload_task_t; -struct mca_bcol_iboffload_collreq_t; - -/* collective fragment descriptor */ -struct mca_bcol_iboffload_collfrag_t { - ompi_free_list_item_t super; - - /* number of asynchronous sends scheduled */ - uint32_t n_sends; - - /* number of sends completed */ - uint32_t n_sends_completed; - - /* Algorithm ID that was user for this fragment*/ - int32_t alg; - - /* pre-posted receive sources */ - struct mca_bcol_iboffload_task_t *pre_posted_recvs[MAX_MQE_TASKS]; - - /* cache here pointer to signaled task */ - uint64_t signal_task_wr_id; - - /* mwr completion from the mcq */ - volatile bool complete; - - /* sequence number - we use it for - correct ordering of resources release */ - uint32_t seq_n; - - /* pointer to the full collective request descriptor */ - struct mca_bcol_iboffload_collreq_t *coll_full_req; - - size_t unpack_size; - - bool in_pending_list; - - /* Num of posted tasks */ - int tasks_posted; - - /* Pointer to head of not posted elements list */ - struct mqe_task *to_post; - - /* Pointer to tail next */ - struct mqe_task **tail_next; - - /* List of the all tasks of this coll frag */ - struct mca_bcol_iboffload_task_t *tasks_to_release; - - /* Pointer to the next elem in All tasks list */ - struct mca_bcol_iboffload_task_t **task_next; - - /* Num of needed mq credits */ - int mq_credits; - - /* MQ index, that used for this frag */ - int mq_index; - - /* - * Last wait sequence number; zero i.e. 
- * there isn't any wait in the coll request - */ - int32_t last_wait_num; - /* fragment descriptor for non contiguous data */ - bcol_fragment_descriptor_t *bcol_frag_info; - /* frag-len of ml buffer */ - int frag_len; -}; -typedef struct mca_bcol_iboffload_collfrag_t mca_bcol_iboffload_collfrag_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_collfrag_t); - -static inline __opal_attribute_always_inline__ - void mca_bcol_iboffload_collfrag_init( - mca_bcol_iboffload_collfrag_t *cf) -{ - /* init the request */ - cf->n_sends = 0; - cf->complete = false; - cf->n_sends_completed = 0; - cf->alg = -1; - cf->in_pending_list = false; - cf->tail_next = NULL; - cf->tasks_posted = 0; - cf->to_post = NULL; - cf->mq_credits = 0; - cf->mq_index = 0; - cf->tasks_to_release = NULL; - cf->task_next = &cf->tasks_to_release; - cf->last_wait_num = 0; -} - -static inline __opal_attribute_always_inline__ - struct mca_bcol_iboffload_collfrag_t * - mca_bcol_iboffload_get_collfrag(void) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_collfrag_t *cf; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - /* blocking allocation for collectives fragment */ - OMPI_FREE_LIST_GET_MT(&cm->collfrags_free, item); - if (OPAL_UNLIKELY(NULL == item)) { - IBOFFLOAD_ERROR(("Failed to allocated collfrag.\n")); - return NULL; - } - - cf = (mca_bcol_iboffload_collfrag_t*) item; - mca_bcol_iboffload_collfrag_init(cf); - - return cf; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.c b/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.c deleted file mode 100644 index bae677bb510..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "bcol_iboffload_collreq.h" - -static void -collreq_construct(struct mca_bcol_iboffload_collreq_t *collreq) -{ - int i; - collreq->n_fragments = 0; - collreq->n_frag_mpi_complete = 0; - collreq->n_frag_net_complete = 0; - collreq->user_handle_freed = false; - - for (i = 0; i < BCOL_IBOFFLOAD_BUFFERS; i++) { - collreq->buffer_info[i].buf = NULL; - collreq->buffer_info[i].offset = 0; - collreq->buffer_info[i].iboffload_reg = NULL; - } - - OBJ_CONSTRUCT(&collreq->work_requests, opal_list_t); - OBJ_CONSTRUCT(&collreq->first_collfrag, mca_bcol_iboffload_collfrag_t); - - OBJ_CONSTRUCT(&collreq->send_convertor, opal_convertor_t); - OBJ_CONSTRUCT(&collreq->recv_convertor, opal_convertor_t); -} - -static void -collreq_destruct(struct mca_bcol_iboffload_collreq_t *collreq) -{ - OBJ_DESTRUCT(&collreq->work_requests); - OBJ_DESTRUCT(&collreq->first_collfrag); - - OBJ_DESTRUCT(&collreq->send_convertor); - OBJ_DESTRUCT(&collreq->recv_convertor); -} - -OBJ_CLASS_INSTANCE(mca_bcol_iboffload_collreq_t, - ompi_request_t, - collreq_construct, - collreq_destruct); diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.h b/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.h deleted file mode 100644 index 31344009d3b..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.h +++ /dev/null @@ -1,273 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_COLLREQ_H -#define MCA_BCOL_IBOFFLOAD_COLLREQ_H - -#include "ompi_config.h" - -#include -#include -#include - -#include "opal/class/ompi_free_list.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_collfrag.h" - -#define SBUF 0 -#define RBUF 1 - -#define BCOL_IBOFFLOAD_BUFFERS 2 - -BEGIN_C_DECLS - -struct mca_bcol_iboffload_reg_t; - -/* - * collective progress function - */ -typedef int (*collective_message_progress_function)( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *full_message_descriptor); -/* - * callback function to be called after the collective work request - * completes. This is invoked in user-space, and is typically where - * data may be copied out of library buffers, or when any other user- - * level protocol may be completed - * - * input: - * callback data: typically, this may be the work request just finished - */ -typedef int (*collective_message_completion_callback_function)( - void *callback_data); - -struct mca_bcol_iboffload_buff_info { - void *buf; - size_t offset; - uint32_t lkey; - struct mca_bcol_iboffload_reg_t *iboffload_reg; -}; -typedef struct mca_bcol_iboffload_buff_info mca_bcol_iboffload_buff_info; - -/* - * Collective message descriptor - * the mca_bcol_iboffload_message_desc_t was replaced with mca_bcol_iboffload_collreq_t - * ************************************************************************************************* - * - * Brief description of iboffload collective request dependencies: - * - * mca_bcol_iboffload_collreq_t <----<< Full coll request - * | - * --(0)-- mca_bcol_iboffload_collfrag_t <----<< Fragment of coll request ( for example - * | | 10MB Bcast maybe split to 2MB fragments ) - * | | - * | --(0)-- mca_bcol_iboffload_task_t---mqe_task - * | | | - * | | ---mca_bcol_iboffload_frag_t---ibv_sge - * | --(1)-- 
mca_bcol_iboffload_task_t---mqe_task - * | | | - * | | ---mca_bcol_iboffload_frag_t---ibv_sge - * | ..(M).. - * | - * --(1)-- mca_bcol_iboffload_collfrag_t - * | - * ..(N).. - * - * ************************************************************************************************* - */ - -struct mca_bcol_iboffload_collreq_t { - ompi_request_t super; - - /* op type */ - struct ompi_op_t *op; - - /* Sometimes the operation that should be performed - by the IB is different than the mpi_op and is then set - by the pack_data_for_calc function */ - enum ibv_m_wr_calc_op actual_ib_op; - - /* Sometimes the data type that should be used by the IB - to peroform the calc s different than the mpi dtype, - and is then set by the pack_data_for_calc function */ - enum ibv_m_wr_data_type actual_ib_dtype; - - /* data type */ - struct ompi_datatype_t *dtype; - - /* convertor for send operation */ - opal_convertor_t send_conv; - - /* convertor for recv operation */ - opal_convertor_t recv_conv; - - /* - * count (in data type units) - */ - uint64_t count; - - /* - * root of collective operation - */ - int root; - - /* number of message fragments */ - int n_fragments; - - /* number of fragments sent - all resrouces for a fragment are allocated - * or none at all are - */ - int n_frags_sent; - - /* number of fragments completed from the MPI perspective */ - int n_frag_mpi_complete; - - /* number of fragments completed from a network perspective */ - int n_frag_net_complete; - - /* collective free and may be released - message complete from the - ** MPI perspective, the network prespective, and the user is done - ** with the message handle */ - volatile bool user_handle_freed; - - /* list of collective fragements - only 1 for now */ - opal_list_t work_requests; - - /* message progress function */ - collective_message_progress_function progress_fn; - - /* work request completion callback function */ - collective_message_completion_callback_function completion_cb_fn; - - /* index of qp with 
enough length of buffs for this collective */ - int qp_index; - - bool if_bcol_last; - - /* The flag is used for the last bcol to indicate if the calculation should be done by the cpu */ - bool do_calc_in_cpu; - - /* in Allreduce case, if (true == do_calc_in_cpu) => - the final result will be calc on local CPU */ - uint64_t l_operand; - uint64_t r_operand; - - /* caching ML-rdma buffer descriptor */ - mca_bcol_iboffload_rdma_buffer_desc_t *ml_rdma_desc; - - /* ML buffer index code */ - int ml_buffer_index; - - /* In the current implementation the collrequest connected to 1 single - iboffload module */ - struct mca_bcol_iboffload_module_t *module; - - mca_bcol_iboffload_collfrag_t first_collfrag; - - /* Send/recv buffs info - user buffers registration if needed etc. */ - mca_bcol_iboffload_buff_info buffer_info[BCOL_IBOFFLOAD_BUFFERS]; - - /* My bi nominal tree children in this collective */ - int *bi_nominal_tree_children; - - /* Convertors for send/recv if needed */ - opal_convertor_t send_convertor; - opal_convertor_t recv_convertor; - - /* Order info from upper layer */ - mca_bcol_base_order_info_t *order_info; -}; -typedef struct mca_bcol_iboffload_collreq_t mca_bcol_iboffload_collreq_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_collreq_t); - -#define COLLREQ_IS_DONE(cr) (cr->user_handle_freed && \ - (cr->n_frag_mpi_complete == cr->n_fragments) && \ - (cr->n_frag_net_complete == cr->n_fragments)) - -#define RELEASE_COLLREQ(cr) \ -do { \ - (cr)->user_handle_freed = false; \ - OMPI_FREE_LIST_RETURN_MT(&mca_bcol_iboffload_component.collreqs_free, \ - (ompi_free_list_item_t *) (cr)); \ -} while (0) - -static inline __opal_attribute_always_inline__ - int mca_bcol_iboffload_free_resources_and_move_to_pending( - mca_bcol_iboffload_collfrag_t *coll_fragment, - mca_bcol_iboffload_module_t *iboffload) -{ - int rc = mca_bcol_iboffload_free_tasks_frags_resources(coll_fragment, - iboffload->device->frags_free); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - 
- IBOFFLOAD_VERBOSE(10, ("iboffload - %p, coll_fragment - %p, " - "coll frag in_pending_list ? - %d, pending_list size - %d.\n", - iboffload, coll_fragment, coll_fragment->in_pending_list, - opal_list_get_size(&iboffload->collfrag_pending))); - - BCOL_IBOFFLOAD_MQ_RETURN_CREDITS(iboffload, coll_fragment->mq_index, coll_fragment->mq_credits); - - /* Remove coll frag from coll request opal_list */ - opal_list_remove_item(&coll_fragment->coll_full_req->work_requests, - (opal_list_item_t *) coll_fragment); - - if (false == coll_fragment->in_pending_list) { - /* Put the collfrag on pending list */ - coll_fragment->in_pending_list = true; - opal_list_append(&iboffload->collfrag_pending, - (opal_list_item_t *) coll_fragment); - } else { - /* The item is already on pending list => - insert it first that not break order - between frags on the list */ - opal_list_prepend(&iboffload->collfrag_pending, - (opal_list_item_t *) coll_fragment); - } - - return OMPI_SUCCESS; -} - -/* Forward declaration */ -struct mca_bcol_iboffload_reg_t; -static inline __opal_attribute_always_inline__ - int mca_bcol_iboffload_prepare_buffer( - void *buffer, - size_t size, - struct mca_bcol_iboffload_reg_t **registration_handler, - mca_bcol_iboffload_module_t *iboffload) -{ - int rc; - mca_mpool_base_registration_t *reg = NULL; - - assert(size > 0); - rc = iboffload->device->mpool->mpool_register( - iboffload->device->mpool, - buffer, size, - (uint32_t) 0 /* flags */, - ®); - - *registration_handler = - (struct mca_bcol_iboffload_reg_t *) reg; - - return rc; -} - -int mca_bcol_iboffload_coll_req_implement( - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request); - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_component.c b/ompi/mca/bcol/iboffload/bcol_iboffload_component.c deleted file mode 100644 index dbf3f6c21fe..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_component.c +++ /dev/null @@ -1,1076 +0,0 @@ -/* -*- Mode: C; 
c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include - -#include - -#include "ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/mca/common/ofacm/connect.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/common/ofacm/base.h" -#include "ompi/mca/common/verbs/common_verbs.h" - -#include "opal/util/argv.h" -#include "opal/include/opal/types.h" - -#include "bcol_iboffload_mca.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_qp_info.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" - -/* - * Public string showing the bcol ompi_sm V2 component version number - */ -const char *mca_bcol_iboffload_component_version_string = - "Open MPI bcol - iboffload collective MCA component version " OMPI_VERSION; - -/* - * Local functions - */ - -static int setup_qps(void); -static int iboffload_open(void); -static int iboffload_close(void); - -#define GET_IB_DTYPE_BY_CTYPE(ctype, is_int, ib_dtype) \ -do { \ - switch (sizeof(ctype)) { \ - case 1: \ - ib_dtype = ((is_int) ? IBV_M_DATA_TYPE_INT8 : IBV_M_DATA_TYPE_INVALID); \ - break; \ - case 2: \ - ib_dtype = ((is_int) ? 
IBV_M_DATA_TYPE_INT16 : IBV_M_DATA_TYPE_INVALID); \ - break; \ - case 4: \ - ib_dtype = ((is_int) ? IBV_M_DATA_TYPE_INT32 : IBV_M_DATA_TYPE_FLOAT32); \ - break; \ - case 8: \ - ib_dtype = ((is_int) ? IBV_M_DATA_TYPE_INT64 : IBV_M_DATA_TYPE_FLOAT64); \ - break; \ - default: \ - ib_dtype = IBV_M_DATA_TYPE_INVALID; \ - } \ -} while (0) - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ -mca_bcol_iboffload_component_t mca_bcol_iboffload_component = { - - /* First, fill in the super */ - - .super = { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .bcol_version = { - MCA_BCOL_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "iboffload", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = iboffload_open, - .mca_close_component = iboffload_close, - .mca_register_component_params = mca_bcol_iboffload_register_params, - }, - - .collm_init_query = mca_bcol_iboffload_init_query, - .collm_comm_query = mca_bcol_iboffload_comm_query, - .coll_support = mca_bcol_iboffload_coll_supported, - .coll_support_all_types = mca_bcol_iboffload_coll_support_all_types, - .init_done = false, - .need_ordering = true, /* collective calls with iboffload should to be ordered */ - }, - /* iboffload-component specifc information */ - .verbose = 0, /* verbose */ - .num_qps = 0, /* number of qps to use */ - .warn_default_gid_prefix = false, /* warn_default_gid_prefix */ - .warn_nonexistent_if = false, /* warn_nonexistent_if */ - .free_list_num = 0, /* free_list_num */ - .free_list_max = 0, /* free_list_max */ - .free_list_inc = 0, /* free_list_inc */ - .mpool_name = NULL, /* mpool_name */ - .cq_size = 0, /* cq_size */ - .max_inline_data = 0, /* max_inline_data */ - .pkey_val = 0, /* pkey_val */ - .qp_ous_rd_atom = 0, /* 
qp_ous_rd_atom */ - .mtu = 0, /* mtu */ - .min_rnr_timer = 0, /* min_rnr_timer */ - .timeout = 0, /* timeout */ - .retry_count = 0, /* retry_count */ - .rnr_retry = 0, /* rnr_retry */ - .max_rdma_dst_ops = 0, /* max_rdma_dst_ops */ - .service_level = 0, /* service_level */ - .buffer_alignment = 0, /* buffer_alignment */ - .max_mqe_tasks = 0, /* max_mqe_tasks */ - .max_mq_size = 0, /* max_mq_size */ - .if_include = NULL, /* if_include */ - .if_include_list = NULL, /* if_include_list */ - .if_exclude = NULL, /* if_exclude */ - .if_exclude_list = NULL, /* if_exclude_list */ - .if_list = NULL, /* if_list */ - .ib_devs = NULL, /* ib_devs */ - .num_devs = 0, /* num_devs */ - .receive_queues = NULL, /* receive_queues */ -}; - -static int mca_bcol_iboffload_dummy_init_query( - bool enable_progress_threads, bool enable_mpi_threads) -{ - return OMPI_SUCCESS; -} - -static void mca_bcol_iboffload_device_constructor - (mca_bcol_iboffload_device_t *device) -{ - /* Init OFACM stuf */ - device->dev.ib_dev = NULL; - device->dev.ib_dev_context = NULL; - device->dev.capabilities = 0; - /* device->dev.type = MCA_COMMON_OFACM_COLL;*/ - /* Init other stuff */ - device->ib_pd = NULL; - device->ib_cq = NULL; - device->ports = NULL; - - device->mpool = NULL; - device->ib_mq_cq = NULL; - device->frags_free = NULL; - - device->activated = false; - device->num_act_ports = 0; - - memset(&device->ib_dev_attr, 0, sizeof(struct ibv_device_attr)); - memset(&device->dummy_reg, 0, sizeof( mca_bcol_iboffload_reg_t)); -} - -static void mca_bcol_iboffload_device_destructor - (mca_bcol_iboffload_device_t *device) -{ - int qp_index, num_qps = mca_bcol_iboffload_component.num_qps; - - IBOFFLOAD_VERBOSE(10, ("Device %s will be destroyed.\n", - ibv_get_device_name(device->dev.ib_dev))); - - if (NULL != device->frags_free) { - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - mca_bcol_iboffload_dealloc_qps_resource_fn_t dealloc_resource = - 
mca_bcol_iboffload_component.qp_infos[qp_index].dealloc_resource; - if (NULL != dealloc_resource) { - dealloc_resource(qp_index, device); - } - } - - free(device->frags_free); - } - - if (NULL != device->mpool) { - IBOFFLOAD_VERBOSE(10, ("Mpool destroy - %p.\n", device->mpool)); - if (OMPI_SUCCESS != mca_mpool_base_module_destroy(device->mpool)) { - IBOFFLOAD_ERROR(("Device %s, failed to destroy mpool", - ibv_get_device_name(device->dev.ib_dev))); - } - } - - if (NULL != device->dummy_reg.mr) { - IBOFFLOAD_VERBOSE(10, ("Dummy memory MR unregister - %p.\n", device->dummy_reg.mr)); - if (OMPI_SUCCESS != - mca_bcol_iboffload_deregister_mr((void *) device, &device->dummy_reg.base)) { - IBOFFLOAD_ERROR(("Device %s: failed to unregister dummy memory MR.", - ibv_get_device_name(device->dev.ib_dev))); - } - } - - if (NULL != device->ib_cq) { - if (ibv_destroy_cq(device->ib_cq)) { - IBOFFLOAD_ERROR(("Device %s, failed to destroy CQ, errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - } - } - - if (NULL != device->ib_mq_cq) { - if (ibv_destroy_cq(device->ib_mq_cq)) { - IBOFFLOAD_ERROR(("Device %s, failed to destroy mq CQ, errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - } - } - - /* Release IB PD if we have one */ - if (NULL != device->ib_pd) { - if(ibv_dealloc_pd(device->ib_pd)){ - IBOFFLOAD_ERROR(("Device %s, failed to release PD, errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - } - } - - /* close the device */ - if (NULL != device->dev.ib_dev_context) { - if (ibv_close_device(device->dev.ib_dev_context)) { - IBOFFLOAD_ERROR(("Device %s " - ", failed to close the device, errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - } - } - - /* release memory */ - if (NULL != device->ports) { - free(device->ports); - } -} - -OBJ_CLASS_INSTANCE(mca_bcol_iboffload_device_t, - opal_list_item_t, - mca_bcol_iboffload_device_constructor, - 
mca_bcol_iboffload_device_destructor); - -int mca_bcol_iboffload_coll_supported(int op, int dtype, bcol_elem_type elem_type) -{ - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - return (IBV_M_DATA_TYPE_INVALID != cm->map_ompi_to_ib_dt[dtype]) && - (IBV_M_CALC_OP_INVALID != cm->map_ompi_to_ib_calcs[op]) && - (BCOL_SINGLE_ELEM_TYPE == elem_type); -} - -int mca_bcol_iboffload_coll_support_all_types(bcol_coll coll_name) -{ - return BCOL_ALLREDUCE ^ coll_name; -} - -/* Unload devices */ -static int iboffload_release_devices(void) -{ - int i; - mca_bcol_iboffload_device_t *device = NULL; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - opal_pointer_array_t *devs = &cm->devices; - - IBOFFLOAD_VERBOSE(10, ("Destroy all devices.\n")); - - for (i = 0; i < cm->num_devs; i++) { - device = opal_pointer_array_get_item(devs, i); - - IBOFFLOAD_VERBOSE(10, ("Device %s with index %d will be destroyed.\n", - ibv_get_device_name(device->dev.ib_dev), i)); - if (NULL != device) { - OBJ_RELEASE(device); - } - } - - IBOFFLOAD_VERBOSE(10, ("All devices were destroyed.\n")); - - opal_pointer_array_remove_all(devs); - OBJ_DESTRUCT(devs); - - /* release device list */ - /*ibv_free_device_list_compat(cm->ib_devs);*/ - ompi_ibv_free_device_list(cm->ib_devs); - cm->ib_devs = NULL; - - IBOFFLOAD_VERBOSE(10, ("All devices destroyed.\n")); - - return OMPI_SUCCESS; -} - -/* Create list of IB HCA that have active port */ -static int iboffload_load_devices(void) -{ - int num_devs = 0, i; - mca_bcol_iboffload_device_t *device = NULL; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Entering to iboffload_load_devices")); - - /* Get list of devices */ - /*cm->ib_devs = ibv_get_device_list_compat(&num_devs);*/ - cm->ib_devs = ompi_ibv_get_device_list(&num_devs); - if (0 == num_devs || NULL == cm->ib_devs) { - IBOFFLOAD_ERROR(("No IB devices found")); - /* No hca error*/ - 
opal_show_help("help-mpi-btl-base.txt", "btl:no-nics", true); - return OMPI_ERROR; - } - - cm->num_devs = num_devs; - - for (i = 0; i < num_devs; i++) { - device = OBJ_NEW(mca_bcol_iboffload_device_t); - if (NULL != device) { - opal_pointer_array_set_item(&cm->devices, i, (void *) device); - device->dev.ib_dev = cm->ib_devs[i]; - - IBOFFLOAD_VERBOSE(10, ("Device %s with index %d was appended.\n", - ibv_get_device_name(device->dev.ib_dev), i)); - } - } - - if (0 == opal_pointer_array_get_size(&cm->devices)) { - /* No relevand devices were found, return error */ - IBOFFLOAD_ERROR(("No active devices found.\n")); - - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - -static void map_ompi_to_ib_dtype(void) -{ - int dt; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - for (dt = 0; dt < OMPI_DATATYPE_MAX_PREDEFINED; ++dt) { - cm->map_ompi_to_ib_dt[dt] = IBV_M_DATA_TYPE_INVALID; - } - - GET_IB_DTYPE_BY_CTYPE(char, true, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_SIGNED_CHAR]); - - GET_IB_DTYPE_BY_CTYPE(short, true, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_SHORT]); - GET_IB_DTYPE_BY_CTYPE(int, true, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_INT]); - GET_IB_DTYPE_BY_CTYPE(long, true, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_LONG]); - GET_IB_DTYPE_BY_CTYPE(long long, true, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_LONG_LONG]); - GET_IB_DTYPE_BY_CTYPE(float, false, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_FLOAT]); - GET_IB_DTYPE_BY_CTYPE(double, false, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_DOUBLE]); - - /* Check (only in DEBUG mode) if size of double equal to 64 bit */ - assert(8 == sizeof(double)); -} - -static void map_ompi_to_ib_op_type(void) -{ - int op; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - for (op = 0; op < OMPI_OP_NUM_OF_TYPES; ++op) { - cm->map_ompi_to_ib_calcs[op] = IBV_M_CALC_OP_INVALID; - } - - cm->map_ompi_to_ib_calcs[OMPI_OP_MAX] = IBV_M_CALC_OP_MAX; - cm->map_ompi_to_ib_calcs[OMPI_OP_MIN] = 
IBV_M_CALC_OP_MIN; - cm->map_ompi_to_ib_calcs[OMPI_OP_SUM] = IBV_M_CALC_OP_ADD; - - cm->map_ompi_to_ib_calcs[OMPI_OP_LAND] = IBV_M_CALC_OP_LAND; - cm->map_ompi_to_ib_calcs[OMPI_OP_BAND] = IBV_M_CALC_OP_BAND; - cm->map_ompi_to_ib_calcs[OMPI_OP_LOR] = IBV_M_CALC_OP_LOR; - cm->map_ompi_to_ib_calcs[OMPI_OP_BOR] = IBV_M_CALC_OP_BOR; - cm->map_ompi_to_ib_calcs[OMPI_OP_LXOR] = IBV_M_CALC_OP_LXOR; - cm->map_ompi_to_ib_calcs[OMPI_OP_BXOR] = IBV_M_CALC_OP_BXOR; -} - -/* - * Open the component - */ -static int iboffload_open(void) -{ - int rc; - - /* local variables */ - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Open Iboffload component.\n")); - - (void) mca_bcol_iboffload_verify_params(); - - cm->super.priority = 100; - cm->super.n_net_contexts = 0; - cm->super.network_contexts = NULL; - - OBJ_CONSTRUCT(&cm->recv_wrs.lock, opal_mutex_t); - - /* construct lists */ - OBJ_CONSTRUCT(&cm->devices, opal_pointer_array_t); - rc = opal_pointer_array_init(&cm->devices, 10, INT_MAX, 10); - if (OMPI_SUCCESS != rc) { - goto close_device; - } - - /* Check MCA parameters */ - if (0 != (mca_bcol_iboffload_component.exchange_tree_order & (mca_bcol_iboffload_component.exchange_tree_order - 1))) { - IBOFFLOAD_ERROR(("Warning: ibcol_iboffload_exchange_tree_order is %d which is not a power of 2, setting it to 2", - mca_bcol_iboffload_component.exchange_tree_order)); - mca_bcol_iboffload_component.exchange_tree_order = 2; - } - - /* Pasha: Since we do not have max inline check like in openib, - I will put some dummy check here. 
All mlnx devices support at least 512b */ - if (mca_bcol_iboffload_component.max_inline_data > 512) { - IBOFFLOAD_ERROR(("Warning the inline %d, is to big and unsupported", - mca_bcol_iboffload_component.max_inline_data)); - rc = OMPI_ERROR; - goto close_device; - } - - /* Register the progress function */ - rc = opal_progress_register(mca_bcol_iboffload_component_progress); - if (OMPI_SUCCESS != rc) { - IBOFFLOAD_ERROR(("Failed to register the progress function" - " for iboffload component.\n")); - goto close_device; - } - - map_ompi_to_ib_dtype(); - map_ompi_to_ib_op_type(); - - /* The init_done set to true on first component usage */ - cm->init_done = false; - - return OMPI_SUCCESS; - -close_device: - OBJ_DESTRUCT(&cm->devices); - OBJ_DESTRUCT(&cm->recv_wrs.lock); - return rc; -} - -/* - * Close the component - */ -static int iboffload_close(void) -{ - int rc; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Destroy component free lists.\n")); - - if (true == cm->init_done) { - OBJ_DESTRUCT(&cm->tasks_free); - OBJ_DESTRUCT(&cm->collreqs_free); - OBJ_DESTRUCT(&cm->collfrags_free); - OBJ_DESTRUCT(&cm->calc_tasks_free); - } - - /* Unregister the progress function */ - rc = opal_progress_unregister(mca_bcol_iboffload_component_progress); - if (OMPI_SUCCESS != rc) { - IBOFFLOAD_ERROR(("Failed to unregister the progress function" - " for iboffload component.\n")); - } - - rc = iboffload_release_devices(); - if (OMPI_SUCCESS != rc) { - return rc; - } - - if (NULL != cm->receive_queues) { - free(cm->receive_queues); - } - - OBJ_DESTRUCT(&cm->recv_wrs.lock); - - IBOFFLOAD_VERBOSE(10, ("The component closed.\n")); - - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_bcol_iboffload_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - int rc; - mca_bcol_iboffload_component_t *cm = 
&mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Init Iboffload component.\n")); - - /* Get list of HCAs and ports */ - rc = iboffload_load_devices(); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Load devices error.\n")); - goto unload_devices; - } - - /* Setup the BSRQ QP's based on the final value of - mca_bcol_iboffload_component.receive_queues. */ - rc = setup_qps(); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("QPs setup error.\n")); - goto unload_devices; - } - - cm->super.collm_init_query = mca_bcol_iboffload_dummy_init_query; - - return OMPI_SUCCESS; - - /* done */ -unload_devices: - IBOFFLOAD_ERROR(("Release devices: an error occured.\n")); - - iboffload_release_devices(); - - return rc; -} - -static int32_t atoi_param(char *param, int32_t dflt) -{ - if (NULL == param || '\0' == param[0]) { - return dflt ? dflt : 1; - } - - return atoi(param); -} - -static int setup_qps(void) -{ - int ret = OMPI_SUCCESS, qp = 0; - int rd_num = 0, rd_low = 0, size = 0, - rd_win = 0, rd_rsv = 0, sd_max = 0; - - mca_bcol_iboffload_qp_type_t type = 0; - - char **queues = NULL, **params = NULL; - - queues = opal_argv_split(mca_bcol_iboffload_component.receive_queues, ':'); - if (0 == opal_argv_count(queues)) { - opal_show_help("help-mpi-btl-openib.txt", - "no qps in receive_queues", true, - ompi_process_info.nodename, - mca_bcol_iboffload_component.receive_queues); - - ret = OMPI_ERROR; - - goto exit; - } - - while (queues[qp] != NULL) { - if (0 == strncmp("P,", queues[qp], 2)) { - type = MCA_BCOL_IBOFFLOAD_PP_QP; - } else if (0 == strncmp("S,", queues[qp], 2)) { - type = MCA_BCOL_IBOFFLOAD_SRQ_QP; - } else if (0 == strncmp("X,", queues[qp], 2)) { -#if HAVE_XRC - type = MCA_BCOL_IBOFFLOAD_XRC_QP; -#else - opal_show_help("help-mpi-btl-openib.txt", "No XRC support", true, - ompi_process_info.nodename, - mca_bcol_iboffload_component.receive_queues); - ret = OMPI_ERR_NOT_AVAILABLE; - goto exit; -#endif - } else { - 
opal_show_help("help-mpi-btl-openib.txt", - "invalid qp type in receive_queues", true, - ompi_process_info.nodename, - mca_bcol_iboffload_component.receive_queues, - queues[qp]); - - ret = OMPI_ERR_BAD_PARAM; - - goto exit; - } - - ++qp; - } - - mca_bcol_iboffload_component.num_qps = MCA_BCOL_IBOFFLOAD_QP_LAST; - - qp = 0; -#define P(N) (((N) > count) ? NULL : params[(N)]) - while (NULL != queues[qp]) { - int count; - - params = opal_argv_split_with_empty(queues[qp], ','); - count = opal_argv_count(params); - - if ('P' == params[0][0]) { - if (count < 3 || count > 6) { - opal_show_help("help-mpi-btl-openib.txt", - "invalid pp qp specification", true, - ompi_process_info.nodename, queues[qp]); - - ret = OMPI_ERR_BAD_PARAM; - - goto exit; - } - - size = atoi_param(P(1), 0); - - rd_num = atoi_param(P(2), 256); - - /* by default set rd_low to be 3/4 of rd_num */ - rd_low = atoi_param(P(3), rd_num - (rd_num / 4)); - rd_win = atoi_param(P(4), (rd_num - rd_low) * 2); - rd_rsv = atoi_param(P(5), (rd_num * 2) / rd_win); - - - if ((rd_num - rd_low) > rd_win) { - opal_show_help("help-mpi-btl-openib.txt", "non optimal rd_win", - true, rd_win, rd_num - rd_low); - } - } else { - if (count < 3 || count > 5) { - opal_show_help("help-mpi-btl-openib.txt", - "invalid srq specification", true, - ompi_process_info.nodename, queues[qp]); - - ret = OMPI_ERR_BAD_PARAM; - - goto exit; - } - - size = atoi_param(P(1), 0); - rd_num = atoi_param(P(2), 256); - - /* by default set rd_low to be 3/4 of rd_num */ - rd_low = atoi_param(P(3), rd_num - (rd_num / 4)); - sd_max = atoi_param(P(4), rd_low / 4); - - IBOFFLOAD_VERBOSE(10, ("srq: rd_num is %d rd_low is %d sd_max is %d", - rd_num, rd_low, sd_max)); - - } - - if (rd_num <= rd_low) { - opal_show_help("help-mpi-btl-openib.txt", "rd_num must be > rd_low", - true, ompi_process_info.nodename, queues[qp]); - ret = OMPI_ERR_BAD_PARAM; - - goto exit; - } - - opal_argv_free(params); - - ++qp; - } - - params = NULL; - - for (qp = 0; qp < 
MCA_BCOL_IBOFFLOAD_QP_LAST; ++qp) { - mca_bcol_iboffload_component.qp_infos[qp].qp_index = qp; - - mca_bcol_iboffload_component.qp_infos[qp].type = type; - mca_bcol_iboffload_component.qp_infos[qp].size = size; - - mca_bcol_iboffload_component.qp_infos[qp].rd_num = rd_num; - mca_bcol_iboffload_component.qp_infos[qp].rd_low = rd_low; - - mca_bcol_iboffload_component.qp_infos[qp].rd_pp_win = rd_num - rd_low; - - if (MCA_BCOL_IBOFFLOAD_PP_QP == type) { - mca_bcol_iboffload_component.qp_infos[qp].u.pp_qp.rd_win = rd_win; - mca_bcol_iboffload_component.qp_infos[qp].u.pp_qp.rd_rsv = rd_rsv; - } else { - mca_bcol_iboffload_component.qp_infos[qp].u.srq_qp.sd_max = sd_max; - } - - if (NULL != setup_qps_fn[qp]) { - setup_qps_fn[qp](&mca_bcol_iboffload_component.qp_infos[qp]); - } - } - -exit: - if (NULL != params) { - opal_argv_free(params); - } - - if (NULL != queues) { - opal_argv_free(queues); - } - - return ret; -} - -static int progress_pending_collfrags(mca_bcol_iboffload_module_t *iboffload) -{ - mca_bcol_iboffload_collfrag_t *pending_collfrag; - int rc, size = opal_list_get_size(&iboffload->collfrag_pending); - - IBOFFLOAD_VERBOSE(10, ("Calling progress_pending_collfrags")); - - do { - pending_collfrag = (mca_bcol_iboffload_collfrag_t *) - opal_list_remove_first(&iboffload->collfrag_pending); - - IBOFFLOAD_VERBOSE(10, ("Get pending_collfrag - %p, iboffload - %p, " - "pending list size - %d.", pending_collfrag, iboffload, - opal_list_get_size(&iboffload->collfrag_pending))); - - /* Return back coll frag to coll request opal_list */ - opal_list_append(&pending_collfrag->coll_full_req->work_requests, - (opal_list_item_t *) pending_collfrag); - - rc = pending_collfrag->coll_full_req->progress_fn - (iboffload, pending_collfrag->coll_full_req); - if (OPAL_UNLIKELY(BCOL_FN_STARTED != rc && OMPI_SUCCESS != rc)) { - return OMPI_ERROR; - } - } while (--size > 0); - - return OMPI_SUCCESS; -} - - -/** - * Test - if we finished with the coll fragment descriptor, - * and free all 
resouces if so. - **/ -int -mca_bcol_iboffload_free_tasks_frags_resources( - mca_bcol_iboffload_collfrag_t *collfrag, - ompi_free_list_t *frags_free) -{ - int rc; - - mca_bcol_iboffload_task_t *task = collfrag->tasks_to_release; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - /* Support for multiple frags we will add later - * n_outstanding_frags = coll_req->n_frags_sent - coll_req->n_frag_net_complete; */ - - while (NULL != task) { - /* Return frag (is the reference counter is zero)*/ - rc = release_frags_on_task(task, frags_free); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - /* Return task: if the pointer is NULL => we assume the task - is a member of the common task list (tasks_free) */ - if (NULL == task->task_list) { - OMPI_FREE_LIST_RETURN_MT(&cm->tasks_free, - (ompi_free_list_item_t *) task); - } else { - OMPI_FREE_LIST_RETURN_MT(task->task_list, - (ompi_free_list_item_t *) task); - } - - task = task->next_task; - } - - return OMPI_SUCCESS; -} - -static void fatal_error(char *mesg) -{ - IBOFFLOAD_ERROR(("FATAL ERROR: %s", mesg)); - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_INTERN); -} - -#define RELEASE_COLLFRAG(cf) \ - do { \ - opal_list_remove_item(&(cf)->coll_full_req->work_requests, \ - (opal_list_item_t *) (cf)); \ - if (&(cf)->coll_full_req->first_collfrag != (cf)) { \ - OMPI_FREE_LIST_RETURN_MT(&mca_bcol_iboffload_component.collfrags_free, \ - (ompi_free_list_item_t *) (cf)); \ - } \ - } while (0) - -#define COLLFRAG_IS_DONE(cf) ((cf)->complete && (cf)->n_sends_completed == (cf)->n_sends) - -/* Pasha: Need to modify the code to progress pending queue only if relevant -* resource was released */ -#define PROGRESS_PENDING_COLLFRAG(cf) \ - if (OPAL_UNLIKELY(opal_list_get_size(&(cf)->coll_full_req->module->collfrag_pending) > 0)) { \ - int rc; \ - IBOFFLOAD_VERBOSE(10, ("Calling for PROGRESS_PENDING_COLLFRAG")); \ - rc = progress_pending_collfrags((cf)->coll_full_req->module); \ - if (OPAL_UNLIKELY(OMPI_ERROR 
== rc)) { \ - fatal_error("failed to progress_pending_collfrags\n"); \ - return 0; \ - } \ - } - - -static inline __opal_attribute_always_inline__ int - handle_collfrag_done(mca_bcol_iboffload_collfrag_t *coll_frag, - mca_bcol_iboffload_collreq_t *coll_request, - mca_bcol_iboffload_device_t *device) -{ - int rc; - - if (COLLFRAG_IS_DONE(coll_frag)) { - IBOFFLOAD_VERBOSE(10, ("Coll frag - %p already done.\n", coll_frag)); - - coll_request->n_frag_net_complete++; - IBOFFLOAD_VERBOSE(10, ("Free tasks resourse.\n")); - /* Check if we are done with this coll_frag and release resources if so. */ - rc = mca_bcol_iboffload_free_tasks_frags_resources(coll_frag, device->frags_free); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("mca_bcol_iboffload_free_tasks_frags_resources FAILED")); - fatal_error("Failed to mca_bcol_iboffload_free_tasks_frags_resources\n"); - return -1; - } - - BCOL_IBOFFLOAD_MQ_RETURN_CREDITS(coll_request->module, coll_frag->mq_index, coll_frag->mq_credits); - - RELEASE_COLLFRAG(coll_frag); - - PROGRESS_PENDING_COLLFRAG(coll_frag); - - IBOFFLOAD_VERBOSE(10, ("Alg %d: user_handle_freed - %d, n_frag_mpi_complete - %d, " - "n_fragments- %d, n_frag_net_complete - %d, n_fragments - %d.\n", - coll_frag->alg, - coll_request->user_handle_freed, - coll_request->n_frag_mpi_complete, - coll_request->n_fragments, - coll_request->n_frag_net_complete, - coll_request->n_fragments)); - - /* check for full message completion */ - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - } - - IBOFFLOAD_VERBOSE(10, ("Exit with success.\n")); - - return 0; -} - -/* - * IBOFFLOAD component progress. 
- */ - -static int progress_one_device(mca_bcol_iboffload_device_t *device) -{ - int ne, rc, count = 0; - - mca_bcol_iboffload_collfrag_t *coll_frag; - mca_bcol_iboffload_collreq_t *coll_request; - - struct ibv_wc wc; - memset(&wc, 0, sizeof(struct ibv_wc)); - - /* - * poll for collective completion - does not mean resources can - * be freed, as incomplete network level sends may still be pending - */ - - /* Poll for completion on completion on wait MQEs */ - if(0 != (ne = ibv_poll_cq(device->ib_mq_cq, 1, &wc))) { - do { - if (OPAL_UNLIKELY(0 > ne)) { - IBOFFLOAD_ERROR(("Device %s: " - "failed to poll MQ completion queue\n", - ibv_get_device_name(device->dev.ib_dev))); - fatal_error("failed to poll MQ completion queue\n"); - return count; - } - - if (OPAL_UNLIKELY(IBV_WC_SUCCESS != wc.status)) { - IBOFFLOAD_ERROR(("Device %s: " - "the completion with error on wait was gotten, status %d, opcode %d, " - "vendor_err 0x%x, qp %x, id 0x%x\n", ibv_get_device_name(device->dev.ib_dev), - wc.status, wc.opcode, wc.vendor_err, wc.qp_num, wc.wr_id)); - fatal_error("wc.status \n"); - return count; - } - - IBOFFLOAD_VERBOSE(10, ("The MQ completion was polled.\n")); - - ++count; - - /* get pointer to mca_bcol_iboffload_collfrag_t */ - coll_frag = (mca_bcol_iboffload_collfrag_t*) - (uint64_t) (uintptr_t) wc.wr_id; - - /* Only last MQ task of collective frag - sends completion signal, so if we got it => - all MQEs were done. 
*/ - coll_frag->complete = true; - - IBOFFLOAD_VERBOSE(10, ("MQ completion for algorithm %d coll_frag_addr %p ml buffer index %d", - coll_frag->alg, (void *)coll_frag, coll_frag->coll_full_req->ml_buffer_index)); - - /* full request descriptor */ - coll_request = coll_frag->coll_full_req; - - coll_request->n_frag_mpi_complete++; - - /* - * at this stage all receives have been completed, so - * unpack the data to user buffer, the resources will be released when we will done with all - * element in the task list - */ - - if (NULL != coll_request->completion_cb_fn) { - if (OMPI_SUCCESS != - coll_request->completion_cb_fn(coll_frag)) { - fatal_error("coll_request->completion_cb_fn\n"); - return count; - } - } - - if (coll_request->n_frag_mpi_complete == - coll_request->n_fragments) { - coll_request->super.req_complete = true; - opal_condition_broadcast(&ompi_request_cond); - IBOFFLOAD_VERBOSE(10, ("After opal_condition_broadcast.\n")); - } - - rc = handle_collfrag_done(coll_frag, coll_request, device); - if (0 != rc) { - return count; - } - } while(0 != (ne = ibv_poll_cq(device->ib_mq_cq, 1, &wc))); - - return count; - } - - /* poll the send completion queue */ - do { - ne = ibv_poll_cq(device->ib_cq, 1, &wc); - if (0 < ne) { - if (OPAL_UNLIKELY(IBV_WC_SUCCESS != wc.status)) { - IBOFFLOAD_ERROR(("Device %s, " - "the completion with error on send was gotten, status %d, opcode %d, " - "vendor_err 0x%x, qp %x, id 0x%x\n", ibv_get_device_name(device->dev.ib_dev), - wc.status, wc.opcode, wc.vendor_err, wc.qp_num, wc.wr_id)); - -#if OPAL_ENABLE_DEBUG - { - mca_bcol_iboffload_module_t *iboffload; - int i, qp_index, num_qps = mca_bcol_iboffload_component.num_qps; - - coll_frag = (mca_bcol_iboffload_collfrag_t*) - (uint64_t) (uintptr_t) wc.wr_id; - - iboffload = coll_frag->coll_full_req->module; - - for (i = 0; i < iboffload->num_endpoints; ++i) { - mca_bcol_iboffload_endpoint_t *ep = iboffload->endpoints[i]; - - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - if (NULL 
!= ep->qps[qp_index].qp->lcl_qp && - wc.qp_num == ep->qps[qp_index].qp->lcl_qp->qp_num) { - IBOFFLOAD_ERROR(("Module - %p, coll_frag - %p, " - "destination %d, qp index - %d.", - iboffload, coll_frag, i, qp_index)); - } - } - } - } -#endif - fatal_error("Failed to ibv_poll_cq\n"); - return count; - } - - ++count; - - /* get pointer to mca_bcol_iboffload_collfrag_t */ - coll_frag = (mca_bcol_iboffload_collfrag_t*) - (uint64_t) (uintptr_t) wc.wr_id; - - /* update the number of completed sends */ - coll_frag->n_sends_completed++; - - IBOFFLOAD_VERBOSE(10, ("Send CQ completion for algorithm %d coll_frag_addr %p ml buffer index %d", - coll_frag->alg, (void *)coll_frag, coll_frag->coll_full_req->ml_buffer_index)); - - IBOFFLOAD_VERBOSE(10, ("Alg %d coll_frag_addr %p: n_sends_completed - %d, n_sends - %d.\n", - coll_frag->alg, (void *)coll_frag, - coll_frag->n_sends_completed, - coll_frag->n_sends)); - - assert(coll_frag->n_sends_completed <= coll_frag->n_sends); - - /* full message descriptor */ - coll_request = coll_frag->coll_full_req; - - /* check to see if all sends are complete from the network - * perspective */ - rc = handle_collfrag_done(coll_frag, coll_request, device); - if (0 != rc) { - return count; - } - } else if (OPAL_UNLIKELY(0 > ne)) { - IBOFFLOAD_ERROR(("Device %s: " - "failed to poll send completion queue\n", - ibv_get_device_name(device->dev.ib_dev))); - fatal_error("failed to poll send completion queue\n"); - return count; - } - } while (0 != ne); - - return count; -} - -int mca_bcol_iboffload_component_progress(void) -{ - int i, count = 0; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - opal_pointer_array_t *devs = &cm->devices; - - int devices_count = cm->num_devs; - - for(i = 0; i < devices_count; ++i) { - mca_bcol_iboffload_device_t *device = - opal_pointer_array_get_item(devs, i); - - if (OPAL_LIKELY(device->activated)) { - count += progress_one_device(device); - } - } - - return count; -} - -#if OPAL_ENABLE_DEBUG /* 
debug code */ -int task_to_rank(mca_bcol_iboffload_module_t *iboffload, struct mqe_task *task) -{ - int i, j, num_qps = mca_bcol_iboffload_component.num_qps; - for (i = 0; i < iboffload->num_endpoints; i++) { - for (j = 0; j < num_qps; j++) { - if (task->post.qp == iboffload->endpoints[i]->qps[j].qp->lcl_qp) { - return i; - } - } - } - - return -1; /* not found ! */ -} - -int wait_to_rank(mca_bcol_iboffload_module_t *iboffload, struct mqe_task *task) -{ - int i, j; - for (i = 0; i < iboffload->num_endpoints; i++) { - for (j = 0; j < IBOFFLOAD_CQ_LAST; j++) { - if (task->wait.cq == iboffload->endpoints[i]->recv_cq[j]) { - return i; - } - } - } - - return -1; /* not found ! */ -} - -#endif /* debug code */ diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_device.h b/ompi/mca/bcol/iboffload/bcol_iboffload_device.h deleted file mode 100644 index a7503df4f07..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_device.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_DEVICE_H -#define MCA_BCOL_IBOFFLOAD_DEVICE_H - -#include "ompi_config.h" - -#include -#include - -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" - -#define BCOL_IBOFFLOAD_DUMMY_MEM_SIZE 1 - -BEGIN_C_DECLS - -/* Device OBJ */ -struct mca_bcol_iboffload_device_t { - opal_list_item_t super; - - bool activated; - - struct ompi_common_ofacm_base_dev_desc_t dev; - struct ibv_pd *ib_pd; - struct ibv_device_attr ib_dev_attr; - - int num_act_ports; - - struct mca_bcol_iboffload_port_t *ports; - struct ibv_cq *ib_cq; - - /* CQ for MQs of all iboffload modules on this device */ - struct ibv_cq *ib_mq_cq; - - /* The free list of registered buffers - * since the registration depends on PD, it is - * most resonable place to keep the frags */ - ompi_free_list_t *frags_free; - mca_mpool_base_module_t *mpool; - - /* netowrk context */ - bcol_base_network_context_t *net_context; - - /* We keep dummy frags for all QPs on each device, - possibly some of QPs don't need it but anyway we distribute dummy - for them. All dummies point to a same byte of memory. */ - mca_bcol_iboffload_frag_t dummy_frags[MCA_BCOL_IBOFFLOAD_QP_LAST]; - - /* Registred memory for the dummy frags */ - char dummy_mem[BCOL_IBOFFLOAD_DUMMY_MEM_SIZE]; - - /* Registration info of the dummy memory */ - mca_bcol_iboffload_reg_t dummy_reg; -}; - -typedef struct mca_bcol_iboffload_device_t mca_bcol_iboffload_device_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_device_t); - -END_C_DECLS - -#endif /* MCA_BCOL_IBOFFLOAD_DEVICE_H */ - diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.c b/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.c deleted file mode 100644 index 50d0eeeb8c7..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.c +++ /dev/null @@ -1,373 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. 
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include - -#include "ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/common/ofacm/connect.h" - -#include "opal/threads/mutex.h" -#include "opal/class/opal_object.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_endpoint.h" - -static void mca_bcol_iboffload_endpoint_construct(mca_bcol_iboffload_endpoint_t *ep) -{ - ep->iboffload_module = NULL; - ep->ibnet_proc = NULL; - - ep->qps = (mca_bcol_iboffload_endpoint_qp_t *) - calloc(mca_bcol_iboffload_component.num_qps, - sizeof(mca_bcol_iboffload_endpoint_qp_t)); - - ep->index = 0; - OBJ_CONSTRUCT(&ep->endpoint_lock, opal_mutex_t); - OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t); - - memset(ep->recv_cq, 0, IBOFFLOAD_CQ_LAST * sizeof(ep->recv_cq[0])); - memset(&ep->qp_config, 0, sizeof(ompi_common_ofacm_base_qp_config_t)); - - ep->cpc_context = NULL; - - memset(&ep->remote_zero_rdma_addr, 0, sizeof(mca_bcol_iboffload_rdma_info_t)); - memset(&ep->remote_rdma_block, 0, sizeof(mca_bcol_iboffload_rem_rdma_block_t)); - - ep->need_toset_remote_rdma_info = false; -} - -static void mca_bcol_iboffload_endpoint_destruct(mca_bcol_iboffload_endpoint_t *ep) -{ - int qp_index, num_qps, i; - ompi_free_list_item_t *item; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - num_qps = cm->num_qps; - - IBOFFLOAD_VERBOSE(10, ("Destruct: ep - %p, ep->index - %d", ep, ep->index)); - - if (NULL != ep->qps) { - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - do { - item = (ompi_free_list_item_t *) - opal_list_remove_first(&ep->qps[qp_index].preposted_frags); - if(OPAL_LIKELY(NULL != item)) { - OMPI_FREE_LIST_RETURN_MT(&ep->device->frags_free[qp_index], item); - } - } while (NULL != item); - - 
OBJ_DESTRUCT(&ep->qps[qp_index].preposted_frags); - } - - free(ep->qps); - } - - OBJ_DESTRUCT(&ep->endpoint_lock); - OBJ_DESTRUCT(&ep->pending_frags); - - /* If the CPC has an endpoint_finalize function, call it */ - if (NULL != ep->endpoint_cpc->cbm_endpoint_finalize) { - ep->endpoint_cpc->cbm_endpoint_finalize(ep->cpc_context); - } - - for (i = 0; i < IBOFFLOAD_CQ_LAST; i++) { - if (NULL != ep->recv_cq[i]) { - if (ibv_destroy_cq(ep->recv_cq[i])) { - IBOFFLOAD_ERROR(("Endpoint %x " - ", failed to destroy CQ, errno says %s", - ep, strerror(errno))); - } - } - } -} - -OBJ_CLASS_INSTANCE(mca_bcol_iboffload_endpoint_t, - opal_list_item_t, - mca_bcol_iboffload_endpoint_construct, - mca_bcol_iboffload_endpoint_destruct); - -/* Pasha: Add some error message here */ - -/* - * Called when the CPC has established a connection on an endpoint - */ -static void mca_bcol_iboffload_endpoint_invoke_error(void *context) -{ - mca_bcol_iboffload_endpoint_t *endpoint = (mca_bcol_iboffload_endpoint_t *) context; - IBOFFLOAD_ERROR(("Getting error on endpoint - %p!", endpoint)); -} - - -/* Pasha: Need to add more logic here */ -static void mca_bcol_iboffload_endpoint_cpc_complete(void *context) -{ - mca_bcol_iboffload_endpoint_t *endpoint = (mca_bcol_iboffload_endpoint_t *) context; - - IBOFFLOAD_VERBOSE(10, ("Endpoint - %p for comm rank %d: CPC complete.\n", - endpoint, endpoint->iboffload_module->ibnet->super.group_list[endpoint->index])); - - if (OMPI_SUCCESS != - mca_bcol_iboffload_exchange_rem_addr(endpoint)) { - IBOFFLOAD_ERROR(("endpoint - %p, " - "remote addr exchange error.\n", endpoint)); - } - /* The connection is correctly setup. Now we can decrease the - event trigger. 
*/ - opal_progress_event_users_decrement(); -} - -/* Vasily: Need to add more logic here */ -int mca_bcol_iboffload_endpoint_post_recvs(void *context) -{ - int qp_index, rc, num_qps; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - mca_bcol_iboffload_endpoint_t *endpoint = - (mca_bcol_iboffload_endpoint_t *) context; - - IBOFFLOAD_VERBOSE(10, ("endpoint - %p, post of %d recvs !", - endpoint, cm->qp_infos[0].rd_num)); - /* TODO Pasha - fix later */ - num_qps = cm->num_qps; - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - rc = mca_bcol_iboffload_prepost_recv(endpoint, qp_index, - cm->qp_infos[qp_index].rd_num); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - /* Pasha: Need to add more failure logic */ - IBOFFLOAD_ERROR(("Failed to prepost recv fragments " - "on qp index %d, return code - %d", - qp_index, rc)); - - return OMPI_ERROR; - } - } - - return OMPI_SUCCESS; -} - -/* The function go over each ibnet proc and creates endpoint for each one */ -int mca_bcol_iboffloads_create_endpoints(mca_sbgp_ibnet_connection_group_info_t *cgroup, - mca_bcol_iboffload_module_t *module) { - uint32_t i; - mca_bcol_iboffload_endpoint_t *ep; - - if (NULL == cgroup || NULL == module) { - IBOFFLOAD_ERROR(("Bad parameters for create endpoints function.")); - return OMPI_ERROR; - } - - module->num_endpoints = cgroup->num_procs; - module->endpoints = (mca_bcol_iboffload_endpoint_t **) - calloc(module->num_endpoints, - sizeof(mca_bcol_iboffload_endpoint_t *)); - if (NULL == module->endpoints) { - IBOFFLOAD_ERROR(("Error memory allocation for endpoints array" - ", errno says %s", strerror(errno))); - return OMPI_ERROR; - } - - IBOFFLOAD_VERBOSE(10, ("iboffload - %p, num of endpoints - %d.\n", - module, module->num_endpoints)); -/* Ishai: No need to open so many endpoints. 
We are not talking with all procs */ - for (i = 0; i < cgroup->num_procs; i++) { - ep = OBJ_NEW(mca_bcol_iboffload_endpoint_t); - /* check qp memory allocation */ - if (NULL == ep->qps) { - IBOFFLOAD_ERROR(("Failed to allocate memory for qps")); - return OMPI_ERROR; - } - /* init new endpoint */ - ep->index = i; - ep->iboffload_module = module; - /* saving the device for the destruction - iboffload module amy not exist than */ - ep->device = ep->iboffload_module->device; - ep->ibnet_proc = (mca_sbgp_ibnet_proc_t *) - opal_pointer_array_get_item(cgroup->ibnet_procs, i); - if (NULL == ep->ibnet_proc) { - IBOFFLOAD_ERROR(("Failed to get proc pointer, for index %d", i)); - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != - mca_bcol_iboffload_endpoint_init(ep)) { - IBOFFLOAD_ERROR(("Failed to init endpoint - %p", ep)); - return OMPI_ERROR; - } - - IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, ep index - %d, iboffload - %p, " - "cpc contex - %p.\n", ep, ep->index, - ep->iboffload_module, ep->cpc_context)); - - /* Add the new endpoint to array of endpoints */ - module->endpoints[i] = ep; - } - - /* Pasha: Need to add better clean-up here */ - return OMPI_SUCCESS; -} - -static int config_qps(mca_bcol_iboffload_endpoint_t *ep) -{ - int qp_index; - int ret = OMPI_SUCCESS; - - ompi_common_ofacm_base_qp_config_t *qp_config = &ep->qp_config; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - qp_config->num_srqs = 0; - qp_config->srq_num = NULL; - - qp_config->num_qps = cm->num_qps; - - qp_config->init_attr = (struct ibv_qp_init_attr *) - calloc(qp_config->num_qps, sizeof(struct ibv_qp_init_attr)); - - if (NULL == qp_config->init_attr) { - IBOFFLOAD_ERROR(("Failed allocate memory for qp init attributes")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - - goto config_qps_exit; - } - - qp_config->attr = (struct ibv_qp_attr *) - calloc(qp_config->num_qps, sizeof(struct ibv_qp_attr)); - - if (OPAL_UNLIKELY(NULL == qp_config->attr)) { - IBOFFLOAD_ERROR(("Failed allocate memory for 
qp attributes")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - - goto config_qps_exit; - } - - /* we must to specify that the qps are special */ - qp_config->init_attr_mask = (uint32_t *) - calloc(qp_config->num_qps, sizeof(uint32_t)); - - if (OPAL_UNLIKELY(NULL == qp_config->init_attr_mask)) { - IBOFFLOAD_ERROR(("Failed allocate memory for qp mask.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - - goto config_qps_exit; - } - - /* qp_config->rtr_attr_mask = qp_config->rts_attr_mask = NULL; */ - - qp_config->rtr_attr_mask = (uint32_t *) - calloc(qp_config->num_qps, sizeof(uint32_t)); - - if (OPAL_UNLIKELY(NULL == qp_config->rtr_attr_mask)) { - IBOFFLOAD_ERROR(("Failled allocate memory for qp rtr attributes mask.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - - goto config_qps_exit; - } - - qp_config->rts_attr_mask = (uint32_t *) - calloc(qp_config->num_qps, sizeof(uint32_t)); - - if (OPAL_UNLIKELY(NULL == qp_config->rts_attr_mask)) { - IBOFFLOAD_ERROR(("Failled allocate memory for qp rts attributes mask.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - - goto config_qps_exit; - } - - for (qp_index = 0; qp_index < qp_config->num_qps; ++qp_index) { - mca_bcol_iboffload_config_qps_fn_t config_qp = - cm->qp_infos[qp_index].config_qp; - - if (NULL != config_qp) { - config_qp(qp_index, ep, qp_config); - } - } - -config_qps_exit: - return ret; -} - -/* The fucntion is called for endpoints - * with MCA_COMMON_OFACM_USER_CUSTOM state only, - * we need a OPAL_THREAD_LOCK before call to this function */ -int mca_bcol_iboffload_endpoint_init(mca_bcol_iboffload_endpoint_t *ep) -{ - int qp_index, cq_index, num_qps; - ompi_common_ofacm_base_module_t *cpc; - - mca_bcol_iboffload_device_t *device = ep->iboffload_module->device; - - mca_sbgp_ibnet_connection_group_info_t *cgroup = - &ep->iboffload_module->ibnet->cgroups[ep->iboffload_module->cgroup_index]; - - for (cq_index = 0; cq_index < IBOFFLOAD_CQ_LAST; cq_index++) { - if (OMPI_SUCCESS != - mca_bcol_iboffload_adjust_cq(device, &ep->recv_cq[cq_index])) { - 
IBOFFLOAD_ERROR(("Error creating CQ for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - /* OBJ_RELEASE(ep); */ /* Vasily: What must we do in this case ??? */ - return OMPI_ERROR; - } - } - - if (OPAL_UNLIKELY(OMPI_SUCCESS != config_qps(ep))) { - IBOFFLOAD_ERROR(("Error configure QPs for endpoint %x errno says %s", - ep, strerror(errno))); - return OMPI_ERROR; - } - - /* Adding here one more redirection in critical path. Need to think - * what is the best way to prevent it */ - - IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, rem port - %d", ep, - ep->ibnet_proc->remote_ports_info[BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep)].id)); - - cpc = ep->ibnet_proc->remote_ports_info[BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep)].local_cpc; - ep->endpoint_cpc = cpc; /* caching pointer to cpc */ - - if (NULL != cpc->cbm_endpoint_init) { - ep->cpc_context = cpc->cbm_endpoint_init( - ep->ibnet_proc->ompi_proc, - &ep->qp_config, - device->ib_pd, - ep->iboffload_module->subnet_id, - ep->iboffload_module->ibnet->group_id, - ep->iboffload_module->lid, - /* Remote lid of target module */ - ep->ibnet_proc->remote_ports_info[BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep)].lid, - ep->index, /* user context index */ - (void *) ep, /* user context */ - cpc, - mca_bcol_iboffload_endpoint_cpc_complete, - mca_bcol_iboffload_endpoint_invoke_error, - mca_bcol_iboffload_endpoint_post_recvs); - - if (OPAL_UNLIKELY(NULL == ep->cpc_context)) { - IBOFFLOAD_ERROR(("Endpoint - %p, failed to init context", ep)); - /* OBJ_RELEASE(ep); */ /* Vasily: What must we do in this case ??? 
*/ - return OMPI_ERROR; - } - - /* Updating remote port info */ - num_qps = mca_bcol_iboffload_component.num_qps; - - ep->remote_info = &ep->cpc_context->remote_info; - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - ep->qps[qp_index].qp = &ep->cpc_context->qps[qp_index]; - } - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.h b/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.h deleted file mode 100644 index 7a57b57a7b9..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.h +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_ENDPOINT_H -#define MCA_BCOL_IBOFFLOAD_ENDPOINT_H - -#include "ompi_config.h" -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" - -#include "ompi/mca/sbgp/ibnet/sbgp_ibnet.h" - -#define BCOL_IBOFFLOAD_ENDPOINT_PORT(cgroup, ep) (ep)->ibnet_proc->use_port[(cgroup)->index] -#define BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep) (BCOL_IBOFFLOAD_ENDPOINT_PORT(cgroup, ep) - 1) - -BEGIN_C_DECLS - -struct mca_bcol_iboffload_endpoint_qp_t { - struct ompi_common_ofacm_base_qp_t *qp; - size_t ib_inline_max; - int32_t sd_wqe; /* Number of available send wqe entries */ - int32_t rd_wqe; /* Number of available recv wqe entries */ - opal_list_t preposted_frags; /* List of preposted frags */ - /* opal_mutex_t lock; */ /* Do I need lock here ? 
*/ -}; - -typedef struct mca_bcol_iboffload_endpoint_qp_t mca_bcol_iboffload_endpoint_qp_t; - -enum { - IBOFFLOAD_CQ_SMALL_MESSAGES = 0, - IBOFFLOAD_CQ_SYNC, - IBOFFLOAD_CQ_LARGE_MESSAGES, - IBOFFLOAD_CQ_LAST -}; - -/* Endpoint object */ -struct mca_bcol_iboffload_endpoint_t { - opal_list_item_t super; - - /** BTL module that created this connection */ - mca_bcol_iboffload_module_t *iboffload_module; - - /** proc structure corresponding to endpoint */ - mca_sbgp_ibnet_proc_t *ibnet_proc; - - /** lock for concurrent access to endpoint state */ - opal_mutex_t endpoint_lock; - - /** Penging frag list */ - opal_list_t pending_frags; - - /** QPs information */ - mca_bcol_iboffload_endpoint_qp_t *qps; - - /** endpoint index on array */ - int32_t index; - - /** CQ for receive queues on this endpoint */ - struct ibv_cq *recv_cq[IBOFFLOAD_CQ_LAST]; - - /** QP configuration information */ - ompi_common_ofacm_base_qp_config_t qp_config; - - /** cpc context */ - ompi_common_ofacm_base_local_connection_context_t *cpc_context; - - /** caching pointer to remote info */ - ompi_common_ofacm_base_remote_connection_context_t *remote_info; - - /** caching pointer to cpc */ - ompi_common_ofacm_base_module_t *endpoint_cpc; - - /** The struct is used for zero RDMA with immediate - in some collectives, in barrier for example. 
*/ - mca_bcol_iboffload_rdma_info_t remote_zero_rdma_addr; - mca_bcol_iboffload_rem_rdma_block_t remote_rdma_block; - - /** The pointer to device - In the destruction function - the iboffload module may not exist any more - caching the device */ - struct mca_bcol_iboffload_device_t *device; - - bool need_toset_remote_rdma_info; - - mca_bcol_iboffload_rdma_info_t remote_rdma_info[MAX_REMOTE_RDMA_INFO]; -}; -typedef struct mca_bcol_iboffload_endpoint_t mca_bcol_iboffload_endpoint_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_endpoint_t); - -/* Function declaration */ -int mca_bcol_iboffload_endpoint_init(mca_bcol_iboffload_endpoint_t *ep); - -static inline __opal_attribute_always_inline__ - int check_endpoint_state(mca_bcol_iboffload_endpoint_t *ep, - mca_bcol_base_descriptor_t *des, - opal_list_t *pending_list) -{ - int rc = OMPI_ERR_RESOURCE_BUSY; - - OPAL_THREAD_LOCK(&ep->cpc_context->context_lock); - /* Adding here one more redirection in critical path. Need to think - * what is the best way to prevent it */ - switch(ep->cpc_context->state) { - case MCA_COMMON_OFACM_CLOSED: - rc = ep->endpoint_cpc->cbm_start_connect(ep->cpc_context); - if (OMPI_SUCCESS == rc) { - rc = OMPI_ERR_RESOURCE_BUSY; - } - /* - * As long as we expect a message from the peer (in order - * to setup the connection) let the event engine pool the - * OOB events. Note: we increment it once peer active - * connection. 
- */ - opal_progress_event_users_increment(); - /* fall through */ - default: - /* opal_list_append(pending_list, (opal_list_item_t *)des); */ /* Vasily: will be uncomment later */ - break; - case MCA_COMMON_OFACM_FAILED: - rc = OMPI_ERR_UNREACH; - break; - case MCA_COMMON_OFACM_CONNECTED: - rc = OMPI_SUCCESS; - break; - } - - OPAL_THREAD_UNLOCK(&ep->cpc_context->context_lock); - return rc; -} - -int mca_bcol_iboffloads_create_endpoints(mca_sbgp_ibnet_connection_group_info_t *cgroup, - mca_bcol_iboffload_module_t *module); - -int mca_bcol_iboffload_endpoint_post_recvs(void *context); - -static inline __opal_attribute_always_inline__ int - mca_bcol_iboffload_prepost_recv( - mca_bcol_iboffload_endpoint_t *endpoint, - int qp_index, int num_to_prepost) -{ - mca_bcol_iboffload_prepost_qps_fn_t prepost_recv = - mca_bcol_iboffload_component.qp_infos[qp_index].prepost_recv; - if (NULL != prepost_recv) { - return prepost_recv(endpoint, qp_index, num_to_prepost); - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int - mca_bcol_iboffload_post_ml_scatter_recv_frag( - int qp_index, uint32_t dest_rank, - int nitems, struct iovec *buff_iovec, - uint32_t lkey, - struct ibv_sge *sg_entries, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_module_t *iboffload) -{ - int ret, start_wr_index; - struct ibv_recv_wr *recv_wr, *recv_bad; - int i; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[dest_rank]; - - mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs; - mca_bcol_iboffload_device_t *device = endpoint->iboffload_module->device; - - IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, qp_index %d", - (void *) endpoint, qp_index)); - - /* make sure that we do not overrun number of rd_wqe */ - if (0 >= endpoint->qps[qp_index].rd_wqe) { - IBOFFLOAD_VERBOSE(10, ("There are no rd_wqe - %d", - endpoint->qps[qp_index].rd_wqe)); - - return 0; - } - - 
OPAL_THREAD_LOCK(&recv_wrs->lock); - - /* Calculate start index in array - * of pre-allocated work requests */ - start_wr_index = cm->qp_infos[qp_index].rd_num - 1; - recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index]; - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, " - "start index of WRs - %d", (void *) endpoint, - qp_index, start_wr_index)); - - for (i = 0; i < nitems; i++) { - sg_entries[i].length = buff_iovec[i].iov_len; - sg_entries[i].addr = (uint64_t)buff_iovec[i].iov_base; - sg_entries[i].lkey = lkey; - - IBOFFLOAD_VERBOSE(10, ("Recv SGE List item %d , length %d , address %p", - i, sg_entries[i].length, sg_entries[i].addr)); - - IBOFFLOAD_VERBOSE(10, ("Recv SGE List item %d , iovec length %d", - i, buff_iovec[i].iov_len)); - } - - recv_wr->num_sge = nitems; - recv_wr->sg_list = sg_entries; - - /* Set the tail */ - recv_wr->next = NULL; - - /* post the list of recvs */ - ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad); - if (OPAL_UNLIKELY(0 != ret)) { - IBOFFLOAD_ERROR(("ibv_post_recv failed (%s), error: %s [%d], " - "qp_index - %d.\n", - ibv_get_device_name(device->dev.ib_dev), - strerror(errno), ret, qp_index)); - - return -1; - } - - /* decresing numbers of free recv wqe */ - --endpoint->qps[qp_index].rd_wqe; - - OPAL_THREAD_UNLOCK(&recv_wrs->lock); - - IBOFFLOAD_VERBOSE(10, ("Return success: " - "endpoint %p, qp_index %d, dest_rank %d", - endpoint, qp_index, dest_rank)); - - return 1; -} - -static inline __opal_attribute_always_inline__ int - mca_bcol_iboffload_prepost_ml_recv_frag( - int qp_index, uint32_t dest_rank, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_module_t *iboffload) -{ - int ret, start_wr_index; - struct ibv_recv_wr *recv_wr, *recv_bad; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[dest_rank]; - - mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs; - 
mca_bcol_iboffload_device_t *device = endpoint->iboffload_module->device; - - IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, qp_index %d", - (void *) endpoint, qp_index)); - - /* make sure that we do not overrun number of rd_wqe */ - if (0 >= endpoint->qps[qp_index].rd_wqe) { - IBOFFLOAD_VERBOSE(10, ("There are no rd_wqe - %d", - endpoint->qps[qp_index].rd_wqe)); - - return 0; - } - - OPAL_THREAD_LOCK(&recv_wrs->lock); - - /* Calculate start index in array - * of pre-allocated work requests */ - start_wr_index = cm->qp_infos[qp_index].rd_num - 1; - recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index]; - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, " - "start index of WRs - %d", (void *) endpoint, - qp_index, start_wr_index)); - - recv_wr->sg_list = &frag->sg_entry; - - /* Set the tail */ - recv_wr->next = NULL; - - /* post the list of recvs */ - ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad); - if (OPAL_UNLIKELY(0 != ret)) { - IBOFFLOAD_ERROR(("ibv_post_recv failed (%s), error: %s [%d], " - "qp_index - %d.\n", - ibv_get_device_name(device->dev.ib_dev), - strerror(errno), ret, qp_index)); - - return -1; - } - - /* decresing numbers of free recv wqe */ - --endpoint->qps[qp_index].rd_wqe; - - OPAL_THREAD_UNLOCK(&recv_wrs->lock); - - IBOFFLOAD_VERBOSE(10, ("Return success: " - "endpoint %p, qp_index %d, dest_rank %d", - endpoint, qp_index, dest_rank)); - - return 1; -} - -static inline __opal_attribute_always_inline__ - mca_bcol_iboffload_frag_t* mca_bcol_iboffload_get_preposted_recv_frag( - mca_bcol_iboffload_module_t *iboffload, - int source, int qp_index) -{ - mca_bcol_iboffload_frag_t *frag; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[source]; - - frag = mca_bcol_iboffload_component.qp_infos[qp_index].get_preposted_recv(endpoint, qp_index); - - /* do we want to run prepost */ - IBOFFLOAD_VERBOSE(10, ("source - %d, qp_index - %d; " - "allocating preposted addr %p.\n", - source, 
qp_index, (void *) frag->sg_entry.addr)); - - if (OPAL_LIKELY(NULL != frag)) { - frag->next = NULL; - } - - return frag; -} - -END_C_DECLS - -#endif /* MCA_BCOL_IBOFFLOAD_ENDPOINT_H */ diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_fanin.c b/ompi/mca/bcol/iboffload/bcol_iboffload_fanin.c deleted file mode 100644 index 49f771d46b5..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_fanin.c +++ /dev/null @@ -1,350 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -static int mca_bcol_iboffload_fanin_leader_progress( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc = OMPI_SUCCESS, leader_rank = 0, rank, - sbgp_size = iboffload->ibnet->super.group_size; - - struct mqe_task *last_wait = NULL; - - mca_bcol_iboffload_task_t *wait_task = NULL; - mca_bcol_iboffload_frag_t *preposted_recv_frag = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - for (rank = leader_rank + 1; rank < sbgp_size; ++rank) { - /* post wait 
*/ - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, rank, coll_request->qp_index); - if(NULL == preposted_recv_frag) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if(NULL == wait_task) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } - - /* end of list */ - *mqe_ptr_to_set = NULL; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if(OMPI_SUCCESS != rc) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int mca_bcol_iboffload_fanin_proxy_progress( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc = OMPI_SUCCESS, leader_rank = 0; - - struct mqe_task *last_send = NULL; - mca_bcol_iboffload_task_t *send_task = NULL; - mca_bcol_iboffload_frag_t *send_fragment = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false 
== BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - /* post send */ - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - leader_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - if(NULL == send_fragment) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n")); - goto out_of_resources; - } - - send_task = mca_bcol_iboffload_get_send_task(iboffload, leader_rank, MCA_BCOL_IBOFFLOAD_QP_BARRIER, - send_fragment, coll_fragment, INLINE); - if(NULL == send_task) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - - /* end of list */ - *mqe_ptr_to_set = NULL; - assert(NULL != last_send); - - last_send->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if(OMPI_SUCCESS != rc) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int mca_bcol_iboffload_fanin_init( - bcol_function_args_t *input_args, - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t **coll_request) -{ - ompi_free_list_item_t *item = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = NULL; - - 
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_init")); - - OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item); - if(OPAL_UNLIKELY(NULL == item)) { - IBOFFLOAD_VERBOSE(10, ("Failing for coll request free list waiting.\n")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - (*coll_request)->progress_fn = iboffload->fanin_algth; - - (*coll_request)->completion_cb_fn = NULL; - (*coll_request)->order_info = &input_args->order_info; - - (*coll_request)->module = iboffload; - (*coll_request)->ml_buffer_index = input_args->buffer_index; - (*coll_request)->buffer_info[SBUF].offset = 0; - (*coll_request)->buffer_info[RBUF].offset = 0; - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER; - - input_args->bcol_opaque_data = (void *) (*coll_request); - - /* finish initializing full message descriptor */ - (*coll_request)->n_fragments = 1; - (*coll_request)->n_frags_sent = 1; - - (*coll_request)->n_frag_mpi_complete = 0; - (*coll_request)->n_frag_net_complete = 0; - - (*coll_request)->user_handle_freed = false; - - /* - * setup collective work request - */ - - /* get collective frag */ - coll_fragment = &(*coll_request)->first_collfrag; - mca_bcol_iboffload_collfrag_init(coll_fragment); - - coll_fragment->alg = FANIN_ALG; - coll_fragment->mq_index = COLL_MQ; - - /* Set mq credits */ - coll_fragment->mq_credits = iboffload->alg_task_consump[FANIN_ALG]; - - /* set pointers for (coll frag) <-> (coll full request) */ - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(*coll_request, coll_fragment); - - return OMPI_SUCCESS; -} - -/************************************************************************ - ************************ New style Fan-In ****************************** - ***********************************************************************/ -static int mca_bcol_iboffload_new_style_fanin_progress( - bcol_function_args_t *input_args, - struct 
mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - - if (BCOL_IS_COMPLETED(coll_request)) { - coll_request->user_handle_freed = true; - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - - IBOFFLOAD_VERBOSE(10, ("Fan-In already done.\n")); - return BCOL_FN_COMPLETE; - } - - return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_new_style_fanin_first_call( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int i = 0, leader_rank = 0, /* We always suppose - the lowest index is a leader */ - my_rank = iboffload->ibnet->super.my_index, - sbgp_size = iboffload->ibnet->super.group_size; - - mca_bcol_iboffload_endpoint_t *ep = NULL; - mca_sbgp_ibnet_proc_t *my_ibnet_proc = iboffload->endpoints[my_rank]->ibnet_proc; - - assert(NULL != my_ibnet_proc); - - if (MCA_SBGP_IBNET_NODE_LEADER == my_ibnet_proc->duty) { - iboffload->fanin_algth = mca_bcol_iboffload_fanin_leader_progress; - iboffload->alg_task_consump[FANIN_ALG] += sbgp_size; - - for (i = leader_rank + 1; i < sbgp_size; ++i) { - ep = iboffload->endpoints[i]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - } else { - iboffload->fanin_algth = mca_bcol_iboffload_fanin_proxy_progress; - iboffload->alg_task_consump[FANIN_ALG] += 1; - - ep = iboffload->endpoints[leader_rank]; - while(OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - return iboffload->fanin_algth(iboffload, coll_request); -} - -static int mca_bcol_iboffload_new_style_fanin_intra( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int rc = OMPI_SUCCESS; - - struct mca_bcol_iboffload_collreq_t *coll_request = NULL; - mca_bcol_iboffload_module_t *iboffload = - (mca_bcol_iboffload_module_t *) 
const_args->bcol_module; - - assert(NULL != iboffload); - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, input_args); - - /* Init Fan-In collective reqeust */ - rc = mca_bcol_iboffload_fanin_init(input_args, iboffload, &coll_request); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Error from mca_bcol_iboffload_fanin_init.\n")); - return BCOL_FN_NOT_STARTED; - } - - rc = iboffload->fanin_algth(iboffload, coll_request); - if (OPAL_UNLIKELY(OMPI_ERROR == rc)) { - return BCOL_FN_NOT_STARTED; - } - - return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_fanin_register(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register iboffload Fan-In.\n")); - - comm_attribs.bcoll_type = BCOL_FANIN; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_new_style_fanin_intra, - mca_bcol_iboffload_new_style_fanin_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_fanout.c b/ompi/mca/bcol/iboffload/bcol_iboffload_fanout.c deleted file mode 100644 index 9ac93d16e71..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_fanout.c +++ /dev/null @@ -1,349 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -static int mca_bcol_iboffload_fanout_leader_progress( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc = OMPI_SUCCESS, leader_rank = 0, rank, - sbgp_size = iboffload->ibnet->super.group_size; - - struct mqe_task *last_send = NULL; - mca_bcol_iboffload_task_t *send_task = NULL; - mca_bcol_iboffload_frag_t *send_fragment = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - for (rank = leader_rank + 1; rank < sbgp_size; ++rank) { - /* post send */ - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - if(NULL == send_fragment) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n")); - goto out_of_resources; - } - - send_task = mca_bcol_iboffload_get_send_task(iboffload, rank, MCA_BCOL_IBOFFLOAD_QP_BARRIER, - send_fragment, coll_fragment, INLINE); - if(NULL == send_task) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - } - - /* end of list */ - *mqe_ptr_to_set = 
NULL; - assert(NULL != last_send); - - last_send->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if(OMPI_SUCCESS != rc) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int mca_bcol_iboffload_fanout_proxy_progress( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc = OMPI_SUCCESS, leader_rank = 0; - - struct mqe_task *last_wait = NULL; - mca_bcol_iboffload_task_t *wait_task = NULL; - mca_bcol_iboffload_frag_t *preposted_recv_frag = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - /* post wait */ - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, leader_rank, coll_request->qp_index); - if(NULL == preposted_recv_frag) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, leader_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if(NULL == wait_task) { - 
IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - /* end of list */ - *mqe_ptr_to_set = NULL; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if(OMPI_SUCCESS != rc) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int mca_bcol_iboffload_fanout_init( - bcol_function_args_t *input_args, - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t **coll_request) -{ - ompi_free_list_item_t *item = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = NULL; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_init")); - - OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item); - if(NULL == item) { - IBOFFLOAD_VERBOSE(10, ("Failing for coll request free list waiting.\n")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - (*coll_request)->progress_fn = iboffload->fanout_algth; - - (*coll_request)->completion_cb_fn = NULL; - (*coll_request)->order_info = &input_args->order_info; - - (*coll_request)->module = iboffload; - (*coll_request)->ml_buffer_index = input_args->buffer_index; - (*coll_request)->buffer_info[SBUF].offset = 0; - 
(*coll_request)->buffer_info[RBUF].offset = 0; - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER; - - /* finish initializing full message descriptor */ - (*coll_request)->n_fragments = 1; - (*coll_request)->n_frags_sent = 1; - - (*coll_request)->n_frag_mpi_complete = 0; - (*coll_request)->n_frag_net_complete = 0; - - (*coll_request)->user_handle_freed = false; - - input_args->bcol_opaque_data = (void *) (*coll_request); - - /* - * setup collective work request - */ - - /* get collective frag */ - coll_fragment = &(*coll_request)->first_collfrag; - mca_bcol_iboffload_collfrag_init(coll_fragment); - - coll_fragment->alg = FANOUT_ALG; - coll_fragment->mq_index = COLL_MQ; - - /* Set mq credits */ - coll_fragment->mq_credits = iboffload->alg_task_consump[FANOUT_ALG]; - - /* set pointers for (coll frag) <-> (coll full request) */ - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(*coll_request, coll_fragment); - - return OMPI_SUCCESS; -} - -/************************************************************************ - ************************ New style Fan-In ****************************** - ***********************************************************************/ -static int mca_bcol_iboffload_new_style_fanout_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - - if (BCOL_IS_COMPLETED(coll_request)) { - coll_request->user_handle_freed = true; - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - - IBOFFLOAD_VERBOSE(10, ("Fan-Out already done.\n")); - return BCOL_FN_COMPLETE; - } - - return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_new_style_fanout_first_call( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int i = 0, leader_rank = 0, /* We always suppose - the lowest index is a 
leader */ - my_rank = iboffload->ibnet->super.my_index, - sbgp_size = iboffload->ibnet->super.group_size; - - mca_bcol_iboffload_endpoint_t *ep = NULL; - mca_sbgp_ibnet_proc_t *my_ibnet_proc = iboffload->endpoints[my_rank]->ibnet_proc; - - assert(NULL != my_ibnet_proc); - - if (MCA_SBGP_IBNET_NODE_LEADER == my_ibnet_proc->duty) { - iboffload->fanout_algth = mca_bcol_iboffload_fanout_leader_progress; - iboffload->alg_task_consump[FANOUT_ALG] += sbgp_size; - - for (i = leader_rank + 1; i < sbgp_size; ++i) { - ep = iboffload->endpoints[i]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - } else { - iboffload->fanout_algth = mca_bcol_iboffload_fanout_proxy_progress; - iboffload->alg_task_consump[FANOUT_ALG] += 1; - - ep = iboffload->endpoints[leader_rank]; - while(OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - return iboffload->fanout_algth(iboffload, coll_request); -} - -static int mca_bcol_iboffload_new_style_fanout_intra( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int rc = OMPI_SUCCESS; - - struct mca_bcol_iboffload_collreq_t *coll_request = NULL; - mca_bcol_iboffload_module_t *iboffload = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - assert(NULL != iboffload); - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, input_args); - - /* Init Fan-In collective reqeust */ - rc = mca_bcol_iboffload_fanout_init(input_args, iboffload, &coll_request); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Error from mca_bcol_iboffload_fanin_init.\n")); - return BCOL_FN_NOT_STARTED; - } - - rc = iboffload->fanout_algth(iboffload, coll_request); - if (OPAL_UNLIKELY(OMPI_ERROR == rc)) { - return BCOL_FN_NOT_STARTED; - } - - return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_fanout_register(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - 
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register iboffload Fan-In.\n")); - - comm_attribs.bcoll_type = BCOL_FANOUT; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_new_style_fanout_intra, - mca_bcol_iboffload_new_style_fanout_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_frag.c b/ompi/mca/bcol/iboffload/bcol_iboffload_frag.c deleted file mode 100644 index 0ecf1ef62ea..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_frag.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/include/opal/types.h" -#include "opal/datatype/opal_convertor.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_endpoint.h" - -static void frag_constructor(mca_bcol_iboffload_frag_t *frag) -{ - mca_bcol_iboffload_reg_t* reg = - (mca_bcol_iboffload_reg_t*) frag->super.registration; - - memset(&frag->sg_entry, 0, sizeof(struct ibv_sge)); - frag->sg_entry.addr = (uint64_t) (uintptr_t) frag->super.ptr; - - frag->registration = reg; - - if (NULL != reg) { - frag->sg_entry.lkey = reg->mr->lkey; - } - - frag->next = NULL; - frag->type = MCA_BCOL_IBOFFLOAD_NONE_OWNER; - frag->ref_counter = 0; - frag->qp_index = -1; -} - -OBJ_CLASS_INSTANCE( - mca_bcol_iboffload_frag_t, - ompi_free_list_item_t, - frag_constructor, - NULL); - - -static mca_bcol_iboffload_frag_t* - mca_bcol_iboffload_get_ml_frag_calc(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request, - size_t len, size_t src_offset) -{ - int rc; - - mca_bcol_iboffload_frag_t *fragment; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - uint64_t sbuff = (uint64_t) (uintptr_t) coll_request->buffer_info[SBUF].buf + - src_offset; - - /* The buffer was allocated on ML level, - no need to allocate local buffer */ - rc = pack_data_for_calc(iboffload->device->dev.ib_dev_context, - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - cm->map_ompi_to_ib_dt[coll_request->dtype->id], - false /* host order */, - (void *) sbuff, 0, - &coll_request->actual_ib_op, - &coll_request->actual_ib_dtype, - (void *) sbuff); - if (OPAL_UNLIKELY(0 != rc)) { - IBOFFLOAD_VERBOSE(10, ("pack_data_for_calc failed, op: %s, type: %s\n", - coll_request->op->o_name, coll_request->dtype->name)); - return NULL; - } - - fragment = mca_bcol_iboffload_get_ml_frag( - 
iboffload, coll_request->qp_index, len, - coll_request->buffer_info[SBUF].lkey, - sbuff); - - return fragment; -} - -static mca_bcol_iboffload_frag_t * -mca_bcol_iboffload_get_packed_frag(mca_bcol_iboffload_module_t *iboffload, - uint32_t destination, int qp_index, size_t len, - struct opal_convertor_t *convertor) -{ - /* local variables */ - int rc; - uint32_t out_size; - size_t max_size = 0; - - struct iovec payload_iovec; - - ompi_free_list_item_t *item; - mca_bcol_iboffload_frag_t *frag; - - mca_bcol_iboffload_device_t *device = iboffload->device; - - /* Get frag from free list */ - OMPI_FREE_LIST_GET_MT(&device->frags_free[qp_index], item); - if (OPAL_UNLIKELY(NULL == item)) { - return NULL; - } - - frag = (mca_bcol_iboffload_frag_t *) item; - - /* Pack data into the buffer */ - out_size = 1; - payload_iovec.iov_len = len; - - payload_iovec.iov_base = (void *) (uintptr_t) frag->sg_entry.addr; - - rc = opal_convertor_pack(convertor, &(payload_iovec), - &out_size, &max_size); - if (OPAL_UNLIKELY(rc < 0)) { - /* Error: put the fragment back */ - OMPI_FREE_LIST_RETURN_MT(&device->frags_free[qp_index], item); - return NULL; - } - - return frag; -} - -static mca_bcol_iboffload_frag_t * -mca_bcol_iboffload_get_calc_frag(mca_bcol_iboffload_module_t *iboffload, int qp_index, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc; - - ompi_free_list_item_t *item; - mca_bcol_iboffload_frag_t *frag; - - mca_bcol_iboffload_device_t *device = iboffload->device; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Start to pack frag.\n")); - - /* Get frag from free list */ - OMPI_FREE_LIST_GET_MT(&device->frags_free[qp_index], item); - if (OPAL_UNLIKELY(NULL == item)) { - return NULL; - } - - frag = (mca_bcol_iboffload_frag_t *) item; - - /* Pack data into the buffer */ - rc = pack_data_for_calc(device->dev.ib_dev_context, - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - 
cm->map_ompi_to_ib_dt[coll_request->dtype->id], false, - coll_request->buffer_info[SBUF].buf, 0, - &coll_request->actual_ib_op, - &coll_request->actual_ib_dtype, - (void *) (uintptr_t) frag->sg_entry.addr); - if (OPAL_UNLIKELY(0 != rc)) { - IBOFFLOAD_ERROR(("pack_data_for_calc failed, op: %s, type: %s\n", - coll_request->op->o_name, coll_request->dtype->name)); - return NULL; - } - - return frag; -} - -mca_bcol_iboffload_frag_t* -mca_bcol_iboffload_get_send_frag(mca_bcol_iboffload_collreq_t *coll_request, - uint32_t destination, int qp_index, size_t len, - size_t src_offset, int buf_index, int send_frag_type) -{ - /* local variables */ - mca_bcol_iboffload_frag_t *frag; - mca_bcol_iboffload_module_t *iboffload = coll_request->module; - - mca_bcol_iboffload_endpoint_t *endpoint = - iboffload->endpoints[destination]; - - IBOFFLOAD_VERBOSE(10, ("Calling mca_bcol_iboffload_get_send_frag qp_index %d", - qp_index)); - - if ((endpoint->qps[qp_index].sd_wqe) <= 0) { - IBOFFLOAD_VERBOSE(10, ("No send wqe %d", - endpoint->qps[qp_index].sd_wqe)); - return NULL; - } - - --endpoint->qps[qp_index].sd_wqe; - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p: qp_index %d, destination %d, sd_wqe %d", - endpoint, qp_index, destination, endpoint->qps[qp_index].sd_wqe)); - - switch (send_frag_type) { - case MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY: - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY")); - assert(NULL != &iboffload->device->dummy_frags[qp_index]); - return &iboffload->device->dummy_frags[qp_index]; - - case MCA_BCOL_IBOFFLOAD_SEND_FRAG: - { - ompi_free_list_item_t *item; - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG")); - - /* Get frag from free list */ - OMPI_FREE_LIST_GET_MT(&iboffload->device->frags_free[qp_index], item); - - frag = (mca_bcol_iboffload_frag_t *) item; - } - - break; - case MCA_BCOL_IBOFFLOAD_SEND_FRAG_CONVERT: - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_CONVERT")); - frag = 
mca_bcol_iboffload_get_packed_frag(iboffload, destination, - qp_index, len, &coll_request->send_convertor); - - break; - case MCA_BCOL_IBOFFLOAD_SEND_FRAG_CALC: - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_CALC")); - frag = mca_bcol_iboffload_get_calc_frag(iboffload, qp_index, coll_request); - - break; - case MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML: - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML")); - frag = mca_bcol_iboffload_get_ml_frag( - iboffload, qp_index, len, coll_request->buffer_info[buf_index].lkey, - (uint64_t)(uintptr_t) coll_request->buffer_info[buf_index].buf + src_offset); - - break; - case MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC: - frag = mca_bcol_iboffload_get_ml_frag_calc(iboffload, coll_request, len, src_offset); - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC")); - - break; - default: - IBOFFLOAD_VERBOSE(10, ("Getting default")); - frag = NULL; - IBOFFLOAD_ERROR(("Unknown send frag type %d for QP index %d", - send_frag_type, qp_index)); - } - - if (OPAL_UNLIKELY(NULL == frag)) { - IBOFFLOAD_VERBOSE(10, ("Getting NULL")); - return NULL; - } - - frag->sg_entry.length = len; - frag->next = NULL; - - return frag; -} - -void -mca_bcol_iboffload_frag_init(ompi_free_list_item_t* item, void* ctx) -{ - int qp_index = *(int *) ctx; - mca_bcol_iboffload_frag_t *frag = (mca_bcol_iboffload_frag_t *) item; - - frag->qp_index = qp_index; - frag->type = MCA_BCOL_IBOFFLOAD_BCOL_OWNER; -} - -void -mca_bcol_iboffload_ml_frag_init(ompi_free_list_item_t* item, void* ctx) -{ - mca_bcol_iboffload_frag_t *frag = (mca_bcol_iboffload_frag_t *) item; - - frag->qp_index = -1; - frag->type = MCA_BCOL_IBOFFLOAD_ML_OWNER; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_frag.h b/ompi/mca/bcol/iboffload/bcol_iboffload_frag.h deleted file mode 100644 index fffc33f2935..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_frag.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National 
Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_FRAG_H -#define MCA_BCOL_IBOFFLOAD_FRAG_H - -#include "ompi_config.h" - -#include - -#include "opal/datatype/opal_convertor.h" - -#include "opal/mca/mpool/mpool.h" -#include "opal/class/ompi_free_list.h" - -#include "bcol_iboffload.h" - -BEGIN_C_DECLS - -/* forward declarations */ -struct mca_bcol_iboffload_collreq_t; - -struct mca_bcol_iboffload_reg_t { - mca_mpool_base_registration_t base; - struct ibv_mr *mr; -}; -typedef struct mca_bcol_iboffload_reg_t mca_bcol_iboffload_reg_t; - -typedef enum { - MCA_BCOL_IBOFFLOAD_NONE_OWNER = -1, - MCA_BCOL_IBOFFLOAD_DUMMY_OWNER, - MCA_BCOL_IBOFFLOAD_BCOL_OWNER, - MCA_BCOL_IBOFFLOAD_ML_OWNER -} frag_type; - -typedef enum { - MCA_BCOL_IBOFFLOAD_SEND_FRAG, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_CONVERT, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_CALC, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY -} send_frag_type; - -struct mca_bcol_iboffload_frag_t { - ompi_free_list_item_t super; - - struct mca_bcol_iboffload_frag_t *next; - struct mca_bcol_iboffload_reg_t *registration; - - struct ibv_sge sg_entry; - - frag_type type; - - int ref_counter; - int qp_index; -}; -typedef struct mca_bcol_iboffload_frag_t mca_bcol_iboffload_frag_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_frag_t); - -/* The same fragment maybe shared by multiple task. 
- * In order to manage right release and allocation flow - * we use reference counter on each fragment and the follow - * wrapper allocation and release function that hides - * the counter */ - -#define IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(fragment, task) \ - do { \ - ++((fragment)->ref_counter); \ - (task)->frag = (fragment); \ - } while(0) - -#define IBOFFLOAD_SET_FRAGS_ON_TASK(fragment, task) \ - do { \ - struct mca_bcol_iboffload_frag_t *temp_frag = fragment; \ - while (NULL != temp_frag) { \ - ++(temp_frag->ref_counter); \ - temp_frag = temp_frag->next; \ - } \ - (task)->frag = fragment; \ - } while(0) - -/* function declarations */ -mca_bcol_iboffload_frag_t * -mca_bcol_iboffload_get_send_frag(struct mca_bcol_iboffload_collreq_t *coll_request, - uint32_t destination, int qp_index, size_t len, - size_t src_offset, int buff_index, int send_frag_type); - -void -mca_bcol_iboffload_frag_init(ompi_free_list_item_t* item, void* ctx); -void -mca_bcol_iboffload_ml_frag_init(ompi_free_list_item_t* item, void* ctx); - -static inline __opal_attribute_always_inline__ -mca_bcol_iboffload_frag_t* mca_bcol_iboffload_get_ml_empty_frag( - mca_bcol_iboffload_module_t *iboffload, - int qp_index) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_frag_t *frag; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - /* Get frag from free list */ - OMPI_FREE_LIST_GET_MT(&cm->ml_frags_free, item); - if (OPAL_UNLIKELY(NULL == item)) { - return NULL; - } - - frag = (mca_bcol_iboffload_frag_t *) item; - - frag->qp_index = qp_index; - frag->next = NULL; - - return frag; -} - -static inline __opal_attribute_always_inline__ -mca_bcol_iboffload_frag_t* mca_bcol_iboffload_get_ml_frag( - mca_bcol_iboffload_module_t *iboffload, - int qp_index, size_t len, uint32_t lkey, uint64_t addr) -{ - /* local variables */ - mca_bcol_iboffload_frag_t *frag; - - IBOFFLOAD_VERBOSE(10, ("Call for get ML frag - addr 0x%x", addr)); - - frag = 
mca_bcol_iboffload_get_ml_empty_frag(iboffload, qp_index); - - frag->sg_entry.addr = addr; - frag->sg_entry.lkey = lkey; - frag->sg_entry.length = len; - - IBOFFLOAD_VERBOSE(10, ("Setting ml frag lkey %u, " - "addr %p, qp_index %d, send value - %lf", - frag->sg_entry.lkey, frag->sg_entry.addr, - qp_index, *(double *) frag->sg_entry.addr)); - - return frag; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_mca.c b/ompi/mca/bcol/iboffload/bcol_iboffload_mca.c deleted file mode 100644 index 04f6f94ad9c..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_mca.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_mca.h" - -#include "ompi/constants.h" -#include "ompi/mca/common/ofacm/base.h" -#include "ompi/communicator/communicator.h" - -#include "opal/util/show_help.h" - -/* - * Local flags - */ -enum { - REGINT_NEG_ONE_OK = 0x01, - REGINT_GE_ZERO = 0x02, - REGINT_GE_ONE = 0x04, - REGINT_NONZERO = 0x08, - REGINT_MAX = 0x88 -}; - -enum { - REGSTR_EMPTY_OK = 0x01, - REGSTR_MAX = 0x88 -}; - -mca_base_var_enum_value_t mtu_values[] = { - {IBV_MTU_256, "256B"}, - {IBV_MTU_512, "512B"}, - {IBV_MTU_1024, "1k"}, - {IBV_MTU_4096, "4k"}, - {0, NULL} -}; - -/* - * utility routine for string parameter registration - */ -static int reg_string(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - const char* default_value, char **storage, - int flags) -{ - int index; - - /* the MCA variable system will not attempt to modify this value */ - *storage = (char *) 
default_value; - index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(*storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -/* - * utility routine for integer parameter registration - */ -static int reg_int(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - int default_value, int *storage, int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_INT, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) { - return OMPI_SUCCESS; - } - - if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) || - (0 != (flags & REGINT_GE_ONE) && *storage < 1) || - (0 != (flags & REGINT_NONZERO) && 0 == *storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -/* - * utility routine for integer parameter registration - */ -static int reg_bool(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - bool default_value, bool *storage) -{ - int index; - - *storage = default_value; - index = 
mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - return OMPI_SUCCESS; -} - -int mca_bcol_iboffload_verify_params(void) -{ - if (mca_bcol_iboffload_component.min_rnr_timer > 31) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_min_rnr_timer > 31", - "bcol_iboffload_ib_min_rnr_timer reset to 31"); - mca_bcol_iboffload_component.min_rnr_timer = 31; - } else if (mca_bcol_iboffload_component.min_rnr_timer < 0){ - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_min_rnr_timer < 0", - "bcol_iboffload_ib_min_rnr_timer reset to 0"); - mca_bcol_iboffload_component.min_rnr_timer = 0; - } - - if (mca_bcol_iboffload_component.timeout > 31) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_timeout > 31", - "bcol_iboffload_ib_timeout reset to 31"); - mca_bcol_iboffload_component.timeout = 31; - } else if (mca_bcol_iboffload_component.timeout < 0) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_timeout < 0", - "bcol_iboffload_ib_timeout reset to 0"); - mca_bcol_iboffload_component.timeout = 0; - } - - if (mca_bcol_iboffload_component.retry_count > 7) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_retry_count > 7", - "bcol_iboffload_ib_retry_count reset to 7"); - mca_bcol_iboffload_component.retry_count = 7; - } else if (mca_bcol_iboffload_component.retry_count < 0) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_retry_count < 0", - 
"bcol_iboffload_ib_retry_count reset to 0"); - mca_bcol_iboffload_component.retry_count = 0; - } - - if (mca_bcol_iboffload_component.max_rdma_dst_ops > 7) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_rnr_retry > 7", - "bcol_iboffload_ib_rnr_retry reset to 7"); - mca_bcol_iboffload_component.max_rdma_dst_ops = 7; - } else if (mca_bcol_iboffload_component.max_rdma_dst_ops < 0) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_rnr_retry < 0", - "bcol_iboffload_ib_rnr_retry reset to 0"); - mca_bcol_iboffload_component.max_rdma_dst_ops = 0; - } - - if (mca_bcol_iboffload_component.service_level > 15) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_service_level > 15", - "bcol_iboffload_ib_service_level reset to 15"); - mca_bcol_iboffload_component.service_level = 15; - } else if (mca_bcol_iboffload_component.service_level < 0) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_service_level < 0", - "bcol_iboffload_ib_service_level reset to 0"); - mca_bcol_iboffload_component.service_level = 0; - } - - if(mca_bcol_iboffload_component.buffer_alignment <= 1 || - (mca_bcol_iboffload_component.buffer_alignment & (mca_bcol_iboffload_component.buffer_alignment - 1))) { - opal_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment", - true, mca_bcol_iboffload_component.buffer_alignment, ompi_process_info.nodename, 64); - mca_bcol_iboffload_component.buffer_alignment = 64; - } - - return OMPI_SUCCESS; -} - -int mca_bcol_iboffload_register_params(void) -{ - mca_base_var_enum_t *new_enum; - char *msg; - int ret = OMPI_SUCCESS, tmp; - -#define CHECK(expr) do { \ - tmp = (expr); \ - if (OMPI_SUCCESS != tmp) ret = tmp; \ - } while (0) - - /* register openib component parameters */ - CHECK(reg_int("k_nomial_radix", NULL, - "The radix of the K-nomial tree for scatther-gather 
type algorithms" - "(starts from 2)", 2, &mca_bcol_iboffload_component.k_nomial_radix, - REGINT_GE_ONE)); - - CHECK(reg_int("priority", NULL, - "IB offload component priority" - "(from 0(low) to 90 (high))", 90, - &mca_bcol_iboffload_component.super.priority, 0)); - - CHECK(reg_int("verbose", NULL, - "Output some verbose IB offload BTL information " - "(0 = no output, nonzero = output)", 0, - &mca_bcol_iboffload_component.verbose, 0)); - - CHECK(reg_bool("warn_default_gid_prefix", NULL, - "Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)", - true, &mca_bcol_iboffload_component.warn_default_gid_prefix)); - - CHECK(reg_bool("warn_nonexistent_if", NULL, - "Warn if non-existent devices and/or ports are specified in the bcol_iboffla_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)", - true, &mca_bcol_iboffload_component.warn_nonexistent_if)); - - CHECK(reg_int("max_pipeline_depth", NULL, - "The maximal number of fragments of the same collective request that can be transferred in parallel", 3, - (int *) &mca_bcol_iboffload_component.max_pipeline_depth, 0)); - - CHECK(reg_int("max_mqe_tasks", NULL, - "Maximum number of MQEs for each iboffload module", - 1024, &mca_bcol_iboffload_component.max_mqe_tasks, 0)); - CHECK(reg_int("max_mq_size", NULL, - "Maximum size of each MQ for each iboffload module", - 1024, &mca_bcol_iboffload_component.max_mq_size, 0)); - CHECK(reg_int("free_list_num", NULL, - "Intial size of free lists (must be >= 1)", - 256, &mca_bcol_iboffload_component.free_list_num, - REGINT_GE_ONE)); - CHECK(reg_int("free_list_max", NULL, - "Maximum size of free lists " - "(-1 = infinite, otherwise must be >= 0)", - -1, &mca_bcol_iboffload_component.free_list_max, - REGINT_NEG_ONE_OK | REGINT_GE_ONE)); - CHECK(reg_int("free_list_inc", NULL, - "Increment size of free lists (must be >= 1)", - 32, 
&mca_bcol_iboffload_component.free_list_inc, - REGINT_GE_ONE)); - /* rdma mpool no longer exists - must use the grdma mpool component, should resolve errors in - * mtt testing - */ - /* - CHECK(reg_string("mpool", NULL, - "Name of the memory pool to be used (it is unlikely that you will ever want to change this", - "rdma", &mca_bcol_iboffload_component.mpool_name, - 0)); - */ - CHECK(reg_string("mpool", NULL, - "Name of the memory pool to be used (it is unlikely that you will ever want to change this", - "grdma", &mca_bcol_iboffload_component.mpool_name, - 0)); - CHECK(reg_int("cq_size", "cq_size", - "Size of the OpenFabrics completion " - "queue (will automatically be set to a minimum of " - "(2 * number_of_peers * bcol_iboffload_rd_num))", - 1024, &mca_bcol_iboffload_component.cq_size, REGINT_GE_ONE)); - - CHECK(reg_int("exchange_tree_order", NULL, - "The order of the exchange tree. " - "Must be power of two.", - 2, &mca_bcol_iboffload_component.exchange_tree_order, REGINT_GE_ONE)); - - CHECK(reg_int("knomial_tree_order", NULL, - "The order of the knomial exchange tree. ", - 3, &mca_bcol_iboffload_component.knomial_tree_order, REGINT_GE_ONE)); - - - CHECK(reg_int("max_inline_data", "max_inline_data", - "Maximum size of inline data segment " - "(-1 = run-time probe to discover max value, " - "otherwise must be >= 0). " - "If not explicitly set, use max_inline_data from " - "the INI file containing device-specific parameters", - 128, (int *) &mca_bcol_iboffload_component.max_inline_data, - REGINT_NEG_ONE_OK | REGINT_GE_ZERO)); - -#if 0 - CHECK(reg_string("pkey", "ib_pkey_val", - "OpenFabrics partition key (pkey) value. 
" - "Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB paritition key value (0x7fff)", - "0", &pkey, 0)); - /* Pasha - mca_bcol_iboffload_component.pkey_val = - ompi_btl_openib_ini_intify(pkey) & MCA_BTL_IB_PKEY_MASK; - free(pkey); - */ -#endif - - CHECK(reg_string("receive_queues", NULL, - "Colon-delimited, comma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4", - "P,512,256,192,128", &mca_bcol_iboffload_component.receive_queues, - 0)); - - CHECK(reg_int("qp_ous_rd_atom", NULL, - "InfiniBand outstanding atomic reads (must be >= 0)", 4, - (int *) &mca_bcol_iboffload_component.qp_ous_rd_atom, REGINT_GE_ZERO)); - - asprintf(&msg, "OpenFabrics MTU, in bytes (if not specified in INI files). Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes", - IBV_MTU_256, - IBV_MTU_512, - IBV_MTU_1024, - IBV_MTU_2048, - IBV_MTU_4096); - if (NULL == msg) { - /* Don't try to recover from this */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - CHECK(mca_base_var_enum_create("infiniband mtu", mtu_values, &new_enum)); - mca_bcol_iboffload_component.mtu = IBV_MTU_1024; - tmp = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version, - "mtu", msg, MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_bcol_iboffload_component.mtu); - OBJ_RELEASE(new_enum); - free(msg); - - if (0 > tmp) ret = tmp; - - tmp = mca_base_var_register_synonym(tmp, "ompi", "bcol", "iboffload", "ib_mtu", - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - if (0 > tmp) ret = tmp; - - CHECK(reg_int("ib_min_rnr_timer", NULL, "InfiniBand minimum " - "\"receiver not ready\" timer, in seconds " - "(must be >= 0 and <= 31)", - 1 , &mca_bcol_iboffload_component.min_rnr_timer, 0)); - - CHECK(reg_int("ib_timeout", NULL, "InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * " - "(2^bcol_iboffload_ib_timeout) (must be >= 0 and <= 31)", - 
20, &mca_bcol_iboffload_component.timeout, 0)); - - CHECK(reg_int("ib_retry_count", NULL, "InfiniBand transmit retry count " - "(must be >= 0 and <= 7)", - 7, &mca_bcol_iboffload_component.retry_count, 0)); - - CHECK(reg_int("ib_rnr_retry", NULL, "InfiniBand \"receiver not ready\" " - "retry count; applies *only* to SRQ/XRC queues. PP queues " - "use RNR retry values of 0 because Open MPI performs " - "software flow control to guarantee that RNRs never occur " - "(must be >= 0 and <= 7; 7 = \"infinite\")", - 7, &mca_bcol_iboffload_component.rnr_retry, 0)); - - CHECK(reg_int("ib_max_rdma_dst_ops", NULL, "InfiniBand maximum pending RDMA " - "destination operations " - "(must be >= 0)", - 4, &mca_bcol_iboffload_component.max_rdma_dst_ops, REGINT_GE_ZERO)); - - CHECK(reg_int("ib_service_level", NULL, "InfiniBand service level " - "(must be >= 0 and <= 15)", - 0, &mca_bcol_iboffload_component.service_level, 0)); - - CHECK(reg_int("buffer_alignment", NULL, - "Prefered communication buffer alignment, in bytes " - "(must be > 0 and power of two)", - 64, &mca_bcol_iboffload_component.buffer_alignment, REGINT_GE_ZERO)); - - /* register parmeters controlling message fragementation */ - CHECK(reg_int("min_frag_size", NULL, - "Minimum fragment size", - getpagesize(), &mca_bcol_iboffload_component.super.min_frag_size, - REGINT_GE_ONE)); - - CHECK(reg_int("max_frag_size", NULL, - "Maximum fragment size", - FRAG_SIZE_NO_LIMIT, &mca_bcol_iboffload_component.super.max_frag_size, - REGINT_NONZERO)); - - CHECK(reg_bool("can_use_user_buffers", NULL, - "User memory can be used by the collective algorithms", - true, &mca_bcol_iboffload_component.super.can_use_user_buffers)); - - CHECK(reg_int("barrier_mode", NULL, - "Barrier mode: 0 - Recursive doubling; 1 - Recursive K-ing", - 0, &mca_bcol_iboffload_component.barrier_mode, REGINT_GE_ZERO)); - - CHECK(reg_int("max_progress_pull", NULL, - "Max number of progress pull checks", - 8, &mca_bcol_iboffload_component.max_progress_pull, 
REGINT_GE_ZERO)); - - CHECK(reg_int("use_brucks_smsg_alltoall_rdma", NULL, - "Use brucks algorithm for smsg alltoall and RDMA semantics 1 = No Temp buffer recycling" - "1 = Alg with no Temp Buffer Recycling (faster), 2 = Alg with temp Buffer Recycling (slower)", - 0, &mca_bcol_iboffload_component.use_brucks_smsg_alltoall_rdma, 0)); - - CHECK(reg_int("use_brucks_smsg_alltoall_sr", NULL, - "Use brucks algorithm for smsg alltoall and Send/Recv semantics " - "1 = Alg with RTR (faster), 2 = Alg with RNR (slower)", - 0, &mca_bcol_iboffload_component.use_brucks_smsg_alltoall_sr, 0)); - - CHECK(reg_int("alltoall_bruck_radix", NULL, - "Radix for Bruck algorithm for smsg alltoall", - 3, &mca_bcol_iboffload_component.k_alltoall_bruck_radix, 0)); - - CHECK(reg_int("k_alltoall_bruck_radix", NULL, - "Temp Buffer alignment for Bruck algorithm for smsg alltoall", - 64, &mca_bcol_iboffload_component.tmp_buf_alignment, 0)); - - /* - CHECK(reg_string("if_include", NULL, - "Comma-delimited list of devices/ports to be used (e.g. \"mthca0,mthca1:2\"; empty value means to use all ports found). Mutually exclusive with bcol_iboffload_if_exclude.", - NULL, &mca_bcol_iboffload_component.if_include, - 0)); - - CHECK(reg_string("if_exclude", NULL, - "Comma-delimited list of device/ports to be excluded (empty value means to not exclude any ports). 
Mutually exclusive with bcol_iboffload_if_include.", - NULL, &mca_bcol_iboffload_component.if_exclude, - 0)); - */ - - CHECK(mca_bcol_iboffload_verify_params()); - - /* Register any MCA params for the connect pseudo-components */ - if (OMPI_SUCCESS == ret) { - ret = ompi_common_ofacm_base_register(&mca_bcol_iboffload_component.super.bcol_version); - } - - return ret; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_mca.h b/ompi/mca/bcol/iboffload/bcol_iboffload_mca.h deleted file mode 100644 index 193860f2c47..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_mca.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - /** @file */ - -#ifndef MCA_BCOL_IBOFFLOAD_MCA_H -#define MCA_BCOL_IBOFFLOAD_MCA_H - -#include "ompi_config.h" - -int mca_bcol_iboffload_register_params(void); -int mca_bcol_iboffload_verify_params(void); - -#endif diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_module.c b/ompi/mca/bcol/iboffload/bcol_iboffload_module.c deleted file mode 100644 index 90107b21ee8..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_module.c +++ /dev/null @@ -1,1538 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "opal/util/arch.h" -#include "opal/include/opal/types.h" -#include "opal/datatype/opal_datatype.h" - -#include "ompi/mca/bcol/base/base.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/communicator/communicator.h" -#include "opal/mca/mpool/grdma/mpool_grdma.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_bcast.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -static int init_rdma_buf_desc(mca_bcol_iboffload_rdma_buffer_desc_t **desc, void *base_addr, uint32_t num_banks, - uint32_t num_buffers_per_bank, uint32_t size_buffer, uint32_t header_size); - -static int set_endpoint_remote_rdma_info(mca_bcol_iboffload_endpoint_t *ep, mca_bcol_iboffload_rdma_info_t *remote_rdma_info); - -static void -mca_bcol_iboffload_module_construct(mca_bcol_iboffload_module_t *module) -{ - int i; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - /* set all to zero */ - module->group_size = 0; - module->segment_size = 0; - module->collective_tag = 0; - module->ibnet = NULL; - module->cgroup_index = 0; - - module->num_endpoints = 0; - module->endpoints = NULL; - - /* initi the previous sequence number */ - module->prev_sequence_num = -1; - - switch (cm->barrier_mode) { - case (0): module->barrier_algth = - mca_bcol_iboffload_barrier_intra_recursive_doubling_start; - break; - case (1): module->barrier_algth = - mca_bcol_iboffload_barrier_intra_recursive_knomial_start; - break; - default: module->barrier_algth = NULL; - } - - module->allreduce_algth = NULL; - module->fanin_algth = 
mca_bcol_iboffload_new_style_fanin_first_call; - module->fanout_algth = mca_bcol_iboffload_new_style_fanout_first_call; - module->memsync_algth = mca_bcol_iboffload_nb_memory_service_barrier_start; - - memset(module->mq, 0, sizeof(module->mq[0]) * BCOL_IBOFFLOAD_MQ_NUM); - memset(module->alg_task_consump, 0, sizeof(uint32_t) * LAST_ALG); - memset(module->connection_status, 0, sizeof(bool) * LAST_ALG); - - for (i = 0; i < BCOL_IBOFFLOAD_MQ_NUM; i++) { - module->mq_credit[i] = mca_bcol_iboffload_component.max_mqe_tasks; - } - - module->super.bcol_component = - (mca_bcol_base_component_t *) &mca_bcol_iboffload_component; - - /* We need two MQ's tasks for exchange with remote addresses */ - module->alg_task_consump[REMOTE_EXCHANGE_ALG] += 2; - - module->power_of_2_ranks = 0; - /* it is safe to set all the remote block to zero */ - memset(&module->rdma_block, 0, sizeof(mca_bcol_iboffload_local_rdma_block_t)); - - module->super.list_n_connected = NULL; - - OBJ_CONSTRUCT(&module->collfrag_pending, opal_list_t); -} - -static void -mca_bcol_iboffload_module_destruct(mca_bcol_iboffload_module_t *module) -{ - int i = 0; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Module - %p: start to destroy; " - "pending queue size - %d.\n", - module, opal_list_get_size(&module->collfrag_pending))); - - /* Make sure that we done with all pending collective frags */ - while (opal_list_get_size(&module->collfrag_pending) > 0) { - opal_progress(); - } - - OBJ_DESTRUCT(&module->collfrag_pending); - - IBOFFLOAD_VERBOSE(10, ("module->mq_credit - %d, cm->max_mqe_tasks - %d.\n", - module->mq_credit[0], cm->max_mqe_tasks)); - /* Make sure that you got completion on all outstanding collectives */ - for (i = 0; i < BCOL_IBOFFLOAD_MQ_NUM; i++) { - while (module->mq_credit[i] != (int) cm->max_mqe_tasks) { - opal_progress(); - } - } - - IBOFFLOAD_VERBOSE(10, ("All credits were returned.\n")); - - if (NULL != module && NULL != module->mq) { - for (i = 
0; i < BCOL_IBOFFLOAD_MQ_NUM; i++) { - if (0 != mqe_context_destroy(module->mq[i])) { - IBOFFLOAD_ERROR(("Error destroying MQ for device (%s), error: %s\n", - ibv_get_device_name(module->device->dev.ib_dev), strerror(errno))); - } - } - - IBOFFLOAD_VERBOSE(10, ("MQ %d was destroyed.\n", i)); - } - - if (NULL != module->endpoints) { - mca_bcol_iboffload_endpoint_t *ep; - int qp_index, num_qps = cm->num_qps; - - for (i = 0; i < module->num_endpoints; ++i) { - if (NULL != module->endpoints[i]) { - /* Make sure that we get completions on all outstanding send requests */ - ep = module->endpoints[i]; - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - IBOFFLOAD_VERBOSE(10, ("qp_index - %d, ep->index - %d, " - "ep->qps[qp_index].sd_wqe - %d, " - "cm->qp_infos[qp_index].rd_num - %d.\n", - qp_index, ep->index, - ep->qps[qp_index].sd_wqe, - cm->qp_infos[qp_index].rd_num)); - - while (ep->qps[qp_index].sd_wqe != cm->qp_infos[qp_index].rd_num) { - opal_progress(); - } - - IBOFFLOAD_VERBOSE(10, ("qp_index - %d, ep->index - %d; " - "All sends were sent.\n", - qp_index, ep->index)); - } - - OBJ_RELEASE(ep); - } - } - - free(module->endpoints); - } - - netpatterns_free_recursive_doubling_tree_node(&module->n_exchange_tree); - netpatterns_free_recursive_doubling_tree_node(&module->recursive_doubling_tree); - - OBJ_RELEASE(module->device->net_context); - OBJ_RELEASE(module->device); - - if (NULL != module->super.list_n_connected) { - free(module->super.list_n_connected); - module->super.list_n_connected = NULL; - } - - OBJ_DESTRUCT(&module->iovec_tasks_free); - - IBOFFLOAD_VERBOSE(10, ("module - %p was successfully destructed.\n", module)); -} - -OBJ_CLASS_INSTANCE(mca_bcol_iboffload_module_t, - mca_bcol_base_module_t, - mca_bcol_iboffload_module_construct, - mca_bcol_iboffload_module_destruct); - -static int iboffload_init_port(struct mca_bcol_iboffload_device_t *device, - struct mca_bcol_iboffload_port_t *p) -{ - union ibv_gid gid; - struct ibv_port_attr ib_port_attr; - - if 
(ibv_query_port(device->dev.ib_dev_context, p->id, &ib_port_attr)){ - IBOFFLOAD_ERROR(("Error getting port attributes for device %s " - "port number %d errno says %s", - ibv_get_device_name(device->dev.ib_dev), p->id, strerror(errno))); - return OMPI_ERR_NOT_FOUND; - } - - /* Set port data */ - p->lmc = (1 << ib_port_attr.lmc); - p->lid = ib_port_attr.lid; - p->stat = ib_port_attr.state; - p->mtu = ib_port_attr.active_mtu; - - IBOFFLOAD_VERBOSE(10, (" Setting port data (%s:%d) lid=%d, lmc=%d, stat=%d, mtu=%d\n", - ibv_get_device_name(device->dev.ib_dev), p->id, p->lid, - p->lmc, p->stat, p->mtu)); - - if (0 != ibv_query_gid(device->dev.ib_dev_context, p->id, 0, &gid)) { - IBOFFLOAD_ERROR(("ibv_query_gid failed (%s:%d)\n", - ibv_get_device_name(device->dev.ib_dev), p->id)); - return OMPI_ERR_NOT_FOUND; - } - - /* set subnet data */ - p->subnet_id = ntoh64(gid.global.subnet_prefix); - IBOFFLOAD_VERBOSE(10, ("my IB-only subnet_id for HCA %s port %d is %lx", - ibv_get_device_name(device->dev.ib_dev), p->id, p->subnet_id)); - - return OMPI_SUCCESS; -} - -/* mpool allocation maybe changed in future, so lets keep it as separate function */ -static int prepare_mpool(mca_bcol_iboffload_device_t *device) -{ - int ret = OMPI_SUCCESS; - mca_mpool_base_resources_t resources; - - resources.reg_data = (void *) device; - resources.sizeof_reg = sizeof(mca_bcol_iboffload_reg_t); - - resources.register_mem = mca_bcol_iboffload_register_mr; - resources.deregister_mem = mca_bcol_iboffload_deregister_mr; - - device->mpool = - mca_mpool_base_module_create(mca_bcol_iboffload_component.mpool_name, - device, &resources); - if (NULL == device->mpool){ - opal_output(0, "error creating IB memory pool for %s errno says %s\n", - ibv_get_device_name(device->dev.ib_dev), strerror(errno)); - ret = OMPI_ERROR; - } - - return ret; -} - -/* Allocate device related resources: mpool, pd, cq, free_lists */ -static int allocate_device_resources(mca_bcol_iboffload_device_t *device) -{ - int qp_index, 
num_qps, rc; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - void* dummy_mem = (void *) &device->dummy_mem[0]; - - num_qps = cm->num_qps; - - /* We have some active ports, alloce pd */ - device->ib_pd = ibv_alloc_pd(device->dev.ib_dev_context); - if (NULL == device->ib_pd){ - IBOFFLOAD_ERROR(("Error allocating protection domain for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - return OMPI_ERROR; - } - - /* Pasha: allocate mpool here */ - if (OMPI_SUCCESS != prepare_mpool(device)) { - return OMPI_ERROR; - } - - /* Allocating free list of memory registered fragments */ - device->frags_free = (ompi_free_list_t *) calloc( - num_qps, sizeof(ompi_free_list_t)); - - if (NULL == device->frags_free) { - IBOFFLOAD_ERROR(("Error allocating memory for " - "frags array, dev: %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), - strerror(errno))); - - return OMPI_ERROR; - } - - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - mca_bcol_iboffload_alloc_qps_resource_fn_t alloc_resource = - cm->qp_infos[qp_index].alloc_resource; - - if (NULL != alloc_resource) { - if (OMPI_SUCCESS != alloc_resource(qp_index, device)) { - return OMPI_ERROR; - } - } - - } - - if (OMPI_SUCCESS != - mca_bcol_iboffload_adjust_cq(device, &device->ib_cq)) { - IBOFFLOAD_ERROR(("Error creating CQ for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != - mca_bcol_iboffload_adjust_cq(device, &device->ib_mq_cq)) { - IBOFFLOAD_ERROR(("Error creating mq CQ for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_register_mr((void *) device, dummy_mem, - sizeof(char) * BCOL_IBOFFLOAD_DUMMY_MEM_SIZE, - &device->dummy_reg.base); - - if (OMPI_SUCCESS != rc) { - IBOFFLOAD_ERROR(("Dummy memory registration failed for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), 
strerror(errno))); - return OMPI_ERROR; - } - - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - mca_bcol_iboffload_frag_t *frag = &device->dummy_frags[qp_index]; - - memset(&frag->super.registration, 0, sizeof(mca_mpool_base_registration_t)); - OBJ_CONSTRUCT(frag, mca_bcol_iboffload_frag_t); - - frag->qp_index = qp_index; - frag->type = MCA_BCOL_IBOFFLOAD_DUMMY_OWNER; - - frag->registration = &device->dummy_reg; - - frag->super.ptr = dummy_mem; - frag->super.registration = &device->dummy_reg.base; - - frag->sg_entry.length = 0; - frag->sg_entry.lkey = device->dummy_reg.mr->lkey; - frag->sg_entry.addr = (uint64_t) (uintptr_t) dummy_mem; - } - - return OMPI_SUCCESS; -} - -/* Register memory */ -int mca_bcol_iboffload_register_mr(void *reg_data, void *base, size_t size, - mca_mpool_base_registration_t *reg) -{ - mca_bcol_iboffload_device_t *device = (mca_bcol_iboffload_device_t *) reg_data; - mca_bcol_iboffload_reg_t *iboffload_reg = (mca_bcol_iboffload_reg_t *) reg; - - iboffload_reg->mr = ibv_reg_mr(device->ib_pd, base, size, - IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE | - IBV_ACCESS_REMOTE_READ); - - if (NULL == iboffload_reg->mr) { - IBOFFLOAD_ERROR(("Device %s: %p addr, %d bytes registration failed.", - ibv_get_device_name(device->dev.ib_dev), base, size)); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - IBOFFLOAD_VERBOSE(10, ("Device %s: memory register addr=%p, len=%d, mr - %p.", - ibv_get_device_name(device->dev.ib_dev), base, size, iboffload_reg->mr)); - - return OMPI_SUCCESS; -} - -/* Deregister memory */ -int mca_bcol_iboffload_deregister_mr(void *reg_data, mca_mpool_base_registration_t *reg) -{ - mca_bcol_iboffload_device_t *device = (mca_bcol_iboffload_device_t *) reg_data; - mca_bcol_iboffload_reg_t *iboffload_reg = (mca_bcol_iboffload_reg_t *) reg; - - IBOFFLOAD_VERBOSE(10, ("Device %s: mr - %p.", - ibv_get_device_name(device->dev.ib_dev), iboffload_reg->mr)); - - if (NULL != iboffload_reg->mr) { - if (ibv_dereg_mr(iboffload_reg->mr)) { - 
IBOFFLOAD_ERROR(("Device %s: error unpinning iboffload memory errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - return OMPI_ERROR; - } - } - - IBOFFLOAD_VERBOSE(10, ("Device %s: memory deregister succeeded.", - ibv_get_device_name(device->dev.ib_dev))); - - iboffload_reg->mr = NULL; - - return OMPI_SUCCESS; -} - -/* We need to keep separate registration function for - ML list memory managment */ -static int mca_bcol_iboffload_lmngr_register(void *context_data, - void *base, size_t size, - void **reg_desc) -{ - struct ibv_mr *mr; - mca_bcol_iboffload_device_t *device = - (mca_bcol_iboffload_device_t *) context_data; - - mr = ibv_reg_mr(device->ib_pd, base, size, - IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE | - IBV_ACCESS_REMOTE_READ); - - if (NULL == mr) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - IBOFFLOAD_VERBOSE(10, ("Device %s: memory register addr=%p, len=%d", - ibv_get_device_name(device->dev.ib_dev), base, size)); - - *reg_desc = (void *) mr; - - /* Make sure that the addr stays the same */ - assert(mr->addr == base); - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_lmngr_deregister(void *context_data, void *reg_desc) -{ - struct ibv_mr *mr = (struct ibv_mr *) reg_desc; - mca_bcol_iboffload_device_t *device = - (mca_bcol_iboffload_device_t *) context_data; - - if (mr != NULL) { - if (ibv_dereg_mr(mr)) { - IBOFFLOAD_ERROR(("Device %s: error unpinning iboffload memory errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - return OMPI_ERROR; - } - } - - return OMPI_SUCCESS; -} - -static int iboffload_start_device(mca_bcol_iboffload_device_t *device) -{ - int port_cnt, port, ret; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - -#if HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE - if (IBV_TRANSPORT_IB != device->dev.ib_dev->transport_type) { - IBOFFLOAD_VERBOSE(10, ("Skipping non IB device %s", - ibv_get_device_name(device->dev.ib_dev))); - goto error; - } -#endif - - /* 
Open device context */ - IBOFFLOAD_VERBOSE(10, ("Open IB device - %p", device->dev.ib_dev)); - - device->dev.ib_dev_context = ibv_open_device(device->dev.ib_dev); - if (NULL == device->dev.ib_dev_context) { - IBOFFLOAD_ERROR(("Error obtaining device context for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - goto error; - } - - if (ibv_query_device(device->dev.ib_dev_context, &device->ib_dev_attr)) { - IBOFFLOAD_ERROR(("error obtaining device attributes for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - goto error; - } - - port_cnt = device->ib_dev_attr.phys_port_cnt; - if (0 == port_cnt) { - goto error; - } - - device->ports = (mca_bcol_iboffload_port_t *) - calloc(port_cnt, sizeof(mca_bcol_iboffload_port_t)); - if (NULL == device->ports) { - goto error; - } - - /* Note ports are 1 based (i >= 1) */ - for (port = 1; port <= port_cnt; port++) { - int pi = port - 1; /* port array index starts from zero */ - - struct ibv_port_attr ib_port_attr; - memset(&ib_port_attr, 0, sizeof(ib_port_attr)); - - if (ibv_query_port(device->dev.ib_dev_context, (uint8_t) port, &ib_port_attr)) { - IBOFFLOAD_ERROR(("Error getting port attributes for device %s " - "port number %d errno says %s", - ibv_get_device_name(device->dev.ib_dev), port, strerror(errno))); - continue; - } - - if (IBV_PORT_ACTIVE == ib_port_attr.state) { - /* Pasha: Need to think how we want to handle MTUs - if (ib_port_attr.active_mtu < mca_bcol_iboffload_component.mtu){ - device->mtu = ib_port_attr.active_mtu; - } - */ - /* start to put port info */ - ++device->num_act_ports; - device->ports[pi].id = port; - device->ports[pi].stat = ib_port_attr.state; - device->ports[pi].mtu = ib_port_attr.active_mtu; - - if (0 == cm->pkey_val) { - ret = iboffload_init_port(device, &device->ports[pi]); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Device %s " - "port number %d , failed to init port, errno says %s", - ibv_get_device_name(device->dev.ib_dev), 
- port, strerror(errno))); - continue; - } - } else { - uint16_t pkey, j; - for (j = 0; j < device->ib_dev_attr.max_pkeys; j++) { - if (ibv_query_pkey(device->dev.ib_dev_context, (uint8_t) port, j, &pkey)) { - IBOFFLOAD_ERROR(("error getting pkey for index %d, device %s " - "port number %d errno says %s", - j, ibv_get_device_name(device->dev.ib_dev), port, strerror(errno))); - continue; - } - - pkey = ntohs(pkey) & MCA_BCOL_IBOFFLOAD_PKEY_MASK; - if (pkey == cm->pkey_val) { - ret = iboffload_init_port(device, &device->ports[pi]); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Device %s " - "port number %d , failed to init port, errno says %s", - ibv_get_device_name(device->dev.ib_dev), - port, strerror(errno))); - continue; - } - } - } - } - } - } - - if (0 == device->num_act_ports) { - goto error; - } - - if (OMPI_SUCCESS != allocate_device_resources(device)) { - goto error; - } - - /* setup network context on device */ - device->net_context = OBJ_NEW(bcol_base_network_context_t); - - device->net_context->context_data = (void *) device; - - device->net_context->register_memory_fn = mca_bcol_iboffload_lmngr_register; - device->net_context->deregister_memory_fn = mca_bcol_iboffload_lmngr_deregister; - - /* the device is ready now */ - device->activated = true; - return OMPI_SUCCESS; - -error: - /* Pasha: need to add nice resource clean up */ - return OMPI_ERROR; -} -static void mca_bcol_iboffload_set_small_msg_thresholds(struct mca_bcol_base_module_t *super) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) super; - - /* Set the Bcast threshold, for IB it equals to ML buffer size */ - super->small_message_thresholds[BCOL_BCAST] = - iboffload_module->rdma_block.ml_mem_desc->size_buffer; - - if ((mca_bcol_iboffload_component.use_brucks_smsg_alltoall_rdma) - || (mca_bcol_iboffload_component.use_brucks_smsg_alltoall_sr)) { - /* Set the Alltoall threshold, for Bruck's algth we use 1.5 of the buff size */ - 
super->small_message_thresholds[BCOL_ALLTOALL] = - (iboffload_module->rdma_block.ml_mem_desc->size_buffer / 3) * 2; - } else { - /* Set the Alltoall threshold, for this case it equals to a half of the ML buffer size */ - super->small_message_thresholds[BCOL_ALLTOALL] = - iboffload_module->rdma_block.ml_mem_desc->size_buffer / 2; - } - - /* Set the Allreduce threshold, for IB it equals to ML buffer size */ - super->small_message_thresholds[BCOL_ALLREDUCE] = - iboffload_module->rdma_block.ml_mem_desc->size_buffer; - - /* Set the Allgather threshold, for IB it equals to ML buffer size */ - super->small_message_thresholds[BCOL_ALLGATHER] = - iboffload_module->rdma_block.ml_mem_desc->size_buffer / - ompi_comm_size(iboffload_module->super.sbgp_partner_module->group_comm); -} - -static int mca_bcol_iboffload_init_buffer_memory(struct mca_coll_ml_module_t *ml_module, - struct mca_bcol_base_module_t *bcol, - void *reg_data) -{ - mca_bcol_iboffload_module_t *iboffload_module = (mca_bcol_iboffload_module_t *) bcol; - mca_bcol_iboffload_local_rdma_block_t *rdma_block = &iboffload_module->rdma_block; - - struct mca_bcol_base_memory_block_desc_t *desc = ml_module->payload_block; - struct ibv_mr *mr = (struct ibv_mr *) desc->block->lmngr->reg_desc[bcol->context_index]; - int i; - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_init_buffer_memory was called")); - - /* Set rdma block data */ - rdma_block->ib_info.rkey = mr->rkey; - rdma_block->ib_info.lkey = mr->lkey; - - rdma_block->ib_info.addr = (uint64_t) (uintptr_t) desc->block->base_addr; - IBOFFLOAD_VERBOSE(10, ("Caching rkey %u lkey %u addr %p", - rdma_block->ib_info.rkey, - rdma_block->ib_info.lkey, - rdma_block->ib_info.addr)); - - /* cache ml mem desc tunings localy */ - rdma_block->bdesc.num_banks = desc->num_banks; - rdma_block->bdesc.num_buffers_per_bank = desc->num_buffers_per_bank; - rdma_block->bdesc.size_buffer = desc->size_buffer; - rdma_block->bdesc.data_offset = ml_module->data_offset; - - IBOFFLOAD_VERBOSE(10, 
("RDMA buffer configuration num banks %d num_per_bank %d size %d base addr %p", - mr->addr, desc->num_banks, desc->num_buffers_per_bank, desc->size_buffer)); - - /* pointer to ml level descriptor */ - rdma_block->ml_mem_desc = desc; - - rdma_block->sync_counter = 0; /* reset the counter */ - /* Allocate and set bank block counters */ - for (i = 0; i < MCA_BCOL_IBOFFLOAD_BK_LAST; i++) { - rdma_block->bank_buffer_counter[i] = (int *) calloc(rdma_block->bdesc.num_banks, - sizeof(int)); - if (NULL == rdma_block->bank_buffer_counter[i]) { - IBOFFLOAD_VERBOSE(10, ("Failed to allocate bank_block_counter\n")); - return OMPI_ERROR; - } - } - - if (OMPI_SUCCESS != init_rdma_buf_desc(&rdma_block->bdesc.rdma_desc, - desc->block->base_addr, - rdma_block->bdesc.num_banks, - rdma_block->bdesc.num_buffers_per_bank, - rdma_block->bdesc.size_buffer, - ml_module->data_offset)) { - IBOFFLOAD_VERBOSE(10, ("Failed to allocate rdma memory descriptor\n")); - return OMPI_ERROR; - } - - /* The all data is now cached on module level. The - real data exchange will happen during qp creation and - data exchange */ - - IBOFFLOAD_VERBOSE(10, ("ml_module = %p, iboffload_module = %p, ml_mem_desc = %p.\n", - ml_module, iboffload_module, rdma_block->ml_mem_desc)); - - for (i = 0; i < iboffload_module->num_endpoints; ++i) { - mca_bcol_iboffload_endpoint_t *ep = iboffload_module->endpoints[i]; - - if (true == ep->need_toset_remote_rdma_info) { - IBOFFLOAD_VERBOSE(10, ("ep %p index %d: postponed remote rdma block init.", ep, ep->index)); - if (OPAL_UNLIKELY(OMPI_SUCCESS != - set_endpoint_remote_rdma_info(ep, ep->remote_rdma_info))) { - return OMPI_ERROR; - } - } - } - - /* Hack: - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - Work around for deadlock caused by connection setup - for asyc service barrier. Asyc service barrier use own set of - MQ and QP _BUT_ the exchange operation uses the MQ that is used for - primary set of collectives operations like Allgahter, Barrier,etc. 
- As result exchange wait operation could be pushed to primary MQ and - cause dead-lock. - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - Create connection for service barrier and memory address exchange - for ml buffers and asyc service barrier - */ - /* This nasty hack was moved to ml discovery - rc = mca_bcol_iboffload_rec_doubling_start_connections(iboffload_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - */ - - return OMPI_SUCCESS; -} - -static void load_func(mca_bcol_base_module_t *super) -{ - int fnc; - - /* Loading Memory managment functions */ - /* NULL means that mpool may decide about prefered memory allocate functions */ - /* super->memory_management_functions.malloc_fn = NULL;*/ - /* NULL means that mpool may decide about prefered memory release functions */ - /* super->memory_management_functions.free_fn = NULL; */ - - /* JSL: setting the bcol_memory_init function to NULL, not sure what ib needs to do with - * the ml_memory_block - */ - super->bcol_memory_init = NULL; - - - /* Loading collective functions */ - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; ++fnc) { - super->bcol_function_table[fnc] = NULL; - } - - super->bcol_function_init_table[BCOL_FANIN] = mca_bcol_iboffload_fanin_register; - super->bcol_function_init_table[BCOL_FANOUT] = mca_bcol_iboffload_fanout_register; - - super->bcol_function_init_table[BCOL_BARRIER] = mca_bcol_iboffload_barrier_register; - super->bcol_function_init_table[BCOL_BCAST] = mca_bcol_iboffload_bcast_register; - /*super->bcol_function_init_table[BCOL_ALLTOALL] = mca_bcol_iboffload_alltoall_register;*/ - super->bcol_function_init_table[BCOL_ALLGATHER] = mca_bcol_iboffload_allgather_register; - super->bcol_function_init_table[BCOL_SYNC] = mca_bcol_iboffload_memsync_register; - super->bcol_function_init_table[BCOL_ALLREDUCE] = mca_bcol_iboffload_allreduce_register; - - super->bcol_memory_init = mca_bcol_iboffload_init_buffer_memory; - - /* Set thresholds */ - 
super->set_small_msg_thresholds = mca_bcol_iboffload_set_small_msg_thresholds; - - super->k_nomial_tree = mca_bcol_iboffload_setup_knomial_tree; -} - -int mca_bcol_iboffload_setup_knomial_tree(mca_bcol_base_module_t *super) -{ - int rc; - mca_bcol_iboffload_module_t *ib_module = (mca_bcol_iboffload_module_t *) super; - rc = netpatterns_setup_recursive_knomial_allgather_tree_node( - ib_module->super.sbgp_partner_module->group_size, - ib_module->super.sbgp_partner_module->my_index, - mca_bcol_iboffload_component.k_nomial_radix, - super->list_n_connected, - &ib_module->knomial_allgather_tree); - - return rc; -} - -static inline struct ibv_cq *ibv_create_cq_compat(struct ibv_context *context, - int cqe, void *cq_context, struct ibv_comp_channel *channel, - int comp_vector) -{ -#if OPAL_IBV_CREATE_CQ_ARGS == 3 - return ibv_create_cq(context, cqe, channel); -#else - return ibv_create_cq(context, cqe, cq_context, channel, comp_vector); -#endif -} - -int mca_bcol_iboffload_adjust_cq(mca_bcol_iboffload_device_t *device, - struct ibv_cq **ib_cq) -{ - uint32_t cq_size = (uint32_t) mca_bcol_iboffload_component.cq_size; - - if (NULL == *ib_cq) { - *ib_cq = ibv_create_cq_compat(device->dev.ib_dev_context, cq_size, -#if OPAL_ENABLE_PROGRESS_THREADS == 1 - device, device->ib_channel, -#else - NULL, NULL, -#endif - 0); - - if (NULL == *ib_cq) { - IBOFFLOAD_ERROR(("Device %s " - ", failed to create CQ, errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - - return OMPI_ERROR; - } - } - - return OMPI_SUCCESS; -} - -static int init_recv_wr_manager(mca_bcol_iboffload_recv_wr_manager *recv_wr_manager) -{ - - struct ibv_recv_wr *recv_wr = NULL; - int ret = OMPI_SUCCESS, qp, wr, num_qps; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - num_qps = cm->num_qps; - OPAL_THREAD_LOCK(&recv_wr_manager->lock); - - recv_wr_manager->recv_work_requests = - (struct ibv_recv_wr **) calloc(num_qps, sizeof(struct ibv_recv_wr *)); - if (NULL == 
recv_wr_manager->recv_work_requests) { - IBOFFLOAD_ERROR(("Failed to allocate memory for recv_wr_manager->recv_work_requests")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto error; - } - - for (qp = 0; qp < num_qps; ++qp) { - int recv_queue_size = cm->qp_infos[qp].rd_num; - - recv_wr_manager->recv_work_requests[qp] = - (struct ibv_recv_wr *) calloc(recv_queue_size, sizeof(struct ibv_recv_wr)); - if (NULL == recv_wr_manager->recv_work_requests[qp]) { - IBOFFLOAD_ERROR(("Failed to allocate memory for recv_wr_manager->recv_work_requests")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto error; - } - - for (wr = 0; wr < recv_queue_size - 1; ++wr) { - recv_wr = &recv_wr_manager->recv_work_requests[qp][wr]; - recv_wr->next = &recv_wr_manager->recv_work_requests[qp][wr + 1]; - /* init receive work request. - * Real sg_list value we fill during receive prepost flow. - * recv_wr->wr_id and recv_wr->sg_list is zero by default */ - recv_wr->wr_id = 0; - recv_wr->sg_list = NULL; - recv_wr->num_sge = 1; /* single sge will be filled later */ - } - - recv_wr->next->num_sge = 1; /* for the last entry everything is null except the num_sge */ - } - -error: - OPAL_THREAD_UNLOCK(&recv_wr_manager->lock); - return ret; -} - -/* On first access to the component - allocate all memory resources */ -static int component_first_usage(void) -{ - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - int ret = OMPI_SUCCESS; - - /* creating collfrag free list */ - OBJ_CONSTRUCT(&cm->collfrags_free, ompi_free_list_t); - ret = ompi_free_list_init_new(&cm->collfrags_free, - sizeof(mca_bcol_iboffload_collfrag_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_collfrag_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num, - cm->free_list_max, - cm->free_list_inc, - NULL); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Failed to allocate mwr_free %s:%d\n", __FILE__, __LINE__)); - return ret; - } - - /* allocate free list of collective message requests */ - 
OBJ_CONSTRUCT(&cm->collreqs_free, ompi_free_list_t); - ret = ompi_free_list_init_new(&cm->collreqs_free, - sizeof(mca_bcol_iboffload_collreq_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_collreq_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num * 2, - cm->free_list_max * 2, - cm->free_list_inc * 2, - NULL); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Error creating free list, error: %s\n", strerror(errno))); - goto release_collfrag; - } - - OBJ_CONSTRUCT(&cm->tasks_free, ompi_free_list_t); - ret = ompi_free_list_init_new(&cm->tasks_free, - sizeof(mca_bcol_iboffload_task_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_task_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num * 2, - cm->free_list_max * 2, - cm->free_list_inc * 2, - NULL); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Error creating free list, error: %s\n", strerror(errno))); - goto release_collreq; - } - - OBJ_CONSTRUCT(&cm->calc_tasks_free, ompi_free_list_t); - ret = ompi_free_list_init_ex_new(&cm->calc_tasks_free, - sizeof(mca_bcol_iboffload_task_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_task_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num * 2, - cm->free_list_max * 2, - cm->free_list_inc * 2, - NULL, - mca_bcol_iboffload_calc_task_init, - &cm->calc_tasks_free); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Error creating free list, error: %s\n", strerror(errno))); - goto release_collreq; - } - - /* Initialization for frags that handle ML allocated memory, - it is NO registration is required ! 
- */ - - OBJ_CONSTRUCT(&cm->ml_frags_free, ompi_free_list_t); - ret = ompi_free_list_init_ex_new(&cm->ml_frags_free, - sizeof(mca_bcol_iboffload_frag_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_frag_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num * 2, - cm->free_list_max * 2, - cm->free_list_inc * 2, - NULL, - mca_bcol_iboffload_ml_frag_init, - NULL); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Error creating free list, error: %s\n", strerror(errno))); - goto release_collreq; - } - - ret = init_recv_wr_manager(&cm->recv_wrs); - if (OMPI_SUCCESS != ret){ - IBOFFLOAD_ERROR(("Failed to prepare recv wrs")); - goto release_tasks; - } - - cm->init_done = true; - - return OMPI_SUCCESS; - -release_tasks: - OBJ_DESTRUCT(&cm->tasks_free); -release_collreq: - OBJ_DESTRUCT(&cm->collreqs_free); -release_collfrag: - OBJ_DESTRUCT(&cm->collfrags_free); - return ret; -} - - -/* query to see if some modules are available for use on the given - * communicator, and if so, what it's priority is. 
- */ -mca_bcol_base_module_t ** -mca_bcol_iboffload_comm_query(mca_sbgp_base_module_t *sbgp, int *num_modules) -{ - /* local variables */ - int i, mq_index, rc, my_rank = 0; - struct mqe_context_attr mqe_attr; - - mca_sbgp_ibnet_module_t *ibnet = NULL; - mca_bcol_base_module_t **iboffload_modules = NULL; - mca_bcol_iboffload_module_t *iboffload_module = NULL; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - /* Bruck's alltoall iovec */ - size_t iovec_size; - - if (OPAL_UNLIKELY(false == cm->init_done)) { - if (OMPI_SUCCESS != component_first_usage()) { - return NULL; - } - } - - /* No group - no modules*/ - if (OPAL_UNLIKELY(NULL == sbgp)) { - return NULL; - } - /* - * This is activated only for intra-communicators - */ - if (OPAL_UNLIKELY(OMPI_COMM_IS_INTER(sbgp->group_comm))) { - return NULL; - } - - ibnet = (mca_sbgp_ibnet_module_t *) sbgp; - if (OPAL_UNLIKELY(0 == ibnet->num_cgroups)) { - /* we have no connection group */ - return NULL; - } - - my_rank = sbgp->my_index; - - iboffload_modules = (mca_bcol_base_module_t **) calloc - (ibnet->num_cgroups, sizeof(mca_bcol_base_module_t *)); - if (OPAL_UNLIKELY(NULL == iboffload_modules)) { - return NULL; - } - - /* Go through list of connection groups that we have on ibnet - * and create bcol module for each one */ - *num_modules = 0; - for (i = 0; i < ibnet->num_cgroups; i++) { - mca_sbgp_ibnet_connection_group_info_t *cgroup = - &ibnet->cgroups[i]; - - iboffload_module = OBJ_NEW(mca_bcol_iboffload_module_t); - - iboffload_modules[i] = &(iboffload_module->super); - - /* - * In fact the value == ibnet->num_cgroups in the end - * of the loop, but we need always to know how many modules - * release in the error case (under CLEANUP label) - */ - - (*num_modules)++; - - iboffload_module->cgroup_index = i; - iboffload_module->group_size = ibnet->super.group_size; - iboffload_module->log_group_size = lognum(iboffload_module->group_size); - /* Put pointer to sbgp module */ - 
iboffload_module->super.sbgp_partner_module = sbgp; - /* Put cgroup information on module */ - iboffload_module->ibnet = ibnet; - - iboffload_module->device = opal_pointer_array_get_item(&cm->devices, cgroup->device_index); - - IBOFFLOAD_VERBOSE(10, ("Iboffload module - %p uses " - "device - %p with index - %d.\n", - iboffload_module, - iboffload_module->device->dev.ib_dev, - cgroup->device_index)); - - OBJ_RETAIN(iboffload_module->device); - /* Pasha: Need to print NICE error in future */ - assert(NULL != iboffload_module->device); - iboffload_module->port = cgroup->port; - - IBOFFLOAD_VERBOSE(10, ("Iboffload module - %p on local port %d.\n", - iboffload_module, iboffload_module->port)); - - if (OPAL_UNLIKELY(!iboffload_module->device->activated)) { - /* this device was never used before, need to activate it */ - if (OMPI_SUCCESS != iboffload_start_device(iboffload_module->device)) { - OBJ_RELEASE(iboffload_module->device); - goto CLEANUP; - } - } - /* Set pointer to network contest on bcol base, we need it for ml - memory managment */ - OBJ_RETAIN(iboffload_module->device->net_context); - iboffload_module->super.network_context = iboffload_module->device->net_context; - - iboffload_module->subnet_id = iboffload_module->device->ports[iboffload_module->port - 1].subnet_id; - iboffload_module->lid = iboffload_module->device->ports[iboffload_module->port - 1].lid; - - load_func(&iboffload_module->super); - - IBOFFLOAD_VERBOSE(10, ("Call for create endpoints for iboffload module %p," - " cgroup num (index) %d.\n", iboffload_module, i)); - - /* create endpoints and store its in the endpoints pointer of iboffload_module structer */ - if (OMPI_SUCCESS != - mca_bcol_iboffloads_create_endpoints(cgroup, iboffload_module)) { - goto CLEANUP; - } - - memset(&mqe_attr, 0, sizeof(mqe_attr)); - mqe_attr.max_mqe_tasks = (uint32_t)mca_bcol_iboffload_component.max_mqe_tasks; - mqe_attr.max_mq_size = (uint32_t)mca_bcol_iboffload_component.max_mq_size; - mqe_attr.cq = 
iboffload_module->device->ib_mq_cq; - - /* ALL MQs have the same configuration */ - for (mq_index = 0; mq_index < BCOL_IBOFFLOAD_MQ_NUM; mq_index++) { - iboffload_module->mq[mq_index] = - mqe_context_create(iboffload_module->device->dev.ib_dev_context, - iboffload_module->device->ib_pd, &mqe_attr); - if (OPAL_UNLIKELY(NULL == iboffload_module->mq[mq_index])) { - IBOFFLOAD_ERROR(("Error creating MQ for device (%s), error: %s\n", - ibv_get_device_name(iboffload_module->device->dev.ib_dev), strerror(errno))); - goto CLEANUP; - } - } - - /* Barrier initialization - recuresive doubling */ -#if 1 - if (OMPI_SUCCESS != - netpatterns_setup_recursive_doubling_tree_node( - iboffload_module->group_size, my_rank, - &iboffload_module->recursive_doubling_tree)) { - IBOFFLOAD_ERROR(("Failed to setup recursive doubling tree," - " error: %s\n", strerror(errno))); - goto CLEANUP; - } -#endif - - /* Barrier initialization - N exchange tree */ - if (OMPI_SUCCESS != - netpatterns_setup_recursive_doubling_n_tree_node( - iboffload_module->group_size, my_rank, - cm->exchange_tree_order, - &iboffload_module->n_exchange_tree)) { - IBOFFLOAD_ERROR(("Failed to setup recursive doubling tree," - " error: %s\n", strerror(errno))); - goto CLEANUP; - } - - - /* Recursive K-ing initialization - Knomial exchange tree */ - if (OMPI_SUCCESS != - netpatterns_setup_recursive_knomial_tree_node( - iboffload_module->group_size, my_rank, - cm->knomial_tree_order, - &iboffload_module->knomial_exchange_tree)) { - IBOFFLOAD_ERROR(("Failed to setup recursive Knomial tree," - " error: %s\n", strerror(errno))); - goto CLEANUP; - } - - /* Manju Brucks alltoall temp iovec list */ - iovec_size = iboffload_module->group_size / 2 + iboffload_module->group_size % 2; - iboffload_module->alltoall_iovec = (struct iovec *) malloc(sizeof(struct iovec) - * iovec_size); - iboffload_module->alltoall_recv_iovec = (struct iovec *) malloc(sizeof(struct iovec) - * iovec_size); - - - 
iboffload_module->k_alltoall_bruck_radix=cm->k_alltoall_bruck_radix; - iboffload_module->tmp_buf_alignment=cm->tmp_buf_alignment; - -#if 1 /* Disabling this code since it brakes all iboffload functionality */ - /* Sorry Pasha, gotta do this. Recursive K-ing allgather initialization - Knomial exchange tree */ - /*Pretty sure I need to pass in the communicator rank */ - /* I need to reindex this mess */ - /* this looks silly, I know but it allows for minimal changes to existing code */ - iboffload_module->comm_to_ibnet_map = sbgp->group_list; - - -#endif -#if 0 - if ( NULL == iboffload_module->comm_to_ibnet_map ) { - IBOFFLOAD_ERROR(("Out of resources\n")); - goto CLEANUP; - } - for( i = 0; i < iboffload_module->group_size; i++) { - int j = 0; - while( sbgp->group_list[j] != i){ - j++; - } - iboffload_module->comm_to_ibnet_map[i] = j; - } - /* that should take care of that */ - if (OMPI_SUCCESS != - netpatterns_setup_recursive_knomial_allgather_tree_node( - iboffload_module->group_size, sbgp->group_list[my_rank], - cm->k_nomial_radix, iboffload_module->super.list_n_connected, - &iboffload_module->knomial_allgather_tree)) { - IBOFFLOAD_ERROR(("Failed to setup recursive Knomial tree," - " error: %s\n", strerror(errno))); - goto CLEANUP; - } -#endif - - iboffload_module->power_of_2 = - mca_bcol_iboffload_fls(iboffload_module->num_endpoints); - iboffload_module->power_of_2_ranks = - (1 << iboffload_module->power_of_2); - - /* header into ml buffer, we don't support header for anyone other than shared memory - * at the moment - */ - iboffload_module->super.header_size = 0; - - iboffload_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY | - MCA_BCOL_BASE_NO_ML_BUFFER_FOR_LARGE_MSG | - MCA_BCOL_BASE_NO_ML_BUFFER_FOR_BARRIER; - - rc = mca_bcol_base_bcol_fns_table_init(&(iboffload_module->super)); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - goto CLEANUP; - } - - OBJ_CONSTRUCT(&iboffload_module->iovec_tasks_free, ompi_free_list_t); - rc = 
ompi_free_list_init_ex_new(&iboffload_module->iovec_tasks_free, - sizeof(mca_bcol_iboffload_task_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_task_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num * 2, - cm->free_list_max * 2, - cm->free_list_inc * 2, - NULL, - mca_bcol_iboffload_iovec_task_init, - iboffload_module); - if (OMPI_SUCCESS != rc) { - IBOFFLOAD_ERROR(("Error creating free list, error: %s\n", strerror(errno))); - goto CLEANUP; - } - } - - IBOFFLOAD_VERBOSE(10, ("Finished with success, num of cgroups is %d, num of modules is %d.\n", - ibnet->num_cgroups, *num_modules)); - - return iboffload_modules; - -CLEANUP: - for (i = 0; i < *num_modules; i++) { - if (NULL != iboffload_modules[i]) { - OBJ_RELEASE(iboffload_modules[i]); - } - } - free(iboffload_modules); - return NULL; -} - -static int init_rdma_buf_desc(mca_bcol_iboffload_rdma_buffer_desc_t **desc, void *base_addr, uint32_t num_banks, - uint32_t num_buffers_per_bank, uint32_t size_buffer, uint32_t header_size) -{ - uint32_t i, j, ci; - mca_bcol_iboffload_rdma_buffer_desc_t *tmp_desc; - - IBOFFLOAD_VERBOSE(10, ("init_rdma_buf_desc base addr %p, num_n %d , " - "num_per_bank %d, size %d, header size %d", - base_addr, num_banks, num_buffers_per_bank, - size_buffer, header_size)); - *desc = (mca_bcol_iboffload_rdma_buffer_desc_t *) - calloc(num_banks * num_buffers_per_bank, - sizeof(mca_bcol_iboffload_rdma_buffer_desc_t)); - if (OPAL_UNLIKELY(NULL == *desc)) { - IBOFFLOAD_ERROR(("Failed to allocate memory")); - return OMPI_ERROR; - } - - tmp_desc = *desc; - - for (i = 0; i < num_banks; i++) { - for (j = 0; j < num_buffers_per_bank; j++) { - ci = i * num_buffers_per_bank + j; - tmp_desc[ci].generation_number = 0; - tmp_desc[ci].bank_index = i; - tmp_desc[ci].buffer_index = j; - /* - * iboffload don't have any header, but other bcols may to have. So - * we need to take it in account. 
- */ - tmp_desc[ci].data_addr = (void *) - ((unsigned char *) base_addr + ci * size_buffer + header_size); - IBOFFLOAD_VERBOSE(10, ("RDMA setup %d %d - %p", i, j, tmp_desc[ci].data_addr)); - } - } - - return OMPI_SUCCESS; -} - -static int set_endpoint_remote_rdma_info(mca_bcol_iboffload_endpoint_t *ep, mca_bcol_iboffload_rdma_info_t *remote_rdma_info) -{ - mca_bcol_iboffload_rem_rdma_block_t *rem_block = &ep->remote_rdma_block; - - /* We'll continue if - - 1. The module rdma_block is already initilized on this stage - 2. All peers have the same rdma block configuration that actually is - define on ML level - - Otherwise set flag to init it lately. - */ - if (NULL == ep->iboffload_module->rdma_block.ml_mem_desc) { - IBOFFLOAD_VERBOSE(10, ("RDMA block information hasn't been inited yet.")); - ep->need_toset_remote_rdma_info = true; - return OMPI_SUCCESS; - } - - /* set the rdma addr for barrier */ - ep->remote_zero_rdma_addr = remote_rdma_info[0]; - - IBOFFLOAD_VERBOSE(10, ("RDMA block information %p %d", - remote_rdma_info[0].addr, remote_rdma_info[0].rkey)); - - /* set the rdma block memory structs */ - rem_block->ib_info = remote_rdma_info[1]; - - - /* if we got some real data. 
lets init memory adress sctructures */ - if (0 != rem_block->ib_info.addr) { - if (OMPI_SUCCESS != init_rdma_buf_desc(&rem_block->rdma_desc, (void *)rem_block->ib_info.addr, - ep->iboffload_module->rdma_block.bdesc.num_banks, - ep->iboffload_module->rdma_block.bdesc.num_buffers_per_bank, - ep->iboffload_module->rdma_block.bdesc.size_buffer, - /* remember, we use lkey to pass the data offset value */ - rem_block->ib_info.lkey)) { - IBOFFLOAD_VERBOSE(10, ("Failed to allocate RDMA buffer descriptor")); - return OMPI_ERROR; - } - } - - IBOFFLOAD_VERBOSE(10, ("endpoint - %p, recv barrier rdma: rem addr - %p, rem rkey - %d.\n", - ep, ep->remote_zero_rdma_addr.addr, ep->remote_zero_rdma_addr.rkey)); - IBOFFLOAD_VERBOSE(10, ("endpoint - %p, recv ml rdma: rem addr - %p, rem rkey - %d.\n", - ep, ep->remote_rdma_block.ib_info.addr, ep->remote_rdma_block.ib_info.rkey)); - - return OMPI_SUCCESS; -} - -static int unpack_endpoint_rdma_addr(void *callback_data) -{ - int rc; - struct iovec payload_iovec; - - size_t max_size = 0; - uint32_t out_size = 1; - - mca_bcol_iboffload_collfrag_t *coll_frag = (mca_bcol_iboffload_collfrag_t *) callback_data; - mca_bcol_iboffload_collreq_t* collreq = coll_frag->coll_full_req; - - mca_bcol_iboffload_task_t *wait_task = (mca_bcol_iboffload_task_t *) coll_frag->signal_task_wr_id; - - mca_bcol_iboffload_frag_t *recv_frag = wait_task->frag; - mca_bcol_iboffload_endpoint_t *ep = wait_task->endpoint; - - rc = opal_convertor_copy_and_prepare_for_recv( - ompi_mpi_local_convertor, - &opal_datatype_uint1, - sizeof(mca_bcol_iboffload_rdma_info_t) * MAX_REMOTE_RDMA_INFO, - ep->remote_rdma_info, 0, - &collreq->recv_convertor); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return OMPI_ERROR; - } - - payload_iovec.iov_base = (void*) (uintptr_t) - recv_frag->sg_entry.addr; - - payload_iovec.iov_len = sizeof(mca_bcol_iboffload_rdma_info_t) * MAX_REMOTE_RDMA_INFO; - - if (0 > opal_convertor_unpack(&collreq->recv_convertor, - &payload_iovec, &out_size, &max_size)) 
{ - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != set_endpoint_remote_rdma_info(ep, ep->remote_rdma_info)) { - return OMPI_ERROR; - } - - opal_convertor_cleanup(&collreq->send_convertor); - opal_convertor_cleanup(&collreq->recv_convertor); - - return OMPI_SUCCESS; -} - -/* RDMA addr exchange with rem proc */ -int mca_bcol_iboffload_exchange_rem_addr(mca_bcol_iboffload_endpoint_t *ep) -{ - int rc; - /* the [0] used for constant barrier rdma operations - the [1] used for rdma block inforation exchange. The rdma - block is used for RDMA operation over ML allocated memory */ - mca_bcol_iboffload_rdma_info_t remote_rdma_addr[MAX_REMOTE_RDMA_INFO]; - - mca_bcol_iboffload_task_t *send_task, - *wait_task; - - mca_bcol_iboffload_frag_t *send_fragment, - *preposted_recv_frag; - - ompi_free_list_item_t *item; - - mca_bcol_iboffload_collreq_t *coll_request; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item); - if (NULL == item) { - IBOFFLOAD_ERROR(("Failing for coll request free list waiting.\n")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - coll_request = (mca_bcol_iboffload_collreq_t *) item; - - coll_request->completion_cb_fn = unpack_endpoint_rdma_addr; - /* For the exchange the progress_fn should be never used */ - coll_request->progress_fn = NULL; - coll_request->module = ep->iboffload_module; - coll_request->ml_buffer_index = MCA_COLL_ML_NO_BUFFER; - coll_request->buffer_info[SBUF].offset = 0; - coll_request->buffer_info[RBUF].offset = 0; - coll_request->qp_index = MCA_BCOL_IBOFFLOAD_QP_REGULAR; - /* - * setup collective work request - */ - - /* get collective frag */ - coll_fragment = &coll_request->first_collfrag; - mca_bcol_iboffload_collfrag_init(coll_fragment); - - coll_fragment->mq_credits = 2; - coll_fragment->mq_index = COLL_MQ; - coll_fragment->tail_next = &coll_fragment->to_post; - /* overwrite mq index to run over service setup */ - - /* 
Update the algorithm type in order to support credit mechanism */ - coll_fragment->alg = REMOTE_EXCHANGE_ALG; - if (OPAL_UNLIKELY(false == - BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(ep->iboffload_module, - coll_fragment->mq_index, 2))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - goto out_of_resources; - } - - /* set pointers for (coll frag) <-> (coll full request) */ - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(coll_request, coll_fragment); - - remote_rdma_addr[0].addr = - ep->iboffload_module->device->dummy_frags[MCA_BCOL_IBOFFLOAD_QP_BARRIER].sg_entry.addr; - remote_rdma_addr[0].rkey = - ep->iboffload_module->device->dummy_frags[MCA_BCOL_IBOFFLOAD_QP_BARRIER].registration->mr->rkey; - - if (NULL != ep->iboffload_module->rdma_block.ml_mem_desc) { - remote_rdma_addr[1].addr = ep->iboffload_module->rdma_block.ib_info.addr; - remote_rdma_addr[1].rkey = ep->iboffload_module->rdma_block.ib_info.rkey; - /* Little bit ugly, but easy solution. The data_offset */ - remote_rdma_addr[1].lkey = ep->iboffload_module->rdma_block.bdesc.data_offset; - } else { - /* since it is no data lets send 0, so remote side will knox that no real - data was send */ - remote_rdma_addr[1].addr = 0; - remote_rdma_addr[1].rkey = 0; - remote_rdma_addr[1].lkey = 0; - } - - IBOFFLOAD_VERBOSE(10, ("endpoint - %p, sending barrier rdma: addr - %p, rkey - %d.\n", - ep, remote_rdma_addr[0].addr, remote_rdma_addr[0].rkey)); - IBOFFLOAD_VERBOSE(10, ("endpoint - %p, sending ml rdma: addr - %p, rkey - %d.\n", - ep, remote_rdma_addr[1].addr, remote_rdma_addr[1].rkey)); - - rc = opal_convertor_copy_and_prepare_for_send( - ompi_mpi_local_convertor, - &opal_datatype_uint1, - sizeof(mca_bcol_iboffload_rdma_info_t) * MAX_REMOTE_RDMA_INFO, - &remote_rdma_addr, 0, - &coll_request->send_convertor); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - goto out_of_resources; - } - - send_fragment = mca_bcol_iboffload_get_send_frag( - coll_request, ep->index, coll_request->qp_index, - 
sizeof(mca_bcol_iboffload_rdma_info_t) * MAX_REMOTE_RDMA_INFO, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_CONVERT); - if (OPAL_UNLIKELY(NULL == send_fragment)) { - IBOFFLOAD_ERROR(("Failing for getting and packing send frag.\n")); - goto out_of_resources; - } - - send_task = mca_bcol_iboffload_get_send_task(ep->iboffload_module, - ep->index, coll_request->qp_index, send_fragment, - coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_ERROR(("Failing for getting send task.\n")); - goto out_of_resources; - } - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, send_task); - - /* post wait */ - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - ep->iboffload_module, ep->index, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_ERROR(("Exchaging: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(ep->iboffload_module, - ep->index, 1, preposted_recv_frag, coll_request->qp_index, NULL); - - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Exchanging: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, wait_task); - - /* The last element must end with ZERO */ - wait_task->element.next = NULL; - - /* number of sends that need to be completed asynchronously */ - coll_fragment->n_sends = 1; - SENDWR(send_task)->send_flags |= IBV_SEND_SIGNALED; - - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - coll_request->n_frag_mpi_complete = 0; - coll_request->n_frag_net_complete = 0; - coll_request->user_handle_freed = false; - - wait_task->element.flags |= MQE_WR_FLAG_SIGNAL; - 
coll_fragment->signal_task_wr_id = - (uint64_t) (uintptr_t) wait_task->element.wr_id; - - wait_task->element.wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(coll_request->module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - coll_request->user_handle_freed = true; - /* complete the exchange - progress releases full request descriptors */ - while (!BCOL_IS_COMPLETED(coll_request)) { - mca_bcol_iboffload_component_progress(); - } - - IBOFFLOAD_VERBOSE(10, ("RDMA addr exchange with comm rank: %d was finished.\n", - ep->iboffload_module->ibnet->super.group_list[ep->index])); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("RDMA addr exchange, adding collfrag to collfrag_pending.\n")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, ep->iboffload_module); -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.c b/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.c deleted file mode 100644 index a342cbd2318..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.c +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include - -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_qp_info.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_endpoint.h" - -static int mca_bcol_iboffload_dummy_frag_qp_prepost( - mca_bcol_iboffload_endpoint_t *endpoint, - int qp_index, int num_to_prepost) -{ - struct ibv_recv_wr *recv_wr, *recv_bad; - int ret, num_preposted = 0, start_wr_index; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs; - - IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, to prepost %d", - (void *) endpoint, num_to_prepost)); - - if (OPAL_UNLIKELY(0 == num_to_prepost)) { - IBOFFLOAD_VERBOSE(10, ("num_to_prepost = 0, return immediate")); - return OMPI_SUCCESS; - } - - /* make sure that we do not overrun number of rd_wqe */ - if (num_to_prepost > endpoint->qps[qp_index].rd_wqe) { - IBOFFLOAD_VERBOSE(10, ("Reset num_to_prepost = %d, to rd_wqe = %d", - num_to_prepost, endpoint->qps[qp_index].rd_wqe)); - - num_to_prepost = endpoint->qps[qp_index].rd_wqe; - } - - OPAL_THREAD_LOCK(&recv_wrs->lock); - - /* calculate start index in array - * of pre-allocated work requests */ - start_wr_index = cm->qp_infos[qp_index].rd_num - num_to_prepost; - recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index]; - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, to_porepost %d, " - "start index of WRs - %d, rd_wqe - %d", - (void *) endpoint, qp_index, num_to_prepost, - start_wr_index, endpoint->qps[qp_index].rd_wqe)); - - while (num_preposted < num_to_prepost) { - /* prepost the special barrier frag to recv queue */ - struct ibv_sge *dummy_sg_entry = - &endpoint->iboffload_module->device->dummy_frags[qp_index].sg_entry; - - recv_wr[num_preposted].sg_list = dummy_sg_entry; - ++num_preposted; - } - - if 
(OPAL_LIKELY(num_preposted > 0)) { - /* Set the tail */ - recv_wr[num_preposted - 1].next = NULL; - - /* post the list of recvs */ - ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad); - if (OPAL_UNLIKELY(0 != ret)) { - IBOFFLOAD_ERROR(("ibv_post_recv failed, error: %s [%d], " - "qp_index - %d.\n", strerror(errno), ret, qp_index)); - - return OMPI_ERROR; - } - - /* recover last recv_wr if needed */ - if (OPAL_UNLIKELY(num_to_prepost != num_preposted)) { - recv_wr[num_preposted - 1].next = &recv_wr[num_preposted]; - } - - /* decresing numbers of free recv wqe */ - endpoint->qps[qp_index].rd_wqe -= num_preposted; - } - - OPAL_THREAD_UNLOCK(&recv_wrs->lock); - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, to_porepost %d, num preposted - %d, qp_index - %d", - (void *) endpoint, num_to_prepost, num_preposted, qp_index)); - - return OMPI_SUCCESS; -} - -/* - * Receive prepost: - * return values: - * 0 - no prepost was done - * -1 - fatal error during prepost - * other value - number preposted elements - */ -static int mca_bcol_iboffload_frag_reg_qp_prepost( - mca_bcol_iboffload_endpoint_t *endpoint, - int qp_index, int num_to_prepost) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_frag_t *frag; - - struct ibv_recv_wr *recv_wr, *recv_bad; - int i, ret, num_preposted = 0, start_wr_index; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_device_t *device = endpoint->iboffload_module->device; - - opal_list_t *preposted = &(endpoint->qps[qp_index].preposted_frags); - mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs; - - IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, to prepost %d", - (void *) endpoint, num_to_prepost)); - - if (OPAL_UNLIKELY(0 == num_to_prepost)) { - IBOFFLOAD_VERBOSE(10, ("num_to_prepost = 0, return immediate")); - return OMPI_SUCCESS; - } - - /* make sure that we do not overrun number of rd_wqe */ - if (num_to_prepost > endpoint->qps[qp_index].rd_wqe) { - 
IBOFFLOAD_VERBOSE(10, ("Reset num_to_prepost = %d, to rd_wqe = %d", - num_to_prepost, endpoint->qps[qp_index].rd_wqe)); - - num_to_prepost = endpoint->qps[qp_index].rd_wqe; - } - - OPAL_THREAD_LOCK(&recv_wrs->lock); - - /* calculate start index in array - * of pre-allocated work requests */ - start_wr_index = cm->qp_infos[qp_index].rd_num - num_to_prepost; - recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index]; - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, to_porepost %d, " - "start index of WRs - %d, rd_wqe - %d", - (void *) endpoint, qp_index, num_to_prepost, - start_wr_index, endpoint->qps[qp_index].rd_wqe)); - - while (num_preposted < num_to_prepost) { - /* put the item on list of preposted */ - OMPI_FREE_LIST_GET_MT(&device->frags_free[qp_index], item); - if (OPAL_UNLIKELY(NULL == item)) { - break; - } - - frag = (mca_bcol_iboffload_frag_t *) item; - opal_list_append(preposted, (opal_list_item_t *) item); - - recv_wr[num_preposted].sg_list = &frag->sg_entry; - /* TODO Pasha - fix it later */ /* Vasily: Is it right place to take a size value ???? 
*/ - frag->sg_entry.length = cm->qp_infos[qp_index].size; - ++num_preposted; - } - - if (OPAL_LIKELY(num_preposted > 0)) { - /* Set the tail */ - recv_wr[num_preposted - 1].next = NULL; - - /* post the list of recvs */ - ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad); - if (OPAL_UNLIKELY(0 != ret)) { - IBOFFLOAD_ERROR(("ibv_post_recv failed (%s), error: %s [%d], " - "qp_index - %d.\n", - ibv_get_device_name(device->dev.ib_dev), - strerror(errno), ret, qp_index)); - - /* Return allocated frags */ - for (i = 0; i < num_preposted; i++) { - OMPI_FREE_LIST_RETURN_MT(&device->frags_free[qp_index], - (ompi_free_list_item_t *) - opal_list_remove_last(preposted)); - } - - return OMPI_ERROR; - } - - /* recover last recv_wr if needed */ - if (OPAL_UNLIKELY(num_to_prepost != num_preposted)) { - recv_wr[num_preposted - 1].next = &recv_wr[num_preposted]; - } - - /* decresing numbers of free recv wqe */ - endpoint->qps[qp_index].rd_wqe -= num_preposted; - } - - OPAL_THREAD_UNLOCK(&recv_wrs->lock); - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, to_porepost %d, num preposted - %d", - (void *) endpoint, num_to_prepost, num_preposted)); - - return OMPI_SUCCESS; -} - - -static void mca_bcol_iboffload_fillin_qp_attr(int qp_index, - mca_bcol_iboffload_endpoint_t *ep, - ompi_common_ofacm_base_qp_config_t *qp_config) -{ - uint32_t max_sge, *init_attr_mask = - &qp_config->init_attr_mask[qp_index]; - - struct ibv_qp_attr *attr = &qp_config->attr[qp_index]; - struct ibv_qp_init_attr *init_attr = &qp_config->init_attr[qp_index]; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - /* Set special init attributes mask */ - *init_attr_mask = IBV_M_QP_EXT_CLASS_1 | - IBV_M_QP_EXT_CLASS_2 | - IBV_M_QP_EXT_IGNORE_RQ_OVERFLOW; - - /* Set init attributes */ - init_attr->qp_type = IBV_QPT_RC; - -/* Vasily: ?????? - init_attr->cap.max_inline_data = - max_inline_size(qp, iboffload_module->device); -*/ - /* Pasha: we can not leave max_inline empty ! 
- Todo: copy max_inline_size() from ofacm to - common area. - */ - init_attr->cap.max_inline_data = (int32_t) cm->max_inline_data; - - /* We allocate SG list for some algorithms (Bruck's alltoall) */ - max_sge = ep->iboffload_module->group_size / 2 + - ep->iboffload_module->group_size % 2; - - /* max send sge should be less than device maximums */ - if (max_sge > (uint32_t) - ep->iboffload_module->device->ib_dev_attr.max_sge) { - max_sge = (uint32_t) ep->iboffload_module->device->ib_dev_attr.max_sge; - } - - init_attr->cap.max_send_sge = max_sge; - init_attr->cap.max_recv_sge = max_sge; -/* Vasily: the value will be changed later */ -/* TODO Pasha: this is real crap */ - init_attr->cap.max_recv_wr = (uint32_t) cm->cq_size; - init_attr->cap.max_send_wr = (uint32_t) cm->cq_size; - - /* Set attributes */ - - /* attr->pkey_index = 0; */ /* Vasily: ????? */ - - attr->port_num = ep->iboffload_module->port; -/* Vasily: the value will be changed later */ - attr->path_mtu = (uint32_t)cm->mtu; - - attr->max_dest_rd_atomic = cm->max_rdma_dst_ops; - attr->min_rnr_timer = (uint32_t)cm->min_rnr_timer; - - attr->ah_attr.is_global = 0; - attr->ah_attr.sl = (uint32_t)cm->service_level; -/* Vasily: from struct mca_bcol_iboffload_port_t ????? */ -/* - attr->ah_attr.src_path_bits = iboffload_module->src_path_bits; -*/ - attr->ah_attr.port_num = ep->iboffload_module->port; - /* JMS to be filled in later dynamically */ - attr->ah_attr.static_rate = 0; - /* RTS params */ - attr->timeout = (uint32_t)cm->timeout; - attr->retry_cnt = (uint32_t)cm->retry_count; - attr->rnr_retry = (uint32_t)cm->rnr_retry; - attr->max_rd_atomic = (uint32_t)cm->max_rdma_dst_ops; - - /* Init for local mca_bcol_iboffload_endpoint_qp_t qps structure - * that caches the qp information on endpoint */ - OBJ_CONSTRUCT(&ep->qps[qp_index].preposted_frags, opal_list_t); - - /* Pasha: Need to add function that will */ - ep->qps[qp_index].ib_inline_max = cm->max_inline_data; - /* TODO Pasha - this is crap too... 
we do not have info for sevice qps. Fix it later */ - - ep->qps[qp_index].sd_wqe = cm->qp_infos[qp_index].rd_num; - ep->qps[qp_index].rd_wqe = cm->qp_infos[qp_index].rd_num; - - IBOFFLOAD_VERBOSE(10, ("ep - %p, qp index - %d, num of rd_wqe - %d.", - ep, qp_index, ep->qps[qp_index].rd_wqe)); -} - -static int mca_bcol_iboffload_alloc_reg_qp_resource(int qp_index, mca_bcol_iboffload_device_t *device) -{ - int length; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - ompi_free_list_t *frags_free = &device->frags_free[qp_index]; - - OBJ_CONSTRUCT(frags_free, ompi_free_list_t); - length = cm->qp_infos[qp_index].size; - - IBOFFLOAD_VERBOSE(10, ("free list len %d\n", length)); - if (OMPI_SUCCESS != ompi_free_list_init_ex_new(frags_free, - sizeof(mca_bcol_iboffload_frag_t), MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_frag_t), - length, cm->buffer_alignment, - cm->free_list_num, - cm->free_list_max, - cm->free_list_inc, - device->mpool, - mca_bcol_iboffload_frag_init, - (void *) &cm->qp_infos[qp_index].qp_index)) { - IBOFFLOAD_ERROR(("Failed to allocate frags_free")); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} -static int mca_bcol_iboffload_dealloc_reg_qp_resource(int qp_index, mca_bcol_iboffload_device_t *device) -{ - OBJ_DESTRUCT(&device->frags_free[qp_index]); - - return OMPI_SUCCESS; -} - -static mca_bcol_iboffload_frag_t *mca_bcol_iboffload_get_dummy_frag( - mca_bcol_iboffload_endpoint_t *ep, int qp_index) -{ - return &ep->iboffload_module->device->dummy_frags[qp_index]; -} - -static mca_bcol_iboffload_frag_t *mca_bcol_iboffload_endpoint_get_preposted_frag( - mca_bcol_iboffload_endpoint_t *ep, int qp_index) -{ - return (mca_bcol_iboffload_frag_t *) - opal_list_remove_first(&ep->qps[qp_index].preposted_frags); -} - -static void mca_bcol_iboffload_regular_qp_attr(int qp_index, - mca_bcol_iboffload_endpoint_t *ep, - ompi_common_ofacm_base_qp_config_t *qp_config) -{ - struct ibv_qp_init_attr *init_attr = 
&qp_config->init_attr[qp_index]; - - mca_bcol_iboffload_fillin_qp_attr(qp_index, ep, qp_config); - - init_attr->send_cq = ep->iboffload_module->device->ib_cq; - init_attr->recv_cq = ep->recv_cq[IBOFFLOAD_CQ_SMALL_MESSAGES]; -} - -static void mca_bcol_iboffload_large_buff_qp_attr(int qp_index, - mca_bcol_iboffload_endpoint_t *ep, - ompi_common_ofacm_base_qp_config_t *qp_config) -{ - struct ibv_qp_init_attr *init_attr = &qp_config->init_attr[qp_index]; - - mca_bcol_iboffload_fillin_qp_attr(qp_index, ep, qp_config); - - init_attr->send_cq = ep->iboffload_module->device->ib_cq; - init_attr->recv_cq = ep->recv_cq[IBOFFLOAD_CQ_LARGE_MESSAGES]; -} - -static void mca_bcol_iboffload_sync_qp_attr(int qp_index, - mca_bcol_iboffload_endpoint_t *ep, - ompi_common_ofacm_base_qp_config_t *qp_config) -{ - struct ibv_qp_init_attr *init_attr = &qp_config->init_attr[qp_index]; - - mca_bcol_iboffload_fillin_qp_attr(qp_index, ep, qp_config); - - init_attr->send_cq = ep->iboffload_module->device->ib_cq; - init_attr->recv_cq = ep->recv_cq[IBOFFLOAD_CQ_SYNC]; -} - -static int mca_bcol_iboffload_setup_barrier_qp(mca_bcol_iboffload_qp_info_t* qp_info) -{ - qp_info->config_qp = mca_bcol_iboffload_regular_qp_attr; - qp_info->prepost_recv = mca_bcol_iboffload_dummy_frag_qp_prepost; - - qp_info->alloc_resource = NULL; - qp_info->dealloc_resource = NULL; - - qp_info->get_preposted_recv = mca_bcol_iboffload_get_dummy_frag; - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_setup_regular_qp(mca_bcol_iboffload_qp_info_t* qp_info) -{ - qp_info->config_qp = mca_bcol_iboffload_regular_qp_attr; - qp_info->prepost_recv = mca_bcol_iboffload_frag_reg_qp_prepost; - - qp_info->alloc_resource = mca_bcol_iboffload_alloc_reg_qp_resource; - qp_info->dealloc_resource = mca_bcol_iboffload_dealloc_reg_qp_resource; - - qp_info->get_preposted_recv = mca_bcol_iboffload_endpoint_get_preposted_frag; - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_setup_large_buff_qp(mca_bcol_iboffload_qp_info_t* 
qp_info) -{ - qp_info->config_qp = mca_bcol_iboffload_large_buff_qp_attr; - - qp_info->prepost_recv = NULL; /* We use "manual" ML frag preposting for this QP */ - qp_info->alloc_resource = NULL; - qp_info->dealloc_resource = NULL; - qp_info->get_preposted_recv = NULL; - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_setup_credit_qp(mca_bcol_iboffload_qp_info_t* qp_info) -{ - qp_info->config_qp = mca_bcol_iboffload_large_buff_qp_attr; - qp_info->prepost_recv = mca_bcol_iboffload_dummy_frag_qp_prepost; - - qp_info->alloc_resource = NULL; - qp_info->dealloc_resource = NULL; - - qp_info->get_preposted_recv = mca_bcol_iboffload_get_dummy_frag; - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_setup_sync_qp(mca_bcol_iboffload_qp_info_t* qp_info) -{ - qp_info->config_qp = mca_bcol_iboffload_sync_qp_attr; - qp_info->prepost_recv = mca_bcol_iboffload_dummy_frag_qp_prepost; - - qp_info->alloc_resource = NULL; - qp_info->dealloc_resource = NULL; - - qp_info->get_preposted_recv = mca_bcol_iboffload_get_dummy_frag; - - return OMPI_SUCCESS; -} - -mca_bcol_iboffload_setup_qps_fn_t setup_qps_fn[MCA_BCOL_IBOFFLOAD_QP_LAST] = { - mca_bcol_iboffload_setup_barrier_qp, /* MCA_BCOL_IBOFFLOAD_QP_BARRIER */ - mca_bcol_iboffload_setup_regular_qp, /* MCA_BCOL_IBOFFLOAD_QP_REGULAR */ - mca_bcol_iboffload_setup_sync_qp, /* MCA_BCOL_IBOFFLOAD_QP_SYNC */ - mca_bcol_iboffload_setup_credit_qp, /* MCA_BCOL_IBOFFLOAD_QP_CREDIT */ - mca_bcol_iboffload_setup_large_buff_qp, /* MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF */ - /* MCA_BCOL_IBOFFLOAD_QP_LAST */ -}; diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.h b/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.h deleted file mode 100644 index e904e10888e..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * In order to add a new QP you need to do next steps: - * - * 1) Add new index to enum with list of the all QPs, - * MCA_BCOL_IBOFFLOAD_QP_NEW_QP e.g. - * - * 2) In the setup_qps_fn array init MCA_BCOL_IBOFFLOAD_QP_NEW_QP - * index with your init func for this QP. - * - * 3) In the init func you added init the next func pointers: - * a) config_qp - in this func you need to fill in ibv_qp_init_attr - * structure will be used for this QP creation. - * - * b) prepost_recv - you have to specify this poiner if you want - * automatically executed preposting to your new QP. - * - * c) alloc_resource - will be called during device activation, - * if you need any device resource (list of frags for example) - * for your new QP here the right place to allocate it. - * - * d) dealloc_resource - if any resource was allocated dynamically - * by alloc_resource func destruct it in this func. - * - * e) get_preposted_recv - the function returns preposted recieve for 'wait task'. - * - * d) If you don't need any of these funcs you have to init appropriate pointer with NULL. 
- */ - -#ifndef MCA_BCOL_IBOFFLOAD_QP_INFO_H -#define MCA_BCOL_IBOFFLOAD_QP_INFO_H - -#include "ompi_config.h" - -BEGIN_C_DECLS - -/* forward declarations */ -struct mca_bcol_iboffload_device_t; -struct mca_bcol_iboffload_collreq_t; -struct mca_bcol_iboffload_qp_info_t; -struct mca_bcol_iboffload_endpoint_t; - -/* The list of the all required QPs */ -enum { - MCA_BCOL_IBOFFLOAD_QP_BARRIER, - MCA_BCOL_IBOFFLOAD_QP_REGULAR, - MCA_BCOL_IBOFFLOAD_QP_SYNC, - MCA_BCOL_IBOFFLOAD_QP_CREDIT, - MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, - MCA_BCOL_IBOFFLOAD_QP_LAST -}; - -typedef enum { - MCA_BCOL_IBOFFLOAD_PP_QP, - MCA_BCOL_IBOFFLOAD_SRQ_QP, - MCA_BCOL_IBOFFLOAD_XRC_QP -} mca_bcol_iboffload_qp_type_t; - -struct mca_bcol_iboffload_pp_qp_info_t { - int32_t rd_win; - int32_t rd_rsv; -}; typedef struct mca_bcol_iboffload_pp_qp_info_t mca_bcol_iboffload_pp_qp_info_t; - -struct mca_bcol_iboffload_srq_qp_info_t { - int32_t sd_max; -}; typedef struct mca_bcol_iboffload_srq_qp_info_t mca_bcol_iboffload_srq_qp_info_t; - -typedef int (*mca_bcol_iboffload_setup_qps_fn_t) (struct mca_bcol_iboffload_qp_info_t*); -typedef int (*mca_bcol_iboffload_prepost_qps_fn_t) - (struct mca_bcol_iboffload_endpoint_t *endpoint, - int qp_index, int num_to_prepost); - -typedef void (*mca_bcol_iboffload_config_qps_fn_t) - (int qp_index, - struct mca_bcol_iboffload_endpoint_t *ep, - ompi_common_ofacm_base_qp_config_t *qp_config); - -typedef int (*mca_bcol_iboffload_alloc_qps_resource_fn_t) - (int qp_index, - struct mca_bcol_iboffload_device_t *device); - -typedef int (*mca_bcol_iboffload_dealloc_qps_resource_fn_t) - (int qp_index, - struct mca_bcol_iboffload_device_t *device); - -typedef struct mca_bcol_iboffload_frag_t* (*mca_bcol_iboffload_get_preposted_recv_fn_t) - (struct mca_bcol_iboffload_endpoint_t *ep, int qp_index); - -struct mca_bcol_iboffload_qp_info_t { - size_t size; - - int32_t rd_num; - int32_t rd_low; - int32_t rd_pp_win; /* prepost window = rd_num - rd_low */ - int qp_index; - - 
mca_bcol_iboffload_qp_type_t type; - - mca_bcol_iboffload_config_qps_fn_t config_qp; - mca_bcol_iboffload_prepost_qps_fn_t prepost_recv; - - mca_bcol_iboffload_alloc_qps_resource_fn_t alloc_resource; - mca_bcol_iboffload_dealloc_qps_resource_fn_t dealloc_resource; - - mca_bcol_iboffload_get_preposted_recv_fn_t get_preposted_recv; - - union { - mca_bcol_iboffload_pp_qp_info_t pp_qp; - mca_bcol_iboffload_srq_qp_info_t srq_qp; - } u; -}; typedef struct mca_bcol_iboffload_qp_info_t mca_bcol_iboffload_qp_info_t; - -extern mca_bcol_iboffload_setup_qps_fn_t setup_qps_fn[MCA_BCOL_IBOFFLOAD_QP_LAST]; - -END_C_DECLS - -#endif /* MCA_BCOL_IBOFFLOAD_QP_INFO_H */ - diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_task.c b/ompi/mca/bcol/iboffload/bcol_iboffload_task.c deleted file mode 100644 index 6fcb62391a3..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_task.c +++ /dev/null @@ -1,81 +0,0 @@ - /* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" - -static void task_constructor(mca_bcol_iboffload_task_t *task) -{ - task->frag = NULL; - task->collfrag = NULL; - task->endpoint = NULL; - task->next_task = NULL; - - task->sg_entries = NULL; - task->sg_entries_num = 0; - - task->task_list = NULL; - - memset(&task->wr, 0, sizeof(task->wr)); - - memset(&task->element, 0, sizeof(struct mqe_task)); - memset(&task->task_mqe_qp_entry, 0, sizeof(struct mqe_qp_entry)); -} - -static void task_destructor(mca_bcol_iboffload_task_t *task) -{ - if (NULL != task->sg_entries) { - free(task->sg_entries); - } -} - -OBJ_CLASS_INSTANCE( - mca_bcol_iboffload_task_t, - ompi_free_list_item_t, - task_constructor, - task_destructor); - -void -mca_bcol_iboffload_calc_task_init(ompi_free_list_item_t* item, void* ctx) -{ - mca_bcol_iboffload_task_t *calc_task = - (mca_bcol_iboffload_task_t *) item; - - calc_task->task_list = (ompi_free_list_t *) ctx; - - calc_task->sg_entries_num = 2; - calc_task->sg_entries = (struct ibv_sge *) malloc (2 * sizeof(struct ibv_sge)); -} - -void -mca_bcol_iboffload_iovec_task_init(ompi_free_list_item_t* item, void* ctx) -{ - mca_bcol_iboffload_task_t *iovec_task = - (mca_bcol_iboffload_task_t *) item; - - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) ctx; - - int nitems, group_size = iboffload_module->group_size; - - nitems = group_size / 2 + group_size % 2; - if (nitems > iboffload_module->device->ib_dev_attr.max_sge) { - nitems = iboffload_module->device->ib_dev_attr.max_sge; - } - - iovec_task->sg_entries_num = nitems; - iovec_task->task_list = &iboffload_module->iovec_tasks_free; - - iovec_task->sg_entries = (struct ibv_sge *) - malloc(nitems * sizeof(struct ibv_sge)); -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_task.h 
b/ompi/mca/bcol/iboffload/bcol_iboffload_task.h deleted file mode 100644 index 99bbe8eb1a1..00000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_task.h +++ /dev/null @@ -1,613 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_TASK_H -#define MCA_BCOL_IBOFFLOAD_TASK_H - -#include "ompi_config.h" - -#include -#include -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_endpoint.h" -#include "bcol_iboffload_collfrag.h" - -#define SENDWR(task) ((task)->element.post.send_wr) - -BEGIN_C_DECLS - -/* the mca_bcol_ibv_mwr_task_t name was replaced with mca_bcol_iboffload_task_t */ -struct mca_bcol_iboffload_task_t { - ompi_free_list_item_t super; - - /* pointer to the memory descriptor associated with the task */ - mca_bcol_iboffload_frag_t *frag; - - /* pointer to the bcol descriptor, - * we need it for send task only becasue we complete them in async maner - */ - mca_bcol_iboffload_collfrag_t *collfrag; - - /* task to be posted */ - struct mqe_task element; - - /* allocate ibv_sge structs array - in a CALC case - * for example it will have two entries. - */ - struct ibv_sge *sg_entries; - - /* sg_entries array length */ - int sg_entries_num; - - /* Each task is a member of some free list, - if the pointer is NULL => we assume the task - is a member of the common task list (tasks_free) */ - ompi_free_list_t *task_list; - - /* Pointer to the next task */ - struct mca_bcol_iboffload_task_t *next_task; - - /* pasha - it is crappy work around for driver interface - * the send_wr and recv_wr should be part of mqe_task and not pointers ! 
- */ - union { - struct ibv_m_send_wr send_wr; - struct ibv_recv_wr recv_wr; - } wr; - - /* If we'll decide to post a task to a different qp */ - struct mqe_qp_entry task_mqe_qp_entry; - - /* Pointer to endpoint for this task */ - mca_bcol_iboffload_endpoint_t *endpoint; -}; -typedef struct mca_bcol_iboffload_task_t mca_bcol_iboffload_task_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_task_t); - - -/* calc_tasks_free free list init function */ -void -mca_bcol_iboffload_calc_task_init(ompi_free_list_item_t* item, void* ctx); - -/* iovec_tasks_free free list init function */ -void -mca_bcol_iboffload_iovec_task_init(ompi_free_list_item_t* item, void* ctx); - -static inline __opal_attribute_always_inline__ void - mca_bcol_iboffload_return_frag_tolist( - mca_bcol_iboffload_frag_t *frag, - ompi_free_list_t *list) -{ - if (NULL != frag) { - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - assert(MCA_BCOL_IBOFFLOAD_NONE_OWNER != frag->type); - - if (MCA_BCOL_IBOFFLOAD_DUMMY_OWNER != frag->type && - 0 == frag->ref_counter) { - if (MCA_BCOL_IBOFFLOAD_BCOL_OWNER == frag->type) { - OMPI_FREE_LIST_RETURN_MT((&(list[frag->qp_index])), - (ompi_free_list_item_t*) frag); - } else if (MCA_BCOL_IBOFFLOAD_ML_OWNER == frag->type) { - OMPI_FREE_LIST_RETURN_MT((&(cm->ml_frags_free)), - (ompi_free_list_item_t*) frag); - } - } - } -} - -static inline __opal_attribute_always_inline__ void - mca_bcol_iboffload_return_recv_frags_toendpoint( - mca_bcol_iboffload_frag_t *frags, - mca_bcol_iboffload_endpoint_t *ep, - int qp_index) -{ - mca_bcol_iboffload_frag_t *recv_frag = frags; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - while (NULL != recv_frag) { - assert(MCA_BCOL_IBOFFLOAD_NONE_OWNER != recv_frag->type); - if (MCA_BCOL_IBOFFLOAD_ML_OWNER != recv_frag->type) { - opal_list_prepend(&ep->qps[qp_index].preposted_frags, - (opal_list_item_t *) recv_frag); - } else { - OMPI_FREE_LIST_RETURN_MT((&(cm->ml_frags_free)), - (ompi_free_list_item_t*) 
recv_frag); - } - - recv_frag = recv_frag->next; - } -} - -/* Wait task allocation and initialization */ -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* - mca_bcol_iboffload_get_wait_task(mca_bcol_iboffload_module_t *iboffload, - uint32_t source, int num_waits, - mca_bcol_iboffload_frag_t *frags, - int qp_index, struct ibv_qp *qp) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_task_t *task; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[source]; - - /* blocking allocation for send fragment */ - OMPI_FREE_LIST_GET_MT(&cm->tasks_free, item); - if (OPAL_UNLIKELY(NULL == item)) { - mca_bcol_iboffload_return_recv_frags_toendpoint(frags, endpoint, qp_index); - return NULL; - } - - task = (mca_bcol_iboffload_task_t *) item; - /* set pointer to corresponding recv fragment */ - IBOFFLOAD_SET_FRAGS_ON_TASK(frags, task); - - task->next_task = NULL; - task->endpoint = endpoint; - - /* set opcode */ - task->element.opcode = MQE_WR_CQE_WAIT; - task->element.flags = 0; /* Here maybe ANY flag, anyway driver ignore it */ - /* set task id */ - task->element.wr_id = (uint64_t) (uintptr_t) task; - /* set CQ */ - task->element.wait.cq = endpoint->qp_config.init_attr[qp_index].recv_cq; - - /* set number of tasks to task */ - task->element.wait.count = num_waits; - /* set pointer to QP */ - - if (NULL == qp) { /* NULL means use MQ's QP */ - task->element.wait.mqe_qp = NULL; - } else { /* Post wait to the SQ of this QP */ - task->task_mqe_qp_entry.next = NULL; - task->task_mqe_qp_entry.qp = qp; - - task->element.wait.mqe_qp = &task->task_mqe_qp_entry; - } - - IBOFFLOAD_VERBOSE(10, ("Allocating task %p, cq: %p, num waits: %d, qp_index - %d, " - "destination %d for comm rank: %d.\n", - (void *) task, (void *) task->element.wait.cq, - task->element.wait.count, qp_index, source, - endpoint->iboffload_module->ibnet->super.group_list[endpoint->index])); - return task; -} 
- -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* -mca_bcol_iboffload_prepare_send_task( - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_endpoint_t *endpoint, - int qp_index, ompi_free_list_t *task_list, - mca_bcol_iboffload_collfrag_t *collfrag) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_task_t *task; - - IBOFFLOAD_VERBOSE(10, ("Destination rank - %d, QP index - %d, " - "for comm rank - %d\n", endpoint->index, qp_index, - endpoint->iboffload_module->ibnet->super.group_list[endpoint->index])); - - /* get item from free list */ - OMPI_FREE_LIST_GET_MT(task_list, item); - if (OPAL_UNLIKELY(NULL == item)) { - return NULL; - } - - task = (mca_bcol_iboffload_task_t*) item; - task->endpoint = endpoint; - - ++(collfrag->n_sends); - task->collfrag = collfrag; - - task->next_task = NULL; - task->element.wr_id = (uint64_t) (uintptr_t) task; - - task->element.post.qp = endpoint->qps[qp_index].qp->lcl_qp; - - task->element.opcode = MQE_WR_SEND; - - /* define send work request */ - SENDWR(task) = &(task->wr.send_wr); - - SENDWR(task)->next = NULL; - - SENDWR(task)->wr_id = (uint64_t) (uintptr_t) collfrag; - IBOFFLOAD_VERBOSE(10, ("coll_frag - %p.\n", collfrag)); - - /* Allways send IMM on sends ! 
*/ - task->element.flags = MQE_WR_FLAG_IMM_EXE; - - /* Always signal completion */ - SENDWR(task)->send_flags = IBV_SEND_SIGNALED; - - return task; -} - -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* -mca_bcol_iboffload_get_send_task( - mca_bcol_iboffload_module_t *iboffload, - uint32_t destination, int qp_index, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_collfrag_t *collfrag, - bool enable_inline) -{ - mca_bcol_iboffload_task_t *task; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[destination]; - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_get_send_task qp_index %d\n", - qp_index)); - - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index, - &cm->tasks_free, - collfrag); - - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - /* no support for multiple frags */ - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - - /* We can not do send with 0 byte but we can do zero byte RDMA with immidiate */ - if (0 == frag->sg_entry.length) { - SENDWR(task)->imm_data = 0; - SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM; - - SENDWR(task)->wr.rdma.rkey = endpoint->remote_zero_rdma_addr.rkey; - SENDWR(task)->wr.rdma.remote_addr = endpoint->remote_zero_rdma_addr.addr; - } else { - SENDWR(task)->opcode = IBV_WR_SEND; - } - - /* single sge */ - SENDWR(task)->num_sge = 1; - SENDWR(task)->sg_list = &(frag->sg_entry); - - /* Use inline send when it is possible */ - if (enable_inline && - frag->sg_entry.length < cm->max_inline_data) { - IBOFFLOAD_VERBOSE(10, ("Setting inline for len %d\n", frag->sg_entry.length)); - SENDWR(task)->send_flags |= IBV_SEND_INLINE; - } - - return task; -} - -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* -mca_bcol_iboffload_get_send_vec_task( - mca_bcol_iboffload_module_t *iboffload, - uint32_t 
destination, int qp_index, - size_t nitems, - struct iovec *buff_iovec, - uint32_t lkey, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_collfrag_t *collfrag, - bool enable_inline) -{ - mca_bcol_iboffload_task_t *task; - int i; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[destination]; - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_get_send_task qp_index %d\n", - qp_index)); - - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index, - &iboffload->iovec_tasks_free, - collfrag); - - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - /* no support for multiple frags */ - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - - /* We can not do send with 0 byte but we can do zero byte RDMA with immidiate */ - SENDWR(task)->opcode = IBV_WR_SEND; - - assert (task->sg_entries != NULL); - - for (i = 0; (size_t) i < nitems; ++i){ - task->sg_entries[i].length = buff_iovec[i].iov_len; - task->sg_entries[i].addr = (uint64_t) buff_iovec[i].iov_base; - task->sg_entries[i].lkey = lkey; - } - - /* multiple sge */ - SENDWR(task)->num_sge = nitems; - SENDWR(task)->sg_list = (task->sg_entries); - - /* Use inline send when it is possible */ - if (enable_inline && - frag->sg_entry.length < cm->max_inline_data) { - IBOFFLOAD_VERBOSE(10, ("Setting inline for len %d\n", frag->sg_entry.length)); - SENDWR(task)->send_flags |= IBV_SEND_INLINE; - } - - return task; -} -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* - mca_bcol_iboffload_get_rdma_vec_task( - uint32_t destination, size_t offset, size_t nitems, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_module_t *iboffload, - struct iovec *buff_iovec, uint32_t lkey, - mca_bcol_iboffload_collfrag_t *collfrag) -{ - int i; - mca_bcol_iboffload_collreq_t *coll_request = collfrag->coll_full_req; - - 
mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_endpoint_t *endpoint = - iboffload->endpoints[destination]; - - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, - coll_request->qp_index, - &iboffload->iovec_tasks_free, - collfrag); - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - /* no support for multiple frags */ - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - - SENDWR(task)->imm_data = 0; - SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM; - SENDWR(task)->wr.rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey; - - SENDWR(task)->wr.rdma.remote_addr = (uint64_t) (uintptr_t) - ((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset); - - for (i = 0; (size_t) i < nitems; ++i){ - task->sg_entries[i].length = buff_iovec[i].iov_len; - task->sg_entries[i].addr = (uint64_t) buff_iovec[i].iov_base; - task->sg_entries[i].lkey = lkey; - } - - /* single sge */ - SENDWR(task)->num_sge = nitems; - SENDWR(task)->sg_list = (task->sg_entries); - - IBOFFLOAD_VERBOSE(10, ("The remote offset %ld \n", offset)); - return task; -} - -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* - mca_bcol_iboffload_get_rdma_task( - uint32_t destination, size_t offset, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *collfrag) -{ - mca_bcol_iboffload_collreq_t *coll_request = collfrag->coll_full_req; - - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_endpoint_t *endpoint = - iboffload->endpoints[destination]; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, - coll_request->qp_index, - &cm->tasks_free, collfrag); - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - /* no 
support for multiple frags */ - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - - SENDWR(task)->imm_data = 0; - SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM; - SENDWR(task)->wr.rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey; - /* Pasha: I really not happy with the way we calculate remote addresses. - why we don't use rbuf + offset ?*/ - SENDWR(task)->wr.rdma.remote_addr = (uint64_t) (uintptr_t) - ((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset); - /* single sge */ - SENDWR(task)->num_sge = 1; - SENDWR(task)->sg_list = &(frag->sg_entry); - - IBOFFLOAD_VERBOSE(10, ("The remote offset %ld \n", offset)); - return task; -} - -/* Pasha: hacking version of calc operation */ - static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* -mca_bcol_iboffload_get_calc_task(mca_bcol_iboffload_module_t *iboffload, - uint32_t destination, int qp_index, mca_bcol_iboffload_frag_t *frag, - struct ibv_sge *l_operand, struct ibv_sge *r_operand, - mca_bcol_iboffload_collreq_t *coll_request, - bool enable_inline) -/* Some specifications for this function: - * 1) We assume that the len of two operands (ibv_sge structs) is a same. - * 2) Possibly we use the results (ibv_sge structs) from previous - * calc operations => maybe the frag pointer is NULL. 
- */ -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_endpoint_t *endpoint = - iboffload->endpoints[destination]; - - mca_bcol_iboffload_collfrag_t *collfrag = - (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index, - &cm->calc_tasks_free, collfrag); - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - if (NULL != frag) { - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - } else { - task->frag = NULL; - } - - task->sg_entries[0] = *l_operand; - task->sg_entries[1] = *r_operand; - - SENDWR(task)->num_sge = 2; - SENDWR(task)->sg_list = task->sg_entries; - - SENDWR(task)->opcode = MCA_BCOL_IBOFFLOAD_SEND_CALC; -#if OPAL_HAVE_IBOFFLOAD_CALC_RDMA - SENDWR(task)->wr.calc_send.data_type = coll_request->actual_ib_dtype; - SENDWR(task)->wr.calc_send.calc_op = coll_request->actual_ib_op; -#else - SENDWR(task)->wr.calc.data_type = coll_request->actual_ib_dtype; - SENDWR(task)->wr.calc.calc_op = coll_request->actual_ib_op; -#endif - - return task; -} - -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* - mca_bcol_iboffload_get_rdma_calc_task(mca_bcol_iboffload_module_t *iboffload, - uint32_t destination, int qp_index, mca_bcol_iboffload_frag_t *frag, - struct ibv_sge *l_operand, struct ibv_sge *r_operand, - mca_bcol_iboffload_collreq_t *coll_request, - size_t offset) -/* Some specifications for this function: - * 1) We assume that the len of two operands (ibv_sge structs) is a same. - * 2) Possibly we use the results (ibv_sge structs) from previous - * calc operations => maybe the frag pointer is NULL. 
- */ -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_endpoint_t *endpoint = - iboffload->endpoints[destination]; - - mca_bcol_iboffload_collfrag_t *collfrag = - (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index, - &cm->calc_tasks_free, collfrag); - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - if (NULL != frag) { - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - } else { - task->frag = NULL; - } - - task->sg_entries[0] = *l_operand; - - /* Hack - we don't really use it. - task->sg_entries[1] = *r_operand; - */ - /* We use only single entry - SENDWR(task)->num_sge = 2; - */ - SENDWR(task)->num_sge = 1; - SENDWR(task)->sg_list = task->sg_entries; - -#if OPAL_HAVE_IBOFFLOAD_CALC_RDMA - SENDWR(task)->opcode = IBV_M_WR_CALC_RDMA_WRITE_WITH_IMM; - SENDWR(task)->wr.calc_rdma.data_type = coll_request->actual_ib_dtype; - SENDWR(task)->wr.calc_rdma.calc_op = coll_request->actual_ib_op; - SENDWR(task)->wr.calc_rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey; - SENDWR(task)->wr.calc_rdma.remote_addr = (uint64_t) (uintptr_t) - ((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset); -#else - IBOFFLOAD_ERROR(("Fatal error: RDMA CALC was called, but the driver does not support this operation")); - return NULL; -#endif - - return task; -} - -static inline __opal_attribute_always_inline__ - int release_frags_on_task(mca_bcol_iboffload_task_t *task, - ompi_free_list_t *list) -{ - int rc, qp_index; - - mca_bcol_iboffload_frag_t *temp_frag = task->frag; - mca_bcol_iboffload_endpoint_t *endpoint = task->endpoint; - - mca_bcol_iboffload_component_t *cm = - &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("\nCalling release_frags_on_task")); 
- - while (NULL != temp_frag) { - qp_index = temp_frag->qp_index; - - --(temp_frag->ref_counter); - - /* Return credits */ - if (MQE_WR_CQE_WAIT == task->element.opcode) { - ++(endpoint->qps[qp_index].rd_wqe); - - IBOFFLOAD_VERBOSE(10, ("Return rd_wqe %d pp_win %d", - endpoint->qps[qp_index].rd_wqe, - cm->qp_infos[qp_index].rd_pp_win)); - - /* Call for recv prepost */ - if (endpoint->qps[qp_index].rd_wqe >= - cm->qp_infos[qp_index].rd_pp_win) { - IBOFFLOAD_VERBOSE(10, ("Prepost to endpoint->index - %d, qp_index - %d", endpoint->index, qp_index)); - rc = mca_bcol_iboffload_prepost_recv(endpoint, qp_index, - endpoint->qps[qp_index].rd_wqe); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("QP %d: failed to prepost.\n", qp_index)); - return OMPI_ERROR; - } - /* What happens if we can not prepost ?*/ - } - } else if (MQE_WR_SEND == task->element.opcode) { - ++(endpoint->qps[qp_index].sd_wqe); - - assert(endpoint->qps[qp_index].sd_wqe <= cm->qp_infos[qp_index].rd_num); - - IBOFFLOAD_VERBOSE(10, ("Return sd_wqe %d, qp_index - %d, endpoint - %p", - endpoint->qps[qp_index].sd_wqe, qp_index, endpoint)); - } else { - /* We should not arrive to this case */ - IBOFFLOAD_ERROR(("Unsupporeted operation")); - - return OMPI_ERROR; - } - - mca_bcol_iboffload_return_frag_tolist(temp_frag, list); - temp_frag = temp_frag->next; - } - - return OMPI_SUCCESS; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/iboffload/configure.m4 b/ompi/mca/bcol/iboffload/configure.m4 deleted file mode 100644 index 142f4f7f1c5..00000000000 --- a/ompi/mca/bcol/iboffload/configure.m4 +++ /dev/null @@ -1,38 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_ompi_bcol_iboffload_CONFIG([should_build]) -# ------------------------------------------ -# AC_DEFUN([MCA_ompi_bcol_iboffload_POST_CONFIG], [ -# ]) - - -# MCA_ompi_bcol_iboffload_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_ompi_bcol_iboffload_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/bcol/iboffload/Makefile]) - bcol_ofa_happy="no" - bcol_mlnx_ofed_happy="no" - - OPAL_CHECK_OPENFABRICS([bcol_iboffload], [bcol_ofa_happy="yes"]) - OPAL_CHECK_MLNX_OPENFABRICS([bcol_iboffload], [bcol_mlnx_ofed_happy="yes"]) - - AS_IF([test "$bcol_ofa_happy" = "yes" -a "$bcol_mlnx_ofed_happy" = "yes"], - [$1], - [$2]) - - # substitute in the things needed to build iboffload - AC_SUBST([bcol_iboffload_CFLAGS]) - AC_SUBST([bcol_iboffload_CPPFLAGS]) - AC_SUBST([bcol_iboffload_LDFLAGS]) - AC_SUBST([bcol_iboffload_LIBS]) -])dnl diff --git a/ompi/mca/bcol/iboffload/owner.txt b/ompi/mca/bcol/iboffload/owner.txt deleted file mode 100644 index 51ea04a5175..00000000000 --- a/ompi/mca/bcol/iboffload/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL? -status: unmaintained diff --git a/ompi/mca/bcol/ptpcoll/Makefile.am b/ompi/mca/bcol/ptpcoll/Makefile.am deleted file mode 100644 index 51a3ea6491b..00000000000 --- a/ompi/mca/bcol/ptpcoll/Makefile.am +++ /dev/null @@ -1,57 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2013 Mellanox Technologies. All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - bcol_ptpcoll.h \ - bcol_ptpcoll_utils.h \ - bcol_ptpcoll_utils.c \ - bcol_ptpcoll_mca.h \ - bcol_ptpcoll_mca.c \ - bcol_ptpcoll_barrier.c \ - bcol_ptpcoll_bcast.c \ - bcol_ptpcoll_bcast.h \ - bcol_ptpcoll_component.c \ - bcol_ptpcoll_fanin.c \ - bcol_ptpcoll_fanout.c \ - bcol_ptpcoll_module.c \ - bcol_ptpcoll_allreduce.h \ - bcol_ptpcoll_allreduce.c \ - bcol_ptpcoll_reduce.h \ - bcol_ptpcoll_reduce.c \ - bcol_ptpcoll_allgather.c - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_bcol_ptpcoll_DSO -component_install += mca_bcol_ptpcoll.la -else -component_noinst += libmca_bcol_ptpcoll.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_bcol_ptpcoll_la_SOURCES = $(sources) -mca_bcol_ptpcoll_la_LDFLAGS = -module -avoid-version -mca_bcol_ptpcoll_la_LIBADD = - -noinst_LTLIBRARIES = $(component_noinst) -libmca_bcol_ptpcoll_la_SOURCES =$(sources) -libmca_bcol_ptpcoll_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h deleted file mode 100644 index 6a8b2e53d12..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h +++ /dev/null @@ -1,474 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_PTPCOLL_EXPORT_H -#define MCA_BCOL_PTPCOLL_EXPORT_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/patterns/net/netpatterns.h" - -BEGIN_C_DECLS - -#ifdef HAVE_SCHED_YIELD -# include -# define SPIN sched_yield() -#else /* no switch available */ -# define SPIN -#endif - -/** - * Structure to hold the basic shared memory coll component. First it holds the - * base coll component, and then holds a bunch of - * sm-coll-component-specific stuff (e.g., current MCA param - * values). - */ -struct mca_bcol_ptpcoll_component_t { - /** Base coll component */ - mca_bcol_base_component_2_0_0_t super; - /** Verbosity level, used only in debug enabled builds */ - int verbose; - /** The radix of K-nomial tree, initilized by mca parameter */ - int k_nomial_radix; - /** The radix of narray tree, initilized by mca parameter */ - int narray_radix; - /** The radix is used for narray scatther and knomail gather for - large message bcast **/ - int narray_knomial_radix; - /** Number of times to poll for specific tag/src */ - int num_to_probe; - /* - * bcast small messages algorithm - * 1 - Knomial bcast - * 2 - Narray bcast - */ - int bcast_small_messages_known_root_alg; - /* - * bcast large messages algorithm - * 1 - binomial scatter-gather - * 2 - Narray scatther, knomial gather - */ - int bcast_large_messages_known_root_alg; - /* - * barrier algorithm - * 1 - recursive doubling - * 2 - recursive K-ing - */ - int barrier_alg; - - int use_brucks_smsg_alltoall_rdma; -}; - -struct mca_bcol_ptpcoll_collreq_t { - opal_free_list_item_t super; - - int tag; - int num_reqs; - int exchange; - - int need_toserv_extra; - int extra_partner_rank; - - ompi_request_t **requests; -}; -typedef 
struct mca_bcol_ptpcoll_collreq_t mca_bcol_ptpcoll_collreq_t; -OBJ_CLASS_DECLARATION(mca_bcol_ptpcoll_collreq_t); - -/** - * Convenience typedef - */ -typedef struct mca_bcol_ptpcoll_component_t mca_bcol_ptpcoll_component_t; - -/* Bcast small messages, - known root algorithm */ -enum { - PTPCOLL_KNOMIAL = 1, - PTPCOLL_NARRAY -}; - -/* Bcast large messages, - known root algorithm */ -enum { - PTPCOLL_BINOMIAL_SG = 1, /* Binomila scatter-gather */ - PTPCOLL_NARRAY_KNOMIAL_SG /* Narray-Knomial scatter-gather */ -}; - -/* - * Implemented function index list - */ - -/* barrier */ -enum{ - FANIN_FAN_OUT_BARRIER_FN, - RECURSIVE_DOUBLING_BARRIER_FN, - N_BARRIER_FNS -}; - -/* reduce */ -enum{ - FANIN_REDUCE_FN, - REDUCE_SCATTER_GATHER_FN, - N_REDUCE_FNS -}; -enum{ - SHORT_DATA_FN_REDUCE, - LONG_DATA_FN_REDUCE, - N_REDUCE_FNS_USED -}; - -/* all-reduce */ -enum{ - FANIN_FANOUT_ALLREDUCE_FN, - REDUCE_SCATTER_ALLGATHER_FN, - N_ALLREDUCE_FNS -}; -enum{ - SHORT_DATA_FN_ALLREDUCE, - LONG_DATA_FN_ALLREDUCE, - N_ALLREDUCE_FNS_USED -}; - - -/* - * N-order tree node description - */ -struct tree_node_t { - /* my rank within the group */ - int my_rank; - /* my node type - root, leaf, or interior */ - int my_node_type; - /* number of nodes in the tree */ - int tree_size; - /* number of parents (0/1) */ - int n_parents; - /* number of children */ - int n_children; - /* parent rank within the group */ - int parent_rank; - /* chidren ranks within the group */ - int *children_ranks; -}; -typedef struct tree_node_t tree_node_t; - -struct pair_exchange_node_t { - - /* number of nodes this node will exchange data with */ - int n_exchanges; - - /* ranks of nodes involved in data exchnge */ - int *rank_exchanges; - - /* number of extra sources of data - outside largest power of 2 in - * this group */ - int n_extra_sources; - - /* rank of the extra source */ - int rank_extra_source; - - /* number of tags needed per stripe */ - int n_tags; - - /* log 2 of largest full power of 2 for this node set 
*/ - int log_2; - - /* largest power of 2 that fits in this group */ - int n_largest_pow_2; - - /* node type */ - int node_type; - -}; -typedef struct pair_exchange_node_t pair_exchange_node_t; - -/* - * Barrier request objects - */ - -/* enum for phase at which the nb barrier is in */ -enum{ - NB_BARRIER_INACTIVE, - NB_BARRIER_FAN_IN, - NB_BARRIER_FAN_OUT, - /* done and not started are the same for all practicle - * purposes, as the init funtion always sets this flag - */ - NB_BARRIER_DONE -}; - -typedef enum { - PTPCOLL_NOT_STARTED = 1, - PTPCOLL_WAITING_FOR_DATA = 1 << 1, - PTPCOLL_SCATTER_STARTED = 1 << 2, - PTPCOLL_GATHER_STARTED = 1 << 3, - PTPCOLL_EXTRA_SEND_STARTED = 1 << 4, - PTPCOLL_ROOT_SEND_STARTED = 1 << 5 -} ptpcoll_op_status; - -struct mca_bcol_ptpcoll_ml_buffer_desc_t { - void *data_addr; /* buffer address */ - uint64_t bank_index; /* my bank */ - uint64_t buffer_index; /* my buff index */ - int active_requests; /* keep number of active requests */ - ompi_request_t **requests; /* caching pointers to requests */ - int data_src; /* used for bcast to cache internal data */ - int radix_mask; /* used for bcast to cache internal data */ - int radix_mask_pow; /* used for bcast to cache internal data */ - int iteration; /* buffer iteration in knomial, binomail, etc. 
algorithms */ - int tag; /* tag number that is attached to this operation */ - int status; /* operation status */ - /* Fixme: Probably we can get rid of these fields by redesigning - * the reduce implementation - */ - int reduction_status; /* used for reduction to cache internal - reduction status */ - bool reduce_init_called; -}; -typedef struct mca_bcol_ptpcoll_ml_buffer_desc_t mca_bcol_ptpcoll_ml_buffer_desc_t; - -/* - * Information that we need to keep in order to access and - * track local ML memory that is used as source and destinatination - * for collectives operations - */ -struct mca_bcol_ptpcoll_local_mlmem_desc_t { - /* Bank index to release */ - uint32_t bank_index_for_release; - /* number of memory banks */ - uint32_t num_banks; - /* number of buffers per bank */ - uint32_t num_buffers_per_bank; - /* size of a payload buffer */ - uint32_t size_buffer; - /* pointer to buffer descriptors initialized */ - mca_bcol_ptpcoll_ml_buffer_desc_t *ml_buf_desc; -}; -typedef struct mca_bcol_ptpcoll_local_mlmem_desc_t mca_bcol_ptpcoll_local_mlmem_desc_t; - -typedef enum { - PTPCOLL_PROXY = 1, - PTPCOLL_IN_GROUP = 1 << 1, - PTPCOLL_EXTRA = 1 << 2, - PTPCOLL_KN_PROXY = 1 << 3, - PTPCOLL_KN_IN_GROUP = 1 << 4, - PTPCOLL_KN_EXTRA = 1 << 5 -} node_type_pow2; - -struct mca_bcol_ptpcoll_module_t { - /* base structure */ - mca_bcol_base_module_t super; - - /* size */ - int group_size; - - /* size of each memory segment */ - size_t segment_size; - - /* k_nomial radix */ - int k_nomial_radix; - /* caching power of K, for K-nomial operations */ - int pow_k; - /* caching power of K number that is smaller or equal to size of group */ - int pow_knum; - /* caching power of 2, it is special case for some algorithms */ - int pow_2; - /* caching power of 2 number that is closet to size of group */ - int pow_2num; - /* type of this node in group of power 2 */ - int pow_2type; - /* type of this node in group of K-nomaial tree */ - int pow_ktype; - /* type of this node in group of 
narray tree */ - int narray_type; - /* size of full narray tree */ - int full_narray_tree_size; - /* num leafs on last level */ - int full_narray_tree_num_leafs; - - /* Nary tree info */ - netpatterns_tree_node_t *narray_node; - - /* if the rank in group, it keeps the extra peer. - if the rank is extra, it keeps the proxy peer. - */ - int proxy_extra_index; /* pow2 algorithm */ - int *kn_proxy_extra_index; /* K nomaila algorithm */ - int kn_proxy_extra_num; /* number of extra peers , maximum k - 1*/ - - /* collective tag */ - long long collective_tag; - - /* tag mask - the pml has a limit on tag size, so need - * to wrap around - */ - uint64_t tag_mask; - - /* Caching information about local ml memory. - * Since ptpcoll does not support RDMA operations over pml, - * we don't need to keep any information about remote buffers - */ - mca_bcol_ptpcoll_local_mlmem_desc_t ml_mem; - - - /* Narray-Knomial scatther gather */ - - /* list of extra indexes */ - int *narray_knomial_proxy_extra_index; - /* number of extra peers , maximum k - 1*/ - int narray_knomial_proxy_num; - /* Narray-Knomial node information array */ - netpatterns_narray_knomial_tree_node_t *narray_knomial_node; - /* Knomial exchange tree */ - netpatterns_k_exchange_node_t knomial_exchange_tree; - /* knomial allgather tree --- Do not disable, we need both - different algorithms define recursive k - ing differently - */ - netpatterns_k_exchange_node_t knomial_allgather_tree; - - /* Knomial allgather offsets */ - int **allgather_offsets; - - /* Free lists of outstanding collective operations */ - opal_free_list_t collreqs_free; - - int log_group_size; - struct iovec *alltoall_iovec; -}; - -typedef struct mca_bcol_ptpcoll_module_t mca_bcol_ptpcoll_module_t; -OBJ_CLASS_DECLARATION(mca_bcol_ptpcoll_module_t); - - -/** - * Global component instance - */ -OMPI_MODULE_DECLSPEC extern mca_bcol_ptpcoll_component_t -mca_bcol_ptpcoll_component; - - -/* - * coll module functions - */ - -/* query to see if the component 
is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_bcol_ptpcoll_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - -/* query to see if the module is available for use on the given - * communicator, and if so, what it's priority is. - */ -mca_bcol_base_module_t ** -mca_bcol_ptpcoll_comm_query(mca_sbgp_base_module_t *sbgp, int *num_modules); - -/* interface function to setup recursive k-ing tree */ -int mca_bcol_ptpcoll_setup_knomial_tree(mca_bcol_base_module_t *super); - -/* barrier routines */ -int bcol_ptpcoll_barrier_recurs_dbl(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_barrier_recurs_knomial(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_barrier_init(mca_bcol_base_module_t *super); -int mca_bcol_ptpcoll_memsync_init(mca_bcol_base_module_t *super); -void * bcol_ptpcoll_allocate_memory(size_t length, size_t alignment, - struct mca_bcol_base_module_t *bcol_module); -int bcol_ptpcoll_register_memory(void * in_ptr, size_t length, size_t alignment, - struct mca_bcol_base_module_t *bcol_module); -int bcol_ptpcoll_deregister_memory( void * in_ptr, - struct mca_bcol_base_module_t *bcol_module); -int bcol_ptpcoll_free_memory(void *ptr, - struct mca_bcol_base_module_t *bcol_module); -int bcol_ptpcoll_fanin( bcol_function_args_t *input_args, - struct mca_bcol_base_module_t *module); -int bcol_ptpcoll_fanout( bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - - -/* allgather routine */ -int bcol_ptpcoll_k_nomial_allgather_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -/* allgather progress */ -int bcol_ptpcoll_k_nomial_allgather_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -/* allgather register */ -int bcol_ptpcoll_allgather_init(mca_bcol_base_module_t *super); - -static inline 
__opal_attribute_always_inline__ - int mca_bcol_ptpcoll_test_for_match(ompi_request_t **request , int *rc) -{ - int matched = 0; - int i; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - *rc = OMPI_SUCCESS; - - for (i = 0; i < cm->num_to_probe && - 0 == matched && OMPI_SUCCESS == *rc ; i++) { - *rc = ompi_request_test(request, &matched, MPI_STATUS_IGNORE); - } - - return matched; -} - -static inline __opal_attribute_always_inline__ - int mca_bcol_ptpcoll_test_all_for_match(int *n_requests, ompi_request_t **requests , int *rc) -{ - int matched = 0; - int i; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - *rc = OMPI_SUCCESS; - - assert(*n_requests >= 0); - - if (0 == *n_requests) { - return 1; - } - - for (i = 0; i < cm->num_to_probe && - 0 == matched && OMPI_SUCCESS == *rc; i++) { - *rc = ompi_request_test_all - (*n_requests, requests, &matched, MPI_STATUS_IGNORE); - } - - if (matched) { - *n_requests = 0; - } - - return matched; -} - -/* Some negative tags already used by OMPI, making sure that we take safe offset */ -#define PTPCOLL_TAG_OFFSET 100 -#define PTPCOLL_TAG_FACTOR 2 - -static inline int lognum(int n){ - int count = 1, lognum = 0; - - while (count < n) { - count = count << 1; - lognum++; - } - return lognum; -} - -END_C_DECLS - -#endif /* MCA_BCOL_PTPCOLL_EXPORT_H */ diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allgather.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allgather.c deleted file mode 100644 index eeed28e9fea..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allgather.c +++ /dev/null @@ -1,605 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_ptpcoll_allreduce.h" -/* - * Recursive K-ing allgather - */ - -/* - * - * Recurssive k-ing algorithm - * Example k=3 n=9 - * - * - * Number of Exchange steps = log (basek) n - * Number of steps in exchange step = k (radix) - * - */ - -int bcol_ptpcoll_k_nomial_allgather_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variables */ - - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - netpatterns_k_exchange_node_t *exchange_node = &ptpcoll_module->knomial_allgather_tree; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int group_size = ptpcoll_module->group_size; - int *list_connected = ptpcoll_module->super.list_n_connected; /* critical for hierarchical colls */ - - int tag; - int i, j; - int knt; - int comm_src, comm_dst, src, dst; - int recv_offset, recv_len; - int send_offset, send_len; - - uint32_t buffer_index = input_args->buffer_index; - int pow_k, tree_order; - int rc = OMPI_SUCCESS; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int completed = 0; /* initialized */ - void *data_buffer = (void*)( - (unsigned char *) input_args->sbuf + - (size_t) input_args->sbuf_offset); - int pack_len = input_args->count * input_args->dtype->super.size; - -#if 0 - fprintf(stderr,"entering p2p allgather pack_len %d. 
exchange node: %p\n",pack_len, exchange_node); -#endif - /* initialize the iteration counter */ - int *iteration = &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - *iteration = 0; - - /* reset active request counter */ - *active_requests = 0; - - /* keep tag within the limit supported by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - /* k-nomial parameters */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - - - /* let's begin the collective, starting with extra ranks and their - * respective proxies - */ - if( EXTRA_NODE == exchange_node->node_type ) { - - /* then I will send to my proxy rank*/ - dst = exchange_node->rank_extra_sources_array[0]; - /* find rank in the communicator */ - comm_dst = group_list[dst]; - /* now I need to calculate my own offset */ - knt = 0; - for (i = 0 ; i < my_group_index; i++){ - knt += list_connected[i]; - } - - /* send the data to my proxy */ - rc = MCA_PML_CALL(isend((void *) ( (unsigned char *) data_buffer + - knt*pack_len), - pack_len * list_connected[my_group_index], - MPI_BYTE, - comm_dst, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10,("Failed to isend data")); - return OMPI_ERROR; - } - ++(*active_requests); - - /* now I go ahead and post the receive from my proxy */ - comm_src = comm_dst; - knt = 0; - for( i =0; i < group_size; i++){ - knt += list_connected[i]; - } - rc = MCA_PML_CALL(irecv(data_buffer, - knt * pack_len, - MPI_BYTE, - comm_src, - tag , comm, &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to post ireceive ")); - return OMPI_ERROR; - } - - ++(*active_requests); - /* poll for completion */ - /* this polls internally */ - completed = 
mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(completed){ - /* go to buffer release */ - goto FINISHED; - }else{ - /* save state and hop out - * nothing to save here - */ - return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - }else if ( 0 < exchange_node->n_extra_sources ) { - - /* I am a proxy for someone */ - src = exchange_node->rank_extra_sources_array[0]; - /* find the rank in the communicator */ - comm_src = group_list[src]; - knt = 0; - for(i = 0; i < src; i++){ - knt += list_connected[i]; - } - /* post the receive */ - rc = MCA_PML_CALL(irecv((void *) ( (unsigned char *) data_buffer - + knt*pack_len), - pack_len * list_connected[src], - MPI_BYTE, - comm_src, - tag , comm, &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to post ireceive ")); - return OMPI_ERROR; - } - - ++(*active_requests); - /* poll for completion */ - /* this routine polls internally */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to -1 indicating we need to - * finish this part first - */ - *iteration = -1; - return ((OMPI_SUCCESS != rc )? 
OMPI_ERROR : BCOL_FN_STARTED); - } - - } - - /* we start the recursive k - ing phase */ - /* fprintf(stderr,"tree order %d pow_k %d \n",tree_order,pow_k);*/ - for( i = 0; i < pow_k; i++) { - for(j = 0; j < (tree_order - 1); j++) { - - /* send phase */ - dst = exchange_node->rank_exchanges[i][j]; - if( dst < 0 ){ - continue; - } - comm_dst = group_list[dst]; - send_offset = exchange_node->payload_info[i][j].s_offset * pack_len; - send_len = exchange_node->payload_info[i][j].s_len * pack_len; - /* debug print */ - /* fprintf(stderr,"sending %d bytes to rank %d at offset %d\n",send_len, */ - /* comm_dst,send_offset); */ - rc = MCA_PML_CALL(isend((void*)((unsigned char *) data_buffer + - send_offset), - send_len, - MPI_BYTE, - comm_dst, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10,("Failed to isend data")); - return OMPI_ERROR; - } - ++(*active_requests); - - /* sends are posted */ - } - - /* Now post the recv's */ - for( j = 0; j < (tree_order - 1); j++ ) { - - /* recv phase */ - src = exchange_node->rank_exchanges[i][j]; - if( src < 0 ) { - continue; - } - comm_src = group_list[src]; - recv_offset = exchange_node->payload_info[i][j].r_offset * pack_len; - recv_len = exchange_node->payload_info[i][j].r_len * pack_len; - /* debug print */ - /* fprintf(stderr,"recving %d bytes to rank %d at offset %d\n",recv_len, */ - /* comm_src,recv_offset); */ - /* post the receive */ - rc = MCA_PML_CALL(irecv((void *) ((unsigned char *) data_buffer + - recv_offset), - recv_len, - MPI_BYTE, - comm_src, - tag, comm, &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to post ireceive ")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - /* finished all send/recv's now poll for completion before - * continuing to next iteration - */ - completed = 0; - /* polling internally on 2*(k - 1) requests */ - completed = 
mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - - if(!completed){ - /* save state and hop out - * only the iteration needs to be tracked - */ - *iteration = i; /* need to pick up here */ - - return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - } - - /* finish off the last piece, send the data back to the extra */ - if( 0 < exchange_node->n_extra_sources ) { - dst = exchange_node->rank_extra_sources_array[0]; - comm_dst = group_list[dst]; - knt = 0; - for( i = 0; i < group_size; i++){ - knt += list_connected[i]; - } - /* debug print */ - /* - fprintf(stderr,"sending %d bytes to extra %d \n",pack_len*knt,comm_dst); - */ - rc = MCA_PML_CALL(isend(data_buffer, - pack_len * knt, - MPI_BYTE, - comm_dst, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10,("Failed to isend data")); - return OMPI_ERROR; - } - ++(*active_requests); - - /* probe for send completion */ - completed = 0; - /* polling internally */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to pow_k +1 indicating we need to - * finish progressing the last part - */ - *iteration = pow_k + 1; - - return (OMPI_SUCCESS != rc ? 
OMPI_ERROR : BCOL_FN_STARTED); - } - } - -FINISHED: - /* recycle buffer if need be */ - return BCOL_FN_COMPLETE; -} - -/* allgather progress function */ - -int bcol_ptpcoll_k_nomial_allgather_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - - - /* local variables */ - - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - netpatterns_k_exchange_node_t *exchange_node = &ptpcoll_module->knomial_allgather_tree; - int group_size = ptpcoll_module->group_size; - int *list_connected = ptpcoll_module->super.list_n_connected; /* critical for hierarchical colls */ - - - int tag; - int i, j; - int knt; - int comm_src, comm_dst, src, dst; - int recv_offset, recv_len; - int send_offset, send_len; - uint32_t buffer_index = input_args->buffer_index; - - int pow_k, tree_order; - int rc = OMPI_SUCCESS; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int completed = 0; /* initialized */ - void *data_buffer = (void*)( - (unsigned char *) input_args->sbuf + - (size_t) input_args->sbuf_offset); - int pack_len = input_args->count * input_args->dtype->super.size; - /* initialize the counter */ - int *iteration = &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - - -#if 0 - fprintf(stderr,"%d: entering p2p allgather progress AR: %d iter: %d\n",my_group_index,*active_requests, - *iteration); -#endif - /* keep tag within the limit supported by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - /* k-nomial tree parameters */ - 
tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - - /* let's begin the collective, starting with extra ranks and their - * respective proxies - */ - if( EXTRA_NODE == exchange_node->node_type ) { - - /* debug print */ - /*fprintf(stderr,"666 \n");*/ - /* simply poll for completion */ - completed = 0; - /* polling internally */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(completed){ - /* go to buffer release */ - goto FINISHED; - }else{ - /* save state and hop out - * nothing to save here - */ - return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - }else if ( 0 < exchange_node->n_extra_sources && (-1 == *iteration)) { - - /* I am a proxy for someone */ - /* Simply poll for completion */ - completed = 0; - /* polling internally */ - assert( 1 == *active_requests); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to -1 indicating we need to - * finish this part first - */ - (*iteration) = -1; - return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - /* I may now proceed to the recursive k - ing phase */ - *iteration = 0; - } - - - /* the ordering here between the extra rank and progress active requests - * is critical - */ - /* extra rank */ - if( (pow_k + 1) == *iteration ){ - /* finish off the last one */ - goto PROGRESS_EXTRA; - } - - /* active requests must be completed before continuing on to - * recursive k -ing step - * CAREFUL HERE, IT THIS REALLY WHAT YOU WANT?? 
- */ - if( 0 < (*active_requests) ) { - /* then we have something to progress from last step */ - /* debug print */ - /* - fprintf(stderr,"%d: entering progress AR: %d iter: %d\n",my_group_index,*active_requests, - *iteration); - */ - completed = 0; - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * state hasn't changed - */ - - return ((MPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - ++(*iteration); - } - - - - /* we start the recursive k - ing phase */ - for( i = *iteration; i < pow_k; i++) { - /* nothing changes here */ - for(j = 0; j < (tree_order - 1); j++) { - - /* send phase */ - dst = exchange_node->rank_exchanges[i][j]; - if( dst < 0 ){ - continue; - } - comm_dst = group_list[dst]; - send_offset = exchange_node->payload_info[i][j].s_offset * pack_len; - send_len = exchange_node->payload_info[i][j].s_len * pack_len; - rc = MCA_PML_CALL(isend((void*)((unsigned char *) data_buffer + - send_offset), - send_len, - MPI_BYTE, - comm_dst, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10,("Failed to isend data")); - return OMPI_ERROR; - } - ++(*active_requests); - - /* sends are posted */ - } - - /* Now post the recv's */ - for( j = 0; j < (tree_order - 1); j++ ) { - - /* recv phase */ - src = exchange_node->rank_exchanges[i][j]; - if( src < 0 ) { - continue; - } - comm_src = group_list[src]; - recv_offset = exchange_node->payload_info[i][j].r_offset * pack_len; - recv_len = exchange_node->payload_info[i][j].r_len * pack_len; - /* post the receive */ - rc = MCA_PML_CALL(irecv((void *) ((unsigned char *) data_buffer + - recv_offset), - recv_len, - MPI_BYTE, - comm_src, - tag, comm, &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to post ireceive ")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - /* finished all send/recv's now poll for 
completion before - * continuing to next iteration - */ - completed = 0; - /* make this non-blocking */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to -1 indicating we need to - * finish this part first - */ - *iteration = i; /* need to pick up here */ - - return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - } - - /* finish off the last piece, send the data back to the extra */ - if( 0 < exchange_node->n_extra_sources ) { - dst = exchange_node->rank_extra_sources_array[0]; - comm_dst = group_list[dst]; - knt = 0; - for( i = 0; i < group_size; i++){ - knt += list_connected[i]; - } - rc = MCA_PML_CALL(isend(data_buffer, - pack_len * knt, - MPI_BYTE, - comm_dst, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10,("Failed to isend data")); - return OMPI_ERROR; - } - ++(*active_requests); - - /* probe for send completion */ - completed = 0; - /* make this non-blocking */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to pow_k +1 indicating we need to - * finish progressing the last part - */ - *iteration = pow_k + 1; - - return ((OMPI_SUCCESS != rc) ? 
OMPI_ERROR : BCOL_FN_STARTED); - } - } - /* folks need to skip this unless they really are the proxy - * reentering with the intent of progressing the final send - */ - goto FINISHED; - -PROGRESS_EXTRA: - - /* probe for send completion */ - completed = 0; - /* make this non-blocking */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to pow_k +1 indicating we need to - * finish progressing the last part - */ - - return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - -FINISHED: - /* recycle buffer if need be */ - return BCOL_FN_COMPLETE; -} - -/* - * Register allreduce functions to the BCOL function table, - * so they can be selected - */ -int bcol_ptpcoll_allgather_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_ALLGATHER; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_k_nomial_allgather_init, - bcol_ptpcoll_k_nomial_allgather_progress); - - - comm_attribs.data_src = DATA_SRC_KNOWN; - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_k_nomial_allgather_init, - bcol_ptpcoll_k_nomial_allgather_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c deleted file mode 100644 index 
200e109f71e..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c +++ /dev/null @@ -1,1029 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_ptpcoll_allreduce.h" - -/* - * Recursive K-ing allreduce - */ -static inline int bcol_ptpcoll_allreduce_narray_schedule_extra_node_exchange (mca_bcol_ptpcoll_module_t *ptpcoll_module, netpatterns_k_exchange_node_t *k_node, - void *data_buffer, size_t data_size, ompi_request_t **requests, int *active_requests, - int tag) -{ - ompi_communicator_t *comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int peer_comm_rank, k, offset, rc; - - if (EXCHANGE_NODE == k_node->node_type) { - /* the send data resides in the first part of the buffer */ - for (k = 0, offset = data_size ; k < k_node->n_extra_sources ; ++k, offset += data_size) { - peer_comm_rank = ptpcoll_module->super.sbgp_partner_module->group_list[k_node->rank_extra_sources_array[k]]; - - PTPCOLL_VERBOSE(10, ("Recv data from %d, addr %p len %d tag %d", - peer_comm_rank, data_buffer, data_size, tag)); - rc = MCA_PML_CALL(irecv((void *)((unsigned char *)data_buffer + offset), - data_size, MPI_BYTE, peer_comm_rank, tag, comm, - &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - } else { - peer_comm_rank = ptpcoll_module->super.sbgp_partner_module->group_list[k_node->rank_extra_sources_array[0]]; - - PTPCOLL_VERBOSE(10, ("Send data to %d, addr %p len %d tag %d", - peer_comm_rank, 
data_buffer, data_size, tag)); - - rc = MCA_PML_CALL(isend(data_buffer, data_size, MPI_BYTE, peer_comm_rank, - tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - return OMPI_SUCCESS; -} - -static inline void bcol_ptpcoll_allreduce_narray_reduce (void *data_buffer, struct ompi_datatype_t *data_type, int count, struct ompi_op_t *op, int sources) -{ - size_t data_size = mca_bcol_base_get_buff_length(data_type, count); - - for (int k = 0, offset = data_size ; k < sources ; ++k, offset += data_size) { - ompi_op_reduce(op, (char *) data_buffer + offset, data_buffer, count, data_type); - } -} - -static int bcol_ptpcoll_allreduce_narraying_progress (bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - void *data_buffer = (void *) ( (unsigned char *) input_args->sbuf + - (size_t) input_args->sbuf_offset); - struct ompi_datatype_t *data_type = input_args->dtype; - uint32_t buffer_index = input_args->buffer_index; - struct ompi_op_t *op = input_args->op; - int count = input_args->count; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int k, rc, peer, group_peer; - int offset = 0; - ompi_communicator_t *comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k_radix = k_node->tree_order; - - size_t data_size = mca_bcol_base_get_buff_length(data_type, count); - int *iteration = - 
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - - /* if we are just staring the collective and there are extra sources then schedule the - * extra node exchange. otherwise check if the exchange is complete. */ - if (-1 == *iteration) { - if (0 < k_node->n_extra_sources) { - if (!(*active_requests)) { - rc = bcol_ptpcoll_allreduce_narray_schedule_extra_node_exchange (ptpcoll_module, k_node, data_buffer, data_size, - requests, active_requests, tag); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - } - - /* check for extra node exchange completion */ - if (!mca_bcol_ptpcoll_test_all_for_match (active_requests, requests, &rc)) { - return (OMPI_SUCCESS == rc) ? BCOL_FN_STARTED : rc; - } - - if (EXCHANGE_NODE == k_node->node_type) { - bcol_ptpcoll_allreduce_narray_reduce (data_buffer, data_type, count, op, k_node->n_extra_sources); - } - } - - /* start recursive k-ing */ - *iteration = 0; - } - - if (*iteration < k_node->n_exchanges) { - if (*active_requests) { - if (!mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc)) { - return (OMPI_SUCCESS == rc) ? 
BCOL_FN_STARTED : rc; - } - - ++(*iteration); - bcol_ptpcoll_allreduce_narray_reduce (data_buffer, data_type, count, op, k_radix - 1); - } - } - - for ( ; *iteration < k_node->n_exchanges ; ++(*iteration)) { - for (k = 0; k < k_radix - 1; k++) { - group_peer = k_node->rank_exchanges[*iteration][k]; - - peer = group_list[group_peer]; - - PTPCOLL_VERBOSE(10, ("Send data to %d, addr %p len %d tag %d", - peer, data_buffer, data_size, tag)); - rc = MCA_PML_CALL(isend(data_buffer, data_size, MPI_BYTE, peer, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - for (k = 0, offset = data_size ; k < k_radix - 1 ; ++k, offset += data_size) { - group_peer = k_node->rank_exchanges[*iteration][k]; - peer = group_list[group_peer]; - - PTPCOLL_VERBOSE(10, ("Recv data from %d, addr %p len %d tag %d", - peer, data_buffer, data_size, tag)); - rc = MCA_PML_CALL(irecv((void *)((unsigned char *)data_buffer + offset ), - data_size, MPI_BYTE, peer, tag, comm, - &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - if (!mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc)) { - return (OMPI_SUCCESS == rc) ? 
BCOL_FN_STARTED : rc; - } - - bcol_ptpcoll_allreduce_narray_reduce (data_buffer, data_type, count, op, k_radix - 1); - } - - /* ensure extra nodes get the result */ - if (0 < k_node->n_extra_sources) { - if (!(*active_requests)) { - int peer_comm_rank; - - if (EXTRA_NODE == k_node->node_type) { - peer_comm_rank = ptpcoll_module->super.sbgp_partner_module->group_list[k_node->rank_extra_sources_array[0]]; - - PTPCOLL_VERBOSE(10, ("EXTRA_NODE: Recv data from %d, addr %p len %d tag %d", - peer_comm_rank, data_buffer, data_size, tag)); - rc = MCA_PML_CALL(irecv(data_buffer, data_size, MPI_BYTE, peer_comm_rank, - tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } else { - for (k = 0; k < k_node->n_extra_sources; k++) { - peer_comm_rank = ptpcoll_module->super.sbgp_partner_module->group_list[k_node->rank_extra_sources_array[k]]; - - PTPCOLL_VERBOSE(10, ("EXCHANGE_NODE: Send data to %d, addr %p len %d tag %d", - peer_comm_rank, data_buffer, data_size, tag)); - rc = MCA_PML_CALL(isend(data_buffer, data_size, MPI_BYTE, peer_comm_rank, - tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - } - } - - if (!mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc)) { - return (OMPI_SUCCESS == rc) ? 
BCOL_FN_STARTED : rc; - } - } - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_allreduce_narraying_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args){ - - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - int count = input_args->count; - struct ompi_datatype_t *dtype = input_args->dtype; - size_t buffer_size; - int tag; - - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = 1; - - /* start with extra node exchange if needed */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration = -1; - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests = 0; - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status = PTPCOLL_NOT_STARTED; - - /* - * ML bufer is segmented into k segments and each of the k segment is used - * for reductions - */ - /* This has to be based on ml buffer size. Need to take into account the space used - * by the headers of other bcol modules. */ - buffer_size = ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX; - assert(buffer_size >= count * dtype->super.size * - ptpcoll_module->k_nomial_radix); - - return bcol_ptpcoll_allreduce_narraying_progress (input_args, const_args); -} - -static inline int compute_seg_index(int peer, int kpow_num, int tree_order) { - - int peer_base, peer_position, peer_base_rank, peer_index; - - peer_base = peer / (kpow_num * tree_order); - peer_base_rank = peer_base * kpow_num * tree_order ; - peer_position = peer_base_rank == 0 ? 
peer : peer % (peer_base_rank); - peer_index = peer_position / kpow_num ; - - return peer_index; -} - -int compute_knomial_allgather_offsets(int group_index, int count, struct - ompi_datatype_t *dtype,int k_radix,int n_exchanges, - int **offsets){ - - int modulo_group_size; - size_t seg_count, seg_size, seg_index, seg_offset; - size_t block_offset, block_count; - int exchange_step; - ptrdiff_t lb, extent; - - if (0 >= n_exchanges) { - PTPCOLL_VERBOSE(10,("Nothing to initialize ")); - return 0; - } - modulo_group_size = 1; - seg_count = count / k_radix; - ompi_datatype_get_extent(dtype, &lb, &extent); - seg_size = seg_count * extent; - - seg_index = group_index % k_radix; - seg_offset = seg_index * seg_size; - - offsets[0][BLOCK_OFFSET] = block_offset = 0; - offsets[0][BLOCK_COUNT] = block_count = count; - offsets[0][LOCAL_REDUCE_SEG_OFFSET] = seg_offset; - offsets[0][SEG_SIZE] = seg_size; - - - for(exchange_step = 1; exchange_step < n_exchanges; exchange_step++) { - - /* Previous step's segment is this exchange step's block */ - block_count = seg_count; - block_offset = seg_offset; - - /* Divide the segment into k parts */ - seg_count = seg_count / k_radix; - seg_size = seg_count * extent; - - /* Among different segments in block, which segment should I reduce ? 
*/ - /* For allgather phase, I will not send out this segment to peers */ - modulo_group_size *= k_radix; - seg_index = compute_seg_index(group_index, modulo_group_size, k_radix); - seg_offset = seg_index * seg_size; - - - offsets[exchange_step][BLOCK_OFFSET] = block_offset; - offsets[exchange_step][LOCAL_REDUCE_SEG_OFFSET] = seg_offset; - offsets[exchange_step][BLOCK_COUNT] = block_count; - offsets[exchange_step][SEG_SIZE] = seg_size; - - /* Change to absolute offset */ - seg_offset = block_offset + seg_offset; - - } - - return 0; -} - -static inline int compute_send_segment_size(int block_offset, - int send_offset, - int segment_size, - int padded_offset) { - int send_size = -1; - /* segment to be sent starts here */ - int segment_offset = block_offset + send_offset ; - send_size = (segment_offset + segment_size) >= padded_offset ? - segment_size - (segment_offset + segment_size - padded_offset) : segment_size; - return send_size; -} - -static inline int compute_recv_segment_size(int block_offset, - int recv_offset, - int segment_size, - int padded_offset) { - int recv_size = -1; - /* segment to be sent starts here */ - int segment_offset = block_offset + recv_offset ; - recv_size = (segment_offset + segment_size) >= padded_offset ? 
- segment_size - (segment_offset + segment_size - padded_offset) : segment_size; - - return recv_size; -} - -/* - * - * K-nomial Reduce Scatter - * Example k=3 n=9 - * - * | ABCDEFGH |0| - * - * Number of Exchange steps = log (basek) n - * Number of steps in exchange step = k (radix) - * - * block_size = Size of data that is reduce in exchange step - * segment_size = Size of data that is send or received by rank in radix step - * - * block_size = segment_size * k - * - * my_block_start_addr = Address of the segment in the block where I reference my - * offsets - * - * This is version 1 : Experimenting with decoupling offset calcuations - */ -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte){ - int blocks_in_step = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - mca_bcol_ptpcoll_component_t *cm = - &mca_bcol_ptpcoll_component; - void *my_block_start_addr = NULL, *my_block_addr = NULL; - int i, k, group_peer, peer ; - int k_radix = k_node->tree_order; - int rc = OMPI_SUCCESS; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int completed; - void *my_recv_start_addr, *my_recv_addr; - size_t block_offset, reduce_seg_offset, send_offset, recv_offset; - int seg_size, block_size; - int block_count, seg_count; - ptrdiff_t lb, extent; - ompi_datatype_get_extent(dtype, &lb, 
&extent); - - my_recv_start_addr = rbuf; - my_block_start_addr = sbuf; - block_count = count; - block_size = count * extent; - - - for (i = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - i < k_node->n_exchanges; i++, blocks_in_step *= cm->narray_knomial_radix) { - - block_offset = ptpcoll_module->allgather_offsets[i][BLOCK_OFFSET]; - reduce_seg_offset = ptpcoll_module->allgather_offsets[i][LOCAL_REDUCE_SEG_OFFSET]; - block_count = ptpcoll_module->allgather_offsets[i][BLOCK_COUNT]; - seg_size = ptpcoll_module->allgather_offsets[i][SEG_SIZE]; - block_size = block_count * extent; - - PTPCOLL_VERBOSE(10,("Block offset %d, reduce_seg_offset %d, block_count %d seg_size %d", - block_offset, reduce_seg_offset, block_count, seg_size)); - - seg_count = block_count / k_radix; - my_block_addr = (void*)((char*)my_block_start_addr + block_offset); - my_recv_addr = (void*)((char*)my_recv_start_addr + block_offset); - - for (k = 0; k < k_radix - 1; k++) { - size_t soffset; - int snd_size = 0; - - group_peer = k_node->rank_exchanges[i][k]; - peer = group_list[group_peer]; - - send_offset = reduce_seg_offset + (seg_size * (k + 1)); - - if ((int)send_offset + seg_size > block_size) { - send_offset = send_offset % block_size; - } - - PTPCOLL_VERBOSE(10, ("Send data to %d,send offset %d len %d", - peer, send_offset, seg_size)); - - soffset = send_offset; - snd_size = - compute_send_segment_size((int)block_offset,(int)soffset,(int)seg_size,padded_start_byte); - - if (snd_size > 0) { - rc = MCA_PML_CALL(isend((void *)((unsigned char *)my_block_addr - + soffset), - snd_size, MPI_BYTE, - peer, tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send the segment to %d", peer)); - return OMPI_ERROR; - } - ++(*active_requests); - } - - } - - /* - * Receive the segments to tmp addr and then do a reduction - */ - for (k = 0; k < k_radix - 1; k++) { - int recv_size=0; - - group_peer = 
k_node->rank_exchanges[i][k]; - peer = group_list[group_peer]; - - recv_offset = reduce_seg_offset + (seg_size * (k+1)); - - if ((int)recv_offset + seg_size > block_size) { - recv_offset = recv_offset % block_size; - } - - PTPCOLL_VERBOSE(10, ("Receive data to receive buffer at offset %d\n", - recv_offset)); - recv_size = compute_recv_segment_size((int)block_offset, - (int)reduce_seg_offset, (int)seg_size, - padded_start_byte); - - if (recv_size > 0 ) { - rc = MCA_PML_CALL(irecv((void *)((unsigned char *) - my_recv_addr + recv_offset), - recv_size, MPI_BYTE, - peer, tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive the segment from %d", peer)); - return OMPI_ERROR; - } - ++(*active_requests); - } - - } - - completed = 0; - while(!completed){ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - } - - /* Do a reduction on received buffers */ - { - void *src_data_buffer = NULL, *dst_data_buffer = NULL; - int reduce_data_count = 0; - - src_data_buffer = my_block_addr; - dst_data_buffer = my_recv_addr; - - for (k = 0; k < k_radix - 1; k++) { - recv_offset = reduce_seg_offset + (seg_size * (k+1)); - - if ((int)recv_offset + seg_size > block_size) { - recv_offset = recv_offset % block_size; - } - - reduce_data_count = (int)(block_offset + reduce_seg_offset) + seg_size >= padded_start_byte ? 
- (seg_size - (((int)(block_offset + reduce_seg_offset) + seg_size) - padded_start_byte))/(int)dtype->super.size - : (int)seg_count; - - if (reduce_data_count > 0) { - ompi_3buff_op_reduce(op, - (void*)((unsigned char*)my_recv_addr + recv_offset), - (void*)((unsigned char*)src_data_buffer + - reduce_seg_offset), - (void*)((unsigned char*)dst_data_buffer + - reduce_seg_offset), - reduce_data_count,dtype); - } - - src_data_buffer = dst_data_buffer; - - } - } - - /* After first iteration we have data (to work with) in recv buffer */ - my_block_start_addr = rbuf; - - } - - return rc; -} - - -int bcol_ptpcoll_allreduce_knomial_allgather(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, - void *sbuf,void *rbuf, int count, struct - ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte){ - - size_t block_offset = 0, send_offset = 0, recv_offset = 0; - int seg_size=0, block_size=0; - int i,k,completed; - void *my_block_start_addr = rbuf, *my_block_addr; - size_t block_count = count; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k_radix = k_node->tree_order; - int peer, group_peer; - int rc = OMPI_SUCCESS; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int exchange_step; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - ptrdiff_t lb, extent; - ompi_datatype_get_extent(dtype, &lb, &extent); - - - for (i = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - i < k_node->n_exchanges; i++) { - - exchange_step = k_node->n_exchanges - 1 - i; - - block_offset = ptpcoll_module->allgather_offsets[exchange_step][BLOCK_OFFSET]; - send_offset = 
ptpcoll_module->allgather_offsets[exchange_step][LOCAL_REDUCE_SEG_OFFSET]; - block_count = ptpcoll_module->allgather_offsets[exchange_step][BLOCK_COUNT]; - seg_size = ptpcoll_module->allgather_offsets[exchange_step][SEG_SIZE]; - block_size = block_count * extent; - - - PTPCOLL_VERBOSE(10, ("Send offset %d block_offset %d seg_size %\n", - send_offset, block_offset, seg_size)); - - my_block_addr = (void*)((unsigned char*)my_block_start_addr + block_offset); - - for (k = 0; k < k_radix - 1; k++) { - size_t soffset=0; int snd_size = 0; - group_peer = k_node->rank_exchanges[exchange_step][k]; - peer = group_list[group_peer]; - - soffset = send_offset; - snd_size = compute_send_segment_size((int)block_offset, - (int)soffset, - (int)seg_size, - padded_start_byte); - if (snd_size > 0) { - rc = MCA_PML_CALL(isend((void *)((unsigned char *)my_block_addr - + soffset), - snd_size, MPI_BYTE, - peer, tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send the segment to %d", peer)); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - PTPCOLL_VERBOSE(10, ("Send data to receive buffer at offset %d to %d\n", - send_offset, peer)); - } - - for (k = 0; k < k_radix - 1; k++) { - int recv_size=0; - - group_peer = k_node->rank_exchanges[exchange_step][k]; - peer = group_list[group_peer]; - - recv_offset = send_offset + (k + 1) * seg_size; - - if ((int)recv_offset + seg_size > block_size){ - recv_offset = recv_offset % block_size; - } - - PTPCOLL_VERBOSE(10, ("Receive data to receive buffer at offset %d from %d\n", - recv_offset, peer)); - - - recv_size = compute_recv_segment_size((int)block_offset, - (int)recv_offset, - (int)seg_size, - padded_start_byte); - if (recv_size > 0) { - rc = MCA_PML_CALL(irecv((void *)((unsigned char *) - my_block_addr + recv_offset), - recv_size, MPI_BYTE, - peer, tag, comm, &requests[*active_requests])); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, 
("Failed to receive the segment from %d", peer)); - return OMPI_ERROR; - } - ++(*active_requests); - } - - } - - completed = 0; - while(!completed){ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - } - - block_count = block_count * k_radix; - block_size = block_count * extent; - - } - - return rc; - -} - -static inline int compute_padding_count(int count, int k_radix, int n_exchanges){ - bool fpadding = false; - size_t dsize; - int i, pad_count=0, kpow; - - /* is padding required */ - dsize = count; - kpow = 1; - for ( i=0; i < n_exchanges; i++) { - if (dsize % k_radix) { - fpadding = true; - } - dsize /= k_radix; - kpow *= k_radix; - } - - if (fpadding) { - pad_count = count % kpow; - pad_count = kpow - pad_count; - } - - return pad_count; -} - - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args){ - - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - struct ompi_op_t *op = input_args->op; - int tag; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - void *src_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - - void *recv_buffer = (void *) ( - (unsigned char *)input_args->rbuf + - (size_t)input_args->rbuf_offset); - - int count = input_args->count; - struct ompi_datatype_t *dtype = input_args->dtype; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - ptrdiff_t lb, extent; - - /* Get the knomial tree */ - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k_radix = 
k_node->tree_order; - int n_exchanges = k_node->n_exchanges; - int padded_start_byte; - int padding_count = compute_padding_count(count, k_radix, n_exchanges); - - ompi_datatype_get_extent(dtype, &lb, &extent); - padded_start_byte = count * extent; - - - /* Init for making the functions Re-entrant */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = 1; - *active_requests = 0; - *iteration = -1; - *status = PTPCOLL_NOT_STARTED; - *iteration = 0; - - compute_knomial_allgather_offsets(my_group_index,count + padding_count, dtype,k_radix,n_exchanges, - ptpcoll_module->allgather_offsets); - - /* Perform a recursive k'ing reduce scatter */ - bcol_ptpcoll_allreduce_recursivek_scatter_reduce(ptpcoll_module, buffer_index, - src_buffer, recv_buffer, op, count + padding_count, dtype, - my_group_index,padded_start_byte); - - - /* Perform a recursive k'ing allgather */ - bcol_ptpcoll_allreduce_knomial_allgather(ptpcoll_module, - buffer_index, - src_buffer, recv_buffer, count + padding_count, dtype, - my_group_index, padded_start_byte); - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, - void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype){ - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k, peer ; - int rc = OMPI_SUCCESS; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int block_count, block_size; - char *tmprecv_buffer = NULL, *data_src_buffer, *data_dst_buffer; - ptrdiff_t lb, extent; - ompi_datatype_get_extent(dtype, &lb, &extent); - - block_count = count; - block_size = count * extent; - - - if (0 < 
block_size) { - tmprecv_buffer = (void*)malloc(block_size); - } - - data_src_buffer = sbuf; - data_dst_buffer = rbuf; - - if (EXCHANGE_NODE == k_node->node_type) { - for (k = 0; k < k_node->n_extra_sources; k++){ - - peer = ptpcoll_module->super.sbgp_partner_module->group_list[ - k_node->rank_extra_sources_array[k]]; - - rc = MCA_PML_CALL(recv((void *)((unsigned char *)tmprecv_buffer), - block_size, MPI_BYTE, - peer, tag, comm, MPI_STATUS_IGNORE)); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive the segment from %d", peer)); - rc = OMPI_ERROR; - goto clean; - } - - ompi_3buff_op_reduce(op, (void*)((unsigned char*)data_src_buffer), - (void*)((unsigned char*)tmprecv_buffer), - (void*)((unsigned char*)data_dst_buffer), - block_count,dtype); - data_src_buffer = data_dst_buffer; - } - } else { - peer = ptpcoll_module->super.sbgp_partner_module->group_list[ - k_node->rank_extra_sources_array[0]]; - - rc = MCA_PML_CALL(send((void *)((unsigned char *)sbuf), - block_size, MPI_BYTE, - peer, tag, MCA_PML_BASE_SEND_STANDARD, comm)); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - rc = OMPI_ERROR; - goto clean; - } - } - -clean: - if (tmprecv_buffer) { - free(tmprecv_buffer); - } - return rc; -} - -int bcol_ptpcoll_allreduce_knomial_allgather_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, - void *sbuf, - void *rbuf, - const int count, struct ompi_datatype_t *dtype){ - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k, peer ; - int rc = OMPI_SUCCESS; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int block_size, completed; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ptrdiff_t lb, extent; - 
ompi_datatype_get_extent(dtype, &lb, &extent); - - - block_size = count * extent; - - if (EXTRA_NODE == k_node->node_type) { - peer = ptpcoll_module->super.sbgp_partner_module->group_list[ - k_node->rank_extra_sources_array[0]]; - - rc = MCA_PML_CALL(irecv((void *)((unsigned char *)rbuf), - block_size, MPI_BYTE, - peer, tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } else { - for (k = 0; k < k_node->n_extra_sources; k++) { - peer = ptpcoll_module->super.sbgp_partner_module->group_list[ - k_node->rank_extra_sources_array[k]]; - - rc = MCA_PML_CALL(isend((void *)((unsigned char *)rbuf), - block_size, MPI_BYTE, - peer, tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - } - - completed = 0; - - while(!completed){ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - } - - return rc; -} - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_extra_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args){ - - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - struct ompi_op_t *op = input_args->op; - int tag; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - void *src_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - - void *recv_buffer = (void *) ( - (unsigned char *)input_args->rbuf + - (size_t)input_args->rbuf_offset); - - int count = input_args->count; - struct ompi_datatype_t *dtype = input_args->dtype; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - 
int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - ptrdiff_t lb, extent; - /* Get the knomial tree */ - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k_radix = k_node->tree_order; - int n_exchanges = k_node->n_exchanges; - int padded_start_byte; - int padding_count = compute_padding_count(count, k_radix, n_exchanges); - void *tmpsrc_buffer = NULL; - - ompi_datatype_get_extent(dtype, &lb, &extent); - padded_start_byte = count * extent; - - /* Init for making the functions Re-entrant */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = 1; - *active_requests = 0; - *iteration = -1; - *status = PTPCOLL_NOT_STARTED; - *iteration = 0; - - compute_knomial_allgather_offsets(my_group_index,count + padding_count, dtype,k_radix,n_exchanges, - ptpcoll_module->allgather_offsets); - - if (EXCHANGE_NODE == k_node->node_type) { - bcol_ptpcoll_allreduce_recursivek_scatter_reduce_extra(ptpcoll_module, - buffer_index, - src_buffer, recv_buffer, op, count, dtype); - tmpsrc_buffer = src_buffer; - if ( k_node->n_extra_sources > 0){ - tmpsrc_buffer = recv_buffer; - } - bcol_ptpcoll_allreduce_recursivek_scatter_reduce(ptpcoll_module, buffer_index, - tmpsrc_buffer, recv_buffer, op, count + padding_count, dtype, - my_group_index,padded_start_byte); - bcol_ptpcoll_allreduce_knomial_allgather(ptpcoll_module, - buffer_index, - src_buffer, recv_buffer, count + padding_count, dtype, - my_group_index, padded_start_byte); - bcol_ptpcoll_allreduce_knomial_allgather_extra(ptpcoll_module, - buffer_index, - src_buffer, recv_buffer, count, dtype); - - } - else if (EXTRA_NODE == k_node->node_type) { - 
bcol_ptpcoll_allreduce_recursivek_scatter_reduce_extra(ptpcoll_module, - buffer_index, - src_buffer, recv_buffer, op, count, dtype); - bcol_ptpcoll_allreduce_knomial_allgather_extra(ptpcoll_module, - buffer_index, - src_buffer, recv_buffer, count, dtype); - } - - return BCOL_FN_COMPLETE; -} - - - -/* - * Register allreduce functions to the BCOL function table, - * so they can be selected - */ -int bcol_ptpcoll_allreduce_init(mca_bcol_base_module_t *super) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) super; - - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_ALLREDUCE; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - - /* not an accurate attribute, none of these algorithms - * are non-blocking - */ - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_allreduce_narraying_init, - bcol_ptpcoll_allreduce_narraying_progress); - - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - if (ptpcoll_module->pow_knum == ptpcoll_module->group_size) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_init, - NULL); - - } else { - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_extra_init, - NULL); - - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.h deleted file mode 100644 index 144e256761e..00000000000 --- 
a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_PTPCOLL_ALLREDUCE_H -#define MCA_BCOL_PTPCOLL_ALLREDUCE_H - -#include "ompi_config.h" -#include "ompi/op/op.h" -#include "ompi/datatype/ompi_datatype.h" -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" - -enum { - BLOCK_OFFSET = 0, - LOCAL_REDUCE_SEG_OFFSET, - BLOCK_COUNT, - SEG_SIZE, - NOFFSETS -}; - -BEGIN_C_DECLS -int bcol_ptpcoll_allreduce_narraying(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, void *data_buffer, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, const int - buffer_size, const int relative_group_index); - - -int bcol_ptpcoll_allreduce_narraying_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte); - -int bcol_ptpcoll_allreduce_knomial_allgather(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, - void *sbuf,void *rbuf, int count, struct - ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte); - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - - -int compute_knomial_allgather_offsets(int group_index, int count, struct - ompi_datatype_t *dtype,int k_radix,int n_exchanges, - int **offsets); - - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_extra(mca_bcol_ptpcoll_module_t 
*ptpcoll_module, - int buffer_index, - void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype); - -int bcol_ptpcoll_allreduce_knomial_allgather_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, - void *sbuf, - void *rbuf, - const int count, struct ompi_datatype_t *dtype); - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_extra_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_allreduce_init(mca_bcol_base_module_t *super); - -#if 0 -int knomial_reduce_scatter_offsets(int group_index,int count, struct ompi_datatype_t *dtype, int k_radix, - int n_exchanges, int nth_exchange, size_t *recv_offset, size_t - *block_offset, size_t *block_count, size_t *block_size, size_t - *seg_size); - -int allgather_offsets(int group_index,int count, struct ompi_datatype_t *dtype, int k_radix, - int n_exchanges, int nth_exchange, size_t *send_offset, size_t - *block_offset, size_t *block_count, size_t *block_size, size_t - *seg_size); -#endif - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c deleted file mode 100644 index 6ad04db6c68..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c +++ /dev/null @@ -1,933 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" - -/* - * Fanin routines - no user data - */ - -/********************************************* New Barrier *********************************************/ -/*******************************************************************************************************/ -/*******************************************************************************************************/ - -/*************************************** K-nominal ***************************************/ -/*****************************************************************************************/ -static int bcol_ptpcoll_barrier_recurs_knomial_new( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - uint64_t sequence_number; - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - - netpatterns_k_exchange_node_t *my_exchange_node = - &ptpcoll_module->knomial_exchange_tree; - - int rc, k, pair_comm_rank, exchange, completed, - tree_order = my_exchange_node->tree_order, tag, - n_extra_sources = my_exchange_node->n_extra_sources, - n_exchange = my_exchange_node->n_exchanges, num_reqs; - - ompi_communicator_t *comm = - ptpcoll_module->super.sbgp_partner_module->group_comm; - - int *extra_sources_array = NULL, - **rank_exchanges = my_exchange_node->rank_exchanges; - - ompi_request_t **requests; - opal_free_list_item_t *item; - - mca_bcol_ptpcoll_collreq_t *collreq; - - item = opal_free_list_wait (&ptpcoll_module->collreqs_free); - if (OPAL_UNLIKELY(NULL == item)) { - PTPCOLL_ERROR(("Free list waiting failed.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - collreq = (mca_bcol_ptpcoll_collreq_t *) item; - input_args->bcol_opaque_data = (void *) collreq; - - requests = collreq->requests; - - /* TAG 
Calculation */ - sequence_number = input_args->sequence_num; - - /* Keep tag within the limit supportd by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - - /* Mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - if (0 < n_extra_sources) { /* EXCHANGE_NODE case */ - collreq->need_toserv_extra = 1; - extra_sources_array = my_exchange_node->rank_extra_sources_array; - - /* I will participate in the exchange (of the algorithm) - - * wait for signal from extra process */ - for (k = 0; k < n_extra_sources; ++k) { - pair_comm_rank = - ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]]; - - rc = MCA_PML_CALL(irecv( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - comm, &(requests[k]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - } - - num_reqs = n_extra_sources; - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = num_reqs; - collreq->exchange = 0; - - return BCOL_FN_STARTED; - } - } else { - collreq->need_toserv_extra = 0; - } - - /* loop over exchange send/recv pairs */ - for (exchange = 0; exchange < n_exchange; ++exchange) { - for (k = 0; k < tree_order - 1; ++k) { - /* rank of exchange partner within the group */ - pair_comm_rank = - ptpcoll_module->super.sbgp_partner_module->group_list[rank_exchanges[exchange][k]]; - - assert(2 * ptpcoll_module->k_nomial_radix > (k * 2 + 1)); - - /* send to partner - we will wait for completion, as send - * completion is at the MPI level, and will not - * incur network level completion costs - */ - rc = MCA_PML_CALL(isend( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, - comm, &(requests[k * 2 + 1]))); - if 
(OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", exchange, k, - pair_comm_rank, rank_exchanges[exchange][k])); - - /* recive from partner */ - rc = MCA_PML_CALL(irecv( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - comm, &(requests[k * 2]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", exchange, k, - pair_comm_rank, rank_exchanges[exchange][k])); - } - - num_reqs = 2 * (tree_order - 1); - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = num_reqs; - collreq->exchange = exchange + 1; - - return BCOL_FN_STARTED; - } - } - - /* If non power of 2, may need to send message to "extra" proc */ - if (0 < n_extra_sources) { /* EXCHANGE_NODE case */ - for (k = 0; k < n_extra_sources; ++k) { - pair_comm_rank = - ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]]; - - rc = MCA_PML_CALL(isend( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, - comm, &(requests[k]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - } - - num_reqs = n_extra_sources; - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = num_reqs; - - collreq->exchange = n_exchange; - collreq->need_toserv_extra = 0; - - return BCOL_FN_STARTED; - } - } - - opal_free_list_return (&ptpcoll_module->collreqs_free, (opal_free_list_item_t *) collreq); - return BCOL_FN_COMPLETE; -} 
- -static int bcol_ptpcoll_barrier_recurs_knomial_new_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - - netpatterns_k_exchange_node_t *my_exchange_node = - &ptpcoll_module->knomial_exchange_tree; - - int rc, k, tag, pair_comm_rank, exchange, - tree_order = my_exchange_node->tree_order, num_reqs, - n_exchange = my_exchange_node->n_exchanges, completed, - n_extra_sources = my_exchange_node->n_extra_sources; - - ompi_communicator_t *comm = - ptpcoll_module->super.sbgp_partner_module->group_comm; - - int *extra_sources_array, - **rank_exchanges = my_exchange_node->rank_exchanges; - - mca_bcol_ptpcoll_collreq_t *collreq = - (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data; - - ompi_request_t **requests = collreq->requests; - - num_reqs = collreq->num_reqs; - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - return BCOL_FN_STARTED; - } - - /* Continue loop over exchange send/recv pairs */ - tag = collreq->tag; - - for (exchange = collreq->exchange; exchange < n_exchange; ++exchange) { - for (k = 0; k < tree_order - 1; ++k) { - /* rank of exchange partner within the group */ - pair_comm_rank = - ptpcoll_module->super.sbgp_partner_module->group_list[rank_exchanges[exchange][k]]; - - assert(2 * ptpcoll_module->k_nomial_radix > (k * 2 + 1)); - - /* send to partner - we will wait for completion, as send - * completion is at the MPI level, and will not - * incur network level completion costs - */ - rc = MCA_PML_CALL(isend( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, - comm, &(requests[k * 2 + 1]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - 
return rc; - } - - PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", exchange, k, - pair_comm_rank, rank_exchanges[exchange][k])); - - /* recive from partner */ - rc = MCA_PML_CALL(irecv( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - comm, &(requests[k * 2]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", exchange, k, - pair_comm_rank, rank_exchanges[exchange][k])); - } - - num_reqs = 2 * (tree_order - 1); - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->num_reqs = num_reqs; - collreq->exchange = exchange + 1; - - return BCOL_FN_STARTED; - } - } - - /* If non power of 2, may need to send message to "extra" proc */ - if (collreq->need_toserv_extra) { /* EXCHANGE_NODE case */ - extra_sources_array = my_exchange_node->rank_extra_sources_array; - - for (k = 0; k < n_extra_sources; ++k) { - pair_comm_rank = - ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]]; - - rc = MCA_PML_CALL(isend( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, - comm, &(requests[k]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - } - - num_reqs = n_extra_sources; - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->num_reqs = num_reqs; - collreq->exchange = n_exchange; - collreq->need_toserv_extra = 0; - - return BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -/****************************************** Extra node Barrier ******************************************/ - -static int 
bcol_ptpcoll_barrier_recurs_knomial_extra_new( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - uint64_t sequence_number; - int rc, tag, pair_comm_rank, - completed, num_reqs = 2; - - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - - netpatterns_k_exchange_node_t *my_exchange_node = - &ptpcoll_module->knomial_exchange_tree; - - ompi_communicator_t *comm = - ptpcoll_module->super.sbgp_partner_module->group_comm; - - int *extra_sources_array = my_exchange_node->rank_extra_sources_array; - - ompi_request_t **requests; - opal_free_list_item_t *item; - - mca_bcol_ptpcoll_collreq_t *collreq; - - item = opal_free_list_wait (&ptpcoll_module->collreqs_free); - if (OPAL_UNLIKELY(NULL == item)) { - PTPCOLL_ERROR(("Free list waiting failed.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - collreq = (mca_bcol_ptpcoll_collreq_t *) item; - input_args->bcol_opaque_data = (void *) collreq; - - requests = collreq->requests; - - /* TAG Calculation */ - sequence_number = input_args->sequence_num; - - /* Keep tag within the limit supportd by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - - /* Mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - pair_comm_rank = - ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[0]]; - - rc = MCA_PML_CALL(isend( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, - comm, &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - rc = MCA_PML_CALL(irecv( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - comm, &(requests[1]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if 
(OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - return BCOL_FN_STARTED; - } - - opal_free_list_return (&ptpcoll_module->collreqs_free, (opal_free_list_item_t *) collreq); - return BCOL_FN_COMPLETE; -} - -/*************************************** Recursive-Doubling ***************************************/ -/**************************************************************************************************/ - -static int bcol_ptpcoll_barrier_recurs_dbl_new( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - uint64_t sequence_number; - mca_bcol_ptpcoll_module_t *ptp_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - - ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm; - - int rc, my_extra_partner_comm_rank = 0, exchange, completed, - pair_comm_rank, pair_rank, delta, tag, num_reqs = 0, - my_rank = ptp_module->super.sbgp_partner_module->my_index, - n_exchange = ptp_module->super.sbgp_partner_module->n_levels_pow2; - - ompi_request_t **requests; - opal_free_list_item_t *item; - - mca_bcol_ptpcoll_collreq_t *collreq; - - item = opal_free_list_wait (&ptp_module->collreqs_free); - if (OPAL_UNLIKELY(NULL == item)) { - PTPCOLL_ERROR(("Free list waiting failed.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - collreq = (mca_bcol_ptpcoll_collreq_t *) item; - input_args->bcol_opaque_data = (void *) collreq; - - assert(PTPCOLL_EXTRA != ptp_module->pow_2type); - - requests = collreq->requests; - - /* TAG Calculation */ - sequence_number = input_args->sequence_num; - - /* keep tag within the limit supportd by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask); - - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - if (PTPCOLL_PROXY == ptp_module->pow_2type) { - /* I will participate in the exchange - wait for 
signal from extra - ** process */ - /* - * recv from extra rank - my_extra_partner_comm_rank - * can use blocking recv, as no other communications - * need to take place. - */ - my_extra_partner_comm_rank = - ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index]; - - collreq->need_toserv_extra = 1; - collreq->extra_partner_rank = my_extra_partner_comm_rank; - - rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, - my_extra_partner_comm_rank, tag, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for irecv failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = 1; - collreq->exchange = 0; - - return BCOL_FN_STARTED; - } - } else { - collreq->need_toserv_extra = 0; - } - - /* Loop over exchange send/recv pairs */ - delta = 1; - for (exchange = 0; exchange < n_exchange; ++exchange) { - - /* rank of exchange partner within the group */ - pair_rank = my_rank ^ delta; - - /* rank within the communicator */ - pair_comm_rank = - ptp_module->super.sbgp_partner_module->group_list[pair_rank]; - - /* send to partner - we will wait for completion, as send - * completion is at the MPI level, and will not - * incur network level completion costs - */ - rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - ++num_reqs; - - /* recive from partner */ - rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, - pair_comm_rank, tag, comm, - &(requests[1]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - ++num_reqs; - - PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d", - exchange, pair_rank, 
pair_comm_rank)); - - /* test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = num_reqs; - - collreq->exchange = exchange + 1; - assert(collreq->exchange >= 0); - - return BCOL_FN_STARTED; - } - - delta <<= 1; /* delta *= 2 */ - } - - if (PTPCOLL_PROXY == ptp_module->pow_2type) { - /* send - let the extra rank know that we are done */ - rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, - my_extra_partner_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for isend failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = 1; - - collreq->need_toserv_extra = 0; - collreq->exchange = n_exchange; - - return BCOL_FN_STARTED; - } - } - - opal_free_list_return (&ptp_module->collreqs_free, (opal_free_list_item_t *) collreq); - return BCOL_FN_COMPLETE; -} - -static int bcol_ptpcoll_barrier_recurs_dbl_new_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - mca_bcol_ptpcoll_module_t *ptp_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - - ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm; - - int rc, exchange, pair_comm_rank, tag, - pair_rank, delta, num_reqs, completed, - my_rank = ptp_module->super.sbgp_partner_module->my_index, - n_exchange = ptp_module->super.sbgp_partner_module->n_levels_pow2; - - ompi_request_t **requests; - mca_bcol_ptpcoll_collreq_t *collreq = - (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data; - - num_reqs = collreq->num_reqs; - requests = 
collreq->requests; - - /* test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - return BCOL_FN_STARTED; - } - - assert(PTPCOLL_EXTRA != ptp_module->pow_2type); - - /* Continue loop over exchange send/recv pairs */ - num_reqs = 0; - tag = collreq->tag; - - exchange = collreq->exchange; - assert(exchange >= 0); - - delta = 1 << exchange; - for (; exchange < n_exchange; ++exchange) { - - /* rank of exchange partner within the group */ - pair_rank = my_rank ^ delta; - - /* rank within the communicator */ - pair_comm_rank = - ptp_module->super.sbgp_partner_module->group_list[pair_rank]; - - /* send to partner - we will wait for completion, as send - * completion is at the MPI level, and will not - * incur network level completion costs - */ - rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - ++num_reqs; - - /* recive from partner */ - rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, - pair_comm_rank, tag, comm, - &(requests[1]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - ++num_reqs; - - PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d", - exchange, pair_rank, pair_comm_rank)); - - /* test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->num_reqs = num_reqs; - collreq->exchange = exchange + 1; - assert(collreq->exchange >= 0); - - return BCOL_FN_STARTED; - } - - delta <<= 1; /* delta *= 2 */ - } - - /* if non power of 2, may need to send message to "extra" proc */ - if 
(collreq->need_toserv_extra) { - /* send - let the extra rank know that we are done */ - rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, - collreq->extra_partner_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for isend failed.")); - return rc; - } - - if (!completed) { - collreq->num_reqs = 1; - collreq->need_toserv_extra = 0; - collreq->exchange = n_exchange; - - return BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -/****************************************** Extra node Barrier ******************************************/ - -static int bcol_ptpcoll_barrier_recurs_dbl_extra_new( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - uint64_t sequence_number; - int rc, completed, num_reqs = 2, - tag, my_extra_partner_comm_rank; - - ompi_request_t **requests; - opal_free_list_item_t *item; - - mca_bcol_ptpcoll_collreq_t *collreq; - - mca_bcol_ptpcoll_module_t *ptp_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm; - - item = opal_free_list_wait (&ptp_module->collreqs_free); - if (OPAL_UNLIKELY(NULL == item)) { - PTPCOLL_ERROR(("Free list waiting failed.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - collreq = (mca_bcol_ptpcoll_collreq_t *) item; - input_args->bcol_opaque_data = (void *) collreq; - - requests = collreq->requests; - - /* TAG Calculation */ - sequence_number = input_args->sequence_num; - - /* Keep tag within the limit supportd by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask); - - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - /* I will not 
participate in the exchange - so just "register" as here, - * signal the extra rank that I am here */ - - my_extra_partner_comm_rank = - ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index]; - - rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, - my_extra_partner_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Send failed.")); - return rc; - } - - /* Recv signal that the rest are done - my_extra_partner_comm_rank */ - rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, - my_extra_partner_comm_rank, tag, comm, - &(requests[1]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - return BCOL_FN_STARTED; - } - - opal_free_list_return (&ptp_module->collreqs_free, (opal_free_list_item_t *) collreq); - return BCOL_FN_COMPLETE; -} - -/* We have the same progress func for both cases (R-D and K-Nominal) */ -static int bcol_ptpcoll_barrier_extra_node_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - ompi_request_t **requests; - int rc, completed, num_reqs = 2; - - mca_bcol_ptpcoll_collreq_t *collreq = - (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data; - - requests = collreq->requests; - - /* test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - return BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -static int mca_bcol_ptpcoll_barrier_setup(mca_bcol_base_module_t *super, int bcoll_type) -{ - netpatterns_k_exchange_node_t *my_exchange_node; - 
mca_bcol_ptpcoll_module_t * ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) super; - - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = bcoll_type; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - switch(mca_bcol_ptpcoll_component.barrier_alg) { - case 1: - if (PTPCOLL_EXTRA == ptpcoll_module->pow_2type) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_barrier_recurs_dbl_extra_new, - bcol_ptpcoll_barrier_extra_node_progress); - break; - } - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_barrier_recurs_dbl_new, - bcol_ptpcoll_barrier_recurs_dbl_new_progress); - break; - case 2: - my_exchange_node = &ptpcoll_module->knomial_exchange_tree; - if (my_exchange_node->n_extra_sources > 0 && - EXTRA_NODE == my_exchange_node->node_type) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_barrier_recurs_knomial_extra_new, - bcol_ptpcoll_barrier_extra_node_progress); - break; - } - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_barrier_recurs_knomial_new, - bcol_ptpcoll_barrier_recurs_knomial_new_progress); - break; - default: - PTPCOLL_ERROR(("Wrong barrier_alg flag value.")); - } - - return OMPI_SUCCESS; -} - -int mca_bcol_ptpcoll_memsync_init(mca_bcol_base_module_t *super) -{ - return mca_bcol_ptpcoll_barrier_setup(super, BCOL_SYNC); -} - -int bcol_ptpcoll_barrier_init(mca_bcol_base_module_t *super) -{ - return mca_bcol_ptpcoll_barrier_setup(super, BCOL_BARRIER); -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c 
b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c deleted file mode 100644 index 4cb0b6ea807..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c +++ /dev/null @@ -1,2318 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_ptpcoll_bcast.h" -#include "bcol_ptpcoll_utils.h" - -#define K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, \ - my_group_index, group_list, \ - data_buffer, count, tag, comm, send_requests, num_pending_sends) \ -do { \ - int rc = OMPI_SUCCESS; \ - int dst; \ - int comm_dst; \ - *num_pending_sends = 0; \ - \ - while(MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info)) { \ - /* For each level of tree, do sends */ \ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_group_index, \ - radix, step_info, dst); \ - comm_dst = group_list[dst]; \ - \ - /* Non blocking send .... 
*/ \ - PTPCOLL_VERBOSE(9 , ("Bcast, Isend data to %d[%d], count %d, tag %d, addr %p", \ - dst, comm_dst, count, tag, \ - data_buffer)); \ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, \ - comm_dst, tag, \ - MCA_PML_BASE_SEND_STANDARD, comm, \ - &(send_requests[*num_pending_sends]))); \ - PTPCOLL_VERBOSE(10, ("send request addr is %p", send_requests[*num_pending_sends])); \ - if( OMPI_SUCCESS != rc ) { \ - PTPCOLL_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - ++(*num_pending_sends); \ - } \ -} while(0) - -#define NARRAY_BCAST_NB(narray_node, process_shift, group_size, \ - data_buffer, count, tag, comm, send_requests, \ - num_pending_sends) \ -do { \ - int n, rc = OMPI_SUCCESS; \ - int dst; \ - int comm_dst; \ - \ - /* Send out data to all relevant childrens */ \ - for (n = 0; n < narray_node->n_children; n++) { \ - \ - dst = narray_node->children_ranks[n] + process_shift; \ - if (dst >= group_size) { \ - dst -= group_size; \ - } \ - comm_dst = group_list[dst]; \ - \ - /* Non blocking send .... 
*/ \ - PTPCOLL_VERBOSE(9 , ("Bcast, Isend data to %d[%d], count %d, tag %d, addr %p", \ - dst, comm_dst, count, tag, \ - data_buffer)); \ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, \ - comm_dst, tag, \ - MCA_PML_BASE_SEND_STANDARD, comm, \ - &(send_requests[*num_pending_sends]))); \ - if( OMPI_SUCCESS != rc ) { \ - PTPCOLL_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - ++(*num_pending_sends); \ - } \ -} while(0) - - -int bcol_ptpcoll_bcast_k_nomial_anyroot_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int completed = 0; - int rc; - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - uint32_t buffer_index = input_args->buffer_index; - - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, send_requests, &rc); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - /* DONE */ - if(completed) { - PTPCOLL_VERBOSE(10, ("bcast root is done")); - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, ("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -/* K-nomial tree ( with any root ) algorithm */ -int bcol_ptpcoll_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - - int tag; - int rc; - int matched = 0; /* not matched */ - int comm_root = 0; /* no root */ - int i; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int radix = ptpcoll_module->k_nomial_radix; - int root_radix_mask = 
ptpcoll_module->pow_knum; - int peer = -1; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - int extra_root = -1; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_status_public_t status; - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - netpatterns_knomial_step_info_t step_info = {0, 0, 0}; - - PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* reset requests */ - *active_requests = 0; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_anyroot, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - "radix: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, sequence_number, - input_args->root_flag, - ptpcoll_module->pow_k, ptpcoll_module->pow_knum, - data_buffer, - radix)); - - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm root of the data")); - /* - * I'm root of the operation - * send data to (k - 1) * log base k N neighbors - */ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, - ptpcoll_module->pow_knum, my_group_index); - K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, - my_group_index, group_list, - data_buffer, count, tag, comm, send_requests, - active_requests); - - goto ANY_ROOT_KNOMIAL_EXTRA; - } 
- - /* - * I'm not root, and I don't know to calculate root, so just - * wait for data from ANY_SOURCE, once you get it, proceed like a root - */ - - for (i = 0; i < cm->num_to_probe; i++) { - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, ptpcoll_module->pow_knum, my_group_index); - while(MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info)) { - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_group_index, radix, step_info, peer); - PTPCOLL_VERBOSE(10, ("Bcast, iprobe tag %d rank %d", - tag, group_list[peer])); - MCA_PML_CALL(iprobe(group_list[peer], tag, - comm, &matched, &status)); - if (matched) { - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_UPDATE_LEVEL_FOR_BCAST(step_info, radix); - break; - } - } - - /* Check of the */ - if (PTPCOLL_KN_PROXY & ptpcoll_module->pow_ktype) { - for (i = 0 ; i < ptpcoll_module->kn_proxy_extra_num; i++) { - PTPCOLL_VERBOSE(10, ("Bcast, iprobe tag %d rank %d", - tag, group_list[peer])); - MCA_PML_CALL(iprobe(group_list[ptpcoll_module->kn_proxy_extra_index[i]], tag, - comm, &matched, &status)); - if (matched) { - step_info.k_level = root_radix_mask; - extra_root = group_list[ptpcoll_module->kn_proxy_extra_index[i]]; - goto ANY_ROOT_KNOMIAL_BCAST; - } - } - } - } - - /* the function always returns OMPI_SUCCESS, so we don't check return code */ - if (0 == matched) { - PTPCOLL_VERBOSE(10, ("IPROBE was not matched")); - /* No data was received, return no match error */ - return BCOL_FN_NOT_STARTED; - } - - /* set the source of data */ - comm_root = status.MPI_SOURCE; - - PTPCOLL_VERBOSE(10, ("A. 
step info %d %d %d", step_info.k_level, step_info.k_step, step_info.k_tmp_peer)); - - /* Bcast the data */ - PTPCOLL_VERBOSE(10, ("Starting data bcast")); - -ANY_ROOT_KNOMIAL_BCAST: - /* Post receive that will fetch the data */ - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d[%d], count %d, tag %d, addr %p", - comm_root, count, tag, data_buffer)); - - rc = MCA_PML_CALL(recv(data_buffer, count, MPI_BYTE, comm_root, tag, comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - PTPCOLL_VERBOSE(10, ("Bcast, Data was received")); - - /* Sending forward the data over K-nomial tree */ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, step_info.k_level, my_group_index); - - PTPCOLL_VERBOSE(10, ("B. step info %d %d %d", step_info.k_level, step_info.k_step, step_info.k_tmp_peer)); - K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, - my_group_index, group_list, - data_buffer, count, tag, comm, send_requests, - active_requests); - -ANY_ROOT_KNOMIAL_EXTRA: - /* Proxy node but NOT virtual root */ - if (PTPCOLL_KN_PROXY & ptpcoll_module->pow_ktype) { - for (i = 0 ; i < ptpcoll_module->kn_proxy_extra_num; i++) { - if (ptpcoll_module->kn_proxy_extra_index[i] == extra_root) - continue; - - PTPCOLL_VERBOSE(10, ("Extra_Isend to %d", ptpcoll_module->kn_proxy_extra_index[i])); - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->kn_proxy_extra_index[i]], tag - 1, - MCA_PML_BASE_SEND_STANDARD, comm, - &(send_requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - } - - if (*active_requests > 0) { - matched = - mca_bcol_ptpcoll_test_all_for_match - (active_requests, send_requests, &rc); - } - - /* If it is last call, we have to recycle memory */ - if(matched) { - PTPCOLL_VERBOSE(10, ("bcast root is done")); - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, 
("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -static int bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int i; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - ompi_status_public_t status; - - PTPCOLL_VERBOSE(3, ("Knomial Anyroot, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_anyroot extra, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - ,buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - ptpcoll_module->pow_k, 
ptpcoll_module->pow_knum, - data_buffer - )); - - /* we have a power 2 group */ - if (input_args->root_flag) { - - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data, v root %d", ptpcoll_module->kn_proxy_extra_index[0])); - /* send the all data to your proxy peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->kn_proxy_extra_index[0]], tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - /* we have to store the iteration number somewhere */ - PTPCOLL_VERBOSE(10, ("Extra was started")); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } else { - for (i = 0; i < cm->num_to_probe && - 0 == completed; i++) { - MCA_PML_CALL(iprobe(group_list[ptpcoll_module->kn_proxy_extra_index[0]], tag - 1, - comm, &completed, &status)); - } - if (0 == completed) { - /* No data was received */ - return BCOL_FN_NOT_STARTED; - } - - /* the data is ready */ - rc = MCA_PML_CALL(recv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->kn_proxy_extra_index[0]], tag - 1, - comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - } - - PTPCOLL_VERBOSE(10, ("Extra was done")); - return BCOL_FN_COMPLETE; -} - -static int bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int rc; - int completed = 0; /* not completed */ - int i; - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[input_args->buffer_index].requests; - uint32_t buffer_index = input_args->buffer_index; - mca_bcol_ptpcoll_component_t *cm = 
&mca_bcol_ptpcoll_component; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_status_public_t status; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - /* keep tag within the limit support by the pml */ - int tag = -((PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask)); - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot_progress extra, was called, tag %d\n", tag)); - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data")); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } else { - for (i = 0; i < cm->num_to_probe && - 0 == completed; i++) { - MCA_PML_CALL(iprobe(group_list[ptpcoll_module->kn_proxy_extra_index[0]], tag - 1, - comm, &completed, &status)); - } - if (0 == completed) { - return BCOL_FN_STARTED; - } - /* the data is ready */ - - rc = MCA_PML_CALL(recv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->kn_proxy_extra_index[0]], tag - 1, - comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - } - - /* Done */ - return BCOL_FN_COMPLETE; \ -} - -/* Know root means that we know exactly the source of data and we do not have to check multiple - * sources - */ - -#define K_NOMIAL_DATA_SRC(radix, my_group_index, group_size, group_root, data_src, radix_mask) \ - do { \ - int relative_rank = (my_group_index >= group_root) ? 
my_group_index - group_root : \ - my_group_index - group_root + group_size; \ - \ - radix_mask = 1; \ - while (radix_mask < group_size) { \ - if (relative_rank % (radix * radix_mask)) { \ - data_src = relative_rank/(radix * radix_mask) * (radix * radix_mask) + group_root; \ - if (data_src >= group_size) data_src -= group_size; \ - break; \ - } \ - radix_mask *= radix; \ - } \ - } while (0) - - -int bcol_ptpcoll_bcast_k_nomial_known_root_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc = OMPI_SUCCESS; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int radix = ptpcoll_module->k_nomial_radix; - int radix_mask; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - int group_root_index = 0; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - ompi_request_t **recv_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int completed = 0; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - PTPCOLL_VERBOSE(3, ("BCAST Know root, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - PTPCOLL_VERBOSE(10, 
("bcol_ptpcoll_bcast_k_nomial_known_root_progress, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - "radix: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, sequence_number, - input_args->root_flag, - ptpcoll_module->pow_k, ptpcoll_module->pow_knum, - data_buffer, - radix)); - - if (input_args->root_flag) { - /* Check for completion */ - assert(*active_requests > 0); - PTPCOLL_VERBOSE(10, ("Requests %d", *active_requests)); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, send_requests, &rc); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - } else { - /* No data was received. Waiting for data */ - if (0 == (*active_requests)) { - int extra_root = -1; - netpatterns_knomial_step_info_t step_info; - /* We can not block. So run couple of test for data arrival */ - if (0 == mca_bcol_ptpcoll_test_for_match(recv_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched (active request %d)", - *active_requests)); - /* No data was received, return no match error */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - radix_mask = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask; - group_root_index = input_args->root_route->rank; - - PTPCOLL_VERBOSE(10, ("Test was matched - radix %d", radix_mask)); - /* Bcast the data */ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, - radix_mask, my_group_index); - K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, - my_group_index, group_list, - data_buffer, count, tag, comm, send_requests, - active_requests); - - if (PTPCOLL_KN_PROXY & ptpcoll_module->pow_ktype) { - int i; - if (radix_mask == ptpcoll_module->pow_knum) { - extra_root = group_root_index; - } - for (i = 0 ; i < ptpcoll_module->kn_proxy_extra_num; i++) { - if (ptpcoll_module->kn_proxy_extra_index[i] == extra_root) - continue; - PTPCOLL_VERBOSE(10, ("Extra_Isend to %d", ptpcoll_module->kn_proxy_extra_index[i])); - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->kn_proxy_extra_index[i]], tag - 1, - MCA_PML_BASE_SEND_STANDARD, comm, - &(send_requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - } - if (*active_requests > 0) { - completed = mca_bcol_ptpcoll_test_all_for_match - (active_requests, send_requests, &rc); - } else { - completed = 1; - } - } else { - /* Data was received and sent out, check for completion */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, send_requests, &rc); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Test was not matched (active request %d)", - *active_requests)); - return OMPI_ERROR; - } - } - } - /* DONE */ - if(completed) { - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, ("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -int bcol_ptpcoll_bcast_k_nomial_known_root(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = 
(mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int comm_root; - int data_src = -1; - int group_root_index; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int radix = ptpcoll_module->k_nomial_radix; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - ompi_request_t **recv_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int matched = 0; - int k_level, logk_level; - int extra_root = -1; - netpatterns_knomial_step_info_t step_info; - - PTPCOLL_VERBOSE(3, ("BCAST Know root, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* reset active request counter */ - (*active_requests) = 0; - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_known_root, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - "radix: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - ptpcoll_module->pow_k, ptpcoll_module->pow_knum, - data_buffer, - radix)); - - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm root of the 
data")); - /* - * I'm root of the operation - * send data to (k - 1) * log base k N neighbors - */ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, - ptpcoll_module->pow_knum, my_group_index); - K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, - my_group_index, group_list, - data_buffer, count, tag, comm, send_requests, - active_requests); - goto KNOWN_ROOT_KNOMIAL_BCAST_EXTRA; - } - - /* I'm not root */ - group_root_index = input_args->root_route->rank; - - /* If Proxy node, check if extra node is root */ - PTPCOLL_VERBOSE(10, ("Check if I virtual root, groop root %d group_size_pow %d type %d\n", - group_root_index, ptpcoll_module->pow_knum , ptpcoll_module->pow_ktype)); - if (group_root_index >= ptpcoll_module->pow_knum) { - /* Chech if the rank is virtual root */ - int virtual_root = (group_root_index - - ptpcoll_module->pow_knum) / (radix - 1); - - if (my_group_index == virtual_root) { - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, - ptpcoll_module->pow_knum, my_group_index); - k_level = ptpcoll_module->pow_knum; - comm_root = group_list[group_root_index]; - extra_root = group_root_index; - PTPCOLL_VERBOSE(10, ("Im virtual root klevel %d, comm_root %d vroot %d\n", - k_level, comm_root, virtual_root)); - goto KNOWN_ROOT_KNOMIAL_BCAST; - } else { - /* set virtual root as real root of the group */ - group_root_index = virtual_root; - PTPCOLL_VERBOSE(10, ("My virtual root vroot %d\n", group_root_index)); - } - } - - data_src = netpatterns_get_knomial_data_source( - my_group_index, group_root_index, radix, ptpcoll_module->pow_knum, - &k_level, &logk_level); - - comm_root = group_list[data_src]; - -KNOWN_ROOT_KNOMIAL_BCAST: - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d[%d], count %d, tag %d, addr %p", - comm_root, data_src, count, tag, data_buffer)); - - rc = MCA_PML_CALL(irecv(data_buffer, count, MPI_BYTE, comm_root, tag, comm, recv_request)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return 
OMPI_ERROR; - } - - /* We can not block. So run couple of test for data arrival */ - if (0 == mca_bcol_ptpcoll_test_for_match(recv_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - /* cache the radix mask for future progress */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = k_level; - /* No data was received, return no match error */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - - /* Bcast the data */ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, - k_level, my_group_index); - - K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, - my_group_index, group_list, - data_buffer, count, tag, comm, send_requests, - active_requests); - -KNOWN_ROOT_KNOMIAL_BCAST_EXTRA: - /* Proxy node but NOT virtual root */ - if (PTPCOLL_KN_PROXY & ptpcoll_module->pow_ktype) { - int i; - for (i = 0 ; i < ptpcoll_module->kn_proxy_extra_num; i++) { - if (ptpcoll_module->kn_proxy_extra_index[i] == extra_root) - continue; - - PTPCOLL_VERBOSE(10, ("Extra_Isend to %d", ptpcoll_module->kn_proxy_extra_index[i])); - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->kn_proxy_extra_index[i]], tag - 1, - MCA_PML_BASE_SEND_STANDARD, comm, - &(send_requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - } - - if (*active_requests > 0) { - matched = - mca_bcol_ptpcoll_test_all_for_match - (active_requests, send_requests, &rc); - } else { - matched = 1; - } - - /* If it is last call, we have to recycle memory */ - if(matched) { - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, ("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; 
- int rc; - int i; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - ompi_status_public_t status; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_anyroot extra, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - "radix: %d" , - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - ptpcoll_module->pow_k, ptpcoll_module->pow_knum, - data_buffer, - 2 - )); - - /* we have a power 2 group */ - if (input_args->root_flag) { - - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data")); - /* send the all data to your proxy peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], 
tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } else { - for (i = 0; i < cm->num_to_probe && - 0 == completed; i++) { - MCA_PML_CALL(iprobe(group_list[ptpcoll_module->proxy_extra_index], tag - 1, - comm, &completed, &status)); - } - if (0 == completed) { - /* No data was received */ - return BCOL_FN_NOT_STARTED; - } - - /* the data is ready */ - rc = MCA_PML_CALL(recv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], tag - 1, - comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - } - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int rc; - int completed = 0; /* not completed */ - int i; - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[input_args->buffer_index].requests; - uint32_t buffer_index = input_args->buffer_index; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_status_public_t status; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - /* keep tag within the limit support by the pml */ - int tag = -((PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & 
(ptpcoll_module->tag_mask)); - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot_progress extra, was called, tag %d\n", tag)); - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data")); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } else { - for (i = 0; i < cm->num_to_probe && - 0 == completed; i++) { - MCA_PML_CALL(iprobe(group_list[ptpcoll_module->proxy_extra_index], tag - 1, - comm, &completed, &status)); - } - if (0 == completed) { - return BCOL_FN_STARTED; - } - /* the data is ready */ - - rc = MCA_PML_CALL(recv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], tag - 1, - comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - } - - /* Done */ - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int rc; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - size_t base_block_size = (count + ptpcoll_module->pow_2num - 1) / - 
ptpcoll_module->pow_2num; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int *status = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status; - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_progress, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_2: %d %d " - "buff: %p " - "radix: %d" - "block_size: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, 0, - input_args->root_flag, - ptpcoll_module->pow_2, ptpcoll_module->pow_2num, - data_buffer, - 2, - base_block_size)); - - switch(*status) { - case PTPCOLL_GATHER_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - ++(*iteration); /* start from next iteration */ - PTPCOLL_VERBOSE(10, ("Outstanding operation was comleted, starting next one ! %d", *iteration)); - break; - case PTPCOLL_EXTRA_SEND_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - return BCOL_FN_COMPLETE; - default: - PTPCOLL_VERBOSE(10, ("Unknown status %d", *status)); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("Stating PR_GATHER")); - /* Gather, continue the recoursive doubling iterations */ - rc = bcol_ptpcoll_bcast_binomial_gather_anyroot(ptpcoll_module, buffer_index, data_buffer, - count, base_block_size); - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. 
Return %d", rc)); - return rc; - } - PTPCOLL_VERBOSE(10, ("PR_GATHER done")); - - /* it the process is proxy , it has to send full - message to remote peer */ - if ((PTPCOLL_PROXY & ptpcoll_module->pow_2type) && - ! CHECK_IF_ROOT_OR_VROOT(ptpcoll_module, buffer_index)) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra( - ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->proxy_extra_index, comm, - active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - /* return */ - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *radix_mask_pow = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - size_t base_block_size = (count + ptpcoll_module->pow_2num - 1) / - ptpcoll_module->pow_2num; - int root_pow2 = ptpcoll_module->pow_2 - 1; - int *status = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status; - - PTPCOLL_VERBOSE(3, ("BCAST Anyroot, 
index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - /* set initial status */ - *status = PTPCOLL_NOT_STARTED; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_anyroot, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_2: %d %d " - "buff: %p " - "radix: %d" - "block_size: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, sequence_number, - input_args->root_flag, - ptpcoll_module->pow_2, ptpcoll_module->pow_2num, - data_buffer, - 2, - base_block_size)); - - /* we have a power 2 group */ - if (input_args->root_flag) { - - PTPCOLL_VERBOSE(10, ("I'm root of the data")); - /* for proxy we have little bit more work to do */ - if (PTPCOLL_PROXY & ptpcoll_module->pow_2type) { - /* send the all data to your extra peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], - tag - 1, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - /* - * I'm root of the operation - * send data to (k - 1) * log base k N neighbors - */ - *radix_mask_pow = ptpcoll_module->pow_2; - - K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER(root_pow2, - my_group_index, group_size, group_list, - data_buffer, base_block_size, count, tag, comm, requests, - active_requests); - - goto GATHER; - } - - /* <-- non root flow --> */ - rc = 
bcol_ptpcoll_bcast_binomial_probe_and_scatter_anyroot(ptpcoll_module, buffer_index, - data_buffer, count, base_block_size); - if (BCOL_FN_COMPLETE != rc) { - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - -GATHER: - *iteration = 0; - *status = PTPCOLL_GATHER_STARTED; - rc = bcol_ptpcoll_bcast_binomial_gather_anyroot(ptpcoll_module, buffer_index, - data_buffer, count, base_block_size); - - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - - ++(*iteration); /* I need it for progress */ - - /* proxy case */ - if ((PTPCOLL_PROXY & ptpcoll_module->pow_2type) && - ! CHECK_IF_ROOT_OR_VROOT(ptpcoll_module, buffer_index)) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra(ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->proxy_extra_index, comm, - active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int rc; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - size_t base_block_size = (count + ptpcoll_module->pow_2num - 1) / - ptpcoll_module->pow_2num; - int tag = 
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_scatter_gatther_known_progress, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_2: %d %d " - "buff: %p " - "radix: %d" - "block_size: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, 0, - input_args->root_flag, - ptpcoll_module->pow_2, ptpcoll_module->pow_2num, - data_buffer, - 2, - base_block_size)); - - switch(*status) { - case PTPCOLL_WAITING_FOR_DATA: - PTPCOLL_VERBOSE(10, ("Probe for the data")); - rc = bcol_ptpcoll_bcast_binomial_test_and_scatter_known_root(ptpcoll_module, buffer_index, - data_buffer, count, base_block_size); - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - *iteration = 0; - *status = PTPCOLL_GATHER_STARTED; - break; - case PTPCOLL_GATHER_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - ++(*iteration); /* start from next iteration */ - PTPCOLL_VERBOSE(10, ("Outstanding operation was comleted, starting next one ! %d", *iteration)); - break; - case PTPCOLL_EXTRA_SEND_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - return BCOL_FN_COMPLETE; - default: - PTPCOLL_VERBOSE(10, ("Unknown status %d", *status)); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("Stating PR_GATHER")); - /* Gather, continue the recoursive doubling iterations */ - rc = bcol_ptpcoll_bcast_binomial_gather_anyroot(ptpcoll_module, buffer_index, data_buffer, - count, base_block_size); - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - PTPCOLL_VERBOSE(10, ("PR_GATHER done")); - - /* it the process is proxy , it has to send full - message to remote peer */ - if ((PTPCOLL_PROXY & ptpcoll_module->pow_2type) && - ! CHECK_IF_ROOT_OR_VROOT(ptpcoll_module, buffer_index)) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra( - ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->proxy_extra_index, comm, - active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - - /* return */ - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int group_src, comm_root; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int pow2_distance; - void *curr_data_buffer; - int recv_count; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = 
input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *radix_mask_pow = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - size_t base_block_size = (count + ptpcoll_module->pow_2num - 1) / - ptpcoll_module->pow_2num; - int root_pow2 = ptpcoll_module->pow_2 - 1; - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - - PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - /* set initial status */ - *status = PTPCOLL_NOT_STARTED; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_binomial_scatter_gatther_known, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_2: %d %d " - "buff: %p " - "radix: %d" - "block_size: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, sequence_number, - input_args->root_flag, - ptpcoll_module->pow_2, ptpcoll_module->pow_2num, - data_buffer, - 2, - base_block_size)); - - /* we have a power 2 group */ - if (input_args->root_flag) { - - PTPCOLL_VERBOSE(10, ("I'm root of the data")); - /* for proxy we have little bit more work to do */ - if (PTPCOLL_PROXY & ptpcoll_module->pow_2type) { - /* send the all data to your extra peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], tag - 1, - 
MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - *active_requests = 1; - } - /* - * I'm root of the operation - * send data to (k - 1) * log base k N neighbors - */ - K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER(root_pow2, - my_group_index, group_size, group_list, - data_buffer, base_block_size, count, tag, comm, requests, - active_requests); - - /* EXIT OR GO TO Gather */ - *iteration = 0; - *radix_mask_pow = ptpcoll_module->pow_2; - goto GATHER; - } - - /* <-- non root flow --> */ - /* prapare and post recv operation */ - group_src = bcol_ptpcoll_binomial_root_to_src(input_args->root_route->rank, - my_group_index, ptpcoll_module->pow_2num, - ptpcoll_module->group_size, &pow2_distance); - - assert(group_src >= 0); - - if (0 > pow2_distance) { - /* the rank is virtual root for this group, receive the data - and scatter gather as root */ - PTPCOLL_VERBOSE(10, ("Virtual root %d , set mask to %d", my_group_index, ptpcoll_module->pow_2)); - *radix_mask_pow = ptpcoll_module->pow_2; - curr_data_buffer = data_buffer; - recv_count = count; - } else { - int my_left_boundary_rank; - recv_count = base_block_size * (1 << pow2_distance); /* we may receive larger data */ - my_left_boundary_rank = my_group_index & ((~(int)0) << pow2_distance ); - curr_data_buffer = (void *)((unsigned char *)data_buffer + - (size_t) base_block_size * my_left_boundary_rank); - *radix_mask_pow = pow2_distance; - } - - comm_root = group_list[group_src]; - - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d[%d], count %d, tag %d, addr %p", - comm_root, group_src, count, tag, data_buffer)); - - rc = MCA_PML_CALL(irecv(curr_data_buffer, recv_count, MPI_BYTE, comm_root, - tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - - *status = PTPCOLL_WAITING_FOR_DATA; - rc = 
bcol_ptpcoll_bcast_binomial_test_and_scatter_known_root(ptpcoll_module, - buffer_index, data_buffer, count, base_block_size); - - if (BCOL_FN_COMPLETE != rc) { - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - - /* recv operation is done */ - - *iteration = 0; - -GATHER: - - *status = PTPCOLL_GATHER_STARTED; - rc = bcol_ptpcoll_bcast_binomial_gather_anyroot(ptpcoll_module, buffer_index, - data_buffer, count, base_block_size); - - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - - ++(*iteration); /* I need it for progress */ - - /* proxy case */ - if ((PTPCOLL_PROXY & ptpcoll_module->pow_2type) && - ! CHECK_IF_ROOT_OR_VROOT(ptpcoll_module, buffer_index)) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra( - ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->proxy_extra_index, comm, - active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int *group_list = 
ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(3, ("BCAST known root, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_anyroot extra, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - "radix: %d" , - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - ptpcoll_module->pow_k, ptpcoll_module->pow_knum, - data_buffer, - 2 - )); - - /* we have a power 2 group */ - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data")); - /* send the all data to your proxy peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - } else { - rc = MCA_PML_CALL(irecv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], - tag - 1, comm, &requests[*active_requests])); - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int rc; - int completed = 0; /* not completed */ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[input_args->buffer_index].requests; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_known_root_extra_progress extra, was called\n")); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -static int bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_progress( - bcol_function_args_t *input_args, struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int rc; - int completed = 0; /* not completed */ - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - uint32_t buffer_index = input_args->buffer_index; - - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - int relative_group_index, - group_root_index = 0; - int group_size = ptpcoll_module->full_narray_tree_size; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_progress, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "root: %d " - "buff: %p " - "radix: %d" - , buffer_index, tag, - ptpcoll_module->tag_mask, - input_args->root_flag, - data_buffer, - ptpcoll_module->narray_knomial_proxy_num - )); - - if (input_args->root_flag || - /* virtual root case */ - (input_args->root_route->rank >= group_size && - my_group_index == (input_args->root_route->rank - group_size) / - mca_bcol_ptpcoll_component.narray_knomial_radix)) { - relative_group_index = 0; - group_root_index = my_group_index; - } else { - if (input_args->root_route->rank >= group_size) { - group_root_index = 
(input_args->root_route->rank - group_size) / - mca_bcol_ptpcoll_component.narray_knomial_radix; - } else { - group_root_index = input_args->root_route->rank; - } - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index += group_size; - } - } - - switch(*status) { - case PTPCOLL_WAITING_FOR_DATA: - PTPCOLL_VERBOSE(10, ("Probe for the data")); - rc = bcol_ptpcoll_bcast_narray_test_and_scatter_known_root(ptpcoll_module, - buffer_index, data_buffer, count, group_root_index, - relative_group_index); - - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - *iteration = 0; - *status = PTPCOLL_GATHER_STARTED; - break; - case PTPCOLL_ROOT_SEND_STARTED: - case PTPCOLL_GATHER_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - ++(*iteration); /* start from next iteration */ - PTPCOLL_VERBOSE(10, ("Outstanding operation was comleted, starting next one ! %d", *iteration)); - break; - case PTPCOLL_EXTRA_SEND_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - return BCOL_FN_COMPLETE; - default: - PTPCOLL_VERBOSE(10, ("Unknown status %d", *status)); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("Stating PR_GATHER")); - /* Gather, continue the recoursive doubling iterations */ - rc = bcol_ptpcoll_bcast_narray_knomial_gather(ptpcoll_module, - buffer_index, data_buffer, count, - relative_group_index); - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. 
Return %d", rc)); - return rc; - } - PTPCOLL_VERBOSE(10, ("PR_GATHER done")); - - /* it the process is proxy , it has to send full - message to remote peer */ - if ((PTPCOLL_PROXY & ptpcoll_module->narray_type) && - !input_args->root_flag) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_send_n_extra( - ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->narray_knomial_proxy_extra_index, - ptpcoll_module->narray_knomial_proxy_num, - input_args->root_route->rank, - comm, active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - - /* return */ - return BCOL_FN_COMPLETE; -} - - -static int bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag, rc, i; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int data_src, offset, - comm_root; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - void *curr_data_buffer; - int recv_count; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - size_t base_block_size = 0; - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - int relative_group_index, - group_root_index; - int group_size = ptpcoll_module->full_narray_tree_size; - int 
completed = 0; - int virtual_root; - netpatterns_narray_knomial_tree_node_t *narray_knomial_node = NULL; - netpatterns_narray_knomial_tree_node_t *narray_node = NULL; - - PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - /* reset radix mask, it used to keep last block size */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = 1; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - /* set initial status */ - *status = PTPCOLL_NOT_STARTED; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "buff: %p " - "radix: %d" - ,buffer_index, tag, - ptpcoll_module->tag_mask, sequence_number, - input_args->root_flag, - data_buffer, - ptpcoll_module->narray_knomial_proxy_num - )); - - /* we have a power 2 group */ - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm root of the data")); - narray_knomial_node = &ptpcoll_module->narray_knomial_node[0]; - relative_group_index = 0; - group_root_index = my_group_index; - - /* for proxy we have little bit more work to do */ - if (PTPCOLL_PROXY & ptpcoll_module->narray_type) { - /* send the all data to your extra peer */ - for (i = 0; i < ptpcoll_module->narray_knomial_proxy_num; ++i) { - PTPCOLL_VERBOSE(9, ("Extra send %d, dst %d, tag %d", - i, ptpcoll_module->narray_knomial_proxy_extra_index[i], tag - 1)); - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->narray_knomial_proxy_extra_index[i]], - 
tag - 1, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - } - /* - * I'm root of the operation - * send data to radix_k neighbors - */ - base_block_size = NARRAY_BLOCK_SIZE(count, ptpcoll_module, - narray_knomial_node->level_size); - - NARRAY_SCATTER_B(narray_knomial_node, my_group_index, - group_size, data_buffer, - base_block_size, count, tag, comm, requests, - active_requests, completed); - if (0 == completed) { - *status = PTPCOLL_ROOT_SEND_STARTED; - return BCOL_FN_STARTED; - } - goto EXIT; - } - - /* <-- non root flow --> */ - group_root_index = input_args->root_route->rank; - - if (group_root_index >= group_size) { - /* calculate virtual root */ - virtual_root = - (group_root_index - group_size) / - mca_bcol_ptpcoll_component.narray_knomial_radix; - if (my_group_index == virtual_root) { - PTPCOLL_VERBOSE(10, ("I'm virtual root of the data")); - - rc = MCA_PML_CALL(irecv(data_buffer, count, MPI_BYTE, - group_list[group_root_index], - tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - ++(*active_requests); - /* act like a root */ - relative_group_index = 0; - group_root_index = my_group_index; - goto SCATTER; - } - group_root_index = virtual_root; - } - - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index += group_size; - } - - narray_node = &ptpcoll_module->narray_knomial_node[relative_group_index]; - - data_src = narray_node->parent_rank + group_root_index; - if (data_src >= group_size) { - data_src -= group_size; - } - - comm_root = group_list[data_src]; - - recv_count = NARRAY_BLOCK_SIZE(count, ptpcoll_module, narray_node->level_size); - offset = recv_count * narray_node->rank_on_level; - /* make sure that we do not overun memory */ - if 
(OPAL_UNLIKELY(offset + recv_count > count)) { - recv_count = count - offset; - if (0 >= recv_count) { - goto GATHER; - } - } - - curr_data_buffer = (void *)((unsigned char *)data_buffer + (size_t)offset); - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d[%d], count %d, tag %d, addr %p len %d offset %d", - comm_root, data_src, count, tag, data_buffer, recv_count, offset)); - - rc = MCA_PML_CALL(irecv(curr_data_buffer, recv_count, MPI_BYTE, comm_root, - tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - -SCATTER: - *status = PTPCOLL_WAITING_FOR_DATA; - - rc = bcol_ptpcoll_bcast_narray_test_and_scatter_known_root(ptpcoll_module, - buffer_index, data_buffer, - count, group_root_index, relative_group_index); - - if (BCOL_FN_COMPLETE != rc) { - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - -GATHER: - /* recv operation is done */ - *iteration = 0; - *status = PTPCOLL_GATHER_STARTED; - rc = bcol_ptpcoll_bcast_narray_knomial_gather(ptpcoll_module, - buffer_index, data_buffer, count, - relative_group_index); - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - - ++(*iteration); /* I need it for progress */ - - /* proxy case */ - if ((PTPCOLL_PROXY & ptpcoll_module->narray_type) && - ! 
input_args->root_flag) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_send_n_extra( - ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->narray_knomial_proxy_extra_index, - ptpcoll_module->narray_knomial_proxy_num, - input_args->root_route->rank, - comm, active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - -EXIT: - return BCOL_FN_COMPLETE; -} - -/* Pasha : need to move this code to some common function */ -static int bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_extra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(3, ("BCAST known root, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - - 
PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_extra, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "buff: %p " - ,buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - data_buffer - )); - - /* we have a power 2 group */ - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data")); - /* send the all data to your proxy peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->narray_knomial_proxy_extra_index[0]], tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } else { - PTPCOLL_VERBOSE(9, ("Posting recive from %d tag %d", - ptpcoll_module->narray_knomial_proxy_extra_index[0], tag - 1)); - rc = MCA_PML_CALL(irecv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->narray_knomial_proxy_extra_index[0]], - tag - 1, comm, &requests[*active_requests])); - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -static int bcol_ptpcoll_bcast_known_root_extra_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int rc; - int completed = 0; /* not completed */ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[input_args->buffer_index].requests; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_known_root_extra_progress extra, was called\n")); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - - PTPCOLL_VERBOSE(10, ("Test was matched - %d", rc)); - return BCOL_FN_COMPLETE; -} - - -static int bcol_ptpcoll_bcast_narray_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag = -1; - int rc; - int group_size = ptpcoll_module->group_size; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - ompi_request_t **recv_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int matched = true; - int my_group_index = 
ptpcoll_module->super.sbgp_partner_module->my_index; - int relative_group_index = 0; - netpatterns_tree_node_t *narray_node = NULL; - - PTPCOLL_VERBOSE(3, ("Bcast, Narray tree Progress")); - - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_known_root, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d [%d]" - "buff: %p ", - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, input_args->root_route->rank, - data_buffer)); - - if (0 == *active_requests) { - int group_root_index = input_args->root_route->rank; - /* If the collective does not have any active requests, it - means the initial data was not received from parent. - Check if some data arrived - */ - if (0 == mca_bcol_ptpcoll_test_for_match(recv_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - /* No data was received, return no match error */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - - /* set all paremetres */ - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index +=group_size; - } - narray_node = &ptpcoll_module->narray_node[relative_group_index]; - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* Bcast the data */ - NARRAY_BCAST_NB(narray_node, group_root_index, group_size, - data_buffer, count, tag, comm, send_requests, active_requests); - } - - /* All data was received and sent out. 
- Check if the completion arrived */ - matched = mca_bcol_ptpcoll_test_all_for_match - (active_requests, send_requests, &rc); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - /* If it is last call, we have to recycle memory */ - if(matched) { - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, ("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -static int bcol_ptpcoll_bcast_narray(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int data_src; - int group_size = ptpcoll_module->group_size; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - ompi_request_t **recv_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int matched = true; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int group_root_index; - int relative_group_index = 0; - netpatterns_tree_node_t *narray_node = NULL; - - PTPCOLL_VERBOSE(3, ("Bcast, Narray tree")); - - /* reset active request counter */ - (*active_requests) = 0; - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_narray, buffer index: %d " - 
"tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "buff: %p ", - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - data_buffer)); - - - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm root of the data")); - narray_node = &ptpcoll_module->narray_node[0]; - group_root_index = my_group_index; - /* - * I'm root of the operation - * send data to N childrens - */ - goto NARRAY_BCAST_START; - } - - /* I'm not root */ - group_root_index = input_args->root_route->rank; - - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index += group_size; - } - - data_src = - ptpcoll_module->narray_node[relative_group_index].parent_rank + - group_root_index; - if (data_src >= group_size) { - data_src -= group_size; - } - - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d [%d], count %d, tag %d, addr %p", - group_list[data_src], data_src, - count, tag, data_buffer)); - - - rc = MCA_PML_CALL(irecv(data_buffer, count, MPI_BYTE, - group_list[data_src], - tag, comm, recv_request)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - /* We can not block. So run couple of test for data arrival */ - if (0 == mca_bcol_ptpcoll_test_for_match(recv_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - /* No data was received, return no match error */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - narray_node = &ptpcoll_module->narray_node[relative_group_index]; - -NARRAY_BCAST_START: - /* Bcast the data */ - NARRAY_BCAST_NB(narray_node, group_root_index, group_size, - data_buffer, count, tag, comm, send_requests, active_requests); - - matched = mca_bcol_ptpcoll_test_all_for_match - (active_requests, send_requests, &rc); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - /* If it is last call, we have to recycle memory */ - if(matched) { - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, ("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -int bcol_ptpcoll_bcast_init(mca_bcol_base_module_t *super) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) super; - - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_BCAST; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - - comm_attribs.data_src = DATA_SRC_UNKNOWN; - - if(PTPCOLL_KN_EXTRA == ptpcoll_module->pow_ktype) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot, - bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot_progress); - } else { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_k_nomial_anyroot, - bcol_ptpcoll_bcast_k_nomial_anyroot_progress); - } - - comm_attribs.data_src = DATA_SRC_KNOWN; - switch(mca_bcol_ptpcoll_component.bcast_small_messages_known_root_alg) { - case PTPCOLL_KNOMIAL: - if(PTPCOLL_KN_EXTRA == ptpcoll_module->pow_ktype) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot, - 
bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot_progress); - } else { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_k_nomial_known_root, - bcol_ptpcoll_bcast_k_nomial_known_root_progress); - } - break; - case PTPCOLL_NARRAY: - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_narray, - bcol_ptpcoll_bcast_narray_progress); - break; - default: - PTPCOLL_ERROR(("Unknown algorithm index was selected %", - mca_bcol_ptpcoll_component.bcast_small_messages_known_root_alg)); - return OMPI_ERROR; - } - - comm_attribs.data_src = DATA_SRC_UNKNOWN; - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - /* Anyroot large messages functions registration */ - - if (PTPCOLL_EXTRA == ptpcoll_module->pow_2type) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra, - bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra_progress); - } else { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot, - bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_progress); - } - - /* Known-root large messages functions registration */ - - comm_attribs.data_src = DATA_SRC_KNOWN; - switch(mca_bcol_ptpcoll_component.bcast_large_messages_known_root_alg) { - case PTPCOLL_BINOMIAL_SG: - if (PTPCOLL_EXTRA == ptpcoll_module->pow_2type) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra, - bcol_ptpcoll_bcast_known_root_extra_progress); - /* bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra_progress); */ - } else { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root, - bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_progress); - } - break; - case PTPCOLL_NARRAY_KNOMIAL_SG: - 
if (PTPCOLL_EXTRA == ptpcoll_module->narray_type) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_extra, - bcol_ptpcoll_bcast_known_root_extra_progress); - } else { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root, - bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_progress); - } - break; - default: - PTPCOLL_ERROR(("Unknown algorithm index was selected %", - mca_bcol_ptpcoll_component.bcast_large_messages_known_root_alg)); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h deleted file mode 100644 index 74180bbcf9d..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h +++ /dev/null @@ -1,865 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_PTPCOLL_BCAST_H -#define MCA_BCOL_PTPCOLL_BCAST_H - -#include "ompi_config.h" -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" - -BEGIN_C_DECLS - -int bcol_ptpcoll_bcast_init(mca_bcol_base_module_t *super); - -int bcol_ptpcoll_bcast_k_nomial_anyroot (bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_bcast_k_nomial_anyroot_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_bcast_k_nomial_known_root(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_bcast_k_nomial_known_root_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra_progress(bcol_function_args_t *input_args, - struct 
mca_bcol_base_function_t *const_args); - - -/* macros */ -#define K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER( \ - radix_mask_pow, \ - my_group_index, group_size, group_list, \ - data_buffer, segment_size, count, tag, \ - comm, send_requests, num_pending_sends) \ -do { \ - int rc = OMPI_SUCCESS; \ - int dst; \ - int comm_dst; \ - int send_size; \ - int send_offset; \ - int delta; \ - int dst_boundary_rank; \ - int radix_mask = radix_mask_pow >= 0 ? 1 << radix_mask_pow : 0; \ - \ - while(radix_mask > 0) { \ - /* For each level of tree, do sends */ \ - dst = my_group_index ^ radix_mask; \ - comm_dst = group_list[dst]; \ - \ - dst_boundary_rank = dst & ((~(int)0) << (radix_mask_pow)); \ - \ - send_offset = segment_size * dst_boundary_rank; \ - /* Pasha: make sure that we handle the corner cases */ \ - delta = count - send_offset; \ - if (delta <= 0) { \ - send_size = 0; /* we have to send something, other way it will hang */ \ - } else { \ - /* the tail case */ \ - send_size = (int) \ - (delta - (int)segment_size * radix_mask) < 0 ? delta : \ - (int)segment_size * radix_mask; \ - } \ - \ - /* Non blocking send .... 
*/ \ - PTPCOLL_VERBOSE(9 , \ - ("Bcast p2s, Isend to %d[%d],count %d,tag %d,addr %p [%p] send_size %d,send_offset %d, radix %d %d",\ - dst, comm_dst, count, tag, \ - data_buffer, (void *)((unsigned char *)data_buffer + (size_t)send_offset), \ - send_size, \ - send_offset, \ - radix_mask, \ - radix_mask_pow \ - )); \ - rc = MCA_PML_CALL(isend((void *)((unsigned char *)data_buffer + (size_t)send_offset), \ - send_size, MPI_BYTE, \ - comm_dst, tag, \ - MCA_PML_BASE_SEND_STANDARD, comm, \ - &(send_requests[*num_pending_sends]))); \ - PTPCOLL_VERBOSE(10, ("send request addr is %p", send_requests[*num_pending_sends])); \ - if( OMPI_SUCCESS != rc ) { \ - PTPCOLL_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - ++(*num_pending_sends); \ - radix_mask >>= 1; \ - radix_mask_pow--; \ - } \ -} while(0) - -#define NARRAY_SCATTER_NB(narray_node, process_shift, group_size, \ - data_buffer, base_block_size, count, tag, comm, send_requests, \ - num_pending_sends) \ -do { \ - int n, rc = OMPI_SUCCESS; \ - int dst; \ - int comm_dst; \ - int offset; \ - int size_count = count; \ - \ - /* Send out data to all relevant childrens */ \ - for (n = 0; n < narray_node->n_children && size_count > 0; n++) { \ - \ - dst = narray_node->children_ranks[n] + process_shift; \ - if (dst >= group_size) { \ - dst -= group_size; \ - } \ - \ - comm_dst = group_list[dst]; \ - offset = n * base_block_size; \ - size_count -= base_block_size; \ - if (OPAL_UNLIKELY(size_count < 0)) { \ - count = base_block_size + size_count; \ - } else { \ - count = base_block_size; \ - } \ - \ - /* Non blocking send .... 
*/ \ - PTPCOLL_VERBOSE(9 , ("Bcast, Isend data to %d[%d], count %d, tag %d, addr %p", \ - dst, comm_dst, count, tag, \ - data_buffer)); \ - rc = MCA_PML_CALL(isend((void *)((char *)data_buffer + (size_t)offset), count, MPI_BYTE,\ - comm_dst, tag, \ - MCA_PML_BASE_SEND_STANDARD, comm, \ - &(send_requests[*num_pending_sends]))); \ - if( OMPI_SUCCESS != rc ) { \ - PTPCOLL_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - ++(*num_pending_sends); \ - } \ -} while(0) - -#define NARRAY_SCATTER_B(narray_node, process_shift, group_size, \ - data_buffer, base_block_size, count, tag, comm, send_requests, \ - num_pending_sends, completed) \ -do { \ - NARRAY_SCATTER_NB(narray_node, process_shift, group_size, \ - data_buffer, base_block_size, count, tag, comm, send_requests, \ - num_pending_sends); \ - if (*num_pending_sends > 0) { \ - completed = mca_bcol_ptpcoll_test_all_for_match(num_pending_sends, send_requests, &rc); \ - if (OMPI_SUCCESS != rc) { \ - return OMPI_ERROR; \ - } \ - } else { \ - completed = 1; \ - } \ -} while (0) - -#define CHECK_IF_ROOT_OR_VROOT(module, i) \ - (module->pow_2 == module->ml_mem.ml_buf_desc[i].radix_mask_pow) - -/* inline functions */ -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra( - mca_bcol_ptpcoll_module_t *ptpcoll_module, - void *data_buffer, int count, int tag, - int extra_peer, ompi_communicator_t *comm, - int *active_requests, ompi_request_t **requests) -{ - int rc = OMPI_SUCCESS; - int completed = 0; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - /* tag is -1 already */ - /* send the all data to your extra peer */ - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra to %d tag %d", - extra_peer, tag)); - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[extra_peer], tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - 
PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("PR Extra send was not completed")); - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_send_n_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module, - void *data_buffer, int count, int tag, - int *extra_peers, int num_peers, int skip, - ompi_communicator_t *comm, - int *active_requests, ompi_request_t **requests) -{ - int rc = OMPI_SUCCESS; - int completed = 0; - int i; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - /* send the all data to your extra peer */ - for (i = 0; i < num_peers; i++) { - PTPCOLL_VERBOSE(10, ("send_n_extra to %d tag %d", - extra_peers[i], tag)); - if (extra_peers[i] == skip) { - PTPCOLL_VERBOSE(10, ("SKIP")); - continue; - } - - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[extra_peers[i]], tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("PR Extra send was not completed")); - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_binomial_gather_anyroot(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, void *data_buffer, int count, int base_block_size) -{ - int rc; - int completed = 0; /* not completed */ - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int i; - int *iteration = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - void *curr_data_sbuffer = NULL, - *curr_data_rbuffer = NULL; - int radix_mask_pow = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow; - int delta; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_gather_anyroot %d %d %d", - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration, - ptpcoll_module->pow_2, - 1 << ptpcoll_module->pow_2)); - - /* we assume the iteration #iteration already was completed with probe */ - for (i = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - i < ptpcoll_module->pow_2; i++) { - int pow2 = 1 << i; - int peer_index = my_group_index ^ pow2; - int comm_rank = group_list[peer_index]; - int slen, rlen, - send_offset, - recv_offset; - - if (i > radix_mask_pow) { - /* *active_requests = 0; */ - /* send - receive data from the peer */ - slen = rlen = pow2 * base_block_size; - send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i)); - recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i)); - curr_data_sbuffer = (void *)((unsigned char *)data_buffer + send_offset); - curr_data_rbuffer = (void 
*)((unsigned char *)data_buffer + recv_offset); - - delta = count - recv_offset; - if (delta > 0) { - if (delta < rlen) { - /* recv the tail */ - rlen = delta; - } - PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] recv data %p (offset %d) , len %d , dest %d", - pow2, - 1 << ptpcoll_module->pow_2, - curr_data_rbuffer, - recv_offset, - rlen, - comm_rank)); - rc = MCA_PML_CALL(irecv(curr_data_rbuffer, rlen, MPI_BYTE, - comm_rank, tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - - delta = count - send_offset; - if (delta > 0) { - if (delta < slen) { - /* recv the tail */ - slen = delta; - } - PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] sending data %p (offset %d) , len %d , dest %d", - pow2, - 1 << ptpcoll_module->pow_2, - curr_data_sbuffer, - send_offset, - slen, - comm_rank)); - rc = MCA_PML_CALL(isend(curr_data_sbuffer, slen, MPI_BYTE, - comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - - if (*active_requests > 0) { - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - *iteration = i; - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - } - } else if (i == radix_mask_pow) { - /* only receive data */ - rlen = pow2 * base_block_size; - recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i)); - curr_data_rbuffer = (void *)((unsigned char *)data_buffer + recv_offset); - delta = count - recv_offset; - if (0 >= delta) { - /* we have nothing to send, skip the iteration */ - continue; - } - if (delta < rlen) { - /* recv the tail */ - rlen = delta; - } - /* receive data from the peer */ - PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] recv data %p (offset %d) , len %d , dest %d", - pow2, - 1 << ptpcoll_module->pow_2, - curr_data_rbuffer, - recv_offset, - rlen, - comm_rank)); - rc = MCA_PML_CALL(irecv(curr_data_rbuffer, rlen, MPI_BYTE, - comm_rank, tag, comm, &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - ++(*active_requests); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - *iteration = i; - PTPCOLL_VERBOSE(10, ("Recv was not completed")); - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - PTPCOLL_VERBOSE(10, ("Recv was completed")); - } else if (i < radix_mask_pow) { - /* Only send data */ - slen = pow2 * base_block_size; - send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i)); - curr_data_sbuffer = (void *)((unsigned char *)data_buffer + send_offset); - delta = count - send_offset; - if (0 >= delta) { - /* we have nothing to send, skip the iteration */ - continue; - } - if (delta < slen) { - slen = delta; - } - PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] sending data %p (offset %d) , len %d , dest %d", - pow2, - 1 << ptpcoll_module->pow_2, - curr_data_sbuffer, - send_offset, - slen, - comm_rank)); - rc = MCA_PML_CALL(isend(curr_data_sbuffer, slen, MPI_BYTE, - comm_rank, tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - *iteration = i; - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - } - } - - return BCOL_FN_COMPLETE; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_binomial_probe_and_scatter_anyroot(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, void *data_buffer, int count, int base_block_size) -{ - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int rc; - int completed = 0; /* not completed */ - int comm_root; - int i; - int *radix_mask_pow = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_status_public_t status; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int pow2_group_size = ptpcoll_module->pow_2num; - int pow2_distance; - int my_left_boundary_rank; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int group_root_index = 0; - void *curr_data_buffer = NULL; - int tag = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - int recv_count = 0; - int *coll_status = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status; - - assert(0 == *active_requests); - - PTPCOLL_VERBOSE(10, ("Running bcol_ptpcoll_bcast_binomial_probe_and_scatter_anyroot")); - for (i = 0; i < cm->num_to_probe && - 0 == completed; i++) { - MCA_PML_CALL(iprobe(MPI_ANY_SOURCE, tag, - comm, &completed, &status)); - PTPCOLL_VERBOSE(10, ("Bcast, iprobe tag %d", - tag)); - } - - /* the function always returns OMPI_SUCCESS, so we don't check return code */ - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("IPROBE was not matched")); - /* No data was received, return no match error */ - return BCOL_FN_NOT_STARTED; - } - - comm_root = status.MPI_SOURCE; - - - PTPCOLL_VERBOSE(9, ("IPROBE was matched, root of the data on communicator is 
%d", comm_root)); - - /* For proxy we have to check if we got something from extra node */ - if (PTPCOLL_PROXY & ptpcoll_module->pow_2type) { - if (group_list[ptpcoll_module->proxy_extra_index] == comm_root) { - PTPCOLL_VERBOSE(9, ("IPROBE was matched, root of the data on communicator is extra node %d", - comm_root)); - /* scatter the data among other peer in the pow2 group */ - *radix_mask_pow = ptpcoll_module->pow_2; - - pow2_distance = ptpcoll_module->pow_2 - 1; - curr_data_buffer = data_buffer; - recv_count = count; - goto PR_SCATTHER; - } - } - - /* Find group index for communicator root of the data */ - group_root_index = get_group_index_and_distance_for_binomial - (my_group_index, comm_root, pow2_group_size, group_list, &pow2_distance); - if (OPAL_UNLIKELY(group_root_index < 0)) { - PTPCOLL_ERROR(("Fatal error, no group root index found, my id %d, pow2_g_size %d comm_root %d", - my_group_index, pow2_group_size, comm_root)); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("Group root index is %d distance is %d", - group_root_index, pow2_distance)); - - /* Use group_root_index to calculate the */ - - /* Post receive that will fetch the data */ - /* Pasha: Who is packing data ? - Should I assume that we get contiguous buffer ? - Or should I pack by myself - =================================================================================================== - === On this stage I assume that data is contiguous. 
So I use MPI_BYTE datatype and COUNT = size === - =================================================================================================== - */ - - recv_count = base_block_size * (1 << pow2_distance); /* we may receive larger data */ - - my_left_boundary_rank = my_group_index & ((~(int)0) << pow2_distance ); - - curr_data_buffer = (void *)((unsigned char *)data_buffer + - (size_t) base_block_size * my_left_boundary_rank); - - *radix_mask_pow = pow2_distance; - - pow2_distance--; - -PR_SCATTHER: - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d[%d], " - "recv_count %d, tag %d, addr %p, offset %d, pow2_distace %d", - comm_root, group_root_index, recv_count, - tag, curr_data_buffer, - my_group_index * base_block_size, pow2_distance)); - - rc = MCA_PML_CALL(recv(curr_data_buffer, recv_count, MPI_BYTE, - comm_root, tag, comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("Bcast, Data was received")); - - /* Sending forward the data over K-nomial tree */ - *coll_status = PTPCOLL_SCATTER_STARTED; - K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER( - pow2_distance, - my_group_index, group_size, group_list, - data_buffer, base_block_size, - count, tag, comm, requests, - active_requests); - - /* Since the next step (gather) does not really require - completion on scatter , we may return complete */ - return BCOL_FN_COMPLETE; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_binomial_root_to_src(int group_root, int my_rank, - int pow2_size, int group_size, int *distance) -{ - int root, relative_rank, src, - pow2_distance = 0, i; - - if (group_root < pow2_size) { - root = group_root; - } else { - /* the source of the data is extra node, - the real root it represented by some rank from - pow2 group */ - root = group_root - pow2_size; - /* shortcut for the case when my rank is root for the group */ - if (my_rank == root) { - *distance = -1; - return 
group_root; - } - } - - relative_rank = (my_rank - root) < 0 ? my_rank - root + pow2_size : - my_rank - root; - - for (i = 1; i < pow2_size; i<<=1, pow2_distance++) { - if (relative_rank & i) { - src = my_rank ^ i; - if (src >= pow2_size) - src -= pow2_size; - - *distance = pow2_distance; - return src; - } - } - - /* error case */ - *distance = -1; - return -1; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_binomial_test_and_scatter_known_root(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, void *data_buffer, int count, int base_block_size) -{ - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int rc; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int tmp_radix_mask_pow = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow - 1; - int tag = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - int *status = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status; - - PTPCOLL_VERBOSE(10, ("Running bcol_ptpcoll_bcast_binomial_probe_and_scatter_anyroot")); - - if (0 == mca_bcol_ptpcoll_test_all_for_match(active_requests, - requests, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - PTPCOLL_VERBOSE(10, ("Bcast, Data was received")); - - /* Sending forward the data over binimial nomial tree */ - *status = PTPCOLL_SCATTER_STARTED; - K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER( - tmp_radix_mask_pow, - my_group_index, group_size, group_list, - data_buffer, base_block_size, - count, tag, comm, requests, - active_requests); - - - return BCOL_FN_COMPLETE; -} - -#define NARRAY_BLOCK_SIZE(size, module, level_size) \ - ((size + (module)->full_narray_tree_num_leafs - 1) / \ - (module)->full_narray_tree_num_leafs) * \ - ((module)->full_narray_tree_num_leafs / \ - ((0 == level_size) ? \ - mca_bcol_ptpcoll_component.narray_knomial_radix : \ - level_size)) - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_narray_test_and_scatter_known_root(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, void *data_buffer, int count, int process_shift, - int relative_group_index) -{ - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int rc; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - int *status = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status; - int scatter_count = 0; - int offset = 0; - int base_block_size = 0; - void *curr_data_buffer = NULL; - - PTPCOLL_VERBOSE(10, ("Running bcol_ptpcoll_bcast_narray_test_and_scatter_known_root")); - - if (0 == mca_bcol_ptpcoll_test_all_for_match(active_requests, - requests, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - /* Sending forward the data over binimial nomial tree */ - *status = PTPCOLL_SCATTER_STARTED; - if(0 == relative_group_index) { - scatter_count = count; - } else { - scatter_count = NARRAY_BLOCK_SIZE(count, ptpcoll_module, - ptpcoll_module->narray_knomial_node[relative_group_index].level_size); - } - - offset = scatter_count * - ptpcoll_module->narray_knomial_node[relative_group_index].rank_on_level; - - /* make sure that we do not overun memory */ - if (OPAL_UNLIKELY(offset + scatter_count > count)) { - scatter_count = count - offset; - } - - PTPCOLL_VERBOSE(10, ("Bcast, Data was received %d %d %d", - scatter_count, - ptpcoll_module->narray_knomial_node[relative_group_index].level_size, - ptpcoll_module->narray_knomial_node[relative_group_index].rank_on_level)); - - - curr_data_buffer = (void *)((unsigned char *)data_buffer + (size_t)offset); - - /* calculating scatter block size for next level of tree */ - base_block_size = NARRAY_BLOCK_SIZE(count, ptpcoll_module, - ptpcoll_module->narray_knomial_node[relative_group_index].level_size * - mca_bcol_ptpcoll_component.narray_knomial_radix); - - PTPCOLL_VERBOSE(10, ("scatter_known_rootaaa %d %d %d %d %d",scatter_count, offset, base_block_size, - ptpcoll_module->narray_knomial_node[relative_group_index].level_size /mca_bcol_ptpcoll_component.narray_knomial_radix, - ptpcoll_module->full_narray_tree_num_leafs)); - - NARRAY_SCATTER_NB((&ptpcoll_module->narray_knomial_node[relative_group_index]), - process_shift, ptpcoll_module->full_narray_tree_size, - curr_data_buffer, base_block_size, scatter_count, tag, comm, - requests, active_requests); - - /* Bummer, I tried to prevent this, special case for virtual root */ - if(0 == relative_group_index) { - if (0 == mca_bcol_ptpcoll_test_all_for_match(active_requests, - requests, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - *status = PTPCOLL_ROOT_SEND_STARTED; - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_narray_knomial_gather(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, void *data_buffer, const int count, - const int relative_group_index) -{ - int completed = 0; /* not completed */ - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int blocks_in_step = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int group_size = ptpcoll_module->full_narray_tree_size; - int i, k, - rc, - len, slen, rlen, - peer, group_peer; - size_t s_offset, - r_offset; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - netpatterns_narray_knomial_tree_node_t *narray_node = - &ptpcoll_module->narray_knomial_node[relative_group_index]; - netpatterns_k_exchange_node_t *k_node = - &narray_node->k_node; - mca_bcol_ptpcoll_component_t *cm = - &mca_bcol_ptpcoll_component; - size_t base_block_size = - NARRAY_BLOCK_SIZE(count, ptpcoll_module, narray_node->level_size); - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_narray_knomial_gather %d %d %d %d %d %d %d", - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration, - base_block_size, count, narray_node->level_size, - relative_group_index, k_node->n_exchanges, tag)); - - /* we assume the iteration #iteration already was completed with probe */ - for (i = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - i < k_node->n_exchanges; i++, blocks_in_step *= cm->narray_knomial_radix) { - - len = base_block_size * blocks_in_step; - - for (k = 0; k < cm->narray_knomial_radix - 1; k++) { - 
group_peer = my_group_index + - (k_node->rank_exchanges[i][k] - narray_node->rank_on_level); - if (group_peer >= group_size) { - group_peer -= group_size; - } else if (group_peer < 0) { - group_peer += group_size; - } - peer = group_list[group_peer]; - - r_offset = (size_t)k_node->rank_exchanges[i][k] / blocks_in_step * - len; - - /* check that we do not run out of message boundary */ - if (OPAL_UNLIKELY(r_offset + len > (size_t)count)) { - rlen = count - r_offset; - if (OPAL_UNLIKELY(rlen <= 0)) { - continue; - } - } else { - rlen = len; - } - PTPCOLL_VERBOSE(10, ("Recv data from %d, addr %p offset %d len %d %d %d tag %d", - peer, data_buffer, r_offset, rlen, len, blocks_in_step, tag)); - rc = MCA_PML_CALL(irecv((void *)((unsigned char *)data_buffer + r_offset), - rlen, MPI_BYTE, - peer, tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - - for (k = 0; k < cm->narray_knomial_radix - 1; k++) { - group_peer = my_group_index + - (k_node->rank_exchanges[i][k] - narray_node->rank_on_level); - if (group_peer >= group_size) { - group_peer -= group_size; - } else if (group_peer < 0) { - group_peer += group_size; - } - peer = group_list[group_peer]; - - s_offset = (size_t)narray_node->rank_on_level / blocks_in_step * - len; - - /* check that we do not run out of message boundary */ - if (OPAL_UNLIKELY(s_offset + len > (size_t)count)) { - slen = count - s_offset; - if (OPAL_UNLIKELY(slen <= 0)) { - continue; - } - } else { - slen = len; - } - - PTPCOLL_VERBOSE(10, ("Send data from %d, addr %p offset %d len %d %d %d tag %d", - peer, data_buffer, s_offset, slen, len, blocks_in_step, tag)); - rc = MCA_PML_CALL(isend((void *)((unsigned char *)data_buffer + s_offset), - slen, MPI_BYTE, - peer, tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return 
OMPI_ERROR; - } - ++(*active_requests); - } - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - /* cache data for next iteration */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration = - i; /* why not to store step for next iteration ?! */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = - blocks_in_step * cm->narray_knomial_radix; - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_component.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_component.c deleted file mode 100644 index c8c238c280f..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_component.c +++ /dev/null @@ -1,174 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_ptpcoll.h" -#include "ompi/mca/bcol/base/base.h" - -#include "bcol_ptpcoll_mca.h" -#include "bcol_ptpcoll_utils.h" - -/* - * Public string showing the bcol ptpcoll V2 component version number - */ -const char *mca_bcol_ptpcoll_component_version_string = - "Open MPI bcol - ptpcoll collective MCA component version " OMPI_VERSION; - - -/* - * Local functions - */ - -static int ptpcoll_open(void); -static int ptpcoll_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_bcol_ptpcoll_component_t mca_bcol_ptpcoll_component = { - - /* First, fill in the super */ - - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .bcol_version = { - MCA_BCOL_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "ptpcoll", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = ptpcoll_open, - .mca_close_component = ptpcoll_close, - .mca_register_component_params = mca_bcol_ptpcoll_register_mca_params, - }, - - /* Initialization / querying functions */ - - .collm_init_query = mca_bcol_ptpcoll_init_query, - .collm_comm_query = mca_bcol_ptpcoll_comm_query, - .init_done = false, - .need_ordering = false, - }, - - /* component specific */ - -}; - -static void -collreq_construct(mca_bcol_ptpcoll_collreq_t *collreq) -{ - collreq->requests = NULL; -} - -static void -collreq_destruct(mca_bcol_ptpcoll_collreq_t *collreq) -{ - if (NULL != collreq->requests) { - free(collreq->requests); - } -} - 
-OBJ_CLASS_INSTANCE(mca_bcol_ptpcoll_collreq_t, - opal_free_list_item_t, - collreq_construct, - collreq_destruct); - -/* - * Open the component - */ -static int ptpcoll_open(void) -{ - return OMPI_SUCCESS; -} - -/* - * Close the component - */ -static int ptpcoll_close(void) -{ - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_bcol_ptpcoll_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* at this stage there is no reason to disaulify this component */ - - /* done */ - return OMPI_SUCCESS; -} - -/* memory management routines */ - -/* allocte memory - this is a no-op function intended to work with - * mpool2, which will use malloc for allocation, if no other allocator - * is available. - */ -void * bcol_ptpcoll_allocate_memory(size_t length, size_t alignment, - struct mca_bcol_base_module_t *bcol_module) -{ - /* do nothing */ - return NULL; -} - -/* - * register memory - nothing to do - */ -int bcol_ptpcoll_register_memory(void * in_ptr, size_t length, size_t alignment, - struct mca_bcol_base_module_t *bcol_module) -{ - /* nothing to do */ - return OMPI_SUCCESS; -} - -/* deregister memory - nothing to do - */ -int bcol_ptpcoll_deregister_memory( void * in_ptr, - struct mca_bcol_base_module_t *bcol_module) -{ - /* nothing to do */ - return OMPI_SUCCESS; -} - -/* free memory - since we don't allocate, we also don't free */ -int bcol_ptpcoll_free_memory(void *ptr, - struct mca_bcol_base_module_t *bcol_module) -{ - /* nnthing to do */ - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanin.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanin.c deleted file mode 100644 index 57dafce7bdf..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanin.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. 
All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h" - -/* - * Fanin routines - no user data - */ - -int bcol_ptpcoll_fanin( bcol_function_args_t *input_args, - struct mca_bcol_base_module_t *module) -{ - /* local variable */ - int ret=OMPI_SUCCESS; - /* mca_bcol_ptpcoll_module_t *ptp_module=(mca_bcol_ptpcoll_module_t *) module; */ - - /* done */ - return ret; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanout.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanout.c deleted file mode 100644 index ae5739391b7..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanout.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h" - -/* - * Fanin routines - no user data - */ - -int bcol_ptpcoll_fanout( bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - int ret = OMPI_SUCCESS; - /* TBD: - mca_bcol_ptpcoll_module_t *ptp_module=(mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - */ - - /* done */ - return ret; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.c deleted file mode 100644 index 57caf7c1106..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.c +++ /dev/null @@ -1,197 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include -#include -#include - -#include "bcol_ptpcoll_mca.h" -#include "bcol_ptpcoll.h" - -/* - * Local flags - */ -enum { - REGINT_NEG_ONE_OK = 0x01, - REGINT_GE_ZERO = 0x02, - REGINT_GE_ONE = 0x04, - REGINT_NONZERO = 0x08, - REGINT_MAX = 0x88 -}; - -enum { - REGSTR_EMPTY_OK = 0x01, - - REGSTR_MAX = 0x88 -}; - -#if 0 /* Pasha: we will be need this function in future */ -/* - * utility routine for string parameter registration - */ -static int reg_string(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - const char* default_value, char **storage, - int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll", - deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} -#endif - -/* - * utility routine for integer parameter registration - */ -static int reg_int(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - int default_value, int *storage, int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_INT, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll", - 
deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) { - return OMPI_SUCCESS; - } - if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) || - (0 != (flags & REGINT_GE_ONE) && *storage < 1) || - (0 != (flags & REGINT_NONZERO) && 0 == *storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -static int reg_bool(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - bool default_value, bool *storage) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (0 > index) { - return index; - } - - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll", - deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - return OMPI_SUCCESS; -} - -int mca_bcol_ptpcoll_register_mca_params(void) -{ - int ret, tmp; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - - ret = OMPI_SUCCESS; -#define CHECK(expr) do {\ - tmp = (expr); \ - if (OMPI_SUCCESS != tmp) ret = tmp; \ - } while (0) - - CHECK(reg_int("priority", NULL, - "PTPCOLL component priority" - "(from 0(low) to 90 (high))", 90, &cm->super.priority, 0)); - - CHECK(reg_int("verbose", NULL, - "Output some verbose PTPCOLL information " - "(0 = no output, nonzero = output)", 0, &cm->verbose, REGINT_GE_ZERO)); - - CHECK(reg_int("k_nomial_radix", NULL, - "The radix of K-Nomial Tree " - "(starts from 2)", 2, &cm->k_nomial_radix, REGINT_GE_ONE)); - - CHECK(reg_int("narray_radix", NULL, - "The radix of Narray Tree " - "(starts from 2)", 2, &cm->narray_radix, REGINT_GE_ONE)); - - CHECK(reg_int("narray_knomial_radix", NULL, - "The radix of Narray/Knomial Tree for 
scatther-gather type algorithms" - "(starts from 2)", 2, &cm->narray_knomial_radix, REGINT_GE_ONE)); - - CHECK(reg_int("num_to_probe", NULL, - "Number of probe operation in single source data check" - "(starts from 8)", 8, &cm->num_to_probe, REGINT_GE_ONE)); - - CHECK(reg_int("bcast_small_msg_known_root_alg", NULL, - "Algorithm selection for bcast small messages known root" - "(1 - K-nomial, 2 - N-array)", 1, &cm->bcast_small_messages_known_root_alg, - REGINT_GE_ZERO)); - - CHECK(reg_int("bcast_large_msg_known_root_alg", NULL, - "Algorithm selection for bcast large messages known root" - "(1 - Binomial scatther-gather, 2 - N-array scather, K-nomial gather)", - 1, &cm->bcast_large_messages_known_root_alg, REGINT_GE_ZERO)); - - CHECK(reg_int("barrier_alg", NULL, - "Algorithm selection for Barrier" - "(1 - Recursive doubling, 2 - Recursive K-ing)", - 1, &cm->barrier_alg, REGINT_GE_ZERO)); - - /* register parmeters controlling message fragementation */ - CHECK(reg_int("min_frag_size", NULL, - "Minimum fragment size", - getpagesize(), &cm->super.min_frag_size, REGINT_GE_ONE)); - - CHECK(reg_int("max_frag_size", NULL, - "Maximum fragment size", - FRAG_SIZE_NO_LIMIT, &cm->super.max_frag_size, REGINT_NONZERO)); - - CHECK(reg_bool("can_use_user_buffers", NULL, - "User memory can be used by the collective algorithms", - 1, &cm->super.can_use_user_buffers)); - - return ret; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.h deleted file mode 100644 index 4d1067d9e49..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef MCA_BCOL_PTPCOLL_MCA_H -#define MCA_BCOL_PTPCOLL_MCA_H - -#include "ompi_config.h" - -BEGIN_C_DECLS - -int mca_bcol_ptpcoll_register_mca_params(void); - -END_C_DECLS -#endif diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c deleted file mode 100644 index ca8c32ec8dd..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c +++ /dev/null @@ -1,760 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "opal/util/show_help.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/pml/pml.h" /* need this for the max tag size */ - -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" -#include "bcol_ptpcoll_bcast.h" -#include "bcol_ptpcoll_allreduce.h" -#include "bcol_ptpcoll_reduce.h" - -#define BCOL_PTP_CACHE_LINE_SIZE 128 - -/* - * Local functions - */ -static int alloc_allreduce_offsets_array(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int rc = OMPI_SUCCESS, i = 0; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int n_exchanges = k_node->n_exchanges; - - /* Precalculate the allreduce offsets */ - if (0 < k_node->n_exchanges) { - ptpcoll_module->allgather_offsets = (int **) calloc (n_exchanges, 
sizeof(int *)); - - if (!ptpcoll_module->allgather_offsets) { - return OMPI_ERROR; - } - - for (i = 0; i < n_exchanges ; i++) { - ptpcoll_module->allgather_offsets[i] = (int *) calloc (NOFFSETS, sizeof(int)); - - if (!ptpcoll_module->allgather_offsets[i]){ - return OMPI_ERROR; - } - } - } - - return rc; -} - -static int free_allreduce_offsets_array(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int rc = OMPI_SUCCESS, i = 0; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int n_exchanges = k_node->n_exchanges; - - if (ptpcoll_module->allgather_offsets) { - for (i=0; i < n_exchanges; i++) { - free (ptpcoll_module->allgather_offsets[i]); - } - } - - free(ptpcoll_module->allgather_offsets); - ptpcoll_module->allgather_offsets = NULL; - return rc; -} - -static void -mca_bcol_ptpcoll_module_construct(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - uint64_t i; - /* Pointer to component */ - ptpcoll_module->narray_node = NULL; - ptpcoll_module->allgather_offsets = NULL; - ptpcoll_module->super.bcol_component = (mca_bcol_base_component_t *) &mca_bcol_ptpcoll_component; - ptpcoll_module->super.list_n_connected = NULL; - ptpcoll_module->super.hier_scather_offset = 0; - /* no header support in ptp */ - ptpcoll_module->super.header_size = 0; - /* No network context */ - ptpcoll_module->super.network_context = NULL; - /* set the upper limit on the tag */ - i = 2; - ptpcoll_module->tag_mask = 1; - while ( i <= (uint64_t) mca_pml.pml_max_tag && i > 0) { - i <<= 1; - } - ptpcoll_module->ml_mem.ml_buf_desc = NULL; - ptpcoll_module->tag_mask = i - 1; -} - -static void -mca_bcol_ptpcoll_module_destruct(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int i; - mca_bcol_ptpcoll_local_mlmem_desc_t *ml_mem = &ptpcoll_module->ml_mem; - - if (NULL != ml_mem->ml_buf_desc) { - /* Release the memory structs that were cache ML memory data */ - uint32_t i, j, ci; - for (i = 0; i < ml_mem->num_banks; i++) { - for (j = 0; j < ml_mem->num_buffers_per_bank; j++) { 
- ci = i * ml_mem->num_buffers_per_bank + j; - if (NULL != ml_mem->ml_buf_desc[ci].requests) { - free(ml_mem->ml_buf_desc[ci].requests); - } - } - } - /* release the buffer descriptor */ - free(ml_mem->ml_buf_desc); - ml_mem->ml_buf_desc = NULL; - } - - if (NULL != ptpcoll_module->allgather_offsets) { - free_allreduce_offsets_array(ptpcoll_module); - } - - if (NULL != ptpcoll_module->narray_node) { - for (i = 0; i < ptpcoll_module->group_size; i++) { - if (NULL != ptpcoll_module->narray_node[i].children_ranks) { - free(ptpcoll_module->narray_node[i].children_ranks); - } - } - - free(ptpcoll_module->narray_node); - ptpcoll_module->narray_node = NULL; - } - - OBJ_DESTRUCT(&ptpcoll_module->collreqs_free); - - if (NULL != ptpcoll_module->super.list_n_connected) { - free(ptpcoll_module->super.list_n_connected); - ptpcoll_module->super.list_n_connected = NULL; - } - - for (i = 0; i < BCOL_NUM_OF_FUNCTIONS; i++){ - OPAL_LIST_DESTRUCT((&ptpcoll_module->super.bcol_fns_table[i])); - } - - - if (NULL != ptpcoll_module->kn_proxy_extra_index) { - free(ptpcoll_module->kn_proxy_extra_index); - ptpcoll_module->kn_proxy_extra_index = NULL; - } - - if (NULL != ptpcoll_module->alltoall_iovec) { - free(ptpcoll_module->alltoall_iovec); - ptpcoll_module->alltoall_iovec = NULL; - } - - if (NULL != ptpcoll_module->narray_knomial_proxy_extra_index) { - free(ptpcoll_module->narray_knomial_proxy_extra_index); - ptpcoll_module->narray_knomial_proxy_extra_index = NULL; - } - - if (NULL != ptpcoll_module->narray_knomial_node) { - for(i = 0; i < ptpcoll_module->full_narray_tree_size; i++) { - netpatterns_cleanup_narray_knomial_tree (ptpcoll_module->narray_knomial_node + i); - } - free(ptpcoll_module->narray_knomial_node); - ptpcoll_module->narray_knomial_node = NULL; - } - - netpatterns_cleanup_recursive_knomial_allgather_tree_node(&ptpcoll_module->knomial_allgather_tree); - netpatterns_cleanup_recursive_knomial_tree_node(&ptpcoll_module->knomial_exchange_tree); - -} - 
-OBJ_CLASS_INSTANCE(mca_bcol_ptpcoll_module_t, - mca_bcol_base_module_t, - mca_bcol_ptpcoll_module_construct, - mca_bcol_ptpcoll_module_destruct); - -static int init_ml_buf_desc(mca_bcol_ptpcoll_ml_buffer_desc_t **desc, void *base_addr, uint32_t num_banks, - uint32_t num_buffers_per_bank, uint32_t size_buffer, uint32_t header_size, int group_size, int pow_k) -{ - uint32_t i, j, ci; - mca_bcol_ptpcoll_ml_buffer_desc_t *tmp_desc = NULL; - int k_nomial_radix = mca_bcol_ptpcoll_component.k_nomial_radix; - int pow_k_val = (0 == pow_k) ? 1 : pow_k; - int num_to_alloc = - ((k_nomial_radix - 1) * pow_k_val * 2 + 1 > mca_bcol_ptpcoll_component.narray_radix) ? - (k_nomial_radix - 1) * pow_k_val * 2 + 1 : - mca_bcol_ptpcoll_component.narray_radix * 2; - - - *desc = (mca_bcol_ptpcoll_ml_buffer_desc_t *)calloc(num_banks * num_buffers_per_bank, - sizeof(mca_bcol_ptpcoll_ml_buffer_desc_t)); - if (NULL == *desc) { - PTPCOLL_ERROR(("Failed to allocate memory")); - return OMPI_ERROR; - } - - tmp_desc = *desc; - - for (i = 0; i < num_banks; i++) { - for (j = 0; j < num_buffers_per_bank; j++) { - ci = i * num_buffers_per_bank + j; - tmp_desc[ci].bank_index = i; - tmp_desc[ci].buffer_index = j; - /* *2 is for gather session +1 for extra peer */ - tmp_desc[ci].requests = (ompi_request_t **) - calloc(num_to_alloc, sizeof(ompi_request_t *)); - if (NULL == tmp_desc[ci].requests) { - PTPCOLL_ERROR(("Failed to allocate memory for requests")); - return OMPI_ERROR; - } - /* - * ptpcoll don't have any header, but other bcols may to have. So - * we need to take it in account. 
- */ - tmp_desc[ci].data_addr = (void *) - ((unsigned char*)base_addr + ci * size_buffer + header_size); - PTPCOLL_VERBOSE(10, ("ml memory cache setup %d %d - %p", i, j, tmp_desc[ci].data_addr)); - - /* init reduce implementation flags */ - tmp_desc[ci].reduce_init_called = false; - tmp_desc[ci].reduction_status = 0; - } - } - - return OMPI_SUCCESS; -} - -static void mca_bcol_ptpcoll_set_small_msg_thresholds(struct mca_bcol_base_module_t *super) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) super; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - - /* Subtract out the maximum header size when calculating the thresholds. This - * will account for the headers used by the basesmuma component. If we do not - * take these headers into account we may overrun our buffer. */ - - /* Set the Allgather threshold equals to a ML buff size */ - super->small_message_thresholds[BCOL_ALLGATHER] = - (ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / - ompi_comm_size(ptpcoll_module->super.sbgp_partner_module->group_comm); - - /* Set the Bcast threshold, all Bcast algths have the same threshold */ - super->small_message_thresholds[BCOL_BCAST] = - (ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX); - - /* Set the Alltoall threshold, the Ring algth sets some limitation */ - super->small_message_thresholds[BCOL_ALLTOALL] = - (ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / 2; - - /* Set the Allreduce threshold, NARRAY algth sets some limitation */ - super->small_message_thresholds[BCOL_ALLREDUCE] = - (ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / ptpcoll_module->k_nomial_radix; - - /* Set the Reduce threshold, NARRAY algth sets some limitation */ - super->small_message_thresholds[BCOL_REDUCE] = - (ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / cm->narray_radix; -} - -/* - * Cache information about ML memory - */ -static int mca_bcol_ptpcoll_cache_ml_memory_info(struct 
mca_bcol_base_memory_block_desc_t *payload_block, - uint32_t data_offset, - struct mca_bcol_base_module_t *bcol, - void *reg_data) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) bcol; - mca_bcol_ptpcoll_local_mlmem_desc_t *ml_mem = &ptpcoll_module->ml_mem; - struct mca_bcol_base_memory_block_desc_t *desc = payload_block; - int group_size = ptpcoll_module->super.sbgp_partner_module->group_size; - - PTPCOLL_VERBOSE(10, ("mca_bcol_ptpcoll_init_buffer_memory was called")); - - /* cache ml mem desc tunings localy */ - ml_mem->num_banks = desc->num_banks; - ml_mem->num_buffers_per_bank = desc->num_buffers_per_bank; - ml_mem->size_buffer = desc->size_buffer; - - PTPCOLL_VERBOSE(10, ("ML buffer configuration num banks %d num_per_bank %d size %d base addr %p", - desc->num_banks, desc->num_buffers_per_bank, desc->size_buffer, desc->block->base_addr)); - - /* Set first bank index for release */ - ml_mem->bank_index_for_release = 0; - - if (OMPI_SUCCESS != init_ml_buf_desc(&ml_mem->ml_buf_desc, - desc->block->base_addr, - ml_mem->num_banks, - ml_mem->num_buffers_per_bank, - ml_mem->size_buffer, - data_offset, - group_size, - ptpcoll_module->pow_k)) { - PTPCOLL_VERBOSE(10, ("Failed to allocate rdma memory descriptor\n")); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("ptpcoll_module = %p, ml_mem_desc = %p.\n", - ptpcoll_module)); - - return OMPI_SUCCESS; -} - -/* - * Load ptpcoll bcol functions - */ -static void load_func(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int fnc; - - /* reset everything to NULL */ - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - - /*ptpcoll_module->super.bcol_function_table[fnc] = NULL;*/ - ptpcoll_module->super.bcol_function_table[fnc] = NULL; - ptpcoll_module->super.bcol_function_init_table[fnc] = NULL; - } - - ptpcoll_module->super.bcol_function_init_table[BCOL_BARRIER] = bcol_ptpcoll_barrier_init; - - ptpcoll_module->super.bcol_function_init_table[BCOL_BCAST] = bcol_ptpcoll_bcast_init; - 
ptpcoll_module->super.bcol_function_init_table[BCOL_ALLREDUCE] = bcol_ptpcoll_allreduce_init; - ptpcoll_module->super.bcol_function_init_table[BCOL_ALLGATHER] = bcol_ptpcoll_allgather_init; - ptpcoll_module->super.bcol_function_table[BCOL_BCAST] = bcol_ptpcoll_bcast_k_nomial_anyroot; - ptpcoll_module->super.bcol_function_init_table[BCOL_ALLTOALL] = NULL; - ptpcoll_module->super.bcol_function_init_table[BCOL_SYNC] = mca_bcol_ptpcoll_memsync_init; - ptpcoll_module->super.bcol_function_init_table[BCOL_REDUCE] = bcol_ptpcoll_reduce_init; - - /* ML memory cacher */ - ptpcoll_module->super.bcol_memory_init = mca_bcol_ptpcoll_cache_ml_memory_info; - - /* Set thresholds */ - ptpcoll_module->super.set_small_msg_thresholds = mca_bcol_ptpcoll_set_small_msg_thresholds; - - /* setup recursive k-ing tree */ - ptpcoll_module->super.k_nomial_tree = mca_bcol_ptpcoll_setup_knomial_tree; -} - -int mca_bcol_ptpcoll_setup_knomial_tree(mca_bcol_base_module_t *super) -{ - mca_bcol_ptpcoll_module_t *p2p_module = (mca_bcol_ptpcoll_module_t *) super; - int rc = 0; - - rc = netpatterns_setup_recursive_knomial_allgather_tree_node( - p2p_module->super.sbgp_partner_module->group_size, - p2p_module->super.sbgp_partner_module->my_index, - mca_bcol_ptpcoll_component.k_nomial_radix, - super->list_n_connected, - &p2p_module->knomial_allgather_tree); - - return rc; -} - -/* The function used to calculate size */ -static int calc_full_tree_size(int radix, int group_size, int *num_leafs) -{ - int level_cnt = 1; - int total_cnt = 0; - - while( total_cnt < group_size ) { - total_cnt += level_cnt; - level_cnt *= radix; - } - - if (total_cnt > group_size) { - *num_leafs = level_cnt / radix; - return total_cnt - level_cnt / radix; - } else { - *num_leafs = level_cnt; - return group_size; - } -} - -/* Setup N-array scatter Knomial-gather static information */ -static int load_narray_knomial_tree (mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int rc, i, peer; - mca_bcol_ptpcoll_component_t *cm = 
&mca_bcol_ptpcoll_component; - - ptpcoll_module->full_narray_tree_size = calc_full_tree_size( - cm->narray_knomial_radix, - ptpcoll_module->group_size, - &ptpcoll_module->full_narray_tree_num_leafs); - - ptpcoll_module->narray_knomial_proxy_extra_index = (int *) - malloc(sizeof(int) * (cm->narray_knomial_radix)); - if (NULL == ptpcoll_module->narray_knomial_proxy_extra_index) { - PTPCOLL_ERROR(("Failed to allocate memory")); - goto Error; - } - - ptpcoll_module->narray_knomial_node = calloc( - ptpcoll_module->full_narray_tree_size, - sizeof(netpatterns_narray_knomial_tree_node_t)); - if(NULL == ptpcoll_module->narray_knomial_node) { - goto Error; - } - - PTPCOLL_VERBOSE(10 ,("My type is proxy, full tree size = %d [%d]", - ptpcoll_module->full_narray_tree_size, - cm->narray_knomial_radix - )); - - if (ptpcoll_module->super.sbgp_partner_module->my_index < - ptpcoll_module->full_narray_tree_size) { - if (ptpcoll_module->super.sbgp_partner_module->my_index < - ptpcoll_module->group_size - ptpcoll_module->full_narray_tree_size) { - ptpcoll_module->narray_type = PTPCOLL_PROXY; - for (i = 0; i < cm->narray_knomial_radix; i++) { - peer = - ptpcoll_module->super.sbgp_partner_module->my_index * - cm->narray_knomial_radix + i + - ptpcoll_module->full_narray_tree_size; - if (peer >= ptpcoll_module->group_size) { - break; - } - ptpcoll_module->narray_knomial_proxy_extra_index[i] = peer; - } - ptpcoll_module->narray_knomial_proxy_num = i; - } else { - ptpcoll_module->narray_type = PTPCOLL_IN_GROUP;; - } - /* Setting node info */ - for(i = 0; i < ptpcoll_module->full_narray_tree_size; i++) { - rc = netpatterns_setup_narray_knomial_tree( - cm->narray_knomial_radix, - i, - ptpcoll_module->full_narray_tree_size, - &ptpcoll_module->narray_knomial_node[i]); - if(OMPI_SUCCESS != rc) { - goto Error; - } - } - } else { - ptpcoll_module->narray_type = PTPCOLL_EXTRA; - ptpcoll_module->narray_knomial_proxy_extra_index[0] = - (ptpcoll_module->super.sbgp_partner_module->my_index - - 
ptpcoll_module->full_narray_tree_size) / - cm->narray_knomial_radix; - } - - return OMPI_SUCCESS; - -Error: - if (NULL != ptpcoll_module->narray_knomial_node) { - free(ptpcoll_module->narray_knomial_node); - } - if (NULL != ptpcoll_module->narray_knomial_proxy_extra_index) { - free(ptpcoll_module->narray_knomial_proxy_extra_index); - } - return OMPI_ERROR; -} - -/* Setup N-array static information */ -static int load_narray_tree(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int rc, i; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - - ptpcoll_module->narray_node = calloc(ptpcoll_module->group_size, - sizeof(netpatterns_tree_node_t)); - if(NULL == ptpcoll_module->narray_node ) { - goto Error; - } - - for(i = 0; i < ptpcoll_module->group_size; i++) { - rc = netpatterns_setup_narray_tree( - cm->narray_radix, - i, - ptpcoll_module->group_size, - &ptpcoll_module->narray_node[i]); - if(OMPI_SUCCESS != rc) { - goto Error; - } - } - - return OMPI_SUCCESS; - -Error: - if (NULL != ptpcoll_module->narray_node) { - free(ptpcoll_module->narray_node); - } - return OMPI_ERROR; -} - -static int load_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int i; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - - ptpcoll_module->k_nomial_radix = - cm->k_nomial_radix > ptpcoll_module->group_size ? 
- ptpcoll_module->group_size : - cm->k_nomial_radix; - - ptpcoll_module->pow_k = pow_k_calc(ptpcoll_module->k_nomial_radix, - ptpcoll_module->group_size, - &ptpcoll_module->pow_knum); - - ptpcoll_module->kn_proxy_extra_index = (int *) - malloc(sizeof(int) * (ptpcoll_module->k_nomial_radix - 1)); - if (NULL == ptpcoll_module->kn_proxy_extra_index) { - PTPCOLL_ERROR(("Failed to allocate memory")); - goto Error; - } - - /* Setting peer type for K-nomial algorithm*/ - if (ptpcoll_module->super.sbgp_partner_module->my_index < ptpcoll_module->pow_knum ) { - if (ptpcoll_module->super.sbgp_partner_module->my_index < - ptpcoll_module->group_size - ptpcoll_module->pow_knum) { - for (i = 0; - i < (ptpcoll_module->k_nomial_radix - 1) && - ptpcoll_module->super.sbgp_partner_module->my_index * - (ptpcoll_module->k_nomial_radix - 1) + - i + ptpcoll_module->pow_knum < ptpcoll_module->group_size - ; i++) { - ptpcoll_module->pow_ktype = PTPCOLL_KN_PROXY; - ptpcoll_module->kn_proxy_extra_index[i] = - ptpcoll_module->super.sbgp_partner_module->my_index * - (ptpcoll_module->k_nomial_radix - 1) + - i + ptpcoll_module->pow_knum; - PTPCOLL_VERBOSE(10 ,("My type is proxy, pow_knum = %d [%d] my extra %d", - ptpcoll_module->pow_knum, - ptpcoll_module->pow_k, - ptpcoll_module->kn_proxy_extra_index[i])); - } - ptpcoll_module->kn_proxy_extra_num = i; - } else { - PTPCOLL_VERBOSE(10 ,("My type is in group, pow_knum = %d [%d]", ptpcoll_module->pow_knum, - ptpcoll_module->pow_k)); - ptpcoll_module->pow_ktype = PTPCOLL_KN_IN_GROUP; - } - } else { - ptpcoll_module->pow_ktype = PTPCOLL_KN_EXTRA; - ptpcoll_module->kn_proxy_extra_index[0] = (ptpcoll_module->super.sbgp_partner_module->my_index - - ptpcoll_module->pow_knum) / (ptpcoll_module->k_nomial_radix - 1); - PTPCOLL_VERBOSE(10 ,("My type is extra , pow_knum = %d [%d] my proxy %d", - ptpcoll_module->pow_knum, - ptpcoll_module->pow_k, - ptpcoll_module->kn_proxy_extra_index[0])); - } - - return OMPI_SUCCESS; - -Error: - if (NULL == 
ptpcoll_module->kn_proxy_extra_index) { - free(ptpcoll_module->kn_proxy_extra_index); - } - - return OMPI_ERROR; -} - -static int load_binomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - ptpcoll_module->pow_2 = pow_k_calc(2, - ptpcoll_module->group_size, - &ptpcoll_module->pow_2num); - - assert(ptpcoll_module->pow_2num == 1 << ptpcoll_module->pow_2); - assert(ptpcoll_module->pow_2num <= ptpcoll_module->group_size); - - /* Setting peer type for binary algorithm*/ - if (ptpcoll_module->super.sbgp_partner_module->my_index < ptpcoll_module->pow_2num ) { - if (ptpcoll_module->super.sbgp_partner_module->my_index < - ptpcoll_module->group_size - ptpcoll_module->pow_2num) { - PTPCOLL_VERBOSE(10 ,("My type is proxy, pow_2num = %d [%d]", ptpcoll_module->pow_2num, - ptpcoll_module->pow_2)); - ptpcoll_module->pow_2type = PTPCOLL_PROXY; - ptpcoll_module->proxy_extra_index = ptpcoll_module->super.sbgp_partner_module->my_index + - ptpcoll_module->pow_2num; - } else { - PTPCOLL_VERBOSE(10 ,("My type is in group, pow_2num = %d [%d]", ptpcoll_module->pow_2num, - ptpcoll_module->pow_2)); - ptpcoll_module->pow_2type = PTPCOLL_IN_GROUP; - } - } else { - PTPCOLL_VERBOSE(10 ,("My type is extra , pow_2num = %d [%d]", ptpcoll_module->pow_2num, - ptpcoll_module->pow_2)); - ptpcoll_module->pow_2type = PTPCOLL_EXTRA; - ptpcoll_module->proxy_extra_index = ptpcoll_module->super.sbgp_partner_module->my_index - - ptpcoll_module->pow_2num; - } - return OMPI_SUCCESS; -} - -static int load_recursive_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int rc = OMPI_SUCCESS; - rc = netpatterns_setup_recursive_knomial_tree_node( - ptpcoll_module->group_size, - ptpcoll_module->super.sbgp_partner_module->my_index, - mca_bcol_ptpcoll_component.k_nomial_radix, - &ptpcoll_module->knomial_exchange_tree); - return rc; -} - -static int bcol_ptpcoll_collreq_init(opal_free_list_item_t *item, void* ctx) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module= (mca_bcol_ptpcoll_module_t *) ctx; - 
mca_bcol_ptpcoll_collreq_t *collreq = (mca_bcol_ptpcoll_collreq_t *) item; - - switch(mca_bcol_ptpcoll_component.barrier_alg) { - case 1: - collreq->requests = (ompi_request_t **) - calloc(2, sizeof(ompi_request_t *)); - break; - case 2: - collreq->requests = (ompi_request_t **) - calloc(2 * ptpcoll_module->k_nomial_radix, sizeof(ompi_request_t *)); - break; - } - - if (NULL == collreq->requests) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - return OPAL_SUCCESS; -} - -/* query to see if the module is available for use on the given - * communicator, and if so, what it's priority is. This is where - * the backing shared-memory file is created. - */ -mca_bcol_base_module_t **mca_bcol_ptpcoll_comm_query(mca_sbgp_base_module_t *sbgp, - int *num_modules) -{ - int rc; - /* local variables */ - struct ompi_communicator_t *comm = sbgp->group_comm; - mca_bcol_ptpcoll_module_t *ptpcoll_module = NULL; - mca_bcol_base_module_t **ptpcoll_modules = NULL; - int iovec_size; - - /* initialize local variables */ - *num_modules = 0; - - /* - * This is activated only for intra-communicators - */ - if (OMPI_COMM_IS_INTER(comm) ) { - return NULL; - } - - /* allocate and initialize an sm-v2 module */ - ptpcoll_modules = (mca_bcol_base_module_t **) malloc(sizeof(mca_bcol_base_module_t *)); - if (NULL == ptpcoll_modules) { - return NULL; - } - - ptpcoll_module = OBJ_NEW(mca_bcol_ptpcoll_module_t); - if (NULL == ptpcoll_module) { - free(ptpcoll_modules); - return NULL; - } - - /* On this stage we support only one single module */ - ptpcoll_modules[*num_modules] = &(ptpcoll_module->super); - - (*num_modules)++; - /* set the subgroup */ - ptpcoll_module->super.sbgp_partner_module = sbgp; - /* caching some useful information */ - ptpcoll_module->group_size = - ptpcoll_module->super.sbgp_partner_module->group_size; - - rc = load_binomial_info(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Failed to load knomial info")); - goto CLEANUP; - } - - rc = 
load_knomial_info(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Failed to load knomial info")); - goto CLEANUP; - } - - rc = load_narray_tree(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Failed to load narray tree")); - goto CLEANUP; - } - - rc = load_narray_knomial_tree(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Failed to load narray-knomila tree")); - goto CLEANUP; - } - - rc = load_recursive_knomial_info(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Failed to load recursive knomial tree")); - goto CLEANUP; - } - - /* creating collfrag free list */ - OBJ_CONSTRUCT(&ptpcoll_module->collreqs_free, opal_free_list_t); - rc = opal_free_list_init (&ptpcoll_module->collreqs_free, - sizeof(mca_bcol_ptpcoll_collreq_t), - BCOL_PTP_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_ptpcoll_collreq_t), - 0, BCOL_PTP_CACHE_LINE_SIZE, - 256 /* free_list_num */, - -1 /* free_list_max, -1 = infinite */, - 32 /* free_list_inc */, - NULL, 0, NULL, - bcol_ptpcoll_collreq_init, - ptpcoll_module); - if (OMPI_SUCCESS != rc) { - goto CLEANUP; - } - - load_func(ptpcoll_module); - - rc = alloc_allreduce_offsets_array(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - goto CLEANUP; - } - - /* Allocating iovec for PTP alltoall */ - iovec_size = ptpcoll_module->group_size / 2 + ptpcoll_module->group_size % 2; - ptpcoll_module->alltoall_iovec = (struct iovec *) malloc(sizeof(struct iovec) - * iovec_size); - ptpcoll_module->log_group_size = lognum(ptpcoll_module->group_size); - - rc = mca_bcol_base_bcol_fns_table_init(&(ptpcoll_module->super)); - if (OMPI_SUCCESS != rc) { - goto CLEANUP; - } - - /* Zero copy is supported */ - ptpcoll_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY; - - /* return */ - return ptpcoll_modules; - -CLEANUP: - - OBJ_RELEASE(ptpcoll_module); - free(ptpcoll_modules); - return NULL; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.c 
b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.c deleted file mode 100644 index d8fe566543c..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.c +++ /dev/null @@ -1,405 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_ptpcoll_reduce.h" -#include "bcol_ptpcoll_utils.h" - -static int bcol_ptpcoll_reduce_narray_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -static int bcol_ptpcoll_reduce_narray(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - - -#define NARRAY_RECV_NB(narray_node, process_shift, group_size, \ - recv_buffer, pack_len, tag, comm, recv_requests, \ - num_pending_recvs) \ -do { \ - int n, rc = OMPI_SUCCESS; \ - int dst; \ - int comm_dst; \ - int offset = 0 ; \ - \ - /* Recieve data from all relevant childrens */ \ - for (n = 0; n < narray_node->n_children; n++) { \ - \ - dst = narray_node->children_ranks[n] + process_shift; \ - if (dst >= group_size) { \ - dst -= group_size; \ - } \ - comm_dst = group_list[dst]; \ - \ - /* Non blocking send .... 
*/ \ - PTPCOLL_VERBOSE(1 , ("Reduce, Irecv data to %d[%d], count %d, tag %d, addr %p", \ - dst, comm_dst, pack_len, tag, \ - data_buffer)); \ - rc = MCA_PML_CALL(irecv((void *)((unsigned char*)recv_buffer + offset), pack_len, MPI_BYTE, \ - comm_dst, tag, comm, \ - &(recv_requests[*num_pending_recvs]))); \ - if( OMPI_SUCCESS != rc ) { \ - PTPCOLL_VERBOSE(10, ("Failed to start non-blocking receive")); \ - return OMPI_ERROR; \ - } \ - ++(*num_pending_recvs); \ - offset += pack_len; \ - } \ -} while(0) - - -static inline int narray_reduce(void *data_buffer, void *recv_buffer, - int nrecvs, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int *reduction_status) { - int pack_len = count * dtype->super.size; - int i = 0; - void *source_buffer = NULL, *result_buffer = NULL; - - source_buffer = data_buffer; - result_buffer = recv_buffer; - - for (i = 0; i < nrecvs; i++) { - ompi_op_reduce(op, (void*)((unsigned char*) source_buffer) , - (void*)((unsigned char*) result_buffer), - count,dtype); - - source_buffer = (void *)((unsigned char*)recv_buffer - + (i+1) * pack_len); - } - - *reduction_status = 1; - return OMPI_SUCCESS; -} -static int bcol_ptpcoll_reduce_narray_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag = -1; - int rc; - int group_size = ptpcoll_module->group_size; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - uint32_t buffer_index = input_args->buffer_index; - struct ompi_op_t *op = input_args->op; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **send_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - ompi_request_t **recv_requests = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[1]; - void *data_buffer = NULL; - void *src_buffer = (void *) ( - (unsigned char 
*)input_args->sbuf + - (size_t)input_args->sbuf_offset); - void *recv_buffer = (void *) ( - (unsigned char *)input_args->rbuf + - (size_t)input_args->rbuf_offset); - int count = input_args->count; - struct ompi_datatype_t *dtype = input_args->dtype; - int pack_len = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int matched = false; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int relative_group_index = 0; - netpatterns_tree_node_t *narray_node = NULL; - bool not_sent = false; - int parent_rank = -1, comm_parent_rank = -1; - int group_root_index = input_args->root; - - if (!ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduce_init_called) { - bcol_ptpcoll_reduce_narray(input_args, const_args); - } - /* - * By default the src buffer is the data buffer, - * only after reduction, the recv buffer becomes the - * data buffer - */ - data_buffer = src_buffer; - - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index +=group_size; - } - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level tags */ - tag = -tag; - - narray_node = &ptpcoll_module->narray_node[relative_group_index]; - - PTPCOLL_VERBOSE(3, ("reduce, Narray tree Progress")); - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_reduce_narray, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d [%d]" - "buff: %p ", - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, input_args->root_route->rank, - data_buffer)); - - /* - Check if the data was received - */ - if (0 != *active_requests) { - matched = mca_bcol_ptpcoll_test_all_for_match - (active_requests, recv_requests, &rc); - if 
(OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - - /* All data was received, then do a reduction*/ - if(matched) { - narray_reduce(data_buffer, recv_buffer, narray_node->n_children, count, dtype, op, - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduction_status); - - /* - * The reduction result is in the recv buffer, so it is the new data - * buffer - */ - data_buffer = recv_buffer; - - /* If not reduced, means also, you might not posted a send */ - not_sent = true; - } else { - PTPCOLL_VERBOSE(10, ("reduce root is started")); - return BCOL_FN_STARTED; - } - } - - /* I'm root, I'm done */ - if (input_args->root_flag) { - return BCOL_FN_COMPLETE; - } - - PTPCOLL_VERBOSE(1,("Testing Sending Match")); - - /* If send was not posted */ - /* Manju: Leaf node should never post in the progress logic */ - if (not_sent) { - parent_rank = - ptpcoll_module->narray_node[relative_group_index].parent_rank + - group_root_index; - if (parent_rank >= group_size) { - parent_rank -= group_size; - } - - comm_parent_rank = group_list[parent_rank]; - PTPCOLL_VERBOSE(1,("Sending data to %d ",comm_parent_rank)); - - rc = MCA_PML_CALL(isend(data_buffer, pack_len, MPI_BYTE, - comm_parent_rank, - tag, MCA_PML_BASE_SEND_STANDARD, comm, send_request)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - } - - if (0 == mca_bcol_ptpcoll_test_for_match(send_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - /* Data has not been sent. Return that the collective has been stated - * because we MUST call test on this request once it is finished to - * ensure that it is properly freed. */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -static int bcol_ptpcoll_reduce_narray(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int group_size = ptpcoll_module->group_size; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - uint32_t buffer_index = input_args->buffer_index; - - struct ompi_op_t *op = input_args->op; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **recv_requests = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[1]; - ompi_request_t **send_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - - void *data_buffer = NULL; - void *src_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - void *recv_buffer = (void *) ( - (unsigned char *)input_args->rbuf + - (size_t)input_args->rbuf_offset); - int count = input_args->count; - struct ompi_datatype_t *dtype = input_args->dtype; - int pack_len = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int matched = true; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int group_root_index = -1; - int relative_group_index = 0; - netpatterns_tree_node_t *narray_node = NULL; - int parent_rank = -1, comm_parent_rank = -1; - - - /* This is first function call that should be called, not progress. - * The fragmentation code does this, so switch from progress to here. 
- * The flag indicates whether, we have entered this code * - */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduce_init_called = true; - - PTPCOLL_VERBOSE(1, ("Reduce, Narray tree")); - /* reset active request counter */ - (*active_requests) = 0; - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - PTPCOLL_VERBOSE(1, ("bcol_ptpcoll_reduce_narray, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "buff: %p ", - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - src_buffer)); - - /* Compute Root Index Shift */ - group_root_index = input_args->root; - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index += group_size; - } - - narray_node = &ptpcoll_module->narray_node[relative_group_index]; - - if (0 == narray_node->n_children) { - PTPCOLL_VERBOSE(10, ("I'm leaf of the data")); - /* - * I'm root of the operation - * send data to N childrens - */ - data_buffer = src_buffer; - goto NARRAY_SEND_DATA; - } - - /* Not leaf, either an internal node or root */ - NARRAY_RECV_NB(narray_node, group_root_index, group_size, - recv_buffer, pack_len, tag, comm, recv_requests, - active_requests); - - - /* We have not done reduction, yet */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduction_status = 0; - - /* We can not block. 
So run couple of test for data arrival */ - matched = mca_bcol_ptpcoll_test_all_for_match - (active_requests, recv_requests, &rc); - - /* Check if received the data */ - if(matched) { - - narray_reduce(src_buffer, recv_buffer, narray_node->n_children, - count, dtype, op, &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduction_status); - PTPCOLL_VERBOSE(1, ("Reduce, received data from all childrend ")); - data_buffer = recv_buffer; - - } else { - - PTPCOLL_VERBOSE(1, ("reduce root is started")); - return BCOL_FN_STARTED; - } - - /* I'm root, I'm done */ - if (input_args->root_flag) { - return BCOL_FN_COMPLETE; - } - - -NARRAY_SEND_DATA: - - /* - * Send the data (reduce in case of internal nodes, or just data in - * case of leaf nodes) to the parent - */ - narray_node = &ptpcoll_module->narray_node[relative_group_index]; - - parent_rank = - ptpcoll_module->narray_node[relative_group_index].parent_rank + - group_root_index; - if (parent_rank >= group_size) { - parent_rank -= group_size; - } - - comm_parent_rank = group_list[parent_rank]; - PTPCOLL_VERBOSE(1,("Sending data to %d ",comm_parent_rank)); - - rc = MCA_PML_CALL(isend(data_buffer, pack_len, MPI_BYTE, - comm_parent_rank, - tag, MCA_PML_BASE_SEND_STANDARD, comm, send_request)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - /* We can not block. So run couple of test for data arrival */ - if (0 == mca_bcol_ptpcoll_test_for_match(send_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - /* No data was received, return no match error */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - - -int bcol_ptpcoll_reduce_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - PTPCOLL_VERBOSE(1,("Initialization Reduce - Narray")); - comm_attribs.bcoll_type = BCOL_REDUCE; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - - comm_attribs.data_src = DATA_SRC_KNOWN; - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_reduce_narray, - bcol_ptpcoll_reduce_narray_progress); - - comm_attribs.data_src = DATA_SRC_KNOWN; - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.h deleted file mode 100644 index 195ce7fad93..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_PTPCOLL_REDUCE_H -#define MCA_BCOL_PTPCOLL_REDUCE_H - -#include "ompi_config.h" -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" - -BEGIN_C_DECLS - -int bcol_ptpcoll_reduce_init(mca_bcol_base_module_t *super); - -int bcol_ptpcoll_reduce_init(mca_bcol_base_module_t *super); - -#endif /* MCA_BCOL_PTPCOLL_REDUCE_H */ diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.c deleted file mode 100644 index 6fc2fa8ab76..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" - -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" - -/* - * Return closet power of K, for the number, and the number - */ -int pow_k_calc(int k, int number, int *out_number) -{ - int power = 0; - int n = 1; - - while (n < number) { - n *= k; - ++power; - } - - if (n > number) { - n /= k; - --power; - } - if (NULL != out_number) { - *out_number = n; - } - - return power; -} - -/* - * Communicator rank to group index conversion function for K-nomial tree. - * Complexity: (K-1) Log _base_K N - * - * Input: - * my_group_index - my process index in the group - * comm_source - the communicator rank of the source of data - * radix - radix of K-nomial tree - * group_size - the size of my group - * group_array[] - one to one map from group index to communicator rank - * - * Output: - * Group index for comm_source. 
- */ - -int get_group_index_and_distance_for_binomial(int my_group_index, int comm_source, - int group_size, int *group_array, int *pow_distance) -{ - int group_index; - int i; - *pow_distance = 0; - - for (i = 1; i < group_size; i<<=1, (*pow_distance)++) { - group_index = my_group_index ^ i; - if (comm_source == group_array[group_index]) { - return group_index; - } - } - - *pow_distance = -1; - return -1; -} - -int get_group_index_and_distance_for_k_nomial(int my_group_index, int comm_source, int radix, - int group_size, int *group_array, int *pow_distance) -{ - int group_index; - int offset = 1; /* offset equal to 1 (radix_power) */ - int radix_power = 1; /* radix power 0 */ - *pow_distance = 0; - - /* - * Go trough range of possible offsets from my rank, - * for each offset we calculate k-nomial tree root. - */ - while(offset < group_size) { - /* K-nomial tree root calculation for the offset */ - if (offset % (radix * radix_power)) { - group_index = my_group_index - offset; - /* wrap around if the group is negative */ - if (group_index < 0) { - group_index += group_size; - } - PTPCOLL_VERBOSE(10, ("Checking %d", group_index)); - if (comm_source == group_array[group_index]) { - return group_index; - } - offset += radix_power; - } else { - /* we done with the section of the tree, go to next one */ - radix_power *= radix; - (*pow_distance)++; - } - } - - /* No source was found, return -1 */ - *pow_distance = -1; - return -1; -} - -int get_group_index_for_k_nomial(int my_group_index, int comm_source, int radix, int group_size, int *group_array) -{ - int group_index; - int radix_power = 1; /* radix power 0 */ - int offset = 1; /* offset equal to 1 (radix_power) */ - - /* - * Go trough range of possible offsets from my rank, - * for each offset we calculate k-nomial tree root. 
- */ - while(offset < group_size) { - /* K-nomial tree root calculation for the offset */ - if (offset % (radix * radix_power)) { - group_index = my_group_index - offset; - /* wrap around if the group is negative */ - if (group_index < 0) { - group_index += group_size; - } - if (comm_source == group_array[group_index]) { - return group_index; - } - offset += radix_power; - } else { - /* we done with the section of the tree, go to next one */ - radix_power *= radix; - } - } - - /* No source was found, return -1 */ - return -1; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.h deleted file mode 100644 index e5cffe0fb4c..00000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_PTPCOLL_UTILS_H -#define MCA_BCOL_PTPCOLL_UTILS_H - -#include "ompi_config.h" - -#include "ompi/mca/rte/rte.h" - -BEGIN_C_DECLS - -/* - * Return closet power of K, for the number - */ -int pow_k_calc(int k, int number, int *out_number); - -/* - * Communicator rank to group index conversion function for K-nomial tree. 
- */ -int get_group_index_for_k_nomial(int my_group_index, int comm_source, int radix, int group_size, int *group_array); - -/* the same like above, just more information on return */ -int get_group_index_and_distance_for_k_nomial(int my_group_index, int comm_source, int radix, - int group_size, int *group_array, int *pow_distance); - -int get_group_index_and_distance_for_binomial(int my_group_index, int comm_source, - int group_size, int *group_array, int *pow_distance); -/* - * Error and debug Macros/Functions - */ -static inline int mca_bcol_ptpcoll_err(const char* fmt, ...) -{ - va_list list; - int ret; - - va_start(list, fmt); - ret = vfprintf(stderr, fmt, list); - va_end(list); - return ret; -} - -#define PTPCOLL_ERROR(args) \ - do { \ - mca_bcol_ptpcoll_err("[%s]%s[%s:%d:%s] PTPCOLL ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_bcol_ptpcoll_err args; \ - mca_bcol_ptpcoll_err("\n"); \ - } while(0) - -#if OPAL_ENABLE_DEBUG -#define PTPCOLL_VERBOSE(level, args) \ - do { \ - if (mca_bcol_ptpcoll_component.verbose >= level) { \ - mca_bcol_ptpcoll_err("[%s]%s[%s:%d:%s] PTPCOLL ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_bcol_ptpcoll_err args; \ - mca_bcol_ptpcoll_err("\n"); \ - } \ - } while(0) -#else -#define PTPCOLL_VERBOSE(level, args) -#endif - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/ptpcoll/owner.txt b/ompi/mca/bcol/ptpcoll/owner.txt deleted file mode 100644 index 51ea04a5175..00000000000 --- a/ompi/mca/bcol/ptpcoll/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL? 
-status: unmaintained diff --git a/ompi/mca/bml/Makefile.am b/ompi/mca/bml/Makefile.am index b8463a4d794..6604329605f 100644 --- a/ompi/mca/bml/Makefile.am +++ b/ompi/mca/bml/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/bml/base/Makefile.am b/ompi/mca/bml/base/Makefile.am index b8ba13a85bf..faebf1c8089 100644 --- a/ompi/mca/bml/base/Makefile.am +++ b/ompi/mca/bml/base/Makefile.am @@ -5,15 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,5 +27,4 @@ libmca_bml_la_SOURCES += \ base/bml_base_btl.h \ base/bml_base_endpoint.c \ base/bml_base_init.c \ - base/bml_base_frame.c \ - base/bml_base_ft.c + base/bml_base_frame.c diff --git a/ompi/mca/bml/base/base.h b/ompi/mca/bml/base/base.h index b4f7280d529..1d05e4a9318 100644 --- a/ompi/mca/bml/base/base.h +++ b/ompi/mca/bml/base/base.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,6 +28,7 @@ #include "ompi/mca/mca.h" #include "opal/mca/base/mca_base_framework.h" #include "ompi/mca/bml/bml.h" +#include "ompi/proc/proc.h" /* @@ -40,13 +44,13 @@ struct mca_bml_base_selected_module_t { }; typedef struct mca_bml_base_selected_module_t mca_bml_base_selected_module_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bml_base_selected_module_t); +OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bml_base_selected_module_t); /* * Global functions for MCA: overall BTL open and close */ -OMPI_DECLSPEC int mca_bml_base_init(bool enable_progress_threads, +OMPI_DECLSPEC int mca_bml_base_init(bool enable_progress_threads, bool enable_mpi_threads); OMPI_DECLSPEC bool mca_bml_base_inited(void); @@ -57,8 +61,21 @@ OMPI_DECLSPEC int mca_bml_base_ft_event(int state); * Globals */ OMPI_DECLSPEC extern mca_bml_base_component_t mca_bml_component; -OMPI_DECLSPEC extern mca_bml_base_module_t mca_bml; +OMPI_DECLSPEC extern mca_bml_base_module_t mca_bml; OMPI_DECLSPEC extern mca_base_framework_t ompi_bml_base_framework; +OMPI_DECLSPEC extern opal_mutex_t mca_bml_lock; + +static inline struct mca_bml_base_endpoint_t *mca_bml_base_get_endpoint (struct ompi_proc_t *proc) { + if (OPAL_UNLIKELY(NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML])) { + OPAL_THREAD_LOCK(&mca_bml_lock); + if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { + mca_bml.bml_add_proc (proc); + } + OPAL_THREAD_UNLOCK(&mca_bml_lock); + } + + return (struct mca_bml_base_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; +} END_C_DECLS diff --git a/ompi/mca/bml/base/bml_base_btl.c b/ompi/mca/bml/base/bml_base_btl.c index d5fd5d6b59f..c35bb57e7e3 100644 --- a/ompi/mca/bml/base/bml_base_btl.c +++ b/ompi/mca/bml/base/bml_base_btl.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -59,7 +59,7 @@ int mca_bml_base_btl_array_reserve(mca_bml_base_btl_array_t* array, size_t size) size_t new_len = sizeof(mca_bml_base_btl_t)*size; if(old_len >= new_len) return OMPI_SUCCESS; - + array->bml_btls = (mca_bml_base_btl_t*)realloc(array->bml_btls, new_len); if(NULL == array->bml_btls) return OMPI_ERR_OUT_OF_RESOURCE; @@ -99,15 +99,15 @@ static void mca_bml_base_completion( des->des_cbfunc(btl,ep,des,status); } -int mca_bml_base_send( mca_bml_base_btl_t* bml_btl, - mca_btl_base_descriptor_t* des, - mca_btl_base_tag_t tag ) -{ - des->des_context = (void*)bml_btl; +int mca_bml_base_send( mca_bml_base_btl_t* bml_btl, + mca_btl_base_descriptor_t* des, + mca_btl_base_tag_t tag ) +{ + des->des_context = (void*)bml_btl; if(mca_bml_base_error_count <= 0 && mca_bml_base_error_rate_ceiling > 0) { - mca_bml_base_error_count = (int) (((double) mca_bml_base_error_rate_ceiling * + mca_bml_base_error_count = (int) (((double) mca_bml_base_error_rate_ceiling * opal_rand(&mca_bml_base_rand_buff))/(UINT32_MAX+1.0)); - if(mca_bml_base_error_count < (double) mca_bml_base_error_rate_floor) { + if(mca_bml_base_error_count < (double) mca_bml_base_error_rate_floor) { mca_bml_base_error_count = (double) mca_bml_base_error_rate_floor; } if(mca_bml_base_error_count % 2) { @@ -117,7 +117,7 @@ int mca_bml_base_send( mca_bml_base_btl_t* bml_btl, return OMPI_SUCCESS; } else { /* corrupt data */ - mca_bml_base_context_t* ctx = (mca_bml_base_context_t*) + mca_bml_base_context_t* ctx = (mca_bml_base_context_t*) malloc(sizeof(mca_bml_base_context_t)); if(NULL != ctx) { opal_output(0, "%s:%d: corrupting data\n", __FILE__, 
__LINE__); @@ -133,7 +133,7 @@ int mca_bml_base_send( mca_bml_base_btl_t* bml_btl, } mca_bml_base_error_count--; return bml_btl->btl_send( bml_btl->btl, - bml_btl->btl_endpoint, + bml_btl->btl_endpoint, des, tag ); } diff --git a/ompi/mca/bml/base/bml_base_btl.h b/ompi/mca/bml/base/bml_base_btl.h index b2957620dc1..c439e6723e4 100644 --- a/ompi/mca/bml/base/bml_base_btl.h +++ b/ompi/mca/bml/base/bml_base_btl.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -35,7 +35,7 @@ struct mca_bml_base_btl_array_t; OMPI_DECLSPEC int mca_bml_base_btl_array_reserve(struct mca_bml_base_btl_array_t* array, size_t size); - + END_C_DECLS #endif /* MCA_BML_BASE_H */ diff --git a/ompi/mca/bml/base/bml_base_endpoint.c b/ompi/mca/bml/base/bml_base_endpoint.c index 1ba5aaa4588..393ba82b404 100644 --- a/ompi/mca/bml/base/bml_base_endpoint.c +++ b/ompi/mca/bml/base/bml_base_endpoint.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -46,7 +46,7 @@ static void mca_bml_base_endpoint_destruct(mca_bml_base_endpoint_t* ep) OBJ_CLASS_INSTANCE( mca_bml_base_endpoint_t, opal_object_t, - mca_bml_base_endpoint_construct, - mca_bml_base_endpoint_destruct + mca_bml_base_endpoint_construct, + mca_bml_base_endpoint_destruct ); diff --git a/ompi/mca/bml/base/bml_base_frame.c b/ompi/mca/bml/base/bml_base_frame.c index de48d7681e5..b5a63dd9a2e 100644 --- a/ompi/mca/bml/base/bml_base_frame.c +++ b/ompi/mca/bml/base/bml_base_frame.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -50,6 +50,8 @@ static bool mca_bml_base_srand; opal_rng_buff_t mca_bml_base_rand_buff; #endif +opal_mutex_t mca_bml_lock = OPAL_MUTEX_STATIC_INIT; + static int mca_bml_base_register(mca_base_register_flag_t flags) { #if OPAL_ENABLE_DEBUG_RELIABILITY @@ -89,7 +91,7 @@ static int mca_bml_base_register(mca_base_register_flag_t flags) return OMPI_SUCCESS; } -static int mca_bml_base_open(mca_base_open_flag_t flags) +static int mca_bml_base_open(mca_base_open_flag_t flags) { int ret; @@ -105,9 +107,9 @@ static int mca_bml_base_open(mca_base_open_flag_t flags) opal_srand(&mca_bml_base_rand_buff,(uint32_t)(getpid() * tv.tv_usec)); /* initialize count */ - if(mca_bml_base_error_rate_ceiling > 0 + if(mca_bml_base_error_rate_ceiling > 0 && mca_bml_base_error_rate_floor <= mca_bml_base_error_rate_ceiling) { - mca_bml_base_error_count = (int) (((double) mca_bml_base_error_rate_ceiling * + mca_bml_base_error_count = (int) (((double) mca_bml_base_error_rate_ceiling * opal_rand(&mca_bml_base_rand_buff))/(UINT32_MAX+1.0)); } #endif diff --git a/ompi/mca/bml/base/bml_base_ft.c b/ompi/mca/bml/base/bml_base_ft.c deleted file mode 100644 index 76bba51b2f4..00000000000 --- a/ompi/mca/bml/base/bml_base_ft.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/mca/bml/base/base.h" -#include "opal/mca/btl/base/base.h" -#include "ompi/mca/bml/base/bml_base_btl.h" - -int mca_bml_base_ft_event(int state) -{ - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - /* - * BML is expected to call ft_event in - * - BTL(s) - * - MPool(s) - * Currently you can't do this from outside a component - * So just return Unimplemented - */ - - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OMPI_ERR_NOT_IMPLEMENTED; -} diff --git a/ompi/mca/bml/base/bml_base_init.c b/ompi/mca/bml/base/bml_base_init.c index 66eb0c55efa..9a2efec8ccc 100644 --- a/ompi/mca/bml/base/bml_base_init.c +++ b/ompi/mca/bml/base/bml_base_init.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,21 +6,23 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" -#include "ompi/mca/bml/base/base.h" +#include "ompi/mca/bml/base/base.h" #include "opal/mca/base/base.h" #include "ompi/mca/mca.h" @@ -27,13 +30,13 @@ mca_bml_base_module_t mca_bml = { NULL, /* bml_component */ - NULL, /* bml_add_procs */ + NULL, /* bml_add_procs */ NULL, /* bml_del_procs */ NULL, /* bml_add_btl */ NULL, /* bml_del_btl */ NULL, /* bml_del_proc_btl */ - NULL, /* bml_register */ - NULL, /* bml_register_error */ + NULL, /* bml_register */ + NULL, /* bml_register_error */ NULL, /* bml_finalize*/ NULL /* FT event */ }; @@ -47,47 +50,47 @@ mca_bml_base_inited(void) return init_called; } -int mca_bml_base_init( bool enable_progress_threads, +int mca_bml_base_init( bool enable_progress_threads, bool enable_mpi_threads) { - opal_list_item_t *item = NULL; - mca_bml_base_component_t *component = NULL, *best_component = NULL; - mca_bml_base_module_t *module = NULL, *best_module = NULL; - int priority = 0, best_priority = -1; - mca_base_component_list_item_t *cli = NULL; + mca_bml_base_component_t *component = NULL, *best_component = NULL; + mca_bml_base_module_t *module = NULL, *best_module = NULL; + int priority = 0, best_priority = -1; + mca_base_component_list_item_t *cli = NULL; + + if (init_called) { + return OPAL_SUCCESS; + } init_called = true; - for (item = opal_list_get_first(&ompi_bml_base_framework.framework_components); - opal_list_get_end(&ompi_bml_base_framework.framework_components) != item; - item = opal_list_get_next(item)) { - cli = (mca_base_component_list_item_t*) item; - component = (mca_bml_base_component_t*) cli->cli_component; + OPAL_LIST_FOREACH(cli, &ompi_bml_base_framework.framework_components, mca_base_component_list_item_t) { + component = (mca_bml_base_component_t*) cli->cli_component; if(NULL == component->bml_init) { - opal_output_verbose( 10, ompi_bml_base_framework.framework_output, - "select: no init function; ignoring 
component %s", - component->bml_version.mca_component_name ); - continue; + opal_output_verbose( 10, ompi_bml_base_framework.framework_output, + "select: no init function; ignoring component %s", + component->bml_version.mca_component_name ); + continue; } - module = component->bml_init(&priority, - enable_progress_threads, - enable_mpi_threads); + module = component->bml_init(&priority, + enable_progress_threads, + enable_mpi_threads); - if(NULL == module) { - continue; - } - if(priority > best_priority) { + if(NULL == module) { + continue; + } + if(priority > best_priority) { best_priority = priority; best_component = component; best_module = module; } - + } - if(NULL == best_module) { - return OMPI_SUCCESS; + if(NULL == best_module) { + return OMPI_SUCCESS; } - mca_bml_component = *best_component; - mca_bml = *best_module; + mca_bml_component = *best_component; + mca_bml = *best_module; return mca_base_framework_components_close(&ompi_bml_base_framework, (mca_base_component_t*) best_component); } diff --git a/ompi/mca/bml/bml.h b/ompi/mca/bml/bml.h index 59a2fae645c..0edc1ad1442 100644 --- a/ompi/mca/bml/bml.h +++ b/ompi/mca/bml/bml.h @@ -6,17 +6,17 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -32,8 +32,6 @@ #include "ompi_config.h" #include "ompi/mca/mca.h" #include "opal/datatype/opal_convertor.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" #include "opal/mca/btl/btl.h" #include "ompi/mca/bml/base/bml_base_btl.h" @@ -41,19 +39,19 @@ #include "ompi/constants.h" -#define OPAL_ENABLE_DEBUG_RELIABILITY 0 +#define OPAL_ENABLE_DEBUG_RELIABILITY 0 /* * BML types */ -struct ompi_proc_t; +struct ompi_proc_t; struct mca_bml_base_module_t; struct mca_bml_base_endpoint_t; struct mca_mpool_base_resources_t; /* - * Cached set of information for each btl + * Cached set of information for each btl */ struct mca_bml_base_btl_t { @@ -85,7 +83,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bml_base_btl_array_t); /** * If required, reallocate (grow) the array to the indicate size. - * + * * @param array (IN) * @param size (IN) */ @@ -96,7 +94,7 @@ static inline size_t mca_bml_base_btl_array_get_size(mca_bml_base_btl_array_t* a /** * Grow the array if required, and set the size. - * + * * @param array (IN) * @param size (IN) */ @@ -109,14 +107,14 @@ static inline void mca_bml_base_btl_array_set_size(mca_bml_base_btl_array_t* arr /** * Grow the array size by one and return the item at that index. 
- * + * * @param array (IN) */ static inline mca_bml_base_btl_t* mca_bml_base_btl_array_insert(mca_bml_base_btl_array_t* array) { #if OPAL_ENABLE_DEBUG if(array->arr_size >= array->arr_reserve) { - opal_output(0, "mca_bml_base_btl_array_insert: invalid array index %lu >= %lu", + opal_output(0, "mca_bml_base_btl_array_insert: invalid array index %lu >= %lu", (unsigned long)array->arr_size, (unsigned long)array->arr_reserve); return 0; } @@ -124,22 +122,22 @@ static inline mca_bml_base_btl_t* mca_bml_base_btl_array_insert(mca_bml_base_btl return &array->bml_btls[array->arr_size++]; } -/** - * Remove a btl from a bml_btl +/** + * Remove a btl from a bml_btl * * @param array (IN) * @param btl (IN) */ -static inline bool mca_bml_base_btl_array_remove( mca_bml_base_btl_array_t* array, +static inline bool mca_bml_base_btl_array_remove( mca_bml_base_btl_array_t* array, struct mca_btl_base_module_t* btl ) -{ +{ size_t i = 0; /* find the btl */ for( i = 0; i < array->arr_size; i++ ) { if( array->bml_btls[i].btl == btl ) { /* make sure not to go out of bounds */ for( ; i < array->arr_size-1; i++ ) { - /* move all btl's back by 1, so the found + /* move all btl's back by 1, so the found btl is "removed" */ array->bml_btls[i] = array->bml_btls[(i+1)]; } @@ -154,25 +152,22 @@ static inline bool mca_bml_base_btl_array_remove( mca_bml_base_btl_array_t* arra /** * Return an array item at the specified index. 
- * + * * @param array (IN) * @param item_index (IN) */ static inline mca_bml_base_btl_t* mca_bml_base_btl_array_get_index(mca_bml_base_btl_array_t* array, size_t item_index) { -#if OPAL_ENABLE_DEBUG - if(item_index >= array->arr_size) { - opal_output(0, "mca_bml_base_btl_array_get_index: invalid array index %lu >= %lu", - (unsigned long)item_index, (unsigned long)array->arr_size); - return 0; + if (item_index < array->arr_size) { + return &array->bml_btls[item_index]; } -#endif - return &array->bml_btls[item_index]; + + return NULL; } /** * Return the next LRU index in the array. - * + * * @param array (IN) * * @param index (OUT) @@ -200,14 +195,14 @@ static inline mca_bml_base_btl_t* mca_bml_base_btl_array_get_next(mca_bml_base_b /** * Locate an element in the array - * + * * @param array (IN) * @param index (IN) */ static inline mca_bml_base_btl_t* mca_bml_base_btl_array_find( mca_bml_base_btl_array_t* array, struct mca_btl_base_module_t* btl) { - size_t i=0; + size_t i=0; for(i=0; iarr_size; i++) { if(array->bml_btls[i].btl == btl) { return &array->bml_btls[i]; @@ -233,8 +228,8 @@ struct mca_bml_base_endpoint_t { uint32_t btl_flags_or; /**< the bitwise OR of the btl flags */ }; typedef struct mca_bml_base_endpoint_t mca_bml_base_endpoint_t; - - + + OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bml_base_endpoint_t); static inline void mca_bml_base_alloc( mca_bml_base_btl_t* bml_btl, @@ -247,10 +242,10 @@ static inline void mca_bml_base_alloc( mca_bml_base_btl_t* bml_btl, static inline void mca_bml_base_free( mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t* des ) -{ +{ mca_btl_base_module_t* btl = bml_btl->btl; - btl->btl_free( btl, des ); + btl->btl_free( btl, des ); /* The previous function is supposed to release the des object * so we should not touch it anymore. 
*/ @@ -258,8 +253,8 @@ static inline void mca_bml_base_free( mca_bml_base_btl_t* bml_btl, #if OPAL_ENABLE_DEBUG_RELIABILITY -int mca_bml_base_send( mca_bml_base_btl_t* bml_btl, - mca_btl_base_descriptor_t* des, +int mca_bml_base_send( mca_bml_base_btl_t* bml_btl, + mca_btl_base_descriptor_t* des, mca_btl_base_tag_t tag ); @@ -303,7 +298,7 @@ static inline int mca_bml_base_sendi( mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t** descriptor ) { mca_btl_base_module_t* btl = bml_btl->btl; - return btl->btl_sendi(btl, bml_btl->btl_endpoint, + return btl->btl_sendi(btl, bml_btl->btl_endpoint, convertor, header, header_size, payload_size, order, flags, tag, descriptor); } @@ -331,10 +326,10 @@ static inline int mca_bml_base_get( mca_bml_base_btl_t* bml_btl, void *local_add } -static inline void mca_bml_base_prepare_src(mca_bml_base_btl_t* bml_btl, - struct opal_convertor_t* conv, +static inline void mca_bml_base_prepare_src(mca_bml_base_btl_t* bml_btl, + struct opal_convertor_t* conv, uint8_t order, - size_t reserve, + size_t reserve, size_t *size, uint32_t flags, mca_btl_base_descriptor_t** des) @@ -383,14 +378,14 @@ static inline void mca_bml_base_deregister_mem (mca_bml_base_btl_t* bml_btl, mca * indicates whether multiple threads may invoke this component * simultaneously or not. * - * @return Array of pointers to BML modules, or NULL if the transport + * @return Array of pointers to BML modules, or NULL if the transport * is not available. * * During component initialization, the BML component should discover * the physical devices that are available for the given transport, - * and create a BML module to represent each device. Any addressing - * information required by peers to reach the device should be published - * during this function via the mca_base_modex_send() interface. + * and create a BML module to represent each device. 
Any addressing + * information required by peers to reach the device should be published + * during this function via the mca_base_modex_send() interface. * */ @@ -419,39 +414,39 @@ typedef struct mca_bml_base_component_2_0_0_t mca_bml_base_component_t; */ /** - * MCA->BML Clean up any resources held by BML module + * MCA->BML Clean up any resources held by BML module * before the module is unloaded. - * + * * @param bml (IN) BML module. * - * Prior to unloading a BML module, the MCA framework will call - * the BML finalize method of the module. Any resources held by + * Prior to unloading a BML module, the MCA framework will call + * the BML finalize method of the module. Any resources held by * the BML should be released and if required the memory corresponding * to the BML module freed. - * + * */ typedef int (*mca_bml_base_module_finalize_fn_t)( void ); - + /** - * PML->BML notification of change in the process list. + * PML->BML notification of change in the process list. * * @param nprocs (IN) Number of processes * @param procs (IN) Set of processes * @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this BML. * @return OMPI_SUCCESS or error status on failure. * - * The mca_bml_base_module_add_procs_fn_t() is called by the PML to - * determine the set of BMLs that should be used to reach each process. + * The mca_bml_base_module_add_procs_fn_t() is called by the PML to + * determine the set of BTLs that should be used to reach each process. * Any addressing information exported by the peer via the mca_base_modex_send() - * function should be available during this call via the corresponding - * mca_base_modex_recv() function. The BML may utilize this information to - * determine reachability of each peer process. + * function should be available during this call via the corresponding + * mca_base_modex_recv() function. The BML may utilize this information to + * determine reachability of each peer process. 
* - * For each process that is reachable by the BML, the bit corresponding to the index + * For each process that is reachable by the BML, the bit corresponding to the index * into the proc array (nprocs) should be set in the reachable bitmask. The PML * provides the BML the option to return a pointer to a data structure defined * by the BML that is returned to the BML on subsequent calls to the BML data - * transfer functions (e.g bml_send). This may be used by the BML to cache any addressing + * transfer functions (e.g bml_send). This may be used by the BML to cache any addressing * or connection information (e.g. TCP socket, IP queue pair). * * \note This function will return OMPI_ERR_UNREACH if one or more @@ -461,10 +456,29 @@ typedef int (*mca_bml_base_module_finalize_fn_t)( void ); */ typedef int (*mca_bml_base_module_add_procs_fn_t)( size_t nprocs, - struct ompi_proc_t** procs, + struct ompi_proc_t** procs, struct opal_bitmap_t* reachable ); +/** + * PML->BML notification of change in the process list. + * + * @param proc (IN) Process + * @return OMPI_SUCCESS or error status on failure. + * + * The mca_bml_base_module_add_proc_fn_t() is called by the PML to + * determine the set of BTLs that should be used to reach each process. + * Any addressing information exported by the peer via the mca_base_modex_send() + * function should be available during this call via the corresponding + * mca_base_modex_recv() function. The BML may utilize this information to + * determine reachability of each peer process. + * + * \note This function will return OMPI_ERR_UNREACH if the process can not + * be reached by a currently active BTL. This is not a fatal error, and the + * calling layer is free to continue using the BML interface. + */ +typedef int (*mca_bml_base_module_add_proc_fn_t) (struct ompi_proc_t *proc); + /** * Notification of change to the process list. 
* @@ -512,7 +526,7 @@ typedef int (*mca_bml_base_module_del_btl_fn_t)( struct mca_btl_base_module_t* ) * On failure of a btl, remove it from the set of forwarding * entries used by the BML. */ -typedef int (*mca_bml_base_module_del_proc_btl_fn_t)( +typedef int (*mca_bml_base_module_del_proc_btl_fn_t)( struct ompi_proc_t*, struct mca_btl_base_module_t* ); @@ -559,6 +573,7 @@ struct mca_bml_base_module_t { mca_bml_base_component_t* bml_component; /**< pointer back to the BML component structure */ /* BML function table */ + mca_bml_base_module_add_proc_fn_t bml_add_proc; mca_bml_base_module_add_procs_fn_t bml_add_procs; mca_bml_base_module_del_procs_fn_t bml_del_procs; mca_bml_base_module_add_btl_fn_t bml_add_btl; diff --git a/ompi/mca/bml/r2/Makefile.am b/ompi/mca/bml/r2/Makefile.am index 1ed9b51909d..d91fc8fc678 100644 --- a/ompi/mca/bml/r2/Makefile.am +++ b/ompi/mca/bml/r2/Makefile.am @@ -3,24 +3,23 @@ # All rights reserved. # Copyright (c) 2004-2005 The Trustees of the University of Tennessee. # All rights reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # r2_sources = \ bml_r2.c \ bml_r2.h \ - bml_r2_component.c \ - bml_r2_ft.c \ - bml_r2_ft.h + bml_r2_component.c dist_ompidata_DATA = help-mca-bml-r2.txt diff --git a/ompi/mca/bml/r2/bml_r2.c b/ompi/mca/bml/r2/bml_r2.c index ca3d2acfaa9..69d4ff7f97c 100644 --- a/ompi/mca/bml/r2/bml_r2.c +++ b/ompi/mca/bml/r2/bml_r2.c @@ -3,26 +3,27 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. 
All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2008-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,11 +40,11 @@ #include "ompi/mca/bml/base/base.h" #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/base.h" -#include "ompi/mca/bml/base/bml_base_btl.h" +#include "ompi/mca/bml/base/bml_base_btl.h" #include "bml_r2.h" #include "ompi/proc/proc.h" -extern mca_bml_base_component_t mca_bml_r2_component; +extern mca_bml_base_component_t mca_bml_r2_component; /* Names of all the BTL components that this BML is aware of */ static char *btl_names = NULL; @@ -64,13 +65,13 @@ static int btl_exclusivity_compare(const void* arg1, const void* arg2) static int mca_bml_r2_add_btls( void ) { int i; - opal_list_t *btls = NULL; + opal_list_t *btls = NULL; mca_btl_base_selected_module_t* selected_btl; - size_t num_btls = 0; + size_t num_btls = 0; char **btl_names_argv = NULL; - + if(true == mca_bml_r2.btls_added) { - return OMPI_SUCCESS; + return OMPI_SUCCESS; } /* build an array of r2s and r2 modules */ @@ -79,11 +80,11 @@ static int mca_bml_r2_add_btls( void ) mca_bml_r2.num_btl_modules = 0; mca_bml_r2.num_btl_progress = 0; - + mca_bml_r2.btl_modules = (mca_btl_base_module_t **)malloc(sizeof(mca_btl_base_module_t*) * num_btls); mca_bml_r2.btl_progress = (mca_btl_base_component_progress_fn_t*)malloc(sizeof(mca_btl_base_component_progress_fn_t) * num_btls); - - if (NULL == mca_bml_r2.btl_modules || + + if (NULL == mca_bml_r2.btl_modules || NULL == mca_bml_r2.btl_progress) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -92,14 +93,14 @@ static int mca_bml_r2_add_btls( void ) mca_btl_base_module_t *btl = selected_btl->btl_module; mca_bml_r2.btl_modules[mca_bml_r2.num_btl_modules++] = btl; for (i = 0; NULL != btl_names_argv && NULL != btl_names_argv[i]; ++i) { - if (0 == + if (0 == strcmp(btl_names_argv[i], btl->btl_component->btl_version.mca_component_name)) { break; } } if (NULL == btl_names_argv || NULL == btl_names_argv[i]) { - opal_argv_append_nosize(&btl_names_argv, + 
opal_argv_append_nosize(&btl_names_argv, btl->btl_component->btl_version.mca_component_name); } } @@ -111,11 +112,11 @@ static int mca_bml_r2_add_btls( void ) } /* sort r2 list by exclusivity */ - qsort(mca_bml_r2.btl_modules, - mca_bml_r2.num_btl_modules, - sizeof(struct mca_btl_base_module_t*), + qsort(mca_bml_r2.btl_modules, + mca_bml_r2.num_btl_modules, + sizeof(struct mca_btl_base_module_t*), btl_exclusivity_compare); - mca_bml_r2.btls_added = true; + mca_bml_r2.btls_added = true; return OMPI_SUCCESS; } @@ -144,36 +145,338 @@ static void mca_bml_r2_calculate_bandwidth_latency (mca_bml_base_btl_array_t *bt } } +static mca_bml_base_endpoint_t *mca_bml_r2_allocate_endpoint (ompi_proc_t *proc) { + mca_bml_base_endpoint_t *bml_endpoint; + + /* allocate bml specific proc data */ + bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t); + if (NULL == bml_endpoint) { + opal_output(0, "%s: unable to allocate resources", __func__); + return NULL; + } + + /* preallocate space in array for max number of r2s */ + mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules); + mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules); + mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules); + bml_endpoint->btl_max_send_size = -1; + bml_endpoint->btl_proc = proc; + + bml_endpoint->btl_flags_or = 0; + return bml_endpoint; +} + +static void mca_bml_r2_register_progress (mca_btl_base_module_t *btl, bool hp) +{ + if (NULL != btl->btl_component->btl_progress) { + bool found = false; + size_t p; + + for (p = 0 ; p < mca_bml_r2.num_btl_progress ; ++p) { + if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) { + found = true; + break; + } + } + + if (found == false || hp) { + if (found == false) { + mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress++] = + btl->btl_component->btl_progress; + } + + if (hp) { + opal_progress_register (btl->btl_component->btl_progress); + } else { + 
opal_progress_register_lp (btl->btl_component->btl_progress); + } + } + } +} + +static int mca_bml_r2_endpoint_add_btl (struct ompi_proc_t *proc, mca_bml_base_endpoint_t *bml_endpoint, + mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *btl_endpoint) +{ + mca_bml_base_btl_t* bml_btl = NULL; + int btl_flags = btl->btl_flags; + bool btl_in_use = false; + size_t size; + + /* NTH: these flags should have been sanitized by the btl. Once that is verified these + * checks can be safely removed. */ + if ((btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put)) { + opal_output(0, "%s: The PUT flag is specified for" + " the %s BTL without any PUT function attached. Discard the flag !", + __func__, + btl->btl_component->btl_version.mca_component_name); + btl_flags ^= MCA_BTL_FLAGS_PUT; + } + if ((btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get)) { + opal_output(0, "%s: The GET flag is specified for" + " the %s BTL without any GET function attached. Discard the flag !", + __func__, btl->btl_component->btl_version.mca_component_name); + btl_flags ^= MCA_BTL_FLAGS_GET; + } + + if ((btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0) { + /* If no protocol specified, we have 2 choices: we ignore the BTL + * as we don't know which protocl to use, or we suppose that all + * BTLs support the send protocol. This is really a btl error as + * these flags should have been sanitized by the btl. 
*/ + btl_flags |= MCA_BTL_FLAGS_SEND; + } + + if (btl_flags & MCA_BTL_FLAGS_SEND) { + /* dont allow an additional BTL with a lower exclusivity ranking */ + size = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_send); + bml_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_send, size - 1); + + if (!bml_btl || bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity) { + /* this btl has higher exclusivity than an existing btl or none exists */ + + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "mca: bml: Using %s btl for send to %s on node %s", + btl->btl_component->btl_version.mca_component_name, + OMPI_NAME_PRINT(&proc->super.proc_name), + proc->super.proc_hostname); + + /* cache the endpoint on the proc */ + if (NULL == bml_btl || (bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity)) { + bml_btl = mca_bml_base_btl_array_insert (&bml_endpoint->btl_send); + bml_btl->btl = btl; + bml_btl->btl_endpoint = btl_endpoint; + bml_btl->btl_weight = 0; + bml_btl->btl_flags = btl_flags; + + /** + * calculate the bitwise OR of the btl flags + */ + bml_endpoint->btl_flags_or |= bml_btl->btl_flags; + } else { + opal_output_verbose(20, opal_btl_base_framework.framework_output, + "mca: bml: Not using %s btl for send to %s on node %s " + "because %s btl has higher exclusivity (%d > %d)", + btl->btl_component->btl_version.mca_component_name, + OMPI_NAME_PRINT(&proc->super.proc_name), proc->super.proc_hostname, + bml_btl->btl->btl_component->btl_version.mca_component_name, + bml_btl->btl->btl_exclusivity, + btl->btl_exclusivity); + } + + btl_in_use = true; + } + } + + /* always add rdma endpoints if they support full rdma */ + if (((btl_in_use && (btl_flags & MCA_BTL_FLAGS_RDMA)) || + (btl_flags & (MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS)) == (MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS)) && + !((proc->super.proc_arch != ompi_proc_local_proc->super.proc_arch) && + (0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) { + 
mca_bml_base_btl_t *bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma); + + bml_btl_rdma->btl = btl; + bml_btl_rdma->btl_endpoint = btl_endpoint; + bml_btl_rdma->btl_weight = 0; + bml_btl_rdma->btl_flags = btl_flags; + + if (bml_endpoint->btl_pipeline_send_length < btl->btl_rdma_pipeline_send_length) { + bml_endpoint->btl_pipeline_send_length = btl->btl_rdma_pipeline_send_length; + } + + if (bml_endpoint->btl_send_limit < btl->btl_min_rdma_pipeline_size) { + bml_endpoint->btl_send_limit = btl->btl_min_rdma_pipeline_size; + } + + btl_in_use = true; + } + + return btl_in_use ? OMPI_SUCCESS : OMPI_ERR_NOT_AVAILABLE; +} + +static void mca_bml_r2_compute_endpoint_metrics (mca_bml_base_endpoint_t *bml_endpoint) +{ + double total_bandwidth = 0; + uint32_t latency; + size_t n_send, n_rdma; + + /* (1) determine the total bandwidth available across all btls + * note that we need to do this here, as we may already have btls configured + * (2) determine the highest priority ranking for latency + * (3) compute the maximum amount of bytes that can be send without any + * weighting. Once the left over is smaller than this number we will + * start using the weight to compute the correct amount. 
+ */ + n_send = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_send); + n_rdma = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma); + + /* sort BTLs in descending order according to bandwidth value */ + qsort (bml_endpoint->btl_send.bml_btls, n_send, + sizeof(mca_bml_base_btl_t), btl_bandwidth_compare); + + bml_endpoint->btl_rdma_index = 0; + + mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_send, &total_bandwidth, &latency); + + /* (1) set the weight of each btl as a percentage of overall bandwidth + * (2) copy all btl instances at the highest priority ranking into the + * list of btls used for first fragments + */ + for (size_t n_index = 0 ; n_index < n_send ; ++n_index) { + mca_bml_base_btl_t *bml_btl = + mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index); + mca_btl_base_module_t *btl = bml_btl->btl; + + /* compute weighting factor for this r2 */ + if(btl->btl_bandwidth > 0) { + bml_btl->btl_weight = (float)(btl->btl_bandwidth / total_bandwidth); + } else { + bml_btl->btl_weight = (float)(1.0 / n_send); + } + + /* check to see if this r2 is already in the array of r2s + * used for first fragments - if not add it. 
+ */ + if(btl->btl_latency == latency) { + mca_bml_base_btl_t* bml_btl_new = + mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager); + *bml_btl_new = *bml_btl; + } + + /* set endpoint max send size as min of available btls */ + if (bml_endpoint->btl_max_send_size > btl->btl_max_send_size) + bml_endpoint->btl_max_send_size = btl->btl_max_send_size; + } + + /* sort BTLs in descending order according to bandwidth value */ + qsort(bml_endpoint->btl_rdma.bml_btls, n_rdma, + sizeof(mca_bml_base_btl_t), btl_bandwidth_compare); + + mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_rdma, &total_bandwidth, &latency); + + /* set rdma btl weights */ + for (size_t n_index = 0 ; n_index < n_rdma ; ++n_index) { + mca_bml_base_btl_t *bml_btl = + mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n_index); + + /* compute weighting factor for this r2 */ + if (bml_btl->btl->btl_bandwidth > 0.0) { + bml_btl->btl_weight = (float)(bml_btl->btl->btl_bandwidth / total_bandwidth); + } else { + bml_btl->btl_weight = (float)(1.0 / n_rdma); + } + } +} + +static int mca_bml_r2_add_proc (struct ompi_proc_t *proc) +{ + mca_bml_base_endpoint_t *bml_endpoint; + /* at least one btl is in use */ + bool btl_in_use = false; + int rc; + + if (OPAL_UNLIKELY(NULL == proc)) { + return OMPI_ERR_BAD_PARAM; + } + + /* check if this endpoint is already set up */ + if (NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { + OBJ_RETAIN(proc); + return OMPI_SUCCESS; + } + + /* add btls if not already done */ + if (OMPI_SUCCESS != (rc = mca_bml_r2_add_btls())) { + return rc; + } + + bml_endpoint = mca_bml_r2_allocate_endpoint (proc); + if (OPAL_UNLIKELY(NULL == bml_endpoint)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (size_t p_index = 0 ; p_index < mca_bml_r2.num_btl_modules ; ++p_index) { + mca_btl_base_module_t *btl = mca_bml_r2.btl_modules[p_index]; + struct mca_btl_base_endpoint_t *btl_endpoint = NULL; + + /* if the r2 can reach the destination proc it sets the + * 
corresponding bit (proc index) in the reachable bitmap + * and can return addressing information for each proc + * that is passed back to the r2 on data transfer calls + */ + rc = btl->btl_add_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint, NULL); + if (OMPI_SUCCESS != rc || NULL == btl_endpoint) { + /* This BTL has troubles adding the nodes. Let's continue maybe some other BTL + * can take care of this task. */ + continue; + } + + rc = mca_bml_r2_endpoint_add_btl (proc, bml_endpoint, btl, btl_endpoint); + if (OMPI_SUCCESS != rc) { + btl->btl_del_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint); + } else { + mca_bml_r2_register_progress (btl, true); + btl_in_use = true; + } + } + + if (!btl_in_use) { + proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL; + OBJ_RELEASE(bml_endpoint); + /* no btl is available for this proc */ + if (mca_bml_r2.show_unreach_errors) { + opal_show_help ("help-mca-bml-r2.txt", "unreachable proc", true, + OMPI_NAME_PRINT(&(ompi_proc_local_proc->super.proc_name)), + (NULL != ompi_proc_local_proc->super.proc_hostname ? + ompi_proc_local_proc->super.proc_hostname : "unknown!"), + OMPI_NAME_PRINT(&(proc->super.proc_name)), + (NULL != proc->super.proc_hostname ? + proc->super.proc_hostname : "unknown!"), + btl_names); + } + + return OMPI_ERR_UNREACH; + } + + /* compute metrics for registered btls */ + mca_bml_r2_compute_endpoint_metrics (bml_endpoint); + + /* do it last, for the lazy initialization check in bml_base_get* */ + opal_atomic_wmb(); + proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint; + + return OMPI_SUCCESS; +} + /* * For each proc setup a datastructure that indicates the BTLs * that can be used to reach the destination. 
* */ -static int mca_bml_r2_add_procs( size_t nprocs, - struct ompi_proc_t** procs, +static int mca_bml_r2_add_procs( size_t nprocs, + struct ompi_proc_t** procs, struct opal_bitmap_t* reachable ) { - size_t p, p_index, n_new_procs = 0; - struct mca_btl_base_endpoint_t ** btl_endpoints = NULL; - struct ompi_proc_t** new_procs = NULL; + size_t n_new_procs = 0; + struct mca_btl_base_endpoint_t ** btl_endpoints = NULL; + struct ompi_proc_t** new_procs = NULL; int rc, ret = OMPI_SUCCESS; if(0 == nprocs) { return OMPI_SUCCESS; } - + if(OMPI_SUCCESS != (rc = mca_bml_r2_add_btls()) ) { return rc; } - + /* Select only the procs that don't yet have the BML proc struct. This prevent * us from calling btl->add_procs several times on the same destination proc. */ - for(p_index = 0; p_index < nprocs; p_index++) { - struct ompi_proc_t* proc = procs[p_index]; + for (size_t p_index = 0 ; p_index < nprocs ; ++p_index) { + struct ompi_proc_t* proc = procs[p_index]; - if(NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { + if(NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { continue; /* go to the next proc */ } /* Allocate the new_procs on demand */ @@ -184,7 +487,7 @@ static int mca_bml_r2_add_procs( size_t nprocs, } } OBJ_RETAIN(proc); - new_procs[n_new_procs++] = proc; + new_procs[n_new_procs++] = proc; } if ( 0 == n_new_procs ) { @@ -192,21 +495,20 @@ static int mca_bml_r2_add_procs( size_t nprocs, } /* Starting from here we only work on the unregistered procs */ - procs = new_procs; - nprocs = n_new_procs; - + procs = new_procs; + nprocs = n_new_procs; + /* attempt to add all procs to each r2 */ - btl_endpoints = (struct mca_btl_base_endpoint_t **) - malloc(nprocs * sizeof(struct mca_btl_base_endpoint_t*)); + btl_endpoints = (struct mca_btl_base_endpoint_t **) + malloc(nprocs * sizeof(struct mca_btl_base_endpoint_t*)); if (NULL == btl_endpoints) { free(new_procs); return OMPI_ERR_OUT_OF_RESOURCE; } - for(p_index = 0; p_index < mca_bml_r2.num_btl_modules; 
p_index++) { - mca_btl_base_module_t* btl = mca_bml_r2.btl_modules[p_index]; + for (size_t p_index = 0 ; p_index < mca_bml_r2.num_btl_modules ; ++p_index) { + mca_btl_base_module_t *btl = mca_bml_r2.btl_modules[p_index]; int btl_inuse = 0; - int btl_flags; /* if the r2 can reach the destination proc it sets the * corresponding bit (proc index) in the reachable bitmap @@ -214,243 +516,69 @@ static int mca_bml_r2_add_procs( size_t nprocs, * that is passed back to the r2 on data transfer calls */ opal_bitmap_clear_all_bits(reachable); - memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*)); + memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*)); rc = btl->btl_add_procs(btl, n_new_procs, (opal_proc_t**)new_procs, btl_endpoints, reachable); - if(OMPI_SUCCESS != rc) { - /* This BTL has troubles adding the nodes. Let's continue maybe some other BTL - * can take care of this task. - */ + if (OMPI_SUCCESS != rc) { + /* This BTL encountered an error while adding procs. Continue in case some other + * BTL(s) can be used. 
*/ continue; } /* for each proc that is reachable */ - for( p = 0; p < n_new_procs; p++ ) { - if(opal_bitmap_is_set_bit(reachable, p)) { - ompi_proc_t *proc = new_procs[p]; - mca_bml_base_endpoint_t * bml_endpoint = - (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - mca_bml_base_btl_t* bml_btl = NULL; - size_t size; - - if(NULL == bml_endpoint) { - /* allocate bml specific proc data */ - bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t); - if (NULL == bml_endpoint) { - opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources"); - free(btl_endpoints); - free(new_procs); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* preallocate space in array for max number of r2s */ - mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules); - mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules); - mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules); - bml_endpoint->btl_max_send_size = -1; - bml_endpoint->btl_proc = proc; - proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint; - - bml_endpoint->btl_flags_or = 0; - } - - btl_flags = btl->btl_flags; - if( (btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put) ) { - opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for" - " the %s BTL without any PUT function attached. Discard the flag !", - btl->btl_component->btl_version.mca_component_name); - btl_flags ^= MCA_BTL_FLAGS_PUT; - } - if( (btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get) ) { - opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for" - " the %s BTL without any GET function attached. 
Discard the flag !", - btl->btl_component->btl_version.mca_component_name); - btl_flags ^= MCA_BTL_FLAGS_GET; - } - - if( (btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0 ) { - /** - * If no protocol specified, we have 2 choices: we ignore the BTL - * as we don't know which protocl to use, or we suppose that all - * BTLs support the send protocol. - */ - btl_flags |= MCA_BTL_FLAGS_SEND; - } - - /* dont allow an additional BTL with a lower exclusivity ranking */ - size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); - if(size > 0) { - bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, size-1); - /* skip this btl if the exclusivity is less than the previous only if the btl does not provide full rdma (for one-sided) */ - if(bml_btl->btl->btl_exclusivity > btl->btl_exclusivity && ((btl_flags & MCA_BTL_FLAGS_RDMA) != MCA_BTL_FLAGS_RDMA)) { - btl->btl_del_procs(btl, 1, (opal_proc_t**)&proc, &btl_endpoints[p]); - opal_output_verbose(20, opal_btl_base_framework.framework_output, - "mca: bml: Not using %s btl to %s on node %s " - "because %s btl has higher exclusivity (%d > %d)", - btl->btl_component->btl_version.mca_component_name, - OMPI_NAME_PRINT(&proc->super.proc_name), proc->super.proc_hostname, - bml_btl->btl->btl_component->btl_version.mca_component_name, - bml_btl->btl->btl_exclusivity, - btl->btl_exclusivity); - continue; - } - } - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "mca: bml: Using %s btl to %s on node %s", - btl->btl_component->btl_version.mca_component_name, - OMPI_NAME_PRINT(&proc->super.proc_name), - proc->super.proc_hostname); - - /* cache the endpoint on the proc */ - if (NULL == bml_btl || (bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity)) { - bml_btl = mca_bml_base_btl_array_insert(&bml_endpoint->btl_send); - bml_btl->btl = btl; - bml_btl->btl_endpoint = btl_endpoints[p]; - bml_btl->btl_weight = 0; - bml_btl->btl_flags = btl_flags; - - /** - * calculate the 
bitwise OR of the btl flags - */ - bml_endpoint->btl_flags_or |= bml_btl->btl_flags; - } - - /* always add rdma endpoints */ - if ((btl_flags & MCA_BTL_FLAGS_RDMA) && - !((proc->super.proc_arch != ompi_proc_local_proc->super.proc_arch) && - (0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) { - mca_bml_base_btl_t *bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma); - - bml_btl_rdma->btl = btl; - bml_btl_rdma->btl_endpoint = btl_endpoints[p]; - bml_btl_rdma->btl_weight = 0; - bml_btl_rdma->btl_flags = btl_flags; - - if (bml_endpoint->btl_pipeline_send_length < btl->btl_rdma_pipeline_send_length) { - bml_endpoint->btl_pipeline_send_length = btl->btl_rdma_pipeline_send_length; - } - - if (bml_endpoint->btl_send_limit < btl->btl_min_rdma_pipeline_size) { - bml_endpoint->btl_send_limit = btl->btl_min_rdma_pipeline_size; - } - } - - /* This BTL is in use, allow the progress registration */ - btl_inuse++; + for (size_t p = 0 ; p < n_new_procs ; ++p) { + if (!opal_bitmap_is_set_bit(reachable, p)) { + continue; } - } - if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) { - size_t p; - bool found = false; - for( p = 0; p < mca_bml_r2.num_btl_progress; p++ ) { - if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) { - found = true; - break; + ompi_proc_t *proc = new_procs[p]; + mca_bml_base_endpoint_t *bml_endpoint = + (mca_bml_base_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; + + if (NULL == bml_endpoint) { + bml_endpoint = mca_bml_r2_allocate_endpoint (proc); + proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint; + if (NULL == bml_endpoint) { + free(btl_endpoints); + free(new_procs); + return OPAL_ERR_OUT_OF_RESOURCE; } } - if(found == false) { - mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress] = - btl->btl_component->btl_progress; - mca_bml_r2.num_btl_progress++; - opal_progress_register( btl->btl_component->btl_progress ); - } - } - } - free(btl_endpoints); - /* iterate back through 
procs and compute metrics for registered r2s */ - for(p=0; pproc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - double total_bandwidth = 0; - uint32_t latency; - size_t n_send, n_rdma; - - /* skip over procs w/ no btl's registered */ - if(NULL == bml_endpoint) { - continue; - } - - /* (1) determine the total bandwidth available across all btls - * note that we need to do this here, as we may already have btls configured - * (2) determine the highest priority ranking for latency - * (3) compute the maximum amount of bytes that can be send without any - * weighting. Once the left over is smaller than this number we will - * start using the weight to compute the correct amount. - */ - n_send = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); - n_rdma = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); - - /* sort BTLs in descending order according to bandwidth value */ - qsort(bml_endpoint->btl_send.bml_btls, n_send, - sizeof(mca_bml_base_btl_t), btl_bandwidth_compare); - - bml_endpoint->btl_rdma_index = 0; - - mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_send, &total_bandwidth, &latency); - - /* (1) set the weight of each btl as a percentage of overall bandwidth - * (2) copy all btl instances at the highest priority ranking into the - * list of btls used for first fragments - */ - for (size_t n_index = 0 ; n_index < n_send ; ++n_index) { - mca_bml_base_btl_t* bml_btl = - mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index); - mca_btl_base_module_t *btl = bml_btl->btl; - - /* compute weighting factor for this r2 */ - if(btl->btl_bandwidth > 0) { - bml_btl->btl_weight = (float)(btl->btl_bandwidth / total_bandwidth); - } else { - bml_btl->btl_weight = (float)(1.0 / n_send); - } - - /* check to see if this r2 is already in the array of r2s - * used for first fragments - if not add it. 
- */ - if(btl->btl_latency == latency) { - mca_bml_base_btl_t* bml_btl_new = - mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager); - *bml_btl_new = *bml_btl; + rc = mca_bml_r2_endpoint_add_btl (proc, bml_endpoint, btl, btl_endpoints[p]); + if (OMPI_SUCCESS != rc) { + btl->btl_del_procs(btl, 1, (opal_proc_t**)&proc, &btl_endpoints[p]); + continue; } - /* set endpoint max send size as min of available btls */ - if(bml_endpoint->btl_max_send_size > btl->btl_max_send_size) - bml_endpoint->btl_max_send_size = btl->btl_max_send_size; + /* This BTL is in use, allow the progress registration */ + btl_inuse++; } - /* sort BTLs in descending order according to bandwidth value */ - qsort(bml_endpoint->btl_rdma.bml_btls, n_rdma, - sizeof(mca_bml_base_btl_t), btl_bandwidth_compare); + mca_bml_r2_register_progress (btl, !!(btl_inuse)); + } - mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_rdma, &total_bandwidth, &latency); + free(btl_endpoints); - /* set rdma btl weights */ - for (size_t n_index = 0 ; n_index < n_rdma ; ++n_index) { - mca_bml_base_btl_t *bml_btl = - mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n_index); + /* iterate back through procs and compute metrics for registered r2s */ + for (size_t p = 0; p < n_new_procs ; ++p) { + mca_bml_base_endpoint_t *bml_endpoint = + (mca_bml_base_endpoint_t *) new_procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - /* compute weighting factor for this r2 */ - if (bml_btl->btl->btl_bandwidth > 0.0) { - bml_btl->btl_weight = (float)(bml_btl->btl->btl_bandwidth / total_bandwidth); - } else { - bml_btl->btl_weight = (float)(1.0 / n_rdma); - } + /* skip over procs w/ no btl's registered */ + if (NULL != bml_endpoint) { + mca_bml_r2_compute_endpoint_metrics (bml_endpoint); } } /* see if we have a connection to everyone else */ - for(p = 0; p < n_new_procs; p++) { + for(size_t p = 0; p < n_new_procs ; ++p) { ompi_proc_t *proc = new_procs[p]; if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { 
ret = OMPI_ERR_UNREACH; if (mca_bml_r2.show_unreach_errors) { - opal_show_help("help-mca-bml-r2.txt", - "unreachable proc", - true, + opal_show_help("help-mca-bml-r2.txt", "unreachable proc", true, OMPI_NAME_PRINT(&(ompi_proc_local_proc->super.proc_name)), (NULL != ompi_proc_local_proc->super.proc_hostname ? ompi_proc_local_proc->super.proc_hostname : "unknown!"), @@ -459,6 +587,7 @@ static int mca_bml_r2_add_procs( size_t nprocs, proc->super.proc_hostname : "unknown!"), btl_names); } + break; } } @@ -473,39 +602,39 @@ static int mca_bml_r2_add_procs( size_t nprocs, * with the proc that it is/has gone away */ -static int mca_bml_r2_del_procs(size_t nprocs, - struct ompi_proc_t** procs) +static int mca_bml_r2_del_procs(size_t nprocs, + struct ompi_proc_t** procs) { - size_t p; int rc; - struct ompi_proc_t** del_procs = (struct ompi_proc_t**) - malloc(nprocs * sizeof(struct ompi_proc_t*)); - size_t n_del_procs = 0; + struct ompi_proc_t** del_procs = (struct ompi_proc_t**) + malloc(nprocs * sizeof(struct ompi_proc_t*)); + size_t n_del_procs = 0; if (NULL == del_procs) { return OMPI_ERR_OUT_OF_RESOURCE; } - for(p = 0; p < nprocs; p++) { - ompi_proc_t *proc = procs[p]; + for (size_t p = 0 ; p < nprocs ; ++p) { + ompi_proc_t *proc = procs[p]; /* We much check that there are 2 references to the proc (not 1). The * first reference belongs to ompi/proc the second belongs to the bml * since we retained it. We will release that reference at the end of * the loop below. 
*/ - if(((opal_object_t*)proc)->obj_reference_count == 2) { - del_procs[n_del_procs++] = proc; + if (((opal_object_t*)proc)->obj_reference_count == 2 && + NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { + del_procs[n_del_procs++] = proc; } } - for(p = 0; p < n_del_procs; p++) { + for (size_t p = 0 ; p < n_del_procs ; ++p) { ompi_proc_t *proc = del_procs[p]; mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - size_t f_index, f_size; + size_t f_size; /* notify each btl that the proc is going away */ f_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); - for(f_index = 0; f_index < f_size; f_index++) { + for (size_t f_index = 0 ; f_index < f_size ; ++f_index) { mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, f_index); mca_btl_base_module_t* btl = bml_btl->btl; @@ -521,10 +650,12 @@ static int mca_bml_r2_del_procs(size_t nprocs, */ } + proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL; + OBJ_RELEASE(proc); + /* do any required cleanup */ OBJ_RELEASE(bml_endpoint); - proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL; } free(del_procs); @@ -542,7 +673,7 @@ static inline int bml_r2_remove_btl_progress(mca_btl_base_module_t* btl) if(btl->btl_component->btl_progress != mca_bml_r2.btl_progress[p]) continue; opal_progress_unregister( btl->btl_component->btl_progress ); - if( p < (mca_bml_r2.num_btl_progress-1) ) { + if( p < (mca_bml_r2.num_btl_progress-1) ) { mca_bml_r2.btl_progress[p] = mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress-1]; } mca_bml_r2.num_btl_progress--; @@ -564,11 +695,11 @@ static int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl /* remove btl from eager list */ mca_bml_base_btl_array_remove(&ep->btl_eager, btl); - - /* remove btl from send list */ - if(mca_bml_base_btl_array_remove(&ep->btl_send, btl)) { - - /* compute total_bandwidth and + + /* remove btl from send list */ + 
if(mca_bml_base_btl_array_remove(&ep->btl_send, btl)) { + + /* compute total_bandwidth and reset max_send_size to the min of all btl's */ total_bandwidth = 0; ep->btl_max_send_size = -1; @@ -581,7 +712,7 @@ static int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl ep->btl_max_send_size = ep_btl->btl_max_send_size; } } - + /* compute weighting factor for this btl */ for(b=0; b< mca_bml_base_btl_array_get_size(&ep->btl_send); b++) { bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_send, b); @@ -596,8 +727,8 @@ static int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl } /* remove btl from RDMA list */ - if(mca_bml_base_btl_array_remove(&ep->btl_rdma, btl)) { - + if(mca_bml_base_btl_array_remove(&ep->btl_rdma, btl)) { + /* compute total bandwidth */ total_bandwidth = 0; ep->btl_pipeline_send_length = 0; @@ -615,7 +746,7 @@ static int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl ep->btl_send_limit = ep_btl->btl_min_rdma_pipeline_size; } } - + /* compute weighting factor for this btl */ for(b=0; b< mca_bml_base_btl_array_get_size(&ep->btl_rdma); b++) { bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_rdma, b); @@ -628,12 +759,12 @@ static int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl } } } - + return OMPI_SUCCESS; } int mca_bml_r2_finalize( void ) -{ +{ ompi_proc_t** procs; size_t p, num_procs; opal_list_item_t* w_item; @@ -686,7 +817,7 @@ int mca_bml_r2_finalize( void ) care of that. 
*/ return OMPI_SUCCESS; -} +} /* @@ -701,16 +832,16 @@ static int mca_bml_r2_del_btl(mca_btl_base_module_t* btl) opal_list_item_t* item; mca_btl_base_module_t** modules; bool found = false; - - if(opal_list_get_size(&mca_btl_base_modules_initialized) == 2) { + + if(opal_list_get_size(&mca_btl_base_modules_initialized) == 2) { opal_output(0, "only one BTL left, can't failover"); return OMPI_SUCCESS; } - + procs = ompi_proc_all(&num_procs); if(NULL == procs) return OMPI_SUCCESS; - + /* Get rid of the associated progress function */ bml_r2_remove_btl_progress(btl); @@ -767,8 +898,8 @@ static int mca_bml_r2_add_btl(mca_btl_base_module_t* btl) /* * Register callback w/ all active btls */ -static int mca_bml_r2_register( mca_btl_base_tag_t tag, - mca_btl_base_module_recv_cb_fn_t cbfunc, +static int mca_bml_r2_register( mca_btl_base_tag_t tag, + mca_btl_base_module_recv_cb_fn_t cbfunc, void* data ) { mca_btl_base_active_message_trigger[tag].cbfunc = cbfunc; @@ -780,18 +911,18 @@ static int mca_bml_r2_register( mca_btl_base_tag_t tag, int i, rc; mca_btl_base_module_t *btl; - for(i = 0; i < (int)mca_bml_r2.num_btl_modules; i++) { + for(i = 0; i < (int)mca_bml_r2.num_btl_modules; i++) { btl = mca_bml_r2.btl_modules[i]; if( NULL == btl->btl_register ) continue; - rc = btl->btl_register(btl, tag, cbfunc, data); + rc = btl->btl_register(btl, tag, cbfunc, data); if(OMPI_SUCCESS != rc) { return rc; } } } - return OMPI_SUCCESS; + return OMPI_SUCCESS; } @@ -802,28 +933,28 @@ static int mca_bml_r2_register( mca_btl_base_tag_t tag, static int mca_bml_r2_register_error( mca_btl_base_module_error_cb_fn_t cbfunc) { - uint32_t i; + uint32_t i; int rc; - mca_btl_base_module_t *btl; + mca_btl_base_module_t *btl; uint32_t ver; - - for(i = 0; i < mca_bml_r2.num_btl_modules; i++) { - btl = mca_bml_r2.btl_modules[i]; - /* this wont work for version numbers greater than 256... 
seems + + for(i = 0; i < mca_bml_r2.num_btl_modules; i++) { + btl = mca_bml_r2.btl_modules[i]; + /* this wont work for version numbers greater than 256... seems reasonable.. */ ver = btl->btl_component->btl_version.mca_type_major_version << 16 | btl->btl_component->btl_version.mca_type_minor_version << 8 | btl->btl_component->btl_version.mca_type_release_version; /* is version number greater than or equal to 1.0.1? */ - if(ver >= ((1 << 16) | (0 << 8) | 1) && - NULL != btl->btl_register_error) { - rc = btl->btl_register_error(btl, cbfunc); + if(ver >= ((1 << 16) | (0 << 8) | 1) && + NULL != btl->btl_register_error) { + rc = btl->btl_register_error(btl, cbfunc); if(OMPI_SUCCESS != rc) { return rc; } } } - return OMPI_SUCCESS; + return OMPI_SUCCESS; } @@ -834,18 +965,18 @@ int mca_bml_r2_component_fini(void) } mca_bml_r2_module_t mca_bml_r2 = { - { - &mca_bml_r2_component, - mca_bml_r2_add_procs, - mca_bml_r2_del_procs, - mca_bml_r2_add_btl, - mca_bml_r2_del_btl, - mca_bml_r2_del_proc_btl, - mca_bml_r2_register, - mca_bml_r2_register_error, - mca_bml_r2_finalize, - mca_bml_r2_ft_event - } - + .super = { + .bml_component = &mca_bml_r2_component, + .bml_add_proc = mca_bml_r2_add_proc, + .bml_add_procs = mca_bml_r2_add_procs, + .bml_del_procs = mca_bml_r2_del_procs, + .bml_add_btl = mca_bml_r2_add_btl, + .bml_del_btl = mca_bml_r2_del_btl, + .bml_del_proc_btl = mca_bml_r2_del_proc_btl, + .bml_register = mca_bml_r2_register, + .bml_register_error = mca_bml_r2_register_error, + .bml_finalize = mca_bml_r2_finalize, + .bml_ft_event = NULL, + }, }; diff --git a/ompi/mca/bml/r2/bml_r2.h b/ompi/mca/bml/r2/bml_r2.h index 087d68eab51..69d4e7eacee 100644 --- a/ompi/mca/bml/r2/bml_r2.h +++ b/ompi/mca/bml/r2/bml_r2.h @@ -5,14 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -38,11 +40,11 @@ BEGIN_C_DECLS * BML module interface functions and attributes. */ struct mca_bml_r2_module_t { - mca_bml_base_module_t super; + mca_bml_base_module_t super; size_t num_btl_modules; - mca_btl_base_module_t** btl_modules; - size_t num_btl_progress; - mca_btl_base_component_progress_fn_t * btl_progress; + mca_btl_base_module_t** btl_modules; + size_t num_btl_progress; + mca_btl_base_component_progress_fn_t * btl_progress; bool btls_added; bool show_unreach_errors; }; @@ -50,21 +52,19 @@ struct mca_bml_r2_module_t { typedef struct mca_bml_r2_module_t mca_bml_r2_module_t; OMPI_DECLSPEC extern mca_bml_base_component_2_0_0_t mca_bml_r2_component; -extern mca_bml_r2_module_t mca_bml_r2; +extern mca_bml_r2_module_t mca_bml_r2; -int mca_bml_r2_component_open(void); -int mca_bml_r2_component_close(void); +int mca_bml_r2_component_open(void); +int mca_bml_r2_component_close(void); mca_bml_base_module_t* mca_bml_r2_component_init( int* priority, - bool enable_progress_threads, - bool enable_mpi_threads ); + bool enable_progress_threads, + bool enable_mpi_threads ); -int mca_bml_r2_progress(void); +int mca_bml_r2_progress(void); int mca_bml_r2_component_fini(void); -int mca_bml_r2_ft_event(int status); - int mca_bml_r2_finalize( void ); END_C_DECLS diff --git a/ompi/mca/bml/r2/bml_r2_component.c b/ompi/mca/bml/r2/bml_r2_component.c index e5cc4b7b06d..acb4189fb5e 100644 --- a/ompi/mca/bml/r2/bml_r2_component.c +++ b/ompi/mca/bml/r2/bml_r2_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2006 The University of 
Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -67,29 +67,29 @@ static int mca_bml_r2_component_register(void) int mca_bml_r2_component_open(void) { - return OMPI_SUCCESS; + return OMPI_SUCCESS; } int mca_bml_r2_component_close(void) { - + /* OBJ_DESTRUCT(&mca_bml_r2.lock); */ return OMPI_SUCCESS; } -mca_bml_base_module_t* mca_bml_r2_component_init( int* priority, +mca_bml_base_module_t* mca_bml_r2_component_init( int* priority, bool enable_progress_threads, bool enable_mpi_threads ) { /* initialize BTLs */ - + if(OMPI_SUCCESS != mca_btl_base_select(enable_progress_threads,enable_mpi_threads)) return NULL; - - *priority = 100; - mca_bml_r2.btls_added = false; + + *priority = 100; + mca_bml_r2.btls_added = false; return &mca_bml_r2.super; } diff --git a/ompi/mca/bml/r2/bml_r2_ft.c b/ompi/mca/bml/r2/bml_r2_ft.c deleted file mode 100644 index 47fdecbac23..00000000000 --- a/ompi/mca/bml/r2/bml_r2_ft.c +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. 
- * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "opal/util/output.h" - -#include -#include - -#include "opal/runtime/opal_progress.h" -#include "opal/mca/btl/base/base.h" -#include "opal/mca/pmix/pmix.h" - -#include "ompi/runtime/ompi_cr.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/mca/bml/base/bml_base_btl.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/proc/proc.h" - -#include "bml_r2.h" -#include "bml_r2_ft.h" - -int mca_bml_r2_ft_event(int state) -{ -#if OPAL_ENABLE_FT_CR == 1 - static bool first_continue_pass = false; - ompi_proc_t** procs = NULL; - size_t num_procs; - size_t btl_idx; - int ret, p; - int loc_state; - int param_type = -1; - const char **btl_list; - - if(OPAL_CRS_CHECKPOINT == state) { - /* Do nothing for now */ - } - else if(OPAL_CRS_CONTINUE == state) { - first_continue_pass = !first_continue_pass; - - /* Since nothing in Checkpoint, we are fine here (unless required by BTL) */ - if (opal_cr_continue_like_restart && !first_continue_pass) { - procs = ompi_proc_all(&num_procs); - if(NULL == procs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - } - } - else if(OPAL_CRS_RESTART_PRE == state ) { - /* Nothing here */ - } - else if(OPAL_CRS_RESTART == state ) { - procs = ompi_proc_all(&num_procs); - if(NULL == procs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - /* Never call the ft_event functions attached to the BTLs on the second - * pass of RESTART since on the first pass they were unloaded and therefore - * no longer exist. 
- */ - if( OPAL_CRS_RESTART != state ) { - if( OPAL_CRS_CONTINUE == state && !first_continue_pass ) { - ; - } else { - /* Since we only ever call into the BTLs once during the first restart - * pass, just lie to them on this pass for a bit of local clarity. - */ - if( OPAL_CRS_RESTART_PRE == state ) { - loc_state = OPAL_CRS_RESTART; - } else { - loc_state = state; - } - - /* - * Call ft_event in: - * - BTL modules - * - MPool modules - * - * These should be cleaning out stale state, and memory references in - * preparation for being shut down. - */ - for(btl_idx = 0; btl_idx < mca_bml_r2.num_btl_modules; btl_idx++) { - /* - * Notify Mpool - */ - if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_mpool && - NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_ft_event ) { - opal_output_verbose(10, ompi_cr_output, - "bml:r2: ft_event: Notify the %s MPool.\n", - (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_component->mpool_version.mca_component_name); - if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_ft_event(loc_state) ) ) { - continue; - } - } - - /* - * Notify BTL - */ - if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event) { - opal_output_verbose(10, ompi_cr_output, - "bml:r2: ft_event: Notify the %s BTL.\n", - (mca_bml_r2.btl_modules[btl_idx])->btl_component->btl_version.mca_component_name); - if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event(loc_state) ) ) { - continue; - } - } - } - } /* OPAL_CRS_CONTINUE == state && !first_continue_pass */ - } - - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - /* Matches OPAL_CRS_RESTART_PRE */ - if (opal_cr_continue_like_restart && first_continue_pass) { - if( OMPI_SUCCESS != (ret = mca_bml_r2_finalize()) ) { - opal_output(0, "bml:r2: ft_event(Restart): Failed to finalize BML framework\n"); - return ret; - } - if( OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_btl_base_framework)) ) { - opal_output(0, 
"bml:r2: ft_event(Restart): Failed to close BTL framework\n"); - return ret; - } - } - /* Matches OPAL_CRS_RESTART */ - else if (opal_cr_continue_like_restart && !first_continue_pass) { - /* - * Barrier to make all processes have been successfully restarted before - * we try to remove some restart only files. - */ - opal_pmix.fence(NULL, 0); - - /* - * Re-open the BTL framework to get the full list of components. - */ - if( OMPI_SUCCESS != (ret = mca_base_framework_open(&opal_btl_base_framework, 0)) ) { - opal_output(0, "bml:r2: ft_event(Restart): Failed to open BTL framework\n"); - return ret; - } - - /* - * Re-select the BTL components/modules - * This will cause the BTL components to discover the available - * network options on this machine, and post proper modex informaiton. - */ - if( OMPI_SUCCESS != (ret = mca_btl_base_select(OPAL_ENABLE_PROGRESS_THREADS, - OMPI_ENABLE_THREAD_MULTIPLE) ) ) { - opal_output(0, "bml:r2: ft_event(Restart): Failed to select in BTL framework\n"); - return ret; - } - - /* - * Clear some structures so we can properly repopulate them - */ - mca_bml_r2.btls_added = false; - - for(p = 0; p < (int)num_procs; ++p) { - if( NULL != procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { - OBJ_RELEASE(procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]); - procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL; - } - - OBJ_RELEASE(procs[p]); - } - - if( NULL != procs ) { - free(procs); - procs = NULL; - } - } - } - else if(OPAL_CRS_RESTART_PRE == state ) { - opal_output_verbose(10, ompi_cr_output, - "bml:r2: ft_event(Restart): Finalize BML\n"); - - /* - * Finalize the BML - * - Flush progress functions - * - Flush module references - * - mca_btl_base_close() - * Need to do this because we may have BTL components that were - * unloaded in the first selection that may be available now. - * Conversely we may have BTL components loaded now that - * are not available now. 
- */ - if( OMPI_SUCCESS != (ret = mca_bml_r2_finalize()) ) { - opal_output(0, "bml:r2: ft_event(Restart): Failed to finalize BML framework\n"); - return ret; - } - if( OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_btl_base_framework)) ) { - opal_output(0, "bml:r2: ft_event(Restart): Failed to close BTL framework\n"); - return ret; - } - } - else if(OPAL_CRS_RESTART == state ) { - - /* - * Barrier to make all processes have been successfully restarted before - * we try to remove some restart only files. - */ - opal_pmix.fence(NULL, 0); - - /* - * Re-open the BTL framework to get the full list of components. - * - but first clear the MCA value that was there - */ - param_type = mca_base_var_find("ompi", "btl", NULL, NULL); - btl_list = NULL; - mca_base_var_get_value(param_type, &btl_list, NULL, NULL); - opal_output_verbose(11, ompi_cr_output, - "Restart (Previous BTL MCA): <%s>\n", btl_list ? btl_list[0] : ""); - - if( OMPI_SUCCESS != (ret = mca_base_framework_open(&opal_btl_base_framework, 0)) ) { - opal_output(0, "bml:r2: ft_event(Restart): Failed to open BTL framework\n"); - return ret; - } - - /* The reregistered paramter is guaranteed to have the same index */ - btl_list = NULL; - mca_base_var_get_value(param_type, &btl_list, NULL, NULL); - opal_output_verbose(11, ompi_cr_output, - "Restart (New BTL MCA): <%s>\n", btl_list ? btl_list[0] : ""); - if( NULL != btl_list ) { - free(btl_list); - btl_list = NULL; - } - - /* - * Re-select the BTL components/modules - * This will cause the BTL components to discover the available - * network options on this machine, and post proper modex informaiton. 
- */ - if( OMPI_SUCCESS != (ret = mca_btl_base_select(OPAL_ENABLE_PROGRESS_THREADS, - OMPI_ENABLE_THREAD_MULTIPLE) ) ) { - opal_output(0, "bml:r2: ft_event(Restart): Failed to select in BTL framework\n"); - return ret; - } - - /* - * Clear some structures so we can properly repopulate them - */ - mca_bml_r2.btls_added = false; - - for(p = 0; p < (int)num_procs; ++p) { - if( NULL != procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { - OBJ_RELEASE(procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]); - procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL; - } - - OBJ_RELEASE(procs[p]); - } - - if( NULL != procs ) { - free(procs); - procs = NULL; - } - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } -#endif - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bml/r2/bml_r2_ft.h b/ompi/mca/bml/r2/bml_r2_ft.h deleted file mode 100644 index 59b3445fcb0..00000000000 --- a/ompi/mca/bml/r2/bml_r2_ft.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BML_R2_FT_H -#define MCA_BML_R2_FT_H - - -BEGIN_C_DECLS - -END_C_DECLS - -#endif diff --git a/ompi/mca/bml/r2/configure.m4 b/ompi/mca/bml/r2/configure.m4 index 308c6827929..95cb2a94cc5 100644 --- a/ompi/mca/bml/r2/configure.m4 +++ b/ompi/mca/bml/r2/configure.m4 @@ -2,9 +2,9 @@ # # Copyright (c) 2013 Sandia National Laboratories. 
All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -16,7 +16,7 @@ AC_DEFUN([MCA_ompi_bml_r2_POST_CONFIG], [ AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([BML])]) ])dnl -# MCA_ompi_bml_r2_CONFIG(action-if-can-compile, +# MCA_ompi_bml_r2_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ # We can always build, unless we were explicitly disabled. diff --git a/ompi/mca/bml/r2/help-mca-bml-r2.txt b/ompi/mca/bml/r2/help-mca-bml-r2.txt index f141c4e7ef8..69d7c24db64 100644 --- a/ompi/mca/bml/r2/help-mca-bml-r2.txt +++ b/ompi/mca/bml/r2/help-mca-bml-r2.txt @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for Open MPI. diff --git a/ompi/mca/coll/Makefile.am b/ompi/mca/coll/Makefile.am index 51630d2543c..b6b27ddb1ae 100644 --- a/ompi/mca/coll/Makefile.am +++ b/ompi/mca/coll/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/coll/base/Makefile.am b/ompi/mca/coll/base/Makefile.am index 7f917c76c13..21c144bf782 100644 --- a/ompi/mca/coll/base/Makefile.am +++ b/ompi/mca/coll/base/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2015 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/coll/base/README.memory_management b/ompi/mca/coll/base/README.memory_management new file mode 100644 index 00000000000..1e34f577c1e --- /dev/null +++ b/ompi/mca/coll/base/README.memory_management @@ -0,0 +1,124 @@ + /* This comment applies to all collectives (including the basic + * module) where we allocate a temporary buffer. For the next few + * lines of code, it's tremendously complicated how we decided that + * this was the Right Thing to do. Sit back and enjoy. And prepare + * to have your mind warped. :-) + * + * Recall some definitions (I always get these backwards, so I'm + * going to put them here): + * + * extent: the length from the lower bound to the upper bound -- may + * be considerably larger than the buffer required to hold the data + * (or smaller! But it's easiest to think about when it's larger). + * + * true extent: the exact number of bytes required to hold the data + * in the layout pattern in the datatype. 
+ * + * For example, consider the following buffer (just talking about + * true_lb, extent, and true extent -- extrapolate for true_ub): + * + * A B C + * -------------------------------------------------------- + * | | | + * -------------------------------------------------------- + * + * There are multiple cases: + * + * 1. A is what we give to MPI_Send (and friends), and A is where + * the data starts, and C is where the data ends. In this case: + * + * - extent: C-A + * - true extent: C-A + * - true_lb: 0 + * + * A C + * -------------------------------------------------------- + * | | + * -------------------------------------------------------- + * <=======================extent=========================> + * <======================true extent=====================> + * + * 2. A is what we give to MPI_Send (and friends), B is where the + * data starts, and C is where the data ends. In this case: + * + * - extent: C-A + * - true extent: C-B + * - true_lb: positive + * + * A B C + * -------------------------------------------------------- + * | | User buffer | + * -------------------------------------------------------- + * <=======================extent=========================> + * <===============true extent=============> + * + * 3. B is what we give to MPI_Send (and friends), A is where the + * data starts, and C is where the data ends. In this case: + * + * - extent: C-A + * - true extent: C-A + * - true_lb: negative + * + * A B C + * -------------------------------------------------------- + * | | User buffer | + * -------------------------------------------------------- + * <=======================extent=========================> + * <======================true extent=====================> + * + * 4. MPI_BOTTOM is what we give to MPI_Send (and friends), B is + * where the data starts, and C is where the data ends.
In this + * case: + * + * - extent: C-MPI_BOTTOM + * - true extent: C-B + * - true_lb: [potentially very large] positive + * + * MPI_BOTTOM B C + * -------------------------------------------------------- + * | | User buffer | + * -------------------------------------------------------- + * <=======================extent=========================> + * <===============true extent=============> + * + * So in all cases, for a temporary buffer, all we need to malloc() + * is a buffer of size true_extent. We therefore need to know two + * pointer values: what value to give to MPI_Send (and friends) and + * what value to give to free(), because they might not be the same. + * + * Clearly, what we give to free() is exactly what was returned from + * malloc(). That part is easy. :-) + * + * What we give to MPI_Send (and friends) is a bit more complicated. + * Let's take the 4 cases from above: + * + * 1. If A is what we give to MPI_Send and A is where the data + * starts, then clearly we give to MPI_Send what we got back from + * malloc(). + * + * 2. If B is what we get back from malloc, but we give A to + * MPI_Send, then the buffer range [A,B) represents "dead space" + * -- no data will be put there. So it's safe to give B-true_lb to + * MPI_Send. More specifically, the true_lb is positive, so B-true_lb is + * actually A. + * + * 3. If A is what we get back from malloc, and B is what we give to + * MPI_Send, then the true_lb is negative, so A-true_lb will actually equal + * B. + * + * 4. Although this seems like the weirdest case, it's actually + * quite similar to case #2 -- the pointer we give to MPI_Send is + * smaller than the pointer we got back from malloc(). + * + * Hence, in all cases, we give (return_from_malloc - true_lb) to MPI_Send. + * + * This works fine and dandy if we only have (count==1), which we + * rarely do. ;-) So we really need to allocate (true_extent + + * ((count - 1) * extent)) to get enough space for the rest. 
This may + * be more than is necessary, but it's ok. + * + * Simple, no? :-) + * + */ + + diff --git a/ompi/mca/coll/base/base.h b/ompi/mca/coll/base/base.h index 3d54de22bf2..9fc431f1ff2 100644 --- a/ompi/mca/coll/base/base.h +++ b/ompi/mca/coll/base/base.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -23,7 +23,7 @@ * These functions are normally invoked by the back-ends of: * * - The back-ends of MPI_Init() and MPI_Finalize() - * - Communuicactor constructors (e.g., MPI_Comm_split()) and + * - Communicator constructors (e.g., MPI_Comm_split()) and * destructors (e.g., MPI_Comm_free()) * - The laminfo command */ diff --git a/ompi/mca/coll/base/coll_base_allgather.c b/ompi/mca/coll/base/coll_base_allgather.c index 6c90b10fa56..cafc6eac3ee 100644 --- a/ompi/mca/coll/base/coll_base_allgather.c +++ b/ompi/mca/coll/base/coll_base_allgather.c @@ -12,7 +12,7 @@ * Copyright (c) 2009 University of Houston. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All Rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -82,7 +82,7 @@ * [4] [4] [4] [4] [4] [4] * [5] [5] [5] [5] [5] [5] */ -int ompi_coll_base_allgather_intra_bruck(void *sbuf, int scount, +int ompi_coll_base_allgather_intra_bruck(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -167,19 +167,16 @@ int ompi_coll_base_allgather_intra_bruck(void *sbuf, int scount, - copy blocks from shift buffer starting at block [rank] in rbuf. */ if (0 != rank) { - ptrdiff_t true_extent, true_lb; char *free_buf = NULL, *shift_buf = NULL; + ptrdiff_t span, gap; - err = ompi_datatype_get_true_extent(rdtype, &true_lb, &true_extent); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } + span = opal_datatype_span(&rdtype->super, (int64_t)(size - rank) * rcount, &gap); - free_buf = (char*) calloc(((true_extent + - ((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount - 1) * rext)), - sizeof(char)); + free_buf = (char*)calloc(span, sizeof(char)); if (NULL == free_buf) { line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl; } - shift_buf = free_buf - true_lb; + shift_buf = free_buf - gap; /* 1. copy blocks [0 .. (size - rank - 1)] from rbuf to shift buffer */ err = ompi_datatype_copy_content_same_ddt(rdtype, ((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount), @@ -255,7 +252,7 @@ int ompi_coll_base_allgather_intra_bruck(void *sbuf, int scount, * step, and send them appropriate messages. */ int -ompi_coll_base_allgather_intra_recursivedoubling(void *sbuf, int scount, +ompi_coll_base_allgather_intra_recursivedoubling(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -362,7 +359,7 @@ ompi_coll_base_allgather_intra_recursivedoubling(void *sbuf, int scount, * No additional memory requirements. 
* */ -int ompi_coll_base_allgather_intra_ring(void *sbuf, int scount, +int ompi_coll_base_allgather_intra_ring(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -490,7 +487,7 @@ int ompi_coll_base_allgather_intra_ring(void *sbuf, int scount, * [5] [5] [5] [5] [5] [5] */ int -ompi_coll_base_allgather_intra_neighborexchange(void *sbuf, int scount, +ompi_coll_base_allgather_intra_neighborexchange(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -606,7 +603,7 @@ ompi_coll_base_allgather_intra_neighborexchange(void *sbuf, int scount, } -int ompi_coll_base_allgather_intra_two_procs(void *sbuf, int scount, +int ompi_coll_base_allgather_intra_two_procs(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -622,6 +619,10 @@ int ompi_coll_base_allgather_intra_two_procs(void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "ompi_coll_base_allgather_intra_two_procs rank %d", rank)); + if (2 != ompi_comm_size(comm)) { + return MPI_ERR_UNSUPPORTED_OPERATION; + } + err = ompi_datatype_get_extent (sdtype, &lb, &sext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -687,7 +688,7 @@ int ompi_coll_base_allgather_intra_two_procs(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_base_allgather_intra_basic_linear(void *sbuf, int scount, +ompi_coll_base_allgather_intra_basic_linear(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, diff --git a/ompi/mca/coll/base/coll_base_allgatherv.c b/ompi/mca/coll/base/coll_base_allgatherv.c index 797a317e8fd..164978fddc9 100644 --- a/ompi/mca/coll/base/coll_base_allgatherv.c +++ b/ompi/mca/coll/base/coll_base_allgatherv.c @@ -13,8 +13,10 @@ * Copyright (c) 2009 University of Houston. All rights reserved. 
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -100,10 +102,10 @@ mca_base_var_enum_value_t coll_base_allgatherv_algorithms[] = { * [5] [5] [5] [5] [5] [5] [5] * [6] [6] [6] [6] [6] [6] [6] */ -int ompi_coll_base_allgatherv_intra_bruck(void *sbuf, int scount, +int ompi_coll_base_allgatherv_intra_bruck(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *rdispls, + void *rbuf, const int *rcounts, + const int *rdispls, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -230,9 +232,9 @@ int ompi_coll_base_allgatherv_intra_bruck(void *sbuf, int scount, * No additional memory requirements. 
* */ -int ompi_coll_base_allgatherv_intra_ring(void *sbuf, int scount, +int ompi_coll_base_allgatherv_intra_ring(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *rdisps, + void* rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -359,9 +361,9 @@ int ompi_coll_base_allgatherv_intra_ring(void *sbuf, int scount, * [5] [5] [5] [5] [5] [5] */ int -ompi_coll_base_allgatherv_intra_neighborexchange(void *sbuf, int scount, +ompi_coll_base_allgatherv_intra_neighborexchange(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *rdispls, + void* rbuf, const int *rcounts, const int *rdispls, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -506,10 +508,10 @@ ompi_coll_base_allgatherv_intra_neighborexchange(void *sbuf, int scount, } -int ompi_coll_base_allgatherv_intra_two_procs(void *sbuf, int scount, +int ompi_coll_base_allgatherv_intra_two_procs(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, - int *rdispls, + void* rbuf, const int *rcounts, + const int *rdispls, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -523,6 +525,10 @@ int ompi_coll_base_allgatherv_intra_two_procs(void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "ompi_coll_base_allgatherv_intra_two_procs rank %d", rank)); + if (2 != ompi_comm_size(comm)) { + return MPI_ERR_UNSUPPORTED_OPERATION; + } + err = ompi_datatype_get_extent (sdtype, &lb, &sext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -590,15 +596,15 @@ int ompi_coll_base_allgatherv_intra_two_procs(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_base_allgatherv_intra_basic_default(void *sbuf, int scount, 
+ompi_coll_base_allgatherv_intra_basic_default(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *disps, + void *rbuf, const int *rcounts, + const int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int i, size, rank, err; + int size, rank, err; MPI_Aint extent, lb; char *send_buf = NULL; struct ompi_datatype_t *newtype, *send_type; @@ -618,16 +624,15 @@ ompi_coll_base_allgatherv_intra_basic_default(void *sbuf, int scount, ompi_datatype_get_extent(rdtype, &lb, &extent); send_type = rdtype; send_buf = (char*)rbuf; - for (i = 0; i < rank; ++i) { - send_buf += ((ptrdiff_t)rcounts[i] * extent); - } + send_buf += ((ptrdiff_t)disps[rank] * extent); + scount = rcounts[rank]; } else { send_buf = (char*)sbuf; send_type = sdtype; } err = comm->c_coll.coll_gatherv(send_buf, - rcounts[rank], send_type,rbuf, + scount, send_type,rbuf, rcounts, disps, rdtype, 0, comm, comm->c_coll.coll_gatherv_module); if (MPI_SUCCESS != err) { diff --git a/ompi/mca/coll/base/coll_base_allreduce.c b/ompi/mca/coll/base/coll_base_allreduce.c index 54f444b6cf0..b75e58a83aa 100644 --- a/ompi/mca/coll/base/coll_base_allreduce.c +++ b/ompi/mca/coll/base/coll_base_allreduce.c @@ -13,6 +13,8 @@ * Copyright (c) 2009 University of Houston. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All Rights * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -47,7 +49,7 @@ * */ int -ompi_coll_base_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count, +ompi_coll_base_allreduce_intra_nonoverlapping(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -123,7 +125,7 @@ ompi_coll_base_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count, * */ int -ompi_coll_base_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, +ompi_coll_base_allreduce_intra_recursivedoubling(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, @@ -132,9 +134,9 @@ ompi_coll_base_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, { int ret, line, rank, size, adjsize, remote, distance; int newrank, newremote, extra_ranks; - char *tmpsend = NULL, *tmprecv = NULL, *tmpswap = NULL, *inplacebuf = NULL; - ptrdiff_t true_lb, true_extent, lb, extent; + char *tmpsend = NULL, *tmprecv = NULL, *tmpswap = NULL, *inplacebuf_free = NULL, *inplacebuf; ompi_request_t *reqs[2] = {NULL, NULL}; + OPAL_PTRDIFF_TYPE span, gap; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -152,13 +154,10 @@ ompi_coll_base_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, } /* Allocate and initialize temporary send buffer */ - ret = ompi_datatype_get_extent(dtype, &lb, &extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - ret = ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - - inplacebuf = (char*) malloc(true_extent + (ptrdiff_t)(count - 1) * extent); - if (NULL == inplacebuf) { ret = -1; line = __LINE__; goto error_hndl; } + span = opal_datatype_span(&dtype->super, count, &gap); + inplacebuf_free = (char*) malloc(span); + if (NULL == inplacebuf_free) { ret = -1; line = __LINE__; goto error_hndl; } + inplacebuf = inplacebuf_free - gap; if (MPI_IN_PLACE == sbuf) { 
ret = ompi_datatype_copy_content_same_ddt(dtype, count, inplacebuf, (char*)rbuf); @@ -265,13 +264,14 @@ ompi_coll_base_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, if (ret < 0) { line = __LINE__; goto error_hndl; } } - if (NULL != inplacebuf) free(inplacebuf); + if (NULL != inplacebuf_free) free(inplacebuf_free); return MPI_SUCCESS; error_hndl: OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n", __FILE__, line, rank, ret)); - if (NULL != inplacebuf) free(inplacebuf); + (void)line; // silence compiler warning + if (NULL != inplacebuf_free) free(inplacebuf_free); return ret; } @@ -340,7 +340,7 @@ ompi_coll_base_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, * */ int -ompi_coll_base_allreduce_intra_ring(void *sbuf, void *rbuf, int count, +ompi_coll_base_allreduce_intra_ring(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -615,7 +615,7 @@ ompi_coll_base_allreduce_intra_ring(void *sbuf, void *rbuf, int count, * */ int -ompi_coll_base_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count, +ompi_coll_base_allreduce_intra_ring_segmented(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -627,9 +627,9 @@ ompi_coll_base_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count, int segcount, max_segcount, num_phases, phase, block_count, inbi; size_t typelng; char *tmpsend = NULL, *tmprecv = NULL, *inbuf[2] = {NULL, NULL}; - ptrdiff_t true_lb, true_extent, lb, extent; ptrdiff_t block_offset, max_real_segsize; ompi_request_t *reqs[2] = {NULL, NULL}; + OPAL_PTRDIFF_TYPE lb, extent, gap; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -647,10 +647,6 @@ ompi_coll_base_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count, } /* Determine segment count based on the suggested segment size */ - ret = 
ompi_datatype_get_extent(dtype, &lb, &extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - ret = ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } ret = ompi_datatype_type_size( dtype, &typelng); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } segcount = count; @@ -683,7 +679,10 @@ ompi_coll_base_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count, early_blockcount, late_blockcount ); COLL_BASE_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi, max_segcount, k); - max_real_segsize = true_extent + (ptrdiff_t)(max_segcount - 1) * extent; + + ret = ompi_datatype_get_extent(dtype, &lb, &extent); + if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } + max_real_segsize = opal_datatype_span(&dtype->super, max_segcount, &gap); /* Allocate and initialize temporary buffers */ inbuf[0] = (char*)malloc(max_real_segsize); @@ -738,8 +737,8 @@ ompi_coll_base_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count, block_count = ((rank < split_rank)? early_blockcount : late_blockcount); COLL_BASE_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase, early_phase_segcount, late_phase_segcount) - phase_count = ((phase < split_phase)? - (early_phase_segcount) : (late_phase_segcount)); + phase_count = ((phase < split_phase)? + (early_phase_segcount) : (late_phase_segcount)); phase_offset = ((phase < split_phase)? 
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) : ((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase)); @@ -877,7 +876,7 @@ ompi_coll_base_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_base_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count, +ompi_coll_base_allreduce_intra_basic_linear(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/base/coll_base_alltoall.c b/ompi/mca/coll/base/coll_base_alltoall.c index baed3f3a1b8..cfa9bc3551b 100644 --- a/ompi/mca/coll/base/coll_base_alltoall.c +++ b/ompi/mca/coll/base/coll_base_alltoall.c @@ -3,16 +3,16 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All Rights + * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All Rights * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -36,17 +36,16 @@ /* MPI_IN_PLACE all to all algorithm. TODO: implement a better one. 
*/ int -mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount, +mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; - int i, j, size, rank, err=MPI_SUCCESS; - MPI_Request *preq; + int i, j, size, rank, err = MPI_SUCCESS, line; + OPAL_PTRDIFF_TYPE ext, gap; + ompi_request_t *req; char *tmp_buffer; size_t max_size; - ptrdiff_t ext; /* Initialize. */ @@ -60,57 +59,58 @@ mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount, /* Find the largest receive amount */ ompi_datatype_type_extent (rdtype, &ext); - max_size = ext * rcount; + max_size = opal_datatype_span(&rdtype->super, rcount, &gap); + + /* Initiate all send/recv to/from others. */ /* Allocate a temporary buffer */ tmp_buffer = calloc (max_size, 1); if (NULL == tmp_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } + tmp_buffer -= gap; + max_size = ext * rcount; /* in-place alltoall slow algorithm (but works) */ for (i = 0 ; i < size ; ++i) { for (j = i+1 ; j < size ; ++j) { - /* Initiate all send/recv to/from others. 
*/ - preq = coll_base_comm_get_reqs(base_module->base_data, size * 2); - if (i == rank) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer, (char *) rbuf + j * max_size); - if (MPI_SUCCESS != err) { goto error_hndl; } + if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } /* Exchange data with the peer */ err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * j, rcount, rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } + j, MCA_COLL_BASE_TAG_ALLTOALL, comm, &req)); + if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } - err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype, + err = MCA_PML_CALL(send ((char *) tmp_buffer, rcount, rdtype, j, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } + comm)); + if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } } else if (j == rank) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer, (char *) rbuf + i * max_size); - if (MPI_SUCCESS != err) { goto error_hndl; } + if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } /* Exchange data with the peer */ err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * i, rcount, rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } + i, MCA_COLL_BASE_TAG_ALLTOALL, comm, &req)); + if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } - err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype, + err = MCA_PML_CALL(send ((char *) tmp_buffer, rcount, rdtype, i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } + comm)); + if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } } else { continue; } /* Wait for the requests to complete */ - err = 
ompi_request_wait_all (2, base_module->base_data->mcct_reqs, MPI_STATUSES_IGNORE); - if (MPI_SUCCESS != err) { goto error_hndl; } + err = ompi_request_wait ( &req, MPI_STATUSES_IGNORE); + if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; } } } @@ -118,12 +118,18 @@ mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount, /* Free the temporary buffer */ free (tmp_buffer); - /* All done */ + if( MPI_SUCCESS != err ) { + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, + rank)); + (void)line; // silence compiler warning + } + /* All done */ return err; } -int ompi_coll_base_alltoall_intra_pairwise(void *sbuf, int scount, +int ompi_coll_base_alltoall_intra_pairwise(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -181,7 +187,7 @@ int ompi_coll_base_alltoall_intra_pairwise(void *sbuf, int scount, } -int ompi_coll_base_alltoall_intra_bruck(void *sbuf, int scount, +int ompi_coll_base_alltoall_intra_bruck(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -191,7 +197,7 @@ int ompi_coll_base_alltoall_intra_bruck(void *sbuf, int scount, int i, k, line = -1, rank, size, err = 0; int sendto, recvfrom, distance, *displs = NULL, *blen = NULL; char *tmpbuf = NULL, *tmpbuf_free = NULL; - ptrdiff_t rlb, slb, tlb, sext, rext, tsext; + OPAL_PTRDIFF_TYPE sext, rext, span, gap; struct ompi_datatype_t *new_ddt; if (MPI_IN_PLACE == sbuf) { @@ -205,15 +211,13 @@ int ompi_coll_base_alltoall_intra_bruck(void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:alltoall_intra_bruck rank %d", rank)); - err = ompi_datatype_get_extent (sdtype, &slb, &sext); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - - err = ompi_datatype_get_true_extent(sdtype, &tlb, &tsext); + err = ompi_datatype_type_extent (sdtype, &sext); if 
(err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - err = ompi_datatype_get_extent (rdtype, &rlb, &rext); + err = ompi_datatype_type_extent (rdtype, &rext); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } + span = opal_datatype_span(&sdtype->super, (int64_t)size * scount, &gap); displs = (int *) malloc(size * sizeof(int)); if (displs == NULL) { line = __LINE__; err = -1; goto err_hndl; } @@ -221,9 +225,9 @@ int ompi_coll_base_alltoall_intra_bruck(void *sbuf, int scount, if (blen == NULL) { line = __LINE__; err = -1; goto err_hndl; } /* tmp buffer allocation for message data */ - tmpbuf_free = (char *) malloc(tsext + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sext); + tmpbuf_free = (char *)malloc(span); if (tmpbuf_free == NULL) { line = __LINE__; err = -1; goto err_hndl; } - tmpbuf = tmpbuf_free - slb; + tmpbuf = tmpbuf_free - gap; /* Step 1 - local rotation - shift up by rank */ err = ompi_datatype_copy_content_same_ddt (sdtype, @@ -324,7 +328,7 @@ int ompi_coll_base_alltoall_intra_bruck(void *sbuf, int scount, * - wait for any request to complete * - replace that request by the new one of the same type. */ -int ompi_coll_base_alltoall_intra_linear_sync(void *sbuf, int scount, +int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -332,7 +336,8 @@ int ompi_coll_base_alltoall_intra_linear_sync(void *sbuf, int scount, mca_coll_base_module_t *module, int max_outstanding_reqs) { - int line, error, ri, si, rank, size, nreqs, nrreqs, nsreqs, total_reqs; + int line, error, ri, si, rank, size, nrreqs, nsreqs, total_reqs; + int nreqs = 0; char *psnd, *prcv; ptrdiff_t slb, sext, rlb, rext; @@ -383,29 +388,30 @@ int ompi_coll_base_alltoall_intra_linear_sync(void *sbuf, int scount, total_reqs = (((max_outstanding_reqs > (size - 1)) || (max_outstanding_reqs <= 0)) ? 
(size - 1) : (max_outstanding_reqs)); - reqs = (ompi_request_t**) malloc( 2 * total_reqs * - sizeof(ompi_request_t*)); - if (NULL == reqs) { error = -1; line = __LINE__; goto error_hndl; } + if (0 < total_reqs) { + reqs = coll_base_comm_get_reqs(module->base_data, 2 * total_reqs); + if (NULL == reqs) { error = -1; line = __LINE__; goto error_hndl; } + } prcv = (char *) rbuf; psnd = (char *) sbuf; /* Post first batch or ireceive and isend requests */ for (nreqs = 0, nrreqs = 0, ri = (rank + 1) % size; nreqs < total_reqs; - ri = (ri + 1) % size, ++nreqs, ++nrreqs) { - error = - MCA_PML_CALL(irecv - (prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri, - MCA_COLL_BASE_TAG_ALLTOALL, comm, &reqs[nreqs])); + ri = (ri + 1) % size, ++nrreqs) { + nreqs++; + error = MCA_PML_CALL(irecv + (prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri, + MCA_COLL_BASE_TAG_ALLTOALL, comm, &reqs[nreqs])); if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; } } - for ( nsreqs = 0, si = (rank + size - 1) % size; nreqs < 2 * total_reqs; - si = (si + size - 1) % size, ++nreqs, ++nsreqs) { - error = - MCA_PML_CALL(isend - (psnd + (ptrdiff_t)si * sext, scount, sdtype, si, - MCA_COLL_BASE_TAG_ALLTOALL, - MCA_PML_BASE_SEND_STANDARD, comm, &reqs[nreqs])); + for (nsreqs = 0, si = (rank + size - 1) % size; nreqs < 2 * total_reqs; + si = (si + size - 1) % size, ++nsreqs) { + nreqs++; + error = MCA_PML_CALL(isend + (psnd + (ptrdiff_t)si * sext, scount, sdtype, si, + MCA_COLL_BASE_TAG_ALLTOALL, + MCA_PML_BASE_SEND_STANDARD, comm, &reqs[nreqs])); if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; } } @@ -433,11 +439,10 @@ int ompi_coll_base_alltoall_intra_linear_sync(void *sbuf, int scount, ncreqs++; if (completed < total_reqs) { if (nrreqs < (size - 1)) { - error = - MCA_PML_CALL(irecv - (prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri, - MCA_COLL_BASE_TAG_ALLTOALL, comm, - &reqs[completed])); + error = MCA_PML_CALL(irecv + (prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri, + 
MCA_COLL_BASE_TAG_ALLTOALL, comm, + &reqs[completed])); if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; } ++nrreqs; ri = (ri + 1) % size; @@ -449,6 +454,7 @@ int ompi_coll_base_alltoall_intra_linear_sync(void *sbuf, int scount, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, &reqs[completed])); + if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; } ++nsreqs; si = (si + size - 1) % size; } @@ -456,9 +462,6 @@ int ompi_coll_base_alltoall_intra_linear_sync(void *sbuf, int scount, } } - /* Free the reqs */ - free(reqs); - /* All done */ return MPI_SUCCESS; @@ -466,12 +469,12 @@ int ompi_coll_base_alltoall_intra_linear_sync(void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, error, rank)); - if (NULL != reqs) free(reqs); + ompi_coll_base_free_reqs(reqs, nreqs); return error; } -int ompi_coll_base_alltoall_intra_two_procs(void *sbuf, int scount, +int ompi_coll_base_alltoall_intra_two_procs(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -492,6 +495,10 @@ int ompi_coll_base_alltoall_intra_two_procs(void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "ompi_coll_base_alltoall_intra_two_procs rank %d", rank)); + if (2 != ompi_comm_size(comm)) { + return MPI_ERR_UNSUPPORTED_OPERATION; + } + err = ompi_datatype_get_extent (sdtype, &lb, &sext); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } @@ -545,14 +552,15 @@ int ompi_coll_base_alltoall_intra_two_procs(void *sbuf, int scount, /* copied function (with appropriate renaming) starts here */ -int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount, +int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int 
i, rank, size, err, nreqs; + int i, rank, size, err, line; + int nreqs = 0; char *psnd, *prcv; MPI_Aint lb, sndinc, rcvinc; ompi_request_t **req, **sreq, **rreq; @@ -603,6 +611,7 @@ int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount, /* Initiate all send/recv to/from others. */ req = rreq = coll_base_comm_get_reqs(data, (size - 1) * 2); + if (NULL == req) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; } prcv = (char *) rbuf; psnd = (char *) sbuf; @@ -610,14 +619,12 @@ int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount, /* Post all receives first -- a simple optimization */ for (nreqs = 0, i = (rank + 1) % size; i != rank; - i = (i + 1) % size, ++rreq, ++nreqs) { + i = (i + 1) % size, ++rreq) { + nreqs++; err = MCA_PML_CALL(irecv_init (prcv + (ptrdiff_t)i * rcvinc, rcount, rdtype, i, MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq)); - if (MPI_SUCCESS != err) { - ompi_coll_base_free_reqs(req, nreqs); - return err; - } + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } /* Now post all sends in reverse order @@ -626,15 +633,13 @@ int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount, */ sreq = rreq; for (i = (rank + size - 1) % size; i != rank; - i = (i + size - 1) % size, ++sreq, ++nreqs) { + i = (i + size - 1) % size, ++sreq) { + nreqs++; err = MCA_PML_CALL(isend_init (psnd + (ptrdiff_t)i * sndinc, scount, sdtype, i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, sreq)); - if (MPI_SUCCESS != err) { - ompi_coll_base_free_reqs(req, nreqs); - return err; - } + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } /* Start your engines. This will never return an error. 
*/ @@ -650,7 +655,12 @@ int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount, err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE); - /* Free the reqs */ + err_hndl: + if( MPI_SUCCESS != err ) { + OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", + __FILE__, line, err, rank) ); + } + /* Free the reqs in all cases as they are persistent requests */ ompi_coll_base_free_reqs(req, nreqs); /* All done */ diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index 39774b989fb..efe054d696f 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -14,7 +14,7 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All Rights * reserved. * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -37,17 +37,16 @@ #include "coll_base_util.h" int -mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) +mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { - mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; int i, j, size, rank, err=MPI_SUCCESS; - MPI_Request *preq; - char *tmp_buffer; + ompi_request_t *req; + char *allocated_buffer, *tmp_buffer; size_t max_size, rdtype_size; - ptrdiff_t ext; + OPAL_PTRDIFF_TYPE ext, gap; /* Initialize. */ @@ -63,23 +62,22 @@ mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, cons /* Find the largest receive amount */ ompi_datatype_type_extent (rdtype, &ext); for (i = 0, max_size = 0 ; i < size ; ++i) { - size_t size = ext * rcounts[i]; - + size_t size = opal_datatype_span(&rdtype->super, rcounts[i], &gap); max_size = size > max_size ? size : max_size; } + /* The gap will always be the same as we are working on the same datatype */ /* Allocate a temporary buffer */ - tmp_buffer = calloc (max_size, 1); - if (NULL == tmp_buffer) { + allocated_buffer = calloc (max_size, 1); + if (NULL == allocated_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } + tmp_buffer = allocated_buffer - gap; + /* Initiate all send/recv to/from others. */ /* in-place alltoallv slow algorithm (but works) */ for (i = 0 ; i < size ; ++i) { for (j = i+1 ; j < size ; ++j) { - /* Initiate all send/recv to/from others. 
*/ - preq = coll_base_comm_get_reqs(base_module->base_data, 2); - if (i == rank && rcounts[j]) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j], @@ -88,12 +86,12 @@ mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, cons /* Exchange data with the peer */ err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); + j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, &req)); if (MPI_SUCCESS != err) { goto error_hndl; } - err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[j], rdtype, + err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[j], rdtype, j, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); + comm)); if (MPI_SUCCESS != err) { goto error_hndl; } } else if (j == rank && rcounts[i]) { /* Copy the data into the temporary buffer */ @@ -103,36 +101,35 @@ mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, cons /* Exchange data with the peer */ err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); + i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, &req)); if (MPI_SUCCESS != err) { goto error_hndl; } - err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[i], rdtype, + err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[i], rdtype, i, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); + comm)); if (MPI_SUCCESS != err) { goto error_hndl; } } else { continue; } /* Wait for the requests to complete */ - err = ompi_request_wait_all (2, base_module->base_data->mcct_reqs, MPI_STATUSES_IGNORE); + err = ompi_request_wait (&req, MPI_STATUSES_IGNORE); if (MPI_SUCCESS != err) { goto error_hndl; } } } error_hndl: /* Free the temporary buffer */ - free (tmp_buffer); + free (allocated_buffer); /* All done */ - return err; } int -ompi_coll_base_alltoallv_intra_pairwise(void *sbuf, int *scounts, int 
*sdisps, +ompi_coll_base_alltoallv_intra_pairwise(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *rdisps, + void* rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -193,9 +190,9 @@ ompi_coll_base_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps, * differently and so will not have to duplicate code. */ int -ompi_coll_base_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisps, +ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, + void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -203,7 +200,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisp int i, size, rank, err, nreqs; char *psnd, *prcv; ptrdiff_t sext, rext; - MPI_Request *preq; + ompi_request_t **preq, **reqs; mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; mca_coll_base_comm_t *data = base_module->base_data; @@ -239,7 +236,8 @@ ompi_coll_base_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisp /* Now, initiate all send/recv to/from others. 
*/ nreqs = 0; - preq = coll_base_comm_get_reqs(data, 2 * size); + reqs = preq = coll_base_comm_get_reqs(data, 2 * size); + if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl; } /* Post all receives first */ for (i = 0; i < size; ++i) { @@ -247,15 +245,12 @@ ompi_coll_base_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisp continue; } + ++nreqs; prcv = ((char *) rbuf) + (ptrdiff_t)rdisps[i] * rext; err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtype, i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); - ++nreqs; - if (MPI_SUCCESS != err) { - ompi_coll_base_free_reqs(data->mcct_reqs, nreqs); - return err; - } + if (MPI_SUCCESS != err) { goto err_hndl; } } /* Now post all sends */ @@ -264,20 +259,17 @@ ompi_coll_base_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisp continue; } + ++nreqs; psnd = ((char *) sbuf) + (ptrdiff_t)sdisps[i] * sext; err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtype, i, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD, comm, preq++)); - ++nreqs; - if (MPI_SUCCESS != err) { - ompi_coll_base_free_reqs(data->mcct_reqs, nreqs); - return err; - } + if (MPI_SUCCESS != err) { goto err_hndl; } } /* Start your engines. This will never return an error. */ - MCA_PML_CALL(start(nreqs, data->mcct_reqs)); + MCA_PML_CALL(start(nreqs, reqs)); /* Wait for them all. If there's an error, note that we don't care * what the error was -- just that there *was* an error. The PML @@ -285,11 +277,11 @@ ompi_coll_base_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisp * i.e., by the end of this call, all the requests are free-able. * So free them anyway -- even if there was an error, and return the * error after we free everything. */ - err = ompi_request_wait_all(nreqs, data->mcct_reqs, - MPI_STATUSES_IGNORE); + err = ompi_request_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE); - /* Free the requests. 
*/ - ompi_coll_base_free_reqs(data->mcct_reqs, nreqs); + err_hndl: + /* Free the requests in all cases as they are persistent */ + ompi_coll_base_free_reqs(reqs, nreqs); return err; } diff --git a/ompi/mca/coll/base/coll_base_barrier.c b/ompi/mca/coll/base/coll_base_barrier.c index 1896f23e225..5ebae30b788 100644 --- a/ompi/mca/coll/base/coll_base_barrier.c +++ b/ompi/mca/coll/base/coll_base_barrier.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -13,7 +13,7 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All Rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -41,52 +41,40 @@ * signal a two peer synchronization. 
*/ static inline int -ompi_coll_base_sendrecv_zero(int dest, int stag, +ompi_coll_base_sendrecv_zero( int dest, int stag, int source, int rtag, - MPI_Comm comm) + MPI_Comm comm ) { - int err, line = 0; - ompi_request_t* reqs[2]; - ompi_status_public_t statuses[2]; + int rc, line = 0; + ompi_request_t *req = MPI_REQUEST_NULL; + ompi_status_public_t status; /* post new irecv */ - err = MCA_PML_CALL(irecv( NULL, 0, MPI_BYTE, source, rtag, - comm, &reqs[0])); - if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } + rc = MCA_PML_CALL(irecv( NULL, 0, MPI_BYTE, source, rtag, + comm, &req )); + if( MPI_SUCCESS != rc ) { line = __LINE__; goto error_handler; } /* send data to children */ - err = MCA_PML_CALL(isend( NULL, 0, MPI_BYTE, dest, stag, - MCA_PML_BASE_SEND_STANDARD, comm, &reqs[1])); - if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } - - err = ompi_request_wait_all( 2, reqs, statuses ); - if( MPI_ERR_IN_STATUS == err ) { - /* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error - * code that we can propagate up the stack. Instead, look for the real - * error code from the MPI_ERROR in the status. - */ - int err_index = 0; - if( MPI_SUCCESS == statuses[0].MPI_ERROR ) { - err_index = 1; - } - err = statuses[err_index].MPI_ERROR; - OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred in the %s" - " stage of ompi_coll_base_sendrecv_zero\n", - __FILE__, line, err, (0 == err_index ? "receive" : "send"))); - return err; - } - if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } + rc = MCA_PML_CALL(send( NULL, 0, MPI_BYTE, dest, stag, + MCA_PML_BASE_SEND_STANDARD, comm )); + if( MPI_SUCCESS != rc ) { line = __LINE__; goto error_handler; } + + rc = ompi_request_wait( &req, &status ); + if( MPI_SUCCESS != rc ) { line = __LINE__; goto error_handler; } return (MPI_SUCCESS); error_handler: - /* Error discovered during the posting of the irecv or isend, - * and no status is available. 
- */ + if( MPI_REQUEST_NULL != req ) { /* cancel and complete the receive request */ + (void)ompi_request_cancel(req); + (void)ompi_request_wait(&req, &status); + } + OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n", - __FILE__, line, err)); - return err; + __FILE__, line, rc)); + (void)line; // silence compiler warning + return rc; } /* @@ -198,8 +186,8 @@ int ompi_coll_base_barrier_intra_recursivedoubling(struct ompi_communicator_t *c /* send message to lower ranked node */ remote = rank - adjsize; err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER, - remote, MCA_COLL_BASE_TAG_BARRIER, - comm); + remote, MCA_COLL_BASE_TAG_BARRIER, + comm); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} } else if (rank < (size - adjsize)) { @@ -223,8 +211,8 @@ int ompi_coll_base_barrier_intra_recursivedoubling(struct ompi_communicator_t *c /* post receive from the remote node */ err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER, - remote, MCA_COLL_BASE_TAG_BARRIER, - comm); + remote, MCA_COLL_BASE_TAG_BARRIER, + comm); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} } } @@ -272,8 +260,8 @@ int ompi_coll_base_barrier_intra_bruck(struct ompi_communicator_t *comm, /* send message to lower ranked node */ err = ompi_coll_base_sendrecv_zero(to, MCA_COLL_BASE_TAG_BARRIER, - from, MCA_COLL_BASE_TAG_BARRIER, - comm); + from, MCA_COLL_BASE_TAG_BARRIER, + comm); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} } @@ -298,11 +286,16 @@ int ompi_coll_base_barrier_intra_two_procs(struct ompi_communicator_t *comm, remote = ompi_comm_rank(comm); OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "ompi_coll_base_barrier_intra_two_procs rank %d", remote)); + + if (2 != ompi_comm_size(comm)) { + return MPI_ERR_UNSUPPORTED_OPERATION; + } + remote = (remote + 1) & 0x1; err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER, - remote, MCA_COLL_BASE_TAG_BARRIER, - comm); + remote, 
MCA_COLL_BASE_TAG_BARRIER, + comm); return (err); } @@ -324,7 +317,8 @@ int ompi_coll_base_barrier_intra_two_procs(struct ompi_communicator_t *comm, int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int i, err, rank, size; + int i, err, rank, size, line; + ompi_request_t** requests = NULL; rank = ompi_comm_rank(comm); size = ompi_comm_size(comm); @@ -334,50 +328,46 @@ int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm, err = MCA_PML_CALL(send (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_STANDARD, comm)); - if (MPI_SUCCESS != err) { - return err; - } + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); - if (MPI_SUCCESS != err) { - return err; - } + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } /* The root collects and broadcasts the messages. */ else { - ompi_request_t** requests; + requests = coll_base_comm_get_reqs(module->base_data, size); + if( NULL == requests ) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; } - requests = (ompi_request_t**)malloc( size * sizeof(ompi_request_t*) ); for (i = 1; i < size; ++i) { err = MCA_PML_CALL(irecv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, MCA_COLL_BASE_TAG_BARRIER, comm, &(requests[i]))); - if (MPI_SUCCESS != err) { - return err; - } + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } - ompi_request_wait_all( size-1, requests+1, MPI_STATUSES_IGNORE ); + err = ompi_request_wait_all( size-1, requests+1, MPI_STATUSES_IGNORE ); + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } + requests = NULL; /* we're done the requests array is clean */ for (i = 1; i < size; ++i) { err = MCA_PML_CALL(send(NULL, 0, MPI_BYTE, i, MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_STANDARD, comm)); - if (MPI_SUCCESS != err) { - return err; - } + if (MPI_SUCCESS != err) { line 
= __LINE__; goto err_hndl; } } - - free( requests ); } /* All done */ - return MPI_SUCCESS; - + err_hndl: + OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", + __FILE__, line, err, rank) ); + if( NULL != requests ) + ompi_coll_base_free_reqs(requests, size); + return err; } /* copied function (with appropriate renaming) ends here */ diff --git a/ompi/mca/coll/base/coll_base_bcast.c b/ompi/mca/coll/base/coll_base_bcast.c index 0480b95f8d1..429674123f7 100644 --- a/ompi/mca/coll/base/coll_base_bcast.c +++ b/ompi/mca/coll/base/coll_base_bcast.c @@ -3,14 +3,16 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,9 +50,7 @@ ompi_coll_base_bcast_intra_generic( void* buffer, char *tmpbuf; ptrdiff_t extent, lb; ompi_request_t *recv_reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; -#if !defined(COLL_BASE_BCAST_USE_BLOCKING) ompi_request_t **send_reqs = NULL; -#endif size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -64,12 +64,10 @@ ompi_coll_base_bcast_intra_generic( void* buffer, /* Set the buffer pointers */ tmpbuf = (char *) buffer; -#if !defined(COLL_BASE_BCAST_USE_BLOCKING) if( tree->tree_nextsize != 0 ) { - send_reqs = (ompi_request_t**)malloc( (ptrdiff_t)tree->tree_nextsize * - sizeof(ompi_request_t*) ); + send_reqs = coll_base_comm_get_reqs(module->base_data, tree->tree_nextsize); + if( NULL == send_reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto error_hndl; } } -#endif /* Root code */ if( rank == root ) { @@ -84,27 +82,18 @@ ompi_coll_base_bcast_intra_generic( void* buffer, sendcount = original_count - segindex * count_by_segment; } for( i = 0; i < tree->tree_nextsize; i++ ) { -#if defined(COLL_BASE_BCAST_USE_BLOCKING) - err = MCA_PML_CALL(send(tmpbuf, sendcount, datatype, - tree->tree_next[i], - MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm)); -#else err = MCA_PML_CALL(isend(tmpbuf, sendcount, datatype, tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); -#endif /* COLL_BASE_BCAST_USE_BLOCKING */ if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } -#if !defined(COLL_BASE_BCAST_USE_BLOCKING) /* complete the sends before starting the next sends */ err = ompi_request_wait_all( tree->tree_nextsize, send_reqs, MPI_STATUSES_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } -#endif /* not COLL_BASE_BCAST_USE_BLOCKING */ /* update tmp buffer */ tmpbuf += realsegsize; @@ -144,31 +133,22 @@ ompi_coll_base_bcast_intra_generic( void* buffer, /* wait for and forward the previous segment to children */ err = 
ompi_request_wait( &recv_reqs[req_index ^ 0x1], - MPI_STATUSES_IGNORE ); + MPI_STATUS_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } for( i = 0; i < tree->tree_nextsize; i++ ) { -#if defined(COLL_BASE_BCAST_USE_BLOCKING) - err = MCA_PML_CALL(send(tmpbuf, count_by_segment, datatype, - tree->tree_next[i], - MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm)); -#else err = MCA_PML_CALL(isend(tmpbuf, count_by_segment, datatype, tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); -#endif /* COLL_BASE_BCAST_USE_BLOCKING */ if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } -#if !defined(COLL_BASE_BCAST_USE_BLOCKING) /* complete the sends before starting the next iteration */ err = ompi_request_wait_all( tree->tree_nextsize, send_reqs, MPI_STATUSES_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } -#endif /* COLL_BASE_BCAST_USE_BLOCKING */ /* Update the receive buffer */ tmpbuf += realsegsize; @@ -176,30 +156,21 @@ ompi_coll_base_bcast_intra_generic( void* buffer, } /* Process the last segment */ - err = ompi_request_wait( &recv_reqs[req_index], MPI_STATUSES_IGNORE ); + err = ompi_request_wait( &recv_reqs[req_index], MPI_STATUS_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } sendcount = original_count - (ptrdiff_t)(num_segments - 1) * count_by_segment; for( i = 0; i < tree->tree_nextsize; i++ ) { -#if defined(COLL_BASE_BCAST_USE_BLOCKING) - err = MCA_PML_CALL(send(tmpbuf, sendcount, datatype, - tree->tree_next[i], - MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm)); -#else err = MCA_PML_CALL(isend(tmpbuf, sendcount, datatype, tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); -#endif /* COLL_BASE_BCAST_USE_BLOCKING */ if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } -#if !defined(COLL_BASE_BCAST_USE_BLOCKING) err = ompi_request_wait_all( tree->tree_nextsize, 
send_reqs, MPI_STATUSES_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } -#endif /* COLL_BASE_BCAST_USE_BLOCKING */ } /* Leaf nodes */ @@ -236,19 +207,17 @@ ompi_coll_base_bcast_intra_generic( void* buffer, if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } -#if !defined(COLL_BASE_BCAST_USE_BLOCKING) - if( NULL != send_reqs ) free(send_reqs); -#endif - return (MPI_SUCCESS); error_hndl: OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank) ); -#if !defined(COLL_BASE_BCAST_USE_BLOCKING) - if( NULL != send_reqs ) free(send_reqs); -#endif - return (err); + ompi_coll_base_free_reqs( recv_reqs, 2); + if( NULL != send_reqs ) { + ompi_coll_base_free_reqs(send_reqs, tree->tree_nextsize); + } + + return err; } int @@ -382,7 +351,6 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer, ptrdiff_t type_extent, lb; ompi_request_t *base_req, *new_req; ompi_coll_tree_t *tree; - mca_coll_base_comm_t *data = module->base_data; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -395,7 +363,7 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer, /* setup the binary tree topology. 
*/ COLL_BASE_UPDATE_BINTREE( comm, module, root ); - tree = data->cached_bintree; + tree = module->base_data->cached_bintree; err = ompi_datatype_type_size( datatype, &type_size ); @@ -505,8 +473,8 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer, comm, &new_req)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - /* wait for and forward current segment */ - err = ompi_request_wait_all( 1, &base_req, MPI_STATUSES_IGNORE ); + /* wait for and forward the previous segment */ + err = ompi_request_wait( &base_req, MPI_STATUS_IGNORE ); for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children (segcount[lr]) */ err = MCA_PML_CALL(send( tmpbuf[lr], segcount[lr], datatype, tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, @@ -521,7 +489,7 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer, } /* end of for segindex */ /* wait for the last segment and forward current segment */ - err = ompi_request_wait_all( 1, &base_req, MPI_STATUSES_IGNORE ); + err = ompi_request_wait( &base_req, MPI_STATUS_IGNORE ); for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */ err = MCA_PML_CALL(send(tmpbuf[lr], sendcount[lr], datatype, tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, @@ -637,15 +605,15 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count, mca_coll_base_module_t *module) { int i, size, rank, err; - mca_coll_base_comm_t *data = module->base_data; ompi_request_t **preq, **reqs; - size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_bcast_intra_basic_linear rank %d root %d", rank, root)); + if (1 == size) return OMPI_SUCCESS; + /* Non-root receive the data. */ if (rank != root) { @@ -655,27 +623,22 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count, } /* Root sends data to all others. 
*/ - preq = reqs = coll_base_comm_get_reqs(data, size-1); + preq = reqs = coll_base_comm_get_reqs(module->base_data, size-1); + if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl; } + for (i = 0; i < size; ++i) { if (i == rank) { continue; } - err = MCA_PML_CALL(isend_init(buff, count, datatype, i, - MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); - if (MPI_SUCCESS != err) { - ompi_coll_base_free_reqs(data->mcct_reqs, i); - return err; - } + err = MCA_PML_CALL(isend(buff, count, datatype, i, + MCA_COLL_BASE_TAG_BCAST, + MCA_PML_BASE_SEND_STANDARD, + comm, preq++)); + if (MPI_SUCCESS != err) { goto err_hndl; } } --i; - /* Start your engines. This will never return an error. */ - - MCA_PML_CALL(start(i, reqs)); - /* Wait for them all. If there's an error, note that we don't * care what the error was -- just that there *was* an error. The * PML will finish all requests, even if one or more of them fail. @@ -684,9 +647,10 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count, * the error after we free everything. */ err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE); - - /* Free the reqs */ - ompi_coll_base_free_reqs(reqs, i); + err_hndl: + if( MPI_SUCCESS != err ) { /* Free the reqs */ + ompi_coll_base_free_reqs(reqs, i); + } /* All done */ return err; diff --git a/ompi/mca/coll/base/coll_base_comm_select.c b/ompi/mca/coll/base/coll_base_comm_select.c index bf6179e20af..6335654f3fc 100644 --- a/ompi/mca/coll/base/coll_base_comm_select.c +++ b/ompi/mca/coll/base/coll_base_comm_select.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. @@ -19,10 +19,11 @@ * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -52,6 +53,7 @@ struct avail_coll_t { int ac_priority; mca_coll_base_module_2_1_0_t *ac_module; + const char * ac_component_name; }; typedef struct avail_coll_t avail_coll_t; @@ -110,7 +112,7 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm) int ret; /* Announce */ - opal_output_verbose(10, ompi_coll_base_framework.framework_output, + opal_output_verbose(9, ompi_coll_base_framework.framework_output, "coll:base:comm_select: new communicator: %s (cid %d)", comm->c_name, comm->c_contextid); @@ -143,6 +145,12 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm) /* initialize the module */ ret = avail->ac_module->coll_module_enable(avail->ac_module, comm); + + opal_output_verbose(9, ompi_coll_base_framework.framework_output, + "coll:base:comm_select: selecting %10s, priority %3d, %s", + avail->ac_component_name, avail->ac_priority, + (OMPI_SUCCESS == ret ? "Enabled": "Disabled") ); + if (OMPI_SUCCESS == ret) { /* copy over any of the pointers */ @@ -295,10 +303,24 @@ static opal_list_t *check_components(opal_list_t * components, avail = OBJ_NEW(avail_coll_t); avail->ac_priority = priority; avail->ac_module = module; + // Point to the string so we don't have to free later + avail->ac_component_name = component->mca_component_name; opal_list_append(selectable, &avail->super); } - } + else { + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "coll:base:comm_select: component disqualified: %s (priority %d < 0)", + component->mca_component_name, priority ); + + // If the disqualified collective returned a module make sure we + // release it here, since it will become a leak otherwise. 
+ if( NULL != module ) { + OBJ_RELEASE(module); + module = NULL; + } + } + } /* If we didn't find any available components, return an error */ if (0 == opal_list_get_size(selectable)) { diff --git a/ompi/mca/coll/base/coll_base_comm_unselect.c b/ompi/mca/coll/base/coll_base_comm_unselect.c index c2b97970972..f6f7a699908 100644 --- a/ompi/mca/coll/base/coll_base_comm_unselect.c +++ b/ompi/mca/coll/base/coll_base_comm_unselect.c @@ -5,19 +5,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012 Oak Rigde National Laboratory. + * Copyright (c) 2012 Oak Rigde National Laboratory. * All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -91,7 +91,7 @@ int mca_coll_base_comm_unselect(ompi_communicator_t * comm) CLOSE(comm, neighbor_alltoall); CLOSE(comm, neighbor_alltoallv); CLOSE(comm, neighbor_alltoallw); - + CLOSE(comm, ineighbor_allgather); CLOSE(comm, ineighbor_allgatherv); CLOSE(comm, ineighbor_alltoall); diff --git a/ompi/mca/coll/base/coll_base_find_available.c b/ompi/mca/coll/base/coll_base_find_available.c index feba164777a..e1f69d4ba47 100644 --- a/ompi/mca/coll/base/coll_base_find_available.c +++ b/ompi/mca/coll/base/coll_base_find_available.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -159,7 +159,7 @@ static int init_query(const mca_base_component_t * component, * Query a specific component, coll v2.0.0 */ static int init_query_2_0_0(const mca_base_component_t * component, - bool enable_progress_threads, + bool enable_progress_threads, bool enable_mpi_threads) { mca_coll_base_component_2_0_0_t *coll = diff --git a/ompi/mca/coll/base/coll_base_frame.c b/ompi/mca/coll/base/coll_base_frame.c index a4e1038b104..edbbe04db1c 100644 --- a/ompi/mca/coll/base/coll_base_frame.c +++ b/ompi/mca/coll/base/coll_base_frame.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -68,25 +68,14 @@ OBJ_CLASS_INSTANCE(mca_coll_base_module_t, opal_object_t, static void coll_base_comm_construct(mca_coll_base_comm_t *data) { - data->mcct_reqs = NULL; - data->mcct_num_reqs = 0; - data->cached_ntree = NULL; - data->cached_bintree = NULL; - data->cached_bmtree = NULL; - data->cached_in_order_bmtree = NULL; - data->cached_chain = NULL; - data->cached_pipeline = NULL; - data->cached_in_order_bintree = NULL; + memset ((char *) data + sizeof (data->super), 0, sizeof (*data) - sizeof (data->super)); } static void coll_base_comm_destruct(mca_coll_base_comm_t *data) { if( NULL != data->mcct_reqs ) { - for( int i = 0; i < data->mcct_num_reqs; ++i ) { - if( MPI_REQUEST_NULL != data->mcct_reqs[i] ) - ompi_request_free(&data->mcct_reqs[i]); - } + ompi_coll_base_free_reqs( data->mcct_reqs, data->mcct_num_reqs ); free(data->mcct_reqs); data->mcct_reqs = NULL; data->mcct_num_reqs = 0; @@ -122,20 +111,18 @@ OBJ_CLASS_INSTANCE(mca_coll_base_comm_t, opal_object_t, ompi_request_t** coll_base_comm_get_reqs(mca_coll_base_comm_t* data, int nreqs) { - int startfrom = data->mcct_num_reqs; + if( 0 == nreqs ) return NULL; - if( NULL == data->mcct_reqs ) { - assert(0 == data->mcct_num_reqs); - data->mcct_reqs = (ompi_request_t**)malloc(sizeof(ompi_request_t*) * nreqs); - } else if( data->mcct_num_reqs <= nreqs ) { + if( data->mcct_num_reqs < nreqs ) { data->mcct_reqs = (ompi_request_t**)realloc(data->mcct_reqs, sizeof(ompi_request_t*) * nreqs); + + if( NULL != data->mcct_reqs ) { + for( int i = data->mcct_num_reqs; i < nreqs; i++ ) + data->mcct_reqs[i] = MPI_REQUEST_NULL; + data->mcct_num_reqs = nreqs; + } else + data->mcct_num_reqs = 0; /* nothing to return */ } - if( NULL != data->mcct_reqs ) { - data->mcct_num_reqs = nreqs; - for( int i = startfrom; i < data->mcct_num_reqs; i++ ) - data->mcct_reqs[i] = MPI_REQUEST_NULL; - } else - data->mcct_num_reqs = 0; /* nothing to return */ return 
data->mcct_reqs; } diff --git a/ompi/mca/coll/base/coll_base_functions.h b/ompi/mca/coll/base/coll_base_functions.h index d42c0627cb6..0d1c4b34eb7 100644 --- a/ompi/mca/coll/base/coll_base_functions.h +++ b/ompi/mca/coll/base/coll_base_functions.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -58,22 +58,22 @@ typedef enum COLLTYPE { } COLLTYPE_T; /* defined arg lists to simply auto inclusion of user overriding decision functions */ -#define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define ALLGATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void * rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define ALLREDUCE_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define ALLTOALL_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define ALLTOALLV_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define ALLTOALLW_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, void *rbuf, int *rcounts, int *rdisps, struct 
ompi_datatype_t **rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define ALLGATHER_ARGS const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define ALLGATHERV_ARGS const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void * rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define ALLREDUCE_ARGS const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define ALLTOALL_ARGS const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define ALLTOALLV_ARGS const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define ALLTOALLW_ARGS const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t * const *sdtypes, void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module #define BARRIER_ARGS struct ompi_communicator_t *comm, mca_coll_base_module_t *module #define BCAST_ARGS void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define EXSCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define GATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t 
*rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define EXSCAN_ARGS const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define GATHER_ARGS const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module #define GATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define REDUCE_ARGS void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define REDUCESCATTER_ARGS void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define SCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define SCATTER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define SCATTERV_ARGS void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define REDUCE_ARGS const void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define REDUCESCATTER_ARGS const void *sbuf, void *rbuf, const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, 
struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define SCAN_ARGS const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define SCATTER_ARGS const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define SCATTERV_ARGS const void *sbuf, const int *scounts, const int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module /* end defined arg lists to simply auto inclusion of user overriding decision functions */ BEGIN_C_DECLS @@ -106,7 +106,7 @@ int ompi_coll_base_alltoall_intra_bruck(ALLTOALL_ARGS); int ompi_coll_base_alltoall_intra_basic_linear(ALLTOALL_ARGS); int ompi_coll_base_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests); int ompi_coll_base_alltoall_intra_two_procs(ALLTOALL_ARGS); -int mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount, +int mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); /* special version for INPLACE */ @@ -114,7 +114,7 @@ int mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount, /* AlltoAllV */ int ompi_coll_base_alltoallv_intra_pairwise(ALLTOALLV_ARGS); int ompi_coll_base_alltoallv_intra_basic_linear(ALLTOALLV_ARGS); -int mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps, +int mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); /* special version for INPLACE */ @@ -343,11 +343,24 @@ struct mca_coll_base_comm_t { typedef 
struct mca_coll_base_comm_t mca_coll_base_comm_t; OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_base_comm_t); +/** + * Free all requests in an array. As these requests are usually used during + * collective communications, and as on a successful collective they are + * expected to be released during the corresponding wait, the array should + * generally be empty. However, this function might be used on error conditions + * where it will allow a correct cleanup. + */ static inline void ompi_coll_base_free_reqs(ompi_request_t **reqs, int count) { - int i; - for (i = 0; i < count; ++i) - ompi_request_free(&reqs[i]); + if (OPAL_UNLIKELY(NULL == reqs)) { + return; + } + + for (int i = 0; i < count; ++i) { + if( MPI_REQUEST_NULL != reqs[i] ) { + ompi_request_free(&reqs[i]); + } + } } /** diff --git a/ompi/mca/coll/base/coll_base_gather.c b/ompi/mca/coll/base/coll_base_gather.c index b947a14d0e5..ff83c291609 100644 --- a/ompi/mca/coll/base/coll_base_gather.c +++ b/ompi/mca/coll/base/coll_base_gather.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All Rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -37,7 +37,7 @@ /* Todo: gather_intra_generic, gather_intra_binary, gather_intra_chain, * gather_intra_pipeline, segmentation? 
*/ int -ompi_coll_base_gather_intra_binomial(void *sbuf, int scount, +ompi_coll_base_gather_intra_binomial(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -49,8 +49,8 @@ ompi_coll_base_gather_intra_binomial(void *sbuf, int scount, char *ptmp = NULL, *tempbuf = NULL; ompi_coll_tree_t* bmtree; MPI_Status status; - MPI_Aint sextent, slb, strue_lb, strue_extent; - MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent; + MPI_Aint sextent, sgap, ssize; + MPI_Aint rextent, rgap, rsize; mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; mca_coll_base_comm_t *data = base_module->base_data; @@ -64,34 +64,34 @@ ompi_coll_base_gather_intra_binomial(void *sbuf, int scount, COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root ); bmtree = data->cached_in_order_bmtree; - ompi_datatype_get_extent(sdtype, &slb, &sextent); - ompi_datatype_get_true_extent(sdtype, &strue_lb, &strue_extent); + ompi_datatype_type_extent(sdtype, &sextent); + ssize = opal_datatype_span(&sdtype->super, (int64_t)scount * size, &sgap); vrank = (rank - root + size) % size; if (rank == root) { - ompi_datatype_get_extent(rdtype, &rlb, &rextent); - ompi_datatype_get_true_extent(rdtype, &rtrue_lb, &rtrue_extent); + ompi_datatype_type_extent(rdtype, &rextent); + rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * size, &rgap); if (0 == root){ /* root on 0, just use the recv buffer */ ptmp = (char *) rbuf; if (sbuf != MPI_IN_PLACE) { - err = ompi_datatype_sndrcv(sbuf, scount, sdtype, + err = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype, ptmp, rcount, rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } } else { /* root is not on 0, allocate temp buffer for recv, * rotate data at the end */ - tempbuf = (char *) malloc(rtrue_extent + ((ptrdiff_t)rcount * (ptrdiff_t)size - 1) * rextent); + tempbuf = (char *) malloc(rsize); if (NULL == tempbuf) { err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto 
err_hndl; } - ptmp = tempbuf - rtrue_lb; + ptmp = tempbuf - rgap; if (sbuf != MPI_IN_PLACE) { /* copy from sbuf to temp buffer */ - err = ompi_datatype_sndrcv(sbuf, scount, sdtype, + err = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype, ptmp, rcount, rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } else { @@ -106,14 +106,14 @@ ompi_coll_base_gather_intra_binomial(void *sbuf, int scount, /* other non-leaf nodes, allocate temp buffer for data received from * children, the most we need is half of the total data elements due * to the property of binimoal tree */ - tempbuf = (char *) malloc(strue_extent + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sextent); + tempbuf = (char *) malloc(ssize); if (NULL == tempbuf) { err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; } - ptmp = tempbuf - strue_lb; + ptmp = tempbuf - sgap; /* local copy to tempbuf */ - err = ompi_datatype_sndrcv(sbuf, scount, sdtype, + err = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype, ptmp, scount, sdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -204,7 +204,7 @@ ompi_coll_base_gather_intra_binomial(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_base_gather_intra_linear_sync(void *sbuf, int scount, +ompi_coll_base_gather_intra_linear_sync(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -237,7 +237,7 @@ ompi_coll_base_gather_intra_linear_sync(void *sbuf, int scount, COLL_BASE_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng, first_segment_count ); - ret = MCA_PML_CALL(recv(sbuf, 0, MPI_BYTE, root, + ret = MCA_PML_CALL(recv(rbuf, 0, MPI_BYTE, root, MCA_COLL_BASE_TAG_GATHER, comm, MPI_STATUS_IGNORE)); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } @@ -266,7 +266,7 @@ ompi_coll_base_gather_intra_linear_sync(void *sbuf, int scount, */ char *ptmp; ompi_request_t *first_segment_req; - reqs = (ompi_request_t**) 
calloc(size, sizeof(ompi_request_t*)); + reqs = coll_base_comm_get_reqs(module->base_data, size); if (NULL == reqs) { ret = -1; line = __LINE__; goto error_hndl; } ompi_datatype_type_size(rdtype, &typelng); @@ -310,7 +310,7 @@ ompi_coll_base_gather_intra_linear_sync(void *sbuf, int scount, /* copy local data if necessary */ if (MPI_IN_PLACE != sbuf) { - ret = ompi_datatype_sndrcv(sbuf, scount, sdtype, + ret = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype, (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * extent, rcount, rdtype); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } @@ -319,16 +319,13 @@ ompi_coll_base_gather_intra_linear_sync(void *sbuf, int scount, /* wait all second segments to complete */ ret = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - - free(reqs); } /* All done */ - return MPI_SUCCESS; error_hndl: if (NULL != reqs) { - free(reqs); + ompi_coll_base_free_reqs(reqs, size); } OPAL_OUTPUT (( ompi_coll_base_framework.framework_output, "ERROR_HNDL: node %d file %s line %d error %d\n", @@ -357,7 +354,7 @@ ompi_coll_base_gather_intra_linear_sync(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_base_gather_intra_basic_linear(void *sbuf, int scount, +ompi_coll_base_gather_intra_basic_linear(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -389,7 +386,7 @@ ompi_coll_base_gather_intra_basic_linear(void *sbuf, int scount, for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) { if (i == rank) { if (MPI_IN_PLACE != sbuf) { - err = ompi_datatype_sndrcv(sbuf, scount, sdtype, + err = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype, ptmp, rcount, rdtype); } else { err = MPI_SUCCESS; @@ -405,7 +402,6 @@ ompi_coll_base_gather_intra_basic_linear(void *sbuf, int scount, } /* All done */ - return MPI_SUCCESS; } diff --git a/ompi/mca/coll/base/coll_base_reduce.c 
b/ompi/mca/coll/base/coll_base_reduce.c index 644ff66f76b..a54a9b19cf5 100644 --- a/ompi/mca/coll/base/coll_base_reduce.c +++ b/ompi/mca/coll/base/coll_base_reduce.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All Rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -45,7 +45,7 @@ * for the first block: thus we must copy sendbuf to accumbuf on intermediate * to keep the optimized loop happy. */ -int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_count, +int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int original_count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, mca_coll_base_module_t *module, @@ -55,9 +55,8 @@ int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_co char *inbuf[2] = {NULL, NULL}, *inbuf_free[2] = {NULL, NULL}; char *accumbuf = NULL, *accumbuf_free = NULL; char *local_op_buffer = NULL, *sendtmpbuf = NULL; - ptrdiff_t extent, lower_bound, segment_increment; - size_t typelng; - ompi_request_t* reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; + ptrdiff_t extent, size, gap, segment_increment; + ompi_request_t **sreq = NULL, *reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; int num_segments, line, ret, segindex, i, rank; int recvcount, prevcount, inbi; @@ -65,8 +64,7 @@ int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_co * Determine number of segments and number of elements * sent per operation */ - ompi_datatype_get_extent( datatype, &lower_bound, &extent ); - ompi_datatype_type_size( datatype, &typelng ); + ompi_datatype_type_extent( datatype, &extent ); num_segments = (original_count + count_by_segment - 1) / count_by_segment; segment_increment = (ptrdiff_t)count_by_segment * extent; @@ -84,37 
+82,36 @@ int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_co /* non-leaf nodes - wait for children to send me data & forward up (if needed) */ if( tree->tree_nextsize > 0 ) { - ptrdiff_t true_lower_bound, true_extent, real_segment_size; - ompi_datatype_get_true_extent( datatype, &true_lower_bound, - &true_extent ); + ptrdiff_t real_segment_size; /* handle non existant recv buffer (i.e. its NULL) and protect the recv buffer on non-root nodes */ accumbuf = (char*)recvbuf; if( (NULL == accumbuf) || (root != rank) ) { /* Allocate temporary accumulator buffer. */ - accumbuf_free = (char*)malloc(true_extent + - (ptrdiff_t)(original_count - 1) * extent); + size = opal_datatype_span(&datatype->super, original_count, &gap); + accumbuf_free = (char*)malloc(size); if (accumbuf_free == NULL) { line = __LINE__; ret = -1; goto error_hndl; } - accumbuf = accumbuf_free - lower_bound; + accumbuf = accumbuf_free - gap; } /* If this is a non-commutative operation we must copy sendbuf to the accumbuf, in order to simplfy the loops */ - if (!ompi_op_is_commute(op)) { + + if (!ompi_op_is_commute(op) && MPI_IN_PLACE != sendbuf) { ompi_datatype_copy_content_same_ddt(datatype, original_count, (char*)accumbuf, (char*)sendtmpbuf); } /* Allocate two buffers for incoming segments */ - real_segment_size = true_extent + (ptrdiff_t)(count_by_segment - 1) * extent; + real_segment_size = opal_datatype_span(&datatype->super, count_by_segment, &gap); inbuf_free[0] = (char*) malloc(real_segment_size); if( inbuf_free[0] == NULL ) { line = __LINE__; ret = -1; goto error_hndl; } - inbuf[0] = inbuf_free[0] - lower_bound; + inbuf[0] = inbuf_free[0] - gap; /* if there is chance to overlap communication - allocate second buffer */ if( (num_segments > 1) || (tree->tree_nextsize > 1) ) { @@ -122,7 +119,7 @@ int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_co if( inbuf_free[1] == NULL ) { line = __LINE__; ret = -1; goto error_hndl; } - inbuf[1] = 
inbuf_free[1] - lower_bound; + inbuf[1] = inbuf_free[1] - gap; } /* reset input buffer index and receive count */ @@ -172,8 +169,8 @@ int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_co if there are no requests reqs[inbi ^1] will be MPI_REQUEST_NULL. */ /* wait on data from last child for previous segment */ - ret = ompi_request_wait_all( 1, &reqs[inbi ^ 1], - MPI_STATUSES_IGNORE ); + ret = ompi_request_wait(&reqs[inbi ^ 1], + MPI_STATUSES_IGNORE ); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } local_op_buffer = inbuf[inbi ^ 1]; if( i > 0 ) { @@ -279,10 +276,8 @@ int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_co else { int creq = 0; - ompi_request_t **sreq = NULL; - sreq = (ompi_request_t**) calloc( max_outstanding_reqs, - sizeof(ompi_request_t*) ); + sreq = coll_base_comm_get_reqs(module->base_data, max_outstanding_reqs); if (NULL == sreq) { line = __LINE__; ret = -1; goto error_hndl; } /* post first group of requests */ @@ -303,7 +298,6 @@ int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_co /* wait on a posted request to complete */ ret = ompi_request_wait(&sreq[creq], MPI_STATUS_IGNORE); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - sreq[creq] = MPI_REQUEST_NULL; if( original_count < count_by_segment ) { count_by_segment = original_count; @@ -325,9 +319,6 @@ int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_co ret = ompi_request_wait_all( max_outstanding_reqs, sreq, MPI_STATUSES_IGNORE ); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - - /* free requests */ - free(sreq); } } return OMPI_SUCCESS; @@ -339,6 +330,9 @@ int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_co if( inbuf_free[0] != NULL ) free(inbuf_free[0]); if( inbuf_free[1] != NULL ) free(inbuf_free[1]); if( accumbuf_free != NULL ) free(accumbuf); + if( NULL != sreq ) { + ompi_coll_base_free_reqs(sreq, 
max_outstanding_reqs); + } return ret; } @@ -349,7 +343,7 @@ int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_co meaning that at least one datatype must fit in the segment ! */ -int ompi_coll_base_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, +int ompi_coll_base_reduce_intra_chain( const void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, @@ -379,7 +373,7 @@ int ompi_coll_base_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, } -int ompi_coll_base_reduce_intra_pipeline( void *sendbuf, void *recvbuf, +int ompi_coll_base_reduce_intra_pipeline( const void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, @@ -410,7 +404,7 @@ int ompi_coll_base_reduce_intra_pipeline( void *sendbuf, void *recvbuf, segcount, max_outstanding_reqs ); } -int ompi_coll_base_reduce_intra_binary( void *sendbuf, void *recvbuf, +int ompi_coll_base_reduce_intra_binary( const void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, @@ -441,7 +435,7 @@ int ompi_coll_base_reduce_intra_binary( void *sendbuf, void *recvbuf, segcount, max_outstanding_reqs ); } -int ompi_coll_base_reduce_intra_binomial( void *sendbuf, void *recvbuf, +int ompi_coll_base_reduce_intra_binomial( const void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, @@ -479,7 +473,7 @@ int ompi_coll_base_reduce_intra_binomial( void *sendbuf, void *recvbuf, * Acecpts: same as MPI_Reduce() * Returns: MPI_SUCCESS or error code */ -int ompi_coll_base_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, +int ompi_coll_base_reduce_intra_in_order_binary( const void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, @@ -489,7 +483,9 @@ int ompi_coll_base_reduce_intra_in_order_binary( void 
*sendbuf, void *recvbuf, int max_outstanding_reqs ) { int ret, rank, size, io_root, segcount = count; - void *use_this_sendbuf = NULL, *use_this_recvbuf = NULL; + void *use_this_sendbuf = NULL; + void *use_this_recvbuf = NULL; + char *tmpbuf_free = NULL; size_t typelng; mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; mca_coll_base_comm_t *data = base_module->base_data; @@ -516,29 +512,30 @@ int ompi_coll_base_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, operations for non-commutative ops. */ io_root = size - 1; - use_this_sendbuf = sendbuf; + use_this_sendbuf = (void *)sendbuf; use_this_recvbuf = recvbuf; if (io_root != root) { - ptrdiff_t tlb, text, lb, ext; - char *tmpbuf = NULL; + ptrdiff_t dsize, gap; + char *tmpbuf; - ompi_datatype_get_extent(datatype, &lb, &ext); - ompi_datatype_get_true_extent(datatype, &tlb, &text); + dsize = opal_datatype_span(&datatype->super, count, &gap); if ((root == rank) && (MPI_IN_PLACE == sendbuf)) { - tmpbuf = (char *) malloc(text + (ptrdiff_t)(count - 1) * ext); - if (NULL == tmpbuf) { + tmpbuf_free = (char *) malloc(dsize); + if (NULL == tmpbuf_free) { return MPI_ERR_INTERN; } + tmpbuf = tmpbuf_free - gap; ompi_datatype_copy_content_same_ddt(datatype, count, (char*)tmpbuf, (char*)recvbuf); use_this_sendbuf = tmpbuf; } else if (io_root == rank) { - tmpbuf = (char *) malloc(text + (ptrdiff_t)(count - 1) * ext); - if (NULL == tmpbuf) { + tmpbuf_free = (char *) malloc(dsize); + if (NULL == tmpbuf_free) { return MPI_ERR_INTERN; } + tmpbuf = tmpbuf_free - gap; use_this_recvbuf = tmpbuf; } } @@ -558,9 +555,6 @@ int ompi_coll_base_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, MCA_COLL_BASE_TAG_REDUCE, comm, MPI_STATUS_IGNORE)); if (MPI_SUCCESS != ret) { return ret; } - if (MPI_IN_PLACE == sendbuf) { - free(use_this_sendbuf); - } } else if (io_root == rank) { /* Send result from use_this_recvbuf to root */ @@ -568,9 +562,11 @@ int ompi_coll_base_reduce_intra_in_order_binary( void 
*sendbuf, void *recvbuf, MCA_COLL_BASE_TAG_REDUCE, MCA_PML_BASE_SEND_STANDARD, comm)); if (MPI_SUCCESS != ret) { return ret; } - free(use_this_recvbuf); } } + if (NULL != tmpbuf_free) { + free(tmpbuf_free); + } return MPI_SUCCESS; } @@ -587,8 +583,6 @@ int ompi_coll_base_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, * GEF Oct05 after asking Jeff. */ -/* copied function (with appropriate renaming) starts here */ - /* * reduce_lin_intra * @@ -597,7 +591,7 @@ int ompi_coll_base_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, +ompi_coll_base_reduce_intra_basic_linear(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, @@ -605,10 +599,10 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, mca_coll_base_module_t *module) { int i, rank, err, size; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t extent, dsize, gap; char *free_buffer = NULL; char *pml_buffer = NULL; - char *inplace_temp = NULL; + char *inplace_temp_free = NULL; char *inbuf; /* Initialize */ @@ -625,151 +619,27 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, return err; } - /* Root receives and reduces messages. Allocate buffer to receive - * messages. This comment applies to all collectives in this basic - * module where we allocate a temporary buffer. For the next few - * lines of code, it's tremendously complicated how we decided that - * this was the Right Thing to do. Sit back and enjoy. And prepare - * to have your mind warped. :-) - * - * Recall some definitions (I always get these backwards, so I'm - * going to put them here): - * - * extent: the length from the lower bound to the upper bound -- may - * be considerably larger than the buffer required to hold the data - * (or smaller! But it's easiest to think about when it's larger). 
- * - * true extent: the exact number of bytes required to hold the data - * in the layout pattern in the datatype. - * - * For example, consider the following buffer (just talking about - * true_lb, extent, and true extent -- extrapolate for true_ub: - * - * A B C - * -------------------------------------------------------- - * | | | - * -------------------------------------------------------- - * - * There are multiple cases: - * - * 1. A is what we give to MPI_Send (and friends), and A is where - * the data starts, and C is where the data ends. In this case: - * - * - extent: C-A - * - true extent: C-A - * - true_lb: 0 - * - * A C - * -------------------------------------------------------- - * | | - * -------------------------------------------------------- - * <=======================extent=========================> - * <======================true extent=====================> - * - * 2. A is what we give to MPI_Send (and friends), B is where the - * data starts, and C is where the data ends. In this case: - * - * - extent: C-A - * - true extent: C-B - * - true_lb: positive - * - * A B C - * -------------------------------------------------------- - * | | User buffer | - * -------------------------------------------------------- - * <=======================extent=========================> - * <===============true extent=============> - * - * 3. B is what we give to MPI_Send (and friends), A is where the - * data starts, and C is where the data ends. In this case: - * - * - extent: C-A - * - true extent: C-A - * - true_lb: negative - * - * A B C - * -------------------------------------------------------- - * | | User buffer | - * -------------------------------------------------------- - * <=======================extent=========================> - * <======================true extent=====================> - * - * 4. MPI_BOTTOM is what we give to MPI_Send (and friends), B is - * where the data starts, and C is where the data ends. 
In this - * case: - * - * - extent: C-MPI_BOTTOM - * - true extent: C-B - * - true_lb: [potentially very large] positive - * - * MPI_BOTTOM B C - * -------------------------------------------------------- - * | | User buffer | - * -------------------------------------------------------- - * <=======================extent=========================> - * <===============true extent=============> - * - * So in all cases, for a temporary buffer, all we need to malloc() - * is a buffer of size true_extent. We therefore need to know two - * pointer values: what value to give to MPI_Send (and friends) and - * what value to give to free(), because they might not be the same. - * - * Clearly, what we give to free() is exactly what was returned from - * malloc(). That part is easy. :-) - * - * What we give to MPI_Send (and friends) is a bit more complicated. - * Let's take the 4 cases from above: - * - * 1. If A is what we give to MPI_Send and A is where the data - * starts, then clearly we give to MPI_Send what we got back from - * malloc(). - * - * 2. If B is what we get back from malloc, but we give A to - * MPI_Send, then the buffer range [A,B) represents "dead space" - * -- no data will be put there. So it's safe to give B-true_lb to - * MPI_Send. More specifically, the true_lb is positive, so B-true_lb is - * actually A. - * - * 3. If A is what we get back from malloc, and B is what we give to - * MPI_Send, then the true_lb is negative, so A-true_lb will actually equal - * B. - * - * 4. Although this seems like the weirdest case, it's actually - * quite similar to case #2 -- the pointer we give to MPI_Send is - * smaller than the pointer we got back from malloc(). - * - * Hence, in all cases, we give (return_from_malloc - true_lb) to MPI_Send. - * - * This works fine and dandy if we only have (count==1), which we - * rarely do. ;-) So we really need to allocate (true_extent + - * ((count - 1) * extent)) to get enough space for the rest. 
This may - * be more than is necessary, but it's ok. - * - * Simple, no? :-) - * - */ - - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); + ompi_datatype_type_extent(dtype, &extent); if (MPI_IN_PLACE == sbuf) { sbuf = rbuf; - inplace_temp = (char*)malloc(true_extent + (count - 1) * extent); - if (NULL == inplace_temp) { + inplace_temp_free = (char*)malloc(dsize); + if (NULL == inplace_temp_free) { return OMPI_ERR_OUT_OF_RESOURCE; } - rbuf = inplace_temp - true_lb; + rbuf = inplace_temp_free - gap; } if (size > 1) { - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + free_buffer = (char*)malloc(dsize); if (NULL == free_buffer) { - if (NULL != inplace_temp) { - free(inplace_temp); + if (NULL != inplace_temp_free) { + free(inplace_temp_free); } return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = free_buffer - true_lb; + pml_buffer = free_buffer - gap; } /* Initialize the receive buffer. */ @@ -812,9 +682,9 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, ompi_op_reduce(op, inbuf, rbuf, count, dtype); } - if (NULL != inplace_temp) { - err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, inplace_temp); - free(inplace_temp); + if (NULL != inplace_temp_free) { + err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, rbuf); + free(inplace_temp_free); } if (NULL != free_buffer) { free(free_buffer); @@ -825,4 +695,3 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, return MPI_SUCCESS; } -/* copied function (with appropriate renaming) ends here */ diff --git a/ompi/mca/coll/base/coll_base_reduce_scatter.c b/ompi/mca/coll/base/coll_base_reduce_scatter.c index 0c23206c0e3..ffb9138b669 100644 --- a/ompi/mca/coll/base/coll_base_reduce_scatter.c +++ b/ompi/mca/coll/base/coll_base_reduce_scatter.c @@ -14,6 +14,8 @@ * Copyright (c) 2009 University of Houston. 
All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,8 +43,8 @@ * This function just calls a reduce to rank 0, followed by an * appropriate scatterv call. */ -int ompi_coll_base_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, - int *rcounts, +int ompi_coll_base_reduce_scatter_intra_nonoverlapping(const void *sbuf, void *rbuf, + const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -74,13 +76,11 @@ int ompi_coll_base_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, if (root == rank) { /* We must allocate temporary receive buffer on root to ensure that rbuf is big enough */ - ptrdiff_t lb, extent, tlb, textent; + ptrdiff_t dsize, gap; + dsize = opal_datatype_span(&dtype->super, total_count, &gap); - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &tlb, &textent); - - tmprbuf_free = (char*) malloc(textent + (ptrdiff_t)(total_count - 1) * extent); - tmprbuf = tmprbuf_free - lb; + tmprbuf_free = (char*) malloc(dsize); + tmprbuf = tmprbuf_free - gap; } err = comm->c_coll.coll_reduce (sbuf, tmprbuf, total_count, dtype, op, root, comm, comm->c_coll.coll_reduce_module); @@ -95,9 +95,15 @@ int ompi_coll_base_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, for (i = 1; i < size; i++) { displs[i] = displs[i-1] + rcounts[i-1]; } - err = comm->c_coll.coll_scatterv (tmprbuf, rcounts, displs, dtype, - rbuf, rcounts[rank], dtype, - root, comm, comm->c_coll.coll_scatterv_module); + if (MPI_IN_PLACE == sbuf && root == rank) { + err = comm->c_coll.coll_scatterv (tmprbuf, rcounts, displs, dtype, + MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, + root, comm, comm->c_coll.coll_scatterv_module); + } else { + err = comm->c_coll.coll_scatterv (tmprbuf, 
rcounts, displs, dtype, + rbuf, rcounts[rank], dtype, + root, comm, comm->c_coll.coll_scatterv_module); + } free(displs); if (NULL != tmprbuf_free) free(tmprbuf_free); @@ -122,9 +128,9 @@ int ompi_coll_base_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, * Limitation: - Works only for commutative operations. */ int -ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(void *sbuf, +ompi_coll_base_reduce_scatter_intra_basic_recursivehalving( const void *sbuf, void *rbuf, - int *rcounts, + const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -132,7 +138,7 @@ ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(void *sbuf, { int i, rank, size, count, err = OMPI_SUCCESS; int tmp_size, remain = 0, tmp_rank, *disps = NULL; - ptrdiff_t true_lb, true_extent, lb, extent, buf_size; + ptrdiff_t extent, buf_size, gap; char *recv_buf = NULL, *recv_buf_free = NULL; char *result_buf = NULL, *result_buf_free = NULL; @@ -159,9 +165,8 @@ ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(void *sbuf, } /* get datatype information */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - buf_size = true_extent + (ptrdiff_t)(count - 1) * extent; + ompi_datatype_type_extent(dtype, &extent); + buf_size = opal_datatype_span(&dtype->super, count, &gap); /* Handle MPI_IN_PLACE */ if (MPI_IN_PLACE == sbuf) { @@ -170,7 +175,7 @@ ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(void *sbuf, /* Allocate temporary receive buffer. 
*/ recv_buf_free = (char*) malloc(buf_size); - recv_buf = recv_buf_free - true_lb; + recv_buf = recv_buf_free - gap; if (NULL == recv_buf_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup; @@ -178,7 +183,7 @@ ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(void *sbuf, /* allocate temporary buffer for results */ result_buf_free = (char*) malloc(buf_size); - result_buf = result_buf_free - true_lb; + result_buf = result_buf_free - gap; /* copy local buffer into the temporary results */ err = ompi_datatype_sndrcv(sbuf, count, dtype, result_buf, count, dtype); @@ -447,7 +452,7 @@ ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(void *sbuf, * */ int -ompi_coll_base_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, +ompi_coll_base_reduce_scatter_intra_ring( const void *sbuf, void *rbuf, const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -457,9 +462,8 @@ ompi_coll_base_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, int inbi, *displs = NULL; char *tmpsend = NULL, *tmprecv = NULL, *accumbuf = NULL, *accumbuf_free = NULL; char *inbuf_free[2] = {NULL, NULL}, *inbuf[2] = {NULL, NULL}; - ptrdiff_t true_lb, true_extent, lb, extent, max_real_segsize; + ptrdiff_t extent, max_real_segsize, dsize, gap; ompi_request_t *reqs[2] = {NULL, NULL}; - size_t typelng; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -498,26 +502,23 @@ ompi_coll_base_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, rbuf can be of rcounts[rank] size. - up to two temporary buffers used for communication/computation overlap. 
*/ - ret = ompi_datatype_get_extent(dtype, &lb, &extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - ret = ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - ret = ompi_datatype_type_size( dtype, &typelng); + ret = ompi_datatype_type_extent(dtype, &extent); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - max_real_segsize = true_extent + (ptrdiff_t)(max_block_count - 1) * extent; + max_real_segsize = opal_datatype_span(&dtype->super, max_block_count, &gap); + dsize = opal_datatype_span(&dtype->super, total_count, &gap); - accumbuf_free = (char*)malloc(true_extent + (ptrdiff_t)(total_count - 1) * extent); + accumbuf_free = (char*)malloc(dsize); if (NULL == accumbuf_free) { ret = -1; line = __LINE__; goto error_hndl; } - accumbuf = accumbuf_free - lb; + accumbuf = accumbuf_free - gap; inbuf_free[0] = (char*)malloc(max_real_segsize); if (NULL == inbuf_free[0]) { ret = -1; line = __LINE__; goto error_hndl; } - inbuf[0] = inbuf_free[0] - lb; + inbuf[0] = inbuf_free[0] - gap; if (size > 2) { inbuf_free[1] = (char*)malloc(max_real_segsize); if (NULL == inbuf_free[1]) { ret = -1; line = __LINE__; goto error_hndl; } - inbuf[1] = inbuf_free[1] - lb; + inbuf[1] = inbuf_free[1] - gap; } /* Handle MPI_IN_PLACE for size > 1 */ diff --git a/ompi/mca/coll/base/coll_base_scatter.c b/ompi/mca/coll/base/coll_base_scatter.c index bd4b1400783..b86cea4ca68 100644 --- a/ompi/mca/coll/base/coll_base_scatter.c +++ b/ompi/mca/coll/base/coll_base_scatter.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -33,7 +35,7 @@ #include "coll_base_util.h" int -ompi_coll_base_scatter_intra_binomial(void *sbuf, int scount, +ompi_coll_base_scatter_intra_binomial( const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -45,10 +47,10 @@ ompi_coll_base_scatter_intra_binomial(void *sbuf, int scount, char *ptmp, *tempbuf = NULL; ompi_coll_tree_t* bmtree; MPI_Status status; - MPI_Aint sextent, slb, strue_lb, strue_extent; - MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent; mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; mca_coll_base_comm_t *data = base_module->base_data; + ptrdiff_t sextent, rextent, ssize, rsize, sgap, rgap; + size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -60,15 +62,16 @@ ompi_coll_base_scatter_intra_binomial(void *sbuf, int scount, COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root ); bmtree = data->cached_in_order_bmtree; - ompi_datatype_get_extent(sdtype, &slb, &sextent); - ompi_datatype_get_true_extent(sdtype, &strue_lb, &strue_extent); - ompi_datatype_get_extent(rdtype, &rlb, &rextent); - ompi_datatype_get_true_extent(rdtype, &rtrue_lb, &rtrue_extent); + ompi_datatype_type_extent(rdtype, &rextent); + + rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * size, &rgap); vrank = (rank - root + size) % size; ptmp = (char *) rbuf; /* by default suppose leaf nodes, just use rbuf */ if (rank == root) { + ompi_datatype_type_extent(sdtype, &sextent); + ssize = opal_datatype_span(&sdtype->super, (int64_t)scount * size, &sgap); if (0 == root) { /* root on 0, just use the send buffer */ ptmp = (char *) sbuf; @@ -80,12 +83,11 @@ ompi_coll_base_scatter_intra_binomial(void *sbuf, int scount, } } else { /* root is not on 0, allocate temp buffer for send */ - tempbuf = (char *) malloc(strue_extent + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sextent); + tempbuf = (char *) malloc(ssize); if (NULL == tempbuf) 
{ err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; } - - ptmp = tempbuf - strue_lb; + ptmp = tempbuf - sgap; /* and rotate data so they will eventually in the right place */ err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)(size - root), @@ -108,12 +110,11 @@ ompi_coll_base_scatter_intra_binomial(void *sbuf, int scount, } else if (!(vrank % 2)) { /* non-root, non-leaf nodes, allocte temp buffer for recv * the most we need is rcount*size/2 */ - tempbuf = (char *) malloc(rtrue_extent + ((ptrdiff_t)rcount * (ptrdiff_t)size - 1) * rextent); + tempbuf = (char *) malloc(rsize); if (NULL == tempbuf) { err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; } - - ptmp = tempbuf - rtrue_lb; + ptmp = tempbuf - rgap; sdtype = rdtype; scount = rcount; @@ -193,7 +194,7 @@ ompi_coll_base_scatter_intra_binomial(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_base_scatter_intra_basic_linear(void *sbuf, int scount, +ompi_coll_base_scatter_intra_basic_linear(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -202,7 +203,7 @@ ompi_coll_base_scatter_intra_basic_linear(void *sbuf, int scount, mca_coll_base_module_t *module) { int i, rank, size, err; - ptrdiff_t lb, incr; + ptrdiff_t incr; char *ptmp; /* Initialize */ @@ -221,7 +222,7 @@ ompi_coll_base_scatter_intra_basic_linear(void *sbuf, int scount, /* I am the root, loop sending data. 
*/ - err = ompi_datatype_get_extent(sdtype, &lb, &incr); + err = ompi_datatype_type_extent(sdtype, &incr); if (OMPI_SUCCESS != err) { return OMPI_ERROR; } diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index 226457185bb..58f1f4f46e6 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -2,19 +2,19 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,63 +29,43 @@ #include "ompi/mca/pml/pml.h" #include "coll_base_util.h" -int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount, +int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount, ompi_datatype_t* sdatatype, int dest, int stag, - void* recvbuf, size_t rcount, + void* recvbuf, size_t rcount, ompi_datatype_t* rdatatype, int source, int rtag, struct ompi_communicator_t* comm, ompi_status_public_t* status ) { /* post receive first, then send, then waitall... 
should be fast (I hope) */ - int err, line = 0, nreqs = 0; - size_t typesize; - ompi_request_t* reqs[2], **req = reqs; - ompi_status_public_t statuses[2]; + int err, line = 0; + size_t rtypesize, stypesize; + ompi_request_t *req; + ompi_status_public_t rstatus; /* post new irecv */ - ompi_datatype_type_size(rdatatype, &typesize); - if (0 != rcount && 0 != typesize) { - err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, - comm, req++)); - ++nreqs; + ompi_datatype_type_size(rdatatype, &rtypesize); + if (0 != rcount && 0 != rtypesize) { + err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, + comm, &req)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } } /* send data to children */ - ompi_datatype_type_size(sdatatype, &typesize); - if (0 != scount && 0 != typesize) { - err = MCA_PML_CALL(isend( sendbuf, scount, sdatatype, dest, stag, - MCA_PML_BASE_SEND_STANDARD, comm, req++)); - ++nreqs; + ompi_datatype_type_size(sdatatype, &stypesize); + if (0 != scount && 0 != stypesize) { + err = MCA_PML_CALL(send( sendbuf, scount, sdatatype, dest, stag, + MCA_PML_BASE_SEND_STANDARD, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } } - if (0 != nreqs) { - err = ompi_request_wait_all( nreqs, reqs, statuses ); - if( MPI_ERR_IN_STATUS == err ) { - /* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error - * code that we can propagate up the stack. Instead, look for the real - * error code from the MPI_ERROR in the status. - */ - int err_index = 0; - if( MPI_SUCCESS == statuses[0].MPI_ERROR ) { - err_index = 1; - } - if (MPI_STATUS_IGNORE != status) { - *status = statuses[err_index]; - } - err = statuses[err_index].MPI_ERROR; - OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred in the %s" - " stage of ompi_coll_base_sendrecv_zero\n", - __FILE__, line, err, (0 == err_index ? 
"receive" : "send"))); - return err; - } + if (0 != rcount && 0 != rtypesize) { + err = ompi_request_wait( &req, &rstatus); if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } if (MPI_STATUS_IGNORE != status) { - *status = statuses[0]; + *status = rstatus; } } else { if( MPI_STATUS_IGNORE != status ) @@ -95,7 +75,7 @@ int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount, return (MPI_SUCCESS); error_handler: - /* Error discovered during the posting of the irecv or isend, + /* Error discovered during the posting of the irecv or send, * and no status is available. */ OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n", diff --git a/ompi/mca/coll/base/coll_tags.h b/ompi/mca/coll/base/coll_tags.h index ca229ef8af4..45c9724dba3 100644 --- a/ompi/mca/coll/base/coll_tags.h +++ b/ompi/mca/coll/base/coll_tags.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/coll/base/help-mca-coll-base.txt b/ompi/mca/coll/base/help-mca-coll-base.txt index f7c0faf863a..d6e0071fa7a 100644 --- a/ompi/mca/coll/base/help-mca-coll-base.txt +++ b/ompi/mca/coll/base/help-mca-coll-base.txt @@ -6,18 +6,19 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI MCA coll-specific -# error messages. +# error messages. # [comm-select:none-available] Although some coll components are available on your system, none of @@ -40,15 +41,7 @@ check that the "basic", "libnbc" and "self" coll components are available on your system -- check the output of the "ompi_info" command). #[comm-unselect:failed-finalize] A coll module failed to finalize properly when a communicator that was -using it was destroyed. +using it was destroyed. This is somewhat unusual: the module itself may be at fault, or this may be a symptom of another issue (e.g., a memory problem). -# -[comm-unselect:basic-failed-finalize] -The basic coll module failed to finalize properly when a communicator -that was using it was destroyed. This happened on the communicator -named "%s". - -This is extremely unusual and typically indicates some other kind of -problem (e.g., a memory problem) diff --git a/ompi/mca/coll/basic/Makefile.am b/ompi/mca/coll/basic/Makefile.am index 403637882ce..e0abe4f3211 100644 --- a/ompi/mca/coll/basic/Makefile.am +++ b/ompi/mca/coll/basic/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/coll/basic/coll_basic.h b/ompi/mca/coll/basic/coll_basic.h index e0254167af3..b2e3e3e11b8 100644 --- a/ompi/mca/coll/basic/coll_basic.h +++ b/ompi/mca/coll/basic/coll_basic.h @@ -3,10 +3,10 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,12 @@ * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -53,62 +55,62 @@ BEGIN_C_DECLS int mca_coll_basic_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); - int mca_coll_basic_allgather_inter(void *sbuf, int scount, + int mca_coll_basic_allgather_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_allgatherv_inter(void *sbuf, int scount, + int mca_coll_basic_allgatherv_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *disps, + void *rbuf, const int *rcounts, + const int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_allreduce_intra(void *sbuf, void *rbuf, int count, + int mca_coll_basic_allreduce_intra(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_allreduce_inter(void *sbuf, void *rbuf, int count, + int mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_alltoall_inter(void *sbuf, int scount, + int mca_coll_basic_alltoall_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_alltoallv_inter(void *sbuf, int *scounts, - int *sdisps, + int mca_coll_basic_alltoallv_inter(const void *sbuf, const int *scounts, + const int *sdisps, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *rdisps, + void *rbuf, const int *rcounts, + const int *rdisps, struct 
ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, - int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, - int *rdisps, - struct ompi_datatype_t **rdtypes, + int mca_coll_basic_alltoallw_intra(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_alltoallw_inter(void *sbuf, int *scounts, - int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, - int *rdisps, - struct ompi_datatype_t **rdtypes, + int mca_coll_basic_alltoallw_inter(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); @@ -136,19 +138,19 @@ BEGIN_C_DECLS struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_exscan_intra(void *sbuf, void *rbuf, int count, + int mca_coll_basic_exscan_intra(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_exscan_inter(void *sbuf, void *rbuf, int count, + int mca_coll_basic_exscan_inter(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_gather_inter(void *sbuf, int scount, + int mca_coll_basic_gather_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -156,82 +158,82 @@ BEGIN_C_DECLS struct ompi_communicator_t *comm, 
mca_coll_base_module_t *module); - int mca_coll_basic_gatherv_intra(void *sbuf, int scount, + int mca_coll_basic_gatherv_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_gatherv_inter(void *sbuf, int scount, + int mca_coll_basic_gatherv_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_reduce_lin_inter(void *sbuf, void *rbuf, int count, + int mca_coll_basic_reduce_lin_inter(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count, + int mca_coll_basic_reduce_log_intra(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_reduce_log_inter(void *sbuf, void *rbuf, int count, + int mca_coll_basic_reduce_log_inter(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_reduce_scatter_block_intra(void *sbuf, void *rbuf, + int mca_coll_basic_reduce_scatter_block_intra(const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_reduce_scatter_block_inter(void *sbuf, void *rbuf, + int 
mca_coll_basic_reduce_scatter_block_inter(const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, - int *rcounts, + int mca_coll_basic_reduce_scatter_intra(const void *sbuf, void *rbuf, + const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, - int *rcounts, + int mca_coll_basic_reduce_scatter_inter(const void *sbuf, void *rbuf, + const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_scan_intra(void *sbuf, void *rbuf, int count, + int mca_coll_basic_scan_intra(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_scan_inter(void *sbuf, void *rbuf, int count, + int mca_coll_basic_scan_inter(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_scatter_inter(void *sbuf, int scount, + int mca_coll_basic_scatter_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -239,14 +241,14 @@ BEGIN_C_DECLS struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_scatterv_intra(void *sbuf, int *scounts, int *disps, + int mca_coll_basic_scatterv_intra(const void *sbuf, const int *scounts, const int *disps, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int 
mca_coll_basic_scatterv_inter(void *sbuf, int *scounts, int *disps, + int mca_coll_basic_scatterv_inter(const void *sbuf, const int *scounts, const int *disps, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -254,52 +256,38 @@ BEGIN_C_DECLS struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_neighbor_allgather(void *sbuf, int scount, + int mca_coll_basic_neighbor_allgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_neighbor_allgatherv(void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int rcounts[], int disps[], struct ompi_datatype_t *rdtype, + int mca_coll_basic_neighbor_allgatherv(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, const int rcounts[], const int disps[], struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_neighbor_alltoall(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, + int mca_coll_basic_neighbor_alltoall(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_neighbor_alltoallv(void *sbuf, int scounts[], int sdisps[], - struct ompi_datatype_t *sdtype, void *rbuf, int rcounts[], - int rdisps[], struct ompi_datatype_t *rdtype, + int mca_coll_basic_neighbor_alltoallv(const void *sbuf, const int scounts[], const int sdisps[], + struct ompi_datatype_t *sdtype, void *rbuf, const int rcounts[], + const int rdisps[], struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_neighbor_alltoallw(void *sbuf, int scounts[], MPI_Aint sdisps[], - struct ompi_datatype_t *sdtypes[], 
void *rbuf, int rcounts[], - MPI_Aint rdisps[], struct ompi_datatype_t *rdtypes[], + int mca_coll_basic_neighbor_alltoallw(const void *sbuf, const int scounts[], const MPI_Aint sdisps[], + struct ompi_datatype_t * const *sdtypes, void *rbuf, const int rcounts[], + const MPI_Aint rdisps[], struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); int mca_coll_basic_ft_event(int status); -/* Utility functions */ - - static inline void mca_coll_basic_free_reqs(ompi_request_t ** reqs, - int count) - { - int i; - for (i = 0; i < count; ++i) - ompi_request_free(&reqs[i]); - } - - struct mca_coll_basic_module_t { mca_coll_base_module_t super; - - ompi_request_t **mccb_reqs; - int mccb_num_reqs; }; typedef struct mca_coll_basic_module_t mca_coll_basic_module_t; -OBJ_CLASS_DECLARATION(mca_coll_basic_module_t); +OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_basic_module_t); END_C_DECLS diff --git a/ompi/mca/coll/basic/coll_basic_allgather.c b/ompi/mca/coll/basic/coll_basic_allgather.c index 1ad3a700782..efda8bbe91f 100644 --- a/ompi/mca/coll/basic/coll_basic_allgather.c +++ b/ompi/mca/coll/basic/coll_basic_allgather.c @@ -2,19 +2,19 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
- * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -40,19 +40,19 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_allgather_inter(void *sbuf, int scount, +mca_coll_basic_allgather_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int rank, root = 0, size, rsize, err, i; - char *tmpbuf = NULL, *ptmp; - ptrdiff_t rlb, slb, rextent, sextent, incr; + int rank, root = 0, size, rsize, err, i, line; + char *tmpbuf_free = NULL, *tmpbuf, *ptmp; + ptrdiff_t rlb, rextent, incr; + ptrdiff_t gap, span; ompi_request_t *req; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - ompi_request_t **reqs = basic_module->mccb_reqs; + ompi_request_t **reqs = NULL; rank = ompi_comm_rank(comm); size = ompi_comm_size(comm); @@ -61,7 +61,7 @@ mca_coll_basic_allgather_inter(void *sbuf, int scount, /* Algorithm: * - a gather to the root in remote group (simultaniously executed, * thats why we cannot use coll_gather). - * - exchange the temp-results between two roots + * - exchange the temp-results between two roots * - inter-bcast (again simultanious). */ @@ -71,35 +71,27 @@ mca_coll_basic_allgather_inter(void *sbuf, int scount, err = MCA_PML_CALL(send(sbuf, scount, sdtype, root, MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, comm)); - if (OMPI_SUCCESS != err) { - return err; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } } else { /* receive a msg. from all other procs. 
*/ err = ompi_datatype_get_extent(rdtype, &rlb, &rextent); - if (OMPI_SUCCESS != err) { - return err; - } - err = ompi_datatype_get_extent(sdtype, &slb, &sextent); - if (OMPI_SUCCESS != err) { - return err; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } + + /* Get a requests arrays of the right size */ + reqs = coll_base_comm_get_reqs(module->base_data, rsize + 1); + if( NULL == reqs ) { line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } /* Do a send-recv between the two root procs. to avoid deadlock */ err = MCA_PML_CALL(isend(sbuf, scount, sdtype, 0, MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, comm, &reqs[rsize])); - if (OMPI_SUCCESS != err) { - return err; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } err = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, 0, MCA_COLL_BASE_TAG_ALLGATHER, comm, &reqs[0])); - if (OMPI_SUCCESS != err) { - return err; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } incr = rextent * rcount; ptmp = (char *) rbuf + incr; @@ -107,55 +99,43 @@ mca_coll_basic_allgather_inter(void *sbuf, int scount, err = MCA_PML_CALL(irecv(ptmp, rcount, rdtype, i, MCA_COLL_BASE_TAG_ALLGATHER, comm, &reqs[i])); - if (MPI_SUCCESS != err) { - return err; - } + if (MPI_SUCCESS != err) { line = __LINE__; goto exit; } } err = ompi_request_wait_all(rsize + 1, reqs, MPI_STATUSES_IGNORE); - if (OMPI_SUCCESS != err) { - return err; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } /* Step 2: exchange the resuts between the root processes */ - tmpbuf = (char *) malloc(scount * size * sextent); - if (NULL == tmpbuf) { - return err; - } + span = opal_datatype_span(&sdtype->super, (int64_t)scount * (int64_t)size, &gap); + tmpbuf_free = (char *) malloc(span); + if (NULL == tmpbuf_free) { line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } + tmpbuf = tmpbuf_free - gap; err = MCA_PML_CALL(isend(rbuf, rsize * rcount, rdtype, 0, MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, 
comm, &req)); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } err = MCA_PML_CALL(recv(tmpbuf, size * scount, sdtype, 0, MCA_COLL_BASE_TAG_ALLGATHER, comm, MPI_STATUS_IGNORE)); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } err = ompi_request_wait( &req, MPI_STATUS_IGNORE); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } } - /* Step 3: bcast the data to the remote group. This - * happens in both groups simultaniously, thus we can - * not use coll_bcast (this would deadlock). + /* Step 3: bcast the data to the remote group. This + * happens in both groups simultaneously, thus we can + * not use coll_bcast (this would deadlock). */ if (rank != root) { /* post the recv */ err = MCA_PML_CALL(recv(rbuf, rsize * rcount, rdtype, 0, MCA_COLL_BASE_TAG_ALLGATHER, comm, MPI_STATUS_IGNORE)); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } } else { /* Send the data to every other process in the remote group @@ -165,21 +145,21 @@ mca_coll_basic_allgather_inter(void *sbuf, int scount, MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, comm, &reqs[i - 1])); - if (OMPI_SUCCESS != err) { - goto exit; - } - + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } } err = ompi_request_wait_all(rsize - 1, reqs, MPI_STATUSES_IGNORE); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } } exit: - if (NULL != tmpbuf) { - free(tmpbuf); + if( MPI_SUCCESS != err ) { + OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", + __FILE__, line, err, rank) ); + if( NULL != reqs ) ompi_coll_base_free_reqs(reqs, rsize+1); + } + if (NULL != tmpbuf_free) { + free(tmpbuf_free); } return err; diff --git a/ompi/mca/coll/basic/coll_basic_allgatherv.c 
b/ompi/mca/coll/basic/coll_basic_allgatherv.c index 329eaf8ad8f..027499298d8 100644 --- a/ompi/mca/coll/basic/coll_basic_allgatherv.c +++ b/ompi/mca/coll/basic/coll_basic_allgatherv.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,9 +38,9 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_allgatherv_inter(void *sbuf, int scount, +mca_coll_basic_allgatherv_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) diff --git a/ompi/mca/coll/basic/coll_basic_allreduce.c b/ompi/mca/coll/basic/coll_basic_allreduce.c index 9978297a0de..d697d71c936 100644 --- a/ompi/mca/coll/basic/coll_basic_allreduce.c +++ b/ompi/mca/coll/basic/coll_basic_allreduce.c @@ -2,17 +2,19 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,7 +39,7 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_allreduce_intra(void *sbuf, void *rbuf, int count, +mca_coll_basic_allreduce_intra(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -72,19 +74,17 @@ mca_coll_basic_allreduce_intra(void *sbuf, void *rbuf, int count, * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_allreduce_inter(void *sbuf, void *rbuf, int count, +mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int err, i, rank, root = 0, rsize; - ptrdiff_t lb, extent; - ptrdiff_t true_lb, true_extent; + int err, i, rank, root = 0, rsize, line; + ptrdiff_t extent, dsize, gap; char *tmpbuf = NULL, *pml_buffer = NULL; ompi_request_t *req[2]; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - ompi_request_t **reqs = basic_module->mccb_reqs; + ompi_request_t **reqs = NULL; rank = ompi_comm_rank(comm); rsize = ompi_comm_remote_size(comm); @@ -99,51 +99,41 @@ mca_coll_basic_allreduce_inter(void *sbuf, void *rbuf, int count, * simultaniously. 
*/ /*****************************************************************/ if (rank == root) { - err = ompi_datatype_get_extent(dtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } - err = ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + err = ompi_datatype_type_extent(dtype, &extent); if (OMPI_SUCCESS != err) { return OMPI_ERROR; } + dsize = opal_datatype_span(&dtype->super, count, &gap); + tmpbuf = (char *) malloc(dsize); + if (NULL == tmpbuf) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto exit; } + pml_buffer = tmpbuf - gap; - tmpbuf = (char *) malloc(true_extent + (count - 1) * extent); - if (NULL == tmpbuf) { - return OMPI_ERR_OUT_OF_RESOURCE; + if (rsize > 1) { + reqs = coll_base_comm_get_reqs(module->base_data, rsize - 1); + if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto exit; } } - pml_buffer = tmpbuf - true_lb; /* Do a send-recv between the two root procs. to avoid deadlock */ err = MCA_PML_CALL(irecv(rbuf, count, dtype, 0, MCA_COLL_BASE_TAG_ALLREDUCE, comm, &(req[0]))); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } err = MCA_PML_CALL(isend(sbuf, count, dtype, 0, MCA_COLL_BASE_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD, comm, &(req[1]))); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } err = ompi_request_wait_all(2, req, MPI_STATUSES_IGNORE); - if (OMPI_SUCCESS != err) { - goto exit; - } - + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } /* Loop receiving and calling reduction function (C or Fortran). 
*/ for (i = 1; i < rsize; i++) { err = MCA_PML_CALL(recv(pml_buffer, count, dtype, i, MCA_COLL_BASE_TAG_ALLREDUCE, comm, MPI_STATUS_IGNORE)); - if (MPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } /* Perform the reduction */ ompi_op_reduce(op, pml_buffer, rbuf, count, dtype); @@ -153,15 +143,13 @@ mca_coll_basic_allreduce_inter(void *sbuf, void *rbuf, int count, err = MCA_PML_CALL(send(sbuf, count, dtype, root, MCA_COLL_BASE_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD, comm)); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } } /* now we have on one process the result of the remote group. To distribute * the data to all processes in the local group, we exchange the data between - * the two root processes. They then send it to every other process in the + * the two root processes. They then send it to every other process in the * remote group. */ /***************************************************************************/ if (rank == root) { @@ -169,26 +157,21 @@ mca_coll_basic_allreduce_inter(void *sbuf, void *rbuf, int count, err = MCA_PML_CALL(irecv(pml_buffer, count, dtype, 0, MCA_COLL_BASE_TAG_ALLREDUCE, comm, &(req[1]))); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } err = MCA_PML_CALL(isend(rbuf, count, dtype, 0, MCA_COLL_BASE_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD, comm, &(req[0]))); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } + err = ompi_request_wait_all(2, req, MPI_STATUSES_IGNORE); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } /* distribute the data to other processes in remote group. * Note that we start from 1 (not from zero), since zero - * has already the correct data AND we avoid a potential - * deadlock here. 
+ * has already the correct data AND we avoid a potential + * deadlock here. */ if (rsize > 1) { for (i = 1; i < rsize; i++) { @@ -196,17 +179,13 @@ mca_coll_basic_allreduce_inter(void *sbuf, void *rbuf, int count, MCA_COLL_BASE_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD, comm, &reqs[i - 1])); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } } err = ompi_request_wait_all(rsize - 1, reqs, MPI_STATUSES_IGNORE); - if (OMPI_SUCCESS != err) { - goto exit; - } + if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } } } else { err = MCA_PML_CALL(recv(rbuf, count, dtype, root, @@ -215,10 +194,14 @@ mca_coll_basic_allreduce_inter(void *sbuf, void *rbuf, int count, } exit: + if( MPI_SUCCESS != err ) { + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, + line, err, rank)); + ompi_coll_base_free_reqs(reqs, rsize - 1); + } if (NULL != tmpbuf) { free(tmpbuf); } - return err; } diff --git a/ompi/mca/coll/basic/coll_basic_alltoall.c b/ompi/mca/coll/basic/coll_basic_alltoall.c index 6f46594b406..acb08b8455c 100644 --- a/ompi/mca/coll/basic/coll_basic_alltoall.c +++ b/ompi/mca/coll/basic/coll_basic_alltoall.c @@ -3,21 +3,21 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
- * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -35,12 +35,12 @@ /* * alltoall_inter * - * Function: - MPI_Alltoall + * Function: - MPI_Alltoall * Accepts: - same as MPI_Alltoall() * Returns: - MPI_SUCCESS or an MPI error code */ int -mca_coll_basic_alltoall_inter(void *sbuf, int scount, +mca_coll_basic_alltoall_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -57,11 +57,7 @@ mca_coll_basic_alltoall_inter(void *sbuf, int scount, MPI_Aint sndinc; MPI_Aint rcvinc; - ompi_request_t **req; - ompi_request_t **sreq; - ompi_request_t **rreq; - - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; + ompi_request_t **req, **sreq, **rreq; /* Initialize. */ @@ -81,7 +77,8 @@ mca_coll_basic_alltoall_inter(void *sbuf, int scount, /* Initiate all send/recv to/from others. */ nreqs = size * 2; - req = rreq = basic_module->mccb_reqs; + req = rreq = coll_base_comm_get_reqs( module->base_data, nreqs); + if( NULL == req ) { return OMPI_ERR_OUT_OF_RESOURCE; } sreq = rreq + size; prcv = (char *) rbuf; @@ -92,6 +89,7 @@ mca_coll_basic_alltoall_inter(void *sbuf, int scount, err = MCA_PML_CALL(irecv(prcv + (i * rcvinc), rcount, rdtype, i, MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq)); if (OMPI_SUCCESS != err) { + ompi_coll_base_free_reqs(req, i + 1); return err; } } @@ -102,6 +100,7 @@ mca_coll_basic_alltoall_inter(void *sbuf, int scount, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, sreq)); if (OMPI_SUCCESS != err) { + ompi_coll_base_free_reqs(req, i + size + 1); return err; } } @@ -113,6 +112,9 @@ mca_coll_basic_alltoall_inter(void *sbuf, int scount, * So free them anyway -- even if there was an error, and return * the error after we free everything. 
*/ err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != err) { + ompi_coll_base_free_reqs(req, nreqs); + } /* All done */ return err; diff --git a/ompi/mca/coll/basic/coll_basic_alltoallv.c b/ompi/mca/coll/basic/coll_basic_alltoallv.c index 77245cb4e44..aa66aa3c075 100644 --- a/ompi/mca/coll/basic/coll_basic_alltoallv.c +++ b/ompi/mca/coll/basic/coll_basic_alltoallv.c @@ -3,22 +3,22 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -41,9 +41,9 @@ * Returns: - MPI_SUCCESS or an MPI error code */ int -mca_coll_basic_alltoallv_inter(void *sbuf, int *scounts, int *sdisps, +mca_coll_basic_alltoallv_inter(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, - int *rcounts, int *rdisps, + const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -57,8 +57,7 @@ mca_coll_basic_alltoallv_inter(void *sbuf, int *scounts, int *sdisps, MPI_Aint sndextent; MPI_Aint rcvextent; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - ompi_request_t **preq = basic_module->mccb_reqs; + ompi_request_t **preq; /* Initialize. */ @@ -69,6 +68,8 @@ mca_coll_basic_alltoallv_inter(void *sbuf, int *scounts, int *sdisps, /* Initiate all send/recv to/from others. */ nreqs = rsize * 2; + preq = coll_base_comm_get_reqs(module->base_data, nreqs); + if( NULL == preq ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* Post all receives first */ /* A simple optimization: do not send and recv msgs of length zero */ @@ -79,10 +80,9 @@ mca_coll_basic_alltoallv_inter(void *sbuf, int *scounts, int *sdisps, i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, &preq[i])); if (MPI_SUCCESS != err) { + ompi_coll_base_free_reqs(preq, i + 1); return err; } - } else { - preq[i] = MPI_REQUEST_NULL; } } @@ -95,14 +95,16 @@ mca_coll_basic_alltoallv_inter(void *sbuf, int *scounts, int *sdisps, MCA_PML_BASE_SEND_STANDARD, comm, &preq[rsize + i])); if (MPI_SUCCESS != err) { + ompi_coll_base_free_reqs(preq, rsize + i + 1); return err; } - } else { - preq[rsize + i] = MPI_REQUEST_NULL; } } err = ompi_request_wait_all(nreqs, preq, MPI_STATUSES_IGNORE); + if (MPI_SUCCESS != err) { + ompi_coll_base_free_reqs(preq, nreqs); + } /* All done */ return err; diff --git a/ompi/mca/coll/basic/coll_basic_alltoallw.c 
b/ompi/mca/coll/basic/coll_basic_alltoallw.c index 9f85da09277..9dabfc52bf6 100644 --- a/ompi/mca/coll/basic/coll_basic_alltoallw.c +++ b/ompi/mca/coll/basic/coll_basic_alltoallw.c @@ -3,10 +3,10 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,13 +14,13 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,16 +36,15 @@ static int -mca_coll_basic_alltoallw_intra_inplace(void *rbuf, int *rcounts, const int *rdisps, +mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - int i, j, size, rank, err=MPI_SUCCESS, max_size; - MPI_Request *preq; - char *tmp_buffer; - ptrdiff_t ext; + int i, j, size, rank, err = MPI_SUCCESS, max_size; + ompi_request_t *req; + char *tmp_buffer, *save_buffer = NULL; + ptrdiff_t ext, gap; /* Initialize. 
*/ @@ -59,17 +58,17 @@ mca_coll_basic_alltoallw_intra_inplace(void *rbuf, int *rcounts, const int *rdis /* Find the largest receive amount */ for (i = 0, max_size = 0 ; i < size ; ++i) { - ompi_datatype_type_extent (rdtypes[i], &ext); - ext *= rcounts[i]; + ext = opal_datatype_span(&rdtypes[i]->super, rcounts[i], &gap); max_size = ext > max_size ? ext : max_size; } /* Allocate a temporary buffer */ - tmp_buffer = calloc (max_size, 1); + tmp_buffer = save_buffer = calloc (max_size, 1); if (NULL == tmp_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } + tmp_buffer -= gap; /* in-place alltoallw slow algorithm (but works) */ for (i = 0 ; i < size ; ++i) { @@ -82,8 +81,6 @@ mca_coll_basic_alltoallw_intra_inplace(void *rbuf, int *rcounts, const int *rdis msg_size_j *= rcounts[j]; /* Initiate all send/recv to/from others. */ - preq = basic_module->mccb_reqs; - if (i == rank && msg_size_j != 0) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtypes[j], rcounts[j], @@ -92,12 +89,12 @@ mca_coll_basic_alltoallw_intra_inplace(void *rbuf, int *rcounts, const int *rdis /* Exchange data with the peer */ err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtypes[j], - j, MCA_COLL_BASE_TAG_ALLTOALLW, comm, preq++)); + j, MCA_COLL_BASE_TAG_ALLTOALLW, comm, &req)); if (MPI_SUCCESS != err) { goto error_hndl; } - err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[j], rdtypes[j], + err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[j], rdtypes[j], j, MCA_COLL_BASE_TAG_ALLTOALLW, MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); + comm)); if (MPI_SUCCESS != err) { goto error_hndl; } } else if (j == rank && msg_size_i != 0) { /* Copy the data into the temporary buffer */ @@ -107,29 +104,26 @@ mca_coll_basic_alltoallw_intra_inplace(void *rbuf, int *rcounts, const int *rdis /* Exchange data with the peer */ err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtypes[i], - i, MCA_COLL_BASE_TAG_ALLTOALLW, comm, preq++)); + 
i, MCA_COLL_BASE_TAG_ALLTOALLW, comm, &req)); if (MPI_SUCCESS != err) { goto error_hndl; } - err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[i], rdtypes[i], + err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[i], rdtypes[i], i, MCA_COLL_BASE_TAG_ALLTOALLW, MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); + comm)); if (MPI_SUCCESS != err) { goto error_hndl; } } else { continue; } /* Wait for the requests to complete */ - err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + err = ompi_request_wait (&req, MPI_STATUSES_IGNORE); if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Free the requests. */ - mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2); } } error_hndl: /* Free the temporary buffer */ - free (tmp_buffer); + free (save_buffer); /* All done */ @@ -145,22 +139,16 @@ mca_coll_basic_alltoallw_intra_inplace(void *rbuf, int *rcounts, const int *rdis * Returns: - MPI_SUCCESS or an MPI error code */ int -mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, +mca_coll_basic_alltoallw_intra(const void *sbuf, const int *scounts, const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int i; - int size; - int rank; - int err; - char *psnd; - char *prcv; - int nreqs; - MPI_Request *preq; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; + int i, size, rank, err, nreqs; + char *psnd, *prcv; + ompi_request_t **preq, **reqs; /* Initialize. */ if (MPI_IN_PLACE == sbuf) { @@ -191,7 +179,8 @@ mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, /* Initiate all send/recv to/from others. 
*/ nreqs = 0; - preq = basic_module->mccb_reqs; + reqs = preq = coll_base_comm_get_reqs(module->base_data, 2 * size); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* Post all receives first -- a simple optimization */ @@ -199,7 +188,7 @@ mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, size_t msg_size; ompi_datatype_type_size(rdtypes[i], &msg_size); msg_size *= rcounts[i]; - + if (i == rank || 0 == msg_size) continue; @@ -209,8 +198,7 @@ mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, preq++)); ++nreqs; if (MPI_SUCCESS != err) { - mca_coll_basic_free_reqs(basic_module->mccb_reqs, - nreqs); + ompi_coll_base_free_reqs(reqs, nreqs); return err; } } @@ -232,15 +220,14 @@ mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, preq++)); ++nreqs; if (MPI_SUCCESS != err) { - mca_coll_basic_free_reqs(basic_module->mccb_reqs, - nreqs); + ompi_coll_base_free_reqs(reqs, nreqs); return err; } } /* Start your engines. This will never return an error. */ - MCA_PML_CALL(start(nreqs, basic_module->mccb_reqs)); + MCA_PML_CALL(start(nreqs, reqs)); /* Wait for them all. If there's an error, note that we don't care * what the error was -- just that there *was* an error. The PML @@ -249,15 +236,11 @@ mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, * So free them anyway -- even if there was an error, and return the * error after we free everything. */ - err = ompi_request_wait_all(nreqs, basic_module->mccb_reqs, - MPI_STATUSES_IGNORE); - - /* Free the requests. 
*/ - - mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs); + err = ompi_request_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE); + /* Free the requests in all cases as they are persistent */ + ompi_coll_base_free_reqs(reqs, nreqs); /* All done */ - return err; } @@ -270,35 +253,31 @@ mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, * Returns: - MPI_SUCCESS or an MPI error code */ int -mca_coll_basic_alltoallw_inter(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, +mca_coll_basic_alltoallw_inter(const void *sbuf, const int *scounts, const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int i; - int size; - int err; - char *psnd; - char *prcv; - int nreqs; - MPI_Request *preq; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; + int i, size, err, nreqs; + char *psnd, *prcv; + ompi_request_t **preq, **reqs; /* Initialize. */ size = ompi_comm_remote_size(comm); /* Initiate all send/recv to/from others. 
*/ nreqs = 0; - preq = basic_module->mccb_reqs; + reqs = preq = coll_base_comm_get_reqs(module->base_data, 2 * size); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* Post all receives first -- a simple optimization */ for (i = 0; i < size; ++i) { size_t msg_size; ompi_datatype_type_size(rdtypes[i], &msg_size); msg_size *= rcounts[i]; - + if (0 == msg_size) continue; @@ -308,8 +287,7 @@ mca_coll_basic_alltoallw_inter(void *sbuf, int *scounts, int *sdisps, comm, preq++)); ++nreqs; if (OMPI_SUCCESS != err) { - mca_coll_basic_free_reqs(basic_module->mccb_reqs, - nreqs); + ompi_coll_base_free_reqs(reqs, nreqs); return err; } } @@ -330,14 +308,13 @@ mca_coll_basic_alltoallw_inter(void *sbuf, int *scounts, int *sdisps, preq++)); ++nreqs; if (OMPI_SUCCESS != err) { - mca_coll_basic_free_reqs(basic_module->mccb_reqs, - nreqs); + ompi_coll_base_free_reqs(reqs, nreqs); return err; } } /* Start your engines. This will never return an error. */ - MCA_PML_CALL(start(nreqs, basic_module->mccb_reqs)); + MCA_PML_CALL(start(nreqs, reqs)); /* Wait for them all. If there's an error, note that we don't care * what the error was -- just that there *was* an error. The PML @@ -345,11 +322,10 @@ mca_coll_basic_alltoallw_inter(void *sbuf, int *scounts, int *sdisps, * i.e., by the end of this call, all the requests are free-able. * So free them anyway -- even if there was an error, and return the * error after we free everything. */ - err = ompi_request_wait_all(nreqs, basic_module->mccb_reqs, - MPI_STATUSES_IGNORE); + err = ompi_request_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE); - /* Free the requests. 
*/ - mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs); + /* Free the requests in all cases as they are persistent */ + ompi_coll_base_free_reqs(reqs, nreqs); /* All done */ return err; diff --git a/ompi/mca/coll/basic/coll_basic_barrier.c b/ompi/mca/coll/basic/coll_basic_barrier.c index 2c9568a2a67..2e261090220 100644 --- a/ompi/mca/coll/basic/coll_basic_barrier.c +++ b/ompi/mca/coll/basic/coll_basic_barrier.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -55,9 +55,6 @@ mca_coll_basic_barrier_intra_log(struct ompi_communicator_t *comm, dim = comm->c_cube_dim; hibit = opal_hibit(rank, dim); - if (hibit < 0) { - return MPI_ERR_OTHER; - } --dim; /* Receive from children. */ diff --git a/ompi/mca/coll/basic/coll_basic_bcast.c b/ompi/mca/coll/basic/coll_basic_bcast.c index f03bf1251d5..9dbbb9ac36c 100644 --- a/ompi/mca/coll/basic/coll_basic_bcast.c +++ b/ompi/mca/coll/basic/coll_basic_bcast.c @@ -2,18 +2,20 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -53,9 +55,7 @@ mca_coll_basic_bcast_log_intra(void *buff, int count, int mask; int err; int nreqs; - ompi_request_t **preq; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - ompi_request_t **reqs = basic_module->mccb_reqs; + ompi_request_t **preq, **reqs; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -63,14 +63,12 @@ mca_coll_basic_bcast_log_intra(void *buff, int count, dim = comm->c_cube_dim; hibit = opal_hibit(vrank, dim); - if (hibit < 0) { - return MPI_ERR_OTHER; - } --dim; /* Receive data from parent in the tree. */ if (vrank > 0) { + assert(hibit >= 0); peer = ((vrank & ~(1 << hibit)) + root) % size; err = MCA_PML_CALL(recv(buff, count, datatype, peer, @@ -83,6 +81,9 @@ mca_coll_basic_bcast_log_intra(void *buff, int count, /* Send data to the children. 
*/ + reqs = coll_base_comm_get_reqs(module->base_data, size); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } + err = MPI_SUCCESS; preq = reqs; nreqs = 0; @@ -92,12 +93,12 @@ mca_coll_basic_bcast_log_intra(void *buff, int count, peer = (peer + root) % size; ++nreqs; - err = MCA_PML_CALL(isend_init(buff, count, datatype, peer, - MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); + err = MCA_PML_CALL(isend(buff, count, datatype, peer, + MCA_COLL_BASE_TAG_BCAST, + MCA_PML_BASE_SEND_STANDARD, + comm, preq++)); if (MPI_SUCCESS != err) { - mca_coll_basic_free_reqs(reqs, nreqs); + ompi_coll_base_free_reqs(reqs, nreqs); return err; } } @@ -107,10 +108,6 @@ mca_coll_basic_bcast_log_intra(void *buff, int count, if (nreqs > 0) { - /* Start your engines. This will never return an error. */ - - MCA_PML_CALL(start(nreqs, reqs)); - /* Wait for them all. If there's an error, note that we don't * care what the error was -- just that there *was* an error. * The PML will finish all requests, even if one or more of them @@ -119,10 +116,9 @@ mca_coll_basic_bcast_log_intra(void *buff, int count, * error, and return the error after we free everything. 
*/ err = ompi_request_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE); - - /* Free the reqs */ - - mca_coll_basic_free_reqs(reqs, nreqs); + if( MPI_SUCCESS != err ) { + ompi_coll_base_free_reqs(reqs, nreqs); + } } /* All done */ @@ -147,8 +143,7 @@ mca_coll_basic_bcast_lin_inter(void *buff, int count, int i; int rsize; int err; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - ompi_request_t **reqs = basic_module->mccb_reqs; + ompi_request_t **reqs = NULL; rsize = ompi_comm_remote_size(comm); @@ -161,6 +156,9 @@ mca_coll_basic_bcast_lin_inter(void *buff, int count, MCA_COLL_BASE_TAG_BCAST, comm, MPI_STATUS_IGNORE)); } else { + reqs = coll_base_comm_get_reqs(module->base_data, rsize); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } + /* root section */ for (i = 0; i < rsize; i++) { err = MCA_PML_CALL(isend(buff, count, datatype, i, @@ -168,10 +166,14 @@ mca_coll_basic_bcast_lin_inter(void *buff, int count, MCA_PML_BASE_SEND_STANDARD, comm, &(reqs[i]))); if (OMPI_SUCCESS != err) { + ompi_coll_base_free_reqs(reqs, i + 1); return err; } } err = ompi_request_wait_all(rsize, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != err) { + ompi_coll_base_free_reqs(reqs, rsize); + } } diff --git a/ompi/mca/coll/basic/coll_basic_component.c b/ompi/mca/coll/basic/coll_basic_component.c index 0057b72434d..2aeb5d26298 100644 --- a/ompi/mca/coll/basic/coll_basic_component.c +++ b/ompi/mca/coll/basic/coll_basic_component.c @@ -3,10 +3,10 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -106,22 +106,8 @@ basic_register(void) return OMPI_SUCCESS; } - -static void -mca_coll_basic_module_construct(mca_coll_basic_module_t *module) -{ - module->mccb_reqs = NULL; - module->mccb_num_reqs = 0; -} - -static void -mca_coll_basic_module_destruct(mca_coll_basic_module_t *module) -{ - if (NULL != module->mccb_reqs) free(module->mccb_reqs); -} - - OBJ_CLASS_INSTANCE(mca_coll_basic_module_t, mca_coll_base_module_t, - mca_coll_basic_module_construct, - mca_coll_basic_module_destruct); + NULL, + NULL); + diff --git a/ompi/mca/coll/basic/coll_basic_exscan.c b/ompi/mca/coll/basic/coll_basic_exscan.c index 890f305c773..057bcfa48c5 100644 --- a/ompi/mca/coll/basic/coll_basic_exscan.c +++ b/ompi/mca/coll/basic/coll_basic_exscan.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -40,14 +42,14 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_exscan_intra(void *sbuf, void *rbuf, int count, +mca_coll_basic_exscan_intra(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int size, rank, err; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t dsize, gap; char *free_buffer = NULL; char *reduce_buffer = NULL; @@ -81,15 +83,14 @@ mca_coll_basic_exscan_intra(void *sbuf, void *rbuf, int count, /* Get a temporary buffer to perform the reduction into. Rationale * for malloc'ing this size is provided in coll_basic_reduce.c. */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + free_buffer = (char*)malloc(dsize); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - reduce_buffer = free_buffer - true_lb; - err = ompi_datatype_copy_content_same_ddt(dtype, count, + reduce_buffer = free_buffer - gap; + err = ompi_datatype_copy_content_same_ddt(dtype, count, reduce_buffer, (char*)sbuf); /* Receive the reduced value from the prior rank */ @@ -124,7 +125,7 @@ mca_coll_basic_exscan_intra(void *sbuf, void *rbuf, int count, * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_exscan_inter(void *sbuf, void *rbuf, int count, +mca_coll_basic_exscan_inter(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/basic/coll_basic_gather.c b/ompi/mca/coll/basic/coll_basic_gather.c index 74353fa3d58..32753ad1532 100644 --- a/ompi/mca/coll/basic/coll_basic_gather.c +++ b/ompi/mca/coll/basic/coll_basic_gather.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2015 The 
University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,7 +38,7 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_gather_inter(void *sbuf, int scount, +mca_coll_basic_gather_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, diff --git a/ompi/mca/coll/basic/coll_basic_gatherv.c b/ompi/mca/coll/basic/coll_basic_gatherv.c index df1489e91cc..047a70d4e01 100644 --- a/ompi/mca/coll/basic/coll_basic_gatherv.c +++ b/ompi/mca/coll/basic/coll_basic_gatherv.c @@ -2,17 +2,19 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -35,9 +37,9 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_gatherv_intra(void *sbuf, int scount, +mca_coll_basic_gatherv_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -108,9 +110,9 @@ mca_coll_basic_gatherv_intra(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_gatherv_inter(void *sbuf, int scount, +mca_coll_basic_gatherv_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -118,8 +120,7 @@ mca_coll_basic_gatherv_inter(void *sbuf, int scount, int i, size, err; char *ptmp; ptrdiff_t lb, extent; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - ompi_request_t **reqs = basic_module->mccb_reqs; + ompi_request_t **reqs = NULL; size = ompi_comm_remote_size(comm); @@ -141,17 +142,24 @@ mca_coll_basic_gatherv_inter(void *sbuf, int scount, return OMPI_ERROR; } + reqs = coll_base_comm_get_reqs(module->base_data, size); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } + for (i = 0; i < size; ++i) { ptmp = ((char *) rbuf) + (extent * disps[i]); err = MCA_PML_CALL(irecv(ptmp, rcounts[i], rdtype, i, MCA_COLL_BASE_TAG_GATHERV, comm, &reqs[i])); if (OMPI_SUCCESS != err) { + ompi_coll_base_free_reqs(reqs, i + 1); return err; } } err = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != err) { + ompi_coll_base_free_reqs(reqs, size); + } } /* All done */ diff --git a/ompi/mca/coll/basic/coll_basic_module.c b/ompi/mca/coll/basic/coll_basic_module.c 
index a857fb811d6..df5296df419 100644 --- a/ompi/mca/coll/basic/coll_basic_module.c +++ b/ompi/mca/coll/basic/coll_basic_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -11,14 +11,14 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -56,7 +56,7 @@ mca_coll_basic_init_query(bool enable_progress_threads, * priority we want to return. */ mca_coll_base_module_t * -mca_coll_basic_comm_query(struct ompi_communicator_t *comm, +mca_coll_basic_comm_query(struct ompi_communicator_t *comm, int *priority) { int size; @@ -101,14 +101,11 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm, size = dist_graph_size; } } - basic_module->mccb_num_reqs = size; - basic_module->mccb_reqs = (ompi_request_t**) - malloc(sizeof(ompi_request_t *) * basic_module->mccb_num_reqs); /* Choose whether to use [intra|inter], and [linear|log]-based * algorithms. 
*/ basic_module->super.coll_module_enable = mca_coll_basic_module_enable; - basic_module->super.ft_event = mca_coll_basic_ft_event; + basic_module->super.ft_event = NULL; if (OMPI_COMM_IS_INTER(comm)) { basic_module->super.coll_allgather = mca_coll_basic_allgather_inter; @@ -184,28 +181,13 @@ int mca_coll_basic_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm) { - /* All done */ - return OMPI_SUCCESS; -} - - -int -mca_coll_basic_ft_event(int state) { - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; + /* prepare the placeholder for the array of request* */ + module->base_data = OBJ_NEW(mca_coll_base_comm_t); + if (NULL == module->base_data) { + return OMPI_ERROR; } + /* All done */ return OMPI_SUCCESS; } + diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c b/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c index 8d8242dc9b0..3bd17f0614f 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -42,18 +42,22 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; const int rank = ompi_comm_rank (comm); - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; ptrdiff_t lb, extent; int rc = MPI_SUCCESS, dim, nreqs; + if( 0 == cart->ndims ) return OMPI_SUCCESS; + ompi_datatype_get_extent(rdtype, &lb, &extent); + reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims ); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } + /* The ordering is defined as -1 then +1 in each dimension in * order of dimension. */ - for (dim = 0, reqs = basic_module->mccb_reqs, nreqs = 0 ; dim < cart->ndims ; ++dim) { + for (dim = 0, nreqs = 0 ; dim < cart->ndims ; ++dim) { int srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; if (cart->dims[dim] > 1) { @@ -63,49 +67,52 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount, } if (MPI_PROC_NULL != srank) { + nreqs++; rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, srank, MCA_COLL_BASE_TAG_ALLGATHER, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; + nreqs++; /* remove cast from const when the pml layer is updated to take * a const for the send buffer. 
*/ rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank, MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; - - nreqs += 2; } rbuf = (char *) rbuf + extent * rcount; if (MPI_PROC_NULL != drank) { + nreqs++; rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, drank, MCA_COLL_BASE_TAG_ALLGATHER, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; - + nreqs++; rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank, MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; - - nreqs += 2; } rbuf = (char *) rbuf + extent * rcount; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, nreqs); return rc; } - return ompi_request_wait_all (nreqs, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (nreqs, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs(reqs, nreqs); + } + return rc; } static int @@ -115,16 +122,16 @@ mca_coll_basic_neighbor_allgather_graph(const void *sbuf, int scount, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_graph_2_2_0_t *graph = comm->c_topo->mtc.graph; const int rank = ompi_comm_rank (comm); const int *edges; int degree; - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; ptrdiff_t lb, extent; int rc = MPI_SUCCESS, neighbor; mca_topo_base_graph_neighbors_count (comm, rank, &degree); + if( 0 == degree) return OMPI_SUCCESS; edges = graph->edges; if (rank > 0) { @@ -132,10 +139,12 @@ mca_coll_basic_neighbor_allgather_graph(const void *sbuf, int scount, } ompi_datatype_get_extent(rdtype, &lb, &extent); + reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } - for (neighbor = 0, 
reqs = basic_module->mccb_reqs ; neighbor < degree ; ++neighbor) { + for (neighbor = 0; neighbor < degree ; ++neighbor) { rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; rbuf = (char *) rbuf + extent * rcount; @@ -143,16 +152,20 @@ mca_coll_basic_neighbor_allgather_graph(const void *sbuf, int scount, * a const for the send buffer. */ rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs( reqs, (2 * neighbor + 1)); return rc; } - return ompi_request_wait_all (degree * 2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (degree * 2, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs( reqs, degree * 2); + } + return rc; } static int @@ -162,32 +175,34 @@ mca_coll_basic_neighbor_allgather_dist_graph(const void *sbuf, int scount, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; const int *inedges, *outedges; int indegree, outdegree; - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; ptrdiff_t lb, extent; int rc = MPI_SUCCESS, neighbor; indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; + if( 0 == (indegree + outdegree) ) return OMPI_SUCCESS; inedges = dist_graph->in; outedges = dist_graph->out; ompi_datatype_get_extent(rdtype, &lb, &extent); + reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } - for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < indegree ; 
++neighbor) { + for (neighbor = 0; neighbor < indegree ; ++neighbor) { rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, inedges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; rbuf = (char *) rbuf + extent * rcount; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, neighbor + 1); return rc; } @@ -197,19 +212,23 @@ mca_coll_basic_neighbor_allgather_dist_graph(const void *sbuf, int scount, rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, outedges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, indegree + neighbor + 1); return rc; } - return ompi_request_wait_all (indegree + outdegree, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (indegree + outdegree, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs(reqs, indegree + outdegree); + } + return rc; } -int mca_coll_basic_neighbor_allgather(void *sbuf, int scount, +int mca_coll_basic_neighbor_allgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c b/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c index cdcf91de95b..33465f55479 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -41,16 +41,18 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; const int rank = ompi_comm_rank (comm); - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; ptrdiff_t lb, extent; int rc = MPI_SUCCESS, dim, i, nreqs; + if( 0 == cart->ndims ) return OMPI_SUCCESS; + ompi_datatype_get_extent(rdtype, &lb, &extent); - reqs = basic_module->mccb_reqs; + reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* The ordering is defined as -1 then +1 in each dimension in * order of dimension. */ @@ -64,36 +66,42 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp } if (MPI_PROC_NULL != srank) { + nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + disps[i] * extent, rcounts[i], rdtype, srank, - MCA_COLL_BASE_TAG_ALLGATHER, comm, reqs++)); + MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++)); if (OMPI_SUCCESS != rc) break; /* remove cast from const when the pml layer is updated to take * a const for the send buffer. 
*/ + nreqs++; rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank, MCA_COLL_BASE_TAG_ALLGATHER, - MCA_PML_BASE_SEND_STANDARD, comm, reqs++)); + MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs += 2; } if (MPI_PROC_NULL != drank) { + nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + disps[i+1] * extent, rcounts[i+1], rdtype, drank, - MCA_COLL_BASE_TAG_ALLGATHER, comm, reqs++)); + MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++)); if (OMPI_SUCCESS != rc) break; + nreqs++; rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank, MCA_COLL_BASE_TAG_ALLGATHER, - MCA_PML_BASE_SEND_STANDARD, comm, reqs++)); + MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs += 2; } } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, nreqs); return rc; } - return ompi_request_wait_all (nreqs, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (nreqs, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs(reqs, nreqs); + } + return rc; } static int @@ -102,16 +110,15 @@ mca_coll_basic_neighbor_allgatherv_graph(const void *sbuf, int scount, struct om struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_graph_2_2_0_t *graph = comm->c_topo->mtc.graph; const int rank = ompi_comm_rank (comm); const int *edges; - int degree; - ompi_request_t **reqs; + int rc = MPI_SUCCESS, neighbor, degree; + ompi_request_t **reqs, **preqs; ptrdiff_t lb, extent; - int rc = MPI_SUCCESS, neighbor; mca_topo_base_graph_neighbors_count (comm, rank, &degree); + if( 0 == degree ) return OMPI_SUCCESS; edges = graph->edges; if (rank > 0) { @@ -119,26 +126,32 @@ mca_coll_basic_neighbor_allgatherv_graph(const void *sbuf, int scount, struct om } ompi_datatype_get_extent(rdtype, &lb, &extent); + reqs 
= preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } - for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < degree ; ++neighbor) { + for (neighbor = 0; neighbor < degree ; ++neighbor) { rc = MCA_PML_CALL(irecv((char *) rbuf + disps[neighbor] * extent, rcounts[neighbor], - rdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, comm, reqs++)); + rdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++)); if (OMPI_SUCCESS != rc) break; /* remove cast from const when the pml layer is updated to take * a const for the send buffer. */ rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs( reqs, 2 * (neighbor + 1) ); return rc; } - return ompi_request_wait_all (degree * 2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (degree * 2, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs( reqs, 2 * degree ); + } + return rc; } static int @@ -147,30 +160,32 @@ mca_coll_basic_neighbor_allgatherv_dist_graph(const void *sbuf, int scount, stru struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; const int *inedges, *outedges; int indegree, outdegree; - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; ptrdiff_t lb, extent; int rc = MPI_SUCCESS, neighbor; indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; + if( 0 == (indegree + outdegree) ) return OMPI_SUCCESS; inedges = dist_graph->in; outedges = dist_graph->out; ompi_datatype_get_extent(rdtype, &lb, &extent); + 
reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } - for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < indegree ; ++neighbor) { + for (neighbor = 0; neighbor < indegree ; ++neighbor) { rc = MCA_PML_CALL(irecv((char *) rbuf + disps[neighbor] * extent, rcounts[neighbor], rdtype, - inedges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, comm, reqs++)); + inedges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, neighbor + 1); return rc; } @@ -179,20 +194,24 @@ mca_coll_basic_neighbor_allgatherv_dist_graph(const void *sbuf, int scount, stru * a const for the send buffer. */ rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, outedges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, indegree + neighbor + 1); return rc; } - return ompi_request_wait_all (indegree + outdegree, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (indegree + outdegree, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs(reqs, indegree + outdegree); + } + return rc; } -int mca_coll_basic_neighbor_allgatherv(void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int rcounts[], int disps[], struct ompi_datatype_t *rdtype, +int mca_coll_basic_neighbor_allgatherv(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, const int rcounts[], const int disps[], struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { if (OMPI_COMM_IS_INTER(comm)) { diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c 
b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c index 289f60acbc8..804d398d500 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -40,18 +40,21 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; const int rank = ompi_comm_rank (comm); - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; ptrdiff_t lb, rdextent, sdextent; int rc = MPI_SUCCESS, dim, nreqs; + if( 0 == cart->ndims ) return OMPI_SUCCESS; + ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); + reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post receives first */ - for (dim = 0, nreqs = 0, reqs = basic_module->mccb_reqs ; dim < cart->ndims ; ++dim) { + for (dim = 0, nreqs = 0; dim < cart->ndims ; ++dim) { int srank = MPI_PROC_NULL, drank = 
MPI_PROC_NULL; if (cart->dims[dim] > 1) { @@ -61,28 +64,28 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ } if (MPI_PROC_NULL != srank) { + nreqs++; rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, srank, MCA_COLL_BASE_TAG_ALLTOALL, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } rbuf = (char *) rbuf + rdextent * rcount; if (MPI_PROC_NULL != drank) { + nreqs++; rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, drank, MCA_COLL_BASE_TAG_ALLTOALL, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } rbuf = (char *) rbuf + rdextent * rcount; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs( reqs, nreqs); return rc; } @@ -98,34 +101,38 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ if (MPI_PROC_NULL != srank) { /* remove cast from const when the pml layer is updated to take * a const for the send buffer. */ + nreqs++; rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } sbuf = (const char *) sbuf + sdextent * scount; if (MPI_PROC_NULL != drank) { + nreqs++; rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } sbuf = (const char *) sbuf + sdextent * scount; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs( reqs, nreqs); return rc; } - return ompi_request_wait_all (nreqs, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (nreqs, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs( reqs, nreqs); + } + return rc; } static int @@ -133,15 +140,15 @@ mca_coll_basic_neighbor_alltoall_graph(const void *sbuf, 
int scount, struct ompi int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_graph_2_2_0_t *graph = comm->c_topo->mtc.graph; const int rank = ompi_comm_rank (comm); int rc = MPI_SUCCESS, neighbor, degree; ptrdiff_t lb, rdextent, sdextent; - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; const int *edges; mca_topo_base_graph_neighbors_count (comm, rank, &degree); + if( 0 == degree ) return OMPI_SUCCESS; edges = graph->edges; if (rank > 0) { @@ -150,31 +157,41 @@ mca_coll_basic_neighbor_alltoall_graph(const void *sbuf, int scount, struct ompi ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); + reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post receives first */ - for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < degree ; ++neighbor) { + for (neighbor = 0; neighbor < degree ; ++neighbor) { rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; rbuf = (char *) rbuf + rdextent * rcount; } + if( MPI_SUCCESS != rc ) { + ompi_coll_base_free_reqs( reqs, neighbor + 1 ); + return rc; + } for (neighbor = 0 ; neighbor < degree ; ++neighbor) { /* remove cast from const when the pml layer is updated to take * a const for the send buffer. 
*/ rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; sbuf = (const char *) sbuf + sdextent * scount; } - if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + if( MPI_SUCCESS != rc ) { + ompi_coll_base_free_reqs( reqs, degree + neighbor + 1 ); return rc; } - return ompi_request_wait_all (degree * 2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (degree * 2, reqs, MPI_STATUSES_IGNORE); + if( MPI_SUCCESS != rc ) { + ompi_coll_base_free_reqs( reqs, 2 * degree ); + } + return rc; } static int @@ -182,55 +199,61 @@ mca_coll_basic_neighbor_alltoall_dist_graph(const void *sbuf, int scount,struct int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; ptrdiff_t lb, rdextent, sdextent; int rc = MPI_SUCCESS, neighbor; const int *inedges, *outedges; int indegree, outdegree; - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; + if( 0 == (indegree + outdegree) ) return OMPI_SUCCESS; inedges = dist_graph->in; outedges = dist_graph->out; ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); + reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post receives first */ - for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < indegree ; ++neighbor, ++reqs) { + for (neighbor = 0; neighbor < indegree ; ++neighbor) { rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, inedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, - comm, reqs)); + comm, preqs++)); if (OMPI_SUCCESS != 
rc) break; rbuf = (char *) rbuf + rdextent * rcount; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, neighbor + 1); return rc; } - for (neighbor = 0 ; neighbor < outdegree ; ++neighbor, ++reqs) { + for (neighbor = 0 ; neighbor < outdegree ; ++neighbor) { /* remove cast from const when the pml layer is updated to take a const for the send buffer */ rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, outedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, reqs)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; sbuf = (char *) sbuf + sdextent * scount; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, indegree + neighbor + 1); return rc; } - return ompi_request_wait_all (indegree + outdegree, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (indegree + outdegree, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs(reqs, indegree + outdegree); + } + return rc; } -int mca_coll_basic_neighbor_alltoall(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, +int mca_coll_basic_neighbor_alltoall(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c index 9ace9006624..d6c41777856 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -41,18 +41,21 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co const int rdisps[], struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; const int rank = ompi_comm_rank (comm); int rc = MPI_SUCCESS, dim, i, nreqs; ptrdiff_t lb, rdextent, sdextent; - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; + + if( 0 == cart->ndims ) return OMPI_SUCCESS; ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); + reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims ); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post receives first */ - for (dim = 0, nreqs = 0, i = 0, reqs = basic_module->mccb_reqs ; dim < cart->ndims ; ++dim, i += 2) { + for (dim = 0, nreqs = 0, i = 0; dim < cart->ndims ; ++dim, i += 2) { int srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; if (cart->dims[dim] > 1) { @@ -62,22 +65,22 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co } if (MPI_PROC_NULL != srank) { + nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i] * rdextent, rcounts[i], rdtype, srank, - MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++)); + 
MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } if (MPI_PROC_NULL != drank) { + nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i+1] * rdextent, rcounts[i+1], rdtype, drank, - MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++)); + MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs( reqs, nreqs ); return rc; } @@ -91,27 +94,31 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co } if (MPI_PROC_NULL != srank) { + nreqs++; /* remove cast from const when the pml layer is updated to take a const for the send buffer */ rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i] * sdextent, scounts[i], sdtype, srank, - MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, reqs++)); + MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } if (MPI_PROC_NULL != drank) { + nreqs++; rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i+1] * sdextent, scounts[i+1], sdtype, drank, - MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, reqs++)); + MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs( reqs, nreqs ); return rc; } - return ompi_request_wait_all (nreqs, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (nreqs, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs( reqs, nreqs ); + } + return rc; } static int @@ -120,15 +127,15 @@ mca_coll_basic_neighbor_alltoallv_graph(const void *sbuf, const int scounts[], c const int rdisps[], struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = 
(mca_coll_basic_module_t *) module; const mca_topo_base_comm_graph_2_2_0_t *graph = comm->c_topo->mtc.graph; int rc = MPI_SUCCESS, neighbor, degree; const int rank = ompi_comm_rank (comm); ptrdiff_t lb, rdextent, sdextent; - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; const int *edges; mca_topo_base_graph_neighbors_count (comm, rank, &degree); + if( 0 == degree ) return OMPI_SUCCESS; edges = graph->edges; if (rank > 0) { @@ -137,16 +144,18 @@ mca_coll_basic_neighbor_alltoallv_graph(const void *sbuf, const int scounts[], c ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); + reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree ); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post all receives first */ - for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < degree ; ++neighbor) { + for (neighbor = 0; neighbor < degree ; ++neighbor) { rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[neighbor] * rdextent, rcounts[neighbor], rdtype, - edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++)); + edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs( reqs, neighbor + 1); return rc; } @@ -154,16 +163,20 @@ mca_coll_basic_neighbor_alltoallv_graph(const void *sbuf, const int scounts[], c /* remove cast from const when the pml layer is updated to take a const for the send buffer */ rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[neighbor] * sdextent, scounts[neighbor], sdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs( reqs, degree + neighbor + 1); return rc; } - return ompi_request_wait_all (degree * 2, basic_module->mccb_reqs, 
MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (degree * 2, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs( reqs, degree * 2); + } + return rc; } static int @@ -172,32 +185,34 @@ mca_coll_basic_neighbor_alltoallv_dist_graph(const void *sbuf, const int scounts const int rdisps[], struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; ptrdiff_t lb, rdextent, sdextent; int rc = MPI_SUCCESS, neighbor; const int *inedges, *outedges; int indegree, outdegree; - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; + if( 0 == (indegree + outdegree) ) return OMPI_SUCCESS; inedges = dist_graph->in; outedges = dist_graph->out; ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); + reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post all receives first */ - for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < indegree ; ++neighbor) { + for (neighbor = 0; neighbor < indegree ; ++neighbor) { rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[neighbor] * rdextent, rcounts[neighbor], rdtype, - inedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++)); + inedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, neighbor + 1); return rc; } @@ -205,21 +220,25 @@ mca_coll_basic_neighbor_alltoallv_dist_graph(const void *sbuf, const int scounts /* remove cast from const when the pml layer is updated to take a const for the send buffer */ rc = MCA_PML_CALL(isend((char *) sbuf + 
sdisps[neighbor] * sdextent, scounts[neighbor], sdtype, outedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, indegree + neighbor + 1); return rc; } - return ompi_request_wait_all (indegree + outdegree, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (indegree + outdegree, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs( reqs, indegree + outdegree ); + } + return rc; } -int mca_coll_basic_neighbor_alltoallv(void *sbuf, int scounts[], int sdisps[], - struct ompi_datatype_t *sdtype, void *rbuf, int rcounts[], - int rdisps[], struct ompi_datatype_t *rdtype, +int mca_coll_basic_neighbor_alltoallv(const void *sbuf, const int scounts[], const int sdisps[], + struct ompi_datatype_t *sdtype, void *rbuf, const int rcounts[], + const int rdisps[], struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { if (OMPI_COMM_IS_INTER(comm)) { diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c index 28ecf04cbbb..5b15574d0ec 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -12,8 +12,9 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
- * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,19 +38,22 @@ static int mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], const MPI_Aint sdisps[], - struct ompi_datatype_t * const sdtypes[], void *rbuf, const int rcounts[], - const MPI_Aint rdisps[], struct ompi_datatype_t * const rdtypes[], + struct ompi_datatype_t * const *sdtypes, void *rbuf, const int rcounts[], + const MPI_Aint rdisps[], struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; const int rank = ompi_comm_rank (comm); int rc = MPI_SUCCESS, dim, i, nreqs; - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; + if (0 == cart->ndims) return OMPI_SUCCESS; + + reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims ); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post receives first */ - for (dim = 0, i = 0, nreqs = 0, reqs = basic_module->mccb_reqs ; dim < cart->ndims ; ++dim, i += 2) { + for (dim = 0, i = 0, nreqs = 0; dim < cart->ndims ; ++dim, i += 2) { int srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; if (cart->dims[dim] > 1) { @@ -59,22 +63,22 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co } if (MPI_PROC_NULL != srank) { + nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i], rcounts[i], rdtypes[i], srank, - MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++)); + MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } if (MPI_PROC_NULL != drank) { + nreqs++; rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i+1], 
rcounts[i+1], rdtypes[i+1], drank, - MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++)); + MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs( reqs, nreqs ); return rc; } @@ -88,27 +92,31 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co } if (MPI_PROC_NULL != srank) { + nreqs++; /* remove cast from const when the pml layer is updated to take a const for the send buffer */ rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i], scounts[i], sdtypes[i], srank, - MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, reqs++)); + MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } if (MPI_PROC_NULL != drank) { + nreqs++; rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i+1], scounts[i+1], sdtypes[i+1], drank, - MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, reqs++)); + MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++)); if (OMPI_SUCCESS != rc) break; - nreqs++; } } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs( reqs, nreqs ); return rc; } - return ompi_request_wait_all (nreqs, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (nreqs, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs( reqs, nreqs ); + } + return rc; } static int @@ -117,14 +125,17 @@ mca_coll_basic_neighbor_alltoallw_graph(const void *sbuf, const int scounts[], c const MPI_Aint rdisps[], struct ompi_datatype_t * const rdtypes[], struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_graph_2_2_0_t *graph = comm->c_topo->mtc.graph; int rc = MPI_SUCCESS, neighbor, degree; const int rank = ompi_comm_rank (comm); - 
ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; const int *edges; mca_topo_base_graph_neighbors_count (comm, rank, &degree); + if (0 == degree) return OMPI_SUCCESS; + + reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree ); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } edges = graph->edges; if (rank > 0) { @@ -132,14 +143,14 @@ mca_coll_basic_neighbor_alltoallw_graph(const void *sbuf, const int scounts[], c } /* post all receives first */ - for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < degree ; ++neighbor) { + for (neighbor = 0; neighbor < degree ; ++neighbor) { rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[neighbor], rcounts[neighbor], rdtypes[neighbor], - edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++)); + edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, neighbor + 1); return rc; } @@ -147,46 +158,55 @@ mca_coll_basic_neighbor_alltoallw_graph(const void *sbuf, const int scounts[], c /* remove cast from const when the pml layer is updated to take a const for the send buffer */ rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[neighbor], scounts[neighbor], sdtypes[neighbor], edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, neighbor + degree + 1); return rc; } - return ompi_request_wait_all (degree * 2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (degree * 2, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs(reqs, degree * 2); + } + return rc; } static int mca_coll_basic_neighbor_alltoallw_dist_graph(const void *sbuf, const int scounts[], const MPI_Aint sdisps[], - struct ompi_datatype_t * const 
sdtypes[], void *rbuf, const int rcounts[], - const MPI_Aint rdisps[], struct ompi_datatype_t * const rdtypes[], + struct ompi_datatype_t * const *sdtypes, void *rbuf, const int rcounts[], + const MPI_Aint rdisps[], struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module; const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; int rc = MPI_SUCCESS, neighbor; const int *inedges, *outedges; int indegree, outdegree; - ompi_request_t **reqs; + ompi_request_t **reqs, **preqs; indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; + if( 0 == (indegree + outdegree) ) return OMPI_SUCCESS; inedges = dist_graph->in; outedges = dist_graph->out; + if (0 == indegree+outdegree) return OMPI_SUCCESS; + + reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree ); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } + /* post all receives first */ - for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < indegree ; ++neighbor) { + for (neighbor = 0; neighbor < indegree ; ++neighbor) { rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[neighbor], rcounts[neighbor], rdtypes[neighbor], - inedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++)); + inedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++)); if (OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, neighbor + 1); return rc; } @@ -194,21 +214,25 @@ mca_coll_basic_neighbor_alltoallw_dist_graph(const void *sbuf, const int scounts /* remove cast from const when the pml layer is updated to take a const for the send buffer */ rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[neighbor], scounts[neighbor], sdtypes[neighbor], outedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, reqs++)); + comm, preqs++)); if 
(OMPI_SUCCESS != rc) break; } if (OMPI_SUCCESS != rc) { - /* should probably try to clean up here */ + ompi_coll_base_free_reqs(reqs, indegree + neighbor + 1); return rc; } - return ompi_request_wait_all (indegree + outdegree, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); + rc = ompi_request_wait_all (indegree + outdegree, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + ompi_coll_base_free_reqs( reqs, indegree + outdegree ); + } + return rc; } -int mca_coll_basic_neighbor_alltoallw(void *sbuf, int scounts[], MPI_Aint sdisps[], - struct ompi_datatype_t *sdtypes[], void *rbuf, int rcounts[], - MPI_Aint rdisps[], struct ompi_datatype_t *rdtypes[], +int mca_coll_basic_neighbor_alltoallw(const void *sbuf, const int scounts[], const MPI_Aint sdisps[], + struct ompi_datatype_t * const *sdtypes, void *rbuf, const int rcounts[], + const MPI_Aint rdisps[], struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { if (OMPI_COMM_IS_INTER(comm)) { diff --git a/ompi/mca/coll/basic/coll_basic_reduce.c b/ompi/mca/coll/basic/coll_basic_reduce.c index a8b8a7bd945..ad2fd1e6f3e 100644 --- a/ompi/mca/coll/basic/coll_basic_reduce.c +++ b/ompi/mca/coll/basic/coll_basic_reduce.c @@ -9,6 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -82,7 +84,7 @@ * */ int -mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count, +mca_coll_basic_reduce_log_intra(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, @@ -90,7 +92,7 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count, { int i, size, rank, vrank; int err, peer, dim, mask; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t lb, extent, dsize, gap; char *free_buffer = NULL; char *free_rbuf = NULL; char *pml_buffer = NULL; @@ -118,14 +120,14 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count, * rationale above. */ ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + free_buffer = (char*)malloc(dsize); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = free_buffer - true_lb; + pml_buffer = free_buffer - gap; /* read the comment about commutative operations (few lines down * the page) */ if (ompi_op_is_commute(op)) { @@ -136,12 +138,12 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count, * rationale above. */ if (MPI_IN_PLACE == sbuf) { - inplace_temp = (char*)malloc(true_extent + (count - 1) * extent); + inplace_temp = (char*)malloc(dsize); if (NULL == inplace_temp) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup_and_return; } - sbuf = inplace_temp - true_lb; + sbuf = inplace_temp - gap; err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, (char*)rbuf); } snd_buffer = (char*)sbuf; @@ -150,12 +152,12 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count, /* root is the only one required to provide a valid rbuf. 
* Assume rbuf is invalid for all other ranks, so fix it up * here to be valid on all non-leaf ranks */ - free_rbuf = (char*)malloc(true_extent + (count - 1) * extent); + free_rbuf = (char*)malloc(dsize); if (NULL == free_rbuf) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup_and_return; } - rbuf = free_rbuf - true_lb; + rbuf = free_rbuf - gap; } /* Loop over cube dimensions. High processes send to low ones in the @@ -224,7 +226,7 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count, (char*)sbuf); ompi_op_reduce(op, rbuf, pml_buffer, count, dtype); } else { - ompi_op_reduce(op, sbuf, pml_buffer, count, dtype); + ompi_op_reduce(op, (void *)sbuf, pml_buffer, count, dtype); } /* now we have to send the buffer containing the computed data */ snd_buffer = pml_buffer; @@ -279,14 +281,14 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count, * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_reduce_lin_inter(void *sbuf, void *rbuf, int count, +mca_coll_basic_reduce_lin_inter(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int i, err, size; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t dsize, gap; char *free_buffer = NULL; char *pml_buffer = NULL; @@ -303,14 +305,13 @@ mca_coll_basic_reduce_lin_inter(void *sbuf, void *rbuf, int count, MCA_PML_BASE_SEND_STANDARD, comm)); } else { /* Root receives and reduces messages */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + free_buffer = (char*)malloc(dsize); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = free_buffer - true_lb; + pml_buffer = free_buffer - gap; /* Initialize the receive buffer. 
*/ @@ -358,7 +359,7 @@ mca_coll_basic_reduce_lin_inter(void *sbuf, void *rbuf, int count, * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_reduce_log_inter(void *sbuf, void *rbuf, int count, +mca_coll_basic_reduce_log_inter(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/basic/coll_basic_reduce_scatter.c b/ompi/mca/coll/basic/coll_basic_reduce_scatter.c index 2ddb3a849e7..8fa4c129cef 100644 --- a/ompi/mca/coll/basic/coll_basic_reduce_scatter.c +++ b/ompi/mca/coll/basic/coll_basic_reduce_scatter.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,12 +14,12 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -52,7 +52,7 @@ * Cummutative, reasonable sized messages * recursive halving algorithm * Others: - * reduce and scatterv (needs to be cleaned + * reduce and scatterv (needs to be cleaned * up at some point) * * NOTE: that the recursive halving algorithm should be faster than @@ -64,14 +64,14 @@ * so this should be investigated further. 
*/ int -mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, +mca_coll_basic_reduce_scatter_intra(const void *sbuf, void *rbuf, const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int i, rank, size, count, err = OMPI_SUCCESS; - ptrdiff_t true_lb, true_extent, lb, extent, buf_size; + ptrdiff_t extent, buf_size, gap; int *disps = NULL; char *recv_buf = NULL, *recv_buf_free = NULL; char *result_buf = NULL, *result_buf_free = NULL; @@ -96,9 +96,8 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, } /* get datatype information */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - buf_size = true_extent + (count - 1) * extent; + ompi_datatype_type_extent(dtype, &extent); + buf_size = opal_datatype_span(&dtype->super, count, &gap); /* Handle MPI_IN_PLACE */ if (MPI_IN_PLACE == sbuf) { @@ -111,7 +110,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, /* temporary receive buffer. 
See coll_basic_reduce.c for details on sizing */ recv_buf_free = (char*) malloc(buf_size); - recv_buf = recv_buf_free - true_lb; + recv_buf = recv_buf_free - gap; if (NULL == recv_buf_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup; @@ -119,7 +118,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, /* allocate temporary buffer for results */ result_buf_free = (char*) malloc(buf_size); - result_buf = result_buf_free - true_lb; + result_buf = result_buf_free - gap; /* copy local buffer into the temporary results */ err = ompi_datatype_sndrcv(sbuf, count, dtype, result_buf, count, dtype); @@ -137,7 +136,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, two procs to do the rest of the algorithm */ if (rank < 2 * remain) { if ((rank & 1) == 0) { - err = MCA_PML_CALL(send(result_buf, count, dtype, rank + 1, + err = MCA_PML_CALL(send(result_buf, count, dtype, rank + 1, MCA_COLL_BASE_TAG_REDUCE_SCATTER, MCA_PML_BASE_SEND_STANDARD, comm)); @@ -243,11 +242,11 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, free(tmp_rcounts); free(tmp_disps); goto cleanup; - } + } } if (send_count > 0) { err = MCA_PML_CALL(send(result_buf + tmp_disps[send_index] * extent, - send_count, dtype, peer, + send_count, dtype, peer, MCA_COLL_BASE_TAG_REDUCE_SCATTER, MCA_PML_BASE_SEND_STANDARD, comm)); @@ -255,7 +254,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, free(tmp_rcounts); free(tmp_disps); goto cleanup; - } + } } /* if we received something on this step, push it into @@ -266,10 +265,10 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, free(tmp_rcounts); free(tmp_disps); goto cleanup; - } + } - ompi_op_reduce(op, - recv_buf + tmp_disps[recv_index] * extent, + ompi_op_reduce(op, + recv_buf + tmp_disps[recv_index] * extent, result_buf + tmp_disps[recv_index] * extent, recv_count, dtype); } @@ -283,13 +282,13 @@ 
mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, /* copy local results from results buffer into real receive buffer */ if (0 != rcounts[rank]) { err = ompi_datatype_sndrcv(result_buf + disps[rank] * extent, - rcounts[rank], dtype, + rcounts[rank], dtype, rbuf, rcounts[rank], dtype); if (OMPI_SUCCESS != err) { free(tmp_rcounts); free(tmp_disps); goto cleanup; - } + } } free(tmp_rcounts); @@ -315,7 +314,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, comm)); if (OMPI_SUCCESS != err) goto cleanup; } - } + } } } else { @@ -323,7 +322,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, /* temporary receive buffer. See coll_basic_reduce.c for details on sizing */ recv_buf_free = (char*) malloc(buf_size); - recv_buf = recv_buf_free - true_lb; + recv_buf = recv_buf_free - gap; if (NULL == recv_buf_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup; @@ -360,7 +359,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, int *rcounts, +mca_coll_basic_reduce_scatter_inter(const void *sbuf, void *rbuf, const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -368,8 +367,9 @@ mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, int *rcounts, { int err, i, rank, root = 0, rsize, lsize; int totalcounts; - ptrdiff_t lb, extent; + ptrdiff_t gap, span; char *tmpbuf = NULL, *tmpbuf2 = NULL; + char *lbuf, *buf; ompi_request_t *req; int *disps = NULL; @@ -382,7 +382,7 @@ mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, int *rcounts, totalcounts += rcounts[i]; } - /* + /* * The following code basically does an interreduce followed by a * intrascatterv. This is implemented by having the roots of each * group exchange their sbuf. 
Then, the roots receive the data @@ -400,10 +400,7 @@ mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, int *rcounts, * its size is the same as the local communicator size. */ if (rank == root) { - err = ompi_datatype_get_extent(dtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } + span = opal_datatype_span(&dtype->super, totalcounts, &gap); /* Generate displacements for the scatterv part */ disps = (int*) malloc(sizeof(int) * lsize); @@ -415,12 +412,14 @@ mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, int *rcounts, disps[i + 1] = disps[i] + rcounts[i]; } - tmpbuf = (char *) malloc(totalcounts * extent); - tmpbuf2 = (char *) malloc(totalcounts * extent); + tmpbuf = (char *) malloc(span); + tmpbuf2 = (char *) malloc(span); if (NULL == tmpbuf || NULL == tmpbuf2) { err = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } + lbuf = tmpbuf - gap; + buf = tmpbuf2 - gap; /* Do a send-recv between the two root procs. to avoid deadlock */ err = MCA_PML_CALL(isend(sbuf, totalcounts, dtype, 0, @@ -430,7 +429,7 @@ mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, int *rcounts, goto exit; } - err = MCA_PML_CALL(recv(tmpbuf2, totalcounts, dtype, 0, + err = MCA_PML_CALL(recv(lbuf, totalcounts, dtype, 0, MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm, MPI_STATUS_IGNORE)); if (OMPI_SUCCESS != err) { @@ -444,11 +443,12 @@ mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, int *rcounts, /* Loop receiving and calling reduction function (C or Fortran) - * The result of this reduction operations is then in - * tmpbuf2. + * The result of this reduction operations is then in + * lbuf. 
*/ for (i = 1; i < rsize; i++) { - err = MCA_PML_CALL(recv(tmpbuf, totalcounts, dtype, i, + char *tbuf; + err = MCA_PML_CALL(recv(buf, totalcounts, dtype, i, MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm, MPI_STATUS_IGNORE)); if (MPI_SUCCESS != err) { @@ -456,7 +456,9 @@ mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, int *rcounts, } /* Perform the reduction */ - ompi_op_reduce(op, tmpbuf, tmpbuf2, totalcounts, dtype); + ompi_op_reduce(op, lbuf, buf, totalcounts, dtype); + /* swap the buffers */ + tbuf = lbuf; lbuf = buf; buf = tbuf; } } else { /* If not root, send data to the root. */ @@ -469,7 +471,7 @@ mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, int *rcounts, } /* Now do a scatterv on the local communicator */ - err = comm->c_local_comm->c_coll.coll_scatterv(tmpbuf2, rcounts, disps, dtype, + err = comm->c_local_comm->c_coll.coll_scatterv(lbuf, rcounts, disps, dtype, rbuf, rcounts[rank], dtype, 0, comm->c_local_comm, comm->c_local_comm->c_coll.coll_scatterv_module); diff --git a/ompi/mca/coll/basic/coll_basic_reduce_scatter_block.c b/ompi/mca/coll/basic/coll_basic_reduce_scatter_block.c index ddd0015394d..37fd1ccbfc6 100644 --- a/ompi/mca/coll/basic/coll_basic_reduce_scatter_block.c +++ b/ompi/mca/coll/basic/coll_basic_reduce_scatter_block.c @@ -5,19 +5,19 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. 
- * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -47,18 +47,18 @@ * Returns: - MPI_SUCCESS or error code * * Algorithm: - * reduce and scatter (needs to be cleaned + * reduce and scatter (needs to be cleaned * up at some point) */ int -mca_coll_basic_reduce_scatter_block_intra(void *sbuf, void *rbuf, int rcount, +mca_coll_basic_reduce_scatter_block_intra(const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int rank, size, count, err = OMPI_SUCCESS; - ptrdiff_t true_lb, true_extent, lb, extent, buf_size; + ptrdiff_t gap, span; char *recv_buf = NULL, *recv_buf_free = NULL; /* Initialize */ @@ -72,9 +72,7 @@ mca_coll_basic_reduce_scatter_block_intra(void *sbuf, void *rbuf, int rcount, } /* get datatype information */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - buf_size = true_extent + (count - 1) * extent; + span = opal_datatype_span(&dtype->super, count, &gap); /* Handle MPI_IN_PLACE */ if (MPI_IN_PLACE == sbuf) { @@ -84,12 +82,12 @@ mca_coll_basic_reduce_scatter_block_intra(void *sbuf, void *rbuf, int rcount, if (0 == rank) { /* temporary receive buffer. 
See coll_basic_reduce.c for details on sizing */ - recv_buf_free = (char*) malloc(buf_size); - recv_buf = recv_buf_free - true_lb; + recv_buf_free = (char*) malloc(span); if (NULL == recv_buf_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup; } + recv_buf = recv_buf_free - gap; } /* reduction */ @@ -119,7 +117,7 @@ mca_coll_basic_reduce_scatter_block_intra(void *sbuf, void *rbuf, int rcount, * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_reduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, +mca_coll_basic_reduce_scatter_block_inter(const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -127,8 +125,9 @@ mca_coll_basic_reduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, { int err, i, rank, root = 0, rsize, lsize; int totalcounts; - ptrdiff_t lb, extent; + ptrdiff_t gap, span; char *tmpbuf = NULL, *tmpbuf2 = NULL; + char *lbuf = NULL, *buf; ompi_request_t *req; rank = ompi_comm_rank(comm); @@ -137,7 +136,7 @@ mca_coll_basic_reduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, totalcounts = lsize * rcount; - /* + /* * The following code basically does an interreduce followed by a * intrascatter. This is implemented by having the roots of each * group exchange their sbuf. Then, the roots receive the data @@ -152,16 +151,15 @@ mca_coll_basic_reduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, * */ if (rank == root) { - err = ompi_datatype_get_extent(dtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } + span = opal_datatype_span(&dtype->super, totalcounts, &gap); - tmpbuf = (char *) malloc(totalcounts * extent); - tmpbuf2 = (char *) malloc(totalcounts * extent); + tmpbuf = (char *) malloc(span); + tmpbuf2 = (char *) malloc(span); if (NULL == tmpbuf || NULL == tmpbuf2) { return OMPI_ERR_OUT_OF_RESOURCE; } + lbuf = tmpbuf - gap; + buf = tmpbuf2 - gap; /* Do a send-recv between the two root procs. 
to avoid deadlock */ err = MCA_PML_CALL(isend(sbuf, totalcounts, dtype, 0, @@ -171,7 +169,7 @@ mca_coll_basic_reduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, goto exit; } - err = MCA_PML_CALL(recv(tmpbuf2, totalcounts, dtype, 0, + err = MCA_PML_CALL(recv(lbuf, totalcounts, dtype, 0, MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm, MPI_STATUS_IGNORE)); if (OMPI_SUCCESS != err) { @@ -185,11 +183,12 @@ mca_coll_basic_reduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, /* Loop receiving and calling reduction function (C or Fortran) - * The result of this reduction operations is then in - * tmpbuf2. + * The result of this reduction operations is then in + * tmpbuf2. */ for (i = 1; i < rsize; i++) { - err = MCA_PML_CALL(recv(tmpbuf, totalcounts, dtype, i, + char *tbuf; + err = MCA_PML_CALL(recv(buf, totalcounts, dtype, i, MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm, MPI_STATUS_IGNORE)); if (MPI_SUCCESS != err) { @@ -197,7 +196,9 @@ mca_coll_basic_reduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, } /* Perform the reduction */ - ompi_op_reduce(op, tmpbuf, tmpbuf2, totalcounts, dtype); + ompi_op_reduce(op, lbuf, buf, totalcounts, dtype); + /* swap the buffers */ + tbuf = lbuf; lbuf = buf; buf = tbuf; } } else { /* If not root, send data to the root. 
*/ @@ -210,7 +211,7 @@ mca_coll_basic_reduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, } /* Now do a scatterv on the local communicator */ - err = comm->c_local_comm->c_coll.coll_scatter(tmpbuf2, rcount, dtype, + err = comm->c_local_comm->c_coll.coll_scatter(lbuf, rcount, dtype, rbuf, rcount, dtype, 0, comm->c_local_comm, comm->c_local_comm->c_coll.coll_scatter_module); diff --git a/ompi/mca/coll/basic/coll_basic_scan.c b/ompi/mca/coll/basic/coll_basic_scan.c index 0048e0a3bf0..2ee07d0fd24 100644 --- a/ompi/mca/coll/basic/coll_basic_scan.c +++ b/ompi/mca/coll/basic/coll_basic_scan.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -38,14 +40,14 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_scan_intra(void *sbuf, void *rbuf, int count, +mca_coll_basic_scan_intra(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int size, rank, err; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t dsize, gap; char *free_buffer = NULL; char *pml_buffer = NULL; @@ -72,14 +74,12 @@ mca_coll_basic_scan_intra(void *sbuf, void *rbuf, int count, * listed in coll_basic_reduce.c. Use this temporary buffer to * receive into, later. 
*/ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); + free_buffer = malloc(dsize); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = free_buffer - true_lb; + pml_buffer = free_buffer - gap; /* Copy the send buffer into the receive buffer. */ diff --git a/ompi/mca/coll/basic/coll_basic_scatter.c b/ompi/mca/coll/basic/coll_basic_scatter.c index 74aa9e8ed63..eef5f3136bb 100644 --- a/ompi/mca/coll/basic/coll_basic_scatter.c +++ b/ompi/mca/coll/basic/coll_basic_scatter.c @@ -2,17 +2,19 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,7 +38,7 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_scatter_inter(void *sbuf, int scount, +mca_coll_basic_scatter_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -46,8 +48,7 @@ mca_coll_basic_scatter_inter(void *sbuf, int scount, int i, size, err; char *ptmp; ptrdiff_t lb, incr; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - ompi_request_t **reqs = basic_module->mccb_reqs; + ompi_request_t **reqs; /* Initialize */ size = ompi_comm_remote_size(comm); @@ -67,6 +68,9 @@ mca_coll_basic_scatter_inter(void *sbuf, int scount, return OMPI_ERROR; } + reqs = coll_base_comm_get_reqs(module->base_data, size); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } + incr *= scount; for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) { err = MCA_PML_CALL(isend(ptmp, scount, sdtype, i, @@ -74,13 +78,15 @@ mca_coll_basic_scatter_inter(void *sbuf, int scount, MCA_PML_BASE_SEND_STANDARD, comm, reqs++)); if (OMPI_SUCCESS != err) { + ompi_coll_base_free_reqs(reqs, i + 1); return err; } } - err = - ompi_request_wait_all(size, basic_module->mccb_reqs, - MPI_STATUSES_IGNORE); + err = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != err) { + ompi_coll_base_free_reqs(reqs, size); + } } return err; diff --git a/ompi/mca/coll/basic/coll_basic_scatterv.c b/ompi/mca/coll/basic/coll_basic_scatterv.c index 06dcd7c3d04..fe0a49be223 100644 --- a/ompi/mca/coll/basic/coll_basic_scatterv.c +++ b/ompi/mca/coll/basic/coll_basic_scatterv.c @@ -2,17 +2,19 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,8 +38,8 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_scatterv_intra(void *sbuf, int *scounts, - int *disps, struct ompi_datatype_t *sdtype, +mca_coll_basic_scatterv_intra(const void *sbuf, const int *scounts, + const int *disps, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, @@ -109,8 +111,8 @@ mca_coll_basic_scatterv_intra(void *sbuf, int *scounts, * Returns: - MPI_SUCCESS or error code */ int -mca_coll_basic_scatterv_inter(void *sbuf, int *scounts, - int *disps, struct ompi_datatype_t *sdtype, +mca_coll_basic_scatterv_inter(const void *sbuf, const int *scounts, + const int *disps, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, @@ -119,8 +121,7 @@ mca_coll_basic_scatterv_inter(void *sbuf, int *scounts, int i, size, err; char *ptmp; ptrdiff_t lb, extent; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - ompi_request_t **reqs = basic_module->mccb_reqs; + ompi_request_t **reqs; /* Initialize */ size = ompi_comm_remote_size(comm); @@ -143,6 +144,9 @@ mca_coll_basic_scatterv_inter(void *sbuf, int *scounts, return OMPI_ERROR; } + reqs = 
coll_base_comm_get_reqs(module->base_data, size); + if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } + for (i = 0; i < size; ++i) { ptmp = ((char *) sbuf) + (extent * disps[i]); err = MCA_PML_CALL(isend(ptmp, scounts[i], sdtype, i, @@ -150,11 +154,15 @@ mca_coll_basic_scatterv_inter(void *sbuf, int *scounts, MCA_PML_BASE_SEND_STANDARD, comm, &(reqs[i]))); if (OMPI_SUCCESS != err) { + ompi_coll_base_free_reqs(reqs, i + 1); return err; } } err = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != err) { + ompi_coll_base_free_reqs(reqs, size); + } } /* All done */ diff --git a/ompi/mca/coll/coll.h b/ompi/mca/coll/coll.h index acb3b5c563f..ba22061e239 100644 --- a/ompi/mca/coll/coll.h +++ b/ompi/mca/coll/coll.h @@ -15,7 +15,7 @@ * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -67,8 +67,6 @@ #include "ompi/mca/mca.h" #include "opal/mca/base/base.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" #include "ompi/request/request.h" BEGIN_C_DECLS @@ -196,27 +194,27 @@ typedef int typedef int (*mca_coll_base_module_allgather_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, + (const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_allgatherv_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, + (const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void * rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_allreduce_fn_t) - (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, + (const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_alltoall_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, + (const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_alltoallv_fn_t) - (void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, + (const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, struct 
mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_alltoallw_fn_t) - (void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes, + (const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_barrier_fn_t) (struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); @@ -224,65 +222,65 @@ typedef int (*mca_coll_base_module_bcast_fn_t) (void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_exscan_fn_t) - (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, + (const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_gather_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, + (const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_gatherv_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, + (const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_reduce_fn_t) - (void *sbuf, void* rbuf, int count, struct 
ompi_datatype_t *dtype, + (const void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_reduce_scatter_fn_t) - (void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, + (const void *sbuf, void *rbuf, const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_reduce_scatter_block_fn_t) - (void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, + (const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_scan_fn_t) - (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, + (const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_scatter_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, + (const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_scatterv_fn_t) - (void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, + (const void *sbuf, const int *scounts, const int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); /* nonblocking collectives */ typedef int (*mca_coll_base_module_iallgather_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, + (const void *sbuf, int scount, struct 
ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_iallgatherv_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, + (const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void * rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_iallreduce_fn_t) - (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, + (const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_ialltoall_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, + (const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_ialltoallv_fn_t) - (void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, + (const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_ialltoallw_fn_t) - (void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes, + (const void *sbuf, 
const int *scounts, const int *sdisps, struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_ibarrier_fn_t) @@ -293,42 +291,42 @@ typedef int (*mca_coll_base_module_ibcast_fn_t) struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_iexscan_fn_t) - (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, + (const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_igather_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, + (const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_igatherv_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, + (const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_ireduce_fn_t) - (void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, + (const void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int 
(*mca_coll_base_module_ireduce_scatter_fn_t) - (void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, + (const void *sbuf, void *rbuf, const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_ireduce_scatter_block_fn_t) - (void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, + (const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_iscan_fn_t) - (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, + (const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_iscatter_fn_t) - (void *sbuf, int scount, struct ompi_datatype_t *sdtype, + (const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_iscatterv_fn_t) - (void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, + (const void *sbuf, const int *scounts, const int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); @@ -337,12 +335,12 @@ typedef int (*mca_coll_base_module_iscatterv_fn_t) * The signature of the neighborhood alltoallw differs from alltoallw */ typedef int (*mca_coll_base_module_neighbor_alltoallw_fn_t) - (void *sbuf, int *scounts, MPI_Aint *sdisps, struct ompi_datatype_t 
**sdtypes, - void *rbuf, int *rcounts, MPI_Aint *rdisps, struct ompi_datatype_t **rdtypes, + (const void *sbuf, const int *scounts, const MPI_Aint *sdisps, struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const MPI_Aint *rdisps, struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); typedef int (*mca_coll_base_module_ineighbor_alltoallw_fn_t) - (void *sbuf, int *scounts, MPI_Aint *sdisps, struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, MPI_Aint *rdisps, struct ompi_datatype_t **rdtypes, + (const void *sbuf, const int *scounts, const MPI_Aint *sdisps, struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const MPI_Aint *rdisps, struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); @@ -466,7 +464,7 @@ struct mca_coll_base_module_2_1_0_t { /** Fault tolerance event trigger function */ mca_coll_base_module_ft_event_fn_t ft_event; - /** Disable function called when a collective module will not + /** Disable function called when a collective module will not be used for the given communicator */ mca_coll_base_module_disable_1_1_0_fn_t coll_module_disable; diff --git a/ompi/mca/coll/cuda/Makefile.am b/ompi/mca/coll/cuda/Makefile.am index 69a26d690ac..e81d7ec45e3 100644 --- a/ompi/mca/coll/cuda/Makefile.am +++ b/ompi/mca/coll/cuda/Makefile.am @@ -4,9 +4,9 @@ # reserved. # Copyright (c) 2014 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # dist_ompidata_DATA = help-mpi-coll-cuda.txt diff --git a/ompi/mca/coll/cuda/coll_cuda.h b/ompi/mca/coll/cuda/coll_cuda.h index 9350424525b..6b566c8eb1f 100644 --- a/ompi/mca/coll/cuda/coll_cuda.h +++ b/ompi/mca/coll/cuda/coll_cuda.h @@ -2,11 +2,11 @@ * Copyright (c) 2014 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,33 +39,33 @@ int mca_coll_cuda_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); int -mca_coll_cuda_allreduce(void *sbuf, void *rbuf, int count, +mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_cuda_reduce(void *sbuf, void *rbuf, int count, +int mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_cuda_exscan(void *sbuf, void *rbuf, int count, +int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_cuda_scan(void *sbuf, void *rbuf, int count, +int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); int -mca_coll_cuda_reduce_scatter_block(void *sbuf, void *rbuf, int rcount, +mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/cuda/coll_cuda_allreduce.c b/ompi/mca/coll/cuda/coll_cuda_allreduce.c index 7797700c388..1606bcdf928 100644 --- a/ompi/mca/coll/cuda/coll_cuda_allreduce.c +++ b/ompi/mca/coll/cuda/coll_cuda_allreduce.c @@ -1,12 +1,12 @@ /* - * Copyright (c) 2014 The University of Tennessee and The University + * Copyright (c) 2014-2015 The University of Tennessee and The University * of 
Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,22 +27,21 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_cuda_allreduce(void *sbuf, void *rbuf, int count, +mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; const char *sbuf2; size_t bufsize; int rc; - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - bufsize = true_extent + (ptrdiff_t)(count - 1) * extent; + bufsize = opal_datatype_span(&dtype->super, count, &gap); + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { @@ -50,7 +49,7 @@ mca_coll_cuda_allreduce(void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); sbuf2 = sbuf; /* save away original buffer */ - sbuf = sbuf1 - true_lb; + sbuf = sbuf1 - gap; } if (opal_cuda_check_bufs(rbuf, NULL)) { @@ -61,7 +60,7 @@ mca_coll_cuda_allreduce(void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away original buffer */ - rbuf = rbuf1 - true_lb; + rbuf = rbuf1 - gap; } rc = s->c_coll.coll_allreduce(sbuf, rbuf, count, dtype, op, comm, s->c_coll.coll_allreduce_module); if (NULL != sbuf1) { diff --git a/ompi/mca/coll/cuda/coll_cuda_component.c b/ompi/mca/coll/cuda/coll_cuda_component.c index 76c9b578ddd..10b0acf081c 100644 --- a/ompi/mca/coll/cuda/coll_cuda_component.c +++ b/ompi/mca/coll/cuda/coll_cuda_component.c @@ -7,9 +7,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. 
All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/coll/cuda/coll_cuda_exscan.c b/ompi/mca/coll/cuda/coll_cuda_exscan.c index 2bfe197d1e2..bc336341ac0 100644 --- a/ompi/mca/coll/cuda/coll_cuda_exscan.c +++ b/ompi/mca/coll/cuda/coll_cuda_exscan.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 The University of Tennessee and The University + * Copyright (c) 2014-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. @@ -19,22 +19,21 @@ #include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_datatype_cuda.h" -int mca_coll_cuda_exscan(void *sbuf, void *rbuf, int count, +int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; const char *sbuf2; size_t bufsize; int rc; - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - bufsize = true_extent + (ptrdiff_t)(count - 1) * extent; + bufsize = opal_datatype_span(&dtype->super, count, &gap); + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { @@ -42,7 +41,7 @@ int mca_coll_cuda_exscan(void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); sbuf2 = sbuf; /* save away original buffer */ - sbuf = sbuf1 - true_lb; + sbuf = sbuf1 - gap; } if (opal_cuda_check_bufs(rbuf, NULL)) { @@ -53,7 +52,7 @@ int mca_coll_cuda_exscan(void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away original buffer */ - rbuf = rbuf1 - 
true_lb; + rbuf = rbuf1 - gap; } rc = s->c_coll.coll_exscan(sbuf, rbuf, count, dtype, op, comm, diff --git a/ompi/mca/coll/cuda/coll_cuda_module.c b/ompi/mca/coll/cuda/coll_cuda_module.c index e5496db01e5..6535cf5f09a 100644 --- a/ompi/mca/coll/cuda/coll_cuda_module.c +++ b/ompi/mca/coll/cuda/coll_cuda_module.c @@ -4,17 +4,15 @@ * reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" -#ifdef HAVE_STRING_H #include -#endif #include #include "coll_cuda.h" @@ -52,7 +50,7 @@ static void mca_coll_cuda_module_destruct(mca_coll_cuda_module_t *module) } OBJ_CLASS_INSTANCE(mca_coll_cuda_module_t, mca_coll_base_module_t, - mca_coll_cuda_module_construct, + mca_coll_cuda_module_construct, mca_coll_cuda_module_destruct); @@ -76,7 +74,7 @@ int mca_coll_cuda_init_query(bool enable_progress_threads, * priority we want to return. */ mca_coll_base_module_t * -mca_coll_cuda_comm_query(struct ompi_communicator_t *comm, +mca_coll_cuda_comm_query(struct ompi_communicator_t *comm, int *priority) { mca_coll_cuda_module_t *cuda_module; @@ -149,7 +147,7 @@ int mca_coll_cuda_module_enable(mca_coll_base_module_t *module, return OMPI_SUCCESS; } else { orte_show_help("help-mpi-coll-cuda.txt", "missing collective", true, - orte_process_info.nodename, + orte_process_info.nodename, mca_coll_cuda_component.priority, msg); return OMPI_ERR_NOT_FOUND; } diff --git a/ompi/mca/coll/cuda/coll_cuda_reduce.c b/ompi/mca/coll/cuda/coll_cuda_reduce.c index 8bcd5c42a2b..2bcce13c75c 100644 --- a/ompi/mca/coll/cuda/coll_cuda_reduce.c +++ b/ompi/mca/coll/cuda/coll_cuda_reduce.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. 
@@ -27,22 +27,22 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_cuda_reduce(void *sbuf, void *rbuf, int count, +mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; const char *sbuf2; size_t bufsize; int rc; - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - bufsize = true_extent + (ptrdiff_t)(count - 1) * extent; + bufsize = opal_datatype_span(&dtype->super, count, &gap); + + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { @@ -50,7 +50,7 @@ mca_coll_cuda_reduce(void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); sbuf2 = sbuf; /* save away original buffer */ - sbuf = sbuf1 - lb; + sbuf = sbuf1 - gap; } if (opal_cuda_check_bufs(rbuf, NULL)) { @@ -61,7 +61,7 @@ mca_coll_cuda_reduce(void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away original buffer */ - rbuf = rbuf1 - lb; + rbuf = rbuf1 - gap; } rc = s->c_coll.coll_reduce((void *) sbuf, rbuf, count, dtype, op, root, comm, diff --git a/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c b/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c index fa3f8c0da91..0dccbc580fe 100644 --- a/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c +++ b/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c @@ -1,12 +1,12 @@ /* - * Copyright (c) 2014 The University of Tennessee and The University + * Copyright (c) 2014-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,27 +27,27 @@ * Returns: - MPI_SUCCESS or error code * * Algorithm: - * reduce and scatter (needs to be cleaned + * reduce and scatter (needs to be cleaned * up at some point) */ int -mca_coll_cuda_reduce_scatter_block(void *sbuf, void *rbuf, int rcount, +mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; const char *sbuf2; size_t sbufsize, rbufsize; int rc; - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - sbufsize = (true_extent + (ptrdiff_t)(rcount - 1) * extent) * ompi_comm_size(comm); - rbufsize = true_extent + (ptrdiff_t)(rcount - 1) * extent; + rbufsize = opal_datatype_span(&dtype->super, rcount, &gap); + + sbufsize = rbufsize * ompi_comm_size(comm); + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { sbuf1 = (char*)malloc(sbufsize); if (NULL == sbuf1) { @@ -55,7 +55,7 @@ mca_coll_cuda_reduce_scatter_block(void *sbuf, void *rbuf, int rcount, } opal_cuda_memcpy_sync(sbuf1, sbuf, sbufsize); sbuf2 = sbuf; /* save away original buffer */ - sbuf = sbuf1 - true_lb; + sbuf = sbuf1 - gap; } if (opal_cuda_check_bufs(rbuf, NULL)) { @@ -66,7 +66,7 @@ mca_coll_cuda_reduce_scatter_block(void *sbuf, void *rbuf, int rcount, } opal_cuda_memcpy_sync(rbuf1, rbuf, rbufsize); rbuf2 = rbuf; /* save away original buffer */ - rbuf = rbuf1 - true_lb; + rbuf = rbuf1 - gap; } rc = s->c_coll.coll_reduce_scatter_block(sbuf, rbuf, rcount, dtype, op, comm, s->c_coll.coll_reduce_scatter_block_module); diff --git a/ompi/mca/coll/cuda/coll_cuda_scan.c 
b/ompi/mca/coll/cuda/coll_cuda_scan.c index f5f9224774d..e9afde81075 100644 --- a/ompi/mca/coll/cuda/coll_cuda_scan.c +++ b/ompi/mca/coll/cuda/coll_cuda_scan.c @@ -1,12 +1,12 @@ /* - * Copyright (c) 2014 The University of Tennessee and The University + * Copyright (c) 2014-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,22 +26,21 @@ * Accepts: - same arguments as MPI_Scan() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_cuda_scan(void *sbuf, void *rbuf, int count, +int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t gap; char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL; const char *sbuf2; size_t bufsize; int rc; - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - bufsize = true_extent + (ptrdiff_t)(count - 1) * extent; + bufsize = opal_datatype_span(&dtype->super, count, &gap); + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { @@ -49,7 +48,7 @@ int mca_coll_cuda_scan(void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); sbuf2 = sbuf; /* save away original buffer */ - sbuf = sbuf1 - true_lb; + sbuf = sbuf1 - gap; } if (opal_cuda_check_bufs(rbuf, NULL)) { @@ -60,7 +59,7 @@ int mca_coll_cuda_scan(void *sbuf, void *rbuf, int count, } opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); rbuf2 = rbuf; /* save away original buffer */ - rbuf = rbuf1 - true_lb; + rbuf = rbuf1 - gap; } rc = s->c_coll.coll_scan(sbuf, rbuf, count, dtype, op, comm, 
s->c_coll.coll_scan_module); diff --git a/ompi/mca/coll/cuda/help-mpi-coll-cuda.txt b/ompi/mca/coll/cuda/help-mpi-coll-cuda.txt index b73074ceac9..5537420ab4e 100644 --- a/ompi/mca/coll/cuda/help-mpi-coll-cuda.txt +++ b/ompi/mca/coll/cuda/help-mpi-coll-cuda.txt @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2014 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for Open MPI's CUDA diff --git a/ompi/mca/coll/demo/Makefile.am b/ompi/mca/coll/demo/Makefile.am index 8c2a9eaea8d..235ba68883a 100644 --- a/ompi/mca/coll/demo/Makefile.am +++ b/ompi/mca/coll/demo/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/coll/demo/coll_demo.h b/ompi/mca/coll/demo/coll_demo.h index 0cb4827abd6..1a194b36a5e 100644 --- a/ompi/mca/coll/demo/coll_demo.h +++ b/ompi/mca/coll/demo/coll_demo.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -41,85 +41,85 @@ OMPI_MODULE_DECLSPEC extern const mca_coll_base_component_2_0_0_t mca_coll_demo_ bool enable_mpi_threads); mca_coll_base_module_t * mca_coll_demo_comm_query(struct ompi_communicator_t *comm, int *priority); - + /* Module functions */ int mca_coll_demo_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); - int mca_coll_demo_allgather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, + int mca_coll_demo_allgather_intra(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_allgather_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, + int mca_coll_demo_allgather_inter(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_allgatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, + int mca_coll_demo_allgatherv_intra(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, int *rcounts, int *disps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_allgatherv_inter(void *sbuf, int scount, 
- struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, + int mca_coll_demo_allgatherv_inter(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, int *rcounts, int *disps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_allreduce_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_demo_allreduce_intra(void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_allreduce_inter(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_demo_allreduce_inter(void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_alltoall_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, + int mca_coll_demo_alltoall_intra(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_alltoall_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, + int mca_coll_demo_alltoall_inter(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_alltoallv_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, + int mca_coll_demo_alltoallv_intra(void *sbuf, int *scounts, int *sdisps, + 
struct ompi_datatype_t *sdtype, + void *rbuf, int *rcounts, int *rdisps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_alltoallv_inter(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, + int mca_coll_demo_alltoallv_inter(void *sbuf, int *scounts, int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, int *rcounts, int *rdisps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, + int mca_coll_demo_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, + struct ompi_datatype_t **sdtypes, + void *rbuf, int *rcounts, int *rdisps, + struct ompi_datatype_t **rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_alltoallw_inter(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, + int mca_coll_demo_alltoallw_inter(void *sbuf, int *scounts, int *sdisps, + struct ompi_datatype_t **sdtypes, + void *rbuf, int *rcounts, int *rdisps, + struct ompi_datatype_t **rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); @@ -128,110 +128,110 @@ int mca_coll_demo_module_enable(mca_coll_base_module_t *module, int mca_coll_demo_barrier_inter(struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_bcast_intra(void *buff, int count, + int mca_coll_demo_bcast_intra(void *buff, int count, struct ompi_datatype_t *datatype, - int root, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_bcast_inter(void *buff, int count, - struct ompi_datatype_t 
*datatype, - int root, + int mca_coll_demo_bcast_inter(void *buff, int count, + struct ompi_datatype_t *datatype, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_exscan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_demo_exscan_intra(void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_exscan_inter(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_demo_exscan_inter(void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_gather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, + int mca_coll_demo_gather_intra(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_gather_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, + int mca_coll_demo_gather_inter(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_gatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, int root, + int mca_coll_demo_gatherv_intra(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int *rcounts, int *disps, + struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - 
int mca_coll_demo_gatherv_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, int root, + int mca_coll_demo_gatherv_inter(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int *rcounts, int *disps, + struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_reduce_intra(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_demo_reduce_intra(void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_reduce_inter(void *sbuf, void* rbuf, int count, + int mca_coll_demo_reduce_inter(void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_reduce_scatter_intra(void *sbuf, void *rbuf, - int *rcounts, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_demo_reduce_scatter_intra(void *sbuf, void *rbuf, + int *rcounts, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_reduce_scatter_inter(void *sbuf, void *rbuf, - int *rcounts, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_demo_reduce_scatter_inter(void *sbuf, void *rbuf, + int *rcounts, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_scan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_demo_scan_intra(void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t 
*comm, mca_coll_base_module_t *module); - int mca_coll_demo_scan_inter(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_demo_scan_inter(void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_scatter_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, + int mca_coll_demo_scatter_intra(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_scatter_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, + int mca_coll_demo_scatter_inter(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_scatterv_intra(void *sbuf, int *scounts, int *disps, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, int root, + int mca_coll_demo_scatterv_intra(void *sbuf, int *scounts, int *disps, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_demo_scatterv_inter(void *sbuf, int *scounts, int *disps, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, int root, + int mca_coll_demo_scatterv_inter(void *sbuf, int *scounts, int *disps, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); diff --git 
a/ompi/mca/coll/demo/coll_demo_allgather.c b/ompi/mca/coll/demo/coll_demo_allgather.c index dddd757fe4f..f33042a71d5 100644 --- a/ompi/mca/coll/demo/coll_demo_allgather.c +++ b/ompi/mca/coll/demo/coll_demo_allgather.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,9 +33,9 @@ * Accepts: - same as MPI_Allgather() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_demo_allgather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, +int mca_coll_demo_allgather_intra(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { @@ -54,10 +54,10 @@ int mca_coll_demo_allgather_intra(void *sbuf, int scount, * Accepts: - same as MPI_Allgather() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_demo_allgather_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, +int mca_coll_demo_allgather_inter(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/demo/coll_demo_allgatherv.c b/ompi/mca/coll/demo/coll_demo_allgatherv.c index d0449868f05..b6503ec6865 100644 --- a/ompi/mca/coll/demo/coll_demo_allgatherv.c +++ b/ompi/mca/coll/demo/coll_demo_allgatherv.c @@ -5,14 +5,14 @@ * Copyright (c) 
2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,10 +33,10 @@ * Accepts: - same as MPI_Allgatherv() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_demo_allgatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, +int mca_coll_demo_allgatherv_intra(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, int *rcounts, int *disps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { @@ -56,10 +56,10 @@ int mca_coll_demo_allgatherv_intra(void *sbuf, int scount, * Accepts: - same as MPI_Allgatherv() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_demo_allgatherv_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, +int mca_coll_demo_allgatherv_inter(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, int *rcounts, int *disps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/demo/coll_demo_allreduce.c b/ompi/mca/coll/demo/coll_demo_allreduce.c index 1d57ce7ec56..15975bacb1c 100644 --- a/ompi/mca/coll/demo/coll_demo_allreduce.c +++ b/ompi/mca/coll/demo/coll_demo_allreduce.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,7 +34,7 @@ * Returns: - MPI_SUCCESS or error code */ int mca_coll_demo_allreduce_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, + struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) diff --git a/ompi/mca/coll/demo/coll_demo_alltoall.c b/ompi/mca/coll/demo/coll_demo_alltoall.c index 4de3d8c2d2b..d3559970121 100644 --- a/ompi/mca/coll/demo/coll_demo_alltoall.c +++ b/ompi/mca/coll/demo/coll_demo_alltoall.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,13 +29,13 @@ /* * alltoall_intra * - * Function: - MPI_Alltoall + * Function: - MPI_Alltoall * Accepts: - same as MPI_Alltoall() * Returns: - MPI_SUCCESS or an MPI error code */ int mca_coll_demo_alltoall_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -43,7 +43,7 @@ int mca_coll_demo_alltoall_intra(void *sbuf, int scount, mca_coll_demo_module_t *demo_module = (mca_coll_demo_module_t*) module; opal_output_verbose(10, ompi_coll_base_framework.framework_output, "In demo alltoall_intra\n"); return demo_module->underlying.coll_alltoall(sbuf, scount, sdtype, - rbuf, rcount, rdtype, + rbuf, rcount, rdtype, comm, demo_module->underlying.coll_alltoall_module); } @@ -52,13 +52,13 @@ int mca_coll_demo_alltoall_intra(void *sbuf, int scount, /* * alltoall_inter * - * Function: - MPI_Alltoall + * Function: - MPI_Alltoall * Accepts: - same as MPI_Alltoall() * Returns: - MPI_SUCCESS or an MPI error code */ int mca_coll_demo_alltoall_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -66,7 +66,7 @@ int mca_coll_demo_alltoall_inter(void *sbuf, int scount, mca_coll_demo_module_t *demo_module = (mca_coll_demo_module_t*) module; opal_output_verbose(10, ompi_coll_base_framework.framework_output, "In demo alltoall_inter\n"); return demo_module->underlying.coll_alltoall(sbuf, scount, sdtype, - rbuf, rcount, rdtype, + rbuf, rcount, rdtype, comm, demo_module->underlying.coll_alltoall_module); } diff --git a/ompi/mca/coll/demo/coll_demo_alltoallv.c b/ompi/mca/coll/demo/coll_demo_alltoallv.c index 
a1d4b06c17c..0e8cf13861b 100644 --- a/ompi/mca/coll/demo/coll_demo_alltoallv.c +++ b/ompi/mca/coll/demo/coll_demo_alltoallv.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,7 +37,7 @@ int mca_coll_demo_alltoallv_intra(void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { @@ -61,7 +61,7 @@ int mca_coll_demo_alltoallv_inter(void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/demo/coll_demo_alltoallw.c b/ompi/mca/coll/demo/coll_demo_alltoallw.c index 80c279c8ee2..b9c29693178 100644 --- a/ompi/mca/coll/demo/coll_demo_alltoallw.c +++ b/ompi/mca/coll/demo/coll_demo_alltoallw.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,9 +34,9 @@ * Returns: - MPI_SUCCESS or an MPI error code */ int mca_coll_demo_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, + struct ompi_datatype_t **sdtypes, void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, + struct ompi_datatype_t **rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/demo/coll_demo_barrier.c b/ompi/mca/coll/demo/coll_demo_barrier.c index e0f2ba5a6ff..bcede2bf5b5 100644 --- a/ompi/mca/coll/demo/coll_demo_barrier.c +++ b/ompi/mca/coll/demo/coll_demo_barrier.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/coll/demo/coll_demo_bcast.c b/ompi/mca/coll/demo/coll_demo_bcast.c index 4bdcee66733..645c9e0dd62 100644 --- a/ompi/mca/coll/demo/coll_demo_bcast.c +++ b/ompi/mca/coll/demo/coll_demo_bcast.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/coll/demo/coll_demo_component.c b/ompi/mca/coll/demo/coll_demo_component.c index 471b5350de5..b957e2c5d87 100644 --- a/ompi/mca/coll/demo/coll_demo_component.c +++ b/ompi/mca/coll/demo/coll_demo_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -118,22 +118,22 @@ mca_coll_demo_module_construct(mca_coll_demo_module_t *module) static void mca_coll_demo_module_destruct(mca_coll_demo_module_t *module) { - RELEASE(module, allgather); - RELEASE(module, allgatherv); - RELEASE(module, allreduce); - RELEASE(module, alltoall); - RELEASE(module, alltoallv); - RELEASE(module, alltoallw); - RELEASE(module, barrier); - RELEASE(module, bcast); - RELEASE(module, exscan); - RELEASE(module, gather); - RELEASE(module, gatherv); - RELEASE(module, reduce); - RELEASE(module, reduce_scatter); - RELEASE(module, scan); - RELEASE(module, scatter); - RELEASE(module, scatterv); + RELEASE(module, allgather); + RELEASE(module, allgatherv); + RELEASE(module, allreduce); + RELEASE(module, alltoall); + RELEASE(module, alltoallv); + RELEASE(module, alltoallw); + RELEASE(module, barrier); + RELEASE(module, bcast); + RELEASE(module, exscan); + RELEASE(module, gather); + RELEASE(module, gatherv); + RELEASE(module, reduce); + RELEASE(module, reduce_scatter); + 
RELEASE(module, scan); + RELEASE(module, scatter); + RELEASE(module, scatterv); } diff --git a/ompi/mca/coll/demo/coll_demo_exscan.c b/ompi/mca/coll/demo/coll_demo_exscan.c index 616936ba260..c970369d0dd 100644 --- a/ompi/mca/coll/demo/coll_demo_exscan.c +++ b/ompi/mca/coll/demo/coll_demo_exscan.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,8 +34,8 @@ * Returns: - MPI_SUCCESS or error code */ int mca_coll_demo_exscan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/demo/coll_demo_gather.c b/ompi/mca/coll/demo/coll_demo_gather.c index 9f5ab09cd01..9f9840acf8f 100644 --- a/ompi/mca/coll/demo/coll_demo_gather.c +++ b/ompi/mca/coll/demo/coll_demo_gather.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,10 +32,10 @@ * Accepts: - same arguments as MPI_Gather() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_demo_gather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, +int mca_coll_demo_gather_intra(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { @@ -56,9 +56,9 @@ int mca_coll_demo_gather_intra(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int mca_coll_demo_gather_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/demo/coll_demo_gatherv.c b/ompi/mca/coll/demo/coll_demo_gatherv.c index 70d704698bb..f23b37a0d88 100644 --- a/ompi/mca/coll/demo/coll_demo_gatherv.c +++ b/ompi/mca/coll/demo/coll_demo_gatherv.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,7 +33,7 @@ * Accepts: - same arguments as MPI_Gatherv() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_demo_gatherv_intra(void *sbuf, int scount, +int mca_coll_demo_gatherv_intra(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, int root, diff --git a/ompi/mca/coll/demo/coll_demo_module.c b/ompi/mca/coll/demo/coll_demo_module.c index eab6cbd9ed3..ebb43ef7707 100644 --- a/ompi/mca/coll/demo/coll_demo_module.c +++ b/ompi/mca/coll/demo/coll_demo_module.c @@ -5,14 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -103,7 +104,7 @@ int mca_coll_demo_init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ - + return OMPI_SUCCESS; } @@ -123,7 +124,7 @@ mca_coll_demo_comm_query(struct ompi_communicator_t *comm, int *priority) *priority = mca_coll_demo_priority; demo_module->super.coll_module_enable = mca_coll_demo_module_enable; - demo_module->super.ft_event = mca_coll_demo_ft_event; + demo_module->super.ft_event = NULL; if (OMPI_COMM_IS_INTRA(comm)) { demo_module->super.coll_allgather = mca_coll_demo_allgather_intra; @@ -184,43 +185,23 @@ mca_coll_demo_module_enable(mca_coll_base_module_t *module, } /* save the old pointers */ - COPY(comm, demo_module, allgather); - COPY(comm, demo_module, allgatherv); - COPY(comm, demo_module, allreduce); - COPY(comm, demo_module, alltoall); - COPY(comm, demo_module, alltoallv); - COPY(comm, demo_module, alltoallw); - COPY(comm, demo_module, barrier); - COPY(comm, demo_module, bcast); - COPY(comm, demo_module, exscan); - COPY(comm, demo_module, gather); - COPY(comm, demo_module, gatherv); - COPY(comm, demo_module, reduce); - COPY(comm, demo_module, reduce_scatter); - COPY(comm, demo_module, scan); - COPY(comm, demo_module, scatter); - COPY(comm, demo_module, scatterv); + COPY(comm, demo_module, allgather); + COPY(comm, demo_module, allgatherv); + COPY(comm, demo_module, allreduce); + COPY(comm, demo_module, alltoall); + COPY(comm, demo_module, alltoallv); + COPY(comm, demo_module, alltoallw); + COPY(comm, demo_module, barrier); + COPY(comm, demo_module, bcast); + COPY(comm, demo_module, exscan); + COPY(comm, demo_module, gather); + COPY(comm, demo_module, gatherv); + COPY(comm, demo_module, reduce); + COPY(comm, demo_module, reduce_scatter); + COPY(comm, demo_module, scan); + COPY(comm, demo_module, scatter); + COPY(comm, demo_module, scatterv); return OMPI_SUCCESS; } - -int mca_coll_demo_ft_event(int state) { - 
if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/demo/coll_demo_reduce.c b/ompi/mca/coll/demo/coll_demo_reduce.c index 38579e347e0..6df413902b6 100644 --- a/ompi/mca/coll/demo/coll_demo_reduce.c +++ b/ompi/mca/coll/demo/coll_demo_reduce.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,7 +34,7 @@ * Returns: - MPI_SUCCESS or error code */ int mca_coll_demo_reduce_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, + struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -50,12 +50,12 @@ int mca_coll_demo_reduce_intra(void *sbuf, void *rbuf, int count, /* * reduce_inter * - * Function: - reduction + * Function: - reduction * Accepts: - same as MPI_Reduce() * Returns: - MPI_SUCCESS or error code */ int mca_coll_demo_reduce_inter(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, + struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) diff --git a/ompi/mca/coll/demo/coll_demo_reduce_scatter.c b/ompi/mca/coll/demo/coll_demo_reduce_scatter.c index e4be2d19219..438f1008b3a 100644 --- a/ompi/mca/coll/demo/coll_demo_reduce_scatter.c +++ b/ompi/mca/coll/demo/coll_demo_reduce_scatter.c @@ -5,14 +5,14 @@ * Copyright (c) 
2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/coll/demo/coll_demo_scan.c b/ompi/mca/coll/demo/coll_demo_scan.c index ece10931627..90d3cb343b1 100644 --- a/ompi/mca/coll/demo/coll_demo_scan.c +++ b/ompi/mca/coll/demo/coll_demo_scan.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,8 +34,8 @@ * Returns: - MPI_SUCCESS or error code */ int mca_coll_demo_scan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/demo/coll_demo_scatter.c b/ompi/mca/coll/demo/coll_demo_scatter.c index f6a6c48d396..ccc2e401df6 100644 --- a/ompi/mca/coll/demo/coll_demo_scatter.c +++ b/ompi/mca/coll/demo/coll_demo_scatter.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,11 +33,11 @@ * Accepts: - same arguments as MPI_Scatter() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_demo_scatter_intra(void *sbuf, int scount, +int mca_coll_demo_scatter_intra(void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, - int root, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { @@ -57,11 +57,11 @@ int mca_coll_demo_scatter_intra(void *sbuf, int scount, * Accepts: - same arguments as MPI_Scatter() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_demo_scatter_inter(void *sbuf, int scount, +int mca_coll_demo_scatter_inter(void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, - int root, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/demo/coll_demo_scatterv.c b/ompi/mca/coll/demo/coll_demo_scatterv.c index 267cd209927..3084efc0de5 100644 --- a/ompi/mca/coll/demo/coll_demo_scatterv.c +++ b/ompi/mca/coll/demo/coll_demo_scatterv.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -42,7 +42,7 @@ int mca_coll_demo_scatterv_intra(void *sbuf, int *scounts, { mca_coll_demo_module_t *demo_module = (mca_coll_demo_module_t*) module; opal_output_verbose(10, ompi_coll_base_framework.framework_output, "In demo scatterv_intra"); - return demo_module->underlying.coll_scatterv(sbuf, scounts, disps, + return demo_module->underlying.coll_scatterv(sbuf, scounts, disps, sdtype, rbuf, rcount, rdtype, root, comm, demo_module->underlying.coll_scatterv_module); @@ -65,7 +65,7 @@ int mca_coll_demo_scatterv_inter(void *sbuf, int *scounts, { mca_coll_demo_module_t *demo_module = (mca_coll_demo_module_t*) module; opal_output_verbose(10, ompi_coll_base_framework.framework_output, "In demo scatterv_inter"); - return demo_module->underlying.coll_scatterv(sbuf, scounts, disps, + return demo_module->underlying.coll_scatterv(sbuf, scounts, disps, sdtype, rbuf, rcount, rdtype, root, comm, demo_module->underlying.coll_scatterv_module); diff --git a/ompi/mca/coll/demo/configure.stub b/ompi/mca/coll/demo/configure.stub index 420a70ec47f..da069fa6b61 100644 --- a/ompi/mca/coll/demo/configure.stub +++ b/ompi/mca/coll/demo/configure.stub @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # Don't really need anything in here -- we just want a configure diff --git a/ompi/mca/coll/fca/Makefile.am b/ompi/mca/coll/fca/Makefile.am index 35e22e44e0a..9298b6f60ef 100644 --- a/ompi/mca/coll/fca/Makefile.am +++ b/ompi/mca/coll/fca/Makefile.am @@ -42,5 +42,5 @@ mca_coll_fca_la_LDFLAGS = -module -avoid-version $(coll_fca_LDFLAGS) noinst_LTLIBRARIES = $(component_noinst) libmca_coll_fca_la_SOURCES =$(coll_fca_sources) -libmca_coll_fca_la_LIBADD = $(coll_fca_LIBS) +libmca_coll_fca_la_LIBADD = $(coll_fca_LIBS) libmca_coll_fca_la_LDFLAGS = -module -avoid-version $(coll_fca_LDFLAGS) diff --git a/ompi/mca/coll/fca/coll_fca.h b/ompi/mca/coll/fca/coll_fca.h index 7785f6970c5..cc3b7cb4f86 100644 --- a/ompi/mca/coll/fca/coll_fca.h +++ b/ompi/mca/coll/fca/coll_fca.h @@ -118,7 +118,7 @@ struct mca_coll_fca_component_t { /** MCA parameter: FCA device */ char* fca_dev; - + /** MCA parameter: Enable FCA */ int fca_enable; @@ -199,7 +199,7 @@ struct mca_coll_fca_component_t { /** MCA parameter hash table size*/ int fca_hash_size; - + /** MCA parameter hash table size*/ int fca_number_of_primes; diff --git a/ompi/mca/coll/fca/coll_fca_api.h b/ompi/mca/coll/fca/coll_fca_api.h index a16f7b56e32..705c9ecbcb5 100644 --- a/ompi/mca/coll/fca/coll_fca_api.h +++ b/ompi/mca/coll/fca/coll_fca_api.h @@ -1,5 +1,7 @@ /** - Copyright (c) 2011 Mellanox Technologies. All rights reserved. + Copyright (c) 2011 Mellanox Technologies. All rights reserved. + Copyright (c) 2015 Research Organization for Information Science + and Technology (RIST). All rights reserved. 
$COPYRIGHT$ Additional copyrights may follow @@ -9,8 +11,8 @@ #include "ompi_config.h" -#include -#include +#include +#include #ifndef FCA_API #define OMPI_FCA_VERSION 12 diff --git a/ompi/mca/coll/fca/coll_fca_component.c b/ompi/mca/coll/fca/coll_fca_component.c index 65a3b2fe180..7f3c4a49358 100644 --- a/ompi/mca/coll/fca/coll_fca_component.c +++ b/ompi/mca/coll/fca/coll_fca_component.c @@ -45,1005 +45,1005 @@ const char *mca_coll_fca_component_version_string = int mca_coll_fca_output = -1; int mca_coll_fca_primes[] = { -2, 3, 5, 7, 11, 13, 17, 19, 23, 29, -31, 37, 41, 43, 47, 53, 59, 61, 67, 71, -73, 79, 83, 89, 97, 101, 103, 107, 109, 113, -127, 131, 137, 139, 149, 151, 157, 163, 167, 173, -179, 181, 191, 193, 197, 199, 211, 223, 227, 229, -233, 239, 241, 251, 257, 263, 269, 271, 277, 281, -283, 293, 307, 311, 313, 317, 331, 337, 347, 349, -353, 359, 367, 373, 379, 383, 389, 397, 401, 409, -419, 421, 431, 433, 439, 443, 449, 457, 461, 463, -467, 479, 487, 491, 499, 503, 509, 521, 523, 541, -547, 557, 563, 569, 571, 577, 587, 593, 599, 601, -607, 613, 617, 619, 631, 641, 643, 647, 653, 659, -661, 673, 677, 683, 691, 701, 709, 719, 727, 733, -739, 743, 751, 757, 761, 769, 773, 787, 797, 809, -811, 821, 823, 827, 829, 839, 853, 857, 859, 863, -877, 881, 883, 887, 907, 911, 919, 929, 937, 941, -947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013, -1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069, -1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, -1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, -1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 1289, 1291, -1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373, -1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451, -1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, -1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583, -1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657, -1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733, -1741, 1747, 1753, 
1759, 1777, 1783, 1787, 1789, 1801, 1811, -1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 1879, 1889, -1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987, -1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053, -2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, -2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213, -2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287, -2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357, -2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, -2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 2521, 2531, -2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617, -2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687, -2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, -2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819, -2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 2903, -2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999, -3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, -3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181, -3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257, -3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331, -3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, -3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511, -3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 3559, 3571, -3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643, -3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, -3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, 3821, -3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907, -3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989, -4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, -4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139, -4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 4229, 4231, -4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297, -4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, 
-4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 4483, 4493, -4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583, -4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657, -4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751, -4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831, -4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 4933, 4937, -4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003, -5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, -5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 5171, 5179, -5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279, -5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 5381, 5387, -5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443, -5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521, -5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 5623, 5639, -5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693, -5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, -5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 5851, 5857, -5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939, -5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 6047, 6053, -6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133, -6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221, -6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 6299, 6301, -6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367, -6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, -6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 6569, 6571, -6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673, -6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 6737, 6761, -6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833, -6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917, -6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 6991, 6997, -7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103, -7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, -7211, 7213, 7219, 7229, 7237, 7243, 7247, 
7253, 7283, 7297, -7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411, -7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 7489, 7499, -7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561, -7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643, -7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 7717, 7723, -7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829, -7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, -7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, 8011, 8017, -8039, 8053, 8059, 8069, 8081, 8087, 8089, 8093, 8101, 8111, -8117, 8123, 8147, 8161, 8167, 8171, 8179, 8191, 8209, 8219, -8221, 8231, 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291, -8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, 8377, 8387, -8389, 8419, 8423, 8429, 8431, 8443, 8447, 8461, 8467, 8501, -8513, 8521, 8527, 8537, 8539, 8543, 8563, 8573, 8581, 8597, -8599, 8609, 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677, -8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, 8737, 8741, -8747, 8753, 8761, 8779, 8783, 8803, 8807, 8819, 8821, 8831, -8837, 8839, 8849, 8861, 8863, 8867, 8887, 8893, 8923, 8929, -8933, 8941, 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011, -9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, 9103, 9109, -9127, 9133, 9137, 9151, 9157, 9161, 9173, 9181, 9187, 9199, -9203, 9209, 9221, 9227, 9239, 9241, 9257, 9277, 9281, 9283, -9293, 9311, 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377, -9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, 9437, 9439, -9461, 9463, 9467, 9473, 9479, 9491, 9497, 9511, 9521, 9533, -9539, 9547, 9551, 9587, 9601, 9613, 9619, 9623, 9629, 9631, -9643, 9649, 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733, -9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 9803, 9811, -9817, 9829, 9833, 9839, 9851, 9857, 9859, 9871, 9883, 9887, -9901, 9907, 9923, 9929, 9931, 9941, 9949, 9967, 9973, 10007, -10009, 10037, 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099, -10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, 10169, 10177, -10181, 
10193, 10211, 10223, 10243, 10247, 10253, 10259, 10267, 10271, -10273, 10289, 10301, 10303, 10313, 10321, 10331, 10333, 10337, 10343, -10357, 10369, 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459, -10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, 10559, 10567, -10589, 10597, 10601, 10607, 10613, 10627, 10631, 10639, 10651, 10657, -10663, 10667, 10687, 10691, 10709, 10711, 10723, 10729, 10733, 10739, -10753, 10771, 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859, -10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, 10939, 10949, -10957, 10973, 10979, 10987, 10993, 11003, 11027, 11047, 11057, 11059, -11069, 11071, 11083, 11087, 11093, 11113, 11117, 11119, 11131, 11149, -11159, 11161, 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251, -11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, 11321, 11329, -11351, 11353, 11369, 11383, 11393, 11399, 11411, 11423, 11437, 11443, -11447, 11467, 11471, 11483, 11489, 11491, 11497, 11503, 11519, 11527, -11549, 11551, 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657, -11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, 11743, 11777, -11779, 11783, 11789, 11801, 11807, 11813, 11821, 11827, 11831, 11833, -11839, 11863, 11867, 11887, 11897, 11903, 11909, 11923, 11927, 11933, -11939, 11941, 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011, -12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, 12107, 12109, -12113, 12119, 12143, 12149, 12157, 12161, 12163, 12197, 12203, 12211, -12227, 12239, 12241, 12251, 12253, 12263, 12269, 12277, 12281, 12289, -12301, 12323, 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401, -12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, 12479, 12487, -12491, 12497, 12503, 12511, 12517, 12527, 12539, 12541, 12547, 12553, -12569, 12577, 12583, 12589, 12601, 12611, 12613, 12619, 12637, 12641, -12647, 12653, 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739, -12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, 12823, 12829, -12841, 12853, 
12889, 12893, 12899, 12907, 12911, 12917, 12919, 12923, -12941, 12953, 12959, 12967, 12973, 12979, 12983, 13001, 13003, 13007, -13009, 13033, 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, -13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, 13183, 13187, -13217, 13219, 13229, 13241, 13249, 13259, 13267, 13291, 13297, 13309, -13313, 13327, 13331, 13337, 13339, 13367, 13381, 13397, 13399, 13411, -13417, 13421, 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499, -13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, 13613, 13619, -13627, 13633, 13649, 13669, 13679, 13681, 13687, 13691, 13693, 13697, -13709, 13711, 13721, 13723, 13729, 13751, 13757, 13759, 13763, 13781, -13789, 13799, 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879, -13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, 13963, 13967, -13997, 13999, 14009, 14011, 14029, 14033, 14051, 14057, 14071, 14081, -14083, 14087, 14107, 14143, 14149, 14153, 14159, 14173, 14177, 14197, -14207, 14221, 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323, -14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, 14411, 14419, -14423, 14431, 14437, 14447, 14449, 14461, 14479, 14489, 14503, 14519, -14533, 14537, 14543, 14549, 14551, 14557, 14561, 14563, 14591, 14593, -14621, 14627, 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699, -14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, 14759, 14767, -14771, 14779, 14783, 14797, 14813, 14821, 14827, 14831, 14843, 14851, -14867, 14869, 14879, 14887, 14891, 14897, 14923, 14929, 14939, 14947, -14951, 14957, 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073, -15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, 15139, 15149, -15161, 15173, 15187, 15193, 15199, 15217, 15227, 15233, 15241, 15259, -15263, 15269, 15271, 15277, 15287, 15289, 15299, 15307, 15313, 15319, -15329, 15331, 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401, -15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, 15493, 15497, -15511, 15527, 15541, 
15551, 15559, 15569, 15581, 15583, 15601, 15607, -15619, 15629, 15641, 15643, 15647, 15649, 15661, 15667, 15671, 15679, -15683, 15727, 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773, -15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, 15877, 15881, -15887, 15889, 15901, 15907, 15913, 15919, 15923, 15937, 15959, 15971, -15973, 15991, 16001, 16007, 16033, 16057, 16061, 16063, 16067, 16069, -16073, 16087, 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, -16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, 16253, 16267, -16273, 16301, 16319, 16333, 16339, 16349, 16361, 16363, 16369, 16381, -16411, 16417, 16421, 16427, 16433, 16447, 16451, 16453, 16477, 16481, -16487, 16493, 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603, -16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, 16673, 16691, -16693, 16699, 16703, 16729, 16741, 16747, 16759, 16763, 16787, 16811, -16823, 16829, 16831, 16843, 16871, 16879, 16883, 16889, 16901, 16903, -16921, 16927, 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993, -17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, 17077, 17093, -17099, 17107, 17117, 17123, 17137, 17159, 17167, 17183, 17189, 17191, -17203, 17207, 17209, 17231, 17239, 17257, 17291, 17293, 17299, 17317, -17321, 17327, 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389, -17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, 17471, 17477, -17483, 17489, 17491, 17497, 17509, 17519, 17539, 17551, 17569, 17573, -17579, 17581, 17597, 17599, 17609, 17623, 17627, 17657, 17659, 17669, -17681, 17683, 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783, -17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, 17881, 17891, -17903, 17909, 17911, 17921, 17923, 17929, 17939, 17957, 17959, 17971, -17977, 17981, 17987, 17989, 18013, 18041, 18043, 18047, 18049, 18059, -18061, 18077, 18089, 18097, 18119, 18121, 18127, 18131, 18133, 18143, -18149, 18169, 18181, 18191, 18199, 18211, 18217, 18223, 18229, 18233, -18251, 18253, 18257, 18269, 
18287, 18289, 18301, 18307, 18311, 18313, -18329, 18341, 18353, 18367, 18371, 18379, 18397, 18401, 18413, 18427, -18433, 18439, 18443, 18451, 18457, 18461, 18481, 18493, 18503, 18517, -18521, 18523, 18539, 18541, 18553, 18583, 18587, 18593, 18617, 18637, -18661, 18671, 18679, 18691, 18701, 18713, 18719, 18731, 18743, 18749, -18757, 18773, 18787, 18793, 18797, 18803, 18839, 18859, 18869, 18899, -18911, 18913, 18917, 18919, 18947, 18959, 18973, 18979, 19001, 19009, -19013, 19031, 19037, 19051, 19069, 19073, 19079, 19081, 19087, 19121, -19139, 19141, 19157, 19163, 19181, 19183, 19207, 19211, 19213, 19219, -19231, 19237, 19249, 19259, 19267, 19273, 19289, 19301, 19309, 19319, -19333, 19373, 19379, 19381, 19387, 19391, 19403, 19417, 19421, 19423, -19427, 19429, 19433, 19441, 19447, 19457, 19463, 19469, 19471, 19477, -19483, 19489, 19501, 19507, 19531, 19541, 19543, 19553, 19559, 19571, -19577, 19583, 19597, 19603, 19609, 19661, 19681, 19687, 19697, 19699, -19709, 19717, 19727, 19739, 19751, 19753, 19759, 19763, 19777, 19793, -19801, 19813, 19819, 19841, 19843, 19853, 19861, 19867, 19889, 19891, -19913, 19919, 19927, 19937, 19949, 19961, 19963, 19973, 19979, 19991, -19993, 19997, 20011, 20021, 20023, 20029, 20047, 20051, 20063, 20071, -20089, 20101, 20107, 20113, 20117, 20123, 20129, 20143, 20147, 20149, -20161, 20173, 20177, 20183, 20201, 20219, 20231, 20233, 20249, 20261, -20269, 20287, 20297, 20323, 20327, 20333, 20341, 20347, 20353, 20357, -20359, 20369, 20389, 20393, 20399, 20407, 20411, 20431, 20441, 20443, -20477, 20479, 20483, 20507, 20509, 20521, 20533, 20543, 20549, 20551, -20563, 20593, 20599, 20611, 20627, 20639, 20641, 20663, 20681, 20693, -20707, 20717, 20719, 20731, 20743, 20747, 20749, 20753, 20759, 20771, -20773, 20789, 20807, 20809, 20849, 20857, 20873, 20879, 20887, 20897, -20899, 20903, 20921, 20929, 20939, 20947, 20959, 20963, 20981, 20983, -21001, 21011, 21013, 21017, 21019, 21023, 21031, 21059, 21061, 21067, -21089, 21101, 21107, 21121, 21139, 
21143, 21149, 21157, 21163, 21169, -21179, 21187, 21191, 21193, 21211, 21221, 21227, 21247, 21269, 21277, -21283, 21313, 21317, 21319, 21323, 21341, 21347, 21377, 21379, 21383, -21391, 21397, 21401, 21407, 21419, 21433, 21467, 21481, 21487, 21491, -21493, 21499, 21503, 21517, 21521, 21523, 21529, 21557, 21559, 21563, -21569, 21577, 21587, 21589, 21599, 21601, 21611, 21613, 21617, 21647, -21649, 21661, 21673, 21683, 21701, 21713, 21727, 21737, 21739, 21751, -21757, 21767, 21773, 21787, 21799, 21803, 21817, 21821, 21839, 21841, -21851, 21859, 21863, 21871, 21881, 21893, 21911, 21929, 21937, 21943, -21961, 21977, 21991, 21997, 22003, 22013, 22027, 22031, 22037, 22039, -22051, 22063, 22067, 22073, 22079, 22091, 22093, 22109, 22111, 22123, -22129, 22133, 22147, 22153, 22157, 22159, 22171, 22189, 22193, 22229, -22247, 22259, 22271, 22273, 22277, 22279, 22283, 22291, 22303, 22307, -22343, 22349, 22367, 22369, 22381, 22391, 22397, 22409, 22433, 22441, -22447, 22453, 22469, 22481, 22483, 22501, 22511, 22531, 22541, 22543, -22549, 22567, 22571, 22573, 22613, 22619, 22621, 22637, 22639, 22643, -22651, 22669, 22679, 22691, 22697, 22699, 22709, 22717, 22721, 22727, -22739, 22741, 22751, 22769, 22777, 22783, 22787, 22807, 22811, 22817, -22853, 22859, 22861, 22871, 22877, 22901, 22907, 22921, 22937, 22943, -22961, 22963, 22973, 22993, 23003, 23011, 23017, 23021, 23027, 23029, -23039, 23041, 23053, 23057, 23059, 23063, 23071, 23081, 23087, 23099, -23117, 23131, 23143, 23159, 23167, 23173, 23189, 23197, 23201, 23203, -23209, 23227, 23251, 23269, 23279, 23291, 23293, 23297, 23311, 23321, -23327, 23333, 23339, 23357, 23369, 23371, 23399, 23417, 23431, 23447, -23459, 23473, 23497, 23509, 23531, 23537, 23539, 23549, 23557, 23561, -23563, 23567, 23581, 23593, 23599, 23603, 23609, 23623, 23627, 23629, -23633, 23663, 23669, 23671, 23677, 23687, 23689, 23719, 23741, 23743, -23747, 23753, 23761, 23767, 23773, 23789, 23801, 23813, 23819, 23827, -23831, 23833, 23857, 23869, 23873, 23879, 
23887, 23893, 23899, 23909, -23911, 23917, 23929, 23957, 23971, 23977, 23981, 23993, 24001, 24007, -24019, 24023, 24029, 24043, 24049, 24061, 24071, 24077, 24083, 24091, -24097, 24103, 24107, 24109, 24113, 24121, 24133, 24137, 24151, 24169, -24179, 24181, 24197, 24203, 24223, 24229, 24239, 24247, 24251, 24281, -24317, 24329, 24337, 24359, 24371, 24373, 24379, 24391, 24407, 24413, -24419, 24421, 24439, 24443, 24469, 24473, 24481, 24499, 24509, 24517, -24527, 24533, 24547, 24551, 24571, 24593, 24611, 24623, 24631, 24659, -24671, 24677, 24683, 24691, 24697, 24709, 24733, 24749, 24763, 24767, -24781, 24793, 24799, 24809, 24821, 24841, 24847, 24851, 24859, 24877, -24889, 24907, 24917, 24919, 24923, 24943, 24953, 24967, 24971, 24977, -24979, 24989, 25013, 25031, 25033, 25037, 25057, 25073, 25087, 25097, -25111, 25117, 25121, 25127, 25147, 25153, 25163, 25169, 25171, 25183, -25189, 25219, 25229, 25237, 25243, 25247, 25253, 25261, 25301, 25303, -25307, 25309, 25321, 25339, 25343, 25349, 25357, 25367, 25373, 25391, -25409, 25411, 25423, 25439, 25447, 25453, 25457, 25463, 25469, 25471, -25523, 25537, 25541, 25561, 25577, 25579, 25583, 25589, 25601, 25603, -25609, 25621, 25633, 25639, 25643, 25657, 25667, 25673, 25679, 25693, -25703, 25717, 25733, 25741, 25747, 25759, 25763, 25771, 25793, 25799, -25801, 25819, 25841, 25847, 25849, 25867, 25873, 25889, 25903, 25913, -25919, 25931, 25933, 25939, 25943, 25951, 25969, 25981, 25997, 25999, -26003, 26017, 26021, 26029, 26041, 26053, 26083, 26099, 26107, 26111, -26113, 26119, 26141, 26153, 26161, 26171, 26177, 26183, 26189, 26203, -26209, 26227, 26237, 26249, 26251, 26261, 26263, 26267, 26293, 26297, -26309, 26317, 26321, 26339, 26347, 26357, 26371, 26387, 26393, 26399, -26407, 26417, 26423, 26431, 26437, 26449, 26459, 26479, 26489, 26497, -26501, 26513, 26539, 26557, 26561, 26573, 26591, 26597, 26627, 26633, -26641, 26647, 26669, 26681, 26683, 26687, 26693, 26699, 26701, 26711, -26713, 26717, 26723, 26729, 26731, 26737, 26759, 
26777, 26783, 26801, -26813, 26821, 26833, 26839, 26849, 26861, 26863, 26879, 26881, 26891, -26893, 26903, 26921, 26927, 26947, 26951, 26953, 26959, 26981, 26987, -26993, 27011, 27017, 27031, 27043, 27059, 27061, 27067, 27073, 27077, -27091, 27103, 27107, 27109, 27127, 27143, 27179, 27191, 27197, 27211, -27239, 27241, 27253, 27259, 27271, 27277, 27281, 27283, 27299, 27329, -27337, 27361, 27367, 27397, 27407, 27409, 27427, 27431, 27437, 27449, -27457, 27479, 27481, 27487, 27509, 27527, 27529, 27539, 27541, 27551, -27581, 27583, 27611, 27617, 27631, 27647, 27653, 27673, 27689, 27691, -27697, 27701, 27733, 27737, 27739, 27743, 27749, 27751, 27763, 27767, -27773, 27779, 27791, 27793, 27799, 27803, 27809, 27817, 27823, 27827, -27847, 27851, 27883, 27893, 27901, 27917, 27919, 27941, 27943, 27947, -27953, 27961, 27967, 27983, 27997, 28001, 28019, 28027, 28031, 28051, -28057, 28069, 28081, 28087, 28097, 28099, 28109, 28111, 28123, 28151, -28163, 28181, 28183, 28201, 28211, 28219, 28229, 28277, 28279, 28283, -28289, 28297, 28307, 28309, 28319, 28349, 28351, 28387, 28393, 28403, -28409, 28411, 28429, 28433, 28439, 28447, 28463, 28477, 28493, 28499, -28513, 28517, 28537, 28541, 28547, 28549, 28559, 28571, 28573, 28579, -28591, 28597, 28603, 28607, 28619, 28621, 28627, 28631, 28643, 28649, -28657, 28661, 28663, 28669, 28687, 28697, 28703, 28711, 28723, 28729, -28751, 28753, 28759, 28771, 28789, 28793, 28807, 28813, 28817, 28837, -28843, 28859, 28867, 28871, 28879, 28901, 28909, 28921, 28927, 28933, -28949, 28961, 28979, 29009, 29017, 29021, 29023, 29027, 29033, 29059, -29063, 29077, 29101, 29123, 29129, 29131, 29137, 29147, 29153, 29167, -29173, 29179, 29191, 29201, 29207, 29209, 29221, 29231, 29243, 29251, -29269, 29287, 29297, 29303, 29311, 29327, 29333, 29339, 29347, 29363, -29383, 29387, 29389, 29399, 29401, 29411, 29423, 29429, 29437, 29443, -29453, 29473, 29483, 29501, 29527, 29531, 29537, 29567, 29569, 29573, -29581, 29587, 29599, 29611, 29629, 29633, 29641, 29663, 
29669, 29671, -29683, 29717, 29723, 29741, 29753, 29759, 29761, 29789, 29803, 29819, -29833, 29837, 29851, 29863, 29867, 29873, 29879, 29881, 29917, 29921, -29927, 29947, 29959, 29983, 29989, 30011, 30013, 30029, 30047, 30059, -30071, 30089, 30091, 30097, 30103, 30109, 30113, 30119, 30133, 30137, -30139, 30161, 30169, 30181, 30187, 30197, 30203, 30211, 30223, 30241, -30253, 30259, 30269, 30271, 30293, 30307, 30313, 30319, 30323, 30341, -30347, 30367, 30389, 30391, 30403, 30427, 30431, 30449, 30467, 30469, -30491, 30493, 30497, 30509, 30517, 30529, 30539, 30553, 30557, 30559, -30577, 30593, 30631, 30637, 30643, 30649, 30661, 30671, 30677, 30689, -30697, 30703, 30707, 30713, 30727, 30757, 30763, 30773, 30781, 30803, -30809, 30817, 30829, 30839, 30841, 30851, 30853, 30859, 30869, 30871, -30881, 30893, 30911, 30931, 30937, 30941, 30949, 30971, 30977, 30983, -31013, 31019, 31033, 31039, 31051, 31063, 31069, 31079, 31081, 31091, -31121, 31123, 31139, 31147, 31151, 31153, 31159, 31177, 31181, 31183, -31189, 31193, 31219, 31223, 31231, 31237, 31247, 31249, 31253, 31259, -31267, 31271, 31277, 31307, 31319, 31321, 31327, 31333, 31337, 31357, -31379, 31387, 31391, 31393, 31397, 31469, 31477, 31481, 31489, 31511, -31513, 31517, 31531, 31541, 31543, 31547, 31567, 31573, 31583, 31601, -31607, 31627, 31643, 31649, 31657, 31663, 31667, 31687, 31699, 31721, -31723, 31727, 31729, 31741, 31751, 31769, 31771, 31793, 31799, 31817, -31847, 31849, 31859, 31873, 31883, 31891, 31907, 31957, 31963, 31973, -31981, 31991, 32003, 32009, 32027, 32029, 32051, 32057, 32059, 32063, -32069, 32077, 32083, 32089, 32099, 32117, 32119, 32141, 32143, 32159, -32173, 32183, 32189, 32191, 32203, 32213, 32233, 32237, 32251, 32257, -32261, 32297, 32299, 32303, 32309, 32321, 32323, 32327, 32341, 32353, -32359, 32363, 32369, 32371, 32377, 32381, 32401, 32411, 32413, 32423, -32429, 32441, 32443, 32467, 32479, 32491, 32497, 32503, 32507, 32531, -32533, 32537, 32561, 32563, 32569, 32573, 32579, 32587, 32603, 
32609, -32611, 32621, 32633, 32647, 32653, 32687, 32693, 32707, 32713, 32717, -32719, 32749, 32771, 32779, 32783, 32789, 32797, 32801, 32803, 32831, -32833, 32839, 32843, 32869, 32887, 32909, 32911, 32917, 32933, 32939, -32941, 32957, 32969, 32971, 32983, 32987, 32993, 32999, 33013, 33023, -33029, 33037, 33049, 33053, 33071, 33073, 33083, 33091, 33107, 33113, -33119, 33149, 33151, 33161, 33179, 33181, 33191, 33199, 33203, 33211, -33223, 33247, 33287, 33289, 33301, 33311, 33317, 33329, 33331, 33343, -33347, 33349, 33353, 33359, 33377, 33391, 33403, 33409, 33413, 33427, -33457, 33461, 33469, 33479, 33487, 33493, 33503, 33521, 33529, 33533, -33547, 33563, 33569, 33577, 33581, 33587, 33589, 33599, 33601, 33613, -33617, 33619, 33623, 33629, 33637, 33641, 33647, 33679, 33703, 33713, -33721, 33739, 33749, 33751, 33757, 33767, 33769, 33773, 33791, 33797, -33809, 33811, 33827, 33829, 33851, 33857, 33863, 33871, 33889, 33893, -33911, 33923, 33931, 33937, 33941, 33961, 33967, 33997, 34019, 34031, -34033, 34039, 34057, 34061, 34123, 34127, 34129, 34141, 34147, 34157, -34159, 34171, 34183, 34211, 34213, 34217, 34231, 34253, 34259, 34261, -34267, 34273, 34283, 34297, 34301, 34303, 34313, 34319, 34327, 34337, -34351, 34361, 34367, 34369, 34381, 34403, 34421, 34429, 34439, 34457, -34469, 34471, 34483, 34487, 34499, 34501, 34511, 34513, 34519, 34537, -34543, 34549, 34583, 34589, 34591, 34603, 34607, 34613, 34631, 34649, -34651, 34667, 34673, 34679, 34687, 34693, 34703, 34721, 34729, 34739, -34747, 34757, 34759, 34763, 34781, 34807, 34819, 34841, 34843, 34847, -34849, 34871, 34877, 34883, 34897, 34913, 34919, 34939, 34949, 34961, -34963, 34981, 35023, 35027, 35051, 35053, 35059, 35069, 35081, 35083, -35089, 35099, 35107, 35111, 35117, 35129, 35141, 35149, 35153, 35159, -35171, 35201, 35221, 35227, 35251, 35257, 35267, 35279, 35281, 35291, -35311, 35317, 35323, 35327, 35339, 35353, 35363, 35381, 35393, 35401, -35407, 35419, 35423, 35437, 35447, 35449, 35461, 35491, 35507, 35509, 
-35521, 35527, 35531, 35533, 35537, 35543, 35569, 35573, 35591, 35593, -35597, 35603, 35617, 35671, 35677, 35729, 35731, 35747, 35753, 35759, -35771, 35797, 35801, 35803, 35809, 35831, 35837, 35839, 35851, 35863, -35869, 35879, 35897, 35899, 35911, 35923, 35933, 35951, 35963, 35969, -35977, 35983, 35993, 35999, 36007, 36011, 36013, 36017, 36037, 36061, -36067, 36073, 36083, 36097, 36107, 36109, 36131, 36137, 36151, 36161, -36187, 36191, 36209, 36217, 36229, 36241, 36251, 36263, 36269, 36277, -36293, 36299, 36307, 36313, 36319, 36341, 36343, 36353, 36373, 36383, -36389, 36433, 36451, 36457, 36467, 36469, 36473, 36479, 36493, 36497, -36523, 36527, 36529, 36541, 36551, 36559, 36563, 36571, 36583, 36587, -36599, 36607, 36629, 36637, 36643, 36653, 36671, 36677, 36683, 36691, -36697, 36709, 36713, 36721, 36739, 36749, 36761, 36767, 36779, 36781, -36787, 36791, 36793, 36809, 36821, 36833, 36847, 36857, 36871, 36877, -36887, 36899, 36901, 36913, 36919, 36923, 36929, 36931, 36943, 36947, -36973, 36979, 36997, 37003, 37013, 37019, 37021, 37039, 37049, 37057, -37061, 37087, 37097, 37117, 37123, 37139, 37159, 37171, 37181, 37189, -37199, 37201, 37217, 37223, 37243, 37253, 37273, 37277, 37307, 37309, -37313, 37321, 37337, 37339, 37357, 37361, 37363, 37369, 37379, 37397, -37409, 37423, 37441, 37447, 37463, 37483, 37489, 37493, 37501, 37507, -37511, 37517, 37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, -37579, 37589, 37591, 37607, 37619, 37633, 37643, 37649, 37657, 37663, -37691, 37693, 37699, 37717, 37747, 37781, 37783, 37799, 37811, 37813, -37831, 37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, -37957, 37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039, 38047, -38053, 38069, 38083, 38113, 38119, 38149, 38153, 38167, 38177, 38183, -38189, 38197, 38201, 38219, 38231, 38237, 38239, 38261, 38273, 38281, -38287, 38299, 38303, 38317, 38321, 38327, 38329, 38333, 38351, 38371, -38377, 38393, 38431, 38447, 38449, 38453, 38459, 38461, 38501, 38543, -38557, 
38561, 38567, 38569, 38593, 38603, 38609, 38611, 38629, 38639, -38651, 38653, 38669, 38671, 38677, 38693, 38699, 38707, 38711, 38713, -38723, 38729, 38737, 38747, 38749, 38767, 38783, 38791, 38803, 38821, -38833, 38839, 38851, 38861, 38867, 38873, 38891, 38903, 38917, 38921, -38923, 38933, 38953, 38959, 38971, 38977, 38993, 39019, 39023, 39041, -39043, 39047, 39079, 39089, 39097, 39103, 39107, 39113, 39119, 39133, -39139, 39157, 39161, 39163, 39181, 39191, 39199, 39209, 39217, 39227, -39229, 39233, 39239, 39241, 39251, 39293, 39301, 39313, 39317, 39323, -39341, 39343, 39359, 39367, 39371, 39373, 39383, 39397, 39409, 39419, -39439, 39443, 39451, 39461, 39499, 39503, 39509, 39511, 39521, 39541, -39551, 39563, 39569, 39581, 39607, 39619, 39623, 39631, 39659, 39667, -39671, 39679, 39703, 39709, 39719, 39727, 39733, 39749, 39761, 39769, -39779, 39791, 39799, 39821, 39827, 39829, 39839, 39841, 39847, 39857, -39863, 39869, 39877, 39883, 39887, 39901, 39929, 39937, 39953, 39971, -39979, 39983, 39989, 40009, 40013, 40031, 40037, 40039, 40063, 40087, -40093, 40099, 40111, 40123, 40127, 40129, 40151, 40153, 40163, 40169, -40177, 40189, 40193, 40213, 40231, 40237, 40241, 40253, 40277, 40283, -40289, 40343, 40351, 40357, 40361, 40387, 40423, 40427, 40429, 40433, -40459, 40471, 40483, 40487, 40493, 40499, 40507, 40519, 40529, 40531, -40543, 40559, 40577, 40583, 40591, 40597, 40609, 40627, 40637, 40639, -40693, 40697, 40699, 40709, 40739, 40751, 40759, 40763, 40771, 40787, -40801, 40813, 40819, 40823, 40829, 40841, 40847, 40849, 40853, 40867, -40879, 40883, 40897, 40903, 40927, 40933, 40939, 40949, 40961, 40973, -40993, 41011, 41017, 41023, 41039, 41047, 41051, 41057, 41077, 41081, -41113, 41117, 41131, 41141, 41143, 41149, 41161, 41177, 41179, 41183, -41189, 41201, 41203, 41213, 41221, 41227, 41231, 41233, 41243, 41257, -41263, 41269, 41281, 41299, 41333, 41341, 41351, 41357, 41381, 41387, -41389, 41399, 41411, 41413, 41443, 41453, 41467, 41479, 41491, 41507, -41513, 41519, 
41521, 41539, 41543, 41549, 41579, 41593, 41597, 41603, -41609, 41611, 41617, 41621, 41627, 41641, 41647, 41651, 41659, 41669, -41681, 41687, 41719, 41729, 41737, 41759, 41761, 41771, 41777, 41801, -41809, 41813, 41843, 41849, 41851, 41863, 41879, 41887, 41893, 41897, -41903, 41911, 41927, 41941, 41947, 41953, 41957, 41959, 41969, 41981, -41983, 41999, 42013, 42017, 42019, 42023, 42043, 42061, 42071, 42073, -42083, 42089, 42101, 42131, 42139, 42157, 42169, 42179, 42181, 42187, -42193, 42197, 42209, 42221, 42223, 42227, 42239, 42257, 42281, 42283, -42293, 42299, 42307, 42323, 42331, 42337, 42349, 42359, 42373, 42379, -42391, 42397, 42403, 42407, 42409, 42433, 42437, 42443, 42451, 42457, -42461, 42463, 42467, 42473, 42487, 42491, 42499, 42509, 42533, 42557, -42569, 42571, 42577, 42589, 42611, 42641, 42643, 42649, 42667, 42677, -42683, 42689, 42697, 42701, 42703, 42709, 42719, 42727, 42737, 42743, -42751, 42767, 42773, 42787, 42793, 42797, 42821, 42829, 42839, 42841, -42853, 42859, 42863, 42899, 42901, 42923, 42929, 42937, 42943, 42953, -42961, 42967, 42979, 42989, 43003, 43013, 43019, 43037, 43049, 43051, -43063, 43067, 43093, 43103, 43117, 43133, 43151, 43159, 43177, 43189, -43201, 43207, 43223, 43237, 43261, 43271, 43283, 43291, 43313, 43319, -43321, 43331, 43391, 43397, 43399, 43403, 43411, 43427, 43441, 43451, -43457, 43481, 43487, 43499, 43517, 43541, 43543, 43573, 43577, 43579, -43591, 43597, 43607, 43609, 43613, 43627, 43633, 43649, 43651, 43661, -43669, 43691, 43711, 43717, 43721, 43753, 43759, 43777, 43781, 43783, -43787, 43789, 43793, 43801, 43853, 43867, 43889, 43891, 43913, 43933, -43943, 43951, 43961, 43963, 43969, 43973, 43987, 43991, 43997, 44017, -44021, 44027, 44029, 44041, 44053, 44059, 44071, 44087, 44089, 44101, -44111, 44119, 44123, 44129, 44131, 44159, 44171, 44179, 44189, 44201, -44203, 44207, 44221, 44249, 44257, 44263, 44267, 44269, 44273, 44279, -44281, 44293, 44351, 44357, 44371, 44381, 44383, 44389, 44417, 44449, -44453, 44483, 44491, 
44497, 44501, 44507, 44519, 44531, 44533, 44537, -44543, 44549, 44563, 44579, 44587, 44617, 44621, 44623, 44633, 44641, -44647, 44651, 44657, 44683, 44687, 44699, 44701, 44711, 44729, 44741, -44753, 44771, 44773, 44777, 44789, 44797, 44809, 44819, 44839, 44843, -44851, 44867, 44879, 44887, 44893, 44909, 44917, 44927, 44939, 44953, -44959, 44963, 44971, 44983, 44987, 45007, 45013, 45053, 45061, 45077, -45083, 45119, 45121, 45127, 45131, 45137, 45139, 45161, 45179, 45181, -45191, 45197, 45233, 45247, 45259, 45263, 45281, 45289, 45293, 45307, -45317, 45319, 45329, 45337, 45341, 45343, 45361, 45377, 45389, 45403, -45413, 45427, 45433, 45439, 45481, 45491, 45497, 45503, 45523, 45533, -45541, 45553, 45557, 45569, 45587, 45589, 45599, 45613, 45631, 45641, -45659, 45667, 45673, 45677, 45691, 45697, 45707, 45737, 45751, 45757, -45763, 45767, 45779, 45817, 45821, 45823, 45827, 45833, 45841, 45853, -45863, 45869, 45887, 45893, 45943, 45949, 45953, 45959, 45971, 45979, -45989, 46021, 46027, 46049, 46051, 46061, 46073, 46091, 46093, 46099, -46103, 46133, 46141, 46147, 46153, 46171, 46181, 46183, 46187, 46199, -46219, 46229, 46237, 46261, 46271, 46273, 46279, 46301, 46307, 46309, -46327, 46337, 46349, 46351, 46381, 46399, 46411, 46439, 46441, 46447, -46451, 46457, 46471, 46477, 46489, 46499, 46507, 46511, 46523, 46549, -46559, 46567, 46573, 46589, 46591, 46601, 46619, 46633, 46639, 46643, -46649, 46663, 46679, 46681, 46687, 46691, 46703, 46723, 46727, 46747, -46751, 46757, 46769, 46771, 46807, 46811, 46817, 46819, 46829, 46831, -46853, 46861, 46867, 46877, 46889, 46901, 46919, 46933, 46957, 46993, -46997, 47017, 47041, 47051, 47057, 47059, 47087, 47093, 47111, 47119, -47123, 47129, 47137, 47143, 47147, 47149, 47161, 47189, 47207, 47221, -47237, 47251, 47269, 47279, 47287, 47293, 47297, 47303, 47309, 47317, -47339, 47351, 47353, 47363, 47381, 47387, 47389, 47407, 47417, 47419, -47431, 47441, 47459, 47491, 47497, 47501, 47507, 47513, 47521, 47527, -47533, 47543, 47563, 47569, 
47581, 47591, 47599, 47609, 47623, 47629, -47639, 47653, 47657, 47659, 47681, 47699, 47701, 47711, 47713, 47717, -47737, 47741, 47743, 47777, 47779, 47791, 47797, 47807, 47809, 47819, -47837, 47843, 47857, 47869, 47881, 47903, 47911, 47917, 47933, 47939, -47947, 47951, 47963, 47969, 47977, 47981, 48017, 48023, 48029, 48049, -48073, 48079, 48091, 48109, 48119, 48121, 48131, 48157, 48163, 48179, -48187, 48193, 48197, 48221, 48239, 48247, 48259, 48271, 48281, 48299, -48311, 48313, 48337, 48341, 48353, 48371, 48383, 48397, 48407, 48409, -48413, 48437, 48449, 48463, 48473, 48479, 48481, 48487, 48491, 48497, -48523, 48527, 48533, 48539, 48541, 48563, 48571, 48589, 48593, 48611, -48619, 48623, 48647, 48649, 48661, 48673, 48677, 48679, 48731, 48733, -48751, 48757, 48761, 48767, 48779, 48781, 48787, 48799, 48809, 48817, -48821, 48823, 48847, 48857, 48859, 48869, 48871, 48883, 48889, 48907, -48947, 48953, 48973, 48989, 48991, 49003, 49009, 49019, 49031, 49033, -49037, 49043, 49057, 49069, 49081, 49103, 49109, 49117, 49121, 49123, -49139, 49157, 49169, 49171, 49177, 49193, 49199, 49201, 49207, 49211, -49223, 49253, 49261, 49277, 49279, 49297, 49307, 49331, 49333, 49339, -49363, 49367, 49369, 49391, 49393, 49409, 49411, 49417, 49429, 49433, -49451, 49459, 49463, 49477, 49481, 49499, 49523, 49529, 49531, 49537, -49547, 49549, 49559, 49597, 49603, 49613, 49627, 49633, 49639, 49663, -49667, 49669, 49681, 49697, 49711, 49727, 49739, 49741, 49747, 49757, -49783, 49787, 49789, 49801, 49807, 49811, 49823, 49831, 49843, 49853, -49871, 49877, 49891, 49919, 49921, 49927, 49937, 49939, 49943, 49957, -49991, 49993, 49999, 50021, 50023, 50033, 50047, 50051, 50053, 50069, -50077, 50087, 50093, 50101, 50111, 50119, 50123, 50129, 50131, 50147, -50153, 50159, 50177, 50207, 50221, 50227, 50231, 50261, 50263, 50273, -50287, 50291, 50311, 50321, 50329, 50333, 50341, 50359, 50363, 50377, -50383, 50387, 50411, 50417, 50423, 50441, 50459, 50461, 50497, 50503, -50513, 50527, 50539, 50543, 50549, 
50551, 50581, 50587, 50591, 50593, -50599, 50627, 50647, 50651, 50671, 50683, 50707, 50723, 50741, 50753, -50767, 50773, 50777, 50789, 50821, 50833, 50839, 50849, 50857, 50867, -50873, 50891, 50893, 50909, 50923, 50929, 50951, 50957, 50969, 50971, -50989, 50993, 51001, 51031, 51043, 51047, 51059, 51061, 51071, 51109, -51131, 51133, 51137, 51151, 51157, 51169, 51193, 51197, 51199, 51203, -51217, 51229, 51239, 51241, 51257, 51263, 51283, 51287, 51307, 51329, -51341, 51343, 51347, 51349, 51361, 51383, 51407, 51413, 51419, 51421, -51427, 51431, 51437, 51439, 51449, 51461, 51473, 51479, 51481, 51487, -51503, 51511, 51517, 51521, 51539, 51551, 51563, 51577, 51581, 51593, -51599, 51607, 51613, 51631, 51637, 51647, 51659, 51673, 51679, 51683, -51691, 51713, 51719, 51721, 51749, 51767, 51769, 51787, 51797, 51803, -51817, 51827, 51829, 51839, 51853, 51859, 51869, 51871, 51893, 51899, -51907, 51913, 51929, 51941, 51949, 51971, 51973, 51977, 51991, 52009, -52021, 52027, 52051, 52057, 52067, 52069, 52081, 52103, 52121, 52127, -52147, 52153, 52163, 52177, 52181, 52183, 52189, 52201, 52223, 52237, -52249, 52253, 52259, 52267, 52289, 52291, 52301, 52313, 52321, 52361, -52363, 52369, 52379, 52387, 52391, 52433, 52453, 52457, 52489, 52501, -52511, 52517, 52529, 52541, 52543, 52553, 52561, 52567, 52571, 52579, -52583, 52609, 52627, 52631, 52639, 52667, 52673, 52691, 52697, 52709, -52711, 52721, 52727, 52733, 52747, 52757, 52769, 52783, 52807, 52813, -52817, 52837, 52859, 52861, 52879, 52883, 52889, 52901, 52903, 52919, -52937, 52951, 52957, 52963, 52967, 52973, 52981, 52999, 53003, 53017, -53047, 53051, 53069, 53077, 53087, 53089, 53093, 53101, 53113, 53117, -53129, 53147, 53149, 53161, 53171, 53173, 53189, 53197, 53201, 53231, -53233, 53239, 53267, 53269, 53279, 53281, 53299, 53309, 53323, 53327, -53353, 53359, 53377, 53381, 53401, 53407, 53411, 53419, 53437, 53441, -53453, 53479, 53503, 53507, 53527, 53549, 53551, 53569, 53591, 53593, -53597, 53609, 53611, 53617, 53623, 53629, 
53633, 53639, 53653, 53657, -53681, 53693, 53699, 53717, 53719, 53731, 53759, 53773, 53777, 53783, -53791, 53813, 53819, 53831, 53849, 53857, 53861, 53881, 53887, 53891, -53897, 53899, 53917, 53923, 53927, 53939, 53951, 53959, 53987, 53993, -54001, 54011, 54013, 54037, 54049, 54059, 54083, 54091, 54101, 54121, -54133, 54139, 54151, 54163, 54167, 54181, 54193, 54217, 54251, 54269, -54277, 54287, 54293, 54311, 54319, 54323, 54331, 54347, 54361, 54367, -54371, 54377, 54401, 54403, 54409, 54413, 54419, 54421, 54437, 54443, -54449, 54469, 54493, 54497, 54499, 54503, 54517, 54521, 54539, 54541, -54547, 54559, 54563, 54577, 54581, 54583, 54601, 54617, 54623, 54629, -54631, 54647, 54667, 54673, 54679, 54709, 54713, 54721, 54727, 54751, -54767, 54773, 54779, 54787, 54799, 54829, 54833, 54851, 54869, 54877, -54881, 54907, 54917, 54919, 54941, 54949, 54959, 54973, 54979, 54983, -55001, 55009, 55021, 55049, 55051, 55057, 55061, 55073, 55079, 55103, -55109, 55117, 55127, 55147, 55163, 55171, 55201, 55207, 55213, 55217, -55219, 55229, 55243, 55249, 55259, 55291, 55313, 55331, 55333, 55337, -55339, 55343, 55351, 55373, 55381, 55399, 55411, 55439, 55441, 55457, -55469, 55487, 55501, 55511, 55529, 55541, 55547, 55579, 55589, 55603, -55609, 55619, 55621, 55631, 55633, 55639, 55661, 55663, 55667, 55673, -55681, 55691, 55697, 55711, 55717, 55721, 55733, 55763, 55787, 55793, -55799, 55807, 55813, 55817, 55819, 55823, 55829, 55837, 55843, 55849, -55871, 55889, 55897, 55901, 55903, 55921, 55927, 55931, 55933, 55949, -55967, 55987, 55997, 56003, 56009, 56039, 56041, 56053, 56081, 56087, -56093, 56099, 56101, 56113, 56123, 56131, 56149, 56167, 56171, 56179, -56197, 56207, 56209, 56237, 56239, 56249, 56263, 56267, 56269, 56299, -56311, 56333, 56359, 56369, 56377, 56383, 56393, 56401, 56417, 56431, -56437, 56443, 56453, 56467, 56473, 56477, 56479, 56489, 56501, 56503, -56509, 56519, 56527, 56531, 56533, 56543, 56569, 56591, 56597, 56599, -56611, 56629, 56633, 56659, 56663, 56671, 56681, 
56687, 56701, 56711, -56713, 56731, 56737, 56747, 56767, 56773, 56779, 56783, 56807, 56809, -56813, 56821, 56827, 56843, 56857, 56873, 56891, 56893, 56897, 56909, -56911, 56921, 56923, 56929, 56941, 56951, 56957, 56963, 56983, 56989, -56993, 56999, 57037, 57041, 57047, 57059, 57073, 57077, 57089, 57097, -57107, 57119, 57131, 57139, 57143, 57149, 57163, 57173, 57179, 57191, -57193, 57203, 57221, 57223, 57241, 57251, 57259, 57269, 57271, 57283, -57287, 57301, 57329, 57331, 57347, 57349, 57367, 57373, 57383, 57389, -57397, 57413, 57427, 57457, 57467, 57487, 57493, 57503, 57527, 57529, -57557, 57559, 57571, 57587, 57593, 57601, 57637, 57641, 57649, 57653, -57667, 57679, 57689, 57697, 57709, 57713, 57719, 57727, 57731, 57737, -57751, 57773, 57781, 57787, 57791, 57793, 57803, 57809, 57829, 57839, -57847, 57853, 57859, 57881, 57899, 57901, 57917, 57923, 57943, 57947, -57973, 57977, 57991, 58013, 58027, 58031, 58043, 58049, 58057, 58061, -58067, 58073, 58099, 58109, 58111, 58129, 58147, 58151, 58153, 58169, -58171, 58189, 58193, 58199, 58207, 58211, 58217, 58229, 58231, 58237, -58243, 58271, 58309, 58313, 58321, 58337, 58363, 58367, 58369, 58379, -58391, 58393, 58403, 58411, 58417, 58427, 58439, 58441, 58451, 58453, -58477, 58481, 58511, 58537, 58543, 58549, 58567, 58573, 58579, 58601, -58603, 58613, 58631, 58657, 58661, 58679, 58687, 58693, 58699, 58711, -58727, 58733, 58741, 58757, 58763, 58771, 58787, 58789, 58831, 58889, -58897, 58901, 58907, 58909, 58913, 58921, 58937, 58943, 58963, 58967, -58979, 58991, 58997, 59009, 59011, 59021, 59023, 59029, 59051, 59053, -59063, 59069, 59077, 59083, 59093, 59107, 59113, 59119, 59123, 59141, -59149, 59159, 59167, 59183, 59197, 59207, 59209, 59219, 59221, 59233, -59239, 59243, 59263, 59273, 59281, 59333, 59341, 59351, 59357, 59359, -59369, 59377, 59387, 59393, 59399, 59407, 59417, 59419, 59441, 59443, -59447, 59453, 59467, 59471, 59473, 59497, 59509, 59513, 59539, 59557, -59561, 59567, 59581, 59611, 59617, 59621, 59627, 59629, 
59651, 59659, -59663, 59669, 59671, 59693, 59699, 59707, 59723, 59729, 59743, 59747, -59753, 59771, 59779, 59791, 59797, 59809, 59833, 59863, 59879, 59887, -59921, 59929, 59951, 59957, 59971, 59981, 59999, 60013, 60017, 60029, -60037, 60041, 60077, 60083, 60089, 60091, 60101, 60103, 60107, 60127, -60133, 60139, 60149, 60161, 60167, 60169, 60209, 60217, 60223, 60251, -60257, 60259, 60271, 60289, 60293, 60317, 60331, 60337, 60343, 60353, -60373, 60383, 60397, 60413, 60427, 60443, 60449, 60457, 60493, 60497, -60509, 60521, 60527, 60539, 60589, 60601, 60607, 60611, 60617, 60623, -60631, 60637, 60647, 60649, 60659, 60661, 60679, 60689, 60703, 60719, -60727, 60733, 60737, 60757, 60761, 60763, 60773, 60779, 60793, 60811, -60821, 60859, 60869, 60887, 60889, 60899, 60901, 60913, 60917, 60919, -60923, 60937, 60943, 60953, 60961, 61001, 61007, 61027, 61031, 61043, -61051, 61057, 61091, 61099, 61121, 61129, 61141, 61151, 61153, 61169, -61211, 61223, 61231, 61253, 61261, 61283, 61291, 61297, 61331, 61333, -61339, 61343, 61357, 61363, 61379, 61381, 61403, 61409, 61417, 61441, -61463, 61469, 61471, 61483, 61487, 61493, 61507, 61511, 61519, 61543, -61547, 61553, 61559, 61561, 61583, 61603, 61609, 61613, 61627, 61631, -61637, 61643, 61651, 61657, 61667, 61673, 61681, 61687, 61703, 61717, -61723, 61729, 61751, 61757, 61781, 61813, 61819, 61837, 61843, 61861, -61871, 61879, 61909, 61927, 61933, 61949, 61961, 61967, 61979, 61981, -61987, 61991, 62003, 62011, 62017, 62039, 62047, 62053, 62057, 62071, -62081, 62099, 62119, 62129, 62131, 62137, 62141, 62143, 62171, 62189, -62191, 62201, 62207, 62213, 62219, 62233, 62273, 62297, 62299, 62303, -62311, 62323, 62327, 62347, 62351, 62383, 62401, 62417, 62423, 62459, -62467, 62473, 62477, 62483, 62497, 62501, 62507, 62533, 62539, 62549, -62563, 62581, 62591, 62597, 62603, 62617, 62627, 62633, 62639, 62653, -62659, 62683, 62687, 62701, 62723, 62731, 62743, 62753, 62761, 62773, -62791, 62801, 62819, 62827, 62851, 62861, 62869, 62873, 62897, 
62903, -62921, 62927, 62929, 62939, 62969, 62971, 62981, 62983, 62987, 62989, -63029, 63031, 63059, 63067, 63073, 63079, 63097, 63103, 63113, 63127, -63131, 63149, 63179, 63197, 63199, 63211, 63241, 63247, 63277, 63281, -63299, 63311, 63313, 63317, 63331, 63337, 63347, 63353, 63361, 63367, -63377, 63389, 63391, 63397, 63409, 63419, 63421, 63439, 63443, 63463, -63467, 63473, 63487, 63493, 63499, 63521, 63527, 63533, 63541, 63559, -63577, 63587, 63589, 63599, 63601, 63607, 63611, 63617, 63629, 63647, -63649, 63659, 63667, 63671, 63689, 63691, 63697, 63703, 63709, 63719, -63727, 63737, 63743, 63761, 63773, 63781, 63793, 63799, 63803, 63809, -63823, 63839, 63841, 63853, 63857, 63863, 63901, 63907, 63913, 63929, -63949, 63977, 63997, 64007, 64013, 64019, 64033, 64037, 64063, 64067, -64081, 64091, 64109, 64123, 64151, 64153, 64157, 64171, 64187, 64189, -64217, 64223, 64231, 64237, 64271, 64279, 64283, 64301, 64303, 64319, -64327, 64333, 64373, 64381, 64399, 64403, 64433, 64439, 64451, 64453, -64483, 64489, 64499, 64513, 64553, 64567, 64577, 64579, 64591, 64601, -64609, 64613, 64621, 64627, 64633, 64661, 64663, 64667, 64679, 64693, -64709, 64717, 64747, 64763, 64781, 64783, 64793, 64811, 64817, 64849, -64853, 64871, 64877, 64879, 64891, 64901, 64919, 64921, 64927, 64937, -64951, 64969, 64997, 65003, 65011, 65027, 65029, 65033, 65053, 65063, -65071, 65089, 65099, 65101, 65111, 65119, 65123, 65129, 65141, 65147, -65167, 65171, 65173, 65179, 65183, 65203, 65213, 65239, 65257, 65267, -65269, 65287, 65293, 65309, 65323, 65327, 65353, 65357, 65371, 65381, -65393, 65407, 65413, 65419, 65423, 65437, 65447, 65449, 65479, 65497, -65519, 65521, 65537, 65539, 65543, 65551, 65557, 65563, 65579, 65581, -65587, 65599, 65609, 65617, 65629, 65633, 65647, 65651, 65657, 65677, -65687, 65699, 65701, 65707, 65713, 65717, 65719, 65729, 65731, 65761, -65777, 65789, 65809, 65827, 65831, 65837, 65839, 65843, 65851, 65867, -65881, 65899, 65921, 65927, 65929, 65951, 65957, 65963, 65981, 65983, 
-65993, 66029, 66037, 66041, 66047, 66067, 66071, 66083, 66089, 66103, -66107, 66109, 66137, 66161, 66169, 66173, 66179, 66191, 66221, 66239, -66271, 66293, 66301, 66337, 66343, 66347, 66359, 66361, 66373, 66377, -66383, 66403, 66413, 66431, 66449, 66457, 66463, 66467, 66491, 66499, -66509, 66523, 66529, 66533, 66541, 66553, 66569, 66571, 66587, 66593, -66601, 66617, 66629, 66643, 66653, 66683, 66697, 66701, 66713, 66721, -66733, 66739, 66749, 66751, 66763, 66791, 66797, 66809, 66821, 66841, -66851, 66853, 66863, 66877, 66883, 66889, 66919, 66923, 66931, 66943, -66947, 66949, 66959, 66973, 66977, 67003, 67021, 67033, 67043, 67049, -67057, 67061, 67073, 67079, 67103, 67121, 67129, 67139, 67141, 67153, -67157, 67169, 67181, 67187, 67189, 67211, 67213, 67217, 67219, 67231, -67247, 67261, 67271, 67273, 67289, 67307, 67339, 67343, 67349, 67369, -67391, 67399, 67409, 67411, 67421, 67427, 67429, 67433, 67447, 67453, -67477, 67481, 67489, 67493, 67499, 67511, 67523, 67531, 67537, 67547, -67559, 67567, 67577, 67579, 67589, 67601, 67607, 67619, 67631, 67651, -67679, 67699, 67709, 67723, 67733, 67741, 67751, 67757, 67759, 67763, -67777, 67783, 67789, 67801, 67807, 67819, 67829, 67843, 67853, 67867, -67883, 67891, 67901, 67927, 67931, 67933, 67939, 67943, 67957, 67961, -67967, 67979, 67987, 67993, 68023, 68041, 68053, 68059, 68071, 68087, -68099, 68111, 68113, 68141, 68147, 68161, 68171, 68207, 68209, 68213, -68219, 68227, 68239, 68261, 68279, 68281, 68311, 68329, 68351, 68371, -68389, 68399, 68437, 68443, 68447, 68449, 68473, 68477, 68483, 68489, -68491, 68501, 68507, 68521, 68531, 68539, 68543, 68567, 68581, 68597, -68611, 68633, 68639, 68659, 68669, 68683, 68687, 68699, 68711, 68713, -68729, 68737, 68743, 68749, 68767, 68771, 68777, 68791, 68813, 68819, -68821, 68863, 68879, 68881, 68891, 68897, 68899, 68903, 68909, 68917, -68927, 68947, 68963, 68993, 69001, 69011, 69019, 69029, 69031, 69061, -69067, 69073, 69109, 69119, 69127, 69143, 69149, 69151, 69163, 69191, -69193, 
69197, 69203, 69221, 69233, 69239, 69247, 69257, 69259, 69263, -69313, 69317, 69337, 69341, 69371, 69379, 69383, 69389, 69401, 69403, -69427, 69431, 69439, 69457, 69463, 69467, 69473, 69481, 69491, 69493, -69497, 69499, 69539, 69557, 69593, 69623, 69653, 69661, 69677, 69691, -69697, 69709, 69737, 69739, 69761, 69763, 69767, 69779, 69809, 69821, -69827, 69829, 69833, 69847, 69857, 69859, 69877, 69899, 69911, 69929, -69931, 69941, 69959, 69991, 69997, 70001, 70003, 70009, 70019, 70039, -70051, 70061, 70067, 70079, 70099, 70111, 70117, 70121, 70123, 70139, -70141, 70157, 70163, 70177, 70181, 70183, 70199, 70201, 70207, 70223, -70229, 70237, 70241, 70249, 70271, 70289, 70297, 70309, 70313, 70321, -70327, 70351, 70373, 70379, 70381, 70393, 70423, 70429, 70439, 70451, -70457, 70459, 70481, 70487, 70489, 70501, 70507, 70529, 70537, 70549, -70571, 70573, 70583, 70589, 70607, 70619, 70621, 70627, 70639, 70657, -70663, 70667, 70687, 70709, 70717, 70729, 70753, 70769, 70783, 70793, -70823, 70841, 70843, 70849, 70853, 70867, 70877, 70879, 70891, 70901, -70913, 70919, 70921, 70937, 70949, 70951, 70957, 70969, 70979, 70981, -70991, 70997, 70999, 71011, 71023, 71039, 71059, 71069, 71081, 71089, -71119, 71129, 71143, 71147, 71153, 71161, 71167, 71171, 71191, 71209, -71233, 71237, 71249, 71257, 71261, 71263, 71287, 71293, 71317, 71327, -71329, 71333, 71339, 71341, 71347, 71353, 71359, 71363, 71387, 71389, -71399, 71411, 71413, 71419, 71429, 71437, 71443, 71453, 71471, 71473, -71479, 71483, 71503, 71527, 71537, 71549, 71551, 71563, 71569, 71593, -71597, 71633, 71647, 71663, 71671, 71693, 71699, 71707, 71711, 71713, -71719, 71741, 71761, 71777, 71789, 71807, 71809, 71821, 71837, 71843, -71849, 71861, 71867, 71879, 71881, 71887, 71899, 71909, 71917, 71933, -71941, 71947, 71963, 71971, 71983, 71987, 71993, 71999, 72019, 72031, -72043, 72047, 72053, 72073, 72077, 72089, 72091, 72101, 72103, 72109, -72139, 72161, 72167, 72169, 72173, 72211, 72221, 72223, 72227, 72229, -72251, 72253, 
72269, 72271, 72277, 72287, 72307, 72313, 72337, 72341, -72353, 72367, 72379, 72383, 72421, 72431, 72461, 72467, 72469, 72481, -72493, 72497, 72503, 72533, 72547, 72551, 72559, 72577, 72613, 72617, -72623, 72643, 72647, 72649, 72661, 72671, 72673, 72679, 72689, 72701, -72707, 72719, 72727, 72733, 72739, 72763, 72767, 72797, 72817, 72823, -72859, 72869, 72871, 72883, 72889, 72893, 72901, 72907, 72911, 72923, -72931, 72937, 72949, 72953, 72959, 72973, 72977, 72997, 73009, 73013, -73019, 73037, 73039, 73043, 73061, 73063, 73079, 73091, 73121, 73127, -73133, 73141, 73181, 73189, 73237, 73243, 73259, 73277, 73291, 73303, -73309, 73327, 73331, 73351, 73361, 73363, 73369, 73379, 73387, 73417, -73421, 73433, 73453, 73459, 73471, 73477, 73483, 73517, 73523, 73529, -73547, 73553, 73561, 73571, 73583, 73589, 73597, 73607, 73609, 73613, -73637, 73643, 73651, 73673, 73679, 73681, 73693, 73699, 73709, 73721, -73727, 73751, 73757, 73771, 73783, 73819, 73823, 73847, 73849, 73859, -73867, 73877, 73883, 73897, 73907, 73939, 73943, 73951, 73961, 73973, -73999, 74017, 74021, 74027, 74047, 74051, 74071, 74077, 74093, 74099, -74101, 74131, 74143, 74149, 74159, 74161, 74167, 74177, 74189, 74197, -74201, 74203, 74209, 74219, 74231, 74257, 74279, 74287, 74293, 74297, -74311, 74317, 74323, 74353, 74357, 74363, 74377, 74381, 74383, 74411, -74413, 74419, 74441, 74449, 74453, 74471, 74489, 74507, 74509, 74521, -74527, 74531, 74551, 74561, 74567, 74573, 74587, 74597, 74609, 74611, -74623, 74653, 74687, 74699, 74707, 74713, 74717, 74719, 74729, 74731, -74747, 74759, 74761, 74771, 74779, 74797, 74821, 74827, 74831, 74843, -74857, 74861, 74869, 74873, 74887, 74891, 74897, 74903, 74923, 74929, -74933, 74941, 74959, 75011, 75013, 75017, 75029, 75037, 75041, 75079, -75083, 75109, 75133, 75149, 75161, 75167, 75169, 75181, 75193, 75209, -75211, 75217, 75223, 75227, 75239, 75253, 75269, 75277, 75289, 75307, -75323, 75329, 75337, 75347, 75353, 75367, 75377, 75389, 75391, 75401, -75403, 75407, 75431, 
75437, 75479, 75503, 75511, 75521, 75527, 75533, -75539, 75541, 75553, 75557, 75571, 75577, 75583, 75611, 75617, 75619, -75629, 75641, 75653, 75659, 75679, 75683, 75689, 75703, 75707, 75709, -75721, 75731, 75743, 75767, 75773, 75781, 75787, 75793, 75797, 75821, -75833, 75853, 75869, 75883, 75913, 75931, 75937, 75941, 75967, 75979, -75983, 75989, 75991, 75997, 76001, 76003, 76031, 76039, 76079, 76081, -76091, 76099, 76103, 76123, 76129, 76147, 76157, 76159, 76163, 76207, -76213, 76231, 76243, 76249, 76253, 76259, 76261, 76283, 76289, 76303, -76333, 76343, 76367, 76369, 76379, 76387, 76403, 76421, 76423, 76441, -76463, 76471, 76481, 76487, 76493, 76507, 76511, 76519, 76537, 76541, -76543, 76561, 76579, 76597, 76603, 76607, 76631, 76649, 76651, 76667, -76673, 76679, 76697, 76717, 76733, 76753, 76757, 76771, 76777, 76781, -76801, 76819, 76829, 76831, 76837, 76847, 76871, 76873, 76883, 76907, -76913, 76919, 76943, 76949, 76961, 76963, 76991, 77003, 77017, 77023, -77029, 77041, 77047, 77069, 77081, 77093, 77101, 77137, 77141, 77153, -77167, 77171, 77191, 77201, 77213, 77237, 77239, 77243, 77249, 77261, -77263, 77267, 77269, 77279, 77291, 77317, 77323, 77339, 77347, 77351, -77359, 77369, 77377, 77383, 77417, 77419, 77431, 77447, 77471, 77477, -77479, 77489, 77491, 77509, 77513, 77521, 77527, 77543, 77549, 77551, -77557, 77563, 77569, 77573, 77587, 77591, 77611, 77617, 77621, 77641, -77647, 77659, 77681, 77687, 77689, 77699, 77711, 77713, 77719, 77723, -77731, 77743, 77747, 77761, 77773, 77783, 77797, 77801, 77813, 77839, -77849, 77863, 77867, 77893, 77899, 77929, 77933, 77951, 77969, 77977, -77983, 77999, 78007, 78017, 78031, 78041, 78049, 78059, 78079, 78101, -78121, 78137, 78139, 78157, 78163, 78167, 78173, 78179, 78191, 78193, -78203, 78229, 78233, 78241, 78259, 78277, 78283, 78301, 78307, 78311, -78317, 78341, 78347, 78367, 78401, 78427, 78437, 78439, 78467, 78479, -78487, 78497, 78509, 78511, 78517, 78539, 78541, 78553, 78569, 78571, -78577, 78583, 78593, 78607, 
78623, 78643, 78649, 78653, 78691, 78697, -78707, 78713, 78721, 78737, 78779, 78781, 78787, 78791, 78797, 78803, -78809, 78823, 78839, 78853, 78857, 78877, 78887, 78889, 78893, 78901, -78919, 78929, 78941, 78977, 78979, 78989, 79031, 79039, 79043, 79063, -79087, 79103, 79111, 79133, 79139, 79147, 79151, 79153, 79159, 79181, -79187, 79193, 79201, 79229, 79231, 79241, 79259, 79273, 79279, 79283, -79301, 79309, 79319, 79333, 79337, 79349, 79357, 79367, 79379, 79393, -79397, 79399, 79411, 79423, 79427, 79433, 79451, 79481, 79493, 79531, -79537, 79549, 79559, 79561, 79579, 79589, 79601, 79609, 79613, 79621, -79627, 79631, 79633, 79657, 79669, 79687, 79691, 79693, 79697, 79699, -79757, 79769, 79777, 79801, 79811, 79813, 79817, 79823, 79829, 79841, -79843, 79847, 79861, 79867, 79873, 79889, 79901, 79903, 79907, 79939, -79943, 79967, 79973, 79979, 79987, 79997, 79999, 80021, 80039, 80051, -80071, 80077, 80107, 80111, 80141, 80147, 80149, 80153, 80167, 80173, -80177, 80191, 80207, 80209, 80221, 80231, 80233, 80239, 80251, 80263, -80273, 80279, 80287, 80309, 80317, 80329, 80341, 80347, 80363, 80369, -80387, 80407, 80429, 80447, 80449, 80471, 80473, 80489, 80491, 80513, -80527, 80537, 80557, 80567, 80599, 80603, 80611, 80621, 80627, 80629, -80651, 80657, 80669, 80671, 80677, 80681, 80683, 80687, 80701, 80713, -80737, 80747, 80749, 80761, 80777, 80779, 80783, 80789, 80803, 80809, -80819, 80831, 80833, 80849, 80863, 80897, 80909, 80911, 80917, 80923, -80929, 80933, 80953, 80963, 80989, 81001, 81013, 81017, 81019, 81023, -81031, 81041, 81043, 81047, 81049, 81071, 81077, 81083, 81097, 81101, -81119, 81131, 81157, 81163, 81173, 81181, 81197, 81199, 81203, 81223, -81233, 81239, 81281, 81283, 81293, 81299, 81307, 81331, 81343, 81349, -81353, 81359, 81371, 81373, 81401, 81409, 81421, 81439, 81457, 81463, -81509, 81517, 81527, 81533, 81547, 81551, 81553, 81559, 81563, 81569, -81611, 81619, 81629, 81637, 81647, 81649, 81667, 81671, 81677, 81689, -81701, 81703, 81707, 81727, 81737, 
81749, 81761, 81769, 81773, 81799, -81817, 81839, 81847, 81853, 81869, 81883, 81899, 81901, 81919, 81929, -81931, 81937, 81943, 81953, 81967, 81971, 81973, 82003, 82007, 82009, -82013, 82021, 82031, 82037, 82039, 82051, 82067, 82073, 82129, 82139, -82141, 82153, 82163, 82171, 82183, 82189, 82193, 82207, 82217, 82219, -82223, 82231, 82237, 82241, 82261, 82267, 82279, 82301, 82307, 82339, -82349, 82351, 82361, 82373, 82387, 82393, 82421, 82457, 82463, 82469, -82471, 82483, 82487, 82493, 82499, 82507, 82529, 82531, 82549, 82559, -82561, 82567, 82571, 82591, 82601, 82609, 82613, 82619, 82633, 82651, -82657, 82699, 82721, 82723, 82727, 82729, 82757, 82759, 82763, 82781, -82787, 82793, 82799, 82811, 82813, 82837, 82847, 82883, 82889, 82891, -82903, 82913, 82939, 82963, 82981, 82997, 83003, 83009, 83023, 83047, -83059, 83063, 83071, 83077, 83089, 83093, 83101, 83117, 83137, 83177, -83203, 83207, 83219, 83221, 83227, 83231, 83233, 83243, 83257, 83267, -83269, 83273, 83299, 83311, 83339, 83341, 83357, 83383, 83389, 83399, -83401, 83407, 83417, 83423, 83431, 83437, 83443, 83449, 83459, 83471, -83477, 83497, 83537, 83557, 83561, 83563, 83579, 83591, 83597, 83609, -83617, 83621, 83639, 83641, 83653, 83663, 83689, 83701, 83717, 83719, -83737, 83761, 83773, 83777, 83791, 83813, 83833, 83843, 83857, 83869, -83873, 83891, 83903, 83911, 83921, 83933, 83939, 83969, 83983, 83987, -84011, 84017, 84047, 84053, 84059, 84061, 84067, 84089, 84121, 84127, -84131, 84137, 84143, 84163, 84179, 84181, 84191, 84199, 84211, 84221, -84223, 84229, 84239, 84247, 84263, 84299, 84307, 84313, 84317, 84319, -84347, 84349, 84377, 84389, 84391, 84401, 84407, 84421, 84431, 84437, -84443, 84449, 84457, 84463, 84467, 84481, 84499, 84503, 84509, 84521, -84523, 84533, 84551, 84559, 84589, 84629, 84631, 84649, 84653, 84659, -84673, 84691, 84697, 84701, 84713, 84719, 84731, 84737, 84751, 84761, -84787, 84793, 84809, 84811, 84827, 84857, 84859, 84869, 84871, 84913, -84919, 84947, 84961, 84967, 84977, 84979, 
84991, 85009, 85021, 85027, -85037, 85049, 85061, 85081, 85087, 85091, 85093, 85103, 85109, 85121, -85133, 85147, 85159, 85193, 85199, 85201, 85213, 85223, 85229, 85237, -85243, 85247, 85259, 85297, 85303, 85313, 85331, 85333, 85361, 85363, -85369, 85381, 85411, 85427, 85429, 85439, 85447, 85451, 85453, 85469, -85487, 85513, 85517, 85523, 85531, 85549, 85571, 85577, 85597, 85601, -85607, 85619, 85621, 85627, 85639, 85643, 85661, 85667, 85669, 85691, -85703, 85711, 85717, 85733, 85751, 85781, 85793, 85817, 85819, 85829, -85831, 85837, 85843, 85847, 85853, 85889, 85903, 85909, 85931, 85933, -85991, 85999, 86011, 86017, 86027, 86029, 86069, 86077, 86083, 86111, -86113, 86117, 86131, 86137, 86143, 86161, 86171, 86179, 86183, 86197, -86201, 86209, 86239, 86243, 86249, 86257, 86263, 86269, 86287, 86291, -86293, 86297, 86311, 86323, 86341, 86351, 86353, 86357, 86369, 86371, -86381, 86389, 86399, 86413, 86423, 86441, 86453, 86461, 86467, 86477, -86491, 86501, 86509, 86531, 86533, 86539, 86561, 86573, 86579, 86587, -86599, 86627, 86629, 86677, 86689, 86693, 86711, 86719, 86729, 86743, -86753, 86767, 86771, 86783, 86813, 86837, 86843, 86851, 86857, 86861, -86869, 86923, 86927, 86929, 86939, 86951, 86959, 86969, 86981, 86993, -87011, 87013, 87037, 87041, 87049, 87071, 87083, 87103, 87107, 87119, -87121, 87133, 87149, 87151, 87179, 87181, 87187, 87211, 87221, 87223, -87251, 87253, 87257, 87277, 87281, 87293, 87299, 87313, 87317, 87323, -87337, 87359, 87383, 87403, 87407, 87421, 87427, 87433, 87443, 87473, -87481, 87491, 87509, 87511, 87517, 87523, 87539, 87541, 87547, 87553, -87557, 87559, 87583, 87587, 87589, 87613, 87623, 87629, 87631, 87641, -87643, 87649, 87671, 87679, 87683, 87691, 87697, 87701, 87719, 87721, -87739, 87743, 87751, 87767, 87793, 87797, 87803, 87811, 87833, 87853, -87869, 87877, 87881, 87887, 87911, 87917, 87931, 87943, 87959, 87961, -87973, 87977, 87991, 88001, 88003, 88007, 88019, 88037, 88069, 88079, -88093, 88117, 88129, 88169, 88177, 88211, 88223, 
88237, 88241, 88259, -88261, 88289, 88301, 88321, 88327, 88337, 88339, 88379, 88397, 88411, -88423, 88427, 88463, 88469, 88471, 88493, 88499, 88513, 88523, 88547, -88589, 88591, 88607, 88609, 88643, 88651, 88657, 88661, 88663, 88667, -88681, 88721, 88729, 88741, 88747, 88771, 88789, 88793, 88799, 88801, -88807, 88811, 88813, 88817, 88819, 88843, 88853, 88861, 88867, 88873, -88883, 88897, 88903, 88919, 88937, 88951, 88969, 88993, 88997, 89003, -89009, 89017, 89021, 89041, 89051, 89057, 89069, 89071, 89083, 89087, -89101, 89107, 89113, 89119, 89123, 89137, 89153, 89189, 89203, 89209, -89213, 89227, 89231, 89237, 89261, 89269, 89273, 89293, 89303, 89317, -89329, 89363, 89371, 89381, 89387, 89393, 89399, 89413, 89417, 89431, -89443, 89449, 89459, 89477, 89491, 89501, 89513, 89519, 89521, 89527, -89533, 89561, 89563, 89567, 89591, 89597, 89599, 89603, 89611, 89627, -89633, 89653, 89657, 89659, 89669, 89671, 89681, 89689, 89753, 89759, -89767, 89779, 89783, 89797, 89809, 89819, 89821, 89833, 89839, 89849, -89867, 89891, 89897, 89899, 89909, 89917, 89923, 89939, 89959, 89963, -89977, 89983, 89989, 90001, 90007, 90011, 90017, 90019, 90023, 90031, -90053, 90059, 90067, 90071, 90073, 90089, 90107, 90121, 90127, 90149, -90163, 90173, 90187, 90191, 90197, 90199, 90203, 90217, 90227, 90239, -90247, 90263, 90271, 90281, 90289, 90313, 90353, 90359, 90371, 90373, -90379, 90397, 90401, 90403, 90407, 90437, 90439, 90469, 90473, 90481, -90499, 90511, 90523, 90527, 90529, 90533, 90547, 90583, 90599, 90617, -90619, 90631, 90641, 90647, 90659, 90677, 90679, 90697, 90703, 90709, -90731, 90749, 90787, 90793, 90803, 90821, 90823, 90833, 90841, 90847, -90863, 90887, 90901, 90907, 90911, 90917, 90931, 90947, 90971, 90977, -90989, 90997, 91009, 91019, 91033, 91079, 91081, 91097, 91099, 91121, -91127, 91129, 91139, 91141, 91151, 91153, 91159, 91163, 91183, 91193, -91199, 91229, 91237, 91243, 91249, 91253, 91283, 91291, 91297, 91303, -91309, 91331, 91367, 91369, 91373, 91381, 91387, 91393, 
91397, 91411, -91423, 91433, 91453, 91457, 91459, 91463, 91493, 91499, 91513, 91529, -91541, 91571, 91573, 91577, 91583, 91591, 91621, 91631, 91639, 91673, -91691, 91703, 91711, 91733, 91753, 91757, 91771, 91781, 91801, 91807, -91811, 91813, 91823, 91837, 91841, 91867, 91873, 91909, 91921, 91939, -91943, 91951, 91957, 91961, 91967, 91969, 91997, 92003, 92009, 92033, -92041, 92051, 92077, 92083, 92107, 92111, 92119, 92143, 92153, 92173, -92177, 92179, 92189, 92203, 92219, 92221, 92227, 92233, 92237, 92243, -92251, 92269, 92297, 92311, 92317, 92333, 92347, 92353, 92357, 92363, -92369, 92377, 92381, 92383, 92387, 92399, 92401, 92413, 92419, 92431, -92459, 92461, 92467, 92479, 92489, 92503, 92507, 92551, 92557, 92567, -92569, 92581, 92593, 92623, 92627, 92639, 92641, 92647, 92657, 92669, -92671, 92681, 92683, 92693, 92699, 92707, 92717, 92723, 92737, 92753, -92761, 92767, 92779, 92789, 92791, 92801, 92809, 92821, 92831, 92849, -92857, 92861, 92863, 92867, 92893, 92899, 92921, 92927, 92941, 92951, -92957, 92959, 92987, 92993, 93001, 93047, 93053, 93059, 93077, 93083, -93089, 93097, 93103, 93113, 93131, 93133, 93139, 93151, 93169, 93179, -93187, 93199, 93229, 93239, 93241, 93251, 93253, 93257, 93263, 93281, -93283, 93287, 93307, 93319, 93323, 93329, 93337, 93371, 93377, 93383, -93407, 93419, 93427, 93463, 93479, 93481, 93487, 93491, 93493, 93497, -93503, 93523, 93529, 93553, 93557, 93559, 93563, 93581, 93601, 93607, -93629, 93637, 93683, 93701, 93703, 93719, 93739, 93761, 93763, 93787, -93809, 93811, 93827, 93851, 93871, 93887, 93889, 93893, 93901, 93911, -93913, 93923, 93937, 93941, 93949, 93967, 93971, 93979, 93983, 93997, -94007, 94009, 94033, 94049, 94057, 94063, 94079, 94099, 94109, 94111, -94117, 94121, 94151, 94153, 94169, 94201, 94207, 94219, 94229, 94253, -94261, 94273, 94291, 94307, 94309, 94321, 94327, 94331, 94343, 94349, -94351, 94379, 94397, 94399, 94421, 94427, 94433, 94439, 94441, 94447, -94463, 94477, 94483, 94513, 94529, 94531, 94541, 94543, 94547, 
94559, -94561, 94573, 94583, 94597, 94603, 94613, 94621, 94649, 94651, 94687, -94693, 94709, 94723, 94727, 94747, 94771, 94777, 94781, 94789, 94793, -94811, 94819, 94823, 94837, 94841, 94847, 94849, 94873, 94889, 94903, -94907, 94933, 94949, 94951, 94961, 94993, 94999, 95003, 95009, 95021, -95027, 95063, 95071, 95083, 95087, 95089, 95093, 95101, 95107, 95111, -95131, 95143, 95153, 95177, 95189, 95191, 95203, 95213, 95219, 95231, -95233, 95239, 95257, 95261, 95267, 95273, 95279, 95287, 95311, 95317, -95327, 95339, 95369, 95383, 95393, 95401, 95413, 95419, 95429, 95441, -95443, 95461, 95467, 95471, 95479, 95483, 95507, 95527, 95531, 95539, -95549, 95561, 95569, 95581, 95597, 95603, 95617, 95621, 95629, 95633, -95651, 95701, 95707, 95713, 95717, 95723, 95731, 95737, 95747, 95773, -95783, 95789, 95791, 95801, 95803, 95813, 95819, 95857, 95869, 95873, -95881, 95891, 95911, 95917, 95923, 95929, 95947, 95957, 95959, 95971, -95987, 95989, 96001, 96013, 96017, 96043, 96053, 96059, 96079, 96097, -96137, 96149, 96157, 96167, 96179, 96181, 96199, 96211, 96221, 96223, -96233, 96259, 96263, 96269, 96281, 96289, 96293, 96323, 96329, 96331, -96337, 96353, 96377, 96401, 96419, 96431, 96443, 96451, 96457, 96461, -96469, 96479, 96487, 96493, 96497, 96517, 96527, 96553, 96557, 96581, -96587, 96589, 96601, 96643, 96661, 96667, 96671, 96697, 96703, 96731, -96737, 96739, 96749, 96757, 96763, 96769, 96779, 96787, 96797, 96799, -96821, 96823, 96827, 96847, 96851, 96857, 96893, 96907, 96911, 96931, -96953, 96959, 96973, 96979, 96989, 96997, 97001, 97003, 97007, 97021, -97039, 97073, 97081, 97103, 97117, 97127, 97151, 97157, 97159, 97169, -97171, 97177, 97187, 97213, 97231, 97241, 97259, 97283, 97301, 97303, -97327, 97367, 97369, 97373, 97379, 97381, 97387, 97397, 97423, 97429, -97441, 97453, 97459, 97463, 97499, 97501, 97511, 97523, 97547, 97549, -97553, 97561, 97571, 97577, 97579, 97583, 97607, 97609, 97613, 97649, -97651, 97673, 97687, 97711, 97729, 97771, 97777, 97787, 97789, 97813, 
-97829, 97841, 97843, 97847, 97849, 97859, 97861, 97871, 97879, 97883, -97919, 97927, 97931, 97943, 97961, 97967, 97973, 97987, 98009, 98011, -98017, 98041, 98047, 98057, 98081, 98101, 98123, 98129, 98143, 98179, -98207, 98213, 98221, 98227, 98251, 98257, 98269, 98297, 98299, 98317, -98321, 98323, 98327, 98347, 98369, 98377, 98387, 98389, 98407, 98411, -98419, 98429, 98443, 98453, 98459, 98467, 98473, 98479, 98491, 98507, -98519, 98533, 98543, 98561, 98563, 98573, 98597, 98621, 98627, 98639, -98641, 98663, 98669, 98689, 98711, 98713, 98717, 98729, 98731, 98737, -98773, 98779, 98801, 98807, 98809, 98837, 98849, 98867, 98869, 98873, -98887, 98893, 98897, 98899, 98909, 98911, 98927, 98929, 98939, 98947, -98953, 98963, 98981, 98993, 98999, 99013, 99017, 99023, 99041, 99053, -99079, 99083, 99089, 99103, 99109, 99119, 99131, 99133, 99137, 99139, -99149, 99173, 99181, 99191, 99223, 99233, 99241, 99251, 99257, 99259, -99277, 99289, 99317, 99347, 99349, 99367, 99371, 99377, 99391, 99397, -99401, 99409, 99431, 99439, 99469, 99487, 99497, 99523, 99527, 99529, -99551, 99559, 99563, 99571, 99577, 99581, 99607, 99611, 99623, 99643, -99661, 99667, 99679, 99689, 99707, 99709, 99713, 99719, 99721, 99733, -99761, 99767, 99787, 99793, 99809, 99817, 99823, 99829, 99833, 99839, -99859, 99871, 99877, 99881, 99901, 99907, 99923, 99929, 99961, 99971, -99989, 99991, 100003, 100019, 100043, 100049, 100057, 100069, 100103, 100109, -100129, 100151, 100153, 100169, 100183, 100189, 100193, 100207, 100213, 100237, -100267, 100271, 100279, 100291, 100297, 100313, 100333, 100343, 100357, 100361, -100363, 100379, 100391, 100393, 100403, 100411, 100417, 100447, 100459, 100469, -100483, 100493, 100501, 100511, 100517, 100519, 100523, 100537, 100547, 100549, -100559, 100591, 100609, 100613, 100621, 100649, 100669, 100673, 100693, 100699, -100703, 100733, 100741, 100747, 100769, 100787, 100799, 100801, 100811, 100823, -100829, 100847, 100853, 100907, 100913, 100927, 100931, 100937, 100943, 100957, 
-100981, 100987, 100999, 101009, 101021, 101027, 101051, 101063, 101081, 101089, -101107, 101111, 101113, 101117, 101119, 101141, 101149, 101159, 101161, 101173, -101183, 101197, 101203, 101207, 101209, 101221, 101267, 101273, 101279, 101281, -101287, 101293, 101323, 101333, 101341, 101347, 101359, 101363, 101377, 101383, -101399, 101411, 101419, 101429, 101449, 101467, 101477, 101483, 101489, 101501, -101503, 101513, 101527, 101531, 101533, 101537, 101561, 101573, 101581, 101599, -101603, 101611, 101627, 101641, 101653, 101663, 101681, 101693, 101701, 101719, -101723, 101737, 101741, 101747, 101749, 101771, 101789, 101797, 101807, 101833, -101837, 101839, 101863, 101869, 101873, 101879, 101891, 101917, 101921, 101929, -101939, 101957, 101963, 101977, 101987, 101999, 102001, 102013, 102019, 102023, -102031, 102043, 102059, 102061, 102071, 102077, 102079, 102101, 102103, 102107, -102121, 102139, 102149, 102161, 102181, 102191, 102197, 102199, 102203, 102217, -102229, 102233, 102241, 102251, 102253, 102259, 102293, 102299, 102301, 102317, -102329, 102337, 102359, 102367, 102397, 102407, 102409, 102433, 102437, 102451, -102461, 102481, 102497, 102499, 102503, 102523, 102533, 102539, 102547, 102551, -102559, 102563, 102587, 102593, 102607, 102611, 102643, 102647, 102653, 102667, -102673, 102677, 102679, 102701, 102761, 102763, 102769, 102793, 102797, 102811, -102829, 102841, 102859, 102871, 102877, 102881, 102911, 102913, 102929, 102931, -102953, 102967, 102983, 103001, 103007, 103043, 103049, 103067, 103069, 103079, -103087, 103091, 103093, 103099, 103123, 103141, 103171, 103177, 103183, 103217, -103231, 103237, 103289, 103291, 103307, 103319, 103333, 103349, 103357, 103387, -103391, 103393, 103399, 103409, 103421, 103423, 103451, 103457, 103471, 103483, -103511, 103529, 103549, 103553, 103561, 103567, 103573, 103577, 103583, 103591, -103613, 103619, 103643, 103651, 103657, 103669, 103681, 103687, 103699, 103703, -103723, 103769, 103787, 103801, 103811, 103813, 
103837, 103841, 103843, 103867, -103889, 103903, 103913, 103919, 103951, 103963, 103967, 103969, 103979, 103981, -103991, 103993, 103997, 104003, 104009, 104021, 104033, 104047, 104053, 104059, -104087, 104089, 104107, 104113, 104119, 104123, 104147, 104149, 104161, 104173, -104179, 104183, 104207, 104231, 104233, 104239, 104243, 104281, 104287, 104297, -104309, 104311, 104323, 104327, 104347, 104369, 104381, 104383, 104393, 104399, -104417, 104459, 104471, 104473, 104479, 104491, 104513, 104527, 104537, 104543, -104549, 104551, 104561, 104579, 104593, 104597, 104623, 104639, 104651, 104659, +2, 3, 5, 7, 11, 13, 17, 19, 23, 29, +31, 37, 41, 43, 47, 53, 59, 61, 67, 71, +73, 79, 83, 89, 97, 101, 103, 107, 109, 113, +127, 131, 137, 139, 149, 151, 157, 163, 167, 173, +179, 181, 191, 193, 197, 199, 211, 223, 227, 229, +233, 239, 241, 251, 257, 263, 269, 271, 277, 281, +283, 293, 307, 311, 313, 317, 331, 337, 347, 349, +353, 359, 367, 373, 379, 383, 389, 397, 401, 409, +419, 421, 431, 433, 439, 443, 449, 457, 461, 463, +467, 479, 487, 491, 499, 503, 509, 521, 523, 541, +547, 557, 563, 569, 571, 577, 587, 593, 599, 601, +607, 613, 617, 619, 631, 641, 643, 647, 653, 659, +661, 673, 677, 683, 691, 701, 709, 719, 727, 733, +739, 743, 751, 757, 761, 769, 773, 787, 797, 809, +811, 821, 823, 827, 829, 839, 853, 857, 859, 863, +877, 881, 883, 887, 907, 911, 919, 929, 937, 941, +947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013, +1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069, +1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, +1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, +1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 1289, 1291, +1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373, +1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451, +1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, +1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583, +1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657, +1663, 
1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733, +1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, +1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 1879, 1889, +1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987, +1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053, +2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, +2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213, +2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287, +2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357, +2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, +2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 2521, 2531, +2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617, +2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687, +2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, +2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819, +2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 2903, +2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999, +3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, +3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181, +3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257, +3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331, +3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, +3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511, +3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 3559, 3571, +3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643, +3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, +3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, 3821, +3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907, +3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989, +4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, +4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139, +4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 4229, 4231, +4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 
4297, +4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, +4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 4483, 4493, +4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583, +4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657, +4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751, +4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831, +4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 4933, 4937, +4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003, +5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, +5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 5171, 5179, +5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279, +5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 5381, 5387, +5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443, +5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521, +5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 5623, 5639, +5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693, +5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, +5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 5851, 5857, +5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939, +5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 6047, 6053, +6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133, +6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221, +6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 6299, 6301, +6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367, +6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, +6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 6569, 6571, +6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673, +6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 6737, 6761, +6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833, +6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917, +6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 6991, 6997, +7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103, +7109, 7121, 7127, 7129, 7151, 7159, 
7177, 7187, 7193, 7207, +7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 7283, 7297, +7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411, +7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 7489, 7499, +7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561, +7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643, +7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 7717, 7723, +7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829, +7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, +7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, 8011, 8017, +8039, 8053, 8059, 8069, 8081, 8087, 8089, 8093, 8101, 8111, +8117, 8123, 8147, 8161, 8167, 8171, 8179, 8191, 8209, 8219, +8221, 8231, 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291, +8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, 8377, 8387, +8389, 8419, 8423, 8429, 8431, 8443, 8447, 8461, 8467, 8501, +8513, 8521, 8527, 8537, 8539, 8543, 8563, 8573, 8581, 8597, +8599, 8609, 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677, +8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, 8737, 8741, +8747, 8753, 8761, 8779, 8783, 8803, 8807, 8819, 8821, 8831, +8837, 8839, 8849, 8861, 8863, 8867, 8887, 8893, 8923, 8929, +8933, 8941, 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011, +9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, 9103, 9109, +9127, 9133, 9137, 9151, 9157, 9161, 9173, 9181, 9187, 9199, +9203, 9209, 9221, 9227, 9239, 9241, 9257, 9277, 9281, 9283, +9293, 9311, 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377, +9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, 9437, 9439, +9461, 9463, 9467, 9473, 9479, 9491, 9497, 9511, 9521, 9533, +9539, 9547, 9551, 9587, 9601, 9613, 9619, 9623, 9629, 9631, +9643, 9649, 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733, +9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 9803, 9811, +9817, 9829, 9833, 9839, 9851, 9857, 9859, 9871, 9883, 9887, +9901, 9907, 9923, 9929, 9931, 9941, 9949, 9967, 9973, 10007, +10009, 10037, 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099, +10103, 
10111, 10133, 10139, 10141, 10151, 10159, 10163, 10169, 10177, +10181, 10193, 10211, 10223, 10243, 10247, 10253, 10259, 10267, 10271, +10273, 10289, 10301, 10303, 10313, 10321, 10331, 10333, 10337, 10343, +10357, 10369, 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459, +10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, 10559, 10567, +10589, 10597, 10601, 10607, 10613, 10627, 10631, 10639, 10651, 10657, +10663, 10667, 10687, 10691, 10709, 10711, 10723, 10729, 10733, 10739, +10753, 10771, 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859, +10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, 10939, 10949, +10957, 10973, 10979, 10987, 10993, 11003, 11027, 11047, 11057, 11059, +11069, 11071, 11083, 11087, 11093, 11113, 11117, 11119, 11131, 11149, +11159, 11161, 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251, +11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, 11321, 11329, +11351, 11353, 11369, 11383, 11393, 11399, 11411, 11423, 11437, 11443, +11447, 11467, 11471, 11483, 11489, 11491, 11497, 11503, 11519, 11527, +11549, 11551, 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657, +11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, 11743, 11777, +11779, 11783, 11789, 11801, 11807, 11813, 11821, 11827, 11831, 11833, +11839, 11863, 11867, 11887, 11897, 11903, 11909, 11923, 11927, 11933, +11939, 11941, 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011, +12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, 12107, 12109, +12113, 12119, 12143, 12149, 12157, 12161, 12163, 12197, 12203, 12211, +12227, 12239, 12241, 12251, 12253, 12263, 12269, 12277, 12281, 12289, +12301, 12323, 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401, +12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, 12479, 12487, +12491, 12497, 12503, 12511, 12517, 12527, 12539, 12541, 12547, 12553, +12569, 12577, 12583, 12589, 12601, 12611, 12613, 12619, 12637, 12641, +12647, 12653, 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739, +12743, 12757, 
12763, 12781, 12791, 12799, 12809, 12821, 12823, 12829, +12841, 12853, 12889, 12893, 12899, 12907, 12911, 12917, 12919, 12923, +12941, 12953, 12959, 12967, 12973, 12979, 12983, 13001, 13003, 13007, +13009, 13033, 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, +13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, 13183, 13187, +13217, 13219, 13229, 13241, 13249, 13259, 13267, 13291, 13297, 13309, +13313, 13327, 13331, 13337, 13339, 13367, 13381, 13397, 13399, 13411, +13417, 13421, 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499, +13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, 13613, 13619, +13627, 13633, 13649, 13669, 13679, 13681, 13687, 13691, 13693, 13697, +13709, 13711, 13721, 13723, 13729, 13751, 13757, 13759, 13763, 13781, +13789, 13799, 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879, +13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, 13963, 13967, +13997, 13999, 14009, 14011, 14029, 14033, 14051, 14057, 14071, 14081, +14083, 14087, 14107, 14143, 14149, 14153, 14159, 14173, 14177, 14197, +14207, 14221, 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323, +14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, 14411, 14419, +14423, 14431, 14437, 14447, 14449, 14461, 14479, 14489, 14503, 14519, +14533, 14537, 14543, 14549, 14551, 14557, 14561, 14563, 14591, 14593, +14621, 14627, 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699, +14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, 14759, 14767, +14771, 14779, 14783, 14797, 14813, 14821, 14827, 14831, 14843, 14851, +14867, 14869, 14879, 14887, 14891, 14897, 14923, 14929, 14939, 14947, +14951, 14957, 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073, +15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, 15139, 15149, +15161, 15173, 15187, 15193, 15199, 15217, 15227, 15233, 15241, 15259, +15263, 15269, 15271, 15277, 15287, 15289, 15299, 15307, 15313, 15319, +15329, 15331, 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401, +15413, 15427, 15439, 
15443, 15451, 15461, 15467, 15473, 15493, 15497, +15511, 15527, 15541, 15551, 15559, 15569, 15581, 15583, 15601, 15607, +15619, 15629, 15641, 15643, 15647, 15649, 15661, 15667, 15671, 15679, +15683, 15727, 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773, +15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, 15877, 15881, +15887, 15889, 15901, 15907, 15913, 15919, 15923, 15937, 15959, 15971, +15973, 15991, 16001, 16007, 16033, 16057, 16061, 16063, 16067, 16069, +16073, 16087, 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, +16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, 16253, 16267, +16273, 16301, 16319, 16333, 16339, 16349, 16361, 16363, 16369, 16381, +16411, 16417, 16421, 16427, 16433, 16447, 16451, 16453, 16477, 16481, +16487, 16493, 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603, +16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, 16673, 16691, +16693, 16699, 16703, 16729, 16741, 16747, 16759, 16763, 16787, 16811, +16823, 16829, 16831, 16843, 16871, 16879, 16883, 16889, 16901, 16903, +16921, 16927, 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993, +17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, 17077, 17093, +17099, 17107, 17117, 17123, 17137, 17159, 17167, 17183, 17189, 17191, +17203, 17207, 17209, 17231, 17239, 17257, 17291, 17293, 17299, 17317, +17321, 17327, 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389, +17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, 17471, 17477, +17483, 17489, 17491, 17497, 17509, 17519, 17539, 17551, 17569, 17573, +17579, 17581, 17597, 17599, 17609, 17623, 17627, 17657, 17659, 17669, +17681, 17683, 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783, +17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, 17881, 17891, +17903, 17909, 17911, 17921, 17923, 17929, 17939, 17957, 17959, 17971, +17977, 17981, 17987, 17989, 18013, 18041, 18043, 18047, 18049, 18059, +18061, 18077, 18089, 18097, 18119, 18121, 18127, 18131, 18133, 18143, +18149, 18169, 18181, 18191, 
18199, 18211, 18217, 18223, 18229, 18233, +18251, 18253, 18257, 18269, 18287, 18289, 18301, 18307, 18311, 18313, +18329, 18341, 18353, 18367, 18371, 18379, 18397, 18401, 18413, 18427, +18433, 18439, 18443, 18451, 18457, 18461, 18481, 18493, 18503, 18517, +18521, 18523, 18539, 18541, 18553, 18583, 18587, 18593, 18617, 18637, +18661, 18671, 18679, 18691, 18701, 18713, 18719, 18731, 18743, 18749, +18757, 18773, 18787, 18793, 18797, 18803, 18839, 18859, 18869, 18899, +18911, 18913, 18917, 18919, 18947, 18959, 18973, 18979, 19001, 19009, +19013, 19031, 19037, 19051, 19069, 19073, 19079, 19081, 19087, 19121, +19139, 19141, 19157, 19163, 19181, 19183, 19207, 19211, 19213, 19219, +19231, 19237, 19249, 19259, 19267, 19273, 19289, 19301, 19309, 19319, +19333, 19373, 19379, 19381, 19387, 19391, 19403, 19417, 19421, 19423, +19427, 19429, 19433, 19441, 19447, 19457, 19463, 19469, 19471, 19477, +19483, 19489, 19501, 19507, 19531, 19541, 19543, 19553, 19559, 19571, +19577, 19583, 19597, 19603, 19609, 19661, 19681, 19687, 19697, 19699, +19709, 19717, 19727, 19739, 19751, 19753, 19759, 19763, 19777, 19793, +19801, 19813, 19819, 19841, 19843, 19853, 19861, 19867, 19889, 19891, +19913, 19919, 19927, 19937, 19949, 19961, 19963, 19973, 19979, 19991, +19993, 19997, 20011, 20021, 20023, 20029, 20047, 20051, 20063, 20071, +20089, 20101, 20107, 20113, 20117, 20123, 20129, 20143, 20147, 20149, +20161, 20173, 20177, 20183, 20201, 20219, 20231, 20233, 20249, 20261, +20269, 20287, 20297, 20323, 20327, 20333, 20341, 20347, 20353, 20357, +20359, 20369, 20389, 20393, 20399, 20407, 20411, 20431, 20441, 20443, +20477, 20479, 20483, 20507, 20509, 20521, 20533, 20543, 20549, 20551, +20563, 20593, 20599, 20611, 20627, 20639, 20641, 20663, 20681, 20693, +20707, 20717, 20719, 20731, 20743, 20747, 20749, 20753, 20759, 20771, +20773, 20789, 20807, 20809, 20849, 20857, 20873, 20879, 20887, 20897, +20899, 20903, 20921, 20929, 20939, 20947, 20959, 20963, 20981, 20983, +21001, 21011, 21013, 21017, 21019, 
21023, 21031, 21059, 21061, 21067, +21089, 21101, 21107, 21121, 21139, 21143, 21149, 21157, 21163, 21169, +21179, 21187, 21191, 21193, 21211, 21221, 21227, 21247, 21269, 21277, +21283, 21313, 21317, 21319, 21323, 21341, 21347, 21377, 21379, 21383, +21391, 21397, 21401, 21407, 21419, 21433, 21467, 21481, 21487, 21491, +21493, 21499, 21503, 21517, 21521, 21523, 21529, 21557, 21559, 21563, +21569, 21577, 21587, 21589, 21599, 21601, 21611, 21613, 21617, 21647, +21649, 21661, 21673, 21683, 21701, 21713, 21727, 21737, 21739, 21751, +21757, 21767, 21773, 21787, 21799, 21803, 21817, 21821, 21839, 21841, +21851, 21859, 21863, 21871, 21881, 21893, 21911, 21929, 21937, 21943, +21961, 21977, 21991, 21997, 22003, 22013, 22027, 22031, 22037, 22039, +22051, 22063, 22067, 22073, 22079, 22091, 22093, 22109, 22111, 22123, +22129, 22133, 22147, 22153, 22157, 22159, 22171, 22189, 22193, 22229, +22247, 22259, 22271, 22273, 22277, 22279, 22283, 22291, 22303, 22307, +22343, 22349, 22367, 22369, 22381, 22391, 22397, 22409, 22433, 22441, +22447, 22453, 22469, 22481, 22483, 22501, 22511, 22531, 22541, 22543, +22549, 22567, 22571, 22573, 22613, 22619, 22621, 22637, 22639, 22643, +22651, 22669, 22679, 22691, 22697, 22699, 22709, 22717, 22721, 22727, +22739, 22741, 22751, 22769, 22777, 22783, 22787, 22807, 22811, 22817, +22853, 22859, 22861, 22871, 22877, 22901, 22907, 22921, 22937, 22943, +22961, 22963, 22973, 22993, 23003, 23011, 23017, 23021, 23027, 23029, +23039, 23041, 23053, 23057, 23059, 23063, 23071, 23081, 23087, 23099, +23117, 23131, 23143, 23159, 23167, 23173, 23189, 23197, 23201, 23203, +23209, 23227, 23251, 23269, 23279, 23291, 23293, 23297, 23311, 23321, +23327, 23333, 23339, 23357, 23369, 23371, 23399, 23417, 23431, 23447, +23459, 23473, 23497, 23509, 23531, 23537, 23539, 23549, 23557, 23561, +23563, 23567, 23581, 23593, 23599, 23603, 23609, 23623, 23627, 23629, +23633, 23663, 23669, 23671, 23677, 23687, 23689, 23719, 23741, 23743, +23747, 23753, 23761, 23767, 23773, 23789, 
23801, 23813, 23819, 23827, +23831, 23833, 23857, 23869, 23873, 23879, 23887, 23893, 23899, 23909, +23911, 23917, 23929, 23957, 23971, 23977, 23981, 23993, 24001, 24007, +24019, 24023, 24029, 24043, 24049, 24061, 24071, 24077, 24083, 24091, +24097, 24103, 24107, 24109, 24113, 24121, 24133, 24137, 24151, 24169, +24179, 24181, 24197, 24203, 24223, 24229, 24239, 24247, 24251, 24281, +24317, 24329, 24337, 24359, 24371, 24373, 24379, 24391, 24407, 24413, +24419, 24421, 24439, 24443, 24469, 24473, 24481, 24499, 24509, 24517, +24527, 24533, 24547, 24551, 24571, 24593, 24611, 24623, 24631, 24659, +24671, 24677, 24683, 24691, 24697, 24709, 24733, 24749, 24763, 24767, +24781, 24793, 24799, 24809, 24821, 24841, 24847, 24851, 24859, 24877, +24889, 24907, 24917, 24919, 24923, 24943, 24953, 24967, 24971, 24977, +24979, 24989, 25013, 25031, 25033, 25037, 25057, 25073, 25087, 25097, +25111, 25117, 25121, 25127, 25147, 25153, 25163, 25169, 25171, 25183, +25189, 25219, 25229, 25237, 25243, 25247, 25253, 25261, 25301, 25303, +25307, 25309, 25321, 25339, 25343, 25349, 25357, 25367, 25373, 25391, +25409, 25411, 25423, 25439, 25447, 25453, 25457, 25463, 25469, 25471, +25523, 25537, 25541, 25561, 25577, 25579, 25583, 25589, 25601, 25603, +25609, 25621, 25633, 25639, 25643, 25657, 25667, 25673, 25679, 25693, +25703, 25717, 25733, 25741, 25747, 25759, 25763, 25771, 25793, 25799, +25801, 25819, 25841, 25847, 25849, 25867, 25873, 25889, 25903, 25913, +25919, 25931, 25933, 25939, 25943, 25951, 25969, 25981, 25997, 25999, +26003, 26017, 26021, 26029, 26041, 26053, 26083, 26099, 26107, 26111, +26113, 26119, 26141, 26153, 26161, 26171, 26177, 26183, 26189, 26203, +26209, 26227, 26237, 26249, 26251, 26261, 26263, 26267, 26293, 26297, +26309, 26317, 26321, 26339, 26347, 26357, 26371, 26387, 26393, 26399, +26407, 26417, 26423, 26431, 26437, 26449, 26459, 26479, 26489, 26497, +26501, 26513, 26539, 26557, 26561, 26573, 26591, 26597, 26627, 26633, +26641, 26647, 26669, 26681, 26683, 26687, 26693, 
26699, 26701, 26711, +26713, 26717, 26723, 26729, 26731, 26737, 26759, 26777, 26783, 26801, +26813, 26821, 26833, 26839, 26849, 26861, 26863, 26879, 26881, 26891, +26893, 26903, 26921, 26927, 26947, 26951, 26953, 26959, 26981, 26987, +26993, 27011, 27017, 27031, 27043, 27059, 27061, 27067, 27073, 27077, +27091, 27103, 27107, 27109, 27127, 27143, 27179, 27191, 27197, 27211, +27239, 27241, 27253, 27259, 27271, 27277, 27281, 27283, 27299, 27329, +27337, 27361, 27367, 27397, 27407, 27409, 27427, 27431, 27437, 27449, +27457, 27479, 27481, 27487, 27509, 27527, 27529, 27539, 27541, 27551, +27581, 27583, 27611, 27617, 27631, 27647, 27653, 27673, 27689, 27691, +27697, 27701, 27733, 27737, 27739, 27743, 27749, 27751, 27763, 27767, +27773, 27779, 27791, 27793, 27799, 27803, 27809, 27817, 27823, 27827, +27847, 27851, 27883, 27893, 27901, 27917, 27919, 27941, 27943, 27947, +27953, 27961, 27967, 27983, 27997, 28001, 28019, 28027, 28031, 28051, +28057, 28069, 28081, 28087, 28097, 28099, 28109, 28111, 28123, 28151, +28163, 28181, 28183, 28201, 28211, 28219, 28229, 28277, 28279, 28283, +28289, 28297, 28307, 28309, 28319, 28349, 28351, 28387, 28393, 28403, +28409, 28411, 28429, 28433, 28439, 28447, 28463, 28477, 28493, 28499, +28513, 28517, 28537, 28541, 28547, 28549, 28559, 28571, 28573, 28579, +28591, 28597, 28603, 28607, 28619, 28621, 28627, 28631, 28643, 28649, +28657, 28661, 28663, 28669, 28687, 28697, 28703, 28711, 28723, 28729, +28751, 28753, 28759, 28771, 28789, 28793, 28807, 28813, 28817, 28837, +28843, 28859, 28867, 28871, 28879, 28901, 28909, 28921, 28927, 28933, +28949, 28961, 28979, 29009, 29017, 29021, 29023, 29027, 29033, 29059, +29063, 29077, 29101, 29123, 29129, 29131, 29137, 29147, 29153, 29167, +29173, 29179, 29191, 29201, 29207, 29209, 29221, 29231, 29243, 29251, +29269, 29287, 29297, 29303, 29311, 29327, 29333, 29339, 29347, 29363, +29383, 29387, 29389, 29399, 29401, 29411, 29423, 29429, 29437, 29443, +29453, 29473, 29483, 29501, 29527, 29531, 29537, 29567, 
29569, 29573, +29581, 29587, 29599, 29611, 29629, 29633, 29641, 29663, 29669, 29671, +29683, 29717, 29723, 29741, 29753, 29759, 29761, 29789, 29803, 29819, +29833, 29837, 29851, 29863, 29867, 29873, 29879, 29881, 29917, 29921, +29927, 29947, 29959, 29983, 29989, 30011, 30013, 30029, 30047, 30059, +30071, 30089, 30091, 30097, 30103, 30109, 30113, 30119, 30133, 30137, +30139, 30161, 30169, 30181, 30187, 30197, 30203, 30211, 30223, 30241, +30253, 30259, 30269, 30271, 30293, 30307, 30313, 30319, 30323, 30341, +30347, 30367, 30389, 30391, 30403, 30427, 30431, 30449, 30467, 30469, +30491, 30493, 30497, 30509, 30517, 30529, 30539, 30553, 30557, 30559, +30577, 30593, 30631, 30637, 30643, 30649, 30661, 30671, 30677, 30689, +30697, 30703, 30707, 30713, 30727, 30757, 30763, 30773, 30781, 30803, +30809, 30817, 30829, 30839, 30841, 30851, 30853, 30859, 30869, 30871, +30881, 30893, 30911, 30931, 30937, 30941, 30949, 30971, 30977, 30983, +31013, 31019, 31033, 31039, 31051, 31063, 31069, 31079, 31081, 31091, +31121, 31123, 31139, 31147, 31151, 31153, 31159, 31177, 31181, 31183, +31189, 31193, 31219, 31223, 31231, 31237, 31247, 31249, 31253, 31259, +31267, 31271, 31277, 31307, 31319, 31321, 31327, 31333, 31337, 31357, +31379, 31387, 31391, 31393, 31397, 31469, 31477, 31481, 31489, 31511, +31513, 31517, 31531, 31541, 31543, 31547, 31567, 31573, 31583, 31601, +31607, 31627, 31643, 31649, 31657, 31663, 31667, 31687, 31699, 31721, +31723, 31727, 31729, 31741, 31751, 31769, 31771, 31793, 31799, 31817, +31847, 31849, 31859, 31873, 31883, 31891, 31907, 31957, 31963, 31973, +31981, 31991, 32003, 32009, 32027, 32029, 32051, 32057, 32059, 32063, +32069, 32077, 32083, 32089, 32099, 32117, 32119, 32141, 32143, 32159, +32173, 32183, 32189, 32191, 32203, 32213, 32233, 32237, 32251, 32257, +32261, 32297, 32299, 32303, 32309, 32321, 32323, 32327, 32341, 32353, +32359, 32363, 32369, 32371, 32377, 32381, 32401, 32411, 32413, 32423, +32429, 32441, 32443, 32467, 32479, 32491, 32497, 32503, 32507, 
32531, +32533, 32537, 32561, 32563, 32569, 32573, 32579, 32587, 32603, 32609, +32611, 32621, 32633, 32647, 32653, 32687, 32693, 32707, 32713, 32717, +32719, 32749, 32771, 32779, 32783, 32789, 32797, 32801, 32803, 32831, +32833, 32839, 32843, 32869, 32887, 32909, 32911, 32917, 32933, 32939, +32941, 32957, 32969, 32971, 32983, 32987, 32993, 32999, 33013, 33023, +33029, 33037, 33049, 33053, 33071, 33073, 33083, 33091, 33107, 33113, +33119, 33149, 33151, 33161, 33179, 33181, 33191, 33199, 33203, 33211, +33223, 33247, 33287, 33289, 33301, 33311, 33317, 33329, 33331, 33343, +33347, 33349, 33353, 33359, 33377, 33391, 33403, 33409, 33413, 33427, +33457, 33461, 33469, 33479, 33487, 33493, 33503, 33521, 33529, 33533, +33547, 33563, 33569, 33577, 33581, 33587, 33589, 33599, 33601, 33613, +33617, 33619, 33623, 33629, 33637, 33641, 33647, 33679, 33703, 33713, +33721, 33739, 33749, 33751, 33757, 33767, 33769, 33773, 33791, 33797, +33809, 33811, 33827, 33829, 33851, 33857, 33863, 33871, 33889, 33893, +33911, 33923, 33931, 33937, 33941, 33961, 33967, 33997, 34019, 34031, +34033, 34039, 34057, 34061, 34123, 34127, 34129, 34141, 34147, 34157, +34159, 34171, 34183, 34211, 34213, 34217, 34231, 34253, 34259, 34261, +34267, 34273, 34283, 34297, 34301, 34303, 34313, 34319, 34327, 34337, +34351, 34361, 34367, 34369, 34381, 34403, 34421, 34429, 34439, 34457, +34469, 34471, 34483, 34487, 34499, 34501, 34511, 34513, 34519, 34537, +34543, 34549, 34583, 34589, 34591, 34603, 34607, 34613, 34631, 34649, +34651, 34667, 34673, 34679, 34687, 34693, 34703, 34721, 34729, 34739, +34747, 34757, 34759, 34763, 34781, 34807, 34819, 34841, 34843, 34847, +34849, 34871, 34877, 34883, 34897, 34913, 34919, 34939, 34949, 34961, +34963, 34981, 35023, 35027, 35051, 35053, 35059, 35069, 35081, 35083, +35089, 35099, 35107, 35111, 35117, 35129, 35141, 35149, 35153, 35159, +35171, 35201, 35221, 35227, 35251, 35257, 35267, 35279, 35281, 35291, +35311, 35317, 35323, 35327, 35339, 35353, 35363, 35381, 35393, 35401, 
+35407, 35419, 35423, 35437, 35447, 35449, 35461, 35491, 35507, 35509, +35521, 35527, 35531, 35533, 35537, 35543, 35569, 35573, 35591, 35593, +35597, 35603, 35617, 35671, 35677, 35729, 35731, 35747, 35753, 35759, +35771, 35797, 35801, 35803, 35809, 35831, 35837, 35839, 35851, 35863, +35869, 35879, 35897, 35899, 35911, 35923, 35933, 35951, 35963, 35969, +35977, 35983, 35993, 35999, 36007, 36011, 36013, 36017, 36037, 36061, +36067, 36073, 36083, 36097, 36107, 36109, 36131, 36137, 36151, 36161, +36187, 36191, 36209, 36217, 36229, 36241, 36251, 36263, 36269, 36277, +36293, 36299, 36307, 36313, 36319, 36341, 36343, 36353, 36373, 36383, +36389, 36433, 36451, 36457, 36467, 36469, 36473, 36479, 36493, 36497, +36523, 36527, 36529, 36541, 36551, 36559, 36563, 36571, 36583, 36587, +36599, 36607, 36629, 36637, 36643, 36653, 36671, 36677, 36683, 36691, +36697, 36709, 36713, 36721, 36739, 36749, 36761, 36767, 36779, 36781, +36787, 36791, 36793, 36809, 36821, 36833, 36847, 36857, 36871, 36877, +36887, 36899, 36901, 36913, 36919, 36923, 36929, 36931, 36943, 36947, +36973, 36979, 36997, 37003, 37013, 37019, 37021, 37039, 37049, 37057, +37061, 37087, 37097, 37117, 37123, 37139, 37159, 37171, 37181, 37189, +37199, 37201, 37217, 37223, 37243, 37253, 37273, 37277, 37307, 37309, +37313, 37321, 37337, 37339, 37357, 37361, 37363, 37369, 37379, 37397, +37409, 37423, 37441, 37447, 37463, 37483, 37489, 37493, 37501, 37507, +37511, 37517, 37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, +37579, 37589, 37591, 37607, 37619, 37633, 37643, 37649, 37657, 37663, +37691, 37693, 37699, 37717, 37747, 37781, 37783, 37799, 37811, 37813, +37831, 37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, +37957, 37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039, 38047, +38053, 38069, 38083, 38113, 38119, 38149, 38153, 38167, 38177, 38183, +38189, 38197, 38201, 38219, 38231, 38237, 38239, 38261, 38273, 38281, +38287, 38299, 38303, 38317, 38321, 38327, 38329, 38333, 38351, 38371, +38377, 
38393, 38431, 38447, 38449, 38453, 38459, 38461, 38501, 38543, +38557, 38561, 38567, 38569, 38593, 38603, 38609, 38611, 38629, 38639, +38651, 38653, 38669, 38671, 38677, 38693, 38699, 38707, 38711, 38713, +38723, 38729, 38737, 38747, 38749, 38767, 38783, 38791, 38803, 38821, +38833, 38839, 38851, 38861, 38867, 38873, 38891, 38903, 38917, 38921, +38923, 38933, 38953, 38959, 38971, 38977, 38993, 39019, 39023, 39041, +39043, 39047, 39079, 39089, 39097, 39103, 39107, 39113, 39119, 39133, +39139, 39157, 39161, 39163, 39181, 39191, 39199, 39209, 39217, 39227, +39229, 39233, 39239, 39241, 39251, 39293, 39301, 39313, 39317, 39323, +39341, 39343, 39359, 39367, 39371, 39373, 39383, 39397, 39409, 39419, +39439, 39443, 39451, 39461, 39499, 39503, 39509, 39511, 39521, 39541, +39551, 39563, 39569, 39581, 39607, 39619, 39623, 39631, 39659, 39667, +39671, 39679, 39703, 39709, 39719, 39727, 39733, 39749, 39761, 39769, +39779, 39791, 39799, 39821, 39827, 39829, 39839, 39841, 39847, 39857, +39863, 39869, 39877, 39883, 39887, 39901, 39929, 39937, 39953, 39971, +39979, 39983, 39989, 40009, 40013, 40031, 40037, 40039, 40063, 40087, +40093, 40099, 40111, 40123, 40127, 40129, 40151, 40153, 40163, 40169, +40177, 40189, 40193, 40213, 40231, 40237, 40241, 40253, 40277, 40283, +40289, 40343, 40351, 40357, 40361, 40387, 40423, 40427, 40429, 40433, +40459, 40471, 40483, 40487, 40493, 40499, 40507, 40519, 40529, 40531, +40543, 40559, 40577, 40583, 40591, 40597, 40609, 40627, 40637, 40639, +40693, 40697, 40699, 40709, 40739, 40751, 40759, 40763, 40771, 40787, +40801, 40813, 40819, 40823, 40829, 40841, 40847, 40849, 40853, 40867, +40879, 40883, 40897, 40903, 40927, 40933, 40939, 40949, 40961, 40973, +40993, 41011, 41017, 41023, 41039, 41047, 41051, 41057, 41077, 41081, +41113, 41117, 41131, 41141, 41143, 41149, 41161, 41177, 41179, 41183, +41189, 41201, 41203, 41213, 41221, 41227, 41231, 41233, 41243, 41257, +41263, 41269, 41281, 41299, 41333, 41341, 41351, 41357, 41381, 41387, +41389, 41399, 
41411, 41413, 41443, 41453, 41467, 41479, 41491, 41507, +41513, 41519, 41521, 41539, 41543, 41549, 41579, 41593, 41597, 41603, +41609, 41611, 41617, 41621, 41627, 41641, 41647, 41651, 41659, 41669, +41681, 41687, 41719, 41729, 41737, 41759, 41761, 41771, 41777, 41801, +41809, 41813, 41843, 41849, 41851, 41863, 41879, 41887, 41893, 41897, +41903, 41911, 41927, 41941, 41947, 41953, 41957, 41959, 41969, 41981, +41983, 41999, 42013, 42017, 42019, 42023, 42043, 42061, 42071, 42073, +42083, 42089, 42101, 42131, 42139, 42157, 42169, 42179, 42181, 42187, +42193, 42197, 42209, 42221, 42223, 42227, 42239, 42257, 42281, 42283, +42293, 42299, 42307, 42323, 42331, 42337, 42349, 42359, 42373, 42379, +42391, 42397, 42403, 42407, 42409, 42433, 42437, 42443, 42451, 42457, +42461, 42463, 42467, 42473, 42487, 42491, 42499, 42509, 42533, 42557, +42569, 42571, 42577, 42589, 42611, 42641, 42643, 42649, 42667, 42677, +42683, 42689, 42697, 42701, 42703, 42709, 42719, 42727, 42737, 42743, +42751, 42767, 42773, 42787, 42793, 42797, 42821, 42829, 42839, 42841, +42853, 42859, 42863, 42899, 42901, 42923, 42929, 42937, 42943, 42953, +42961, 42967, 42979, 42989, 43003, 43013, 43019, 43037, 43049, 43051, +43063, 43067, 43093, 43103, 43117, 43133, 43151, 43159, 43177, 43189, +43201, 43207, 43223, 43237, 43261, 43271, 43283, 43291, 43313, 43319, +43321, 43331, 43391, 43397, 43399, 43403, 43411, 43427, 43441, 43451, +43457, 43481, 43487, 43499, 43517, 43541, 43543, 43573, 43577, 43579, +43591, 43597, 43607, 43609, 43613, 43627, 43633, 43649, 43651, 43661, +43669, 43691, 43711, 43717, 43721, 43753, 43759, 43777, 43781, 43783, +43787, 43789, 43793, 43801, 43853, 43867, 43889, 43891, 43913, 43933, +43943, 43951, 43961, 43963, 43969, 43973, 43987, 43991, 43997, 44017, +44021, 44027, 44029, 44041, 44053, 44059, 44071, 44087, 44089, 44101, +44111, 44119, 44123, 44129, 44131, 44159, 44171, 44179, 44189, 44201, +44203, 44207, 44221, 44249, 44257, 44263, 44267, 44269, 44273, 44279, +44281, 44293, 44351, 
44357, 44371, 44381, 44383, 44389, 44417, 44449, +44453, 44483, 44491, 44497, 44501, 44507, 44519, 44531, 44533, 44537, +44543, 44549, 44563, 44579, 44587, 44617, 44621, 44623, 44633, 44641, +44647, 44651, 44657, 44683, 44687, 44699, 44701, 44711, 44729, 44741, +44753, 44771, 44773, 44777, 44789, 44797, 44809, 44819, 44839, 44843, +44851, 44867, 44879, 44887, 44893, 44909, 44917, 44927, 44939, 44953, +44959, 44963, 44971, 44983, 44987, 45007, 45013, 45053, 45061, 45077, +45083, 45119, 45121, 45127, 45131, 45137, 45139, 45161, 45179, 45181, +45191, 45197, 45233, 45247, 45259, 45263, 45281, 45289, 45293, 45307, +45317, 45319, 45329, 45337, 45341, 45343, 45361, 45377, 45389, 45403, +45413, 45427, 45433, 45439, 45481, 45491, 45497, 45503, 45523, 45533, +45541, 45553, 45557, 45569, 45587, 45589, 45599, 45613, 45631, 45641, +45659, 45667, 45673, 45677, 45691, 45697, 45707, 45737, 45751, 45757, +45763, 45767, 45779, 45817, 45821, 45823, 45827, 45833, 45841, 45853, +45863, 45869, 45887, 45893, 45943, 45949, 45953, 45959, 45971, 45979, +45989, 46021, 46027, 46049, 46051, 46061, 46073, 46091, 46093, 46099, +46103, 46133, 46141, 46147, 46153, 46171, 46181, 46183, 46187, 46199, +46219, 46229, 46237, 46261, 46271, 46273, 46279, 46301, 46307, 46309, +46327, 46337, 46349, 46351, 46381, 46399, 46411, 46439, 46441, 46447, +46451, 46457, 46471, 46477, 46489, 46499, 46507, 46511, 46523, 46549, +46559, 46567, 46573, 46589, 46591, 46601, 46619, 46633, 46639, 46643, +46649, 46663, 46679, 46681, 46687, 46691, 46703, 46723, 46727, 46747, +46751, 46757, 46769, 46771, 46807, 46811, 46817, 46819, 46829, 46831, +46853, 46861, 46867, 46877, 46889, 46901, 46919, 46933, 46957, 46993, +46997, 47017, 47041, 47051, 47057, 47059, 47087, 47093, 47111, 47119, +47123, 47129, 47137, 47143, 47147, 47149, 47161, 47189, 47207, 47221, +47237, 47251, 47269, 47279, 47287, 47293, 47297, 47303, 47309, 47317, +47339, 47351, 47353, 47363, 47381, 47387, 47389, 47407, 47417, 47419, +47431, 47441, 47459, 47491, 
47497, 47501, 47507, 47513, 47521, 47527, +47533, 47543, 47563, 47569, 47581, 47591, 47599, 47609, 47623, 47629, +47639, 47653, 47657, 47659, 47681, 47699, 47701, 47711, 47713, 47717, +47737, 47741, 47743, 47777, 47779, 47791, 47797, 47807, 47809, 47819, +47837, 47843, 47857, 47869, 47881, 47903, 47911, 47917, 47933, 47939, +47947, 47951, 47963, 47969, 47977, 47981, 48017, 48023, 48029, 48049, +48073, 48079, 48091, 48109, 48119, 48121, 48131, 48157, 48163, 48179, +48187, 48193, 48197, 48221, 48239, 48247, 48259, 48271, 48281, 48299, +48311, 48313, 48337, 48341, 48353, 48371, 48383, 48397, 48407, 48409, +48413, 48437, 48449, 48463, 48473, 48479, 48481, 48487, 48491, 48497, +48523, 48527, 48533, 48539, 48541, 48563, 48571, 48589, 48593, 48611, +48619, 48623, 48647, 48649, 48661, 48673, 48677, 48679, 48731, 48733, +48751, 48757, 48761, 48767, 48779, 48781, 48787, 48799, 48809, 48817, +48821, 48823, 48847, 48857, 48859, 48869, 48871, 48883, 48889, 48907, +48947, 48953, 48973, 48989, 48991, 49003, 49009, 49019, 49031, 49033, +49037, 49043, 49057, 49069, 49081, 49103, 49109, 49117, 49121, 49123, +49139, 49157, 49169, 49171, 49177, 49193, 49199, 49201, 49207, 49211, +49223, 49253, 49261, 49277, 49279, 49297, 49307, 49331, 49333, 49339, +49363, 49367, 49369, 49391, 49393, 49409, 49411, 49417, 49429, 49433, +49451, 49459, 49463, 49477, 49481, 49499, 49523, 49529, 49531, 49537, +49547, 49549, 49559, 49597, 49603, 49613, 49627, 49633, 49639, 49663, +49667, 49669, 49681, 49697, 49711, 49727, 49739, 49741, 49747, 49757, +49783, 49787, 49789, 49801, 49807, 49811, 49823, 49831, 49843, 49853, +49871, 49877, 49891, 49919, 49921, 49927, 49937, 49939, 49943, 49957, +49991, 49993, 49999, 50021, 50023, 50033, 50047, 50051, 50053, 50069, +50077, 50087, 50093, 50101, 50111, 50119, 50123, 50129, 50131, 50147, +50153, 50159, 50177, 50207, 50221, 50227, 50231, 50261, 50263, 50273, +50287, 50291, 50311, 50321, 50329, 50333, 50341, 50359, 50363, 50377, +50383, 50387, 50411, 50417, 50423, 
50441, 50459, 50461, 50497, 50503, +50513, 50527, 50539, 50543, 50549, 50551, 50581, 50587, 50591, 50593, +50599, 50627, 50647, 50651, 50671, 50683, 50707, 50723, 50741, 50753, +50767, 50773, 50777, 50789, 50821, 50833, 50839, 50849, 50857, 50867, +50873, 50891, 50893, 50909, 50923, 50929, 50951, 50957, 50969, 50971, +50989, 50993, 51001, 51031, 51043, 51047, 51059, 51061, 51071, 51109, +51131, 51133, 51137, 51151, 51157, 51169, 51193, 51197, 51199, 51203, +51217, 51229, 51239, 51241, 51257, 51263, 51283, 51287, 51307, 51329, +51341, 51343, 51347, 51349, 51361, 51383, 51407, 51413, 51419, 51421, +51427, 51431, 51437, 51439, 51449, 51461, 51473, 51479, 51481, 51487, +51503, 51511, 51517, 51521, 51539, 51551, 51563, 51577, 51581, 51593, +51599, 51607, 51613, 51631, 51637, 51647, 51659, 51673, 51679, 51683, +51691, 51713, 51719, 51721, 51749, 51767, 51769, 51787, 51797, 51803, +51817, 51827, 51829, 51839, 51853, 51859, 51869, 51871, 51893, 51899, +51907, 51913, 51929, 51941, 51949, 51971, 51973, 51977, 51991, 52009, +52021, 52027, 52051, 52057, 52067, 52069, 52081, 52103, 52121, 52127, +52147, 52153, 52163, 52177, 52181, 52183, 52189, 52201, 52223, 52237, +52249, 52253, 52259, 52267, 52289, 52291, 52301, 52313, 52321, 52361, +52363, 52369, 52379, 52387, 52391, 52433, 52453, 52457, 52489, 52501, +52511, 52517, 52529, 52541, 52543, 52553, 52561, 52567, 52571, 52579, +52583, 52609, 52627, 52631, 52639, 52667, 52673, 52691, 52697, 52709, +52711, 52721, 52727, 52733, 52747, 52757, 52769, 52783, 52807, 52813, +52817, 52837, 52859, 52861, 52879, 52883, 52889, 52901, 52903, 52919, +52937, 52951, 52957, 52963, 52967, 52973, 52981, 52999, 53003, 53017, +53047, 53051, 53069, 53077, 53087, 53089, 53093, 53101, 53113, 53117, +53129, 53147, 53149, 53161, 53171, 53173, 53189, 53197, 53201, 53231, +53233, 53239, 53267, 53269, 53279, 53281, 53299, 53309, 53323, 53327, +53353, 53359, 53377, 53381, 53401, 53407, 53411, 53419, 53437, 53441, +53453, 53479, 53503, 53507, 53527, 53549, 
53551, 53569, 53591, 53593, +53597, 53609, 53611, 53617, 53623, 53629, 53633, 53639, 53653, 53657, +53681, 53693, 53699, 53717, 53719, 53731, 53759, 53773, 53777, 53783, +53791, 53813, 53819, 53831, 53849, 53857, 53861, 53881, 53887, 53891, +53897, 53899, 53917, 53923, 53927, 53939, 53951, 53959, 53987, 53993, +54001, 54011, 54013, 54037, 54049, 54059, 54083, 54091, 54101, 54121, +54133, 54139, 54151, 54163, 54167, 54181, 54193, 54217, 54251, 54269, +54277, 54287, 54293, 54311, 54319, 54323, 54331, 54347, 54361, 54367, +54371, 54377, 54401, 54403, 54409, 54413, 54419, 54421, 54437, 54443, +54449, 54469, 54493, 54497, 54499, 54503, 54517, 54521, 54539, 54541, +54547, 54559, 54563, 54577, 54581, 54583, 54601, 54617, 54623, 54629, +54631, 54647, 54667, 54673, 54679, 54709, 54713, 54721, 54727, 54751, +54767, 54773, 54779, 54787, 54799, 54829, 54833, 54851, 54869, 54877, +54881, 54907, 54917, 54919, 54941, 54949, 54959, 54973, 54979, 54983, +55001, 55009, 55021, 55049, 55051, 55057, 55061, 55073, 55079, 55103, +55109, 55117, 55127, 55147, 55163, 55171, 55201, 55207, 55213, 55217, +55219, 55229, 55243, 55249, 55259, 55291, 55313, 55331, 55333, 55337, +55339, 55343, 55351, 55373, 55381, 55399, 55411, 55439, 55441, 55457, +55469, 55487, 55501, 55511, 55529, 55541, 55547, 55579, 55589, 55603, +55609, 55619, 55621, 55631, 55633, 55639, 55661, 55663, 55667, 55673, +55681, 55691, 55697, 55711, 55717, 55721, 55733, 55763, 55787, 55793, +55799, 55807, 55813, 55817, 55819, 55823, 55829, 55837, 55843, 55849, +55871, 55889, 55897, 55901, 55903, 55921, 55927, 55931, 55933, 55949, +55967, 55987, 55997, 56003, 56009, 56039, 56041, 56053, 56081, 56087, +56093, 56099, 56101, 56113, 56123, 56131, 56149, 56167, 56171, 56179, +56197, 56207, 56209, 56237, 56239, 56249, 56263, 56267, 56269, 56299, +56311, 56333, 56359, 56369, 56377, 56383, 56393, 56401, 56417, 56431, +56437, 56443, 56453, 56467, 56473, 56477, 56479, 56489, 56501, 56503, +56509, 56519, 56527, 56531, 56533, 56543, 56569, 
56591, 56597, 56599, +56611, 56629, 56633, 56659, 56663, 56671, 56681, 56687, 56701, 56711, +56713, 56731, 56737, 56747, 56767, 56773, 56779, 56783, 56807, 56809, +56813, 56821, 56827, 56843, 56857, 56873, 56891, 56893, 56897, 56909, +56911, 56921, 56923, 56929, 56941, 56951, 56957, 56963, 56983, 56989, +56993, 56999, 57037, 57041, 57047, 57059, 57073, 57077, 57089, 57097, +57107, 57119, 57131, 57139, 57143, 57149, 57163, 57173, 57179, 57191, +57193, 57203, 57221, 57223, 57241, 57251, 57259, 57269, 57271, 57283, +57287, 57301, 57329, 57331, 57347, 57349, 57367, 57373, 57383, 57389, +57397, 57413, 57427, 57457, 57467, 57487, 57493, 57503, 57527, 57529, +57557, 57559, 57571, 57587, 57593, 57601, 57637, 57641, 57649, 57653, +57667, 57679, 57689, 57697, 57709, 57713, 57719, 57727, 57731, 57737, +57751, 57773, 57781, 57787, 57791, 57793, 57803, 57809, 57829, 57839, +57847, 57853, 57859, 57881, 57899, 57901, 57917, 57923, 57943, 57947, +57973, 57977, 57991, 58013, 58027, 58031, 58043, 58049, 58057, 58061, +58067, 58073, 58099, 58109, 58111, 58129, 58147, 58151, 58153, 58169, +58171, 58189, 58193, 58199, 58207, 58211, 58217, 58229, 58231, 58237, +58243, 58271, 58309, 58313, 58321, 58337, 58363, 58367, 58369, 58379, +58391, 58393, 58403, 58411, 58417, 58427, 58439, 58441, 58451, 58453, +58477, 58481, 58511, 58537, 58543, 58549, 58567, 58573, 58579, 58601, +58603, 58613, 58631, 58657, 58661, 58679, 58687, 58693, 58699, 58711, +58727, 58733, 58741, 58757, 58763, 58771, 58787, 58789, 58831, 58889, +58897, 58901, 58907, 58909, 58913, 58921, 58937, 58943, 58963, 58967, +58979, 58991, 58997, 59009, 59011, 59021, 59023, 59029, 59051, 59053, +59063, 59069, 59077, 59083, 59093, 59107, 59113, 59119, 59123, 59141, +59149, 59159, 59167, 59183, 59197, 59207, 59209, 59219, 59221, 59233, +59239, 59243, 59263, 59273, 59281, 59333, 59341, 59351, 59357, 59359, +59369, 59377, 59387, 59393, 59399, 59407, 59417, 59419, 59441, 59443, +59447, 59453, 59467, 59471, 59473, 59497, 59509, 59513, 
59539, 59557, +59561, 59567, 59581, 59611, 59617, 59621, 59627, 59629, 59651, 59659, +59663, 59669, 59671, 59693, 59699, 59707, 59723, 59729, 59743, 59747, +59753, 59771, 59779, 59791, 59797, 59809, 59833, 59863, 59879, 59887, +59921, 59929, 59951, 59957, 59971, 59981, 59999, 60013, 60017, 60029, +60037, 60041, 60077, 60083, 60089, 60091, 60101, 60103, 60107, 60127, +60133, 60139, 60149, 60161, 60167, 60169, 60209, 60217, 60223, 60251, +60257, 60259, 60271, 60289, 60293, 60317, 60331, 60337, 60343, 60353, +60373, 60383, 60397, 60413, 60427, 60443, 60449, 60457, 60493, 60497, +60509, 60521, 60527, 60539, 60589, 60601, 60607, 60611, 60617, 60623, +60631, 60637, 60647, 60649, 60659, 60661, 60679, 60689, 60703, 60719, +60727, 60733, 60737, 60757, 60761, 60763, 60773, 60779, 60793, 60811, +60821, 60859, 60869, 60887, 60889, 60899, 60901, 60913, 60917, 60919, +60923, 60937, 60943, 60953, 60961, 61001, 61007, 61027, 61031, 61043, +61051, 61057, 61091, 61099, 61121, 61129, 61141, 61151, 61153, 61169, +61211, 61223, 61231, 61253, 61261, 61283, 61291, 61297, 61331, 61333, +61339, 61343, 61357, 61363, 61379, 61381, 61403, 61409, 61417, 61441, +61463, 61469, 61471, 61483, 61487, 61493, 61507, 61511, 61519, 61543, +61547, 61553, 61559, 61561, 61583, 61603, 61609, 61613, 61627, 61631, +61637, 61643, 61651, 61657, 61667, 61673, 61681, 61687, 61703, 61717, +61723, 61729, 61751, 61757, 61781, 61813, 61819, 61837, 61843, 61861, +61871, 61879, 61909, 61927, 61933, 61949, 61961, 61967, 61979, 61981, +61987, 61991, 62003, 62011, 62017, 62039, 62047, 62053, 62057, 62071, +62081, 62099, 62119, 62129, 62131, 62137, 62141, 62143, 62171, 62189, +62191, 62201, 62207, 62213, 62219, 62233, 62273, 62297, 62299, 62303, +62311, 62323, 62327, 62347, 62351, 62383, 62401, 62417, 62423, 62459, +62467, 62473, 62477, 62483, 62497, 62501, 62507, 62533, 62539, 62549, +62563, 62581, 62591, 62597, 62603, 62617, 62627, 62633, 62639, 62653, +62659, 62683, 62687, 62701, 62723, 62731, 62743, 62753, 62761, 
62773, +62791, 62801, 62819, 62827, 62851, 62861, 62869, 62873, 62897, 62903, +62921, 62927, 62929, 62939, 62969, 62971, 62981, 62983, 62987, 62989, +63029, 63031, 63059, 63067, 63073, 63079, 63097, 63103, 63113, 63127, +63131, 63149, 63179, 63197, 63199, 63211, 63241, 63247, 63277, 63281, +63299, 63311, 63313, 63317, 63331, 63337, 63347, 63353, 63361, 63367, +63377, 63389, 63391, 63397, 63409, 63419, 63421, 63439, 63443, 63463, +63467, 63473, 63487, 63493, 63499, 63521, 63527, 63533, 63541, 63559, +63577, 63587, 63589, 63599, 63601, 63607, 63611, 63617, 63629, 63647, +63649, 63659, 63667, 63671, 63689, 63691, 63697, 63703, 63709, 63719, +63727, 63737, 63743, 63761, 63773, 63781, 63793, 63799, 63803, 63809, +63823, 63839, 63841, 63853, 63857, 63863, 63901, 63907, 63913, 63929, +63949, 63977, 63997, 64007, 64013, 64019, 64033, 64037, 64063, 64067, +64081, 64091, 64109, 64123, 64151, 64153, 64157, 64171, 64187, 64189, +64217, 64223, 64231, 64237, 64271, 64279, 64283, 64301, 64303, 64319, +64327, 64333, 64373, 64381, 64399, 64403, 64433, 64439, 64451, 64453, +64483, 64489, 64499, 64513, 64553, 64567, 64577, 64579, 64591, 64601, +64609, 64613, 64621, 64627, 64633, 64661, 64663, 64667, 64679, 64693, +64709, 64717, 64747, 64763, 64781, 64783, 64793, 64811, 64817, 64849, +64853, 64871, 64877, 64879, 64891, 64901, 64919, 64921, 64927, 64937, +64951, 64969, 64997, 65003, 65011, 65027, 65029, 65033, 65053, 65063, +65071, 65089, 65099, 65101, 65111, 65119, 65123, 65129, 65141, 65147, +65167, 65171, 65173, 65179, 65183, 65203, 65213, 65239, 65257, 65267, +65269, 65287, 65293, 65309, 65323, 65327, 65353, 65357, 65371, 65381, +65393, 65407, 65413, 65419, 65423, 65437, 65447, 65449, 65479, 65497, +65519, 65521, 65537, 65539, 65543, 65551, 65557, 65563, 65579, 65581, +65587, 65599, 65609, 65617, 65629, 65633, 65647, 65651, 65657, 65677, +65687, 65699, 65701, 65707, 65713, 65717, 65719, 65729, 65731, 65761, +65777, 65789, 65809, 65827, 65831, 65837, 65839, 65843, 65851, 65867, 
+65881, 65899, 65921, 65927, 65929, 65951, 65957, 65963, 65981, 65983, +65993, 66029, 66037, 66041, 66047, 66067, 66071, 66083, 66089, 66103, +66107, 66109, 66137, 66161, 66169, 66173, 66179, 66191, 66221, 66239, +66271, 66293, 66301, 66337, 66343, 66347, 66359, 66361, 66373, 66377, +66383, 66403, 66413, 66431, 66449, 66457, 66463, 66467, 66491, 66499, +66509, 66523, 66529, 66533, 66541, 66553, 66569, 66571, 66587, 66593, +66601, 66617, 66629, 66643, 66653, 66683, 66697, 66701, 66713, 66721, +66733, 66739, 66749, 66751, 66763, 66791, 66797, 66809, 66821, 66841, +66851, 66853, 66863, 66877, 66883, 66889, 66919, 66923, 66931, 66943, +66947, 66949, 66959, 66973, 66977, 67003, 67021, 67033, 67043, 67049, +67057, 67061, 67073, 67079, 67103, 67121, 67129, 67139, 67141, 67153, +67157, 67169, 67181, 67187, 67189, 67211, 67213, 67217, 67219, 67231, +67247, 67261, 67271, 67273, 67289, 67307, 67339, 67343, 67349, 67369, +67391, 67399, 67409, 67411, 67421, 67427, 67429, 67433, 67447, 67453, +67477, 67481, 67489, 67493, 67499, 67511, 67523, 67531, 67537, 67547, +67559, 67567, 67577, 67579, 67589, 67601, 67607, 67619, 67631, 67651, +67679, 67699, 67709, 67723, 67733, 67741, 67751, 67757, 67759, 67763, +67777, 67783, 67789, 67801, 67807, 67819, 67829, 67843, 67853, 67867, +67883, 67891, 67901, 67927, 67931, 67933, 67939, 67943, 67957, 67961, +67967, 67979, 67987, 67993, 68023, 68041, 68053, 68059, 68071, 68087, +68099, 68111, 68113, 68141, 68147, 68161, 68171, 68207, 68209, 68213, +68219, 68227, 68239, 68261, 68279, 68281, 68311, 68329, 68351, 68371, +68389, 68399, 68437, 68443, 68447, 68449, 68473, 68477, 68483, 68489, +68491, 68501, 68507, 68521, 68531, 68539, 68543, 68567, 68581, 68597, +68611, 68633, 68639, 68659, 68669, 68683, 68687, 68699, 68711, 68713, +68729, 68737, 68743, 68749, 68767, 68771, 68777, 68791, 68813, 68819, +68821, 68863, 68879, 68881, 68891, 68897, 68899, 68903, 68909, 68917, +68927, 68947, 68963, 68993, 69001, 69011, 69019, 69029, 69031, 69061, +69067, 
69073, 69109, 69119, 69127, 69143, 69149, 69151, 69163, 69191, +69193, 69197, 69203, 69221, 69233, 69239, 69247, 69257, 69259, 69263, +69313, 69317, 69337, 69341, 69371, 69379, 69383, 69389, 69401, 69403, +69427, 69431, 69439, 69457, 69463, 69467, 69473, 69481, 69491, 69493, +69497, 69499, 69539, 69557, 69593, 69623, 69653, 69661, 69677, 69691, +69697, 69709, 69737, 69739, 69761, 69763, 69767, 69779, 69809, 69821, +69827, 69829, 69833, 69847, 69857, 69859, 69877, 69899, 69911, 69929, +69931, 69941, 69959, 69991, 69997, 70001, 70003, 70009, 70019, 70039, +70051, 70061, 70067, 70079, 70099, 70111, 70117, 70121, 70123, 70139, +70141, 70157, 70163, 70177, 70181, 70183, 70199, 70201, 70207, 70223, +70229, 70237, 70241, 70249, 70271, 70289, 70297, 70309, 70313, 70321, +70327, 70351, 70373, 70379, 70381, 70393, 70423, 70429, 70439, 70451, +70457, 70459, 70481, 70487, 70489, 70501, 70507, 70529, 70537, 70549, +70571, 70573, 70583, 70589, 70607, 70619, 70621, 70627, 70639, 70657, +70663, 70667, 70687, 70709, 70717, 70729, 70753, 70769, 70783, 70793, +70823, 70841, 70843, 70849, 70853, 70867, 70877, 70879, 70891, 70901, +70913, 70919, 70921, 70937, 70949, 70951, 70957, 70969, 70979, 70981, +70991, 70997, 70999, 71011, 71023, 71039, 71059, 71069, 71081, 71089, +71119, 71129, 71143, 71147, 71153, 71161, 71167, 71171, 71191, 71209, +71233, 71237, 71249, 71257, 71261, 71263, 71287, 71293, 71317, 71327, +71329, 71333, 71339, 71341, 71347, 71353, 71359, 71363, 71387, 71389, +71399, 71411, 71413, 71419, 71429, 71437, 71443, 71453, 71471, 71473, +71479, 71483, 71503, 71527, 71537, 71549, 71551, 71563, 71569, 71593, +71597, 71633, 71647, 71663, 71671, 71693, 71699, 71707, 71711, 71713, +71719, 71741, 71761, 71777, 71789, 71807, 71809, 71821, 71837, 71843, +71849, 71861, 71867, 71879, 71881, 71887, 71899, 71909, 71917, 71933, +71941, 71947, 71963, 71971, 71983, 71987, 71993, 71999, 72019, 72031, +72043, 72047, 72053, 72073, 72077, 72089, 72091, 72101, 72103, 72109, +72139, 72161, 
72167, 72169, 72173, 72211, 72221, 72223, 72227, 72229, +72251, 72253, 72269, 72271, 72277, 72287, 72307, 72313, 72337, 72341, +72353, 72367, 72379, 72383, 72421, 72431, 72461, 72467, 72469, 72481, +72493, 72497, 72503, 72533, 72547, 72551, 72559, 72577, 72613, 72617, +72623, 72643, 72647, 72649, 72661, 72671, 72673, 72679, 72689, 72701, +72707, 72719, 72727, 72733, 72739, 72763, 72767, 72797, 72817, 72823, +72859, 72869, 72871, 72883, 72889, 72893, 72901, 72907, 72911, 72923, +72931, 72937, 72949, 72953, 72959, 72973, 72977, 72997, 73009, 73013, +73019, 73037, 73039, 73043, 73061, 73063, 73079, 73091, 73121, 73127, +73133, 73141, 73181, 73189, 73237, 73243, 73259, 73277, 73291, 73303, +73309, 73327, 73331, 73351, 73361, 73363, 73369, 73379, 73387, 73417, +73421, 73433, 73453, 73459, 73471, 73477, 73483, 73517, 73523, 73529, +73547, 73553, 73561, 73571, 73583, 73589, 73597, 73607, 73609, 73613, +73637, 73643, 73651, 73673, 73679, 73681, 73693, 73699, 73709, 73721, +73727, 73751, 73757, 73771, 73783, 73819, 73823, 73847, 73849, 73859, +73867, 73877, 73883, 73897, 73907, 73939, 73943, 73951, 73961, 73973, +73999, 74017, 74021, 74027, 74047, 74051, 74071, 74077, 74093, 74099, +74101, 74131, 74143, 74149, 74159, 74161, 74167, 74177, 74189, 74197, +74201, 74203, 74209, 74219, 74231, 74257, 74279, 74287, 74293, 74297, +74311, 74317, 74323, 74353, 74357, 74363, 74377, 74381, 74383, 74411, +74413, 74419, 74441, 74449, 74453, 74471, 74489, 74507, 74509, 74521, +74527, 74531, 74551, 74561, 74567, 74573, 74587, 74597, 74609, 74611, +74623, 74653, 74687, 74699, 74707, 74713, 74717, 74719, 74729, 74731, +74747, 74759, 74761, 74771, 74779, 74797, 74821, 74827, 74831, 74843, +74857, 74861, 74869, 74873, 74887, 74891, 74897, 74903, 74923, 74929, +74933, 74941, 74959, 75011, 75013, 75017, 75029, 75037, 75041, 75079, +75083, 75109, 75133, 75149, 75161, 75167, 75169, 75181, 75193, 75209, +75211, 75217, 75223, 75227, 75239, 75253, 75269, 75277, 75289, 75307, +75323, 75329, 75337, 
75347, 75353, 75367, 75377, 75389, 75391, 75401, +75403, 75407, 75431, 75437, 75479, 75503, 75511, 75521, 75527, 75533, +75539, 75541, 75553, 75557, 75571, 75577, 75583, 75611, 75617, 75619, +75629, 75641, 75653, 75659, 75679, 75683, 75689, 75703, 75707, 75709, +75721, 75731, 75743, 75767, 75773, 75781, 75787, 75793, 75797, 75821, +75833, 75853, 75869, 75883, 75913, 75931, 75937, 75941, 75967, 75979, +75983, 75989, 75991, 75997, 76001, 76003, 76031, 76039, 76079, 76081, +76091, 76099, 76103, 76123, 76129, 76147, 76157, 76159, 76163, 76207, +76213, 76231, 76243, 76249, 76253, 76259, 76261, 76283, 76289, 76303, +76333, 76343, 76367, 76369, 76379, 76387, 76403, 76421, 76423, 76441, +76463, 76471, 76481, 76487, 76493, 76507, 76511, 76519, 76537, 76541, +76543, 76561, 76579, 76597, 76603, 76607, 76631, 76649, 76651, 76667, +76673, 76679, 76697, 76717, 76733, 76753, 76757, 76771, 76777, 76781, +76801, 76819, 76829, 76831, 76837, 76847, 76871, 76873, 76883, 76907, +76913, 76919, 76943, 76949, 76961, 76963, 76991, 77003, 77017, 77023, +77029, 77041, 77047, 77069, 77081, 77093, 77101, 77137, 77141, 77153, +77167, 77171, 77191, 77201, 77213, 77237, 77239, 77243, 77249, 77261, +77263, 77267, 77269, 77279, 77291, 77317, 77323, 77339, 77347, 77351, +77359, 77369, 77377, 77383, 77417, 77419, 77431, 77447, 77471, 77477, +77479, 77489, 77491, 77509, 77513, 77521, 77527, 77543, 77549, 77551, +77557, 77563, 77569, 77573, 77587, 77591, 77611, 77617, 77621, 77641, +77647, 77659, 77681, 77687, 77689, 77699, 77711, 77713, 77719, 77723, +77731, 77743, 77747, 77761, 77773, 77783, 77797, 77801, 77813, 77839, +77849, 77863, 77867, 77893, 77899, 77929, 77933, 77951, 77969, 77977, +77983, 77999, 78007, 78017, 78031, 78041, 78049, 78059, 78079, 78101, +78121, 78137, 78139, 78157, 78163, 78167, 78173, 78179, 78191, 78193, +78203, 78229, 78233, 78241, 78259, 78277, 78283, 78301, 78307, 78311, +78317, 78341, 78347, 78367, 78401, 78427, 78437, 78439, 78467, 78479, +78487, 78497, 78509, 78511, 
78517, 78539, 78541, 78553, 78569, 78571, +78577, 78583, 78593, 78607, 78623, 78643, 78649, 78653, 78691, 78697, +78707, 78713, 78721, 78737, 78779, 78781, 78787, 78791, 78797, 78803, +78809, 78823, 78839, 78853, 78857, 78877, 78887, 78889, 78893, 78901, +78919, 78929, 78941, 78977, 78979, 78989, 79031, 79039, 79043, 79063, +79087, 79103, 79111, 79133, 79139, 79147, 79151, 79153, 79159, 79181, +79187, 79193, 79201, 79229, 79231, 79241, 79259, 79273, 79279, 79283, +79301, 79309, 79319, 79333, 79337, 79349, 79357, 79367, 79379, 79393, +79397, 79399, 79411, 79423, 79427, 79433, 79451, 79481, 79493, 79531, +79537, 79549, 79559, 79561, 79579, 79589, 79601, 79609, 79613, 79621, +79627, 79631, 79633, 79657, 79669, 79687, 79691, 79693, 79697, 79699, +79757, 79769, 79777, 79801, 79811, 79813, 79817, 79823, 79829, 79841, +79843, 79847, 79861, 79867, 79873, 79889, 79901, 79903, 79907, 79939, +79943, 79967, 79973, 79979, 79987, 79997, 79999, 80021, 80039, 80051, +80071, 80077, 80107, 80111, 80141, 80147, 80149, 80153, 80167, 80173, +80177, 80191, 80207, 80209, 80221, 80231, 80233, 80239, 80251, 80263, +80273, 80279, 80287, 80309, 80317, 80329, 80341, 80347, 80363, 80369, +80387, 80407, 80429, 80447, 80449, 80471, 80473, 80489, 80491, 80513, +80527, 80537, 80557, 80567, 80599, 80603, 80611, 80621, 80627, 80629, +80651, 80657, 80669, 80671, 80677, 80681, 80683, 80687, 80701, 80713, +80737, 80747, 80749, 80761, 80777, 80779, 80783, 80789, 80803, 80809, +80819, 80831, 80833, 80849, 80863, 80897, 80909, 80911, 80917, 80923, +80929, 80933, 80953, 80963, 80989, 81001, 81013, 81017, 81019, 81023, +81031, 81041, 81043, 81047, 81049, 81071, 81077, 81083, 81097, 81101, +81119, 81131, 81157, 81163, 81173, 81181, 81197, 81199, 81203, 81223, +81233, 81239, 81281, 81283, 81293, 81299, 81307, 81331, 81343, 81349, +81353, 81359, 81371, 81373, 81401, 81409, 81421, 81439, 81457, 81463, +81509, 81517, 81527, 81533, 81547, 81551, 81553, 81559, 81563, 81569, +81611, 81619, 81629, 81637, 81647, 
81649, 81667, 81671, 81677, 81689, +81701, 81703, 81707, 81727, 81737, 81749, 81761, 81769, 81773, 81799, +81817, 81839, 81847, 81853, 81869, 81883, 81899, 81901, 81919, 81929, +81931, 81937, 81943, 81953, 81967, 81971, 81973, 82003, 82007, 82009, +82013, 82021, 82031, 82037, 82039, 82051, 82067, 82073, 82129, 82139, +82141, 82153, 82163, 82171, 82183, 82189, 82193, 82207, 82217, 82219, +82223, 82231, 82237, 82241, 82261, 82267, 82279, 82301, 82307, 82339, +82349, 82351, 82361, 82373, 82387, 82393, 82421, 82457, 82463, 82469, +82471, 82483, 82487, 82493, 82499, 82507, 82529, 82531, 82549, 82559, +82561, 82567, 82571, 82591, 82601, 82609, 82613, 82619, 82633, 82651, +82657, 82699, 82721, 82723, 82727, 82729, 82757, 82759, 82763, 82781, +82787, 82793, 82799, 82811, 82813, 82837, 82847, 82883, 82889, 82891, +82903, 82913, 82939, 82963, 82981, 82997, 83003, 83009, 83023, 83047, +83059, 83063, 83071, 83077, 83089, 83093, 83101, 83117, 83137, 83177, +83203, 83207, 83219, 83221, 83227, 83231, 83233, 83243, 83257, 83267, +83269, 83273, 83299, 83311, 83339, 83341, 83357, 83383, 83389, 83399, +83401, 83407, 83417, 83423, 83431, 83437, 83443, 83449, 83459, 83471, +83477, 83497, 83537, 83557, 83561, 83563, 83579, 83591, 83597, 83609, +83617, 83621, 83639, 83641, 83653, 83663, 83689, 83701, 83717, 83719, +83737, 83761, 83773, 83777, 83791, 83813, 83833, 83843, 83857, 83869, +83873, 83891, 83903, 83911, 83921, 83933, 83939, 83969, 83983, 83987, +84011, 84017, 84047, 84053, 84059, 84061, 84067, 84089, 84121, 84127, +84131, 84137, 84143, 84163, 84179, 84181, 84191, 84199, 84211, 84221, +84223, 84229, 84239, 84247, 84263, 84299, 84307, 84313, 84317, 84319, +84347, 84349, 84377, 84389, 84391, 84401, 84407, 84421, 84431, 84437, +84443, 84449, 84457, 84463, 84467, 84481, 84499, 84503, 84509, 84521, +84523, 84533, 84551, 84559, 84589, 84629, 84631, 84649, 84653, 84659, +84673, 84691, 84697, 84701, 84713, 84719, 84731, 84737, 84751, 84761, +84787, 84793, 84809, 84811, 84827, 84857, 
84859, 84869, 84871, 84913, +84919, 84947, 84961, 84967, 84977, 84979, 84991, 85009, 85021, 85027, +85037, 85049, 85061, 85081, 85087, 85091, 85093, 85103, 85109, 85121, +85133, 85147, 85159, 85193, 85199, 85201, 85213, 85223, 85229, 85237, +85243, 85247, 85259, 85297, 85303, 85313, 85331, 85333, 85361, 85363, +85369, 85381, 85411, 85427, 85429, 85439, 85447, 85451, 85453, 85469, +85487, 85513, 85517, 85523, 85531, 85549, 85571, 85577, 85597, 85601, +85607, 85619, 85621, 85627, 85639, 85643, 85661, 85667, 85669, 85691, +85703, 85711, 85717, 85733, 85751, 85781, 85793, 85817, 85819, 85829, +85831, 85837, 85843, 85847, 85853, 85889, 85903, 85909, 85931, 85933, +85991, 85999, 86011, 86017, 86027, 86029, 86069, 86077, 86083, 86111, +86113, 86117, 86131, 86137, 86143, 86161, 86171, 86179, 86183, 86197, +86201, 86209, 86239, 86243, 86249, 86257, 86263, 86269, 86287, 86291, +86293, 86297, 86311, 86323, 86341, 86351, 86353, 86357, 86369, 86371, +86381, 86389, 86399, 86413, 86423, 86441, 86453, 86461, 86467, 86477, +86491, 86501, 86509, 86531, 86533, 86539, 86561, 86573, 86579, 86587, +86599, 86627, 86629, 86677, 86689, 86693, 86711, 86719, 86729, 86743, +86753, 86767, 86771, 86783, 86813, 86837, 86843, 86851, 86857, 86861, +86869, 86923, 86927, 86929, 86939, 86951, 86959, 86969, 86981, 86993, +87011, 87013, 87037, 87041, 87049, 87071, 87083, 87103, 87107, 87119, +87121, 87133, 87149, 87151, 87179, 87181, 87187, 87211, 87221, 87223, +87251, 87253, 87257, 87277, 87281, 87293, 87299, 87313, 87317, 87323, +87337, 87359, 87383, 87403, 87407, 87421, 87427, 87433, 87443, 87473, +87481, 87491, 87509, 87511, 87517, 87523, 87539, 87541, 87547, 87553, +87557, 87559, 87583, 87587, 87589, 87613, 87623, 87629, 87631, 87641, +87643, 87649, 87671, 87679, 87683, 87691, 87697, 87701, 87719, 87721, +87739, 87743, 87751, 87767, 87793, 87797, 87803, 87811, 87833, 87853, +87869, 87877, 87881, 87887, 87911, 87917, 87931, 87943, 87959, 87961, +87973, 87977, 87991, 88001, 88003, 88007, 88019, 
88037, 88069, 88079, +88093, 88117, 88129, 88169, 88177, 88211, 88223, 88237, 88241, 88259, +88261, 88289, 88301, 88321, 88327, 88337, 88339, 88379, 88397, 88411, +88423, 88427, 88463, 88469, 88471, 88493, 88499, 88513, 88523, 88547, +88589, 88591, 88607, 88609, 88643, 88651, 88657, 88661, 88663, 88667, +88681, 88721, 88729, 88741, 88747, 88771, 88789, 88793, 88799, 88801, +88807, 88811, 88813, 88817, 88819, 88843, 88853, 88861, 88867, 88873, +88883, 88897, 88903, 88919, 88937, 88951, 88969, 88993, 88997, 89003, +89009, 89017, 89021, 89041, 89051, 89057, 89069, 89071, 89083, 89087, +89101, 89107, 89113, 89119, 89123, 89137, 89153, 89189, 89203, 89209, +89213, 89227, 89231, 89237, 89261, 89269, 89273, 89293, 89303, 89317, +89329, 89363, 89371, 89381, 89387, 89393, 89399, 89413, 89417, 89431, +89443, 89449, 89459, 89477, 89491, 89501, 89513, 89519, 89521, 89527, +89533, 89561, 89563, 89567, 89591, 89597, 89599, 89603, 89611, 89627, +89633, 89653, 89657, 89659, 89669, 89671, 89681, 89689, 89753, 89759, +89767, 89779, 89783, 89797, 89809, 89819, 89821, 89833, 89839, 89849, +89867, 89891, 89897, 89899, 89909, 89917, 89923, 89939, 89959, 89963, +89977, 89983, 89989, 90001, 90007, 90011, 90017, 90019, 90023, 90031, +90053, 90059, 90067, 90071, 90073, 90089, 90107, 90121, 90127, 90149, +90163, 90173, 90187, 90191, 90197, 90199, 90203, 90217, 90227, 90239, +90247, 90263, 90271, 90281, 90289, 90313, 90353, 90359, 90371, 90373, +90379, 90397, 90401, 90403, 90407, 90437, 90439, 90469, 90473, 90481, +90499, 90511, 90523, 90527, 90529, 90533, 90547, 90583, 90599, 90617, +90619, 90631, 90641, 90647, 90659, 90677, 90679, 90697, 90703, 90709, +90731, 90749, 90787, 90793, 90803, 90821, 90823, 90833, 90841, 90847, +90863, 90887, 90901, 90907, 90911, 90917, 90931, 90947, 90971, 90977, +90989, 90997, 91009, 91019, 91033, 91079, 91081, 91097, 91099, 91121, +91127, 91129, 91139, 91141, 91151, 91153, 91159, 91163, 91183, 91193, +91199, 91229, 91237, 91243, 91249, 91253, 91283, 91291, 
91297, 91303, +91309, 91331, 91367, 91369, 91373, 91381, 91387, 91393, 91397, 91411, +91423, 91433, 91453, 91457, 91459, 91463, 91493, 91499, 91513, 91529, +91541, 91571, 91573, 91577, 91583, 91591, 91621, 91631, 91639, 91673, +91691, 91703, 91711, 91733, 91753, 91757, 91771, 91781, 91801, 91807, +91811, 91813, 91823, 91837, 91841, 91867, 91873, 91909, 91921, 91939, +91943, 91951, 91957, 91961, 91967, 91969, 91997, 92003, 92009, 92033, +92041, 92051, 92077, 92083, 92107, 92111, 92119, 92143, 92153, 92173, +92177, 92179, 92189, 92203, 92219, 92221, 92227, 92233, 92237, 92243, +92251, 92269, 92297, 92311, 92317, 92333, 92347, 92353, 92357, 92363, +92369, 92377, 92381, 92383, 92387, 92399, 92401, 92413, 92419, 92431, +92459, 92461, 92467, 92479, 92489, 92503, 92507, 92551, 92557, 92567, +92569, 92581, 92593, 92623, 92627, 92639, 92641, 92647, 92657, 92669, +92671, 92681, 92683, 92693, 92699, 92707, 92717, 92723, 92737, 92753, +92761, 92767, 92779, 92789, 92791, 92801, 92809, 92821, 92831, 92849, +92857, 92861, 92863, 92867, 92893, 92899, 92921, 92927, 92941, 92951, +92957, 92959, 92987, 92993, 93001, 93047, 93053, 93059, 93077, 93083, +93089, 93097, 93103, 93113, 93131, 93133, 93139, 93151, 93169, 93179, +93187, 93199, 93229, 93239, 93241, 93251, 93253, 93257, 93263, 93281, +93283, 93287, 93307, 93319, 93323, 93329, 93337, 93371, 93377, 93383, +93407, 93419, 93427, 93463, 93479, 93481, 93487, 93491, 93493, 93497, +93503, 93523, 93529, 93553, 93557, 93559, 93563, 93581, 93601, 93607, +93629, 93637, 93683, 93701, 93703, 93719, 93739, 93761, 93763, 93787, +93809, 93811, 93827, 93851, 93871, 93887, 93889, 93893, 93901, 93911, +93913, 93923, 93937, 93941, 93949, 93967, 93971, 93979, 93983, 93997, +94007, 94009, 94033, 94049, 94057, 94063, 94079, 94099, 94109, 94111, +94117, 94121, 94151, 94153, 94169, 94201, 94207, 94219, 94229, 94253, +94261, 94273, 94291, 94307, 94309, 94321, 94327, 94331, 94343, 94349, +94351, 94379, 94397, 94399, 94421, 94427, 94433, 94439, 94441, 
94447, +94463, 94477, 94483, 94513, 94529, 94531, 94541, 94543, 94547, 94559, +94561, 94573, 94583, 94597, 94603, 94613, 94621, 94649, 94651, 94687, +94693, 94709, 94723, 94727, 94747, 94771, 94777, 94781, 94789, 94793, +94811, 94819, 94823, 94837, 94841, 94847, 94849, 94873, 94889, 94903, +94907, 94933, 94949, 94951, 94961, 94993, 94999, 95003, 95009, 95021, +95027, 95063, 95071, 95083, 95087, 95089, 95093, 95101, 95107, 95111, +95131, 95143, 95153, 95177, 95189, 95191, 95203, 95213, 95219, 95231, +95233, 95239, 95257, 95261, 95267, 95273, 95279, 95287, 95311, 95317, +95327, 95339, 95369, 95383, 95393, 95401, 95413, 95419, 95429, 95441, +95443, 95461, 95467, 95471, 95479, 95483, 95507, 95527, 95531, 95539, +95549, 95561, 95569, 95581, 95597, 95603, 95617, 95621, 95629, 95633, +95651, 95701, 95707, 95713, 95717, 95723, 95731, 95737, 95747, 95773, +95783, 95789, 95791, 95801, 95803, 95813, 95819, 95857, 95869, 95873, +95881, 95891, 95911, 95917, 95923, 95929, 95947, 95957, 95959, 95971, +95987, 95989, 96001, 96013, 96017, 96043, 96053, 96059, 96079, 96097, +96137, 96149, 96157, 96167, 96179, 96181, 96199, 96211, 96221, 96223, +96233, 96259, 96263, 96269, 96281, 96289, 96293, 96323, 96329, 96331, +96337, 96353, 96377, 96401, 96419, 96431, 96443, 96451, 96457, 96461, +96469, 96479, 96487, 96493, 96497, 96517, 96527, 96553, 96557, 96581, +96587, 96589, 96601, 96643, 96661, 96667, 96671, 96697, 96703, 96731, +96737, 96739, 96749, 96757, 96763, 96769, 96779, 96787, 96797, 96799, +96821, 96823, 96827, 96847, 96851, 96857, 96893, 96907, 96911, 96931, +96953, 96959, 96973, 96979, 96989, 96997, 97001, 97003, 97007, 97021, +97039, 97073, 97081, 97103, 97117, 97127, 97151, 97157, 97159, 97169, +97171, 97177, 97187, 97213, 97231, 97241, 97259, 97283, 97301, 97303, +97327, 97367, 97369, 97373, 97379, 97381, 97387, 97397, 97423, 97429, +97441, 97453, 97459, 97463, 97499, 97501, 97511, 97523, 97547, 97549, +97553, 97561, 97571, 97577, 97579, 97583, 97607, 97609, 97613, 97649, 
+97651, 97673, 97687, 97711, 97729, 97771, 97777, 97787, 97789, 97813, +97829, 97841, 97843, 97847, 97849, 97859, 97861, 97871, 97879, 97883, +97919, 97927, 97931, 97943, 97961, 97967, 97973, 97987, 98009, 98011, +98017, 98041, 98047, 98057, 98081, 98101, 98123, 98129, 98143, 98179, +98207, 98213, 98221, 98227, 98251, 98257, 98269, 98297, 98299, 98317, +98321, 98323, 98327, 98347, 98369, 98377, 98387, 98389, 98407, 98411, +98419, 98429, 98443, 98453, 98459, 98467, 98473, 98479, 98491, 98507, +98519, 98533, 98543, 98561, 98563, 98573, 98597, 98621, 98627, 98639, +98641, 98663, 98669, 98689, 98711, 98713, 98717, 98729, 98731, 98737, +98773, 98779, 98801, 98807, 98809, 98837, 98849, 98867, 98869, 98873, +98887, 98893, 98897, 98899, 98909, 98911, 98927, 98929, 98939, 98947, +98953, 98963, 98981, 98993, 98999, 99013, 99017, 99023, 99041, 99053, +99079, 99083, 99089, 99103, 99109, 99119, 99131, 99133, 99137, 99139, +99149, 99173, 99181, 99191, 99223, 99233, 99241, 99251, 99257, 99259, +99277, 99289, 99317, 99347, 99349, 99367, 99371, 99377, 99391, 99397, +99401, 99409, 99431, 99439, 99469, 99487, 99497, 99523, 99527, 99529, +99551, 99559, 99563, 99571, 99577, 99581, 99607, 99611, 99623, 99643, +99661, 99667, 99679, 99689, 99707, 99709, 99713, 99719, 99721, 99733, +99761, 99767, 99787, 99793, 99809, 99817, 99823, 99829, 99833, 99839, +99859, 99871, 99877, 99881, 99901, 99907, 99923, 99929, 99961, 99971, +99989, 99991, 100003, 100019, 100043, 100049, 100057, 100069, 100103, 100109, +100129, 100151, 100153, 100169, 100183, 100189, 100193, 100207, 100213, 100237, +100267, 100271, 100279, 100291, 100297, 100313, 100333, 100343, 100357, 100361, +100363, 100379, 100391, 100393, 100403, 100411, 100417, 100447, 100459, 100469, +100483, 100493, 100501, 100511, 100517, 100519, 100523, 100537, 100547, 100549, +100559, 100591, 100609, 100613, 100621, 100649, 100669, 100673, 100693, 100699, +100703, 100733, 100741, 100747, 100769, 100787, 100799, 100801, 100811, 100823, +100829, 
100847, 100853, 100907, 100913, 100927, 100931, 100937, 100943, 100957, +100981, 100987, 100999, 101009, 101021, 101027, 101051, 101063, 101081, 101089, +101107, 101111, 101113, 101117, 101119, 101141, 101149, 101159, 101161, 101173, +101183, 101197, 101203, 101207, 101209, 101221, 101267, 101273, 101279, 101281, +101287, 101293, 101323, 101333, 101341, 101347, 101359, 101363, 101377, 101383, +101399, 101411, 101419, 101429, 101449, 101467, 101477, 101483, 101489, 101501, +101503, 101513, 101527, 101531, 101533, 101537, 101561, 101573, 101581, 101599, +101603, 101611, 101627, 101641, 101653, 101663, 101681, 101693, 101701, 101719, +101723, 101737, 101741, 101747, 101749, 101771, 101789, 101797, 101807, 101833, +101837, 101839, 101863, 101869, 101873, 101879, 101891, 101917, 101921, 101929, +101939, 101957, 101963, 101977, 101987, 101999, 102001, 102013, 102019, 102023, +102031, 102043, 102059, 102061, 102071, 102077, 102079, 102101, 102103, 102107, +102121, 102139, 102149, 102161, 102181, 102191, 102197, 102199, 102203, 102217, +102229, 102233, 102241, 102251, 102253, 102259, 102293, 102299, 102301, 102317, +102329, 102337, 102359, 102367, 102397, 102407, 102409, 102433, 102437, 102451, +102461, 102481, 102497, 102499, 102503, 102523, 102533, 102539, 102547, 102551, +102559, 102563, 102587, 102593, 102607, 102611, 102643, 102647, 102653, 102667, +102673, 102677, 102679, 102701, 102761, 102763, 102769, 102793, 102797, 102811, +102829, 102841, 102859, 102871, 102877, 102881, 102911, 102913, 102929, 102931, +102953, 102967, 102983, 103001, 103007, 103043, 103049, 103067, 103069, 103079, +103087, 103091, 103093, 103099, 103123, 103141, 103171, 103177, 103183, 103217, +103231, 103237, 103289, 103291, 103307, 103319, 103333, 103349, 103357, 103387, +103391, 103393, 103399, 103409, 103421, 103423, 103451, 103457, 103471, 103483, +103511, 103529, 103549, 103553, 103561, 103567, 103573, 103577, 103583, 103591, +103613, 103619, 103643, 103651, 103657, 103669, 103681, 103687, 
103699, 103703, +103723, 103769, 103787, 103801, 103811, 103813, 103837, 103841, 103843, 103867, +103889, 103903, 103913, 103919, 103951, 103963, 103967, 103969, 103979, 103981, +103991, 103993, 103997, 104003, 104009, 104021, 104033, 104047, 104053, 104059, +104087, 104089, 104107, 104113, 104119, 104123, 104147, 104149, 104161, 104173, +104179, 104183, 104207, 104231, 104233, 104239, 104243, 104281, 104287, 104297, +104309, 104311, 104323, 104327, 104347, 104369, 104381, 104383, 104393, 104399, +104417, 104459, 104471, 104473, 104479, 104491, 104513, 104527, 104537, 104543, +104549, 104551, 104561, 104579, 104593, 104597, 104623, 104639, 104651, 104659, 104677, 104681, 104683, 104693, 104701, 104707, 104711, 104717, 104723, 104729}; /* @@ -1376,8 +1376,8 @@ static int fca_register(void) MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_fca_component.fca_enable_cache); - + &mca_coll_fca_component.fca_enable_cache); + mca_coll_fca_component.fca_enable_hash = 0; (void) mca_base_component_var_register(c, "enable_hash", "[1|0|] Enable/Disable hash for fca comms cache", @@ -1393,23 +1393,23 @@ static int fca_register(void) MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_fca_component.fca_parallel_hash_calc); + &mca_coll_fca_component.fca_parallel_hash_calc); - mca_coll_fca_component.fca_hash_size = 5096; + mca_coll_fca_component.fca_hash_size = 5096; (void) mca_base_component_var_register(c, "hash_size", "[integer] Length of hash table", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_fca_component.fca_hash_size); - + &mca_coll_fca_component.fca_hash_size); + mca_coll_fca_component.fca_number_of_primes = 1024; (void) mca_base_component_var_register(c, "number_of_primes", "[integer] Number of primes to use", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_fca_component.fca_number_of_primes); + 
&mca_coll_fca_component.fca_number_of_primes); mca_coll_fca_component.compiletime_version = FCA_VERNO_STRING; (void) mca_base_component_var_register(c, MCA_COMPILETIME_VER, @@ -1435,8 +1435,8 @@ static int fca_register(void) mca_coll_fca_component.fca_cache_hit = 0; mca_coll_fca_component.fca_cache_miss = 0; mca_coll_fca_component.fca_hash_hit = 0; - mca_coll_fca_component.fca_hash_miss = 0; - mca_coll_fca_component.fca_max_deep_in_cache = 0; + mca_coll_fca_component.fca_hash_miss = 0; + mca_coll_fca_component.fca_max_deep_in_cache = 0; mca_coll_fca_component.fca_primes = mca_coll_fca_primes; @@ -1464,7 +1464,7 @@ static int fca_open(void) for(i = 0; i< mca_coll_fca_component.fca_hash_size; i++) { mca_coll_fca_component.fca_hash[i] = NULL; } - } + } return OMPI_SUCCESS; } @@ -1472,7 +1472,7 @@ static int fca_open(void) static int fca_close(void) { if(mca_coll_fca_component.fca_enable_cache) { - + mca_coll_fca_c_cache_item_t *item; while(NULL != (item = (mca_coll_fca_c_cache_item_t *)opal_list_remove_first(&mca_coll_fca_component.c_cache))) { OBJ_RELEASE(item); @@ -1481,14 +1481,14 @@ static int fca_close(void) OBJ_DESTRUCT(&mca_coll_fca_component.c_cache); } - + if(mca_coll_fca_component.fca_enable_hash && mca_coll_fca_component.fca_enable_hash) { int i = 0; mca_coll_fca_c_cache_item_t *item; for(i = 0; i< mca_coll_fca_component.fca_hash_size; i++) { - + if(mca_coll_fca_component.fca_hash[i] != NULL) { - + while(NULL != (item = (mca_coll_fca_c_cache_item_t *)opal_list_remove_first(mca_coll_fca_component.fca_hash[i]))) { OBJ_RELEASE(item); } @@ -1498,7 +1498,7 @@ static int fca_close(void) } } free(mca_coll_fca_component.fca_hash); - } + } if(mca_coll_fca_component.fca_verbose == 10) { char file_name[30]; @@ -1516,17 +1516,17 @@ static int fca_close(void) fclose(fileHandle); } - + FCA_VERBOSE(10,"fca_total_work_time %f\n", mca_coll_fca_component.fca_total_work_time); - + FCA_VERBOSE(10,"fca_cache_hit %d\n", mca_coll_fca_component.fca_cache_hit); - + 
FCA_VERBOSE(10,"fca_cache_miss %d\n", mca_coll_fca_component.fca_cache_miss); - + FCA_VERBOSE(10,"fca_hash_hit %d\n", mca_coll_fca_component.fca_hash_hit); - + FCA_VERBOSE(10,"fca_hash_miss %d\n", mca_coll_fca_component.fca_hash_miss); - + FCA_VERBOSE(10,"fca_max_deep %d\n", mca_coll_fca_component.fca_max_deep_in_cache); FCA_VERBOSE(2, "==>"); diff --git a/ompi/mca/coll/fca/coll_fca_module.c b/ompi/mca/coll/fca/coll_fca_module.c index 36fbebf3165..4c250e0eea2 100644 --- a/ompi/mca/coll/fca/coll_fca_module.c +++ b/ompi/mca/coll/fca/coll_fca_module.c @@ -2,6 +2,7 @@ * Copyright (c) 2011 Mellanox Technologies. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,25 +36,6 @@ int mca_coll_fca_init_query(bool enable_progress_threads, return OMPI_SUCCESS; } -static int have_remote_peers(ompi_group_t *group, size_t size, int *local_peers) -{ - ompi_proc_t *proc; - size_t i; - int ret; - - *local_peers = 0; - ret = 0; - for (i = 0; i < size; ++i) { - proc = ompi_group_peer_lookup(group, i); - if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) { - ++*local_peers; - } else { - ret = 1; - } - } - return ret; -} - static inline ompi_proc_t* __local_rank_lookup(ompi_communicator_t *comm, int rank) { return ompi_group_peer_lookup(comm->c_local_group, rank); @@ -281,7 +263,7 @@ static int __create_fca_comm(mca_coll_fca_module_t *fca_module) opal_list_t *c_cache; struct timeval start, end, seq_start, seq_end, par_start, par_end; int act_deep; - + if(mca_coll_fca_component.fca_verbose == 10) { gettimeofday(&start, NULL); @@ -297,7 +279,7 @@ static int __create_fca_comm(mca_coll_fca_module_t *fca_module) int hash_index, part_of_hash_index; if(mca_coll_fca_component.fca_parallel_hash_calc == 1) { - + if(mca_coll_fca_component.fca_verbose == 10){ 
gettimeofday(&par_start, NULL); } @@ -365,7 +347,7 @@ static int __create_fca_comm(mca_coll_fca_module_t *fca_module) /* first check the size */ if( c_item && (comm_size == c_item->size)) { /* then we have a potential cache hit */ - ompi_comm_compare(comm, c_item->comm, &result); + ompi_comm_compare(comm, c_item->comm, &result); if( MPI_CONGRUENT == result) { /* cache hit! Return the context and be done with it */ /* first bump the score */ @@ -378,12 +360,12 @@ static int __create_fca_comm(mca_coll_fca_module_t *fca_module) if(mca_coll_fca_component.fca_verbose == 10) { gettimeofday(&end, NULL); - + mca_coll_fca_component.fca_total_work_time =+ end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec); - + mca_coll_fca_component.fca_cache_hit += 1; - + if(act_deep>mca_coll_fca_component.fca_max_deep_in_cache) mca_coll_fca_component.fca_max_deep_in_cache = act_deep; } @@ -426,7 +408,7 @@ static int __create_fca_comm(mca_coll_fca_module_t *fca_module) c_item_new = OBJ_NEW(mca_coll_fca_c_cache_item_t); c_item_new->fca_comm_wrap = OBJ_NEW(mca_coll_fca_comm_wrap_t); - OBJ_RETAIN(comm); + OBJ_RETAIN(comm); c_item_new->size = comm_size; c_item_new->comm = comm; @@ -438,9 +420,9 @@ static int __create_fca_comm(mca_coll_fca_module_t *fca_module) } if(mca_coll_fca_component.fca_verbose == 10) { - + gettimeofday(&end, NULL); - + mca_coll_fca_component.fca_total_work_time =+ end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec); @@ -534,11 +516,6 @@ static int mca_coll_fca_module_enable(mca_coll_base_module_t *module, } -static int mca_coll_fca_ft_event(int state) -{ - return OMPI_SUCCESS; -} - static void mca_coll_fca_module_clear(mca_coll_fca_module_t *fca_module) { fca_module->num_local_procs = 0; @@ -572,7 +549,7 @@ static void mca_coll_fca_module_destruct(mca_coll_fca_module_t *fca_module) if (fca_module->fca_comm) { __destroy_fca_comm(fca_module); - } + } } OBJ_RELEASE(fca_module->previous_barrier_module); @@ -618,7 +595,7 @@ 
mca_coll_fca_comm_query(struct ompi_communicator_t *comm, int *priority) if (size < mca_coll_fca_component.fca_np) goto exit; - if (!have_remote_peers(comm->c_local_group, size, &local_peers) || OMPI_COMM_IS_INTER(comm)) + if (!ompi_group_have_remote_peers(comm->c_local_group) || OMPI_COMM_IS_INTER(comm)) goto exit; fca_module = OBJ_NEW(mca_coll_fca_module_t); @@ -626,7 +603,7 @@ mca_coll_fca_comm_query(struct ompi_communicator_t *comm, int *priority) goto exit; fca_module->super.coll_module_enable = mca_coll_fca_module_enable; - fca_module->super.ft_event = mca_coll_fca_ft_event; + fca_module->super.ft_event = NULL; fca_module->super.coll_allgather = mca_coll_fca_component.fca_enable_allgather? mca_coll_fca_allgather : NULL; fca_module->super.coll_allgatherv = mca_coll_fca_component.fca_enable_allgatherv? mca_coll_fca_allgatherv : NULL; fca_module->super.coll_allreduce = mca_coll_fca_component.fca_enable_allreduce? mca_coll_fca_allreduce : NULL; @@ -703,7 +680,7 @@ static void mca_coll_fca_c_cache_item_destruct(mca_coll_fca_c_cache_item_t *item OBJ_RELEASE(item->fca_comm_wrap); /* OBJ_RELEASE(item->comm); */ } -} +} OBJ_CLASS_INSTANCE(mca_coll_fca_c_cache_item_t, opal_list_item_t, diff --git a/ompi/mca/coll/fca/coll_fca_ops.c b/ompi/mca/coll/fca/coll_fca_ops.c index 093bd46988d..e95c97a8349 100644 --- a/ompi/mca/coll/fca/coll_fca_ops.c +++ b/ompi/mca/coll/fca/coll_fca_ops.c @@ -1,12 +1,18 @@ -/** - Copyright (c) 2011 Mellanox Technologies. All rights reserved. - $COPYRIGHT$ - - Additional copyrights may follow - - $HEADER$ +/* + * Copyright (c) 2011 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ */ +#ifdef HAVE_ALLOCA_H +#include <alloca.h> +#endif + #include "ompi_config.h" #include "ompi/constants.h" #include "coll_fca.h" @@ -153,6 +159,10 @@ int mca_coll_fca_barrier(struct ompi_communicator_t *comm, int ret; FCA_VERBOSE(5,"Using FCA Barrier"); + if (OPAL_UNLIKELY(ompi_mpi_finalize_started)) { + FCA_VERBOSE(5, "In finalize, reverting to previous barrier"); + goto orig_barrier; + } ret = fca_do_barrier(fca_module->fca_comm); if (ret < 0) { if (ret == -EUSEMPI) { diff --git a/ompi/mca/coll/hcoll/Makefile.am b/ompi/mca/coll/hcoll/Makefile.am index 709b5376cda..dafa2b32f91 100644 --- a/ompi/mca/coll/hcoll/Makefile.am +++ b/ompi/mca/coll/hcoll/Makefile.am @@ -2,6 +2,8 @@ # # # Copyright (c) 2011 Mellanox Technologies. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -10,7 +12,7 @@ # # -AM_CPPFLAGS = $(coll_hcoll_CPPFLAGS) -DCOLL_HCOLL_HOME=\"$(coll_hcoll_HOME)\" $(coll_hcoll_extra_CPPFLAGS) +AM_CPPFLAGS = $(coll_hcoll_CPPFLAGS) coll_hcoll_sources = \ coll_hcoll.h \ diff --git a/ompi/mca/coll/hcoll/coll_hcoll.h b/ompi/mca/coll/hcoll/coll_hcoll.h index 3483dc50e88..50d7aa85c3e 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll.h +++ b/ompi/mca/coll/hcoll/coll_hcoll.h @@ -1,5 +1,7 @@ /** - Copyright (c) 2011 Mellanox Technologies. All rights reserved. + Copyright (c) 2011 Mellanox Technologies. All rights reserved. + Copyright (c) 2015 Research Organization for Information Science + and Technology (RIST). All rights reserved. 
$COPYRIGHT$ Additional copyrights may follow @@ -15,6 +17,7 @@ #include "mpi.h" #include "ompi/mca/mca.h" #include "opal/memoryhooks/memory.h" +#include "opal/mca/memory/base/base.h" #include "ompi/mca/coll/coll.h" #include "ompi/request/request.h" #include "ompi/mca/pml/pml.h" @@ -25,8 +28,8 @@ #include "orte/runtime/orte_globals.h" -#include "hcoll_api.h" -#include "hcoll_constants.h" +#include "hcoll/api/hcoll_api.h" +#include "hcoll/api/hcoll_constants.h" #include "coll_hcoll_debug.h" @@ -47,6 +50,11 @@ typedef struct mca_coll_hcoll_ops_t { int (*hcoll_barrier)(void *); } mca_coll_hcoll_ops_t; +typedef struct { + opal_free_list_item_t super; + dte_data_representation_t type; +} mca_coll_hcoll_dtype_t; +OBJ_CLASS_DECLARATION(mca_coll_hcoll_dtype_t); struct mca_coll_hcoll_component_t { /** Base coll component */ @@ -87,6 +95,8 @@ struct mca_coll_hcoll_component_t { /* FCA global stuff */ mca_coll_hcoll_ops_t hcoll_ops; opal_free_list_t requests; + opal_free_list_t dtypes; + int derived_types_support_enabled; }; typedef struct mca_coll_hcoll_component_t mca_coll_hcoll_component_t; @@ -135,10 +145,18 @@ struct mca_coll_hcoll_module_t { mca_coll_base_module_t *previous_ibarrier_module; mca_coll_base_module_iallgather_fn_t previous_iallgather; mca_coll_base_module_t *previous_iallgather_module; + mca_coll_base_module_iallgatherv_fn_t previous_iallgatherv; + mca_coll_base_module_t *previous_iallgatherv_module; mca_coll_base_module_iallreduce_fn_t previous_iallreduce; mca_coll_base_module_t *previous_iallreduce_module; + mca_coll_base_module_ireduce_fn_t previous_ireduce; + mca_coll_base_module_t *previous_ireduce_module; mca_coll_base_module_igatherv_fn_t previous_igatherv; mca_coll_base_module_t *previous_igatherv_module; + mca_coll_base_module_ialltoall_fn_t previous_ialltoall; + mca_coll_base_module_t *previous_ialltoall_module; + mca_coll_base_module_ialltoallv_fn_t previous_ialltoallv; + mca_coll_base_module_t *previous_ialltoallv_module; }; typedef struct 
mca_coll_hcoll_module_t mca_coll_hcoll_module_t; @@ -169,7 +187,15 @@ int mca_coll_hcoll_allgather(void *sbuf, int scount, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_hcoll_gather(void *sbuf, int scount, +int mca_coll_hcoll_allgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcount, + const int *displs, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_hcoll_gather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -183,13 +209,29 @@ int mca_coll_hcoll_allreduce(void *sbuf, void *rbuf, int count, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_hcoll_alltoall(void *sbuf, int scount, +int mca_coll_hcoll_reduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_hcoll_alltoall(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); +int mca_coll_hcoll_alltoallv(void *sbuf, int *scounts, + int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, int *rcounts, + int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + int mca_coll_hcoll_gatherv(void* sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int *rcounts, int *displs, @@ -203,10 +245,10 @@ int mca_coll_hcoll_ibarrier(struct ompi_communicator_t *comm, mca_coll_base_module_t *module); int mca_coll_hcoll_ibcast(void *buff, int count, - struct ompi_datatype_t *datatype, int root, - struct ompi_communicator_t *comm, - ompi_request_t** request, - mca_coll_base_module_t *module); + struct ompi_datatype_t *datatype, int 
root, + struct ompi_communicator_t *comm, + ompi_request_t** request, + mca_coll_base_module_t *module); int mca_coll_hcoll_iallgather(void *sbuf, int scount, struct ompi_datatype_t *sdtype, @@ -216,14 +258,51 @@ int mca_coll_hcoll_iallgather(void *sbuf, int scount, ompi_request_t** request, mca_coll_base_module_t *module); -int mca_coll_hcoll_iallreduce(void *sbuf, void *rbuf, int count, +int mca_coll_hcoll_iallgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcount, + const int *displs, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t** request, + mca_coll_base_module_t *module); + +int mca_coll_hcoll_iallreduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + ompi_request_t** request, + mca_coll_base_module_t *module); + +int mca_coll_hcoll_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, + int root, struct ompi_communicator_t *comm, ompi_request_t** request, mca_coll_base_module_t *module); -int mca_coll_hcoll_igatherv(void* sbuf, int scount, +int mca_coll_hcoll_ialltoall(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t **req, + mca_coll_base_module_t *module); + +#if HCOLL_API >= HCOLL_VERSION(3,7) +int mca_coll_hcoll_ialltoallv(void *sbuf, int *scounts, + int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, int *rcounts, + int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t **req, + mca_coll_base_module_t *module); +#endif + +int mca_coll_hcoll_igatherv(const void* sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int *rcounts, int *displs, struct ompi_datatype_t *rdtype, diff --git a/ompi/mca/coll/hcoll/coll_hcoll_component.c 
b/ompi/mca/coll/hcoll/coll_hcoll_component.c index 1a9bf793079..95277ec2f7a 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_component.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_component.c @@ -17,6 +17,7 @@ #include "coll_hcoll.h" #include "opal/mca/installdirs/installdirs.h" +#include "coll_hcoll_dtypes.h" /* * Public string showing the coll ompi_hcol component version number @@ -198,16 +199,24 @@ static int hcoll_register(void) CHECK(reg_int("np",NULL, "Minimal number of processes in the communicator" " for the corresponding hcoll context to be created (default: 32)", - 2, + 2, &mca_coll_hcoll_component.hcoll_np, 0)); CHECK(reg_int("datatype_fallback",NULL, "[1|0|] Enable/Disable user defined dattypes fallback", - 1, + 1, &mca_coll_hcoll_component.hcoll_datatype_fallback, 0)); - +#if HCOLL_API >= HCOLL_VERSION(3,6) + CHECK(reg_int("dts",NULL, + "[1|0|] Enable/Disable derived types support", + 1, + &mca_coll_hcoll_component.derived_types_support_enabled, + 0)); +#else + mca_coll_hcoll_component.derived_types_support_enabled = 0; +#endif mca_coll_hcoll_component.compiletime_version = HCOLL_VERNO_STRING; mca_base_component_var_register(&mca_coll_hcoll_component.super.collm_version, MCA_COMPILETIME_VER, @@ -242,6 +251,8 @@ static int hcoll_open(void) cm->libhcoll_initialized = false; + (void)mca_base_framework_open(&opal_memory_base_framework, 0); + /* Register memory hooks */ if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & @@ -278,11 +289,14 @@ static int hcoll_close(void) HCOL_VERBOSE(5,"HCOLL FINALIZE"); rc = hcoll_finalize(); - + OBJ_DESTRUCT(&cm->dtypes); opal_progress_unregister(mca_coll_hcoll_progress); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(1,"Hcol library finalize failed"); return OMPI_ERROR; } + + mca_base_framework_close(&opal_memory_base_framework); + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/hcoll/coll_hcoll_dtypes.h b/ompi/mca/coll/hcoll/coll_hcoll_dtypes.h index 
c1c83aa4455..f0efb41c4fd 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_dtypes.h +++ b/ompi/mca/coll/hcoll/coll_hcoll_dtypes.h @@ -6,8 +6,10 @@ It is used to extract allreduce bcol functions where the arrhythmetics has to be done*/ #include "ompi/datatype/ompi_datatype.h" +#include "ompi/datatype/ompi_datatype_internal.h" #include "ompi/mca/op/op.h" -#include "hcoll_dte.h" +#include "hcoll/api/hcoll_dte.h" +extern int hcoll_type_attr_keyval; /*to keep this at hand: Ids of the basic opal_datatypes: #define OPAL_DATATYPE_INT1 4 @@ -25,13 +27,13 @@ #define OPAL_DATATYPE_FLOAT8 16 #define OPAL_DATATYPE_FLOAT12 17 #define OPAL_DATATYPE_FLOAT16 18 +#define OPAL_DATATYPE_FLOAT_COMPLEX 19 +#define OPAL_DATATYPE_DOUBLE_COMPLEX 20 total 15 types */ - - -static dte_data_representation_t* ompi_datatype_2_dte_data_rep[OPAL_DATATYPE_MAX_PREDEFINED] = { +static dte_data_representation_t* ompi_datatype_2_dte_data_rep[OMPI_DATATYPE_MAX_PREDEFINED] = { &DTE_ZERO, /*OPAL_DATATYPE_LOOP 0 */ &DTE_ZERO, /*OPAL_DATATYPE_END_LOOP 1 */ &DTE_ZERO, /*OPAL_DATATYPE_LB 2 */ @@ -51,29 +53,140 @@ static dte_data_representation_t* ompi_datatype_2_dte_data_rep[OPAL_DATATYPE_MAX &DTE_FLOAT64, /*OPAL_DATATYPE_FLOAT8 16 */ &DTE_FLOAT96, /*OPAL_DATATYPE_FLOAT12 17 */ &DTE_FLOAT128, /*OPAL_DATATYPE_FLOAT16 18 */ - &DTE_ZERO, /*OPAL_DATATYPE_COMPLEX8 19 */ - &DTE_ZERO, /*OPAL_DATATYPE_COMPLEX16 20 */ - &DTE_ZERO, /*OPAL_DATATYPE_COMPLEX32 21 */ +#if defined(DTE_FLOAT32_COMPLEX) + &DTE_FLOAT32_COMPLEX, /*OPAL_DATATYPE_COMPLEX8 19 */ +#else + &DTE_ZERO, +#endif +#if defined(DTE_FLOAT64_COMPLEX) + &DTE_FLOAT64_COMPLEX, /*OPAL_DATATYPE_COMPLEX32 20 */ +#else + &DTE_ZERO, +#endif +#if defined(DTE_FLOAT128_COMPLEX) + &DTE_FLOAT128_COMPLEX, /*OPAL_DATATYPE_COMPLEX64 21 */ +#else + &DTE_ZERO, +#endif &DTE_ZERO, /*OPAL_DATATYPE_BOOL 22 */ &DTE_ZERO, /*OPAL_DATATYPE_WCHAR 23 */ &DTE_ZERO /*OPAL_DATATYPE_UNAVAILABLE 24 */ }; -static dte_data_representation_t ompi_dtype_2_dte_dtype(ompi_datatype_t *dtype){ +enum { + 
TRY_FIND_DERIVED, + NO_DERIVED +}; + + +#if HCOLL_API >= HCOLL_VERSION(3,6) +static inline +int hcoll_map_derived_type(ompi_datatype_t *dtype, dte_data_representation_t *new_dte) +{ + int rc; + if (NULL == dtype->args) { + /* predefined type, shouldn't call this */ + return OMPI_SUCCESS; + } + rc = hcoll_create_mpi_type((void*)dtype, new_dte); + return rc == HCOLL_SUCCESS ? OMPI_SUCCESS : OMPI_ERROR; +} + +static dte_data_representation_t find_derived_mapping(ompi_datatype_t *dtype){ + dte_data_representation_t dte = DTE_ZERO; + mca_coll_hcoll_dtype_t *hcoll_dtype; + if (mca_coll_hcoll_component.derived_types_support_enabled) { + int map_found = 0; + ompi_attr_get_c(dtype->d_keyhash, hcoll_type_attr_keyval, + (void**)&hcoll_dtype, &map_found); + if (!map_found) + hcoll_map_derived_type(dtype, &dte); + else + dte = hcoll_dtype->type; + } + + return dte; +} + + + +static inline dte_data_representation_t +ompi_predefined_derived_2_hcoll(int ompi_id) { + switch(ompi_id) { + case OMPI_DATATYPE_MPI_FLOAT_INT: + return DTE_FLOAT_INT; + case OMPI_DATATYPE_MPI_DOUBLE_INT: + return DTE_DOUBLE_INT; + case OMPI_DATATYPE_MPI_LONG_INT: + return DTE_LONG_INT; + case OMPI_DATATYPE_MPI_SHORT_INT: + return DTE_SHORT_INT; + case OMPI_DATATYPE_MPI_LONG_DOUBLE_INT: + return DTE_LONG_DOUBLE_INT; + case OMPI_DATATYPE_MPI_2INT: + return DTE_2INT; +#if HCOLL_API >= HCOLL_VERSION(3,7) + case OMPI_DATATYPE_MPI_2INTEGER: +#if OMPI_SIZEOF_FORTRAN_INTEGER == 4 + return DTE_2INT; +#elif OMPI_SIZEOF_FORTRAN_INTEGER == 8 + return DTE_2INT64; +#else + return DTE_ZERO; +#endif + case OMPI_DATATYPE_MPI_2REAL: +#if OMPI_SIZEOF_FORTRAN_REAL == 4 + return DTE_2FLOAT32; +#elif OMPI_SIZEOF_FORTRAN_REAL == 8 + return DTE_2FLOAT64; +#else + return DTE_ZERO; +#endif + case OMPI_DATATYPE_MPI_2DBLPREC: +#if OMPI_SIZEOF_FORTRAN_DOUBLE_PRECISION == 4 + return DTE_2FLOAT32; +#elif OMPI_SIZEOF_FORTRAN_DOUBLE_PRECISION == 8 + return DTE_2FLOAT64; +#else + return DTE_ZERO; +#endif +#endif + default: + break; + } + 
return DTE_ZERO; +} +#endif + +static dte_data_representation_t +ompi_dtype_2_hcoll_dtype( ompi_datatype_t *dtype, + const int mode) +{ int ompi_type_id = dtype->id; int opal_type_id = dtype->super.id; - dte_data_representation_t dte_data_rep; - if (!(dtype->super.flags & OPAL_DATATYPE_FLAG_NO_GAPS)) { - ompi_type_id = -1; + dte_data_representation_t dte_data_rep = DTE_ZERO; + + if (ompi_type_id < OMPI_DATATYPE_MPI_MAX_PREDEFINED && + dtype->super.flags & OMPI_DATATYPE_FLAG_PREDEFINED) { + if (opal_type_id > 0 && opal_type_id < OPAL_DATATYPE_MAX_PREDEFINED) { + dte_data_rep = *ompi_datatype_2_dte_data_rep[opal_type_id]; + } +#if HCOLL_API >= HCOLL_VERSION(3,6) + else if (TRY_FIND_DERIVED == mode){ + dte_data_rep = ompi_predefined_derived_2_hcoll(ompi_type_id); + } + } else { + if (TRY_FIND_DERIVED == mode) + dte_data_rep = find_derived_mapping(dtype); +#endif } - if (OPAL_UNLIKELY( ompi_type_id < 0 || - ompi_type_id >= OPAL_DATATYPE_MAX_PREDEFINED)){ + if (HCOL_DTE_IS_ZERO(dte_data_rep) && TRY_FIND_DERIVED == mode && + !mca_coll_hcoll_component.hcoll_datatype_fallback) { dte_data_rep = DTE_ZERO; dte_data_rep.rep.in_line_rep.data_handle.in_line.in_line = 0; dte_data_rep.rep.in_line_rep.data_handle.pointer_to_handle = (uint64_t ) &dtype->super; - return dte_data_rep; } - return *ompi_datatype_2_dte_data_rep[opal_type_id]; + return dte_data_rep; } static hcoll_dte_op_t* ompi_op_2_hcoll_op[OMPI_OP_BASE_FORTRAN_OP_MAX + 1] = { @@ -101,4 +214,27 @@ static hcoll_dte_op_t* ompi_op_2_hcolrte_op(ompi_op_t *op) { return ompi_op_2_hcoll_op[op->o_f_to_c_index]; } + +#if HCOLL_API >= HCOLL_VERSION(3,6) +static int hcoll_type_attr_del_fn(MPI_Datatype type, int keyval, void *attr_val, void *extra) { + int ret = OMPI_SUCCESS; + mca_coll_hcoll_dtype_t *dtype = + (mca_coll_hcoll_dtype_t*) attr_val; + + assert(dtype); + if (HCOLL_SUCCESS != (ret = hcoll_dt_destroy(dtype->type))) { + HCOL_ERROR("failed to delete type attr: hcoll_dte_destroy returned %d",ret); + return OMPI_ERROR; + } + 
opal_free_list_return(&mca_coll_hcoll_component.dtypes, + &dtype->super); + + return OMPI_SUCCESS; +} +#else +static int hcoll_type_attr_del_fn(MPI_Datatype type, int keyval, void *attr_val, void *extra) { + /*Do nothing - it's an old version of hcoll w/o dtypes support */ + return OMPI_SUCCESS; +} +#endif #endif /* COLL_HCOLL_DTYPES_H */ diff --git a/ompi/mca/coll/hcoll/coll_hcoll_module.c b/ompi/mca/coll/hcoll/coll_hcoll_module.c index d2623ad1292..699f8c00656 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_module.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_module.c @@ -1,5 +1,6 @@ /** Copyright (c) 2011 Mellanox Technologies. All rights reserved. + Copyright (c) 2016 IBM Corporation. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -9,8 +10,10 @@ #include "ompi_config.h" #include "coll_hcoll.h" +#include "coll_hcoll_dtypes.h" int hcoll_comm_attr_keyval; +int hcoll_type_attr_keyval; /* * Initial query function that is invoked during MPI_INIT, allowing @@ -41,12 +44,41 @@ static void mca_coll_hcoll_module_clear(mca_coll_hcoll_module_t *hcoll_module) hcoll_module->previous_alltoall = NULL; hcoll_module->previous_alltoallv = NULL; hcoll_module->previous_alltoallw = NULL; + hcoll_module->previous_reduce = NULL; hcoll_module->previous_reduce_scatter = NULL; hcoll_module->previous_ibarrier = NULL; hcoll_module->previous_ibcast = NULL; hcoll_module->previous_iallreduce = NULL; hcoll_module->previous_iallgather = NULL; + hcoll_module->previous_iallgatherv = NULL; hcoll_module->previous_igatherv = NULL; + hcoll_module->previous_ireduce = NULL; + hcoll_module->previous_ialltoall = NULL; + hcoll_module->previous_ialltoallv = NULL; + + hcoll_module->previous_barrier_module = NULL; + hcoll_module->previous_bcast_module = NULL; + hcoll_module->previous_allreduce_module = NULL; + hcoll_module->previous_reduce_module = NULL; + hcoll_module->previous_allgather_module = NULL; + hcoll_module->previous_allgatherv_module = NULL; + hcoll_module->previous_gather_module = 
NULL; + hcoll_module->previous_gatherv_module = NULL; + hcoll_module->previous_alltoall_module = NULL; + hcoll_module->previous_alltoallv_module = NULL; + hcoll_module->previous_alltoallw_module = NULL; + hcoll_module->previous_reduce_scatter_module = NULL; + hcoll_module->previous_ibarrier_module = NULL; + hcoll_module->previous_ibcast_module = NULL; + hcoll_module->previous_iallreduce_module = NULL; + hcoll_module->previous_ireduce_module = NULL; + hcoll_module->previous_iallgather_module = NULL; + hcoll_module->previous_iallgatherv_module = NULL; + hcoll_module->previous_igatherv_module = NULL; + hcoll_module->previous_ialltoall_module = NULL; + hcoll_module->previous_ialltoallv_module = NULL; + + } static void mca_coll_hcoll_module_construct(mca_coll_hcoll_module_t *hcoll_module) @@ -60,6 +92,8 @@ void mca_coll_hcoll_mem_release_cb(void *buf, size_t length, hcoll_mem_unmap(buf, length, cbdata, from_alloc); } +#define OBJ_RELEASE_IF_NOT_NULL( obj ) if( NULL != (obj) ) OBJ_RELEASE( obj ); + static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module) { int context_destroyed; @@ -76,33 +110,40 @@ static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module destroy hcoll context*/ if (hcoll_module->hcoll_context != NULL){ - OBJ_RELEASE(hcoll_module->previous_barrier_module); - OBJ_RELEASE(hcoll_module->previous_bcast_module); - OBJ_RELEASE(hcoll_module->previous_allreduce_module); - OBJ_RELEASE(hcoll_module->previous_allgather_module); - OBJ_RELEASE(hcoll_module->previous_gatherv_module); - - OBJ_RELEASE(hcoll_module->previous_ibarrier_module); - OBJ_RELEASE(hcoll_module->previous_ibcast_module); - OBJ_RELEASE(hcoll_module->previous_iallreduce_module); - OBJ_RELEASE(hcoll_module->previous_iallgather_module); - OBJ_RELEASE(hcoll_module->previous_igatherv_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_barrier_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_bcast_module); + 
OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_allreduce_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_allgather_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_allgatherv_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_gatherv_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_alltoall_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_alltoallv_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_reduce_module); + + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_ibarrier_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_ibcast_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_iallreduce_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_iallgather_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_iallgatherv_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_igatherv_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_ialltoall_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_ialltoallv_module); + OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_ireduce_module); /* OBJ_RELEASE(hcoll_module->previous_allgatherv_module); OBJ_RELEASE(hcoll_module->previous_gather_module); OBJ_RELEASE(hcoll_module->previous_gatherv_module); - OBJ_RELEASE(hcoll_module->previous_alltoall_module); - OBJ_RELEASE(hcoll_module->previous_alltoallv_module); OBJ_RELEASE(hcoll_module->previous_alltoallw_module); OBJ_RELEASE(hcoll_module->previous_reduce_scatter_module); OBJ_RELEASE(hcoll_module->previous_reduce_module); */ - +#if !defined(HAVE_HCOLL_CONTEXT_FREE) context_destroyed = 0; hcoll_destroy_context(hcoll_module->hcoll_context, (rte_grp_handle_t)hcoll_module->comm, &context_destroyed); +#endif } mca_coll_hcoll_module_clear(hcoll_module); } @@ -125,14 +166,22 @@ static int mca_coll_hcoll_save_coll_handlers(mca_coll_hcoll_module_t *hcoll_modu HCOL_SAVE_PREV_COLL_API(barrier); HCOL_SAVE_PREV_COLL_API(bcast); HCOL_SAVE_PREV_COLL_API(allreduce); + HCOL_SAVE_PREV_COLL_API(reduce); 
HCOL_SAVE_PREV_COLL_API(allgather); + HCOL_SAVE_PREV_COLL_API(allgatherv); HCOL_SAVE_PREV_COLL_API(gatherv); + HCOL_SAVE_PREV_COLL_API(alltoall); + HCOL_SAVE_PREV_COLL_API(alltoallv); HCOL_SAVE_PREV_COLL_API(ibarrier); HCOL_SAVE_PREV_COLL_API(ibcast); HCOL_SAVE_PREV_COLL_API(iallreduce); + HCOL_SAVE_PREV_COLL_API(ireduce); HCOL_SAVE_PREV_COLL_API(iallgather); + HCOL_SAVE_PREV_COLL_API(iallgatherv); HCOL_SAVE_PREV_COLL_API(igatherv); + HCOL_SAVE_PREV_COLL_API(ialltoall); + HCOL_SAVE_PREV_COLL_API(ialltoallv); /* These collectives are not yet part of hcoll, so @@ -141,8 +190,6 @@ static int mca_coll_hcoll_save_coll_handlers(mca_coll_hcoll_module_t *hcoll_modu HCOL_SAVE_PREV_COLL_API(gather); HCOL_SAVE_PREV_COLL_API(reduce); HCOL_SAVE_PREV_COLL_API(allgatherv); - HCOL_SAVE_PREV_COLL_API(alltoall); - HCOL_SAVE_PREV_COLL_API(alltoallv); HCOL_SAVE_PREV_COLL_API(alltoallw); */ return OMPI_SUCCESS; @@ -159,7 +206,11 @@ int hcoll_comm_attr_del_fn(MPI_Comm comm, int keyval, void *attr_val, void *extr mca_coll_hcoll_module_t *hcoll_module; hcoll_module = (mca_coll_hcoll_module_t*) attr_val; +#ifdef HAVE_HCOLL_CONTEXT_FREE + hcoll_context_free(hcoll_module->hcoll_context, (rte_grp_handle_t)comm); +#else hcoll_group_destroy_notify(hcoll_module->hcoll_context); +#endif return OMPI_SUCCESS; } @@ -196,6 +247,10 @@ int mca_coll_hcoll_progress(void) } +OBJ_CLASS_INSTANCE(mca_coll_hcoll_dtype_t, + opal_free_list_item_t, + NULL,NULL); + /* * Invoked when there's a new communicator that has been created. 
* Look at the communicator and decide which set of functions and @@ -273,6 +328,24 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority) HCOL_ERROR("Hcol comm keyval create failed"); return NULL; } + + if (mca_coll_hcoll_component.derived_types_support_enabled) { + copy_fn.attr_datatype_copy_fn = (MPI_Type_internal_copy_attr_function *) MPI_TYPE_NULL_COPY_FN; + del_fn.attr_datatype_delete_fn = hcoll_type_attr_del_fn; + err = ompi_attr_create_keyval(TYPE_ATTR, copy_fn, del_fn, &hcoll_type_attr_keyval, NULL ,0, NULL); + if (OMPI_SUCCESS != err) { + cm->hcoll_enable = 0; + hcoll_finalize(); + opal_progress_unregister(mca_coll_hcoll_progress); + HCOL_ERROR("Hcol type keyval create failed"); + return NULL; + } + } + OBJ_CONSTRUCT(&cm->dtypes, opal_free_list_t); + opal_free_list_init(&cm->dtypes, sizeof(mca_coll_hcoll_dtype_t), + 8, OBJ_CLASS(mca_coll_hcoll_dtype_t), 0, 0, + 32, -1, 32, NULL, 0, NULL, NULL, NULL); + } hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t); @@ -308,16 +381,34 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority) hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL; hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL; hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL; + hcoll_module->super.coll_allgatherv = hcoll_collectives.coll_allgatherv ? mca_coll_hcoll_allgatherv : NULL; hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL; - hcoll_module->super.coll_alltoall = /*hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : */ NULL; + hcoll_module->super.coll_alltoall = hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : NULL; + hcoll_module->super.coll_alltoallv = hcoll_collectives.coll_alltoallv ? mca_coll_hcoll_alltoallv : NULL; hcoll_module->super.coll_gatherv = hcoll_collectives.coll_gatherv ? 
mca_coll_hcoll_gatherv : NULL; + hcoll_module->super.coll_reduce = hcoll_collectives.coll_reduce ? mca_coll_hcoll_reduce : NULL; hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL; hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL; hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? mca_coll_hcoll_iallgather : NULL; +#if HCOLL_API >= HCOLL_VERSION(3,5) + hcoll_module->super.coll_iallgatherv = hcoll_collectives.coll_iallgatherv ? mca_coll_hcoll_iallgatherv : NULL; +#else + hcoll_module->super.coll_iallgatherv = NULL; +#endif hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL; +#if HCOLL_API >= HCOLL_VERSION(3,5) + hcoll_module->super.coll_ireduce = hcoll_collectives.coll_ireduce ? mca_coll_hcoll_ireduce : NULL; +#else + hcoll_module->super.coll_ireduce = NULL; +#endif hcoll_module->super.coll_gather = /*hcoll_collectives.coll_gather ? mca_coll_hcoll_gather :*/ NULL; hcoll_module->super.coll_igatherv = hcoll_collectives.coll_igatherv ? mca_coll_hcoll_igatherv : NULL; - + hcoll_module->super.coll_ialltoall = /*hcoll_collectives.coll_ialltoall ? mca_coll_hcoll_ialltoall : */ NULL; +#if HCOLL_API >= HCOLL_VERSION(3,7) + hcoll_module->super.coll_ialltoallv = hcoll_collectives.coll_ialltoallv ? mca_coll_hcoll_ialltoallv : NULL; +#else + hcoll_module->super.coll_ialltoallv = NULL; +#endif *priority = cm->hcoll_priority; module = &hcoll_module->super; diff --git a/ompi/mca/coll/hcoll/coll_hcoll_ops.c b/ompi/mca/coll/hcoll/coll_hcoll_ops.c index 266f1e2103b..c8494105070 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_ops.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_ops.c @@ -1,5 +1,7 @@ /** - Copyright (c) 2011 Mellanox Technologies. All rights reserved. + Copyright (c) 2011 Mellanox Technologies. All rights reserved. + Copyright (c) 2015 Research Organization for Information Science + and Technology (RIST). 
All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -10,20 +12,27 @@ #include "ompi_config.h" #include "ompi/constants.h" #include "coll_hcoll.h" -#include "hcoll_constants.h" +#include "hcoll/api/hcoll_constants.h" #include "coll_hcoll_dtypes.h" -#include "hcoll_dte.h" +#include "hcoll/api/hcoll_dte.h" int mca_coll_hcoll_barrier(struct ompi_communicator_t *comm, mca_coll_base_module_t *module){ int rc; - HCOL_VERBOSE(20,"RUNNING HCOL BARRIER"); mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + HCOL_VERBOSE(20,"RUNNING HCOL BARRIER"); + + if (OPAL_UNLIKELY(ompi_mpi_finalize_started)) { + HCOL_VERBOSE(5, "In finalize, reverting to previous barrier"); + goto orig_barrier; + } rc = hcoll_collectives.coll_barrier(hcoll_module->hcoll_context); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(20,"RUNNING FALLBACK BARRIER"); rc = hcoll_module->previous_barrier(comm,hcoll_module->previous_barrier_module); } return rc; +orig_barrier: + return hcoll_module->previous_barrier(comm,hcoll_module->previous_barrier_module); } int mca_coll_hcoll_bcast(void *buff, int count, @@ -35,9 +44,9 @@ int mca_coll_hcoll_bcast(void *buff, int count, int rc; HCOL_VERBOSE(20,"RUNNING HCOL BCAST"); mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - dtype = ompi_dtype_2_dte_dtype(datatype); - if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(dtype) || HCOL_DTE_IS_COMPLEX(dtype))) - && mca_coll_hcoll_component.hcoll_datatype_fallback){ + dtype = ompi_dtype_2_hcoll_dtype(datatype, TRY_FIND_DERIVED); + + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(dtype))) { /*If we are here then datatype is not simple predefined datatype */ /*In future we need to add more complex mapping to the dte_data_representation_t */ /* Now use fallback */ @@ -67,11 +76,12 @@ int mca_coll_hcoll_allgather(void *sbuf, int scount, int rc; HCOL_VERBOSE(20,"RUNNING HCOL ALLGATHER"); mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - stype = ompi_dtype_2_dte_dtype(sdtype); 
- rtype = ompi_dtype_2_dte_dtype(rdtype); - if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || HCOL_DTE_IS_COMPLEX(stype) || HCOL_DTE_IS_COMPLEX(rtype))) - && mca_coll_hcoll_component.hcoll_datatype_fallback){ + stype = ompi_dtype_2_hcoll_dtype(sdtype, TRY_FIND_DERIVED); + rtype = ompi_dtype_2_hcoll_dtype(rdtype, TRY_FIND_DERIVED); + if (sbuf == MPI_IN_PLACE) { + stype = rtype; + } + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { /*If we are here then datatype is not simple predefined datatype */ /*In future we need to add more complex mapping to the dte_data_representation_t */ /* Now use fallback */ @@ -95,7 +105,50 @@ int mca_coll_hcoll_allgather(void *sbuf, int scount, return rc; } -int mca_coll_hcoll_gather(void *sbuf, int scount, +int mca_coll_hcoll_allgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcount, + const int *displs, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + dte_data_representation_t stype; + dte_data_representation_t rtype; + int rc; + HCOL_VERBOSE(20,"RUNNING HCOL ALLGATHERV"); + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); + rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { + /*If we are here then datatype is not simple predefined datatype */ + /*In future we need to add more complex mapping to the dte_data_representation_t */ + /* Now use fallback */ + HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback allgatherv;", + sdtype->super.name, + rdtype->super.name); + rc = hcoll_module->previous_allgatherv(sbuf,scount,sdtype, + rbuf,rcount, + displs, + rdtype, + comm, + hcoll_module->previous_allgatherv_module); + return rc; + } + rc = hcoll_collectives.coll_allgatherv((void 
*)sbuf,scount,stype,rbuf,rcount,displs,rtype,hcoll_module->hcoll_context); + if (HCOLL_SUCCESS != rc){ + HCOL_VERBOSE(20,"RUNNING FALLBACK ALLGATHERV"); + rc = hcoll_module->previous_allgatherv(sbuf,scount,sdtype, + rbuf,rcount, + displs, + rdtype, + comm, + hcoll_module->previous_allgatherv_module); + } + return rc; +} + +int mca_coll_hcoll_gather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -107,11 +160,9 @@ int mca_coll_hcoll_gather(void *sbuf, int scount, int rc; HCOL_VERBOSE(20,"RUNNING HCOL GATHER"); mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - stype = ompi_dtype_2_dte_dtype(sdtype); - rtype = ompi_dtype_2_dte_dtype(rdtype); - if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || HCOL_DTE_IS_COMPLEX(stype) || HCOL_DTE_IS_COMPLEX(rtype))) - && mca_coll_hcoll_component.hcoll_datatype_fallback){ + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); + rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { /*If we are here then datatype is not simple predefined datatype */ /*In future we need to add more complex mapping to the dte_data_representation_t */ /* Now use fallback */ @@ -147,9 +198,8 @@ int mca_coll_hcoll_allreduce(void *sbuf, void *rbuf, int count, int rc; HCOL_VERBOSE(20,"RUNNING HCOL ALLREDUCE"); mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - Dtype = ompi_dtype_2_dte_dtype(dtype); - if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(Dtype) || HCOL_DTE_IS_COMPLEX(Dtype))) - && mca_coll_hcoll_component.hcoll_datatype_fallback){ + Dtype = ompi_dtype_2_hcoll_dtype(dtype, NO_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(Dtype))){ /*If we are here then datatype is not simple predefined datatype */ /*In future we need to add more complex mapping to the dte_data_representation_t */ /* Now use fallback */ @@ -184,7 +234,58 @@ int 
mca_coll_hcoll_allreduce(void *sbuf, void *rbuf, int count, return rc; } -int mca_coll_hcoll_alltoall(void *sbuf, int scount, +int mca_coll_hcoll_reduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + dte_data_representation_t Dtype; + hcoll_dte_op_t *Op; + int rc; + HCOL_VERBOSE(20,"RUNNING HCOL REDUCE"); + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + Dtype = ompi_dtype_2_hcoll_dtype(dtype, NO_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(Dtype))){ + /*If we are here then datatype is not simple predefined datatype */ + /*In future we need to add more complex mapping to the dte_data_representation_t */ + /* Now use fallback */ + HCOL_VERBOSE(20,"Ompi_datatype is not supported: dtype = %s; calling fallback reduce;", + dtype->super.name); + rc = hcoll_module->previous_reduce(sbuf,rbuf, + count,dtype,op, + root, + comm, hcoll_module->previous_reduce_module); + return rc; + } + + Op = ompi_op_2_hcolrte_op(op); + if (OPAL_UNLIKELY(HCOL_DTE_OP_NULL == Op->id)){ + /*If we are here then datatype is not simple predefined datatype */ + /*In future we need to add more complex mapping to the dte_data_representation_t */ + /* Now use fallback */ + HCOL_VERBOSE(20,"ompi_op_t is not supported: op = %s; calling fallback reduce;", + op->o_name); + rc = hcoll_module->previous_reduce(sbuf,rbuf, + count,dtype,op, + root, + comm, hcoll_module->previous_reduce_module); + return rc; + } + + rc = hcoll_collectives.coll_reduce((void *)sbuf,rbuf,count,Dtype,Op,root,hcoll_module->hcoll_context); + if (HCOLL_SUCCESS != rc){ + HCOL_VERBOSE(20,"RUNNING FALLBACK REDUCE"); + rc = hcoll_module->previous_reduce(sbuf,rbuf, + count,dtype,op, + root, + comm, hcoll_module->previous_reduce_module); + } + return rc; +} + +int mca_coll_hcoll_alltoall(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, 
struct ompi_datatype_t *rdtype, @@ -196,11 +297,9 @@ int mca_coll_hcoll_alltoall(void *sbuf, int scount, int rc; HCOL_VERBOSE(20,"RUNNING HCOL ALLTOALL"); mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - stype = ompi_dtype_2_dte_dtype(sdtype); - rtype = ompi_dtype_2_dte_dtype(rdtype); - if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || HCOL_DTE_IS_COMPLEX(stype) || HCOL_DTE_IS_COMPLEX(rtype))) - && mca_coll_hcoll_component.hcoll_datatype_fallback){ + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); + rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { /*If we are here then datatype is not simple predefined datatype */ /*In future we need to add more complex mapping to the dte_data_representation_t */ /* Now use fallback */ @@ -224,6 +323,41 @@ int mca_coll_hcoll_alltoall(void *sbuf, int scount, return rc; } +int mca_coll_hcoll_alltoallv(void *sbuf, int *scounts, int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, int *rcounts, int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + dte_data_representation_t stype; + dte_data_representation_t rtype; + int rc; + HCOL_VERBOSE(20,"RUNNING HCOL ALLTOALLV"); + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); + rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { + HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback alltoallv;", + sdtype->super.name, + rdtype->super.name); + rc = hcoll_module->previous_alltoallv(sbuf, scounts, sdisps, sdtype, + rbuf, rcounts, rdisps, rdtype, + comm, hcoll_module->previous_alltoallv_module); + return rc; + } + rc = hcoll_collectives.coll_alltoallv(sbuf, scounts, sdisps, stype, + rbuf, 
rcounts, rdisps, rtype, + hcoll_module->hcoll_context); + if (HCOLL_SUCCESS != rc){ + HCOL_VERBOSE(20,"RUNNING FALLBACK ALLTOALLV"); + rc = hcoll_module->previous_alltoallv(sbuf, scounts, sdisps, sdtype, + rbuf, rcounts, rdisps, rdtype, + comm, hcoll_module->previous_alltoallv_module); + } + return rc; +} + int mca_coll_hcoll_gatherv(void* sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int *rcounts, int *displs, @@ -237,11 +371,9 @@ int mca_coll_hcoll_gatherv(void* sbuf, int scount, int rc; HCOL_VERBOSE(20,"RUNNING HCOL GATHERV"); mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; - stype = ompi_dtype_2_dte_dtype(sdtype); - rtype = ompi_dtype_2_dte_dtype(rdtype); - if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || HCOL_DTE_IS_COMPLEX(stype) || HCOL_DTE_IS_COMPLEX(rtype))) - && mca_coll_hcoll_component.hcoll_datatype_fallback){ + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); + rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { /*If we are here then datatype is not simple predefined datatype */ /*In future we need to add more complex mapping to the dte_data_representation_t */ /* Now use fallback */ @@ -293,9 +425,8 @@ int mca_coll_hcoll_ibcast(void *buff, int count, HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING BCAST"); mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; rt_handle = (void**) request; - dtype = ompi_dtype_2_dte_dtype(datatype); - if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(dtype) || HCOL_DTE_IS_COMPLEX(dtype))) - && mca_coll_hcoll_component.hcoll_datatype_fallback){ + dtype = ompi_dtype_2_hcoll_dtype(datatype, TRY_FIND_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(dtype))){ /*If we are here then datatype is not simple predefined datatype */ /*In future we need to add more complex mapping to the dte_data_representation_t */ /* Now use fallback */ @@ -328,11 +459,9 @@ int 
mca_coll_hcoll_iallgather(void *sbuf, int scount, HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING ALLGATHER"); mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; rt_handle = (void**) request; - stype = ompi_dtype_2_dte_dtype(sdtype); - rtype = ompi_dtype_2_dte_dtype(rdtype); - if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || HCOL_DTE_IS_COMPLEX(stype) || HCOL_DTE_IS_COMPLEX(rtype))) - && mca_coll_hcoll_component.hcoll_datatype_fallback){ + stype = ompi_dtype_2_hcoll_dtype(sdtype, TRY_FIND_DERIVED); + rtype = ompi_dtype_2_hcoll_dtype(rdtype, TRY_FIND_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { /*If we are here then datatype is not simple predefined datatype */ /*In future we need to add more complex mapping to the dte_data_representation_t */ /* Now use fallback */ @@ -358,7 +487,56 @@ int mca_coll_hcoll_iallgather(void *sbuf, int scount, return rc; } -int mca_coll_hcoll_iallreduce(void *sbuf, void *rbuf, int count, +#if HCOLL_API >= HCOLL_VERSION(3,5) +int mca_coll_hcoll_iallgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcount, + const int *displs, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + dte_data_representation_t stype; + dte_data_representation_t rtype; + int rc; + HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING ALLGATHERV"); + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); + rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); + void **rt_handle = (void **) request; + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { + /*If we are here then datatype is not simple predefined datatype */ + /*In future we need to add more complex mapping to the dte_data_representation_t */ + /* Now use fallback */ + HCOL_VERBOSE(20,"Ompi_datatype is not 
supported: sdtype = %s, rdtype = %s; calling fallback non-blocking allgatherv;", + sdtype->super.name, + rdtype->super.name); + rc = hcoll_module->previous_iallgatherv(sbuf,scount,sdtype, + rbuf,rcount, + displs, + rdtype, + comm, + request, + hcoll_module->previous_iallgatherv_module); + return rc; + } + rc = hcoll_collectives.coll_iallgatherv((void *)sbuf,scount,stype,rbuf,rcount,displs,rtype, + hcoll_module->hcoll_context, rt_handle); + if (HCOLL_SUCCESS != rc){ + HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING ALLGATHER"); + rc = hcoll_module->previous_iallgatherv(sbuf,scount,sdtype, + rbuf,rcount, + displs, + rdtype, + comm, + request, + hcoll_module->previous_iallgatherv_module); + } + return rc; +} +#endif +int mca_coll_hcoll_iallreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -372,9 +550,8 @@ int mca_coll_hcoll_iallreduce(void *sbuf, void *rbuf, int count, HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING ALLREDUCE"); mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; rt_handle = (void**) request; - Dtype = ompi_dtype_2_dte_dtype(dtype); - if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(Dtype) || HCOL_DTE_IS_COMPLEX(Dtype))) - && mca_coll_hcoll_component.hcoll_datatype_fallback){ + Dtype = ompi_dtype_2_hcoll_dtype(dtype, NO_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(Dtype))){ /*If we are here then datatype is not simple predefined datatype */ /*In future we need to add more complex mapping to the dte_data_representation_t */ /* Now use fallback */ @@ -408,8 +585,64 @@ int mca_coll_hcoll_iallreduce(void *sbuf, void *rbuf, int count, } return rc; } +#if HCOLL_API >= HCOLL_VERSION(3,5) +int mca_coll_hcoll_ireduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + dte_data_representation_t Dtype; + 
hcoll_dte_op_t *Op; + int rc; + HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING REDUCE"); + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + Dtype = ompi_dtype_2_hcoll_dtype(dtype, NO_DERIVED); + void **rt_handle = (void**) request; + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(Dtype))){ + /*If we are here then datatype is not simple predefined datatype */ + /*In future we need to add more complex mapping to the dte_data_representation_t */ + /* Now use fallback */ + HCOL_VERBOSE(20,"Ompi_datatype is not supported: dtype = %s; calling fallback non-blocking reduce;", + dtype->super.name); + rc = hcoll_module->previous_ireduce(sbuf,rbuf,count,dtype,op, + root, + comm, request, + hcoll_module->previous_ireduce_module); + return rc; + } -int mca_coll_hcoll_igatherv(void* sbuf, int scount, + Op = ompi_op_2_hcolrte_op(op); + if (OPAL_UNLIKELY(HCOL_DTE_OP_NULL == Op->id)){ + /*If we are here then datatype is not simple predefined datatype */ + /*In future we need to add more complex mapping to the dte_data_representation_t */ + /* Now use fallback */ + HCOL_VERBOSE(20,"ompi_op_t is not supported: op = %s; calling fallback non-blocking reduce;", + op->o_name); + rc = hcoll_module->previous_ireduce(sbuf,rbuf, + count,dtype,op, + root, + comm, request, + hcoll_module->previous_ireduce_module); + return rc; + } + + rc = hcoll_collectives.coll_ireduce((void *)sbuf,rbuf,count,Dtype,Op,root,hcoll_module->hcoll_context,rt_handle); + if (HCOLL_SUCCESS != rc){ + HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING REDUCE"); + rc = hcoll_module->previous_ireduce(sbuf,rbuf, + count,dtype,op, + root, + comm, + request, + hcoll_module->previous_ireduce_module); + } + return rc; +} +#endif +int mca_coll_hcoll_igatherv(const void* sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int *rcounts, int *displs, struct ompi_datatype_t *rdtype, @@ -425,11 +658,9 @@ int mca_coll_hcoll_igatherv(void* sbuf, int scount, HCOL_VERBOSE(20,"RUNNING HCOL IGATHERV"); 
mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; rt_handle = (void**) request; - stype = ompi_dtype_2_dte_dtype(sdtype); - rtype = ompi_dtype_2_dte_dtype(rdtype); - if (OPAL_UNLIKELY((HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype) - || HCOL_DTE_IS_COMPLEX(stype) || HCOL_DTE_IS_COMPLEX(rtype))) - && mca_coll_hcoll_component.hcoll_datatype_fallback){ + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); + rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { /*If we are here then datatype is not simple predefined datatype */ /*In future we need to add more complex mapping to the dte_data_representation_t */ /* Now use fallback */ @@ -454,3 +685,41 @@ int mca_coll_hcoll_igatherv(void* sbuf, int scount, } + +#if HCOLL_API >= HCOLL_VERSION(3,7) +int mca_coll_hcoll_ialltoallv(void *sbuf, int *scounts, int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, int *rcounts, int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + dte_data_representation_t stype; + dte_data_representation_t rtype; + int rc; + HCOL_VERBOSE(20,"RUNNING HCOL IALLTOALLV"); + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module; + stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED); + rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED); + if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) { + HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback ialltoallv;", + sdtype->super.name, + rdtype->super.name); + rc = hcoll_module->previous_ialltoallv(sbuf, scounts, sdisps, sdtype, + rbuf, rcounts, rdisps, rdtype, + comm, request, hcoll_module->previous_alltoallv_module); + return rc; + } + rc = hcoll_collectives.coll_ialltoallv((void *)sbuf, (int *)scounts, (int *)sdisps, stype, + rbuf, (int *)rcounts, (int *)rdisps, rtype, 
+ hcoll_module->hcoll_context, (void**)request); + if (HCOLL_SUCCESS != rc){ + HCOL_VERBOSE(20,"RUNNING FALLBACK IALLTOALLV"); + rc = hcoll_module->previous_ialltoallv(sbuf, scounts, sdisps, sdtype, + rbuf, rcounts, rdisps, rdtype, + comm, request, hcoll_module->previous_alltoallv_module); + } + return rc; +} +#endif diff --git a/ompi/mca/coll/hcoll/coll_hcoll_rte.c b/ompi/mca/coll/hcoll/coll_hcoll_rte.c index 22f2ef63e9d..f86846e527c 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_rte.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_rte.c @@ -7,6 +7,8 @@ * reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,9 +41,10 @@ #include "ompi/mca/pml/pml.h" -#include "hcoll_dte.h" -#include "hcoll_api.h" -#include "hcoll_constants.h" +#include "hcoll/api/hcoll_dte.h" +#include "hcoll/api/hcoll_api.h" +#include "hcoll/api/hcoll_constants.h" +#include "coll_hcoll_dtypes.h" /* * Local functions */ @@ -97,6 +100,22 @@ static int group_id(rte_grp_handle_t group); static int world_rank(rte_grp_handle_t grp_h, rte_ec_handle_t ec); /* Module Constructors */ +#if HCOLL_API >= HCOLL_VERSION(3,6) +static int get_mpi_type_envelope(void *mpi_type, int *num_integers, + int *num_addresses, int *num_datatypes, + hcoll_mpi_type_combiner_t *combiner); +static int get_mpi_type_contents(void *mpi_type, int max_integers, int max_addresses, + int max_datatypes, int *array_of_integers, + void *array_of_addresses, void *array_of_datatypes); +static int get_hcoll_type(void *mpi_type, dte_data_representation_t *hcoll_type); +static int set_hcoll_type(void *mpi_type, dte_data_representation_t hcoll_type); +static int get_mpi_constants(size_t *mpi_datatype_size, + int *mpi_order_c, int *mpi_order_fortran, + int *mpi_distribute_block, + int *mpi_distribute_cyclic, + int *mpi_distribute_none, + int 
*mpi_distribute_dflt_darg); +#endif static void init_module_fns(void){ hcoll_rte_functions.send_fn = send_nb; @@ -116,6 +135,13 @@ static void init_module_fns(void){ hcoll_rte_functions.rte_coll_handle_complete_fn = coll_handle_complete; hcoll_rte_functions.rte_group_id_fn = group_id; hcoll_rte_functions.rte_world_rank_fn = world_rank; +#if HCOLL_API >= HCOLL_VERSION(3,6) + hcoll_rte_functions.rte_get_mpi_type_envelope_fn = get_mpi_type_envelope; + hcoll_rte_functions.rte_get_mpi_type_contents_fn = get_mpi_type_contents; + hcoll_rte_functions.rte_get_hcoll_type_fn = get_hcoll_type; + hcoll_rte_functions.rte_set_hcoll_type_fn = set_hcoll_type; + hcoll_rte_functions.rte_get_mpi_constants_fn = get_mpi_constants; +#endif } @@ -144,22 +170,6 @@ void hcoll_rte_fns_setup(void) ); } -/* This one converts dte_general_representation data into regular iovec array which is - used in rml - */ - -static inline int count_total_dte_repeat_entries(struct dte_data_representation_t *data){ - unsigned int i; - - struct dte_generalized_iovec_t * dte_iovec = - data->rep.general_rep->data_representation.data; - int total_entries_number = 0; - for (i=0; i< dte_iovec->repeat_count; i++){ - total_entries_number += dte_iovec->repeat[i].n_elements; - } - return total_entries_number; -} - static int recv_nb(struct dte_data_representation_t data, uint32_t count , void *buffer, @@ -175,56 +185,27 @@ static int recv_nb(struct dte_data_representation_t data, "ec_h.handle = %p, ec_h.rank = %d\n",ec_h.handle,ec_h.rank); return 1; } - if (HCOL_DTE_IS_INLINE(data)){ - /*do inline nb recv*/ - size_t size; - ompi_request_t *ompi_req; - opal_free_list_item_t *item; - - if (!buffer && !HCOL_DTE_IS_ZERO(data)) { - fprintf(stderr, "***Error in hcolrte_rml_recv_nb: buffer pointer is NULL" - " for non DTE_ZERO INLINE data representation\n"); - return 1; - } - size = (size_t)data.rep.in_line_rep.data_handle.in_line.packed_size*count/8; - - HCOL_VERBOSE(30,"PML_IRECV: dest = %d: buf = %p: size = %u: comm = %p", 
- ec_h.rank, buffer, (unsigned int)size, (void *)comm); - if (MCA_PML_CALL(irecv(buffer,size,&(ompi_mpi_unsigned_char.dt),ec_h.rank, - tag,comm,&ompi_req))) - { - return 1; - } - req->data = (void *)ompi_req; - req->status = HCOLRTE_REQUEST_ACTIVE; - }else{ - /*do iovec nb recv*/ - int total_entries_number; - int i; - unsigned int j; - void *buf; - uint64_t len; - int repeat_count; - struct dte_struct_t * repeat; - if (NULL != buffer) { - /* We have a full data description & buffer pointer simultaneously. - It is ambiguous. Throw a warning since the user might have made a - mistake with data reps*/ - fprintf(stderr,"Warning: buffer_pointer != NULL for NON-inline data representation: buffer_pointer is ignored.\n"); - } - total_entries_number = count_total_dte_repeat_entries(&data); - repeat = data.rep.general_rep->data_representation.data->repeat; - repeat_count = data.rep.general_rep->data_representation.data->repeat_count; - for (i=0; i< repeat_count; i++){ - for (j=0; jdata = (void *)ompi_req; + req->status = HCOLRTE_REQUEST_ACTIVE; return HCOLL_SUCCESS; } @@ -245,51 +226,25 @@ static int send_nb( dte_data_representation_t data, "ec_h.handle = %p, ec_h.rank = %d\n",ec_h.handle,ec_h.rank); return 1; } - if (HCOL_DTE_IS_INLINE(data)){ - /*do inline nb recv*/ - size_t size; - ompi_request_t *ompi_req; - if (!buffer && !HCOL_DTE_IS_ZERO(data)) { - fprintf(stderr, "***Error in hcolrte_rml_send_nb: buffer pointer is NULL" - " for non DTE_ZERO INLINE data representation\n"); - return 1; - } - size = (size_t)data.rep.in_line_rep.data_handle.in_line.packed_size*count/8; - HCOL_VERBOSE(30,"PML_ISEND: dest = %d: buf = %p: size = %u: comm = %p", - ec_h.rank, buffer, (unsigned int)size, (void *)comm); - if (MCA_PML_CALL(isend(buffer,size,&(ompi_mpi_unsigned_char.dt),ec_h.rank, - tag,MCA_PML_BASE_SEND_STANDARD,comm,&ompi_req))) - { - return 1; - } - req->data = (void *)ompi_req; - req->status = HCOLRTE_REQUEST_ACTIVE; - }else{ - int total_entries_number; - int i; - unsigned 
int j; - void *buf; - uint64_t len; - int repeat_count; - struct dte_struct_t * repeat; - if (NULL != buffer) { - /* We have a full data description & buffer pointer simultaneously. - It is ambiguous. Throw a warning since the user might have made a - mistake with data reps*/ - fprintf(stderr,"Warning: buffer_pointer != NULL for NON-inline data representation: buffer_pointer is ignored.\n"); - } - total_entries_number = count_total_dte_repeat_entries(&data); - repeat = data.rep.general_rep->data_representation.data->repeat; - repeat_count = data.rep.general_rep->data_representation.data->repeat_count; - for (i=0; i< repeat_count; i++){ - for (j=0; jdata = (void *)ompi_req; + req->status = HCOLRTE_REQUEST_ACTIVE; return HCOLL_SUCCESS; } @@ -303,7 +258,7 @@ static int test( rte_request_handle_t * request , } /*ompi_request_test(&ompi_req,completed,MPI_STATUS_IGNORE); */ - *completed = ompi_req->req_complete; + *completed = REQUEST_COMPLETE(ompi_req); if (*completed){ ompi_request_free(&ompi_req); request->status = HCOLRTE_REQUEST_DONE; @@ -377,7 +332,7 @@ static int group_id(rte_grp_handle_t group){ return ((ompi_communicator_t *)group)->c_contextid; } -static int +static int request_free(struct ompi_request_t **ompi_req) { ompi_request_t *req = *ompi_req; @@ -402,14 +357,16 @@ static void* get_coll_handle(void) OMPI_REQUEST_INIT(ompi_req,false); ompi_req->req_complete_cb = NULL; ompi_req->req_status.MPI_ERROR = MPI_SUCCESS; + ompi_req->req_state = OMPI_REQUEST_ACTIVE; ompi_req->req_free = request_free; + ompi_req->req_type = OMPI_REQUEST_COLL; return (void *)ompi_req; } static int coll_handle_test(void* handle) { ompi_request_t *ompi_req = (ompi_request_t *)handle; - return ompi_req->req_complete; + return REQUEST_COMPLETE(ompi_req);; } static void coll_handle_free(void *handle){ @@ -429,3 +386,108 @@ static int world_rank(rte_grp_handle_t grp_h, rte_ec_handle_t ec){ ompi_proc_t *proc = (ompi_proc_t *)ec.handle; return 
((ompi_process_name_t*)&proc->super.proc_name)->vpid; } + +#if HCOLL_API >= HCOLL_VERSION(3,6) +hcoll_mpi_type_combiner_t ompi_combiner_2_hcoll_combiner(int ompi_combiner) { + switch (ompi_combiner) + { + case MPI_COMBINER_CONTIGUOUS: + return HCOLL_MPI_COMBINER_CONTIGUOUS; + case MPI_COMBINER_VECTOR: + return HCOLL_MPI_COMBINER_VECTOR; + case MPI_COMBINER_HVECTOR: + return HCOLL_MPI_COMBINER_HVECTOR; + case MPI_COMBINER_INDEXED: + return HCOLL_MPI_COMBINER_INDEXED; + case MPI_COMBINER_HINDEXED_INTEGER: + case MPI_COMBINER_HINDEXED: + return HCOLL_MPI_COMBINER_HINDEXED; + case MPI_COMBINER_DUP: + return HCOLL_MPI_COMBINER_DUP; + case MPI_COMBINER_INDEXED_BLOCK: + return HCOLL_MPI_COMBINER_INDEXED_BLOCK; + case MPI_COMBINER_HINDEXED_BLOCK: + return HCOLL_MPI_COMBINER_HINDEXED_BLOCK; + case MPI_COMBINER_SUBARRAY: + return HCOLL_MPI_COMBINER_SUBARRAY; + case MPI_COMBINER_DARRAY: + return HCOLL_MPI_COMBINER_DARRAY; + case MPI_COMBINER_F90_REAL: + return HCOLL_MPI_COMBINER_F90_REAL; + case MPI_COMBINER_F90_COMPLEX: + return HCOLL_MPI_COMBINER_F90_COMPLEX; + case MPI_COMBINER_F90_INTEGER: + return HCOLL_MPI_COMBINER_F90_INTEGER; + case MPI_COMBINER_RESIZED: + return HCOLL_MPI_COMBINER_RESIZED; + case MPI_COMBINER_STRUCT: + case MPI_COMBINER_STRUCT_INTEGER: + return HCOLL_MPI_COMBINER_STRUCT; + default: + break; + } + return HCOLL_MPI_COMBINER_LAST; +} + + +static int get_mpi_type_envelope(void *mpi_type, int *num_integers, + int *num_addresses, int *num_datatypes, + hcoll_mpi_type_combiner_t *combiner) { + int ompi_combiner, rc; + rc = ompi_datatype_get_args( (ompi_datatype_t*)mpi_type, 0, num_integers, NULL, + num_addresses, NULL, + num_datatypes, NULL, &ompi_combiner); + *combiner = ompi_combiner_2_hcoll_combiner(ompi_combiner); + return rc == OMPI_SUCCESS ? 
HCOLL_SUCCESS : HCOLL_ERROR; +} + +static int get_mpi_type_contents(void *mpi_type, int max_integers, int max_addresses, + int max_datatypes, int *array_of_integers, + void *array_of_addresses, void *array_of_datatypes) { + int rc; + rc = ompi_datatype_get_args( (ompi_datatype_t*)mpi_type, 1, &max_integers, array_of_integers, + &max_addresses, array_of_addresses, + &max_datatypes, array_of_datatypes, NULL ); + return rc == OMPI_SUCCESS ? HCOLL_SUCCESS : HCOLL_ERROR; +} + +static int get_hcoll_type(void *mpi_type, dte_data_representation_t *hcoll_type) { + *hcoll_type = ompi_dtype_2_hcoll_dtype((ompi_datatype_t*)mpi_type, TRY_FIND_DERIVED); + return HCOL_DTE_IS_ZERO((*hcoll_type)) ? HCOLL_ERR_NOT_FOUND : HCOLL_SUCCESS; +} + +static int set_hcoll_type(void *mpi_type, dte_data_representation_t hcoll_type) { + int rc; + mca_coll_hcoll_dtype_t *hcoll_dtype = (mca_coll_hcoll_dtype_t*) + opal_free_list_get(&mca_coll_hcoll_component.dtypes); + ompi_datatype_t *dtype = (ompi_datatype_t*)mpi_type; + hcoll_dtype->type = hcoll_type; + rc = ompi_attr_set_c(TYPE_ATTR, (void*)dtype, &(dtype->d_keyhash), hcoll_type_attr_keyval, (void *)hcoll_dtype, false); + if (OMPI_SUCCESS != rc) { + HCOL_VERBOSE(1,"hcoll ompi_attr_set_c failed for derived dtype"); + goto Cleanup; + } + return HCOLL_SUCCESS; +Cleanup: + opal_free_list_return(&mca_coll_hcoll_component.dtypes, + &hcoll_dtype->super); + return rc; +} + +static int get_mpi_constants(size_t *mpi_datatype_size, + int *mpi_order_c, int *mpi_order_fortran, + int *mpi_distribute_block, + int *mpi_distribute_cyclic, + int *mpi_distribute_none, + int *mpi_distribute_dflt_darg) { + *mpi_datatype_size = sizeof(MPI_Datatype); + *mpi_order_c = MPI_ORDER_C; + *mpi_order_fortran = MPI_ORDER_FORTRAN; + *mpi_distribute_block = MPI_DISTRIBUTE_BLOCK; + *mpi_distribute_cyclic = MPI_DISTRIBUTE_CYCLIC; + *mpi_distribute_none = MPI_DISTRIBUTE_NONE; + *mpi_distribute_dflt_darg = MPI_DISTRIBUTE_DFLT_DARG; + return HCOLL_SUCCESS; +} + +#endif diff --git 
a/ompi/mca/coll/hcoll/configure.m4 b/ompi/mca/coll/hcoll/configure.m4 index 78734309a5e..f3f57532e22 100644 --- a/ompi/mca/coll/hcoll/configure.m4 +++ b/ompi/mca/coll/hcoll/configure.m4 @@ -1,7 +1,9 @@ # -*- shell-script -*- # # -# Copyright (c) 2011 Mellanox Technologies. All rights reserved. +# Copyright (c) 2011 Mellanox Technologies. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -23,7 +25,6 @@ AC_DEFUN([MCA_ompi_coll_hcoll_CONFIG],[ AS_IF([test "$coll_hcoll_happy" = "yes"], [coll_hcoll_WRAPPER_EXTRA_LDFLAGS="$coll_hcoll_LDFLAGS" coll_hcoll_CPPFLAGS="$coll_hcoll_CPPFLAGS" - coll_hcoll_WRAPPER_EXTRA_CPPFLAGS="$coll_hcoll_CPPFLAGS" coll_hcoll_WRAPPER_EXTRA_LIBS="$coll_hcoll_LIBS" $1], [$2]) diff --git a/ompi/mca/coll/hierarch/owner.txt b/ompi/mca/coll/hierarch/owner.txt deleted file mode 100644 index 0cba68ac764..00000000000 --- a/ompi/mca/coll/hierarch/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: UH -status: unmaintained diff --git a/ompi/mca/coll/inter/Makefile.am b/ompi/mca/coll/inter/Makefile.am index d61c5c67548..fb6585488e7 100644 --- a/ompi/mca/coll/inter/Makefile.am +++ b/ompi/mca/coll/inter/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # Make the output library in this directory, and name it either diff --git a/ompi/mca/coll/inter/coll_inter.c b/ompi/mca/coll/inter/coll_inter.c index 666d1f90b64..2973acdd47b 100644 --- a/ompi/mca/coll/inter/coll_inter.c +++ b/ompi/mca/coll/inter/coll_inter.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -49,19 +49,19 @@ static const mca_coll_base_module_1_0_0_t inter = { and will use the functions provided in the basic module */ mca_coll_inter_allgather_inter, mca_coll_inter_allgatherv_inter, - mca_coll_inter_allreduce_inter, + mca_coll_inter_allreduce_inter, NULL, /* alltoall */ NULL, /* alltoallv */ NULL, /* alltoallw */ NULL, /* barrier */ mca_coll_inter_bcast_inter, NULL, /* exscan */ - mca_coll_inter_gather_inter, - mca_coll_inter_gatherv_inter, + mca_coll_inter_gather_inter, + mca_coll_inter_gatherv_inter, mca_coll_inter_reduce_inter, NULL, /* reduce_scatter */ NULL, /* scan */ - mca_coll_inter_scatter_inter, + mca_coll_inter_scatter_inter, mca_coll_inter_scatterv_inter }; #endif @@ -94,7 +94,7 @@ mca_coll_inter_comm_query(struct ompi_communicator_t *comm, int *priority) if (!OMPI_COMM_IS_INTER(comm)) { return NULL; } - + /* Get the priority level attached to this module. If priority is less * than or equal to 0, then the module is unavailable. 
*/ *priority = mca_coll_inter_priority_param; @@ -104,7 +104,7 @@ mca_coll_inter_comm_query(struct ompi_communicator_t *comm, int *priority) size = ompi_comm_size(comm); rsize = ompi_comm_remote_size(comm); - + if ( size < mca_coll_inter_crossover && rsize < mca_coll_inter_crossover) { return NULL; } @@ -133,10 +133,10 @@ mca_coll_inter_comm_query(struct ompi_communicator_t *comm, int *priority) inter_module->super.coll_scan = NULL; inter_module->super.coll_scatter = mca_coll_inter_scatter_inter; inter_module->super.coll_scatterv = mca_coll_inter_scatterv_inter; - + return &(inter_module->super); } - + /* * Init module on the communicator @@ -146,15 +146,15 @@ mca_coll_inter_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm) { mca_coll_inter_module_t *inter_module = (mca_coll_inter_module_t*) module; - + inter_module->inter_comm = comm; - + #if 0 if ( mca_coll_inter_verbose_param ) { mca_coll_inter_dump_struct (data); } #endif - + return OMPI_SUCCESS; } @@ -166,10 +166,10 @@ static void mca_coll_inter_dump_struct ( struct mca_coll_base_comm_t *c) rank = ompi_comm_rank ( c->inter_comm ); - printf("%d: Dump of inter-struct for comm %s cid %u\n", + printf("%d: Dump of inter-struct for comm %s cid %u\n", rank, c->inter_comm->c_name, c->inter_comm->c_contextid); - + return; } #endif diff --git a/ompi/mca/coll/inter/coll_inter.h b/ompi/mca/coll/inter/coll_inter.h index a8fa29aa85d..d3b74658c65 100644 --- a/ompi/mca/coll/inter/coll_inter.h +++ b/ompi/mca/coll/inter/coll_inter.h @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2006-2007 University of Houston. All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -51,57 +53,57 @@ mca_coll_inter_comm_query(struct ompi_communicator_t *comm, int *priority); int mca_coll_inter_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); -int mca_coll_inter_allgather_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, +int mca_coll_inter_allgather_inter(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_inter_allgatherv_inter(void *sbuf, int scount, +int mca_coll_inter_allgatherv_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_inter_allreduce_inter(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_inter_allreduce_inter(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_inter_bcast_inter(void *buff, int count, +int mca_coll_inter_bcast_inter(void *buff, int count, struct ompi_datatype_t *datatype, - int root, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_inter_gather_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, +int 
mca_coll_inter_gather_inter(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_inter_gatherv_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, - int root, +int mca_coll_inter_gatherv_inter(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_inter_reduce_inter(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_inter_reduce_inter(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_inter_scatter_inter(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, +int mca_coll_inter_scatter_inter(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_inter_scatterv_inter(void *sbuf, int *scounts, int *disps, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, int root, +int mca_coll_inter_scatterv_inter(const void *sbuf, const int *scounts, const int *disps, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); @@ -112,7 +114,7 @@ struct mca_coll_inter_module_t { /* Clarifying some terminology: * comm: the input communicator, consisting of several lower level communicators. 
*/ - struct ompi_communicator_t *inter_comm; /* link back to the attached comm */ + struct ompi_communicator_t *inter_comm; /* link back to the attached comm */ }; typedef struct mca_coll_inter_module_t mca_coll_inter_module_t; OBJ_CLASS_DECLARATION(mca_coll_inter_module_t); diff --git a/ompi/mca/coll/inter/coll_inter_allgather.c b/ompi/mca/coll/inter/coll_inter_allgather.c index a8845d1ca1f..7f08ff5e28f 100644 --- a/ompi/mca/coll/inter/coll_inter_allgather.c +++ b/ompi/mca/coll/inter/coll_inter_allgather.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,16 +41,16 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_inter_allgather_inter(void *sbuf, int scount, +mca_coll_inter_allgather_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int rank, root = 0, size, rsize, err; - char *ptmp = NULL; - ptrdiff_t slb, sextent, incr; + int rank, root = 0, size, rsize, err = OMPI_SUCCESS; + char *ptmp_free = NULL, *ptmp = NULL; + ptrdiff_t gap, span; ompi_request_t *req[2]; rank = ompi_comm_rank(comm); @@ -56,20 +58,16 @@ mca_coll_inter_allgather_inter(void *sbuf, int scount, rsize = ompi_comm_remote_size(comm); /* Perform the gather locally at the root */ - err = ompi_datatype_get_extent(sdtype, &slb, &sextent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } - if ( scount > 0 ) { - incr = sextent * scount; - ptmp = (char*)malloc(size * incr); - if (NULL == ptmp) { + span = opal_datatype_span(&sdtype->super, (int64_t)scount*(int64_t)size, &gap); + ptmp_free = (char*)malloc(span); + if (NULL == ptmp_free) { return OMPI_ERR_OUT_OF_RESOURCE; } + ptmp = ptmp_free - gap; - err = comm->c_local_comm->c_coll.coll_gather(sbuf, scount, sdtype, - ptmp, scount, sdtype, + err = comm->c_local_comm->c_coll.coll_gather(sbuf, scount, sdtype, + ptmp, scount, sdtype, 0, comm->c_local_comm, comm->c_local_comm->c_coll.coll_gather_module); if (OMPI_SUCCESS != err) { @@ -101,7 +99,7 @@ mca_coll_inter_allgather_inter(void *sbuf, int scount, } /* bcast the message to all the local processes */ if ( rcount > 0 ) { - err = comm->c_local_comm->c_coll.coll_bcast(rbuf, rcount*rsize, rdtype, + err = comm->c_local_comm->c_coll.coll_bcast(rbuf, rcount*rsize, rdtype, root, comm->c_local_comm, comm->c_local_comm->c_coll.coll_bcast_module); if (OMPI_SUCCESS != err) { @@ -110,8 +108,8 @@ mca_coll_inter_allgather_inter(void 
*sbuf, int scount, } exit: - if (NULL != ptmp) { - free(ptmp); + if (NULL != ptmp_free) { + free(ptmp_free); } return err; diff --git a/ompi/mca/coll/inter/coll_inter_allgatherv.c b/ompi/mca/coll/inter/coll_inter_allgatherv.c index aa4f6c1cd68..1cbe4ed1cdf 100644 --- a/ompi/mca/coll/inter/coll_inter_allgatherv.c +++ b/ompi/mca/coll/inter/coll_inter_allgatherv.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -38,19 +40,16 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_inter_allgatherv_inter(void *sbuf, int scount, +mca_coll_inter_allgatherv_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int i, rank, size, size_local, total=0, err; int *count=NULL,*displace=NULL; - char *ptmp=NULL; - MPI_Aint incr; - MPI_Aint extent; - MPI_Aint lb; + char *ptmp_free=NULL, *ptmp=NULL; ompi_datatype_t *ndtype = NULL; ompi_request_t *req[2]; @@ -67,8 +66,8 @@ mca_coll_inter_allgatherv_inter(void *sbuf, int scount, } } /* Local gather to get the scount of each process */ - err = comm->c_local_comm->c_coll.coll_gather(&scount, 1, MPI_INT, - count, 1, MPI_INT, + err = comm->c_local_comm->c_coll.coll_gather(&scount, 
1, MPI_INT, + count, 1, MPI_INT, 0, comm->c_local_comm, comm->c_local_comm->c_coll.coll_gather_module); if (OMPI_SUCCESS != err) { @@ -79,26 +78,23 @@ mca_coll_inter_allgatherv_inter(void *sbuf, int scount, for (i = 1; i < size_local; i++) { displace[i] = displace[i-1] + count[i-1]; } - /* Perform the gatherv locally with the first process as root */ - err = ompi_datatype_get_extent(sdtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - err = OMPI_ERROR; - goto exit; - } - incr = 0; + total = 0; for (i = 0; i < size_local; i++) { - incr = incr + extent*count[i]; + total = total + count[i]; } - if ( incr > 0 ) { - ptmp = (char*)malloc(incr); - if (NULL == ptmp) { + if ( total > 0 ) { + ptrdiff_t gap, span; + span = opal_datatype_span(&sdtype->super, total, &gap); + ptmp_free = (char*)malloc(span); + if (NULL == ptmp_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } + ptmp = ptmp_free - gap; } } - err = comm->c_local_comm->c_coll.coll_gatherv(sbuf, scount, sdtype, - ptmp, count, displace, + err = comm->c_local_comm->c_coll.coll_gatherv(sbuf, scount, sdtype, + ptmp, count, displace, sdtype,0, comm->c_local_comm, comm->c_local_comm->c_coll.coll_gatherv_module); if (OMPI_SUCCESS != err) { @@ -108,10 +104,7 @@ mca_coll_inter_allgatherv_inter(void *sbuf, int scount, ompi_datatype_create_indexed(size,rcounts,disps,rdtype,&ndtype); ompi_datatype_commit(&ndtype); - if (0 == rank) { - for (i = 0; i < size_local; i++) { - total = total + count[i]; - } + if (0 == rank) { /* Exchange data between roots */ err = MCA_PML_CALL(irecv(rbuf, 1, ndtype, 0, MCA_COLL_BASE_TAG_ALLGATHERV, comm, @@ -133,17 +126,17 @@ mca_coll_inter_allgatherv_inter(void *sbuf, int scount, goto exit; } } - + /* bcast the message to all the local processes */ - err = comm->c_local_comm->c_coll.coll_bcast(rbuf, 1, ndtype, + err = comm->c_local_comm->c_coll.coll_bcast(rbuf, 1, ndtype, 0, comm->c_local_comm, comm->c_local_comm->c_coll.coll_bcast_module); exit: if( NULL != ndtype ) { 
ompi_datatype_destroy(&ndtype); } - if (NULL != ptmp) { - free(ptmp); + if (NULL != ptmp_free) { + free(ptmp_free); } if (NULL != displace) { free(displace); diff --git a/ompi/mca/coll/inter/coll_inter_allreduce.c b/ompi/mca/coll/inter/coll_inter_allreduce.c index 7446030f670..58665461d17 100644 --- a/ompi/mca/coll/inter/coll_inter_allreduce.c +++ b/ompi/mca/coll/inter/coll_inter_allreduce.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,33 +41,30 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_inter_allreduce_inter(void *sbuf, void *rbuf, int count, +mca_coll_inter_allreduce_inter(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int err, rank, root = 0; - ptrdiff_t lb, extent; char *tmpbuf = NULL, *pml_buffer = NULL; ompi_request_t *req[2]; + ptrdiff_t gap, span; rank = ompi_comm_rank(comm); - + /* Perform the reduction locally */ - err = ompi_datatype_get_extent(dtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } - - tmpbuf = (char *) malloc(count * extent); + span = opal_datatype_span(&dtype->super, count, &gap); + + tmpbuf = (char *) malloc(span); if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = tmpbuf - lb; - + pml_buffer = tmpbuf - gap; + err = comm->c_local_comm->c_coll.coll_reduce(sbuf, pml_buffer, count, - dtype, op, root, + dtype, op, root, comm->c_local_comm, comm->c_local_comm->c_coll.coll_reduce_module); if (OMPI_SUCCESS != err) { @@ -96,7 +95,7 @@ mca_coll_inter_allreduce_inter(void *sbuf, void *rbuf, int count, } /* bcast the message to all the local processes */ - err = comm->c_local_comm->c_coll.coll_bcast(rbuf, count, dtype, + err = comm->c_local_comm->c_coll.coll_bcast(rbuf, count, dtype, root, comm->c_local_comm, comm->c_local_comm->c_coll.coll_bcast_module); if (OMPI_SUCCESS != err) { diff --git a/ompi/mca/coll/inter/coll_inter_bcast.c b/ompi/mca/coll/inter/coll_inter_bcast.c index 391feb587cc..957978ee1e3 100644 --- a/ompi/mca/coll/inter/coll_inter_bcast.c +++ b/ompi/mca/coll/inter/coll_inter_bcast.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -60,14 +60,14 @@ mca_coll_inter_bcast_inter(void *buff, int count, return err; } } - err = comm->c_local_comm->c_coll.coll_bcast(buff, count, datatype, 0, + err = comm->c_local_comm->c_coll.coll_bcast(buff, count, datatype, 0, comm->c_local_comm, comm->c_local_comm->c_coll.coll_bcast_module); } else { /* root section, send to the first process of the remote group */ err = MCA_PML_CALL(send(buff, count, datatype, 0, MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, + MCA_PML_BASE_SEND_STANDARD, comm)); if (OMPI_SUCCESS != err) { return err; diff --git a/ompi/mca/coll/inter/coll_inter_component.c b/ompi/mca/coll/inter/coll_inter_component.c index 18467ec545c..d201c82e1c7 100644 --- a/ompi/mca/coll/inter/coll_inter_component.c +++ b/ompi/mca/coll/inter/coll_inter_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/coll/inter/coll_inter_gather.c b/ompi/mca/coll/inter/coll_inter_gather.c index 958cf243517..d797d644846 100644 --- a/ompi/mca/coll/inter/coll_inter_gather.c +++ b/ompi/mca/coll/inter/coll_inter_gather.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,7 +38,7 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_inter_gather_inter(void *sbuf, int scount, +mca_coll_inter_gather_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -45,11 +47,7 @@ mca_coll_inter_gather_inter(void *sbuf, int scount, { int err; int rank; - int size,size_local; - char *ptmp = NULL; - MPI_Aint incr; - MPI_Aint extent; - MPI_Aint lb; + int size; size = ompi_comm_remote_size(comm); rank = ompi_comm_rank(comm); @@ -59,23 +57,24 @@ mca_coll_inter_gather_inter(void *sbuf, int scount, err = OMPI_SUCCESS; } else if (MPI_ROOT != root) { /* Perform the gather locally with the first process as root */ - err = ompi_datatype_get_extent(sdtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } - - incr = extent * scount; - size_local = ompi_comm_size(comm->c_local_comm); - ptmp = 
(char*)malloc(size_local * incr); - if (NULL == ptmp) { + char *ptmp_free = NULL, *ptmp; + int size_local; + ptrdiff_t gap, span; + + size_local = ompi_comm_size(comm->c_local_comm); + span = opal_datatype_span(&sdtype->super, (int64_t)scount*(int64_t)size_local, &gap); + + ptmp_free = (char*)malloc(span); + if (NULL == ptmp_free) { return OMPI_ERR_OUT_OF_RESOURCE; } - - err = comm->c_local_comm->c_coll.coll_gather(sbuf, scount, sdtype, - ptmp, scount, sdtype, + ptmp = ptmp_free - gap; + + err = comm->c_local_comm->c_coll.coll_gather(sbuf, scount, sdtype, + ptmp, scount, sdtype, 0, comm->c_local_comm, comm->c_local_comm->c_coll.coll_gather_module); - if (0 == rank) { + if (0 == rank) { /* First process sends data to the root */ err = MCA_PML_CALL(send(ptmp, scount*size_local, sdtype, root, MCA_COLL_BASE_TAG_GATHER, @@ -84,9 +83,7 @@ mca_coll_inter_gather_inter(void *sbuf, int scount, return err; } } - if (NULL != ptmp) { - free(ptmp); - } + free(ptmp_free); } else { /* I am the root, loop receiving the data. */ err = MCA_PML_CALL(recv(rbuf, rcount*size, rdtype, 0, diff --git a/ompi/mca/coll/inter/coll_inter_gatherv.c b/ompi/mca/coll/inter/coll_inter_gatherv.c index d2339e1634d..a5e11a763b5 100644 --- a/ompi/mca/coll/inter/coll_inter_gatherv.c +++ b/ompi/mca/coll/inter/coll_inter_gatherv.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -35,19 +37,16 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_inter_gatherv_inter(void *sbuf, int scount, +mca_coll_inter_gatherv_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int i, rank, size, size_local, total=0, err; int *count=NULL, *displace=NULL; - char *ptmp=NULL; - MPI_Aint incr; - MPI_Aint extent; - MPI_Aint lb; + char *ptmp_free=NULL, *ptmp=NULL; ompi_datatype_t *ndtype; if (MPI_PROC_NULL == root) { /* do nothing */ @@ -90,21 +89,18 @@ mca_coll_inter_gatherv_inter(void *sbuf, int scount, displace[i] = displace[i-1] + count[i-1]; } /* Perform the gatherv locally with the first process as root */ - err = ompi_datatype_get_extent(sdtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - err = OMPI_ERROR; - goto exit; - } - incr = 0; for (i = 0; i < size_local; i++) { - incr = incr + extent*count[i]; + total = total + count[i]; } - if ( incr > 0 ) { - ptmp = (char*)malloc(incr); - if (NULL == ptmp) { + if ( total > 0 ) { + ptrdiff_t gap, span; + span = opal_datatype_span(&sdtype->super, total, &gap); + ptmp_free = (char*)malloc(span); + if (NULL == ptmp_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } + ptmp = ptmp_free - gap; } } err = comm->c_local_comm->c_coll.coll_gatherv(sbuf, scount, sdtype, @@ -116,9 +112,6 @@ mca_coll_inter_gatherv_inter(void *sbuf, int scount, } if (0 == rank) { - for (i = 0; i < size_local; i++) { - total = total + count[i]; - } /* First process sends data to the root */ err = MCA_PML_CALL(send(ptmp, total, sdtype, root, MCA_COLL_BASE_TAG_GATHERV, @@ -126,8 +119,8 @@ mca_coll_inter_gatherv_inter(void *sbuf, int scount, } exit: - if (NULL != ptmp) { - free(ptmp); + if (NULL != ptmp_free) { + free(ptmp_free); } if (NULL != displace) { free(displace); 
diff --git a/ompi/mca/coll/inter/coll_inter_reduce.c b/ompi/mca/coll/inter/coll_inter_reduce.c index 40afab9911b..a0a2ec90856 100644 --- a/ompi/mca/coll/inter/coll_inter_reduce.c +++ b/ompi/mca/coll/inter/coll_inter_reduce.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -38,16 +40,13 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_inter_reduce_inter(void *sbuf, void *rbuf, int count, +mca_coll_inter_reduce_inter(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int rank, err; - ptrdiff_t true_lb, true_extent, lb, extent; - char *free_buffer = NULL; - char *pml_buffer = NULL; /* Initialize */ rank = ompi_comm_rank(comm); @@ -56,15 +55,18 @@ mca_coll_inter_reduce_inter(void *sbuf, void *rbuf, int count, /* do nothing */ err = OMPI_SUCCESS; } else if (MPI_ROOT != root) { + ptrdiff_t gap, span; + char *free_buffer = NULL; + char *pml_buffer = NULL; + /* Perform the reduce locally with the first process as root */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + span = 
opal_datatype_span(&dtype->super, count, &gap); - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + free_buffer = (char*)malloc(span); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = free_buffer - true_lb; + pml_buffer = free_buffer - gap; err = comm->c_local_comm->c_coll.coll_reduce(sbuf, pml_buffer, count, dtype, op, 0, comm->c_local_comm, @@ -78,7 +80,7 @@ mca_coll_inter_reduce_inter(void *sbuf, void *rbuf, int count, return err; } } - + if (NULL != free_buffer) { free(free_buffer); } diff --git a/ompi/mca/coll/inter/coll_inter_scatter.c b/ompi/mca/coll/inter/coll_inter_scatter.c index 1350a69ed9f..25e6e6f35ce 100644 --- a/ompi/mca/coll/inter/coll_inter_scatter.c +++ b/ompi/mca/coll/inter/coll_inter_scatter.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2008 University of Houston. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -35,16 +37,14 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_inter_scatter_inter(void *sbuf, int scount, +mca_coll_inter_scatter_inter(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int rank, size, size_local, err; - char *ptmp = NULL; - ptrdiff_t lb, incr; + int rank, size, err; /* Initialize */ @@ -56,20 +56,20 @@ mca_coll_inter_scatter_inter(void *sbuf, int scount, err = OMPI_SUCCESS; } else if (MPI_ROOT != root) { /* First process receives the data from root */ - if(0 == rank) { - err = ompi_datatype_get_extent(rdtype, &lb, &incr); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } - - incr *= rcount; + char *ptmp_free = NULL, *ptmp = NULL; + if(0 == rank) { + int size_local; + ptrdiff_t gap, span; + size_local = ompi_comm_size(comm->c_local_comm); - ptmp = (char*)malloc(size_local * incr); - if (NULL == ptmp) { + span = opal_datatype_span(&rdtype->super, (int64_t)rcount*(int64_t)size_local, &gap); + ptmp_free = malloc(span); + if (NULL == ptmp_free) { return OMPI_ERR_OUT_OF_RESOURCE; } + ptmp = ptmp_free - gap; - err = MCA_PML_CALL(recv(ptmp, rcount*size_local, rdtype, + err = MCA_PML_CALL(recv(ptmp, rcount*size_local, rdtype, root, MCA_COLL_BASE_TAG_SCATTER, comm, MPI_STATUS_IGNORE)); if (OMPI_SUCCESS != err) { @@ -77,12 +77,12 @@ mca_coll_inter_scatter_inter(void *sbuf, int scount, } } /* Perform the scatter locally with the first process as root */ - err = comm->c_local_comm->c_coll.coll_scatter(ptmp, rcount, rdtype, - rbuf, rcount, rdtype, + err = comm->c_local_comm->c_coll.coll_scatter(ptmp, rcount, rdtype, + rbuf, rcount, rdtype, 0, comm->c_local_comm, comm->c_local_comm->c_coll.coll_scatter_module); - if (NULL != ptmp) { - free(ptmp); + if (NULL != ptmp_free) { + free(ptmp_free); } } else { /* Root sends data 
to the first process in the remote group */ diff --git a/ompi/mca/coll/inter/coll_inter_scatterv.c b/ompi/mca/coll/inter/coll_inter_scatterv.c index 26ad961e6fb..b037232ebd2 100644 --- a/ompi/mca/coll/inter/coll_inter_scatterv.c +++ b/ompi/mca/coll/inter/coll_inter_scatterv.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,19 +38,16 @@ * Returns: - MPI_SUCCESS or error code */ int -mca_coll_inter_scatterv_inter(void *sbuf, int *scounts, - int *disps, struct ompi_datatype_t *sdtype, +mca_coll_inter_scatterv_inter(const void *sbuf, const int *scounts, + const int *disps, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int i, rank, size, err, total, size_local; + int i, rank, size, err, total=0, size_local; int *counts=NULL,*displace=NULL; - char *ptmp=NULL; - MPI_Aint incr; - MPI_Aint extent; - MPI_Aint lb; + char *ptmp_free=NULL, *ptmp=NULL; ompi_datatype_t *ndtype; /* Initialize */ @@ -70,24 +69,18 @@ mca_coll_inter_scatterv_inter(void *sbuf, int *scounts, if (OMPI_SUCCESS != err) { return err; } - /* calculate the whole buffer size and recieve it from root */ - err = ompi_datatype_get_extent(rdtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } - incr = 0; + /* 
calculate the whole buffer size and receive it from root */ for (i = 0; i < size_local; i++) { - incr = incr + extent*counts[i]; + total = total + counts[i]; } - if ( incr > 0 ) { - ptmp = (char*)malloc(incr); - if (NULL == ptmp) { + if ( total > 0 ) { + ptrdiff_t gap, span; + span = opal_datatype_span(&rdtype->super, total, &gap); + ptmp_free = (char*)malloc(span); + if (NULL == ptmp_free) { return OMPI_ERR_OUT_OF_RESOURCE; } - } - total = 0; - for (i = 0; i < size_local; i++) { - total = total + counts[i]; + ptmp = ptmp_free - gap; } err = MCA_PML_CALL(recv(ptmp, total, rdtype, root, MCA_COLL_BASE_TAG_SCATTERV, @@ -103,16 +96,16 @@ mca_coll_inter_scatterv_inter(void *sbuf, int *scounts, } } /* perform the scatterv locally */ - err = comm->c_local_comm->c_coll.coll_scatterv(ptmp, counts, displace, - rdtype, rbuf, rcount, + err = comm->c_local_comm->c_coll.coll_scatterv(ptmp, counts, displace, + rdtype, rbuf, rcount, rdtype, 0, comm->c_local_comm, comm->c_local_comm->c_coll.coll_scatterv_module); if (OMPI_SUCCESS != err) { return err; } - if (NULL != ptmp) { - free(ptmp); + if (NULL != ptmp_free) { + free(ptmp_free); } if (NULL != displace) { free(displace); @@ -131,7 +124,7 @@ mca_coll_inter_scatterv_inter(void *sbuf, int *scounts, ompi_datatype_create_indexed(size,scounts,disps,sdtype,&ndtype); ompi_datatype_commit(&ndtype); - + err = MCA_PML_CALL(send(sbuf, 1, ndtype, 0, MCA_COLL_BASE_TAG_SCATTERV, MCA_PML_BASE_SEND_STANDARD, comm)); diff --git a/ompi/mca/coll/libnbc/Makefile.am b/ompi/mca/coll/libnbc/Makefile.am index d630413e007..4d3e90186a9 100644 --- a/ompi/mca/coll/libnbc/Makefile.am +++ b/ompi/mca/coll/libnbc/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -13,16 +13,15 @@ # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # sources = \ coll_libnbc.h \ coll_libnbc_component.c \ - coll_libnbc_ireduce_scatter_block.c \ nbc.c \ nbc_internal.h \ libdict/dict.h \ @@ -49,6 +48,7 @@ sources = \ nbc_ineighbor_alltoallw.c \ nbc_ireduce.c \ nbc_ireduce_scatter.c \ + nbc_ireduce_scatter_block.c \ nbc_iscan.c \ nbc_iscatter.c \ nbc_iscatterv.c \ diff --git a/ompi/mca/coll/libnbc/coll_libnbc.h b/ompi/mca/coll/libnbc/coll_libnbc.h index a66e8aed7cd..c8034c53165 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc.h +++ b/ompi/mca/coll/libnbc/coll_libnbc.h @@ -6,19 +6,20 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -40,7 +41,7 @@ BEGIN_C_DECLS /* TODO: this whole schedule cache stuff does not work with the tmbuf * :-( - first, the tmpbuf must not be freed if a schedule using it is * still in the cache and second, the tmpbuf used by the schedule must - * be attached to the handle that uses this schedule !!!! + * be attached to the handle that uses this schedule !!!! * I.E., THIS IS EXPERIMENTAL AND MIGHT NOT WORK */ /* It also leaks memory because the schedule is never cleaned up when the communicator is destroyed, so don't use it for now */ @@ -67,12 +68,15 @@ BEGIN_C_DECLS /* number of implemented collective functions */ #define NBC_NUM_COLL 17 +extern bool libnbc_ibcast_skip_dt_decision; + struct ompi_coll_libnbc_component_t { mca_coll_base_component_2_0_0_t super; opal_free_list_t requests; opal_list_t active_requests; int32_t active_comms; - opal_atomic_lock_t progress_lock; + opal_atomic_lock_t progress_lock; /* protect from recursive calls */ + opal_mutex_t lock; /* protect access to the active_requests list */ }; typedef struct ompi_coll_libnbc_component_t ompi_coll_libnbc_component_t; @@ -98,9 +102,16 @@ OBJ_CLASS_DECLARATION(ompi_coll_libnbc_module_t); typedef ompi_coll_libnbc_module_t NBC_Comminfo; -/* a schedule is basically a pointer to some memory location where the - * schedule array resides */ -typedef void* NBC_Schedule; +struct NBC_Schedule { + opal_object_t super; + volatile int size; + volatile int current_round_offset; + char *data; +}; + +typedef struct NBC_Schedule NBC_Schedule; + +OBJ_CLASS_DECLARATION(NBC_Schedule); struct ompi_coll_libnbc_request_t { ompi_request_t super; @@ -110,7 +121,7 @@ struct ompi_coll_libnbc_request_t { volatile int req_count; ompi_request_t **req_array; NBC_Comminfo *comminfo; - volatile NBC_Schedule *schedule; + NBC_Schedule *schedule; void *tmpbuf; /* temporary buffer e.g. 
used for Reduce */ /* TODO: we should make a handle pointer to a state later (that the user * can move request handles) */ @@ -134,9 +145,9 @@ typedef ompi_coll_libnbc_request_t NBC_Handle; #define OMPI_COLL_LIBNBC_REQUEST_RETURN(req) \ do { \ - OMPI_REQUEST_FINI(&request->super); \ + OMPI_REQUEST_FINI(&(req)->super); \ opal_free_list_return (&mca_coll_libnbc_component.requests, \ - (opal_free_list_item_t*) req); \ + (opal_free_list_item_t*) (req)); \ } while (0) int ompi_coll_libnbc_progress(void); @@ -145,132 +156,132 @@ int NBC_Init_comm(MPI_Comm comm, ompi_coll_libnbc_module_t *module); int NBC_Progress(NBC_Handle *handle); -int ompi_coll_libnbc_iallgather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, +int ompi_coll_libnbc_iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_iallgatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs, +int ompi_coll_libnbc_iallgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, +int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, +int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype 
recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ialltoallv(void* sendbuf, int *sendcounts, int *sdispls, - MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *rdispls, +int ompi_coll_libnbc_ialltoallv(const void* sendbuf, const int *sendcounts, const int *sdispls, + MPI_Datatype sendtype, void* recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ialltoallw(void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes, - struct ompi_communicator_t *comm, ompi_request_t **request, +int ompi_coll_libnbc_ialltoallw(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t * const *rdtypes, + struct ompi_communicator_t *comm, ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module); int ompi_coll_libnbc_ibarrier(struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_iexscan(void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, +int ompi_coll_libnbc_iexscan(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, +int ompi_coll_libnbc_igather(const void* sendbuf, int sendcount, 
MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_igatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, - void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, +int ompi_coll_libnbc_igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, + void* recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ireduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, +int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ireduce_scatter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype, +int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const int *recvcounts, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ireduce_scatter_block(void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, - struct ompi_op_t *op, struct ompi_communicator_t *comm, +int ompi_coll_libnbc_ireduce_scatter_block(const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_iscan(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, +int ompi_coll_libnbc_iscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct 
mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtype, - void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, +int ompi_coll_libnbc_iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, + void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, - void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, +int ompi_coll_libnbc_iscatterv(const void* sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, + void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_iallgather_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, +int ompi_coll_libnbc_iallgather_inter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_iallgatherv_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs, +int ompi_coll_libnbc_iallgatherv_inter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_iallreduce_inter(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, +int ompi_coll_libnbc_iallreduce_inter(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** 
request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ialltoall_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, +int ompi_coll_libnbc_ialltoall_inter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ialltoallv_inter(void* sendbuf, int *sendcounts, int *sdispls, - MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *rdispls, +int ompi_coll_libnbc_ialltoallv_inter(const void* sendbuf, const int *sendcounts, const int *sdispls, + MPI_Datatype sendtype, void* recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ialltoallw_inter(void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes, - struct ompi_communicator_t *comm, ompi_request_t **request, +int ompi_coll_libnbc_ialltoallw_inter(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t * const *rdtypes, + struct ompi_communicator_t *comm, ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module); int ompi_coll_libnbc_ibarrier_inter(struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); int ompi_coll_libnbc_ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_igather_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, +int ompi_coll_libnbc_igather_inter(const 
void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_igatherv_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, - void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, +int ompi_coll_libnbc_igatherv_inter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, + void* recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ireduce_inter(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, +int ompi_coll_libnbc_ireduce_inter(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ireduce_scatter_inter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype, +int ompi_coll_libnbc_ireduce_scatter_inter(const void* sendbuf, void* recvbuf, const int *recvcounts, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ireduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, - struct ompi_op_t *op, struct ompi_communicator_t *comm, +int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_iscatter_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, - void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, +int ompi_coll_libnbc_iscatter_inter(const 
void* sendbuf, int sendcount, MPI_Datatype sendtype, + void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_iscatterv_inter(void* sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, - void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, +int ompi_coll_libnbc_iscatterv_inter(const void* sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, + void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ineighbor_allgather(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, +int ompi_coll_libnbc_ineighbor_allgather(const void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ineighbor_allgatherv(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, - int *rcounts, int *displs, MPI_Datatype rtype, +int ompi_coll_libnbc_ineighbor_allgatherv(const void *sbuf, int scount, MPI_Datatype stype, void *rbuf, + const int *rcounts, const int *displs, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ineighbor_alltoall(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, +int ompi_coll_libnbc_ineighbor_alltoall(const void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ineighbor_alltoallv(void *sbuf, int *scounts, int *sdispls, MPI_Datatype stype, - void *rbuf, int *rcounts, int *rdispls, MPI_Datatype rtype, +int 
ompi_coll_libnbc_ineighbor_alltoallv(const void *sbuf, const int *scounts, const int *sdispls, MPI_Datatype stype, + void *rbuf, const int *rcounts, const int *rdispls, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_libnbc_ineighbor_alltoallw(void *sbuf, int *scounts, MPI_Aint *sdisps, MPI_Datatype *stypes, - void *rbuf, int *rcounts, MPI_Aint *rdisps, MPI_Datatype *rtypes, +int ompi_coll_libnbc_ineighbor_alltoallw(const void *sbuf, const int *scounts, const MPI_Aint *sdisps, struct ompi_datatype_t * const *stypes, + void *rbuf, const int *rcounts, const MPI_Aint *rdisps, struct ompi_datatype_t * const *rtypes, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module); diff --git a/ompi/mca/coll/libnbc/coll_libnbc_component.c b/ompi/mca/coll/libnbc/coll_libnbc_component.c index 22d4f5f718a..1ac5b0b943e 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc_component.c +++ b/ompi/mca/coll/libnbc/coll_libnbc_component.c @@ -3,20 +3,23 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,6 +39,7 @@ const char *mca_coll_libnbc_component_version_string = static int libnbc_priority = 10; +bool libnbc_ibcast_skip_dt_decision = true; static int libnbc_open(void); @@ -87,6 +91,7 @@ libnbc_open(void) OBJ_CONSTRUCT(&mca_coll_libnbc_component.requests, opal_free_list_t); OBJ_CONSTRUCT(&mca_coll_libnbc_component.active_requests, opal_list_t); + OBJ_CONSTRUCT(&mca_coll_libnbc_component.lock, opal_mutex_t); ret = opal_free_list_init (&mca_coll_libnbc_component.requests, sizeof(ompi_coll_libnbc_request_t), 8, OBJ_CLASS(ompi_coll_libnbc_request_t), @@ -111,6 +116,7 @@ libnbc_close(void) OBJ_DESTRUCT(&mca_coll_libnbc_component.requests); OBJ_DESTRUCT(&mca_coll_libnbc_component.active_requests); + OBJ_DESTRUCT(&mca_coll_libnbc_component.lock); return OMPI_SUCCESS; } @@ -128,6 +134,27 @@ libnbc_register(void) MCA_BASE_VAR_SCOPE_READONLY, &libnbc_priority); + /* ibcast decision function can make the wrong decision if a legal + * non-uniform data type signature is used. This has resulted in the + * collective operation failing, and possibly producing wrong answers. + * We are investigating a fix for this problem, but it is taking a while. + * https://github.com/open-mpi/ompi/issues/2256 + * https://github.com/open-mpi/ompi/issues/1763 + * As a result we are adding an MCA parameter to make a conservative + * decision to avoid this issue. If the user knows that their application + * does not use data types in this way, then they can set this parameter + * to get the old behavior. Once the issue is truly fixed, then this + * parameter can be removed. + */ + libnbc_ibcast_skip_dt_decision = true; + (void) mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version, + "ibcast_skip_dt_decision", + "In ibcast only use size of communicator to choose algorithm, exclude data type signature.
Set to 'false' to use data type signature in decision. WARNING: If you set this to 'false' then your application should not use non-uniform data type signatures in calls to ibcast.", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &libnbc_ibcast_skip_dt_decision); + return OMPI_SUCCESS; } @@ -152,7 +179,7 @@ libnbc_init_query(bool enable_progress_threads, * priority we want to return. */ mca_coll_base_module_t * -libnbc_comm_query(struct ompi_communicator_t *comm, +libnbc_comm_query(struct ompi_communicator_t *comm, int *priority) { ompi_coll_libnbc_module_t *module; @@ -234,22 +261,36 @@ int ompi_coll_libnbc_progress(void) { ompi_coll_libnbc_request_t* request, *next; + int res; + /* return if invoked recursively */ if (opal_atomic_trylock(&mca_coll_libnbc_component.progress_lock)) return 0; + /* process active requests, and use mca_coll_libnbc_component.lock to access the + * mca_coll_libnbc_component.active_requests list */ + OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); OPAL_LIST_FOREACH_SAFE(request, next, &mca_coll_libnbc_component.active_requests, ompi_coll_libnbc_request_t) { - if (NBC_OK == NBC_Progress(request)) { + OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); + res = NBC_Progress(request); + if( NBC_CONTINUE != res ) { /* done, remove and complete */ + OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); opal_list_remove_item(&mca_coll_libnbc_component.active_requests, &request->super.super.super); - - request->super.req_status.MPI_ERROR = OMPI_SUCCESS; - OPAL_THREAD_LOCK(&ompi_request_lock); + OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); + + if( OMPI_SUCCESS == res || NBC_OK == res || NBC_SUCCESS == res ) { + request->super.req_status.MPI_ERROR = OMPI_SUCCESS; + } + else { + request->super.req_status.MPI_ERROR = res; + } ompi_request_complete(&request->super, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); } + OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); } + 
OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); opal_atomic_unlock(&mca_coll_libnbc_component.progress_lock); @@ -272,7 +313,7 @@ libnbc_module_destruct(ompi_coll_libnbc_module_t *module) /* if we ever were used for a collective op, do the progress cleanup. */ if (true == module->comm_registered) { - int32_t tmp = + int32_t tmp = OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, -1); if (0 == tmp) { opal_progress_unregister(ompi_coll_libnbc_progress); @@ -297,10 +338,10 @@ request_cancel(struct ompi_request_t *request, int complete) static int request_free(struct ompi_request_t **ompi_req) { - ompi_coll_libnbc_request_t *request = + ompi_coll_libnbc_request_t *request = (ompi_coll_libnbc_request_t*) *ompi_req; - if (true != request->super.req_complete) { + if( !REQUEST_COMPLETE(&request->super) ) { return MPI_ERR_REQUEST; } @@ -322,7 +363,7 @@ request_construct(ompi_coll_libnbc_request_t *request) } -OBJ_CLASS_INSTANCE(ompi_coll_libnbc_request_t, +OBJ_CLASS_INSTANCE(ompi_coll_libnbc_request_t, ompi_request_t, request_construct, NULL); diff --git a/ompi/mca/coll/libnbc/coll_libnbc_ireduce_scatter_block.c b/ompi/mca/coll/libnbc/coll_libnbc_ireduce_scatter_block.c deleted file mode 100644 index bb0f0b7cbde..00000000000 --- a/ompi/mca/coll/libnbc/coll_libnbc_ireduce_scatter_block.c +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (c) 2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All - * rights reserved. - * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * - * Author(s): Torsten Hoefler - * - */ -#include "nbc_internal.h" - -/* an reduce_csttare schedule can not be cached easily because the contents - * ot the recvcount value may change, so a comparison of the address - * would not be sufficient ... we simply do not cache it */ - -/* binomial reduce to rank 0 followed by a linear scatter ... - * - * Algorithm: - * pairwise exchange - * round r: - * grp = rank % 2^r - * if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value - * if grp == 1: send to rank - 2^(r-1) and exit function - * - * do this for R=log_2(p) rounds - * - */ - -int ompi_coll_libnbc_ireduce_scatter_block(void* sendbuf, void* recvbuf, int recvcount, MPI_Datatype datatype, - MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, - struct mca_coll_base_module_2_1_0_t *module) { - int peer, rank, maxr, p, r, res, count, offset, firstred; - MPI_Aint ext; - char *redbuf, *sbuf, inplace; - NBC_Schedule *schedule; - NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; - ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res || 0 == p) { printf("MPI Error in MPI_Comm_size() (%i:%i)\n", res, p); return (MPI_SUCCESS == res) ? MPI_ERR_SIZE : res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res || 0 == ext) { printf("MPI Error in MPI_Type_extent() (%i:%i)\n", res, (int)ext); return (MPI_SUCCESS == res) ? 
MPI_ERR_SIZE : res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - maxr = (int)ceil((log((double)p)/LOG2)); - - count = 0; - for(r=0;rtmpbuf = malloc(ext*count*2); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } - - redbuf = ((char*)handle->tmpbuf)+(ext*count); - - /* copy data to redbuf if we only have a single node */ - if((p==1) && !inplace) { - res = NBC_Copy(sendbuf, count, datatype, redbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - - firstred = 1; - for(r=1; r<=maxr; r++) { - if((rank % (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - if(firstred) { - /* take reduce data from the sendbuf in the first round -> save copy */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, sendbuf, false, 0, true, count, datatype, op, schedule); - firstred = 0; - } else { - /* perform the reduce in my local buffer */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, redbuf-(unsigned long)handle->tmpbuf, true, 0, true, count, datatype, op, schedule); - } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - } - } else { - /* we have to send this round */ - peer = rank - (1<<(r-1)); - if(firstred) { - /* we have to send the senbuf */ - res = 
NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule); - } else { - /* we send an already reduced value from redbuf */ - res = NBC_Sched_send(redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule); - } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - /* leave the game */ - break; - } - } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - /* rank 0 is root and sends - all others receive */ - if(rank != 0) { - res = NBC_Sched_recv(recvbuf, false, recvcount, datatype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - - if(rank == 0) { - offset = 0; - for(r=1;rtmpbuf, true, recvcount, datatype, r, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - res = NBC_Sched_copy(redbuf-(unsigned long)handle->tmpbuf, true, recvcount, datatype, recvbuf, false, recvcount, datatype, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - - /*NBC_PRINT_SCHED(*schedule);*/ - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } - - /* tmpbuf is freed with the handle */ - return NBC_OK; -} - -int ompi_coll_libnbc_ireduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype, - struct ompi_op_t *op, struct ompi_communicator_t *comm, - ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module) { - int peer, rank, res, count, rsize; - MPI_Aint ext; - NBC_Schedule *schedule; - NBC_Handle *handle; - 
ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; - ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - res = MPI_Type_extent(dtype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - count = rcount * rsize; - - handle->tmpbuf = malloc(2*ext*count); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } - - /* send my data to the remote root */ - res = NBC_Sched_send(sbuf, false, count, dtype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - - if (0 == rank) { - res = NBC_Sched_recv((void *) 0, true, count, dtype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - for (peer = 1 ; peer < rsize ; ++peer) { - res = NBC_Sched_recv((void *)(ext * count), true, count, dtype, peer, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in 
NBC_Sched_barrier() (%i)\n", res); return res; } - - res = NBC_Sched_op((void *) 0, true, (void *)(ext * count), true, (void *) 0, true, count, dtype, op, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - } - - /* exchange data with remote root for scatter phase (we *could* use the local communicator to do the scatter) */ - res = NBC_Sched_recv((void *)(ext * count), true, count, dtype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_send((void *) 0, true, count, dtype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - /* scatter */ - for (peer = 0 ; peer < rsize ; ++peer) { - res = NBC_Sched_send((void *)(ext * (count + peer * rcount)), true, rcount, dtype, peer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - - /* receive my block */ - res = NBC_Sched_recv(rbuf, true, rcount, dtype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - /*NBC_PRINT_SCHED(*schedule);*/ - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } - - /* tmpbuf is freed with the handle */ - return NBC_OK; -} diff --git a/ompi/mca/coll/libnbc/libdict/dict_private.h b/ompi/mca/coll/libnbc/libdict/dict_private.h index 5939182e19b..da2b6dbdeab 100644 --- 
a/ompi/mca/coll/libnbc/libdict/dict_private.h +++ b/ompi/mca/coll/libnbc/libdict/dict_private.h @@ -47,7 +47,7 @@ typedef int (*icompare_func) __P((void *, void *itor2)); # define ASSERT(expr) \ if (!(expr)) \ fprintf(stderr, "\n%s:%d (%s) assertion failed: `%s'\n", \ - __FILE__, __LINE__, __PRETTY_FUNCTION__, #expr), \ + __FILE__, __LINE__, __func__, #expr), \ abort() # else # define ASSERT(expr) \ diff --git a/ompi/mca/coll/libnbc/nbc.c b/ompi/mca/coll/libnbc/nbc.c index 9c7f90773dc..7949fe1b90f 100644 --- a/ompi/mca/coll/libnbc/nbc.c +++ b/ompi/mca/coll/libnbc/nbc.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,12 +6,17 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All + * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * * Author(s): Torsten Hoefler * * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* */ #include "nbc_internal.h" @@ -33,275 +39,271 @@ void NBC_Print_times(double div) { } #endif -/* allocates a new schedule array */ -int NBC_Sched_create(NBC_Schedule* schedule) { - int *ptr; +static void nbc_schedule_constructor (NBC_Schedule *schedule) { + /* initial total size of the schedule */ + schedule->size = sizeof (int); + schedule->current_round_offset = 0; + schedule->data = calloc (1, schedule->size); +} + +static void nbc_schedule_destructor (NBC_Schedule *schedule) { + free (schedule->data); + schedule->data = NULL; +} + +OBJ_CLASS_INSTANCE(NBC_Schedule, opal_object_t, nbc_schedule_constructor, + nbc_schedule_destructor); + +static int nbc_schedule_grow (NBC_Schedule *schedule, int additional) { + void *tmp; + int size; - *schedule=malloc(2*sizeof(int)); - if(*schedule == NULL) { return NBC_OOR; } + /* get current size of schedule */ + size = nbc_schedule_get_size (schedule); - /* initialize the schedule */ - ptr = (int*) *schedule; - ptr[0] = 2 * sizeof(int); /* initial total size of the schedule */ - ptr[1] = 0; /* initial round-schedule has num=(int)0 and no actions */ - /* The schedule's final end=(char)0 delimiter won't be added until NBC_Sched_commit(). 
*/ + tmp = realloc (schedule->data, size + additional); + if (NULL == tmp) { + NBC_Error ("Could not increase the size of NBC schedule"); + return OMPI_ERR_OUT_OF_RESOURCE; + } - return NBC_OK; + schedule->data = tmp; + return OMPI_SUCCESS; +} + +static int nbc_schedule_round_append (NBC_Schedule *schedule, void *data, int data_size, bool barrier) { + int ret, size = nbc_schedule_get_size (schedule); + + if (barrier) { + ret = nbc_schedule_grow (schedule, data_size + 1 + sizeof (int)); + } else { + ret = nbc_schedule_grow (schedule, data_size); + } + if (OMPI_SUCCESS != ret) { + return ret; + } + + /* append to the round-schedule */ + if (data_size) { + memcpy (schedule->data + size, data, data_size); + + /* increase number of elements in round-schedule */ + nbc_schedule_inc_round (schedule); + + /* increase size of schedule */ + nbc_schedule_inc_size (schedule, data_size); + } + + if (barrier) { + /* add the barrier */ + schedule->data[size + data_size] = 1; + /* set next round counter to 0 */ + memset (schedule->data + size + data_size + 1, 0, sizeof (int)); + + NBC_DEBUG(10, "ended round at byte %i\n", size + data_size + 1); + + schedule->current_round_offset = size + data_size + 1; + + /* increase size of schedule */ + nbc_schedule_inc_size (schedule, sizeof (int) + 1); + } + + return OMPI_SUCCESS; } /* this function puts a send into the schedule */ -int NBC_Sched_send(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule) { - int size; - char* ptr; - NBC_Fn_type type = SEND; +static int NBC_Sched_send_internal (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, bool local, NBC_Schedule *schedule, bool barrier) { NBC_Args_send send_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_send)); - if(*schedule == NULL) { printf("Error in 
realloc()\n"); return NBC_OOR; } - + int ret; + /* store the passed arguments */ - send_args.buf=buf; - send_args.tmpbuf=tmpbuf; - send_args.count=count; - send_args.datatype=datatype; - send_args.dest=dest; + send_args.type = SEND; + send_args.buf = buf; + send_args.tmpbuf = tmpbuf; + send_args.count = count; + send_args.datatype = datatype; + send_args.dest = dest; + send_args.local = local; /* append to the round-schedule */ - ptr = (char*)*schedule + size; - NBC_PUT_BYTES(ptr,type); - NBC_PUT_BYTES(ptr,send_args); + ret = nbc_schedule_round_append (schedule, &send_args, sizeof (send_args), barrier); + if (OMPI_SUCCESS != ret) { + return ret; + } - /* increase number of elements in round-schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding send - ends at byte %i\n", (int)(size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_send))); + NBC_DEBUG(10, "added send - ends at byte %i\n", nbc_schedule_get_size (schedule)); - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Fn_type)+sizeof(NBC_Args_send)); + return OMPI_SUCCESS; +} + +int NBC_Sched_send (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier) { + return NBC_Sched_send_internal (buf, tmpbuf, count, datatype, dest, false, schedule, barrier); +} - return NBC_OK; +int NBC_Sched_local_send (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier) { + return NBC_Sched_send_internal (buf, tmpbuf, count, datatype, dest, true, schedule, barrier); } /* this function puts a receive into the schedule */ -int NBC_Sched_recv(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule) { - int size; - char* ptr; - NBC_Fn_type type = RECV; +static int NBC_Sched_recv_internal (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, bool local, NBC_Schedule *schedule, bool barrier) { NBC_Args_recv recv_args; - - /* get size of actual schedule 
*/ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_recv)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - + int ret; + /* store the passed arguments */ - recv_args.buf=buf; - recv_args.tmpbuf=tmpbuf; - recv_args.count=count; - recv_args.datatype=datatype; - recv_args.source=source; + recv_args.type = RECV; + recv_args.buf = buf; + recv_args.tmpbuf = tmpbuf; + recv_args.count = count; + recv_args.datatype = datatype; + recv_args.source = source; + recv_args.local = local; /* append to the round-schedule */ - ptr = (char*)*schedule + size; - NBC_PUT_BYTES(ptr,type); - NBC_PUT_BYTES(ptr,recv_args); + ret = nbc_schedule_round_append (schedule, &recv_args, sizeof (recv_args), barrier); + if (OMPI_SUCCESS != ret) { + return ret; + } - /* increase number of elements in round-schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding receive - ends at byte %i\n", (int)(size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_recv))); + NBC_DEBUG(10, "added receive - ends at byte %d\n", nbc_schedule_get_size (schedule)); - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Fn_type)+sizeof(NBC_Args_recv)); + return OMPI_SUCCESS; +} - return NBC_OK; +int NBC_Sched_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier) { + return NBC_Sched_recv_internal(buf, tmpbuf, count, datatype, source, false, schedule, barrier); +} + +int NBC_Sched_local_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier) { + return NBC_Sched_recv_internal(buf, tmpbuf, count, datatype, source, true, schedule, barrier); } /* this function puts an operation into the schedule */ -int NBC_Sched_op(void *buf3, char tmpbuf3, void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule 
*schedule) { - int size; - char* ptr; - NBC_Fn_type type = OP; +int NBC_Sched_op (const void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, + MPI_Op op, NBC_Schedule *schedule, bool barrier) { NBC_Args_op op_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_op)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - + int ret; + /* store the passed arguments */ - op_args.buf1=buf1; - op_args.buf2=buf2; - op_args.buf3=buf3; - op_args.tmpbuf1=tmpbuf1; - op_args.tmpbuf2=tmpbuf2; - op_args.tmpbuf3=tmpbuf3; - op_args.count=count; - op_args.op=op; - op_args.datatype=datatype; + op_args.type = OP; + op_args.buf1 = buf1; + op_args.buf2 = buf2; + op_args.tmpbuf1 = tmpbuf1; + op_args.tmpbuf2 = tmpbuf2; + op_args.count = count; + op_args.op = op; + op_args.datatype = datatype; /* append to the round-schedule */ - ptr = (char*)*schedule + size; - NBC_PUT_BYTES(ptr,type); - NBC_PUT_BYTES(ptr,op_args); + ret = nbc_schedule_round_append (schedule, &op_args, sizeof (op_args), barrier); + if (OMPI_SUCCESS != ret) { + return ret; + } - /* increase number of elements in round-schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding op - ends at byte %i\n", (int)(size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_op))); + NBC_DEBUG(10, "added op2 - ends at byte %i\n", nbc_schedule_get_size (schedule)); - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Fn_type)+sizeof(NBC_Args_op)); - - return NBC_OK; + return OMPI_SUCCESS; } /* this function puts a copy into the schedule */ -int NBC_Sched_copy(void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, MPI_Datatype tgttype, NBC_Schedule *schedule) { - int size; - char* ptr; - NBC_Fn_type type = COPY; +int NBC_Sched_copy (void *src, char tmpsrc, int srccount, 
MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, + MPI_Datatype tgttype, NBC_Schedule *schedule, bool barrier) { NBC_Args_copy copy_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_copy)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - + int ret; + /* store the passed arguments */ - copy_args.src=src; - copy_args.tmpsrc=tmpsrc; - copy_args.srccount=srccount; - copy_args.srctype=srctype; - copy_args.tgt=tgt; - copy_args.tmptgt=tmptgt; - copy_args.tgtcount=tgtcount; - copy_args.tgttype=tgttype; + copy_args.type = COPY; + copy_args.src = src; + copy_args.tmpsrc = tmpsrc; + copy_args.srccount = srccount; + copy_args.srctype = srctype; + copy_args.tgt = tgt; + copy_args.tmptgt = tmptgt; + copy_args.tgtcount = tgtcount; + copy_args.tgttype = tgttype; /* append to the round-schedule */ - ptr = (char*)*schedule + size; - NBC_PUT_BYTES(ptr,type); - NBC_PUT_BYTES(ptr,copy_args); - - /* increase number of elements in round-schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding copy - ends at byte %i\n", (int)(size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_copy))); + ret = nbc_schedule_round_append (schedule, &copy_args, sizeof (copy_args), barrier); + if (OMPI_SUCCESS != ret) { + return ret; + } - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Fn_type)+sizeof(NBC_Args_copy)); + NBC_DEBUG(10, "added copy - ends at byte %i\n", nbc_schedule_get_size (schedule)); - return NBC_OK; + return OMPI_SUCCESS; } /* this function puts a unpack into the schedule */ -int NBC_Sched_unpack(void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, NBC_Schedule *schedule) { - int size; - char* ptr; - NBC_Fn_type type = UNPACK; +int NBC_Sched_unpack (void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char
tmpoutbuf, + NBC_Schedule *schedule, bool barrier) { NBC_Args_unpack unpack_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_unpack)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - + int ret; + /* store the passed arguments */ - unpack_args.inbuf=inbuf; - unpack_args.tmpinbuf=tmpinbuf; - unpack_args.count=count; - unpack_args.datatype=datatype; - unpack_args.outbuf=outbuf; - unpack_args.tmpoutbuf=tmpoutbuf; + unpack_args.type = UNPACK; + unpack_args.inbuf = inbuf; + unpack_args.tmpinbuf = tmpinbuf; + unpack_args.count = count; + unpack_args.datatype = datatype; + unpack_args.outbuf = outbuf; + unpack_args.tmpoutbuf = tmpoutbuf; /* append to the round-schedule */ - ptr = (char*)*schedule + size; - NBC_PUT_BYTES(ptr,type); - NBC_PUT_BYTES(ptr,unpack_args); - - /* increase number of elements in round-schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding unpack - ends at byte %i\n", (int)(size+sizeof(NBC_Fn_type)+sizeof(NBC_Args_unpack))); + ret = nbc_schedule_round_append (schedule, &unpack_args, sizeof (unpack_args), barrier); + if (OMPI_SUCCESS != ret) { + return ret; + } - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Fn_type)+sizeof(NBC_Args_unpack)); + NBC_DEBUG(10, "added unpack - ends at byte %i\n", nbc_schedule_get_size (schedule)); - return NBC_OK; + return OMPI_SUCCESS; } /* this function ends a round of a schedule */ -int NBC_Sched_barrier(NBC_Schedule *schedule) { - int size, num = 0; - char *ptr; - char delimiter = 1; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("round terminated at %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(char)+sizeof(int)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - - ptr = (char*)*schedule + size; - 
NBC_PUT_BYTES(ptr,delimiter); /* round-schedule delimiter */ - NBC_PUT_BYTES(ptr,num); /* initialize num=0 for next round-schedule */ - - NBC_DEBUG(10, "ending round at byte %i\n", (int)(size+sizeof(char)+sizeof(int))); - - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(char)+sizeof(int)); - - return NBC_OK; +int NBC_Sched_barrier (NBC_Schedule *schedule) { + return nbc_schedule_round_append (schedule, NULL, 0, true); } /* this function ends a schedule */ int NBC_Sched_commit(NBC_Schedule *schedule) { - int size; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule terminated at %i bytes\n", size);*/ - *schedule = (NBC_Schedule)realloc(*schedule, size+sizeof(char)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - + int size = nbc_schedule_get_size (schedule); + char *ptr; + int ret; + + ret = nbc_schedule_grow (schedule, 1); + if (OMPI_SUCCESS != ret) { + return ret; + } + /* add the barrier char (0) because this is the last round */ - *(char*)((char*)*schedule+size)=0; - NBC_DEBUG(10, "closing schedule %p at byte %i\n", *schedule, (int)(size+sizeof(char))); + ptr = schedule->data + size; + *((char *) ptr) = 0; /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(char)); - - return NBC_OK; + nbc_schedule_inc_size (schedule, 1); + + NBC_DEBUG(10, "closed schedule %p at byte %i\n", schedule, (int)(size + 1)); + + return OMPI_SUCCESS; } /* finishes a request * * to be called *only* from the progress thread !!! 
*/ -static inline int NBC_Free(NBC_Handle* handle) { +static inline void NBC_Free (NBC_Handle* handle) { -#ifdef NBC_CACHE_SCHEDULE - /* do not free schedule because it is in the cache */ - handle->schedule = NULL; -#else - if(handle->schedule != NULL) { - /* free schedule */ - free((void*)*(handle->schedule)); - free((void*)handle->schedule); + if (NULL != handle->schedule) { + /* release schedule */ + OBJ_RELEASE (handle->schedule); handle->schedule = NULL; } -#endif /* if the nbc_I attached some data */ /* problems with schedule cache here, see comment (TODO) in * nbc_internal.h */ - if(NULL != handle->tmpbuf) { + if (NULL != handle->tmpbuf) { free((void*)handle->tmpbuf); handle->tmpbuf = NULL; } - - return NBC_OK; } /* progresses a request @@ -309,121 +311,162 @@ static inline int NBC_Free(NBC_Handle* handle) { * to be called *only* from the progress thread !!! */ int NBC_Progress(NBC_Handle *handle) { int flag, res, ret=NBC_CONTINUE; - long size; + unsigned long size; char *delim; + int i; + ompi_status_public_t status; /* the handle is done if there is no schedule attached */ - if(handle->schedule != NULL) { + if (NULL == handle->schedule) { + return NBC_OK; + } - if((handle->req_count > 0) && (handle->req_array != NULL)) { - NBC_DEBUG(50, "NBC_Progress: testing for %i requests\n", handle->req_count); + if ((handle->req_count > 0) && (handle->req_array != NULL)) { + NBC_DEBUG(50, "NBC_Progress: testing for %i requests\n", handle->req_count); #ifdef NBC_TIMING - Test_time -= MPI_Wtime(); + Test_time -= MPI_Wtime(); #endif - res = ompi_request_test_all(handle->req_count, handle->req_array, &flag, MPI_STATUSES_IGNORE); - if(res != OMPI_SUCCESS) { printf("MPI Error in MPI_Testall() (%i)\n", res); ret=res; goto error; } + res = ompi_request_test_all(handle->req_count, handle->req_array, &flag, MPI_STATUSES_IGNORE); + if(res != OMPI_SUCCESS) { + // Attempt to cancel outstanding requests + for(i = 0; i < handle->req_count; ++i ) { + // If the request is complete, 
then try to report the error code + if( handle->req_array[i]->req_complete ) { + if( OMPI_SUCCESS != handle->req_array[i]->req_status.MPI_ERROR ) { + NBC_Error ("MPI Error in MPI_Testall() (req %d = %d)", i, handle->req_array[i]->req_status.MPI_ERROR); + } + } + else { + ompi_request_cancel(handle->req_array[i]); + // If the PML actually canceled the request, then wait on it + if( handle->req_array[i]->req_status._cancelled) { + ompi_request_wait(&handle->req_array[i], &status); + } + // Warn the user that we had to leave a PML message outstanding so + // bad things could happen if they continue using nonblocking collectives + else { + NBC_Error ("MPI Error: Not able to cancel the internal request %d. " + "Be aware that continuing to use nonblocking collectives on this communicator may result in undefined behavior.", i); + } + } + } + + return OMPI_ERROR; + } #ifdef NBC_TIMING - Test_time += MPI_Wtime(); + Test_time += MPI_Wtime(); #endif - } else { - flag = 1; /* we had no open requests -> proceed to next round */ + } else { + flag = 1; /* we had no open requests -> proceed to next round */ + } + + /* a round is finished */ + if (flag) { + /* adjust delim to start of current round */ + NBC_DEBUG(5, "NBC_Progress: going in schedule %p to row-offset: %li\n", handle->schedule, handle->row_offset); + delim = handle->schedule->data + handle->row_offset; + NBC_DEBUG(10, "delim: %p\n", delim); + nbc_get_round_size(delim, &size); + NBC_DEBUG(10, "size: %li\n", size); + /* adjust delim to end of current round -> delimiter */ + delim = delim + size; + + if (NULL != handle->req_array) { + /* free request array */ + free (handle->req_array); + handle->req_array = NULL; } - /* a round is finished */ - if(flag) { - /* adjust delim to start of current round */ - NBC_DEBUG(5, "NBC_Progress: going in schedule %p to row-offset: %li\n", *handle->schedule, handle->row_offset); - delim = (char*)*handle->schedule + handle->row_offset; - NBC_DEBUG(10, "delim: %p\n", delim); - 
NBC_GET_ROUND_SIZE(delim, size); - NBC_DEBUG(10, "size: %li\n", size); - /* adjust delim to end of current round -> delimiter */ - delim = delim + size; - - if(handle->req_array != NULL) { - /* free request array */ - free((void*)handle->req_array); - handle->req_array = NULL; - } - handle->req_count = 0; - - if(*delim == 0) { - /* this was the last round - we're done */ - NBC_DEBUG(5, "NBC_Progress last round finished - we're done\n"); - - res = NBC_Free(handle); - if((NBC_OK != res)) { printf("Error in NBC_Free() (%i)\n", res); ret=res; goto error; } - - return NBC_OK; - } else { - NBC_DEBUG(5, "NBC_Progress round finished - goto next round\n"); - /* move delim to start of next round */ - delim = delim+1; - /* initializing handle for new virgin round */ - handle->row_offset = (long)delim - (long)*handle->schedule; - /* kick it off */ - res = NBC_Start_round(handle); - if(NBC_OK != res) { printf("Error in NBC_Start_round() (%i)\n", res); ret=res; goto error; } - } + handle->req_count = 0; + + if (*delim == 0) { + /* this was the last round - we're done */ + NBC_DEBUG(5, "NBC_Progress last round finished - we're done\n"); + + NBC_Free(handle); + + return NBC_OK; + } + + NBC_DEBUG(5, "NBC_Progress round finished - goto next round\n"); + /* move delim to start of next round */ + /* initializing handle for new virgin round */ + handle->row_offset = (intptr_t) (delim + 1) - (intptr_t) handle->schedule->data; + /* kick it off */ + res = NBC_Start_round(handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Error ("Error in NBC_Start_round() (%i)", res); + return res; } - } else { - ret= NBC_OK; } -error: return ret; } static inline int NBC_Start_round(NBC_Handle *handle) { int num; /* number of operations */ - int i, res, ret=NBC_OK; + int res; char* ptr; + MPI_Request *tmp; NBC_Fn_type type; - NBC_Args_send sendargs; - NBC_Args_recv recvargs; - NBC_Args_op opargs; - NBC_Args_copy copyargs; - NBC_Args_unpack unpackargs; - NBC_Schedule myschedule; - void *buf1, 
*buf2, *buf3; + NBC_Args_send sendargs; + NBC_Args_recv recvargs; + NBC_Args_op opargs; + NBC_Args_copy copyargs; + NBC_Args_unpack unpackargs; + void *buf1, *buf2; /* get round-schedule address */ - myschedule = (NBC_Schedule*)((char*)*handle->schedule + handle->row_offset); - ptr = (char*) myschedule; + ptr = handle->schedule->data + handle->row_offset; NBC_GET_BYTES(ptr,num); - NBC_DEBUG(10, "start_round round at address %p : posting %i operations\n", myschedule, num); + NBC_DEBUG(10, "start_round round at offset %d : posting %i operations\n", handle->row_offset, num); + + for (int i = 0 ; i < num ; ++i) { + int offset = (intptr_t)(ptr - handle->schedule->data); - for (i=0; itag); + NBC_DEBUG(5,"*buf: %p, count: %i, type: %p, dest: %i, tag: %i)\n", sendargs.buf, + sendargs.count, sendargs.datatype, sendargs.dest, handle->tag); /* get an additional request */ handle->req_count++; /* get buffer */ if(sendargs.tmpbuf) { buf1=(char*)handle->tmpbuf+(long)sendargs.buf; } else { - buf1=sendargs.buf; + buf1=(void *)sendargs.buf; } #ifdef NBC_TIMING Isend_time -= MPI_Wtime(); #endif - handle->req_array = (MPI_Request*)realloc((void*)handle->req_array, (handle->req_count)*sizeof(MPI_Request)); - NBC_CHECK_NULL(handle->req_array); - res = MCA_PML_CALL(isend(buf1, sendargs.count, sendargs.datatype, sendargs.dest, handle->tag, MCA_PML_BASE_SEND_STANDARD, handle->comm, handle->req_array+handle->req_count-1)); - if(OMPI_SUCCESS != res) { printf("Error in MPI_Isend(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, sendargs.count, (unsigned long)sendargs.datatype, sendargs.dest, handle->tag, (unsigned long)handle->comm, res); ret=res; goto error; } + tmp = (MPI_Request *) realloc ((void *) handle->req_array, handle->req_count * sizeof (MPI_Request)); + if (NULL == tmp) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + handle->req_array = tmp; + + res = MCA_PML_CALL(isend(buf1, sendargs.count, sendargs.datatype, sendargs.dest, handle->tag, + MCA_PML_BASE_SEND_STANDARD, 
sendargs.local?handle->comm->c_local_comm:handle->comm, + handle->req_array+handle->req_count - 1)); + if (OMPI_SUCCESS != res) { + NBC_Error ("Error in MPI_Isend(%lu, %i, %p, %i, %i, %lu) (%i)", (unsigned long)buf1, sendargs.count, + sendargs.datatype, sendargs.dest, handle->tag, (unsigned long)handle->comm, res); + return res; + } #ifdef NBC_TIMING Isend_time += MPI_Wtime(); #endif break; case RECV: - NBC_DEBUG(5, " RECV (offset %li) ", (long)ptr-(long)myschedule); + NBC_DEBUG(5, " RECV (offset %li) ", offset); NBC_GET_BYTES(ptr,recvargs); - NBC_DEBUG(5, "*buf: %p, count: %i, type: %lu, source: %i, tag: %i)\n", recvargs.buf, recvargs.count, (unsigned long)recvargs.datatype, recvargs.source, handle->tag); + NBC_DEBUG(5, "*buf: %p, count: %i, type: %p, source: %i, tag: %i)\n", recvargs.buf, recvargs.count, + recvargs.datatype, recvargs.source, handle->tag); /* get an additional request - TODO: req_count NOT thread safe */ handle->req_count++; /* get buffer */ @@ -435,40 +478,48 @@ static inline int NBC_Start_round(NBC_Handle *handle) { #ifdef NBC_TIMING Irecv_time -= MPI_Wtime(); #endif - handle->req_array = (MPI_Request*)realloc((void*)handle->req_array, (handle->req_count)*sizeof(MPI_Request)); - NBC_CHECK_NULL(handle->req_array); - res = MCA_PML_CALL(irecv(buf1, recvargs.count, recvargs.datatype, recvargs.source, handle->tag, handle->comm, handle->req_array+handle->req_count-1)); - if(OMPI_SUCCESS != res) { printf("Error in MPI_Irecv(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, recvargs.count, (unsigned long)recvargs.datatype, recvargs.source, handle->tag, (unsigned long)handle->comm, res); ret=res; goto error; } + tmp = (MPI_Request *) realloc ((void *) handle->req_array, handle->req_count * sizeof (MPI_Request)); + if (NULL == tmp) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + handle->req_array = tmp; + + res = MCA_PML_CALL(irecv(buf1, recvargs.count, recvargs.datatype, recvargs.source, handle->tag, 
recvargs.local?handle->comm->c_local_comm:handle->comm, + handle->req_array+handle->req_count-1)); + if (OMPI_SUCCESS != res) { + NBC_Error("Error in MPI_Irecv(%lu, %i, %p, %i, %i, %lu) (%i)", (unsigned long)buf1, recvargs.count, + recvargs.datatype, recvargs.source, handle->tag, (unsigned long)handle->comm, res); + return res; + } #ifdef NBC_TIMING Irecv_time += MPI_Wtime(); #endif break; case OP: - NBC_DEBUG(5, " OP (offset %li) ", (long)ptr-(long)myschedule); + NBC_DEBUG(5, " OP2 (offset %li) ", offset); NBC_GET_BYTES(ptr,opargs); - NBC_DEBUG(5, "*buf1: %p, buf2: %p, buf3: %p, count: %i, type: %lu)\n", opargs.buf1, opargs.buf2, opargs.buf3, opargs.count, (unsigned long)opargs.datatype); + NBC_DEBUG(5, "*buf1: %p, buf2: %p, count: %i, type: %p)\n", opargs.buf1, opargs.buf2, + opargs.count, opargs.datatype); /* get buffers */ if(opargs.tmpbuf1) { buf1=(char*)handle->tmpbuf+(long)opargs.buf1; } else { - buf1=opargs.buf1; + buf1=(void *)opargs.buf1; } if(opargs.tmpbuf2) { buf2=(char*)handle->tmpbuf+(long)opargs.buf2; } else { buf2=opargs.buf2; } - if(opargs.tmpbuf3) { - buf3=(char*)handle->tmpbuf+(long)opargs.buf3; - } else { - buf3=opargs.buf3; - } - ompi_3buff_op_reduce(opargs.op, buf1, buf2, buf3, opargs.count, opargs.datatype); + ompi_op_reduce(opargs.op, buf1, buf2, opargs.count, opargs.datatype); break; case COPY: - NBC_DEBUG(5, " COPY (offset %li) ", (long)ptr-(long)myschedule); + NBC_DEBUG(5, " COPY (offset %li) ", offset); NBC_GET_BYTES(ptr,copyargs); - NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu, tgtcount: %i, tgttype: %lu)\n", (unsigned long)copyargs.src, copyargs.srccount, (unsigned long)copyargs.srctype, (unsigned long)copyargs.tgt, copyargs.tgtcount, (unsigned long)copyargs.tgttype); + NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %p, *tgt: %lu, tgtcount: %i, tgttype: %p)\n", + (unsigned long) copyargs.src, copyargs.srccount, copyargs.srctype, + (unsigned long) copyargs.tgt, copyargs.tgtcount, copyargs.tgttype); /* get buffers */ 
if(copyargs.tmpsrc) { buf1=(char*)handle->tmpbuf+(long)copyargs.src; @@ -480,46 +531,54 @@ static inline int NBC_Start_round(NBC_Handle *handle) { } else { buf2=copyargs.tgt; } - res = NBC_Copy(buf1, copyargs.srccount, copyargs.srctype, buf2, copyargs.tgtcount, copyargs.tgttype, handle->comm); - if(res != NBC_OK) { printf("NBC_Copy() failed (code: %i)\n", res); ret=res; goto error; } + res = NBC_Copy (buf1, copyargs.srccount, copyargs.srctype, buf2, copyargs.tgtcount, copyargs.tgttype, + handle->comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } break; case UNPACK: - NBC_DEBUG(5, " UNPACK (offset %li) ", (long)ptr-(long)myschedule); + NBC_DEBUG(5, " UNPACK (offset %li) ", offset); NBC_GET_BYTES(ptr,unpackargs); - NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu\n", (unsigned long)unpackargs.inbuf, unpackargs.count, (unsigned long)unpackargs.datatype, (unsigned long)unpackargs.outbuf); + NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %p, *tgt: %lu\n", (unsigned long) unpackargs.inbuf, + unpackargs.count, unpackargs.datatype, (unsigned long) unpackargs.outbuf); /* get buffers */ if(unpackargs.tmpinbuf) { buf1=(char*)handle->tmpbuf+(long)unpackargs.inbuf; } else { - buf1=unpackargs.outbuf; + buf1=unpackargs.inbuf; } if(unpackargs.tmpoutbuf) { buf2=(char*)handle->tmpbuf+(long)unpackargs.outbuf; } else { buf2=unpackargs.outbuf; } - res = NBC_Unpack(buf1, unpackargs.count, unpackargs.datatype, buf2, handle->comm); - if(res != NBC_OK) { printf("NBC_Unpack() failed (code: %i)\n", res); ret=res; goto error; } + res = NBC_Unpack (buf1, unpackargs.count, unpackargs.datatype, buf2, handle->comm); + if (OMPI_SUCCESS != res) { + NBC_Error ("NBC_Unpack() failed (code: %i)", res); + return res; + } + break; default: - printf("NBC_Start_round: bad type %li at offset %li\n", (long)type, (long)ptr-(long)myschedule); - ret=NBC_BAD_SCHED; - goto error; + NBC_Error ("NBC_Start_round: bad type %li at offset %li", (long)type, offset); + return OMPI_ERROR; 
} } /* check if we can make progress - not in the first round, this allows us to leave the - * initialization faster and to reach more overlap + * initialization faster and to reach more overlap * * threaded case: calling progress in the first round can lead to a * deadlock if NBC_Free is called in this round :-( */ - if(handle->row_offset != sizeof(int)) { + if (handle->row_offset) { res = NBC_Progress(handle); - if((NBC_OK != res) && (NBC_CONTINUE != res)) { printf("Error in NBC_Progress() (%i)\n", res); ret=res; goto error; } + if ((NBC_OK != res) && (NBC_CONTINUE != res)) { + return OMPI_ERROR; + } } -error: - return ret; + return OMPI_SUCCESS; } int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t **request, ompi_coll_libnbc_module_t *comminfo) @@ -537,8 +596,7 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t handle->req_array = NULL; handle->comm = comm; handle->schedule = NULL; - /* first int is the schedule size */ - handle->row_offset = sizeof(int); + handle->row_offset = 0; /******************** Do the tag and shadow comm administration ... 
***************/ @@ -546,7 +604,7 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t tmp_tag = comminfo->tag--; if (tmp_tag == MCA_COLL_BASE_TAG_NONBLOCKING_END) { tmp_tag = comminfo->tag = MCA_COLL_BASE_TAG_NONBLOCKING_BASE; - NBC_DEBUG(2,"resetting tags ...\n"); + NBC_DEBUG(2,"resetting tags ...\n"); } if (true != comminfo->comm_registered) { @@ -555,11 +613,11 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t } OPAL_THREAD_UNLOCK(&comminfo->mutex); - handle->tag=comminfo->tag; + handle->tag = tmp_tag; /* register progress */ if (need_register) { - int32_t tmp = + int32_t tmp = OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, 1); if (tmp == 1) { opal_progress_register(ompi_coll_libnbc_progress); @@ -568,13 +626,18 @@ int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t handle->comm=comm; /*printf("got comminfo: %lu tag: %i\n", comminfo, comminfo->tag);*/ - + /******************** end of tag and shadow comm administration ... 
***************/ handle->comminfo = comminfo; - + NBC_DEBUG(3, "got tag %i\n", handle->tag); - return NBC_OK; + return OMPI_SUCCESS; +} + +void NBC_Return_handle(ompi_coll_libnbc_request_t *request) { + NBC_Free (request); + OMPI_COLL_LIBNBC_REQUEST_RETURN(request); } int NBC_Init_comm(MPI_Comm comm, NBC_Comminfo *comminfo) { @@ -636,27 +699,25 @@ int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule) { /* kick off first round */ res = NBC_Start_round(handle); - if((NBC_OK != res)) { printf("Error in NBC_Start_round() (%i)\n", res); return res; } - + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); opal_list_append(&mca_coll_libnbc_component.active_requests, &(handle->super.super.super)); + OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); - return NBC_OK; + return OMPI_SUCCESS; } #ifdef NBC_CACHE_SCHEDULE void NBC_SchedCache_args_delete_key_dummy(void *k) { - /* do nothing because the key and the data element are identical :-) + /* do nothing because the key and the data element are identical :-) * both (the single one :) is freed in NBC__args_delete() */ } void NBC_SchedCache_args_delete(void *entry) { - struct NBC_dummyarg *tmp; - - tmp = (struct NBC_dummyarg*)entry; - /* free taglistentry */ - free((void*)*(tmp->schedule)); - /* the schedule pointer itself is also malloc'd */ - free((void*)tmp->schedule); - free((void*)tmp); + struct NBC_dummyarg *tmp = (struct NBC_dummyarg*)entry; + OBJ_RELEASE(tmp->schedule); + free(entry); } #endif diff --git a/ompi/mca/coll/libnbc/nbc_iallgather.c b/ompi/mca/coll/libnbc/nbc_iallgather.c index a6af0303065..9f0fea3706d 100644 --- a/ompi/mca/coll/libnbc/nbc_iallgather.c +++ b/ompi/mca/coll/libnbc/nbc_iallgather.c @@ -1,11 +1,14 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2006 The Technical University of Chemnitz. All + * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * * Author(s): Torsten Hoefler * @@ -15,19 +18,20 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Allgather_args_compare(NBC_Allgather_args *a, NBC_Allgather_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && - (a->sendcount == b->sendcount) && + if ((a->sendbuf == b->sendbuf) && + (a->sendcount == b->sendcount) && (a->sendtype == b->sendtype) && (a->recvbuf == b->recvbuf) && (a->recvcount == b->recvcount) && (a->recvtype == b->recvtype) ) { - return 0; + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if( a->sendbuf < b->sendbuf ) { return -1; - } - return +1; + } + + return 1; } #endif @@ -35,11 +39,11 @@ int NBC_Allgather_args_compare(NBC_Allgather_args *a, NBC_Allgather_args *b, voi * the algorithm uses p-1 rounds * each node sends the packet it received last round (or has in round 0) to it's right neighbor (modulo p) * each node receives from it's left (modulo p) neighbor */ -int ompi_coll_libnbc_iallgather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, +int ompi_coll_libnbc_iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, r; + int rank, p, res; MPI_Aint rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; @@ -47,146 +51,181 @@ int ompi_coll_libnbc_iallgather(void* sendbuf, int sendcount, MPI_Datatype sendt NBC_Allgather_args *args, *found, search; #endif 
NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - handle->tmpbuf = NULL; + + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + + res = ompi_datatype_type_extent(recvtype, &rcvext); + if (MPI_SUCCESS != res) { + return res; + } if (inplace) { - sendtype = recvtype; - sendcount = recvcount; + sendtype = recvtype; + sendcount = recvcount; } else { /* copy my data to receive buffer */ - rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); - res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + rbuf = (char *) recvbuf + rank * recvcount * rcvext; + res = NBC_Copy (sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } #ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.sendcount=sendcount; - search.sendtype=sendtype; - search.recvbuf=recvbuf; - search.recvcount=recvcount; - search.recvtype=recvtype; - found = (NBC_Allgather_args *)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLGATHER], &search); - if(found == NULL) { + search.sendbuf = sendbuf; + search.sendcount = sendcount; + 
search.sendtype = sendtype; + search.recvbuf = recvbuf; + search.recvcount = recvcount; + search.recvtype = recvtype; + found = (NBC_Allgather_args *) hb_tree_search ((hb_tree*)libnbc_module->NBC_Dict[NBC_ALLGATHER], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(NBC_OK != res) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; } - - sbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); + sbuf = (char *)recvbuf + rank * recvcount * rcvext; /* do p-1 rounds */ - for(r=0;rsendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - args->recvtype=recvtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLGATHER], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); + args = (NBC_Allgather_args *) malloc (sizeof (args)); + args->sendbuf = sendbuf; + args->sendcount = sendcount; + args->sendtype = sendtype; + args->recvbuf = recvbuf; + args->recvcount = recvcount; + args->recvtype = recvtype; + args->schedule = schedule; + + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLGATHER], args, args, 0); + if (res != 0) { + free (args); + } else { + OBJ_RETAIN(schedule); + } + /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_ALLGATHER] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLGATHER], &handle->comminfo->NBC_Dict_size[NBC_ALLGATHER]); + if (++libnbc_module->NBC_Dict_size[NBC_ALLGATHER] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLGATHER], &libnbc_module->NBC_Dict_size[NBC_ALLGATHER]); } } else { /* 
found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - /*NBC_PRINT_SCHED(*schedule);*/ - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } - - return NBC_OK; + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } -int ompi_coll_libnbc_iallgather_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, +int ompi_coll_libnbc_iallgather_inter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, r, rsize; + int res, rsize; MPI_Aint rcvext; NBC_Schedule *schedule; char *rbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - handle->tmpbuf = NULL; + res = ompi_datatype_type_extent(recvtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error 
("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + rsize = ompi_comm_remote_size (comm); - res = NBC_Sched_create(schedule); - if(NBC_OK != res) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; } + /* set up schedule */ + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* do rsize - 1 rounds */ - for(r = 0 ; r < rsize ; ++r) { + for (int r = 0 ; r < rsize ; ++r) { /* recv from rank r */ - rbuf = ((char *)recvbuf) + r*(recvcount*rcvext); - res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, r, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + rbuf = (char *) recvbuf + r * recvcount * rcvext; + res = NBC_Sched_recv (rbuf, false, recvcount, recvtype, r, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } /* send to rank r */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, r, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sendbuf, false, sendcount, sendtype, r, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - /*NBC_PRINT_SCHED(*schedule);*/ + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); + return res; + } - res = NBC_Start(handle, 
schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iallgatherv.c b/ompi/mca/coll/libnbc/nbc_iallgatherv.c index be0ca4a9ec7..ea3c3634403 100644 --- a/ompi/mca/coll/libnbc/nbc_iallgatherv.c +++ b/ompi/mca/coll/libnbc/nbc_iallgatherv.c @@ -1,16 +1,17 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All + * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. * * Author(s): Torsten Hoefler * * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* */ @@ -23,126 +24,160 @@ /* simple linear MPI_Iallgatherv * the algorithm uses p-1 rounds * first round: - * each node sends to it's left node (rank+1)%p sendcount elements + * each node sends to it's left node (rank+1)%p sendcount elements * each node begins with it's right node (rank-11)%p and receives from it recvcounts[(rank+1)%p] elements - * second round: - * each node sends to node (rank+2)%p sendcount elements + * second round: + * each node sends to node (rank+2)%p sendcount elements * each node receives from node (rank-2)%p recvcounts[(rank+2)%p] elements */ -int ompi_coll_libnbc_iallgatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs, +int ompi_coll_libnbc_iallgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, r, speer, rpeer; + int rank, p, res, speer, rpeer; MPI_Aint rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - + NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == 
schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; } - + + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + + res = ompi_datatype_type_extent (recvtype, &rcvext); + if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { + NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } + if (inplace) { sendtype = recvtype; sendcount = recvcounts[rank]; } else { /* copy my data to receive buffer */ - rbuf = ((char *)recvbuf) + (displs[rank]*rcvext); - res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcounts[rank], recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + rbuf = (char *) recvbuf + displs[rank] * rcvext; + res = NBC_Copy (sendbuf, sendcount, sendtype, rbuf, recvcounts[rank], recvtype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + + schedule = OBJ_NEW(NBC_Schedule); + if (NULL == schedule) { + return OMPI_ERR_OUT_OF_RESOURCE; } - sbuf = ((char*) recvbuf) + (displs[rank]*rcvext); + + sbuf = (char *) recvbuf + displs[rank] * rcvext; /* do p-1 rounds */ - for(r=1;rtmpbuf=NULL; + res = ompi_datatype_type_extent(recvtype, &rcvext); + if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { + NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (NULL == schedule) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* do rsize rounds */ - for (r = 0 ; r < rsize ; ++r) { - char *rbuf = ((char *)recvbuf) + (displs[r]*rcvext); + for (int r = 0 ; r < rsize ; ++r) { + char *rbuf = (char *) recvbuf + displs[r] * rcvext; if (recvcounts[r]) { - res = NBC_Sched_recv(rbuf, false, recvcounts[r], recvtype, r, schedule); - if (NBC_OK != res) { printf("Error 
in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (rbuf, false, recvcounts[r], recvtype, r, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } if (sendcount) { - for (r = 0 ; r < rsize ; ++r) { - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, r, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + for (int r = 0 ; r < rsize ; ++r) { + res = NBC_Sched_send (sendbuf, false, sendcount, sendtype, r, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iallreduce.c b/ompi/mca/coll/libnbc/nbc_iallreduce.c index 53200f9cbd0..0b624f90c44 100644 --- a/ompi/mca/coll/libnbc/nbc_iallreduce.c +++ b/ompi/mca/coll/libnbc/nbc_iallreduce.c @@ -1,12 +1,13 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. 
All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * Author(s): Torsten Hoefler @@ -15,32 +16,39 @@ #include "nbc_internal.h" #include "ompi/communicator/communicator.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/op/op.h" #include -static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle); -static inline int allred_sched_ring(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle); -static inline int allred_sched_linear(int rank, int p, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle); +static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, ptrdiff_t gap, const void *sendbuf, + void *recvbuf, MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle); +static inline int allred_sched_ring(int rank, int p, int count, MPI_Datatype datatype, const void *sendbuf, + void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, + NBC_Handle *handle); +static inline int allred_sched_linear(int rank, int p, const void *sendbuf, void *recvbuf, int count, + MPI_Datatype datatype, ptrdiff_t gap, MPI_Op op, int ext, int size, + NBC_Schedule *schedule, NBC_Handle *handle); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Allreduce_args_compare(NBC_Allreduce_args *a, NBC_Allreduce_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->recvbuf == b->recvbuf) && (a->count == b->count) && (a->datatype == 
b->datatype) && - (a->op == b->op) ) { - return 0; + (a->op == b->op)) { + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if( a->sendbuf < b->sendbuf ) { return -1; - } - return +1; + } + + return 1; } #endif -int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, +int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { @@ -54,33 +62,49 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat enum { NBC_ARED_BINOMIAL, NBC_ARED_RING } alg; char inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + ptrdiff_t span, gap; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); + res = ompi_datatype_get_extent(datatype, &lb, &ext); - if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (OMPI_SUCCESS != res) { + NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } + res = ompi_datatype_type_size (datatype, &size); - if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } + if (OMPI_SUCCESS != res) { + NBC_Error ("MPI Error in ompi_datatype_type_size() (%i)", res); + return res; + } - handle->tmpbuf = malloc(ext*count); - if(handle->tmpbuf == NULL) { printf("Error in malloc() (%i)\n", res); return NBC_OOR; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + span = opal_datatype_span(&datatype->super, count, 
&gap); + handle->tmpbuf = malloc (span); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - if((p == 1) && !inplace) { + if ((p == 1) && !inplace) { /* for a single node - copy data to receivebuf */ res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } /* algorithm selection */ - if(p < 4 || size*count < 65536 || inplace) { + if(p < 4 || size*count < 65536 || !ompi_op_is_commute(op) || inplace) { alg = NBC_ARED_BINOMIAL; } else { alg = NBC_ARED_RING; @@ -88,104 +112,157 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat #ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.recvbuf=recvbuf; - search.count=count; - search.datatype=datatype; - search.op=op; - found = (NBC_Allreduce_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLREDUCE], &search); - if(found == NULL) { + search.sendbuf = sendbuf; + search.recvbuf = recvbuf; + search.count = count; + search.datatype = datatype; + search.op = op; + found = (NBC_Allreduce_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (NULL == schedule) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* ensure the schedule is released with the handle on error */ + handle->schedule = schedule; switch(alg) { case NBC_ARED_BINOMIAL: - res = allred_sched_diss(rank, p, count, datatype, sendbuf, 
recvbuf, op, schedule, handle); + res = allred_sched_diss(rank, p, count, datatype, gap, sendbuf, recvbuf, op, inplace, schedule, handle); break; case NBC_ARED_RING: res = allred_sched_ring(rank, p, count, datatype, sendbuf, recvbuf, op, size, ext, schedule, handle); break; } - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } res = NBC_Sched_commit(schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Allreduce_args*)malloc(sizeof(NBC_Allreduce_args)); - args->sendbuf=sendbuf; - args->recvbuf=recvbuf; - args->count=count; - args->datatype=datatype; - args->op=op; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLREDUCE], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_ALLREDUCE] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLREDUCE], &handle->comminfo->NBC_Dict_size[NBC_ALLREDUCE]); + args = (NBC_Allreduce_args *) malloc (sizeof(args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->recvbuf = recvbuf; + args->count = count; + args->datatype = datatype; + args->op = op; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_ALLREDUCE] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE], + &libnbc_module->NBC_Dict_size[NBC_ALLREDUCE]); + } + } else { + 
NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } -int ompi_coll_libnbc_iallreduce_inter(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, +int ompi_coll_libnbc_iallreduce_inter(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, size, rsize; + int rank, res, rsize; + size_t size; MPI_Aint ext; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + ptrdiff_t span, gap; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + rsize = 
ompi_comm_remote_size (comm); - handle->tmpbuf = malloc(ext*count); - if(handle->tmpbuf == NULL) { printf("Error in malloc() (%i)\n", res); return NBC_OOR; } + res = ompi_datatype_type_extent(datatype, &ext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } + + res = ompi_datatype_type_size(datatype, &size); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_size() (%i)", res); + return res; + } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + span = opal_datatype_span(&datatype->super, count, &gap); + handle->tmpbuf = malloc (span); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = allred_sched_linear(rank, rsize, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, handle); - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } + /* ensure the schedule is released with the handle on error */ + handle->schedule = schedule; + + res = allred_sched_linear (rank, rsize, sendbuf, recvbuf, count, datatype, gap, op, + ext, size, schedule, handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } res = NBC_Sched_commit(schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } res = NBC_Start(handle, schedule); - if(res != 
NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } @@ -224,50 +301,78 @@ int ompi_coll_libnbc_iallreduce_inter(void* sendbuf, void* recvbuf, int count, M if (vrank == 0) rank = root; \ if (vrank == root) rank = 0; \ } -static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) { - int root, vrank, r, maxr, firstred, vpeer, peer, res; +static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, ptrdiff_t gap, const void *sendbuf, void *recvbuf, + MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle) { + int root, vrank, maxr, vpeer, peer, res; + char *rbuf, *lbuf, *buf; + int tmprbuf, tmplbuf; root = 0; /* this makes the code for ireduce and iallreduce nearly identical - could be changed to improve performance */ RANK2VRANK(rank, vrank, root); maxr = (int)ceil((log((double)p)/LOG2)); + /* ensure the result ends up in recvbuf on vrank 0 */ + if (0 == (maxr%2)) { + rbuf = (void *)(-gap); + tmprbuf = true; + lbuf = recvbuf; + tmplbuf = false; + } else { + lbuf = (void *)(-gap); + tmplbuf = true; + rbuf = recvbuf; + tmprbuf = false; + if (inplace) { + res = NBC_Copy(rbuf, count, datatype, ((char *)handle->tmpbuf) - gap, count, datatype, MPI_COMM_SELF); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + } - firstred = 1; - for(r=1; r<=maxr; r++) { - if((vrank % (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (peer < p) { /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - if(firstred 
&& MPI_IN_PLACE != sendbuf) { + res = NBC_Sched_recv (rbuf, tmprbuf, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + if (firstred && !inplace) { /* perform the reduce with the senbuf */ - res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op (sendbuf, false, rbuf, tmprbuf, count, datatype, op, schedule, true); firstred = 0; } else { /* perform the reduce in my local buffer */ - res = NBC_Sched_op(recvbuf, false, recvbuf, false, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op (lbuf, tmplbuf, rbuf, tmprbuf, count, datatype, op, schedule, true); + } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; } - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + /* swap left and right buffers */ + buf = rbuf; rbuf = lbuf ; lbuf = buf; + tmprbuf ^= 1; tmplbuf ^= 1; } } else { /* we have to send this round */ - vpeer = vrank - (1<<(r-1)); + vpeer = vrank - (1 << (r - 1)); VRANK2RANK(peer, vpeer, root) - if(firstred && MPI_IN_PLACE != sendbuf) { + if (firstred && !inplace) { /* we have to use the sendbuf in the first round .. 
*/ - res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule); + res = NBC_Sched_send (sendbuf, false, count, datatype, peer, schedule, false); } else { - /* and the recvbuf in all remeining rounds */ - res = NBC_Sched_send(recvbuf, false, count, datatype, peer, schedule); + /* and the recvbuf in all remaining rounds */ + res = NBC_Sched_send (lbuf, tmplbuf, count, datatype, peer, schedule, false); } - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + /* leave the game */ break; } @@ -278,61 +383,76 @@ static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype dat RANK2VRANK(rank, vrank, root); /* receive from the right hosts */ - if(vrank != 0) { - for(r=0; r= (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (vrank != 0) { + for (int r = 0; r < maxr ; ++r) { + if ((vrank >= (1 << r)) && (vrank < (1 << (r + 1)))) { + VRANK2RANK(peer, vrank - (1 << r), root); + res = NBC_Sched_recv (recvbuf, false, count, datatype, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } } - res = NBC_Sched_barrier(schedule); - if(NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + + res = NBC_Sched_barrier (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } + if (0 == vrank) assert(lbuf == recvbuf); /* now send to the right hosts */ - for(r=0; rtmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + for (int r = 0; r < maxr; ++r) { + if (((vrank + (1 << r) < p) && (vrank < (1 << r))) || (vrank == 0)) { + VRANK2RANK(peer, vrank + (1 << r), root); + res = NBC_Sched_send (recvbuf, false, count, datatype, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } } - /* end of the bcast */ - return NBC_OK; + /* end of the bcast */ + return 
OMPI_SUCCESS; } -static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle) { - int i; /* runner */ +static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datatype, const void *sendbuf, void *recvbuf, MPI_Op op, + int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle) { int segsize, *segsizes, *segoffsets; /* segment sizes and offsets per segment (number of segments == number of nodes */ int speer, rpeer; /* send and recvpeer */ + int res = OMPI_SUCCESS; - if(count == 0) return NBC_OK; - - { - int mycount; /* temporary */ - segsizes = (int*)malloc(sizeof(int)*p); - segoffsets = (int*)malloc(sizeof(int)*p); - segsize = count/p; /* size of the segments */ - if(count%p != 0) segsize++; - mycount = count; - segoffsets[0] = 0; - for(i = 0; i reduced this round * / -> sum (reduced in a previous step) @@ -432,96 +552,152 @@ static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype dataty * 2p-2 rounds ... 
every node does p-1 reductions and p-1 sends * */ - { - int round = 0; - /* first p-1 rounds are reductions */ - do { - int selement = (r+1-round + 2*p /*2*p avoids negative mod*/)%p; /* the element I am sending */ - int soffset = segoffsets[selement]*ext; - int relement = (r-round + 2*p /*2*p avoids negative mod*/)%p; /* the element that I receive from my neighbor */ - int roffset = segoffsets[relement]*ext; - - /* first message come out of sendbuf */ - if(round == 0) { - NBC_Sched_send((char*)sendbuf+soffset, false, segsizes[selement], datatype, speer, schedule); - } else { - NBC_Sched_send((char*)recvbuf+soffset, false, segsizes[selement], datatype, speer, schedule); - } - NBC_Sched_recv((char*)recvbuf+roffset, false, segsizes[relement], datatype, rpeer, schedule); + /* first p-1 rounds are reductions */ + for (int round = 0 ; round < p - 1 ; ++round) { + int selement = (r+1-round + 2*p /*2*p avoids negative mod*/)%p; /* the element I am sending */ + int soffset = segoffsets[selement]*ext; + int relement = (r-round + 2*p /*2*p avoids negative mod*/)%p; /* the element that I receive from my neighbor */ + int roffset = segoffsets[relement]*ext; + + /* first message come out of sendbuf */ + if (round == 0) { + res = NBC_Sched_send ((char *) sendbuf + soffset, false, segsizes[selement], datatype, speer, + schedule, false); + } else { + res = NBC_Sched_send ((char *) recvbuf + soffset, false, segsizes[selement], datatype, speer, + schedule, false); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + + res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer, + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + + res = NBC_Sched_op ((char *) sendbuf + roffset, false, (char *) recvbuf + roffset, false, + segsizes[relement], datatype, op, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + } - NBC_Sched_barrier(schedule); - NBC_Sched_op((char*)recvbuf+roffset, false, 
(char*)sendbuf+roffset, false, (char*)recvbuf+roffset, false, segsizes[relement], datatype, op, schedule); - NBC_Sched_barrier(schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + free (segsizes); + free (segoffsets); + return res; + } - round++; - } while(round < p-1); + for (int round = p - 1 ; round < 2 * p - 2 ; ++round) { + int selement = (r+1-round + 2*p /*2*p avoids negative mod*/)%p; /* the element I am sending */ + int soffset = segoffsets[selement]*ext; + int relement = (r-round + 2*p /*2*p avoids negative mod*/)%p; /* the element that I receive from my neighbor */ + int roffset = segoffsets[relement]*ext; - do { - int selement = (r+1-round + 2*p /*2*p avoids negative mod*/)%p; /* the element I am sending */ - int soffset = segoffsets[selement]*ext; - int relement = (r-round + 2*p /*2*p avoids negative mod*/)%p; /* the element that I receive from my neighbor */ - int roffset = segoffsets[relement]*ext; + res = NBC_Sched_send ((char *) recvbuf + soffset, false, segsizes[selement], datatype, speer, + schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } - NBC_Sched_send((char*)recvbuf+soffset, false, segsizes[selement], datatype, speer, schedule); - NBC_Sched_recv((char*)recvbuf+roffset, false, segsizes[relement], datatype, rpeer, schedule); - NBC_Sched_barrier(schedule); - round++; - } while (round < 2*p-2); + res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer, + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } } - return NBC_OK; + free (segsizes); + free (segoffsets); + + return res; } -static inline int allred_sched_linear(int rank, int rsize, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, - MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle) { - int res, rpeer; +static inline int allred_sched_linear(int rank, int rsize, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + ptrdiff_t gap, MPI_Op op, int ext, 
int size, NBC_Schedule *schedule, NBC_Handle *handle) { + int res; - if(count == 0) return NBC_OK; + if (0 == count) { + return OMPI_SUCCESS; + } /* send my data to the remote root */ - res = NBC_Sched_send (sendbuf, false, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sendbuf, false, count, datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - res = NBC_Sched_recv (recvbuf, false, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + /* recv my data to the remote root */ + if (0 != rank || 1 ==(rsize%2)) { + res = NBC_Sched_recv (recvbuf, false, count, datatype, 0, schedule, false); + } else { + res = NBC_Sched_recv ((void *)(-gap), true, count, datatype, 0, schedule, false); + } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } if (0 == rank) { - /* wait for data from the remote root */ - res = NBC_Sched_barrier (schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + char *rbuf, *lbuf, *buf; + int tmprbuf, tmplbuf; - /* get data from remote peers and reduce */ - for (rpeer = 1 ; rpeer < rsize ; ++rpeer) { - res = NBC_Sched_recv (0, true, count, datatype, rpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + /* ensure the result ends up in recvbuf */ + if (0 == (rsize%2)) { + lbuf = (void *)(-gap); + tmplbuf = true; + rbuf = recvbuf; + tmprbuf = false; + } else { + rbuf = (void *)(-gap); + tmprbuf = true; + lbuf = recvbuf; + tmplbuf = false; + } - res = NBC_Sched_op (recvbuf, false, 0, true, recvbuf, false, count, datatype, op, 
schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_op() (%i)\n", res); return res; } + /* get data from remote peers and reduce */ + for (int rpeer = 1 ; rpeer < rsize ; ++rpeer) { + res = NBC_Sched_recv (rbuf, tmprbuf, count, datatype, rpeer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_op (lbuf, tmplbuf, rbuf, tmprbuf, count, datatype, op, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + /* swap left and right buffers */ + buf = rbuf; rbuf = lbuf ; lbuf = buf; + tmprbuf ^= 1; tmplbuf ^= 1; } /* exchange our result with the remote root (each root will broadcast to the other's peers) */ - res = NBC_Sched_recv (0, true, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_send (recvbuf, false, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_recv ((void *)(-gap), true, count, datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } /* wait for data from remote root */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_send (recvbuf, false, count, datatype, 0, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } /* broadcast the result to all remote peers */ - for (rpeer = 1 ; rpeer < rsize ; ++rpeer) { - res = NBC_Sched_send (0, true, count, datatype, rpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + for (int rpeer = 1 ; rpeer < rsize ; ++rpeer) { + res = NBC_Sched_send ((void *)(-gap), true, count, datatype, rpeer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { 
+ return res; + } } } - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoall.c b/ompi/mca/coll/libnbc/nbc_ialltoall.c index 3712dae40e7..c9df894cc01 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoall.c @@ -1,13 +1,14 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * Author(s): Torsten Hoefler @@ -15,146 +16,206 @@ */ #include "nbc_internal.h" -static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle); +static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, + const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, + int recvcount, 
MPI_Datatype recvtype, MPI_Comm comm); +static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, + const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, MPI_Comm comm); +static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, + const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle); +static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, void* buf, int count, + MPI_Datatype type, MPI_Aint ext, ptrdiff_t gap, MPI_Comm comm); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Alltoall_args_compare(NBC_Alltoall_args *a, NBC_Alltoall_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && - (a->sendcount == b->sendcount) && + if ((a->sendbuf == b->sendbuf) && + (a->sendcount == b->sendcount) && (a->sendtype == b->sendtype) && (a->recvbuf == b->recvbuf) && (a->recvcount == b->recvcount) && - (a->recvtype == b->recvtype) ) { - return 0; + (a->recvtype == b->recvtype)) { + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if( a->sendbuf < b->sendbuf ) { return -1; - } - return +1; + } + + return 1; } #endif /* simple linear MPI_Ialltoall the (simple) algorithm just sends to all nodes */ -int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, +int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, a2asize, sndsize, datasize; + int rank, p, res, datasize; + size_t a2asize, sndsize; NBC_Schedule *schedule; MPI_Aint rcvext, sndext; #ifdef NBC_CACHE_SCHEDULE NBC_Alltoall_args *args, *found, 
search; #endif char *rbuf, *sbuf, inplace; - enum {NBC_A2A_LINEAR, NBC_A2A_PAIRWISE, NBC_A2A_DISS} alg; + enum {NBC_A2A_LINEAR, NBC_A2A_PAIRWISE, NBC_A2A_DISS, NBC_A2A_INPLACE} alg; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + ptrdiff_t span, gap; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_size(sendtype, &sndsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } + + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + + res = ompi_datatype_type_extent(sendtype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } + + res = ompi_datatype_type_extent(recvtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } + + res = ompi_datatype_type_size(sendtype, &sndsize); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_size() (%i)", res); + return res; + } /* algorithm selection */ - a2asize = sndsize*sendcount*p; + a2asize = sndsize * sendcount * p; /* this number is optimized for TCP on odin.cs.indiana.edu */ - if((p 
<= 8) && ((a2asize < 1<<17) || (sndsize*sendcount < 1<<12))) { + if (inplace) { + alg = NBC_A2A_INPLACE; + } else if((p <= 8) && ((a2asize < 1<<17) || (sndsize*sendcount < 1<<12))) { /* just send as fast as we can if we have less than 8 peers, if the * total communicated size is smaller than 1<<17 *and* if we don't * have eager messages (msgsize < 1<<13) */ alg = NBC_A2A_LINEAR; - } else if(a2asize < (1<<12)*p) { + } else if(a2asize < (1<<12)*(unsigned int)p) { /*alg = NBC_A2A_DISS;*/ alg = NBC_A2A_LINEAR; } else alg = NBC_A2A_LINEAR; /*NBC_A2A_PAIRWISE;*/ - if(!inplace) { + if (!inplace) { /* copy my data to receive buffer */ - rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); - sbuf = ((char *)sendbuf) + (rank*sendcount*sndext); - res = NBC_Copy(sbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + rbuf = (char *) recvbuf + rank * recvcount * rcvext; + sbuf = (char *) sendbuf + rank * sendcount * sndext; + res = NBC_Copy (sbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; } /* allocate temp buffer if we need one */ - if(alg == NBC_A2A_DISS) { + if (alg == NBC_A2A_INPLACE) { + span = opal_datatype_span(&recvtype->super, recvcount, &gap); + handle->tmpbuf = malloc(span); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } else if (alg == NBC_A2A_DISS) { /* only A2A_DISS needs buffers */ if(NBC_Type_intrinsic(sendtype)) { - datasize = sndext*sendcount; + datasize = sndext * sendcount; } else { - res = MPI_Pack_size(sendcount, sendtype, comm, &datasize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; } + res = PMPI_Pack_size (sendcount, sendtype, comm, &datasize); + if 
(MPI_SUCCESS != res) { + NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res); + NBC_Return_handle (handle); + return res; + } } + /* allocate temporary buffers */ - if(p % 2 == 0) { - handle->tmpbuf=malloc(datasize*p*2); + if ((p & 1) == 0) { + handle->tmpbuf = malloc (datasize * p * 2); } else { /* we cannot divide p by two, so alloc more to be safe ... */ - handle->tmpbuf=malloc(datasize*(p/2+1)*2*2); + handle->tmpbuf = malloc (datasize * (p / 2 + 1) * 2 * 2); + } + + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; } /* phase 1 - rotate n data blocks upwards into the tmpbuffer */ #if OPAL_CUDA_SUPPORT - if(NBC_Type_intrinsic(sendtype) && !(opal_cuda_check_bufs((char *)sendbuf, (char *)recvbuf))) { + if (NBC_Type_intrinsic(sendtype) && !(opal_cuda_check_bufs((char *)sendbuf, (char *)recvbuf))) { #else - if(NBC_Type_intrinsic(sendtype)) { + if (NBC_Type_intrinsic(sendtype)) { #endif /* OPAL_CUDA_SUPPORT */ /* contiguous - just copy (1st copy) */ - memcpy(handle->tmpbuf, (char*)sendbuf+datasize*rank, datasize*(p-rank)); - if(rank != 0) memcpy((char*)handle->tmpbuf+datasize*(p-rank), sendbuf, datasize*(rank)); + memcpy (handle->tmpbuf, (char *) sendbuf + datasize * rank, datasize * (p - rank)); + if (rank != 0) { + memcpy ((char *) handle->tmpbuf + datasize * (p - rank), sendbuf, datasize * rank); + } } else { int pos=0; /* non-contiguous - pack */ - res = MPI_Pack((char*)sendbuf+rank*sendcount*sndext, (p-rank)*sendcount, sendtype, handle->tmpbuf, (p-rank)*datasize, &pos, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; } - if(rank != 0) { + res = PMPI_Pack ((char *) sendbuf + rank * sendcount * sndext, (p - rank) * sendcount, sendtype, handle->tmpbuf, + (p - rank) * datasize, &pos, comm); + if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { + NBC_Error("MPI Error in PMPI_Pack() (%i)", res); + NBC_Return_handle (handle); + return res; + } + + if (rank != 0) { pos = 0; - 
MPI_Pack(sendbuf, rank*sendcount, sendtype, (char*)handle->tmpbuf+datasize*(p-rank), rank*datasize, &pos, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; } + res = PMPI_Pack(sendbuf, rank * sendcount, sendtype, (char *) handle->tmpbuf + datasize * (p - rank), + rank * datasize, &pos, comm); + if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { + NBC_Error("MPI Error in PMPI_Pack() (%i)", res); + NBC_Return_handle (handle); + return res; + } } } - } else { - handle->tmpbuf=NULL; } #ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.sendcount=sendcount; - search.sendtype=sendtype; - search.recvbuf=recvbuf; - search.recvcount=recvcount; - search.recvtype=recvtype; - found = (NBC_Alltoall_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLTOALL], &search); - if(found == NULL) { + search.sendbuf = sendbuf; + search.sendcount = sendcount; + search.sendtype = sendtype; + search.recvbuf = recvbuf; + search.recvcount = recvcount; + search.recvtype = recvtype; + found = (NBC_Alltoall_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLTOALL], &search); + if (NULL == found) { #endif /* not found - generate new schedule */ - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* ensure the schedule is released with the handle on error */ + handle->schedule = schedule; - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - switch(alg) { - case NBC_A2A_LINEAR: + case NBC_A2A_INPLACE: + res = a2a_sched_inplace(rank, p, schedule, recvbuf, recvcount, recvtype, rcvext, gap, comm); + break; + case NBC_A2A_LINEAR: res = a2a_sched_linear(rank, p, sndext, rcvext, schedule, 
sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); break; case NBC_A2A_DISS: @@ -164,206 +225,267 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty res = a2a_sched_pairwise(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); break; } - - if (NBC_OK != res) { return res; } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } - + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Alltoall_args*)malloc(sizeof(NBC_Alltoall_args)); - args->sendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - args->recvtype=recvtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLTOALL], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_ALLTOALL] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLTOALL], &handle->comminfo->NBC_Dict_size[NBC_ALLTOALL]); - /*if(!rank) printf("[%i] removing %i elements - new size: %i \n", rank, SCHED_DICT_UPPER-SCHED_DICT_LOWER, handle->comminfo->NBC_Alltoall_size);*/ - } - /*if(!rank) printf("[%i] added new schedule to tree - number %i\n", rank, handle->comminfo->NBC_Dict_size[NBC_ALLTOALL]);*/ + args = (NBC_Alltoall_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->sendcount = sendcount; + args->sendtype = sendtype; + args->recvbuf = recvbuf; + args->recvcount = recvcount; + args->recvtype = recvtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) 
libnbc_module->NBC_Dict[NBC_ALLTOALL], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_ALLTOALL] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLTOALL], + &libnbc_module->NBC_Dict_size[NBC_ALLTOALL]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } + } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } - - return NBC_OK; + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } -int ompi_coll_libnbc_ialltoall_inter (void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, +int ompi_coll_libnbc_ialltoall_inter (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, i, rsize; + int res, rsize; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - - res = MPI_Comm_remote_size (comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return 
res; } - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + rsize = ompi_comm_remote_size (comm); - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + res = ompi_datatype_type_extent (sendtype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } - handle->tmpbuf=NULL; + res = ompi_datatype_type_extent (recvtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - for (i = 0; i < rsize; i++) { + for (int i = 0; i < rsize; i++) { /* post all sends */ - sbuf = ((char *) sendbuf) + (i * sendcount * sndext); - res = NBC_Sched_send(sbuf, false, sendcount, sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + sbuf = (char *) sendbuf + i * sendcount * sndext; + res = NBC_Sched_send (sbuf, false, sendcount, sendtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + /* post all receives */ - rbuf = ((char *) recvbuf) + (i * recvcount * rcvext); - res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + rbuf = (char *) recvbuf + i * recvcount * rcvext; + res = NBC_Sched_recv (rbuf, false, recvcount, recvtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + } + + if 
(OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; } - /*NBC_PRINT_SCHED(*schedule);*/ + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } -static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) { - int res, r, sndpeer, rcvpeer; - char *rbuf, *sbuf; +static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, + const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, + MPI_Datatype recvtype, MPI_Comm comm) { + int res; - res = NBC_OK; - if(p < 2) return res; - - for(r=1;rtmpbuf+datasize*p; - stmpbuf = (char*)handle->tmpbuf+datasize*(p+p/2); + if ((p & 1) == 0) { + rtmpbuf = (char *) handle->tmpbuf + datasize * p; + stmpbuf = (char *) handle->tmpbuf + datasize * (p + p / 2); } else { /* we cannot divide p by two, so alloc more to be safe ... 
*/ - virtp = (p/2+1)*2; - rtmpbuf = (char*)handle->tmpbuf+datasize*p; - stmpbuf = (char*)handle->tmpbuf+datasize*(p+virtp/2); + virtp = (p / 2 + 1) * 2; + rtmpbuf = (char *) handle->tmpbuf + datasize * p; + stmpbuf = (char *) handle->tmpbuf + datasize * (p + virtp / 2); } /* phase 2 - communicate */ - /*printf("[%i] temp buffer is at %lu of size %i, maxround: %i\n", rank, (unsigned long)handle->tmpbuf, (int)datasize*p*(1<tmpbuf, true, datasize, MPI_BYTE, schedule); + res = NBC_Sched_copy((void *)(intptr_t)(i * datasize), true, datasize, MPI_BYTE, stmpbuf + offset - + (intptr_t) handle->tmpbuf, true, datasize, MPI_BYTE, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } offset += datasize; } } - - speer = ( rank + r) % p; + + speer = (rank + r) % p; /* add p because modulo does not work with negative values */ - rpeer = ((rank - r)+p) % p; - - /*printf("[%i] receiving %i bytes from host %i into rbuf %lu\n", rank, offset, rpeer, (unsigned long)rtmpbuf);*/ - res = NBC_Sched_recv(rtmpbuf-(unsigned long)handle->tmpbuf, true, offset, MPI_BYTE, rpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - /*printf("[%i] sending %i bytes to host %i from sbuf %lu\n", rank, offset, speer, (unsigned long)stmpbuf);*/ - res = NBC_Sched_send(stmpbuf-(unsigned long)handle->tmpbuf, true, offset, MPI_BYTE, speer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - + rpeer = ((rank - r) + p) % p; + + res = NBC_Sched_recv (rtmpbuf - (intptr_t) handle->tmpbuf, true, offset, MPI_BYTE, rpeer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + res = NBC_Sched_send (stmpbuf - (intptr_t) handle->tmpbuf, true, offset, MPI_BYTE, speer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } 
+ /* unpack from buffer */ offset = 0; - for(i=1; itmpbuf, true, datasize, MPI_BYTE, (void*)(long)(i*datasize), true, datasize, MPI_BYTE, schedule); + res = NBC_Sched_copy (rtmpbuf + offset - (intptr_t) handle->tmpbuf, true, datasize, MPI_BYTE, + (void *)(intptr_t)(i * datasize), true, datasize, MPI_BYTE, schedule, + false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + offset += datasize; } } @@ -371,12 +493,71 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve /* phase 3 - reorder - data is now in wrong order in handle->tmpbuf - * reorder it into recvbuf */ - for(i=0; i * */ #include "nbc_internal.h" +static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, + const int *sdispls, MPI_Aint sndext, MPI_Datatype sendtype, + void *recvbuf, const int *recvcounts, + const int *rdispls, MPI_Aint rcvext, MPI_Datatype recvtype); + +static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + MPI_Aint sndext, MPI_Datatype sendtype, + void *recvbuf, const int *recvcounts, const int *rdispls, + MPI_Aint rcvext, MPI_Datatype recvtype); + +static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, + void *buf, const int *counts, const int *displs, + MPI_Aint ext, MPI_Datatype type, ptrdiff_t gap); + /* an alltoallv schedule can not be cached easily because the contents * ot the recvcounts array may change, so a comparison of the address * would not be sufficient ... 
we simply do not cache it */ /* simple linear Alltoallv */ -int ompi_coll_libnbc_ialltoallv(void* sendbuf, int *sendcounts, int *sdispls, - MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *rdispls, +int ompi_coll_libnbc_ialltoallv(const void* sendbuf, const int *sendcounts, const int *sdispls, + MPI_Datatype sendtype, void* recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, - struct mca_coll_base_module_2_1_0_t *module) + struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; + int rank, p, res; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; + ptrdiff_t gap, span; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - + NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res= MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - /* copy data to receivbuffer */ - if((sendcounts[rank] != 0) && 
!inplace) { - rbuf = ((char *) recvbuf) + (rdispls[rank] * rcvext); - sbuf = ((char *) sendbuf) + (sdispls[rank] * sndext); - res = NBC_Copy(sbuf, sendcounts[rank], sendtype, rbuf, recvcounts[rank], recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + + res = ompi_datatype_type_extent (recvtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; } - for (i = 0; i < p; i++) { - if (i == rank) { continue; } - /* post all sends */ - if(sendcounts[i] != 0) { - sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + /* copy data to receivbuffer */ + if (inplace) { + int count = 0; + for (int i = 0; i < p; i++) { + if (recvcounts[i] > count) { + count = recvcounts[i]; + } } - /* post all receives */ - if(recvcounts[i] != 0) { - rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + span = opal_datatype_span(&recvtype->super, count, &gap); + handle->tmpbuf = malloc(span); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + sendcounts = recvcounts; + sdispls = rdispls; + } else { + res = ompi_datatype_type_extent (sendtype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } + if (sendcounts[rank] != 0) { + rbuf = (char *) recvbuf + rdispls[rank] * rcvext; + sbuf = (char *) sendbuf + sdispls[rank] * sndext; + res = NBC_Copy (sbuf, 
sendcounts[rank], sendtype, rbuf, recvcounts[rank], recvtype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } } - /*NBC_PRINT_SCHED(*schedule);*/ + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + + if (inplace) { + res = a2av_sched_inplace(rank, p, schedule, recvbuf, recvcounts, + rdispls, rcvext, recvtype, gap); + } else { + res = a2av_sched_linear(rank, p, schedule, + sendbuf, sendcounts, sdispls, sndext, sendtype, + recvbuf, recvcounts, rdispls, rcvext, recvtype); + } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; + } res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } - - return NBC_OK; + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } /* simple linear Alltoallv */ -int ompi_coll_libnbc_ialltoallv_inter (void* sendbuf, int *sendcounts, int *sdispls, - MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *rdispls, +int ompi_coll_libnbc_ialltoallv_inter (const void* sendbuf, const int *sendcounts, const int *sdispls, + MPI_Datatype sendtype, void* recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, i, rsize; + int res, rsize; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; - char *rbuf, *sbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = 
(ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - MPI_Comm_remote_size (comm, &rsize); + res = ompi_datatype_type_extent(sendtype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + res = ompi_datatype_type_extent(recvtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } - handle->tmpbuf=NULL; + rsize = ompi_comm_remote_size (comm); - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - for (i = 0; i < rsize; i++) { + for (int i = 0; i < rsize; i++) { /* post all sends */ - if(sendcounts[i] != 0) { - sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + if (sendcounts[i] != 0) { + char *sbuf = (char *) sendbuf + sdispls[i] * sndext; + res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtype, i, schedule, 
false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } /* post all receives */ - if(recvcounts[i] != 0) { - rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (recvcounts[i] != 0) { + char *rbuf = (char *) recvbuf + rdispls[i] * rcvext; + res = NBC_Sched_recv (rbuf, false, recvcounts[i], recvtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } - /*NBC_PRINT_SCHED(*schedule);*/ - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; +} + +static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + MPI_Aint sndext, MPI_Datatype sendtype, + void *recvbuf, const int *recvcounts, const int *rdispls, + MPI_Aint rcvext, MPI_Datatype recvtype) { + int res; + + for (int i = 0 ; i < p ; ++i) { + if (i == rank) { + continue; + } + + /* post send */ + if (sendcounts[i] != 0) { + char *sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); + res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + + /* post receive */ + if (recvcounts[i] != 0) { + char 
*rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); + res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + } + + return OMPI_SUCCESS; +} + +static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + MPI_Aint sndext, MPI_Datatype sendtype, + void *recvbuf, const int *recvcounts, const int *rdispls, + MPI_Aint rcvext, MPI_Datatype recvtype) { + int res; + + for (int i = 1 ; i < p ; ++i) { + int sndpeer = (rank + i) % p; + int rcvpeer = (rank + p - i) %p; + + /* post send */ + if (sendcounts[sndpeer] != 0) { + char *sbuf = ((char *) sendbuf) + (sdispls[sndpeer] * sndext); + res = NBC_Sched_send(sbuf, false, sendcounts[sndpeer], sendtype, sndpeer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + + /* post receive */ + if (recvcounts[rcvpeer] != 0) { + char *rbuf = ((char *) recvbuf) + (rdispls[rcvpeer] * rcvext); + res = NBC_Sched_recv(rbuf, false, recvcounts[rcvpeer], recvtype, rcvpeer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + } + + return OMPI_SUCCESS; +} + +static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, + void *buf, const int *counts, const int *displs, + MPI_Aint ext, MPI_Datatype type, ptrdiff_t gap) { + int res; + + for (int i = 1; i < (p+1)/2; i++) { + int speer = (rank + i) % p; + int rpeer = (rank + p - i) % p; + char *sbuf = (char *) buf + displs[speer] * ext; + char *rbuf = (char *) buf + displs[rpeer] * ext; + + if (0 != counts[rpeer]) { + res = NBC_Sched_copy (rbuf, false, counts[rpeer], type, + (void *)(-gap), true, counts[rpeer], type, + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + if (0 != counts[speer]) { + res = NBC_Sched_send (sbuf, false , counts[speer], type, speer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != 
res)) { + return res; + } + } + if (0 != counts[rpeer]) { + res = NBC_Sched_recv (rbuf, false , counts[rpeer], type, rpeer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + + if (0 != counts[rpeer]) { + res = NBC_Sched_send ((void *)(-gap), true, counts[rpeer], type, rpeer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + if (0 != counts[speer]) { + res = NBC_Sched_recv (sbuf, false, counts[speer], type, speer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + } + if (0 == (p%2)) { + int peer = (rank + p/2) % p; + + char *tbuf = (char *) buf + displs[peer] * ext; + res = NBC_Sched_copy (tbuf, false, counts[peer], type, + (void *)(-gap), true, counts[peer], type, + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + res = NBC_Sched_send ((void *)(-gap), true , counts[peer], type, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + res = NBC_Sched_recv (tbuf, false , counts[peer], type, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallw.c b/ompi/mca/coll/libnbc/nbc_ialltoallw.c index e3fe6ab42e0..ec29a3a355f 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallw.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallw.c @@ -1,138 +1,322 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. 
All rights + * reserved. * * Author(s): Torsten Hoefler * */ #include "nbc_internal.h" +static inline int a2aw_sched_linear(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + struct ompi_datatype_t * const * sendtypes, + void *recvbuf, const int *recvcounts, const int *rdispls, + struct ompi_datatype_t * const * recvtypes); + +static inline int a2aw_sched_pairwise(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + struct ompi_datatype_t * const * sendtypes, + void *recvbuf, const int *recvcounts, const int *rdispls, + struct ompi_datatype_t * const * recvtypes); + +static inline int a2aw_sched_inplace(int rank, int p, NBC_Schedule *schedule, + void *buf, const int *counts, const int *displs, + struct ompi_datatype_t * const * types); + /* an alltoallw schedule can not be cached easily because the contents * ot the recvcounts array may change, so a comparison of the address * would not be sufficient ... 
we simply do not cache it */ /* simple linear Alltoallw */ -int ompi_coll_libnbc_ialltoallw(void* sendbuf, int *sendcounts, int *sdispls, - MPI_Datatype sendtypes[], void* recvbuf, int *recvcounts, int *rdispls, - MPI_Datatype recvtypes[], struct ompi_communicator_t *comm, ompi_request_t ** request, +int ompi_coll_libnbc_ialltoallw(const void* sendbuf, const int *sendcounts, const int *sdispls, + struct ompi_datatype_t * const *sendtypes, void* recvbuf, const int *recvcounts, const int *rdispls, + struct ompi_datatype_t * const *recvtypes, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; + int rank, p, res; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; + ptrdiff_t span=0; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res= MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } /* copy data to receivbuffer */ - if((sendcounts[rank] != 0) && !inplace) { - rbuf = ((char *) recvbuf) + rdispls[rank]; - 
sbuf = ((char *) sendbuf) + sdispls[rank]; + if (inplace) { + ptrdiff_t lgap, lspan; + for (int i = 0; i < p; i++) { + lspan = opal_datatype_span(&recvtypes[i]->super, recvcounts[i], &lgap); + if (lspan > span) { + span = lspan; + } + } + handle->tmpbuf = malloc(span); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + sendcounts = recvcounts; + sdispls = rdispls; + sendtypes = recvtypes; + } else if (sendcounts[rank] != 0) { + rbuf = (char *) recvbuf + rdispls[rank]; + sbuf = (char *) sendbuf + sdispls[rank]; res = NBC_Copy(sbuf, sendcounts[rank], sendtypes[rank], rbuf, recvcounts[rank], recvtypes[rank], comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } - for (i = 0; i < p; i++) { - if (i == rank) { continue; } - /* post all sends */ - if(sendcounts[i] != 0) { - sbuf = ((char *) sendbuf) + sdispls[i]; - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtypes[i], i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - /* post all receives */ - if(recvcounts[i] != 0) { - rbuf = ((char *) recvbuf) + rdispls[i]; - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtypes[i], i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if (inplace) { + res = a2aw_sched_inplace(rank, p, schedule, recvbuf, + recvcounts, rdispls, recvtypes); + } else { + res = a2aw_sched_linear(rank, p, schedule, + sendbuf, sendcounts, sdispls, sendtypes, + recvbuf, recvcounts, rdispls, recvtypes); + } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; } - /*NBC_PRINT_SCHED(*schedule);*/ + res = NBC_Sched_commit 
(schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; + } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + *request = (ompi_request_t *) handle; - return NBC_OK; -} + return OMPI_SUCCESS; +} /* simple linear Alltoallw */ -int ompi_coll_libnbc_ialltoallw_inter (void* sendbuf, int *sendcounts, int *sdispls, - MPI_Datatype sendtypes[], void* recvbuf, int *recvcounts, int *rdispls, - MPI_Datatype recvtypes[], struct ompi_communicator_t *comm, ompi_request_t ** request, +int ompi_coll_libnbc_ialltoallw_inter (const void* sendbuf, const int *sendcounts, const int *sdispls, + struct ompi_datatype_t * const *sendtypes, void* recvbuf, const int *recvcounts, const int *rdispls, + struct ompi_datatype_t * const *recvtypes, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, i, rsize; + int res, rsize; NBC_Schedule *schedule; char *rbuf, *sbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } + rsize = ompi_comm_remote_size (comm); - MPI_Comm_remote_size (comm, &rsize); + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - schedule = 
(NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + for (int i = 0 ; i < rsize ; ++i) { + /* post all sends */ + if (sendcounts[i] != 0) { + sbuf = (char *) sendbuf + sdispls[i]; + res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtypes[i], i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + } + /* post all receives */ + if (recvcounts[i] != 0) { + rbuf = (char *) recvbuf + rdispls[i]; + res = NBC_Sched_recv (rbuf, false, recvcounts[i], recvtypes[i], i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + } + } - handle->tmpbuf=NULL; + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - for (i = 0; i < rsize; i++) { - /* post all sends */ - if(sendcounts[i] != 0) { - sbuf = ((char *) sendbuf) + sdispls[i]; - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtypes[i], i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; +} + +static inline int a2aw_sched_linear(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + struct ompi_datatype_t * const * sendtypes, + void *recvbuf, const int *recvcounts, const int *rdispls, + struct ompi_datatype_t * const * recvtypes) { + int res; + + for (int i = 0; i < p; i++) { + if (i == rank) { + continue; } - /* post 
all receives */ - if(recvcounts[i] != 0) { - rbuf = ((char *) recvbuf) + rdispls[i]; - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtypes[i], i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + + /* post send */ + if (sendcounts[i] != 0) { + char *sbuf = (char *) sendbuf + sdispls[i]; + res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtypes[i], i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + /* post receive */ + if (recvcounts[i] != 0) { + char *rbuf = (char *) recvbuf + rdispls[i]; + res = NBC_Sched_recv (rbuf, false, recvcounts[i], recvtypes[i], i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } } - /*NBC_PRINT_SCHED(*schedule);*/ + return OMPI_SUCCESS; +} - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } +static inline int a2aw_sched_pairwise(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + struct ompi_datatype_t * const * sendtypes, + void *recvbuf, const int *recvcounts, const int *rdispls, + struct ompi_datatype_t * const * recvtypes) { + int res; + + for (int i = 1; i < p; i++) { + int sndpeer = (rank + i) % p; + int rcvpeer = (rank + p - i) % p; + + /* post send */ + if (sendcounts[sndpeer] != 0) { + char *sbuf = (char *) sendbuf + sdispls[sndpeer]; + res = NBC_Sched_send (sbuf, false, sendcounts[sndpeer], sendtypes[sndpeer], sndpeer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + /* post receive */ + if (recvcounts[rcvpeer] != 0) { + char *rbuf = (char *) recvbuf + rdispls[rcvpeer]; + res = NBC_Sched_recv (rbuf, false, recvcounts[rcvpeer], recvtypes[rcvpeer], rcvpeer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + } + + return OMPI_SUCCESS; +} - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { 
printf("Error in NBC_Start() (%i)\n", res); return res; } +static inline int a2aw_sched_inplace(int rank, int p, NBC_Schedule *schedule, + void *buf, const int *counts, const int *displs, + struct ompi_datatype_t * const * types) { + ptrdiff_t gap; + int res; + + for (int i = 1; i < (p+1)/2; i++) { + int speer = (rank + i) % p; + int rpeer = (rank + p - i) % p; + char *sbuf = (char *) buf + displs[speer]; + char *rbuf = (char *) buf + displs[rpeer]; + + if (0 != counts[rpeer]) { + (void)opal_datatype_span(&types[rpeer]->super, counts[rpeer], &gap); + res = NBC_Sched_copy (rbuf, false, counts[rpeer], types[rpeer], + (void *)(-gap), true, counts[rpeer], types[rpeer], + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + if (0 != counts[speer]) { + res = NBC_Sched_send (sbuf, false , counts[speer], types[speer], speer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + if (0 != counts[rpeer]) { + res = NBC_Sched_recv (rbuf, false , counts[rpeer], types[rpeer], rpeer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + + if (0 != counts[rpeer]) { + res = NBC_Sched_send ((void *)(-gap), true, counts[rpeer], types[rpeer], rpeer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + if (0 != counts[speer]) { + res = NBC_Sched_recv (sbuf, false, counts[speer], types[speer], speer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + } + if (0 == (p%2)) { + int peer = (rank + p/2) % p; + + char *tbuf = (char *) buf + displs[peer]; + (void)opal_datatype_span(&types[peer]->super, counts[peer], &gap); + res = NBC_Sched_copy (tbuf, false, counts[peer], types[peer], + (void *)(-gap), true, counts[peer], types[peer], + schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + res = NBC_Sched_send ((void *)(-gap), true , counts[peer], types[peer], peer, schedule, false); + if 
(OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + res = NBC_Sched_recv (tbuf, false , counts[peer], types[peer], peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ibarrier.c b/ompi/mca/coll/libnbc/nbc_ibarrier.c index 4016f323bdf..9d2c2f2c450 100644 --- a/ompi/mca/coll/libnbc/nbc_ibarrier.c +++ b/ompi/mca/coll/libnbc/nbc_ibarrier.c @@ -1,13 +1,15 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. 
* * Author(s): Torsten Hoefler * @@ -18,137 +20,174 @@ int ompi_coll_libnbc_ibarrier(struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int round, rank, p, maxround, res, recvpeer, sendpeer; + int rank, p, maxround, res, recvpeer, sendpeer; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - - handle->tmpbuf=(void*)malloc(2*sizeof(char)); + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } #ifdef NBC_CACHE_SCHEDULE /* there only one argument set per communicator -> hang it directly at * the tree-position, NBC_Dict_size[...] is 0 for not initialized and * 1 for initialized. NBC_Dict[...] 
is a pointer to the schedule in * this case */ - if(handle->comminfo->NBC_Dict_size[NBC_BARRIER] == 0) { + if (libnbc_module->NBC_Dict_size[NBC_BARRIER] == 0) { /* we did not init it yet */ #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - round = -1; - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* ensure the schedule is released with the handle on error */ + handle->schedule = schedule; maxround = (int)ceil((log((double)p)/LOG2)-1); - do { - round++; - sendpeer = (rank + (1<comminfo->NBC_Dict[NBC_BARRIER] = (hb_tree*)schedule; - handle->comminfo->NBC_Dict_size[NBC_BARRIER] = 1; + libnbc_module->NBC_Dict[NBC_BARRIER] = (hb_tree *) schedule; + libnbc_module->NBC_Dict_size[NBC_BARRIER] = 1; } else { /* we found it */ - schedule = (NBC_Schedule*)handle->comminfo->NBC_Dict[NBC_BARRIER]; + handle->schedule = schedule = (NBC_Schedule *) libnbc_module->NBC_Dict[NBC_BARRIER]; } + OBJ_RETAIN(schedule); #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } int ompi_coll_libnbc_ibarrier_inter(struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, rsize, peer; + int rank, res, rsize; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = 
NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + rsize = ompi_comm_remote_size (comm); - handle->tmpbuf=(void*)malloc(2*sizeof(char)); + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* ensure the schedule is released with the handle on error */ + handle->schedule = schedule; if (0 == rank) { - for (peer = 1 ; peer < rsize ; ++peer) { - res = NBC_Sched_recv (0, true, 1, MPI_BYTE, peer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + for (int peer = 1 ; peer < rsize ; ++peer) { + res = NBC_Sched_recv (NULL, false, 0, MPI_BYTE, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } } } /* synchronize with the remote root */ - res = NBC_Sched_recv (0, true, 1, MPI_BYTE, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (NULL, false, 0, MPI_BYTE, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_send 
(0, true, 1, MPI_BYTE, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (NULL, false, 0, MPI_BYTE, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } if (0 == rank) { /* wait for the remote root */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_barrier (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } /* inform remote peers that all local peers have entered the barrier */ - for (peer = 0 ; peer < rsize ; ++peer) { - res = NBC_Sched_send (0, true, 1, MPI_BYTE, peer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + for (int peer = 1; peer < rsize ; ++peer) { + res = NBC_Sched_send (NULL, false, 0, MPI_BYTE, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ibcast.c b/ompi/mca/coll/libnbc/nbc_ibcast.c index dc56090090a..840e6cdce9b 100644 --- a/ompi/mca/coll/libnbc/nbc_ibcast.c +++ b/ompi/mca/coll/libnbc/nbc_ibcast.c @@ -1,91 +1,106 @@ +/* -*- Mode: C; 
c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All - * rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2006 The Technical University of Chemnitz. All + * rights reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. * * Author(s): Torsten Hoefler * */ #include "nbc_internal.h" -static inline int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype); -static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype); -static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype, int fragsize, int size); +static inline int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, + MPI_Datatype datatype); +static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, + MPI_Datatype datatype); +static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, + MPI_Datatype datatype, int fragsize, size_t size); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Bcast_args_compare(NBC_Bcast_args *a, NBC_Bcast_args *b, void *param) { - - if( (a->buffer == b->buffer) && - 
(a->count == b->count) && + if ((a->buffer == b->buffer) && + (a->count == b->count) && (a->datatype == b->datatype) && (a->root == b->root) ) { - return 0; + return 0; } - if( a->buffer < b->buffer ) { + + if( a->buffer < b->buffer ) { return -1; - } - return +1; + } + + return 1; } #endif int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, - struct mca_coll_base_module_2_1_0_t *module) + struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, size, segsize; + int rank, p, res, segsize; + size_t size; NBC_Schedule *schedule; #ifdef NBC_CACHE_SCHEDULE NBC_Bcast_args *args, *found, search; #endif enum { NBC_BCAST_LINEAR, NBC_BCAST_BINOMIAL, NBC_BCAST_CHAIN } alg; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } - + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + + res = ompi_datatype_type_size(datatype, &size); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_size() (%i)", res); + return res; + } + segsize = 16384; /* algorithm selection */ - if(p <= 4) { - alg = NBC_BCAST_LINEAR; - } else if(size*count < 65536) { - alg = NBC_BCAST_BINOMIAL; - } else if(size*count < 524288) { - alg = NBC_BCAST_CHAIN; - segsize = 16384/2; - } else { - alg = 
NBC_BCAST_CHAIN; - segsize = 65536/2; + if( libnbc_ibcast_skip_dt_decision ) { + if (p <= 4) { + alg = NBC_BCAST_LINEAR; + } + else { + alg = NBC_BCAST_BINOMIAL; + } + } + else { + if (p <= 4) { + alg = NBC_BCAST_LINEAR; + } else if (size * count < 65536) { + alg = NBC_BCAST_BINOMIAL; + } else if (size * count < 524288) { + alg = NBC_BCAST_CHAIN; + segsize = 8192; + } else { + alg = NBC_BCAST_CHAIN; + segsize = 32768; + } } - - handle->tmpbuf=NULL; #ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.buffer=buffer; - search.count=count; - search.datatype=datatype; - search.root=root; - found = (NBC_Bcast_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_BCAST], &search); - if(found == NULL) { + search.buffer = buffer; + search.count = count; + search.datatype = datatype; + search.root = root; + found = (NBC_Bcast_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_BCAST], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } switch(alg) { case NBC_BCAST_LINEAR: @@ -98,40 +113,69 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int res = bcast_sched_chain(rank, p, root, schedule, buffer, count, datatype, segsize, size); break; } - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + #ifdef NBC_CACHE_SCHEDULE /* save schedule to 
tree */ - args = (NBC_Bcast_args*)malloc(sizeof(NBC_Bcast_args)); - args->buffer=buffer; - args->count=count; - args->datatype=datatype; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_BCAST], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_BCAST] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_BCAST], &handle->comminfo->NBC_Dict_size[NBC_BCAST]); + args = (NBC_Bcast_args *) malloc (sizeof (*args)); + if (NULL != args) { + args->buffer = buffer; + args->count = count; + args->datatype = datatype; + args->root = root; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_BCAST], args, args, 0); + if (0 == res) { + OBJ_RETAIN (schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_BCAST] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_BCAST], + &libnbc_module->NBC_Dict_size[NBC_BCAST]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } - - return NBC_OK; + + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } -/* better binomial bcast +/* better binomial bcast * working principle: * - each node gets a virtual rank vrank - * - the 'root' node get vrank 0 + * - the
'root' node get vrank 0 * - node 0 gets the vrank of the 'root' * - all other ranks stay identical (they do not matter) * @@ -139,7 +183,7 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int * - each node with vrank > 2^r and vrank < 2^r+1 receives from node * vrank - 2^r (vrank=1 receives from 0, vrank 0 receives never) * - each node sends each round r to node vrank + 2^r - * - a node stops to send if 2^r > commsize + * - a node stops to send if 2^r > commsize */ #define RANK2VRANK(rank, vrank, root) \ { \ @@ -154,101 +198,128 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int if (vrank == root) rank = 0; \ } static inline int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype) { - int maxr, vrank, peer, r, res; - + int maxr, vrank, peer, res; + maxr = (int)ceil((log((double)p)/LOG2)); RANK2VRANK(rank, vrank, root); /* receive from the right hosts */ - if(vrank != 0) { - for(r=0; r= (1<= (1 << r)) && (vrank < (1 << (r + 1)))) { + VRANK2RANK(peer, vrank - (1 << r), root); + res = NBC_Sched_recv (buffer, false, count, datatype, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } } - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + + res = NBC_Sched_barrier (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } /* now send to the right hosts */ - for(r=0; r * @@ -15,54 +18,60 @@ int ompi_coll_libnbc_ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, size, peer; + int res; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = 
(ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } - - handle->tmpbuf=NULL; - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - if(root != MPI_PROC_NULL) { + if (root != MPI_PROC_NULL) { /* send to all others */ - if(root == MPI_ROOT) { + if (root == MPI_ROOT) { int remsize; - res = MPI_Comm_remote_size(comm, &remsize); - if(MPI_SUCCESS != res) { printf("MPI_Comm_remote_size() failed\n"); return res; } + remsize = ompi_comm_remote_size (comm); - for (peer=0;peer @@ -18,18 +18,19 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->recvbuf == b->recvbuf) && (a->count == b->count) && (a->datatype == b->datatype) && (a->op == b->op) ) { - return 0; + return 0; } + if( a->sendbuf < b->sendbuf ) { return -1; } - return +1; + + return 1; } #endif @@ -40,125 +41,154 @@ int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param) { * 3. 
all but rank p-1 do sends to it's right neigbor and exits * */ -int ompi_coll_libnbc_iexscan(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, +int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { int rank, p, res; - MPI_Aint ext; + ptrdiff_t gap, span; NBC_Schedule *schedule; #ifdef NBC_CACHE_SCHEDULE NBC_Scan_args *args, *found, search; #endif char inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if (res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - - handle = (*coll_req); - - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); - if (inplace && rank < p - 1) - /* need more buffer space for the inplace case */ - handle->tmpbuf = malloc(ext * count * 2); - else - handle->tmpbuf = malloc(ext * count); + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - if (handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } + span = opal_datatype_span(&datatype->super, count, &gap); + if (0 < rank) { + handle->tmpbuf = malloc(span); + if (handle->tmpbuf == NULL) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + if (inplace) { 
+ res = NBC_Copy(recvbuf, count, datatype, (char *)handle->tmpbuf-gap, count, datatype, comm); + } else { + res = NBC_Copy(sendbuf, count, datatype, (char *)handle->tmpbuf-gap, count, datatype, comm); + } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } #ifdef NBC_CACHE_SCHEDULE - fprintf (stderr, "NBC_CACHE_SCHEDULE\n"); /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.recvbuf=recvbuf; - search.count=count; - search.datatype=datatype; - search.op=op; - found = (NBC_Scan_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_EXSCAN], &search); - if (found == NULL) { + search.sendbuf = sendbuf; + search.recvbuf = recvbuf; + search.count = count; + search.datatype = datatype; + search.op = op; + found = (NBC_Scan_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if (res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* make sure the schedule is released with the handle on error */ + handle->schedule = schedule; if (rank != 0) { - if (inplace && rank < p - 1) - /* if sendbuf == recvbuf do not clobber the send buffer until it has been combined - * with the incoming data. 
*/ - res = NBC_Sched_recv((void *)(ext * count), true, count, datatype, rank-1, schedule); - else - res = NBC_Sched_recv(recvbuf, false, count, datatype, rank-1, schedule); + res = NBC_Sched_recv (recvbuf, false, count, datatype, rank-1, schedule, false); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } if (rank < p - 1) { /* we have to wait until we have the data */ res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - /* perform the reduce in my temporary buffer */ - if (inplace) - res = NBC_Sched_op(0, true, sendbuf, false, (void *)(ext * count), true, count, datatype, op, schedule); - else - res = NBC_Sched_op(0, true, sendbuf, false, recvbuf, false, count, datatype, op, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } + res = NBC_Sched_op (recvbuf, false, (void *)(-gap), true, count, + datatype, op, schedule, true); - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } /* send reduced data onward */ - res = NBC_Sched_send(0, true, count, datatype, rank + 1, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - - if (inplace) - /* copy the received data into the receive buffer */ - NBC_Sched_copy ((void *)(ext * count), true, count, datatype, recvbuf, false, count, datatype, schedule); + res = NBC_Sched_send ((void *)(-gap), true, count, datatype, rank + 
1, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } } else if (p > 1) { - res = NBC_Sched_send(sendbuf, false, count, datatype, 1, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + if (inplace) { + res = NBC_Sched_send (recvbuf, false, count, datatype, 1, schedule, false); + } else { + res = NBC_Sched_send (sendbuf, false, count, datatype, 1, schedule, false); + } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Scan_args*)malloc(sizeof(NBC_Alltoall_args)); - args->sendbuf=sendbuf; - args->recvbuf=recvbuf; - args->count=count; - args->datatype=datatype; - args->op=op; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_EXSCAN], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_EXSCAN] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_EXSCAN], &handle->comminfo->NBC_Dict_size[NBC_EXSCAN]); + args = (NBC_Scan_args *) malloc (sizeof (*args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->recvbuf = recvbuf; + args->count = count; + args->datatype = datatype; + args->op = op; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_EXSCAN] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *)
libnbc_module->NBC_Dict[NBC_EXSCAN], + &libnbc_module->NBC_Dict_size[NBC_EXSCAN]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_igather.c b/ompi/mca/coll/libnbc/nbc_igather.c index de63f8fd2f1..edb334a7ea1 100644 --- a/ompi/mca/coll/libnbc/nbc_igather.c +++ b/ompi/mca/coll/libnbc/nbc_igather.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -7,8 +8,10 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -18,180 +21,224 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Gather_args_compare(NBC_Gather_args *a, NBC_Gather_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->sendcount == b->sendcount) && (a->sendtype == b->sendtype) && (a->recvbuf == b->recvbuf) && (a->recvcount == b->recvcount) && (a->recvtype == b->recvtype) && - (a->root == b->root) ) { - return 0; + (a->root == b->root)) { + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if( a->sendbuf < b->sendbuf ) { return -1; - } - return +1; + } + + return 1; } #endif -int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, - MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, +int ompi_coll_libnbc_igather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, int root, + struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; + int rank, p, res; MPI_Aint rcvext = 0; NBC_Schedule *schedule; - char *rbuf, inplace; -#ifdef NBC_CACHE_SCHEDULE - NBC_Gather_args *args, *found, search; -#endif + char *rbuf, inplace = 0; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - NBC_IN_PLACE(sendbuf, recvbuf, inplace); + rank = ompi_comm_rank (comm); + if (root == rank) { + NBC_IN_PLACE(sendbuf, recvbuf, inplace); + } + p = ompi_comm_size (comm); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = 
MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } if (rank == root) { - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + res = ompi_datatype_type_extent (recvtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } } - handle->tmpbuf = NULL; if (inplace) { - sendcount = recvcount; - sendtype = recvtype; + sendcount = recvcount; + sendtype = recvtype; } else if (rank == root) { rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); /* if I am the root - just copy the message (only without MPI_IN_PLACE) */ res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } #ifdef NBC_CACHE_SCHEDULE + NBC_Gather_args *args, *found, search; + /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.sendcount=sendcount; - search.sendtype=sendtype; - search.recvbuf=recvbuf; - search.recvcount=recvcount; - search.recvtype=recvtype; - search.root=root; - found = (NBC_Gather_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_GATHER], &search); - if(found == NULL) { + search.sendbuf = sendbuf; + search.sendcount = sendcount; + search.sendtype = sendtype; + search.recvbuf = recvbuf; + search.recvcount = recvcount; + search.recvtype = recvtype; + search.root = root; + found = (NBC_Gather_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_GATHER], + &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + 
schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* send to root */ - if(rank != root) { + if (rank != root) { /* send msg to root */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else { - for(i=0;isendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - args->recvtype=recvtype; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_GATHER], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_GATHER] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_GATHER], &handle->comminfo->NBC_Dict_size[NBC_GATHER]); + args = (NBC_Gather_args *) malloc (sizeof (*args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->sendcount = sendcount; + args->sendtype = sendtype; + args->recvbuf = recvbuf; + args->recvcount = recvcount; + args->recvtype = recvtype; + args->root = root; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_GATHER], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_GATHER] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_GATHER], + &libnbc_module->NBC_Dict_size[NBC_GATHER]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; +
OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } -int ompi_coll_libnbc_igather_inter (void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, - MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, +int ompi_coll_libnbc_igather_inter (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, int root, + struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i, rsize; + int res, rsize; MPI_Aint rcvext = 0; NBC_Schedule *schedule; char *rbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_remote_size (comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } + rsize = ompi_comm_remote_size (comm); if (root == MPI_ROOT) { - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI 
Error in MPI_Type_extent() (%i)\n", res); return res; } + res = ompi_datatype_type_extent(recvtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } } - handle->tmpbuf = NULL; - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* send to root */ - if(root != MPI_ROOT && root != MPI_PROC_NULL) { + if (root != MPI_ROOT && root != MPI_PROC_NULL) { /* send msg to root */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sendbuf, false, sendcount, sendtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else if (MPI_ROOT == root) { - for (i = 0 ; i < rsize ; ++i) { + for (int i = 0 ; i < rsize ; ++i) { rbuf = ((char *)recvbuf) + (i * recvcount * rcvext); /* root receives message to the right buffer */ - res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (rbuf, false, recvcount, recvtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + 
OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_igatherv.c b/ompi/mca/coll/libnbc/nbc_igatherv.c index 3a8157a6585..1b55002c4b2 100644 --- a/ompi/mca/coll/libnbc/nbc_igatherv.c +++ b/ompi/mca/coll/libnbc/nbc_igatherv.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -7,8 +8,11 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * * Author(s): Torsten Hoefler * @@ -20,122 +24,156 @@ * would not be sufficient ... 
we simply do not cache it */ -int ompi_coll_libnbc_igatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, - void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, +int ompi_coll_libnbc_igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, + void* recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; + int rank, p, res; MPI_Aint rcvext = 0; NBC_Schedule *schedule; - char *rbuf, inplace; + char *rbuf, inplace = 0; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - NBC_IN_PLACE(sendbuf, recvbuf, inplace); + rank = ompi_comm_rank (comm); + if (root == rank) { + NBC_IN_PLACE(sendbuf, recvbuf, inplace); + } + p = ompi_comm_size (comm); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } if (rank == root) { - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + res = ompi_datatype_type_extent(recvtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } } - handle->tmpbuf = NULL; - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } 
+ schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* send to root */ - if(rank != root) { + if (rank != root) { /* send msg to root */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sendbuf, false, sendcount, sendtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else { - for(i=0;itmpbuf = NULL; - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* send to root */ if (MPI_ROOT != root && MPI_PROC_NULL != root) { /* send msg to root */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sendbuf, false, sendcount, sendtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else if (MPI_ROOT == root) { - for (i = 0 ; i < rsize ; ++i) { - rbuf = ((char *)recvbuf) + (displs[i]*rcvext); + for (int i = 0 ; i < rsize ; ++i) { + rbuf = (char *) recvbuf + displs[i] * rcvext; /* root receives message to the right buffer */ - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (rbuf, false, recvcounts[i], recvtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } - res = NBC_Sched_commit(schedule); - if 
(NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c b/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c index 88825becc98..e9ae7b388a9 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c @@ -1,11 +1,14 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -18,7 +21,7 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Ineighbor_allgather_args_compare(NBC_Ineighbor_allgather_args *a, NBC_Ineighbor_allgather_args *b, void *param) { - if( (a->sbuf == b->sbuf) && + if ((a->sbuf == b->sbuf) && (a->scount == b->scount) && (a->stype == b->stype) && (a->rbuf == b->rbuf) && @@ -26,140 +29,140 @@ int NBC_Ineighbor_allgather_args_compare(NBC_Ineighbor_allgather_args *a, NBC_In (a->rtype == b->rtype) ) { return 0; } + if( a->sbuf < b->sbuf ) { return -1; } - return +1; + + return 1; } #endif -int ompi_coll_libnbc_ineighbor_allgather(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, +int ompi_coll_libnbc_ineighbor_allgather(const void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, size, res, worldsize; - MPI_Aint sndext, rcvext; + int res, indegree, outdegree, *srcs, *dsts; + MPI_Aint rcvext; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - handle = *coll_req; - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_size(MPI_COMM_WORLD, &worldsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - - res = MPI_Type_extent(stype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(rtype, 
&rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - char inplace; NBC_Schedule *schedule; -#ifdef NBC_CACHE_SCHEDULE - NBC_Ineighbor_allgather_args *args, *found, search; -#endif - - NBC_IN_PLACE(sbuf, rbuf, inplace); - handle->tmpbuf=NULL; + res = ompi_datatype_type_extent (rtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } #ifdef NBC_CACHE_SCHEDULE + NBC_Ineighbor_allgather_args *args, *found, search; + /* search schedule in communicator specific tree */ - search.sbuf=sbuf; - search.scount=scount; - search.stype=stype; - search.rbuf=rbuf; - search.rcount=rcount; - search.rtype=rtype; - found = (NBC_Ineighbor_allgather_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], &search); - if(found == NULL) { + search.sbuf = sbuf; + search.scount = scount; + search.stype = stype; + search.rbuf = rbuf; + search.rcount = rcount; + search.rtype = rtype; + found = (NBC_Ineighbor_allgather_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], + &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - { - int indegree, outdegree, weighted, *srcs, *dsts, i; - res = NBC_Comm_neighbors_count(comm, &indegree, &outdegree, &weighted); - if(res != NBC_OK) return res; + res = NBC_Comm_neighbors (comm, &srcs, &indegree, &dsts, &outdegree); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - srcs = (int*)malloc(sizeof(int)*indegree); - dsts = (int*)malloc(sizeof(int)*outdegree); + for (int i = 0 ; i < indegree ; ++i) { + if (MPI_PROC_NULL != srcs[i]) { + res = NBC_Sched_recv 
((char *) rbuf + i * rcount * rcvext, true, rcount, rtype, srcs[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + } + } - res = NBC_Comm_neighbors(comm, indegree, srcs, MPI_UNWEIGHTED, outdegree, dsts, MPI_UNWEIGHTED); - if(res != NBC_OK) return res; + free (srcs); - if(inplace) { /* we need an extra buffer to be deadlock-free */ - handle->tmpbuf = malloc(indegree*rcvext*rcount); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + free (dsts); + return res; + } - for(i = 0; i < indegree; i++) { - if (MPI_PROC_NULL != srcs[i]) { - res = NBC_Sched_recv((char*)0+i*rcount*rcvext, true, rcount, rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if (MPI_PROC_NULL != dsts[i]) { - res = NBC_Sched_send((char*)sbuf, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - /* unpack from buffer */ - for(i = 0; i < indegree; i++) { - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - res = NBC_Sched_copy((char*)0+i*rcount*rcvext, true, rcount, rtype, (char*)rbuf+i*rcount*rcvext, false, rcount, rtype, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - } else { /* non INPLACE case */ - /* simply loop over neighbors and post send/recv operations */ - for(i = 0; i < indegree; i++) { - if (MPI_PROC_NULL != srcs[i]) { - res = NBC_Sched_recv((char*)rbuf+i*rcount*rcvext, false, rcount, rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if (MPI_PROC_NULL != dsts[i]) { - res = NBC_Sched_send((char*)sbuf, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return 
res; } - } + for (int i = 0 ; i < outdegree ; ++i) { + if (MPI_PROC_NULL != dsts[i]) { + res = NBC_Sched_send ((char *) sbuf, false, scount, stype, dsts[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + free (dsts); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Ineighbor_allgather_args*)malloc(sizeof(NBC_Ineighbor_allgather_args)); - args->sbuf=sbuf; - args->scount=scount; - args->stype=stype; - args->rbuf=rbuf; - args->rcount=rcount; - args->rtype=rtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHER] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], &handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHER]); - } + args = (NBC_Ineighbor_allgather_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sbuf = sbuf; + args->scount = scount; + args->stype = stype; + args->rbuf = rbuf; + args->rcount = rcount; + args->rtype = rtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHER] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHER], + 
&libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHER]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; + } + + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c b/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c index b44cd893c86..50d85ee406a 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c @@ -1,11 +1,14 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -18,158 +21,150 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Ineighbor_allgatherv_args_compare(NBC_Ineighbor_allgatherv_args *a, NBC_Ineighbor_allgatherv_args *b, void *param) { - if( (a->sbuf == b->sbuf) && + if ((a->sbuf == b->sbuf) && (a->scount == b->scount) && (a->stype == b->stype) && (a->rbuf == b->rbuf) && (a->rcount == b->rcount) && (a->rtype == b->rtype) ) { - return 0; + return 0; } + if( a->sbuf < b->sbuf ) { return -1; } - return +1; + + return 1; } #endif -int ompi_coll_libnbc_ineighbor_allgatherv(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, - int *rcounts, int *displs, MPI_Datatype rtype, +int ompi_coll_libnbc_ineighbor_allgatherv(const void *sbuf, int scount, MPI_Datatype stype, void *rbuf, + const int *rcounts, const int *displs, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, size, res, worldsize; - MPI_Aint sndext, rcvext; + int res, indegree, outdegree, *srcs, *dsts; + MPI_Aint rcvext; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - handle = *coll_req; - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_size(MPI_COMM_WORLD, &worldsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - - res = MPI_Type_extent(stype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = 
MPI_Type_extent(rtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - char inplace; NBC_Schedule *schedule; -#ifdef NBC_CACHE_SCHEDULE - NBC_Ineighbor_allgatherv_args *args, *found, search; -#endif - - NBC_IN_PLACE(sbuf, rbuf, inplace); - handle->tmpbuf=NULL; + res = ompi_datatype_type_extent(rtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } #ifdef NBC_CACHE_SCHEDULE + NBC_Ineighbor_allgatherv_args *args, *found, search; + /* search schedule in communicator specific tree */ - search.sbuf=sbuf; - search.scount=scount; - search.stype=stype; - search.rbuf=rbuf; - search.rcount=rcount; - search.rtype=rtype; - found = (NBC_Ineighbor_allgatherv_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], &search); - if(found == NULL) { + search.sbuf = sbuf; + search.scount = scount; + search.stype = stype; + search.rbuf = rbuf; + search.rcount = rcount; + search.rtype = rtype; + found = (NBC_Ineighbor_allgatherv_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], + &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } - - { - int indegree, outdegree, weighted, *srcs, *dsts, i; - res = NBC_Comm_neighbors_count(comm, &indegree, &outdegree, &weighted); - if(res != NBC_OK) return res; - - srcs = (int*)malloc(sizeof(int)*indegree); - dsts = (int*)malloc(sizeof(int)*outdegree); - - res = NBC_Comm_neighbors(comm, indegree, srcs, MPI_UNWEIGHTED, outdegree, dsts, MPI_UNWEIGHTED); - if(res != NBC_OK) return res; - - if(inplace) { /* we need an extra buffer to be deadlock-free */ - int sumrcounts=0; - int offset=0; - for(i=0; itmpbuf = malloc(rcvext*sumrcounts); - - for(i = 0; i < indegree; i++) { - if(srcs[i] != 
MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)0+offset, true, rcounts[i], rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - offset += rcounts[i]*rcvext; - } - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = NBC_Sched_send((char*)sbuf, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - /* unpack from buffer */ - offset=0; - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - res = NBC_Sched_copy((char*)0+offset, true, rcounts[i], rtype, (char*)rbuf+displs[i]*rcvext, false, rcounts[i], rtype, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - offset += rcounts[i]*rcvext; - } - } else { /* non INPLACE case */ - - /* simply loop over neighbors and post send/recv operations */ - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)rbuf+displs[i]*rcvext, false, rcounts[i], rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + res = NBC_Comm_neighbors(comm, &srcs, &indegree, &dsts, &outdegree); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + /* simply loop over neighbors and post send/recv operations */ + for (int i = 0 ; i < indegree ; ++i) { + if (srcs[i] != MPI_PROC_NULL) { + res = NBC_Sched_recv ((char *) rbuf + displs[i] * rcvext, false, rcounts[i], rtype, srcs[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = 
NBC_Sched_send((char*)sbuf, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } + } + } + + free (srcs); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + free (dsts); + OBJ_RELEASE(schedule); + return res; + } + + for (int i = 0 ; i < outdegree ; ++i) { + if (dsts[i] != MPI_PROC_NULL) { + res = NBC_Sched_send ((char *) sbuf, false, scount, stype, dsts[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + free (dsts); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Ineighbor_allgatherv_args*)malloc(sizeof(NBC_Ineighbor_allgatherv_args)); - args->sbuf=sbuf; - args->scount=scount; - args->stype=stype; - args->rbuf=rbuf; - args->rcount=rcount; - args->rtype=rtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHERV] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], &handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHERV]); + args = (NBC_Ineighbor_allgatherv_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sbuf = sbuf; + args->scount = scount; + args->stype = stype; + args->rbuf = rbuf; + args->rcount = rcount; + args->rtype = rtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], args, args, 0); + if (0 == res) { + 
OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if(++libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHERV] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLGATHERV], + &libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLGATHERV]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c index b63283190cc..3d8b34a1918 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c @@ -1,11 +1,14 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -33,132 +36,136 @@ int NBC_Ineighbor_alltoall_args_compare(NBC_Ineighbor_alltoall_args *a, NBC_Inei } #endif -int ompi_coll_libnbc_ineighbor_alltoall(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, +int ompi_coll_libnbc_ineighbor_alltoall(const void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, size, res, worldsize; + int res, indegree, outdegree, *srcs, *dsts; MPI_Aint sndext, rcvext; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - handle = *coll_req; - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_size(MPI_COMM_WORLD, &worldsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - - res = MPI_Type_extent(stype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(rtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - char inplace; NBC_Schedule *schedule; -#ifdef NBC_CACHE_SCHEDULE - NBC_Ineighbor_alltoall_args *args, *found, search; -#endif - NBC_IN_PLACE(sbuf, rbuf, inplace); + res = ompi_datatype_type_extent(stype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } - handle->tmpbuf=NULL; + res = 
ompi_datatype_type_extent(rtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } #ifdef NBC_CACHE_SCHEDULE + NBC_Ineighbor_alltoall_args *args, *found, search; + /* search schedule in communicator specific tree */ - search.sbuf=sbuf; - search.scount=scount; - search.stype=stype; - search.rbuf=rbuf; - search.rcount=rcount; - search.rtype=rtype; - found = (NBC_Ineighbor_alltoall_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], &search); - if(found == NULL) { + search.sbuf = sbuf; + search.scount = scount; + search.stype = stype; + search.rbuf = rbuf; + search.rcount = rcount; + search.rtype = rtype; + found = (NBC_Ineighbor_alltoall_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], + &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - { - int indegree, outdegree, weighted, *srcs, *dsts, i; - res = NBC_Comm_neighbors_count(comm, &indegree, &outdegree, &weighted); - if(res != NBC_OK) return res; + res = NBC_Comm_neighbors(comm, &srcs, &indegree, &dsts, &outdegree); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - srcs = indegree ? (int*)malloc(sizeof(int)*indegree) : NULL; - dsts = outdegree ? 
(int*)malloc(sizeof(int)*outdegree) : NULL; + for (int i = 0 ; i < indegree ; ++i) { + if (MPI_PROC_NULL != srcs[i]) { + res = NBC_Sched_recv ((char *) rbuf + i * rcount * rcvext, true, rcount, rtype, srcs[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; + } + } + } - res = NBC_Comm_neighbors(comm, indegree, srcs, MPI_UNWEIGHTED, outdegree, dsts, MPI_UNWEIGHTED); - if(res != NBC_OK) return res; + free (srcs); - if(inplace) { /* we need an extra buffer to be deadlock-free */ - handle->tmpbuf = malloc(indegree*rcvext*rcount); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + free (dsts); + return res; + } - for(i = 0; i < indegree; i++) { - if (MPI_PROC_NULL != srcs[i]) { - res = NBC_Sched_recv((char*)0+i*rcount*rcvext, true, rcount, rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if (MPI_PROC_NULL != dsts[i]) { - res = NBC_Sched_send((char*)sbuf+i*scount*sndext, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - /* unpack from buffer */ - for(i = 0; i < indegree; i++) { - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - res = NBC_Sched_copy((char*)0+i*rcount*rcvext, true, rcount, rtype, (char*)rbuf+i*rcount*rcvext, false, rcount, rtype, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - } else { /* non INPLACE case */ - /* simply loop over neighbors and post send/recv operations */ - for(i = 0; i < indegree; i++) { - if (MPI_PROC_NULL != srcs[i]) { - res = NBC_Sched_recv((char*)rbuf+i*rcount*rcvext, false, rcount, rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if (MPI_PROC_NULL != dsts[i]) { 
- res = NBC_Sched_send((char*)sbuf+i*scount*sndext, false, scount, stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } + for (int i = 0 ; i < outdegree ; ++i) { + if (MPI_PROC_NULL != dsts[i]) { + res = NBC_Sched_send ((char *) sbuf + i * scount * sndext, false, scount, stype, dsts[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + free (dsts); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Ineighbor_alltoall_args*)malloc(sizeof(NBC_Ineighbor_alltoall_args)); - args->sbuf=sbuf; - args->scount=scount; - args->stype=stype; - args->rbuf=rbuf; - args->rcount=rcount; - args->rtype=rtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALL] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], &handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALL]); + args = (NBC_Ineighbor_alltoall_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sbuf = sbuf; + args->scount = scount; + args->stype = stype; + args->rbuf = rbuf; + args->rcount = rcount; + args->rtype = rtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if 
(++libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALL] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALL], + &libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALL]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c index 8d79d46cbeb..52983b1632b 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c @@ -1,11 +1,14 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * @@ -18,158 +21,156 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Ineighbor_alltoallv_args_compare(NBC_Ineighbor_alltoallv_args *a, NBC_Ineighbor_alltoallv_args *b, void *param) { - if( (a->sbuf == b->sbuf) && + if ((a->sbuf == b->sbuf) && (a->scount == b->scount) && (a->stype == b->stype) && (a->rbuf == b->rbuf) && (a->rcount == b->rcount) && (a->rtype == b->rtype) ) { - return 0; + return 0; } - if( a->sbuf < b->sbuf ) { + + if (a->sbuf < b->sbuf) { return -1; } - return +1; + + return 1; } #endif -int ompi_coll_libnbc_ineighbor_alltoallv(void *sbuf, int *scounts, int *sdispls, MPI_Datatype stype, - void *rbuf, int *rcounts, int *rdispls, MPI_Datatype rtype, +int ompi_coll_libnbc_ineighbor_alltoallv(const void *sbuf, const int *scounts, const int *sdispls, MPI_Datatype stype, + void *rbuf, const int *rcounts, const int *rdispls, MPI_Datatype rtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, size, res, worldsize; + int res, indegree, outdegree, *srcs, *dsts; MPI_Aint sndext, rcvext; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - handle = *coll_req; - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_size(MPI_COMM_WORLD, &worldsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - - res = MPI_Type_extent(stype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in 
MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(rtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - char inplace; NBC_Schedule *schedule; -#ifdef NBC_CACHE_SCHEDULE - NBC_Ineighbor_alltoallv_args *args, *found, search; -#endif - NBC_IN_PLACE(sbuf, rbuf, inplace); + res = ompi_datatype_type_extent (stype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } - handle->tmpbuf=NULL; + res = ompi_datatype_type_extent (rtype, &rcvext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } #ifdef NBC_CACHE_SCHEDULE + NBC_Ineighbor_alltoallv_args *args, *found, search; + /* search schedule in communicator specific tree */ - search.sbuf=sbuf; - search.scount=scount; - search.stype=stype; - search.rbuf=rbuf; - search.rcount=rcount; - search.rtype=rtype; - found = (NBC_Ineighbor_alltoallv_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], &search); - if(found == NULL) { + search.sbuf = sbuf; + search.scount = scount; + search.stype = stype; + search.rbuf = rbuf; + search.rcount = rcount; + search.rtype = rtype; + found = (NBC_Ineighbor_alltoallv_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], + &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } - - { - int indegree, outdegree, weighted, *srcs, *dsts, i; - res = NBC_Comm_neighbors_count(comm, &indegree, &outdegree, &weighted); - if(res != NBC_OK) return res; - - srcs = (int*)malloc(sizeof(int)*indegree); - dsts = (int*)malloc(sizeof(int)*outdegree); - - res = NBC_Comm_neighbors(comm, indegree, srcs, MPI_UNWEIGHTED, outdegree, dsts, MPI_UNWEIGHTED); - if(res != NBC_OK) return 
res; - - if(inplace) { /* we need an extra buffer to be deadlock-free */ - int sumrcounts=0; - int offset=0; - for(i=0; itmpbuf = malloc(rcvext*sumrcounts); - - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)0+offset, true, rcounts[i], rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - offset += rcounts[i]*rcvext; - } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = NBC_Sched_send((char*)sbuf+sdispls[i]*sndext, false, scounts[i], stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - /* unpack from buffer */ - offset=0; - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - res = NBC_Sched_copy((char*)0+offset, true, rcounts[i], rtype, (char*)rbuf+rdispls[i]*rcvext, false, rcounts[i], rtype, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - offset += rcounts[i]*rcvext; - } - } else { /* non INPLACE case */ - /* simply loop over neighbors and post send/recv operations */ - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)rbuf+rdispls[i]*rcvext, false, rcounts[i], rtype, srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } + res = NBC_Comm_neighbors (comm, &srcs, &indegree, &dsts, &outdegree); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + /* simply loop over neighbors and post send/recv operations */ + for (int i = 0 ; i < indegree ; ++i) { + if (srcs[i] != MPI_PROC_NULL) { + res = NBC_Sched_recv ((char *) rbuf 
+ rdispls[i] * rcvext, false, rcounts[i], rtype, srcs[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = NBC_Sched_send((char*)sbuf+sdispls[i]*sndext, false, scounts[i], stype, dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } + } + } + + free (srcs); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + free (dsts); + return res; + } + + for (int i = 0 ; i < outdegree ; ++i) { + if (dsts[i] != MPI_PROC_NULL) { + res = NBC_Sched_send ((char *) sbuf + sdispls[i] * sndext, false, scounts[i], stype, dsts[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + free (dsts); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Ineighbor_alltoallv_args*)malloc(sizeof(NBC_Ineighbor_alltoallv_args)); - args->sbuf=sbuf; - args->scount=scount; - args->stype=stype; - args->rbuf=rbuf; - args->rcount=rcount; - args->rtype=rtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLV] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], &handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLV]); + args = (NBC_Ineighbor_alltoallv_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sbuf = sbuf; + args->scount = scount; + 
args->stype = stype; + args->rbuf = rbuf; + args->rcount = rcount; + args->rtype = rtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLV] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLV], + &libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLV]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c index 55b116d3d56..8bab6a00811 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c @@ -1,11 +1,14 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * * Author(s): Torsten Hoefler * @@ -18,156 +21,141 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Ineighbor_alltoallw_args_compare(NBC_Ineighbor_alltoallw_args *a, NBC_Ineighbor_alltoallw_args *b, void *param) { - if( (a->sbuf == b->sbuf) && + if ((a->sbuf == b->sbuf) && (a->scount == b->scount) && (a->stype == b->stype) && (a->rbuf == b->rbuf) && (a->rcount == b->rcount) && - (a->rtype == b->rtype) ) { - return 0; + (a->rtype == b->rtype)) { + return 0; } - if( a->sbuf < b->sbuf ) { + + if (a->sbuf < b->sbuf) { return -1; } - return +1; + + return 1; } #endif -int ompi_coll_libnbc_ineighbor_alltoallw(void *sbuf, int *scounts, MPI_Aint *sdisps, MPI_Datatype *stypes, - void *rbuf, int *rcounts, MPI_Aint *rdisps, MPI_Datatype *rtypes, +int ompi_coll_libnbc_ineighbor_alltoallw(const void *sbuf, const int *scounts, const MPI_Aint *sdisps, struct ompi_datatype_t * const *stypes, + void *rbuf, const int *rcounts, const MPI_Aint *rdisps, struct ompi_datatype_t * const *rtypes, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, size, res, worldsize; - MPI_Aint *sndexts, *rcvexts; + int res, indegree, outdegree, *srcs, *dsts; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - handle = *coll_req; - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_size(MPI_COMM_WORLD, &worldsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if 
(MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - - char inplace; NBC_Schedule *schedule; + #ifdef NBC_CACHE_SCHEDULE NBC_Ineighbor_alltoallw_args *args, *found, search; -#endif - - NBC_IN_PLACE(sbuf, rbuf, inplace); - handle->tmpbuf=NULL; - -#ifdef NBC_CACHE_SCHEDULE /* search schedule in communicator specific tree */ - search.sbuf=sbuf; - search.scount=scount; - search.stype=stype; - search.rbuf=rbuf; - search.rcount=rcount; - search.rtype=rtype; - found = (NBC_Ineighbor_alltoallw_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], &search); + search.sbuf = sbuf; + search.scount = scount; + search.stype = stype; + search.rbuf = rbuf; + search.rcount = rcount; + search.rtype = rtype; + found = (NBC_Ineighbor_alltoallw_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], + &search); if(found == NULL) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } - - { - int indegree, outdegree, weighted, *srcs, *dsts, i; - res = NBC_Comm_neighbors_count(comm, &indegree, &outdegree, &weighted); - if(res != NBC_OK) return res; + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - srcs = (int*)malloc(sizeof(int)*indegree); - dsts = (int*)malloc(sizeof(int)*outdegree); + res = NBC_Comm_neighbors (comm, &srcs, &indegree, &dsts, &outdegree); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } - sndexts = (MPI_Aint*)malloc(sizeof(MPI_Aint)*outdegree); - for(i=0; itmpbuf = malloc(sumrbytes); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + free (dsts); + OBJ_RELEASE(schedule); + return res; + } - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)0+rdisps[i], true, rcounts[i], rtypes[i], srcs[i], 
schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = NBC_Sched_send((char*)sbuf+sdisps[i], false, scounts[i], stypes[i], dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - /* unpack from buffer */ - for(i = 0; i < indegree; i++) { - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - res = NBC_Sched_copy((char*)0+rdisps[i], true, rcounts[i], rtypes[i], (char*)rbuf+rdisps[i], false, rcounts[i], rtypes[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - } else { /* non INPLACE case */ - /* simply loop over neighbors and post send/recv operations */ - for(i = 0; i < indegree; i++) { - if(srcs[i] != MPI_PROC_NULL) { - res = NBC_Sched_recv((char*)rbuf+rdisps[i], false, rcounts[i], rtypes[i], srcs[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - for(i = 0; i < outdegree; i++) { - if(dsts[i] != MPI_PROC_NULL) { - res = NBC_Sched_send((char*)sbuf+sdisps[i], false, scounts[i], stypes[i], dsts[i], schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } + for (int i = 0 ; i < outdegree ; ++i) { + if (dsts[i] != MPI_PROC_NULL) { + res = NBC_Sched_send ((char *) sbuf + sdisps[i], false, scounts[i], stypes[i], dsts[i], schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + break; } } } + free (dsts); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ 
- args = (NBC_Ineighbor_alltoallw_args*)malloc(sizeof(NBC_Ineighbor_alltoallw_args)); - args->sbuf=sbuf; - args->scount=scount; - args->stype=stype; - args->rbuf=rbuf; - args->rcount=rcount; - args->rtype=rtype; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLW] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], &handle->comminfo->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLW]); - } + args = (NBC_Ineighbor_alltoallw_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sbuf = sbuf; + args->scount = scount; + args->stype = stype; + args->rbuf = rbuf; + args->rcount = rcount; + args->rtype = rtype; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLW] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_NEIGHBOR_ALLTOALLW], + &libnbc_module->NBC_Dict_size[NBC_NEIGHBOR_ALLTOALLW]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; - return NBC_OK; + return 
OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_internal.h b/ompi/mca/coll/libnbc/nbc_internal.h index 10728d6c866..915e4e232a9 100644 --- a/ompi/mca/coll/libnbc/nbc_internal.h +++ b/ompi/mca/coll/libnbc/nbc_internal.h @@ -1,14 +1,19 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All + * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. * * Author(s): Torsten Hoefler * * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * */ #ifndef __NBC_INTERNAL_H__ @@ -28,6 +33,7 @@ #include "ompi/include/ompi/constants.h" #include "ompi/request/request.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/communicator/communicator.h" #include #include @@ -66,7 +72,7 @@ extern "C" { #define NBC_SCATTER 14 #define NBC_SCATTERV 15 /* set the number of collectives in nbc.h !!!! 
*/ - + /* several typedefs for NBC */ /* the function type enum */ @@ -80,64 +86,71 @@ typedef enum { /* the send argument struct */ typedef struct { - void *buf; - char tmpbuf; + NBC_Fn_type type; int count; + const void *buf; MPI_Datatype datatype; int dest; + char tmpbuf; + bool local; } NBC_Args_send; /* the receive argument struct */ typedef struct { - void *buf; - char tmpbuf; + NBC_Fn_type type; int count; + void *buf; MPI_Datatype datatype; + char tmpbuf; int source; + bool local; } NBC_Args_recv; /* the operation argument struct */ typedef struct { - void *buf1; + NBC_Fn_type type; char tmpbuf1; - void *buf2; char tmpbuf2; - void *buf3; - char tmpbuf3; - int count; + const void *buf1; + void *buf2; MPI_Op op; MPI_Datatype datatype; + int count; } NBC_Args_op; /* the copy argument struct */ typedef struct { - void *src; - char tmpsrc; + NBC_Fn_type type; int srccount; - MPI_Datatype srctype; + void *src; void *tgt; - char tmptgt; - int tgtcount; + MPI_Datatype srctype; MPI_Datatype tgttype; + int tgtcount; + char tmpsrc; + char tmptgt; } NBC_Args_copy; /* unpack operation arguments */ typedef struct { - void *inbuf; - char tmpinbuf; + NBC_Fn_type type; int count; + void *inbuf; + void *outbuf; MPI_Datatype datatype; - void *outbuf; + char tmpinbuf; char tmpoutbuf; } NBC_Args_unpack; /* internal function prototypes */ -int NBC_Sched_create(NBC_Schedule* schedule); -int NBC_Sched_send(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule); -int NBC_Sched_recv(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule); -int NBC_Sched_op(void* buf3, char tmpbuf3, void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule); -int NBC_Sched_copy(void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, MPI_Datatype tgttype, NBC_Schedule *schedule); -int NBC_Sched_unpack(void *inbuf, char 
tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, NBC_Schedule *schedule); +int NBC_Sched_send(const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_local_send (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest,NBC_Schedule *schedule, bool barrier); +int NBC_Sched_recv(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_local_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_op (const void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, + MPI_Op op, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_copy(void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, MPI_Datatype tgttype, NBC_Schedule *schedule, bool barrier); +int NBC_Sched_unpack(void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, NBC_Schedule *schedule, bool barrier); int NBC_Sched_barrier(NBC_Schedule *schedule); int NBC_Sched_commit(NBC_Schedule *schedule); @@ -239,18 +252,27 @@ int NBC_Scatter_args_compare(NBC_Scatter_args *a, NBC_Scatter_args *b, void *par /* Schedule cache structures/functions */ void NBC_SchedCache_args_delete(void *entry); void NBC_SchedCache_args_delete_key_dummy(void *k); - + #endif int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule); int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t **request, ompi_coll_libnbc_module_t *module); +void NBC_Return_handle(ompi_coll_libnbc_request_t *request); static inline int NBC_Type_intrinsic(MPI_Datatype type); -static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm); int NBC_Create_fortran_handle(int *fhandle, NBC_Handle **handle); /* some 
macros */ +static inline void NBC_Error (char *format, ...) { + va_list args; + + va_start (args, format); + vfprintf (stderr, format, args); + fprintf (stderr, "\n"); + va_end (args); +} + /* a schedule has the following format: * [schedule] ::= [size][round-schedule][delimiter][round-schedule][delimiter]...[end] * [size] ::= size of the schedule (int) @@ -259,7 +281,7 @@ int NBC_Create_fortran_handle(int *fhandle, NBC_Handle **handle); * [type] ::= function type (NBC_Fn_type) * [type-args] ::= type specific arguments (NBC_Args_send, NBC_Args_recv or, NBC_Args_op) * [delimiter] ::= 1 (char) - indicates that a round follows - * [end] ::= 0 (char) - indicates that this is the last round + * [end] ::= 0 (char) - indicates that this is the last round */ /* @@ -275,83 +297,68 @@ int NBC_Create_fortran_handle(int *fhandle, NBC_Handle **handle); * schedule. A round has the format: * [num]{[type][type-args]} * e.g. [(int)2][(NBC_Fn_type)SEND][(NBC_Args_send)SEND-ARGS][(NBC_Fn_type)RECV][(NBC_Args_recv)RECV-ARGS] */ -#define NBC_GET_ROUND_SIZE(schedule, size) \ - { \ - int num; \ - char *p = (char*) schedule; \ - NBC_Fn_type type; \ - int i; \ - \ - NBC_GET_BYTES(p,num); \ - /*NBC_DEBUG(10, "GET_ROUND_SIZE got %i elements\n", num); */\ - for (i=0; isize; } /* increase the size of a schedule by size bytes */ -#define NBC_INC_SIZE(schedule, size) \ -{ \ - *(int*)schedule+=size; \ +static inline void nbc_schedule_inc_size (NBC_Schedule *schedule, int size) { + schedule->size += size; } /* increments the number of operations in the last round */ -#define NBC_INC_NUM_ROUND(schedule) \ -{ \ - int total_size, num_last_round; \ - long round_size; \ - char *ptr, *lastround; \ - \ - NBC_GET_SIZE(schedule, total_size); \ - \ - /* ptr begins at first round (first int is overall size) */ \ - ptr = (char*)schedule+sizeof(int); \ - lastround = ptr; \ - while ((long)ptr-(long)schedule < total_size) { \ - NBC_GET_ROUND_SIZE(ptr, round_size); \ - /*printf("got round_size %i\n", 
round_size);*/ \ - lastround = ptr; \ - ptr += round_size; \ - ptr += sizeof(char); /* barrier delimiter */ \ - /*printf("(long)ptr-(long)schedule=%li, total_size=%i\n", (long)ptr-(long)schedule, total_size); */\ - } \ - /*printf("lastround count is at offset: %li\n", (long)lastround-(long)schedule);*/ \ - /* increment the count in the last round of the schedule */ \ - memcpy(&num_last_round, lastround, sizeof(int)); \ - num_last_round++; \ - memcpy(lastround, &num_last_round, sizeof(int)); \ +static inline void nbc_schedule_inc_round (NBC_Schedule *schedule) { + int last_round_num; + char *lastround; + + lastround = schedule->data + schedule->current_round_offset; + + /* increment the count in the last round of the schedule (memcpy is used + * to protect against unaligned access) */ + memcpy (&last_round_num, lastround, sizeof (last_round_num)); + ++last_round_num; + memcpy (lastround, &last_round_num, sizeof (last_round_num)); } /* NBC_PRINT_ROUND prints a round in a schedule. A round has the format: @@ -428,39 +435,30 @@ int NBC_Create_fortran_handle(int *fhandle, NBC_Handle **handle); } \ } -#define NBC_CHECK_NULL(ptr) \ -{ \ - if(ptr == NULL) { \ - printf("realloc error :-(\n"); \ - } \ -} - - - /* -#define NBC_DEBUG(level, ...) {} +#define NBC_DEBUG(level, ...) {} */ -static inline void NBC_DEBUG(int level, const char *fmt, ...) -{ +static inline void NBC_DEBUG(int level, const char *fmt, ...) 
+{ #if NBC_DLEVEL > 0 va_list ap; - int rank; - - if(NBC_DLEVEL >= level) { - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - printf("[LibNBC - %i] ", rank); + int rank; + + if(NBC_DLEVEL >= level) { + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + printf("[LibNBC - %i] ", rank); va_start(ap, fmt); vprintf(fmt, ap); va_end (ap); - } + } #endif } /* returns true (1) or false (0) if type is intrinsic or not */ static inline int NBC_Type_intrinsic(MPI_Datatype type) { - + if( ( type == MPI_INT ) || ( type == MPI_LONG ) || ( type == MPI_SHORT ) || @@ -476,16 +474,15 @@ static inline int NBC_Type_intrinsic(MPI_Datatype type) { ( type == MPI_LONG_INT) || ( type == MPI_2INT) || ( type == MPI_SHORT_INT) || - ( type == MPI_LONG_DOUBLE_INT)) + ( type == MPI_LONG_DOUBLE_INT)) return 1; - else + else return 0; } /* let's give a try to inline functions */ -static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) { +static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) { int size, pos, res; - OPAL_PTRDIFF_TYPE ext, lb; void *packbuf; #if OPAL_CUDA_SUPPORT @@ -495,25 +492,46 @@ static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void * #endif /* OPAL_CUDA_SUPPORT */ /* if we have the same types and they are contiguous (intrinsic * types are contiguous), we can just use a single memcpy */ - res = ompi_datatype_get_extent(srctype, &lb, &ext); - if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - memcpy(tgt, src, srccount*ext); + ptrdiff_t gap, span; + span = opal_datatype_span(&srctype->super, srccount, &gap); + + memcpy(tgt, src, span); } else { /* we have to pack and unpack */ - res = MPI_Pack_size(srccount, srctype, comm, &size); - if (MPI_SUCCESS != res || 0 == size) { printf("MPI Error in MPI_Pack_size() (%i:%i)\n", res, size); return (MPI_SUCCESS == res) ? 
MPI_ERR_SIZE : res; } + res = PMPI_Pack_size(srccount, srctype, comm, &size); + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in PMPI_Pack_size() (%i:%i)", res, size); + return res; + } + + if (0 == size) { + return OMPI_SUCCESS; + } packbuf = malloc(size); - if (NULL == packbuf) { printf("Error in malloc()\n"); return res; } + if (NULL == packbuf) { + NBC_Error("Error in malloc()"); + return res; + } + pos=0; - res = MPI_Pack(src, srccount, srctype, packbuf, size, &pos, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; } + res = PMPI_Pack(src, srccount, srctype, packbuf, size, &pos, comm); + + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in PMPI_Pack() (%i)", res); + free (packbuf); + return res; + } + pos=0; - res = MPI_Unpack(packbuf, size, &pos, tgt, tgtcount, tgttype, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Unpack() (%i)\n", res); return res; } + res = PMPI_Unpack(packbuf, size, &pos, tgt, tgtcount, tgttype, comm); free(packbuf); + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res); + return res; + } } - return NBC_OK; + return OMPI_SUCCESS; } static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) { @@ -528,25 +546,35 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void /* if we have the same types and they are contiguous (intrinsic * types are contiguous), we can just use a single memcpy */ res = ompi_datatype_get_extent (srctype, &lb, &ext); - if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + if (OMPI_SUCCESS != res) { + NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); + return res; + } + memcpy(tgt, src, srccount * ext); } else { /* we have to unpack */ - res = MPI_Pack_size(srccount, srctype, comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; } - pos=0; - res = 
MPI_Unpack(src, size, &pos, tgt, srccount, srctype, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Unpack() (%i)\n", res); return res; } + res = PMPI_Pack_size(srccount, srctype, comm, &size); + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in PMPI_Pack_size() (%i)", res); + return res; + } + pos = 0; + res = PMPI_Unpack(src, size, &pos, tgt, srccount, srctype, comm); + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res); + return res; + } } - return NBC_OK; + return OMPI_SUCCESS; } /* deletes elements from dict until low watermark is reached */ static inline void NBC_SchedCache_dictwipe(hb_tree *dict, int *size) { hb_itor *itor; - + itor = hb_itor_new(dict); for (; hb_itor_valid(itor) && (*size>NBC_SCHED_DICT_LOWER); hb_itor_next(itor)) { hb_tree_remove(dict, hb_itor_key(itor), 0); @@ -566,18 +594,18 @@ static inline void NBC_SchedCache_dictwipe(hb_tree *dict, int *size) { inplace = 1; \ } else \ if(recvbuf == MPI_IN_PLACE) { \ - recvbuf = sendbuf; \ + recvbuf = (void *)sendbuf; \ inplace = 1; \ } \ } -int NBC_Comm_neighbors_count(MPI_Comm comm, int *indegree, int *outdegree, int *weighted); -int NBC_Comm_neighbors(MPI_Comm comm, int maxindegree, int sources[], int sourceweights[], int maxoutdegree, int destinations[], int destweights[]); +int NBC_Comm_neighbors_count (ompi_communicator_t *comm, int *indegree, int *outdegree); +int NBC_Comm_neighbors (ompi_communicator_t *comm, int **sources, int *source_count, int **destinations, int *dest_count); #ifdef __cplusplus } #endif - + #endif diff --git a/ompi/mca/coll/libnbc/nbc_ireduce.c b/ompi/mca/coll/libnbc/nbc_ireduce.c index 349f0230be8..cf78d073b16 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce.c @@ -1,214 +1,285 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
* Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * Author(s): Torsten Hoefler * */ + +#include "opal/include/opal/align.h" +#include "ompi/op/op.h" + #include "nbc_internal.h" -static inline int red_sched_binomial(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, void *redbuf, NBC_Schedule *schedule, NBC_Handle *handle); -static inline int red_sched_chain(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize); +static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, char tmpredbuf, int count, MPI_Datatype datatype, + MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle); +static inline int red_sched_chain (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize); -static inline int red_sched_linear(int rank, int rsize, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle); +static inline int red_sched_linear (int rank, int rsize, int root, const void *sendbuf, void *recvbuf, void *tmpbuf, int count, MPI_Datatype datatype, + MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Reduce_args_compare(NBC_Reduce_args *a, NBC_Reduce_args *b, void *param) { - if( (a->sendbuf == 
b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->recvbuf == b->recvbuf) && - (a->count == b->count) && + (a->count == b->count) && (a->datatype == b->datatype) && (a->op == b->op) && - (a->root == b->root) ) { - return 0; + (a->root == b->root)) { + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if (a->sendbuf < b->sendbuf) { return -1; } - return +1; + + return 1; } #endif /* the non-blocking reduce */ -int ompi_coll_libnbc_ireduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, +int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, segsize, size; + int rank, p, res, segsize; + size_t size; MPI_Aint ext; NBC_Schedule *schedule; char *redbuf=NULL, inplace; -#ifdef NBC_CACHE_SCHEDULE - NBC_Reduce_args *args, *found, search; -#endif + char tmpredbuf = 0; enum { NBC_RED_BINOMIAL, NBC_RED_CHAIN } alg; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - + ptrdiff_t span, gap; + NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } - + + rank = ompi_comm_rank 
(comm); + p = ompi_comm_size (comm); + + res = ompi_datatype_type_extent(datatype, &ext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } + + res = ompi_datatype_type_size(datatype, &size); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_size() (%i)", res); + return res; + } + /* only one node -> copy data */ - if((p == 1) && !inplace) { - res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + if ((p == 1) && !inplace) { + res = NBC_Copy (sendbuf, count, datatype, recvbuf, count, datatype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + *request = &ompi_request_empty; + return OMPI_SUCCESS; + } + + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; } - + + span = opal_datatype_span(&datatype->super, count, &gap); + /* algorithm selection */ - if(p > 4 || size*count < 65536) { + if (p > 4 || size * count < 65536 || !ompi_op_is_commute(op)) { alg = NBC_RED_BINOMIAL; if(rank == root) { /* root reduces in receivebuffer */ - handle->tmpbuf = malloc(ext*count); + handle->tmpbuf = malloc (span); + redbuf = recvbuf; } else { /* recvbuf may not be valid on non-root nodes */ - handle->tmpbuf = malloc(ext*count*2); - redbuf = ((char*)handle->tmpbuf)+(ext*count); + ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t); + handle->tmpbuf = malloc (span_align + span); + redbuf = (char*)span_align - gap; + tmpredbuf = 1; } } else { - handle->tmpbuf = malloc(ext*count); + handle->tmpbuf = malloc (span); alg = NBC_RED_CHAIN; segsize = 16384/2; } - if (NULL == handle->tmpbuf) { printf("Error in malloc() (%i)\n", res); return res; } + + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } #ifdef NBC_CACHE_SCHEDULE + NBC_Reduce_args 
*args, *found, search; + /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.recvbuf=recvbuf; - search.count=count; - search.datatype=datatype; - search.op=op; - search.root=root; - found = (NBC_Reduce_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_REDUCE], &search); - if(found == NULL) { + search.sendbuf = sendbuf; + search.recvbuf = recvbuf; + search.count = count; + search.datatype = datatype; + search.op = op; + search.root = root; + found = (NBC_Reduce_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_REDUCE], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* make sure the schedule is released with the handle on error */ + handle->schedule = schedule; switch(alg) { case NBC_RED_BINOMIAL: - res = red_sched_binomial(rank, p, root, sendbuf, recvbuf, count, datatype, op, redbuf, schedule, handle); + res = red_sched_binomial(rank, p, root, sendbuf, redbuf, tmpredbuf, count, datatype, op, inplace, schedule, handle); break; case NBC_RED_CHAIN: res = red_sched_chain(rank, p, root, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, handle, segsize); break; } - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } - + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } #ifdef NBC_CACHE_SCHEDULE /* 
save schedule to tree */ - args = (NBC_Reduce_args*)malloc(sizeof(NBC_Alltoall_args)); - args->sendbuf=sendbuf; - args->recvbuf=recvbuf; - args->count=count; - args->datatype=datatype; - args->op=op; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_REDUCE], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for Reduce */ - if(++handle->comminfo->NBC_Dict_size[NBC_REDUCE] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_REDUCE], &handle->comminfo->NBC_Dict_size[NBC_REDUCE]); + args = (NBC_Reduce_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->recvbuf = recvbuf; + args->count = count; + args->datatype = datatype; + args->op = op; + args->root = root; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_REDUCE], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for Reduce */ + if (++libnbc_module->NBC_Dict_size[NBC_REDUCE] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_REDUCE], + &libnbc_module->NBC_Dict_size[NBC_REDUCE]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - + res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } - + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } -int ompi_coll_libnbc_ireduce_inter(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, +int ompi_coll_libnbc_ireduce_inter(const void* sendbuf, void* 
recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { int rank, res, rsize; NBC_Schedule *schedule; - MPI_Aint ext; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + ptrdiff_t span, gap; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + rsize = ompi_comm_remote_size (comm); - handle->tmpbuf = malloc(ext*count); - if (NULL == handle->tmpbuf) { printf("Error in malloc() (%i)\n", res); return res; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + span = opal_datatype_span(&datatype->super, count, &gap); + handle->tmpbuf = malloc (span); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = red_sched_linear (rank, rsize, root, sendbuf, 
recvbuf, count, datatype, op, schedule, handle); - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } + res = red_sched_linear (rank, rsize, root, sendbuf, recvbuf, (void *)(-gap), count, datatype, op, schedule, handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } /* binomial reduce + * if op is not commutative, reduce on rank 0, and then send the result to root rank + * * working principle: * - each node gets a virtual rank vrank - * - the 'root' node get vrank 0 + * - the 'root' node get vrank 0 * - node 0 gets the vrank of the 'root' * - all other ranks stay identical (they do not matter) * * Algorithm: * pairwise exchange - * round r: + * round r: * grp = rank % 2^r * if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value * if grp == 1: send to rank - 2^(r-1) and exit function - * + * * do this for R=log_2(p) rounds - * + * */ #define RANK2VRANK(rank, vrank, root) \ { \ @@ -222,156 +293,235 @@ int ompi_coll_libnbc_ireduce_inter(void* sendbuf, void* recvbuf, int count, MPI_ if (vrank == 0) rank = root; \ if (vrank == root) rank = 0; \ } -static inline int red_sched_binomial(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, void *redbuf, NBC_Schedule *schedule, NBC_Handle 
*handle) { - int firstred, vrank, vpeer, peer, res, maxr, r; - - RANK2VRANK(rank, vrank, root); +static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, char tmpredbuf, int count, MPI_Datatype datatype, + MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle) { + int vroot, vrank, vpeer, peer, res, maxr; + char *rbuf, *lbuf, *buf, tmpbuf; + int tmprbuf, tmplbuf; + ptrdiff_t gap; + (void)opal_datatype_span(&datatype->super, count, &gap); + + if (ompi_op_is_commute(op)) { + vroot = root; + } else { + vroot = 0; + } + RANK2VRANK(rank, vrank, vroot); maxr = (int)ceil((log((double)p)/LOG2)); - firstred = 1; - for(r=1; r<=maxr; r++) { - if((vrank % (1<tmpbuf)-gap, count, datatype, MPI_COMM_SELF); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + } + + for (int r = 1, firstred = 1 ; r <= maxr ; ++r) { + if ((vrank % (1 << r)) == 0) { /* we have to receive this round */ - vpeer = vrank + (1<<(r-1)); - VRANK2RANK(peer, vpeer, root) - if(peertmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + vpeer = vrank + (1 << (r - 1)); + VRANK2RANK(peer, vpeer, vroot) + if (peer < p) { /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_recv (rbuf, tmprbuf, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + /* perform the reduce in my local buffer */ - if(firstred) { - if(rank == root) { - /* root is the only one who reduces in the receivebuffer - * take data from sendbuf in first round - save copy */ - res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule); - } else { - /* all others may not have a receive buffer - * take data from sendbuf in first round - save copy */ - res = NBC_Sched_op((char *)redbuf-(unsigned long)handle->tmpbuf, true, 
sendbuf, false, 0, true, count, datatype, op, schedule); - } + /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + if (firstred && !inplace) { + /* perform the reduce with the senbuf */ + res = NBC_Sched_op (sendbuf, false, rbuf, tmprbuf, count, datatype, op, schedule, true); firstred = 0; } else { - if(rank == root) { - /* root is the only one who reduces in the receivebuffer */ - res = NBC_Sched_op(recvbuf, false, recvbuf, false, 0, true, count, datatype, op, schedule); - } else { - /* all others may not have a receive buffer */ - res = NBC_Sched_op((char *)redbuf-(unsigned long)handle->tmpbuf, true, (char *)redbuf-(unsigned long)handle->tmpbuf, true, 0, true, count, datatype, op, schedule); - } + /* perform the reduce in my local buffer */ + res = NBC_Sched_op (lbuf, tmplbuf, rbuf, tmprbuf, count, datatype, op, schedule, true); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + /* swap left and right buffers */ + buf = rbuf; rbuf = lbuf ; lbuf = buf; + tmpbuf = tmprbuf; tmprbuf = tmplbuf; tmplbuf = tmpbuf; } } else { /* we have to send this round */ - vpeer = vrank - (1<<(r-1)); - VRANK2RANK(peer, vpeer, root) - if(firstred) { - /* we did not reduce anything */ - res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule); + vpeer = vrank - (1 << (r - 1)); + VRANK2RANK(peer, vpeer, vroot) + if (firstred && !inplace) { + /* we have to use the sendbuf in the first round .. */ + res = NBC_Sched_send (sendbuf, false, count, datatype, peer, schedule, false); } else { - /* we have to use the redbuf the root (which works in receivebuf) is never sending .. 
*/ - res = NBC_Sched_send((char *)redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule); + /* and the redbuf in all remaining rounds */ + res = NBC_Sched_send (lbuf, tmplbuf, count, datatype, peer, schedule, false); } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + /* leave the game */ break; } } + /* send to root if vroot ! root */ + if (vroot != root) { + if (0 == rank) { + res = NBC_Sched_send (redbuf, tmpredbuf, count, datatype, root, schedule, false); + } else if (root == rank) { + res = NBC_Sched_recv (redbuf, tmpredbuf, count, datatype, vroot, schedule, false); + } + } - return NBC_OK; + return OMPI_SUCCESS; } -/* chain send ... */ -static inline int red_sched_chain(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) { - int res, vrank, rpeer, speer, numfrag, fragnum, fragcount, thiscount; +/* chain send ... 
*/ +static inline int red_sched_chain (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, + MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) { + int res, vrank, rpeer, speer, numfrag, fragcount, thiscount; long offset; - + RANK2VRANK(rank, vrank, root); VRANK2RANK(rpeer, vrank+1, root); VRANK2RANK(speer, vrank-1, root); - - if(count == 0) return NBC_OK; - - numfrag = count*size/fragsize; - if((count*size)%fragsize != 0) numfrag++; - fragcount = count/numfrag; - /*printf("numfrag: %i, count: %i, size: %i, fragcount: %i\n", numfrag, count, size, fragcount);*/ - - for(fragnum = 0; fragnum < numfrag; fragnum++) { - offset = fragnum*fragcount*ext; + + if (0 == count) { + return OMPI_SUCCESS; + } + + numfrag = count * size / fragsize; + if ((count * size) % fragsize != 0) { + numfrag++; + } + + fragcount = count / numfrag; + + for (int fragnum = 0 ; fragnum < numfrag ; ++fragnum) { + offset = fragnum * fragcount * ext; thiscount = fragcount; - if(fragnum == numfrag-1) { + if(fragnum == numfrag - 1) { /* last fragment may not be full */ - thiscount = count-fragcount*fragnum; + thiscount = count - fragcount * fragnum; } /* last node does not recv */ - if(vrank != p-1) { - res = NBC_Sched_recv((char*)offset, true, thiscount, datatype, rpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - res = NBC_Sched_barrier(schedule); + if (vrank != p-1) { + if (vrank == 0 && sendbuf != recvbuf) { + res = NBC_Sched_recv ((char *)recvbuf+offset, false, thiscount, datatype, rpeer, schedule, true); + } else { + res = NBC_Sched_recv ((char *)offset, true, thiscount, datatype, rpeer, schedule, true); + } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + /* root reduces into receivebuf */ if(vrank == 0) { - res = NBC_Sched_op((char*)recvbuf+offset, false, (char*)sendbuf+offset, false, (char*)offset, true, thiscount, datatype, op, schedule); 
+ if (sendbuf != recvbuf) { + res = NBC_Sched_op ((char *) sendbuf + offset, false, (char *) recvbuf + offset, false, + thiscount, datatype, op, schedule, true); + } else { + res = NBC_Sched_op ((char *)offset, true, (char *) recvbuf + offset, false, + thiscount, datatype, op, schedule, true); + } } else { - res = NBC_Sched_op((char*)offset, true, (char*)sendbuf+offset, false, (char*)offset, true, thiscount, datatype, op, schedule); + res = NBC_Sched_op ((char *) sendbuf + offset, false, (char *) offset, true, thiscount, + datatype, op, schedule, true); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; } - res = NBC_Sched_barrier(schedule); } /* root does not send */ - if(vrank != 0) { + if (vrank != 0) { /* rank p-1 has to send out of sendbuffer :) */ - if(vrank == p-1) { - res = NBC_Sched_send((char*)sendbuf+offset, false, thiscount, datatype, speer, schedule); + /* the barrier here seems awkward but isn't!!!! */ + if (vrank == p-1) { + res = NBC_Sched_send ((char *) sendbuf + offset, false, thiscount, datatype, speer, schedule, true); } else { - res = NBC_Sched_send((char*)offset, true, thiscount, datatype, speer, schedule); + res = NBC_Sched_send ((char *) offset, true, thiscount, datatype, speer, schedule, true); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; } - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - /* this barrier here seems awkward but isn't!!!! 
*/ - res = NBC_Sched_barrier(schedule); } } - return NBC_OK; + return OMPI_SUCCESS; } /* simple linear algorithm for intercommunicators */ -static inline int red_sched_linear(int rank, int rsize, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) { - int res, peer; - - if(count == 0) return NBC_OK; +static inline int red_sched_linear (int rank, int rsize, int root, const void *sendbuf, void *recvbuf, void *tmpbuf, int count, MPI_Datatype datatype, + MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) { + int res; + char *rbuf, *lbuf, *buf; + int tmprbuf, tmplbuf; + + if (0 == count) { + return OMPI_SUCCESS; + } if (MPI_ROOT == root) { - res = NBC_Sched_recv (recvbuf, false, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_barrier (schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - for (peer = 1 ; peer < rsize ; ++peer) { - res = NBC_Sched_recv (0, true, count, datatype, peer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + /* ensure the result ends up in recvbuf */ + if (0 == (rsize%2)) { + lbuf = tmpbuf; + tmplbuf = true; + rbuf = recvbuf; + tmprbuf = false; + } else { + rbuf = tmpbuf; + tmprbuf = true; + lbuf = recvbuf; + tmplbuf = false; + } - res = NBC_Sched_op (recvbuf, false, 0, true, recvbuf, false, count, datatype, op, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_op() (%i)\n", res); return res; } + res = NBC_Sched_recv (lbuf, tmplbuf, count, datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); 
return res; } + for (int peer = 1 ; peer < rsize ; ++peer) { + res = NBC_Sched_recv (rbuf, tmprbuf, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + res = NBC_Sched_op (lbuf, tmplbuf, rbuf, tmprbuf, count, datatype, op, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + /* swap left and right buffers */ + buf = rbuf; rbuf = lbuf ; lbuf = buf; + tmprbuf ^= 1; tmplbuf ^= 1; } } else if (MPI_PROC_NULL != root) { - res = NBC_Sched_send (sendbuf, false, count, datatype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sendbuf, false, count, datatype, root, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c index fe7f280d825..58ce5334191 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c @@ -1,12 +1,13 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights @@ -15,6 +16,8 @@ * Author(s): Torsten Hoefler * */ +#include "opal/include/opal/align.h" + #include "nbc_internal.h" /* an reduce_csttare schedule can not be cached easily because the contents @@ -25,232 +28,305 @@ * * Algorithm: * pairwise exchange - * round r: + * round r: * grp = rank % 2^r * if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value * if grp == 1: send to rank - 2^(r-1) and exit function - * + * * do this for R=log_2(p) rounds - * + * */ -int ompi_coll_libnbc_ireduce_scatter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype, +int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const int *recvcounts, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int peer, rank, maxr, p, r, res, count, offset, firstred; + int peer, rank, maxr, p, res, count; MPI_Aint ext; - char *redbuf, *sbuf, inplace; + ptrdiff_t gap, span, span_align; + char *sbuf, inplace; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - + char *rbuf, *lbuf, *buf; + NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); - if(p==1) { - if(!inplace) { + res = ompi_datatype_type_extent (datatype, &ext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } + + count = 0; + for (int r = 0 ; r < p ; ++r) { + count += recvcounts[r]; + } + + if (p == 1 || 0 == count) { + if (!inplace) { /* single node not in_place: copy 
data to recvbuf */ res = NBC_Copy(sendbuf, recvcounts[0], datatype, recvbuf, recvcounts[0], datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } - /* manually complete the request */ - (*request)->req_status.MPI_ERROR = OMPI_SUCCESS; - OPAL_THREAD_LOCK(&ompi_request_lock); - ompi_request_complete(*request, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); - return NBC_OK; + + *request = &ompi_request_empty; + return OMPI_SUCCESS; } - handle = (*coll_req); + res = NBC_Init_handle (comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } + maxr = (int) ceil ((log((double) p) / LOG2)); - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + span = opal_datatype_span(&datatype->super, count, &gap); + span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t); + handle->tmpbuf = malloc (span_align + span); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - maxr = (int)ceil((log((double)p)/LOG2)); + rbuf = (char *)(-gap); + lbuf = (char *)(span_align - gap); - count = 0; - for(r=0;rtmpbuf = malloc(ext*count*2); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - redbuf = ((char*)handle->tmpbuf)+(ext*count); + /* make sure the 
schedule is released with the handle on error */ + handle->schedule = schedule; - firstred = 1; - for(r=1; r<=maxr; r++) { - if((rank % (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + peer = rank + (1 << (r - 1)); + if (peer < p) { /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - if(firstred) { + res = NBC_Sched_recv(rbuf, true, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + if (firstred) { /* take reduce data from the sendbuf in the first round -> save copy */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, sendbuf, false, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op (sendbuf, false, rbuf, true, count, datatype, op, schedule, true); firstred = 0; } else { /* perform the reduce in my local buffer */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, redbuf-(unsigned long)handle->tmpbuf, true, 0, true, count, datatype, op, schedule); + res = NBC_Sched_op (lbuf, true, rbuf, true, count, datatype, op, schedule, true); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + /* swap left and right buffers */ + buf = rbuf; rbuf = lbuf ; lbuf = buf; } } else { /* we have to send this round */ - peer = rank - (1<<(r-1)); - if(firstred) { + peer = rank - (1 << (r - 1)); + if (firstred) { /* we have to send the senbuf */ - res = 
NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule); + res = NBC_Sched_send (sendbuf, false, count, datatype, peer, schedule, false); } else { - /* we send an already reduced value from redbuf */ - res = NBC_Sched_send(redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule); + /* we send an already reduced value from lbuf */ + res = NBC_Sched_send (lbuf, true, count, datatype, peer, schedule, false); } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + /* leave the game */ break; } } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - /* rank 0 is root and sends - all others receive */ - if(rank != 0) { - res = NBC_Sched_recv(recvbuf, false, recvcounts[rank], datatype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; } - if(rank == 0) { - offset = 0; - for(r=1;rtmpbuf, true, recvcounts[r], datatype, r, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sbuf, true, recvcounts[r], datatype, r, schedule, + false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } - res = NBC_Sched_copy(redbuf-(unsigned long)handle->tmpbuf, true, recvcounts[0], datatype, recvbuf, false, recvcounts[0], datatype, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } + + res = NBC_Sched_copy (lbuf, true, recvcounts[0], datatype, recvbuf, false, + recvcounts[0], datatype, schedule, false); + } else { + res = 
NBC_Sched_recv (recvbuf, false, recvcounts[rank], datatype, 0, schedule, false); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; } - /*NBC_PRINT_SCHED(*schedule);*/ - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } - + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } -int ompi_coll_libnbc_ireduce_scatter_inter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype, - MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, - struct mca_coll_base_module_2_1_0_t *module) { - int peer, rank, r, res, count, rsize, offset; +int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf, const int *recvcounts, MPI_Datatype datatype, + MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, + struct mca_coll_base_module_2_1_0_t *module) { + int rank, res, count, lsize, rsize; MPI_Aint ext; + ptrdiff_t gap, span, span_align; NBC_Schedule *schedule; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); 
return res; } - res = MPI_Comm_remote_size(comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } + rank = ompi_comm_rank (comm); + lsize = ompi_comm_size(comm); + rsize = ompi_comm_remote_size (comm); - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + res = ompi_datatype_type_extent (datatype, &ext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } count = 0; - for (r = 0 ; r < rsize ; ++r) count += recvcounts[r]; - - handle->tmpbuf = malloc(2 * ext * count); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } + for (int r = 0 ; r < lsize ; ++r) { + count += recvcounts[r]; + } - /* send my data to the remote root */ - res = NBC_Sched_send(sendbuf, false, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + span = opal_datatype_span(&datatype->super, count, &gap); + span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t); - if (0 == rank) { - res = NBC_Sched_recv((void *) 0, true, count, datatype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } - for (peer = 1 ; peer < rsize ; ++peer) { - res = NBC_Sched_recv((void *)(ext * count), true, count, datatype, peer, schedule); 
- if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + if (count > 0) { + handle->tmpbuf = malloc (span_align + span); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_op((void *) 0, true, (void *)(ext * count), true, (void *) 0, true, count, datatype, op, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } + /* make sure the schedule is released with the handle on error */ + handle->schedule = schedule; - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + /* send my data to the remote root */ + res = NBC_Sched_send(sendbuf, false, count, datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + if (0 == rank) { + char *lbuf, *rbuf; + lbuf = (char *)(-gap); + rbuf = (char *)(span_align-gap); + res = NBC_Sched_recv (lbuf, true, count, datatype, 0, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; } - /* exchange data with remote root for scatter phase (we *could* use the local communicator to do the scatter) */ - res = NBC_Sched_recv((void *)(ext * count), true, count, datatype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + for (int peer = 1 ; peer < rsize ; ++peer) { + char *tbuf; + res = NBC_Sched_recv (rbuf, true, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle 
(handle); + return res; + } - res = NBC_Sched_send((void *) 0, true, count, datatype, 0, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_op (lbuf, true, rbuf, true, count, datatype, + op, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + tbuf = lbuf; lbuf = rbuf; rbuf = tbuf; + } - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + /* do the local scatterv with the local communicator */ + res = NBC_Sched_copy (lbuf, true, recvcounts[0], datatype, recvbuf, false, + recvcounts[0], datatype, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + for (int peer = 1, offset = recvcounts[0] * ext; peer < lsize ; ++peer) { + res = NBC_Sched_local_send (lbuf + offset, true, recvcounts[peer], datatype, peer, schedule, + false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - /* scatter */ - for (peer = 0, offset = ext * count ; peer < rsize ; ++peer) { - res = NBC_Sched_send((void *)(uintptr_t) offset, true, recvcounts[peer], datatype, peer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } offset += recvcounts[peer] * ext; } + } else { + /* receive my block */ + res = NBC_Sched_local_recv (recvbuf, false, recvcounts[rank], datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } - /* receive my block */ - res = NBC_Sched_recv(recvbuf, false, recvcounts[rank], datatype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - /*NBC_PRINT_SCHED(*schedule);*/ + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = 
NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } + *request = (ompi_request_t *) handle; /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c new file mode 100644 index 00000000000..f05c9d5cb09 --- /dev/null +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c @@ -0,0 +1,330 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2006 The Technical University of Chemnitz. All + * rights reserved. + * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2014-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * + * Author(s): Torsten Hoefler + * + */ +#include "opal/include/opal/align.h" + +#include "nbc_internal.h" + +/* an reduce_csttare schedule can not be cached easily because the contents + * ot the recvcount value may change, so a comparison of the address + * would not be sufficient ... we simply do not cache it */ + +/* binomial reduce to rank 0 followed by a linear scatter ... 
+ * + * Algorithm: + * pairwise exchange + * round r: + * grp = rank % 2^r + * if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value + * if grp == 1: send to rank - 2^(r-1) and exit function + * + * do this for R=log_2(p) rounds + * + */ + +int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, int recvcount, MPI_Datatype datatype, + MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, + struct mca_coll_base_module_2_1_0_t *module) { + int peer, rank, maxr, p, res, count; + MPI_Aint ext; + ptrdiff_t gap, span; + char *redbuf, *sbuf, inplace; + NBC_Schedule *schedule; + NBC_Handle *handle; + ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + + NBC_IN_PLACE(sendbuf, recvbuf, inplace); + + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + + res = ompi_datatype_type_extent(datatype, &ext); + if (MPI_SUCCESS != res || 0 == ext) { + NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i:%i)", res, (int) ext); + return (MPI_SUCCESS == res) ? 
MPI_ERR_SIZE : res; + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OMPI_SUCCESS != res) { + return res; + } + + schedule = OBJ_NEW(NBC_Schedule); + if (NULL == schedule) { + OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* make sure the schedule is released with the handle on error */ + handle->schedule = schedule; + + maxr = (int)ceil((log((double)p)/LOG2)); + + count = p * recvcount; + + if (0 < count) { + char *rbuf, *lbuf, *buf; + ptrdiff_t span_align; + + span = opal_datatype_span(&datatype->super, count, &gap); + span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t); + handle->tmpbuf = malloc (span_align + span); + if (NULL == handle->tmpbuf) { + OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); + OBJ_RELEASE(schedule); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + rbuf = (void *)(-gap); + lbuf = (char *)(span_align - gap); + redbuf = (char *) handle->tmpbuf + span_align - gap; + + /* copy data to redbuf if we only have a single node */ + if ((p == 1) && !inplace) { + res = NBC_Copy (sendbuf, count, datatype, redbuf, count, datatype, comm); + if (OMPI_SUCCESS != res) { + NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + return res; + } + } + + for (int r = 1, firstred = 1 ; r <= maxr; ++r) { + if ((rank % (1 << r)) == 0) { + /* we have to receive this round */ + peer = rank + (1 << (r - 1)); + if (peer < p) { + /* we have to wait until we have the data */ + res = NBC_Sched_recv (rbuf, true, count, datatype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + if (firstred) { + /* take reduce data from the sendbuf in the first round -> save copy */ + res = NBC_Sched_op (sendbuf, false, rbuf, true, count, datatype, op, schedule, true); + firstred = 0; + } else { + /* perform the reduce in my local buffer */ + res = NBC_Sched_op (lbuf, true, rbuf, true, count, datatype, op, schedule, true); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) 
{ + NBC_Return_handle (handle); + return res; + } + /* swap left and right buffers */ + buf = rbuf; rbuf = lbuf ; lbuf = buf; + } + } else { + /* we have to send this round */ + peer = rank - (1 << (r - 1)); + if(firstred) { + /* we have to send the senbuf */ + res = NBC_Sched_send (sendbuf, false, count, datatype, peer, schedule, false); + } else { + /* we send an already reduced value from redbuf */ + res = NBC_Sched_send (lbuf, true, count, datatype, peer, schedule, false); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + /* leave the game */ + break; + } + } + + res = NBC_Sched_barrier(schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + /* rank 0 is root and sends - all others receive */ + if (rank != 0) { + res = NBC_Sched_recv (recvbuf, false, recvcount, datatype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } else { + for (int r = 1, offset = 0 ; r < p ; ++r) { + offset += recvcount; + sbuf = lbuf + (offset*ext); + /* root sends the right buffer to the right receiver */ + res = NBC_Sched_send (sbuf, true, recvcount, datatype, r, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } + + if ((p != 1) || !inplace) { + res = NBC_Sched_copy (lbuf, true, recvcount, datatype, recvbuf, false, recvcount, + datatype, schedule, false); + } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + /* tmpbuf is freed with the handle */ + return OMPI_SUCCESS; +} + +int 
ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int rcount, struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, + ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module) { + int rank, res, count, lsize, rsize; + MPI_Aint ext; + ptrdiff_t gap, span, span_align; + NBC_Schedule *schedule; + NBC_Handle *handle; + ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + + rank = ompi_comm_rank (comm); + lsize = ompi_comm_size (comm); + rsize = ompi_comm_remote_size (comm); + + res = ompi_datatype_type_extent (dtype, &ext); + if (MPI_SUCCESS != res) { + NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + + count = rcount * lsize; + + span = opal_datatype_span(&dtype->super, count, &gap); + span_align = OPAL_ALIGN(span, dtype->super.align, ptrdiff_t); + + if (count > 0) { + handle->tmpbuf = malloc (span_align + span); + if (NULL == handle->tmpbuf) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + schedule = OBJ_NEW(NBC_Schedule); + if (NULL == schedule) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* make sure the schedule is released with the handle on error */ + handle->schedule = schedule; + + /* send my data to the remote root */ + res = NBC_Sched_send (sendbuf, false, count, dtype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + if (0 == rank) { + char *lbuf, *rbuf; + lbuf = (char *)(-gap); + rbuf = (char *)(span_align-gap); + res = NBC_Sched_recv (lbuf, true, count, dtype, 0, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + for (int peer = 1 ; peer < rsize ; ++peer) { + char *tbuf; + res = NBC_Sched_recv (rbuf, true, 
count, dtype, peer, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + res = NBC_Sched_op (lbuf, true, rbuf, true, count, dtype, + op, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + tbuf = lbuf; lbuf = rbuf; rbuf = tbuf; + } + + /* do the scatter with the local communicator */ + res = NBC_Sched_copy (lbuf, true, rcount, dtype, recvbuf, false, rcount, + dtype, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + for (int peer = 1 ; peer < lsize ; ++peer) { + res = NBC_Sched_local_send (lbuf + ext * rcount * peer, true, rcount, dtype, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } + } else { + /* receive my block */ + res = NBC_Sched_local_recv(recvbuf, false, rcount, dtype, 0, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + } + + /*NBC_PRINT_SCHED(*schedule);*/ + + res = NBC_Sched_commit(schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + res = NBC_Start(handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + + /* tmpbuf is freed with the handle */ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/coll/libnbc/nbc_iscan.c b/ompi/mca/coll/libnbc/nbc_iscan.c index c87013a562f..5b8b0bbdc1d 100644 --- a/ompi/mca/coll/libnbc/nbc_iscan.c +++ b/ompi/mca/coll/libnbc/nbc_iscan.c @@ -1,11 +1,14 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. 
- * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * * Author(s): Torsten Hoefler * @@ -15,124 +18,156 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->recvbuf == b->recvbuf) && - (a->count == b->count) && + (a->count == b->count) && (a->datatype == b->datatype) && (a->op == b->op) ) { - return 0; + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if (a->sendbuf < b->sendbuf) { return -1; - } - return +1; + } + + return 1; } #endif /* linear iscan * working principle: - * 1. each node (but node 0) receives from left neigbor + * 1. each node (but node 0) receives from left neighbor * 2. performs op - * 3. all but rank p-1 do sends to it's right neigbor and exits + * 3. 
all but rank p-1 do sends to it's right neighbor and exits * */ -int ompi_coll_libnbc_iscan(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, +int ompi_coll_libnbc_iscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { int rank, p, res; - MPI_Aint ext; + ptrdiff_t gap, span; NBC_Schedule *schedule; -#ifdef NBC_CACHE_SCHEDULE - NBC_Scan_args *args, *found, search; -#endif char inplace; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - + NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - handle->tmpbuf = malloc(ext*count); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } - - if((rank == 0) && !inplace) { + + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + + if (!inplace) { /* copy data to receivebuf */ - res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + res = NBC_Copy (sendbuf, count, datatype, recvbuf, count, datatype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != 
res)) { + return res; } #ifdef NBC_CACHE_SCHEDULE + NBC_Scan_args *args, *found, search; + /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.recvbuf=recvbuf; - search.count=count; - search.datatype=datatype; - search.op=op; - found = (NBC_Scan_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCAN], &search); - if(found == NULL) { + search.sendbuf = sendbuf; + search.recvbuf = recvbuf; + search.count = count; + search.datatype = datatype; + search.op = op; + found = (NBC_Scan_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + /* ensure the schedule is released with the handle */ + handle->schedule = schedule; if(rank != 0) { - res = NBC_Sched_recv(0, true, count, datatype, rank-1, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + span = opal_datatype_span(&datatype->super, count, &gap); + handle->tmpbuf = malloc (span); + if (NULL == handle->tmpbuf) { + NBC_Return_handle (handle); + return OMPI_ERR_OUT_OF_RESOURCE; + } + /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + res = NBC_Sched_recv ((void *)(-gap), true, count, datatype, rank-1, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + /* perform the reduce in my local buffer */ - res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, 
count, datatype, op, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } + /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + res = NBC_Sched_op ((void *)(-gap), true, recvbuf, false, count, datatype, op, schedule, + true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } - if(rank != p-1) { - res = NBC_Sched_send(recvbuf, false, count, datatype, rank+1, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + + if (rank != p-1) { + res = NBC_Sched_send (recvbuf, false, count, datatype, rank+1, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } } - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } #ifdef NBC_CACHE_SCHEDULE /* save schedule to tree */ - args = (NBC_Scan_args*)malloc(sizeof(NBC_Alltoall_args)); - args->sendbuf=sendbuf; - args->recvbuf=recvbuf; - args->count=count; - args->datatype=datatype; - args->op=op; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCAN], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_SCAN] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCAN], &handle->comminfo->NBC_Dict_size[NBC_SCAN]); + args = (NBC_Scan_args *) malloc (sizeof (args)); 
+ if (NULL != args) { + args->sendbuf = sendbuf; + args->recvbuf = recvbuf; + args->count = count; + args->datatype = datatype; + args->op = op; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_SCAN] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN], + &libnbc_module->NBC_Dict_size[NBC_SCAN]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif - + res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } - + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + /* tmpbuf is freed with the handle */ - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iscatter.c b/ompi/mca/coll/libnbc/nbc_iscatter.c index c72dc8684fe..20481ad057a 100644 --- a/ompi/mca/coll/libnbc/nbc_iscatter.c +++ b/ompi/mca/coll/libnbc/nbc_iscatter.c @@ -1,15 +1,16 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * Author(s): Torsten Hoefler @@ -20,62 +21,63 @@ #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ int NBC_Scatter_args_compare(NBC_Scatter_args *a, NBC_Scatter_args *b, void *param) { - if( (a->sendbuf == b->sendbuf) && + if ((a->sendbuf == b->sendbuf) && (a->sendcount == b->sendcount) && (a->sendtype == b->sendtype) && (a->recvbuf == b->recvbuf) && (a->recvcount == b->recvcount) && (a->recvtype == b->recvtype) && - (a->root == b->root) ) { - return 0; + (a->root == b->root)) { + return 0; } - if( a->sendbuf < b->sendbuf ) { + + if (a->sendbuf < b->sendbuf) { return -1; } - return +1; + + return 1; } #endif /* simple linear MPI_Iscatter */ -int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtype, +int ompi_coll_libnbc_iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; + int rank, p, res; MPI_Aint sndext = 0; NBC_Schedule *schedule; - char *sbuf, inplace; -#ifdef NBC_CACHE_SCHEDULE - NBC_Scatter_args *args, *found, search; -#endif + char *sbuf, inplace = 0; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != 
res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - if (rank == root) { - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + if (root == rank) { + NBC_IN_PLACE(sendbuf, recvbuf, inplace); } + p = ompi_comm_size (comm); - handle->tmpbuf=NULL; + if (rank == root) { + res = ompi_datatype_type_extent (sendtype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } + } - if((rank == root) && (!inplace)) { - sbuf = ((char *)sendbuf) + (rank*sendcount*sndext); + if ((rank == root) && (!inplace)) { + sbuf = (char *) sendbuf + rank * sendcount * sndext; /* if I am the root - just copy the message (not for MPI_IN_PLACE) */ - res = NBC_Copy(sbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + res = NBC_Copy (sbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } #ifdef NBC_CACHE_SCHEDULE + NBC_Scatter_args *args, *found, search; + /* search schedule in communicator specific tree */ search.sendbuf=sendbuf; search.sendcount=sendcount; @@ -84,114 +86,156 @@ int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtyp search.recvcount=recvcount; search.recvtype=recvtype; search.root=root; - found = (NBC_Scatter_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCATTER], &search); - if(found == NULL) { + found = (NBC_Scatter_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCATTER], &search); + if (NULL == found) { #endif - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return 
res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* receive from root */ - if(rank != root) { + if (rank != root) { /* recv msg from root */ - res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv (recvbuf, false, recvcount, recvtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else { - for(i=0;isendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - args->recvtype=recvtype; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCATTER], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_SCATTER] > NBC_SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCATTER], &handle->comminfo->NBC_Dict_size[NBC_SCATTER]); + args = (NBC_Scatter_args *) malloc (sizeof (args)); + if (NULL != args) { + args->sendbuf = sendbuf; + args->sendcount = sendcount; + args->sendtype = sendtype; + args->recvbuf = recvbuf; + args->recvcount = recvcount; + args->recvtype = recvtype; + args->root = root; + args->schedule = schedule; + res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCATTER], args, args, 0); + if (0 == res) { + OBJ_RETAIN(schedule); + + /* increase number of elements for A2A */ + if (++libnbc_module->NBC_Dict_size[NBC_SCATTER] > NBC_SCHED_DICT_UPPER) { + NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCATTER], + &libnbc_module->NBC_Dict_size[NBC_SCATTER]); + } + } else { + NBC_Error("error in dict_insert() (%i)", res); + free (args); + } } } else { /* found schedule */ - schedule=found->schedule; + 
schedule = found->schedule; + OBJ_RETAIN(schedule); } #endif + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } - return NBC_OK; + *request = (ompi_request_t *) handle; + + return OMPI_SUCCESS; } -int ompi_coll_libnbc_iscatter_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, +int ompi_coll_libnbc_iscatter_inter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, res, i, rsize; + int res, rsize; MPI_Aint sndext; NBC_Schedule *schedule; char *sbuf; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } + rsize = ompi_comm_remote_size (comm); + if (MPI_ROOT == root) { - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + res = ompi_datatype_type_extent(sendtype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + return res; + } } - res = MPI_Comm_remote_size (comm, &rsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; } - - handle->tmpbuf = NULL; - schedule = 
(NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* receive from root */ if (MPI_ROOT != root && MPI_PROC_NULL != root) { /* recv msg from remote root */ - res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else if (MPI_ROOT == root) { - for (i = 0 ; i < rsize ; ++i) { + for (int i = 0 ; i < rsize ; ++i) { sbuf = ((char *)sendbuf) + (i * sendcount * sndext); /* root sends the right buffer to the right receiver */ - res = NBC_Sched_send(sbuf, false, sendcount, sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send(sbuf, false, sendcount, sendtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } diff --git 
a/ompi/mca/coll/libnbc/nbc_iscatterv.c b/ompi/mca/coll/libnbc/nbc_iscatterv.c index d5791b9aff6..14ad8a5336c 100644 --- a/ompi/mca/coll/libnbc/nbc_iscatterv.c +++ b/ompi/mca/coll/libnbc/nbc_iscatterv.c @@ -1,15 +1,16 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * Author(s): Torsten Hoefler @@ -22,121 +23,153 @@ * would not be sufficient ... 
we simply do not cache it */ /* simple linear MPI_Iscatterv */ -int ompi_coll_libnbc_iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, +int ompi_coll_libnbc_iscatterv(const void* sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_1_0_t *module) { - int rank, p, res, i; + int rank, p, res; MPI_Aint sndext; NBC_Schedule *schedule; - char *sbuf, inplace; + char *sbuf, inplace = 0; NBC_Handle *handle; - ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(comm, coll_req, libnbc_module); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - if (rank == root) { - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + rank = ompi_comm_rank (comm); + if (root == rank) { + NBC_IN_PLACE(sendbuf, recvbuf, inplace); } - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - handle->tmpbuf=NULL; + p = ompi_comm_size (comm); - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* receive from root */ - if(rank != root) { - /* recv msg from root */ - res = 
NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } else { - for(i=0;itmpbuf = NULL; - - schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + rsize = ompi_comm_remote_size (comm); - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } /* receive from root */ if (MPI_ROOT != root && MPI_PROC_NULL != root) { /* recv msg from root */ - res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } else if (MPI_ROOT == root) { - for (i = 0 ; i < rsize ; ++i) { - sbuf = ((char *)sendbuf) + (displs[i] * sndext); + res = ompi_datatype_type_extent(sendtype, &sndext); + if (MPI_SUCCESS != res) { + NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); + OBJ_RELEASE(schedule); + return res; + } + + for (int i = 0 ; i < rsize ; ++i) { + sbuf = (char *)sendbuf + displs[i] * sndext; /* root sends the right buffer to the right receiver */ - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtype, i, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } } } res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + 
OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Init_handle(comm, &handle, libnbc_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_neighbor_helpers.c b/ompi/mca/coll/libnbc/nbc_neighbor_helpers.c index edff3fcf860..924e852d58a 100644 --- a/ompi/mca/coll/libnbc/nbc_neighbor_helpers.c +++ b/ompi/mca/coll/libnbc/nbc_neighbor_helpers.c @@ -1,106 +1,103 @@ +/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* * Author(s): Torsten Hoefler * */ + #include "nbc_internal.h" +#include "ompi/mca/topo/base/base.h" + +int NBC_Comm_neighbors_count (ompi_communicator_t *comm, int *indegree, int *outdegree) { + if (OMPI_COMM_IS_CART(comm)) { + /* cartesian */ + /* outdegree is always 2*ndims because we need to iterate over empty buffers for MPI_PROC_NULL */ + *outdegree = *indegree = 2 * comm->c_topo->mtc.cart->ndims; + } else if (OMPI_COMM_IS_GRAPH(comm)) { + /* graph */ + int rank, nneighbors; + + rank = ompi_comm_rank (comm); + mca_topo_base_graph_neighbors_count (comm, rank, &nneighbors); -int NBC_Comm_neighbors_count(MPI_Comm comm, int *indegree, int *outdegree, int *weighted) { - int topo, res; - - res = MPI_Topo_test(comm, &topo); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Topo_test() (%i)\n", res); return res; } - - switch(topo) { - case MPI_CART: /* cartesian */ - { - int ndims; - res = MPI_Cartdim_get(comm, &ndims) ; - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Cartdim_get() (%i)\n", res); return res; } - /* outdegree is always 2*ndims because we need to iterate over empty buffers for MPI_PROC_NULL */ - *outdegree = *indegree = 2*ndims; - *weighted = 0; - } - break; - case MPI_GRAPH: /* graph */ - { - int rank, nneighbors; - MPI_Comm_rank(comm, &rank); - res = MPI_Graph_neighbors_count(comm, rank, &nneighbors); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Graph_neighbors_count() (%i)\n", res); return res; } - *outdegree = *indegree = nneighbors; - *weighted = 0; - } - break; - case MPI_DIST_GRAPH: /* graph */ - { - res = MPI_Dist_graph_neighbors_count(comm, indegree, outdegree, weighted); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Dist_graph_neighbors_count() (%i)\n", res); return res; } - } - break; - case MPI_UNDEFINED: - return NBC_INVALID_TOPOLOGY_COMM; - break; - default: - return NBC_INVALID_PARAM; - break; + *outdegree = *indegree = nneighbors; + } else if (OMPI_COMM_IS_DIST_GRAPH(comm)) { + /* graph */ + *indegree = 
comm->c_topo->mtc.dist_graph->indegree; + *outdegree = comm->c_topo->mtc.dist_graph->outdegree; + } else { + return OMPI_ERR_BAD_PARAM; } - return NBC_OK; + + return OMPI_SUCCESS; } -int NBC_Comm_neighbors(MPI_Comm comm, int maxindegree, int sources[], int sourceweights[], int maxoutdegree, int destinations[], int destweights[]) { - int topo, res; - int index = 0; - - int indeg, outdeg, wgtd; - res = NBC_Comm_neighbors_count(comm, &indeg, &outdeg, &wgtd); - if(indeg > maxindegree && outdeg > maxoutdegree) return NBC_INVALID_PARAM; /* we want to return *all* neighbors */ - - res = MPI_Topo_test(comm, &topo); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Topo_test() (%i)\n", res); return res; } - - switch(topo) { - case MPI_CART: /* cartesian */ - { - int ndims, i, rpeer, speer; - res = MPI_Cartdim_get(comm, &ndims); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Cartdim_get() (%i)\n", res); return res; } - - for(i = 0; ic_topo->mtc.cart->ndims ; ++dim) { + mca_topo_base_cart_shift (comm, dim, 1, &rpeer, &speer); + sources[0][i] = destinations[0][i] = rpeer; i++; + sources[0][i] = destinations[0][i] = speer; i++; + } + } else if (OMPI_COMM_IS_GRAPH(comm)) { + /* graph */ + mca_topo_base_graph_neighbors (comm, ompi_comm_rank (comm), indeg, sources[0]); + memcpy (destinations[0], sources[0], indeg * sizeof (int)); + } else if (OMPI_COMM_IS_DIST_GRAPH(comm)) { + /* dist graph */ + mca_topo_base_dist_graph_neighbors (comm, indeg, sources[0], MPI_UNWEIGHTED, outdeg, destinations[0], + MPI_UNWEIGHTED); } - return NBC_OK; + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/ml/Makefile.am b/ompi/mca/coll/ml/Makefile.am deleted file mode 100644 index 0cf8a840df1..00000000000 --- a/ompi/mca/coll/ml/Makefile.am +++ /dev/null @@ -1,84 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2013-2014 Los Alamos National Security, LLC. 
All rights -# reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -AM_LFLAGS = -Pcoll_ml_config_yy -LEX_OUTPUT_ROOT = lex.coll_ml_config_yy - -dist_ompidata_DATA = \ - mca-coll-ml.config \ - help-mpi-coll-ml.txt - -sources = coll_ml.h \ - coll_ml_inlines.h \ - coll_ml_module.c \ - coll_ml_allocation.h \ - coll_ml_allocation.c \ - coll_ml_barrier.c \ - coll_ml_bcast.c \ - coll_ml_colls.h \ - coll_ml_component.c \ - coll_ml_copy_fns.c \ - coll_ml_descriptors.c \ - coll_ml_functions.h \ - coll_ml_hier_algorithms.c \ - coll_ml_hier_algorithms_setup.c \ - coll_ml_hier_algorithms_bcast_setup.c \ - coll_ml_hier_algorithms_allreduce_setup.c \ - coll_ml_hier_algorithms_reduce_setup.c \ - coll_ml_hier_algorithms_common_setup.c \ - coll_ml_hier_algorithms_common_setup.h \ - coll_ml_hier_algorithms_allgather_setup.c \ - coll_ml_hier_algorithm_memsync_setup.c \ - coll_ml_custom_utils.h \ - coll_ml_custom_utils.c \ - coll_ml_progress.c \ - coll_ml_reduce.c \ - coll_ml_allreduce.c \ - coll_ml_allgather.c \ - coll_ml_mca.h \ - coll_ml_mca.c \ - coll_ml_lmngr.h \ - coll_ml_lmngr.c \ - coll_ml_hier_algorithms_barrier_setup.c \ - coll_ml_select.h \ - coll_ml_select.c \ - coll_ml_memsync.c \ - coll_ml_lex.h \ - coll_ml_lex.l \ - coll_ml_config.c \ - coll_ml_config.h - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_coll_ml_DSO -component_install += mca_coll_ml.la -else -component_noinst += libmca_coll_ml.la -endif - -# See ompi/mca/btl/ml/Makefile.am for an explanation of -# libmca_common_ml.la. 
- -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_coll_ml_la_SOURCES = $(sources) -mca_coll_ml_la_LDFLAGS = -module -avoid-version -mca_coll_ml_la_LIBADD = - - -noinst_LTLIBRARIES = $(component_noinst) -libmca_coll_ml_la_SOURCES =$(sources) -libmca_coll_ml_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/coll/ml/coll_ml.h b/ompi/mca/coll/ml/coll_ml.h deleted file mode 100644 index e6b6f092f84..00000000000 --- a/ompi/mca/coll/ml/coll_ml.h +++ /dev/null @@ -1,1022 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#ifndef MCA_COLL_ML_ML_H -#define MCA_COLL_ML_ML_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/datatype/opal_convertor.h" -#include "opal/threads/mutex.h" - -#include "ompi/mca/coll/coll.h" -#include "ompi/request/request.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/op/op.h" -#include "opal/class/opal_free_list.h" - -#include "coll_ml_lmngr.h" -#include "coll_ml_functions.h" -#include "coll_ml_colls.h" -#include "coll_ml_allocation.h" -#include "coll_ml_config.h" - -BEGIN_C_DECLS - -/* macros for return status */ -enum { - ML_OMPI_COMPLETE = 1, - ML_OMPI_INCOMPLETE -}; - -enum { - ML_SMALL_MSG, - ML_LARGE_MSG, - ML_NUM_MSG -}; - -/* ML collectives IDs */ -enum { - /* blocking functions */ - ML_ALLGATHER, - ML_ALLGATHERV, - ML_ALLREDUCE, - ML_ALLTOALL, - ML_ALLTOALLV, - ML_ALLTOALLW, - ML_BARRIER, - ML_BCAST, - ML_EXSCAN, - ML_GATHER, - 
ML_GATHERV, - ML_REDUCE, - ML_REDUCE_SCATTER, - ML_SCAN, - ML_SCATTER, - ML_SCATTERV, - ML_FANIN, - ML_FANOUT, - - /* nonblocking functions */ - ML_IALLGATHER, - ML_IALLGATHERV, - ML_IALLREDUCE, - ML_IALLTOALL, - ML_IALLTOALLV, - ML_IALLTOALLW, - ML_IBARRIER, - ML_IBCAST, - ML_IEXSCAN, - ML_IGATHER, - ML_IGATHERV, - ML_IREDUCE, - ML_IREDUCE_SCATTER, - ML_ISCAN, - ML_ISCATTER, - ML_ISCATTERV, - ML_IFANIN, - ML_IFANOUT, - ML_NUM_OF_FUNCTIONS -}; - -/* ML broadcast algorithms */ -enum { - COLL_ML_STATIC_BCAST, - COLL_ML_SEQ_BCAST, - COLL_ML_UNKNOWN_BCAST, -}; - -struct mca_bcol_base_module_t; - -/* collective function arguments - gives - * one function signature for calling all collective setup - * routines, with the initial call to a collective function having - * the context to access the right parts of the data structure. - * this information is used by each of the setup functions to - * setup the correct information for each of the functions in the - * hierarchy that will be called. */ - -/* RLG NOTE: Need to figure out what arguments to store here, - * and which ones directly in the message descriptor - */ -struct mpi_coll_fn_params_t { - union { - struct { - ompi_communicator_t *comm; - int n_fanin_steps; - int n_fanout_steps; - int n_recursive_doubling_steps; - } ibarrier_recursive_doubling; - - struct { - int root; - ompi_communicator_t *comm; - struct ompi_datatype_t *datatype; - } ibcast; - } coll_fn; -}; -typedef struct mpi_coll_fn_params_t mpi_coll_fn_params_t; - -/* algorithm parmeters needed for the setup function */ -struct mpi_coll_algorithm_params_t { - union { - struct { - int n_fanin_steps; - int n_fanout_steps; - int n_recursive_doubling_steps; - } ibarrier_recursive_doubling; - - struct { - int place_holder; - } ibcast; - } coll_fn; -}; -typedef struct mpi_coll_algorithm_params_t mpi_coll_algorithm_params_t; - -/* setup function - used to setup each segment (or fragment) - * to be processed - */ -struct mca_coll_ml_module_t; -struct 
mca_coll_ml_topology_t; - -typedef int (*coll_fragment_comm_setup_fn)(struct mca_coll_ml_module_t *ml_module, - mpi_coll_fn_params_t *fn_params, mpi_coll_algorithm_params_t *algorithm_params); -/* full collective description */ -struct coll_ml_collective_description_t { - /* number of temp buffers */ - int n_buffers; - - /* description size */ - int n_functions; - - /* collective setup function - called for every non-blocking - * function, and for each fragment of such a message - */ - coll_fragment_comm_setup_fn *coll_fn_setup_fn; - - /* algorithm parameters */ - mpi_coll_algorithm_params_t alg_params; - - /* list of functions */ - mca_bcol_base_function_t *functions; - - /* function names - for debugging */ - char **function_names; - - /* Signalling collective completion */ - bool completion_flag; -}; - -typedef struct coll_ml_collective_description_t coll_ml_collective_description_t; - -/* Utility data structure */ -struct rank_properties_t { - int rank; - int leaf; - int num_of_ranks_represented; -}; typedef struct rank_properties_t rank_properties_t; - -/* data structure for holding node information for the nodes of the - * hierarchical communications tree. - */ -struct sub_group_params_t { - /* rank of root in the communicator */ - int root_rank_in_comm; - - /* index in subgroup */ - int root_index; - - /* number of ranks in subgroup */ - int n_ranks; - - /* index of the first element in the subgroup. The - * assumption is that - * ranks for all subgroups are stored in a single - * linear array - */ - int index_of_first_element; - - /* - * level in the hierarchy - subgroups at the same - * level don't overlap. May not be the same as the - * sbgp level. - */ - int level_in_hierarchy; - - /* - * Information on the ranks in the subgroup. This includes - * the rank, and wether or not the rank is a source/sink of - * of data in this subgroup, or just a "pass through". 
- */ - rank_properties_t *rank_data; - - /* level one index - for example, - for( i = 0; i < level_one_index; i++) will loop - through all level one subgroups, this is significant - since level one is a disjoint partitioning of all ranks - i.e. all ranks appear once and only once at level one - */ - int level_one_index; -}; -typedef struct sub_group_params_t sub_group_params_t; - -/* function to setup information on the order of a given bcol within - * a specific ML-level algorithm. - */ -int mca_coll_ml_setup_scratch_vals(mca_coll_ml_compound_functions_t *func_list, - int *scratch_indx, int *scratch_num, int n_hiers); - -/* driver for setting up collective communication description */ - -int ml_coll_schedule_setup(struct mca_coll_ml_module_t *ml_module); - -int ml_coll_up_and_down_hier_setup( - struct mca_coll_ml_module_t *ml_module, - struct mca_coll_ml_topology_t *topo_info, - int up_function_idx, - int top_function_idx, - int down_function_idx, - int collective); - -int ml_coll_barrier_constant_group_data_setup( - struct mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t *schedule); - -/* Barrier */ -int ml_coll_hier_barrier_setup(struct mca_coll_ml_module_t *ml_module); - -/* allreduce */ -int ml_coll_hier_allreduce_setup(struct mca_coll_ml_module_t *ml_module); -int ml_coll_hier_allreduce_setup_new(struct mca_coll_ml_module_t *ml_module); -void ml_coll_hier_allreduce_cleanup_new(struct mca_coll_ml_module_t *ml_module); - -/* alltoall */ -int ml_coll_hier_alltoall_setup(struct mca_coll_ml_module_t *ml_module); -int ml_coll_hier_alltoall_setup_new(struct mca_coll_ml_module_t *ml_module); - -/* allgather */ -int ml_coll_hier_allgather_setup(struct mca_coll_ml_module_t *ml_module); -void ml_coll_hier_allgather_cleanup(struct mca_coll_ml_module_t *ml_module); - -/* gather */ -int ml_coll_hier_gather_setup(struct mca_coll_ml_module_t *ml_module); - -/* broadcast */ -int ml_coll_hier_bcast_setup(struct mca_coll_ml_module_t 
*ml_module); -void ml_coll_hier_bcast_cleanup(struct mca_coll_ml_module_t *ml_module); - -/* reduce */ -int ml_coll_hier_reduce_setup(struct mca_coll_ml_module_t *ml_module); -void ml_coll_hier_reduce_cleanup(struct mca_coll_ml_module_t *ml_module); - -/* reduce */ -int ml_coll_hier_scatter_setup(struct mca_coll_ml_module_t *ml_module); - -/* alltoall */ -int mca_coll_ml_alltoall(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_ml_alltoall_nb(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - - -/* allgather */ -int mca_coll_ml_allgather(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* non-blocking allgather */ -int mca_coll_ml_allgather_nb(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -/* gather */ -int mca_coll_ml_gather(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* nonblocking Barrier */ -int ml_coll_hier_nonblocking_barrier_setup(struct mca_coll_ml_module_t *ml_module, struct mca_coll_ml_topology_t *topo_info); - -/* Memory syncronization collective setup */ -int ml_coll_memsync_setup(struct mca_coll_ml_module_t *ml_module); - -/* Fragment descriptor */ -struct mca_coll_ml_descriptor_t; -struct mca_coll_ml_fragment_t { - opal_list_item_t super; - - struct mca_coll_ml_descriptor_t 
*full_msg_descriptor; - int offset; /*offset for progress pointer*/ - int length; /*fragment length I assume*/ - opal_convertor_t convertor; /*convertor for copy/pack data*/ - - /* current function index */ - int current_fn_index; - - /* array of function arguments */ - struct bcol_function_args_t *fn_args; - -}; -typedef struct mca_coll_ml_fragment_t mca_coll_ml_fragment_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_ml_fragment_t); - -#define MCA_COLL_ML_NO_BUFFER -1 - -#define MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, index, desc) \ -do { \ - (coll_op)->variable_fn_params.buffer_index = index; \ - (coll_op)->fragment_data.buffer_desc = desc; \ - /* pasha - why we duplicate it ? */ \ - (coll_op)->variable_fn_params.src_desc = desc; \ - (coll_op)->variable_fn_params.hier_factor = 1; \ - (coll_op)->variable_fn_params.need_dt_support = false; \ -} while (0) - -/*Full message descriptor*/ -struct mca_coll_ml_descriptor_t { - ompi_request_t super; /*base request*/ - struct ompi_datatype_t *datatype; /*ompi datatype*/ - size_t count; /*count of user datatype elements*/ - uint32_t sequence_num; /*sequence number for collective operation*/ - size_t frags_limit; /*upper limit on # of fragments*/ - size_t frags_start; /*number of fragments started*/ - - /*number of fragments completed*/ - size_t frags_complete; - - /* number of fragments needed to process this message */ - size_t n_fragments; - - volatile bool free_resource; /*signals release resource*/ - - /*pointer to reduction operation, e.g. MPI_MIN - need to handle - * user defined functions also */ - /* ompi_predefined_op_t *operation; */ - - /*pointer to a communication schedule, data struct undefined*/ - struct coll_ml_collective_description_t *local_comm_description; - - /* fragment descriptor - we always have a fragment descriptor - * if we get a full message descriptor. 
Optimization for - * small messages */ - mca_coll_ml_fragment_t fragment; - /* The ML memory buffer index that should consist the send and - recv information - if the index is -1, it means no buffer was allocated */ - uint64_t buffer_index; -}; -typedef struct mca_coll_ml_descriptor_t mca_coll_ml_descriptor_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_ml_descriptor_t); - -/* sbgp and bcol module pairs */ -struct hierarchy_pairs { - mca_sbgp_base_module_t *subgroup_module; - struct mca_bcol_base_module_t **bcol_modules; - int num_bcol_modules; - int bcol_index; - mca_bcol_base_component_t *bcol_component; -}; -typedef struct hierarchy_pairs hierarchy_pairs; - -/* list of ranks in each group */ -struct ml_level_t { - int n_modules; - hierarchy_pairs *modules; -}; - -typedef struct ml_level_t ml_level_t; - -enum { - COLL_ML_HR_FULL, /* Full hierarchy topology, all bcols and sbgps attends in discovery */ - COLL_ML_HR_ALLREDUCE, - COLL_ML_HR_NBS, /* All hierarchy except base socket */ - COLL_ML_HR_SINGLE_PTP, /* Single flat ptp hierarchy */ - COLL_ML_HR_SINGLE_IBOFFLOAD, /* Single flat iboffload hierarchy */ - COLL_ML_TOPO_MAX -}; - -/* Topology-hierarchy discovery function */ -struct mca_coll_ml_module_t; /* forward declaration for the function */ - -typedef int (* mca_coll_topo_discovery_fn_t) - (struct mca_coll_ml_module_t *ml_module, int n_hierarchies); - -typedef enum { - COLL_ML_TOPO_DISABLED = 0, - COLL_ML_TOPO_ENABLED = 1 -} topo_status_t; - -/** - * Structure to hold the sm coll component. First it holds the - * base coll component, and then holds a bunch of - * sm-coll-component-specific stuff (e.g., current MCA param - * values). 
- */ -struct mca_coll_ml_component_t { - /** Base coll component */ - mca_coll_base_component_2_0_0_t super; - - /** MCA parameter: Priority of this component */ - int ml_priority; - - /** MCA parameter: subgrouping components to use */ - char *subgroups_string; - - /** MCA parameter: basic collectives components to use */ - char *bcols_string; - - /** verbosity level */ - int verbose; - - /** max of communicators available to run ML */ - unsigned int max_comm; - - /** min size of comm to be available to run ML */ - int min_comm_size; - - /* base sequence number to use - the expectation is that - * this will be used as a basis for generating IDs for - * specific collective operations - */ - int64_t base_sequence_number; - - /** memory pool */ - mca_coll_ml_lmngr_t memory_manager; - - /* We need it because some bcols cannot - support all possible allreduce data types */ - bool need_allreduce_support; - - int use_knomial_allreduce; - - /* use hdl_framework */ - bool use_hdl_bcast; - - /* Enable / Disable fragmentation (0 - off, 1 - on, 2 - auto) */ - int enable_fragmentation; - - /* Broadcast algorithm */ - int bcast_algorithm; - - /* frag size that is used by list memory_manager */ - size_t lmngr_block_size; - - /* alignment that is used by list memory_manager */ - size_t lmngr_alignment; - - /* list size for memory_manager */ - size_t lmngr_size; - - /* number of payload memory banks */ - int n_payload_mem_banks; - - /* number of payload buffers per bank */ - int n_payload_buffs_per_bank; - - /* size of payload buffer */ - unsigned long long payload_buffer_size; - - /* pipeline depth for msg fragmentation */ - int pipeline_depth; - - /* Free list tunings */ - int free_list_init_size; - - int free_list_grow_size; - - int free_list_max_size; - - /* - * queues for asynchronous collective progress - */ - /* tasks that have not started, either because dependencies are not - * statisfied, or resources are lacking - */ - opal_list_t pending_tasks; - opal_mutex_t 
pending_tasks_mutex; - - /* active incomplete tasks */ - opal_list_t active_tasks; - opal_mutex_t active_tasks_mutex; - - /* sequential collectives to progress */ - opal_list_t sequential_collectives; - opal_mutex_t sequential_collectives_mutex; - - bool progress_is_busy; - - /* Temporary hack for IMB test - not all bcols have allgather */ - bool disable_allgather; - - /* Temporary hack for IMB test - not all bcols have alltoall */ - bool disable_alltoall; - - /* Disable Reduce */ - bool disable_reduce; - - /* Brucks alltoall mca and other params */ - int use_brucks_smsg_alltoall; - - mca_coll_topo_discovery_fn_t topo_discovery_fn[COLL_ML_TOPO_MAX]; - - /* Configure file for collectives */ - char *config_file_name; - - per_collective_configuration_t coll_config[ML_NUM_OF_FUNCTIONS][ML_NUM_MSG]; -}; - -/** - * Convenience typedef - */ -typedef struct mca_coll_ml_component_t mca_coll_ml_component_t; - -/** - * Global component instance - */ -OMPI_MODULE_DECLSPEC extern mca_coll_ml_component_t mca_coll_ml_component; - -struct mca_coll_ml_leader_offset_info_t { - size_t offset; - int level_one_index; - bool leader; -}; -typedef struct mca_coll_ml_leader_offset_info_t mca_coll_ml_leader_offset_info_t; - -/* Topolody data structure */ -struct mca_coll_ml_topology_t { - topo_status_t status; /* 0 - enabled , 1 - disabled */ - /* information on the selected groups - needed for collective - ** algorithms */ - int32_t global_lowest_hier_group_index; - int32_t global_highest_hier_group_index; - int number_of_all_subgroups; - int n_levels; - /* bcols bits that describe supported features/modes */ - uint64_t all_bcols_mode; - mca_bcol_base_route_info_t *route_vector; - coll_ml_collective_description_t *hierarchical_algorithms[BCOL_NUM_OF_FUNCTIONS]; - sub_group_params_t *array_of_all_subgroups; - /* (sbgp, bcol) pairs */ - hierarchy_pairs *component_pairs; - /* ordering of ranks when I am the root of the operation. 
- * This ordering guarantees that data need to be re-ordered - * only at the first or last step in rooted operations, - * depending on whether the opearation is a scatter or - * gather operation. - */ - int *sort_list; - mca_coll_ml_leader_offset_info_t *hier_layout_info; - /* are ranks laid out contiguously */ - bool ranks_contiguous; - struct ordering_info_t { - int next_inorder; - int next_order_num; - int num_bcols_need_ordering; - } topo_ordering_info; -}; -typedef struct mca_coll_ml_topology_t mca_coll_ml_topology_t; - -struct mca_coll_ml_bcol_list_item_t { - opal_list_item_t super; - mca_bcol_base_module_t *bcol_module; -}; -typedef struct mca_coll_ml_bcol_list_item_t mca_coll_ml_bcol_list_item_t; -OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_ml_bcol_list_item_t); - -#define MCA_COLL_MAX_NUM_COLLECTIVES 40 /* ... I do not remember how much exactly collectives do we have */ -#define MCA_COLL_MAX_NUM_SUBTYPES 15 /* Maximum number of algorithms per collective */ - -struct mca_coll_ml_module_t { - /* base structure */ - mca_coll_base_module_t super; - - /* ML module status - 0 was not initialized, 1 - was initialized */ - bool initialized; - /* communicator */ - struct ompi_communicator_t *comm; - - /* reference convertor */ - opal_convertor_t *reference_convertor; - - mca_coll_ml_topology_t topo_list[COLL_ML_TOPO_MAX]; - - /* Collectives - Topology map */ - int collectives_topology_map - [MCA_COLL_MAX_NUM_COLLECTIVES][MCA_COLL_MAX_NUM_SUBTYPES]; - - /* largest number of function calls for the collective routines. 
- * This is used to allocate resources */ - int max_fn_calls; - - /* collective sequence number - unique id for barrier type operations */ - int32_t no_data_collective_sequence_num; - - /* collective sequence number - unique id for each collective */ - int32_t collective_sequence_num; - - /** ompi free list of full message descriptors **/ - opal_free_list_t message_descriptors; - - /** ompi free list of message fragment descriptors **/ - opal_free_list_t fragment_descriptors; - - /** pointer to the payload memory block **/ - struct mca_bcol_base_memory_block_desc_t *payload_block; - - /** the maximum size of collective function description */ - int max_dag_size; - - /** data used to initialize coll_ml_collective_descriptors */ - struct coll_desc_init { - int max_dag_size; - size_t max_n_bytes_per_proc_total; - mca_coll_base_module_t *bcol_base_module; - } coll_desc_init_data; - - /** collective operation descriptor free list - used to manage a single - * collective operation. */ - opal_free_list_t coll_ml_collective_descriptors; - - /** multiple function collective operation support */ - /** broadcast */ - mca_coll_ml_collective_operation_description_t * - coll_ml_bcast_functions[ML_NUM_BCAST_FUNCTIONS]; - - /* bcast size selection criteria - cutoff for the largest size of - * data for which to apply the specified collective operation. 
- * This gives us the ability to choose algorithm based on size */ - size_t bcast_cutoff_size[ML_N_DATASIZE_BINS]; - - /** Allreduce functions */ - mca_coll_ml_collective_operation_description_t * - coll_ml_allreduce_functions[ML_NUM_ALLREDUCE_FUNCTIONS]; - - /** Reduce functions */ - mca_coll_ml_collective_operation_description_t * - coll_ml_reduce_functions[ML_NUM_REDUCE_FUNCTIONS]; - - - /** scatter */ - mca_coll_ml_collective_operation_description_t * - coll_ml_scatter_functions[ML_NUM_SCATTER_FUNCTIONS]; - - /** alltoall */ - mca_coll_ml_collective_operation_description_t * - coll_ml_alltoall_functions[ML_NUM_ALLTOALL_FUNCTIONS]; - - /** allgather */ - mca_coll_ml_collective_operation_description_t * - coll_ml_allgather_functions[ML_NUM_ALLGATHER_FUNCTIONS]; - - /** gather */ - mca_coll_ml_collective_operation_description_t * - coll_ml_gather_functions[ML_NUM_GATHER_FUNCTIONS]; - - /** Barrier */ - mca_coll_ml_collective_operation_description_t * - coll_ml_barrier_function; - - /** ML Memory Syncronization collective operation */ - mca_coll_ml_collective_operation_description_t * - coll_ml_memsync_function; - - /** The table of allreduce functions for specific type and op **/ - bool allreduce_matrix[OMPI_OP_NUM_OF_TYPES][OMPI_DATATYPE_MAX_PREDEFINED][BCOL_NUM_OF_ELEM_TYPES]; - - /* data offset from ML */ - int32_t data_offset; - - int small_message_thresholds[BCOL_NUM_OF_FUNCTIONS]; - - /* fragmenation parameters */ - int use_user_buffers; - uint64_t fragment_size; - uint32_t ml_fragment_size; - - /* Bcast index table. Pasha: Do we need to define something more generic ? - the table x 2 (large/small)*/ - int bcast_fn_index_table[2]; - - /* List of pointer to bcols that have been initilized and used. 
- * So far we use it only for ML memory management */ - opal_list_t active_bcols_list; - - /* Buffer size required for Bruck's algorithm */ - int brucks_buffer_threshold_const; - - /* log comm size */ - /* We require this for alltoall algorithm */ - int log_comm_size; - /* On this list we keep coll_op descriptors that were not - * be able to start, since no ml buffers were available */ - opal_list_t waiting_for_memory_list; - - /* fallback collectives */ - mca_coll_base_comm_coll_t fallback; -}; - -typedef struct mca_coll_ml_module_t mca_coll_ml_module_t; -OBJ_CLASS_DECLARATION(mca_coll_ml_module_t); - - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_coll_ml_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - -/* query to see if the module is available for use on the given - * communicator, and if so, what it's priority is. This is where - * the backing shared-memory file is created. - */ -mca_coll_base_module_t * -mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority); - -/* Barrier - blocking */ -int mca_coll_ml_barrier_intra(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* Barrier - non-blocking */ -int mca_coll_ml_ibarrier_intra(struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -/* Allreduce with EXTRA TOPO using - blocking */ -int mca_coll_ml_allreduce_dispatch(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - -/* Allreduce with EXTRA TOPO using - Non-blocking */ -int mca_coll_ml_allreduce_dispatch_nb(void *sbuf, void *rbuf, int count, - ompi_datatype_t *dtype, ompi_op_t *op, - ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -/* Allreduce - blocking */ -int mca_coll_ml_allreduce(void *sbuf, void *rbuf, int count, - struct 
ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* Allreduce - Non-blocking */ -int mca_coll_ml_allreduce_nb(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -/* Reduce - Blocking */ -int mca_coll_ml_reduce(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_ml_reduce_nb(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -int mca_coll_ml_memsync_intra(mca_coll_ml_module_t *module, int bank_index); - - -int coll_ml_progress_individual_message(mca_coll_ml_fragment_t *frag_descriptor); - -/* - * the ml entry point for the broadcast function - */ -int mca_coll_ml_parallel_bcast(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); -int mca_coll_ml_parallel_bcast_nb(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); -int mca_coll_ml_bcast_sequential_root(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* - * The ml function interface for non-blocking routines - */ -int mca_coll_ml_bcast_unknown_root_nb(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -int mca_coll_ml_bcast_known_root_nb(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - 
mca_coll_base_module_t *module); -OMPI_DECLSPEC int mca_coll_ml_bcast_unknown_root_with_frags_nb(void *buf, int count, - struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, mca_coll_base_module_t *module); - -/* This routine sets up a sequential hierarchical scatter algorithm. The - * assumptions are that each rank knows in which sub-group that data will show - * up first, and that the scatter is executed sequentially, one subgroup at a - * time. This is needed, when the full collective needs to be specified before - * the collective operation starts up. The algorithm handles all data sizes - * and data types. - */ - -OMPI_DECLSPEC int mca_coll_ml_scatter_sequential( - void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, struct ompi_datatype_t *rdtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -#if 0 -int mca_coll_ml_bcast_small_dynamic_root(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); -int mca_coll_ml_bcast_small_known_root(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); -#endif - -/* Topology discovery function */ - -int mca_coll_ml_fulltree_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies); -int mca_coll_ml_allreduce_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies); -int mca_coll_ml_fulltree_exclude_basesmsocket_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies); -int mca_coll_ml_fulltree_ptp_only_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies); -int mca_coll_ml_fulltree_iboffload_only_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies); - -void mca_coll_ml_allreduce_matrix_init(mca_coll_ml_module_t *ml_module, - const mca_bcol_base_component_2_0_0_t 
*bcol_component); -static inline int mca_coll_ml_err(const char* fmt, ...) -{ - va_list list; - int ret; - - va_start(list, fmt); - ret = vfprintf(stderr, fmt, list); - va_end(list); - return ret; -} - - -#define ML_ERROR(args) \ -do { \ - mca_coll_ml_err("[%s]%s[%s:%d:%s] COLL-ML ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_coll_ml_err args; \ - mca_coll_ml_err("\n"); \ -} while(0) - -#if OPAL_ENABLE_DEBUG -#define ML_VERBOSE(level, args) \ -do { \ - if(mca_coll_ml_component.verbose >= level) { \ - mca_coll_ml_err("[%s]%s[%s:%d:%s] COLL-ML ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_coll_ml_err args; \ - mca_coll_ml_err("\n"); \ - } \ -} while(0) -#else -#define ML_VERBOSE(level, args) -#endif - -#define IS_BCOL_TYPE_IDENTICAL(bcol1, bcol2) \ - ( (NULL != bcol1 && NULL != bcol2) && \ - ( /* chech if the len is the same */ \ - (strlen(((mca_base_component_t *)((bcol1)->bcol_component))->mca_component_name) == \ - strlen(((mca_base_component_t *)((bcol2)->bcol_component))->mca_component_name)) \ - && /* check if the string are identical */ \ - (0 == strncmp(((mca_base_component_t *)((bcol1)->bcol_component))->mca_component_name, \ - ((mca_base_component_t *)((bcol2)->bcol_component))->mca_component_name, \ - strlen(((mca_base_component_t *)((bcol2)->bcol_component))->mca_component_name))) \ - ) ? true : false) - -#define GET_BCOL(module, indx) ((module)->component_pairs[(indx)].bcol_modules[0]) - -#define GET_BCOL_SYNC_FN(bcol) ((bcol)->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING] \ - [BCOL_SYNC][1][0][0]) - -/* Allocator macros */ -#define BUFFER_INDEX(bank,nbuffs,buffer) (bank*nbuffs+buffer) - -#define ML_GET_FRAG_SIZE(op, coll) \ - ((op)->fragment_data.message_descriptor->n_bytes_total - \ - (op)->fragment_data.message_descriptor->n_bytes_scheduled < \ - (size_t) OP_ML_MODULE((op))->small_message_thresholds[coll] ? 
\ - (op)->fragment_data.message_descriptor->n_bytes_total - \ - (op)->fragment_data.message_descriptor->n_bytes_scheduled : \ - (size_t) OP_ML_MODULE((op))->small_message_thresholds[coll]) - -/* Abort mpi process in case of fatal error */ -void mca_coll_ml_abort_ml(char *message); - -#define ML_SET_VARIABLE_PARAMS_BCAST(op, ml, cnt, datatype, b_desc, \ - s_offset, r_offset, frag_len, buf) \ -do { \ - op->variable_fn_params.sequence_num = \ - OPAL_THREAD_ADD32(&((ml)->collective_sequence_num), 1); \ - op->variable_fn_params.count = cnt; \ - op->variable_fn_params.dtype = datatype; \ - op->variable_fn_params.buffer_index = (b_desc)->buffer_index; \ - op->variable_fn_params.src_desc = (b_desc); \ - op->variable_fn_params.sbuf_offset = s_offset; \ - op->variable_fn_params.rbuf_offset = r_offset; \ - op->variable_fn_params.frag_size = frag_len; \ - op->variable_fn_params.sbuf = buf; \ -} while (0) - -#define MCA_COLL_ML_OP_BASIC_SETUP(op, total_bytes, offset_into_user_buff, src, dst, collective_schedule) \ - do { \ - op->coll_schedule = collective_schedule; \ - op->process_fn = NULL; \ - op->full_message.n_bytes_total = total_bytes; \ - op->full_message.n_bytes_delivered = 0; \ - op->full_message.n_bytes_scheduled = 0; \ - op->full_message.dest_user_addr = dst; \ - op->full_message.src_user_addr = src; \ - op->full_message.n_active = 0; \ - op->full_message.n_bytes_per_proc_total = 0; \ - op->full_message.send_count = 0; \ - op->full_message.recv_count = 0; \ - op->full_message.send_extent = 0; \ - op->full_message.recv_extent = 0; \ - op->full_message.offset_into_send_buffer = 0; \ - op->full_message.offset_into_recv_buffer = 0; \ - op->full_message.send_data_type = 0; \ - op->full_message.recv_data_type = 0; \ - op->full_message.fragment_launcher = 0; \ - op->sequential_routine.current_active_bcol_fn = 0; \ - op->sequential_routine.current_bcol_status = SEQ_TASK_NOT_STARTED; \ - \ - op->fragment_data.offset_into_user_buffer = offset_into_user_buff; \ - /* Pasha, is it 
constant ? what to put here */ \ - op->fragment_data.fragment_size = total_bytes; \ - op->fragment_data.message_descriptor = &op->full_message; \ - op->fragment_data.current_coll_op = -1; \ - } while (0) - -/* This routine re-orders and packs user data. The assumption is that - * there is per-process data, the amount of data is the same for all * ranks, - * and the user data is contigous. - */ -int mca_coll_ml_pack_reorder_contiguous_data( - mca_coll_ml_collective_operation_progress_t *coll_op); - -/* This routine re-orders and packs user data. The assumption is that - * there is per-process data, the amount of data is the same for all * ranks, - * and the user data is noncontigous. - */ -int mca_coll_ml_pack_reorder_noncontiguous_data( - mca_coll_ml_collective_operation_progress_t *coll_op); - -END_C_DECLS - - -#endif /* MCA_COLL_ML_ML_H */ diff --git a/ompi/mca/coll/ml/coll_ml_allgather.c b/ompi/mca/coll/ml/coll_ml_allgather.c deleted file mode 100644 index 95011126279..00000000000 --- a/ompi/mca/coll/ml/coll_ml_allgather.c +++ /dev/null @@ -1,631 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" -#include "opal/sys/atomic.h" -#include "coll_ml.h" -#include "coll_ml_select.h" -#include "coll_ml_allocation.h" - -static int mca_coll_ml_allgather_small_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - bool rcontig = coll_op->full_message.recv_data_continguous; - int n_ranks_in_comm = ompi_comm_size(OP_ML_MODULE(coll_op)->comm); - - void *dest = (void *)((uintptr_t)coll_op->full_message.dest_user_addr + - (uintptr_t)coll_op->full_message.n_bytes_delivered); - void *src = (void *)((uintptr_t)coll_op->fragment_data.buffer_desc->data_addr + - (size_t)coll_op->variable_fn_params.rbuf_offset); - - if (rcontig) { - memcpy(dest, src, n_ranks_in_comm * coll_op->full_message.n_bytes_scheduled); - } else { - mca_coll_ml_convertor_unpack(src, n_ranks_in_comm * coll_op->full_message.n_bytes_scheduled, - &coll_op->fragment_data.message_descriptor->recv_convertor); - } - - return OMPI_SUCCESS; -} - -static inline void copy_data (mca_coll_ml_collective_operation_progress_t *coll_op, rank_properties_t *rank_props, int soffset) { - bool rcontig = coll_op->fragment_data.message_descriptor->recv_data_continguous; - size_t total_bytes = coll_op->fragment_data.message_descriptor->n_bytes_total; - size_t pack_len = coll_op->fragment_data.fragment_size; - int doffset = rank_props->rank; - void *dest, *src; - - src = (void *) ((uintptr_t)coll_op->fragment_data.buffer_desc->data_addr + - (size_t)coll_op->variable_fn_params.rbuf_offset + soffset * pack_len); - - if (rcontig) { - dest = (void *) ((uintptr_t) coll_op->full_message.dest_user_addr + - (uintptr_t) coll_op->fragment_data.offset_into_user_buffer + - doffset * total_bytes); - - memcpy(dest, src, 
pack_len); - } else { - size_t position; - opal_convertor_t *recv_convertor = - &coll_op->fragment_data.message_descriptor->recv_convertor; - - position = (size_t) coll_op->fragment_data.offset_into_user_buffer + - doffset * total_bytes; - - opal_convertor_set_position(recv_convertor, &position); - mca_coll_ml_convertor_unpack(src, pack_len, recv_convertor); - } -} - -static int mca_coll_ml_allgather_noncontiguous_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int i, j, n_level_one_sbgps; - size_t soffset; - - mca_coll_ml_topology_t *topo_info = coll_op->coll_schedule->topo_info; - sub_group_params_t *array_of_all_subgroup_ranks = topo_info->array_of_all_subgroups; - - n_level_one_sbgps = array_of_all_subgroup_ranks->level_one_index; - - for (i = 0 ; i < n_level_one_sbgps; i++) { - /* determine where in the source buffer the data can be found */ - soffset = array_of_all_subgroup_ranks[i].index_of_first_element; - for (j = 0 ; j < array_of_all_subgroup_ranks[i].n_ranks; j++, ++soffset) { - copy_data (coll_op, array_of_all_subgroup_ranks[i].rank_data + j, soffset); - } - } - - return OMPI_SUCCESS; -} - -/* Allgather dependencies seem easy, everyone needs to work from the "bottom up". - * Following Pasha, I too will put the simplest dependencies graph and change it later - * when we add hierarchy. Basically, allgather has the same dependency profile as the - * sequential broadcast except that there is only a single ordering of tasks. - */ -static int mca_coll_ml_allgather_task_setup(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int fn_idx, h_level, my_index, root; - mca_sbgp_base_module_t *sbgp; - mca_coll_ml_topology_t *topo = coll_op->coll_schedule->topo_info; - - fn_idx = coll_op->sequential_routine.current_active_bcol_fn; - h_level = coll_op->coll_schedule->component_functions[fn_idx].h_level; - sbgp = topo->component_pairs[h_level]. 
- subgroup_module; - my_index = sbgp->my_index; - - /* In the case of allgather, the local leader is always the root */ - root = 0; - if (my_index == root) { - coll_op->variable_fn_params.root_flag = true; - coll_op->variable_fn_params.root_route = NULL; - } else { - coll_op->variable_fn_params.root_flag = false; - coll_op->variable_fn_params.root_route = &topo->route_vector[root]; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_allgather_frag_progress(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - int ret; - size_t frag_len, dt_size; - - void *buf; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc; - mca_coll_ml_collective_operation_progress_t *new_op; - - mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op); - bool scontig = coll_op->fragment_data.message_descriptor->send_data_continguous; - - ompi_datatype_type_size(coll_op->variable_fn_params.dtype, &dt_size); - /* Keep the pipeline filled with fragments */ - while (coll_op->fragment_data.message_descriptor->n_active < - coll_op->fragment_data.message_descriptor->pipeline_depth) { - /* If an active fragment happens to have completed the collective during - * a hop into the progress engine, then don't launch a new fragment, - * instead break and return. - */ - if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled - == coll_op->fragment_data.message_descriptor->n_bytes_total) { - break; - } - /* Get an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - if (NULL == src_buffer_desc) { - /* If there exist outstanding fragments, then break out - * and let an active fragment deal with this later, - * there are no buffers available. 
- */ - if (0 < coll_op->fragment_data.message_descriptor->n_active) { - return OMPI_SUCCESS; - } else { - /* The fragment is already on list and - * the we still have no ml resources - * Return busy */ - if (coll_op->pending & REQ_OUT_OF_MEMORY) { - ML_VERBOSE(10,("Out of resources %p", coll_op)); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - coll_op->pending |= REQ_OUT_OF_MEMORY; - opal_list_append(&((OP_ML_MODULE(coll_op))->waiting_for_memory_list), - (opal_list_item_t *)coll_op); - ML_VERBOSE(10,("Out of resources %p adding to pending queue", coll_op)); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - } - - /* Get a new collective descriptor and initialize it */ - new_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allgather_functions[ML_SMALL_DATA_ALLGATHER], - coll_op->fragment_data.message_descriptor->src_user_addr, - coll_op->fragment_data.message_descriptor->dest_user_addr, - coll_op->fragment_data.message_descriptor->n_bytes_total, - coll_op->fragment_data.message_descriptor->n_bytes_scheduled); - - new_op->fragment_data.current_coll_op = coll_op->fragment_data.current_coll_op; - new_op->fragment_data.message_descriptor = coll_op->fragment_data.message_descriptor; - - /* set the task setup callback */ - new_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup; - - /* - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(new_op, - src_buffer_desc->buffer_index, src_buffer_desc); - */ - - /* We need this address for pointer arithmetic in memcpy */ - buf = coll_op->fragment_data.message_descriptor->src_user_addr; - - if (!scontig) { - frag_len = ml_module->small_message_thresholds[BCOL_ALLGATHER]; - mca_coll_ml_convertor_get_send_frag_size( - ml_module, &frag_len, - coll_op->fragment_data.message_descriptor); - - mca_coll_ml_convertor_pack( - (void *) ((uintptr_t) src_buffer_desc->data_addr + - frag_len * coll_op->coll_schedule->topo_info->hier_layout_info[0].offset + - frag_len * 
coll_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index), - frag_len, &coll_op->fragment_data.message_descriptor->send_convertor); - } else { - /* calculate new frag length, there are some issues here */ - frag_len = (coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled < - coll_op->fragment_data.fragment_size ? - coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled : - coll_op->fragment_data.fragment_size); - - /* everybody copies in, based on the new values */ - memcpy((void *) ((uintptr_t)src_buffer_desc->data_addr + - frag_len * new_op->coll_schedule->topo_info->hier_layout_info[0].offset + - frag_len * new_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index), - (void *) ((uintptr_t) buf + (uintptr_t) - coll_op->fragment_data.message_descriptor->n_bytes_scheduled), frag_len); - } - - new_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr; - new_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr; - - /* update the number of bytes scheduled */ - new_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len; - /* everyone needs an unpack function */ - new_op->process_fn = mca_coll_ml_allgather_noncontiguous_unpack_data; - - new_op->fragment_data.fragment_size = frag_len; - new_op->fragment_data.buffer_desc = src_buffer_desc; - - /* Setup fragment specific data */ - ++(new_op->fragment_data.message_descriptor->n_active); - - ML_VERBOSE(10, ("Start more, My index %d ", - new_op->fragment_data.buffer_desc->buffer_index)); - - /* this is a bit buggy */ - ML_SET_VARIABLE_PARAMS_BCAST( - new_op, - OP_ML_MODULE(new_op), - frag_len /* yes, we have consistent units, so this makes sense */, - MPI_BYTE /* we fragment according to buffer size - * we don't reduce the data thus we needn't - * keep "whole" datatypes, we may freely - * fragment without regard for multiples - * 
of any specific datatype - */, - src_buffer_desc, - 0, - 0, - frag_len, - src_buffer_desc->data_addr); - /* initialize first coll */ - ret = new_op->sequential_routine.seq_task_setup(new_op); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(3, ("Fragment failed to initialize itself")); - return ret; - } - - new_op->variable_fn_params.buffer_size = frag_len; - new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor; - new_op->variable_fn_params.root = 0; - - MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op); - - /* append this collective !! */ - OPAL_THREAD_LOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - opal_list_append(&mca_coll_ml_component.sequential_collectives, - (opal_list_item_t *)new_op); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ -int mca_coll_ml_allgather_start (void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - ompi_request_t **req) -{ - size_t pack_len, sdt_size; - int ret, n_fragments = 1, comm_size; - - mca_coll_ml_topology_t *topo_info; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc; - - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - mca_coll_ml_collective_operation_progress_t *coll_op; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - - ptrdiff_t lb, extent; - bool scontig, rcontig, in_place = false; - - /* check for in place setting */ - if (MPI_IN_PLACE == sbuf) { - in_place = true; - sdtype = rdtype; - scount = rcount; - } - - /* scontig could be != to rcontig */ - scontig = ompi_datatype_is_contiguous_memory_layout(sdtype, scount); - rcontig = ompi_datatype_is_contiguous_memory_layout(rdtype, rcount); - - comm_size = ompi_comm_size(comm); - - ML_VERBOSE(10, ("Starting allgather")); - - assert(NULL != sdtype); - /* Calculate size 
of the data, - * at this stage, only contiguous data is supported */ - - /* this is valid for allagther */ - ompi_datatype_type_size(sdtype, &sdt_size); - pack_len = scount * sdt_size; - - if (in_place) { - sbuf = (char *) rbuf + ompi_comm_rank(comm) * pack_len; - } - - /* Allocate collective schedule and pack message */ - /* this is the total ending message size that will need to fit in the ml-buffer */ - if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_ALLGATHER]) { - /* The len of the message can not be larger than ML buffer size */ - ML_VERBOSE(10, ("Single frag %d %d %d", pack_len, comm_size, ml_module->payload_block->size_buffer)); - assert(pack_len * comm_size <= ml_module->payload_block->size_buffer); - - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - /* change 1 */ - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allgather_functions[ML_SMALL_DATA_ALLGATHER], - sbuf, rbuf, pack_len, 0 /* offset for first pack */); - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, - src_buffer_desc->buffer_index, src_buffer_desc); - - coll_op->fragment_data.current_coll_op = ML_SMALL_DATA_ALLGATHER; - /* task setup callback function */ - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup; - - /* change 2 */ - if (!scontig) { - coll_op->full_message.n_bytes_scheduled = - mca_coll_ml_convertor_prepare(sdtype, scount, sbuf, - &coll_op->full_message.send_convertor, MCA_COLL_ML_NET_STREAM_SEND); - - mca_coll_ml_convertor_pack( - (void *) ((uintptr_t) src_buffer_desc->data_addr + pack_len * - (coll_op->coll_schedule->topo_info->hier_layout_info[0].offset + - coll_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index)), - pack_len, &coll_op->full_message.send_convertor); - } else { - /* change 3 */ - memcpy((void *)((uintptr_t) src_buffer_desc->data_addr + 
pack_len * - (coll_op->coll_schedule->topo_info->hier_layout_info[0].offset + - coll_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index)), - sbuf, pack_len); - - coll_op->full_message.n_bytes_scheduled = pack_len; - } - - if (!rcontig) { - mca_coll_ml_convertor_prepare(rdtype, rcount * comm_size, rbuf, - &coll_op->full_message.recv_convertor, MCA_COLL_ML_NET_STREAM_RECV); - } - - if (coll_op->coll_schedule->topo_info->ranks_contiguous) { - coll_op->process_fn = mca_coll_ml_allgather_small_unpack_data; - } else { - coll_op->process_fn = mca_coll_ml_allgather_noncontiguous_unpack_data; - } - - /* whole ml-buffer is used to send AND receive */ - coll_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr; - coll_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr; - - /* we can set the initial offset here */ - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = 0; - - coll_op->variable_fn_params.count = scount; - coll_op->fragment_data.fragment_size = - coll_op->full_message.n_bytes_scheduled; - - /* For small CINCO, we may use the native datatype */ - coll_op->variable_fn_params.dtype = sdtype; - coll_op->variable_fn_params.buffer_size = pack_len; - coll_op->variable_fn_params.root = 0; - } else if (cm->enable_fragmentation || pack_len * comm_size < (1 << 20)) { - /* calculate the number of fragments and the size of each frag */ - size_t n_dts_per_frag, frag_len; - int pipeline_depth = mca_coll_ml_component.pipeline_depth; - - /* Calculate the number of fragments required for this message careful watch the integer division !*/ - frag_len = (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_ALLGATHER] ? - pack_len : (size_t) ml_module->small_message_thresholds[BCOL_ALLGATHER]); - - n_dts_per_frag = frag_len / sdt_size; - n_fragments = (pack_len + sdt_size * n_dts_per_frag - 1) / (sdt_size * n_dts_per_frag); - pipeline_depth = (n_fragments < pipeline_depth ? 
n_fragments : pipeline_depth); - - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - /* change 4 */ - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allgather_functions[ML_SMALL_DATA_ALLGATHER], - sbuf, rbuf, pack_len, - 0 /* offset for first pack */); - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, - src_buffer_desc->buffer_index, src_buffer_desc); - topo_info = coll_op->coll_schedule->topo_info; - - /* task setup callback function */ - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup; - - if (!scontig) { - coll_op->full_message.send_converter_bytes_packed = - mca_coll_ml_convertor_prepare( - sdtype, scount, NULL, - &coll_op->full_message.dummy_convertor, - MCA_COLL_ML_NET_STREAM_SEND); - - coll_op->full_message.dummy_conv_position = 0; - mca_coll_ml_convertor_get_send_frag_size( - ml_module, &frag_len, - &coll_op->full_message); - - /* change 5 */ - mca_coll_ml_convertor_prepare(sdtype, scount, sbuf, - &coll_op->full_message.send_convertor, MCA_COLL_ML_NET_STREAM_SEND); - - mca_coll_ml_convertor_pack( - (void *) ((uintptr_t) src_buffer_desc->data_addr + frag_len * - (topo_info->hier_layout_info[0].offset + - topo_info->hier_layout_info[0].level_one_index)), - frag_len, &coll_op->full_message.send_convertor); - } else { - /* change 6 */ - memcpy((void *)((uintptr_t)src_buffer_desc->data_addr + frag_len * - (topo_info->hier_layout_info[0].offset + - topo_info->hier_layout_info[0].level_one_index)), - sbuf, frag_len); - } - - if (!rcontig) { - mca_coll_ml_convertor_prepare(rdtype, rcount * comm_size, rbuf, - &coll_op->full_message.recv_convertor, MCA_COLL_ML_NET_STREAM_RECV); - } - - coll_op->process_fn = mca_coll_ml_allgather_noncontiguous_unpack_data; - - /* hopefully this doesn't royaly screw things up idea behind this is the - * whole ml-buffer is used to send and receive 
- */ - coll_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr; - coll_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr; - - /* we can set the initial offset here */ - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = 0; - - coll_op->fragment_data.buffer_desc = src_buffer_desc; - - coll_op->fragment_data.fragment_size = frag_len; - coll_op->fragment_data.message_descriptor->n_active = 1; - - coll_op->full_message.n_bytes_scheduled = frag_len; - coll_op->full_message.fragment_launcher = mca_coll_ml_allgather_frag_progress; - - coll_op->full_message.pipeline_depth = pipeline_depth; - coll_op->fragment_data.current_coll_op = ML_SMALL_DATA_ALLGATHER; - - /* remember this is different for frags !! Caused data corruption when - * not properly set. Need to be sure you have consistent units. - */ - coll_op->variable_fn_params.count = frag_len; - coll_op->variable_fn_params.dtype = MPI_BYTE; /* for fragmented data, we work in - * units of bytes. 
This means that - * all of our arithmetic is done - * in terms of bytes - */ - - coll_op->variable_fn_params.root = 0; - coll_op->variable_fn_params.frag_size = frag_len; - coll_op->variable_fn_params.buffer_size = frag_len; - } else { - /* change 7 */ - ML_VERBOSE(10, ("ML_ALLGATHER_LARGE_DATA_KNOWN case.")); - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allgather_functions[ML_LARGE_DATA_ALLGATHER], - sbuf, rbuf, pack_len, 0 /* offset for first pack */); - topo_info = coll_op->coll_schedule->topo_info; - if (MCA_BCOL_BASE_NO_ML_BUFFER_FOR_LARGE_MSG & topo_info->all_bcols_mode) { - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, MCA_COLL_ML_NO_BUFFER, NULL); - } else { - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, src_buffer_desc->buffer_index, src_buffer_desc); - } - - /* not sure if I really need this here */ - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup; - coll_op->process_fn = NULL; - /* probably the most important piece */ - coll_op->variable_fn_params.sbuf = sbuf; - coll_op->variable_fn_params.rbuf = rbuf; - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = 0; - coll_op->variable_fn_params.count = scount; - coll_op->variable_fn_params.dtype = sdtype;/* for zero copy, we want the - * native datatype and actual count - */ - coll_op->variable_fn_params.root = 0; - - /* you still need to copy in your own data into the rbuf */ - /* don't need to do this if you have in place data */ - if (!in_place) { - memcpy((char *) rbuf + ompi_comm_rank(comm) * pack_len, sbuf, pack_len); - } - } - - coll_op->full_message.send_count = scount; - coll_op->full_message.recv_count = rcount; - - coll_op->full_message.send_data_continguous = scontig; - coll_op->full_message.recv_data_continguous = 
rcontig; - - ompi_datatype_get_extent(sdtype, &lb, &extent); - coll_op->full_message.send_extent = (size_t) extent; - - ompi_datatype_get_extent(rdtype, &lb, &extent); - coll_op->full_message.recv_extent = (size_t) extent; - - - /* Fill in the function arguments */ - coll_op->variable_fn_params.sequence_num = - OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1); - coll_op->variable_fn_params.hier_factor = comm_size; - - MCA_COLL_ML_SET_ORDER_INFO(coll_op, n_fragments); - - - ret = mca_coll_ml_launch_sequential_collective (coll_op); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - *req = &coll_op->full_message.super; - - return OMPI_SUCCESS; -} - -int mca_coll_ml_allgather(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - ompi_request_t *req; - int ret; - - ML_VERBOSE(10, ("Starting blocking allgather")); - - ret = mca_coll_ml_allgather_start (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module, &req); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - - ret = ompi_request_wait (&req, MPI_STATUS_IGNORE); - - ML_VERBOSE(10, ("Blocking allgather is complete")); - - return ret; -} - -int mca_coll_ml_allgather_nb(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module) -{ - int ret; - - ML_VERBOSE(10, ("Starting non-blocking allgather")); - - ret = mca_coll_ml_allgather_start (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module, req); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - - ML_VERBOSE(10, ("Non-blocking allgather started")); - - return ret; -} diff --git a/ompi/mca/coll/ml/coll_ml_allocation.c b/ompi/mca/coll/ml/coll_ml_allocation.c deleted file mode 100644 index 
555c5e9aff8..00000000000 --- a/ompi/mca/coll/ml/coll_ml_allocation.c +++ /dev/null @@ -1,223 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#ifdef HAVE_STDLIB_H -#include -#endif /* HAVE_STDLIB_H */ - -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_allocation.h" - -long memory_buffer_index; - -mca_bcol_base_memory_block_desc_t *mca_coll_ml_allocate_block(struct mca_coll_ml_component_t *ml_component, - mca_bcol_base_memory_block_desc_t *ml_memblock) -{ - mca_bcol_base_memory_block_desc_t *ret = NULL; - mca_bcol_base_memory_block_desc_t *memory_block = NULL; - mca_coll_ml_lmngr_t *memory_manager = NULL; - - if (ml_memblock) { - ML_ERROR(("Memory already allocated - expecting NULL pointer")); - return ret; - } - memory_block = (mca_bcol_base_memory_block_desc_t*) calloc(1, sizeof(mca_bcol_base_memory_block_desc_t)); - - if (NULL == memory_block){ - ML_ERROR(("Couldn't allocate memory for ml_memblock")); - return ret; - } - - memory_manager = &ml_component->memory_manager; - memory_block->block = mca_coll_ml_lmngr_alloc(memory_manager); - memory_block->size_block = memory_manager->list_block_size; - - if (!memory_block->block){ - ML_VERBOSE(1, ("lmngr failed.")); - ret = NULL; - goto exit_ERROR; - } - - return memory_block; - -exit_ERROR: - if (memory_block){ - free(memory_block); - return ret; - } - - return ret; -} - -void mca_coll_ml_free_block (mca_bcol_base_memory_block_desc_t *ml_memblock) -{ - if (!ml_memblock) - return; - - if (ml_memblock->buffer_descs){ - free(ml_memblock->buffer_descs); - } - - mca_coll_ml_lmngr_free(ml_memblock->block); - free(ml_memblock->bank_release_counters); - free(ml_memblock->ready_for_memsync); - 
free(ml_memblock->bank_is_busy); - free(ml_memblock); -} - -int mca_coll_ml_initialize_block(mca_bcol_base_memory_block_desc_t *ml_memblock, - uint32_t num_buffers, - uint32_t num_banks, - uint32_t buffer_size, - int32_t data_offset, - opal_list_t *bcols_in_use) -{ - int ret = OMPI_SUCCESS; - uint32_t bank_loop, buff_loop; - uint64_t addr_offset = 0; - mca_bcol_base_payload_buffer_desc_t *pbuff_descs = NULL,*pbuff_desc = NULL; - - if (0 == num_banks || 0 == num_buffers || 0 == buffer_size) { - return OMPI_ERR_BAD_PARAM; - } - - if (NULL == ml_memblock){ - ML_ERROR(("Memory block not initialized")); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - if (ml_memblock->size_block < (num_buffers * num_banks * buffer_size) ){ - ML_ERROR(("Not enough memory for all buffers and banks in the memory block")); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - pbuff_descs = (mca_bcol_base_payload_buffer_desc_t*) malloc(sizeof(mca_bcol_base_payload_buffer_desc_t) - * num_banks * num_buffers); - if (NULL == pbuff_descs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for(bank_loop = 0; bank_loop < num_banks; bank_loop++) - for(buff_loop = 0; buff_loop < num_buffers; buff_loop++){ - pbuff_desc = &pbuff_descs[bank_loop*num_buffers + buff_loop]; - - pbuff_desc->base_data_addr = (void *) - ((char *)ml_memblock->block->base_addr + addr_offset); - pbuff_desc->data_addr = (void *) - ((char *)pbuff_desc->base_data_addr + (size_t)data_offset); - - addr_offset+=buffer_size; - pbuff_desc->buffer_index = BUFFER_INDEX(bank_loop,num_buffers,buff_loop); - - pbuff_desc->bank_index=bank_loop; - pbuff_desc->generation_number=0; - } - - /* Initialize ml memory block */ - /* gvm FIX:This counter when zero indicates that the bank is ready for - * recycle. This is initialized to number of bcol components as each bcol is responsible for - * releasing the buffers of a bank. 
This initialization will have - * faulty behavior, example in case of multiple interfaces, when more than - * one bcol module of the component type is in use. - */ - ml_memblock->bank_release_counters = (uint32_t *) calloc(num_banks, sizeof(uint32_t)); - if (NULL == ml_memblock->bank_release_counters) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - ml_memblock->ready_for_memsync = (bool *) calloc(num_banks, sizeof(bool)); - if (NULL == ml_memblock->ready_for_memsync) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - ml_memblock->bank_is_busy = (bool *) calloc(num_banks, sizeof(bool)); - if (NULL == ml_memblock->bank_is_busy) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* Set index for first bank to sync */ - ml_memblock->memsync_counter = 0; - - /* use first bank and first buffer */ - ml_memblock->next_free_buffer = 0; - - ml_memblock->block_addr_offset = addr_offset; - ml_memblock->num_buffers_per_bank = num_buffers; - ml_memblock->num_banks = num_banks; - ml_memblock->size_buffer = buffer_size; - ml_memblock->buffer_descs = pbuff_descs; - - return ret; - -exit_ERROR: - /* Free all buffer descriptors */ - if (pbuff_descs){ - free(pbuff_descs); - } - - return ret; -} - -mca_bcol_base_payload_buffer_desc_t *mca_coll_ml_alloc_buffer (mca_coll_ml_module_t *module) -{ - uint64_t bindex; - uint32_t bank, buffer, num_buffers; - mca_bcol_base_memory_block_desc_t *ml_memblock = module->payload_block; - mca_bcol_base_payload_buffer_desc_t *pbuff_descs = NULL, - *ml_membuffer = NULL; - - /* Return a buffer */ - num_buffers = ml_memblock->num_buffers_per_bank; - pbuff_descs = ml_memblock->buffer_descs; - bindex = ml_memblock->next_free_buffer; - buffer = bindex % num_buffers; - bank = bindex/num_buffers; - - ML_VERBOSE(10, ("ML allocator: allocating buffer index %d, bank index %d", buffer, bank)); - - /* First buffer in bank, use next bank */ - if (0 == buffer) { - if(!ml_memblock->bank_is_busy[bank]) { - /* the bank is free, 
mark it busy */ - ml_memblock->bank_is_busy[bank] = true; - ML_VERBOSE(10, ("ML allocator: reset bank %d to value %d", bank, - ml_memblock->bank_release_counters[bank])); - } else { - /* the bank is busy, return NULL and upper layer will handle it */ - ML_VERBOSE(10, ("No free payload buffers are available for use." - " Next memory bank is still used by one of bcols")); - return NULL; - } - } - - assert(true == ml_memblock->bank_is_busy[bank]); - - ml_membuffer = &pbuff_descs[bindex]; - ML_VERBOSE(10, ("ML allocator: ml buffer index %d", bindex)); - - /* Compute next free buffer */ - buffer = (buffer == num_buffers - 1) ? 0 : buffer + 1; - if (0 == buffer) { - bank = (bank == ml_memblock->num_banks - 1) ? 0 : bank + 1; - } - - ml_memblock->next_free_buffer = BUFFER_INDEX(bank,num_buffers,buffer); - - return ml_membuffer; -} diff --git a/ompi/mca/coll/ml/coll_ml_allocation.h b/ompi/mca/coll/ml/coll_ml_allocation.h deleted file mode 100644 index 7bb7f63242c..00000000000 --- a/ompi/mca/coll/ml/coll_ml_allocation.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_ML_ALLOC_H -#define MCA_ML_ALLOC_H - -#include "ompi_config.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/base/base.h" -#include "opal/sys/atomic.h" -#include "opal/mca/mpool/base/base.h" -#include "coll_ml_lmngr.h" - -/* - Returns a block of memory from mpool - - ARGS: - IN ml_component: component descriptor - OUT ml_memblock: block_addr - Starting address of the memory block - size - Size of the block - register_info - Register information passed from the mpool - - Return - On Sucess : Returns size of memory block - On Failure: Returns -1 - - */ - -struct mca_coll_ml_component_t; -struct mca_coll_ml_module_t; - -mca_bcol_base_memory_block_desc_t *mca_coll_ml_allocate_block( - struct mca_coll_ml_component_t *ml_component, - struct mca_bcol_base_memory_block_desc_t *ml_memblock - ); - /* Allocate the memory from mpool */ - /* Register the memory block with bcols */ - -void mca_coll_ml_free_block( - mca_bcol_base_memory_block_desc_t *ml_memblock - ); - - - - -/* - Initialize the memory block and map into buffers and memory banks, and - also buffer descriptors are initialized. 
- - IN ml_memblock: Memory block descriptor - IN num_buffers: number of buffers - IN num_banks: number of banks - Return - On Sucess: OMPI_SUCCESS - On Failure: OMPI_ERROR - */ -int mca_coll_ml_initialize_block( - mca_bcol_base_memory_block_desc_t *ml_memblock, - uint32_t num_buffers, - uint32_t num_banks, - uint32_t buffer_size, - int32_t data_offset, - opal_list_t *bcols_in_use - ); - /* Map blocks into buffers and banks */ - /* Initialize the descriptors */ - - - -/* - Allocate a memory buffer from the block - IN ml_memblock: Memory block descriptor - OUT ml_membuffer: Buffer allocated for data from the block - - Return - On Sucess: OMPI_SUCCESS - On Failure: OMPI_ERROR - */ -mca_bcol_base_payload_buffer_desc_t *mca_coll_ml_alloc_buffer( - struct mca_coll_ml_module_t *module); - -int mca_coll_ml_free_buffer( - mca_bcol_base_memory_block_desc_t *ml_memblock, - struct mca_bcol_base_payload_buffer_desc_t *ml_membuffer - ); - -/* - Register the memory block with bcol component - - IN ml_memblock: Memory block descriptor - OUT registerations (ml_memblock) - - Return - On Sucess: OMPI_SUCCESS - On Failure: OMPI_ERROR - - */ -int mca_coll_ml_register_block_bcol( - mca_bcol_base_memory_block_desc_t *ml_memblock - ); - -#endif /* MCA_ML_ALLOC_H */ diff --git a/ompi/mca/coll/ml/coll_ml_allreduce.c b/ompi/mca/coll/ml/coll_ml_allreduce.c deleted file mode 100644 index e5ee83dbe3e..00000000000 --- a/ompi/mca/coll/ml/coll_ml_allreduce.c +++ /dev/null @@ -1,551 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" -#include "opal/sys/atomic.h" -#include "coll_ml.h" -#include "coll_ml_select.h" -#include "coll_ml_allocation.h" - -static int mca_coll_ml_allreduce_small_unpack(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int ret; - /* need to put in more */ - int count = coll_op->variable_fn_params.count; - ompi_datatype_t *dtype = coll_op->variable_fn_params.dtype; - - void *dest = (void *)((uintptr_t)coll_op->full_message.dest_user_addr + - (uintptr_t)coll_op->fragment_data.offset_into_user_buffer); - void *src = (void *)((uintptr_t)coll_op->fragment_data.buffer_desc->data_addr + - (size_t)coll_op->variable_fn_params.rbuf_offset); - - ret = ompi_datatype_copy_content_same_ddt(dtype, (int32_t) count, (char *) dest, - (char *) src); - if (ret < 0) { - return OMPI_ERROR; - } - - ML_VERBOSE(10, ("sbuf addr %p, sbuf offset %d, rbuf addr %p, rbuf offset %d.", - src, coll_op->variable_fn_params.sbuf_offset, dest, - coll_op->variable_fn_params.rbuf_offset)); - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_allreduce_task_setup(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int fn_idx, h_level, my_index, root; - mca_sbgp_base_module_t *sbgp; - mca_coll_ml_topology_t *topo = coll_op->coll_schedule->topo_info; - - fn_idx = coll_op->sequential_routine.current_active_bcol_fn; - h_level = coll_op->coll_schedule->component_functions[fn_idx].h_level; - sbgp = topo->component_pairs[h_level].subgroup_module; - my_index = sbgp->my_index; - - /* In the case of allreduce, the local leader is always the root */ - root = 0; - if (my_index == root) { - coll_op->variable_fn_params.root_flag = true; - coll_op->variable_fn_params.root_route = NULL; - } else { - 
coll_op->variable_fn_params.root_flag = false; - coll_op->variable_fn_params.root_route = &topo->route_vector[root]; - } - - /* NTH: This was copied from the old allreduce launcher. */ - if (0 < fn_idx) { - coll_op->variable_fn_params.sbuf = coll_op->variable_fn_params.rbuf; - coll_op->variable_fn_params.userbuf = coll_op->variable_fn_params.rbuf; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_allreduce_frag_progress(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - void *buf; - - size_t dt_size; - int ret, frag_len, count; - - ptrdiff_t lb, extent; - - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc; - mca_coll_ml_collective_operation_progress_t *new_op; - - mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op); - - ret = ompi_datatype_get_extent(coll_op->variable_fn_params.dtype, &lb, &extent); - if (ret < 0) { - return OMPI_ERROR; - } - - dt_size = (size_t) extent; - - /* Keep the pipeline filled with fragments */ - while (coll_op->fragment_data.message_descriptor->n_active < - coll_op->fragment_data.message_descriptor->pipeline_depth) { - /* If an active fragment happens to have completed the collective during - * a hop into the progress engine, then don't launch a new fragment, - * instead break and return. - */ - if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled - == coll_op->fragment_data.message_descriptor->n_bytes_total) { - break; - } - - /* Get an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(OP_ML_MODULE(coll_op)); - if (NULL == src_buffer_desc) { - /* If there exist outstanding fragments, then break out - * and let an active fragment deal with this later, - * there are no buffers available. - */ - if (0 < coll_op->fragment_data.message_descriptor->n_active) { - return OMPI_SUCCESS; - } - - /* It is useless to call progress from here, since - * ml progress can't be executed as result ml memsync - * call will not be completed and no memory will be - * recycled. 
So we put the element on the list, and we will - * progress it later when memsync will recycle some memory*/ - - /* The fragment is already on list and - * the we still have no ml resources - * Return busy */ - if (!(coll_op->pending & REQ_OUT_OF_MEMORY)) { - coll_op->pending |= REQ_OUT_OF_MEMORY; - opal_list_append(&((OP_ML_MODULE(coll_op))->waiting_for_memory_list), - (opal_list_item_t *)coll_op); - ML_VERBOSE(10,("Out of resources %p adding to pending queue", coll_op)); - } else { - ML_VERBOSE(10,("Out of resources %p", coll_op)); - } - - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - /* Get a new collective descriptor and initialize it */ - new_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allreduce_functions[coll_op->fragment_data.current_coll_op], - coll_op->fragment_data.message_descriptor->src_user_addr, - coll_op->fragment_data.message_descriptor->dest_user_addr, - coll_op->fragment_data.message_descriptor->n_bytes_total, - coll_op->fragment_data.message_descriptor->n_bytes_scheduled); - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(new_op, - src_buffer_desc->buffer_index, src_buffer_desc); - - new_op->fragment_data.current_coll_op = coll_op->fragment_data.current_coll_op; - new_op->fragment_data.message_descriptor = coll_op->fragment_data.message_descriptor; - - /* set the task setup callback */ - new_op->sequential_routine.seq_task_setup = mca_coll_ml_allreduce_task_setup; - /* We need this address for pointer arithmetic in memcpy */ - buf = coll_op->fragment_data.message_descriptor->src_user_addr; - /* calculate the number of data types in this packet */ - count = (coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled < - (size_t) OP_ML_MODULE(coll_op)->small_message_thresholds[BCOL_ALLREDUCE] ? 
- (coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled) / dt_size : - (size_t) coll_op->variable_fn_params.count); - - /* calculate the fragment length */ - frag_len = count*dt_size; - - ret = ompi_datatype_copy_content_same_ddt(coll_op->variable_fn_params.dtype, count, - (char *) src_buffer_desc->data_addr, (char *) ((uintptr_t) buf + (uintptr_t) - coll_op->fragment_data.message_descriptor->n_bytes_scheduled)); - if (ret < 0) { - return OMPI_ERROR; - } - - /* No unpack for root */ - new_op->process_fn = mca_coll_ml_allreduce_small_unpack; - - /* Setup fragment specific data */ - new_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len; - new_op->fragment_data.buffer_desc = src_buffer_desc; - new_op->fragment_data.fragment_size = frag_len; - (new_op->fragment_data.message_descriptor->n_active)++; - - ML_SET_VARIABLE_PARAMS_BCAST( - new_op, - OP_ML_MODULE(new_op), - count, - MPI_BYTE, - src_buffer_desc, - 0, - 0, - frag_len, - src_buffer_desc->data_addr); - /* Fill in bcast specific arguments */ - /* TBD: remove buffer_size */ - new_op->variable_fn_params.buffer_size = frag_len; - new_op->variable_fn_params.count = count; - new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor; - new_op->variable_fn_params.op = coll_op->variable_fn_params.op; - new_op->variable_fn_params.dtype = coll_op->variable_fn_params.dtype; - new_op->variable_fn_params.root = 0; - new_op->variable_fn_params.sbuf = src_buffer_desc->data_addr; - new_op->variable_fn_params.rbuf = src_buffer_desc->data_addr; - new_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING; - - MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op); - - ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d", - new_op->variable_fn_params.buffer_size, - new_op->fragment_data.fragment_size, - new_op->fragment_data.message_descriptor->n_bytes_scheduled)); - /* initialize first coll */ - ret = 
new_op->sequential_routine.seq_task_setup(new_op); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(3,("Fragment failed to initialize itself")); - return ret; - } - - /* append this collective !! */ - OPAL_THREAD_LOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - opal_list_append(&mca_coll_ml_component.sequential_collectives, - (opal_list_item_t *)new_op); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ -int parallel_allreduce_start(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_ml_module_t *ml_module, - ompi_request_t **req, - int small_data_allreduce, - int large_data_allreduce) -{ - int ret, n_fragments = 1, frag_len, - pipeline_depth, n_dts_per_frag ; - - ptrdiff_t lb, extent; - size_t pack_len, dt_size; - - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc; - mca_coll_ml_collective_operation_progress_t *coll_op; - - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - bool contiguous = ompi_datatype_is_contiguous_memory_layout(dtype, count); - - if (MPI_IN_PLACE == sbuf) { - sbuf = rbuf; - } - - ret = ompi_datatype_get_extent(dtype, &lb, &extent); - if (ret < 0) { - return OMPI_ERROR; - } - - dt_size = (size_t) extent; - pack_len = count * dt_size; - - ML_VERBOSE(1,("The allreduce requested %d enable fragmentation %d ", - pack_len, - cm->enable_fragmentation)); - if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_ALLREDUCE]) { - /* The len of the message can not be larger than ML buffer size */ - assert(pack_len <= ml_module->payload_block->size_buffer); - - ML_VERBOSE(1,("Using small data allreduce (threshold = %d)", - ml_module->small_message_thresholds[BCOL_ALLREDUCE])); - - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (OPAL_UNLIKELY(NULL == src_buffer_desc)) { - opal_progress(); - src_buffer_desc = 
mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allreduce_functions[small_data_allreduce], - sbuf, rbuf, pack_len, 0); - - coll_op->variable_fn_params.rbuf = src_buffer_desc->data_addr; - coll_op->variable_fn_params.sbuf = src_buffer_desc->data_addr; - coll_op->variable_fn_params.count = count; - - ret = ompi_datatype_copy_content_same_ddt(dtype, count, - (void *) (uintptr_t) src_buffer_desc->data_addr, (char *) sbuf); - if (ret < 0){ - return OMPI_ERROR; - } - - /* unpack function */ - coll_op->process_fn = mca_coll_ml_allreduce_small_unpack; - } else if (cm->enable_fragmentation || !contiguous) { - ML_VERBOSE(1,("Using Fragmented Allreduce")); - - /* fragment the data */ - /* check for retarded application programming decisions */ - if (dt_size > (size_t) ml_module->small_message_thresholds[BCOL_ALLREDUCE]) { - ML_ERROR(("Sorry, but we don't support datatypes that large")); - return OMPI_ERROR; - } - - /* calculate the number of data types that can fit per ml-buffer */ - n_dts_per_frag = ml_module->small_message_thresholds[BCOL_ALLREDUCE] / dt_size; - - /* calculate the number of fragments */ - n_fragments = (count + n_dts_per_frag - 1) / n_dts_per_frag; /* round up */ - - /* calculate the actual pipeline depth */ - pipeline_depth = n_fragments < cm->pipeline_depth ? 
n_fragments : cm->pipeline_depth; - - /* calculate the fragment size */ - frag_len = n_dts_per_frag * dt_size; - - /* allocate an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allreduce_functions[small_data_allreduce], - sbuf, rbuf, pack_len, 0 /* offset for first pack */); - - /* task setup callback function */ - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allreduce_task_setup; - - coll_op->process_fn = mca_coll_ml_allreduce_small_unpack; - - coll_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr; - coll_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr; - - coll_op->fragment_data.message_descriptor->n_active = 1; - coll_op->full_message.n_bytes_scheduled = frag_len; - coll_op->full_message.fragment_launcher = mca_coll_ml_allreduce_frag_progress; - coll_op->full_message.pipeline_depth = pipeline_depth; - coll_op->fragment_data.current_coll_op = small_data_allreduce; - coll_op->fragment_data.fragment_size = frag_len; - - coll_op->variable_fn_params.count = n_dts_per_frag; /* seems fishy */ - coll_op->variable_fn_params.buffer_size = frag_len; - - /* copy into the ml-buffer */ - ret = ompi_datatype_copy_content_same_ddt(dtype, n_dts_per_frag, - (char *) src_buffer_desc->data_addr, (char *) sbuf); - if (ret < 0) { - return OMPI_ERROR; - } - } else { - ML_VERBOSE(1,("Using zero-copy ptp allreduce")); - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allreduce_functions[large_data_allreduce], - sbuf, rbuf, pack_len, 0); - - coll_op->variable_fn_params.userbuf = - coll_op->variable_fn_params.sbuf = sbuf; - - coll_op->variable_fn_params.rbuf = rbuf; - - /* The ML buffer is used for testing. 
Later, when we - * switch to use knem/mmap/portals this should be replaced - * appropriately - */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op->variable_fn_params.count = count; - } - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, src_buffer_desc->buffer_index, - src_buffer_desc); - - /* set the offset */ - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = 0; - - /* Fill in the function arguments */ - coll_op->variable_fn_params.sequence_num = - OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1); - coll_op->sequential_routine.current_active_bcol_fn = 0; - coll_op->variable_fn_params.dtype = dtype; - coll_op->variable_fn_params.op = op; - coll_op->variable_fn_params.root = 0; - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allreduce_task_setup; /* invoked after each level in sequential - * progress call - */ - MCA_COLL_ML_SET_ORDER_INFO(coll_op, n_fragments); - - ret = mca_coll_ml_launch_sequential_collective (coll_op); - if (ret != OMPI_SUCCESS) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - *req = &coll_op->full_message.super; - - return OMPI_SUCCESS; -} - -int mca_coll_ml_allreduce(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t*)module; - ompi_request_t *req; - int ret; - - if (OPAL_UNLIKELY(!ompi_op_is_commute(op))) { - /* coll/ml does not handle non-communative operations at this time. 
fallback - * on another collective module */ - return ml_module->fallback.coll_allreduce (sbuf, rbuf, count, dtype, op, comm, - ml_module->fallback.coll_allreduce_module); - } - - ret = parallel_allreduce_start(sbuf, rbuf, count, dtype, op, comm, - (mca_coll_ml_module_t *) module, &req, - ML_SMALL_DATA_ALLREDUCE, - ML_LARGE_DATA_ALLREDUCE); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_ERROR(("Failed to launch")); - return ret; - } - - ompi_request_wait_completion(req); - ompi_request_free(&req); - - ML_VERBOSE(10, ("Blocking NB allreduce is done")); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_allreduce_nb(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module) -{ - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t*)module; - int ret; - - if (OPAL_UNLIKELY(!ompi_op_is_commute(op))) { - /* coll/ml does not handle non-communative operations at this time. fallback - * on another collective module */ - return ml_module->fallback.coll_iallreduce (sbuf, rbuf, count, dtype, op, comm, req, - ml_module->fallback.coll_iallreduce_module); - } - - ret = parallel_allreduce_start(sbuf, rbuf, count, dtype, op, comm, - (mca_coll_ml_module_t *) module, req, - ML_SMALL_DATA_ALLREDUCE, - ML_LARGE_DATA_ALLREDUCE); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_ERROR(("Failed to launch")); - return ret; - } - - ML_VERBOSE(10, ("Blocking NB allreduce is done")); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_allreduce_dispatch(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, mca_coll_base_module_t *module) -{ - int rc; - bool use_extra_topo; - ompi_request_t *req; - - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - - use_extra_topo = (count > 1) ? 
- !ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_MULTI_ELEM_TYPE] : - !ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_SINGLE_ELEM_TYPE]; - - if (use_extra_topo) { - rc = parallel_allreduce_start(sbuf, rbuf, count, dtype, - op, comm, ml_module, &req, - ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE, - ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE); - } else { - rc = parallel_allreduce_start(sbuf, rbuf, count, dtype, - op, comm, ml_module, &req, - ML_SMALL_DATA_ALLREDUCE, - ML_LARGE_DATA_ALLREDUCE); - } - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("Failed to launch")); - return rc; - } - - ompi_request_wait_completion(req); - ompi_request_free(&req); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_allreduce_dispatch_nb(void *sbuf, void *rbuf, int count, - ompi_datatype_t *dtype, ompi_op_t *op, - ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module) -{ - int rc; - bool use_extra_topo; - - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - - use_extra_topo = (count > 1) ? 
- !ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_MULTI_ELEM_TYPE] : - !ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_SINGLE_ELEM_TYPE]; - - if (use_extra_topo) { - rc = parallel_allreduce_start(sbuf, rbuf, count, dtype, - op, comm, ml_module, req, - ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE, - ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE); - } else { - rc = parallel_allreduce_start(sbuf, rbuf, count, dtype, - op, comm, ml_module, req, - ML_SMALL_DATA_ALLREDUCE, - ML_LARGE_DATA_ALLREDUCE); - } - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("Failed to launch")); - return rc; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_barrier.c b/ompi/mca/coll/ml/coll_ml_barrier.c deleted file mode 100644 index ce593be5194..00000000000 --- a/ompi/mca/coll/ml/coll_ml_barrier.c +++ /dev/null @@ -1,146 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/coll/coll.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" - -static void mca_coll_ml_barrier_task_setup( - mca_coll_ml_task_status_t *task_status, - int index, mca_coll_ml_compound_functions_t *func) -{ - task_status->rt_num_dependencies = func->num_dependencies; - task_status->rt_num_dependent_tasks = func->num_dependent_tasks; - task_status->rt_dependent_task_indices = func->dependent_task_indices; -} - -static int mca_coll_ml_barrier_launch(mca_coll_ml_module_t *ml_module, - ompi_request_t **req) -{ - opal_free_list_item_t *item; - mca_coll_ml_collective_operation_progress_t *coll_op; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - - /* allocate an ml buffer for signaling purposes */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - - /* Blocking call on fragment allocation (Maybe we want to make it non blocking ?) 
*/ - item = opal_free_list_wait (&(ml_module->coll_ml_collective_descriptors)); - - coll_op = (mca_coll_ml_collective_operation_progress_t *) item; - assert(NULL != coll_op); - - ML_VERBOSE(10, ("Get coll request %p", coll_op)); - - MCA_COLL_ML_OP_BASIC_SETUP(coll_op, 0, 0, NULL, NULL, ml_module->coll_ml_barrier_function); - - coll_op->fragment_data.buffer_desc = src_buffer_desc; - coll_op->dag_description.num_tasks_completed = 0; - - coll_op->variable_fn_params.buffer_index = src_buffer_desc->buffer_index; - - coll_op->variable_fn_params.sequence_num = - OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1); - - /* Pointer to a coll finalize function */ - coll_op->process_fn = NULL; - - (*req) = &coll_op->full_message.super; - - OMPI_REQUEST_INIT((*req), false); - - (*req)->req_status._cancelled = 0; - (*req)->req_state = OMPI_REQUEST_ACTIVE; - (*req)->req_status.MPI_ERROR = OMPI_SUCCESS; - - /* Set order info if there is a bcol needs ordering */ - MCA_COLL_ML_SET_ORDER_INFO(coll_op, 1); - - return mca_coll_ml_generic_collectives_launcher(coll_op, mca_coll_ml_barrier_task_setup); -} - -/** - * Hierarchical blocking barrier - */ -int mca_coll_ml_barrier_intra(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int rc; - ompi_request_t *req; - - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - -#if OPAL_ENABLE_DEBUG - static int barriers_count = 0; -#endif - - ML_VERBOSE(10, ("Barrier num %d start.", ++barriers_count)); - - rc = mca_coll_ml_barrier_launch(ml_module, &req); - if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) { - ML_ERROR(("Failed to launch a barrier.")); - return rc; - } - - /* Blocking barrier */ - ompi_request_wait_completion(req); - ompi_request_free(&req); - - ML_VERBOSE(10, ("Barrier num %d was done.", barriers_count)); - - return OMPI_SUCCESS; -} - -/** - * Hierarchical non-blocking barrier - */ -int mca_coll_ml_ibarrier_intra(struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t 
*module) -{ - int rc; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - -#if OPAL_ENABLE_DEBUG - static int barriers_count = 0; -#endif - - ML_VERBOSE(10, ("IBarrier num %d start.", ++barriers_count)); - - rc = mca_coll_ml_barrier_launch(ml_module, req); - if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) { - ML_ERROR(("Failed to launch a barrier.")); - return rc; - } - - ML_VERBOSE(10, ("IBarrier num %d was done.", barriers_count)); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_bcast.c b/ompi/mca/coll/ml/coll_ml_bcast.c deleted file mode 100644 index 891838f9442..00000000000 --- a/ompi/mca/coll/ml/coll_ml_bcast.c +++ /dev/null @@ -1,849 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include -#include - -#include "opal/threads/mutex.h" -#include "opal/sys/atomic.h" - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" - -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_colls.h" -#include "coll_ml_allocation.h" - -#define ML_BUFFER_ALLOC_WAIT(ml, buffer) \ -do { \ - buffer = mca_coll_ml_alloc_buffer(ml); \ - while (NULL == buffer) { \ - opal_progress(); \ - buffer = mca_coll_ml_alloc_buffer(ml); \ - } \ -} while (0) - -#define COLL_ML_SETUP_ORDERING_INFO(op, last, prev) \ -do { \ - /* Don't change order of commands !!!! 
*/ \ - (op)->prev_frag = prev; \ - (op)->fragment_data.message_descriptor->last_started_frag = last; \ - /* op->next_to_process_frag = NULL; */ \ -} while (0) - -#define ALLOCATE_AND_PACK_CONTIG_BCAST_FRAG(ml_module, op, coll_index, root, \ - total_len, frag_len, buf, ml_buff_desc) \ -do { \ - op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, \ - ml_module->coll_ml_bcast_functions[coll_index], \ - buf, buf, \ - total_len, \ - 0 /* offset for first pack */); \ - if (OPAL_LIKELY(frag_len > 0)) { \ - if (ompi_comm_rank(ml_module->comm) == root) { \ - /* single frag, pack the data */ \ - memcpy((void *)(uintptr_t)(ml_buff_desc)->data_addr, \ - buf, frag_len); \ - /* No unpack for root */ \ - op->process_fn = NULL; \ - } else { \ - op->process_fn = mca_coll_ml_bcast_small_unpack_data; \ - } \ - } \ - op->full_message.n_bytes_scheduled = frag_len; \ -} while (0) - -#define SMALL_BCAST 0 -#define LARGE_BCAST (SMALL_BCAST + 1) - -/* bcast data unpack */ -static int mca_coll_ml_bcast_converter_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - struct iovec iov; - uint32_t iov_count = 1; - size_t max_data = 0; - - mca_coll_ml_collective_operation_progress_t *next_op; - mca_coll_ml_module_t *ml_module = - (mca_coll_ml_module_t *) coll_op->coll_module; - - size_t max_index = - ml_module->payload_block->num_banks * ml_module->payload_block->num_buffers_per_bank; - - bool is_first = true; - int ret; - - /* Check if the fragment delivered in order */ - if (coll_op->fragment_data.buffer_desc->buffer_index != - coll_op->fragment_data.message_descriptor->next_expected_index) { - mca_coll_ml_collective_operation_progress_t *prev_coll_op = coll_op->prev_frag; - assert(NULL == prev_coll_op->next_to_process_frag); - /* make sure that previous process will have pointer to the out - of order process */ - prev_coll_op->next_to_process_frag = coll_op; - assert(!(coll_op->pending & REQ_OUT_OF_ORDER)); - coll_op->pending |= REQ_OUT_OF_ORDER; - /* we will unpack it 
later */ - ML_VERBOSE(10, ("Get %d expecting %d previous %d", - coll_op->fragment_data.buffer_desc->buffer_index, - coll_op->fragment_data.message_descriptor->next_expected_index, - prev_coll_op->fragment_data.buffer_desc->buffer_index)); - return ORTE_ERR_NO_MATCH_YET; - } - - do { - iov.iov_len = coll_op->fragment_data.fragment_size; - iov.iov_base = (void *)((uintptr_t) coll_op->fragment_data.buffer_desc->data_addr); - - ML_VERBOSE(10, ("Data unpack with convertern index %d", - coll_op->fragment_data.buffer_desc->buffer_index)); - - opal_convertor_unpack(&coll_op->fragment_data.message_descriptor->recv_convertor, - &iov, &iov_count, &max_data); - - /* update next index */ - ++coll_op->fragment_data.message_descriptor->next_expected_index; - if (coll_op->fragment_data.message_descriptor->next_expected_index >= max_index) { - coll_op->fragment_data.message_descriptor->next_expected_index = 0; - } - - /* Return to queue if the packet is done, - the exeption is first packet, we release it later. - */ - next_op = coll_op->next_to_process_frag; - coll_op->next_to_process_frag = NULL; - if ((!is_first) && - (0 != coll_op->fragment_data.offset_into_user_buffer)) { - assert(coll_op->pending & REQ_OUT_OF_ORDER); - coll_op->pending ^= REQ_OUT_OF_ORDER; - /* Pasha: On one hand - I'm not sure that conceptually it is right place to call buffer recycling. Potentially, - coll_ml_fragment_completion_processing() sounds like right place for out of order unpack/sync handling. - * On the other hand - non contiguous data is not supper common and we would like to minimize effect on critical pass - * for non contiguous data types. 
*/ - ret = mca_coll_ml_buffer_recycling(coll_op); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return OMPI_ERROR; - } - - CHECK_AND_RECYCLE(coll_op); - } - - coll_op = next_op; - is_first = false; - } while (NULL != coll_op); - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_bcast_small_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - void * dest = (void *)((uintptr_t) coll_op->full_message.dest_user_addr + - (uintptr_t) coll_op->full_message.n_bytes_delivered); - void * src = (void *)((uintptr_t) coll_op->fragment_data.buffer_desc->data_addr); - - memcpy(dest, src, coll_op->fragment_data.fragment_size); - return OMPI_SUCCESS; -} - -static int mca_coll_ml_bcast_large_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - void * dest = (void *)((uintptr_t) coll_op->fragment_data.message_descriptor->dest_user_addr + - (uintptr_t) coll_op->fragment_data.offset_into_user_buffer); - void * src = (void *)((uintptr_t) coll_op->fragment_data.buffer_desc->data_addr); - - memcpy(dest, src, coll_op->fragment_data.fragment_size); - return OMPI_SUCCESS; -} - -static int mca_coll_ml_bcast_frag_converter_progress(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - int ret, frag_len; - size_t max_data = 0; - - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - mca_coll_ml_collective_operation_progress_t *new_op = NULL; - mca_coll_ml_task_setup_fn_t task_setup = NULL; - mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op); - - /* Keep the pipeline filled with fragments */ - while (coll_op->fragment_data.message_descriptor->n_active < - mca_coll_ml_component.pipeline_depth) { - /* If an active fragment happens to have completed the collective during - * a hop into the progress engine, then don't launch a new fragment, - * instead break and return. 
- */ - if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled - == coll_op->fragment_data.message_descriptor->n_bytes_total) { - break; - } - - /* Get an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - if (OPAL_UNLIKELY(NULL == src_buffer_desc)) { - /* If there exist outstanding fragments, then break out - * and let an active fragment deal with this later, - * there are no buffers available. - */ - if (0 < coll_op->fragment_data.message_descriptor->n_active) { - return OMPI_SUCCESS; - } - - /* It is useless to call progress from here, since - * ml progress can't be executed as result ml memsync - * call will not be completed and no memory will be - * recycled. So we put the element on the list, and we will - * progress it later when memsync will recycle some memory*/ - - /* The fragment is already on list and - * the we still have no ml resources - * Return busy */ - if (!(coll_op->pending & REQ_OUT_OF_MEMORY)) { - coll_op->pending |= REQ_OUT_OF_MEMORY; - opal_list_append(&ml_module->waiting_for_memory_list, - (opal_list_item_t *)coll_op); - } - - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - /* Get a new collective descriptor and initialize it */ - new_op = mca_coll_ml_duplicate_op_prog_single_frag_dag - (ml_module, coll_op); - /* We need this address for pointer arithmetic in memcpy */ - frag_len = ML_GET_FRAG_SIZE(coll_op, BCOL_BCAST); - /* Decide based on global flag, not variable one */ - if (coll_op->fragment_data.message_descriptor->root) { - struct iovec iov; - uint32_t iov_count = 1; - - /* OBJ_RETAIN(new_op->variable_fn_params.dtype); */ - iov.iov_base = (IOVBASE_TYPE*) src_buffer_desc->data_addr; - iov.iov_len = ml_module->small_message_thresholds[BCOL_BCAST]; - assert(0 != iov.iov_len); - - max_data = ml_module->small_message_thresholds[BCOL_BCAST]; - opal_convertor_pack(&new_op->fragment_data.message_descriptor->send_convertor, - &iov, &iov_count, &max_data); - - new_op->process_fn = NULL; - 
new_op->variable_fn_params.root_flag = true; - new_op->variable_fn_params.root_route = NULL; - - task_setup = OP_ML_MODULE(new_op)-> - coll_ml_bcast_functions[new_op->fragment_data.current_coll_op]-> - task_setup_fn[COLL_ML_ROOT_TASK_FN]; - } else { - new_op->process_fn = mca_coll_ml_bcast_converter_unpack_data; - new_op->variable_fn_params.root_flag = false; - new_op->variable_fn_params.root_route = coll_op->variable_fn_params.root_route; - - task_setup = OP_ML_MODULE(new_op)-> - coll_ml_bcast_functions[new_op->fragment_data.current_coll_op]-> - task_setup_fn[COLL_ML_GENERAL_TASK_FN]; - - max_data = ml_module->small_message_thresholds[BCOL_BCAST]; - mca_coll_ml_convertor_get_send_frag_size( - ml_module, &max_data, - new_op->fragment_data.message_descriptor); - } - - new_op->fragment_data.message_descriptor->n_bytes_scheduled += max_data; - new_op->fragment_data.fragment_size = max_data; - new_op->fragment_data.buffer_desc = src_buffer_desc; - - /* Setup fragment specific data */ - ++(new_op->fragment_data.message_descriptor->n_active); - - COLL_ML_SETUP_ORDERING_INFO(new_op, new_op, - new_op->fragment_data.message_descriptor->last_started_frag); - ML_VERBOSE(10, ("Start more, My index %d my prev %d", - new_op->fragment_data.buffer_desc->buffer_index, - new_op->prev_frag->fragment_data.buffer_desc->buffer_index)); - - ML_SET_VARIABLE_PARAMS_BCAST( - new_op, - OP_ML_MODULE(new_op), - frag_len, - MPI_BYTE, - src_buffer_desc, - 0, - 0, - frag_len, - src_buffer_desc->data_addr); - - /* TBD: remove buffer_size */ - new_op->variable_fn_params.buffer_size = coll_op->variable_fn_params.buffer_size; - new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor; - - /* Set order info for new frag if there is a bcol needs ordering */ - MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op); - - /* Launch this collective !! 
*/ - ret = mca_coll_ml_generic_collectives_append_to_queue(new_op, task_setup); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_ERROR(("Failed to launch")); - return ret; - } - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_bcast_frag_progress(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - int ret; - int frag_len, current_coll_op = coll_op->fragment_data.current_coll_op; - size_t dt_size; - void *buf; - - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - mca_coll_ml_collective_operation_progress_t *new_op = NULL; - mca_coll_ml_task_setup_fn_t task_setup = NULL; - - ompi_datatype_type_size(coll_op->variable_fn_params.dtype, &dt_size); - - /* Keep the pipeline filled with fragments */ - while (coll_op->fragment_data.message_descriptor->n_active < - coll_op->fragment_data.message_descriptor->pipeline_depth) { - /* If an active fragment happens to have completed the collective during - * a hop into the progress engine, then don't launch a new fragment, - * instead break and return. - */ - if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled - == coll_op->fragment_data.message_descriptor->n_bytes_total) { - break; - } - - /* Get an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(OP_ML_MODULE(coll_op)); - if (NULL == src_buffer_desc) { - /* If there exist outstanding fragments, then break out - * and let an active fragment deal with this later, - * there are no buffers available. - */ - if (0 < coll_op->fragment_data.message_descriptor->n_active) { - return OMPI_SUCCESS; - } - - /* It is useless to call progress from here, since - * ml progress can't be executed as result ml memsync - * call will not be completed and no memory will be - * recycled. 
So we put the element on the list, and we will - * progress it later when memsync will recycle some memory*/ - - /* The fragment is already on list and - * the we still have no ml resources - * Return busy */ - if (!(coll_op->pending & REQ_OUT_OF_MEMORY)) { - ML_VERBOSE(10,("Out of resources %p adding to pending queue", coll_op)); - coll_op->pending |= REQ_OUT_OF_MEMORY; - opal_list_append(&((OP_ML_MODULE(coll_op))->waiting_for_memory_list), - (opal_list_item_t *) coll_op); - } else { - ML_VERBOSE(10,("Out of resources %p", coll_op)); - } - - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - /* Get a new collective descriptor and initialize it */ - new_op = mca_coll_ml_duplicate_op_prog_single_frag_dag - (OP_ML_MODULE(coll_op), coll_op); - /* We need this address for pointer arithmetic in memcpy */ - buf = coll_op->fragment_data.message_descriptor->dest_user_addr; - frag_len = ML_GET_FRAG_SIZE(coll_op, BCOL_BCAST); - - /* Decide based on global flag, not variable one */ - if (coll_op->fragment_data.message_descriptor->root) { - memcpy((void *)(uintptr_t)src_buffer_desc->data_addr, - (void *) ((uintptr_t) buf + (uintptr_t) coll_op-> - fragment_data.message_descriptor->n_bytes_scheduled) , frag_len); - - /* No unpack for root */ - new_op->process_fn = NULL; - new_op->variable_fn_params.root_flag = true; - new_op->variable_fn_params.root_route = NULL; - task_setup = OP_ML_MODULE(new_op)->coll_ml_bcast_functions[current_coll_op]-> - task_setup_fn[COLL_ML_ROOT_TASK_FN]; - - } else { - new_op->process_fn = mca_coll_ml_bcast_large_unpack_data; - new_op->variable_fn_params.root_flag = false; - new_op->variable_fn_params.root_route = coll_op->variable_fn_params.root_route; - task_setup = OP_ML_MODULE(new_op)->coll_ml_bcast_functions[current_coll_op]-> - task_setup_fn[COLL_ML_GENERAL_TASK_FN]; - } - - /* Setup fragment specific data */ - new_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len; - new_op->fragment_data.buffer_desc = src_buffer_desc; - 
new_op->fragment_data.fragment_size = frag_len; - new_op->fragment_data.message_descriptor->n_active++; - - ML_SET_VARIABLE_PARAMS_BCAST( - new_op, - OP_ML_MODULE(new_op), - frag_len, - MPI_BYTE, - src_buffer_desc, - 0, - 0, - frag_len, - src_buffer_desc->data_addr); - - /* Fill in bcast specific arguments */ - /* TBD: remove buffer_size */ - new_op->variable_fn_params.buffer_size = coll_op->variable_fn_params.buffer_size; - new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor; - - /* Set order info for new frag if there is a bcol needs ordering */ - MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op); - - ML_VERBOSE(10, ("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d", - new_op->variable_fn_params.buffer_size , - new_op->fragment_data.fragment_size, - new_op->fragment_data.message_descriptor->n_bytes_scheduled)); - - /* Launch this collective !! */ - ret = mca_coll_ml_generic_collectives_append_to_queue(new_op, task_setup); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - } - - return OMPI_SUCCESS; -} - -#define BCAST_FRAGMENTATION_IS_ENABLED(module) \ - (module->bcast_fn_index_table[LARGE_BCAST] < ML_BCAST_LARGE_DATA_KNOWN) - -static inline __opal_attribute_always_inline__ - int parallel_bcast_start(void *buf, int count, struct ompi_datatype_t *dtype, - int root, mca_coll_base_module_t *module, ompi_request_t **req) -{ - size_t pack_len = 0; - size_t dt_size = 0; - bool contig = false; - int bcast_index, n_fragments = 1; - - mca_coll_ml_collective_operation_progress_t * coll_op = NULL; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - mca_coll_ml_task_setup_fn_t task_setup; - OPAL_PTRDIFF_TYPE lb, extent; - - /* actual starting place of the user buffer (lb added) */ - void *actual_buf; - - ML_VERBOSE(10, ("Starting bcast, mca_coll_ml_bcast_uknown_root buf: %p", buf)); - - 
ompi_datatype_type_size(dtype, &dt_size); - pack_len = count * dt_size; - - /* Setup data buffer */ - ML_BUFFER_ALLOC_WAIT(ml_module, src_buffer_desc); - /* Get information about memory layout */ - contig = opal_datatype_is_contiguous_memory_layout((opal_datatype_t *)dtype, count); - - ompi_datatype_get_extent (dtype, &lb, &extent); - - actual_buf = (void *) ((uintptr_t) buf + lb); - - /* Allocate collective schedule and pack message */ - if (contig) { - if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_BCAST]) { - assert(pack_len <= ml_module->payload_block->size_buffer); - bcast_index = ml_module->bcast_fn_index_table[SMALL_BCAST]; - - ML_VERBOSE(10, ("Contig + small message %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index)); - ALLOCATE_AND_PACK_CONTIG_BCAST_FRAG(ml_module, coll_op, bcast_index, root, pack_len, - pack_len, actual_buf, src_buffer_desc); - - ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, count, dtype, - src_buffer_desc, 0, 0, ml_module->payload_block->size_buffer, - (src_buffer_desc->data_addr)); - } else if (BCAST_FRAGMENTATION_IS_ENABLED(ml_module)) { - /* We moved the fragmentation decision from communication creation time to - runtime, since for large messages the if latency is not so critical */ - size_t n_dts_per_frag; - int frag_len, pipeline_depth = mca_coll_ml_component.pipeline_depth; - bcast_index = ml_module->bcast_fn_index_table[LARGE_BCAST]; - - ML_VERBOSE(10, ("Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index)); - - /* Calculate the number of fragments required for this message */ - frag_len = (pack_len < (size_t) ml_module->small_message_thresholds[BCOL_BCAST] ? - pack_len : (size_t) ml_module->small_message_thresholds[BCOL_BCAST]); - - n_dts_per_frag = frag_len/dt_size; - n_fragments = (pack_len + dt_size*n_dts_per_frag - 1)/(dt_size*n_dts_per_frag); - pipeline_depth = (n_fragments < pipeline_depth ? 
n_fragments : pipeline_depth); - - ALLOCATE_AND_PACK_CONTIG_BCAST_FRAG(ml_module, coll_op, bcast_index, root, pack_len, - frag_len, actual_buf, src_buffer_desc); - ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, (frag_len/dt_size), dtype, - src_buffer_desc, 0, 0, frag_len, (src_buffer_desc->data_addr)); - - coll_op->full_message.fragment_launcher = mca_coll_ml_bcast_frag_progress; - coll_op->full_message.pipeline_depth = pipeline_depth; - /* Initialize fragment specific information */ - coll_op->fragment_data.current_coll_op = bcast_index; - /* coll_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len; */ - coll_op->fragment_data.fragment_size = frag_len; - coll_op->fragment_data.message_descriptor->n_active++; - /* should be removed */ - coll_op->variable_fn_params.buffer_size = frag_len; - - ML_VERBOSE(10, ("Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d", - coll_op->variable_fn_params.buffer_size, - coll_op->fragment_data.fragment_size)); - } else { - bcast_index = ml_module->bcast_fn_index_table[LARGE_BCAST]; - ML_VERBOSE(10, ("Contig + zero copy %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index)); - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_bcast_functions[bcast_index], - actual_buf, actual_buf, pack_len, - 0 /* offset for first pack */); - /* For large messages (bcast) this points to userbuf */ - /* Pasha: temporary work around for basesmuma, userbuf should - be removed */ - coll_op->variable_fn_params.userbuf = buf; - coll_op->process_fn = NULL; - coll_op->full_message.n_bytes_scheduled = pack_len; - - ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, count, dtype, - src_buffer_desc, 0, 0, - ml_module->payload_block->size_buffer, buf); - } - } else { - /* Non contiguous data type */ - bcast_index = ml_module->bcast_fn_index_table[SMALL_BCAST]; - ML_VERBOSE(10, ("NON Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index)); - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - 
ml_module->coll_ml_bcast_functions[bcast_index], - actual_buf, actual_buf, pack_len, - 0 /* offset for first pack */); - if (OPAL_LIKELY(pack_len > 0)) { - size_t max_data = 0; - - if (ompi_comm_rank(ml_module->comm) == root) { - struct iovec iov; - uint32_t iov_count = 1; - - opal_convertor_copy_and_prepare_for_send( - ompi_mpi_local_convertor, - &dtype->super, count, buf, 0, - &coll_op->full_message.send_convertor); - - opal_convertor_get_packed_size(&coll_op->full_message.send_convertor, - &coll_op->full_message.send_converter_bytes_packed); - - coll_op->full_message.n_bytes_total = - coll_op->full_message.send_converter_bytes_packed; - - iov.iov_base = (IOVBASE_TYPE*) src_buffer_desc->data_addr; - iov.iov_len = ml_module->small_message_thresholds[BCOL_BCAST]; - max_data = ml_module->small_message_thresholds[BCOL_BCAST]; - opal_convertor_pack(&coll_op->full_message.send_convertor, - &iov, &iov_count, &max_data); - coll_op->process_fn = NULL; - coll_op->full_message.n_bytes_scheduled = max_data; - - /* We need prepare the data for future pipe line comunication */ - coll_op->full_message.fragment_launcher = mca_coll_ml_bcast_frag_converter_progress; - coll_op->full_message.pipeline_depth = mca_coll_ml_component.pipeline_depth; - coll_op->full_message.root = true; - - } else { - opal_convertor_copy_and_prepare_for_send( - ompi_mpi_local_convertor, - &dtype->super, count, NULL, 0, - &coll_op->full_message.dummy_convertor); - - /* In non-root case we use it for #bytes remaining to receive */ - opal_convertor_get_packed_size(&coll_op->full_message.dummy_convertor, - &coll_op->full_message.send_converter_bytes_packed); - - opal_convertor_copy_and_prepare_for_recv( - ompi_mpi_local_convertor, - &dtype->super, count, buf, 0, - &coll_op->full_message.recv_convertor); - - opal_convertor_get_unpacked_size(&coll_op->full_message.recv_convertor, - &coll_op->full_message.recv_converter_bytes_packed); - - coll_op->full_message.root = false; - coll_op->full_message.n_bytes_total 
= - coll_op->full_message.recv_converter_bytes_packed; - coll_op->process_fn = mca_coll_ml_bcast_converter_unpack_data; - - coll_op->full_message.fragment_launcher = mca_coll_ml_bcast_frag_converter_progress; - coll_op->full_message.pipeline_depth = mca_coll_ml_component.pipeline_depth; - - max_data = ml_module->small_message_thresholds[BCOL_BCAST]; - coll_op->full_message.dummy_conv_position = 0; - mca_coll_ml_convertor_get_send_frag_size( - ml_module, &max_data, - &coll_op->full_message); - - coll_op->full_message.n_bytes_scheduled = max_data; - } - } - coll_op->fragment_data.current_coll_op = bcast_index; - coll_op->fragment_data.message_descriptor->n_active++; - coll_op->fragment_data.fragment_size = coll_op->full_message.n_bytes_scheduled; - - /* Set initial index */ - coll_op->full_message.next_expected_index = src_buffer_desc->buffer_index; - - /* Prepare linking information for future frags */ - COLL_ML_SETUP_ORDERING_INFO(coll_op, coll_op, NULL); - - /* Since the data is already packed we will use MPI_BYTE and byte count as datatype */ - ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, coll_op->full_message.n_bytes_scheduled, MPI_BYTE, - src_buffer_desc, 0, 0, ml_module->payload_block->size_buffer,(src_buffer_desc->data_addr)); - - n_fragments = (coll_op->full_message.n_bytes_total + - ml_module->small_message_thresholds[BCOL_BCAST] - 1) / ml_module->small_message_thresholds[BCOL_BCAST]; - } - - coll_op->variable_fn_params.hier_factor = 1; - coll_op->fragment_data.buffer_desc = src_buffer_desc; - - /* Set order info if there is a bcol needs ordering */ - MCA_COLL_ML_SET_ORDER_INFO(coll_op, n_fragments); - - if (ompi_comm_rank(ml_module->comm) == root) { - coll_op->full_message.root = - coll_op->variable_fn_params.root_flag = true; - coll_op->variable_fn_params.root_route = NULL; - task_setup = ml_module->coll_ml_bcast_functions[bcast_index]-> - task_setup_fn[COLL_ML_ROOT_TASK_FN]; - } else { - coll_op->full_message.root = - 
coll_op->variable_fn_params.root_flag = false; - - coll_op->variable_fn_params.root_route = - (NULL == coll_op->coll_schedule->topo_info->route_vector ? - NULL : &coll_op->coll_schedule->topo_info->route_vector[root]); - - task_setup = ml_module->coll_ml_bcast_functions[bcast_index]-> - task_setup_fn[COLL_ML_GENERAL_TASK_FN]; - } - - *req = &coll_op->full_message.super; - return mca_coll_ml_generic_collectives_launcher(coll_op, task_setup); -} - -int mca_coll_ml_parallel_bcast(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int ret; - ompi_request_t *req; - - ret = parallel_bcast_start(buf, count, dtype, root, module, &req); - if (OPAL_UNLIKELY(ret != OMPI_SUCCESS)) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - /* Blocking bcast */ - ompi_request_wait_completion(req); - ompi_request_free(&req); - - ML_VERBOSE(10, ("Bcast is done mca_coll_ml_bcast_known")); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_parallel_bcast_nb(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module) -{ - int ret; - - ret = parallel_bcast_start(buf, count, dtype, root, module, req); - if (OPAL_UNLIKELY(ret != OMPI_SUCCESS)) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - ML_VERBOSE(10, ("Bcast is done mca_coll_ml_bcast_known")); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_bcast_sequential_root(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - - /* local variables */ - int ret, fn_idx; - size_t pack_len = 0; - size_t dt_size = 0; - - mca_coll_ml_collective_operation_progress_t * coll_op = NULL; - mca_coll_ml_compound_functions_t *fixed_schedule; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - 
mca_bcol_base_coll_fn_desc_t *func; - OPAL_PTRDIFF_TYPE lb, extent; - - /* actual starting place of the user buffer (lb added) */ - void *actual_buf; - - ML_VERBOSE(10, ("Starting static bcast, small messages")); - - assert(NULL != dtype); - /* Calculate size of the data, - * on this stage only contiguous data is supported */ - ompi_datatype_type_size(dtype, &dt_size); - pack_len = count * dt_size; - ompi_datatype_get_extent (dtype, &lb, &extent); - - actual_buf = (void *) ((uintptr_t) buf + lb); - - /* Setup data buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - /* Allocate collective schedule and pack message */ - if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_BCAST]) { - /* The len of the message can not be larger than ML buffer size */ - assert(pack_len <= ml_module->payload_block->size_buffer); - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_bcast_functions[ML_BCAST_SMALL_DATA_SEQUENTIAL], - actual_buf, actual_buf, pack_len, - 0 /* offset for first pack */); - if (ompi_comm_rank(comm) == root) { - /* single frag, pack the data */ - memcpy((void *)(uintptr_t)src_buffer_desc->data_addr, - buf, pack_len); - /* No unpack for root */ - coll_op->process_fn = NULL; - } else { - coll_op->process_fn = mca_coll_ml_bcast_small_unpack_data; - } - - coll_op->variable_fn_params.sbuf = - src_buffer_desc->data_addr; - } else { - ML_VERBOSE(10, ("ML_BCAST_LARGE_DATA_KNOWN case.")); - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_SEQUENTIAL], - actual_buf, actual_buf, pack_len, - 0 /* offset for first pack */); - /* For large messages (bcast) this points to userbuf */ - /* Pasha: temporary work around for basesmuma, userbuf should - be removed */ - coll_op->variable_fn_params.userbuf = - 
coll_op->variable_fn_params.sbuf = actual_buf; - - coll_op->process_fn = NULL; - } - - /* Fill in the function arguments */ - coll_op->variable_fn_params.sequence_num = - OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1); - coll_op->variable_fn_params.count = count; - coll_op->variable_fn_params.dtype = dtype; - - coll_op->variable_fn_params.buffer_index = src_buffer_desc->buffer_index; - coll_op->variable_fn_params.src_desc = src_buffer_desc; - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = 0; - - /* pasha - why we duplicate it ? */ - coll_op->fragment_data.buffer_desc = src_buffer_desc; - - /* pack data into payload buffer - NOTE: assume no fragmenation at this stage */ - if (ompi_comm_rank(comm) == root) { - coll_op->variable_fn_params.root_flag = true; - coll_op->variable_fn_params.root_route = - &coll_op->coll_schedule->topo_info->route_vector[root]; - - coll_op->full_message.n_bytes_scheduled = pack_len; - } else { - coll_op->variable_fn_params.root_flag = false; - coll_op->variable_fn_params.root_route = - &coll_op->coll_schedule->topo_info->route_vector[root]; - } - - /* seems like we should fix a schedule here and now */ - fixed_schedule = coll_op->coll_schedule-> - comp_fn_arr[coll_op->variable_fn_params.root_route->level]; - - /* now we set this schedule as the compound function list */ - coll_op->coll_schedule->component_functions = fixed_schedule; - - coll_op->sequential_routine.current_active_bcol_fn = 0; - - while (true) { - /* ready, aim, fire collective(s)!! 
*/ - fn_idx = coll_op->sequential_routine.current_active_bcol_fn; - - func = fixed_schedule[fn_idx].bcol_function; - ret = func->coll_fn(&coll_op->variable_fn_params, - (struct mca_bcol_base_function_t *) &fixed_schedule[fn_idx].constant_group_data); - /* set the coll_fn_started flag to true */ - if (BCOL_FN_COMPLETE == ret) { - /* done with this routine, bump the active counter */ - coll_op->sequential_routine.current_active_bcol_fn++; - coll_op->variable_fn_params.root_flag = true; - /* check for collective completion */ - if (coll_op->sequential_routine.current_active_bcol_fn == - coll_op->coll_schedule->n_fns) { - /* handle fragment completion */ - ret = coll_ml_fragment_completion_processing(coll_op); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - mca_coll_ml_abort_ml("Failed to run coll_ml_fragment_completion_processing"); - } - - /* break out of while loop */ - break; - } - } else { - /* put entire collective opperation onto sequential queue */ - opal_list_append(&mca_coll_ml_component.sequential_collectives, - (opal_list_item_t *) coll_op); - break; - } - } - - /* Blocking bcast */ - ompi_request_wait_completion(&coll_op->full_message.super); - ompi_request_free((ompi_request_t **) &coll_op); - - ML_VERBOSE(10, ("Bcast is done")); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_colls.h b/ompi/mca/coll/ml/coll_ml_colls.h deleted file mode 100644 index 35bb4da072a..00000000000 --- a/ompi/mca/coll/ml/coll_ml_colls.h +++ /dev/null @@ -1,552 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_COLL_ML_COLLS_H -#define MCA_COLL_ML_COLLS_H - -#include "ompi_config.h" -#include "ompi/mca/bcol/bcol.h" - -#define COLL_ML_FN_NAME_LEN 256 - - -/* utility information used to coordinate activities, such as resource - * management between different functions in the hierarchy - */ -struct mca_coll_ml_utility_data_t { - - /* RLG - temp fix !!!! - really need to remove this, but right now - do not want to change the signature of the collective primitives to - use coll_ml_utility_data_t rather than mca_bcol_base_function_t */ - int dummy; - - /* module */ - struct mca_bcol_base_module_t *bcol_module; - - /* */ - int index_in_consecutive_same_bcol_calls; - - /* number of times functions from this bcol are called in order */ - int n_of_this_type_in_a_row; - - /* number of times functions from this module are called - * in the collective operation. */ - int n_of_this_type_in_collective; - int index_of_this_type_in_collective; - -}; -typedef struct mca_coll_ml_utility_data_t mca_coll_ml_utility_data_t; - - -/* forward declaration */ -struct mca_coll_ml_collective_operation_progress_t; -struct mca_coll_ml_task_status_t; - -typedef int (* mca_coll_ml_process_op_fn_t) - (struct mca_coll_ml_collective_operation_progress_t *coll_op); - -typedef int (* mca_coll_ml_task_comp_fn_t) - (struct mca_coll_ml_task_status_t *coll_op); - -typedef int (* mca_coll_ml_fragment_launch_fn_t) - ( struct mca_coll_ml_collective_operation_progress_t *coll_op); - -typedef int (* mca_coll_ml_sequential_task_setup_fn_t) - ( struct mca_coll_ml_collective_operation_progress_t *coll_op); -/* This data structure defines the dependencies for a given - * compound operation. We will use this as a basis for implementing - * collective operations. 
- */ -struct mca_coll_ml_compound_functions_t { - /* label */ - char fn_name[COLL_ML_FN_NAME_LEN]; - - /* hierarchy level that is used for this bcol */ - int h_level; - - /* the list of functions that make up this task */ - /* coll_bcol_collective_description_t *bcol_function; */ - mca_bcol_base_coll_fn_desc_t *bcol_function; - /* task completion function for this compound function */ - mca_coll_ml_task_comp_fn_t task_comp_fn; - - /* module specific information that is a constant on a per group - * basis - */ - mca_coll_ml_utility_data_t constant_group_data; - - /* number of dependencies to be satified before these function can be - * started */ - int num_dependencies; - - /* - * number of notifications to perform on completion. The assumption - * is that a counter will be incremented. - */ - int num_dependent_tasks; - - /* - * pointers to counters that need be updated. This assumes - * an array of tasks is used to describe the ML level - * collective operation, with these indecies referencing elements - * in this array. - */ - int *dependent_task_indices; - -}; - -typedef struct mca_coll_ml_compound_functions_t mca_coll_ml_compound_functions_t; - -/* Forward declaration for operation_description_t */ -struct mca_coll_ml_module_t; - -enum { - COLL_ML_GENERAL_TASK_FN, - COLL_ML_ROOT_TASK_FN, - COLL_ML_MAX_TASK_FN -}; - -enum { - SEQ_TASK_NOT_STARTED, - SEQ_TASK_PENDING, - SEQ_TASK_IN_PROG -}; - -typedef void (*mca_coll_ml_task_setup_fn_t) (struct mca_coll_ml_task_status_t *task_status, int index, struct mca_coll_ml_compound_functions_t *func); - -/* - * Collective operation definition - */ -struct mca_coll_ml_collective_operation_description_t { - - /* - * Type of collective opeartion - there are two types: - * 1) sequential progress through the collectives is sufficient - * 2) general treatment, popping tasks onto execution queus is needed. 
- */ - int progress_type; - - struct mca_coll_ml_topology_t *topo_info; - - /* - * number of functions in collective operation - */ - int n_fns; - - /* - * list of functions - */ - mca_coll_ml_compound_functions_t *component_functions; - - /* - * array of lists of functions - */ - mca_coll_ml_compound_functions_t **comp_fn_arr; - - /* - * indices into the list - fixes a sequential schedule - */ - int *sch_idx; - - /* - * Task setup functions, so far we have only 3 - root and non-root - */ - mca_coll_ml_task_setup_fn_t task_setup_fn[COLL_ML_MAX_TASK_FN]; - - /* number of functions are called for bcols need ordering */ - int n_fns_need_ordering; -}; -typedef struct mca_coll_ml_collective_operation_description_t - mca_coll_ml_collective_operation_description_t; - -/* Data structure used to track the state of individual bcol - * functions. This is used to track dependencies and completion - * to progress the ML level function correctly. - * - * mca_coll_ml_task_status_t will be associated with an - * mca_coll_ml_collective_operation_progress_t structure for - * the duration of the lifetime of a communicator. - * An array of task statuses will be stored with - * the mca_coll_ml_collective_operation_progress_t data structure, so - * that the taks status elements do not need to be moved back to - * a free list before they are re-used. When the ML level function - * is complete, all mca_coll_ml_task_status_t are available for - * re-use. - */ -struct mca_coll_ml_task_status_t{ - /* need to move this between lists to progress this correctly */ - opal_list_item_t item; - - /* number of dependencies satisfied */ - int n_dep_satisfied; - - /* *************************************************************** - * Pasha: - * I'm adding to the status: num_dependencies, num_dependent_tasks and - * dependent_task_indices. The information originally resided on mca_coll_ml_compound_functions_t. - * For collective operation with static nature it is not problem. 
- * But for Bcast operation, where run time parameters, like root, actually - * define the dependency. rt prefix mean run-time. - */ - - /* number of dependencies to be satisfied before these function can be - * started */ - int rt_num_dependencies; - - /* - * number of notifications to perform on completion. The assumption - * is that a counter will be incremented. - */ - int rt_num_dependent_tasks; - - /* - * pointers to counters that need be updated. This assumes - * an array of tasks is used to describe the ML level - * collective operation, with these indecies referencing elements - * in this array. - */ - int *rt_dependent_task_indices; - /* - * - * ***************************************************************/ - - /* index in collective schedule */ - int my_index_in_coll_schedule; - - /* function pointers */ - mca_bcol_base_coll_fn_desc_t *bcol_fn; - - /* association with a specific collective task - the ML - * mca_coll_ml_collective_operation_progress_t stores the - * specific function parameters */ - struct mca_coll_ml_collective_operation_progress_t *ml_coll_operation; - - mca_coll_ml_task_comp_fn_t task_comp_fn; -}; -typedef struct mca_coll_ml_task_status_t mca_coll_ml_task_status_t; - -typedef enum mca_coll_ml_pending_type_t { - REQ_OUT_OF_ORDER = 1, - REQ_OUT_OF_MEMORY = 1 << 1 -} mca_coll_ml_pending_type_t; - -/* Forward declaration */ -struct mca_bcol_base_payload_buffer_desc_t; -/* Data structure used to track ML level collective operation - * progress. - */ -struct mca_coll_ml_collective_operation_progress_t { - /* need this to put on a list properly */ - /* Full message information */ - struct full_message_t { - /* make this a list item */ - ompi_request_t super; - /* Next expected fragment. - * It used for controling order of converter unpack operation */ - size_t next_expected_index; - /* Pointer to last intilized fragment. 
- * It used for controling order of converter unpack operation */ - struct mca_coll_ml_collective_operation_progress_t *last_started_frag; - /* destination data address in user memory */ - void *dest_user_addr; - /* source data address in user memory */ - void *src_user_addr; - /* total message size */ - size_t n_bytes_total; - /* per-process total message size - relevant for operations - * such as gather and scatter, where each rank has it's - * own unique data - */ - size_t n_bytes_per_proc_total; - size_t max_n_bytes_per_proc_total; - /* data processes - from a local perspective */ - size_t n_bytes_delivered; - /* current offset - where to continue with next fragment */ - size_t n_bytes_scheduled; - /* number of fragments needed to process this message */ - size_t n_fragments; - /* number of active frags */ - int n_active; - /* actual pipeline depth */ - int pipeline_depth; - /* am I the real root of the collective ? */ - bool root; - /* collective fragment launcher */ - mca_coll_ml_fragment_launch_fn_t fragment_launcher; - /* is data contingous */ - bool send_data_continguous; - bool recv_data_continguous; - /* data type count */ - int64_t send_count; - int64_t recv_count; - /* extent of the data types */ - size_t send_extent; - size_t recv_extent; - /* send data type */ - struct ompi_datatype_t * send_data_type; - /* needed for non-contigous buffers */ - size_t offset_into_send_buffer; - /* receive data type */ - struct ompi_datatype_t * recv_data_type; - /* needed for non-contigous buffers */ - size_t offset_into_recv_buffer; - /* Convertors for non contigous data */ - opal_convertor_t send_convertor; - opal_convertor_t recv_convertor; - /* Will be used by receiver for #bytes calc in the next frag */ - opal_convertor_t dummy_convertor; - size_t dummy_conv_position; - /* Size of packed data */ - size_t send_converter_bytes_packed; - size_t recv_converter_bytes_packed; - /* In case if ordering is needed: order num for next frag */ - int next_frag_num; - /* The 
variable is used by non-blocking memory synchronization code - * for caching bank index */ - int bank_index_to_recycle; - /* need a handle for collective progress e.g. alltoall*/ - bcol_fragment_descriptor_t frag_info; - } full_message; - - /* collective operation being progressed */ - mca_coll_ml_collective_operation_description_t *coll_schedule; - /* */ - mca_coll_ml_process_op_fn_t process_fn; - - mca_coll_base_module_t *coll_module; - - /* If not null , we have to release next fragment */ - struct mca_coll_ml_collective_operation_progress_t *next_to_process_frag; - /* pointer to previous fragment */ - struct mca_coll_ml_collective_operation_progress_t *prev_frag; - /* This flag marks that the fragment is pending on the waiting - * to be processed prior to recycling - */ - enum mca_coll_ml_pending_type_t pending; - - /* Fragment data */ - struct fragment_data_t { - /* current buffer pointer - offset (in bytes) into the user data */ - size_t offset_into_user_buffer; - size_t offset_into_user_buffer_per_proc; - - /* amount of data (in bytes) in this fragment - amount of data - * actually processed */ - size_t fragment_size; - size_t per_rank_fragment_size; - size_t data_type_count_per_frag; - - /* pointer to full message progress data */ - struct full_message_t *message_descriptor; - - /* ML buffer descriptor attached to this buffer */ - struct mca_bcol_base_payload_buffer_desc_t *buffer_desc; - /* handle for collective progress, e.g. alltoall */ - bcol_fragment_descriptor_t bcol_fragment_desc; - - /* Which collective algorithm */ - int current_coll_op; - } fragment_data; - - /* specific function parameters */ - /* the assumption is that the variable parameters passed into - * the ML level function will persist until the collective operation - * is complete. For a blocking function this is until the collective - * function is exited, and for nonblocking collective functions this - * is until test or wait completes the collective. 
- */ - int global_root; - bcol_function_args_t variable_fn_params; - - struct{ - /* current active function - for sequential algorithms */ - int current_active_bcol_fn; - - /* current function status - not started, or in progress. - * When the routine has completed, the active bcol index is - * incremented, so no need to keep track of a completed - * status. - */ - int current_bcol_status; - - /* use this call back to setup algorithm specific info - after each level necessary - */ - mca_coll_ml_sequential_task_setup_fn_t seq_task_setup; - - } sequential_routine; - - struct{ - /* - * BCOL function status - individual elements will be posted to - * ml level component queues, as appropriate. - */ - mca_coll_ml_task_status_t *status_array; - - /* number of completed tasks - need this for collective completion. - * Resource completion is tracked by each BCOL module . - */ - int num_tasks_completed; - } dag_description; -}; -typedef struct mca_coll_ml_collective_operation_progress_t -mca_coll_ml_collective_operation_progress_t; -OBJ_CLASS_DECLARATION(mca_coll_ml_collective_operation_progress_t); - -#define OP_ML_MODULE(op) ((mca_coll_ml_module_t *)((op)->coll_module)) -#define GET_COMM(op) ((OP_ML_MODULE(op))->comm) -#define IS_COLL_SYNCMEM(op) (ML_MEMSYNC == op->fragment_data.current_coll_op) - -#define CHECK_AND_RECYCLE(op) \ -do { \ - if (0 == (op)->pending) { \ - /* Caching 2 values that we can't to touch on op after returing it */ \ - /* back to the free list (free list may release memory on distruct )*/ \ - struct ompi_communicator_t *comm = GET_COMM(op); \ - bool is_coll_sync = IS_COLL_SYNCMEM(op); \ - ML_VERBOSE(10, ("Releasing %p", op)); \ - OMPI_REQUEST_FINI(&(op)->full_message.super); \ - opal_free_list_return (&(((mca_coll_ml_module_t *)(op)->coll_module)-> \ - coll_ml_collective_descriptors), \ - (opal_free_list_item_t *)op); \ - /* Special check for memory synchronization completion */ \ - /* We have to return it first to free list, since the communicator 
*/ \ - /* release potentially may trigger ML module distraction and having */ \ - /* the element not on the list may cause memory leak. */ \ - if (OPAL_UNLIKELY(is_coll_sync)) { \ - if (OMPI_COMM_IS_INTRINSIC(comm)) { \ - opal_show_help("help-mpi-coll-ml.txt", \ - "coll-ml-check-fatal-error", true, \ - comm->c_name); \ - ompi_mpi_abort(comm, 6); \ - } else { \ - opal_show_help("help-mpi-coll-ml.txt", \ - "coll-ml-check-error", true, \ - comm->c_name); \ - /* After this point it is UNSAFE to touch ml module */ \ - /* or communicator */ \ - OBJ_RELEASE(comm); \ - } \ - } \ - } \ -} while (0) - -#define MCA_COLL_ML_SET_ORDER_INFO(coll_progress, num_frags) \ -do { \ - mca_coll_ml_topology_t *topo = (coll_progress)->coll_schedule->topo_info; \ - bcol_function_args_t *variable_params = &(coll_progress)->variable_fn_params; \ - if (topo->topo_ordering_info.num_bcols_need_ordering > 0) { \ - variable_params->order_info.bcols_started = 0; \ - variable_params->order_info.order_num = \ - topo->topo_ordering_info.next_order_num; \ - variable_params->order_info.n_fns_need_ordering = \ - (coll_progress)->coll_schedule->n_fns_need_ordering; \ - topo->topo_ordering_info.next_order_num += num_frags; \ - (coll_progress)->fragment_data.message_descriptor->next_frag_num = \ - variable_params->order_info.order_num + 1; \ - } \ -} while (0) - -#define MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(coll_progress) \ -do { \ - mca_coll_ml_topology_t *topo = (coll_progress)->coll_schedule->topo_info; \ - if (topo->topo_ordering_info.num_bcols_need_ordering > 0) { \ - bcol_function_args_t *variable_params = &(coll_progress)->variable_fn_params; \ - struct fragment_data_t *frag_data = &(coll_progress)->fragment_data; \ - variable_params->order_info.bcols_started = 0; \ - variable_params->order_info.order_num = frag_data->message_descriptor->next_frag_num; \ - variable_params->order_info.n_fns_need_ordering = \ - (coll_progress)->coll_schedule->n_fns_need_ordering; \ - 
frag_data->message_descriptor->next_frag_num++; \ - } \ -} while (0) - -#define MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule) \ -do { \ - int i; \ - (schedule)->n_fns_need_ordering = 0; \ - for (i = 0; i < (schedule)->n_fns; ++i) { \ - mca_bcol_base_module_t *current_bcol = \ - (schedule)->component_functions[i].constant_group_data.bcol_module; \ - assert (NULL != current_bcol); \ - if (current_bcol->bcol_component->need_ordering) { \ - (schedule)->n_fns_need_ordering++; \ - } \ - } \ -} while (0) - -enum { - MCA_COLL_ML_NET_STREAM_SEND, - MCA_COLL_ML_NET_STREAM_RECV -}; - -static inline __opal_attribute_always_inline__ - int mca_coll_ml_convertor_prepare(ompi_datatype_t *dtype, int count, void *buff, - opal_convertor_t *convertor, int stream) -{ - size_t bytes_packed; - - if (MCA_COLL_ML_NET_STREAM_SEND == stream) { - opal_convertor_copy_and_prepare_for_send( - ompi_mpi_local_convertor, - &dtype->super, count, buff, 0, - convertor); - } else { - opal_convertor_copy_and_prepare_for_recv( - ompi_mpi_local_convertor, - &dtype->super, count, buff, 0, - convertor); - } - - opal_convertor_get_packed_size(convertor, &bytes_packed); - - return bytes_packed; -} - -static inline __opal_attribute_always_inline__ - int mca_coll_ml_convertor_pack(void *data_addr, size_t buff_size, - opal_convertor_t *convertor) -{ - struct iovec iov; - - size_t max_data = 0; - uint32_t iov_count = 1; - - iov.iov_base = (IOVBASE_TYPE*) data_addr; - iov.iov_len = buff_size; - - opal_convertor_pack(convertor, &iov, &iov_count, &max_data); - - return max_data; -} - -static inline __opal_attribute_always_inline__ - int mca_coll_ml_convertor_unpack(void *data_addr, size_t buff_size, - opal_convertor_t *convertor) -{ - struct iovec iov; - - size_t max_data = 0; - uint32_t iov_count = 1; - - iov.iov_base = (void *) (uintptr_t) data_addr; - iov.iov_len = buff_size; - - opal_convertor_unpack(convertor, &iov, &iov_count, &max_data); - - return max_data; -} -#endif /* MCA_COLL_ML_COLLS_H */ - diff --git 
a/ompi/mca/coll/ml/coll_ml_component.c b/ompi/mca/coll/ml/coll_ml_component.c deleted file mode 100644 index 4b4cf277c15..00000000000 --- a/ompi/mca/coll/ml/coll_ml_component.c +++ /dev/null @@ -1,449 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Most of the description of the data layout is in the - * coll_sm_module.c file. - */ - -#include "ompi_config.h" - -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/mca/coll/base/base.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/sbgp/base/base.h" - -#include "coll_ml.h" -#include "coll_ml_inlines.h" - -#include "ompi/patterns/net/netpatterns.h" -#include "coll_ml_mca.h" -#include "coll_ml_custom_utils.h" - - -/* - * Public string showing the coll ompi_ml V2 component version number - */ -const char *mca_coll_ml_component_version_string = -"Open MPI ml-V2 collective MCA component version " OMPI_VERSION; - -/* - * Local functions - */ - -static int ml_open(void); -static int ml_close(void); -static int coll_ml_progress(void); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_coll_ml_component_t mca_coll_ml_component = { - - /* First, fill in the super */ - - .super = { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .collm_version = { - MCA_COLL_BASE_VERSION_2_0_0, - - 
/* Component name and version */ - - .mca_component_name = "ml", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open, close, and register functions */ - - .mca_open_component = ml_open, - .mca_close_component = ml_close, - .mca_register_component_params = mca_coll_ml_register_params - }, - .collm_data = { - /* The component is not checkpoint ready */ - MCA_BASE_METADATA_PARAM_NONE - }, - - /* Initialization / querying functions */ - .collm_init_query = mca_coll_ml_init_query, - .collm_comm_query = mca_coll_ml_comm_query, - }, -}; - -void mca_coll_ml_abort_ml(char *message) -{ - ML_ERROR(("ML Collective FATAL ERROR: %s", message)); - /* shutdown the MPI */ - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_INTERN); -} -/* - * progress function - */ - -#define INDEX(task) ((task)->my_index_in_coll_schedule) -#define ACTIVE_L (&mca_coll_ml_component.active_tasks) -#define PENDING_L (&mca_coll_ml_component.pending_tasks) -#define SEQ_L (&mca_coll_ml_component.sequential_collectives) - -static int coll_ml_progress() -{ - - int rc = OMPI_SUCCESS; - int fn_idx; - - mca_coll_ml_task_status_t *task_status, *task_status_tmp; - mca_coll_ml_collective_operation_progress_t *seq_coll_op; - mca_coll_ml_collective_operation_progress_t *seq_coll_op_tmp; - - mca_bcol_base_module_collective_fn_primitives_t progress_fn, - coll_fn; - mca_coll_ml_utility_data_t *const_args; - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - /* Pasha: Not sure that is it correct way to resolve the problem. - Iprobe call for progress engine. The progress engine calls for our - progress and as result the first element on the list is progressed again - and so we call for Iprobe again.... as result we get HUGE stack. - - One way to prevent it - remove the item from the list, and once you finish - to process it - put it back. - - Other way - put flag on component, if the progress is running - exit immediate. 
- */ - if (cm->progress_is_busy) { - /* We are already working...*/ - return OMPI_SUCCESS; - } else { - cm->progress_is_busy = true; - } - - /* progress sequential collective operations */ - /* RLG - need to do better here for parallel progress */ - OPAL_THREAD_LOCK(&(cm->sequential_collectives_mutex)); - OPAL_LIST_FOREACH_SAFE(seq_coll_op, seq_coll_op_tmp, SEQ_L, mca_coll_ml_collective_operation_progress_t) { - do { - fn_idx = seq_coll_op->sequential_routine.current_active_bcol_fn; - /* initialize the task */ - - if (SEQ_TASK_IN_PROG == seq_coll_op->sequential_routine.current_bcol_status){ - progress_fn = seq_coll_op->coll_schedule-> - component_functions[fn_idx].bcol_function->progress_fn; - } else { - /* PPP Pasha - apparently task setup should be called only here. see linr 190 */ - progress_fn = seq_coll_op->coll_schedule-> - component_functions[fn_idx].bcol_function->coll_fn; - } - - const_args = &seq_coll_op->coll_schedule->component_functions[fn_idx].constant_group_data; - /* RLG - note need to move to useing coll_ml_utility_data_t as - * collective argument, rather than mca_bcol_base_function_t - */ - rc = progress_fn(&(seq_coll_op->variable_fn_params), (mca_bcol_base_function_t *)const_args); - if (BCOL_FN_COMPLETE == rc) { - /* done with this routine */ - seq_coll_op->sequential_routine.current_active_bcol_fn++; - /* this is totally hardwired for bcast, need a general call-back */ - - fn_idx = seq_coll_op->sequential_routine.current_active_bcol_fn; - if (fn_idx == seq_coll_op->coll_schedule->n_fns) { - /* done with this collective - recycle descriptor */ - - /* remove from the progress list */ - (void) opal_list_remove_item(SEQ_L, (opal_list_item_t *)seq_coll_op); - - /* handle fragment completion */ - rc = coll_ml_fragment_completion_processing(seq_coll_op); - - if (OMPI_SUCCESS != rc) { - mca_coll_ml_abort_ml("Failed to run coll_ml_fragment_completion_processing"); - } - } else { - rc = seq_coll_op->sequential_routine.seq_task_setup(seq_coll_op); - if 
(OMPI_SUCCESS != rc) { - mca_coll_ml_abort_ml("Failed to run sequential task setup"); - } - - seq_coll_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING; - continue; - } - } else if (BCOL_FN_NOT_STARTED == rc) { - seq_coll_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING; - } else if (BCOL_FN_STARTED == rc) { - seq_coll_op->sequential_routine.current_bcol_status = SEQ_TASK_IN_PROG; - } - - break; - } while (true); - } - OPAL_THREAD_UNLOCK(&(cm->sequential_collectives_mutex)); - - /* general dag's */ - /* see if active tasks can be progressed */ - OPAL_THREAD_LOCK(&(cm->active_tasks_mutex)); - OPAL_LIST_FOREACH(task_status, ACTIVE_L, mca_coll_ml_task_status_t) { - /* progress task */ - progress_fn = task_status->bcol_fn->progress_fn; - const_args = &task_status->ml_coll_operation->coll_schedule-> - component_functions[INDEX(task_status)].constant_group_data; - rc = progress_fn(&(task_status->ml_coll_operation->variable_fn_params), - (mca_bcol_base_function_t *)const_args); - if (BCOL_FN_COMPLETE == rc) { - ML_VERBOSE(3, ("GOT BCOL_COMPLETED!!!!")); - rc = mca_coll_ml_task_completion_processing(&task_status, ACTIVE_L); - if (OMPI_SUCCESS != rc) { - mca_coll_ml_abort_ml("Failed to run mca_coll_ml_task_completion_processing"); - } - } else if (BCOL_FN_STARTED == rc) { - /* nothing to do */ - } else { - mca_coll_ml_abort_ml("Failed to run mca_coll_ml_task_completion_processing"); - } - } - OPAL_THREAD_UNLOCK(&(cm->active_tasks_mutex)); - - /* see if new tasks can be initiated */ - OPAL_THREAD_LOCK(&(cm->pending_tasks_mutex)); - OPAL_LIST_FOREACH_SAFE(task_status, task_status_tmp, PENDING_L, mca_coll_ml_task_status_t) { - /* check to see if dependencies are satisfied */ - int n_dependencies = task_status->rt_num_dependencies; - int n_dependencies_satisfied = task_status->n_dep_satisfied; - - if (n_dependencies == n_dependencies_satisfied) { - /* initiate the task */ - coll_fn = task_status->bcol_fn->coll_fn; - const_args = 
&task_status->ml_coll_operation->coll_schedule-> - component_functions[INDEX(task_status)].constant_group_data; - rc = coll_fn(&(task_status->ml_coll_operation->variable_fn_params), - (mca_bcol_base_function_t *)const_args); - if (BCOL_FN_COMPLETE == rc) { - ML_VERBOSE(3, ("GOT BCOL_COMPLETED!")); - rc = mca_coll_ml_task_completion_processing(&task_status, PENDING_L); - if (OMPI_SUCCESS != rc) { - mca_coll_ml_abort_ml("Failed to run mca_coll_ml_task_completion_processing"); - } - } else if ( BCOL_FN_STARTED == rc ) { - ML_VERBOSE(3, ("GOT BCOL_STARTED!")); - (void) opal_list_remove_item(PENDING_L, (opal_list_item_t *)task_status); - /* RLG - is there potential for deadlock here ? Need to - * look at this closely - */ - OPAL_THREAD_LOCK(&(cm->active_tasks_mutex)); - opal_list_append(ACTIVE_L, (opal_list_item_t *)task_status); - OPAL_THREAD_UNLOCK(&(cm->active_tasks_mutex)); - } else if( BCOL_FN_NOT_STARTED == rc ) { - /* nothing to do */ - ML_VERBOSE(10, ("GOT BCOL_FN_NOT_STARTED!")); - } else { - OPAL_THREAD_UNLOCK(&(cm->pending_tasks_mutex)); - /* error will be returned - RLG : need to reconsider return - * types - we have no way to convey error information - * the way the code is implemented now */ - ML_VERBOSE(3, ("GOT error !")); - rc = OMPI_ERROR; - OMPI_ERRHANDLER_RETURN(rc,MPI_COMM_WORLD,rc,"Error returned from bcol function: aborting"); - break; - } - } - } - OPAL_THREAD_UNLOCK(&(cm->pending_tasks_mutex)); - - /* return */ - cm->progress_is_busy = false; - - return rc; -} - - -static void adjust_coll_config_by_mca_param(void) -{ - /* setting bcast mca params */ - if (COLL_ML_STATIC_BCAST == mca_coll_ml_component.bcast_algorithm) { - mca_coll_ml_component.coll_config[ML_BCAST][ML_SMALL_MSG].algorithm_id = ML_BCAST_SMALL_DATA_KNOWN; - mca_coll_ml_component.coll_config[ML_BCAST][ML_LARGE_MSG].algorithm_id = ML_BCAST_LARGE_DATA_KNOWN; - } else if (COLL_ML_SEQ_BCAST == mca_coll_ml_component.bcast_algorithm) { - 
mca_coll_ml_component.coll_config[ML_BCAST][ML_SMALL_MSG].algorithm_id = ML_BCAST_SMALL_DATA_SEQUENTIAL; - mca_coll_ml_component.coll_config[ML_BCAST][ML_LARGE_MSG].algorithm_id = ML_BCAST_LARGE_DATA_SEQUENTIAL; - } else { /* Unknown root */ - mca_coll_ml_component.coll_config[ML_BCAST][ML_SMALL_MSG].algorithm_id = ML_BCAST_SMALL_DATA_UNKNOWN; - mca_coll_ml_component.coll_config[ML_BCAST][ML_LARGE_MSG].algorithm_id = ML_BCAST_LARGE_DATA_UNKNOWN; - } -} - -/* - * Open the component - */ -static int ml_open(void) -{ - /* local variables */ - int rc, c_idx, m_idx; - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - - /* set the starting sequence number */ - cs->base_sequence_number = -1; - cs->progress_is_busy = false; - - /* If the priority is zero (default) disable the component */ - if (mca_coll_ml_component.ml_priority <= 0) { - return OMPI_ERR_NOT_AVAILABLE; - } - - /* Init memory structures (no real memory is allocated) */ - OBJ_CONSTRUCT(&cs->memory_manager, mca_coll_ml_lmngr_t); - - if (OMPI_SUCCESS != (rc = mca_base_framework_open(&ompi_sbgp_base_framework, 0))) { - fprintf(stderr," failure in open mca_sbgp_base_open \n"); - return rc; - } - if (OMPI_SUCCESS != (rc = mca_base_framework_open(&ompi_bcol_base_framework, 0))) { - fprintf(stderr," failure in open mca_bcol_base_open \n"); - return rc; - } - - /* Reset collective tunings cache */ - for (c_idx = 0; c_idx < ML_NUM_OF_FUNCTIONS; c_idx++) { - for (m_idx = 0; m_idx < ML_NUM_MSG; m_idx++) { - mca_coll_ml_reset_config(&cs->coll_config[c_idx][m_idx]); - } - } - - adjust_coll_config_by_mca_param(); - - /* Load configuration file and cache the configuration on component */ - rc = mca_coll_ml_config_file_init(); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - - /* reigster the progress function */ - rc = opal_progress_register(coll_ml_progress); - if (OMPI_SUCCESS != rc ) { - fprintf(stderr," failed to register the ml progress function \n"); - fflush(stderr); - return rc; - } - - 
OBJ_CONSTRUCT(&(cs->pending_tasks_mutex), opal_mutex_t); - OBJ_CONSTRUCT(&(cs->pending_tasks), opal_list_t); - OBJ_CONSTRUCT(&(cs->active_tasks_mutex), opal_mutex_t); - OBJ_CONSTRUCT(&(cs->active_tasks), opal_list_t); - OBJ_CONSTRUCT(&(cs->sequential_collectives_mutex), opal_mutex_t); - OBJ_CONSTRUCT(&(cs->sequential_collectives), opal_list_t); - - rc = netpatterns_init(); - if (OMPI_SUCCESS != rc) { - return rc; - } - - cs->topo_discovery_fn[COLL_ML_HR_FULL] = - mca_coll_ml_fulltree_hierarchy_discovery; - - cs->topo_discovery_fn[COLL_ML_HR_ALLREDUCE] = - mca_coll_ml_allreduce_hierarchy_discovery; - - cs->topo_discovery_fn[COLL_ML_HR_NBS] = - mca_coll_ml_fulltree_exclude_basesmsocket_hierarchy_discovery; - - cs->topo_discovery_fn[COLL_ML_HR_SINGLE_PTP] = - mca_coll_ml_fulltree_ptp_only_hierarchy_discovery; - - cs->topo_discovery_fn[COLL_ML_HR_SINGLE_IBOFFLOAD] = - mca_coll_ml_fulltree_iboffload_only_hierarchy_discovery; - - cs->need_allreduce_support = false; - - return OMPI_SUCCESS; -} - -/* - * Close the component - */ -static int ml_close(void) -{ - int ret; - - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - - /* There is not need to release/close resource if the - * priority was set to zero */ - if (cs->ml_priority <= 0) { - return OMPI_SUCCESS; - } - - OBJ_DESTRUCT(&cs->memory_manager); - OBJ_DESTRUCT(&cs->pending_tasks_mutex); - OBJ_DESTRUCT(&cs->pending_tasks); - OBJ_DESTRUCT(&cs->active_tasks_mutex); - OBJ_DESTRUCT(&cs->active_tasks); - OBJ_DESTRUCT(&cs->sequential_collectives_mutex); - OBJ_DESTRUCT(&cs->sequential_collectives); - - /* deregister progress function */ - ret = opal_progress_unregister(coll_ml_progress); - if (OMPI_SUCCESS != ret ) { - OMPI_ERROR_LOG(ret); - return ret; - } - - /* close the sbgp and bcol frameworks */ - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_sbgp_base_framework))) { - OMPI_ERROR_LOG(ret); - return ret; - } - - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_bcol_base_framework))) { - 
OMPI_ERROR_LOG(ret); - return ret; - } - - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_coll_ml_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - int ret; - - /* at this stage there is no reason to disaulify this component */ - /* Add here bcol init nand sbgp init */ - ret = mca_sbgp_base_init(enable_progress_threads, enable_mpi_threads); - if (OMPI_SUCCESS != ret) { - return ret; - } - - ret = mca_bcol_base_init(enable_progress_threads, enable_mpi_threads); - if (OMPI_SUCCESS != ret) { - return ret; - } - - /* done */ - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_config.c b/ompi/mca/coll/ml/coll_ml_config.c deleted file mode 100644 index aedc4aaf813..00000000000 --- a/ompi/mca/coll/ml/coll_ml_config.c +++ /dev/null @@ -1,629 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include - -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_config.h" -#include "coll_ml_lex.h" - -static char *key_buffer = NULL; -static size_t key_buffer_len = 0; - -typedef struct section_config_t { - char *section_name; - int section_id; - per_collective_configuration_t config; -} section_config_t; - -typedef struct coll_config_t { - char *coll_name; - int coll_id; - section_config_t section; -} coll_config_t; - -static int algorithm_name_to_id(char *name) -{ - assert (NULL != name); - if (!strcasecmp(name,"ML_BCAST_SMALL_DATA_KNOWN")) - return ML_BCAST_SMALL_DATA_KNOWN; - if (!strcasecmp(name,"ML_BCAST_SMALL_DATA_UNKNOWN")) - return ML_BCAST_SMALL_DATA_UNKNOWN; - if (!strcasecmp(name,"ML_BCAST_SMALL_DATA_SEQUENTIAL")) - return ML_BCAST_SMALL_DATA_SEQUENTIAL; - if (!strcasecmp(name,"ML_BCAST_LARGE_DATA_KNOWN")) - return ML_BCAST_LARGE_DATA_KNOWN; - if (!strcasecmp(name,"ML_BCAST_LARGE_DATA_UNKNOWN")) - return ML_BCAST_LARGE_DATA_UNKNOWN; - if (!strcasecmp(name,"ML_BCAST_LARGE_DATA_SEQUENTIAL")) - return ML_BCAST_LARGE_DATA_SEQUENTIAL; - if (!strcasecmp(name,"ML_N_DATASIZE_BINS")) - return ML_N_DATASIZE_BINS; - if (!strcasecmp(name,"ML_NUM_BCAST_FUNCTIONS")) - return ML_NUM_BCAST_FUNCTIONS; - if (!strcasecmp(name,"ML_SCATTER_SMALL_DATA_KNOWN")) - return ML_SCATTER_SMALL_DATA_KNOWN; - if (!strcasecmp(name,"ML_SCATTER_N_DATASIZE_BINS")) - return ML_SCATTER_N_DATASIZE_BINS; - if (!strcasecmp(name,"ML_SCATTER_SMALL_DATA_UNKNOWN")) - return ML_SCATTER_SMALL_DATA_UNKNOWN; - if (!strcasecmp(name,"ML_SCATTER_SMALL_DATA_SEQUENTIAL")) - return ML_SCATTER_SMALL_DATA_SEQUENTIAL; - if (!strcasecmp(name,"ML_NUM_SCATTER_FUNCTIONS")) - return ML_NUM_SCATTER_FUNCTIONS; - if (!strcasecmp(name,"ML_SMALL_DATA_ALLREDUCE")) - return ML_SMALL_DATA_ALLREDUCE; - if 
(!strcasecmp(name,"ML_LARGE_DATA_ALLREDUCE")) - return ML_LARGE_DATA_ALLREDUCE; - if (!strcasecmp(name,"ML_SMALL_DATA_REDUCE")) - return ML_SMALL_DATA_ALLREDUCE; - if (!strcasecmp(name,"ML_LARGE_DATA_REDUCE")) - return ML_LARGE_DATA_ALLREDUCE; - if (!strcasecmp(name,"ML_SMALL_DATA_REDUCE")) - return ML_SMALL_DATA_REDUCE; - if (!strcasecmp(name,"ML_LARGE_DATA_REDUCE")) - return ML_LARGE_DATA_REDUCE; - if (!strcasecmp(name,"ML_NUM_ALLREDUCE_FUNCTIONS")) - return ML_NUM_ALLREDUCE_FUNCTIONS; - if (!strcasecmp(name,"ML_SMALL_DATA_ALLTOALL")) - return ML_SMALL_DATA_ALLTOALL; - if (!strcasecmp(name,"ML_LARGE_DATA_ALLTOALL")) - return ML_LARGE_DATA_ALLTOALL; - if (!strcasecmp(name,"ML_NUM_ALLTOALL_FUNCTIONS")) - return ML_NUM_ALLTOALL_FUNCTIONS; - if (!strcasecmp(name,"ML_SMALL_DATA_ALLGATHER")) - return ML_SMALL_DATA_ALLGATHER; - if (!strcasecmp(name,"ML_LARGE_DATA_ALLGATHER")) - return ML_LARGE_DATA_ALLGATHER; - if (!strcasecmp(name,"ML_NUM_ALLGATHER_FUNCTIONS")) - return ML_NUM_ALLGATHER_FUNCTIONS; - if (!strcasecmp(name,"ML_SMALL_DATA_GATHER")) - return ML_SMALL_DATA_GATHER; - if (!strcasecmp(name,"ML_LARGE_DATA_GATHER")) - return ML_LARGE_DATA_GATHER; - if (!strcasecmp(name,"ML_NUM_GATHER_FUNCTIONS")) - return ML_NUM_GATHER_FUNCTIONS; - if (!strcasecmp(name,"ML_BARRIER_DEFAULT")) - return ML_BARRIER_DEFAULT; - - /* ERROR */ - return ML_UNDEFINED; -} - -static int hierarchy_name_to_id(char *name) -{ - assert (NULL != name); - if (!strcasecmp(name, "FULL_HR")) { - return COLL_ML_HR_FULL; - } - if (!strcasecmp(name, "FULL_HR_NO_BASESOCKET")) { - return COLL_ML_HR_NBS; - } - if (!strcasecmp(name, "PTP_ONLY")) { - return COLL_ML_HR_SINGLE_PTP; - } - if (!strcasecmp(name, "IBOFFLOAD_ONLY")) { - return COLL_ML_HR_SINGLE_IBOFFLOAD; - } - /* Error */ - return ML_UNDEFINED; -} - -static int section_name_to_id(char *name) -{ - assert (NULL != name); - if (!strcasecmp(name, "SMALL")) { - return ML_SMALL_MSG; - } - - if (!strcasecmp(name, "LARGE")) { - return ML_LARGE_MSG; - } - 
/* Error */ - return ML_UNDEFINED; -} - -static int coll_name_to_id(char *name) -{ - assert (NULL != name); - if (!strcasecmp(name, "ALLGATHER")) { - return ML_ALLGATHER; - } - if (!strcasecmp(name, "ALLGATHERV")) { - return ML_ALLGATHERV; - } - if (!strcasecmp(name, "ALLREDUCE")) { - return ML_ALLREDUCE; - } - if (!strcasecmp(name, "ALLTOALL")) { - return ML_ALLTOALL; - } - if (!strcasecmp(name, "ALLTOALLV")) { - return ML_ALLTOALLV; - } - if (!strcasecmp(name, "ALLTOALLW")) { - return ML_ALLTOALLW; - } - if (!strcasecmp(name, "ALLTOALLW")) { - return ML_ALLTOALLW; - } - if (!strcasecmp(name, "BARRIER")) { - return ML_BARRIER; - } - if (!strcasecmp(name, "BCAST")) { - return ML_BCAST; - } - if (!strcasecmp(name, "EXSCAN")) { - return ML_EXSCAN; - } - if (!strcasecmp(name, "GATHER")) { - return ML_GATHER; - } - if (!strcasecmp(name, "GATHERV")) { - return ML_GATHERV; - } - if (!strcasecmp(name, "REDUCE")) { - return ML_REDUCE; - } - if (!strcasecmp(name, "REDUCE_SCATTER")) { - return ML_REDUCE_SCATTER; - } - if (!strcasecmp(name, "SCAN")) { - return ML_SCAN; - } - if (!strcasecmp(name, "SCATTER")) { - return ML_SCATTER; - } - if (!strcasecmp(name, "SCATTERV")) { - return ML_SCATTERV; - } - - /* nonblocking functions */ - - if (!strcasecmp(name, "IALLGATHER")) { - return ML_IALLGATHER; - } - if (!strcasecmp(name, "IALLGATHERV")) { - return ML_IALLGATHERV; - } - if (!strcasecmp(name, "IALLREDUCE")) { - return ML_IALLREDUCE; - } - if (!strcasecmp(name, "IALLTOALL")) { - return ML_IALLTOALL; - } - if (!strcasecmp(name, "IALLTOALLV")) { - return ML_IALLTOALLV; - } - if (!strcasecmp(name, "IALLTOALLW")) { - return ML_IALLTOALLW; - } - if (!strcasecmp(name, "IALLTOALLW")) { - return ML_IALLTOALLW; - } - if (!strcasecmp(name, "IBARRIER")) { - return ML_IBARRIER; - } - if (!strcasecmp(name, "IBCAST")) { - return ML_IBCAST; - } - if (!strcasecmp(name, "IEXSCAN")) { - return ML_IEXSCAN; - } - if (!strcasecmp(name, "IGATHER")) { - return ML_IGATHER; - } - if (!strcasecmp(name, 
"IGATHERV")) { - return ML_IGATHERV; - } - if (!strcasecmp(name, "IREDUCE")) { - return ML_IREDUCE; - } - if (!strcasecmp(name, "IREDUCE_SCATTER")) { - return ML_IREDUCE_SCATTER; - } - if (!strcasecmp(name, "ISCAN")) { - return ML_ISCAN; - } - if (!strcasecmp(name, "ISCATTER")) { - return ML_ISCATTER; - } - if (!strcasecmp(name, "ISCATTERV")) { - return ML_ISCATTERV; - } - - /* Error - collecives name was not matched */ - return ML_UNDEFINED; -} -static int set_collective_name(coll_config_t *coll_config) -{ - int coll_id = - coll_name_to_id(coll_ml_config_yytext); - - if (ML_UNDEFINED == coll_id) { - return OMPI_ERROR; - } - - coll_config->coll_id = coll_id; - coll_config->coll_name = strdup(coll_ml_config_yytext); - - return OMPI_SUCCESS; -} - -static int set_section_name(section_config_t *section_config) -{ - int section_id; - - section_id = section_name_to_id(coll_ml_config_yytext); - - if (ML_UNDEFINED == section_id) { - return OMPI_ERROR; - } - - section_config->section_id = section_id; - section_config->section_name = strdup(coll_ml_config_yytext); - - return OMPI_SUCCESS; -} - -void mca_coll_ml_reset_config(per_collective_configuration_t *config) -{ - config->topology_id = ML_UNDEFINED; - config->threshold = ML_UNDEFINED; - config->algorithm_id = ML_UNDEFINED; - config->fragmentation_enabled = ML_UNDEFINED; -} - -static void reset_section(section_config_t *section_cf) -{ - if (section_cf->section_name) { - free (section_cf->section_name); - section_cf->section_name = NULL; - } - - section_cf->section_id = ML_UNDEFINED; - mca_coll_ml_reset_config(§ion_cf->config); -} - -static void reset_collective(coll_config_t *coll_cf) -{ - if (coll_cf->coll_name) { - free (coll_cf->coll_name); - coll_cf->coll_name = NULL; - } - - coll_cf->coll_id = ML_UNDEFINED; - reset_section(&coll_cf->section); -} - -/* - * String to integer; - */ -static int string_to_int(char *str) -{ - while (isspace(*str)) { - ++str; - } - - /* Nope -- just decimal, so use atoi() */ - return 
atoi(str); -} - -static int parse_algorithm_key(section_config_t *section, char *value) -{ - int ret; - ret = algorithm_name_to_id(value); - if (ML_UNDEFINED == ret) { - return OMPI_ERROR; - } else { - section->config.algorithm_id = ret; - } - - return OMPI_SUCCESS; -} - -static int parse_threshold_key(section_config_t *section, char *value) -{ - assert (NULL != value); - - if(!strcasecmp(value, "unlimited")) { - section->config.threshold = -1; - } else { - section->config.threshold = string_to_int(value); - } - - return OMPI_SUCCESS; -} - -static int parse_hierarchy_key(section_config_t *section, char *value) -{ - int ret; - - ret = hierarchy_name_to_id(value); - if (ML_UNDEFINED == ret) { - return OMPI_ERROR; - } - - section->config.topology_id = ret; - - return OMPI_SUCCESS; -} - -static int parse_fragmentation_key(section_config_t *section, char *value) -{ - assert (NULL != value); - - if(!strcasecmp(value, "enable")) { - section->config.fragmentation_enabled = 1; - } else if (!strcasecmp(value, "disable")) { - section->config.fragmentation_enabled = 0; - } else { - ML_ERROR(("Line %d, unexpected fragmentation value %s. 
Legal values are: enable/disable", - coll_ml_config_yynewlines, value)); - return OMPI_ERROR; - } - return OMPI_SUCCESS; -} - -/* Save configuration that have been collected so far */ -static int save_settings(coll_config_t *coll_config) -{ - per_collective_configuration_t *cf; - - if (ML_UNDEFINED == coll_config->coll_id || ML_UNDEFINED == coll_config->section.section_id) { - return OMPI_ERROR; - } - - cf = &mca_coll_ml_component.coll_config[coll_config->coll_id][coll_config->section.section_id]; - - cf->topology_id = coll_config->section.config.topology_id; - cf->threshold = coll_config->section.config.threshold; - cf->algorithm_id = coll_config->section.config.algorithm_id; - cf->fragmentation_enabled = coll_config->section.config.fragmentation_enabled; - - return OMPI_SUCCESS; -} - -/* - * Parse a single line - */ -static int parse_line(section_config_t *section) -{ - int val, ret = OMPI_SUCCESS; - char *value = NULL; - - /* Save the name name */ - if (key_buffer_len < strlen(coll_ml_config_yytext) + 1) { - char *tmp; - key_buffer_len = strlen(coll_ml_config_yytext) + 1; - tmp = (char *) realloc(key_buffer, key_buffer_len); - if (NULL == tmp) { - free(key_buffer); - key_buffer_len = 0; - key_buffer = NULL; - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - key_buffer = tmp; - } - strncpy(key_buffer, coll_ml_config_yytext, key_buffer_len); - - /* The first thing we have to see is an "=" */ - val = coll_ml_config_yylex(); - if (coll_ml_config_parse_done || COLL_ML_CONFIG_PARSE_EQUAL != val) { - ML_ERROR(("Line %d, expected = before key: %s", - coll_ml_config_yynewlines, - key_buffer)); - return OMPI_ERROR; - } - - /* Next we get the value */ - val = coll_ml_config_yylex(); - if (COLL_ML_CONFIG_PARSE_SINGLE_WORD == val || - COLL_ML_CONFIG_PARSE_VALUE == val) { - value = strdup(coll_ml_config_yytext); - - /* Now we need to see the newline */ - val = coll_ml_config_yylex(); - if (COLL_ML_CONFIG_PARSE_NEWLINE != val && - COLL_ML_CONFIG_PARSE_DONE != val) { - 
ML_ERROR(("Line %d, expected new line after %s", - coll_ml_config_yynewlines, - key_buffer)); - free(value); - return OMPI_ERROR; - } - } - - /* If we did not get EOL or EOF, something is wrong */ - else if (COLL_ML_CONFIG_PARSE_DONE != val && - COLL_ML_CONFIG_PARSE_NEWLINE != val) { - ML_ERROR(("Line %d, expected new line or end of line", - coll_ml_config_yynewlines)); - ret = OMPI_ERROR; - goto Error; - } - - /* Line parsing is done, read the values */ - if (!strcasecmp(key_buffer, "algorithm")) { - ret = parse_algorithm_key(section, value); - if (OMPI_SUCCESS != ret) { - goto Error; - } - } - - else if (!strcasecmp(key_buffer, "threshold")) { - ret = parse_threshold_key(section, value); - if (OMPI_SUCCESS != ret) { - goto Error; - } - } - - else if (!strcasecmp(key_buffer, "hierarchy")) { - ret = parse_hierarchy_key(section, value); - if (OMPI_SUCCESS != ret) { - goto Error; - } - } - - else if (!strcasecmp(key_buffer, "fragmentation")) { - ret = parse_fragmentation_key(section, value); - if (OMPI_SUCCESS != ret) { - goto Error; - } - /* Failed to parse the key */ - } else { - ML_ERROR(("Line %d, unknown key %s", - coll_ml_config_yynewlines, key_buffer)); - } - - /* All done */ -Error: - if (NULL != value) { - free(value); - } - - return ret; -} - -/**************************************************************************/ - -/* - * Parse a single file - */ -static int parse_file(char *filename) -{ - int val; - int ret = OMPI_SUCCESS; - bool first_section = true, first_coll = true; - coll_config_t coll_config; - - memset (&coll_config, 0, sizeof (coll_config)); - reset_collective(&coll_config); - - /* Open the file */ - coll_ml_config_yyin = fopen(filename, "r"); - if (NULL == coll_ml_config_yyin) { - ML_ERROR(("Failed to open config file %s", filename)); - ret = OMPI_ERR_NOT_FOUND; - goto cleanup; - } - - /* Do the parsing */ - coll_ml_config_parse_done = false; - coll_ml_config_yynewlines = 1; - coll_ml_config_init_buffer(coll_ml_config_yyin); - while 
(!coll_ml_config_parse_done) { - val = coll_ml_config_yylex(); - switch (val) { - case COLL_ML_CONFIG_PARSE_DONE: - case COLL_ML_CONFIG_PARSE_NEWLINE: - break; - case COLL_ML_CONFIG_PARSE_COLLECTIVE: - /* dump all the information to last section that was defined */ - if (!first_coll) { - ret = save_settings(&coll_config); - - if (OMPI_SUCCESS != ret) { - ML_ERROR(("Error in syntax for collective %s", coll_config.coll_name)); - goto cleanup; - } - } - - /* reset collective config */ - reset_collective(&coll_config); - - first_coll = false; - first_section = true; - - ret = set_collective_name(&coll_config); - if (OMPI_SUCCESS != ret) { - goto cleanup; - } - break; - case COLL_ML_CONFIG_PARSE_SECTION: - if (ML_UNDEFINED == coll_config.coll_id) { - ML_ERROR(("Collective section wasn't defined !")); - ret = OMPI_ERROR; - goto cleanup; - } - - if (!first_section) { - /* dump all the information to last section that was defined */ - ret = save_settings(&coll_config); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("Error in syntax for collective %s section %s", coll_config.coll_name, - coll_config.section.section_name)); - goto cleanup; - } - } - - first_section = false; - - /* reset all section values */ - reset_section(&coll_config.section); - - /* set new section name */ - ret = set_section_name(&coll_config.section); - if (OMPI_SUCCESS != ret) { - goto cleanup; - } - break; - case COLL_ML_CONFIG_PARSE_SINGLE_WORD: - if (ML_UNDEFINED == coll_config.coll_id || - ML_UNDEFINED == coll_config.section.section_id) { - ML_ERROR(("Collective section or sub-section was not defined !")); - ret = OMPI_ERROR; - goto cleanup; - } else { - parse_line(&coll_config.section); - } - break; - - default: - /* anything else is an error */ - ML_ERROR(("Unexpected token!")); - ret = OMPI_ERROR; - goto cleanup; - break; - } - } - - save_settings(&coll_config); - fclose(coll_ml_config_yyin); - coll_ml_config_yylex_destroy (); - ret = OMPI_SUCCESS; - -cleanup: - reset_collective(&coll_config); - if 
(NULL != key_buffer) { - free(key_buffer); - key_buffer = NULL; - key_buffer_len = 0; - } - return ret; -} - -int mca_coll_ml_config_file_init(void) -{ - return parse_file(mca_coll_ml_component.config_file_name); -} - diff --git a/ompi/mca/coll/ml/coll_ml_config.h b/ompi/mca/coll/ml/coll_ml_config.h deleted file mode 100644 index 92a39d50305..00000000000 --- a/ompi/mca/coll/ml/coll_ml_config.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef COLL_ML_CONFIG_H_ -#define COLL_ML_CONFIG_H_ - -#include "opal_config.h" -#include - -BEGIN_C_DECLS - -#define ML_UNDEFINED -1 - -struct per_collective_configuration_t { - int topology_id; - int threshold; - int algorithm_id; - int fragmentation_enabled; -}; -typedef struct per_collective_configuration_t per_collective_configuration_t; - -void mca_coll_ml_reset_config(per_collective_configuration_t *config); -int mca_coll_ml_config_file_init(void); - -END_C_DECLS -#endif diff --git a/ompi/mca/coll/ml/coll_ml_copy_fns.c b/ompi/mca/coll/ml/coll_ml_copy_fns.c deleted file mode 100644 index 447e0df101e..00000000000 --- a/ompi/mca/coll/ml/coll_ml_copy_fns.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" -#include "coll_ml_colls.h" -#include -#include - - - -/* This routine re-orders and packs user data. The assumption is that - * there is per-process data, the amount of data is the same for all - * ranks, and the user data is contigous. 
- */ -int mca_coll_ml_pack_reorder_contiguous_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int i, rank; - void *user_buf, *library_buf; - size_t bytes_per_proc; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) - coll_op->coll_module; - mca_coll_ml_topology_t *topo_info = coll_op->coll_schedule->topo_info; - ptrdiff_t ptr_dif; - - /* get the offset into each processes data. The assumption is that - * we are manipulating the same amount of data for each process. - */ - - /* figure out how much data per-proc to copy */ - bytes_per_proc=coll_op->fragment_data.per_rank_fragment_size; - - /* loop over all the ranks in the communicator */ - for( i=0 ; i < ompi_comm_size(ml_module->comm) ; i++ ) { - - /* look up the rank of the i'th element in the sorted list */ - rank = topo_info->sort_list[i]; - - /* get the pointer to user data */ - user_buf=coll_op->full_message.src_user_addr; - /* compute offset into the user buffer */ - - /* offset for data already processed */ - ptr_dif=rank*coll_op->full_message.n_bytes_per_proc_total+ - coll_op->fragment_data.offset_into_user_buffer_per_proc; - user_buf=(void *) ((char *)user_buf+ptr_dif); - /* - rank*coll_op->full_message.n_bytes_per_proc_total+ - coll_op->fragment_data.offset_into_user_buffer_per_proc); - */ - - /* get the pointer to the ML buffer */ - library_buf= (void *) - ((char *)coll_op->variable_fn_params.src_desc->data_addr+i*bytes_per_proc); - - /* copy the data */ - memcpy(library_buf, user_buf, bytes_per_proc); - - } - - return OMPI_SUCCESS; -} - -/* This routine re-orders and packs user data. The assumption is that - * there is per-process data, the amount of data is the same for all - * ranks, and the user data is contigous. 
- */ -int mca_coll_ml_pack_reorder_noncontiguous_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int i, rank; - void *user_buf, *library_buf; - size_t bytes_per_proc; - ptrdiff_t ptr_dif; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) - coll_op->coll_module; - mca_coll_ml_topology_t *topo_info = coll_op->coll_schedule->topo_info; - - /* get the offset into each processes data. The assumption is that - * we are manipulating the same amount of data for each process. - */ - - /* figure out how much data per-proc to copy */ - bytes_per_proc = coll_op->fragment_data.per_rank_fragment_size; - - /* loop over all the ranks in the communicator */ - for(i = 0; i < ompi_comm_size(ml_module->comm); i++ ) { - - /* look up the rank of the i'th element in the sorted list */ - rank = topo_info->sort_list[i]; - - /* get the pointer to user data */ - user_buf=coll_op->full_message.src_user_addr; - /* compute offset into the user buffer */ - - /* offset for data already processed */ - ptr_dif=rank*coll_op->full_message.send_count* - coll_op->full_message.send_extent+ - coll_op->fragment_data.offset_into_user_buffer_per_proc; - user_buf=(void *) ((char *)user_buf+ptr_dif); - - /* get the pointer to the ML buffer */ - library_buf= (void *) - ((char *)coll_op->variable_fn_params.src_desc->data_addr+i*bytes_per_proc); - - /* copy the data */ - memcpy(library_buf, user_buf, bytes_per_proc); - - } - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/coll/ml/coll_ml_custom_utils.c b/ompi/mca/coll/ml/coll_ml_custom_utils.c deleted file mode 100644 index c00c4a5439b..00000000000 --- a/ompi/mca/coll/ml/coll_ml_custom_utils.c +++ /dev/null @@ -1,139 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * Copyright (c) 2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include -#include -#include - -#include "opal/util/output.h" -#include "opal/class/opal_list.h" -#include "opal/class/opal_object.h" -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/threads/mutex.h" -#include "opal/sys/atomic.h" - -#include "ompi/op/op.h" -#include "ompi/constants.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/coll/base/base.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/patterns/comm/coll_ops.h" - -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "ompi/mca/bcol/base/base.h" -#include "coll_ml_custom_utils.h" - -/* - * Local types - */ - -struct avail_coll_t { - opal_list_item_t super; - int ac_priority; - mca_coll_base_module_2_1_0_t *ac_module; -}; -typedef struct avail_coll_t avail_coll_t; - -/* - * Stuff for the OBJ interface - * If topo_index == COLL_ML_TOPO_MAX it looks over all possilbe topologies, otherwhise it looks - * in the topology that was specified. 
- */ - -int mca_coll_ml_check_if_bcol_is_used(const char *bcol_name, const mca_coll_ml_module_t *ml_module, - int topo_index) -{ - int i, rc, hier, *ranks_in_comm, - is_used = 0, - comm_size = ompi_comm_size(ml_module->comm); - int n_hier, tp , max_tp; - const mca_coll_ml_topology_t *topo_info; - - ranks_in_comm = (int *) malloc(comm_size * sizeof(int)); - if (OPAL_UNLIKELY(NULL == ranks_in_comm)) { - ML_ERROR(("Memory allocation failed.")); - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_NO_MEM); - /* not reached but causes a clang warning to not return here */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < comm_size; ++i) { - ranks_in_comm[i] = i; - } - - if (COLL_ML_TOPO_MAX == topo_index) { - tp = 0; - max_tp = COLL_ML_TOPO_MAX; - } else { - tp = topo_index; - max_tp = topo_index + 1; - } - - for (; tp < max_tp; tp++) { - topo_info = &ml_module->topo_list[tp]; - n_hier = topo_info->n_levels; - for (hier = 0; hier < n_hier; ++hier) { - hierarchy_pairs *pair = &topo_info->component_pairs[hier]; - mca_bcol_base_component_t *b_cm = pair->bcol_component; - if(0 == strcmp(bcol_name, - b_cm->bcol_version.mca_component_name)) { - is_used = 1; - break; - } - } - } - - rc = comm_allreduce_pml(&is_used, &is_used, 1, MPI_INT, - ompi_comm_rank(ml_module->comm), MPI_MAX, - comm_size, ranks_in_comm, ml_module->comm); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("comm_allreduce_pml failed.")); - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_OP); - } - - free(ranks_in_comm); - - return is_used; -} - -/* The function is very different from the above function */ -int mca_coll_ml_check_if_bcol_is_requested(const char *component_name) -{ - mca_base_component_list_item_t *bcol_comp; - - ML_VERBOSE(10, ("Loop over bcol components")); - OPAL_LIST_FOREACH(bcol_comp, &mca_bcol_base_components_in_use, mca_base_component_list_item_t) { - if(0 == strcmp(component_name, - ((mca_bcol_base_component_2_0_0_t *) - 
bcol_comp->cli_component)->bcol_version.mca_component_name)) { - return true; - } - } - - /* the component was not resquested */ - return false; -} diff --git a/ompi/mca/coll/ml/coll_ml_custom_utils.h b/ompi/mca/coll/ml/coll_ml_custom_utils.h deleted file mode 100644 index 7d6a8feb00f..00000000000 --- a/ompi/mca/coll/ml/coll_ml_custom_utils.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#ifndef MCA_COLL_ML_CUSTOM_UTILS_H -#define MCA_COLL_ML_CUSTOM_UTILS_H - -#include "ompi_config.h" - -#include "coll_ml.h" - -/* the function is used to check if the bcol name is used in this ml module */ -int mca_coll_ml_check_if_bcol_is_used(const char *bcol_name, const mca_coll_ml_module_t *ml_module, - int topo_index); - -/* The function is used to check if the bcol component was REQUESTED by user */ -int mca_coll_ml_check_if_bcol_is_requested(const char *component_name); - -END_C_DECLS - -#endif /* MCA_COLL_ML_ML_H */ diff --git a/ompi/mca/coll/ml/coll_ml_descriptors.c b/ompi/mca/coll/ml/coll_ml_descriptors.c deleted file mode 100644 index 4060c27ed72..00000000000 --- a/ompi/mca/coll/ml/coll_ml_descriptors.c +++ /dev/null @@ -1,60 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include "coll_ml.h" -#include "coll_ml_inlines.h" - - -static inline void mca_coll_ml_fragment_constructor(mca_coll_ml_fragment_t *frag) -{ - frag->fn_args = NULL; -} - -static inline void mca_coll_ml_fragment_destructor(mca_coll_ml_fragment_t *frag) -{ - if (frag->fn_args) { - free(frag->fn_args); - frag->fn_args = NULL; - } -} - -static inline void mca_coll_ml_descriptor_constructor(mca_coll_ml_descriptor_t *descriptor) -{ - - OBJ_CONSTRUCT(&(descriptor->fragment),mca_coll_ml_fragment_t); - - /* this fragment is alway associated with this message descriptor */ - descriptor->fragment.full_msg_descriptor=descriptor; - -} - - -static inline void mca_coll_ml_descriptor_destructor(mca_coll_ml_descriptor_t *descriptor) -{ - OBJ_DESTRUCT(&(descriptor->fragment)); -} - -OBJ_CLASS_INSTANCE( - mca_coll_ml_fragment_t, - opal_list_item_t, - mca_coll_ml_fragment_constructor, - mca_coll_ml_fragment_destructor); - -OBJ_CLASS_INSTANCE( - mca_coll_ml_descriptor_t, - ompi_request_t, - mca_coll_ml_descriptor_constructor, - mca_coll_ml_descriptor_destructor); - diff --git a/ompi/mca/coll/ml/coll_ml_functions.h b/ompi/mca/coll/ml/coll_ml_functions.h deleted file mode 100644 index 5d0d0d7b1af..00000000000 --- a/ompi/mca/coll/ml/coll_ml_functions.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#ifndef MCA_COLL_ML_FUNCTIONS_H -#define MCA_COLL_ML_FUNCTIONS_H - -#include "ompi_config.h" - -BEGIN_C_DECLS - -#define ML_MEMSYNC -100 - -enum { - ML_BARRIER_DEFAULT -}; - /* small data algorithm */ -/* broadcast functions */ -enum { - /* small data algorithm */ - ML_BCAST_SMALL_DATA_KNOWN, - /* small data - dynamic decision making supported */ - ML_BCAST_SMALL_DATA_UNKNOWN, - /* Sequential algorithm */ - ML_BCAST_SMALL_DATA_SEQUENTIAL, - - ML_BCAST_LARGE_DATA_KNOWN, - - ML_BCAST_LARGE_DATA_UNKNOWN, - - ML_BCAST_LARGE_DATA_SEQUENTIAL, - - /* marker - all routines about this are expected to be used in - * selection logic that is based on size of the data */ - ML_N_DATASIZE_BINS, - - /* number of functions - also counts some markers, but ... */ - ML_NUM_BCAST_FUNCTIONS -}; - - -/* scatter functions */ -enum { - /* small data algorithm */ - ML_SCATTER_SMALL_DATA_KNOWN, - - /* marker - all routines about this are expected to be used in - * selection logic that is based on size of the data */ - ML_SCATTER_N_DATASIZE_BINS, - - /* small data - dynamic decision making supported */ - ML_SCATTER_SMALL_DATA_UNKNOWN, - - /* Sequential algorithm */ - ML_SCATTER_SMALL_DATA_SEQUENTIAL, - - /* number of functions - also counts some markers, but ... 
*/ - ML_NUM_SCATTER_FUNCTIONS -}; - - -/* Allreduce functions */ -enum { - /* small data algorithm */ - ML_SMALL_DATA_ALLREDUCE, - - /* Large data algorithm */ - ML_LARGE_DATA_ALLREDUCE, - - /* If some of bcols doesn't support - all possibles types, use these extra algthms */ - /* small data algorithm */ - ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE, - - /* large data algorithm */ - ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE, - - /* number of functions */ - ML_NUM_ALLREDUCE_FUNCTIONS -}; - -/* Reduce functions */ -enum { - /* small data algorithm */ - ML_SMALL_DATA_REDUCE, - - /* Large data algorithm */ - ML_LARGE_DATA_REDUCE, - - /* number of functions */ - ML_NUM_REDUCE_FUNCTIONS -}; -/* Alltoall functions */ -enum { - /* small data algorithm */ - ML_SMALL_DATA_ALLTOALL, - /* large all to all */ - ML_LARGE_DATA_ALLTOALL, - /* number of functions */ - ML_NUM_ALLTOALL_FUNCTIONS -}; - -/* Allgather functions */ -enum { - /* small data */ - ML_SMALL_DATA_ALLGATHER, - /* large data */ - ML_LARGE_DATA_ALLGATHER, - /* number of functions */ - ML_NUM_ALLGATHER_FUNCTIONS -}; - -/* gather functions */ -enum { - /* small data */ - ML_SMALL_DATA_GATHER, - /* large data */ - ML_LARGE_DATA_GATHER, - /* number of functions */ - ML_NUM_GATHER_FUNCTIONS -}; - -END_C_DECLS - -#endif /* MCA_COLL_ML_FUNCTIONS_H */ diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithm_memsync_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithm_memsync_setup.c deleted file mode 100644 index f50d040f619..00000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithm_memsync_setup.c +++ /dev/null @@ -1,195 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" - -static int mca_coll_ml_build_memsync_schedule( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc) -{ - int i_hier, rc, i_fn, n_fcns, i, - n_hiers = topo_info->n_levels; - - bool call_for_top_func; - mca_bcol_base_module_t *bcol_module; - - mca_coll_ml_compound_functions_t *comp_fn; - mca_coll_ml_collective_operation_description_t *schedule; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - - schedule = *coll_desc; - if (OPAL_UNLIKELY(NULL == schedule)) { - ML_ERROR(("Can't allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - if (topo_info->global_highest_hier_group_index == - topo_info->component_pairs[n_hiers - 1].bcol_index) { - /* The process that is member of highest level subgroup - should call for top algorithms in addition to fan-in/out steps */ - call_for_top_func = true; - n_fcns = 2 * n_hiers - 1; /* Up + Top + Down */ - } else { - /* The process is not member of highest level subgroup, - as result it does not call for top algorithm, - but it calls for all fan-in/out steps */ - call_for_top_func = false; - n_fcns = 2 * n_hiers; - } - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_fcns; - schedule->topo_info = topo_info; - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t)); - - if (OPAL_UNLIKELY(NULL == schedule->component_functions)) { - ML_ERROR(("Can't allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Barrier_Setup_Error; - } - - for (i_fn = 0; i_fn < 
n_fcns; ++i_fn) { - i_hier = (i_fn < n_hiers ? i_fn : n_fcns - i_fn - 1); - comp_fn = &schedule->component_functions[i_fn]; - - /* The hierarchial level */ - comp_fn->h_level = i_hier; - bcol_module = GET_BCOL(topo_info, i_hier); - - /* The UP direction */ - if (1 + i_fn < n_hiers || (1 + i_fn == n_hiers && !call_for_top_func)) { - /* Pasha: We do not have memory syncronization FANIN function, instead I use barrier. - * In future we have to replace it with memsync fan-in function - * comp_fn->bcol_function = - * bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_FANIN][1][0][0]; - */ - comp_fn->bcol_function = GET_BCOL_SYNC_FN(bcol_module); - - - assert(NULL != comp_fn->bcol_function); - - /* Each function call with index K is depended of all K-1 previous indices - - in simple words we will do sequential Fan-In calls */ - comp_fn->num_dependencies = i_fn; - comp_fn->num_dependent_tasks = n_fcns - i_fn - 1; - - /* Init component function */ - strcpy(comp_fn->fn_name, "MEMSYNC-FANIN"); - - /* On the highest level */ - } else if ((1 + i_fn == n_hiers && call_for_top_func)) { - comp_fn->bcol_function = GET_BCOL_SYNC_FN(bcol_module); - - /* Each function call with index K is depended of all K-1 previous indices - - in simple words we do sequential calls */ - comp_fn->num_dependencies = n_hiers - 1; /* All Fan-Ins */ - comp_fn->num_dependent_tasks = n_fcns - n_hiers; /* All Fan-Outs */ - - /* Init component function */ - strcpy(comp_fn->fn_name, "MEMSYNC-BARRIER"); - - assert(NULL != comp_fn->bcol_function); - ML_VERBOSE(10, ("func indx %d set to BARRIER %p", i_fn, comp_fn->bcol_function)); - - /* The DOWN direction */ - } else { - /* Pasha: We do not have memory syncronization FANOUT function, instead I use barrier. 
- * In future we have to replace it with memsync fan-out function - * comp_fn->bcol_function = - * bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_FANOUT][1][0][0]; - */ - comp_fn->bcol_function = GET_BCOL_SYNC_FN(bcol_module); - - /* Each function call with index K is depended of all UP and TOP algths */ - comp_fn->num_dependencies = n_hiers; - comp_fn->num_dependent_tasks = 0; - - /* Init component function */ - strcpy(comp_fn->fn_name, "MEMSYNC-FANOUT"); - } - - assert(NULL != comp_fn->bcol_function); - ML_VERBOSE(10, ("func indx %d set to %p", i_fn, comp_fn->bcol_function)); - - if (comp_fn->num_dependent_tasks > 0) { - comp_fn->dependent_task_indices = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int)); - if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indices)) { - ML_ERROR(("Can't allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Barrier_Setup_Error; - } - - /* All indexes follow after this one */ - for (i = 0; i < comp_fn->num_dependent_tasks; ++i) { - comp_fn->dependent_task_indices[i] = i_fn + i + 1; - } - } - - /* No need completion func for Barrier */ - comp_fn->task_comp_fn = NULL; - - ML_VERBOSE(10, ("Setting collective [Barrier] fn_idx %d, n_of_this_type_in_a_row %d, " - "index_in_consecutive_same_bcol_calls %d.", - i_fn, comp_fn->constant_group_data.n_of_this_type_in_a_row, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls)); - } - - rc = ml_coll_barrier_constant_group_data_setup(topo_info, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("Failed to init const group data.")); - goto Barrier_Setup_Error; - } - - schedule->progress_type = 0; - - return OMPI_SUCCESS; - -Barrier_Setup_Error: - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - schedule->component_functions = NULL; - } - - free (schedule); - *coll_desc = NULL; - - return rc; -} - -int ml_coll_memsync_setup(mca_coll_ml_module_t *ml_module) -{ - int ret; - /* For barrier 
syncronization we use barrier topology */ - mca_coll_ml_topology_t *topo_info = - &ml_module->topo_list[ml_module->collectives_topology_map[ML_BARRIER][ML_SMALL_MSG]]; - - ret = mca_coll_ml_build_memsync_schedule(topo_info, - &ml_module->coll_ml_memsync_function); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static bcast")); - return ret; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms.c deleted file mode 100644 index efbb9fac36e..00000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms.c +++ /dev/null @@ -1,187 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" - -/* collective managment descriptor initialization - called right after - * the constructor by opal_free_list code - */ -static void mca_coll_ml_collective_operation_progress_init - (opal_free_list_item_t* item, void* ctx) -{ - int i; - int max_dag_size = ((struct coll_desc_init *)ctx)->max_dag_size; - size_t max_n_bytes_per_proc_total = - ((struct coll_desc_init *)ctx)->max_n_bytes_per_proc_total; - mca_coll_ml_collective_operation_progress_t *coll_op = - (mca_coll_ml_collective_operation_progress_t *) item; - - coll_op->dag_description.status_array = - (mca_coll_ml_task_status_t *) - calloc(max_dag_size, sizeof(mca_coll_ml_task_status_t)); - assert(coll_op->dag_description.status_array); - - /* initialize individual elements */ - for (i = 0; i < max_dag_size; i++ ) { - 
/* Pasha: We assume here index syncronization between - task indexes and indexes in component_function array - (mca_coll_ml_collective_operation_description) - */ - coll_op->dag_description.status_array[i]. - my_index_in_coll_schedule = i; - coll_op->dag_description.status_array[i]. - ml_coll_operation = coll_op; - - OBJ_CONSTRUCT(&coll_op->dag_description.status_array[i].item, opal_list_item_t); - } - - /* set the size per proc of the ML buffer */ - coll_op->full_message.max_n_bytes_per_proc_total= - max_n_bytes_per_proc_total; - - /* set the pointer to the bcol module */ - coll_op->coll_module = - ((struct coll_desc_init *)ctx)->bcol_base_module; - -} - -int ml_coll_schedule_setup(mca_coll_ml_module_t *ml_module) -{ - /* local variables */ - int ret = OMPI_SUCCESS, comm_size; - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - size_t ml_per_proc_buffer_size; - - /* Barrier */ - ret = ml_coll_hier_barrier_setup(ml_module); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* Broadcast */ - ret = ml_coll_hier_bcast_setup(ml_module); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* Allreduce */ - if (!mca_coll_ml_component.use_knomial_allreduce) { - ret = ml_coll_hier_allreduce_setup(ml_module); - } else { - ret = ml_coll_hier_allreduce_setup_new(ml_module); - } - - if( OMPI_SUCCESS != ret ) { - return ret; - } - - - /* Alltoall */ - /* - ret = ml_coll_hier_alltoall_setup_new(ml_module); - - if( OMPI_SUCCESS != ret ) { - return ret; - } - */ - - /* Allgather */ - ret = ml_coll_hier_allgather_setup(ml_module); - - if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* Gather */ - /* - ret = ml_coll_hier_gather_setup(ml_module); - - if( OMPI_SUCCESS != ret ) { - return ret; - } - */ - - /* Reduce */ - ret = ml_coll_hier_reduce_setup(ml_module); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* Scatter */ - /* - ret = ml_coll_hier_scatter_setup(ml_module); - if( OMPI_SUCCESS != ret ) { - return ret; - } - */ - - ret = ml_coll_memsync_setup(ml_module); 
- if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* nonblocking Reduce */ - - /* Alltoall */ - - /* nonblocking alltoall */ - - /* max_dag_size will be set here, so initialize it */ - - /* Pasha: Do we have to keep the max_dag_size ? - In most generic case, it will be equal to max_fn_calls */ - ml_module->max_dag_size = ml_module->max_fn_calls; - - assert(ml_module->max_dag_size > 0); - - /* initialize the mca_coll_ml_collective_operation_progress_t free list */ - /* NOTE: as part of initialization each routine needs to make sure that - * the module element max_dag_size is set large enough - space for - * tracking collective progress is allocated based on this value. */ - - /* figure out what the size of the ml buffer is */ - ml_per_proc_buffer_size=ml_module->payload_block->size_buffer; - comm_size=ompi_comm_size(ml_module->comm); - ml_per_proc_buffer_size/=comm_size; - ml_module->coll_desc_init_data.max_dag_size=ml_module->max_dag_size; - ml_module->coll_desc_init_data.max_n_bytes_per_proc_total=ml_per_proc_buffer_size; - ml_module->coll_desc_init_data.bcol_base_module=(mca_coll_base_module_t *) - ml_module; - - ret = opal_free_list_init ( - &(ml_module->coll_ml_collective_descriptors), - sizeof(mca_coll_ml_collective_operation_progress_t), - /* no special alignment needed */ - 8, - OBJ_CLASS(mca_coll_ml_collective_operation_progress_t), - /* no payload data */ - 0, 0, - /* NOTE: hack - need to parametrize this */ - cm->free_list_init_size, - cm->free_list_max_size, - cm->free_list_grow_size, - /* No Mpool */ - NULL, 0, NULL, - mca_coll_ml_collective_operation_progress_init, - (void *)&(ml_module->coll_desc_init_data) - ); - if (OMPI_SUCCESS != ret) { - return ret; - } - - /* done */ - return ret; -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_allgather_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_allgather_setup.c deleted file mode 100644 index cd964d41dd9..00000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_allgather_setup.c +++ 
/dev/null @@ -1,240 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" -#include "ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.h" -#include "ompi/patterns/net/netpatterns_knomial_tree.h" - -#define SMALL_MSG_RANGE 1 -#define LARGE_MSG_RANGE 5 - -static int mca_coll_ml_build_allgather_schedule(mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc, int bcol_func_index) -{ - int ret; /* exit code in case of error */ - int nfn = 0; - int i; - int *scratch_indx = NULL, - *scratch_num = NULL; - - mca_coll_ml_collective_operation_description_t *schedule = NULL; - mca_coll_ml_compound_functions_t *comp_fn; - mca_coll_ml_schedule_hier_info_t h_info; - - ML_VERBOSE(9, ("Setting hierarchy, inputs : n_levels %d, hiest %d ", - topo_info->n_levels, topo_info->global_highest_hier_group_index)); - MCA_COLL_ML_INIT_HIER_INFO(h_info, topo_info->n_levels, - topo_info->global_highest_hier_group_index, topo_info); - - ret = mca_coll_ml_schedule_init_scratch(topo_info, &h_info, - &scratch_indx, &scratch_num); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("Can't mca_coll_ml_schedule_init_scratch.")); - goto Error; - } - assert(NULL != scratch_indx); - assert(NULL != scratch_num); - - schedule = *coll_desc = - mca_coll_ml_schedule_alloc(&h_info); - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - ret = 
OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - /* Setting topology information */ - schedule->topo_info = topo_info; - - /* Set dependencies equal to number of hierarchies */ - for (i = 0; i < h_info.num_up_levels; i++) { - int query_conf[MCA_COLL_ML_QUERY_SIZE]; - MCA_COLL_ML_SET_QUERY(query_conf, DATA_SRC_KNOWN, BLOCKING, BCOL_GATHER, bcol_func_index, 0, 0); - comp_fn = &schedule->component_functions[i]; - MCA_COLL_ML_SET_COMP_FN(comp_fn, i, topo_info, - i, scratch_indx, scratch_num, query_conf, "GATHER_DATA"); - } - - nfn = i; - if (h_info.call_for_top_function) { - int query_conf[MCA_COLL_ML_QUERY_SIZE]; - MCA_COLL_ML_SET_QUERY(query_conf, DATA_SRC_KNOWN, NON_BLOCKING, BCOL_ALLGATHER, bcol_func_index, 0, 0); - comp_fn = &schedule->component_functions[nfn]; - MCA_COLL_ML_SET_COMP_FN(comp_fn, nfn, topo_info, - nfn, scratch_indx, scratch_num, query_conf, "ALLGATHER_DATA"); - ++nfn; - } - - /* coming down the hierarchy */ - for (i = h_info.num_up_levels - 1; i >= 0; i--, nfn++) { - int query_conf[MCA_COLL_ML_QUERY_SIZE]; - MCA_COLL_ML_SET_QUERY(query_conf, DATA_SRC_KNOWN, NON_BLOCKING, BCOL_BCAST, bcol_func_index, 0, 0); - comp_fn = &schedule->component_functions[nfn]; - MCA_COLL_ML_SET_COMP_FN(comp_fn, i, topo_info, - nfn, scratch_indx, scratch_num, query_conf, "BCAST_DATA"); - } - - /* Fill the rest of constant data */ - mca_coll_ml_call_types(&h_info, schedule); - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - - Error: - if (NULL != scratch_indx) { - free(scratch_indx); - } - if (NULL != scratch_num) { - free(scratch_num); - } - - return ret; -} - -int ml_coll_hier_allgather_setup(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int ret, topo_index, alg; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - ML_VERBOSE(10,("entering allgather setup")); - -#if 0 - /* used to validate the recursive k - ing allgather tree */ - { - /* debug print */ - int ii, jj; - 
netpatterns_k_exchange_node_t exchange_node; - - ret = netpatterns_setup_recursive_knomial_allgather_tree_node(8, 3, 3, &exchange_node); - fprintf(stderr,"log tree order %d tree_order %d\n", exchange_node.log_tree_order,exchange_node.tree_order); - if( EXCHANGE_NODE == exchange_node.node_type){ - if( exchange_node.n_extra_sources > 0){ - fprintf(stderr,"Receiving data from extra rank %d\n",exchange_node.rank_extra_sources_array[0]); - } - for( ii = 0; ii < exchange_node.log_tree_order; ii++){ - for( jj = 0; jj < (exchange_node.tree_order-1); jj++) { - if( exchange_node.rank_exchanges[ii][jj] >= 0){ - fprintf(stderr,"level %d I send %d bytes to %d from offset %d \n",ii+1, - exchange_node.payload_info[ii][jj].s_len, - exchange_node.rank_exchanges[ii][jj], - exchange_node.payload_info[ii][jj].s_offset); - fprintf(stderr,"level %d I receive %d bytes from %d at offset %d\n",ii+1, - exchange_node.payload_info[ii][jj].r_len, - exchange_node.rank_exchanges[ii][jj], - exchange_node.payload_info[ii][jj].r_offset); - } - } - } - fprintf(stderr,"exchange_node.n_extra_sources %d\n",exchange_node.n_extra_sources); - fprintf(stderr,"exchange_node.myid_reindex %d\n",exchange_node.reindex_myid); - if( exchange_node.n_extra_sources > 0){ - fprintf(stderr,"Sending back data to extra rank %d\n",exchange_node.rank_extra_sources_array[0]); - } - } else { - fprintf(stderr,"I am an extra and send to proxy %d\n", - exchange_node.rank_extra_sources_array[0]); - } - } -#endif - - alg = mca_coll_ml_component.coll_config[ML_ALLGATHER][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLGATHER][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLGATHER] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allgather_schedule(&ml_module->topo_list[topo_index], - &ml_module->coll_ml_allgather_functions[alg], - SMALL_MSG_RANGE); - if 
(OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static alltoall")); - return ret; - } - - alg = mca_coll_ml_component.coll_config[ML_ALLGATHER][ML_LARGE_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLGATHER][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLGATHER] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allgather_schedule(&ml_module->topo_list[topo_index], - &ml_module->coll_ml_allgather_functions[alg], - LARGE_MSG_RANGE); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static alltoall")); - return ret; - } - - return OMPI_SUCCESS; -} - -void ml_coll_hier_allgather_cleanup(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int topo_index, alg; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - alg = mca_coll_ml_component.coll_config[ML_ALLGATHER][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLGATHER][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLGATHER] = NULL; - return; - } - - if (NULL == ml_module->coll_ml_allgather_functions[alg]) { - return; - } - - if (ml_module->coll_ml_allgather_functions[alg]->component_functions) { - free(ml_module->coll_ml_allgather_functions[alg]->component_functions); - ml_module->coll_ml_allgather_functions[alg]->component_functions = NULL; - } - - if (ml_module->coll_ml_allgather_functions[alg]) { - free(ml_module->coll_ml_allgather_functions[alg]); - ml_module->coll_ml_allgather_functions[alg] = NULL; - } - - alg = mca_coll_ml_component.coll_config[ML_ALLGATHER][ML_LARGE_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLGATHER][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - 
ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLGATHER] = NULL; - return; - } - - if (ml_module->coll_ml_allgather_functions[alg]->component_functions) { - free(ml_module->coll_ml_allgather_functions[alg]->component_functions); - ml_module->coll_ml_allgather_functions[alg]->component_functions = NULL; - } - - if (ml_module->coll_ml_allgather_functions[alg]) { - free(ml_module->coll_ml_allgather_functions[alg]); - ml_module->coll_ml_allgather_functions[alg] = NULL; - } -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_allreduce_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_allreduce_setup.c deleted file mode 100644 index a371d51b7a3..00000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_allreduce_setup.c +++ /dev/null @@ -1,434 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" - -#define ALLREDUCE_SMALL 1 -#define ALLREDUCE_LARGE 5 -#define SMALL_MSG_RANGE 1 -#define LARGE_MSG_RANGE 5 - -static int mca_coll_ml_build_allreduce_schedule( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc, int bcol_func_index) -{ - - bool call_for_top_function, prev_is_zero; - int n_hiers = topo_info->n_levels; - int i_hier, j_hier; - int cnt, value_to_set = 0; - int ret; /* exit code in case of error */ - int nfn=0; - int *scratch_indx = NULL, - *scratch_num = NULL; - int global_high_hierarchy_index = - topo_info->global_highest_hier_group_index; - - mca_coll_ml_collective_operation_description_t *schedule; - mca_coll_ml_compound_functions_t *comp_fn; - mca_bcol_base_module_t *prev_bcol, - *bcol_module; - int num_up_levels,nbcol_functions,i; - - if (global_high_hierarchy_index == - topo_info->component_pairs[n_hiers - 1].bcol_index) { - /* The process that is member of highest level subgroup - should call for top algorithms in addition to fan-in/out steps*/ - call_for_top_function = true; - /* hier level run only top algorithm, so we deduct 1 */ - num_up_levels = n_hiers - 1; - /* Top algorithm is called only once, so we deduct 1 */ - nbcol_functions = 2 * n_hiers - 1; - } else { - /* The process is not member of highest level subgroup, - as result it does not call for top algorithm, - but it calls for all fan-in/out steps */ - call_for_top_function = false; - num_up_levels = n_hiers; - nbcol_functions = 2 * n_hiers; - } - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - schedule = *coll_desc; - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - return 
OMPI_ERR_OUT_OF_RESOURCE; - } - - scratch_indx = (int *) calloc(n_hiers * 2, sizeof (int)); - if (NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Allreduce_Setup_Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (n_hiers * 2)); - if (NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Allreduce_Setup_Error; - } - - prev_bcol = NULL; - - for (i = 0, cnt = 0; i < num_up_levels; ++i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* top - only if the proc arrive to highest_level_is_global_highest_level */ - if (call_for_top_function) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, n_hiers - 1))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - prev_bcol = GET_BCOL(topo_info, n_hiers - 1); - } - - ++cnt; - } - - /* going down */ - for (i = num_up_levels - 1; i >= 0; --i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - prev_bcol = GET_BCOL(topo_info, i); - } - } - - i = cnt - 1; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i]) { - prev_is_zero = true; - } - - scratch_num[i] = value_to_set; - --i; - } while(i >= 0); - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = nbcol_functions; - schedule->topo_info = topo_info; - schedule->progress_type = 0; - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(nbcol_functions, sizeof(struct mca_coll_ml_compound_functions_t)); - - if (NULL == schedule->component_functions) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto 
Allreduce_Setup_Error; - } - - for (i = 0; i < num_up_levels; i++) { - comp_fn = &schedule->component_functions[i]; - comp_fn->h_level = i; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, i); - - /* strcpy (comp_fn->fn_name, "ALLREDUCE_SMALL_DATA"); */ - - comp_fn->num_dependent_tasks = 0; - comp_fn->num_dependencies = 0; - - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_REDUCE][bcol_func_index][0][0]; - if (NULL == comp_fn->bcol_function) { - /* if there isn't a bcol function for this then we can't continue */ - ret = OMPI_ERR_NOT_SUPPORTED; - goto Allreduce_Setup_Error; - } - - comp_fn->task_comp_fn = NULL; - - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - } - - nfn = i; - if (call_for_top_function) { - comp_fn = &schedule->component_functions[nfn]; - comp_fn->h_level = nfn; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, nfn); - - assert (NULL != bcol_module); - - /* strcpy (comp_fn->fn_name, "ALLREDUCE_SMALL_DATA"); */ - - /* The allreduce should depend on the reduce */ - comp_fn->num_dependent_tasks = 0; - comp_fn->num_dependencies = 0; - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_ALLREDUCE][bcol_func_index][0][0]; - if (NULL == comp_fn->bcol_function) { - /* if there isn't a bcol function for this then we can't continue */ - ret = OMPI_ERR_NOT_SUPPORTED; - goto Allreduce_Setup_Error; - } - - comp_fn->task_comp_fn = NULL; - - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[nfn]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[nfn]; - 
comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ++nfn; - } - - for (i = num_up_levels - 1; i >= 0; i--) { - comp_fn = &schedule->component_functions[nfn]; - comp_fn->h_level = i; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, i); - - assert (NULL != bcol_module); - - /* strcpy (comp_fn->fn_name, "ALLREDUCE_SMALL_DATA"); */ - - comp_fn->num_dependent_tasks = 0; - comp_fn->num_dependencies = 0; - - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_BCAST][bcol_func_index][0][0]; - if (NULL == comp_fn->bcol_function) { - /* if there isn't a bcol function for this then we can't continue */ - ret = OMPI_ERR_NOT_SUPPORTED; - goto Allreduce_Setup_Error; - } - - comp_fn->task_comp_fn = NULL; - - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[nfn]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[nfn]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ++nfn; - } - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - schedule->component_functions[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - cnt++; - } - } - - schedule->component_functions[i_hier]. 
- constant_group_data.n_of_this_type_in_collective = cnt; - } - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Allreduce_Setup_Error: - - if (NULL != scratch_indx) { - free(scratch_indx); - } - - if (NULL != scratch_num) { - free(scratch_num); - } - - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - } - *coll_desc = NULL; - free (schedule); - - return ret; -} - -int ml_coll_hier_allreduce_setup_new(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int ret; - int topo_index; - int alg; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - alg = mca_coll_ml_component.coll_config[ML_ALLREDUCE][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allreduce_schedule( - &ml_module->topo_list[topo_index], - &ml_module->coll_ml_allreduce_functions[alg], - SMALL_MSG_RANGE); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup Small Message Allreduce")); - return ret; - } - - alg = mca_coll_ml_component.coll_config[ML_ALLREDUCE][ML_LARGE_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allreduce_schedule( - &ml_module->topo_list[topo_index], - &ml_module->coll_ml_allreduce_functions[alg], - LARGE_MSG_RANGE); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup Large Message Allreduce")); - return ret; - } - - if (true == 
mca_coll_ml_component.need_allreduce_support) { - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE]; - if (ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allreduce_schedule( - &ml_module->topo_list[topo_index], - &ml_module->coll_ml_allreduce_functions[ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE], - SMALL_MSG_RANGE); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup Extra Small Message Allreduce")); - return ret; - } - - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE]; - if (ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allreduce_schedule( - &ml_module->topo_list[topo_index], - &ml_module->coll_ml_allreduce_functions[ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE], - LARGE_MSG_RANGE); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup Extra Large Message Allreduce")); - return ret; - } - } - - return OMPI_SUCCESS; -} - -void ml_coll_hier_allreduce_cleanup_new(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int topo_index; - int alg; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - alg = mca_coll_ml_component.coll_config[ML_ALLREDUCE][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return; - } - - if (NULL == ml_module->coll_ml_allreduce_functions[alg]) { - return; - } - - free(ml_module->coll_ml_allreduce_functions[alg]->component_functions); - 
ml_module->coll_ml_allreduce_functions[alg]->component_functions = NULL; - free(ml_module->coll_ml_allreduce_functions[alg]); - ml_module->coll_ml_allreduce_functions[alg] = NULL; - - alg = mca_coll_ml_component.coll_config[ML_ALLREDUCE][ML_LARGE_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return; - } - - free(ml_module->coll_ml_allreduce_functions[alg]->component_functions); - ml_module->coll_ml_allreduce_functions[alg]->component_functions = NULL; - free(ml_module->coll_ml_allreduce_functions[alg]); - ml_module->coll_ml_allreduce_functions[alg] = NULL; - - if (true == mca_coll_ml_component.need_allreduce_support) { - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE]; - if (ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return; - } - - alg = ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE; - free(ml_module->coll_ml_allreduce_functions[alg]->component_functions); - ml_module->coll_ml_allreduce_functions[alg]->component_functions = NULL; - free(ml_module->coll_ml_allreduce_functions[alg]); - ml_module->coll_ml_allreduce_functions[alg] = NULL; - - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE]; - if (ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return; - } - - alg = ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE; - free(ml_module->coll_ml_allreduce_functions[alg]->component_functions); - ml_module->coll_ml_allreduce_functions[alg]->component_functions = NULL; - free(ml_module->coll_ml_allreduce_functions[alg]); - ml_module->coll_ml_allreduce_functions[alg] = NULL; - } -} diff --git 
a/ompi/mca/coll/ml/coll_ml_hier_algorithms_barrier_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_barrier_setup.c deleted file mode 100644 index 54aeac6f86d..00000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_barrier_setup.c +++ /dev/null @@ -1,206 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" - -static int mca_coll_ml_build_barrier_schedule( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t - **coll_desc, - mca_coll_ml_module_t *ml_module) -{ - int i_hier, rc, i_fn, n_fcns, i, - n_hiers = topo_info->n_levels; - - bool call_for_top_func; - mca_bcol_base_module_t *bcol_module; - - mca_coll_ml_compound_functions_t *comp_fn; - mca_coll_ml_collective_operation_description_t *schedule; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - malloc(sizeof(mca_coll_ml_collective_operation_description_t)); - - schedule = *coll_desc; - if (OPAL_UNLIKELY(NULL == schedule)) { - ML_ERROR(("Can't allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Barrier_Setup_Error; - } - - if (topo_info->global_highest_hier_group_index == - topo_info->component_pairs[n_hiers - 1].bcol_index) { - /* The process that is member of highest level subgroup - should call for top algorithms in addition to fan-in/out steps */ - call_for_top_func = true; - n_fcns = 2 * n_hiers - 1; /* Up + Top + Down */ - } else { - /* The process is not member of highest level subgroup, - as result it does 
not call for top algorithm, - but it calls for all fan-in/out steps */ - call_for_top_func = false; - n_fcns = 2 * n_hiers; - } - - if( ml_module->max_fn_calls < n_fcns ) { - ml_module->max_fn_calls = n_fcns; - } - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_fcns; - schedule->topo_info = topo_info; - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t)); - - if (OPAL_UNLIKELY(NULL == schedule->component_functions)) { - ML_ERROR(("Can't allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Barrier_Setup_Error; - } - for (i_fn = 0; i_fn < n_fcns; ++i_fn) { - i_hier = (i_fn < n_hiers ? i_fn : n_fcns - i_fn - 1); - comp_fn = &schedule->component_functions[i_fn]; - - /* The hierarchial level */ - comp_fn->h_level = i_hier; - bcol_module = GET_BCOL(topo_info, i_hier); - - /* The UP direction */ - if (1 + i_fn < n_hiers || (1 + i_fn == n_hiers && !call_for_top_func)) { - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_FANIN][1][0][0]; - - if (NULL == comp_fn->bcol_function) { - ML_VERBOSE(10, ("no function available for BCOL_FANIN, NON_BLOCKING, DATA_SRC_KNOWN")); - rc = OMPI_ERR_NOT_AVAILABLE; - goto Barrier_Setup_Error; - } - - /* Each function call with index K is depended of all K-1 previous indices - - in simple words we will do sequential Fan-In calls */ - comp_fn->num_dependencies = (0 == i_fn) ? 
0 : 1; - comp_fn->num_dependent_tasks = 1; - /* Init component function */ - strcpy(comp_fn->fn_name, "FANIN"); - /* On the highest level */ - } else if ((1 + i_fn == n_hiers && call_for_top_func)) { - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_BARRIER][1][0][0]; - - if (NULL == comp_fn->bcol_function) { - ML_VERBOSE(10, ("no function available for BCOL_BARRIER, NON_BLOCKING, DATA_SRC_KNOWN")); - rc = OMPI_ERR_NOT_AVAILABLE; - goto Barrier_Setup_Error; - } - - /* Each function call with index K is depended of all K-1 previous indices - - in simple words we do sequential calls */ - comp_fn->num_dependencies = (1 == n_hiers) ? 0 : 1; /* All Fan-Ins */ - comp_fn->num_dependent_tasks = n_fcns - n_hiers; /* All Fan-Outs */ - - /* Init component function */ - strcpy(comp_fn->fn_name, "BARRIER"); - - ML_VERBOSE(10, ("func indx %d set to BARRIER %p", i_fn, comp_fn->bcol_function)); - - /* The DOWN direction */ - } else { - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_FANOUT][1][0][0]; - - if (NULL == comp_fn->bcol_function) { - ML_VERBOSE(10, ("no function available for BCOL_FANOUT, NON_BLOCKING, DATA_SRC_KNOWN")); - rc = OMPI_ERR_NOT_AVAILABLE; - goto Barrier_Setup_Error; - } - - /* Each function call with index K is depended of all UP and TOP algths */ - comp_fn->num_dependencies = 1; - comp_fn->num_dependent_tasks = call_for_top_func ? 0 : - (i_fn + 1 == n_fcns ? 
0 : 1); - - /* Init component function */ - strcpy(comp_fn->fn_name, "FANOUT"); - } - - ML_VERBOSE(10, ("func indx %d set to %p", i_fn, comp_fn->bcol_function)); - - if (comp_fn->num_dependent_tasks > 0) { - comp_fn->dependent_task_indices = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int)); - if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indices)) { - ML_ERROR(("Can't allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Barrier_Setup_Error; - } - - /* All indexes follow after this one */ - for (i = 0; i < comp_fn->num_dependent_tasks; ++i) { - comp_fn->dependent_task_indices[i] = i_fn + i + 1; - } - } else { - comp_fn->dependent_task_indices = NULL; - } - - - /* No need completion func for Barrier */ - comp_fn->task_comp_fn = NULL; - - ML_VERBOSE(10, ("Setting collective [Barrier] fn_idx %d, n_of_this_type_in_a_row %d, " - "index_in_consecutive_same_bcol_calls %d.", - i_fn, comp_fn->constant_group_data.n_of_this_type_in_a_row, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls)); - } - - rc = ml_coll_barrier_constant_group_data_setup(topo_info, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("Failed to init const group data.")); - goto Barrier_Setup_Error; - } - - schedule->progress_type = 0; - - return OMPI_SUCCESS; - -Barrier_Setup_Error: - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - schedule->component_functions = NULL; - } - - return rc; -} - -int ml_coll_hier_barrier_setup(mca_coll_ml_module_t *ml_module) -{ - int rc; - mca_coll_ml_topology_t *topo_info = - &ml_module->topo_list[ml_module->collectives_topology_map[ML_BARRIER][ML_SMALL_MSG]]; - - rc = mca_coll_ml_build_barrier_schedule(topo_info, - &ml_module->coll_ml_barrier_function, ml_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - /* Make sure to reset the barrier pointer to NULL */ - topo_info->hierarchical_algorithms[BCOL_BARRIER] = NULL; - - return rc; - } - - return OMPI_SUCCESS; -} diff --git 
a/ompi/mca/coll/ml/coll_ml_hier_algorithms_bcast_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_bcast_setup.c deleted file mode 100644 index 627baf63441..00000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_bcast_setup.c +++ /dev/null @@ -1,851 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" - -static int mca_coll_ml_task_comp_dynamic_root_small_message - (struct mca_coll_ml_task_status_t *task) { - - task->ml_coll_operation->variable_fn_params.root_flag = true; - - return OMPI_SUCCESS; -} - - -int mca_coll_ml_setup_scratch_vals(mca_coll_ml_compound_functions_t *func_list, - int *scratch_indx, int *scratch_num, int n_hiers) -{ - int i_hier, j_hier; - int cnt, value_to_set = 0; - bool prev_is_zero; - mca_coll_ml_compound_functions_t *comp_fn; - mca_bcol_base_module_t *prev_bcol = NULL, - *bcol_module; - - /* Calculate scratch numbers */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - bcol_module = func_list[i_hier].constant_group_data.bcol_module; - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, bcol_module)) { - scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1; - } else { - scratch_indx[i_hier] = 0; - prev_bcol = bcol_module; - } - } - - --i_hier; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i_hier] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i_hier]) 
{ - prev_is_zero = true; - } - - scratch_num[i_hier] = value_to_set; - --i_hier; - } while(i_hier >= 0); - - - /* Each hierarchy has one function to be implemented */ - /* this is the basic setup required of the bcol function */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - /* We want to be generic, but on this stage we support only single - * bcol per hierarchy level - */ - comp_fn = &func_list[i_hier]; - comp_fn->h_level = i_hier; /* hierarchy level */ - - /* we can change this */ - comp_fn->task_comp_fn = mca_coll_ml_task_comp_dynamic_root_small_message; - /* assert(NULL != comp_fn->bcol_function); */ - /* Constants */ - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ML_VERBOSE(10, ("Setting collective [bcast] fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - i_hier, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls, - comp_fn->constant_group_data.n_of_this_type_in_a_row)); - } - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - func_list[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - func_list[j_hier]. - constant_group_data.bcol_module) { - func_list[j_hier].constant_group_data. 
- index_of_this_type_in_collective = cnt; - - cnt++; - } - } - func_list[i_hier].constant_group_data.n_of_this_type_in_collective = cnt; - } - - return OMPI_SUCCESS; - -} - -static void mca_coll_ml_zero_dep_bcast(mca_coll_ml_task_status_t *task_status, int index, mca_coll_ml_compound_functions_t *func) -{ - /* no real dependency, set everything to zero */ - task_status->rt_num_dependencies = 0; - task_status->rt_num_dependent_tasks = 0; - task_status->rt_dependent_task_indices = NULL; -} - -/* - * Build schedule without runtime attributes - */ -static int mca_coll_ml_build_bcast_dynamic_schedule_no_attributes( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc, int bcol_func_index) -{ - - int n_hiers = topo_info->n_levels; - int i_hier, j_hier; - int cnt, value_to_set = 0; - int ret; /* exit code in case of error */ - bool prev_is_zero; - int *scratch_indx = NULL, - *scratch_num = NULL; - - mca_coll_ml_collective_operation_description_t *schedule; - mca_coll_ml_compound_functions_t *comp_fn; - mca_bcol_base_module_t *prev_bcol, - *bcol_module; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - schedule = *coll_desc; - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - scratch_indx = (int *) calloc(n_hiers, sizeof (int)); - if (NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (n_hiers)); - if (NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - prev_bcol = NULL; - - /* Calculate scratch numbers */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i_hier))) { - scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1; - } 
else { - scratch_indx[i_hier] = 0; - prev_bcol = GET_BCOL(topo_info, i_hier); - } - } - - --i_hier; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i_hier] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i_hier]) { - prev_is_zero = true; - } - - scratch_num[i_hier] = value_to_set; - --i_hier; - } while(i_hier >= 0); - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_hiers; - schedule->topo_info = topo_info; - schedule->progress_type = 0; /* Pasha: Not really defined, puting zero */ - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t)); - if (NULL == schedule->component_functions) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - /* Each hierarchy has one function to be implemented */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - /* We want to be generic, but on this stage we support only single - * bcol per hierarchy level - */ - comp_fn = &schedule->component_functions[i_hier]; - comp_fn->h_level = i_hier; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, i_hier); - /* Init component function */ - strcpy (comp_fn->fn_name, "BCAST_TEST_SMALL_DYNAMIC"); - comp_fn->num_dependent_tasks = 0; - comp_fn->num_dependencies = 0; - comp_fn->dependent_task_indices = NULL; - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_UNKNOWN][NON_BLOCKING][BCOL_BCAST][bcol_func_index][0][0]; - comp_fn->task_comp_fn = mca_coll_ml_task_comp_dynamic_root_small_message; - assert(NULL != comp_fn->bcol_function); - /* - comp_fn->bcol_function->progress_fn = - bcol_module->filtered_fns_table[BCOL_BCAST][1][0][0]; - */ - /* Constants */ - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier]; - 
comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ML_VERBOSE(10, ("Setting collective [bcast] fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - i_hier, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls, - comp_fn->constant_group_data.n_of_this_type_in_a_row)); - } - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - schedule->component_functions[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - ML_VERBOSE(10, ("Pasha: Setting collective [bcast small][count %d], fn_idx %d, collective_alg->functions[i].index_of_this_type_in_collective %d", - cnt, i_hier, - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective)); - cnt++; - } - } - - schedule->component_functions[i_hier]. 
- constant_group_data.n_of_this_type_in_collective = cnt; - } - - schedule->task_setup_fn[COLL_ML_ROOT_TASK_FN] = mca_coll_ml_zero_dep_bcast; - schedule->task_setup_fn[COLL_ML_GENERAL_TASK_FN] = mca_coll_ml_zero_dep_bcast; - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Bcast_Setup_Error: - - if (NULL != scratch_indx) { - free(scratch_indx); - } - - if (NULL != scratch_num) { - free(scratch_num); - } - - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - } - - return ret; -} - -static int mca_coll_ml_build_bcast_sequential_schedule_no_attributes( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc, int bcol_func_index) -{ - - int n_hiers = topo_info->n_levels; - int i_hier, j_hier; - int cnt, value_to_set = 0; - int ret; /* exit code in case of error */ - bool prev_is_zero; - int *scratch_indx = NULL, - *scratch_num = NULL; - - mca_coll_ml_collective_operation_description_t *schedule; - mca_coll_ml_compound_functions_t *comp_fn; - mca_coll_ml_compound_functions_t *comp_fns_temp; - mca_bcol_base_module_t *prev_bcol, - *bcol_module; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - schedule = *coll_desc; - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - scratch_indx = (int *) calloc(n_hiers, sizeof (int)); - if (NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (n_hiers)); - if (NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - prev_bcol = NULL; - - /* Calculate scratch numbers */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - if 
(IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i_hier))) { - scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1; - } else { - scratch_indx[i_hier] = 0; - prev_bcol = GET_BCOL(topo_info, i_hier); - } - } - - --i_hier; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i_hier] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i_hier]) { - prev_is_zero = true; - } - - scratch_num[i_hier] = value_to_set; - --i_hier; - } while(i_hier >= 0); - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_hiers; - schedule->topo_info = topo_info; - schedule->progress_type = 0; /* Pasha: Not really defined, puting zero - * Josh: would be nice to define it as "sequential" - * or "concurrent" - */ - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t)); - if (NULL == schedule->component_functions) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - /* Allocate the schedule list */ - schedule->comp_fn_arr = (struct mca_coll_ml_compound_functions_t **) - calloc(n_hiers,sizeof(struct mca_coll_ml_compound_functions_t *)); - if (NULL == schedule->comp_fn_arr) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - /* Each hierarchy has one function to be implemented */ - /* this is the basic setup required of the bcol function */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - /* We want to be generic, but on this stage we support only single - * bcol per hierarchy level - */ - comp_fn = &schedule->component_functions[i_hier]; - comp_fn->h_level = i_hier; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, i_hier); - /* Init component function */ - strcpy (comp_fn->fn_name, "BCAST_TEST_SMALL_SEQUENTIAL"); - - /* should be very simple, shouldn't require any kind of fancy 
dependencies set*/ - - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_BCAST][bcol_func_index][0][0]; - - /* initialize the coll_fn_started flag to false */ - /*comp_fn->coll_fn_started = false;*/ - /* debug print */ - - /* - if(comp_fn->coll_fn_started){ - fprintf(stderr,"this statement is true\n"); - } else { - fprintf(stderr,"done setting to false \n"); - } - */ - - comp_fn->task_comp_fn = mca_coll_ml_task_comp_dynamic_root_small_message; - /* assert(NULL != comp_fn->bcol_function); */ - /* Constants */ - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ML_VERBOSE(10, ("Setting collective [bcast] fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - i_hier, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls, - comp_fn->constant_group_data.n_of_this_type_in_a_row)); - } - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - schedule->component_functions[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - ML_VERBOSE(10, ("Pasha: Setting collective [bcast small][count %d], fn_idx %d, collective_alg->functions[i].index_of_this_type_in_collective %d", - cnt, i_hier, - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective)); - cnt++; - } - } - schedule->component_functions[i_hier]. 
- constant_group_data.n_of_this_type_in_collective = cnt; - } - /* Now that the functions have been set-up properly, we can simple permute the ordering a bit */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - /* first one is trivial */ - comp_fns_temp = (struct mca_coll_ml_compound_functions_t *) - calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t)); - /* else we need to build the schedule */ - - for(j_hier = 0; j_hier < n_hiers; j_hier++) { - /* put the i_hier-th function first in the list */ - if( 0 == j_hier ) { - comp_fns_temp[j_hier] = schedule->component_functions[i_hier]; - } else if( j_hier <= i_hier ) { - comp_fns_temp[j_hier] = schedule->component_functions[j_hier-1]; - } else { - comp_fns_temp[j_hier] = schedule->component_functions[j_hier]; - } - } - /* now let's attach this list to our array of lists */ - schedule->comp_fn_arr[i_hier] = comp_fns_temp; - - } - - -#if 1 - /* I'm going to just loop over each schedule and - * set up the scratch indices, scratch numbers - * and other constant data - */ - for( i_hier = 1; i_hier < n_hiers; i_hier++) { - /* calculate the scratch indices and associated numbers */ - ret = mca_coll_ml_setup_scratch_vals(schedule->comp_fn_arr[i_hier], scratch_indx, - scratch_num, n_hiers); - if( OMPI_SUCCESS != ret ) { - ret = OMPI_ERROR; - goto Bcast_Setup_Error; - } - - } -#endif - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Bcast_Setup_Error: - - if (NULL != scratch_indx) { - free(scratch_indx); - } - - if (NULL != scratch_num) { - free(scratch_num); - } - - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - } - - if (NULL != schedule->comp_fn_arr) { - free(schedule->comp_fn_arr); - } - free (schedule); - *coll_desc = NULL; - - return ret; -} - -static void mca_coll_ml_static_bcast_root(mca_coll_ml_task_status_t *task_status, int index, - mca_coll_ml_compound_functions_t *func) -{ - 
task_status->rt_num_dependencies = 0; - task_status->rt_num_dependent_tasks = 0; - task_status->rt_dependent_task_indices = 0; -} - -static void mca_coll_ml_static_bcast_non_root(mca_coll_ml_task_status_t *task_status, int index, - mca_coll_ml_compound_functions_t *func) -{ - /* Make active only the first level of hierarchy the gets the data, all the rest of levels - will be activated by dependency list */ - if (task_status->ml_coll_operation->variable_fn_params.root_route->level == index) { - task_status->rt_num_dependencies = 0; - task_status->rt_num_dependent_tasks = func->num_dependent_tasks; - task_status->rt_dependent_task_indices = func->dependent_task_indices; - task_status->ml_coll_operation->variable_fn_params.root = - task_status->ml_coll_operation->variable_fn_params.root_route->rank; - } else { - task_status->rt_num_dependencies = 1; /* wait for root */ - task_status->rt_num_dependent_tasks = 0; /* no depended task */ - task_status->rt_dependent_task_indices = NULL; /* NULL */ - } -} - -static int mca_coll_ml_build_bcast_known_schedule_no_attributes( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc, int bcol_func_index) -{ - - int n_hiers = topo_info->n_levels; - int i_hier, j_hier; - int cnt, value_to_set = 0; - int ret; /* exit code in case of error */ - bool prev_is_zero; - int *scratch_indx = NULL, - *scratch_num = NULL; - - mca_coll_ml_collective_operation_description_t *schedule; - mca_coll_ml_compound_functions_t *comp_fn; - mca_bcol_base_module_t *prev_bcol, - *bcol_module; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - schedule = *coll_desc; - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - scratch_indx = (int *) calloc(n_hiers, sizeof (int)); - if (NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - 
goto Bcast_Setup_Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (n_hiers)); - if (NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - prev_bcol = NULL; - - /* Calculate scratch numbers */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i_hier))) { - scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1; - } else { - prev_bcol = GET_BCOL(topo_info, i_hier); - } - } - - --i_hier; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i_hier] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i_hier]) { - prev_is_zero = true; - } - - scratch_num[i_hier] = value_to_set; - --i_hier; - } while(i_hier >= 0); - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_hiers; - schedule->topo_info = topo_info; - schedule->progress_type = 0; /* Pasha: Not really defined, puting zero */ - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t)); - if (NULL == schedule->component_functions) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - /* Each hierarchy has one function to be implemented */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - int j; - /* We want to be generic, but on this stage we support only single - * bcol per hierarchy level - */ - comp_fn = &schedule->component_functions[i_hier]; - comp_fn->h_level = i_hier; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, i_hier); - /* Init component function */ - strcpy (comp_fn->fn_name, "BCAST_TEST_SMALL_STATIC"); - /* Hack for single layer of hierarchy */ - if (1 == n_hiers) { - comp_fn->num_dependent_tasks = n_hiers - 1; - comp_fn->num_dependencies = 0; - } else { - comp_fn->num_dependent_tasks = n_hiers; /* root will 
have n_hier - 1 depended tasks, non root zero*/ - comp_fn->num_dependencies = 0; /* root will have zero dependencies */ - } - - if (0 != comp_fn->num_dependent_tasks) { - comp_fn->dependent_task_indices = (int *)calloc(n_hiers, sizeof(int)); - for (j = 0; j < n_hiers; j++) { - comp_fn->dependent_task_indices[j] = j; /* only root will use this one */ - } - } - - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_BCAST][bcol_func_index][0][0]; - - comp_fn->task_comp_fn = mca_coll_ml_task_comp_dynamic_root_small_message; - /* assert(NULL != comp_fn->bcol_function); */ - /* Constants */ - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ML_VERBOSE(10, ("Setting collective [bcast] fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - i_hier, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls, - comp_fn->constant_group_data.n_of_this_type_in_a_row)); - } - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - schedule->component_functions[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - ML_VERBOSE(10, ("Pasha: Setting collective [bcast small][count %d], fn_idx %d, collective_alg->functions[i].index_of_this_type_in_collective %d", - cnt, i_hier, - schedule->component_functions[j_hier]. 
- constant_group_data.index_of_this_type_in_collective)); - cnt++; - } - } - schedule->component_functions[i_hier]. - constant_group_data.n_of_this_type_in_collective = cnt; - } - - schedule->task_setup_fn[COLL_ML_ROOT_TASK_FN] = mca_coll_ml_static_bcast_root; - schedule->task_setup_fn[COLL_ML_GENERAL_TASK_FN] = mca_coll_ml_static_bcast_non_root; - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Bcast_Setup_Error: - - if (NULL != scratch_indx) { - free(scratch_indx); - } - - if (NULL != scratch_num) { - free(scratch_num); - } - - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - } - free (schedule); - *coll_desc = NULL; - - return ret; -} - - - -#define BCAST_SMALL 1 -#define BCAST_LARGE 5 - -int ml_coll_hier_bcast_setup(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int ret, i , size_code, alg; - int topo_index = 0; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - for (i = 0; i < ML_NUM_MSG; i++) { - - switch (i) { - case ML_SMALL_MSG: - size_code = BCAST_SMALL; - break; - case ML_LARGE_MSG: - size_code = BCAST_LARGE; - break; - default: - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return OMPI_ERROR; - } - - alg = mca_coll_ml_component.coll_config[ML_BCAST][i].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_BCAST][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return OMPI_ERROR; - } - - switch (alg) { - case ML_BCAST_SMALL_DATA_KNOWN: - case ML_BCAST_LARGE_DATA_KNOWN: - ret = mca_coll_ml_build_bcast_known_schedule_no_attributes(&topo_info[topo_index], - &ml_module->coll_ml_bcast_functions[alg], size_code); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static bcast")); - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return 
ret; - } - break; - case ML_BCAST_SMALL_DATA_UNKNOWN: - case ML_BCAST_LARGE_DATA_UNKNOWN: - ret = mca_coll_ml_build_bcast_dynamic_schedule_no_attributes(&topo_info[topo_index], - &ml_module->coll_ml_bcast_functions[alg], size_code); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup dynamic bcast")); - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return ret; - } - break; - case ML_BCAST_SMALL_DATA_SEQUENTIAL: - case ML_BCAST_LARGE_DATA_SEQUENTIAL: - ret = mca_coll_ml_build_bcast_sequential_schedule_no_attributes(&topo_info[topo_index], - &ml_module->coll_ml_bcast_functions[alg], size_code); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static bcast")); - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return ret; - } - break; - default: - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return OMPI_ERROR; - } - assert(NULL != ml_module->coll_ml_bcast_functions[alg] && - NULL != ml_module->coll_ml_bcast_functions[alg]); - } - - topo_info->hierarchical_algorithms[BCOL_BCAST] = NULL; - return ret; -} - -void ml_coll_hier_bcast_cleanup(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int i, alg; - int topo_index = 0; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - assert (NULL != ml_module); - - for (i = 0; i < ML_NUM_MSG; i++) { - - switch (i) { - case ML_SMALL_MSG: - case ML_LARGE_MSG: - break; - default: - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return; - } - - alg = mca_coll_ml_component.coll_config[ML_BCAST][i].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_BCAST][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return; - } - - if (NULL != ml_module->coll_ml_bcast_functions[alg]) { - if (ML_BCAST_SMALL_DATA_KNOWN <= alg && ML_BCAST_LARGE_DATA_SEQUENTIAL >= alg) { - if 
(ml_module->coll_ml_bcast_functions[alg]->component_functions) { - free(ml_module->coll_ml_bcast_functions[alg]->component_functions); - ml_module->coll_ml_bcast_functions[alg]->component_functions = NULL; - } - - free(ml_module->coll_ml_bcast_functions[alg]); - ml_module->coll_ml_bcast_functions[alg] = NULL; - } else { - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - } - } - } -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.c deleted file mode 100644 index baab9624468..00000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.h" - -int mca_coll_ml_schedule_init_scratch(mca_coll_ml_topology_t *topo_info, - mca_coll_ml_schedule_hier_info_t *h_info, - int **out_scratch_indx, int **out_scratch_num) -{ - bool prev_is_zero; - int i, cnt; - int n_hiers = h_info->n_hiers; - int value_to_set = 0; - mca_bcol_base_module_t *prev_bcol = NULL; - int *scratch_indx, *scratch_num; - - scratch_indx = *out_scratch_indx = - (int *) calloc(n_hiers * 2, sizeof(int)); - if (NULL == *out_scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - scratch_num = *out_scratch_num = - (int *) calloc(n_hiers * 2, sizeof(int)); - if (NULL == *out_scratch_num) { - ML_ERROR(("Can't allocate memory.")); - free(out_scratch_indx); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for (i = 0, cnt = 0; i < h_info->num_up_levels; ++i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, 
GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* top - only if the proc arrive to highest_level_is_global_highest_level */ - if (h_info->call_for_top_function) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, n_hiers - 1))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, n_hiers - 1); - } - ++cnt; - } - - /* going down */ - for (i = h_info->num_up_levels - 1; i >= 0; --i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - i = cnt - 1; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i]) { - prev_is_zero = true; - } - - scratch_num[i] = value_to_set; - --i; - } while(i >= 0); - - return OMPI_SUCCESS; -} - -mca_coll_ml_collective_operation_description_t * - mca_coll_ml_schedule_alloc(mca_coll_ml_schedule_hier_info_t *h_info) -{ - mca_coll_ml_collective_operation_description_t *schedule = NULL; - - schedule = (mca_coll_ml_collective_operation_description_t *) - malloc(sizeof(mca_coll_ml_collective_operation_description_t)); - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - return NULL; - } - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = h_info->nbcol_functions; - schedule->progress_type = 0; - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(h_info->nbcol_functions, sizeof(struct mca_coll_ml_compound_functions_t)); - if (NULL == schedule->component_functions) { - ML_ERROR(("Can't allocate memory.")); - free(schedule); - return NULL; - } - return schedule; -} - -void 
mca_coll_ml_call_types(mca_coll_ml_schedule_hier_info_t *h_info, - mca_coll_ml_collective_operation_description_t *schedule) -{ - int i_hier, j_hier, cnt; - mca_bcol_base_module_t *current_bcol = NULL; - - for (i_hier = 0; i_hier < h_info->n_hiers; i_hier++) { - current_bcol = - schedule->component_functions[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < h_info->n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - cnt++; - } - } - schedule->component_functions[i_hier]. - constant_group_data.n_of_this_type_in_collective = cnt; - } -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.h b/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.h deleted file mode 100644 index ba5f9aea990..00000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_COLL_ML_COMMON_SETUP_H -#define MCA_COLL_ML_COMMON_SETUP_H - -#include "ompi_config.h" - -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/coll/ml/coll_ml.h" - -struct mca_coll_ml_schedule_hier_info_t { - int n_hiers; - int num_up_levels; - int nbcol_functions; - bool call_for_top_function; -}; -typedef struct mca_coll_ml_schedule_hier_info_t - mca_coll_ml_schedule_hier_info_t; - -#define MCA_COLL_ML_INIT_HIER_INFO(info, n_hr, g_hr, ml_module) \ -do { \ - info.n_hiers = n_hr; \ - if (g_hr == \ - ml_module->component_pairs[n_hr - 1].bcol_index) { \ - /* The process that is member of highest level subgroup \ - should call for top algorithms in addition to fan-in/out steps*/ \ - ML_VERBOSE(9, ("Setting top %d %d", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \ - info.call_for_top_function = true; \ - /* hier level run only top algorithm, so we deduct 1 */ \ - info.num_up_levels = n_hr - 1; \ - /* Top algorithm is called only once, so we deduct 1 */ \ - info.nbcol_functions = 2 * n_hr - 1; \ - } else { \ - ML_VERBOSE(9, ("not setting top %d %d", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \ - /* The process is not member of highest level subgroup, \ - as result it does not call for top algorithm, \ - but it calls for all fan-in/out steps */ \ - info.call_for_top_function = false; \ - info.num_up_levels = n_hr; \ - info.nbcol_functions = 2 * n_hr; \ - } \ -} while (0); - -#define MCA_COLL_ML_SET_COMP_FN(fn, level, module, s_level, \ - scratch_indx, scratch_num, qc, name) \ -do { \ - fn->h_level = level; /* hierarchy level */ \ - strcpy (fn->fn_name, "name"); \ - fn->num_dependent_tasks = 0; \ - fn->num_dependencies = 0; \ - fn->task_comp_fn = NULL; \ - fn->constant_group_data.bcol_module = GET_BCOL(module, level); \ - fn->constant_group_data.index_in_consecutive_same_bcol_calls = \ - scratch_indx[s_level];\ - 
fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[s_level]; \ - fn->constant_group_data.n_of_this_type_in_collective = 0; \ - fn->constant_group_data.index_of_this_type_in_collective = 0; \ - fn->bcol_function = fn->constant_group_data.bcol_module-> \ - filtered_fns_table[qc[0]] \ - [qc[1]] \ - [qc[2]] \ - [qc[3]] \ - [qc[4]] \ - [qc[5]]; \ -} while (0); - -#define MCA_COLL_ML_QUERY_SIZE 6 - -#define MCA_COLL_ML_SET_QUERY(query, src_type, blocking, coll_type, index, other0, other1) \ -do { \ - query[0] = src_type; \ - query[1] = blocking; \ - query[2] = coll_type; \ - query[3] = index; \ - query[4] = other0; \ - query[5] = other1; \ -} while (0); - -int mca_coll_ml_schedule_init_scratch(mca_coll_ml_topology_t *topo_info, - mca_coll_ml_schedule_hier_info_t *h_info, - int **out_scratch_indx, int **out_scratch_num); - -mca_coll_ml_collective_operation_description_t* -mca_coll_ml_schedule_alloc(mca_coll_ml_schedule_hier_info_t *h_info); - -void mca_coll_ml_call_types(mca_coll_ml_schedule_hier_info_t *h_info, - mca_coll_ml_collective_operation_description_t *schedule); -#endif diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_reduce_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_reduce_setup.c deleted file mode 100644 index 579f77d12b7..00000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_reduce_setup.c +++ /dev/null @@ -1,371 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" -static int mca_coll_ml_task_comp_static_reduce - (struct mca_coll_ml_task_status_t *task) { - - task->ml_coll_operation->variable_fn_params.root_flag = true; - - return OMPI_SUCCESS; -} - -static void mca_coll_ml_static_reduce_non_root(mca_coll_ml_task_status_t *task_status, int index, - mca_coll_ml_compound_functions_t *func) -{ - /* I am not a root rank, but someone in my group is a root*/ - if (task_status->ml_coll_operation->variable_fn_params.root_route->level == index) { - task_status->rt_num_dependencies = func->num_dependencies; - task_status->rt_num_dependent_tasks = 0; - task_status->rt_dependent_task_indices = NULL; - task_status->ml_coll_operation->variable_fn_params.root = - task_status->ml_coll_operation->variable_fn_params.root_route->rank; - } else { - task_status->rt_num_dependencies = 0; - task_status->rt_num_dependent_tasks = 1; - task_status->rt_dependent_task_indices = &task_status->ml_coll_operation->variable_fn_params.root_route->level; - } - -} - -static void mca_coll_ml_static_reduce_root(mca_coll_ml_task_status_t *task_status, int index, - mca_coll_ml_compound_functions_t *func) -{ - task_status->rt_num_dependencies = func->num_dependencies; - task_status->rt_num_dependent_tasks = 0; - task_status->rt_dependent_task_indices = NULL; -} - -/* - * Fill up the collective descriptor - * - */ -static int mca_coll_ml_build_static_reduce_schedule( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc) -{ - int i_hier, j_hier, n_fcns, - n_hiers = topo_info->n_levels; - int *scratch_indx = NULL, - *scratch_num = NULL; - int cnt, value_to_set = 0; - int ret = OMPI_SUCCESS; - bool prev_is_zero; - mca_coll_ml_compound_functions_t *comp_fns_temp; - mca_bcol_base_module_t 
*prev_bcol, - *bcol_module; - mca_coll_ml_compound_functions_t *comp_fn; - mca_coll_ml_collective_operation_description_t *schedule = NULL; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - - schedule = *coll_desc; - if (OPAL_UNLIKELY(NULL == schedule)) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - scratch_indx = (int *) calloc (n_hiers, sizeof (int)); - if (NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (n_hiers)); - if (NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - prev_bcol = NULL; - - /* Calculate scratch numbers */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i_hier))) { - scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1; - } else { - scratch_indx[i_hier] = 0; - prev_bcol = GET_BCOL(topo_info, i_hier); - } - } - - --i_hier; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i_hier] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i_hier]) { - prev_is_zero = true; - } - - scratch_num[i_hier] = value_to_set; - --i_hier; - } while(i_hier >= 0); - - /* All hierarchies call one function, unlike other collectives */ - n_fcns = n_hiers; - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_fcns; - schedule->topo_info = topo_info; - schedule->progress_type = 0; - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t)); - - if (OPAL_UNLIKELY(NULL == schedule->component_functions)) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - - for 
(i_hier = 0; i_hier < n_hiers; ++i_hier) { - comp_fn = &schedule->component_functions[i_hier]; - - /* The hierarchial level */ - comp_fn->h_level = i_hier; - bcol_module = GET_BCOL(topo_info, i_hier); - - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_REDUCE][1][0][0]; - - strcpy(comp_fn->fn_name, "REDUCE"); - ML_VERBOSE(10, ("func indx %d set to %p", i_hier, comp_fn->bcol_function)); - - - ML_VERBOSE(1,("In ML_REDUCE_SETUP .. looks fine here")); - /* No need completion func for Barrier */ - comp_fn->task_comp_fn = mca_coll_ml_task_comp_static_reduce; - - /* Constants */ - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ML_VERBOSE(10, ("Setting collective [reduce] fn_idx %d, n_of_this_type_in_a_row %d, " - "index_in_consecutive_same_bcol_calls %d.", - i_hier, comp_fn->constant_group_data.n_of_this_type_in_a_row, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls)); - } - - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - schedule->component_functions[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - cnt++; - } - } - schedule->component_functions[i_hier]. - constant_group_data.n_of_this_type_in_collective = cnt; - } - - /* Manju: Reduction should always use the fixed schedule. 
- * The subgroups that this process is leader should be executed first, then - * it should execute the subgroups where this process is not a leader, and - * then execute the subgroup that includes the root. - */ - - /* Allocate the schedule list */ - schedule->comp_fn_arr = (struct mca_coll_ml_compound_functions_t **) - calloc(n_hiers,sizeof(struct mca_coll_ml_compound_functions_t *)); - if (NULL == schedule->comp_fn_arr) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - /* Now that the functions have been set-up properly, we can simple permute the ordering a bit */ - - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - /* first one is trivial */ - int leader_hierarchy = 0; - int non_leader_hierarchy = 0; - int func_index; - - comp_fns_temp = (struct mca_coll_ml_compound_functions_t *) - calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t)); - - leader_hierarchy = 0; - non_leader_hierarchy = n_hiers - 2; - - for(j_hier = 0; j_hier < n_hiers - 1 ; j_hier++) { - - func_index = j_hier < i_hier ? j_hier : j_hier + 1; - /* I'm a leader for this group */ - if (0 == topo_info->component_pairs->subgroup_module->my_index) { - comp_fns_temp[leader_hierarchy++] = - schedule->component_functions[func_index]; - } - else { - comp_fns_temp[non_leader_hierarchy--] = - schedule->component_functions[func_index]; - } - } - - comp_fns_temp[j_hier] = schedule->component_functions[i_hier]; - /* now let's attach this list to our array of lists */ - schedule->comp_fn_arr[i_hier] = comp_fns_temp; - } - - /* Manju: Do we need this ? */ - - /* I'm going to just loop over each schedule and - * set up the scratch indices, scratch numbers - * and other constant data - */ - /* - for( i_hier = 1; i_hier < n_hiers; i_hier++) { - ret = mca_coll_ml_setup_scratch_vals(schedule->comp_fn_arr[i_hier], scratch_indx, - scratch_num, n_hiers); - if( OMPI_SUCCESS != ret ) { - ret = OMPI_ERROR; - goto Error; - } - - } - */ - - /* Do I need this ? 
*/ - schedule->task_setup_fn[COLL_ML_ROOT_TASK_FN] = mca_coll_ml_static_reduce_root; - schedule->task_setup_fn[COLL_ML_GENERAL_TASK_FN] = mca_coll_ml_static_reduce_non_root; - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - /* reduce does not use the component functions so we no longer need this. see - * coll_ml_reduce.c:442 */ - free (schedule->component_functions); - schedule->component_functions = NULL; - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Error: - if (NULL != scratch_num) { - free (scratch_num); - } - - if (NULL != scratch_indx) { - free (scratch_indx); - } - - if (NULL != schedule) { - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - schedule->component_functions = NULL; - } - free (schedule); - *coll_desc = NULL; - } - - return ret; -} - - -int ml_coll_hier_reduce_setup(mca_coll_ml_module_t *ml_module) -{ - int alg, ret, topo_index=0; - mca_coll_ml_topology_t *topo_info = - &ml_module->topo_list[ml_module->collectives_topology_map[ML_REDUCE][ML_SMALL_MSG]]; - - if ( ml_module->max_fn_calls < topo_info->n_levels ) { - ml_module->max_fn_calls = topo_info->n_levels; - } - - - alg = mca_coll_ml_component.coll_config[ML_REDUCE][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_REDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_REDUCE] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_static_reduce_schedule(&ml_module->topo_list[topo_index], - &ml_module->coll_ml_reduce_functions[alg]); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static reduce")); - return ret; - } - - - return OMPI_SUCCESS; -} - -void ml_coll_hier_reduce_cleanup(mca_coll_ml_module_t *ml_module) -{ - int alg, i, topo_index=0; - mca_coll_ml_topology_t *topo_info = - 
&ml_module->topo_list[ml_module->collectives_topology_map[ML_REDUCE][ML_SMALL_MSG]]; - - if ( ml_module->max_fn_calls < topo_info->n_levels ) { - ml_module->max_fn_calls = topo_info->n_levels; - } - - - alg = mca_coll_ml_component.coll_config[ML_REDUCE][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_REDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_REDUCE] = NULL; - return; - } - - if (NULL == ml_module->coll_ml_reduce_functions[alg]) { - return; - } - - if (ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr) { - for (i=0; itopo_list[topo_index].n_levels; i++) { - if (ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr[i]) { - free(ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr[i]); - ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr[i] = NULL; - } - } - - free(ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr); - ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr = NULL; - } - - ml_module->coll_ml_reduce_functions[alg]->component_functions = NULL; - - free(ml_module->coll_ml_reduce_functions[alg]); - ml_module->coll_ml_reduce_functions[alg] = NULL; -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_setup.c deleted file mode 100644 index 8751a25ab9a..00000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_setup.c +++ /dev/null @@ -1,539 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/include/ompi/constants.h" - -int ml_coll_up_and_down_hier_setup(mca_coll_ml_module_t *ml_module, - mca_coll_ml_topology_t *topo_info, - int up_function_idx, - int top_function_idx, - int down_function_idx, - int collective) -{ - /* local variables */ - int i, j, cnt, value_to_set = -1; - int ret = OMPI_SUCCESS, num_up_levels; - - int num_hierarchies = topo_info->n_levels; - int global_high_hierarchy_index = topo_info->global_highest_hier_group_index; - - bool call_for_top_function, prev_is_zero; - - int *scratch_indx = NULL, *scratch_num = NULL; - - coll_ml_collective_description_t *collective_alg = NULL; - mca_bcol_base_module_t *bcol_module = NULL, - *prev_bcol = NULL; - - /* RLG: one blocking barrier collective algorithm - this is really a hack, - * we need to figure out how to do this in a bit more extensible - * manner. - */ - collective_alg = (coll_ml_collective_description_t *) - malloc(sizeof(coll_ml_collective_description_t)); - if (NULL == collective_alg) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - /* am I a member of the highest level subgroup ? 
*/ - if (global_high_hierarchy_index == - topo_info->component_pairs[num_hierarchies - 1].bcol_index) { - /* The process that is member of highest level subgroup - should call for top algorithms in addition to fan-in/out steps*/ - call_for_top_function = true; - /* hier level run only top algorithm, so we deduct 1 */ - num_up_levels = num_hierarchies - 1; - /* Top algorithm is called only once, so we deduct 1 */ - collective_alg->n_functions = 2 * num_hierarchies - 1; - } else { - /* The process is not member of highest level subgroup, - as result it does not call for top algorithm, - but it calls for all fan-in/out steps */ - call_for_top_function = false; - num_up_levels = num_hierarchies; - collective_alg->n_functions = 2 * num_hierarchies; - } - - ML_VERBOSE(10, ("high_index %d == bcol_index %d: Call top %d, num_up_levels %d, collective_alg->n_functions %d", - global_high_hierarchy_index, - topo_info->component_pairs[num_hierarchies - 1].bcol_index, - call_for_top_function, - num_up_levels, - collective_alg->n_functions )); - - /* allocate space for the functions */ - collective_alg->functions = (mca_bcol_base_function_t *) - calloc(collective_alg->n_functions, sizeof(mca_bcol_base_function_t)); - if( NULL == collective_alg->functions) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - /* Algorithm Description: - * ===================== - * The algorithm used here for an N level system - * - up to level N-2, inclusive : up algorithm (fan in in barrier, reduce in Allreduce) - * - level N-1: top algorithm (barrier or allreduce) - * - level N-2, to level 0: down algorithm (fanout) - */ - - - /* Starting scratch_num and scratch_index calculations */ - /* =================================================== */ - - /* Figure out how many of the same bcols are called in a row. 
- * The index of the bcol in row we store in scratch_indx and - * the total number of bcols in the row we store in scratch_num */ - scratch_indx = (int *) calloc (2 * num_hierarchies, sizeof (int)); - if(NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (2 * num_hierarchies)); - if(NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - /* We go through all stages of algorithm (up, top, down) - * and calculate bcol index. If previous bcol is the same type as current - * one the counter index is increased, other way the index is zero */ - prev_bcol = NULL; - /* going up */ - for (i = 0, cnt = 0; i < num_up_levels; ++i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* top - only if the proc arrive to highest_level_is_global_highest_level */ - if (call_for_top_function) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, num_hierarchies - 1))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, num_hierarchies - 1); - } - - ++cnt; - } - - /* going down */ - for (i = num_up_levels - 1; i >= 0; --i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* - * Calculate the number of the same bcols in row. - * We parse the index array, if index is zero - * it means that the row is done and we start - * to calculate next bcols row. 
The maximum number - * for the row is equal to maximal bcol index in the row + 1 - */ - i = cnt - 1; - prev_is_zero = true; - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i]) { - prev_is_zero = true; - } - - scratch_num[i] = value_to_set; - --i; - } while(i >= 0); - - /* =========================================================== */ - /* We are done with scratch_num and scratch_index calculations */ - - /* Setup function call for each algorithm step */ - cnt = 0; - /* up phase */ - for (i = 0; i < num_up_levels; i++) { - bcol_module = GET_BCOL(topo_info, i); - collective_alg->functions[cnt].fn_idx = up_function_idx; - collective_alg->functions[cnt].bcol_module = bcol_module; - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls = scratch_indx[cnt]; - collective_alg->functions[cnt].n_of_this_type_in_a_row = scratch_num[cnt]; - ML_VERBOSE(10, ("Setting collective [collective code %d][count %d], fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - collective, cnt, collective_alg->functions[cnt].fn_idx, - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls, - collective_alg->functions[cnt].n_of_this_type_in_a_row)); - ++cnt; - } - - /* top function */ - if (call_for_top_function) { - bcol_module = GET_BCOL(topo_info, num_hierarchies - 1); - collective_alg->functions[cnt].fn_idx = top_function_idx; - collective_alg->functions[cnt].bcol_module = bcol_module; - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls = scratch_indx[cnt]; - collective_alg->functions[cnt].n_of_this_type_in_a_row = scratch_num[cnt]; - ML_VERBOSE(10, ("Setting collective [collective code %d][count %d], fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - collective, cnt, collective_alg->functions[cnt].fn_idx, - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls, - 
collective_alg->functions[cnt].n_of_this_type_in_a_row)); - ++cnt; - } - - /* down phase*/ - for (i = num_up_levels - 1; i >= 0; i--) { - bcol_module = GET_BCOL(topo_info, i); - collective_alg->functions[cnt].fn_idx = down_function_idx; - collective_alg->functions[cnt].bcol_module = bcol_module; - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls = scratch_indx[cnt]; - collective_alg->functions[cnt].n_of_this_type_in_a_row = scratch_num[cnt]; - ML_VERBOSE(10, ("Setting collective [collective code %d][count %d], fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - collective, cnt, collective_alg->functions[cnt].fn_idx, - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls, - collective_alg->functions[cnt].n_of_this_type_in_a_row)); - ++cnt; - } - - /* figure out how many times this bcol is used in this collective call */ - for (i = 0; i < collective_alg->n_functions; i++) { - mca_bcol_base_module_t *current_bcol= - collective_alg->functions[i].bcol_module; - - cnt = 0; - for (j = 0; j < collective_alg->n_functions; ++j) { - if (current_bcol == - collective_alg->functions[j].bcol_module) { - collective_alg->functions[j].index_of_this_type_in_collective = cnt; - ML_VERBOSE(10, ("Pasha: Setting collective [collective code %d][count %d], fn_idx %d, collective_alg->functions[i].index_of_this_type_in_collective %d", - collective, cnt, i, - collective_alg->functions[j].index_of_this_type_in_collective)); - cnt++; - } - } - - collective_alg->functions[i].n_of_this_type_in_collective=cnt; - ML_VERBOSE(10, ("Pasha: Setting collective [collective code %d][count %d], fn_idx %d, collective_alg->functions[i].n_of_this_type_in_collective %d", - collective, cnt, i, - collective_alg->functions[i].n_of_this_type_in_collective)); - } - - /* set Barrier algorithm */ - topo_info->hierarchical_algorithms[collective] = collective_alg; - /* Setup maximum number function calls, it is used for resource allocation */ - 
ml_module->max_fn_calls = (collective_alg->n_functions > ml_module->max_fn_calls) ? - collective_alg->n_functions : ml_module->max_fn_calls; - /* Ishai: What is this n_buffers? I did not find where it is being used*/ - topo_info->hierarchical_algorithms[collective]->n_buffers = 1; - - /* Release temporary memories */ - if (NULL != scratch_indx) { - free(scratch_indx); - } - - if (NULL != scratch_num) { - free(scratch_num); - } - - return OMPI_SUCCESS; - -Error: - if (NULL != collective_alg->functions) { - free(collective_alg->functions); - } - - if (NULL != collective_alg) { - free(collective_alg); - } - - if (NULL != scratch_indx) { - free(scratch_indx); - } - - if (NULL != scratch_num) { - free(scratch_num); - } - - return ret; -} - -int ml_coll_hier_allreduce_setup(mca_coll_ml_module_t *ml_module) -{ - int topo_index = - ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_ALLREDUCE]; - int ret = ml_coll_up_and_down_hier_setup(ml_module, - &ml_module->topo_list[topo_index], - BCOL_REDUCE, - BCOL_ALLREDUCE, - BCOL_BCAST, - BCOL_ALLREDUCE); - - if (OMPI_SUCCESS == ret) { - return ret; - } - - /* Make sure to reset the allreduce pointer to NULL */ - ml_module->topo_list[topo_index].hierarchical_algorithms[BCOL_ALLREDUCE] = NULL; - return ret; -} - -#if 0 -/* - * Manju: New setup function in coll_ml_hier_algorithms_reduce_setup.c - */ -/* Ishai: Reduce is not an hier algorithm (it is rooted) - it needs a different ML algorithm */ -/* Need to rewrite */ -int ml_coll_hier_reduce_setup(mca_coll_ml_module_t *ml_module) -{ - int topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_GATHER]; - /* Hierarchy Setup */ - int ret = ml_coll_up_and_down_hier_setup(ml_module, - &ml_module->topo_list[topo_index], - BCOL_REDUCE, /*NULL,*/ - BCOL_REDUCE, - BCOL_REDUCE, /*NULL,*/ - BCOL_REDUCE); - if (OMPI_SUCCESS == ret) { - return ret; - } - /* Make sure to reset the bcast pointer to NULL */ - 
ml_module->topo_list[topo_index].hierarchical_algorithms[BCOL_BCAST] = NULL; - return ret; -} -#endif - -int ml_coll_barrier_constant_group_data_setup( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t *schedule) -{ - /* local variables */ - int i, j, cnt, value_to_set = -1, ret = OMPI_SUCCESS, num_up_levels, - num_hierarchies = topo_info->n_levels, n_functions = schedule->n_fns, - global_high_hierarchy_index = topo_info->global_highest_hier_group_index; - - bool call_for_top_function, prev_is_zero; - mca_coll_ml_utility_data_t *constant_group_data = NULL; - - int *scratch_indx = NULL, *scratch_num = NULL; - - mca_bcol_base_module_t *prev_bcol = NULL, - *bcol_module = NULL; - - /* Am I a member of the highest level subgroup ? */ - if (global_high_hierarchy_index == - topo_info->component_pairs[num_hierarchies - 1].bcol_index) { - /* The process that is member of highest level subgroup - should call for top algorithms in addition to fan-in/out steps*/ - call_for_top_function = true; - /* hier level run only top algorithm, so we deduct 1 */ - num_up_levels = num_hierarchies - 1; - } else { - /* The process is not member of highest level subgroup, - as result it does not call for top algorithm, - but it calls for all fan-in/out steps */ - call_for_top_function = false; - num_up_levels = num_hierarchies; - } - - /* Algorithm Description: - * ===================== - * The algorithm used here for an N level system - * - up to level N-2, inclusive : up algorithm (Fan-In in Barrier) - * - level N-1: top algorithm (Barrier algth) - * - level N-2, to level 0: down algorithm (Fan-out) - */ - - - /* Starting scratch_num and scratch_index calculations */ - /* =================================================== */ - - /* Figure out how many of the same bcols are called in a row. 
- * The index of the bcol in row we store in scratch_indx and - * the total number of bcols in the row we store in scratch_num */ - scratch_indx = (int *) calloc (2 * num_hierarchies, sizeof (int)); - if(NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Const_Data_Setup_Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (2 * num_hierarchies)); - if(NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Const_Data_Setup_Error; - } - - /* We go through all stages of algorithm (up, top, down) - * and calculate bcol index. If previous bcol is the same type as current - * one the counter index is increased, other way the index is zero */ - prev_bcol = NULL; - - /* Going up */ - for (i = 0, cnt = 0; i < num_up_levels; ++i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* Top - only if the proc arrive to highest_level_is_global_highest_level */ - if (call_for_top_function) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, num_hierarchies - 1))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, num_hierarchies - 1); - } - - ++cnt; - } - - /* Going down */ - for (i = num_up_levels - 1; i >= 0; --i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* - * Calculate the number of the same bcols in row. - * We parse the index array, if index is zero - * it means that the row is done and we start - * to calculate next bcols row. 
The maximum number - * for the row is equal to maximal bcol index in the row + 1 - */ - i = cnt - 1; - prev_is_zero = true; - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i]) { - prev_is_zero = true; - } - - scratch_num[i] = value_to_set; - --i; - } while(i >= 0); - - /* =========================================================== */ - /* We are done with scratch_num and scratch_index calculations */ - - /* Setup function call for each algorithm step */ - cnt = 0; - - /* Up phase */ - for (i = 0; i < num_up_levels; ++i) { - bcol_module = GET_BCOL(topo_info, i); - constant_group_data = &schedule->component_functions[cnt].constant_group_data; - - constant_group_data->bcol_module = bcol_module; - constant_group_data->index_in_consecutive_same_bcol_calls = scratch_indx[cnt]; - constant_group_data->n_of_this_type_in_a_row = scratch_num[cnt]; - - ++cnt; - } - - /* Top function */ - if (call_for_top_function) { - bcol_module = GET_BCOL(topo_info, num_hierarchies - 1); - constant_group_data = &schedule->component_functions[cnt].constant_group_data; - - constant_group_data->bcol_module = bcol_module; - constant_group_data->index_in_consecutive_same_bcol_calls = scratch_indx[cnt]; - constant_group_data->n_of_this_type_in_a_row = scratch_num[cnt]; - - ++cnt; - } - - /* Down phase */ - for (i = num_up_levels - 1; i >= 0; --i) { - bcol_module = GET_BCOL(topo_info, i); - constant_group_data = &schedule->component_functions[cnt].constant_group_data; - - constant_group_data->bcol_module = bcol_module; - - /* All Fan-Outs will be done in parallel */ - constant_group_data->index_in_consecutive_same_bcol_calls = 0; - constant_group_data->n_of_this_type_in_a_row = 1; - - ++cnt; - } - - /* Figure out how many times this bcol is used in this collective call */ - for (i = 0; i < n_functions; ++i) { - struct mca_coll_ml_compound_functions_t *component_functions = - schedule->component_functions; - 
mca_bcol_base_module_t *current_bcol = - component_functions[i].constant_group_data.bcol_module; - - /* silence clang warning about possible NULL dereference of component_functions. - * this case is a developer error if it occurs */ - assert (NULL != component_functions && NULL != constant_group_data); - - cnt = 0; - for (j = 0; j < n_functions; ++j) { - if (current_bcol == - component_functions[j].constant_group_data.bcol_module) { - constant_group_data->index_of_this_type_in_collective = cnt; - - ++cnt; - } - } - - component_functions[i].constant_group_data.n_of_this_type_in_collective = cnt; - } - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - /* Release temporary memories */ - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Const_Data_Setup_Error: - if (NULL != scratch_indx) { - free(scratch_indx); - } - - if (NULL != scratch_num) { - free(scratch_num); - } - - return ret; -} diff --git a/ompi/mca/coll/ml/coll_ml_inlines.h b/ompi/mca/coll/ml/coll_ml_inlines.h deleted file mode 100644 index ea326508992..00000000000 --- a/ompi/mca/coll/ml/coll_ml_inlines.h +++ /dev/null @@ -1,637 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#ifndef MCA_COLL_ML_INLINES_H -#define MCA_COLL_ML_INLINES_H - -#include "ompi_config.h" - -BEGIN_C_DECLS - -static inline __opal_attribute_always_inline__ int ml_fls(int num) -{ - int i = 1; - int j = 0; - - if (0 == num) { - return 0; - } - - while (i < num) { - i *= 2; - j++; - } - - if (i > num) { - j--; - } - - return j; -} - -static inline __opal_attribute_always_inline__ - int mca_coll_ml_buffer_recycling(mca_coll_ml_collective_operation_progress_t *ml_request) -{ - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *)ml_request->coll_module; - mca_bcol_base_memory_block_desc_t *ml_memblock = ml_module->payload_block; - uint64_t bank_index = ml_request->fragment_data.buffer_desc->bank_index; - int rc; - - opal_atomic_add(&ml_memblock->bank_release_counters[bank_index], 1); - - /* Check if the bank is ready for recycling */ - if (ml_memblock->bank_release_counters[bank_index] == - ml_memblock->num_buffers_per_bank ) { - ml_memblock->ready_for_memsync[bank_index] = true; - - ML_VERBOSE(10, ("Sync count %d, bank %d", ml_memblock->memsync_counter, bank_index)); - assert(ml_memblock->bank_is_busy); - if (ml_memblock->memsync_counter == (int)bank_index) { - while(ml_memblock->ready_for_memsync[ml_memblock->memsync_counter]) { - ML_VERBOSE(10, ("Calling for service barrier: ml_buffer_index - %d %d %d == %d.", - ml_request->fragment_data.buffer_desc->buffer_index, - ml_memblock->memsync_counter, - ml_memblock->bank_release_counters[ml_memblock->memsync_counter], - ml_memblock->num_buffers_per_bank)); - /* Setting the ready flag to 0 - unready - done */ - ml_memblock->ready_for_memsync[ml_memblock->memsync_counter] = false; - - rc = mca_coll_ml_memsync_intra(ml_module, ml_memblock->memsync_counter); - if (OMPI_SUCCESS != rc) { - ML_ERROR(("Failed to start memory sync !!!")); - return rc; - } - - opal_atomic_add(&ml_memblock->memsync_counter, 1); - if 
(ml_memblock->memsync_counter == (int)ml_memblock->num_banks) { - ml_memblock->memsync_counter = 0; - } - ML_VERBOSE(10, ("After service barrier.")); - } - } else { - ML_VERBOSE(10, ("Out of order %d", ml_memblock->memsync_counter)); - } - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int coll_ml_fragment_completion_processing( - mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - int ret = OMPI_SUCCESS; - size_t bytes_in_this_frag; - struct full_message_t *full_msg_desc = coll_op->fragment_data.message_descriptor; - bool ready_to_release = true, out_of_resource = false; - - ML_VERBOSE(10, ("Coll_op %p processing completion", coll_op)); - /* Call unpack/pack function */ - if (OPAL_LIKELY(NULL != coll_op->process_fn)) { - ret = coll_op->process_fn(coll_op); - switch(ret) { - case OMPI_SUCCESS: - ML_VERBOSE(10, ("unpack done")); - ready_to_release = true; - break; - case ORTE_ERR_NO_MATCH_YET: - ML_VERBOSE(10, ("unexpected packet")); - ready_to_release = false; - break; - default: - ML_ERROR(("Error, unexpected error code %d", ret)); - return ret; - } - } - - bytes_in_this_frag = coll_op->fragment_data.fragment_size; - - ML_VERBOSE(10, ("Delivered %d bytes in frag %d total %d", - full_msg_desc->n_bytes_delivered, - bytes_in_this_frag, - full_msg_desc->n_bytes_total)); - - /* check for full message completion */ - if(full_msg_desc->n_bytes_delivered + bytes_in_this_frag == - full_msg_desc->n_bytes_total) { - /* message complete - don't update number of bytes delivered, just - * mark the message complete - */ - full_msg_desc->n_bytes_delivered += bytes_in_this_frag; - - /* decrement the number of fragments */ - full_msg_desc->n_active--; - - ML_VERBOSE(10, ("Signaling completion")); - - /* here we need to be sure that we point to the first fragment only */ - ompi_request_complete(&(coll_op->fragment_data.message_descriptor->super), true); - 
coll_op->fragment_data.message_descriptor->super.req_status.MPI_ERROR = OMPI_SUCCESS; - } else { - assert(NULL != coll_op->fragment_data.buffer_desc); - /* update the number of bytes delivered */ - full_msg_desc->n_bytes_delivered += bytes_in_this_frag; - /* decrement the number of fragments */ - full_msg_desc->n_active--; - /* here we need to start the next fragment */ - ML_VERBOSE(10, ("Launch frags for %p", coll_op)); - if (full_msg_desc->n_bytes_scheduled < full_msg_desc->n_bytes_total) { - ret = coll_op->fragment_data.message_descriptor->fragment_launcher(coll_op); - if (OPAL_UNLIKELY(OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret)) { - out_of_resource = true; - } else if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to launch fragment")); - return ret; - } - } - } - - if (ready_to_release) { - /* Check if we have to recycle memory. - * Note: It is safe to recycle ML buffers since the ML buffer data - * already was unpacked to user buffer - */ - if (NULL != coll_op->fragment_data.buffer_desc) { - ret = mca_coll_ml_buffer_recycling(coll_op); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - } - /* if this is not fragment 0, return fragment to the free list. - * fragment 0 will be returned in mca_ml_request_free() which - * is called from the MPI wait() and test() routines. - * We can recover the pointer to the fragement descriptor from - * the MPI level request object, wich is the first element - * in the fragment descriptor. - */ - /* I contend that this is a bug. This is not the right way to check - * for the first fragment as it assumes that the first fragment would always - * for any collective have zero as the first offset or that other subsequent - * fragments would not. It is not safe to assume this. The correct check is - * the following one - */ - - ML_VERBOSE(10, ("Master ? %p %d", coll_op, coll_op->fragment_data.offset_into_user_buffer)); - /* This check is in fact a bug. Not the correct definiton of first - * fragment. 
First fragment is the only fragment that satisfies the - * following criteria - */ - /*if (0 != coll_op->fragment_data.offset_into_user_buffer && - !out_of_resource) { - */ - if (((&coll_op->full_message != coll_op->fragment_data.message_descriptor) && - !out_of_resource) || IS_COLL_SYNCMEM(coll_op)) { - /* non-zero offset ==> this is not fragment 0 */ - CHECK_AND_RECYCLE(coll_op); - } - } - - /* return */ - return OMPI_SUCCESS; -} - -/* task completion */ -static inline __opal_attribute_always_inline__ int coll_ml_task_dependency_processing( - mca_coll_ml_task_status_t *task) -{ - /* update dependencies */ - mca_coll_ml_collective_operation_progress_t *my_schedule_instance = - task->ml_coll_operation; - int n_dependent_tasks = task->rt_num_dependent_tasks; - int dep_task; - - for (dep_task = 0; dep_task < n_dependent_tasks; dep_task++) - { - int task_index; - task_index = task->rt_dependent_task_indices[dep_task]; - my_schedule_instance->dag_description.status_array[task_index].n_dep_satisfied++; - } - - /* return */ - return OMPI_SUCCESS; -} - -/* collective task completion processing - - * "task" may be removed from list in this routine. - * Thread safety is assumed to be handled outside this routine. - */ -static inline __opal_attribute_always_inline__ int mca_coll_ml_task_completion_processing( - mca_coll_ml_task_status_t **task_status_g, opal_list_t *list) -{ - /* local variables */ - int ret = OMPI_SUCCESS; - mca_coll_ml_task_status_t *task_status = *task_status_g; - - mca_coll_ml_collective_operation_progress_t *coll_op = - task_status->ml_coll_operation; - - /* Pasha: Since all our collectives so far use the root - flag, I replacing the call for custom call back function - with setting root_flag. - If we will see that we need some custom functionality, - we will enable it later. 
- */ - - task_status->ml_coll_operation->variable_fn_params.root_flag = true; - -#if 0 - /* process task completion function, - if any was defined */ - if (OPAL_LIKELY(NULL != task_status->task_comp_fn)) { - ret = task_status->task_comp_fn(task_status); - if (ret != OMPI_SUCCESS) { - return ret; - } - } -#endif - - /* update dependencies */ - ret = coll_ml_task_dependency_processing(task_status); - if (ret != OMPI_SUCCESS) { - ML_VERBOSE(3,("Failed to coll_ml_task_dependency_processing")); - return ret; - } - - /* process task completion function, - if any was defined */ - if (OPAL_LIKELY(NULL != task_status->task_comp_fn)) { - ret = task_status->task_comp_fn(task_status); - if (ret != OMPI_SUCCESS) { - ML_VERBOSE(3,("Failed to task_comp_fn")); - return ret; - } - } - - /* remove the descriptor from the incomplete list - (Pasha: if the list was provided) */ - /* No need to put this an any new list - it is associcated - * with the mca_coll_ml_collective_operation_progress_t - * descriptor already - */ - - if (NULL != list) { - (*task_status_g) = (mca_coll_ml_task_status_t *) - opal_list_remove_item(list, (opal_list_item_t *)(task_status)); - } - - /* update completion counter */ - coll_op->dag_description.num_tasks_completed++; - - if(coll_op->dag_description.num_tasks_completed == - coll_op->coll_schedule->n_fns) - { - /* the actual fragment descriptor is not on any list, as - * we can get at it from the task descriptors - */ - ret = coll_ml_fragment_completion_processing(coll_op); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(3,("Failed to coll_ml_fragment_completion_processing")); - return ret; - } - } - - /* return */ - return ret; -} - -static inline __opal_attribute_always_inline__ int mca_coll_ml_generic_collectives_append_to_queue( - mca_coll_ml_collective_operation_progress_t *op_prog, - mca_coll_ml_task_setup_fn_t task_setup) -{ - int fn_index; - mca_coll_ml_collective_operation_description_t *op_desc = - op_prog->coll_schedule; - 
mca_coll_ml_compound_functions_t *func = NULL; - mca_coll_ml_task_status_t *task_status = NULL; - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - ML_VERBOSE(9, ("Calling mca_coll_ml_generic_collectives_launcher")); - - /* Init all tasks, before we start them */ - for (fn_index = 0; fn_index < op_desc->n_fns; fn_index++) { - func = &op_desc->component_functions[fn_index]; - task_status = &op_prog->dag_description.status_array[fn_index]; - - ML_VERBOSE(9, ("Processing function index %d", fn_index)); - - assert(NULL != func); - - /* Init task status */ - task_status->n_dep_satisfied = 0; /* start from zero */ - task_status->bcol_fn = func->bcol_function; - /* setup run time parametres */ - /* Pasha: do we need the if proctection ? */ - if (OPAL_LIKELY(NULL != task_setup)) { - task_setup(task_status, fn_index, func); - } - - /* the pointer to operation progress supposed to be set during - construction time. Just want to make sure that it is ok */ - assert(task_status->ml_coll_operation == op_prog); - - /* We assume that all pointer to functions are defined and it - is not reson to check for null */ - assert(NULL != func->bcol_function->coll_fn); - - /* In order to preserve ordering on all ranks we have to add it to tail */ - /* TBD: Need to review the way we launch fragments */ - ML_VERBOSE(9, ("The task %p dependency is %d, appending it on pending list", - (void *)task_status, func->num_dependencies)); - OPAL_THREAD_LOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - opal_list_append(&cm->pending_tasks, (opal_list_item_t *)task_status); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - } - - ML_VERBOSE(9, ("Collective was launched !")); - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int mca_coll_ml_generic_collectives_launcher( - mca_coll_ml_collective_operation_progress_t *op_prog, - mca_coll_ml_task_setup_fn_t task_setup) -{ - int fn_index; - int rc, ret; - mca_coll_ml_collective_operation_description_t 
*op_desc = - op_prog->coll_schedule; - mca_coll_ml_compound_functions_t *func = NULL; - mca_coll_ml_task_status_t *task_status = NULL; - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - ML_VERBOSE(9, ("Calling mca_coll_ml_generic_collectives_launcher")); - - /* Init all tasks, before we start them */ - for (fn_index = 0; fn_index < op_desc->n_fns; fn_index++) { - func = &op_desc->component_functions[fn_index]; - task_status = &op_prog->dag_description.status_array[fn_index]; - - ML_VERBOSE(9, ("Processing function index %d", fn_index)); - - assert(NULL != func); - - /* Init task status */ - task_status->n_dep_satisfied = 0; /* start from zero */ - /* task_status->my_index_in_coll_schedule = fn_index; - pasha: the value is set during init */ - task_status->bcol_fn = func->bcol_function; - /* Pasha: disabling support for custom complition functions - task_status->task_comp_fn = func->task_comp_fn; - */ - - /* setup run time parametres */ - /* Pasha: do we need the if proctection ? */ - if (OPAL_LIKELY(NULL != task_setup)) { - task_setup(task_status, fn_index, func); - } - - /* the pointer to operation progress supposed to be set during - construction time. 
Just want to make sure that it is ok */ - assert(task_status->ml_coll_operation == op_prog); - /* Task status is done */ - - /* launch the task and put it on corresponding list (if required) */ - - /* We assume that all pointer to functions are defined and it - is not reason to check for null */ - assert(NULL != func->bcol_function->coll_fn); - } - - /* try to start startable */ - for (fn_index = 0; fn_index < op_desc->n_fns; fn_index++) { - func = &op_desc->component_functions[fn_index]; - task_status = &op_prog->dag_description.status_array[fn_index]; - /* fire the collective immediately if it has no dependencies */ - if (0 == task_status->rt_num_dependencies) { - rc = func->bcol_function->coll_fn(&op_prog->variable_fn_params, - /* Pasha: Need to update the prototype of the func, - right now it is ugly hack for compilation */ - (struct mca_bcol_base_function_t *)&func->constant_group_data); - switch(rc) { - case BCOL_FN_NOT_STARTED: - /* put it on pending list */ - ML_VERBOSE(9, ("Call to bcol collecitive return BCOL_FN_NOT_STARTED, putting the task on pending list")); - OPAL_THREAD_LOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - opal_list_append(&cm->pending_tasks, (opal_list_item_t *)task_status); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - break; - case BCOL_FN_STARTED: - /* put it on started list */ - ML_VERBOSE(9, ("Call to bcol collecitive return BCOL_FN_STARTED, puting the task on active list")); - OPAL_THREAD_LOCK(&(mca_coll_ml_component.active_tasks_mutex)); - opal_list_append(&cm->active_tasks, (opal_list_item_t *)task_status); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.active_tasks_mutex)); - break; - case BCOL_FN_COMPLETE: - /* the task is done ! lets start relevant dependencies */ - ML_VERBOSE(9, ("Call to bcol collecitive return BCOL_FN_COMPLETE")); - /* the task does not belong to any list, yes. 
So passing NULL */ - ret = mca_coll_ml_task_completion_processing(&task_status, NULL); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(9, ("Failed to mca_coll_ml_task_completion_processing")); - return ret; - } - break; - default: - ML_ERROR(("Unknow exit status %d", rc)); - return OMPI_ERROR; - } - } else { - /* the task is depend on other, lets put it on pending list */ - ML_VERBOSE(9, ("The task %p dependency is %d, putting it on pending list", - (void *)task_status, func->num_dependencies)); - OPAL_THREAD_LOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - opal_list_append(&cm->pending_tasks, (opal_list_item_t *)task_status); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - } - } - ML_VERBOSE(9, ("Collective was launched !")); - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ mca_coll_ml_collective_operation_progress_t * -mca_coll_ml_alloc_op_prog_single_frag_dag( - mca_coll_ml_module_t *ml_module, - mca_coll_ml_collective_operation_description_t *coll_schedule, - void *src, void *dst, size_t total_bytes, - size_t offset_into_user_buffer - ) -{ - opal_free_list_item_t *item; - mca_coll_ml_collective_operation_progress_t *coll_op = NULL; - ompi_request_t *req; - - /* Blocking call on fragment allocation (Maybe we want to make it non blocking ?) */ - item = opal_free_list_wait (&(ml_module->coll_ml_collective_descriptors)); - - coll_op = (mca_coll_ml_collective_operation_progress_t *) item; - ML_VERBOSE(10, (">>> Allocating coll op %p", coll_op)); - assert(NULL != coll_op); - assert(coll_op->dag_description.status_array[0].item.opal_list_item_refcount == 0); - req = &(coll_op->full_message.super); - - OMPI_REQUEST_INIT(req, false); - /* Mark the request ACTIVE. 
It is critical for MPI_Test()*/ - req->req_state = OMPI_REQUEST_ACTIVE; - req->req_status._cancelled = 0; - req->req_status.MPI_ERROR = OMPI_SUCCESS; - - MCA_COLL_ML_OP_BASIC_SETUP(coll_op, total_bytes, - offset_into_user_buffer, src, dst, coll_schedule); - - /* We do not set sequential, since it is not sequential call */ - coll_op->dag_description.num_tasks_completed = 0; - - /* Release reference counter have to be zero */ - assert(0 == coll_op->pending); - - return coll_op; -} - -static inline __opal_attribute_always_inline__ mca_coll_ml_collective_operation_progress_t * -mca_coll_ml_duplicate_op_prog_single_frag_dag( - mca_coll_ml_module_t *ml_module, - mca_coll_ml_collective_operation_progress_t *old_op) -{ - mca_coll_ml_collective_operation_progress_t *new_op = NULL; - - new_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_bcast_functions[old_op->fragment_data.current_coll_op], - old_op->fragment_data.message_descriptor->dest_user_addr, - old_op->fragment_data.message_descriptor->src_user_addr, - old_op->fragment_data.message_descriptor->n_bytes_total, - old_op->fragment_data.message_descriptor->n_bytes_scheduled); - - new_op->fragment_data.current_coll_op = old_op->fragment_data.current_coll_op; - new_op->fragment_data.message_descriptor = old_op->fragment_data.message_descriptor; - - return new_op; -} - -static inline __opal_attribute_always_inline__ mca_coll_ml_collective_operation_progress_t * - mca_coll_ml_alloc_op_prog_single_frag_seq( - mca_coll_ml_module_t *ml_module, - mca_coll_ml_collective_operation_description_t *coll_schedule, - void *src, void *dst, - size_t total_bytes, - size_t offset_into_user_buffer - ) -{ - opal_free_list_item_t *item; - mca_coll_ml_collective_operation_progress_t *coll_op = NULL; - - /* Blocking call on fragment allocation (Maybe we want to make it non blocking ?) 
*/ - item = opal_free_list_wait (&(ml_module->coll_ml_collective_descriptors)); - - coll_op = (mca_coll_ml_collective_operation_progress_t *) item; - - assert(NULL != coll_op); - - MCA_COLL_ML_OP_BASIC_SETUP(coll_op, total_bytes, - offset_into_user_buffer, src, dst, coll_schedule); - - /* set sequential data */ - /* pasha - do we have something to set ? */ - - return coll_op; -} - -static inline __opal_attribute_always_inline__ - void mca_coll_ml_convertor_get_send_frag_size(mca_coll_ml_module_t *ml_module, - size_t *frag_size, struct full_message_t *message_descriptor) -{ - size_t fragment_size = *frag_size; - opal_convertor_t *dummy_convertor = &message_descriptor->dummy_convertor; - - /* The last frag needs special service */ - if (fragment_size > - (size_t) message_descriptor->send_converter_bytes_packed) { - *frag_size = message_descriptor->send_converter_bytes_packed; - message_descriptor->send_converter_bytes_packed = 0; - - return; - } - if( (message_descriptor->dummy_conv_position + fragment_size) > - message_descriptor->n_bytes_total ) { - message_descriptor->dummy_conv_position = (message_descriptor->dummy_conv_position + fragment_size) - - message_descriptor->n_bytes_total; - } else { - message_descriptor->dummy_conv_position += fragment_size; - } - - opal_convertor_generic_simple_position(dummy_convertor, &message_descriptor->dummy_conv_position); - *frag_size -= dummy_convertor->partial_length; - - message_descriptor->send_converter_bytes_packed -= (*frag_size); -} - -static inline __opal_attribute_always_inline__ int -mca_coll_ml_launch_sequential_collective (mca_coll_ml_collective_operation_progress_t *coll_op) -{ - mca_bcol_base_coll_fn_desc_t *bcol_func; - int ifunc, n_fn, ih, ret; - mca_coll_ml_collective_operation_description_t *sched = - coll_op->coll_schedule; - - n_fn = sched->n_fns; - ih = coll_op->sequential_routine.current_active_bcol_fn; - - /* if collectives are already pending just add this one to the list */ - if (opal_list_get_size 
(&mca_coll_ml_component.sequential_collectives)) { - opal_list_append(&mca_coll_ml_component.sequential_collectives, (opal_list_item_t *) coll_op); - - return OMPI_SUCCESS; - } - - for (ifunc = ih; ifunc < n_fn; ifunc++, coll_op->sequential_routine.current_active_bcol_fn++) { - ret = coll_op->sequential_routine.seq_task_setup(coll_op); - if (OMPI_SUCCESS != ret) { - return ret; - } - - bcol_func = (sched->component_functions[ifunc].bcol_function); - ret = bcol_func->coll_fn(&coll_op->variable_fn_params, - (struct mca_bcol_base_function_t *) &sched->component_functions[ifunc].constant_group_data); - - if (BCOL_FN_COMPLETE == ret) { - if (ifunc == n_fn - 1) { - ret = coll_ml_fragment_completion_processing(coll_op); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - mca_coll_ml_abort_ml("Failed to run coll_ml_fragment_completion_processing"); - } - - return OMPI_SUCCESS; - } - } else { - if (BCOL_FN_STARTED == ret) { - coll_op->sequential_routine.current_bcol_status = SEQ_TASK_IN_PROG; - } else { - coll_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING; - } - - ML_VERBOSE(10, ("Adding pending bcol to the progress list to access by ml_progress func-id %d", ifunc)); - opal_list_append(&mca_coll_ml_component.sequential_collectives, (opal_list_item_t *) coll_op); - - break; - } - } - - return OMPI_SUCCESS; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/coll/ml/coll_ml_lex.h b/ompi/mca/coll/ml/coll_ml_lex.h deleted file mode 100644 index d09fe45bf99..00000000000 --- a/ompi/mca/coll/ml/coll_ml_lex.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef COLL_ML_LEX_H_ -#define COLL_ML_LEX_H_ - -#include "opal_config.h" -#include - -BEGIN_C_DECLS - -int coll_ml_config_yylex(void); -int coll_ml_config_init_buffer(FILE *file); -int coll_ml_config_yylex_destroy(void); - -extern FILE *coll_ml_config_yyin; -extern bool coll_ml_config_parse_done; -extern char *coll_ml_config_yytext; -extern int coll_ml_config_yynewlines; - -/* - * Make lex-generated files not issue compiler warnings - 
*/ -#define YY_STACK_USED 0 -#define YY_ALWAYS_INTERACTIVE 0 -#define YY_NEVER_INTERACTIVE 0 -#define YY_MAIN 0 -#define YY_NO_UNPUT 1 -#define YY_SKIP_YYWRAP 1 - -enum { - COLL_ML_CONFIG_PARSE_DONE, - COLL_ML_CONFIG_PARSE_ERROR, - COLL_ML_CONFIG_PARSE_NEWLINE, - COLL_ML_CONFIG_PARSE_SECTION, - COLL_ML_CONFIG_PARSE_COLLECTIVE, - COLL_ML_CONFIG_PARSE_EQUAL, - COLL_ML_CONFIG_PARSE_SINGLE_WORD, - COLL_ML_CONFIG_PARSE_VALUE, - COLL_ML_CONFIG_PARSE_MAX -}; -END_C_DECLS -#endif diff --git a/ompi/mca/coll/ml/coll_ml_lex.l b/ompi/mca/coll/ml/coll_ml_lex.l deleted file mode 100644 index b97b04e0413..00000000000 --- a/ompi/mca/coll/ml/coll_ml_lex.l +++ /dev/null @@ -1,141 +0,0 @@ -%option nounput -%option noinput - -%{ /* -*- C -*- */ -#include "opal_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "coll_ml_lex.h" - -BEGIN_C_DECLS - -/* - * local functions - */ -static int coll_ml_config_yywrap(void); - -END_C_DECLS - -/* - * global variables - */ -int coll_ml_config_yynewlines = 1; -bool coll_ml_config_parse_done = false; -char *coll_ml_config_string = NULL; - -%} - -WHITE [\f\t\v ] -CHAR [A-Za-z0-9_\-\.] 
-NAME_CHAR [A-Za-z0-9_\-\.\\\/] - -%x comment -%x section_name -%x collective_name -%x section_end -%x collective_end -%x value - -%% - -{WHITE}*\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } -#.*\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } -"//".*\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } - -"/*" { BEGIN(comment); - return COLL_ML_CONFIG_PARSE_NEWLINE; } -[^*\n]* ; /* Eat up non '*'s */ -"*"+[^*/\n]* ; /* Eat '*'s not followed by a '/' */ -\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } -"*"+"/" { BEGIN(INITIAL); /* Done with block comment */ - return COLL_ML_CONFIG_PARSE_NEWLINE; } - -{WHITE}*\[{WHITE}* { BEGIN(collective_name); } -({NAME_CHAR}|{WHITE})*{NAME_CHAR}/{WHITE}*\] { - BEGIN(collective_end); - return COLL_ML_CONFIG_PARSE_COLLECTIVE; } -\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_ERROR; } -. { return COLL_ML_CONFIG_PARSE_ERROR; } -{WHITE}*\]{WHITE}*\n { - BEGIN(INITIAL); - ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } - -{WHITE}*\<{WHITE}* { BEGIN(section_name); } -({NAME_CHAR}|{WHITE})*{NAME_CHAR}/{WHITE}*\> { - BEGIN(section_end); - return COLL_ML_CONFIG_PARSE_SECTION; } -\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_ERROR; } -. { return COLL_ML_CONFIG_PARSE_ERROR; } -{WHITE}*\>{WHITE}*\n { - BEGIN(INITIAL); - ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } - -{WHITE}*"="{WHITE}* { BEGIN(value); - return COLL_ML_CONFIG_PARSE_EQUAL; } -{WHITE}+ ; /* whitespace */ -{CHAR}+ { return COLL_ML_CONFIG_PARSE_SINGLE_WORD; } - -{WHITE}*\n { BEGIN(INITIAL); - ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } -[^\n]*[^\t \n]/[\t ]* { - return COLL_ML_CONFIG_PARSE_VALUE; } - -. { return COLL_ML_CONFIG_PARSE_ERROR; } -%% - -/* Old flex (2.5.4a? 
and older) does not define a destroy function */ -#if !defined(YY_FLEX_SUBMINOR_VERSION) -#define YY_FLEX_SUBMINOR_VERSION 0 -#endif - -#if (YY_FLEX_MAJOR_VERSION < 2) || (YY_FLEX_MAJOR_VERSION == 2 && (YY_FLEX_MINOR_VERSION < 5 || (YY_FLEX_MINOR_VERSION == 5 && YY_FLEX_SUBMINOR_VERSION < 5))) -int coll_ml_config_yylex_destroy(void) -{ - if (NULL != YY_CURRENT_BUFFER) { - yy_delete_buffer(YY_CURRENT_BUFFER); -#if defined(YY_CURRENT_BUFFER_LVALUE) - YY_CURRENT_BUFFER_LVALUE = NULL; -#else - YY_CURRENT_BUFFER = NULL; -#endif /* YY_CURRENT_BUFFER_LVALUE */ - } - return YY_NULL; -} -#endif - -static int coll_ml_config_yywrap(void) -{ - coll_ml_config_parse_done = true; - return 1; -} - - -/* - * Ensure that we have a valid yybuffer to use. Specifically, if this - * scanner is invoked a second time, finish_parsing() (above) will - * have been executed, and the current buffer will have been freed. - * Flex doesn't recognize this fact because as far as it's concerned, - * its internal state was already initialized, so it thinks it should - * have a valid buffer. Hence, here we ensure to give it a valid - * buffer. - */ -int coll_ml_config_init_buffer(FILE *file) -{ - YY_BUFFER_STATE buf = yy_create_buffer(file, YY_BUF_SIZE); - yy_switch_to_buffer(buf); - - return 0; -} diff --git a/ompi/mca/coll/ml/coll_ml_lmngr.c b/ompi/mca/coll/ml/coll_ml_lmngr.c deleted file mode 100644 index 1ff72027a7d..00000000000 --- a/ompi/mca/coll/ml/coll_ml_lmngr.c +++ /dev/null @@ -1,330 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "opal/class/opal_list.h" -#include "opal/threads/mutex.h" -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_mca.h" -#include "coll_ml_lmngr.h" -#ifndef HAVE_POSIX_MEMALIGN -#include "opal/align.h" -#include "opal_stdint.h" -#endif -#include "opal/util/sys_limits.h" - -/* Constructor for list memory manager */ -static void construct_lmngr(mca_coll_ml_lmngr_t *lmngr) -{ - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - ML_VERBOSE(7, ("Constructing new list manager %p", (void *)lmngr)); - - /* No real memory is allocated, only basic init. - The real memory will be allocated on demand, on first block allocation */ - - /* I caching this block size, alignment and list size - since maybe in future we will want to define different parameters - for lists */ - lmngr->list_block_size = cm->lmngr_block_size; - lmngr->list_alignment = cm->lmngr_alignment; - lmngr->list_size = cm->lmngr_size; - lmngr->n_resources = 0; - lmngr->base_addr = NULL; /* If the base addr is not null, the struct was initilized - and memory was allocated */ - /* Not sure that lock is required */ - OBJ_CONSTRUCT(&lmngr->mem_lock, opal_mutex_t); - - /* Only construct the list, no memry initialisation */ - OBJ_CONSTRUCT(&lmngr->blocks_list, opal_list_t); -} - -static void destruct_lmngr(mca_coll_ml_lmngr_t *lmngr) -{ - int max_nc = lmngr->n_resources; - int rc, i; - bcol_base_network_context_t *nc; - opal_list_item_t *item; - - ML_VERBOSE(6, ("Destructing list manager %p", (void *)lmngr)); - - while (NULL != (item = opal_list_remove_first(&lmngr->blocks_list))) { - OBJ_RELEASE(item); - } - - OBJ_DESTRUCT(&lmngr->blocks_list); - - if (NULL != lmngr->alloc_base) { - for( i = 0; i < max_nc; i++ ) { - nc = lmngr->net_context[i]; - rc = nc->deregister_memory_fn(nc->context_data, - lmngr->reg_desc[nc->context_id]); - if(rc != OMPI_SUCCESS) { - ML_ERROR(("Failed to 
unregister , lmngr %p", (void *)lmngr)); - } - } - - ML_VERBOSE(10, ("Release base addr %p", lmngr->alloc_base)); - - free(lmngr->alloc_base); - lmngr->alloc_base = NULL; - lmngr->base_addr = NULL; - } - - lmngr->list_block_size = 0; - lmngr->list_alignment = 0; - lmngr->list_size = 0; - lmngr->n_resources = 0; - - OBJ_DESTRUCT(&lmngr->mem_lock); -} - -OBJ_CLASS_INSTANCE(mca_coll_ml_lmngr_t, - opal_object_t, - construct_lmngr, - destruct_lmngr); - -int mca_coll_ml_lmngr_tune(mca_coll_ml_lmngr_t *lmngr, - size_t block_size, size_t list_size, size_t alignment) -{ - ML_VERBOSE(7, ("Tunning list manager")); - - if (OPAL_UNLIKELY(NULL == lmngr->base_addr)) { - ML_VERBOSE(7, ("The list manager is already initialized, you can not tune it")); - return OMPI_ERROR; - } - - lmngr->list_block_size = block_size; - lmngr->list_alignment = alignment; - lmngr->list_size = list_size; - - return OMPI_SUCCESS; -} - -int mca_coll_ml_lmngr_reg(void) -{ - int tmp, ret = OMPI_SUCCESS; - - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - -#define CHECK(expr) do {\ - tmp = (expr); \ - if (0 > tmp) ret = tmp; \ - } while (0) - - ML_VERBOSE(7, ("Setting parameters for list manager")); - - cm->lmngr_size = 8; - CHECK(mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - "memory_manager_list_size", "Memory manager list size", - MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &cm->lmngr_size)); - - /* The size list couldn't be less than possible max of ML modules, - it = max supported communicators by ML */ - if (cm->lmngr_size < cm->max_comm) { - cm->lmngr_size = cm->max_comm; - } - - mca_coll_ml_component.lmngr_block_size = cm->payload_buffer_size * - cm->n_payload_buffs_per_bank * - cm->n_payload_mem_banks * - cm->lmngr_size; - - CHECK(mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - "memory_manager_block_size", "Memory manager block size", - MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, - 
OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_ml_component.lmngr_block_size)); - - cm->lmngr_alignment = opal_getpagesize(); - CHECK(mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - "memory_manager_alignment", "Memory manager alignment", - MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_ml_component.lmngr_block_size)); - - return ret; -} - -static int lmngr_register(mca_coll_ml_lmngr_t *lmngr, bcol_base_network_context_t *nc) -{ - int rc, j; - int max_nc = lmngr->n_resources; - - rc = nc->register_memory_fn(nc->context_data, - lmngr->base_addr, - lmngr->list_size * lmngr->list_block_size, - &lmngr->reg_desc[nc->context_id]); - - if(rc != OMPI_SUCCESS) { - int ret_val; - ML_VERBOSE(7, ("Failed to register [%d], unrolling the registration", rc)); - /* deregistser the successful registrations */ - for( j = 0; j < max_nc; j++ ) { - /* set the registration parameter to point to the current - * resource description */ - nc = lmngr->net_context[j]; - ret_val = nc->deregister_memory_fn(nc->context_data, - lmngr->reg_desc[nc->context_id]); - if(ret_val != OMPI_SUCCESS) { - return ret_val; - } - } - - return rc; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_lmngr_init(mca_coll_ml_lmngr_t *lmngr) -{ - int i, num_blocks; - int rc; - unsigned char *addr; - bcol_base_network_context_t *nc; - - ML_VERBOSE(7, ("List initialization")); - -#ifdef HAVE_POSIX_MEMALIGN - if((errno = posix_memalign(&lmngr->base_addr, - lmngr->list_alignment, - lmngr->list_size * lmngr->list_block_size)) != 0) { - ML_ERROR(("Failed to allocate memory: %d [%s]", errno, strerror(errno))); - return OMPI_ERROR; - } - lmngr->alloc_base = lmngr->base_addr; -#else - lmngr->alloc_base = - malloc(lmngr->list_size * lmngr->list_block_size + lmngr->list_alignment); - if(NULL == lmngr->alloc_base) { - ML_ERROR(("Failed to allocate memory: %d [%s]", errno, strerror(errno))); - return OMPI_ERROR; - } - - 
lmngr->base_addr = (void*)OPAL_ALIGN((uintptr_t)lmngr->alloc_base, - lmngr->list_alignment, uintptr_t); -#endif - - assert(lmngr->n_resources < MCA_COLL_ML_MAX_REG_INFO); - - for(i= 0 ;i < lmngr->n_resources ;i++) { - nc = lmngr->net_context[i]; - ML_VERBOSE(7, ("Call registration for resource index %d", i)); - rc = lmngr_register(lmngr, nc); - if (OMPI_SUCCESS != rc) { - ML_ERROR(("Failed to lmngr register: %d [%s]", errno, strerror(errno))); - return rc; - } - } - - /* slice the memory to blocks */ - addr = (unsigned char *) lmngr->base_addr; - for(num_blocks = 0; num_blocks < (int)lmngr->list_size; num_blocks++) { - mca_bcol_base_lmngr_block_t *item = OBJ_NEW(mca_bcol_base_lmngr_block_t); - item->base_addr = (void *)addr; - item->lmngr = lmngr; - /* ML_VERBOSE(10, ("Appending block # %d %p", num_blocks, (void *)addr)); */ - opal_list_append(&lmngr->blocks_list, (opal_list_item_t *)item); - /* advance the address */ - addr += lmngr->list_block_size; - } - - ML_VERBOSE(7, ("List initialization done %d", - opal_list_get_size(&lmngr->blocks_list))); - return OMPI_SUCCESS; -} - -mca_bcol_base_lmngr_block_t* mca_coll_ml_lmngr_alloc ( - mca_coll_ml_lmngr_t *lmngr) -{ - int rc; - opal_list_t *list = &lmngr->blocks_list; - - /* Check if the list manager was initialized */ - if(OPAL_UNLIKELY(NULL == lmngr->base_addr)) { - ML_VERBOSE(7 ,("Starting memory initialization")); - rc = mca_coll_ml_lmngr_init(lmngr); - if (OMPI_SUCCESS != rc) { - ML_ERROR(("Failed to init memory")); - return NULL; - } - } - - if(OPAL_UNLIKELY(opal_list_is_empty(list))) { - /* Upper layer need to handle the NULL */ - ML_VERBOSE(1, ("List manager is empty.")); - return NULL; - } - - return (mca_bcol_base_lmngr_block_t *)opal_list_remove_first(list); -} - -void mca_coll_ml_lmngr_free(mca_bcol_base_lmngr_block_t *block) -{ - opal_list_append(&block->lmngr->blocks_list, (opal_list_item_t *)block); -} - -int mca_coll_ml_lmngr_append_nc(mca_coll_ml_lmngr_t *lmngr, bcol_base_network_context_t *nc) -{ - 
int i, rc; - - ML_VERBOSE(7, ("Append new network context %p to list manager %p", - nc, lmngr)); - - if (NULL == nc) { - return OMPI_ERROR; - } - - /* check if we already have the context on the list. - if we do have - do not do anything, just return success - */ - if (OPAL_UNLIKELY(MCA_COLL_ML_MAX_REG_INFO == lmngr->n_resources)) { - ML_ERROR(("MPI overflows maximum supported network contexts is %d", MCA_COLL_ML_MAX_REG_INFO)); - return OMPI_ERROR; - } - - for (i = 0; i < lmngr->n_resources; i++) { - if (lmngr->net_context[i] == nc) { - ML_VERBOSE(7, ("It is not new ")); - return OMPI_SUCCESS; - } - } - - ML_VERBOSE(7, ("Adding new context")); - - /* Setting context id */ - nc->context_id = lmngr->n_resources; - lmngr->net_context[lmngr->n_resources] = nc; - - lmngr->n_resources++; - - /* Register the memory with new context */ - if (NULL != lmngr->base_addr) { - rc = lmngr_register(lmngr, nc); - if (OMPI_SUCCESS == rc) { - return rc; - } - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_lmngr.h b/ompi/mca/coll/ml/coll_ml_lmngr.h deleted file mode 100644 index 9547128663a..00000000000 --- a/ompi/mca/coll/ml/coll_ml_lmngr.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_ML_LMNGR_H -#define MCA_ML_LMNGR_H - -#include "ompi_config.h" -#include "opal/class/opal_list.h" -#include "ompi/mca/bcol/bcol.h" - -#define MCA_COLL_ML_MAX_REG_INFO 32 - -/* LMNGR - List manager for registred memory */ -struct mca_coll_ml_lmngr_t { - opal_object_t super; - /* lock to control list access */ - opal_mutex_t mem_lock; - - /* list of memory chunks */ - opal_list_t blocks_list; - - /* base (allocated) address of the memory pool */ - void* base_addr; - void *alloc_base; - - /* size of memory chunks */ - size_t list_block_size; - - /* memory chunk alignment */ - size_t list_alignment; - - /* init list size */ - size_t list_size; - - /* number network context of resources - In other words, number of different registration - functions that will be used. For example in case - of iboffload for each device (PD) we will have - different entry - */ - int n_resources; - - /* registration descriptor */ - void * reg_desc[MCA_COLL_ML_MAX_REG_INFO]; - - /* bcol network context array */ - struct bcol_base_network_context_t * net_context[MCA_COLL_ML_MAX_REG_INFO]; -}; -typedef struct mca_coll_ml_lmngr_t mca_coll_ml_lmngr_t; -OBJ_CLASS_DECLARATION(mca_coll_ml_lmngr_t); - -/* read user defined parametres for list manager */ -int mca_coll_ml_lmngr_reg(void); -/* If programmer want to user other than default mca -parametres, he can use the tune function. 
The tune -function must be run before list initialization, -otherway error will be returned */ -int mca_coll_ml_lmngr_tune(mca_coll_ml_lmngr_t *lmngr, - size_t block_size, size_t list_size, size_t alignment); - -/* Append new network context to the existing list memory manager */ -int mca_coll_ml_lmngr_append_nc(mca_coll_ml_lmngr_t *lmngr, bcol_base_network_context_t *nc); - -/* Allocate a block from memory list manager */ -mca_bcol_base_lmngr_block_t* mca_coll_ml_lmngr_alloc ( - mca_coll_ml_lmngr_t *lmngr); - -/* Return block to list memory manager */ -void mca_coll_ml_lmngr_free (mca_bcol_base_lmngr_block_t *block); - -#endif diff --git a/ompi/mca/coll/ml/coll_ml_mca.c b/ompi/mca/coll/ml/coll_ml_mca.c deleted file mode 100644 index 46be88c3705..00000000000 --- a/ompi/mca/coll/ml/coll_ml_mca.c +++ /dev/null @@ -1,300 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_mca.h" -#include "coll_ml_lmngr.h" -#include "ompi/patterns/net/netpatterns.h" -#include "opal/mca/installdirs/installdirs.h" - -/* - * Local flags - */ -enum { - REGINT_NEG_ONE_OK = 0x01, - REGINT_GE_ZERO = 0x02, - REGINT_GE_ONE = 0x04, - REGINT_NONZERO = 0x08, - REGINT_MAX = 0x88 -}; - -enum { - REGSTR_EMPTY_OK = 0x01, - REGSTR_MAX = 0x88 -}; - -/* - * Enumerators - */ -mca_base_var_enum_value_t fragmentation_enable_enum[] = { - {0, "disable"}, - {1, "enable"}, - {2, "auto"}, - {-1, NULL} -}; - -mca_base_var_enum_value_t bcast_algorithms[] = { - {COLL_ML_STATIC_BCAST, "static"}, - {COLL_ML_SEQ_BCAST, "sequential"}, - {COLL_ML_UNKNOWN_BCAST, "unknown-root"}, - {-1, NULL} -}; - -/* - * utility routine for string parameter registration - */ -static int reg_string(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - const char* default_value, char **storage, - int flags) -{ - int index; - - *storage = (char *) default_value; - index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "coll", "ml", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -/* - * utility routine for integer parameter 
registration - */ -static int reg_int(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - int default_value, int *storage, int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_INT, - NULL, 0, 0,OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "coll", "ml", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) { - return OMPI_SUCCESS; - } - - if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) || - (0 != (flags & REGINT_GE_ONE) && *storage < 1) || - (0 != (flags & REGINT_NONZERO) && 0 == *storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -static int reg_bool(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - bool default_value, bool *storage) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, 0,OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "coll", "ml", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - return OMPI_SUCCESS; -} - -static int reg_ullint(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - unsigned long long default_value, unsigned long long *storage, int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, - NULL, 0, 0,OPAL_INFO_LVL_9, - 
MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "coll", "ml", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if ((0 != (flags & REGINT_GE_ONE) && *storage < 1) || - (0 != (flags & REGINT_NONZERO) && 0 == *storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_verify_params(void) -{ - int dummy; - - /* Make sure that the the number of memory banks is a power of 2 */ - mca_coll_ml_component.n_payload_mem_banks = - roundup_to_power_radix(2, mca_coll_ml_component.n_payload_mem_banks, - &dummy); - - /* Make sure that the the number of buffers is a power of 2 */ - mca_coll_ml_component.n_payload_buffs_per_bank = - roundup_to_power_radix(2, mca_coll_ml_component.n_payload_buffs_per_bank, - &dummy); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_register_params(void) -{ - mca_base_var_enum_t *new_enum; - int ret, tmp; - char *str = NULL; - - ret = OMPI_SUCCESS; -#define CHECK(expr) do { \ - tmp = (expr); \ - if (OMPI_SUCCESS != tmp) ret = tmp; \ - } while (0) - - /* register openib component parameters */ - - CHECK(reg_int("priority", NULL, "ML component priority" - "(from 0(low) to 90 (high))", 27, &mca_coll_ml_component.ml_priority, 0)); - - CHECK(reg_int("verbose", NULL, "Output some verbose ML information " - "(0 = no output, nonzero = output)", 0, &mca_coll_ml_component.verbose, 0)); - - CHECK(reg_int("max_comm", NULL, "Maximum number of communicators that can use coll/ml", 24, - (int *) &mca_coll_ml_component.max_comm, 0)); - - CHECK(reg_int("min_comm_size", NULL, "Minimum size of communicator to use coll/ml", 0, - &mca_coll_ml_component.min_comm_size, 0)); - - CHECK(reg_int("n_payload_mem_banks", NULL, "Number of payload memory banks", 2, - &mca_coll_ml_component.n_payload_mem_banks, 0)); - - CHECK(reg_int("n_payload_buffs_per_bank", NULL, 
"Number of payload buffers per bank", 16, - &mca_coll_ml_component.n_payload_buffs_per_bank, 0)); - - /* RLG: need to handle alignment and size */ - CHECK(reg_ullint("payload_buffer_size", NULL, "Size of payload buffers", 4*1024, - &mca_coll_ml_component.payload_buffer_size, 0)); - - /* get the pipeline depth, default is 2 */ - CHECK(reg_int("pipeline_depth", NULL, "Size of fragmentation pipeline", 2, - &mca_coll_ml_component.pipeline_depth, 0)); - - CHECK(reg_int("free_list_init_size", NULL, "Initial size of free lists in coll/ml", 128, - &mca_coll_ml_component.free_list_init_size, 0)); - - CHECK(reg_int("free_list_grow_size", NULL, "Initial size of free lists in coll/ml", 64, - &mca_coll_ml_component.free_list_grow_size, 0)); - - CHECK(reg_int("free_list_max_size", NULL, "Initial size of free lists in coll/ml", -1, - &mca_coll_ml_component.free_list_max_size, 0)); - - mca_coll_ml_component.use_knomial_allreduce = 1; - - tmp = mca_base_var_enum_create ("coll_ml_bcast_algorithm", bcast_algorithms, &new_enum); - if (OPAL_SUCCESS != tmp) { - return tmp; - } - - mca_coll_ml_component.bcast_algorithm = COLL_ML_STATIC_BCAST; - tmp = mca_base_component_var_register (&mca_coll_ml_component.super.collm_version, "bcast_algorithm", - "Algorithm to use for broadcast", MCA_BASE_VAR_TYPE_INT, - new_enum, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_ml_component.bcast_algorithm); - OBJ_RELEASE(new_enum); - if (0 > tmp) { - ret = tmp; - } - - CHECK(reg_bool("disable_allgather", NULL, "Disable Allgather", false, - &mca_coll_ml_component.disable_allgather)); - - CHECK(reg_bool("disable_reduce", NULL, "Disable Reduce", false, - &mca_coll_ml_component.disable_reduce)); - - tmp = mca_base_var_enum_create ("coll_ml_enable_fragmentation_enum", fragmentation_enable_enum, &new_enum); - if (OPAL_SUCCESS != tmp) { - return tmp; - } - - /* default to auto-enable fragmentation */ - mca_coll_ml_component.enable_fragmentation = 2; - tmp = mca_base_component_var_register 
(&mca_coll_ml_component.super.collm_version, "enable_fragmentation", - "Disable/Enable fragmentation for large messages", MCA_BASE_VAR_TYPE_INT, - new_enum, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_ml_component.enable_fragmentation); - if (0 > tmp) { - ret = tmp; - } - OBJ_RELEASE(new_enum); - - asprintf(&str, "%s/mca-coll-ml.config", - opal_install_dirs.opaldatadir); - if (NULL == str) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - CHECK(reg_string("config_file", NULL, - "ML collectives configuration file", - str, &mca_coll_ml_component.config_file_name, - 0)); - free(str); - - /* Reading parameters for list manager */ - CHECK(mca_coll_ml_lmngr_reg()); - - /* Verify the parameters */ - CHECK(mca_coll_ml_verify_params()); - - return ret; -} diff --git a/ompi/mca/coll/ml/coll_ml_mca.h b/ompi/mca/coll/ml/coll_ml_mca.h deleted file mode 100644 index 7730bd284dd..00000000000 --- a/ompi/mca/coll/ml/coll_ml_mca.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - /** @file */ - -#ifndef MCA_COLL_ML_MCA_H -#define MCA_COLL_ML_MCA_H - -#include -#include "ompi_config.h" - -int mca_coll_ml_register_params(void); - -#endif diff --git a/ompi/mca/coll/ml/coll_ml_memsync.c b/ompi/mca/coll/ml/coll_ml_memsync.c deleted file mode 100644 index 560fb137b25..00000000000 --- a/ompi/mca/coll/ml/coll_ml_memsync.c +++ /dev/null @@ -1,175 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/coll/coll.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" - -static int mca_coll_ml_memsync_recycle_memory(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *)coll_op->coll_module; - mca_bcol_base_memory_block_desc_t *ml_memblock = ml_module->payload_block; - mca_coll_ml_collective_operation_progress_t *pending_op = NULL; - int bank = coll_op->full_message.bank_index_to_recycle; - int rc; - bool have_resources = true; - - assert(bank >= 0 || - bank < (int)ml_memblock->num_banks || - ML_MEMSYNC == coll_op->fragment_data.current_coll_op); - - ML_VERBOSE(10,("MEMSYNC: bank %d was recycled coll_op %p", bank, coll_op)); - - /* set the bank as free */ - - ml_memblock->bank_is_busy[bank] = false; - ml_memblock->bank_release_counters[bank] = 0; - - /* Check if we have any requests that are waiting for memory */ - while(opal_list_get_size(&ml_module->waiting_for_memory_list) && have_resources) { - pending_op = (mca_coll_ml_collective_operation_progress_t *) - opal_list_get_first(&ml_module->waiting_for_memory_list); - - ML_VERBOSE(10, ("Trying to start pending %p", pending_op)); - assert(pending_op->pending & REQ_OUT_OF_MEMORY); - rc = pending_op->fragment_data.message_descriptor->fragment_launcher(pending_op); - switch (rc) { - case OMPI_SUCCESS: - ML_VERBOSE(10, ("Pending fragment was started %p", pending_op)); - pending_op->pending ^= REQ_OUT_OF_MEMORY; - opal_list_remove_item(&ml_module->waiting_for_memory_list, - (opal_list_item_t *)pending_op); - if (0 != pending_op->fragment_data.offset_into_user_buffer) { 
- /* non-zero offset ==> this is not fragment 0 */ - CHECK_AND_RECYCLE(pending_op); - } - break; - case OMPI_ERR_TEMP_OUT_OF_RESOURCE: - ML_VERBOSE(10, ("Already on the list %p", pending_op)); - have_resources = false; - break; - default: - ML_ERROR(("Error happened %d", rc)); - return rc; - } - } - - ML_VERBOSE(10, ("Memsync done %p", coll_op)); - return OMPI_SUCCESS; -} - -static void mca_coll_ml_barrier_task_setup( - mca_coll_ml_task_status_t *task_status, - int index, mca_coll_ml_compound_functions_t *func) -{ - task_status->rt_num_dependencies = func->num_dependencies; - task_status->rt_num_dependent_tasks = func->num_dependent_tasks; - task_status->rt_dependent_task_indices = func->dependent_task_indices; -} - -static inline __opal_attribute_always_inline__ int mca_coll_ml_memsync_launch(mca_coll_ml_module_t *ml_module, - ompi_request_t **req, int bank_index) -{ - mca_coll_ml_collective_operation_progress_t *coll_op; - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_memsync_function, - NULL, NULL, 0, 0); - - assert(NULL != coll_op); - - ML_VERBOSE(10, ("Get coll request %p", coll_op)); - - coll_op->fragment_data.buffer_desc = NULL; - - /* Caching bank index for future memory recycling callback */ - coll_op->full_message.bank_index_to_recycle = bank_index; - - coll_op->fragment_data.current_coll_op = ML_MEMSYNC; - /* I don't want to define one more parameter, so under root - * we pass buffer index */ - coll_op->variable_fn_params.root = bank_index; - /* As well it's little bit ugly, since it is no wait for this request, - * in order to recycle it we have to set offset to some value > 1 */ - coll_op->fragment_data.offset_into_user_buffer = 1; - coll_op->variable_fn_params.buffer_index = MCA_COLL_ML_NO_BUFFER; - coll_op->variable_fn_params.sequence_num = -1; /* It should be safe to use -1 */ - /* Pointer to a coll finalize function */ - if (OPAL_LIKELY(ml_module->initialized)) { - coll_op->process_fn = 
mca_coll_ml_memsync_recycle_memory; - } else { - /* No post work on first call */ - coll_op->process_fn = NULL; - } - - ML_VERBOSE(10,("Memsync start %p", &coll_op)); - - return mca_coll_ml_generic_collectives_append_to_queue(coll_op, mca_coll_ml_barrier_task_setup); -} - -/** - * Non blocking memory syncronization - */ -int mca_coll_ml_memsync_intra(mca_coll_ml_module_t *ml_module, int bank_index) -{ - int rc; - ompi_request_t *req; - - ML_VERBOSE(8, ("MEMSYNC start")); - - if (OPAL_UNLIKELY(0 == opal_list_get_size(&ml_module->active_bcols_list))) { - /* Josh's change: In the case where only p2p is active, we have no way - * to reset the bank release counters to zero, I am doing that here since it - * would actually be "correct" to do it outside of this conditional, however - * I suspect that reseting the value to zero elsewhere would result in corrupted - * flow for non-contiguous data types - */ - - /* nasty hack to ensure that resources are released in the single level - * ptp case. - */ - mca_coll_ml_collective_operation_progress_t dummy_coll; - - dummy_coll.coll_module = (mca_coll_base_module_t *) ml_module; - dummy_coll.fragment_data.current_coll_op = ML_MEMSYNC; - dummy_coll.full_message.bank_index_to_recycle = bank_index; - - /* Handling special case when memory syncronization is not required */ - rc = mca_coll_ml_memsync_recycle_memory(&dummy_coll); - if(OPAL_UNLIKELY(rc != OMPI_SUCCESS)){ - ML_ERROR(("Failed to flush the list.")); - return rc; - } - } else { - /* retain the communicator until the operation is finished. 
the communicator - * will be released by CHECK_AND_RECYCLE */ - OBJ_RETAIN(ml_module->comm); - - rc = mca_coll_ml_memsync_launch(ml_module, &req, bank_index); - if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) { - ML_ERROR(("Failed to launch a barrier.")); - return rc; - } - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_module.c b/ompi/mca/coll/ml/coll_ml_module.c deleted file mode 100644 index c671cc044e3..00000000000 --- a/ompi/mca/coll/ml/coll_ml_module.c +++ /dev/null @@ -1,3128 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Most of the description of the data layout is in the - * coll_ml_module.c file. 
- */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/coll/base/base.h" -#include "ompi/mca/sbgp/base/base.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/patterns/comm/coll_ops.h" -#include "ompi/mca/coll/ml/coll_ml.h" - -#include "opal/util/argv.h" -#include "opal/datatype/opal_datatype.h" -#include "opal/util/output.h" -#include "opal/util/arch.h" -#include "opal/align.h" - -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_select.h" -#include "coll_ml_custom_utils.h" -#include "coll_ml_allocation.h" - -static int coll_ml_parse_topology (sub_group_params_t *sub_group_meta_data, size_t sub_group_count, - int *list_of_ranks_in_all_subgroups, int level_one_size); - -/* #define NEW_LEADER_SELECTION */ - -struct ranks_proxy_t { - /* number of subgroups for which the rank is a proxy */ - int number_subgroups; - /* subgrou indecies */ - int *subgroup_index; -}; -typedef struct rank_proxy_t rank_proxy_t; - -#define PROVIDE_SUFFICIENT_MEMORY(ptr, dummy_ptr, ptr_size, unit_type, in_use, \ - n_to_add,n_to_grow) \ - do { \ - if ((in_use) + (n_to_add) > (ptr_size)) { \ - (dummy_ptr) = (unit_type *) \ - realloc(ptr, sizeof(unit_type) * ((ptr_size) + (n_to_grow))); \ - if (NULL != (dummy_ptr)) { \ - (ptr) = (dummy_ptr); \ - (ptr_size) += (n_to_grow); \ - } \ - } \ - } while (0) - -/* - * Local functions - */ - -static int ml_module_enable(mca_coll_base_module_t *module, - struct ompi_communicator_t *comm); - -static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, - ompi_communicator_t *comm); - -static void -mca_coll_ml_module_construct(mca_coll_ml_module_t *module) -{ - int index_topo, coll_i, st_i; - mca_coll_ml_topology_t *topo; - - memset ((char *) module + sizeof (module->super), 0, sizeof (*module) - sizeof (module->super)); - - /* It's 
critical to reset data_offset to zero */ - module->data_offset = -1; - - /* If the topology support zero level and no fragmentation was requested */ - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &module->topo_list[index_topo]; - topo->global_lowest_hier_group_index = -1; - topo->global_highest_hier_group_index = -1; - topo->number_of_all_subgroups = -1; - topo->n_levels = -1; - topo->all_bcols_mode = ~(0); /* set to all bits */ - topo->status = COLL_ML_TOPO_DISABLED; /* all topologies are not used by default */ - } - - for (coll_i = 0; coll_i < ML_NUM_OF_FUNCTIONS; coll_i++) { - for (st_i = 0; st_i < MCA_COLL_MAX_NUM_SUBTYPES; st_i++) { - module->collectives_topology_map[coll_i][st_i] = ML_UNDEFINED; - } - } - - for (coll_i = 0; coll_i < BCOL_NUM_OF_FUNCTIONS; ++coll_i) { - module->small_message_thresholds[coll_i] = BCOL_THRESHOLD_UNLIMITED; - } - - OBJ_CONSTRUCT(&module->active_bcols_list, opal_list_t); - OBJ_CONSTRUCT(&module->waiting_for_memory_list, opal_list_t); - OBJ_CONSTRUCT(&module->fragment_descriptors, opal_free_list_t); - OBJ_CONSTRUCT(&module->message_descriptors, opal_free_list_t); - OBJ_CONSTRUCT(&module->coll_ml_collective_descriptors, opal_free_list_t); - - memset (&module->fallback, 0, sizeof (module->fallback)); -} - -#define ML_RELEASE_FALLBACK(_coll_ml, _coll) \ - do { \ - if (_coll_ml->fallback.coll_ ## _coll ## _module) { \ - OBJ_RELEASE(_coll_ml->fallback.coll_ ## _coll ## _module); \ - _coll_ml->fallback.coll_ ## _coll ## _module = NULL; \ - } \ - } while (0); - -static void -mca_coll_ml_module_destruct(mca_coll_ml_module_t *module) -{ - int i, j, k,fnc, index_topo; - mca_coll_ml_topology_t *topo; - - ML_VERBOSE(4, ("ML module destruct")); - - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &module->topo_list[index_topo]; - if (COLL_ML_TOPO_DISABLED == topo->status) { - /* skip the topology */ - continue; - } - - if (NULL != topo->component_pairs) { - for(i = 0; i < 
topo->n_levels; ++i) { - for(j = 0; j < topo->component_pairs[i].num_bcol_modules; ++j) { - OBJ_RELEASE(topo->component_pairs[i].bcol_modules[j]); - } - /* free the array of bcol module */ - free(topo->component_pairs[i].bcol_modules); - - OBJ_RELEASE(topo->component_pairs[i].subgroup_module); - } - - free(topo->component_pairs); - } - - /* gvm Leak FIX Free collective algorithms structure */ - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - if (NULL != topo->hierarchical_algorithms[fnc]){ - free(topo->hierarchical_algorithms[fnc]); - } - } - - /* free up the route vector memory */ - if (NULL != topo->route_vector) { - free(topo->route_vector); - } - /* free resrouce description */ - if(NULL != topo->array_of_all_subgroups) { - for( k=0 ; k < topo->number_of_all_subgroups ; k++ ) { - if(0 < topo->array_of_all_subgroups[k].n_ranks) { - free(topo->array_of_all_subgroups[k].rank_data); - topo->array_of_all_subgroups[k].rank_data = NULL; - } - } - free(topo->array_of_all_subgroups); - topo->array_of_all_subgroups = NULL; - } - if (NULL != topo->hier_layout_info) { - free(topo->hier_layout_info); - topo->hier_layout_info = NULL; - } - } - - OPAL_LIST_DESTRUCT(&(module->active_bcols_list)); - OBJ_DESTRUCT(&(module->waiting_for_memory_list)); - - /* gvm Leak FIX Remove fragment free list */ - OBJ_DESTRUCT(&(module->fragment_descriptors)); - OBJ_DESTRUCT(&(module->message_descriptors)); - /* push mca_bcol_base_memory_block_desc_t back on list manager */ - mca_coll_ml_free_block(module->payload_block); - /* release the cinvertor if it was allocated */ - if (NULL != module->reference_convertor) { - OBJ_RELEASE(module->reference_convertor); - } - - OBJ_DESTRUCT(&(module->coll_ml_collective_descriptors)); - - if (NULL != module->coll_ml_barrier_function) { - if (NULL != module->coll_ml_barrier_function->component_functions) { - free(module->coll_ml_barrier_function->component_functions); - module->coll_ml_barrier_function->component_functions = NULL; - } - 
free(module->coll_ml_barrier_function); - module->coll_ml_barrier_function = NULL; - } - - if (module->coll_ml_memsync_function) { - if (module->coll_ml_memsync_function->component_functions) { - free(module->coll_ml_memsync_function->component_functions); - module->coll_ml_memsync_function->component_functions = NULL; - } - free(module->coll_ml_memsync_function); - module->coll_ml_memsync_function = NULL; - } - - ml_coll_hier_allreduce_cleanup_new(module); - ml_coll_hier_allgather_cleanup(module); - ml_coll_hier_bcast_cleanup(module); - ml_coll_hier_reduce_cleanup(module); - - /* release saved collectives */ - ML_RELEASE_FALLBACK(module, allreduce); - ML_RELEASE_FALLBACK(module, allgather); - ML_RELEASE_FALLBACK(module, reduce); - ML_RELEASE_FALLBACK(module, bcast); - ML_RELEASE_FALLBACK(module, iallreduce); - ML_RELEASE_FALLBACK(module, iallgather); - ML_RELEASE_FALLBACK(module, ireduce); - ML_RELEASE_FALLBACK(module, ibcast); -} - - -static int mca_coll_ml_request_free(ompi_request_t** request) -{ - /* local variables */ - mca_coll_ml_collective_operation_progress_t *ml_request= - (mca_coll_ml_collective_operation_progress_t *)(*request); - mca_coll_ml_module_t *ml_module = OP_ML_MODULE(ml_request); - - /* The ML memory bank recycling check done, no we may - * return request and signal completion */ - - /* this fragement does not hold the message data, so ok to return */ - assert(0 == ml_request->pending); - //assert(0 == ml_request->fragment_data.offset_into_user_buffer); - assert(&ml_request->full_message == ml_request->fragment_data.message_descriptor); - assert(ml_request->dag_description.status_array[0].item.opal_list_item_refcount == 0); - ML_VERBOSE(10, ("Releasing Master %p", ml_request)); - /* Mark the request as invalid */ - OMPI_REQUEST_FINI(&ml_request->full_message.super); - opal_free_list_return (&(ml_module->coll_ml_collective_descriptors), - (opal_free_list_item_t *)ml_request); - - /* MPI needs to return with the request object set to 
MPI_REQUEST_NULL - */ - *request = MPI_REQUEST_NULL; - - return OMPI_SUCCESS; -} - -/* constructor for collective managment descriptor */ -static void mca_coll_ml_collective_operation_progress_construct -(mca_coll_ml_collective_operation_progress_t *desc) { - - /* initialize pointer */ - desc->dag_description.status_array = NULL; - - OBJ_CONSTRUCT(&desc->full_message.send_convertor, opal_convertor_t); - OBJ_CONSTRUCT(&desc->full_message.recv_convertor, opal_convertor_t); - - OBJ_CONSTRUCT(&desc->full_message.dummy_convertor, opal_convertor_t); - - /* intialize request free pointer */ - desc->full_message.super.req_free = mca_coll_ml_request_free; - - /* no cancel function */ - desc->full_message.super.req_cancel = NULL; - /* Collective request type */ - desc->full_message.super.req_type = OMPI_REQUEST_COLL; - /* RLG: Do we need to set req_mpi_object ? */ - - /* If not null , we have to release next fragment */ - desc->next_to_process_frag = NULL; - - /* pointer to previous fragment */ - desc->prev_frag = NULL; - - /* Pasha: moreinit */ - desc->pending = 0; -} - -/* destructor for collective managment descriptor */ -static void mca_coll_ml_collective_operation_progress_destruct -(mca_coll_ml_collective_operation_progress_t *desc) { - mca_coll_ml_module_t *ml_module = - (mca_coll_ml_module_t *) desc->coll_module; - - int i, max_dag_size = ml_module->max_dag_size; - - if (NULL != desc->dag_description.status_array) { - for (i = 0; i < max_dag_size; ++i) { - OBJ_DESTRUCT(&desc->dag_description.status_array[i].item); - } - - free(desc->dag_description.status_array); - desc->dag_description.status_array = NULL; - } - - OBJ_DESTRUCT(&desc->full_message.send_convertor); - OBJ_DESTRUCT(&desc->full_message.recv_convertor); - - OBJ_DESTRUCT(&desc->full_message.dummy_convertor); -} -/* initialize the full message descriptor - can pass in module specific - * initialization data - */ -static void init_ml_fragment_desc(opal_free_list_item_t *desc , void* ctx); -static void 
init_ml_message_desc(opal_free_list_item_t *desc , void* ctx) -{ - mca_coll_ml_module_t *module= (mca_coll_ml_module_t *) ctx; - mca_coll_ml_descriptor_t *msg_desc = (mca_coll_ml_descriptor_t *) desc; - - /* finish setting up the fragment descriptor */ - init_ml_fragment_desc((opal_free_list_item_t*)&(msg_desc->fragment),module); -} - -/* initialize the fragment descriptor - can pass in module specific - * initialization data - */ -static void init_ml_fragment_desc(opal_free_list_item_t *desc , void* ctx) -{ - mca_coll_ml_module_t *module= (mca_coll_ml_module_t *) ctx; - mca_coll_ml_fragment_t *frag_desc = (mca_coll_ml_fragment_t *) desc; - - /* allocated array of function arguments */ - /* RLG - we have a problem if we don't get the memory */ - /* malloc-debug does not like zero allocations */ - if (module->max_fn_calls > 0) { - frag_desc->fn_args = (bcol_function_args_t *) - malloc(sizeof(bcol_function_args_t) * module->max_fn_calls); - } - -} -static void mca_coll_ml_bcol_list_item_construct(mca_coll_ml_bcol_list_item_t *item) -{ - item->bcol_module = NULL; -} -OBJ_CLASS_INSTANCE(mca_coll_ml_bcol_list_item_t, - opal_list_item_t, - mca_coll_ml_bcol_list_item_construct, - NULL); - -static void generate_active_bcols_list(mca_coll_ml_module_t *ml_module) -{ - int i, j, index_topo; - mca_coll_ml_topology_t *topo; - bool bcol_was_found; - mca_coll_ml_bcol_list_item_t *bcol_item = NULL; - mca_bcol_base_module_t *bcol_module = NULL; - - ML_VERBOSE(10, ("Generating active bcol list ")); - - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &ml_module->topo_list[index_topo]; - if (COLL_ML_TOPO_DISABLED == topo->status) { - /* skip the topology */ - continue; - } - for( i = 0; i < topo->n_levels; i++) { - - for( j = 0; j < topo->component_pairs[i].num_bcol_modules; j++) { - bcol_module = topo->component_pairs[i].bcol_modules[j]; - - /* Check if the bcol provides synchronization function, if the - * function is not provided we skip this bcol, 
since it isn't used - * for memory synchronization (for instance - ptpcoll )*/ - if (NULL == GET_BCOL_SYNC_FN(bcol_module)) { - ML_VERBOSE(10,(" No sync function was provided by bcol %s", - bcol_module->bcol_component->bcol_version.mca_component_name)); - continue; - } - - bcol_was_found = false; - for(bcol_item = (mca_coll_ml_bcol_list_item_t *)opal_list_get_first(&ml_module->active_bcols_list); - !bcol_was_found && - bcol_item != (mca_coll_ml_bcol_list_item_t *)opal_list_get_end(&ml_module->active_bcols_list); - bcol_item = (mca_coll_ml_bcol_list_item_t *)opal_list_get_next((opal_list_item_t *)bcol_item)) { - if (bcol_module == bcol_item->bcol_module) { - bcol_was_found = true; - } - } - - /* append the item to the list if it was not found */ - if (!bcol_was_found) { - bcol_item = OBJ_NEW(mca_coll_ml_bcol_list_item_t); - bcol_item->bcol_module = bcol_module; - opal_list_append(&ml_module->active_bcols_list, (opal_list_item_t *)bcol_item); - } - - } - } - } -} - -static int calculate_buffer_header_size(mca_coll_ml_module_t *ml_module) -{ - mca_coll_ml_topology_t *topo; - mca_bcol_base_module_t *bcol_module; - - uint32_t offset = 0; - int i, j, *ranks_in_comm, kount = 0, - rc, data_offset = 0, index_topo, - comm_size = ompi_comm_size(ml_module->comm); - - ML_VERBOSE(10, ("Calculating offset for the ML")); - - /* probably a stupid thing to do, but we have to loop over twice */ - - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &ml_module->topo_list[index_topo]; - if (COLL_ML_TOPO_DISABLED == topo->status) { - /* skip the topology */ - continue; - } - - for (i = 0; i < topo->n_levels; i++) { - for (j = 0; j < topo->component_pairs[i].num_bcol_modules; j++) { - bcol_module = topo->component_pairs[i].bcol_modules[j]; - if (0 < bcol_module->header_size) { - /* bump the kounter */ - kount++; - /* find the largest header request */ - if (offset < bcol_module->header_size) { - offset = bcol_module->header_size; - } - } - - /* Set bcol mode 
bits */ - topo->all_bcols_mode &= bcol_module->supported_mode; - } - } - - offset = OPAL_ALIGN(offset, BCOL_HEAD_ALIGN, uint32_t); - /* select largest offset between multiple topologies */ - if (data_offset < (int) offset) { - data_offset = (int) offset; - } - } - - ranks_in_comm = (int *) malloc(comm_size * sizeof(int)); - if (OPAL_UNLIKELY(NULL == ranks_in_comm)) { - ML_ERROR(("Memory allocation failed.")); - return OMPI_ERROR; - } - - for (i = 0; i < comm_size; ++i) { - ranks_in_comm[i] = i; - } - - rc = comm_allreduce_pml(&data_offset, &data_offset, 1, - MPI_INT, ompi_comm_rank(ml_module->comm), - MPI_MAX, comm_size, - ranks_in_comm, ml_module->comm); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("comm_allreduce_pml failed.")); - return OMPI_ERROR; - } - - ml_module->data_offset = (uint32_t) data_offset; - free(ranks_in_comm); - - ML_VERBOSE(10, ("The offset is %d", ml_module->data_offset)); - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_register_bcols(mca_coll_ml_module_t *ml_module) -{ - /* local variables */ - int i, j, index_topo; - int ret = OMPI_SUCCESS; - mca_bcol_base_module_t *bcol_module; - mca_coll_ml_topology_t *topo; - - /* loop over all bcols and register the ml memory block which each */ - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &ml_module->topo_list[index_topo]; - if (COLL_ML_TOPO_DISABLED == topo->status) { - /* skip the topology */ - continue; - } - - for (i = 0; i < topo->n_levels; i++) { - for (j = 0; j < topo->component_pairs[i].num_bcol_modules; j++) { - bcol_module = topo->component_pairs[i].bcol_modules[j]; - if (NULL != bcol_module->bcol_memory_init) { - ret = bcol_module->bcol_memory_init(ml_module->payload_block, - ml_module->data_offset, - bcol_module, - (NULL != bcol_module->network_context) ? 
- bcol_module->network_context->context_data: NULL); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("Bcol registration failed on ml level!!")); - return ret; - } - } - } - } - } - - return OMPI_SUCCESS; -} - -static int ml_module_memory_initialization(mca_coll_ml_module_t *ml_module) -{ - int ret; - int nbanks, nbuffers, buf_size; - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - - ml_module->payload_block = mca_coll_ml_allocate_block(cs,ml_module->payload_block); - - if (NULL == ml_module->payload_block) { - ML_VERBOSE(1, ("mca_coll_ml_allocate_block exited with error.")); - return OMPI_ERROR; - } - - /* get memory block parameters */ - nbanks = cs->n_payload_mem_banks; - nbuffers = cs->n_payload_buffs_per_bank; - buf_size = cs->payload_buffer_size; - - ML_VERBOSE(10, ("Call for initialize block.")); - - ret = mca_coll_ml_initialize_block(ml_module->payload_block, - nbuffers, nbanks, buf_size, ml_module->data_offset, - NULL); - if (OMPI_SUCCESS != ret) { - return ret; - } - - ML_VERBOSE(10, ("Call for register bcols.")); - - /* inititialize the memory with all of the bcols: - loop through the bcol modules and invoke the memory init */ - ret = mca_coll_ml_register_bcols(ml_module); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("mca_coll_ml_register_bcols returned an error.")); - /* goto CLEANUP; */ - return ret; - } - - return OMPI_SUCCESS; -} - -/* do some sanity checks */ -static int check_global_view_of_subgroups( int n_procs_selected, - int n_procs_in, int ll_p1, int* all_selected, - mca_sbgp_base_module_t *module ) -{ - /* local variables */ - int ret=OMPI_SUCCESS; - int i, sum; - - bool local_leader_found=false; - - /* is there a single local-leader */ - for (i = 0; i < n_procs_selected; i++) { - if( ll_p1 == -all_selected[module->group_list[i]]) { - /* found the local leader */ - if( local_leader_found ) { - /* more than one local leader - don't know how to - * handle this, so bail - */ - ML_VERBOSE(1, ("More than a single leader for a group.")); - 
ret=OMPI_ERROR; - goto exit_ERROR; - } else { - local_leader_found=true; - } - } - } - - /* check to make sure that all agree on the same size of - * the group - */ - sum=0; - for (i = 0; i < n_procs_in; i++) { - if(ll_p1==all_selected[i]) { - sum++; - } else if( ll_p1 == -all_selected[i]) { - sum++; - } - } - if( sum != n_procs_selected ) { - ML_VERBOSE(1, ("number of procs in the group unexpected. Expected %d Got %d",n_procs_selected,sum)); - ret=OMPI_ERROR; - goto exit_ERROR; - } - /* check to make sure that all have the same list of ranks. - */ - for (i = 0; i < n_procs_selected; i++) { - if(ll_p1!=all_selected[module->group_list[i]] && - ll_p1!=-all_selected[module->group_list[i]] ) { - ret=OMPI_ERROR; - ML_VERBOSE(1, ("Mismatch in rank list - element #%d - %d ",i,all_selected[module->group_list[i]])); - goto exit_ERROR; - } - } - - /* return */ - return ret; - - exit_ERROR: - /* return */ - return ret; -} - -static int ml_init_k_nomial_trees(mca_coll_ml_topology_t *topo, int *list_of_ranks_in_all_subgroups, int my_rank_in_list) -{ - int *list_n_connected; - int group_size, rank, i, j, knt, offset, k, my_sbgp = 0; - int my_root, level_one_knt; - sub_group_params_t *array_of_all_subgroup_ranks = topo-> - array_of_all_subgroups; - int num_total_subgroups = topo->number_of_all_subgroups; - int n_hier = topo->n_levels; - - hierarchy_pairs *pair = NULL; - mca_coll_ml_leader_offset_info_t *loc_leader = (mca_coll_ml_leader_offset_info_t *) - malloc(sizeof(mca_coll_ml_leader_offset_info_t)*(n_hier+1)); - - if (NULL == loc_leader) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* first thing I want to know is where does the first level end */ - level_one_knt = 0; - - while (level_one_knt < num_total_subgroups && 0 == array_of_all_subgroup_ranks[level_one_knt].level_in_hierarchy) { - level_one_knt++; - } - - /* fprintf(stderr,"PPP %d %d %d ", level_one_knt, array_of_all_subgroup_ranks[0].level_in_hierarchy, num_total_subgroups); */ - - /* I want to cache this number for 
unpack*/ - array_of_all_subgroup_ranks->level_one_index = level_one_knt; - - /* determine whether or not ranks are contiguous */ - topo->ranks_contiguous = true; - for (i = 0, knt = 0 ; i < level_one_knt && topo->ranks_contiguous ; ++i) { - for (j = 0 ; j < array_of_all_subgroup_ranks[i].n_ranks ; ++j, ++knt) { - if (knt != list_of_ranks_in_all_subgroups[knt]) { - topo->ranks_contiguous = false; - break; - } - } - } - - loc_leader[0].offset = 0; - - /* now find my first level offset, and my index in level one */ - for (i = 0, loc_leader[0].level_one_index = -1 ; i < level_one_knt ; ++i) { - offset = array_of_all_subgroup_ranks[i].index_of_first_element; - for (k = 0 ; k < array_of_all_subgroup_ranks[i].n_ranks ; ++k) { - rank = list_of_ranks_in_all_subgroups[k + offset]; - if (rank == my_rank_in_list) { - loc_leader[0].offset = offset; - loc_leader[0].level_one_index = k; - i = level_one_knt; - break; - } - } - } - - /* every rank MUST appear at level 0 */ - assert (loc_leader[0].level_one_index > -1); - - for (i = 0 ; i < n_hier ; ++i) { - pair = &topo->component_pairs[i]; - /* find the size of the group */ - group_size = pair->subgroup_module->group_size; - /* malloc some memory for the new list to cache - on the bcol module - */ - list_n_connected = (int *) calloc(group_size, sizeof (int)); - if (NULL == list_n_connected) { - free (loc_leader); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* next thing to do is to find out which subgroup I'm in - * at this particular level - */ - for (j = 0, knt = 0, my_sbgp = -1 ; j < num_total_subgroups && 0 > my_sbgp ; ++j) { - offset = array_of_all_subgroup_ranks[j].index_of_first_element; - - /* in the 1-level case just skip any group of size 1 and move on - * to the real group. 
*/ - if (1 == n_hier && 1 == array_of_all_subgroup_ranks[j].n_ranks) { - continue; - } - - for (k = 0; k < array_of_all_subgroup_ranks[j].n_ranks; k++) { - rank = list_of_ranks_in_all_subgroups[k+offset]; - /* we can not use the level_in_topology flag to determine the - * level since not all levels may be represented so keep a count - * of the number of times this ranks shows up. when it has been - * seen the correct number of times we are done. */ - if (rank == my_rank_in_list && ++knt == (i+1)){ - my_sbgp = j; - /* tag whether I am a local leader or not at this level */ - loc_leader[i].leader = (my_rank_in_list == array_of_all_subgroup_ranks[j].root_rank_in_comm); - break; - } - } - } - - /* should have found a subgroup */ - assert (my_sbgp > -1); - - for (j = 0 ; j < group_size ; ++j) { - list_n_connected[j] = array_of_all_subgroup_ranks[my_sbgp]. - rank_data[j].num_of_ranks_represented; - } - - /* now find all sbgps that the root of this sbgp belongs to - * previous to this "my_sbgp" */ - my_root = array_of_all_subgroup_ranks[my_sbgp].root_rank_in_comm; - - for (j = 0, knt = 0 ; j < my_sbgp ; ++j) { - if (array_of_all_subgroup_ranks[j].root_rank_in_comm == my_root) { - for (k = 1; k < array_of_all_subgroup_ranks[j].n_ranks; ++k) { - knt += array_of_all_subgroup_ranks[j].rank_data[k]. - num_of_ranks_represented; - } - - } - } - - /* and then I add one for the root itself */ - list_n_connected[0] = knt + 1; - - /* now cache this on the bcol module */ - pair->bcol_modules[0]->list_n_connected = list_n_connected; - - /* I should do one more round here and figure out my offset at this level - * the calculation is simple: Am I a local leader in this level? If so, then I keep the offset - * from the previous level. Else, I find out how "far away" the local leader is from me and set - * this as the new offset. 
- */ - /* do this after first level */ - if (i > 0) { - /* if I'm not the local leader */ - if( !loc_leader[i].leader) { - /* then I am not a local leader at this level */ - offset = array_of_all_subgroup_ranks[my_sbgp].index_of_first_element; - for (k = 0, knt = 0 ; k < array_of_all_subgroup_ranks[my_sbgp].n_ranks ; ++k) { - rank = list_of_ranks_in_all_subgroups[k+offset]; - if (rank == my_rank_in_list) { - break; - } - - knt += list_n_connected[k]; - } - loc_leader[i].offset = loc_leader[i-1].offset - knt; - } else { - /* if I am the local leader, then keep the same offset */ - loc_leader[i].offset = loc_leader[i-1].offset; - } - } - - pair->bcol_modules[0]->hier_scather_offset = loc_leader[i].offset; - - /*setup the tree */ - pair->bcol_modules[0]->k_nomial_tree(pair->bcol_modules[0]); - } - - /* see if I am in the last subgroup, if I am, - * then I am a root for the bcast operation - */ - offset = array_of_all_subgroup_ranks[n_hier - 1].index_of_first_element; - for( i = 0; i < array_of_all_subgroup_ranks[n_hier - 1].n_ranks; i++){ - rank = list_of_ranks_in_all_subgroups[i + offset]; - if( rank == my_rank_in_list ){ - loc_leader[n_hier - 1].offset = 0; - loc_leader[n_hier - 1].leader = true; - } - } - - /* set the last offset to 0 and set the leader according to your top level position */ - loc_leader[n_hier].offset = 0; - if(loc_leader[n_hier - 1].leader){ - loc_leader[n_hier].leader = true; - } else { - loc_leader[n_hier].leader = false; - } - - /* what other goodies do I want to cache on the ml-module? 
*/ - topo->hier_layout_info = loc_leader; - - return OMPI_SUCCESS; -} - -static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo, - ompi_communicator_t *comm, - int my_highest_group_index, int *map_to_comm_ranks, - int *num_total_subgroups, sub_group_params_t **array_of_all_subgroup_ranks, - int **list_of_ranks_in_all_subgroups) -{ - - int ret = OMPI_SUCCESS; - int i, in_buf, root, my_rank,sum; - int in_num_total_subgroups = *num_total_subgroups; - int *scratch_space = NULL; - - /* figure out who holds all the sub-group information - only those - * ranks in the top level know this data at this point */ - my_rank = ompi_comm_rank(comm); - if( (my_highest_group_index == topo->global_highest_hier_group_index ) - && - ( my_rank == - topo->component_pairs[topo->n_levels-1].subgroup_module->group_list[0]) - ) { - in_buf=my_rank; - } else { - /* since this will be a sum allreduce - contributing 0 will not - * change the value */ - in_buf=0; - } - ret = comm_allreduce_pml(&in_buf, &root, 1, MPI_INT, - my_rank, MPI_SUM, - ompi_comm_size(comm), map_to_comm_ranks, - comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed. root reduction")); - goto exit_ERROR; - } - - /* broadcast the number of groups */ - ret=comm_bcast_pml(num_total_subgroups, root, 1, - MPI_INT, my_rank, ompi_comm_size(comm), - map_to_comm_ranks,comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_bcast_pml failed. 
num_total_subgroups bcast")); - goto exit_ERROR; - } - - scratch_space=(int *)malloc(4*sizeof(int)*(*num_total_subgroups)); - if (OPAL_UNLIKELY(NULL == scratch_space)) { - ML_VERBOSE(10, ("Cannot allocate memory scratch_space.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - if( my_rank == root ) { - for(i=0 ; i < (*num_total_subgroups) ; i++ ) { - scratch_space[4*i]=(*array_of_all_subgroup_ranks)[i].root_rank_in_comm; - scratch_space[4*i+1]=(*array_of_all_subgroup_ranks)[i].n_ranks; - scratch_space[4*i+2]=(*array_of_all_subgroup_ranks)[i].index_of_first_element; - scratch_space[4*i+3]=(*array_of_all_subgroup_ranks)[i].level_in_hierarchy; - } - } - ret=comm_bcast_pml(scratch_space, root, 4*(*num_total_subgroups), - MPI_INT, my_rank, ompi_comm_size(comm), - map_to_comm_ranks, comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed. scratch_space bcast")); - goto exit_ERROR; - } - if( my_rank != root ) { - if( in_num_total_subgroups != (*num_total_subgroups) ) { - /* free old array_of_all_subgroup_ranks array - need to fill it - * with the global data - assume that if the array size is the - * same, all data is correct, and in the same order */ - free((*array_of_all_subgroup_ranks)); - (*array_of_all_subgroup_ranks)=(sub_group_params_t *) - malloc(sizeof(sub_group_params_t)*(*num_total_subgroups)); - if (OPAL_UNLIKELY(NULL == (*array_of_all_subgroup_ranks))) { - ML_VERBOSE(10, ("Cannot allocate memory array_of_all_subgroup_ranks.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - for(i=0 ; i < (*num_total_subgroups) ; i++ ) { - (*array_of_all_subgroup_ranks)[i].root_rank_in_comm=scratch_space[4*i]; - (*array_of_all_subgroup_ranks)[i].n_ranks=scratch_space[4*i+1]; - (*array_of_all_subgroup_ranks)[i].index_of_first_element=scratch_space[4*i+2]; - (*array_of_all_subgroup_ranks)[i].level_in_hierarchy=scratch_space[4*i+3]; - } - } - } - /* figure out how many entries in all the subgroups - ranks that 
apear - * in k subgroups appear k times in the list */ - sum=0; - for(i=0 ; i < (*num_total_subgroups) ; i++ ) { - sum+=(*array_of_all_subgroup_ranks)[i].n_ranks; - } - if( in_num_total_subgroups != (*num_total_subgroups) && sum > 0 ) { - (*list_of_ranks_in_all_subgroups)=(int *) - realloc((*list_of_ranks_in_all_subgroups),sizeof(int)*sum); - if (OPAL_UNLIKELY(NULL == (*list_of_ranks_in_all_subgroups))) { - ML_VERBOSE(10, ("Cannot allocate memory *list_of_ranks_in_all_subgroups.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - } - ret = comm_bcast_pml(*list_of_ranks_in_all_subgroups, root, sum, - MPI_INT, my_rank, ompi_comm_size(comm), - map_to_comm_ranks, comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Bcast failed for list_of_ranks_in_all_subgroups ")); - goto exit_ERROR; - } - - /* - * The data that is needed for a given rooted operation is: - * - subgroup,rank information for the source of the data. - * That is, which rank in the subgroup will recieve the - * data and distribute to the rest of the ranks. - * - the ranks that this data will be sent to. This is - * described by the ranks in the current subgroups, and - * the subroups for which each rank is a proxy for, - * recursively in the communication tree. - * - * The assumption is that data will be delived to each subgroup - * in an order, that is, all the data destined to subgroup rank 0 - * will appear 1st, then that for rank 1, etc. This implies that - * the data destined to rank 0, for example, will include the - * data for rank 0, as well as all the ranks that appear following - * it in the tree - in order. - * - * Proxies: A rank may be a proxy for more than a single subgroup. - * When a rank is proxy for more than a single subgroup, we - * maintain a fixed order of subgroups for which this is a - * proxy, with an assumption that the data for the first subgroup - * appears first in the list, then that for the second, etc. 
- * Since the data for the proxy (which is a member of this subgroup) - * appears only once in the data list, the assumption is that the - * proxy will be the root for this operation, and it is the first - * set of data in the data list. This means, that the data offset - * for the second ranks in each subgroup will include all the data - * for the previous subgroups, recursively. This lets us maintain - * the simple addressing scheme of contigous data per rank in - * the subcommunicator. - * - * The information needed for each rank in the subgroup are the - * group indices for which it is a proxy. - */ - /* - * fill in the vertecies in the hierarchichal communications graph - */ - - /* figure out how detailed connection information, so that we can - * can figure out how the data needs to be ordered for sending it - * though the tree in various collective algorithms that have per-rank - * data associated with them. - */ - - /* this function does a depth first traversal of the tree data and - * builds rank data and ensures that hierarchy level 0 is in the - * correct order for collective algorithms with per-rank data. - */ - coll_ml_parse_topology (*array_of_all_subgroup_ranks, *num_total_subgroups, - *list_of_ranks_in_all_subgroups, ompi_comm_size (comm)); - - /* The list of ranks in all subgroups is the same as the old sort list. This is the same - * order needed for both scatter and gather. 
*/ - topo->sort_list = (*list_of_ranks_in_all_subgroups); - - /* return */ - exit_ERROR: - if (scratch_space) { - free(scratch_space); - } - - return ret; -} - -static int get_new_subgroup_data (int32_t *all_selected, int size_of_all_selected, - sub_group_params_t **sub_group_meta_data, - int *size_of_sub_group_meta_data, - int **list_of_ranks_in_all_subgroups, - int *size_of_list_of_ranks_in_all_subgroups, - int *num_ranks_in_list_of_ranks_in_all_subgroups, - int *num_total_subgroups, - int *map_to_comm_ranks, int level_in_hierarchy - ) { - - /* local data */ - int rc=OMPI_SUCCESS; - int rank_in_list,old_sg_size=(*num_total_subgroups); - int sg_index, array_id, offset, sg_id; - sub_group_params_t *dummy1 = NULL; - int32_t **dummy2 = NULL; - int32_t *dummy3 = NULL; - int32_t **temp = NULL; - int knt1 = 0, - knt2 = 0, - knt3 = 0; - - /* loop over all elements in the array of ranks selected, looking for - * newly selected ranks - these form the new subgroups */ - for(rank_in_list = 0 ; rank_in_list < size_of_all_selected ; rank_in_list++ ) { - int sg_root, current_rank_in_comm; - /* get root's rank in the communicator */ - sg_root=all_selected[rank_in_list]; - - if( 0 == sg_root ) { - /* this rank not selected - go to the next rank */ - continue; - } - - if( sg_root < 0 ) { - sg_root=-sg_root-1; - } else { - sg_root-=1; - } - - current_rank_in_comm=map_to_comm_ranks[rank_in_list]; - - /* loop over existing groups, and see if this is a member of a new group - * or if this group has already been found. 
- */ - for (sg_index = old_sg_size, sg_id = -1 ; sg_index < (*num_total_subgroups) ; sg_index++) { - if ((*sub_group_meta_data)[sg_index].root_rank_in_comm == sg_root) { - /* add rank to the list */ - (*sub_group_meta_data)[sg_index].n_ranks++; - sg_id = sg_index; - break; - } - } - - if (-1 == sg_id) { - /* did not find existing sub-group, create new one */ - /* intialize new subgroup */ - PROVIDE_SUFFICIENT_MEMORY((*sub_group_meta_data), dummy1, - (*size_of_sub_group_meta_data), - sub_group_params_t, (*num_total_subgroups), 1, 5); - if (OPAL_UNLIKELY(NULL == (*sub_group_meta_data))) { - ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - /* do this for the temporary memory slots */ - PROVIDE_SUFFICIENT_MEMORY(temp, dummy2, - knt1, int32_t *, knt2, 1, 5); - if (OPAL_UNLIKELY(NULL == temp)) { - ML_VERBOSE(10, ("Cannot allocate memory for temporary storage")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - (*sub_group_meta_data)[(*num_total_subgroups)].root_rank_in_comm = sg_root; - (*sub_group_meta_data)[(*num_total_subgroups)].n_ranks = 1; - - /* no need for this here - use a temporary ptr */ - temp[knt2]= - (int *)calloc(size_of_all_selected, sizeof(int)); - if (OPAL_UNLIKELY(NULL == temp[knt2] ) ){ - ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - sg_id = (*num_total_subgroups)++; - knt3 = ++knt2; - } else { - knt3 = sg_id - old_sg_size + 1; - } - - array_id = (*sub_group_meta_data)[sg_id].n_ranks-1; - temp[knt3-1][array_id] = current_rank_in_comm; - } - - /* linearize the data - one rank will ship this to all the other - * ranks the communicator - */ - /* make sure there is enough memory to hold the list */ - PROVIDE_SUFFICIENT_MEMORY((*list_of_ranks_in_all_subgroups),dummy3, - (*size_of_list_of_ranks_in_all_subgroups), - int, (*num_ranks_in_list_of_ranks_in_all_subgroups), - 
size_of_all_selected,size_of_all_selected); - if (OPAL_UNLIKELY(NULL == (*list_of_ranks_in_all_subgroups))) { - ML_VERBOSE(10, ("Cannot allocate memory for list_of_ranks_in_all_subgroups.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* loop over new subgroups */ - for( sg_id=old_sg_size ; sg_id < (*num_total_subgroups) ; sg_id++ ) { - offset=(*num_ranks_in_list_of_ranks_in_all_subgroups); - - (*sub_group_meta_data)[sg_id].index_of_first_element=offset; - - for( array_id=0 ; array_id < (*sub_group_meta_data)[sg_id].n_ranks ; - array_id++ ) { - (*list_of_ranks_in_all_subgroups)[offset+array_id]= - temp[sg_id-old_sg_size][array_id]; - } - (*num_ranks_in_list_of_ranks_in_all_subgroups)+= - (*sub_group_meta_data)[sg_id].n_ranks; - (*sub_group_meta_data)[sg_id].level_in_hierarchy=level_in_hierarchy; - /* this causes problems on XT5 starting at 6144 cores */ - free(temp[sg_id-old_sg_size]); - } - - /* clean up temporary storage */ - exit_ERROR: - if (NULL != temp) { - free(temp); - } - - /* return */ - return rc; -} - -static int topo_parse (sub_group_params_t *sub_group_meta_data, int index, int *dst, int *src, int *dst_offset) -{ - int src_offset = sub_group_meta_data[index].index_of_first_element; - int total_ranks_represented = 0, ranks_represented; - - if (0 == sub_group_meta_data[index].level_in_hierarchy) { - ML_VERBOSE(10, ("Copying data for index %d to %d. Ranks at this level: %d", index, *dst_offset, - sub_group_meta_data[index].n_ranks)); - - /* move level one subgroup data */ - memmove (dst + *dst_offset, src + src_offset, sizeof (int) * sub_group_meta_data[index].n_ranks); - - /* update the offset of this subgroup since it may have been moved */ - sub_group_meta_data[index].index_of_first_element = *dst_offset; - *dst_offset += sub_group_meta_data[index].n_ranks; - } - - ML_VERBOSE(10, ("Subgroup %d has %d ranks. 
level = %d", index, sub_group_meta_data[index].n_ranks, - sub_group_meta_data[index].level_in_hierarchy)); - - /* fill in subgroup ranks */ - sub_group_meta_data[index].rank_data=(rank_properties_t *) - malloc(sizeof(rank_properties_t) * sub_group_meta_data[index].n_ranks); - if (OPAL_UNLIKELY(NULL == sub_group_meta_data[index].rank_data)) { - ML_VERBOSE(10, ("Cannot allocate memory for rank_data ")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* recurse on all subgroups */ - for (int j = 0 ; j < sub_group_meta_data[index].n_ranks ; ++j) { - int rank = src[j + src_offset]; - int next_level; - - /* determine if this rank is the root of the subgroup */ - if (rank == sub_group_meta_data[index].root_rank_in_comm) { - sub_group_meta_data[index].root_index = j; - } - - sub_group_meta_data[index].rank_data[j].leaf = true; - sub_group_meta_data[index].rank_data[j].rank = rank; - - if (sub_group_meta_data[index].level_in_hierarchy) { - ML_VERBOSE(10, ("Looking for subgroup containing %d as root", rank)); - - for (next_level = index - 1 ; next_level >= 0 ; --next_level) { - if (rank == sub_group_meta_data[next_level].root_rank_in_comm) { - ML_VERBOSE(10, ("Subgroup %d has root %d", next_level, rank)); - break; - } - } - - /* all ranks are represented in the lowest level. 
this subgroup is not at the lowest level - * so it must be a root at a lower level */ - assert (next_level >= 0); - - /* not a leaf node */ - sub_group_meta_data[index].rank_data[j].leaf = false; - ranks_represented = topo_parse (sub_group_meta_data, next_level, dst, src, dst_offset); - if (0 > ranks_represented) { - return ranks_represented; - } - sub_group_meta_data[index].rank_data[j].num_of_ranks_represented = ranks_represented; - - total_ranks_represented += ranks_represented; - } else { - /* leaf node */ - sub_group_meta_data[index].rank_data[j].leaf = true; - sub_group_meta_data[index].rank_data[j].num_of_ranks_represented = 1; - - total_ranks_represented++; - } - - ML_VERBOSE(10, ("Group %d, level %d, index %d, rank %d represents %d ranks", index, - sub_group_meta_data[index].level_in_hierarchy, j, rank, - sub_group_meta_data[index].rank_data[j].num_of_ranks_represented)); - } - - return total_ranks_represented; -} - -/* put level one in leaf order */ -static int coll_ml_parse_topology (sub_group_params_t *sub_group_meta_data, size_t sub_group_count, - int *list_of_ranks_in_all_subgroups, int level_one_size) -{ - int *tmp_data; - int offset, rc; - - tmp_data = calloc (level_one_size, sizeof (int)); - if (NULL == tmp_data) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* do a DFS parse of the topology and ensure that level 1 is in the correct scatter/gather order */ - offset = 0; - rc = topo_parse (sub_group_meta_data, sub_group_count - 1, tmp_data, list_of_ranks_in_all_subgroups, &offset); - if (0 > rc) { - free (tmp_data); - return rc; - } - - /* all ranks in level one should be represented in the re-order buffer */ - assert (offset == level_one_size); - - /* copy re-ordered level 1 (0) */ - if (0 != offset) { - /* copy new level one data back into the list of all subgroups */ - memmove (list_of_ranks_in_all_subgroups, tmp_data, sizeof (int) * offset); - } - - free (tmp_data); - - return OMPI_SUCCESS; -} - -static int 
append_new_network_context(hierarchy_pairs *pair) -{ - int i; - int rc; - mca_coll_ml_lmngr_t *memory_manager = &mca_coll_ml_component.memory_manager; - bcol_base_network_context_t *nc = NULL; - - for (i = 0; i < pair->num_bcol_modules; i++) { - nc = pair->bcol_modules[i]->network_context; - if (NULL != nc) { - rc = mca_coll_ml_lmngr_append_nc(memory_manager, nc); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - /* caching the network context id on bcol */ - pair->bcol_modules[i]->context_index = nc->context_id; - } - } - - return OMPI_SUCCESS; -} - -static int ml_module_set_small_msg_thresholds(mca_coll_ml_module_t *ml_module) -{ - const mca_coll_ml_topology_t *topo_info; - mca_bcol_base_module_t *bcol_module; - hierarchy_pairs *pair; - - int i, j, rc, hier, *ranks_in_comm, n_hier, tp, - comm_size = ompi_comm_size(ml_module->comm); - - for (tp = 0; tp < COLL_ML_TOPO_MAX; ++tp) { - topo_info = &ml_module->topo_list[tp]; - if (COLL_ML_TOPO_DISABLED == topo_info->status) { - /* Skip the topology */ - continue; - } - - n_hier = topo_info->n_levels; - for (hier = 0; hier < n_hier; ++hier) { - pair = &topo_info->component_pairs[hier]; - - for (i = 0; i < pair->num_bcol_modules; ++i) { - bcol_module = pair->bcol_modules[i]; - - if (NULL != bcol_module->set_small_msg_thresholds) { - bcol_module->set_small_msg_thresholds(bcol_module); - } - - for (j = 0; j < BCOL_NUM_OF_FUNCTIONS; ++j) { - if (ml_module->small_message_thresholds[j] > - bcol_module->small_message_thresholds[j]) { - ml_module->small_message_thresholds[j] = - bcol_module->small_message_thresholds[j]; - } - } - } - - } - } - - ranks_in_comm = (int *) malloc(comm_size * sizeof(int)); - if (OPAL_UNLIKELY(NULL == ranks_in_comm)) { - ML_ERROR(("Memory allocation failed.")); - return OMPI_ERROR; - } - - for (i = 0; i < comm_size; ++i) { - ranks_in_comm[i] = i; - } - - rc = comm_allreduce_pml(ml_module->small_message_thresholds, - ml_module->small_message_thresholds, - BCOL_NUM_OF_FUNCTIONS, MPI_INT, - 
ompi_comm_rank(ml_module->comm), MPI_MIN, - comm_size, ranks_in_comm, ml_module->comm); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("comm_allreduce_pml failed.")); - return OMPI_ERROR; - } - - free(ranks_in_comm); - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_read_allbcols_settings(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - int i, j, - ret = OMPI_SUCCESS; - int *ranks_map = NULL, - *bcols_in_use = NULL, - *bcols_in_use_all_ranks = NULL; - bool use_user_bufs, limit_size_user_bufs; - ssize_t length_ml_payload; - int64_t frag_size; - const mca_bcol_base_component_2_0_0_t *bcol_component = NULL; - mca_base_component_list_item_t *bcol_cli = NULL; - int bcol_index; - - /* If this assert fails, it means that you changed initialization - * order and the date offset , that is critical for this section of code, - * have not been initilized. - * DO NOT REMOVE THIS ASSERT !!! - */ - assert(ml_module->data_offset >= 0); - - /* need to figure out which bcol's are participating - * in the hierarchy across the communicator, so that we can set - * appropriate segmentation parameters. - */ - bcols_in_use = (int *) calloc(2 * n_hierarchies, sizeof(int)); - if (OPAL_UNLIKELY(NULL == bcols_in_use)) { - ML_VERBOSE(10, ("Cannot allocate memory for bcols_in_use.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - /* setup pointers to arrays that will hold bcol parameters. 
Since - * given bols are not instantiated in all processes, need to get this - * information from those ranks that have instantiated these - * parameters - */ - bcols_in_use_all_ranks = bcols_in_use+n_hierarchies; - - /* get list of bcols that I am using */ - for (j = 0; j < COLL_ML_TOPO_MAX; j++) { - mca_coll_ml_topology_t *topo_info = &ml_module->topo_list[j]; - if (COLL_ML_TOPO_DISABLED == topo_info->status) { - /* skip the topology */ - continue; - } - - for(i = 0; i < topo_info->n_levels; i++ ) { - int ind; - ind = topo_info->component_pairs[i].bcol_index; - bcols_in_use[ind] = 1; - } - } - - /* set one to one mapping */ - ranks_map = (int *) malloc(sizeof(int) * ompi_comm_size(ml_module->comm)); - if (NULL == ranks_map) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - for (i = 0; i < ompi_comm_size(ml_module->comm); i++) { - ranks_map[i] = i; - } - - /* reduce over all the ranks to figure out which bcols are - * participating at this level - */ - ret = comm_allreduce_pml(bcols_in_use, bcols_in_use_all_ranks, - n_hierarchies, MPI_INT, ompi_comm_rank(ml_module->comm), - MPI_MAX, ompi_comm_size(ml_module->comm), - ranks_map, ml_module->comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed. 
bcols_in_use reduction")); - goto exit_ERROR; - } - - /* - * figure out fragmenation parameters - */ - - /* size of ml buffer */ - length_ml_payload = mca_coll_ml_component.payload_buffer_size - ml_module->data_offset; - - /* figure out if data will be segmented for pipelining - - * for non-contigous data will just use a fragment the size - * of the ml payload buffer */ - - /* check to see if any bcols impose a limit */ - limit_size_user_bufs = false; - use_user_bufs = true; - frag_size = length_ml_payload; - bcol_index = 0; - - OPAL_LIST_FOREACH(bcol_cli, &mca_bcol_base_components_in_use, mca_base_component_list_item_t) { - /* check to see if this bcol is being used */ - if (!bcols_in_use_all_ranks[bcol_index++]) { - /* not in use */ - continue; - } - - bcol_component = (mca_bcol_base_component_2_0_0_t *) bcol_cli->cli_component; - - /* check to see if user buffers can be used */ - if (!bcol_component->can_use_user_buffers) { - /* need to use library buffers, so all will do this */ - use_user_bufs = false; - } - - /* figure out fragement size */ - if (bcol_component->max_frag_size != FRAG_SIZE_NO_LIMIT ){ - /* user buffers need to be limited in size */ - limit_size_user_bufs = true; - - if (0 == frag_size) { - /* nothing set yet */ - frag_size = bcol_component->max_frag_size; - } else if (frag_size < bcol_component->max_frag_size) { - /* stricter constraint on fragment size */ - frag_size = bcol_component->max_frag_size; - } - } - } - - if (!use_user_bufs || limit_size_user_bufs) { - /* we need to limit the user buffer size or use library buffers */ - ml_module->fragment_size = frag_size; - } else { - /* entire message may be processed in single chunk */ - ml_module->fragment_size = FRAG_SIZE_NO_LIMIT; - } - - /* for non-contigous data - just use the ML buffers */ - ml_module->ml_fragment_size = length_ml_payload; - - /* set whether we can use user buffers */ - ml_module->use_user_buffers = use_user_bufs; - - ML_VERBOSE(10, ("Seting payload size to %d %d [%d %d]", 
- ml_module->ml_fragment_size, length_ml_payload, - mca_coll_ml_component.payload_buffer_size, - ml_module->data_offset)); - - exit_ERROR: - if (NULL != ranks_map) { - free(ranks_map); - } - if (NULL != bcols_in_use) { - free(bcols_in_use); - } - - return ret; -} - -static int ml_discover_hierarchy(mca_coll_ml_module_t *ml_module) -{ - ompi_proc_t *my_proc = NULL; - - int n_hierarchies = 0, - i = 0, ret = OMPI_SUCCESS; - - int size_bcol_list, size_sbgp_list; - - size_bcol_list = opal_list_get_size(&mca_bcol_base_components_in_use); - size_sbgp_list = opal_list_get_size(&mca_sbgp_base_components_in_use); - - if ((size_bcol_list != size_sbgp_list) || size_sbgp_list < 1 || size_bcol_list < 1) { - ML_ERROR(("Error: (size of mca_bcol_base_components_in_use = %d)" - " != (size of mca_sbgp_base_components_in_use = %d) or zero.", - size_bcol_list, size_sbgp_list)); - return OMPI_ERROR; - } - - n_hierarchies = size_sbgp_list; - - my_proc = ompi_proc_local(); - /* create the converter, for current implementation we - support homogenius comunicators only */ - ml_module->reference_convertor = - opal_convertor_create(my_proc->super.proc_arch, 0); - - if (OPAL_UNLIKELY(NULL == ml_module->reference_convertor)) { - return OMPI_ERROR; - } - - /* Do loop over all supported hiearchies. - To Do. We would like to have mca parameter that will allow control list - of topolgies that user would like use. Right now we will run - */ - for (i = 0; i < COLL_ML_TOPO_MAX; i++) { - if (COLL_ML_TOPO_ENABLED == ml_module->topo_list[i].status) { - ret = mca_coll_ml_component.topo_discovery_fn[i](ml_module, n_hierarchies); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - } - } - - /* Local query for bcol header size */ - ret = calculate_buffer_header_size(ml_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - - /* Get BCOL tuning, like support for zero copy, fragment size, and etc. 
- * This query involves global synchronization over all processes */ - ret = mca_coll_ml_read_allbcols_settings(ml_module, n_hierarchies); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - /* Here is the safe point to call ml_module_memory_initialization , please - be very careful,if you decide to move this arround.*/ - ret = ml_module_memory_initialization(ml_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - /* make sure to release just allocated memory */ - mca_coll_ml_free_block(ml_module->payload_block); - return ret; - } - - ret = ml_module_set_small_msg_thresholds(ml_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - /* make sure to release just allocated memory */ - mca_coll_ml_free_block(ml_module->payload_block); - return ret; - } - - { - /* Syncronization barrier to make sure that all sides finsihed - * to register the memory */ - int ret, i; - int *comm_ranks = NULL; - - comm_ranks = (int *)calloc(ompi_comm_size(ml_module->comm), sizeof(int)); - if (OPAL_UNLIKELY(NULL == comm_ranks)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < ompi_comm_size(ml_module->comm); i++) { - comm_ranks[i] = i; - } - - ret = comm_allreduce_pml(&ret, &i, - 1, MPI_INT, ompi_comm_rank(ml_module->comm), - MPI_MIN, ompi_comm_size(ml_module->comm), comm_ranks, - ml_module->comm); - - free(comm_ranks); - - if (OMPI_SUCCESS != ret) { - ML_ERROR(("comm_allreduce - failed to collect max_comm data")); - return ret; - } - /* Barrier done */ - } - - return ret; -} - -static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - mca_coll_ml_topology_t *topo, int n_hierarchies, - const char *exclude_sbgp_name, const char *include_sbgp_name) -{ - /* local variables */ - char *ptr_output = NULL; - sbgp_base_component_keyval_t *sbgp_cli = NULL; - mca_base_component_list_item_t *bcol_cli = NULL; - hierarchy_pairs *pair = NULL; - - mca_sbgp_base_module_t *module = NULL; - ompi_proc_t 
**copy_procs = NULL, - *my_proc = NULL; - - const mca_sbgp_base_component_2_0_0_t *sbgp_component = NULL; - - - int i_hier = 0, n_hier = 0, ll_p1, bcol_index = 0, - n_procs_in = 0, group_index = 0, n_remain = 0, - i, j, ret = OMPI_SUCCESS, my_rank_in_list = 0, - n_procs_selected = 0, original_group_size = 0, i_am_done = 0, - local_leader, my_rank_in_subgroup, my_rank_in_remaining_list = 0, - my_rank_in_comm; - - int32_t my_lowest_group_index = -1, my_highest_group_index = -1; - - int *map_to_comm_ranks = NULL, *bcols_in_use = NULL; - - int32_t *all_selected = NULL, - *index_proc_selected = NULL; - - short all_reduce_buffer2_in[2]; - short all_reduce_buffer2_out[2]; - sub_group_params_t *array_of_all_subgroup_ranks=NULL; - /* this pointer should probably be an int32_t and not an int type */ - int32_t *list_of_ranks_in_all_subgroups=NULL; - int num_ranks_in_all_subgroups=0,num_total_subgroups=0; - int size_of_array_of_all_subgroup_ranks=0; - int size_of_list_of_ranks_in_all_subgroups=0; - int32_t in_allgather_value; - - if (NULL != exclude_sbgp_name && NULL != include_sbgp_name) { - ret = OMPI_ERROR; - goto exit_ERROR; - } - - ML_VERBOSE(10,("include %s exclude %s size %d", include_sbgp_name, exclude_sbgp_name, n_hierarchies)); - - /* allocates scratch space */ - all_selected = (int32_t *) calloc(ompi_comm_size(ml_module->comm), sizeof(int32_t)); - if (OPAL_UNLIKELY(NULL == all_selected)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - map_to_comm_ranks = (int *) calloc(ompi_comm_size(ml_module->comm), sizeof(int)); - if (OPAL_UNLIKELY(NULL == map_to_comm_ranks)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* - ** obtain list of procs - */ - /* create private copy for manipulation */ - copy_procs = (ompi_proc_t **) calloc(ompi_comm_size(ml_module->comm), - sizeof(ompi_proc_t *)); - if (OPAL_UNLIKELY(NULL == copy_procs)) { - ML_VERBOSE(10, 
("Cannot allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - for (i = 0; i < ompi_comm_size(ml_module->comm); i++) { - copy_procs[i] = ompi_comm_peer_lookup (ml_module->comm, i); - map_to_comm_ranks[i] = i; - } - - my_rank_in_comm = ompi_comm_rank (ml_module->comm); - n_procs_in = ompi_comm_size(ml_module->comm); - original_group_size = n_procs_in; - - /* setup information for all-reduce over out of band */ - index_proc_selected = (int32_t *) malloc(sizeof(int32_t) * n_procs_in); - if (OPAL_UNLIKELY(NULL == index_proc_selected)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* get my proc pointer - used to identify myself in the list */ - my_proc = ompi_proc_local(); - my_rank_in_list = ompi_comm_rank(ml_module->comm); - - topo->component_pairs = (hierarchy_pairs *) calloc(n_hierarchies, sizeof(hierarchy_pairs)); - if (OPAL_UNLIKELY(NULL == topo->component_pairs)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - n_hier = 0; - /* - * Algorithm for subgrouping: - * 1) Start with all the ranks in the communicator - * 2) iterate over all (exclusive) hierarchy selection rules - * A) Apply subgrouping function to the remaining set of ranks - * - After the call to subgrouping subgroup_module->group_list - * has the index of ranks selected, from the list or ranks - * passed in. - * - map_to_comm_ranks maintains the mapping of the remaining - * ranks, to their rank in the communicator - * B) Each rank initializes a scratch array the size of the - * remaining ranks to 0, and then fills in the entry that - * corresponds to itself only with the value -/+R. If the - * rank is the local leader for the subgroup, the value of -R - * is entered, other wise R is entered. R is the root of the - * selected subgroup plus 1, so that for rank 0, +R has a - * different value than -R. 
- * C) The vector is then reduced, with the results going to all - * ranks, over the list of remaining ranks. As a result, - * the ranks of a given subgroup will show up with the value R, - * for all but the local-leader, which will have the value of -R. - * This is also used for error checking. - * D) subgroup_module->group_list is changed to contain the ranks - * of each member of the group within the communicator. - * E) Local rank with the group is determined. - * F) the list or remaining ranks is compacted, removing all selected - * ranks that are not the local-leader of the group. - * map_to_comm_ranks is also compacted. - * 3) This is terminated once all ranks are selected. - */ - - /* loop over hierarchies */ - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_first(&mca_sbgp_base_components_in_use); - bcol_cli = (mca_base_component_list_item_t *) opal_list_get_first(&mca_bcol_base_components_in_use); - - ML_VERBOSE(10, ("Loop over hierarchies.")); - - i_hier = 0; - while ((opal_list_item_t *) sbgp_cli != opal_list_get_end(&mca_sbgp_base_components_in_use)){ - /* number of processes selected with this sbgp on all ranks */ - int global_n_procs_selected; - - /* silence clang warnings */ - assert (NULL != bcol_cli && NULL != sbgp_cli); - - /* - ** obtain the list of ranks in the current level - */ - - sbgp_component = (mca_sbgp_base_component_2_0_0_t *) sbgp_cli->component.cli_component; - - /* Skip excluded levels */ - if (NULL != exclude_sbgp_name) { - - ML_VERBOSE(10,("EXCLUDE compare %s to %s", include_sbgp_name, - sbgp_component->sbgp_version.mca_component_name)); - if(0 == strcmp(exclude_sbgp_name, - sbgp_component->sbgp_version.mca_component_name)) { - /* take the next element */ - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_next((opal_list_item_t *) sbgp_cli); - bcol_cli = (mca_base_component_list_item_t *) opal_list_get_next((opal_list_item_t *) bcol_cli); - continue; - } - } - - if (NULL != include_sbgp_name) { - 
ML_VERBOSE(10,("INCLUDE compare %s to %s", include_sbgp_name, - sbgp_component->sbgp_version.mca_component_name)); - if(0 != strcmp(include_sbgp_name, - sbgp_component->sbgp_version.mca_component_name)) { - /* take the next element */ - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_next((opal_list_item_t *) sbgp_cli); - bcol_cli = (mca_base_component_list_item_t *) opal_list_get_next((opal_list_item_t *) bcol_cli); - continue; - } - } - - ML_VERBOSE(10,("Passed include %s exclude %s", include_sbgp_name, exclude_sbgp_name)); - - /* discover subgroup */ - ML_VERBOSE(10, ("Discover subgroup: hier level - %d.", i_hier)); - module = sbgp_component->select_procs(copy_procs, n_procs_in, - ml_module->comm, - sbgp_cli->key_value, &ptr_output); - if (NULL == module) { - /* no module created */ - n_procs_selected = 0; - /* We must continue and participate in the allgather. - * It's not clear that one can enter this conditional - * during "normal" execution. We need to review - * all modules. - */ - - /* THE CODE SNIPPET COMMENTED OUT BELOW IS DANGEROUS CODE THAT - * COULD RESULT IN A HANG - THE "CONTINUE" STATEMENT MAY RESULT IN - * RANKS BYPASSING THE ALLGATHER IN NON-SYMMETRIC CASES - */ - - /* - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_next((opal_list_item_t *) sbgp_cli); - bcol_cli = (mca_base_component_list_item_t *) opal_list_get_next((opal_list_item_t *) bcol_cli); - continue; - */ - - /* Skipping subgroups of size one will cause these processes to be missed in list of level one - * indices. 
*/ - } else if (NULL == module->group_list || (1 == module->group_size && i_hier)) { - /* bypass modules that have no group_list */ - n_procs_selected = 0; - OBJ_RELEASE(module); - module=NULL; - } else { - n_procs_selected = module->group_size; - } - - ML_VERBOSE(10, ("Hier level - %d; group size - %d", i_hier, n_procs_selected)); - - /* setup array indicating all procs that were selected */ - for (i = 0; i < n_procs_in; i++) { - index_proc_selected[i] = 0; - } - - /* figure out my rank in the subgroup */ - my_rank_in_subgroup=-1; - ll_p1=-1; - in_allgather_value = 0; - if (n_procs_selected) { - /* I need to contribute to the vector */ - for (group_index = 0; group_index < n_procs_selected; group_index++) { - /* set my rank within the group */ - if (map_to_comm_ranks[module->group_list[group_index]] == my_rank_in_comm) { - my_rank_in_subgroup=group_index; - module->my_index = group_index; - /* currently the indecies are still given in terms of - * the rank in the list of remaining ranks */ - my_rank_in_remaining_list=module->group_list[group_index]; - } - } - - if( -1 != my_rank_in_subgroup ) { - /* I am contributing to this subgroup */ - -#ifdef NEW_LEADER_SELECTION -#if 0 - int lleader_index; - /* Select the local leader */ - lleader_index = coll_ml_select_leader(ml_module,module, map_to_comm_ranks, - copy_procs,n_procs_selected); - - local_leader = map_to_comm_ranks[module->group_list[lleader_index]]; -#endif -#else - - /* local leader is rank within list or remaining ranks */ - local_leader = map_to_comm_ranks[module->group_list[0]]; - -#endif - ML_VERBOSE(10,("The local leader selected for hierarchy %d is rank %d ", - i_hier, local_leader)); - - ll_p1 = local_leader + 1; - if (local_leader == my_rank_in_comm) { - in_allgather_value = - index_proc_selected[my_rank_in_remaining_list] = -ll_p1; - } else { - in_allgather_value = - index_proc_selected[my_rank_in_remaining_list] = ll_p1; - } - } - } - - /* gather the information from all the other remaining ranks 
*/ - ML_VERBOSE(10, ("Call for comm_allreduce_pml.")); - ret = comm_allgather_pml(&in_allgather_value, - all_selected, 1, MPI_INT, my_rank_in_list, - n_procs_in, map_to_comm_ranks ,ml_module->comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed.")); - goto exit_ERROR; - } - - /* do some sanity checks */ - if( -1 != my_rank_in_subgroup ) { - ret = check_global_view_of_subgroups(n_procs_selected, - n_procs_in, ll_p1, all_selected, module ); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("check_global_view_of_subgroups failed.")); - goto exit_ERROR; - } - } - - /* - ** change the list of procs stored on the module to ranks within - ** the communicator. - */ - - ML_VERBOSE(10, ("Change the list of procs; hier level - %d.", i_hier)); - for (group_index = 0; group_index < n_procs_selected; group_index++) { - module->group_list[group_index] = map_to_comm_ranks[module->group_list[group_index]]; - /* set my rank within the group */ - if (module->group_list[group_index] == ompi_comm_rank(ml_module->comm)) { - module->my_index = group_index; - } - } - - /* - * accumulate data on the new subgroups created - */ - /*XXX*/ - global_n_procs_selected = num_ranks_in_all_subgroups; - ret = get_new_subgroup_data(all_selected, n_procs_in, - &array_of_all_subgroup_ranks, - &size_of_array_of_all_subgroup_ranks, - &list_of_ranks_in_all_subgroups, - &size_of_list_of_ranks_in_all_subgroups, - &num_ranks_in_all_subgroups, - &num_total_subgroups, map_to_comm_ranks,i_hier); - - if( OMPI_SUCCESS != ret ) { - ML_VERBOSE(10, (" Error: get_new_subgroup_data returned %d ",ret)); - goto exit_ERROR; - } - - /* the global number of processes selected at this level is the difference - * in the number of procs in all subgroups between this level and the - * last */ - global_n_procs_selected = num_ranks_in_all_subgroups - global_n_procs_selected; - - /* am I done ? 
*/ - i_am_done=0; - if ( (all_selected[my_rank_in_list] == ll_p1) && - /* if I was not a member of any group, still need to continue */ - n_procs_selected ){ - i_am_done = 1; - } - /* get my rank in the list */ - n_remain = 0; - my_rank_in_list = -1; - for (i = 0; i < n_procs_in; i++) { - if (all_selected[i] > 0 ) { - /* this proc will not be used in the next hierarchy */ - continue; - } - /* reset my_rank_in_list, n_procs_in */ - copy_procs[n_remain] = copy_procs[i]; - map_to_comm_ranks[n_remain] = map_to_comm_ranks[i]; - - if (my_proc == copy_procs[n_remain]){ - my_rank_in_list = n_remain; - } - - n_remain++; - } - - /* check to make sure we did not get a size 1 group if more than - * one rank are still remaning to be grouped */ - if ((1 == n_procs_selected) && n_remain > 1) { - OBJ_RELEASE(module); - n_procs_selected = 0; - } - - if( 0 < n_procs_selected ) { - /* increment the level counter */ - pair = &topo->component_pairs[n_hier]; - - /* add this to the list of sub-group/bcol pairs in use */ - pair->subgroup_module = module; - pair->bcol_component = (mca_bcol_base_component_t *) - ((mca_base_component_list_item_t *) bcol_cli)->cli_component; - - pair->bcol_index = bcol_index; - - /* create bcol modules */ - ML_VERBOSE(10, ("Create bcol modules.")); - pair->bcol_modules = pair->bcol_component->collm_comm_query(module, &pair->num_bcol_modules); - /* failed to create a new module */ - if (OPAL_UNLIKELY(NULL == pair->bcol_modules)) { - ML_VERBOSE(10, ("Failed to create new modules.")); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - if (pair->bcol_component->need_ordering) { - topo->topo_ordering_info.num_bcols_need_ordering += pair->num_bcol_modules; - } - - /* Append new network contexts to our memory managment */ - ML_VERBOSE(10, ("Append new network contexts to our memory managment.")); - if (OPAL_UNLIKELY(OMPI_SUCCESS != append_new_network_context(pair))) { - ML_VERBOSE(10, ("Exit with error. 
- append new network context")); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - for (i = 0; i < pair->num_bcol_modules; ++i) { - /* set the starting sequence number */ - pair->bcol_modules[i]->squence_number_offset = - mca_coll_ml_component.base_sequence_number; - - /* cache the sub-group size */ - pair->bcol_modules[i]->size_of_subgroup= - module->group_size; - - /* set the bcol id */ - pair->bcol_modules[i]->bcol_id = (int16_t) bcol_index; - - /* Set bcol mode bits */ - topo->all_bcols_mode &= (( mca_bcol_base_module_t *) pair->bcol_modules[i])->supported_mode; - } - - /* - * set largest power of 2 for this group - */ - module->n_levels_pow2 = ml_fls(module->group_size); - /* silence a clang warning */ - assert (module->n_levels_pow2 > 0 && module->n_levels_pow2 < 32); - module->pow_2 = 1 << module->n_levels_pow2; - - n_hier++; - - if (-1 == my_lowest_group_index) { - my_lowest_group_index = bcol_index; - } - - my_highest_group_index = bcol_index; - } - - /* if n_remain is 1, and the communicator size is not 1, and module - ** is not NULL, I am done - */ - if ((1 == n_remain) && (1 < original_group_size) && - (NULL != module)) { - i_am_done = 1; - } - - /* am I done ? */ - if (1 == i_am_done) { - /* nothing more to do */ - goto SelectionDone; - } - - /* take the next element */ - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_next((opal_list_item_t *) sbgp_cli); - bcol_cli = (mca_base_component_list_item_t *) opal_list_get_next((opal_list_item_t *) bcol_cli); - - /* if no processes were selected anywhere with this sbgp module don't bother - * incrementing the hierarchy index. this resolves issues where (for example) - * process binding is not enabled or supported. */ - if (global_n_procs_selected) { - /* The way initialization is currently written *all* ranks MUST appear - * in the first level (0) of the hierarchy. If any rank is not in the first - * level then the calculation of gather/scatter offsets will be wrong. 
- * NTH: DO NOT REMOVE this assert until this changes! */ - assert (i_hier || global_n_procs_selected == n_procs_in); - i_hier++; - } - - ++bcol_index; - - n_procs_in = n_remain; - } - - SelectionDone: - - if (topo->topo_ordering_info.num_bcols_need_ordering > 0) { - for (j = 0; j < n_hier; ++j) { - pair = &topo->component_pairs[j]; - if (pair->bcol_component->need_ordering) { - for (i = 0; i < pair->num_bcol_modules; ++i) { - pair->bcol_modules[i]->next_inorder = &topo->topo_ordering_info.next_inorder; - } - } - } - } - - /* If I was not done, it means that we skipped all subgroups and no hierarchy was build */ - if (0 == i_am_done) { - - if (NULL != include_sbgp_name || NULL != exclude_sbgp_name) { - /* User explicitly asked for specific type of topology, which generates empty group */ - opal_show_help("help-mpi-coll-ml.txt", - "empty-sub-group", true, - NULL != include_sbgp_name ? include_sbgp_name : exclude_sbgp_name); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - ML_VERBOSE(10, ("Constructing empty hierarchy")); - ret = OMPI_SUCCESS; - goto exit_ERROR; - } - - topo->n_levels = n_hier; - - /* Find lowest and highest index of the groups in this communicator. - ** This will be needed in deciding where in the hierarchical collective - ** sequence of calls these particular groups belong. - ** It is done with one allreduce call to save allreduce overhead. - */ - all_reduce_buffer2_in[0] = (short)my_lowest_group_index; - all_reduce_buffer2_in[1] = (short)-my_highest_group_index; - /* restore map to ranks for the original communicator */ - for (i = 0; i < ompi_comm_size(ml_module->comm); i++) { - map_to_comm_ranks[i] = i; - } - - ret = comm_allreduce_pml(all_reduce_buffer2_in, all_reduce_buffer2_out, - 2, MPI_SHORT, ompi_comm_rank(ml_module->comm), - MPI_MIN, original_group_size, - map_to_comm_ranks, ml_module->comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed. 
all_reduce_buffer2_in reduction")); - goto exit_ERROR; - } - - topo->global_lowest_hier_group_index = all_reduce_buffer2_out[0]; - topo->global_highest_hier_group_index = -all_reduce_buffer2_out[1]; - - ML_VERBOSE(10, ("The lowest index and highest index was successfully found.")); - - ML_VERBOSE(10, ("ml_discover_hierarchy done, n_levels %d lowest_group_index %d highest_group_index %d," - " original_group_size %d my_lowest_group_index %d my_highest_group_index %d", - topo->n_levels, topo->global_lowest_hier_group_index, - topo->global_highest_hier_group_index, - original_group_size, - my_lowest_group_index, - my_highest_group_index)); - - /* - * setup detailed subgroup information - */ - ret = ml_setup_full_tree_data(topo, ml_module->comm, my_highest_group_index, - map_to_comm_ranks,&num_total_subgroups,&array_of_all_subgroup_ranks, - &list_of_ranks_in_all_subgroups); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed: bcols_in_use reduction %d ",ret)); - goto exit_ERROR; - } - - /* cache the ML hierarchical description on the tree */ - topo->number_of_all_subgroups = num_total_subgroups; - topo->array_of_all_subgroups = array_of_all_subgroup_ranks; - - ret = ml_init_k_nomial_trees(topo, list_of_ranks_in_all_subgroups, ompi_comm_rank(ml_module->comm)); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - goto exit_ERROR; - } - - /* Set the route table if know-root type of algorithms is used */ - if (COLL_ML_STATIC_BCAST == mca_coll_ml_component.bcast_algorithm) { - ret = mca_coll_ml_fill_in_route_tab(topo, ml_module->comm); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("mca_coll_ml_fill_in_route_tab returned an error.")); - goto exit_ERROR; - } - } - - /* - ** If all ranks are selected, there will be a single rank that remains - - ** the root of the last group. Check to make sure that all ranks are - ** selected, and if not, return an error. We can't handle the collectives - ** correctly with this module. 
- */ - - exit_ERROR: - - ML_VERBOSE(10, ("Discovery done")); - - /* free temp resources */ - if (NULL != all_selected) { - free(all_selected); - all_selected = NULL; - } - - if (NULL != copy_procs) { - free(copy_procs); - copy_procs = NULL; - } - - if (NULL != map_to_comm_ranks) { - free(map_to_comm_ranks); - map_to_comm_ranks = NULL; - } - - if (NULL != index_proc_selected) { - free(index_proc_selected); - index_proc_selected = NULL; - } - - if (NULL != bcols_in_use) { - free(bcols_in_use); - bcols_in_use = NULL; - } - - if (NULL != list_of_ranks_in_all_subgroups) { - free(list_of_ranks_in_all_subgroups); - list_of_ranks_in_all_subgroups = NULL; - } - - return ret; -} - -void mca_coll_ml_allreduce_matrix_init(mca_coll_ml_module_t *ml_module, - const mca_bcol_base_component_2_0_0_t *bcol_component) -{ - int op, dt, et; - - for (op = 0; op < OMPI_OP_NUM_OF_TYPES; ++op) { - for (dt = 0; dt < OMPI_DATATYPE_MAX_PREDEFINED; ++dt) { - for (et = 0; et < BCOL_NUM_OF_ELEM_TYPES; ++et) { - ml_module->allreduce_matrix[op][dt][et] = - bcol_component->coll_support(op, dt, et); - } - } - } -} - -int mca_coll_ml_fulltree_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - return mca_coll_ml_tree_hierarchy_discovery(ml_module, - &ml_module->topo_list[COLL_ML_HR_FULL], - n_hierarchies, NULL, NULL); -} - -int mca_coll_ml_allreduce_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - mca_base_component_list_item_t *bcol_cli; - const mca_bcol_base_component_2_0_0_t *bcol_component; - - sbgp_base_component_keyval_t *sbgp_cli; - const mca_sbgp_base_component_2_0_0_t *sbgp_component; - - sbgp_cli = (sbgp_base_component_keyval_t *) - opal_list_get_first(&mca_sbgp_base_components_in_use); - - OPAL_LIST_FOREACH(bcol_cli, &mca_bcol_base_components_in_use, mca_base_component_list_item_t) { - bcol_component = (mca_bcol_base_component_2_0_0_t *) bcol_cli->cli_component; - - /* silence false-positive clang warning */ - assert (NULL != 
sbgp_cli); - - if (NULL != bcol_component->coll_support_all_types && - !bcol_component->coll_support_all_types(BCOL_ALLREDUCE)) { - mca_base_component_list_item_t *bcol_cli_next; - const mca_bcol_base_component_2_0_0_t *bcol_component_next; - - bcol_cli_next = (mca_base_component_list_item_t *) - opal_list_get_next((opal_list_item_t *) bcol_cli); - - mca_coll_ml_component.need_allreduce_support = true; - mca_coll_ml_allreduce_matrix_init(ml_module, bcol_component); - - sbgp_component = (mca_sbgp_base_component_2_0_0_t *) - sbgp_cli->component.cli_component; - - ML_VERBOSE(10, ("Topology build: sbgp %s will be excluded.", - sbgp_component->sbgp_version.mca_component_name)); - - - /* If there isn't additional component supports all types => print warning */ - if (1 == opal_list_get_size(&mca_bcol_base_components_in_use) || - (opal_list_item_t *) bcol_cli_next == - opal_list_get_end(&mca_bcol_base_components_in_use)) { - opal_show_help("help-mpi-coll-ml.txt", - "allreduce-not-supported", true, - bcol_component->bcol_version.mca_component_name); - - } else { - bcol_component_next = (mca_bcol_base_component_2_0_0_t *) - bcol_cli_next->cli_component; - - if (NULL != bcol_component_next->coll_support_all_types && - !bcol_component_next->coll_support_all_types(BCOL_ALLREDUCE)) { - - opal_show_help("help-mpi-coll-ml.txt", - "allreduce-alt-nosupport", true, - bcol_component->bcol_version.mca_component_name); - - } - } - - return mca_coll_ml_tree_hierarchy_discovery(ml_module, - &ml_module->topo_list[COLL_ML_HR_ALLREDUCE], - n_hierarchies, sbgp_component->sbgp_version.mca_component_name, NULL); - } - - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_next((opal_list_item_t *) sbgp_cli); - } - - return OMPI_SUCCESS; -} - -int mca_coll_ml_fulltree_exclude_basesmsocket_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - return mca_coll_ml_tree_hierarchy_discovery(ml_module, - &ml_module->topo_list[COLL_ML_HR_NBS], - n_hierarchies, 
"basesmsocket", NULL); -} - -int mca_coll_ml_fulltree_ptp_only_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - return mca_coll_ml_tree_hierarchy_discovery(ml_module, - &ml_module->topo_list[COLL_ML_HR_SINGLE_PTP], - n_hierarchies, NULL, "p2p"); -} - -int mca_coll_ml_fulltree_iboffload_only_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - return mca_coll_ml_tree_hierarchy_discovery(ml_module, - &ml_module->topo_list[COLL_ML_HR_SINGLE_IBOFFLOAD], - n_hierarchies, NULL, "ibnet"); -} - -#define IS_REACHABLE 1 -#define IS_NOT_REACHABLE -1 - -static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_communicator_t *comm) -{ - int i, rc, level, comm_size = 0, - my_rank = ompi_comm_rank(comm); - - int32_t **route_table = NULL; - int32_t *all_reachable_ranks = NULL; - - struct ompi_proc_t **sbgp_procs = NULL; - - mca_sbgp_base_module_t *sbgp_group = NULL; - comm_size = ompi_comm_size(comm); - - all_reachable_ranks = (int32_t *) malloc(comm_size * sizeof(int32_t)); - if (NULL == all_reachable_ranks) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - for (i = 0; i < comm_size; ++i) { - all_reachable_ranks[i] = IS_NOT_REACHABLE; - } - - route_table = (int32_t **) calloc(topo->n_levels, sizeof(int32_t *)); - if (NULL == route_table) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - topo->route_vector = (mca_bcol_base_route_info_t *) - calloc(comm_size, sizeof(mca_bcol_base_route_info_t)); - if (NULL == topo->route_vector) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - all_reachable_ranks[my_rank] = IS_REACHABLE; - - for (level = 0; level < topo->n_levels; ++level) { - sbgp_group = topo->component_pairs[level].subgroup_module; - - route_table[level] = (int32_t *) malloc(comm_size * sizeof(int32_t)); - if (NULL == 
route_table[level]) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - for (i = 0; i < comm_size; ++i) { - if (IS_NOT_REACHABLE != all_reachable_ranks[i]) { - all_reachable_ranks[i] = sbgp_group->my_index; - } - } - - rc = comm_allreduce_pml(all_reachable_ranks, - route_table[level], - comm_size, - MPI_INT, sbgp_group->my_index, - MPI_MAX, sbgp_group->group_size, - sbgp_group->group_list, - comm); - if (OMPI_SUCCESS != rc) { - ML_VERBOSE(10, ("comm_allreduce failed.")); - goto exit_ERROR; - } - - for (i = 0; i < comm_size; ++i) { - if (IS_NOT_REACHABLE != - route_table[level][i]) { - all_reachable_ranks[i] = IS_REACHABLE; - } - } - } - - assert(0 < level); - - /* If there are unreachable ranks => - reach them through leader of my upper layer */ - for (i = 0; i < comm_size; ++i) { - if (IS_NOT_REACHABLE == - route_table[level - 1][i]) { - route_table[level - 1][i] = 0; - } - } - - free(all_reachable_ranks); - - for (i = 0; i < comm_size; ++i) { - for (level = 0; level < topo->n_levels; ++level) { - if (IS_NOT_REACHABLE != route_table[level][i]) { - topo->route_vector[i].level = level; - topo->route_vector[i].rank = route_table[level][i]; - break; - } - } - } - -#if OPAL_ENABLE_DEBUG -#define COLL_ML_ROUTE_BUFF_SIZE (1024*1024) - /* Only bother creating the string if we're actually going to - print it out (i.e., if the verbose level is >= 10) */ - if (mca_coll_ml_component.verbose >= 10) { - int ii, jj; - char *buff, *output; - - output = buff = calloc(1, COLL_ML_ROUTE_BUFF_SIZE); - assert(NULL != output); - - sprintf(output, "ranks: "); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - - for(ii = 0; ii < comm_size; ++ii) { - sprintf(output, " %2d", ii); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - } - - for (ii = 0; ii < topo->n_levels; ++ii) { - sprintf(output, "\nlevel: %d ", ii); - - output = buff + strlen(buff); - 
assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - for(jj = 0; jj < comm_size; ++jj) { - sprintf(output, " %2d", route_table[ii][jj]); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - } - } - - sprintf(output, "\n\nThe vector is:\n============\nranks: "); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - - for(ii = 0; ii < comm_size; ++ii) { - sprintf(output, " %6d", ii); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - } - - sprintf(output, "\nlevel x rank: "); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - - for(ii = 0; ii < comm_size; ++ii) { - sprintf(output, " (%d, %d)", - topo->route_vector[ii].level, - topo->route_vector[ii].rank); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - } - - ML_VERBOSE(10, ("\nThe table is:\n============%s", buff)); - free(buff); - } -#endif - - for (level = 0; level < topo->n_levels; ++level) { - free(route_table[level]); - } - - free(route_table); - - return OMPI_SUCCESS; - - exit_ERROR: - - ML_VERBOSE(10, ("Exit with error status - %d.", rc)); - if (NULL != route_table) { - for (level = 0; level < topo->n_levels; ++level) { - if (NULL != route_table[level]) { - free(route_table[level]); - } - } - - free(route_table); - } - - if (NULL != sbgp_procs) { - free(sbgp_procs); - } - - if (NULL != all_reachable_ranks) { - free(all_reachable_ranks); - } - - return rc; -} - -static void init_coll_func_pointers(mca_coll_ml_module_t *ml_module) -{ - mca_coll_base_module_2_1_0_t *coll_base = &ml_module->super; - - int iboffload_used = - mca_coll_ml_check_if_bcol_is_used("iboffload", ml_module, COLL_ML_TOPO_MAX); - - /* initialize coll component function pointers */ - coll_base->coll_module_enable = ml_module_enable; - coll_base->ft_event = NULL; - - if (mca_coll_ml_component.disable_allgather) { - coll_base->coll_allgather = NULL; - 
coll_base->coll_iallgather = NULL; - } else { - coll_base->coll_allgather = mca_coll_ml_allgather; - coll_base->coll_iallgather = mca_coll_ml_allgather_nb; - } - - coll_base->coll_allgatherv = NULL; - - if (mca_coll_ml_component.use_knomial_allreduce) { - if (true == mca_coll_ml_component.need_allreduce_support) { - coll_base->coll_allreduce = mca_coll_ml_allreduce_dispatch; - coll_base->coll_iallreduce = mca_coll_ml_allreduce_dispatch_nb; - } else { - coll_base->coll_allreduce = mca_coll_ml_allreduce; - coll_base->coll_iallreduce = mca_coll_ml_allreduce_nb; - } - } else { - coll_base->coll_allreduce = NULL; - } - - coll_base->coll_alltoall = NULL; - coll_base->coll_ialltoall = NULL; - - coll_base->coll_alltoallv = NULL; - coll_base->coll_alltoallw = NULL; - - coll_base->coll_barrier = mca_coll_ml_barrier_intra; - - /* Use the sequential broadcast */ - if (COLL_ML_SEQ_BCAST == mca_coll_ml_component.bcast_algorithm) { - coll_base->coll_bcast = mca_coll_ml_bcast_sequential_root; - } else { - coll_base->coll_bcast = mca_coll_ml_parallel_bcast; - } - - coll_base->coll_exscan = NULL; - coll_base->coll_gather = NULL; - /* - coll_base->coll_gather = mca_coll_ml_gather; - */ - /* Current iboffload/ptpcoll version have no support for gather */ - if (iboffload_used || - mca_coll_ml_check_if_bcol_is_used("ptpcoll", ml_module, COLL_ML_TOPO_MAX)) { - coll_base->coll_gather = NULL; - } - - - coll_base->coll_gatherv = NULL; - if (mca_coll_ml_component.disable_reduce) { - coll_base->coll_reduce = NULL; - } else { - coll_base->coll_reduce = mca_coll_ml_reduce; - } - coll_base->coll_reduce_scatter = NULL; - coll_base->coll_scan = NULL; - coll_base->coll_scatter = NULL; -#if 0 - coll_base->coll_scatter = mca_coll_ml_scatter_sequential; -#endif - coll_base->coll_scatterv = NULL; - - coll_base->coll_iallgatherv = NULL; - coll_base->coll_ialltoallv = NULL; - coll_base->coll_ialltoallw = NULL; - coll_base->coll_ibarrier = mca_coll_ml_ibarrier_intra; - - coll_base->coll_ibcast = 
mca_coll_ml_parallel_bcast_nb; - coll_base->coll_iexscan = NULL; - coll_base->coll_igather = NULL; - coll_base->coll_igatherv = NULL; - coll_base->coll_ireduce = mca_coll_ml_reduce_nb; - coll_base->coll_ireduce_scatter = NULL; - coll_base->coll_iscan = NULL; - coll_base->coll_iscatter = NULL; - coll_base->coll_iscatterv = NULL; -} - -static int init_lists(mca_coll_ml_module_t *ml_module) -{ - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - int num_elements = cs->free_list_init_size; - int max_elements = cs->free_list_max_size; - int elements_per_alloc = cs->free_list_grow_size; - size_t length_payload = 0; - size_t length; - int ret; - - /* initialize full message descriptors - moving this to the - * module, as the fragment has resrouce requirements that - * are communicator dependent */ - /* no data associated with the message descriptor */ - - length = sizeof(mca_coll_ml_descriptor_t); - ret = opal_free_list_init(&(ml_module->message_descriptors), length, - opal_cache_line_size, OBJ_CLASS(mca_coll_ml_descriptor_t), - length_payload, 0, - num_elements, max_elements, elements_per_alloc, - NULL, 0, NULL, - init_ml_message_desc, ml_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_ERROR(("opal_free_list_init exit with error")); - return ret; - } - - /* initialize fragement descriptors - always associate one fragment - * descriptr with full message descriptor, so that we can minimize - * small message latency */ - - /* create a free list of fragment descriptors */ - /*length_payload=sizeof(something);*/ - length = sizeof(mca_coll_ml_fragment_t); - ret = opal_free_list_init (&(ml_module->fragment_descriptors), length, - opal_cache_line_size, OBJ_CLASS(mca_coll_ml_fragment_t), - length_payload, 0, - num_elements, max_elements, elements_per_alloc, - NULL, 0, NULL, - init_ml_fragment_desc, ml_module); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("opal_free_list_init exit with error")); - return ret; - } - - return OMPI_SUCCESS; -} - -static int 
check_for_max_supported_ml_modules(struct ompi_communicator_t *comm) -{ - int i, ret; - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - int *comm_ranks = NULL; - - comm_ranks = (int *)calloc(ompi_comm_size(comm), sizeof(int)); - if (OPAL_UNLIKELY(NULL == comm_ranks)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - for (i = 0; i < ompi_comm_size(comm); i++) { - comm_ranks[i] = i; - } - - ret = comm_allreduce_pml(&cs->max_comm, &cs->max_comm, - 1 , MPI_INT, ompi_comm_rank(comm), - MPI_MIN, ompi_comm_size(comm), comm_ranks, - comm); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("comm_allreduce - failed to collect max_comm data")); - return ret; - } - - if (0 >= cs->max_comm || - ompi_comm_size(comm) < cs->min_comm_size) { - return OMPI_ERROR; - } else { - --cs->max_comm; - } - - free(comm_ranks); - - return OMPI_SUCCESS; -} - -#if OPAL_ENABLE_DEBUG -#define DEBUG_ML_COMM_QUERY() \ - do { \ - static int verbosity_level = 5; \ - static int module_num = 0; \ - ML_VERBOSE(10, ("ML module - %p num %d for comm - %p, " \ - "comm size - %d, ML component prio - %d.", \ - ml_module, ++module_num, comm, ompi_comm_size(comm), *priority)); \ - /* For now I want to always print that we enter ML - \ - at the past there was an issue that we did not enter ML and actually run with tuned. \ - Still I do not want to print it for each module - only for the first. 
*/ \ - ML_VERBOSE(verbosity_level, ("ML module - %p was successfully created", ml_module)); \ - verbosity_level = 10; \ - } while(0) - -#else -#define DEBUG_ML_COMM_QUERY() -#endif - -static int mca_coll_ml_need_multi_topo(int bcol_collective) -{ - mca_base_component_list_item_t *bcol_cli; - const mca_bcol_base_component_2_0_0_t *bcol_component; - - for (bcol_cli = (mca_base_component_list_item_t *) - opal_list_get_first(&mca_bcol_base_components_in_use); - (opal_list_item_t *) bcol_cli != - opal_list_get_end(&mca_bcol_base_components_in_use); - bcol_cli = (mca_base_component_list_item_t *) - opal_list_get_next((opal_list_item_t *) bcol_cli)) { - bcol_component = (mca_bcol_base_component_2_0_0_t *) bcol_cli->cli_component; - if (NULL != bcol_component->coll_support_all_types && - !bcol_component->coll_support_all_types(bcol_collective)) { - return true; - } - } - - return false; -} - -/* We may call this function ONLY AFTER algorithm initialization */ -static int setup_bcast_table(mca_coll_ml_module_t *module) -{ - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - bool has_zero_copy; - - /* setup bcast index table */ - if (COLL_ML_STATIC_BCAST == cm->bcast_algorithm) { - module->bcast_fn_index_table[0] = ML_BCAST_SMALL_DATA_KNOWN; - - has_zero_copy = !!(MCA_BCOL_BASE_ZERO_COPY & - module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_KNOWN]->topo_info->all_bcols_mode); - - if (1 == cm->enable_fragmentation || (2 == cm->enable_fragmentation && !has_zero_copy)) { - module->bcast_fn_index_table[1] = ML_BCAST_SMALL_DATA_KNOWN; - } else if (!has_zero_copy) { - - opal_show_help("help-mpi-coll-ml.txt", - "fragmentation-disabled", true); - return OMPI_ERROR; - - } else { - module->bcast_fn_index_table[1] = ML_BCAST_LARGE_DATA_KNOWN; - } - } else { - module->bcast_fn_index_table[0] = ML_BCAST_SMALL_DATA_UNKNOWN; - - if (NULL == module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_UNKNOWN]) { - - opal_show_help("help-mpi-coll-ml.txt", - "static-bcast-disabled", true); - - 
return OMPI_ERROR; - } - - has_zero_copy = !!(MCA_BCOL_BASE_ZERO_COPY & - module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_UNKNOWN]->topo_info->all_bcols_mode); - - if (1 == cm->enable_fragmentation || (2 == cm->enable_fragmentation && !has_zero_copy)) { - module->bcast_fn_index_table[1] = ML_BCAST_SMALL_DATA_UNKNOWN; - } else if (!has_zero_copy) { - - opal_show_help("help-mpi-coll-ml.txt", - "fragmentation-disabled", true); - - return OMPI_ERROR; - } else { - /* If the topology support zero level and no fragmentation was requested */ - module->bcast_fn_index_table[1] = ML_BCAST_LARGE_DATA_UNKNOWN; - } - } - - return OMPI_SUCCESS; -} - -static void ml_check_for_enabled_topologies (int map[][MCA_COLL_MAX_NUM_SUBTYPES], mca_coll_ml_topology_t *topo_list) -{ - int coll_i, st_i; - for (coll_i = 0; coll_i < MCA_COLL_MAX_NUM_COLLECTIVES; coll_i++) { - for (st_i = 0; st_i < MCA_COLL_MAX_NUM_SUBTYPES; st_i++) { - if (map[coll_i][st_i] > -1) { - /* The topology is used, so set it to enabled */ - assert(map[coll_i][st_i] <= COLL_ML_TOPO_MAX); - topo_list[map[coll_i][st_i]].status = COLL_ML_TOPO_ENABLED; - } - } - } -} - -static void setup_default_topology_map(mca_coll_ml_module_t *ml_module) -{ - int i, j; - for (i = 0; i < MCA_COLL_MAX_NUM_COLLECTIVES; i++) { - for (j = 0; j < MCA_COLL_MAX_NUM_SUBTYPES; j++) { - ml_module->collectives_topology_map[i][j] = -1; - } - } - - ml_module->collectives_topology_map[ML_BARRIER][ML_BARRIER_DEFAULT] = COLL_ML_HR_FULL; - - ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_SMALL_DATA_KNOWN] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_SMALL_DATA_UNKNOWN] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_SMALL_DATA_SEQUENTIAL] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_LARGE_DATA_KNOWN] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_LARGE_DATA_UNKNOWN] = COLL_ML_HR_FULL; - 
ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_LARGE_DATA_UNKNOWN] = COLL_ML_HR_FULL; - - ml_module->collectives_topology_map[ML_ALLGATHER][ML_SMALL_DATA_ALLGATHER] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_ALLGATHER][ML_LARGE_DATA_ALLGATHER] = COLL_ML_HR_FULL; - - ml_module->collectives_topology_map[ML_GATHER][ML_SMALL_DATA_GATHER] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_GATHER][ML_LARGE_DATA_GATHER] = COLL_ML_HR_FULL; - - ml_module->collectives_topology_map[ML_ALLTOALL][ML_SMALL_DATA_ALLTOALL] = COLL_ML_HR_SINGLE_IBOFFLOAD; - ml_module->collectives_topology_map[ML_ALLTOALL][ML_LARGE_DATA_ALLTOALL] = COLL_ML_HR_SINGLE_IBOFFLOAD; - - ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_ALLREDUCE] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_ALLREDUCE][ML_LARGE_DATA_ALLREDUCE] = COLL_ML_HR_FULL; - - if (mca_coll_ml_need_multi_topo(BCOL_ALLREDUCE)) { - ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE] = COLL_ML_HR_ALLREDUCE; - ml_module->collectives_topology_map[ML_ALLREDUCE][ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE] = COLL_ML_HR_ALLREDUCE; - } - - ml_module->collectives_topology_map[ML_REDUCE][ML_SMALL_DATA_REDUCE] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_REDUCE][ML_LARGE_DATA_REDUCE] = COLL_ML_HR_FULL; - - - ml_module->collectives_topology_map[ML_SCATTER][ML_SCATTER_SMALL_DATA_KNOWN] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_SCATTER][ML_SCATTER_N_DATASIZE_BINS] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_SCATTER][ML_SCATTER_SMALL_DATA_UNKNOWN] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_SCATTER][ML_SCATTER_SMALL_DATA_SEQUENTIAL] = COLL_ML_HR_FULL; -} - -#define GET_CF(I, J) (&mca_coll_ml_component.coll_config[I][J]); - -static void load_cached_config(mca_coll_ml_module_t *ml_module) -{ - int c_idx, m_idx, alg; - per_collective_configuration_t *cf = NULL; - - for (c_idx = 0; c_idx < 
ML_NUM_OF_FUNCTIONS; c_idx++) { - for (m_idx = 0; m_idx < ML_NUM_MSG; m_idx++) { - cf = GET_CF(c_idx, m_idx); - /* load topology tunings */ - if (ML_UNDEFINED != cf->topology_id && - ML_UNDEFINED != cf->algorithm_id) { - alg = - cf->algorithm_id; - ml_module->collectives_topology_map[c_idx][alg] = - cf->topology_id; - } - } - } -} - -/* Pasha: In future I would suggest to convert this configuration to some sophisticated mca parameter or - even configuration file. On this stage of project I will set it statically and later we will change it - to run time parameter */ -static void setup_topology_coll_map(mca_coll_ml_module_t *ml_module) -{ - /* Load default topology setup */ - setup_default_topology_map(ml_module); - - /* Load configuration file */ - load_cached_config(ml_module); - - ml_check_for_enabled_topologies(ml_module->collectives_topology_map, ml_module->topo_list); -} - -/* query to see if the module is available for use on the given - * communicator, and if so, what it's priority is. This is where - * the backing shared-memory file is created. - */ -mca_coll_base_module_t * -mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority) -{ - /* local variables */ - int ret = OMPI_SUCCESS; - - mca_coll_ml_module_t *ml_module = NULL; - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - bool iboffload_was_requested = mca_coll_ml_check_if_bcol_is_requested("iboffload"); - - ML_VERBOSE(10, ("ML comm query start.")); - - /** - * No support for inter-communicator yet. - */ - if (OMPI_COMM_IS_INTER(comm)) { - *priority = -1; - return NULL; - } - - if (MPI_THREAD_MULTIPLE == ompi_mpi_thread_provided) { - ML_VERBOSE(10, ("coll:ml: MPI_THREAD_MULTIPLE not suppported; skipping this component")); - *priority = -1; - return NULL; - } - - - /* NTH: Disabled this check until we have a better one. 
*/ -#if 0 - if (!ompi_rte_proc_is_bound) { - /* do not enable coll/ml unless this process is bound (for now) */ - *priority = -1; - return NULL; - } -#endif - - /** - * If it is inter-communicator and size is less than 2 we have specialized modules - * to handle the intra collective communications. - */ - if (OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) < 2) { - ML_VERBOSE(10, ("It is inter-communicator and size is less than 2.")); - *priority = -1; - return NULL; - } - - /** - * In current implementation we limit number of supported ML modules in cases when - * iboffload companent was requested - */ - if (iboffload_was_requested) { - ret = check_for_max_supported_ml_modules(comm); - if (OMPI_SUCCESS != ret) { - /* We have nothing to cleanup yet, so just return NULL */ - ML_VERBOSE(10, ("check_for_max_supported_ml_modules returns ERROR, return NULL")); - *priority = -1; - return NULL; - } - } - - ML_VERBOSE(10, ("Create ML module start.")); - - /* allocate and initialize an ml module */ - ml_module = OBJ_NEW(mca_coll_ml_module_t); - if (NULL == ml_module) { - return NULL; - } - - /* Get our priority */ - *priority = cs->ml_priority; - - /** Set initial ML values **/ - ml_module->comm = comm; - /* set the starting sequence number */ - ml_module->collective_sequence_num = cs->base_sequence_number; - ml_module->no_data_collective_sequence_num = cs->base_sequence_number; - /* initialize the size of the largest collective communication description */ - ml_module->max_fn_calls = 0; - -#ifdef NEW_LEADER_SELECTION - coll_ml_construct_resource_graphs(ml_module); -#endif - - /* Set topology - function map */ - setup_topology_coll_map(ml_module); - - /** - * This is the core of the function: - * setup communicator hierarchy - the ml component is available for - * caching information about the sbgp modules selected. 
- */ - ret = ml_discover_hierarchy(ml_module); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(1, ("ml_discover_hierarchy exited with error.")); - goto CLEANUP; - } - - /* gvm Disabled for debuggin */ - ret = mca_coll_ml_build_filtered_fn_table(ml_module); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(1, ("mca_coll_ml_build_filtered_fn_table returned an error.")); - goto CLEANUP; - } - - /* Generate active bcols list */ - generate_active_bcols_list(ml_module); - - /* setup collective schedules - note that a given bcol may have more than - one module instantiated. We may want to use the same collective cap - capabilities over more than one set of procs. Each module will store - the relevant information for a given set of procs */ - ML_VERBOSE(10, ("Call for setup schedule.")); - ret = ml_coll_schedule_setup(ml_module); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(1, ("ml_coll_schedule_setup exit with error")); - goto CLEANUP; - } - - /* Setup bcast table */ - ML_VERBOSE(10, ("Setup bcast table")); - ret = setup_bcast_table(ml_module); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(1, ("setup_bcast_table exit with error")); - goto CLEANUP; - } - - ML_VERBOSE(10, ("Setup pointer to collectives calls.")); - init_coll_func_pointers(ml_module); - - ML_VERBOSE(10, ("Setup free lists")); - ret = init_lists(ml_module); - if (OMPI_SUCCESS != ret) { - goto CLEANUP; - } - - DEBUG_ML_COMM_QUERY(); - - /* Compute the bruck's buffer constant -- temp buffer requirements */ - { - int comm_size =ompi_comm_size(comm); - int count = 1, log_comm_size = 0; - - /* compute log of comm_size */ - while (count < comm_size) { - count = count << 1; - log_comm_size++; - } - - ml_module->brucks_buffer_threshold_const = - (comm_size / 2 + comm_size % 2) * (log_comm_size) ; - - - ml_module->log_comm_size = log_comm_size; - } - - if (iboffload_was_requested) { - /* HACK: Calling memory sync barrier first time to make sure - * that iboffload create qps for service barrier in right order, - * otherwise we may have 
deadlock and really nasty data corruptions. - * If you plan to remove this one - please talk to me first. - * Pasha. - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - Work around for deadlock caused by connection setup - for asyc service barrier. Asyc service barrier use own set of - MQ and QP _BUT_ the exchange operation uses the MQ that is used for - primary set of collectives operations like Allgahter, Barrier,etc. - As result exchange wait operation could be pushed to primary MQ and - cause dead-lock. - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - Create connection for service barrier and memory address exchange - for ml buffers and asyc service barrier - */ - ret = mca_coll_ml_memsync_intra(ml_module, 0); - if (OMPI_SUCCESS != ret) { - goto CLEANUP; - } - opal_progress(); - } - - /* The module is ready */ - ml_module->initialized = true; - - return &(ml_module->super); - - CLEANUP: - /* Vasily: RLG: Need to cleanup free lists */ - if (NULL != ml_module) { - OBJ_RELEASE(ml_module); - } - - return NULL; -} - -/* copied slightly modified from coll/hcoll */ -#define ML_SAVE_FALLBACK(_coll_ml, _coll) \ - do { \ - _coll_ml->fallback.coll_ ## _coll = comm->c_coll.coll_ ## _coll; \ - _coll_ml->fallback.coll_ ## _coll ## _module = comm->c_coll.coll_ ## _coll ## _module; \ - if (comm->c_coll.coll_ ## _coll && comm->c_coll.coll_ ## _coll ## _module) { \ - OBJ_RETAIN(_coll_ml->fallback.coll_ ## _coll ## _module); \ - } \ - } while(0) - -static void ml_save_fallback_colls (mca_coll_ml_module_t *coll_ml, - struct ompi_communicator_t *comm) -{ - memset (&coll_ml->fallback, 0, sizeof (coll_ml->fallback)); - /* save lower-priority collectives to handle cases not yet handled - * by coll/ml */ - ML_SAVE_FALLBACK(coll_ml, allreduce); - ML_SAVE_FALLBACK(coll_ml, allgather); - ML_SAVE_FALLBACK(coll_ml, reduce); - ML_SAVE_FALLBACK(coll_ml, bcast); - ML_SAVE_FALLBACK(coll_ml, iallreduce); - ML_SAVE_FALLBACK(coll_ml, iallgather); - 
ML_SAVE_FALLBACK(coll_ml, ireduce); - ML_SAVE_FALLBACK(coll_ml, ibcast); -} - -/* - * Init module on the communicator - */ -static int -ml_module_enable(mca_coll_base_module_t *module, - struct ompi_communicator_t *comm) -{ - /* local variables */ - char output_buffer[2 * MPI_MAX_OBJECT_NAME]; - - ml_save_fallback_colls ((mca_coll_ml_module_t *) module, comm); - - memset(&output_buffer[0], 0, sizeof(output_buffer)); - snprintf(output_buffer, sizeof(output_buffer), "%s (cid %d)", comm->c_name, - comm->c_contextid); - - ML_VERBOSE(10, ("coll:ml:enable: new communicator: %s.", output_buffer)); - - /* All done */ - return OMPI_SUCCESS; -} - -OBJ_CLASS_INSTANCE(mca_coll_ml_module_t, - mca_coll_base_module_t, - mca_coll_ml_module_construct, - mca_coll_ml_module_destruct); - -OBJ_CLASS_INSTANCE(mca_coll_ml_collective_operation_progress_t, - ompi_request_t, - mca_coll_ml_collective_operation_progress_construct, - mca_coll_ml_collective_operation_progress_destruct); diff --git a/ompi/mca/coll/ml/coll_ml_payload_buffers.h b/ompi/mca/coll/ml/coll_ml_payload_buffers.h deleted file mode 100644 index d4ac765342f..00000000000 --- a/ompi/mca/coll/ml/coll_ml_payload_buffers.h +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_ML_PAYLOAD_BUFFERS_H -#define MCA_ML_PAYLOAD_BUFFERS_H - -#include "ompi/include/ompi/constants.h" -#include "opal/threads/mutex.h" - -struct buffer_t { - /* payload */ - void *payload; - - /* next payload buffer - need this because of wrap around, and - * because we want to allocate several buffers at once, but only - * manipulate one entry - */ - struct buffer_t *next_buffer; -}; -typedef struct buffer_t buffer_t; - -struct ml_buffers_t { - /* fifo size */ - int fifo_size; - - /* write index - next to allocate */ - int head_index; - opal_mutex_t head_lock; - - /* read index - next to free */ - int tail_index; - - /* number available - used to detect full queue */ - int n_segments_available; - - /* mask - assumes that fifo link is a power of 2 */ - int mask; - - /* fifo */ - buffer_t *fifo; -}; - -typedef struct ml_buffers_t ml_buffers_t; - -/* Initialization function */ - -static inline int ml_fifo_init( - int fifo_size, void *memory_chunk, size_t size_of_memory_chunk, - size_t segment_alignment, - size_t segment_size, ml_buffers_t *buffer_fifo) -{ - /* local variable */ - ptrdiff_t allocation_base, memory_chunk_ptr; - size_t memory_to_allocate, allocated_fifo_size, - allocated_segment_size, seg; - - /* make sure fifo size is power of 2, and round up if not - want - * efficient addressing */ - if( 0 >= fifo_size ) { - return OMPI_ERROR; - } - allocated_fifo_size=1; - while ( allocated_fifo_size < (size_t)fifo_size ) { - allocated_fifo_size*=2; - } - - /* set buffer size to match its alignment - round size up */ - allocated_segment_size=segment_size; - if( 0 >= segment_alignment ) { - /* multiples of alignmnet */ - allocated_segment_size=( (allocated_segment_size-1)/segment_alignment)+1; - allocated_segment_size=allocated_segment_size*segment_alignment; - } - - /* adjust base pointer to segment alignment */ - memory_chunk_ptr = (ptrdiff_t )memory_chunk; - 
allocation_base=( ( memory_chunk_ptr-1)/segment_alignment)+1; - allocation_base=allocated_segment_size*segment_alignment; - - /* check for input consistency */ - memory_to_allocate=size_of_memory_chunk-(allocation_base-memory_chunk_ptr); - if( (allocated_segment_size * allocated_fifo_size) < memory_to_allocate ) { - return OMPI_ERROR; - } - - /* allocate the fifo array */ - buffer_fifo->fifo=(buffer_t *)malloc(sizeof(buffer_t)*allocated_fifo_size); - if( NULL == buffer_fifo->fifo) { - return OMPI_ERROR; - } - - /* Initialize structure */ - for( seg=0 ; seg < allocated_fifo_size ; seg++ ) { - buffer_fifo->fifo[seg].payload= - (void *)(allocation_base+seg*allocated_segment_size); - } - for( seg=0 ; seg < allocated_fifo_size-1 ; seg++ ) { - buffer_fifo->fifo[seg].next_buffer= - &(buffer_fifo->fifo[seg+1]); - } - buffer_fifo->fifo[allocated_fifo_size-1].next_buffer= - &(buffer_fifo->fifo[0]); - - buffer_fifo->head_index=0; - buffer_fifo->tail_index=0; - buffer_fifo->n_segments_available=allocated_fifo_size; - buffer_fifo->fifo_size=allocated_fifo_size; - buffer_fifo->mask=buffer_fifo->fifo_size-1; - OBJ_CONSTRUCT(&(buffer_fifo->head_lock), opal_mutex_t); - - /* return */ - return OMPI_SUCCESS; -} - -/* - * Allocate several buffers. Either all requested buffers are allocated, - * or none are allocated. 
- */ -static inline buffer_t *ml_fifo_alloc_n_buffers(int n_to_allocate, - ml_buffers_t *buffer_fifo) -{ - /* local variables */ - buffer_t *ret=NULL; - - /* RLG - probably want to try a few times before giving up */ - if(!OPAL_THREAD_TRYLOCK(&(buffer_fifo->head_lock))) { - if( buffer_fifo->n_segments_available >= n_to_allocate ) { - ret=&(buffer_fifo->fifo[buffer_fifo->head_index]); - buffer_fifo->head_index=(buffer_fifo->head_index+n_to_allocate); - /* wrap around */ - buffer_fifo->head_index&=buffer_fifo->mask; - - buffer_fifo->n_segments_available -= n_to_allocate; - } - OPAL_THREAD_UNLOCK(&(buffer_fifo->head_lock)); - } /* end of allocatoin */ - - return ret; -} - -/* return buffers */ -static inline void ml_fifo_return_n_buffers(int n_to_return, - ml_buffers_t *buffer_fifo) -{ - - OPAL_THREAD_LOCK(&(buffer_fifo->head_lock)); - - /* move tail pointer - RLG: Do we really need the tail pointer ? */ - buffer_fifo->tail_index=(buffer_fifo->tail_index+n_to_return); - /* wrap around */ - buffer_fifo->tail_index&=buffer_fifo->mask; - - /* adjust number of available buffers */ - buffer_fifo->n_segments_available += n_to_return; - - OPAL_THREAD_UNLOCK(&(buffer_fifo->head_lock)); - -} - -#endif - diff --git a/ompi/mca/coll/ml/coll_ml_progress.c b/ompi/mca/coll/ml/coll_ml_progress.c deleted file mode 100644 index 9954e348006..00000000000 --- a/ompi/mca/coll/ml/coll_ml_progress.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi/mca/coll/ml/coll_ml.h" - -/* - * This routine is used to progress a series of communication - * primitives. - * - * Assumptions: - * - A message is described by a message descriptor - * - Each message has a setup function associated with it, which is - * algorithm specific. 
When a fragment is being prepared, this - * progress is used to setup the arguments that will be passed into - * each routine called to complete a given function. The idea here - * is that when the progress routines is called, the full communication - * pattern has already been described in the setup function, with - * progress function being generic. - * - Each fragment is described by a fragment descriptor - * - Each message descriptor has a fragment descriptor permanently - * associated with it. - * - The message will be proressed as long as the individul - * functions complete. When an indivicual funciton does not - * complete, the current state will be saved, for future - * restart. - * - return status - * OMPI_COMPLETE: funciton completed - * OMPI_INCOMPLETE: need to continue progressing the funciton - * any other return value - error condition - */ - -int coll_ml_progress_individual_message(mca_coll_ml_fragment_t *frag_descriptor) -{ - /* local variables */ - int fn_index, ret = OMPI_SUCCESS; - uint32_t n_frags_complete; - int starting_fn_index=frag_descriptor->current_fn_index; - coll_ml_collective_description_t *local_comm_description= - frag_descriptor->full_msg_descriptor->local_comm_description; - - /* loop over functions */ - for( fn_index=starting_fn_index ; fn_index < local_comm_description->n_functions; - fn_index ++ ) { - mca_bcol_base_module_t *bcol_module= - local_comm_description->functions[fn_index].bcol_module; - ret =(bcol_module->bcol_function_table[local_comm_description->functions[fn_index].fn_idx]) - (&(frag_descriptor->fn_args[fn_index]), &local_comm_description->functions[fn_index]); - if( ML_OMPI_COMPLETE != ret ) { - /* since function incomplete, need to decide what to do */ - if( ML_OMPI_INCOMPLETE == ret ) { - /* need to return to this later */ - /* mark where to continue */ - frag_descriptor->current_fn_index=fn_index; - /* RLG - is this really best ? 
Only advantage is that - * if we exit the loop, we can assume message is - * complete - */ - return OMPI_SUCCESS; - } else { - /* some sort of error condition */ - frag_descriptor->current_fn_index=fn_index; - return ret; - } - } - - } - - /* looks like we are done */ - /* increment counter for number of completed fragments */ - n_frags_complete = OPAL_THREAD_ADD_SIZE_T( - &(frag_descriptor->full_msg_descriptor->frags_complete), 1); - - /* - * release resrouces - */ - - /* fragment resources */ - - /* full message resources */ - if ( n_frags_complete == frag_descriptor->full_msg_descriptor->n_fragments) - { - /* free any fragments that still need to be freed - * NOTE: at this level we do not handle any resrouces - * aside from the pre-registered buffers, all these - * are handled in the bcol level */ - - /* return the buffers to the ml free list */ - - /* mark as complete - so MPI can complete - * the message descriptor will be freed by a call - * to mpi_test/mpi_wait/... as the message descriptor - * also holds the mpi request object */ - - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_reduce.c b/ompi/mca/coll/ml/coll_ml_reduce.c deleted file mode 100644 index 53798524368..00000000000 --- a/ompi/mca/coll/ml/coll_ml_reduce.c +++ /dev/null @@ -1,526 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#define REDUCE_SMALL_MESSAGE_THRESHOLD 2048 - -static int mca_coll_ml_reduce_unpack(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int ret; - /* need to put in more */ - int count = coll_op->variable_fn_params.count; - ompi_datatype_t *dtype = coll_op->variable_fn_params.dtype; - - void *dest = (void *)((uintptr_t)coll_op->full_message.dest_user_addr + - (uintptr_t)coll_op->fragment_data.offset_into_user_buffer); - void *src = (void *)((uintptr_t)coll_op->fragment_data.buffer_desc->data_addr + - (size_t)coll_op->variable_fn_params.rbuf_offset); - - ret = ompi_datatype_copy_content_same_ddt(dtype, (int32_t) count, (char *) dest, - (char *) src); - if (ret < 0) { - return OMPI_ERROR; - } - - if (coll_op->variable_fn_params.root_flag) { - ML_VERBOSE(1,("In reduce unpack %d", - *(int *)((unsigned char*) src))); - } - - ML_VERBOSE(10, ("sbuf addr %p, sbuf offset %d, sbuf val %lf, rbuf addr %p, rbuf offset %d, rbuf val %lf.", - coll_op->variable_fn_params.sbuf, coll_op->variable_fn_params.sbuf_offset, - *(double *) ((unsigned char *) coll_op->variable_fn_params.sbuf + - (size_t) coll_op->variable_fn_params.sbuf_offset), - coll_op->variable_fn_params.rbuf, coll_op->variable_fn_params.rbuf_offset, - *(double *) ((unsigned char *) coll_op->variable_fn_params.rbuf + - (size_t) coll_op->variable_fn_params.rbuf_offset))); - - return OMPI_SUCCESS; -} - - -static int -mca_coll_ml_reduce_task_setup (mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int fn_idx, h_level, next_h_level, my_index; - 
mca_sbgp_base_module_t *sbgp; - mca_coll_ml_topology_t *topo = coll_op->coll_schedule->topo_info; - - fn_idx = coll_op->sequential_routine.current_active_bcol_fn; - h_level = coll_op->coll_schedule->component_functions[fn_idx].h_level; - next_h_level = (fn_idx < coll_op->coll_schedule->n_fns - 1) ? - coll_op->coll_schedule->component_functions[fn_idx+1].h_level : -1; - sbgp = topo->component_pairs[h_level].subgroup_module; - my_index = sbgp->my_index; - - if (coll_op->variable_fn_params.root_flag) { - ML_VERBOSE(1,("In task completion Data in receiver buffer %d ", - *(int *)((unsigned char*) coll_op->variable_fn_params.rbuf + - coll_op->variable_fn_params.rbuf_offset))); - } - - /* determine the root for this level of the hierarchy */ - if (coll_op->coll_schedule->topo_info->route_vector[coll_op->global_root].level == next_h_level || - coll_op->global_root == sbgp->group_list[my_index]) { - /* I am the global root or I will be talking to the global root in the next round. */ - coll_op->variable_fn_params.root = my_index; - } else if (coll_op->coll_schedule->topo_info->route_vector[coll_op->global_root].level == h_level) { - /* the root is in this level of my hierarchy */ - coll_op->variable_fn_params.root = coll_op->coll_schedule->topo_info->route_vector[coll_op->global_root].rank; - } else { - coll_op->variable_fn_params.root = 0; - } - - /* Set the route vector for this root */ - coll_op->variable_fn_params.root_route = - &coll_op->coll_schedule->topo_info->route_vector[sbgp->group_list[coll_op->variable_fn_params.root]]; - - /* Am I the root of this hierarchy? */ - coll_op->variable_fn_params.root_flag = (my_index == coll_op->variable_fn_params.root); - - /* For hierarchy switch btw source and destination buffer - * No need to make this switch for the first call .. 
- * */ - if (0 < fn_idx) { - int tmp_offset = coll_op->variable_fn_params.sbuf_offset; - coll_op->variable_fn_params.sbuf_offset = - coll_op->variable_fn_params.rbuf_offset; - coll_op->variable_fn_params.rbuf_offset = tmp_offset; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_reduce_frag_progress(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - void *buf; - - size_t dt_size; - int ret, frag_len, count; - - ptrdiff_t lb, extent; - - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc; - mca_coll_ml_collective_operation_progress_t *new_op; - - mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op); - - ret = ompi_datatype_get_extent(coll_op->variable_fn_params.dtype, &lb, &extent); - if (ret < 0) { - return OMPI_ERROR; - } - - dt_size = (size_t) extent; - - /* Keep the pipeline filled with fragments */ - while (coll_op->fragment_data.message_descriptor->n_active < - coll_op->fragment_data.message_descriptor->pipeline_depth) { - /* If an active fragment happens to have completed the collective during - * a hop into the progress engine, then don't launch a new fragment, - * instead break and return. - */ - if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled - == coll_op->fragment_data.message_descriptor->n_bytes_total) { - break; - } - - /* Get an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(OP_ML_MODULE(coll_op)); - if (NULL == src_buffer_desc) { - /* If there exist outstanding fragments, then break out - * and let an active fragment deal with this later, - * there are no buffers available. - */ - if (0 < coll_op->fragment_data.message_descriptor->n_active) { - return OMPI_SUCCESS; - } else { - /* It is useless to call progress from here, since - * ml progress can't be executed as result ml memsync - * call will not be completed and no memory will be - * recycled. 
So we put the element on the list, and we will - * progress it later when memsync will recycle some memory*/ - - /* The fragment is already on list and - * the we still have no ml resources - * Return busy */ - if (coll_op->pending & REQ_OUT_OF_MEMORY) { - ML_VERBOSE(10,("Out of resources %p", coll_op)); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - coll_op->pending |= REQ_OUT_OF_MEMORY; - opal_list_append(&((OP_ML_MODULE(coll_op))->waiting_for_memory_list), - (opal_list_item_t *)coll_op); - ML_VERBOSE(10,("Out of resources %p adding to pending queue", coll_op)); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - } - - /* Get a new collective descriptor and initialize it */ - new_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_reduce_functions[ML_SMALL_DATA_REDUCE], - coll_op->fragment_data.message_descriptor->src_user_addr, - coll_op->fragment_data.message_descriptor->dest_user_addr, - coll_op->fragment_data.message_descriptor->n_bytes_total, - coll_op->fragment_data.message_descriptor->n_bytes_scheduled); - - ML_VERBOSE(1,(" In Reduce fragment progress %d %d ", - coll_op->fragment_data.message_descriptor->n_bytes_total, - coll_op->fragment_data.message_descriptor->n_bytes_scheduled)); - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(new_op, - src_buffer_desc->buffer_index, src_buffer_desc); - - new_op->fragment_data.current_coll_op = coll_op->fragment_data.current_coll_op; - new_op->fragment_data.message_descriptor = coll_op->fragment_data.message_descriptor; - - /* set the task setup callback */ - new_op->sequential_routine.seq_task_setup = mca_coll_ml_reduce_task_setup; - /* We need this address for pointer arithmetic in memcpy */ - buf = (void*)coll_op->fragment_data.message_descriptor->src_user_addr; - /* calculate the number of data types in this packet */ - count = (coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled < - ((size_t) 
OP_ML_MODULE(coll_op)->small_message_thresholds[BCOL_REDUCE]/4 )? - (coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled) / dt_size : - (size_t) coll_op->variable_fn_params.count); - - /* calculate the fragment length */ - frag_len = count * dt_size; - - ret = ompi_datatype_copy_content_same_ddt(coll_op->variable_fn_params.dtype, count, - (char *) src_buffer_desc->data_addr, (char *) ((uintptr_t) buf + (uintptr_t) - coll_op->fragment_data.message_descriptor->n_bytes_scheduled)); - if (ret < 0) { - return OMPI_ERROR; - } - - /* if root unpack the data */ - if (ompi_comm_rank(ml_module->comm) == coll_op->global_root ) { - new_op->process_fn = mca_coll_ml_reduce_unpack; - new_op->variable_fn_params.root_flag = true; - } else { - new_op->process_fn = NULL; - new_op->variable_fn_params.root_flag = false; - } - - new_op->variable_fn_params.root_route = coll_op->variable_fn_params.root_route; - - /* Setup fragment specific data */ - new_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len; - new_op->fragment_data.buffer_desc = src_buffer_desc; - new_op->fragment_data.fragment_size = frag_len; - (new_op->fragment_data.message_descriptor->n_active)++; - - /* Set in Reduce Buffer arguments */ - ML_SET_VARIABLE_PARAMS_BCAST(new_op, OP_ML_MODULE(new_op), count, - coll_op->variable_fn_params.dtype, src_buffer_desc, - 0, (ml_module->payload_block->size_buffer - - ml_module->data_offset)/2, frag_len, - src_buffer_desc->data_addr); - - new_op->variable_fn_params.buffer_size = frag_len; - new_op->variable_fn_params.sbuf = src_buffer_desc->data_addr; - new_op->variable_fn_params.rbuf = src_buffer_desc->data_addr; - new_op->variable_fn_params.root = coll_op->variable_fn_params.root; - new_op->global_root = coll_op->global_root; - new_op->variable_fn_params.op = coll_op->variable_fn_params.op; - new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor; - 
new_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING; - MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op); - - ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d", - new_op->variable_fn_params.buffer_size, - new_op->fragment_data.fragment_size, - new_op->fragment_data.message_descriptor->n_bytes_scheduled)); - /* initialize first coll */ - new_op->sequential_routine.seq_task_setup(new_op); - - /* append this collective !! */ - OPAL_THREAD_LOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - opal_list_append(&mca_coll_ml_component.sequential_collectives, - (opal_list_item_t *)new_op); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ -int parallel_reduce_start (void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int root, - struct ompi_communicator_t *comm, - mca_coll_ml_module_t *ml_module, - ompi_request_t **req, - int small_data_reduce, - int large_data_reduce) { - ptrdiff_t lb, extent; - size_t pack_len, dt_size; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - mca_coll_ml_collective_operation_progress_t * coll_op = NULL; - bool contiguous = ompi_datatype_is_contiguous_memory_layout(dtype, count); - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - int ret, n_fragments = 1, frag_len, - pipeline_depth, n_dts_per_frag, rank; - - if (MPI_IN_PLACE == sbuf) { - sbuf = rbuf; - } - - ret = ompi_datatype_get_extent(dtype, &lb, &extent); - if (ret < 0) { - return OMPI_ERROR; - } - - rank = ompi_comm_rank (comm); - - dt_size = (size_t) extent; - pack_len = count * dt_size; - - /* We use a separate recieve and send buffer so only half the buffer is usable. 
*/ - if (pack_len < (size_t) ml_module->small_message_thresholds[BCOL_REDUCE] / 4) { - /* The len of the message can not be larger than ML buffer size */ - assert(pack_len <= ml_module->payload_block->size_buffer); - - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - - ML_VERBOSE(10,("Using small data reduce (threshold = %d)", - REDUCE_SMALL_MESSAGE_THRESHOLD)); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_reduce_functions[small_data_reduce], - sbuf, rbuf, pack_len, 0); - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, - src_buffer_desc->buffer_index, src_buffer_desc); - - coll_op->variable_fn_params.rbuf = src_buffer_desc->data_addr; - coll_op->variable_fn_params.sbuf = src_buffer_desc->data_addr; - coll_op->variable_fn_params.buffer_index = src_buffer_desc->buffer_index; - coll_op->variable_fn_params.src_desc = src_buffer_desc; - coll_op->variable_fn_params.count = count; - - ret = ompi_datatype_copy_content_same_ddt(dtype, count, - (void *) (uintptr_t) src_buffer_desc->data_addr, (char *) sbuf); - if (ret < 0){ - return OMPI_ERROR; - } - - } else if (cm->enable_fragmentation || !contiguous) { - ML_VERBOSE(1,("Using Fragmented Reduce ")); - - /* fragment the data */ - /* check for retarded application programming decisions */ - if (dt_size > (size_t) ml_module->small_message_thresholds[BCOL_REDUCE] / 4) { - ML_ERROR(("Sorry, but we don't support datatypes that large")); - return OMPI_ERROR; - } - - /* calculate the number of data types that can fit per ml-buffer */ - n_dts_per_frag = ml_module->small_message_thresholds[BCOL_REDUCE] / (4 * dt_size); - - /* calculate the number of fragments */ - n_fragments = (count + n_dts_per_frag - 1) / n_dts_per_frag; /* round up */ - - /* calculate the actual pipeline depth */ - pipeline_depth = n_fragments < cm->pipeline_depth ? 
n_fragments : cm->pipeline_depth; - - /* calculate the fragment size */ - frag_len = n_dts_per_frag * dt_size; - - /* allocate an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_reduce_functions[small_data_reduce], - sbuf,rbuf, - pack_len, - 0 /* offset for first pack */); - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, - src_buffer_desc->buffer_index, src_buffer_desc); - - - coll_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr; - coll_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr; - - coll_op->fragment_data.message_descriptor->n_active = 1; - coll_op->full_message.n_bytes_scheduled = frag_len; - coll_op->full_message.fragment_launcher = mca_coll_ml_reduce_frag_progress; - coll_op->full_message.pipeline_depth = pipeline_depth; - coll_op->fragment_data.current_coll_op = small_data_reduce; - coll_op->fragment_data.fragment_size = frag_len; - - coll_op->variable_fn_params.count = n_dts_per_frag; /* seems fishy */ - coll_op->variable_fn_params.buffer_size = frag_len; - coll_op->variable_fn_params.src_desc = src_buffer_desc; - /* copy into the ml-buffer */ - ret = ompi_datatype_copy_content_same_ddt(dtype, n_dts_per_frag, - (char *) src_buffer_desc->data_addr, (char *) sbuf); - if (ret < 0) { - return OMPI_ERROR; - } - } else { - ML_VERBOSE(1,("Using zero-copy ptp reduce")); - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_reduce_functions[large_data_reduce], - sbuf, rbuf, pack_len, 0); - - coll_op->variable_fn_params.userbuf = - coll_op->variable_fn_params.sbuf = sbuf; - - coll_op->variable_fn_params.rbuf = rbuf; - - /* The ML buffer is used for testing. 
Later, when we - * switch to use knem/mmap/portals this should be replaced - * appropriately - */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op->variable_fn_params.buffer_index = src_buffer_desc->buffer_index; - coll_op->variable_fn_params.src_desc = src_buffer_desc; - coll_op->variable_fn_params.count = count; - } - - coll_op->process_fn = (rank != root) ? NULL : mca_coll_ml_reduce_unpack; - - /* Set common parts */ - coll_op->fragment_data.buffer_desc = src_buffer_desc; - coll_op->variable_fn_params.dtype = dtype; - coll_op->variable_fn_params.op = op; - - /* NTH: the root, root route, and root flag are set in the task setup */ - - /* Fill in the function arguments */ - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = (ml_module->payload_block->size_buffer - - ml_module->data_offset)/2; - - /* Keep track of the global root of this operation */ - coll_op->global_root = root; - - coll_op->variable_fn_params.sequence_num = - OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1); - coll_op->sequential_routine.current_active_bcol_fn = 0; - /* set the task setup callback */ - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_reduce_task_setup; - - /* Reduce requires the schedule to be fixed. If we use other (changing) schedule, - the operation might result in different result. 
*/ - coll_op->coll_schedule->component_functions = coll_op->coll_schedule-> - comp_fn_arr[coll_op->coll_schedule->topo_info->route_vector[root].level]; - - /* Launch the collective */ - ret = mca_coll_ml_launch_sequential_collective (coll_op); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(10, ("Failed to launch reduce collective")); - return ret; - } - - *req = &coll_op->full_message.super; - - return OMPI_SUCCESS; -} - - -int mca_coll_ml_reduce(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) { - - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t*)module; - int ret = OMPI_SUCCESS; - ompi_request_t *req; - - if (OPAL_UNLIKELY(!ompi_op_is_commute(op) || !opal_datatype_is_contiguous_memory_layout(&dtype->super, count))) { - /* coll/ml does not handle non-communative operations at this time. fallback - * on another collective module */ - return ml_module->fallback.coll_reduce (sbuf, rbuf, count, dtype, op, root, comm, - ml_module->fallback.coll_reduce_module); - } - - ML_VERBOSE(10,("Calling Ml Reduce ")); - ret = parallel_reduce_start(sbuf, rbuf, count, dtype, op, - root, comm, (mca_coll_ml_module_t *)module, - &req, ML_SMALL_DATA_REDUCE, - ML_LARGE_DATA_REDUCE); - if (OPAL_UNLIKELY(ret != OMPI_SUCCESS)) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - /* Blocking reduce */ - ret = ompi_request_wait(&req, MPI_STATUS_IGNORE); - - ML_VERBOSE(10, ("Blocking Reduce is done")); - - return ret; -} - - -int mca_coll_ml_reduce_nb(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module) { - - int ret = OMPI_SUCCESS; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t*)module; - - if (OPAL_UNLIKELY(!ompi_op_is_commute(op) || !opal_datatype_is_contiguous_memory_layout(&dtype->super, count))) { - /* 
coll/ml does not handle non-communative operations at this time. fallback - * on another collective module */ - return ml_module->fallback.coll_ireduce (sbuf, rbuf, count, dtype, op, root, comm, req, - ml_module->fallback.coll_ireduce_module); - } - - ML_VERBOSE(10,("Calling Ml Reduce ")); - ret = parallel_reduce_start(sbuf, rbuf, count, dtype, op, - root, comm, ml_module, - req, ML_SMALL_DATA_REDUCE, - ML_LARGE_DATA_REDUCE); - if (OPAL_UNLIKELY(ret != OMPI_SUCCESS)) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - - ML_VERBOSE(10, ("Non-blocking Reduce is done")); - - return OMPI_SUCCESS; - -} diff --git a/ompi/mca/coll/ml/coll_ml_resource_affinity.c b/ompi/mca/coll/ml/coll_ml_resource_affinity.c deleted file mode 100644 index 23d9a0fc71e..00000000000 --- a/ompi/mca/coll/ml/coll_ml_resource_affinity.c +++ /dev/null @@ -1,147 +0,0 @@ -#include "opal/mca/carto/carto.h" -#include "opal/mca/carto/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_graph.h" -#include "opal/mca/paffinity/base/base.h" -#include "ompi/constants.h" - -#include "orte/mca/ess/ess.h" -#include "coll_ml_resource_affinity.h" - -int get_dev_distance_for_all_procs(opal_carto_graph_t *graph, const char *device) -{ - opal_paffinity_base_cpu_set_t cpus; - opal_carto_base_node_t *device_node; - int min_distance = -1, i, num_processors; - - if(opal_paffinity_base_get_processor_info(&num_processors) != OMPI_SUCCESS) { - num_processors = 100; /* Choose something big enough */ - } - - device_node = opal_carto_base_find_node(graph, device); - - /* no topology info for device found. 
Assume that it is close */ - if(NULL == device_node) - return 0; - - OPAL_PAFFINITY_CPU_ZERO(cpus); - opal_paffinity_base_get(&cpus); - - for (i = 0; i < num_processors; i++) { - opal_carto_base_node_t *slot_node; - int distance, socket, core; - char *slot; - - if(!OPAL_PAFFINITY_CPU_ISSET(i, cpus)) - continue; - - opal_paffinity_base_get_map_to_socket_core(i, &socket, &core); - asprintf(&slot, "socket%d", socket); - - slot_node = opal_carto_base_find_node(graph, slot); - - free(slot); - - if(NULL == slot_node) - return 0; - - distance = opal_carto_base_spf(graph, slot_node, device_node); - - if(distance < 0) - return 0; - - if(min_distance < 0 || min_distance > distance) - min_distance = distance; - } - - return min_distance; -} - -int get_dev_distance_proc(opal_carto_graph_t *graph, - const char *device,int rank, struct ompi_proc_t *proc){ - opal_paffinity_base_cpu_set_t cpus; - opal_carto_base_node_t *device_node; - opal_carto_base_node_t *slot_node; - int distance, socket, core; - char *slot; - int process_id; - int nrank; - - nrank = orte_ess.get_node_rank(&(proc->proc_name)); - - opal_paffinity_base_get_physical_processor_id(nrank, &process_id); - - device_node = opal_carto_base_find_node(graph, device); - - /* no topology info for device found. 
Assume that it is close */ - if(NULL == device_node) - return 0; - - OPAL_PAFFINITY_CPU_ZERO(cpus); - opal_paffinity_base_get(&cpus); - - - - opal_paffinity_base_get_map_to_socket_core(process_id, &socket, &core); - asprintf(&slot, "socket%d", socket); - ML_VERBOSE(10,("The socket addres is %d",socket)); - - slot_node = opal_carto_base_find_node(graph, slot); - - free(slot); - - if(NULL == slot_node) - return -1; - - distance = opal_carto_base_spf(graph, slot_node, device_node); - - if(distance < 0) - return -1; - - return distance; - -} - -int coll_ml_select_leader(mca_coll_ml_module_t *ml_module, - mca_sbgp_base_module_t *sbgp_module, - int *rank_in_comm, - struct ompi_proc_t ** procs, - int nprocs){ - - int rank, dist1, dist2,dist; - int min_dist = 10000; - int i,leader = 10000; - struct ompi_proc_t *proc = NULL; - - for (i=0; igroup_list[i]]; - proc = procs[sbgp_module->group_list[i]]; - dist1 = get_dev_distance_proc(ml_module->sm_graph,"mem0",rank,proc); - dist2 = get_dev_distance_proc(ml_module->ib_graph,"mthca0",rank,proc); - - dist = dist1 + dist2; - - ML_VERBOSE(10,("The distance for proc %d dist1 %d, dist2 %d",i,dist1,dist2)); - if ((dist < min_dist) || ((dist == min_dist) && (i < leader))) { - leader = i; - min_dist = dist; - } - } - - return leader; -} - - -int coll_ml_construct_resource_graphs(mca_coll_ml_module_t *ml_module){ - - opal_carto_base_get_host_graph(&ml_module->sm_graph,"Memory"); - opal_carto_base_get_host_graph(&ml_module->ib_graph,"Infiniband"); - - /* debug - opal_graph_print(ml_module->sm_graph); - */ - return 0; - -} diff --git a/ompi/mca/coll/ml/coll_ml_resource_affinity.h b/ompi/mca/coll/ml/coll_ml_resource_affinity.h deleted file mode 100644 index c64c214ee00..00000000000 --- a/ompi/mca/coll/ml/coll_ml_resource_affinity.h +++ /dev/null @@ -1,19 +0,0 @@ -#include "opal/mca/carto/carto.h" -#include "opal/mca/carto/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_graph.h" -#include "coll_ml.h" - - -/* Get the 
host graph for SM and Infiniband */ -int discover_on_node_resources(const char device); -int get_dev_distance_for_all_procs(opal_carto_graph_t *graph, - const char *device); -int get_dev_distance_proc(opal_carto_graph_t *graph, - const char *device,int rank,struct ompi_proc_t *proc); -int coll_ml_select_leader(mca_coll_ml_module_t *ml_module, - mca_sbgp_base_module_t *sbgp_module, - int *rank_in_comm, - struct ompi_proc_t ** procs, - int nprocs); -int coll_ml_construct_resource_graphs(mca_coll_ml_module_t *ml_module); diff --git a/ompi/mca/coll/ml/coll_ml_select.c b/ompi/mca/coll/ml/coll_ml_select.c deleted file mode 100644 index 429ca5d4d94..00000000000 --- a/ompi/mca/coll/ml/coll_ml_select.c +++ /dev/null @@ -1,359 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/* - * Code for selecting a collective function. The selection is based on - * comm-time attributes and invoke-time attributes. - * - * comm-time attributes: Attributes, which can used to filter available - * collective functions at communicator init time. Example attributes include - * comm size and msg size supported by bcols. - * - * invoke-time attributes: Attributes, which can be used to select function - * for given collective when a collective is invoked. 
- * - */ - -#include "coll_ml_select.h" - -static int msg_to_range(size_t msg_len) -{ - int range; - - if (msg_len < MSG_RANGE_INITIAL) { - return 1; - } - - range = (int) log10((double)((msg_len / MSG_RANGE_INITIAL))); - - if (range > NUM_MSG_RANGES) - return NUM_MSG_RANGES; - - return range; -} - -static int cmp_comm_attribs(struct mca_bcol_base_coll_fn_comm_attributes_t *attrib_var, - struct mca_bcol_base_coll_fn_comm_attributes_t *attrib_bcol){ - - - if (!(attrib_var->comm_size_max <= attrib_bcol->comm_size_max)) { - return -1 ; - } - -#if 0 /* Manju: pelase fix it*/ - if (attrib_var->data_src != attrib_bcol->data_src) { - return -1; - } - - if (attrib_var->waiting_semantics != - attrib_bcol->waiting_semantics) { - return -1; - } -#endif - - return 0; -} - -/* - * Table that holds function names - */ -static int init_invoke_table(mca_coll_ml_module_t *ml_module) -{ - int i=0,j=0,k=0, index_topo; - int bcoll_type; - struct mca_bcol_base_module_t *bcol_module = NULL; - int j_bcol_module=0; - int i_hier=0; - mca_coll_ml_topology_t *topo; - - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &ml_module->topo_list[index_topo]; - if (COLL_ML_TOPO_DISABLED == topo->status) { - /* skip the topology */ - continue; - } - for (i_hier = 0; i_hier < topo->n_levels; i_hier++) { - - for (j_bcol_module = 0; - j_bcol_module < topo->component_pairs[i_hier].num_bcol_modules; - ++j_bcol_module) { - - bcol_module = topo->component_pairs[i_hier].bcol_modules[j_bcol_module]; - - for (bcoll_type = 0; bcoll_type < BCOL_NUM_OF_FUNCTIONS ; bcoll_type++){ - for (i=0; ifiltered_fns_table[DATA_SRC_UNKNOWN][BLOCKING][bcoll_type][i][j][k] - = NULL; - - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][BLOCKING][bcoll_type][i][j][k] - = NULL; - - bcol_module->filtered_fns_table[DATA_SRC_UNKNOWN][NON_BLOCKING][bcoll_type][i][j][k] - = NULL; - - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][bcoll_type][i][j][k] - = NULL; - - } - } - } - } - } - - } - } - 
- return 0; -} - -static int add_to_invoke_table(mca_bcol_base_module_t *bcol_module, - mca_bcol_base_coll_fn_desc_t *fn_filtered, - mca_coll_ml_module_t *ml_module) -{ - struct mca_bcol_base_coll_fn_invoke_attributes_t *inv_attribs = NULL; - int bcoll_type, data_src_type, waiting_semantic; - int range_min,range_max; - int i=0,j=0,k=0,mask=1; - - - - if((NULL == fn_filtered->inv_attr)||(NULL == fn_filtered->comm_attr)) { - return OMPI_ERROR; - } - - ML_VERBOSE(10, ("Calling add_to_invoke_table %p",fn_filtered->coll_fn)); - - inv_attribs = fn_filtered->inv_attr; - bcoll_type = fn_filtered->comm_attr->bcoll_type; - data_src_type = fn_filtered->comm_attr->data_src; - waiting_semantic = fn_filtered->comm_attr->waiting_semantics; - - range_min = msg_to_range(inv_attribs->bcol_msg_min); - range_max = msg_to_range(inv_attribs->bcol_msg_max); - - for (j=0; jdatatype_bitmap & (mask << k)) && (inv_attribs->op_types_bitmap & (mask << j))){ - - for (i=range_min; i<=range_max; i++) { - bcol_module->filtered_fns_table[data_src_type][waiting_semantic][bcoll_type][i][j][k] - = fn_filtered; - ML_VERBOSE(21, ("Putting functions %d %d %d %d %p", bcoll_type, i, j, k, fn_filtered)); - } - } - } - } - - return 0; - -} - -/* - * Maps count to msg range that is used for - * function table - * RANGE 0 is for small messages (say small msg =10k) - * MSG RANGE 1 - 10K - 100K - * RANGE 2 - 100K -1M - * RANGE 3 - 1M - 10M - * - * This is valid only when MSG_RANGE_INC is 10. 
- * For other values the function should replace log10 to log with - * base=MSG_RANGE_INC - */ -static int count_to_msg_range(int count,struct ompi_datatype_t *dtype) -{ - size_t msg_len =0,dt_size; - int range = 0 ; - - ompi_datatype_type_size(dtype, &dt_size); - msg_len = count*dt_size; - - if (msg_len < MSG_RANGE_INITIAL) { - return 1; - } - - range = (int) log10((double)((msg_len/MSG_RANGE_INITIAL))); - - if (range > NUM_MSG_RANGES) - return NUM_MSG_RANGES; - - return range; - -} - -/* Based on the attributes filled in comm_select_attributes - select functions for invoke time filtering */ - - -static int build_algorithms_table(mca_coll_ml_module_t *ml_module,struct - mca_bcol_base_coll_fn_comm_attributes_t *my_comm_attrib) -{ - int i_hier, j_bcol_module, k_bcol_fn, index_topo; - struct mca_bcol_base_module_t *bcol_module = NULL; - opal_list_t *fn_filtered_list; - opal_list_item_t *item; - mca_coll_ml_topology_t *topo; - - /* - * Go through each hierarchy and for each - * bcol module in the hierarchy, select the alogrithms. 
- */ - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &ml_module->topo_list[index_topo]; - for (i_hier = 0; i_hier < topo->n_levels; i_hier++) { - my_comm_attrib->comm_size_max = - topo->component_pairs[i_hier].subgroup_module->group_size; - - for (j_bcol_module = 0; - j_bcol_module < topo->component_pairs[i_hier].num_bcol_modules; - ++j_bcol_module) { - - bcol_module = topo->component_pairs[i_hier].bcol_modules[j_bcol_module]; - - /* Go through all bcols and available bcol functions */ - for (k_bcol_fn = 0; k_bcol_fn < BCOL_NUM_OF_FUNCTIONS; k_bcol_fn++) { - struct mca_bcol_base_coll_fn_desc_t *fn_filtered = NULL; - - /* Query the function attributes */ - fn_filtered_list = - &(bcol_module->bcol_fns_table[k_bcol_fn]); - - - if (0 == opal_list_get_size(fn_filtered_list)) { - continue; - } - /* All definitions of a collective type is stored in the list - * Each item in the list is checked for compatability in the - * attributes and stored in the filtered list */ - for (item = opal_list_get_first(fn_filtered_list); - item != opal_list_get_end(fn_filtered_list); - item = opal_list_get_next(item)){ - - fn_filtered = (struct mca_bcol_base_coll_fn_desc_t *)item; - if (cmp_comm_attribs(my_comm_attrib, fn_filtered->comm_attr) < 0) { - /* Criteria not satisfied continue to next bcol function */ - continue; - } - - /* - * Add bcol function to be available for invoke time selection - */ - add_to_invoke_table(bcol_module, fn_filtered, ml_module); - } - - } - } - } - } - - return 0; - -} - -int mca_coll_ml_build_filtered_fn_table(mca_coll_ml_module_t *ml_module) -{ - - struct mca_bcol_base_coll_fn_comm_attributes_t *my_comm_attrib = NULL; - - - /* Init table storing all filtered functions */ - init_invoke_table(ml_module); - - my_comm_attrib = malloc(sizeof(struct mca_bcol_base_coll_fn_comm_attributes_t)); - - if (!my_comm_attrib) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - my_comm_attrib->comm_size_min = 0; - - /* - * This values should be passed 
using (maybe) mca parameters - */ -#if 0 /* Manju: pelase fix it*/ - my_comm_attrib->data_src = DATA_SRC_KNOWN; - my_comm_attrib->waiting_semantics = BLOCKING; -#endif - - if (build_algorithms_table(ml_module,my_comm_attrib)) { - return OMPI_ERROR; - } - - free(my_comm_attrib); - - return OMPI_SUCCESS; - -} - -#if 0 -static struct mca_bcol_base_coll_fn_invoke_attributes_t *mca_construct_invoke_attributes( - struct ompi_datatype_t *dtype, int count, - struct ompi_op_t op_type) -{ - size_t dt_size, msg_size; - struct mca_bcol_base_coll_fn_invoke_attributes_t *inv_attribs = NULL; - - ompi_datatype_type_size(dtype, &dt_size); - msg_size = count*dt_size; - - - inv_attribs = malloc(sizeof(struct mca_bcol_base_coll_fn_invoke_attributes_t)); - - /* Fix : We might need to have range for msg size - For now selection will - * be based on maximum value - */ - inv_attribs->bcol_msg_min = 0; - inv_attribs->bcol_msg_max = msg_size; - - return inv_attribs; -} -#endif - -int mca_select_bcol_function(mca_bcol_base_module_t *bcol_module, - int bcoll_type, - bcol_function_args_t *bcol_fn_arguments, - mca_bcol_base_function_t *ml_fn_arguments ) -{ - - struct mca_bcol_base_coll_fn_desc_t *fn_filtered = NULL; - int msg_range=0; - int ret; - int data_src_type = DATA_SRC_KNOWN, waiting_type = BLOCKING; - - msg_range = - count_to_msg_range(bcol_fn_arguments->count, - bcol_fn_arguments->dtype); - if ((BCOL_ALLREDUCE == bcoll_type) || (BCOL_REDUCE == bcoll_type)) { - /* needs to be resolved, the op structure has changed, there is no field called "op_type" */ - fn_filtered = - bcol_module->filtered_fns_table[data_src_type][waiting_type][bcoll_type][msg_range][bcol_fn_arguments->dtype->id][bcol_fn_arguments->op->op_type]; - } - else { - fn_filtered = - bcol_module->filtered_fns_table[data_src_type][waiting_type][bcoll_type][msg_range][bcol_fn_arguments->dtype->id][0]; - - } - - if (NULL == fn_filtered) { - return OMPI_ERROR; - } - - ret = 
(fn_filtered->coll_fn)(bcol_fn_arguments,ml_fn_arguments); - return ret; -} - diff --git a/ompi/mca/coll/ml/coll_ml_select.h b/ompi/mca/coll/ml/coll_ml_select.h deleted file mode 100644 index 3c7fa40a07c..00000000000 --- a/ompi/mca/coll/ml/coll_ml_select.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef MCA_COLL_ML_SELECT_H -#define MCA_COLL_ML_SELECT_H - - -#include "ompi_config.h" - -#include -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/op/op.h" -#include "ompi/mca/bcol/bcol.h" -#include "coll_ml.h" -#include "coll_ml_inlines.h" - - - -/* Forward declaration */ -struct mca_coll_ml_module_t; - -int mca_select_bcol_function(mca_bcol_base_module_t *bcol_module, - int bcoll_type, - bcol_function_args_t *bcol_fn_arguments, - mca_bcol_base_function_t *ml_fn_arguments ); -/* - * Goes through the function table and filters the collectives functions - * based on comm-time attributes. - */ -int mca_coll_ml_build_filtered_fn_table(struct mca_coll_ml_module_t *ml_module); - -#endif /* MCA_COLL_ML_SELECT_H */ diff --git a/ompi/mca/coll/ml/configure.m4 b/ompi/mca/coll/ml/configure.m4 deleted file mode 100644 index 79e7bfeab1a..00000000000 --- a/ompi/mca/coll/ml/configure.m4 +++ /dev/null @@ -1,18 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# MCA_coll_ml_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_coll_ml_CONFIG], [ - AC_CONFIG_FILES([ompi/mca/coll/ml/Makefile]) - - AS_IF([test "$OPAL_HAVE_HWLOC" = 1], - [$1], - [$2]) -]) diff --git a/ompi/mca/coll/ml/help-mpi-coll-ml.txt b/ompi/mca/coll/ml/help-mpi-coll-ml.txt deleted file mode 100644 index 874516f7ac4..00000000000 --- a/ompi/mca/coll/ml/help-mpi-coll-ml.txt +++ /dev/null @@ -1,64 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2009-2014 Oak Ridge National Laboratory. All rights reserved. 
-# Copyright (c) 2014 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI's Hierarchical Collective -# Component (coll/ml). -# -[empty-sub-group] -ML topology configuration explicitly requested for this subgroup: - - %s - -Such configuration results in a creation of empty groups. As a result, ML -framework cannot configure requested collective operations. ML framework will be -disabled. One configuration that might enable ML component is --mca bcol_base_string basesmuma,ptpcoll ---mca sbgp_base_subgroups_string basesmuma,p2p - -[allreduce-not-supported] -This BCOL is configured in one of the hierarchy : - - %s - -The BCOL does not support Allreduce for all -operations and datatype combination. In addition, you did not suggest -alternate topology building configurations. - -[allreduce-alt-nosupport] -The hierarchy is configured with alternate BCOL: - - %s - -Both the original topology and alternate topology not support Allreduce for all -operations and datatype combination. In addition, you did not suggest -alternate topology building configurations. - -[fragmentation-disabled] - -ML could not be used because the mca param coll_ml_enable_fragmentation -was set to zero and there is a bcol that does not support -zero copy method. - -[static-bcast-disabled] - -ML could not be used because the mca param coll_ml_bcast_algorithm -was not set to static and other broadcast implementation was available. 
- -[coll-ml-check-error] - -ML detected an error on communicator %s - -This communicator cannot be used any more - -[coll-ml-check-fatal-error] - -ML detected an unrecoverable error on intrinsic communicator %s - -The program will now abort diff --git a/ompi/mca/coll/ml/mca-coll-ml.config b/ompi/mca/coll/ml/mca-coll-ml.config deleted file mode 100644 index bdf43792b06..00000000000 --- a/ompi/mca/coll/ml/mca-coll-ml.config +++ /dev/null @@ -1,170 +0,0 @@ -################################## -# ML collective configuration file -################################## -# NOTE (by Pasha): -# Since ML configuration infrastructure is limited on this stage we do not support some tunings, even so parser -# understands this values and keys, but we do not have place to load all this values. -# threshold - ML infrastructure does not handle multiple thresholds. -# fragmentation - ML infrastructure does not fragmentation tuning per collective. -################################## - -# Defining collective section -[BARRIER] -# Defining message size section. We will support small/large. In future we may add more options. Barrier is very specific case, because it is only collective that does not transfer any data, so for this specific case we use small - -# Since ML does not define any algorithm for BARRIER, we just use default. Later we have to introduce some algorithm name for Barrier -algorithm = ML_BARRIER_DEFAULT - -# Hierarchy setup: -# -# full_hr - means all possible levels of hierarchy (list of possible is defined by user command line) -# full_hr_no_basesocket - means all possible levels of hierarchy (list of possible is defined by user command line) -# except the basesocket subgroup. 
-# ptp_only - only ptp hierarchy -# iboffload_only - only iboffload hierarhcy -hierarchy = full_hr - -[IBARRIER] - -algorithm = ML_BARRIER_DEFAULT -hierarchy = full_hr - -[BCAST] - -# bcast supports: ML_BCAST_SMALL_DATA_KNOWN, ML_BCAST_SMALL_DATA_UNKNOWN, ML_BCAST_SMALL_DATA_SEQUENTIAL -algorithm = ML_BCAST_SMALL_DATA_KNOWN -hierarchy = full_hr - -# bcast supports: ML_BCAST_LARGE_DATA_KNOWN, ML_BCAST_LARGE_DATA_UNKNOWN, ML_BCAST_LARGE_DATA_SEQUENTIAL -algorithm = ML_BCAST_LARGE_DATA_KNOWN -hierarchy = full_hr - -[IBCAST] - -algorithm = ML_BCAST_SMALL_DATA_KNOWN -hierarchy = full_hr - -algorithm = ML_BCAST_LARGE_DATA_KNOWN -hierarchy = full_hr - -[GATHER] - -# gather supports: ML_SMALL_DATA_GATHER -algorithm = ML_SMALL_DATA_GATHER -hierarchy = full_hr - -# gather supports: ML_LARGE_DATA_GATHER -algorithm = ML_LARGE_DATA_GATHER -hierarchy = full_hr - -[IGATHER] - -# gather supports: ML_SMALL_DATA_GATHER -algorithm = ML_SMALL_DATA_GATHER -hierarchy = full_hr - -# gather supports: ML_LARGE_DATA_GATHER -algorithm = ML_LARGE_DATA_GATHER -hierarchy = full_hr - -[ALLGATHER] - -# allgather supports: ML_SMALL_DATA_ALLGATHER -algorithm = ML_SMALL_DATA_ALLGATHER -hierarchy = full_hr - -# allgather supports: ML_LARGE_DATA_ALLGATHER -algorithm = ML_LARGE_DATA_ALLGATHER -hierarchy = full_hr - -[IALLGATHER] - -# allgather supports: ML_SMALL_DATA_ALLGATHER -algorithm = ML_SMALL_DATA_ALLGATHER -hierarchy = full_hr - -# allgather supports: ML_LARGE_DATA_ALLGATHER -algorithm = ML_LARGE_DATA_ALLGATHER -hierarchy = full_hr - -[ALLTOALL] - -# alltoall supports: ML_SMALL_DATA_ALLTOALL -algorithm = ML_SMALL_DATA_ALLTOALL -hierarchy = ptp_only - -# alltoall supports: ML_LARGE_DATA_ALLTOALL -algorithm = ML_LARGE_DATA_ALLTOALL -hierarchy = ptp_only - -[IALLTOALL] - -# alltoall supports: ML_SMALL_DATA_ALLTOALL -algorithm = ML_SMALL_DATA_ALLTOALL -hierarchy = ptp_only - -# alltoall supports: ML_LARGE_DATA_ALLTOALL -algorithm = ML_LARGE_DATA_ALLTOALL -hierarchy = ptp_only - -[ALLREDUCE] - -# 
allreduce supports: ML_SMALL_DATA_ALLREDUCE -algorithm = ML_SMALL_DATA_ALLREDUCE -hierarchy = full_hr - -# allreduce supports: ML_LARGE_DATA_ALLREDUCE -algorithm = ML_LARGE_DATA_ALLREDUCE -hierarchy = full_hr - -[IALLREDUCE] - -# allreduce supports: ML_SMALL_DATA_ALLREDUCE -algorithm = ML_SMALL_DATA_ALLREDUCE -hierarchy = full_hr - -# allreduce supports: ML_LARGE_DATA_ALLREDUCE -algorithm = ML_LARGE_DATA_ALLREDUCE -hierarchy = full_hr - -[REDUCE] - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_SMALL_DATA_REDUCE -hierarchy = full_hr - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_LARGE_DATA_REDUCE -hierarchy = full_hr - -[IREDUCE] - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_SMALL_DATA_REDUCE -hierarchy = full_hr - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_LARGE_DATA_REDUCE -hierarchy = full_hr - - - -[SCATTER] - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_SCATTER_SMALL_DATA_SEQUENTIAL -hierarchy = full_hr - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_SCATTER_SMALL_DATA_SEQUENTIAL -hierarchy = full_hr - -[ISCATTER] - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_SCATTER_SMALL_DATA_SEQUENTIAL -hierarchy = full_hr - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_SCATTER_SMALL_DATA_SEQUENTIAL -hierarchy = full_hr diff --git a/ompi/mca/coll/ml/owner.txt b/ompi/mca/coll/ml/owner.txt deleted file mode 100644 index 51ea04a5175..00000000000 --- a/ompi/mca/coll/ml/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL? 
-status: unmaintained diff --git a/ompi/mca/coll/portals4/Makefile.am b/ompi/mca/coll/portals4/Makefile.am index 2434bfee741..c8668033564 100644 --- a/ompi/mca/coll/portals4/Makefile.am +++ b/ompi/mca/coll/portals4/Makefile.am @@ -2,9 +2,9 @@ # Copyright (c) 2013-2015 Sandia National Laboratories. All rights reserved. # Copyright (c) 2015 Bull SAS. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -15,6 +15,8 @@ local_sources = \ coll_portals4_barrier.c \ coll_portals4_bcast.c \ coll_portals4_reduce.c \ + coll_portals4_gather.c \ + coll_portals4_scatter.c \ coll_portals4_request.h \ coll_portals4_request.c diff --git a/ompi/mca/coll/portals4/coll_portals4.h b/ompi/mca/coll/portals4/coll_portals4.h index e4808dc1216..3e898eab995 100644 --- a/ompi/mca/coll/portals4/coll_portals4.h +++ b/ompi/mca/coll/portals4/coll_portals4.h @@ -3,6 +3,8 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Bull SAS. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -22,10 +24,17 @@ #include "ompi/datatype/ompi_datatype_internal.h" #include "ompi/op/op.h" #include "ompi/mca/mca.h" +#include "opal/datatype/opal_convertor.h" #include "ompi/mca/coll/coll.h" #include "ompi/request/request.h" #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/base/base.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h" + +#include "ompi/mca/mtl/portals4/mtl_portals4.h" + +#define MAXTREEFANOUT 32 BEGIN_C_DECLS @@ -56,11 +65,29 @@ struct mca_coll_portals4_component_t { opal_free_list_t requests; /* request free list for the i collectives */ ptl_ni_limits_t ni_limits; + ptl_size_t portals_max_msg_size; + + int use_binomial_gather_algorithm; }; typedef struct mca_coll_portals4_component_t mca_coll_portals4_component_t; OMPI_MODULE_DECLSPEC extern mca_coll_portals4_component_t mca_coll_portals4_component; + +/* + * Borrowed with thanks from the coll-tuned component, then modified for Portals4. 
+ */ +typedef struct ompi_coll_portals4_tree_t { + int32_t tree_root; + int32_t tree_fanout; + int32_t tree_bmtree; + int32_t tree_prev; + int32_t tree_next[MAXTREEFANOUT]; + int32_t tree_nextsize; + int32_t tree_numdescendants; +} ompi_coll_portals4_tree_t; + + struct mca_coll_portals4_module_t { mca_coll_base_module_t super; size_t coll_count; @@ -75,6 +102,10 @@ struct mca_coll_portals4_module_t { mca_coll_base_module_t *previous_allreduce_module; mca_coll_base_module_iallreduce_fn_t previous_iallreduce; mca_coll_base_module_t *previous_iallreduce_module; + + /* binomial tree */ + ompi_coll_portals4_tree_t *cached_in_order_bmtree; + int cached_in_order_bmtree_root; }; typedef struct mca_coll_portals4_module_t mca_coll_portals4_module_t; OBJ_CLASS_DECLARATION(mca_coll_portals4_module_t); @@ -131,6 +162,22 @@ int opal_stderr(const char *msg, const char *file, const int line, const int ret); +/* + * Borrowed with thanks from the coll-tuned component. + */ +#define COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, PORTALS4_MODULE, ROOT ) \ +do { \ + if( !( ((PORTALS4_MODULE)->cached_in_order_bmtree) \ + && ((PORTALS4_MODULE)->cached_in_order_bmtree_root == (ROOT)) ) ) { \ + if( (PORTALS4_MODULE)->cached_in_order_bmtree ) { /* destroy previous binomial if defined */ \ + ompi_coll_portals4_destroy_tree( &((PORTALS4_MODULE)->cached_in_order_bmtree) ); \ + } \ + (PORTALS4_MODULE)->cached_in_order_bmtree = ompi_coll_portals4_build_in_order_bmtree( (OMPI_COMM), (ROOT) ); \ + (PORTALS4_MODULE)->cached_in_order_bmtree_root = (ROOT); \ + } \ +} while (0) + + int ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm, mca_coll_base_module_t *module); int ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm, @@ -148,12 +195,12 @@ int ompi_coll_portals4_ibcast_intra(void *buff, int count, mca_coll_base_module_t *module); int ompi_coll_portals4_ibcast_intra_fini(struct ompi_coll_portals4_request_t *request); -int ompi_coll_portals4_reduce_intra(void 
*sbuf, void *rbuf, int count, +int ompi_coll_portals4_reduce_intra(const void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int ompi_coll_portals4_ireduce_intra(void* sendbuf, void* recvbuf, int count, +int ompi_coll_portals4_ireduce_intra(const void* sendbuf, void* recvbuf, int count, MPI_Datatype dype, MPI_Op op, int root, struct ompi_communicator_t *comm, @@ -161,11 +208,11 @@ int ompi_coll_portals4_ireduce_intra(void* sendbuf, void* recvbuf, int count, struct mca_coll_base_module_2_1_0_t *module); int ompi_coll_portals4_ireduce_intra_fini(struct ompi_coll_portals4_request_t *request); -int ompi_coll_portals4_allreduce_intra(void* sendbuf, void* recvbuf, int count, +int ompi_coll_portals4_allreduce_intra(const void* sendbuf, void* recvbuf, int count, MPI_Datatype dtype, MPI_Op op, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module); -int ompi_coll_portals4_iallreduce_intra(void* sendbuf, void* recvbuf, int count, +int ompi_coll_portals4_iallreduce_intra(const void* sendbuf, void* recvbuf, int count, MPI_Datatype dtype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** ompi_request, @@ -173,14 +220,37 @@ int ompi_coll_portals4_iallreduce_intra(void* sendbuf, void* recvbuf, int count, int ompi_coll_portals4_iallreduce_intra_fini(struct ompi_coll_portals4_request_t *request); +int ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); +int ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t **request, + mca_coll_base_module_t *module); +int ompi_coll_portals4_igather_intra_fini(struct 
ompi_coll_portals4_request_t *request); + +int ompi_coll_portals4_scatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); +int ompi_coll_portals4_iscatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t **request, + mca_coll_base_module_t *module); +int ompi_coll_portals4_iscatter_intra_fini(struct ompi_coll_portals4_request_t *request); + + static inline ptl_process_t ompi_coll_portals4_get_peer(struct ompi_communicator_t *comm, int rank) { - ompi_proc_t *proc = ompi_comm_peer_lookup(comm, rank); - if (proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] == NULL) { - printf("ompi_coll_portals4_get_peer failure\n"); - } - return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + return ompi_mtl_portals4_get_peer(comm, rank); } @@ -245,7 +315,7 @@ is_reduce_optimizable(struct ompi_datatype_t *dtype, size_t length, struct ompi_ } *ptl_dtype = ompi_coll_portals4_atomic_datatype[dtype->id]; - if (*ptl_dtype == COLL_PORTALS4_NO_DTYPE){ + if (*ptl_dtype == COLL_PORTALS4_NO_DTYPE) { opal_output_verbose(50, ompi_coll_base_framework.framework_output, "datatype %d not supported\n", dtype->id); @@ -357,6 +427,43 @@ void get_k_ary_tree(const unsigned int k_ary, return; } + +static inline void +ompi_coll_portals4_create_recv_converter (opal_convertor_t *converter, + void *target, + ompi_proc_t *proc, + int count, + ompi_datatype_t *datatype) +{ + /* create converter */ + OBJ_CONSTRUCT(converter, opal_convertor_t); + + /* initialize converter */ + opal_convertor_copy_and_prepare_for_recv(proc->super.proc_convertor, + &datatype->super, + count, + target, + 0, + converter); +} + +static inline void +ompi_coll_portals4_create_send_converter 
(opal_convertor_t *converter, + const void *source, + ompi_proc_t *proc, + int count, + ompi_datatype_t *datatype) +{ + OBJ_CONSTRUCT(converter, opal_convertor_t); + + opal_convertor_copy_and_prepare_for_send(proc->super.proc_convertor, + &datatype->super, + count, + source, + 0, + converter); +} + END_C_DECLS #endif /* MCA_COLL_PORTALS4_EXPORT_H */ diff --git a/ompi/mca/coll/portals4/coll_portals4_allreduce.c b/ompi/mca/coll/portals4/coll_portals4_allreduce.c index 3cbe9cbedd0..e80c3b49857 100644 --- a/ompi/mca/coll/portals4/coll_portals4_allreduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_allreduce.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2015 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Bull SAS. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,7 +31,7 @@ #define COLL_PORTALS4_ALLREDUCE_MAX_CHILDREN 2 static int -allreduce_kary_tree_top(void *sendbuf, void *recvbuf, int count, +allreduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dtype, MPI_Op op, struct ompi_communicator_t *comm, ompi_coll_portals4_request_t *request, @@ -354,7 +356,7 @@ allreduce_kary_tree_bottom(ompi_coll_portals4_request_t *request) return (OMPI_SUCCESS); } -int ompi_coll_portals4_allreduce_intra(void* sendbuf, void* recvbuf, int count, +int ompi_coll_portals4_allreduce_intra(const void* sendbuf, void* recvbuf, int count, MPI_Datatype dtype, MPI_Op op, struct ompi_communicator_t *comm, struct mca_coll_base_module_2_1_0_t *module) @@ -383,7 +385,7 @@ int ompi_coll_portals4_allreduce_intra(void* sendbuf, void* recvbuf, int count, } -int ompi_coll_portals4_iallreduce_intra(void* sendbuf, void* recvbuf, int count, +int ompi_coll_portals4_iallreduce_intra(const void* sendbuf, void* recvbuf, int count, MPI_Datatype dtype, MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** ompi_request, 
@@ -406,7 +408,7 @@ int ompi_coll_portals4_iallreduce_intra(void* sendbuf, void* recvbuf, int count, allreduce_kary_tree_top(sendbuf, recvbuf, count, dtype, op, comm, request, portals4_module); - puts("iallreduce"); + opal_output_verbose(10, ompi_coll_base_framework.framework_output, "iallreduce"); return (OMPI_SUCCESS); } @@ -415,10 +417,7 @@ int ompi_coll_portals4_iallreduce_intra_fini(struct ompi_coll_portals4_request_t *request) { allreduce_kary_tree_bottom(request); - - OPAL_THREAD_LOCK(&ompi_request_lock); ompi_request_complete(&request->super, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); return (OMPI_SUCCESS); } diff --git a/ompi/mca/coll/portals4/coll_portals4_barrier.c b/ompi/mca/coll/portals4/coll_portals4_barrier.c index 76b54fd9230..c3bc6a460d7 100644 --- a/ompi/mca/coll/portals4/coll_portals4_barrier.c +++ b/ompi/mca/coll/portals4/coll_portals4_barrier.c @@ -147,9 +147,31 @@ barrier_hypercube_top(struct ompi_communicator_t *comm, } if (is_sync) { - /* Send a put to self when we've received all our messages... */ - ret = PtlCTWait(request->u.barrier.rtr_ct_h, num_msgs, &event); + /* Each process has a pending PtlTriggeredPut. To be sure this request will be triggered, we must + call PtlTriggeredCTInc twice. 
Otherwise, we could free the CT too early and the Put wouldn't be triggered */ + + ptl_ct_event_t ct_inc; + + ct_inc.success = 1; + ct_inc.failure = 0; + + if ((ret = PtlTriggeredCTInc(request->u.barrier.rtr_ct_h, ct_inc, + request->u.barrier.rtr_ct_h, num_msgs)) != 0) { + return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret); + } + + if ((ret = PtlTriggeredCTInc(request->u.barrier.rtr_ct_h, ct_inc, + request->u.barrier.rtr_ct_h, num_msgs + 1)) != 0) { + return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret); + } + ret = PtlCTWait(request->u.barrier.rtr_ct_h, num_msgs + 2, &event); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_coll_base_framework.framework_output, + "%s:%d: PtlCTWait failed: %d\n", + __FILE__, __LINE__, ret); + return OMPI_ERROR; + } } else { /* Send a put to self when we've received all our messages... */ @@ -289,9 +311,7 @@ ompi_coll_portals4_ibarrier_intra_fini(ompi_coll_portals4_request_t *request) return OMPI_ERROR; } - OPAL_THREAD_LOCK(&ompi_request_lock); ompi_request_complete(&request->super, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/portals4/coll_portals4_bcast.c b/ompi/mca/coll/portals4/coll_portals4_bcast.c index fe0431d903f..11132f6ce4c 100644 --- a/ompi/mca/coll/portals4/coll_portals4_bcast.c +++ b/ompi/mca/coll/portals4/coll_portals4_bcast.c @@ -89,12 +89,20 @@ static int prepare_bcast_data (struct ompi_communicator_t *comm, } /* Number of segments */ - request->u.bcast.segment_nb = (request->u.bcast.tmpsize > COLL_PORTALS4_MAX_BW) ? - (((request->u.bcast.tmpsize + COLL_PORTALS4_MAX_BW -1) / COLL_PORTALS4_MAX_BW) < COLL_PORTALS4_MAX_SEGMENT ? - ((request->u.bcast.tmpsize + COLL_PORTALS4_MAX_BW -1) / COLL_PORTALS4_MAX_BW) : - COLL_PORTALS4_MAX_SEGMENT) : + { + size_t max_msg_size = (COLL_PORTALS4_MAX_BW > mca_coll_portals4_component.ni_limits.max_msg_size) ? 
+ mca_coll_portals4_component.ni_limits.max_msg_size : + COLL_PORTALS4_MAX_BW; + + //TODO : Either make compatible Portals size limits and COLL_PORTALS4_MAX_SEGMENT or remove COLL_PORTALS4_MAX_SEGMENT + request->u.bcast.segment_nb = (request->u.bcast.tmpsize > max_msg_size) ? + (((request->u.bcast.tmpsize + max_msg_size -1) / max_msg_size) < COLL_PORTALS4_MAX_SEGMENT ? + ((request->u.bcast.tmpsize + max_msg_size -1) / max_msg_size) : COLL_PORTALS4_MAX_SEGMENT) : 1; + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "seg_number=%d , seg_size_max=%lu", request->u.bcast.segment_nb, max_msg_size)); + } if (request->u.bcast.segment_nb > COLL_PORTALS4_BCAST_ALGO_THRESHOLD) { request->u.bcast.algo = OMPI_COLL_PORTALS4_BCAST_PIPELINE_ALGO; } @@ -137,9 +145,9 @@ bcast_kary_tree_top(void *buff, int count, mca_coll_portals4_module_t *portals4_module) { bool is_sync = request->is_sync; - int ret, seg; - unsigned int i; - int segment_nb = request->u.bcast.segment_nb; + int ret; + unsigned int i, seg, seg_size, nb_long; + unsigned int segment_nb = request->u.bcast.segment_nb; unsigned int child_nb; int size = ompi_comm_size(comm); int rank = ompi_comm_rank(comm); @@ -201,15 +209,22 @@ bcast_kary_tree_top(void *buff, int count, COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0, COLL_PORTALS4_BCAST, 0, internal_count); + /* The data will be cut in segment_nb segments. + * nb_long segments will have a size of (seg_size + 1) + * and (segment_nb - nb_long) segments will have a size of seg_size + */ + seg_size = request->u.bcast.tmpsize / segment_nb; + nb_long = request->u.bcast.tmpsize % segment_nb; + opal_output_verbose(10, ompi_coll_base_framework.framework_output, "seg_size=%d nb_long=%d segment_nb=%d", seg_size, nb_long, segment_nb); + if (rank != root) { for (seg = 1, offset = 0, length = 0 ; seg <= segment_nb ; seg++, offset += length) { /* Divide buffer into segments */ - length = (seg < segment_nb) ? 
- (request->u.bcast.tmpsize + segment_nb - 1) / segment_nb : - request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1); + if (seg <= nb_long) length = seg_size + 1; + else length = seg_size; /* ** Prepare Data ME @@ -352,13 +367,14 @@ bcast_kary_tree_top(void *buff, int count, seg++, offset += length) { /* Divide buffer into segments */ - length = (seg < segment_nb) ? - (request->u.bcast.tmpsize + segment_nb - 1) / segment_nb : - request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1); + if (seg <= nb_long) length = seg_size + 1; + else length = seg_size; + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "bcast with k-ary tree : segment of size %ld", length); /* compute the triggering threshold to send data to the children */ - trig_thr = (rank == root) ? (segment_nb) : - (segment_nb + seg); + trig_thr = segment_nb + seg - 1; /* To be sure the set of PtlTriggeredPut of DATA will be executed in order */ + if (rank != root) trig_thr ++; /* ** Send Data to children @@ -381,6 +397,17 @@ bcast_kary_tree_top(void *buff, int count, } } + if (rank == root) { + trig_thr = segment_nb; + ct_inc.success = segment_nb; + ct_inc.failure = 0; + + if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc, + request->u.bcast.trig_ct_h, trig_thr)) != 0) { + return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret); + } + } + ack_thr = child_nb; if (is_sync) { @@ -409,9 +436,28 @@ bcast_kary_tree_top(void *buff, int count, */ if (rank != root) { - ack_thr = segment_nb; + trig_thr = segment_nb; if (is_sync) { - if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, ack_thr, &ct)) != 0) { + /* Each leaf has a pending PtlTriggeredPut (to send the final ACK). We must call PtlTriggeredCTInc twice. + Otherwise, we could pass the PtlCTWait and then free the CT too early and the Put wouldn't be triggered. 
+ + This is necessary because portals4 does not insure the order in the triggered operations associated + with the same threshold. In the case where PtlCTWait is not called (else case), this is not necessary. */ + + ct_inc.success = 1; + ct_inc.failure = 0; + + if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc, + request->u.bcast.trig_ct_h, trig_thr)) != 0) { + return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret); + } + + if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc, + request->u.bcast.trig_ct_h, trig_thr + 1)) != 0) { + return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret); + } + + if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, trig_thr + 2, &ct)) != 0) { opal_stderr("PtlCTWait failed", __FILE__, __LINE__, ret); } } @@ -421,7 +467,7 @@ bcast_kary_tree_top(void *buff, int count, mca_coll_portals4_component.finish_pt_idx, 0, 0, NULL, (uintptr_t) request, request->u.bcast.trig_ct_h, - ack_thr)) != 0) { + trig_thr)) != 0) { return opal_stderr("PtlTriggeredPut failed", __FILE__, __LINE__, ret); } @@ -440,8 +486,9 @@ bcast_pipeline_top(void *buff, int count, mca_coll_portals4_module_t *portals4_module) { bool is_sync = request->is_sync; - int ret, seg; - int segment_nb = request->u.bcast.segment_nb; + int ret; + unsigned int seg, seg_size, nb_long; + unsigned int segment_nb = request->u.bcast.segment_nb; int size = ompi_comm_size(comm); int rank = ompi_comm_rank(comm); ptl_rank_t parent, child; @@ -492,6 +539,13 @@ bcast_pipeline_top(void *buff, int count, COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0, COLL_PORTALS4_BCAST, 0, internal_count); + /* The data will be cut in segment_nb segments. 
+ * nb_long segments will have a size of (seg_size + 1) + * and (segment_nb - nb_long) segments will have a size of seg_size + */ + seg_size = request->u.bcast.tmpsize / segment_nb; + nb_long = request->u.bcast.tmpsize % segment_nb; + opal_output_verbose(10, ompi_coll_base_framework.framework_output, "seg_size=%d nb_long=%d", seg_size, nb_long); if (rank != root) { for (seg = 1, offset = 0, length = 0 ; @@ -499,9 +553,8 @@ bcast_pipeline_top(void *buff, int count, seg++, offset += length) { /* Divide buffer into segments */ - length = (seg < segment_nb) ? - (request->u.bcast.tmpsize + segment_nb - 1) / segment_nb : - request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1); + if (seg <= nb_long) length = seg_size + 1; + else length = seg_size; /* ** Prepare Data ME @@ -642,13 +695,14 @@ bcast_pipeline_top(void *buff, int count, seg++, offset += length) { /* Divide buffer into segments */ - length = (seg < segment_nb) ? - (request->u.bcast.tmpsize + segment_nb - 1) / segment_nb : - request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1); + if (seg <= nb_long) length = seg_size + 1; + else length = seg_size; + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "bcast with pipeline : segment of size %ld \n", length); /* compute the triggering threshold to send data to the children */ - trig_thr = (rank == root) ? 
(segment_nb) : - (segment_nb + seg); + trig_thr = segment_nb + seg - 1; /* To be sure the PtlTriggeredPut will be executed in order */ + if (rank != root) trig_thr ++; /* ** Send Data to children @@ -668,6 +722,16 @@ bcast_pipeline_top(void *buff, int count, } } } + if (rank == root) { + trig_thr = segment_nb; + ct_inc.success = segment_nb; + ct_inc.failure = 0; + + if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc, + request->u.bcast.trig_ct_h, trig_thr)) != 0) { + return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret); + } + } if (is_sync) { if ((ret = PtlCTWait(request->u.bcast.ack_ct_h, 1, &ct)) != 0) { @@ -696,8 +760,29 @@ bcast_pipeline_top(void *buff, int count, */ if (rank != root) { + trig_thr = segment_nb; + if (is_sync) { - if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, segment_nb, &ct)) != 0) { + /* Each leaf has a pending PtlTriggeredPut (to send the final ACK). We must call PtlTriggeredCTInc twice. + Otherwise, we could pass the PtlCTWait and then free the CT too early and the Put wouldn't be triggered. + + This is necessary because portals4 does not insure the order in the triggered operations associated + with the same threshold. In the case where PtlCTWait is not called (else case), this is not necessary. 
*/ + + ct_inc.success = 1; + ct_inc.failure = 0; + + if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc, + request->u.bcast.trig_ct_h, trig_thr)) != 0) { + return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret); + } + + if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc, + request->u.bcast.trig_ct_h, trig_thr + 1)) != 0) { + return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret); + } + + if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, trig_thr + 2, &ct)) != 0) { opal_stderr("PtlCTWait failed", __FILE__, __LINE__, ret); } } @@ -707,7 +792,7 @@ bcast_pipeline_top(void *buff, int count, mca_coll_portals4_component.finish_pt_idx, 0, 0, NULL, (uintptr_t) request, request->u.bcast.trig_ct_h, - segment_nb)) != 0) { + trig_thr)) != 0) { return opal_stderr("PtlTriggeredPut failed", __FILE__, __LINE__, ret); } } @@ -831,7 +916,7 @@ ompi_coll_portals4_ibcast_intra(void *buff, int count, return OMPI_ERROR; } - puts("ibcast"); + opal_output_verbose(10, ompi_coll_base_framework.framework_output, "ibcast_intra"); return (OMPI_SUCCESS); } @@ -856,9 +941,8 @@ ompi_coll_portals4_ibcast_intra_fini(ompi_coll_portals4_request_t *request) post_bcast_data(request); - OPAL_THREAD_LOCK(&ompi_request_lock); ompi_request_complete(&request->super, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); + opal_output_verbose(10, ompi_coll_base_framework.framework_output, "ibcast_intra_fini"); return (OMPI_SUCCESS); } diff --git a/ompi/mca/coll/portals4/coll_portals4_component.c b/ompi/mca/coll/portals4/coll_portals4_component.c index cb29348ed3c..72149790fa6 100644 --- a/ompi/mca/coll/portals4/coll_portals4_component.c +++ b/ompi/mca/coll/portals4/coll_portals4_component.c @@ -180,7 +180,7 @@ mca_coll_portals4_component_t mca_coll_portals4_component = { /* Initialization / querying functions */ .collm_init_query = portals4_init_query, .collm_comm_query = portals4_comm_query, - }, + }, }; int @@ -203,6 +203,24 @@ portals4_register(void) 
MCA_BASE_VAR_SCOPE_READONLY, &mca_coll_portals4_priority); + mca_coll_portals4_component.use_binomial_gather_algorithm = 0; + (void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version, "use_binomial_gather_algorithm", + "if 1 use a binomial tree algorithm for gather, otherwise use linear", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_coll_portals4_component.use_binomial_gather_algorithm); + + mca_coll_portals4_component.portals_max_msg_size = PTL_SIZE_MAX; + (void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version, + "max_msg_size", + "Max size supported by portals4 (above that, a message is cut into messages less than that size)", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, + NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_coll_portals4_component.portals_max_msg_size); + return OMPI_SUCCESS; } @@ -361,7 +379,13 @@ portals4_init_query(bool enable_progress_threads, __FILE__, __LINE__, ret); return OMPI_ERROR; } + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "ni_limits.max_atomic_size=%ld", mca_coll_portals4_component.ni_limits.max_atomic_size); + if (mca_coll_portals4_component.portals_max_msg_size < mca_coll_portals4_component.ni_limits.max_msg_size) + mca_coll_portals4_component.ni_limits.max_msg_size = mca_coll_portals4_component.portals_max_msg_size; + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "ni_limits.max_msg_size=%lu", mca_coll_portals4_component.ni_limits.max_msg_size); ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id); if (PTL_OK != ret) { @@ -463,7 +487,7 @@ portals4_init_query(bool enable_progress_threads, __FILE__, __LINE__, ret); return OMPI_ERROR; } - OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%x\n", md.start, md.length)); + OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, 
"PtlMDBind start=%p length=%lx\n", md.start, md.length)); /* setup finish ack ME */ me.start = NULL; @@ -472,7 +496,7 @@ portals4_init_query(bool enable_progress_threads, me.min_free = 0; me.uid = mca_coll_portals4_component.uid; me.options = PTL_ME_OP_PUT | - PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; + PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; me.match_bits = 0; @@ -565,6 +589,10 @@ portals4_comm_query(struct ompi_communicator_t *comm, return NULL; } + opal_output_verbose(50, ompi_coll_base_framework.framework_output, + "%s:%d: My nid,pid = (%x,%x)\n", + __FILE__, __LINE__, proc->phys.nid, proc->phys.pid); + /* check for logical addressing mode in the MTL */ if (0 == proc->phys.pid) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, @@ -584,6 +612,15 @@ portals4_comm_query(struct ompi_communicator_t *comm, portals4_module->super.coll_barrier = ompi_coll_portals4_barrier_intra; portals4_module->super.coll_ibarrier = ompi_coll_portals4_ibarrier_intra; + portals4_module->super.coll_gather = ompi_coll_portals4_gather_intra; + portals4_module->super.coll_igather = ompi_coll_portals4_igather_intra; + + portals4_module->super.coll_scatter = ompi_coll_portals4_scatter_intra; + portals4_module->super.coll_iscatter = ompi_coll_portals4_iscatter_intra; + + portals4_module->cached_in_order_bmtree=NULL; + portals4_module->cached_in_order_bmtree_root=-1; + portals4_module->super.coll_bcast = ompi_coll_portals4_bcast_intra; portals4_module->super.coll_ibcast = ompi_coll_portals4_ibcast_intra; @@ -689,9 +726,10 @@ portals4_progress(void) ompi_coll_portals4_iallreduce_intra_fini(ptl_request); break; case OMPI_COLL_PORTALS4_TYPE_SCATTER: + ompi_coll_portals4_iscatter_intra_fini(ptl_request); + break; case OMPI_COLL_PORTALS4_TYPE_GATHER: - opal_output(ompi_coll_base_framework.framework_output, - "allreduce is not supported yet\n"); + 
ompi_coll_portals4_igather_intra_fini(ptl_request); break; } } diff --git a/ompi/mca/coll/portals4/coll_portals4_gather.c b/ompi/mca/coll/portals4/coll_portals4_gather.c new file mode 100644 index 00000000000..45ff4c07728 --- /dev/null +++ b/ompi/mca/coll/portals4/coll_portals4_gather.c @@ -0,0 +1,1381 @@ +/* + * Copyright (c) 2015 Sandia National Laboratories. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/datatype/ompi_datatype.h" +#include "opal/util/bit_ops.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/mca/coll/base/base.h" + +#include "coll_portals4.h" +#include "coll_portals4_request.h" + +#include <strings.h> // included for ffs in get_tree_numdescendants_of + +#undef RTR_USES_TRIGGERED_PUT + + +#define VRANK(ra, ro, si) ((ra - ro + si) % si) + +/* + * Borrowed with thanks from the coll-tuned component, then modified for Portals4. + * + * + * Constructs in-order binomial tree which can be used for gather/scatter + * operations. + * + * Here are some of the examples of this tree: + * size = 2 size = 4 size = 8 + * 0 0 0 + * / / | / | \ + * 1 1 2 1 2 4 + * | | | \ + * 3 3 5 6 + * | + * 7 + * + * size = 16 + * 0 + * / | \ \ + * 1 2 4 8 + * | | \ / | \ + * 3 5 6 9 10 12 + * | | | \ + * 7 11 13 14 + * | + * 15 + */ + +static int32_t get_tree_numdescendants_of(struct ompi_communicator_t* comm, + int vrank) +{ + int max; + int size = ompi_comm_size(comm); + + if (0 == vrank) { + return size - 1; + } else { + max = 1 << ffs(vrank - 1); + return ((vrank + max <= size ) ? 
max : size - vrank) -1; + } + +} + +static ompi_coll_portals4_tree_t* +ompi_coll_portals4_build_in_order_bmtree( struct ompi_communicator_t* comm, + int root ) +{ + int childs = 0, rank, vrank, vparent, size, mask = 1, remote, i; + ompi_coll_portals4_tree_t *bmtree; + + /* + * Get size and rank of the process in this communicator + */ + size = ompi_comm_size(comm); + rank = ompi_comm_rank(comm); + + vrank = VRANK(rank, root, size); + + bmtree = (ompi_coll_portals4_tree_t*)malloc(sizeof(ompi_coll_portals4_tree_t)); + if (!bmtree) { + opal_output(ompi_coll_base_framework.framework_output, + "coll:portals4:build_bmtree PANIC out of memory"); + return NULL; + } + + bmtree->tree_bmtree = 1; + bmtree->tree_root = MPI_UNDEFINED; + bmtree->tree_nextsize = MPI_UNDEFINED; + for(i=0;i<MAXTREEFANOUT;i++) { + bmtree->tree_next[i] = -1; + } + + if (root == rank) { + bmtree->tree_prev = root; + } + + while (mask < size) { + remote = vrank ^ mask; + if (remote < vrank) { + bmtree->tree_prev = (remote + root) % size; + break; + } else if (remote < size) { + bmtree->tree_next[childs] = (remote + root) % size; + childs++; + if (childs==MAXTREEFANOUT) { + opal_output(ompi_coll_base_framework.framework_output, + "coll:portals4:build_bmtree max fanout incorrect %d needed %d", + MAXTREEFANOUT, childs); + return NULL; + } + } + mask <<= 1; + } + bmtree->tree_nextsize = childs; + bmtree->tree_root = root; + + vparent = VRANK(bmtree->tree_prev, root, size); + if (root == rank) { + bmtree->tree_numdescendants = size - 1; + } else if (bmtree->tree_nextsize > 0) { + int possible_descendants = vrank - vparent - 1; + if ((vrank + possible_descendants) > size) { + bmtree->tree_numdescendants = size - vrank - 1; + } else { + bmtree->tree_numdescendants = possible_descendants; + } + } else { + bmtree->tree_numdescendants = 0; + } + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%d: bmtree result - size(%d) rank(%d) vrank(%d) root(%d) parent(%d) vparent(%d) numkids(%d) numdescendants(%d)", + __LINE__, + 
size, rank, vrank, bmtree->tree_root, bmtree->tree_prev, vparent, bmtree->tree_nextsize, bmtree->tree_numdescendants); + + return bmtree; +} + +/* + * Borrowed with thanks from the coll-tuned component. + */ +static int +ompi_coll_portals4_destroy_tree( ompi_coll_portals4_tree_t** tree ) +{ + ompi_coll_portals4_tree_t *ptr; + + if ((!tree)||(!*tree)) { + return OMPI_SUCCESS; + } + + ptr = *tree; + + free (ptr); + *tree = NULL; /* mark tree as gone */ + + return OMPI_SUCCESS; +} + + +static int +setup_gather_buffers_binomial(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_portals4_module_t *portals4_module) +{ + int ret, line; + + uint32_t iov_count = 1; + struct iovec iov; + size_t max_data; + + ompi_coll_portals4_tree_t* bmtree = portals4_module->cached_in_order_bmtree; + + int vrank = VRANK(request->u.gather.my_rank, request->u.gather.root_rank, request->u.gather.size); + + ompi_coll_portals4_create_send_converter (&request->u.gather.send_converter, + request->u.gather.pack_src_buf + request->u.gather.pack_src_offset, + ompi_comm_peer_lookup(comm, request->u.gather.my_rank), + request->u.gather.pack_src_count, + request->u.gather.pack_src_dtype); + opal_convertor_get_packed_size(&request->u.gather.send_converter, &request->u.gather.packed_size); + + /**********************************/ + /* Setup Gather Buffers */ + /**********************************/ + if (vrank == 0) { + request->u.gather.gather_bytes=request->u.gather.packed_size * (ptrdiff_t)request->u.gather.size; + + /* + * root node, needs to allocate temp buffer to gather + * packed bytes from all nodes including self. + * rotate will occur after transfer during unpack. 
+ */ + request->u.gather.gather_buf = (char *) malloc(request->u.gather.gather_bytes); + if (NULL == request->u.gather.gather_buf) { + ret = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.gather.free_after = 1; + + /* pack local data into request->u.gather.gather_buf */ + iov.iov_len = request->u.gather.gather_bytes; + iov.iov_base = (IOVBASE_TYPE *) request->u.gather.gather_buf; + opal_convertor_pack(&request->u.gather.send_converter, &iov, &iov_count, &max_data); + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:vrank(%d): root - gather_buf(%p) - gather_bytes(%lu)=packed_size(%ld) * size(%d)", + __FILE__, __LINE__, vrank, + request->u.gather.gather_buf, request->u.gather.gather_bytes, + request->u.gather.packed_size, request->u.gather.size); + } else if (bmtree->tree_nextsize) { + /* + * other non-leaf nodes, allocate temp buffer to receive data from + * children. we need space for data from tree_numdescendants + 1 + * processes. 
+ */ + request->u.gather.gather_bytes=request->u.gather.packed_size * ((ptrdiff_t)bmtree->tree_numdescendants + 1); + + request->u.gather.gather_buf = (char *) malloc(request->u.gather.gather_bytes); + if (NULL == request->u.gather.gather_buf) { + ret = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.gather.free_after = 1; + + iov.iov_len = request->u.gather.gather_bytes; + iov.iov_base = (IOVBASE_TYPE *) request->u.gather.gather_buf; + opal_convertor_pack(&request->u.gather.send_converter, &iov, &iov_count, &max_data); + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:vrank(%d): nonleaf - gather_buf(%p) - gather_bytes(%lu)=packed_size(%ld) * (bmtree->tree_numdescendants(%d) + 1)", + __FILE__, __LINE__, vrank, + request->u.gather.gather_buf, request->u.gather.gather_bytes, + request->u.gather.packed_size, bmtree->tree_numdescendants); + } else { + /* leaf nodes, allocate space to pack into and put from */ + request->u.gather.gather_bytes=request->u.gather.packed_size; + + request->u.gather.gather_buf = (char *) malloc(request->u.gather.gather_bytes); + if (NULL == request->u.gather.gather_buf) { + ret = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.gather.free_after = 1; + + iov.iov_len = request->u.gather.gather_bytes; + iov.iov_base = (IOVBASE_TYPE *) request->u.gather.gather_buf; + opal_convertor_pack(&request->u.gather.send_converter, &iov, &iov_count, &max_data); + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:vrank(%d): leaf - gather_buf(%p) - gather_bytes(%lu)=packed_size(%ld)", + __FILE__, __LINE__, vrank, + request->u.gather.gather_buf, request->u.gather.gather_bytes, + request->u.gather.packed_size); + } + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + +static int 
+setup_gather_buffers_linear(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_portals4_module_t *portals4_module) +{ + int ret, line; + + uint32_t iov_count = 1; + struct iovec iov; + size_t max_data; + + int8_t i_am_root = (request->u.gather.my_rank == request->u.gather.root_rank); + + ompi_coll_portals4_create_send_converter (&request->u.gather.send_converter, + request->u.gather.pack_src_buf + request->u.gather.pack_src_offset, + ompi_comm_peer_lookup(comm, request->u.gather.my_rank), + request->u.gather.pack_src_count, + request->u.gather.pack_src_dtype); + opal_convertor_get_packed_size(&request->u.gather.send_converter, &request->u.gather.packed_size); + + /**********************************/ + /* Setup Gather Buffers */ + /**********************************/ + if (i_am_root) { + request->u.gather.gather_bytes=request->u.gather.packed_size * (ptrdiff_t)request->u.gather.size; + + /* + * root node, needs to allocate temp buffer to gather + * packed bytes from all nodes including self. 
+ */ + request->u.gather.gather_buf = (char *) malloc(request->u.gather.gather_bytes); + if (NULL == request->u.gather.gather_buf) { + ret = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.gather.free_after = 1; + + /* pack local data into request->u.gather.gather_buf */ + uint64_t gather_buf_offset = (ptrdiff_t)request->u.gather.my_rank * request->u.gather.packed_size; + iov.iov_len = request->u.gather.gather_bytes - gather_buf_offset; + iov.iov_base = (IOVBASE_TYPE *) (request->u.gather.gather_buf + gather_buf_offset); + opal_convertor_pack(&request->u.gather.send_converter, &iov, &iov_count, &max_data); + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): root - gather_buf(%p) - gather_bytes(%lu)=packed_size(%ld) * size(%d)", + __FILE__, __LINE__, request->u.gather.my_rank, + request->u.gather.gather_buf, request->u.gather.gather_bytes, + request->u.gather.packed_size, request->u.gather.size); + } else { + /* non-root nodes, allocate space to pack into and put from */ + request->u.gather.gather_bytes=request->u.gather.packed_size; + request->u.gather.gather_buf = (char *) malloc(request->u.gather.gather_bytes); + if (NULL == request->u.gather.gather_buf) { + ret = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.gather.free_after = 1; + + iov.iov_len = request->u.gather.gather_bytes; + iov.iov_base = (IOVBASE_TYPE *) request->u.gather.gather_buf; + opal_convertor_pack(&request->u.gather.send_converter, &iov, &iov_count, &max_data); + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): leaf - gather_buf(%p) - gather_bytes(%lu)=packed_size(%ld)", + __FILE__, __LINE__, request->u.gather.my_rank, + request->u.gather.gather_buf, request->u.gather.gather_bytes, + request->u.gather.packed_size); + } + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + 
__FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + +static int +setup_gather_handles(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_portals4_module_t *portals4_module) +{ + int ret, line; + + ptl_me_t me; + + /**********************************/ + /* Setup Gather Handles */ + /**********************************/ + COLL_PORTALS4_SET_BITS(request->u.gather.gather_match_bits, ompi_comm_get_cid(comm), + 0, 0, COLL_PORTALS4_GATHER, 0, request->u.gather.coll_count); + + ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, + &request->u.gather.gather_cth); + if (PTL_OK != ret) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; } + + request->u.gather.gather_mdh = mca_coll_portals4_component.data_md_h; + request->u.gather.gather_offset = (ptl_size_t)request->u.gather.gather_buf; + + /* children put here */ + me.start = request->u.gather.gather_buf; + me.length = request->u.gather.gather_bytes; + me.ct_handle = request->u.gather.gather_cth; + me.min_free = 0; + me.uid = mca_coll_portals4_component.uid; + me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE | + PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | + PTL_ME_EVENT_CT_COMM; + me.match_id.phys.nid = PTL_NID_ANY; + me.match_id.phys.pid = PTL_PID_ANY; + me.match_bits = request->u.gather.gather_match_bits; + me.ignore_bits = 0; + ret = PtlMEAppend(mca_coll_portals4_component.ni_h, + mca_coll_portals4_component.pt_idx, + &me, + PTL_PRIORITY_LIST, + NULL, + &request->u.gather.gather_meh); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + +static int +setup_sync_handles(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_portals4_module_t 
*portals4_module) +{ + int ret, line; + + ptl_me_t me; + + /**********************************/ + /* Setup Sync Handles */ + /**********************************/ + COLL_PORTALS4_SET_BITS(request->u.gather.sync_match_bits, ompi_comm_get_cid(comm), + 0, 1, COLL_PORTALS4_GATHER, 0, request->u.gather.coll_count); + + ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, + &request->u.gather.sync_cth); + if (PTL_OK != ret) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; } + + request->u.gather.sync_mdh = mca_coll_portals4_component.zero_md_h; + + me.start = NULL; + me.length = 0; + me.ct_handle = request->u.gather.sync_cth; + me.min_free = 0; + me.uid = mca_coll_portals4_component.uid; + me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE | + PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | + PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_CT_OVERFLOW; + me.match_id.phys.nid = PTL_NID_ANY; + me.match_id.phys.pid = PTL_PID_ANY; + me.match_bits = request->u.gather.sync_match_bits; + me.ignore_bits = 0; + ret = PtlMEAppend(mca_coll_portals4_component.ni_h, + mca_coll_portals4_component.pt_idx, + &me, + PTL_PRIORITY_LIST, + NULL, + &request->u.gather.sync_meh); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + +static int +cleanup_gather_handles(ompi_coll_portals4_request_t *request) +{ + int ret, line; + + /**********************************/ + /* Cleanup Gather Handles */ + /**********************************/ + ret = PtlMEUnlink(request->u.gather.gather_meh); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + ret = PtlCTFree(request->u.gather.gather_cth); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + return OMPI_SUCCESS; + +err_hdlr: + 
opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + +static int +cleanup_sync_handles(ompi_coll_portals4_request_t *request) +{ + int ret, line; + + /**********************************/ + /* Cleanup Sync Handles */ + /**********************************/ + ret = PtlMEUnlink(request->u.gather.sync_meh); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + ret = PtlCTFree(request->u.gather.sync_cth); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + +static int +ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_base_module_t *module) +{ + mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module; + int ret, line; + ptl_ct_event_t ct; + + ptl_ct_event_t sync_incr_event; + + int vrank=-1; + + int32_t i=0; + + ompi_coll_portals4_tree_t* bmtree; + + int32_t expected_ops =0; + int32_t expected_acks=0; + + ptl_size_t number_of_fragment_gathered = 0; + ptl_size_t number_of_fragment_send = 1; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:gather_intra_binomial_top enter rank %d", request->u.gather.my_rank)); + + request->type = OMPI_COLL_PORTALS4_TYPE_GATHER; + request->u.gather.gather_buf=NULL; + request->u.gather.gather_mdh=PTL_INVALID_HANDLE; + request->u.gather.gather_cth=PTL_INVALID_HANDLE; + request->u.gather.gather_meh=PTL_INVALID_HANDLE; + request->u.gather.sync_mdh=PTL_INVALID_HANDLE; + 
request->u.gather.sync_cth=PTL_INVALID_HANDLE; + request->u.gather.sync_meh=PTL_INVALID_HANDLE; + + request->u.gather.my_rank = ompi_comm_rank(comm); + request->u.gather.size = ompi_comm_size(comm); + request->u.gather.root_rank = root; + request->u.gather.sbuf = sbuf; + request->u.gather.rbuf = rbuf; + if ((root == request->u.gather.my_rank) && (sbuf == MPI_IN_PLACE)) { + request->u.gather.pack_src_buf = rbuf; + request->u.gather.pack_src_count = rcount; + request->u.gather.pack_src_dtype = rdtype; + } else { + request->u.gather.pack_src_buf = sbuf; + request->u.gather.pack_src_count = scount; + request->u.gather.pack_src_dtype = sdtype; + request->u.gather.pack_src_offset = 0; + } + ompi_datatype_get_extent(request->u.gather.pack_src_dtype, + &request->u.gather.pack_src_lb, + &request->u.gather.pack_src_extent); + ompi_datatype_get_true_extent(request->u.gather.pack_src_dtype, + &request->u.gather.pack_src_true_lb, + &request->u.gather.pack_src_true_extent); + request->u.gather.unpack_dst_buf = rbuf; + request->u.gather.unpack_dst_count = rcount; + request->u.gather.unpack_dst_dtype = rdtype; + ompi_datatype_get_extent(request->u.gather.unpack_dst_dtype, + &request->u.gather.unpack_dst_lb, + &request->u.gather.unpack_dst_extent); + ompi_datatype_get_true_extent(request->u.gather.unpack_dst_dtype, + &request->u.gather.unpack_dst_true_lb, + &request->u.gather.unpack_dst_true_extent); + + if ((root == request->u.gather.my_rank) && (sbuf == MPI_IN_PLACE)) { + request->u.gather.pack_src_offset = request->u.gather.pack_src_extent * request->u.gather.pack_src_count * request->u.gather.my_rank; + } + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:vrank(%d): request->u.gather.pack_src_offset(%lu)", + __FILE__, __LINE__, vrank, + request->u.gather.pack_src_offset); + + /**********************************/ + /* Setup Common Parameters */ + /**********************************/ + + request->u.gather.coll_count = 
opal_atomic_add_size_t(&portals4_module->coll_count, 1); + + COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( comm, portals4_module, request->u.gather.root_rank ); + bmtree = portals4_module->cached_in_order_bmtree; + + vrank = VRANK(request->u.gather.my_rank, request->u.gather.root_rank, request->u.gather.size); + + ret = setup_gather_buffers_binomial(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = setup_gather_handles(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = setup_sync_handles(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "%s:%d: packed_size=%lu, fragment_size=%lu", + __FILE__, __LINE__, request->u.gather.packed_size, mca_coll_portals4_component.ni_limits.max_msg_size)); + + for (int i =0; i < bmtree->tree_nextsize; i++) { + int child_vrank = VRANK(bmtree->tree_next[i], request->u.gather.root_rank, request->u.gather.size); + int sub_tree_size = get_tree_numdescendants_of(comm, child_vrank) + 1; + ptl_size_t local_number_of_fragment = ((sub_tree_size * request->u.gather.packed_size) + mca_coll_portals4_component.ni_limits.max_msg_size -1) / mca_coll_portals4_component.ni_limits.max_msg_size; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "%s:%d: %d is child of %d(%d) with %d descendants (nb_frag += %lu)", + __FILE__, __LINE__, bmtree->tree_next[i], vrank, request->u.gather.root_rank , sub_tree_size, local_number_of_fragment)); + number_of_fragment_gathered += local_number_of_fragment; + } + + number_of_fragment_send = (request->u.gather.gather_bytes + mca_coll_portals4_component.ni_limits.max_msg_size -1) / mca_coll_portals4_component.ni_limits.max_msg_size; + + /***********************************************/ + /* Chain the RTR and Recv-ACK to the Gather CT */ + 
/***********************************************/ + if (vrank != 0) { + sync_incr_event.success=1; + sync_incr_event.failure=0; + ret = PtlTriggeredCTInc(request->u.gather.gather_cth, + sync_incr_event, + request->u.gather.sync_cth, + 1); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + ret = PtlTriggeredCTInc(request->u.gather.gather_cth, + sync_incr_event, + request->u.gather.sync_cth, + 2); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + + /**********************************/ + /* do the gather */ + /**********************************/ + if (vrank == 0) { + /* root, so do nothing */ + + expected_ops=number_of_fragment_gathered ; /* gather put from each child */ + expected_acks=0; + + } else { + int32_t parent = bmtree->tree_prev; + int32_t vparent = VRANK(parent, request->u.gather.root_rank, request->u.gather.size); + + ptl_size_t remote_offset=(vrank-vparent) * request->u.gather.packed_size; + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:vrank(%d): remote_offset(%lu)=(vrank(%d)-vparent(%d)) * packed_size(%ld)", + __FILE__, __LINE__, vrank, + remote_offset, vrank, vparent, request->u.gather.packed_size); + + expected_ops=number_of_fragment_gathered + 1; /* gather puts from each child + a chained RTR */ + expected_acks=1; /* Recv-ACK from parent */ + + ptl_size_t size_sent = 0; + ptl_size_t size_left = request->u.gather.gather_bytes; + + for (ptl_size_t i = 0 ; i < number_of_fragment_send; i++) { + ptl_size_t frag_size = (size_left > mca_coll_portals4_component.ni_limits.max_msg_size) ? 
+ mca_coll_portals4_component.ni_limits.max_msg_size: + size_left; + ret = PtlTriggeredPut(request->u.gather.gather_mdh, + request->u.gather.gather_offset + size_sent, + frag_size, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, parent), + mca_coll_portals4_component.pt_idx, + request->u.gather.gather_match_bits, + remote_offset + size_sent, + NULL, + 0, + request->u.gather.gather_cth, + expected_ops); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + size_left -= frag_size; + size_sent += frag_size; + } + } + + /************************************/ + /* put Recv-ACK to each child */ + /************************************/ + for (i=0;itree_nextsize;i++) { + int32_t child=bmtree->tree_next[i]; + ret = PtlTriggeredPut(request->u.gather.sync_mdh, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, child), + mca_coll_portals4_component.pt_idx, + request->u.gather.sync_match_bits, + 0, + NULL, + 0, + request->u.gather.gather_cth, + expected_ops); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + + expected_ops+=expected_acks; + + if (!request->u.gather.is_sync) { + /******************************************/ + /* put to finish pt when all ops complete */ + /******************************************/ + ret = PtlTriggeredPut(mca_coll_portals4_component.zero_md_h, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, request->u.gather.my_rank), + mca_coll_portals4_component.finish_pt_idx, + 0, + 0, + NULL, + (uintptr_t) request, + request->u.gather.gather_cth, + expected_ops); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + +#ifdef RTR_USES_TRIGGERED_PUT + /**********************************/ + /* put RTR to each child */ + /**********************************/ + for (i=0;itree_nextsize;i++) { + int32_t child=bmtree->tree_next[i]; + ret = PtlTriggeredPut(request->u.gather.sync_mdh, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, child), 
+ mca_coll_portals4_component.pt_idx, + request->u.gather.sync_match_bits, + 0, + NULL, + 0, + request->u.gather.sync_cth, + 0); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } +#else + /**********************************/ + /* put RTR to each child */ + /**********************************/ + for (i=0;itree_nextsize;i++) { + int32_t child=bmtree->tree_next[i]; + ret = PtlPut(request->u.gather.sync_mdh, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, child), + mca_coll_portals4_component.pt_idx, + request->u.gather.sync_match_bits, + 0, + NULL, + 0); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } +#endif + + if (request->u.gather.is_sync) { + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "%s:%d:vrank(%d): calling CTWait(expected_ops=%d)\n", + __FILE__, __LINE__, vrank, expected_ops); + + /********************************/ + /* Wait for all ops to complete */ + /********************************/ + ret = PtlCTWait(request->u.gather.gather_cth, expected_ops, &ct); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "%s:%d:vrank(%d): completed CTWait(expected_ops=%d)\n", + __FILE__, __LINE__, vrank, expected_ops); + } + + ompi_coll_portals4_destroy_tree(&(portals4_module->cached_in_order_bmtree)); + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:gather_intra_binomial_top exit rank %d", request->u.gather.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + if (NULL != request->u.gather.gather_buf) + free(request->u.gather.gather_buf); + + ompi_coll_portals4_destroy_tree(&(portals4_module->cached_in_order_bmtree)); + + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + +static int 
+ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_base_module_t *module) +{ + mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module; + int ret, line; + ptl_ct_event_t ct; + + ptl_ct_event_t sync_incr_event; + + int8_t i_am_root; + + int32_t i=0; + + int32_t expected_ops =0; + int32_t expected_acks=0; + + ptl_size_t number_of_fragment = 1; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:gather_intra_linear_top enter rank %d", request->u.gather.my_rank)); + + request->type = OMPI_COLL_PORTALS4_TYPE_GATHER; + request->u.gather.gather_buf=NULL; + request->u.gather.gather_mdh=PTL_INVALID_HANDLE; + request->u.gather.gather_cth=PTL_INVALID_HANDLE; + request->u.gather.gather_meh=PTL_INVALID_HANDLE; + request->u.gather.sync_mdh=PTL_INVALID_HANDLE; + request->u.gather.sync_cth=PTL_INVALID_HANDLE; + request->u.gather.sync_meh=PTL_INVALID_HANDLE; + + request->u.gather.my_rank = ompi_comm_rank(comm); + request->u.gather.size = ompi_comm_size(comm); + request->u.gather.root_rank = root; + request->u.gather.sbuf = sbuf; + request->u.gather.rbuf = rbuf; + if ((root == request->u.gather.my_rank) && (sbuf == MPI_IN_PLACE)) { + request->u.gather.pack_src_buf = rbuf; + request->u.gather.pack_src_count = rcount; + request->u.gather.pack_src_dtype = rdtype; + } else { + request->u.gather.pack_src_buf = sbuf; + request->u.gather.pack_src_count = scount; + request->u.gather.pack_src_dtype = sdtype; + request->u.gather.pack_src_offset = 0; + } + ompi_datatype_get_extent(request->u.gather.pack_src_dtype, + &request->u.gather.pack_src_lb, + &request->u.gather.pack_src_extent); + ompi_datatype_get_true_extent(request->u.gather.pack_src_dtype, + &request->u.gather.pack_src_true_lb, + 
&request->u.gather.pack_src_true_extent); + request->u.gather.unpack_dst_buf = rbuf; + request->u.gather.unpack_dst_count = rcount; + request->u.gather.unpack_dst_dtype = rdtype; + ompi_datatype_get_extent(request->u.gather.unpack_dst_dtype, + &request->u.gather.unpack_dst_lb, + &request->u.gather.unpack_dst_extent); + ompi_datatype_get_true_extent(request->u.gather.unpack_dst_dtype, + &request->u.gather.unpack_dst_true_lb, + &request->u.gather.unpack_dst_true_extent); + + if ((root == request->u.gather.my_rank) && (sbuf == MPI_IN_PLACE)) { + request->u.gather.pack_src_offset = request->u.gather.pack_src_extent * request->u.gather.pack_src_count * request->u.gather.my_rank; + } + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): request->u.gather.pack_src_offset(%lu)", + __FILE__, __LINE__, request->u.gather.my_rank, + request->u.gather.pack_src_offset); + + /**********************************/ + /* Setup Common Parameters */ + /**********************************/ + + i_am_root = (request->u.gather.my_rank == request->u.gather.root_rank); + + request->u.gather.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + + ret = setup_gather_buffers_linear(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = setup_gather_handles(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = setup_sync_handles(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + number_of_fragment = (request->u.gather.packed_size > mca_coll_portals4_component.ni_limits.max_msg_size) ? 
+ (request->u.gather.packed_size + mca_coll_portals4_component.ni_limits.max_msg_size - 1) / mca_coll_portals4_component.ni_limits.max_msg_size : + 1; + opal_output_verbose(90, ompi_coll_base_framework.framework_output, + "%s:%d:rank %d:number_of_fragment = %lu", + __FILE__, __LINE__, request->u.gather.my_rank, number_of_fragment); + + /***********************************************/ + /* Chain the RTR and Recv-ACK to the Gather CT */ + /***********************************************/ + if (!i_am_root) { + sync_incr_event.success=1; + sync_incr_event.failure=0; + ret = PtlTriggeredCTInc(request->u.gather.gather_cth, + sync_incr_event, + request->u.gather.sync_cth, + 1); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + ret = PtlTriggeredCTInc(request->u.gather.gather_cth, + sync_incr_event, + request->u.gather.sync_cth, + 2); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + + /**********************************/ + /* do the gather */ + /**********************************/ + if (i_am_root) { + /* root, so do nothing */ + + expected_ops=(request->u.gather.size-1) * number_of_fragment; /* gather put from all other ranks */ + expected_acks=0; + + } else { + ptl_size_t remote_offset=request->u.gather.my_rank * request->u.gather.packed_size; + ptl_size_t split_offset = 0; + ptl_size_t size_left = request->u.gather.gather_bytes; + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): remote_offset(%lu)=rank(%d) * packed_size(%ld)", + __FILE__, __LINE__, request->u.gather.my_rank, + remote_offset, request->u.gather.my_rank, request->u.gather.packed_size); + + expected_ops=1; /* chained RTR */ + expected_acks=1; /* Recv-ACK from root */ + + for (ptl_size_t j=0; j mca_coll_portals4_component.ni_limits.max_msg_size) ? 
+ mca_coll_portals4_component.ni_limits.max_msg_size : + size_left; + + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): frag(%lu),offset_frag (%lu) frag_size(%lu)", + __FILE__, __LINE__, request->u.gather.my_rank, + j, split_offset, frag_size); + + ret = PtlTriggeredPut(request->u.gather.gather_mdh, + request->u.gather.gather_offset + split_offset, + frag_size, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, request->u.gather.root_rank), + mca_coll_portals4_component.pt_idx, + request->u.gather.gather_match_bits, + remote_offset + split_offset, + NULL, + 0, + request->u.gather.gather_cth, + expected_ops); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + size_left -= frag_size; + split_offset += frag_size; + } + } + + /*****************************************/ + /* root puts Recv-ACK to all other ranks */ + /*****************************************/ + if (i_am_root) { + for (i=0;iu.gather.size;i++) { + if (i == request->u.gather.root_rank) { continue; } + ret = PtlTriggeredPut(request->u.gather.sync_mdh, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, i), + mca_coll_portals4_component.pt_idx, + request->u.gather.sync_match_bits, + 0, + NULL, + 0, + request->u.gather.gather_cth, + expected_ops); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + } + + expected_ops+=expected_acks; + + if (!request->u.gather.is_sync) { + /******************************************/ + /* put to finish pt when all ops complete */ + /******************************************/ + ret = PtlTriggeredPut(mca_coll_portals4_component.zero_md_h, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, request->u.gather.my_rank), + mca_coll_portals4_component.finish_pt_idx, + 0, + 0, + NULL, + (uintptr_t) request, + request->u.gather.gather_cth, + expected_ops); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + +#ifdef 
RTR_USES_TRIGGERED_PUT + /************************************/ + /* root puts RTR to all other ranks */ + /************************************/ + if (i_am_root) { + for (i=0;iu.gather.size;i++) { + if (i == request->u.gather.root_rank) { continue; } + ret = PtlTriggeredPut(request->u.gather.sync_mdh, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, i), + mca_coll_portals4_component.pt_idx, + request->u.gather.sync_match_bits, + 0, + NULL, + 0, + request->u.gather.sync_cth, + 0); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + } +#else + /************************************/ + /* root puts RTR to all other ranks */ + /************************************/ + if (i_am_root) { + for (i=0;iu.gather.size;i++) { + if (i == request->u.gather.root_rank) { continue; } + ret = PtlPut(request->u.gather.sync_mdh, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, i), + mca_coll_portals4_component.pt_idx, + request->u.gather.sync_match_bits, + 0, + NULL, + 0); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + } +#endif + + if (request->u.gather.is_sync) { + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "calling CTWait(expected_ops=%d)\n", expected_ops); + + /********************************/ + /* Wait for all ops to complete */ + /********************************/ + ret = PtlCTWait(request->u.gather.gather_cth, expected_ops, &ct); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "completed CTWait(expected_ops=%d)\n", expected_ops); + } + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:gather_intra_linear_top exit rank %d", request->u.gather.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + if (NULL != request->u.gather.gather_buf) + free(request->u.gather.gather_buf); + + opal_output(ompi_coll_base_framework.framework_output, 
+ "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + +static int +ompi_coll_portals4_gather_intra_binomial_bottom(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request) +{ + int ret, line; + int i; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:gather_intra_binomial_bottom enter rank %d", request->u.gather.my_rank)); + + ret = cleanup_gather_handles(request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = cleanup_sync_handles(request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + if (request->u.gather.my_rank == request->u.gather.root_rank) { + uint32_t iov_count = 1; + struct iovec iov; + size_t max_data; + + for (i=0;iu.gather.size;i++) { + uint64_t offset = request->u.gather.unpack_dst_extent * request->u.gather.unpack_dst_count * ((request->u.gather.my_rank + i) % request->u.gather.size); + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): offset(%lu)", + __FILE__, __LINE__, request->u.gather.my_rank, + offset); + + ompi_coll_portals4_create_recv_converter (&request->u.gather.recv_converter, + request->u.gather.unpack_dst_buf + offset, + ompi_comm_peer_lookup(comm, request->u.gather.my_rank), + request->u.gather.unpack_dst_count, + request->u.gather.unpack_dst_dtype); + + iov.iov_len = request->u.gather.packed_size; + iov.iov_base = (IOVBASE_TYPE *) ((char *)request->u.gather.gather_buf + (request->u.gather.packed_size*i)); + opal_convertor_unpack(&request->u.gather.recv_converter, &iov, &iov_count, &max_data); + + OBJ_DESTRUCT(&request->u.gather.recv_converter); + } + } + + if (request->u.gather.free_after) + free(request->u.gather.gather_buf); + + request->super.req_status.MPI_ERROR = OMPI_SUCCESS; + + ompi_request_complete(&request->super, true); + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + 
"coll:portals4:gather_intra_binomial_bottom exit rank %d", request->u.gather.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + request->super.req_status.MPI_ERROR = ret; + + if (request->u.gather.free_after) + free(request->u.gather.gather_buf); + + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + +static int +ompi_coll_portals4_gather_intra_linear_bottom(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request) +{ + int ret, line; + int i; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:gather_intra_linear_bottom enter rank %d", request->u.gather.my_rank)); + + ret = cleanup_gather_handles(request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = cleanup_sync_handles(request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + if (request->u.gather.my_rank == request->u.gather.root_rank) { + uint32_t iov_count = 1; + struct iovec iov; + size_t max_data; + + for (i=0;iu.gather.size;i++) { + ompi_coll_portals4_create_recv_converter (&request->u.gather.recv_converter, + request->u.gather.unpack_dst_buf + (request->u.gather.unpack_dst_extent*request->u.gather.unpack_dst_count*i), + ompi_comm_peer_lookup(comm, request->u.gather.my_rank), + request->u.gather.unpack_dst_count, + request->u.gather.unpack_dst_dtype); + + iov.iov_len = request->u.gather.packed_size; + iov.iov_base = (IOVBASE_TYPE *) ((char *)request->u.gather.gather_buf + (request->u.gather.packed_size*i)); + opal_convertor_unpack(&request->u.gather.recv_converter, &iov, &iov_count, &max_data); + + OBJ_DESTRUCT(&request->u.gather.recv_converter); + } + } + + if (request->u.gather.free_after) + free(request->u.gather.gather_buf); + + request->super.req_status.MPI_ERROR = OMPI_SUCCESS; + + ompi_request_complete(&request->super, true); + + OPAL_OUTPUT_VERBOSE((10, 
ompi_coll_base_framework.framework_output, + "coll:portals4:gather_intra_linear_bottom exit rank %d", request->u.gather.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + request->super.req_status.MPI_ERROR = ret; + + if (request->u.gather.free_after) + free(request->u.gather.gather_buf); + + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + +int +ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + int ret, line; + + ompi_coll_portals4_request_t *request; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:gather_intra enter rank %d", ompi_comm_rank(comm))); + + /* + * allocate a portals4 request + */ + OMPI_COLL_PORTALS4_REQUEST_ALLOC(comm, request); + if (NULL == request) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.gather.is_sync = 1; + + /* + * initiate the gather + * + * this request is marked synchronous (is_sync==1), so PtlCTWait() + * will be called to wait for completion. 
+ */ + if (1 == mca_coll_portals4_component.use_binomial_gather_algorithm) { + ret = ompi_coll_portals4_gather_intra_binomial_top(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, + comm, + request, + module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = ompi_coll_portals4_gather_intra_binomial_bottom(comm, request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + } else { + ret = ompi_coll_portals4_gather_intra_linear_top(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, + comm, + request, + module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = ompi_coll_portals4_gather_intra_linear_bottom(comm, request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + } + + /* + * return the portals4 request + */ + OMPI_COLL_PORTALS4_REQUEST_RETURN(request); + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:gather_intra exit rank %d", request->u.gather.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + + +int +ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t **ompi_request, + mca_coll_base_module_t *module) +{ + int ret, line; + + ompi_coll_portals4_request_t *request; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:igather_intra enter rank %d", ompi_comm_rank(comm))); + + /* + * allocate a portals4 request + */ + OMPI_COLL_PORTALS4_REQUEST_ALLOC(comm, request); + if (NULL == request) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + *ompi_request = &request->super; + request->u.gather.is_sync = 0; + + /* + * initiate the gather + * 
+ * this request is marked asynchronous (is_sync==0), so + * portals4_progress() will handle completion. + */ + if (1 == mca_coll_portals4_component.use_binomial_gather_algorithm) { + ret = ompi_coll_portals4_gather_intra_binomial_top(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, + comm, + request, + module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + } else { + ret = ompi_coll_portals4_gather_intra_linear_top(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, + comm, + request, + module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + } + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:igather_intra exit rank %d", request->u.gather.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.gather.my_rank); + + return ret; +} + + +int +ompi_coll_portals4_igather_intra_fini(ompi_coll_portals4_request_t *request) +{ + int ret, line; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:igather_intra_fini enter rank %d", request->u.gather.my_rank)); + + /* + * cleanup the gather + */ + if (1 == mca_coll_portals4_component.use_binomial_gather_algorithm) { + ret = ompi_coll_portals4_gather_intra_binomial_bottom(request->super.req_mpi_object.comm, request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + } else { + ret = ompi_coll_portals4_gather_intra_linear_bottom(request->super.req_mpi_object.comm, request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + } + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:igather_intra_fini exit rank %d", request->u.gather.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, 
ret, request->u.gather.my_rank); + + return ret; +} diff --git a/ompi/mca/coll/portals4/coll_portals4_reduce.c b/ompi/mca/coll/portals4/coll_portals4_reduce.c index 3ab0bf727e7..5f41e75655e 100644 --- a/ompi/mca/coll/portals4/coll_portals4_reduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_reduce.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2015 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Bull SAS. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,7 +30,7 @@ static int -reduce_kary_tree_top(void *sendbuf, void *recvbuf, int count, +reduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dtype, MPI_Op op, int root, struct ompi_communicator_t *comm, @@ -359,12 +361,13 @@ reduce_kary_tree_bottom(ompi_coll_portals4_request_t *request) int -ompi_coll_portals4_reduce_intra(void *sendbuf, void *recvbuf, int count, +ompi_coll_portals4_reduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dtype, MPI_Op op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { + int ret; mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module; ompi_coll_portals4_request_t *request; @@ -379,9 +382,13 @@ ompi_coll_portals4_reduce_intra(void *sendbuf, void *recvbuf, int count, request->is_sync = true; request->fallback_request = NULL; - reduce_kary_tree_top(sendbuf, recvbuf, count, + ret = reduce_kary_tree_top(sendbuf, recvbuf, count, dtype, op, root, comm, request, portals4_module); - reduce_kary_tree_bottom(request); + if (OMPI_SUCCESS != ret) + return ret; + ret = reduce_kary_tree_bottom(request); + if (OMPI_SUCCESS != ret) + return ret; OMPI_COLL_PORTALS4_REQUEST_RETURN(request); return (OMPI_SUCCESS); @@ -389,13 +396,14 @@ ompi_coll_portals4_reduce_intra(void *sendbuf, void *recvbuf, int count, int 
-ompi_coll_portals4_ireduce_intra(void* sendbuf, void* recvbuf, int count, +ompi_coll_portals4_ireduce_intra(const void* sendbuf, void* recvbuf, int count, MPI_Datatype dtype, MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** ompi_request, struct mca_coll_base_module_2_1_0_t *module) { + int ret; mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module; ompi_coll_portals4_request_t *request; @@ -411,26 +419,29 @@ ompi_coll_portals4_ireduce_intra(void* sendbuf, void* recvbuf, int count, request->fallback_request = ompi_request; request->is_sync = false; - - reduce_kary_tree_top(sendbuf, recvbuf, count, + ret = reduce_kary_tree_top(sendbuf, recvbuf, count, dtype, op, root, comm, request, portals4_module); + if (OMPI_SUCCESS != ret) + return ret; if (!request->u.reduce.is_optim) { OMPI_COLL_PORTALS4_REQUEST_RETURN(request); } - puts("ireduce"); + opal_output_verbose(10, ompi_coll_base_framework.framework_output, "ireduce"); return (OMPI_SUCCESS); } int ompi_coll_portals4_ireduce_intra_fini(ompi_coll_portals4_request_t *request) { - reduce_kary_tree_bottom(request); + int ret; + + ret = reduce_kary_tree_bottom(request); + if (OMPI_SUCCESS != ret) + return ret; - OPAL_THREAD_LOCK(&ompi_request_lock); ompi_request_complete(&request->super, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); return (OMPI_SUCCESS); } diff --git a/ompi/mca/coll/portals4/coll_portals4_request.h b/ompi/mca/coll/portals4/coll_portals4_request.h index 175835381ec..f78c2dee631 100644 --- a/ompi/mca/coll/portals4/coll_portals4_request.h +++ b/ompi/mca/coll/portals4/coll_portals4_request.h @@ -83,6 +83,86 @@ struct ompi_coll_portals4_request_t { ptl_handle_ct_t ack_ct_h; } allreduce; + struct { + opal_convertor_t send_converter; + opal_convertor_t recv_converter; + size_t packed_size; + int8_t is_sync; + int8_t free_after; + size_t coll_count; + char *gather_buf; + uint64_t gather_bytes; + ptl_match_bits_t gather_match_bits; + ptl_handle_md_t 
gather_mdh; + ptl_size_t gather_offset; + ptl_handle_ct_t gather_cth; + ptl_handle_md_t gather_meh; + ptl_match_bits_t sync_match_bits; + ptl_handle_md_t sync_mdh; + ptl_handle_ct_t sync_cth; + ptl_handle_me_t sync_meh; + int my_rank; + int root_rank; + int size; + const void *sbuf; + void *rbuf; + const char *pack_src_buf; + int pack_src_count; + struct ompi_datatype_t *pack_src_dtype; + MPI_Aint pack_src_extent; + MPI_Aint pack_src_true_extent; + MPI_Aint pack_src_lb; + MPI_Aint pack_src_true_lb; + MPI_Aint pack_src_offset; + char *unpack_dst_buf; + int unpack_dst_count; + struct ompi_datatype_t *unpack_dst_dtype; + MPI_Aint unpack_dst_extent; + MPI_Aint unpack_dst_true_extent; + MPI_Aint unpack_dst_lb; + MPI_Aint unpack_dst_true_lb; + } gather; + + struct { + opal_convertor_t send_converter; + opal_convertor_t recv_converter; + size_t packed_size; + int8_t is_sync; + int8_t free_after; + size_t coll_count; + char *scatter_buf; + uint64_t scatter_bytes; + ptl_match_bits_t scatter_match_bits; + ptl_handle_md_t scatter_mdh; + ptl_handle_ct_t scatter_cth; + ptl_handle_md_t scatter_meh; + ptl_match_bits_t sync_match_bits; + ptl_handle_md_t sync_mdh; + ptl_handle_ct_t sync_cth; + ptl_handle_me_t sync_meh; + int my_rank; + int root_rank; + int size; + const void *sbuf; + void *rbuf; + uint64_t pack_bytes; + const char *pack_src_buf; + int pack_src_count; + struct ompi_datatype_t *pack_src_dtype; + MPI_Aint pack_src_extent; + MPI_Aint pack_src_true_extent; + MPI_Aint pack_src_lb; + MPI_Aint pack_src_true_lb; + uint64_t unpack_bytes; + char *unpack_dst_buf; + int unpack_dst_count; + struct ompi_datatype_t *unpack_dst_dtype; + MPI_Aint unpack_dst_extent; + MPI_Aint unpack_dst_true_extent; + MPI_Aint unpack_dst_lb; + MPI_Aint unpack_dst_true_lb; + MPI_Aint unpack_dst_offset; + } scatter; } u; }; typedef struct ompi_coll_portals4_request_t ompi_coll_portals4_request_t; diff --git a/ompi/mca/coll/portals4/coll_portals4_scatter.c 
b/ompi/mca/coll/portals4/coll_portals4_scatter.c new file mode 100644 index 00000000000..d1cfbbaa0d2 --- /dev/null +++ b/ompi/mca/coll/portals4/coll_portals4_scatter.c @@ -0,0 +1,797 @@ +/* + * Copyright (c) 2015 Sandia National Laboratories. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/datatype/ompi_datatype.h" +#include "opal/util/bit_ops.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/mca/coll/base/base.h" + +#include "coll_portals4.h" +#include "coll_portals4_request.h" + + +#undef RTR_USES_TRIGGERED_PUT + + +#define VRANK(ra, ro, si) ((ra - ro + si) % si) + + +static int +setup_scatter_buffers_linear(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_portals4_module_t *portals4_module) +{ + int ret, line; + + int8_t i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank); + + ompi_coll_portals4_create_send_converter (&request->u.scatter.send_converter, + request->u.scatter.pack_src_buf, + ompi_comm_peer_lookup(comm, request->u.scatter.my_rank), + request->u.scatter.pack_src_count, + request->u.scatter.pack_src_dtype); + opal_convertor_get_packed_size(&request->u.scatter.send_converter, &request->u.scatter.packed_size); + OBJ_DESTRUCT(&request->u.scatter.send_converter); + + /**********************************/ + /* Setup Scatter Buffers */ + /**********************************/ + if (i_am_root) { + + /* + * calculate the total size of the packed data + */ + request->u.scatter.scatter_bytes=request->u.scatter.packed_size * (ptrdiff_t)request->u.scatter.size; + + /* all transfers done using request->u.scatter.sdtype. 
+ * allocate temp buffer for recv, copy and/or rotate data at the end */ + request->u.scatter.scatter_buf = (char *) malloc(request->u.scatter.scatter_bytes); + if (NULL == request->u.scatter.scatter_buf) { + ret = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.scatter.free_after = 1; + + for (int32_t i=0;iu.scatter.size;i++) { + uint32_t iov_count = 1; + struct iovec iov; + size_t max_data; + + uint64_t offset = request->u.scatter.pack_src_extent * request->u.scatter.pack_src_count * i; + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): offset(%lu)", + __FILE__, __LINE__, request->u.scatter.my_rank, + offset); + + ompi_coll_portals4_create_send_converter (&request->u.scatter.send_converter, + request->u.scatter.pack_src_buf + offset, + ompi_comm_peer_lookup(comm, request->u.scatter.my_rank), + request->u.scatter.pack_src_count, + request->u.scatter.pack_src_dtype); + + iov.iov_len = request->u.scatter.packed_size; + iov.iov_base = (IOVBASE_TYPE *) ((char *)request->u.scatter.scatter_buf + (request->u.scatter.packed_size*i)); + opal_convertor_pack(&request->u.scatter.send_converter, &iov, &iov_count, &max_data); + + OBJ_DESTRUCT(&request->u.scatter.send_converter); + } + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): root - scatter_buf(%p) - scatter_bytes(%lu)=packed_size(%ld) * size(%d)", + __FILE__, __LINE__, request->u.scatter.my_rank, + request->u.scatter.scatter_buf, request->u.scatter.scatter_bytes, + request->u.scatter.packed_size, request->u.scatter.size); + } else { + request->u.scatter.scatter_bytes=request->u.scatter.packed_size; + request->u.scatter.scatter_buf = (char *) malloc(request->u.scatter.scatter_bytes); + if (NULL == request->u.scatter.scatter_buf) { + ret = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.scatter.free_after = 1; + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + 
"%s:%d:rank(%d): leaf - scatter_buf(%p) - scatter_bytes(%lu)=packed_size(%ld)", + __FILE__, __LINE__, request->u.scatter.my_rank, + request->u.scatter.scatter_buf, request->u.scatter.scatter_bytes, + request->u.scatter.packed_size); + } + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} + +static int +setup_scatter_handles(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_portals4_module_t *portals4_module) +{ + int ret, line; + + ptl_me_t me; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:setup_scatter_handles enter rank %d", request->u.scatter.my_rank)); + + /**********************************/ + /* Setup Scatter Handles */ + /**********************************/ + COLL_PORTALS4_SET_BITS(request->u.scatter.scatter_match_bits, ompi_comm_get_cid(comm), + 0, 0, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count); + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:setup_scatter_handles rank(%d) scatter_match_bits(0x%016lX)", + request->u.scatter.my_rank, request->u.scatter.scatter_match_bits)); + + ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, + &request->u.scatter.scatter_cth); + if (PTL_OK != ret) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; } + + request->u.scatter.scatter_mdh = mca_coll_portals4_component.data_md_h; + + me.start = request->u.scatter.scatter_buf; + me.length = request->u.scatter.scatter_bytes; + me.ct_handle = request->u.scatter.scatter_cth; + me.min_free = 0; + me.uid = mca_coll_portals4_component.uid; + me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE | + PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | + PTL_ME_EVENT_CT_COMM; + me.match_id.phys.nid = PTL_NID_ANY; + me.match_id.phys.pid = PTL_PID_ANY; + 
me.match_bits = request->u.scatter.scatter_match_bits; + me.ignore_bits = 0; + ret = PtlMEAppend(mca_coll_portals4_component.ni_h, + mca_coll_portals4_component.pt_idx, + &me, + PTL_PRIORITY_LIST, + NULL, + &request->u.scatter.scatter_meh); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:setup_scatter_handles exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} + +static int +setup_sync_handles(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_portals4_module_t *portals4_module) +{ + int ret, line; + + ptl_me_t me; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:setup_sync_handles enter rank %d", request->u.scatter.my_rank)); + + /**********************************/ + /* Setup Sync Handles */ + /**********************************/ + COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_cid(comm), + 0, 1, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count); + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:setup_sync_handles rank(%d) sync_match_bits(0x%016lX)", + request->u.scatter.my_rank, request->u.scatter.sync_match_bits)); + + ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, + &request->u.scatter.sync_cth); + if (PTL_OK != ret) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; } + + request->u.scatter.sync_mdh = mca_coll_portals4_component.zero_md_h; + + me.start = NULL; + me.length = 0; + me.ct_handle = request->u.scatter.sync_cth; + me.min_free = 0; + me.uid = mca_coll_portals4_component.uid; + me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE | + 
PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | + PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_CT_OVERFLOW; + me.match_id.phys.nid = PTL_NID_ANY; + me.match_id.phys.pid = PTL_PID_ANY; + me.match_bits = request->u.scatter.sync_match_bits; + me.ignore_bits = 0; + ret = PtlMEAppend(mca_coll_portals4_component.ni_h, + mca_coll_portals4_component.pt_idx, + &me, + PTL_PRIORITY_LIST, + NULL, + &request->u.scatter.sync_meh); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:setup_sync_handles exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} + +static int +cleanup_scatter_handles(ompi_coll_portals4_request_t *request) +{ + int ret, line; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:cleanup_scatter_handles enter rank %d", request->u.scatter.my_rank)); + + /**********************************/ + /* Cleanup Scatter Handles */ + /**********************************/ + do { + ret = PtlMEUnlink(request->u.scatter.scatter_meh); + if (PTL_IN_USE == ret) { + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d: scatter_meh still in use (ret=%d, rank %2d)", + __FILE__, __LINE__, ret, request->u.scatter.my_rank); + continue; + } + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } while (ret == PTL_IN_USE); + + ret = PtlCTFree(request->u.scatter.scatter_cth); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:cleanup_scatter_handles exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + 
opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} +/* Unlink the sync match entry (retrying while Portals reports PTL_IN_USE) and free the sync counter; returns OMPI_SUCCESS, or OMPI_ERROR after logging the failing Portals return code. */ +static int +cleanup_sync_handles(ompi_coll_portals4_request_t *request) +{ + int ret, line; + int ptl_ret = PTL_OK; /* raw Portals code printed by err_hdlr; assigned on every failure path */ + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:cleanup_sync_handles enter rank %d", request->u.scatter.my_rank)); + + /**********************************/ + /* Cleanup Sync Handles */ + /**********************************/ + do { + ret = PtlMEUnlink(request->u.scatter.sync_meh); + if (PTL_IN_USE == ret) { + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d: sync_meh still in use (ret=%d, rank %2d)", + __FILE__, __LINE__, ret, request->u.scatter.my_rank); + continue; + } + if (PTL_OK != ret) { ptl_ret = ret; ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } while (ret == PTL_IN_USE); + + ret = PtlCTFree(request->u.scatter.sync_cth); + if (PTL_OK != ret) { ptl_ret = ret; ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:cleanup_sync_handles exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred (ptl_ret=%d) ret=%d, rank %2d", + __FILE__, __LINE__, line, ptl_ret, ret, request->u.scatter.my_rank); + + return ret; +} + +static int +ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request, + mca_coll_base_module_t *module) +{ + mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module; + int ret, line; + ptl_ct_event_t ct; + + ptl_ct_event_t sync_incr_event; + + int8_t i_am_root; + + int32_t expected_rtrs = 0; + 
int32_t expected_puts = 0; + int32_t expected_acks = 0; + int32_t expected_ops = 0; + + int32_t expected_chained_rtrs = 0; + int32_t expected_chained_acks = 0; + + ptl_size_t number_of_fragment = 1; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra_linear_top enter rank %d", request->u.scatter.my_rank)); + + request->type = OMPI_COLL_PORTALS4_TYPE_SCATTER; + request->u.scatter.scatter_buf = NULL; + request->u.scatter.scatter_mdh = PTL_INVALID_HANDLE; + request->u.scatter.scatter_cth = PTL_INVALID_HANDLE; + request->u.scatter.scatter_meh = PTL_INVALID_HANDLE; + request->u.scatter.sync_mdh = PTL_INVALID_HANDLE; + request->u.scatter.sync_cth = PTL_INVALID_HANDLE; + request->u.scatter.sync_meh = PTL_INVALID_HANDLE; + + request->u.scatter.my_rank = ompi_comm_rank(comm); + request->u.scatter.size = ompi_comm_size(comm); + request->u.scatter.root_rank = root; + request->u.scatter.sbuf = sbuf; + request->u.scatter.rbuf = rbuf; + + request->u.scatter.pack_src_buf = sbuf; + request->u.scatter.pack_src_count = scount; + request->u.scatter.pack_src_dtype = sdtype; + ompi_datatype_get_extent(request->u.scatter.pack_src_dtype, + &request->u.scatter.pack_src_lb, + &request->u.scatter.pack_src_extent); + ompi_datatype_get_true_extent(request->u.scatter.pack_src_dtype, + &request->u.scatter.pack_src_true_lb, + &request->u.scatter.pack_src_true_extent); + + if ((root == request->u.scatter.my_rank) && (rbuf == MPI_IN_PLACE)) { + request->u.scatter.unpack_dst_buf = NULL; + request->u.scatter.unpack_dst_count = 0; + request->u.scatter.unpack_dst_dtype = MPI_DATATYPE_NULL; + } else { + request->u.scatter.unpack_dst_buf = rbuf; + request->u.scatter.unpack_dst_count = rcount; + request->u.scatter.unpack_dst_dtype = rdtype; + request->u.scatter.unpack_dst_offset = 0; + ompi_datatype_get_extent(request->u.scatter.unpack_dst_dtype, + &request->u.scatter.unpack_dst_lb, + &request->u.scatter.unpack_dst_extent); + 
ompi_datatype_get_true_extent(request->u.scatter.unpack_dst_dtype, + &request->u.scatter.unpack_dst_true_lb, + &request->u.scatter.unpack_dst_true_extent); + } + + opal_output_verbose(30, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): request->u.scatter.unpack_dst_offset(%lu)", + __FILE__, __LINE__, request->u.scatter.my_rank, + request->u.scatter.unpack_dst_offset); + + /**********************************/ + /* Setup Common Parameters */ + /**********************************/ + + i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank); + + request->u.scatter.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1); + + ret = setup_scatter_buffers_linear(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = setup_scatter_handles(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = setup_sync_handles(comm, request, portals4_module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + number_of_fragment = (request->u.scatter.packed_size > mca_coll_portals4_component.ni_limits.max_msg_size) ? 
+ (request->u.scatter.packed_size + mca_coll_portals4_component.ni_limits.max_msg_size - 1) / mca_coll_portals4_component.ni_limits.max_msg_size : + 1; + opal_output_verbose(90, ompi_coll_base_framework.framework_output, + "%s:%d:rank %d:number_of_fragment = %lu", + __FILE__, __LINE__, request->u.scatter.my_rank, number_of_fragment); + + /**********************************/ + /* do the scatter */ + /**********************************/ + if (i_am_root) { + /* operations on the sync counter */ + expected_rtrs = request->u.scatter.size - 1; /* expect RTRs from non-root ranks */ + expected_acks = request->u.scatter.size - 1; /* expect Recv-ACKs from non-root ranks */ + + /* operations on the scatter counter */ + expected_puts = 0; + expected_chained_rtrs = 1; + expected_chained_acks = 1; + + /* Chain the RTR and Recv-ACK to the Scatter CT */ + sync_incr_event.success=1; + sync_incr_event.failure=0; + ret = PtlTriggeredCTInc(request->u.scatter.scatter_cth, + sync_incr_event, + request->u.scatter.sync_cth, + expected_rtrs); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + ret = PtlTriggeredCTInc(request->u.scatter.scatter_cth, + sync_incr_event, + request->u.scatter.sync_cth, + expected_rtrs + expected_acks); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + /* root, so put packed bytes to other ranks */ + for (int32_t i=0;iu.scatter.size;i++) { + /* do not put to my scatter_buf. my data gets unpacked into my out buffer in linear_bottom(). 
*/ + if (i == request->u.scatter.my_rank) { + continue; + } + + ptl_size_t offset = request->u.scatter.packed_size * i; + ptl_size_t size_sent = 0; + ptl_size_t size_left = request->u.scatter.packed_size; + + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): offset(%lu)=rank(%d) * packed_size(%ld)", + __FILE__, __LINE__, request->u.scatter.my_rank, + offset, i, request->u.scatter.packed_size); + + for (ptl_size_t j=0; j mca_coll_portals4_component.ni_limits.max_msg_size) ? + mca_coll_portals4_component.ni_limits.max_msg_size : + size_left; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "%s:%d:rank(%d): frag(%lu),offset_frag (%lu) frag_size(%lu)", + __FILE__, __LINE__, request->u.scatter.my_rank, + j, size_sent, frag_size)); + + ret = PtlTriggeredPut(request->u.scatter.scatter_mdh, + (ptl_size_t)request->u.scatter.scatter_buf + offset + size_sent, + frag_size, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, i), + mca_coll_portals4_component.pt_idx, + request->u.scatter.scatter_match_bits, + size_sent, + NULL, + 0, + request->u.scatter.scatter_cth, + expected_chained_rtrs); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + size_left -= frag_size; + size_sent += frag_size; + } + } + } else { + /* non-root, so do nothing */ + + /* operations on the sync counter */ + expected_rtrs = 0; + expected_acks = 0; + + /* operations on the scatter counter */ + expected_puts = number_of_fragment; /* scatter put from root */ + expected_chained_rtrs = 0; + expected_chained_acks = 0; + } + + expected_ops = expected_chained_rtrs + expected_puts; + + /**********************************************/ + /* only non-root ranks are PUT to, so only */ + /* non-root ranks must PUT a Recv-ACK to root */ + /**********************************************/ + if (!i_am_root) { + ret = PtlTriggeredPut(request->u.scatter.sync_mdh, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, 
request->u.scatter.root_rank), + mca_coll_portals4_component.pt_idx, + request->u.scatter.sync_match_bits, + 0, + NULL, + 0, + request->u.scatter.scatter_cth, + expected_ops); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + + expected_ops += expected_chained_acks; + + if (!request->u.scatter.is_sync) { + /******************************************/ + /* put to finish pt when all ops complete */ + /******************************************/ + ret = PtlTriggeredPut(mca_coll_portals4_component.zero_md_h, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, request->u.scatter.my_rank), + mca_coll_portals4_component.finish_pt_idx, + 0, + 0, + NULL, + (uintptr_t) request, + request->u.scatter.scatter_cth, + expected_ops); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + + /**************************************/ + /* all non-root ranks put RTR to root */ + /**************************************/ + if (!i_am_root) { + ret = PtlPut(request->u.scatter.sync_mdh, + 0, + 0, + PTL_NO_ACK_REQ, + ompi_coll_portals4_get_peer(comm, request->u.scatter.root_rank), + mca_coll_portals4_component.pt_idx, + request->u.scatter.sync_match_bits, + 0, + NULL, + 0); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + } + + if (request->u.scatter.is_sync) { + opal_output_verbose(1, ompi_coll_base_framework.framework_output, + "calling CTWait(expected_ops=%d)\n", expected_ops); + + /********************************/ + /* Wait for all ops to complete */ + /********************************/ + ret = PtlCTWait(request->u.scatter.scatter_cth, expected_ops, &ct); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } + + opal_output_verbose(1, ompi_coll_base_framework.framework_output, + "completed CTWait(expected_ops=%d)\n", expected_ops); + } + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra_linear_top exit rank %d", 
request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + if (NULL != request->u.scatter.scatter_buf) + free(request->u.scatter.scatter_buf); + + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} +/* Finish a linear scatter: release the scatter and sync Portals handles, unpack this rank's packed data from scatter_buf into the receive buffer (skipped when unpack_dst_buf is NULL), free scatter_buf when free_after is set, and complete the request. */ +static int +ompi_coll_portals4_scatter_intra_linear_bottom(struct ompi_communicator_t *comm, + ompi_coll_portals4_request_t *request) +{ + int ret, line; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra_linear_bottom enter rank %d", request->u.scatter.my_rank)); + + ret = cleanup_scatter_handles(request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = cleanup_sync_handles(request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + if (NULL != request->u.scatter.unpack_dst_buf) { + uint32_t iov_count = 1; + struct iovec iov; + size_t max_data; + + ompi_coll_portals4_create_recv_converter (&request->u.scatter.recv_converter, + request->u.scatter.unpack_dst_buf, + ompi_comm_peer_lookup(comm, request->u.scatter.my_rank), + request->u.scatter.unpack_dst_count, + request->u.scatter.unpack_dst_dtype); + + iov.iov_len = request->u.scatter.packed_size; + if (request->u.scatter.my_rank == request->u.scatter.root_rank) { + /* unpack my data from the location in scatter_buf where it was packed: setup_scatter_buffers_linear() packs rank i at packed_size * i, not at the datatype extent */ + uint64_t offset = (uint64_t)request->u.scatter.packed_size * request->u.scatter.my_rank; + iov.iov_base = (IOVBASE_TYPE *)((char *)request->u.scatter.scatter_buf + offset); + } else { + iov.iov_base = (IOVBASE_TYPE *)request->u.scatter.scatter_buf; + } + opal_convertor_unpack(&request->u.scatter.recv_converter, &iov, &iov_count, &max_data); + + OBJ_DESTRUCT(&request->u.scatter.recv_converter); + } + + if (request->u.scatter.free_after) + free(request->u.scatter.scatter_buf); + + request->super.req_status.MPI_ERROR = 
OMPI_SUCCESS; + + ompi_request_complete(&request->super, true); + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra_linear_bottom exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + request->super.req_status.MPI_ERROR = ret; + + if (request->u.scatter.free_after) + free(request->u.scatter.scatter_buf); + + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} +/* Blocking scatter: allocates a portals4 request, runs the linear top half (which waits in PtlCTWait because is_sync==1) and the bottom half, then returns the request; returns OMPI_SUCCESS or the first error code encountered. */ +int +ompi_coll_portals4_scatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + int ret, line; + + ompi_coll_portals4_request_t *request; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra enter rank %d", ompi_comm_rank(comm))); + + /* + * allocate a portals4 request + */ + OMPI_COLL_PORTALS4_REQUEST_ALLOC(comm, request); + if (NULL == request) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + request->u.scatter.is_sync = 1; + + /* + * initiate the scatter + * + * this request is marked synchronous (is_sync==1), so PtlCTWait() + * will be called to wait for completion. 
+ */ + ret = ompi_coll_portals4_scatter_intra_linear_top(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, + comm, + request, + module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + ret = ompi_coll_portals4_scatter_intra_linear_bottom(comm, request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + /* + * return the portals4 request + */ + OMPI_COLL_PORTALS4_REQUEST_RETURN(request); + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:scatter_intra exit rank %d", ompi_comm_rank(comm))); /* request was just returned, so it is no longer read here */ + + return OMPI_SUCCESS; + +err_hdlr: /* request is NULL when the allocation above failed, so the rank is taken from comm */ + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, ompi_comm_rank(comm)); + + return ret; +} + + +int +ompi_coll_portals4_iscatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t **ompi_request, + mca_coll_base_module_t *module) +{ + int ret, line; + + ompi_coll_portals4_request_t *request; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:iscatter_intra enter rank %d", ompi_comm_rank(comm))); + + /* + * allocate a portals4 request + */ + OMPI_COLL_PORTALS4_REQUEST_ALLOC(comm, request); + if (NULL == request) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; + } + *ompi_request = &request->super; + request->u.scatter.is_sync = 0; + + /* + * initiate the scatter + * + * this request is marked asynchronous (is_sync==0), so + * portals4_progress() will handle completion. 
+ */ + ret = ompi_coll_portals4_scatter_intra_linear_top(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, + comm, + request, + module); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:iscatter_intra exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: /* request is NULL when the allocation failed, so the rank is taken from comm */ + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, ompi_comm_rank(comm)); + + return ret; +} + + +int +ompi_coll_portals4_iscatter_intra_fini(ompi_coll_portals4_request_t *request) +{ + int ret, line; + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:iscatter_intra_fini enter rank %d", request->u.scatter.my_rank)); + + /* + * cleanup the scatter + */ + ret = ompi_coll_portals4_scatter_intra_linear_bottom(request->super.req_mpi_object.comm, request); + if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; } + + OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, + "coll:portals4:iscatter_intra_fini exit rank %d", request->u.scatter.my_rank)); + + return OMPI_SUCCESS; + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d, rank %2d", + __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); + + return ret; +} diff --git a/ompi/mca/coll/portals4/configure.m4 b/ompi/mca/coll/portals4/configure.m4 index 22b7f12b954..fd2e66cdc7c 100644 --- a/ompi/mca/coll/portals4/configure.m4 +++ b/ompi/mca/coll/portals4/configure.m4 @@ -2,9 +2,9 @@ # # Copyright (c) 2013 Sandia National Laboratories. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -15,7 +15,7 @@ AC_DEFUN([MCA_ompi_coll_portals4_POST_CONFIG], [ AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([PORTALS4])]) ])dnl -# MCA_coll_portals4_CONFIG(action-if-can-compile, +# MCA_coll_portals4_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_coll_portals4_CONFIG],[ diff --git a/ompi/mca/coll/self/Makefile.am b/ompi/mca/coll/self/Makefile.am index cbaddd00b86..a3735ece346 100644 --- a/ompi/mca/coll/self/Makefile.am +++ b/ompi/mca/coll/self/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/coll/self/coll_self.h b/ompi/mca/coll/self/coll_self.h index 0b9386a1556..3431e7f5492 100644 --- a/ompi/mca/coll/self/coll_self.h +++ b/ompi/mca/coll/self/coll_self.h @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -51,90 +53,90 @@ mca_coll_self_comm_query(struct ompi_communicator_t *comm, int *priority); int mca_coll_self_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); -int mca_coll_self_allgather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, +int mca_coll_self_allgather_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_allgatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, +int mca_coll_self_allgatherv_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_allreduce_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_self_allreduce_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_alltoall_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, +int mca_coll_self_alltoall_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_alltoallv_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, +int 
mca_coll_self_alltoallv_intra(const void *sbuf, const int *scounts, const int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, +int mca_coll_self_alltoallw_intra(const void *sbuf, const int *scounts, const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); int mca_coll_self_barrier_intra(struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_bcast_intra(void *buff, int count, +int mca_coll_self_bcast_intra(void *buff, int count, struct ompi_datatype_t *datatype, - int root, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_exscan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_self_exscan_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_gather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, +int mca_coll_self_gather_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_gatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, int root, +int 
mca_coll_self_gatherv_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_reduce_intra(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_self_reduce_intra(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_reduce_scatter_intra(void *sbuf, void *rbuf, - int *rcounts, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_self_reduce_scatter_intra(const void *sbuf, void *rbuf, + const int *rcounts, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_scan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_self_scan_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_scatter_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, +int mca_coll_self_scatter_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); -int mca_coll_self_scatterv_intra(void *sbuf, int *scounts, int *disps, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, int root, +int mca_coll_self_scatterv_intra(const void *sbuf, const int *scounts, const int *disps, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, 
int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); @@ -143,9 +145,6 @@ int mca_coll_self_ft_event(int state); struct mca_coll_self_module_t { mca_coll_base_module_t super; - - ompi_request_t **mccb_reqs; - int mccb_num_reqs; }; typedef struct mca_coll_self_module_t mca_coll_self_module_t; OBJ_CLASS_DECLARATION(mca_coll_self_module_t); diff --git a/ompi/mca/coll/self/coll_self_allgather.c b/ompi/mca/coll/self/coll_self_allgather.c index 714baee0c55..25028bc6817 100644 --- a/ompi/mca/coll/self/coll_self_allgather.c +++ b/ompi/mca/coll/self/coll_self_allgather.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,9 +32,9 @@ * Accepts: - same as MPI_Allgather() * Returns: - MPI_SUCCESS, or error code */ -int mca_coll_self_allgather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, +int mca_coll_self_allgather_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/self/coll_self_allgatherv.c b/ompi/mca/coll/self/coll_self_allgatherv.c index fb1368f1246..89c5403bdec 100644 --- a/ompi/mca/coll/self/coll_self_allgatherv.c +++ b/ompi/mca/coll/self/coll_self_allgatherv.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,17 +32,17 @@ * Accepts: - same as MPI_Allgatherv() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_self_allgatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, +int mca_coll_self_allgatherv_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { if (MPI_IN_PLACE == sbuf) { return MPI_SUCCESS; } else { - int err; + int err; ptrdiff_t lb, extent; err = ompi_datatype_get_extent(rdtype, &lb, &extent); if (OMPI_SUCCESS != err) { diff --git a/ompi/mca/coll/self/coll_self_allreduce.c b/ompi/mca/coll/self/coll_self_allreduce.c index f34d1b95c87..cfcde182953 100644 --- a/ompi/mca/coll/self/coll_self_allreduce.c +++ b/ompi/mca/coll/self/coll_self_allreduce.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,8 +32,8 @@ * Accepts: - same as MPI_Allreduce() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_self_allreduce_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, +int mca_coll_self_allreduce_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) diff --git a/ompi/mca/coll/self/coll_self_alltoall.c b/ompi/mca/coll/self/coll_self_alltoall.c index 226b81ef471..56a24a5c72d 100644 --- a/ompi/mca/coll/self/coll_self_alltoall.c +++ b/ompi/mca/coll/self/coll_self_alltoall.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,13 +29,13 @@ /* * alltoall_intra * - * Function: - MPI_Alltoall + * Function: - MPI_Alltoall * Accepts: - same as MPI_Alltoall() * Returns: - MPI_SUCCESS or an MPI error code */ -int mca_coll_self_alltoall_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, +int mca_coll_self_alltoall_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) diff --git a/ompi/mca/coll/self/coll_self_alltoallv.c b/ompi/mca/coll/self/coll_self_alltoallv.c index a02254c225a..7d34daa1645 100644 --- a/ompi/mca/coll/self/coll_self_alltoallv.c +++ b/ompi/mca/coll/self/coll_self_alltoallv.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,14 +34,14 @@ * Returns: - MPI_SUCCESS or an MPI error code */ int -mca_coll_self_alltoallv_intra(void *sbuf, int *scounts, int *sdisps, +mca_coll_self_alltoallv_intra(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int err; + int err; ptrdiff_t lb, rextent, sextent; if (MPI_IN_PLACE == sbuf) { @@ -54,8 +56,8 @@ mca_coll_self_alltoallv_intra(void *sbuf, int *scounts, int *sdisps, if (OMPI_SUCCESS != err) { return OMPI_ERROR; } - return ompi_datatype_sndrcv(((char *) sbuf) + sdisps[0] * sextent, + return ompi_datatype_sndrcv(((char *) sbuf) + sdisps[0] * sextent, scounts[0], sdtype, - ((char *) rbuf) + rdisps[0] * rextent, + ((char *) rbuf) + rdisps[0] * rextent, rcounts[0], rdtype); } diff --git a/ompi/mca/coll/self/coll_self_alltoallw.c b/ompi/mca/coll/self/coll_self_alltoallw.c index 5053f5f4149..1cf6dc1217e 100644 --- a/ompi/mca/coll/self/coll_self_alltoallw.c +++ b/ompi/mca/coll/self/coll_self_alltoallw.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,14 +33,14 @@ * Accepts: - same as MPI_Alltoallw() * Returns: - MPI_SUCCESS or an MPI error code */ -int mca_coll_self_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, +int mca_coll_self_alltoallw_intra(const void *sbuf, const int *scounts, const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int err; + int err; ptrdiff_t lb, rextent, sextent; if (MPI_IN_PLACE == sbuf) { @@ -54,8 +56,8 @@ int mca_coll_self_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, return OMPI_ERROR; } - return ompi_datatype_sndrcv(((char *) sbuf) + sdisps[0] * sextent, + return ompi_datatype_sndrcv(((char *) sbuf) + sdisps[0] * sextent, scounts[0], sdtypes[0], - ((char *) rbuf) + rdisps[0] * rextent, + ((char *) rbuf) + rdisps[0] * rextent, rcounts[0], rdtypes[0]); } diff --git a/ompi/mca/coll/self/coll_self_barrier.c b/ompi/mca/coll/self/coll_self_barrier.c index d8371713e7a..b4ed5cbdbf3 100644 --- a/ompi/mca/coll/self/coll_self_barrier.c +++ b/ompi/mca/coll/self/coll_self_barrier.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/coll/self/coll_self_bcast.c b/ompi/mca/coll/self/coll_self_bcast.c index 5121f749b23..d6813ee3fc5 100644 --- a/ompi/mca/coll/self/coll_self_bcast.c +++ b/ompi/mca/coll/self/coll_self_bcast.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/coll/self/coll_self_component.c b/ompi/mca/coll/self/coll_self_component.c index 8a7a4b89b9e..2dc66932981 100644 --- a/ompi/mca/coll/self/coll_self_component.c +++ b/ompi/mca/coll/self/coll_self_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/coll/self/coll_self_exscan.c b/ompi/mca/coll/self/coll_self_exscan.c index 353d1490b44..5b0b2fac134 100644 --- a/ompi/mca/coll/self/coll_self_exscan.c +++ b/ompi/mca/coll/self/coll_self_exscan.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,9 +31,9 @@ * Accepts: - same arguments as MPI_Exccan() * Returns: - MPI_SUCCESS */ -int mca_coll_self_exscan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_self_exscan_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/self/coll_self_gather.c b/ompi/mca/coll/self/coll_self_gather.c index 794ac8fa54d..57cf69dd6f6 100644 --- a/ompi/mca/coll/self/coll_self_gather.c +++ b/ompi/mca/coll/self/coll_self_gather.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,14 +28,14 @@ /* * gather_intra * - * Function: - gather + * Function: - gather * Accepts: - same arguments as MPI_Gather() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_self_gather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, +int mca_coll_self_gather_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/self/coll_self_gatherv.c b/ompi/mca/coll/self/coll_self_gatherv.c index 795c3ad3c62..136b4652ae3 100644 --- a/ompi/mca/coll/self/coll_self_gatherv.c +++ b/ompi/mca/coll/self/coll_self_gatherv.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,24 +32,24 @@ * Accepts: - same arguments as MPI_Gatherv() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_self_gatherv_intra(void *sbuf, int scount, +int mca_coll_self_gatherv_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { if (MPI_IN_PLACE == sbuf) { return MPI_SUCCESS; - } else { - int err; + } else { + int err; ptrdiff_t lb, extent; err = ompi_datatype_get_extent(rdtype, &lb, &extent); if (OMPI_SUCCESS != err) { return OMPI_ERROR; } return ompi_datatype_sndrcv(sbuf, scount, sdtype, - ((char *) rbuf) + disps[0]*extent, + ((char *) rbuf) + disps[0]*extent, rcounts[0], rdtype); } } diff --git a/ompi/mca/coll/self/coll_self_module.c b/ompi/mca/coll/self/coll_self_module.c index cfbead87727..205c6451571 100644 --- a/ompi/mca/coll/self/coll_self_module.c +++ b/ompi/mca/coll/self/coll_self_module.c @@ -5,14 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,7 +37,7 @@ int mca_coll_self_init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ - + return OMPI_SUCCESS; } @@ -47,7 +48,7 @@ int mca_coll_self_init_query(bool enable_progress_threads, * priority we want to return. 
*/ mca_coll_base_module_t * -mca_coll_self_comm_query(struct ompi_communicator_t *comm, +mca_coll_self_comm_query(struct ompi_communicator_t *comm, int *priority) { mca_coll_self_module_t *module; @@ -61,7 +62,7 @@ mca_coll_self_comm_query(struct ompi_communicator_t *comm, if (NULL == module) return NULL; module->super.coll_module_enable = mca_coll_self_module_enable; - module->super.ft_event = mca_coll_self_ft_event; + module->super.ft_event = NULL; module->super.coll_allgather = mca_coll_self_allgather_intra; module->super.coll_allgatherv = mca_coll_self_allgatherv_intra; module->super.coll_allreduce = mca_coll_self_allreduce_intra; @@ -96,23 +97,3 @@ mca_coll_self_module_enable(mca_coll_base_module_t *module, return OMPI_SUCCESS; } - -int mca_coll_self_ft_event(int state) { - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/self/coll_self_reduce.c b/ompi/mca/coll/self/coll_self_reduce.c index 5446835b039..5fd3f4b6f60 100644 --- a/ompi/mca/coll/self/coll_self_reduce.c +++ b/ompi/mca/coll/self/coll_self_reduce.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,12 @@ /* * reduce_lin_intra * - * Function: - reduction + * Function: - reduction * Accepts: - same as MPI_Reduce() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_self_reduce_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, +int mca_coll_self_reduce_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) diff --git a/ompi/mca/coll/self/coll_self_reduce_scatter.c b/ompi/mca/coll/self/coll_self_reduce_scatter.c index 2ca8afdf6e0..92006cbf463 100644 --- a/ompi/mca/coll/self/coll_self_reduce_scatter.c +++ b/ompi/mca/coll/self/coll_self_reduce_scatter.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,7 +32,7 @@ * Accepts: - same as MPI_Reduce_scatter() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_self_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, +int mca_coll_self_reduce_scatter_intra(const void *sbuf, void *rbuf, const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/self/coll_self_scan.c b/ompi/mca/coll/self/coll_self_scan.c index 34a625cce25..14bd1cbf3f2 100644 --- a/ompi/mca/coll/self/coll_self_scan.c +++ b/ompi/mca/coll/self/coll_self_scan.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,9 +32,9 @@ * Accepts: - same arguments as MPI_Scan() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_self_scan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_self_scan_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/self/coll_self_scatter.c b/ompi/mca/coll/self/coll_self_scatter.c index 74e60220876..179a7062f76 100644 --- a/ompi/mca/coll/self/coll_self_scatter.c +++ b/ompi/mca/coll/self/coll_self_scatter.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,11 +32,11 @@ * Accepts: - same arguments as MPI_Scatter() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_self_scatter_intra(void *sbuf, int scount, +int mca_coll_self_scatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, - int root, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/self/coll_self_scatterv.c b/ompi/mca/coll/self/coll_self_scatterv.c index e817d2f41c2..e7b0b5badea 100644 --- a/ompi/mca/coll/self/coll_self_scatterv.c +++ b/ompi/mca/coll/self/coll_self_scatterv.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,8 +32,8 @@ * Accepts: - same arguments as MPI_Scatter() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_self_scatterv_intra(void *sbuf, int *scounts, - int *disps, struct ompi_datatype_t *sdtype, +int mca_coll_self_scatterv_intra(const void *sbuf, const int *scounts, + const int *disps, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, @@ -40,13 +42,13 @@ int mca_coll_self_scatterv_intra(void *sbuf, int *scounts, if (MPI_IN_PLACE == rbuf) { return MPI_SUCCESS; } else { - int err; + int err; ptrdiff_t lb, extent; err = ompi_datatype_get_extent(sdtype, &lb, &extent); if (OMPI_SUCCESS != err) { return OMPI_ERROR; } - return ompi_datatype_sndrcv(((char *) sbuf) + disps[0]*extent, scounts[0], + return ompi_datatype_sndrcv(((char *) sbuf) + disps[0]*extent, scounts[0], sdtype, rbuf, rcount, rdtype); } } diff --git a/ompi/mca/coll/sm/Makefile.am b/ompi/mca/coll/sm/Makefile.am index c7214f5d2e4..47a6582d16c 100644 --- a/ompi/mca/coll/sm/Makefile.am +++ b/ompi/mca/coll/sm/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/coll/sm/coll_sm.h b/ompi/mca/coll/sm/coll_sm.h index 520875d3eee..baaa510ed19 100644 --- a/ompi/mca/coll/sm/coll_sm.h +++ b/ompi/mca/coll/sm/coll_sm.h @@ -1,19 +1,21 @@ -/* +/* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file */ @@ -43,13 +45,13 @@ BEGIN_C_DECLS } \ opal_progress(); \ } while (1); \ - exit_label: + exit_label: - /** + /** * Structure to hold the sm coll component. First it holds the * base coll component, and then holds a bunch of * sm-coll-component-specific stuff (e.g., current MCA param - * values). + * values). */ typedef struct mca_coll_sm_component_t { /** Base coll component */ @@ -195,7 +197,7 @@ BEGIN_C_DECLS mca_coll_base_module_t *previous_reduce_module; } mca_coll_sm_module_t; OBJ_CLASS_DECLARATION(mca_coll_sm_module_t); - + /** * Global component instance */ @@ -209,113 +211,113 @@ BEGIN_C_DECLS mca_coll_base_module_t * mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority); - + /* Lazily enable a module (since it involves expensive/slow mmap allocation, etc.) 
*/ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); - int mca_coll_sm_allgather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, + int mca_coll_sm_allgather_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_allgatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, + int mca_coll_sm_allgatherv_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_allreduce_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_sm_allreduce_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_alltoall_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, + int mca_coll_sm_alltoall_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_alltoallv_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, + int mca_coll_sm_alltoallv_intra(const void *sbuf, const int *scounts, const int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, struct 
ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, + int mca_coll_sm_alltoallw_intra(const void *sbuf, const int *scounts, const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_bcast_intra(void *buff, int count, + int mca_coll_sm_bcast_intra(void *buff, int count, struct ompi_datatype_t *datatype, - int root, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_bcast_log_intra(void *buff, int count, - struct ompi_datatype_t *datatype, - int root, - struct ompi_communicator_t *comm, + int mca_coll_sm_bcast_log_intra(void *buff, int count, + struct ompi_datatype_t *datatype, + int root, + struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_exscan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, + int mca_coll_sm_exscan_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_gather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, + int mca_coll_sm_gather_intra(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_gatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t 
*sdtype, void *rbuf, - int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, int root, + int mca_coll_sm_gatherv_intra(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int *rcounts, int *disps, + struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_reduce_intra(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_sm_reduce_intra(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_reduce_log_intra(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, + int mca_coll_sm_reduce_log_intra(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_reduce_scatter_intra(void *sbuf, void *rbuf, - int *rcounts, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_sm_reduce_scatter_intra(const void *sbuf, void *rbuf, + int *rcounts, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_scan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, + int mca_coll_sm_scan_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_scatter_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, + int mca_coll_sm_scatter_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, int root, struct 
ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_sm_scatterv_intra(void *sbuf, int *scounts, int *disps, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, int root, + int mca_coll_sm_scatterv_intra(const void *sbuf, const int *scounts, const int *disps, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); int mca_coll_sm_ft_event(int state); - + /** * Global variables used in the macros (essentially constants, so * these are thread safe) @@ -392,7 +394,7 @@ extern uint32_t mca_coll_sm_one; mca_coll_sm_component.sm_fragment_size)), \ (len)) -/** +/** * Macro to tell children that a segment is ready (normalize * the child's ID based on the shift used to calculate the "me" node * in the tree). Used in fan out opertations. diff --git a/ompi/mca/coll/sm/coll_sm_allgather.c b/ompi/mca/coll/sm/coll_sm_allgather.c index ee29213bc72..56d0a2ed8e2 100644 --- a/ompi/mca/coll/sm/coll_sm_allgather.c +++ b/ompi/mca/coll/sm/coll_sm_allgather.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,7 +31,7 @@ * Accepts: - same as MPI_Allgather() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_sm_allgather_intra(void *sbuf, int scount, +int mca_coll_sm_allgather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/sm/coll_sm_allgatherv.c b/ompi/mca/coll/sm/coll_sm_allgatherv.c index 34a6ef91a51..4f14374a87a 100644 --- a/ompi/mca/coll/sm/coll_sm_allgatherv.c +++ b/ompi/mca/coll/sm/coll_sm_allgatherv.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,10 +31,10 @@ * Accepts: - same as MPI_Allgatherv() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_sm_allgatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, +int mca_coll_sm_allgatherv_intra(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/sm/coll_sm_allreduce.c b/ompi/mca/coll/sm/coll_sm_allreduce.c index 8c253a411c3..190e16849c5 100644 --- a/ompi/mca/coll/sm/coll_sm_allreduce.c +++ b/ompi/mca/coll/sm/coll_sm_allreduce.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file */ @@ -31,8 +33,8 @@ * For the moment, all we're doing is a reduce to root==0 and then a * broadcast. It is possible that we'll do something better someday. 
*/ -int mca_coll_sm_allreduce_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, +int mca_coll_sm_allreduce_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -45,14 +47,14 @@ int mca_coll_sm_allreduce_intra(void *sbuf, void *rbuf, int count, if (MPI_IN_PLACE == sbuf) { int rank = ompi_comm_rank(comm); if (0 == rank) { - ret = mca_coll_sm_reduce_intra(sbuf, rbuf, count, dtype, op, 0, + ret = mca_coll_sm_reduce_intra(sbuf, rbuf, count, dtype, op, 0, comm, module); } else { - ret = mca_coll_sm_reduce_intra(rbuf, NULL, count, dtype, op, 0, + ret = mca_coll_sm_reduce_intra(rbuf, NULL, count, dtype, op, 0, comm, module); } } else { - ret = mca_coll_sm_reduce_intra(sbuf, rbuf, count, dtype, op, 0, + ret = mca_coll_sm_reduce_intra(sbuf, rbuf, count, dtype, op, 0, comm, module); } return (ret == OMPI_SUCCESS) ? diff --git a/ompi/mca/coll/sm/coll_sm_alltoall.c b/ompi/mca/coll/sm/coll_sm_alltoall.c index 2a807cb1e4c..1fe9d38efc5 100644 --- a/ompi/mca/coll/sm/coll_sm_alltoall.c +++ b/ompi/mca/coll/sm/coll_sm_alltoall.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,11 +27,11 @@ /* * alltoall_intra * - * Function: - MPI_Alltoall + * Function: - MPI_Alltoall * Accepts: - same as MPI_Alltoall() * Returns: - MPI_SUCCESS or an MPI error code */ -int mca_coll_sm_alltoall_intra(void *sbuf, int scount, +int mca_coll_sm_alltoall_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/sm/coll_sm_alltoallv.c b/ompi/mca/coll/sm/coll_sm_alltoallv.c index b28ca4a90f9..d5ae55b7ed5 100644 --- a/ompi/mca/coll/sm/coll_sm_alltoallv.c +++ b/ompi/mca/coll/sm/coll_sm_alltoallv.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,10 +31,10 @@ * Accepts: - same as MPI_Alltoallv() * Returns: - MPI_SUCCESS or an MPI error code */ -int mca_coll_sm_alltoallv_intra(void *sbuf, int *scounts, int *sdisps, +int mca_coll_sm_alltoallv_intra(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/sm/coll_sm_alltoallw.c b/ompi/mca/coll/sm/coll_sm_alltoallw.c index 8f8bbdded6c..977f2b869c0 100644 --- a/ompi/mca/coll/sm/coll_sm_alltoallw.c +++ b/ompi/mca/coll/sm/coll_sm_alltoallw.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,10 +31,10 @@ * Accepts: - same as MPI_Alltoallw() * Returns: - MPI_SUCCESS or an MPI error code */ -int mca_coll_sm_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, +int mca_coll_sm_alltoallw_intra(const void *sbuf, const int *scounts, const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t * const *rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/sm/coll_sm_barrier.c b/ompi/mca/coll/sm/coll_sm_barrier.c index 77aa6cd5c19..a3000b7d847 100644 --- a/ompi/mca/coll/sm/coll_sm_barrier.c +++ b/ompi/mca/coll/sm/coll_sm_barrier.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file */ @@ -69,7 +69,7 @@ int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm, } } - uint_control_size = + uint_control_size = mca_coll_sm_component.sm_control_size / sizeof(uint32_t); data = sm_module->sm_comm_data; rank = ompi_comm_rank(comm); @@ -83,7 +83,7 @@ int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm, if (0 != num_children) { /* Get children *out* buffer */ - children = data->mcb_barrier_control_children + buffer_set + + children = data->mcb_barrier_control_children + buffer_set + uint_control_size; SPIN_CONDITION(*me_in == num_children, exit_label1); *me_in = 0; diff --git a/ompi/mca/coll/sm/coll_sm_bcast.c b/ompi/mca/coll/sm/coll_sm_bcast.c index 7817481045b..754581ea160 100644 --- a/ompi/mca/coll/sm/coll_sm_bcast.c +++ b/ompi/mca/coll/sm/coll_sm_bcast.c @@ -5,23 +5,21 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file */ #include "ompi_config.h" -#ifdef HAVE_STRING_H #include -#endif #include "opal/datatype/opal_convertor.h" #include "ompi/constants.h" @@ -55,8 +53,8 @@ * have children, they copy the data directly from the parent's shared * data segment into the user's output buffer. 
*/ -int mca_coll_sm_bcast_intra(void *buff, int count, - struct ompi_datatype_t *datatype, int root, +int mca_coll_sm_bcast_intra(void *buff, int count, + struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { @@ -109,11 +107,11 @@ int mca_coll_sm_bcast_intra(void *buff, int count, /* The root needs a send convertor to pack from the user's buffer to shared memory */ - if (OMPI_SUCCESS != - (ret = + if (OMPI_SUCCESS != + (ret = opal_convertor_copy_and_prepare_for_send(ompi_mpi_local_convertor, &(datatype->super), - count, + count, buff, 0, &convertor))) { @@ -124,7 +122,7 @@ int mca_coll_sm_bcast_intra(void *buff, int count, /* Main loop over sending fragments */ do { - flag_num = (data->mcb_operation_count++ % + flag_num = (data->mcb_operation_count++ % mca_coll_sm_component.sm_comm_num_in_use_flags); FLAG_SETUP(flag_num, flag, data); @@ -132,10 +130,10 @@ int mca_coll_sm_bcast_intra(void *buff, int count, FLAG_RETAIN(flag, size - 1, data->mcb_operation_count - 1); /* Loop over all the segments in this set */ - - segment_num = + + segment_num = flag_num * mca_coll_sm_component.sm_segs_per_inuse_flag; - max_segment_num = + max_segment_num = (flag_num + 1) * mca_coll_sm_component.sm_segs_per_inuse_flag; do { index = &(data->mcb_data_index[segment_num]); @@ -145,10 +143,10 @@ int mca_coll_sm_bcast_intra(void *buff, int count, max_data = mca_coll_sm_component.sm_fragment_size; COPY_FRAGMENT_IN(convertor, index, rank, iov, max_data); bytes += max_data; - + /* Wait for the write to absolutely complete */ opal_atomic_wmb(); - + /* Tell my children that this fragment is ready */ PARENT_NOTIFY_CHILDREN(children, num_children, index, max_data); @@ -167,11 +165,11 @@ int mca_coll_sm_bcast_intra(void *buff, int count, /* Non-root processes need a receive convertor to unpack from shared mmory to the user's buffer */ - if (OMPI_SUCCESS != - (ret = + if (OMPI_SUCCESS != + (ret = 
opal_convertor_copy_and_prepare_for_recv(ompi_mpi_local_convertor, &(datatype->super), - count, + count, buff, 0, &convertor))) { @@ -181,9 +179,9 @@ int mca_coll_sm_bcast_intra(void *buff, int count, /* Loop over receiving (and possibly re-sending) the fragments */ - + do { - flag_num = (data->mcb_operation_count % + flag_num = (data->mcb_operation_count % mca_coll_sm_component.sm_comm_num_in_use_flags); /* Wait for the root to mark this set of segments as @@ -193,10 +191,10 @@ int mca_coll_sm_bcast_intra(void *buff, int count, ++data->mcb_operation_count; /* Loop over all the segments in this set */ - - segment_num = + + segment_num = flag_num * mca_coll_sm_component.sm_segs_per_inuse_flag; - max_segment_num = + max_segment_num = (flag_num + 1) * mca_coll_sm_component.sm_segs_per_inuse_flag; do { @@ -206,20 +204,20 @@ int mca_coll_sm_bcast_intra(void *buff, int count, /* Wait for my parent to tell me that the segment is ready */ CHILD_WAIT_FOR_NOTIFY(rank, index, max_data, bcast_nonroot_label2); - + /* If I have children, send the data to them */ if (num_children > 0) { /* Copy the fragment from the parent's portion in the segment to my portion in the segment. 
*/ COPY_FRAGMENT_BETWEEN(parent_rank, rank, index, max_data); - + /* Wait for the write to absolutely complete */ opal_atomic_wmb(); - + /* Tell my children that this fragment is ready */ PARENT_NOTIFY_CHILDREN(children, num_children, index, max_data); - + /* Set the "copy from buffer" to be my local segment buffer so that we don't potentially incur a non-local memory copy from the parent's @@ -227,15 +225,15 @@ int mca_coll_sm_bcast_intra(void *buff, int count, the user's buffer */ src_rank = rank; } - + /* If I don't have any children, set the "copy from buffer" to be my parent's fan out segment to copy directly from my parent */ - + else { src_rank = parent_rank; } - + /* Copy to my output buffer */ COPY_FRAGMENT_OUT(convertor, src_rank, index, iov, max_data); diff --git a/ompi/mca/coll/sm/coll_sm_component.c b/ompi/mca/coll/sm/coll_sm_component.c index 2c068ed7b9a..d88f42470ce 100644 --- a/ompi/mca/coll/sm/coll_sm_component.c +++ b/ompi/mca/coll/sm/coll_sm_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -62,7 +62,7 @@ mca_coll_sm_component_t mca_coll_sm_component = { { /* First, the mca_component_t struct containing meta information about the component itself */ - + .collm_version = { MCA_COLL_BASE_VERSION_2_0_0, @@ -132,7 +132,7 @@ static int sm_verify_mca_variables(void) mca_coll_sm_component_t *cs = &mca_coll_sm_component; if (0 != (cs->sm_fragment_size % cs->sm_control_size)) { - cs->sm_fragment_size += cs->sm_control_size - + cs->sm_fragment_size += cs->sm_control_size - (cs->sm_fragment_size % cs->sm_control_size); } @@ -144,20 +144,20 @@ static int sm_verify_mca_variables(void) cs->sm_comm_num_segments = cs->sm_comm_num_in_use_flags; } if (0 != (cs->sm_comm_num_segments % cs->sm_comm_num_in_use_flags)) { - cs->sm_comm_num_segments += cs->sm_comm_num_in_use_flags - + cs->sm_comm_num_segments += cs->sm_comm_num_in_use_flags - (cs->sm_comm_num_segments % cs->sm_comm_num_in_use_flags); } - cs->sm_segs_per_inuse_flag = + cs->sm_segs_per_inuse_flag = cs->sm_comm_num_segments / cs->sm_comm_num_in_use_flags; if (cs->sm_tree_degree > cs->sm_control_size) { - opal_show_help("help-mpi-coll-sm.txt", + opal_show_help("help-mpi-coll-sm.txt", "tree-degree-larger-than-control", true, cs->sm_tree_degree, cs->sm_control_size); cs->sm_tree_degree = cs->sm_control_size; } if (cs->sm_tree_degree > 255) { - opal_show_help("help-mpi-coll-sm.txt", + opal_show_help("help-mpi-coll-sm.txt", "tree-degree-larger-than-255", true, cs->sm_tree_degree); cs->sm_tree_degree = 255; diff --git a/ompi/mca/coll/sm/coll_sm_exscan.c b/ompi/mca/coll/sm/coll_sm_exscan.c index c17e1f402d1..be008f226f4 100644 --- a/ompi/mca/coll/sm/coll_sm_exscan.c +++ b/ompi/mca/coll/sm/coll_sm_exscan.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,9 +31,9 @@ * Accepts: - same arguments as MPI_Exscan() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_sm_exscan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_sm_exscan_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/sm/coll_sm_gather.c b/ompi/mca/coll/sm/coll_sm_gather.c index 61218b50882..51cd944f25d 100644 --- a/ompi/mca/coll/sm/coll_sm_gather.c +++ b/ompi/mca/coll/sm/coll_sm_gather.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,14 +24,14 @@ #include "coll_sm.h" -/* +/* * gather * * Function: - shared memory gather * Accepts: - same as MPI_Gather() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_sm_gather_intra(void *sbuf, int scount, +int mca_coll_sm_gather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/sm/coll_sm_gatherv.c b/ompi/mca/coll/sm/coll_sm_gatherv.c index fee7edcbbb8..601b3b18234 100644 --- a/ompi/mca/coll/sm/coll_sm_gatherv.c +++ b/ompi/mca/coll/sm/coll_sm_gatherv.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,9 +31,9 @@ * Accepts: - same arguments as MPI_Gatherb() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_sm_gatherv_intra(void *sbuf, int scount, +int mca_coll_sm_gatherv_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, + void *rbuf, const int *rcounts, const int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) diff --git a/ompi/mca/coll/sm/coll_sm_module.c b/ompi/mca/coll/sm/coll_sm_module.c index 1bd83f6ca37..716faaf8915 100644 --- a/ompi/mca/coll/sm/coll_sm_module.c +++ b/ompi/mca/coll/sm/coll_sm_module.c @@ -11,10 +11,11 @@ * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Los Alamos National Security, LLC. + * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -34,9 +35,7 @@ #include "ompi_config.h" #include -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_SCHED_H #include #endif @@ -75,7 +74,6 @@ uint32_t mca_coll_sm_one = 1; */ static int sm_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); -static bool have_local_peers(ompi_group_t *group, size_t size); static int bootstrap_comm(ompi_communicator_t *comm, mca_coll_sm_module_t *module); static int mca_coll_sm_module_disable(mca_coll_base_module_t *module, @@ -173,8 +171,7 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority) /* If we're intercomm, or if there's only one process in the communicator, or if not all the processes in the communicator are not on this node, then we don't want to run */ - if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) || - !have_local_peers(comm->c_local_group, ompi_comm_size(comm))) { + if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) || ompi_group_have_remote_peers (comm->c_local_group)) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, "coll:sm:comm_query (%d/%s): intercomm, comm is too small, or not all peers local; disqualifying myself", comm->c_contextid, comm->c_name); return NULL; @@ -196,7 +193,7 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority) /* All is good -- return a module */ sm_module->super.coll_module_enable = sm_module_enable; - sm_module->super.ft_event = mca_coll_sm_ft_event; + sm_module->super.ft_event = NULL; sm_module->super.coll_allgather = NULL; sm_module->super.coll_allgatherv = NULL; sm_module->super.coll_allreduce = mca_coll_sm_allreduce_intra; @@ -249,9 +246,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, mca_coll_sm_comm_t *data = NULL; size_t control_size, frag_size; mca_coll_sm_component_t *c = &mca_coll_sm_component; -#if OPAL_HAVE_HWLOC opal_hwloc_base_memory_segment_t *maffinity; -#endif int parent, min_child, 
num_children; unsigned char *base = NULL; const int num_barrier_buffers = 2; @@ -262,7 +257,6 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, } sm_module->enabled = true; -#if OPAL_HAVE_HWLOC /* Get some space to setup memory affinity (just easier to try to alloc here to handle the error case) */ maffinity = (opal_hwloc_base_memory_segment_t*) @@ -274,7 +268,6 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, comm->c_contextid, comm->c_name); return OMPI_ERR_OUT_OF_RESOURCE; } -#endif /* Allocate data to hang off the communicator. The memory we alloc will be laid out as follows: @@ -296,9 +289,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, (sizeof(mca_coll_sm_tree_node_t) + (sizeof(mca_coll_sm_tree_node_t*) * c->sm_tree_degree)))); if (NULL == data) { -#if OPAL_HAVE_HWLOC free(maffinity); -#endif opal_output_verbose(10, ompi_coll_base_framework.framework_output, "coll:sm:enable (%d/%s): malloc failed (2)", comm->c_contextid, comm->c_name); @@ -363,9 +354,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, /* Attach to this communicator's shmem data segment */ if (OMPI_SUCCESS != (ret = bootstrap_comm(comm, sm_module))) { free(data); -#if OPAL_HAVE_HWLOC free(maffinity); -#endif sm_module->sm_comm_data = NULL; return ret; } @@ -411,11 +400,9 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, that they're marked as unused. 
*/ j = 0; if (0 == rank) { -#if OPAL_HAVE_HWLOC maffinity[j].mbs_start_addr = base; maffinity[j].mbs_len = c->sm_control_size * c->sm_comm_num_in_use_flags; -#endif /* Set the op counts to 1 (actually any nonzero value will do) so that the first time children/leaf processes come through, they don't see a value of 0 and think that the @@ -440,7 +427,6 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, (((char*) data->mcb_data_index[i].mcbmi_control) + control_size); -#if OPAL_HAVE_HWLOC /* Memory affinity: control */ maffinity[j].mbs_len = c->sm_control_size; @@ -456,15 +442,12 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, ((char*) data->mcb_data_index[i].mcbmi_data) + (rank * c->sm_control_size); ++j; -#endif } -#if OPAL_HAVE_HWLOC /* Setup memory affinity so that the pages that belong to this process are local to this process */ opal_hwloc_base_memory_set(maffinity, j); free(maffinity); -#endif /* Zero out the control structures that belong to this process */ memset(data->mcb_barrier_control_me, 0, @@ -505,23 +488,6 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, return OMPI_SUCCESS; } - -static bool have_local_peers(ompi_group_t *group, size_t size) -{ - size_t i; - ompi_proc_t *proc; - - for (i = 0; i < size; ++i) { - proc = ompi_group_peer_lookup(group,i); - if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) { - return false; - } - } - - return true; -} - - static int bootstrap_comm(ompi_communicator_t *comm, mca_coll_sm_module_t *module) { @@ -623,23 +589,3 @@ static int bootstrap_comm(ompi_communicator_t *comm, return OMPI_SUCCESS; } - -int mca_coll_sm_ft_event(int state) { - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/sm/coll_sm_reduce.c b/ompi/mca/coll/sm/coll_sm_reduce.c index 
3f34094fb6a..c731b87d2b8 100644 --- a/ompi/mca/coll/sm/coll_sm_reduce.c +++ b/ompi/mca/coll/sm/coll_sm_reduce.c @@ -2,26 +2,26 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" -#ifdef HAVE_STRING_H #include -#endif #include "opal/datatype/opal_convertor.h" #include "opal/sys/atomic.h" @@ -35,16 +35,16 @@ /* * Local functions */ -static int reduce_inorder(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +static int reduce_inorder(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); #define WANT_REDUCE_NO_ORDER 0 #if WANT_REDUCE_NO_ORDER -static int reduce_no_order(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +static int reduce_no_order(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module); #endif @@ -64,9 +64,9 @@ static inline int min(int a, int b) * * Simply farms out to 
the associative or non-associative functions. */ -int mca_coll_sm_reduce_intra(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_sm_reduce_intra(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { @@ -90,25 +90,25 @@ int mca_coll_sm_reduce_intra(void *sbuf, void* rbuf, int count, return sm_module->previous_reduce(sbuf, rbuf, count, dtype, op, root, comm, sm_module->previous_reduce_module); - } + } #if WANT_REDUCE_NO_ORDER else { /* Lazily enable the module the first time we invoke a collective on it */ if (!sm_module->enabled) { - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = ompi_coll_sm_lazy_enable(module, comm))) { return ret; } } - + if (!ompi_op_is_intrinsic(op) || (ompi_op_is_intrinsic(op) && !ompi_op_is_float_assoc(op) && 0 != (dtype->flags & OMPI_DATATYPE_FLAG_DATA_FLOAT))) { - return reduce_inorder(sbuf, rbuf, count, dtype, op, + return reduce_inorder(sbuf, rbuf, count, dtype, op, root, comm, module); } else { - return reduce_no_order(sbuf, rbuf, count, dtype, op, + return reduce_no_order(sbuf, rbuf, count, dtype, op, root, comm, module); } } @@ -119,7 +119,7 @@ int mca_coll_sm_reduce_intra(void *sbuf, void* rbuf, int count, if (!sm_module->enabled) { int ret; - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = ompi_coll_sm_lazy_enable(module, comm))) { return ret; } @@ -139,7 +139,7 @@ int mca_coll_sm_reduce_intra(void *sbuf, void* rbuf, int count, * (result operation 3), etc. 
* * Root's algorithm: - * + * * If our datatype is "friendly" (i.e., the representation of the * buffer is the same packed as it is unpacked), then the root doesn't * need a temporary buffer -- we can combine the operands directly @@ -154,7 +154,7 @@ int mca_coll_sm_reduce_intra(void *sbuf, void* rbuf, int count, * called, we know that the datattype is smaller than the max size of * a fragment, so this is definitely possible) * - * 2. loop over all the processes -- 0 to (comm_size-1). + * 2. loop over all the processes -- 0 to (comm_size-1). * For process 0: * - if the root==0, copy the *entire* buffer (i.e., don't copy * fragment by fragment -- might as well copy the entire thing) the @@ -173,9 +173,9 @@ int mca_coll_sm_reduce_intra(void *sbuf, void* rbuf, int count, */ -static int reduce_inorder(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +static int reduce_inorder(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { @@ -187,9 +187,9 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, size_t total_size, max_data, bytes; mca_coll_sm_in_use_flag_t *flag; mca_coll_sm_data_index_t *index; - size_t ddt_size; + size_t ddt_size, segsize; size_t segment_ddt_count, segment_ddt_bytes, zero = 0; - ptrdiff_t true_lb, true_extent, lb, extent; + ptrdiff_t extent, gap; /* Setup some identities */ @@ -205,10 +205,7 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, /* ddt_size is the packed size (e.g., MPI_SHORT_INT is 6) */ ompi_datatype_type_size(dtype, &ddt_size); /* extent is from lb to ub (e.g., MPI_SHORT_INT is 8) */ - ompi_datatype_get_extent(dtype, &lb, &extent); - /* true_extent is extent of actual type map, ignoring lb and ub - (e.g., MPI_SHORT_INT is 8) */ - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + ompi_datatype_type_extent(dtype, &extent); 
segment_ddt_count = mca_coll_sm_component.sm_fragment_size / ddt_size; iov.iov_len = segment_ddt_bytes = segment_ddt_count * ddt_size; total_size = ddt_size * count; @@ -223,7 +220,7 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, /********************************************************************* * Root *********************************************************************/ - + if (root == rank) { opal_convertor_t rtb_convertor, rbuf_convertor; char *reduce_temp_buffer, *free_buffer, *reduce_target; @@ -239,7 +236,7 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, representation is not the same, then we need to get a receive convertor and a temporary buffer to receive into. */ - + if (ompi_datatype_is_contiguous_memory_layout(dtype, count)) { reduce_temp_buffer = free_buffer = NULL; } else { @@ -265,23 +262,24 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, entire user buffer) -- we only need to be able to hold "segment_ddt_count" instances (i.e., the number of instances that can be held in a single fragment) */ - - free_buffer = (char*)malloc(true_extent + - (segment_ddt_count - 1) * extent); + + segsize = opal_datatype_span(&dtype->super, segment_ddt_count, &gap); + + free_buffer = (char*)malloc(segsize); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - reduce_temp_buffer = free_buffer - true_lb; - + reduce_temp_buffer = free_buffer - gap; + /* Trickery here: we use a potentially smaller count than the user count -- use the largest count that is <= user's count that will fit within a single segment. 
*/ - - if (OMPI_SUCCESS != + + if (OMPI_SUCCESS != (ret = opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor, &(dtype->super), - segment_ddt_count, + segment_ddt_count, reduce_temp_buffer, 0, &rtb_convertor))) { @@ -291,11 +289,11 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, /* See if we need the rbuf_convertor */ if (size - 1 != rank) { - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor, &(dtype->super), - count, + count, rbuf, 0, &rbuf_convertor))) { @@ -312,23 +310,24 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, as the sbuf */ if (MPI_IN_PLACE == sbuf && (size - 1) != rank) { - inplace_temp = (char*)malloc(true_extent + (count - 1) * extent); + segsize = opal_datatype_span(&dtype->super, count, &gap); + inplace_temp = (char*)malloc(segsize); if (NULL == inplace_temp) { if (NULL != free_buffer) { free(free_buffer); } return OMPI_ERR_OUT_OF_RESOURCE; } - sbuf = inplace_temp - true_lb; - ompi_datatype_copy_content_same_ddt(dtype, count, (char *) sbuf, (char *) rbuf); + sbuf = inplace_temp - gap; + ompi_datatype_copy_content_same_ddt(dtype, count, (char *)sbuf, (char *)rbuf); } else { inplace_temp = NULL; } - + /* Main loop over receiving / reducing fragments */ do { - flag_num = (data->mcb_operation_count % + flag_num = (data->mcb_operation_count % mca_coll_sm_component.sm_comm_num_in_use_flags); FLAG_SETUP(flag_num, flag, data); FLAG_WAIT_FOR_IDLE(flag, reduce_root_flag_label); @@ -336,10 +335,10 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, ++data->mcb_operation_count; /* Loop over all the segments in this set */ - - segment_num = + + segment_num = flag_num * mca_coll_sm_component.sm_segs_per_inuse_flag; - max_segment_num = + max_segment_num = (flag_num + 1) * mca_coll_sm_component.sm_segs_per_inuse_flag; reduce_target = (((char*) rbuf) + (frag_num * extent * segment_ddt_count)); do { @@ -364,7 +363,7 @@ static int 
reduce_inorder(void *sbuf, void* rbuf, int count, reduce_target, (char*)sbuf); } } - } + } /* Process (size-1) is not the root */ else { @@ -372,19 +371,19 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, like any other non-root process */ index = &(data->mcb_data_index[segment_num]); PARENT_WAIT_FOR_NOTIFY_SPECIFIC(size - 1, rank, index, max_data, reduce_root_parent_label1); - + /* If the datatype is contiguous, just copy it straight to the reduce_target */ if (NULL == free_buffer) { memcpy(reduce_target, ((char*)index->mcbmi_data) + (size - 1) * mca_coll_sm_component.sm_fragment_size, max_data); - } + } /* If the datatype is noncontiguous, use the rbuf_convertor to unpack it straight to the rbuf */ else { max_data = segment_ddt_bytes; - COPY_FRAGMENT_OUT(rbuf_convertor, size - 1, index, + COPY_FRAGMENT_OUT(rbuf_convertor, size - 1, index, iov, max_data); } } @@ -402,7 +401,7 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, copy into shmem -- just reduce directly from my sbuf. */ if (rank == peer) { - ompi_op_reduce(op, + ompi_op_reduce(op, ((char *) sbuf) + frag_num * extent * segment_ddt_count, reduce_target, @@ -415,42 +414,42 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, the segment into shmem. */ else { index = &(data->mcb_data_index[segment_num]); - PARENT_WAIT_FOR_NOTIFY_SPECIFIC(peer, rank, + PARENT_WAIT_FOR_NOTIFY_SPECIFIC(peer, rank, index, max_data, reduce_root_parent_label2); - + /* If we don't need an extra buffer, then do the reduction operation on the fragment straight from the shmem. 
*/ - + if (NULL == free_buffer) { ompi_op_reduce(op, - (index->mcbmi_data + + (index->mcbmi_data + (peer * mca_coll_sm_component.sm_fragment_size)), - reduce_target, + reduce_target, min(count_left, segment_ddt_count), dtype); } - + /* Otherwise, unpack the fragment to the temporary buffer and then do the reduction from there */ - + else { /* Unpack the fragment into my temporary buffer */ max_data = segment_ddt_bytes; - COPY_FRAGMENT_OUT(rtb_convertor, peer, index, + COPY_FRAGMENT_OUT(rtb_convertor, peer, index, iov, max_data); opal_convertor_set_position(&rtb_convertor, &zero); - + /* Do the reduction on this fragment */ ompi_op_reduce(op, reduce_temp_buffer, - reduce_target, + reduce_target, min(count_left, segment_ddt_count), dtype); } } /* whether this process was me or not */ } /* loop over all proceses */ - + /* We've iterated through all the processes -- now we move on to the next segment */ @@ -466,7 +465,7 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, } while (bytes < total_size); /* Kill the convertor, if we had one */ - + if (NULL != free_buffer) { OBJ_DESTRUCT(&rtb_convertor); OBJ_DESTRUCT(&rbuf_convertor); @@ -487,11 +486,11 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, opal_convertor_t sbuf_convertor; OBJ_CONSTRUCT(&sbuf_convertor, opal_convertor_t); - if (OMPI_SUCCESS != - (ret = + if (OMPI_SUCCESS != + (ret = opal_convertor_copy_and_prepare_for_send(ompi_mpi_local_convertor, &(dtype->super), - count, + count, sbuf, 0, &sbuf_convertor))) { @@ -499,9 +498,9 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, } /* Loop over sending fragments to the root */ - + do { - flag_num = (data->mcb_operation_count % + flag_num = (data->mcb_operation_count % mca_coll_sm_component.sm_comm_num_in_use_flags); /* Wait for the root to mark this set of segments as @@ -512,9 +511,9 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, /* Loop over all the segments in this set */ - segment_num = + segment_num = 
flag_num * mca_coll_sm_component.sm_segs_per_inuse_flag; - max_segment_num = + max_segment_num = (flag_num + 1) * mca_coll_sm_component.sm_segs_per_inuse_flag; do { index = &(data->mcb_data_index[segment_num]); @@ -527,7 +526,7 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, /* Wait for the write to absolutely complete */ opal_atomic_wmb(); - + /* Tell my parent (always the reduction root -- we're ignoring the mcb_tree parent/child relationships here) that this fragment is ready */ @@ -558,9 +557,9 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count, * This function performs the reduction in whatever order the operands * arrive. */ -static int reduce_no_order(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +static int reduce_no_order(const void *sbuf, void* rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/sm/coll_sm_reduce_scatter.c b/ompi/mca/coll/sm/coll_sm_reduce_scatter.c index d05c8c9bab7..0ef35a0d896 100644 --- a/ompi/mca/coll/sm/coll_sm_reduce_scatter.c +++ b/ompi/mca/coll/sm/coll_sm_reduce_scatter.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,7 +31,7 @@ * Accepts: - same as MPI_Reduce_scatter() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_sm_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, +int mca_coll_sm_reduce_scatter_intra(const void *sbuf, void *rbuf, const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/sm/coll_sm_scan.c b/ompi/mca/coll/sm/coll_sm_scan.c index 1e42ba419fd..4a2ce3e6001 100644 --- a/ompi/mca/coll/sm/coll_sm_scan.c +++ b/ompi/mca/coll/sm/coll_sm_scan.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,9 +31,9 @@ * Accepts: - same arguments as MPI_Scan() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_sm_scan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, +int mca_coll_sm_scan_intra(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { diff --git a/ompi/mca/coll/sm/coll_sm_scatter.c b/ompi/mca/coll/sm/coll_sm_scatter.c index d14a9c60f60..3b7da5550ad 100644 --- a/ompi/mca/coll/sm/coll_sm_scatter.c +++ b/ompi/mca/coll/sm/coll_sm_scatter.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,14 +24,14 @@ #include "coll_sm.h" -/* +/* * scatter * * Function: - shared memory reduce * Accepts: - same as MPI_Scatter() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_sm_scatter_intra(void *sbuf, int scount, +int mca_coll_sm_scatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/sm/coll_sm_scatterv.c b/ompi/mca/coll/sm/coll_sm_scatterv.c index 8519d390cd1..ae6e08a687c 100644 --- a/ompi/mca/coll/sm/coll_sm_scatterv.c +++ b/ompi/mca/coll/sm/coll_sm_scatterv.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,8 +31,8 @@ * Accepts: - same arguments as MPI_Scatterv() * Returns: - MPI_SUCCESS or error code */ -int mca_coll_sm_scatterv_intra(void *sbuf, int *scounts, - int *disps, struct ompi_datatype_t *sdtype, +int mca_coll_sm_scatterv_intra(const void *sbuf, const int *scounts, + const int *disps, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/sm/help-mpi-coll-sm.txt b/ompi/mca/coll/sm/help-mpi-coll-sm.txt index 67eb939f030..ce42bb65dfb 100644 --- a/ompi/mca/coll/sm/help-mpi-coll-sm.txt +++ b/ompi/mca/coll/sm/help-mpi-coll-sm.txt @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for Open MPI's Shared memory diff --git a/ompi/mca/coll/sm/owner.txt b/ompi/mca/coll/sm/owner.txt index 07ad487d187..29b7476009c 100644 --- a/ompi/mca/coll/sm/owner.txt +++ b/ompi/mca/coll/sm/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: LANL? 
-status: maintenance +owner: nobody +status: unmaintained diff --git a/ompi/mca/coll/sync/Makefile.am b/ompi/mca/coll/sync/Makefile.am new file mode 100644 index 00000000000..61c2437e96e --- /dev/null +++ b/ompi/mca/coll/sync/Makefile.am @@ -0,0 +1,52 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Intel, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dist_ompidata_DATA = help-coll-sync.txt + +sources = \ + coll_sync.h \ + coll_sync_component.c \ + coll_sync_module.c \ + coll_sync_bcast.c \ + coll_sync_exscan.c \ + coll_sync_gather.c \ + coll_sync_gatherv.c \ + coll_sync_reduce.c \ + coll_sync_reduce_scatter.c \ + coll_sync_scan.c \ + coll_sync_scatter.c \ + coll_sync_scatterv.c + +if MCA_BUILD_ompi_coll_sync_DSO +component_noinst = +component_install = mca_coll_sync.la +else +component_noinst = libmca_coll_sync.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_coll_sync_la_SOURCES = $(sources) +mca_coll_sync_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_coll_sync_la_SOURCES =$(sources) +libmca_coll_sync_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/coll/sync/coll_sync.h b/ompi/mca/coll/sync/coll_sync.h new file mode 100644 index 00000000000..68fdb534ce8 --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2004-2007 The 
Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_COLL_SYNC_EXPORT_H +#define MCA_COLL_SYNC_EXPORT_H + +#include "ompi_config.h" + +#include "mpi.h" + +#include "opal/class/opal_object.h" +#include "opal/mca/mca.h" +#include "opal/util/output.h" + +#include "ompi/constants.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/mca/coll/base/base.h" +#include "ompi/communicator/communicator.h" + +BEGIN_C_DECLS + +/* API functions */ + +int mca_coll_sync_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +mca_coll_base_module_t +*mca_coll_sync_comm_query(struct ompi_communicator_t *comm, + int *priority); + +int mca_coll_sync_module_enable(mca_coll_base_module_t *module, + struct ompi_communicator_t *comm); + +int mca_coll_sync_barrier(struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_sync_bcast(void *buff, int count, + struct ompi_datatype_t *datatype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_sync_exscan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_sync_gather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + 
mca_coll_base_module_t *module); + +int mca_coll_sync_gatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_sync_reduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_sync_reduce_scatter(const void *sbuf, void *rbuf, + const int *rcounts, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_sync_scan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_sync_scatter(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_sync_scatterv(const void *sbuf, const int *scounts, + const int *disps, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + + +/* Types */ +/* Module */ + +typedef struct mca_coll_sync_module_t { + mca_coll_base_module_t super; + + /* Pointers to all the "real" collective functions */ + mca_coll_base_comm_coll_t c_coll; + + /* How many ops we've executed */ + int before_num_operations; + + /* How many ops we've executed (it's easier to have 2) */ + int after_num_operations; + + /* Avoid recursion of syncs */ + bool in_operation; +} mca_coll_sync_module_t; + +OBJ_CLASS_DECLARATION(mca_coll_sync_module_t); + +/* Component */ + +typedef struct mca_coll_sync_component_t { + 
mca_coll_base_component_2_0_0_t super; + + /* Priority of this component */ + int priority; + + /* Do a sync *before* each Nth collective */ + int barrier_before_nops; + + /* Do a sync *after* each Nth collective */ + int barrier_after_nops; +} mca_coll_sync_component_t; + +/* Globally exported variables */ + +OMPI_MODULE_DECLSPEC extern mca_coll_sync_component_t mca_coll_sync_component; + +/* Macro used in most of the collectives */ + +#define COLL_SYNC(m, op) \ +do { \ + int err = MPI_SUCCESS; \ + (m)->in_operation = true; \ + if (OPAL_UNLIKELY(++((m)->before_num_operations) == \ + mca_coll_sync_component.barrier_before_nops)) { \ + (m)->before_num_operations = 0; \ + err = (m)->c_coll.coll_barrier(comm, (m)->c_coll.coll_barrier_module); \ + } \ + if (OPAL_LIKELY(MPI_SUCCESS == err)) { \ + err = op; \ + } \ + if (OPAL_UNLIKELY(++((m)->after_num_operations) == \ + mca_coll_sync_component.barrier_after_nops) && \ + OPAL_LIKELY(MPI_SUCCESS == err)) { \ + (m)->after_num_operations = 0; \ + err = (m)->c_coll.coll_barrier(comm, (m)->c_coll.coll_barrier_module); \ + } \ + (m)->in_operation = false; \ + return err; \ +} while(0) + +END_C_DECLS + +#endif /* MCA_COLL_SYNC_EXPORT_H */ diff --git a/ompi/mca/coll/sync/coll_sync_bcast.c b/ompi/mca/coll/sync/coll_sync_bcast.c new file mode 100644 index 00000000000..696f78dbc69 --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync_bcast.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "mpi.h" +#include "coll_sync.h" + + +/* + * bcast + * + * Function: - broadcast + * Accepts: - same arguments as MPI_Bcast() + * Returns: - MPI_SUCCESS or error code + */ +int mca_coll_sync_bcast(void *buff, int count, + struct ompi_datatype_t *datatype, int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module; + + if (s->in_operation) { + return s->c_coll.coll_bcast(buff, count, datatype, root, comm, + s->c_coll.coll_bcast_module); + } else { + COLL_SYNC(s, s->c_coll.coll_bcast(buff, count, datatype, root, comm, + s->c_coll.coll_bcast_module)); + } +} diff --git a/ompi/mca/coll/sync/coll_sync_component.c b/ompi/mca/coll/sync/coll_sync_component.c new file mode 100644 index 00000000000..46243f0c91e --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync_component.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "opal/util/output.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "coll_sync.h" + +/* + * Public string showing the coll ompi_sync component version number + */ +const char *mca_coll_sync_component_version_string = + "Open MPI sync collective MCA component version " OMPI_VERSION; + +/* + * Local function + */ +static int sync_register(void); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +mca_coll_sync_component_t mca_coll_sync_component = { + { + /* First, the mca_component_t struct containing meta information + * about the component itself */ + + .collm_version = { + MCA_COLL_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "sync", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + + /* Component open and close functions */ + .mca_register_component_params = sync_register + }, + .collm_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + /* Initialization / querying functions */ + + .collm_init_query = mca_coll_sync_init_query, + .collm_comm_query = mca_coll_sync_comm_query + }, +}; + + +static int sync_register(void) +{ + mca_base_component_t *c = &mca_coll_sync_component.super.collm_version; + + mca_coll_sync_component.priority = 50; + (void) mca_base_component_var_register(c, "priority", + "Priority of the sync coll component; only relevant if barrier_before or barrier_after is > 0", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_coll_sync_component.priority); + + mca_coll_sync_component.barrier_before_nops = 0; + (void) mca_base_component_var_register(c, "barrier_before", + "Do a synchronization before each Nth collective", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + 
OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_coll_sync_component.barrier_before_nops); + + mca_coll_sync_component.barrier_after_nops = 0; + (void) mca_base_component_var_register(c, "barrier_after", + "Do a synchronization after each Nth collective", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_coll_sync_component.barrier_after_nops); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/coll/sync/coll_sync_exscan.c b/ompi/mca/coll/sync/coll_sync_exscan.c new file mode 100644 index 00000000000..3759c8ea9b4 --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync_exscan.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "coll_sync.h" + + +/* + * exscan + * + * Function: - exscan + * Accepts: - same arguments as MPI_Exscan() + * Returns: - MPI_SUCCESS or error code + */ +int mca_coll_sync_exscan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module; + + if (s->in_operation) { + return s->c_coll.coll_exscan(sbuf, rbuf, count, dtype, op, comm, + s->c_coll.coll_exscan_module); + } else { + COLL_SYNC(s, s->c_coll.coll_exscan(sbuf, rbuf, count, dtype, op, comm, + s->c_coll.coll_exscan_module)); + } +} diff --git a/ompi/mca/coll/sync/coll_sync_gather.c b/ompi/mca/coll/sync/coll_sync_gather.c new file mode 100644 index 00000000000..2a49d93a934 --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync_gather.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "coll_sync.h" + + +/* + * gather + * + * Function: - gather + * Accepts: - same arguments as MPI_Gather() + * Returns: - MPI_SUCCESS or error code + */ +int mca_coll_sync_gather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module; + + if (s->in_operation) { + return s->c_coll.coll_gather(sbuf, scount, sdtype, + rbuf, rcount, rdtype, root, comm, + s->c_coll.coll_gather_module); + } else { + COLL_SYNC(s, s->c_coll.coll_gather(sbuf, scount, sdtype, + rbuf, rcount, rdtype, root, comm, + s->c_coll.coll_gather_module)); + } +} diff --git a/ompi/mca/coll/sync/coll_sync_gatherv.c b/ompi/mca/coll/sync/coll_sync_gatherv.c new file mode 100644 index 00000000000..3452a4c5762 --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync_gatherv.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2009-2017 Cisco Systems, Inc. 
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "coll_sync.h" + + +/* + * gatherv + * + * Function: - gatherv + * Accepts: - same arguments as MPI_Gatherv() + * Returns: - MPI_SUCCESS or error code + */ +int mca_coll_sync_gatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module; + + if (s->in_operation) { + return s->c_coll.coll_gatherv(sbuf, scount, sdtype, + rbuf, rcounts, disps, rdtype, root, comm, + s->c_coll.coll_gatherv_module); + } else { + COLL_SYNC(s, s->c_coll.coll_gatherv(sbuf, scount, sdtype, + rbuf, rcounts, disps, rdtype, + root, comm, + s->c_coll.coll_gatherv_module)); + } +} diff --git a/ompi/mca/coll/sync/coll_sync_module.c b/ompi/mca/coll/sync/coll_sync_module.c new file mode 100644 index 00000000000..f2b82980595 --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync_module.c @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#ifdef HAVE_STRING_H +#include +#endif +#include + +#include "coll_sync.h" + +#include "mpi.h" + +#include "orte/util/show_help.h" +#include "orte/util/proc_info.h" + +#include "ompi/constants.h" +#include "ompi/communicator/communicator.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/mca/coll/base/base.h" +#include "coll_sync.h" + + +static void mca_coll_sync_module_construct(mca_coll_sync_module_t *module) +{ + memset(&(module->c_coll), 0, sizeof(module->c_coll)); + module->before_num_operations = 0; + module->after_num_operations = 0; + module->in_operation = false; +} + +static void mca_coll_sync_module_destruct(mca_coll_sync_module_t *module) +{ + OBJ_RELEASE(module->c_coll.coll_bcast_module); + OBJ_RELEASE(module->c_coll.coll_gather_module); + OBJ_RELEASE(module->c_coll.coll_gatherv_module); + OBJ_RELEASE(module->c_coll.coll_reduce_module); + OBJ_RELEASE(module->c_coll.coll_reduce_scatter_module); + OBJ_RELEASE(module->c_coll.coll_scatter_module); + OBJ_RELEASE(module->c_coll.coll_scatterv_module); + /* If the exscan module is not NULL, then this was an + intracommunicator, and therefore scan will have a module as + well. */ + if (NULL != module->c_coll.coll_exscan_module) { + OBJ_RELEASE(module->c_coll.coll_exscan_module); + OBJ_RELEASE(module->c_coll.coll_scan_module); + } +} + +OBJ_CLASS_INSTANCE(mca_coll_sync_module_t, mca_coll_base_module_t, + mca_coll_sync_module_construct, + mca_coll_sync_module_destruct); + + +/* + * Initial query function that is invoked during MPI_INIT, allowing + * this component to disqualify itself if it doesn't support the + * required level of thread support. + */ +int mca_coll_sync_init_query(bool enable_progress_threads, + bool enable_mpi_threads) +{ + /* Nothing to do */ + return OMPI_SUCCESS; +} + + +/* + * Invoked when there's a new communicator that has been created. 
+ * Look at the communicator and decide which set of functions and + * priority we want to return. + */ +mca_coll_base_module_t * +mca_coll_sync_comm_query(struct ompi_communicator_t *comm, + int *priority) +{ + mca_coll_sync_module_t *sync_module; + + sync_module = OBJ_NEW(mca_coll_sync_module_t); + if (NULL == sync_module) { + return NULL; + } + + /* If both MCA params are 0, then disqualify us */ + if (0 == mca_coll_sync_component.barrier_before_nops && + 0 == mca_coll_sync_component.barrier_after_nops) { + return NULL; + } + *priority = mca_coll_sync_component.priority; + + /* Choose whether to use [intra|inter] */ + sync_module->super.coll_module_enable = mca_coll_sync_module_enable; + + /* The "all" versions are already synchronous. So no need for an + additional barrier there. */ + sync_module->super.coll_allgather = NULL; + sync_module->super.coll_allgatherv = NULL; + sync_module->super.coll_allreduce = NULL; + sync_module->super.coll_alltoall = NULL; + sync_module->super.coll_alltoallv = NULL; + sync_module->super.coll_alltoallw = NULL; + sync_module->super.coll_barrier = NULL; + sync_module->super.coll_bcast = mca_coll_sync_bcast; + sync_module->super.coll_exscan = mca_coll_sync_exscan; + sync_module->super.coll_gather = mca_coll_sync_gather; + sync_module->super.coll_gatherv = mca_coll_sync_gatherv; + sync_module->super.coll_reduce = mca_coll_sync_reduce; + sync_module->super.coll_reduce_scatter = mca_coll_sync_reduce_scatter; + sync_module->super.coll_scan = mca_coll_sync_scan; + sync_module->super.coll_scatter = mca_coll_sync_scatter; + sync_module->super.coll_scatterv = mca_coll_sync_scatterv; + + return &(sync_module->super); +} + + +/* + * Init module on the communicator + */ +int mca_coll_sync_module_enable(mca_coll_base_module_t *module, + struct ompi_communicator_t *comm) +{ + bool good = true; + char *msg = NULL; + mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module; + + /* Save the prior layer of coll functions */ + s->c_coll = 
comm->c_coll; + +#define CHECK_AND_RETAIN(name) \ + if (NULL == s->c_coll.coll_ ## name ## _module) { \ + good = false; \ + msg = #name; \ + } else if (good) { \ + OBJ_RETAIN(s->c_coll.coll_ ## name ## _module); \ + } + + CHECK_AND_RETAIN(bcast); + CHECK_AND_RETAIN(gather); + CHECK_AND_RETAIN(gatherv); + CHECK_AND_RETAIN(reduce); + CHECK_AND_RETAIN(reduce_scatter); + CHECK_AND_RETAIN(scatter); + CHECK_AND_RETAIN(scatterv); + if (!OMPI_COMM_IS_INTER(comm)) { + /* MPI does not define scan/exscan on intercommunicators */ + CHECK_AND_RETAIN(exscan); + CHECK_AND_RETAIN(scan); + } + + /* All done */ + if (good) { + return OMPI_SUCCESS; + } else { + orte_show_help("help-coll-sync.txt", "missing collective", true, + orte_process_info.nodename, + mca_coll_sync_component.priority, msg); + return OMPI_ERR_NOT_FOUND; + } +} + diff --git a/ompi/mca/coll/sync/coll_sync_reduce.c b/ompi/mca/coll/sync/coll_sync_reduce.c new file mode 100644 index 00000000000..eec178c3055 --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync_reduce.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "coll_sync.h" + + +/* + * reduce + * + * Function: - reduce + * Accepts: - same as MPI_Reduce() + * Returns: - MPI_SUCCESS or error code + */ +int mca_coll_sync_reduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module; + + if (s->in_operation) { + return s->c_coll.coll_reduce(sbuf, rbuf, count, dtype, op, root, comm, + s->c_coll.coll_reduce_module); + } else { + COLL_SYNC(s, s->c_coll.coll_reduce(sbuf, rbuf, count, dtype, + op, root, comm, + s->c_coll.coll_reduce_module)); + } +} diff --git a/ompi/mca/coll/sync/coll_sync_reduce_scatter.c b/ompi/mca/coll/sync/coll_sync_reduce_scatter.c new file mode 100644 index 00000000000..2fbd1456b26 --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync_reduce_scatter.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2009-2017 Cisco Systems, Inc. 
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "coll_sync.h" + + +/* + * reduce_scatter + * + * Function: - reduce then scatter + * Accepts: - same as MPI_Reduce_scatter() + * Returns: - MPI_SUCCESS or error code + */ +int mca_coll_sync_reduce_scatter(const void *sbuf, void *rbuf, + const int *rcounts, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module; + + if (s->in_operation) { + return s->c_coll.coll_reduce_scatter(sbuf, rbuf, rcounts, + dtype, op, comm, + s->c_coll.coll_reduce_scatter_module); + } else { + COLL_SYNC(s, s->c_coll.coll_reduce_scatter(sbuf, rbuf, rcounts, + dtype, op, comm, + s->c_coll.coll_reduce_scatter_module)); + } +} diff --git a/ompi/mca/coll/sync/coll_sync_scan.c b/ompi/mca/coll/sync/coll_sync_scan.c new file mode 100644 index 00000000000..9608bc7e83c --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync_scan.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "coll_sync.h" + + +/* + * scan + * + * Function: - scan + * Accepts: - same arguments as MPI_Scan() + * Returns: - MPI_SUCCESS or error code + */ +int mca_coll_sync_scan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module; + + if (s->in_operation) { + return s->c_coll.coll_scan(sbuf, rbuf, count, dtype, op, comm, + s->c_coll.coll_scan_module); + } else { + COLL_SYNC(s, s->c_coll.coll_scan(sbuf, rbuf, count, dtype, op, comm, + s->c_coll.coll_scan_module)); + } +} diff --git a/ompi/mca/coll/sync/coll_sync_scatter.c b/ompi/mca/coll/sync/coll_sync_scatter.c new file mode 100644 index 00000000000..3c093d86749 --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync_scatter.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "coll_sync.h" + + +/* + * scatter + * + * Function: - scatter + * Accepts: - same arguments as MPI_Scatter() + * Returns: - MPI_SUCCESS or error code + */ +int mca_coll_sync_scatter(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module; + + if (s->in_operation) { + return s->c_coll.coll_scatter(sbuf, scount, sdtype, + rbuf, rcount, rdtype, root, comm, + s->c_coll.coll_scatter_module); + } else { + COLL_SYNC(s, s->c_coll.coll_scatter(sbuf, scount, sdtype, + rbuf, rcount, rdtype, root, comm, + s->c_coll.coll_scatter_module)); + } +} diff --git a/ompi/mca/coll/sync/coll_sync_scatterv.c b/ompi/mca/coll/sync/coll_sync_scatterv.c new file mode 100644 index 00000000000..fdb1737c94f --- /dev/null +++ b/ompi/mca/coll/sync/coll_sync_scatterv.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2009-2017 Cisco Systems, Inc. 
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "coll_sync.h" + + +/* + * scatterv + * + * Function: - scatterv + * Accepts: - same arguments as MPI_Scatterv() + * Returns: - MPI_SUCCESS or error code + */ +int mca_coll_sync_scatterv(const void *sbuf, const int *scounts, + const int *disps, struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module; + + if (s->in_operation) { + return s->c_coll.coll_scatterv(sbuf, scounts, disps, sdtype, + rbuf, rcount, rdtype, root, comm, + s->c_coll.coll_scatterv_module); + } else { + COLL_SYNC(s, s->c_coll.coll_scatterv(sbuf, scounts, disps, sdtype, + rbuf, rcount, rdtype, root, comm, + s->c_coll.coll_scatterv_module)); + } +} diff --git a/ompi/mca/coll/sync/help-coll-sync.txt b/ompi/mca/coll/sync/help-coll-sync.txt new file mode 100644 index 00000000000..4a5c871207e --- /dev/null +++ b/ompi/mca/coll/sync/help-coll-sync.txt @@ -0,0 +1,22 @@ +# -*- text -*- +# +# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English general help file for Open MPI's sync +# collective component. +# +[missing collective] +The sync collective component in Open MPI was activated on a +communicator where it did not find an underlying collective operation +defined. This usually means that the sync collective module's +priority was not set high enough. Please try increasing sync's +priority. 
+ + Local host: %s + Sync coll module priority: %d + First discovered missing collective: %s diff --git a/ompi/mca/coll/sync/owner.txt b/ompi/mca/coll/sync/owner.txt new file mode 100644 index 00000000000..f6e2c96b062 --- /dev/null +++ b/ompi/mca/coll/sync/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: Intel +status: maintenance diff --git a/ompi/mca/coll/tuned/Makefile.am b/ompi/mca/coll/tuned/Makefile.am index f183f37a878..cc426671c5d 100644 --- a/ompi/mca/coll/tuned/Makefile.am +++ b/ompi/mca/coll/tuned/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2015 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/coll/tuned/coll_tuned.h b/ompi/mca/coll/tuned/coll_tuned.h index 51edda0421d..092056839de 100644 --- a/ompi/mca/coll/tuned/coll_tuned.h +++ b/ompi/mca/coll/tuned/coll_tuned.h @@ -3,10 +3,12 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -19,6 +21,7 @@ #include "ompi/mca/mca.h" #include "ompi/request/request.h" #include "ompi/mca/coll/base/coll_base_functions.h" +#include "opal/util/output.h" /* also need the dynamic rule structures */ #include "coll_tuned_dynamic_rules.h" @@ -81,9 +84,9 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority); /* API functions of decision functions and any implementations */ /* - * Note this gets long as we have to have a prototype for each + * Note this gets long as we have to have a prototype for each * MPI collective 4 times.. 2 for the comm type and 2 for each decision - * type. + * type. * we might cut down the decision prototypes by conditional compiling */ @@ -93,8 +96,6 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS); int ompi_coll_tuned_allgather_intra_do_forced(ALLGATHER_ARGS); int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize); int ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_allgather_inter_dec_fixed(ALLGATHER_ARGS); -int ompi_coll_tuned_allgather_inter_dec_dynamic(ALLGATHER_ARGS); /* All GatherV */ int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS); @@ -102,8 +103,6 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS); int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS); int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize); int ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_allgatherv_inter_dec_fixed(ALLGATHERV_ARGS); -int ompi_coll_tuned_allgatherv_inter_dec_dynamic(ALLGATHERV_ARGS); /* All Reduce */ int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS); @@ -111,8 +110,6 @@ int 
ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS); int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS); int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize); int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_allreduce_inter_dec_fixed(ALLREDUCE_ARGS); -int ompi_coll_tuned_allreduce_inter_dec_dynamic(ALLREDUCE_ARGS); /* AlltoAll */ int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS); @@ -120,8 +117,6 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS); int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS); int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests); int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_alltoall_inter_dec_fixed(ALLTOALL_ARGS); -int ompi_coll_tuned_alltoall_inter_dec_dynamic(ALLTOALL_ARGS); /* AlltoAllV */ int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS); @@ -129,14 +124,6 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS); int ompi_coll_tuned_alltoallv_intra_do_forced(ALLTOALLV_ARGS); int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm); int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_alltoallv_inter_dec_fixed(ALLTOALLV_ARGS); -int ompi_coll_tuned_alltoallv_inter_dec_dynamic(ALLTOALLV_ARGS); - -/* AlltoAllW */ -int ompi_coll_tuned_alltoallw_intra_dec_fixed(ALLTOALLW_ARGS); -int ompi_coll_tuned_alltoallw_intra_dec_dynamic(ALLTOALLW_ARGS); -int ompi_coll_tuned_alltoallw_inter_dec_fixed(ALLTOALLW_ARGS); -int ompi_coll_tuned_alltoallw_inter_dec_dynamic(ALLTOALLW_ARGS); /* Barrier */ int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS); @@ -144,24 +131,13 @@ 
int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS); int ompi_coll_tuned_barrier_intra_do_forced(BARRIER_ARGS); int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize); int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_barrier_inter_dec_fixed(BARRIER_ARGS); -int ompi_coll_tuned_barrier_inter_dec_dynamic(BARRIER_ARGS); /* Bcast */ -int ompi_coll_tuned_bcast_intra_generic( BCAST_ARGS, uint32_t count_by_segment, ompi_coll_tree_t* tree ); int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS); int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS); int ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS); int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize); int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_bcast_inter_dec_fixed(BCAST_ARGS); -int ompi_coll_tuned_bcast_inter_dec_dynamic(BCAST_ARGS); - -/* Exscan */ -int ompi_coll_tuned_exscan_intra_dec_fixed(EXSCAN_ARGS); -int ompi_coll_tuned_exscan_intra_dec_dynamic(EXSCAN_ARGS); -int ompi_coll_tuned_exscan_inter_dec_fixed(EXSCAN_ARGS); -int ompi_coll_tuned_exscan_inter_dec_dynamic(EXSCAN_ARGS); /* Gather */ int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS); @@ -169,24 +145,13 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS); int ompi_coll_tuned_gather_intra_do_forced(GATHER_ARGS); int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize); int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_gather_inter_dec_fixed(GATHER_ARGS); -int ompi_coll_tuned_gather_inter_dec_dynamic(GATHER_ARGS); - -/* GatherV */ -int ompi_coll_tuned_gatherv_intra_dec_fixed(GATHERV_ARGS); -int 
ompi_coll_tuned_gatherv_intra_dec_dynamic(GATHER_ARGS); -int ompi_coll_tuned_gatherv_inter_dec_fixed(GATHER_ARGS); -int ompi_coll_tuned_gatherv_inter_dec_dynamic(GATHER_ARGS); /* Reduce */ -int ompi_coll_tuned_reduce_generic( REDUCE_ARGS, ompi_coll_tree_t* tree, int count_by_segment, int max_outstanding_reqs ); int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS); int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS); int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS); int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_oustanding_reqs); int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_reduce_inter_dec_fixed(REDUCE_ARGS); -int ompi_coll_tuned_reduce_inter_dec_dynamic(REDUCE_ARGS); /* Reduce_scatter */ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS); @@ -194,14 +159,6 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS); int ompi_coll_tuned_reduce_scatter_intra_do_forced(REDUCESCATTER_ARGS); int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize); int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_reduce_scatter_inter_dec_fixed(REDUCESCATTER_ARGS); -int ompi_coll_tuned_reduce_scatter_inter_dec_dynamic(REDUCESCATTER_ARGS); - -/* Scan */ -int ompi_coll_tuned_scan_intra_dec_fixed(SCAN_ARGS); -int ompi_coll_tuned_scan_intra_dec_dynamic(SCAN_ARGS); -int ompi_coll_tuned_scan_inter_dec_fixed(SCAN_ARGS); -int ompi_coll_tuned_scan_inter_dec_dynamic(SCAN_ARGS); /* Scatter */ int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS); @@ -209,28 +166,20 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS); int ompi_coll_tuned_scatter_intra_do_forced(SCATTER_ARGS); int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, 
int algorithm, int faninout, int segsize); int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_scatter_inter_dec_fixed(SCATTER_ARGS); -int ompi_coll_tuned_scatter_inter_dec_dynamic(SCATTER_ARGS); - -/* ScatterV */ -int ompi_coll_tuned_scatterv_intra_dec_fixed(SCATTERV_ARGS); -int ompi_coll_tuned_scatterv_intra_dec_dynamic(SCATTERV_ARGS); -int ompi_coll_tuned_scatterv_inter_dec_fixed(SCATTERV_ARGS); -int ompi_coll_tuned_scatterv_inter_dec_dynamic(SCATTERV_ARGS); int mca_coll_tuned_ft_event(int state); struct mca_coll_tuned_component_t { - /** Base coll component */ + /** Base coll component */ mca_coll_base_component_2_0_0_t super; - + /** MCA parameter: Priority of this component */ int tuned_priority; - + /** global stuff that I need the component to store */ - + /* MCA parameters first */ - + /* cached decision table stuff (moved from MCW module) */ ompi_coll_alg_rule_t *all_base_rules; }; diff --git a/ompi/mca/coll/tuned/coll_tuned_allgather_decision.c b/ompi/mca/coll/tuned/coll_tuned_allgather_decision.c index d34283a2e18..21b4026ac12 100644 --- a/ompi/mca/coll/tuned/coll_tuned_allgather_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_allgather_decision.c @@ -2,6 +2,8 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -118,7 +120,7 @@ ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca return (MPI_SUCCESS); } -int ompi_coll_tuned_allgather_intra_do_forced(void *sbuf, int scount, +int ompi_coll_tuned_allgather_intra_do_forced(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -169,7 +171,7 @@ int ompi_coll_tuned_allgather_intra_do_forced(void *sbuf, int scount, } -int ompi_coll_tuned_allgather_intra_do_this(void *sbuf, int scount, +int ompi_coll_tuned_allgather_intra_do_this(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, diff --git a/ompi/mca/coll/tuned/coll_tuned_allgatherv_decision.c b/ompi/mca/coll/tuned/coll_tuned_allgatherv_decision.c index 6a8a2677fa2..7d68498eb2c 100644 --- a/ompi/mca/coll/tuned/coll_tuned_allgatherv_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_allgatherv_decision.c @@ -3,6 +3,8 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -117,10 +119,10 @@ ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mc return (MPI_SUCCESS); } -int ompi_coll_tuned_allgatherv_intra_do_forced(void *sbuf, int scount, +int ompi_coll_tuned_allgatherv_intra_do_forced(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *rdispls, + void *rbuf, const int *rcounts, + const int *rdispls, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -165,10 +167,10 @@ int ompi_coll_tuned_allgatherv_intra_do_forced(void *sbuf, int scount, } -int ompi_coll_tuned_allgatherv_intra_do_this(void *sbuf, int scount, +int ompi_coll_tuned_allgatherv_intra_do_this(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *rdispls, + void *rbuf, const int *rcounts, + const int *rdispls, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module, diff --git a/ompi/mca/coll/tuned/coll_tuned_allreduce_decision.c b/ompi/mca/coll/tuned/coll_tuned_allreduce_decision.c index 712d21904a4..2fafe780058 100644 --- a/ompi/mca/coll/tuned/coll_tuned_allreduce_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_allreduce_decision.c @@ -120,7 +120,7 @@ int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorith } -int ompi_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count, +int ompi_coll_tuned_allreduce_intra_do_forced(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -153,7 +153,7 @@ int ompi_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count, } -int ompi_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf, int count, +int ompi_coll_tuned_allreduce_intra_do_this(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct 
ompi_communicator_t *comm, diff --git a/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c b/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c index 696bac31143..7db956164f9 100644 --- a/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c @@ -3,6 +3,8 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -139,7 +141,7 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm -int ompi_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount, +int ompi_coll_tuned_alltoall_intra_do_forced(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -172,7 +174,7 @@ int ompi_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount, } -int ompi_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount, +int ompi_coll_tuned_alltoall_intra_do_this(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, diff --git a/ompi/mca/coll/tuned/coll_tuned_alltoallv_decision.c b/ompi/mca/coll/tuned/coll_tuned_alltoallv_decision.c index 254dc56ee0c..871aa1eceb4 100644 --- a/ompi/mca/coll/tuned/coll_tuned_alltoallv_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_alltoallv_decision.c @@ -86,9 +86,9 @@ int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm -int ompi_coll_tuned_alltoallv_intra_do_forced(void *sbuf, int *scounts, int *sdisps, +int ompi_coll_tuned_alltoallv_intra_do_forced(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *rdisps, + void* rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct 
ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -123,9 +123,9 @@ int ompi_coll_tuned_alltoallv_intra_do_forced(void *sbuf, int *scounts, int *sdi /* If the user selects dynamic rules and specifies the algorithm to * use, then this function is called. */ -int ompi_coll_tuned_alltoallv_intra_do_this(void *sbuf, int *scounts, int *sdisps, +int ompi_coll_tuned_alltoallv_intra_do_this(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *rdisps, + void* rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module, diff --git a/ompi/mca/coll/tuned/coll_tuned_component.c b/ompi/mca/coll/tuned/coll_tuned_component.c index 409e65c0b97..9756359ed6c 100644 --- a/ompi/mca/coll/tuned/coll_tuned_component.c +++ b/ompi/mca/coll/tuned/coll_tuned_component.c @@ -17,9 +17,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -77,7 +77,7 @@ static int tuned_close(void); mca_coll_tuned_component_t mca_coll_tuned_component = { /* First, fill in the super */ - { + { /* First, the mca_component_t struct containing meta information about the component itself */ .collm_version = { @@ -108,13 +108,13 @@ mca_coll_tuned_component_t mca_coll_tuned_component = { 0, /* Tuned component specific information */ - NULL /* ompi_coll_alg_rule_t ptr */ + NULL /* ompi_coll_alg_rule_t ptr */ }; static int tuned_register(void) { - /* Use a low priority, but allow other components to be lower */ + /* Use a low priority, but allow other components to be lower */ ompi_coll_tuned_priority = 30; (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, "priority", "Priority of the tuned coll component", @@ -218,13 +218,13 @@ static int tuned_open(void) /* this is useful for benchmarking and user knows best tuning */ /* as this is the component we only lookup the indicies of the mca params */ /* the actual values are looked up during comm create via module init */ - + /* intra functions first */ /* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */ /* by default DISABLE dynamic rules and instead use fixed [if based] rules */ if (ompi_coll_tuned_use_dynamic_rules) { if( ompi_coll_tuned_dynamic_rules_filename ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:component_open Reading collective rules file [%s]", + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:component_open Reading collective rules file [%s]", ompi_coll_tuned_dynamic_rules_filename)); rc = ompi_coll_tuned_read_rules_config_file( ompi_coll_tuned_dynamic_rules_filename, &(mca_coll_tuned_component.all_base_rules), COLLCOUNT); @@ -265,7 +265,7 @@ static void mca_coll_tuned_module_construct(mca_coll_tuned_module_t 
*module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - + for( int i = 0; i < COLLCOUNT; i++ ) { tuned_module->user_forced[i].algorithm = 0; tuned_module->com_rules[i] = NULL; diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c b/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c index cf4904598bd..3aa443b01e8 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,7 +50,7 @@ * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count, +ompi_coll_tuned_allreduce_intra_dec_dynamic (const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -94,7 +96,7 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count, * Returns: - MPI_SUCCESS or error code (passed from the bcast implementation) */ -int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount, +int ompi_coll_tuned_alltoall_intra_dec_dynamic(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -144,9 +146,9 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ -int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *sdisps, +int ompi_coll_tuned_alltoallv_intra_dec_dynamic(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *rdisps, + void* rbuf, const int *rcounts, const int 
*rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -274,7 +276,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, * Returns: - MPI_SUCCESS or error code (passed from the reduce implementation) * */ -int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, +int ompi_coll_tuned_reduce_intra_dec_dynamic( const void *sendbuf, void *recvbuf, int count, struct ompi_datatype_t* datatype, struct ompi_op_t* op, int root, struct ompi_communicator_t* comm, @@ -327,8 +329,8 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, * the reduce_scatter implementation) * */ -int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf, - int *rcounts, +int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(const void *sbuf, void *rbuf, + const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -381,7 +383,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf, * allgather function). */ -int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount, +int ompi_coll_tuned_allgather_intra_dec_dynamic(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -439,10 +441,10 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount, * allgatherv function). 
*/ -int ompi_coll_tuned_allgatherv_intra_dec_dynamic(void *sbuf, int scount, +int ompi_coll_tuned_allgatherv_intra_dec_dynamic(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, - int *rdispls, + void* rbuf, const int *rcounts, + const int *rdispls, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -493,7 +495,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(void *sbuf, int scount, comm, module); } -int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount, +int ompi_coll_tuned_gather_intra_dec_dynamic(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -540,7 +542,7 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount, root, comm, module); } -int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount, +int ompi_coll_tuned_scatter_intra_dec_dynamic(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c index 57869ef4ebf..a80e1ff963e 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c @@ -13,6 +13,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -39,7 +41,7 @@ * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_tuned_allreduce_intra_dec_fixed(void *sbuf, void *rbuf, int count, +ompi_coll_tuned_allreduce_intra_dec_fixed(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -91,7 +93,7 @@ ompi_coll_tuned_allreduce_intra_dec_fixed(void *sbuf, void *rbuf, int count, * Returns: - MPI_SUCCESS or error code (passed from the bcast implementation) */ -int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount, +int ompi_coll_tuned_alltoall_intra_dec_fixed(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -161,9 +163,9 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount, * Accepts: - same arguments as MPI_Alltoallv() * Returns: - MPI_SUCCESS or error code */ -int ompi_coll_tuned_alltoallv_intra_dec_fixed(void *sbuf, int *scounts, int *sdisps, +int ompi_coll_tuned_alltoallv_intra_dec_fixed(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, + void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -338,7 +340,7 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, * Returns: - MPI_SUCCESS or error code (passed from the reduce implementation) * */ -int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, +int ompi_coll_tuned_reduce_intra_dec_fixed( const void *sendbuf, void *recvbuf, int count, struct ompi_datatype_t* datatype, struct ompi_op_t* op, int root, struct ompi_communicator_t* comm, @@ -450,8 +452,8 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, * Returns: - MPI_SUCCESS or error code (passed from * the reduce scatter implementation) */ -int 
ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf, - int *rcounts, +int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( const void *sbuf, void *rbuf, + const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -507,7 +509,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf, * internal allgather function. */ -int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount, +int ompi_coll_tuned_allgather_intra_dec_fixed(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -600,10 +602,10 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount, * internal allgatherv function. */ -int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount, +int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, - int *rdispls, + void* rbuf, const int *rcounts, + const int *rdispls, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -660,7 +662,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount, * internal allgather function. */ -int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount, +int ompi_coll_tuned_gather_intra_dec_fixed(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -730,7 +732,7 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount, * internal allgather function. 
*/ -int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount, +int ompi_coll_tuned_scatter_intra_dec_fixed(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, diff --git a/ompi/mca/coll/tuned/coll_tuned_dynamic_file.h b/ompi/mca/coll/tuned/coll_tuned_dynamic_file.h index 597321c325b..595e436fa49 100644 --- a/ompi/mca/coll/tuned/coll_tuned_dynamic_file.h +++ b/ompi/mca/coll/tuned/coll_tuned_dynamic_file.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.h b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.h index d319e70adc0..7e8f672d21d 100644 --- a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.h +++ b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.h @@ -6,15 +6,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -41,7 +41,7 @@ typedef struct msg_rule_s { /* RESULT */ int result_alg; /* result algorithm to use */ int result_topo_faninout; /* result topology fan in/out to use (if applicable) */ - long result_segsize; /* result segment size to use */ + long result_segsize; /* result segment size to use */ int result_max_requests; /* maximum number of outstanding requests (if applicable) */ } ompi_coll_msg_rule_t; @@ -94,8 +94,8 @@ int ompi_coll_tuned_free_all_rules (ompi_coll_alg_rule_t* alg_p, int n_algs); ompi_coll_com_rule_t* ompi_coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* rules, int alg_id, int mpi_comsize); -int ompi_coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, size_t mpi_msgsize, - int* result_topo_faninout, int* result_segsize, +int ompi_coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, size_t mpi_msgsize, + int* result_topo_faninout, int* result_segsize, int* max_requests); diff --git a/ompi/mca/coll/tuned/coll_tuned_gather_decision.c b/ompi/mca/coll/tuned/coll_tuned_gather_decision.c index 3d7ad6371f6..09da5f1a070 100644 --- a/ompi/mca/coll/tuned/coll_tuned_gather_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_gather_decision.c @@ -3,6 +3,8 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -118,7 +120,7 @@ ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_pa } int -ompi_coll_tuned_gather_intra_do_forced(void *sbuf, int scount, +ompi_coll_tuned_gather_intra_do_forced(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -159,7 +161,7 @@ ompi_coll_tuned_gather_intra_do_forced(void *sbuf, int scount, } int -ompi_coll_tuned_gather_intra_do_this(void *sbuf, int scount, +ompi_coll_tuned_gather_intra_do_this(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, diff --git a/ompi/mca/coll/tuned/coll_tuned_module.c b/ompi/mca/coll/tuned/coll_tuned_module.c index 0469a62b7db..a21e4ca8d63 100644 --- a/ompi/mca/coll/tuned/coll_tuned_module.c +++ b/ompi/mca/coll/tuned/coll_tuned_module.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -86,7 +87,7 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority) * but this would probably add an extra if and funct call to the path */ tuned_module->super.coll_module_enable = tuned_module_enable; - tuned_module->super.ft_event = mca_coll_tuned_ft_event; + tuned_module->super.ft_event = NULL; /* By default stick with the fied version of the tuned collectives. 
Later on, * when the module get enabled, set the correct version based on the availability @@ -200,7 +201,7 @@ tuned_module_enable( mca_coll_base_module_t *module, * The default is set very high */ - /* if we within the memory/size limit, allow preallocated data */ + /* prepare the placeholder for the array of request* */ data = OBJ_NEW(mca_coll_base_comm_t); if (NULL == data) { return OMPI_ERROR; @@ -279,22 +280,3 @@ tuned_module_enable( mca_coll_base_module_t *module, return OMPI_SUCCESS; } -int mca_coll_tuned_ft_event(int state) { - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/tuned/coll_tuned_reduce_decision.c b/ompi/mca/coll/tuned/coll_tuned_reduce_decision.c index f1f6aed8e1c..bae6ebef6f2 100644 --- a/ompi/mca/coll/tuned/coll_tuned_reduce_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_reduce_decision.c @@ -3,6 +3,8 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -141,7 +143,7 @@ int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_m } -int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count, +int ompi_coll_tuned_reduce_intra_do_forced(const void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, @@ -184,7 +186,7 @@ int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count, } -int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count, +int ompi_coll_tuned_reduce_intra_do_this(const void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/tuned/coll_tuned_reduce_scatter_decision.c b/ompi/mca/coll/tuned/coll_tuned_reduce_scatter_decision.c index 0cb9e05f890..33b31be1c43 100644 --- a/ompi/mca/coll/tuned/coll_tuned_reduce_scatter_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_reduce_scatter_decision.c @@ -3,6 +3,8 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -118,8 +120,8 @@ int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_alg } -int ompi_coll_tuned_reduce_scatter_intra_do_forced(void *sbuf, void* rbuf, - int *rcounts, +int ompi_coll_tuned_reduce_scatter_intra_do_forced(const void *sbuf, void* rbuf, + const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -146,8 +148,8 @@ int ompi_coll_tuned_reduce_scatter_intra_do_forced(void *sbuf, void* rbuf, } -int ompi_coll_tuned_reduce_scatter_intra_do_this(void *sbuf, void* rbuf, - int *rcounts, +int ompi_coll_tuned_reduce_scatter_intra_do_this(const void *sbuf, void* rbuf, + const int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/tuned/coll_tuned_scatter_decision.c b/ompi/mca/coll/tuned/coll_tuned_scatter_decision.c index b0ff93192bf..6bfec329df3 100644 --- a/ompi/mca/coll/tuned/coll_tuned_scatter_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_scatter_decision.c @@ -3,6 +3,8 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -116,7 +118,7 @@ ompi_coll_tuned_scatter_intra_check_forced_init(coll_tuned_force_algorithm_mca_p } int -ompi_coll_tuned_scatter_intra_do_forced(void *sbuf, int scount, +ompi_coll_tuned_scatter_intra_do_forced(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -151,7 +153,7 @@ ompi_coll_tuned_scatter_intra_do_forced(void *sbuf, int scount, } int -ompi_coll_tuned_scatter_intra_do_this(void *sbuf, int scount, +ompi_coll_tuned_scatter_intra_do_this(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, diff --git a/ompi/mca/common/Makefile.am b/ompi/mca/common/Makefile.am index 33bbb5f2a33..4567c654307 100644 --- a/ompi/mca/common/Makefile.am +++ b/ompi/mca/common/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/crcp/Makefile.am b/ompi/mca/crcp/Makefile.am deleted file mode 100644 index a4d26e758d8..00000000000 --- a/ompi/mca/crcp/Makefile.am +++ /dev/null @@ -1,50 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -# main library setup -noinst_LTLIBRARIES = libmca_crcp.la -libmca_crcp_la_SOURCES = - -# local files -headers = crcp.h -libmca_crcp_la_SOURCES += $(headers) - -# Manual pages -nodist_man_MANS = ompi_crcp.7 -EXTRA_DIST = $(nodist_man_MANS:.7=.7in) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ompidir = $(ompiincludedir)/$(subdir) -nobase_ompi_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h - rm -f $(nodist_man_MANS) diff --git a/ompi/mca/crcp/base/Makefile.am b/ompi/mca/crcp/base/Makefile.am deleted file mode 100644 index 00f371c3019..00000000000 --- a/ompi/mca/crcp/base/Makefile.am +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. 
All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h - -libmca_crcp_la_SOURCES += \ - base/crcp_base_frame.c \ - base/crcp_base_select.c \ - base/crcp_base_fns.c - diff --git a/ompi/mca/crcp/base/base.h b/ompi/mca/crcp/base/base.h deleted file mode 100644 index 3372c002193..00000000000 --- a/ompi/mca/crcp/base/base.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef OMPI_CRCP_BASE_H -#define OMPI_CRCP_BASE_H - -#include "ompi_config.h" - -#include "opal/mca/base/base.h" -#include "ompi/constants.h" - -#include "ompi/mca/crcp/crcp.h" - -/* - * Global functions for MCA overall CRCP - */ - -BEGIN_C_DECLS - - /** - * Select an available component. - * - * @retval OMPI_SUCCESS Upon Success - * @retval OMPI_NOT_FOUND If no component can be selected - * @retval OMPI_ERROR Upon other failure - * - */ - OMPI_DECLSPEC int ompi_crcp_base_select(void); - - /** - * Quiesce Interface (For MPI Ext.) - */ - OMPI_DECLSPEC int ompi_crcp_base_quiesce_start(MPI_Info *info); - OMPI_DECLSPEC int ompi_crcp_base_quiesce_end(MPI_Info *info); - - /** - * 'None' component functions - * These are to be used when no component is selected. - * They just return success, and empty strings as necessary. 
- */ - int ompi_crcp_base_none_open(void); - int ompi_crcp_base_none_close(void); - int ompi_crcp_base_none_query(mca_base_module_t **module, int *priority); - - int ompi_crcp_base_module_init(void); - int ompi_crcp_base_module_finalize(void); - - /* Quiesce Interface */ - int ompi_crcp_base_none_quiesce_start(MPI_Info *info); - int ompi_crcp_base_none_quiesce_end(MPI_Info *info); - - /* PML Interface */ - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_enable( bool enable, ompi_crcp_base_pml_state_t* ); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_add_comm( struct ompi_communicator_t* comm, ompi_crcp_base_pml_state_t* ); - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_del_comm( struct ompi_communicator_t* comm, ompi_crcp_base_pml_state_t* ); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_add_procs( struct ompi_proc_t **procs, size_t nprocs, ompi_crcp_base_pml_state_t* ); - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_del_procs( struct ompi_proc_t **procs, size_t nprocs, ompi_crcp_base_pml_state_t* ); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_progress(ompi_crcp_base_pml_state_t*); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_iprobe(int dst, int tag, struct ompi_communicator_t* comm, int *matched, ompi_status_public_t* status, ompi_crcp_base_pml_state_t* ); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_probe( int dst, int tag, struct ompi_communicator_t* comm, ompi_status_public_t* status, ompi_crcp_base_pml_state_t* ); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_isend_init( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request, ompi_crcp_base_pml_state_t* ); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_isend( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct 
ompi_request_t **request, ompi_crcp_base_pml_state_t* ); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_send( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, ompi_crcp_base_pml_state_t* ); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_irecv_init( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct ompi_request_t **request, ompi_crcp_base_pml_state_t*); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_irecv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct ompi_request_t **request, ompi_crcp_base_pml_state_t* ); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_recv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, ompi_status_public_t* status, ompi_crcp_base_pml_state_t*); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_dump( struct ompi_communicator_t* comm, int verbose, ompi_crcp_base_pml_state_t* ); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_start( size_t count, ompi_request_t** requests, ompi_crcp_base_pml_state_t* ); - - ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_ft_event(int state, ompi_crcp_base_pml_state_t*); - - /* Request Interface */ - int ompi_crcp_base_none_request_complete( struct ompi_request_t *request ); - - /* BTL Interface */ - ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_add_procs( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_btl_base_endpoint_t** endpoints, - struct opal_bitmap_t* reachable, - ompi_crcp_base_btl_state_t* ); - - ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_del_procs( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_btl_base_endpoint_t**, - ompi_crcp_base_btl_state_t*); - - 
ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_register( struct mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_module_recv_cb_fn_t cbfunc, - void* cbdata, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_finalize( struct mca_btl_base_module_t* btl, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_alloc( struct mca_btl_base_module_t* btl, - size_t size, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_free( struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - mca_mpool_base_registration_t* registration, - struct opal_convertor_t* convertor, - size_t reserve, - size_t* size, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_prepare_dst( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - mca_mpool_base_registration_t* registration, - struct opal_convertor_t* convertor, - size_t reserve, - size_t* size, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_send( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - mca_btl_base_tag_t tag, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_put( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_get( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* - 
ompi_crcp_base_none_btl_dump( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - int verbose, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* - ompi_crcp_base_none_btl_ft_event(int state, - ompi_crcp_base_btl_state_t*); - - OMPI_DECLSPEC extern mca_base_framework_t ompi_crcp_base_framework; - OMPI_DECLSPEC extern ompi_crcp_base_component_t ompi_crcp_base_selected_component; - OMPI_DECLSPEC extern ompi_crcp_base_module_t ompi_crcp; - -END_C_DECLS - -#endif /* OMPI_CRCP_BASE_H */ diff --git a/ompi/mca/crcp/base/crcp_base_fns.c b/ompi/mca/crcp/base/crcp_base_fns.c deleted file mode 100644 index 46f963049f3..00000000000 --- a/ompi/mca/crcp/base/crcp_base_fns.c +++ /dev/null @@ -1,436 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#include - -#include "opal/class/opal_bitmap.h" -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/base.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "ompi/communicator/communicator.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/info/info.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/mca/pml/base/pml_base_request.h" - -/****************** - * Local Functions - ******************/ - -/****************** - * Object stuff - ******************/ -OBJ_CLASS_INSTANCE(ompi_crcp_base_pml_state_t, - opal_free_list_item_t, - NULL, - NULL - ); - -OBJ_CLASS_INSTANCE(ompi_crcp_base_btl_state_t, - opal_free_list_item_t, - NULL, - NULL - ); - -/*********************** - * None component stuff - ************************/ -int ompi_crcp_base_none_open(void) -{ - return OMPI_SUCCESS; -} - -int ompi_crcp_base_none_close(void) -{ - return OMPI_SUCCESS; -} - -int ompi_crcp_base_none_query(mca_base_module_t **module, int *priority) -{ - *module = NULL; - *priority = 0; - - return OPAL_SUCCESS; -} - -int ompi_crcp_base_module_init(void) -{ - return OMPI_SUCCESS; -} - -int ompi_crcp_base_module_finalize(void) -{ - return OMPI_SUCCESS; -} - -/**************** - * MPI Quiesce Interface - ****************/ -int ompi_crcp_base_none_quiesce_start(MPI_Info *info) -{ - return OMPI_SUCCESS; -} - -int ompi_crcp_base_none_quiesce_end(MPI_Info *info) -{ - return OMPI_SUCCESS; -} - -/**************** - * PML Wrapper - ****************/ -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_enable( bool enable, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = 
OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_add_comm( struct ompi_communicator_t* comm, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_del_comm( struct ompi_communicator_t* comm, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_add_procs( struct ompi_proc_t **procs, - size_t nprocs, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_del_procs( struct ompi_proc_t **procs, - size_t nprocs, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_progress(ompi_crcp_base_pml_state_t* pml_state) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_iprobe(int dst, int tag, - struct ompi_communicator_t* comm, - int *matched, ompi_status_public_t* status, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_probe( int dst, int tag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_isend_init( void *buf, size_t count, - ompi_datatype_t *datatype, - int dst, int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_isend( 
void *buf, size_t count, - ompi_datatype_t *datatype, - int dst, int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_send( void *buf, size_t count, - ompi_datatype_t *datatype, - int dst, int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_irecv_init( void *buf, size_t count, - ompi_datatype_t *datatype, - int src, int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_irecv( void *buf, size_t count, - ompi_datatype_t *datatype, - int src, int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_recv( void *buf, size_t count, - ompi_datatype_t *datatype, - int src, int tag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status, - ompi_crcp_base_pml_state_t* pml_state) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_dump( struct ompi_communicator_t* comm, - int verbose, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_start( size_t count, - ompi_request_t** requests, - ompi_crcp_base_pml_state_t* pml_state ) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - 
-ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_ft_event(int state, - ompi_crcp_base_pml_state_t* pml_state) -{ - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -/******************** - * Request Interface - ********************/ -int ompi_crcp_base_none_request_complete( struct ompi_request_t *request ) { - return OMPI_SUCCESS; -} - -/******************** - * BTL Interface - ********************/ -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_add_procs( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_btl_base_endpoint_t** endpoints, - struct opal_bitmap_t* reachable, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_del_procs( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_btl_base_endpoint_t** endpoints, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_register( struct mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_module_recv_cb_fn_t cbfunc, - void* cbdata, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_finalize( struct mca_btl_base_module_t* btl, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_alloc( struct mca_btl_base_module_t* btl, - size_t size, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_free( struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = 
OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - mca_mpool_base_registration_t* registration, - struct opal_convertor_t* convertor, - size_t reserve, - size_t* size, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_prepare_dst( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - mca_mpool_base_registration_t* registration, - struct opal_convertor_t* convertor, - size_t reserve, - size_t* size, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_send( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - mca_btl_base_tag_t tag, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_put( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_get( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_none_btl_dump( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - int verbose, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* 
-ompi_crcp_base_none_btl_ft_event(int state, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - - -/******************** - * Utility functions - ********************/ - -/****************** - * MPI Interface Functions - ******************/ -int ompi_crcp_base_quiesce_start(MPI_Info *info) -{ - if( NULL != ompi_crcp.quiesce_start ) { - return ompi_crcp.quiesce_start(info); - } else { - return OMPI_SUCCESS; - } -} - -int ompi_crcp_base_quiesce_end(MPI_Info *info) -{ - if( NULL != ompi_crcp.quiesce_end ) { - return ompi_crcp.quiesce_end(info); - } else { - return OMPI_SUCCESS; - } -} diff --git a/ompi/mca/crcp/base/crcp_base_frame.c b/ompi/mca/crcp/base/crcp_base_frame.c deleted file mode 100644 index 87cc386424e..00000000000 --- a/ompi/mca/crcp/base/crcp_base_frame.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" - -#include "ompi/mca/crcp/base/static-components.h" - -/* - * Globals - */ -OMPI_DECLSPEC ompi_crcp_base_module_t ompi_crcp = { - NULL, /* crcp_init */ - NULL /* crcp_finalize */ -}; - -ompi_crcp_base_component_t ompi_crcp_base_selected_component = {{0}}; - -static int ompi_crcp_base_close(void) -{ - /* Close the selected component */ - if( NULL != ompi_crcp.crcp_finalize ) { - ompi_crcp.crcp_finalize(); - } - - /* Close all available modules that are open */ - return mca_base_framework_components_close(&ompi_crcp_base_framework, NULL); -} - -MCA_BASE_FRAMEWORK_DECLARE(ompi, crcp, NULL, NULL, NULL, ompi_crcp_base_close, - mca_crcp_base_static_components, 0); diff --git a/ompi/mca/crcp/base/crcp_base_select.c b/ompi/mca/crcp/base/crcp_base_select.c deleted file mode 100644 index 0dc7d3b230c..00000000000 --- a/ompi/mca/crcp/base/crcp_base_select.c +++ /dev/null @@ -1,180 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" - - -static ompi_crcp_base_component_t none_component = { - /* Handle the general mca_component_t struct containing - * meta information about the component itself - */ - { - OMPI_CRCP_BASE_VERSION_2_0_0, - - /* Component name and version */ - "none", - OMPI_MAJOR_VERSION, - OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION, - - /* Component open and close functions */ - ompi_crcp_base_none_open, - ompi_crcp_base_none_close, - ompi_crcp_base_none_query - }, - { - /* Component is checkpointable */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - /* Verbosity level */ - 0, - /* opal_output handler */ - -1, - /* Default priority */ - 1 -}; - -static ompi_crcp_base_module_t none_module = { - /** Initialization Function */ - ompi_crcp_base_module_init, - /** Finalization Function */ - ompi_crcp_base_module_finalize, - - /** Quiesce interface */ - ompi_crcp_base_none_quiesce_start, - ompi_crcp_base_none_quiesce_end, - - /** PML Wrapper */ - ompi_crcp_base_none_pml_enable, - - ompi_crcp_base_none_pml_add_comm, - ompi_crcp_base_none_pml_del_comm, - - ompi_crcp_base_none_pml_add_procs, - ompi_crcp_base_none_pml_del_procs, - - ompi_crcp_base_none_pml_progress, - - ompi_crcp_base_none_pml_iprobe, - ompi_crcp_base_none_pml_probe, - - ompi_crcp_base_none_pml_isend_init, - ompi_crcp_base_none_pml_isend, - ompi_crcp_base_none_pml_send, - - ompi_crcp_base_none_pml_irecv_init, - ompi_crcp_base_none_pml_irecv, - ompi_crcp_base_none_pml_recv, - - ompi_crcp_base_none_pml_dump, - ompi_crcp_base_none_pml_start, - ompi_crcp_base_none_pml_ft_event, - - /** Request Wrapper */ - ompi_crcp_base_none_request_complete, - - /** BTL Wrapper */ - ompi_crcp_base_none_btl_add_procs, - ompi_crcp_base_none_btl_del_procs, - - 
ompi_crcp_base_none_btl_register, - ompi_crcp_base_none_btl_finalize, - - ompi_crcp_base_none_btl_alloc, - ompi_crcp_base_none_btl_free, - - ompi_crcp_base_none_btl_prepare_src, - ompi_crcp_base_none_btl_prepare_dst, - - ompi_crcp_base_none_btl_send, - ompi_crcp_base_none_btl_put, - ompi_crcp_base_none_btl_get, - - ompi_crcp_base_none_btl_dump, - ompi_crcp_base_none_btl_ft_event -}; - -int ompi_crcp_base_select(void) -{ - int ret; - ompi_crcp_base_component_t *best_component = NULL; - ompi_crcp_base_module_t *best_module = NULL; - const char *include_list = NULL; - const char **selection_value; - int var_id; - - /* - * Register the framework MCA param and look up include list - */ - var_id = mca_base_var_find("ompi", "crcp", NULL, NULL); - - /* NTH: The old parameter code here set the selection to none if no file value - or environment value was set. This effectively means include_list is never NULL. */ - selection_value = NULL; - (void) mca_base_var_get_value(var_id, &selection_value, NULL, NULL); - if (NULL == selection_value || NULL == selection_value[0]) { - (void) mca_base_var_set_value(var_id, "none", 5, MCA_BASE_VAR_SOURCE_DEFAULT, NULL); - include_list = "none"; - } else { - include_list = selection_value[0]; - } - - if(0 == strncmp(include_list, "none", strlen("none")) ){ - opal_output_verbose(10, ompi_crcp_base_framework.framework_output, - "crcp:select: Using %s component", - include_list); - best_component = &none_component; - best_module = &none_module; - /* JJH: Todo: Check if none is in the list */ - /* Close all components since none will be used */ - mca_base_components_close(ompi_crcp_base_framework.framework_output, - &ompi_crcp_base_framework.framework_components, - NULL); - } else - - /* - * Select the best component - */ - if( OPAL_SUCCESS != mca_base_select("crcp", ompi_crcp_base_framework.framework_output, - &ompi_crcp_base_framework.framework_components, - (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { 
- /* This will only happen if no component was selected */ - return OMPI_ERROR; - } - - /* Save the winner */ - ompi_crcp_base_selected_component = *best_component; - ompi_crcp = *best_module; - - /* Initialize the winner */ - if (OPAL_SUCCESS != (ret = ompi_crcp.crcp_init()) ) { - return ret; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/crcp/base/owner.txt b/ompi/mca/crcp/base/owner.txt deleted file mode 100644 index 8ad5fc38ed2..00000000000 --- a/ompi/mca/crcp/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: IU? -status: unmaintained diff --git a/ompi/mca/crcp/bkmrk/Makefile.am b/ompi/mca/crcp/bkmrk/Makefile.am deleted file mode 100644 index 511b2cbd290..00000000000 --- a/ompi/mca/crcp/bkmrk/Makefile.am +++ /dev/null @@ -1,44 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - crcp_bkmrk.h \ - crcp_bkmrk_pml.h \ - crcp_bkmrk_component.c \ - crcp_bkmrk_module.c \ - crcp_bkmrk_pml.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_ompi_crcp_bkmrk_DSO -component_noinst = -component_install = mca_crcp_bkmrk.la -else -component_noinst = libmca_crcp_bkmrk.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_crcp_bkmrk_la_SOURCES = $(sources) -mca_crcp_bkmrk_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_crcp_bkmrk_la_SOURCES = $(sources) -libmca_crcp_bkmrk_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/crcp/bkmrk/configure.m4 b/ompi/mca/crcp/bkmrk/configure.m4 deleted file mode 100644 index b148ad2cc6b..00000000000 --- a/ompi/mca/crcp/bkmrk/configure.m4 +++ /dev/null @@ -1,28 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_crcp_bkmrk_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_crcp_bkmrk_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/crcp/bkmrk/Makefile]) - - # If we don't want FT, don't compile this component - AS_IF([test "$opal_want_ft_cr" = "1"], - [$1], - [$2]) -])dnl diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk.h b/ompi/mca/crcp/bkmrk/crcp_bkmrk.h deleted file mode 100644 index 6c78c44d2c4..00000000000 --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * Hoke CRCP component - * - */ - -#ifndef MCA_CRCP_HOKE_EXPORT_H -#define MCA_CRCP_HOKE_EXPORT_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "ompi/mca/crcp/crcp.h" -#include MCA_timer_IMPLEMENTATION_HEADER - - -BEGIN_C_DECLS - - /* - * Local Component structures - */ - struct ompi_crcp_bkmrk_component_t { - ompi_crcp_base_component_t super; /** Base CRCP component */ - }; - typedef struct ompi_crcp_bkmrk_component_t ompi_crcp_bkmrk_component_t; - OMPI_MODULE_DECLSPEC extern ompi_crcp_bkmrk_component_t mca_crcp_bkmrk_component; - - /* - * Local variables - */ - extern bool timing_enabled; - - /* - * Module functions - */ - int ompi_crcp_bkmrk_component_query(mca_base_module_t **module, int *priority); - int ompi_crcp_bkmrk_module_init(void); - int ompi_crcp_bkmrk_module_finalize(void); - - int ompi_crcp_bkmrk_pml_init(void); - int ompi_crcp_bkmrk_pml_finalize(void); - - /* - * Quiesce Interface - */ - int ompi_crcp_bkmrk_quiesce_start(MPI_Info *info); - int ompi_crcp_bkmrk_quiesce_end(MPI_Info *info); - -END_C_DECLS - -#endif /* MCA_CRCP_HOKE_EXPORT_H */ diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_btl.c b/ompi/mca/crcp/bkmrk/crcp_bkmrk_btl.c deleted file mode 100644 index 2728ce84b39..00000000000 --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_btl.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNIST_H */ - -#include "opal/class/opal_bitmap.h" -#include "opal/mca/event/event.h" -#include "opal/util/opal_environ.h" -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" - -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" - -#include "crcp_bkmrk.h" -#include "crcp_bkmrk_btl.h" - -int ompi_crcp_bkmrk_btl_init(void) { - return OMPI_SUCCESS; -} - -int ompi_crcp_bkmrk_btl_finalize(void) { - return OMPI_SUCCESS; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_add_procs( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_btl_base_endpoint_t** endpoints, - struct opal_bitmap_t* reachable, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_del_procs( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_btl_base_endpoint_t** endpoints, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_register( struct mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_module_recv_cb_fn_t cbfunc, - void* cbdata, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_finalize( struct mca_btl_base_module_t* btl, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_alloc( struct 
mca_btl_base_module_t* btl, - size_t size, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_free( struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_prepare_src( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - mca_mpool_base_registration_t* registration, - struct opal_convertor_t* convertor, - size_t reserve, - size_t* size, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_prepare_dst( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - mca_mpool_base_registration_t* registration, - struct opal_convertor_t* convertor, - size_t reserve, - size_t* size, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_send( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - mca_btl_base_tag_t tag, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_put( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_get( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t* btl_state) -{ - 
btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_dump( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - int verbose, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} - -ompi_crcp_base_btl_state_t* -ompi_crcp_base_coord_btl_ft_event(int state, - ompi_crcp_base_btl_state_t* btl_state) -{ - btl_state->error_code = OMPI_SUCCESS; - return btl_state; -} diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_btl.h b/ompi/mca/crcp/bkmrk/crcp_bkmrk_btl.h deleted file mode 100644 index 792df28f338..00000000000 --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_btl.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * Hoke CRCP component - * - */ - -#ifndef MCA_CRCP_HOKE_BTL_EXPORT_H -#define MCA_CRCP_HOKE_BTL_EXPORT_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" - -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/bkmrk/crcp_bkmrk.h" - -BEGIN_C_DECLS - - /* - * BTL Coordination functions - */ - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_add_procs - ( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_btl_base_endpoint_t** endpoints, - struct opal_bitmap_t* reachable, - ompi_crcp_base_btl_state_t* ); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_del_procs - ( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_btl_base_endpoint_t**, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_register - ( struct mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_module_recv_cb_fn_t cbfunc, - void* cbdata, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_finalize - ( struct mca_btl_base_module_t* btl, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_alloc - ( struct mca_btl_base_module_t* btl, - size_t size, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_free - ( struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_prepare_src - ( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - mca_mpool_base_registration_t* registration, - struct opal_convertor_t* convertor, - size_t reserve, - size_t* size, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_prepare_dst - ( struct mca_btl_base_module_t* btl, - struct 
mca_btl_base_endpoint_t* endpoint, - mca_mpool_base_registration_t* registration, - struct opal_convertor_t* convertor, - size_t reserve, - size_t* size, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_send - ( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - mca_btl_base_tag_t tag, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_put - ( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_get - ( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_dump - ( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - int verbose, - ompi_crcp_base_btl_state_t*); - - ompi_crcp_base_btl_state_t* ompi_crcp_base_coord_btl_ft_event - (int state, - ompi_crcp_base_btl_state_t*); - -END_C_DECLS - -#endif /* MCA_CRCP_HOKE_BTL_EXPORT_H */ diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_component.c b/ompi/mca/crcp/bkmrk/crcp_bkmrk_component.c deleted file mode 100644 index a3a619d8201..00000000000 --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_component.c +++ /dev/null @@ -1,146 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/util/output.h" - -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" -#include "crcp_bkmrk.h" - -/* - * Public string for version number - */ -const char *ompi_crcp_bkmrk_component_version_string = -"OMPI CRCP bkmrk MCA component version " OMPI_VERSION; - -bool timing_enabled; - -/* - * Local functionality - */ -static int crcp_bkmrk_register(void); -static int crcp_bkmrk_open(void); -static int crcp_bkmrk_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -ompi_crcp_bkmrk_component_t mca_crcp_bkmrk_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component - */ - .base_version = { - OMPI_CRCP_BASE_VERSION_2_0_0, - /* Component name and version */ - .mca_component_name = "bkmrk", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = crcp_bkmrk_open, - .mca_close_component = crcp_bkmrk_close, - .mca_query_component = ompi_crcp_bkmrk_component_query, - .mca_register_component_params = crcp_bkmrk_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .verbose = 0, - .output_handle = -1, - .priority = 20, - } -}; - -static int crcp_bkmrk_register(void) -{ - /* - * This should be the last componet to ever get used since - * it doesn't do anything. 
- */ - mca_crcp_bkmrk_component.super.priority = 20; - (void) mca_base_component_var_register(&mca_crcp_bkmrk_component.super.base_version, - "priority", - "Priority of the CRCP bkmrk component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_crcp_bkmrk_component.super.priority); - - mca_crcp_bkmrk_component.super.verbose = 0; - (void) mca_base_component_var_register(&mca_crcp_bkmrk_component.super.base_version, - "verbose", - "Verbose level for the CRCP bkmrk component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_crcp_bkmrk_component.super.verbose); - - timing_enabled = false; - (void) mca_base_component_var_register(&mca_crcp_bkmrk_component.super.base_version, - "timing", "Enable Performance timing", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &timing_enabled); - - return OMPI_SUCCESS; -} - -static int crcp_bkmrk_open(void) -{ - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if ( 0 != mca_crcp_bkmrk_component.super.verbose) { - mca_crcp_bkmrk_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_crcp_bkmrk_component.super.output_handle, - mca_crcp_bkmrk_component.super.verbose); - } else { - mca_crcp_bkmrk_component.super.output_handle = ompi_crcp_base_framework.framework_output; - } - - /* - * Debug Output - */ - opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: open()"); - opal_output_verbose(20, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: open: priority = %d", - mca_crcp_bkmrk_component.super.priority); - opal_output_verbose(20, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: open: verbosity = %d", - mca_crcp_bkmrk_component.super.verbose); - - return OMPI_SUCCESS; -} - -static int crcp_bkmrk_close(void) -{ - opal_output_verbose(10, 
mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: close()"); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_module.c b/ompi/mca/crcp/bkmrk/crcp_bkmrk_module.c deleted file mode 100644 index bffcc618c82..00000000000 --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_module.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2011 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNIST_H */ - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "opal/util/output.h" - -#include "opal/util/opal_environ.h" - -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" - -#include "crcp_bkmrk.h" -#include "crcp_bkmrk_pml.h" - -/* - * Coord module - */ -static ompi_crcp_base_module_t loc_module = { - /** Initialization Function */ - ompi_crcp_bkmrk_module_init, - /** Finalization Function */ - ompi_crcp_bkmrk_module_finalize, - - /** Quiesce interface */ - ompi_crcp_bkmrk_quiesce_start, - ompi_crcp_bkmrk_quiesce_end, - - /** PML Wrapper */ - NULL, /* ompi_crcp_bkmrk_pml_enable, */ - - NULL, /* ompi_crcp_bkmrk_pml_add_comm, */ - NULL, /* ompi_crcp_bkmrk_pml_del_comm, */ - - ompi_crcp_bkmrk_pml_add_procs, - ompi_crcp_bkmrk_pml_del_procs, - - NULL, /* ompi_crcp_bkmrk_pml_progress, */ - - ompi_crcp_bkmrk_pml_iprobe, - ompi_crcp_bkmrk_pml_probe, - - ompi_crcp_bkmrk_pml_isend_init, - ompi_crcp_bkmrk_pml_isend, - ompi_crcp_bkmrk_pml_send, - - ompi_crcp_bkmrk_pml_irecv_init, - ompi_crcp_bkmrk_pml_irecv, - ompi_crcp_bkmrk_pml_recv, - - 
ompi_crcp_bkmrk_pml_dump, - ompi_crcp_bkmrk_pml_start, - - ompi_crcp_bkmrk_pml_ft_event, - - /* Request Functions */ - ompi_crcp_bkmrk_request_complete, - - /* BTL Wrapper Functions */ - NULL, /* btl_add_procs */ - NULL, /* btl_del_procs */ - NULL, /* btl_register */ - NULL, /* btl_finalize */ - NULL, /* btl_alloc */ - NULL, /* btl_free */ - NULL, /* btl_prepare_src */ - NULL, /* btl_prepare_dst */ - NULL, /* btl_send */ - NULL, /* btl_put */ - NULL, /* btl_get */ - NULL, /* btl_dump */ - NULL /* btl_ft_event */ -}; - -/************************************ - * Locally Global vars & functions :) - ************************************/ - -/************************ - * Function Definitions - ************************/ -/* - * MCA Functions - */ -int ompi_crcp_bkmrk_component_query(mca_base_module_t **module, int *priority) -{ - opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: component_query()"); - - *priority = mca_crcp_bkmrk_component.super.priority; - *module = (mca_base_module_t *)&loc_module; - - return OMPI_SUCCESS; -} - -int ompi_crcp_bkmrk_module_init(void) -{ - opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: module_init()"); - - ompi_crcp_bkmrk_pml_init(); - - return OMPI_SUCCESS; -} - -int ompi_crcp_bkmrk_module_finalize(void) -{ - opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: module_finalize()"); - - ompi_crcp_bkmrk_pml_finalize(); - - return OMPI_SUCCESS; -} - -int ompi_crcp_bkmrk_quiesce_start(MPI_Info *info) -{ - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: quiesce_start(--)")); -#if 0 - if( OMPI_SUCCESS != (ret = ompi_crcp_bkmrk_pml_quiesce_start(QUIESCE_TAG_CKPT)) ) { - ; - } - return OMPI_SUCCESS; -#else - return OMPI_ERR_NOT_IMPLEMENTED; -#endif -} - -int ompi_crcp_bkmrk_quiesce_end(MPI_Info *info) -{ - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: 
quiesce_end(--)")); -#if 0 - if( OMPI_SUCCESS != (ret = ompi_crcp_bkmrk_pml_quiesce_end(QUIESCE_TAG_CONTINUE) ) ) { - ; - } - return OMPI_SUCCESS; -#else - return OMPI_ERR_NOT_IMPLEMENTED; -#endif -} - -/****************** - * Local functions - ******************/ diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c deleted file mode 100644 index fb2430c0873..00000000000 --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c +++ /dev/null @@ -1,6490 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2011 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * - */ -#include "ompi_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNIST_H */ - -#include "opal/dss/dss.h" -#include "opal/runtime/opal_cr.h" -#include "opal/mca/event/event.h" -#include "opal/util/output.h" - -#include "opal/util/opal_environ.h" -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/pmix/pmix.h" - -#include "ompi/request/request.h" -#include "ompi/mca/rte/rte.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/mca/pml/base/pml_base_request.h" -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" - -#include "opal/class/opal_free_list.h" -#include "ompi/runtime/ompi_cr.h" -#include "orte/runtime/orte_wait.h" - -#include "crcp_bkmrk.h" -#include "crcp_bkmrk_pml.h" - -/************************************ - * Locally Global vars - ************************************/ -#define PROBE_ANY_SIZE ((size_t) 0) -#define PROBE_ANY_COUNT 
((size_t) 0) - -#define PERSIST_MARKER ((int) -1) - -#define RECV_MATCH_RESP_DONE 0 -#define RECV_MATCH_RESP_MORE 1 -#define RECV_MATCH_RESP_ERROR 2 - -#define INVALID_INT -123456789 - -#define FIND_MSG_TRUE 0 -#define FIND_MSG_FALSE 1 -#define FIND_MSG_UNKNOWN 2 - -/* Pointers to the 'real' PML */ -static mca_pml_base_component_t *wrapped_pml_component = NULL; -static mca_pml_base_module_t *wrapped_pml_module = NULL; - -/* A unique ID for each message signature in the system */ -static uint64_t message_seq_num = 1; -static uint64_t content_ref_seq_num = 1; - -/* The current message being worked on */ -static uint64_t current_msg_id = 0; -static ompi_crcp_bkmrk_pml_message_type_t current_msg_type = 0; - -/* If we need to stall the C/R coordination until the current - * operation is complete */ -static bool stall_for_completion; - -/* - * State of the ft_event - */ -static int ft_event_state = OPAL_CRS_RUNNING; - -/* - * List of known peers - */ -opal_list_t ompi_crcp_bkmrk_pml_peer_refs; - -/* - * MPI_ANY_SOURCE recv lists - */ -opal_list_t unknown_recv_from_list; -opal_list_t unknown_persist_recv_list; - -/* - * List of pending drain acks - */ -opal_list_t drained_msg_ack_list; - -/* - * Free lists - */ -opal_free_list_t coord_state_free_list; -opal_free_list_t content_ref_free_list; -opal_free_list_t peer_ref_free_list; -opal_free_list_t traffic_msg_ref_free_list; -opal_free_list_t drain_msg_ref_free_list; -opal_free_list_t drain_ack_msg_ref_free_list; - -/* - * Quiescence requests to wait on - */ -ompi_request_t ** quiesce_requests = NULL; -ompi_status_public_t ** quiesce_statuses = NULL; -int quiesce_request_count = 0; - -/************************************ - * Local Funcation Decls. 
- ************************************/ - -static int ompi_crcp_bkmrk_pml_start_isend_init(ompi_request_t **request); -static int ompi_crcp_bkmrk_pml_start_irecv_init(ompi_request_t **request); -static int ompi_crcp_bkmrk_pml_start_drain_irecv_init(ompi_request_t **request, bool *found_drain); - -static int ompi_crcp_bkmrk_request_complete_isend_init(struct ompi_request_t *request, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int src, int tag, int tmp_ddt_size); -static int ompi_crcp_bkmrk_request_complete_isend(struct ompi_request_t *request, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int src, int tag, int tmp_ddt_size); -static int ompi_crcp_bkmrk_request_complete_irecv_init(struct ompi_request_t *request, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int src, int tag, int tmp_ddt_size); -static int ompi_crcp_bkmrk_request_complete_irecv(struct ompi_request_t *request, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int src, int tag, int tmp_ddt_size); - -/* - * Traffic Message: Append - */ -static int traffic_message_append(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - opal_list_t * append_list, - ompi_crcp_bkmrk_pml_message_type_t msg_type, - size_t count, - ompi_datatype_t *datatype, - size_t ddt_size, - int tag, - int dest, - struct ompi_communicator_t* comm, - ompi_crcp_bkmrk_pml_traffic_message_ref_t **msg_ref); - -/* - * Traffic Message: Start a persistent send/recv - */ -static int traffic_message_start(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - ompi_request_t **request, - opal_list_t * peer_list, - ompi_crcp_bkmrk_pml_message_content_ref_t ** content_ref); - -/* - * Traffic Message: Move a message from one list to another - * - useful when moving messages from the unknown lists - */ -static int traffic_message_move(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref, - ompi_crcp_bkmrk_pml_message_type_t msg_type, - ompi_crcp_bkmrk_pml_peer_ref_t *from_peer_ref, - opal_list_t * from_list, - 
ompi_crcp_bkmrk_pml_peer_ref_t *to_peer_ref, - opal_list_t * to_list, - ompi_crcp_bkmrk_pml_traffic_message_ref_t **new_msg_ref, - bool keep_active, /* If you have to create a new context, should it be initialized to active? */ - bool remove); /* Remove the original? - false = copy() */ - -/* - * Traffic Message: Strip off the first matching request - */ -static int traffic_message_grab_content(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t ** content_ref, - bool remove, - bool already_drained); - -/* - * Traffic Message: Find a persistent message, and mark it approprately - */ -static int traffic_message_find_mark_persistent(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref, - ompi_request_t **request, - bool cur_active, - bool set_is_active, - ompi_crcp_bkmrk_pml_message_content_ref_t **content_ref); - -/* - * Traffic Message: Find a message that matches the given signature - */ -static int traffic_message_find(opal_list_t * search_list, - size_t count, int tag, int peer, uint32_t comm_id, - size_t ddt_size, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** found_msg_ref, - int active); - -/* - * Traffic Message: Determine if we have received a message matching this signature. - * Return a reference to the message on all matching lists. 
- */ -static int traffic_message_find_recv(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int rank, uint32_t comm_id, int tag, - size_t count, size_t datatype_size, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** posted_recv_msg_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** posted_irecv_msg_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** posted_precv_msg_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** posted_unknown_recv_msg_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** posted_unknown_precv_msg_ref); - -/* - * Traffic Message: For all of the 'active' recvs, create a drain message - */ -static int traffic_message_create_drain_message(bool post_drain, - int max_post, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** posted_msg_ref, - int *num_posted); - -/* - * Drain Message: Append - */ -static int drain_message_append(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - ompi_crcp_bkmrk_pml_message_type_t msg_type, - size_t count, size_t ddt_size, - int tag,int dest, - struct ompi_communicator_t* comm, - ompi_crcp_bkmrk_pml_drain_message_ref_t **msg_ref); - -/* - * Drain Message: Remove - */ -static int drain_message_remove(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref); - -/* - * Drain Message: Check if this receive has been drained - */ -static int drain_message_check_recv(void **buf, size_t count, - ompi_datatype_t *datatype, - int *src, int *tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_status_public_t** status, - bool *found_drain); - -/* - * Drain Message: Find a message matching the given signature on this peer list - */ -static int drain_message_find(opal_list_t * search_list, - size_t count, int tag, int peer, - uint32_t comm_id, size_t ddt_size, - ompi_crcp_bkmrk_pml_drain_message_ref_t ** found_msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t ** content_ref); - -/* 
- * Drain Message: Find a message matching the given signature on any list from any peer - */ -static int drain_message_find_any(size_t count, int tag, int peer, - struct ompi_communicator_t* comm, size_t ddt_size, - ompi_crcp_bkmrk_pml_drain_message_ref_t ** found_msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t ** content_ref, - ompi_crcp_bkmrk_pml_peer_ref_t **peer_ref); - -/* - * Drain Message: Grab a content reference, do not remove - */ -static int drain_message_grab_content(ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t ** content_ref); - -/* - * Drain Message: Copy this drain message to the signature provided, remove drain message - */ -static int drain_message_copy_remove(ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t * content_ref, - int *src, int *tag, - struct ompi_request_t **request, - ompi_status_public_t **status, - ompi_datatype_t *datatype, int count, void **buf, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref); - -/* - * Drain Message: Copy this persistent drain message to the signature provided, remove drain message - */ -static int drain_message_copy_remove_persistent(ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t *drain_content_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t *traffic_msg_ref, - ompi_request_t *request, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref); - -/* - * Peer List: Find the peer reference matching the ORTE process name - */ -static ompi_crcp_bkmrk_pml_peer_ref_t* find_peer(ompi_process_name_t proc); - -/* - * Peer List: Find the peer reference matching the index into the communicator - */ -static int find_peer_in_comm(struct ompi_communicator_t* comm, int proc_idx, - ompi_crcp_bkmrk_pml_peer_ref_t **peer_ref); - -/* - * Coordinate Peers - * - Quiet channels - */ -static int ft_event_coordinate_peers(void); - -/* - * Finalize the coordination of peers. - * - Mostly cleanup. 
- */ -static int ft_event_finalize_exchange(void); - -/* - * Exchange the bookmarks - * - Staggered All-to-All - * LAM/MPI used a staggered all-to-all algoritm for bookmark exachange - * http://www.lam-mpi.org/papers/lacsi2003/ - */ -static int ft_event_exchange_bookmarks(void); - -/* - * Send Bookmarks to peer - */ -static int send_bookmarks(int peer_idx); - -/* - * Recv Bookmarks from peer - */ -static int recv_bookmarks(int peer_idx); - -/* - * Callback to receive the bookmarks from a peer - */ -static void recv_bookmarks_cbfunc(int status, - ompi_process_name_t* sender, - opal_buffer_t *buffer, - ompi_rml_tag_t tag, - void* cbdata); -static int total_recv_bookmarks = 0; - -/* - * Now that we have all the bookmarks, check them to see if we need to - * drain any messages. - */ -static int ft_event_check_bookmarks(void); - -/* - * Send message details to peer - * - matched with recv_msg_details() - */ -static int send_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int total_sent, int total_matched); - -/* - * Send a single message reference to a peer. - * found_match = true if peer found a message to drain. - */ -static int do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t*msg_ref, - int *num_matches, - int *total_found, - bool *finished); -/* - * Recv message details from peer - * - matched with send_msg_details() - */ -static int recv_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int total_recv, int total_matched); - -/* - * Receive a single message reference from a peer. 
- */ -static int do_recv_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int *rank, uint32_t *comm_id, int *tag, - size_t *count, size_t *datatype_size, - int *p_num_sent); - -/* - * Check the message reference to determine if: - * - We have received this message already, or - * - We need to post this message - */ -static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int rank, uint32_t comm_id, int tag, - size_t count, size_t datatype_size, - int p_num_sent, - int *num_resolved); - -/* - * Respond to peer regarding a received message detail - */ -static int do_recv_msg_detail_resp(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int resp, - int num_resolv, - int total_found); - -/* - * Post the Drain Message Acks - * - These are sent once the receiver has finished receiving - * all of the messages it needed to drain off the wire. - */ -static int ft_event_post_drain_acks(void); - -/* - * Callback to service drain message acks. - */ -static void drain_message_ack_cbfunc(int status, - ompi_process_name_t* sender, - opal_buffer_t *buffer, - ompi_rml_tag_t tag, - void* cbdata); - -/* - * Post the Drain Messages - * - These are irecvs to be completed in any order. - */ -static int ft_event_post_drained(void); - -static int ft_event_post_drain_message(ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref); - -/* - * Wait for all drained messages and acks to complete. - * - Once this this finished then all channels associated - * with this process have been drained. - */ -static int ft_event_wait_quiesce(void); - -/* - * Wait for all the posted drain messages to finish - */ -static int wait_quiesce_drained(void); - -/* - * An optimized local version of waitall. 
- * - Remove some unnecessary logic - * - Remove logic to 'free' the request - */ -static int coord_request_wait_all( size_t count, - ompi_request_t ** requests, - ompi_status_public_t ** statuses); - -/* - * An optimized local version of wait. - * - Remove some unnecessary logic - * - Remove logic to 'free' the request - * - Allow it to return if we need to stop waiting - */ -static int coord_request_wait( ompi_request_t * request, - ompi_status_public_t * status); - -/* - * Wait for all the drain ACKs to be received - */ -static int wait_quiesce_drain_ack(void); - -/************************************ - * A few timing structures - * - * CRCP Timing Information - * ----------------------- - * Pi Pj | Variable - * ---- -----+---------- - * exchange_bookmark() | CRCP_TIMER_CKPT_EX_B - * -------------> | CRCP_TIMER_CKPT_EX_PEER_S - * <------------- | CRCP_TIMER_CKPT_EX_PEER_R - * -> wait_for_bk_done() | - * -- -- | CRCP_TIMER_CKPT_EX_WAIT - * check_bookmarks() | - * -- -- | CRCP_TIMER_CKPT_CHECK_B - * -> exchange_details (*) | - * -------------> | CRCP_TIMER_CKPT_CHECK_PEER_S - * <------------- | CRCP_TIMER_CKPT_CHECK_PEER_R - * post_drain[ack]() | - * -- -- | CRCP_TIMER_CKPT_POST_DRAIN - * wait_quiescence() | - * -- -- | CRCP_TIMER_CKPT_WAIT_QUI - * Finish checkpoint | -- Total Pre-Checkpoint - * -- -- | CRCP_TIMER_TOTAL_CKPT - * finalize_exchange() | -- Total Continue / Restart - * -- -- | CRCP_TIMER_TOTAL_CONT / _RST - *-----------------------------+ - * (*) If needed. 
- * - * timing_enabled: - * < 0 : Off - * 1 : Summary only - * 2 : Per Peer messages + Barrier - * 3 : Messages from all procs - * - ************************************/ -#define CRCP_TIMER_TOTAL_CKPT 0 -#define CRCP_TIMER_CKPT_EX_B 1 -#define CRCP_TIMER_CKPT_EX_PEER_S 2 -#define CRCP_TIMER_CKPT_EX_PEER_R 3 -#define CRCP_TIMER_CKPT_EX_WAIT 4 -#define CRCP_TIMER_CKPT_CHECK_B 5 -#define CRCP_TIMER_CKPT_CHECK_PEER_S 6 -#define CRCP_TIMER_CKPT_CHECK_PEER_R 7 -#define CRCP_TIMER_CKPT_POST_DRAIN 8 -#define CRCP_TIMER_CKPT_WAIT_QUI 9 -#define CRCP_TIMER_TOTAL_CONT 10 -#define CRCP_TIMER_TOTAL_RST 11 -#define CRCP_TIMER_MAX 12 - -static double get_time(void); -static void start_time(int idx); -static void end_time(int idx); -static void display_indv_timer(int idx, int proc, int msgs); -static void display_indv_timer_core(int idx, int proc, int msgs, bool direct); -static void display_all_timers(int state); -static void clear_timers(void); - -double timer_start[CRCP_TIMER_MAX]; -double timer_end[CRCP_TIMER_MAX]; -char * timer_label[CRCP_TIMER_MAX]; - -#define START_TIMER(idx) \ - { \ - if(OPAL_UNLIKELY(timing_enabled > 0)) { \ - start_time(idx); \ - } \ - } - -#define END_TIMER(idx) \ - { \ - if(OPAL_UNLIKELY(timing_enabled > 0)) { \ - end_time(idx); \ - } \ - } - -#define DISPLAY_INDV_TIMER(idx, proc, msg) \ - { \ - if(OPAL_UNLIKELY(timing_enabled > 0)) { \ - display_indv_timer(idx, proc, msg); \ - } \ - } - -#define DISPLAY_ALL_TIMERS(var) \ - { \ - if(OPAL_UNLIKELY(timing_enabled > 0)) { \ - display_all_timers(var); \ - } \ - } - -/************************************ - * Additional Debuging dumps - ************************************/ -#if OPAL_ENABLE_DEBUG -static void traffic_message_dump_peer(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, char * msg, bool root_only); -static void traffic_message_dump_msg_list(opal_list_t *msg_list, bool is_drain); -static void traffic_message_dump_msg_indv(ompi_crcp_bkmrk_pml_traffic_message_ref_t * msg_ref, char * msg, bool vshort); 
-static void traffic_message_dump_msg_content_indv(ompi_crcp_bkmrk_pml_message_content_ref_t * content_ref); - -static void traffic_message_dump_drain_msg_indv(ompi_crcp_bkmrk_pml_drain_message_ref_t * msg_ref, char * msg, bool vshort); - -#define TRAFFIC_MSG_DUMP_PEER(lv, a) { \ - if( lv <= mca_crcp_bkmrk_component.super.verbose ) { \ - traffic_message_dump_peer a; \ - } \ -} -#define TRAFFIC_MSG_DUMP_MSG_LIST(lv, a) { \ - if( lv <= mca_crcp_bkmrk_component.super.verbose ) { \ - traffic_message_dump_msg_list a; \ - } \ -} -#define TRAFFIC_MSG_DUMP_MSG_INDV(lv, a) { \ - if( lv <= mca_crcp_bkmrk_component.super.verbose ) { \ - traffic_message_dump_msg_indv a; \ - } \ -} -#define TRAFFIC_MSG_DUMP_MSG_CONTENT_INDV(lv, a) { \ - if( lv <= mca_crcp_bkmrk_component.super.verbose ) { \ - traffic_message_dump_msg_content_indv a; \ - } \ -} -#define TRAFFIC_MSG_DUMP_DRAIN_MSG_INDV(lv, a) { \ - if( lv <= mca_crcp_bkmrk_component.super.verbose ) { \ - traffic_message_dump_drain_msg_indv a; \ - } \ -} -#else -#define TRAFFIC_MSG_DUMP_PEER(lv, a) ; -#define TRAFFIC_MSG_DUMP_MSG_LIST(lv, a) ; -#define TRAFFIC_MSG_DUMP_MSG_INDV(lv, a) ; -#define TRAFFIC_MSG_DUMP_MSG_CONTENT_INDV(lv, a) ; -#define TRAFFIC_MSG_DUMP_DRAIN_MSG_INDV(lv, a) ; -#endif - -#define ERROR_SHOULD_NEVER_HAPPEN(msg) { \ - opal_output(0, msg \ - " ---------- This should never happen ---------- (%s:%d)", \ - __FILE__, __LINE__); \ -} - -#define ERROR_SHOULD_NEVER_HAPPEN_ARG(msg, arg) { \ - opal_output(0, msg \ - " ---------- This should never happen ---------- (%s:%d)", \ - arg, __FILE__, __LINE__); \ -} - -/************************************ - * Declare/Define Object Structures - ************************************/ -/* - * Free List Maintenance - */ -#define HOKE_PEER_REF_ALLOC(peer_ref) \ -do { \ - peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t *) \ - opal_free_list_wait (&peer_ref_free_list); \ -} while(0) - -#define HOKE_PEER_REF_RETURN(peer_ref) \ -do { \ - opal_free_list_return (&peer_ref_free_list, \ - 
(opal_free_list_item_t*)peer_ref); \ -} while(0) - - -#define HOKE_CONTENT_REF_ALLOC(content_ref) \ -do { \ - content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*) \ - opal_free_list_wait (&content_ref_free_list); \ - content_ref->msg_id = content_ref_seq_num; \ - content_ref_seq_num++; \ -} while(0) - -#define HOKE_CONTENT_REF_RETURN(content_ref) \ -do { \ - opal_free_list_return (&content_ref_free_list, \ - (opal_free_list_item_t*)content_ref); \ -} while(0) - - -#define HOKE_TRAFFIC_MSG_REF_ALLOC(msg_ref) \ -do { \ - msg_ref = (ompi_crcp_bkmrk_pml_traffic_message_ref_t*) \ - opal_free_list_wait (&traffic_msg_ref_free_list); \ -} while(0) - -#define HOKE_TRAFFIC_MSG_REF_RETURN(msg_ref) \ -do { \ - opal_free_list_return (&traffic_msg_ref_free_list, \ - (opal_free_list_item_t*)msg_ref); \ -} while(0) - - -#define HOKE_DRAIN_MSG_REF_ALLOC(msg_ref) \ -do { \ - msg_ref = (ompi_crcp_bkmrk_pml_drain_message_ref_t *) \ - opal_free_list_wait (&drain_msg_ref_free_list); \ -} while(0) - -#define HOKE_DRAIN_MSG_REF_RETURN(msg_ref) \ -do { \ - opal_free_list_return (&drain_msg_ref_free_list, \ - (opal_free_list_item_t*)msg_ref); \ -} while(0) - - -#define HOKE_DRAIN_ACK_MSG_REF_ALLOC(msg_ref) \ -do { \ - msg_ref = (ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *) \ - opal_free_list_wait (&drain_ack_msg_ref_free_list); \ -} while(0) - -#define HOKE_DRAIN_ACK_MSG_REF_RETURN(msg_ref) \ -do { \ - opal_free_list_return (&drain_ack_msg_ref_free_list, \ - (opal_free_list_item_t*)msg_ref); \ -} while(0) - - -/* - * Peer reference - */ -OBJ_CLASS_INSTANCE(ompi_crcp_bkmrk_pml_peer_ref_t, - opal_list_item_t, - ompi_crcp_bkmrk_pml_peer_ref_construct, - ompi_crcp_bkmrk_pml_peer_ref_destruct); - -void ompi_crcp_bkmrk_pml_peer_ref_construct(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref) { - peer_ref->proc_name.jobid = ORTE_JOBID_INVALID; - peer_ref->proc_name.vpid = ORTE_VPID_INVALID; - - OBJ_CONSTRUCT(&peer_ref->send_list, opal_list_t); - OBJ_CONSTRUCT(&peer_ref->isend_list, opal_list_t); - 
OBJ_CONSTRUCT(&peer_ref->send_init_list, opal_list_t); - - OBJ_CONSTRUCT(&peer_ref->recv_list, opal_list_t); - OBJ_CONSTRUCT(&peer_ref->irecv_list, opal_list_t); - OBJ_CONSTRUCT(&peer_ref->recv_init_list, opal_list_t); - - OBJ_CONSTRUCT(&peer_ref->drained_list, opal_list_t); - - peer_ref->total_msgs_sent = 0; - peer_ref->matched_msgs_sent = 0; - - peer_ref->total_msgs_recvd = 0; - peer_ref->matched_msgs_recvd = 0; - - peer_ref->total_drained_msgs = 0; - - peer_ref->ack_required = false; -} - -void ompi_crcp_bkmrk_pml_peer_ref_destruct( ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref) { - opal_list_item_t* item = NULL; - - peer_ref->proc_name.jobid = ORTE_JOBID_INVALID; - peer_ref->proc_name.vpid = ORTE_VPID_INVALID; - - while( NULL != (item = opal_list_remove_first(&peer_ref->send_list)) ) { - HOKE_TRAFFIC_MSG_REF_RETURN(item); - } - OBJ_DESTRUCT(&peer_ref->send_list); - while( NULL != (item = opal_list_remove_first(&peer_ref->isend_list)) ) { - HOKE_TRAFFIC_MSG_REF_RETURN(item); - } - OBJ_DESTRUCT(&peer_ref->isend_list); - while( NULL != (item = opal_list_remove_first(&peer_ref->send_init_list)) ) { - HOKE_TRAFFIC_MSG_REF_RETURN(item); - } - OBJ_DESTRUCT(&peer_ref->send_init_list); - - while( NULL != (item = opal_list_remove_first(&peer_ref->recv_list)) ) { - HOKE_TRAFFIC_MSG_REF_RETURN(item); - } - OBJ_DESTRUCT(&peer_ref->recv_list); - while( NULL != (item = opal_list_remove_first(&peer_ref->irecv_list)) ) { - HOKE_TRAFFIC_MSG_REF_RETURN(item); - } - OBJ_DESTRUCT(&peer_ref->irecv_list); - while( NULL != (item = opal_list_remove_first(&peer_ref->recv_init_list)) ) { - HOKE_TRAFFIC_MSG_REF_RETURN(item); - } - OBJ_DESTRUCT(&peer_ref->recv_init_list); - - while( NULL != (item = opal_list_remove_first(&peer_ref->drained_list)) ) { - HOKE_DRAIN_MSG_REF_RETURN(item); - } - OBJ_DESTRUCT(&peer_ref->drained_list); - - peer_ref->total_msgs_sent = 0; - peer_ref->matched_msgs_sent = 0; - - peer_ref->total_msgs_recvd = 0; - peer_ref->matched_msgs_recvd = 0; - - 
peer_ref->total_drained_msgs = 0; - - peer_ref->ack_required = false; -} - -/* - * Message Content Structure - */ -OBJ_CLASS_INSTANCE(ompi_crcp_bkmrk_pml_message_content_ref_t, - opal_list_item_t, - ompi_crcp_bkmrk_pml_message_content_ref_construct, - ompi_crcp_bkmrk_pml_message_content_ref_destruct); - -void ompi_crcp_bkmrk_pml_message_content_ref_construct(ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref) -{ - content_ref->buffer = NULL; - content_ref->request = NULL; - content_ref->active = false; - - content_ref->done = false; - content_ref->active = false; - content_ref->already_posted = false; - content_ref->already_drained = false; - - content_ref->msg_id = 0; -} - -void ompi_crcp_bkmrk_pml_message_content_ref_destruct( ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref) -{ - if( NULL != content_ref->buffer ) { - free(content_ref->buffer); - } - content_ref->buffer = NULL; - - if( NULL != content_ref->request ) { - OBJ_RELEASE(content_ref->request); - } - content_ref->request = NULL; - - content_ref->active = false; - - content_ref->done = false; - content_ref->active = false; - content_ref->already_posted = false; - content_ref->already_drained = false; - - content_ref->msg_id = 0; -} - -/* - * Traffic Message - */ -OBJ_CLASS_INSTANCE(ompi_crcp_bkmrk_pml_traffic_message_ref_t, - opal_list_item_t, - ompi_crcp_bkmrk_pml_traffic_message_ref_construct, - ompi_crcp_bkmrk_pml_traffic_message_ref_destruct); - -void ompi_crcp_bkmrk_pml_traffic_message_ref_construct(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref) { - msg_ref->msg_id = 0; - msg_ref->msg_type = COORD_MSG_TYPE_UNKNOWN; - - msg_ref->count = 0; - msg_ref->ddt_size = 0; - msg_ref->tag = 0; - msg_ref->rank = 0; - msg_ref->comm = NULL; - - OBJ_CONSTRUCT(&msg_ref->msg_contents, opal_list_t); - - msg_ref->proc_name.jobid = ORTE_JOBID_INVALID; - msg_ref->proc_name.vpid = ORTE_VPID_INVALID; - - msg_ref->matched = INVALID_INT; - msg_ref->done = INVALID_INT; - msg_ref->active = INVALID_INT; - 
msg_ref->posted = INVALID_INT; - msg_ref->active_drain = INVALID_INT; -} - -void ompi_crcp_bkmrk_pml_traffic_message_ref_destruct( ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref) { - opal_list_item_t* item = NULL; - - msg_ref->msg_id = 0; - msg_ref->msg_type = COORD_MSG_TYPE_UNKNOWN; - - msg_ref->count = 0; - msg_ref->ddt_size = 0; - msg_ref->tag = 0; - msg_ref->rank = 0; - msg_ref->comm = NULL; - - while( NULL != (item = opal_list_remove_first(&(msg_ref->msg_contents)) ) ) { - HOKE_CONTENT_REF_RETURN(item); - } - OBJ_DESTRUCT(&(msg_ref->msg_contents)); - - msg_ref->proc_name.jobid = ORTE_JOBID_INVALID; - msg_ref->proc_name.vpid = ORTE_VPID_INVALID; - - msg_ref->matched = INVALID_INT; - msg_ref->done = INVALID_INT; - msg_ref->active = INVALID_INT; - msg_ref->posted = INVALID_INT; - msg_ref->active_drain = INVALID_INT; -} - -/* - * Drain Message - */ -OBJ_CLASS_INSTANCE(ompi_crcp_bkmrk_pml_drain_message_ref_t, - opal_list_item_t, - ompi_crcp_bkmrk_pml_drain_message_ref_construct, - ompi_crcp_bkmrk_pml_drain_message_ref_destruct); - -void ompi_crcp_bkmrk_pml_drain_message_ref_construct(ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref) { - msg_ref->msg_id = 0; - msg_ref->msg_type = COORD_MSG_TYPE_UNKNOWN; - - msg_ref->count = 0; - - msg_ref->datatype = NULL; - msg_ref->ddt_size = 0; - - msg_ref->tag = 0; - msg_ref->rank = 0; - msg_ref->comm = NULL; - - OBJ_CONSTRUCT(&msg_ref->msg_contents, opal_list_t); - - msg_ref->proc_name.jobid = ORTE_JOBID_INVALID; - msg_ref->proc_name.vpid = ORTE_VPID_INVALID; - - msg_ref->done = INVALID_INT; - msg_ref->active = INVALID_INT; - msg_ref->already_posted = INVALID_INT; -} - -void ompi_crcp_bkmrk_pml_drain_message_ref_destruct( ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref) { - opal_list_item_t* item = NULL; - - msg_ref->msg_id = 0; - msg_ref->msg_type = COORD_MSG_TYPE_UNKNOWN; - - msg_ref->count = 0; - - if( NULL != msg_ref->datatype ) { - OBJ_RELEASE(msg_ref->datatype); - msg_ref->datatype = NULL; - } - msg_ref->ddt_size = 
0; - - msg_ref->tag = 0; - msg_ref->rank = 0; - msg_ref->comm = NULL; - - while( NULL != (item = opal_list_remove_first(&(msg_ref->msg_contents)) ) ) { - HOKE_CONTENT_REF_RETURN(item); - } - OBJ_DESTRUCT(&(msg_ref->msg_contents)); - - msg_ref->proc_name.jobid = ORTE_JOBID_INVALID; - msg_ref->proc_name.vpid = ORTE_VPID_INVALID; - - msg_ref->done = INVALID_INT; - msg_ref->active = INVALID_INT; - msg_ref->already_posted = INVALID_INT; -} - -/* - * Drain Ack Message - */ -OBJ_CLASS_INSTANCE(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t, - opal_list_item_t, - ompi_crcp_bkmrk_pml_drain_message_ack_ref_construct, - ompi_crcp_bkmrk_pml_drain_message_ack_ref_destruct); - -void ompi_crcp_bkmrk_pml_drain_message_ack_ref_construct(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref) { - msg_ack_ref->complete = false; - - msg_ack_ref->peer.jobid = ORTE_JOBID_INVALID; - msg_ack_ref->peer.vpid = ORTE_VPID_INVALID; -} - -void ompi_crcp_bkmrk_pml_drain_message_ack_ref_destruct( ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref) { - msg_ack_ref->complete = false; - - msg_ack_ref->peer.jobid = ORTE_JOBID_INVALID; - msg_ack_ref->peer.vpid = ORTE_VPID_INVALID; -} - - -/* - * PML state - */ -OBJ_CLASS_INSTANCE(ompi_crcp_bkmrk_pml_state_t, - ompi_crcp_base_pml_state_t, - NULL, - NULL - ); - -/************************************ - * Some Macro shortcuts - ************************************/ -#define CRCP_COORD_STATE_ALLOC(state_ref) \ -do { \ - state_ref = (ompi_crcp_bkmrk_pml_state_t *) \ - opal_free_list_wait (&coord_state_free_list); \ -} while(0) - -#define CRCP_COORD_STATE_RETURN(state_ref) \ -do { \ - opal_free_list_return (&coord_state_free_list, \ - (opal_free_list_item_t *)state_ref); \ -} while(0) - -#define CREATE_COORD_STATE(coord_state, pml_state, v_peer_ref, v_msg_ref) \ - { \ - CRCP_COORD_STATE_ALLOC(coord_state); \ - \ - coord_state->prev_ptr = pml_state; \ - coord_state->p_super.super = pml_state->super; \ - coord_state->p_super.state = pml_state->state; \ - 
coord_state->p_super.error_code = pml_state->error_code; \ - coord_state->p_super.wrapped_pml_component = pml_state->wrapped_pml_component; \ - coord_state->p_super.wrapped_pml_module = pml_state->wrapped_pml_module; \ - \ - coord_state->peer_ref = v_peer_ref; \ - coord_state->msg_ref = v_msg_ref; \ - } - -#define EXTRACT_COORD_STATE(pml_state, v_coord_state, v_rtn_state, v_peer_ref, v_msg_ref) \ - { \ - v_coord_state = (ompi_crcp_bkmrk_pml_state_t*)pml_state; \ - v_rtn_state = v_coord_state->prev_ptr; \ - v_peer_ref = v_coord_state->peer_ref; \ - v_msg_ref = v_coord_state->msg_ref; \ - } - - -#define CREATE_NEW_MSG(msg_ref, v_type, v_count, v_ddt_size, v_tag, v_rank, v_comm, p_jobid, p_vpid) \ - { \ - HOKE_TRAFFIC_MSG_REF_ALLOC(msg_ref); \ - \ - msg_ref->msg_id = message_seq_num; \ - message_seq_num++; \ - \ - msg_ref->msg_type = v_type; \ - \ - msg_ref->count = v_count; \ - \ - msg_ref->ddt_size = v_ddt_size; \ - \ - msg_ref->tag = v_tag; \ - msg_ref->rank = v_rank; \ - msg_ref->comm = v_comm; \ - \ - msg_ref->proc_name.jobid = p_jobid; \ - msg_ref->proc_name.vpid = p_vpid; \ - \ - msg_ref->matched = 0; \ - msg_ref->done = 0; \ - msg_ref->active = 0; \ - msg_ref->posted = 0; \ - msg_ref->active_drain = 0; \ - } - -#define CREATE_NEW_DRAIN_MSG(msg_ref, v_type, v_count, v_ddt_size, v_tag, v_rank, v_comm, p_jobid, p_vpid) \ - { \ - HOKE_DRAIN_MSG_REF_ALLOC(msg_ref); \ - \ - msg_ref->msg_id = message_seq_num; \ - message_seq_num++; \ - \ - msg_ref->msg_type = v_type; \ - \ - msg_ref->count = v_count; \ - \ - msg_ref->datatype = NULL; \ - msg_ref->ddt_size = ddt_size; \ - \ - msg_ref->tag = v_tag; \ - msg_ref->rank = v_rank; \ - msg_ref->comm = v_comm; \ - \ - msg_ref->proc_name.jobid = p_jobid; \ - msg_ref->proc_name.vpid = p_vpid; \ - } - - -#define PACK_BUFFER(buffer, var, count, type, error_msg) \ - { \ - if (OMPI_SUCCESS != (ret = opal_dss.pack(buffer, &(var), count, type)) ) { \ - opal_output(mca_crcp_bkmrk_component.super.output_handle, \ - "%s (Return %d)", 
error_msg, ret); \ - exit_status = ret; \ - goto cleanup; \ - } \ - } - -#define UNPACK_BUFFER(buffer, var, count, type, error_msg) \ - { \ - int32_t n = count; \ - if (OPAL_SUCCESS != (ret = opal_dss.unpack(buffer, &(var), &n, type)) ) { \ - opal_output(mca_crcp_bkmrk_component.super.output_handle, \ - "%s (Return %d)", error_msg, ret); \ - exit_status = ret; \ - goto cleanup; \ - } \ - } - -/**************** - * PML Wrapper Init/Finalize - ****************/ -int ompi_crcp_bkmrk_pml_init(void) { - message_seq_num = 1; - current_msg_id = 0; - current_msg_type = COORD_MSG_TYPE_UNKNOWN; - stall_for_completion = false; - ft_event_state = OPAL_CRS_RUNNING; - - OBJ_CONSTRUCT(&ompi_crcp_bkmrk_pml_peer_refs, opal_list_t); - - OBJ_CONSTRUCT(&unknown_recv_from_list, opal_list_t); - OBJ_CONSTRUCT(&unknown_persist_recv_list, opal_list_t); - - OBJ_CONSTRUCT(&drained_msg_ack_list, opal_list_t); - - /* Create free lists for - * - Coord State - * - Peer Refs - * - Traffic Message Refs - * - Drain Message Refs - * - Drain ACK Messsage Refs - * - Message Contents? 
- */ - OBJ_CONSTRUCT(&coord_state_free_list, opal_free_list_t); - opal_free_list_init (&coord_state_free_list, - sizeof(ompi_crcp_bkmrk_pml_state_t), - opal_cache_line_size, - OBJ_CLASS(ompi_crcp_bkmrk_pml_state_t), - 0,opal_cache_line_size, - 4, /* Initial number */ - -1, /* Max = Unlimited */ - 4, /* Increment by */ - NULL, 0, NULL, NULL, NULL); - - OBJ_CONSTRUCT(&content_ref_free_list, opal_free_list_t); - opal_free_list_init (&content_ref_free_list, - sizeof(ompi_crcp_bkmrk_pml_message_content_ref_t), - opal_cache_line_size, - OBJ_CLASS(ompi_crcp_bkmrk_pml_message_content_ref_t), - 0,opal_cache_line_size, - 80, /* Initial number */ - -1, /* Max = Unlimited */ - 32, /* Increment by */ - NULL, 0, NULL, NULL, NULL); - - OBJ_CONSTRUCT(&peer_ref_free_list, opal_free_list_t); - opal_free_list_init (&peer_ref_free_list, - sizeof(ompi_crcp_bkmrk_pml_peer_ref_t), - opal_cache_line_size, - OBJ_CLASS(ompi_crcp_bkmrk_pml_peer_ref_t), - 0,opal_cache_line_size, - 16, /* Initial number */ - -1, /* Max = Unlimited */ - 16, /* Increment by */ - NULL, 0, NULL, NULL, NULL); - - OBJ_CONSTRUCT(&traffic_msg_ref_free_list, opal_free_list_t); - opal_free_list_init (&traffic_msg_ref_free_list, - sizeof(ompi_crcp_bkmrk_pml_traffic_message_ref_t), - opal_cache_line_size, - OBJ_CLASS(ompi_crcp_bkmrk_pml_traffic_message_ref_t), - 0,opal_cache_line_size, - 32, /* Initial number */ - -1, /* Max = Unlimited */ - 64, /* Increment by */ - NULL, 0, NULL, NULL, NULL); - - OBJ_CONSTRUCT(&drain_msg_ref_free_list, opal_free_list_t); - opal_free_list_init (&drain_msg_ref_free_list, - sizeof(ompi_crcp_bkmrk_pml_drain_message_ref_t), - opal_cache_line_size, - OBJ_CLASS(ompi_crcp_bkmrk_pml_drain_message_ref_t), - 0,opal_cache_line_size, - 32, /* Initial number */ - -1, /* Max = Unlimited */ - 64, /* Increment by */ - NULL, 0, NULL, NULL, NULL); - - OBJ_CONSTRUCT(&drain_ack_msg_ref_free_list, opal_free_list_t); - opal_free_list_init (&drain_ack_msg_ref_free_list, - 
sizeof(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t), - opal_cache_line_size, - OBJ_CLASS(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t), - 0,opal_cache_line_size, - 16, /* Initial number */ - -1, /* Max = Unlimited */ - 16, /* Increment by */ - NULL, 0, NULL, NULL, NULL); - - clear_timers(); - - if( timing_enabled > 0 ) { - timer_label[CRCP_TIMER_TOTAL_CKPT] = strdup("Total Ckpt."); - timer_label[CRCP_TIMER_CKPT_EX_B] = strdup("Exchange Bookmarks"); - timer_label[CRCP_TIMER_CKPT_EX_PEER_S] = strdup(" Ex.Bk. Send Peer"); - timer_label[CRCP_TIMER_CKPT_EX_PEER_R] = strdup(" Ex.Bk. Recv Peer"); - timer_label[CRCP_TIMER_CKPT_EX_WAIT] = strdup(" Ex.Bk. Wait"); - - timer_label[CRCP_TIMER_CKPT_CHECK_B] = strdup("Check Bookmarks"); - timer_label[CRCP_TIMER_CKPT_CHECK_PEER_S] = strdup(" Ck.Bk. Send Peer"); - timer_label[CRCP_TIMER_CKPT_CHECK_PEER_R] = strdup(" Ck.Bk. Recv Peer"); - - timer_label[CRCP_TIMER_CKPT_POST_DRAIN] = strdup("Post Drain Msgs."); - timer_label[CRCP_TIMER_CKPT_WAIT_QUI] = strdup("Wait for Quiescence"); - - timer_label[CRCP_TIMER_TOTAL_CONT] = strdup("Total Continue"); - - timer_label[CRCP_TIMER_TOTAL_RST] = strdup("Total Restart"); - } - - return OMPI_SUCCESS; -} - -int ompi_crcp_bkmrk_pml_finalize(void) { - int i; - - current_msg_id = 0; - current_msg_type = COORD_MSG_TYPE_UNKNOWN; - stall_for_completion = false; - ft_event_state = OPAL_CRS_RUNNING; - - OBJ_DESTRUCT(&ompi_crcp_bkmrk_pml_peer_refs); - - OBJ_DESTRUCT(&unknown_recv_from_list); - OBJ_DESTRUCT(&unknown_persist_recv_list); - - OBJ_DESTRUCT(&drained_msg_ack_list); - - /* Destroy All Free Lists */ - OBJ_DESTRUCT(&peer_ref_free_list); - OBJ_DESTRUCT(&traffic_msg_ref_free_list); - OBJ_DESTRUCT(&drain_msg_ref_free_list); - OBJ_DESTRUCT(&drain_ack_msg_ref_free_list); - OBJ_DESTRUCT(&content_ref_free_list); - - if( timing_enabled > 0 ) { - for(i = 0; i < CRCP_TIMER_MAX; ++i) { - free(timer_label[i]); - timer_label[i] = NULL; - } - } - - return OMPI_SUCCESS; -} - -/**************** - * PML Wrapper - 
****************/ -/**************** Enable *****************/ -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_enable( - bool enable, - ompi_crcp_base_pml_state_t* pml_state ) -{ - /* Note: This function is not used. Set to NULL in crcp_bkmrk_module.c */ - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_enable()")); - - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -/**************** Progress *****************/ -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_progress( - ompi_crcp_base_pml_state_t* pml_state) -{ - /* Note: This function is not used. Set to NULL in crcp_bkmrk_module.c */ - - OPAL_OUTPUT_VERBOSE((35, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_progress()")); - - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -/**************** Probe *****************/ -/* JJH - Code reuse: Combine iprobe and probe logic */ -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_iprobe( - int dst, int tag, - struct ompi_communicator_t* comm, - int *matched, - ompi_status_public_t* status, - ompi_crcp_base_pml_state_t* pml_state ) -{ - ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - int exit_status = OMPI_SUCCESS; - int ret; - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_iprobe(%d, %d)", dst, tag)); - - /* - * Before PML Call - * - Determine if this can be satisfied from the drained list - * - Otherwise let the PML handle it - */ - if( OMPI_CRCP_PML_PRE == pml_state->state) { - /* - * Check to see if this message is in the drained message list - */ - if( OMPI_SUCCESS != (ret = drain_message_find_any(PROBE_ANY_COUNT, tag, dst, - comm, PROBE_ANY_SIZE, - &drain_msg_ref, - &content_ref, - NULL) ) ) { - ERROR_SHOULD_NEVER_HAPPEN("crcp:bkmrk: pml_iprobe(): Failed trying to find a drained message."); - exit_status = ret; - goto DONE; - } - - /* - * If the 
message is a drained message - * - Copy of the status structure to pass back to the user - * - Mark the 'matched' flag as true - */ - if( NULL != drain_msg_ref ) { - OPAL_OUTPUT_VERBOSE((12, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_iprobe(): Matched a drained message...")); - - /* Copy the status information */ - if( MPI_STATUS_IGNORE != status ) { - memcpy(status, &content_ref->status, sizeof(ompi_status_public_t)); - } - - /* Mark as complete */ - *matched = 1; - - /* This will identify to the wrapper that this message is complete */ - pml_state->state = OMPI_CRCP_PML_DONE; - pml_state->error_code = OMPI_SUCCESS; - return pml_state; - } - /* - * Otherwise the message is not drained (common case), so let the PML deal with it - */ - else { - /* Mark as not complete */ - *matched = 0; - } - } - - DONE: - pml_state->error_code = exit_status; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_probe( - int dst, int tag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status, - ompi_crcp_base_pml_state_t* pml_state ) -{ - ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - int exit_status = OMPI_SUCCESS; - int ret; - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_probe(%d, %d)", dst, tag)); - - /* - * Before PML Call - * - Determine if this can be satisfied from the drained list - * - Otherwise let the PML handle it - */ - if( OMPI_CRCP_PML_PRE == pml_state->state) { - /* - * Check to see if this message is in the drained message list - */ - if( OMPI_SUCCESS != (ret = drain_message_find_any(PROBE_ANY_COUNT, tag, dst, - comm, PROBE_ANY_SIZE, - &drain_msg_ref, - &content_ref, - NULL) ) ) { - ERROR_SHOULD_NEVER_HAPPEN("crcp:bkmrk: pml_probe(): Failed trying to find a drained message."); - exit_status = ret; - goto DONE; - } - - /* - * If the message is a drained message - * - Copy of the status 
structure to pass back to the user - */ - if( NULL != drain_msg_ref ) { - OPAL_OUTPUT_VERBOSE((12, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_iprobe(): Matched a drained message...")); - - /* Copy the status information */ - if( MPI_STATUS_IGNORE != status ) { - memcpy(status, &content_ref->status, sizeof(ompi_status_public_t)); - } - - /* This will identify to the wrapper that this message is complete */ - pml_state->state = OMPI_CRCP_PML_DONE; - pml_state->error_code = OMPI_SUCCESS; - return pml_state; - } - } - - DONE: - pml_state->error_code = exit_status; - return pml_state; -} - -/**************** Dump *****************/ -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_dump( - struct ompi_communicator_t* comm, - int verbose, - ompi_crcp_base_pml_state_t* pml_state ) -{ - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_dump()")); - - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - - -/**************** Communicator *****************/ -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_add_comm( - struct ompi_communicator_t* comm, - ompi_crcp_base_pml_state_t* pml_state ) -{ - /* Note: This function is not used. Set to NULL in crcp_bkmrk_module.c */ - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_add_comm()")); - - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_comm( - struct ompi_communicator_t* comm, - ompi_crcp_base_pml_state_t* pml_state ) -{ - /* Note: This function is not used. 
Set to NULL in crcp_bkmrk_module.c */ - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_del_comm()")); - - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -/**************** Processes *****************/ -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_add_procs( - struct ompi_proc_t **procs, - size_t nprocs, - ompi_crcp_base_pml_state_t* pml_state ) -{ - ompi_crcp_bkmrk_pml_peer_ref_t *new_peer_ref; - size_t i; - - if( OMPI_CRCP_PML_PRE != pml_state->state ){ - goto DONE; - } - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_add_procs()")); - - /* - * Save pointers to the wrapped PML - */ - wrapped_pml_component = pml_state->wrapped_pml_component; - wrapped_pml_module = pml_state->wrapped_pml_module; - - /* - * Create a peer_ref for each peer added - */ - for( i = 0; i < nprocs; ++i) { - HOKE_PEER_REF_ALLOC(new_peer_ref); - - new_peer_ref->proc_name.jobid = OMPI_CAST_RTE_NAME(&procs[i]->super.proc_name)->jobid; - new_peer_ref->proc_name.vpid = OMPI_CAST_RTE_NAME(&procs[i]->super.proc_name)->vpid; - - opal_list_append(&ompi_crcp_bkmrk_pml_peer_refs, &(new_peer_ref->super)); - } - - DONE: - pml_state->error_code = OMPI_SUCCESS; - return pml_state; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_procs( - struct ompi_proc_t **procs, - size_t nprocs, - ompi_crcp_base_pml_state_t* pml_state ) -{ - opal_list_item_t *item = NULL; - ompi_crcp_bkmrk_pml_peer_ref_t *old_peer_ref; - int exit_status = OMPI_SUCCESS; - size_t i; - - if( OMPI_CRCP_PML_PRE != pml_state->state ){ - goto DONE; - } - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_del_procs()")); - - for( i = 0; i < nprocs; ++i) { - item = (opal_list_item_t*)find_peer(*(ompi_process_name_t*)&procs[i]->super.proc_name); - if(NULL == item) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: del_procs: Unable to find peer %s\n", - 
OMPI_NAME_PRINT(&procs[i]->super.proc_name)); - exit_status = OMPI_ERROR; - goto DONE; - } - - /* Remove the found peer from the list */ - opal_list_remove_item(&ompi_crcp_bkmrk_pml_peer_refs, item); - old_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item; - HOKE_PEER_REF_RETURN(old_peer_ref); - } - - DONE: - pml_state->error_code = exit_status; - return pml_state; -} - -/**************** Send *****************/ -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend_init( - void *buf, size_t count, - ompi_datatype_t *datatype, - int dst, int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state ) -{ - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL; - ompi_crcp_bkmrk_pml_state_t *coord_state = NULL; - int exit_status = OMPI_SUCCESS; - int ret; - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_isend_init()")); - - /* - * Before the PML gets the message: - * - Setup structure to track the message - */ - if( OMPI_CRCP_PML_PRE == pml_state->state ) { - /* - * Find the peer reference - */ - if( OMPI_SUCCESS != (ret = find_peer_in_comm(comm, dst, &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: isend: Failed to find peer_ref\n"); - exit_status = ret; - goto DONE; - } - - /* - * Archive the message Message Object - */ - traffic_message_append(peer_ref, &(peer_ref->send_init_list), - COORD_MSG_TYPE_P_SEND, - count, datatype, 0, tag, dst, comm, - &msg_ref); - - /* Save the pointers */ - CREATE_COORD_STATE(coord_state, pml_state, - peer_ref, msg_ref); - - coord_state->p_super.error_code = OMPI_SUCCESS; - return &coord_state->p_super; - } - /* - * After PML is done, update message reference - */ - else if( OMPI_CRCP_PML_POST == pml_state->state ) { - ompi_crcp_base_pml_state_t *rtn_state = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t 
*new_content = NULL; - - EXTRACT_COORD_STATE(pml_state, coord_state, rtn_state, - peer_ref, msg_ref); - - /* - * Update Message - */ - HOKE_CONTENT_REF_ALLOC(new_content); - new_content->buffer = buf; - new_content->request = *request; - new_content->done = false; - new_content->active = false; - new_content->already_posted = true; - new_content->already_drained = false; - OBJ_RETAIN(*request); - opal_list_append(&(msg_ref->msg_contents), &(new_content->super) ); - - CRCP_COORD_STATE_RETURN(coord_state); - - rtn_state->error_code = OMPI_SUCCESS; - return rtn_state; - } - - DONE: - pml_state->error_code = exit_status; - return pml_state; -} - -static int ompi_crcp_bkmrk_pml_start_isend_init(ompi_request_t **request) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - mca_pml_base_request_t *breq = NULL; - size_t tmp_ddt_size = 0; - - breq = (mca_pml_base_request_t *)(*request); - ompi_datatype_type_size(breq->req_datatype, &tmp_ddt_size); - - /* - * Find the peer reference - */ - if( OMPI_SUCCESS != (ret = find_peer_in_comm(breq->req_comm, - breq->req_peer, - &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_start(): Failed to find peer_ref\n"); - exit_status = ret; - goto DONE; - } - - /* Check the send_init list */ - if( OMPI_SUCCESS != (ret = traffic_message_find(&(peer_ref->send_init_list), - breq->req_count, - breq->req_tag, - breq->req_peer, - breq->req_comm->c_contextid, - tmp_ddt_size, - &msg_ref, - PERSIST_MARKER - ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_start(): Unable to find the proper (send_init) message ref for this recv\n"); - exit_status = ret; - goto DONE; - } - - if( NULL == msg_ref ) { - ERROR_SHOULD_NEVER_HAPPEN("crcp:bkmrk: pml_start(): Could not find message ref"); - exit_status = 
OMPI_ERROR; - goto DONE; - } else { - traffic_message_start(msg_ref, - peer_ref, - request, - &(peer_ref->send_init_list), - &content_ref); - - if( !content_ref->already_drained ) { - /* Account for this inflight send */ - peer_ref->total_msgs_sent += 1; - } - } - - DONE: - return exit_status; -} - -static int ompi_crcp_bkmrk_request_complete_isend_init(struct ompi_request_t *request, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int src, int tag, int tmp_ddt_size) -{ - int ret, exit_status = OMPI_SUCCESS; - mca_pml_base_request_t *breq = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - - breq = (mca_pml_base_request_t *)request; - - /* Check the isend_init list */ - if( OMPI_SUCCESS != (ret = traffic_message_find(&(peer_ref->send_init_list), - breq->req_count, - tag, src, - breq->req_comm->c_contextid, - tmp_ddt_size, - &msg_ref, - FIND_MSG_TRUE - ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete: Unable to find the proper (send_init) message ref for this complete\n"); - exit_status = ret; - goto DONE; - } - - if( NULL == msg_ref ) { - /* - * It is possible that we did not 'find' the message because - * we could have previously marked it as done. Due to the - * logic in the Request Wait/Test routines we could - * receive multiple request complete calls for the - * same request. - * - * It is possible that we have 'completed' this message previously, - * so this case can occur during normal operation. - * This is caused by us checking for completeness twice in ompi_request_wait_all. 
- */ - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: request_complete: No match found for this request :( %d, %d ): [%d/%d,%d]\n", - peer_ref->total_msgs_sent, peer_ref->total_msgs_recvd, - breq->req_peer, src, breq->req_comm->c_contextid)); - exit_status = OMPI_SUCCESS; - goto DONE; - } - - /* Mark request as inactive */ - traffic_message_find_mark_persistent(msg_ref, &request, - true, /* Find currently active */ - false, /* Mark as inactive */ - &content_ref); - - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Request Complete (Send_init) --", true)); - - if( !content_ref->already_drained ) { - msg_ref->done++; - msg_ref->active--; - } else { - msg_ref->active_drain--; - content_ref->already_drained = false; - } - - OPAL_OUTPUT_VERBOSE((25, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete: Marked Message... ( %d, %d )\n", - peer_ref->total_msgs_sent, peer_ref->total_msgs_recvd)); - DONE: - return exit_status; -} - - -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend( - void *buf, size_t count, - ompi_datatype_t *datatype, - int dst, int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state ) -{ - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL; - ompi_crcp_bkmrk_pml_state_t *coord_state = NULL; - int exit_status = OMPI_SUCCESS; - int ret; - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_isend()")); - - /* - * Before the PML gets the message: - * - Setup structure to track the message - */ - if( OMPI_CRCP_PML_PRE == pml_state->state ) { - /* - * Find the peer reference - */ - if( OMPI_SUCCESS != (ret = find_peer_in_comm(comm, dst, &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: isend: Failed to find peer_ref\n"); - exit_status = ret; - goto DONE; - } - - /* - * 
Archive the message Message Object - */ - traffic_message_append(peer_ref, &(peer_ref->isend_list), - COORD_MSG_TYPE_I_SEND, - count, datatype, 0, tag, dst, comm, - &msg_ref); - - /* Bookkeeping */ - peer_ref->total_msgs_sent += 1; - - /* Save the pointers */ - CREATE_COORD_STATE(coord_state, pml_state, - peer_ref, msg_ref); - - coord_state->p_super.error_code = OMPI_SUCCESS; - return &coord_state->p_super; - } - /* - * After PML is done, update message reference - */ - else if( OMPI_CRCP_PML_POST == pml_state->state ) { - ompi_crcp_base_pml_state_t *rtn_state = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *new_content = NULL; - - EXTRACT_COORD_STATE(pml_state, coord_state, rtn_state, - peer_ref, msg_ref); - - /* - * Update Message - */ - HOKE_CONTENT_REF_ALLOC(new_content); - new_content->buffer = NULL; /* No Tracked */ - new_content->request = *request; - new_content->done = false; - new_content->active = true; - new_content->already_posted = true; - new_content->already_drained = false; - OBJ_RETAIN(*request); - opal_list_append(&(msg_ref->msg_contents), &(new_content->super) ); - - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Append Message (isend) --", true)); - - CRCP_COORD_STATE_RETURN(coord_state); - - rtn_state->error_code = OMPI_SUCCESS; - return rtn_state; - } - - DONE: - pml_state->error_code = exit_status; - return pml_state; -} - -static int ompi_crcp_bkmrk_request_complete_isend(struct ompi_request_t *request, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int src, int tag, int tmp_ddt_size) -{ - int ret, exit_status = OMPI_SUCCESS; - mca_pml_base_request_t *breq = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - - breq = (mca_pml_base_request_t *)request; - - /* Check the isend list */ - if( OMPI_SUCCESS != (ret = traffic_message_find(&(peer_ref->isend_list), - breq->req_count, - tag, src, - breq->req_comm->c_contextid, - tmp_ddt_size, - &msg_ref, - FIND_MSG_TRUE - 
) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete: Unable to find the proper (isend) message ref for this complete\n"); - exit_status = ret; - goto DONE; - } - - if( NULL == msg_ref ) { - /* - * It is possible that we did not 'find' the message because - * we could have previously marked it as done. Due to the - * logic in the Request Wait/Test routines we could - * receive multiple request complete calls for the - * same request. - * - * It is possible that we have 'completed' this message previously, - * so this case can occur during normal operation. - * This is caused by us checking for completeness twice in ompi_request_wait_all. - */ - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: request_complete: No match found for this request :( %d, %d ): [%d/%d,%d]\n", - peer_ref->total_msgs_sent, peer_ref->total_msgs_recvd, - breq->req_peer, src, breq->req_comm->c_contextid)); - exit_status = OMPI_SUCCESS; - goto DONE; - } - - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete: Matched an iSend: total = %d", - peer_ref->total_msgs_sent)); - - /* Strip off an isend request */ - traffic_message_grab_content(msg_ref, &content_ref, true, true); /* Remove, prefer already_drained */ - - if( !content_ref->already_drained ) { - msg_ref->done++; - msg_ref->active--; - } else { - msg_ref->active_drain--; - content_ref->already_drained = false; - } - HOKE_CONTENT_REF_RETURN(content_ref); - - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Request Complete (iSend) --", true)); - - OPAL_OUTPUT_VERBOSE((25, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete: Marked Message... 
( %d, %d )\n", - peer_ref->total_msgs_sent, peer_ref->total_msgs_recvd)); - DONE: - return exit_status; -} - - -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_send( - void *buf, size_t count, - ompi_datatype_t *datatype, - int dst, int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - ompi_crcp_base_pml_state_t* pml_state ) -{ - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL; - ompi_crcp_bkmrk_pml_state_t *coord_state = NULL; - int exit_status = OMPI_SUCCESS; - int ret; - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_send()")); - - /* - * Before the PML gets the message: - * - Setup structure to track the message - */ - if( OMPI_CRCP_PML_PRE == pml_state->state ) { - /* - * Find the peer reference - */ - if( OMPI_SUCCESS != (ret = find_peer_in_comm(comm, dst, &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: send: Failed to find peer_ref\n"); - exit_status = ret; - goto DONE; - } - - /* - * Archive the message Message Object - */ - traffic_message_append(peer_ref, &(peer_ref->send_list), - COORD_MSG_TYPE_B_SEND, - count, datatype, 0, tag, dst, comm, - &msg_ref); - - /* Bookkeeping */ - peer_ref->total_msgs_sent += 1; - current_msg_id = msg_ref->msg_id; - current_msg_type = COORD_MSG_TYPE_B_SEND; - - /* Save the pointers */ - CREATE_COORD_STATE(coord_state, pml_state, - peer_ref, msg_ref); - coord_state->p_super.error_code = OMPI_SUCCESS; - - return &coord_state->p_super; - } - /* - * After PML is done, update message reference - */ - else if( OMPI_CRCP_PML_POST == pml_state->state ) { - ompi_crcp_base_pml_state_t *rtn_state = NULL; - - EXTRACT_COORD_STATE(pml_state, coord_state, rtn_state, - peer_ref, msg_ref); - - /* - * Update Message - */ - msg_ref->done++; - msg_ref->active--; - - current_msg_id = 0; - current_msg_type = COORD_MSG_TYPE_UNKNOWN; - - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, 
"Send done", true)); - - CRCP_COORD_STATE_RETURN(coord_state); - rtn_state->error_code = OMPI_SUCCESS; - - return rtn_state; - } - - DONE: - pml_state->error_code = exit_status; - return pml_state; -} - -/**************** Recv *****************/ -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_irecv_init( - void *buf, size_t count, - ompi_datatype_t *datatype, - int src, int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state) -{ - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL; - ompi_crcp_bkmrk_pml_state_t *coord_state = NULL; - int exit_status = OMPI_SUCCESS; - int ret; - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_irecv_init()")); - - /* - * Before PML Call - * - Determine if this can be satisfied from the drained list - * - Otherwise create a new reference to it so we can track it - */ - if( OMPI_CRCP_PML_PRE == pml_state->state) { - /* - * A message will never be on the drained list for this function since - * it does not actually receive anything, just sets up the system. - * The receive for these reqeusts are done in the start() and wait() - * commands. 
- */ - - /* - * Find the Peer - */ - if( MPI_ANY_SOURCE == src || src < 0) { - /* - * Archive the message Message Object - */ - traffic_message_append(NULL, &(unknown_persist_recv_list), - COORD_MSG_TYPE_P_RECV, - count, datatype, 0, tag, src, comm, - &msg_ref); - - CREATE_COORD_STATE(coord_state, pml_state, - NULL, msg_ref); - } - else { - if( OMPI_SUCCESS != (ret = find_peer_in_comm(comm, src, &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: recv: Failed to find peer_ref\n"); - exit_status = ret; - goto DONE; - } - - /* - * Archive the message Message Object - */ - traffic_message_append(peer_ref, &(peer_ref->recv_init_list), - COORD_MSG_TYPE_P_RECV, - count, datatype, 0, tag, src, comm, - &msg_ref); - - CREATE_COORD_STATE(coord_state, pml_state, - peer_ref, msg_ref); - } - - coord_state->p_super.error_code = OMPI_SUCCESS; - return &coord_state->p_super; - } - /* - * Post PML Call - * - bookkeeping... - */ - else if( OMPI_CRCP_PML_POST == pml_state->state) { - ompi_crcp_base_pml_state_t *rtn_state = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *new_content = NULL; - - EXTRACT_COORD_STATE(pml_state, coord_state, rtn_state, - peer_ref, msg_ref); - - /* - * Do the update - */ - HOKE_CONTENT_REF_ALLOC(new_content); - new_content->buffer = buf; - new_content->request = *request; - new_content->done = false; - new_content->active = false; - new_content->already_posted = true; - new_content->already_drained = false; - OBJ_RETAIN(*request); - opal_list_append(&(msg_ref->msg_contents), &(new_content->super) ); - - CRCP_COORD_STATE_RETURN(coord_state); - - rtn_state->error_code = OMPI_SUCCESS; - return rtn_state; - } - - DONE: - pml_state->error_code = exit_status; - return pml_state; -} - -static int ompi_crcp_bkmrk_pml_start_drain_irecv_init(ompi_request_t **request, bool *found_drain) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t 
*msg_ref = NULL; - ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - mca_pml_base_request_t *breq = NULL; - size_t tmp_ddt_size = 0; - - *found_drain = false; - - breq = (mca_pml_base_request_t *)(*request); - ompi_datatype_type_size(breq->req_datatype, &tmp_ddt_size); - - /* - * If peer rank is given then find the peer reference - */ - if( 0 <= breq->req_peer ) { - if( OMPI_SUCCESS != (ret = find_peer_in_comm(breq->req_comm, - breq->req_peer, - &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_start(): Failed to find peer_ref\n"); - exit_status = ret; - goto DONE; - } - - if( OMPI_SUCCESS != (ret = traffic_message_find(&(peer_ref->recv_init_list), - breq->req_count, - breq->req_tag, - breq->req_peer, - breq->req_comm->c_contextid, - tmp_ddt_size, - &msg_ref, - PERSIST_MARKER - ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_start(): Unable to find the proper (recv) message ref for this recv\n"); - exit_status = ret; - goto DONE; - } - } - /* - * Otherwise peer is not known - */ - else { - if( OMPI_SUCCESS != (ret = traffic_message_find(&(unknown_persist_recv_list), - breq->req_count, - breq->req_tag, - INVALID_INT, - breq->req_comm->c_contextid, - tmp_ddt_size, - &msg_ref, - PERSIST_MARKER - ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_start(): Unable to find the proper (recv) message ref for this recv\n"); - exit_status = ret; - goto DONE; - } - } - - /* - * No message found :( - */ - if( NULL == msg_ref ) { - ERROR_SHOULD_NEVER_HAPPEN("crcp:bkmrk: pml_start(): Could not find message ref"); - exit_status = OMPI_ERROR; - goto DONE; - } - - /* - * See if this mesage was already drained. 
- */ - if( NULL != peer_ref ) { - if( OMPI_SUCCESS != (ret = drain_message_find(&(peer_ref->drained_list), - msg_ref->count, msg_ref->tag, msg_ref->rank, - msg_ref->comm->c_contextid, msg_ref->ddt_size, - &drain_msg_ref, - &content_ref) ) ) { - ERROR_SHOULD_NEVER_HAPPEN("crcp:bkmrk: pml_start(): Failed trying to find a drained message."); - exit_status = ret; - goto DONE; - } - } else { - if( OMPI_SUCCESS != (ret = drain_message_find_any(msg_ref->count, msg_ref->tag, msg_ref->rank, - msg_ref->comm, msg_ref->ddt_size, - &drain_msg_ref, - &content_ref, - &peer_ref) ) ) { - ERROR_SHOULD_NEVER_HAPPEN("crcp:bkmrk: pml_start(): Failed trying to find a drained message."); - exit_status = ret; - goto DONE; - } - } - - /* - * Found a drained message! - */ - if( NULL != drain_msg_ref ) { - *found_drain = true; - OPAL_OUTPUT_VERBOSE((12, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_start(): Matched a drained message...")); - - if( OMPI_SUCCESS != (ret = drain_message_copy_remove_persistent(drain_msg_ref, - content_ref, - msg_ref, - *request, - peer_ref) ) ) { - opal_output( mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_start(): Datatype copy failed (%d)", - ret); - } - - peer_ref->total_drained_msgs -= 1; - } - - DONE: - return exit_status; -} - -static int ompi_crcp_bkmrk_pml_start_irecv_init(ompi_request_t **request) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL; - mca_pml_base_request_t *breq = NULL; - size_t tmp_ddt_size = 0; - - breq = (mca_pml_base_request_t *)(*request); - ompi_datatype_type_size(breq->req_datatype, &tmp_ddt_size); - - /* - * If peer rank is given then find the peer reference - */ - if( 0 <= breq->req_peer ) { - if( OMPI_SUCCESS != (ret = find_peer_in_comm(breq->req_comm, - breq->req_peer, - &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_start(): Failed to find 
peer_ref\n"); - exit_status = ret; - goto DONE; - } - - if( OMPI_SUCCESS != (ret = traffic_message_find(&(peer_ref->recv_init_list), - breq->req_count, - breq->req_tag, - breq->req_peer, - breq->req_comm->c_contextid, - tmp_ddt_size, - &msg_ref, - PERSIST_MARKER - ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_start(): Unable to find the proper (recv) message ref for this recv\n"); - exit_status = ret; - goto DONE; - } - - if( NULL != msg_ref ) { - traffic_message_start(msg_ref, - peer_ref, - request, - &(peer_ref->recv_init_list), - NULL); - } - } - /* - * Else peer is not known - */ - else { - if( OMPI_SUCCESS != (ret = traffic_message_find(&(unknown_persist_recv_list), - breq->req_count, - breq->req_tag, - INVALID_INT, - breq->req_comm->c_contextid, - tmp_ddt_size, - &msg_ref, - PERSIST_MARKER - ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_start(): Unable to find the proper (recv) message ref for this recv\n"); - exit_status = ret; - goto DONE; - } - - if( NULL != msg_ref ) { - traffic_message_start(msg_ref, - NULL, - request, - &(unknown_persist_recv_list), - NULL); - } - } - - if( NULL == msg_ref ) { - ERROR_SHOULD_NEVER_HAPPEN("crcp:bkmrk: pml_start(): Could not find message ref"); - exit_status = OMPI_ERROR; - goto DONE; - } - - DONE: - return exit_status; -} - -static int ompi_crcp_bkmrk_request_complete_irecv_init(struct ompi_request_t *request, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int src, int tag, int tmp_ddt_size) -{ - int ret, exit_status = OMPI_SUCCESS; - mca_pml_base_request_t *breq = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL, *new_msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - - breq = (mca_pml_base_request_t *)request; - - /* - * Check the irecv_init list - */ - if( OMPI_SUCCESS != (ret = traffic_message_find(&(peer_ref->recv_init_list), - breq->req_count, - tag, src, - breq->req_comm->c_contextid, - 
tmp_ddt_size, - &msg_ref, - FIND_MSG_TRUE - ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete: Unable to find the proper (recv_init) message ref for this complete\n"); - exit_status = ret; - goto DONE; - } - - /* - * If not found, check the unknown_irecv_list - */ - if( NULL == msg_ref ) { - if( OMPI_SUCCESS != (ret = traffic_message_find(&(unknown_persist_recv_list), - breq->req_count, - tag, - INVALID_INT, - breq->req_comm->c_contextid, - tmp_ddt_size, - &msg_ref, - FIND_MSG_TRUE - ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: requ_complete: Unable to find the proper (recv_init) message ref for this complete\n"); - exit_status = ret; - goto DONE; - } - - if( NULL != msg_ref ) { - traffic_message_move(msg_ref, - COORD_MSG_TYPE_P_RECV, - NULL, &(unknown_persist_recv_list), - peer_ref, &(peer_ref->recv_init_list), - &new_msg_ref, - true, - false); - msg_ref = new_msg_ref; - } - } - - /* - * If still not found, then we must have completed this already - */ - if( NULL == msg_ref ) { - /* - * It is possible that we did not 'find' the message because - * we could have previously marked it as done. Due to the - * logic in the Request Wait/Test routines we could - * receive multiple request complete calls for the - * same request. - * - * It is possible that we have 'completed' this message previously, - * so this case can occur during normal operation. - * This is caused by us checking for completeness twice in ompi_request_wait_all. 
- */ - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: request_complete: No match found for this request :( %d, %d ): [%d/%d,%d]\n", - peer_ref->total_msgs_sent, peer_ref->total_msgs_recvd, - breq->req_peer, src, breq->req_comm->c_contextid)); - exit_status = OMPI_SUCCESS; - goto DONE; - } - - /* - * Mark request as inactive - * Only increment the total count if this was not accounted for in the last checkpoint - */ - traffic_message_find_mark_persistent(msg_ref, &request, - true, /* Find currently active */ - false, /* Mark as inactive */ - &content_ref); - if( NULL == content_ref ) { - exit_status = OMPI_ERROR; - goto DONE; - } - - if( !content_ref->already_drained ) { - peer_ref->total_msgs_recvd += 1; - msg_ref->done++; - msg_ref->active--; - } else { - msg_ref->active_drain--; - content_ref->already_drained = false; - } - - /* Do not return the content_ref, persistent sends re-use these */ - - if( NULL == new_msg_ref ) { - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Request Complete (Recv_Init) --", true)); - } else { - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Request Complete (Recv_init - Unknown) --", true)); - } - - OPAL_OUTPUT_VERBOSE((25, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete: Marked Message... 
( %d, %d )\n", - peer_ref->total_msgs_sent, peer_ref->total_msgs_recvd)); - DONE: - return exit_status; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_irecv( - void *buf, size_t count, - ompi_datatype_t *datatype, - int src, int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state ) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL; - ompi_crcp_bkmrk_pml_state_t *coord_state = NULL; - bool found_drain = false; - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_irecv()")); - - /* - * Before PML Call - * - Determine if this can be satisfied from the drained list - * - Otherwise create a new reference to it so we can track it - */ - if( OMPI_CRCP_PML_PRE == pml_state->state) { - /* - * Check to see if this message is in the drained message list - */ - found_drain = false; - if( OMPI_SUCCESS != (ret = drain_message_check_recv(buf, count, datatype, - &src, &tag, comm, request, NULL, - &found_drain) ) ) { - ERROR_SHOULD_NEVER_HAPPEN("crcp:bkmrk: pml_recv(): Failed trying to find a drained message."); - exit_status = ret; - goto DONE; - } - - if( found_drain ) { - /* Do *not* increment: - * peer_ref->total_msgs_recvd += 1; - * Because we accounted for this message during the last checkpoint. 
- */ - - /* This will identify to the wrapper that this message is complete */ - pml_state->state = OMPI_CRCP_PML_DONE; - pml_state->error_code = OMPI_SUCCESS; - return pml_state; - } - /* - * Otherwise the message is not drained (common case) - */ - else { - /* - * Find the Peer - */ - if( MPI_ANY_SOURCE == src || src < 0) { - /* - * Archive the message Message Object - */ - traffic_message_append(NULL, &(unknown_recv_from_list), - COORD_MSG_TYPE_I_RECV, - count, datatype, 0, tag, src, comm, - &msg_ref); - - CREATE_COORD_STATE(coord_state, pml_state, - NULL, msg_ref); - } - else { - if( OMPI_SUCCESS != (ret = find_peer_in_comm(comm, src, &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_irecv(): Failed to find peer_ref\n"); - exit_status = ret; - goto DONE; - } - - /* - * Archive the message Message Object - */ - traffic_message_append(peer_ref, &(peer_ref->irecv_list), - COORD_MSG_TYPE_I_RECV, - count, datatype, 0, tag, src, comm, - &msg_ref); - - CREATE_COORD_STATE(coord_state, pml_state, - peer_ref, msg_ref); - } - - coord_state->p_super.error_code = OMPI_SUCCESS; - return &coord_state->p_super; - } - } - /* - * Post PML Call - * - bookkeeping... 
- */ - else if( OMPI_CRCP_PML_POST == pml_state->state) { - ompi_crcp_base_pml_state_t *rtn_state = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *new_content = NULL; - - EXTRACT_COORD_STATE(pml_state, coord_state, rtn_state, - peer_ref, msg_ref); - - /* - * Do the update - */ - HOKE_CONTENT_REF_ALLOC(new_content); - new_content->buffer = NULL; /* No tracked */ - new_content->request = *request; - new_content->done = false; - new_content->active = true; - new_content->already_posted = true; - new_content->already_drained = false; - OBJ_RETAIN(*request); - opal_list_append(&(msg_ref->msg_contents), &(new_content->super) ); - - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Append Message (irecv) --", true)); - - CRCP_COORD_STATE_RETURN(coord_state); - - rtn_state->error_code = OMPI_SUCCESS; - return rtn_state; - } - - DONE: - pml_state->error_code = exit_status; - return pml_state; -} - -static int ompi_crcp_bkmrk_request_complete_irecv(struct ompi_request_t *request, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int src, int tag, int tmp_ddt_size) -{ - int ret, exit_status = OMPI_SUCCESS; - mca_pml_base_request_t *breq = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL, *new_msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - - breq = (mca_pml_base_request_t *)request; - - /* - * Check the irecv list - */ - if( OMPI_SUCCESS != (ret = traffic_message_find(&(peer_ref->irecv_list), - breq->req_count, - tag, src, - breq->req_comm->c_contextid, - tmp_ddt_size, - &msg_ref, - FIND_MSG_TRUE - ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete: Unable to find the proper (irecv) message ref for this complete\n"); - exit_status = ret; - goto DONE; - } - - /* - * If not found, try the unknown_irecv_list - */ - if( NULL == msg_ref ) { - if( OMPI_SUCCESS != (ret = traffic_message_find(&(unknown_recv_from_list), - breq->req_count, - tag, - INVALID_INT, - breq->req_comm->c_contextid, - 
tmp_ddt_size, - &msg_ref, - FIND_MSG_TRUE - ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete: Unable to find the proper (recv_init) message ref for this complete\n"); - exit_status = ret; - goto DONE; - } - - if( NULL != msg_ref ) { - traffic_message_move(msg_ref, - COORD_MSG_TYPE_I_RECV, - NULL, &(unknown_recv_from_list), - peer_ref, &(peer_ref->irecv_list), - &new_msg_ref, - true, - true); - msg_ref = new_msg_ref; - } - } - - /* - * If still not found, then must have completed this twice - */ - if( NULL == msg_ref ) { - /* - * It is possible that we did not 'find' the message because - * we could have previously marked it as done. Due to the - * logic in the Request Wait/Test routines we could - * receive multiple request complete calls for the - * same request. - * - * It is possible that we have 'completed' this message previously, - * so this case can occur during normal operation. - * This is caused by us checking for completeness twice in ompi_request_wait_all. 
- */ - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: request_complete: No match found for this request :( %d, %d ): [%d/%d,%d]\n", - peer_ref->total_msgs_sent, peer_ref->total_msgs_recvd, - breq->req_peer, src, breq->req_comm->c_contextid)); - exit_status = OMPI_SUCCESS; - goto DONE; - } - - /* Strip off an irecv request - * Only increment the total count if this was not accounted for in the last checkpoint - */ - traffic_message_grab_content(msg_ref, &content_ref, true, true); /* Remove, prefer already_drained */ - - if( !content_ref->already_drained ) { - peer_ref->total_msgs_recvd += 1; - msg_ref->done++; - msg_ref->active--; - } else { - msg_ref->active_drain--; - content_ref->already_drained = false; - } - - HOKE_CONTENT_REF_RETURN(content_ref); - - if( NULL == new_msg_ref ) { - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Request Complete (iRecv) --", true)); - } else { - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Request Complete (iRecv - Unknown) --", true)); - } - - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete: Matched an iRecv: total = %d", - peer_ref->total_msgs_recvd)); - - DONE: - return exit_status; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_recv( - void *buf, size_t count, - ompi_datatype_t *datatype, - int src, int tag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status, - ompi_crcp_base_pml_state_t* pml_state) -{ - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref = NULL, *new_msg_ref = NULL; - ompi_crcp_bkmrk_pml_state_t *coord_state = NULL; - bool found_drain = false; - int exit_status = OMPI_SUCCESS; - int ret; - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_recv()")); - - /* - * Before PML Call - * - Determine if this can be satisfied from the drained list - * - Otherwise create a new reference to it so we can track it - */ - if( 
OMPI_CRCP_PML_PRE == pml_state->state) { - /* - * Check to see if this message is in the drained message list - */ - found_drain = false; - if( OMPI_SUCCESS != (ret = drain_message_check_recv(buf, count, datatype, - &src, &tag, comm, NULL, &status, - &found_drain) ) ) { - ERROR_SHOULD_NEVER_HAPPEN("crcp:bkmrk: pml_recv(): Failed trying to find a drained message."); - exit_status = ret; - goto DONE; - } - - if( found_drain ) { - /* Do *not* increment: - * peer_ref->total_msgs_recvd += 1; - * Because we accounted for this message during the last checkpoint. - */ - - /* This will identify to the wrapper that this message is complete */ - pml_state->state = OMPI_CRCP_PML_DONE; - pml_state->error_code = OMPI_SUCCESS; - return pml_state; - } - /* - * Otherwise the message is not drained (common case) - */ - else { - /* - * Find the Peer - */ - if( MPI_ANY_SOURCE == src || src < 0) { - traffic_message_append(NULL, &(unknown_recv_from_list), - COORD_MSG_TYPE_B_RECV, - count, datatype, 0, tag, src, comm, - &msg_ref); - - CREATE_COORD_STATE(coord_state, pml_state, - NULL, msg_ref); - } - else { - if( OMPI_SUCCESS != (ret = find_peer_in_comm(comm, src, &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_recv(): Failed to find peer_ref\n"); - exit_status = ret; - goto DONE; - } - - traffic_message_append(peer_ref, &(peer_ref->recv_list), - COORD_MSG_TYPE_B_RECV, - count, datatype, 0, tag, src, comm, - &msg_ref); - - CREATE_COORD_STATE(coord_state, pml_state, - peer_ref, msg_ref); - } - - /* Bookkeeping */ - current_msg_id = msg_ref->msg_id; - current_msg_type = COORD_MSG_TYPE_B_RECV; - - coord_state->p_super.error_code = OMPI_SUCCESS; - return &coord_state->p_super; - } - } - /* - * Post PML Call - * - bookkeeping... 
- */ - else if( OMPI_CRCP_PML_POST == pml_state->state) { - ompi_crcp_base_pml_state_t *rtn_state = NULL; - - EXTRACT_COORD_STATE(pml_state, coord_state, rtn_state, - peer_ref, msg_ref); - - /* - * If MPI_ANY_SOUCE, then move the message from the unknown list - * to the list associated with the resolved process. - */ - if( NULL == peer_ref ) { - src = status->MPI_SOURCE; - - if( OMPI_SUCCESS != (ret = find_peer_in_comm(comm, src, &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_recv(): Failed to resolve peer_ref (rank %d)\n", - src); - exit_status = ret; - goto DONE; - } - - traffic_message_move(msg_ref, - COORD_MSG_TYPE_B_RECV, - NULL, &(unknown_recv_from_list), - peer_ref, &(peer_ref->recv_list), - &new_msg_ref, - false, - true); - new_msg_ref->done++; - new_msg_ref->active--; - } else { - /* - * Do the update - */ - msg_ref->done++; - msg_ref->active--; - } - - peer_ref->total_msgs_recvd += 1; - current_msg_id = 0; - current_msg_type = COORD_MSG_TYPE_UNKNOWN; - - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "Recv Done", true)); - - CRCP_COORD_STATE_RETURN(coord_state); - - rtn_state->error_code = OMPI_SUCCESS; - return rtn_state; - } - - DONE: - pml_state->error_code = exit_status; - return pml_state; -} - - -/**************** Start *****************/ -/* Start is connected to irecv_start or isend_start */ -static ompi_request_type_t * coord_start_req_types = NULL; - -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_start( - size_t count, - ompi_request_t** requests, - ompi_crcp_base_pml_state_t* pml_state ) -{ - int ret, exit_status = OMPI_SUCCESS; - mca_pml_base_request_t *breq = NULL; - size_t tmp_ddt_size = 0; - size_t iter_req; - bool found_drain = false; - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_start()")); - - /* - * Handle all start() on send requests - */ - if( OMPI_CRCP_PML_POST == pml_state->state ) { - for(iter_req = 0; iter_req < count; iter_req++) { - breq = 
(mca_pml_base_request_t *)requests[iter_req]; - if(breq->req_type == MCA_PML_REQUEST_SEND ) { - if( OMPI_SUCCESS != (ret = ompi_crcp_bkmrk_pml_start_isend_init(&(requests[iter_req]))) ) { - exit_status = ret; - goto DONE; - } - } - } - } - - /* - * Handle all start() on recv requests - * - Pre: Check drain queue for a match - * - Post: Start the message, unless drained - */ - if( OMPI_CRCP_PML_PRE == pml_state->state ) { - /* - * Mark all saved requests as NOOP - */ - coord_start_req_types = (ompi_request_type_t *)malloc(sizeof(ompi_request_type_t) * count); - for(iter_req = 0; iter_req < count; iter_req++) { - coord_start_req_types[iter_req] = OMPI_REQUEST_NOOP; - } - - for(iter_req = 0; iter_req < count; iter_req++) { - breq = (mca_pml_base_request_t *)requests[iter_req]; - ompi_datatype_type_size(breq->req_datatype, &tmp_ddt_size); - - if( breq->req_type == MCA_PML_REQUEST_RECV ) { - found_drain = false; - if( OMPI_SUCCESS != (ret = ompi_crcp_bkmrk_pml_start_drain_irecv_init(&(requests[iter_req]), &found_drain)) ) { - exit_status = ret; - goto DONE; - } - - if( found_drain ) { - coord_start_req_types[iter_req] = requests[iter_req]->req_type; - requests[iter_req]->req_type = OMPI_REQUEST_NOOP; - requests[iter_req]->req_complete = true; - } - } - } - goto DONE; - } - else if( OMPI_CRCP_PML_POST == pml_state->state) { - for(iter_req = 0; iter_req < count; iter_req++) { - breq = (mca_pml_base_request_t *)requests[iter_req]; - ompi_datatype_type_size(breq->req_datatype, &tmp_ddt_size); - - if (breq->req_type == MCA_PML_REQUEST_RECV) { - /* - * If this was a drained message it will have it's type set to - * OMPI_REQUEST_NOOP so the PML does not try to start it again. - * So we need to replace it with the original type, but can - * skip starting it. 
- */ - if( NULL != coord_start_req_types ) { - if( OMPI_REQUEST_NOOP != coord_start_req_types[iter_req] ) { - requests[iter_req]->req_type = coord_start_req_types[iter_req]; - continue; - } - } - - if( OMPI_SUCCESS != (ret = ompi_crcp_bkmrk_pml_start_irecv_init(&(requests[iter_req]))) ) { - exit_status = ret; - goto DONE; - } - } - } - - /* - * Clear out the temporary drain type structure. - */ - if( NULL != coord_start_req_types ) { - free(coord_start_req_types); - coord_start_req_types = NULL; - } - } - - DONE: - pml_state->error_code = exit_status; - return pml_state; -} - -/**************** Request Completed ********/ -int ompi_crcp_bkmrk_request_complete(struct ompi_request_t *request) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref = NULL; - mca_pml_base_request_t *breq; - size_t tmp_ddt_size = 0; - int src, tag; - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_request_complete()")); - - /* - * Extract & check the PML version of the request - */ - breq = (mca_pml_base_request_t *)request; - - if( (breq->req_type != MCA_PML_REQUEST_SEND && - breq->req_type != MCA_PML_REQUEST_RECV ) || /* JJH YYY -- req_state = OMPI_REQUEST_INACTIVE ??? 
*/ - request->req_type == OMPI_REQUEST_NOOP || - request->req_type == OMPI_REQUEST_NULL) { - exit_status = OMPI_SUCCESS; - goto DONE; - } - - /* Extract source/tag/ddt_size */ - src = breq->req_peer; - tag = breq->req_tag; - ompi_datatype_type_size(breq->req_datatype, &tmp_ddt_size); - - /* - * Find the peer reference - */ - if( MPI_ANY_SOURCE == src ) { - if( OMPI_SUCCESS != (ret = find_peer_in_comm(breq->req_comm, request->req_status.MPI_SOURCE, &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete(): Failed to find peer_ref\n"); - exit_status = ret; - goto DONE; - } - } else { - if( OMPI_SUCCESS != (ret = find_peer_in_comm(breq->req_comm, src, &peer_ref) ) ){ - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: req_complete(): Failed to find peer_ref\n"); - exit_status = ret; - goto DONE; - } - } - - /******************************* - * A send request is completing - ******************************/ - if(breq->req_type == MCA_PML_REQUEST_SEND ) { - /* - * ISEND Case: - */ - if( false == request->req_persistent ) { - if( OMPI_SUCCESS != (ret = ompi_crcp_bkmrk_request_complete_isend(request, peer_ref, - src, tag, tmp_ddt_size) ) ) { - exit_status = ret; - goto DONE; - } - } - /* - * SEND_INIT/START Case - */ - else { - if( OMPI_SUCCESS != (ret = ompi_crcp_bkmrk_request_complete_isend_init(request, peer_ref, - src, tag, tmp_ddt_size) ) ) { - exit_status = ret; - goto DONE; - } - } - } - /*********************************** - * A receive request is completing - ***********************************/ - else if(breq->req_type == MCA_PML_REQUEST_RECV) { - /* - * IRECV Case: - */ - if( false == request->req_persistent ) { - if( OMPI_SUCCESS != (ret = ompi_crcp_bkmrk_request_complete_irecv(request, peer_ref, - src, tag, tmp_ddt_size) ) ) { - exit_status = ret; - goto DONE; - } - } - /* - * IRECV_INIT/START Case: - */ - else { - if( OMPI_SUCCESS != (ret = 
ompi_crcp_bkmrk_request_complete_irecv_init(request, peer_ref, - src, tag, tmp_ddt_size) ) ) { - exit_status = ret; - goto DONE; - } - } - } - - DONE: - return exit_status; -} - -/**************** FT Event *****************/ -int ompi_crcp_bkmrk_pml_quiesce_start(ompi_crcp_bkmrk_pml_quiesce_tag_type_t tag ) { - int ret, exit_status = OMPI_SUCCESS; - - if( OMPI_SUCCESS != (ret = ft_event_coordinate_peers()) ) { - exit_status = ret; - } - - return exit_status; -} - -int ompi_crcp_bkmrk_pml_quiesce_end(ompi_crcp_bkmrk_pml_quiesce_tag_type_t tag ) { - int ret, exit_status = OMPI_SUCCESS; - - if( OMPI_SUCCESS != (ret = ft_event_finalize_exchange() ) ) { - exit_status = ret; - } - - return exit_status; -} - -ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event( - int state, - ompi_crcp_base_pml_state_t* pml_state) -{ - static int step_to_return_to = 0; - static bool first_continue_pass = false; - opal_list_item_t* item = NULL; - int exit_status = OMPI_SUCCESS; - int ret; - - ft_event_state = state; - - if( step_to_return_to == 1 ) { - goto STEP_1; - } - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_ft_event()")); - - /************************** - * Prepare for a Checkpoint - **************************/ - if(OPAL_CRS_CHECKPOINT == state) { - if( OMPI_CRCP_PML_PRE != pml_state->state){ - goto DONE; - } - - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR0); - opal_pmix.fence(NULL, 0); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP0); - - START_TIMER(CRCP_TIMER_TOTAL_CKPT); - STEP_1: - step_to_return_to = 0; - - /* Coordinate Peers: - * When we return from this function we know that all of our - * channels have been flushed. 
- */ - if( OMPI_SUCCESS != (ret = ompi_crcp_bkmrk_pml_quiesce_start(QUIESCE_TAG_CKPT)) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s ft_event: Checkpoint Coordination Failed %d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - ret); - exit_status = ret; - goto DONE; - } - - if( stall_for_completion ) { - stall_for_completion = false; - opal_cr_stall_check = true; - step_to_return_to = 1; - - exit_status = OMPI_EXISTS; - goto DONE_STALL; - } - END_TIMER(CRCP_TIMER_TOTAL_CKPT); - - DISPLAY_ALL_TIMERS(state); - clear_timers(); - } - /***************************** - * Continue after a checkpoint - ******************************/ - else if(OPAL_CRS_CONTINUE == state) { - if( OMPI_CRCP_PML_POST != pml_state->state){ - goto DONE; - } - - first_continue_pass = !first_continue_pass; - - /* Only finalize the Protocol after the PML has been rebuilt */ - if (opal_cr_continue_like_restart && first_continue_pass) { - goto DONE; - } - - START_TIMER(CRCP_TIMER_TOTAL_CONT); - - /* - * Finish the coord protocol - */ - if( OMPI_SUCCESS != (ret = ompi_crcp_bkmrk_pml_quiesce_end(QUIESCE_TAG_CONTINUE) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_ft_event: Checkpoint Finalization Failed %d", - ret); - exit_status = ret; - goto DONE; - } - END_TIMER(CRCP_TIMER_TOTAL_CONT); - - DISPLAY_ALL_TIMERS(state); - clear_timers(); - - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR1); - opal_pmix.fence(NULL, 0); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE2); - } - /***************************** - * Restart from a checkpoint - *****************************/ - else if(OPAL_CRS_RESTART == state) { - if( OMPI_CRCP_PML_POST != pml_state->state){ - goto DONE; - } - - START_TIMER(CRCP_TIMER_TOTAL_RST); - /* - * Refresh the jobids - */ - for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs); - item != opal_list_get_end(&ompi_crcp_bkmrk_pml_peer_refs); - item = opal_list_get_next(item) ) { - 
ompi_crcp_bkmrk_pml_peer_ref_t *cur_peer_ref; - cur_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item; - - /* JJH - Assuming only one global jobid at the moment */ - cur_peer_ref->proc_name.jobid = OMPI_PROC_MY_NAME->jobid; - } - - /* - * Finish the coord protocol - */ - if( OMPI_SUCCESS != (ret = ft_event_finalize_exchange() ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: pml_ft_event: Checkpoint Finalization Failed %d", - ret); - exit_status = ret; - goto DONE; - } - - END_TIMER(CRCP_TIMER_TOTAL_RST); - - DISPLAY_ALL_TIMERS(state); - clear_timers(); - } - /***************************** - * Terminating the process post checkpoint - *****************************/ - else if(OPAL_CRS_TERM == state ) { - goto DONE; - } - /**************************** - * Reached an error - ****************************/ - else { - goto DONE; - } - - DONE: - step_to_return_to = 0; - ft_event_state = OPAL_CRS_RUNNING; - - DONE_STALL: - pml_state->error_code = exit_status; - return pml_state; -} - -/****************** - * Local Utility functions - ******************/ - -/************************************************ - * Traffic Message Utility Functions - ************************************************/ -static int traffic_message_append(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - opal_list_t * append_list, - ompi_crcp_bkmrk_pml_message_type_t msg_type, - size_t count, - ompi_datatype_t *datatype, - size_t in_ddt_size, - int tag, - int dest, - struct ompi_communicator_t* comm, - ompi_crcp_bkmrk_pml_traffic_message_ref_t **msg_ref) -{ - int ret, exit_status = OMPI_SUCCESS; - size_t ddt_size = 0; - - if( NULL != datatype ) { - ompi_datatype_type_size(datatype, - &ddt_size); - } else { - ddt_size = in_ddt_size; - /* ddt_size = 0; */ - } - - /* - * Determine if message is currently in the list - * - If it is then increment the count. 
- * - ow add it to the list - */ - if( OMPI_SUCCESS != (ret = traffic_message_find(append_list, - count, tag, dest, - comm->c_contextid, - ddt_size, - msg_ref, - FIND_MSG_UNKNOWN /* Active? */ - ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: traffic_message_append: Unable to find the proper message reference.\n"); - return OMPI_ERROR; - } - - if( NULL != *msg_ref ) { - if( msg_type == COORD_MSG_TYPE_P_SEND || - msg_type == COORD_MSG_TYPE_P_RECV ) { - (*msg_ref)->posted++; - } else { - (*msg_ref)->active++; - } - } else { - if( NULL != peer_ref ) { - CREATE_NEW_MSG((*msg_ref), msg_type, - count, ddt_size, tag, dest, comm, - peer_ref->proc_name.jobid, - peer_ref->proc_name.vpid); - } else { - CREATE_NEW_MSG((*msg_ref), msg_type, - count, ddt_size, tag, dest, comm, - ORTE_JOBID_INVALID, ORTE_VPID_INVALID); - } - - if( msg_type == COORD_MSG_TYPE_P_SEND || - msg_type == COORD_MSG_TYPE_P_RECV ) { - (*msg_ref)->matched = 0; - (*msg_ref)->done = 0; - (*msg_ref)->active = 0; - (*msg_ref)->posted = 1; - } else { - (*msg_ref)->matched = 0; - (*msg_ref)->done = 0; - (*msg_ref)->active = 1; - (*msg_ref)->posted = 0; - } - - opal_list_append(append_list, &((*msg_ref)->super)); - } - - if( NULL != peer_ref ) { - if( msg_type == COORD_MSG_TYPE_B_SEND ) { - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Append Message (send) --", true)); - } - else if( msg_type == COORD_MSG_TYPE_P_SEND ) { - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Append Message (send_init) --", true)); - } - else if( msg_type == COORD_MSG_TYPE_B_RECV ) { - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Append Message (recv) --", true)); - } - else if( msg_type == COORD_MSG_TYPE_P_RECV ) { - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Append Message (recv_init) --", true)); - } - else if( msg_type == COORD_MSG_TYPE_I_SEND || msg_type == COORD_MSG_TYPE_I_RECV ) { - ; - } - else { - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Append Message (Unknown) --", true)); - } - } - - return exit_status; -} 
- -static int traffic_message_start(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - ompi_request_t **request, - opal_list_t * peer_list, - ompi_crcp_bkmrk_pml_message_content_ref_t ** content_ref) -{ - /* This is only called by persistent calls. - * This will mark the current message as having one more active member. - * There is still only one posted message. */ - msg_ref->active++; - - traffic_message_find_mark_persistent(msg_ref, request, - false, /* Find currently not active */ - true, /* Mark as active */ - content_ref); - return OMPI_SUCCESS; -} - -static int traffic_message_move(ompi_crcp_bkmrk_pml_traffic_message_ref_t *old_msg_ref, - ompi_crcp_bkmrk_pml_message_type_t msg_type, - ompi_crcp_bkmrk_pml_peer_ref_t *from_peer_ref, - opal_list_t * from_list, - ompi_crcp_bkmrk_pml_peer_ref_t *to_peer_ref, - opal_list_t * to_list, - ompi_crcp_bkmrk_pml_traffic_message_ref_t **new_msg_ref, - bool keep_active, - bool remove) -{ - int ret; - ompi_crcp_bkmrk_pml_message_content_ref_t *new_content = NULL, *prev_content = NULL; - ompi_request_t *request = NULL; - bool loc_already_drained = false; - - /* Append to the to_peer_ref */ - if( COORD_MSG_TYPE_B_RECV != msg_type ) { - traffic_message_grab_content(old_msg_ref, &prev_content, remove, true); /* Remove, prefer already_drained */ - request = prev_content->request; - - loc_already_drained = prev_content->already_drained; - - if( remove ) { - prev_content->request = NULL; - HOKE_CONTENT_REF_RETURN(prev_content); - } - } - - ret = traffic_message_append(to_peer_ref, to_list, - old_msg_ref->msg_type, - old_msg_ref->count, - NULL, - old_msg_ref->ddt_size, - old_msg_ref->tag, - old_msg_ref->rank, - old_msg_ref->comm, - new_msg_ref); - - if( loc_already_drained ) { - old_msg_ref->active_drain--; - (*new_msg_ref)->active--; /* Undo the action from _append() */ - (*new_msg_ref)->active_drain++; - } else { - /* 'remove' from from_peer_ref */ - old_msg_ref->active--; - } - - 
if( msg_type == COORD_MSG_TYPE_P_SEND || - msg_type == COORD_MSG_TYPE_P_RECV ) { - if( keep_active ) { - (*new_msg_ref)->active++; - } - } - - if( COORD_MSG_TYPE_B_RECV != msg_type && NULL == request ) { - ERROR_SHOULD_NEVER_HAPPEN("Error: Must match a non-blocking send, and there is no matching request."); - } - - if( NULL != request ) { - HOKE_CONTENT_REF_ALLOC(new_content); - new_content->buffer = NULL; - new_content->request = request; - new_content->done = false; - new_content->active = keep_active; - new_content->already_posted = true; - new_content->already_drained = loc_already_drained; - OBJ_RETAIN(request); - opal_list_append(&((*new_msg_ref)->msg_contents), &(new_content->super) ); - } - - if( NULL == from_peer_ref && NULL != to_peer_ref ) { - (*new_msg_ref)->proc_name.jobid = to_peer_ref->proc_name.jobid; - (*new_msg_ref)->proc_name.vpid = to_peer_ref->proc_name.vpid; - } - - return ret; -} - -static int traffic_message_find_mark_persistent(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref, - ompi_request_t **request, - bool cur_active, - bool set_is_active, - ompi_crcp_bkmrk_pml_message_content_ref_t **c_ref) -{ - mca_pml_base_request_t * breq = NULL; - opal_list_item_t* item = NULL; - - breq = (mca_pml_base_request_t *)(*request); - - for(item = opal_list_get_first(&(msg_ref->msg_contents)); - item != opal_list_get_end( &(msg_ref->msg_contents)); - item = opal_list_get_next(item) ) { - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - mca_pml_base_request_t * loc_breq = NULL; - - content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*)item; - loc_breq = (mca_pml_base_request_t *)(content_ref->request); - - if( content_ref->active != cur_active ) { - continue; - } - else if( loc_breq->req_sequence == breq->req_sequence ) { - OPAL_OUTPUT_VERBOSE((25, mca_crcp_bkmrk_component.super.output_handle, - "%s %8s Request [%d] (%s) %d : %d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - (set_is_active ? "Start" : (NULL != c_ref ? 
"Drain" : "Complete")), - (int)msg_ref->msg_id, - (content_ref->active ? "T" : "F"), - (int)loc_breq->req_sequence, - (int)breq->req_sequence)); - - content_ref->active = set_is_active; - if( NULL != c_ref ) { - *c_ref = content_ref; - } - break; - } - } - - return OMPI_SUCCESS; -} - -static int traffic_message_grab_content(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t ** content_ref, - bool remove, - bool already_drained) -{ - ompi_crcp_bkmrk_pml_message_content_ref_t *new_content = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *loc_content_ref = NULL; - opal_list_item_t* item = NULL; - - /* - * If there is no request list, return NULL - */ - if( 0 >= opal_list_get_size(&msg_ref->msg_contents) ) { - return OMPI_SUCCESS; - } - - /* - * Otherwise look though the list, and grab something 'already_drained' if - * possible, otherwise just get the first element. - */ - if( already_drained ) { - item = opal_list_get_first(&(msg_ref->msg_contents)); - new_content = (ompi_crcp_bkmrk_pml_message_content_ref_t*)item; - } - - for(item = opal_list_get_first(&(msg_ref->msg_contents)); - item != opal_list_get_end( &(msg_ref->msg_contents)); - item = opal_list_get_next(item) ) { - loc_content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*)item; - - if( !already_drained ) { - TRAFFIC_MSG_DUMP_MSG_CONTENT_INDV(10, (loc_content_ref)); - } - - if( loc_content_ref->already_drained == already_drained ) { - new_content = (ompi_crcp_bkmrk_pml_message_content_ref_t*)item; - break; - } - } - - if( remove ) { - opal_list_remove_item(&msg_ref->msg_contents, &(new_content->super)); - } - - if( NULL != content_ref ) { - *content_ref = new_content; - } else if( remove && NULL != new_content ) { - HOKE_CONTENT_REF_RETURN(new_content); - } - - return OMPI_SUCCESS; -} - -static int traffic_message_create_drain_message(bool post_drain, - int max_post, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** 
posted_msg_ref, - int *num_posted) -{ - ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *new_content = NULL, *prev_content = NULL; - int m_iter, m_total; - - *num_posted = 0; - - /* - * Nothing to do here - */ - if( NULL == (*posted_msg_ref) || max_post <= 0) { - return OMPI_SUCCESS; - } - - /* - * For each active message or if not active message then max_post, create a drain message - */ - m_total = max_post; - if( !post_drain && max_post > (*posted_msg_ref)->active ) { - m_total = (*posted_msg_ref)->active; - } - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - " --> Create Drain Msg: %s %4d = min(%4d / %4d)", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - (post_drain ? "Posting" : "Not Posting"), - m_total, (*posted_msg_ref)->active, max_post )); - - TRAFFIC_MSG_DUMP_MSG_INDV(10, ((*posted_msg_ref), "Drain", true)); - - /* - * Get a drained message reference for this signature. - */ - drain_message_append(peer_ref, - COORD_MSG_TYPE_I_RECV, - (*posted_msg_ref)->count, - (*posted_msg_ref)->ddt_size, - (*posted_msg_ref)->tag, - (*posted_msg_ref)->rank, - (*posted_msg_ref)->comm, - &drain_msg_ref); - - /* - * Create a new message content for each message to be drained. - */ - for(m_iter = 0; m_iter < m_total; ++m_iter) { - new_content = NULL; - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - " \t--> Find Content: %s (%4d of %4d)", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - (post_drain ? "Posting" : "Not Posting"), - m_iter, m_total)); - - /* Grab a request if there are any - * - if we are posting, then we created a dummy message which will not - * have any contents, so this is still valid. - * - if we are not posting, and this is an iRecv, then we *must* find a content! 
- */ - traffic_message_grab_content((*posted_msg_ref), &prev_content, false, false); /* Do not remove, No already drained */ - if( NULL != prev_content ) { - prev_content->already_drained = true; - } - - /* YYY JJH YYY - Is this needed? */ - if( !post_drain && (*posted_msg_ref)->msg_type != COORD_MSG_TYPE_B_RECV ) { - assert( NULL != prev_content ); - } - - /* Decrementing active occurs when we stall in the Blocking Recv, do not do so here. */ - if( NULL != prev_content ) { - (*posted_msg_ref)->active--; - } - (*posted_msg_ref)->active_drain++; - - /* Create a new content for the drained message */ - HOKE_CONTENT_REF_ALLOC(new_content); - new_content->buffer = NULL; - if( NULL == prev_content ) { - new_content->request = NULL; - } else { - new_content->request = prev_content->request; - if( NULL != new_content->request ) { - OBJ_RETAIN(new_content->request); - } - } - opal_list_append(&(drain_msg_ref->msg_contents), &(new_content->super) ); - - if( !post_drain ) { - new_content->done = false; - new_content->active = true; - new_content->already_posted = true; - new_content->already_drained = true; - - drain_msg_ref->active++; - drain_msg_ref->already_posted++; - } else { - new_content->done = false; - new_content->active = false; - new_content->already_posted = false; - new_content->already_drained = true; - - /* - * Put the true count here so we can properly match the drain. - * The post_drained() will properly handle the packed datatype - * by changing the count to (count * ddt_size). - */ - ompi_datatype_duplicate(&(ompi_mpi_packed.dt), &(drain_msg_ref->datatype)); - - /* Create a buffer of the necessary type/size */ - if(drain_msg_ref->count > 0 ) { - new_content->buffer = (void *) malloc(drain_msg_ref->count * drain_msg_ref->ddt_size); - } else { - new_content->buffer = (void *) malloc(1 * drain_msg_ref->ddt_size); - } - - /* JJH - Performance Optimization? - Post drained messages right away? 
*/ - } - - (*num_posted)++; - } - - peer_ref->total_drained_msgs += *num_posted; - - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - "Added %d messages to the drained list (size = %d)", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - (*num_posted), - (int)opal_list_get_size(&(peer_ref->drained_list)) )); - - return OMPI_SUCCESS; -} - -static int traffic_message_find_recv(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int rank, uint32_t comm_id, int tag, - size_t count, size_t datatype_size, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** posted_recv_msg_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** posted_irecv_msg_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** posted_precv_msg_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** posted_unknown_recv_msg_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** posted_unknown_precv_msg_ref) -{ - int ret; - - *posted_recv_msg_ref = NULL; - *posted_irecv_msg_ref = NULL; - *posted_precv_msg_ref = NULL; - *posted_unknown_recv_msg_ref = NULL; - *posted_unknown_precv_msg_ref = NULL; - - /* - * Check the recv_list - */ - if( OMPI_SUCCESS != (ret = traffic_message_find(&(peer_ref->recv_list), - count, tag, INVALID_INT, - comm_id, datatype_size, - posted_recv_msg_ref, - FIND_MSG_UNKNOWN) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: traffic_message_find_recv: Unable to find the proper message reference.\n"); - return OMPI_ERROR; - } - - /* - * Check the irecv_list - */ - if( OMPI_SUCCESS != (ret = traffic_message_find(&(peer_ref->irecv_list), - count, tag, INVALID_INT, - comm_id, datatype_size, - posted_irecv_msg_ref, - FIND_MSG_UNKNOWN) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: traffic_message_find_recv: Unable to find the proper message reference.\n"); - return OMPI_ERROR; - } - - /* - * Check the recv_init_list - */ - if( OMPI_SUCCESS != (ret = 
traffic_message_find(&(peer_ref->recv_init_list), - count, tag, INVALID_INT, - comm_id, datatype_size, - posted_precv_msg_ref, - FIND_MSG_UNKNOWN) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: traffic_message_find_recv: Unable to find the proper message reference.\n"); - return OMPI_ERROR; - } - - /* - * Check the unknown list of non-persistant - */ - if( OMPI_SUCCESS != (ret = traffic_message_find(&(unknown_recv_from_list), - count, tag, INVALID_INT, - comm_id, datatype_size, - posted_unknown_recv_msg_ref, - FIND_MSG_UNKNOWN) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: traffic_message_find_recv: Unable to find the proper message reference.\n"); - return OMPI_ERROR; - } - - /* - * Check the unknown list of persistant - */ - if( OMPI_SUCCESS != (ret = traffic_message_find(&(unknown_persist_recv_list), - count, tag, INVALID_INT, - comm_id, datatype_size, - posted_unknown_precv_msg_ref, - FIND_MSG_UNKNOWN) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: traffic_message_find_recv: Unable to find the proper message reference.\n"); - return OMPI_ERROR; - } - - /* - * JJH -- Should we check the drained list? - * If we checkpoint again before dimishing the drained list, then - * the peer could be requesting that a drained send complete... - */ - - return OMPI_SUCCESS; -} - -static int traffic_message_find(opal_list_t * search_list, - size_t count, int tag, int peer, - uint32_t comm_id, size_t ddt_size, - ompi_crcp_bkmrk_pml_traffic_message_ref_t ** found_msg_ref, - int active ) -{ - opal_list_item_t* item = NULL; - - *found_msg_ref = NULL; - -#if OPAL_ENABLE_DEBUG == 1 - /* - * Dummy checks: - */ - if( NULL == search_list) { - opal_output(0, "WARNING (Debug): Search_list NULL! 
(%s:%d)", __FILE__, __LINE__); - return OMPI_ERROR; - } -#endif - - /* - * Check the search list - */ - for(item = opal_list_get_last(search_list); - item != opal_list_get_begin(search_list); - item = opal_list_get_prev(item) ) { - ompi_crcp_bkmrk_pml_traffic_message_ref_t * msg_ref; - msg_ref = (ompi_crcp_bkmrk_pml_traffic_message_ref_t*)item; - - if( active != FIND_MSG_UNKNOWN ) { - if( active == PERSIST_MARKER ) { - if( 0 >= msg_ref->posted ) { - continue; - } - } - else if( (active == FIND_MSG_TRUE && 0 >= (msg_ref->active + msg_ref->active_drain) ) || - (active == FIND_MSG_FALSE && 0 <= (msg_ref->active + msg_ref->active_drain) ) ) { - continue; - } - } - - if(msg_ref->count == count && - (NULL != msg_ref->comm && msg_ref->comm->c_contextid == comm_id) && - (msg_ref->tag == MPI_ANY_TAG || msg_ref->tag == tag) && - (peer == INVALID_INT || msg_ref->rank == peer) && - msg_ref->ddt_size == ddt_size) { - - OPAL_OUTPUT_VERBOSE((30, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: traffic_message_find: Found Message -- Comm list (%d, %d)\n", - tag, peer)); - - *found_msg_ref = msg_ref; - return OMPI_SUCCESS; - } - } - - return OMPI_SUCCESS; -} - - -/************************************************ - * Drain Message Utility Functions - ************************************************/ -static int drain_message_append(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - ompi_crcp_bkmrk_pml_message_type_t msg_type, - size_t count, size_t ddt_size, - int tag,int dest, - struct ompi_communicator_t* comm, - ompi_crcp_bkmrk_pml_drain_message_ref_t **msg_ref) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - - /* - * Determine if message is currently in the list - * - If it is then increment the count. 
- * - ow add it to the list - */ - if( OMPI_SUCCESS != (ret = drain_message_find(&(peer_ref->drained_list), - count, tag, dest, - comm->c_contextid, - ddt_size, - msg_ref, - &content_ref) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: traffic_message_append: Unable to find the proper message reference.\n"); - return OMPI_ERROR; - } - - if( NULL == *msg_ref ) { - CREATE_NEW_DRAIN_MSG((*msg_ref), msg_type, - count, NULL, tag, dest, comm, - peer_ref->proc_name.jobid, - peer_ref->proc_name.vpid); - - (*msg_ref)->done = 0; - (*msg_ref)->active = 0; - (*msg_ref)->already_posted = 0; - - opal_list_append(&(peer_ref->drained_list), &((*msg_ref)->super)); - } - /* If message does exist then the calling function needs to handle the msg_contents and counts */ - - return exit_status; -} - -static int drain_message_remove(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref) -{ - /* - * Remove the message content from the list attached to the message - */ - opal_list_remove_item(&(msg_ref->msg_contents), &(content_ref->super)); - HOKE_CONTENT_REF_RETURN(content_ref); - - /* - * If there are no more drained messages of this signature, - * then remove the signature from the peers drained list. 
- */ - if( 0 >= opal_list_get_size(&(msg_ref->msg_contents) ) ) { - TRAFFIC_MSG_DUMP_DRAIN_MSG_INDV(10, (msg_ref, "D*remove", true)); - opal_list_remove_item(&(peer_ref->drained_list), &(msg_ref->super)); - HOKE_DRAIN_MSG_REF_RETURN(msg_ref); - } else { - TRAFFIC_MSG_DUMP_DRAIN_MSG_INDV(10, (msg_ref, "Dremove", true)); - } - - return OMPI_SUCCESS; -} - -static int drain_message_check_recv(void **buf, size_t count, - ompi_datatype_t *datatype, - int *src, int *tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_status_public_t** status, - bool *found_drain) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref = NULL; - ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - size_t tmp_ddt_size = 0; - - *found_drain = false; - - ompi_datatype_type_size(datatype, &tmp_ddt_size); - - /* - * Check to see if this message is in the drained message list - */ - if( OMPI_SUCCESS != (ret = drain_message_find_any(count, *tag, *src, - comm, tmp_ddt_size, - &drain_msg_ref, - &content_ref, - &peer_ref) ) ) { - ERROR_SHOULD_NEVER_HAPPEN("crcp:bkmrk: drain_check_recv(): Failed trying to find a drained message."); - exit_status = ret; - goto DONE; - } - - /* - * If the message is a drained message - * - Complete it right now - * - We do not need to increment any counters here since we already have - * when we originally drained the message. 
- */ - if( NULL != drain_msg_ref ) { - OPAL_OUTPUT_VERBOSE((12, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: drain_check_recv(): Matched a drained message...")); - - if( OMPI_SUCCESS != (ret = drain_message_copy_remove(drain_msg_ref, - content_ref, - src, tag, request, status, - datatype, count, buf, - peer_ref) ) ) { - opal_output( mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: drain_check_recv(): Datatype copy failed (%d)", - ret); - exit_status = ret; - goto DONE; - } - - peer_ref->total_drained_msgs -= 1; - - *found_drain = true; - } - - DONE: - return exit_status; -} - -static int drain_message_find_any(size_t count, int tag, int peer, - struct ompi_communicator_t* comm, size_t ddt_size, - ompi_crcp_bkmrk_pml_drain_message_ref_t ** found_msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t ** content_ref, - ompi_crcp_bkmrk_pml_peer_ref_t **peer_ref) -{ - ompi_crcp_bkmrk_pml_peer_ref_t *cur_peer_ref = NULL; - opal_list_item_t* item = NULL; - - *found_msg_ref = NULL; - - for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs); - item != opal_list_get_end(&ompi_crcp_bkmrk_pml_peer_refs); - item = opal_list_get_next(item) ) { - cur_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item; - - /* - * If we ware not MPI_ANY_SOURCE, then extract the process name from the - * communicator, and search only the peer that matches. 
- */ - if( MPI_ANY_SOURCE != peer && peer >= 0) { - /* Check to see if peer could possibly be in this communicator */ - if( comm->c_local_group->grp_proc_count <= peer ) { - continue; - } - - if( OPAL_EQUAL != ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - &(cur_peer_ref->proc_name), - OMPI_CAST_RTE_NAME(&comm->c_local_group->grp_proc_pointers[peer]->super.proc_name))) { - continue; - } - } - - drain_message_find(&(cur_peer_ref->drained_list), - count, tag, peer, - comm->c_contextid, ddt_size, - found_msg_ref, - content_ref); - if( NULL != *found_msg_ref) { - if( NULL != peer_ref ) { - *peer_ref = cur_peer_ref; - } - return OMPI_SUCCESS; - } - } - - return OMPI_SUCCESS; -} - -static int drain_message_find(opal_list_t * search_list, - size_t count, int tag, int peer, - uint32_t comm_id, size_t ddt_size, - ompi_crcp_bkmrk_pml_drain_message_ref_t ** found_msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t ** content_ref) -{ - ompi_crcp_bkmrk_pml_drain_message_ref_t * drain_msg = NULL; - opal_list_item_t* item = NULL; - - *found_msg_ref = NULL; - *content_ref = NULL; - - /* Dummy Check: - * If the list is empty... 
- */ - if( 0 >= opal_list_get_size(search_list) ) { - return OMPI_SUCCESS; - } - - for(item = opal_list_get_first(search_list); - item != opal_list_get_end(search_list); - item = opal_list_get_next(item) ) { - drain_msg = (ompi_crcp_bkmrk_pml_drain_message_ref_t*)item; - - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: find_drain_msg(): Compare [%d, %d, %d, %d] to [%d, %d, %d, %d]", - (int)ddt_size, (int)count, tag, peer, - (int)drain_msg->ddt_size, (int)drain_msg->count, (int)drain_msg->tag, (int)drain_msg->rank)); - - /* Check the communicator for a match */ - if( NULL != drain_msg->comm ) { - if( drain_msg->comm->c_contextid != comm_id ) { - continue; - } - } - - /* If a specific tag was requested, then make sure this messages matches */ - if( MPI_ANY_TAG != tag && - drain_msg->tag != tag) { - continue; - } - - /* If a specific rank was requested, then make sure this messages matches */ - if( INVALID_INT != peer ) { - if( MPI_ANY_SOURCE != peer && - drain_msg->rank != peer) { - continue; - } - } - - /* Check the datatype size, if specified for a match */ - if( ddt_size != PROBE_ANY_SIZE && - count != PROBE_ANY_COUNT) { - /* Check the datatype size and count to make sure it matches */ - if((drain_msg->count ) != count || - (drain_msg->ddt_size) != ddt_size) { - continue; - } - } - - /* If we get here then the message matches */ - *found_msg_ref = drain_msg; - break; - } - - /* - * Find a content to return - */ - if( NULL != *found_msg_ref ) { - drain_message_grab_content((*found_msg_ref), content_ref ); - - /* If there are no contents that match, then there are no drained messages that match. 
*/ - if( NULL == *content_ref ) { - *found_msg_ref = NULL; - } - } - - return OMPI_SUCCESS; -} - -static int drain_message_grab_content(ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t ** content_ref) -{ - ompi_crcp_bkmrk_pml_message_content_ref_t *loc_content_ref = NULL; - opal_list_item_t* item = NULL; - - *content_ref = NULL; - - for(item = opal_list_get_first(&(drain_msg_ref->msg_contents)); - item != opal_list_get_end(&(drain_msg_ref->msg_contents)); - item = opal_list_get_next(item) ) { - loc_content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*)item; - - /* If the buffer is invalid then this is not a valid message or - * has not been completed draining just yet */ - if(NULL != loc_content_ref->buffer) { - *content_ref = loc_content_ref; - break; - } - } - - return OMPI_SUCCESS; -} - -static int drain_message_copy_remove_persistent(ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t *drain_content_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t *traffic_msg_ref, - ompi_request_t *request, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - - /* - * Find the request in the list that has been posted, but not started - */ - traffic_message_find_mark_persistent(traffic_msg_ref, &request, - false, /* Find currently not active */ - false, /* Keep inactive */ - &content_ref); - - /* These two requests should be exactly the same, so this is redundant, but here for completeness */ - content_ref->request = request; - - memcpy(&(content_ref->status), &drain_content_ref->status, sizeof(ompi_status_public_t)); - - if( 0 != (ret = ompi_datatype_copy_content_same_ddt(drain_msg_ref->datatype, - drain_msg_ref->count, - content_ref->buffer, - drain_content_ref->buffer) ) ) { - opal_output( mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: 
drain_message_copy_remove_p(): Datatype copy failed (%d)", - ret); - exit_status = ret; - } - - /* Remove the message from the list */ - drain_content_ref->request = NULL; - drain_message_remove(peer_ref, drain_msg_ref, drain_content_ref); - - return exit_status; -} - -static int drain_message_copy_remove(ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t * drain_content_ref, - int *src, int *tag, - struct ompi_request_t **request, - ompi_status_public_t **status, - ompi_datatype_t *datatype, int count, void **buf, - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref) -{ - int ret, exit_status = OMPI_SUCCESS; - - if( NULL != src ) { - *src = drain_msg_ref->rank; - } - - if( NULL != tag ) { - *tag = drain_msg_ref->tag; - } - - if( NULL != request ) { - *request = drain_content_ref->request; - OBJ_RETAIN(*request); - } - - if( NULL != status && MPI_STATUS_IGNORE != *status ) { - memcpy(*status, &drain_content_ref->status, sizeof(ompi_status_public_t)); - } - - /* The buffer could be NULL - More likely when doing a count=0 type of message (e.g., Barrier) */ - if( OPAL_LIKELY(NULL != buf) ) { - if( 0 != (ret = ompi_datatype_copy_content_same_ddt(datatype, count, - (void*)buf, drain_content_ref->buffer) ) ) { - opal_output( mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: drain_message_copy_remove(): Datatype copy failed (%d)", - ret); - exit_status = ret; - } - } - else { - OPAL_OUTPUT_VERBOSE((20, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: drain_message_copy_remove(): Skip copy - NULL buffer")); - } - - /* Remove the message from the list */ - drain_content_ref->request = NULL; - drain_message_remove(peer_ref, drain_msg_ref, drain_content_ref); - - return exit_status; -} - - -/************************************************ - * Peer List Utility Functions - ************************************************/ -static ompi_crcp_bkmrk_pml_peer_ref_t * find_peer(ompi_process_name_t proc) -{ - 
opal_list_item_t* item = NULL; - ompi_rte_cmp_bitmask_t mask; - - for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs); - item != opal_list_get_end(&ompi_crcp_bkmrk_pml_peer_refs); - item = opal_list_get_next(item) ) { - ompi_crcp_bkmrk_pml_peer_ref_t *cur_peer_ref; - cur_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item; - - mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID; - - if( OPAL_EQUAL == ompi_rte_compare_name_fields(mask, - &(cur_peer_ref->proc_name), - &proc) ) { - return cur_peer_ref; - } - } - - return NULL; -} - -static int find_peer_in_comm(struct ompi_communicator_t* comm, int proc_idx, - ompi_crcp_bkmrk_pml_peer_ref_t **peer_ref) -{ - *peer_ref = find_peer(*(ompi_process_name_t *)&comm->c_remote_group->grp_proc_pointers[proc_idx]->super.proc_name); - - if( NULL == *peer_ref) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: find_peer_in_comm(): Failed to find peer_ref - peer_ref is NULL\n"); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - - -/************************************************ - * FT Event Utility Functions - ************************************************/ -static int ft_event_coordinate_peers(void) -{ - static int step_to_return_to = 0; - int exit_status = OMPI_SUCCESS; - int ret; - - if( step_to_return_to == 1 ) { - goto STEP_1; - } - - /* - * Exchange Bookmarks with peers - */ - START_TIMER(CRCP_TIMER_CKPT_EX_B); - if( OMPI_SUCCESS != (ret = ft_event_exchange_bookmarks() ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: ft_event_coordinate_peers: Bookmark Exchange Failed %d", - ret); - exit_status = ret; - goto DONE; - } - END_TIMER(CRCP_TIMER_CKPT_EX_B); - - /* - * Check exchanged bookmarks - */ - START_TIMER(CRCP_TIMER_CKPT_CHECK_B); - if( OMPI_SUCCESS != (ret = ft_event_check_bookmarks() ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: ft_event_coordinate_peers: Bookmark Check Failed %d", - ret); - exit_status = ret; - goto 
DONE; - } - END_TIMER(CRCP_TIMER_CKPT_CHECK_B); - - /* - * Post Drain Acks and Msgs - */ - START_TIMER(CRCP_TIMER_CKPT_POST_DRAIN); - if( OMPI_SUCCESS != (ret = ft_event_post_drain_acks() ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: ft_event_coordinate_peers: Bookmark Post Drain ACKS Failed %d", - ret); - exit_status = ret; - goto DONE; - } - - if( OMPI_SUCCESS != (ret = ft_event_post_drained() ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: ft_event_coordinate_peers: Bookmark Post Drain Msgs Failed %d", - ret); - exit_status = ret; - goto DONE; - } - END_TIMER(CRCP_TIMER_CKPT_POST_DRAIN); - DISPLAY_INDV_TIMER(CRCP_TIMER_CKPT_POST_DRAIN, -1, 0); - - /* - * Check if we need to stall for completion of tasks - */ - /* - * If we know that we are in the middle of a blocking send then we - * need to stall the coordination algorithm while we wait for this to - * complete. - */ - if( 0 < current_msg_id && - current_msg_type == COORD_MSG_TYPE_B_SEND) { - stall_for_completion = true; - } - START_TIMER(CRCP_TIMER_CKPT_WAIT_QUI); - if( stall_for_completion ) { - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s **** STALLING %s in PID %d ***", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - (current_msg_type == COORD_MSG_TYPE_B_SEND ? "Send" : "Recv"), - getpid() )); - step_to_return_to = 1; - exit_status = OMPI_SUCCESS; - goto DONE; - } - - STEP_1: - step_to_return_to = 0; - - /* - * Wait for any messages that needed resolved. 
- * - Outstanding Receives (to drain wire) -- Receiver - * - Outstanding Irecvs (for drain ack) -- Sender - */ - if( OMPI_SUCCESS != (ret = ft_event_wait_quiesce() ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: ft_event_coordinate_peers: Wait Quiesce Failed %d", - ret); - exit_status = ret; - goto DONE; - } - END_TIMER(CRCP_TIMER_CKPT_WAIT_QUI); - - OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s Coordination Finished...\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME))); - - /* - * Now that all our peer channels are marked as drained - * continue with the checkpoint. - * Note: This does not guarentee that all of the peers - * are at this same position, but that our - * checkpoint will be consistent with all of the - * peers once they finish the protocol. - */ - - DONE: - return exit_status; -} - -static int ft_event_finalize_exchange(void) -{ - int exit_status = OMPI_SUCCESS; - opal_list_item_t* item = NULL, *rm_item = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t * msg_ref; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - opal_list_item_t* cont_item = NULL; - - /* - * Clear bookmark totals - */ - for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs); - item != opal_list_get_end(&ompi_crcp_bkmrk_pml_peer_refs); - item = opal_list_get_next(item) ) { - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref; - peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item; - - if( OPAL_EQUAL != ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - (OMPI_PROC_MY_NAME), - &(peer_ref->proc_name)) ) { - TRAFFIC_MSG_DUMP_PEER(10, (peer_ref, "finalize_exchange", false)); - } - - peer_ref->total_msgs_sent = 0; - peer_ref->total_msgs_recvd = 0; - - peer_ref->matched_msgs_sent = 0; - peer_ref->matched_msgs_recvd = 0; - - peer_ref->ack_required = false; - - /* Clear send_list */ - for(rm_item = opal_list_get_last(&peer_ref->send_list); - rm_item != opal_list_get_begin(&peer_ref->send_list); - rm_item = 
opal_list_get_prev(rm_item) ) { - msg_ref = (ompi_crcp_bkmrk_pml_traffic_message_ref_t*)rm_item; - msg_ref->matched = 0; - msg_ref->done = 0; - msg_ref->active_drain += msg_ref->active; - msg_ref->active = 0; - - for(cont_item = opal_list_get_first(&(msg_ref->msg_contents)); - cont_item != opal_list_get_end( &(msg_ref->msg_contents)); - cont_item = opal_list_get_next(cont_item) ) { - content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*)cont_item; - if( content_ref->active ) { - content_ref->already_drained = true; - } - } - } - - /* Clear isend_list */ - for(rm_item = opal_list_get_last(&peer_ref->isend_list); - rm_item != opal_list_get_begin(&peer_ref->isend_list); - rm_item = opal_list_get_prev(rm_item) ) { - msg_ref = (ompi_crcp_bkmrk_pml_traffic_message_ref_t*)rm_item; - msg_ref->matched = 0; - msg_ref->done = 0; - msg_ref->active_drain += msg_ref->active; - msg_ref->active = 0; - - for(cont_item = opal_list_get_first(&(msg_ref->msg_contents)); - cont_item != opal_list_get_end( &(msg_ref->msg_contents)); - cont_item = opal_list_get_next(cont_item) ) { - content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*)cont_item; - if( content_ref->active ) { - content_ref->already_drained = true; - } - } - } - - /* Clear send_init_list */ - for(rm_item = opal_list_get_last(&peer_ref->send_list); - rm_item != opal_list_get_begin(&peer_ref->send_list); - rm_item = opal_list_get_prev(rm_item) ) { - msg_ref = (ompi_crcp_bkmrk_pml_traffic_message_ref_t*)rm_item; - msg_ref->matched = 0; - msg_ref->done = 0; - msg_ref->active_drain += msg_ref->active; - msg_ref->active = 0; - - for(cont_item = opal_list_get_first(&(msg_ref->msg_contents)); - cont_item != opal_list_get_end( &(msg_ref->msg_contents)); - cont_item = opal_list_get_next(cont_item) ) { - content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*)cont_item; - if( content_ref->active ) { - content_ref->already_drained = true; - } - } - } - - /* Clear recv_list */ - for(rm_item = 
opal_list_get_last(&peer_ref->recv_list); - rm_item != opal_list_get_begin(&peer_ref->recv_list); - rm_item = opal_list_get_prev(rm_item) ) { - msg_ref = (ompi_crcp_bkmrk_pml_traffic_message_ref_t*)rm_item; - msg_ref->matched = 0; - msg_ref->done = 0; - } - - /* Clear irecv_list */ - for(rm_item = opal_list_get_last(&peer_ref->irecv_list); - rm_item != opal_list_get_begin(&peer_ref->irecv_list); - rm_item = opal_list_get_prev(rm_item) ) { - msg_ref = (ompi_crcp_bkmrk_pml_traffic_message_ref_t*)rm_item; - msg_ref->matched = 0; - msg_ref->done = 0; - } - - /* Clear recv_init_list */ - for(rm_item = opal_list_get_last(&peer_ref->recv_list); - rm_item != opal_list_get_begin(&peer_ref->recv_list); - rm_item = opal_list_get_prev(rm_item) ) { - msg_ref = (ompi_crcp_bkmrk_pml_traffic_message_ref_t*)rm_item; - msg_ref->matched = 0; - msg_ref->done = 0; - } - } - - return exit_status; -} - -static int ft_event_exchange_bookmarks(void) -{ - int peer_idx = 0; - int my_idx = OMPI_PROC_MY_NAME->vpid; - int iter = 0; - int num_peers = 0; - - num_peers = opal_list_get_size(&ompi_crcp_bkmrk_pml_peer_refs); - - for( peer_idx = (num_peers - my_idx - 1), iter = 0; - iter < num_peers; - peer_idx = (peer_idx + 1) % num_peers, ++iter) - { - if(my_idx > peer_idx) { - /* Send our bookmark status */ - send_bookmarks(peer_idx); - /* Recv peer bookmark status */ - recv_bookmarks(peer_idx); - } - else if(my_idx < peer_idx) { - /* Recv peer bookmark status */ - recv_bookmarks(peer_idx); - /* Send our bookmark status */ - send_bookmarks(peer_idx); - } - } - - /* Wait for all bookmarks to arrive */ - START_TIMER(CRCP_TIMER_CKPT_EX_WAIT); - while( total_recv_bookmarks > 0 ) { - opal_event_loop(opal_event_base, OPAL_EVLOOP_NONBLOCK); - } - total_recv_bookmarks = 0; - END_TIMER(CRCP_TIMER_CKPT_EX_WAIT); - - return OMPI_SUCCESS; -} - -static int ft_event_check_bookmarks(void) -{ - opal_list_item_t* item = NULL; - int ret; - int p_n_to_p_m = 0; - int p_n_from_p_m = 0; - - if( 10 <= 
mca_crcp_bkmrk_component.super.verbose ) { - sleep(OMPI_PROC_MY_NAME->vpid); - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "---------------------------------------------")); - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "Process %s Match Table", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME))); - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "%s %5s | %7s | %7s | %7s | %7s |", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - "Vpid", "T_Send", "M_Recv", "M_Send", "T_Recv")); - - for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs); - item != opal_list_get_end(&ompi_crcp_bkmrk_pml_peer_refs); - item = opal_list_get_next(item) ) { - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref; - int t_send, m_send; - int t_recv, m_recv; - peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item; - - t_send = peer_ref->total_msgs_sent; - m_send = peer_ref->matched_msgs_sent; - t_recv = peer_ref->total_msgs_recvd; - m_recv = peer_ref->matched_msgs_recvd; - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "%s %5d | %7d | %7d | %7d | %7d |", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - peer_ref->proc_name.vpid, - t_send, m_recv, m_send, t_recv)); - } - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "---------------------------------------------")); - } - - /* - * For each peer: - * - Check bookmarks - * - if mis-matched then post outstanding recvs. 
- */ - for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs); - item != opal_list_get_end(&ompi_crcp_bkmrk_pml_peer_refs); - item = opal_list_get_next(item) ) { - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref; - peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item; - - if( OPAL_EQUAL == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - (OMPI_PROC_MY_NAME), - &(peer_ref->proc_name)) ) { - continue; - } - - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "-- Bookmark Details --", false)); - - /* Lowest Rank sends first */ - if( OMPI_PROC_MY_NAME->vpid < peer_ref->proc_name.vpid ) { - /******************** - * Check P_n --> P_m - * Has the peer received all the messages that I have put on the wire? - ********************/ - p_n_to_p_m = peer_ref->total_msgs_sent; - p_n_from_p_m = peer_ref->matched_msgs_recvd; - - /* T_Send >= M_Recv */ - if( p_n_to_p_m < p_n_from_p_m ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s --> %s " - "Total Sent (%4d) = Matched Recv. (%4d) => Diff (%4d). " - " WARNING: Peer received more than was sent. :(\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - p_n_to_p_m, - p_n_from_p_m, - (p_n_to_p_m - p_n_from_p_m) - ); - } - - /* I've send more than my peer has received, - * so need to coordinate with peer. */ - if( p_n_to_p_m > p_n_from_p_m) { - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s --> %s " - "Total Sent (%4d) = Matched Recv. (%4d). Peer needs %4d.\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - p_n_to_p_m, - p_n_from_p_m, - (p_n_to_p_m - p_n_from_p_m) - )); - /* - * Tell the peer what the outstanding messages looked like. 
- * Since we can't tell which ones they are, we need to send the - * information for all of the messages since the last checkpoint - */ - if( OMPI_SUCCESS != (ret = send_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: check_bookmarks: Unable to send message details to peer %s: Return %d\n", - OMPI_NAME_PRINT(&peer_ref->proc_name), - ret); - return ret; - } - } - - /******************** - * Check P_n <-- P_m - * Have I received all the messages that my peer has put on the wire? - ********************/ - p_n_to_p_m = peer_ref->matched_msgs_sent; - p_n_from_p_m = peer_ref->total_msgs_recvd; - - /* M_Send >= T_Recv */ - if( p_n_to_p_m < p_n_from_p_m ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s --> %s " - "Matched Sent (%4d) = Total Recv. (%4d) => Diff (%4d). " - " WARNING: I received more than the peer sent. :(\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - p_n_to_p_m, - p_n_from_p_m, - (p_n_to_p_m - p_n_from_p_m) - ); - } - - /* I've recv'ed less than my peer has sent, - * so need to coordinate with peer. */ - if( p_n_to_p_m > p_n_from_p_m) { - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - "Matched Sent (%4d) = Total Recv. (%4d). I need %4d.\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - p_n_to_p_m, - p_n_from_p_m, - (p_n_to_p_m - p_n_from_p_m) - )); - /* - * Receive from peer the datatypes of the outstanding sends - * As we figure out what they are post Irecv's for them into a drained buffer list. 
- */ - if( OMPI_SUCCESS != (ret = recv_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: check_bookmarks: Unable to recv message details from peer %s: Return %d\n", - OMPI_NAME_PRINT(&peer_ref->proc_name), - ret); - return ret; - } - } - } - /* Highest rank recvs first */ - else { - /******************** - * Check P_n <-- P_m - * Have I received all the messages that my peer has put on the wire? - ********************/ - p_n_to_p_m = peer_ref->matched_msgs_sent; - p_n_from_p_m = peer_ref->total_msgs_recvd; - - /* M_Send >= T_Recv */ - if( p_n_to_p_m < p_n_from_p_m ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s --> %s " - "Matched Sent (%4d) = Total Recv. (%4d) => Diff (%4d). " - " WARNING: I received more than the peer sent. :(\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - p_n_to_p_m, - p_n_from_p_m, - (p_n_to_p_m - p_n_from_p_m) - ); - } - - /* I've recv'ed less than my peer has sent, - * so need to coordinate with peer. */ - if( p_n_to_p_m > p_n_from_p_m) { - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - "Matched Sent (%4d) = Total Recv. (%4d). I need %4d.\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - p_n_to_p_m, - p_n_from_p_m, - (p_n_to_p_m - p_n_from_p_m) - )); - /* - * Receive from peer the datatypes of the outstanding sends - * As we figure out what they are post Irecv's for them into a drained buffer list. 
- */ - if( OMPI_SUCCESS != (ret = recv_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: check_bookmarks: Unable to recv message details from peer %s: Return %d\n", - OMPI_NAME_PRINT(&peer_ref->proc_name), - ret); - return ret; - } - } - - /******************** - * Check P_n --> P_m - * Has the peer received all the messages that I have put on the wire? - ********************/ - p_n_to_p_m = peer_ref->total_msgs_sent; - p_n_from_p_m = peer_ref->matched_msgs_recvd; - - /* T_Send >= M_Recv */ - if( p_n_to_p_m < p_n_from_p_m ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s --> %s " - "Total Sent (%4d) = Matched Recv. (%4d) => Diff (%4d). " - " WARNING: Peer received more than was sent. :(\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - p_n_to_p_m, - p_n_from_p_m, - (p_n_to_p_m - p_n_from_p_m) - ); - } - - /* I've send more than my peer has received, - * so need to coordinate with peer. */ - if( p_n_to_p_m > p_n_from_p_m) { - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s --> %s " - "Total Sent (%4d) = Matched Recv. (%4d). Peer needs %4d.\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - p_n_to_p_m, - p_n_from_p_m, - (p_n_to_p_m - p_n_from_p_m) - )); - /* - * Tell the peer what the outstanding messages looked like. 
- * Since we can't tell which ones they are, we need to send the - * information for all of the messages since the last checkpoint - */ - if( OMPI_SUCCESS != (ret = send_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: check_bookmarks: Unable to send message details to peer %s: Return %d\n", - OMPI_NAME_PRINT(&peer_ref->proc_name), - ret); - return ret; - } - } - } - } - - return OMPI_SUCCESS; -} - -static int ft_event_post_drain_acks(void) -{ - ompi_crcp_bkmrk_pml_drain_message_ack_ref_t * drain_msg_ack = NULL; - opal_list_item_t* item = NULL; - size_t req_size; - - req_size = opal_list_get_size(&drained_msg_ack_list); - if(req_size <= 0) { - return OMPI_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s Wait on %d Drain ACK Messages.\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - (int)req_size)); - - /* - * We have loaded our peer with the message information - * Now wait for the ack from them - */ - for(item = opal_list_get_first(&drained_msg_ack_list); - item != opal_list_get_end(&drained_msg_ack_list); - item = opal_list_get_next(item) ) { - drain_msg_ack = (ompi_crcp_bkmrk_pml_drain_message_ack_ref_t*)item; - - /* Post the receive */ - ompi_rte_recv_buffer_nb(&drain_msg_ack->peer, OMPI_CRCP_COORD_BOOKMARK_TAG, - 0, drain_message_ack_cbfunc, NULL); - } - - return OMPI_SUCCESS; -} - -static void drain_message_ack_cbfunc(int status, - ompi_process_name_t* sender, - opal_buffer_t *buffer, - ompi_rml_tag_t tag, - void* cbdata) -{ - int ret, exit_status = OMPI_SUCCESS; - opal_list_item_t* item = NULL; - size_t ckpt_status; - - /* - * Unpack the buffer - */ - UNPACK_BUFFER(buffer, ckpt_status, 1, OPAL_SIZE, ""); - - /* - * Update the outstanding message queue - */ - for(item = opal_list_get_first(&drained_msg_ack_list); - item != opal_list_get_end(&drained_msg_ack_list); - item = opal_list_get_next(item) ) { - 
ompi_crcp_bkmrk_pml_drain_message_ack_ref_t * drain_msg_ack; - drain_msg_ack = (ompi_crcp_bkmrk_pml_drain_message_ack_ref_t*)item; - - /* If this ACK has not completed yet */ - if(!drain_msg_ack->complete) { - /* If it is the correct peer */ - if( OPAL_EQUAL == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - &(drain_msg_ack->peer), - sender) ) { - /* We found it! */ - drain_msg_ack->complete = true; - OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s --> %s Received ACK of FLUSH from peer\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(sender) )); - return; - } - } - } - - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s --> %s ERROR: Unable to match ACK to peer (%d)\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(sender), exit_status); - - cleanup: - return; -} - -static int ft_event_post_drained(void) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_peer_ref_t *cur_peer_ref = NULL; - ompi_crcp_bkmrk_pml_drain_message_ref_t * drain_msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - opal_list_item_t* item = NULL, *d_item = NULL, *c_item = NULL; - int i, total_number_to_drain = 0, peer_total = 0; - - /* First Pass just to get a count */ - for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs); - item != opal_list_get_end(&ompi_crcp_bkmrk_pml_peer_refs); - item = opal_list_get_next(item) ) { - cur_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item; - - if( OPAL_EQUAL == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - (OMPI_PROC_MY_NAME), - &(cur_peer_ref->proc_name)) ) { - continue; - } - - for(d_item = opal_list_get_first(&(cur_peer_ref->drained_list)); - d_item != opal_list_get_end(&(cur_peer_ref->drained_list)); - d_item = opal_list_get_next(d_item) ) { - drain_msg_ref = (ompi_crcp_bkmrk_pml_drain_message_ref_t*)d_item; - - for(c_item = opal_list_get_first(&(drain_msg_ref->msg_contents)); - c_item != 
opal_list_get_end(&(drain_msg_ref->msg_contents)); - c_item = opal_list_get_next(c_item) ) { - content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*)c_item; - - if( !content_ref->done ) { - ++total_number_to_drain; - } - } - } - } - - /* - * Check to make sure there is something to post - */ - if( 0 >= total_number_to_drain ) { - return OMPI_SUCCESS; - } - - /* Allocate memory */ - if( NULL != quiesce_requests ) { - free(quiesce_requests); - quiesce_requests = NULL; - } - quiesce_requests = (ompi_request_t **)malloc( (total_number_to_drain) * sizeof(ompi_request_t *)); - if( NULL == quiesce_requests){ - exit_status = OMPI_ERROR; - goto cleanup; - } - - if( NULL != quiesce_statuses ) { - free(quiesce_statuses); - quiesce_statuses = NULL; - } - quiesce_statuses = (ompi_status_public_t **)malloc( (total_number_to_drain) * sizeof(ompi_status_public_t *)); - if( NULL == quiesce_statuses){ - exit_status = OMPI_ERROR; - goto cleanup; - } - - /* Initalize to invalid values */ - for(i = 0; i < (total_number_to_drain); ++i) { - quiesce_requests[i] = &(ompi_request_null.request); - quiesce_statuses[i] = &ompi_status_empty; - } - quiesce_request_count = 0; - - /* Second pass to post */ - for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs); - item != opal_list_get_end(&ompi_crcp_bkmrk_pml_peer_refs); - item = opal_list_get_next(item) ) { - cur_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item; - peer_total = 0; - - if( OPAL_EQUAL == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - (OMPI_PROC_MY_NAME), - &(cur_peer_ref->proc_name)) ) { - continue; - } - - for(d_item = opal_list_get_first(&(cur_peer_ref->drained_list)); - d_item != opal_list_get_end(&(cur_peer_ref->drained_list)); - d_item = opal_list_get_next(d_item) ) { - drain_msg_ref = (ompi_crcp_bkmrk_pml_drain_message_ref_t*)d_item; - - for(c_item = opal_list_get_first(&(drain_msg_ref->msg_contents)); - c_item != opal_list_get_end(&(drain_msg_ref->msg_contents)); - c_item = opal_list_get_next(c_item) ) { - 
content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*)c_item; - - if( content_ref->done ) { - continue; - } - - if( OMPI_SUCCESS != (ret = ft_event_post_drain_message(drain_msg_ref, content_ref) ) ) { - exit_status = ret; - goto cleanup; - } - - cur_peer_ref->ack_required = true; - - /* Wait on all drain requests, newly posted or not */ - if( NULL != content_ref->request) { - quiesce_requests[quiesce_request_count] = content_ref->request; - quiesce_statuses[quiesce_request_count] = &content_ref->status; - quiesce_request_count++; - peer_total++; - } - /* If a NULL request, and already_posted then this is an indicator that we need to stall */ - else if( content_ref->already_posted ) { - stall_for_completion = true; - } - else { - ERROR_SHOULD_NEVER_HAPPEN("crcp:bkmrk: ft_event_post_drained(): Found a drain message with a NULL request."); - } - } - } - - if( peer_total > 0 || stall_for_completion ) { - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s Will be draining %4d messages from this peer. Total %4d %s\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(cur_peer_ref->proc_name)), - peer_total, - quiesce_request_count, - (stall_for_completion ? "(And Stalling)" : "") )); - } - } - - cleanup: - return exit_status; -} - -static int ft_event_post_drain_message(ompi_crcp_bkmrk_pml_drain_message_ref_t *drain_msg_ref, - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref) -{ - int ret; - - /* - * This message has already been posted and drained in a previous - * checkpoint, do not post it again. 
- */ - if( content_ref->done ) { - return OMPI_SUCCESS; - } - - /* Do not repost those that are already posted, and - * we have requests for - */ - if( content_ref->already_posted ) { - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s Found a message that we do not need to post.\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(drain_msg_ref->proc_name)) )); - return OMPI_SUCCESS; - } - - /* Match counts in traffic_message_create_drain_message() */ - content_ref->active = true; - drain_msg_ref->active++; - - /* - * Post a receive to drain this message - */ - OPAL_OUTPUT_VERBOSE((20, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s Posting a message to be drained from rank %d.\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(drain_msg_ref->proc_name)), - drain_msg_ref->rank)); - if( OMPI_SUCCESS != (ret = wrapped_pml_module->pml_irecv(content_ref->buffer, - (drain_msg_ref->count * drain_msg_ref->ddt_size), - drain_msg_ref->datatype, - drain_msg_ref->rank, - drain_msg_ref->tag, - drain_msg_ref->comm, - &(content_ref->request) ) ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s Failed to post the Draining PML iRecv\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(drain_msg_ref->proc_name)) ); - return ret; - } - - return OMPI_SUCCESS; -} - -static int ft_event_wait_quiesce(void) -{ - int exit_status = OMPI_SUCCESS; - int ret; - - /********************************************* - * Wait for all draining receives to complete - **********************************************/ - if( OMPI_SUCCESS != (ret = wait_quiesce_drained() ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: wait_quiesce: %s Failed to quiesce drained messages\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME) ); - exit_status = ret; - goto cleanup; - } - - /******************************************************************* - * If we are 
waiting for All Clear messages from peers wait on them. - *******************************************************************/ - if( OMPI_SUCCESS != (ret = wait_quiesce_drain_ack() ) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: wait_quiesce: %s Failed to recv all drain ACKs\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME) ); - exit_status = ret; - goto cleanup; - } - - cleanup: - return exit_status; -} - -static int wait_quiesce_drained(void) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_peer_ref_t *cur_peer_ref = NULL; - ompi_crcp_bkmrk_pml_drain_message_ref_t * drain_msg_ref = NULL; - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - opal_list_item_t* item = NULL, *d_item = NULL, *d_next = NULL, *c_item = NULL, *c_next = NULL; - bool prev_stall = false; - - /* Can we shortcut this? */ - - OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s Waiting on %d messages to drain\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - (int)quiesce_request_count)); - - /* - * Wait on all of the message to complete in any order - * Created in ft_event_post_drained() - */ - prev_stall = opal_cr_stall_check; - opal_cr_stall_check = true; - if( OMPI_SUCCESS != (ret = coord_request_wait_all(quiesce_request_count, - quiesce_requests, - quiesce_statuses) ) ) { - exit_status = ret; - goto cleanup; - } - opal_cr_stall_check = prev_stall; - - /* - * Send ACKs to all peers - * - * Remove only the already posted members of the drained list. - * All other elements need to be left in the list since we need - * to match them as new receives come in. 
- */ - for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs); - item != opal_list_get_end(&ompi_crcp_bkmrk_pml_peer_refs); - item = opal_list_get_next(item) ) { - cur_peer_ref = (ompi_crcp_bkmrk_pml_peer_ref_t*)item; - - if( OPAL_EQUAL == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - (OMPI_PROC_MY_NAME), - &(cur_peer_ref->proc_name)) ) { - continue; - } - - /* - * Send ACK to peer if wanted - */ - if( cur_peer_ref->ack_required ) { - opal_buffer_t *buffer = NULL; - size_t response = 1; - - OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s --> %s Send ACKs to Peer\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(cur_peer_ref->proc_name)) )); - - /* Send All Clear to Peer */ - if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { - exit_status = OMPI_ERROR; - goto cleanup; - } - - PACK_BUFFER(buffer, response, 1, OPAL_SIZE, ""); - - /* JJH - Performance Optimization? - Why not post all isends, then wait? */ - if (ORTE_SUCCESS != (ret = ompi_rte_send_buffer_nb(&(cur_peer_ref->proc_name), - buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, - orte_rml_send_callback, NULL))) { - exit_status = ret; - goto cleanup; - } - if( NULL != buffer) { - OBJ_RELEASE(buffer); - buffer = NULL; - } - } - - cur_peer_ref->ack_required = false; - - /* - * Remove already_posted drained items - */ - for(d_item = opal_list_get_first(&(cur_peer_ref->drained_list)); - d_item != opal_list_get_end(&(cur_peer_ref->drained_list)); - d_item = d_next ) { - drain_msg_ref = (ompi_crcp_bkmrk_pml_drain_message_ref_t*)d_item; - d_next = opal_list_get_next(d_item); - - for(c_item = opal_list_get_first(&(drain_msg_ref->msg_contents)); - c_item != opal_list_get_end(&(drain_msg_ref->msg_contents)); - c_item = c_next ) { - content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*)c_item; - c_next = opal_list_get_next(c_item); - - /* - * This message has already been posted and drained in a previous - * checkpoint, do not do anything to it. 
- */ - if( content_ref->done ) { - continue; - } - - if( content_ref->already_posted ) { - drain_message_remove(cur_peer_ref, drain_msg_ref, content_ref); - - /* Match counts in traffic_message_create_drain_message() */ - drain_msg_ref->active--; - drain_msg_ref->already_posted--; - } else { - content_ref->done = true; - content_ref->active = false; - - /* Match counts in traffic_message_create_drain_message() */ - drain_msg_ref->done++; - drain_msg_ref->active--; - } - } - } - } - - cleanup: - if( NULL != quiesce_requests ) { - free(quiesce_requests); - quiesce_requests = NULL; - } - - if( NULL != quiesce_statuses ) { - free(quiesce_statuses); - quiesce_statuses = NULL; - } - - quiesce_request_count = 0; - - return exit_status; -} - -static int coord_request_wait_all( size_t count, - ompi_request_t ** requests, - ompi_status_public_t ** statuses ) -{ - int exit_status = OMPI_SUCCESS; - ompi_status_public_t * status; - ompi_request_t *req; - size_t i; - - /* - * Just wait on each request in order - */ - for( i = 0; i < count; ++i) { - req = requests[i]; - status = statuses[i]; - - coord_request_wait(req, status); - - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s Request Wait: Done with idx %d of %d\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - (int)i, (int)count)); - } - - return exit_status; -} - -static int coord_request_wait( ompi_request_t * req, - ompi_status_public_t * status) -{ - ompi_request_wait_completion(req); - - if( MPI_STATUS_IGNORE != status ) { - status->MPI_TAG = req->req_status.MPI_TAG; - status->MPI_SOURCE = req->req_status.MPI_SOURCE; - status->_cancelled = req->req_status._cancelled; - status->_ucount = req->req_status._ucount; - } - - return OMPI_SUCCESS; -} - -static int wait_quiesce_drain_ack(void) -{ - opal_list_item_t* item = NULL; - opal_list_item_t* next = NULL; - ompi_crcp_bkmrk_pml_drain_message_ack_ref_t * drain_msg_ack; - int num_outstanding; - - /* YYY JJH YYY Should we wait explicitly on 
the send requests pending first? */ - - num_outstanding = opal_list_get_size(&drained_msg_ack_list); - if(num_outstanding <= 0) { - /* Nothing to do */ - return OMPI_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s Waiting on %d Drain ACK messages\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - num_outstanding)); - - while(0 < num_outstanding) { - for(item = opal_list_get_first(&drained_msg_ack_list); - item != opal_list_get_end(&drained_msg_ack_list); - item = next) { - drain_msg_ack = (ompi_crcp_bkmrk_pml_drain_message_ack_ref_t*)item; - next = opal_list_get_next(item); - - if(drain_msg_ack->complete) { - num_outstanding--; - opal_list_remove_item(&drained_msg_ack_list, &(drain_msg_ack->super) ); - HOKE_DRAIN_ACK_MSG_REF_RETURN(item); - break; - } - } - - opal_event_loop(opal_event_base, OPAL_EVLOOP_NONBLOCK); - } - - /* Clear the ack queue if it isn't already clear (it should already be) */ - while (NULL != (item = opal_list_remove_first(&drained_msg_ack_list) ) ) { - HOKE_DRAIN_ACK_MSG_REF_RETURN(item); - } - - return OMPI_SUCCESS; -} - -/* Paired with recv_bookmarks */ -static int send_bookmarks(int peer_idx) -{ - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref; - ompi_process_name_t peer_name; - opal_buffer_t *buffer = NULL; - int exit_status = OMPI_SUCCESS; - int ret; - - START_TIMER(CRCP_TIMER_CKPT_EX_PEER_S); - /* - * Find the peer structure for this peer - */ - peer_name.jobid = OMPI_PROC_MY_NAME->jobid; - peer_name.vpid = peer_idx; - - if( NULL == (peer_ref = find_peer(peer_name))) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: send_bookmarks: Error: Could not find peer indexed %d\n", - peer_idx); - exit_status = OMPI_ERROR; - goto cleanup; - } - - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s --> %s Sending bookmark (S[%6d] R[%6d])\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&peer_name), - peer_ref->total_msgs_sent, - 
peer_ref->total_msgs_recvd)); - - /* - * Send the bookmarks to peer - */ - if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { - exit_status = OMPI_ERROR; - goto cleanup; - } - - PACK_BUFFER(buffer, (peer_ref->total_msgs_sent), 1, OPAL_UINT32, - "crcp:bkmrk: send_bookmarks: Unable to pack total_msgs_sent"); - PACK_BUFFER(buffer, (peer_ref->total_msgs_recvd), 1, OPAL_UINT32, - "crcp:bkmrk: send_bookmarks: Unable to pack total_msgs_recvd"); - - if (ORTE_SUCCESS != (ret = ompi_rte_send_buffer_nb(&peer_name, buffer, - OMPI_CRCP_COORD_BOOKMARK_TAG, - orte_rml_send_callback, NULL))) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: send_bookmarks: Failed to send bookmark to peer %s: Return %d\n", - OMPI_NAME_PRINT(&peer_name), - ret); - exit_status = ret; - goto cleanup; - } - - cleanup: - if(NULL != buffer) { - OBJ_RELEASE(buffer); - } - - END_TIMER(CRCP_TIMER_CKPT_EX_PEER_S); - DISPLAY_INDV_TIMER(CRCP_TIMER_CKPT_EX_PEER_S, peer_idx, 1); - - return exit_status; -} - -/* Paired with send_bookmarks */ -static int recv_bookmarks(int peer_idx) -{ - ompi_process_name_t peer_name; - - START_TIMER(CRCP_TIMER_CKPT_EX_PEER_R); - - peer_name.jobid = OMPI_PROC_MY_NAME->jobid; - peer_name.vpid = peer_idx; - - ompi_rte_recv_buffer_nb(&peer_name, OMPI_CRCP_COORD_BOOKMARK_TAG, - 0, recv_bookmarks_cbfunc, NULL); - - ++total_recv_bookmarks; - - END_TIMER(CRCP_TIMER_CKPT_EX_PEER_R); - /* JJH Doesn't make much sense to print this. 
The real bottleneck is always the send_bookmarks() */ - /*DISPLAY_INDV_TIMER(CRCP_TIMER_CKPT_EX_PEER_R, peer_idx, 1);*/ - - return OMPI_SUCCESS; -} - -static void recv_bookmarks_cbfunc(int status, - ompi_process_name_t* sender, - opal_buffer_t *buffer, - ompi_rml_tag_t tag, - void* cbdata) -{ - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref; - int exit_status = OMPI_SUCCESS; - int ret, tmp_int; - ompi_vpid_t peer_idx; - - peer_idx = sender->vpid; - - /* - * Find the peer structure for this peer - */ - if( NULL == (peer_ref = find_peer(*sender))) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: recv_bookmarks: Could not find peer indexed %d\n", - peer_idx); - exit_status = OMPI_ERROR; - goto cleanup; - } - - UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, - "crcp:bkmrk: recv_bookmarks: Unable to unpack total_msgs_sent"); - peer_ref->matched_msgs_sent = tmp_int; - - UNPACK_BUFFER(buffer, tmp_int, 1, OPAL_UINT32, - "crcp:bkmrk: recv_bookmarks: Unable to unpack total_msgs_recvd"); - peer_ref->matched_msgs_recvd = tmp_int; - - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s Received bookmark (S[%6d] R[%6d]) vs. 
(S[%6d] R[%6d]) (%d)\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(sender), - peer_ref->matched_msgs_sent, - peer_ref->matched_msgs_recvd, - peer_ref->total_msgs_sent, - peer_ref->total_msgs_recvd, - exit_status)); - - cleanup: - --total_recv_bookmarks; - - return; -} - -static int send_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int total_sent, int total_matched) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_drain_message_ack_ref_t * d_msg_ack = NULL; - opal_list_t *search_list = NULL; - opal_list_item_t* msg_item = NULL; - bool finished; - int pass_num = 1; - int need, found; - int total_details_sent = 0; - int num_matches = 0; - int p_total_found = 0; - - need = total_sent - total_matched; - found = 0; - finished = false; - assert(need > 0); - - START_TIMER(CRCP_TIMER_CKPT_CHECK_PEER_S); - - /* - * Check the 'send_list' for this peer - */ - search_list = &(peer_ref->send_list); - pass_num = 1; - - SEARCH_AGAIN: - for(msg_item = opal_list_get_last(search_list); - msg_item != opal_list_get_begin(search_list); - msg_item = opal_list_get_prev(msg_item) ) { - ompi_crcp_bkmrk_pml_traffic_message_ref_t * msg_ref; - msg_ref = (ompi_crcp_bkmrk_pml_traffic_message_ref_t*)msg_item; - - num_matches = 0; - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: send_msg_details: Stage 1: [M/A/D/AD] [%3d/%3d/%3d/%3d] (%s)", - msg_ref->matched, msg_ref->active, msg_ref->done, msg_ref->active_drain, - (msg_ref->msg_type == COORD_MSG_TYPE_B_SEND ? " Send" : - (msg_ref->msg_type == COORD_MSG_TYPE_I_SEND ? "iSend" : "pSend")) - )); - - /* If this message has not seen any traffic, then skip it */ - if( 0 >= (msg_ref->active + msg_ref->done) ) { - continue; - } - /* YYY JJH YYY Keep this as a sanity check? 
if( msg_ref->matched >= (msg_ref->active + msg_ref->done) ) { continue; } */ - - if(OMPI_SUCCESS != (ret = do_send_msg_detail(peer_ref, msg_ref, &num_matches, &p_total_found, &finished)) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: send_msg_details: %s --> %s Failed to send message details to peer. Return %d\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - ret); - } - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: send_msg_details: Stage 2: [M/A/D/AD] [%3d/%3d/%3d/%3d] (%s) [%3d, %3d, %s] [%3d, %3d]", - msg_ref->matched, msg_ref->active, msg_ref->done, msg_ref->active_drain, - (msg_ref->msg_type == COORD_MSG_TYPE_B_SEND ? " Send" : - (msg_ref->msg_type == COORD_MSG_TYPE_I_SEND ? "iSend" : "pSend")), - num_matches, p_total_found, (finished ? "T" : "F"), - total_details_sent, found - )); - - total_details_sent += num_matches; - if(0 < num_matches ) { - found += num_matches; - } - if(finished) { - goto ALL_SENT; - } - } - - /* - * We tried the 'send_list' and need more, - * so match off the 'isend_list' - */ - if( 1 == pass_num ) { - search_list = &(peer_ref->isend_list); - pass_num = 2; - goto SEARCH_AGAIN; - } - - /* - * We tried the 'send_list' and 'isend_list' and need more, - * so match off the 'send_init_list' - */ - if( 2 == pass_num ) { - search_list = &(peer_ref->send_init_list); - pass_num = 3; - goto SEARCH_AGAIN; - } - - ALL_SENT: - if( need > found ) { - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: send_msg_details: ERROR: ****** Need (%d) vs Found (%d)", - need, found)); - } - assert(need <= found); - - /* Prepare to post a Recv for the ACK All Clear signal from the peer - * which is sent when they have finished receiving all of the - * inflight messages into a local buffer - */ - HOKE_DRAIN_ACK_MSG_REF_ALLOC(d_msg_ack); - d_msg_ack->peer.jobid = peer_ref->proc_name.jobid; - d_msg_ack->peer.vpid = 
peer_ref->proc_name.vpid; - - d_msg_ack->complete = false; - opal_list_append(&drained_msg_ack_list, &(d_msg_ack->super)); - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-> %s Message Inflight! Will wait on ACK from this peer.\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)))); - - END_TIMER(CRCP_TIMER_CKPT_CHECK_PEER_S); - DISPLAY_INDV_TIMER(CRCP_TIMER_CKPT_CHECK_PEER_S, peer_ref->proc_name.vpid, total_details_sent); - - return exit_status; -} - -static int do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - ompi_crcp_bkmrk_pml_traffic_message_ref_t*msg_ref, - int *num_matches, - int *total_found, - bool *finished) -{ - int ret, exit_status = OMPI_SUCCESS; - opal_buffer_t *buffer = NULL; - orte_rml_recv_cb_t *rb = NULL; - int32_t recv_response = RECV_MATCH_RESP_ERROR; - int32_t num_resolv = -1; - int32_t p_total_found = -1; - int comm_my_rank = -1; - int total_sent; - - *num_matches = 0; - *total_found = 0;; - *finished = false; - - if( NULL != buffer) { - OBJ_RELEASE(buffer); - buffer = NULL; - } - - if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { - exit_status = OMPI_ERROR; - goto cleanup; - } - - /* - * Send: - * - Communicator Context ID - * - My Rank in Communicator - */ - comm_my_rank = ompi_comm_rank(msg_ref->comm); - - PACK_BUFFER(buffer, msg_ref->comm->c_contextid, 1, OPAL_UINT32, - "crcp:bkmrk: send_msg_details: Unable to pack communicator ID"); - PACK_BUFFER(buffer, comm_my_rank, 1, OPAL_INT, - "crcp:bkmrk: send_msg_details: Unable to pack comm rank ID"); - - /* - * Send: - * - Message tag - * - Message count - * - Message Datatype size - */ - PACK_BUFFER(buffer, msg_ref->tag, 1, OPAL_INT, - "crcp:bkmrk: send_msg_details: Unable to pack tag"); - PACK_BUFFER(buffer, msg_ref->count, 1, OPAL_SIZE, - "crcp:bkmrk: send_msg_details: Unable to pack count"); - PACK_BUFFER(buffer, msg_ref->ddt_size, 1, OPAL_SIZE, - "crcp:bkmrk: send_msg_details: Unable to pack datatype 
size"); - - /* - * Send: - * - Message done - * - Message active - */ - total_sent = msg_ref->done + msg_ref->active; - PACK_BUFFER(buffer, total_sent, 1, OPAL_INT, - "crcp:bkmrk: send_msg_details: Unable to pack done+active count"); - - /* - * Do the send... - */ - if (ORTE_SUCCESS != (ret = ompi_rte_send_buffer_nb(&peer_ref->proc_name, buffer, - OMPI_CRCP_COORD_BOOKMARK_TAG, - orte_rml_send_callback, NULL))) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: do_send_msg_detail: Unable to send message details to peer %s: Return %d\n", - OMPI_NAME_PRINT(&peer_ref->proc_name), - ret); - - exit_status = OMPI_ERROR; - goto cleanup; - } - - /* - * Recv the ACK msg - */ - rb = OBJ_NEW(orte_rml_recv_cb_t); - rb->active = true; - ompi_rte_recv_buffer_nb(&peer_ref->proc_name, OMPI_CRCP_COORD_BOOKMARK_TAG, 0, - orte_rml_recv_callback, rb); - ORTE_WAIT_FOR_COMPLETION(rb->active); - - UNPACK_BUFFER(&rb->data, recv_response, 1, OPAL_UINT32, - "crcp:bkmrk: send_msg_details: Failed to unpack the ACK from peer buffer."); - UNPACK_BUFFER(&rb->data, num_resolv, 1, OPAL_UINT32, - "crcp:bkmrk: send_msg_details: Failed to unpack the num_resolv from peer buffer."); - UNPACK_BUFFER(&rb->data, p_total_found, 1, OPAL_UINT32, - "crcp:bkmrk: send_msg_details: Failed to unpack the total_found from peer buffer."); - - OBJ_RELEASE(rb); - /* Mark message as matched */ - msg_ref->matched += num_resolv; - *num_matches = num_resolv; - *total_found = p_total_found; - - /* - * - */ - if( RECV_MATCH_RESP_DONE == recv_response ) { - *finished = true; - } - else if( RECV_MATCH_RESP_MORE == recv_response ) { - *finished = false; - } - - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "**************************\n")); - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "send_msg_details: %d, %d = %s [%d / %d]\n", - *num_matches, *total_found, - (*finished ? 
"Done" : "Continue..."), - msg_ref->done, msg_ref->active)); - TRAFFIC_MSG_DUMP_MSG_INDV(15, (msg_ref, "", false)); - OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle, - "**************************\n")); - - cleanup: - return exit_status; -} - -/* - * Recv message details from peer - * - matched with send_msg_details() - */ -static int recv_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int total_recv, int total_matched) -{ - int need, found; - int response; - int exit_status = OMPI_SUCCESS; - int ret; - int total_details_recv = 0; - - need = total_recv - total_matched; - found = 0; - - assert( need > 0); - - START_TIMER(CRCP_TIMER_CKPT_CHECK_PEER_R); - - /* - * While we are still looking for messages to drain - */ - while(need > found) { - uint32_t p_comm_id; - size_t p_count; - size_t p_datatype_size; - int p_rank; - int p_tag; - int p_num_sent; - int num_resolved = 0; - - /* - * Receive the message details from peer - */ - if( OMPI_SUCCESS != (ret = do_recv_msg_detail(peer_ref, - &p_rank, &p_comm_id, - &p_tag, &p_count, - &p_datatype_size, - &p_num_sent)) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: recv_msg_details: %s <-- %s " - "Failed to receive message detail from peer. Return %d\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - ret); - exit_status = ret; - goto cleanup; - } - - /* - * Determine if we have matched this message or not. - * Also take approprate action. - */ - num_resolved = 0; - if( OMPI_SUCCESS != (ret = do_recv_msg_detail_check_drain(peer_ref, - p_rank, p_comm_id, - p_tag, p_count, - p_datatype_size, - p_num_sent, - &num_resolved)) ) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: recv_msg_details: %s <-- %s " - "Failed to check message detail from peer. 
Return %d\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - ret); - exit_status = ret; - goto cleanup; - } - - found += num_resolved; - total_details_recv += num_resolved; - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s Recv Detail: Stage --: [%3d / %3d] [%3d, %3d, %s]", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - need, found, - num_resolved, total_details_recv, - ( need <= found ? "T" : "F") )); - - /* If we do not need any more, respond DONE */ - if( need <= found ) { - response = RECV_MATCH_RESP_DONE; /* All done */ - } - /* Otherwise respond need more */ - else { - response = RECV_MATCH_RESP_MORE; - } - - if(OMPI_SUCCESS != (ret = do_recv_msg_detail_resp(peer_ref, response, num_resolved, found))) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: recv_msg_details: %s <-- %s Failed to respond to peer. Return %d\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - ret); - exit_status = ret; - goto cleanup; - } - } - - cleanup: - - END_TIMER(CRCP_TIMER_CKPT_CHECK_PEER_R); - DISPLAY_INDV_TIMER(CRCP_TIMER_CKPT_CHECK_PEER_R, peer_ref->proc_name.vpid, total_details_recv); - - return exit_status; -} - -static int do_recv_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int *rank, uint32_t *comm_id, int *tag, - size_t *count, size_t *datatype_size, - int *p_num_sent) -{ - orte_rml_recv_cb_t *rb = NULL; - int exit_status = OMPI_SUCCESS; - int ret; - - /* - * Recv the msg - */ - rb = OBJ_NEW(orte_rml_recv_cb_t); - rb->active = true; - ompi_rte_recv_buffer_nb(&peer_ref->proc_name, OMPI_CRCP_COORD_BOOKMARK_TAG, 0, orte_rml_recv_callback, rb); - ORTE_WAIT_FOR_COMPLETION(rb->active); - - /* Pull out the communicator ID */ - UNPACK_BUFFER(&rb->data, (*comm_id), 1, OPAL_UINT32, - "crcp:bkmrk: recv_msg_details: Failed to unpack the communicator ID"); - UNPACK_BUFFER(&rb->data, (*rank), 1, 
OPAL_INT, - "crcp:bkmrk: recv_msg_details: Failed to unpack the communicator rank ID"); - - /* Pull out the message details */ - UNPACK_BUFFER(&rb->data, (*tag), 1, OPAL_INT, - "crcp:bkmrk: recv_msg_details: Failed to unpack the tag"); - UNPACK_BUFFER(&rb->data, (*count), 1, OPAL_SIZE, - "crcp:bkmrk: recv_msg_details: Failed to unpack the count"); - UNPACK_BUFFER(&rb->data, (*datatype_size), 1, OPAL_SIZE, - "crcp:bkmrk: recv_msg_details: Failed to unpack the datatype size"); - - /* Pull out the counts */ - UNPACK_BUFFER(&rb->data, (*p_num_sent), 1, OPAL_INT, - "crcp:bkmrk: recv_msg_details: Failed to unpack the sent count"); - -cleanup: - OBJ_RELEASE(rb); - return exit_status; -} - -static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int rank, uint32_t comm_id, int tag, - size_t count, size_t datatype_size, - int p_num_sent, - int *num_resolved) -{ - int ret, exit_status = OMPI_SUCCESS; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *posted_tmp_msg_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *posted_recv_msg_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *posted_irecv_msg_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *posted_precv_msg_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *posted_unknown_recv_msg_ref = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *posted_unknown_precv_msg_ref = NULL; - /* Number of messages left not-matched */ - int num_left_unresolved = 0; - /* Number of active messages need to be drained */ - int num_still_active = 0; - /* Number of drain messages posted */ - int num_posted = 0; - - *num_resolved = 0; - num_left_unresolved = p_num_sent; - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - "Stage 0: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - peer_ref->total_msgs_recvd, - peer_ref->matched_msgs_sent, - p_num_sent, - 
num_left_unresolved, - *num_resolved)); - TRAFFIC_MSG_DUMP_PEER(15, (peer_ref, "Recv Check...", true)); - - /* - * Find all references to this message signature. - */ - ret = traffic_message_find_recv(peer_ref, /* Peer to resolve with */ - rank, comm_id, tag, count, datatype_size, /* Message signature */ - &posted_recv_msg_ref, /* One of 5 lists where this signature could match */ - &posted_irecv_msg_ref, - &posted_precv_msg_ref, - &posted_unknown_recv_msg_ref, - &posted_unknown_precv_msg_ref); - if( OMPI_SUCCESS != ret) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: recv_msg_detail_check: %s -- %s " - "Failed to determine if we have received this message. Return %d\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - ret); - exit_status = ret; - goto cleanup; - } - - /* - * Peer sent 'p_num_sent'. - * For each msg_ref from recv lists: - * Mark all as 'matched' - * Subtract recv->{'active' + 'done'} from 'p_num_sent' - * If recv->active - * need to make sure to drain these and possibly stall - * If 'p_num_sent' > 0 - * Post outstanding messages in drain queue - */ - - /* - * First pass: Count all 'done' - */ - if( NULL != posted_recv_msg_ref ) { - posted_recv_msg_ref->matched += posted_recv_msg_ref->done; - num_left_unresolved -= posted_recv_msg_ref->done; - TRAFFIC_MSG_DUMP_MSG_INDV(11, (posted_recv_msg_ref, "Ck. Recv", true)); - } - if( NULL != posted_irecv_msg_ref ) { - posted_irecv_msg_ref->matched += posted_irecv_msg_ref->done; - num_left_unresolved -= posted_irecv_msg_ref->done; - TRAFFIC_MSG_DUMP_MSG_INDV(11, (posted_irecv_msg_ref, "Ck. iRecv", true)); - } - if( NULL != posted_precv_msg_ref ) { - posted_precv_msg_ref->matched += posted_precv_msg_ref->done; - num_left_unresolved -= posted_precv_msg_ref->done; - TRAFFIC_MSG_DUMP_MSG_INDV(11, (posted_precv_msg_ref, "Ck. 
pRecv", true)); - } - if( NULL != posted_unknown_recv_msg_ref ) { - posted_unknown_recv_msg_ref->matched += posted_unknown_recv_msg_ref->done; - num_left_unresolved -= posted_unknown_recv_msg_ref->done; - TRAFFIC_MSG_DUMP_MSG_INDV(11, (posted_unknown_recv_msg_ref, "Ck. uRecv", true)); - } - if( NULL != posted_unknown_precv_msg_ref ) { - posted_unknown_precv_msg_ref->matched += posted_unknown_precv_msg_ref->done; - num_left_unresolved -= posted_unknown_precv_msg_ref->done; - TRAFFIC_MSG_DUMP_MSG_INDV(11, (posted_unknown_precv_msg_ref, "Ck. upRecv", true)); - } - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - "Stage 1: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - peer_ref->total_msgs_recvd, - peer_ref->matched_msgs_sent, - p_num_sent, - num_left_unresolved, - *num_resolved)); - - /* Short cut if we have completed everything necessary - * This should never happen since we are here because there is a message - * that was sent that has not been started. 
- */ - if( num_left_unresolved <= 0 ) { - goto cleanup; - } - - /* - * Next pass: Count all 'active' - * - if active > unresolved then match all unresolved, and jump to end - * - if active < unresolved then match all active, and continue looking - */ - if( NULL != posted_recv_msg_ref ) { - if( posted_recv_msg_ref->active > num_left_unresolved ) { - posted_recv_msg_ref->matched += num_left_unresolved; - num_still_active += num_left_unresolved; - num_left_unresolved = 0; - } else { - posted_recv_msg_ref->matched += posted_recv_msg_ref->active; - num_still_active += posted_recv_msg_ref->active; - num_left_unresolved -= posted_recv_msg_ref->active; - } - } - if( num_left_unresolved > 0 && NULL != posted_irecv_msg_ref ) { - if( posted_irecv_msg_ref->active > num_left_unresolved ) { - posted_irecv_msg_ref->matched += num_left_unresolved; - num_still_active += num_left_unresolved; - num_left_unresolved = 0; - } else { - posted_irecv_msg_ref->matched += posted_irecv_msg_ref->active; - num_still_active += posted_irecv_msg_ref->active; - num_left_unresolved -= posted_irecv_msg_ref->active; - } - } - if( num_left_unresolved > 0 && NULL != posted_precv_msg_ref ) { - if( posted_precv_msg_ref->active > num_left_unresolved ) { - posted_precv_msg_ref->matched += num_left_unresolved; - num_still_active += num_left_unresolved; - num_left_unresolved = 0; - } else { - posted_precv_msg_ref->matched += posted_precv_msg_ref->active; - num_still_active += posted_precv_msg_ref->active; - num_left_unresolved -= posted_precv_msg_ref->active; - } - } - if( num_left_unresolved > 0 && NULL != posted_unknown_recv_msg_ref ) { - if( posted_unknown_recv_msg_ref->active > num_left_unresolved ) { - posted_unknown_recv_msg_ref->matched += num_left_unresolved; - num_still_active += num_left_unresolved; - num_left_unresolved = 0; - } else { - posted_unknown_recv_msg_ref->matched += posted_unknown_recv_msg_ref->active; - num_still_active += posted_unknown_recv_msg_ref->active; - num_left_unresolved -= 
posted_unknown_recv_msg_ref->active; - } - } - if( num_left_unresolved > 0 && NULL != posted_unknown_precv_msg_ref ) { - if( posted_unknown_precv_msg_ref->active > num_left_unresolved ) { - posted_unknown_precv_msg_ref->matched += num_left_unresolved; - num_still_active += num_left_unresolved; - num_left_unresolved = 0; - } else { - posted_unknown_precv_msg_ref->matched += posted_unknown_precv_msg_ref->active; - num_still_active += posted_unknown_precv_msg_ref->active; - num_left_unresolved -= posted_unknown_precv_msg_ref->active; - } - } - - /* - * If we happen to have more active Recvs than the peer has posted sends, then - * we need to reset the number still active to reflect that only a subset - * of the active sends should be drained. - */ - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - "Stage 2: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d, active %4d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - peer_ref->total_msgs_recvd, - peer_ref->matched_msgs_sent, - p_num_sent, - num_left_unresolved, - *num_resolved, - num_still_active - )); - - /* - * Check the math at this point, and make sure we did not mess up above. - */ - if(num_left_unresolved < 0 ) { - ERROR_SHOULD_NEVER_HAPPEN_ARG("crcp:bkmrk: Ck.Drain: Unresolved (%3d) < 0", num_left_unresolved); - exit_status = OMPI_ERROR; - goto cleanup; - } - - /* - * Fast Track: If there are no outstanding messages to post, and nothing 'active' - * If all the matched messages were found 'done' (none were 'active') - * -> Nothing to do. 
- */ - if( num_left_unresolved <= 0 && - num_still_active <= 0) { - goto cleanup; - } - - /* - * Stage 3: Resolve 'active' messages by posting a drain message for each - * -> then we need to make sure to wait for them to complete before the checkpoint - * -> Create a drain message - * -> Point the 'request' at it - * -> Make sure not to post this message to be drained, but just wait on the request. - */ - if( num_still_active > 0 ) { - /* - * If this is the current blocking recv, then we need to stall for it to - * complete properly. - * - Only applies to Blocking Recv. - */ - if( NULL != posted_recv_msg_ref ) { - /* Is this the signature of the current blocking recv? */ - if (current_msg_id == posted_recv_msg_ref->msg_id && - COORD_MSG_TYPE_B_RECV == posted_recv_msg_ref->msg_type) { - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - "Recv Check: Found a message that is 'active'! Prepare to STALL.\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)) )); - stall_for_completion = true; - } - else { - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - "Recv Check: Found a message that is 'active', but is not the current recv! " - "No stall required [%3d, %3d, %3d, %3d].\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - (int)current_msg_id, - (int)current_msg_type, - (int)posted_recv_msg_ref->msg_id, - (int)posted_recv_msg_ref->msg_type)); - } - } - - /* - * Construct a message for draining for each active message. - * This message will *not* be posted for draining since it is already - * posted in the system. We will simply wait for it to complete. 
- * - Only applies to messages that are not Blocking Recv - */ - traffic_message_create_drain_message(false, num_still_active, - peer_ref, - &posted_recv_msg_ref, - &num_posted); - num_still_active -= num_posted; - *num_resolved += num_posted; - peer_ref->total_msgs_recvd += num_posted; - - traffic_message_create_drain_message(false, num_still_active, - peer_ref, - &posted_irecv_msg_ref, - &num_posted); - num_still_active -= num_posted; - *num_resolved += num_posted; - peer_ref->total_msgs_recvd += num_posted; - - traffic_message_create_drain_message(false, num_still_active, - peer_ref, - &posted_precv_msg_ref, - &num_posted); - num_still_active -= num_posted; - *num_resolved += num_posted; - peer_ref->total_msgs_recvd += num_posted; - - traffic_message_create_drain_message(false, num_still_active, - peer_ref, - &posted_unknown_recv_msg_ref, - &num_posted); - num_still_active -= num_posted; - *num_resolved += num_posted; - peer_ref->total_msgs_recvd += num_posted; - - traffic_message_create_drain_message(false, num_still_active, - peer_ref, - &posted_unknown_precv_msg_ref, - &num_posted); - num_still_active -= num_posted; - *num_resolved += num_posted; - peer_ref->total_msgs_recvd += num_posted; - } - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - "Stage 3: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d, active %4d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - peer_ref->total_msgs_recvd, - peer_ref->matched_msgs_sent, - p_num_sent, - num_left_unresolved, - *num_resolved, - num_still_active - )); - - /* - * Post all unresolved messages to the drain queue - * - Create a new message to drain - * - Notify peer of resolution of N messages - */ - if( num_left_unresolved > 0 ) { - /* Create a stamp for the drained message */ - CREATE_NEW_MSG(posted_tmp_msg_ref, COORD_MSG_TYPE_I_RECV, - count, datatype_size, tag, rank, - ompi_comm_lookup(comm_id), - 
peer_ref->proc_name.jobid, - peer_ref->proc_name.vpid); - - traffic_message_create_drain_message(true, num_left_unresolved, - peer_ref, - &posted_tmp_msg_ref, - &num_posted); - num_left_unresolved -= num_posted; - *num_resolved += num_posted; - peer_ref->total_msgs_recvd += num_posted; - - HOKE_TRAFFIC_MSG_REF_RETURN(posted_tmp_msg_ref); - } - - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: %s <-- %s " - "Stage 4: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d, active %4d", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - peer_ref->total_msgs_recvd, - peer_ref->matched_msgs_sent, - p_num_sent, - num_left_unresolved, - *num_resolved, - num_still_active - )); - - /* YYY JJH YYY Should we check for no-action? */ - cleanup: - return exit_status; -} - -static int do_recv_msg_detail_resp(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, - int resp, int num_resolv, int total_found) -{ - opal_buffer_t * buffer = NULL; - int exit_status = OMPI_SUCCESS; - int ret; - - if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) { - exit_status = OMPI_ERROR; - goto cleanup; - } - - PACK_BUFFER(buffer, resp, 1, OPAL_UINT32, - "crcp:bkmrk: recv_msg_details: Unable to ask peer for more messages"); - PACK_BUFFER(buffer, num_resolv, 1, OPAL_UINT32, - "crcp:bkmrk: recv_msg_details: Unable to ask peer for more messages"); - PACK_BUFFER(buffer, total_found, 1, OPAL_UINT32, - "crcp:bkmrk: recv_msg_details: Unable to ask peer for more messages"); - - if (ORTE_SUCCESS != (ret = ompi_rte_send_buffer_nb(&peer_ref->proc_name, buffer, - OMPI_CRCP_COORD_BOOKMARK_TAG, - orte_rml_send_callback, NULL))) { - opal_output(mca_crcp_bkmrk_component.super.output_handle, - "crcp:bkmrk: recv_msg_detail_resp: Unable to send message detail response to peer %s: Return %d\n", - OMPI_NAME_PRINT(&peer_ref->proc_name), - ret); - exit_status = OMPI_ERROR; - goto cleanup; - } - - cleanup: - if( NULL != buffer) { - OBJ_RELEASE(buffer); - buffer = NULL; 
- } - - return exit_status; -} - - -/************************************************ - * Timer Utility Functions - ************************************************/ -static void start_time(int idx) { - if(idx < CRCP_TIMER_MAX ) { - timer_start[idx] = get_time(); - } -} - -static void end_time(int idx) { - if(idx < CRCP_TIMER_MAX ) { - timer_end[idx] = get_time(); - } -} - -static double get_time() { - double wtime; - -#if OPAL_TIMER_USEC_NATIVE - wtime = (double)opal_timer_base_get_usec() / 1000000.0; -#else - struct timeval tv; - gettimeofday(&tv, NULL); - wtime = tv.tv_sec; - wtime += (double)tv.tv_usec / 1000000.0; -#endif - - return wtime; -} - -static void clear_timers(void) { - int i; - for(i = 0; i < CRCP_TIMER_MAX; ++i) { - timer_start[i] = 0.0; - timer_end[i] = 0.0; - } -} - -static void display_all_timers(int state) { - bool report_ready = false; - double barrier_start, barrier_stop; - int i; - - if( 0 != OMPI_PROC_MY_NAME->vpid ) { - if( 2 > timing_enabled ) { - return; - } - else if( 2 == timing_enabled ) { - opal_pmix.fence(NULL, 0); - return; - } - } - - for( i = 0; i < CRCP_TIMER_MAX; ++i) { - if(timer_end[i] > 0.001) { - report_ready = true; - } - } - if( !report_ready ) { - return; - } - - opal_output(0, "crcp:bkmrk: timing(%20s): ******************** Begin: [State = %12s]\n", "Summary", opal_crs_base_state_str(state)); - for( i = 0; i < CRCP_TIMER_MAX; ++i) { - display_indv_timer_core(i, 0, 0, false); - } - - if( timing_enabled >= 2) { - barrier_start = get_time(); - opal_pmix.fence(NULL, 0); - barrier_stop = get_time(); - opal_output(0, - "crcp:bkmrk: timing(%20s): %20s = %10.2f s\n", - "", - "Group Barrier", - (barrier_stop - barrier_start)); - } - - opal_output(0, "crcp:bkmrk: timing(%20s): ******************** End: [State = %12s]\n", "Summary", opal_crs_base_state_str(state)); - -} - -static void display_indv_timer(int idx, int proc, int msgs) { - display_indv_timer_core(idx, proc, msgs, true); -} - -static void display_indv_timer_core(int 
idx, int proc, int msgs, bool direct) { - double diff = timer_end[idx] - timer_start[idx]; - char * str = NULL; - - if( 0 != OMPI_PROC_MY_NAME->vpid && timing_enabled < 3 ) { - return; - } - - /* Only display the timer if an end value was set */ - if(timer_end[idx] <= 0.001) { - return; - } - - switch(idx) { - case CRCP_TIMER_CKPT_EX_PEER_S: - case CRCP_TIMER_CKPT_EX_PEER_R: - case CRCP_TIMER_CKPT_CHECK_PEER_S: - case CRCP_TIMER_CKPT_CHECK_PEER_R: - /* These timers do not mean anything in the aggregate, so only display - * them when directly asked for */ - if( direct && timing_enabled >= 2) { - asprintf(&str, "Proc %2d, Msg %5d", proc, msgs); - } else { - return; - } - break; - default: - str = strdup(""); - break; - } - - opal_output(0, - "crcp:bkmrk: timing(%20s): %20s = %10.2f s\n", - str, - timer_label[idx], - diff); - free(str); - str = NULL; -} - -/**************** Message Dump functionality ********************/ -#if OPAL_ENABLE_DEBUG -static void traffic_message_dump_msg_content_indv(ompi_crcp_bkmrk_pml_message_content_ref_t * content_ref) -{ - OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle, - "\t\t(%3d) Content: [A/D/P/Dr] [%s / %s / %s /%s]", - (int)content_ref->msg_id, - (content_ref->active ? "T" : "F"), - (content_ref->done ? "T" : "F"), - (content_ref->already_posted ? "T" : "F"), - (content_ref->already_drained ? 
"T" : "F"))); -} - -static void traffic_message_dump_msg_indv(ompi_crcp_bkmrk_pml_traffic_message_ref_t * msg_ref, char * msg, bool vshort) -{ - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - opal_list_item_t* cont_item = NULL; - char * type_name = NULL; - - switch(msg_ref->msg_type) { - case COORD_MSG_TYPE_B_SEND: - type_name = strdup(" Send"); - break; - case COORD_MSG_TYPE_I_SEND: - type_name = strdup("iSend"); - break; - case COORD_MSG_TYPE_P_SEND: - type_name = strdup("pSend"); - break; - case COORD_MSG_TYPE_B_RECV: - type_name = strdup(" Recv"); - break; - case COORD_MSG_TYPE_I_RECV: - type_name = strdup("iRecv"); - break; - case COORD_MSG_TYPE_P_RECV: - type_name = strdup("pRecv"); - break; - default: - type_name = strdup("Unknown"); - break; - } - - if( !vshort ) { - opal_output(0, "\t%s %10s (%3d): [m %3d/d %3d/a %3d/ad %3d/p %3d] Contents %2d ... count %6d, tag %6d, rank %3d", - type_name, - msg, - (int)msg_ref->msg_id, - msg_ref->matched, - msg_ref->done, - msg_ref->active, - msg_ref->active_drain, - msg_ref->posted, - (int)opal_list_get_size(&msg_ref->msg_contents), - (int)msg_ref->count, - msg_ref->tag, - msg_ref->rank); - } else { - opal_output(0, "\t%s %10s (%3d): [m %3d/d %3d/a %3d/ad %3d/p %3d] Contents %2d ... 
count %6d", - type_name, - msg, - (int)msg_ref->msg_id, - msg_ref->matched, - msg_ref->done, - msg_ref->active, - msg_ref->active_drain, - msg_ref->posted, - (int)opal_list_get_size(&msg_ref->msg_contents), - (int)msg_ref->count); - } - - free(type_name); - - for(cont_item = opal_list_get_first(&(msg_ref->msg_contents)); - cont_item != opal_list_get_end( &(msg_ref->msg_contents)); - cont_item = opal_list_get_next(cont_item) ) { - content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*)cont_item; - - traffic_message_dump_msg_content_indv(content_ref); - } -} - -static void traffic_message_dump_drain_msg_indv(ompi_crcp_bkmrk_pml_drain_message_ref_t * msg_ref, char * msg, bool vshort) -{ - ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref = NULL; - opal_list_item_t* cont_item = NULL; - char * type_name = NULL; - - switch(msg_ref->msg_type) { - case COORD_MSG_TYPE_B_SEND: - type_name = strdup(" Send"); - break; - case COORD_MSG_TYPE_I_SEND: - type_name = strdup("iSend"); - break; - case COORD_MSG_TYPE_P_SEND: - type_name = strdup("pSend"); - break; - case COORD_MSG_TYPE_B_RECV: - type_name = strdup(" Recv"); - break; - case COORD_MSG_TYPE_I_RECV: - type_name = strdup("iRecv"); - break; - case COORD_MSG_TYPE_P_RECV: - type_name = strdup("pRecv"); - break; - default: - type_name = strdup("Unknown"); - break; - } - - if( !vshort ) { - opal_output(0, "\t%s %10s (%3d): [d %3d/a %3d] Contents %2d ... count %6d, tag %6d, rank %3d", - type_name, - msg, - (int)msg_ref->msg_id, - msg_ref->done, - msg_ref->active, - (int)opal_list_get_size(&msg_ref->msg_contents), - (int)msg_ref->count, - msg_ref->tag, - msg_ref->rank); - } else { - opal_output(0, "\t%s %10s (%3d): [d %3d/a %3d] Contents %2d ... 
count %6d", - type_name, - msg, - (int)msg_ref->msg_id, - msg_ref->done, - msg_ref->active, - (int)opal_list_get_size(&msg_ref->msg_contents), - (int)msg_ref->count); - } - - free(type_name); - - for(cont_item = opal_list_get_first(&(msg_ref->msg_contents)); - cont_item != opal_list_get_end( &(msg_ref->msg_contents)); - cont_item = opal_list_get_next(cont_item) ) { - content_ref = (ompi_crcp_bkmrk_pml_message_content_ref_t*)cont_item; - - traffic_message_dump_msg_content_indv(content_ref); - } -} - -static void traffic_message_dump_msg_list(opal_list_t *msg_list, bool is_drain) -{ - opal_list_item_t* item = NULL; - ompi_crcp_bkmrk_pml_traffic_message_ref_t * msg_ref = NULL; - ompi_crcp_bkmrk_pml_drain_message_ref_t * drain_msg_ref = NULL; - - for(item = opal_list_get_last(msg_list); - item != opal_list_get_begin(msg_list); - item = opal_list_get_prev(item) ) { - if( !is_drain ) { - msg_ref = (ompi_crcp_bkmrk_pml_traffic_message_ref_t*)item; - traffic_message_dump_msg_indv(msg_ref, "", false); - } else { - drain_msg_ref = (ompi_crcp_bkmrk_pml_drain_message_ref_t*)item; - traffic_message_dump_drain_msg_indv(drain_msg_ref, "Drain", false); - } - } -} - -static void traffic_message_dump_peer(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, char * msg, bool root_only) -{ - if( root_only && ompi_process_info.my_name.vpid != 0 ) { - return; - } else { - sleep(ompi_process_info.my_name.vpid * 2); - } - - opal_output(0, "------------- %s ---------------------------------", msg); - opal_output(0, "%s <-> %s Totals Sent [ %3d / %3d ] Recv [ %3d / %3d ]", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), - OMPI_NAME_PRINT(&(peer_ref->proc_name)), - peer_ref->total_msgs_sent, - peer_ref->matched_msgs_sent, - peer_ref->total_msgs_recvd, - peer_ref->matched_msgs_recvd); - opal_output(0, "\n"); - - traffic_message_dump_msg_list(&(peer_ref->send_list), false); - traffic_message_dump_msg_list(&(peer_ref->isend_list), false); - traffic_message_dump_msg_list(&(peer_ref->send_init_list), false); - - 
traffic_message_dump_msg_list(&(peer_ref->recv_list), false); - traffic_message_dump_msg_list(&(peer_ref->irecv_list), false); - traffic_message_dump_msg_list(&(peer_ref->recv_init_list), false); - - traffic_message_dump_msg_list(&(peer_ref->drained_list), true); - - opal_output(0, "--------------------------------------------------"); - usleep(250000); -} -#endif diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.h b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.h deleted file mode 100644 index 4d3304416b6..00000000000 --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.h +++ /dev/null @@ -1,457 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * Hoke CRCP component - * - */ - -#ifndef MCA_CRCP_HOKE_PML_EXPORT_H -#define MCA_CRCP_HOKE_PML_EXPORT_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "ompi/mca/crcp/crcp.h" -#include "ompi/communicator/communicator.h" - -#include "ompi/mca/crcp/bkmrk/crcp_bkmrk.h" - -BEGIN_C_DECLS - - /* - * PML Coordination functions - */ - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_enable - ( bool enable, ompi_crcp_base_pml_state_t* pml_state ); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_add_comm - ( struct ompi_communicator_t* comm, - ompi_crcp_base_pml_state_t* pml_state ); - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_comm - ( struct ompi_communicator_t* comm, - ompi_crcp_base_pml_state_t* pml_state ); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_add_procs - ( struct ompi_proc_t **procs, size_t nprocs, - ompi_crcp_base_pml_state_t* pml_state ); - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_procs - ( struct ompi_proc_t **procs, size_t nprocs, - ompi_crcp_base_pml_state_t* pml_state ); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_progress - (ompi_crcp_base_pml_state_t* pml_state); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_iprobe - (int dst, int tag, struct ompi_communicator_t* comm, - int *matched, ompi_status_public_t* status, - ompi_crcp_base_pml_state_t* pml_state ); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_probe - ( int dst, int tag, struct ompi_communicator_t* comm, - ompi_status_public_t* status, - ompi_crcp_base_pml_state_t* pml_state ); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend_init - ( void *buf, size_t count, ompi_datatype_t *datatype, - int dst, int tag, mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state ); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend - ( void 
*buf, size_t count, ompi_datatype_t *datatype, - int dst, int tag, mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state ); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_send - ( void *buf, size_t count, ompi_datatype_t *datatype, - int dst, int tag, mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - ompi_crcp_base_pml_state_t* pml_state ); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_irecv_init - ( void *buf, size_t count, ompi_datatype_t *datatype, - int src, int tag, struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_irecv - ( void *buf, size_t count, ompi_datatype_t *datatype, - int src, int tag, struct ompi_communicator_t* comm, - struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* pml_state ); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_recv - ( void *buf, size_t count, ompi_datatype_t *datatype, - int src, int tag, struct ompi_communicator_t* comm, - ompi_status_public_t* status, - ompi_crcp_base_pml_state_t* pml_state); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_dump - ( struct ompi_communicator_t* comm, int verbose, - ompi_crcp_base_pml_state_t* pml_state ); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_start - ( size_t count, ompi_request_t** requests, - ompi_crcp_base_pml_state_t* pml_state ); - - ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event - (int state, ompi_crcp_base_pml_state_t* pml_state); - - enum ompi_crcp_bkmrk_pml_quiesce_tag_type_t { - QUIESCE_TAG_NONE = 0, /* 0 No tag specified */ - QUIESCE_TAG_CKPT, /* 1 Prepare for checkpoint */ - QUIESCE_TAG_CONTINUE, /* 2 Continue after a checkpoint */ - QUIESCE_TAG_RESTART, /* 3 Restart from a checkpoint */ - QUIESCE_TAG_UNKNOWN /* 4 Unknown */ - }; - typedef enum ompi_crcp_bkmrk_pml_quiesce_tag_type_t 
ompi_crcp_bkmrk_pml_quiesce_tag_type_t; - - int ompi_crcp_bkmrk_pml_quiesce_start(ompi_crcp_bkmrk_pml_quiesce_tag_type_t tag ); - int ompi_crcp_bkmrk_pml_quiesce_end(ompi_crcp_bkmrk_pml_quiesce_tag_type_t tag ); - - /* - * Request function - */ - int ompi_crcp_bkmrk_request_complete(struct ompi_request_t *request); - - /*********************************** - * Globally Defined Structures - ***********************************/ - /* - * Types of Messages - */ - enum ompi_crcp_bkmrk_pml_message_type_t { - COORD_MSG_TYPE_UNKNOWN, /* 0 Unknown type */ - COORD_MSG_TYPE_B_SEND, /* 1 Blocking Send */ - COORD_MSG_TYPE_I_SEND, /* 2 Non-Blocking Send */ - COORD_MSG_TYPE_P_SEND, /* 3 Persistent Send */ - COORD_MSG_TYPE_B_RECV, /* 4 Blocking Recv */ - COORD_MSG_TYPE_I_RECV, /* 5 Non-Blocking Recv */ - COORD_MSG_TYPE_P_RECV /* 6 Persistent Recv */ - }; - typedef enum ompi_crcp_bkmrk_pml_message_type_t ompi_crcp_bkmrk_pml_message_type_t; - - /* - * A list structure to contain {buffer, request, status} sets - * - * send/recv type | Buffer | Request | Status | Active - * ---------------+--------+---------+--------+-------- - * Blocking | No | No | No | No - * Non-Blocking | No | Yes | Yes | No - * Persistent | Yes | Yes | Yes | Yes - * - * No : Does not require this field - * Yes: Does require this field - */ - struct ompi_crcp_bkmrk_pml_message_content_ref_t { - /** This is a list object */ - opal_list_item_t super; - - /** Buffer for data */ - void * buffer; - - /* Request for this message */ - ompi_request_t *request; - - /** Status */ - ompi_status_public_t status; - - /** Active ? */ - bool active; - - /** Done ? - Only useful in Drain*/ - bool done; - - /** Already_posted ? 
- Only useful in Drain */ - bool already_posted; - - /** Drained */ - bool already_drained; - - /** JJH XXX Debug counter*/ - uint64_t msg_id; - }; - typedef struct ompi_crcp_bkmrk_pml_message_content_ref_t ompi_crcp_bkmrk_pml_message_content_ref_t; - - OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_message_content_ref_t); - void ompi_crcp_bkmrk_pml_message_content_ref_construct(ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref); - void ompi_crcp_bkmrk_pml_message_content_ref_destruct( ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref); - - /* - * Drain Message Reference - * - The first section of this structure should match - * ompi_crcp_bkmrk_pml_traffic_message_ref_t exactly. - */ - struct ompi_crcp_bkmrk_pml_drain_message_ref_t { - /** This is a list object */ - opal_list_item_t super; - - /** Sequence Number of this message */ - uint64_t msg_id; - - /** Type of message this references */ - ompi_crcp_bkmrk_pml_message_type_t msg_type; - - /** Count for data */ - size_t count; - - /** Datatype */ - struct ompi_datatype_t * datatype; - - /** Quick reference to the size of the datatype */ - size_t ddt_size; - - /** Message Tag */ - int tag; - - /** Peer rank to which it was sent/recv'ed if known */ - int rank; - - /** Communicator pointer */ - ompi_communicator_t* comm; - - /** Message Contents */ - opal_list_t msg_contents; - - /** Peer which we received from */ - ompi_process_name_t proc_name; - - /** - * Count of the number of completed PML messages that match this reference. - */ - int done; - - /** - * Count of the number of active PML messages that match this reference. - */ - int active; - - /** - * Count of the number of posted PML messages that match this reference. - * Used when trying to figure out which messages the drain protocol needs to post, and - * which message have already been posted for it. 
- */ - int already_posted; - - }; - typedef struct ompi_crcp_bkmrk_pml_drain_message_ref_t ompi_crcp_bkmrk_pml_drain_message_ref_t; - - OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_drain_message_ref_t); - void ompi_crcp_bkmrk_pml_drain_message_ref_construct(ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref); - void ompi_crcp_bkmrk_pml_drain_message_ref_destruct( ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref); - - /* - * List of Pending ACKs to drained messages - */ - struct ompi_crcp_bkmrk_pml_drain_message_ack_ref_t { - /** This is a list object */ - opal_list_item_t super; - - /** Complete flag */ - bool complete; - - /** Peer which we received from */ - ompi_process_name_t peer; - }; - typedef struct ompi_crcp_bkmrk_pml_drain_message_ack_ref_t ompi_crcp_bkmrk_pml_drain_message_ack_ref_t; - - OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t); - void ompi_crcp_bkmrk_pml_drain_message_ack_ref_construct(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref); - void ompi_crcp_bkmrk_pml_drain_message_ack_ref_destruct( ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref); - - /* - * Regular Traffic Message Reference - * Tracks message signature {count, datatype_size, tag, comm, peer} - */ - struct ompi_crcp_bkmrk_pml_traffic_message_ref_t { - /** This is a list object */ - opal_list_item_t super; - - /** Sequence Number of this message */ - uint64_t msg_id; - - /** Type of message this references */ - ompi_crcp_bkmrk_pml_message_type_t msg_type; - - /** Count for data */ - size_t count; - - /** Quick reference to the size of the datatype */ - size_t ddt_size; - - /** Message Tag */ - int tag; - - /** Peer rank to which it was sent/recv'ed if known */ - int rank; - - /** Communicator pointer */ - ompi_communicator_t* comm; - - /** Message Contents */ - opal_list_t msg_contents; - - /** Peer which we received from */ - ompi_process_name_t proc_name; - - /* Sample movement of values (mirrored for send): - * Recv() iRecv() irecv_init() start() 
req_complete() - * * Pre: - * matched = false false false --- --- - * done = false false false --- true - * active = true true false true false - * already_posted = true true true --- --- - * * Post: - * matched = false false false --- --- - * done = true false false false true - * active = false true false true false - * already_posted = true true true --- --- - * * Drain - * already_posted = false -> true when posted irecv - */ - /** Has this message been matched by the peer? - * - Resolved during bookmark exchange - * true = peer confirmed the receipt of this message - * false = unknown if peer has received this message or not - */ - int matched; - - /** Is this message complete WRT PML semantics? - * - Is it not in-flight? - * true = message done on this side (send or receive) - * false = message still in process (sending or receiving) - */ - int done; - - /** Is the message actively being worked on? - * - Known to be in-flight? - * true = Message is !done, and is in the progress cycle - * false = Message is !done and is *not* in the progress cycle ( [send/recv]_init requests) - */ - int active; - - /** How many times a persistent send/recv has been posted, but not activated. - * - */ - int posted; - - /** Actively drained - * These are messages that are active, and being drained. So if we checkpoint while the drain - * list is not empty then we do not try to count these messages more than once. 
- */ - int active_drain; - }; - typedef struct ompi_crcp_bkmrk_pml_traffic_message_ref_t ompi_crcp_bkmrk_pml_traffic_message_ref_t; - - OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_traffic_message_ref_t); - void ompi_crcp_bkmrk_pml_traffic_message_ref_construct(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref); - void ompi_crcp_bkmrk_pml_traffic_message_ref_destruct( ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref); - - /* - * A structure for a single process - * Contains: - * - List of sent messages to this peer - * - List of received message from this peer - * - Message totals - */ - struct ompi_crcp_bkmrk_pml_peer_ref_t { - /** This is a list object */ - opal_list_item_t super; - - /** Name of peer */ - ompi_process_name_t proc_name; - - /** List of messages sent to this peer */ - opal_list_t send_list; /**< pml_send */ - opal_list_t isend_list; /**< pml_isend */ - opal_list_t send_init_list; /**< pml_isend_init */ - - /** List of messages recved from this peer */ - opal_list_t recv_list; /**< pml_recv */ - opal_list_t irecv_list; /**< pml_irecv */ - opal_list_t recv_init_list; /**< pml_irecv_init */ - - /** List of messages drained from this peer */ - opal_list_t drained_list; - - /* - * These are totals over all communicators provided for convenience. 
- * - * If we are P_n and this structure represent P_m then: - * - total_* = P_n --> P_m - * - matched_* = P_n <-- P_m - * Where P_n --> P_m means: - * the number of messages P_n knows that it has sent/recv to/from P_m - * And P_n --> P_m means: - * the number of messages P_m told us that is has sent/recv to/from P_n - * - * How total* are used: - * Send: - * Before put on the wire: ++total - * Recv: - * Once completed: ++total - */ - /** Total Number of messages sent */ - uint32_t total_msgs_sent; - uint32_t matched_msgs_sent; - - /** Total Number of messages received */ - uint32_t total_msgs_recvd; - uint32_t matched_msgs_recvd; - - /** Total Number of messages drained */ - uint32_t total_drained_msgs; - - /** If peer is expecting an ACK after draining the messages */ - bool ack_required; - }; - typedef struct ompi_crcp_bkmrk_pml_peer_ref_t ompi_crcp_bkmrk_pml_peer_ref_t; - - OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_peer_ref_t); - void ompi_crcp_bkmrk_pml_peer_ref_construct(ompi_crcp_bkmrk_pml_peer_ref_t *bkm_proc); - void ompi_crcp_bkmrk_pml_peer_ref_destruct( ompi_crcp_bkmrk_pml_peer_ref_t *bkm_proc); - - /* - * Local version of the PML state - */ - struct ompi_crcp_bkmrk_pml_state_t { - ompi_crcp_base_pml_state_t p_super; - ompi_crcp_base_pml_state_t *prev_ptr; - - ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref; - ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref; - }; - typedef struct ompi_crcp_bkmrk_pml_state_t ompi_crcp_bkmrk_pml_state_t; - OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_state_t); - - /*********************************** - * Globally Defined Variables - ***********************************/ - /* - * List of known peers - */ - extern opal_list_t ompi_crcp_bkmrk_pml_peer_refs; - -END_C_DECLS - -#endif /* MCA_CRCP_HOKE_PML_EXPORT_H */ diff --git a/ompi/mca/crcp/bkmrk/owner.txt b/ompi/mca/crcp/bkmrk/owner.txt deleted file mode 100644 index 8ad5fc38ed2..00000000000 --- a/ompi/mca/crcp/bkmrk/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# 
owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: IU? -status: unmaintained diff --git a/ompi/mca/crcp/crcp.h b/ompi/mca/crcp/crcp.h deleted file mode 100644 index 4e31c664d4b..00000000000 --- a/ompi/mca/crcp/crcp.h +++ /dev/null @@ -1,383 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Checkpoint/Restart Coordination Protocol (CRCP) Interface - * - */ - -#ifndef MCA_CRCP_H -#define MCA_CRCP_H - -#include "ompi_config.h" - -#include "opal/class/opal_object.h" -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" -#include "opal/class/opal_free_list.h" - -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/base.h" - - -BEGIN_C_DECLS - -/** - * Module initialization function. - * Returns OMPI_SUCCESS - */ -typedef int (*ompi_crcp_base_module_init_fn_t) - (void); - -/** - * Module finalization function. 
- * Returns OMPI_SUCCESS - */ -typedef int (*ompi_crcp_base_module_finalize_fn_t) - (void); - - -/************************ - * MPI Quiesce Interface - ************************/ -/** - * MPI_Quiesce_start component interface - */ -typedef int (*ompi_crcp_base_quiesce_start_fn_t) - (MPI_Info *info); - -/** - * MPI_Quiesce_end component interface - */ -typedef int (*ompi_crcp_base_quiesce_end_fn_t) - (MPI_Info *info); - - -/************************ - * PML Wrapper hooks - * PML Wrapper is the CRCPW PML component - ************************/ -/** - * To allow us to work before and after a PML command - */ -enum ompi_crcp_base_pml_states_t { - OMPI_CRCP_PML_PRE, - OMPI_CRCP_PML_POST, - OMPI_CRCP_PML_SKIP, - OMPI_CRCP_PML_DONE -}; -typedef enum ompi_crcp_base_pml_states_t ompi_crcp_base_pml_states_t; - -struct ompi_crcp_base_pml_state_t { - opal_free_list_item_t super; - ompi_crcp_base_pml_states_t state; - int error_code; - mca_pml_base_component_t *wrapped_pml_component; - mca_pml_base_module_t *wrapped_pml_module; -}; -typedef struct ompi_crcp_base_pml_state_t ompi_crcp_base_pml_state_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_crcp_base_pml_state_t); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_enable_fn_t) - (bool enable, ompi_crcp_base_pml_state_t* ); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_add_comm_fn_t) - ( struct ompi_communicator_t* comm , ompi_crcp_base_pml_state_t*); -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_del_comm_fn_t) - ( struct ompi_communicator_t* comm , ompi_crcp_base_pml_state_t*); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_add_procs_fn_t) - ( struct ompi_proc_t **procs, size_t nprocs , ompi_crcp_base_pml_state_t*); -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_del_procs_fn_t) - ( struct ompi_proc_t **procs, size_t nprocs , ompi_crcp_base_pml_state_t*); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_progress_fn_t) - (ompi_crcp_base_pml_state_t*); - -typedef 
ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_iprobe_fn_t) - (int dst, int tag, struct ompi_communicator_t* comm, int *matched, - ompi_status_public_t* status, ompi_crcp_base_pml_state_t* ); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_probe_fn_t) - ( int dst, int tag, struct ompi_communicator_t* comm, - ompi_status_public_t* status, ompi_crcp_base_pml_state_t* ); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_isend_init_fn_t) - ( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, - struct ompi_request_t **request, ompi_crcp_base_pml_state_t* ); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_isend_fn_t) - ( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, - struct ompi_request_t **request, ompi_crcp_base_pml_state_t* ); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_send_fn_t) - ( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, - ompi_crcp_base_pml_state_t* ); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_irecv_init_fn_t) - ( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct ompi_request_t **request, - ompi_crcp_base_pml_state_t*); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_irecv_fn_t) - ( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct ompi_request_t **request, - ompi_crcp_base_pml_state_t* ); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_recv_fn_t) - ( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, ompi_status_public_t* status, - ompi_crcp_base_pml_state_t*); - -typedef ompi_crcp_base_pml_state_t* 
(*ompi_crcp_base_pml_dump_fn_t) - ( struct ompi_communicator_t* comm, int verbose, ompi_crcp_base_pml_state_t* ); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_start_fn_t) - ( size_t count, ompi_request_t** requests, ompi_crcp_base_pml_state_t* ); - -typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_ft_event_fn_t) - (int state, ompi_crcp_base_pml_state_t*); - -/* Request Interface */ -typedef int (*ompi_crcp_base_request_complete_fn_t) - (struct ompi_request_t *request); - -/************************ - * BTL Wrapper hooks - * JJH: Wrapper BTL not currently implemented. - ************************/ -/** - * To allow us to work before and after a BTL command - */ -enum ompi_crcp_base_btl_states_t { - OMPI_CRCP_BTL_PRE, - OMPI_CRCP_BTL_POST, - OMPI_CRCP_BTL_SKIP, - OMPI_CRCP_BTL_DONE -}; -typedef enum ompi_crcp_base_btl_states_t ompi_crcp_base_btl_states_t; - -struct ompi_crcp_base_btl_state_t { - opal_free_list_item_t super; - ompi_crcp_base_btl_states_t state; - int error_code; - mca_btl_base_descriptor_t* des; - mca_btl_base_component_t *wrapped_btl_component; - mca_btl_base_module_t *wrapped_btl_module; -}; -typedef struct ompi_crcp_base_btl_state_t ompi_crcp_base_btl_state_t; -OBJ_CLASS_DECLARATION(ompi_crcp_base_btl_state_t); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_add_procs_fn_t) - ( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_btl_base_endpoint_t** endpoints, - struct opal_bitmap_t* reachable, - ompi_crcp_base_btl_state_t* ); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_del_procs_fn_t) - ( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_btl_base_endpoint_t**, - ompi_crcp_base_btl_state_t*); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_register_fn_t) - ( struct mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_module_recv_cb_fn_t cbfunc, - void* cbdata, - 
ompi_crcp_base_btl_state_t*); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_finalize_fn_t) - ( struct mca_btl_base_module_t* btl, - ompi_crcp_base_btl_state_t*); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_alloc_fn_t) - ( struct mca_btl_base_module_t* btl, - size_t size, - ompi_crcp_base_btl_state_t*); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_free_fn_t) - ( struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t*); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_prepare_fn_t) - ( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - mca_mpool_base_registration_t* registration, - struct opal_convertor_t* convertor, - size_t reserve, - size_t* size, - ompi_crcp_base_btl_state_t*); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_send_fn_t) - ( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - mca_btl_base_tag_t tag, - ompi_crcp_base_btl_state_t*); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_put_fn_t) - ( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t*); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_get_fn_t) - ( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - ompi_crcp_base_btl_state_t*); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_dump_fn_t) - ( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - int verbose, - ompi_crcp_base_btl_state_t*); - -typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_ft_event_fn_t) - (int state, - ompi_crcp_base_btl_state_t*); - - -/** - * Structure for CRCP components. 
- */ -struct ompi_crcp_base_component_2_0_0_t { - /** MCA base component */ - mca_base_component_t base_version; - /** MCA base data */ - mca_base_component_data_t base_data; - - /** Verbosity Level */ - int verbose; - /** Output Handle for opal_output */ - int output_handle; - /** Default Priority */ - int priority; - -}; -typedef struct ompi_crcp_base_component_2_0_0_t ompi_crcp_base_component_2_0_0_t; -typedef struct ompi_crcp_base_component_2_0_0_t ompi_crcp_base_component_t; - -/** - * Structure for CRCP modules - */ -struct ompi_crcp_base_module_1_0_0_t { - /** Initialization Function */ - ompi_crcp_base_module_init_fn_t crcp_init; - /** Finalization Function */ - ompi_crcp_base_module_finalize_fn_t crcp_finalize; - - /**< MPI_Quiesce Interface Functions ******************/ - ompi_crcp_base_quiesce_start_fn_t quiesce_start; - ompi_crcp_base_quiesce_end_fn_t quiesce_end; - - /**< PML Wrapper Functions ****************************/ - ompi_crcp_base_pml_enable_fn_t pml_enable; - - ompi_crcp_base_pml_add_comm_fn_t pml_add_comm; - ompi_crcp_base_pml_del_comm_fn_t pml_del_comm; - - ompi_crcp_base_pml_add_procs_fn_t pml_add_procs; - ompi_crcp_base_pml_del_procs_fn_t pml_del_procs; - - ompi_crcp_base_pml_progress_fn_t pml_progress; - - ompi_crcp_base_pml_iprobe_fn_t pml_iprobe; - ompi_crcp_base_pml_probe_fn_t pml_probe; - - ompi_crcp_base_pml_isend_init_fn_t pml_isend_init; - ompi_crcp_base_pml_isend_fn_t pml_isend; - ompi_crcp_base_pml_send_fn_t pml_send; - - ompi_crcp_base_pml_irecv_init_fn_t pml_irecv_init; - ompi_crcp_base_pml_irecv_fn_t pml_irecv; - ompi_crcp_base_pml_recv_fn_t pml_recv; - - ompi_crcp_base_pml_dump_fn_t pml_dump; - ompi_crcp_base_pml_start_fn_t pml_start; - - ompi_crcp_base_pml_ft_event_fn_t pml_ft_event; - - /**< Request complete Function ****************************/ - ompi_crcp_base_request_complete_fn_t request_complete; - - /**< BTL Wrapper Functions ****************************/ - mca_crcp_base_btl_module_add_procs_fn_t btl_add_procs; - 
mca_crcp_base_btl_module_del_procs_fn_t btl_del_procs; - - mca_crcp_base_btl_module_register_fn_t btl_register; - mca_crcp_base_btl_module_finalize_fn_t btl_finalize; - - mca_crcp_base_btl_module_alloc_fn_t btl_alloc; - mca_crcp_base_btl_module_free_fn_t btl_free; - - mca_crcp_base_btl_module_prepare_fn_t btl_prepare_src; - mca_crcp_base_btl_module_prepare_fn_t btl_prepare_dst; - - mca_crcp_base_btl_module_send_fn_t btl_send; - mca_crcp_base_btl_module_put_fn_t btl_put; - mca_crcp_base_btl_module_get_fn_t btl_get; - - mca_crcp_base_btl_module_dump_fn_t btl_dump; - - mca_crcp_base_btl_module_ft_event_fn_t btl_ft_event; -}; -typedef struct ompi_crcp_base_module_1_0_0_t ompi_crcp_base_module_1_0_0_t; -typedef struct ompi_crcp_base_module_1_0_0_t ompi_crcp_base_module_t; - -OMPI_DECLSPEC extern ompi_crcp_base_module_t ompi_crcp; - -/** - * Macro for use in components that are of type CRCP - */ -#define OMPI_CRCP_BASE_VERSION_2_0_0 \ - OMPI_MCA_BASE_VERSION_2_1_0("crcp", 2, 0, 0) - -/** - * Macro to call the CRCP Request Complete function - */ -#if OPAL_ENABLE_FT_CR == 1 -#define OMPI_CRCP_REQUEST_COMPLETE(req) \ - if( NULL != ompi_crcp.request_complete) { \ - ompi_crcp.request_complete(req); \ - } -#else -#define OMPI_CRCP_REQUEST_COMPLETE(req) ; -#endif - -END_C_DECLS - -#endif /* OMPI_CRCP_H */ diff --git a/ompi/mca/crcp/ompi_crcp.7in b/ompi/mca/crcp/ompi_crcp.7in deleted file mode 100644 index 062f96018f2..00000000000 --- a/ompi/mca/crcp/ompi_crcp.7in +++ /dev/null @@ -1,93 +0,0 @@ -.\" -.\" Man page for OMPI's CRCP Functionality -.\" -.\" .TH name section center-footer left-footer center-header -.TH OMPI_CRCP 7 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -OMPI_CRCP \- Open MPI MCA Checkpoint/Restart Coordination Protocol (CRCP) Framework: -Overview of Open MPI's CRCP framework, and selected modules. #PACKAGE_NAME# #PACKAGE_VERSION# -. 
-.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -.PP -The CRCP Framework is used by Open MPI for the encapsulation of various -Checkpoint/Restart Coordination Protocols (e.g., Coordinated, Uncoordinated, -Message/Communication Induced, ...). -. -.\" ************************** -.\" General Process Requirements Section -.\" ************************** -.SH GENERAL PROCESS REQUIREMENTS -.PP -In order for a process to use the Open MPI CRCP components it must adhear to a -few programmatic requirements. -.PP -First, the program must call \fIMPI_INIT\fR early in its execution. -.PP -The program must call \fIMPI_FINALIZE\fR before termination. -.PP -A user may initiate a checkpoint of a parallel application by using the -ompi-checkpoint(1) and ompi-restart(1) commands. -. -.\" ********************************** -.\" Available Components Section -.\" ********************************** -.SH AVAILABLE COMPONENTS -.PP -Open MPI currently ships with one CRCP component: \fIcoord\fR. -. -.PP -The following MCA parameters apply to all components: -. -.TP 4 -crcp_base_verbose -Set the verbosity level for all components. Default is 0, or silent except on error. -. -.\" Coord Component -.\" ****************** -.SS coord CRCP Component -.PP -The \fIcoord\fR component implements a Coordinated Checkpoint/Restart -Coordination Protocol similar to the one implemented in LAM/MPI. -. -.PP -The \fIcoord\fR component has the following MCA parameters: -. -.TP 4 -crcp_coord_priority -The component's priority to use when selecting the most appropriate component -for a run. -. -.TP 4 -crcp_coord_verbose -Set the verbosity level for this component. Default is 0, or silent except on -error. -. -.\" Special 'none' option -.\" ************************ -.SS none CRCP Component -.PP -The \fInone\fP component simply selects no CRCP component. All of the CRCP -function calls return immediately with OMPI_SUCCESS. -. 
-.PP -This component is the last component to be selected by default. This means that if -another component is available, and the \fInone\fP component was not explicity -requested then Open MPI will attempt to activate all of the available components -before falling back to this component. -. -.\" ************************** -.\" See Also Section -.\" ************************** -. -.SH SEE ALSO - ompi-checkpoint(1), ompi-restart(1), opal-checkpoint(1), opal-restart(1), -orte_snapc(7), orte_filem(7), opal_crs(7) -. diff --git a/ompi/mca/dpm/Makefile.am b/ompi/mca/dpm/Makefile.am deleted file mode 100644 index 229519504d3..00000000000 --- a/ompi/mca/dpm/Makefile.am +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_dpm.la -libmca_dpm_la_SOURCES = - -# local files -headers = dpm.h -libmca_dpm_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ompidir = $(ompiincludedir)/$(subdir) -nobase_ompi_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/ompi/mca/dpm/base/Makefile.am b/ompi/mca/dpm/base/Makefile.am deleted file mode 100644 index 1fe6b6fa6e6..00000000000 --- a/ompi/mca/dpm/base/Makefile.am +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h - -libmca_dpm_la_SOURCES += \ - base/dpm_base_frame.c \ - base/dpm_base_select.c \ - base/dpm_base_null_fns.c \ - base/dpm_base_common_fns.c diff --git a/ompi/mca/dpm/base/base.h b/ompi/mca/dpm/base/base.h deleted file mode 100644 index d83a908ed4f..00000000000 --- a/ompi/mca/dpm/base/base.h +++ /dev/null @@ -1,103 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. 
All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef OMPI_MCA_DPM_BASE_H -#define OMPI_MCA_DPM_BASE_H - -#include "ompi_config.h" -#include "ompi/constants.h" - -#if HAVE_TIME_H -#include -#endif -#if HAVE_SYS_TIME_H -#include -#endif - -#include "ompi/mca/dpm/dpm.h" - -/* - * Global functions for MCA overall DPM - */ - -BEGIN_C_DECLS - -struct ompi_dpm_base_disconnect_obj { - ompi_communicator_t *comm; - int size; - struct ompi_request_t **reqs; - int buf; -}; -typedef struct ompi_dpm_base_disconnect_obj ompi_dpm_base_disconnect_obj; - -/** - * Select an available component. 
- * - * @retval OMPI_SUCCESS Upon Success - * @retval OMPI_NOT_FOUND If no component can be selected - * @retval OMPI_ERROR Upon other failure - * - */ -OMPI_DECLSPEC int ompi_dpm_base_select(void); - -/* Internal support functions */ -OMPI_DECLSPEC char* ompi_dpm_base_dyn_init (void); -OMPI_DECLSPEC int ompi_dpm_base_dyn_finalize (void); -OMPI_DECLSPEC void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm); -OMPI_DECLSPEC ompi_dpm_base_disconnect_obj *ompi_dpm_base_disconnect_init ( ompi_communicator_t *comm); -OMPI_DECLSPEC int ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs); - -/* NULL component functions */ -int ompi_dpm_base_null_connect_accept (ompi_communicator_t *comm, int root, - const char *port_string, bool send_first, - ompi_communicator_t **newcomm); -int ompi_dpm_base_null_disconnect(ompi_communicator_t *comm); -int ompi_dpm_base_null_spawn(int count, const char *array_of_commands[], - char **array_of_argv[], - const int array_of_maxprocs[], - const MPI_Info array_of_info[], - const char *port_name); -int ompi_dpm_base_null_dyn_init(void); -int ompi_dpm_base_null_dyn_finalize (void); -void ompi_dpm_base_null_mark_dyncomm (ompi_communicator_t *comm); -int ompi_dpm_base_null_open_port(char *port_name, ompi_rml_tag_t given_tag); -int ompi_dpm_base_null_parse_port(const char *port_name, - char **hnp_uri, char **rml_uri, ompi_rml_tag_t *tag); -int ompi_dpm_base_null_route_to_port(char *rml_uri, ompi_process_name_t *rproc); -int ompi_dpm_base_null_close_port(const char *port_name); -int ompi_dpm_base_null_pconnect(char *port, - struct timeval *timeout, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); -int ompi_dpm_base_null_paccept(char *port, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); -void ompi_dpm_base_null_pclose(char *port); - -/* useful globals */ -OMPI_DECLSPEC extern ompi_dpm_base_component_t ompi_dpm_base_selected_component; -OMPI_DECLSPEC extern 
ompi_dpm_base_module_t ompi_dpm; - -OMPI_DECLSPEC extern mca_base_framework_t ompi_dpm_base_framework; - -END_C_DECLS - -#endif /* OMPI_MCA_DPM_BASE_H */ diff --git a/ompi/mca/dpm/base/dpm_base_common_fns.c b/ompi/mca/dpm/base/dpm_base_common_fns.c deleted file mode 100644 index cac245256ec..00000000000 --- a/ompi/mca/dpm/base/dpm_base_common_fns.c +++ /dev/null @@ -1,289 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include - -#include "ompi/request/request.h" -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/pml/pml.h" - -#include "ompi/mca/dpm/base/base.h" - - -char* ompi_dpm_base_dyn_init (void) -{ - char *envvarname=NULL, *port_name=NULL, *tmp, *ptr; - - /* check for appropriate env variable */ - asprintf(&envvarname, "OMPI_PARENT_PORT"); - tmp = getenv(envvarname); - free (envvarname); - if (NULL != tmp) { - /* the value passed to us may have quote marks around it to protect - * the value if passed on the command line. 
We must remove those - * to have a correct string - */ - if ('"' == tmp[0]) { - /* if the first char is a quote, then so will the last one be */ - tmp[strlen(tmp)-1] = '\0'; - ptr = &tmp[1]; - } else { - ptr = &tmp[0]; - } - port_name = strdup(ptr); - } - - return port_name; -} - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* this routine runs through the list of communicators - and does the disconnect for all dynamic communicators */ -int ompi_dpm_base_dyn_finalize (void) -{ - int i,j=0, max=0; - ompi_dpm_base_disconnect_obj **objs=NULL; - ompi_communicator_t *comm=NULL; - - if ( 1 size = ompi_comm_remote_size (comm); - } else { - obj->size = ompi_comm_size (comm); - } - - obj->comm = comm; - obj->reqs = (ompi_request_t **) malloc(2*obj->size*sizeof(ompi_request_t *)); - if ( NULL == obj->reqs ) { - printf("Could not allocate request array for disconnect object\n"); - free (obj); - return NULL; - } - - /* initiate all isend_irecvs. We use a dummy buffer stored on - the object, since we are sending zero size messages anyway. 
*/ - for ( i=0; i < obj->size; i++ ) { - ret = MCA_PML_CALL(irecv (&(obj->buf), 0, MPI_INT, i, - OMPI_COMM_BARRIER_TAG, comm, - &(obj->reqs[2*i]))); - - if ( OMPI_SUCCESS != ret ) { - printf("dpm_base_disconnect_init: error %d in irecv to process %d\n", ret, i); - free (obj->reqs); - free (obj); - return NULL; - } - ret = MCA_PML_CALL(isend (&(obj->buf), 0, MPI_INT, i, - OMPI_COMM_BARRIER_TAG, - MCA_PML_BASE_SEND_SYNCHRONOUS, - comm, &(obj->reqs[2*i+1]))); - - if ( OMPI_SUCCESS != ret ) { - printf("dpm_base_disconnect_init: error %d in isend to process %d\n", ret, i); - free (obj->reqs); - free (obj); - return NULL; - } - } - - /* return handle */ - return obj; -} -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* - count how many requests are active - * - generate a request array large enough to hold - all active requests - * - call waitall on the overall request array - * - free the objects - */ -int ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs) -{ - - ompi_request_t **reqs=NULL; - char *treq=NULL; - int totalcount = 0; - int i; - int ret; - - for (i=0; isize; - } - - reqs = (ompi_request_t **) malloc (2*totalcount*sizeof(ompi_request_t *)); - if ( NULL == reqs ) { - printf("ompi_comm_disconnect_waitall: error allocating memory\n"); - return OMPI_ERROR; - } - - /* generate a single, large array of pending requests */ - treq = (char *)reqs; - for (i=0; ireqs, 2*objs[i]->size * sizeof(ompi_request_t *)); - treq += 2*objs[i]->size * sizeof(ompi_request_t *); - } - - /* force all non-blocking all-to-alls to finish */ - ret = ompi_request_wait_all (2*totalcount, reqs, MPI_STATUSES_IGNORE); - - /* Finally, free everything */ - for (i=0; i< count; i++ ) { - if (NULL != objs[i]->reqs ) { - free (objs[i]->reqs ); - free (objs[i]); - } - } - - free (reqs); - - 
return ret; -} - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* All we want to do in this function is determine if the number of - * jobids in the local and/or remote group is > 1. This tells us to - * set the disconnect flag. We don't actually care what the true - * number -is-, only that it is > 1 - */ -void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm) -{ - int i; - int size, rsize; - bool found=false; - ompi_jobid_t thisjobid; - ompi_group_t *grp=NULL; - ompi_proc_t *proc = NULL; - - /* special case for MPI_COMM_NULL */ - if ( comm == MPI_COMM_NULL ) { - return; - } - - size = ompi_comm_size (comm); - rsize = ompi_comm_remote_size(comm); - - /* loop over all processes in local group and check for - * a different jobid - */ - grp = comm->c_local_group; - proc = ompi_group_peer_lookup(grp,0); - thisjobid = ((ompi_process_name_t*)&proc->super.proc_name)->jobid; - - for (i=1; i< size; i++) { - proc = ompi_group_peer_lookup(grp,i); - if (thisjobid != ((ompi_process_name_t*)&proc->super.proc_name)->jobid) { - /* at least one is different */ - found = true; - goto complete; - } - } - - /* if inter-comm, loop over all processes in remote_group - * and see if any are different from thisjobid - */ - grp = comm->c_remote_group; - for (i=0; i< rsize; i++) { - proc = ompi_group_peer_lookup(grp,i); - if (thisjobid != ((ompi_process_name_t*)&proc->super.proc_name)->jobid) { - /* at least one is different */ - found = true; - break; - } - } - - complete: - /* if a different jobid was found, set the disconnect flag*/ - if (found) { - ompi_comm_num_dyncomm++; - OMPI_COMM_SET_DYNAMIC(comm); - } - - return; -} diff --git a/ompi/mca/dpm/base/dpm_base_frame.c b/ompi/mca/dpm/base/dpm_base_frame.c deleted file mode 100644 index 041987dade7..00000000000 --- a/ompi/mca/dpm/base/dpm_base_frame.c +++ 
/dev/null @@ -1,68 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/dpm/base/base.h" - -#include "ompi/mca/dpm/base/static-components.h" - -/* - * Globals - */ -OMPI_DECLSPEC ompi_dpm_base_module_t ompi_dpm = { - NULL, - ompi_dpm_base_null_connect_accept, - ompi_dpm_base_null_disconnect, - ompi_dpm_base_null_spawn, - ompi_dpm_base_null_dyn_init, - ompi_dpm_base_null_dyn_finalize, - ompi_dpm_base_null_mark_dyncomm, - ompi_dpm_base_null_open_port, - ompi_dpm_base_null_parse_port, - ompi_dpm_base_null_route_to_port, - ompi_dpm_base_null_close_port, - NULL, - ompi_dpm_base_null_pconnect, - ompi_dpm_base_null_paccept, - ompi_dpm_base_null_pclose -}; -ompi_dpm_base_component_t ompi_dpm_base_selected_component = {{0}}; - -static int ompi_dpm_base_close(void) -{ - /* Close the selected component */ - if( NULL != ompi_dpm.finalize ) { - ompi_dpm.finalize(); - } - - /* Close all available modules that are open */ - return mca_base_framework_components_close(&ompi_dpm_base_framework, NULL); -} - -MCA_BASE_FRAMEWORK_DECLARE(ompi, dpm, NULL, NULL, NULL, 
ompi_dpm_base_close, - mca_dpm_base_static_components, 0); diff --git a/ompi/mca/dpm/base/dpm_base_null_fns.c b/ompi/mca/dpm/base/dpm_base_null_fns.c deleted file mode 100644 index 47b9b53748b..00000000000 --- a/ompi/mca/dpm/base/dpm_base_null_fns.c +++ /dev/null @@ -1,115 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Intel, Inc. 
All rights reserved - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include -#if HAVE_TIME_H -#include -#endif -#if HAVE_SYS_TIME_H -#include -#endif - -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/dpm/base/base.h" - - -int ompi_dpm_base_null_connect_accept (ompi_communicator_t *comm, int root, - const char *port_string, bool send_first, - ompi_communicator_t **newcomm) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_disconnect(ompi_communicator_t *comm) -{ - return OMPI_SUCCESS; -} - -int ompi_dpm_base_null_spawn(int count, const char *array_of_commands[], - char **array_of_argv[], - const int array_of_maxprocs[], - const MPI_Info array_of_info[], - const char *port_name) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_dyn_init(void) -{ - return OMPI_SUCCESS; -} - -int ompi_dpm_base_null_dyn_finalize (void) -{ - return OMPI_SUCCESS; -} - -void ompi_dpm_base_null_mark_dyncomm (ompi_communicator_t *comm) -{ - return; -} - -int ompi_dpm_base_null_open_port(char *port_name, ompi_rml_tag_t given_tag) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_parse_port(const char *port_name, - char **hnp_uri, char **rml_uri, ompi_rml_tag_t *tag) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_route_to_port(char *rml_uri, ompi_process_name_t *rproc) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_close_port(const char *port_name) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_pconnect(char *port, - struct timeval *timeout, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_paccept(char *port, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -void ompi_dpm_base_null_pclose(char *port) -{ - return; -} diff --git a/ompi/mca/dpm/base/dpm_base_select.c 
b/ompi/mca/dpm/base/dpm_base_select.c deleted file mode 100644 index e65ad9dd555..00000000000 --- a/ompi/mca/dpm/base/dpm_base_select.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "opal/mca/base/mca_base_component_repository.h" - -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/dpm/base/base.h" - - -int ompi_dpm_base_select(void) -{ - int ret; - ompi_dpm_base_component_t *best_component = NULL; - ompi_dpm_base_module_t *best_module = NULL; - - /* - * Select the best component - */ - if( OPAL_SUCCESS != (ret = mca_base_select("dpm", ompi_dpm_base_framework.framework_output, - &ompi_dpm_base_framework.framework_components, - (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component))) { - /* it is okay not to find any executable components */ - if (OMPI_ERR_NOT_FOUND == ret) { - ret = OPAL_SUCCESS; - } - goto cleanup; - } - - /* Save the winner */ - ompi_dpm = *best_module; - ompi_dpm_base_selected_component = *best_component; - - /* init the selected module */ - if (NULL != ompi_dpm.init) { - ret = ompi_dpm.init(); - } - - cleanup: - return ret; -} diff --git a/ompi/mca/dpm/dpm.h b/ompi/mca/dpm/dpm.h deleted file mode 100644 index 1acd7024fbf..00000000000 --- a/ompi/mca/dpm/dpm.h +++ /dev/null @@ -1,235 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright 
(c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Dynamic Process Management Interface - * - */ - -#ifndef OMPI_MCA_DPM_H -#define OMPI_MCA_DPM_H - -#include "ompi_config.h" - -#if HAVE_TIME_H -#include -#endif -#if HAVE_SYS_TIME_H -#include -#endif - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "ompi/info/info.h" -#include "ompi/communicator/communicator.h" - -BEGIN_C_DECLS - -/* - * Initialize a module - */ -typedef int (*ompi_dpm_base_module_init_fn_t)(void); - -/* - * Connect/accept communications - */ -typedef int (*ompi_dpm_base_module_connect_accept_fn_t)(ompi_communicator_t *comm, int root, - const char *port, bool send_first, - ompi_communicator_t **newcomm); - -/* define a callback function for use by non-blocking persistent connect/accept operations */ -typedef void (*ompi_dpm_base_paccept_connect_callback_fn_t)(ompi_communicator_t *newcomm, - ompi_proc_t *remote_proc, - void *cbdata); - -/* - * Create a persistent connection point for accepting non-blocking connection requests. - * The accept is persistent and will remain open until explicitly closed, or during - * dpm_framework_close. Any incoming connection request will be used to create a new - * communicator which will be returned via callback, along with the process name. 
- * - * In both cases, the callback function will return the new communicator plus the - * user's original cbdata. - * - * paccept requires a port (typically obtained by a prior call to MPI_Open_port). - * This must be published so it can be found by processes wanting to - * connect to this process, and is passed by those processes as the "port" argument for - * pconnect. - * - * Calls to pconnect are also non-blocking, with callback upon completion. Periodic - * attempts to complete the connection may be made at the discretion of the implementation. - * Failure to connect will be indicated by a callback returning a NULL communicator. Callers - * should use the cbdata to track the corresponding pconnect request. A timeout - * is provided to avoid hanging should the other process not have an active paccept - * on the specified port (e.g., the process may have closed it). A NULL value for - * the timeout argument indicates that the pconnect operation should not timeout, - * and will regularly retry the connection forever. - * - * Processes may create and publish as many ports, and call paccept as many times, as - * they like. When a process no longer wishes to accept connect requests, it can "close" - * a paccept request by passing in the port used when calling paccept. A call to "close" - * with a NULL argument will close *all* currently registered paccept channels. - */ -typedef int (*ompi_dpm_base_module_paccept_fn_t)(char *port, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); - -typedef int (*ompi_dpm_base_module_pconnect_fn_t)(char *port, - struct timeval *timeout, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); - -typedef void (*ompi_dpm_base_module_pclose_fn_t)(char *port); - - -/** - * Executes internally a disconnect on all dynamic communicators - * in case the user did not disconnect them. 
- */ -typedef int (*ompi_dpm_base_module_disconnect_fn_t)(ompi_communicator_t *comm); - -/* - * Dynamically spawn processes - */ -typedef int (*ompi_dpm_base_module_spawn_fn_t)(int count, char const *array_of_commands[], - char **array_of_argv[], - const int array_of_maxprocs[], - const MPI_Info array_of_info[], - const char *port_name); - -/* - * This routine checks, whether an application has been spawned - * by another MPI application, or has been independently started. - * If it has been spawned, it establishes the parent communicator. - * Since the routine has to communicate, it should be among the last - * steps in MPI_Init, to be sure that everything is already set up. - */ -typedef int (*ompi_dpm_base_module_dyn_init_fn_t)(void); - -/* - * Interface for mpi_finalize to call to ensure dynamically spawned procs - * collectively finalize - */ -typedef int (*ompi_dpm_base_module_dyn_finalize_fn_t)(void); - -/* this routine counts the number of different jobids of the processes - given in a certain communicator. If there is more than one jobid, - we mark the communicator as 'dynamic'. This is especially relevant - for the MPI_Comm_disconnect *and* for MPI_Finalize, where we have - to wait for all still connected processes. -*/ -typedef void (*ompi_dpm_base_module_mark_dyncomm_fn_t)(ompi_communicator_t *comm); - -/* - * Open a port to interface to a dynamically spawned job - if the - * specified tag is valid, then it will be used to form the port. Otherwise, - * a dynamically assigned tag that is unique to this request will be provided - */ -typedef int (*ompi_dpm_base_module_open_port_fn_t)(char *port_name, ompi_rml_tag_t tag); - -/* - * Converts an opaque port string to a RML process nane and tag. 
- */ -typedef int (*ompi_dpm_base_module_parse_port_name_t)(const char *port_name, - char **hnp_uri, char **rml_uri, - ompi_rml_tag_t *tag); - -/* - * Update the routed component to make sure that the RML can send messages to - * the remote port - */ -typedef int (*ompi_dpm_base_module_route_to_port_t)(char *rml_uri, ompi_process_name_t *rproc); - - -/* - * Close a port - */ -typedef int (*ompi_dpm_base_module_close_port_fn_t)(const char *port_name); - -/* - * Finalize a module - */ -typedef int (*ompi_dpm_base_module_finalize_fn_t)(void); - -/** -* Structure for DPM modules - */ -struct ompi_dpm_base_module_1_0_0_t { - /** Initialization Function */ - ompi_dpm_base_module_init_fn_t init; - /* connect/accept */ - ompi_dpm_base_module_connect_accept_fn_t connect_accept; - /* disconnect */ - ompi_dpm_base_module_disconnect_fn_t disconnect; - /* spawn processes */ - ompi_dpm_base_module_spawn_fn_t spawn; - /* dyn_init */ - ompi_dpm_base_module_dyn_init_fn_t dyn_init; - /* dyn_finalize */ - ompi_dpm_base_module_dyn_finalize_fn_t dyn_finalize; - /* mark dyncomm */ - ompi_dpm_base_module_mark_dyncomm_fn_t mark_dyncomm; - /* open port */ - ompi_dpm_base_module_open_port_fn_t open_port; - /* parse port string */ - ompi_dpm_base_module_parse_port_name_t parse_port; - /* update route to a port */ - ompi_dpm_base_module_route_to_port_t route_to_port; - /* close port */ - ompi_dpm_base_module_close_port_fn_t close_port; - /* finalize */ - ompi_dpm_base_module_finalize_fn_t finalize; - /* pconnect/accept */ - ompi_dpm_base_module_pconnect_fn_t pconnect; - ompi_dpm_base_module_paccept_fn_t paccept; - ompi_dpm_base_module_pclose_fn_t pclose; -}; -typedef struct ompi_dpm_base_module_1_0_0_t ompi_dpm_base_module_1_0_0_t; -typedef struct ompi_dpm_base_module_1_0_0_t ompi_dpm_base_module_t; - -OMPI_DECLSPEC extern ompi_dpm_base_module_t ompi_dpm; - - -/** - * Structure for DPM components. 
- */ -struct ompi_dpm_base_component_2_0_0_t { - /** MCA base component */ - mca_base_component_t base_version; - /** MCA base data */ - mca_base_component_data_t base_data; -}; -typedef struct ompi_dpm_base_component_2_0_0_t ompi_dpm_base_component_2_0_0_t; -typedef struct ompi_dpm_base_component_2_0_0_t ompi_dpm_base_component_t; - -/** - * Macro for use in components that are of type DPM - */ -#define OMPI_DPM_BASE_VERSION_2_0_0 \ - OMPI_MCA_BASE_VERSION_2_1_0("dpm", 2, 0, 0) - - -END_C_DECLS - -#endif /* OMPI_MCA_DPM_H */ diff --git a/ompi/mca/dpm/orte/Makefile.am b/ompi/mca/dpm/orte/Makefile.am deleted file mode 100644 index dd56727373b..00000000000 --- a/ompi/mca/dpm/orte/Makefile.am +++ /dev/null @@ -1,43 +0,0 @@ -# -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -dist_ompidata_DATA = help-ompi-dpm-orte.txt - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_ompi_dpm_orte_DSO -component_noinst = -component_install = mca_dpm_orte.la -else -component_noinst = libmca_dpm_orte.la -component_install = -endif - -local_sources = \ - dpm_orte.c \ - dpm_orte.h \ - dpm_orte_component.c - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_dpm_orte_la_SOURCES = $(local_sources) -mca_dpm_orte_la_LDFLAGS = -module -avoid-version $(dpm_orte_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_dpm_orte_la_SOURCES = $(local_sources) -libmca_dpm_orte_la_LIBADD = $(dpm_orte_LIBS) -libmca_dpm_orte_la_LDFLAGS = -module -avoid-version $(dpm_orte_LDFLAGS) - diff --git a/ompi/mca/dpm/orte/configure.m4 b/ompi/mca/dpm/orte/configure.m4 deleted file mode 100644 index 43a50d50ac2..00000000000 --- a/ompi/mca/dpm/orte/configure.m4 +++ /dev/null @@ -1,25 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2011 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_dpm_orte_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_dpm_orte_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/dpm/orte/Makefile]) - - AC_ARG_WITH([orte], - AC_HELP_STRING([--with-orte], - [Use ORTE run-time environment (default: yes)])) - AS_IF([test "$with_orte" != "no"], - [$1], - [$2]) -]) diff --git a/ompi/mca/dpm/orte/dpm_orte.c b/ompi/mca/dpm/orte/dpm_orte.c deleted file mode 100644 index 4dea4d03125..00000000000 --- a/ompi/mca/dpm/orte/dpm_orte.c +++ /dev/null @@ -1,1766 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. 
All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006-2009 University of Houston. All rights reserved. - * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include -#include -#include -#if HAVE_TIME_H -#include -#endif -#if HAVE_SYS_TIME_H -#include -#endif - -#include "opal/util/argv.h" -#include "opal/util/opal_getcwd.h" -#include "opal/dss/dss.h" -#include "opal/mca/dstore/dstore.h" -#include "opal/mca/hwloc/base/base.h" -#include "opal/mca/pmix/pmix.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/grpcomm/base/base.h" -#include "orte/mca/plm/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/mca/rmaps/rmaps.h" -#include "orte/mca/rmaps/rmaps_types.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/routed/routed.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_wait.h" - -#include "ompi/communicator/communicator.h" -#include "ompi/group/group.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/rte/rte.h" -#include "ompi/info/info.h" - -#include "ompi/mca/dpm/base/base.h" -#include "dpm_orte.h" - -/* Local static variables */ -static 
opal_mutex_t ompi_dpm_port_mutex; -static orte_rml_tag_t next_tag; -static opal_list_t orte_dpm_acceptors, orte_dpm_connectors, dynamics; -static uint32_t next_preq=0; - -/* API functions */ -static int init(void); -static int connect_accept (ompi_communicator_t *comm, int root, - const char *port_string, bool send_first, - ompi_communicator_t **newcomm); -static int disconnect(ompi_communicator_t *comm); -static int spawn(int count, const char *array_of_commands[], - char **array_of_argv[], - const int array_of_maxprocs[], - const MPI_Info array_of_info[], - const char *port_name); -static int dyn_init(void); -static int open_port(char *port_name, orte_rml_tag_t given_tag); -static int parse_port_name(const char *port_name, char **hnp_uri, char **rml_uri, - orte_rml_tag_t *tag); -static int route_to_port(char *rml_uri, orte_process_name_t *rproc); -static int close_port(const char *port_name); -static int finalize(void); -static int dpm_pconnect(char *port, - struct timeval *timeout, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); -static int dpm_paccept(char *port, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); -static void dpm_pclose(char *port); - -/* - * instantiate the module - */ -ompi_dpm_base_module_t ompi_dpm_orte_module = { - init, - connect_accept, - disconnect, - spawn, - dyn_init, - ompi_dpm_base_dyn_finalize, - ompi_dpm_base_mark_dyncomm, - open_port, - parse_port_name, - route_to_port, - close_port, - finalize, - dpm_pconnect, - dpm_paccept, - dpm_pclose -}; - -typedef struct { - opal_list_item_t super; - opal_event_t ev; - bool event_active; - uint32_t id; - uint32_t cid; - orte_rml_tag_t tag; - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc; - void *cbdata; -} orte_dpm_prequest_t; -OBJ_CLASS_INSTANCE(orte_dpm_prequest_t, - opal_list_item_t, - NULL, NULL); - - -static void connect_complete(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); - -/* - 
* Init the module - */ -static int init(void) -{ - OBJ_CONSTRUCT(&ompi_dpm_port_mutex, opal_mutex_t); - next_tag = OMPI_RML_TAG_DYNAMIC; - OBJ_CONSTRUCT(&orte_dpm_acceptors, opal_list_t); - OBJ_CONSTRUCT(&orte_dpm_connectors, opal_list_t); - OBJ_CONSTRUCT(&dynamics, opal_list_t); - - /* post a receive for pconnect request responses */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - OMPI_RML_PCONNECT_TAG, - ORTE_RML_PERSISTENT, - connect_complete, NULL); - - - return OMPI_SUCCESS; -} - -static int connect_accept(ompi_communicator_t *comm, int root, - const char *port_string, bool send_first, - ompi_communicator_t **newcomm) -{ - int size, rsize, rank, rc; - orte_std_cntr_t num_vals; - orte_std_cntr_t rnamebuflen = 0; - int rnamebuflen_int = 0; - void *rnamebuf=NULL; - - ompi_communicator_t *newcomp=MPI_COMM_NULL; - ompi_proc_t **rprocs=NULL; - ompi_group_t *group=comm->c_local_group; - orte_process_name_t port; - orte_rml_tag_t tag=ORTE_RML_TAG_INVALID; - opal_buffer_t *nbuf=NULL, *nrbuf=NULL; - ompi_proc_t **proc_list=NULL, **new_proc_list = NULL; - int32_t i,j, new_proc_len; - ompi_group_t *new_group_pointer; - - orte_namelist_t *nm; - orte_rml_recv_cb_t xfer; - orte_process_name_t carport; - - OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept with port %s %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - port_string, send_first ? 
"sending first" : "recv first")); - - /* set default error return */ - *newcomm = MPI_COMM_NULL; - - size = ompi_comm_size ( comm ); - rank = ompi_comm_rank ( comm ); - - /* extract the process name from the port string, if given, and - * set us up to communicate with it - */ - if (NULL != port_string && 0 < strlen(port_string)) { - char *hnp_uri, *rml_uri; - - /* separate the string into the HNP and RML URI and tag */ - if (ORTE_SUCCESS != (rc = parse_port_name(port_string, &hnp_uri, &rml_uri, &tag))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* extract the originating proc's name */ - if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(rml_uri, &port, NULL))) { - ORTE_ERROR_LOG(rc); - free(hnp_uri); free(rml_uri); - return rc; - } - /* make sure we can route rml messages to the destination job */ - if (ORTE_SUCCESS != (rc = route_to_port(hnp_uri, &port))) { - ORTE_ERROR_LOG(rc); - free(hnp_uri); free(rml_uri); - return rc; - } - free(hnp_uri); free(rml_uri); - } - - if ( rank == root ) { - /* Generate the message buffer containing the number of processes and the list of - participating processes */ - nbuf = OBJ_NEW(opal_buffer_t); - if (NULL == nbuf) { - return OMPI_ERROR; - } - - if (OPAL_SUCCESS != (rc = opal_dss.pack(nbuf, &size, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - if (OMPI_GROUP_IS_DENSE(group)) { - ompi_proc_pack(group->grp_proc_pointers, size, false, nbuf); - } else { - proc_list = (ompi_proc_t **) calloc (group->grp_proc_count, - sizeof (ompi_proc_t *)); - for (i=0 ; igrp_proc_count ; i++) { - if (NULL == (proc_list[i] = ompi_group_peer_lookup(group,i))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rc = ORTE_ERR_NOT_FOUND; - goto exit; - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept adding %s to proc list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - OMPI_NAME_PRINT(&proc_list[i]->super.proc_name))); - } - ompi_proc_pack(proc_list, size, false, nbuf); - } - - /* pack wireup info - 
this is required so that all involved parties can - * discover how to talk to each other. For example, consider the case - * where we connect_accept to one independent job (B), and then connect_accept - * to another one (C) to wire all three of us together. Job B will not know - * how to talk to job C at the OOB level because the two of them didn't - * directly connect_accept to each other. Hence, we include the required - * wireup info at this first exchange - */ - if (ORTE_SUCCESS != (rc = orte_routed.get_wireup_info(nbuf))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t); - /* Exchange the number and the list of processes in the groups */ - if ( send_first ) { - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept sending first to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&port))); - rc = orte_rml.send_buffer_nb(&port, nbuf, tag, orte_rml_send_callback, NULL); - /* setup to recv */ - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept waiting for response", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - xfer.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - /* wait for response */ - OMPI_WAIT_FOR_COMPLETION(xfer.active); - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept got data from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&xfer.name))); - - } else { - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept recving first", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* setup to recv */ - xfer.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - /* wait for response */ - OMPI_WAIT_FOR_COMPLETION(xfer.active); - /* now send our info */ - OPAL_OUTPUT_VERBOSE((3, 
ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept sending info to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&xfer.name))); - rc = orte_rml.send_buffer_nb(&xfer.name, nbuf, tag, orte_rml_send_callback, NULL); - } - - if (OPAL_SUCCESS != (rc = opal_dss.unload(&xfer.data, &rnamebuf, &rnamebuflen))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&xfer.data); - goto exit; - } - carport.jobid = xfer.name.jobid; - carport.vpid = xfer.name.vpid; - OBJ_DESTRUCT(&xfer); - } - - /* First convert the size_t to an int so we can cast in the bcast to a void * - * if we don't then we will get badness when using big vs little endian - * THIS IS NO LONGER REQUIRED AS THE LENGTH IS NOW A STD_CNTR_T, WHICH - * CORRELATES TO AN INT32 - */ - rnamebuflen_int = (int)rnamebuflen; - - /* bcast the buffer-length to all processes in the local comm */ - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept bcast buffer length", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - rc = comm->c_coll.coll_bcast (&rnamebuflen_int, 1, MPI_INT, root, comm, - comm->c_coll.coll_bcast_module); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - rnamebuflen = rnamebuflen_int; - - if ( rank != root ) { - /* non root processes need to allocate the buffer manually */ - rnamebuf = (char *) malloc(rnamebuflen); - if ( NULL == rnamebuf ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - /* bcast list of processes to all procs in local group - and reconstruct the data. Note that proc_get_proclist - adds processes, which were not known yet to our - process pool. 
- */ - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept bcast proc list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - rc = comm->c_coll.coll_bcast (rnamebuf, rnamebuflen_int, MPI_BYTE, root, comm, - comm->c_coll.coll_bcast_module); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - nrbuf = OBJ_NEW(opal_buffer_t); - if (NULL == nrbuf) { - goto exit; - } - if ( OPAL_SUCCESS != ( rc = opal_dss.load(nrbuf, rnamebuf, rnamebuflen))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - num_vals = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(nrbuf, &rsize, &num_vals, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - rc = ompi_proc_unpack(nrbuf, rsize, &rprocs, false, &new_proc_len, &new_proc_list); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept unpacked %d new procs", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), new_proc_len)); - - /* If we added new procs, we need to do the modex and then call - PML add_procs */ - if (new_proc_len > 0) { - opal_list_t all_procs; - orte_namelist_t *name; - opal_process_name_t *ids; - opal_list_t myvals; - opal_value_t *kv; - - /* we first need to give the wireup info to our routed module. 
- * Not every routed module will need it, but some do require - * this info before we can do any comm - */ - if (ORTE_SUCCESS != (rc = orte_routed.init_routes(OMPI_CAST_RTE_NAME(&rprocs[0]->super.proc_name)->jobid, nrbuf))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - OBJ_CONSTRUCT(&all_procs, opal_list_t); - - if (send_first) { - for (i = 0 ; i < rsize ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = *OMPI_CAST_RTE_NAME(&rprocs[i]->super.proc_name); - opal_list_append(&all_procs, &name->super); - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept send first adding %s to allgather list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&name->name))); - } - for (i = 0 ; i < group->grp_proc_count ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = *OMPI_CAST_RTE_NAME(&(ompi_group_peer_lookup(group, i)->super.proc_name)); - opal_list_append(&all_procs, &name->super); - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept send first adding %s to allgather list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&name->name))); - } - - } else { - for (i = 0 ; i < group->grp_proc_count ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = *OMPI_CAST_RTE_NAME(&(ompi_group_peer_lookup(group, i)->super.proc_name)); - opal_list_append(&all_procs, &name->super); - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept recv first adding %s to allgather list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&name->name))); - } - for (i = 0 ; i < rsize ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = *OMPI_CAST_RTE_NAME(&rprocs[i]->super.proc_name); - opal_list_append(&all_procs, &name->super); - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept recv first adding %s to allgather list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - 
ORTE_NAME_PRINT(&name->name))); - } - - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept executing modex", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* setup the modex */ - ids = (opal_process_name_t*)malloc(opal_list_get_size(&all_procs) * sizeof(opal_process_name_t)); - /* copy across the list of participants */ - i=0; - OPAL_LIST_FOREACH(nm, &all_procs, orte_namelist_t) { - ids[i++] = nm->name; - } - OPAL_LIST_DESTRUCT(&all_procs); - /* perform it */ - opal_pmix.fence(ids, i); - free(ids); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept adding procs", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* set the locality of the new procs - the required info should - * have been included in the data exchange */ - for (j=0; j < new_proc_len; j++) { - OBJ_CONSTRUCT(&myvals, opal_list_t); - if (OMPI_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal, - &new_proc_list[j]->super.proc_name, - OPAL_DSTORE_LOCALITY, &myvals))) { - new_proc_list[j]->super.proc_flags = OPAL_PROC_NON_LOCAL; - } else { - kv = (opal_value_t*)opal_list_get_first(&myvals); - new_proc_list[j]->super.proc_flags = kv->data.uint16; - } - OPAL_LIST_DESTRUCT(&myvals); - } - - if (OMPI_SUCCESS != (rc = MCA_PML_CALL(add_procs(new_proc_list, new_proc_len)))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept new procs added", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - } - - OBJ_RELEASE(nrbuf); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept allocating group size %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rsize)); - - new_group_pointer=ompi_group_allocate(rsize); - if( NULL == new_group_pointer ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - /* put group elements in the list */ - for (j = 0; j < rsize; j++) { - new_group_pointer->grp_proc_pointers[j] 
= rprocs[j]; - } /* end proc loop */ - - /* increment proc reference counters */ - ompi_group_increment_proc_count(new_group_pointer); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept setting up communicator", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* set up communicator structure */ - rc = ompi_comm_set ( &newcomp, /* new comm */ - comm, /* old comm */ - group->grp_proc_count, /* local_size */ - NULL, /* local_procs */ - rsize, /* remote_size */ - NULL , /* remote_procs */ - NULL, /* attrs */ - comm->error_handler, /* error handler */ - NULL, /* topo component */ - group, /* local group */ - new_group_pointer /* remote group */ - ); - if ( NULL == newcomp ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - ompi_group_decrement_proc_count (new_group_pointer); - OBJ_RELEASE(new_group_pointer); - new_group_pointer = MPI_GROUP_NULL; - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept allocate comm_cid", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* allocate comm_cid */ - rc = ompi_comm_nextcid ( newcomp, /* new communicator */ - comm, /* old communicator */ - NULL, /* bridge comm */ - &root, /* local leader */ - &carport, /* remote leader */ - OMPI_COMM_CID_INTRA_OOB, /* mode */ - send_first ); /* send or recv first */ - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept activate comm", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* activate comm and init coll-component */ - rc = ompi_comm_activate ( &newcomp, /* new communicator */ - comm, /* old communicator */ - NULL, /* bridge comm */ - &root, /* local leader */ - &carport, /* remote leader */ - OMPI_COMM_CID_INTRA_OOB, /* mode */ - send_first ); /* send or recv first */ - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - /* Question: do we have to re-start some low level stuff - to enable the usage of fast 
communication devices - between the two worlds ? - */ - - exit: - if ( NULL != rprocs ) { - free ( rprocs ); - } - if ( NULL != proc_list ) { - free ( proc_list ); - } - if ( NULL != new_proc_list ) { - free ( new_proc_list ); - } - if ( OMPI_SUCCESS != rc ) { - if ( MPI_COMM_NULL != newcomp && NULL != newcomp ) { - OBJ_RETAIN(newcomp); - newcomp = MPI_COMM_NULL; - } - } - - *newcomm = newcomp; - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept complete", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - return rc; -} - -static int construct_peers(ompi_group_t *group, opal_list_t *peers) -{ - int i; - orte_namelist_t *nm, *n2; - ompi_proc_t *proct; - - if (OMPI_GROUP_IS_DENSE(group)) { - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect group is dense", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - for (i=0; i < group->grp_proc_count; i++) { - if (NULL == (proct = group->grp_proc_pointers[i])) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - /* add to the list of peers */ - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect adding participant %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT((const orte_process_name_t *)&proct->super.proc_name))); - nm = OBJ_NEW(orte_namelist_t); - nm->name = *(orte_process_name_t*)&proct->super.proc_name; - /* need to maintain an ordered list to ensure the tracker signatures - * match across all procs */ - OPAL_LIST_FOREACH(n2, peers, orte_namelist_t) { - if (opal_compare_proc(nm->name, n2->name) < 0) { - opal_list_insert_pos(peers, &n2->super, &nm->super); - nm = NULL; - break; - } - } - if (NULL != nm) { - /* append to the end */ - opal_list_append(peers, &nm->super); - } - } - } else { - for (i=0; i < group->grp_proc_count; i++) { - /* lookup this proc_t to get the process name */ - if (NULL == (proct = ompi_group_peer_lookup(group, i))) { - 
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - /* add to the list of peers */ - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect adding participant %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT((const orte_process_name_t *)&proct->super.proc_name))); - nm = OBJ_NEW(orte_namelist_t); - nm->name = *(orte_process_name_t*)&proct->super.proc_name; - /* need to maintain an ordered list to ensure the tracker signatures - * match across all procs */ - OPAL_LIST_FOREACH(n2, peers, orte_namelist_t) { - if (opal_compare_proc(nm->name, n2->name) < 0) { - opal_list_insert_pos(peers, &n2->super, &nm->super); - nm = NULL; - break; - } - } - if (NULL != nm) { - /* append to the end */ - opal_list_append(peers, &nm->super); - } - } - } - return ORTE_SUCCESS; -} - -static int disconnect(ompi_communicator_t *comm) -{ - int ret, i; - ompi_group_t *group; - opal_list_t coll; - orte_namelist_t *nm; - opal_process_name_t *ids; - - /* Note that we explicitly use an RTE-based barrier (vs. an MPI - barrier). See a lengthy comment in - ompi/runtime/ompi_mpi_finalize.c for a much more detailed - rationale. 
*/ - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect comm_cid %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comm->c_contextid)); - - /* setup the collective */ - OBJ_CONSTRUCT(&coll, opal_list_t); - /* RHC: assuming for now that this must flow across all - * local and remote group members */ - group = comm->c_local_group; - if (ORTE_SUCCESS != (ret = construct_peers(group, &coll))) { - ORTE_ERROR_LOG(ret); - OPAL_LIST_DESTRUCT(&coll); - return ret; - } - /* do the same for the remote group */ - group = comm->c_remote_group; - if (ORTE_SUCCESS != (ret = construct_peers(group, &coll))) { - ORTE_ERROR_LOG(ret); - OPAL_LIST_DESTRUCT(&coll); - return ret; - } - - /* setup the ids */ - ids = (opal_process_name_t*)malloc(opal_list_get_size(&coll) * sizeof(opal_process_name_t)); - i=0; - OPAL_LIST_FOREACH(nm, &coll, orte_namelist_t) { - ids[i++] = nm->name; - } - OPAL_LIST_DESTRUCT(&coll); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect calling barrier on comm_cid %d with %d participants", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comm->c_contextid, i)); - opal_pmix.fence(ids, i); - free(ids); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect barrier complete for comm_cid %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comm->c_contextid)); - - return OMPI_SUCCESS; -} - -static int spawn(int count, const char *array_of_commands[], - char **array_of_argv[], - const int array_of_maxprocs[], - const MPI_Info array_of_info[], - const char *port_name) -{ - int rc, i, j, counter; - int have_wdir=0; - int flag=0; - char cwd[OPAL_PATH_MAX]; - char host[OPAL_MAX_INFO_VAL]; /*** should define OMPI_HOST_MAX ***/ - char prefix[OPAL_MAX_INFO_VAL]; - char stdin_target[OPAL_MAX_INFO_VAL]; - char params[OPAL_MAX_INFO_VAL]; - char mapper[OPAL_MAX_INFO_VAL]; - int npernode; - char slot_list[OPAL_MAX_INFO_VAL]; - - orte_job_t *jdata; - orte_app_context_t *app; - bool 
local_spawn, non_mpi; - char **envars; - - /* parse the info object */ - /* check potentially for: - - "host": desired host where to spawn the processes - - "hostfile": hostfile containing hosts where procs are - to be spawned - - "add-host": add the specified hosts to the known list - of available resources and spawn these - procs on them - - "add-hostfile": add the hosts in the hostfile to the - known list of available resources and spawn - these procs on them - - "env": a newline-delimited list of envar values to be - placed into the app's environment (of form "foo=bar") - - "ompi_prefix": the path to the root of the directory tree where ompi - executables and libraries can be found on all nodes - used to spawn these procs - - "arch": desired architecture - - "wdir": directory, where executable can be found - - "path": list of directories where to look for the executable - - "file": filename, where additional information is provided. - - "soft": see page 92 of MPI-2. - - "mapper": indicate the mapper to be used for the job - - "display_map": display the map of the spawned job - - "npernode": number of procs/node to spawn - - "pernode": spawn one proc/node - - "ppr": spawn specified number of procs per specified object - - "map_by": specify object by which the procs should be mapped - - "rank_by": specify object by which the procs should be ranked - - "bind_to": specify object to which the procs should be bound - - "ompi_preload_binary": move binaries to nodes prior to execution - - "ompi_preload_files": move specified files to nodes prior to execution - - "ompi_non_mpi": spawned job will not call MPI_Init - - "ompi_param": list of MCA params to be in the spawned job's environment - - "env": newline (\n) delimited list of envar values to be passed to spawned procs - */ - - /* setup the job object */ - jdata = OBJ_NEW(orte_job_t); - - /* Convert the list of commands to an array of orte_app_context_t - pointers */ - for (i = 0; i < count; ++i) { - app = 
OBJ_NEW(orte_app_context_t); - if (NULL == app) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(jdata); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* add the app to the job data */ - opal_pointer_array_add(jdata->apps, app); - app->idx = i; - jdata->num_apps++; - - /* copy over the name of the executable */ - app->app = strdup(array_of_commands[i]); - if (NULL == app->app) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(jdata); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* record the number of procs to be generated */ - app->num_procs = array_of_maxprocs[i]; - - /* copy over the argv array */ - counter = 1; - - if (MPI_ARGVS_NULL != array_of_argv && - MPI_ARGV_NULL != array_of_argv[i]) { - /* first need to find out how many entries there are */ - j=0; - while (NULL != array_of_argv[i][j]) { - j++; - } - counter += j; - } - - /* now copy them over, ensuring to NULL terminate the array */ - app->argv = (char**)malloc((1 + counter) * sizeof(char*)); - if (NULL == app->argv) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(jdata); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - app->argv[0] = strdup(array_of_commands[i]); - for (j=1; j < counter; j++) { - app->argv[j] = strdup(array_of_argv[i][j-1]); - } - app->argv[counter] = NULL; - - - /* the environment gets set by the launcher - * all we need to do is add the specific values - * needed for comm_spawn - */ - /* Add environment variable with the contact information for the - child processes. 
- */ - app->env = (char**)malloc(2 * sizeof(char*)); - if (NULL == app->env) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(jdata); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - asprintf(&(app->env[0]), "OMPI_PARENT_PORT=%s", port_name); - app->env[1] = NULL; - for (j = 0; NULL != environ[j]; ++j) { - if (0 == strncmp("OMPI_", environ[j], 5)) { - opal_argv_append_nosize(&app->env, environ[j]); - } - } - - /* Check for well-known info keys */ - have_wdir = 0; - if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) { - - /* check for personality */ - ompi_info_get (array_of_info[i], "personality", sizeof(host) - 1, host, &flag); - if ( flag ) { - jdata->personality = strdup(host); - } - - /* check for 'host' */ - ompi_info_get (array_of_info[i], "host", sizeof(host) - 1, host, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, false, host, OPAL_STRING); - } - - /* check for 'hostfile' */ - ompi_info_get (array_of_info[i], "hostfile", sizeof(host) - 1, host, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, false, host, OPAL_STRING); - } - - /* check for 'add-hostfile' */ - ompi_info_get (array_of_info[i], "add-hostfile", sizeof(host) - 1, host, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_ADD_HOSTFILE, false, host, OPAL_STRING); - } - - /* check for 'add-host' */ - ompi_info_get (array_of_info[i], "add-host", sizeof(host) - 1, host, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_ADD_HOST, false, host, OPAL_STRING); - } - - /* check for env */ - ompi_info_get (array_of_info[i], "env", sizeof(host)-1, host, &flag); - if ( flag ) { - envars = opal_argv_split(host, '\n'); - for (j=0; NULL != envars[j]; j++) { - opal_argv_append_nosize(&app->env, envars[j]); - } - opal_argv_free(envars); - } - - /* 'path', 'arch', 'file', 'soft' -- to be implemented */ - - /* check for 'ompi_prefix' (OMPI-specific 
-- to effect the same - * behavior as --prefix option to orterun) - */ - ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix) - 1, prefix, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, false, prefix, OPAL_STRING); - } - - /* check for 'wdir' */ - ompi_info_get (array_of_info[i], "wdir", sizeof(cwd) - 1, cwd, &flag); - if ( flag ) { - app->cwd = strdup(cwd); - have_wdir = 1; - } - - /* check for 'mapper' */ - ompi_info_get(array_of_info[i], "mapper", sizeof(mapper) - 1, mapper, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - jdata->map->req_mapper = strdup(mapper); - } - - /* check for 'display_map' */ - ompi_info_get_bool(array_of_info[i], "display_map", &local_spawn, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - jdata->map->display_map = true; - } - - /* check for 'npernode' and 'ppr' */ - ompi_info_get (array_of_info[i], "npernode", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (ORTE_SUCCESS != ompi_info_value_to_int(slot_list, &npernode)) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - return OMPI_ERROR; - } - ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_PPR); - asprintf(&(jdata->map->ppr), "%d:n", npernode); - } - ompi_info_get (array_of_info[i], "pernode", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (NULL == jdata->map) { - 
jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - return OMPI_ERROR; - } - ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_PPR); - jdata->map->ppr = strdup("1:n"); - } - ompi_info_get (array_of_info[i], "ppr", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - return OMPI_ERROR; - } - ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_PPR); - jdata->map->ppr = strdup(slot_list); - } - - /* check for 'map_by' */ - ompi_info_get(array_of_info[i], "map_by", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - return OMPI_ERROR; - } - if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, - NULL, slot_list))) { - return rc; - } - } - - /* check for 'rank_by' */ - ompi_info_get(array_of_info[i], "rank_by", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { - /* not allowed to provide multiple ranking policies */ - return OMPI_ERROR; - } - if (ORTE_SUCCESS != (rc = 
orte_rmaps_base_set_ranking_policy(&jdata->map->ranking, - jdata->map->mapping, slot_list))) { - return rc; - } - } - -#if OPAL_HAVE_HWLOC - /* check for 'bind_to' */ - ompi_info_get(array_of_info[i], "bind_to", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { - /* not allowed to provide multiple binding policies */ - return OMPI_ERROR; - } - if (ORTE_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&jdata->map->binding, slot_list))) { - return rc; - } - } -#endif - - /* check for 'preload_binary' */ - ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, false, NULL, OPAL_BOOL); - } - - /* check for 'preload_files' */ - ompi_info_get (array_of_info[i], "ompi_preload_files", sizeof(cwd) - 1, cwd, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_FILES, false, cwd, OPAL_STRING); - } - - /* see if this is a non-mpi job - if so, then set the flag so ORTE - * knows what to do - */ - ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); - if (flag && non_mpi) { - orte_set_attribute(&jdata->attributes, ORTE_JOB_NON_ORTE_JOB, false, NULL, OPAL_BOOL); - } - - /* see if this is an MCA param that the user wants applied to the child job */ - ompi_info_get (array_of_info[i], "ompi_param", sizeof(params) - 1, params, &flag); - if ( flag ) { - opal_argv_append_unique_nosize(&app->env, params, true); - } - - /* see if user specified what to do with stdin - defaults to - * not forwarding stdin to child processes - */ - ompi_info_get (array_of_info[i], "ompi_stdin_target", sizeof(stdin_target) - 1, stdin_target, &flag); - if ( flag ) { - if (0 == strcmp(stdin_target, "all")) { - 
jdata->stdin_target = ORTE_VPID_WILDCARD; - } else if (0 == strcmp(stdin_target, "none")) { - jdata->stdin_target = ORTE_VPID_INVALID; - } else { - jdata->stdin_target = strtoul(stdin_target, NULL, 10); - } - } - } - - /* default value: If the user did not tell us where to look for the - * executable, we assume the current working directory - */ - if ( !have_wdir ) { - if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OPAL_PATH_MAX))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(jdata); - opal_progress_event_users_decrement(); - return rc; - } - app->cwd = strdup(cwd); - } - - /* leave the map info alone - the launcher will - * decide where to put things - */ - } /* for (i = 0 ; i < count ; ++i) */ - - /* default the personality */ - if (NULL == jdata->personality) { - jdata->personality = strdup("ompi"); - } - - /* spawn procs */ - rc = orte_plm.spawn(jdata); - OBJ_RELEASE(jdata); - - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } - - return OMPI_SUCCESS; -} - -/* - * The port_name is constructed to support the ability - * to route messages between different jobs. Messages - * between job families are routed via their respective HNPs - * to reduce connection count and to support connect/accept. - * Thus, the port_name consists of three fields: - * (a) the contact info of the process opening the port. This - * is provided in case the routed module wants to communicate - * directly between the procs. - * (b) the tag of the port. The reason for adding the tag is - * to make the port unique for multi-threaded scenarios. - * (c) the contact info for the job's HNP. This will be - * used to route messages between job families - * - * Construction of the port name is done here - as opposed to - * in the routed module itself - because two mpiruns using different - * routed modules could exchange the port name (via pubsub). The - * format of the port name must, therefore, be universal. 
- * - * Optionally can provide a tag to be used - otherwise, we supply the - * next dynamically assigned tag - */ -static int open_port(char *port_name, orte_rml_tag_t given_tag) -{ - char *rml_uri=NULL; - int rc, len; - char tag[12]; - - /* if we are a singleton and the supporting HNP hasn't - * been spawned, then do so now - */ - if ((orte_process_info.proc_type & ORTE_PROC_SINGLETON) && - !orte_routing_is_enabled) { - if (ORTE_SUCCESS != (rc = orte_plm_base_fork_hnp())) { - ORTE_ERROR_LOG(rc); - return OMPI_ERROR; - } - orte_routing_is_enabled = true; - /* need to init_routes again to redirect messages - * thru the HNP - */ - orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL); - } - - if (NULL == orte_process_info.my_hnp_uri) { - rc = OMPI_ERR_NOT_AVAILABLE; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - if (NULL == (rml_uri = orte_rml.get_contact_info())) { - rc = OMPI_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - if (ORTE_RML_TAG_INVALID == given_tag) { - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - snprintf(tag, 12, "%d", next_tag); - next_tag++; - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - } else { - snprintf(tag, 12, "%d", given_tag); - } - - - len = strlen(orte_process_info.my_hnp_uri) + strlen(rml_uri) + strlen(tag); - - /* if the overall port name is too long, we abort */ - if (len > (MPI_MAX_PORT_NAME-1)) { - rc = OMPI_ERR_VALUE_OUT_OF_BOUNDS; - goto cleanup; - } - - /* assemble the port name */ - snprintf(port_name, MPI_MAX_PORT_NAME, "%s+%s:%s", orte_process_info.my_hnp_uri, rml_uri, tag); - rc = OMPI_SUCCESS; - -cleanup: - if (NULL != rml_uri) { - free(rml_uri); - } - - return rc; -} - - -static int route_to_port(char *rml_uri, orte_process_name_t *rproc) -{ - opal_buffer_t route; - int rc; - - /* We need to ask the routed module to init_routes so it can do the - * right thing. 
In most cases, it will route any messages to the - * proc through our HNP - however, this is NOT the case in all - * circumstances, so we need to let the routed module decide what - * to do. - */ - /* pack a cmd so the buffer can be unpacked correctly */ - OBJ_CONSTRUCT(&route, opal_buffer_t); - - /* pack the provided uri */ - opal_dss.pack(&route, &rml_uri, 1, OPAL_STRING); - - /* init the route */ - if (ORTE_SUCCESS != (rc = orte_routed.init_routes(rproc->jobid, &route))) { - ORTE_ERROR_LOG(rc); - } - OBJ_DESTRUCT(&route); - - /* nothing more to do here */ - return rc; -} - -static int parse_port_name(const char *port_name, - char **hnp_uri, - char **rml_uri, - orte_rml_tag_t *ptag) -{ - char *tmpstring=NULL, *ptr; - int tag; - int rc; - - /* don't mangle the port name */ - tmpstring = strdup(port_name); - if (NULL == tmpstring) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* find the ':' demarking the RML tag we added to the end */ - if (NULL == (ptr = strrchr(tmpstring, ':'))) { - rc = OMPI_ERR_NOT_FOUND; - goto cleanup; - } - - /* terminate the port_name at that location */ - *ptr = '\0'; - ptr++; - - /* convert the RML tag */ - sscanf(ptr,"%d", &tag); - - /* now split out the second field - the uri of the remote proc */ - if (NULL == (ptr = strchr(tmpstring, '+'))) { - rc = OMPI_ERR_NOT_FOUND; - goto cleanup; - } - *ptr = '\0'; - ptr++; - - /* save that info */ - if(NULL != hnp_uri) *hnp_uri = tmpstring; - else free(tmpstring); - if(NULL != rml_uri) *rml_uri = strdup(ptr); - if(NULL != ptag) *ptag = tag; - - return OMPI_SUCCESS; - -cleanup: - /* release the tmp storage */ - free(tmpstring); - return rc; -} - -static int close_port(const char *port_name) -{ - /* nothing to do here - user is responsible for the memory */ - return OMPI_SUCCESS; -} - -static int dyn_init(void) -{ - char *port_name=NULL; - int root=0, rc; - bool send_first = true; - ompi_communicator_t *newcomm=NULL; - - /* if env-variable is set, we are a dynamically spawned - * child - parse 
port and call comm_connect_accept */ - if (NULL == (port_name = ompi_dpm_base_dyn_init())) { - /* nothing to do */ - return OMPI_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:dyn_init with port %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - port_name)); - - rc = connect_accept (MPI_COMM_WORLD, root, port_name, send_first, &newcomm); - free(port_name); - if (OMPI_SUCCESS != rc) { - return rc; - } - - /* originally, we set comm_parent to comm_null (in comm_init), - * now we have to decrease the reference counters to the according - * objects - */ - OBJ_RELEASE(ompi_mpi_comm_parent->c_local_group); - OBJ_RELEASE(ompi_mpi_comm_parent->error_handler); - OBJ_RELEASE(ompi_mpi_comm_parent); - - /* Set the parent communicator */ - ompi_mpi_comm_parent = newcomm; - - /* Set name for debugging purposes */ - snprintf(newcomm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMM_PARENT"); - newcomm->c_flags |= OMPI_COMM_NAMEISSET; - - return OMPI_SUCCESS; -} - - -/* - * finalize the module - */ -static int finalize(void) -{ - OBJ_DESTRUCT(&ompi_dpm_port_mutex); - OPAL_LIST_DESTRUCT(&orte_dpm_acceptors); - OPAL_LIST_DESTRUCT(&orte_dpm_connectors); - return OMPI_SUCCESS; -} - -static void timeout_cb(int fd, short args, void *cbdata) -{ - orte_dpm_prequest_t *req = (orte_dpm_prequest_t*)cbdata; - - /* remove the request from the list */ - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - opal_list_remove_item(&orte_dpm_connectors, &req->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - - /* this connection request failed - notify the caller */ - req->cbfunc(MPI_COMM_NULL, NULL, req->cbdata); - - /* cleanup */ - OBJ_RELEASE(req); -} - -static void process_request(orte_process_name_t* sender, - opal_buffer_t *buffer, - bool connector, - ompi_communicator_t **newcomm, - ompi_proc_t **proct) -{ - ompi_communicator_t *newcomp=MPI_COMM_NULL; - ompi_group_t *group=MPI_COMM_SELF->c_local_group; - ompi_group_t *new_group_pointer; - ompi_proc_t **rprocs=NULL; 
- ompi_proc_t **new_proc_list=NULL; - int new_proc_len; - opal_buffer_t *xfer; - int cnt, rc; - uint32_t id; - - OPAL_OUTPUT_VERBOSE((2, ompi_dpm_base_framework.framework_output, - "%s dpm:pconprocess: PROCESS REQUEST: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - connector ? "connector" : "acceptor")); - - /* if we are the acceptor, unpack the remote peer's request id */ - if (!connector) { - cnt=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &id, &cnt, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - OPAL_OUTPUT_VERBOSE((2, ompi_dpm_base_framework.framework_output, - "%s dpm:pconprocess: PROCESS REQUEST ID: %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), id)); - } - - /* unpack the proc info */ - if (OMPI_SUCCESS != (rc = ompi_proc_unpack(buffer, 1, &rprocs, false, &new_proc_len, &new_proc_list))) { - ORTE_ERROR_LOG(rc); - return; - } - - /* If we added new procs, we need to unpack the modex info - * and then call PML add_procs - */ - if (0 < new_proc_len) { - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:pconprocess: process modex", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:pconprocess: adding procs", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - if (OMPI_SUCCESS != (rc = MCA_PML_CALL(add_procs(new_proc_list, new_proc_len)))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:pconnect new procs added", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - } - - /* if we are the acceptor, we now have to send the requestor our - * info so we can collaborate on setup of the communicator - we - * must wait until this point so the route can be initiated, if - * required - */ - if (!connector) { - xfer = OBJ_NEW(opal_buffer_t); - /* pack the request id */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(xfer, &id, 1, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(xfer); - goto cleanup; 
- } - /* pack the remaining info */ - if (ORTE_SUCCESS != ompi_proc_pack(group->grp_proc_pointers, 1, true, xfer)) { - OBJ_RELEASE(xfer); - goto cleanup; - } - /* send to requestor */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(sender, xfer, OMPI_RML_PCONNECT_TAG, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(xfer); - goto cleanup; - } - } - - /* allocate a new group */ - new_group_pointer=ompi_group_allocate(1); - if( NULL == new_group_pointer ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - - /* put group element in the list */ - new_group_pointer->grp_proc_pointers[0] = rprocs[0]; - - /* increment proc reference counter */ - ompi_group_increment_proc_count(new_group_pointer); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:pconprocess setting up communicator", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* set up communicator structure */ - rc = ompi_comm_set(&newcomp, /* new comm */ - MPI_COMM_SELF, /* old comm */ - 1, /* local_size */ - NULL, /* local_procs */ - 1, /* remote_size */ - NULL, /* remote_procs */ - NULL, /* attrs */ - MPI_COMM_SELF->error_handler, /* error handler */ - NULL, /* topo component */ - group, /* local group */ - new_group_pointer /* remote group */ - ); - if (NULL == newcomp) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - - ompi_group_decrement_proc_count (new_group_pointer); - OBJ_RELEASE(new_group_pointer); - new_group_pointer = MPI_GROUP_NULL; - - /* return the communicator */ - *newcomm = newcomp; - *proct = rprocs[0]; - rc = OMPI_SUCCESS; - - cleanup: - if (NULL != rprocs) { - free(rprocs); - } - if (NULL != new_proc_list) { - free(new_proc_list); - } - if (OMPI_SUCCESS != rc && MPI_COMM_NULL != newcomp) { - OBJ_RELEASE(newcomp); - } -} - -static void connect_complete(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - ompi_communicator_t *newcomm=MPI_COMM_NULL; - ompi_proc_t *proct=NULL; - 
orte_dpm_prequest_t *req=NULL, *rptr; - int rc, cnt; - uint32_t id; - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:pconnect: starting", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* unpack the request id */ - cnt=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &id, &cnt, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* find this request on the list */ - req = NULL; - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - OPAL_LIST_FOREACH(rptr, &orte_dpm_connectors, orte_dpm_prequest_t) { - if (id == rptr->id) { - req = rptr; - break; - } - } - if (NULL == req) { - /* unknown request */ - opal_output(0, "%s dpm:pconnect: received unknown id %u from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), id, - ORTE_NAME_PRINT(sender)); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - return; - } - /* remove the request from the list */ - opal_list_remove_item(&orte_dpm_connectors, &req->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:pconnect: found request %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), id)); - - if (req->event_active) { - /* release the timeout */ - opal_event_del(&req->ev); - } - - /* process the request - as the initiator, we will send first - * for communicator creation - */ - process_request(sender, buffer, true, &newcomm, &proct); - /* notify the MPI layer */ - req->cbfunc(newcomm, proct, req->cbdata); - - cleanup: - if (NULL != req) { - OBJ_RELEASE(req); - } -} - -static int dpm_pconnect(char *port, - struct timeval *timeout, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata) -{ - char *hnp_uri, *rml_uri; - orte_rml_tag_t tag; - int rc; - orte_dpm_prequest_t *connector; - orte_process_name_t peer; - ompi_group_t *group=MPI_COMM_SELF->c_local_group; - opal_buffer_t *buf; - - /* separate the string into the HNP and RML URI and tag */ - if (ORTE_SUCCESS != (rc = parse_port_name(port, &hnp_uri, &rml_uri, &tag))) { - 
ORTE_ERROR_LOG(rc); - return rc; - } - /* extract the originating proc's name */ - if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(rml_uri, &peer, NULL))) { - ORTE_ERROR_LOG(rc); - free(hnp_uri); free(rml_uri); - return rc; - } - /* make sure we can route rml messages to the destination job */ - if (ORTE_SUCCESS != (rc = route_to_port(hnp_uri, &peer))) { - ORTE_ERROR_LOG(rc); - free(hnp_uri); free(rml_uri); - return rc; - } - opal_output(0, "dpm:pconnect requesting connect to %s on tag %d", - ORTE_NAME_PRINT(&peer), tag); - - free(hnp_uri); free(rml_uri); - - /* create a message to the remote peer */ - buf = OBJ_NEW(opal_buffer_t); - - /* track the connection request */ - connector = OBJ_NEW(orte_dpm_prequest_t); - connector->tag = tag; - connector->cbfunc = cbfunc; - connector->cbdata = cbdata; - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - connector->id = next_preq++; - opal_list_append(&orte_dpm_connectors, &connector->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - - /* pack my request id */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &connector->id, 1, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - opal_list_remove_item(&orte_dpm_connectors, &connector->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - OBJ_RELEASE(connector); - return rc; - } - /* pack the request info */ - if (ORTE_SUCCESS != ompi_proc_pack(group->grp_proc_pointers, 1, true, buf)) { - OBJ_RELEASE(buf); - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - opal_list_remove_item(&orte_dpm_connectors, &connector->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - OBJ_RELEASE(connector); - return rc; - } - - /* setup the timeout, if requested */ - if (NULL != timeout) { - opal_output(0, "dpm:pconnect setting timeout"); - opal_event_evtimer_set(orte_event_base, - &connector->ev, timeout_cb, connector); - opal_event_set_priority(&connector->ev, ORTE_ERROR_PRI); - opal_event_evtimer_add(&connector->ev, timeout); - connector->event_active 
= true; - } else { - connector->event_active = false; - } - - /* send it to our new friend */ - OPAL_OUTPUT_VERBOSE((2, ompi_dpm_base_framework.framework_output, - "%s dpm:pconnect sending connect to %s on tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&peer), tag)); - - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&peer, buf, tag, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - } - - return rc; -} - -static void paccept_recv(int status, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - orte_dpm_prequest_t *acceptor = (orte_dpm_prequest_t*)cbdata; - ompi_communicator_t *newcomm=MPI_COMM_NULL; - ompi_proc_t *proct=NULL; - - OPAL_OUTPUT_VERBOSE((2, ompi_dpm_base_framework.framework_output, - "%s dpm:paccept recvd request from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer))); - - /* process the request - as the acceptor, we will recv first - * on communicator formation - */ - process_request(peer, buffer, false, &newcomm, &proct); - /* if we succeeded, notify the MPI layer */ - if (MPI_COMM_NULL != newcomm) { - acceptor->cbfunc(newcomm, proct, acceptor->cbdata); - } -} - -static int dpm_paccept(char *port, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata) -{ - orte_rml_tag_t tag; - int rc; - orte_dpm_prequest_t *acceptor; - - /* extract the RML tag from the port name - it's the only part we need */ - if (OMPI_SUCCESS != (rc = parse_port_name(port, NULL, NULL, &tag))) { - return rc; - } - - /* track the accept request */ - acceptor = OBJ_NEW(orte_dpm_prequest_t); - acceptor->tag = tag; - acceptor->cbfunc = cbfunc; - acceptor->cbdata = cbdata; - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - opal_list_append(&orte_dpm_acceptors, &acceptor->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - - /* register a recv for this tag */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag, - ORTE_RML_PERSISTENT, - paccept_recv, acceptor); 
- - return OMPI_SUCCESS; -} - -static void dpm_pclose(char *port) -{ - orte_rml_tag_t tag; - orte_dpm_prequest_t *rptr; - - /* extract the RML tag from the port name - it's the only part we need */ - if (OMPI_SUCCESS != parse_port_name(port, NULL, NULL, &tag)) { - return; - } - - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - OPAL_LIST_FOREACH(rptr, &orte_dpm_acceptors, orte_dpm_prequest_t) { - if (tag == rptr->tag) { - /* found it */ - opal_list_remove_item(&orte_dpm_acceptors, &rptr->super); - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, tag); - OBJ_RELEASE(rptr); - break; - } - } - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); -} diff --git a/ompi/mca/dpm/orte/dpm_orte.h b/ompi/mca/dpm/orte/dpm_orte.h deleted file mode 100644 index 2badcdcb519..00000000000 --- a/ompi/mca/dpm/orte/dpm_orte.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2006 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OMPI_DPM_ORTE_H -#define OMPI_DPM_ORTE_H - -#include "ompi_config.h" - - -#include "ompi/mca/dpm/dpm.h" - -BEGIN_C_DECLS - -/* access to module */ -extern ompi_dpm_base_module_t ompi_dpm_orte_module; - -OMPI_MODULE_DECLSPEC extern ompi_dpm_base_component_t mca_dpm_orte_component; - -END_C_DECLS - -#endif /* OMPI_DPM_ORTE_H */ diff --git a/ompi/mca/dpm/orte/dpm_orte_component.c b/ompi/mca/dpm/orte/dpm_orte_component.c deleted file mode 100644 index 184742c4a1b..00000000000 --- a/ompi/mca/dpm/orte/dpm_orte_component.c +++ /dev/null @@ -1,67 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include "dpm_orte.h" - -static int dpm_orte_component_open(void); -static int dpm_orte_component_close(void); -static int dpm_orte_component_query(mca_base_module_t **module, int *priority); - -ompi_dpm_base_component_t mca_dpm_orte_component = { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .base_version = { - OMPI_DPM_BASE_VERSION_2_0_0, - - .mca_component_name = "orte", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_open_component = dpm_orte_component_open, - .mca_close_component = dpm_orte_component_close, - .mca_query_component = dpm_orte_component_query, - }, - .base_data = { - /* This component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - - -int dpm_orte_component_open(void) -{ - return OMPI_SUCCESS; -} - -int dpm_orte_component_close(void) -{ - return OMPI_SUCCESS; -} - -static int dpm_orte_component_query(mca_base_module_t **module, int *priority) -{ - *priority = 50; - *module = (mca_base_module_t *) &ompi_dpm_orte_module; - return OMPI_SUCCESS; -} diff --git a/ompi/mca/dpm/orte/help-ompi-dpm-orte.txt b/ompi/mca/dpm/orte/help-ompi-dpm-orte.txt deleted file mode 100644 index 68bd9103774..00000000000 --- a/ompi/mca/dpm/orte/help-ompi-dpm-orte.txt +++ /dev/null @@ -1,43 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. 
-# All rights reserved. -# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open MPI. -# -[dpm-orte:no-server] -Process rank %ld attempted to %s a global ompi_server that -could not be contacted. This is typically caused by either not -specifying the contact info for the server, or by the server not -currently executing. If you did specify the contact info for a -server, please check to see that the server is running and start -it again (or have your sys admin start it) if it isn't. - -[dpm-orte:unknown-order] -Process rank %ld attempted to lookup a value but provided an -unrecognized order parameter. Order parameters are used to tell Open -MPI if it should first look for the requested value locally (i.e., from -the current job) or from a global ompi_server. Accepted order -parameters are "local" and "global", respectively. - -[dpm-orte:too-many-orders] -Process rank %ld attempted to lookup a value but provided too many -order parameters (%ld found). Order parameters are used to tell -Open MPI if it should first look for the requested value locally -(i.e., from the current job) or from a global ompi_server. Accepted -order parameters are "local" and "global", respectively, and each can -only be specified once. diff --git a/ompi/mca/dpm/orte/owner.txt b/ompi/mca/dpm/orte/owner.txt deleted file mode 100644 index 4ad6f408ca3..00000000000 --- a/ompi/mca/dpm/orte/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/ompi/mca/fbtl/Makefile.am b/ompi/mca/fbtl/Makefile.am index 358c4cdab4b..6e023ec073e 100644 --- a/ompi/mca/fbtl/Makefile.am +++ b/ompi/mca/fbtl/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fbtl/base/Makefile.am b/ompi/mca/fbtl/base/Makefile.am index 0d539aec08a..7758c02f745 100644 --- a/ompi/mca/fbtl/base/Makefile.am +++ b/ompi/mca/fbtl/base/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fbtl/base/base.h b/ompi/mca/fbtl/base/base.h index 1fb4f3c3db2..bc79eeb6430 100644 --- a/ompi/mca/fbtl/base/base.h +++ b/ompi/mca/fbtl/base/base.h @@ -5,20 +5,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** +/** * @file * * MCA fbtl base framework public interface functions. diff --git a/ompi/mca/fbtl/base/fbtl_base_file_select.c b/ompi/mca/fbtl/base/fbtl_base_file_select.c index d7ea46eb7ed..36393fc6d91 100644 --- a/ompi/mca/fbtl/base/fbtl_base_file_select.c +++ b/ompi/mca/fbtl/base/fbtl_base_file_select.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,7 +30,7 @@ #include "ompi/mca/io/ompio/io_ompio.h" /* - * This structure is needed so that we can close the modules + * This structure is needed so that we can close the modules * which are not selected but were opened. mca_base_modules_close * which does this job for us requires a opal_list_t which contains * these modules @@ -58,20 +58,20 @@ static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL); * 4. Select the module with the highest priority * 5. Call the init function on the selected module so that it does the * right setup for the file - * 6. 
Call finalize on all the other modules which returned + * 6. Call finalize on all the other modules which returned * their module but were unfortunate to not get selected - */ + */ int mca_fbtl_base_file_select (struct mca_io_ompio_file_t *file, - mca_base_component_t *preferred) + mca_base_component_t *preferred) { - int priority; - int best_priority; - opal_list_item_t *item; + int priority; + int best_priority; + opal_list_item_t *item; mca_base_component_list_item_t *cli; - mca_fbtl_base_component_t *component; + mca_fbtl_base_component_t *component; mca_fbtl_base_component_t *best_component; - mca_fbtl_base_module_t *module; + mca_fbtl_base_module_t *module; opal_list_t queried; queried_module_t *om; char *str; @@ -81,22 +81,22 @@ int mca_fbtl_base_file_select (struct mca_io_ompio_file_t *file, provided then it should be used (if possible) */ if (NULL != preferred) { - + /* We have a preferred component. Check if it is available and if so, whether it wants to run */ - + str = &(preferred->mca_component_name[0]); - + opal_output_verbose(10, ompi_fbtl_base_framework.framework_output, "fbtl:base:file_select: Checking preferred component: %s", str); - - /* query the component for its priority and get its module + + /* query the component for its priority and get its module structure. This is necessary to proceed */ - + component = (mca_fbtl_base_component_t *)preferred; module = component->fbtlm_file_query (file, &priority); - if (NULL != module && + if (NULL != module && NULL != module->fbtl_module_init) { /* this query seems to have returned something legitimate @@ -109,7 +109,7 @@ int mca_fbtl_base_file_select (struct mca_io_ompio_file_t *file, file->f_fbtl_component = preferred; return module->fbtl_module_init(file); - } + } /* His preferred component is present, but is unable to * run. This is not a good sign. 
We should try selecting * some other component We let it fall through and select @@ -126,7 +126,7 @@ int mca_fbtl_base_file_select (struct mca_io_ompio_file_t *file, * All we need to do is to go through the list of available * components and find the one which has the highest priority and * use that for this file - */ + */ best_component = NULL; best_priority = -1; @@ -148,21 +148,21 @@ int mca_fbtl_base_file_select (struct mca_io_ompio_file_t *file, } else { /* * call the query function and see what it returns - */ + */ module = component->fbtlm_file_query (file, &priority); if (NULL == module || NULL == module->fbtl_module_init) { /* * query did not return any action which can be used - */ + */ opal_output_verbose(10, ompi_fbtl_base_framework.framework_output, "select: query returned failure"); } else { opal_output_verbose(10, ompi_fbtl_base_framework.framework_output, "select: query returned priority %d", priority); - /* + /* * is this the best component we have found till now? */ if (priority > best_priority) { @@ -179,8 +179,8 @@ int mca_fbtl_base_file_select (struct mca_io_ompio_file_t *file, return OMPI_ERR_OUT_OF_RESOURCE; } om->om_component = component; - om->om_module = module; - opal_list_append(&queried, (opal_list_item_t *)om); + om->om_module = module; + opal_list_append(&queried, (opal_list_item_t *)om); } /* end else of if (NULL == module) */ } /* end else of if (NULL == component->fbtlm_init) */ } /* end for ... end of traversal */ @@ -207,7 +207,7 @@ int mca_fbtl_base_file_select (struct mca_io_ompio_file_t *file, * returned their priorities from the query. We now have to * unquery() those components which have not been selected and * init() the component which was selected - */ + */ while (NULL != (item = opal_list_remove_first(&queried))) { om = (queried_module_t *) item; if (om->om_component == best_component) { @@ -219,7 +219,7 @@ int mca_fbtl_base_file_select (struct mca_io_ompio_file_t *file, * defined. 
Whereever a function pointer is null in the * module structure we need to fill it in with the base * structure function pointers. This is yet to be done - */ + */ /* * We don return here coz we still need to go through and @@ -248,7 +248,7 @@ int mca_fbtl_base_file_select (struct mca_io_ompio_file_t *file, } /* if not best component */ OBJ_RELEASE(om); } /* traversing through the entire list */ - + opal_output_verbose(10, ompi_fbtl_base_framework.framework_output, "select: component %s selected", best_component->fbtlm_version.mca_component_name); diff --git a/ompi/mca/fbtl/base/fbtl_base_file_unselect.c b/ompi/mca/fbtl/base/fbtl_base_file_unselect.c index cb47e69e9d6..280965f9347 100644 --- a/ompi/mca/fbtl/base/fbtl_base_file_unselect.c +++ b/ompi/mca/fbtl/base/fbtl_base_file_unselect.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fbtl/base/fbtl_base_find_available.c b/ompi/mca/fbtl/base/fbtl_base_find_available.c index 79fef6e22d6..c6ecabc3c71 100644 --- a/ompi/mca/fbtl/base/fbtl_base_find_available.c +++ b/ompi/mca/fbtl/base/fbtl_base_find_available.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,6 +27,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "opal/class/opal_list.h" +#include "opal/util/output.h" #include "ompi/mca/mca.h" #include "opal/mca/base/base.h" #include "ompi/mca/fbtl/fbtl.h" @@ -36,18 +39,18 @@ static int init_query(const mca_base_component_t *m, static int init_query_2_0_0(const mca_base_component_t *component, bool enable_progress_threads, bool enable_mpi_threads); - + int mca_fbtl_base_find_available(bool enable_progress_threads, bool enable_mpi_threads) { mca_base_component_list_item_t *cli, *next; - /* The list of components which we should check is already present - in mca_fbtl_base_components_opened, which was established in + /* The list of components which we should check is already present + in mca_fbtl_base_components_opened, which was established in mca_fbtl_base_open */ OPAL_LIST_FOREACH_SAFE(cli, next, &ompi_fbtl_base_framework.framework_components, mca_base_component_list_item_t) { - /* Now for this entry, we have to determine the thread level. Call + /* Now for this entry, we have to determine the thread level. 
Call a subroutine to do the job for us */ if (OMPI_SUCCESS != init_query(cli->cli_component, @@ -69,14 +72,14 @@ int mca_fbtl_base_find_available(bool enable_progress_threads, /* All done */ return OMPI_SUCCESS; } - - + + static int init_query(const mca_base_component_t *m, bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { int ret; - + opal_output_verbose(10, ompi_fbtl_base_framework.framework_output, "fbtl:find_available: querying fbtl component %s", m->mca_component_name); @@ -115,11 +118,11 @@ static int init_query(const mca_base_component_t *m, static int init_query_2_0_0(const mca_base_component_t *component, bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { - mca_fbtl_base_component_2_0_0_t *fbtl = + mca_fbtl_base_component_2_0_0_t *fbtl = (mca_fbtl_base_component_2_0_0_t *) component; - + return fbtl->fbtlm_init_query(enable_progress_threads, enable_mpi_threads); } diff --git a/ompi/mca/fbtl/base/fbtl_base_frame.c b/ompi/mca/fbtl/base/fbtl_base_frame.c index 69669225d72..028a98ba5ac 100644 --- a/ompi/mca/fbtl/base/fbtl_base_frame.c +++ b/ompi/mca/fbtl/base/fbtl_base_frame.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,9 +16,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fbtl/configure.m4 b/ompi/mca/fbtl/configure.m4 index 54b39dc63a5..803de5aaf79 100644 --- a/ompi/mca/fbtl/configure.m4 +++ b/ompi/mca/fbtl/configure.m4 @@ -1,22 +1,28 @@ # -*- shell-script -*- # -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # MCA_ompi_fbtl_CONFIG(project_name, framework_name) # ------------------------------------------- -AC_DEFUN([MCA_ompi_fbtl_CONFIG], +AC_DEFUN([MCA_ompi_fbtl_CONFIG], [ - # An AC-ARG-ENABLE for mpi-io was set in ompi/mca/io/configure.m4. - # If it's no, we shouldn't bother building anything in fcoll. - AS_IF([test "$enable_mpi_io" != "no"], - [want_mpi_io=1], - [want_mpi_io=0]) - MCA_CONFIGURE_FRAMEWORK([$1], [$2], [$want_mpi_io]) + OPAL_VAR_SCOPE_PUSH([want_io_ompio]) + + AS_IF([test "$enable_mpi_io" != "no" && + test "$enable_io_ompio" != "no"], + [want_io_ompio=1], + [want_io_ompio=0]) + + MCA_CONFIGURE_FRAMEWORK([$1], [$2], [$want_io_ompio]) + + OPAL_VAR_SCOPE_POP ]) diff --git a/ompi/mca/fbtl/fbtl.h b/ompi/mca/fbtl/fbtl.h index 14ac44612bd..83ddb7c8f33 100644 --- a/ompi/mca/fbtl/fbtl.h +++ b/ompi/mca/fbtl/fbtl.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -45,15 +45,15 @@ struct mca_ompio_request_t; /* * The file byte transfer layer (fbtl) framework provides the abstraction - * for individual blocking and non-blocking read and write operations. - * The functionality provided by the interfaces in this module + * for individual blocking and non-blocking read and write operations. + * The functionality provided by the interfaces in this module * can be used to implement the corresponding operations in MPI I/O. - * Note however, that the interfaces are not a one-to-one mapping + * Note however, that the interfaces are not a one-to-one mapping * of the MPI individual read and write operations, since the fbtl framework * avoids using derived MPI datatypes. The step mapping/unrolling the MPI * derived data types into a vector of (offset into file, memory address, length) * is done in the OMPIO module of the IO framework. - * + * * These are the component function prototypes. These function pointers * go into the component structure. These functions (query() and finalize() * are called during fbtl_base_select(). 
Each component is query() ied @@ -76,11 +76,11 @@ struct mca_ompio_request_t; * **************** component struct ******************************* */ -typedef int (*mca_fbtl_base_component_init_query_1_0_0_fn_t) - (bool enable_progress_threads, +typedef int (*mca_fbtl_base_component_init_query_1_0_0_fn_t) + (bool enable_progress_threads, bool enable_mpi_threads); -typedef struct mca_fbtl_base_module_1_0_0_t * +typedef struct mca_fbtl_base_module_1_0_0_t * (*mca_fbtl_base_component_file_query_1_0_0_fn_t) (struct mca_io_ompio_file_t *file, int *priority); @@ -95,7 +95,7 @@ typedef int (*mca_fbtl_base_component_file_unquery_1_0_0_fn_t) struct mca_fbtl_base_component_2_0_0_t { mca_base_component_t fbtlm_version; mca_base_component_data_t fbtlm_data; - + mca_fbtl_base_component_init_query_1_0_0_fn_t fbtlm_init_query; mca_fbtl_base_component_file_query_1_0_0_fn_t fbtlm_file_query; mca_fbtl_base_component_file_unquery_1_0_0_fn_t fbtlm_file_unquery; @@ -146,7 +146,7 @@ struct mca_fbtl_base_module_1_0_0_t { */ mca_fbtl_base_module_init_1_0_0_fn_t fbtl_module_init; mca_fbtl_base_module_finalize_1_0_0_fn_t fbtl_module_finalize; - + /* FBTL function pointers */ mca_fbtl_base_module_preadv_fn_t fbtl_preadv; mca_fbtl_base_module_ipreadv_fn_t fbtl_ipreadv; diff --git a/ompi/mca/fbtl/plfs/Makefile.am b/ompi/mca/fbtl/plfs/Makefile.am index f5298871eb3..68fb67d034d 100644 --- a/ompi/mca/fbtl/plfs/Makefile.am +++ b/ompi/mca/fbtl/plfs/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fbtl/plfs/configure.m4 b/ompi/mca/fbtl/plfs/configure.m4 index 85627198da6..c7502b51107 100644 --- a/ompi/mca/fbtl/plfs/configure.m4 +++ b/ompi/mca/fbtl/plfs/configure.m4 @@ -6,21 +6,21 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008-2014 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # -# MCA_fbtl_plfs_CONFIG(action-if-can-compile, +# MCA_fbtl_plfs_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_fbtl_plfs_CONFIG],[ diff --git a/ompi/mca/fbtl/plfs/fbtl_plfs.c b/ompi/mca/fbtl/plfs/fbtl_plfs.c index 5696cf02980..df4391a8f04 100644 --- a/ompi/mca/fbtl/plfs/fbtl_plfs.c +++ b/ompi/mca/fbtl/plfs/fbtl_plfs.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -53,7 +53,7 @@ int mca_fbtl_plfs_component_init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ return OMPI_SUCCESS; -} +} struct mca_fbtl_base_module_1_0_0_t * mca_fbtl_plfs_component_file_query (mca_io_ompio_file_t *fh, int *priority) { @@ -67,9 +67,9 @@ mca_fbtl_plfs_component_file_query (mca_io_ompio_file_t *fh, int *priority) { return &plfs; } -int mca_fbtl_plfs_component_file_unquery (mca_io_ompio_file_t *file) { +int mca_fbtl_plfs_component_file_unquery (mca_io_ompio_file_t *file) { /* This function might be needed for some purposes later. for now it - * does not have anything to do since there are no steps which need + * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ return OMPI_SUCCESS; @@ -79,7 +79,7 @@ int mca_fbtl_plfs_module_init (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } - + int mca_fbtl_plfs_module_finalize (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } diff --git a/ompi/mca/fbtl/plfs/fbtl_plfs.h b/ompi/mca/fbtl/plfs/fbtl_plfs.h index b486a6753a7..40173b25245 100644 --- a/ompi/mca/fbtl/plfs/fbtl_plfs.h +++ b/ompi/mca/fbtl/plfs/fbtl_plfs.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -44,7 +44,7 @@ OMPI_MODULE_DECLSPEC extern mca_fbtl_base_component_2_0_0_t mca_fbtl_plfs_compon * ****************************************************************** * ********* functions which are implemented in this module ********* * ****************************************************************** - */ + */ ssize_t mca_fbtl_plfs_preadv (mca_io_ompio_file_t *file ); ssize_t mca_fbtl_plfs_pwritev (mca_io_ompio_file_t *file ); @@ -57,8 +57,8 @@ ssize_t mca_fbtl_plfs_ipwritev (mca_io_ompio_file_t *file, * ****************************************************************** * ************ functions implemented in this module end ************ * ****************************************************************** - */ - + */ + END_C_DECLS #endif /* MCA_FBTL_PLFS_H */ diff --git a/ompi/mca/fbtl/plfs/fbtl_plfs_component.c b/ompi/mca/fbtl/plfs/fbtl_plfs_component.c index 985b29db583..aa2b9f347a6 100644 --- a/ompi/mca/fbtl/plfs/fbtl_plfs_component.c +++ b/ompi/mca/fbtl/plfs/fbtl_plfs_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/fbtl/plfs/fbtl_plfs_ipreadv.c b/ompi/mca/fbtl/plfs/fbtl_plfs_ipreadv.c index f4fce7cd9f1..9cb16785034 100644 --- a/ompi/mca/fbtl/plfs/fbtl_plfs_ipreadv.c +++ b/ompi/mca/fbtl/plfs/fbtl_plfs_ipreadv.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fbtl/plfs/fbtl_plfs_ipwritev.c b/ompi/mca/fbtl/plfs/fbtl_plfs_ipwritev.c index adf01c3252e..4365ac99238 100644 --- a/ompi/mca/fbtl/plfs/fbtl_plfs_ipwritev.c +++ b/ompi/mca/fbtl/plfs/fbtl_plfs_ipwritev.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fbtl/plfs/fbtl_plfs_preadv.c b/ompi/mca/fbtl/plfs/fbtl_plfs_preadv.c index 2ea3b02921c..26e60065a5a 100644 --- a/ompi/mca/fbtl/plfs/fbtl_plfs_preadv.c +++ b/ompi/mca/fbtl/plfs/fbtl_plfs_preadv.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,103 +28,27 @@ ssize_t mca_fbtl_plfs_preadv (mca_io_ompio_file_t *fh ) { - Plfs_fd *pfd = NULL; + Plfs_fd *pfd = fh->f_fs_ptr; plfs_error_t plfs_ret; - pfd = fh->f_fs_ptr; ssize_t total_bytes_read=0; - - int i, block=1; - struct iovec *iov = NULL; - int iov_count = 0; - OMPI_MPI_OFFSET_TYPE iov_offset = 0; + int i; + ssize_t bytes_read; if (NULL == fh->f_io_array) { return OMPI_ERROR; } - iov = (struct iovec *) malloc - (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec)); - if (NULL == iov) { - opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - for (i=0 ; if_num_of_io_entries ; i++) { - if (0 == iov_count) { - iov[iov_count].iov_base = fh->f_io_array[i].memory_address; - iov[iov_count].iov_len = fh->f_io_array[i].length; - iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset; - iov_count ++; - } - - if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { - block ++; - iov = (struct iovec *)realloc - (iov, OMPIO_IOVEC_INITIAL_SIZE * block * - sizeof(struct iovec)); - if (NULL == iov) { - opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - 
} - } - - if (fh->f_num_of_io_entries != i+1) { - if (((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == - (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset) { - iov[iov_count].iov_base = - fh->f_io_array[i+1].memory_address; - iov[iov_count].iov_len = fh->f_io_array[i+1].length; - iov_count ++; - continue; - } - } - - // Find the total number of bytes to be read. - size_t bytes = 0; - for (int i = 0; i < iov_count; ++i) { - bytes += iov[i].iov_len; - } - - // Allocate a temporary buffer to hold the data - char *buffer; - buffer = (char *) malloc (bytes); - if (buffer == NULL) { - return OMPI_ERROR; - } - - // Read the data - ssize_t bytes_read; - plfs_ret = plfs_read( pfd, buffer, bytes, iov_offset, &bytes_read ); + plfs_ret = plfs_read( pfd, fh->f_io_array[i].memory_address, fh->f_io_array[i].length, + (off_t )fh->f_io_array[i].offset, &bytes_read ); if (PLFS_SUCCESS != plfs_ret) { opal_output(0, "fbtl_plfs_preadv: Error in plfs_read:\n%s\n", strplfserr(plfs_ret)); return OMPI_ERROR; } - + if (bytes_read < 0) return OMPI_ERROR; total_bytes_read += bytes_read; - // Copy the data from BUFFER into the memory specified by IOV - bytes = bytes_read; - for (int i = 0; i < iov_count; ++i) { - size_t copy = MIN (iov[i].iov_len, bytes); - (void) memcpy ((void *) iov[i].iov_base, (void *) buffer, copy); - buffer += copy; - bytes -= copy; - if (bytes == 0) { - break; - } - } - iov_count = 0; - if ( NULL != buffer ) { - free (buffer); - buffer=NULL; - } - } - - if (NULL != iov) { - free (iov); - iov = NULL; } return total_bytes_read; diff --git a/ompi/mca/fbtl/plfs/fbtl_plfs_pwritev.c b/ompi/mca/fbtl/plfs/fbtl_plfs_pwritev.c index de2849960bc..cd63c9db5a2 100644 --- a/ompi/mca/fbtl/plfs/fbtl_plfs_pwritev.c +++ b/ompi/mca/fbtl/plfs/fbtl_plfs_pwritev.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,103 +28,27 @@ ssize_t mca_fbtl_plfs_pwritev (mca_io_ompio_file_t *fh ) { - Plfs_fd *pfd = NULL; + Plfs_fd *pfd = fh->f_fs_ptr; plfs_error_t plfs_ret; - pfd = fh->f_fs_ptr; ssize_t total_bytes_written=0; - - int i, block = 1; - struct iovec *iov = NULL; - int iov_count = 0; - OMPI_MPI_OFFSET_TYPE iov_offset = 0; + ssize_t bytes_written; + int i; if (NULL == fh->f_io_array) { return OMPI_ERROR; } - iov = (struct iovec *) malloc - (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec)); - if (NULL == iov) { - opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - for (i=0 ; if_num_of_io_entries ; i++) { - if (0 == iov_count) { - iov[iov_count].iov_base = fh->f_io_array[i].memory_address; - iov[iov_count].iov_len = fh->f_io_array[i].length; - iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset; - iov_count ++; - } - - if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { - block ++; - iov = (struct iovec *)realloc - (iov, OMPIO_IOVEC_INITIAL_SIZE * block * - sizeof(struct iovec)); - if (NULL == iov) { - opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - } - - if (fh->f_num_of_io_entries != i+1) { - if (((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == - (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset) { - iov[iov_count].iov_base = - fh->f_io_array[i+1].memory_address; - iov[iov_count].iov_len = fh->f_io_array[i+1].length; - iov_count ++; - continue; - } - } - - // Find the total number of bytes to be written. 
- size_t bytes = 0; - for (int i = 0; i < iov_count; ++i) { - bytes += iov[i].iov_len; - } - - // Allocate a temporary buffer to hold the data - char *buffer=NULL; - buffer = (char *) malloc (bytes); - if (buffer == NULL) { - return OMPI_ERROR; - } - - // Copy the data into BUFFER. - size_t to_copy = bytes; - char *bp = buffer; - for (int i = 0; i < iov_count; ++i) { - size_t copy = MIN (iov[i].iov_len, to_copy); - bp = mempcpy ((void *) bp, (void *) iov[i].iov_base, copy); - to_copy -= copy; - if (to_copy == 0) { - break; - } - } - - // Write the data - ssize_t bytes_written; - - plfs_ret = plfs_write( pfd, buffer, bytes, iov_offset, 0, &bytes_written ); + plfs_ret = plfs_write( pfd, fh->f_io_array[i].memory_address, + fh->f_io_array[i].length, + (off_t) fh->f_io_array[i].offset, + fh->f_rank, &bytes_written ); if (PLFS_SUCCESS != plfs_ret) { opal_output(0, "fbtl_plfs_pwritev: Error in plfs_write:\n%s\n", strplfserr(plfs_ret)); return OMPI_ERROR; } total_bytes_written += bytes_written; - iov_count = 0; - if ( NULL != buffer ) { - free ( buffer ); - buffer=NULL; - } - } - - if (NULL != iov) { - free (iov); - iov = NULL; } - + return total_bytes_written; } diff --git a/ompi/mca/fbtl/posix/Makefile.am b/ompi/mca/fbtl/posix/Makefile.am index 1a27c995c69..2c806f08e00 100644 --- a/ompi/mca/fbtl/posix/Makefile.am +++ b/ompi/mca/fbtl/posix/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fbtl/posix/configure.m4 b/ompi/mca/fbtl/posix/configure.m4 index 8e7c2e034ea..8a6d548ea58 100644 --- a/ompi/mca/fbtl/posix/configure.m4 +++ b/ompi/mca/fbtl/posix/configure.m4 @@ -6,20 +6,20 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2012 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # -# MCA_fbtl_posix_CONFIG(action-if-can-compile, +# MCA_fbtl_posix_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_fbtl_posix_CONFIG],[ diff --git a/ompi/mca/fbtl/posix/fbtl_posix.c b/ompi/mca/fbtl/posix/fbtl_posix.c index b38280e650a..4c6d21ab011 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix.c +++ b/ompi/mca/fbtl/posix/fbtl_posix.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -71,9 +71,9 @@ static mca_fbtl_base_module_1_0_0_t posix = { int mca_fbtl_posix_component_init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ - + return OMPI_SUCCESS; -} +} struct mca_fbtl_base_module_1_0_0_t * mca_fbtl_posix_component_file_query (mca_io_ompio_file_t *fh, int *priority) { @@ -88,16 +88,16 @@ mca_fbtl_posix_component_file_query (mca_io_ompio_file_t *fh, int *priority) { return &posix; } -int mca_fbtl_posix_component_file_unquery (mca_io_ompio_file_t *file) { +int mca_fbtl_posix_component_file_unquery (mca_io_ompio_file_t *file) { /* This function might be needed for some purposes later. for now it - * does not have anything to do since there are no steps which need + * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ return OMPI_SUCCESS; } int mca_fbtl_posix_module_init (mca_io_ompio_file_t *file) { - + #if defined (FBTL_POSIX_HAVE_AIO) long val = sysconf(_SC_AIO_MAX); if ( -1 != val ) { @@ -107,7 +107,7 @@ int mca_fbtl_posix_module_init (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } - + int mca_fbtl_posix_module_finalize (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } @@ -125,9 +125,9 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req) if ( 0 == data->aio_req_status[i]){ data->aio_open_reqs--; lcount++; - /* assuming right now that aio_return will return + /* assuming right now that aio_return will return ** the number of bytes written/read and not an error code, - ** since aio_error should have returned an error in that + ** since aio_error should have returned an error in that ** case and not 0 ( which means request is complete) */ data->aio_total_len += aio_return (&data->aio_reqs[i]); @@ -137,7 +137,7 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req) continue; } else { - 
/* an error occured. Mark the request done, but + /* an error occured. Mark the request done, but set an error code in the status */ req->req_ompi.req_status.MPI_ERROR = OMPI_ERROR; req->req_ompi.req_status._ucount = data->aio_total_len; @@ -161,7 +161,7 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req) } else { data->aio_last_active_req = data->aio_req_count; - } + } for ( i=data->aio_first_active_req; i< data->aio_last_active_req; i++ ) { if ( FBTL_POSIX_READ == data->aio_req_type ) { if (-1 == aio_read(&data->aio_reqs[i])) { @@ -180,7 +180,7 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req) printf("posting new batch: first=%d last=%d\n", data->aio_first_active_req, data->aio_last_active_req ); #endif } - + if ( 0 == data->aio_open_reqs ) { /* all pending operations are finished for this request */ req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; diff --git a/ompi/mca/fbtl/posix/fbtl_posix.h b/ompi/mca/fbtl/posix/fbtl_posix.h index cb8b16a3175..df6fd29e4fb 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix.h +++ b/ompi/mca/fbtl/posix/fbtl_posix.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -46,7 +46,7 @@ OMPI_MODULE_DECLSPEC extern mca_fbtl_base_component_2_0_0_t mca_fbtl_posix_compo * ****************************************************************** * ********* functions which are implemented in this module ********* * ****************************************************************** - */ + */ ssize_t mca_fbtl_posix_preadv (mca_io_ompio_file_t *file ); ssize_t mca_fbtl_posix_pwritev (mca_io_ompio_file_t *file ); @@ -57,7 +57,7 @@ ssize_t mca_fbtl_posix_ipwritev (mca_io_ompio_file_t *file, bool mca_fbtl_posix_progress ( mca_ompio_request_t *req); void mca_fbtl_posix_request_free ( mca_ompio_request_t *req); - + struct mca_fbtl_posix_request_data_t { int aio_req_count; /* total number of aio reqs */ int aio_open_reqs; /* number of unfinished reqs */ @@ -82,8 +82,8 @@ typedef struct mca_fbtl_posix_request_data_t mca_fbtl_posix_request_data_t; * ****************************************************************** * ************ functions implemented in this module end ************ * ****************************************************************** - */ - + */ + END_C_DECLS #endif /* MCA_FBTL_POSIX_H */ diff --git a/ompi/mca/fbtl/posix/fbtl_posix_component.c b/ompi/mca/fbtl/posix/fbtl_posix_component.c index 7dfb5d6eb3f..6ffceaea807 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_component.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c b/ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c index ee4ac2b0345..00eaedeaf74 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -46,13 +46,13 @@ ssize_t mca_fbtl_posix_ipreadv (mca_io_ompio_file_t *fh, opal_output (1,"could not allocate memory\n"); return 0; } - + data->aio_req_count = fh->f_num_of_io_entries; data->aio_open_reqs = fh->f_num_of_io_entries; data->aio_req_type = FBTL_POSIX_READ; data->aio_req_chunks = fbtl_posix_max_aio_active_reqs; data->aio_total_len = 0; - data->aio_reqs = (struct aiocb *) malloc (sizeof(struct aiocb) * + data->aio_reqs = (struct aiocb *) malloc (sizeof(struct aiocb) * fh->f_num_of_io_entries); if (NULL == data->aio_reqs) { opal_output(1, "OUT OF MEMORY\n"); @@ -85,7 +85,7 @@ ssize_t mca_fbtl_posix_ipreadv (mca_io_ompio_file_t *fh, } else { data->aio_last_active_req = data->aio_req_count; - } + } for (i=0; i < data->aio_last_active_req; i++) { if (-1 == aio_read(&data->aio_reqs[i])) { opal_output(1, "aio_read() error: %s", strerror(errno)); diff --git a/ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c b/ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c index 
058b5eb723d..1d869c2a756 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -45,13 +45,13 @@ ssize_t mca_fbtl_posix_ipwritev (mca_io_ompio_file_t *fh, opal_output (1,"could not allocate memory\n"); return 0; } - + data->aio_req_count = fh->f_num_of_io_entries; data->aio_open_reqs = fh->f_num_of_io_entries; data->aio_req_type = FBTL_POSIX_WRITE; data->aio_req_chunks = fbtl_posix_max_aio_active_reqs; data->aio_total_len = 0; - data->aio_reqs = (struct aiocb *) malloc (sizeof(struct aiocb) * + data->aio_reqs = (struct aiocb *) malloc (sizeof(struct aiocb) * fh->f_num_of_io_entries); if (NULL == data->aio_reqs) { opal_output(1, "OUT OF MEMORY\n"); @@ -84,9 +84,9 @@ ssize_t mca_fbtl_posix_ipwritev (mca_io_ompio_file_t *fh, } else { data->aio_last_active_req = data->aio_req_count; - } + } - for (i=0; i < data->aio_last_active_req; i++) { + for (i=0; i < data->aio_last_active_req; i++) { if (-1 == aio_write(&data->aio_reqs[i])) { opal_output(1, "aio_write() error: %s", strerror(errno)); free(data->aio_req_status); diff --git a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c index 7fd8b0ab9df..ceb8b1d984c 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2011 The 
University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,12 +36,12 @@ ssize_t mca_fbtl_posix_preadv (mca_io_ompio_file_t *fh ) int iov_count = 0; OMPI_MPI_OFFSET_TYPE iov_offset = 0; ssize_t bytes_read=0, ret_code=0; - + if (NULL == fh->f_io_array) { return OMPI_ERROR; } - iov = (struct iovec *) malloc + iov = (struct iovec *) malloc (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec)); if (NULL == iov) { opal_output(1, "OUT OF MEMORY\n"); @@ -55,7 +55,7 @@ ssize_t mca_fbtl_posix_preadv (mca_io_ompio_file_t *fh ) iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset; iov_count ++; } - + if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { block ++; iov = (struct iovec *)realloc @@ -66,20 +66,20 @@ ssize_t mca_fbtl_posix_preadv (mca_io_ompio_file_t *fh ) return OMPI_ERR_OUT_OF_RESOURCE; } } - + if (fh->f_num_of_io_entries != i+1) { - if (((((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == - (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset)) && - (iov_count < IOV_MAX ) ){ - iov[iov_count].iov_base = + if (((((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset)) && + (iov_count < IOV_MAX ) ){ + iov[iov_count].iov_base = fh->f_io_array[i+1].memory_address; iov[iov_count].iov_len = fh->f_io_array[i+1].length; iov_count ++; continue; } 
} - + if (-1 == lseek (fh->fd, iov_offset, SEEK_SET)) { opal_output(1, "lseek:%s", strerror(errno)); free(iov); diff --git a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c index bcf616693b2..5208716d111 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -43,7 +43,7 @@ ssize_t mca_fbtl_posix_pwritev(mca_io_ompio_file_t *fh ) return OMPI_ERROR; } - iov = (struct iovec *) malloc + iov = (struct iovec *) malloc (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec)); if (NULL == iov) { opal_output(1, "OUT OF MEMORY\n"); @@ -57,7 +57,7 @@ ssize_t mca_fbtl_posix_pwritev(mca_io_ompio_file_t *fh ) iov_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset; iov_count ++; } - + if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { block ++; iov = (struct iovec *)realloc @@ -68,13 +68,13 @@ ssize_t mca_fbtl_posix_pwritev(mca_io_ompio_file_t *fh ) return OMPI_ERR_OUT_OF_RESOURCE; } } - + if (fh->f_num_of_io_entries != i+1) { - if ( (((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == - (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset) && + if ( (((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + 
(OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset) && (iov_count < IOV_MAX )) { - iov[iov_count].iov_base = + iov[iov_count].iov_base = fh->f_io_array[i+1].memory_address; iov[iov_count].iov_len = fh->f_io_array[i+1].length; iov_count ++; @@ -82,16 +82,16 @@ ssize_t mca_fbtl_posix_pwritev(mca_io_ompio_file_t *fh ) } } /* - printf ("RANK: %d Entries: %d count: %d\n", + printf ("RANK: %d Entries: %d count: %d\n", fh->f_rank, fh->f_num_of_io_entries, iov_count); for (j=0 ; jfd, iov_offset, SEEK_SET)) { opal_output(1, "lseek:%s", strerror(errno)); diff --git a/ompi/mca/fbtl/pvfs2/Makefile.am b/ompi/mca/fbtl/pvfs2/Makefile.am index 116dbcd0c59..fc877c819c1 100644 --- a/ompi/mca/fbtl/pvfs2/Makefile.am +++ b/ompi/mca/fbtl/pvfs2/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fbtl/pvfs2/configure.m4 b/ompi/mca/fbtl/pvfs2/configure.m4 index a5bef3b077b..6fea62d4d07 100644 --- a/ompi/mca/fbtl/pvfs2/configure.m4 +++ b/ompi/mca/fbtl/pvfs2/configure.m4 @@ -6,21 +6,21 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008-2012 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # -# MCA_fbtl_pvfs2_CONFIG(action-if-can-compile, +# MCA_fbtl_pvfs2_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_fbtl_pvfs2_CONFIG],[ diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.c index cd017918e56..3289fcd62a4 100644 --- a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.c +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -52,9 +52,9 @@ static mca_fbtl_base_module_1_0_0_t pvfs2 = { int mca_fbtl_pvfs2_component_init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ - + return OMPI_SUCCESS; -} +} struct mca_fbtl_base_module_1_0_0_t * mca_fbtl_pvfs2_component_file_query (mca_io_ompio_file_t *fh, int *priority) { @@ -69,9 +69,9 @@ mca_fbtl_pvfs2_component_file_query (mca_io_ompio_file_t *fh, int *priority) { return &pvfs2; } -int mca_fbtl_pvfs2_component_file_unquery (mca_io_ompio_file_t *file) { +int mca_fbtl_pvfs2_component_file_unquery (mca_io_ompio_file_t *file) { /* This function might be needed for some purposes later. 
for now it - * does not have anything to do since there are no steps which need + * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ return OMPI_SUCCESS; @@ -81,7 +81,7 @@ int mca_fbtl_pvfs2_module_init (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } - + int mca_fbtl_pvfs2_module_finalize (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.h b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.h index 5bf2ac5ab56..eab4e2c1dde 100644 --- a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.h +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -55,7 +55,7 @@ OMPI_MODULE_DECLSPEC extern mca_fbtl_base_component_2_0_0_t mca_fbtl_pvfs2_compo * ****************************************************************** * ********* functions which are implemented in this module ********* * ****************************************************************** - */ + */ ssize_t mca_fbtl_pvfs2_preadv (mca_io_ompio_file_t *file); ssize_t mca_fbtl_pvfs2_pwritev (mca_io_ompio_file_t *file); @@ -68,8 +68,8 @@ ssize_t mca_fbtl_pvfs2_ipwritev (mca_io_ompio_file_t *file, * ****************************************************************** * ************ functions implemented in this module end ************ * ****************************************************************** - */ - + */ + END_C_DECLS #endif /* MCA_FBTL_PVFS2_H */ diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_component.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_component.c index 34ac9ce4ddc..45e0a58b73b 100644 --- a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_component.c +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipreadv.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipreadv.c index 33794d4ad63..75a5edee547 100644 --- a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipreadv.c +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipreadv.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipwritev.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipwritev.c index 525c62bc3d5..0915f3a0c73 100644 --- a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipwritev.c +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipwritev.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c index 59f225f9920..61e9e2460c7 100644 --- a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* This code is based on the PVFS2 ADIO module in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -53,8 +53,8 @@ ssize_t mca_fbtl_pvfs2_preadv (mca_io_ompio_file_t *fh) for (i=0 ; if_num_of_io_entries ; i++) { if (fh->f_num_of_io_entries != i+1) { - if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i+1].offset) { if (!merge) { merge_offset = (OMPI_MPI_OFFSET_TYPE) @@ -68,27 +68,27 @@ ssize_t mca_fbtl_pvfs2_preadv (mca_io_ompio_file_t *fh) } if (merge) { merge_buf = malloc (merge_length); - - ret = PVFS_Request_contiguous (merge_length, - PVFS_BYTE, + + ret = PVFS_Request_contiguous (merge_length, + PVFS_BYTE, &mem_req); if (ret != 0) { perror("PVFS_Request_contiguous() error"); return OMPI_ERROR; } - ret = PVFS_Request_contiguous (merge_length, - PVFS_BYTE, + ret = PVFS_Request_contiguous (merge_length, + PVFS_BYTE, &file_req); if (ret != 0) { perror("PVFS_Request_contiguous() error"); return OMPI_ERROR; } - ret = PVFS_sys_read (pvfs2_fs->object_ref, + ret = PVFS_sys_read (pvfs2_fs->object_ref, file_req, merge_offset, - merge_buf, + merge_buf, mem_req, - &(pvfs2_fs->credentials), + &(pvfs2_fs->credentials), &resp_io); if (ret != 0) { perror("PVFS_sys_write() error"); @@ -99,7 +99,7 @@ ssize_t mca_fbtl_pvfs2_preadv (mca_io_ompio_file_t *fh) k = 0; while (merge >= 0) { memcpy (fh->f_io_array[i-merge].memory_address, - merge_buf + k, + merge_buf + k, fh->f_io_array[i-merge].length); k += fh->f_io_array[i-merge].length; merge --; @@ -113,27 +113,27 @@ ssize_t mca_fbtl_pvfs2_preadv (mca_io_ompio_file_t *fh) } } else { - ret = PVFS_Request_contiguous (fh->f_io_array[i].length, - PVFS_BYTE, + ret = PVFS_Request_contiguous (fh->f_io_array[i].length, + PVFS_BYTE, &mem_req); if (ret != 0) { perror("PVFS_Request_contiguous() error"); return OMPI_ERROR; } - ret = PVFS_Request_contiguous (fh->f_io_array[i].length, - PVFS_BYTE, + ret = PVFS_Request_contiguous 
(fh->f_io_array[i].length, + PVFS_BYTE, &file_req); if (ret != 0) { perror("PVFS_Request_contiguous() error"); return OMPI_ERROR; } - ret = PVFS_sys_read (pvfs2_fs->object_ref, + ret = PVFS_sys_read (pvfs2_fs->object_ref, file_req, (OMPI_MPI_OFFSET_TYPE) fh ->f_io_array[i].offset, - fh->f_io_array[i].memory_address, + fh->f_io_array[i].memory_address, mem_req, - &(pvfs2_fs->credentials), + &(pvfs2_fs->credentials), &resp_io); if (ret != 0) { perror("PVFS_sys_write() error"); diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c index dbc4a978136..31c5b46c5df 100644 --- a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* This code is based on the PVFS2 ADIO module in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -54,8 +54,8 @@ ssize_t mca_fbtl_pvfs2_pwritev (mca_io_ompio_file_t *fh ) for (i=0 ; if_num_of_io_entries ; i++) { if (fh->f_num_of_io_entries != i+1) { - if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i+1].offset) { if (!merge) { merge_offset = (OMPI_MPI_OFFSET_TYPE) @@ -79,40 +79,40 @@ ssize_t mca_fbtl_pvfs2_pwritev (mca_io_ompio_file_t *fh ) merge_buf = malloc (merge_length); k = 0; while (merge >= 0) { - memcpy (merge_buf + k, + memcpy (merge_buf + k, fh->f_io_array[i-merge].memory_address, fh->f_io_array[i-merge].length); k += fh->f_io_array[i-merge].length; merge --; } - - ret = PVFS_Request_contiguous (merge_length, - PVFS_BYTE, + + ret = PVFS_Request_contiguous (merge_length, + PVFS_BYTE, &mem_req); if (ret != 0) { perror("PVFS_Request_contiguous() error"); return OMPI_ERROR; } - ret = PVFS_Request_contiguous (merge_length, - PVFS_BYTE, + ret = PVFS_Request_contiguous (merge_length, + PVFS_BYTE, &file_req); if (ret != 0) { perror("PVFS_Request_contiguous() error"); return OMPI_ERROR; } - ret = PVFS_sys_write (pvfs2_fs->object_ref, + ret = PVFS_sys_write (pvfs2_fs->object_ref, file_req, merge_offset, - merge_buf, + merge_buf, mem_req, - &(pvfs2_fs->credentials), + &(pvfs2_fs->credentials), &resp_io); if (ret != 0) { perror("PVFS_sys_write() error"); return OMPI_ERROR; } total_bytes_written += (ssize_t)resp_io.total_completed; - + merge = 0; merge_offset = 0; merge_length = 0; @@ -122,27 +122,27 @@ ssize_t mca_fbtl_pvfs2_pwritev (mca_io_ompio_file_t *fh ) } } else { - ret = PVFS_Request_contiguous (fh->f_io_array[i].length, - PVFS_BYTE, + ret = PVFS_Request_contiguous (fh->f_io_array[i].length, + PVFS_BYTE, &mem_req); if (ret != 0) { perror("PVFS_Request_contiguous() error"); return OMPI_ERROR; } - ret = PVFS_Request_contiguous (fh->f_io_array[i].length, - 
PVFS_BYTE, + ret = PVFS_Request_contiguous (fh->f_io_array[i].length, + PVFS_BYTE, &file_req); if (ret != 0) { perror("PVFS_Request_contiguous() error"); return OMPI_ERROR; } - ret = PVFS_sys_write (pvfs2_fs->object_ref, + ret = PVFS_sys_write (pvfs2_fs->object_ref, file_req, (OMPI_MPI_OFFSET_TYPE) fh ->f_io_array[i].offset, - fh->f_io_array[i].memory_address, + fh->f_io_array[i].memory_address, mem_req, - &(pvfs2_fs->credentials), + &(pvfs2_fs->credentials), &resp_io); if (ret != 0) { perror("PVFS_sys_write() error"); @@ -152,6 +152,6 @@ ssize_t mca_fbtl_pvfs2_pwritev (mca_io_ompio_file_t *fh ) } } - + return total_bytes_written; } diff --git a/ompi/mca/fcoll/Makefile.am b/ompi/mca/fcoll/Makefile.am index 0add37ec473..4523295104f 100644 --- a/ompi/mca/fcoll/Makefile.am +++ b/ompi/mca/fcoll/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fcoll/base/Makefile.am b/ompi/mca/fcoll/base/Makefile.am index b45878f6ee9..4a7f17d6e7f 100644 --- a/ompi/mca/fcoll/base/Makefile.am +++ b/ompi/mca/fcoll/base/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fcoll/base/base.h b/ompi/mca/fcoll/base/base.h index cbfc9e9a92f..9b10b8d2f97 100644 --- a/ompi/mca/fcoll/base/base.h +++ b/ompi/mca/fcoll/base/base.h @@ -5,20 +5,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** +/** * @file * * MCA fcoll base framework public interface functions. @@ -37,7 +37,7 @@ BEGIN_C_DECLS OMPI_DECLSPEC int mca_fcoll_base_file_select(struct mca_io_ompio_file_t *file, mca_base_component_t *preferred); -OMPI_DECLSPEC int mca_fcoll_base_query_table (struct mca_io_ompio_file_t *file, +OMPI_DECLSPEC int mca_fcoll_base_query_table (struct mca_io_ompio_file_t *file, char *name); OMPI_DECLSPEC int mca_fcoll_base_file_unselect(struct mca_io_ompio_file_t *file); diff --git a/ompi/mca/fcoll/base/fcoll_base_file_select.c b/ompi/mca/fcoll/base/fcoll_base_file_select.c index 433098af365..0d8aa3ff009 100644 --- a/ompi/mca/fcoll/base/fcoll_base_file_select.c +++ b/ompi/mca/fcoll/base/fcoll_base_file_select.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,7 +30,7 @@ #include "ompi/mca/io/ompio/io_ompio.h" /* - * This structure is needed so that we can close the modules + * This structure is needed so that we can close the modules * which are not selected but were opened. mca_base_modules_close * which does this job for us requires a opal_list_t which contains * these modules @@ -58,20 +58,20 @@ static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL); * 4. Select the module with the highest priority * 5. Call the init function on the selected module so that it does the * right setup for the file - * 6. Call finalize on all the other modules which returned + * 6. Call finalize on all the other modules which returned * their module but were unfortunate to not get selected - */ + */ int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file, - mca_base_component_t *preferred) + mca_base_component_t *preferred) { - int priority; - int best_priority; - opal_list_item_t *item; + int priority; + int best_priority; + opal_list_item_t *item; mca_base_component_list_item_t *cli; - mca_fcoll_base_component_t *component; + mca_fcoll_base_component_t *component; mca_fcoll_base_component_t *best_component; - mca_fcoll_base_module_t *module; + mca_fcoll_base_module_t *module; opal_list_t queried; queried_module_t *om; int err = MPI_SUCCESS; @@ -79,19 +79,19 @@ int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file, /* Check and see if a preferred component was provided. 
If it was provided then it should be used (if possible) */ if (NULL != preferred) { - + /* We have a preferred component. Check if it is available and if so, whether it wants to run */ opal_output_verbose(10, ompi_fcoll_base_framework.framework_output, "fcoll:base:file_select: Checking preferred component: %s", preferred->mca_component_name); - - /* query the component for its priority and get its module + + /* query the component for its priority and get its module structure. This is necessary to proceed */ - + component = (mca_fcoll_base_component_t *)preferred; module = component->fcollm_file_query (file, &priority); - if (NULL != module && + if (NULL != module && NULL != module->fcoll_module_init) { /* this query seems to have returned something legitimate @@ -104,7 +104,7 @@ int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file, file->f_fcoll_component = preferred; return module->fcoll_module_init(file); - } + } /* His preferred component is present, but is unable to * run. This is not a good sign. 
We should try selecting * some other component We let it fall through and select @@ -121,7 +121,7 @@ int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file, * All we need to do is to go through the list of available * components and find the one which has the highest priority and * use that for this file - */ + */ best_component = NULL; best_priority = -1; @@ -143,21 +143,21 @@ int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file, } else { /* * call the query function and see what it returns - */ + */ module = component->fcollm_file_query (file, &priority); if (NULL == module || NULL == module->fcoll_module_init) { /* * query did not return any action which can be used - */ + */ opal_output_verbose(10, ompi_fcoll_base_framework.framework_output, "select: query returned failure"); } else { opal_output_verbose(10, ompi_fcoll_base_framework.framework_output, "select: query returned priority %d", priority); - /* + /* * is this the best component we have found till now? */ if (priority > best_priority) { @@ -174,8 +174,8 @@ int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file, return OMPI_ERR_OUT_OF_RESOURCE; } om->om_component = component; - om->om_module = module; - opal_list_append(&queried, (opal_list_item_t *)om); + om->om_module = module; + opal_list_append(&queried, (opal_list_item_t *)om); } /* end else of if (NULL == module) */ } /* end else of if (NULL == component->fcollm_init) */ } /* end for ... end of traversal */ @@ -201,7 +201,7 @@ int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file, * returned their priorities from the query. We now have to * unquery() those components which have not been selected and * init() the component which was selected - */ + */ while (NULL != (item = opal_list_remove_first(&queried))) { om = (queried_module_t *) item; if (om->om_component == best_component) { @@ -213,7 +213,7 @@ int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file, * defined. 
Whereever a function pointer is null in the * module structure we need to fill it in with the base * structure function pointers. This is yet to be done - */ + */ /* * We don return here coz we still need to go through and @@ -245,7 +245,7 @@ int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file, } /* if not best component */ OBJ_RELEASE(om); } /* traversing through the entire list */ - + opal_output_verbose(10, ompi_fcoll_base_framework.framework_output, "select: component %s selected", best_component->fcollm_version.mca_component_name); @@ -258,19 +258,22 @@ int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file, int mca_fcoll_base_query_table (struct mca_io_ompio_file_t *file, char *name) { if (!strcmp (name, "individual")) { - if ((int)file->f_cc_size >= file->f_bytes_per_agg && + if ((int)file->f_cc_size >= file->f_bytes_per_agg && file->f_cc_size >= file->f_stripe_size) { return 1; } + if ( 2 >= (int)file->f_size ){ + return 1; + } } if (!strcmp (name, "dynamic")) { - if ((int)file->f_cc_size < file->f_bytes_per_agg && + if ((int)file->f_cc_size < file->f_bytes_per_agg && file->f_cc_size >= file->f_stripe_size) { return 1; } } if (!strcmp (name, "two_phase")) { - if ((int)file->f_cc_size < file->f_bytes_per_agg && + if ((int)file->f_cc_size < file->f_bytes_per_agg && file->f_cc_size < file->f_stripe_size) { return 1; } diff --git a/ompi/mca/fcoll/base/fcoll_base_file_unselect.c b/ompi/mca/fcoll/base/fcoll_base_file_unselect.c index f461f770c47..b4b614ca8f8 100644 --- a/ompi/mca/fcoll/base/fcoll_base_file_unselect.c +++ b/ompi/mca/fcoll/base/fcoll_base_file_unselect.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fcoll/base/fcoll_base_find_available.c b/ompi/mca/fcoll/base/fcoll_base_find_available.c index 20736a354ee..47303eb5c12 100644 --- a/ompi/mca/fcoll/base/fcoll_base_find_available.c +++ b/ompi/mca/fcoll/base/fcoll_base_find_available.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,18 +34,18 @@ static int init_query(const mca_base_component_t *m, static int init_query_2_0_0(const mca_base_component_t *component, bool enable_progress_threads, bool enable_mpi_threads); - + int mca_fcoll_base_find_available(bool enable_progress_threads, bool enable_mpi_threads) { mca_base_component_list_item_t *cli, *next; - /* The list of components which we should check is already present - in mca_fcoll_base_components_opened, which was established in + /* The list of components which we should check is already present + in mca_fcoll_base_components_opened, which was established in mca_fcoll_base_open */ OPAL_LIST_FOREACH_SAFE(cli, next, &ompi_fcoll_base_framework.framework_components, mca_base_component_list_item_t) { - /* Now for this entry, we have to determine the thread level. Call + /* Now for this entry, we have to determine the thread level. 
Call a subroutine to do the job for us */ if (OMPI_SUCCESS != init_query(cli->cli_component, @@ -70,14 +70,14 @@ int mca_fcoll_base_find_available(bool enable_progress_threads, /* All done */ return OMPI_SUCCESS; } - - + + static int init_query(const mca_base_component_t *m, bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { int ret; - + opal_output_verbose(10, ompi_fcoll_base_framework.framework_output, "fcoll:find_available: querying fcoll component %s", m->mca_component_name); @@ -116,11 +116,11 @@ static int init_query(const mca_base_component_t *m, static int init_query_2_0_0(const mca_base_component_t *component, bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { - mca_fcoll_base_component_2_0_0_t *fcoll = + mca_fcoll_base_component_2_0_0_t *fcoll = (mca_fcoll_base_component_2_0_0_t *) component; - + return fcoll->fcollm_init_query(enable_progress_threads, enable_mpi_threads); } diff --git a/ompi/mca/fcoll/base/fcoll_base_frame.c b/ompi/mca/fcoll/base/fcoll_base_frame.c index 77bba485db3..d4520cfd89a 100644 --- a/ompi/mca/fcoll/base/fcoll_base_frame.c +++ b/ompi/mca/fcoll/base/fcoll_base_frame.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fcoll/configure.m4 b/ompi/mca/fcoll/configure.m4 index 69dcb3ebcf8..30f5cbfc52b 100644 --- a/ompi/mca/fcoll/configure.m4 +++ b/ompi/mca/fcoll/configure.m4 @@ -1,22 +1,28 @@ # -*- shell-script -*- # -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # MCA_ompi_fcoll_CONFIG(project_name, framework_name) # ------------------------------------------- -AC_DEFUN([MCA_ompi_fcoll_CONFIG], +AC_DEFUN([MCA_ompi_fcoll_CONFIG], [ - # An AC-ARG-ENABLE for mpi-io was set in ompi/mca/io/configure.m4. - # If it's no, we shouldn't bother building anything in fcoll. - AS_IF([test "$enable_mpi_io" != "no"], - [want_mpi_io=1], - [want_mpi_io=0]) - MCA_CONFIGURE_FRAMEWORK([$1], [$2], [$want_mpi_io]) + OPAL_VAR_SCOPE_PUSH([want_io_ompio]) + + AS_IF([test "$enable_mpi_io" != "no" && + test "$enable_io_ompio" != "no"], + [want_io_ompio=1], + [want_io_ompio=0]) + + MCA_CONFIGURE_FRAMEWORK([$1], [$2], [$want_io_ompio]) + + OPAL_VAR_SCOPE_POP ]) diff --git a/ompi/mca/fcoll/dynamic/Makefile.am b/ompi/mca/fcoll/dynamic/Makefile.am index c983c24d840..e6d4cc02906 100644 --- a/ompi/mca/fcoll/dynamic/Makefile.am +++ b/ompi/mca/fcoll/dynamic/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2015 University of Houston. 
All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -23,7 +23,7 @@ sources = \ fcoll_dynamic_module.c \ fcoll_dynamic_component.c \ fcoll_dynamic_file_read_all.c \ - fcoll_dynamic_file_write_all.c + fcoll_dynamic_file_write_all.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic.h b/ompi/mca/fcoll/dynamic/fcoll_dynamic.h index 62c70606bd8..4ca2cab8233 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic.h +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic.h @@ -5,15 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -48,17 +50,17 @@ int mca_fcoll_dynamic_component_file_unquery (mca_io_ompio_file_t *file); int mca_fcoll_dynamic_module_init (mca_io_ompio_file_t *file); int mca_fcoll_dynamic_module_finalize (mca_io_ompio_file_t *file); -int mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, - void *buf, +int mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, + void *buf, int count, - struct ompi_datatype_t *datatype, + struct ompi_datatype_t *datatype, ompi_status_public_t * status); -int mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, - void *buf, +int mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, + const void *buf, int count, - struct ompi_datatype_t *datatype, + struct ompi_datatype_t *datatype, ompi_status_public_t * status); diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_component.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_component.c index 6fd25d175f5..e4aaa46a81a 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic_component.c +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c index e4cabc64a92..6d2133b1aa6 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -17,1003 +17,1057 @@ * $HEADER$ */ - #include "ompi_config.h" - #include "fcoll_dynamic.h" - - #include "mpi.h" - #include "ompi/constants.h" - #include "ompi/mca/fcoll/fcoll.h" - #include "ompi/mca/io/ompio/io_ompio.h" - #include "ompi/mca/io/io.h" - #include "math.h" - #include "ompi/mca/pml/pml.h" - #include - - #define TIME_BREAKDOWN 1 - #define DEBUG_ON 0 - - /*Used for loading file-offsets per aggregator*/ - typedef struct local_io_array{ - OMPI_MPI_OFFSET_TYPE offset; - MPI_Aint length; - int process_id; - }local_io_array; - - - static int read_heap_sort (local_io_array *io_array, - int num_entries, - int *sorted); - - - - int - mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_status_public_t *status) - { - MPI_Aint position = 0; - MPI_Aint total_bytes = 0; /* total bytes to be read */ - MPI_Aint bytes_to_read_in_cycle = 0; /* left to be read in a cycle*/ - MPI_Aint bytes_per_cycle = 0; /* total read in each cycle by each process*/ - int index = 0, ret=OMPI_SUCCESS; - int cycles = 0; - int i=0, j=0, l=0; - int n=0; /* current position in total_bytes_per_process array */ - MPI_Aint bytes_remaining = 0; /* how many bytes 
have been read from the current - value from total_bytes_per_process */ - int *sorted_file_offsets=NULL, entries_per_aggregator=0; - int bytes_received = 0; - int blocks = 0; - /* iovec structure and count of the buffer passed in */ - uint32_t iov_count = 0; - struct iovec *decoded_iov = NULL; - int iov_index = 0; - size_t current_position = 0; - struct iovec *local_iov_array=NULL, *global_iov_array=NULL; - char *receive_buf = NULL; - MPI_Aint *memory_displacements=NULL; - /* global iovec at the readers that contain the iovecs created from - file_set_view */ - uint32_t total_fview_count = 0; - int local_count = 0; - int *fview_count = NULL, *disp_index=NULL, *temp_disp_index=NULL; - int current_index=0, temp_index=0; - int **blocklen_per_process=NULL; - MPI_Aint **displs_per_process=NULL; - char *global_buf = NULL; - MPI_Aint global_count = 0; - local_io_array *file_offsets_for_agg=NULL; - - /* array that contains the sorted indices of the global_iov */ - int *sorted = NULL; - int *displs = NULL; - int dynamic_num_io_procs; - size_t max_data = 0; - int *bytes_per_process = NULL; - MPI_Aint *total_bytes_per_process = NULL; - ompi_datatype_t **sendtype = NULL; - MPI_Request *send_req=NULL, *recv_req=NULL; - - - #if TIME_BREAKDOWN - double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0; - double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0; - double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0; - print_entry nentry; - #endif - - -// if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) { -// fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; -// } - /************************************************************************** - ** In case the data is not contigous in memory, decode it into an iovec ** - **************************************************************************/ - if (! 
(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { - ret = fh->f_decode_datatype ((struct mca_io_ompio_file_t *)fh, - datatype, - count, - buf, - &max_data, - &decoded_iov, - &iov_count); - if (OMPI_SUCCESS != ret){ - goto exit; - } - } - else { - max_data = count * datatype->super.size; - } - - if ( MPI_STATUS_IGNORE != status ) { - status->_ucount = max_data; - } - - fh->f_get_num_aggregators ( &dynamic_num_io_procs); - ret = fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *) fh, - dynamic_num_io_procs, - max_data); - if (OMPI_SUCCESS != ret){ - goto exit; - } - - total_bytes_per_process = (MPI_Aint*)malloc - (fh->f_procs_per_group*sizeof(MPI_Aint)); - if (NULL == total_bytes_per_process) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - ret = fh->f_allgather_array (&max_data, - 1, - MPI_LONG, - total_bytes_per_process, - 1, - MPI_LONG, - fh->f_aggregator_index, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - if (OMPI_SUCCESS != ret){ - goto exit; - } - - for (i=0 ; if_procs_per_group ; i++) { - total_bytes += total_bytes_per_process[i]; - } - - if (NULL != total_bytes_per_process) { - free (total_bytes_per_process); - total_bytes_per_process = NULL; - } - - /********************************************************************* - *** Generate the File offsets/lengths corresponding to this write *** - ********************************************************************/ - ret = fh->f_generate_current_file_view ((struct mca_io_ompio_file_t *) fh, - max_data, - &local_iov_array, - &local_count); - - if (ret != OMPI_SUCCESS){ - goto exit; - } - - - - /* #########################################################*/ - - /************************************************************* - *** ALLGather the File View information at all processes *** - *************************************************************/ - - fview_count = (int *) malloc (fh->f_procs_per_group * sizeof (int)); - if (NULL == fview_count) { - 
opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - ret = fh->f_allgather_array (&local_count, - 1, - MPI_INT, - fview_count, - 1, - MPI_INT, - fh->f_aggregator_index, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - - if (OMPI_SUCCESS != ret){ - goto exit; - } - - displs = (int*)malloc (fh->f_procs_per_group*sizeof(int)); - if (NULL == displs) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - displs[0] = 0; - total_fview_count = fview_count[0]; - for (i=1 ; if_procs_per_group ; i++) { - total_fview_count += fview_count[i]; - displs[i] = displs[i-1] + fview_count[i-1]; - } - - #if DEBUG_ON - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - for (i=0 ; if_procs_per_group ; i++) { - printf ("%d: PROCESS: %d ELEMENTS: %d DISPLS: %d\n", - fh->f_rank, - i, - fview_count[i], - displs[i]); - } - } - #endif - - /* allocate the global iovec */ - if (0 != total_fview_count) { - global_iov_array = (struct iovec*)malloc (total_fview_count * - sizeof(struct iovec)); - if (NULL == global_iov_array) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - ret = fh->f_allgatherv_array (local_iov_array, - local_count, - fh->f_iov_type, - global_iov_array, - fview_count, - displs, - fh->f_iov_type, - fh->f_aggregator_index, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - - if (OMPI_SUCCESS != ret){ - goto exit; - } - - /* sort it */ - if (0 != total_fview_count) { - sorted = (int *)malloc (total_fview_count * sizeof(int)); - if (NULL == sorted) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - fh->f_sort_iovec (global_iov_array, total_fview_count, sorted); - } - - if (NULL != local_iov_array) { - free (local_iov_array); - local_iov_array = NULL; - } - - #if DEBUG_ON - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - for (i=0 ; if_rank, - 
global_iov_array[sorted[i]].iov_base, - global_iov_array[sorted[i]].iov_len); - } - } - #endif - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - - disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); - if (NULL == disp_index) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - blocklen_per_process = (int **)malloc (fh->f_procs_per_group * sizeof (int*)); - if (NULL == blocklen_per_process) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - displs_per_process = (MPI_Aint **)malloc (fh->f_procs_per_group * sizeof (MPI_Aint*)); - if (NULL == displs_per_process){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - for (i=0;if_procs_per_group;i++){ - blocklen_per_process[i] = NULL; - displs_per_process[i] = NULL; - } - } - - - /* - * Calculate how many bytes are read in each cycle - */ - fh->f_get_bytes_per_agg ( (int *) &bytes_per_cycle); - cycles = ceil((double)total_bytes/bytes_per_cycle); - - n = 0; - bytes_remaining = 0; - current_index = 0; - - - #if TIME_BREAKDOWN - start_rexch = MPI_Wtime(); - #endif - for (index = 0; index < cycles; index++) { - /* Getting ready for next cycle - Initializing and freeing buffers */ - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - if (NULL == sendtype){ - sendtype = (ompi_datatype_t **) - malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); - if (NULL == sendtype) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - for(l=0;lf_procs_per_group;l++){ - - disp_index[l] = 1; - - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } - blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); - if (NULL == blocklen_per_process[l]) { - 
opal_output (1, "OUT OF MEMORY for blocklen\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); - if (NULL == displs_per_process[l]){ - opal_output (1, "OUT OF MEMORY for displs\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - if (NULL != sorted_file_offsets){ - free(sorted_file_offsets); - sorted_file_offsets = NULL; - } - - if(NULL != file_offsets_for_agg){ - free(file_offsets_for_agg); - file_offsets_for_agg = NULL; - } - if (NULL != memory_displacements){ - free(memory_displacements); - memory_displacements = NULL; - } - } - - - if (cycles-1 == index) { - bytes_to_read_in_cycle = total_bytes - bytes_per_cycle*index; - } - else { - bytes_to_read_in_cycle = bytes_per_cycle; - } - - #if DEBUG_ON - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - printf ("****%d: CYCLE %d Bytes %d**********\n", - fh->f_rank, - index, - bytes_to_write_in_cycle); - } - #endif - - /* Calculate how much data will be contributed in this cycle - by each process*/ - bytes_received = 0; - - while (bytes_to_read_in_cycle) { - blocks = fview_count[0]; - for (j=0 ; jf_procs_per_group ; j++) { - if (sorted[current_index] < blocks) { - n = j; - break; - } - else { - blocks += fview_count[j+1]; - } - } - if (bytes_remaining) { - if (bytes_remaining <= bytes_to_read_in_cycle) { - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = bytes_remaining; - displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + - (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += bytes_remaining; - } - current_index ++; - bytes_to_read_in_cycle -= bytes_remaining; - bytes_remaining = 0; - if (fh->f_procs_in_group[fh->f_aggregator_index] == - fh->f_rank) { - blocklen_per_process[n] = (int *) realloc - ((void 
*)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); - displs_per_process[n] = (MPI_Aint *) realloc - ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); - blocklen_per_process[n][disp_index[n]] = 0; - displs_per_process[n][disp_index[n]] = 0; - disp_index[n] += 1; - } - continue; - } - else { - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; - displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + - (global_iov_array[sorted[current_index]].iov_len - - bytes_remaining); - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += bytes_to_read_in_cycle; - } - bytes_remaining -= bytes_to_read_in_cycle; - bytes_to_read_in_cycle = 0; - break; - } - } - else { - if (bytes_to_read_in_cycle < - (MPI_Aint) global_iov_array[sorted[current_index]].iov_len) { - if (fh->f_procs_in_group[fh->f_aggregator_index] == - fh->f_rank) { - - blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; - displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base ; - } - - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += bytes_to_read_in_cycle; - } - bytes_remaining = global_iov_array[sorted[current_index]].iov_len - - bytes_to_read_in_cycle; - bytes_to_read_in_cycle = 0; - break; - } - else { - if (fh->f_procs_in_group[fh->f_aggregator_index] == - fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = - global_iov_array[sorted[current_index]].iov_len; - displs_per_process[n][disp_index[n] - 1] = (OPAL_PTRDIFF_TYPE) - global_iov_array[sorted[current_index]].iov_base; - blocklen_per_process[n] = - (int *) realloc ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); - displs_per_process[n] = (MPI_Aint *)realloc - ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); - 
blocklen_per_process[n][disp_index[n]] = 0; - displs_per_process[n][disp_index[n]] = 0; - disp_index[n] += 1; - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_received += - global_iov_array[sorted[current_index]].iov_len; - } - bytes_to_read_in_cycle -= - global_iov_array[sorted[current_index]].iov_len; - current_index ++; - continue; - } - } - } - /* Calculate the displacement on where to put the data and allocate - the recieve buffer (global_buf) */ - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - entries_per_aggregator=0; - for (i=0;if_procs_per_group; i++){ - for (j=0;j 0) - entries_per_aggregator++ ; - } - } - if (entries_per_aggregator > 0){ - file_offsets_for_agg = (local_io_array *) - malloc(entries_per_aggregator*sizeof(local_io_array)); - if (NULL == file_offsets_for_agg) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - sorted_file_offsets = (int *) - malloc (entries_per_aggregator*sizeof(int)); - if (NULL == sorted_file_offsets){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - /*Moving file offsets to an IO array!*/ - temp_index = 0; - global_count = 0; - for (i=0;if_procs_per_group; i++){ - for(j=0;j 0){ - file_offsets_for_agg[temp_index].length = - blocklen_per_process[i][j]; - global_count += blocklen_per_process[i][j]; - file_offsets_for_agg[temp_index].process_id = i; - file_offsets_for_agg[temp_index].offset = - displs_per_process[i][j]; - temp_index++; - } - } - } - } - else{ - continue; - } - - read_heap_sort (file_offsets_for_agg, - entries_per_aggregator, - sorted_file_offsets); - - memory_displacements = (MPI_Aint *) malloc - (entries_per_aggregator * sizeof(MPI_Aint)); - memory_displacements[sorted_file_offsets[0]] = 0; - for (i=1; if_io_array = (mca_io_ompio_io_array_t *) malloc - (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); - if (NULL == fh->f_io_array) { +#include "ompi_config.h" +#include "fcoll_dynamic.h" 
+ +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include + +#define DEBUG_ON 0 + +/*Used for loading file-offsets per aggregator*/ +typedef struct mca_io_ompio_local_io_array{ + OMPI_MPI_OFFSET_TYPE offset; + MPI_Aint length; + int process_id; +}mca_io_ompio_local_io_array; + + +static int read_heap_sort (mca_io_ompio_local_io_array *io_array, + int num_entries, + int *sorted); + + + +int +mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + MPI_Aint position = 0; + MPI_Aint total_bytes = 0; /* total bytes to be read */ + MPI_Aint bytes_to_read_in_cycle = 0; /* left to be read in a cycle*/ + MPI_Aint bytes_per_cycle = 0; /* total read in each cycle by each process*/ + int index = 0, ret=OMPI_SUCCESS; + int cycles = 0; + int i=0, j=0, l=0; + int n=0; /* current position in total_bytes_per_process array */ + MPI_Aint bytes_remaining = 0; /* how many bytes have been read from the current + value from total_bytes_per_process */ + int *sorted_file_offsets=NULL, entries_per_aggregator=0; + int bytes_received = 0; + int blocks = 0; + /* iovec structure and count of the buffer passed in */ + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + int iov_index = 0; + size_t current_position = 0; + struct iovec *local_iov_array=NULL, *global_iov_array=NULL; + char *receive_buf = NULL; + MPI_Aint *memory_displacements=NULL; + /* global iovec at the readers that contain the iovecs created from + file_set_view */ + uint32_t total_fview_count = 0; + int local_count = 0; + int *fview_count = NULL, *disp_index=NULL, *temp_disp_index=NULL; + int current_index=0, temp_index=0; + int **blocklen_per_process=NULL; + MPI_Aint **displs_per_process=NULL; + char *global_buf = NULL; + MPI_Aint global_count = 0; + 
mca_io_ompio_local_io_array *file_offsets_for_agg=NULL; + + /* array that contains the sorted indices of the global_iov */ + int *sorted = NULL; + int *displs = NULL; + int dynamic_num_io_procs; + size_t max_data = 0; + MPI_Aint *total_bytes_per_process = NULL; + ompi_datatype_t **sendtype = NULL; + MPI_Request *send_req=NULL, recv_req=NULL; + int my_aggregator =-1; + bool recvbuf_is_contiguous=false; + size_t ftype_size; + OPAL_PTRDIFF_TYPE ftype_extent, lb; + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0; + double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0; + double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0; + mca_io_ompio_print_entry nentry; +#endif + + /************************************************************************** + ** 1. In case the data is not contigous in memory, decode it into an iovec + **************************************************************************/ + + opal_datatype_type_size ( &datatype->super, &ftype_size ); + opal_datatype_get_extent ( &datatype->super, &lb, &ftype_extent ); + + if ( (ftype_extent == (OPAL_PTRDIFF_TYPE) ftype_size) && + opal_datatype_is_contiguous_memory_layout(&datatype->super,1) && + 0 == lb ) { + recvbuf_is_contiguous = true; + } + + + if (! 
recvbuf_is_contiguous ) { + ret = fh->f_decode_datatype ((struct mca_io_ompio_file_t *)fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + if (OMPI_SUCCESS != ret){ + goto exit; + } + } + else { + max_data = count * datatype->super.size; + } + + if ( MPI_STATUS_IGNORE != status ) { + status->_ucount = max_data; + } + + fh->f_get_num_aggregators ( &dynamic_num_io_procs); + ret = fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *) fh, + dynamic_num_io_procs, + max_data); + if (OMPI_SUCCESS != ret){ + goto exit; + } + my_aggregator = fh->f_procs_in_group[fh->f_aggregator_index]; + + /************************************************************************** + ** 2. Determine the total amount of data to be written + **************************************************************************/ + total_bytes_per_process = (MPI_Aint*)malloc(fh->f_procs_per_group*sizeof(MPI_Aint)); + if (NULL == total_bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = fh->f_allgather_array (&max_data, + 1, + MPI_LONG, + total_bytes_per_process, + 1, + MPI_LONG, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + if (OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + for (i=0 ; if_procs_per_group ; i++) { + total_bytes += total_bytes_per_process[i]; + } + + if (NULL != total_bytes_per_process) { + free (total_bytes_per_process); + total_bytes_per_process = NULL; + } + + /********************************************************************* + *** 3. 
Generate the File offsets/lengths corresponding to this write + ********************************************************************/ + ret = fh->f_generate_current_file_view ((struct mca_io_ompio_file_t *) fh, + max_data, + &local_iov_array, + &local_count); + + if (ret != OMPI_SUCCESS){ + goto exit; + } + + /************************************************************* + *** 4. Allgather the File View information at all processes + *************************************************************/ + + fview_count = (int *) malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == fview_count) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = fh->f_allgather_array (&local_count, + 1, + MPI_INT, + fview_count, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + if (OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + displs = (int*)malloc (fh->f_procs_per_group*sizeof(int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + displs[0] = 0; + total_fview_count = fview_count[0]; + for (i=1 ; if_procs_per_group ; i++) { + total_fview_count += fview_count[i]; + displs[i] = displs[i-1] + fview_count[i-1]; + } + +#if DEBUG_ON + if (my_aggregator == fh->f_rank) { + for (i=0 ; if_procs_per_group ; i++) { + printf ("%d: PROCESS: %d ELEMENTS: %d DISPLS: %d\n", + fh->f_rank, + i, + fview_count[i], + displs[i]); +} +} +#endif + + /* allocate the global iovec */ + if (0 != total_fview_count) { + global_iov_array = (struct iovec*)malloc (total_fview_count * + sizeof(struct iovec)); + if (NULL == global_iov_array) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } +#if 
OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = fh->f_allgatherv_array (local_iov_array, + local_count, + fh->f_iov_type, + global_iov_array, + fview_count, + displs, + fh->f_iov_type, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + if (OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + /**************************************************************************************** + *** 5. Sort the global offset/lengths list based on the offsets. + *** The result of the sort operation is the 'sorted', an integer array, + *** which contains the indexes of the global_iov_array based on the offset. + *** For example, if global_iov_array[x].offset is followed by global_iov_array[y].offset + *** in the file, and that one is followed by global_iov_array[z].offset, than + *** sorted[0] = x, sorted[1]=y and sorted[2]=z; + ******************************************************************************************/ + if (0 != total_fview_count) { + sorted = (int *)malloc (total_fview_count * sizeof(int)); + if (NULL == sorted) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + fh->f_sort_iovec (global_iov_array, total_fview_count, sorted); + } + + if (NULL != local_iov_array) { + free (local_iov_array); + local_iov_array = NULL; + } + +#if DEBUG_ON + if (my_aggregator == fh->f_rank) { + for (i=0 ; if_rank, + global_iov_array[sorted[i]].iov_base, + global_iov_array[sorted[i]].iov_len); + } + } +#endif + + /************************************************************* + *** 6. 
Determine the number of cycles required to execute this + *** operation + *************************************************************/ + fh->f_get_bytes_per_agg ( (int *) &bytes_per_cycle); + cycles = ceil((double)total_bytes/bytes_per_cycle); + + if ( my_aggregator == fh->f_rank) { + disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + blocklen_per_process = (int **)malloc (fh->f_procs_per_group * sizeof (int*)); + if (NULL == blocklen_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + displs_per_process = (MPI_Aint **)malloc (fh->f_procs_per_group * sizeof (MPI_Aint*)); + if (NULL == displs_per_process){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + for (i=0;if_procs_per_group;i++){ + blocklen_per_process[i] = NULL; + displs_per_process[i] = NULL; + } + + send_req = (MPI_Request *) malloc (fh->f_procs_per_group * sizeof(MPI_Request)); + if (NULL == send_req){ + opal_output ( 1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + global_buf = (char *) malloc (bytes_per_cycle); + if (NULL == global_buf){ opal_output(1, "OUT OF MEMORY\n"); ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; - } - - fh->f_num_of_io_entries = 0; - fh->f_io_array[fh->f_num_of_io_entries].offset = - (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; - fh->f_io_array[fh->f_num_of_io_entries].length = - file_offsets_for_agg[sorted_file_offsets[0]].length; - fh->f_io_array[fh->f_num_of_io_entries].memory_address = - global_buf+memory_displacements[sorted_file_offsets[0]]; - fh->f_num_of_io_entries++; - for (i=1;if_io_array[fh->f_num_of_io_entries - 1].length += - file_offsets_for_agg[sorted_file_offsets[i]].length; - } - else{ - fh->f_io_array[fh->f_num_of_io_entries].offset = - (IOVBASE_TYPE 
*)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; - fh->f_io_array[fh->f_num_of_io_entries].length = - file_offsets_for_agg[sorted_file_offsets[i]].length; - fh->f_io_array[fh->f_num_of_io_entries].memory_address = - global_buf+memory_displacements[sorted_file_offsets[i]]; - fh->f_num_of_io_entries++; - } - } - - - #if TIME_BREAKDOWN - start_read_time = MPI_Wtime(); - #endif - - if (fh->f_num_of_io_entries) { - if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { - opal_output (1, "READ FAILED\n"); - ret = OMPI_ERROR; - goto exit; - } - } - - #if TIME_BREAKDOWN - end_read_time = MPI_Wtime(); - read_time += end_read_time - start_read_time; - #endif - /********************************************************** - ******************** DONE READING ************************ - *********************************************************/ - - temp_disp_index = (int *)calloc (1, fh->f_procs_per_group * sizeof (int)); - if (NULL == temp_disp_index) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - for (i=0; if_procs_per_group * sizeof(MPI_Request)); - if (NULL == send_req){ - opal_output ( 1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - #if TIME_BREAKDOWN - start_rcomm_time = MPI_Wtime(); - #endif - for (i=0;if_procs_per_group;i++){ - ompi_datatype_create_hindexed(disp_index[i], - blocklen_per_process[i], - displs_per_process[i], - MPI_BYTE, - &sendtype[i]); - ompi_datatype_commit(&sendtype[i]); - ret = MCA_PML_CALL (isend(global_buf, - 1, - sendtype[i], - fh->f_procs_in_group[i], - 123, - MCA_PML_BASE_SEND_STANDARD, - fh->f_comm, - &send_req[i])); - if(OMPI_SUCCESS != ret){ - goto exit; - } - } - #if TIME_BREAKDOWN - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; - #endif - } - - /********************************************************** - ********* Scatter the Data from the readers ************** - *********************************************************/ - if 
(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { - receive_buf = &((char*)buf)[position]; - } - else if (bytes_received) { - /* allocate a receive buffer and copy the data that needs - to be received into it in case the data is non-contigous - in memory */ - receive_buf = malloc (bytes_received); - if (NULL == receive_buf) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - #if TIME_BREAKDOWN - start_rcomm_time = MPI_Wtime(); - #endif - recv_req = (MPI_Request *) malloc (sizeof (MPI_Request)); - if (NULL == recv_req){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - ret = MCA_PML_CALL(irecv(receive_buf, - bytes_received, - MPI_BYTE, - fh->f_procs_in_group[fh->f_aggregator_index], - 123, - fh->f_comm, - recv_req)); - if (OMPI_SUCCESS != ret){ - goto exit; - } - - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank){ - ret = ompi_request_wait_all (fh->f_procs_per_group, - send_req, - MPI_STATUS_IGNORE); - if (OMPI_SUCCESS != ret){ - goto exit; - } - } - - ret = ompi_request_wait (recv_req, MPI_STATUS_IGNORE); - if (OMPI_SUCCESS != ret){ - goto exit; - } - position += bytes_received; - - /* If data is not contigous in memory, copy the data from the - receive buffer into the buffer passed in */ - if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { - OPAL_PTRDIFF_TYPE mem_address; - size_t remaining = 0; - size_t temp_position = 0; - - remaining = bytes_received; - - while (remaining) { - mem_address = (OPAL_PTRDIFF_TYPE) - (decoded_iov[iov_index].iov_base) + current_position; - - if (remaining >= - (decoded_iov[iov_index].iov_len - current_position)) { - memcpy ((IOVBASE_TYPE *) mem_address, - receive_buf+temp_position, - decoded_iov[iov_index].iov_len - current_position); - remaining = remaining - - (decoded_iov[iov_index].iov_len - current_position); - temp_position = temp_position + - (decoded_iov[iov_index].iov_len - current_position); - iov_index = iov_index + 1; - 
current_position = 0; - } - else { - memcpy ((IOVBASE_TYPE *) mem_address, - receive_buf+temp_position, - remaining); - current_position = current_position + remaining; - remaining = 0; - } - } - - if (NULL != receive_buf) { - free (receive_buf); - receive_buf = NULL; - } - } -#if TIME_BREAKDOWN - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; + } + + sendtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); + if (NULL == sendtype) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + for(l=0;lf_procs_per_group;l++){ + sendtype[l] = MPI_DATATYPE_NULL; + } + } + + + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rexch = MPI_Wtime(); +#endif + n = 0; + bytes_remaining = 0; + current_index = 0; + + for (index = 0; index < cycles; index++) { + /********************************************************************** + *** 7a. Getting ready for next cycle: initializing and freeing buffers + **********************************************************************/ + if (my_aggregator == fh->f_rank) { + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + fh->f_num_of_io_entries = 0; + + if (NULL != sendtype){ + for (i =0; i< fh->f_procs_per_group; i++) { + if ( MPI_DATATYPE_NULL != sendtype[i] ) { + ompi_datatype_destroy(&sendtype[i]); + sendtype[i] = MPI_DATATYPE_NULL; + } + } + } + + for(l=0;lf_procs_per_group;l++){ + disp_index[l] = 1; + + if (NULL != blocklen_per_process[l]){ + free(blocklen_per_process[l]); + blocklen_per_process[l] = NULL; + } + if (NULL != displs_per_process[l]){ + free(displs_per_process[l]); + displs_per_process[l] = NULL; + } + blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); + if (NULL == blocklen_per_process[l]) { + opal_output (1, "OUT OF MEMORY for blocklen\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); + if (NULL == 
displs_per_process[l]){ + opal_output (1, "OUT OF MEMORY for displs\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + + if (NULL != sorted_file_offsets){ + free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + + if(NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements = NULL; + } + } /* (my_aggregator == fh->f_rank */ + + /************************************************************************** + *** 7b. Determine the number of bytes to be actually read in this cycle + **************************************************************************/ + if (cycles-1 == index) { + bytes_to_read_in_cycle = total_bytes - bytes_per_cycle*index; + } + else { + bytes_to_read_in_cycle = bytes_per_cycle; + } + +#if DEBUG_ON + if (my_aggregator == fh->f_rank) { + printf ("****%d: CYCLE %d Bytes %d**********\n", + fh->f_rank, + index, + bytes_to_write_in_cycle); + } +#endif + + /***************************************************************** + *** 7c. 
Calculate how much data will be contributed in this cycle + *** by each process + *****************************************************************/ + bytes_received = 0; + + while (bytes_to_read_in_cycle) { + /* This next block identifies which process is the holder + ** of the sorted[current_index] element; + */ + blocks = fview_count[0]; + for (j=0 ; jf_procs_per_group ; j++) { + if (sorted[current_index] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + + if (bytes_remaining) { + /* Finish up a partially used buffer from the previous cycle */ + if (bytes_remaining <= bytes_to_read_in_cycle) { + /* Data fits completely into the block */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_remaining; + displs_per_process[n][disp_index[n] - 1] = + (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); + + blocklen_per_process[n] = (int *) realloc + ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); + displs_per_process[n] = (MPI_Aint *) realloc + ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); + blocklen_per_process[n][disp_index[n]] = 0; + displs_per_process[n][disp_index[n]] = 0; + disp_index[n] += 1; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_received += bytes_remaining; + } + current_index ++; + bytes_to_read_in_cycle -= bytes_remaining; + bytes_remaining = 0; + continue; + } + else { + /* the remaining data from the previous cycle is larger than the + bytes_to_write_in_cycle, so we have to segment again */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; + displs_per_process[n][disp_index[n] - 1] = + (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (global_iov_array[sorted[current_index]].iov_len + - bytes_remaining); + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + 
bytes_received += bytes_to_read_in_cycle; + } + bytes_remaining -= bytes_to_read_in_cycle; + bytes_to_read_in_cycle = 0; + break; + } + } + else { + /* No partially used entry available, have to start a new one */ + if (bytes_to_read_in_cycle < + (MPI_Aint) global_iov_array[sorted[current_index]].iov_len) { + /* This entry has more data than we can sendin one cycle */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; + displs_per_process[n][disp_index[n] - 1] = + (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base ; + } + + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_received += bytes_to_read_in_cycle; + } + bytes_remaining = global_iov_array[sorted[current_index]].iov_len - + bytes_to_read_in_cycle; + bytes_to_read_in_cycle = 0; + break; + } + else { + /* Next data entry is less than bytes_to_write_in_cycle */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = + global_iov_array[sorted[current_index]].iov_len; + displs_per_process[n][disp_index[n] - 1] = (OPAL_PTRDIFF_TYPE) + global_iov_array[sorted[current_index]].iov_base; + blocklen_per_process[n] = + (int *) realloc ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); + displs_per_process[n] = (MPI_Aint *)realloc + ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); + blocklen_per_process[n][disp_index[n]] = 0; + displs_per_process[n][disp_index[n]] = 0; + disp_index[n] += 1; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_received += + global_iov_array[sorted[current_index]].iov_len; + } + bytes_to_read_in_cycle -= + global_iov_array[sorted[current_index]].iov_len; + current_index ++; + continue; + } + } + } /* end while (bytes_to_read_in_cycle) */ + + /************************************************************************* + *** 7d. 
Calculate the displacement on where to put the data and allocate + *** the recieve buffer (global_buf) + *************************************************************************/ + if (my_aggregator == fh->f_rank) { + entries_per_aggregator=0; + for (i=0;if_procs_per_group; i++){ + for (j=0;j 0) + entries_per_aggregator++ ; + } + } + if (entries_per_aggregator > 0){ + file_offsets_for_agg = (mca_io_ompio_local_io_array *) + malloc(entries_per_aggregator*sizeof(mca_io_ompio_local_io_array)); + if (NULL == file_offsets_for_agg) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + sorted_file_offsets = (int *) + malloc (entries_per_aggregator*sizeof(int)); + if (NULL == sorted_file_offsets){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + /*Moving file offsets to an IO array!*/ + temp_index = 0; + global_count = 0; + for (i=0;if_procs_per_group; i++){ + for(j=0;j 0){ + file_offsets_for_agg[temp_index].length = + blocklen_per_process[i][j]; + global_count += blocklen_per_process[i][j]; + file_offsets_for_agg[temp_index].process_id = i; + file_offsets_for_agg[temp_index].offset = + displs_per_process[i][j]; + temp_index++; + } + } + } + } + else{ + continue; + } + + /* Sort the displacements for each aggregator */ + read_heap_sort (file_offsets_for_agg, + entries_per_aggregator, + sorted_file_offsets); + + memory_displacements = (MPI_Aint *) malloc + (entries_per_aggregator * sizeof(MPI_Aint)); + memory_displacements[sorted_file_offsets[0]] = 0; + for (i=1; if_io_array = (mca_io_ompio_io_array_t *) malloc + (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + fh->f_num_of_io_entries = 0; + fh->f_io_array[0].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; + fh->f_io_array[0].length = + 
file_offsets_for_agg[sorted_file_offsets[0]].length; + fh->f_io_array[0].memory_address = + global_buf+memory_displacements[sorted_file_offsets[0]]; + fh->f_num_of_io_entries++; + for (i=1;if_io_array[fh->f_num_of_io_entries - 1].length += + file_offsets_for_agg[sorted_file_offsets[i]].length; + } + else{ + fh->f_io_array[fh->f_num_of_io_entries].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; + fh->f_io_array[fh->f_num_of_io_entries].length = + file_offsets_for_agg[sorted_file_offsets[i]].length; + fh->f_io_array[fh->f_num_of_io_entries].memory_address = + global_buf+memory_displacements[sorted_file_offsets[i]]; + fh->f_num_of_io_entries++; + } + } + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_read_time = MPI_Wtime(); +#endif + + if (fh->f_num_of_io_entries) { + if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { + opal_output (1, "READ FAILED\n"); + ret = OMPI_ERROR; + goto exit; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_read_time = MPI_Wtime(); + read_time += end_read_time - start_read_time; +#endif + /********************************************************** + ******************** DONE READING ************************ + *********************************************************/ + + temp_disp_index = (int *)calloc (1, fh->f_procs_per_group * sizeof (int)); + if (NULL == temp_disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + for (i=0; if_procs_per_group;i++){ + send_req[i] = MPI_REQUEST_NULL; + if ( 0 < disp_index[i] ) { + ompi_datatype_create_hindexed(disp_index[i], + blocklen_per_process[i], + displs_per_process[i], + MPI_BYTE, + &sendtype[i]); + ompi_datatype_commit(&sendtype[i]); + ret = MCA_PML_CALL (isend(global_buf, + 1, + sendtype[i], + fh->f_procs_in_group[i], + 123, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &send_req[i])); + if(OMPI_SUCCESS != ret){ + goto exit; + } + } + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + 
rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + } + + /********************************************************** + *** 7f. Scatter the Data from the readers + *********************************************************/ + if ( recvbuf_is_contiguous ) { + receive_buf = &((char*)buf)[position]; + } + else if (bytes_received) { + /* allocate a receive buffer and copy the data that needs + to be received into it in case the data is non-contigous + in memory */ + receive_buf = malloc (bytes_received); + if (NULL == receive_buf) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = MCA_PML_CALL(irecv(receive_buf, + bytes_received, + MPI_BYTE, + my_aggregator, + 123, + fh->f_comm, + &recv_req)); + if (OMPI_SUCCESS != ret){ + goto exit; + } + + + if (my_aggregator == fh->f_rank){ + ret = ompi_request_wait_all (fh->f_procs_per_group, + send_req, + MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + } + + ret = ompi_request_wait (&recv_req, MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + position += bytes_received; + + /* If data is not contigous in memory, copy the data from the + receive buffer into the buffer passed in */ + if (!recvbuf_is_contiguous ) { + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + remaining = bytes_received; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + 
} + else { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + + if (NULL != receive_buf) { + free (receive_buf); + receive_buf = NULL; + } + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + } /* end for (index=0; index < cycles; index ++) */ + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rexch = MPI_Wtime(); + read_exch += end_rexch - start_rexch; + nentry.time[0] = read_time; + nentry.time[1] = rcomm_time; + nentry.time[2] = read_exch; + if (my_aggregator == fh->f_rank) + nentry.aggregator = 1; + else + nentry.aggregator = 0; + nentry.nprocs_for_coll = dynamic_num_io_procs; + if (!fh->f_full_print_queue(READ_PRINT_QUEUE)){ + fh->f_register_print_entry(READ_PRINT_QUEUE, + nentry); + } #endif - - if (NULL != recv_req){ - free(recv_req); - recv_req = NULL; - } - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank){ - fh->f_num_of_io_entries = 0; - if (NULL != fh->f_io_array) { - free (fh->f_io_array); - fh->f_io_array = NULL; - } - if (NULL != global_buf) { - free (global_buf); - global_buf = NULL; - } - for (i = 0; i < fh->f_procs_per_group; i++) - ompi_datatype_destroy(sendtype+i); - if (NULL != sendtype){ - free(sendtype); - sendtype=NULL; - } - if (NULL != send_req){ - free(send_req); - send_req = NULL; - } - if (NULL != sorted_file_offsets){ - free(sorted_file_offsets); - sorted_file_offsets = NULL; - } - if (NULL != file_offsets_for_agg){ - free(file_offsets_for_agg); - file_offsets_for_agg = NULL; - } - if (NULL != bytes_per_process){ - free(bytes_per_process); - bytes_per_process =NULL; - } - if (NULL != memory_displacements){ - free(memory_displacements); - memory_displacements= NULL; - } - } - } - - #if TIME_BREAKDOWN - end_rexch = MPI_Wtime(); - read_exch += end_rexch - start_rexch; - nentry.time[0] = read_time; - nentry.time[1] = rcomm_time; - 
nentry.time[2] = read_exch; - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) - nentry.aggregator = 1; - else - nentry.aggregator = 0; - nentry.nprocs_for_coll = dynamic_num_io_procs; - if (!fh->f_full_print_queue(READ_PRINT_QUEUE)){ - fh->f_register_print_entry(READ_PRINT_QUEUE, - nentry); - } - #endif - - exit: - if (NULL != sorted) { - free (sorted); - sorted = NULL; - } - if (NULL != global_iov_array) { - free (global_iov_array); - global_iov_array = NULL; - } - if (NULL != fview_count) { - free (fview_count); - fview_count = NULL; - } - if (NULL != decoded_iov) { - free (decoded_iov); - decoded_iov = NULL; - } - if (NULL != local_iov_array){ - free(local_iov_array); - local_iov_array=NULL; - } - - if (NULL != displs) { - free (displs); - displs = NULL; - } - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - - if (NULL != disp_index){ - free(disp_index); - disp_index = NULL; - } - - if ( NULL != blocklen_per_process){ - for(l=0;lf_procs_per_group;l++){ - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - } - - free(blocklen_per_process); - blocklen_per_process = NULL; - } - - if (NULL != displs_per_process){ - for (l=0; if_procs_per_group; l++){ - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } - } - free(displs_per_process); - displs_per_process = NULL; - } - - } - return ret; - } - - - static int read_heap_sort (local_io_array *io_array, - int num_entries, - int *sorted) - { - int i = 0; - int j = 0; - int left = 0; - int right = 0; - int largest = 0; - int heap_size = num_entries - 1; - int temp = 0; - unsigned char done = 0; - int* temp_arr = NULL; - - temp_arr = (int*)malloc(num_entries*sizeof(int)); - if (NULL == temp_arr) { - opal_output (1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - temp_arr[0] = 0; - for (i = 1; i < num_entries; ++i) { - temp_arr[i] = i; - } - /* num_entries can be a 
large no. so NO RECURSION */ - for (i = num_entries/2-1 ; i>=0 ; i--) { - done = 0; - j = i; - largest = j; - - while (!done) { - left = j*2+1; - right = j*2+2; - if ((left <= heap_size) && - (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { - largest = left; - } - else { - largest = j; - } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > - io_array[temp_arr[largest]].offset)) { - largest = right; - } - if (largest != j) { - temp = temp_arr[largest]; - temp_arr[largest] = temp_arr[j]; - temp_arr[j] = temp; - j = largest; - } - else { - done = 1; - } - } - } - - for (i = num_entries-1; i >=1; --i) { - temp = temp_arr[0]; - temp_arr[0] = temp_arr[i]; - temp_arr[i] = temp; - heap_size--; - done = 0; - j = 0; - largest = j; - - while (!done) { - left = j*2+1; - right = j*2+2; - - if ((left <= heap_size) && - (io_array[temp_arr[left]].offset > - io_array[temp_arr[j]].offset)) { - largest = left; - } - else { - largest = j; - } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > - io_array[temp_arr[largest]].offset)) { - largest = right; - } - if (largest != j) { - temp = temp_arr[largest]; - temp_arr[largest] = temp_arr[j]; - temp_arr[j] = temp; - j = largest; - } - else { - done = 1; - } - } - sorted[i] = temp_arr[i]; - } - sorted[0] = temp_arr[0]; - - if (NULL != temp_arr) { - free(temp_arr); - temp_arr = NULL; - } - return OMPI_SUCCESS; - } +exit: + if (!recvbuf_is_contiguous) { + if (NULL != receive_buf) { + free (receive_buf); + receive_buf = NULL; + } + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + if (NULL != sorted) { + free (sorted); + sorted = NULL; + } + if (NULL != global_iov_array) { + free (global_iov_array); + global_iov_array = NULL; + } + if (NULL != fview_count) { + free (fview_count); + fview_count = NULL; + } + if (NULL != decoded_iov) { + free (decoded_iov); + decoded_iov = NULL; + } + if (NULL != local_iov_array){ + free(local_iov_array); + local_iov_array=NULL; + 
} + + if (NULL != displs) { + free (displs); + displs = NULL; + } + if (my_aggregator == fh->f_rank) { + + if (NULL != sorted_file_offsets){ + free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + if (NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements= NULL; + } + if (NULL != sendtype){ + for (i = 0; i < fh->f_procs_per_group; i++) { + if ( MPI_DATATYPE_NULL != sendtype[i] ) { + ompi_datatype_destroy(&sendtype[i]); + } + } + free(sendtype); + sendtype=NULL; + } + + if (NULL != disp_index){ + free(disp_index); + disp_index = NULL; + } + + if ( NULL != blocklen_per_process){ + for(l=0;lf_procs_per_group;l++){ + if (NULL != blocklen_per_process[l]){ + free(blocklen_per_process[l]); + blocklen_per_process[l] = NULL; + } + } + + free(blocklen_per_process); + blocklen_per_process = NULL; + } + + if (NULL != displs_per_process){ + for (l=0; if_procs_per_group; l++){ + if (NULL != displs_per_process[l]){ + free(displs_per_process[l]); + displs_per_process[l] = NULL; + } + } + free(displs_per_process); + displs_per_process = NULL; + } + if ( NULL != send_req ) { + free ( send_req ); + send_req = NULL; + } + } + return ret; +} + + +static int read_heap_sort (mca_io_ompio_local_io_array *io_array, + int num_entries, + int *sorted) +{ + int i = 0; + int j = 0; + int left = 0; + int right = 0; + int largest = 0; + int heap_size = num_entries - 1; + int temp = 0; + unsigned char done = 0; + int* temp_arr = NULL; + + temp_arr = (int*)malloc(num_entries*sizeof(int)); + if (NULL == temp_arr) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + temp_arr[0] = 0; + for (i = 1; i < num_entries; ++i) { + temp_arr[i] = i; + } + /* num_entries can be a large no. 
so NO RECURSION */ + for (i = num_entries/2-1 ; i>=0 ; i--) { + done = 0; + j = i; + largest = j; + + while (!done) { + left = j*2+1; + right = j*2+2; + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { + largest = left; + } + else { + largest = j; + } + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > + io_array[temp_arr[largest]].offset)) { + largest = right; + } + if (largest != j) { + temp = temp_arr[largest]; + temp_arr[largest] = temp_arr[j]; + temp_arr[j] = temp; + j = largest; + } + else { + done = 1; + } + } + } + + for (i = num_entries-1; i >=1; --i) { + temp = temp_arr[0]; + temp_arr[0] = temp_arr[i]; + temp_arr[i] = temp; + heap_size--; + done = 0; + j = 0; + largest = j; + + while (!done) { + left = j*2+1; + right = j*2+2; + + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > + io_array[temp_arr[j]].offset)) { + largest = left; + } + else { + largest = j; + } + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > + io_array[temp_arr[largest]].offset)) { + largest = right; + } + if (largest != j) { + temp = temp_arr[largest]; + temp_arr[largest] = temp_arr[j]; + temp_arr[j] = temp; + j = largest; + } + else { + done = 1; + } + } + sorted[i] = temp_arr[i]; + } + sorted[0] = temp_arr[0]; + + if (NULL != temp_arr) { + free(temp_arr); + temp_arr = NULL; + } + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c index 75391d8219e..976aea9dcbc 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c @@ -9,7 +9,9 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,25 +33,24 @@ #define DEBUG_ON 0 -#define TIME_BREAKDOWN 0 /*Used for loading file-offsets per aggregator*/ -typedef struct local_io_array{ - OMPI_MPI_OFFSET_TYPE offset; - MPI_Aint length; - int process_id; -}local_io_array; +typedef struct mca_io_ompio_local_io_array{ + OMPI_MPI_OFFSET_TYPE offset; + MPI_Aint length; + int process_id; +}mca_io_ompio_local_io_array; -static int local_heap_sort (local_io_array *io_array, +static int local_heap_sort (mca_io_ompio_local_io_array *io_array, int num_entries, int *sorted); int mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -63,7 +64,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, int i=0, j=0, l=0; int n=0; /* current position in total_bytes_per_process array */ MPI_Aint bytes_remaining = 0; /* how many bytes have been written from the current - value from total_bytes_per_process */ + value from total_bytes_per_process */ int bytes_sent = 0, ret =0; int blocks=0, entries_per_aggregator=0; @@ -74,7 +75,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, char *send_buf = NULL; size_t current_position = 0; struct iovec *local_iov_array=NULL, *global_iov_array=NULL; - local_io_array *file_offsets_for_agg=NULL; + mca_io_ompio_local_io_array *file_offsets_for_agg=NULL; /* global iovec at the writers that contain the iovecs created from file_set_view */ uint32_t total_fview_count = 0; @@ -84,47 +85,56 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, char *global_buf = NULL; MPI_Aint global_count = 0; - - + + /* array that contains the sorted indices of the global_iov */ int *sorted = NULL, *sorted_file_offsets=NULL; int *displs = NULL; int dynamic_num_io_procs; - size_t max_data = 0, 
datatype_size = 0; + size_t max_data = 0, datatype_size = 0; int **blocklen_per_process=NULL; MPI_Aint **displs_per_process=NULL, *memory_displacements=NULL; ompi_datatype_t **recvtype = NULL; MPI_Aint *total_bytes_per_process = NULL; - MPI_Request *send_req=NULL, *recv_req=NULL; - int recv_req_count=0; - + MPI_Request send_req=NULL, *recv_req=NULL; + int my_aggregator=-1; + bool sendbuf_is_contiguous = false; + size_t ftype_size; + OPAL_PTRDIFF_TYPE ftype_extent, lb; + -#if TIME_BREAKDOWN +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN double write_time = 0.0, start_write_time = 0.0, end_write_time = 0.0; double comm_time = 0.0, start_comm_time = 0.0, end_comm_time = 0.0; double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0; - print_entry nentry; + mca_io_ompio_print_entry nentry; #endif - -// if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) { -// fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; -// } + opal_datatype_type_size ( &datatype->super, &ftype_size ); + opal_datatype_get_extent ( &datatype->super, &lb, &ftype_extent ); /************************************************************************** - ** In case the data is not contigous in memory, decode it into an iovec ** + ** 1. In case the data is not contigous in memory, decode it into an iovec **************************************************************************/ - if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { - ret = fh->f_decode_datatype ((struct mca_io_ompio_file_t *) fh, - datatype, - count, - buf, - &max_data, - &decoded_iov, - &iov_count); - if (OMPI_SUCCESS != ret ){ - goto exit; - } + if ( ( ftype_extent == (OPAL_PTRDIFF_TYPE) ftype_size) && + opal_datatype_is_contiguous_memory_layout(&datatype->super,1) && + 0 == lb ) { + sendbuf_is_contiguous = true; + } + + + + if (! 
sendbuf_is_contiguous ) { + ret = fh->f_decode_datatype ((struct mca_io_ompio_file_t *) fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + if (OMPI_SUCCESS != ret ){ + goto exit; + } } else { max_data = count * datatype->super.size; @@ -133,17 +143,19 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, if ( MPI_STATUS_IGNORE != status ) { status->_ucount = max_data; } - + fh->f_get_num_aggregators ( &dynamic_num_io_procs ); - ret = fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *) fh, + ret = fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *) fh, dynamic_num_io_procs, max_data); - + if (OMPI_SUCCESS != ret){ goto exit; } - - + my_aggregator = fh->f_procs_in_group[fh->f_aggregator_index]; + /************************************************************************** + ** 2. Determine the total amount of data to be written + **************************************************************************/ total_bytes_per_process = (MPI_Aint*)malloc (fh->f_procs_per_group*sizeof(MPI_Aint)); if (NULL == total_bytes_per_process) { @@ -152,6 +164,9 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, goto exit; } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_comm_time = MPI_Wtime(); +#endif ret = fh->f_allgather_array (&max_data, 1, MPI_LONG, @@ -162,10 +177,15 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, fh->f_procs_in_group, fh->f_procs_per_group, fh->f_comm); - + if( OMPI_SUCCESS != ret){ goto exit; } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += (end_comm_time - start_comm_time); +#endif + for (i=0 ; if_procs_per_group ; i++) { total_bytes += total_bytes_per_process[i]; } @@ -174,9 +194,10 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, free (total_bytes_per_process); total_bytes_per_process = NULL; } - + /********************************************************************* - *** Generate the File offsets/lengths corresponding to this write *** 
+ *** 3. Generate the local offsets/lengths array corresponding to + *** this write operation ********************************************************************/ ret = fh->f_generate_current_file_view( (struct mca_io_ompio_file_t *) fh, max_data, @@ -186,28 +207,29 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, goto exit; } -#if DEBUG_ON +#if DEBUG_ON for (i=0 ; if_rank, - local_iov_array[i].iov_base, - local_iov_array[i].iov_len); + + printf("%d: OFFSET: %d LENGTH: %ld\n", + fh->f_rank, + local_iov_array[i].iov_base, + local_iov_array[i].iov_len); } -#endif +#endif /************************************************************* - *** ALLGather the File View information at all processes *** + *** 4. Allgather the offset/lengths array from all processes *************************************************************/ - fview_count = (int *) malloc (fh->f_procs_per_group * sizeof (int)); if (NULL == fview_count) { opal_output (1, "OUT OF MEMORY\n"); ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_comm_time = MPI_Wtime(); +#endif ret = fh->f_allgather_array (&local_count, 1, MPI_INT, @@ -218,10 +240,14 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, fh->f_procs_in_group, fh->f_procs_per_group, fh->f_comm); - + if( OMPI_SUCCESS != ret){ goto exit; } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += (end_comm_time - start_comm_time); +#endif displs = (int*) malloc (fh->f_procs_per_group * sizeof (int)); if (NULL == displs) { @@ -236,10 +262,10 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, total_fview_count += fview_count[i]; displs[i] = displs[i-1] + fview_count[i-1]; } - + #if DEBUG_ON printf("total_fview_count : %d\n", total_fview_count); - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + if (my_aggregator == fh->f_rank) { for (i=0 ; if_procs_per_group ; i++) { printf ("%d: PROCESS: %d ELEMENTS: %d DISPLS: %d\n", fh->f_rank, @@ 
-249,20 +275,23 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, } } #endif - + /* allocate the global iovec */ if (0 != total_fview_count) { - global_iov_array = (struct iovec*) malloc (total_fview_count * - sizeof(struct iovec)); - if (NULL == global_iov_array){ - opal_output(1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } + global_iov_array = (struct iovec*) malloc (total_fview_count * + sizeof(struct iovec)); + if (NULL == global_iov_array){ + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } } - + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_comm_time = MPI_Wtime(); +#endif ret = fh->f_allgatherv_array (local_iov_array, local_count, fh->f_iov_type, @@ -277,8 +306,19 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, if (OMPI_SUCCESS != ret){ goto exit; } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += (end_comm_time - start_comm_time); +#endif - /* sort it */ + /**************************************************************************************** + *** 5. Sort the global offset/lengths list based on the offsets. + *** The result of the sort operation is the 'sorted', an integer array, + *** which contains the indexes of the global_iov_array based on the offset. 
+ *** For example, if global_iov_array[x].offset is followed by global_iov_array[y].offset + *** in the file, and that one is followed by global_iov_array[z].offset, than + *** sorted[0] = x, sorted[1]=y and sorted[2]=z; + ******************************************************************************************/ if (0 != total_fview_count) { sorted = (int *)malloc (total_fview_count * sizeof(int)); if (NULL == sorted) { @@ -298,21 +338,27 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, free(displs); displs=NULL; } - - + + #if DEBUG_ON - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - uint32_t tv=0; - for (tv=0 ; tvf_rank, - global_iov_array[sorted[tv]].iov_base, - global_iov_array[sorted[tv]].iov_len); - } + if (my_aggregator == fh->f_rank) { + uint32_t tv=0; + for (tv=0 ; tvf_rank, + global_iov_array[sorted[tv]].iov_base, + global_iov_array[sorted[tv]].iov_len); + } } #endif - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + /************************************************************* + *** 6. 
Determine the number of cycles required to execute this + *** operation + *************************************************************/ + fh->f_get_bytes_per_agg ( (int *)&bytes_per_cycle ); + cycles = ceil((double)total_bytes/bytes_per_cycle); + + if (my_aggregator == fh->f_rank) { disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); if (NULL == disp_index) { opal_output (1, "OUT OF MEMORY\n"); @@ -320,720 +366,697 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, goto exit; } - blocklen_per_process = (int **)malloc (fh->f_procs_per_group * sizeof (int*)); + blocklen_per_process = (int **)calloc (fh->f_procs_per_group, sizeof (int*)); if (NULL == blocklen_per_process) { opal_output (1, "OUT OF MEMORY\n"); ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - displs_per_process = (MPI_Aint **)malloc (fh->f_procs_per_group * sizeof (MPI_Aint*)); + displs_per_process = (MPI_Aint **)calloc (fh->f_procs_per_group, sizeof (MPI_Aint*)); if (NULL == displs_per_process) { opal_output (1, "OUT OF MEMORY\n"); ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - for(i=0;if_procs_per_group;i++){ - blocklen_per_process[i] = NULL; - displs_per_process[i] = NULL; + recv_req = (MPI_Request *)malloc ((fh->f_procs_per_group)*sizeof(MPI_Request)); + if ( NULL == recv_req ) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - } - - fh->f_get_bytes_per_agg ( (int *)&bytes_per_cycle ); - cycles = ceil((double)total_bytes/bytes_per_cycle); + global_buf = (char *) malloc (bytes_per_cycle); + if (NULL == global_buf){ + opal_output(1, "OUT OF MEMORY"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); + if (NULL == recvtype) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + for(l=0;lf_procs_per_group;l++){ + recvtype[l] = MPI_DATATYPE_NULL; + } + } - n = 0; +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + 
start_exch = MPI_Wtime(); +#endif + n = 0; bytes_remaining = 0; current_index = 0; + for (index = 0; index < cycles; index++) { + /********************************************************************** + *** 7a. Getting ready for next cycle: initializing and freeing buffers + **********************************************************************/ + if (my_aggregator == fh->f_rank) { + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + fh->f_num_of_io_entries = 0; + + if (NULL != recvtype){ + for (i =0; i< fh->f_procs_per_group; i++) { + if ( MPI_DATATYPE_NULL != recvtype[i] ) { + ompi_datatype_destroy(&recvtype[i]); + recvtype[i] = MPI_DATATYPE_NULL; + } + } + } + for(l=0;lf_procs_per_group;l++){ + disp_index[l] = 1; + free(blocklen_per_process[l]); + free(displs_per_process[l]); -#if TIME_BREAKDOWN - start_exch = MPI_Wtime(); -#endif - - for (index = 0; index < cycles; index++) { + blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); + displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); + if (NULL == displs_per_process[l] || NULL == blocklen_per_process[l]){ + opal_output (1, "OUT OF MEMORY for displs\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + + if (NULL != sorted_file_offsets){ + free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + + if(NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements = NULL; + } + + } /* (my_aggregator == fh->f_rank */ + + /************************************************************************** + *** 7b. 
Determine the number of bytes to be actually written in this cycle + **************************************************************************/ + if (cycles-1 == index) { + bytes_to_write_in_cycle = total_bytes - bytes_per_cycle*index; + } + else { + bytes_to_write_in_cycle = bytes_per_cycle; + } - /* Getting ready for next cycle - Initializing and freeing buffers*/ - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - - if (NULL == recvtype){ - recvtype = (ompi_datatype_t **) - malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); - if (NULL == recvtype) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - for(l=0;lf_procs_per_group;l++){ - disp_index[l] = 1; - - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } - blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); - if (NULL == blocklen_per_process[l]) { - opal_output (1, "OUT OF MEMORY for blocklen\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); - if (NULL == displs_per_process[l]){ - opal_output (1, "OUT OF MEMORY for displs\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - if (NULL != sorted_file_offsets){ - free(sorted_file_offsets); - sorted_file_offsets = NULL; - } - - if(NULL != file_offsets_for_agg){ - free(file_offsets_for_agg); - file_offsets_for_agg = NULL; - } - - if (NULL != memory_displacements){ - free(memory_displacements); - memory_displacements = NULL; - } - - } - - if (cycles-1 == index) { - bytes_to_write_in_cycle = total_bytes - bytes_per_cycle*index; - } - else { - bytes_to_write_in_cycle = bytes_per_cycle; - } - #if DEBUG_ON - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - printf ("****%d: CYCLE %d Bytes %lld**********\n", - fh->f_rank, - index, 
- bytes_to_write_in_cycle); - } + if (my_aggregator == fh->f_rank) { + printf ("****%d: CYCLE %d Bytes %lld**********\n", + fh->f_rank, + index, + bytes_to_write_in_cycle); + } #endif - /********************************************************** - **Gather the Data from all the processes at the writers ** - *********************************************************/ - - /* Calculate how much data will be contributed in this cycle - by each process*/ - bytes_sent = 0; + /********************************************************** + **Gather the Data from all the processes at the writers ** + *********************************************************/ + #if DEBUG_ON - printf("bytes_to_write_in_cycle: %ld, cycle : %d\n", bytes_to_write_in_cycle, + printf("bytes_to_write_in_cycle: %ld, cycle : %d\n", bytes_to_write_in_cycle, index); #endif - /* The blocklen and displs calculation only done at aggregators!*/ - - - while (bytes_to_write_in_cycle) { - - blocks = fview_count[0]; - for (j=0 ; jf_procs_per_group ; j++) { - if (sorted[current_index] < blocks) { - n = j; - break; - } - else { - blocks += fview_count[j+1]; - } - } - - if (bytes_remaining) { - if (bytes_remaining <= bytes_to_write_in_cycle) { - - if (fh->f_procs_in_group[fh->f_aggregator_index] == - fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = bytes_remaining; - displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + - (global_iov_array[sorted[current_index]].iov_len - - bytes_remaining); - - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_sent += bytes_remaining; - } - current_index ++; - bytes_to_write_in_cycle -= bytes_remaining; - bytes_remaining = 0; - if (fh->f_procs_in_group[fh->f_aggregator_index] == - fh->f_rank) { - /* In this cases the length is consumed so allocating for - next displacement and blocklength*/ - - blocklen_per_process[n] = (int *) realloc - ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); 
- displs_per_process[n] = (MPI_Aint *) realloc - ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); - blocklen_per_process[n][disp_index[n]] = 0; - displs_per_process[n][disp_index[n]] = 0; - disp_index[n] += 1; - } - continue; - } - else { - if (fh->f_procs_in_group[fh->f_aggregator_index] == - fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = bytes_to_write_in_cycle; - displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + - (global_iov_array[sorted[current_index]].iov_len - - bytes_remaining); - } - - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_sent += bytes_to_write_in_cycle; - } - bytes_remaining -= bytes_to_write_in_cycle; - bytes_to_write_in_cycle = 0; - break; - } - } - else { - if (bytes_to_write_in_cycle < - (MPI_Aint) global_iov_array[sorted[current_index]].iov_len) { - if (fh->f_procs_in_group[fh->f_aggregator_index] == - fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = bytes_to_write_in_cycle; - displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base ; + /***************************************************************** + *** 7c. 
Calculate how much data will be contributed in this cycle + *** by each process + *****************************************************************/ + bytes_sent = 0; + + /* The blocklen and displs calculation only done at aggregators!*/ + while (bytes_to_write_in_cycle) { + + /* This next block identifies which process is the holder + ** of the sorted[current_index] element; + */ + blocks = fview_count[0]; + for (j=0 ; jf_procs_per_group ; j++) { + if (sorted[current_index] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + if (bytes_remaining) { + /* Finish up a partially used buffer from the previous cycle */ + + if (bytes_remaining <= bytes_to_write_in_cycle) { + /* The data fits completely into the block */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_remaining; + displs_per_process[n][disp_index[n] - 1] = + (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (global_iov_array[sorted[current_index]].iov_len + - bytes_remaining); + + /* In this cases the length is consumed so allocating for + next displacement and blocklength*/ + blocklen_per_process[n] = (int *) realloc + ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); + displs_per_process[n] = (MPI_Aint *) realloc + ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); + blocklen_per_process[n][disp_index[n]] = 0; + displs_per_process[n][disp_index[n]] = 0; + disp_index[n] += 1; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_sent += bytes_remaining; + } + current_index ++; + bytes_to_write_in_cycle -= bytes_remaining; + bytes_remaining = 0; + continue; + } + else { + /* the remaining data from the previous cycle is larger than the + bytes_to_write_in_cycle, so we have to segment again */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_to_write_in_cycle; + displs_per_process[n][disp_index[n] - 1] = + 
(OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (global_iov_array[sorted[current_index]].iov_len + - bytes_remaining); + } + + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_sent += bytes_to_write_in_cycle; + } + bytes_remaining -= bytes_to_write_in_cycle; + bytes_to_write_in_cycle = 0; + break; + } + } + else { + /* No partially used entry available, have to start a new one */ + if (bytes_to_write_in_cycle < + (MPI_Aint) global_iov_array[sorted[current_index]].iov_len) { + /* This entry has more data than we can sendin one cycle */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_to_write_in_cycle; + displs_per_process[n][disp_index[n] - 1] = + (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base ; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_sent += bytes_to_write_in_cycle; + + } + bytes_remaining = global_iov_array[sorted[current_index]].iov_len - + bytes_to_write_in_cycle; + bytes_to_write_in_cycle = 0; + break; + } + else { + /* Next data entry is less than bytes_to_write_in_cycle */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = + global_iov_array[sorted[current_index]].iov_len; + displs_per_process[n][disp_index[n] - 1] = (OPAL_PTRDIFF_TYPE) + global_iov_array[sorted[current_index]].iov_base; + + /*realloc for next blocklength + and assign this displacement and check for next displs as + the total length of this entry has been consumed!*/ + blocklen_per_process[n] = + (int *) realloc ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); + displs_per_process[n] = (MPI_Aint *)realloc + ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); + blocklen_per_process[n][disp_index[n]] = 0; + displs_per_process[n][disp_index[n]] = 0; + disp_index[n] += 1; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_sent += global_iov_array[sorted[current_index]].iov_len; + } + bytes_to_write_in_cycle -= + 
global_iov_array[sorted[current_index]].iov_len; + current_index ++; + continue; + } + } + } - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_sent += bytes_to_write_in_cycle; - - } - bytes_remaining = global_iov_array[sorted[current_index]].iov_len - - bytes_to_write_in_cycle; - bytes_to_write_in_cycle = 0; - break; - } - else { - if (fh->f_procs_in_group[fh->f_aggregator_index] == - fh->f_rank) { - blocklen_per_process[n][disp_index[n] - 1] = - global_iov_array[sorted[current_index]].iov_len; - displs_per_process[n][disp_index[n] - 1] = (OPAL_PTRDIFF_TYPE) - global_iov_array[sorted[current_index]].iov_base; - - blocklen_per_process[n] = - (int *) realloc ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); - displs_per_process[n] = (MPI_Aint *)realloc - ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); - blocklen_per_process[n][disp_index[n]] = 0; - displs_per_process[n][disp_index[n]] = 0; - disp_index[n] += 1; - /*realloc for next blocklength - and assign this displacement and check for next displs as - the total length of this entry has been consumed!*/ - } - if (fh->f_procs_in_group[n] == fh->f_rank) { - bytes_sent += global_iov_array[sorted[current_index]].iov_len; - } - bytes_to_write_in_cycle -= - global_iov_array[sorted[current_index]].iov_len; - current_index ++; - continue; - } - } - } - - - /* Calculate the displacement on where to put the data and allocate - the recieve buffer (global_buf) */ - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - entries_per_aggregator=0; - for (i=0;if_procs_per_group; i++){ - for (j=0;j 0) - entries_per_aggregator++ ; - } - } + + /************************************************************************* + *** 7d. 
Calculate the displacement on where to put the data and allocate + *** the recieve buffer (global_buf) + *************************************************************************/ + if (my_aggregator == fh->f_rank) { + entries_per_aggregator=0; + for (i=0;if_procs_per_group; i++){ + for (j=0;j 0) + entries_per_aggregator++ ; + } + } #if DEBUG_ON - printf("%d: cycle: %d, bytes_sent: %d\n ",fh->f_rank,index, - bytes_sent); - printf("%d : Entries per aggregator : %d\n",fh->f_rank,entries_per_aggregator); + printf("%d: cycle: %d, bytes_sent: %d\n ",fh->f_rank,index, + bytes_sent); + printf("%d : Entries per aggregator : %d\n",fh->f_rank,entries_per_aggregator); #endif - - if (entries_per_aggregator > 0){ - file_offsets_for_agg = (local_io_array *) - malloc(entries_per_aggregator*sizeof(local_io_array)); - if (NULL == file_offsets_for_agg) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - sorted_file_offsets = (int *) - malloc (entries_per_aggregator*sizeof(int)); - if (NULL == sorted_file_offsets){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - /*Moving file offsets to an IO array!*/ - temp_index = 0; - - for (i=0;if_procs_per_group; i++){ - for(j=0;j 0){ - file_offsets_for_agg[temp_index].length = - blocklen_per_process[i][j]; - file_offsets_for_agg[temp_index].process_id = i; - file_offsets_for_agg[temp_index].offset = - displs_per_process[i][j]; - temp_index++; - + + if (entries_per_aggregator > 0){ + file_offsets_for_agg = (mca_io_ompio_local_io_array *) + malloc(entries_per_aggregator*sizeof(mca_io_ompio_local_io_array)); + if (NULL == file_offsets_for_agg) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + sorted_file_offsets = (int *) + malloc (entries_per_aggregator*sizeof(int)); + if (NULL == sorted_file_offsets){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + /*Moving file offsets 
to an IO array!*/ + temp_index = 0; + + for (i=0;if_procs_per_group; i++){ + for(j=0;j 0){ + file_offsets_for_agg[temp_index].length = + blocklen_per_process[i][j]; + file_offsets_for_agg[temp_index].process_id = i; + file_offsets_for_agg[temp_index].offset = + displs_per_process[i][j]; + temp_index++; + #if DEBUG_ON - printf("************Cycle: %d, Aggregator: %d ***************\n", - index+1,fh->f_rank); - - printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", - fh->f_procs_in_group[i],j, - blocklen_per_process[i][j],j, - displs_per_process[i][j], - fh->f_rank); + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,fh->f_rank); + + printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", + fh->f_procs_in_group[i],j, + blocklen_per_process[i][j],j, + displs_per_process[i][j], + fh->f_rank); #endif - } - } - } - } - else{ - continue; - } - /* Sort the displacements for each aggregator*/ - local_heap_sort (file_offsets_for_agg, - entries_per_aggregator, - sorted_file_offsets); - - /*create contiguous memory displacements - based on blocklens on the same displs array - and map it to this aggregator's actual - file-displacements (this is in the io-array created above)*/ - memory_displacements = (MPI_Aint *) malloc - (entries_per_aggregator * sizeof(MPI_Aint)); - - memory_displacements[sorted_file_offsets[0]] = 0; - for (i=1; if_procs_per_group * sizeof (int)); - if (NULL == temp_disp_index) { - opal_output (1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /*Now update the displacements array with memory offsets*/ - global_count = 0; - for (i=0;if_procs_per_group * sizeof (int)); + if (NULL == temp_disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + /*Now update the displacements array with memory offsets*/ + global_count = 0; + for (i=0;if_rank); - for (i=0;if_procs_per_group; i++){ - for(j=0;j 0){ - printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", - 
fh->f_procs_in_group[i],j, - blocklen_per_process[i][j],j, - displs_per_process[i][j], - fh->f_rank); - - } - } - } - printf("************Cycle: %d, Aggregator: %d ***************\n", - index+1,fh->f_rank); - for (i=0; if_rank,global_count, bytes_sent); + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,fh->f_rank); + for (i=0;if_procs_per_group; i++){ + for(j=0;j 0){ + printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", + fh->f_procs_in_group[i],j, + blocklen_per_process[i][j],j, + displs_per_process[i][j], + fh->f_rank); + + } + } + } + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,fh->f_rank); + for (i=0; if_rank,global_count, bytes_sent); #endif -#if TIME_BREAKDOWN - start_comm_time = MPI_Wtime(); +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_comm_time = MPI_Wtime(); #endif + /************************************************************************* + *** 7e. Perform the actual communication + *************************************************************************/ + for (i=0;if_procs_per_group; i++) { + recv_req[i] = MPI_REQUEST_NULL; + if ( 0 < disp_index[i] ) { + ompi_datatype_create_hindexed(disp_index[i], + blocklen_per_process[i], + displs_per_process[i], + MPI_BYTE, + &recvtype[i]); + ompi_datatype_commit(&recvtype[i]); + opal_datatype_type_size(&recvtype[i]->super, &datatype_size); + + if (datatype_size){ + ret = MCA_PML_CALL(irecv(global_buf, + 1, + recvtype[i], + fh->f_procs_in_group[i], + 123, + fh->f_comm, + &recv_req[i])); + if (OMPI_SUCCESS != ret){ + goto exit; + } + } + } + } + } /* end if (my_aggregator == fh->f_rank ) */ - - global_buf = (char *) malloc (global_count); - if (NULL == global_buf){ - opal_output(1, "OUT OF MEMORY"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - recv_req_count = 0; - for (i=0;if_procs_per_group; i++){ - - ompi_datatype_create_hindexed(disp_index[i], - blocklen_per_process[i], - displs_per_process[i], - MPI_BYTE, - &recvtype[i]); - 
ompi_datatype_commit(&recvtype[i]); - - opal_datatype_type_size(&recvtype[i]->super, - &datatype_size); - - if (datatype_size){ - - recv_req = (MPI_Request *)realloc - ((void *)recv_req, (recv_req_count + 1)*sizeof(MPI_Request)); - - ret = MCA_PML_CALL(irecv(global_buf, - 1, - recvtype[i], - fh->f_procs_in_group[i], - 123, - fh->f_comm, - &recv_req[recv_req_count])); - recv_req_count++; - - if (OMPI_SUCCESS != ret){ - goto exit; - } - } - } - - } - - - - if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { - send_buf = &((char*)buf)[total_bytes_written]; - } - else if (bytes_sent) { - /* allocate a send buffer and copy the data that needs - to be sent into it in case the data is non-contigous - in memory */ - OPAL_PTRDIFF_TYPE mem_address; - size_t remaining = 0; - size_t temp_position = 0; - - send_buf = malloc (bytes_sent); - if (NULL == send_buf) { - opal_output (1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - remaining = bytes_sent; - - while (remaining) { - mem_address = (OPAL_PTRDIFF_TYPE) - (decoded_iov[iov_index].iov_base) + current_position; - - if (remaining >= - (decoded_iov[iov_index].iov_len - current_position)) { - memcpy (send_buf+temp_position, - (IOVBASE_TYPE *)mem_address, - decoded_iov[iov_index].iov_len - current_position); - remaining = remaining - - (decoded_iov[iov_index].iov_len - current_position); - temp_position = temp_position + - (decoded_iov[iov_index].iov_len - current_position); - iov_index = iov_index + 1; - current_position = 0; - } - else { - memcpy (send_buf+temp_position, - (IOVBASE_TYPE *) mem_address, - remaining); - current_position = current_position + remaining; - remaining = 0; - } - } + if ( sendbuf_is_contiguous ) { + send_buf = &((char*)buf)[total_bytes_written]; + } + else if (bytes_sent) { + /* allocate a send buffer and copy the data that needs + to be sent into it in case the data is non-contigous + in memory */ + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + 
send_buf = malloc (bytes_sent); + if (NULL == send_buf) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + remaining = bytes_sent; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy (send_buf+temp_position, + (IOVBASE_TYPE *)mem_address, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + } + else { + memcpy (send_buf+temp_position, + (IOVBASE_TYPE *) mem_address, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } } total_bytes_written += bytes_sent; - /* Gather the sendbuf from each process in appropritate locations in - aggregators*/ - - send_req = (MPI_Request *) malloc (sizeof(MPI_Request)); - if (NULL == send_req){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } + /* Gather the sendbuf from each process in appropritate locations in + aggregators*/ - if (bytes_sent){ + ret = MCA_PML_CALL(isend(send_buf, + bytes_sent, + MPI_BYTE, + my_aggregator, + 123, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &send_req)); - ret = MCA_PML_CALL(isend(send_buf, - bytes_sent, - MPI_BYTE, - fh->f_procs_in_group[fh->f_aggregator_index], - 123, - MCA_PML_BASE_SEND_STANDARD, - fh->f_comm, - send_req)); - if ( OMPI_SUCCESS != ret ){ goto exit; } - ret = ompi_request_wait(send_req, MPI_STATUS_IGNORE); + ret = ompi_request_wait(&send_req, MPI_STATUS_IGNORE); if (OMPI_SUCCESS != ret){ goto exit; } - } - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - ret = ompi_request_wait_all (recv_req_count, - recv_req, - MPI_STATUS_IGNORE); - - if (OMPI_SUCCESS != ret){ - goto exit; - 
} } -#if DEBUG_ON - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank){ - printf("************Cycle: %d, Aggregator: %d ***************\n", - index+1,fh->f_rank); - for (i=0 ; if_rank) { + ret = ompi_request_wait_all (fh->f_procs_per_group, + recv_req, + MPI_STATUS_IGNORE); - - if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { - if (NULL != send_buf) { - free (send_buf); - send_buf = NULL; + if (OMPI_SUCCESS != ret){ + goto exit; + } } - } -#if TIME_BREAKDOWN - end_comm_time = MPI_Wtime(); - comm_time += (end_comm_time - start_comm_time); +#if DEBUG_ON + if (my_aggregator == fh->f_rank){ + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,fh->f_rank); + for (i=0 ; if_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + if (my_aggregator == fh->f_rank) { -#if TIME_BREAKDOWN +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_write_time = MPI_Wtime(); #endif - - fh->f_io_array = (mca_io_ompio_io_array_t *) malloc - (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); - if (NULL == fh->f_io_array) { - opal_output(1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - fh->f_num_of_io_entries = 0; - /*First entry for every aggregator*/ - fh->f_io_array[fh->f_num_of_io_entries].offset = - (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; - fh->f_io_array[fh->f_num_of_io_entries].length = - file_offsets_for_agg[sorted_file_offsets[0]].length; - fh->f_io_array[fh->f_num_of_io_entries].memory_address = - global_buf+memory_displacements[sorted_file_offsets[0]]; - fh->f_num_of_io_entries++; - - for (i=1;if_io_array[fh->f_num_of_io_entries - 1].length += - file_offsets_for_agg[sorted_file_offsets[i]].length; - } - else { - fh->f_io_array[fh->f_num_of_io_entries].offset = - (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; - fh->f_io_array[fh->f_num_of_io_entries].length = - file_offsets_for_agg[sorted_file_offsets[i]].length; - 
fh->f_io_array[fh->f_num_of_io_entries].memory_address = - global_buf+memory_displacements[sorted_file_offsets[i]]; - fh->f_num_of_io_entries++; - } - - } - + + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc + (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + fh->f_num_of_io_entries = 0; + /*First entry for every aggregator*/ + fh->f_io_array[0].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; + fh->f_io_array[0].length = + file_offsets_for_agg[sorted_file_offsets[0]].length; + fh->f_io_array[0].memory_address = + global_buf+memory_displacements[sorted_file_offsets[0]]; + fh->f_num_of_io_entries++; + + for (i=1;if_io_array[fh->f_num_of_io_entries - 1].length += + file_offsets_for_agg[sorted_file_offsets[i]].length; + } + else { + fh->f_io_array[fh->f_num_of_io_entries].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; + fh->f_io_array[fh->f_num_of_io_entries].length = + file_offsets_for_agg[sorted_file_offsets[i]].length; + fh->f_io_array[fh->f_num_of_io_entries].memory_address = + global_buf+memory_displacements[sorted_file_offsets[i]]; + fh->f_num_of_io_entries++; + } + + } + #if DEBUG_ON - printf("*************************** %d\n", fh->f_num_of_io_entries); - for (i=0 ; if_num_of_io_entries ; i++) { - printf(" ADDRESS: %p OFFSET: %ld LENGTH: %ld\n", - fh->f_io_array[i].memory_address, - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].offset, - fh->f_io_array[i].length); - } - -#endif + printf("*************************** %d\n", fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) { + printf(" ADDRESS: %p OFFSET: %ld LENGTH: %ld\n", + fh->f_io_array[i].memory_address, + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].offset, + fh->f_io_array[i].length); + } +#endif - if (fh->f_num_of_io_entries) { - if ( 0 > fh->f_fbtl->fbtl_pwritev (fh)) { - opal_output 
(1, "WRITE FAILED\n"); - ret = OMPI_ERROR; - goto exit; - } - } -#if TIME_BREAKDOWN - end_write_time = MPI_Wtime(); - write_time += end_write_time - start_write_time; -#endif + if (fh->f_num_of_io_entries) { + if ( 0 > fh->f_fbtl->fbtl_pwritev (fh)) { + opal_output (1, "WRITE FAILED\n"); + ret = OMPI_ERROR; + goto exit; + } + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_write_time = MPI_Wtime(); + write_time += end_write_time - start_write_time; +#endif - } - - - if (NULL != send_req){ - free(send_req); - send_req = NULL; - } - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - fh->f_num_of_io_entries = 0; - if (NULL != fh->f_io_array) { - free (fh->f_io_array); - fh->f_io_array = NULL; - } - for (i =0; i< fh->f_procs_per_group; i++) - ompi_datatype_destroy(recvtype+i); - if (NULL != recvtype){ - free(recvtype); - recvtype=NULL; - } - if (NULL != recv_req){ - free(recv_req); - recv_req = NULL; - - } - if (NULL != global_buf) { - free (global_buf); - global_buf = NULL; - } - - } - - } + } /* end if (my_aggregator == fh->f_rank) */ + } /* end for (index = 0; index < cycles; index++) */ -#if TIME_BREAKDOWN - end_exch = MPI_Wtime(); - exch_write += end_exch - start_exch; - nentry.time[0] = write_time; - nentry.time[1] = comm_time; - nentry.time[2] = exch_write; - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_exch = MPI_Wtime(); + exch_write += end_exch - start_exch; + nentry.time[0] = write_time; + nentry.time[1] = comm_time; + nentry.time[2] = exch_write; + if (my_aggregator == fh->f_rank) nentry.aggregator = 1; - else + else nentry.aggregator = 0; - nentry.nprocs_for_coll = dynamic_num_io_procs; - if (!fh->f_full_print_queue(WRITE_PRINT_QUEUE)){ - fh->f_register_print_entry(WRITE_PRINT_QUEUE, - nentry); - } + nentry.nprocs_for_coll = dynamic_num_io_procs; + if (!fh->f_full_print_queue(WRITE_PRINT_QUEUE)){ + fh->f_register_print_entry(WRITE_PRINT_QUEUE, + nentry); + } #endif - exit : - if 
(fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - if (NULL != fh->f_io_array) { - free (fh->f_io_array); - fh->f_io_array = NULL; - } - if (NULL != disp_index){ - free(disp_index); - disp_index = NULL; - } - if (NULL != recvtype){ - free(recvtype); - recvtype=NULL; - } - if (NULL != recv_req){ - free(recv_req); - recv_req = NULL; - } - if (NULL != global_buf) { - free (global_buf); - global_buf = NULL; - } - for(l=0;lf_procs_per_group;l++){ - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } - } - if (NULL != blocklen_per_process){ - free(blocklen_per_process); - blocklen_per_process = NULL; - } - if (NULL != displs_per_process){ - free(displs_per_process); - displs_per_process = NULL; - } +exit : + if (my_aggregator == fh->f_rank) { + if (NULL != sorted_file_offsets){ + free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + if(NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements = NULL; + } + if (NULL != recvtype){ + for (i =0; i< fh->f_procs_per_group; i++) { + if ( MPI_DATATYPE_NULL != recvtype[i] ) { + ompi_datatype_destroy(&recvtype[i]); + } + } + free(recvtype); + recvtype=NULL; + } + + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + if (NULL != disp_index){ + free(disp_index); + disp_index = NULL; + } + if (NULL != recvtype){ + free(recvtype); + recvtype=NULL; + } + if (NULL != recv_req){ + free(recv_req); + recv_req = NULL; + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + for(l=0;lf_procs_per_group;l++){ + if (NULL != blocklen_per_process){ + free(blocklen_per_process[l]); + } + if (NULL != displs_per_process){ + free(displs_per_process[l]); + } + } + 
free(blocklen_per_process); + free(displs_per_process); } + if (NULL != displs){ + free(displs); + displs=NULL; + } + + if (! sendbuf_is_contiguous) { + if (NULL != send_buf) { + free (send_buf); + send_buf = NULL; + } + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } if (NULL != sorted) { free (sorted); sorted = NULL; @@ -1050,17 +1073,13 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, free (decoded_iov); decoded_iov = NULL; } - - if (NULL != send_req){ - free(send_req); - send_req = NULL; - } + return OMPI_SUCCESS; } -static int local_heap_sort (local_io_array *io_array, +static int local_heap_sort (mca_io_ompio_local_io_array *io_array, int num_entries, int *sorted) { @@ -1092,15 +1111,15 @@ static int local_heap_sort (local_io_array *io_array, while (!done) { left = j*2+1; right = j*2+2; - if ((left <= heap_size) && + if ((left <= heap_size) && (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { largest = left; } else { largest = j; } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > io_array[temp_arr[largest]].offset)) { largest = right; } @@ -1119,8 +1138,8 @@ static int local_heap_sort (local_io_array *io_array, for (i = num_entries-1; i >=1; --i) { temp = temp_arr[0]; temp_arr[0] = temp_arr[i]; - temp_arr[i] = temp; - heap_size--; + temp_arr[i] = temp; + heap_size--; done = 0; j = 0; largest = j; @@ -1128,17 +1147,17 @@ static int local_heap_sort (local_io_array *io_array, while (!done) { left = j*2+1; right = j*2+2; - - if ((left <= heap_size) && - (io_array[temp_arr[left]].offset > + + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { largest = left; } else { largest = j; } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > io_array[temp_arr[largest]].offset)) { largest = right; } 
diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c index 85b2b7afd12..4d3466b3ec8 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -70,9 +70,9 @@ mca_fcoll_dynamic_component_file_query (mca_io_ompio_file_t *fh, int *priority) } int mca_fcoll_dynamic_component_file_unquery (mca_io_ompio_file_t *file) -{ +{ /* This function might be needed for some purposes later. for now it - * does not have anything to do since there are no steps which need + * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ return OMPI_SUCCESS; @@ -83,8 +83,8 @@ int mca_fcoll_dynamic_module_init (mca_io_ompio_file_t *file) return OMPI_SUCCESS; } - -int mca_fcoll_dynamic_module_finalize (mca_io_ompio_file_t *file) + +int mca_fcoll_dynamic_module_finalize (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } diff --git a/ompi/mca/fcoll/dynamic_gen2/Makefile.am b/ompi/mca/fcoll/dynamic_gen2/Makefile.am new file mode 100644 index 00000000000..f4910ac5e97 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/Makefile.am @@ -0,0 +1,47 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2015 University of Houston. All rights reserved. +# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + fcoll_dynamic_gen2.h \ + fcoll_dynamic_gen2_module.c \ + fcoll_dynamic_gen2_component.c \ + fcoll_dynamic_gen2_file_read_all.c \ + fcoll_dynamic_gen2_file_write_all.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_ompi_fcoll_dynamic_gen2_DSO +component_noinst = +component_install = mca_fcoll_dynamic_gen2.la +else +component_noinst = libmca_fcoll_dynamic_gen2.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_fcoll_dynamic_gen2_la_SOURCES = $(sources) +mca_fcoll_dynamic_gen2_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_fcoll_dynamic_gen2_la_SOURCES =$(sources) +libmca_fcoll_dynamic_gen2_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h new file mode 100644 index 00000000000..dfd8d16e924 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_FCOLL_DYNAMIC_GEN2_EXPORT_H /* guard name is unique to this component's header */ +#define MCA_FCOLL_DYNAMIC_GEN2_EXPORT_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "ompi/mca/mca.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +BEGIN_C_DECLS + +/* Globally exported variables: storage for this component's tunable parameters */ + +extern int mca_fcoll_dynamic_gen2_priority; +extern int mca_fcoll_dynamic_gen2_num_groups; +extern int mca_fcoll_dynamic_gen2_write_chunksize; + +OMPI_MODULE_DECLSPEC extern mca_fcoll_base_component_2_0_0_t mca_fcoll_dynamic_gen2_component; + +/* API functions: component query hooks, module init/finalize, and the collective read/write entry points */ + +int mca_fcoll_dynamic_gen2_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +struct mca_fcoll_base_module_1_0_0_t * +mca_fcoll_dynamic_gen2_component_file_query (mca_io_ompio_file_t *fh, int *priority); + +int mca_fcoll_dynamic_gen2_component_file_unquery (mca_io_ompio_file_t *file); + +int mca_fcoll_dynamic_gen2_module_init (mca_io_ompio_file_t *file); +int mca_fcoll_dynamic_gen2_module_finalize (mca_io_ompio_file_t *file); + +int mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status); + + +int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh, + const void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status); + + +END_C_DECLS + +#endif /* MCA_FCOLL_DYNAMIC_GEN2_EXPORT_H */ diff --git
a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_component.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_component.c new file mode 100644 index 00000000000..055b6b244b2 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_component.c @@ -0,0 +1,106 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "ompi_config.h" +#include "fcoll_dynamic_gen2.h" +#include "mpi.h" + +/* + * Public string showing the fcoll ompi_dynamic_gen2 component version number + */ +const char *mca_fcoll_dynamic_gen2_component_version_string = + "Open MPI dynamic_gen2 collective MCA component version " OMPI_VERSION; + +/* + * Global variables + */ +int mca_fcoll_dynamic_gen2_priority = 10; +int mca_fcoll_dynamic_gen2_num_groups = 1; +int mca_fcoll_dynamic_gen2_write_chunksize = -1; + +/* + * Local function + */ +static int dynamic_gen2_register(void); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +mca_fcoll_base_component_2_0_0_t mca_fcoll_dynamic_gen2_component = { + + /* First, the mca_component_t struct containing meta information + * about the component itself */ + + .fcollm_version = { + MCA_FCOLL_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "dynamic_gen2", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + .mca_register_component_params = dynamic_gen2_register, + }, + .fcollm_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + .fcollm_init_query = mca_fcoll_dynamic_gen2_component_init_query, + .fcollm_file_query = mca_fcoll_dynamic_gen2_component_file_query, + .fcollm_file_unquery = mca_fcoll_dynamic_gen2_component_file_unquery, +}; + + +static int +dynamic_gen2_register(void) +{ + mca_fcoll_dynamic_gen2_priority = 10; + (void) mca_base_component_var_register(&mca_fcoll_dynamic_gen2_component.fcollm_version, + "priority", "Priority of the dynamic_gen2 fcoll component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &mca_fcoll_dynamic_gen2_priority); + + mca_fcoll_dynamic_gen2_num_groups = 1; + (void) mca_base_component_var_register(&mca_fcoll_dynamic_gen2_component.fcollm_version, + "num_groups", "Number of subgroups 
created by the dynamic_gen2 component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &mca_fcoll_dynamic_gen2_num_groups); + + mca_fcoll_dynamic_gen2_write_chunksize = -1; + (void) mca_base_component_var_register(&mca_fcoll_dynamic_gen2_component.fcollm_version, + "write_chunksize", "Chunk size written at once. Default: stripe_size of the file system", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &mca_fcoll_dynamic_gen2_write_chunksize); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c new file mode 100644 index 00000000000..f34858ed34b --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c @@ -0,0 +1,1074 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_dynamic_gen2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include + +#define DEBUG_ON 0 + +/*Used for loading file-offsets per aggregator*/ +typedef struct mca_io_ompio_local_io_array{ + OMPI_MPI_OFFSET_TYPE offset; + MPI_Aint length; + int process_id; +}mca_io_ompio_local_io_array; + + +static int read_heap_sort (mca_io_ompio_local_io_array *io_array, + int num_entries, + int *sorted); + + + +int +mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + MPI_Aint position = 0; + MPI_Aint total_bytes = 0; /* total bytes to be read */ + MPI_Aint bytes_to_read_in_cycle = 0; /* left to be read in a cycle*/ + MPI_Aint bytes_per_cycle = 0; /* total read in each cycle by each process*/ + int index = 0, ret=OMPI_SUCCESS; + int cycles = 0; + int i=0, j=0, l=0; + int n=0; /* current position in total_bytes_per_process array */ + MPI_Aint bytes_remaining = 0; /* how many bytes have been read from the current + value from total_bytes_per_process */ + int *sorted_file_offsets=NULL, entries_per_aggregator=0; + int bytes_received = 0; + int blocks = 0; + /* iovec structure and count of the buffer passed in */ + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + int iov_index = 0; + size_t current_position = 0; + struct iovec *local_iov_array=NULL, *global_iov_array=NULL; + char *receive_buf = NULL; + MPI_Aint *memory_displacements=NULL; + /* global iovec at the readers that contain the iovecs created from + file_set_view */ + uint32_t total_fview_count = 0; + int local_count = 0; + int *fview_count = NULL, *disp_index=NULL, *temp_disp_index=NULL; + int current_index=0, temp_index=0; 
+ int **blocklen_per_process=NULL; + MPI_Aint **displs_per_process=NULL; + char *global_buf = NULL; + MPI_Aint global_count = 0; + mca_io_ompio_local_io_array *file_offsets_for_agg=NULL; + + /* array that contains the sorted indices of the global_iov */ + int *sorted = NULL; + int *displs = NULL; + int dynamic_gen2_num_io_procs; + size_t max_data = 0; + MPI_Aint *total_bytes_per_process = NULL; + ompi_datatype_t **sendtype = NULL; + MPI_Request *send_req=NULL, recv_req=NULL; + int my_aggregator =-1; + bool recvbuf_is_contiguous=false; + size_t ftype_size; + OPAL_PTRDIFF_TYPE ftype_extent, lb; + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0; + double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0; + double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0; + mca_io_ompio_print_entry nentry; +#endif + + /************************************************************************** + ** 1. In case the data is not contiguous in memory, decode it into an iovec + **************************************************************************/ + + opal_datatype_type_size ( &datatype->super, &ftype_size ); + opal_datatype_get_extent ( &datatype->super, &lb, &ftype_extent ); + + if ( (ftype_extent == (OPAL_PTRDIFF_TYPE) ftype_size) && + opal_datatype_is_contiguous_memory_layout(&datatype->super,1) && + 0 == lb ) { + recvbuf_is_contiguous = true; + } + + + if (!
recvbuf_is_contiguous ) { + ret = fh->f_decode_datatype ((struct mca_io_ompio_file_t *)fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + if (OMPI_SUCCESS != ret){ + goto exit; + } + } + else { + max_data = count * datatype->super.size; + } + + if ( MPI_STATUS_IGNORE != status ) { + status->_ucount = max_data; + } + + fh->f_get_num_aggregators ( &dynamic_gen2_num_io_procs); + ret = fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *) fh, + dynamic_gen2_num_io_procs, + max_data); + if (OMPI_SUCCESS != ret){ + goto exit; + } + my_aggregator = fh->f_procs_in_group[fh->f_aggregator_index]; + + /************************************************************************** + ** 2. Determine the total amount of data to be read + **************************************************************************/ + total_bytes_per_process = (MPI_Aint*)malloc(fh->f_procs_per_group*sizeof(MPI_Aint)); + if (NULL == total_bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = fh->f_allgather_array (&max_data, + 1, + MPI_LONG, + total_bytes_per_process, + 1, + MPI_LONG, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + if (OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + for (i=0 ; if_procs_per_group ; i++) { + total_bytes += total_bytes_per_process[i]; + } + + if (NULL != total_bytes_per_process) { + free (total_bytes_per_process); + total_bytes_per_process = NULL; + } + + /********************************************************************* + *** 3.
Generate the File offsets/lengths corresponding to this write + ********************************************************************/ + ret = fh->f_generate_current_file_view ((struct mca_io_ompio_file_t *) fh, + max_data, + &local_iov_array, + &local_count); + + if (ret != OMPI_SUCCESS){ + goto exit; + } + + /************************************************************* + *** 4. Allgather the File View information at all processes + *************************************************************/ + + fview_count = (int *) malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == fview_count) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = fh->f_allgather_array (&local_count, + 1, + MPI_INT, + fview_count, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + if (OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + displs = (int*)malloc (fh->f_procs_per_group*sizeof(int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + displs[0] = 0; + total_fview_count = fview_count[0]; + for (i=1 ; if_procs_per_group ; i++) { + total_fview_count += fview_count[i]; + displs[i] = displs[i-1] + fview_count[i-1]; + } + +#if DEBUG_ON + if (my_aggregator == fh->f_rank) { + for (i=0 ; if_procs_per_group ; i++) { + printf ("%d: PROCESS: %d ELEMENTS: %d DISPLS: %d\n", + fh->f_rank, + i, + fview_count[i], + displs[i]); +} +} +#endif + + /* allocate the global iovec */ + if (0 != total_fview_count) { + global_iov_array = (struct iovec*)malloc (total_fview_count * + sizeof(struct iovec)); + if (NULL == global_iov_array) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } +#if 
OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = fh->f_allgatherv_array (local_iov_array, + local_count, + fh->f_iov_type, + global_iov_array, + fview_count, + displs, + fh->f_iov_type, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + if (OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + /**************************************************************************************** + *** 5. Sort the global offset/lengths list based on the offsets. + *** The result of the sort operation is the 'sorted', an integer array, + *** which contains the indexes of the global_iov_array based on the offset. + *** For example, if global_iov_array[x].offset is followed by global_iov_array[y].offset + *** in the file, and that one is followed by global_iov_array[z].offset, then + *** sorted[0] = x, sorted[1]=y and sorted[2]=z; + ******************************************************************************************/ + if (0 != total_fview_count) { + sorted = (int *)malloc (total_fview_count * sizeof(int)); + if (NULL == sorted) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + fh->f_sort_iovec (global_iov_array, total_fview_count, sorted); + } + + if (NULL != local_iov_array) { + free (local_iov_array); + local_iov_array = NULL; + } + +#if DEBUG_ON + if (my_aggregator == fh->f_rank) { + for (i=0 ; if_rank, + global_iov_array[sorted[i]].iov_base, + global_iov_array[sorted[i]].iov_len); + } + } +#endif + + /************************************************************* + *** 6.
Determine the number of cycles required to execute this + *** operation + *************************************************************/ + fh->f_get_bytes_per_agg ( (int *) &bytes_per_cycle); + cycles = ceil((double)total_bytes/bytes_per_cycle); + + if ( my_aggregator == fh->f_rank) { + disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + blocklen_per_process = (int **)malloc (fh->f_procs_per_group * sizeof (int*)); + if (NULL == blocklen_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + displs_per_process = (MPI_Aint **)malloc (fh->f_procs_per_group * sizeof (MPI_Aint*)); + if (NULL == displs_per_process){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + for (i=0;if_procs_per_group;i++){ + blocklen_per_process[i] = NULL; + displs_per_process[i] = NULL; + } + + send_req = (MPI_Request *) malloc (fh->f_procs_per_group * sizeof(MPI_Request)); + if (NULL == send_req){ + opal_output ( 1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + global_buf = (char *) malloc (bytes_per_cycle); + if (NULL == global_buf){ + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + sendtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); + if (NULL == sendtype) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + for(l=0;lf_procs_per_group;l++){ + sendtype[l] = MPI_DATATYPE_NULL; + } + } + + + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rexch = MPI_Wtime(); +#endif + n = 0; + bytes_remaining = 0; + current_index = 0; + + for (index = 0; index < cycles; index++) { + /********************************************************************** + *** 7a. 
Getting ready for next cycle: initializing and freeing buffers + **********************************************************************/ + if (my_aggregator == fh->f_rank) { + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + fh->f_num_of_io_entries = 0; + + if (NULL != sendtype){ + for (i =0; i< fh->f_procs_per_group; i++) { + if ( MPI_DATATYPE_NULL != sendtype[i] ) { + ompi_datatype_destroy(&sendtype[i]); + sendtype[i] = MPI_DATATYPE_NULL; + } + } + } + + for(l=0;lf_procs_per_group;l++){ + disp_index[l] = 1; + + if (NULL != blocklen_per_process[l]){ + free(blocklen_per_process[l]); + blocklen_per_process[l] = NULL; + } + if (NULL != displs_per_process[l]){ + free(displs_per_process[l]); + displs_per_process[l] = NULL; + } + blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); + if (NULL == blocklen_per_process[l]) { + opal_output (1, "OUT OF MEMORY for blocklen\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); + if (NULL == displs_per_process[l]){ + opal_output (1, "OUT OF MEMORY for displs\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + + if (NULL != sorted_file_offsets){ + free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + + if(NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements = NULL; + } + } /* (my_aggregator == fh->f_rank */ + + /************************************************************************** + *** 7b. 
Determine the number of bytes to be actually read in this cycle + **************************************************************************/ + if (cycles-1 == index) { + bytes_to_read_in_cycle = total_bytes - bytes_per_cycle*index; + } + else { + bytes_to_read_in_cycle = bytes_per_cycle; + } + +#if DEBUG_ON + if (my_aggregator == fh->f_rank) { + printf ("****%d: CYCLE %d Bytes %d**********\n", + fh->f_rank, + index, + bytes_to_write_in_cycle); + } +#endif + + /***************************************************************** + *** 7c. Calculate how much data will be contributed in this cycle + *** by each process + *****************************************************************/ + bytes_received = 0; + + while (bytes_to_read_in_cycle) { + /* This next block identifies which process is the holder + ** of the sorted[current_index] element; + */ + blocks = fview_count[0]; + for (j=0 ; jf_procs_per_group ; j++) { + if (sorted[current_index] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + + if (bytes_remaining) { + /* Finish up a partially used buffer from the previous cycle */ + if (bytes_remaining <= bytes_to_read_in_cycle) { + /* Data fits completely into the block */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_remaining; + displs_per_process[n][disp_index[n] - 1] = + (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); + + blocklen_per_process[n] = (int *) realloc + ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); + displs_per_process[n] = (MPI_Aint *) realloc + ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); + blocklen_per_process[n][disp_index[n]] = 0; + displs_per_process[n][disp_index[n]] = 0; + disp_index[n] += 1; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_received += bytes_remaining; + } + current_index ++; + bytes_to_read_in_cycle -= 
bytes_remaining; + bytes_remaining = 0; + continue; + } + else { + /* the remaining data from the previous cycle is larger than the + bytes_to_write_in_cycle, so we have to segment again */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; + displs_per_process[n][disp_index[n] - 1] = + (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (global_iov_array[sorted[current_index]].iov_len + - bytes_remaining); + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_received += bytes_to_read_in_cycle; + } + bytes_remaining -= bytes_to_read_in_cycle; + bytes_to_read_in_cycle = 0; + break; + } + } + else { + /* No partially used entry available, have to start a new one */ + if (bytes_to_read_in_cycle < + (MPI_Aint) global_iov_array[sorted[current_index]].iov_len) { + /* This entry has more data than we can sendin one cycle */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; + displs_per_process[n][disp_index[n] - 1] = + (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base ; + } + + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_received += bytes_to_read_in_cycle; + } + bytes_remaining = global_iov_array[sorted[current_index]].iov_len - + bytes_to_read_in_cycle; + bytes_to_read_in_cycle = 0; + break; + } + else { + /* Next data entry is less than bytes_to_write_in_cycle */ + if (my_aggregator == fh->f_rank) { + blocklen_per_process[n][disp_index[n] - 1] = + global_iov_array[sorted[current_index]].iov_len; + displs_per_process[n][disp_index[n] - 1] = (OPAL_PTRDIFF_TYPE) + global_iov_array[sorted[current_index]].iov_base; + blocklen_per_process[n] = + (int *) realloc ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); + displs_per_process[n] = (MPI_Aint *)realloc + ((void *)displs_per_process[n], (disp_index[n]+1)*sizeof(MPI_Aint)); + blocklen_per_process[n][disp_index[n]] = 0; + 
displs_per_process[n][disp_index[n]] = 0; + disp_index[n] += 1; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_received += + global_iov_array[sorted[current_index]].iov_len; + } + bytes_to_read_in_cycle -= + global_iov_array[sorted[current_index]].iov_len; + current_index ++; + continue; + } + } + } /* end while (bytes_to_read_in_cycle) */ + + /************************************************************************* + *** 7d. Calculate the displacement on where to put the data and allocate + *** the recieve buffer (global_buf) + *************************************************************************/ + if (my_aggregator == fh->f_rank) { + entries_per_aggregator=0; + for (i=0;if_procs_per_group; i++){ + for (j=0;j 0) + entries_per_aggregator++ ; + } + } + if (entries_per_aggregator > 0){ + file_offsets_for_agg = (mca_io_ompio_local_io_array *) + malloc(entries_per_aggregator*sizeof(mca_io_ompio_local_io_array)); + if (NULL == file_offsets_for_agg) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + sorted_file_offsets = (int *) + malloc (entries_per_aggregator*sizeof(int)); + if (NULL == sorted_file_offsets){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + /*Moving file offsets to an IO array!*/ + temp_index = 0; + global_count = 0; + for (i=0;if_procs_per_group; i++){ + for(j=0;j 0){ + file_offsets_for_agg[temp_index].length = + blocklen_per_process[i][j]; + global_count += blocklen_per_process[i][j]; + file_offsets_for_agg[temp_index].process_id = i; + file_offsets_for_agg[temp_index].offset = + displs_per_process[i][j]; + temp_index++; + } + } + } + } + else{ + continue; + } + + /* Sort the displacements for each aggregator */ + read_heap_sort (file_offsets_for_agg, + entries_per_aggregator, + sorted_file_offsets); + + memory_displacements = (MPI_Aint *) malloc + (entries_per_aggregator * sizeof(MPI_Aint)); + memory_displacements[sorted_file_offsets[0]] = 0; + for 
(i=1; if_io_array = (mca_io_ompio_io_array_t *) malloc + (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + fh->f_num_of_io_entries = 0; + fh->f_io_array[0].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; + fh->f_io_array[0].length = + file_offsets_for_agg[sorted_file_offsets[0]].length; + fh->f_io_array[0].memory_address = + global_buf+memory_displacements[sorted_file_offsets[0]]; + fh->f_num_of_io_entries++; + for (i=1;if_io_array[fh->f_num_of_io_entries - 1].length += + file_offsets_for_agg[sorted_file_offsets[i]].length; + } + else{ + fh->f_io_array[fh->f_num_of_io_entries].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; + fh->f_io_array[fh->f_num_of_io_entries].length = + file_offsets_for_agg[sorted_file_offsets[i]].length; + fh->f_io_array[fh->f_num_of_io_entries].memory_address = + global_buf+memory_displacements[sorted_file_offsets[i]]; + fh->f_num_of_io_entries++; + } + } + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_read_time = MPI_Wtime(); +#endif + + if (fh->f_num_of_io_entries) { + if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { + opal_output (1, "READ FAILED\n"); + ret = OMPI_ERROR; + goto exit; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_read_time = MPI_Wtime(); + read_time += end_read_time - start_read_time; +#endif + /********************************************************** + ******************** DONE READING ************************ + *********************************************************/ + + temp_disp_index = (int *)calloc (1, fh->f_procs_per_group * sizeof (int)); + if (NULL == temp_disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + for (i=0; if_procs_per_group;i++){ + send_req[i] = MPI_REQUEST_NULL; + if ( 0 < disp_index[i] ) { + 
ompi_datatype_create_hindexed(disp_index[i], + blocklen_per_process[i], + displs_per_process[i], + MPI_BYTE, + &sendtype[i]); + ompi_datatype_commit(&sendtype[i]); + ret = MCA_PML_CALL (isend(global_buf, + 1, + sendtype[i], + fh->f_procs_in_group[i], + 123, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &send_req[i])); + if(OMPI_SUCCESS != ret){ + goto exit; + } + } + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + } + + /********************************************************** + *** 7f. Scatter the Data from the readers + *********************************************************/ + if ( recvbuf_is_contiguous ) { + receive_buf = &((char*)buf)[position]; + } + else if (bytes_received) { + /* allocate a receive buffer and copy the data that needs + to be received into it in case the data is non-contigous + in memory */ + receive_buf = malloc (bytes_received); + if (NULL == receive_buf) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + ret = MCA_PML_CALL(irecv(receive_buf, + bytes_received, + MPI_BYTE, + my_aggregator, + 123, + fh->f_comm, + &recv_req)); + if (OMPI_SUCCESS != ret){ + goto exit; + } + + + if (my_aggregator == fh->f_rank){ + ret = ompi_request_wait_all (fh->f_procs_per_group, + send_req, + MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + } + + ret = ompi_request_wait (&recv_req, MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + position += bytes_received; + + /* If data is not contigous in memory, copy the data from the + receive buffer into the buffer passed in */ + if (!recvbuf_is_contiguous ) { + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + remaining = bytes_received; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + 
current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + } + else { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + + if (NULL != receive_buf) { + free (receive_buf); + receive_buf = NULL; + } + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + } /* end for (index=0; index < cycles; index ++) */ + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rexch = MPI_Wtime(); + read_exch += end_rexch - start_rexch; + nentry.time[0] = read_time; + nentry.time[1] = rcomm_time; + nentry.time[2] = read_exch; + if (my_aggregator == fh->f_rank) + nentry.aggregator = 1; + else + nentry.aggregator = 0; + nentry.nprocs_for_coll = dynamic_gen2_num_io_procs; + if (!fh->f_full_print_queue(READ_PRINT_QUEUE)){ + fh->f_register_print_entry(READ_PRINT_QUEUE, + nentry); + } +#endif + +exit: + if (!recvbuf_is_contiguous) { + if (NULL != receive_buf) { + free (receive_buf); + receive_buf = NULL; + } + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + if (NULL != sorted) { + free (sorted); + sorted = NULL; + } + if (NULL != global_iov_array) { + free (global_iov_array); + global_iov_array = NULL; + } + if (NULL != fview_count) { + free (fview_count); + fview_count = NULL; + } + if (NULL != decoded_iov) { + free (decoded_iov); + decoded_iov = NULL; + } + if (NULL != local_iov_array){ + free(local_iov_array); + local_iov_array=NULL; + } + + if (NULL != displs) { + free (displs); + displs = NULL; + } + if 
(my_aggregator == fh->f_rank) { + + if (NULL != sorted_file_offsets){ + free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + if (NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements= NULL; + } + if (NULL != sendtype){ + for (i = 0; i < fh->f_procs_per_group; i++) { + if ( MPI_DATATYPE_NULL != sendtype[i] ) { + ompi_datatype_destroy(&sendtype[i]); + } + } + free(sendtype); + sendtype=NULL; + } + + if (NULL != disp_index){ + free(disp_index); + disp_index = NULL; + } + + if ( NULL != blocklen_per_process){ + for(l=0;lf_procs_per_group;l++){ + if (NULL != blocklen_per_process[l]){ + free(blocklen_per_process[l]); + blocklen_per_process[l] = NULL; + } + } + + free(blocklen_per_process); + blocklen_per_process = NULL; + } + + if (NULL != displs_per_process){ + for (l=0; if_procs_per_group; l++){ + if (NULL != displs_per_process[l]){ + free(displs_per_process[l]); + displs_per_process[l] = NULL; + } + } + free(displs_per_process); + displs_per_process = NULL; + } + if ( NULL != send_req ) { + free ( send_req ); + send_req = NULL; + } + } + return ret; +} + + +static int read_heap_sort (mca_io_ompio_local_io_array *io_array, + int num_entries, + int *sorted) +{ + int i = 0; + int j = 0; + int left = 0; + int right = 0; + int largest = 0; + int heap_size = num_entries - 1; + int temp = 0; + unsigned char done = 0; + int* temp_arr = NULL; + + temp_arr = (int*)malloc(num_entries*sizeof(int)); + if (NULL == temp_arr) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + temp_arr[0] = 0; + for (i = 1; i < num_entries; ++i) { + temp_arr[i] = i; + } + /* num_entries can be a large no. 
so NO RECURSION */ + for (i = num_entries/2-1 ; i>=0 ; i--) { + done = 0; + j = i; + largest = j; + + while (!done) { + left = j*2+1; + right = j*2+2; + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { + largest = left; + } + else { + largest = j; + } + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > + io_array[temp_arr[largest]].offset)) { + largest = right; + } + if (largest != j) { + temp = temp_arr[largest]; + temp_arr[largest] = temp_arr[j]; + temp_arr[j] = temp; + j = largest; + } + else { + done = 1; + } + } + } + + for (i = num_entries-1; i >=1; --i) { + temp = temp_arr[0]; + temp_arr[0] = temp_arr[i]; + temp_arr[i] = temp; + heap_size--; + done = 0; + j = 0; + largest = j; + + while (!done) { + left = j*2+1; + right = j*2+2; + + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > + io_array[temp_arr[j]].offset)) { + largest = left; + } + else { + largest = j; + } + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > + io_array[temp_arr[largest]].offset)) { + largest = right; + } + if (largest != j) { + temp = temp_arr[largest]; + temp_arr[largest] = temp_arr[j]; + temp_arr[j] = temp; + j = largest; + } + else { + done = 1; + } + } + sorted[i] = temp_arr[i]; + } + sorted[0] = temp_arr[0]; + + if (NULL != temp_arr) { + free(temp_arr); + temp_arr = NULL; + } + return OMPI_SUCCESS; +} + + + diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c new file mode 100644 index 00000000000..adf7bbb6118 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c @@ -0,0 +1,1700 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. 
All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_dynamic_gen2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include + + +#define DEBUG_ON 0 +#define FCOLL_DYNAMIC_GEN2_SHUFFLE_TAG 123 + +/*Used for loading file-offsets per aggregator*/ +typedef struct mca_io_ompio_local_io_array{ + OMPI_MPI_OFFSET_TYPE offset; + MPI_Aint length; + int process_id; +}mca_io_ompio_local_io_array; + +typedef struct mca_io_ompio_aggregator_data { + int *disp_index, *sorted, *fview_count, n; + int **blocklen_per_process; + MPI_Aint **displs_per_process, total_bytes, bytes_per_cycle, total_bytes_written; + MPI_Comm comm; + char *buf, *global_buf, *prev_global_buf; + ompi_datatype_t **recvtype, **prev_recvtype; + struct iovec *global_iov_array; + int current_index, current_position; + int bytes_to_write_in_cycle, bytes_remaining, procs_per_group; + int *procs_in_group, iov_index; + bool sendbuf_is_contiguous, prev_sendbuf_is_contiguous; + int bytes_sent, prev_bytes_sent; + struct iovec *decoded_iov; + int bytes_to_write, prev_bytes_to_write; + mca_io_ompio_io_array_t *io_array, *prev_io_array; + int num_io_entries, prev_num_io_entries; + char *send_buf, *prev_send_buf; +} mca_io_ompio_aggregator_data; + + +#define SWAP_REQUESTS(_r1,_r2) { \ + ompi_request_t **_t=_r1; \ + _r1=_r2; \ + _r2=_t;} + +#define 
SWAP_AGGR_POINTERS(_aggr,_num) { \ + int _i; \ + char *_t; \ + for (_i=0; _i<_num; _i++ ) { \ + _aggr[_i]->prev_io_array=_aggr[_i]->io_array; \ + _aggr[_i]->prev_num_io_entries=_aggr[_i]->num_io_entries; \ + _aggr[_i]->prev_send_buf=_aggr[_i]->send_buf; \ + _aggr[_i]->prev_bytes_sent=_aggr[_i]->bytes_sent; \ + _aggr[_i]->prev_sendbuf_is_contiguous=_aggr[_i]->sendbuf_is_contiguous; \ + _aggr[_i]->prev_bytes_to_write=_aggr[_i]->bytes_to_write; \ + _t=_aggr[_i]->prev_global_buf; \ + _aggr[_i]->prev_global_buf=_aggr[_i]->global_buf; \ + _aggr[_i]->global_buf=_t; \ + _t=(char *)_aggr[_i]->recvtype; \ + _aggr[_i]->recvtype=_aggr[_i]->prev_recvtype; \ + _aggr[_i]->prev_recvtype=(ompi_datatype_t **)_t; } \ +} + + + +static int shuffle_init ( int index, int cycles, int aggregator, int rank, + mca_io_ompio_aggregator_data *data, + ompi_request_t **reqs ); +static int write_init (mca_io_ompio_file_t *fh, int aggregator, mca_io_ompio_aggregator_data *aggr_data, int write_chunksize ); + +int mca_fcoll_dynamic_gen2_break_file_view ( struct iovec *decoded_iov, int iov_count, + struct iovec *local_iov_array, int local_count, + struct iovec ***broken_decoded_iovs, int **broken_iov_counts, + struct iovec ***broken_iov_arrays, int **broken_counts, + MPI_Aint **broken_total_lengths, + int stripe_count, int stripe_size); + + +int mca_fcoll_dynamic_gen2_get_configuration (mca_io_ompio_file_t *fh, int *dynamic_gen2_num_io_procs, + int **ret_aggregators); + + +static int local_heap_sort (mca_io_ompio_local_io_array *io_array, + int num_entries, + int *sorted); + +int mca_fcoll_dynamic_gen2_split_iov_array ( mca_io_ompio_file_t *fh, mca_io_ompio_io_array_t *work_array, + int num_entries, int *last_array_pos, int *last_pos_in_field, + int chunk_size ); + + +int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh, + const void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + int index = 0; + int cycles = 0; + int ret =0, l, i, j, 
bytes_per_cycle; + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + struct iovec *local_iov_array=NULL; + uint32_t total_fview_count = 0; + int local_count = 0; + ompi_request_t **reqs1=NULL,**reqs2=NULL; + ompi_request_t **curr_reqs=NULL,**prev_reqs=NULL; + mca_io_ompio_aggregator_data **aggr_data=NULL; + + int *displs = NULL; + int dynamic_gen2_num_io_procs; + size_t max_data = 0; + MPI_Aint *total_bytes_per_process = NULL; + + struct iovec **broken_iov_arrays=NULL; + struct iovec **broken_decoded_iovs=NULL; + int *broken_counts=NULL; + int *broken_iov_counts=NULL; + MPI_Aint *broken_total_lengths=NULL; + + int *aggregators=NULL; + int write_chunksize, *result_counts=NULL; + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + double write_time = 0.0, start_write_time = 0.0, end_write_time = 0.0; + double comm_time = 0.0, start_comm_time = 0.0, end_comm_time = 0.0; + double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0; + mca_io_ompio_print_entry nentry; +#endif + + + /************************************************************************** + ** 1. In case the data is not contigous in memory, decode it into an iovec + **************************************************************************/ + fh->f_get_bytes_per_agg ( (int *)&bytes_per_cycle ); + /* since we want to overlap 2 iterations, define the bytes_per_cycle to be half of what + the user requested */ + bytes_per_cycle =bytes_per_cycle/2; + + ret = fh->f_decode_datatype ((struct mca_io_ompio_file_t *) fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + if (OMPI_SUCCESS != ret ){ + goto exit; + } + + if ( MPI_STATUS_IGNORE != status ) { + status->_ucount = max_data; + } + + /* difference to the first generation of this function: + ** dynamic_gen2_num_io_procs should be the number of io_procs per group + ** consequently.Initially, we will have only 1 group. 
+ */ + if ( fh->f_stripe_count > 1 ) { + dynamic_gen2_num_io_procs = fh->f_stripe_count; + } + else { + fh->f_get_num_aggregators ( &dynamic_gen2_num_io_procs ); + } + + + if ( fh->f_stripe_size == 0 ) { + // EDGAR: just a quick heck for testing + fh->f_stripe_size = 65536; + } + if ( -1 == mca_fcoll_dynamic_gen2_write_chunksize ) { + write_chunksize = fh->f_stripe_size; + } + else { + write_chunksize = mca_fcoll_dynamic_gen2_write_chunksize; + } + + + ret = mca_fcoll_dynamic_gen2_get_configuration (fh, &dynamic_gen2_num_io_procs, &aggregators); + if (OMPI_SUCCESS != ret){ + goto exit; + } + + aggr_data = (mca_io_ompio_aggregator_data **) malloc ( dynamic_gen2_num_io_procs * + sizeof(mca_io_ompio_aggregator_data*)); + + for ( i=0; i< dynamic_gen2_num_io_procs; i++ ) { + // At this point we know the number of aggregators. If there is a correlation between + // number of aggregators and number of IO nodes, we know how many aggr_data arrays we need + // to allocate. + aggr_data[i] = (mca_io_ompio_aggregator_data *) calloc ( 1, sizeof(mca_io_ompio_aggregator_data)); + aggr_data[i]->procs_per_group = fh->f_procs_per_group; + aggr_data[i]->procs_in_group = fh->f_procs_in_group; + aggr_data[i]->comm = fh->f_comm; + aggr_data[i]->buf = (char *)buf; // should not be used in the new version. + aggr_data[i]->sendbuf_is_contiguous = false; //safe assumption for right now + aggr_data[i]->prev_sendbuf_is_contiguous = false; //safe assumption for right now + } + + /********************************************************************* + *** 2. Generate the local offsets/lengths array corresponding to + *** this write operation + ********************************************************************/ + ret = fh->f_generate_current_file_view( (struct mca_io_ompio_file_t *) fh, + max_data, + &local_iov_array, + &local_count); + if (ret != OMPI_SUCCESS){ + goto exit; + } + + /************************************************************************* + ** 2b. 
Separate the local_iov_array entries based on the number of aggregators + *************************************************************************/ + // broken_iov_arrays[0] contains broken_counts[0] entries to aggregator 0, + // broken_iov_arrays[1] contains broken_counts[1] entries to aggregator 1, etc. + ret = mca_fcoll_dynamic_gen2_break_file_view ( decoded_iov, iov_count, + local_iov_array, local_count, + &broken_decoded_iovs, &broken_iov_counts, + &broken_iov_arrays, &broken_counts, + &broken_total_lengths, + dynamic_gen2_num_io_procs, fh->f_stripe_size); + + + /************************************************************************** + ** 3. Determine the total amount of data to be written and no. of cycles + **************************************************************************/ + total_bytes_per_process = (MPI_Aint*)malloc + (dynamic_gen2_num_io_procs * fh->f_procs_per_group*sizeof(MPI_Aint)); + if (NULL == total_bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_comm_time = MPI_Wtime(); +#endif + if ( 1 == mca_fcoll_dynamic_gen2_num_groups ) { + ret = fh->f_comm->c_coll.coll_allgather (broken_total_lengths, + dynamic_gen2_num_io_procs, + MPI_LONG, + total_bytes_per_process, + dynamic_gen2_num_io_procs, + MPI_LONG, + fh->f_comm, + fh->f_comm->c_coll.coll_allgather_module); + } + else { + ret = fh->f_allgather_array (broken_total_lengths, + dynamic_gen2_num_io_procs, + MPI_LONG, + total_bytes_per_process, + dynamic_gen2_num_io_procs, + MPI_LONG, + 0, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + + if( OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += (end_comm_time - start_comm_time); +#endif + + cycles=0; + for ( i=0; if_procs_per_group ; j++) { + broken_total_lengths[i] += total_bytes_per_process[j*dynamic_gen2_num_io_procs + i]; + } +#if DEBUG_ON + 
printf("%d: Overall broken_total_lengths[%d] = %ld\n", fh->f_rank, i, broken_total_lengths[i]); +#endif + if ( ceil((double)broken_total_lengths[i]/bytes_per_cycle) > cycles ) { + cycles = ceil((double)broken_total_lengths[i]/bytes_per_cycle); + } + } + + if (NULL != total_bytes_per_process) { + free (total_bytes_per_process); + total_bytes_per_process = NULL; + } + + result_counts = (int *) malloc ( dynamic_gen2_num_io_procs * fh->f_procs_per_group * sizeof(int) ); + if ( NULL == result_counts ) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_comm_time = MPI_Wtime(); +#endif + if ( 1 == mca_fcoll_dynamic_gen2_num_groups ) { + ret = fh->f_comm->c_coll.coll_allgather(broken_counts, + dynamic_gen2_num_io_procs, + MPI_INT, + result_counts, + dynamic_gen2_num_io_procs, + MPI_INT, + fh->f_comm, + fh->f_comm->c_coll.coll_allgather_module); + } + else { + ret = fh->f_allgather_array (broken_counts, + dynamic_gen2_num_io_procs, + MPI_INT, + result_counts, + dynamic_gen2_num_io_procs, + MPI_INT, + 0, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + if( OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += (end_comm_time - start_comm_time); +#endif + + /************************************************************* + *** 4. 
Allgather the offset/lengths array from all processes + *************************************************************/ + for ( i=0; i< dynamic_gen2_num_io_procs; i++ ) { + aggr_data[i]->total_bytes = broken_total_lengths[i]; + aggr_data[i]->decoded_iov = broken_decoded_iovs[i]; + aggr_data[i]->fview_count = (int *) malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == aggr_data[i]->fview_count) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + for ( j=0; j f_procs_per_group; j++ ) { + aggr_data[i]->fview_count[j] = result_counts[dynamic_gen2_num_io_procs*j+i]; + } + displs = (int*) malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + displs[0] = 0; + total_fview_count = aggr_data[i]->fview_count[0]; + for (j=1 ; jf_procs_per_group ; j++) { + total_fview_count += aggr_data[i]->fview_count[j]; + displs[j] = displs[j-1] + aggr_data[i]->fview_count[j-1]; + } + +#if DEBUG_ON + printf("total_fview_count : %d\n", total_fview_count); + if (aggregators[i] == fh->f_rank) { + for (j=0 ; jf_procs_per_group ; i++) { + printf ("%d: PROCESS: %d ELEMENTS: %d DISPLS: %d\n", + fh->f_rank, + j, + aggr_data[i]->fview_count[j], + displs[j]); + } + } +#endif + + /* allocate the global iovec */ + if (0 != total_fview_count) { + aggr_data[i]->global_iov_array = (struct iovec*) malloc (total_fview_count * + sizeof(struct iovec)); + if (NULL == aggr_data[i]->global_iov_array){ + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_comm_time = MPI_Wtime(); +#endif + if ( 1 == mca_fcoll_dynamic_gen2_num_groups ) { + ret = fh->f_comm->c_coll.coll_allgatherv (broken_iov_arrays[i], + broken_counts[i], + fh->f_iov_type, + aggr_data[i]->global_iov_array, + aggr_data[i]->fview_count, + displs, + fh->f_iov_type, + fh->f_comm, + 
fh->f_comm->c_coll.coll_allgatherv_module ); + } + else { + ret = fh->f_allgatherv_array (broken_iov_arrays[i], + broken_counts[i], + fh->f_iov_type, + aggr_data[i]->global_iov_array, + aggr_data[i]->fview_count, + displs, + fh->f_iov_type, + aggregators[i], + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + if (OMPI_SUCCESS != ret){ + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += (end_comm_time - start_comm_time); +#endif + + /**************************************************************************************** + *** 5. Sort the global offset/lengths list based on the offsets. + *** The result of the sort operation is the 'sorted', an integer array, + *** which contains the indexes of the global_iov_array based on the offset. + *** For example, if global_iov_array[x].offset is followed by global_iov_array[y].offset + *** in the file, and that one is followed by global_iov_array[z].offset, than + *** sorted[0] = x, sorted[1]=y and sorted[2]=z; + ******************************************************************************************/ + if (0 != total_fview_count) { + aggr_data[i]->sorted = (int *)malloc (total_fview_count * sizeof(int)); + if (NULL == aggr_data[i]->sorted) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + fh->f_sort_iovec (aggr_data[i]->global_iov_array, total_fview_count, aggr_data[i]->sorted); + } + + if (NULL != local_iov_array){ + free(local_iov_array); + local_iov_array = NULL; + } + + if (NULL != displs){ + free(displs); + displs=NULL; + } + + +#if DEBUG_ON + if (my_aggregator == fh->f_rank) { + uint32_t tv=0; + for (tv=0 ; tvf_rank, + aggr_data[i]->global_iov_array[aggr_data[i]->sorted[tv]].iov_base, + aggr_data[i]->global_iov_array[aggr_data[i]->sorted[tv]].iov_len); + } + } +#endif + /************************************************************* + *** 6. 
Determine the number of cycles required to execute this + *** operation + *************************************************************/ + + aggr_data[i]->bytes_per_cycle = bytes_per_cycle; + + if (aggregators[i] == fh->f_rank) { + aggr_data[i]->disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == aggr_data[i]->disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + aggr_data[i]->blocklen_per_process = (int **)calloc (fh->f_procs_per_group, sizeof (int*)); + if (NULL == aggr_data[i]->blocklen_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + aggr_data[i]->displs_per_process = (MPI_Aint **)calloc (fh->f_procs_per_group, sizeof (MPI_Aint*)); + if (NULL == aggr_data[i]->displs_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + + aggr_data[i]->global_buf = (char *) malloc (bytes_per_cycle); + aggr_data[i]->prev_global_buf = (char *) malloc (bytes_per_cycle); + if (NULL == aggr_data[i]->global_buf || NULL == aggr_data[i]->prev_global_buf){ + opal_output(1, "OUT OF MEMORY"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + aggr_data[i]->recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * + sizeof(ompi_datatype_t *)); + aggr_data[i]->prev_recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * + sizeof(ompi_datatype_t *)); + if (NULL == aggr_data[i]->recvtype || NULL == aggr_data[i]->prev_recvtype) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + for(l=0;lf_procs_per_group;l++){ + aggr_data[i]->recvtype[l] = MPI_DATATYPE_NULL; + aggr_data[i]->prev_recvtype[l] = MPI_DATATYPE_NULL; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_exch = MPI_Wtime(); +#endif + } + + reqs1 = (ompi_request_t **)malloc ((fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs *sizeof(ompi_request_t *)); + reqs2 = (ompi_request_t 
**)malloc ((fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs *sizeof(ompi_request_t *)); + if ( NULL == reqs1 || NULL == reqs2 ) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + for (l=0,i=0; i < dynamic_gen2_num_io_procs; i++ ) { + for ( j=0; j< (fh->f_procs_per_group+1); j++ ) { + reqs1[l] = MPI_REQUEST_NULL; + reqs2[l] = MPI_REQUEST_NULL; + l++; + } + } + + curr_reqs = reqs1; + prev_reqs = reqs2; + + /* Initialize communication for iteration 0 */ + if ( cycles > 0 ) { + for ( i=0; if_rank, aggr_data[i], + &curr_reqs[i*(fh->f_procs_per_group + 1)] ); + if ( OMPI_SUCCESS != ret ) { + goto exit; + } + } + } + + + for (index = 1; index < cycles; index++) { + SWAP_REQUESTS(curr_reqs,prev_reqs); + SWAP_AGGR_POINTERS(aggr_data,dynamic_gen2_num_io_procs); + + /* Initialize communication for iteration i */ + for ( i=0; if_rank, aggr_data[i], + &curr_reqs[i*(fh->f_procs_per_group + 1)] ); + if ( OMPI_SUCCESS != ret ) { + goto exit; + } + } + + /* Finish communication for iteration i-1 */ + ret = ompi_request_wait_all ( (fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs, + prev_reqs, MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + + + /* Write data for iteration i-1 */ + for ( i=0; iprev_sendbuf_is_contiguous && aggr_data[i]->prev_bytes_sent) { + free (aggr_data[i]->prev_send_buf); + } + } + + } /* end for (index = 0; index < cycles; index++) */ + + + /* Finish communication for iteration i = cycles-1 */ + if ( cycles > 0 ) { + SWAP_REQUESTS(curr_reqs,prev_reqs); + SWAP_AGGR_POINTERS(aggr_data,dynamic_gen2_num_io_procs); + + ret = ompi_request_wait_all ( (fh->f_procs_per_group + 1 )*dynamic_gen2_num_io_procs, + prev_reqs, MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + + /* Write data for iteration i=cycles-1 */ + for ( i=0; iprev_sendbuf_is_contiguous && aggr_data[i]->prev_bytes_sent) { + free (aggr_data[i]->prev_send_buf); + } + } + } + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + 
end_exch = MPI_Wtime(); + exch_write += end_exch - start_exch; + nentry.time[0] = write_time; + nentry.time[1] = comm_time; + nentry.time[2] = exch_write; + if (my_aggregator == fh->f_rank) + nentry.aggregator = 1; + else + nentry.aggregator = 0; + nentry.nprocs_for_coll = dynamic_gen2_num_io_procs; + if (!fh->f_full_print_queue(WRITE_PRINT_QUEUE)){ + fh->f_register_print_entry(WRITE_PRINT_QUEUE, + nentry); + } +#endif + + +exit : + + if ( NULL != aggr_data ) { + + for ( i=0; i< dynamic_gen2_num_io_procs; i++ ) { + if (aggregators[i] == fh->f_rank) { + if (NULL != aggr_data[i]->recvtype){ + for (j =0; j< aggr_data[i]->procs_per_group; j++) { + if ( MPI_DATATYPE_NULL != aggr_data[i]->recvtype[j] ) { + ompi_datatype_destroy(&aggr_data[i]->recvtype[j]); + } + if ( MPI_DATATYPE_NULL != aggr_data[i]->prev_recvtype[j] ) { + ompi_datatype_destroy(&aggr_data[i]->prev_recvtype[j]); + } + + } + free(aggr_data[i]->recvtype); + free(aggr_data[i]->prev_recvtype); + } + + free (aggr_data[i]->disp_index); + free (aggr_data[i]->global_buf); + free (aggr_data[i]->prev_global_buf); + for(l=0;lprocs_per_group;l++){ + free (aggr_data[i]->blocklen_per_process[l]); + free (aggr_data[i]->displs_per_process[l]); + } + + free (aggr_data[i]->blocklen_per_process); + free (aggr_data[i]->displs_per_process); + } + free (aggr_data[i]->sorted); + free (aggr_data[i]->global_iov_array); + free (aggr_data[i]->fview_count); + free (aggr_data[i]->decoded_iov); + + free (aggr_data[i]); + } + free (aggr_data); + } + free(displs); + free(decoded_iov); + free(broken_counts); + free(broken_total_lengths); + free(broken_iov_counts); + free(broken_decoded_iovs); // decoded_iov arrays[i] were freed as aggr_data[i]->decoded_iov; + if ( NULL != broken_iov_arrays ) { + for (i=0; if_procs_in_group); + fh->f_procs_in_group=NULL; + fh->f_procs_per_group=0; + free(reqs1); + free(reqs2); + free(result_counts); + + + return OMPI_SUCCESS; +} + + +static int write_init (mca_io_ompio_file_t *fh, int aggregator, 
mca_io_ompio_aggregator_data *aggr_data, int write_chunksize ) +{ + int ret=OMPI_SUCCESS; + int last_array_pos=0; + int last_pos=0; + + + if ( aggregator == fh->f_rank && aggr_data->prev_num_io_entries) { + while ( aggr_data->prev_bytes_to_write > 0 ) { + aggr_data->prev_bytes_to_write -= mca_fcoll_dynamic_gen2_split_iov_array (fh, aggr_data->prev_io_array, + aggr_data->prev_num_io_entries, + &last_array_pos, &last_pos, + write_chunksize ); +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_write_time = MPI_Wtime(); +#endif + if ( 0 > fh->f_fbtl->fbtl_pwritev (fh)) { + free ( aggr_data->prev_io_array); + opal_output (1, "dynamic_gen2_write_all: fbtl_pwritev failed\n"); + ret = OMPI_ERROR; + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_write_time = MPI_Wtime(); + write_time += end_write_time - start_write_time; +#endif + } + free ( fh->f_io_array ); + free ( aggr_data->prev_io_array); + } + +exit: + + fh->f_io_array=NULL; + fh->f_num_of_io_entries=0; + + return ret; +} + +static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_io_ompio_aggregator_data *data, + ompi_request_t **reqs ) +{ + int bytes_sent = 0; + int blocks=0, temp_pindex; + int i, j, l, ret; + int entries_per_aggregator=0; + mca_io_ompio_local_io_array *file_offsets_for_agg=NULL; + int *sorted_file_offsets=NULL; + int temp_index=0; + MPI_Aint *memory_displacements=NULL; + int *temp_disp_index=NULL; + MPI_Aint global_count = 0; + + data->num_io_entries = 0; + data->bytes_sent = 0; + data->io_array=NULL; + data->send_buf=NULL; + /********************************************************************** + *** 7a. 
Getting ready for next cycle: initializing and freeing buffers + **********************************************************************/ + if (aggregator == rank) { + + if (NULL != data->recvtype){ + for (i =0; i< data->procs_per_group; i++) { + if ( MPI_DATATYPE_NULL != data->recvtype[i] ) { + ompi_datatype_destroy(&data->recvtype[i]); + data->recvtype[i] = MPI_DATATYPE_NULL; + } + } + } + + for(l=0;lprocs_per_group;l++){ + data->disp_index[l] = 1; + + free(data->blocklen_per_process[l]); + free(data->displs_per_process[l]); + + data->blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); + data->displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); + if (NULL == data->displs_per_process[l] || NULL == data->blocklen_per_process[l]){ + opal_output (1, "OUT OF MEMORY for displs\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + } /* (aggregator == rank */ + + /************************************************************************** + *** 7b. Determine the number of bytes to be actually written in this cycle + **************************************************************************/ + int local_cycles= ceil((double)data->total_bytes / data->bytes_per_cycle); + if ( index < (local_cycles -1) ) { + data->bytes_to_write_in_cycle = data->bytes_per_cycle; + } + else if ( index == (local_cycles -1)) { + data->bytes_to_write_in_cycle = data->total_bytes - data->bytes_per_cycle*index ; + } + else { + data->bytes_to_write_in_cycle = 0; + } + data->bytes_to_write = data->bytes_to_write_in_cycle; + +#if DEBUG_ON + if (aggregator == rank) { + printf ("****%d: CYCLE %d Bytes %lld**********\n", + rank, + index, + data->bytes_to_write_in_cycle); + } +#endif + /********************************************************** + **Gather the Data from all the processes at the writers ** + *********************************************************/ + +#if DEBUG_ON + printf("bytes_to_write_in_cycle: %ld, cycle : %d\n", data->bytes_to_write_in_cycle, + index); 
+#endif + + /***************************************************************** + *** 7c. Calculate how much data will be contributed in this cycle + *** by each process + *****************************************************************/ + + /* The blocklen and displs calculation only done at aggregators!*/ + while (data->bytes_to_write_in_cycle) { + + /* This next block identifies which process is the holder + ** of the sorted[current_index] element; + */ + blocks = data->fview_count[0]; + for (j=0 ; jprocs_per_group ; j++) { + if (data->sorted[data->current_index] < blocks) { + data->n = j; + break; + } + else { + blocks += data->fview_count[j+1]; + } + } + + if (data->bytes_remaining) { + /* Finish up a partially used buffer from the previous cycle */ + + if (data->bytes_remaining <= data->bytes_to_write_in_cycle) { + /* The data fits completely into the block */ + if (aggregator == rank) { + data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_remaining; + data->displs_per_process[data->n][data->disp_index[data->n] - 1] = + (OPAL_PTRDIFF_TYPE)data->global_iov_array[data->sorted[data->current_index]].iov_base + + (data->global_iov_array[data->sorted[data->current_index]].iov_len + - data->bytes_remaining); + + /* In this cases the length is consumed so allocating for + next displacement and blocklength*/ + data->blocklen_per_process[data->n] = (int *) realloc + ((void *)data->blocklen_per_process[data->n], (data->disp_index[data->n]+1)*sizeof(int)); + data->displs_per_process[data->n] = (MPI_Aint *) realloc + ((void *)data->displs_per_process[data->n], (data->disp_index[data->n]+1)*sizeof(MPI_Aint)); + data->blocklen_per_process[data->n][data->disp_index[data->n]] = 0; + data->displs_per_process[data->n][data->disp_index[data->n]] = 0; + data->disp_index[data->n] += 1; + } + if (data->procs_in_group[data->n] == rank) { + bytes_sent += data->bytes_remaining; + } + data->current_index ++; + data->bytes_to_write_in_cycle -= 
data->bytes_remaining; + data->bytes_remaining = 0; +// continue; + } + else { + /* the remaining data from the previous cycle is larger than the + data->bytes_to_write_in_cycle, so we have to segment again */ + if (aggregator == rank) { + data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle; + data->displs_per_process[data->n][data->disp_index[data->n] - 1] = + (OPAL_PTRDIFF_TYPE)data->global_iov_array[data->sorted[data->current_index]].iov_base + + (data->global_iov_array[data->sorted[data->current_index]].iov_len + - data->bytes_remaining); + } + + if (data->procs_in_group[data->n] == rank) { + bytes_sent += data->bytes_to_write_in_cycle; + } + data->bytes_remaining -= data->bytes_to_write_in_cycle; + data->bytes_to_write_in_cycle = 0; + break; + } + } + else { + /* No partially used entry available, have to start a new one */ + if (data->bytes_to_write_in_cycle < + (MPI_Aint) data->global_iov_array[data->sorted[data->current_index]].iov_len) { + /* This entry has more data than we can sendin one cycle */ + if (aggregator == rank) { + data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle; + data->displs_per_process[data->n][data->disp_index[data->n] - 1] = + (OPAL_PTRDIFF_TYPE)data->global_iov_array[data->sorted[data->current_index]].iov_base ; + } + if (data->procs_in_group[data->n] == rank) { + bytes_sent += data->bytes_to_write_in_cycle; + + } + data->bytes_remaining = data->global_iov_array[data->sorted[data->current_index]].iov_len - + data->bytes_to_write_in_cycle; + data->bytes_to_write_in_cycle = 0; + break; + } + else { + /* Next data entry is less than data->bytes_to_write_in_cycle */ + if (aggregator == rank) { + data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = + data->global_iov_array[data->sorted[data->current_index]].iov_len; + data->displs_per_process[data->n][data->disp_index[data->n] - 1] = (OPAL_PTRDIFF_TYPE) + 
data->global_iov_array[data->sorted[data->current_index]].iov_base; + + /*realloc for next blocklength + and assign this displacement and check for next displs as + the total length of this entry has been consumed!*/ + data->blocklen_per_process[data->n] = + (int *) realloc ((void *)data->blocklen_per_process[data->n], (data->disp_index[data->n]+1)*sizeof(int)); + data->displs_per_process[data->n] = (MPI_Aint *)realloc + ((void *)data->displs_per_process[data->n], (data->disp_index[data->n]+1)*sizeof(MPI_Aint)); + data->blocklen_per_process[data->n][data->disp_index[data->n]] = 0; + data->displs_per_process[data->n][data->disp_index[data->n]] = 0; + data->disp_index[data->n] += 1; + } + if (data->procs_in_group[data->n] == rank) { + bytes_sent += data->global_iov_array[data->sorted[data->current_index]].iov_len; + } + data->bytes_to_write_in_cycle -= + data->global_iov_array[data->sorted[data->current_index]].iov_len; + data->current_index ++; +// continue; + } + } + } + + + /************************************************************************* + *** 7d. 
Calculate the displacement on where to put the data and allocate + *** the recieve buffer (global_buf) + *************************************************************************/ + if (aggregator == rank) { + entries_per_aggregator=0; + for (i=0;iprocs_per_group; i++){ + for (j=0;jdisp_index[i];j++){ + if (data->blocklen_per_process[i][j] > 0) + entries_per_aggregator++ ; + } + } + +#if DEBUG_ON + printf("%d: cycle: %d, bytes_sent: %d\n ",rank,index, + bytes_sent); + printf("%d : Entries per aggregator : %d\n",rank,entries_per_aggregator); +#endif + + if (entries_per_aggregator > 0){ + file_offsets_for_agg = (mca_io_ompio_local_io_array *) + malloc(entries_per_aggregator*sizeof(mca_io_ompio_local_io_array)); + if (NULL == file_offsets_for_agg) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + sorted_file_offsets = (int *) + malloc (entries_per_aggregator*sizeof(int)); + if (NULL == sorted_file_offsets){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + /*Moving file offsets to an IO array!*/ + temp_index = 0; + + for (i=0;iprocs_per_group; i++){ + for(j=0;jdisp_index[i];j++){ + if (data->blocklen_per_process[i][j] > 0){ + file_offsets_for_agg[temp_index].length = + data->blocklen_per_process[i][j]; + file_offsets_for_agg[temp_index].process_id = i; + file_offsets_for_agg[temp_index].offset = + data->displs_per_process[i][j]; + temp_index++; + +#if DEBUG_ON + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,rank); + + printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", + data->procs_in_group[i],j, + data->blocklen_per_process[i][j],j, + data->displs_per_process[i][j], + rank); +#endif + } + } + } + + /* Sort the displacements for each aggregator*/ + local_heap_sort (file_offsets_for_agg, + entries_per_aggregator, + sorted_file_offsets); + + /*create contiguous memory displacements + based on blocklens on the same displs array + and map it to 
this aggregator's actual + file-displacements (this is in the io-array created above)*/ + memory_displacements = (MPI_Aint *) malloc + (entries_per_aggregator * sizeof(MPI_Aint)); + + memory_displacements[sorted_file_offsets[0]] = 0; + for (i=1; iprocs_per_group * sizeof (int)); + if (NULL == temp_disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + /*Now update the displacements array with memory offsets*/ + global_count = 0; + for (i=0;idispls_per_process[temp_pindex][temp_disp_index[temp_pindex]] = + memory_displacements[sorted_file_offsets[i]]; + if (temp_disp_index[temp_pindex] < data->disp_index[temp_pindex]) + temp_disp_index[temp_pindex] += 1; + else{ + printf("temp_disp_index[%d]: %d is greater than disp_index[%d]: %d\n", + temp_pindex, temp_disp_index[temp_pindex], + temp_pindex, data->disp_index[temp_pindex]); + } + global_count += + file_offsets_for_agg[sorted_file_offsets[i]].length; + } + + if (NULL != temp_disp_index){ + free(temp_disp_index); + temp_disp_index = NULL; + } + +#if DEBUG_ON + + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,rank); + for (i=0;iprocs_per_group; i++){ + for(j=0;jdisp_index[i];j++){ + if (data->blocklen_per_process[i][j] > 0){ + printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", + data->procs_in_group[i],j, + data->blocklen_per_process[i][j],j, + data->displs_per_process[i][j], + rank); + + } + } + } + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,rank); + for (i=0; iprocs_per_group; i++) { + size_t datatype_size; + reqs[i] = MPI_REQUEST_NULL; + if ( 0 < data->disp_index[i] ) { + ompi_datatype_create_hindexed(data->disp_index[i], + data->blocklen_per_process[i], + data->displs_per_process[i], + MPI_BYTE, + &data->recvtype[i]); + ompi_datatype_commit(&data->recvtype[i]); + opal_datatype_type_size(&data->recvtype[i]->super, &datatype_size); + + if (datatype_size){ + ret = 
MCA_PML_CALL(irecv(data->global_buf, + 1, + data->recvtype[i], + data->procs_in_group[i], + FCOLL_DYNAMIC_GEN2_SHUFFLE_TAG+index, + data->comm, + &reqs[i])); + if (OMPI_SUCCESS != ret){ + goto exit; + } + } + } + } + } /* end if (entries_per_aggr > 0 ) */ + }/* end if (aggregator == rank ) */ + + if ( data->sendbuf_is_contiguous ) { + data->send_buf = &((char*)data->buf)[data->total_bytes_written]; + } + else if (bytes_sent) { + /* allocate a send buffer and copy the data that needs + to be sent into it in case the data is non-contigous + in memory */ + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + data->send_buf = malloc (bytes_sent); + if (NULL == data->send_buf) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + remaining = bytes_sent; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (data->decoded_iov[data->iov_index].iov_base) + data->current_position; + + if (remaining >= + (data->decoded_iov[data->iov_index].iov_len - data->current_position)) { + memcpy (data->send_buf+temp_position, + (IOVBASE_TYPE *)mem_address, + data->decoded_iov[data->iov_index].iov_len - data->current_position); + remaining = remaining - + (data->decoded_iov[data->iov_index].iov_len - data->current_position); + temp_position = temp_position + + (data->decoded_iov[data->iov_index].iov_len - data->current_position); + data->iov_index = data->iov_index + 1; + data->current_position = 0; + } + else { + memcpy (data->send_buf+temp_position, + (IOVBASE_TYPE *) mem_address, + remaining); + data->current_position += remaining; + remaining = 0; + } + } + } + data->total_bytes_written += bytes_sent; + data->bytes_sent = bytes_sent; + /* Gather the sendbuf from each process in appropritate locations in + aggregators*/ + + if (bytes_sent){ + ret = MCA_PML_CALL(isend(data->send_buf, + bytes_sent, + MPI_BYTE, + aggregator, + FCOLL_DYNAMIC_GEN2_SHUFFLE_TAG+index, + MCA_PML_BASE_SEND_STANDARD, + 
data->comm, + &reqs[data->procs_per_group])); + + + if ( OMPI_SUCCESS != ret ){ + goto exit; + } + + } + +#if DEBUG_ON + if (aggregator == rank){ + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,rank); + for (i=0 ; iglobal_buf)[i]); + } +#endif + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += (end_comm_time - start_comm_time); +#endif + /********************************************************** + *** 7f. Create the io array, and pass it to fbtl + *********************************************************/ + + if (aggregator == rank && entries_per_aggregator>0) { + + + data->io_array = (mca_io_ompio_io_array_t *) malloc + (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); + if (NULL == data->io_array) { + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + data->num_io_entries = 0; + /*First entry for every aggregator*/ + data->io_array[0].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; + data->io_array[0].length = + file_offsets_for_agg[sorted_file_offsets[0]].length; + data->io_array[0].memory_address = + data->global_buf+memory_displacements[sorted_file_offsets[0]]; + data->num_io_entries++; + + for (i=1;iio_array[data->num_io_entries - 1].length += + file_offsets_for_agg[sorted_file_offsets[i]].length; + } + else { + data->io_array[data->num_io_entries].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; + data->io_array[data->num_io_entries].length = + file_offsets_for_agg[sorted_file_offsets[i]].length; + data->io_array[data->num_io_entries].memory_address = + data->global_buf+memory_displacements[sorted_file_offsets[i]]; + data->num_io_entries++; + } + + } + +#if DEBUG_ON + printf("*************************** %d\n", num_of_io_entries); + for (i=0 ; i= rest ) { + blocklen = rest; + temp_offset = offset+rest; + temp_len = len - rest; + } + else { + blocklen = len; + 
temp_offset = 0; + temp_len = 0; + } + + broken_file_iovs[owner][broken_file_counts[owner]].iov_base = (void *)offset; + broken_file_iovs[owner][broken_file_counts[owner]].iov_len = blocklen; +#if DEBUG_ON + printf("%d: owner=%d b_file_iovs[%d].base=%ld .len=%d \n", rank, owner, + broken_file_counts[owner], + broken_file_iovs[owner][broken_file_counts[owner]].iov_base, + broken_file_iovs[owner][broken_file_counts[owner]].iov_len ); +#endif + do { + if ( memlen >= blocklen ) { + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_base = (void *) memoffset; + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_len = blocklen; + memoffset += blocklen; + memlen -= blocklen; + blocklen = 0; + + if ( 0 == memlen ) { + j++; + if ( j < mem_count ) { + memoffset = (off_t) mem_iov[j].iov_base; + memlen = mem_iov[j].iov_len; + } + else + break; + } + } + else { + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_base = (void *) memoffset; + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_len = memlen; + blocklen -= memlen; + + j++; + if ( j < mem_count ) { + memoffset = (off_t) mem_iov[j].iov_base; + memlen = mem_iov[j].iov_len; + } + else + break; + } +#if DEBUG_ON + printf("%d: owner=%d b_mem_iovs[%d].base=%ld .len=%d\n", rank, owner, + broken_mem_counts[owner], + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_base, + broken_mem_iovs[owner][broken_mem_counts[owner]].iov_len); +#endif + + broken_mem_counts[owner]++; + if ( broken_mem_counts[owner] >= max_lengths[owner][0] ) { + broken_mem_iovs[owner] = (struct iovec*) realloc ( broken_mem_iovs[owner], + mem_count * block[owner][0] * + sizeof(struct iovec )); + max_lengths[owner][0] = mem_count * block[owner][0]; + block[owner][0]++; + } + + } while ( blocklen > 0 ); + + broken_file_counts[owner]++; + if ( broken_file_counts[owner] >= max_lengths[owner][1] ) { + broken_file_iovs[owner] = (struct iovec*) realloc ( broken_file_iovs[owner], + file_count * block[owner][1] * + sizeof(struct iovec )); + 
max_lengths[owner][1] = file_count * block[owner][1]; + block[owner][1]++; + } + + offset = temp_offset; + len = temp_len; + } while( temp_len > 0 ); + + i++; + } + + + /* Step 2: recalculating the total lengths per aggregator */ + for ( i=0; i< stripe_count; i++ ) { + for ( j=0; jf_stripe_count; + if ( num_io_procs < 1 ) { + num_io_procs = 1; + } + } + if ( num_io_procs > fh->f_size ) { + num_io_procs = fh->f_size; + } + + fh->f_procs_per_group = fh->f_size; + fh->f_procs_in_group = (int *) malloc ( sizeof(int) * fh->f_size ); + if ( NULL == fh->f_procs_in_group) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i=0; if_size; i++ ) { + fh->f_procs_in_group[i]=i; + } + + + aggregators = (int *) malloc ( num_io_procs * sizeof(int)); + if ( NULL == aggregators ) { + // fh->procs_in_group will be freed with the fh structure. No need to do it here. + return OMPI_ERR_OUT_OF_RESOURCE; + } + for ( i=0; if_size / num_io_procs; + } + + *dynamic_gen2_num_io_procs = num_io_procs; + *ret_aggregators = aggregators; + + return OMPI_SUCCESS; +} + + +int mca_fcoll_dynamic_gen2_split_iov_array ( mca_io_ompio_file_t *fh, mca_io_ompio_io_array_t *io_array, int num_entries, + int *ret_array_pos, int *ret_pos, int chunk_size ) +{ + + int array_pos = *ret_array_pos; + int pos = *ret_pos; + size_t bytes_written = 0; + size_t bytes_to_write = chunk_size; + + if ( 0 == array_pos && 0 == pos ) { + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc ( num_entries * sizeof(mca_io_ompio_io_array_t)); + if ( NULL == fh->f_io_array ){ + opal_output (1,"Could not allocate memory\n"); + return -1; + } + } + + int i=0; + while (bytes_to_write > 0 ) { + fh->f_io_array[i].memory_address = &(((char *)io_array[array_pos].memory_address)[pos]); + fh->f_io_array[i].offset = &(((char *)io_array[array_pos].offset)[pos]); + + if ( (io_array[array_pos].length - pos ) >= bytes_to_write ) { + fh->f_io_array[i].length = bytes_to_write; + } + else { + fh->f_io_array[i].length = io_array[array_pos].length - pos; + 
} + + pos += fh->f_io_array[i].length; + bytes_written += fh->f_io_array[i].length; + bytes_to_write-= fh->f_io_array[i].length; + i++; + + if ( pos == (int)io_array[array_pos].length ) { + pos = 0; + if ((array_pos + 1) < num_entries) { + array_pos++; + } + else { + break; + } + } + } + + fh->f_num_of_io_entries = i; + *ret_array_pos = array_pos; + *ret_pos = pos; + return bytes_written; +} + + +static int local_heap_sort (mca_io_ompio_local_io_array *io_array, + int num_entries, + int *sorted) +{ + int i = 0; + int j = 0; + int left = 0; + int right = 0; + int largest = 0; + int heap_size = num_entries - 1; + int temp = 0; + unsigned char done = 0; + int* temp_arr = NULL; + + temp_arr = (int*)malloc(num_entries*sizeof(int)); + if (NULL == temp_arr) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + temp_arr[0] = 0; + for (i = 1; i < num_entries; ++i) { + temp_arr[i] = i; + } + /* num_entries can be a large no. so NO RECURSION */ + for (i = num_entries/2-1 ; i>=0 ; i--) { + done = 0; + j = i; + largest = j; + + while (!done) { + left = j*2+1; + right = j*2+2; + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { + largest = left; + } + else { + largest = j; + } + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > + io_array[temp_arr[largest]].offset)) { + largest = right; + } + if (largest != j) { + temp = temp_arr[largest]; + temp_arr[largest] = temp_arr[j]; + temp_arr[j] = temp; + j = largest; + } + else { + done = 1; + } + } + } + + for (i = num_entries-1; i >=1; --i) { + temp = temp_arr[0]; + temp_arr[0] = temp_arr[i]; + temp_arr[i] = temp; + heap_size--; + done = 0; + j = 0; + largest = j; + + while (!done) { + left = j*2+1; + right = j*2+2; + + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > + io_array[temp_arr[j]].offset)) { + largest = left; + } + else { + largest = j; + } + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > + 
io_array[temp_arr[largest]].offset)) { + largest = right; + } + if (largest != j) { + temp = temp_arr[largest]; + temp_arr[largest] = temp_arr[j]; + temp_arr[j] = temp; + j = largest; + } + else { + done = 1; + } + } + sorted[i] = temp_arr[i]; + } + sorted[0] = temp_arr[0]; + + if (NULL != temp_arr) { + free(temp_arr); + temp_arr = NULL; + } + return OMPI_SUCCESS; +} + diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_module.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_module.c new file mode 100644 index 00000000000..16070a9cbf8 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_module.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_dynamic_gen2.h" + +#include + +#include "mpi.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" + + +/* + * ******************************************************************* + * ************************ actions structure ************************ + * ******************************************************************* + */ +static mca_fcoll_base_module_1_0_0_t dynamic_gen2 = { + mca_fcoll_dynamic_gen2_module_init, + mca_fcoll_dynamic_gen2_module_finalize, + mca_fcoll_dynamic_gen2_file_read_all, + NULL, /* iread_all */ + mca_fcoll_dynamic_gen2_file_write_all, + NULL, /*iwrite_all */ + NULL, /* progress */ + NULL /* request_free */ +}; + +int +mca_fcoll_dynamic_gen2_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads) +{ + /* Nothing to do */ + + return OMPI_SUCCESS; +} + +mca_fcoll_base_module_1_0_0_t * +mca_fcoll_dynamic_gen2_component_file_query (mca_io_ompio_file_t *fh, int *priority) +{ + *priority = mca_fcoll_dynamic_gen2_priority; + if (0 >= mca_fcoll_dynamic_gen2_priority) { + return NULL; + } + + if (mca_fcoll_base_query_table (fh, "dynamic_gen2")) { + if (*priority < 50) { + *priority = 50; + } + } + + return &dynamic_gen2; +} + +int mca_fcoll_dynamic_gen2_component_file_unquery (mca_io_ompio_file_t *file) +{ + /* This function might be needed for some purposes later. 
for now it + * does not have anything to do since there are no steps which need + * to be undone if this module is not selected */ + + return OMPI_SUCCESS; +} + +int mca_fcoll_dynamic_gen2_module_init (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} + + +int mca_fcoll_dynamic_gen2_module_finalize (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/dynamic_gen2/owner.txt b/ompi/mca/fcoll/dynamic_gen2/owner.txt new file mode 100644 index 00000000000..2e9726c28a4 --- /dev/null +++ b/ompi/mca/fcoll/dynamic_gen2/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: UH +status: active diff --git a/ompi/mca/fcoll/fcoll.h b/ompi/mca/fcoll/fcoll.h index ec0a6fc23ee..8c82a66226f 100644 --- a/ompi/mca/fcoll/fcoll.h +++ b/ompi/mca/fcoll/fcoll.h @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -44,9 +46,9 @@ struct mca_fcoll_request_t; * This framework provides the abstraction for the collective file * read and write operations of MPI I/O. 
The interfaces include * blocking collective operations using the individual file pointer, - * blocking collective operations using explicit offsets and + * blocking collective operations using explicit offsets and * the split collective operations defined in MPI/O for the same. - * + * * These are the component function prototypes. These function pointers * go into the component structure. These functions (query() and finalize() * are called during fcoll_base_select(). Each component is query() ied @@ -69,11 +71,11 @@ struct mca_fcoll_request_t; * **************** component struct ******************************* */ -typedef int (*mca_fcoll_base_component_init_query_1_0_0_fn_t) - (bool enable_progress_threads, +typedef int (*mca_fcoll_base_component_init_query_1_0_0_fn_t) + (bool enable_progress_threads, bool enable_mpi_threads); -typedef struct mca_fcoll_base_module_1_0_0_t * +typedef struct mca_fcoll_base_module_1_0_0_t * (*mca_fcoll_base_component_file_query_1_0_0_fn_t) (struct mca_io_ompio_file_t *file, int *priority); @@ -88,7 +90,7 @@ typedef int (*mca_fcoll_base_component_file_unquery_1_0_0_fn_t) struct mca_fcoll_base_component_2_0_0_t { mca_base_component_t fcollm_version; mca_base_component_data_t fcollm_data; - + mca_fcoll_base_component_init_query_1_0_0_fn_t fcollm_init_query; mca_fcoll_base_component_file_query_1_0_0_fn_t fcollm_file_query; mca_fcoll_base_component_file_unquery_1_0_0_fn_t fcollm_file_unquery; @@ -126,14 +128,14 @@ typedef int (*mca_fcoll_base_module_file_iread_all_fn_t) typedef int (*mca_fcoll_base_module_file_write_all_fn_t) (struct mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); typedef int (*mca_fcoll_base_module_file_iwrite_all_fn_t) (struct mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); @@ -157,7 +159,7 @@ struct mca_fcoll_base_module_1_0_0_t { */ 
mca_fcoll_base_module_init_1_0_0_fn_t fcoll_module_init; mca_fcoll_base_module_finalize_1_0_0_fn_t fcoll_module_finalize; - + /* FCOLL function pointers */ mca_fcoll_base_module_file_read_all_fn_t fcoll_file_read_all; mca_fcoll_base_module_file_iread_all_fn_t fcoll_file_iread_all; @@ -165,7 +167,7 @@ struct mca_fcoll_base_module_1_0_0_t { mca_fcoll_base_module_file_iwrite_all_fn_t fcoll_file_iwrite_all; mca_fcoll_base_module_progress_fn_t fcoll_progress; mca_fcoll_base_module_request_free_fn_t fcoll_request_free; - + }; typedef struct mca_fcoll_base_module_1_0_0_t mca_fcoll_base_module_1_0_0_t; typedef mca_fcoll_base_module_1_0_0_t mca_fcoll_base_module_t; diff --git a/ompi/mca/fcoll/individual/Makefile.am b/ompi/mca/fcoll/individual/Makefile.am index 727e3f52f37..7fc9af1b623 100644 --- a/ompi/mca/fcoll/individual/Makefile.am +++ b/ompi/mca/fcoll/individual/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2015 University of Houston. All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fcoll/individual/fcoll_individual.h b/ompi/mca/fcoll/individual/fcoll_individual.h index 9ae1f967854..298fae795ba 100644 --- a/ompi/mca/fcoll/individual/fcoll_individual.h +++ b/ompi/mca/fcoll/individual/fcoll_individual.h @@ -5,15 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -48,17 +50,17 @@ int mca_fcoll_individual_component_file_unquery (mca_io_ompio_file_t *file); int mca_fcoll_individual_module_init (mca_io_ompio_file_t *file); int mca_fcoll_individual_module_finalize (mca_io_ompio_file_t *file); -int mca_fcoll_individual_file_read_all (mca_io_ompio_file_t *fh, - void *buf, +int mca_fcoll_individual_file_read_all (mca_io_ompio_file_t *fh, + void *buf, int count, - struct ompi_datatype_t *datatype, + struct ompi_datatype_t *datatype, ompi_status_public_t * status); -int mca_fcoll_individual_file_write_all (mca_io_ompio_file_t *fh, - void *buf, +int mca_fcoll_individual_file_write_all (mca_io_ompio_file_t *fh, + const void *buf, int count, - struct ompi_datatype_t *datatype, + struct ompi_datatype_t *datatype, ompi_status_public_t * status); diff --git a/ompi/mca/fcoll/individual/fcoll_individual_component.c b/ompi/mca/fcoll/individual/fcoll_individual_component.c index 829fa10a59a..722b80c42f4 100644 --- a/ompi/mca/fcoll/individual/fcoll_individual_component.c +++ b/ompi/mca/fcoll/individual/fcoll_individual_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/fcoll/individual/fcoll_individual_file_read_all.c b/ompi/mca/fcoll/individual/fcoll_individual_file_read_all.c index 2cfdbbe3671..bde6f9a92df 100644 --- a/ompi/mca/fcoll/individual/fcoll_individual_file_read_all.c +++ b/ompi/mca/fcoll/individual/fcoll_individual_file_read_all.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,12 +32,11 @@ /* this component just directly calls the individual I/O operations */ int -mca_fcoll_individual_file_read_all (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, +mca_fcoll_individual_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, ompi_status_public_t *status) { - return fh->f_fh->f_io_selected_module.v2_0_0. - io_module_file_read( fh->f_fh, buf, count, datatype, status); + return ompio_io_ompio_file_read( fh, buf, count, datatype, status); } diff --git a/ompi/mca/fcoll/individual/fcoll_individual_file_write_all.c b/ompi/mca/fcoll/individual/fcoll_individual_file_write_all.c index 2697f9aebf0..8b4b6fa8815 100644 --- a/ompi/mca/fcoll/individual/fcoll_individual_file_write_all.c +++ b/ompi/mca/fcoll/individual/fcoll_individual_file_write_all.c @@ -9,7 +9,9 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. - * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include -int mca_fcoll_individual_file_write_all (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, +int mca_fcoll_individual_file_write_all (mca_io_ompio_file_t *fh, + const void *buf, + int count, + struct ompi_datatype_t *datatype, ompi_status_public_t *status) { - return fh->f_fh->f_io_selected_module.v2_0_0. - io_module_file_write (fh->f_fh, buf, count, datatype, status); + return ompio_io_ompio_file_write (fh, buf, count, datatype, status); } diff --git a/ompi/mca/fcoll/individual/fcoll_individual_module.c b/ompi/mca/fcoll/individual/fcoll_individual_module.c index b9c95a86bae..3f29a10a4ee 100644 --- a/ompi/mca/fcoll/individual/fcoll_individual_module.c +++ b/ompi/mca/fcoll/individual/fcoll_individual_module.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -64,15 +64,18 @@ mca_fcoll_individual_component_file_query (mca_io_ompio_file_t *fh, int *priorit if (*priority < 50) { *priority = 50; } + if ( 2 >= fh->f_size ) { + *priority = 100; + } } - + return &individual; } int mca_fcoll_individual_component_file_unquery (mca_io_ompio_file_t *file) -{ +{ /* This function might be needed for some purposes later. for now it - * does not have anything to do since there are no steps which need + * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ return OMPI_SUCCESS; @@ -83,8 +86,8 @@ int mca_fcoll_individual_module_init (mca_io_ompio_file_t *file) return OMPI_SUCCESS; } - -int mca_fcoll_individual_module_finalize (mca_io_ompio_file_t *file) + +int mca_fcoll_individual_module_finalize (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } diff --git a/ompi/mca/fcoll/static/Makefile.am b/ompi/mca/fcoll/static/Makefile.am index ee01ac7ec26..c9ff1893d2f 100644 --- a/ompi/mca/fcoll/static/Makefile.am +++ b/ompi/mca/fcoll/static/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2015 University of Houston. All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -23,7 +23,7 @@ sources = \ fcoll_static_module.c \ fcoll_static_component.c \ fcoll_static_file_read_all.c \ - fcoll_static_file_write_all.c + fcoll_static_file_write_all.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/ompi/mca/fcoll/static/fcoll_static.h b/ompi/mca/fcoll/static/fcoll_static.h index be03c032ea6..91a9dd4900f 100644 --- a/ompi/mca/fcoll/static/fcoll_static.h +++ b/ompi/mca/fcoll/static/fcoll_static.h @@ -5,15 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -47,17 +49,17 @@ int mca_fcoll_static_component_file_unquery (mca_io_ompio_file_t *file); int mca_fcoll_static_module_init (mca_io_ompio_file_t *file); int mca_fcoll_static_module_finalize (mca_io_ompio_file_t *file); -int mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, - void *buf, +int mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, + void *buf, int count, - struct ompi_datatype_t *datatype, + struct ompi_datatype_t *datatype, ompi_status_public_t * status); -int mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, - void *buf, +int mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, + const void *buf, int count, - struct ompi_datatype_t *datatype, + struct ompi_datatype_t *datatype, ompi_status_public_t * status); diff --git a/ompi/mca/fcoll/static/fcoll_static_component.c b/ompi/mca/fcoll/static/fcoll_static_component.c index 32e600d2b71..d71e7ec67fd 100644 --- a/ompi/mca/fcoll/static/fcoll_static_component.c +++ b/ompi/mca/fcoll/static/fcoll_static_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/fcoll/static/fcoll_static_file_read_all.c b/ompi/mca/fcoll/static/fcoll_static_file_read_all.c index 2cb77698618..354adf62302 100644 --- a/ompi/mca/fcoll/static/fcoll_static_file_read_all.c +++ b/ompi/mca/fcoll/static/fcoll_static_file_read_all.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. * * $COPYRIGHT$ @@ -33,25 +33,24 @@ #include #define DEBUG_ON 0 -#define TIME_BREAKDOWN 0 -typedef struct local_io_array { - OMPI_MPI_OFFSET_TYPE offset; - MPI_Aint length; - int process_id; -}local_io_array; +typedef struct mca_fcoll_static_local_io_array { + OMPI_MPI_OFFSET_TYPE offset; + MPI_Aint length; + int process_id; +}mca_fcoll_static_local_io_array; -int read_local_heap_sort (local_io_array *io_array, +int read_local_heap_sort (mca_fcoll_static_local_io_array *io_array, int num_entries, int *sorted); int read_find_next_index( int proc_index, int c_index, - mca_io_ompio_file_t *fh, - local_io_array *global_iov_array, + mca_io_ompio_file_t *fh, + mca_fcoll_static_local_io_array *global_iov_array, int global_iov_count, int *sorted); @@ -61,1016 +60,1037 @@ int read_get_process_id (int rank, int -mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, +mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, ompi_status_public_t *status) { - int ret = OMPI_SUCCESS, iov_size=0, *bytes_remaining=NULL; - int i, j, l,cycles=0, local_cycles=0, *current_index=NULL; 
- int index, *disp_index=NULL, *bytes_per_process=NULL, current_position=0; - int **blocklen_per_process=NULL, *iovec_count_per_process=NULL; - int *displs=NULL, *sorted=NULL ,entries_per_aggregator=0; - int *sorted_file_offsets=NULL, temp_index=0, position=0, *temp_disp_index=NULL; - - - MPI_Aint **displs_per_process=NULL, global_iov_count=0, global_count=0; - MPI_Aint *memory_displacements=NULL; - int bytes_to_read_in_cycle=0; - size_t max_data=0, bytes_per_cycle=0; - uint32_t iov_count=0, iov_index=0; - struct iovec *decoded_iov=NULL, *iov=NULL; - local_io_array *local_iov_array=NULL, *global_iov_array=NULL; - local_io_array *file_offsets_for_agg=NULL; - - char *global_buf=NULL, *receive_buf=NULL; - - int blocklen[3] = {1, 1, 1}; - int static_num_io_procs=1; - OPAL_PTRDIFF_TYPE d[3], base; - ompi_datatype_t *types[3]; - ompi_datatype_t *io_array_type=MPI_DATATYPE_NULL; - ompi_datatype_t **sendtype = NULL; - MPI_Request *send_req=NULL, *recv_req=NULL; - /* MPI_Request *grecv_req=NULL, *gsend_req=NULL; */ - -#if TIME_BREAKDOWN - double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0; - double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0; - double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0; - print_entry nentry; + int ret = OMPI_SUCCESS, iov_size=0, *bytes_remaining=NULL; + int i, j, l,cycles=0, local_cycles=0, *current_index=NULL; + int index, *disp_index=NULL, *bytes_per_process=NULL, current_position=0; + int **blocklen_per_process=NULL, *iovec_count_per_process=NULL; + int *displs=NULL, *sorted=NULL ,entries_per_aggregator=0; + int *sorted_file_offsets=NULL, temp_index=0, position=0, *temp_disp_index=NULL; + + + MPI_Aint **displs_per_process=NULL, global_iov_count=0, global_count=0; + MPI_Aint *memory_displacements=NULL; + int bytes_to_read_in_cycle=0; + size_t max_data=0, bytes_per_cycle=0; + uint32_t iov_count=0, iov_index=0; + struct iovec *decoded_iov=NULL, *iov=NULL; + mca_fcoll_static_local_io_array 
*local_iov_array=NULL, *global_iov_array=NULL; + mca_fcoll_static_local_io_array *file_offsets_for_agg=NULL; + + char *global_buf=NULL, *receive_buf=NULL; + + int blocklen[3] = {1, 1, 1}; + int static_num_io_procs=1; + OPAL_PTRDIFF_TYPE d[3], base; + ompi_datatype_t *types[3]; + ompi_datatype_t *io_array_type=MPI_DATATYPE_NULL; + ompi_datatype_t **sendtype = NULL; + MPI_Request *send_req=NULL, recv_req=NULL; + int my_aggregator=-1; + bool recvbuf_is_contiguous=false; + size_t ftype_size; + OPAL_PTRDIFF_TYPE ftype_extent, lb; + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0; + double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0; + double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0; + mca_io_ompio_print_entry nentry; #endif -#if DEBUG_ON +#if DEBUG_ON MPI_Aint gc_in; #endif + opal_datatype_type_size ( &datatype->super, &ftype_size ); + opal_datatype_get_extent ( &datatype->super, &lb, &ftype_extent ); + + /************************************************************************** + ** 1. In case the data is not contigous in memory, decode it into an iovec + **************************************************************************/ + if ( ( ftype_extent == (OPAL_PTRDIFF_TYPE) ftype_size) && + opal_datatype_is_contiguous_memory_layout(&datatype->super,1) && + 0 == lb ) { + recvbuf_is_contiguous = true; + } -// if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) { -// fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; -// } - - - /* In case the data is not contigous in memory, decode it into an iovec */ - if (! 
(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { - fh->f_decode_datatype ( (struct mca_io_ompio_file_t *)fh, - datatype, - count, - buf, - &max_data, - &decoded_iov, - &iov_count); - } - else { - max_data = count * datatype->super.size; - } - - if ( MPI_STATUS_IGNORE != status ) { - status->_ucount = max_data; - } - - - fh->f_get_num_aggregators ( &static_num_io_procs ); - fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *) fh, - static_num_io_procs, - max_data); - - /* printf("max_data %ld\n", max_data); */ - ret = fh->f_generate_current_file_view((struct mca_io_ompio_file_t *)fh, - max_data, - &iov, - &iov_size); - if (ret != OMPI_SUCCESS){ - goto exit; - } - - if ( iov_size > 0 ) { - local_iov_array = (local_io_array *)malloc (iov_size * sizeof(local_io_array)); - if ( NULL == local_iov_array){ - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - - for (j=0; j < iov_size; j++){ - local_iov_array[j].offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t) - iov[j].iov_base; - local_iov_array[j].length = (size_t)iov[j].iov_len; - local_iov_array[j].process_id = fh->f_rank; - - } - } - else { - /* Allocate at least one element to correctly create the derived - data type */ - local_iov_array = (local_io_array *)malloc (sizeof(local_io_array)); - if ( NULL == local_iov_array){ - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - - local_iov_array[0].offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t) 0; - local_iov_array[0].length = (size_t) 0; - local_iov_array[0].process_id = fh->f_rank; - } - - d[0] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0]; - d[1] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].length; - d[2] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].process_id; - base = d[0]; - for (i=0 ; i<3 ; i++) { - d[i] -= base; - } - - /* io_array datatype for using in communication*/ - types[0] = &ompi_mpi_long.dt; - types[1] = &ompi_mpi_long.dt; - types[2] = &ompi_mpi_int.dt; - - ompi_datatype_create_struct (3, - blocklen, - d, - types, - &io_array_type); - ompi_datatype_commit (&io_array_type); - 
/* #########################################################*/ - - - fh->f_get_bytes_per_agg ( (int*) &bytes_per_cycle); - local_cycles = ceil((double)max_data/bytes_per_cycle); - ret = fh->f_comm->c_coll.coll_allreduce (&local_cycles, - &cycles, - 1, - MPI_INT, - MPI_MAX, - fh->f_comm, - fh->f_comm->c_coll.coll_allreduce_module); - - if (OMPI_SUCCESS != ret){ - goto exit; - } - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - disp_index = (int *) malloc (fh->f_procs_per_group * sizeof(int)); - if (NULL == disp_index) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + + /* In case the data is not contigous in memory, decode it into an iovec */ + if (!recvbuf_is_contiguous ) { + fh->f_decode_datatype ( (struct mca_io_ompio_file_t *)fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + } + else { + max_data = count * datatype->super.size; } - bytes_per_process = (int *) malloc (fh->f_procs_per_group * sizeof(int )); - if (NULL == bytes_per_process){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + if ( MPI_STATUS_IGNORE != status ) { + status->_ucount = max_data; } - bytes_remaining = (int *) malloc (fh->f_procs_per_group * sizeof(int)); - if (NULL == bytes_remaining){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + + fh->f_get_num_aggregators ( &static_num_io_procs ); + fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *) fh, + static_num_io_procs, + max_data); + my_aggregator = fh->f_procs_in_group[fh->f_aggregator_index]; + + /* printf("max_data %ld\n", max_data); */ + ret = fh->f_generate_current_file_view((struct mca_io_ompio_file_t *)fh, + max_data, + &iov, + &iov_size); + if (ret != OMPI_SUCCESS){ + goto exit; } - current_index = (int *) malloc (fh->f_procs_per_group * sizeof(int)); - if (NULL == current_index){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + 
if ( iov_size > 0 ) { + local_iov_array = (mca_fcoll_static_local_io_array *)malloc (iov_size * sizeof(mca_fcoll_static_local_io_array)); + if ( NULL == local_iov_array){ + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + + for (j=0; j < iov_size; j++){ + local_iov_array[j].offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t) + iov[j].iov_base; + local_iov_array[j].length = (size_t)iov[j].iov_len; + local_iov_array[j].process_id = fh->f_rank; + + } } - - blocklen_per_process = (int **)malloc (fh->f_procs_per_group * sizeof (int*)); - if (NULL == blocklen_per_process) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + else { + /* Allocate at least one element to correctly create the derived + data type */ + local_iov_array = (mca_fcoll_static_local_io_array *)malloc (sizeof(mca_fcoll_static_local_io_array)); + if ( NULL == local_iov_array){ + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + + local_iov_array[0].offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t) 0; + local_iov_array[0].length = (size_t) 0; + local_iov_array[0].process_id = fh->f_rank; } - displs_per_process = (MPI_Aint **)malloc (fh->f_procs_per_group * sizeof (MPI_Aint*)); - if (NULL == displs_per_process) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + d[0] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0]; + d[1] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].length; + d[2] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].process_id; + base = d[0]; + for (i=0 ; i<3 ; i++) { + d[i] -= base; } - for(i=0;if_procs_per_group;i++){ - current_index[i] = 0; - bytes_remaining[i] = 0; - blocklen_per_process[i] = NULL; - displs_per_process[i] = NULL; + /* io_array datatype for using in communication*/ + types[0] = &ompi_mpi_long.dt; + types[1] = &ompi_mpi_long.dt; + types[2] = &ompi_mpi_int.dt; + + ompi_datatype_create_struct (3, + blocklen, + d, + types, + &io_array_type); + ompi_datatype_commit (&io_array_type); + + /* 
#########################################################*/ + fh->f_get_bytes_per_agg ( (int*) &bytes_per_cycle); + local_cycles = ceil((double)max_data*fh->f_procs_per_group/bytes_per_cycle); + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rexch = MPI_Wtime(); +#endif + ret = fh->f_comm->c_coll.coll_allreduce (&local_cycles, + &cycles, + 1, + MPI_INT, + MPI_MAX, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); + + if (OMPI_SUCCESS != ret){ + goto exit; } - } - - - iovec_count_per_process = (int *) malloc (fh->f_procs_per_group * sizeof(int)); - if (NULL == iovec_count_per_process){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - displs = (int *) malloc (fh->f_procs_per_group * sizeof(int)); - if (NULL == displs){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - - ret = fh->f_allgather_array (&iov_size, - 1, - MPI_INT, - iovec_count_per_process, - 1, - MPI_INT, - fh->f_aggregator_index, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - - if( OMPI_SUCCESS != ret){ - goto exit; - } - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - displs[0] = 0; - global_iov_count = iovec_count_per_process[0]; - for (i=1 ; if_procs_per_group ; i++) { - global_iov_count += iovec_count_per_process[i]; - displs[i] = displs[i-1] + iovec_count_per_process[i-1]; +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + + if (my_aggregator == fh->f_rank) { + disp_index = (int *) malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + bytes_per_process = (int *) malloc (fh->f_procs_per_group * sizeof(int )); + if (NULL == bytes_per_process){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + bytes_remaining = (int *) calloc 
(fh->f_procs_per_group, sizeof(int)); + if (NULL == bytes_remaining){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + current_index = (int *) calloc (fh->f_procs_per_group, sizeof(int)); + if (NULL == current_index){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + blocklen_per_process = (int **)calloc (fh->f_procs_per_group, sizeof (int*)); + if (NULL == blocklen_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + displs_per_process = (MPI_Aint **)calloc (fh->f_procs_per_group, sizeof (MPI_Aint*)); + if (NULL == displs_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } } - } - - - if ( (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) && - (global_iov_count > 0 )) { - global_iov_array = (local_io_array *) malloc (global_iov_count * - sizeof(local_io_array)); - if (NULL == global_iov_array){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + + + iovec_count_per_process = (int *) malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == iovec_count_per_process){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - } - - ret = fh->f_gatherv_array (local_iov_array, - iov_size, - io_array_type, - global_iov_array, - iovec_count_per_process, - displs, - io_array_type, - fh->f_aggregator_index, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - - if (OMPI_SUCCESS != ret){ - fprintf(stderr,"global_iov_array gather error!\n"); - goto exit; - } - - - if (NULL != local_iov_array){ - free(local_iov_array); - local_iov_array = NULL; - } - - if ( ( fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) && - ( global_iov_count > 0 )) { - sorted = (int *)malloc (global_iov_count * sizeof(int)); - if (NULL == sorted) { - opal_output (1, "OUT OF MEMORY\n"); - ret = 
OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + + displs = (int *) malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == displs){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - read_local_heap_sort (global_iov_array, global_iov_count, sorted); - } - -#if DEBUG_ON - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - for (gc_in=0; gc_inf_allgather_array (&iov_size, + 1, + MPI_INT, + iovec_count_per_process, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + if( OMPI_SUCCESS != ret){ + goto exit; } - } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; #endif - -#if TIME_BREAKDOWN + + if (my_aggregator == fh->f_rank) { + displs[0] = 0; + global_iov_count = iovec_count_per_process[0]; + for (i=1 ; if_procs_per_group ; i++) { + global_iov_count += iovec_count_per_process[i]; + displs[i] = displs[i-1] + iovec_count_per_process[i-1]; + } + } + + + if ( (my_aggregator == fh->f_rank) && + (global_iov_count > 0 )) { + global_iov_array = (mca_fcoll_static_local_io_array *) malloc (global_iov_count * + sizeof(mca_fcoll_static_local_io_array)); + if (NULL == global_iov_array){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_rexch = MPI_Wtime(); #endif + ret = fh->f_gatherv_array (local_iov_array, + iov_size, + io_array_type, + global_iov_array, + iovec_count_per_process, + displs, + io_array_type, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); - for (index = 0; index < cycles; index++){ - - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - - if (NULL == sendtype){ - sendtype = (ompi_datatype_t **) - malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); - if (NULL == sendtype) { - opal_output (1, "OUT OF MEMORY\n"); - ret = 
OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - if (NULL == bytes_per_process){ - bytes_per_process = (int *) malloc (fh->f_procs_per_group * sizeof(int)); - if (NULL == bytes_per_process){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - for(l=0;lf_procs_per_group;l++){ - disp_index[l] = 1; - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } - blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); - if (NULL == blocklen_per_process[l]) { - opal_output (1, "OUT OF MEMORY for blocklen\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); - if (NULL == displs_per_process[l]){ - opal_output (1, "OUT OF MEMORY for displs\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - } - if (local_cycles > index) { - if ((index == local_cycles-1) && (max_data % bytes_per_cycle)) { - bytes_to_read_in_cycle = max_data % bytes_per_cycle; - } - else if (max_data <= bytes_per_cycle) { - bytes_to_read_in_cycle = max_data; - } - else { - bytes_to_read_in_cycle = bytes_per_cycle; - } + if (OMPI_SUCCESS != ret){ + fprintf(stderr,"global_iov_array gather error!\n"); + goto exit; } - else { - bytes_to_read_in_cycle = 0; +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + + if (NULL != local_iov_array){ + free(local_iov_array); + local_iov_array = NULL; } - fh->f_gather_array (&bytes_to_read_in_cycle, - 1, - MPI_INT, - bytes_per_process, - 1, - MPI_INT, - fh->f_aggregator_index, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - - if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { - receive_buf = &((char*)buf)[position]; + + if ( ( my_aggregator == fh->f_rank) && + ( global_iov_count > 0 )) { + sorted = (int 
*)malloc (global_iov_count * sizeof(int)); + if (NULL == sorted) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + read_local_heap_sort (global_iov_array, global_iov_count, sorted); + + send_req = (MPI_Request *) malloc (fh->f_procs_per_group * sizeof(MPI_Request)); + if (NULL == send_req){ + opal_output ( 1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + sendtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); + if (NULL == sendtype) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + for ( i=0; if_procs_per_group; i++ ) { + sendtype[i] = MPI_DATATYPE_NULL; + } + + if (NULL == bytes_per_process){ + bytes_per_process = (int *) malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == bytes_per_process){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } } - else if (bytes_to_read_in_cycle) { - receive_buf = (char *) malloc (bytes_to_read_in_cycle * sizeof(char)); - if ( NULL == receive_buf){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - recv_req = (MPI_Request *) malloc (sizeof (MPI_Request)); - if (NULL == recv_req){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + +#if DEBUG_ON + + if (my_aggregator == fh->f_rank) { + for (gc_in=0; gc_inf_procs_in_group[fh->f_aggregator_index], - 123, - fh->f_comm, - recv_req)); - if (OMPI_SUCCESS != ret){ - goto exit; - } -#if TIME_BREAKDOWN - end_rcomm_time = MPI_Wtime(); - rcomm_time += end_rcomm_time - start_rcomm_time; + for (index = 0; index < cycles; index++){ + + if (my_aggregator == fh->f_rank) { + + fh->f_num_of_io_entries = 0; + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + + if (NULL != sorted_file_offsets){ + 
free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + if (NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements= NULL; + } + + if ( NULL != sendtype ) { + for ( i=0; if_procs_per_group; i++ ) { + if ( MPI_DATATYPE_NULL != sendtype[i] ) { + ompi_datatype_destroy (&sendtype[i] ); + sendtype[i] = MPI_DATATYPE_NULL; + } + } + } + + for(l=0;lf_procs_per_group;l++){ + disp_index[l] = 1; + if (NULL != blocklen_per_process[l]){ + free(blocklen_per_process[l]); + blocklen_per_process[l] = NULL; + } + if (NULL != displs_per_process[l]){ + free(displs_per_process[l]); + displs_per_process[l] = NULL; + } + blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); + if (NULL == blocklen_per_process[l]) { + opal_output (1, "OUT OF MEMORY for blocklen\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); + if (NULL == displs_per_process[l]){ + opal_output (1, "OUT OF MEMORY for displs\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + } + + if (index < local_cycles ) { + if ((index == local_cycles-1) && (max_data % (bytes_per_cycle/fh->f_procs_per_group))) { + bytes_to_read_in_cycle = max_data - position; + } + else if (max_data <= bytes_per_cycle/fh->f_procs_per_group) { + bytes_to_read_in_cycle = max_data; + } + else { + bytes_to_read_in_cycle = bytes_per_cycle/fh->f_procs_per_group; + } + } + else { + bytes_to_read_in_cycle = 0; + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rexch = MPI_Wtime(); #endif - - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - for (i=0;if_procs_per_group; i++){ - while (bytes_per_process[i] > 0){ - /*printf("%d: bytes_per_process[%d]: %d, bytes_remaining[%d]: %d\n", - index, i, bytes_per_process[i], i, bytes_remaining[i]);*/ - if (read_get_process_id(global_iov_array[sorted[current_index[i]]].process_id, 
- fh) == i){ /* current id owns this entry!*/ - if (bytes_remaining[i]){ /*Remaining bytes in the current entry of - the global offset array*/ - if (bytes_remaining[i] <= bytes_per_process[i]){ - - blocklen_per_process[i][disp_index[i] - 1] = bytes_remaining[i]; - displs_per_process[i][disp_index[i] - 1] = - global_iov_array[sorted[current_index[i]]].offset + - (global_iov_array[sorted[current_index[i]]].length - - bytes_remaining[i]); - blocklen_per_process[i] = (int *) realloc - ((void *)blocklen_per_process[i], (disp_index[i]+1)*sizeof(int)); - displs_per_process[i] = (MPI_Aint *)realloc - ((void *)displs_per_process[i], (disp_index[i]+1)*sizeof(MPI_Aint)); - bytes_per_process[i] -= bytes_remaining[i]; - blocklen_per_process[i][disp_index[i]] = 0; - displs_per_process[i][disp_index[i]] = 0; - disp_index[i] += 1; - bytes_remaining[i] = 0; - /* This entry has been used up, we need to move to the - next entry of this process and make current_index point there*/ - current_index[i] = read_find_next_index(i, - current_index[i], - fh, - global_iov_array, - global_iov_count, - sorted); - if (current_index[i] == -1){ - break; - } - continue; - } - else{ - blocklen_per_process[i][disp_index[i] - 1] = bytes_per_process[i]; - displs_per_process[i][disp_index[i] - 1] = - global_iov_array[sorted[current_index[i]]].offset + - (global_iov_array[sorted[current_index[i]]].length - - bytes_remaining[i]); - bytes_remaining[i] -= bytes_per_process[i]; - bytes_per_process[i] = 0; - break; - } - } - else{ - if (bytes_per_process[i] < - global_iov_array[sorted[current_index[i]]].length){ - blocklen_per_process[i][disp_index[i] - 1] = - bytes_per_process[i]; - displs_per_process[i][disp_index[i] - 1] = - global_iov_array[sorted[current_index[i]]].offset; - bytes_remaining[i] = - global_iov_array[sorted[current_index[i]]].length - - bytes_per_process[i]; - bytes_per_process[i] = 0; - break; - } - else { - blocklen_per_process[i][disp_index[i] - 1] = - 
global_iov_array[sorted[current_index[i]]].length; - displs_per_process[i][disp_index[i] - 1] = - global_iov_array[sorted[current_index[i]]].offset; - blocklen_per_process[i] = - (int *) realloc ((void *)blocklen_per_process[i], (disp_index[i]+1)*sizeof(int)); - displs_per_process[i] = (MPI_Aint *)realloc - ((void *)displs_per_process[i], (disp_index[i]+1)*sizeof(MPI_Aint)); - blocklen_per_process[i][disp_index[i]] = 0; - displs_per_process[i][disp_index[i]] = 0; - disp_index[i] += 1; - bytes_per_process[i] -= - global_iov_array[sorted[current_index[i]]].length; - current_index[i] = read_find_next_index(i, - current_index[i], - fh, - global_iov_array, - global_iov_count, - sorted); - if (current_index[i] == -1){ - break; - } - } - } - } - else{ - current_index[i] = read_find_next_index(i, - current_index[i], - fh, - global_iov_array, - global_iov_count, - sorted); - if (current_index[i] == -1){ - bytes_per_process = 0; /* no more entries left - to service this request*/ - continue; - } - } - } - } - - entries_per_aggregator=0; - for (i=0;if_procs_per_group;i++){ - for (j=0;j 0){ - entries_per_aggregator++; + fh->f_gather_array (&bytes_to_read_in_cycle, + 1, + MPI_INT, + bytes_per_process, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + if (recvbuf_is_contiguous ) { + receive_buf = &((char*)buf)[position]; + } + else if (bytes_to_read_in_cycle) { + receive_buf = (char *) malloc (bytes_to_read_in_cycle * sizeof(char)); + if ( NULL == receive_buf){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_rcomm_time = MPI_Wtime(); +#endif + + ret = MCA_PML_CALL(irecv(receive_buf, + bytes_to_read_in_cycle, + MPI_BYTE, + my_aggregator, + 123, + fh->f_comm, + &recv_req)); + if (OMPI_SUCCESS != ret){ 
+ goto exit; + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + + if (my_aggregator == fh->f_rank) { + for (i=0;if_procs_per_group; i++){ + while (bytes_per_process[i] > 0){ + /*printf("%d: bytes_per_process[%d]: %d, bytes_remaining[%d]: %d\n", + index, i, bytes_per_process[i], i, bytes_remaining[i]);*/ + if (read_get_process_id(global_iov_array[sorted[current_index[i]]].process_id, + fh) == i){ /* current id owns this entry!*/ + if (bytes_remaining[i]){ /*Remaining bytes in the current entry of + the global offset array*/ + if (bytes_remaining[i] <= bytes_per_process[i]){ + + blocklen_per_process[i][disp_index[i] - 1] = bytes_remaining[i]; + displs_per_process[i][disp_index[i] - 1] = + global_iov_array[sorted[current_index[i]]].offset + + (global_iov_array[sorted[current_index[i]]].length + - bytes_remaining[i]); + blocklen_per_process[i] = (int *) realloc + ((void *)blocklen_per_process[i], (disp_index[i]+1)*sizeof(int)); + displs_per_process[i] = (MPI_Aint *)realloc + ((void *)displs_per_process[i], (disp_index[i]+1)*sizeof(MPI_Aint)); + bytes_per_process[i] -= bytes_remaining[i]; + blocklen_per_process[i][disp_index[i]] = 0; + displs_per_process[i][disp_index[i]] = 0; + disp_index[i] += 1; + bytes_remaining[i] = 0; + /* This entry has been used up, we need to move to the + next entry of this process and make current_index point there*/ + current_index[i] = read_find_next_index(i, + current_index[i], + fh, + global_iov_array, + global_iov_count, + sorted); + if (current_index[i] == -1){ + break; + } + continue; + } + else{ + blocklen_per_process[i][disp_index[i] - 1] = bytes_per_process[i]; + displs_per_process[i][disp_index[i] - 1] = + global_iov_array[sorted[current_index[i]]].offset + + (global_iov_array[sorted[current_index[i]]].length + - bytes_remaining[i]); + bytes_remaining[i] -= bytes_per_process[i]; + bytes_per_process[i] = 0; + break; + } + } + else{ + if 
(bytes_per_process[i] < + global_iov_array[sorted[current_index[i]]].length){ + blocklen_per_process[i][disp_index[i] - 1] = + bytes_per_process[i]; + displs_per_process[i][disp_index[i] - 1] = + global_iov_array[sorted[current_index[i]]].offset; + bytes_remaining[i] = + global_iov_array[sorted[current_index[i]]].length - + bytes_per_process[i]; + bytes_per_process[i] = 0; + break; + } + else { + blocklen_per_process[i][disp_index[i] - 1] = + global_iov_array[sorted[current_index[i]]].length; + displs_per_process[i][disp_index[i] - 1] = + global_iov_array[sorted[current_index[i]]].offset; + blocklen_per_process[i] = + (int *) realloc ((void *)blocklen_per_process[i], (disp_index[i]+1)*sizeof(int)); + displs_per_process[i] = (MPI_Aint *)realloc + ((void *)displs_per_process[i], (disp_index[i]+1)*sizeof(MPI_Aint)); + blocklen_per_process[i][disp_index[i]] = 0; + displs_per_process[i][disp_index[i]] = 0; + disp_index[i] += 1; + bytes_per_process[i] -= + global_iov_array[sorted[current_index[i]]].length; + current_index[i] = read_find_next_index(i, + current_index[i], + fh, + global_iov_array, + global_iov_count, + sorted); + if (current_index[i] == -1){ + break; + } + } + } + } + else{ + current_index[i] = read_find_next_index(i, + current_index[i], + fh, + global_iov_array, + global_iov_count, + sorted); + if (current_index[i] == -1){ + bytes_per_process[i] = 0; /* no more entries left + to service this request*/ + continue; + } + } + } + } + + entries_per_aggregator=0; + for (i=0;if_procs_per_group;i++){ + for (j=0;j 0){ + entries_per_aggregator++; #if DEBUG_ON - printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", - fh->f_procs_in_group[i],j, - blocklen_per_process[i][j],j, - displs_per_process[i][j], - fh->f_rank); - + printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", + fh->f_procs_in_group[i],j, + blocklen_per_process[i][j],j, + displs_per_process[i][j], + fh->f_rank); + #endif - } - } - } - - if (entries_per_aggregator > 0){ - file_offsets_for_agg 
= (local_io_array *) - malloc(entries_per_aggregator*sizeof(local_io_array)); - if (NULL == file_offsets_for_agg) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - sorted_file_offsets = (int *) - malloc (entries_per_aggregator * sizeof(int)); - if (NULL == sorted_file_offsets){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - temp_index=0; - global_count = 0; - for (i=0;if_procs_per_group; i++){ - for(j=0;j 0){ - file_offsets_for_agg[temp_index].length = - blocklen_per_process[i][j]; - global_count += blocklen_per_process[i][j]; - file_offsets_for_agg[temp_index].process_id = i; - file_offsets_for_agg[temp_index].offset = - displs_per_process[i][j]; - temp_index++; - } - } - } - } - else{ - continue; - } - read_local_heap_sort (file_offsets_for_agg, - entries_per_aggregator, - sorted_file_offsets); - memory_displacements = (MPI_Aint *) malloc - (entries_per_aggregator * sizeof(MPI_Aint)); - memory_displacements[sorted_file_offsets[0]] = 0; - for (i=1; i 0){ + file_offsets_for_agg = (mca_fcoll_static_local_io_array *) + malloc(entries_per_aggregator*sizeof(mca_fcoll_static_local_io_array)); + if (NULL == file_offsets_for_agg) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + sorted_file_offsets = (int *) malloc (entries_per_aggregator * sizeof(int)); + if (NULL == sorted_file_offsets){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + temp_index=0; + global_count = 0; + for (i=0;if_procs_per_group; i++){ + for(j=0;j 0){ + file_offsets_for_agg[temp_index].length = + blocklen_per_process[i][j]; + global_count += blocklen_per_process[i][j]; + file_offsets_for_agg[temp_index].process_id = i; + file_offsets_for_agg[temp_index].offset = + displs_per_process[i][j]; + temp_index++; + } + } + } + } + else{ + continue; + } + read_local_heap_sort (file_offsets_for_agg, + entries_per_aggregator, + 
sorted_file_offsets); + memory_displacements = (MPI_Aint *) malloc + (entries_per_aggregator * sizeof(MPI_Aint)); + memory_displacements[sorted_file_offsets[0]] = 0; + for (i=1; if_rank); - for (i=0; if_rank); + for (i=0; if_io_array = (mca_io_ompio_io_array_t *) malloc - (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); - if (NULL == fh->f_io_array) { - opal_output(1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - - - fh->f_num_of_io_entries = 0; - fh->f_io_array[fh->f_num_of_io_entries].offset = - (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; - fh->f_io_array[fh->f_num_of_io_entries].length = - file_offsets_for_agg[sorted_file_offsets[0]].length; - fh->f_io_array[fh->f_num_of_io_entries].memory_address = - global_buf+memory_displacements[sorted_file_offsets[0]]; - fh->f_num_of_io_entries++; - for (i=1;if_io_array[fh->f_num_of_io_entries - 1].length += - file_offsets_for_agg[sorted_file_offsets[i]].length; - } - else{ - fh->f_io_array[fh->f_num_of_io_entries].offset = - (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; - fh->f_io_array[fh->f_num_of_io_entries].length = - file_offsets_for_agg[sorted_file_offsets[i]].length; - fh->f_io_array[fh->f_num_of_io_entries].memory_address = - global_buf+memory_displacements[sorted_file_offsets[i]]; - fh->f_num_of_io_entries++; - } - } - + + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc + (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + + + fh->f_num_of_io_entries = 0; + fh->f_io_array[0].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; + fh->f_io_array[0].length = file_offsets_for_agg[sorted_file_offsets[0]].length; + fh->f_io_array[0].memory_address = global_buf+memory_displacements[sorted_file_offsets[0]]; + fh->f_num_of_io_entries++; + for 
(i=1;if_io_array[fh->f_num_of_io_entries - 1].length += + file_offsets_for_agg[sorted_file_offsets[i]].length; + } + else{ + fh->f_io_array[fh->f_num_of_io_entries].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; + fh->f_io_array[fh->f_num_of_io_entries].length = + file_offsets_for_agg[sorted_file_offsets[i]].length; + fh->f_io_array[fh->f_num_of_io_entries].memory_address = + global_buf+memory_displacements[sorted_file_offsets[i]]; + fh->f_num_of_io_entries++; + } + } + #if DEBUG_ON - printf("*************************** %d\n", fh->f_num_of_io_entries); - for (i=0 ; if_num_of_io_entries ; i++) { - printf(" ADDRESS: %p OFFSET: %ld LENGTH: %ld\n", - fh->f_io_array[i].memory_address, - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].offset, - fh->f_io_array[i].length); - } + printf("*************************** %d\n", fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) { + printf(" ADDRESS: %p OFFSET: %ld LENGTH: %ld\n", + fh->f_io_array[i].memory_address, + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].offset, + fh->f_io_array[i].length); + } #endif -#if TIME_BREAKDOWN - start_read_time = MPI_Wtime(); +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_read_time = MPI_Wtime(); #endif - if (fh->f_num_of_io_entries) { - if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { - opal_output (1, "READ FAILED\n"); - ret = OMPI_ERROR; - goto exit; - } - } - -#if TIME_BREAKDOWN - end_read_time = MPI_Wtime(); - read_time += end_read_time - start_read_time; + if (fh->f_num_of_io_entries) { + if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { + opal_output (1, "READ FAILED\n"); + ret = OMPI_ERROR; + goto exit; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_read_time = MPI_Wtime(); + read_time += end_read_time - start_read_time; #endif #if DEBUG_ON - printf("************Cycle: %d, Aggregator: %d ***************\n", - index+1,fh->f_rank); - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank){ - for (i=0 ; if_procs_per_group * sizeof (int)); - if (NULL == 
temp_disp_index) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - for (i=0; if_procs_per_group * sizeof(MPI_Request)); - if (NULL == send_req){ - opal_output ( 1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } -#if TIME_BREAKDOWN - start_rcomm_time = MPI_Wtime(); + printf("************Cycle: %d, Aggregator: %d ***************\n", + index+1,fh->f_rank); + if (my_aggregator == fh->f_rank){ + for (i=0 ; if_procs_per_group; i++){ - ompi_datatype_create_hindexed(disp_index[i], - blocklen_per_process[i], - displs_per_process[i], - MPI_BYTE, - &sendtype[i]); - ompi_datatype_commit(&sendtype[i]); - ret = MCA_PML_CALL (isend(global_buf, - 1, - sendtype[i], - fh->f_procs_in_group[i], - 123, - MCA_PML_BASE_SEND_STANDARD, - fh->f_comm, - &send_req[i])); - if(OMPI_SUCCESS != ret){ - goto exit; - } - } - - ret = ompi_request_wait_all (fh->f_procs_per_group, - send_req, - MPI_STATUS_IGNORE); - if (OMPI_SUCCESS != ret){ - goto exit; - } - } - - ret = ompi_request_wait (recv_req, MPI_STATUS_IGNORE); - if (OMPI_SUCCESS != ret){ - goto exit; - } + temp_disp_index = (int *)calloc (1, fh->f_procs_per_group * sizeof (int)); + if (NULL == temp_disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + for (i=0; if_flags & OMPIO_CONTIGUOUS_MEMORY)) { - OPAL_PTRDIFF_TYPE mem_address; - size_t remaining = 0; - size_t temp_position = 0; - - remaining = bytes_to_read_in_cycle; - - while (remaining && (iov_count > iov_index)){ - mem_address = (OPAL_PTRDIFF_TYPE) - (decoded_iov[iov_index].iov_base) + current_position; - - if (remaining >= - (decoded_iov[iov_index].iov_len - current_position)) { - memcpy ((IOVBASE_TYPE *) mem_address, - receive_buf+temp_position, - decoded_iov[iov_index].iov_len - current_position); - remaining = remaining - - (decoded_iov[iov_index].iov_len - current_position); - temp_position = temp_position + - (decoded_iov[iov_index].iov_len - 
current_position); - iov_index = iov_index + 1; - current_position = 0; - } - else{ - memcpy ((IOVBASE_TYPE *) mem_address, - receive_buf+temp_position, - remaining); - current_position = current_position + remaining; - remaining = 0; - } - } - if (NULL != receive_buf) { - free (receive_buf); - receive_buf = NULL; - } - } + for (i=0;if_procs_per_group; i++){ + send_req[i] = MPI_REQUEST_NULL; + ompi_datatype_create_hindexed(disp_index[i], + blocklen_per_process[i], + displs_per_process[i], + MPI_BYTE, + &sendtype[i]); + ompi_datatype_commit(&sendtype[i]); + ret = MCA_PML_CALL (isend(global_buf, + 1, + sendtype[i], + fh->f_procs_in_group[i], + 123, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &send_req[i])); + if(OMPI_SUCCESS != ret){ + goto exit; + } + } - if (NULL != recv_req){ - free(recv_req); - recv_req = NULL; - } + ret = ompi_request_wait_all (fh->f_procs_per_group, + send_req, + MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + } /* if ( my_aggregator == fh->f_rank ) */ + + ret = ompi_request_wait (&recv_req, MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_rcomm_time = MPI_Wtime(); + rcomm_time += end_rcomm_time - start_rcomm_time; +#endif + + position += bytes_to_read_in_cycle; + + if (!recvbuf_is_contiguous) { + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + remaining = bytes_to_read_in_cycle; + + while (remaining && (iov_count > iov_index)){ + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + 
current_position = 0; + } + else{ + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + if (NULL != receive_buf) { + free (receive_buf); + receive_buf = NULL; + } + } - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - fh->f_num_of_io_entries = 0; - if (NULL != fh->f_io_array) { - free (fh->f_io_array); - fh->f_io_array = NULL; - } - for (i = 0; i < fh->f_procs_per_group; i++) - ompi_datatype_destroy(sendtype+i); - if (NULL != sendtype){ - free(sendtype); - sendtype=NULL; - } - if (NULL != send_req){ - free(send_req); - send_req = NULL; - } - if (NULL != global_buf) { - free (global_buf); - global_buf = NULL; - } - - - if (NULL != sorted_file_offsets){ - free(sorted_file_offsets); - sorted_file_offsets = NULL; - } - if (NULL != file_offsets_for_agg){ - free(file_offsets_for_agg); - file_offsets_for_agg = NULL; - } - if (NULL != bytes_per_process){ - free(bytes_per_process); - bytes_per_process =NULL; - } - if (NULL != memory_displacements){ - free(memory_displacements); - memory_displacements= NULL; - } } - } -#if TIME_BREAKDOWN +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN end_rexch = MPI_Wtime(); read_exch += end_rexch - start_rexch; nentry.time[0] = read_time; nentry.time[1] = rcomm_time; nentry.time[2] = read_exch; - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) - nentry.aggregator = 1; + if (my_aggregator == fh->f_rank) + nentry.aggregator = 1; else - nentry.aggregator = 0; + nentry.aggregator = 0; nentry.nprocs_for_coll = static_num_io_procs; if (!fh->f_full_print_queue(READ_PRINT_QUEUE)){ - fh->f_register_print_entry(READ_PRINT_QUEUE, - nentry); - } + fh->f_register_print_entry(READ_PRINT_QUEUE, + nentry); + } #endif - - exit: - if (NULL != decoded_iov){ - free(decoded_iov); - decoded_iov = NULL; - } - - if (NULL != displs){ - free(displs); - displs = NULL; - } - - if (NULL != iovec_count_per_process){ - 
free(iovec_count_per_process); - iovec_count_per_process=NULL; - } - - if (NULL != local_iov_array){ - free(local_iov_array); - local_iov_array=NULL; - } - - if (NULL != global_iov_array){ - free(global_iov_array); - global_iov_array=NULL; - } - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - - for(l=0;lf_procs_per_group;l++){ - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } + +exit: + if (NULL != decoded_iov){ + free(decoded_iov); + decoded_iov = NULL; } - } - - if (NULL != bytes_per_process){ - free(bytes_per_process); - bytes_per_process =NULL; - } - - if (NULL != disp_index){ - free(disp_index); - disp_index =NULL; - } - - if (NULL != displs_per_process){ - free(displs_per_process); - displs_per_process = NULL; - } - - if(NULL != bytes_remaining){ - free(bytes_remaining); - bytes_remaining = NULL; - } - - if(NULL != current_index){ - free(current_index); - current_index = NULL; - } - - if (NULL != blocklen_per_process){ - free(blocklen_per_process); - blocklen_per_process =NULL; - } - - if (NULL != bytes_remaining){ - free(bytes_remaining); - bytes_remaining =NULL; - } - - if (NULL != memory_displacements){ - free(memory_displacements); - memory_displacements= NULL; - } - - if (NULL != file_offsets_for_agg){ - free(file_offsets_for_agg); - file_offsets_for_agg = NULL; - } - - if (NULL != sorted_file_offsets){ - free(sorted_file_offsets); - sorted_file_offsets = NULL; - } - - if (NULL != sendtype){ - free(sendtype); - sendtype=NULL; - } - - if (NULL != receive_buf){ - free(receive_buf); - receive_buf=NULL; - } - - if (NULL != global_buf) { - free(global_buf); - global_buf = NULL; - } - - if (NULL != sorted) { - free(sorted); - sorted = NULL; - } - - return ret; - + + if (NULL != displs){ + free(displs); + displs = NULL; + } + + if (NULL != iovec_count_per_process){ + 
free(iovec_count_per_process); + iovec_count_per_process=NULL; + } + + if (NULL != local_iov_array){ + free(local_iov_array); + local_iov_array=NULL; + } + + if (NULL != global_iov_array){ + free(global_iov_array); + global_iov_array=NULL; + } + + if (my_aggregator == fh->f_rank) { + + for(l=0;lf_procs_per_group;l++){ + if (blocklen_per_process) { + free(blocklen_per_process[l]); + } + if (NULL != displs_per_process[l]){ + free(displs_per_process[l]); + displs_per_process[l] = NULL; + } + } + } + + if (NULL != bytes_per_process){ + free(bytes_per_process); + bytes_per_process =NULL; + } + + if (NULL != disp_index){ + free(disp_index); + disp_index =NULL; + } + + if (NULL != displs_per_process){ + free(displs_per_process); + displs_per_process = NULL; + } + + if(NULL != bytes_remaining){ + free(bytes_remaining); + bytes_remaining = NULL; + } + + if(NULL != current_index){ + free(current_index); + current_index = NULL; + } + + if (NULL != blocklen_per_process){ + free(blocklen_per_process); + blocklen_per_process =NULL; + } + + if (NULL != bytes_remaining){ + free(bytes_remaining); + bytes_remaining =NULL; + } + + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements= NULL; + } + + if (NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + + if (NULL != sorted_file_offsets){ + free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + + if (NULL != sendtype){ + free(sendtype); + sendtype=NULL; + } + + if ( !recvbuf_is_contiguous ) { + if (NULL != receive_buf){ + free(receive_buf); + receive_buf=NULL; + } + } + + if (NULL != global_buf) { + free(global_buf); + global_buf = NULL; + } + + if (NULL != sorted) { + free(sorted); + sorted = NULL; + } + + if (NULL != send_req){ + free(send_req); + send_req = NULL; + } + + + return ret; + } -int read_local_heap_sort (local_io_array *io_array, +int read_local_heap_sort (mca_fcoll_static_local_io_array *io_array, int num_entries, int *sorted) { - 
int i = 0; - int j = 0; - int left = 0; - int right = 0; - int largest = 0; - int heap_size = num_entries - 1; - int temp = 0; - unsigned char done = 0; - int* temp_arr = NULL; - - if ( 0 == num_entries ) { - return OMPI_SUCCESS; - } - - temp_arr = (int*)malloc(num_entries*sizeof(int)); - if (NULL == temp_arr) { - opal_output (1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - temp_arr[0] = 0; - for (i = 1; i < num_entries; ++i) { + int i = 0; + int j = 0; + int left = 0; + int right = 0; + int largest = 0; + int heap_size = num_entries - 1; + int temp = 0; + unsigned char done = 0; + int* temp_arr = NULL; + + if ( 0 == num_entries ) { + return OMPI_SUCCESS; + } + + temp_arr = (int*)malloc(num_entries*sizeof(int)); + if (NULL == temp_arr) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + temp_arr[0] = 0; + for (i = 1; i < num_entries; ++i) { temp_arr[i] = i; - } - /* num_entries can be a large no. so NO RECURSION */ - for (i = num_entries/2-1 ; i>=0 ; i--) { - done = 0; + } + /* num_entries can be a large no. 
so NO RECURSION */ + for (i = num_entries/2-1 ; i>=0 ; i--) { + done = 0; j = i; largest = j; while (!done) { left = j*2+1; right = j*2+2; - if ((left <= heap_size) && + if ((left <= heap_size) && (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { largest = left; } else { largest = j; } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > io_array[temp_arr[largest]].offset)) { largest = right; } @@ -1089,8 +1109,8 @@ int read_local_heap_sort (local_io_array *io_array, for (i = num_entries-1; i >=1; --i) { temp = temp_arr[0]; temp_arr[0] = temp_arr[i]; - temp_arr[i] = temp; - heap_size--; + temp_arr[i] = temp; + heap_size--; done = 0; j = 0; largest = j; @@ -1098,17 +1118,17 @@ int read_local_heap_sort (local_io_array *io_array, while (!done) { left = j*2+1; right = j*2+2; - - if ((left <= heap_size) && - (io_array[temp_arr[left]].offset > + + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { largest = left; } else { largest = j; } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > io_array[temp_arr[largest]].offset)) { largest = right; } @@ -1136,30 +1156,30 @@ int read_local_heap_sort (local_io_array *io_array, int read_find_next_index( int proc_index, int c_index, - mca_io_ompio_file_t *fh, - local_io_array *global_iov_array, + mca_io_ompio_file_t *fh, + mca_fcoll_static_local_io_array *global_iov_array, int global_iov_count, int *sorted){ - int i; - - for(i=c_index+1; if_procs_per_group; i++){ - if (fh->f_procs_in_group[i] == rank){ - return i; + int i; + for (i=0; i<=fh->f_procs_per_group; i++){ + if (fh->f_procs_in_group[i] == rank){ + return i; + } } - } - return -1; + return -1; } diff --git a/ompi/mca/fcoll/static/fcoll_static_file_write_all.c b/ompi/mca/fcoll/static/fcoll_static_file_write_all.c index f1cb7cd9887..a326e2ae549 
100644 --- a/ompi/mca/fcoll/static/fcoll_static_file_write_all.c +++ b/ompi/mca/fcoll/static/fcoll_static_file_write_all.c @@ -9,9 +9,10 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,24 +33,23 @@ #include #define DEBUG_ON 0 -#define TIME_BREAKDOWN 0 -typedef struct local_io_array{ - OMPI_MPI_OFFSET_TYPE offset; - MPI_Aint length; - int process_id; -}local_io_array; +typedef struct mca_fcoll_static_local_io_array{ + OMPI_MPI_OFFSET_TYPE offset; + MPI_Aint length; + int process_id; +}mca_fcoll_static_local_io_array; -static int local_heap_sort (local_io_array *io_array, +static int local_heap_sort (mca_fcoll_static_local_io_array *io_array, int num_entries, int *sorted); int find_next_index( int proc_index, int c_index, - mca_io_ompio_file_t *fh, - local_io_array *global_iov_array, + mca_io_ompio_file_t *fh, + mca_fcoll_static_local_io_array *global_iov_array, int global_iov_count, int *sorted); @@ -58,954 +58,1003 @@ int get_process_id (int rank, int -mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, +mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, + const void *buf, + int count, + struct ompi_datatype_t *datatype, ompi_status_public_t *status) { - - - size_t max_data = 0, bytes_per_cycle=0; - struct iovec *iov=NULL, *decoded_iov=NULL; - uint32_t iov_count=0, iov_index=0; - int i=0,j=0,l=0, temp_index; - int ret=OMPI_SUCCESS, cycles, local_cycles, *bytes_per_process=NULL; - int index, 
*disp_index=NULL, **blocklen_per_process=NULL; - int *iovec_count_per_process=NULL, *displs=NULL; - size_t total_bytes_written=0; - MPI_Aint **displs_per_process=NULL, *memory_displacements=NULL; - MPI_Aint bytes_to_write_in_cycle=0, global_iov_count=0, global_count=0; - - local_io_array *local_iov_array =NULL, *global_iov_array=NULL; - local_io_array *file_offsets_for_agg=NULL; - int *sorted=NULL, *sorted_file_offsets=NULL, temp_pindex, *temp_disp_index=NULL; - char *send_buf=NULL, *global_buf=NULL; - int iov_size=0, current_position=0, *current_index=NULL; - int *bytes_remaining=NULL, entries_per_aggregator=0; - ompi_datatype_t **recvtype = NULL; - MPI_Request *send_req=NULL, *recv_req=NULL; - /* For creating datatype of type io_array */ - int blocklen[3] = {1, 1, 1}; - int static_num_io_procs=1; - OPAL_PTRDIFF_TYPE d[3], base; - ompi_datatype_t *types[3]; - ompi_datatype_t *io_array_type=MPI_DATATYPE_NULL; - /*----------------------------------------------*/ -#if TIME_BREAKDOWN - double write_time = 0.0, start_write_time = 0.0, end_write_time = 0.0; - double comm_time = 0.0, start_comm_time = 0.0, end_comm_time = 0.0; - double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0; - print_entry nentry; + + + size_t max_data = 0, bytes_per_cycle=0; + struct iovec *iov=NULL, *decoded_iov=NULL; + uint32_t iov_count=0, iov_index=0; + int i=0,j=0,l=0, temp_index; + int ret=OMPI_SUCCESS, cycles, local_cycles, *bytes_per_process=NULL; + int index, *disp_index=NULL, **blocklen_per_process=NULL; + int *iovec_count_per_process=NULL, *displs=NULL; + size_t total_bytes_written=0; + MPI_Aint **displs_per_process=NULL, *memory_displacements=NULL; + MPI_Aint bytes_to_write_in_cycle=0, global_iov_count=0, global_count=0; + + mca_fcoll_static_local_io_array *local_iov_array =NULL, *global_iov_array=NULL; + mca_fcoll_static_local_io_array *file_offsets_for_agg=NULL; + int *sorted=NULL, *sorted_file_offsets=NULL, temp_pindex, *temp_disp_index=NULL; + char *send_buf=NULL, 
*global_buf=NULL; + int iov_size=0, current_position=0, *current_index=NULL; + int *bytes_remaining=NULL, entries_per_aggregator=0; + ompi_datatype_t **recvtype = NULL; + MPI_Request send_req=NULL, *recv_req=NULL; + /* For creating datatype of type io_array */ + int blocklen[3] = {1, 1, 1}; + int static_num_io_procs=1; + OPAL_PTRDIFF_TYPE d[3], base; + ompi_datatype_t *types[3]; + ompi_datatype_t *io_array_type=MPI_DATATYPE_NULL; + int my_aggregator=-1; + bool sendbuf_is_contiguous= false; + size_t ftype_size; + OPAL_PTRDIFF_TYPE ftype_extent, lb; + + + /*----------------------------------------------*/ +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + double write_time = 0.0, start_write_time = 0.0, end_write_time = 0.0; + double comm_time = 0.0, start_comm_time = 0.0, end_comm_time = 0.0; + double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0; + mca_io_ompio_print_entry nentry; #endif - + #if DEBUG_ON MPI_Aint gc_in; #endif - -// if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) { -// fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; -// } - - /* In case the data is not contigous in memory, decode it into an iovec */ - if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { - fh->f_decode_datatype ((struct mca_io_ompio_file_t *)fh, - datatype, - count, - buf, - &max_data, - &decoded_iov, - &iov_count); - } - else { - max_data = count * datatype->super.size; - } - + + opal_datatype_type_size ( &datatype->super, &ftype_size ); + opal_datatype_get_extent ( &datatype->super, &lb, &ftype_extent ); + + /************************************************************************** + ** 1. 
In case the data is not contigous in memory, decode it into an iovec + **************************************************************************/ + if ( ( ftype_extent == (OPAL_PTRDIFF_TYPE) ftype_size) && + opal_datatype_is_contiguous_memory_layout(&datatype->super,1) && + 0 == lb ) { + sendbuf_is_contiguous = true; + } + + + /* In case the data is not contigous in memory, decode it into an iovec */ + if (! sendbuf_is_contiguous ) { + fh->f_decode_datatype ((struct mca_io_ompio_file_t *)fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + } + else { + max_data = count * datatype->super.size; + } + if ( MPI_STATUS_IGNORE != status ) { status->_ucount = max_data; } fh->f_get_num_aggregators ( & static_num_io_procs ); - fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *)fh, + fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *)fh, static_num_io_procs, max_data); - - - /* io_array datatype for using in communication*/ - types[0] = &ompi_mpi_long.dt; - types[1] = &ompi_mpi_long.dt; - types[2] = &ompi_mpi_int.dt; - - d[0] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0]; - d[1] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].length; - d[2] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].process_id; - base = d[0]; - for (i=0 ; i<3 ; i++) { - d[i] -= base; - } - ompi_datatype_create_struct (3, - blocklen, - d, - types, - &io_array_type); - ompi_datatype_commit (&io_array_type); - /* #########################################################*/ - - - - ret = fh->f_generate_current_file_view((struct mca_io_ompio_file_t *)fh, - max_data, - &iov, - &iov_size); - if (ret != OMPI_SUCCESS){ - fprintf(stderr,"Current File View Generation Error\n"); - goto exit; - } - - if (0 == iov_size){ - iov_size = 1; - } - - local_iov_array = (local_io_array *)malloc (iov_size * sizeof(local_io_array)); - if ( NULL == local_iov_array){ - fprintf(stderr,"local_iov_array allocation error\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - - for (j=0; j < iov_size; j++){ - 
local_iov_array[j].offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t) - iov[j].iov_base; - local_iov_array[j].length = (size_t)iov[j].iov_len; - local_iov_array[j].process_id = fh->f_rank; - - } - - fh->f_get_bytes_per_agg ( (int *) &bytes_per_cycle); - - - local_cycles = ceil((double)max_data/bytes_per_cycle); - ret = fh->f_comm->c_coll.coll_allreduce (&local_cycles, - &cycles, - 1, - MPI_INT, - MPI_MAX, - fh->f_comm, - fh->f_comm->c_coll.coll_allreduce_module); - - if (OMPI_SUCCESS != ret){ - fprintf(stderr,"local cycles allreduce!\n"); - goto exit; - } - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - - disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); - if (NULL == disp_index) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + + my_aggregator = fh->f_procs_in_group[fh->f_aggregator_index]; + + /* io_array datatype for using in communication*/ + types[0] = &ompi_mpi_long.dt; + types[1] = &ompi_mpi_long.dt; + types[2] = &ompi_mpi_int.dt; + + d[0] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0]; + d[1] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].length; + d[2] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].process_id; + base = d[0]; + for (i=0 ; i<3 ; i++) { + d[i] -= base; + } + ompi_datatype_create_struct (3, + blocklen, + d, + types, + &io_array_type); + ompi_datatype_commit (&io_array_type); + /* #########################################################*/ + + + + ret = fh->f_generate_current_file_view((struct mca_io_ompio_file_t *)fh, + max_data, + &iov, + &iov_size); + if (ret != OMPI_SUCCESS){ + fprintf(stderr,"Current File View Generation Error\n"); + goto exit; } - bytes_per_process = (int *) malloc (fh->f_procs_per_group * sizeof(int )); - if (NULL == bytes_per_process){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + if (0 == iov_size){ + iov_size = 1; } - - bytes_remaining = (int *) malloc (fh->f_procs_per_group * sizeof(int)); - if (NULL == bytes_remaining){ - 
opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + + local_iov_array = (mca_fcoll_static_local_io_array *)malloc (iov_size * sizeof(mca_fcoll_static_local_io_array)); + if ( NULL == local_iov_array){ + fprintf(stderr,"local_iov_array allocation error\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - - current_index = (int *) malloc (fh->f_procs_per_group * sizeof(int)); - if (NULL == current_index){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + + + for (j=0; j < iov_size; j++){ + local_iov_array[j].offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t) + iov[j].iov_base; + local_iov_array[j].length = (size_t)iov[j].iov_len; + local_iov_array[j].process_id = fh->f_rank; + } - blocklen_per_process = (int **)malloc (fh->f_procs_per_group * sizeof (int*)); - if (NULL == blocklen_per_process) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + fh->f_get_bytes_per_agg ( (int *) &bytes_per_cycle); + local_cycles = ceil( ((double)max_data*fh->f_procs_per_group) /bytes_per_cycle); + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_exch = MPI_Wtime(); +#endif + ret = fh->f_comm->c_coll.coll_allreduce (&local_cycles, + &cycles, + 1, + MPI_INT, + MPI_MAX, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); + + if (OMPI_SUCCESS != ret){ + fprintf(stderr,"local cycles allreduce!\n"); + goto exit; } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += end_comm_time - start_comm_time; +#endif + + if (my_aggregator == fh->f_rank) { - displs_per_process = (MPI_Aint **) - malloc (fh->f_procs_per_group * sizeof (MPI_Aint*)); + disp_index = (int *)malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + bytes_per_process = (int *) malloc (fh->f_procs_per_group * sizeof(int )); + if (NULL == bytes_per_process){ + opal_output (1, "OUT OF 
MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + bytes_remaining = (int *) malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == bytes_remaining){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + current_index = (int *) malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == current_index){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + blocklen_per_process = (int **)malloc (fh->f_procs_per_group * sizeof (int*)); + if (NULL == blocklen_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + displs_per_process = (MPI_Aint **) + malloc (fh->f_procs_per_group * sizeof (MPI_Aint*)); + + if (NULL == displs_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + for(i=0;if_procs_per_group;i++){ + current_index[i] = 0; + bytes_remaining[i] =0; + blocklen_per_process[i] = NULL; + displs_per_process[i] = NULL; + } + } + + iovec_count_per_process = (int *) malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == iovec_count_per_process){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } - if (NULL == displs_per_process) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + displs = (int *) malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == displs){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - - for(i=0;if_procs_per_group;i++){ - current_index[i] = 0; - bytes_remaining[i] =0; - blocklen_per_process[i] = NULL; - displs_per_process[i] = NULL; + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_exch = MPI_Wtime(); +#endif + ret = fh->f_allgather_array (&iov_size, + 1, + MPI_INT, + iovec_count_per_process, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + if( OMPI_SUCCESS != 
ret){ + fprintf(stderr,"iov size allgatherv array!\n"); + goto exit; } - } - - iovec_count_per_process = (int *) malloc (fh->f_procs_per_group * sizeof(int)); - if (NULL == iovec_count_per_process){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - displs = (int *) malloc (fh->f_procs_per_group * sizeof(int)); - if (NULL == displs){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - ret = fh->f_allgather_array (&iov_size, - 1, - MPI_INT, - iovec_count_per_process, - 1, - MPI_INT, - fh->f_aggregator_index, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - - if( OMPI_SUCCESS != ret){ - fprintf(stderr,"iov size allgatherv array!\n"); - goto exit; - } - - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - displs[0] = 0; - global_iov_count = iovec_count_per_process[0]; - for (i=1 ; if_procs_per_group ; i++) { - global_iov_count += iovec_count_per_process[i]; - displs[i] = displs[i-1] + iovec_count_per_process[i-1]; +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += end_comm_time - start_comm_time; +#endif + + + if (my_aggregator == fh->f_rank) { + displs[0] = 0; + global_iov_count = iovec_count_per_process[0]; + for (i=1 ; if_procs_per_group ; i++) { + global_iov_count += iovec_count_per_process[i]; + displs[i] = displs[i-1] + iovec_count_per_process[i-1]; + } } - } - - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - global_iov_array = (local_io_array *) malloc (global_iov_count * - sizeof(local_io_array)); - if (NULL == global_iov_array){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + + + if (my_aggregator == fh->f_rank) { + global_iov_array = (mca_fcoll_static_local_io_array *) malloc (global_iov_count * + sizeof(mca_fcoll_static_local_io_array)); + if (NULL == global_iov_array){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto 
exit; + } } - } - - ret = fh->f_gatherv_array (local_iov_array, - iov_size, - io_array_type, - global_iov_array, - iovec_count_per_process, - displs, - io_array_type, - fh->f_aggregator_index, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - if (OMPI_SUCCESS != ret){ - fprintf(stderr,"global_iov_array gather error!\n"); - goto exit; - } - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - - if ( 0 == global_iov_count){ - global_iov_count = 1; + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_exch = MPI_Wtime(); +#endif + ret = fh->f_gatherv_array (local_iov_array, + iov_size, + io_array_type, + global_iov_array, + iovec_count_per_process, + displs, + io_array_type, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + if (OMPI_SUCCESS != ret){ + fprintf(stderr,"global_iov_array gather error!\n"); + goto exit; } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += end_comm_time - start_comm_time; +#endif + + if (my_aggregator == fh->f_rank) { + + if ( 0 == global_iov_count){ + global_iov_count = 1; + } + + sorted = (int *)malloc (global_iov_count * sizeof(int)); + if (NULL == sorted) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + local_heap_sort (global_iov_array, global_iov_count, sorted); + + recv_req = (MPI_Request *)malloc (fh->f_procs_per_group * sizeof(MPI_Request)); + if (NULL == recv_req){ + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + if (NULL == recvtype){ + recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); + if (NULL == recvtype) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + for ( i=0; i < fh->f_procs_per_group; i++ ) { + recvtype[i] = MPI_DATATYPE_NULL; + } - sorted = (int *)malloc (global_iov_count * sizeof(int)); - if (NULL == sorted) { - opal_output (1, "OUT OF MEMORY\n"); 
- ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; } - local_heap_sort (global_iov_array, global_iov_count, sorted); - } #if DEBUG_ON - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - for (gc_in=0; gc_inf_rank) { + for (gc_in=0; gc_inf_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - if (NULL == recvtype){ - recvtype = (ompi_datatype_t **) - malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *)); - if (NULL == recvtype) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - for(l=0;lf_procs_per_group;l++){ - disp_index[l] = 1; - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } - blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); - if (NULL == blocklen_per_process[l]) { - opal_output (1, "OUT OF MEMORY for blocklen\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); - if (NULL == displs_per_process[l]){ - opal_output (1, "OUT OF MEMORY for displs\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - if (NULL != sorted_file_offsets){ - free(sorted_file_offsets); - sorted_file_offsets = NULL; - } - - if(NULL != file_offsets_for_agg){ - free(file_offsets_for_agg); - file_offsets_for_agg = NULL; - } - - if (NULL != memory_displacements){ - free(memory_displacements); - memory_displacements = NULL; - } - - } - if (local_cycles > index) { - if ((index == local_cycles-1) && (max_data % bytes_per_cycle)) { - bytes_to_write_in_cycle = max_data % bytes_per_cycle; - } - else if (max_data <= bytes_per_cycle) { - bytes_to_write_in_cycle = max_data; - } - else { - bytes_to_write_in_cycle = bytes_per_cycle; - } - } - else { - bytes_to_write_in_cycle = 0; - } + + for (index = 0; index < cycles; index++){ + + if (my_aggregator == fh->f_rank) { + 
fh->f_num_of_io_entries = 0; + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + + if ( NULL != recvtype ) { + for ( i=0; i < fh->f_procs_per_group; i++ ) { + if (MPI_DATATYPE_NULL != recvtype[i] ) { + ompi_datatype_destroy(&recvtype[i]); + } + } + } + + for(l=0;lf_procs_per_group;l++){ + disp_index[l] = 1; + if (NULL != blocklen_per_process[l]){ + free(blocklen_per_process[l]); + blocklen_per_process[l] = NULL; + } + if (NULL != displs_per_process[l]){ + free(displs_per_process[l]); + displs_per_process[l] = NULL; + } + blocklen_per_process[l] = (int *) calloc (1, sizeof(int)); + if (NULL == blocklen_per_process[l]) { + opal_output (1, "OUT OF MEMORY for blocklen\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + displs_per_process[l] = (MPI_Aint *) calloc (1, sizeof(MPI_Aint)); + if (NULL == displs_per_process[l]){ + opal_output (1, "OUT OF MEMORY for displs\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + if (NULL != sorted_file_offsets){ + free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + + if(NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements = NULL; + } + + } + if ( index < local_cycles ) { + if ((index == local_cycles-1) && (max_data % (bytes_per_cycle/fh->f_procs_per_group)) ) { + bytes_to_write_in_cycle = max_data - total_bytes_written; + } + else if (max_data <= bytes_per_cycle/fh->f_procs_per_group) { + bytes_to_write_in_cycle = max_data; + } + else { + bytes_to_write_in_cycle = bytes_per_cycle/ fh->f_procs_per_group; + } + } + else { + bytes_to_write_in_cycle = 0; + } #if DEBUG_ON - /* if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) {*/ - printf ("***%d: CYCLE %d Bytes %ld**********\n", - fh->f_rank, - index, - bytes_to_write_in_cycle); - /* }*/ + /* if 
(my_aggregator == fh->f_rank) {*/ + printf ("***%d: CYCLE %d Bytes %ld**********\n", + fh->f_rank, + index, + bytes_to_write_in_cycle); + /* }*/ #endif - /********************************************************** - **Gather the Data from all the processes at the writers ** - *********************************************************/ - - /* gather from each process how many bytes each will be sending */ - fh->f_gather_array (&bytes_to_write_in_cycle, - 1, - MPI_INT, - bytes_per_process, - 1, - MPI_INT, - fh->f_aggregator_index, - fh->f_procs_in_group, - fh->f_procs_per_group, - fh->f_comm); - - /* - For each aggregator - it needs to get bytes_to_write_in_cycle from each process - in group which adds up to bytes_per_cycle - - */ - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - for (i=0;if_procs_per_group; i++){ -/* printf("bytes_per_process[%d]: %d\n", i, bytes_per_process[i]); - */ + /********************************************************** + **Gather the Data from all the processes at the writers ** + *********************************************************/ + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_exch = MPI_Wtime(); +#endif + /* gather from each process how many bytes each will be sending */ + ret = fh->f_gather_array (&bytes_to_write_in_cycle, + 1, + MPI_INT, + bytes_per_process, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + if (OMPI_SUCCESS != ret){ + fprintf(stderr,"bytes_to_write_in_cycle gather error!\n"); + goto exit; + } +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_comm_time = MPI_Wtime(); + comm_time += end_comm_time - start_comm_time; +#endif + + /* + For each aggregator + it needs to get bytes_to_write_in_cycle from each process + in group which adds up to bytes_per_cycle + + */ + if (my_aggregator == fh->f_rank) { + for (i=0;if_procs_per_group; i++){ #if DEBUG_ON - printf ("%d : bytes_per_process : %d\n", - fh->f_procs_in_group[i], - bytes_per_process[i]); + 
printf ("%d : bytes_per_process : %d\n", + fh->f_procs_in_group[i], + bytes_per_process[i]); #endif - - while (bytes_per_process[i] > 0){ - if (get_process_id(global_iov_array[sorted[current_index[i]]].process_id, - fh) == i){ /* current id owns this entry!*/ - - /*Add and subtract length and create - blocklength and displs array*/ - if (bytes_remaining[i]){ /*Remaining bytes in the current entry of - the global offset array*/ - if (bytes_remaining[i] <= bytes_per_process[i]){ - blocklen_per_process[i][disp_index[i] - 1] = bytes_remaining[i]; - displs_per_process[i][disp_index[i] - 1] = - global_iov_array[sorted[current_index[i]]].offset + - (global_iov_array[sorted[current_index[i]]].length - - bytes_remaining[i]); - - blocklen_per_process[i] = (int *) realloc - ((void *)blocklen_per_process[i], (disp_index[i]+1)*sizeof(int)); - displs_per_process[i] = (MPI_Aint *)realloc - ((void *)displs_per_process[i], (disp_index[i]+1)*sizeof(MPI_Aint)); - bytes_per_process[i] -= bytes_remaining[i]; - blocklen_per_process[i][disp_index[i]] = 0; - displs_per_process[i][disp_index[i]] = 0; - bytes_remaining[i] = 0; - disp_index[i] += 1; - /* This entry has been used up, we need to move to the - next entry of this process and make current_index point there*/ - current_index[i] = find_next_index(i, - current_index[i], - fh, - global_iov_array, - global_iov_count, - sorted); - if (current_index[i] == -1){ - /* No more entries left, so Its all done! 
exit!*/ - break; - } - continue; - } - else{ - blocklen_per_process[i][disp_index[i] - 1] = bytes_per_process[i]; - displs_per_process[i][disp_index[i] - 1] = - global_iov_array[sorted[current_index[i]]].offset + - (global_iov_array[sorted[current_index[i]]].length - - bytes_remaining[i]); - bytes_remaining[i] -= bytes_per_process[i]; - bytes_per_process[i] = 0; - break; - } - } - else{ - if (bytes_per_process[i] < - global_iov_array[sorted[current_index[i]]].length){ - blocklen_per_process[i][disp_index[i] - 1] = - bytes_per_process[i]; - displs_per_process[i][disp_index[i] - 1] = - global_iov_array[sorted[current_index[i]]].offset; - - bytes_remaining[i] = - global_iov_array[sorted[current_index[i]]].length - - bytes_per_process[i]; - bytes_per_process[i] = 0; - break; - } - else { - blocklen_per_process[i][disp_index[i] - 1] = - global_iov_array[sorted[current_index[i]]].length; - displs_per_process[i][disp_index[i] - 1] = - global_iov_array[sorted[current_index[i]]].offset; - blocklen_per_process[i] = - (int *) realloc ((void *)blocklen_per_process[i], (disp_index[i]+1)*sizeof(int)); - displs_per_process[i] = (MPI_Aint *)realloc - ((void *)displs_per_process[i], (disp_index[i]+1)*sizeof(MPI_Aint)); - blocklen_per_process[i][disp_index[i]] = 0; - displs_per_process[i][disp_index[i]] = 0; - disp_index[i] += 1; - bytes_per_process[i] -= - global_iov_array[sorted[current_index[i]]].length; - current_index[i] = find_next_index(i, - current_index[i], - fh, - global_iov_array, - global_iov_count, - sorted); - if (current_index[i] == -1){ - break; - } - } - } - } - else{ - current_index[i] = find_next_index(i, - current_index[i], - fh, - global_iov_array, - global_iov_count, - sorted); - if (current_index[i] == -1){ - bytes_per_process[i] = 0; /* no more entries left - to service this request*/ - continue; - } - } - } - } - entries_per_aggregator=0; - for (i=0;if_procs_per_group;i++){ - for (j=0;j 0){ - entries_per_aggregator++; + + while (bytes_per_process[i] > 0){ + 
if (get_process_id(global_iov_array[sorted[current_index[i]]].process_id, + fh) == i){ /* current id owns this entry!*/ + + /*Add and subtract length and create + blocklength and displs array*/ + if (bytes_remaining[i]){ /*Remaining bytes in the current entry of + the global offset array*/ + if (bytes_remaining[i] <= bytes_per_process[i]){ + blocklen_per_process[i][disp_index[i] - 1] = bytes_remaining[i]; + displs_per_process[i][disp_index[i] - 1] = + global_iov_array[sorted[current_index[i]]].offset + + (global_iov_array[sorted[current_index[i]]].length + - bytes_remaining[i]); + + blocklen_per_process[i] = (int *) realloc + ((void *)blocklen_per_process[i], (disp_index[i]+1)*sizeof(int)); + displs_per_process[i] = (MPI_Aint *)realloc + ((void *)displs_per_process[i], (disp_index[i]+1)*sizeof(MPI_Aint)); + bytes_per_process[i] -= bytes_remaining[i]; + blocklen_per_process[i][disp_index[i]] = 0; + displs_per_process[i][disp_index[i]] = 0; + bytes_remaining[i] = 0; + disp_index[i] += 1; + /* This entry has been used up, we need to move to the + next entry of this process and make current_index point there*/ + current_index[i] = find_next_index(i, + current_index[i], + fh, + global_iov_array, + global_iov_count, + sorted); + if (current_index[i] == -1){ + /* No more entries left, so Its all done! 
exit!*/ + break; + } + continue; + } + else{ + blocklen_per_process[i][disp_index[i] - 1] = bytes_per_process[i]; + displs_per_process[i][disp_index[i] - 1] = + global_iov_array[sorted[current_index[i]]].offset + + (global_iov_array[sorted[current_index[i]]].length + - bytes_remaining[i]); + bytes_remaining[i] -= bytes_per_process[i]; + bytes_per_process[i] = 0; + break; + } + } + else{ + if (bytes_per_process[i] < + global_iov_array[sorted[current_index[i]]].length){ + blocklen_per_process[i][disp_index[i] - 1] = + bytes_per_process[i]; + displs_per_process[i][disp_index[i] - 1] = + global_iov_array[sorted[current_index[i]]].offset; + + bytes_remaining[i] = + global_iov_array[sorted[current_index[i]]].length - + bytes_per_process[i]; + bytes_per_process[i] = 0; + break; + } + else { + blocklen_per_process[i][disp_index[i] - 1] = + global_iov_array[sorted[current_index[i]]].length; + displs_per_process[i][disp_index[i] - 1] = + global_iov_array[sorted[current_index[i]]].offset; + blocklen_per_process[i] = + (int *) realloc ((void *)blocklen_per_process[i], (disp_index[i]+1)*sizeof(int)); + displs_per_process[i] = (MPI_Aint *)realloc + ((void *)displs_per_process[i], (disp_index[i]+1)*sizeof(MPI_Aint)); + blocklen_per_process[i][disp_index[i]] = 0; + displs_per_process[i][disp_index[i]] = 0; + disp_index[i] += 1; + bytes_per_process[i] -= + global_iov_array[sorted[current_index[i]]].length; + current_index[i] = find_next_index(i, + current_index[i], + fh, + global_iov_array, + global_iov_count, + sorted); + if (current_index[i] == -1){ + break; + } + } + } + } + else{ + current_index[i] = find_next_index(i, + current_index[i], + fh, + global_iov_array, + global_iov_count, + sorted); + if (current_index[i] == -1){ + bytes_per_process[i] = 0; /* no more entries left + to service this request*/ + continue; + } + } + } + } + entries_per_aggregator=0; + for (i=0;if_procs_per_group;i++){ + for (j=0;j 0){ + entries_per_aggregator++; #if DEBUG_ON - printf("%d sends 
blocklen[%d]: %d, disp[%d]: %ld to %d\n", - fh->f_procs_in_group[i],j, - blocklen_per_process[i][j],j, - displs_per_process[i][j], - fh->f_rank); - + printf("%d sends blocklen[%d]: %d, disp[%d]: %ld to %d\n", + fh->f_procs_in_group[i],j, + blocklen_per_process[i][j],j, + displs_per_process[i][j], + fh->f_rank); + #endif - } - - } - } - - if (entries_per_aggregator > 0){ - file_offsets_for_agg = (local_io_array *) - malloc(entries_per_aggregator*sizeof(local_io_array)); - if (NULL == file_offsets_for_agg) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - sorted_file_offsets = (int *) - malloc (entries_per_aggregator*sizeof(int)); - if (NULL == sorted_file_offsets){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - temp_index = 0; - for (i=0;if_procs_per_group; i++){ - for(j=0;j 0){ - file_offsets_for_agg[temp_index].length = - blocklen_per_process[i][j]; - file_offsets_for_agg[temp_index].process_id = i; - file_offsets_for_agg[temp_index].offset = - displs_per_process[i][j]; - temp_index++; - } - } - } - } - else{ - continue; - } - local_heap_sort (file_offsets_for_agg, - entries_per_aggregator, - sorted_file_offsets); - - memory_displacements = (MPI_Aint *) malloc - (entries_per_aggregator * sizeof(MPI_Aint)); - memory_displacements[sorted_file_offsets[0]] = 0; - for (i=1; if_procs_per_group * sizeof (int)); - if (NULL == temp_disp_index) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - global_count = 0; - for (i=0;i 0){ + file_offsets_for_agg = (mca_fcoll_static_local_io_array *) + malloc(entries_per_aggregator*sizeof(mca_fcoll_static_local_io_array)); + if (NULL == file_offsets_for_agg) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + sorted_file_offsets = (int *) + malloc (entries_per_aggregator*sizeof(int)); + if (NULL == sorted_file_offsets){ + opal_output (1, "OUT OF MEMORY\n"); + ret = 
OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + temp_index = 0; + for (i=0;if_procs_per_group; i++){ + for(j=0;j 0){ + file_offsets_for_agg[temp_index].length = + blocklen_per_process[i][j]; + file_offsets_for_agg[temp_index].process_id = i; + file_offsets_for_agg[temp_index].offset = + displs_per_process[i][j]; + temp_index++; + } + } + } + } + else{ + continue; + } + local_heap_sort (file_offsets_for_agg, + entries_per_aggregator, + sorted_file_offsets); + + memory_displacements = (MPI_Aint *) malloc + (entries_per_aggregator * sizeof(MPI_Aint)); + memory_displacements[sorted_file_offsets[0]] = 0; + for (i=1; if_procs_per_group * sizeof (int)); + if (NULL == temp_disp_index) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + global_count = 0; + for (i=0;if_rank); - for (i=0; if_rank); + for (i=0; if_rank, - global_count, - bytes_to_write_in_cycle, - fh->f_procs_per_group); +#if DEBUG_ON + printf("%d: global_count : %ld, bytes_to_write_in_cycle : %ld, procs_per_group: %d\n", + fh->f_rank, + global_count, + bytes_to_write_in_cycle, + fh->f_procs_per_group); #endif -#if TIME_BREAKDOWN - start_comm_time = MPI_Wtime(); +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_comm_time = MPI_Wtime(); #endif - global_buf = (char *) malloc (global_count); - if (NULL == global_buf){ - opal_output(1, "OUT OF MEMORY"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - recv_req = (MPI_Request *) - malloc (fh->f_procs_per_group * sizeof(MPI_Request)); - if (NULL == recv_req){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - for (i=0;if_procs_per_group; i++){ - ompi_datatype_create_hindexed(disp_index[i], - blocklen_per_process[i], - displs_per_process[i], - MPI_BYTE, - &recvtype[i]); - ompi_datatype_commit(&recvtype[i]); - ret = MCA_PML_CALL(irecv(global_buf, - 1, - recvtype[i], - fh->f_procs_in_group[i], - 123, - fh->f_comm, - &recv_req[i])); - if (OMPI_SUCCESS != ret){ - fprintf(stderr,"irecv 
Error!\n"); - goto exit; - } - } - } - - if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { - send_buf = &((char*)buf)[total_bytes_written]; - } - else if (bytes_to_write_in_cycle) { - /* allocate a send buffer and copy the data that needs - to be sent into it in case the data is non-contigous - in memory */ - OPAL_PTRDIFF_TYPE mem_address; - size_t remaining = 0; - size_t temp_position = 0; - - send_buf = malloc (bytes_to_write_in_cycle); - if (NULL == send_buf) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; + global_buf = (char *) malloc (global_count); + if (NULL == global_buf){ + opal_output(1, "OUT OF MEMORY"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + for (i=0;if_procs_per_group; i++){ + ompi_datatype_create_hindexed(disp_index[i], + blocklen_per_process[i], + displs_per_process[i], + MPI_BYTE, + &recvtype[i]); + ompi_datatype_commit(&recvtype[i]); + ret = MCA_PML_CALL(irecv(global_buf, + 1, + recvtype[i], + fh->f_procs_in_group[i], + 123, + fh->f_comm, + &recv_req[i])); + if (OMPI_SUCCESS != ret){ + fprintf(stderr,"irecv Error!\n"); + goto exit; + } + } + } + + if ( sendbuf_is_contiguous ) { + send_buf = &((char*)buf)[total_bytes_written]; + } + else if (bytes_to_write_in_cycle) { + /* allocate a send buffer and copy the data that needs + to be sent into it in case the data is non-contigous + in memory */ + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + send_buf = malloc (bytes_to_write_in_cycle); + if (NULL == send_buf) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + remaining = bytes_to_write_in_cycle; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy (send_buf+temp_position, + (IOVBASE_TYPE *)mem_address, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + 
(decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + } + else { + memcpy (send_buf+temp_position, + (IOVBASE_TYPE *)mem_address, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + } + total_bytes_written += bytes_to_write_in_cycle; + + ret = MCA_PML_CALL(isend(send_buf, + bytes_to_write_in_cycle, + MPI_BYTE, + my_aggregator, + 123, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &send_req)); + + if ( OMPI_SUCCESS != ret ){ + fprintf(stderr,"isend error!\n"); goto exit; - } - remaining = bytes_to_write_in_cycle; - - while (remaining) { - mem_address = (OPAL_PTRDIFF_TYPE) - (decoded_iov[iov_index].iov_base) + current_position; - - if (remaining >= - (decoded_iov[iov_index].iov_len - current_position)) { - memcpy (send_buf+temp_position, - (IOVBASE_TYPE *)mem_address, - decoded_iov[iov_index].iov_len - current_position); - remaining = remaining - - (decoded_iov[iov_index].iov_len - current_position); - temp_position = temp_position + - (decoded_iov[iov_index].iov_len - current_position); - iov_index = iov_index + 1; - current_position = 0; - } - else { - memcpy (send_buf+temp_position, - (IOVBASE_TYPE *)mem_address, - remaining); - current_position = current_position + remaining; - remaining = 0; - } - } - } - total_bytes_written += bytes_to_write_in_cycle; - - send_req = (MPI_Request *) malloc (sizeof(MPI_Request)); - if (NULL == send_req){ - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - ret = MCA_PML_CALL(isend(send_buf, - bytes_to_write_in_cycle, - MPI_BYTE, - fh->f_procs_in_group[fh->f_aggregator_index], - 123, - MCA_PML_BASE_SEND_STANDARD, - fh->f_comm, - send_req)); - - if ( OMPI_SUCCESS != ret ){ - fprintf(stderr,"isend error!\n"); - goto exit; - } - - ret = ompi_request_wait (send_req, MPI_STATUS_IGNORE); - if (OMPI_SUCCESS != ret){ - 
goto exit; - } - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - ret = ompi_request_wait_all (fh->f_procs_per_group, - recv_req, - MPI_STATUS_IGNORE); - if (OMPI_SUCCESS != ret){ - goto exit; - } - + } + + ret = ompi_request_wait (&send_req, MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + if ( !sendbuf_is_contiguous ) { + if ( NULL != send_buf ) { + free ( send_buf ); + send_buf = NULL; + } + } + + if (my_aggregator == fh->f_rank) { + ret = ompi_request_wait_all (fh->f_procs_per_group, + recv_req, + MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != ret){ + goto exit; + } + #if DEBUG_ON - printf("************Cycle: %d, Aggregator: %d ***************\n", - index+1,fh->f_rank); - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank){ - for (i=0 ; if_rank); + if (my_aggregator == fh->f_rank){ + for (i=0 ; if_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - fh->f_io_array = (mca_io_ompio_io_array_t *) malloc - (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); - if (NULL == fh->f_io_array) { - opal_output(1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - fh->f_num_of_io_entries = 0; - /*First entry for every aggregator*/ - fh->f_io_array[fh->f_num_of_io_entries].offset = - (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; - fh->f_io_array[fh->f_num_of_io_entries].length = - file_offsets_for_agg[sorted_file_offsets[0]].length; - fh->f_io_array[fh->f_num_of_io_entries].memory_address = - global_buf+memory_displacements[sorted_file_offsets[0]]; - fh->f_num_of_io_entries++; - for (i=1;if_io_array[fh->f_num_of_io_entries - 1].length += - file_offsets_for_agg[sorted_file_offsets[i]].length; - } - else { - fh->f_io_array[fh->f_num_of_io_entries].offset = - (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; - fh->f_io_array[fh->f_num_of_io_entries].length = - file_offsets_for_agg[sorted_file_offsets[i]].length; - 
fh->f_io_array[fh->f_num_of_io_entries].memory_address = - global_buf+memory_displacements[sorted_file_offsets[i]]; - fh->f_num_of_io_entries++; - } - } + + if (my_aggregator == fh->f_rank) { + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc + (entries_per_aggregator * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + fh->f_num_of_io_entries = 0; + /*First entry for every aggregator*/ + fh->f_io_array[fh->f_num_of_io_entries].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[0]].offset; + fh->f_io_array[fh->f_num_of_io_entries].length = + file_offsets_for_agg[sorted_file_offsets[0]].length; + fh->f_io_array[fh->f_num_of_io_entries].memory_address = + global_buf+memory_displacements[sorted_file_offsets[0]]; + fh->f_num_of_io_entries++; + for (i=1;if_io_array[fh->f_num_of_io_entries - 1].length += + file_offsets_for_agg[sorted_file_offsets[i]].length; + } + else { + fh->f_io_array[fh->f_num_of_io_entries].offset = + (IOVBASE_TYPE *)(intptr_t)file_offsets_for_agg[sorted_file_offsets[i]].offset; + fh->f_io_array[fh->f_num_of_io_entries].length = + file_offsets_for_agg[sorted_file_offsets[i]].length; + fh->f_io_array[fh->f_num_of_io_entries].memory_address = + global_buf+memory_displacements[sorted_file_offsets[i]]; + fh->f_num_of_io_entries++; + } + } #if DEBUG_ON - printf("*************************** %d\n", fh->f_num_of_io_entries); - for (i=0 ; if_num_of_io_entries ; i++) { - printf(" ADDRESS: %p OFFSET: %ld LENGTH: %ld\n", - fh->f_io_array[i].memory_address, - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].offset, - fh->f_io_array[i].length); - } + printf("*************************** %d\n", fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) { + printf(" ADDRESS: %p OFFSET: %ld LENGTH: %ld\n", + fh->f_io_array[i].memory_address, + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].offset, + fh->f_io_array[i].length); + } #endif - -#if 
TIME_BREAKDOWN - start_write_time = MPI_Wtime(); + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + start_write_time = MPI_Wtime(); #endif - if (fh->f_num_of_io_entries) { - if ( 0 > fh->f_fbtl->fbtl_pwritev (fh)) { - opal_output (1, "WRITE FAILED\n"); - ret = OMPI_ERROR; - goto exit; - } - } - -#if TIME_BREAKDOWN - end_write_time = MPI_Wtime(); - write_time += end_write_time - start_write_time; + if (fh->f_num_of_io_entries) { + if ( 0 > fh->f_fbtl->fbtl_pwritev (fh)) { + opal_output (1, "WRITE FAILED\n"); + ret = OMPI_ERROR; + goto exit; + } + } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + end_write_time = MPI_Wtime(); + write_time += end_write_time - start_write_time; #endif - - } - if (NULL != send_req){ - free(send_req); - send_req = NULL; - } - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - fh->f_num_of_io_entries = 0; - if (NULL != fh->f_io_array) { - free (fh->f_io_array); - fh->f_io_array = NULL; - } - for (i = 0; i < fh->f_procs_per_group; i++) - ompi_datatype_destroy(recvtype+i); - if (NULL != recvtype){ - free(recvtype); - recvtype=NULL; - } - if (NULL != recv_req){ - free(recv_req); - recv_req = NULL; - } - if (NULL != global_buf) { - free (global_buf); - global_buf = NULL; - } - } - } -#if TIME_BREAKDOWN + } + + if (my_aggregator == fh->f_rank) { + } } + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN end_exch = MPI_Wtime(); exch_write += end_exch - start_exch; nentry.time[0] = write_time; nentry.time[1] = comm_time; nentry.time[2] = exch_write; - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) - nentry.aggregator = 1; + if (my_aggregator == fh->f_rank) + nentry.aggregator = 1; else - nentry.aggregator = 0; + nentry.aggregator = 0; nentry.nprocs_for_coll = static_num_io_procs; if (!fh->f_full_print_queue(WRITE_PRINT_QUEUE)){ fh->f_register_print_entry(WRITE_PRINT_QUEUE, nentry); - } + } #endif - - - exit: - if (NULL != decoded_iov){ - free(decoded_iov); - decoded_iov = NULL; - } - - if (fh->f_procs_in_group[fh->f_aggregator_index] == 
fh->f_rank) { - - if (NULL != local_iov_array){ - free(local_iov_array); - local_iov_array = NULL; + + +exit: + if (NULL != decoded_iov){ + free(decoded_iov); + decoded_iov = NULL; + } + + if (my_aggregator == fh->f_rank) { + + if (NULL != local_iov_array){ + free(local_iov_array); + local_iov_array = NULL; + } + for(l=0;lf_procs_per_group;l++){ + if (NULL != blocklen_per_process[l]){ + free(blocklen_per_process[l]); + blocklen_per_process[l] = NULL; + } + if (NULL != displs_per_process[l]){ + free(displs_per_process[l]); + displs_per_process[l] = NULL; + } + } + } + + if ( NULL != recv_req ) { + free ( recv_req ); + recv_req = NULL; } - for(l=0;lf_procs_per_group;l++){ - if (NULL != blocklen_per_process[l]){ - free(blocklen_per_process[l]); - blocklen_per_process[l] = NULL; - } - if (NULL != displs_per_process[l]){ - free(displs_per_process[l]); - displs_per_process[l] = NULL; - } + if ( !sendbuf_is_contiguous ) { + if (NULL != send_buf){ + free(send_buf); + send_buf = NULL; + } } - } - - if (NULL != send_buf){ - free(send_buf); - send_buf = NULL; - } - - if (NULL != global_buf){ - free(global_buf); - global_buf = NULL; - } - - if (NULL != recvtype){ - free(recvtype); - recvtype = NULL; - } - - if (NULL != sorted_file_offsets){ - free(sorted_file_offsets); - sorted_file_offsets = NULL; - } - - if (NULL != file_offsets_for_agg){ - free(file_offsets_for_agg); - file_offsets_for_agg = NULL; - } - - if (NULL != memory_displacements){ - free(memory_displacements); - memory_displacements = NULL; - } - - if (NULL != displs_per_process){ - free(displs_per_process); - displs_per_process = NULL; - } - - if (NULL != blocklen_per_process){ - free(blocklen_per_process); - blocklen_per_process = NULL; - } - - if(NULL != current_index){ - free(current_index); - current_index = NULL; - } - - if(NULL != bytes_remaining){ - free(bytes_remaining); - bytes_remaining = NULL; - } - - if (NULL != disp_index){ - free(disp_index); - disp_index = NULL; - } - - if (NULL != sorted) { - 
free(sorted); - sorted = NULL; - } - - return ret; -} - - - -static int local_heap_sort (local_io_array *io_array, + + if (NULL != global_buf){ + free(global_buf); + global_buf = NULL; + } + + if (NULL != recvtype){ + free(recvtype); + recvtype = NULL; + } + + if (NULL != sorted_file_offsets){ + free(sorted_file_offsets); + sorted_file_offsets = NULL; + } + + if (NULL != file_offsets_for_agg){ + free(file_offsets_for_agg); + file_offsets_for_agg = NULL; + } + + if (NULL != memory_displacements){ + free(memory_displacements); + memory_displacements = NULL; + } + + if (NULL != displs_per_process){ + free(displs_per_process); + displs_per_process = NULL; + } + + if (NULL != blocklen_per_process){ + free(blocklen_per_process); + blocklen_per_process = NULL; + } + + if(NULL != current_index){ + free(current_index); + current_index = NULL; + } + + if(NULL != bytes_remaining){ + free(bytes_remaining); + bytes_remaining = NULL; + } + + if (NULL != disp_index){ + free(disp_index); + disp_index = NULL; + } + + if (NULL != sorted) { + free(sorted); + sorted = NULL; + } + + return ret; +} + + + +static int local_heap_sort (mca_fcoll_static_local_io_array *io_array, int num_entries, int *sorted) { @@ -1020,7 +1069,7 @@ static int local_heap_sort (local_io_array *io_array, int* temp_arr = NULL; if( 0 == num_entries){ - num_entries = 1; + num_entries = 1; } @@ -1042,15 +1091,15 @@ static int local_heap_sort (local_io_array *io_array, while (!done) { left = j*2+1; right = j*2+2; - if ((left <= heap_size) && + if ((left <= heap_size) && (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { largest = left; } else { largest = j; } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > io_array[temp_arr[largest]].offset)) { largest = right; } @@ -1069,8 +1118,8 @@ static int local_heap_sort (local_io_array *io_array, for (i = num_entries-1; i >=1; --i) { temp = temp_arr[0]; temp_arr[0] = 
temp_arr[i]; - temp_arr[i] = temp; - heap_size--; + temp_arr[i] = temp; + heap_size--; done = 0; j = 0; largest = j; @@ -1078,17 +1127,17 @@ static int local_heap_sort (local_io_array *io_array, while (!done) { left = j*2+1; right = j*2+2; - - if ((left <= heap_size) && - (io_array[temp_arr[left]].offset > + + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { largest = left; } else { largest = j; } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > io_array[temp_arr[largest]].offset)) { largest = right; } @@ -1115,28 +1164,28 @@ static int local_heap_sort (local_io_array *io_array, int find_next_index( int proc_index, int c_index, - mca_io_ompio_file_t *fh, - local_io_array *global_iov_array, + mca_io_ompio_file_t *fh, + mca_fcoll_static_local_io_array *global_iov_array, int global_iov_count, int *sorted){ - int i; - - for(i=c_index+1; if_procs_per_group; i++){ - if (fh->f_procs_in_group[i] == rank){ - return i; + int i; + for (i=0; if_procs_per_group; i++){ + if (fh->f_procs_in_group[i] == rank){ + return i; + } } - } - return -1; + return -1; } diff --git a/ompi/mca/fcoll/static/fcoll_static_module.c b/ompi/mca/fcoll/static/fcoll_static_module.c index f88253ec641..e4438f70a19 100644 --- a/ompi/mca/fcoll/static/fcoll_static_module.c +++ b/ompi/mca/fcoll/static/fcoll_static_module.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -70,9 +70,9 @@ mca_fcoll_static_component_file_query (mca_io_ompio_file_t *fh, int *priority) } int mca_fcoll_static_component_file_unquery (mca_io_ompio_file_t *file) -{ +{ /* This function might be needed for some purposes later. for now it - * does not have anything to do since there are no steps which need + * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ return OMPI_SUCCESS; @@ -83,8 +83,8 @@ int mca_fcoll_static_module_init (mca_io_ompio_file_t *file) return OMPI_SUCCESS; } - -int mca_fcoll_static_module_finalize (mca_io_ompio_file_t *file) + +int mca_fcoll_static_module_finalize (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } diff --git a/ompi/mca/fcoll/two_phase/Makefile.am b/ompi/mca/fcoll/two_phase/Makefile.am index 5a14494700a..7b9395f55e7 100644 --- a/ompi/mca/fcoll/two_phase/Makefile.am +++ b/ompi/mca/fcoll/two_phase/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2015 University of Houston. All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -24,7 +24,7 @@ sources = \ fcoll_two_phase_component.c \ fcoll_two_phase_file_read_all.c \ fcoll_two_phase_file_write_all.c \ - fcoll_two_phase_support_fns.c + fcoll_two_phase_support_fns.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase.h b/ompi/mca/fcoll/two_phase/fcoll_two_phase.h index 61c88f86581..8ffa620812e 100644 --- a/ompi/mca/fcoll/two_phase/fcoll_two_phase.h +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase.h @@ -5,15 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -48,29 +50,29 @@ int mca_fcoll_two_phase_component_file_unquery (mca_io_ompio_file_t *file); int mca_fcoll_two_phase_module_init (mca_io_ompio_file_t *file); int mca_fcoll_two_phase_module_finalize (mca_io_ompio_file_t *file); -int mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, - void *buf, +int mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, + void *buf, int count, - struct ompi_datatype_t *datatype, + struct ompi_datatype_t *datatype, ompi_status_public_t * status); -int mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, - void *buf, +int mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, + const void *buf, int count, - struct ompi_datatype_t *datatype, + struct ompi_datatype_t *datatype, ompi_status_public_t * status); int mca_fcoll_two_phase_calc_aggregator (mca_io_ompio_file_t *fh, - OMPI_MPI_OFFSET_TYPE off, + OMPI_MPI_OFFSET_TYPE off, OMPI_MPI_OFFSET_TYPE min_off, OMPI_MPI_OFFSET_TYPE *len, OMPI_MPI_OFFSET_TYPE fd_size, OMPI_MPI_OFFSET_TYPE *fd_start, OMPI_MPI_OFFSET_TYPE *fd_end, int striping_unit, - int num_aggregators, + int num_aggregators, int *aggregator_list); int mca_fcoll_two_phase_calc_others_requests(mca_io_ompio_file_t *fh, diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_component.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_component.c index 3a2a2aa1277..507192ef4ba 100644 --- a/ompi/mca/fcoll/two_phase/fcoll_two_phase_component.c +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c index 62c263964e1..cfae9c1ec67 100644 --- a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,21 +34,20 @@ #include #define DEBUG 0 -#define TIME_BREAKDOWN 0 -/* Two Phase implementation from ROMIO ported to OMPIO infrastructure +/* Two Phase implementation from ROMIO ported to OMPIO infrastructure * This is pretty much the same as ROMIO's two_phase and based on ROMIO's code * base */ /* Datastructure to support specifying the flat-list. 
*/ -typedef struct flat_list_node { +typedef struct flat_list_node { MPI_Datatype type; - int count; + int count; OMPI_MPI_OFFSET_TYPE *blocklens; - OMPI_MPI_OFFSET_TYPE *indices; - struct flat_list_node *next; + OMPI_MPI_OFFSET_TYPE *indices; + struct flat_list_node *next; }Flatlist_node; /* local function declarations */ @@ -84,29 +86,29 @@ static int two_phase_exchange_data(mca_io_ompio_file_t *fh, static void two_phase_fill_user_buffer(mca_io_ompio_file_t *fh, - void *buf, + void *buf, Flatlist_node *flat_buf, char **recv_buf, - struct iovec *offset_length, - unsigned *recv_size, - MPI_Request *requests, + struct iovec *offset_length, + unsigned *recv_size, + MPI_Request *requests, int *recd_from_proc, - int contig_access_count, - OMPI_MPI_OFFSET_TYPE min_st_offset, - OMPI_MPI_OFFSET_TYPE fd_size, - OMPI_MPI_OFFSET_TYPE *fd_start, + int contig_access_count, + OMPI_MPI_OFFSET_TYPE min_st_offset, + OMPI_MPI_OFFSET_TYPE fd_size, + OMPI_MPI_OFFSET_TYPE *fd_start, OMPI_MPI_OFFSET_TYPE *fd_end, MPI_Aint buftype_extent, - int striping_unit, + int striping_unit, int num_io_procs, int *aggregator_list); -#if TIME_BREAKDOWN +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN static int isread_aggregator(int rank, int nprocs_for_coll, int *aggregator_list); #endif -#if TIME_BREAKDOWN +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0; double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0; double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0; @@ -137,13 +139,13 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *fd_start=NULL, *fd_end=NULL, min_st_offset = 0; Flatlist_node *flat_buf=NULL; mca_io_ompio_access_array_t *my_req=NULL, *others_req=NULL; -#if TIME_BREAKDOWN - print_entry nentry; +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + mca_io_ompio_print_entry nentry; #endif - if (opal_datatype_is_predefined(&datatype->super)) { - fh->f_flags = fh->f_flags | OMPIO_CONTIGUOUS_MEMORY; - } - 
+// if (opal_datatype_is_predefined(&datatype->super)) { +// fh->f_flags = fh->f_flags | OMPIO_CONTIGUOUS_MEMORY; +// } + if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { ret = fh->f_decode_datatype ((struct mca_io_ompio_file_t *)fh, datatype, @@ -155,13 +157,13 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, if (OMPI_SUCCESS != ret ){ goto exit; } - + recv_buf_addr = (size_t)(buf); - decoded_iov = (struct iovec *) calloc + decoded_iov = (struct iovec *) calloc (iov_count, sizeof(struct iovec)); - + for (ti = 0; ti < iov_count; ti++){ - + decoded_iov[ti].iov_base = (IOVBASE_TYPE *) ((OPAL_PTRDIFF_TYPE)temp_iov[ti].iov_base - recv_buf_addr); decoded_iov[ti].iov_len = temp_iov[ti].iov_len; @@ -171,52 +173,52 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, ti, decoded_iov[ti].iov_len); #endif } - + } else{ max_data = count * datatype->super.size; } - + if ( MPI_STATUS_IGNORE != status ) { status->_ucount = max_data; } - + fh->f_get_num_aggregators (&two_phase_num_io_procs); if (-1 == two_phase_num_io_procs ){ - ret = fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *)fh, + ret = fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *)fh, two_phase_num_io_procs, max_data); if (OMPI_SUCCESS != ret){ - return ret; + goto exit; } - + two_phase_num_io_procs = fh->f_final_num_aggrs; - + } - + if (two_phase_num_io_procs > fh->f_size){ two_phase_num_io_procs = fh->f_size; } - + aggregator_list = (int *) calloc (two_phase_num_io_procs, sizeof(int)); if (NULL == aggregator_list){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + for (i=0; i< two_phase_num_io_procs; i++){ aggregator_list[i] = i * fh->f_size / two_phase_num_io_procs; } - - ret = fh->f_generate_current_file_view ((struct mca_io_ompio_file_t *)fh, - max_data, - &iov, + + ret = fh->f_generate_current_file_view ((struct mca_io_ompio_file_t *)fh, + max_data, + &iov, &local_count); - + if (OMPI_SUCCESS != ret){ goto exit; } - + long_max_data = (long) max_data; ret = 
fh->f_comm->c_coll.coll_allreduce (&long_max_data, &long_total_bytes, @@ -225,69 +227,63 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, MPI_SUM, fh->f_comm, fh->f_comm->c_coll.coll_allreduce_module); - + if ( OMPI_SUCCESS != ret ) { goto exit; } - + if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { - + /* This datastructre translates between OMPIO->ROMIO its a little hacky!*/ /* But helps to re-use romio's code for handling non-contiguous file-type*/ /*Flattened datatype for ompio is in decoded_iov it translated into flatbuf*/ - + flat_buf = (Flatlist_node *)calloc(1, sizeof(Flatlist_node)); if ( NULL == flat_buf ){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + flat_buf->type = datatype; flat_buf->next = NULL; flat_buf->count = 0; flat_buf->indices = NULL; flat_buf->blocklens = NULL; - + if ( 0 < count ) { local_size = OMPIO_MAX(1,iov_count/count); } else { local_size = 0; } - + if ( 0 < local_size ) { - flat_buf->indices = - (OMPI_MPI_OFFSET_TYPE *)calloc(local_size, + flat_buf->indices = + (OMPI_MPI_OFFSET_TYPE *)calloc(local_size, sizeof(OMPI_MPI_OFFSET_TYPE)); if (NULL == flat_buf->indices){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - - flat_buf->blocklens = - (OMPI_MPI_OFFSET_TYPE *)calloc(local_size, + + flat_buf->blocklens = + (OMPI_MPI_OFFSET_TYPE *)calloc(local_size, sizeof(OMPI_MPI_OFFSET_TYPE)); if ( NULL == flat_buf->blocklens ){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } } - flat_buf->count = local_size; - i=0;j=0; - while(j < local_size){ - flat_buf->indices[j] = (OMPI_MPI_OFFSET_TYPE)(intptr_t)decoded_iov[i].iov_base; - flat_buf->blocklens[j] = decoded_iov[i].iov_len; - - if(i < (int)iov_count) - i+=1; - - j+=1; + flat_buf->count = local_size; + for (j = 0 ; j < local_size ; ++j) { + flat_buf->indices[j] = (OMPI_MPI_OFFSET_TYPE)(intptr_t)decoded_iov[j].iov_base; + flat_buf->blocklens[j] = decoded_iov[j].iov_len; } - + #if DEBUG - printf("flat_buf count: %d\n", + printf("flat_buf count: %d\n", flat_buf->count); 
for(i=0;icount;i++){ printf("%d: blocklen[%d] : %lld, indices[%d]: %lld\n", @@ -295,7 +291,7 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, } #endif } - + #if DEBUG printf("%d: total_bytes:%ld, local_count: %d\n", fh->f_rank, long_total_bytes, local_count); @@ -306,11 +302,11 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, (size_t)iov[i].iov_len); } #endif - + start_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)iov[0].iov_base; if ( 0 < local_count ) { end_offset = (OMPI_MPI_OFFSET_TYPE)(intptr_t)iov[local_count-1].iov_base + - (OMPI_MPI_OFFSET_TYPE)(intptr_t)iov[local_count-1].iov_len - 1; + (OMPI_MPI_OFFSET_TYPE)(intptr_t)iov[local_count-1].iov_len - 1; } else { end_offset = 0; @@ -321,23 +317,23 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, (size_t)start_offset, (size_t)end_offset); #endif - + start_offsets = (OMPI_MPI_OFFSET_TYPE *)calloc (fh->f_size, sizeof(OMPI_MPI_OFFSET_TYPE)); - + if ( NULL == start_offsets ){ ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + goto exit; } - + end_offsets = (OMPI_MPI_OFFSET_TYPE *)calloc (fh->f_size, sizeof(OMPI_MPI_OFFSET_TYPE)); - + if (NULL == end_offsets){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + ret = fh->f_comm->c_coll.coll_allgather(&start_offset, 1, OMPI_OFFSET_DATATYPE, @@ -346,11 +342,11 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, OMPI_OFFSET_DATATYPE, fh->f_comm, fh->f_comm->c_coll.coll_allgather_module); - + if ( OMPI_SUCCESS != ret ){ goto exit; } - + ret = fh->f_comm->c_coll.coll_allgather(&end_offset, 1, OMPI_OFFSET_DATATYPE, @@ -359,12 +355,12 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, OMPI_OFFSET_DATATYPE, fh->f_comm, fh->f_comm->c_coll.coll_allgather_module); - - + + if ( OMPI_SUCCESS != ret ){ goto exit; } - + #if DEBUG for (i=0;if_size;i++){ printf("%d: start[%d]:%ld,end[%d]:%ld\n", @@ -373,40 +369,40 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, (size_t)end_offsets[i]); } #endif - + for (i=1; if_size; 
i++){ - if ((start_offsets[i] < end_offsets[i-1]) && + if ((start_offsets[i] < end_offsets[i-1]) && (start_offsets[i] <= end_offsets[i])){ interleave_count++; } } - + #if DEBUG printf("%d: interleave_count:%d\n", fh->f_rank,interleave_count); -#endif - +#endif + ret = mca_fcoll_two_phase_domain_partition(fh, start_offsets, end_offsets, &min_st_offset, &fd_start, &fd_end, - domain_size, + domain_size, &fd_size, striping_unit, two_phase_num_io_procs); if (OMPI_SUCCESS != ret){ goto exit; } - + #if DEBUG for (i=0;if_rank, count_other_req_procs); -#endif - -#if TIME_BREAKDOWN +#endif + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_rexch = MPI_Wtime(); #endif - - + + ret = two_phase_read_and_exch(fh, buf, datatype, @@ -459,14 +455,14 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, flat_buf, buf_indices, striping_unit, - two_phase_num_io_procs, + two_phase_num_io_procs, aggregator_list); - - + + if (OMPI_SUCCESS != ret){ goto exit; } -#if TIME_BREAKDOWN +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN end_rexch = MPI_Wtime(); read_exch += (end_rexch - start_rexch); nentry.time[0] = read_time; @@ -481,15 +477,15 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, nentry.aggregator = 0; } nentry.nprocs_for_coll = two_phase_num_io_procs; - - + + if (!fh->f_full_print_queue(READ_PRINT_QUEUE)){ fh->f_register_print_entry(READ_PRINT_QUEUE, nentry); } #endif - - + + exit: if (flat_buf != NULL){ if (flat_buf->blocklens != NULL){ @@ -498,22 +494,20 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, if (flat_buf->indices != NULL){ free (flat_buf->indices); } - free(flat_buf); - flat_buf = NULL; - } - if (start_offsets != NULL){ - free(start_offsets); - start_offsets = NULL; + free (flat_buf); } - if (end_offsets != NULL){ - free(end_offsets); - end_offsets = NULL; - } - if (aggregator_list != NULL){ - free(aggregator_list); - aggregator_list = NULL; - } - + + free (start_offsets); + free (end_offsets); + free (aggregator_list); + free (fd_start); + free 
(decoded_iov); + free (buf_indices); + free (count_my_req_per_proc); + free (my_req); + free (others_req); + free (fd_end); + return ret; } @@ -532,10 +526,10 @@ static int two_phase_read_and_exch(mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *fd_end, Flatlist_node *flat_buf, size_t *buf_idx, int striping_unit, - int two_phase_num_io_procs, + int two_phase_num_io_procs, int *aggregator_list){ - - + + int ret=OMPI_SUCCESS, i = 0, j = 0, ntimes = 0, max_ntimes = 0; int m = 0; int *curr_offlen_ptr=NULL, *count=NULL, *send_size=NULL, *recv_size=NULL; @@ -549,10 +543,10 @@ static int two_phase_read_and_exch(mca_io_ompio_file_t *fh, char *read_buf=NULL, *tmp_buf=NULL; MPI_Datatype byte = MPI_BYTE; int two_phase_cycle_buffer_size=0; - - opal_datatype_type_size(&byte->super, + + opal_datatype_type_size(&byte->super, &byte_size); - + for (i = 0; i < fh->f_size; i++){ if (others_req[i].count) { st_loc = others_req[i].offsets[0]; @@ -560,25 +554,25 @@ static int two_phase_read_and_exch(mca_io_ompio_file_t *fh, break; } } - + for (i=0;if_size;i++){ for(j=0;j< others_req[i].count; j++){ - st_loc = + st_loc = OMPIO_MIN(st_loc, others_req[i].offsets[j]); - end_loc = + end_loc = OMPIO_MAX(end_loc, (others_req[i].offsets[j] + others_req[i].lens[j] - 1)); } } - + fh->f_get_bytes_per_agg ( &two_phase_cycle_buffer_size); ntimes = (int)((end_loc - st_loc + two_phase_cycle_buffer_size)/ two_phase_cycle_buffer_size); - + if ((st_loc == -1) && (end_loc == -1)){ ntimes = 0; } - + fh->f_comm->c_coll.coll_allreduce (&ntimes, &max_ntimes, 1, @@ -586,7 +580,7 @@ static int two_phase_read_and_exch(mca_io_ompio_file_t *fh, MPI_MAX, fh->f_comm, fh->f_comm->c_coll.coll_allreduce_module); - + if (ntimes){ read_buf = (char *) calloc (two_phase_cycle_buffer_size, sizeof(char)); if ( NULL == read_buf ){ @@ -594,66 +588,66 @@ static int two_phase_read_and_exch(mca_io_ompio_file_t *fh, goto exit; } } - - curr_offlen_ptr = (int *)calloc (fh->f_size, + + curr_offlen_ptr = (int *)calloc (fh->f_size, 
sizeof(int)); if (NULL == curr_offlen_ptr){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - - count = (int *)calloc (fh->f_size, + + count = (int *)calloc (fh->f_size, sizeof(int)); if (NULL == count){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + partial_send = (int *)calloc(fh->f_size, sizeof(int)); if ( NULL == partial_send ){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + send_size = (int *)malloc(fh->f_size * sizeof(int)); if (NULL == send_size){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + recv_size = (int *)malloc(fh->f_size * sizeof(int)); if (NULL == recv_size){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + recd_from_proc = (int *)calloc(fh->f_size,sizeof(int)); if (NULL == recd_from_proc){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + start_pos = (int *) calloc(fh->f_size, sizeof(int)); if ( NULL == start_pos ){ - ret = OMPI_ERR_OUT_OF_RESOURCE; - return ret; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + done = 0; off = st_loc; for_curr_iter = for_next_iter = 0; - + ompi_datatype_type_extent(datatype, &buftype_extent); - + for (m=0; mf_size; i++) count[i] = send_size[i] = 0; for_next_iter = 0; - + for (i=0; if_size; i++) { if (others_req[i].count) { start_pos[i] = curr_offlen_ptr[i]; @@ -663,7 +657,7 @@ static int two_phase_read_and_exch(mca_io_ompio_file_t *fh, /* this request may have been partially satisfied in the previous iteration. 
*/ req_off = others_req[i].offsets[j] + - partial_send[i]; + partial_send[i]; req_len = others_req[i].lens[j] - partial_send[i]; partial_send[i] = 0; @@ -677,22 +671,22 @@ static int two_phase_read_and_exch(mca_io_ompio_file_t *fh, } if (req_off < real_off + real_size) { count[i]++; - MPI_Address(read_buf+req_off-real_off, - &(others_req[i].mem_ptrs[j])); - - send_size[i] += (int)(OMPIO_MIN(real_off + real_size - req_off, - (OMPI_MPI_OFFSET_TYPE)req_len)); - + PMPI_Address(read_buf+req_off-real_off, + &(others_req[i].mem_ptrs[j])); + + send_size[i] += (int)(OMPIO_MIN(real_off + real_size - req_off, + (OMPI_MPI_OFFSET_TYPE)req_len)); + if (real_off+real_size-req_off < (OMPI_MPI_OFFSET_TYPE)req_len) { partial_send[i] = (int) (real_off + real_size - req_off); - if ((j+1 < others_req[i].count) && - (others_req[i].offsets[j+1] < - real_off+real_size)) { + if ((j+1 < others_req[i].count) && + (others_req[i].offsets[j+1] < + real_off+real_size)) { /* this is the case illustrated in the figure above. 
*/ for_next_iter = OMPIO_MAX(for_next_iter, - real_off + real_size - others_req[i].offsets[j+1]); - /* max because it must cover requests + real_off + real_size - others_req[i].offsets[j+1]); + /* max because it must cover requests from different processes */ } break; @@ -704,42 +698,44 @@ static int two_phase_read_and_exch(mca_io_ompio_file_t *fh, } } flag = 0; - for (i=0; if_size; i++) + for (i=0; if_size; i++) if (count[i]) flag = 1; - + if (flag) { - -#if TIME_BREAKDOWN + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_read_time = MPI_Wtime(); #endif - + len = size * byte_size; - fh->f_io_array = (mca_io_ompio_io_array_t *)calloc + fh->f_io_array = (mca_io_ompio_io_array_t *)calloc (1,sizeof(mca_io_ompio_io_array_t)); if (NULL == fh->f_io_array) { opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } fh->f_io_array[0].offset = (IOVBASE_TYPE *)(intptr_t)off; fh->f_io_array[0].length = len; - fh->f_io_array[0].memory_address = + fh->f_io_array[0].memory_address = read_buf+for_curr_iter; fh->f_num_of_io_entries = 1; - + if (fh->f_num_of_io_entries){ if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { opal_output(1, "READ FAILED\n"); - return OMPI_ERROR; + ret = OMPI_ERROR; + goto exit; } } - + #if 0 int ii; printf("%d: len/4 : %lld\n", fh->f_rank, len/4); for (ii = 0; ii < len/4 ;ii++){ - printf("%d: read_buf[%d]: %ld\n", + printf("%d: read_buf[%d]: %ld\n", fh->f_rank, ii, (int *)read_buf[ii]); @@ -750,120 +746,97 @@ static int two_phase_read_and_exch(mca_io_ompio_file_t *fh, free (fh->f_io_array); fh->f_io_array = NULL; } - -#if TIME_BREAKDOWN + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN end_read_time = MPI_Wtime(); read_time += (end_read_time - start_read_time); #endif - - + + } - + for_curr_iter = for_next_iter; - + for (i=0; i< fh->f_size; i++){ recv_size[i] = 0; } two_phase_exchange_data(fh, buf, offset_len, - send_size, start_pos, recv_size, count, - partial_send, recd_from_proc, + send_size, start_pos, recv_size, 
count, + partial_send, recd_from_proc, contig_access_count, min_st_offset, fd_size, fd_start, fd_end, flat_buf, others_req, m, buf_idx, - buftype_extent, striping_unit, two_phase_num_io_procs, - aggregator_list); - + buftype_extent, striping_unit, two_phase_num_io_procs, + aggregator_list); + if (for_next_iter){ tmp_buf = (char *) calloc (for_next_iter, sizeof(char)); - memcpy(tmp_buf, - read_buf+real_size-for_next_iter, + memcpy(tmp_buf, + read_buf+real_size-for_next_iter, for_next_iter); free(read_buf); read_buf = (char *)malloc(for_next_iter+two_phase_cycle_buffer_size); memcpy(read_buf, tmp_buf, for_next_iter); free(tmp_buf); } - + off += size; done += size; } - + for (i=0; if_size; i++) count[i] = send_size[i] = 0; for (m=ntimes; mf_comm->c_coll.coll_alltoall (send_size, 1, MPI_INT, @@ -872,12 +845,12 @@ static int two_phase_exchange_data(mca_io_ompio_file_t *fh, MPI_INT, fh->f_comm, fh->f_comm->c_coll.coll_alltoall_module); - + if ( OMPI_SUCCESS != ret ){ goto exit; } - - + + #if DEBUG for (i=0; if_size; i++){ printf("%d: RS[%d]: %d\n", fh->f_rank, @@ -885,19 +858,19 @@ static int two_phase_exchange_data(mca_io_ompio_file_t *fh, recv_size[i]); } #endif - - + + nprocs_recv = 0; - for (i=0; i < fh->f_size; i++) + for (i=0; i < fh->f_size; i++) if (recv_size[i]) nprocs_recv++; - + nprocs_send = 0; - for (i=0; i< fh->f_size; i++) + for (i=0; i< fh->f_size; i++) if (send_size[i]) nprocs_send++; - + requests = (MPI_Request *) malloc((nprocs_send+nprocs_recv+1) * sizeof(MPI_Request)); - + if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { j = 0; for (i=0; i < fh->f_size; i++){ @@ -909,7 +882,7 @@ static int two_phase_exchange_data(mca_io_ompio_file_t *fh, fh->f_rank+i+100*iter, fh->f_comm, requests+j)); - + if ( OMPI_SUCCESS != ret ){ return ret; } @@ -919,15 +892,15 @@ static int two_phase_exchange_data(mca_io_ompio_file_t *fh, } } else{ - - recv_buf = (char **)malloc(fh->f_size * sizeof(char *)); + + recv_buf = (char **) calloc (fh->f_size, sizeof(char *)); if (NULL == 
recv_buf){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + for (i=0; i < fh->f_size; i++) - if(recv_size[i]) recv_buf[i] = + if(recv_size[i]) recv_buf[i] = (char *) malloc (recv_size[i] * sizeof(char)); j = 0; for(i=0; if_size; i++) @@ -940,12 +913,12 @@ static int two_phase_exchange_data(mca_io_ompio_file_t *fh, fh->f_comm, requests+j)); j++; - + } } - - - + + + j = 0; for (i = 0; i< fh->f_size; i++){ if (send_size[i]){ @@ -954,15 +927,15 @@ static int two_phase_exchange_data(mca_io_ompio_file_t *fh, tmp = others_req[i].lens[k]; others_req[i].lens[k] = partial_send[i]; } - + ompi_datatype_create_hindexed(count[i], &(others_req[i].lens[start_pos[i]]), &(others_req[i].mem_ptrs[start_pos[i]]), MPI_BYTE, &send_type); - + ompi_datatype_commit(&send_type); - + ret = MCA_PML_CALL(isend(MPI_BOTTOM, 1, send_type, @@ -972,58 +945,57 @@ static int two_phase_exchange_data(mca_io_ompio_file_t *fh, fh->f_comm, requests+nprocs_recv+j)); ompi_datatype_destroy(&send_type); - + if (partial_send[i]) others_req[i].lens[k] = tmp; j++; } } - - + + if (nprocs_recv) { - + ret = ompi_request_wait_all(nprocs_recv, - requests, + requests, MPI_STATUS_IGNORE); - - + if (OMPI_SUCCESS != ret) { + goto exit; + } + if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { - + two_phase_fill_user_buffer(fh, buf, flat_buf, recv_buf, offset_len, (unsigned *)recv_size, requests, recd_from_proc, contig_access_count, min_st_offset, fd_size, fd_start, fd_end, - buftype_extent, striping_unit, two_phase_num_io_procs, + buftype_extent, striping_unit, two_phase_num_io_procs, aggregator_list); } } - + ret = ompi_request_wait_all(nprocs_send, - requests+nprocs_recv, + requests+nprocs_recv, MPI_STATUS_IGNORE); - - if (NULL != requests){ - free(requests); - requests = NULL; - } - - if (! 
(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)){ - for (i=0; i< fh->f_size; i++){ - if (recv_size[i]){ - free(recv_buf[i]); - } - } - free(recv_buf); - } - -#if TIME_BREAKDOWN + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN end_rcomm_time = MPI_Wtime(); rcomm_time += (end_rcomm_time - start_rcomm_time); #endif - + exit: + + if (recv_buf) { + for (i=0; i< fh->f_size; i++){ + free(recv_buf[i]); + } + + free(recv_buf); + } + + free(requests); + return ret; - + } @@ -1076,54 +1048,54 @@ static int two_phase_exchange_data(mca_io_ompio_file_t *fh, static void two_phase_fill_user_buffer(mca_io_ompio_file_t *fh, - void *buf, + void *buf, Flatlist_node *flat_buf, char **recv_buf, - struct iovec *offset_length, - unsigned *recv_size, - MPI_Request *requests, + struct iovec *offset_length, + unsigned *recv_size, + MPI_Request *requests, int *recd_from_proc, - int contig_access_count, - OMPI_MPI_OFFSET_TYPE min_st_offset, - OMPI_MPI_OFFSET_TYPE fd_size, - OMPI_MPI_OFFSET_TYPE *fd_start, + int contig_access_count, + OMPI_MPI_OFFSET_TYPE min_st_offset, + OMPI_MPI_OFFSET_TYPE fd_size, + OMPI_MPI_OFFSET_TYPE *fd_start, OMPI_MPI_OFFSET_TYPE *fd_end, MPI_Aint buftype_extent, int striping_unit, int two_phase_num_io_procs, int *aggregator_list){ - + int i = 0, p = 0, flat_buf_idx = 0; OMPI_MPI_OFFSET_TYPE flat_buf_sz = 0, size_in_buf = 0, buf_incr = 0, size = 0; int n_buftypes = 0; OMPI_MPI_OFFSET_TYPE off=0, len=0, rem_len=0, user_buf_idx=0; unsigned *curr_from_proc=NULL, *done_from_proc=NULL, *recv_buf_idx=NULL; - + curr_from_proc = (unsigned *) malloc (fh->f_size * sizeof(unsigned)); done_from_proc = (unsigned *) malloc (fh->f_size * sizeof(unsigned)); recv_buf_idx = (unsigned *) malloc (fh->f_size * sizeof(unsigned)); - + for (i=0; i < fh->f_size; i++) { recv_buf_idx[i] = curr_from_proc[i] = 0; done_from_proc[i] = recd_from_proc[i]; } - + flat_buf_idx = 0; n_buftypes = 0; - + if ( flat_buf->count > 0 ) { user_buf_idx = flat_buf->indices[0]; flat_buf_sz = flat_buf->blocklens[0]; } - + /* 
flat_buf_idx = current index into flattened buftype - flat_buf_sz = size of current contiguous component in + flat_buf_sz = size of current contiguous component in flattened buf */ - - for (i=0; i done_from_proc[p]) { if (done_from_proc[p] > curr_from_proc[p]) { - size = OMPIO_MIN(curr_from_proc[p] + len - + size = OMPIO_MIN(curr_from_proc[p] + len - done_from_proc[p], recv_size[p]-recv_buf_idx[p]); buf_incr = done_from_proc[p] - curr_from_proc[p]; TWO_PHASE_BUF_INCR @@ -1174,20 +1146,20 @@ static void two_phase_fill_user_buffer(mca_io_ompio_file_t *fh, rem_len -= len; } } - for (i=0; i < fh->f_size; i++) + for (i=0; i < fh->f_size; i++) if (recv_size[i]) recd_from_proc[i] = curr_from_proc[i]; - + free(curr_from_proc); free(done_from_proc); free(recv_buf_idx); - + } -#if TIME_BREAKDOWN -int isread_aggregator(int rank, +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN +int isread_aggregator(int rank, int nprocs_for_coll, int *aggregator_list){ - + int i=0; for (i=0; i #define DEBUG_ON 0 -#define TIME_BREAKDOWN 0 -/* Two Phase implementation from ROMIO ported to OMPIO infrastructure +/* Two Phase implementation from ROMIO ported to OMPIO infrastructure * This is pretty much the same as ROMIO's two_phase and based on ROMIO's code * base */ /* Datastructure to support specifying the flat-list. 
*/ -typedef struct flat_list_node { +typedef struct flat_list_node { MPI_Datatype type; - int count; + int count; OMPI_MPI_OFFSET_TYPE *blocklens; - OMPI_MPI_OFFSET_TYPE *indices; - struct flat_list_node *next; + OMPI_MPI_OFFSET_TYPE *indices; + struct flat_list_node *next; } Flatlist_node; /* local function declarations */ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, MPI_Datatype datatype, mca_io_ompio_access_array_t *others_req, struct iovec *offset_len, @@ -70,7 +72,7 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, static int two_phase_exchage_data(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, char *write_buf, struct iovec *offset_length, int *send_size, int *start_pos, @@ -88,30 +90,30 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int iter, size_t *buf_idx, MPI_Aint buftype_extent, - int striping_unit, int num_io_procs, + int striping_unit, int num_io_procs, int *aggregator_list, int *hole); - + static int two_phase_fill_send_buffer(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, Flatlist_node *flat_buf, char **send_buf, struct iovec *offset_length, int *send_size, MPI_Request *send_req, int *sent_to_proc, - int contig_access_count, + int contig_access_count, OMPI_MPI_OFFSET_TYPE min_st_offset, OMPI_MPI_OFFSET_TYPE fd_size, OMPI_MPI_OFFSET_TYPE *fd_start, OMPI_MPI_OFFSET_TYPE *fd_end, int *send_buf_idx, - int *curr_to_proc, + int *curr_to_proc, int *done_to_proc, int iter, MPI_Aint buftype_extent, - int striping_unit, + int striping_unit, int num_io_procs, int *aggregator_list); -#if TIME_BREAKDOWN +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN static int is_aggregator(int rank, int nprocs_for_coll, int *aggregator_list); @@ -131,7 +133,7 @@ void two_phase_heap_merge(mca_io_ompio_access_array_t *others_req, /* local function declarations ends here!*/ -#if TIME_BREAKDOWN +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN double 
write_time = 0.0, start_write_time = 0.0, end_write_time = 0.0; double comm_time = 0.0, start_comm_time = 0.0, end_comm_time = 0.0; double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0; @@ -139,44 +141,44 @@ double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0; int mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) { - + int i, j,interleave_count=0, striping_unit=0; uint32_t iov_count=0,ti; struct iovec *decoded_iov=NULL, *temp_iov=NULL; - size_t max_data = 0, total_bytes = 0; - long long_max_data = 0, long_total_bytes = 0; + size_t max_data = 0, total_bytes = 0; + long long_max_data = 0, long_total_bytes = 0; int domain_size=0, *count_my_req_per_proc=NULL, count_my_req_procs; int count_other_req_procs, ret=OMPI_SUCCESS; int two_phase_num_io_procs=1; size_t *buf_indices=NULL; int local_count = 0, local_size=0,*aggregator_list = NULL; struct iovec *iov = NULL; - + OMPI_MPI_OFFSET_TYPE start_offset, end_offset, fd_size; OMPI_MPI_OFFSET_TYPE *start_offsets=NULL, *end_offsets=NULL; OMPI_MPI_OFFSET_TYPE *fd_start=NULL, *fd_end=NULL, min_st_offset; Flatlist_node *flat_buf=NULL; mca_io_ompio_access_array_t *my_req=NULL, *others_req=NULL; MPI_Aint send_buf_addr; -#if TIME_BREAKDOWN - print_entry nentry; +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN + mca_io_ompio_print_entry nentry; #endif - - + + // if (opal_datatype_is_predefined(&datatype->super)) { // fh->f_flags = fh->f_flags | OMPIO_CONTIGUOUS_MEMORY; // } - - + + if (! 
(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { - + ret = fh->f_decode_datatype ((struct mca_io_ompio_file_t *)fh, datatype, count, @@ -187,17 +189,21 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, if (OMPI_SUCCESS != ret ){ goto exit; } - + send_buf_addr = (OPAL_PTRDIFF_TYPE)buf; if ( 0 < iov_count ) { - decoded_iov = (struct iovec *)malloc + decoded_iov = (struct iovec *)malloc (iov_count * sizeof(struct iovec)); + if (NULL == decoded_iov) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } } for (ti = 0; ti < iov_count; ti ++){ decoded_iov[ti].iov_base = (IOVBASE_TYPE *)( - (OPAL_PTRDIFF_TYPE)temp_iov[ti].iov_base - + (OPAL_PTRDIFF_TYPE)temp_iov[ti].iov_base - send_buf_addr); - decoded_iov[ti].iov_len = + decoded_iov[ti].iov_len = temp_iov[ti].iov_len ; #if DEBUG_ON printf("d_offset[%d]: %ld, d_len[%d]: %ld\n", @@ -205,57 +211,58 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, ti, decoded_iov[ti].iov_len); #endif } - + } else{ max_data = count * datatype->super.size; } - + if ( MPI_STATUS_IGNORE != status ) { status->_ucount = max_data; } - + fh->f_get_num_aggregators ( &two_phase_num_io_procs ); if(-1 == two_phase_num_io_procs){ - ret = fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *)fh, + ret = fh->f_set_aggregator_props ((struct mca_io_ompio_file_t *)fh, two_phase_num_io_procs, max_data); if ( OMPI_SUCCESS != ret){ - return ret; + goto exit; } - - two_phase_num_io_procs = fh->f_final_num_aggrs; - + + two_phase_num_io_procs = fh->f_final_num_aggrs; + } - + if (two_phase_num_io_procs > fh->f_size){ two_phase_num_io_procs = fh->f_size; } - + #if DEBUG_ON printf("Number of aggregators : %ld\n", two_phase_num_io_procs); #endif - + aggregator_list = (int *) malloc (two_phase_num_io_procs *sizeof(int)); if ( NULL == aggregator_list ) { - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + for (i =0; i< two_phase_num_io_procs; i++){ aggregator_list[i] = i * fh->f_size / two_phase_num_io_procs; } 
- - - ret = fh->f_generate_current_file_view ((struct mca_io_ompio_file_t*)fh, - max_data, - &iov, + + + ret = fh->f_generate_current_file_view ((struct mca_io_ompio_file_t*)fh, + max_data, + &iov, &local_count); - - + + if ( OMPI_SUCCESS != ret ){ goto exit; } - + long_max_data = (long) max_data; ret = fh->f_comm->c_coll.coll_allreduce (&long_max_data, &long_total_bytes, @@ -264,19 +271,19 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, MPI_SUM, fh->f_comm, fh->f_comm->c_coll.coll_allreduce_module); - + if ( OMPI_SUCCESS != ret ) { goto exit; } total_bytes = (size_t) long_total_bytes; - + if ( 0 == total_bytes ) { - free(aggregator_list); - return OMPI_SUCCESS; + ret = OMPI_SUCCESS; + goto exit; } - + if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { - + /* This datastructre translates between OMPIO->ROMIO its a little hacky!*/ /* But helps to re-use romio's code for handling non-contiguous file-type*/ flat_buf = (Flatlist_node *)malloc(sizeof(Flatlist_node)); @@ -284,32 +291,32 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + flat_buf->type = datatype; flat_buf->next = NULL; flat_buf->count = 0; flat_buf->indices = NULL; flat_buf->blocklens = NULL; - + if ( 0 < count ) { local_size = OMPIO_MAX(1,iov_count/count); } else { local_size = 0; } - + if ( 0 < local_size ) { - flat_buf->indices = - (OMPI_MPI_OFFSET_TYPE *)malloc(local_size * + flat_buf->indices = + (OMPI_MPI_OFFSET_TYPE *)malloc(local_size * sizeof(OMPI_MPI_OFFSET_TYPE)); if ( NULL == flat_buf->indices ){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; - + } - - flat_buf->blocklens = - (OMPI_MPI_OFFSET_TYPE *)malloc(local_size * + + flat_buf->blocklens = + (OMPI_MPI_OFFSET_TYPE *)malloc(local_size * sizeof(OMPI_MPI_OFFSET_TYPE)); if ( NULL == flat_buf->blocklens ){ ret = OMPI_ERR_OUT_OF_RESOURCE; @@ -317,31 +324,27 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, } } flat_buf->count = local_size; - i=0;j=0; - while(j 
< local_size){ + for (j = 0 ; j < local_size ; ++j) { if ( 0 < max_data ) { - flat_buf->indices[j] = (OMPI_MPI_OFFSET_TYPE)(intptr_t)decoded_iov[i].iov_base; - flat_buf->blocklens[j] = decoded_iov[i].iov_len; + flat_buf->indices[j] = (OMPI_MPI_OFFSET_TYPE)(intptr_t)decoded_iov[j].iov_base; + flat_buf->blocklens[j] = decoded_iov[j].iov_len; } else { flat_buf->indices[j] = 0; flat_buf->blocklens[j] = 0; } - if(i < (int)iov_count) - i+=1; - j+=1; } - + #if DEBUG_ON printf("flat_buf_count : %d\n", flat_buf->count); for(i=0;icount;i++){ printf("%d: blocklen[%d] : %lld, indices[%d]: %lld \n", fh->f_rank, i, flat_buf->blocklens[i], i ,flat_buf->indices[i]); - + } #endif } - + #if DEBUG_ON printf("%d: fcoll:two_phase:write_all->total_bytes:%ld, local_count: %d\n", fh->f_rank,total_bytes, local_count); @@ -351,14 +354,14 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, (size_t)iov[i].iov_base, (size_t)iov[i].iov_len); } - - + + #endif - + start_offset = (OMPI_MPI_OFFSET_TYPE)(uintptr_t)iov[0].iov_base; if ( 0 < local_count ) { end_offset = (OMPI_MPI_OFFSET_TYPE)(uintptr_t)iov[local_count-1].iov_base + - (OMPI_MPI_OFFSET_TYPE)iov[local_count-1].iov_len - 1; + (OMPI_MPI_OFFSET_TYPE)iov[local_count-1].iov_len - 1; } else { end_offset = 0; @@ -369,26 +372,26 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, fh->f_rank, (size_t)start_offset, (size_t)end_offset); - + #endif - + start_offsets = (OMPI_MPI_OFFSET_TYPE *)malloc (fh->f_size*sizeof(OMPI_MPI_OFFSET_TYPE)); - + if ( NULL == start_offsets ){ ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + goto exit; } - + end_offsets = (OMPI_MPI_OFFSET_TYPE *)malloc (fh->f_size*sizeof(OMPI_MPI_OFFSET_TYPE)); - + if ( NULL == end_offsets ){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - - + + ret = fh->f_comm->c_coll.coll_allgather(&start_offset, 1, OMPI_OFFSET_DATATYPE, @@ -397,12 +400,12 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, OMPI_OFFSET_DATATYPE, fh->f_comm, 
fh->f_comm->c_coll.coll_allgather_module); - + if ( OMPI_SUCCESS != ret ){ goto exit; } - - + + ret = fh->f_comm->c_coll.coll_allgather(&end_offset, 1, OMPI_OFFSET_DATATYPE, @@ -411,12 +414,12 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, OMPI_OFFSET_DATATYPE, fh->f_comm, fh->f_comm->c_coll.coll_allgather_module); - - + + if ( OMPI_SUCCESS != ret ){ goto exit; } - + #if DEBUG_ON for (i=0;if_size;i++){ printf("%d: fcoll:two_phase:write_all:start[%d]:%ld,end[%d]:%ld\n", @@ -425,45 +428,45 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, (size_t)end_offsets[i]); } #endif - - - + + + for (i=1; if_size; i++){ - if ((start_offsets[i] < end_offsets[i-1]) && + if ((start_offsets[i] < end_offsets[i-1]) && (start_offsets[i] <= end_offsets[i])){ interleave_count++; } } - + #if DEBUG_ON printf("%d: fcoll:two_phase:write_all:interleave_count:%d\n", fh->f_rank,interleave_count); -#endif - - +#endif + + ret = mca_fcoll_two_phase_domain_partition(fh, start_offsets, end_offsets, &min_st_offset, &fd_start, &fd_end, - domain_size, + domain_size, &fd_size, striping_unit, two_phase_num_io_procs); if ( OMPI_SUCCESS != ret ){ goto exit; } - - + + #if DEBUG_ON for (i=0;if_full_print_queue(WRITE_PRINT_QUEUE)){ - fh->f_ompio_register_print_entry(WRITE_PRINT_QUEUE, + fh->f_register_print_entry(WRITE_PRINT_QUEUE, nentry); } #endif - -exit : + +exit : if (flat_buf != NULL) { - + if (flat_buf->blocklens != NULL) { free (flat_buf->blocklens); } - + if (flat_buf->indices != NULL) { free (flat_buf->indices); } free (flat_buf); - - } - - - - if (start_offsets != NULL) { - free(start_offsets); - } - - if (end_offsets != NULL){ - free(end_offsets); - } - if (aggregator_list != NULL){ - free(aggregator_list); + } - + + + free (start_offsets); + free (end_offsets); + free (aggregator_list); + free (decoded_iov); + free (fd_start); + free (fd_end); + free (others_req); + free (my_req); + free (buf_indices); + free (count_my_req_per_proc); + return ret; } static int 
two_phase_exch_and_write(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, MPI_Datatype datatype, mca_io_ompio_access_array_t *others_req, struct iovec *offset_len, @@ -589,12 +591,12 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *fd_end, Flatlist_node *flat_buf, size_t *buf_idx, int striping_unit, - int two_phase_num_io_procs, + int two_phase_num_io_procs, int *aggregator_list) - + { - - + + int i, j, ntimes, max_ntimes, m; int *curr_offlen_ptr=NULL, *count=NULL, *send_size=NULL, *recv_size=NULL; int *partial_recv=NULL, *start_pos=NULL, req_len, flag; @@ -610,13 +612,13 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, #if DEBUG_ON int ii,jj; #endif - + char *write_buf=NULL; - - + + opal_datatype_type_size(&byte->super, &byte_size); - + for (i = 0; i < fh->f_size; i++){ if (others_req[i].count) { st_loc = others_req[i].offsets[0]; @@ -624,22 +626,22 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, break; } } - + for (i=0;if_size;i++){ for(j=0;j< others_req[i].count; j++){ st_loc = OMPIO_MIN(st_loc, others_req[i].offsets[j]); end_loc = OMPIO_MAX(end_loc, (others_req[i].offsets[j] + others_req[i].lens[j] - 1)); - + } } - + fh->f_get_bytes_per_agg ( &two_phase_cycle_buffer_size ); - ntimes = (int) ((end_loc - st_loc + two_phase_cycle_buffer_size)/two_phase_cycle_buffer_size); - + ntimes = (int) ((end_loc - st_loc + two_phase_cycle_buffer_size)/two_phase_cycle_buffer_size); + if ((st_loc == -1) && (end_loc == -1)) { ntimes = 0; } - + fh->f_comm->c_coll.coll_allreduce (&ntimes, &max_ntimes, 1, @@ -647,82 +649,92 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, MPI_MAX, fh->f_comm, fh->f_comm->c_coll.coll_allreduce_module); - + if (ntimes){ write_buf = (char *) malloc (two_phase_cycle_buffer_size); if ( NULL == write_buf ){ return OMPI_ERR_OUT_OF_RESOURCE; } } - - curr_offlen_ptr = (int *) calloc(fh->f_size, sizeof(int)); - + + curr_offlen_ptr = (int *) calloc(fh->f_size, 
sizeof(int)); + if ( NULL == curr_offlen_ptr ){ - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + count = (int *) malloc(fh->f_size*sizeof(int)); - + if ( NULL == count ){ - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + partial_recv = (int *)calloc(fh->f_size, sizeof(int)); - + if ( NULL == partial_recv ){ - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + send_size = (int *) calloc(fh->f_size,sizeof(int)); - + if ( NULL == send_size ){ - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + recv_size = (int *) calloc(fh->f_size,sizeof(int)); - + if ( NULL == recv_size ){ - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + send_buf_idx = (int *) malloc(fh->f_size*sizeof(int)); - + if ( NULL == send_buf_idx ){ - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + sent_to_proc = (int *) calloc(fh->f_size, sizeof(int)); - + if ( NULL == sent_to_proc){ - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + curr_to_proc = (int *) malloc(fh->f_size*sizeof(int)); - + if ( NULL == curr_to_proc ){ - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + done_to_proc = (int *) malloc(fh->f_size*sizeof(int)); - + if ( NULL == done_to_proc ){ - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + start_pos = (int *) malloc(fh->f_size*sizeof(int)); - + if ( NULL == start_pos ){ - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - - + + done = 0; off = st_loc; - + ompi_datatype_type_extent(datatype, &buftype_extent); for (m=0;m f_size; i++) count[i] = recv_size[i] = 0; - + size = OMPIO_MIN((unsigned)two_phase_cycle_buffer_size, end_loc-st_loc+1-done); for (i=0;if_size;i++){ @@ -733,7 +745,7 @@ static int 
two_phase_exch_and_write(mca_io_ompio_file_t *fh, /* this request may have been partially satisfied in the previous iteration. */ req_off = others_req[i].offsets[j] + - partial_recv[i]; + partial_recv[i]; req_len = others_req[i].lens[j] - partial_recv[i]; partial_recv[i] = 0; @@ -754,17 +766,17 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, size,i, count[i]); #endif - MPI_Address(write_buf+req_off-off, - &(others_req[i].mem_ptrs[j])); + PMPI_Address(write_buf+req_off-off, + &(others_req[i].mem_ptrs[j])); #if DEBUG_ON printf("%d : mem_ptrs : %ld\n", fh->f_rank, others_req[i].mem_ptrs[j]); #endif recv_size[i] += (int) (OMPIO_MIN(off + size - req_off, (unsigned)req_len)); - + if (off+size-req_off < (unsigned)req_len){ - + partial_recv[i] = (int)(off + size - req_off); break; } @@ -773,8 +785,8 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, } curr_offlen_ptr[i] = j; } - } - + } + ret = two_phase_exchage_data(fh, buf, write_buf, offset_len,send_size, start_pos,recv_size,off,size, @@ -784,29 +796,29 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, fd_size, fd_start, fd_end, flat_buf, others_req, send_buf_idx, curr_to_proc, - done_to_proc, m, buf_idx, + done_to_proc, m, buf_idx, buftype_extent, striping_unit, two_phase_num_io_procs, aggregator_list, &hole); - + if ( OMPI_SUCCESS != ret ){ goto exit; } - - - + + + flag = 0; for (i=0; if_size; i++) if (count[i]) flag = 1; - - - + + + if (flag){ - -#if TIME_BREAKDOWN + +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_write_time = MPI_Wtime(); #endif - + #if DEBUG_ON printf("rank : %d enters writing\n", fh->f_rank); printf("size : %ld, off : %ld\n",size, off); @@ -815,18 +827,19 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, } #endif len = size * byte_size; - fh->f_io_array = (mca_io_ompio_io_array_t *)malloc + fh->f_io_array = (mca_io_ompio_io_array_t *)malloc (sizeof(mca_io_ompio_io_array_t)); if (NULL == fh->f_io_array) { opal_output(1, "OUT OF MEMORY\n"); - 
return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + fh->f_io_array[0].offset =(IOVBASE_TYPE *)(intptr_t) off; fh->f_io_array[0].length = len; fh->f_io_array[0].memory_address = write_buf; fh->f_num_of_io_entries = 1; - + #if DEBUG_ON for (i=0 ; if_num_of_io_entries ; i++) { printf("%d: ADDRESS: %p OFFSET: %ld LENGTH: %d\n", @@ -836,19 +849,20 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, fh->f_io_array[i].length); } #endif - + if (fh->f_num_of_io_entries){ if ( 0 > fh->f_fbtl->fbtl_pwritev (fh)) { opal_output(1, "WRITE FAILED\n"); - return OMPI_ERROR; + ret = OMPI_ERROR; + goto exit; } } -#if TIME_BREAKDOWN +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN end_write_time = MPI_Wtime(); write_time += (end_write_time - start_write_time); #endif - - + + } /***************** DONE WRITING *****************************************/ /****RESET **********************/ @@ -857,10 +871,10 @@ static int two_phase_exch_and_write(mca_io_ompio_file_t *fh, free (fh->f_io_array); fh->f_io_array = NULL; } - + off += size; done += size; - + } for (i=0; if_size; i++) count[i] = recv_size[i] = 0; for (m=ntimes; mf_comm->c_coll.coll_alltoall (recv_size, @@ -963,7 +953,7 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, MPI_INT, fh->f_comm, fh->f_comm->c_coll.coll_alltoall_module); - + if ( OMPI_SUCCESS != ret ){ return ret; } @@ -974,22 +964,23 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, nprocs_recv++; } } - - + + recv_types = (ompi_datatype_t **) - malloc (( nprocs_recv + 1 ) * sizeof(ompi_datatype_t *)); - + calloc (( nprocs_recv + 1 ), sizeof(ompi_datatype_t *)); + if ( NULL == recv_types ){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - - tmp_len = (int *) malloc(fh->f_size*sizeof(int)); - + + tmp_len = (int *) calloc(fh->f_size, sizeof(int)); + if ( NULL == tmp_len ) { - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + j = 0; for (i=0;if_size;i++){ if (recv_size[i]) { @@ 
-998,47 +989,46 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, tmp_len[i] = others_req[i].lens[k]; others_req[i].lens[k] = partial_recv[i]; } - ompi_datatype_create_hindexed(count[i], + ompi_datatype_create_hindexed(count[i], &(others_req[i].lens[start_pos[i]]), - &(others_req[i].mem_ptrs[start_pos[i]]), + &(others_req[i].mem_ptrs[start_pos[i]]), MPI_BYTE, recv_types+j); ompi_datatype_commit(recv_types+j); j++; } } - + sum = 0; for (i=0;if_size;i++) sum += count[i]; - srt_off = (OMPI_MPI_OFFSET_TYPE *) + srt_off = (OMPI_MPI_OFFSET_TYPE *) malloc((sum+1)*sizeof(OMPI_MPI_OFFSET_TYPE)); - + if ( NULL == srt_off ){ ret = OMPI_ERR_OUT_OF_RESOURCE; - free(tmp_len); goto exit; } - + srt_len = (int *) malloc((sum+1)*sizeof(int)); - + if ( NULL == srt_len ) { ret = OMPI_ERR_OUT_OF_RESOURCE; - free(tmp_len); free(srt_off); goto exit; } - - + + two_phase_heap_merge(others_req, count, srt_off, srt_len, start_pos, fh->f_size,fh->f_rank, nprocs_recv, sum); - - - for (i=0; if_size; i++) + + + for (i=0; if_size; i++) if (partial_recv[i]) { k = start_pos[i] + count[i] - 1; others_req[i].lens[k] = tmp_len[i]; } - - free(tmp_len); - + + free(tmp_len); + tmp_len = NULL; + *hole = 0; if (off != srt_off[0]){ *hole = 1; @@ -1047,7 +1037,7 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, for (i=1;i srt_len[0]) + if(new_len > srt_len[0]) srt_len[0] = new_len; } else @@ -1056,19 +1046,20 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, if (i < sum || size != srt_len[0]) *hole = 1; } - - + + free(srt_off); free(srt_len); - + if (nprocs_recv){ if (*hole){ if (off > 0){ - fh->f_io_array = (mca_io_ompio_io_array_t *)malloc + fh->f_io_array = (mca_io_ompio_io_array_t *)malloc (sizeof(mca_io_ompio_io_array_t)); if (NULL == fh->f_io_array) { opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } fh->f_io_array[0].offset =(IOVBASE_TYPE *)(intptr_t) off; fh->f_num_of_io_entries = 1; @@ -1077,10 
+1068,11 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, if (fh->f_num_of_io_entries){ if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { opal_output(1, "READ FAILED\n"); - return OMPI_ERROR; + ret = OMPI_ERROR; + goto exit; } } - + } fh->f_num_of_io_entries = 0; if (NULL != fh->f_io_array) { @@ -1089,21 +1081,22 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, } } } - + nprocs_send = 0; for (i=0; i f_size; i++) if (send_size[i]) nprocs_send++; - + #if DEBUG_ON printf("%d : nprocs_send : %d\n", fh->f_rank,nprocs_send); #endif - - requests = (MPI_Request *) - malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); - + + requests = (MPI_Request *) + malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); + if ( NULL == requests ){ - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } - + j = 0; for (i=0; if_size; i++) { if (recv_size[i]) { @@ -1114,7 +1107,7 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, fh->f_rank+i+100*iter, fh->f_comm, requests+j)); - + if ( OMPI_SUCCESS != ret ){ goto exit; } @@ -1122,31 +1115,31 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, } } send_req = requests + nprocs_recv; - - + + if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { j = 0; - for (i=0; i f_size; i++) + for (i=0; i f_size; i++) if (send_size[i]) { ret = MCA_PML_CALL(isend(((char *) buf) + buf_idx[i], send_size[i], MPI_BYTE, i, fh->f_rank+i+100*iter, - MCA_PML_BASE_SEND_STANDARD, + MCA_PML_BASE_SEND_STANDARD, fh->f_comm, - send_req+j)); - + send_req+j)); + if ( OMPI_SUCCESS != ret ){ goto exit; } - + j++; buf_idx[i] += send_size[i]; } } else if(nprocs_send && (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY))){ - send_buf = (char **) malloc(fh->f_size*sizeof(char*)); + send_buf = (char **) calloc (fh->f_size, sizeof(char*)); if ( NULL == send_buf ){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; @@ -1154,51 +1147,60 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, for (i=0; i < fh->f_size; i++){ if 
(send_size[i]) { send_buf[i] = (char *) malloc(send_size[i]); - + if ( NULL == send_buf[i] ){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } } } - + ret = two_phase_fill_send_buffer(fh, buf,flat_buf, send_buf, offset_length, send_size, send_req,sent_to_proc, - contig_access_count, + contig_access_count, min_st_offset, fd_size, fd_start, fd_end, send_buf_idx, curr_to_proc, done_to_proc, iter, buftype_extent, striping_unit, two_phase_num_io_procs, aggregator_list); - + if ( OMPI_SUCCESS != ret ){ goto exit; } } - - - for (i=0; if_size; i++){ + free (send_buf[i]); + } + + free (send_buf); + } + free (tmp_len); + return ret; } @@ -1254,23 +1256,23 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh, static int two_phase_fill_send_buffer(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, Flatlist_node *flat_buf, char **send_buf, struct iovec *offset_length, int *send_size, MPI_Request *requests, int *sent_to_proc, - int contig_access_count, + int contig_access_count, OMPI_MPI_OFFSET_TYPE min_st_offset, OMPI_MPI_OFFSET_TYPE fd_size, OMPI_MPI_OFFSET_TYPE *fd_start, OMPI_MPI_OFFSET_TYPE *fd_end, int *send_buf_idx, - int *curr_to_proc, + int *curr_to_proc, int *done_to_proc, int iter, MPI_Aint buftype_extent, - int striping_unit, int two_phase_num_io_procs, + int striping_unit, int two_phase_num_io_procs, int *aggregator_list) { @@ -1284,7 +1286,7 @@ static int two_phase_fill_send_buffer(mca_io_ompio_file_t *fh, done_to_proc[i] = sent_to_proc[i]; } jj = 0; - + flat_buf_idx = 0; n_buftypes = 0; if ( flat_buf->count > 0 ) { @@ -1292,11 +1294,11 @@ static int two_phase_fill_send_buffer(mca_io_ompio_file_t *fh, flat_buf_sz = flat_buf->blocklens[0]; } - for (i=0; i done_to_proc[p]) { if (done_to_proc[p] > curr_to_proc[p]) { - size = OMPIO_MIN(curr_to_proc[p] + len - + size = OMPIO_MIN(curr_to_proc[p] + len - done_to_proc[p], send_size[p]-send_buf_idx[p]); buf_incr = done_to_proc[p] - curr_to_proc[p]; TWO_PHASE_BUF_INCR @@ -1335,10 +1337,10 @@ static int 
two_phase_fill_send_buffer(mca_io_ompio_file_t *fh, MPI_BYTE, p, fh->f_rank+p+100*iter, - MCA_PML_BASE_SEND_STANDARD, + MCA_PML_BASE_SEND_STANDARD, fh->f_comm, - requests+jj)); - + requests+jj)); + if ( OMPI_SUCCESS != ret ){ return ret; } @@ -1367,14 +1369,14 @@ static int two_phase_fill_send_buffer(mca_io_ompio_file_t *fh, return ret; } - - + + void two_phase_heap_merge( mca_io_ompio_access_array_t *others_req, - int *count, + int *count, OMPI_MPI_OFFSET_TYPE *srt_off, int *srt_len, int *start_pos, @@ -1413,12 +1415,12 @@ void two_phase_heap_merge( mca_io_ompio_access_array_t *others_req, for(;;) { l = 2*(k+1) - 1; r = 2*(k+1); - if ((l < heapsize) && + if ((l < heapsize) && (*(a[l].off_list) < *(a[k].off_list))) smallest = l; else smallest = k; - if ((r < heapsize) && + if ((r < heapsize) && (*(a[r].off_list) < *(a[smallest].off_list))) smallest = r; @@ -1430,11 +1432,11 @@ void two_phase_heap_merge( mca_io_ompio_access_array_t *others_req, a[k].off_list = a[smallest].off_list; a[k].len_list = a[smallest].len_list; a[k].nelem = a[smallest].nelem; - + a[smallest].off_list = tmp.off_list; a[smallest].len_list = tmp.len_list; a[smallest].nelem = tmp.nelem; - + k = smallest; } else break; @@ -1465,12 +1467,12 @@ void two_phase_heap_merge( mca_io_ompio_access_array_t *others_req, l = 2*(k+1) - 1; r = 2*(k+1); - if ((l < heapsize) && + if ((l < heapsize) && (*(a[l].off_list) < *(a[k].off_list))) smallest = l; else smallest = k; - if ((r < heapsize) && + if ((r < heapsize) && (*(a[r].off_list) < *(a[smallest].off_list))) smallest = r; @@ -1482,11 +1484,11 @@ void two_phase_heap_merge( mca_io_ompio_access_array_t *others_req, a[k].off_list = a[smallest].off_list; a[k].len_list = a[smallest].len_list; a[k].nelem = a[smallest].nelem; - + a[smallest].off_list = tmp.off_list; a[smallest].len_list = tmp.len_list; a[smallest].nelem = tmp.nelem; - + k = smallest; } else break; @@ -1494,11 +1496,11 @@ void two_phase_heap_merge( mca_io_ompio_access_array_t *others_req, } 
free(a); } -#if TIME_BREAKDOWN -int is_aggregator(int rank, +#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN +int is_aggregator(int rank, int nprocs_for_coll, int *aggregator_list){ - + int i=0; for (i=0; i -/*Based on ROMIO's domain partitioning implementaion +/*Based on ROMIO's domain partitioning implementaion Series of functions implementations for two-phase implementation Functions to support Domain partitioning and aggregator selection for two_phase . @@ -48,7 +51,7 @@ int mca_fcoll_two_phase_domain_partition (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *fd_size_ptr, int striping_unit, int nprocs_for_coll){ - + OMPI_MPI_OFFSET_TYPE min_st_offset, max_end_offset, *fd_start=NULL, *fd_end=NULL, fd_size; int i; @@ -58,57 +61,57 @@ int mca_fcoll_two_phase_domain_partition (mca_io_ompio_file_t *fh, for (i=0; i< fh->f_size; i++){ min_st_offset = OMPIO_MIN(min_st_offset, start_offsets[i]); max_end_offset = OMPIO_MAX(max_end_offset, end_offsets[i]); - + } - fd_size = ((max_end_offset - min_st_offset + 1) + nprocs_for_coll - 1)/nprocs_for_coll; - + fd_size = ((max_end_offset - min_st_offset + 1) + nprocs_for_coll - 1)/nprocs_for_coll; + if (fd_size < min_fd_size) fd_size = min_fd_size; - + *fd_st_ptr = (OMPI_MPI_OFFSET_TYPE *) - malloc(nprocs_for_coll*sizeof(OMPI_MPI_OFFSET_TYPE)); + malloc(nprocs_for_coll*sizeof(OMPI_MPI_OFFSET_TYPE)); if ( NULL == *fd_st_ptr ) { return OMPI_ERR_OUT_OF_RESOURCE; } *fd_end_ptr = (OMPI_MPI_OFFSET_TYPE *) - malloc(nprocs_for_coll*sizeof(OMPI_MPI_OFFSET_TYPE)); + malloc(nprocs_for_coll*sizeof(OMPI_MPI_OFFSET_TYPE)); if ( NULL == *fd_end_ptr ) { return OMPI_ERR_OUT_OF_RESOURCE; } - + fd_start = *fd_st_ptr; fd_end = *fd_end_ptr; - - + + if (striping_unit > 0){ /* Lock Boundary based domain partitioning */ int rem_front, rem_back; OMPI_MPI_OFFSET_TYPE end_off; - + fd_start[0] = min_st_offset; end_off = fd_start[0] + fd_size; rem_front = end_off % striping_unit; rem_back = striping_unit - rem_front; - if (rem_front < rem_back) + if (rem_front < 
rem_back) end_off -= rem_front; - else + else end_off += rem_back; fd_end[0] = end_off - 1; - + /* align fd_end[i] to the nearest file lock boundary */ for (i=1; i max_end_offset) fd_start[i] = fd_end[i] = -1; if (fd_end[i] > max_end_offset) fd_end[i] = max_end_offset; } - + *fd_size_ptr = fd_size; *min_st_offset_ptr = min_st_offset; - + return OMPI_SUCCESS; } int mca_fcoll_two_phase_calc_aggregator(mca_io_ompio_file_t *fh, - OMPI_MPI_OFFSET_TYPE off, + OMPI_MPI_OFFSET_TYPE off, OMPI_MPI_OFFSET_TYPE min_off, OMPI_MPI_OFFSET_TYPE *len, OMPI_MPI_OFFSET_TYPE fd_size, OMPI_MPI_OFFSET_TYPE *fd_start, OMPI_MPI_OFFSET_TYPE *fd_end, int striping_unit, - int num_aggregators, + int num_aggregators, int *aggregator_list) { int rank_index, rank; OMPI_MPI_OFFSET_TYPE avail_bytes; - + rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1); - + if (striping_unit > 0){ rank_index = 0; while (off > fd_end[rank_index]) rank_index++; } - + if (rank_index >= num_aggregators || rank_index < 0) { fprintf(stderr, @@ -170,7 +173,7 @@ int mca_fcoll_two_phase_calc_aggregator(mca_io_ompio_file_t *fh, fprintf(stderr, "rank_index(%d) >= num_aggregators(%d)fd_size=%lld off=%lld\n", rank_index,num_aggregators,fd_size,off); - MPI_Abort(MPI_COMM_WORLD, 1); + ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1); } @@ -180,11 +183,11 @@ int mca_fcoll_two_phase_calc_aggregator(mca_io_ompio_file_t *fh, } rank = aggregator_list[rank_index]; - + #if 0 printf("rank : %d, rank_index : %d\n",rank, rank_index); #endif - + return rank; } @@ -195,44 +198,44 @@ int mca_fcoll_two_phase_calc_others_requests(mca_io_ompio_file_t *fh, int *count_others_req_procs_ptr, mca_io_ompio_access_array_t **others_req_ptr) { - + int *count_others_req_per_proc=NULL, count_others_req_procs; int i,j, ret=OMPI_SUCCESS; MPI_Request *requests=NULL; mca_io_ompio_access_array_t *others_req=NULL; - + count_others_req_per_proc = (int *)malloc(fh->f_size*sizeof(int)); if ( NULL == count_others_req_per_proc ) { return 
OMPI_ERR_OUT_OF_RESOURCE; } - + /* Change it to the ompio specific alltoall in coll module : VVN*/ ret = fh->f_comm->c_coll.coll_alltoall (count_my_req_per_proc, 1, MPI_INT, - count_others_req_per_proc, + count_others_req_per_proc, 1, MPI_INT, - fh->f_comm, + fh->f_comm, fh->f_comm->c_coll.coll_alltoall_module); if ( OMPI_SUCCESS != ret ) { return ret; } - + #if 0 for( i = 0; i< fh->f_size; i++){ printf("my: %d, others: %d\n",count_my_req_per_proc[i], count_others_req_per_proc[i]); - + } #endif - *others_req_ptr = (mca_io_ompio_access_array_t *) malloc - (fh->f_size*sizeof(mca_io_ompio_access_array_t)); + *others_req_ptr = (mca_io_ompio_access_array_t *) malloc + (fh->f_size*sizeof(mca_io_ompio_access_array_t)); others_req = *others_req_ptr; - + count_others_req_procs = 0; for (i=0; if_size; i++) { if (count_others_req_per_proc[i]) { @@ -242,23 +245,23 @@ int mca_fcoll_two_phase_calc_others_requests(mca_io_ompio_file_t *fh, others_req[i].lens = (int *) malloc(count_others_req_per_proc[i]*sizeof(int)); others_req[i].mem_ptrs = (MPI_Aint *) - malloc(count_others_req_per_proc[i]*sizeof(MPI_Aint)); + malloc(count_others_req_per_proc[i]*sizeof(MPI_Aint)); count_others_req_procs++; } else others_req[i].count = 0; } - - + + requests = (MPI_Request *) malloc(1+2*(count_my_req_procs+count_others_req_procs)* - sizeof(MPI_Request)); + sizeof(MPI_Request)); if ( NULL == requests ) { ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + j = 0; for (i=0; if_size; i++){ if (others_req[i].count){ @@ -272,7 +275,7 @@ int mca_fcoll_two_phase_calc_others_requests(mca_io_ompio_file_t *fh, if ( OMPI_SUCCESS != ret ) { goto exit; } - + j++; ret = MCA_PML_CALL(irecv(others_req[i].lens, @@ -285,7 +288,7 @@ int mca_fcoll_two_phase_calc_others_requests(mca_io_ompio_file_t *fh, if ( OMPI_SUCCESS != ret ) { goto exit; } - + j++; } } @@ -298,9 +301,9 @@ int mca_fcoll_two_phase_calc_others_requests(mca_io_ompio_file_t *fh, OMPI_OFFSET_DATATYPE, i, i+fh->f_rank, - MCA_PML_BASE_SEND_STANDARD, + 
MCA_PML_BASE_SEND_STANDARD, fh->f_comm, - &requests[j])); + &requests[j])); if ( OMPI_SUCCESS != ret ) { goto exit; } @@ -311,17 +314,17 @@ int mca_fcoll_two_phase_calc_others_requests(mca_io_ompio_file_t *fh, MPI_INT, i, i+fh->f_rank+1, - MCA_PML_BASE_SEND_STANDARD, + MCA_PML_BASE_SEND_STANDARD, fh->f_comm, - &requests[j])); + &requests[j])); if ( OMPI_SUCCESS != ret ) { goto exit; } - + j++; } } - + if (j) { ret = ompi_request_wait_all ( j, requests, MPI_STATUSES_IGNORE ); if ( OMPI_SUCCESS != ret ) { @@ -365,43 +368,43 @@ int mca_fcoll_two_phase_calc_my_requests (mca_io_ompio_file_t *fh, int i, l, proc; OMPI_MPI_OFFSET_TYPE fd_len, rem_len, curr_idx, off; mca_io_ompio_access_array_t *my_req = NULL; - - - *count_my_req_per_proc_ptr = (int*)malloc(fh->f_size*sizeof(int)); - - if ( NULL == count_my_req_per_proc_ptr ){ + + + *count_my_req_per_proc_ptr = (int*)malloc(fh->f_size*sizeof(int)); + + if ( NULL == *count_my_req_per_proc_ptr ){ return OMPI_ERR_OUT_OF_RESOURCE; } count_my_req_per_proc = *count_my_req_per_proc_ptr; - + for (i=0;if_size;i++){ count_my_req_per_proc[i] = 0; } - + buf_idx = (size_t *) malloc (fh->f_size * sizeof(size_t)); - - if ( NULL == buf_idx ){ + + if ( NULL == buf_idx ){ return OMPI_ERR_OUT_OF_RESOURCE; } - + for (i=0; i < fh->f_size; i++) buf_idx[i] = -1; - + for (i=0;if_size : %d\n", fh->f_rank,fh->f_size);*/ *my_req_ptr = (mca_io_ompio_access_array_t *) malloc (fh->f_size * sizeof(mca_io_ompio_access_array_t)); @@ -419,13 +422,13 @@ int mca_fcoll_two_phase_calc_my_requests (mca_io_ompio_file_t *fh, goto err_exit; } my_req = *my_req_ptr; - + count_my_req_procs = 0; for (i = 0; i < fh->f_size; i++){ if(count_my_req_per_proc[i]) { my_req[i].offsets = (OMPI_MPI_OFFSET_TYPE *) malloc(count_my_req_per_proc[i] * sizeof(OMPI_MPI_OFFSET_TYPE)); - + if ( NULL == my_req[i].offsets ) { ret = OMPI_ERR_OUT_OF_RESOURCE; goto err_exit; @@ -440,10 +443,10 @@ int mca_fcoll_two_phase_calc_my_requests (mca_io_ompio_file_t *fh, } count_my_req_procs++; } - 
my_req[i].count = 0; + my_req[i].count = 0; } curr_idx = 0; - for (i=0; if_size; i++) { if (count_my_req_per_proc[i] > 0) { - fprintf(stdout, "data needed from %d (count = %d):\n", i, + fprintf(stdout, "data needed from %d (count = %d):\n", i, my_req[i].count); for (l=0; l < my_req[i].count; l++) { fprintf(stdout, " %d: off[%d] = %lld, len[%d] = %d\n", fh->f_rank, l, @@ -500,11 +503,11 @@ int mca_fcoll_two_phase_calc_my_requests (mca_io_ompio_file_t *fh, } } #endif - - + + *count_my_req_procs_ptr = count_my_req_procs; *buf_indices = buf_idx; - + return ret; err_exit: if (NULL != my_req) { diff --git a/ompi/mca/fs/Makefile.am b/ompi/mca/fs/Makefile.am index 862877e5d7e..0eb05d672f3 100644 --- a/ompi/mca/fs/Makefile.am +++ b/ompi/mca/fs/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fs/base/Makefile.am b/ompi/mca/fs/base/Makefile.am index bcb8422ea0f..e578dd73085 100644 --- a/ompi/mca/fs/base/Makefile.am +++ b/ompi/mca/fs/base/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. 
# All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fs/base/base.h b/ompi/mca/fs/base/base.h index 7c3f7b1991e..b83c93890a0 100644 --- a/ompi/mca/fs/base/base.h +++ b/ompi/mca/fs/base/base.h @@ -5,20 +5,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** +/** * @file * * MCA fs base framework public interface functions. @@ -48,6 +48,7 @@ OMPI_DECLSPEC int mca_fs_base_init_file (struct mca_io_ompio_file_t *file); OMPI_DECLSPEC int mca_fs_base_get_param (struct mca_io_ompio_file_t *file, int keyval); OMPI_DECLSPEC void mca_fs_base_get_parent_dir (char *filename, char **dirnamep); +OMPI_DECLSPEC int mca_fs_base_get_fstype(char *fname); /* * Globals */ diff --git a/ompi/mca/fs/base/fs_base_file_select.c b/ompi/mca/fs/base/fs_base_file_select.c index 41ea29275a3..6d91087b7da 100644 --- a/ompi/mca/fs/base/fs_base_file_select.c +++ b/ompi/mca/fs/base/fs_base_file_select.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,7 +30,7 @@ #include "ompi/mca/io/ompio/io_ompio.h" /* - * This structure is needed so that we can close the modules + * This structure is needed so that we can close the modules * which are not selected but were opened. mca_base_modules_close * which does this job for us requires a opal_list_t which contains * these modules @@ -58,20 +58,20 @@ static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL); * 4. Select the module with the highest priority * 5. Call the init function on the selected module so that it does the * right setup for the file - * 6. Call finalize on all the other modules which returned + * 6. Call finalize on all the other modules which returned * their module but were unfortunate to not get selected - */ + */ int mca_fs_base_file_select (struct mca_io_ompio_file_t *file, - mca_base_component_t *preferred) + mca_base_component_t *preferred) { - int priority; - int best_priority; + int priority; + int best_priority; opal_list_item_t *item; mca_base_component_list_item_t *cli; - mca_fs_base_component_t *component; + mca_fs_base_component_t *component; mca_fs_base_component_t *best_component; - mca_fs_base_module_t *module; + mca_fs_base_module_t *module; opal_list_t queried; queried_module_t *om; int err = MPI_SUCCESS; @@ -80,19 +80,19 @@ int mca_fs_base_file_select (struct mca_io_ompio_file_t *file, provided then it should be used (if possible) */ if (NULL != preferred) { - + /* We have a preferred component. 
Check if it is available and if so, whether it wants to run */ opal_output_verbose(10, ompi_fs_base_framework.framework_output, "fs:base:file_select: Checking preferred component: %s", preferred->mca_component_name); - - /* query the component for its priority and get its module + + /* query the component for its priority and get its module structure. This is necessary to proceed */ - + component = (mca_fs_base_component_t *)preferred; module = component->fsm_file_query (file, &priority); - if (NULL != module && + if (NULL != module && NULL != module->fs_module_init) { /* this query seems to have returned something legitimate @@ -105,7 +105,7 @@ int mca_fs_base_file_select (struct mca_io_ompio_file_t *file, file->f_fs_component = preferred; return module->fs_module_init(file); - } + } /* His preferred component is present, but is unable to * run. This is not a good sign. We should try selecting * some other component We let it fall through and select @@ -122,7 +122,7 @@ int mca_fs_base_file_select (struct mca_io_ompio_file_t *file, * All we need to do is to go through the list of available * components and find the one which has the highest priority and * use that for this file - */ + */ best_component = NULL; best_priority = -1; @@ -144,21 +144,21 @@ int mca_fs_base_file_select (struct mca_io_ompio_file_t *file, } else { /* * call the query function and see what it returns - */ + */ module = component->fsm_file_query (file, &priority); if (NULL == module || NULL == module->fs_module_init) { /* * query did not return any action which can be used - */ + */ opal_output_verbose(10, ompi_fs_base_framework.framework_output, "select: query returned failure"); } else { opal_output_verbose(10, ompi_fs_base_framework.framework_output, "select: query returned priority %d", priority); - /* + /* * is this the best component we have found till now? 
*/ if (priority > best_priority) { @@ -175,8 +175,8 @@ int mca_fs_base_file_select (struct mca_io_ompio_file_t *file, return OMPI_ERR_OUT_OF_RESOURCE; } om->om_component = component; - om->om_module = module; - opal_list_append(&queried, (opal_list_item_t *)om); + om->om_module = module; + opal_list_append(&queried, (opal_list_item_t *)om); } /* end else of if (NULL == module) */ } /* end else of if (NULL == component->fsm_init) */ } /* end for ... end of traversal */ @@ -202,7 +202,7 @@ int mca_fs_base_file_select (struct mca_io_ompio_file_t *file, * returned their priorities from the query. We now have to * unquery() those components which have not been selected and * init() the component which was selected - */ + */ while (NULL != (item = opal_list_remove_first(&queried))) { om = (queried_module_t *) item; if (om->om_component == best_component) { @@ -214,7 +214,7 @@ int mca_fs_base_file_select (struct mca_io_ompio_file_t *file, * defined. Whereever a function pointer is null in the * module structure we need to fill it in with the base * structure function pointers. This is yet to be done - */ + */ /* * We don return here coz we still need to go through and @@ -243,7 +243,7 @@ int mca_fs_base_file_select (struct mca_io_ompio_file_t *file, } /* if not best component */ OBJ_RELEASE(om); } /* traversing through the entire list */ - + opal_output_verbose(10, ompi_fs_base_framework.framework_output, "select: component %s selected", best_component->fsm_version.mca_component_name); diff --git a/ompi/mca/fs/base/fs_base_file_unselect.c b/ompi/mca/fs/base/fs_base_file_unselect.c index 73859b698c7..1252bc24976 100644 --- a/ompi/mca/fs/base/fs_base_file_unselect.c +++ b/ompi/mca/fs/base/fs_base_file_unselect.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fs/base/fs_base_find_available.c b/ompi/mca/fs/base/fs_base_find_available.c index 9fe2fc6182e..edc32e174e0 100644 --- a/ompi/mca/fs/base/fs_base_find_available.c +++ b/ompi/mca/fs/base/fs_base_find_available.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,18 +36,18 @@ static int init_query(const mca_base_component_t *m, static int init_query_2_0_0(const mca_base_component_t *component, bool enable_progress_threads, bool enable_mpi_threads); - + int mca_fs_base_find_available(bool enable_progress_threads, bool enable_mpi_threads) { mca_base_component_list_item_t *cli, *next; - /* The list of components which we should check is already present - in mca_fs_base_components_opened, which was established in + /* The list of components which we should check is already present + in mca_fs_base_components_opened, which was established in mca_fs_base_open */ OPAL_LIST_FOREACH_SAFE(cli, next, &ompi_fs_base_framework.framework_components, mca_base_component_list_item_t) { - /* Now for this entry, we have to determine the thread level. Call + /* Now for this entry, we have to determine the thread level. Call a subroutine to do the job for us */ if (OMPI_SUCCESS != init_query(cli->cli_component, @@ -72,14 +72,14 @@ int mca_fs_base_find_available(bool enable_progress_threads, /* All done */ return OMPI_SUCCESS; } - - + + static int init_query(const mca_base_component_t *m, bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { int ret; - + opal_output_verbose(10, ompi_fs_base_framework.framework_output, "fs:find_available: querying fs component %s", m->mca_component_name); @@ -118,11 +118,11 @@ static int init_query(const mca_base_component_t *m, static int init_query_2_0_0(const mca_base_component_t *component, bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { - mca_fs_base_component_2_0_0_t *fs = + mca_fs_base_component_2_0_0_t *fs = (mca_fs_base_component_2_0_0_t *) component; - + return fs->fsm_init_query(enable_progress_threads, enable_mpi_threads); } diff --git a/ompi/mca/fs/base/fs_base_frame.c b/ompi/mca/fs/base/fs_base_frame.c index 7f0fae12b28..1a992dd24d7 100644 --- 
a/ompi/mca/fs/base/fs_base_frame.c +++ b/ompi/mca/fs/base/fs_base_frame.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fs/base/fs_base_get_parent_dir.c b/ompi/mca/fs/base/fs_base_get_parent_dir.c index 5e622458931..66b3d4636a1 100644 --- a/ompi/mca/fs/base/fs_base_get_parent_dir.c +++ b/ompi/mca/fs/base/fs_base_get_parent_dir.c @@ -5,17 +5,17 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +25,14 @@ #include "ompi/mca/mca.h" #include "opal/mca/base/base.h" +#include "opal/util/path.h" #include "ompi/mca/fs/fs.h" #include "ompi/mca/fs/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" #ifdef HAVE_SYS_STATFS_H -#include /* or */ +#include /* or */ #endif #ifdef HAVE_SYS_PARAM_H #include @@ -93,3 +95,29 @@ void mca_fs_base_get_parent_dir ( char *filename, char **dirnamep) *dirnamep = dir; return; } + +int mca_fs_base_get_fstype(char *fname ) +{ + int ompio_type = UFS; + char *fstype=NULL; + bool ret = opal_path_nfs ( fname, &fstype ); + + if ( false == ret ) { + char *dir; + mca_fs_base_get_parent_dir (fname, &dir ); + ret = opal_path_nfs (dir, &fstype); + if ( false == ret ) { + return ompio_type; + } + } + if ( 0 == strncasecmp(fstype, "lustre", sizeof("lustre")) ) { + ompio_type = LUSTRE; + } + else if ( 0 == strncasecmp(fstype, "pvfs2", sizeof("pvfs2"))) { + ompio_type = PVFS2; + } + + free (fstype); + return ompio_type; +} + diff --git a/ompi/mca/fs/configure.m4 b/ompi/mca/fs/configure.m4 index 71013429cbc..f394b14cb11 100644 --- a/ompi/mca/fs/configure.m4 +++ b/ompi/mca/fs/configure.m4 @@ -3,15 +3,15 @@ # Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # MCA_ompi_fs_CONFIG(project_name, framework_name) # ------------------------------------------- -AC_DEFUN([MCA_ompi_fs_CONFIG], +AC_DEFUN([MCA_ompi_fs_CONFIG], [ # An AC-ARG-ENABLE for mpi-io was set in ompi/mca/io/configure.m4. # If it's no, we shouldn't bother building anything in fcoll. diff --git a/ompi/mca/fs/fs.h b/ompi/mca/fs/fs.h index a8cd47a98d4..cdb6922827c 100644 --- a/ompi/mca/fs/fs.h +++ b/ompi/mca/fs/fs.h @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -41,8 +43,8 @@ struct mca_io_ompio_file_t; /* * This framework provides the abstraction for file management operations * of the MPI I/O chapter in MPI-2. The operations defined by this - * framework are mostly collective in nature. - * + * framework are mostly collective in nature. + * * These are the component function prototypes. These function pointers * go into the component structure. These functions (query() and finalize() * are called during fs_base_select(). 
Each component is query() ied @@ -65,11 +67,11 @@ struct mca_io_ompio_file_t; * **************** component struct ******************************* */ -typedef int (*mca_fs_base_component_init_query_1_0_0_fn_t) - (bool enable_progress_threads, +typedef int (*mca_fs_base_component_init_query_1_0_0_fn_t) + (bool enable_progress_threads, bool enable_mpi_threads); -typedef struct mca_fs_base_module_1_0_0_t * +typedef struct mca_fs_base_module_1_0_0_t * (*mca_fs_base_component_file_query_1_0_0_fn_t) (struct mca_io_ompio_file_t *file, int *priority); @@ -84,7 +86,7 @@ typedef int (*mca_fs_base_component_file_unquery_1_0_0_fn_t) struct mca_fs_base_component_2_0_0_t { mca_base_component_t fsm_version; mca_base_component_data_t fsm_data; - + mca_fs_base_component_init_query_1_0_0_fn_t fsm_init_query; mca_fs_base_component_file_query_1_0_0_fn_t fsm_file_query; mca_fs_base_component_file_unquery_1_0_0_fn_t fsm_file_unquery; @@ -107,7 +109,7 @@ typedef int (*mca_fs_base_module_finalize_1_0_0_fn_t) (struct mca_io_ompio_file_t *file); typedef int (*mca_fs_base_module_file_open_fn_t)( - struct ompi_communicator_t *comm, char *filename, int amode, + struct ompi_communicator_t *comm, const char *filename, int amode, struct ompi_info_t *info, struct mca_io_ompio_file_t *fh); typedef int (*mca_fs_base_module_file_close_fn_t)(struct mca_io_ompio_file_t *fh); typedef int (*mca_fs_base_module_file_delete_fn_t)( @@ -132,7 +134,7 @@ struct mca_fs_base_module_1_0_0_t { */ mca_fs_base_module_init_1_0_0_fn_t fs_module_init; mca_fs_base_module_finalize_1_0_0_fn_t fs_module_finalize; - + /* FS function pointers */ mca_fs_base_module_file_open_fn_t fs_file_open; mca_fs_base_module_file_close_fn_t fs_file_close; diff --git a/ompi/mca/fs/lustre/Makefile.am b/ompi/mca/fs/lustre/Makefile.am index 1640aec4ecc..4fe256888ef 100644 --- a/ompi/mca/fs/lustre/Makefile.am +++ b/ompi/mca/fs/lustre/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of 
Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fs/lustre/configure.m4 b/ompi/mca/fs/lustre/configure.m4 index bab25ec6fbf..ab660ed0b26 100644 --- a/ompi/mca/fs/lustre/configure.m4 +++ b/ompi/mca/fs/lustre/configure.m4 @@ -6,21 +6,21 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008-2012 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # -# MCA_fs_lustre_CONFIG(action-if-can-compile, +# MCA_fs_lustre_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_fs_lustre_CONFIG],[ diff --git a/ompi/mca/fs/lustre/fs_lustre.c b/ompi/mca/fs/lustre/fs_lustre.c index 4be9ecdcd0a..b0f722db1c1 100644 --- a/ompi/mca/fs/lustre/fs_lustre.c +++ b/ompi/mca/fs/lustre/fs_lustre.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -30,7 +30,7 @@ #include "ompi/mca/fs/lustre/fs_lustre.h" #ifdef HAVE_SYS_STATFS_H -#include /* or */ +#include /* or */ #endif #ifdef HAVE_SYS_PARAM_H #include @@ -71,44 +71,27 @@ int mca_fs_lustre_component_init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ - + return OMPI_SUCCESS; -} +} struct mca_fs_base_module_1_0_0_t * mca_fs_lustre_component_file_query (mca_io_ompio_file_t *fh, int *priority) { - int err; - char *dir; - struct statfs fsbuf; char *tmp; /* The code in this function is based on the ADIO FS selection in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ *priority = mca_fs_lustre_priority; - + tmp = strchr (fh->f_filename, ':'); if (!tmp) { if (OMPIO_ROOT == fh->f_rank) { - do { - err = statfs (fh->f_filename, &fsbuf); - } while (err && (errno == ESTALE)); - - if (err && (errno == ENOENT)) { - mca_fs_base_get_parent_dir (fh->f_filename, &dir); - err = statfs (dir, &fsbuf); - free (dir); - } -#ifndef LL_SUPER_MAGIC -#define LL_SUPER_MAGIC 0x0BD00BD0 -#endif - if (fsbuf.f_type == LL_SUPER_MAGIC) { - fh->f_fstype = LUSTRE; - } - } + fh->f_fstype = mca_fs_base_get_fstype ( fh->f_filename ); + } fh->f_comm->c_coll.coll_bcast (&(fh->f_fstype), 1, MPI_INT, @@ -117,12 +100,12 @@ mca_fs_lustre_component_file_query (mca_io_ompio_file_t *fh, int *priority) fh->f_comm->c_coll.coll_bcast_module); } else { - if (!strncmp(fh->f_filename, "lustre:", 7) || + if (!strncmp(fh->f_filename, "lustre:", 7) || !strncmp(fh->f_filename, "LUSTRE:", 7)) { fh->f_fstype = LUSTRE; } } - + if (LUSTRE == fh->f_fstype) { if (*priority < 50) { *priority = 50; @@ -134,9 +117,9 @@ mca_fs_lustre_component_file_query (mca_io_ompio_file_t *fh, int *priority) } int mca_fs_lustre_component_file_unquery (mca_io_ompio_file_t *file) -{ +{ /* This function might be needed for some purposes later. 
for now it - * does not have anything to do since there are no steps which need + * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ return OMPI_SUCCESS; @@ -144,14 +127,14 @@ int mca_fs_lustre_component_file_unquery (mca_io_ompio_file_t *file) int mca_fs_lustre_module_init (mca_io_ompio_file_t *file) { - /* Make sure the file type is not overwritten by the last queried + /* Make sure the file type is not overwritten by the last queried * component */ file->f_fstype = LUSTRE; return OMPI_SUCCESS; } - -int mca_fs_lustre_module_finalize (mca_io_ompio_file_t *file) + +int mca_fs_lustre_module_finalize (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/lustre/fs_lustre.h b/ompi/mca/fs/lustre/fs_lustre.h index 18fb8a33256..ad4844c618c 100644 --- a/ompi/mca/fs/lustre/fs_lustre.h +++ b/ompi/mca/fs/lustre/fs_lustre.h @@ -5,15 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,6 +33,14 @@ extern int mca_fs_lustre_stripe_width; BEGIN_C_DECLS +#include +#include + +#ifndef LOV_MAX_STRIPE_COUNT +#define LOV_MAX_STRIPE_COUNT 160 +#endif + + int mca_fs_lustre_component_init_query(bool enable_progress_threads, bool enable_mpi_threads); struct mca_fs_base_module_1_0_0_t * @@ -45,23 +55,23 @@ OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_lustre_componen * ****************************************************************** * ********* functions which are implemented in this module ********* * ****************************************************************** - */ + */ -int mca_fs_lustre_file_open (struct ompi_communicator_t *comm, - char *filename, +int mca_fs_lustre_file_open (struct ompi_communicator_t *comm, + const char *filename, int amode, - struct ompi_info_t *info, + struct ompi_info_t *info, mca_io_ompio_file_t *fh); int mca_fs_lustre_file_close (mca_io_ompio_file_t *fh); - + int mca_fs_lustre_file_delete (char *filename, struct ompi_info_t *info); int mca_fs_lustre_file_set_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE size); -int mca_fs_lustre_file_get_size (mca_io_ompio_file_t *fh, +int mca_fs_lustre_file_get_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *size); int mca_fs_lustre_file_sync (mca_io_ompio_file_t *fh); @@ -73,8 +83,8 @@ int mca_fs_lustre_file_seek (mca_io_ompio_file_t *fh, * ****************************************************************** * ************ functions implemented in this module end ************ * ****************************************************************** - */ - + */ + END_C_DECLS #endif /* MCA_FS_LUSTRE_H */ diff --git a/ompi/mca/fs/lustre/fs_lustre_component.c b/ompi/mca/fs/lustre/fs_lustre_component.c index ccf2580c8d7..d8392af482d 100644 --- a/ompi/mca/fs/lustre/fs_lustre_component.c +++ b/ompi/mca/fs/lustre/fs_lustre_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The 
University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -41,9 +41,9 @@ static int lustre_register(void); int mca_fs_lustre_priority = 20; /*setting default stripe size to 64KB. MCA parameter - Can be changed at + Can be changed at runtime also*/ -int mca_fs_lustre_stripe_size = 1048576; +int mca_fs_lustre_stripe_size = 0; int mca_fs_lustre_stripe_width = 0; /* * Instantiate the public struct with all of our public information @@ -81,7 +81,7 @@ lustre_register(void) MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_lustre_priority); - mca_fs_lustre_stripe_size = 1048576; + mca_fs_lustre_stripe_size = 0; (void) mca_base_component_var_register(&mca_fs_lustre_component.fsm_version, "stripe_size", "stripe size of a file over lustre", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -89,7 +89,7 @@ lustre_register(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_lustre_stripe_size); mca_fs_lustre_stripe_width = 0; (void) mca_base_component_var_register(&mca_fs_lustre_component.fsm_version, - "stripe_width", "stripe width of a file over lustre", + "stripe_width", "stripe count of a file over lustre", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_lustre_stripe_width); diff --git a/ompi/mca/fs/lustre/fs_lustre_file_close.c b/ompi/mca/fs/lustre/fs_lustre_file_close.c index 0ee1e15b27f..a62e371d040 100644 --- 
a/ompi/mca/fs/lustre/fs_lustre_file_close.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_close.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,7 +37,7 @@ int mca_fs_lustre_file_close (mca_io_ompio_file_t *fh) { - fh->f_comm->c_coll.coll_barrier (fh->f_comm, + fh->f_comm->c_coll.coll_barrier (fh->f_comm, fh->f_comm->c_coll.coll_barrier_module); close (fh->fd); return OMPI_SUCCESS; diff --git a/ompi/mca/fs/lustre/fs_lustre_file_delete.c b/ompi/mca/fs/lustre/fs_lustre_file_delete.c index ba45ae168d1..1fc6da84080 100644 --- a/ompi/mca/fs/lustre/fs_lustre_file_delete.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_delete.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fs/lustre/fs_lustre_file_get_size.c b/ompi/mca/fs/lustre/fs_lustre_file_get_size.c index 35c73cf40cb..e6e2a514225 100644 --- a/ompi/mca/fs/lustre/fs_lustre_file_get_size.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_get_size.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,9 +33,18 @@ * Returns: - Success if size is get */ int -mca_fs_lustre_file_get_size (mca_io_ompio_file_t *file_handle, +mca_fs_lustre_file_get_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *size) { - printf ("LUSTRE GET SIZE\n"); + *size = lseek(fh->fd, 0, SEEK_END); + if (-1 == *size) { + perror ("lseek"); + return OMPI_ERROR; + } + + if (-1 == (lseek(fh->fd, fh->f_offset, SEEK_SET))) { + perror ("lseek"); + return OMPI_ERROR; + } return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/lustre/fs_lustre_file_open.c b/ompi/mca/fs/lustre/fs_lustre_file_open.c index 6792bb5a52e..b873564bc86 100644 --- a/ompi/mca/fs/lustre/fs_lustre_file_open.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_open.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,8 +32,20 @@ #include "ompi/info/info.h" #include -#include -#include + +static void *alloc_lum(); + +static void *alloc_lum() +{ + int v1, v3, join; + + v1 = sizeof(struct lov_user_md_v1) + + LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data_v1); + v3 = sizeof(struct lov_user_md_v3) + + LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data_v1); + + return malloc(MAX(v1, v3)); +} /* * file_open_lustre @@ -40,9 +54,10 @@ * Accepts: - same arguments as MPI_File_open() * Returns: - Success if new file handle */ + int -mca_fs_lustre_file_open (struct ompi_communicator_t *comm, - char* filename, +mca_fs_lustre_file_open (struct ompi_communicator_t *comm, + const char* filename, int access_mode, struct ompi_info_t *info, mca_io_ompio_file_t *fh) @@ -50,6 +65,10 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm, int amode; int old_mask, perm; int rc; + int flag; + int fs_lustre_stripe_size = -1; + int fs_lustre_stripe_width = -1; + char char_stripe[MPI_MAX_INFO_KEY]; struct lov_user_md *lump=NULL; @@ -74,29 +93,49 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm, if (access_mode & MPI_MODE_EXCL) amode = amode | O_EXCL; - if ((mca_fs_lustre_stripe_size || mca_fs_lustre_stripe_width) && + + ompi_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag); + if ( flag ) { + sscanf ( char_stripe, "%d", &fs_lustre_stripe_size ); + } + + ompi_info_get (info, "stripe_width", MPI_MAX_INFO_VAL, char_stripe, &flag); + if ( flag ) { + sscanf ( char_stripe, "%d", &fs_lustre_stripe_width ); + } + + if 
(fs_lustre_stripe_size < 0) { + fs_lustre_stripe_size = mca_fs_lustre_stripe_size; + } + + if (fs_lustre_stripe_width < 0) { + fs_lustre_stripe_width = mca_fs_lustre_stripe_width; + } + + if ( (fs_lustre_stripe_size>0 || fs_lustre_stripe_width>0) && (amode&O_CREAT) && (amode&O_RDWR)) { if (0 == fh->f_rank) { - llapi_file_create(filename, - mca_fs_lustre_stripe_size, + llapi_file_create(filename, + fs_lustre_stripe_size, -1, /* MSC need to change that */ - mca_fs_lustre_stripe_width, + fs_lustre_stripe_width, 0); /* MSC need to change that */ fh->fd = open(filename, O_CREAT | O_RDWR | O_LOV_DELAY_CREATE, perm); if (fh->fd < 0) { - fprintf(stderr, "Can't open %s file: %d (%s)\n", + fprintf(stderr, "Can't open %s file: %d (%s)\n", filename, errno, strerror(errno)); return OMPI_ERROR; } close (fh->fd); } - fh->f_comm->c_coll.coll_barrier (fh->f_comm, + fh->f_comm->c_coll.coll_barrier (fh->f_comm, fh->f_comm->c_coll.coll_barrier_module); } fh->fd = open (filename, amode, perm); if (fh->fd < 0) { + opal_output(1, "error opening file %s\n", filename); return OMPI_ERROR; } @@ -104,17 +143,21 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm, fh->f_stripe_size = mca_fs_lustre_stripe_size; } else { - lump = (struct lov_user_md *) malloc (sizeof(struct lov_user_md)); + lump = alloc_lum(); if (NULL == lump ){ - fprintf(stderr,"Cannot Allocate Lump for extracting stripe size\n"); + fprintf(stderr,"Cannot allocate memory for extracting stripe size\n"); return OMPI_ERROR; } rc = llapi_file_get_stripe(filename, lump); if (rc != 0) { - fprintf(stderr, "get_stripe failed: %d (%s)\n",errno, strerror(errno)); - return -1; + opal_output(1, "get_stripe failed: %d (%s)\n", errno, strerror(errno)); + return OMPI_ERROR; } fh->f_stripe_size = lump->lmm_stripe_size; + + // if ( NULL != lump ) { + // free ( lump ); + // } } return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/lustre/fs_lustre_file_set_size.c b/ompi/mca/fs/lustre/fs_lustre_file_set_size.c index c5492a31eb3..5c2d6b1eb56 
100644 --- a/ompi/mca/fs/lustre/fs_lustre_file_set_size.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_set_size.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,9 +33,21 @@ * Returns: - Success if size is set */ int -mca_fs_lustre_file_set_size (mca_io_ompio_file_t *file_handle, +mca_fs_lustre_file_set_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE size) { - printf ("LUSTRE SET SIZE\n"); + int err = 0; + + err = ftruncate(fh->fd, size); + + fh->f_comm->c_coll.coll_bcast (&err, + 1, + MPI_INT, + OMPIO_ROOT, + fh->f_comm, + fh->f_comm->c_coll.coll_bcast_module); + if (-1 == err) { + return OMPI_ERROR; + } return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/lustre/fs_lustre_file_sync.c b/ompi/mca/fs/lustre/fs_lustre_file_sync.c index 2c97f8e90d2..091960de234 100644 --- a/ompi/mca/fs/lustre/fs_lustre_file_sync.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_sync.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,7 +39,7 @@ mca_fs_lustre_file_sync (mca_io_ompio_file_t *fh) int err; err = fsync(fh->fd); - + if (-1 == err) { return OMPI_ERROR; } diff --git a/ompi/mca/fs/plfs/Makefile.am b/ompi/mca/fs/plfs/Makefile.am index 2576c5acd01..be6409d131a 100644 --- a/ompi/mca/fs/plfs/Makefile.am +++ b/ompi/mca/fs/plfs/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2014 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -40,7 +40,7 @@ fs_plfs_sources = \ fs_plfs_file_delete.c \ fs_plfs_file_sync.c \ fs_plfs_file_set_size.c \ - fs_plfs_file_get_size.c + fs_plfs_file_get_size.c AM_CPPFLAGS = $(fs_plfs_CPPFLAGS) diff --git a/ompi/mca/fs/plfs/configure.m4 b/ompi/mca/fs/plfs/configure.m4 index 7a63540f5c8..012422ea760 100644 --- a/ompi/mca/fs/plfs/configure.m4 +++ b/ompi/mca/fs/plfs/configure.m4 @@ -6,21 +6,21 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008-2014 University of Houston. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # -# MCA_fs_plfs_CONFIG(action-if-can-compile, +# MCA_fs_plfs_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_fs_plfs_CONFIG],[ diff --git a/ompi/mca/fs/plfs/fs_plfs.c b/ompi/mca/fs/plfs/fs_plfs.c index 7c024ca5ca5..0085150043a 100644 --- a/ompi/mca/fs/plfs/fs_plfs.c +++ b/ompi/mca/fs/plfs/fs_plfs.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -30,7 +30,7 @@ #include "ompi/mca/fs/plfs/fs_plfs.h" #ifdef HAVE_SYS_STATFS_H -#include /* or */ +#include /* or */ #endif #ifdef HAVE_SYS_PARAM_H #include @@ -71,7 +71,7 @@ int mca_fs_plfs_component_init_query(bool enable_progress_threads, { /* Nothing to do */ return OMPI_SUCCESS; -} +} struct mca_fs_base_module_1_0_0_t * mca_fs_plfs_component_file_query (mca_io_ompio_file_t *fh, int *priority) @@ -83,27 +83,27 @@ mca_fs_plfs_component_file_query (mca_io_ompio_file_t *fh, int *priority) char wpath[1024]; /* The code in this function is based on the ADIO FS selection in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ *priority = mca_fs_plfs_priority; - + tmp = strchr (fh->f_filename, ':'); if (!tmp) { if (OMPIO_ROOT == fh->f_rank) { do { err = statfs (fh->f_filename, &fsbuf); } while (err && (errno == ESTALE)); - + if (err && (ENOENT == errno)) { mca_fs_base_get_parent_dir (fh->f_filename, &dir); err = statfs (dir, &fsbuf); free (dir); } - - getcwd( wpath, sizeof(wpath) ); - if(is_plfs_path(wpath) == 1) { + + getcwd( wpath, sizeof(wpath) ); + if(is_plfs_path(wpath) == 1) { fh->f_fstype = PLFS; } } @@ -115,12 +115,12 @@ mca_fs_plfs_component_file_query (mca_io_ompio_file_t *fh, int *priority) fh->f_comm->c_coll.coll_bcast_module); } else { - if (!strncmp(fh->f_filename, "plfs:", 7) || + if (!strncmp(fh->f_filename, "plfs:", 7) || !strncmp(fh->f_filename, "PLFS:", 7)) { fh->f_fstype = PLFS; } } - + if (PLFS == fh->f_fstype) { if (*priority < 50) { *priority = 50; @@ -131,9 +131,9 @@ mca_fs_plfs_component_file_query (mca_io_ompio_file_t *fh, int *priority) } int mca_fs_plfs_component_file_unquery (mca_io_ompio_file_t *file) -{ +{ /* This function might be needed for some purposes later. 
for now it - * does not have anything to do since there are no steps which need + * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ return OMPI_SUCCESS; @@ -141,14 +141,14 @@ int mca_fs_plfs_component_file_unquery (mca_io_ompio_file_t *file) int mca_fs_plfs_module_init (mca_io_ompio_file_t *file) { - /* Make sure the file type is not overwritten by the last queried + /* Make sure the file type is not overwritten by the last queried * component */ file->f_fstype = PLFS; return OMPI_SUCCESS; } - -int mca_fs_plfs_module_finalize (mca_io_ompio_file_t *file) + +int mca_fs_plfs_module_finalize (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/plfs/fs_plfs.h b/ompi/mca/fs/plfs/fs_plfs.h index 323a4b42163..69221d4c9b8 100644 --- a/ompi/mca/fs/plfs/fs_plfs.h +++ b/ompi/mca/fs/plfs/fs_plfs.h @@ -5,15 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,7 +30,6 @@ #include extern int mca_fs_plfs_priority; -extern int mca_fs_plfs_num_hostdir; BEGIN_C_DECLS @@ -46,23 +47,23 @@ OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_plfs_component; * ****************************************************************** * ********* functions which are implemented in this module ********* * ****************************************************************** - */ + */ -int mca_fs_plfs_file_open (struct ompi_communicator_t *comm, - char *filename, +int mca_fs_plfs_file_open (struct ompi_communicator_t *comm, + const char *filename, int amode, - struct ompi_info_t *info, + struct ompi_info_t *info, mca_io_ompio_file_t *fh); int mca_fs_plfs_file_close (mca_io_ompio_file_t *fh); - + int mca_fs_plfs_file_delete (char *filename, struct ompi_info_t *info); int mca_fs_plfs_file_set_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE size); -int mca_fs_plfs_file_get_size (mca_io_ompio_file_t *fh, +int mca_fs_plfs_file_get_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *size); int mca_fs_plfs_file_sync (mca_io_ompio_file_t *fh); @@ -74,8 +75,8 @@ int mca_fs_plfs_file_seek (mca_io_ompio_file_t *fh, * ****************************************************************** * ************ functions implemented in this module end ************ * ****************************************************************** - */ - + */ + END_C_DECLS #endif /* MCA_FS_PLFS_H */ diff --git a/ompi/mca/fs/plfs/fs_plfs_component.c b/ompi/mca/fs/plfs/fs_plfs_component.c index 37c50745329..6df5f7db22b 100644 --- a/ompi/mca/fs/plfs/fs_plfs_component.c +++ b/ompi/mca/fs/plfs/fs_plfs_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -39,7 +39,6 @@ const char *mca_fs_plfs_component_version_string = static int plfs_register(void); int mca_fs_plfs_priority = 20; -int mca_fs_plfs_num_hostdir = -1; /* * Instantiate the public struct with all of our public information @@ -77,12 +76,6 @@ plfs_register(void) MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_plfs_priority); - mca_fs_plfs_num_hostdir = -1; - (void) mca_base_component_var_register(&mca_fs_plfs_component.fsm_version, - "num_hostdir", "number of host directories of a file over plfs", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_plfs_num_hostdir); return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/plfs/fs_plfs_file_close.c b/ompi/mca/fs/plfs/fs_plfs_file_close.c index 83109f1e620..03b92af91af 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_close.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_close.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -43,13 +43,15 @@ mca_fs_plfs_file_close (mca_io_ompio_file_t *fh) int amode; char wpath[1024]; - fh->f_comm->c_coll.coll_barrier (fh->f_comm, + fh->f_comm->c_coll.coll_barrier (fh->f_comm, fh->f_comm->c_coll.coll_barrier_module); getcwd( wpath, sizeof(wpath) ); sprintf( wpath,"%s/%s",wpath,fh->f_filename ); - - if(-1 == access(fh->f_filename, F_OK)) { + + plfs_ret = plfs_access(wpath, F_OK); + if ( PLFS_SUCCESS != plfs_ret ) { + opal_output(0, "fs_plfs_file_close: Error in plfs_access:\n%s\n", strplfserr(plfs_ret)); return OMPI_ERROR; // file doesn't exist } @@ -65,9 +67,16 @@ mca_fs_plfs_file_close (mca_io_ompio_file_t *fh) if (fh->f_amode & MPI_MODE_EXCL) { return OMPI_ERROR; } - - plfs_ret = plfs_close(fh->f_fs_ptr, 0, 0, amode ,NULL, &flags); - if (PLFS_SUCCESS != plfs_ret) { + + plfs_ret = plfs_sync(fh->f_fs_ptr); + if (PLFS_SUCCESS != plfs_ret) { + opal_output(0, "fs_plfs_file_close: Error in plfs_sync:\n%s\n", strplfserr(plfs_ret)); + return OMPI_ERROR; + } + + + plfs_ret = plfs_close(fh->f_fs_ptr, fh->f_rank, 0, amode ,NULL, &flags); + if (PLFS_SUCCESS != plfs_ret) { opal_output(0, "fs_plfs_file_close: Error in plfs_close:\n%s\n", strplfserr(plfs_ret)); return OMPI_ERROR; } diff --git a/ompi/mca/fs/plfs/fs_plfs_file_delete.c b/ompi/mca/fs/plfs/fs_plfs_file_delete.c index aad9ec4456d..d20a8e88c59 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_delete.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_delete.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fs/plfs/fs_plfs_file_get_size.c b/ompi/mca/fs/plfs/fs_plfs_file_get_size.c index e27284d569b..c59bd8c53ae 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_get_size.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_get_size.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -42,8 +42,8 @@ mca_fs_plfs_file_get_size (mca_io_ompio_file_t *fh, char wpath[1024]; int size_only = 1; - getcwd(wpath, sizeof(wpath)); - sprintf(wpath,"%s/%s",wpath,fh->f_filename); + getcwd(wpath, sizeof(wpath)); + sprintf(wpath,"%s/%s",wpath,fh->f_filename); plfs_ret = plfs_getattr(pfd, wpath, &st, size_only); if (PLFS_SUCCESS != plfs_ret) { diff --git a/ompi/mca/fs/plfs/fs_plfs_file_open.c b/ompi/mca/fs/plfs/fs_plfs_file_open.c index a333a477bcd..c8dd294820a 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_open.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_open.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,8 +41,8 @@ * Returns: - Success if new file handle */ int -mca_fs_plfs_file_open (struct ompi_communicator_t *comm, - char* filename, +mca_fs_plfs_file_open (struct ompi_communicator_t *comm, + const char* filename, int access_mode, struct ompi_info_t *info, mca_io_ompio_file_t *fh) @@ -51,14 +53,12 @@ mca_fs_plfs_file_open (struct ompi_communicator_t *comm, plfs_error_t plfs_ret; Plfs_fd *pfd = NULL; char wpath[1024]; - size_t len = sizeof(int); - char key[] = "num_hostdirs"; rank = ompi_comm_rank ( comm ); getcwd( wpath, sizeof(wpath) ); sprintf( wpath,"%s/%s",wpath,filename ); - + if (OMPIO_PERM_NULL == fh->f_perm) { old_mask = umask(022); umask(old_mask); @@ -76,7 +76,7 @@ mca_fs_plfs_file_open (struct ompi_communicator_t *comm, amode = amode | O_WRONLY; if (access_mode & MPI_MODE_RDWR) amode = amode | O_RDWR; - if (access_mode & MPI_MODE_EXCL) { + if (access_mode & MPI_MODE_EXCL) { if( is_plfs_path(wpath) == 1 ) { //the file already exists return OMPI_ERROR; } @@ -87,32 +87,25 @@ mca_fs_plfs_file_open (struct ompi_communicator_t *comm, if (access_mode & MPI_MODE_CREATE) amode = amode | O_CREAT; - plfs_ret = plfs_open( &pfd, wpath, amode, 0, perm, NULL ); + plfs_ret = plfs_open( &pfd, wpath, amode, fh->f_rank, perm, NULL ); fh->f_fs_ptr = pfd; } - + comm->c_coll.coll_bcast ( &plfs_ret, 1, MPI_INT, 0, comm, comm->c_coll.coll_bcast_module); if ( PLFS_SUCCESS != plfs_ret ) { return OMPI_ERROR; } if (0 != rank) { - plfs_ret = plfs_open( &pfd, wpath, amode, 0, perm, NULL ); + plfs_ret = plfs_open( &pfd, wpath, amode, fh->f_rank, perm, NULL ); if (PLFS_SUCCESS != plfs_ret) { opal_output(0, "fs_plfs_file_open: Error in plfs_open:\n%s\n", strplfserr(plfs_ret)); - return OMPI_ERROR; + return OMPI_ERROR; } else { fh->f_fs_ptr = pfd; } - } - - if (mca_fs_plfs_num_hostdir > 
0) { - plfs_ret = plfs_setxattr( pfd, &mca_fs_plfs_num_hostdir, key, len ); - if (PLFS_SUCCESS != plfs_ret) { - opal_output(0, "fs_plfs_file_open: Error in plfs_setxattr:\n%s\n", strplfserr(plfs_ret)); - return OMPI_ERROR; - } } + return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/plfs/fs_plfs_file_set_size.c b/ompi/mca/fs/plfs/fs_plfs_file_set_size.c index 8ab6aeb45c9..6c24fb44c53 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_set_size.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_set_size.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fs/plfs/fs_plfs_file_sync.c b/ompi/mca/fs/plfs/fs_plfs_file_sync.c index c08cba1a25f..6bbf056b0f9 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_sync.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_sync.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fs/pvfs2/Makefile.am b/ompi/mca/fs/pvfs2/Makefile.am index 3232b53147a..4b8f6bfc578 100644 --- a/ompi/mca/fs/pvfs2/Makefile.am +++ b/ompi/mca/fs/pvfs2/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -40,7 +40,7 @@ fs_pvfs2_sources = \ fs_pvfs2_file_delete.c \ fs_pvfs2_file_sync.c \ fs_pvfs2_file_set_size.c \ - fs_pvfs2_file_get_size.c + fs_pvfs2_file_get_size.c AM_CPPFLAGS = $(fs_pvfs2_CPPFLAGS) diff --git a/ompi/mca/fs/pvfs2/configure.m4 b/ompi/mca/fs/pvfs2/configure.m4 index 1c5195155da..17539ba070f 100644 --- a/ompi/mca/fs/pvfs2/configure.m4 +++ b/ompi/mca/fs/pvfs2/configure.m4 @@ -6,21 +6,21 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008-2012 University of Houston. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # -# MCA_fs_pvfs2_CONFIG(action-if-can-compile, +# MCA_fs_pvfs2_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_fs_pvfs2_CONFIG],[ diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2.c b/ompi/mca/fs/pvfs2/fs_pvfs2.c index c2410f416c7..a7a2d0d8b3c 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2008-2016 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -31,7 +31,7 @@ #include "ompi/mca/fs/pvfs2/fs_pvfs2.h" #ifdef HAVE_SYS_STATFS_H -#include /* or */ +#include /* or */ #endif #ifdef HAVE_SYS_PARAM_H #include @@ -68,26 +68,23 @@ int mca_fs_pvfs2_component_init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ - + return OMPI_SUCCESS; -} +} struct mca_fs_base_module_1_0_0_t * mca_fs_pvfs2_component_file_query (mca_io_ompio_file_t *fh, int *priority) { /* The code in this function is based on the ADIO FS selection in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ - int err; - char *dir; - struct statfs fsbuf; char *tmp; /* The code in this function is based on the ADIO FS selection in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -96,18 +93,7 @@ mca_fs_pvfs2_component_file_query (mca_io_ompio_file_t *fh, int *priority) tmp = strchr (fh->f_filename, ':'); if (!tmp) { if (OMPIO_ROOT == fh->f_rank) { - do { - err = statfs (fh->f_filename, &fsbuf); - } while (err && (errno == ESTALE)); - - if (err && (errno == ENOENT)) { - mca_fs_base_get_parent_dir (fh->f_filename, &dir); - err = statfs (dir, &fsbuf); - free (dir); - } - if (fsbuf.f_type == PVFS2_SUPER_MAGIC) { - fh->f_fstype = PVFS2; - } + fh->f_fstype = mca_fs_base_get_fstype ( fh->f_filename ); } fh->f_comm->c_coll.coll_bcast (&(fh->f_fstype), 1, @@ -117,12 +103,12 @@ mca_fs_pvfs2_component_file_query (mca_io_ompio_file_t *fh, int *priority) fh->f_comm->c_coll.coll_bcast_module); } else { - if (!strncmp(fh->f_filename, "pvfs2:", 6) || + if (!strncmp(fh->f_filename, "pvfs2:", 6) || !strncmp(fh->f_filename, "PVFS2:", 6)) { fh->f_fstype = PVFS2; } } - + if (PVFS2 == fh->f_fstype) { if (*priority < 50) { *priority = 50; @@ -134,9 +120,9 @@ mca_fs_pvfs2_component_file_query (mca_io_ompio_file_t *fh, int *priority) } int mca_fs_pvfs2_component_file_unquery (mca_io_ompio_file_t *file) -{ +{ /* This function might be needed for some purposes later. 
for now it - * does not have anything to do since there are no steps which need + * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ return OMPI_SUCCESS; @@ -144,14 +130,14 @@ int mca_fs_pvfs2_component_file_unquery (mca_io_ompio_file_t *file) int mca_fs_pvfs2_module_init (mca_io_ompio_file_t *file) { - /* Make sure the file type is not overwritten by the last queried + /* Make sure the file type is not overwritten by the last queried * component */ file->f_fstype = PVFS2; return OMPI_SUCCESS; } - -int mca_fs_pvfs2_module_finalize (mca_io_ompio_file_t *file) + +int mca_fs_pvfs2_module_finalize (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2.h b/ompi/mca/fs/pvfs2/fs_pvfs2.h index 89109159abe..a2e372770c4 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2.h +++ b/ompi/mca/fs/pvfs2/fs_pvfs2.h @@ -5,15 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -63,23 +65,23 @@ OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_pvfs2_component * ****************************************************************** * ********* functions which are implemented in this module ********* * ****************************************************************** - */ + */ -int mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, - char *filename, +int mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, + const char *filename, int amode, - struct ompi_info_t *info, + struct ompi_info_t *info, mca_io_ompio_file_t *fh); int mca_fs_pvfs2_file_close (mca_io_ompio_file_t *fh); - + int mca_fs_pvfs2_file_delete (char *filename, struct ompi_info_t *info); int mca_fs_pvfs2_file_set_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE size); -int mca_fs_pvfs2_file_get_size (mca_io_ompio_file_t *fh, +int mca_fs_pvfs2_file_get_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *size); int mca_fs_pvfs2_file_sync (mca_io_ompio_file_t *fh); @@ -91,8 +93,8 @@ int mca_fs_pvfs2_file_seek (mca_io_ompio_file_t *fh, * ****************************************************************** * ************ functions implemented in this module end ************ * ****************************************************************** - */ - + */ + END_C_DECLS #endif /* MCA_FS_PVFS2_H */ diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_component.c b/ompi/mca/fs/pvfs2/fs_pvfs2_component.c index 9f14312b189..ef9bf933cc2 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_component.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_component.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2013 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -68,7 +70,7 @@ mca_fs_base_component_2_0_0_t mca_fs_pvfs2_component = { .fsm_init_query = mca_fs_pvfs2_component_init_query, /* get thread level */ .fsm_file_query = mca_fs_pvfs2_component_file_query, /* get priority and actions */ .fsm_file_unquery = mca_fs_pvfs2_component_file_unquery, /* undo what was done by previous function */ -};. +}; static int pvfs2_register(void) diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_close.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_close.c index 675cd1766ac..cc3ea3e1c3f 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_close.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_close.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* This code is based on the PVFS2 ADIO module in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -46,7 +46,7 @@ mca_fs_pvfs2_file_close (mca_io_ompio_file_t *fh) fh->f_fs_ptr = NULL; } /* - fh->f_comm->c_coll.coll_barrier (fh->f_comm, + fh->f_comm->c_coll.coll_barrier (fh->f_comm, fh->f_comm->c_coll.coll_barrier_module); close (fh->fd); */ diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c index 355770439ee..d69007fe6a1 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* This code is based on the PVFS2 ADIO module in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_get_size.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_get_size.c index 6eef1c521eb..f6b5059529d 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_get_size.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_get_size.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* This code is based on the PVFS2 ADIO module in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c index 5ae30413c30..f95e7f8aa94 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c @@ -5,20 +5,22 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* This code is based on the PVFS2 ADIO module in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -40,12 +42,12 @@ struct open_status_s { }; typedef struct open_status_s open_status; -static void fake_an_open(PVFS_fs_id id, - char *pvfs2_name, +static void fake_an_open(PVFS_fs_id id, + const char *pvfs2_name, int access_mode, - int stripe_width, + int stripe_width, PVFS_size stripe_size, - mca_fs_pvfs2 *pvfs2_fs, + mca_fs_pvfs2 *pvfs2_fs, open_status *o_status); /* * file_open_pvfs2: This is the same strategy as ROMIO's pvfs2 open @@ -55,8 +57,8 @@ static void fake_an_open(PVFS_fs_id id, * Returns: - Success if new file handle */ int -mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, - char* filename, +mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, + const char* filename, int access_mode, struct ompi_info_t *info, mca_io_ompio_file_t *fh) @@ -132,12 +134,12 @@ mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, o_status.error = -1; } else { - fake_an_open (pvfs2_id, + fake_an_open (pvfs2_id, pvfs2_path, - access_mode, + access_mode, fs_pvfs2_stripe_width, (PVFS_size)fs_pvfs2_stripe_size, - pvfs2_fs, + pvfs2_fs, &o_status); } pvfs2_fs->object_ref = o_status.object_ref; @@ -171,24 +173,24 @@ mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, fh->f_fs_ptr = pvfs2_fs; /* update the internal ompio structure to store stripe - size and stripe depth correctly. + size and stripe depth correctly. 
Hadi(to be done): For this read the stripe size and stripe depth from the file itself */ - + if (fs_pvfs2_stripe_size > 0 && fs_pvfs2_stripe_width > 0) { fh->f_stripe_size = fs_pvfs2_stripe_size; } - + return OMPI_SUCCESS; } -static void fake_an_open(PVFS_fs_id id, - char *pvfs2_name, +static void fake_an_open(PVFS_fs_id id, + const char *pvfs2_name, int access_mode, - int stripe_width, + int stripe_width, PVFS_size stripe_size, - mca_fs_pvfs2 *pvfs2_fs, + mca_fs_pvfs2 *pvfs2_fs, open_status *o_status) { int ret; @@ -199,7 +201,7 @@ static void fake_an_open(PVFS_fs_id id, PVFS_sys_dist *dist; memset(&attribs, 0, sizeof(PVFS_sys_attr)); - + attribs.owner = geteuid(); attribs.group = getegid(); attribs.perms = 0644; @@ -219,24 +221,24 @@ static void fake_an_open(PVFS_fs_id id, memset(&resp_getparent, 0, sizeof(resp_getparent)); memset(&resp_create, 0, sizeof(resp_create)); - ret = PVFS_sys_lookup(id, + ret = PVFS_sys_lookup(id, pvfs2_name, - &(pvfs2_fs->credentials), - &resp_lookup, + &(pvfs2_fs->credentials), + &resp_lookup, PVFS2_LOOKUP_LINK_FOLLOW); if ( ret == (-PVFS_ENOENT)) { if (access_mode & MPI_MODE_CREATE) { - ret = PVFS_sys_getparent(id, + ret = PVFS_sys_getparent(id, pvfs2_name, - &(pvfs2_fs->credentials), - &resp_getparent); + &(pvfs2_fs->credentials), + &resp_getparent); if (ret < 0) { opal_output (1, "pvfs_sys_getparent returns with %d\n", ret); o_status->error = ret; return; } - + /* Set the distribution stripe size if specified */ if (0 < stripe_size) { /* Note that the distribution is hardcoded here */ @@ -245,34 +247,34 @@ static void fake_an_open(PVFS_fs_id id, "strip_size", &stripe_size); if (ret < 0) { - opal_output (1, + opal_output (1, "pvfs_sys_dist_setparam returns with %d\n", ret); o_status->error = ret; } } /* Perform file creation */ - ret = PVFS_sys_create(resp_getparent.basename, - resp_getparent.parent_ref, + ret = PVFS_sys_create(resp_getparent.basename, + resp_getparent.parent_ref, attribs, - &(pvfs2_fs->credentials), - dist, - 
&resp_create); + &(pvfs2_fs->credentials), + dist, + &resp_create); /* #ifdef HAVE_PVFS2_CREATE_WITHOUT_LAYOUT - ret = PVFS_sys_create(resp_getparent.basename, - resp_getparent.parent_ref, - attribs, - &(pvfs2_fs->credentials), - dist, - &resp_create); - #else - ret = PVFS_sys_create(resp_getparent.basename, - resp_getparent.parent_ref, - attribs, - &(pvfs2_fs->credentials), - dist, - NULL, + ret = PVFS_sys_create(resp_getparent.basename, + resp_getparent.parent_ref, + attribs, + &(pvfs2_fs->credentials), + dist, + &resp_create); + #else + ret = PVFS_sys_create(resp_getparent.basename, + resp_getparent.parent_ref, + attribs, + &(pvfs2_fs->credentials), + dist, + NULL, &resp_create); #endif */ @@ -283,10 +285,10 @@ static void fake_an_open(PVFS_fs_id id, * less work for us and we can just open it up and return the * handle */ if (ret == (-PVFS_EEXIST)) { - ret = PVFS_sys_lookup(id, + ret = PVFS_sys_lookup(id, pvfs2_name, - &(pvfs2_fs->credentials), - &resp_lookup, + &(pvfs2_fs->credentials), + &resp_lookup, PVFS2_LOOKUP_LINK_FOLLOW); if ( ret < 0 ) { o_status->error = ret; @@ -303,12 +305,12 @@ static void fake_an_open(PVFS_fs_id id, o_status->error = ret; return; } - } + } else if (access_mode & MPI_MODE_EXCL) { /* lookup should not succeed if opened with EXCL */ o_status->error = -PVFS_EEXIST; return; - } + } else { o_status->object_ref = resp_lookup.ref; } diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_set_size.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_set_size.c index 541e986c3ff..551f5a03ee8 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_set_size.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_set_size.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* This code is based on the PVFS2 ADIO module in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -54,7 +54,7 @@ mca_fs_pvfs2_file_set_size (mca_io_ompio_file_t *fh, OMPIO_ROOT, fh->f_comm, fh->f_comm->c_coll.coll_bcast_module); - } + } else { fh->f_comm->c_coll.coll_bcast (&ret, 1, diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_sync.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_sync.c index e302a7fc91b..e4a7758ab24 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_sync.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_sync.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* This code is based on the PVFS2 ADIO module in ROMIO - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/fs/ufs/Makefile.am b/ompi/mca/fs/ufs/Makefile.am index fcd03d5638c..a66f1d8993f 100644 --- a/ompi/mca/fs/ufs/Makefile.am +++ b/ompi/mca/fs/ufs/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/fs/ufs/fs_ufs.c b/ompi/mca/fs/ufs/fs_ufs.c index 91b17e6ccd8..b16ec9c8191 100644 --- a/ompi/mca/fs/ufs/fs_ufs.c +++ b/ompi/mca/fs/ufs/fs_ufs.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -54,9 +54,9 @@ int mca_fs_ufs_component_init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ - + return OMPI_SUCCESS; -} +} struct mca_fs_base_module_1_0_0_t * mca_fs_ufs_component_file_query (mca_io_ompio_file_t *fh, int *priority) @@ -72,9 +72,9 @@ mca_fs_ufs_component_file_query (mca_io_ompio_file_t *fh, int *priority) } int mca_fs_ufs_component_file_unquery (mca_io_ompio_file_t *file) -{ +{ /* This function might be needed for some purposes later. 
for now it - * does not have anything to do since there are no steps which need + * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ return OMPI_SUCCESS; @@ -82,14 +82,14 @@ int mca_fs_ufs_component_file_unquery (mca_io_ompio_file_t *file) int mca_fs_ufs_module_init (mca_io_ompio_file_t *file) { - /* Make sure the file type is not overwritten by the last queried + /* Make sure the file type is not overwritten by the last queried * component */ file->f_fstype = UFS; return OMPI_SUCCESS; } - -int mca_fs_ufs_module_finalize (mca_io_ompio_file_t *file) + +int mca_fs_ufs_module_finalize (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/ufs/fs_ufs.h b/ompi/mca/fs/ufs/fs_ufs.h index c884d625b20..daebc5d505b 100644 --- a/ompi/mca/fs/ufs/fs_ufs.h +++ b/ompi/mca/fs/ufs/fs_ufs.h @@ -5,15 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -43,23 +45,23 @@ OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_ufs_component; * ****************************************************************** * ********* functions which are implemented in this module ********* * ****************************************************************** - */ + */ -int mca_fs_ufs_file_open (struct ompi_communicator_t *comm, - char *filename, +int mca_fs_ufs_file_open (struct ompi_communicator_t *comm, + const char *filename, int amode, - struct ompi_info_t *info, + struct ompi_info_t *info, mca_io_ompio_file_t *fh); int mca_fs_ufs_file_close (mca_io_ompio_file_t *fh); - + int mca_fs_ufs_file_delete (char *filename, struct ompi_info_t *info); int mca_fs_ufs_file_set_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE size); -int mca_fs_ufs_file_get_size (mca_io_ompio_file_t *fh, +int mca_fs_ufs_file_get_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *size); int mca_fs_ufs_file_sync (mca_io_ompio_file_t *fh); @@ -71,8 +73,8 @@ int mca_fs_ufs_file_seek (mca_io_ompio_file_t *fh, * ****************************************************************** * ************ functions implemented in this module end ************ * ****************************************************************** - */ - + */ + END_C_DECLS #endif /* MCA_FS_UFS_H */ diff --git a/ompi/mca/fs/ufs/fs_ufs_component.c b/ompi/mca/fs/ufs/fs_ufs_component.c index 37a9cd93108..d5f3c157daf 100644 --- a/ompi/mca/fs/ufs/fs_ufs_component.c +++ b/ompi/mca/fs/ufs/fs_ufs_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/fs/ufs/fs_ufs_file_close.c b/ompi/mca/fs/ufs/fs_ufs_file_close.c index 4ebedbd4c7d..4eb9938c953 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_close.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_close.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,7 +37,7 @@ int mca_fs_ufs_file_close (mca_io_ompio_file_t *fh) { - fh->f_comm->c_coll.coll_barrier (fh->f_comm, + fh->f_comm->c_coll.coll_barrier (fh->f_comm, fh->f_comm->c_coll.coll_barrier_module); /* close (*(int *)fh->fd);*/ close (fh->fd); diff --git a/ompi/mca/fs/ufs/fs_ufs_file_delete.c b/ompi/mca/fs/ufs/fs_ufs_file_delete.c index 3d33285092a..c585ee18da0 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_delete.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_delete.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fs/ufs/fs_ufs_file_get_size.c b/ompi/mca/fs/ufs/fs_ufs_file_get_size.c index d103f504040..fcbdb3861dc 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_get_size.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_get_size.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -38,7 +38,7 @@ int mca_fs_ufs_file_get_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *size) { - *size = lseek(fh->fd, 0, SEEK_END); + *size = lseek(fh->fd, 0, SEEK_END); if (-1 == *size) { perror ("lseek"); return OMPI_ERROR; diff --git a/ompi/mca/fs/ufs/fs_ufs_file_open.c b/ompi/mca/fs/ufs/fs_ufs_file_open.c index 051defa050c..dedca294685 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_open.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_open.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,8 +39,8 @@ * Returns: - Success if new file handle */ int -mca_fs_ufs_file_open (struct ompi_communicator_t *comm, - char* filename, +mca_fs_ufs_file_open (struct ompi_communicator_t *comm, + const char* filename, int access_mode, struct ompi_info_t *info, mca_io_ompio_file_t *fh) @@ -66,13 +68,15 @@ mca_fs_ufs_file_open (struct ompi_communicator_t *comm, amode = amode | O_WRONLY; if (access_mode & MPI_MODE_RDWR) amode = amode | O_RDWR; - + if ( 0 == rank ) { /* MODE_CREATE and MODE_EXCL can only be set by one process */ - if ( access_mode & MPI_MODE_CREATE ) - amode = amode | O_CREAT; - if (access_mode & MPI_MODE_EXCL) - amode = amode | O_EXCL; + if ( !(fh->f_flags & OMPIO_SHAREDFP_IS_SET)) { + if ( access_mode & MPI_MODE_CREATE ) + amode = amode | O_CREAT; + if (access_mode & MPI_MODE_EXCL) + amode = amode | O_EXCL; + } fh->fd = open (filename, amode, perm); ret = fh->fd; } diff --git a/ompi/mca/fs/ufs/fs_ufs_file_set_size.c b/ompi/mca/fs/ufs/fs_ufs_file_set_size.c index ae9f2cd8ad7..5cb64474a30 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_set_size.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_set_size.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/fs/ufs/fs_ufs_file_sync.c b/ompi/mca/fs/ufs/fs_ufs_file_sync.c index 2a6e26fe2b0..3added6009e 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_sync.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_sync.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/io/Makefile.am b/ompi/mca/io/Makefile.am index 24f7710da09..cc9e0a8bca4 100644 --- a/ompi/mca/io/Makefile.am +++ b/ompi/mca/io/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/io/base/Makefile.am b/ompi/mca/io/base/Makefile.am index 260ce4f0ac7..42e6427d8da 100644 --- a/ompi/mca/io/base/Makefile.am +++ b/ompi/mca/io/base/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/io/base/base.h b/ompi/mca/io/base/base.h index 33ae5374f8b..19e96b56933 100644 --- a/ompi/mca/io/base/base.h +++ b/ompi/mca/io/base/base.h @@ -5,19 +5,21 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** +/** * @file * * MCA io base framework public interface functions. @@ -66,7 +68,7 @@ BEGIN_C_DECLS */ OMPI_DECLSPEC int mca_io_base_find_available(bool enable_progress_threads, bool enable_mpi_threads); - + /** * Select an available component for a new file handle. * @@ -94,7 +96,7 @@ BEGIN_C_DECLS * * - If the \em preferred argument is NULL, the selection set is * defined to be all the components found during - * mca_io_base_find_available(). + * mca_io_base_find_available(). * - If \em preferred is not NULL, then the selection set is just * that component. (However, in this mode, we may make 2 passes * through the selection process -- more on this below). @@ -154,10 +156,10 @@ BEGIN_C_DECLS * the available components (rather than some pre-selected * module). 
See io.h for details. */ - OMPI_DECLSPEC int mca_io_base_delete(char *filename, + OMPI_DECLSPEC int mca_io_base_delete(const char *filename, struct ompi_info_t *info); - OMPI_DECLSPEC int mca_io_base_register_datarep(char *, + OMPI_DECLSPEC int mca_io_base_register_datarep(const char *, MPI_Datarep_conversion_function*, MPI_Datarep_conversion_function*, MPI_Datarep_extent_function*, diff --git a/ompi/mca/io/base/io_base_delete.c b/ompi/mca/io/base/io_base_delete.c index d1609237d4e..b00b9eebe49 100644 --- a/ompi/mca/io/base/io_base_delete.c +++ b/ompi/mca/io/base/io_base_delete.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -49,20 +51,20 @@ typedef struct avail_io_t avail_io_t; /* * Local functions */ -static opal_list_t *check_components(opal_list_t *components, - char *filename, struct ompi_info_t *info, +static opal_list_t *check_components(opal_list_t *components, + const char *filename, struct ompi_info_t *info, char **names, int num_names); static avail_io_t *check_one_component(const mca_base_component_t *component, - char *filename, struct ompi_info_t *info); + const char *filename, struct ompi_info_t *info); static avail_io_t *query(const mca_base_component_t *component, - char *filename, struct ompi_info_t *info); -static avail_io_t *query_2_0_0(const mca_io_base_component_2_0_0_t *io_component, - char *filename, struct ompi_info_t *info); + const char *filename, struct ompi_info_t *info); +static avail_io_t *query_2_0_0(const mca_io_base_component_2_0_0_t *io_component, + const char *filename, struct ompi_info_t *info); -static void unquery(avail_io_t *avail, char *filename, struct ompi_info_t *info); +static void unquery(avail_io_t *avail, const char *filename, struct ompi_info_t *info); -static int delete_file(avail_io_t *avail, char *filename, struct ompi_info_t *info); +static int delete_file(avail_io_t *avail, const char *filename, struct ompi_info_t *info); /* @@ -73,7 +75,7 @@ static OBJ_CLASS_INSTANCE(avail_io_t, opal_list_item_t, NULL, NULL); /* */ -int mca_io_base_delete(char *filename, struct ompi_info_t *info) +int mca_io_base_delete(const char *filename, struct ompi_info_t *info) { int err; opal_list_t *selectable; @@ -83,9 +85,9 @@ int mca_io_base_delete(char *filename, struct ompi_info_t *info) /* Announce */ opal_output_verbose(10, ompi_io_base_framework.framework_output, - "io:base:delete: deleting file: %s", + "io:base:delete: deleting file: %s", filename); - + /* See if a set of component was requested by the MCA parameter. Don't check for error. 
*/ @@ -98,7 +100,7 @@ int mca_io_base_delete(char *filename, struct ompi_info_t *info) and check them all */ err = OMPI_ERROR; - opal_output_verbose(10, ompi_io_base_framework.framework_output, + opal_output_verbose(10, ompi_io_base_framework.framework_output, "io:base:delete: Checking all available modules"); selectable = check_components(&ompi_io_base_framework.framework_components, filename, info, NULL, 0); @@ -145,11 +147,11 @@ int mca_io_base_delete(char *filename, struct ompi_info_t *info) } /* Announce the winner */ - + opal_output_verbose(10, ompi_io_base_framework.framework_output, - "io:base:delete: Selected io component %s", + "io:base:delete: Selected io component %s", selected.ai_component.v2_0_0.io_version.mca_component_name); - + return OMPI_SUCCESS; } @@ -177,8 +179,8 @@ static int avail_io_compare (opal_list_item_t **itema, * be only those who returned that they want to run, and put them in * priority order. */ -static opal_list_t *check_components(opal_list_t *components, - char *filename, struct ompi_info_t *info, +static opal_list_t *check_components(opal_list_t *components, + const char *filename, struct ompi_info_t *info, char **names, int num_names) { int i; @@ -227,9 +229,9 @@ static opal_list_t *check_components(opal_list_t *components, } } } - + /* If we didn't find any available components, return an error */ - + if (0 == opal_list_get_size(selectable)) { OBJ_RELEASE(selectable); return NULL; @@ -247,26 +249,26 @@ static opal_list_t *check_components(opal_list_t *components, * Check a single component */ static avail_io_t *check_one_component(const mca_base_component_t *component, - char *filename, struct ompi_info_t *info) + const char *filename, struct ompi_info_t *info) { avail_io_t *avail; avail = query(component, filename, info); if (NULL != avail) { - avail->ai_priority = (avail->ai_priority < 100) ? + avail->ai_priority = (avail->ai_priority < 100) ? avail->ai_priority : 100; avail->ai_priority = (avail->ai_priority < 0) ? 
0 : avail->ai_priority; - opal_output_verbose(10, ompi_io_base_framework.framework_output, - "io:base:delete: component available: %s, priority: %d", - component->mca_component_name, + opal_output_verbose(10, ompi_io_base_framework.framework_output, + "io:base:delete: component available: %s, priority: %d", + component->mca_component_name, avail->ai_priority); } else { - opal_output_verbose(10, ompi_io_base_framework.framework_output, + opal_output_verbose(10, ompi_io_base_framework.framework_output, "io:base:delete: component not available: %s", component->mca_component_name); } - + return avail; } @@ -279,18 +281,18 @@ static avail_io_t *check_one_component(const mca_base_component_t *component, * Take any version of a io module, query it, and return the right * module struct */ -static avail_io_t *query(const mca_base_component_t *component, - char *filename, struct ompi_info_t *info) +static avail_io_t *query(const mca_base_component_t *component, + const char *filename, struct ompi_info_t *info) { const mca_io_base_component_2_0_0_t *ioc_200; /* io v2.0.0 */ - if (2 == component->mca_major_version && - 0 == component->mca_minor_version && - 0 == component->mca_release_version) { + if (MCA_BASE_VERSION_MAJOR == component->mca_major_version && + MCA_BASE_VERSION_MINOR == component->mca_minor_version && + MCA_BASE_VERSION_RELEASE == component->mca_release_version) { ioc_200 = (mca_io_base_component_2_0_0_t *) component; - + return query_2_0_0(ioc_200, filename, info); } @@ -301,7 +303,7 @@ static avail_io_t *query(const mca_base_component_t *component, static avail_io_t *query_2_0_0(const mca_io_base_component_2_0_0_t *component, - char *filename, struct ompi_info_t *info) + const char *filename, struct ompi_info_t *info) { bool usable; int priority, ret; @@ -313,7 +315,7 @@ static avail_io_t *query_2_0_0(const mca_io_base_component_2_0_0_t *component, avail = NULL; private_data = NULL; usable = false; - ret = component->io_delete_query(filename, info, 
&private_data, &usable, + ret = component->io_delete_query(filename, info, &private_data, &usable, &priority); if (OMPI_SUCCESS == ret && usable) { avail = OBJ_NEW(avail_io_t); @@ -331,7 +333,7 @@ static avail_io_t *query_2_0_0(const mca_io_base_component_2_0_0_t *component, * Unquery functions **************************************************************************/ -static void unquery(avail_io_t *avail, char *filename, struct ompi_info_t *info) +static void unquery(avail_io_t *avail, const char *filename, struct ompi_info_t *info) { const mca_io_base_component_2_0_0_t *ioc_200; @@ -356,7 +358,7 @@ static void unquery(avail_io_t *avail, char *filename, struct ompi_info_t *info) /* * Invoke the component's delete function */ -static int delete_file(avail_io_t *avail, char *filename, struct ompi_info_t *info) +static int delete_file(avail_io_t *avail, const char *filename, struct ompi_info_t *info) { const mca_io_base_component_2_0_0_t *ioc_200; diff --git a/ompi/mca/io/base/io_base_file_select.c b/ompi/mca/io/base/io_base_file_select.c index 94804f9272e..5c822a5307c 100644 --- a/ompi/mca/io/base/io_base_file_select.c +++ b/ompi/mca/io/base/io_base_file_select.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -64,15 +64,15 @@ typedef struct avail_io_t avail_io_t; /* * Local functions */ -static opal_list_t *check_components(opal_list_t *components, - ompi_file_t *file, +static opal_list_t *check_components(opal_list_t *components, + ompi_file_t *file, char **names, int num_names); -static avail_io_t *check_one_component(ompi_file_t *file, +static avail_io_t *check_one_component(ompi_file_t *file, const mca_base_component_t *component); -static avail_io_t *query(const mca_base_component_t *component, +static avail_io_t *query(const mca_base_component_t *component, ompi_file_t *file); -static avail_io_t *query_2_0_0(const mca_io_base_component_2_0_0_t *io_component, +static avail_io_t *query_2_0_0(const mca_io_base_component_2_0_0_t *io_component, ompi_file_t *file); static void unquery(avail_io_t *avail, ompi_file_t *file); @@ -91,7 +91,7 @@ static OBJ_CLASS_INSTANCE(avail_io_t, opal_list_item_t, NULL, NULL); * file. It is used to select which io component will be * active for a given file. 
*/ -int mca_io_base_file_select(ompi_file_t *file, +int mca_io_base_file_select(ompi_file_t *file, mca_base_component_t *preferred) { int err; @@ -103,9 +103,9 @@ int mca_io_base_file_select(ompi_file_t *file, /* Announce */ opal_output_verbose(10, ompi_io_base_framework.framework_output, - "io:base:file_select: new file: %s", + "io:base:file_select: new file: %s", file->f_filename); - + /* Initialize all the relevant pointers, since they're used as sentinel values */ @@ -125,16 +125,16 @@ int mca_io_base_file_select(ompi_file_t *file, if (NULL != preferred) { str = &(preferred->mca_component_name[0]); - opal_output_verbose(10, ompi_io_base_framework.framework_output, + opal_output_verbose(10, ompi_io_base_framework.framework_output, "io:base:file_select: Checking preferred module: %s", str); - selectable = check_components(&ompi_io_base_framework.framework_components, + selectable = check_components(&ompi_io_base_framework.framework_components, file, &str, 1); - + /* If we didn't get a preferred module, then call again without a preferred module. This makes the logic below dramatically simpler. */ - + if (NULL == selectable) { return mca_io_base_file_select(file, NULL); } @@ -145,11 +145,11 @@ int mca_io_base_file_select(ompi_file_t *file, /* Nope -- a specific [set of] component[s] was not requested. Go check them all. 
*/ - + else { - opal_output_verbose(10, ompi_io_base_framework.framework_output, + opal_output_verbose(10, ompi_io_base_framework.framework_output, "io:base:file_select: Checking all available modules"); - selectable = check_components(&ompi_io_base_framework.framework_components, + selectable = check_components(&ompi_io_base_framework.framework_components, file, NULL, 0); } @@ -214,39 +214,39 @@ int mca_io_base_file_select(ompi_file_t *file, return err; } - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = mca_fs_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, OMPI_ENABLE_THREAD_MULTIPLE))) { return err; } - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = mca_fcoll_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, OMPI_ENABLE_THREAD_MULTIPLE))) { return err; } - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = mca_fbtl_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, OMPI_ENABLE_THREAD_MULTIPLE))) { return err; } - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = mca_sharedfp_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, OMPI_ENABLE_THREAD_MULTIPLE))) { return err; } } /* Finally -- intialize the selected module. */ - + if (OMPI_SUCCESS != (err = module_init(file))) { return err; } /* Announce the winner */ - + opal_output_verbose(10, ompi_io_base_framework.framework_output, - "io:base:file_select: Selected io module %s", + "io:base:file_select: Selected io module %s", selected.ai_component.v2_0_0.io_version.mca_component_name); - + return OMPI_SUCCESS; } @@ -273,8 +273,8 @@ static int avail_io_compare (opal_list_item_t **itema, * (component, module) tuples (of type avail_io_t) to be only those * who returned that they want to run, and put them in priority order. 
*/ -static opal_list_t *check_components(opal_list_t *components, - ompi_file_t *file, +static opal_list_t *check_components(opal_list_t *components, + ompi_file_t *file, char **names, int num_names) { int i; @@ -291,7 +291,7 @@ static opal_list_t *check_components(opal_list_t *components, /* Scan through the list of components. This nested loop is O(N^2), but we should never have too many components and/or names, so this *hopefully* shouldn't matter... */ - + OPAL_LIST_FOREACH(cli, components, mca_base_component_list_item_t) { component = cli->cli_component; @@ -322,9 +322,9 @@ static opal_list_t *check_components(opal_list_t *components, } } } - + /* If we didn't find any available components, return an error */ - + if (0 == opal_list_get_size(selectable)) { OBJ_RELEASE(selectable); return NULL; @@ -340,27 +340,27 @@ static opal_list_t *check_components(opal_list_t *components, /* * Check a single component */ -static avail_io_t *check_one_component(ompi_file_t *file, +static avail_io_t *check_one_component(ompi_file_t *file, const mca_base_component_t *component) { avail_io_t *avail; avail = query(component, file); if (NULL != avail) { - avail->ai_priority = (avail->ai_priority < 100) ? + avail->ai_priority = (avail->ai_priority < 100) ? avail->ai_priority : 100; avail->ai_priority = (avail->ai_priority < 0) ? 
0 : avail->ai_priority; - opal_output_verbose(10, ompi_io_base_framework.framework_output, - "io:base:file_select: component available: %s, priority: %d", - component->mca_component_name, + opal_output_verbose(10, ompi_io_base_framework.framework_output, + "io:base:file_select: component available: %s, priority: %d", + component->mca_component_name, avail->ai_priority); } else { - opal_output_verbose(10, ompi_io_base_framework.framework_output, + opal_output_verbose(10, ompi_io_base_framework.framework_output, "io:base:file_select: component not available: %s", component->mca_component_name); } - + return avail; } @@ -373,7 +373,7 @@ static avail_io_t *check_one_component(ompi_file_t *file, * Take any version of a io module, query it, and return the right * module struct */ -static avail_io_t *query(const mca_base_component_t *component, +static avail_io_t *query(const mca_base_component_t *component, ompi_file_t *file) { const mca_io_base_component_2_0_0_t *ioc_200; @@ -384,7 +384,7 @@ static avail_io_t *query(const mca_base_component_t *component, MCA_BASE_VERSION_MINOR == component->mca_minor_version && MCA_BASE_VERSION_RELEASE == component->mca_release_version) { ioc_200 = (mca_io_base_component_2_0_0_t *) component; - + return query_2_0_0(ioc_200, file); } diff --git a/ompi/mca/io/base/io_base_find_available.c b/ompi/mca/io/base/io_base_find_available.c index 297277502ce..4b194e239ae 100644 --- a/ompi/mca/io/base/io_base_find_available.c +++ b/ompi/mca/io/base/io_base_find_available.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,10 +36,10 @@ /* * Private functions */ -static int init_query(const mca_base_component_t *ls, +static int init_query(const mca_base_component_t *ls, bool enable_progress_threads, bool enable_mpi_threads); -static int init_query_2_0_0(const mca_base_component_t *ls, +static int init_query_2_0_0(const mca_base_component_t *ls, bool enable_progress_threads, bool enable_mpi_threads); @@ -63,17 +63,17 @@ int mca_io_base_find_available(bool enable_progress_threads, /* The list of components that we should check has already been established in mca_io_base_open. */ - + OPAL_LIST_FOREACH_SAFE(cli, next, &ompi_io_base_framework.framework_components, mca_base_component_list_item_t) { const mca_base_component_t *component = cli->cli_component; /* Call a subroutine to do the work, because the component may represent different versions of the io MCA. */ - + if (OMPI_SUCCESS != init_query(component, enable_progress_threads, enable_mpi_threads)) { - + /* If the component doesn't want to run, then close it. It's already had its close() method invoked; now close it out of the DSO repository (if it's there). */ @@ -93,14 +93,14 @@ int mca_io_base_find_available(bool enable_progress_threads, * Query a component, see if it wants to run at all. If it does, save * some information. If it doesn't, close it. */ -static int init_query(const mca_base_component_t *m, +static int init_query(const mca_base_component_t *m, bool enable_progress_threads, bool enable_mpi_threads) { int ret; opal_output_verbose(10, ompi_io_base_framework.framework_output, - "io:find_available: querying io component %s", + "io:find_available: querying io component %s", m->mca_component_name); /* This component has already been successfully opened. 
So now @@ -109,13 +109,13 @@ static int init_query(const mca_base_component_t *m, if (2 == m->mca_type_major_version && 0 == m->mca_type_minor_version && 0 == m->mca_type_release_version) { - ret = init_query_2_0_0(m, enable_progress_threads, + ret = init_query_2_0_0(m, enable_progress_threads, enable_mpi_threads); } else { /* Unrecognized io API version */ opal_output_verbose(10, ompi_io_base_framework.framework_output, - "io:find_available: unrecognized io API version (%d.%d.%d)", + "io:find_available: unrecognized io API version (%d.%d.%d)", m->mca_type_major_version, m->mca_type_minor_version, m->mca_type_release_version); @@ -126,12 +126,12 @@ static int init_query(const mca_base_component_t *m, /* Query done -- look at the return value to see what happened */ if (OMPI_SUCCESS != ret) { - opal_output_verbose(10, ompi_io_base_framework.framework_output, - "io:find_available: io component %s is not available", + opal_output_verbose(10, ompi_io_base_framework.framework_output, + "io:find_available: io component %s is not available", m->mca_component_name); } else { - opal_output_verbose(10, ompi_io_base_framework.framework_output, - "io:find_available: io component %s is available", + opal_output_verbose(10, ompi_io_base_framework.framework_output, + "io:find_available: io component %s is available", m->mca_component_name); } @@ -144,11 +144,11 @@ static int init_query(const mca_base_component_t *m, /* * Query a specific component, io v2.0.0 */ -static int init_query_2_0_0(const mca_base_component_t *component, +static int init_query_2_0_0(const mca_base_component_t *component, bool enable_progress_threads, bool enable_mpi_threads) { - mca_io_base_component_2_0_0_t *io = + mca_io_base_component_2_0_0_t *io = (mca_io_base_component_2_0_0_t *) component; return io->io_init_query(enable_progress_threads, diff --git a/ompi/mca/io/base/io_base_frame.c b/ompi/mca/io/base/io_base_frame.c index 9c2fa0053d9..27a32fc47fd 100644 --- a/ompi/mca/io/base/io_base_frame.c +++ 
b/ompi/mca/io/base/io_base_frame.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/io/base/io_base_register_datarep.c b/ompi/mca/io/base/io_base_register_datarep.c index 7a4df09fbc9..8890e06b989 100644 --- a/ompi/mca/io/base/io_base_register_datarep.c +++ b/ompi/mca/io/base/io_base_register_datarep.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,7 +29,7 @@ int -mca_io_base_register_datarep(char *datarep, +mca_io_base_register_datarep(const char *datarep, MPI_Datarep_conversion_function* read_fn, MPI_Datarep_conversion_function* write_fn, MPI_Datarep_extent_function* extent_fn, @@ -51,7 +53,7 @@ mca_io_base_register_datarep(char *datarep, v200 = (mca_io_base_component_2_0_0_t *) component; /* return first non-good error-code */ - tmp = v200->io_register_datarep(datarep, read_fn, write_fn, + tmp = v200->io_register_datarep(datarep, read_fn, write_fn, extent_fn, state); ret = (ret == OMPI_SUCCESS) ? tmp : ret; } diff --git a/ompi/mca/io/base/io_base_request.c b/ompi/mca/io/base/io_base_request.c index e0bfee04808..6af310709c1 100644 --- a/ompi/mca/io/base/io_base_request.c +++ b/ompi/mca/io/base/io_base_request.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/io/base/io_base_request.h b/ompi/mca/io/base/io_base_request.h index 502ccaa4463..2edafc0121d 100644 --- a/ompi/mca/io/base/io_base_request.h +++ b/ompi/mca/io/base/io_base_request.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** diff --git a/ompi/mca/io/configure.m4 b/ompi/mca/io/configure.m4 index a57d77fee6e..6b3fe4ec3c9 100644 --- a/ompi/mca/io/configure.m4 +++ b/ompi/mca/io/configure.m4 @@ -1,18 +1,18 @@ # -*- shell-script -*- # -# Copyright (c) 2006-2007 Los Alamos National Security, LLC. -# All rights reserved. +# Copyright (c) 2006-2007 Los Alamos National Security, LLC. +# All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # MCA_ompi_io_CONFIG(project_name, framework_name) # ------------------------------------------- -AC_DEFUN([MCA_ompi_io_CONFIG], +AC_DEFUN([MCA_ompi_io_CONFIG], [ AC_ARG_ENABLE([mpi-io], [AC_HELP_STRING([--disable-mpi-io], diff --git a/ompi/mca/io/io.h b/ompi/mca/io/io.h index 554ca2ff0c9..5caa7b6079a 100644 --- a/ompi/mca/io/io.h +++ b/ompi/mca/io/io.h @@ -6,17 +6,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -43,7 +46,7 @@ union mca_io_base_modules_t; /** - * Version of IO component interface that we're using. + * Version of IO component interface that we're using. * * The IO component is being designed to ensure that it can * simultaneously support multiple component versions in a single @@ -80,24 +83,24 @@ typedef int (*mca_io_base_component_init_query_fn_t) (bool enable_progress_threads, bool enable_mpi_threads); typedef const struct mca_io_base_module_2_0_0_t * (*mca_io_base_component_file_query_2_0_0_fn_t) - (struct ompi_file_t *file, struct mca_io_base_file_t **private_data, + (struct ompi_file_t *file, struct mca_io_base_file_t **private_data, int *priority); typedef int (*mca_io_base_component_file_unquery_fn_t) (struct ompi_file_t *file, struct mca_io_base_file_t *private_data); typedef int (*mca_io_base_component_file_delete_query_fn_t) - (char *filename, struct ompi_info_t *info, + (const char *filename, struct ompi_info_t *info, struct mca_io_base_delete_t **private_data, bool *usable, int *priority); typedef int (*mca_io_base_component_file_delete_select_fn_t) - (char *filename, struct ompi_info_t *info, + (const char *filename, struct ompi_info_t *info, struct mca_io_base_delete_t *private_data); typedef int (*mca_io_base_component_file_delete_unselect_fn_t) - (char *filename, struct ompi_info_t *info, + (const char *filename, struct ompi_info_t *info, struct mca_io_base_delete_t *private_data); typedef int (*mca_io_base_component_register_datarep_fn_t)( - char *, + const char *, MPI_Datarep_conversion_function*, MPI_Datarep_conversion_function*, MPI_Datarep_extent_function*, @@ -136,7 +139,7 @@ typedef union mca_io_base_components_t mca_io_base_components_t; */ typedef int (*mca_io_base_module_file_open_fn_t) - (struct ompi_communicator_t *comm, char *filename, int amode, + (struct ompi_communicator_t *comm, const char *filename, int amode, struct 
ompi_info_t *info, struct ompi_file_t *fh); typedef int (*mca_io_base_module_file_close_fn_t)(struct ompi_file_t *fh); @@ -155,57 +158,73 @@ typedef int (*mca_io_base_module_file_get_info_fn_t) typedef int (*mca_io_base_module_file_set_view_fn_t) (struct ompi_file_t *fh, MPI_Offset disp, struct ompi_datatype_t *etype, - struct ompi_datatype_t *filetype, char *datarep, + struct ompi_datatype_t *filetype, const char *datarep, struct ompi_info_t *info); typedef int (*mca_io_base_module_file_get_view_fn_t) - (struct ompi_file_t *fh, MPI_Offset *disp, + (struct ompi_file_t *fh, MPI_Offset *disp, struct ompi_datatype_t **etype, struct ompi_datatype_t **filetype, char *datarep); typedef int (*mca_io_base_module_file_read_at_fn_t) (struct ompi_file_t *fh, MPI_Offset offset, void *buf, - int count, struct ompi_datatype_t *datatype, + int count, struct ompi_datatype_t *datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_read_at_all_fn_t) (struct ompi_file_t *fh, MPI_Offset offset, void *buf, - int count, struct ompi_datatype_t *datatype, + int count, struct ompi_datatype_t *datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_write_at_fn_t) - (struct ompi_file_t *fh, MPI_Offset offset, void *buf, - int count, struct ompi_datatype_t *datatype, + (struct ompi_file_t *fh, MPI_Offset offset, const void *buf, + int count, struct ompi_datatype_t *datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_write_at_all_fn_t) - (struct ompi_file_t *fh, MPI_Offset offset, void *buf, - int count, struct ompi_datatype_t *datatype, + (struct ompi_file_t *fh, MPI_Offset offset, const void *buf, + int count, struct ompi_datatype_t *datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_iread_at_fn_t) (struct ompi_file_t *fh, MPI_Offset offset, void *buf, - int count, struct ompi_datatype_t *datatype, + int count, struct ompi_datatype_t *datatype, struct 
ompi_request_t **request); typedef int (*mca_io_base_module_file_iwrite_at_fn_t) + (struct ompi_file_t *fh, MPI_Offset offset, const void *buf, + int count, struct ompi_datatype_t *datatype, + struct ompi_request_t **request); + +typedef int (*mca_io_base_module_file_iread_at_all_fn_t) (struct ompi_file_t *fh, MPI_Offset offset, void *buf, - int count, struct ompi_datatype_t *datatype, + int count, struct ompi_datatype_t *datatype, + struct ompi_request_t **request); +typedef int (*mca_io_base_module_file_iwrite_at_all_fn_t) + (struct ompi_file_t *fh, MPI_Offset offset, const void *buf, + int count, struct ompi_datatype_t *datatype, struct ompi_request_t **request); typedef int (*mca_io_base_module_file_read_fn_t) (struct ompi_file_t *fh, void *buf, int count, struct ompi_datatype_t * - datatype, struct ompi_status_public_t *status); + datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_read_all_fn_t) (struct ompi_file_t *fh, void *buf, int count, struct ompi_datatype_t * - datatype, struct ompi_status_public_t *status); + datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_write_fn_t) - (struct ompi_file_t *fh, void *buf, int count, struct ompi_datatype_t * + (struct ompi_file_t *fh, const void *buf, int count, struct ompi_datatype_t * datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_write_all_fn_t) - (struct ompi_file_t *fh, void *buf, int count, struct ompi_datatype_t * + (struct ompi_file_t *fh, const void *buf, int count, struct ompi_datatype_t * datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_iread_fn_t) - (struct ompi_file_t *fh, void *buf, int count, - struct ompi_datatype_t *datatype, struct ompi_request_t **request); + (struct ompi_file_t *fh, void *buf, int count, + struct ompi_datatype_t *datatype, struct ompi_request_t **request); typedef int (*mca_io_base_module_file_iwrite_fn_t) - (struct ompi_file_t *fh, void 
*buf, int count, + (struct ompi_file_t *fh, const void *buf, int count, + struct ompi_datatype_t *datatype, struct ompi_request_t **request); + +typedef int (*mca_io_base_module_file_iread_all_fn_t) + (struct ompi_file_t *fh, void *buf, int count, + struct ompi_datatype_t *datatype, struct ompi_request_t **request); +typedef int (*mca_io_base_module_file_iwrite_all_fn_t) + (struct ompi_file_t *fh, const void *buf, int count, struct ompi_datatype_t *datatype, struct ompi_request_t **request); typedef int (*mca_io_base_module_file_seek_fn_t) @@ -216,22 +235,22 @@ typedef int (*mca_io_base_module_file_get_byte_offset_fn_t) (struct ompi_file_t *fh, MPI_Offset offset, MPI_Offset *disp); typedef int (*mca_io_base_module_file_read_shared_fn_t) - (struct ompi_file_t *fh, void *buf, int count, + (struct ompi_file_t *fh, void *buf, int count, struct ompi_datatype_t *datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_write_shared_fn_t) - (struct ompi_file_t *fh, void *buf, int count, + (struct ompi_file_t *fh, const void *buf, int count, struct ompi_datatype_t *datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_iread_shared_fn_t) - (struct ompi_file_t *fh, void *buf, int count, + (struct ompi_file_t *fh, void *buf, int count, struct ompi_datatype_t *datatype, struct ompi_request_t **request); typedef int (*mca_io_base_module_file_iwrite_shared_fn_t) - (struct ompi_file_t *fh, void *buf, int count, + (struct ompi_file_t *fh, const void *buf, int count, struct ompi_datatype_t *datatype, struct ompi_request_t **request); typedef int (*mca_io_base_module_file_read_ordered_fn_t) - (struct ompi_file_t *fh, void *buf, int count, + (struct ompi_file_t *fh, void *buf, int count, struct ompi_datatype_t *datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_write_ordered_fn_t) - (struct ompi_file_t *fh, void *buf, int count, + (struct ompi_file_t *fh, const void *buf, int count, struct 
ompi_datatype_t *datatype, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_seek_shared_fn_t) (struct ompi_file_t *fh, MPI_Offset offset, int whence); @@ -244,33 +263,33 @@ typedef int (*mca_io_base_module_file_read_at_all_begin_fn_t) typedef int (*mca_io_base_module_file_read_at_all_end_fn_t) (struct ompi_file_t *fh, void *buf, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_write_at_all_begin_fn_t) - (struct ompi_file_t *fh, MPI_Offset offset, void *buf, + (struct ompi_file_t *fh, MPI_Offset offset, const void *buf, int count, struct ompi_datatype_t *datatype); typedef int (*mca_io_base_module_file_write_at_all_end_fn_t) - (struct ompi_file_t *fh, void *buf, struct ompi_status_public_t *status); + (struct ompi_file_t *fh, const void *buf, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_read_all_begin_fn_t) - (struct ompi_file_t *fh, void *buf, int count, + (struct ompi_file_t *fh, void *buf, int count, struct ompi_datatype_t *datatype); typedef int (*mca_io_base_module_file_read_all_end_fn_t) (struct ompi_file_t *fh, void *buf, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_write_all_begin_fn_t) - (struct ompi_file_t *fh, void *buf, int count, + (struct ompi_file_t *fh, const void *buf, int count, struct ompi_datatype_t *datatype); typedef int (*mca_io_base_module_file_write_all_end_fn_t) - (struct ompi_file_t *fh, void *buf, struct ompi_status_public_t *status); + (struct ompi_file_t *fh, const void *buf, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_read_ordered_begin_fn_t) - (struct ompi_file_t *fh, void *buf, int count, + (struct ompi_file_t *fh, void *buf, int count, struct ompi_datatype_t *datatype); typedef int (*mca_io_base_module_file_read_ordered_end_fn_t) (struct ompi_file_t *fh, void *buf, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_write_ordered_begin_fn_t) - (struct ompi_file_t 
*fh, void *buf, int count, + (struct ompi_file_t *fh, const void *buf, int count, struct ompi_datatype_t *datatype); typedef int (*mca_io_base_module_file_write_ordered_end_fn_t) - (struct ompi_file_t *fh, void *buf, struct ompi_status_public_t *status); + (struct ompi_file_t *fh, const void *buf, struct ompi_status_public_t *status); typedef int (*mca_io_base_module_file_get_type_extent_fn_t) - (struct ompi_file_t *fh, struct ompi_datatype_t *datatype, + (struct ompi_file_t *fh, struct ompi_datatype_t *datatype, MPI_Aint *extent); typedef int (*mca_io_base_module_file_set_atomicity_fn_t) @@ -285,14 +304,14 @@ struct mca_io_base_module_2_0_0_t { mca_io_base_module_file_open_fn_t io_module_file_open; mca_io_base_module_file_close_fn_t io_module_file_close; - + mca_io_base_module_file_set_size_fn_t io_module_file_set_size; mca_io_base_module_file_preallocate_fn_t io_module_file_preallocate; mca_io_base_module_file_get_size_fn_t io_module_file_get_size; - mca_io_base_module_file_get_amode_fn_t io_module_file_get_amode; + mca_io_base_module_file_get_amode_fn_t io_module_file_get_amode; mca_io_base_module_file_set_info_fn_t io_module_file_set_info; mca_io_base_module_file_get_info_fn_t io_module_file_get_info; - + mca_io_base_module_file_set_view_fn_t io_module_file_set_view; mca_io_base_module_file_get_view_fn_t io_module_file_get_view; @@ -300,22 +319,26 @@ struct mca_io_base_module_2_0_0_t { mca_io_base_module_file_read_at_all_fn_t io_module_file_read_at_all; mca_io_base_module_file_write_at_fn_t io_module_file_write_at; mca_io_base_module_file_write_at_all_fn_t io_module_file_write_at_all; - - mca_io_base_module_file_iread_at_fn_t io_module_file_iread_at; - mca_io_base_module_file_iwrite_at_fn_t io_module_file_iwrite_at; - + + mca_io_base_module_file_iread_at_fn_t io_module_file_iread_at; + mca_io_base_module_file_iwrite_at_fn_t io_module_file_iwrite_at; + mca_io_base_module_file_iread_at_all_fn_t io_module_file_iread_at_all; + 
mca_io_base_module_file_iwrite_at_all_fn_t io_module_file_iwrite_at_all; + mca_io_base_module_file_read_fn_t io_module_file_read; mca_io_base_module_file_read_all_fn_t io_module_file_read_all; mca_io_base_module_file_write_fn_t io_module_file_write; mca_io_base_module_file_write_all_fn_t io_module_file_write_all; - + mca_io_base_module_file_iread_fn_t io_module_file_iread; mca_io_base_module_file_iwrite_fn_t io_module_file_iwrite; - + mca_io_base_module_file_iread_all_fn_t io_module_file_iread_all; + mca_io_base_module_file_iwrite_all_fn_t io_module_file_iwrite_all; + mca_io_base_module_file_seek_fn_t io_module_file_seek; mca_io_base_module_file_get_position_fn_t io_module_file_get_position; mca_io_base_module_file_get_byte_offset_fn_t io_module_file_get_byte_offset; - + mca_io_base_module_file_read_shared_fn_t io_module_file_read_shared; mca_io_base_module_file_write_shared_fn_t io_module_file_write_shared; mca_io_base_module_file_iread_shared_fn_t io_module_file_iread_shared; @@ -324,7 +347,7 @@ struct mca_io_base_module_2_0_0_t { mca_io_base_module_file_write_ordered_fn_t io_module_file_write_ordered; mca_io_base_module_file_seek_shared_fn_t io_module_file_seek_shared; mca_io_base_module_file_get_position_shared_fn_t io_module_file_get_position_shared; - + mca_io_base_module_file_read_at_all_begin_fn_t io_module_file_read_at_all_begin; mca_io_base_module_file_read_at_all_end_fn_t io_module_file_read_at_all_end; mca_io_base_module_file_write_at_all_begin_fn_t io_module_file_write_at_all_begin; @@ -337,9 +360,9 @@ struct mca_io_base_module_2_0_0_t { mca_io_base_module_file_read_ordered_end_fn_t io_module_file_read_ordered_end; mca_io_base_module_file_write_ordered_begin_fn_t io_module_file_write_ordered_begin; mca_io_base_module_file_write_ordered_end_fn_t io_module_file_write_ordered_end; - + mca_io_base_module_file_get_type_extent_fn_t io_module_file_get_type_extent; - + mca_io_base_module_file_set_atomicity_fn_t io_module_file_set_atomicity; 
mca_io_base_module_file_get_atomicity_fn_t io_module_file_get_atomicity; mca_io_base_module_file_sync_fn_t io_module_file_sync; diff --git a/ompi/mca/io/ompio/Makefile.am b/ompi/mca/io/ompio/Makefile.am index 8724ee10eff..b0efdf633d5 100644 --- a/ompi/mca/io/ompio/Makefile.am +++ b/ompi/mca/io/ompio/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2012 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -42,7 +42,6 @@ libmca_io_ompio_la_LDFLAGS = -module -avoid-version sources = \ io_ompio.h \ - io_ompio_nbc.h \ io_ompio_request.h \ io_ompio.c \ io_ompio_component.c \ @@ -52,5 +51,4 @@ sources = \ io_ompio_file_open.c \ io_ompio_file_write.c \ io_ompio_file_read.c \ - io_ompio_request.c \ - io_ompio_nbc.c + io_ompio_request.c diff --git a/ompi/mca/io/ompio/configure.m4 b/ompi/mca/io/ompio/configure.m4 new file mode 100644 index 00000000000..31eb6bf7607 --- /dev/null +++ b/ompi/mca/io/ompio/configure.m4 @@ -0,0 +1,21 @@ +# -*- shell-script -*- +# +# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_ompi_io_ompio_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_ompi_io_ompio_CONFIG],[ + AC_CONFIG_FILES([ompi/mca/io/ompio/Makefile]) + + AS_IF([test "$enable_io_ompio" != "no"], + [$1], + [$2]) +])dnl diff --git a/ompi/mca/io/ompio/io_ompio.c b/ompi/mca/io/ompio/io_ompio.c index 93e792d8155..3fd403e4eac 100644 --- a/ompi/mca/io/ompio/io_ompio.c +++ b/ompi/mca/io/ompio/io_ompio.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,9 +16,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -38,7 +38,7 @@ #include #ifdef HAVE_SYS_STATFS_H -#include /* or */ +#include /* or */ #endif #ifdef HAVE_SYS_PARAM_H #include @@ -51,8 +51,8 @@ #endif #include "io_ompio.h" -print_queue *coll_write_time=NULL; -print_queue *coll_read_time=NULL; +mca_io_ompio_print_queue *coll_write_time=NULL; +mca_io_ompio_print_queue *coll_read_time=NULL; static int mca_io_ompio_create_groups(mca_io_ompio_file_t *fh, @@ -61,12 +61,12 @@ static int mca_io_ompio_create_groups(mca_io_ompio_file_t *fh, static int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE **start_offsets_lens, - OMPI_MPI_OFFSET_TYPE **end_offsets, + OMPI_MPI_OFFSET_TYPE **end_offsets, OMPI_MPI_OFFSET_TYPE **aggr_bytes_per_group, OMPI_MPI_OFFSET_TYPE *bytes_per_group, int **decision_list, size_t bytes_per_proc, - int *is_aggregator, + int *is_aggregator, int *ompio_grouping_flag); static int mca_io_ompio_retain_initial_groups(mca_io_ompio_file_t *fh); @@ -102,12 +102,11 @@ static int mca_io_ompio_merge_groups(mca_io_ompio_file_t *fh, int num_merge_aggrs); - int ompi_io_ompio_set_file_defaults (mca_io_ompio_file_t *fh) { if (NULL != fh) { - ompi_datatype_t *types[2], *default_file_view; + ompi_datatype_t *types[2]; int blocklen[2] = {1, 1}; OPAL_PTRDIFF_TYPE d[2], base; int i; @@ -123,45 +122,41 @@ int ompi_io_ompio_set_file_defaults (mca_io_ompio_file_t *fh) fh->f_position_in_file_view = 0; fh->f_index_in_file_view = 0; fh->f_total_bytes = 0; - + fh->f_init_procs_per_group = -1; fh->f_init_procs_in_group = NULL; - + fh->f_procs_per_group = -1; fh->f_procs_in_group = NULL; fh->f_init_num_aggrs = -1; fh->f_init_aggr_list = NULL; - ompi_datatype_create_contiguous(1048576, - &ompi_mpi_byte.dt, - &default_file_view); - ompi_datatype_commit (&default_file_view); - - fh->f_etype = &ompi_mpi_byte.dt; - fh->f_filetype = default_file_view; - - + /* Default file View */ fh->f_iov_type = MPI_DATATYPE_NULL; 
fh->f_stripe_size = mca_io_ompio_bytes_per_agg; /*Decoded iovec of the file-view*/ fh->f_decoded_iov = NULL; - + + fh->f_etype = NULL; + fh->f_filetype = NULL; + fh->f_orig_filetype = NULL; + mca_io_ompio_set_view_internal(fh, 0, &ompi_mpi_byte.dt, - default_file_view, - "native", + &ompi_mpi_byte.dt, + "native", fh->f_info); - + /*Create a derived datatype for the created iovec */ types[0] = &ompi_mpi_long.dt; types[1] = &ompi_mpi_long.dt; - d[0] = (OPAL_PTRDIFF_TYPE) fh->f_decoded_iov; - d[1] = (OPAL_PTRDIFF_TYPE) &fh->f_decoded_iov[0].iov_len; + d[0] = (OPAL_PTRDIFF_TYPE) fh->f_decoded_iov; + d[1] = (OPAL_PTRDIFF_TYPE) &fh->f_decoded_iov[0].iov_len; base = d[0]; for (i=0 ; i<2 ; i++) { @@ -195,8 +190,8 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, int block = 1; /* allocate an initial iovec, will grow if needed */ - iov = (struct iovec *) malloc - (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec)); + iov = (struct iovec *) calloc + (OMPIO_IOVEC_INITIAL_SIZE, sizeof (struct iovec)); if (NULL == iov) { opal_output(1, "OUT OF MEMORY\n"); return OMPI_ERR_OUT_OF_RESOURCE; @@ -206,7 +201,7 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, j = fh->f_index_in_file_view; bytes_to_write = max_data; k = 0; - + while (bytes_to_write) { OPAL_PTRDIFF_TYPE disp; /* reallocate if needed */ @@ -220,7 +215,7 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, } } - if (fh->f_decoded_iov[j].iov_len - + if (fh->f_decoded_iov[j].iov_len - (fh->f_total_bytes - sum_previous_counts) <= 0) { sum_previous_counts += fh->f_decoded_iov[j].iov_len; j = j + 1; @@ -233,18 +228,18 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, fh->f_total_bytes = 0; } } - - disp = (OPAL_PTRDIFF_TYPE)(fh->f_decoded_iov[j].iov_base) + + + disp = (OPAL_PTRDIFF_TYPE)(fh->f_decoded_iov[j].iov_base) + (fh->f_total_bytes - sum_previous_counts); iov[k].iov_base = (IOVBASE_TYPE 
*)(intptr_t)(disp + fh->f_offset); - if ((fh->f_decoded_iov[j].iov_len - - (fh->f_total_bytes - sum_previous_counts)) + if ((fh->f_decoded_iov[j].iov_len - + (fh->f_total_bytes - sum_previous_counts)) >= bytes_to_write) { iov[k].iov_len = bytes_to_write; } else { - iov[k].iov_len = fh->f_decoded_iov[j].iov_len - + iov[k].iov_len = fh->f_decoded_iov[j].iov_len - (fh->f_total_bytes - sum_previous_counts); } @@ -258,7 +253,7 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, *f_iov = iov; if (mca_io_ompio_record_offset_info){ - + int tot_entries=0, *recvcounts=NULL, *displs=NULL; mca_io_ompio_offlen_array_t *per_process=NULL; mca_io_ompio_offlen_array_t *all_process=NULL; @@ -308,7 +303,7 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, (MPI_Aint)iov[i].iov_len; per_process[i].process_id = fh->f_rank; } - + types[0] = &ompi_mpi_long.dt; types[1] = &ompi_mpi_long.dt; types[2] = &ompi_mpi_int.dt; @@ -401,20 +396,20 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, fh->f_comm->c_coll.coll_gatherv_module); ompi_datatype_destroy(&io_array_type); - + if (OMPIO_ROOT == fh->f_rank){ - + ompi_io_ompio_sort_offlen(all_process, tot_entries, sorted); - + for (i=0;if_size; i++){ @@ -425,6 +420,18 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, } } fp = fopen("fileview_info.out", "w+"); + if ( NULL == fp ) { + for (i=0; if_size; i++) { + free(adj_matrix[i]); + } + free(adj_matrix); + free(sorted); + free(all_process); + free(per_process); + free(recvcounts); + free(displs); + return MPI_ERR_OTHER; + } fprintf(fp,"FILEVIEW\n"); column_list = (int *) malloc ( m * sizeof(int)); if (NULL == column_list){ @@ -457,7 +464,7 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, free(displs); return OMPI_ERR_OUT_OF_RESOURCE; } - + row_index = (int *) malloc ((fh->f_size + 1) * sizeof(int)); if (NULL == row_index){ @@ -489,7 +496,7 @@ int 
ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, column_index++; r_index++; } - + } row_index[i+1]= r_index; } @@ -551,20 +558,20 @@ int ompi_io_ompio_set_explicit_offset (mca_io_ompio_file_t *fh, if ( fh->f_view_size > 0 ) { /* starting offset of the current copy of the filew view */ - fh->f_offset = (fh->f_view_extent * + fh->f_offset = (fh->f_view_extent * ((offset*fh->f_etype_size) / fh->f_view_size)) + fh->f_disp; - - + + /* number of bytes used within the current copy of the file view */ fh->f_total_bytes = (offset*fh->f_etype_size) % fh->f_view_size; i = fh->f_total_bytes; - - - /* Initialize the block id and the starting offset of the current block + + + /* Initialize the block id and the starting offset of the current block within the current copy of the file view to zero */ fh->f_index_in_file_view = 0; fh->f_position_in_file_view = 0; - + /* determine block id that the offset is located in and the starting offset of that block */ k = fh->f_decoded_iov[fh->f_index_in_file_view].iov_len; @@ -578,28 +585,28 @@ int ompi_io_ompio_set_explicit_offset (mca_io_ompio_file_t *fh, return OMPI_SUCCESS; } -int ompi_io_ompio_decode_datatype (struct mca_io_ompio_file_t *fh, +int ompi_io_ompio_decode_datatype (struct mca_io_ompio_file_t *fh, ompi_datatype_t *datatype, int count, - void *buf, + const void *buf, size_t *max_data, struct iovec **iov, uint32_t *iovec_count) -{ +{ + - opal_convertor_t convertor; size_t remaining_length = 0; uint32_t i; uint32_t temp_count; struct iovec *temp_iov=NULL; size_t temp_data; - + opal_convertor_clone (fh->f_convertor, &convertor, 0); - if (OMPI_SUCCESS != opal_convertor_prepare_for_send (&convertor, + if (OMPI_SUCCESS != opal_convertor_prepare_for_send (&convertor, &(datatype->super), count, buf)) { @@ -623,11 +630,11 @@ int ompi_io_ompio_decode_datatype (struct mca_io_ompio_file_t *fh, return OMPI_ERR_OUT_OF_RESOURCE; } - while (0 == opal_convertor_raw(&convertor, + while (0 == 
opal_convertor_raw(&convertor, temp_iov, - &temp_count, + &temp_count, &temp_data)) { -#if 0 +#if 0 printf ("%d: New raw extraction (iovec_count = %d, max_data = %lu)\n", fh->f_rank,temp_count, (unsigned long)temp_data); for (i = 0; i < temp_count; i++) { @@ -681,8 +688,8 @@ int ompi_io_ompio_decode_datatype (struct mca_io_ompio_file_t *fh, if (0 == fh->f_rank) { printf ("%d Entries: \n",*iovec_count); for (i=0 ; i<*iovec_count ; i++) { - printf ("\t{%p, %d}\n", - (*iov)[i].iov_base, + printf ("\t{%p, %d}\n", + (*iov)[i].iov_base, (*iov)[i].iov_len); } } @@ -729,15 +736,15 @@ int ompi_io_ompio_sort (mca_io_ompio_io_array_t *io_array, while (!done) { left = j*2+1; right = j*2+2; - if ((left <= heap_size) && + if ((left <= heap_size) && (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { largest = left; } else { largest = j; } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > io_array[temp_arr[largest]].offset)) { largest = right; } @@ -756,8 +763,8 @@ int ompi_io_ompio_sort (mca_io_ompio_io_array_t *io_array, for (i = num_entries-1; i >=1; --i) { temp = temp_arr[0]; temp_arr[0] = temp_arr[i]; - temp_arr[i] = temp; - heap_size--; + temp_arr[i] = temp; + heap_size--; done = 0; j = 0; largest = j; @@ -765,17 +772,17 @@ int ompi_io_ompio_sort (mca_io_ompio_io_array_t *io_array, while (!done) { left = j*2+1; right = j*2+2; - - if ((left <= heap_size) && - (io_array[temp_arr[left]].offset > + + if ((left <= heap_size) && + (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) { largest = left; } else { largest = j; } - if ((right <= heap_size) && - (io_array[temp_arr[right]].offset > + if ((right <= heap_size) && + (io_array[temp_arr[right]].offset > io_array[temp_arr[largest]].offset)) { largest = right; } @@ -836,15 +843,15 @@ int ompi_io_ompio_sort_iovec (struct iovec *iov, while (!done) { left = j*2+1; right = j*2+2; - if ((left <= heap_size) && + if 
((left <= heap_size) && (iov[temp_arr[left]].iov_base > iov[temp_arr[j]].iov_base)) { largest = left; } else { largest = j; } - if ((right <= heap_size) && - (iov[temp_arr[right]].iov_base > + if ((right <= heap_size) && + (iov[temp_arr[right]].iov_base > iov[temp_arr[largest]].iov_base)) { largest = right; } @@ -863,8 +870,8 @@ int ompi_io_ompio_sort_iovec (struct iovec *iov, for (i = num_entries-1; i >=1; --i) { temp = temp_arr[0]; temp_arr[0] = temp_arr[i]; - temp_arr[i] = temp; - heap_size--; + temp_arr[i] = temp; + heap_size--; done = 0; j = 0; largest = j; @@ -872,17 +879,17 @@ int ompi_io_ompio_sort_iovec (struct iovec *iov, while (!done) { left = j*2+1; right = j*2+2; - - if ((left <= heap_size) && - (iov[temp_arr[left]].iov_base > + + if ((left <= heap_size) && + (iov[temp_arr[left]].iov_base > iov[temp_arr[j]].iov_base)) { largest = left; } else { largest = j; } - if ((right <= heap_size) && - (iov[temp_arr[right]].iov_base > + if ((right <= heap_size) && + (iov[temp_arr[right]].iov_base > iov[temp_arr[largest]].iov_base)) { largest = right; } @@ -1015,7 +1022,7 @@ int ompi_io_ompio_set_aggregator_props (struct mca_io_ompio_file_t *fh, size_t bytes_per_proc) { int j,procs_per_group = 0; - + /*If only one process used, no need to do aggregator selection!*/ if (fh->f_size == 1){ num_aggregators = 1; @@ -1024,13 +1031,29 @@ int ompi_io_ompio_set_aggregator_props (struct mca_io_ompio_file_t *fh, fh->f_flags |= OMPIO_AGGREGATOR_IS_SET; if (-1 == num_aggregators) { - mca_io_ompio_create_groups(fh,bytes_per_proc); + if ( SIMPLE == mca_io_ompio_grouping_option || + NO_REFINEMENT == mca_io_ompio_grouping_option ) { + fh->f_aggregator_index = 0; + fh->f_final_num_aggrs = fh->f_init_num_aggrs; + fh->f_procs_per_group = fh->f_init_procs_per_group; + + fh->f_procs_in_group = (int*)malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == fh->f_procs_in_group) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (j=0 ; 
jf_procs_per_group ; j++) { + fh->f_procs_in_group[j] = fh->f_init_procs_in_group[j]; + } + } + else { + mca_io_ompio_create_groups(fh,bytes_per_proc); + } return OMPI_SUCCESS; } - //Forced number of aggregators - else - { + //Forced number of aggregators /* calculate the offset at which each group of processes will start */ procs_per_group = ceil ((float)fh->f_size/num_aggregators); @@ -1053,9 +1076,9 @@ int ompi_io_ompio_set_aggregator_props (struct mca_io_ompio_file_t *fh, } fh->f_aggregator_index = 0; + fh->f_final_num_aggrs = num_aggregators; return OMPI_SUCCESS; - } } @@ -1083,7 +1106,7 @@ int ompi_io_ompio_break_file_view (mca_io_ompio_file_t *fh, /* allocate an initial iovec, will grow if needed */ - temp_iov = (struct iovec *) malloc + temp_iov = (struct iovec *) malloc (count * sizeof (struct iovec)); if (NULL == temp_iov) { opal_output(1, "OUT OF MEMORY\n"); @@ -1111,7 +1134,7 @@ int ompi_io_ompio_break_file_view (mca_io_ompio_file_t *fh, else { temp_iov[k].iov_base = iov[i].iov_base; temp_iov[k].iov_len = stripe_size-temp; - current_offset = (OPAL_PTRDIFF_TYPE)(temp_iov[k].iov_base) + + current_offset = (OPAL_PTRDIFF_TYPE)(temp_iov[k].iov_base) + temp_iov[k].iov_len; remaining = iov[i].iov_len - temp_iov[k].iov_len; k++; @@ -1184,7 +1207,7 @@ int ompi_io_ompio_distribute_file_view (mca_io_ompio_file_t *fh, /* calculate how many entries in the broken iovec belong to each aggregator */ for (i=0 ; if_rank%fh->f_aggregator_index) { for (i=0; if_size ; i++) { @@ -1240,7 +1263,7 @@ int ompi_io_ompio_distribute_file_view (mca_io_ompio_file_t *fh, MPI_INT, i*fh->f_aggregator_index, OMPIO_TAG_GATHER, - MCA_PML_BASE_SEND_STANDARD, + MCA_PML_BASE_SEND_STANDARD, fh->f_comm, &sendreq[i])); if (OMPI_SUCCESS != rc) { @@ -1341,7 +1364,7 @@ int ompi_io_ompio_distribute_file_view (mca_io_ompio_file_t *fh, } for (i=0 ; if_iov_type, i*fh->f_aggregator_index, OMPIO_TAG_GATHERV, - MCA_PML_BASE_SEND_STANDARD, + MCA_PML_BASE_SEND_STANDARD, fh->f_comm, &sendreq[i])); if 
(OMPI_SUCCESS != rc) { @@ -1444,10 +1467,14 @@ int ompi_io_ompio_distribute_file_view (mca_io_ompio_file_t *fh, } */ exit: - for (i=0 ; if_aggregator_index, OMPIO_TAG_GATHERV, - MCA_PML_BASE_SEND_STANDARD, + MCA_PML_BASE_SEND_STANDARD, fh->f_comm, &sendreq[i])); if (OMPI_SUCCESS != rc) { @@ -1817,7 +1841,7 @@ int ompi_io_ompio_scatter_data (mca_io_ompio_file_t *fh, bytes_remaining = total_bytes_recv; while (bytes_remaining) { - temp = (int)((OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base/stripe_size) + temp = (int)((OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base/stripe_size) % num_aggregators; if (part) { @@ -1830,7 +1854,7 @@ int ompi_io_ompio_scatter_data (mca_io_ompio_file_t *fh, bytes_remaining -= part; temp_position[temp] += part; part = 0; - current ++; + current ++; } else { memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)receive_buf + @@ -1901,30 +1925,30 @@ void mca_io_ompio_get_bytes_per_agg ( int *bytes_per_agg) } /* Print queue related function implementations */ -int ompi_io_ompio_set_print_queue (print_queue **q, +int ompi_io_ompio_set_print_queue (mca_io_ompio_print_queue **q, int queue_type){ int ret = OMPI_SUCCESS; - switch(queue_type) { + switch(queue_type) { - case WRITE_PRINT_QUEUE: - *q = coll_write_time; - break; - case READ_PRINT_QUEUE: - *q = coll_read_time; - break; - } + case WRITE_PRINT_QUEUE: + *q = coll_write_time; + break; + case READ_PRINT_QUEUE: + *q = coll_read_time; + break; + } - if (NULL == q){ - ret = OMPI_ERROR; - } + if (NULL == q){ + ret = OMPI_ERROR; + } return ret; -} +} -int ompi_io_ompio_initialize_print_queue(print_queue *q){ +int ompi_io_ompio_initialize_print_queue(mca_io_ompio_print_queue *q){ int ret = OMPI_SUCCESS; q->first = 0; @@ -1933,10 +1957,10 @@ int ompi_io_ompio_initialize_print_queue(print_queue *q){ return ret; } int ompi_io_ompio_register_print_entry (int queue_type, - print_entry x){ - + mca_io_ompio_print_entry x){ + int ret = OMPI_SUCCESS; - print_queue *q=NULL; + mca_io_ompio_print_queue *q=NULL; ret 
= ompi_io_ompio_set_print_queue(&q, queue_type); @@ -1952,12 +1976,11 @@ int ompi_io_ompio_register_print_entry (int queue_type, } return ret; } +int ompi_io_ompio_unregister_print_entry (int queue_type, + mca_io_ompio_print_entry *x){ -int ompi_io_ompio_unregister_print_entry (int queue_type, - print_entry *x){ - int ret = OMPI_SUCCESS; - print_queue *q=NULL; + mca_io_ompio_print_queue *q=NULL; ret = ompi_io_ompio_set_print_queue(&q, queue_type); if (ret != OMPI_ERROR){ if (q->count <= 0){ @@ -1975,57 +1998,57 @@ int ompi_io_ompio_unregister_print_entry (int queue_type, int ompi_io_ompio_empty_print_queue(int queue_type){ int ret = OMPI_SUCCESS; - print_queue *q=NULL; + mca_io_ompio_print_queue *q=NULL; ret = ompi_io_ompio_set_print_queue(&q, queue_type); - - assert (ret != OMPI_ERROR); + + assert (ret != OMPI_ERROR); if (q->count == 0) return 1; else return 0; - + } int ompi_io_ompio_full_print_queue(int queue_type){ - + int ret = OMPI_SUCCESS; - print_queue *q=NULL; + mca_io_ompio_print_queue *q=NULL; ret = ompi_io_ompio_set_print_queue(&q, queue_type); - - assert ( ret != OMPI_ERROR); + + assert ( ret != OMPI_ERROR); if (q->count < QUEUESIZE) return 0; else return 1; - + } int ompi_io_ompio_print_time_info(int queue_type, char *name, mca_io_ompio_file_t *fh){ - + int i = 0, j=0, nprocs_for_coll = 0, ret = OMPI_SUCCESS, count = 0; double *time_details = NULL, *final_sum = NULL; double *final_max = NULL, *final_min = NULL; double *final_time_details=NULL; - print_queue *q=NULL; - + mca_io_ompio_print_queue *q=NULL; + ret = ompi_io_ompio_set_print_queue(&q, queue_type); - - assert (ret != OMPI_ERROR); + + assert (ret != OMPI_ERROR); nprocs_for_coll = q->entry[0].nprocs_for_coll; time_details = (double *) malloc (4*sizeof(double)); if ( NULL == time_details){ ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; - + } - + if (!fh->f_rank){ - + final_min = (double *) malloc (3*sizeof(double)); if ( NULL == final_min){ ret = OMPI_ERR_OUT_OF_RESOURCE; @@ -2044,8 +2067,8 @@ int 
ompi_io_ompio_print_time_info(int queue_type, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - - final_time_details = + + final_time_details = (double *)malloc (fh->f_size * 4 * sizeof(double)); if (NULL == final_time_details){ @@ -2058,12 +2081,12 @@ int ompi_io_ompio_print_time_info(int queue_type, final_time_details[i] = 0.0; } - + } - + for (i = 0; i < 4; i++){ time_details[i] = 0.0; - } + } if (q->count > 0){ for (i=0; i < q->count; i++){ @@ -2088,7 +2111,7 @@ int ompi_io_ompio_print_time_info(int queue_type, 0, fh->f_comm, fh->f_comm->c_coll.coll_gather_module); - + if (!fh->f_rank){ @@ -2117,7 +2140,7 @@ int ompi_io_ompio_print_time_info(int queue_type, } } - + printf ("\n# MAX-%s AVG-%s MIN-%s MAX-COMM AVG-COMM MIN-COMM", name, name, name); printf (" MAX-EXCH AVG-EXCH MIN-EXCH\n"); @@ -2125,9 +2148,9 @@ int ompi_io_ompio_print_time_info(int queue_type, final_max[0], final_sum[0]/nprocs_for_coll, final_min[0], final_max[1], final_sum[1]/nprocs_for_coll, final_min[1], final_max[2], final_sum[2]/nprocs_for_coll, final_min[2]); - + } - + exit: if ( NULL != final_max){ free(final_max); @@ -2148,26 +2171,26 @@ int ompi_io_ompio_print_time_info(int queue_type, return ret; } - + int mca_io_ompio_create_groups(mca_io_ompio_file_t *fh, size_t bytes_per_proc) { - + int is_aggregator = 0; int final_aggr = 0; int final_num_aggrs = 0; int ompio_grouping_flag = 0; - + int *decision_list = NULL; - + OMPI_MPI_OFFSET_TYPE *start_offsets_lens = NULL; OMPI_MPI_OFFSET_TYPE *end_offsets = NULL; OMPI_MPI_OFFSET_TYPE bytes_per_group = 0; OMPI_MPI_OFFSET_TYPE *aggr_bytes_per_group = NULL; - + mca_io_ompio_prepare_to_group(fh, &start_offsets_lens, - &end_offsets, + &end_offsets, &aggr_bytes_per_group, &bytes_per_group, &decision_list, @@ -2175,9 +2198,9 @@ int mca_io_ompio_create_groups(mca_io_ompio_file_t *fh, &is_aggregator, &ompio_grouping_flag); - switch(ompio_grouping_flag){ - - case OMPIO_SPLIT: + switch(ompio_grouping_flag){ + + case OMPIO_SPLIT: 
mca_io_ompio_split_initial_groups(fh, start_offsets_lens, end_offsets, @@ -2190,16 +2213,16 @@ int mca_io_ompio_create_groups(mca_io_ompio_file_t *fh, decision_list, is_aggregator); break; - - case OMPIO_RETAIN: - + + case OMPIO_RETAIN: + mca_io_ompio_retain_initial_groups(fh); - - break; + break; + + + } - } - //Set aggregator index fh->f_aggregator_index = 0; @@ -2214,7 +2237,7 @@ int mca_io_ompio_create_groups(mca_io_ompio_file_t *fh, MPI_SUM, fh->f_comm, fh->f_comm->c_coll.coll_allreduce_module); - + //Set final number of aggregators in file handle fh->f_final_num_aggrs = final_num_aggrs; @@ -2223,8 +2246,8 @@ int mca_io_ompio_create_groups(mca_io_ompio_file_t *fh, /*if(fh->f_rank == 0){ printf("Rank %d : has final_num_aggrs = %d\n",fh->f_rank,final_num_aggrs); }*/ - - //Print final grouping + + //Print final grouping /*if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { for (j=0 ; jf_procs_per_group; j++) { printf ("%d: Proc %d: %d\n", fh->f_rank, j, fh->f_procs_in_group[j]); @@ -2232,7 +2255,7 @@ int mca_io_ompio_create_groups(mca_io_ompio_file_t *fh, printf("\n\n"); } - + */ if (NULL != start_offsets_lens) { free (start_offsets_lens); @@ -2259,9 +2282,9 @@ int mca_io_ompio_merge_initial_groups(mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *aggr_bytes_per_group, int *decision_list, int is_aggregator){ - + OMPI_MPI_OFFSET_TYPE sum_bytes = 0; - + MPI_Request *sendreq = NULL; int start = 0; @@ -2270,20 +2293,20 @@ int mca_io_ompio_merge_initial_groups(mca_io_ompio_file_t *fh, int j = 0; int r = 0; - int merge_pair_flag = 4; + int merge_pair_flag = 4; int first_merge_flag = 4; int *merge_aggrs = NULL; int is_new_aggregator= 0; - + if(is_aggregator){ i = 0; sum_bytes = 0; //go through the decision list //Find the aggregators that could merge - + while(i < fh->f_init_num_aggrs){ while(1){ if( i >= fh->f_init_num_aggrs){ @@ -2348,9 +2371,9 @@ int mca_io_ompio_merge_initial_groups(mca_io_ompio_file_t *fh, } if(fh->f_rank == merge_aggrs[0]) 
is_new_aggregator = 1; - + for( j = 0 ; j < end-start+1 ;j++){ - if(fh->f_rank == merge_aggrs[j]){ + if(fh->f_rank == merge_aggrs[j]){ mca_io_ompio_merge_groups(fh, merge_aggrs, end-start+1); @@ -2364,7 +2387,7 @@ int mca_io_ompio_merge_initial_groups(mca_io_ompio_file_t *fh, } i++; } - + }//end old aggregators //New aggregators communicate new grouping info to the groups @@ -2396,7 +2419,7 @@ int mca_io_ompio_merge_initial_groups(mca_io_ompio_file_t *fh, MCA_PML_BASE_SEND_STANDARD, fh->f_comm, &sendreq[r++])); - + } } else { @@ -2409,13 +2432,13 @@ int mca_io_ompio_merge_initial_groups(mca_io_ompio_file_t *fh, OMPIO_PROCS_PER_GROUP_TAG, fh->f_comm, MPI_STATUS_IGNORE)); - + fh->f_procs_in_group = (int*)malloc (fh->f_procs_per_group * sizeof(int)); if (NULL == fh->f_procs_in_group) { opal_output (1, "OUT OF MEMORY\n"); return OMPI_ERR_OUT_OF_RESOURCE; } - + MCA_PML_CALL(recv(fh->f_procs_in_group, fh->f_procs_per_group, MPI_INT, @@ -2424,13 +2447,13 @@ int mca_io_ompio_merge_initial_groups(mca_io_ompio_file_t *fh, fh->f_comm, MPI_STATUS_IGNORE)); } - + if(is_new_aggregator) { ompi_request_wait_all (r, sendreq, MPI_STATUSES_IGNORE); free (sendreq); } - - return OMPI_SUCCESS; + + return OMPI_SUCCESS; } int mca_io_ompio_split_initial_groups(mca_io_ompio_file_t *fh, @@ -2438,19 +2461,19 @@ int mca_io_ompio_split_initial_groups(mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *end_offsets, OMPI_MPI_OFFSET_TYPE bytes_per_group){ - + int size_new_group = 0; int size_old_group = 0; int size_last_group = 0; int size_smallest_group = 0; int num_groups = 0; - + OMPI_MPI_OFFSET_TYPE max_cci = 0; OMPI_MPI_OFFSET_TYPE min_cci = 0; size_new_group = ceil ((float)mca_io_ompio_bytes_per_agg * fh->f_init_procs_per_group/ bytes_per_group); size_old_group = fh->f_init_procs_per_group; - + mca_io_ompio_split_a_group(fh, start_offsets_lens, end_offsets, @@ -2465,7 +2488,7 @@ int mca_io_ompio_split_initial_groups(mca_io_ompio_file_t *fh, //Just use size as returned by split group 
size_last_group = size_smallest_group; break; - + case UNIFORM_DISTRIBUTION: if(size_smallest_group <= OMPIO_UNIFORM_DIST_THRESHOLD * size_new_group){ //uneven split need to call split again @@ -2484,13 +2507,13 @@ int mca_io_ompio_split_initial_groups(mca_io_ompio_file_t *fh, size_last_group = size_smallest_group; } break; - + case CONTIGUITY: - + while(1){ if((max_cci < OMPIO_CONTG_THRESHOLD) && (size_new_group < size_old_group)){ - + size_new_group = floor( (float) (size_new_group + size_old_group ) / 2 ); mca_io_ompio_split_a_group(fh, start_offsets_lens, @@ -2507,14 +2530,14 @@ int mca_io_ompio_split_initial_groups(mca_io_ompio_file_t *fh, } size_last_group = size_smallest_group; break; - + case OPTIMIZE_GROUPING: //This case is a combination of Data volume, contiguity and uniform distribution while(1){ if((max_cci < OMPIO_CONTG_THRESHOLD) && (size_new_group < size_old_group)){ //can be a better condition //monitor the previous iteration - //break if it has not changed. + //break if it has not changed. 
size_new_group = ceil( (float) (size_new_group + size_old_group ) / 2 ); mca_io_ompio_split_a_group(fh, start_offsets_lens, @@ -2529,7 +2552,7 @@ int mca_io_ompio_split_initial_groups(mca_io_ompio_file_t *fh, break; } } - + if(size_smallest_group <= OMPIO_UNIFORM_DIST_THRESHOLD * size_new_group){ //uneven split need to call split again if( size_old_group % num_groups == 0 ){ @@ -2546,7 +2569,7 @@ int mca_io_ompio_split_initial_groups(mca_io_ompio_file_t *fh, //Considered uniform size_last_group = size_smallest_group; } - + break; } @@ -2558,9 +2581,9 @@ int mca_io_ompio_split_initial_groups(mca_io_ompio_file_t *fh, return OMPI_SUCCESS; } - + int mca_io_ompio_retain_initial_groups(mca_io_ompio_file_t *fh){ - + int i = 0; fh->f_procs_per_group = fh->f_init_procs_per_group; @@ -2586,8 +2609,8 @@ int mca_io_ompio_merge_groups(mca_io_ompio_file_t *fh, int *displs; - - + + sizes_old_group = (int*)malloc(num_merge_aggrs * sizeof(int)); if (NULL == sizes_old_group) { opal_output (1, "OUT OF MEMORY\n"); @@ -2603,7 +2626,7 @@ int mca_io_ompio_merge_groups(mca_io_ompio_file_t *fh, } - //merge_aggrs[0] is considered the new aggregator + //merge_aggrs[0] is considered the new aggregator //New aggregator collects group sizes of the groups to be merged ompi_io_ompio_allgather_array (&fh->f_init_procs_per_group, 1, @@ -2615,10 +2638,10 @@ int mca_io_ompio_merge_groups(mca_io_ompio_file_t *fh, merge_aggrs, num_merge_aggrs, fh->f_comm); - + fh->f_procs_per_group = 0; - + for( i = 0; i < num_merge_aggrs; i++){ fh->f_procs_per_group = fh->f_procs_per_group + sizes_old_group[i]; } @@ -2633,9 +2656,9 @@ int mca_io_ompio_merge_groups(mca_io_ompio_file_t *fh, opal_output (1, "OUT OF MEMORY\n"); free(sizes_old_group); free(displs); - return OMPI_ERR_OUT_OF_RESOURCE; + return OMPI_ERR_OUT_OF_RESOURCE; } - + //New aggregator also collects the grouping distribution //This is the actual merge //use allgatherv array @@ -2650,13 +2673,13 @@ int mca_io_ompio_merge_groups(mca_io_ompio_file_t *fh, 
merge_aggrs, num_merge_aggrs, fh->f_comm); - - + + free(displs); free (sizes_old_group); return OMPI_SUCCESS; - + } @@ -2670,7 +2693,7 @@ int mca_io_ompio_split_a_group(mca_io_ompio_file_t *fh, int *num_groups, int *size_smallest_group) { - + OMPI_MPI_OFFSET_TYPE *cci = NULL; *num_groups = fh->f_init_procs_per_group / size_new_group; *size_smallest_group = size_new_group; @@ -2684,28 +2707,28 @@ int mca_io_ompio_split_a_group(mca_io_ompio_file_t *fh, *size_smallest_group = fh->f_init_procs_per_group % size_new_group; flag = 1; } - + cci = (OMPI_MPI_OFFSET_TYPE*)malloc(*num_groups * sizeof( OMPI_MPI_OFFSET_TYPE )); if (NULL == cci) { opal_output(1, "OUT OF MEMORY\n"); return OMPI_ERR_OUT_OF_RESOURCE; } - + //check contiguity within new groups size = size_new_group; - for( i = 0; i < *num_groups; i++){ + for( i = 0; i < *num_groups; i++){ cci[i] = start_offsets_lens[3*size_new_group*i + 1]; //if it is the last group check if it is the smallest group if( (i == *num_groups-1) && flag == 1){ size = *size_smallest_group; } for( k = 0; k < size-1; k++){ - if( end_offsets[size_new_group* i + k] == start_offsets_lens[3*size_new_group*i + 3*(k+1)] ){ + if( end_offsets[size_new_group* i + k] == start_offsets_lens[3*size_new_group*i + 3*(k+1)] ){ cci[i] += start_offsets_lens[3*size_new_group*i + 3*(k + 1) + 1]; - } - } + } + } } - + //get min and max cci *min_cci = cci[0]; *max_cci = cci[0]; @@ -2714,7 +2737,7 @@ int mca_io_ompio_split_a_group(mca_io_ompio_file_t *fh, *max_cci = cci[i]; } else if(cci[i] < *min_cci){ - *min_cci = cci[i]; + *min_cci = cci[i]; } } //if cci is not needed anymore @@ -2730,7 +2753,7 @@ int mca_io_ompio_finalize_split(mca_io_ompio_file_t *fh, int size_last_group) { //based on new group and last group finalize f_procs_per_group and f_procs_in_group - + int i = 0; int j = 0; int k = 0; @@ -2744,16 +2767,16 @@ int mca_io_ompio_finalize_split(mca_io_ompio_file_t *fh, else{ fh->f_procs_per_group = size_new_group; } - } + } } - - + + fh->f_procs_in_group = 
(int*)malloc (fh->f_procs_per_group * sizeof(int)); if (NULL == fh->f_procs_in_group) { opal_output (1, "OUT OF MEMORY\n"); return OMPI_ERR_OUT_OF_RESOURCE; } - + for( i = 0; i < fh->f_init_procs_per_group ; i++){ if( fh->f_rank == fh->f_init_procs_in_group[i]){ if( i >= fh->f_init_procs_per_group - size_last_group ){ @@ -2765,14 +2788,14 @@ int mca_io_ompio_finalize_split(mca_io_ompio_file_t *fh, else{ //distribute all other groups for( j = 0 ; j < fh->f_init_procs_per_group; j = j + size_new_group){ - if(i >= j && i < j+size_new_group ){ + if(i >= j && i < j+size_new_group ){ for( k = 0; k < fh->f_procs_per_group ; k++){ fh->f_procs_in_group[k] = fh->f_init_procs_in_group[j+k]; } } } } - + } } @@ -2789,7 +2812,7 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, int *is_aggregator, int *ompio_grouping_flag) { - + OMPI_MPI_OFFSET_TYPE start_offset_len[3] = {0}; OMPI_MPI_OFFSET_TYPE *aggr_bytes_per_group_tmp = NULL; OMPI_MPI_OFFSET_TYPE *start_offsets_lens_tmp = NULL; @@ -2802,7 +2825,7 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, int merge_count = 0; int split_count = 0; //not req? int retain_as_is_count = 0; //not req? 
- + //Store start offset and length in an array //also add bytes per process if(NULL == fh->f_decoded_iov){ @@ -2880,7 +2903,7 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, fh->f_init_aggr_list, fh->f_init_num_aggrs, fh->f_comm); - + for( i = 0; i < fh->f_init_num_aggrs; i++){ if((size_t)(aggr_bytes_per_group_tmp[i])> (size_t)mca_io_ompio_bytes_per_agg){ @@ -2897,7 +2920,7 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, retain_as_is_count++; } } - + *aggr_bytes_per_group = &aggr_bytes_per_group_tmp[0]; //Go through the decision list to see if non consecutive //processes intend to merge, if yes retain original grouping @@ -2909,32 +2932,32 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, } else if( (i == fh->f_init_num_aggrs-1) && (decision_list_tmp[i-1] != OMPIO_MERGE)){ - + decision_list_tmp[i] = OMPIO_RETAIN; } else if(!((decision_list_tmp[i-1] == OMPIO_MERGE) || (decision_list_tmp[i+1] == OMPIO_MERGE))){ - + decision_list_tmp[i] = OMPIO_RETAIN; } } } - + //Set the flag as per the decision list for( i = 0 ; i < fh->f_init_num_aggrs; i++){ if((decision_list_tmp[i] == OMPIO_MERGE)&& (fh->f_rank == fh->f_init_aggr_list[i])) - *ompio_grouping_flag = OMPIO_MERGE; - + *ompio_grouping_flag = OMPIO_MERGE; + if((decision_list_tmp[i] == OMPIO_SPLIT)&& (fh->f_rank == fh->f_init_aggr_list[i])) *ompio_grouping_flag = OMPIO_SPLIT; - + if((decision_list_tmp[i] == OMPIO_RETAIN)&& (fh->f_rank == fh->f_init_aggr_list[i])) - *ompio_grouping_flag = OMPIO_RETAIN; + *ompio_grouping_flag = OMPIO_RETAIN; } - + //print decision list of aggregators /*printf("RANK%d : Printing decsion list : \n",fh->f_rank); for( i = 0; i < fh->f_init_num_aggrs; i++){ @@ -2947,7 +2970,7 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, } printf("\n\n"); */ - *decision_list = &decision_list_tmp[0]; + *decision_list = &decision_list_tmp[0]; } //Communicate flag to all group members ompi_io_ompio_bcast_array (ompio_grouping_flag, diff --git 
a/ompi/mca/io/ompio/io_ompio.h b/ompi/mca/io/ompio/io_ompio.h index 0649f7a01e6..618e127422e 100644 --- a/ompi/mca/io/ompio/io_ompio.h +++ b/ompi/mca/io/ompio/io_ompio.h @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -57,7 +59,10 @@ OMPI_DECLSPEC extern int mca_io_ompio_coll_timing_info; #define OMPIO_CONTIGUOUS_FVIEW 0x00000010 #define OMPIO_AGGREGATOR_IS_SET 0x00000020 #define OMPIO_SHAREDFP_IS_SET 0x00000040 + #define QUEUESIZE 2048 +#define MCA_IO_DEFAULT_FILE_VIEW_SIZE 4*1024*1024 +#define OMPIO_FCOLL_WANT_TIME_BREAKDOWN 0 #define OMPIO_MIN(a, b) (((a) < (b)) ? (a) : (b)) #define OMPIO_MAX(a, b) (((a) < (b)) ? 
(b) : (a)) @@ -66,6 +71,7 @@ OMPI_DECLSPEC extern int mca_io_ompio_coll_timing_info; * General values */ #define OMPIO_PREALLOC_MAX_BUF_SIZE 33554432 +#define OMPIO_DEFAULT_CYCLE_BUF_SIZE 536870912 #define OMPIO_PERM_NULL -1 #define OMPIO_IOVEC_INITIAL_SIZE 100 #define OMPIO_ROOT 0 @@ -96,15 +102,22 @@ OMPI_DECLSPEC extern int mca_io_ompio_coll_timing_info; #define OMPIO_MERGE 1 #define OMPIO_SPLIT 2 #define OMPIO_RETAIN 3 + #define DATA_VOLUME 1 #define UNIFORM_DISTRIBUTION 2 -#define OMPIO_UNIFORM_DIST_THRESHOLD 0.5 #define CONTIGUITY 3 -#define OMPIO_CONTG_THRESHOLD 1048576 #define OPTIMIZE_GROUPING 4 -#define OMPIO_PROCS_PER_GROUP_TAG 0 -#define OMPIO_PROCS_IN_GROUP_TAG 1 -#define OMPIO_MERGE_THRESHOLD 0.5 +#define SIMPLE 5 +#define NO_REFINEMENT 6 + + +#define OMPIO_UNIFORM_DIST_THRESHOLD 0.5 +#define OMPIO_CONTG_THRESHOLD 1048576 +#define OMPIO_CONTG_FACTOR 8 +#define OMPIO_DEFAULT_STRIPE_SIZE 1048576 +#define OMPIO_PROCS_PER_GROUP_TAG 0 +#define OMPIO_PROCS_IN_GROUP_TAG 1 +#define OMPIO_MERGE_THRESHOLD 0.5 /*---------------------------*/ @@ -121,7 +134,7 @@ enum ompio_fs_type OMPI_DECLSPEC extern mca_io_base_component_2_0_0_t mca_io_ompio_component; /* - * global variables, instantiated in module.c + * global variables, instantiated in module.c */ extern opal_mutex_t mca_io_ompio_mutex; extern mca_io_base_module_2_0_0_t mca_io_ompio_module; @@ -155,14 +168,14 @@ typedef struct { double time[3]; int nprocs_for_coll; int aggregator; -}print_entry; +}mca_io_ompio_print_entry; typedef struct { - print_entry entry[QUEUESIZE + 1]; + mca_io_ompio_print_entry entry[QUEUESIZE + 1]; int first; int last; int count; -} print_queue; +} mca_io_ompio_print_queue; typedef struct { int ndims; @@ -188,10 +201,10 @@ typedef struct{ /* forward declaration to keep the compiler happy. 
*/ struct mca_io_ompio_file_t; -typedef int (*mca_io_ompio_decode_datatype_fn_t) (struct mca_io_ompio_file_t *fh, +typedef int (*mca_io_ompio_decode_datatype_fn_t) (struct mca_io_ompio_file_t *fh, struct ompi_datatype_t *datatype, int count, - void *buf, + const void *buf, size_t *max_data, struct iovec **iov, uint32_t *iov_count); @@ -201,34 +214,34 @@ typedef int (*mca_io_ompio_generate_current_file_view_fn_t) (struct mca_io_ompio int *iov_count); /* - * Function that sorts an io_array according to the offset by filling + * Function that sorts an io_array according to the offset by filling * up an array of the indices into the array (HEAP SORT) */ typedef int (*mca_io_ompio_sort_fn_t) (mca_io_ompio_io_array_t *io_array, int num_entries, int *sorted); -typedef int (*mca_io_ompio_sort_iovec_fn_t) (struct iovec *iov, - int num_entries, +typedef int (*mca_io_ompio_sort_iovec_fn_t) (struct iovec *iov, + int num_entries, int *sorted); /* collective operations based on list of participating ranks instead of communicators*/ -typedef int (*mca_io_ompio_allgather_array_fn_t) (void *sbuf, +typedef int (*mca_io_ompio_allgather_array_fn_t) (void *sbuf, int scount, - ompi_datatype_t *sdtype, + ompi_datatype_t *sdtype, void *rbuf, - int rcount, + int rcount, ompi_datatype_t *rdtype, int root_index, int *procs_in_group, int procs_per_group, ompi_communicator_t *comm); -typedef int (*mca_io_ompio_allgatherv_array_fn_t) (void *sbuf, +typedef int (*mca_io_ompio_allgatherv_array_fn_t) (void *sbuf, int scount, - ompi_datatype_t *sdtype, + ompi_datatype_t *sdtype, void *rbuf, - int *rcounts, + int *rcounts, int *disps, ompi_datatype_t *rdtype, int root_index, @@ -236,10 +249,10 @@ typedef int (*mca_io_ompio_allgatherv_array_fn_t) (void *sbuf, int procs_per_group, ompi_communicator_t *comm); -typedef int (*mca_io_ompio_gather_array_fn_t) (void *sbuf, +typedef int (*mca_io_ompio_gather_array_fn_t) (void *sbuf, int scount, ompi_datatype_t *sdtype, - void *rbuf, + void *rbuf, int rcount, 
ompi_datatype_t *rdtype, int root_index, @@ -258,7 +271,7 @@ typedef int (*mca_io_ompio_gatherv_array_fn_t) (void *sbuf, int procs_per_group, ompi_communicator_t *comm); -/* functions to retrieve the number of aggregators and the size of the +/* functions to retrieve the number of aggregators and the size of the temporary buffer on aggregators from the fcoll modules */ typedef void (*mca_io_ompio_get_num_aggregators_fn_t) ( int *num_aggregators); typedef void (*mca_io_ompio_get_bytes_per_agg_fn_t) ( int *bytes_per_agg); @@ -269,7 +282,7 @@ typedef int (*mca_io_ompio_set_aggregator_props_fn_t) (struct mca_io_ompio_file_ typedef int (*mca_io_ompio_full_print_queue_fn_t) (int queue_type); typedef int (*mca_io_ompio_register_print_entry_fn_t) (int queue_type, - print_entry x); + mca_io_ompio_print_entry x); /** @@ -286,7 +299,7 @@ struct mca_io_ompio_file_t { int f_amode; int f_perm; ompi_communicator_t *f_comm; - char *f_filename; + const char *f_filename; char *f_datarep; opal_convertor_t *f_convertor; ompi_info_t *f_info; @@ -294,6 +307,7 @@ struct mca_io_ompio_file_t { void *f_fs_ptr; int f_atomicity; size_t f_stripe_size; + int f_stripe_count; size_t f_cc_size; int f_bytes_per_agg; enum ompio_fs_type f_fstype; @@ -320,6 +334,7 @@ struct mca_io_ompio_file_t { size_t f_view_size; ompi_datatype_t *f_etype; ompi_datatype_t *f_filetype; + ompi_datatype_t *f_orig_filetype; /* the fileview passed by the user to us */ size_t f_etype_size; /* contains IO requests that needs to be read/written */ @@ -351,7 +366,7 @@ struct mca_io_ompio_file_t { int *f_init_procs_in_group; int f_final_num_aggrs; - + /* internal ompio functions required by fbtl and fcoll */ mca_io_ompio_decode_datatype_fn_t f_decode_datatype; mca_io_ompio_generate_current_file_view_fn_t f_generate_current_file_view; @@ -369,7 +384,7 @@ struct mca_io_ompio_file_t { mca_io_ompio_set_aggregator_props_fn_t f_set_aggregator_props; mca_io_ompio_full_print_queue_fn_t f_full_print_queue; - 
mca_io_ompio_register_print_entry_fn_t f_register_print_entry; + mca_io_ompio_register_print_entry_fn_t f_register_print_entry; }; typedef struct mca_io_ompio_file_t mca_io_ompio_file_t; @@ -378,37 +393,37 @@ struct mca_io_ompio_data_t { }; typedef struct mca_io_ompio_data_t mca_io_ompio_data_t; -OMPI_DECLSPEC extern print_queue *coll_write_time; -OMPI_DECLSPEC extern print_queue *coll_read_time; +OMPI_DECLSPEC extern mca_io_ompio_print_queue *coll_write_time; +OMPI_DECLSPEC extern mca_io_ompio_print_queue *coll_read_time; -/* functions to retrieve the number of aggregators and the size of the +/* functions to retrieve the number of aggregators and the size of the temporary buffer on aggregators from the fcoll modules */ OMPI_DECLSPEC void mca_io_ompio_get_num_aggregators ( int *num_aggregators); OMPI_DECLSPEC void mca_io_ompio_get_bytes_per_agg ( int *bytes_per_agg); -OMPI_DECLSPEC int mca_io_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles, - size_t bpc, int max_data, uint32_t iov_count, - struct iovec *decoded_iov, int *ii, int *jj, - size_t *tbw ); +OMPI_DECLSPEC int mca_io_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles, + size_t bpc, int max_data, uint32_t iov_count, + struct iovec *decoded_iov, int *ii, int *jj, + size_t *tbw ); OMPI_DECLSPEC int ompi_io_ompio_set_file_defaults (mca_io_ompio_file_t *fh); OMPI_DECLSPEC int ompio_io_ompio_file_open (ompi_communicator_t *comm, - char *filename, + const char *filename, int amode, ompi_info_t *info, mca_io_ompio_file_t *fh,bool use_sharedfp); OMPI_DECLSPEC int ompio_io_ompio_file_write_at (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); OMPI_DECLSPEC int ompio_io_ompio_file_write (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); @@ -417,34 +432,30 @@ OMPI_DECLSPEC int 
ompio_io_ompio_file_close (mca_io_ompio_file_t *fh); OMPI_DECLSPEC int ompio_io_ompio_file_write_at_all (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); -OMPI_DECLSPEC int ompio_io_ompio_file_write_at_all_begin (mca_io_ompio_file_t *fh, - OMPI_MPI_OFFSET_TYPE offset, - void *buf, - int count, - struct ompi_datatype_t *datatype); - -OMPI_DECLSPEC int ompio_io_ompio_file_write_at_all_end (mca_io_ompio_file_t *fh, - void *buf, - ompi_status_public_t * status); - OMPI_DECLSPEC int ompio_io_ompio_file_iwrite_at (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); OMPI_DECLSPEC int ompio_io_ompio_file_iwrite (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); +OMPI_DECLSPEC int ompio_io_ompio_file_iwrite_at_all (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + const void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); OMPI_DECLSPEC int ompio_io_ompio_file_iread (mca_io_ompio_file_t *fh, void *buf, int count, @@ -462,6 +473,12 @@ OMPI_DECLSPEC int ompio_io_ompio_file_iread_at (mca_io_ompio_file_t *fh, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); +OMPI_DECLSPEC int ompio_io_ompio_file_iread_at_all (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); OMPI_DECLSPEC int ompio_io_ompio_file_read_at (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, void *buf, @@ -474,14 +491,6 @@ OMPI_DECLSPEC int ompio_io_ompio_file_read_at_all (mca_io_ompio_file_t *fh, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status); -OMPI_DECLSPEC int ompio_io_ompio_file_read_at_all_begin (mca_io_ompio_file_t *ompio_fh, 
- OMPI_MPI_OFFSET_TYPE offset, - void *buf, - int count, - struct ompi_datatype_t *datatype); -OMPI_DECLSPEC int ompio_io_ompio_file_read_at_all_end (mca_io_ompio_file_t *ompio_fh, - void *buf, - ompi_status_public_t * status); OMPI_DECLSPEC int ompio_io_ompio_file_get_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *size); @@ -492,24 +501,24 @@ OMPI_DECLSPEC int ompio_io_ompio_file_get_position (mca_io_ompio_file_t *fh, * Function that takes in a datatype and buffer, and decodes that datatype * into an iovec using the convertor_raw function */ -OMPI_DECLSPEC int ompi_io_ompio_decode_datatype (struct mca_io_ompio_file_t *fh, +OMPI_DECLSPEC int ompi_io_ompio_decode_datatype (struct mca_io_ompio_file_t *fh, struct ompi_datatype_t *datatype, int count, - void *buf, + const void *buf, size_t *max_data, struct iovec **iov, uint32_t *iov_count); /* - * Function that sorts an io_array according to the offset by filling + * Function that sorts an io_array according to the offset by filling * up an array of the indices into the array (HEAP SORT) */ OMPI_DECLSPEC int ompi_io_ompio_sort (mca_io_ompio_io_array_t *io_array, int num_entries, int *sorted); -OMPI_DECLSPEC int ompi_io_ompio_sort_iovec (struct iovec *iov, - int num_entries, +OMPI_DECLSPEC int ompi_io_ompio_sort_iovec (struct iovec *iov, + int num_entries, int *sorted); OMPI_DECLSPEC int ompi_io_ompio_sort_offlen (mca_io_ompio_offlen_array_t *io_array, @@ -518,7 +527,7 @@ OMPI_DECLSPEC int ompi_io_ompio_sort_offlen (mca_io_ompio_offlen_array_t *io_arr -OMPI_DECLSPEC int ompi_io_ompio_set_explicit_offset (mca_io_ompio_file_t *fh, +OMPI_DECLSPEC int ompi_io_ompio_set_explicit_offset (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset); OMPI_DECLSPEC int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, @@ -543,6 +552,9 @@ int mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh); int mca_io_ompio_fview_based_grouping(mca_io_ompio_file_t *fh, int *num_groups, contg *contg_groups); 
+int mca_io_ompio_simple_grouping(mca_io_ompio_file_t *fh, + int *num_groups, + contg *contg_groups); int mca_io_ompio_finalize_initial_grouping(mca_io_ompio_file_t *fh, int num_groups, @@ -619,96 +631,96 @@ OMPI_DECLSPEC int ompi_io_ompio_scatterv_array (void *sbuf, int *procs_in_group, int procs_per_group, ompi_communicator_t *comm); -OMPI_DECLSPEC int ompi_io_ompio_allgather_array (void *sbuf, +OMPI_DECLSPEC int ompi_io_ompio_allgather_array (void *sbuf, int scount, - ompi_datatype_t *sdtype, + ompi_datatype_t *sdtype, void *rbuf, - int rcount, + int rcount, ompi_datatype_t *rdtype, int root_index, int *procs_in_group, int procs_per_group, ompi_communicator_t *comm); - -OMPI_DECLSPEC int ompi_io_ompio_allgatherv_array (void *sbuf, + +OMPI_DECLSPEC int ompi_io_ompio_allgatherv_array (void *sbuf, int scount, - ompi_datatype_t *sdtype, + ompi_datatype_t *sdtype, void *rbuf, - int *rcounts, + int *rcounts, int *disps, ompi_datatype_t *rdtype, int root_index, int *procs_in_group, int procs_per_group, ompi_communicator_t *comm); -OMPI_DECLSPEC int ompi_io_ompio_gather_array (void *sbuf, +OMPI_DECLSPEC int ompi_io_ompio_gather_array (void *sbuf, int scount, ompi_datatype_t *sdtype, - void *rbuf, + void *rbuf, int rcount, ompi_datatype_t *rdtype, int root_index, int *procs_in_group, int procs_per_group, ompi_communicator_t *comm); -OMPI_DECLSPEC int ompi_io_ompio_bcast_array (void *buff, +OMPI_DECLSPEC int ompi_io_ompio_bcast_array (void *buff, int count, - ompi_datatype_t *datatype, + ompi_datatype_t *datatype, int root_index, int *procs_in_group, int procs_per_group, ompi_communicator_t *comm); OMPI_DECLSPEC int ompi_io_ompio_register_print_entry (int queue_type, - print_entry x); + mca_io_ompio_print_entry x); -OMPI_DECLSPEC int ompi_io_ompio_unregister_print_entry (int queue_type, print_entry *x); +OMPI_DECLSPEC int ompi_io_ompio_unregister_print_entry (int queue_type, mca_io_ompio_print_entry *x); OMPI_DECLSPEC int ompi_io_ompio_empty_print_queue(int queue_type); 
OMPI_DECLSPEC int ompi_io_ompio_full_print_queue(int queue_type); -OMPI_DECLSPEC int ompi_io_ompio_initialize_print_queue(print_queue *q); +OMPI_DECLSPEC int ompi_io_ompio_initialize_print_queue(mca_io_ompio_print_queue *q); OMPI_DECLSPEC int ompi_io_ompio_print_time_info(int queue_type, char *name_operation, mca_io_ompio_file_t *fh); -int ompi_io_ompio_set_print_queue (print_queue **q, +int ompi_io_ompio_set_print_queue (mca_io_ompio_print_queue **q, int queue_type); - + /* * ****************************************************************** * ********* functions which are implemented in this module ********* * ****************************************************************** - */ + */ -int mca_io_ompio_file_set_view (struct ompi_file_t *fh, - OMPI_MPI_OFFSET_TYPE disp, +int mca_io_ompio_file_set_view (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE disp, struct ompi_datatype_t *etype, - struct ompi_datatype_t *filetype, - char *datarep, + struct ompi_datatype_t *filetype, + const char *datarep, struct ompi_info_t *info); -int mca_io_ompio_set_view_internal (struct mca_io_ompio_file_t *fh, - OMPI_MPI_OFFSET_TYPE disp, +int mca_io_ompio_set_view_internal (struct mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE disp, struct ompi_datatype_t *etype, - struct ompi_datatype_t *filetype, - char *datarep, + struct ompi_datatype_t *filetype, + const char *datarep, struct ompi_info_t *info); -int mca_io_ompio_file_get_view (struct ompi_file_t *fh, +int mca_io_ompio_file_get_view (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE *disp, - struct ompi_datatype_t **etype, + struct ompi_datatype_t **etype, struct ompi_datatype_t **filetype, char *datarep); int mca_io_ompio_file_open (struct ompi_communicator_t *comm, - char *filename, + const char *filename, int amode, struct ompi_info_t *info, struct ompi_file_t *fh); int mca_io_ompio_file_close (struct ompi_file_t *fh); -int mca_io_ompio_file_delete (char *filename, +int mca_io_ompio_file_delete (const char *filename, struct 
ompi_info_t *info); int mca_io_ompio_file_set_size (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE size); @@ -731,7 +743,7 @@ int mca_io_ompio_file_set_view (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE disp, struct ompi_datatype_t *etype, struct ompi_datatype_t *filetype, - char *datarep, + const char *datarep, struct ompi_info_t *info); int mca_io_ompio_file_get_view (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE *disp, @@ -754,13 +766,13 @@ int mca_io_ompio_file_read_at_all (struct ompi_file_t *fh, ompi_status_public_t *status); int mca_io_ompio_file_write_at (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); int mca_io_ompio_file_write_at_all (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); @@ -772,7 +784,7 @@ int mca_io_ompio_file_iread_at (struct ompi_file_t *fh, ompi_request_t **request); int mca_io_ompio_file_iwrite_at (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); @@ -801,23 +813,23 @@ int mca_io_ompio_file_iread_at_all (ompi_file_t *fh, ompi_request_t **request); int mca_io_ompio_file_write (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); int mca_io_ompio_file_write_all (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); int mca_io_ompio_file_iwrite_all (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); int mca_io_ompio_file_iwrite_at_all (ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); @@ -827,7 
+839,7 @@ int mca_io_ompio_file_iread (struct ompi_file_t *fh, struct ompi_datatype_t *datatype, ompi_request_t **request); int mca_io_ompio_file_iwrite (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); @@ -839,7 +851,7 @@ int mca_io_ompio_file_get_position (struct ompi_file_t *fh, int mca_io_ompio_file_get_byte_offset (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, OMPI_MPI_OFFSET_TYPE *disp); - + /* Section 9.4.4 */ int mca_io_ompio_file_read_shared (struct ompi_file_t *fh, void *buf, @@ -847,7 +859,7 @@ int mca_io_ompio_file_read_shared (struct ompi_file_t *fh, struct ompi_datatype_t *datatype, ompi_status_public_t *status); int mca_io_ompio_file_write_shared (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); @@ -857,7 +869,7 @@ int mca_io_ompio_file_iread_shared (struct ompi_file_t *fh, struct ompi_datatype_t *datatype, ompi_request_t **request); int mca_io_ompio_file_iwrite_shared (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); @@ -867,7 +879,7 @@ int mca_io_ompio_file_read_ordered (struct ompi_file_t *fh, struct ompi_datatype_t *datatype, ompi_status_public_t *status); int mca_io_ompio_file_write_ordered (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); @@ -888,11 +900,11 @@ int mca_io_ompio_file_read_at_all_end (struct ompi_file_t *fh, ompi_status_public_t *status); int mca_io_ompio_file_write_at_all_begin (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype); int mca_io_ompio_file_write_at_all_end (struct ompi_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t *status); int mca_io_ompio_file_read_all_begin (struct ompi_file_t *fh, void *buf, 
@@ -902,11 +914,11 @@ int mca_io_ompio_file_read_all_end (struct ompi_file_t *fh, void *buf, ompi_status_public_t *status); int mca_io_ompio_file_write_all_begin (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype); int mca_io_ompio_file_write_all_end (struct ompi_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t *status); int mca_io_ompio_file_read_ordered_begin (struct ompi_file_t *fh, void *buf, @@ -916,11 +928,11 @@ int mca_io_ompio_file_read_ordered_end (struct ompi_file_t *fh, void *buf, ompi_status_public_t *status); int mca_io_ompio_file_write_ordered_begin (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype); int mca_io_ompio_file_write_ordered_end (struct ompi_file_t *fh, - void *buf, + const void *buf, struct ompi_status_public_t *status); /* Section 9.5.1 */ @@ -938,7 +950,7 @@ int mca_io_ompio_file_sync (struct ompi_file_t *fh); * ****************************************************************** * ************ functions implemented in this module end ************ * ****************************************************************** - */ + */ END_C_DECLS diff --git a/ompi/mca/io/ompio/io_ompio_coll_array.c b/ompi/mca/io/ompio/io_ompio_coll_array.c index cf46a656a9d..d8332d84f18 100644 --- a/ompi/mca/io/ompio/io_ompio_coll_array.c +++ b/ompi/mca/io/ompio/io_ompio_coll_array.c @@ -6,15 +6,15 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,11 +31,11 @@ #include "io_ompio.h" -int ompi_io_ompio_allgatherv_array (void *sbuf, +int ompi_io_ompio_allgatherv_array (void *sbuf, int scount, - ompi_datatype_t *sdtype, + ompi_datatype_t *sdtype, void *rbuf, - int *rcounts, + int *rcounts, int *disps, ompi_datatype_t *rdtype, int root_index, @@ -74,11 +74,11 @@ int ompi_io_ompio_allgatherv_array (void *sbuf, } err = ompi_io_ompio_gatherv_array (send_buf, - rcounts[j], + rcounts[j], send_type, rbuf, - rcounts, - disps, + rcounts, + disps, rdtype, root_index, procs_in_group, @@ -101,26 +101,26 @@ int ompi_io_ompio_allgatherv_array (void *sbuf, return err; } - ompi_io_ompio_bcast_array (rbuf, + ompi_io_ompio_bcast_array (rbuf, 1, newtype, root_index, procs_in_group, procs_per_group, comm); - + ompi_datatype_destroy (&newtype); return OMPI_SUCCESS; } -int ompi_io_ompio_gatherv_array (void *sbuf, +int ompi_io_ompio_gatherv_array (void *sbuf, int scount, ompi_datatype_t *sdtype, - void *rbuf, - int *rcounts, + void *rbuf, + int *rcounts, int *disps, - ompi_datatype_t *rdtype, + ompi_datatype_t *rdtype, int root_index, int *procs_in_group, int procs_per_group, @@ -130,36 +130,40 @@ int ompi_io_ompio_gatherv_array (void *sbuf, int err = OMPI_SUCCESS; char *ptmp; OPAL_PTRDIFF_TYPE extent, lb; + ompi_request_t **reqs=NULL; rank = ompi_comm_rank (comm); if (procs_in_group[root_index] != rank) { if (scount > 0) { - return MCA_PML_CALL(send(sbuf, - scount, - sdtype, + return MCA_PML_CALL(send(sbuf, + scount, + sdtype, procs_in_group[root_index], OMPIO_TAG_GATHERV, - MCA_PML_BASE_SEND_STANDARD, + MCA_PML_BASE_SEND_STANDARD, comm)); } return err; } - /* writer processes, loop receiving data from proceses + /* writer processes, loop receiving data from proceses belonging to each corresponding root */ err = opal_datatype_get_extent (&rdtype->super, &lb, &extent); if (OMPI_SUCCESS != err) { return OMPI_ERROR; } - + reqs = (ompi_request_t **) 
malloc ( procs_per_group *sizeof(ompi_request_t *)); + if ( NULL == reqs ) { + return OMPI_ERR_OUT_OF_RESOURCE; + } for (i=0; i 0) { - err = MCA_PML_CALL(recv(ptmp, - rcounts[i], - rdtype, - procs_in_group[i], - OMPIO_TAG_GATHERV, - comm, - MPI_STATUS_IGNORE)); + err = MCA_PML_CALL(irecv(ptmp, + rcounts[i], + rdtype, + procs_in_group[i], + OMPIO_TAG_GATHERV, + comm, + &reqs[i])); } + else { + reqs[i] = MPI_REQUEST_NULL; + } } if (OMPI_SUCCESS != err) { + free ( reqs ); return err; } } /* All done */ - + err = ompi_request_wait_all ( procs_per_group, reqs, MPI_STATUSES_IGNORE ); + if ( NULL != reqs ) { + free ( reqs ); + } return err; } -int ompi_io_ompio_scatterv_array (void *sbuf, +int ompi_io_ompio_scatterv_array (void *sbuf, int *scounts, int *disps, ompi_datatype_t *sdtype, - void *rbuf, - int rcount, - ompi_datatype_t *rdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, int root_index, int *procs_in_group, int procs_per_group, @@ -207,14 +219,15 @@ int ompi_io_ompio_scatterv_array (void *sbuf, int err = OMPI_SUCCESS; char *ptmp; OPAL_PTRDIFF_TYPE extent, lb; + ompi_request_t ** reqs=NULL; rank = ompi_comm_rank (comm); if (procs_in_group[root_index] != rank) { if (rcount > 0) { - err = MCA_PML_CALL(recv(rbuf, - rcount, - rdtype, + err = MCA_PML_CALL(recv(rbuf, + rcount, + rdtype, procs_in_group[root_index], OMPIO_TAG_SCATTERV, comm, @@ -223,20 +236,24 @@ int ompi_io_ompio_scatterv_array (void *sbuf, return err; } - /* writer processes, loop sending data to proceses + /* writer processes, loop sending data to proceses belonging to each corresponding root */ err = opal_datatype_get_extent (&sdtype->super, &lb, &extent); if (OMPI_SUCCESS != err) { return OMPI_ERROR; } - + reqs = ( ompi_request_t **) malloc ( procs_per_group * sizeof ( ompi_request_t *)); + if (NULL == reqs ) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i=0 ; i 0) { - err = MCA_PML_CALL(send(ptmp, - scounts[i], - sdtype, - procs_in_group[i], - OMPIO_TAG_SCATTERV, - 
MCA_PML_BASE_SEND_STANDARD, - comm)); + err = MCA_PML_CALL(isend(ptmp, + scounts[i], + sdtype, + procs_in_group[i], + OMPIO_TAG_SCATTERV, + MCA_PML_BASE_SEND_STANDARD, + comm, + &reqs[i])); + } + else { + reqs[i] = MPI_REQUEST_NULL; } } if (OMPI_SUCCESS != err) { + free ( reqs ); return err; } } /* All done */ - + err = ompi_request_wait_all ( procs_per_group, reqs, MPI_STATUSES_IGNORE ); + if ( NULL != reqs ) { + free ( reqs ); + } return err; } -int ompi_io_ompio_allgather_array (void *sbuf, +int ompi_io_ompio_allgather_array (void *sbuf, int scount, - ompi_datatype_t *sdtype, + ompi_datatype_t *sdtype, void *rbuf, - int rcount, + int rcount, ompi_datatype_t *rdtype, int root_index, int *procs_in_group, @@ -295,20 +321,20 @@ int ompi_io_ompio_allgather_array (void *sbuf, } /* Gather and broadcast. */ - err = ompi_io_ompio_gather_array (sbuf, - scount, - sdtype, - rbuf, + err = ompi_io_ompio_gather_array (sbuf, + scount, + sdtype, + rbuf, rcount, - rdtype, + rdtype, root_index, procs_in_group, procs_per_group, comm); if (OMPI_SUCCESS == err) { - err = ompi_io_ompio_bcast_array (rbuf, - rcount * procs_per_group, + err = ompi_io_ompio_bcast_array (rbuf, + rcount * procs_per_group, rdtype, root_index, procs_in_group, @@ -320,10 +346,10 @@ int ompi_io_ompio_allgather_array (void *sbuf, return err; } -int ompi_io_ompio_gather_array (void *sbuf, +int ompi_io_ompio_gather_array (void *sbuf, int scount, ompi_datatype_t *sdtype, - void *rbuf, + void *rbuf, int rcount, ompi_datatype_t *rdtype, int root_index, @@ -337,17 +363,18 @@ int ompi_io_ompio_gather_array (void *sbuf, OPAL_PTRDIFF_TYPE incr; OPAL_PTRDIFF_TYPE extent, lb; int err = OMPI_SUCCESS; + ompi_request_t ** reqs=NULL; rank = ompi_comm_rank (comm); - + /* Everyone but the writers sends data and returns. 
*/ if (procs_in_group[root_index] != rank) { - err = MCA_PML_CALL(send(sbuf, - scount, - sdtype, + err = MCA_PML_CALL(send(sbuf, + scount, + sdtype, procs_in_group[root_index], OMPIO_TAG_GATHER, - MCA_PML_BASE_SEND_STANDARD, + MCA_PML_BASE_SEND_STANDARD, comm)); return err; } @@ -356,49 +383,60 @@ int ompi_io_ompio_gather_array (void *sbuf, opal_datatype_get_extent (&rdtype->super, &lb, &extent); incr = extent * rcount; - for (i = 0, ptmp = (char *) rbuf; - i < procs_per_group; + reqs = ( ompi_request_t **) malloc ( procs_per_group * sizeof ( ompi_request_t *)); + if (NULL == reqs ) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (i = 0, ptmp = (char *) rbuf; + i < procs_per_group; ++i, ptmp += incr) { if (procs_in_group[i] == rank) { if (MPI_IN_PLACE != sbuf) { - err = ompi_datatype_sndrcv (sbuf, - scount, + err = ompi_datatype_sndrcv (sbuf, + scount, sdtype , - ptmp, - rcount, + ptmp, + rcount, rdtype); } else { err = OMPI_SUCCESS; } + reqs[i] = MPI_REQUEST_NULL; } else { - err = MCA_PML_CALL(recv(ptmp, - rcount, - rdtype, - procs_in_group[i], - OMPIO_TAG_GATHER, - comm, - MPI_STATUS_IGNORE)); + err = MCA_PML_CALL(irecv(ptmp, + rcount, + rdtype, + procs_in_group[i], + OMPIO_TAG_GATHER, + comm, + &reqs[i])); /* for (k=0 ; k<4 ; k++) - printf ("RECV %p %d \n", + printf ("RECV %p %d \n", ((struct iovec *)ptmp)[k].iov_base, ((struct iovec *)ptmp)[k].iov_len); */ } if (OMPI_SUCCESS != err) { + free ( reqs ); return err; } } /* All done */ + err = ompi_request_wait_all ( procs_per_group, reqs, MPI_STATUSES_IGNORE ); + if ( NULL != reqs ) { + free ( reqs ); + } return err; } -int ompi_io_ompio_bcast_array (void *buff, +int ompi_io_ompio_bcast_array (void *buff, int count, ompi_datatype_t *datatype, int root_index, @@ -408,9 +446,10 @@ int ompi_io_ompio_bcast_array (void *buff, { int i, rank; int err = OMPI_SUCCESS; - + ompi_request_t ** reqs=NULL; + rank = ompi_comm_rank (comm); - + /* Non-writers receive the data. 
*/ if (procs_in_group[root_index] != rank) { err = MCA_PML_CALL(recv(buff, @@ -424,24 +463,33 @@ int ompi_io_ompio_bcast_array (void *buff, } /* Writers sends data to all others. */ - + reqs = ( ompi_request_t **) malloc ( procs_per_group * sizeof ( ompi_request_t *)); + if (NULL == reqs ) { + return OMPI_ERR_OUT_OF_RESOURCE; + } for (i=0 ; i /* or */ +#endif +#ifdef HAVE_SYS_MOUNT_H +#include +#endif +#ifdef HAVE_SYS_PARAM_H +#include +#endif + + + +int mca_io_ompio_cycle_buffer_size = OMPIO_DEFAULT_CYCLE_BUF_SIZE; int mca_io_ompio_bytes_per_agg = OMPIO_PREALLOC_MAX_BUF_SIZE; int mca_io_ompio_num_aggregators = -1; int mca_io_ompio_record_offset_info = 0; int mca_io_ompio_coll_timing_info = 0; -int mca_io_ompio_sharedfp_lazy_open = 1; +int mca_io_ompio_sharedfp_lazy_open = 0; -int mca_io_ompio_grouping_option=0; +int mca_io_ompio_grouping_option=5; /* * Private functions @@ -48,27 +61,28 @@ static int open_component(void); static int close_component(void); static int init_query(bool enable_progress_threads, bool enable_mpi_threads); -static const struct mca_io_base_module_2_0_0_t * -file_query (struct ompi_file_t *file, +static const struct mca_io_base_module_2_0_0_t * +file_query (struct ompi_file_t *file, struct mca_io_base_file_t **private_data, int *priority); -static int file_unquery(struct ompi_file_t *file, +static int file_unquery(struct ompi_file_t *file, struct mca_io_base_file_t *private_data); -static int delete_query(char *filename, struct ompi_info_t *info, +static int delete_query(const char *filename, struct ompi_info_t *info, struct mca_io_base_delete_t **private_data, bool *usable, int *priorty); -static int delete_select(char *filename, struct ompi_info_t *info, +static int delete_select(const char *filename, struct ompi_info_t *info, struct mca_io_base_delete_t *private_data); -/* -static int io_progress(void); -static int register_datarep(char *, +static int register_datarep(const char *, MPI_Datarep_conversion_function*, 
MPI_Datarep_conversion_function*, MPI_Datarep_extent_function*, void*); +/* +static int io_progress(void); + */ /* @@ -125,6 +139,8 @@ mca_io_base_component_2_0_0_t mca_io_ompio_component = { .io_delete_query = delete_query, .io_delete_select = delete_select, + + .io_register_datarep = register_datarep, }; static int register_component(void) @@ -136,7 +152,7 @@ static int register_component(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &priority_param); - delete_priority_param = 10; + delete_priority_param = 30; (void) mca_base_component_var_register(&mca_io_ompio_component.io_version, "delete_priority", "Delete priority of the io ompio component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -162,10 +178,10 @@ static int register_component(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_io_ompio_coll_timing_info); - mca_io_ompio_cycle_buffer_size = OMPIO_PREALLOC_MAX_BUF_SIZE; + mca_io_ompio_cycle_buffer_size = OMPIO_DEFAULT_CYCLE_BUF_SIZE; (void) mca_base_component_var_register(&mca_io_ompio_component.io_version, "cycle_buffer_size", - "Cycle buffer size of individual reads/writes", + "Data size issued by individual reads/writes per call", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, @@ -190,7 +206,7 @@ static int register_component(void) &mca_io_ompio_num_aggregators); - mca_io_ompio_sharedfp_lazy_open = 1; + mca_io_ompio_sharedfp_lazy_open = 0; (void) mca_base_component_var_register(&mca_io_ompio_component.io_version, "sharedfp_lazy_open", "lazy allocation of internal shared file pointer structures", @@ -199,15 +215,18 @@ static int register_component(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_io_ompio_sharedfp_lazy_open); - mca_io_ompio_grouping_option = 0; + mca_io_ompio_grouping_option = 5; (void) mca_base_component_var_register(&mca_io_ompio_component.io_version, "grouping_option", - "Option for grouping of processes in the aggregator selection", + "Option for grouping of processes in the aggregator selection " + "1: Data volume 
based grouping 2: maximizing group size uniformity 3: maximimze " + "data contiguity 4: hybrid optimization 5: simple (default) " + "6: skip refinement step", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_io_ompio_grouping_option); - + return OMPI_SUCCESS; } @@ -246,18 +265,49 @@ static int init_query(bool enable_progress_threads, static const struct mca_io_base_module_2_0_0_t * -file_query(struct ompi_file_t *file, +file_query(struct ompi_file_t *file, struct mca_io_base_file_t **private_data, int *priority) { mca_io_ompio_data_t *data; + char *tmp; + int rank; + int is_lustre=0; //false + + tmp = strchr (file->f_filename, ':'); + rank = ompi_comm_rank ( file->f_comm); + if (!tmp) { + if ( 0 == rank) { + if (LUSTRE == mca_fs_base_get_fstype(file->f_filename)) { + is_lustre = 1; //true + } + } + + file->f_comm->c_coll.coll_bcast (&is_lustre, + 1, + MPI_INT, + 0, + file->f_comm, + file->f_comm->c_coll.coll_bcast_module); + } + else { + if (!strncmp(file->f_filename, "lustre:", 7) || + !strncmp(file->f_filename, "LUSTRE:", 7)) { + is_lustre = 1; + } + } - *priority = priority_param; + if (is_lustre) { + *priority = 1; + } + else { + *priority = priority_param; + } /* Allocate a space for this module to hang private data (e.g., the OMPIO file handle) */ - data = malloc(sizeof(mca_io_ompio_data_t)); + data = calloc(1, sizeof(mca_io_ompio_data_t)); if (NULL == data) { return NULL; } @@ -270,7 +320,7 @@ file_query(struct ompi_file_t *file, } -static int file_unquery(struct ompi_file_t *file, +static int file_unquery(struct ompi_file_t *file, struct mca_io_base_file_t *private_data) { /* Free the ompio module-specific data that was allocated in @@ -284,7 +334,7 @@ static int file_unquery(struct ompi_file_t *file, } -static int delete_query(char *filename, struct ompi_info_t *info, +static int delete_query(const char *filename, struct ompi_info_t *info, struct mca_io_base_delete_t **private_data, bool *usable, int *priority) { @@ 
-295,7 +345,7 @@ static int delete_query(char *filename, struct ompi_info_t *info, return OMPI_SUCCESS; } -static int delete_select(char *filename, struct ompi_info_t *info, +static int delete_select(const char *filename, struct ompi_info_t *info, struct mca_io_base_delete_t *private_data) { int ret; @@ -306,6 +356,16 @@ static int delete_select(char *filename, struct ompi_info_t *info, return ret; } + +static int register_datarep(const char * datarep, + MPI_Datarep_conversion_function* read_fn, + MPI_Datarep_conversion_function* write_fn, + MPI_Datarep_extent_function* extent_fn, + void* state) +{ + return OMPI_ERROR; +} + /* static int io_progress (void) { diff --git a/ompi/mca/io/ompio/io_ompio_file_open.c b/ompi/mca/io/ompio/io_ompio_file_open.c index a6f45683a43..1fd62f399c7 100644 --- a/ompi/mca/io/ompio/io_ompio_file_open.c +++ b/ompi/mca/io/ompio/io_ompio_file_open.c @@ -1,20 +1,23 @@ /* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2015 University of Houston. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2017 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ */ #include "ompi_config.h" @@ -39,7 +42,7 @@ int mca_io_ompio_file_open (ompi_communicator_t *comm, - char *filename, + const char *filename, int amode, ompi_info_t *info, ompi_file_t *fh) @@ -53,14 +56,13 @@ mca_io_ompio_file_open (ompi_communicator_t *comm, return OMPI_ERR_OUT_OF_RESOURCE; } - + /*save pointer back to the file_t structure */ + data->ompio_fh.f_fh = fh; ret = ompio_io_ompio_file_open(comm,filename,amode,info,&data->ompio_fh,use_sharedfp); if ( OMPI_SUCCESS == ret ) { fh->f_flags |= OMPIO_FILE_IS_OPEN; - /*save pointer back to the file_t structure */ - data->ompio_fh.f_fh = fh; } @@ -71,21 +73,24 @@ mca_io_ompio_file_open (ompi_communicator_t *comm, int ompio_io_ompio_file_open (ompi_communicator_t *comm, - char *filename, + const char *filename, int amode, ompi_info_t *info, mca_io_ompio_file_t *ompio_fh, bool use_sharedfp) { int ret = OMPI_SUCCESS; int remote_arch; - + + + ompio_fh->f_iov_type = MPI_DATATYPE_NULL; + ompio_fh->f_comm = MPI_COMM_NULL; if ( ((amode&MPI_MODE_RDONLY)?1:0) + ((amode&MPI_MODE_RDWR)?1:0) + ((amode&MPI_MODE_WRONLY)?1:0) != 1 ) { return MPI_ERR_AMODE; } - if ((amode & MPI_MODE_RDONLY) && + if ((amode & MPI_MODE_RDONLY) && ((amode & MPI_MODE_CREATE) || (amode & MPI_MODE_EXCL))) { return MPI_ERR_AMODE; } @@ -94,7 +99,6 @@ ompio_io_ompio_file_open (ompi_communicator_t *comm, return MPI_ERR_AMODE; } - ompio_fh->f_iov_type = MPI_DATATYPE_NULL; ompio_fh->f_rank = ompi_comm_rank (comm); ompio_fh->f_size = ompi_comm_size (comm); remote_arch = opal_local_arch; @@ -110,7 +114,6 @@ ompio_io_ompio_file_open 
(ompi_communicator_t *comm, /* No need to duplicate the communicator if the file_open is called from the sharedfp component, since the comm used as an input is already a dup of the user level comm. */ - ompio_fh->f_flags |= OMPIO_SHAREDFP_IS_SET; ompio_fh->f_comm = comm; } @@ -126,8 +129,8 @@ ompio_io_ompio_file_open (ompi_communicator_t *comm, ompio_fh->f_split_coll_in_use = false; /*Initialize the print_queues queues here!*/ - coll_write_time = (print_queue *) malloc (sizeof(print_queue)); - coll_read_time = (print_queue *) malloc (sizeof(print_queue)); + coll_write_time = (mca_io_ompio_print_queue *) malloc (sizeof(mca_io_ompio_print_queue)); + coll_read_time = (mca_io_ompio_print_queue *) malloc (sizeof(mca_io_ompio_print_queue)); ompi_io_ompio_initialize_print_queue(coll_write_time); ompi_io_ompio_initialize_print_queue(coll_read_time); @@ -149,9 +152,9 @@ ompio_io_ompio_file_open (ompi_communicator_t *comm, ompio_fh->f_set_aggregator_props=ompi_io_ompio_set_aggregator_props; ompio_fh->f_full_print_queue=ompi_io_ompio_full_print_queue; - ompio_fh->f_register_print_entry=ompi_io_ompio_register_print_entry; + ompio_fh->f_register_print_entry=ompi_io_ompio_register_print_entry; - /* This fix is needed for data seiving to work with + /* This fix is needed for data seiving to work with two-phase collective I/O */ if ((amode & MPI_MODE_WRONLY)){ amode -= MPI_MODE_WRONLY; @@ -183,7 +186,7 @@ ompio_io_ompio_file_open (ompi_communicator_t *comm, if ( true == use_sharedfp ) { if (OMPI_SUCCESS != (ret = mca_sharedfp_base_file_select (ompio_fh, NULL))) { - opal_output ( ompi_io_base_framework.framework_output, + opal_output ( ompi_io_base_framework.framework_output, "mca_sharedfp_base_file_select() failed\n"); ompio_fh->f_sharedfp = NULL; /*module*/ /* Its ok to not have a shared file pointer module as long as the shared file @@ -191,34 +194,11 @@ ompio_io_ompio_file_open (ompi_communicator_t *comm, ** function will return an error code. 
*/ } - - /* open the file once more for the shared file pointer if required. - ** Per default, the shared file pointer specific actions are however - ** only performed on first access of the shared file pointer, except - ** for the addproc sharedfp component. - ** - ** Lazy open does not work for the addproc sharedfp - ** component since it starts by spawning a process using MPI_Comm_spawn. - ** For this, the first operation has to be collective which we can - ** not guarantuee outside of the MPI_File_open operation. - */ - if ( NULL != ompio_fh->f_sharedfp && - true == use_sharedfp && - (!mca_io_ompio_sharedfp_lazy_open || - !strcmp (ompio_fh->f_sharedfp_component->mca_component_name, - "addproc") )) { - ret = ompio_fh->f_sharedfp->sharedfp_file_open(comm, - filename, - amode, - info, - ompio_fh); - - if ( OMPI_SUCCESS != ret ) { - goto fn_fail; - } - } } - + else { + ompio_fh->f_flags |= OMPIO_SHAREDFP_IS_SET; + } + /*Determine topology information if set*/ if (ompio_fh->f_comm->c_flags & OMPI_COMM_CART){ ret = mca_io_ompio_cart_based_grouping(ompio_fh); @@ -226,39 +206,70 @@ ompio_io_ompio_file_open (ompi_communicator_t *comm, ret = MPI_ERR_FILE; } } - + ret = ompio_fh->f_fs->fs_file_open (comm, filename, amode, info, ompio_fh); - - - if ( OMPI_SUCCESS != ret ) { ret = MPI_ERR_FILE; goto fn_fail; } + if ( true == use_sharedfp ) { + /* open the file once more for the shared file pointer if required. + ** Can be disabled by the user if no shared file pointer operations + ** are used by his application. + */ + if ( NULL != ompio_fh->f_sharedfp && + !mca_io_ompio_sharedfp_lazy_open ) { + ret = ompio_fh->f_sharedfp->sharedfp_file_open(comm, + filename, + amode, + info, + ompio_fh); + + if ( OMPI_SUCCESS != ret ) { + goto fn_fail; + } + } + } + /* If file has been opened in the append mode, move the internal file pointer of OMPIO to the very end of the file. 
*/ if ( ompio_fh->f_amode & MPI_MODE_APPEND ) { OMPI_MPI_OFFSET_TYPE current_size; + mca_sharedfp_base_module_t * shared_fp_base_module; ompio_fh->f_fs->fs_file_get_size( ompio_fh, &current_size); ompi_io_ompio_set_explicit_offset (ompio_fh, current_size); + if ( true == use_sharedfp ) { + if ( NULL != ompio_fh->f_sharedfp && + !mca_io_ompio_sharedfp_lazy_open ) { + + shared_fp_base_module = ompio_fh->f_sharedfp; + ret = shared_fp_base_module->sharedfp_seek(ompio_fh,current_size, MPI_SEEK_SET); + } + else { + opal_output(1, "mca_common_ompio_file_open: Could not adjust position of " + "shared file pointer with MPI_MODE_APPEND\n"); + ret = MPI_ERR_OTHER; + goto fn_fail; + } + } } - + return OMPI_SUCCESS; fn_fail: /* no need to free resources here, since the destructor - * is calling mca_io_ompio_file_close, which actually gets + * is calling mca_io_ompio_file_close, which actually gets *rid of all allocated memory items */ return ret; @@ -290,6 +301,13 @@ ompio_io_ompio_file_close (mca_io_ompio_file_t *ompio_fh) int delete_flag = 0; char name[256]; + ret = ompio_fh->f_comm->c_coll.coll_barrier ( ompio_fh->f_comm, ompio_fh->f_comm->c_coll.coll_barrier_module); + if ( OMPI_SUCCESS != ret ) { + /* Not sure what to do */ + opal_output (1,"mca_common_ompio_file_close: error in Barrier \n"); + return ret; + } + if(mca_io_ompio_coll_timing_info){ strcpy (name, "WRITE"); if (!ompi_io_ompio_empty_print_queue(WRITE_PRINT_QUEUE)){ @@ -319,14 +337,27 @@ ompio_io_ompio_file_close (mca_io_ompio_file_t *ompio_fh) if( NULL != ompio_fh->f_sharedfp ){ ret = ompio_fh->f_sharedfp->sharedfp_file_close(ompio_fh); } - ret = ompio_fh->f_fs->fs_file_close (ompio_fh); + if ( NULL != ompio_fh->f_fs ) { + /* The pointer might not be set if file_close() is + ** called from the file destructor in case of an error + ** during file_open() + */ + ret = ompio_fh->f_fs->fs_file_close (ompio_fh); + } if ( delete_flag && 0 == ompio_fh->f_rank ) { mca_io_ompio_file_delete ( ompio_fh->f_filename, MPI_INFO_NULL
); } - mca_fs_base_file_unselect (ompio_fh); - mca_fbtl_base_file_unselect (ompio_fh); - mca_fcoll_base_file_unselect (ompio_fh); + if ( NULL != ompio_fh->f_fs ) { + mca_fs_base_file_unselect (ompio_fh); + } + if ( NULL != ompio_fh->f_fbtl ) { + mca_fbtl_base_file_unselect (ompio_fh); + } + + if ( NULL != ompio_fh->f_fcoll ) { + mca_fcoll_base_file_unselect (ompio_fh); + } if ( NULL != ompio_fh->f_sharedfp) { mca_sharedfp_base_file_unselect (ompio_fh); } @@ -360,27 +391,45 @@ ompio_io_ompio_file_close (mca_io_ompio_file_t *ompio_fh) ompio_fh->f_datarep = NULL; } - + if (MPI_DATATYPE_NULL != ompio_fh->f_iov_type) { ompi_datatype_destroy (&ompio_fh->f_iov_type); } - if (MPI_COMM_NULL != ompio_fh->f_comm && (ompio_fh->f_flags & OMPIO_SHAREDFP_IS_SET) ) { + if ( MPI_DATATYPE_NULL != ompio_fh->f_etype ) { + ompi_datatype_destroy (&ompio_fh->f_etype); + } + if ( MPI_DATATYPE_NULL != ompio_fh->f_filetype ){ + ompi_datatype_destroy (&ompio_fh->f_filetype); + } + + if ( MPI_DATATYPE_NULL != ompio_fh->f_orig_filetype ){ + ompi_datatype_destroy (&ompio_fh->f_orig_filetype); + } + + + if (MPI_COMM_NULL != ompio_fh->f_comm && !(ompio_fh->f_flags & OMPIO_SHAREDFP_IS_SET) ) { ompi_comm_free (&ompio_fh->f_comm); } - + return ret; } -int mca_io_ompio_file_delete (char *filename, - struct ompi_info_t *info) +int mca_io_ompio_file_delete (const char *filename, + struct ompi_info_t *info) { int ret = OMPI_SUCCESS; ret = unlink(filename); - if (0 > ret) { - return OMPI_ERROR; + if (0 > ret ) { + if ( ENOENT == errno ) { + return MPI_ERR_NO_SUCH_FILE; + } else { + opal_output (0, "mca_io_ompio_file_delete: Could not remove file %s errno = %d %s\n", filename, + errno, strerror(errno)); + return MPI_ERR_ACCESS; + } } return OMPI_SUCCESS; @@ -400,27 +449,40 @@ mca_io_ompio_file_preallocate (ompi_file_t *fh, tmp = diskspace; - data->ompio_fh.f_comm->c_coll.coll_bcast (&tmp, - 1, - OMPI_OFFSET_DATATYPE, - OMPIO_ROOT, - data->ompio_fh.f_comm, - 
data->ompio_fh.f_comm->c_coll.coll_bcast_module); + ret = data->ompio_fh.f_comm->c_coll.coll_bcast (&tmp, + 1, + OMPI_OFFSET_DATATYPE, + OMPIO_ROOT, + data->ompio_fh.f_comm, + data->ompio_fh.f_comm->c_coll.coll_bcast_module); + if ( OMPI_SUCCESS != ret ) { + return OMPI_ERROR; + } if (tmp != diskspace) { return OMPI_ERROR; } + ret = data->ompio_fh.f_fs->fs_file_get_size (&data->ompio_fh, + &current_size); + if ( OMPI_SUCCESS != ret ) { + return OMPI_ERROR; + } + + if ( current_size > diskspace ) { + return OMPI_SUCCESS; + } + /* ROMIO explanation - On file systems with no preallocation function, we have to - explicitly write to allocate space. Since there could be holes in the file, - we need to read up to the current file size, write it back, - and then write beyond that depending on how much + On file systems with no preallocation function, we have to + explicitly write to allocate space. Since there could be holes in the file, + we need to read up to the current file size, write it back, + and then write beyond that depending on how much preallocation is needed.
*/ if (OMPIO_ROOT == data->ompio_fh.f_rank) { - ret = data->ompio_fh.f_fs->fs_file_get_size (&data->ompio_fh, - &current_size); + OMPI_MPI_OFFSET_TYPE prev_offset; + ompio_io_ompio_file_get_position (&data->ompio_fh, &prev_offset ); size = diskspace; if (size > current_size) { @@ -432,7 +494,8 @@ mca_io_ompio_file_preallocate (ompi_file_t *fh, buf = (char *) malloc (OMPIO_PREALLOC_MAX_BUF_SIZE); if (NULL == buf) { opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; } written = 0; @@ -443,11 +506,11 @@ mca_io_ompio_file_preallocate (ompi_file_t *fh, } ret = mca_io_ompio_file_read (fh, buf, len, MPI_BYTE, status); if (ret != OMPI_SUCCESS) { - return OMPI_ERROR; + goto exit; } ret = mca_io_ompio_file_write (fh, buf, len, MPI_BYTE, status); if (ret != OMPI_SUCCESS) { - return OMPI_ERROR; + goto exit; } written += len; } @@ -464,18 +527,25 @@ mca_io_ompio_file_preallocate (ompi_file_t *fh, } ret = mca_io_ompio_file_write (fh, buf, len, MPI_BYTE, status); if (ret != OMPI_SUCCESS) { - return OMPI_ERROR; + goto exit; } written += len; } } - if (NULL != buf) { - free (buf); - buf = NULL; - } + + // This operation should not affect file pointer position.
+ ompi_io_ompio_set_explicit_offset ( &data->ompio_fh, prev_offset); } - fh->f_comm->c_coll.coll_barrier (fh->f_comm, - fh->f_comm->c_coll.coll_barrier_module); + +exit: + free ( buf ); + fh->f_comm->c_coll.coll_bcast ( &ret, 1, MPI_INT, OMPIO_ROOT, fh->f_comm, + fh->f_comm->c_coll.coll_bcast_module); + + if ( diskspace > current_size ) { + data->ompio_fh.f_fs->fs_file_set_size (&data->ompio_fh, diskspace); + } + return ret; } @@ -491,18 +561,33 @@ mca_io_ompio_file_set_size (ompi_file_t *fh, tmp = size; - data->ompio_fh.f_comm->c_coll.coll_bcast (&tmp, - 1, - OMPI_OFFSET_DATATYPE, - OMPIO_ROOT, - data->ompio_fh.f_comm, - data->ompio_fh.f_comm->c_coll.coll_bcast_module); + ret = data->ompio_fh.f_comm->c_coll.coll_bcast (&tmp, + 1, + OMPI_OFFSET_DATATYPE, + OMPIO_ROOT, + data->ompio_fh.f_comm, + data->ompio_fh.f_comm->c_coll.coll_bcast_module); + if ( OMPI_SUCCESS != ret ) { + opal_output(1, ",mca_io_ompio_file_set_size: error in bcast\n"); + return ret; + } if (tmp != size) { return OMPI_ERROR; } ret = data->ompio_fh.f_fs->fs_file_set_size (&data->ompio_fh, size); + if ( OMPI_SUCCESS != ret ) { + opal_output(1, ",mca_io_ompio_file_set_size: error in fs->set_size\n"); + return ret; + } + + ret = data->ompio_fh.f_comm->c_coll.coll_barrier (data->ompio_fh.f_comm, + data->ompio_fh.f_comm->c_coll.coll_barrier_module); + if ( OMPI_SUCCESS != ret ) { + opal_output(1, ",mca_io_ompio_file_set_size: error in barrier\n"); + return ret; + } return ret; } @@ -550,7 +635,7 @@ int mca_io_ompio_file_set_info (ompi_file_t *fh, ompi_info_t *info) { int ret = OMPI_SUCCESS; - + if ( MPI_INFO_NULL == fh->f_info ) { /* OBJ_RELEASE(MPI_INFO_NULL); */ } @@ -569,15 +654,15 @@ int mca_io_ompio_file_get_info (ompi_file_t *fh, { int ret = OMPI_SUCCESS; ompi_info_t *info=NULL; - - if ( MPI_INFO_NULL == fh->f_info ) { - *info_used = MPI_INFO_NULL; + + info = OBJ_NEW(ompi_info_t); + if (NULL == info) { + return MPI_ERR_INFO; } - else { - info = OBJ_NEW(ompi_info_t); + if (MPI_INFO_NULL != 
fh->f_info) { ret = ompi_info_dup (fh->f_info, &info); - *info_used = info; } + *info_used = info; return ret; } @@ -677,7 +762,7 @@ mca_io_ompio_file_seek (ompi_file_t *fh, } break; case MPI_SEEK_END: - ret = data->ompio_fh.f_fs->fs_file_get_size (&data->ompio_fh, + ret = data->ompio_fh.f_fs->fs_file_get_size (&data->ompio_fh, &temp_offset); offset += temp_offset; if (offset < 0 || OMPI_SUCCESS != ret) { @@ -688,7 +773,7 @@ mca_io_ompio_file_seek (ompi_file_t *fh, return OMPI_ERROR; } - ret = ompi_io_ompio_set_explicit_offset (&data->ompio_fh, + ret = ompi_io_ompio_set_explicit_offset (&data->ompio_fh, offset/data->ompio_fh.f_etype_size); return ret; } @@ -700,12 +785,12 @@ mca_io_ompio_file_get_position (ompi_file_t *fd, int ret=OMPI_SUCCESS; mca_io_ompio_data_t *data=NULL; mca_io_ompio_file_t *fh=NULL; - + data = (mca_io_ompio_data_t *) fd->f_io_selected_data; fh = &data->ompio_fh; - + ret = ompio_io_ompio_file_get_position (fh, offset); - + return ret; } @@ -714,7 +799,7 @@ ompio_io_ompio_file_get_position (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *offset) { OMPI_MPI_OFFSET_TYPE off; - + /* No. 
of copies of the entire file view */ off = (fh->f_offset - fh->f_disp)/fh->f_view_extent; @@ -736,35 +821,36 @@ mca_io_ompio_file_get_byte_offset (ompi_file_t *fh, { mca_io_ompio_data_t *data; int i, k, index; - size_t position; - size_t total_bytes; size_t temp_offset; data = (mca_io_ompio_data_t *) fh->f_io_selected_data; - temp_offset = data->ompio_fh.f_view_extent * + temp_offset = data->ompio_fh.f_view_extent * (offset*data->ompio_fh.f_etype_size / data->ompio_fh.f_view_size); + - position = 0; - total_bytes = (offset*data->ompio_fh.f_etype_size) % data->ompio_fh.f_view_size; + i = (offset*data->ompio_fh.f_etype_size) % data->ompio_fh.f_view_size; index = 0; - i = total_bytes; k = 0; while (1) { - k += data->ompio_fh.f_decoded_iov[index].iov_len; + k = data->ompio_fh.f_decoded_iov[index].iov_len; if (i >= k) { - i = i - data->ompio_fh.f_decoded_iov[index].iov_len; - position += data->ompio_fh.f_decoded_iov[index].iov_len; - index = index+1; + i -= k; + index++; + if ( 0 == i ) { + k=0; + break; + } } else { + k=i; break; } } *disp = data->ompio_fh.f_disp + temp_offset + - (OMPI_MPI_OFFSET_TYPE)(intptr_t)data->ompio_fh.f_decoded_iov[index].iov_base; + (OMPI_MPI_OFFSET_TYPE)(intptr_t)data->ompio_fh.f_decoded_iov[index].iov_base + k; return OMPI_SUCCESS; } @@ -828,7 +914,7 @@ mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh) int coords_tmp[2] = { 0 }; cart_topo_components cart_topo; - + ompio_fh->f_comm->c_topo->topo.cart.cartdim_get(ompio_fh->f_comm, &cart_topo.ndims); cart_topo.dims = (int*)malloc (cart_topo.ndims * sizeof(int)); @@ -852,7 +938,7 @@ mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh) cart_topo.dims, cart_topo.periods, cart_topo.coords); - + ompio_fh->f_init_procs_per_group = cart_topo.dims[1]; //number of elements per row ompio_fh->f_init_num_aggrs = cart_topo.dims[0]; //number of rows @@ -862,7 +948,7 @@ mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh) opal_output (1, "OUT OF MEMORY\n"); return 
OMPI_ERR_OUT_OF_RESOURCE; } - + for(k = 0; k < cart_topo.dims[0]; k++){ coords_tmp[0] = k; coords_tmp[1] = k * cart_topo.dims[1]; @@ -870,7 +956,7 @@ mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh) ompio_fh->f_init_aggr_list[k] = tmp_rank; //change this to use get rank } - //Initial Grouping + //Initial Grouping ompio_fh->f_init_procs_in_group = (int*)malloc (ompio_fh->f_init_procs_per_group * sizeof(int)); if (NULL == ompio_fh->f_init_procs_in_group) { opal_output (1, "OUT OF MEMORY\n"); @@ -879,9 +965,9 @@ mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh) for (j=0 ; j< ompio_fh->f_size ; j++) { ompio_fh->f_comm->c_topo->topo.cart.cart_coords (ompio_fh->f_comm, j, cart_topo.ndims, coords_tmp); if (coords_tmp[0] == cart_topo.coords[0]) { - if ((coords_tmp[1]/ompio_fh->f_init_procs_per_group) == + if ((coords_tmp[1]/ompio_fh->f_init_procs_per_group) == (cart_topo.coords[1]/ompio_fh->f_init_procs_per_group)) { - + ompio_fh->f_init_procs_in_group[n] = j; n++; } @@ -907,6 +993,6 @@ mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh) free (cart_topo.coords); cart_topo.coords = NULL; } - + return OMPI_SUCCESS; } diff --git a/ompi/mca/io/ompio/io_ompio_file_read.c b/ompi/mca/io/ompio/io_ompio_file_read.c index 2cf699c53f5..d60c181912f 100644 --- a/ompi/mca/io/ompio/io_ompio_file_read.c +++ b/ompi/mca/io/ompio/io_ompio_file_read.c @@ -2,18 +2,18 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,13 +34,13 @@ #include "math.h" #include -/* Read and write routines are split into two interfaces. -** The -** mca_io_ompio_file_read/write[_at] -** +/* Read and write routines are split into two interfaces. +** The +** mca_io_ompio_file_read/write[_at] +** ** routines are the ones registered with the ompio modules. ** The -** +** ** ompio_io_ompio_file_read/write[_at] ** ** routesin are used e.g. from the shared file pointer modules. @@ -98,15 +98,20 @@ int ompio_io_ompio_file_read (mca_io_ompio_file_t *fh, return ret; } - ompi_io_ompio_decode_datatype (fh, - datatype, - count, - buf, - &max_data, - &decoded_iov, + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, &iov_count); - bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + if ( -1 == mca_io_ompio_cycle_buffer_size ) { + bytes_per_cycle = max_data; + } + else { + bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + } cycles = ceil((float)max_data/bytes_per_cycle); #if 0 @@ -117,17 +122,17 @@ int ompio_io_ompio_file_read (mca_io_ompio_file_t *fh, for (index = 0; index < cycles; index++) { - mca_io_ompio_build_io_array ( fh, - index, - cycles, - bytes_per_cycle, - max_data, - iov_count, - decoded_iov, - &i, - &j, + mca_io_ompio_build_io_array ( fh, + index, + cycles, + bytes_per_cycle, + max_data, + iov_count, + decoded_iov, + &i, + &j, &total_bytes_read); - + if (fh->f_num_of_io_entries) { ret_code = fh->f_fbtl->fbtl_preadv (fh); if ( 0<= ret_code ) { @@ -179,7 +184,7 @@ int ompio_io_ompio_file_read_at (mca_io_ompio_file_t *fh, { int ret = OMPI_SUCCESS; OMPI_MPI_OFFSET_TYPE prev_offset; - + ompio_io_ompio_file_get_position (fh, &prev_offset ); ompi_io_ompio_set_explicit_offset (fh, offset); @@ -225,11 +230,14 @@ int 
ompio_io_ompio_file_iread (mca_io_ompio_file_t *fh, ompio_req = OBJ_NEW(mca_ompio_request_t); ompio_req->req_type = MCA_OMPIO_REQUEST_READ; + ompio_req->req_ompi.req_state = OMPI_REQUEST_ACTIVE; if ( 0 == count ) { - ompi_request_complete (&ompio_req->req_ompi, 0); ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; ompio_req->req_ompi.req_status._ucount = 0; + ompi_request_complete (&ompio_req->req_ompi, false); + *request = (ompi_request_t *) ompio_req; + return OMPI_SUCCESS; } @@ -239,33 +247,33 @@ int ompio_io_ompio_file_iread (mca_io_ompio_file_t *fh, size_t total_bytes_read = 0; /* total bytes that have been read*/ uint32_t iov_count = 0; struct iovec *decoded_iov = NULL; - - size_t max_data = 0; + + size_t max_data = 0; int i = 0; /* index into the decoded iovec of the buffer */ int j = 0; /* index into the file vie iovec */ - - ompi_io_ompio_decode_datatype (fh, - datatype, - count, - buf, - &max_data, - &decoded_iov, + + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, &iov_count); - + // Non-blocking operations have to occur in a single cycle j = fh->f_index_in_file_view; - - mca_io_ompio_build_io_array ( fh, - 0, // index - 1, // no. of cyces - max_data, // setting bytes per cycle to match data - max_data, - iov_count, - decoded_iov, - &i, - &j, + + mca_io_ompio_build_io_array ( fh, + 0, // index + 1, // no. 
of cyces + max_data, // setting bytes per cycle to match data + max_data, + iov_count, + decoded_iov, + &i, + &j, &total_bytes_read); - + if (fh->f_num_of_io_entries) { fh->f_fbtl->fbtl_ipreadv (fh, (ompi_request_t *) ompio_req); } @@ -293,9 +301,9 @@ int ompio_io_ompio_file_iread (mca_io_ompio_file_t *fh, ompi_status_public_t status; ret = ompio_io_ompio_file_read (fh, buf, count, datatype, &status); - ompi_request_complete (&ompio_req->req_ompi, 0); ompio_req->req_ompi.req_status.MPI_ERROR = ret; ompio_req->req_ompi.req_status._ucount = status._ucount; + ompi_request_complete (&ompio_req->req_ompi, false); } *request = (ompi_request_t *) ompio_req; @@ -339,9 +347,9 @@ int ompio_io_ompio_file_iread_at (mca_io_ompio_file_t *fh, /* An explicit offset file operation is not suppsed to modify ** the internal file pointer. So reset the pointer ** to the previous value - ** It is OK to reset the position already here, althgouth + ** It is OK to reset the position already here, althgouth ** the operation might still be pending/ongoing, since - ** the entire array of have + ** the entire array of have ** already been constructed in the file_iread operation */ ompi_io_ompio_set_explicit_offset (fh, prev_offset); @@ -364,9 +372,9 @@ int mca_io_ompio_file_read_all (ompi_file_t *fh, data = (mca_io_ompio_data_t *) fh->f_io_selected_data; ret = data->ompio_fh. - f_fcoll->fcoll_file_read_all (&data->ompio_fh, - buf, - count, + f_fcoll->fcoll_file_read_all (&data->ompio_fh, + buf, + count, datatype, status); if ( MPI_STATUS_IGNORE != status ) { @@ -393,15 +401,15 @@ int mca_io_ompio_file_iread_all (ompi_file_t *fh, fp = &data->ompio_fh; if ( NULL != fp->f_fcoll->fcoll_file_iread_all ) { - ret = fp->f_fcoll->fcoll_file_iread_all (&data->ompio_fh, - buf, - count, + ret = fp->f_fcoll->fcoll_file_iread_all (&data->ompio_fh, + buf, + count, datatype, request); } else { - /* this fcoll component does not support non-blocking - collective I/O operations. 
WE fake it with + /* this fcoll component does not support non-blocking + collective I/O operations. WE fake it with individual non-blocking I/O operations. */ ret = ompio_io_ompio_file_iread ( fp, buf, count, datatype, request ); } @@ -457,24 +465,35 @@ int mca_io_ompio_file_iread_at_all (ompi_file_t *fh, { int ret = OMPI_SUCCESS; mca_io_ompio_data_t *data; - mca_io_ompio_file_t *fp=NULL; - OMPI_MPI_OFFSET_TYPE prev_offset; data = (mca_io_ompio_data_t *) fh->f_io_selected_data; - fp = &data->ompio_fh; + + ret = ompio_io_ompio_file_iread_at_all ( &data->ompio_fh, offset, buf, count, datatype, request ); + return ret; +} + +int ompio_io_ompio_file_iread_at_all (mca_io_ompio_file_t *fp, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request) +{ + int ret = OMPI_SUCCESS; + OMPI_MPI_OFFSET_TYPE prev_offset; ompio_io_ompio_file_get_position (fp, &prev_offset ); ompi_io_ompio_set_explicit_offset (fp, offset); if ( NULL != fp->f_fcoll->fcoll_file_iread_all ) { - ret = fp->f_fcoll->fcoll_file_iread_all (&data->ompio_fh, - buf, - count, + ret = fp->f_fcoll->fcoll_file_iread_all (fp, + buf, + count, datatype, request); } else { - /* this fcoll component does not support non-blocking - collective I/O operations. WE fake it with + /* this fcoll component does not support non-blocking + collective I/O operations. WE fake it with individual non-blocking I/O operations. 
*/ ret = ompio_io_ompio_file_iread ( fp, buf, count, datatype, request ); } @@ -484,7 +503,7 @@ int mca_io_ompio_file_iread_at_all (ompi_file_t *fh, return ret; } -/* Infrastructure for shared file pointer operations +/* Infrastructure for shared file pointer operations ** (individual and ordered)*/ /******************************************************/ int mca_io_ompio_file_read_shared (ompi_file_t *fp, @@ -658,26 +677,16 @@ int mca_io_ompio_file_read_at_all_begin (ompi_file_t *fh, { int ret = OMPI_SUCCESS; mca_io_ompio_data_t *data; - + mca_io_ompio_file_t *fp=NULL; data = (mca_io_ompio_data_t *) fh->f_io_selected_data; - ret = ompio_io_ompio_file_read_at_all_begin ( &data->ompio_fh, offset, buf, count, datatype ); - return ret; -} - -int ompio_io_ompio_file_read_at_all_begin (mca_io_ompio_file_t *fh, - OMPI_MPI_OFFSET_TYPE offset, - void *buf, - int count, - struct ompi_datatype_t *datatype) -{ - int ret = OMPI_SUCCESS; + fp = &data->ompio_fh; - if ( true == fh->f_split_coll_in_use ) { + if ( true == fp->f_split_coll_in_use ) { printf("Only one split collective I/O operation allowed per file handle at any given point in time!\n"); return MPI_ERR_REQUEST; } - ret = mca_io_ompio_file_iread_at_all ( fh->f_fh, offset, buf, count, datatype, &fh->f_split_coll_req ); - fh->f_split_coll_in_use = true; + ret = ompio_io_ompio_file_iread_at_all ( fp, offset, buf, count, datatype, &fp->f_split_coll_req ); + fp->f_split_coll_in_use = true; return ret; } @@ -687,20 +696,13 @@ int mca_io_ompio_file_read_at_all_end (ompi_file_t *fh, { int ret = OMPI_SUCCESS; mca_io_ompio_data_t *data; + mca_io_ompio_file_t *fp=NULL; data = (mca_io_ompio_data_t *) fh->f_io_selected_data; - ret = ompio_io_ompio_file_read_at_all_end ( &data->ompio_fh, buf, status ); - return ret; -} - -int ompio_io_ompio_file_read_at_all_end (mca_io_ompio_file_t *ompio_fh, - void *buf, - ompi_status_public_t * status) -{ - int ret = OMPI_SUCCESS; - ret = ompi_request_wait ( &ompio_fh->f_split_coll_req, status ); 
+ fp = &data->ompio_fh; + ret = ompi_request_wait ( &fp->f_split_coll_req, status ); /* remove the flag again */ - ompio_fh->f_split_coll_in_use = false; + fp->f_split_coll_in_use = false; return ret; } diff --git a/ompi/mca/io/ompio/io_ompio_file_set_view.c b/ompi/mca/io/ompio/io_ompio_file_set_view.c index 812e3096b73..311dab63ad6 100644 --- a/ompi/mca/io/ompio/io_ompio_file_set_view.c +++ b/ompi/mca/io/ompio/io_ompio_file_set_view.c @@ -2,14 +2,14 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2008-2017 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -28,6 +28,7 @@ #include "ompi/mca/fs/base/base.h" #include "ompi/mca/fcoll/fcoll.h" #include "ompi/mca/fcoll/base/base.h" +#include "ompi/mca/sharedfp/sharedfp.h" #include "ompi/mca/pml/pml.h" #include "opal/datatype/opal_convertor.h" #include "ompi/datatype/ompi_datatype.h" @@ -38,55 +39,124 @@ #include "io_ompio.h" static OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *); +static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype ); +static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype ) +{ + ompi_datatype_t *type; + if( ompi_datatype_is_predefined(oldtype) ) { + OBJ_RETAIN(oldtype); + *newtype = oldtype; + return OMPI_SUCCESS; + } + + if ( OMPI_SUCCESS != ompi_datatype_duplicate (oldtype, &type)){ + ompi_datatype_destroy (&type); + return MPI_ERR_INTERN; + } + + ompi_datatype_set_args( type, 0, NULL, 0, NULL, 1, &oldtype, MPI_COMBINER_DUP ); + + *newtype = type; + return OMPI_SUCCESS; +} int mca_io_ompio_set_view_internal(mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE disp, ompi_datatype_t *etype, ompi_datatype_t *filetype, - char *datarep, + const char *datarep, ompi_info_t *info) { - + size_t max_data = 0; int i; int num_groups = 0; contg *contg_groups; + size_t ftype_size; + OPAL_PTRDIFF_TYPE ftype_extent, lb, ub; + ompi_datatype_t *newfiletype; + + if ( NULL != fh->f_etype ) { + ompi_datatype_destroy (&fh->f_etype); + } + if ( NULL != fh->f_filetype ) { + ompi_datatype_destroy (&fh->f_filetype); + } + if ( NULL != fh->f_orig_filetype ) { + ompi_datatype_destroy (&fh->f_orig_filetype); + } + if (NULL != fh->f_decoded_iov) { + free (fh->f_decoded_iov); + fh->f_decoded_iov = NULL; + } + + if (NULL != fh->f_datarep) { + free (fh->f_datarep); + fh->f_datarep = NULL; + } + + /* Reset the flags first */ + if ( fh->f_flags & OMPIO_CONTIGUOUS_FVIEW ) { + fh->f_flags &= ~OMPIO_CONTIGUOUS_FVIEW; + } + if ( fh->f_flags & OMPIO_UNIFORM_FVIEW ) { + fh->f_flags &= 
~OMPIO_UNIFORM_FVIEW; + } + fh->f_flags |= OMPIO_FILE_VIEW_IS_SET; + fh->f_datarep = strdup (datarep); + datatype_duplicate (filetype, &fh->f_orig_filetype ); + + opal_datatype_get_extent(&filetype->super, &lb, &ftype_extent); + opal_datatype_type_size (&filetype->super, &ftype_size); + + if ( etype == filetype && + ompi_datatype_is_predefined (filetype ) && + ftype_extent == (OPAL_PTRDIFF_TYPE)ftype_size ){ + ompi_datatype_create_contiguous(MCA_IO_DEFAULT_FILE_VIEW_SIZE, + &ompi_mpi_byte.dt, + &newfiletype); + ompi_datatype_commit (&newfiletype); + } + else { + newfiletype = filetype; + } - - MPI_Aint lb,ub; fh->f_iov_count = 0; fh->f_disp = disp; fh->f_offset = disp; fh->f_total_bytes = 0; - - ompi_io_ompio_decode_datatype (fh, - filetype, - 1, - NULL, + fh->f_index_in_file_view=0; + fh->f_position_in_file_view=0; + + ompi_io_ompio_decode_datatype (fh, + newfiletype, + 1, + NULL, &max_data, - &fh->f_decoded_iov, + &fh->f_decoded_iov, &fh->f_iov_count); - opal_datatype_get_extent(&filetype->super, &lb, &fh->f_view_extent); - opal_datatype_type_ub (&filetype->super, &ub); + opal_datatype_get_extent(&newfiletype->super, &lb, &fh->f_view_extent); + opal_datatype_type_ub (&newfiletype->super, &ub); opal_datatype_type_size (&etype->super, &fh->f_etype_size); - opal_datatype_type_size (&filetype->super, &fh->f_view_size); - ompi_datatype_duplicate (etype, &fh->f_etype); - ompi_datatype_duplicate (filetype, &fh->f_filetype); - + opal_datatype_type_size (&newfiletype->super, &fh->f_view_size); + datatype_duplicate (etype, &fh->f_etype); + // This file type is our own representation. The original is stored + // in orig_file type, No need to set args on this one. 
+ ompi_datatype_duplicate (newfiletype, &fh->f_filetype); + fh->f_cc_size = get_contiguous_chunk_size (fh); if (opal_datatype_is_contiguous_memory_layout(&etype->super,1)) { - if (opal_datatype_is_contiguous_memory_layout(&filetype->super,1) && + if (opal_datatype_is_contiguous_memory_layout(&filetype->super,1) && fh->f_view_extent == (OPAL_PTRDIFF_TYPE)fh->f_view_size ) { fh->f_flags |= OMPIO_CONTIGUOUS_FVIEW; } } - contg_groups = (contg*) calloc ( 1, fh->f_size * sizeof(contg)); if (NULL == contg_groups) { opal_output (1, "OUT OF MEMORY\n"); @@ -104,24 +174,46 @@ int mca_io_ompio_set_view_internal(mca_io_ompio_file_t *fh, return OMPI_ERR_OUT_OF_RESOURCE; } } - if( OMPI_SUCCESS != mca_io_ompio_fview_based_grouping(fh, + + if ( SIMPLE != mca_io_ompio_grouping_option ) { + if( OMPI_SUCCESS != mca_io_ompio_fview_based_grouping(fh, &num_groups, contg_groups)){ - opal_output(1, "mca_io_ompio_fview_based_grouping() failed\n"); - free(contg_groups); - return OMPI_ERROR; + opal_output(1, "mca_io_ompio_fview_based_grouping() failed\n"); + free(contg_groups); + return OMPI_ERROR; + } } - if( !( (fh->f_comm->c_flags & OMPI_COMM_CART) && - (num_groups == 1 || num_groups == fh->f_size)) ) { - mca_io_ompio_finalize_initial_grouping(fh, - num_groups, - contg_groups); + else { + if( OMPI_SUCCESS != mca_io_ompio_simple_grouping(fh, + &num_groups, + contg_groups)){ + opal_output(1, "mca_io_ompio_simple_grouping() failed\n"); + free(contg_groups); + return OMPI_ERROR; + } } + + + mca_io_ompio_finalize_initial_grouping(fh, + num_groups, + contg_groups); for( i = 0; i < fh->f_size; i++){ free(contg_groups[i].procs_in_contg_group); } free(contg_groups); - + + if ( etype == filetype && + ompi_datatype_is_predefined (filetype ) && + ftype_extent == (OPAL_PTRDIFF_TYPE)ftype_size ){ + ompi_datatype_destroy ( &newfiletype ); + } + + + if (OMPI_SUCCESS != mca_fcoll_base_file_select (fh, NULL)) { + opal_output(1, "mca_fcoll_base_file_select() failed\n"); + return OMPI_ERROR; + } return 
OMPI_SUCCESS; } @@ -130,51 +222,33 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp, OMPI_MPI_OFFSET_TYPE disp, ompi_datatype_t *etype, ompi_datatype_t *filetype, - char *datarep, + const char *datarep, ompi_info_t *info) { + int ret=OMPI_SUCCESS; mca_io_ompio_data_t *data; mca_io_ompio_file_t *fh; + mca_io_ompio_file_t *sh; data = (mca_io_ompio_data_t *) fp->f_io_selected_data; - fh = &data->ompio_fh; - - if (NULL != fh->f_decoded_iov) { - free (fh->f_decoded_iov); - fh->f_decoded_iov = NULL; - } - if (NULL != fh->f_datarep) { - free (fh->f_datarep); - fh->f_datarep = NULL; - } - - /* Reset the flags first */ - fh->f_flags = 0; - - fh->f_flags |= OMPIO_FILE_VIEW_IS_SET; - fh->f_datarep = strdup (datarep); - - mca_io_ompio_set_view_internal (fh, - disp, - etype, - filetype, - datarep, - info); - + /* we need to call the internal file set view twice: once for the individual + file pointer, once for the shared file pointer (if it is existent) + */ + fh = &data->ompio_fh; + ret = mca_io_ompio_set_view_internal(fh, disp, etype, filetype, datarep, info); - if (OMPI_SUCCESS != mca_fcoll_base_file_select (&data->ompio_fh, - NULL)) { - opal_output(1, "mca_fcoll_base_file_select() failed\n"); - return OMPI_ERROR; + if ( NULL != fh->f_sharedfp_data) { + sh = ((struct mca_sharedfp_base_data_t *)fh->f_sharedfp_data)->sharedfh; + ret = mca_io_ompio_set_view_internal(sh, disp, etype, filetype, datarep, info); } - return OMPI_SUCCESS; + return ret; } -int mca_io_ompio_file_get_view (struct ompi_file_t *fp, +int mca_io_ompio_file_get_view (struct ompi_file_t *fp, OMPI_MPI_OFFSET_TYPE *disp, - struct ompi_datatype_t **etype, + struct ompi_datatype_t **etype, struct ompi_datatype_t **filetype, char *datarep) { @@ -185,8 +259,8 @@ int mca_io_ompio_file_get_view (struct ompi_file_t *fp, fh = &data->ompio_fh; *disp = fh->f_disp; - ompi_datatype_duplicate (fh->f_etype, etype); - ompi_datatype_duplicate (fh->f_filetype, filetype); + datatype_duplicate (fh->f_etype, etype); + 
datatype_duplicate (fh->f_orig_filetype, filetype); strcpy (datarep, fh->f_datarep); return OMPI_SUCCESS; @@ -194,14 +268,14 @@ int mca_io_ompio_file_get_view (struct ompi_file_t *fp, OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh) { - int uniform = 0, global_uniform = 0; + int uniform = 0; OMPI_MPI_OFFSET_TYPE avg[3] = {0,0,0}; OMPI_MPI_OFFSET_TYPE global_avg[3] = {0,0,0}; int i = 0; - /* This function does two things: first, it determines the average data chunk - ** size in the file view for each process and across all processes. - ** Second, it establishes whether the view across all processes is uniform. + /* This function does two things: first, it determines the average data chunk + ** size in the file view for each process and across all processes. + ** Second, it establishes whether the view across all processes is uniform. ** By definition, uniform means: ** 1. the file view of each process has the same number of contiguous sections ** 2. each section in the file view has exactly the same size @@ -231,7 +305,11 @@ OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh) global_avg[0] = global_avg[0]/fh->f_size; global_avg[1] = global_avg[1]/fh->f_size; - if ( global_avg[0] == avg[0] && +#if 0 + /* Disabling the feature since we are not using it anyway. Saves us one allreduce operation. 
*/ + int global_uniform=0; + + if ( global_avg[0] == avg[0] && global_avg[1] == avg[1] && 0 == avg[2] && 0 == global_avg[2] ) { @@ -242,7 +320,7 @@ OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh) } /* second confirmation round to see whether all processes agree - ** on having a uniform file view or not + ** on having a uniform file view or not */ fh->f_comm->c_coll.coll_allreduce (&uniform, &global_uniform, @@ -256,15 +334,57 @@ OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh) /* yes, everybody agrees on having a uniform file view */ fh->f_flags |= OMPIO_UNIFORM_FVIEW; } - +#endif return global_avg[0]; } +int mca_io_ompio_simple_grouping(mca_io_ompio_file_t *fh, + int *num_groups, + contg *contg_groups) +{ + size_t stripe_size = (size_t) fh->f_stripe_size; + int group_size = 0; + int k=0, p=0, g=0; + int total_procs = 0; + + if ( 0 < fh->f_stripe_size ) { + stripe_size = OMPIO_DEFAULT_STRIPE_SIZE; + } + + if ( 0 != fh->f_cc_size && stripe_size > fh->f_cc_size ) { + group_size = (((int)stripe_size/(int)fh->f_cc_size) > fh->f_size ) ? fh->f_size : ((int)stripe_size/(int)fh->f_cc_size); + *num_groups = fh->f_size / group_size; + } + else if ( fh->f_cc_size <= OMPIO_CONTG_FACTOR * stripe_size) { + *num_groups = fh->f_size/OMPIO_CONTG_FACTOR > 0 ? 
(fh->f_size/OMPIO_CONTG_FACTOR) : 1 ; + group_size = OMPIO_CONTG_FACTOR; + } + else { + *num_groups = fh->f_size; + group_size = 1; + } + + for ( k=0, p=0; p<*num_groups; p++ ) { + if ( p == (*num_groups - 1) ) { + contg_groups[p].procs_per_contg_group = fh->f_size - total_procs; + } + else { + contg_groups[p].procs_per_contg_group = group_size; + total_procs +=group_size; + } + for ( g=0; gf_decoded_iov[0].iov_len; } start_offset_len[2] = fh->f_rank; - - if( OMPIO_ROOT == fh->f_rank){ - start_offsets_lens = (OMPI_MPI_OFFSET_TYPE* )malloc (3 * fh->f_size * sizeof(OMPI_MPI_OFFSET_TYPE)); - if (NULL == start_offsets_lens) { - opal_output (1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - end_offsets = (OMPI_MPI_OFFSET_TYPE* )malloc (fh->f_size * sizeof(OMPI_MPI_OFFSET_TYPE)); - if (NULL == end_offsets) { - opal_output (1, "OUT OF MEMORY\n"); - free(start_offsets_lens); - return OMPI_ERR_OUT_OF_RESOURCE; - } + start_offsets_lens = (OMPI_MPI_OFFSET_TYPE* )malloc (3 * fh->f_size * sizeof(OMPI_MPI_OFFSET_TYPE)); + if (NULL == start_offsets_lens) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; } - //Gather start offsets across processes in a group on aggregator - fh->f_comm->c_coll.coll_gather (start_offset_len, - 3, - OMPI_OFFSET_DATATYPE, - start_offsets_lens, - 3, - OMPI_OFFSET_DATATYPE, - OMPIO_ROOT, - fh->f_comm, - fh->f_comm->c_coll.coll_gather_module); - + end_offsets = (OMPI_MPI_OFFSET_TYPE* )malloc (fh->f_size * sizeof(OMPI_MPI_OFFSET_TYPE)); + if (NULL == end_offsets) { + opal_output (1, "OUT OF MEMORY\n"); + free(start_offsets_lens); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + //Allgather start offsets across processes in a group on aggregator + fh->f_comm->c_coll.coll_allgather (start_offset_len, + 3, + OMPI_OFFSET_DATATYPE, + start_offsets_lens, + 3, + OMPI_OFFSET_DATATYPE, + fh->f_comm, + fh->f_comm->c_coll.coll_allgather_module); + //Calculate contg chunk size and contg subgroups - if(OMPIO_ROOT == fh->f_rank){ - for( 
k = 0 ; k < fh->f_size; k++){ - end_offsets[k] = start_offsets_lens[3*k] + start_offsets_lens[3*k+1]; - contg_groups[k].contg_chunk_size = 0; + for( k = 0 ; k < fh->f_size; k++){ + end_offsets[k] = start_offsets_lens[3*k] + start_offsets_lens[3*k+1]; + contg_groups[k].contg_chunk_size = 0; + } + k = 0; + while( k < fh->f_size){ + if( k == 0){ + contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; + contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; + g++; + contg_groups[p].procs_per_contg_group = g; + k++; } - k = 0; - while( k < fh->f_size){ - if( k == 0){ - contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; - contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; - g++; - contg_groups[p].procs_per_contg_group = g; - k++; - } - else if( start_offsets_lens[3*k] == end_offsets[k - 1] ){ - contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; - contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; - g++; - contg_groups[p].procs_per_contg_group = g; - k++; - } - else{ - p++; - g = 0; - contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; - contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; - g++; - contg_groups[p].procs_per_contg_group = g; - k++; - } + else if( start_offsets_lens[3*k] == end_offsets[k - 1] ){ + contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; + contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; + g++; + contg_groups[p].procs_per_contg_group = g; + k++; + } + else{ + p++; + g = 0; + contg_groups[p].contg_chunk_size += start_offsets_lens[3*k+1]; + contg_groups[p].procs_in_contg_group[g] = start_offsets_lens[3*k + 2]; + g++; + contg_groups[p].procs_per_contg_group = g; + k++; } - - *num_groups = p+1; - if (NULL != start_offsets_lens) { - free (start_offsets_lens); - start_offsets_lens = NULL; - } - if (NULL != end_offsets) { - free (end_offsets); - end_offsets = NULL; - } } - - //bcast num_groups to all 
procs - fh->f_comm->c_coll.coll_bcast (num_groups, - 1, - MPI_INT, - OMPIO_ROOT, - fh->f_comm, - fh->f_comm->c_coll.coll_bcast_module); - - + + *num_groups = p+1; + free (start_offsets_lens); + free (end_offsets); + return OMPI_SUCCESS; } @@ -371,106 +471,35 @@ int mca_io_ompio_finalize_initial_grouping(mca_io_ompio_file_t *fh, int z = 0; int y = 0; - int r = 0; - - MPI_Request *sendreq = NULL , *req = NULL; - - - req = (MPI_Request *)malloc (2* sizeof(MPI_Request)); - if (NULL == req) { - return OMPI_ERR_OUT_OF_RESOURCE; - } fh->f_init_num_aggrs = num_groups; fh->f_init_aggr_list = (int*)malloc (fh->f_init_num_aggrs * sizeof(int)); if (NULL == fh->f_init_aggr_list) { opal_output (1, "OUT OF MEMORY\n"); - free(req); return OMPI_ERR_OUT_OF_RESOURCE; } - if(OMPIO_ROOT == fh->f_rank){ - sendreq = (MPI_Request *)malloc ( 2 *fh->f_size * sizeof(MPI_Request)); - if (NULL == sendreq) { - free(req); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for( z = 0 ;z < num_groups; z++){ - for( y = 0; y < contg_groups[z].procs_per_contg_group; y++){ - MCA_PML_CALL(isend(&contg_groups[z].procs_per_contg_group, - 1, - MPI_INT, - contg_groups[z].procs_in_contg_group[y], - OMPIO_PROCS_PER_GROUP_TAG, - MCA_PML_BASE_SEND_STANDARD, - fh->f_comm, - &sendreq[r++])); - - //send initial grouping distribution to all processes in the group - MCA_PML_CALL(isend(contg_groups[z].procs_in_contg_group, - contg_groups[z].procs_per_contg_group, - MPI_INT, - contg_groups[z].procs_in_contg_group[y], - OMPIO_PROCS_IN_GROUP_TAG, - MCA_PML_BASE_SEND_STANDARD, - fh->f_comm, - &sendreq[r++])); - } - } - } - - //All processes receive initial procs per group from OMPIO_ROOT - MCA_PML_CALL(irecv(&fh->f_init_procs_per_group, - 1, - MPI_INT, - OMPIO_ROOT, - OMPIO_PROCS_PER_GROUP_TAG, - fh->f_comm, - &req[0])); - - ompi_request_wait (&req[0], MPI_STATUS_IGNORE); - fh->f_init_procs_in_group = (int*)malloc (fh->f_init_procs_per_group * sizeof(int)); - if (NULL == fh->f_init_procs_in_group) { - opal_output (1, "OUT OF 
MEMORY\n"); - free(req); - if (NULL != sendreq) { - free(sendreq); + for( z = 0 ;z < num_groups; z++){ + for( y = 0; y < contg_groups[z].procs_per_contg_group; y++){ + if ( fh->f_rank == contg_groups[z].procs_in_contg_group[y] ) { + fh->f_init_procs_per_group = contg_groups[z].procs_per_contg_group; + fh->f_init_procs_in_group = (int*)malloc (fh->f_init_procs_per_group * sizeof(int)); + if (NULL == fh->f_init_procs_in_group) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + memcpy ( fh->f_init_procs_in_group, contg_groups[z].procs_in_contg_group, + contg_groups[z].procs_per_contg_group * sizeof (int)); + + } } - return OMPI_ERR_OUT_OF_RESOURCE; } - //All processes receive initial process distribution from OMPIO_ROOT - MCA_PML_CALL(irecv(fh->f_init_procs_in_group, - fh->f_init_procs_per_group, - MPI_INT, - OMPIO_ROOT, - OMPIO_PROCS_IN_GROUP_TAG, - fh->f_comm, - &req[1])); - - ompi_request_wait (&req[1], MPI_STATUS_IGNORE); - free (req); - if(OMPIO_ROOT == fh->f_rank){ - ompi_request_wait_all (r, sendreq, MPI_STATUSES_IGNORE); - free (sendreq); - } - - - /*set initial aggregator list */ - //OMPIO_ROOT broadcasts aggr list - if(OMPIO_ROOT == fh->f_rank){ - for( z = 0 ;z < num_groups; z++){ - fh->f_init_aggr_list[z] = contg_groups[z].procs_in_contg_group[0]; - } - } - - fh->f_comm->c_coll.coll_bcast (fh->f_init_aggr_list, - num_groups, - MPI_INT, - OMPIO_ROOT, - fh->f_comm, - fh->f_comm->c_coll.coll_bcast_module); - + + for( z = 0 ;z < num_groups; z++){ + fh->f_init_aggr_list[z] = contg_groups[z].procs_in_contg_group[0]; + } + + return OMPI_SUCCESS; } diff --git a/ompi/mca/io/ompio/io_ompio_file_write.c b/ompi/mca/io/ompio/io_ompio_file_write.c index 1cc22ef7366..637cafe8637 100644 --- a/ompi/mca/io/ompio/io_ompio_file_write.c +++ b/ompi/mca/io/ompio/io_ompio_file_write.c @@ -1,20 +1,22 @@ /* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2015 University of Houston. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ */ #include "ompi_config.h" @@ -36,13 +38,13 @@ #include "math.h" #include -/* Read and write routines are split into two interfaces. -** The -** mca_io_ompio_file_read/write[_at] -** +/* Read and write routines are split into two interfaces. +** The +** mca_io_ompio_file_read/write[_at] +** ** routines are the ones registered with the ompio modules. ** The -** +** ** ompio_io_ompio_file_read/write[_at] ** ** routesin are used e.g. from the shared file pointer modules. 
@@ -53,7 +55,7 @@ int mca_io_ompio_file_write (ompi_file_t *fp, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -70,7 +72,7 @@ int mca_io_ompio_file_write (ompi_file_t *fp, } int ompio_io_ompio_file_write (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -83,7 +85,7 @@ int ompio_io_ompio_file_write (mca_io_ompio_file_t *fh, struct iovec *decoded_iov = NULL; size_t bytes_per_cycle=0; size_t total_bytes_written = 0; - size_t max_data=0, real_bytes_written=0; + size_t max_data=0, real_bytes_written=0; ssize_t ret_code=0; int i = 0; /* index into the decoded iovec of the buffer */ int j = 0; /* index into the file view iovec */ @@ -95,15 +97,20 @@ int ompio_io_ompio_file_write (mca_io_ompio_file_t *fh, return ret; } - ompi_io_ompio_decode_datatype (fh, - datatype, - count, - buf, - &max_data, - &decoded_iov, + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, &iov_count); - bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + if ( -1 == mca_io_ompio_cycle_buffer_size ) { + bytes_per_cycle = max_data; + } + else { + bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + } cycles = ceil((float)max_data/bytes_per_cycle); #if 0 @@ -112,15 +119,15 @@ int ompio_io_ompio_file_write (mca_io_ompio_file_t *fh, j = fh->f_index_in_file_view; for (index = 0; index < cycles; index++) { - mca_io_ompio_build_io_array ( fh, - index, - cycles, - bytes_per_cycle, - max_data, - iov_count, - decoded_iov, - &i, - &j, + mca_io_ompio_build_io_array ( fh, + index, + cycles, + bytes_per_cycle, + max_data, + iov_count, + decoded_iov, + &i, + &j, &total_bytes_written); if (fh->f_num_of_io_entries) { @@ -151,7 +158,7 @@ int ompio_io_ompio_file_write (mca_io_ompio_file_t *fh, int mca_io_ompio_file_write_at (ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct 
ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -167,7 +174,7 @@ int mca_io_ompio_file_write_at (ompi_file_t *fh, int ompio_io_ompio_file_write_at (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -190,7 +197,7 @@ int ompio_io_ompio_file_write_at (mca_io_ompio_file_t *fh, } int mca_io_ompio_file_iwrite (ompi_file_t *fp, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request) @@ -205,7 +212,7 @@ int mca_io_ompio_file_iwrite (ompi_file_t *fp, } int ompio_io_ompio_file_iwrite (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request) @@ -215,11 +222,14 @@ int ompio_io_ompio_file_iwrite (mca_io_ompio_file_t *fh, ompio_req = OBJ_NEW(mca_ompio_request_t); ompio_req->req_type = MCA_OMPIO_REQUEST_WRITE; + ompio_req->req_ompi.req_state = OMPI_REQUEST_ACTIVE; - if ( 0 == count ) { - ompi_request_complete (&ompio_req->req_ompi, 0); + if ( 0 == count ) { ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; ompio_req->req_ompi.req_status._ucount = 0; + ompi_request_complete (&ompio_req->req_ompi, false); + *request = (ompi_request_t *) ompio_req; + return OMPI_SUCCESS; } @@ -228,43 +238,43 @@ int ompio_io_ompio_file_iwrite (mca_io_ompio_file_t *fh, uint32_t iov_count = 0; struct iovec *decoded_iov = NULL; - size_t max_data = 0; + size_t max_data = 0; size_t total_bytes_written =0; int i = 0; /* index into the decoded iovec of the buffer */ int j = 0; /* index into the file vie iovec */ - ompi_io_ompio_decode_datatype (fh, - datatype, - count, - buf, - &max_data, - &decoded_iov, + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, &iov_count); j = fh->f_index_in_file_view; /* Non blocking operations have to occur in a single cycle */ - mca_io_ompio_build_io_array ( fh, + 
mca_io_ompio_build_io_array ( fh, 0, // index of current cycle iteration 1, // number of cycles - max_data, // setting bytes_per_cycle to max_data - max_data, - iov_count, - decoded_iov, - &i, - &j, + max_data, // setting bytes_per_cycle to max_data + max_data, + iov_count, + decoded_iov, + &i, + &j, &total_bytes_written); - + if (fh->f_num_of_io_entries) { fh->f_fbtl->fbtl_ipwritev (fh, (ompi_request_t *) ompio_req); } - + if ( false == mca_io_ompio_progress_is_registered ) { // Lazy initialization of progress function to minimize impact // on other ompi functionality in case its not used. opal_progress_register (mca_io_ompio_component_progress); mca_io_ompio_progress_is_registered=true; } - + fh->f_num_of_io_entries = 0; if (NULL != fh->f_io_array) { free (fh->f_io_array); @@ -279,10 +289,10 @@ int ompio_io_ompio_file_iwrite (mca_io_ompio_file_t *fh, // This fbtl does not support non-blocking write operations ompi_status_public_t status; ret = ompio_io_ompio_file_write(fh,buf,count,datatype, &status); - - ompi_request_complete (&ompio_req->req_ompi, 0); + ompio_req->req_ompi.req_status.MPI_ERROR = ret; ompio_req->req_ompi.req_status._ucount = status._ucount; + ompi_request_complete (&ompio_req->req_ompi, false); } *request = (ompi_request_t *) ompio_req; @@ -291,14 +301,14 @@ int ompio_io_ompio_file_iwrite (mca_io_ompio_file_t *fh, int mca_io_ompio_file_iwrite_at (ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request) { int ret = OMPI_SUCCESS; mca_io_ompio_data_t *data; - + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; ret = ompio_io_ompio_file_iwrite_at(&data->ompio_fh,offset,buf,count,datatype,request); @@ -307,7 +317,7 @@ int mca_io_ompio_file_iwrite_at (ompi_file_t *fh, int ompio_io_ompio_file_iwrite_at (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request) 
@@ -326,9 +336,9 @@ int ompio_io_ompio_file_iwrite_at (mca_io_ompio_file_t *fh, /* An explicit offset file operation is not suppsed to modify ** the internal file pointer. So reset the pointer ** to the previous value - ** It is OK to reset the position already here, althgouth + ** It is OK to reset the position already here, althgouth ** the operation might still be pending/ongoing, since - ** the entire array of have + ** the entire array of have ** already been constructed in the file_iwrite operation */ ompi_io_ompio_set_explicit_offset (fh, prev_offset); @@ -339,9 +349,9 @@ int ompio_io_ompio_file_iwrite_at (mca_io_ompio_file_t *fh, /* Helper function used by both read and write operations */ /**************************************************************/ -int mca_io_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles, - size_t bytes_per_cycle, int max_data, uint32_t iov_count, - struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw ) +int mca_io_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles, + size_t bytes_per_cycle, int max_data, uint32_t iov_count, + struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw ) { OPAL_PTRDIFF_TYPE disp; int block = 1; @@ -352,7 +362,7 @@ int mca_io_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles int k = 0; /* index into the io_array */ int i = *ii; int j = *jj; - + sum_previous_length = fh->f_position_in_file_view; if ((index == cycles-1) && (max_data % bytes_per_cycle)) { @@ -361,49 +371,49 @@ int mca_io_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles else { bytes_to_write_in_cycle = bytes_per_cycle; } - - fh->f_io_array = (mca_io_ompio_io_array_t *)malloc + + fh->f_io_array = (mca_io_ompio_io_array_t *)malloc (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t)); if (NULL == fh->f_io_array) { opal_output(1, "OUT OF MEMORY\n"); return OMPI_ERR_OUT_OF_RESOURCE; } - + while (bytes_to_write_in_cycle) { /* reallocate if needed */ if 
(OMPIO_IOVEC_INITIAL_SIZE*block <= k) { block ++; - fh->f_io_array = (mca_io_ompio_io_array_t *)realloc - (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE * + fh->f_io_array = (mca_io_ompio_io_array_t *)realloc + (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE * block * sizeof (mca_io_ompio_io_array_t)); if (NULL == fh->f_io_array) { opal_output(1, "OUT OF MEMORY\n"); return OMPI_ERR_OUT_OF_RESOURCE; } } - - if (decoded_iov[i].iov_len - + + if (decoded_iov[i].iov_len - (total_bytes_written - sum_previous_counts) <= 0) { sum_previous_counts += decoded_iov[i].iov_len; i = i + 1; } - - disp = (OPAL_PTRDIFF_TYPE)decoded_iov[i].iov_base + + + disp = (OPAL_PTRDIFF_TYPE)decoded_iov[i].iov_base + (total_bytes_written - sum_previous_counts); fh->f_io_array[k].memory_address = (IOVBASE_TYPE *)disp; - - if (decoded_iov[i].iov_len - - (total_bytes_written - sum_previous_counts) >= + + if (decoded_iov[i].iov_len - + (total_bytes_written - sum_previous_counts) >= bytes_to_write_in_cycle) { fh->f_io_array[k].length = bytes_to_write_in_cycle; } else { - fh->f_io_array[k].length = decoded_iov[i].iov_len - + fh->f_io_array[k].length = decoded_iov[i].iov_len - (total_bytes_written - sum_previous_counts); } - - if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) { - if (fh->f_decoded_iov[j].iov_len - + + if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) { + if (fh->f_decoded_iov[j].iov_len - (fh->f_total_bytes - sum_previous_length) <= 0) { sum_previous_length += fh->f_decoded_iov[j].iov_len; j = j + 1; @@ -416,21 +426,21 @@ int mca_io_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles fh->f_total_bytes = 0; } } - } - - disp = (OPAL_PTRDIFF_TYPE)fh->f_decoded_iov[j].iov_base + + } + + disp = (OPAL_PTRDIFF_TYPE)fh->f_decoded_iov[j].iov_base + (fh->f_total_bytes - sum_previous_length); fh->f_io_array[k].offset = (IOVBASE_TYPE *)(intptr_t)(disp + fh->f_offset); - - if (! 
(fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) { - if (fh->f_decoded_iov[j].iov_len - - (fh->f_total_bytes - sum_previous_length) + + if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) { + if (fh->f_decoded_iov[j].iov_len - + (fh->f_total_bytes - sum_previous_length) < fh->f_io_array[k].length) { - fh->f_io_array[k].length = fh->f_decoded_iov[j].iov_len - + fh->f_io_array[k].length = fh->f_decoded_iov[j].iov_len - (fh->f_total_bytes - sum_previous_length); } } - + total_bytes_written += fh->f_io_array[k].length; fh->f_total_bytes += fh->f_io_array[k].length; bytes_to_write_in_cycle -= fh->f_io_array[k].length; @@ -439,12 +449,12 @@ int mca_io_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles fh->f_position_in_file_view = sum_previous_length; fh->f_index_in_file_view = j; fh->f_num_of_io_entries = k; - + #if 0 if (fh->f_rank == 0) { int d; printf("*************************** %d\n", fh->f_num_of_io_entries); - + for (d=0 ; df_num_of_io_entries ; d++) { printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n", fh->f_io_array[d].memory_address, @@ -464,7 +474,7 @@ int mca_io_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles /******************************************************************/ int mca_io_ompio_file_write_all (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -475,12 +485,12 @@ int mca_io_ompio_file_write_all (ompi_file_t *fh, data = (mca_io_ompio_data_t *) fh->f_io_selected_data; ret = data->ompio_fh. 
- f_fcoll->fcoll_file_write_all (&data->ompio_fh, - buf, - count, + f_fcoll->fcoll_file_write_all (&data->ompio_fh, + buf, + count, datatype, status); - + if ( MPI_STATUS_IGNORE != status ) { size_t size; @@ -493,7 +503,7 @@ int mca_io_ompio_file_write_all (ompi_file_t *fh, int mca_io_ompio_file_write_at_all (ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -508,7 +518,7 @@ int mca_io_ompio_file_write_at_all (ompi_file_t *fh, } int mca_io_ompio_file_iwrite_all (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request) @@ -521,15 +531,15 @@ int mca_io_ompio_file_iwrite_all (ompi_file_t *fh, fp = &data->ompio_fh; if ( NULL != fp->f_fcoll->fcoll_file_iwrite_all ) { - ret = fp->f_fcoll->fcoll_file_iwrite_all (&data->ompio_fh, - buf, - count, + ret = fp->f_fcoll->fcoll_file_iwrite_all (&data->ompio_fh, + buf, + count, datatype, request); } else { - /* this fcoll component does not support non-blocking - collective I/O operations. WE fake it with + /* this fcoll component does not support non-blocking + collective I/O operations. WE fake it with individual non-blocking I/O operations. 
*/ ret = ompio_io_ompio_file_iwrite ( fp, buf, count, datatype, request ); } @@ -539,7 +549,7 @@ int mca_io_ompio_file_iwrite_all (ompi_file_t *fh, int ompio_io_ompio_file_write_at_all (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -561,48 +571,62 @@ int ompio_io_ompio_file_write_at_all (mca_io_ompio_file_t *fh, int mca_io_ompio_file_iwrite_at_all (ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request) { int ret = OMPI_SUCCESS; mca_io_ompio_data_t *data; - mca_io_ompio_file_t *fp=NULL; - OMPI_MPI_OFFSET_TYPE prev_offset; data = (mca_io_ompio_data_t *) fh->f_io_selected_data; - fp = &data->ompio_fh; + ret = ompio_io_ompio_file_iwrite_at_all ( &data->ompio_fh, offset, buf, count, datatype, request ); + return ret; +} + +int ompio_io_ompio_file_iwrite_at_all (mca_io_ompio_file_t *fp, + OMPI_MPI_OFFSET_TYPE offset, + const void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request) +{ + + int ret = OMPI_SUCCESS; + OMPI_MPI_OFFSET_TYPE prev_offset; + ompio_io_ompio_file_get_position (fp, &prev_offset ); ompi_io_ompio_set_explicit_offset (fp, offset); if ( NULL != fp->f_fcoll->fcoll_file_iwrite_all ) { - ret = fp->f_fcoll->fcoll_file_iwrite_all (&data->ompio_fh, - buf, - count, + ret = fp->f_fcoll->fcoll_file_iwrite_all (fp, + buf, + count, datatype, request); } else { - /* this fcoll component does not support non-blocking - collective I/O operations. WE fake it with + /* this fcoll component does not support non-blocking + collective I/O operations. WE fake it with individual non-blocking I/O operations. 
*/ ret = ompio_io_ompio_file_iwrite ( fp, buf, count, datatype, request ); } - ompi_io_ompio_set_explicit_offset (fp, prev_offset); return ret; } + + + /* Infrastructure for shared file pointer operations */ /* (Individual and collective */ /******************************************************/ int mca_io_ompio_file_write_shared (ompi_file_t *fp, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status) @@ -627,7 +651,7 @@ int mca_io_ompio_file_write_shared (ompi_file_t *fp, } int mca_io_ompio_file_iwrite_shared (ompi_file_t *fp, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request) @@ -652,7 +676,7 @@ int mca_io_ompio_file_iwrite_shared (ompi_file_t *fp, } int mca_io_ompio_file_write_ordered (ompi_file_t *fp, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status) @@ -677,7 +701,7 @@ int mca_io_ompio_file_write_ordered (ompi_file_t *fp, } int mca_io_ompio_file_write_ordered_begin (ompi_file_t *fp, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype) { @@ -701,7 +725,7 @@ int mca_io_ompio_file_write_ordered_begin (ompi_file_t *fp, } int mca_io_ompio_file_write_ordered_end (ompi_file_t *fp, - void *buf, + const void *buf, ompi_status_public_t *status) { int ret = OMPI_SUCCESS; @@ -727,7 +751,7 @@ int mca_io_ompio_file_write_ordered_end (ompi_file_t *fp, /* Split collectives . 
Not really used but infrastructure is in place */ /**********************************************************************/ int mca_io_ompio_file_write_all_begin (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype) { @@ -748,7 +772,7 @@ int mca_io_ompio_file_write_all_begin (ompi_file_t *fh, } int mca_io_ompio_file_write_all_end (ompi_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t *status) { int ret = OMPI_SUCCESS; @@ -767,56 +791,42 @@ int mca_io_ompio_file_write_all_end (ompi_file_t *fh, int mca_io_ompio_file_write_at_all_begin (ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype) { int ret = OMPI_SUCCESS; - mca_io_ompio_data_t *data; + mca_io_ompio_data_t *data=NULL; + mca_io_ompio_file_t *fp=NULL; data = (mca_io_ompio_data_t *) fh->f_io_selected_data; - ret = ompio_io_ompio_file_write_at_all_begin ( &data->ompio_fh, offset, buf, count, datatype ); - return ret; -} - -int ompio_io_ompio_file_write_at_all_begin (mca_io_ompio_file_t *fh, - OMPI_MPI_OFFSET_TYPE offset, - void *buf, - int count, - struct ompi_datatype_t *datatype) -{ - int ret = OMPI_SUCCESS; + fp = &data->ompio_fh; - if ( true == fh->f_split_coll_in_use ) { + if ( true == fp->f_split_coll_in_use ) { printf("Only one split collective I/O operation allowed per file handle at any given point in time!\n"); return MPI_ERR_REQUEST; } - ret = mca_io_ompio_file_iwrite_at_all ( fh->f_fh, offset, buf, count, datatype, &fh->f_split_coll_req ); - fh->f_split_coll_in_use = true; + ret = ompio_io_ompio_file_iwrite_at_all ( fp, offset, buf, count, datatype, &fp->f_split_coll_req ); + fp->f_split_coll_in_use = true; + return ret; } + int mca_io_ompio_file_write_at_all_end (ompi_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t * status) { int ret = OMPI_SUCCESS; mca_io_ompio_data_t *data; + mca_io_ompio_file_t *fp=NULL; data = (mca_io_ompio_data_t *) 
fh->f_io_selected_data; - ret = ompio_io_ompio_file_read_at_all_end ( &data->ompio_fh, buf, status ); - return ret; -} - -int ompio_io_ompio_file_write_at_all_end (mca_io_ompio_file_t *fh, - void *buf, - ompi_status_public_t * status) -{ - int ret = OMPI_SUCCESS; - ret = ompi_request_wait ( &fh->f_split_coll_req, status ); + fp = &data->ompio_fh; + ret = ompi_request_wait ( &fp->f_split_coll_req, status ); /* remove the flag again */ - fh->f_split_coll_in_use = false; + fp->f_split_coll_in_use = false; + return ret; } - diff --git a/ompi/mca/io/ompio/io_ompio_module.c b/ompi/mca/io/ompio/io_ompio_module.c index 5f10cfa1625..cbdaf2e0dd8 100644 --- a/ompi/mca/io/ompio/io_ompio_module.c +++ b/ompi/mca/io/ompio/io_ompio_module.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -49,6 +49,8 @@ mca_io_base_module_2_0_0_t mca_io_ompio_module = { mca_io_ompio_file_iread_at, mca_io_ompio_file_iwrite_at, + mca_io_ompio_file_iread_at_all, + mca_io_ompio_file_iwrite_at_all, /* non-indexed IO operations */ mca_io_ompio_file_read, @@ -58,6 +60,8 @@ mca_io_base_module_2_0_0_t mca_io_ompio_module = { mca_io_ompio_file_iread, mca_io_ompio_file_iwrite, + mca_io_ompio_file_iread_all, + mca_io_ompio_file_iwrite_all, mca_io_ompio_file_seek, mca_io_ompio_file_get_position, diff --git a/ompi/mca/io/ompio/io_ompio_nbc.c b/ompi/mca/io/ompio/io_ompio_nbc.c deleted file mode 100644 index 32e8aa1aeeb..00000000000 --- a/ompi/mca/io/ompio/io_ompio_nbc.c +++ /dev/null @@ -1,551 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2011 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/runtime/params.h" -#include "ompi/communicator/communicator.h" -#include "opal/datatype/opal_convertor.h" -#include "opal/datatype/opal_datatype.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/info/info.h" -#include "ompi/request/request.h" - -#include -#include -#include "io_ompio_nbc.h" - - - -int mca_io_ompio_get_f_aggregator_index (ompi_file_t *fh) -{ - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *file; - - data = (mca_io_ompio_data_t *) fh->f_io_selected_data; - file = &data->ompio_fh; - - return file->f_aggregator_index; -} - -int mca_io_ompio_get_f_num_of_io_entries(ompi_file_t *fh) -{ - - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *file; - - data = (mca_io_ompio_data_t *) fh->f_io_selected_data; - file = &data->ompio_fh; - - return file->f_num_of_io_entries; -} - -int mca_io_ompio_get_fcoll_dynamic_num_io_procs (int *num_procs) -{ - int param; - - param = mca_base_var_find("ompi", "fcoll", "dynamic", "num_io_procs"); - if (param >= 0){ - const int *value = NULL; - mca_base_var_get_value(param, &value, NULL, NULL); - *num_procs = value[0]; -/* printf("num procs : %d\n", num_procs);*/ - return OMPI_SUCCESS; - } - else - return -1; - -} - -int mca_io_ompio_get_fcoll_dynamic_constant_cbs (int *constant_cbs) -{ - int param; - - param = mca_base_var_find("ompi", "fcoll", "dynamic", "constant_cbs"); - if (param >= 0){ - const int *value = NULL; - mca_base_var_get_value(param, &value, NULL, NULL); - *constant_cbs = value[0]; -/* printf ("constant_cbs: %d\n", constant_cbs);*/ - return OMPI_SUCCESS; - } - else{ - constant_cbs[0] = -1; - return OMPI_SUCCESS; - } - -} - -int mca_io_ompio_get_fcoll_dynamic_cycle_buffer_size (int *cycle_buffer_size) -{ - - int param; - - param = mca_base_var_find("ompi", "fcoll", "dynamic", "cycle_buffer_size"); - if (param >= 0){ - const int *value = NULL; - mca_base_var_get_value(param, 
&value, NULL, NULL); - *cycle_buffer_size = value[0]; -/* printf ("cycle_buffer_size : %d\n", *cycle_buffer_size);*/ - return OMPI_SUCCESS; - } - else - return -1; - -} - -int mca_io_ompio_get_f_io_array(ompi_file_t *fh, - mca_io_ompio_io_array_t **f_io_array) -{ - - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *file; - - data = (mca_io_ompio_data_t *) fh->f_io_selected_data; - file = &data->ompio_fh; - *f_io_array = file->f_io_array; - return OMPI_SUCCESS; -} - -int mca_io_ompio_get_f_comm(ompi_file_t *fh, ompi_communicator_t **value) -{ - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *file; - - data = (mca_io_ompio_data_t *)fh->f_io_selected_data; - file = &(data->ompio_fh); - *value = file->f_comm; - - return OMPI_SUCCESS; -} - -int mca_io_ompio_get_iov_type(ompi_file_t *fh, ompi_datatype_t **value) -{ - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *file; - - data = (mca_io_ompio_data_t *)fh->f_io_selected_data; - file = &data->ompio_fh; - - *value = file->f_iov_type; - return OMPI_SUCCESS; -} - -int mca_io_ompio_get_f_procs_in_group(ompi_file_t *fh, int **value) -{ - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *file; - - data = (mca_io_ompio_data_t *)fh->f_io_selected_data; - file = &data->ompio_fh; - - *value = file->f_procs_in_group; - return OMPI_SUCCESS; -} - -int mca_io_ompio_get_f_procs_per_group(ompi_file_t *fh) -{ - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *file; - - data = (mca_io_ompio_data_t *)fh->f_io_selected_data; - file = &data->ompio_fh; - - return file->f_procs_per_group; -} - -signed int mca_io_ompio_get_f_flags(ompi_file_t *fh) -{ - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *file; - - data = (mca_io_ompio_data_t *) fh->f_io_selected_data; - file = &data->ompio_fh; - - return file->f_flags; -} - -int mca_io_ompio_get_fd(ompi_file_t *fh) -{ - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *file; - - data = (mca_io_ompio_data_t *) fh->f_io_selected_data; - file = &data->ompio_fh; - - return file->fd; -} - -int 
mca_io_ompio_generate_io_array(ompi_file_t *file, - struct iovec *global_fview, - int *tglobal_count, - int *fview_count, - int *bytes_per_process, - char *global_buf, - int *tblocks, - int *sorted, - int *nvalue, - int *bytes_left_ptr, - int *sorted_index) -{ - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *fh; - int k, j, x=sorted_index[0]; - int blocks = *tblocks; - int bytes_left = bytes_left_ptr[0]; - - - data = (mca_io_ompio_data_t *) file->f_io_selected_data; - fh = &data->ompio_fh; - - - if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { - int global_count = *tglobal_count; - int bytes_to_write = global_count; - int *temp = NULL; - int block = 1; - k = 0; - temp = (int *)malloc (sizeof(int) * fh->f_procs_per_group); - if (NULL == temp) { - opal_output(1, "OUT OF MEMORY\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - memset(temp, 0x0, fh->f_procs_per_group*sizeof(int)); - if (NULL != fh->f_io_array){ - fh->f_num_of_io_entries = 0; - free (fh->f_io_array); - fh->f_io_array = NULL; - } - - fh->f_io_array = (mca_io_ompio_io_array_t *) malloc - (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t)); - if (NULL == fh->f_io_array) { - opal_output(1, "OUT OF MEMORY\n"); - free(temp); - return OMPI_ERR_OUT_OF_RESOURCE; - } - while (bytes_to_write) { - int start = 0; - if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) { - block ++; - fh->f_io_array = (mca_io_ompio_io_array_t *)realloc - (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE * block * - sizeof(mca_io_ompio_io_array_t)); - if (NULL == fh->f_io_array) { - opal_output(1, "OUT OF MEMORY\n"); - free(temp); - return OMPI_ERR_OUT_OF_RESOURCE; - } - } - blocks= fview_count[0]; - for (j=0 ; jf_procs_per_group ; j++) { - if (sorted[x] < blocks) { - nvalue[0] = j; - break; - } - else { - blocks += fview_count[j+1]; - } - } - for (j=0 ; jf_io_array[k].offset = (IOVBASE_TYPE *) - ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + - (global_fview[sorted[x]].iov_len - bytes_left)); - - fh->f_io_array[k].length 
= bytes_left; - fh->f_io_array[k].memory_address = &global_buf[start+temp[nvalue[0]]]; -/* printf("global_buf[%d] : %d\n", - (start+temp[nvalue[0]]),(int)global_buf[start+temp[nvalue[0]]]);*/ - - temp[nvalue[0]] += (int)fh->f_io_array[k].length; - bytes_to_write -= bytes_left; - bytes_left = 0; - k ++; - x ++; - continue; - } - else { - fh->f_io_array[k].offset = (IOVBASE_TYPE *) - ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + (global_fview[sorted[x]].iov_len - bytes_left)); - - fh->f_io_array[k].length = bytes_to_write; - fh->f_io_array[k].memory_address = - &global_buf[start+temp[nvalue[0]]]; -/* printf("global_buf[%d] : %d\n", - (start+temp[nvalue[0]]),(int)global_buf[start+temp[nvalue[0]]]);*/ - - temp[nvalue[0]] += (int)fh->f_io_array[k].length; - bytes_left -= bytes_to_write; - bytes_to_write = 0;; - k ++; - break; - } - } - else { - if (bytes_to_write < (int)global_fview[sorted[x]].iov_len) { - fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; - - fh->f_io_array[k].length = bytes_to_write; - fh->f_io_array[k].memory_address = &global_buf[start+temp[nvalue[0]]]; -/* printf("global_buf[%d] : %d\n", - (start+temp[nvalue[0]]),(int)global_buf[start+temp[nvalue[0]]]);*/ - - bytes_left = - global_fview[sorted[x]].iov_len - bytes_to_write; - bytes_to_write = 0; - k ++; - break; - } - else { - fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; - - fh->f_io_array[k].length = global_fview[sorted[x]].iov_len; - fh->f_io_array[k].memory_address = &global_buf[start+temp[nvalue[0]]]; - temp[nvalue[0]] += (int)fh->f_io_array[k].length; -/* printf("global_buf[%d] : %d\n", - (start+temp[nvalue[0]]),(int)global_buf[start+temp[nvalue[0]]]);*/ - - bytes_to_write -= global_fview[sorted[x]].iov_len; - k ++; - x ++; - continue; - } - } - } - fh->f_num_of_io_entries = k; -/* for (i=0 ; if_num_of_io_entries ; i++) { - printf("OFFSET: %lu LENGTH: %d\n", - fh->f_io_array[i].offset, - fh->f_io_array[i].length); - }*/ - - - bytes_left_ptr[0] = bytes_left; - 
sorted_index[0] = x; - - if (NULL != temp) { - free (temp); - temp = NULL; - } - - } - return OMPI_SUCCESS; -} - -int mca_io_ompio_non_contiguous_create_receive_buf(int *bytes_received, - struct iovec *decoded_iov, - char *receive_buf) -{ - - OPAL_PTRDIFF_TYPE mem_address; - size_t remaining = 0; - size_t temp_position = 0; - int current_position = 0, iov_index = 0; - - remaining = *bytes_received; - - while (remaining) { - mem_address = (OPAL_PTRDIFF_TYPE) - (decoded_iov[iov_index].iov_base) + current_position; - - if (remaining >= - (decoded_iov[iov_index].iov_len - current_position)) { - memcpy ((IOVBASE_TYPE *) mem_address, - receive_buf+temp_position, - decoded_iov[iov_index].iov_len - current_position); - remaining = remaining - - (decoded_iov[iov_index].iov_len - current_position); - temp_position = temp_position + - (decoded_iov[iov_index].iov_len - current_position); - iov_index = iov_index + 1; - current_position = 0; - } - else { - memcpy ((IOVBASE_TYPE *) mem_address, - receive_buf+temp_position, - remaining); - current_position = current_position + remaining; - remaining = 0; - } - } - if (NULL != receive_buf) { - free (receive_buf); - receive_buf = NULL; - } - return OMPI_SUCCESS; -} - - -int mca_io_ompio_non_contiguous_create_send_buf(int *bytes_sent, - struct iovec *decoded_iov, - char *send_buf) -{ - - OPAL_PTRDIFF_TYPE mem_address; - size_t remaining = 0; - size_t temp_position = 0; - int current_position = 0, iov_index = 0; - - remaining = *bytes_sent; - while (remaining) { - mem_address = (OPAL_PTRDIFF_TYPE) - (decoded_iov[iov_index].iov_base) + current_position; - if (remaining >= - (decoded_iov[iov_index].iov_len - current_position)) { - memcpy (send_buf+temp_position, - (IOVBASE_TYPE *)mem_address, - decoded_iov[iov_index].iov_len - current_position); - remaining = remaining - - (decoded_iov[iov_index].iov_len - current_position); - temp_position = temp_position + - (decoded_iov[iov_index].iov_len - current_position); - iov_index = iov_index 
+ 1; - current_position = 0; - } - else { - memcpy (send_buf+temp_position, (IOVBASE_TYPE *) mem_address, - remaining); - current_position = current_position + remaining; - remaining = 0; - } - } - return OMPI_SUCCESS; -} - - - -int mca_io_ompio_get_datatype_size (ompi_datatype_t * datatype) -{ - return datatype->super.size; -} - -int mca_io_ompio_decode_datatype_external (ompi_file_t *fp, - ompi_datatype_t *datatype, - int count, - void *buf, - size_t *max_data, - struct iovec **iov, - uint32_t *iovec_count) -{ - - int res; - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *fh; - - data = (mca_io_ompio_data_t *) fp->f_io_selected_data; - fh = &data->ompio_fh; - res = ompi_io_ompio_decode_datatype (fh, - datatype, - count, - buf, - max_data, - iov, - iovec_count); - if(res != OMPI_SUCCESS){ - printf("Error in ompio decode datatype\n"); - return res; - } - return OMPI_SUCCESS; - -} - -int mca_io_ompio_datatype_is_contiguous(ompi_datatype_t *datatype, - ompi_file_t *fp) -{ - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *fh; - - data = (mca_io_ompio_data_t *) fp->f_io_selected_data; - fh = &data->ompio_fh; - - if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)){ - fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; - return 1; - } - else - return 0; -} - - -int mca_io_ompio_set_aggregator_props (ompi_file_t *fp, - int num_aggregators, - size_t bytes_per_proc) -{ - int res; - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *fh; - - data = (mca_io_ompio_data_t *) fp->f_io_selected_data; - fh = &data->ompio_fh; - res = ompi_io_ompio_set_aggregator_props (fh, - num_aggregators, - bytes_per_proc); - if(res != OMPI_SUCCESS){ - printf("Error in aggregator props external\n"); - return res; - } - - return OMPI_SUCCESS; -} - -int mca_io_ompio_generate_current_file_view (ompi_file_t *fp, - size_t max_data, - struct iovec **f_iov, - int *iov_count) -{ - int res; - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *fh; - - data = (mca_io_ompio_data_t *) 
fp->f_io_selected_data; - fh = &data->ompio_fh; - res = ompi_io_ompio_generate_current_file_view (fh, - max_data, - f_iov, - iov_count); - if(res != OMPI_SUCCESS){ - printf("Error in ompi_io_generate_current_file_view\n"); - return res; - } - - return OMPI_SUCCESS; -} - -int mca_io_ompio_free_f_io_array (ompi_file_t *fp){ - - mca_io_ompio_data_t *data; - mca_io_ompio_file_t *fh; - - data = (mca_io_ompio_data_t *) fp->f_io_selected_data; - fh = &data->ompio_fh; - - if (NULL != fh->f_io_array) { - free (fh->f_io_array); - fh->f_io_array = NULL; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/io/ompio/io_ompio_nbc.h b/ompi/mca/io/ompio/io_ompio_nbc.h deleted file mode 100644 index 09a8c61b092..00000000000 --- a/ompi/mca/io/ompio/io_ompio_nbc.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef MCA_IO_OMPIO_NBC_H -#define MCA_IO_OMPIO_NBC_H - -#include "io_ompio.h" - -/* Function declaration for get and utility method to use with libNBC - implementation in io_ompio_nbc.c */ -OMPI_DECLSPEC int mca_io_ompio_get_fcoll_dynamic_num_io_procs (int *num_procs); -OMPI_DECLSPEC int mca_io_ompio_get_fcoll_dynamic_cycle_buffer_size (int *cycle_buffer_size); -OMPI_DECLSPEC int mca_io_ompio_get_fcoll_dynamic_constant_cbs (int *constant_cbs); -OMPI_DECLSPEC int mca_io_ompio_get_f_aggregator_index (ompi_file_t *fh); -OMPI_DECLSPEC int mca_io_ompio_get_f_procs_in_group (ompi_file_t *fh, - int **value); -OMPI_DECLSPEC int mca_io_ompio_get_f_procs_per_group (ompi_file_t *fh); -OMPI_DECLSPEC int mca_io_ompio_get_f_comm (ompi_file_t *fh, - ompi_communicator_t **value); -OMPI_DECLSPEC int mca_io_ompio_get_iov_type (ompi_file_t *fh, - ompi_datatype_t **value); -OMPI_DECLSPEC signed int mca_io_ompio_get_f_flags (ompi_file_t *fh); -OMPI_DECLSPEC int mca_io_ompio_get_fd (ompi_file_t *fh); -OMPI_DECLSPEC int mca_io_ompio_get_f_num_of_io_entries (ompi_file_t *fh); -OMPI_DECLSPEC int mca_io_ompio_get_f_io_array (ompi_file_t *fh, - mca_io_ompio_io_array_t **f_io_array); -OMPI_DECLSPEC int 
mca_io_ompio_free_f_io_array (ompi_file_t *fh); - -OMPI_DECLSPEC int mca_io_ompio_get_datatype_size (ompi_datatype_t *datatype); -OMPI_DECLSPEC int mca_io_ompio_decode_datatype_external(ompi_file_t *fh, - struct ompi_datatype_t *datatype, - int count, - void *buf, - size_t *max_data, - struct iovec **iov, - uint32_t *iov_count); -OMPI_DECLSPEC int mca_io_ompio_generate_current_file_view (ompi_file_t *fp, - size_t max_data, - struct iovec **f_iov, - int *iov_count); -OMPI_DECLSPEC int mca_io_ompio_set_aggregator_props (ompi_file_t *fh, - int num_aggregators, - size_t bytes_per_proc); -OMPI_DECLSPEC int mca_io_ompio_generate_io_array (ompi_file_t *file, - struct iovec *global_view, - int *tglobal_count, - int *fview_count, - int *bytes_per_process, - char *global_buf, - int *tblocks, - int *sorted, - int *nvalue, - int *bytes_left, - int *sorted_index); -OMPI_DECLSPEC int mca_io_ompio_datatype_is_contiguous (ompi_datatype_t *datatype, - ompi_file_t *fp); -OMPI_DECLSPEC int mca_io_ompio_non_contiguous_create_send_buf (int *bytes_sent, - struct iovec *decoded_iov, - char *send_buf); -OMPI_DECLSPEC int mca_io_ompio_non_contiguous_create_receive_buf(int *bytes_received, - struct iovec *decoded_iov, - char *receive_buf); - -/* libNBC utility methods declarations ends here */ - -#endif /*MCA_IO_OMPIO_NBC_H*/ diff --git a/ompi/mca/io/ompio/io_ompio_request.c b/ompi/mca/io/ompio/io_ompio_request.c index 01e42277e46..59271a346ba 100644 --- a/ompi/mca/io/ompio/io_ompio_request.c +++ b/ompi/mca/io/ompio/io_ompio_request.c @@ -3,18 +3,18 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,7 +29,7 @@ static int mca_io_ompio_request_free ( struct ompi_request_t **req) { mca_ompio_request_t *ompio_req = ( mca_ompio_request_t *)*req; if ( NULL != ompio_req->req_free_fn ) { - ompio_req->req_free_fn (ompio_req ); + ompio_req->req_free_fn (ompio_req ); } opal_list_remove_item (&mca_io_ompio_pending_requests, &ompio_req->req_item); @@ -65,7 +65,7 @@ void mca_io_ompio_request_destruct(mca_ompio_request_t* req) OMPI_REQUEST_FINI ( &(req->req_ompi)); OBJ_DESTRUCT (&req->req_item); if ( NULL != req->req_data ) { - free (req->req_data); + free (req->req_data); } return; @@ -79,19 +79,19 @@ int mca_io_ompio_component_progress ( void ) OPAL_LIST_FOREACH(litem, &mca_io_ompio_pending_requests, opal_list_item_t) { req = GET_OMPIO_REQ_FROM_ITEM(litem); - if ( true == req->req_ompi.req_complete ) { - continue; - } + if( REQUEST_COMPLETE(&req->req_ompi) ) { + continue; + } if ( NULL != req->req_progress_fn ) { if ( req->req_progress_fn(req) ) { completed++; - ompi_request_complete (&req->req_ompi, 1); - /* The fbtl progress function is expected to set the - ** status elements - */ + ompi_request_complete (&req->req_ompi, true); + /* The fbtl progress function is expected to set the + * status elements + */ } } - + } return completed; diff --git a/ompi/mca/io/ompio/io_ompio_request.h b/ompi/mca/io/ompio/io_ompio_request.h index 7d990231861..620565ca48d 100644 --- a/ompi/mca/io/ompio/io_ompio_request.h +++ b/ompi/mca/io/ompio/io_ompio_request.h @@ -6,15 +6,15 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * 
of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -57,7 +57,7 @@ typedef struct mca_ompio_request_t mca_ompio_request_t; OBJ_CLASS_DECLARATION(mca_ompio_request_t); #define GET_OMPIO_REQ_FROM_ITEM(ITEM) ((mca_ompio_request_t *)((char *)ITEM - offsetof(struct mca_ompio_request_t,req_item))) - + OMPI_DECLSPEC int mca_io_ompio_component_progress ( void); diff --git a/ompi/mca/io/romio314/configure.m4 b/ompi/mca/io/romio314/configure.m4 index 431d489e23e..6ebe85263f0 100644 --- a/ompi/mca/io/romio314/configure.m4 +++ b/ompi/mca/io/romio314/configure.m4 @@ -52,7 +52,7 @@ AC_DEFUN([MCA_ompi_io_romio314_CONFIG],[ $2], [AC_MSG_RESULT([yes]) - AS_IF([test -n "$with_io_romio_flags" -a "$with_io_romio_flags" != "no"], + AS_IF([test -n "$with_io_romio_flags" && test "$with_io_romio_flags" != "no"], [io_romio314_flags="$with_io_romio_flags $io_romio314_flags"], [io_romio314_flags=]) # If ROMIO is going to end up in a DSO, all we need is @@ -69,7 +69,7 @@ AC_DEFUN([MCA_ompi_io_romio314_CONFIG],[ AS_IF([test "$enable_static" = "yes"], [io_romio314_static=enable], [io_romio314_static=disable])]) - AS_IF([test -n "$prefix" -a "$prefix" != "NONE"], + AS_IF([test -n "$prefix" && test "$prefix" != "NONE"], [io_romio314_prefix_arg="--prefix=$prefix"], [io_romio314_prefix_arg=]) diff --git a/ompi/mca/io/romio314/ompi.patch b/ompi/mca/io/romio314/ompi.patch index 140ba249ddd..e10089fa8dc 100644 --- a/ompi/mca/io/romio314/ompi.patch +++ b/ompi/mca/io/romio314/ompi.patch @@ -52,7 +52,7 @@ index b9d4e25..69d7014 100644 # (C) 2011 by Argonne National 
Laboratory. # See COPYRIGHT in top-level directory. # - + +# OMPI: include a top level makefile with some options +include $(top_srcdir)/Makefile.options + @@ -60,12 +60,12 @@ index b9d4e25..69d7014 100644 ## 1) that ROMIO is being embedded within the MPI library, as in MPICH or Open ## MPI @@ -17,7 +36,6 @@ ACLOCAL_AMFLAGS = -I confdb - include_HEADERS = + include_HEADERS = nodist_include_HEADERS = - noinst_HEADERS = + noinst_HEADERS = -AM_CPPFLAGS = EXTRA_DIST = - SUFFIXES = + SUFFIXES = doc1_src_txt = @@ -46,7 +64,9 @@ AM_CPPFLAGS += $(MPI_H_INCLUDE) # handle the "include" directory here @@ -75,9 +75,9 @@ index b9d4e25..69d7014 100644 +# Open MPI: do not install mpio.h +noinst_HEADERS += include/mpio.h +noinst_HEADERS += include/io_romio_conv.h - + # ------------------------------------------------------------------------ - + @@ -63,8 +83,8 @@ EXTRA_DIST += autogen.sh if BUILD_ROMIO_EMBEDDED # Build a libtool convenience library that the enclosing MPI implementation can @@ -86,7 +86,7 @@ index b9d4e25..69d7014 100644 -libromio_la_SOURCES = $(romio_mpi_sources) $(romio_other_sources) $(glue_sources) +noinst_LTLIBRARIES = libromio_dist.la +libromio_dist_la_SOURCES = $(romio_mpi_sources) $(romio_other_sources) $(glue_sources) - + ## NOTE: ROMIO's old build system builds a bunch of _foo.o objects that contain ## PMPI_ implementations as well as calls to only other PMPI routines. 
In diff --git a/ompi/mca/io/romio32b1/romio/Makefile.options b/ompi/mca/io/romio32b1/romio/Makefile.options @@ -159,13 +159,13 @@ index 505d518..ffc05cb 100644 + adio/include/romioconf-undefs.h \ adio/include/mpiu_external32.h \ adio/include/hint_fns.h - + diff --git a/ompi/mca/io/romio32b1/romio/adio/common/ad_end.c b/ompi/mca/io/romio32b1/romio/adio/common/ad_end.c index ea4dfeb..066c65c 100644 --- a/ompi/mca/io/romio32b1/romio/adio/common/ad_end.c +++ b/ompi/mca/io/romio32b1/romio/adio/common/ad_end.c @@ -16,7 +16,12 @@ void ADIO_End(int *error_code) - + /* if a default errhandler was set on MPI_FILE_NULL then we need to ensure * that our reference to that errhandler is released */ +/* Open MPI: The call to PMPI_File_set_errhandler has to be done in romio/src/io_romio_file_open.c @@ -174,7 +174,7 @@ index ea4dfeb..066c65c 100644 +#if 0 PMPI_File_set_errhandler(MPI_FILE_NULL, MPI_ERRORS_RETURN); +#endif - + /* delete the flattened datatype list */ curr = ADIOI_Flatlist; diff --git a/ompi/mca/io/romio32b1/romio/adio/common/ad_iread_coll.c b/ompi/mca/io/romio32b1/romio/adio/common/ad_iread_coll.c @@ -184,7 +184,7 @@ index b1311e6..6ae4359 100644 @@ -16,6 +16,8 @@ #include "mpe.h" #endif - + +#ifdef HAVE_MPI_GREQUEST_EXTENSIONS + /* ADIOI_GEN_IreadStridedColl */ @@ -193,7 +193,7 @@ index b1311e6..6ae4359 100644 @@ -1315,3 +1317,4 @@ static int ADIOI_GEN_irc_wait_fn(int count, void **array_of_states, return errcode; } - + +#endif /* HAVE_MPI_GREQUEST_EXTENSIONS */ diff --git a/ompi/mca/io/romio32b1/romio/adio/common/ad_iwrite_coll.c b/ompi/mca/io/romio32b1/romio/adio/common/ad_iwrite_coll.c index b456ec4..9178a8d 100644 @@ -202,7 +202,7 @@ index b456ec4..9178a8d 100644 @@ -13,6 +13,8 @@ #include "mpe.h" #endif - + +#ifdef HAVE_MPI_GREQUEST_EXTENSIONS + /* ADIOI_GEN_IwriteStridedColl */ @@ -211,7 +211,7 @@ index b456ec4..9178a8d 100644 @@ -1539,3 +1541,4 @@ static int ADIOI_GEN_iwc_wait_fn(int count, void **array_of_states, return errcode; } - + +#endif /* 
HAVE_MPI_GREQUEST_EXTENSIONS */ diff --git a/ompi/mca/io/romio32b1/romio/adio/include/adioi.h b/ompi/mca/io/romio32b1/romio/adio/include/adioi.h index b20ca82..73dad0d 100644 @@ -242,7 +242,7 @@ index b20ca82..73dad0d 100644 +#define ADIOI_GEN_IwriteStridedColl NULL +#endif void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype - datatype, int file_ptr_type, ADIO_Offset + datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Offset **offset_list_ptr, ADIO_Offset diff --git a/ompi/mca/io/romio32b1/romio/adio/include/romioconf-undefs.h b/ompi/mca/io/romio32b1/romio/adio/include/romioconf-undefs.h new file mode 100644 @@ -307,7 +307,7 @@ index f975e1c..4d51a3e 100644 +# harmless and are left here solely for the sake of ease of future +# patching/importing. AC_PREREQ([2.63]) - + -m4_include([version.m4]) -dnl 2nd arg is intentionally underquoted +# Open MPI: Modifications to this file were done on an "let's do the @@ -327,15 +327,15 @@ index f975e1c..4d51a3e 100644 @@ -22,7 +31,7 @@ dnl scripts. AC_CONFIG_AUX_DIR([confdb]) AC_CONFIG_MACRO_DIR([confdb]) - + -AM_INIT_AUTOMAKE([-Wall -Werror -Wno-portability-recursive foreign 1.12.3 silent-rules subdir-objects]) +AM_INIT_AUTOMAKE([-Wall -Werror -Wno-portability-recursive foreign 1.12 silent-rules subdir-objects]) AM_MAINTAINER_MODE([enable]) - + dnl must come before LT_INIT, which AC_REQUIREs AC_PROG_CC @@ -43,12 +52,15 @@ if test -n "$CONFIGURE_ARGS" ; then fi - + AC_CONFIG_HEADER(adio/include/romioconf.h) +# Open MPI: modified AH_TOP AH_TOP([/* @@ -352,7 +352,7 @@ index f975e1c..4d51a3e 100644 @@ -58,9 +70,37 @@ AH_BOTTOM([ #endif /* !defined(ROMIOCONF_H_INCLUDED) */ ]) - + +# Open MPI: this configure script doesn't seem to define these +# anywhere, so just do them manually here because "we know better" +# (i.e., Open MPI can be hard-wired to these values). 
@@ -392,7 +392,7 @@ index f975e1c..4d51a3e 100644 @@ -95,6 +135,10 @@ AC_ARG_VAR([FROM_OMPI],[set to "yes" if building ROMIO inside of Open MPI]) FROM_OMPI=${FROM_OMPI:-no} if test "$FROM_OMPI" = 1 ; then FROM_OMPI=yes ; fi - + +AC_ARG_VAR([FROM_OMPI],[set to "yes" if building ROMIO inside of Open MPI]) +FROM_OMPI=${FROM_OMPI:-no} +if test "$FROM_OMPI" = 1 ; then FROM_OMPI=yes ; fi @@ -448,13 +448,13 @@ index f975e1c..4d51a3e 100644 +#if test -n "$with_mpi"; then +# CC=$MPI/bin/mpicc +#fi - + # start with the set of file systems that the user asked for # FILE_SYSTEM=$with_file_system @@ -259,6 +316,7 @@ top_build_dir=`pwd` # used in romioinstall AC_SUBST(top_build_dir) - + +# Open MPI: these shouldn't be needed with AM # # Create the "autoconf" style directory names... @@ -463,7 +463,7 @@ index f975e1c..4d51a3e 100644 fi if test $DEBUG = "yes" ; then CFLAGS="$CFLAGS $C_DEBUG_FLAG" --else +-else - CFLAGS="$CFLAGS $C_OPT_FLAG" +# Open MPI: don't add optflags - they'll come from the top-level configure +#else @@ -478,18 +478,18 @@ index f975e1c..4d51a3e 100644 -AC_C_INLINE +# Open MPI: We already do this test top-level +dnl AC_C_INLINE - + AC_TYPE_SIZE_T AC_TYPE_SSIZE_T @@ -651,7 +711,9 @@ AM_CONDITIONAL([BUILD_MPIO_ERRHAN],[false]) - + # if we don't have weak symbol support, we must build a separate convenience # library in order to provide the "PMPI_" symbols -AM_CONDITIONAL([BUILD_PROFILING_LIB],[test "x$HAVE_WEAK_SYMBOLS" = "x0"]) +# Open MPI: Disable the profile library +#AM_CONDITIONAL([BUILD_PROFILING_LIB],[test "x$HAVE_WEAK_SYMBOLS" = "x0"]) +AM_CONDITIONAL([BUILD_PROFILING_LIB],[false]) - + # weird: we have conflated "buid ROMIO's versions of the fortran bindings" and # "build ROMIO"s fortran I/O tests". 
Of course the common situaiton is that we @@ -1565,8 +1627,10 @@ if test $FROM_OMPI = yes ; then @@ -516,7 +516,7 @@ index f975e1c..4d51a3e 100644 @@ -1751,9 +1815,22 @@ AC_CHECK_HEADERS(unistd.h, echo "setting SYSDEP_INC to $SYSDEP_INC" AC_SUBST(SYSDEP_INC) - + +# Open MPI: use the exact same restrict test that we use in the +# upper-level Open MPI configure script so that we always get the same +# #define for "restrict" (there are a small number of files that will @@ -524,7 +524,7 @@ index f975e1c..4d51a3e 100644 +# need to #defines to agree). AC_C_RESTRICT PAC_C_GNU_ATTRIBUTE - + +# Open MPI: we need libtool +AM_PROG_LIBTOOL + @@ -535,11 +535,11 @@ index f975e1c..4d51a3e 100644 + # support gcov test coverage information PAC_ENABLE_COVERAGE - + @@ -1765,23 +1842,29 @@ echo "setting CFLAGS to $CFLAGS" echo "setting USER_CFLAGS to $USER_CFLAGS" echo "setting USER_FFLAGS to $USER_FFLAGS" - + +# Open MPI: Add on CFLAGS that we figured out up top. They have +# makefile macros in them, so we couldn't substitute them until now. +CFLAGS="$CFLAGS $OMPI_CFLAGS "'-I$(top_builddir)/include' @@ -611,8 +611,8 @@ index f975e1c..4d51a3e 100644 +#if test ! -d mpi2-other ; then mkdir mpi2-other ; fi +#if test ! -d mpi-io ; then mkdir mpi-io ; fi +#if test ! -d mpi-io/glue ; then mkdir mpi-io/glue ; fi - - # Create makefiles for all of the adio devices. Only the ones that + + # Create makefiles for all of the adio devices. 
Only the ones that # are active will be called by the top level ROMIO make @@ -1850,8 +1934,6 @@ AC_OUTPUT_COMMANDS([chmod 755 util/romioinstall test/runtests]) AC_CONFIG_FILES([ @@ -642,27 +642,27 @@ index f975e1c..4d51a3e 100644 +# mpi2-other/array/fortran/Makefile +# include/mpiof.h AC_OUTPUT - + dnl PAC_SUBDIR_CACHE_CLEANUP diff --git a/ompi/mca/io/romio32b1/romio/doc/users-guide.tex b/ompi/mca/io/romio32b1/romio/doc/users-guide.tex index b33d483..3715431 100644 --- a/ompi/mca/io/romio32b1/romio/doc/users-guide.tex +++ b/ompi/mca/io/romio32b1/romio/doc/users-guide.tex @@ -807,13 +807,19 @@ to include the file {\tt mpio.h} for C or {\tt mpiof.h} for Fortran in - your MPI-IO program. - + your MPI-IO program. + Note that on HP machines running HPUX and on NEC SX-4, you need to -compile Fortran programs with {\tt mpifort}. +compile Fortran programs with {\tt mpifort}, because {\tt mpif77} does +not support 8-byte integers. - + With MPICH, HP MPI, or NEC MPI, you can compile MPI-IO programs as \\ \hspace*{.4in} {\tt mpicc foo.c}\\ or \\ +\hspace*{.4in} {\tt mpif77 foo.f }\\ +or\\ \hspace*{.4in} {\tt mpifort foo.f}\\ - + +As mentioned above, mpifort is preferred over mpif77 on HPUX and NEC +because the f77 compilers on those machines do not support 8-byte integers. + @@ -671,12 +671,12 @@ index b33d483..3715431 100644 or \\ @@ -863,7 +869,8 @@ file systems because they don't support {\tt fcntl} file locks, and ROMIO uses that feature to implement shared file pointers. - + \item On HP machines running HPUX and on NEC SX-4, you need to compile -Fortran programs with {\tt mpifort}. +Fortran programs with {\tt mpifort} instead of {\tt mpif77}, because +the {\tt f77} compilers on these machines don't support 8-byte integers. - + \item The file-open mode {\tt MPI\_MODE\_EXCL} does not work on Intel PFS file system, due to a bug in PFS. 
diff --git a/ompi/mca/io/romio32b1/romio/include/io_romio_conv.h b/ompi/mca/io/romio32b1/romio/include/io_romio_conv.h @@ -815,7 +815,7 @@ index 2238f4b..8d1314c 100644 +++ b/ompi/mca/io/romio32b1/romio/include/mpio.h.in @@ -11,6 +11,16 @@ #define MPIO_INCLUDE - + #include "mpi.h" +/* Open MPI: We need to rename almost all of these functions, as well + a the types to be names that conform to the prefix rule */ @@ -827,7 +827,7 @@ index 2238f4b..8d1314c 100644 +#define MPIIMPL_HAVE_MPI_TYPE_CREATE_DARRAY 1 +#define MPIIMPL_HAVE_MPI_COMBINER_DUP 1 +#define MPICH_ATTR_POINTER_WITH_TYPE_TAG(x,y) - + #if defined(__cplusplus) extern "C" { @@ -279,9 +289,12 @@ int MPI_Type_create_darray(int size, int rank, int ndims, const int array_of_gsi @@ -841,7 +841,7 @@ index 2238f4b..8d1314c 100644 MPI_Fint MPI_File_c2f(MPI_File file); - +#endif - + #ifndef HAVE_MPI_GREQUEST /* The following functions are required if generalized requests are not @@ -329,15 +342,21 @@ int MPI_Info_free(MPI_Info *info); @@ -854,22 +854,22 @@ index 2238f4b..8d1314c 100644 MPI_Info MPI_Info_f2c(MPI_Fint info); #endif +#endif - + #endif /* HAVE_PRAGMA_HP_SEC_DEF */ - - + + /**************** BINDINGS FOR THE PROFILING INTERFACE ***************/ - + +/* Open MPI: We don't want any of the profiling layer */ +#if 0 + - + /* Section 9.2 */ int PMPI_File_open(MPI_Comm, const char *, int, MPI_Info, MPI_File *); @@ -528,6 +547,8 @@ MPI_Fint PMPI_Info_c2f(MPI_Info); MPI_Info PMPI_Info_f2c(MPI_Fint); #endif - + +#endif /* Open MPI: We don't want any of the profiling layer */ + #if defined(__cplusplus) @@ -916,7 +916,7 @@ index 520f206..160b661 100644 error_code = PMPI_File_set_errhandler(*fh, MPI_ERRORS_RETURN); if (error_code != MPI_SUCCESS) goto fn_fail; +#endif - + ADIO_Close(adio_fh, &error_code); MPIO_File_free(fh); diff --git a/ompi/mca/io/romio32b1/romio/mpi-io/glue/Makefile.mk b/ompi/mca/io/romio32b1/romio/mpi-io/glue/Makefile.mk @@ -924,11 +924,11 @@ index 05954a1..66f7f9e 100644 --- 
a/ompi/mca/io/romio32b1/romio/mpi-io/glue/Makefile.mk +++ b/ompi/mca/io/romio32b1/romio/mpi-io/glue/Makefile.mk @@ -7,6 +7,7 @@ - + include $(top_srcdir)/mpi-io/glue/default/Makefile.mk include $(top_srcdir)/mpi-io/glue/mpich/Makefile.mk +include $(top_srcdir)/mpi-io/glue/openmpi/Makefile.mk - + if !BUILD_ROMIO_EMBEDDED romio_other_sources += \ diff --git a/ompi/mca/io/romio32b1/romio/mpi-io/iread_all.c b/ompi/mca/io/romio32b1/romio/mpi-io/iread_all.c @@ -938,12 +938,12 @@ index ba36161..32a48b8 100644 @@ -26,7 +26,7 @@ int MPIX_File_iread_all(MPI_File fh, void *buf, int count, MPI_Datatype datatype #include "mpioprof.h" #endif - + -#ifdef HAVE_MPI_GREQUEST +#if HAVE_MPI_GREQUEST #include "mpiu_greq.h" #endif - + diff --git a/ompi/mca/io/romio32b1/romio/mpi-io/mpioprof.h b/ompi/mca/io/romio32b1/romio/mpi-io/mpioprof.h index 15654ac..63dffd8 100644 --- a/ompi/mca/io/romio32b1/romio/mpi-io/mpioprof.h @@ -951,7 +951,7 @@ index 15654ac..63dffd8 100644 @@ -10,6 +10,15 @@ building the profiling interface */ - + +/* + * Open MPI: Unfortunately, ROMIO doesn't seem to be able to build + * without a profiling interface, but we don't want a profiling @@ -962,10 +962,10 @@ index 15654ac..63dffd8 100644 + */ +#if 0 #ifdef MPIO_BUILD_PROFILING - + #undef MPI_File_open @@ -212,3 +221,4 @@ #define MPIX_Grequest_class_create PMPIX_Grequest_class_create - + #endif +#endif diff --git a/ompi/mca/io/romio314/romio/.codingcheck b/ompi/mca/io/romio314/romio/.codingcheck index 9b52b35ce42..c8d93615a22 100644 --- a/ompi/mca/io/romio314/romio/.codingcheck +++ b/ompi/mca/io/romio314/romio/.codingcheck @@ -1,9 +1,9 @@ # Here are names that at least at one point were used within ROMIO. -# We should look at these and decide which we wish to allow and which +# We should look at these and decide which we wish to allow and which # should be replaced with something more ROMIO-specific. 
-%romioDefines = ( 'ROMIO_[A-Za-z0-9_]+' => romio, - 'PROFILE' => romio, - 'PRINT_ERR_MSG' => romio, +%romioDefines = ( 'ROMIO_[A-Za-z0-9_]+' => romio, + 'PROFILE' => romio, + 'PRINT_ERR_MSG' => romio, 'HPUX' => romio, 'SPPUX'=> romio, 'SX4'=> romio, @@ -51,5 +51,5 @@ if (defined(&PushDefinesNames)) { &PushDefinesNames( "romioDefines", "tree", "add" ); } - + 1; diff --git a/ompi/mca/io/romio314/romio/COPYRIGHT b/ompi/mca/io/romio314/romio/COPYRIGHT index 609bcfa4e84..a058206a474 100644 --- a/ompi/mca/io/romio314/romio/COPYRIGHT +++ b/ompi/mca/io/romio314/romio/COPYRIGHT @@ -7,11 +7,11 @@ all source listings of the code. Copyright (C) 1997 University of Chicago Permission is hereby granted to use, reproduce, prepare derivative -works, and to redistribute to others. +works, and to redistribute to others. -The University of Chicago makes no representations as to the suitability, -operability, accuracy, or correctness of this software for any purpose. -It is provided "as is" without express or implied warranty. +The University of Chicago makes no representations as to the suitability, +operability, accuracy, or correctness of this software for any purpose. +It is provided "as is" without express or implied warranty. This software was authored by: Rajeev Thakur: (630) 252-1682; thakur@mcs.anl.gov @@ -26,7 +26,7 @@ Government Contract and are subject to the following license: the Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this computer software to reproduce, prepare derivative works, and perform publicly -and display publicly. +and display publicly. DISCLAIMER @@ -37,5 +37,5 @@ of their employees, makes any warranty express or implied, or assumes any legal liability or responsibility for the accuracy, completeness, or usefulness of any information, apparatus, product, or process disclosed, or represents that its use would not infringe privately -owned rights. +owned rights. 
diff --git a/ompi/mca/io/romio314/romio/Makefile.am b/ompi/mca/io/romio314/romio/Makefile.am index 69d70142df8..c0c226f6ff5 100644 --- a/ompi/mca/io/romio314/romio/Makefile.am +++ b/ompi/mca/io/romio314/romio/Makefile.am @@ -33,11 +33,11 @@ include $(top_srcdir)/Makefile.options ACLOCAL_AMFLAGS = -I confdb # empty variable initializations so that later code can append (+=) -include_HEADERS = +include_HEADERS = nodist_include_HEADERS = -noinst_HEADERS = +noinst_HEADERS = EXTRA_DIST = -SUFFIXES = +SUFFIXES = doc1_src_txt = # ------------------------------------------------------------------------ @@ -47,14 +47,14 @@ doc1_src_txt = # In MPICH these will have an MPI_ and a PMPI_ version. Other implementations # (like OMPI) only want these to be MPI_ routines, possibly with some # name-shifting prefix. -romio_mpi_sources = +romio_mpi_sources = # regular old source files that implement ROMIO, such as ADIO code romio_other_sources = # code that may need to be "up" called from the MPI library and/or is # MPI-implementation-specific in some way -glue_sources = +glue_sources = # ------------------------------------------------------------------------ # when building under MPICH we must be able to find mpi.h @@ -102,7 +102,7 @@ if BUILD_PROFILING_LIB # won't work very well the other way around. noinst_LTLIBRARIES += libpromio.la libpromio_la_SOURCES = $(romio_mpi_sources) -libpromio_la_CPPFLAGS = $(AM_CPPFLAGS) -DMPIO_BUILD_PROFILING +libpromio_la_CPPFLAGS = $(AM_CPPFLAGS) -DMPIO_BUILD_PROFILING endif BUILD_PROFILING_LIB else !BUILD_ROMIO_EMBEDDED diff --git a/ompi/mca/io/romio314/romio/README b/ompi/mca/io/romio314/romio/README index a6fb25a09a5..bee37ec6c88 100644 --- a/ompi/mca/io/romio314/romio/README +++ b/ompi/mca/io/romio314/romio/README @@ -23,11 +23,11 @@ Major Changes in this version: PanFS allows users to specify the layout of a file at file-creation time. 
Layout information includes the number of StorageBlades (SB) - across which the data is stored, the number of SBs across which a - parity stripe is written, and the number of consecutive stripes that - are placed on the same set of SBs. The panfs_layout_* hints are only + across which the data is stored, the number of SBs across which a + parity stripe is written, and the number of consecutive stripes that + are placed on the same set of SBs. The panfs_layout_* hints are only used if supplied at file-creation time. - + panfs_layout_type - Specifies the layout of a file: 2 = RAID0 3 = RAID5 Parity Stripes @@ -35,37 +35,37 @@ Major Changes in this version: panfs_layout_total_num_comps - The total number of StorageBlades a file is striped across. panfs_layout_parity_stripe_width - If the layout type is RAID5 Parity - Stripes, this hint specifies the + Stripes, this hint specifies the number of StorageBlades in a parity stripe. panfs_layout_parity_stripe_depth - If the layout type is RAID5 Parity Stripes, this hint specifies the - number of contiguous parity stripes written + number of contiguous parity stripes written across the same set of SBs. - panfs_layout_visit_policy - If the layout type is RAID5 Parity Stripes, - the policy used to determine the parity + panfs_layout_visit_policy - If the layout type is RAID5 Parity Stripes, + the policy used to determine the parity stripe a given file offset is written to: 1 = Round Robin - PanFS supports the "concurrent write" (CW) mode, where groups of cooperating - clients can disable the PanFS consistency mechanisms and use their own - consistency protocol. Clients participating in concurrent write mode use - application specific information to improve performance while maintaining - file consistency. All clients accessing the file(s) must enable concurrent - write mode. If any client does not enable concurrent write mode, then the - PanFS consistency protocol will be invoked. 
Once a file is opened in CW mode - on a machine, attempts to open a file in non-CW mode will fail with - EACCES. If a file is already opened in non-CW mode, attempts to open - the file in CW mode will fail with EACCES. The following hint is + PanFS supports the "concurrent write" (CW) mode, where groups of cooperating + clients can disable the PanFS consistency mechanisms and use their own + consistency protocol. Clients participating in concurrent write mode use + application specific information to improve performance while maintaining + file consistency. All clients accessing the file(s) must enable concurrent + write mode. If any client does not enable concurrent write mode, then the + PanFS consistency protocol will be invoked. Once a file is opened in CW mode + on a machine, attempts to open a file in non-CW mode will fail with + EACCES. If a file is already opened in non-CW mode, attempts to open + the file in CW mode will fail with EACCES. The following hint is used to enable concurrent write mode. - panfs_concurrent_write - If set to 1 at file open time, the file - is opened using the PanFS concurrent write + panfs_concurrent_write - If set to 1 at file open time, the file + is opened using the PanFS concurrent write mode flag. Concurrent write mode is not a persistent attribute of the file. Below is an example PanFS layout using the following parameters: - + - panfs_layout_type = 3 - panfs_layout_total_num_comps = 100 - panfs_layout_parity_stripe_width = 10 @@ -89,7 +89,7 @@ Major Changes in this version: ... * Initial support for the Globus GridFTP filesystem. Work contributed by Troy - Baer (troy@osc.edu). + Baer (troy@osc.edu). 
Major Changes in Version 1.2.5: ------------------------------ @@ -108,7 +108,7 @@ Major Changes in Version 1.2.5: Major Changes in Version 1.2.4: ------------------------------ -* Added section describing ROMIO MPI_FILE_SYNC and MPI_FILE_CLOSE behavior to +* Added section describing ROMIO MPI_FILE_SYNC and MPI_FILE_CLOSE behavior to User's Guide * Bug removed from PVFS ADIO implementation regarding resize operations @@ -136,7 +136,7 @@ Major Changes in Version 1.2.3: Major Changes in Version 1.0.3: ------------------------------- -* When used with MPICH 1.2.1, the MPI-IO functions return proper error codes +* When used with MPICH 1.2.1, the MPI-IO functions return proper error codes and classes, and the status object is filled in. * On SGI's XFS file system, ROMIO can use direct I/O even if the @@ -144,12 +144,12 @@ Major Changes in Version 1.0.3: direct I/O. ROMIO does this by doing part of the request with buffered I/O (until all the restrictions are met) and doing the rest with direct I/O. (This feature hasn't been tested rigorously. Please - check for errors.) + check for errors.) By default, ROMIO will use only buffered I/O. Direct I/O can be enabled either by setting the environment variables MPIO_DIRECT_READ and/or MPIO_DIRECT_WRITE to TRUE, or on a per-file basis by using - the info keys "direct_read" and "direct_write". + the info keys "direct_read" and "direct_write". Direct I/O will result in higher performance only if you are accessing a high-bandwidth disk system. Otherwise, buffered I/O is @@ -166,51 +166,51 @@ Major Changes Version 1.0.2: components of the MPI I/O chapter not yet implemented are file interoperability and error handling. -* Added support for using "direct I/O" on SGI's XFS file system. +* Added support for using "direct I/O" on SGI's XFS file system. Direct I/O is an optional feature of XFS in which data is moved - directly between the user's buffer and the storage devices, bypassing - the file-system cache. 
This can improve performance significantly on + directly between the user's buffer and the storage devices, bypassing + the file-system cache. This can improve performance significantly on systems with high disk bandwidth. Without high disk bandwidth, regular I/O (that uses the file-system cache) perfoms better. ROMIO, therefore, does not use direct I/O by default. The user can turn on direct I/O (separately for reading and writing) either by - using environment variables or by using MPI's hints mechanism (info). + using environment variables or by using MPI's hints mechanism (info). To use the environment-variables method, do setenv MPIO_DIRECT_READ TRUE setenv MPIO_DIRECT_WRITE TRUE - To use the hints method, the two keys are "direct_read" and "direct_write". - By default their values are "false". To turn on direct I/O, set the values + To use the hints method, the two keys are "direct_read" and "direct_write". + By default their values are "false". To turn on direct I/O, set the values to "true". The environment variables have priority over the info keys. In other words, if the environment variables are set to TRUE, direct I/O - will be used even if the info keys say "false", and vice versa. - Note that direct I/O must be turned on separately for reading + will be used even if the info keys say "false", and vice versa. + Note that direct I/O must be turned on separately for reading and writing. The environment-variables method assumes that the environment variables can be read by each process in the MPI job. This is not guaranteed by the MPI Standard, but it works with SGI's MPI and the ch_shmem device of MPICH. -* Added support (new ADIO device, ad_pvfs) for the PVFS parallel +* Added support (new ADIO device, ad_pvfs) for the PVFS parallel file system for Linux clusters, developed at Clemson University (see http://www.parl.clemson.edu/pvfs ). 
To use it, you must first install - PVFS and then when configuring ROMIO, specify "-file_system=pvfs" in + PVFS and then when configuring ROMIO, specify "-file_system=pvfs" in addition to any other options to "configure". (As usual, you can configure - for multiple file systems by using "+"; for example, - "-file_system=pvfs+ufs+nfs".) You will need to specify the path - to the PVFS include files via the "-cflags" option to configure, + for multiple file systems by using "+"; for example, + "-file_system=pvfs+ufs+nfs".) You will need to specify the path + to the PVFS include files via the "-cflags" option to configure, for example, "configure -cflags=-I/usr/pvfs/include". You will also need to specify the full path name of the PVFS library. - The best way to do this is via the "-lib" option to MPICH's - configure script (assuming you are using ROMIO from within MPICH). + The best way to do this is via the "-lib" option to MPICH's + configure script (assuming you are using ROMIO from within MPICH). * Uses weak symbols (where available) for building the profiling version, i.e., the PMPI routines. As a result, the size of the library is reduced - considerably. + considerably. * The Makefiles use "virtual paths" if supported by the make utility. GNU make supports it, for example. This feature allows you to untar the distribution in some directory, say a slow NFS directory, - and compile the library (the .o files) in another + and compile the library (the .o files) in another directory, say on a faster local disk. For example, if the tar file has been untarred in an NFS directory called /home/thakur/romio, one can compile it in a different directory, say /tmp/thakur, as follows: @@ -228,8 +228,8 @@ Major Changes Version 1.0.2: * This version is included in MPICH 1.2.0. If you are using MPICH, you need not download ROMIO separately; it gets built as part of MPICH. - The previous version of ROMIO is included in LAM, HP MPI, SGI MPI, and - NEC MPI. 
NEC has also implemented the MPI-IO functions missing + The previous version of ROMIO is included in LAM, HP MPI, SGI MPI, and + NEC MPI. NEC has also implemented the MPI-IO functions missing in ROMIO, and therefore NEC MPI has a complete implementation of MPI-IO. @@ -242,10 +242,10 @@ Major Changes in Version 1.0.1: * Added support for NEC SX-4 and created a new device ad_sfs for NEC SFS file system. -* New devices ad_hfs for HP/Convex HFS file system and ad_xfs for +* New devices ad_hfs for HP/Convex HFS file system and ad_xfs for SGI XFS file system. -* Users no longer need to prefix the filename with the type of +* Users no longer need to prefix the filename with the type of file system; ROMIO determines the file-system type on its own. * Added support for 64-bit file sizes on IBM PIOFS, SGI XFS, @@ -254,27 +254,27 @@ Major Changes in Version 1.0.1: * MPI_Offset is an 8-byte integer on machines that support 8-byte integers. It is of type "long long" in C and "integer*8" in Fortran. With a Fortran 90 compiler, you can use either integer*8 or - integer(kind=MPI_OFFSET_KIND). - If you printf an MPI_Offset in C, remember to use %lld - or %ld as required by your compiler. (See what is used in the test + integer(kind=MPI_OFFSET_KIND). + If you printf an MPI_Offset in C, remember to use %lld + or %ld as required by your compiler. (See what is used in the test program romio/test/misc.c.) -* On some machines, ROMIO detects at configure time that "long long" is +* On some machines, ROMIO detects at configure time that "long long" is either not supported by the C compiler or it doesn't work properly. In such cases, configure sets MPI_Offset to long in C and integer in Fortran. This happens on Intel Paragon, Sun4, and FreeBSD. -* Added support for passing hints to the implementation via the MPI_Info +* Added support for passing hints to the implementation via the MPI_Info parameter. 
ROMIO understands the following hints (keys in MPI_Info object): - /* on all file systems */ + /* on all file systems */ cb_buffer_size - buffer size for collective I/O cb_nodes - no. of processes that actually perform I/O in collective I/O ind_rd_buffer_size - buffer size for data sieving in independent reads /* on all file systems except IBM PIOFS */ ind_wr_buffer_size - buffer size for data sieving in independent writes - /* ind_wr_buffer_size is ignored on PIOFS because data sieving + /* ind_wr_buffer_size is ignored on PIOFS because data sieving cannot be done for writes since PIOFS doesn't support file locking */ /* on Intel PFS and IBM PIOFS only. These hints are understood only if @@ -285,16 +285,16 @@ Major Changes in Version 1.0.1: striping (between 0 and (striping_factor-1)) /* on Intel PFS only. */ - pfs_svr_buf - turn on or off PFS server buffering by setting the value + pfs_svr_buf - turn on or off PFS server buffering by setting the value to "true" or "false", case-sensitive. - + If ROMIO doesn't understand a hint, or if the value is invalid, the hint - will be ignored. The values of hints being used by ROMIO at any time + will be ignored. The values of hints being used by ROMIO at any time can be obtained via MPI_File_get_info. -General Information +General Information ------------------- ROMIO is a high-performance, portable implementation of MPI-IO (the @@ -311,7 +311,7 @@ accessing arrays stored in files. The functions MPI_File_f2c and MPI_File_c2f (Sec. 4.12.4) are also implemented. C, Fortran, and profiling interfaces are provided for all functions -that have been implemented. +that have been implemented. Please read the limitations of this version of ROMIO that are listed below (e.g., MPIO_Request object, restriction to homogeneous @@ -326,7 +326,7 @@ SFS, PVFS, NFS, and any Unix file system (UFS). 
This version of ROMIO is included in MPICH 1.2.3; an earlier version is included in at least the following MPI implementations: LAM, HP -MPI, SGI MPI, and NEC MPI. +MPI, SGI MPI, and NEC MPI. Note that proper I/O error codes and classes are returned and the status variable is filled only when used with MPICH 1.2.1 or later. @@ -358,16 +358,16 @@ performance, but it is necessary for correct behavior. The following are some instructions we received from Ian Wells of HP for setting the noac option on NFS. We have not tried them -ourselves. We are including them here because you may find +ourselves. We are including them here because you may find them useful. Note that some of the steps may be specific to HP systems, and you may need root permission to execute some of the -commands. - +commands. + >1. first confirm you are running nfs version 3 > >rpcnfo -p `hostname` | grep nfs > - >ie + >ie > goedel >rpcinfo -p goedel | grep nfs > 100003 2 udp 2049 nfs > 100003 3 udp 2049 nfs @@ -379,11 +379,11 @@ commands. > Here is an example of a correct fstab entry for /epm1: > > ie grep epm1 /etc/fstab - > + > > ROOOOT 11>grep epm1 /etc/fstab > gershwin:/epm1 /rmt/gershwin/epm1 nfs bg,intr,noac 0 0 > - > if the noac option is not present, add it + > if the noac option is not present, add it > and then remount this directory > on each of the machines that will be used to share MPIO files > @@ -394,7 +394,7 @@ commands. > >3. Confirm that the directory is mounted noac: > - >ROOOOT >grep gershwin /etc/mnttab + >ROOOOT >grep gershwin /etc/mnttab >gershwin:/epm1 /rmt/gershwin/epm1 nfs >noac,acregmin=0,acregmax=0,acdirmin=0,acdirmax=0 0 0 899911504 @@ -424,8 +424,8 @@ THEN make Some example programs and a Makefile are provided in the romio/test directory. -Run the examples the way you would run any MPI program. Each program takes -the filename as a command-line argument "-fname filename". +Run the examples the way you would run any MPI program. 
Each program takes +the filename as a command-line argument "-fname filename". The configure script by default configures ROMIO for the file systems most likely to be used on the given machine. If you wish, you can @@ -440,7 +440,7 @@ For the entire list of options to configure do ./configure -h | more After building a specific version as above, you can install it in a -particular directory with +particular directory with make install PREFIX=/usr/local/romio (or whatever directory you like) @@ -448,14 +448,14 @@ or just make install (if you used -prefix at configure time) -If you intend to leave ROMIO where you built it, you should NOT install it -(install is used only to move the necessary parts of a built ROMIO to +If you intend to leave ROMIO where you built it, you should NOT install it +(install is used only to move the necessary parts of a built ROMIO to another location). The installed copy will have the include files, libraries, man pages, and a few other odds and ends, but not the whole source tree. It will have a test directory for testing the installation and a location-independent Makefile built during installation, which users can copy and modify to compile and link -against the installed copy. +against the installed copy. To rebuild ROMIO with a different set of configure options, do @@ -471,7 +471,7 @@ Testing ROMIO To test if the installation works, do - make testing + make testing in the romio/test directory. This calls a script that runs the test programs and compares the results with what they should be. By @@ -489,15 +489,15 @@ Compiling and Running MPI-IO Programs If ROMIO is not already included in the MPI implementation, you need to include the file mpio.h for C or mpiof.h for Fortran in your MPI-IO -program. +program. Note that on HP machines running HPUX and on NEC SX-4, you need to compile Fortran programs with mpifort, because the f77 compilers on -these machines don't support 8-byte integers. 
+these machines don't support 8-byte integers. -With MPICH, HP MPI, or NEC MPI, you can compile MPI-IO programs as +With MPICH, HP MPI, or NEC MPI, you can compile MPI-IO programs as mpicc foo.c -or +or mpif77 foo.f or mpifort foo.f @@ -505,14 +505,14 @@ or As mentioned above, mpifort is preferred over mpif77 on HPUX and NEC because the f77 compilers on those machines do not support 8-byte integers. -With SGI MPI, you can compile MPI-IO programs as +With SGI MPI, you can compile MPI-IO programs as cc foo.c -lmpi or f77 foo.f -lmpi or f90 foo.f -lmpi -With LAM, you can compile MPI-IO programs as +With LAM, you can compile MPI-IO programs as hcc foo.c -lmpi or hf77 foo.f -lmpi @@ -530,7 +530,7 @@ an SGI machine, make sure that you use MPICH's mpirun and not SGI's mpirun. The Makefile in the romio/test directory illustrates how to compile -and link MPI-IO programs. +and link MPI-IO programs. @@ -552,7 +552,7 @@ int MPIO_Test(MPIO_Request *request, int *flag, MPI_Status *status); int MPIO_Wait(MPIO_Request *request, MPI_Status *status); The usual functions MPI_Test, MPI_Wait, MPI_Testany, etc., will not -work for nonblocking I/O. +work for nonblocking I/O. * This version works only on a homogeneous cluster of machines, and only the "native" file data representation is supported. @@ -582,11 +582,11 @@ message from SGI MPI: ``MPI has run out of internal datatype entries. Please set the environment variable MPI_TYPE_MAX for additional space.'' If you get this error message, add this line to your .cshrc file: - setenv MPI_TYPE_MAX 65536 + setenv MPI_TYPE_MAX 65536 Use a larger number if you still get the error message. * If a Fortran program uses a file handle created using ROMIO's C -interface, or vice-versa, you must use the functions MPI_File_c2f +interface, or vice-versa, you must use the functions MPI_File_c2f or MPI_File_f2c. Such a situation occurs, for example, if a Fortran program uses an I/O library written in C with MPI-IO calls. 
Similar functions MPIO_Request_f2c and @@ -595,12 +595,12 @@ MPIO_Request_c2f are also provided. * For Fortran programs on the Intel Paragon, you may need to provide the complete path to mpif.h in the include statement, e.g., include '/usr/local/mpich/include/mpif.h' -instead of +instead of include 'mpif.h' This is because the -I option to the Paragon Fortran compiler if77 -doesn't work correctly. It always looks in the default directories first +doesn't work correctly. It always looks in the default directories first and, therefore, picks up Intel's mpif.h, which is actually the -mpif.h of an older version of MPICH. +mpif.h of an older version of MPICH. diff --git a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.c b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.c index 4be147f49a5..ce80672dc08 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.c @@ -7,8 +7,8 @@ */ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (C) 2001 University of Chicago. +/* + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_gpfs.h" diff --git a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.h b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.h index 81fb076c339..1bff707491d 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.h @@ -7,8 +7,8 @@ */ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_aggrs.h b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_aggrs.h index 1b6215e576f..cd8dce90614 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_aggrs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_aggrs.h @@ -1,86 +1,86 @@ -/* ---------------------------------------------------------------- */ -/* (C)Copyright IBM Corp. 2007, 2008 */ -/* ---------------------------------------------------------------- */ -/** - * \file ad_gpfs_aggrs.h - * \brief ??? - */ - -/* - * File: ad_gpfs_aggrs.h - * - * Declares functions optimized specifically for GPFS parallel I/O solution. - * - */ - -#ifndef AD_GPFS_AGGRS_H_ -#define AD_GPFS_AGGRS_H_ - -#include "adio.h" -#include +/* ---------------------------------------------------------------- */ +/* (C)Copyright IBM Corp. 2007, 2008 */ +/* ---------------------------------------------------------------- */ +/** + * \file ad_gpfs_aggrs.h + * \brief ??? + */ + +/* + * File: ad_gpfs_aggrs.h + * + * Declares functions optimized specifically for GPFS parallel I/O solution. + * + */ + +#ifndef AD_GPFS_AGGRS_H_ +#define AD_GPFS_AGGRS_H_ + +#include "adio.h" +#include #ifdef HAVE_GPFS_H #include #endif - - - /* overriding ADIOI_Calc_file_domains() to apply 'aligned file domain partitioning'. 
*/ - void ADIOI_GPFS_Calc_file_domains(ADIO_File fd, - ADIO_Offset *st_offsets, - ADIO_Offset *end_offsets, - int nprocs, - int nprocs_for_coll, - ADIO_Offset *min_st_offset_ptr, - ADIO_Offset **fd_start_ptr, - ADIO_Offset **fd_end_ptr, - ADIO_Offset *fd_size_ptr, - void *fs_ptr); - - /* overriding ADIOI_Calc_aggregator() for the default implementation is specific for - static file domain partitioning */ - int ADIOI_GPFS_Calc_aggregator(ADIO_File fd, - ADIO_Offset off, - ADIO_Offset min_off, - ADIO_Offset *len, - ADIO_Offset fd_size, - ADIO_Offset *fd_start, - ADIO_Offset *fd_end); - - /* overriding ADIOI_Calc_my_req for the default implementation is specific for - static file domain partitioning */ - void ADIOI_GPFS_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list, - int contig_access_count, ADIO_Offset - min_st_offset, ADIO_Offset *fd_start, - ADIO_Offset *fd_end, ADIO_Offset fd_size, - int nprocs, - int *count_my_req_procs_ptr, - int **count_my_req_per_proc_ptr, - ADIOI_Access **my_req_ptr, - int **buf_idx_ptr); - - /* - * ADIOI_Calc_others_req - * - * param[in] count_my_req_procs Number of processes whose file domain my - * request touches. - * param[in] count_my_req_per_proc count_my_req_per_proc[i] gives the no. of - * contig. requests of this process in - * process i's file domain. 
- * param[in] my_req A structure defining my request - * param[in] nprocs Number of nodes in the block - * param[in] myrank Rank of this node - * param[out] count_others_req_proc_ptr Number of processes whose requests lie in - * my process's file domain (including my - * process itself) - * param[out] others_req_ptr Array of other process' requests that lie - * in my process's file domain - */ - void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs, - int *count_my_req_per_proc, - ADIOI_Access *my_req, - int nprocs, int myrank, - int *count_others_req_procs_ptr, - ADIOI_Access **others_req_ptr); - - -#endif /* AD_GPFS_AGGRS_H_ */ + + + /* overriding ADIOI_Calc_file_domains() to apply 'aligned file domain partitioning'. */ + void ADIOI_GPFS_Calc_file_domains(ADIO_File fd, + ADIO_Offset *st_offsets, + ADIO_Offset *end_offsets, + int nprocs, + int nprocs_for_coll, + ADIO_Offset *min_st_offset_ptr, + ADIO_Offset **fd_start_ptr, + ADIO_Offset **fd_end_ptr, + ADIO_Offset *fd_size_ptr, + void *fs_ptr); + + /* overriding ADIOI_Calc_aggregator() for the default implementation is specific for + static file domain partitioning */ + int ADIOI_GPFS_Calc_aggregator(ADIO_File fd, + ADIO_Offset off, + ADIO_Offset min_off, + ADIO_Offset *len, + ADIO_Offset fd_size, + ADIO_Offset *fd_start, + ADIO_Offset *fd_end); + + /* overriding ADIOI_Calc_my_req for the default implementation is specific for + static file domain partitioning */ + void ADIOI_GPFS_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list, + int contig_access_count, ADIO_Offset + min_st_offset, ADIO_Offset *fd_start, + ADIO_Offset *fd_end, ADIO_Offset fd_size, + int nprocs, + int *count_my_req_procs_ptr, + int **count_my_req_per_proc_ptr, + ADIOI_Access **my_req_ptr, + int **buf_idx_ptr); + + /* + * ADIOI_Calc_others_req + * + * param[in] count_my_req_procs Number of processes whose file domain my + * request touches. 
+ * param[in] count_my_req_per_proc count_my_req_per_proc[i] gives the no. of + * contig. requests of this process in + * process i's file domain. + * param[in] my_req A structure defining my request + * param[in] nprocs Number of nodes in the block + * param[in] myrank Rank of this node + * param[out] count_others_req_proc_ptr Number of processes whose requests lie in + * my process's file domain (including my + * process itself) + * param[out] others_req_ptr Array of other process' requests that lie + * in my process's file domain + */ + void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs, + int *count_my_req_per_proc, + ADIOI_Access *my_req, + int nprocs, int myrank, + int *count_others_req_procs_ptr, + ADIOI_Access **others_req_ptr); + + +#endif /* AD_GPFS_AGGRS_H_ */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_close.c b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_close.c index f8a41671a21..c053700ccdd 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_close.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_close.c @@ -7,8 +7,8 @@ */ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_flush.c b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_flush.c index 555002f639d..6ff28e348c5 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_flush.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_flush.c @@ -7,9 +7,9 @@ */ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_open.c b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_open.c index f4fef37c85c..31dc675ce9e 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_open.c @@ -7,8 +7,8 @@ */ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -151,6 +151,6 @@ void ADIOI_GPFS_Open(ADIO_File fd, int *error_code) } else *error_code = MPI_SUCCESS; } -/* - *vim: ts=8 sts=4 sw=4 noexpandtab +/* + *vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_rdcoll.c b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_rdcoll.c index c2cad8bf8be..83fe227f074 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_rdcoll.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_rdcoll.c @@ -7,9 +7,9 @@ */ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -34,7 +34,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype datatype, int nprocs, int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, - ADIO_Offset *len_list, int contig_access_count, + ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, @@ -42,16 +42,16 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, - int *count, int *start_pos, - int *partial_send, - int *recd_from_proc, int nprocs, + int *count, int *start_pos, + int *partial_send, + int *recd_from_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, - ADIO_Offset min_st_offset, + ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, ADIO_Offset *fd_end, - ADIOI_Access *others_req, - int iter, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, + ADIOI_Access *others_req, + int iter, MPI_Aint buftype_extent, int *buf_idx); static void ADIOI_R_Exchange_data_alltoallv(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset *offset_list, ADIO_Offset @@ -68,14 +68,14 @@ static void ADIOI_R_Exchange_data_alltoallv(ADIO_File fd, void *buf, ADIOI_Flatl int iter, MPI_Aint buftype_extent, int *buf_idx); static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node - *flat_buf, char **recv_buf, ADIO_Offset - *offset_list, ADIO_Offset *len_list, - unsigned *recv_size, + *flat_buf, char **recv_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, + unsigned *recv_size, MPI_Request *requests, MPI_Status *statuses, int *recd_from_proc, int nprocs, - int contig_access_count, - ADIO_Offset min_st_offset, - ADIO_Offset fd_size, ADIO_Offset *fd_start, + int contig_access_count, + ADIO_Offset min_st_offset, + ADIO_Offset fd_size, 
ADIO_Offset *fd_start, ADIO_Offset *fd_end, MPI_Aint buftype_extent); @@ -94,15 +94,15 @@ void ADIOI_GPFS_ReadStridedColl(ADIO_File fd, void *buf, int count, *error_code) { /* Uses a generalized version of the extended two-phase method described - in "An Extended Two-Phase Method for Accessing Sections of + in "An Extended Two-Phase Method for Accessing Sections of Out-of-Core Arrays", Rajeev Thakur and Alok Choudhary, - Scientific Programming, (5)4:301--317, Winter 1996. + Scientific Programming, (5)4:301--317, Winter 1996. http://www.mcs.anl.gov/home/thakur/ext2ph.ps */ - ADIOI_Access *my_req; + ADIOI_Access *my_req; /* array of nprocs structures, one for each other process in whose file domain this process's request lies */ - + ADIOI_Access *others_req; /* array of nprocs structures, one for each other process whose request lies in this process's file domain. */ @@ -127,7 +127,7 @@ void ADIOI_GPFS_ReadStridedColl(ADIO_File fd, void *buf, int count, #if 0 /* From common code - not implemented for bg. 
*/ if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) { - ADIOI_IOStridedColl (fd, buf, count, ADIOI_READ, datatype, + ADIOI_IOStridedColl (fd, buf, count, ADIOI_READ, datatype, file_ptr_type, offset, status, error_code); return; } */ @@ -162,15 +162,15 @@ void ADIOI_GPFS_ReadStridedColl(ADIO_File fd, void *buf, int count, #ifdef RDCOLL_DEBUG for (i=0; ihints->cb_read == ADIOI_HINT_DISABLE - || (!interleave_count && (fd->hints->cb_read == ADIOI_HINT_AUTO))) + || (!interleave_count && (fd->hints->cb_read == ADIOI_HINT_AUTO))) { /* don't do aggregation */ if (fd->hints->cb_read != ADIOI_HINT_DISABLE) { @@ -267,7 +267,7 @@ void ADIOI_GPFS_ReadStridedColl(ADIO_File fd, void *buf, int count, ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs, nprocs_for_coll, &min_st_offset, &fd_start, &fd_end, - fd->hints->min_fdomain_size, &fd_size, + fd->hints->min_fdomain_size, &fd_size, fd->hints->striping_unit); GPFSMPIO_T_CIO_SET_GET( r, 1, 1, GPFSMPIO_CIO_T_MYREQ, GPFSMPIO_CIO_T_FD_PART ); @@ -304,7 +304,7 @@ void ADIOI_GPFS_ReadStridedColl(ADIO_File fd, void *buf, int count, } } - /* calculate where the portions of the access requests of this process + /* calculate where the portions of the access requests of this process * are located in terms of the file domains. this could be on the same * process or on other processes. 
this function fills in: * count_my_req_procs - number of processes (including this one) for which @@ -319,13 +319,13 @@ void ADIOI_GPFS_ReadStridedColl(ADIO_File fd, void *buf, int count, if (gpfsmpio_tuneblocking) ADIOI_GPFS_Calc_my_req(fd, offset_list, len_list, contig_access_count, min_st_offset, fd_start, fd_end, fd_size, - nprocs, &count_my_req_procs, + nprocs, &count_my_req_procs, &count_my_req_per_proc, &my_req, &buf_idx); else ADIOI_Calc_my_req(fd, offset_list, len_list, contig_access_count, min_st_offset, fd_start, fd_end, fd_size, - nprocs, &count_my_req_procs, + nprocs, &count_my_req_procs, &count_my_req_per_proc, &my_req, &buf_idx); @@ -345,15 +345,15 @@ void ADIOI_GPFS_ReadStridedColl(ADIO_File fd, void *buf, int count, &others_req); else - ADIOI_Calc_others_req(fd, count_my_req_procs, - count_my_req_per_proc, my_req, - nprocs, myrank, &count_others_req_procs, - &others_req); + ADIOI_Calc_others_req(fd, count_my_req_procs, + count_my_req_per_proc, my_req, + nprocs, myrank, &count_others_req_procs, + &others_req); GPFSMPIO_T_CIO_SET_GET( r, 1, 1, GPFSMPIO_CIO_T_DEXCH, GPFSMPIO_CIO_T_OTHREQ ) - /* my_req[] and count_my_req_per_proc aren't needed at this point, so - * let's free the memory + /* my_req[] and count_my_req_per_proc aren't needed at this point, so + * let's free the memory */ ADIOI_Free(count_my_req_per_proc); for (i=0; icomm); + MPI_Allreduce(&ntimes, &max_ntimes, 1, MPI_INT, MPI_MAX, fd->comm); read_buf = fd->io_buf; - curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); + curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); /* its use is explained below. calloc initializes to 0. */ count = (int *) ADIOI_Malloc(nprocs * sizeof(int)); @@ -498,7 +498,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype in an iteration. 
*/ partial_send = (int *) ADIOI_Calloc(nprocs, sizeof(int)); - /* if only a portion of the last off-len pair is sent to a process + /* if only a portion of the last off-len pair is sent to a process in a particular iteration, the length sent is stored here. calloc initializes to 0. */ @@ -514,7 +514,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype ADIOI_Fill_user_buffer. initialized to 0 here. */ start_pos = (int *) ADIOI_Malloc(nprocs*sizeof(int)); - /* used to store the starting value of curr_offlen_ptr[i] in + /* used to store the starting value of curr_offlen_ptr[i] in this iteration */ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); @@ -540,9 +540,9 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype /* go through all others_req and check if any are satisfied by the current read */ - /* since MPI guarantees that displacements in filetypes are in + /* since MPI guarantees that displacements in filetypes are in monotonically nondecreasing order, I can maintain a pointer - (curr_offlen_ptr) to + (curr_offlen_ptr) to current off-len pair for each process in others_req and scan further only from there. There is still a problem of filetypes such as: (1, 2, 3 are not process nos. They are just numbers for @@ -552,7 +552,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype 2 -----!---- 3 --!----- - where ! indicates where the current read_size limitation cuts + where ! indicates where the current read_size limitation cuts through the filetype. I resolve this by reading up to !, but filling the communication buffer only for 1. I copy the portion left over for 2 into a tmp_buf for use in the next @@ -561,22 +561,22 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype other end, as only one off-len pair with incomplete data will be sent. I also don't need to send the individual offsets and lens along with the data, as the data is being - sent in a particular order. 
*/ + sent in a particular order. */ - /* off = start offset in the file for the data actually read in - this iteration + /* off = start offset in the file for the data actually read in + this iteration size = size of data read corresponding to off real_off = off minus whatever data was retained in memory from previous iteration for cases like 2, 3 illustrated above real_size = size plus the extra corresponding to real_off - req_off = off in file for a particular contiguous request + req_off = off in file for a particular contiguous request minus what was satisfied in previous iteration req_size = size corresponding to req_off */ #ifdef PROFILE MPE_Log_event(13, 0, "start computation"); #endif - size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); + size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); real_off = off - for_curr_iter; real_size = size + for_curr_iter; @@ -585,7 +585,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype for (i=0; icomm, requests+j); j++; buf_idx[i] += recv_size[i]; @@ -802,19 +802,19 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node else { /* allocate memory for recv_buf and post receives */ recv_buf = (char **) ADIOI_Malloc(nprocs * sizeof(char*)); - for (i=0; i < nprocs; i++) - if (recv_size[i]) recv_buf[i] = + for (i=0; i < nprocs; i++) + if (recv_size[i]) recv_buf[i] = (char *) ADIOI_Malloc(recv_size[i]); j = 0; - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (recv_size[i]) { - MPI_Irecv(recv_buf[i], recv_size[i], MPI_BYTE, i, + MPI_Irecv(recv_buf[i], recv_size[i], MPI_BYTE, i, myrank+i+100*iter, fd->comm, requests+j); j++; #ifdef RDCOLL_DEBUG - DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", - myrank, recv_size[i], myrank+i+100*iter); + DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", + myrank, recv_size[i], myrank+i+100*iter); #endif } } @@ -832,7 +832,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, 
ADIOI_Flatlist_node } ADIOI_Type_create_hindexed_x(count[i], &(others_req[i].lens[start_pos[i]]), - &(others_req[i].mem_ptrs[start_pos[i]]), + &(others_req[i].mem_ptrs[start_pos[i]]), MPI_BYTE, &send_type); /* absolute displacement; use MPI_BOTTOM in send */ MPI_Type_commit(&send_type); @@ -845,7 +845,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node } statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+nprocs_recv+1) * \ - sizeof(MPI_Status)); + sizeof(MPI_Status)); /* +1 to avoid a 0-size malloc */ /* wait on the receives */ @@ -858,10 +858,10 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node #endif /* if noncontiguous, to the copies from the recv buffers */ - if (!buftype_is_contig) + if (!buftype_is_contig) ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf, - offset_list, len_list, (unsigned*)recv_size, - requests, statuses, recd_from_proc, + offset_list, len_list, (unsigned*)recv_size, + requests, statuses, recd_from_proc, nprocs, contig_access_count, min_st_offset, fd_size, fd_start, fd_end, buftype_extent); @@ -874,7 +874,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node ADIOI_Free(requests); if (!buftype_is_contig) { - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (recv_size[i]) ADIOI_Free(recv_buf[i]); ADIOI_Free(recv_buf); } @@ -932,14 +932,14 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node } static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node - *flat_buf, char **recv_buf, ADIO_Offset - *offset_list, ADIO_Offset *len_list, - unsigned *recv_size, + *flat_buf, char **recv_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, + unsigned *recv_size, MPI_Request *requests, MPI_Status *statuses, int *recd_from_proc, int nprocs, - int contig_access_count, - ADIO_Offset min_st_offset, - ADIO_Offset fd_size, ADIO_Offset *fd_start, + int contig_access_count, + ADIO_Offset min_st_offset, + 
ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, MPI_Aint buftype_extent) { @@ -958,9 +958,9 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node /* curr_from_proc[p] = amount of data recd from proc. p that has already been accounted for so far - done_from_proc[p] = amount of data already recd from proc. p and + done_from_proc[p] = amount of data already recd from proc. p and filled into user buffer in previous iterations - user_buf_idx = current location in user buffer + user_buf_idx = current location in user buffer recv_buf_idx[p] = current location in recv_buf of proc. p */ curr_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); done_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); @@ -977,10 +977,10 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node flat_buf_sz = flat_buf->blocklens[0]; /* flat_buf_idx = current index into flattened buftype - flat_buf_sz = size of current contiguous component in + flat_buf_sz = size of current contiguous component in flattened buf */ - for (i=0; i done_from_proc[p]) { if (done_from_proc[p] > curr_from_proc[p]) { - size = ADIOI_MIN(curr_from_proc[p] + len - + size = ADIOI_MIN(curr_from_proc[p] + len - done_from_proc[p], recv_size[p]-recv_buf_idx[p]); buf_incr = done_from_proc[p] - curr_from_proc[p]; ADIOI_BUF_INCR @@ -1034,7 +1034,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node rem_len -= len; } } - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (recv_size[i]) recd_from_proc[i] = curr_from_proc[i]; ADIOI_Free(curr_from_proc); @@ -1045,16 +1045,16 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node static void ADIOI_R_Exchange_data_alltoallv( ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset *offset_list, ADIO_Offset - *len_list, int *send_size, int *recv_size, + *len_list, int *send_size, int *recv_size, int *count, int 
*start_pos, int *partial_send, int *recd_from_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, ADIO_Offset *fd_end, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIOI_Access *others_req, int iter, MPI_Aint buftype_extent, int *buf_idx) -{ +{ int i, j, k=0, tmp=0, nprocs_recv, nprocs_send; char **recv_buf = NULL; MPI_Request *requests=NULL; @@ -1068,13 +1068,13 @@ static void ADIOI_R_Exchange_data_alltoallv( /* exchange send_size info so that each process knows how much to receive from whom and how much memory to allocate. */ MPI_Alltoall(send_size, 1, MPI_INT, recv_size, 1, MPI_INT, fd->comm); - + nprocs_recv = 0; for (i=0; icomm ); + fd->comm ); #if 0 DBG_FPRINTF(stderr, "\tall_recv_buf = " ); for (i=131072; i<131073; i++) { DBG_FPRINTF(stderr, "%2d,", all_recv_buf [i] ); } DBG_FPRINTF(stderr, "\n" ); #endif - + /* unpack at the receiver side */ - if (nprocs_recv) { + if (nprocs_recv) { if (!buftype_is_contig) ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf, offset_list, len_list, (unsigned*)recv_size, @@ -1161,11 +1161,11 @@ static void ADIOI_R_Exchange_data_alltoallv( } } } - + ADIOI_Free( all_send_buf ); ADIOI_Free( all_recv_buf ); ADIOI_Free( recv_buf ); ADIOI_Free( sdispls ); ADIOI_Free( rdispls ); - return; -} + return; +} diff --git a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c index 968e6e6591b..e83b643bb8a 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c @@ -7,8 +7,8 @@ */ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -47,32 +47,32 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype ADIO_Offset *fd_start, ADIO_Offset *fd_end, int *buf_idx, int *error_code); static void ADIOI_W_Exchange_data(ADIO_File fd, const void *buf, char *write_buf, - ADIOI_Flatlist_node *flat_buf, ADIO_Offset - *offset_list, ADIO_Offset *len_list, int *send_size, + ADIOI_Flatlist_node *flat_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, - int *count, int *start_pos, int *partial_recv, - int *sent_to_proc, int nprocs, + int *count, int *start_pos, int *partial_recv, + int *sent_to_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, ADIO_Offset *fd_end, - ADIOI_Access *others_req, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, + ADIOI_Access *others_req, int *send_buf_idx, int *curr_to_proc, - int *done_to_proc, int *hole, int iter, + int *done_to_proc, int *hole, int iter, MPI_Aint buftype_extent, int *buf_idx, int *error_code); static void ADIOI_W_Exchange_data_alltoallv( - ADIO_File fd, const void *buf, + ADIO_File fd, const void *buf, char *write_buf, /* 1 */ - ADIOI_Flatlist_node *flat_buf, - ADIO_Offset *offset_list, - ADIO_Offset *len_list, int *send_size, int *recv_size, + ADIOI_Flatlist_node *flat_buf, + ADIO_Offset *offset_list, + ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, /* 2 */ int *count, int *start_pos, int *partial_recv, - int *sent_to_proc, int nprocs, int myrank, + int *sent_to_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIOI_Access *others_req, int *send_buf_idx, int *curr_to_proc, /* 3 */ @@ -80,28 +80,28 @@ static void ADIOI_W_Exchange_data_alltoallv( int iter, MPI_Aint buftype_extent, int *buf_idx, 
int *error_code); static void ADIOI_Fill_send_buffer(ADIO_File fd, const void *buf, ADIOI_Flatlist_node - *flat_buf, char **send_buf, ADIO_Offset - *offset_list, ADIO_Offset *len_list, int *send_size, - MPI_Request *requests, int *sent_to_proc, - int nprocs, int myrank, + *flat_buf, char **send_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, int *send_size, + MPI_Request *requests, int *sent_to_proc, + int nprocs, int myrank, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, ADIO_Offset *fd_end, - int *send_buf_idx, int *curr_to_proc, - int *done_to_proc, int iter, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, + int *send_buf_idx, int *curr_to_proc, + int *done_to_proc, int iter, MPI_Aint buftype_extent); static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, const void *buf, ADIOI_Flatlist_node - *flat_buf, char **send_buf, ADIO_Offset - *offset_list, ADIO_Offset *len_list, int *send_size, - MPI_Request *requests, int *sent_to_proc, - int nprocs, int myrank, + *flat_buf, char **send_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, int *send_size, + MPI_Request *requests, int *sent_to_proc, + int nprocs, int myrank, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, ADIO_Offset *fd_end, - int *send_buf_idx, int *curr_to_proc, - int *done_to_proc, int iter, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, + int *send_buf_idx, int *curr_to_proc, + int *done_to_proc, int iter, MPI_Aint buftype_extent); -static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, +static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, ADIO_Offset *srt_off, int *srt_len, int *start_pos, int nprocs, int nprocs_recv, int total_elements); @@ -112,15 +112,15 @@ void ADIOI_GPFS_WriteStridedColl(ADIO_File fd, const void *buf, int count, *error_code) { /* Uses a generalized version of the extended two-phase method described - in "An Extended Two-Phase Method for 
Accessing Sections of + in "An Extended Two-Phase Method for Accessing Sections of Out-of-Core Arrays", Rajeev Thakur and Alok Choudhary, - Scientific Programming, (5)4:301--317, Winter 1996. + Scientific Programming, (5)4:301--317, Winter 1996. http://www.mcs.anl.gov/home/thakur/ext2ph.ps */ - ADIOI_Access *my_req; + ADIOI_Access *my_req; /* array of nprocs access structures, one for each other process in whose file domain this process's request lies */ - + ADIOI_Access *others_req; /* array of nprocs access structures, one for each other process whose request lies in this process's file domain. */ @@ -164,14 +164,14 @@ void ADIOI_GPFS_WriteStridedColl(ADIO_File fd, const void *buf, int count, ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset, &offset_list, &len_list, &start_offset, - &end_offset, &contig_access_count); + &end_offset, &contig_access_count); GPFSMPIO_T_CIO_SET_GET( w, 1, 1, GPFSMPIO_CIO_T_GATHER, GPFSMPIO_CIO_T_LCOMP ) - /* each process communicates its start and end offsets to other + /* each process communicates its start and end offsets to other processes. The result is an array each of start and end offsets stored - in order of process rank. */ - + in order of process rank. 
*/ + st_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset)); end_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset)); @@ -253,13 +253,13 @@ void ADIOI_GPFS_WriteStridedColl(ADIO_File fd, const void *buf, int count, if (gpfsmpio_tuneblocking) ADIOI_GPFS_Calc_file_domains(fd, st_offsets, end_offsets, nprocs, nprocs_for_coll, &min_st_offset, - &fd_start, &fd_end, &fd_size, fd->fs_ptr); + &fd_start, &fd_end, &fd_size, fd->fs_ptr); else ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs, nprocs_for_coll, &min_st_offset, &fd_start, &fd_end, fd->hints->min_fdomain_size, &fd_size, - fd->hints->striping_unit); + fd->hints->striping_unit); GPFSMPIO_T_CIO_SET_GET( w, 1, 1, GPFSMPIO_CIO_T_MYREQ, GPFSMPIO_CIO_T_FD_PART ); @@ -298,13 +298,13 @@ void ADIOI_GPFS_WriteStridedColl(ADIO_File fd, const void *buf, int count, if (gpfsmpio_tuneblocking) ADIOI_GPFS_Calc_my_req(fd, offset_list, len_list, contig_access_count, min_st_offset, fd_start, fd_end, fd_size, - nprocs, &count_my_req_procs, + nprocs, &count_my_req_procs, &count_my_req_per_proc, &my_req, - &buf_idx); + &buf_idx); else ADIOI_Calc_my_req(fd, offset_list, len_list, contig_access_count, min_st_offset, fd_start, fd_end, fd_size, - nprocs, &count_my_req_procs, + nprocs, &count_my_req_procs, &count_my_req_per_proc, &my_req, &buf_idx); @@ -313,7 +313,7 @@ void ADIOI_GPFS_WriteStridedColl(ADIO_File fd, const void *buf, int count, /* based on everyone's my_req, calculate what requests of other processes lie in this process's file domain. count_others_req_procs = number of processes whose requests lie in - this process's file domain (including this process itself) + this process's file domain (including this process itself) count_others_req_per_proc[i] indicates how many separate contiguous requests of proc. i lie in this process's file domain. 
*/ @@ -323,8 +323,8 @@ void ADIOI_GPFS_WriteStridedColl(ADIO_File fd, const void *buf, int count, nprocs, myrank, &count_others_req_procs, &others_req); else - ADIOI_Calc_others_req(fd, count_my_req_procs, - count_my_req_per_proc, my_req, + ADIOI_Calc_others_req(fd, count_my_req_procs, + count_my_req_per_proc, my_req, nprocs, myrank, &count_others_req_procs, &others_req); @@ -379,7 +379,7 @@ void ADIOI_GPFS_WriteStridedColl(ADIO_File fd, const void *buf, int count, bufsize = size * count; MPIR_Status_set_bytes(status, datatype, bufsize); } -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written during collective I/O. */ #endif @@ -489,7 +489,7 @@ static int gpfs_find_access_for_ion(ADIO_File fd, * code is created and returned in error_code. */ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype - datatype, int nprocs, + datatype, int nprocs, int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, @@ -499,7 +499,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype int *buf_idx, int *error_code) { /* Send data to appropriate processes and write in sizes of no more - than coll_bufsize. + than coll_bufsize. The idea is to reduce the amount of extra memory required for collective I/O. If all data were written all at once, which is much easier, it would require temp space more than the size of user_buf, @@ -533,7 +533,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype That gives the no. of communication phases as well. 
*/ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); coll_bufsize = atoi(value); ADIOI_Free(value); @@ -589,14 +589,14 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype #endif MPI_Allreduce(&ntimes, &max_ntimes, 1, MPI_INT, MPI_MAX, - fd->comm); + fd->comm); write_buf = fd->io_buf; if (gpfsmpio_pthreadio == 1) { write_buf2 = fd->io_buf + coll_bufsize; } - curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); + curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); /* its use is explained below. calloc initializes to 0. */ count = (int *) ADIOI_Malloc(nprocs*sizeof(int)); @@ -625,7 +625,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype /* Above three are used in ADIOI_Fill_send_buffer*/ start_pos = (int *) ADIOI_Malloc(nprocs*sizeof(int)); - /* used to store the starting value of curr_offlen_ptr[i] in + /* used to store the starting value of curr_offlen_ptr[i] in this iteration */ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); @@ -662,15 +662,15 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype /* go through all others_req and check which will be satisfied by the current write */ - /* Note that MPI guarantees that displacements in filetypes are in + /* Note that MPI guarantees that displacements in filetypes are in monotonically nondecreasing order and that, for writes, the filetypes cannot specify overlapping regions in the file. This simplifies implementation a bit compared to reads. 
*/ - /* off = start offset in the file for the data to be written in - this iteration + /* off = start offset in the file for the data to be written in + this iteration size = size of data written (bytes) corresponding to off - req_off = off in file for a particular contiguous request + req_off = off in file for a particular contiguous request minus what was satisfied in previous iteration req_size = size corresponding to req_off */ @@ -681,7 +681,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype #endif for (i=0; i < nprocs; i++) count[i] = recv_size[i] = 0; - size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); + size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); for (i=0; i < nprocs; i++) { if (others_req[i].count) { @@ -691,7 +691,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype /* this request may have been partially satisfied in the previous iteration. */ req_off = others_req[i].offsets[j] + - partial_recv[i]; + partial_recv[i]; req_len = others_req[i].lens[j] - partial_recv[i]; partial_recv[i] = 0; @@ -706,10 +706,10 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype if (req_off < off + size) { count[i]++; ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off)); - MPI_Address(write_buf+req_off-off, + MPI_Address(write_buf+req_off-off, &(others_req[i].mem_ptrs[j])); ADIOI_Assert((off + size - req_off) == (int)(off + size - req_off)); - recv_size[i] += (int)(ADIOI_MIN(off + size - req_off, + recv_size[i] += (int)(ADIOI_MIN(off + size - req_off, (unsigned)req_len)); if (off+size-req_off < (unsigned)req_len) @@ -717,9 +717,9 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype partial_recv[i] = (int) (off + size - req_off); /* --BEGIN ERROR HANDLING-- */ - if ((j+1 < others_req[i].count) && + if ((j+1 < others_req[i].count) && (others_req[i].offsets[j+1] < 
off+size)) - { + { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, @@ -739,7 +739,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype curr_offlen_ptr[i] = j; } } - + #ifdef PROFILE MPE_Log_event(14, 0, "end computation"); MPE_Log_event(7, 0, "start communication"); @@ -753,7 +753,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype min_st_offset, fd_size, fd_start, fd_end, others_req, send_buf_idx, curr_to_proc, done_to_proc, &hole, m, buftype_extent, buf_idx, - error_code); + error_code); else if (gpfsmpio_comm == 0) ADIOI_W_Exchange_data_alltoallv(fd, buf, write_buf, flat_buf, offset_list, @@ -764,7 +764,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype min_st_offset, fd_size, fd_start, fd_end, others_req, send_buf_idx, curr_to_proc, done_to_proc, &hole, m, buftype_extent, buf_idx, - error_code); + error_code); if (*error_code != MPI_SUCCESS) return; #ifdef PROFILE MPE_Log_event(8, 0, "end communication"); @@ -826,7 +826,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype #ifdef PROFILE MPE_Log_event(7, 0, "start communication"); #endif - for (m=ntimes; matomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ requests = (MPI_Request *) - ADIOI_Malloc((nprocs_send+1)*sizeof(MPI_Request)); + ADIOI_Malloc((nprocs_send+1)*sizeof(MPI_Request)); send_req = requests; } else { - requests = (MPI_Request *) - ADIOI_Malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); + requests = (MPI_Request *) + ADIOI_Malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); /* +1 to avoid a 0-size malloc */ /* post receives */ @@ -1041,10 +1041,10 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, const void *buf, char *write_buf #endif if (buftype_is_contig) { j = 0; - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (send_size[i]) { - MPI_Isend(((char *) buf) + buf_idx[i], send_size[i], - MPI_BYTE, i, 
myrank+i+100*iter, fd->comm, + MPI_Isend(((char *) buf) + buf_idx[i], send_size[i], + MPI_BYTE, i, myrank+i+100*iter, fd->comm, send_req+j); j++; buf_idx[i] += send_size[i]; @@ -1053,16 +1053,16 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, const void *buf, char *write_buf else if (nprocs_send) { /* buftype is not contig */ send_buf = (char **) ADIOI_Malloc(nprocs*sizeof(char*)); - for (i=0; i < nprocs; i++) - if (send_size[i]) + for (i=0; i < nprocs; i++) + if (send_size[i]) send_buf[i] = (char *) ADIOI_Malloc(send_size[i]); ADIOI_Fill_send_buffer(fd, buf, flat_buf, send_buf, - offset_list, len_list, send_size, + offset_list, len_list, send_size, send_req, - sent_to_proc, nprocs, myrank, + sent_to_proc, nprocs, myrank, contig_access_count, - min_st_offset, fd_size, fd_start, fd_end, + min_st_offset, fd_size, fd_start, fd_end, send_buf_idx, curr_to_proc, done_to_proc, iter, buftype_extent); /* the send is done in ADIOI_Fill_send_buffer */ @@ -1083,16 +1083,16 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, const void *buf, char *write_buf for (i=0; iatomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+1) * \ - sizeof(MPI_Status)); + sizeof(MPI_Status)); /* +1 to avoid a 0-size malloc */ } else { statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+nprocs_recv+1) * \ - sizeof(MPI_Status)); + sizeof(MPI_Status)); /* +1 to avoid a 0-size malloc */ } @@ -1119,7 +1119,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, const void *buf, char *write_buf ADIOI_Free(statuses); ADIOI_Free(requests); if (!buftype_is_contig && nprocs_send) { - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (send_size[i]) ADIOI_Free(send_buf[i]); ADIOI_Free(send_buf); } @@ -1175,14 +1175,14 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, const void *buf, char *write_buf } static void ADIOI_Fill_send_buffer(ADIO_File fd, const void *buf, ADIOI_Flatlist_node - *flat_buf, char **send_buf, ADIO_Offset - 
*offset_list, ADIO_Offset *len_list, int *send_size, - MPI_Request *requests, int *sent_to_proc, - int nprocs, int myrank, - int contig_access_count, + *flat_buf, char **send_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, int *send_size, + MPI_Request *requests, int *sent_to_proc, + int nprocs, int myrank, + int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, ADIO_Offset *fd_end, - int *send_buf_idx, int *curr_to_proc, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, + int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int iter, MPI_Aint buftype_extent) { @@ -1195,9 +1195,9 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, const void *buf, ADIOI_Flatlist /* curr_to_proc[p] = amount of data sent to proc. p that has already been accounted for so far - done_to_proc[p] = amount of data already sent to proc. p in + done_to_proc[p] = amount of data already sent to proc. p in previous iterations - user_buf_idx = current location in user buffer + user_buf_idx = current location in user buffer send_buf_idx[p] = current location in send_buf of proc. 
p */ for (i=0; i < nprocs; i++) { @@ -1212,10 +1212,10 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, const void *buf, ADIOI_Flatlist flat_buf_sz = flat_buf->blocklens[0]; /* flat_buf_idx = current index into flattened buftype - flat_buf_sz = size of current contiguous component in + flat_buf_sz = size of current contiguous component in flattened buf */ - for (i=0; i done_to_proc[p]) { if (done_to_proc[p] > curr_to_proc[p]) { - size = ADIOI_MIN(curr_to_proc[p] + len - + size = ADIOI_MIN(curr_to_proc[p] + len - done_to_proc[p], send_size[p]-send_buf_idx[p]); buf_incr = done_to_proc[p] - curr_to_proc[p]; ADIOI_BUF_INCR @@ -1255,7 +1255,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, const void *buf, ADIOI_Flatlist ADIOI_BUF_COPY } if (send_buf_idx[p] == send_size[p]) { - MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p, + MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p, myrank+p+100*iter, fd->comm, requests+jj); jj++; } @@ -1275,13 +1275,13 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, const void *buf, ADIOI_Flatlist rem_len -= len; } } - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (send_size[i]) sent_to_proc[i] = curr_to_proc[i]; } -static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, +static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, ADIO_Offset *srt_off, int *srt_len, int *start_pos, int nprocs, int nprocs_recv, int total_elements) { @@ -1311,7 +1311,7 @@ static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, heapsize = nprocs_recv; for (i=heapsize/2 - 1; i>=0; i--) { /* Heapify(a, i, heapsize); Algorithm from Cormen et al. pg. 143 - modified for a heap with smallest element at root. I have + modified for a heap with smallest element at root. I have removed the recursion so that there are no function calls. Function calls are too expensive. 
*/ k = i; @@ -1319,12 +1319,12 @@ static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, l = 2*(k+1) - 1; r = 2*(k+1); - if ((l < heapsize) && + if ((l < heapsize) && (*(a[l].off_list) < *(a[k].off_list))) smallest = l; else smallest = k; - if ((r < heapsize) && + if ((r < heapsize) && (*(a[r].off_list) < *(a[smallest].off_list))) smallest = r; @@ -1336,11 +1336,11 @@ static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, a[k].off_list = a[smallest].off_list; a[k].len_list = a[smallest].len_list; a[k].nelem = a[smallest].nelem; - + a[smallest].off_list = tmp.off_list; a[smallest].len_list = tmp.len_list; a[smallest].nelem = tmp.nelem; - + k = smallest; } else break; @@ -1370,12 +1370,12 @@ static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, l = 2*(k+1) - 1; r = 2*(k+1); - if ((l < heapsize) && + if ((l < heapsize) && (*(a[l].off_list) < *(a[k].off_list))) smallest = l; else smallest = k; - if ((r < heapsize) && + if ((r < heapsize) && (*(a[r].off_list) < *(a[smallest].off_list))) smallest = r; @@ -1387,11 +1387,11 @@ static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, a[k].off_list = a[smallest].off_list; a[k].len_list = a[smallest].len_list; a[k].nelem = a[smallest].nelem; - + a[smallest].off_list = tmp.off_list; a[smallest].len_list = tmp.len_list; a[smallest].nelem = tmp.nelem; - + k = smallest; } else break; @@ -1403,25 +1403,25 @@ static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, static void ADIOI_W_Exchange_data_alltoallv( - ADIO_File fd, const void *buf, + ADIO_File fd, const void *buf, char *write_buf, /* 1 */ - ADIOI_Flatlist_node *flat_buf, - ADIO_Offset *offset_list, - ADIO_Offset *len_list, int *send_size, int *recv_size, + ADIOI_Flatlist_node *flat_buf, + ADIO_Offset *offset_list, + ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, /* 2 */ int *count, int *start_pos, int *partial_recv, - int *sent_to_proc, int nprocs, int myrank, + int *sent_to_proc, 
int nprocs, int myrank, int buftype_is_contig, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIOI_Access *others_req, int *send_buf_idx, int *curr_to_proc, /* 3 */ int *done_to_proc, int *hole, /* 4 */ int iter, MPI_Aint buftype_extent, int *buf_idx, int *error_code) -{ +{ int i, j, k=0, nprocs_recv, nprocs_send, *tmp_len, err; char **send_buf = NULL; MPI_Request *send_req=NULL; @@ -1443,12 +1443,12 @@ static void ADIOI_W_Exchange_data_alltoallv( gpfsmpio_prof_cw[GPFSMPIO_CIO_T_DEXCH_RECV_EXCH] += MPI_Wtime() - io_time; io_time = MPI_Wtime(); - + nprocs_recv = 0; for (i=0; icomm ); + fd->comm ); ADIOI_Free( all_send_buf ); ADIOI_Free(sdispls); @@ -1503,7 +1503,7 @@ static void ADIOI_W_Exchange_data_alltoallv( gpfsmpio_prof_cw[GPFSMPIO_CIO_T_DEXCH_NET] += MPI_Wtime() - io_time; io_time = MPI_Wtime(); /* data sieving pre-read */ - /* To avoid a read-modify-write, check if there are holes in the + /* To avoid a read-modify-write, check if there are holes in the data to be written. For this, merge the (sorted) offset lists others_req using a heap-merge. 
*/ @@ -1518,7 +1518,7 @@ static void ADIOI_W_Exchange_data_alltoallv( /* check if there are any holes */ *hole = 0; /* See if there are holes before the first request or after the last request*/ - if((srt_off[0] > off) || + if((srt_off[0] > off) || ((srt_off[sum-1] + srt_len[sum-1]) < (off + size))) { *hole = 1; @@ -1577,25 +1577,25 @@ static void ADIOI_W_Exchange_data_alltoallv( k = start_pos[i] + count[i] - 1; others_req[i].lens[k] = tmp_len[i]; } - + } } - + ADIOI_Free( tmp_len ); ADIOI_Free( all_recv_buf ); ADIOI_Free(rdispls); - return; -} + return; +} static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, const void *buf, ADIOI_Flatlist_node - *flat_buf, char **send_buf, ADIO_Offset - *offset_list, ADIO_Offset *len_list, int *send_size, - MPI_Request *requests, int *sent_to_proc, - int nprocs, int myrank, - int contig_access_count, + *flat_buf, char **send_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, int *send_size, + MPI_Request *requests, int *sent_to_proc, + int nprocs, int myrank, + int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, ADIO_Offset *fd_end, - int *send_buf_idx, int *curr_to_proc, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, + int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int iter, MPI_Aint buftype_extent) { @@ -1608,9 +1608,9 @@ static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, const void *buf, ADIOI_F /* curr_to_proc[p] = amount of data sent to proc. p that has already been accounted for so far - done_to_proc[p] = amount of data already sent to proc. p in + done_to_proc[p] = amount of data already sent to proc. p in previous iterations - user_buf_idx = current location in user buffer + user_buf_idx = current location in user buffer send_buf_idx[p] = current location in send_buf of proc. 
p */ for (i=0; i < nprocs; i++) { @@ -1625,10 +1625,10 @@ static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, const void *buf, ADIOI_F flat_buf_sz = flat_buf->blocklens[0]; /* flat_buf_idx = current index into flattened buftype - flat_buf_sz = size of current contiguous component in + flat_buf_sz = size of current contiguous component in flattened buf */ - for (i=0; i done_to_proc[p]) { if (done_to_proc[p] > curr_to_proc[p]) { - size = ADIOI_MIN(curr_to_proc[p] + len - + size = ADIOI_MIN(curr_to_proc[p] + len - done_to_proc[p], send_size[p]-send_buf_idx[p]); buf_incr = done_to_proc[p] - curr_to_proc[p]; ADIOI_BUF_INCR @@ -1670,7 +1670,7 @@ static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, const void *buf, ADIOI_F /* moved to alltoallv */ /* if (send_buf_idx[p] == send_size[p]) { - MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p, + MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p, myrank+p+100*iter, fd->comm, requests+jj); jj++; } @@ -1691,6 +1691,6 @@ static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, const void *buf, ADIOI_F rem_len -= len; } } - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (send_size[i]) sent_to_proc[i] = curr_to_proc[i]; } diff --git a/ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/Makefile.mk b/ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/Makefile.mk index 1d957ef8f6f..baec508e549 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/Makefile.mk +++ b/ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/Makefile.mk @@ -13,6 +13,6 @@ noinst_HEADERS += \ romio_other_sources += \ adio/ad_gpfs/bg/ad_bg_aggrs.c \ - adio/ad_gpfs/bg/ad_bg_pset.c + adio/ad_gpfs/bg/ad_bg_pset.c endif BUILD_AD_BG diff --git a/ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c b/ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c index 240c0138df2..90c97a20a9a 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c @@ -7,8 +7,8 @@ */ /* -*- Mode: C; c-basic-offset:4 ; 
-*- */ -/* - * Copyright (C) 1997-2001 University of Chicago. +/* + * Copyright (C) 1997-2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -46,14 +46,14 @@ * * The last three of these were originally in ad_read_coll.c, but they are * also shared with ad_write_coll.c. I felt that they were better kept with - * the rest of the shared aggregation code. + * the rest of the shared aggregation code. */ /* Discussion of values available from above: * * ADIO_Offset st_offsets[0..nprocs-1] * ADIO_Offset end_offsets[0..nprocs-1] - * These contain a list of start and end offsets for each process in + * These contain a list of start and end offsets for each process in * the communicator. For example, an access at loc 10, size 10 would * have a start offset of 10 and end offset of 19. * int nprocs @@ -63,26 +63,26 @@ * starting location of "file domain"; region that a given process will * perform aggregation for (i.e. actually do I/O) * ADIO_Offset fd_end[0..nprocs_for_coll-1] - * start + size - 1 roughly, but it can be less, or 0, in the case of + * start + size - 1 roughly, but it can be less, or 0, in the case of * uneven distributions */ /* forward declaration */ -static void -ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd, - const ADIOI_BG_ConfInfo_t *confInfo, +static void +ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd, + const ADIOI_BG_ConfInfo_t *confInfo, ADIOI_BG_ProcInfo_t *all_procInfo); /* * Compute the aggregator-related parameters that are required in 2-phase collective IO of ADIO. - * The parameters are + * The parameters are * . the number of aggregators (proxies) : fd->hints->cb_nodes * . the ranks of the aggregators : fd->hints->ranklist - * By compute these two parameters in a BG-PSET-aware way, the default 2-phase collective IO of + * By compute these two parameters in a BG-PSET-aware way, the default 2-phase collective IO of * ADIO can work more efficiently. 
*/ -int -ADIOI_BG_gen_agg_ranklist(ADIO_File fd, int n_aggrs_per_pset) +int +ADIOI_BG_gen_agg_ranklist(ADIO_File fd, int n_aggrs_per_pset) { int r, s; ADIOI_BG_ProcInfo_t *procInfo, *all_procInfo; @@ -101,13 +101,13 @@ ADIOI_BG_gen_agg_ranklist(ADIO_File fd, int n_aggrs_per_pset) /* if (r == 0) */ all_procInfo = ADIOI_BG_ProcInfo_new_n (s); - MPI_Gather( (void *)procInfo, sizeof(ADIOI_BG_ProcInfo_t), MPI_BYTE, - (void *)all_procInfo, sizeof(ADIOI_BG_ProcInfo_t), MPI_BYTE, - 0, + MPI_Gather( (void *)procInfo, sizeof(ADIOI_BG_ProcInfo_t), MPI_BYTE, + (void *)all_procInfo, sizeof(ADIOI_BG_ProcInfo_t), MPI_BYTE, + 0, fd->comm ); /* Compute a list of the ranks of chosen IO proxy CN on process 0 */ - if (r == 0) { + if (r == 0) { ADIOI_BG_compute_agg_ranklist_serial (fd, confInfo, all_procInfo); /* ADIOI_BG_ProcInfo_free (all_procInfo);*/ } @@ -156,7 +156,7 @@ ADIOI_BG_gen_agg_ranklist(ADIO_File fd, int n_aggrs_per_pset) /* Maybe find which bridge node is closer (manhattan distance) and try to * distribute evenly. */ -/* +/* * Pick IO aggregators based on the under PSET organization and stores the ranks of the proxy CNs in tmp_ranklist. * The first order of tmp_ranklist is : PSET number * The secondary order of the list is determined in ADIOI_BG_select_agg_in_pset() and thus adjustable. @@ -181,9 +181,9 @@ static int intsort(const void *p1, const void *p2) return(i1->bridge - i2->bridge); } -static int -ADIOI_BG_compute_agg_ranklist_serial_do (const ADIOI_BG_ConfInfo_t *confInfo, - ADIOI_BG_ProcInfo_t *all_procInfo, +static int +ADIOI_BG_compute_agg_ranklist_serial_do (const ADIOI_BG_ConfInfo_t *confInfo, + ADIOI_BG_ProcInfo_t *all_procInfo, int *tmp_ranklist) { TRACE_ERR("Entering ADIOI_BG_compute_agg_ranklist_serial_do\n"); @@ -313,7 +313,7 @@ ADIOI_BG_compute_agg_ranklist_serial_do (const ADIOI_BG_ConfInfo_t *confInfo, bridgelist[i].rank = i; TRACE_ERR("bridgelist[%d].bridge: %d .rank: %d\n", i, bridgelist[i].bridge, i); } - + /* This list contains rank->bridge info. 
Now, we need to sort this list. */ qsort(bridgelist, confInfo->nProcs, sizeof(sortstruct), intsort); @@ -324,7 +324,7 @@ ADIOI_BG_compute_agg_ranklist_serial_do (const ADIOI_BG_ConfInfo_t *confInfo, if(numAggs == 1) aggTotal = 1; else - /* the number of aggregators is (numAggs per bridgenode) plus each + /* the number of aggregators is (numAggs per bridgenode) plus each * bridge node is an aggregator */ aggTotal = confInfo->numBridgeRanks * (numAggs+1); @@ -350,7 +350,7 @@ ADIOI_BG_compute_agg_ranklist_serial_do (const ADIOI_BG_ConfInfo_t *confInfo, if(lastBridge == bridgelist[procIndex].bridge) { psetSize++; - if(procIndex) continue; + if(procIndex) continue; else procIndex--;/* procIndex == 0 */ } /* Sets up a list of nodes which will act as aggregators. numAggs @@ -377,7 +377,7 @@ ADIOI_BG_compute_agg_ranklist_serial_do (const ADIOI_BG_ConfInfo_t *confInfo, aggList[nextAggr] = bridgelist[procIndex+j*distance+1].rank; TRACE_ERR("agglist[%d] -> bridgelist[%d] = %d\n", nextAggr, procIndex+j*distance+1,aggList[nextAggr]); if(aggList[nextAggr]==lastBridge) /* can't have bridge in the list twice */ - { + { aggList[nextAggr] = bridgelist[procIndex+psetSize].rank; /* take the last one in the pset */ TRACE_ERR("replacement agglist[%d] -> bridgelist[%d] = %d\n", nextAggr, procIndex+psetSize,aggList[nextAggr]); } @@ -409,17 +409,17 @@ ADIOI_BG_compute_agg_ranklist_serial_do (const ADIOI_BG_ConfInfo_t *confInfo, } -/* +/* * compute aggregators ranklist and put it into fd->hints struct - */ -static void -ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd, - const ADIOI_BG_ConfInfo_t *confInfo, + */ +static void +ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd, + const ADIOI_BG_ConfInfo_t *confInfo, ADIOI_BG_ProcInfo_t *all_procInfo) { TRACE_ERR("Entering ADIOI_BG_compute_agg_ranklist_serial\n"); - int i; - int naggs; + int i; + int naggs; int size; int *tmp_ranklist; @@ -432,12 +432,12 @@ ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd, } # endif - naggs= + naggs= 
ADIOI_BG_compute_agg_ranklist_serial_do (confInfo, all_procInfo, tmp_ranklist); # define VERIFY 1 # if VERIFY - DBG_FPRINTF(stderr, "\tconfInfo = min: %3d, max: %3d, naggrs: %3d, bridge: %3d, nprocs: %3d, vpset: %3d, tsize: %3d, ratio: %.4f; naggs = %d\n", + DBG_FPRINTF(stderr, "\tconfInfo = min: %3d, max: %3d, naggrs: %3d, bridge: %3d, nprocs: %3d, vpset: %3d, tsize: %3d, ratio: %.4f; naggs = %d\n", confInfo->ioMinSize , confInfo->ioMaxSize , confInfo->nAggrs , @@ -462,7 +462,7 @@ ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd, tmp_ranklist[i] = 0; } } - + # if AGG_DEBUG for (i=0; i= 0)); /* A dim is < 6 bits or sorting won't work */ - if((hw.Coords[0] == pers.Network_Config.cnBridge_A) && - (hw.Coords[1] == pers.Network_Config.cnBridge_B) && - (hw.Coords[2] == pers.Network_Config.cnBridge_C) && - (hw.Coords[3] == pers.Network_Config.cnBridge_D) && + if((hw.Coords[0] == pers.Network_Config.cnBridge_A) && + (hw.Coords[1] == pers.Network_Config.cnBridge_B) && + (hw.Coords[2] == pers.Network_Config.cnBridge_C) && + (hw.Coords[3] == pers.Network_Config.cnBridge_D) && (hw.Coords[4] == pers.Network_Config.cnBridge_E)) { iambridge = 1; /* I am bridge */ if (gpfsmpio_bridgeringagg > 0) { @@ -238,16 +238,16 @@ ADIOI_BG_persInfo_init(ADIOI_BG_ConfInfo_t *conf, bridges = (sortstruct *) ADIOI_Malloc(sizeof(sortstruct) * size); /* We're going to sort this structure by bridgeCoord: - + typedef struct { int rank; int bridgeCoord; - } sortstruct; - - and I want the rank that IS the bridge to sort first, so - OR in '1' on non-bridge ranks that use a bridge coord. - */ + } sortstruct; + + and I want the rank that IS the bridge to sort first, so + OR in '1' on non-bridge ranks that use a bridge coord. 
+ */ /* My input to the collective */ bridges[rank].rank = rank; @@ -268,18 +268,18 @@ ADIOI_BG_persInfo_init(ADIOI_BG_ConfInfo_t *conf, tempRank = bridges[0].rank; countPset=1; - bridgeIndex = 0; + bridgeIndex = 0; mincompute = size+1; maxcompute = 1; for(i=1; imyIOSize = countPset; proc->ioNodeIndex = bridgeIndex; } - - - if(rank == 0) + + + if(rank == 0) { /* Only rank 0 has a conf structure, fill in stuff as appropriate */ conf->ioMinSize = mincompute; @@ -344,15 +344,15 @@ ADIOI_BG_persInfo_init(ADIOI_BG_ConfInfo_t *conf, conf->nProcs = size; conf->cpuIDsize = hw.ppn; /*conf->virtualPsetSize = maxcompute * conf->cpuIDsize;*/ - + conf->nAggrs = n_aggrs; /* First pass gets nAggrs = -1 */ if(conf->nAggrs <=0) conf->nAggrs = gpfsmpio_bg_nagg_pset; if(conf->ioMinSize <= conf->nAggrs) conf->nAggrs = ADIOI_MAX(1,conf->ioMinSize-1); /* not including bridge itself */ -/* if(conf->nAggrs > conf->numBridgeRanks) - conf->nAggrs = conf->numBridgeRanks; +/* if(conf->nAggrs > conf->numBridgeRanks) + conf->nAggrs = conf->numBridgeRanks; */ conf->aggRatio = 1. * conf->nAggrs / conf->ioMinSize /*virtualPsetSize*/; /* if(conf->aggRatio > 1) conf->aggRatio = 1.; */ @@ -369,7 +369,7 @@ ADIOI_BG_persInfo_init(ADIOI_BG_ConfInfo_t *conf, } -void +void ADIOI_BG_persInfo_free( ADIOI_BG_ConfInfo_t *conf, ADIOI_BG_ProcInfo_t *proc ) { ADIOI_BG_ConfInfo_free( conf ); diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.c b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.c index f08f112a3c0..8c1ed2d99f6 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. + * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.h b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.h index 0b94c780eaa..427af0a8ebd 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.h +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. +/* + * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. * See COPYRIGHT notice in top-level directory. */ @@ -26,32 +26,32 @@ extern globus_ftp_client_operationattr_t oattr[ADIO_GRIDFTP_HANDLES_MAX]; /* TODO: weed out the now-unused prototypes */ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code); void ADIOI_GRIDFTP_Close(ADIO_File fd, int *error_code); -void ADIOI_GRIDFTP_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_GRIDFTP_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); -void ADIOI_GRIDFTP_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_GRIDFTP_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code); -void ADIOI_GRIDFTP_IwriteContig(ADIO_File fd, void *buf, int count, + *error_code); +void ADIOI_GRIDFTP_IwriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); -void ADIOI_GRIDFTP_IreadContig(ADIO_File fd, void *buf, int count, + *error_code); +void ADIOI_GRIDFTP_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); + *error_code); int ADIOI_GRIDFTP_ReadDone(ADIO_Request *request, ADIO_Status *status, int *error_code); int ADIOI_GRIDFTP_WriteDone(ADIO_Request *request, ADIO_Status *status, 
int *error_code); void ADIOI_GRIDFTP_ReadComplete(ADIO_Request *request, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_GRIDFTP_WriteComplete(ADIO_Request *request, ADIO_Status *status, - int *error_code); -void ADIOI_GRIDFTP_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, - int *error_code); + int *error_code); +void ADIOI_GRIDFTP_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, + int *error_code); void ADIOI_GRIDFTP_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, @@ -79,10 +79,10 @@ void ADIOI_GRIDFTP_IwriteStrided(ADIO_File fd, void *buf, int count, void ADIOI_GRIDFTP_Flush(ADIO_File fd, int *error_code); void ADIOI_GRIDFTP_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); void ADIOI_GRIDFTP_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); -void ADIOI_GRIDFTP_Get_shared_fp(ADIO_File fd, int size, - ADIO_Offset *shared_fp, +void ADIOI_GRIDFTP_Get_shared_fp(ADIO_File fd, int size, + ADIO_Offset *shared_fp, int *error_code); -void ADIOI_GRIDFTP_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, +void ADIOI_GRIDFTP_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code); void ADIOI_GRIDFTP_Delete(char *filename, int *error_code); diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_close.c b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_close.c index c1693d65ce0..c80dec31752 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_close.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_close.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. + * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_delete.c b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_delete.c index 54eb7144295..2217e12e8f2 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_delete.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_delete.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. + * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. * See COPYRIGHT notice in top-level directory. */ @@ -13,7 +13,7 @@ static globus_cond_t cond; static globus_bool_t delete_done, delete_success; static void delete_cb(void *myarg, globus_ftp_client_handle_t *handle, globus_object_t *error) { - + if (error) { FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error)); @@ -39,7 +39,7 @@ void ADIOI_GRIDFTP_Delete(char *filename, int *error_code) globus_module_activate(GLOBUS_FTP_CLIENT_MODULE); result=globus_ftp_client_handle_init(&handle,GLOBUS_NULL); - + if (result != GLOBUS_SUCCESS ) { globus_err_handler("globus_ftp_client_handle_init",myname,result); @@ -47,11 +47,11 @@ void ADIOI_GRIDFTP_Delete(char *filename, int *error_code) MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, - "**io", "**io %s", + "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); - return; + return; } - + delete_done=GLOBUS_FALSE; delete_success=GLOBUS_FALSE; result=globus_ftp_client_delete(&handle,filename,GLOBUS_NULL,delete_cb,GLOBUS_NULL); @@ -78,7 +78,7 @@ void ADIOI_GRIDFTP_Delete(char *filename, int *error_code) MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, - "**io", "**io %s", + "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; } @@ -89,7 +89,7 @@ void ADIOI_GRIDFTP_Delete(char *filename, int *error_code) MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, - "**io", "**io %s", + "**io", "**io %s", 
globus_object_printable_to_string(globus_error_get(result))); } } diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_fcntl.c index dd9cb5ee098..b5150cbdfb4 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. + * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. * See COPYRIGHT notice in top-level directory. */ @@ -26,7 +26,7 @@ void fcntl_size_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_mutex_unlock(&fcntl_size_lock); } -void ADIOI_GRIDFTP_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, +void ADIOI_GRIDFTP_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code) { MPI_Datatype copy_etype, copy_filetype; @@ -46,7 +46,7 @@ void ADIOI_GRIDFTP_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, { globus_result_t result; globus_off_t fsize=0; - + globus_mutex_init(&fcntl_size_lock,GLOBUS_NULL); globus_cond_init(&fcntl_size_cond,GLOBUS_NULL); fcntl_size_done=GLOBUS_FALSE; @@ -61,7 +61,7 @@ void ADIOI_GRIDFTP_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, - "**io", "**io %s", + "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; } diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_flush.c b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_flush.c index 795341e8889..2f61e7f76d0 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_flush.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_flush.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * 
Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. + * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_hints.c b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_hints.c index c0b0a40ebb2..f008cdc247f 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_hints.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_hints.c @@ -1,11 +1,11 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. + * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. * See COPYRIGHT notice in top-level directory. */ -/* +/* Valid hints for ftp:// and gsiftp:// URLs (aside from the std. ones): @@ -19,7 +19,7 @@ Valid hints for ftp:// and gsiftp:// URLs (aside from the std. ones): tcp_buffer integer size of tcp stream buffers in bytes - transfer_type ascii or binary (default binary) + transfer_type ascii or binary (default binary) These *must* be specified at open time currently. */ @@ -29,12 +29,12 @@ These *must* be specified at open time currently. void ADIOI_GRIDFTP_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) { - + if (!(fd->info)) { if ( users_info==MPI_INFO_NULL ) { - /* This must be part of the open call. */ + /* This must be part of the open call. */ MPI_Info_create(&(fd->info)); } else @@ -46,7 +46,7 @@ void ADIOI_GRIDFTP_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) { int i,nkeys,valuelen,flag; char key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL]; - + if ( users_info!=MPI_INFO_NULL ) { MPI_Info_get_nkeys(users_info,&nkeys); @@ -62,7 +62,7 @@ void ADIOI_GRIDFTP_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } } } - + /* let the generic ROMIO and MPI-I/O stuff happen... 
*/ - ADIOI_GEN_SetInfo(fd, users_info, error_code); + ADIOI_GEN_SetInfo(fd, users_info, error_code); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_open.c b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_open.c index 45aab921051..e30e3811d44 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_open.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. * See COPYRIGHT notice in top-level directory. */ @@ -12,7 +12,7 @@ static globus_cond_t cond; static globus_bool_t file_exists,exists_done; static void exists_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error) -{ +{ if (error) { FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error)); @@ -77,7 +77,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) result=globus_ftp_client_handleattr_init(&hattr); if ( result != GLOBUS_SUCCESS ) { - + globus_err_handler("globus_ftp_client_handleattr_init", myname,result); @@ -114,41 +114,41 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) /* Since we're (almost by definition) doing things that FTP S (stream) control mode can't handle, default to E (extended block) control mode - for gsiftp:// URLs. ftp:// URLs use standard stream control mode + for gsiftp:// URLs. ftp:// URLs use standard stream control mode by default. This behavior can be overridden by the ftp_control_mode hint. 
*/ /* - if ( !strncmp(fd->filename,"gsiftp:",7) && + if ( !strncmp(fd->filename,"gsiftp:",7) && (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_EXTENDED_BLOCK))!=GLOBUS_SUCCESS ) globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result); - else if ( !strncmp(fd->filename,"ftp:",4) && + else if ( !strncmp(fd->filename,"ftp:",4) && (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_STREAM))!=GLOBUS_SUCCESS ) globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result); */ /* Set append mode if necessary */ - if ( (fd->access_mode&ADIO_APPEND) && + if ( (fd->access_mode&ADIO_APPEND) && ((result=globus_ftp_client_operationattr_set_append(&(oattr[fd->fd_sys]),GLOBUS_TRUE))!=GLOBUS_SUCCESS) ) globus_err_handler("globus_ftp_client_operationattr_set_append",myname,result); - /* Other hint and amode processing that would affect hattr and/or + /* Other hint and amode processing that would affect hattr and/or oattr[] (eg. parallelism, striping, etc.) 
goes here */ if ( fd->info!=MPI_INFO_NULL ) { ADIOI_Info_get(fd->info,"ftp_control_mode",MPI_MAX_INFO_VAL,hintval,&keyfound); if ( keyfound ) { - if ( ( !strcmp(hintval,"extended") || !strcmp(hintval,"extended_block") ) && + if ( ( !strcmp(hintval,"extended") || !strcmp(hintval,"extended_block") ) && (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_EXTENDED_BLOCK))!=GLOBUS_SUCCESS ) globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result); - else if ( !strcmp(hintval,"block") && + else if ( !strcmp(hintval,"block") && (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_BLOCK))!=GLOBUS_SUCCESS ) globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result); - else if ( !strcmp(hintval,"compressed") && + else if ( !strcmp(hintval,"compressed") && (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_COMPRESSED))!=GLOBUS_SUCCESS ) globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result); - else if ( !strcmp(hintval,"stream") && + else if ( !strcmp(hintval,"stream") && (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_STREAM))!=GLOBUS_SUCCESS ) globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result); } @@ -157,7 +157,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) if ( keyfound ) { int nftpthreads; - + if ( sscanf(hintval,"%d",&nftpthreads)==1 ) { globus_ftp_control_parallelism_t parallelism; @@ -261,10 +261,10 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) GLOBUS_NULL))!=GLOBUS_SUCCESS ) { globus_err_handler("globus_ftp_client_exists",myname,result); - fd->fd_sys = -1; + fd->fd_sys = -1; *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, - "**io", "**io %s", + "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; } @@ 
-295,10 +295,10 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) { globus_err_handler("globus_ftp_client_put",myname,result); fd->fd_sys = -1; - *error_code = MPIO_Err_create_code(MPI_SUCCESS, + *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, - "**io", "**io %s", + "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; } @@ -310,10 +310,10 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) if ( result != GLOBUS_SUCCESS ) { globus_err_handler("globus_ftp_client_register_write",myname,result); - *error_code = MPIO_Err_create_code(MPI_SUCCESS, + *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, - "**io", "**io %s", + "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; } @@ -328,7 +328,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code) { fd->fd_sys = -1; *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_IO, + myname, __LINE__, MPI_ERR_IO, "**io", 0); return; } diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_read.c b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_read.c index 6a0fc9c7c46..dee878e6b3f 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_read.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. + * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. * See COPYRIGHT notice in top-level directory. 
*/ @@ -100,7 +100,7 @@ static void readdiscontig_data_cb(void *myargs, globus_ftp_client_handle_t *hand return; } -void ADIOI_GRIDFTP_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_GRIDFTP_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) @@ -156,12 +156,12 @@ void ADIOI_GRIDFTP_ReadContig(ADIO_File fd, void *buf, int count, if ( result != GLOBUS_SUCCESS ) { globus_err_handler("globus_ftp_client_register_read",myname,result); - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, - MPI_ERR_IO, "**io", "**io %s", + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, + MPI_ERR_IO, "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; - } + } /* The ctl callback won't start till the data callbacks complete, so it's @@ -227,14 +227,14 @@ void ADIOI_GRIDFTP_ReadDiscontig(ADIO_File fd, void *buf, int count, MPI_Type_size_x(datatype,&btype_size); MPI_Type_extent(datatype,&btype_extent); ADIOI_Datatype_iscontig(datatype,&buf_contig); - + if ( ( btype_extent!=btype_size ) || ( ! 
buf_contig ) ) { FPRINTF(stderr,"[%d/%d] %s called with discontigous memory buffer\n", myrank,nprocs,myname); fflush(stderr); - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", 0 ); return; } @@ -279,8 +279,8 @@ void ADIOI_GRIDFTP_ReadDiscontig(ADIO_File fd, void *buf, int count, FPRINTF(stderr,"[%d/%d] %s error in computing extent -- extent %d is smaller than total bytes requested %d!\n", myrank,nprocs,myname,extent,count*btype_size); fflush(stderr); - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", 0); return; } @@ -301,9 +301,9 @@ void ADIOI_GRIDFTP_ReadDiscontig(ADIO_File fd, void *buf, int count, GLOBUS_NULL))!=GLOBUS_SUCCESS ) { globus_err_handler("globus_ftp_client_partial_get",myname,result); - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, - MPI_ERR_IO, "**io", "**io %s", + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, + MPI_ERR_IO, "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; } @@ -391,15 +391,15 @@ void ADIOI_GRIDFTP_ReadStrided(ADIO_File fd, void *buf, int count, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); #ifdef PRINT_ERR_MSG - FPRINTF(stdout, "[%d/%d] ADIOI_GRIDFTP_ReadStrided called on %s\n", myrank, + FPRINTF(stdout, "[%d/%d] ADIOI_GRIDFTP_ReadStrided called on %s\n", myrank, nprocs, fd->filename); - FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_ReadStrided\n", myrank, + FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_ReadStrided\n", myrank, nprocs); #endif ADIOI_GEN_ReadStrided(fd, buf, count, datatype, file_ptr_type, offset, status, error_code); - + */ char myname[]="ADIOI_GRIDFTP_ReadStrided"; @@ 
-457,7 +457,7 @@ void ADIOI_GRIDFTP_ReadStrided(ADIO_File fd, void *buf, int count, ADIOI_Free(intermediate); } - else + else { /* Why did you bother calling ReadStrided?!?!?! */ ADIOI_GRIDFTP_ReadContig(fd, buf, count, datatype, diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_resize.c b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_resize.c index 96c0460c42f..f55c56951f5 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_resize.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_resize.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. + * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. * See COPYRIGHT notice in top-level directory. */ @@ -113,9 +113,9 @@ void ADIOI_GRIDFTP_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) GLOBUS_NULL))!=GLOBUS_SUCCESS ) { globus_err_handler("globus_ftp_client_partial_put",myname,result); - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, - MPI_ERR_IO, "**io", "**io %s", + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, + MPI_ERR_IO, "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; } @@ -129,9 +129,9 @@ void ADIOI_GRIDFTP_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) GLOBUS_NULL))!=GLOBUS_SUCCESS ) { globus_err_handler("globus_ftp_client_register_write",myname,result); - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, - MPI_ERR_IO, "**io", "**io %s", + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, + MPI_ERR_IO, "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; } @@ -161,9 +161,9 @@ void ADIOI_GRIDFTP_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) GLOBUS_NULL))!=GLOBUS_SUCCESS ) 
{ globus_err_handler("globus_ftp_client_move",myname,result); - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, - MPI_ERR_IO, "**io", "**io %s", + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, + MPI_ERR_IO, "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; } @@ -189,9 +189,9 @@ void ADIOI_GRIDFTP_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) GLOBUS_NULL))!=GLOBUS_SUCCESS ) { globus_err_handler("globus_ftp_client_partial_third_party_transfer",myname,result); - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, - MPI_ERR_IO, "**io", "**io %s", + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, + MPI_ERR_IO, "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; } @@ -213,9 +213,9 @@ void ADIOI_GRIDFTP_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) GLOBUS_NULL))!=GLOBUS_SUCCESS ) { globus_err_handler("globus_ftp_client_delete",myname,result); - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, - MPI_ERR_IO, "**io", "**io %s", + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, + MPI_ERR_IO, "**io", "**io %s", globus_object_printable_to_string(globus_error_get(result))); return; } diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_write.c b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_write.c index 0400dae30b7..3c6f36fea35 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_write.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. + * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. 
* See COPYRIGHT notice in top-level directory. */ @@ -101,12 +101,12 @@ static void writediscontig_data_cb(void *myargs, globus_ftp_client_handle_t *han eof, writediscontig_data_cb, (void *)(bytes_written)); - FPRINTF(stderr,"wrote %Ld bytes...",(long long)length); + FPRINTF(stderr,"wrote %Ld bytes...",(long long)length); return; } -void ADIOI_GRIDFTP_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_GRIDFTP_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) @@ -227,7 +227,7 @@ void ADIOI_GRIDFTP_WriteDiscontig(ADIO_File fd, void *buf, int count, MPI_Type_size_x(datatype,&btype_size); MPI_Type_extent(datatype,&btype_extent); ADIOI_Datatype_iscontig(datatype,&buf_contig); - + if ( ( btype_extent!=btype_size ) || ( ! buf_contig ) ) { FPRINTF(stderr,"[%d/%d] %s called with discontigous memory buffer\n", @@ -348,7 +348,7 @@ void ADIOI_GRIDFTP_WriteDiscontig(ADIO_File fd, void *buf, int count, nblks++; } - + /* The ctl callback won't start till the data callbacks complete, so it's safe to wait on just the ctl callback */ globus_mutex_lock(&writediscontig_ctl_lock); @@ -392,7 +392,7 @@ void ADIOI_GRIDFTP_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - ADIOI_GEN_WriteStrided(fd, buf, count, datatype, file_ptr_type, offset, + ADIOI_GEN_WriteStrided(fd, buf, count, datatype, file_ptr_type, offset, status, error_code); return; #else @@ -460,7 +460,7 @@ void ADIOI_GRIDFTP_WriteStrided(ADIO_File fd, void *buf, int count, ADIOI_Free(intermediate); } - else + else { /* Why did you bother calling WriteStrided?!?!?! 
*/ FPRINTF(stderr,"[%d/%d] Why the heck did you call %s with contiguous buffer *and* file types?\n", diff --git a/ompi/mca/io/romio314/romio/adio/ad_gridftp/globus_routines.c b/ompi/mca/io/romio314/romio/adio/ad_gridftp/globus_routines.c index 1cca367a3f7..bb4a48dad18 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_gridftp/globus_routines.c +++ b/ompi/mca/io/romio314/romio/adio/ad_gridftp/globus_routines.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. + * Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.c b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.c index ad99ff7b815..860c42bdc3d 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.h b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.h index 2950aa50fcf..27bd5dfa9c1 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -19,16 +19,16 @@ #endif void ADIOI_HFS_Open(ADIO_File fd, int *error_code); -void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); -void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int - *error_code); + *error_code); void ADIOI_HFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); #endif diff --git a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_fcntl.c index 460c73666fe..0bc48c763e3 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -25,20 +25,20 @@ void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er case ADIO_FCNTL_GET_FSIZE: fcntl_struct->fsize = lseek64(fd->fd_sys, 0, SEEK_END); #ifdef HPUX - if (fd->fp_sys_posn != -1) + if (fd->fp_sys_posn != -1) lseek64(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); /* not required in SPPUX since there we use pread/pwrite */ #endif if (fcntl_struct->fsize == -1) { #ifdef MPICH - *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", + *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", "**io %s", strerror(errno)); #elif defined(PRINT_ERR_MSG) *error_code = MPI_ERR_UNKNOWN; #else /* MPICH-1 */ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname, "I/O Error", "%s", strerror(errno)); - ADIOI_Error(fd, *error_code, myname); + ADIOI_Error(fd, *error_code, myname); #endif } else *error_code = MPI_SUCCESS; @@ -52,7 +52,7 @@ void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er /* prealloc64 works only if file is of zero length */ if (err && (errno != ENOTEMPTY)) { #ifdef MPICH - *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", + *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", "**io %s", strerror(errno)); #elif defined(PRINT_ERR_MSG) *error_code = MPI_ERR_UNKNOWN; @@ -84,16 +84,16 @@ void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er #endif return; } - } + } - if ((fcntl_struct->diskspace > 2147483647) || + if ((fcntl_struct->diskspace > 2147483647) || (err && (errno == ENOTEMPTY))) { #endif ADIOI_GEN_Prealloc(fd,fcntl_struct->diskspace, error_code); } ADIOI_Free(buf); #ifdef HPUX - if (fd->fp_sys_posn != -1) + if (fd->fp_sys_posn != -1) lseek64(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); /* not required in SPPUX since there we use pread/pwrite 
*/ #endif diff --git a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_open.c b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_open.c index 46ee2848e56..7df8868d7e6 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -53,14 +53,14 @@ void ADIOI_HFS_Open(ADIO_File fd, int *error_code) if (fd->fd_sys == -1 ) { #ifdef MPICH - *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", + *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", "**io %s", strerror(errno)); #elif defined(PRINT_ERR_MSG) *error_code = MPI_ERR_UNKNOWN; #else /* MPICH-1 */ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname, "I/O Error", "%s", strerror(errno)); - ADIOI_Error(ADIO_FILE_NULL, *error_code, myname); + ADIOI_Error(ADIO_FILE_NULL, *error_code, myname); #endif } else *error_code = MPI_SUCCESS; diff --git a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_read.c b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_read.c index eac03ab6f80..145bae19323 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_read.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -11,7 +11,7 @@ #define lseek64 lseek #endif -void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { @@ -26,7 +26,7 @@ void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count, #ifdef SPPUX fd->fp_sys_posn = -1; /* set it to null, since we are using pread */ - if (file_ptr_type == ADIO_EXPLICIT_OFFSET) + if (file_ptr_type == ADIO_EXPLICIT_OFFSET) err = pread64(fd->fd_sys, buf, len, offset); else { /* read from curr. location of ind. file pointer */ err = pread64(fd->fd_sys, buf, len, fd->fp_ind); @@ -40,15 +40,15 @@ void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count, lseek64(fd->fd_sys, offset, SEEK_SET); err = read(fd->fd_sys, buf, len); fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { /* read from curr. location of ind. file pointer */ if (fd->fp_sys_posn != fd->fp_ind) lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET); err = read(fd->fd_sys, buf, len); - fd->fp_ind += err; + fd->fp_ind += err; fd->fp_sys_posn = fd->fp_ind; - } + } #endif #ifdef HAVE_STATUS_SET_BYTES @@ -64,7 +64,7 @@ void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count, #else /* MPICH-1 */ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname, "I/O Error", "%s", strerror(errno)); - ADIOI_Error(fd, *error_code, myname); + ADIOI_Error(fd, *error_code, myname); #endif } else *error_code = MPI_SUCCESS; diff --git a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_resize.c b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_resize.c index de24ad672ae..3496849939c 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_resize.c +++ b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_resize.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -13,7 +13,7 @@ void ADIOI_HFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) #ifndef PRINT_ERR_MSG static char myname[] = "ADIOI_HFS_RESIZE"; #endif - + err = ftruncate64(fd->fd_sys, size); if (err == -1) { #ifdef MPICH @@ -24,7 +24,7 @@ void ADIOI_HFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) #else /* MPICH-1 */ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname, "I/O Error", "%s", strerror(errno)); - ADIOI_Error(fd, *error_code, myname); + ADIOI_Error(fd, *error_code, myname); #endif } else *error_code = MPI_SUCCESS; diff --git a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_write.c b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_write.c index 4bffa82a2f9..3bde18d2bbc 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_write.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -11,7 +11,7 @@ #define lseek64 lseek #endif -void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { @@ -25,7 +25,7 @@ void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count, #ifdef SPPUX fd->fp_sys_posn = -1; /* set it to null, since we are using pwrite */ - if (file_ptr_type == ADIO_EXPLICIT_OFFSET) + if (file_ptr_type == ADIO_EXPLICIT_OFFSET) err = pwrite64(fd->fd_sys, buf, len, offset); else { /* write from curr. location of ind. 
file pointer */ err = pwrite64(fd->fd_sys, buf, len, fd->fp_ind); @@ -39,7 +39,7 @@ void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count, lseek64(fd->fd_sys, offset, SEEK_SET); err = write(fd->fd_sys, buf, len); fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { /* write from curr. location of ind. file pointer */ if (fd->fp_sys_posn != fd->fp_ind) diff --git a/ompi/mca/io/romio314/romio/adio/ad_lustre/README b/ompi/mca/io/romio314/romio/adio/ad_lustre/README index a217c0f8fe5..d27110f42f0 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_lustre/README +++ b/ompi/mca/io/romio314/romio/adio/ad_lustre/README @@ -1,11 +1,11 @@ -Upcoming soon: +Upcoming soon: o Hierarchical striping as described in the paper from CCGrid2007 http://ft.ornl.gov/projects/io/pubs/CCGrid-2007-file-joining.pdf Further out: o To post the code for ParColl (Partitioned collective IO) - + ----------------------------------------------------- -V05: +V05: ----------------------------------------------------- Improved data redistribution o Improve I/O pattern identification. Besides checking interleaving, @@ -20,12 +20,12 @@ Improved data redistribution more constant clients. ----------------------------------------------------- -V04: +V04: ----------------------------------------------------- o Direct IO and Lockless IO support ----------------------------------------------------- -V03: +V03: ----------------------------------------------------- o Correct detection of fs_type when lustre: prefix is not given o Further fix on stripe alignment @@ -34,21 +34,21 @@ V03: ----------------------------------------------------- V02: ----------------------------------------------------- -The Lustre ADIO driver has been cleaned up quite a lot. Compared +The Lustre ADIO driver has been cleaned up quite a lot. 
Compared to the intital posting, here are the changes: o Removal of dead/redundant code o Removal of asynchronous IO piece as it appears outdated o Bug fixes for setting Lustre Hints - o Bug fixes for data sieving - o Improved Setsize operation with one process calling ftruncate - o Improved collective IO with domain partitioning on + o Bug fixes for data sieving + o Improved Setsize operation with one process calling ftruncate + o Improved collective IO with domain partitioning on Lustre stripe boundary Contributing: - o You may contribute via many different ways, such as + o You may contribute via many different ways, such as testing results, bug reports, and new feature patches. o We appreciate any courtesy reference of this work. - o Disclaimer: you are welcome to try the code, but at your own risk. + o Disclaimer: you are welcome to try the code, but at your own risk. Contact info: For more info, visit http://ft.ornl.gov/projects/io/ diff --git a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_aggregate.c b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_aggregate.c index cd552829ed3..ce893a30e3f 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_aggregate.c +++ b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_aggregate.c @@ -49,23 +49,23 @@ void ADIOI_LUSTRE_Get_striping_info(ADIO_File fd, int **striping_info_ptr, /* Calculate how many IO clients we need */ /* Algorithm courtesy Pascal Deveze (pascal.deveze@bull.net) */ /* To avoid extent lock conflicts, - * avail_cb_nodes should either + * avail_cb_nodes should either * - be a multiple of stripe_count, * - or divide stripe_count exactly * so that each OST is accessed by a maximum of CO constant clients. */ if (nprocs_for_coll >= stripe_count) /* avail_cb_nodes should be a multiple of stripe_count and the number * of procs per OST should be limited to the minimum between - * nprocs_for_coll/stripe_count and CO - * - * e.g. 
if stripe_count=20, nprocs_for_coll=42 and CO=3 then + * nprocs_for_coll/stripe_count and CO + * + * e.g. if stripe_count=20, nprocs_for_coll=42 and CO=3 then * avail_cb_nodes should be equal to 40 */ - avail_cb_nodes = + avail_cb_nodes = stripe_count * ADIOI_MIN(nprocs_for_coll/stripe_count, CO); else { /* nprocs_for_coll is less than stripe_count */ /* avail_cb_nodes should divide stripe_count */ - /* e.g. if stripe_count=60 and nprocs_for_coll=8 then + /* e.g. if stripe_count=60 and nprocs_for_coll=8 then * avail_cb_nodes should be egal to 6 */ /* This could be done with : while (stripe_count % avail_cb_nodes != 0) avail_cb_nodes--; @@ -83,7 +83,7 @@ void ADIOI_LUSTRE_Get_striping_info(ADIO_File fd, int **striping_info_ptr, } /* if divisor is less than nprocs_for_coll, divisor is a * solution, but it is not sure that it is the best one */ - else if (divisor <= nprocs_for_coll) + else if (divisor <= nprocs_for_coll) avail_cb_nodes = divisor; } divisor++; @@ -202,7 +202,7 @@ void ADIOI_LUSTRE_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, for (i = 0; i < nprocs; i++) { /* add one to count_my_req_per_proc[i] to avoid zero size malloc */ buf_idx[i] = (int *) ADIOI_Malloc((count_my_req_per_proc[i] + 1) - * sizeof(int)); + * sizeof(int)); } /* now allocate space for my_req, offset, and len */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_fcntl.c index a4bd6fc6d17..61d4b66205f 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_fcntl.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
* * Copyright (C) 2007 Oak Ridge National Laboratory @@ -22,11 +22,11 @@ void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int switch(flag) { case ADIO_FCNTL_GET_FSIZE: fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END); - if (fd->fp_sys_posn != -1) + if (fd->fp_sys_posn != -1) lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); if (fcntl_struct->fsize == -1) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", "**io %s", strerror(errno)); } else *error_code = MPI_SUCCESS; @@ -34,11 +34,11 @@ void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int case ADIO_FCNTL_SET_DISKSPACE: /* will be called by one process only */ - /* On file systems with no preallocation function, I have to - explicitly write - to allocate space. Since there could be holes in the file, - I need to read up to the current file size, write it back, - and then write beyond that depending on how much + /* On file systems with no preallocation function, I have to + explicitly write + to allocate space. Since there could be holes in the file, + I need to read up to the current file size, write it back, + and then write beyond that depending on how much preallocation is needed. 
read/write in sizes of no more than ADIOI_PREALLOC_BUFSZ */ @@ -46,7 +46,7 @@ void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int alloc_size = fcntl_struct->diskspace; size = ADIOI_MIN(curr_fsize, alloc_size); - + ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ; buf = (char *) ADIOI_Malloc(ADIOI_PREALLOC_BUFSZ); done = 0; @@ -56,31 +56,31 @@ void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int ADIO_ReadContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done, &status, error_code); if (*error_code != MPI_SUCCESS) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", "**io %s", strerror(errno)); - return; + return; } - ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, + ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done, &status, error_code); if (*error_code != MPI_SUCCESS) return; done += len; } if (alloc_size > curr_fsize) { - memset(buf, 0, ADIOI_PREALLOC_BUFSZ); + memset(buf, 0, ADIOI_PREALLOC_BUFSZ); size = alloc_size - curr_fsize; ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ; for (i=0; ifp_sys_posn != -1) + if (fd->fp_sys_posn != -1) lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); *error_code = MPI_SUCCESS; break; diff --git a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_hints.c b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_hints.c index 051cfb0c4c7..00aebd1a57d 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_hints.c +++ b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_hints.c @@ -20,8 +20,7 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) char *value; int flag; ADIO_Offset stripe_val[3], str_factor = -1, str_unit=0, start_iodev=-1; - struct lov_user_md lum = { 0 }; - int err, myrank, fd_sys, perm, amode, old_mask; + int err, 
myrank; static char myname[] = "ADIOI_LUSTRE_SETINFO"; value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); @@ -47,18 +46,25 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* striping information */ ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag); - if (flag) + if (flag) { + ADIOI_Info_set(fd->info, "striping_unit", value); str_unit=atoll(value); + } ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag); - if (flag) + if (flag) { + ADIOI_Info_set(fd->info, "striping_factor", value); str_factor=atoll(value); + } ADIOI_Info_get(users_info, "romio_lustre_start_iodevice", MPI_MAX_INFO_VAL, value, &flag); - if (flag) + if (flag) { + ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value); start_iodev=atoll(value); + } + /* direct read and write */ ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, @@ -84,77 +90,25 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } MPI_Bcast(stripe_val, 3, MPI_OFFSET, 0, fd->comm); + /* do not open file in hint processing. Open file in open routines, + * where we can better deal with EXCL flag . Continue to check the + * "all processors set a value" condition holds. 
*/ if (stripe_val[0] != str_factor || stripe_val[1] != str_unit || stripe_val[2] != start_iodev) { - FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys" - "-striping_factor:striping_unit:start_iodevice " - "need to be identical across all processes\n"); - MPI_Abort(MPI_COMM_WORLD, 1); - } else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) { - /* if user has specified striping info, process 0 tries to set it */ - if (!myrank) { - if (fd->perm == ADIO_PERM_NULL) { - old_mask = umask(022); - umask(old_mask); - perm = old_mask ^ 0666; - } - else perm = fd->perm; - - amode = 0; - if (fd->access_mode & ADIO_CREATE) - amode = amode | O_CREAT; - if (fd->access_mode & ADIO_RDONLY) - amode = amode | O_RDONLY; - if (fd->access_mode & ADIO_WRONLY) - amode = amode | O_WRONLY; - if (fd->access_mode & ADIO_RDWR) - amode = amode | O_RDWR; - if (fd->access_mode & ADIO_EXCL) - amode = amode | O_EXCL; - - /* we need to create file so ensure this is set */ - amode = amode | O_LOV_DELAY_CREATE | O_CREAT; - - fd_sys = open(fd->filename, amode, perm); - if (fd_sys == -1) { - if (errno != EEXIST) - fprintf(stderr, - "Failure to open file %s %d %d\n",strerror(errno), amode, perm); - } else { - lum.lmm_magic = LOV_USER_MAGIC; - lum.lmm_pattern = 0; - lum.lmm_stripe_size = str_unit; - /* crude check for overflow of lustre internal datatypes. 
- * Silently cap to large value if user provides a value - * larger than lustre supports */ - if (lum.lmm_stripe_size != str_unit) { - lum.lmm_stripe_size = UINT_MAX; - } - lum.lmm_stripe_count = str_factor; - if ( lum.lmm_stripe_count != str_factor) { - lum.lmm_stripe_count = USHRT_MAX; - } - lum.lmm_stripe_offset = start_iodev; - if (lum.lmm_stripe_offset != start_iodev) { - lum.lmm_stripe_offset = USHRT_MAX; - } - - err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum); - if (err == -1 && errno != EEXIST) { - fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno)); - } - close(fd_sys); - } - } /* End of striping parameters validation */ + MPIO_ERR_CREATE_CODE_INFO_NOT_SAME("ADIOI_LUSTRE_SetInfo", + "str_factor or str_unit or start_iodev", + error_code); + ADIOI_Free(value); + return; } - MPI_Barrier(fd->comm); } + /* get other hint */ if (users_info != MPI_INFO_NULL) { /* CO: IO Clients/OST, * to keep the load balancing between clients and OSTs */ - ADIOI_Info_check_and_install_int(fd, users_info, "romio_lustre_co_ratio", + ADIOI_Info_check_and_install_int(fd, users_info, "romio_lustre_co_ratio", &(fd->hints->fs_hints.lustre.co_ratio), myname, error_code ); /* coll_threshold: diff --git a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_open.c b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_open.c index e06cb24d7aa..c7add090637 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_open.c @@ -18,14 +18,17 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code) { int perm, old_mask, amode, amode_direct; - int lumlen; + int lumlen, myrank, flag, set_layout=0, err; struct lov_user_md *lum = NULL; char *value; + ADIO_Offset str_factor = -1, str_unit=0, start_iodev=-1; #if defined(MPICH) || !defined(PRINT_ERR_MSG) static char myname[] = "ADIOI_LUSTRE_OPEN"; #endif + MPI_Comm_rank(fd->comm, &myrank); + if (fd->perm == ADIO_PERM_NULL) { old_mask = umask(022); umask(old_mask); @@ 
-47,46 +50,102 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code) amode_direct = amode | O_DIRECT; - fd->fd_sys = open(fd->filename, amode|O_CREAT, perm); - - if (fd->fd_sys != -1) { - int err; - - /* get file striping information and set it in info */ - /* odd malloc here because lov_user_md contains some fixed data and - * then a list of 'lmm_objects' representing stripe */ - lumlen = sizeof(struct lov_user_md) + - MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data); - /* furthermore, Pascal Deveze reports that, even though we pass a - * "GETSTRIPE" (read) flag to the ioctl, if some of the values of this - * struct are uninitialzed, the call can give an error. calloc in case - * there are other members that must be initialized and in case - * lov_user_md struct changes in future */ - lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen); - lum->lmm_magic = LOV_USER_MAGIC; - err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum); - if (!err) { - value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - - fd->hints->striping_unit = lum->lmm_stripe_size; - sprintf(value, "%d", lum->lmm_stripe_size); - ADIOI_Info_set(fd->info, "striping_unit", value); - - fd->hints->striping_factor = lum->lmm_stripe_count; - sprintf(value, "%d", lum->lmm_stripe_count); - ADIOI_Info_set(fd->info, "striping_factor", value); - - fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset; - sprintf(value, "%d", lum->lmm_stripe_offset); - ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value); - - ADIOI_Free(value); - } - ADIOI_Free(lum); - - if (fd->access_mode & ADIO_APPEND) - fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); - } + /* odd length here because lov_user_md contains some fixed data and + * then a list of 'lmm_objects' representing stripe */ + lumlen = sizeof(struct lov_user_md) + + MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data); + lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen); + + value = (char *) 
ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); + /* we already validated in LUSTRE_SetInfo that these are going to be the same */ + if (fd->info != MPI_INFO_NULL) { + /* striping information */ + ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL, + value, &flag); + if (flag) + str_unit=atoll(value); + + ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL, + value, &flag); + if (flag) + str_factor=atoll(value); + + ADIOI_Info_get(fd->info, "romio_lustre_start_iodevice", + MPI_MAX_INFO_VAL, value, &flag); + if (flag) + start_iodev=atoll(value); + } + if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) + set_layout = 1; + + /* if hints were set, we need to delay creation of any lustre objects. + * However, if we open the file with O_LOV_DELAY_CREATE and don't call the + * follow-up ioctl, subsequent writes will fail */ + if (myrank == 0 && set_layout) + amode = amode | O_LOV_DELAY_CREATE; + + fd->fd_sys = open(fd->filename, amode, perm); + if (fd->fd_sys == -1) goto fn_exit; + + /* we can only set these hints on new files */ + /* It was strange and buggy to open the file in the hint path. Instead, + * we'll apply the file tunings at open time */ + if ((amode & O_CREAT) && set_layout ) { + /* if user has specified striping info, process 0 tries to set it */ + if (!myrank) { + lum->lmm_magic = LOV_USER_MAGIC; + lum->lmm_pattern = 0; + /* crude check for overflow of lustre internal datatypes. 
+ * Silently cap to large value if user provides a value + * larger than lustre supports */ + if (str_unit > UINT_MAX) + lum->lmm_stripe_size = UINT_MAX; + else + lum->lmm_stripe_size = str_unit; + + if (str_factor > USHRT_MAX) + lum->lmm_stripe_count = USHRT_MAX; + else + lum->lmm_stripe_count = str_factor; + + if (start_iodev > USHRT_MAX) + lum->lmm_stripe_offset = USHRT_MAX; + else + lum->lmm_stripe_offset = start_iodev; + err = ioctl(fd->fd_sys, LL_IOC_LOV_SETSTRIPE, lum); + if (err == -1 && errno != EEXIST) { + fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno)); + /* not a fatal error, but user might care to know */ + } + } /* End of striping parameters validation */ + } + + /* Pascal Deveze reports that, even though we pass a + * "GETSTRIPE" (read) flag to the ioctl, if some of the values of this + * struct are uninitialzed, the call can give an error. zero it out in case + * there are other members that must be initialized and in case + * lov_user_md struct changes in future */ + memset(lum, 0, lumlen); + lum->lmm_magic = LOV_USER_MAGIC; + err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum); + if (!err) { + + fd->hints->striping_unit = lum->lmm_stripe_size; + sprintf(value, "%d", lum->lmm_stripe_size); + ADIOI_Info_set(fd->info, "striping_unit", value); + + fd->hints->striping_factor = lum->lmm_stripe_count; + sprintf(value, "%d", lum->lmm_stripe_count); + ADIOI_Info_set(fd->info, "striping_factor", value); + + fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset; + sprintf(value, "%d", lum->lmm_stripe_offset); + ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value); + + } + + if (fd->access_mode & ADIO_APPEND) + fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND)) fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); @@ -101,9 +160,12 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code) fd->direct_write = fd->direct_read = 0; } } 
+fn_exit: + ADIOI_Free(lum); + ADIOI_Free(value); /* --BEGIN ERROR HANDLING-- */ - if (fd->fd_sys == -1 || ((fd->fd_direct == -1) && + if (fd->fd_sys == -1 || ((fd->fd_direct == -1) && (fd->direct_write || fd->direct_read))) { *error_code = ADIOI_Err_create_code(myname, fd->filename, errno); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_rwcontig.c b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_rwcontig.c index cb187a39e9c..c7372676354 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_rwcontig.c +++ b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_rwcontig.c @@ -94,7 +94,7 @@ static int ADIOI_LUSTRE_Directio(ADIO_File fd, const void *buf, int len, else nbytes += pwrite(fd->fd_sys, buf, size, offset); } err = nbytes; - } else { + } else { if (!(((long) buf) % fd->d_mem)) { ADIOI_LUSTRE_Aligned_Mem_File_Read(fd, buf, size, offset, &err); nbytes += err; @@ -115,11 +115,11 @@ static int ADIOI_LUSTRE_Directio(ADIO_File fd, const void *buf, int len, static void ADIOI_LUSTRE_IOContig(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, + ADIO_Offset offset, ADIO_Status *status, int io_mode, int *error_code); static void ADIOI_LUSTRE_IOContig(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, + ADIO_Offset offset, ADIO_Status *status, int io_mode, int *error_code) { int err=-1; @@ -138,7 +138,7 @@ static void ADIOI_LUSTRE_IOContig(ADIO_File fd, const void *buf, int count, err = lseek(fd->fd_sys, offset, SEEK_SET); if (err == -1) goto ioerr; } - + if (io_mode) { #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_write_a, 0, NULL); @@ -164,7 +164,7 @@ static void ADIOI_LUSTRE_IOContig(ADIO_File fd, const void *buf, int count, fd->fp_sys_posn = offset + err; if (file_ptr_type == ADIO_INDIVIDUAL) { - fd->fp_ind += err; + fd->fp_ind += err; } #ifdef HAVE_STATUS_SET_BYTES @@ -194,7 
+194,7 @@ void ADIOI_LUSTRE_WriteContig(ADIO_File fd, const void *buf, int count, offset, status, 1, error_code); } -void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { diff --git a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_wrcoll.c b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_wrcoll.c index 3299ad5af3d..51476dc27f4 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_wrcoll.c +++ b/ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_wrcoll.c @@ -42,7 +42,7 @@ static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, const void *buf, ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, int *count, - int *start_pos, + int *start_pos, int *sent_to_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, @@ -288,7 +288,7 @@ static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, const void *buf, int myrank, ADIOI_Access *others_req, ADIOI_Access *my_req, ADIO_Offset *offset_list, - ADIO_Offset *len_list, + ADIO_Offset *len_list, int contig_access_count, int *striping_info, int **buf_idx, int *error_code) @@ -620,7 +620,7 @@ static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, const void *buf, ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, int *count, - int *start_pos, + int *start_pos, int *sent_to_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.c index 725c4d1ead6..e69d8390460 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. 
+ * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.h b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.h index 83d394af619..a06f9da26db 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -25,7 +25,7 @@ #include #endif -/* Workaround for incomplete set of definitions if __REDIRECT is not +/* Workaround for incomplete set of definitions if __REDIRECT is not defined and large file support is used in aio.h */ #if !defined(__REDIRECT) && defined(__USE_FILE_OFFSET64) #define aiocb aiocb64 @@ -39,32 +39,32 @@ int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, #endif void ADIOI_NFS_Open(ADIO_File fd, int *error_code); -void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); void ADIOI_NFS_WriteContig(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code); -void ADIOI_NFS_IwriteContig(ADIO_File fd, void *buf, int count, + *error_code); +void ADIOI_NFS_IwriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); -void ADIOI_NFS_IreadContig(ADIO_File fd, void *buf, int count, + *error_code); +void ADIOI_NFS_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); + *error_code); int ADIOI_NFS_ReadDone(ADIO_Request *request, 
ADIO_Status *status, int *error_code); int ADIOI_NFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code); void ADIOI_NFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_NFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, - int *error_code); + int *error_code); void ADIOI_NFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int - *error_code); + *error_code); void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_done.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_done.c index ff688546cab..8ebe5835706 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_done.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_done.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -16,4 +16,4 @@ int ADIOI_NFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code) { return ADIOI_NFS_ReadDone(request, status, error_code); -} +} diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_fcntl.c index c73006e6cd9..ea06e114c73 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -57,7 +57,7 @@ void ADIOI_NFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er /* --BEGIN ERROR HANDLING-- */ *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, - MPI_ERR_ARG, + MPI_ERR_ARG, "**flag", "**flag %d", flag); return; /* --END ERROR HANDLING-- */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_features.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_features.c index 05b061acf47..a4153e95171 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_features.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_features.c @@ -8,7 +8,7 @@ #include "ad_nfs.h" int ADIOI_NFS_Feature(ADIO_File fd, int flag) -{ +{ switch(flag) { case ADIO_SHARED_FP: case ADIO_LOCKS: diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_getsh.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_getsh.c index 974d547cc13..a1f62906381 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_getsh.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_getsh.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -23,11 +23,11 @@ void ADIOI_NFS_Get_shared_fp(ADIO_File fd, ADIO_Offset incr, ADIO_Offset *shared if (fd->shared_fp_fd == ADIO_FILE_NULL) { MPI_Comm_dup(MPI_COMM_SELF, &dupcommself); fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF, dupcommself, - fd->shared_fp_fname, + fd->shared_fp_fname, fd->file_system, fd->fns, - ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE, - 0, MPI_BYTE, MPI_BYTE, MPI_INFO_NULL, + ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE, + 0, MPI_BYTE, MPI_BYTE, MPI_INFO_NULL, ADIO_PERM_NULL, error_code); if (*error_code != MPI_SUCCESS) return; *shared_fp = 0; @@ -40,7 +40,7 @@ void ADIOI_NFS_Get_shared_fp(ADIO_File fd, ADIO_Offset incr, ADIO_Offset *shared MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); #endif /* if the file is empty, the above read may return error - (reading beyond end of file). In that case, shared_fp = 0, + (reading beyond end of file). In that case, shared_fp = 0, set above, is the correct value. */ } else { diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_hints.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_hints.c index 0e5386d22bb..46480d51d1c 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_hints.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_hints.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -9,5 +9,5 @@ void ADIOI_NFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) { - ADIOI_GEN_SetInfo(fd, users_info, error_code); + ADIOI_GEN_SetInfo(fd, users_info, error_code); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iread.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iread.c index 28d20281059..2b4e19907c8 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iread.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iread.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -8,10 +8,10 @@ #ifdef ROMIO_HAVE_WORKING_AIO /* nearly identical to ADIOI_GEN_IreadContig, except we lock around I/O */ -void ADIOI_NFS_IreadContig(ADIO_File fd, void *buf, int count, +void ADIOI_NFS_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, - int *error_code) + int *error_code) { MPI_Count len, typesize; int aio_errno = 0; diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iwrite.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iwrite.c index f27b099fa54..c0fff5c4cb9 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iwrite.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iwrite.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -16,9 +16,9 @@ static MPIX_Grequest_class ADIOI_GEN_greq_class = 0; /* this routine is nearly identical to ADIOI_GEN_IwriteContig, except we lock * around I/O */ -void ADIOI_NFS_IwriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_NFS_IwriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Request *request, int *error_code) + ADIO_Offset offset, ADIO_Request *request, int *error_code) { MPI_Count len, typesize; int aio_errno = 0; @@ -118,9 +118,9 @@ int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, } aio_req->aiocbp = aiocbp; if (ADIOI_GEN_greq_class == 0) { - MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn, - ADIOI_GEN_aio_free_fn, MPIU_Greq_cancel_fn, - ADIOI_GEN_aio_poll_fn, ADIOI_GEN_aio_wait_fn, + MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn, + ADIOI_GEN_aio_free_fn, MPIU_Greq_cancel_fn, + ADIOI_GEN_aio_poll_fn, ADIOI_GEN_aio_wait_fn, &ADIOI_GEN_greq_class); } MPIX_Grequest_class_allocate(ADIOI_GEN_greq_class, aio_req, request); diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_open.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_open.c index d8763292b91..d2ae6b22f1c 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -19,7 +19,7 @@ void ADIOI_NFS_Open(ADIO_File fd, int *error_code) perm = old_mask ^ 0666; } else perm = fd->perm; - + amode = 0; if (fd->access_mode & ADIO_CREATE) amode = amode | O_CREAT; diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c index 18dfde712c0..0a74dafe989 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c @@ -1,14 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_nfs.h" #include "adio_extern.h" -void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { @@ -41,7 +41,7 @@ void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count, #endif ADIOI_UNLOCK(fd, offset, SEEK_SET, len); fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { /* read from curr. location of ind. 
file pointer */ offset = fd->fp_ind; @@ -174,7 +174,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, ADIO_Offset abs_off_in_filetype=0; int req_len, partial_read; MPI_Count filetype_size, etype_size, buftype_size; - MPI_Aint filetype_extent, buftype_extent; + MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off; @@ -192,7 +192,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -206,7 +206,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, /* get max_bufsize from the info object. */ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); max_bufsize = atoi(value); ADIOI_Free(value); @@ -219,7 +219,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; - off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : + off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : fd->disp + etype_size * offset; start_off = off; @@ -250,7 +250,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len); if (err == -1) err_flag = 1; - for (j=0; jcount; i++) { userbuf_off = j*buftype_extent + flat_buf->indices[i]; req_off = off; @@ -288,7 +288,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; offset -= (ADIO_Offset)n_filetypes * filetype_extent; /* now offset is local to this extent */ - + /* find the block where offset is located, skip blocklens[i]==0 */ for (i=0; icount; i++) { ADIO_Offset dist; @@ -301,7 +301,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, frd_size = flat_file->blocklens[i]; break; } - if (dist > 0 ) { + if (dist > 0 ) { frd_size = dist; break; } @@ -314,7 +314,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -328,7 +328,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, } /* abs. offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; } @@ -342,7 +342,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, offset, status, error_code); if (file_ptr_type == ADIO_INDIVIDUAL) { - /* update MPI-IO file pointer to point to the first byte that + /* update MPI-IO file pointer to point to the first byte that * can be accessed in the fileview. 
*/ fd->fp_ind = offset + bufsize; if (bufsize == frd_size) { @@ -357,10 +357,10 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, + n_filetypes*filetype_extent; } } - fd->fp_sys_posn = -1; /* set it to null. */ + fd->fp_sys_posn = -1; /* set it to null. */ #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -#endif +#endif return; } @@ -426,9 +426,9 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, n_filetypes = st_n_filetypes; frd_size = ADIOI_MIN(st_frd_size, bufsize); while (i < bufsize) { - if (frd_size) { - /* TYPE_UB and TYPE_LB can result in - frd_size = 0. save system call in such cases */ + if (frd_size) { + /* TYPE_UB and TYPE_LB can result in + frd_size = 0. save system call in such cases */ /* lseek(fd->fd_sys, off, SEEK_SET); err = read(fd->fd_sys, ((char *) buf) + i, frd_size);*/ @@ -451,7 +451,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, j = (j+1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); } @@ -495,7 +495,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, j = (j+1) % flat_file->count; n_filetypes += (j == 0) ? 
1 : 0; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; new_frd_size = flat_file->blocklens[j]; @@ -511,7 +511,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; i = (int) (buftype_extent*(buf_count/flat_buf->count) + - flat_buf->indices[k]); + flat_buf->indices[k]); new_brd_size = flat_buf->blocklens[k]; if (size != frd_size) { off += size; @@ -523,7 +523,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, brd_size = new_brd_size; } } - + if (fd->atomicity) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); @@ -544,8 +544,8 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to - keep track of how much data was actually read and placed in buf +/* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually read and placed in buf by ADIOI_BUFFERED_READ. */ #endif diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_resize.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_resize.c index d86dfcc77ed..957b5bd809a 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_resize.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_resize.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_setsh.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_setsh.c index 42e558cb476..f0f79379d04 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_setsh.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_setsh.c @@ -1,17 +1,17 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_nfs.h" -/* set the shared file pointer to "offset" etypes relative to the current +/* set the shared file pointer to "offset" etypes relative to the current view */ /* -This looks very similar to ADIOI_GEN_Set_shared_fp, except this +This looks very similar to ADIOI_GEN_Set_shared_fp, except this function avoids locking the file twice. The generic version does Write lock @@ -24,7 +24,7 @@ caching. To avoid the lock being called twice, this version for NFS does Write lock Lseek Write -Unlock +Unlock */ @@ -37,10 +37,10 @@ void ADIOI_NFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code) if (fd->shared_fp_fd == ADIO_FILE_NULL) { MPI_Comm_dup(MPI_COMM_SELF, &dupcommself); fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF, dupcommself, - fd->shared_fp_fname, + fd->shared_fp_fname, fd->file_system, fd->fns, - ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE, - 0, MPI_BYTE, MPI_BYTE, MPI_INFO_NULL, + ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE, + 0, MPI_BYTE, MPI_BYTE, MPI_INFO_NULL, ADIO_PERM_NULL, error_code); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_wait.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_wait.c index e1037fc5513..218ac5d14b1 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_wait.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_wait.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. 
+/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c index 660e8682d97..b41488036e5 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -39,7 +39,7 @@ void ADIOI_NFS_WriteContig(ADIO_File fd, const void *buf, int count, #endif ADIOI_UNLOCK(fd, offset, SEEK_SET, len); fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { /* write from curr. location of ind. file pointer */ offset = fd->fp_ind; @@ -278,7 +278,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, ADIO_Offset abs_off_in_filetype=0; int req_len; MPI_Count filetype_size, etype_size, buftype_size; - MPI_Aint filetype_extent, buftype_extent; + MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off; @@ -295,7 +295,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -309,7 +309,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, /* get max_bufsize from the info object. 
*/ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); max_bufsize = atoi(value); ADIOI_Free(value); @@ -322,7 +322,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; - off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : + off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : fd->disp + etype_size * offset; start_off = off; @@ -332,10 +332,10 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1)); /* if atomicity is true, lock the region to be accessed */ - if (fd->atomicity) + if (fd->atomicity) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); - for (j=0; jcount; i++) { userbuf_off = j*buftype_extent + flat_buf->indices[i]; req_off = off; @@ -348,7 +348,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); #endif - lseek(fd->fd_sys, writebuf_off, SEEK_SET); + lseek(fd->fd_sys, writebuf_off, SEEK_SET); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); #endif @@ -356,14 +356,14 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif - err = write(fd->fd_sys, writebuf, writebuf_len); + err = write(fd->fd_sys, writebuf, writebuf_len); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); - if (err == -1) err_flag = 1; + if (err == -1) err_flag = 1; - if (fd->atomicity) + if (fd->atomicity) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); if (file_ptr_type == 
ADIO_INDIVIDUAL) fd->fp_ind = off; @@ -415,7 +415,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -429,7 +429,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, } /* abs. offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; } @@ -442,7 +442,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, offset, status, error_code); if (file_ptr_type == ADIO_INDIVIDUAL) { - /* update MPI-IO file pointer to point to the first byte + /* update MPI-IO file pointer to point to the first byte * that can be accessed in the fileview. */ fd->fp_ind = offset + bufsize; if (bufsize == fwr_size) { @@ -457,10 +457,10 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, + (ADIO_Offset)n_filetypes*filetype_extent; } } - fd->fp_sys_posn = -1; /* set it to null. */ + fd->fp_sys_posn = -1; /* set it to null. */ #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -#endif +#endif return; } @@ -483,13 +483,13 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, n_filetypes += (j == 0) ? 
1 : 0; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); } /* if atomicity is true, lock the region to be accessed */ - if (fd->atomicity) + if (fd->atomicity) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); /* initial read for the read-modify-write */ @@ -500,14 +500,14 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); #endif - lseek(fd->fd_sys, writebuf_off, SEEK_SET); + lseek(fd->fd_sys, writebuf_off, SEEK_SET); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); #endif #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); #endif - err = read(fd->fd_sys, writebuf, writebuf_len); + err = read(fd->fd_sys, writebuf, writebuf_len); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); #endif @@ -531,9 +531,9 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, n_filetypes = st_n_filetypes; fwr_size = ADIOI_MIN(st_fwr_size, bufsize); while (i < bufsize) { - if (fwr_size) { - /* TYPE_UB and TYPE_LB can result in - fwr_size = 0. save system call in such cases */ + if (fwr_size) { + /* TYPE_UB and TYPE_LB can result in + fwr_size = 0. save system call in such cases */ /* lseek(fd->fd_sys, off, SEEK_SET); err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);*/ @@ -556,7 +556,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, j = (j+1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); } @@ -601,7 +601,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, n_filetypes += (j == 0) ? 
1 : 0; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; new_fwr_size = flat_file->blocklens[j]; @@ -617,7 +617,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; i = (int) (buftype_extent*(buf_count/flat_buf->count) + - flat_buf->indices[k]); + flat_buf->indices[k]); new_bwr_size = flat_buf->blocklens[k]; if (size != fwr_size) { off += size; @@ -630,11 +630,11 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, } } - /* write the buffer out finally */ + /* write the buffer out finally */ #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); #endif - lseek(fd->fd_sys, writebuf_off, SEEK_SET); + lseek(fd->fd_sys, writebuf_off, SEEK_SET); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); #endif @@ -642,7 +642,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif - err = write(fd->fd_sys, writebuf, writebuf_len); + err = write(fd->fd_sys, writebuf, writebuf_len); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif @@ -651,7 +651,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); else ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); - if (err == -1) err_flag = 1; + if (err == -1) err_flag = 1; if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off; if (err_flag) { @@ -667,7 +667,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. 
*/ #endif diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.c index 8789fc378cf..823596e95ee 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.h b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.h index aed6168bd8b..2f9945971bb 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.h @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -28,32 +28,32 @@ int ADIOI_NTFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, void ADIOI_NTFS_Open(ADIO_File fd, int *error_code); void ADIOI_NTFS_Close(ADIO_File fd, int *error_code); -void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); -void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code); -void ADIOI_NTFS_IwriteContig(ADIO_File fd, void *buf, int count, + *error_code); +void ADIOI_NTFS_IwriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); -void ADIOI_NTFS_IreadContig(ADIO_File fd, void *buf, int count, + *error_code); +void ADIOI_NTFS_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); + *error_code); int ADIOI_NTFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int *error_code); int ADIOI_NTFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code); void ADIOI_NTFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_NTFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, - int *error_code); + int *error_code); void ADIOI_NTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int - *error_code); + *error_code); void ADIOI_NTFS_IwriteStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_close.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_close.c index 
95022dca77b..8a0d4d3a90e 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_close.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_close.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_done.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_done.c index 9f4967b48ba..c5edb683953 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_done.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_done.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_fcntl.c index 4c7d66cd074..721709a505c 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_fcntl.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -17,7 +17,7 @@ void ADIOI_NTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *e { case ADIO_FCNTL_GET_FSIZE: fcntl_struct->fsize = SetFilePointer(fd->fd_sys, 0, 0, FILE_END); - if (fd->fp_sys_posn != -1) + if (fd->fp_sys_posn != -1) { dwTemp = DWORDHIGH(fd->fp_sys_posn); if (SetFilePointer(fd->fd_sys, DWORDLOW(fd->fp_sys_posn), &dwTemp, FILE_BEGIN) == INVALID_SET_FILE_POINTER) diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_flush.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_flush.c index 7656d60fb66..8dcfa20866d 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_flush.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_flush.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iread.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iread.c index 845401ad44a..ff8f7c1b585 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iread.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iread.c @@ -1,14 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ #include "ad_ntfs.h" -void ADIOI_NTFS_IreadContig(ADIO_File fd, void *buf, int count, +void ADIOI_NTFS_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Request *request, int *error_code) + ADIO_Offset offset, ADIO_Request *request, int *error_code) { MPI_Count len, typesize; int err; diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iwrite.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iwrite.c index 40a567e564b..73216ddf8c0 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iwrite.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iwrite.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -12,7 +12,7 @@ static MPIX_Grequest_class ADIOI_NTFS_greq_class = 0; -/* Fills the input buffer, errMsg, with the error message +/* Fills the input buffer, errMsg, with the error message corresponding to error code, error */ void ADIOI_NTFS_Strerror(int error, char *errMsg, int errMsgLen) { @@ -44,20 +44,20 @@ int ADIOI_NTFS_aio_poll_fn(void *extra_state, MPI_Status *status) ADIOI_AIO_Request *aio_req; int mpi_errno = MPI_SUCCESS; - /* FIXME: Validate the args -- has it already been done by the + /* FIXME: Validate the args -- has it already been done by the caller ? 
*/ aio_req = (ADIOI_AIO_Request *)extra_state; - + /* XXX: test for AIO completion here */ - if(!GetOverlappedResult( aio_req->fd, aio_req->lpOvl, + if(!GetOverlappedResult( aio_req->fd, aio_req->lpOvl, &(aio_req->nbytes), FALSE)){ if(GetLastError() == ERROR_IO_INCOMPLETE){ /* IO in progress */ /* TODO: need to diddle with status somehow */ }else{ /* Error occured */ - /* TODO: unsure how to handle this */ + /* TODO: unsure how to handle this */ } }else{ mpi_errno = MPI_Grequest_complete(aio_req->req); @@ -82,7 +82,7 @@ int ADIOI_NTFS_aio_wait_fn(int count, void **array_of_states, LPHANDLE lpHandles; DWORD retObject=0; - /* FIXME: Validate the args -- has it already been done by the + /* FIXME: Validate the args -- has it already been done by the caller ? */ aio_reqlist = (ADIOI_AIO_Request **)array_of_states; lpHandles = (LPHANDLE) ADIOI_Calloc(count, sizeof(HANDLE)); @@ -101,12 +101,12 @@ int ADIOI_NTFS_aio_wait_fn(int count, void **array_of_states, /* XXX: wait for one request to complete */ /* FIXME: Is the timeout in seconds ? */ timeout = (timeout <= 0) ? 
INFINITE : (timeout * 1000); - + if((retObject = WaitForMultipleObjects(count, lpHandles, FALSE, timeout)) != WAIT_FAILED){ retObject = retObject - WAIT_OBJECT_0; - if(GetOverlappedResult( aio_reqlist[retObject]->fd, - aio_reqlist[retObject]->lpOvl, &(aio_reqlist[retObject]->nbytes), + if(GetOverlappedResult( aio_reqlist[retObject]->fd, + aio_reqlist[retObject]->lpOvl, &(aio_reqlist[retObject]->nbytes), FALSE)){ /* XXX: mark completed requests as 'done'*/ mpi_errno = MPI_Grequest_complete(aio_reqlist[retObject]->req); @@ -123,7 +123,7 @@ int ADIOI_NTFS_aio_wait_fn(int count, void **array_of_states, /* TODO: need to diddle with status somehow */ }else{ /* Error occured */ - /* TODO: not sure how to handle this */ + /* TODO: not sure how to handle this */ } } }else{ @@ -133,43 +133,43 @@ int ADIOI_NTFS_aio_wait_fn(int count, void **array_of_states, return mpi_errno; } -int ADIOI_NTFS_aio_query_fn(void *extra_state, MPI_Status *status) +int ADIOI_NTFS_aio_query_fn(void *extra_state, MPI_Status *status) { ADIOI_AIO_Request *aio_req; aio_req = (ADIOI_AIO_Request *)extra_state; - MPI_Status_set_elements(status, MPI_BYTE, aio_req->nbytes); + MPI_Status_set_elements(status, MPI_BYTE, aio_req->nbytes); - /* can never cancel so always true */ - MPI_Status_set_cancelled(status, 0); + /* can never cancel so always true */ + MPI_Status_set_cancelled(status, 0); - /* choose not to return a value for this */ - status->MPI_SOURCE = MPI_UNDEFINED; - /* tag has no meaning for this generalized request */ - status->MPI_TAG = MPI_UNDEFINED; - /* this generalized request never fails */ - return MPI_SUCCESS; + /* choose not to return a value for this */ + status->MPI_SOURCE = MPI_UNDEFINED; + /* tag has no meaning for this generalized request */ + status->MPI_TAG = MPI_UNDEFINED; + /* this generalized request never fails */ + return MPI_SUCCESS; } int ADIOI_NTFS_aio_free_fn(void *extra_state) { ADIOI_AIO_Request *aio_req; - /* FIXME: Validate the args -- has it already been done by the + 
/* FIXME: Validate the args -- has it already been done by the caller ? */ aio_req = (ADIOI_AIO_Request*)extra_state; CloseHandle(aio_req->lpOvl->hEvent); ADIOI_Free(aio_req->lpOvl); ADIOI_Free(aio_req); - return MPI_SUCCESS; + return MPI_SUCCESS; } -void ADIOI_NTFS_IwriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_NTFS_IwriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, - int *error_code) + int *error_code) { MPI_Count len, typesize; int err; @@ -255,7 +255,7 @@ int ADIOI_NTFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, aio_req->lpOvl->Offset = DWORDLOW(offset); aio_req->lpOvl->OffsetHigh = DWORDHIGH(offset); aio_req->fd = fd_sys; - + /* XXX: initiate async I/O */ if (wr) { @@ -267,7 +267,7 @@ int ADIOI_NTFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, } /* --BEGIN ERROR HANDLING-- */ - if (ret_val == FALSE) + if (ret_val == FALSE) { mpi_errno = GetLastError(); if (mpi_errno != ERROR_IO_PENDING) diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_open.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_open.c index 3a49418dbaa..2b41ad2ff69 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_open.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -57,12 +57,12 @@ void ADIOI_NTFS_Open(ADIO_File fd, int *error_code) attrib = attrib | FILE_FLAG_RANDOM_ACCESS; } - fd->fd_sys = CreateFile(fd->filename, + fd->fd_sys = CreateFile(fd->filename, amode, - FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, - NULL, - cmode, - attrib, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, + cmode, + attrib, NULL); fd->fd_direct = -1; diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_read.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_read.c index 7c55c1bc376..3ffd58cbe40 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_read.c @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_ntfs.h" -void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) @@ -140,7 +140,7 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count, ADIOI_Free(pOvl); fd->fp_sys_posn = offset + (ADIO_Offset)dwNumRead; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { @@ -231,9 +231,9 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count, } ADIOI_Free(pOvl); - fd->fp_ind = fd->fp_ind + (ADIO_Offset)dwNumRead; + fd->fp_ind = fd->fp_ind + (ADIO_Offset)dwNumRead; fd->fp_sys_posn = fd->fp_ind; - } + } #ifdef HAVE_STATUS_SET_BYTES if (err != FALSE) diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_resize.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_resize.c index 0fbeaaf9728..3f2ff58495c 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_resize.c +++ 
b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_resize.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_wait.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_wait.c index 80dfa4d3349..dd40a1bf686 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_wait.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_wait.c @@ -1,13 +1,13 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_ntfs.h" void ADIOI_NTFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, - int *error_code) + int *error_code) { return; } diff --git a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_write.c b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_write.c index 389e8669362..291de17b46d 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_write.c @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ #include "ad_ntfs.h" -void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) @@ -16,7 +16,7 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count, DWORD dwNumWritten = 0; MPI_Count err=-1, datatype_size, len; OVERLAPPED *pOvl; - + /* If file pointer type in ADIO_INDIVIDUAL then offset should be ignored and the current location of file pointer should be used */ if(file_ptr_type == ADIO_INDIVIDUAL){ @@ -121,7 +121,7 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count, ADIOI_Free(pOvl); fd->fp_sys_posn = offset + dwNumWritten; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { diff --git a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.c b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.c index fd9cd93ff86..6b2b13a3262 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * ad_panfs.c * * Copyright (C) 2001 University of Chicago. diff --git a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.h b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.h index 5f13f64fc39..4ccc73cb778 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.h @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * ad_panfs.h * * Copyright (C) 2001 University of Chicago. 
@@ -27,7 +27,7 @@ typedef struct adiocb adiocb_t; void ADIOI_PANFS_Open(ADIO_File fd, int *error_code); void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); -void ADIOI_PANFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PANFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); diff --git a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_hints.c b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_hints.c index 94178ab0cfe..93ec2303472 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_hints.c +++ b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_hints.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * ad_panfs_hints.c * * Copyright (C) 2001 University of Chicago. @@ -20,15 +20,15 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) *error_code = MPI_SUCCESS; if (fd->info == MPI_INFO_NULL) { - /* This must be part of the open call. can set striping parameters - * if necessary. - */ + /* This must be part of the open call. can set striping parameters + * if necessary. + */ MPI_Info_create(&(fd->info)); /* anticipate concurrent writes in an MPI-IO application */ ADIOI_Info_set (fd->info, "panfs_concurrent_write", "1"); - /* has user specified striping parameters + /* has user specified striping parameters and do they have the same value on all processes? 
*/ if (users_info != MPI_INFO_NULL) { @@ -61,7 +61,7 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } } - ADIOI_GEN_SetInfo(fd, users_info, &gen_error_code); + ADIOI_GEN_SetInfo(fd, users_info, &gen_error_code); /* If this function is successful, use the error code returned from ADIOI_GEN_SetInfo * otherwise use the error_code generated by this function */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_open.c b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_open.c index d5374ebf448..d2e3d8dd404 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_open.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * ad_panfs_open.c * * Copyright (C) 2001 University of Chicago. @@ -30,7 +30,7 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code) pan_fs_client_layout_agg_type_t layout_type = PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT; unsigned long int layout_stripe_unit = 0; unsigned long int layout_parity_stripe_width = 0; - unsigned long int layout_parity_stripe_depth = 0; + unsigned long int layout_parity_stripe_depth = 0; unsigned long int layout_total_num_comps = 0; pan_fs_client_layout_visit_t layout_visit_policy = PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN; int myrank; @@ -39,32 +39,32 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code) *error_code = MPI_SUCCESS; value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_type = strtoul(value,NULL,10); } - ADIOI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_stripe_unit = strtoul(value,NULL,10); } - ADIOI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, 
+ ADIOI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_total_num_comps = strtoul(value,NULL,10); } - ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_parity_stripe_width = strtoul(value,NULL,10); } - ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_parity_stripe_depth = strtoul(value,NULL,10); } - ADIOI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, value, &flag); if (flag) { layout_visit_policy = strtoul(value,NULL,10); @@ -145,13 +145,13 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code) MPI_Abort(MPI_COMM_WORLD, 1); } } - /* Create the file via ioctl() or open(). ADIOI_PANFS_Open's caller + /* Create the file via ioctl() or open(). ADIOI_PANFS_Open's caller * already optimizes performance by only calling this function with - * ADIO_CREATE on rank 0. Therefore, we don't need to worry about + * ADIO_CREATE on rank 0. Therefore, we don't need to worry about * implementing that optimization here. 
*/ - if((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) || (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE) + if((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) || (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE) || (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) { - pan_fs_client_layout_create_args_t file_create_args; + pan_fs_client_layout_create_args_t file_create_args; int fd_dir; char* slash; struct stat stat_buf; @@ -185,7 +185,7 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code) if (!slash) ADIOI_Strncpy(path, ".", 2); else { - if (slash == path) + if (slash == path) *(path + 1) = '\0'; else *slash = '\0'; } @@ -210,7 +210,7 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code) file_create_args.mode = perm; file_create_args.version = PAN_FS_CLIENT_LAYOUT_VERSION; file_create_args.flags = PAN_FS_CLIENT_LAYOUT_CREATE_F__NONE; - ADIOI_Strncpy(file_create_args.filename, file_name_ptr, strlen(fd->filename)+1); + ADIOI_Strncpy(file_create_args.filename, file_name_ptr, strlen(fd->filename)+1); file_create_args.layout.agg_type = layout_type; file_create_args.layout.layout_is_valid = 1; if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE) @@ -266,7 +266,7 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code) amode = amode | O_EXCL; value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, value, &flag); if (flag) { unsigned long int concurrent_write = strtoul(value,NULL,10); @@ -293,7 +293,7 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code) /* Error - set layout type to unknown */ ADIOI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID"); } - else + else { ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.agg_type); ADIOI_Info_set(fd->info, "panfs_layout_type", temp_buffer); diff --git 
a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_read.c b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_read.c index 237e4929de3..44e28279df0 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_read.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -11,7 +11,7 @@ #include #endif -void ADIOI_PANFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PANFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) @@ -57,7 +57,7 @@ void ADIOI_PANFS_ReadContig(ADIO_File fd, void *buf, int count, fd->fp_sys_posn = offset + err; if (file_ptr_type == ADIO_INDIVIDUAL) { - fd->fp_ind += err; + fd->fp_ind += err; } #ifdef HAVE_STATUS_SET_BYTES diff --git a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_resize.c b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_resize.c index 5c41126c4de..3c59cf8fad2 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_resize.c +++ b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_resize.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_write.c b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_write.c index 920d2f473e7..d0ec79ffe65 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_write.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. 
+ * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -40,7 +40,7 @@ void ADIOI_PANFS_WriteContig(ADIO_File fd, const void *buf, int count, } /* --END ERROR HANDLING-- */ } - + AD_PANFS_RETRY(write(fd->fd_sys, buf, len),err) /* --BEGIN ERROR HANDLING-- */ if (err == -1) { @@ -57,7 +57,7 @@ void ADIOI_PANFS_WriteContig(ADIO_File fd, const void *buf, int count, fd->fp_sys_posn = offset + err; if (file_ptr_type == ADIO_INDIVIDUAL) { - fd->fp_ind += err; + fd->fp_ind += err; } #ifdef HAVE_STATUS_SET_BYTES diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs.c b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs.c index 62a4305205b..2e68bda6259 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs.h b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs.h index fbe055ccf75..b0c9739670d 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -24,38 +24,38 @@ #endif /* PFS file-pointer modes (removed most of them because they are unused) */ -#ifndef M_ASYNC +#ifndef M_ASYNC #define M_UNIX 0 #define M_ASYNC 5 #endif void ADIOI_PFS_Open(ADIO_File fd, int *error_code); -void ADIOI_PFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); -void ADIOI_PFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_PFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code); -void ADIOI_PFS_IwriteContig(ADIO_File fd, void *buf, int count, + *error_code); +void ADIOI_PFS_IwriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); -void ADIOI_PFS_IreadContig(ADIO_File fd, void *buf, int count, + *error_code); +void ADIOI_PFS_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); + *error_code); int ADIOI_PFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int *error_code); int ADIOI_PFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code); void ADIOI_PFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_PFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, - int *error_code); + int *error_code); void ADIOI_PFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int - *error_code); + *error_code); void ADIOI_PFS_Flush(ADIO_File fd, int *error_code); void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_done.c b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_done.c index 60e2d7da298..0550aa9d483 100644 
--- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_done.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_done.c @@ -1,14 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_pfs.h" int ADIOI_PFS_ReadDone(ADIO_Request *request, ADIO_Status *status, - int *error_code) + int *error_code) { int done=0; static char myname[] = "ADIOI_PFS_READDONE"; @@ -20,7 +20,7 @@ int ADIOI_PFS_ReadDone(ADIO_Request *request, ADIO_Status *status, if ((*request)->queued) done = _iodone(*((long *) (*request)->handle)); - else done = 1; /* ADIOI_Complete_Async completed this request, + else done = 1; /* ADIOI_Complete_Async completed this request, but request object was not freed. */ #ifdef HAVE_STATUS_SET_BYTES @@ -38,7 +38,7 @@ int ADIOI_PFS_ReadDone(ADIO_Request *request, ADIO_Status *status, ADIOI_Free_request((ADIOI_Req_node *) (*request)); *request = ADIO_REQUEST_NULL; } - + if (done == -1 && errno != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, @@ -54,4 +54,4 @@ int ADIOI_PFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code) { return ADIOI_PFS_ReadDone(request, status, error_code); -} +} diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_fcntl.c index 4a2c0fd3827..619abe09322 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -18,10 +18,10 @@ void ADIOI_PFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, switch(flag) { case ADIO_FCNTL_GET_FSIZE: if (!(fd->atomicity)) { - /* in M_ASYNC mode, all processes are not aware of changes - in file size (although the manual says otherwise). Therefore, - temporarily change to M_UNIX and then change - back to M_ASYNC.*/ + /* in M_ASYNC mode, all processes are not aware of changes + in file size (although the manual says otherwise). Therefore, + temporarily change to M_UNIX and then change + back to M_ASYNC.*/ MPI_Comm_size(MPI_COMM_WORLD, &np_total); MPI_Comm_size(fd->comm, &np_comm); if (np_total == np_comm) { @@ -31,7 +31,7 @@ void ADIOI_PFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, /* else it is M_UNIX anyway, so no problem */ } fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END); - if (fd->fp_sys_posn != -1) + if (fd->fp_sys_posn != -1) lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); *error_code = MPI_SUCCESS; break; diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_flush.c b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_flush.c index 98dedc099c3..9867a46628d 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_flush.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_flush.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_hints.c b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_hints.c index 407a0eb7758..54676e1be39 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_hints.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_hints.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. 
* See COPYRIGHT notice in top-level directory. */ @@ -15,16 +15,16 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) int err, myrank, fd_sys, perm, amode, old_mask; if ( (fd->info) == MPI_INFO_NULL) { - /* This must be part of the open call. can set striping parameters - if necessary. */ + /* This must be part of the open call. can set striping parameters + if necessary. */ MPI_Info_create(&(fd->info)); - - /* has user specified striping or server buffering parameters + + /* has user specified striping or server buffering parameters and do they have the same value on all processes? */ if (users_info != MPI_INFO_NULL) { value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_factor=atoi(value); @@ -40,7 +40,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* --END ERROR HANDLING-- */ } - ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_unit=atoi(value); @@ -56,7 +56,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* --END ERROR HANDLING-- */ } - ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, value, &flag); if (flag) { start_iodev=atoi(value); @@ -100,7 +100,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) if (!err) { if (str_unit > 0) attr.s_sunitsize = str_unit; - if ((start_iodev >= 0) && + if ((start_iodev >= 0) && (start_iodev < attr.s_sfactor)) attr.s_start_sdir = start_iodev; if ((str_factor > 0) && (str_factor < attr.s_sfactor)) @@ -116,10 +116,10 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } /* Has user asked for pfs server buffering to be turned on? 
- If so, mark it as true in fd->info and turn it on in + If so, mark it as true in fd->info and turn it on in ADIOI_PFS_Open after the file is opened */ - ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, value, &flag); if (flag && (!strcmp(value, "true"))) ADIOI_Info_set(fd->info, "pfs_svr_buf", "true"); @@ -128,15 +128,15 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) ADIOI_Free(value); } else ADIOI_Info_set(fd->info, "pfs_svr_buf", "false"); - + /* set the values for collective I/O and data sieving parameters */ ADIOI_GEN_SetInfo(fd, users_info, error_code); } - + else { /* The file has been opened previously and fd->fd_sys is a valid file descriptor. cannot set striping parameters now. */ - + /* set the values for collective I/O and data sieving parameters */ ADIOI_GEN_SetInfo(fd, users_info, error_code); @@ -144,22 +144,22 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) if (users_info != MPI_INFO_NULL) { value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, value, &flag); if (flag && (!strcmp(value, "true") || !strcmp(value, "false"))) { - value_in_fd = (char *) + value_in_fd = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, value_in_fd, &flag); if (strcmp(value, value_in_fd)) { if (!strcmp(value, "true")) { err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE); - if (!err) + if (!err) ADIOI_Info_set(fd->info, "pfs_svr_buf", "true"); } else { err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, FALSE); - if (!err) + if (!err) ADIOI_Info_set(fd->info, "pfs_svr_buf", "false"); } } @@ -169,6 +169,6 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } } - + *error_code 
= MPI_SUCCESS; } diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_iread.c b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_iread.c index 78b3c592fb7..8e278120471 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_iread.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_iread.c @@ -1,13 +1,13 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_pfs.h" -void ADIOI_PFS_IreadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PFS_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int *error_code) @@ -62,7 +62,7 @@ void ADIOI_PFS_IreadContig(ADIO_File fd, void *buf, int count, return; } - if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len; + if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len; (*request)->queued = 1; (*request)->nbytes = len; diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_iwrite.c b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_iwrite.c index 5dda2fbdeb6..cf54cc4d715 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_iwrite.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_iwrite.c @@ -1,15 +1,15 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ #include "ad_pfs.h" -void ADIOI_PFS_IwriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_PFS_IwriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Request *request, int *error_code) + ADIO_Offset offset, ADIO_Request *request, int *error_code) { long *id_sys; ADIO_Offset off; @@ -61,7 +61,7 @@ void ADIOI_PFS_IwriteContig(ADIO_File fd, void *buf, int count, return; } - if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len; + if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len; (*request)->queued = 1; (*request)->nbytes = len; diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_open.c b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_open.c index f814b7c0a14..86c14593106 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -36,7 +36,7 @@ void ADIOI_PFS_Open(ADIO_File fd, int *error_code) MPI_Comm_size(MPI_COMM_WORLD, &np_total); MPI_Comm_size(fd->comm, &np_comm); - if (np_total == np_comm) + if (np_total == np_comm) fd->fd_sys = _gopen(fd->filename, amode, M_ASYNC, perm); else fd->fd_sys = open(fd->filename, amode, perm); fd->fd_direct = -1; @@ -46,10 +46,10 @@ void ADIOI_PFS_Open(ADIO_File fd, int *error_code) /* if user has asked for pfs server buffering to be turned on, it will be set to true in fd->info in the earlier call - to ADIOI_PFS_SetInfo. Turn it on now, since we now have a + to ADIOI_PFS_SetInfo. Turn it on now, since we now have a valid file descriptor. 
*/ - ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, value, &flag); if (flag && (!strcmp(value, "true"))) { err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE); @@ -71,7 +71,7 @@ void ADIOI_PFS_Open(ADIO_File fd, int *error_code) } ADIOI_Free(value); - if (fd->access_mode & ADIO_APPEND) + if (fd->access_mode & ADIO_APPEND) fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_read.c b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_read.c index bd3b7e70e47..d85b09d8792 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_read.c @@ -1,13 +1,13 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_pfs.h" -void ADIOI_PFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { @@ -23,16 +23,16 @@ void ADIOI_PFS_ReadContig(ADIO_File fd, void *buf, int count, } err = _cread(fd->fd_sys, buf, len); fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { /* read from curr. location of ind. 
file pointer */ if (fd->fp_sys_posn != fd->fp_ind) { lseek(fd->fd_sys, fd->fp_ind, SEEK_SET); } err = _cread(fd->fd_sys, buf, len); - fd->fp_ind += err; + fd->fp_ind += err; fd->fp_sys_posn = fd->fp_ind; - } + } #ifdef HAVE_STATUS_SET_BYTES if (err != -1) MPIR_Status_set_bytes(status, datatype, err); diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_wait.c b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_wait.c index e14159521a5..d8200aa5409 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_wait.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_wait.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -41,7 +41,7 @@ void ADIOI_PFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, freed. This is used in ADIOI_Complete_async, because the user will call MPI_Wait later, which would require status to be filled. Ugly but works. queued = -1 should be used only - in ADIOI_Complete_async. + in ADIOI_Complete_async. This should not affect the user in any way. */ /* if request is still queued in the system, it is also there @@ -56,7 +56,7 @@ void ADIOI_PFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, } -void ADIOI_PFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, int *error_code) +void ADIOI_PFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, int *error_code) { ADIOI_PFS_ReadComplete(request, status, error_code); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_write.c b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_write.c index c64e976a2b4..fe836a971a6 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pfs/ad_pfs_write.c @@ -1,13 +1,13 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_pfs.h" -void ADIOI_PFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_PFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) @@ -24,7 +24,7 @@ void ADIOI_PFS_WriteContig(ADIO_File fd, void *buf, int count, } err = _cwrite(fd->fd_sys, buf, len); fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { /* write from curr. location of ind. file pointer */ if (fd->fp_sys_posn != fd->fp_ind) { diff --git a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs.c b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs.c index 29d8c3010d6..fbe59d10b46 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -33,5 +33,5 @@ struct ADIOI_Fns_struct ADIO_PIOFS_operations = { ADIOI_GEN_Flush, /* Flush */ ADIOI_GEN_Resize, /* Resize */ ADIOI_GEN_Delete, /* Delete */ - ADIOI_PIOFS_Feature, + ADIOI_PIOFS_Feature, }; diff --git a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs.h b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs.h index e9b74c9e872..01d2567d8a0 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -19,16 +19,16 @@ #include "adio.h" void ADIOI_PIOFS_Open(ADIO_File fd, int *error_code); -void ADIOI_PIOFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PIOFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); -void ADIOI_PIOFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_PIOFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_PIOFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int - *error_code); + *error_code); void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int diff --git a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_fcntl.c index 7d4a37cbfd5..80009aa44af 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -23,7 +23,7 @@ void ADIOI_PIOFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int * switch(flag) { case ADIO_FCNTL_GET_FSIZE: fcntl_struct->fsize = llseek(fd->fd_sys, 0, SEEK_END); - if (fd->fp_sys_posn != -1) + if (fd->fp_sys_posn != -1) llseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); if (fcntl_struct->fsize == -1) { #ifdef MPICH @@ -34,7 +34,7 @@ void ADIOI_PIOFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int * #else /* MPICH-1 */ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname, "I/O Error", "%s", strerror(errno)); - ADIOI_Error(fd, *error_code, myname); + ADIOI_Error(fd, *error_code, myname); #endif } else *error_code = MPI_SUCCESS; @@ -45,12 +45,12 @@ void ADIOI_PIOFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int * break; case ADIO_FCNTL_SET_ATOMICITY: - piofs_change_view = (piofs_change_view_t *) + piofs_change_view = (piofs_change_view_t *) ADIOI_Malloc(sizeof(piofs_change_view_t)); - piofs_change_view->Vbs = piofs_change_view->Vn = + piofs_change_view->Vbs = piofs_change_view->Vn = piofs_change_view->Hbs = piofs_change_view->Hn = 1; piofs_change_view->subfile = 0; - piofs_change_view->flags = (fcntl_struct->atomicity == 0) + piofs_change_view->flags = (fcntl_struct->atomicity == 0) ? 
(ACTIVE | NORMAL) : (ACTIVE | CAUTIOUS); err = piofsioctl(fd->fd_sys, PIOFS_CHANGE_VIEW, piofs_change_view); ADIOI_Free(piofs_change_view); @@ -64,7 +64,7 @@ void ADIOI_PIOFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int * #else /* MPICH-1 */ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname, "I/O Error", "%s", strerror(errno)); - ADIOI_Error(fd, *error_code, myname); + ADIOI_Error(fd, *error_code, myname); #endif } else *error_code = MPI_SUCCESS; diff --git a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_hints.c b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_hints.c index 242ebb3f07d..391e9de0074 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_hints.c +++ b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_hints.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -16,16 +16,16 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) int err, myrank, perm, old_mask, nioservers; if ((fd->info) == MPI_INFO_NULL) { - /* This must be part of the open call. can set striping parameters - if necessary. */ + /* This must be part of the open call. can set striping parameters + if necessary. */ MPI_Info_create(&(fd->info)); - - /* has user specified striping parameters + + /* has user specified striping parameters and do they have the same value on all processes? 
*/ if (users_info != MPI_INFO_NULL) { value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_factor=atoi(value); @@ -37,7 +37,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } } - ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_unit=atoi(value); @@ -49,7 +49,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) } } - ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, value, &flag); if (flag) { start_iodev=atoi(value); @@ -100,7 +100,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) piofs_create.bsu = (str_unit > 0) ? str_unit : -1; piofs_create.cells = (str_factor > 0) ? str_factor : -1; piofs_create.permissions = perm; - piofs_create.base_node = (start_iodev >= 0) ? + piofs_create.base_node = (start_iodev >= 0) ? start_iodev : -1; piofs_create.flags = 0; @@ -109,8 +109,8 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) MPI_Barrier(fd->comm); } } - } - + } + /* set the values for collective I/O and data sieving parameters */ ADIOI_GEN_SetInfo(fd, users_info, error_code); diff --git a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_open.c b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_open.c index e02e90cf325..67c771ecfad 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -72,7 +72,7 @@ void ADIOI_PIOFS_Open(ADIO_File fd, int *error_code) #else /* MPICH-1 */ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname, "I/O Error", "%s", strerror(errno)); - ADIOI_Error(ADIO_FILE_NULL, *error_code, myname); + ADIOI_Error(ADIO_FILE_NULL, *error_code, myname); #endif } else *error_code = MPI_SUCCESS; diff --git a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_read.c b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_read.c index 278548656e3..79a91218100 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_read.c @@ -1,13 +1,13 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_piofs.h" -void ADIOI_PIOFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PIOFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { @@ -25,16 +25,16 @@ void ADIOI_PIOFS_ReadContig(ADIO_File fd, void *buf, int count, } err = read(fd->fd_sys, buf, len); fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { /* read from curr. location of ind. 
file pointer */ if (fd->fp_sys_posn != fd->fp_ind) { llseek(fd->fd_sys, fd->fp_ind, SEEK_SET); } err = read(fd->fd_sys, buf, len); - fd->fp_ind += err; + fd->fp_ind += err; fd->fp_sys_posn = fd->fp_ind; - } + } #ifdef HAVE_STATUS_SET_BYTES if (err != -1) MPIR_Status_set_bytes(status, datatype, err); @@ -49,7 +49,7 @@ void ADIOI_PIOFS_ReadContig(ADIO_File fd, void *buf, int count, #else *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname, "I/O Error", "%s", strerror(errno)); - ADIOI_Error(fd, *error_code, myname); + ADIOI_Error(fd, *error_code, myname); #endif } else *error_code = MPI_SUCCESS; diff --git a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_write.c b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_write.c index 4e1c7f11590..4d0b4b99230 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_piofs/ad_piofs_write.c @@ -1,14 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_piofs.h" #include "adio_extern.h" -void ADIOI_PIOFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_PIOFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { @@ -26,7 +26,7 @@ void ADIOI_PIOFS_WriteContig(ADIO_File fd, void *buf, int count, } err = write(fd->fd_sys, buf, len); fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { /* write from curr. location of ind. 
file pointer */ if (fd->fp_sys_posn != fd->fp_ind) { @@ -97,7 +97,7 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -105,7 +105,7 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Type_size_x(datatype, &buftype_size); MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; - + bufsize = buftype_size * count; if (!buftype_is_contig && filetype_is_contig) { @@ -127,10 +127,10 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, else off = llseek(fd->fd_sys, fd->fp_ind, SEEK_SET); k = 0; - for (j=0; jcount; i++) { iov[k].iov_base = ((char *) buf) + j*buftype_extent + - flat_buf->indices[i]; + flat_buf->indices[i]; iov[k].iov_len = flat_buf->blocklens[i]; /*FPRINTF(stderr, "%d %d\n", iov[k].iov_base, iov[k].iov_len);*/ @@ -155,7 +155,7 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, #ifdef MPICH *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", "**io %s", strerror(errno)); -#elif defined(PRINT_ERR_MSG) +#elif defined(PRINT_ERR_MSG) *error_code = MPI_ERR_UNKNOWN; #else /* MPICH-1 */ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, @@ -184,11 +184,11 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, while (!flag) { n_filetypes++; for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] + if (disp + flat_file->indices[i] + + (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] >= offset) { st_index = i; - fwr_size = disp + flat_file->indices[i] + + fwr_size = disp + flat_file->indices[i] + (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] - offset; flag = 1; @@ -202,7 +202,7 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int 
count, n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -229,9 +229,9 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, off = offset; fwr_size = ADIOI_MIN(fwr_size, bufsize); while (i < bufsize) { - if (fwr_size) { - /* TYPE_UB and TYPE_LB can result in - fwr_size = 0. save system call in such cases */ + if (fwr_size) { + /* TYPE_UB and TYPE_LB can result in + fwr_size = 0. save system call in such cases */ llseek(fd->fd_sys, off, SEEK_SET); err = write(fd->fd_sys, ((char *) buf) + i, fwr_size); if (err == -1) err_flag = 1; @@ -249,7 +249,7 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, j = 0; n_filetypes++; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); } @@ -287,7 +287,7 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, n_filetypes++; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; new_fwr_size = flat_file->blocklens[j]; @@ -303,7 +303,7 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; indx = buftype_extent*(buf_count/flat_buf->count) + - flat_buf->indices[k]; + flat_buf->indices[k]; new_bwr_size = flat_buf->blocklens[k]; if (size != fwr_size) { off += size; @@ -330,13 +330,13 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count, #endif } else *error_code = MPI_SUCCESS; - } + } fd->fp_sys_posn = -1; /* set it to null. */ #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. 
The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ #endif diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs.c index 92b6df63e9f..1d83cca6e6b 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs.h b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs.h index 88e1a9f2253..276f1076dc1 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -27,16 +27,16 @@ typedef long long int int64_t; void ADIOI_PVFS_Open(ADIO_File fd, int *error_code); void ADIOI_PVFS_Close(ADIO_File fd, int *error_code); -void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); -void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_PVFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int - *error_code); + *error_code); void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_close.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_close.c index c2da2e360ef..4766b9b2e72 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_close.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_close.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_delete.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_delete.c index 0e322ad32ae..bb6592aaff3 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_delete.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_delete.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_fcntl.c index eeff2507189..d13760b74fc 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -66,7 +66,7 @@ void ADIOI_PVFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, myname, __LINE__, MPI_ERR_ARG, "**flag", "**flag %d", flag); - return; + return; /* --END ERROR HANDLING-- */ } } diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_flush.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_flush.c index 340f0cb3395..9b2ac4ecd78 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_flush.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_flush.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -19,7 +19,7 @@ void ADIOI_PVFS_Flush(ADIO_File fd, int *error_code) */ MPI_Comm_rank(fd->comm, &rank); - MPI_Reduce(&dummy_in, &dummy, 1, MPI_INT, MPI_SUM, + MPI_Reduce(&dummy_in, &dummy, 1, MPI_INT, MPI_SUM, fd->hints->ranklist[0], fd->comm); if (rank == fd->hints->ranklist[0]) { err = pvfs_fsync(fd->fd_sys); diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_hints.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_hints.c index fdc06ed8465..f64edd13d0f 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_hints.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_hints.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -10,24 +10,24 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) { char *value; - int flag, tmp_val, str_factor=-1, str_unit=-1, start_iodev=-1; + int flag, tmp_val, str_factor=-1, str_unit=-1, start_iodev=-1; static char myname[] = "ADIOI_PVFS_SETINFO"; if ((fd->info) == MPI_INFO_NULL) { - /* This must be part of the open call. can set striping parameters - if necessary. */ + /* This must be part of the open call. can set striping parameters + if necessary. */ MPI_Info_create(&(fd->info)); ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", "disable"); ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", "disable"); fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE; fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE; - + /* has user specified any pvfs-specific hints (striping params, listio) and do they have the same value on all processes? */ if (users_info != MPI_INFO_NULL) { value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_factor=atoi(value); @@ -44,7 +44,7 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) else ADIOI_Info_set(fd->info, "striping_factor", value); } - ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag) { str_unit=atoi(value); @@ -61,7 +61,7 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) else ADIOI_Info_set(fd->info, "striping_unit", value); } - ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, value, &flag); if (flag) { start_iodev=atoi(value); @@ -82,16 +82,16 @@ void ADIOI_PVFS_SetInfo(ADIO_File 
fd, MPI_Info users_info, int *error_code) MPI_MAX_INFO_VAL, value, &flag); if (flag) { - if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) + if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) { ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value); fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_ENABLE; - } else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE")) + } else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE")) { ADIOI_Info_set(fd->info , "romio_pvfs_listio_read", value); fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE; } - else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) + else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value); fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_AUTO; @@ -110,16 +110,16 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) ADIOI_Info_get(users_info, "romio_pvfs_listio_write", MPI_MAX_INFO_VAL, value, &flag); if (flag) { - if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) + if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) { ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value); fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_ENABLE; - } else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE")) + } else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE")) { ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value); fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE; } - else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) + else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value); fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_AUTO; @@ -134,10 +134,10 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) return; /* --END ERROR HANDLING-- */ } - } + } ADIOI_Free(value); } - } + } /* set 
the values for collective I/O and data sieving parameters */ ADIOI_GEN_SetInfo(fd, users_info, error_code); diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_open.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_open.c index c4fa28805da..cef52445536 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -37,15 +37,15 @@ void ADIOI_PVFS_Open(ADIO_File fd, int *error_code) value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL, value, &flag); if (flag && (atoi(value) > 0)) pstat.pcount = atoi(value); - ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL, value, &flag); if (flag && (atoi(value) > 0)) pstat.ssize = atoi(value); - ADIOI_Info_get(fd->info, "start_iodevice", MPI_MAX_INFO_VAL, + ADIOI_Info_get(fd->info, "start_iodevice", MPI_MAX_INFO_VAL, value, &flag); if (flag && (atoi(value) >= 0)) pstat.base = atoi(value); diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_read.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_read.c index 71b558a5d1f..4b06984c24e 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_read.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -16,7 +16,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, *error_code); #endif -void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { @@ -45,7 +45,7 @@ void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count, #endif if (err>0) fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { /* read from curr. location of ind. file pointer */ if (fd->fp_sys_posn != fd->fp_ind) { @@ -65,9 +65,9 @@ void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count, MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); #endif if (err > 0) - fd->fp_ind += err; + fd->fp_ind += err; fd->fp_sys_posn = fd->fp_ind; - } + } #ifdef HAVE_STATUS_SET_BYTES if (err != -1) MPIR_Status_set_bytes(status, datatype, err); @@ -115,7 +115,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, int n_filetypes, etype_in_filetype; ADIO_Offset abs_off_in_filetype=0; MPI_Count filetype_size, etype_size, buftype_size; - MPI_Aint filetype_extent, buftype_extent; + MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off; ADIO_Offset off, disp, start_off; @@ -154,7 +154,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -175,7 +175,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; - off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : + off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : fd->disp + etype_size * offset; file_list_count = 1; @@ -195,9 +195,9 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, /* step through each block in memory, filling memory arrays */ while (b_blks_read < total_blks_to_read) { for (i=0; icount; i++) { - mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = + mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = (char*)((char *)buf + j*buftype_extent + flat_buf->indices[i]); - mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = + mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = flat_buf->blocklens[i]; file_lengths += flat_buf->blocklens[i]; b_blks_read++; @@ -215,13 +215,13 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, pvfs_read_list(fd->fd_sys ,mem_list_count, mem_offsets, mem_lengths, file_list_count, &file_offsets, &file_lengths); - + /* in the case of the last read list call, leave here */ if (b_blks_read == total_blks_to_read) break; file_offsets += file_lengths; file_lengths = 0; - } + } } /* for (i=0; icount; i++) */ j++; } /* while (b_blks_read < total_blks_to_read) */ @@ -264,11 +264,11 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, while (!flag) { n_filetypes++; for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + + if (disp + flat_file->indices[i] + (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] >= offset) { st_index = i; - frd_size = (int) (disp + flat_file->indices[i] + + frd_size = (int) (disp + flat_file->indices[i] + (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] - offset); flag = 1; @@ -282,7 +282,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -294,16 +294,16 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, 
break; } } - + /* abs. offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; } /* else [file_ptr_type != ADIO_INDIVIDUAL] */ start_off = offset; st_frd_size = frd_size; st_n_filetypes = n_filetypes; - + if (buftype_is_contig && !filetype_is_contig) { /* contiguous in memory, noncontiguous in file. should be the most @@ -311,13 +311,13 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, int mem_lengths; char *mem_offsets; - + i = 0; j = st_index; n_filetypes = st_n_filetypes; - + mem_list_count = 1; - + /* determine how many blocks in file to read */ f_data_read = ADIOI_MIN(st_frd_size, bufsize); total_blks_to_read = 1; @@ -326,17 +326,17 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, f_data_read += flat_file->blocklens[j]; total_blks_to_read++; if (j<(flat_file->count-1)) j++; - else j = 0; + else j = 0; } - + j = st_index; n_filetypes = st_n_filetypes; n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE; extra_blks = total_blks_to_read%MAX_ARRAY_SIZE; - + mem_offsets = buf; mem_lengths = 0; - + /* if at least one full readlist, allocate file arrays at max array size and don't free until very end */ if (n_read_lists) { @@ -353,7 +353,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, file_lengths = (int32_t*)ADIOI_Malloc(extra_blks* sizeof(int32_t)); } - + /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ for (i=0; itype != datatype) flat_buf = flat_buf->next; @@ -430,7 +430,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, max_mem_list = 0; max_file_list = 0; - /* run through and file max_file_list and max_mem_list so that you + /* run through and file max_file_list and max_mem_list so that you can allocate the file and memory arrays less than MAX_ARRAY_SIZE if possible */ @@ -438,7 +438,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, 
void *buf, int count, k = start_k; new_buffer_read = 0; mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && + while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) { /* find mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are @@ -446,9 +446,9 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, read in the next immediate read list is less than bufsize */ if(mem_list_count) { - if((new_buffer_read + flat_buf->blocklens[k] + + if((new_buffer_read + flat_buf->blocklens[k] + size_read) > bufsize) { - end_brd_size = new_buffer_read + + end_brd_size = new_buffer_read + flat_buf->blocklens[k] - (bufsize - size_read); new_buffer_read = bufsize - size_read; } @@ -466,15 +466,15 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, } mem_list_count++; k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ j = start_j; new_file_read = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && + while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_read < new_buffer_read)) { if(file_list_count) { - if((new_file_read + flat_file->blocklens[j]) > + if((new_file_read + flat_file->blocklens[j]) > new_buffer_read) { end_frd_size = new_buffer_read - new_file_read; new_file_read = new_buffer_read; @@ -495,9 +495,9 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_read < new_buffer_read) && + if ((new_file_read < new_buffer_read) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_read = 0; mem_list_count = 0; @@ -526,13 +526,13 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, } /* while (new_buffer_read < new_file_read) */ } /* if ((new_file_read < new_buffer_read) && 
(file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ /* fakes filling the readlist arrays of lengths found above */ k = start_k; j = start_j; - for (i=0; iblocklens[k] == end_brd_size) @@ -553,7 +553,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, if (i == (file_list_count - 1)) { if (flat_file->blocklens[j] == end_frd_size) frd_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { frd_size = flat_file->blocklens[j] - end_frd_size; j--; @@ -579,7 +579,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int)); file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t)); file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t)); - + size_read = 0; n_filetypes = st_n_filetypes; frd_size = st_frd_size; @@ -592,12 +592,12 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, /* this section calculates mem_list_count and file_list_count and also finds the possibly odd sized last array elements in new_frd_size and new_brd_size */ - + while (size_read < bufsize) { k = start_k; new_buffer_read = 0; mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && + while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) { /* find mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are @@ -605,9 +605,9 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, read in the next immediate read list is less than bufsize */ if(mem_list_count) { - if((new_buffer_read + flat_buf->blocklens[k] + + if((new_buffer_read + flat_buf->blocklens[k] + size_read) > bufsize) { - end_brd_size = new_buffer_read + + end_brd_size = new_buffer_read + flat_buf->blocklens[k] - (bufsize - size_read); 
new_buffer_read = bufsize - size_read; } @@ -625,15 +625,15 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, } mem_list_count++; k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ j = start_j; new_file_read = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && + while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_read < new_buffer_read)) { if(file_list_count) { - if((new_file_read + flat_file->blocklens[j]) > + if((new_file_read + flat_file->blocklens[j]) > new_buffer_read) { end_frd_size = new_buffer_read - new_file_read; new_file_read = new_buffer_read; @@ -654,9 +654,9 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_read < new_buffer_read) && + if ((new_file_read < new_buffer_read) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_read = 0; mem_list_count = 0; @@ -685,13 +685,13 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, } /* while (new_buffer_read < new_file_read) */ } /* if ((new_file_read < new_buffer_read) && (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ /* fills the allocated readlist arrays */ k = start_k; j = start_j; - for (i=0; icount) + (int)flat_buf->indices[k]); @@ -730,7 +730,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, file_lengths[i] = end_frd_size; if (flat_file->blocklens[j] == end_frd_size) frd_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { frd_size = flat_file->blocklens[j] - end_frd_size; j--; @@ -745,7 +745,7 @@ void ADIOI_PVFS_ReadStridedListIO(ADIO_File fd, void *buf, int count, } } /* for (i=0; ifp_ind = off; 
fd->fp_sys_posn = -1; /* set it to null. */ - + #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); - /* This is a temporary way of filling in status. The right way is to - keep track of how much data was actually read and placed in buf + /* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually read and placed in buf by ADIOI_BUFFERED_READ. */ #endif - + if (!buftype_is_contig) ADIOI_Delete_flattened(datatype); } #endif /* HAVE_PVFS_LISTIO */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_resize.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_resize.c index b4b9553633f..e3d3b2b127f 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_resize.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_resize.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_write.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_write.c index 4e874d99ac0..6b0b3b70c1c 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs/ad_pvfs_write.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -15,7 +15,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, *error_code); #endif -void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) @@ -45,7 +45,7 @@ void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count, #endif if (err > 0) fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ + /* individual file pointer not updated */ } else { /* write from curr. location of ind. file pointer */ if (fd->fp_sys_posn != fd->fp_ind) { @@ -109,7 +109,7 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_PVFS_LISTIO if ( fd->hints->fs_hints.pvfs.listio_write == ADIOI_HINT_ENABLE ) { - ADIOI_PVFS_WriteStridedListIO(fd, buf, count, datatype, + ADIOI_PVFS_WriteStridedListIO(fd, buf, count, datatype, file_ptr_type, offset, status, error_code); return; } @@ -134,7 +134,7 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -142,7 +142,7 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Type_size_x(datatype, &buftype_size); MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; - + bufsize = buftype_size * count; if (!buftype_is_contig && filetype_is_contig) { @@ -280,11 +280,11 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, while (!flag) { n_filetypes++; for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] + if (disp + flat_file->indices[i] + + (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] >= offset) { st_index = i; - fwr_size = disp + flat_file->indices[i] + + fwr_size = disp + flat_file->indices[i] + 
(ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] - offset; flag = 1; @@ -298,7 +298,7 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -325,9 +325,9 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, off = offset; fwr_size = ADIOI_MIN(fwr_size, bufsize); while (i < bufsize) { - if (fwr_size) { - /* TYPE_UB and TYPE_LB can result in - fwr_size = 0. save system call in such cases */ + if (fwr_size) { + /* TYPE_UB and TYPE_LB can result in + fwr_size = 0. save system call in such cases */ #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); #endif @@ -357,7 +357,7 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, j = 0; n_filetypes++; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i); } @@ -407,7 +407,7 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, n_filetypes++; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent; new_fwr_size = flat_file->blocklens[j]; @@ -423,7 +423,7 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; indx = buftype_extent*(buf_count/flat_buf->count) + - flat_buf->indices[k]; + flat_buf->indices[k]; new_bwr_size = flat_buf->blocklens[k]; if (size != fwr_size) { off += size; @@ -450,7 +450,7 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. 
The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ #endif @@ -461,7 +461,7 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count, void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code) + *error_code) { /* Since PVFS does not support file locking, can't do buffered writes as on Unix */ @@ -522,7 +522,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -530,7 +530,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, MPI_Type_size_x(datatype, &buftype_size); MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; - + bufsize = buftype_size * count; if (!buftype_is_contig && filetype_is_contig) { @@ -542,7 +542,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, ADIOI_Flatten_datatype(datatype); flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; - + if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { off = fd->disp + etype_size * offset; #ifdef ADIOI_MPE_LOGGING @@ -580,9 +580,9 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, /* step through each block in memory, filling memory arrays */ while (b_blks_wrote < total_blks_to_write) { for (i=0; icount; i++) { - mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = + mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = ((char*)buf + j*buftype_extent + flat_buf->indices[i]); - mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = + mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = flat_buf->blocklens[i]; file_lengths += flat_buf->blocklens[i]; b_blks_wrote++; @@ -600,13 +600,13 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, pvfs_write_list(fd->fd_sys 
,mem_list_count, mem_offsets, mem_lengths, file_list_count, &file_offsets, &file_lengths); - + /* in the case of the last read list call, leave here */ if (b_blks_wrote == total_blks_to_write) break; file_offsets += file_lengths; file_lengths = 0; - } + } } /* for (i=0; icount; i++) */ j++; } /* while (b_blks_wrote < total_blks_to_write) */ @@ -627,7 +627,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ #endif @@ -645,7 +645,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, disp = fd->disp; /* for each case - ADIO_Individual pointer or explicit, find offset - (file offset in bytes), n_filetypes (how many filetypes into file + (file offset in bytes), n_filetypes (how many filetypes into file to start), fwr_size (remaining amount of data in present file block), and st_index (start point in terms of blocks in starting filetype) */ @@ -656,11 +656,11 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, while (!flag) { n_filetypes++; for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + + if (disp + flat_file->indices[i] + (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] >= offset) { st_index = i; - fwr_size = disp + flat_file->indices[i] + + fwr_size = disp + flat_file->indices[i] + (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] - offset; flag = 1; @@ -674,7 +674,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += 
flat_file->blocklens[i]; @@ -695,7 +695,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, start_off = offset; st_fwr_size = fwr_size; st_n_filetypes = n_filetypes; - + if (buftype_is_contig && !filetype_is_contig) { /* contiguous in memory, noncontiguous in file. should be the most @@ -703,14 +703,14 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, int mem_lengths; char *mem_offsets; - + i = 0; j = st_index; off = offset; n_filetypes = st_n_filetypes; - + mem_list_count = 1; - + /* determine how many blocks in file to read */ f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize); total_blks_to_write = 1; @@ -719,17 +719,17 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, f_data_wrote += flat_file->blocklens[j]; total_blks_to_write++; if (j<(flat_file->count-1)) j++; - else j = 0; + else j = 0; } - + j = st_index; n_filetypes = st_n_filetypes; n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE; extra_blks = total_blks_to_write%MAX_ARRAY_SIZE; - + mem_offsets = buf; mem_lengths = 0; - + /* if at least one full readlist, allocate file arrays at max array size and don't free until very end */ if (n_write_lists) { @@ -746,7 +746,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, file_lengths = (int32_t*)ADIOI_Malloc(extra_blks* sizeof(int32_t)); } - + /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ for (i=0; iblocklens[k] + + if((new_buffer_write + flat_buf->blocklens[k] + size_wrote) > bufsize) { - end_bwr_size = new_buffer_write + + end_bwr_size = new_buffer_write + flat_buf->blocklens[k] - (bufsize - size_wrote); new_buffer_write = bufsize - size_wrote; } @@ -859,15 +859,15 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, } mem_list_count++; k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ j = start_j; 
new_file_write = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && + while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_write < new_buffer_write)) { if(file_list_count) { - if((new_file_write + flat_file->blocklens[j]) > + if((new_file_write + flat_file->blocklens[j]) > new_buffer_write) { end_fwr_size = new_buffer_write - new_file_write; new_file_write = new_buffer_write; @@ -888,9 +888,9 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_write < new_buffer_write) && + if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_write = 0; mem_list_count = 0; @@ -898,7 +898,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, if(mem_list_count) { if((new_buffer_write + flat_buf->blocklens[k]) > new_file_write) { - end_bwr_size = new_file_write - + end_bwr_size = new_file_write - new_buffer_write; new_buffer_write = new_file_write; k--; @@ -920,13 +920,13 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, } /* while (new_buffer_write < new_file_write) */ } /* if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ /* fakes filling the writelist arrays of lengths found above */ k = start_k; j = start_j; - for (i=0; iblocklens[k] == end_bwr_size) @@ -947,7 +947,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, if (i == (file_list_count - 1)) { if (flat_file->blocklens[j] == end_fwr_size) fwr_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { fwr_size = flat_file->blocklens[j] - end_fwr_size; j--; @@ -973,7 +973,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, mem_lengths = (int 
*)ADIOI_Malloc(max_mem_list*sizeof(int)); file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t)); file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t)); - + size_wrote = 0; n_filetypes = st_n_filetypes; fwr_size = st_fwr_size; @@ -986,12 +986,12 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, /* this section calculates mem_list_count and file_list_count and also finds the possibly odd sized last array elements in new_fwr_size and new_bwr_size */ - + while (size_wrote < bufsize) { k = start_k; new_buffer_write = 0; mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && + while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) { /* find mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are @@ -999,9 +999,9 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, read in the next immediate read list is less than bufsize */ if(mem_list_count) { - if((new_buffer_write + flat_buf->blocklens[k] + + if((new_buffer_write + flat_buf->blocklens[k] + size_wrote) > bufsize) { - end_bwr_size = new_buffer_write + + end_bwr_size = new_buffer_write + flat_buf->blocklens[k] - (bufsize - size_wrote); new_buffer_write = bufsize - size_wrote; } @@ -1019,15 +1019,15 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, } mem_list_count++; k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ j = start_j; new_file_write = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && + while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_write < new_buffer_write)) { if(file_list_count) { - if((new_file_write + flat_file->blocklens[j]) > + if((new_file_write + flat_file->blocklens[j]) > new_buffer_write) { end_fwr_size = new_buffer_write - new_file_write; new_file_write = 
new_buffer_write; @@ -1048,9 +1048,9 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_write < new_buffer_write) && + if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_write = 0; mem_list_count = 0; @@ -1080,17 +1080,17 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, } /* while (new_buffer_write < new_file_write) */ } /* if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ /* fills the allocated readlist arrays */ k = start_k; j = start_j; - for (i=0; icount) + (int)flat_buf->indices[k]); - + if(!i) { mem_lengths[0] = bwr_size; mem_offsets[0] += flat_buf->blocklens[k] - bwr_size; @@ -1126,7 +1126,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, file_lengths[i] = end_fwr_size; if (flat_file->blocklens[j] == end_fwr_size) fwr_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { fwr_size = flat_file->blocklens[j] - end_fwr_size; j--; @@ -1166,7 +1166,7 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. 
*/ #endif diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2.c index a55c3c89254..2ddbd66b148 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago. + * Copyright (C) 2003 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -42,6 +42,6 @@ struct ADIOI_Fns_struct ADIO_PVFS2_operations = { "PVFS2: the PVFS v2 or OrangeFS file systems" }; -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2.h b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2.h index 95c5912376b..76fe56045a3 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2.h +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -19,16 +19,16 @@ void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code); void ADIOI_PVFS2_Close(ADIO_File fd, int *error_code); -void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); void ADIOI_PVFS2_WriteContig(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_PVFS2_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int - *error_code); + *error_code); void ADIOI_PVFS2_WriteStrided(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int @@ -43,7 +43,7 @@ void ADIOI_PVFS2_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); int ADIOI_PVFS2_Feature(ADIO_File fd, int flag); -void ADIOI_PVFS2_IReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PVFS2_IReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, MPI_Request *request, int *error_code); @@ -51,7 +51,7 @@ void ADIOI_PVFS2_IWriteContig(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, MPI_Request *request, int *error_code); -void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count, +void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, MPI_Request *request, int flag, int *error_code); diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_aio.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_aio.c index f4d78497e3e..b90fdedabb8 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_aio.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_aio.c 
@@ -1,7 +1,7 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- - * vim: ts=8 sts=4 sw=4 noexpandtab - * - * Copyright (C) 1997 University of Chicago. +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -20,10 +20,10 @@ static int ADIOI_PVFS2_greq_class = 0; int ADIOI_PVFS2_aio_free_fn(void *extra_state); int ADIOI_PVFS2_aio_poll_fn(void *extra_state, MPI_Status *status); -int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states, +int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states, double timeout, MPI_Status *status); -void ADIOI_PVFS2_IReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PVFS2_IReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, MPI_Request *request, int *error_code) @@ -41,7 +41,7 @@ void ADIOI_PVFS2_IWriteContig(ADIO_File fd, const void *buf, int count, offset, request, WRITE, error_code); } -void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count, +void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, MPI_Request *request, int flag, int *error_code) @@ -87,13 +87,13 @@ void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count, if (file_ptr_type == ADIO_INDIVIDUAL) { /* copy individual file pointer into offset variable, continue */ offset = fd->fp_ind; - } + } if (flag == READ) { #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_iread_a, 0, NULL ); #endif - ret = PVFS_isys_read(pvfs_fs->object_ref, aio_req->file_req, offset, - buf, aio_req->mem_req, &(pvfs_fs->credentials), + ret = PVFS_isys_read(pvfs_fs->object_ref, aio_req->file_req, offset, + buf, aio_req->mem_req, &(pvfs_fs->credentials), &(aio_req->resp_io), &(aio_req->op_id), NULL); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_iread_b, 0, NULL ); @@ -102,13 
+102,13 @@ void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_iwrite_a, 0, NULL ); #endif - ret = PVFS_isys_write(pvfs_fs->object_ref, aio_req->file_req, offset, - buf, aio_req->mem_req, &(pvfs_fs->credentials), + ret = PVFS_isys_write(pvfs_fs->object_ref, aio_req->file_req, offset, + buf, aio_req->mem_req, &(pvfs_fs->credentials), &(aio_req->resp_io), &(aio_req->op_id), NULL); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_iwrite_b, 0, NULL ); -#endif - } +#endif + } /* --BEGIN ERROR HANDLING-- */ if (ret < 0 ) { @@ -123,9 +123,9 @@ void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count, #ifdef HAVE_MPI_GREQUEST_EXTENSIONS /* posted. defered completion */ - if (ret == 0) { + if (ret == 0) { if (ADIOI_PVFS2_greq_class == 0) { - MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn, + MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn, ADIOI_PVFS2_aio_free_fn, MPIU_Greq_cancel_fn, ADIOI_PVFS2_aio_poll_fn, ADIOI_PVFS2_aio_wait_fn, &ADIOI_PVFS2_greq_class); @@ -188,7 +188,7 @@ int ADIOI_PVFS2_aio_poll_fn(void *extra_state, MPI_Status *status) } /* wait for multiple requests to complete */ -int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states, +int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states, double timeout, MPI_Status *status) { @@ -214,7 +214,7 @@ int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states, for (i=0; i< count; i++) { for (j=0; jop_id) { - aio_reqlist[j]->nbytes = + aio_reqlist[j]->nbytes = aio_reqlist[j]->resp_io.total_completed; MPI_Grequest_complete(aio_reqlist[j]->req); } @@ -226,5 +226,5 @@ int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states, /* - * vim: ts=8 sts=4 sw=4 noexpandtab + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_close.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_close.c index 847dd8341cd..907b0b031d9 100644 --- 
a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_close.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_close.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -23,6 +23,6 @@ void ADIOI_PVFS2_Close(ADIO_File fd, int *error_code) *error_code = MPI_SUCCESS; } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_common.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_common.c index ccd3c1916c6..b85e9b590a5 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_common.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_common.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 2003 University of Chicago. +/* + * Copyright (C) 2003 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -37,7 +37,7 @@ void ADIOI_PVFS2_End(int *error_code) *error_code = MPI_SUCCESS; } -int ADIOI_PVFS2_End_call(MPI_Comm comm, int keyval, +int ADIOI_PVFS2_End_call(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state) { int error_code; @@ -77,10 +77,10 @@ void ADIOI_PVFS2_Init(int *error_code ) return; } - + MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_PVFS2_End_call, - &ADIOI_PVFS2_Initialized, (void *)0); - /* just like romio does, we make a dummy attribute so we + &ADIOI_PVFS2_Initialized, (void *)0); + /* just like romio does, we make a dummy attribute so we * get cleaned up */ MPI_Attr_put(MPI_COMM_SELF, ADIOI_PVFS2_Initialized, (void *)0); } @@ -88,7 +88,7 @@ void ADIOI_PVFS2_Init(int *error_code ) void ADIOI_PVFS2_makeattribs(PVFS_sys_attr * attribs) { memset(attribs, 0, sizeof(PVFS_sys_attr)); - + attribs->owner = geteuid(); attribs->group = getegid(); attribs->perms = 0644; @@ -145,6 +145,6 @@ int ADIOI_PVFS2_error_convert(int pvfs_error) } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_common.h b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_common.h index f63f84a27bd..a34f84ee3ef 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_common.h +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_common.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- * vim: ts=8 sts=4 sw=4 noexpandtab * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -28,7 +28,7 @@ void ADIOI_PVFS2_Init(int *error_code ); void ADIOI_PVFS2_makeattribs(PVFS_sys_attr * attribs); void ADIOI_PVFS2_makecredentials(PVFS_credentials * credentials); void ADIOI_PVFS2_End(int *error_code); -int ADIOI_PVFS2_End_call(MPI_Comm comm, int keyval, +int ADIOI_PVFS2_End_call(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state); int ADIOI_PVFS2_error_convert(int pvfs_error); diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_delete.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_delete.c index f5770bb393c..b360288a643 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_delete.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_delete.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago. + * Copyright (C) 2003 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -21,7 +21,7 @@ void ADIOI_PVFS2_Delete(const char *filename, int *error_code) ADIOI_PVFS2_Init(error_code); /* --BEGIN ERROR HANDLING-- */ - if (*error_code != MPI_SUCCESS) + if (*error_code != MPI_SUCCESS) { /* ADIOI_PVFS2_INIT handles creating error codes itself */ return; @@ -47,7 +47,7 @@ void ADIOI_PVFS2_Delete(const char *filename, int *error_code) ret = PVFS_sys_getparent(cur_fs, pvfs_path, &credentials, &resp_getparent); - ret = PVFS_sys_remove(resp_getparent.basename, + ret = PVFS_sys_remove(resp_getparent.basename, resp_getparent.parent_ref, &credentials); /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { @@ -64,6 +64,6 @@ void ADIOI_PVFS2_Delete(const char *filename, int *error_code) return; } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_fcntl.c index e07291f7575..f28f4f49272 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_fcntl.c +++ 
b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -21,7 +21,7 @@ void ADIOI_PVFS2_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, switch(flag) { case ADIO_FCNTL_GET_FSIZE: - ret = PVFS_sys_getattr(pvfs_fs->object_ref, PVFS_ATTR_SYS_SIZE, + ret = PVFS_sys_getattr(pvfs_fs->object_ref, PVFS_ATTR_SYS_SIZE, &(pvfs_fs->credentials), &resp_getattr); if (ret != 0 ) { /* --BEGIN ERROR HANDLING-- */ @@ -54,6 +54,6 @@ void ADIOI_PVFS2_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, } } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_flush.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_flush.c index 18061084a85..a82cc54e600 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_flush.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_flush.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -12,12 +12,12 @@ * flush request, it will stress the PVFS2 servers with redundant * PVFS_sys_flush requests. Instead, one process should wait for * everyone to catch up, do the sync, then broadcast the result. 
We can - * get away with this thanks to PVFS2's stateless design + * get away with this thanks to PVFS2's stateless design */ -void ADIOI_PVFS2_Flush(ADIO_File fd, int *error_code) -{ - int ret, rank, dummy=0, dummy_in=0; +void ADIOI_PVFS2_Flush(ADIO_File fd, int *error_code) +{ + int ret, rank, dummy=0, dummy_in=0; ADIOI_PVFS2_fs *pvfs_fs; static char myname[] = "ADIOI_PVFS2_FLUSH"; @@ -30,7 +30,7 @@ void ADIOI_PVFS2_Flush(ADIO_File fd, int *error_code) /* unlike ADIOI_PVFS2_Resize, MPI_File_sync() does not perform any * syncronization */ - MPI_Reduce(&dummy_in, &dummy, 1, MPI_INT, MPI_SUM, + MPI_Reduce(&dummy_in, &dummy, 1, MPI_INT, MPI_SUM, fd->hints->ranklist[0], fd->comm); /* io_worker computed in ADIO_Open */ @@ -50,6 +50,6 @@ void ADIOI_PVFS2_Flush(ADIO_File fd, int *error_code) /* --END ERROR HANDLING-- */ } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_hints.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_hints.c index 47522064e30..5961726e2be 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_hints.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_hints.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -44,15 +44,15 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_DISABLE; fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_DISABLE; - + /* any user-provided hints? 
*/ if (users_info != MPI_INFO_NULL) { value = (char *) ADIOI_Malloc( (MPI_MAX_INFO_VAL+1)*sizeof(char)); /* pvfs2 debugging */ - ADIOI_Info_get(users_info, "romio_pvfs2_debugmask", + ADIOI_Info_get(users_info, "romio_pvfs2_debugmask", MPI_MAX_INFO_VAL, value, &flag); if (flag) { - tmp_value = fd->hints->fs_hints.pvfs2.debugmask = + tmp_value = fd->hints->fs_hints.pvfs2.debugmask = PVFS_debug_eventlog_to_mask(value); MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm); @@ -64,7 +64,7 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) return; } /* --END ERROR HANDLING-- */ - + ADIOI_Info_set(fd->info, "romio_pvfs2_debugmask", value); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io.h b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io.h index aefe0653ecb..c9a394f2042 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io.h +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- * vim: ts=8 sts=4 sw=4 noexpandtab * - * Copyright (C) 2006 University of Chicago. + * Copyright (C) 2006 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -60,7 +60,7 @@ void print_buf_file_ol_pairs(int64_t buf_off_arr[], /* Datatype I/O helper prototypes */ int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, + ADIO_Offset offset, ADIO_Status *status, int *error_code, int rw_type); int convert_named(MPI_Datatype *mpi_dtype, diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c index 09012523b8a..c8046b36897 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- * vim: ts=8 sts=4 sw=4 noexpandtab * - * Copyright (C) 2006 University of Chicago. + * Copyright (C) 2006 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -45,9 +45,9 @@ int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count, ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); /* changed below if error */ - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; - /* datatype is the memory type + /* datatype is the memory type * fd->filetype is the file type */ MPI_Type_size_x(fd->filetype, &filetype_size); if (filetype_size == 0) { @@ -70,34 +70,34 @@ int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count, * case is handled by using fd->disp and byte-converted off. 
*/ pvfs_disp = fd->disp; - if (file_ptr_type == ADIO_INDIVIDUAL) + if (file_ptr_type == ADIO_INDIVIDUAL) { - if (filetype_is_contig) + if (filetype_is_contig) { off = fd->fp_ind - fd->disp; } - else + else { int flag = 0; /* Should have already been flattened in ADIO_Open*/ - while (flat_file_p->type != fd->filetype) + while (flat_file_p->type != fd->filetype) { flat_file_p = flat_file_p->next; } num_filetypes = -1; - while (!flag) + while (!flag) { num_filetypes++; - for (i = 0; i < flat_file_p->count; i++) + for (i = 0; i < flat_file_p->count; i++) { /* Start on a non zero-length region */ - if (flat_file_p->blocklens[i]) + if (flat_file_p->blocklens[i]) { if (fd->disp + flat_file_p->indices[i] + (num_filetypes * filetype_extent) + flat_file_p->blocklens[i] > fd->fp_ind && - fd->disp + flat_file_p->indices[i] <= - fd->fp_ind) + fd->disp + flat_file_p->indices[i] <= + fd->fp_ind) { cur_flat_file_reg_off = fd->fp_ind - (fd->disp + flat_file_p->indices[i] + @@ -116,7 +116,7 @@ int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count, } } else /* ADIO_EXPLICIT */ - { + { off = etype_size * offset; } @@ -144,7 +144,7 @@ int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count, if (ret != 0) /* TODO: convert this to MPIO error handling */ fprintf(stderr, "ADIOI_PVFS2_stridedDtypeIO: error in final" " CONTIG memory type\n"); - PVFS_Request_free(&tmp_mem_req); + PVFS_Request_free(&tmp_mem_req); /* pvfs_disp is used to offset the filetype */ ret = PVFS_Request_hindexed(1, &pvfs_blk, &pvfs_disp, @@ -163,7 +163,7 @@ int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count, if (ret != 0) { fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: Warning - PVFS_sys_" - "read/write returned %d and completed %Ld bytes.\n", + "read/write returned %d and completed %Ld bytes.\n", ret, (long long)resp_io.total_completed); *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, @@ -177,40 +177,40 @@ int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, 
int count, { fd->fp_ind = off += resp_io.total_completed; } - + error_state: fd->fp_sys_posn = -1; /* set it to null. */ PVFS_Request_free(&mem_req); - PVFS_Request_free(&file_req); + PVFS_Request_free(&file_req); #ifdef DEBUG_DTYPE fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: " - "resp_io.total_completed=%Ld,ret=%d\n", + "resp_io.total_completed=%Ld,ret=%d\n", resp_io.total_completed, ret); #endif #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, resp_io.total_completed); /* This is a temporary way of filling in status. The right way is to - * keep track of how much data was actually acccessed by + * keep track of how much data was actually acccessed by * ADIOI_BUFFERED operations */ #endif return ret; } /* convert_mpi_pvfs2_dtype - Convert a MPI datatype into - * a PVFS2 datatype so that we can natively use the PVFS2 - * datatypes in the PVFS2 I/O calls instead of converting - * all datatypes to the hindexed method + * a PVFS2 datatype so that we can natively use the PVFS2 + * datatypes in the PVFS2 I/O calls instead of converting + * all datatypes to the hindexed method * return 1 - a leaf node - * return 0 - normal return + * return 0 - normal return * return -1 - problems */ -int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, +int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, PVFS_Request *pvfs_dtype) { - int num_int = -1, num_addr = -1, num_dtype = -1, + int num_int = -1, num_addr = -1, num_dtype = -1, combiner = -1, i = -1, ret = -1, leaf = -1; int *arr_int = NULL; MPI_Aint *arr_addr = NULL; @@ -227,16 +227,16 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, &num_dtype, &combiner); - /* Depending on type of datatype do the following + /* Depending on type of datatype do the following * operations */ - + if (combiner == MPI_COMBINER_NAMED) { convert_named(mpi_dtype, pvfs_dtype, combiner); return 1; } - /* Allocate space for the arrays necessary for + /* Allocate space for the arrays necessary for * MPI_Type_get_contents */ if 
((arr_int = ADIOI_Malloc(sizeof(int)*num_int)) == NULL) @@ -266,7 +266,7 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, arr_addr, arr_dtype); - /* If it's not a predefined datatype, it is either a + /* If it's not a predefined datatype, it is either a * derived datatype or a structured datatype */ if (combiner != MPI_COMBINER_STRUCT) @@ -278,28 +278,28 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, { case MPI_COMBINER_CONTIGUOUS: leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); - ret = PVFS_Request_contiguous(arr_int[0], + ret = PVFS_Request_contiguous(arr_int[0], *old_pvfs_dtype, pvfs_dtype); break; case MPI_COMBINER_VECTOR: leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); ret = PVFS_Request_vector(arr_int[0], arr_int[1], - arr_int[2], *old_pvfs_dtype, + arr_int[2], *old_pvfs_dtype, pvfs_dtype); break; case MPI_COMBINER_HVECTOR: leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); ret = PVFS_Request_hvector(arr_int[0], arr_int[1], - arr_addr[0], *old_pvfs_dtype, + arr_addr[0], *old_pvfs_dtype, pvfs_dtype); break; - /* Both INDEXED and HINDEXED types require PVFS_size - * address arrays. Therefore, we need to copy and - * convert the data from MPI_get_contents() into + /* Both INDEXED and HINDEXED types require PVFS_size + * address arrays. 
Therefore, we need to copy and + * convert the data from MPI_get_contents() into * a PVFS_size buffer */ case MPI_COMBINER_INDEXED: leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); - if ((pvfs_arr_disp = + if ((pvfs_arr_disp = ADIOI_Malloc(arr_int[0]*sizeof(PVFS_size))) == 0) { fprintf(stderr, "convert_mpi_pvfs2_dtype: " @@ -307,17 +307,17 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, } for (i = 0; i < arr_int[0]; i++) { - pvfs_arr_disp[i] = + pvfs_arr_disp[i] = (PVFS_size) arr_int[arr_int[0]+1+i]; } - ret = PVFS_Request_indexed(arr_int[0], &arr_int[1], + ret = PVFS_Request_indexed(arr_int[0], &arr_int[1], pvfs_arr_disp, *old_pvfs_dtype, pvfs_dtype); ADIOI_Free(pvfs_arr_disp); break; case MPI_COMBINER_HINDEXED: leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); - if ((pvfs_arr_disp = + if ((pvfs_arr_disp = ADIOI_Malloc(arr_int[0]*sizeof(PVFS_size))) == 0) { fprintf(stderr, "convert_mpi_pvfs2_dtype: " @@ -325,17 +325,17 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, } for (i = 0; i < arr_int[0]; i++) { - pvfs_arr_disp[i] = + pvfs_arr_disp[i] = (PVFS_size) arr_addr[i]; } - ret = PVFS_Request_hindexed(arr_int[0], &arr_int[1], + ret = PVFS_Request_hindexed(arr_int[0], &arr_int[1], (int64_t *)&arr_addr[0], *old_pvfs_dtype, pvfs_dtype); - ADIOI_Free(pvfs_arr_disp); + ADIOI_Free(pvfs_arr_disp); break; case MPI_COMBINER_DUP: leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype); - ret = PVFS_Request_contiguous(1, + ret = PVFS_Request_contiguous(1, *old_pvfs_dtype, pvfs_dtype); break; @@ -343,7 +343,7 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, /* No native PVFS2 support for this operation currently */ ADIOI_Free(old_pvfs_dtype); fprintf(stderr, "convert_mpi_pvfs2_dtype: " - "INDEXED_BLOCK is unsupported\n"); + "INDEXED_BLOCK is unsupported\n"); break; case MPI_COMBINER_HINDEXED_BLOCK: /* No native PVFS2 support for this operation currently */ @@ -354,42 +354,42 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype 
*mpi_dtype, case MPI_COMBINER_HINDEXED_INTEGER: ADIOI_Free(old_pvfs_dtype); fprintf(stderr, "convert_mpi_pvfs2_dtype: " - "HINDEXED_INTEGER is unsupported\n"); + "HINDEXED_INTEGER is unsupported\n"); break; case MPI_COMBINER_STRUCT_INTEGER: ADIOI_Free(old_pvfs_dtype); fprintf(stderr, "convert_mpi_pvfs2_dtype: " - "STRUCT_INTEGER is unsupported\n"); + "STRUCT_INTEGER is unsupported\n"); break; case MPI_COMBINER_SUBARRAY: ADIOI_Free(old_pvfs_dtype); fprintf(stderr, "convert_mpi_pvfs2_dtype: " - "SUBARRAY is unsupported\n"); + "SUBARRAY is unsupported\n"); break; case MPI_COMBINER_DARRAY: ADIOI_Free(old_pvfs_dtype); fprintf(stderr, "convert_mpi_pvfs2_dtype: " - "DARRAY is unsupported\n"); + "DARRAY is unsupported\n"); break; case MPI_COMBINER_F90_REAL: ADIOI_Free(old_pvfs_dtype); fprintf(stderr, "convert_mpi_pvfs2_dtype: " - "F90_REAL is unsupported\n"); + "F90_REAL is unsupported\n"); break; case MPI_COMBINER_F90_COMPLEX: ADIOI_Free(old_pvfs_dtype); fprintf(stderr, "convert_mpi_pvfs2_dtype: " - "F90_COMPLEX is unsupported\n"); + "F90_COMPLEX is unsupported\n"); break; case MPI_COMBINER_F90_INTEGER: ADIOI_Free(old_pvfs_dtype); fprintf(stderr, "convert_mpi_pvfs2_dtype: " - "F90_INTEGER is unsupported\n"); + "F90_INTEGER is unsupported\n"); break; case MPI_COMBINER_RESIZED: ADIOI_Free(old_pvfs_dtype); fprintf(stderr, "convert_mpi_pvfs2_dtype: " - "RESIZED is unsupported\n"); + "RESIZED is unsupported\n"); break; default: break; @@ -400,7 +400,7 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, "for a derived datatype\n"); #ifdef DEBUG_DTYPE - print_dtype_info(combiner, + print_dtype_info(combiner, num_int, num_addr, num_dtype, @@ -418,7 +418,7 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, PVFS_Request_free(old_pvfs_dtype); ADIOI_Free(old_pvfs_dtype); - + return ret; } else /* MPI_COMBINER_STRUCT */ @@ -430,9 +430,9 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, /* When converting into a PVFS_Request_struct, we no longer * can use MPI_LB and 
MPI_UB. Therfore, we have to do the - * following. - * We simply ignore all the MPI_LB and MPI_UB types and - * get the lb and extent and pass it on through a + * following. + * We simply ignore all the MPI_LB and MPI_UB types and + * get the lb and extent and pass it on through a * PVFS resized_req */ arr_count = 0; @@ -450,7 +450,7 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, MPI_Type_get_extent(*mpi_dtype, &mpi_lb, &mpi_extent); pvfs_lb = mpi_lb; pvfs_extent = mpi_extent; - if ((pvfs_arr_len = ADIOI_Malloc(arr_count*sizeof(int))) + if ((pvfs_arr_len = ADIOI_Malloc(arr_count*sizeof(int))) == NULL) { fprintf(stderr, "convert_mpi_pvfs2_dtype: " @@ -464,7 +464,7 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, fprintf(stderr, "convert_mpi_pvfs2_dtype: " "Failed to allocate PVFS_Requests\n"); - if ((pvfs_arr_disp = ADIOI_Malloc(arr_count*sizeof(PVFS_size))) + if ((pvfs_arr_disp = ADIOI_Malloc(arr_count*sizeof(PVFS_size))) == NULL) { fprintf(stderr, "convert_mpi_pvfs2_dtype: " @@ -480,12 +480,12 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, leaf = convert_mpi_pvfs2_dtype( &arr_dtype[i], &old_pvfs_dtype_arr[arr_count]); if (leaf != 1) - MPI_Type_free(&arr_dtype[i]); - pvfs_arr_disp[arr_count] = + MPI_Type_free(&arr_dtype[i]); + pvfs_arr_disp[arr_count] = (PVFS_size) arr_addr[i]; if (has_lb_ub) { - pvfs_arr_len[arr_count] = + pvfs_arr_len[arr_count] = arr_int[i+1]; } arr_count++; @@ -500,8 +500,8 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, if ((tmp_pvfs_dtype = ADIOI_Malloc(sizeof(PVFS_Request))) == NULL) fprintf(stderr, "convert_mpi_pvfs2_dtype: " "Failed to allocate PVFS_Request\n"); - - ret = PVFS_Request_struct(arr_count, pvfs_arr_len, + + ret = PVFS_Request_struct(arr_count, pvfs_arr_len, pvfs_arr_disp, old_pvfs_dtype_arr, tmp_pvfs_dtype); if (ret != 0) @@ -517,7 +517,7 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, arr_count++; } } - + #ifdef DEBUG_DTYPE fprintf(stderr, "STRUCT(WITHOUT %d LB or UB)(%d,[", arr_int[0] 
- arr_count, arr_count); @@ -528,8 +528,8 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, fprintf(stderr, "]\n"); fprintf(stderr, "RESIZED(LB = %Ld, EXTENT = %Ld)\n", pvfs_lb, pvfs_extent); -#endif - ret = PVFS_Request_resized(*tmp_pvfs_dtype, +#endif + ret = PVFS_Request_resized(*tmp_pvfs_dtype, pvfs_lb, pvfs_extent, pvfs_dtype); if (ret != 0) fprintf(stderr, "Error in PVFS_Request_resize\n"); @@ -539,7 +539,7 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, } else /* No MPI_LB or MPI_UB datatypes */ { - ret = PVFS_Request_struct(arr_int[0], &arr_int[1], + ret = PVFS_Request_struct(arr_int[0], &arr_int[1], pvfs_arr_disp, old_pvfs_dtype_arr, pvfs_dtype); if (ret != 0) @@ -553,14 +553,14 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, } #ifdef DEBUG_DTYPE - print_dtype_info(combiner, + print_dtype_info(combiner, num_int, num_addr, num_dtype, arr_int, arr_addr, arr_dtype); -#endif +#endif } ADIOI_Free(arr_int); @@ -579,9 +579,9 @@ int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, return -1; } -int convert_named(MPI_Datatype *mpi_dtype, +int convert_named(MPI_Datatype *mpi_dtype, PVFS_Request *pvfs_dtype, int combiner) -{ +{ int ret = -1; #ifdef DEBUG_DTYPE fprintf(stderr, "NAMED"); @@ -681,7 +681,7 @@ int convert_named(MPI_Datatype *mpi_dtype, return ret; } -void print_dtype_info(int combiner, +void print_dtype_info(int combiner, int num_int, int num_addr, int num_dtype, @@ -696,7 +696,7 @@ void print_dtype_info(int combiner, fprintf(stderr, "CONTIG(%d)\n", arr_int[0]); break; case MPI_COMBINER_VECTOR: - fprintf(stderr, "VECTOR(%d,%d,%d)\n", + fprintf(stderr, "VECTOR(%d,%d,%d)\n", arr_int[0], arr_int[1], arr_int[2]); break; case MPI_COMBINER_HVECTOR: diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c index c5d03d151c9..014ec658a8f 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c +++ 
b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- * vim: ts=8 sts=4 sw=4 noexpandtab * * Copyright (C) 2006 Unknown (TODO: fix this) @@ -27,7 +27,7 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, int etypes_in_filetype = -1, size_in_filetype = -1; int bytes_into_filetype = 0; MPI_Offset total_bytes_accessed = 0; - + /* parameters for offset-length pairs arrays */ int64_t buf_off_arr[MAX_OL_COUNT]; int32_t buf_len_arr[MAX_OL_COUNT]; @@ -35,7 +35,7 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, int32_t file_len_arr[MAX_OL_COUNT]; int32_t buf_ol_count = 0; int32_t file_ol_count = 0; - + /* parameters for flattened memory and file datatypes*/ int flat_buf_index = 0; int flat_file_index = 0; @@ -45,7 +45,7 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, MPI_Count buftype_size = -1, filetype_size = -1; MPI_Aint filetype_extent = -1, buftype_extent = -1;; int buftype_is_contig = -1, filetype_is_contig = -1; - + /* PVFS2 specific parameters */ PVFS_Request mem_req, file_req; ADIOI_PVFS2_fs * pvfs_fs; @@ -73,7 +73,7 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, io_size = buftype_size*count; pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; - + /* Flatten the memory datatype * (file datatype has already been flattened in ADIO open * unless it is contibuous, then we need to flatten it manually) @@ -84,18 +84,18 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, { ADIOI_Flatten_datatype(datatype); flat_buf_p = ADIOI_Flatlist; - while (flat_buf_p->type != datatype) + while (flat_buf_p->type != datatype) flat_buf_p = flat_buf_p->next; } - else + else { /* flatten and add to the list */ flat_buf_p = (ADIOI_Flatlist_node *) ADIOI_Malloc (sizeof(ADIOI_Flatlist_node)); flat_buf_p->blocklens = 
(ADIO_Offset*)ADIOI_Malloc(sizeof(ADIO_Offset)); - flat_buf_p->indices = + flat_buf_p->indices = (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset)); - /* For the buffer, we can optimize the buftype, this is not + /* For the buffer, we can optimize the buftype, this is not * possible with the filetype since it is tiled */ buftype_size = buftype_size*count; buftype_extent = buftype_size*count; @@ -110,7 +110,7 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, * flattened */ ADIOI_Flatten_datatype(fd->filetype); flat_file_p = ADIOI_Flatlist; - while (flat_file_p->type != fd->filetype) + while (flat_file_p->type != fd->filetype) flat_file_p = flat_file_p->next; } else @@ -119,18 +119,18 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, flat_file_p = (ADIOI_Flatlist_node *) ADIOI_Malloc (sizeof(ADIOI_Flatlist_node)); flat_file_p->blocklens =(ADIO_Offset*)ADIOI_Malloc(sizeof(ADIO_Offset)); - flat_file_p->indices = + flat_file_p->indices = (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset)); flat_file_p->blocklens[0] = filetype_size; flat_file_p->indices[0] = 0; flat_file_p->count = 1; } - - /* Find out where we are in the flattened filetype (the block index, + + /* Find out where we are in the flattened filetype (the block index, * how far into the block, and how many bytes_into_filetype) - * If the file_ptr_type == ADIO_INDIVIDUAL we will use disp, fp_ind - * to figure this out (offset should always be zero) - * If file_ptr_type == ADIO_EXPLICIT, we will use disp and offset + * If the file_ptr_type == ADIO_INDIVIDUAL we will use disp, fp_ind + * to figure this out (offset should always be zero) + * If file_ptr_type == ADIO_EXPLICIT, we will use disp and offset * to figure this out. 
*/ etype_size = fd->etype_size; @@ -170,7 +170,7 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, assert(i != flat_file_p->count); } else - { + { num_filetypes = (int) (offset / num_etypes_in_filetype); etypes_in_filetype = (int) (offset % num_etypes_in_filetype); size_in_filetype = etypes_in_filetype * etype_size; @@ -178,10 +178,10 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, tmp_filetype_size = 0; for (i=0; icount; i++) { tmp_filetype_size += flat_file_p->blocklens[i]; - if (tmp_filetype_size > size_in_filetype) + if (tmp_filetype_size > size_in_filetype) { flat_file_index = i; - cur_flat_file_reg_off = flat_file_p->blocklens[i] - + cur_flat_file_reg_off = flat_file_p->blocklens[i] - (tmp_filetype_size - size_in_filetype); bytes_into_filetype = offset * filetype_size - flat_file_p->blocklens[i]; @@ -193,7 +193,7 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, fprintf(stderr, "ADIOI_PVFS2_StridedListIO: (fd->fp_ind=%Ld,fd->disp=%Ld," " offset=%Ld)\n(flat_file_index=%d,cur_flat_file_reg_off=%Ld," "bytes_into_filetype=%d)\n", - fd->fp_ind, fd->disp, offset, flat_file_index, + fd->fp_ind, fd->disp, offset, flat_file_index, cur_flat_file_reg_off, bytes_into_filetype); #endif #ifdef DEBUG_LIST2 @@ -207,13 +207,13 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, fprintf(stderr, "(offset, length) = (%Ld, %d)\n", flat_file_p->indices[i], flat_file_p->blocklens[i]); -#endif +#endif /* total data written */ cur_io_size = 0; while (cur_io_size != io_size) { - /* Initialize the temporarily unrolling lists and + /* Initialize the temporarily unrolling lists and * and associated variables */ buf_ol_count = 0; file_ol_count = 0; @@ -275,7 +275,7 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, } } while (0); #endif - + /* Run list I/O operation */ ret = PVFS_Request_hindexed(buf_ol_count, buf_len_arr, buf_off_arr, PVFS_BYTE, &mem_req); @@ -285,16 +285,16 @@ int 
ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, if (rw_type == READ) { ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0, - buf, mem_req, + buf, mem_req, &(pvfs_fs->credentials), &resp_io); } - else + else { ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, - buf, mem_req, + buf, mem_req, &(pvfs_fs->credentials), &resp_io); } - if (ret != 0) + if (ret != 0) { fprintf(stderr, "ADIOI_PVFS2_StridedListIO: Warning - PVFS_sys_" "read/write returned %d and completed %lld bytes.\n", @@ -313,14 +313,14 @@ int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count, PVFS_Request_free(&mem_req); PVFS_Request_free(&file_req); } - + #ifdef DEBUG_LIST fprintf(stderr, "ADIOI_PVFS2_StridedListIO: " - "total_bytes_accessed=%Ld,ret=%d\n", + "total_bytes_accessed=%Ld,ret=%d\n", total_bytes_accessed, ret); #endif - if (file_ptr_type == ADIO_INDIVIDUAL) + if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += total_bytes_accessed; *error_code = MPI_SUCCESS; @@ -380,11 +380,11 @@ int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p, int32_t *file_ol_count_p) { int region_size = -1; - + /* parameters for flattened memory and file datatypes*/ int64_t cur_flat_buf_reg_left = 0; int64_t cur_flat_file_reg_left = 0; - + #ifdef DEBUG_LIST2 fprintf(stderr, "gen_list_arr:\n"); #endif @@ -394,31 +394,31 @@ int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p, fprintf(stderr, "buf_ol_count != 0 || file_ol_count != 0\n"); return -1; } - - /* Start on a non-zero memory and file region - * Note this does not affect the bytes_completed - * since no data is in these regions. Initialize the + + /* Start on a non-zero memory and file region + * Note this does not affect the bytes_completed + * since no data is in these regions. Initialize the * first memory and file offsets. 
*/ while (flat_buf_p->blocklens[(*flat_buf_index_p)] == 0) { - (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) % + (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) % flat_buf_p->count; } buf_off_arr[*buf_ol_count_p] = - (*bytes_completed / flat_buf_size) * - flat_buf_extent + + (*bytes_completed / flat_buf_size) * + flat_buf_extent + flat_buf_p->indices[*flat_buf_index_p] + *cur_flat_buf_reg_off_p; buf_len_arr[*buf_ol_count_p] = 0; while (flat_file_p->blocklens[(*flat_file_index_p)] == 0) { - (*flat_file_index_p) = ((*flat_file_index_p) + 1) % + (*flat_file_index_p) = ((*flat_file_index_p) + 1) % flat_file_p->count; } - file_off_arr[*file_ol_count_p] = disp + - (((bytes_into_filetype + *bytes_completed) / flat_file_size) * - flat_file_extent) + + file_off_arr[*file_ol_count_p] = disp + + (((bytes_into_filetype + *bytes_completed) / flat_file_size) * + flat_file_extent) + flat_file_p->indices[*flat_file_index_p] + *cur_flat_file_reg_off_p; file_len_arr[*file_ol_count_p] = 0; @@ -442,21 +442,21 @@ int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p, - *cur_flat_file_reg_off_p; #ifdef DEBUG_LIST2 - fprintf(stderr, + fprintf(stderr, "flat_buf_index=%d flat_buf->blocklens[%d]=%d\n" "cur_flat_buf_reg_left=%Ld " - "*cur_flat_buf_reg_off_p=%Ld\n" + "*cur_flat_buf_reg_off_p=%Ld\n" "flat_file_index=%d flat_file->blocklens[%d]=%d\n" "cur_flat_file_reg_left=%Ld " - "*cur_flat_file_reg_off_p=%Ld\n" + "*cur_flat_file_reg_off_p=%Ld\n" "bytes_completed=%Ld\n" "buf_ol_count=%d file_ol_count=%d\n" "buf_len_arr[%d]=%d file_len_arr[%d]=%d\n\n", - *flat_buf_index_p, *flat_buf_index_p, + *flat_buf_index_p, *flat_buf_index_p, flat_buf_p->blocklens[*flat_buf_index_p], cur_flat_buf_reg_left, *cur_flat_buf_reg_off_p, - *flat_file_index_p, *flat_file_index_p, + *flat_file_index_p, *flat_file_index_p, flat_file_p->blocklens[*flat_file_index_p], cur_flat_file_reg_left, *cur_flat_file_reg_off_p, @@ -475,26 +475,26 @@ int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p, region_size = 
cur_flat_file_reg_left; else region_size = cur_flat_buf_reg_left; - + if (region_size > total_io_size - *bytes_completed) region_size = total_io_size - *bytes_completed; - + /* Add this piece to both the mem and file arrays - * coalescing offset-length pairs if possible and advance + * coalescing offset-length pairs if possible and advance * the pointers through the flatten mem and file datatypes - * as well Note: no more than a single piece can be done + * as well Note: no more than a single piece can be done * since we take the smallest one possible */ - + if (cur_flat_buf_reg_left == region_size) { #ifdef DEBUG_LIST2 fprintf(stderr, "reached end of memory block...\n"); #endif - (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) % + (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) % flat_buf_p->count; while (flat_buf_p->blocklens[(*flat_buf_index_p)] == 0) { - (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) % + (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) % flat_buf_p->count; } *cur_flat_buf_reg_off_p = 0; @@ -524,13 +524,13 @@ int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p, } #endif - /* Don't prepare for the next piece if we have reached + /* Don't prepare for the next piece if we have reached * the limit or else it will segment fault. 
*/ if ((*buf_ol_count_p) != max_ol_count) { - buf_off_arr[*buf_ol_count_p] = - ((*bytes_completed + region_size) / flat_buf_size) * - flat_buf_extent + + buf_off_arr[*buf_ol_count_p] = + ((*bytes_completed + region_size) / flat_buf_size) * + flat_buf_extent + flat_buf_p->indices[*flat_buf_index_p] + (*cur_flat_buf_reg_off_p); buf_len_arr[*buf_ol_count_p] = 0; @@ -549,8 +549,8 @@ int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p, { fprintf(stderr, "gen_listio_arr: Error\n"); } - - /* To calculate the absolute file offset we need to + + /* To calculate the absolute file offset we need to * add the disp, how many filetypes we have gone through, * the relative block offset in the filetype and how far * into the block we have gone. */ @@ -559,11 +559,11 @@ int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p, #ifdef DEBUG_LIST2 fprintf(stderr, "reached end of file block...\n"); #endif - (*flat_file_index_p) = ((*flat_file_index_p) + 1) % + (*flat_file_index_p) = ((*flat_file_index_p) + 1) % flat_file_p->count; while (flat_file_p->blocklens[(*flat_file_index_p)] == 0) { - (*flat_file_index_p) = ((*flat_file_index_p) + 1) % + (*flat_file_index_p) = ((*flat_file_index_p) + 1) % flat_file_p->count; } (*cur_flat_file_reg_off_p) = 0; @@ -597,10 +597,10 @@ int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p, * the limit or else it will segment fault. 
*/ if ((*file_ol_count_p) != max_ol_count) { - file_off_arr[*file_ol_count_p] = disp + - (((bytes_into_filetype + *bytes_completed + region_size) - / flat_file_size) * - flat_file_extent) + + file_off_arr[*file_ol_count_p] = disp + + (((bytes_into_filetype + *bytes_completed + region_size) + / flat_file_size) * + flat_file_extent) + flat_file_p->indices[*flat_file_index_p] + (*cur_flat_file_reg_off_p); file_len_arr[*file_ol_count_p] = 0; @@ -620,12 +620,12 @@ int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p, fprintf(stderr, "gen_listio_arr: Error\n"); } #ifdef DEBUG_LIST2 - fprintf(stderr, + fprintf(stderr, "------------------------------\n\n"); #endif *bytes_completed += region_size; } - /* Increment the count if we stopped in the middle of a + /* Increment the count if we stopped in the middle of a * memory or file region */ if (*cur_flat_buf_reg_off_p != 0) (*buf_ol_count_p)++; @@ -645,7 +645,7 @@ void print_buf_file_ol_pairs(int64_t buf_off_arr[], int rw_type) { int i = -1; - + fprintf(stderr, "buf_ol_pairs(offset,length) count = %d\n", buf_ol_count); for (i = 0; i < buf_ol_count; i++) diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_open.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_open.c index c5d933f42b5..e14c40c9ac3 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- * vim: ts=8 sts=4 sw=4 noexpandtab * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -14,13 +14,13 @@ struct open_status_s { PVFS_object_ref object_ref; }; typedef struct open_status_s open_status; - + /* steps for getting a handle: (it gets a little convoluted, but at least - * it's deterministic) - * . lookup the file. + * it's deterministic) + * . lookup the file. * . 
if lookup succeeds, but we were passed MPI_MODE_EXCL, that's an error - * . if lookup fails, the file might not exist. - * in that case, create the file if we were passed MPI_MODE_CREATE + * . if lookup fails, the file might not exist. + * in that case, create the file if we were passed MPI_MODE_CREATE * . if the create fails, that means someone else created the file between * our call to lookup and our call to create (like if N processors all * open the same file with MPI_COMM_SELF). Then we can just look up the @@ -31,7 +31,7 @@ typedef struct open_status_s open_status; */ static void fake_an_open(PVFS_fs_id fs_id, char *pvfs_name, int access_mode, int nr_datafiles, PVFS_size strip_size, - ADIOI_PVFS2_fs *pvfs2_fs, + ADIOI_PVFS2_fs *pvfs2_fs, open_status *o_status) { int ret; @@ -48,7 +48,7 @@ static void fake_an_open(PVFS_fs_id fs_id, char *pvfs_name, int access_mode, } dist = NULL; - + memset(&resp_lookup, 0, sizeof(resp_lookup)); memset(&resp_getparent, 0, sizeof(resp_getparent)); memset(&resp_create, 0, sizeof(resp_create)); @@ -59,13 +59,13 @@ static void fake_an_open(PVFS_fs_id fs_id, char *pvfs_name, int access_mode, if ( ret == (-PVFS_ENOENT)) { if (access_mode & ADIO_CREATE) { ret = PVFS_sys_getparent(fs_id, pvfs_name, - &(pvfs2_fs->credentials), &resp_getparent); + &(pvfs2_fs->credentials), &resp_getparent); if (ret < 0) { FPRINTF(stderr, "pvfs_sys_getparent returns with %d\n", ret); o_status->error = ret; return; } - + /* Set the distribution strip size if specified */ if (0 < strip_size) { /* Note that the distribution is hardcoded here */ @@ -83,13 +83,13 @@ static void fake_an_open(PVFS_fs_id fs_id, char *pvfs_name, int access_mode, /* Perform file creation */ #ifdef HAVE_PVFS2_CREATE_WITHOUT_LAYOUT - ret = PVFS_sys_create(resp_getparent.basename, - resp_getparent.parent_ref, attribs, - &(pvfs2_fs->credentials), dist, &resp_create); -#else - ret = PVFS_sys_create(resp_getparent.basename, - resp_getparent.parent_ref, attribs, - 
&(pvfs2_fs->credentials), dist, NULL, &resp_create); + ret = PVFS_sys_create(resp_getparent.basename, + resp_getparent.parent_ref, attribs, + &(pvfs2_fs->credentials), dist, &resp_create); +#else + ret = PVFS_sys_create(resp_getparent.basename, + resp_getparent.parent_ref, attribs, + &(pvfs2_fs->credentials), dist, NULL, &resp_create); #endif /* if many creates are happening in this directory, the earlier @@ -99,7 +99,7 @@ static void fake_an_open(PVFS_fs_id fs_id, char *pvfs_name, int access_mode, * handle */ if (ret == (-PVFS_EEXIST)) { ret = PVFS_sys_lookup(fs_id, pvfs_name, - &(pvfs2_fs->credentials), &resp_lookup, + &(pvfs2_fs->credentials), &resp_lookup, PVFS2_LOOKUP_LINK_FOLLOW); if ( ret < 0 ) { o_status->error = ret; @@ -130,7 +130,7 @@ static void fake_an_open(PVFS_fs_id fs_id, char *pvfs_name, int access_mode, /* ADIOI_PVFS2_Open: * one process opens (or creates) the file, then broadcasts the result to the - * remaining processors. + * remaining processors. * * ADIO_Open used to perform an optimization when MPI_MODE_CREATE (and before * that, MPI_MODE_EXCL) was set. 
Because PVFS2 handles file lookup and @@ -154,7 +154,7 @@ void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code) MPI_Datatype types[2] = {MPI_INT, MPI_BYTE}; int lens[2] = {1, sizeof(PVFS_object_ref)}; MPI_Aint offsets[2]; - + pvfs2_fs = (ADIOI_PVFS2_fs *) ADIOI_Malloc(sizeof(ADIOI_PVFS2_fs)); /* --BEGIN ERROR HANDLING-- */ @@ -186,7 +186,7 @@ void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code) #endif if (rank == fd->hints->ranklist[0] && fd->fs_ptr == NULL) { /* given the filename, figure out which pvfs filesystem it is on */ - ret = PVFS_util_resolve(fd->filename, &cur_fs, + ret = PVFS_util_resolve(fd->filename, &cur_fs, pvfs_path, PVFS_NAME_MAX); if (ret < 0 ) { PVFS_perror("PVFS_util_resolve", ret); @@ -225,7 +225,7 @@ void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code) /* --BEGIN ERROR HANDLING-- */ if (o_status.error != 0) - { + { ADIOI_Free(pvfs2_fs); fd->fs_ptr = NULL; *error_code = MPIO_Err_create_code(MPI_SUCCESS, diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_read.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_read.c index 007b4dc3933..667ac6cd505 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_read.c @@ -1,7 +1,7 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- - * vim: ts=8 sts=4 sw=4 noexpandtab - * - * Copyright (C) 1997 University of Chicago. +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -11,7 +11,7 @@ #include "ad_pvfs2_io.h" #include "ad_pvfs2_common.h" -void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) @@ -60,7 +60,7 @@ void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); #endif - ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, offset, buf, + ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, offset, buf, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); @@ -106,7 +106,7 @@ static int ADIOI_PVFS2_ReadStridedListIO(ADIO_File fd, void *buf, int count, static int ADIOI_PVFS2_ReadStridedDtypeIO(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, + ADIO_Offset offset, ADIO_Status *status, int *error_code) { return ADIOI_PVFS2_StridedDtypeIO(fd, buf, count, @@ -126,12 +126,12 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count, * - new List I/O (from avery) * - classic List I/O (the one that's always been in ROMIO) * I imagine we'll keep Datatype as an optional optimization, and afer a - * release or two promote it to the default + * release or two promote it to the default */ int ret = -1; if (fd->hints->fs_hints.pvfs2.posix_read == ADIOI_HINT_ENABLE) { - ADIOI_GEN_ReadStrided(fd, buf, count, datatype, + ADIOI_GEN_ReadStrided(fd, buf, count, datatype, file_ptr_type, offset, status, error_code); return; } @@ -158,12 +158,12 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count, } /* Use classic list I/O if no hints given base case */ - ADIOI_PVFS2_OldReadStrided(fd, buf, count, datatype, + ADIOI_PVFS2_OldReadStrided(fd, buf, count, datatype, file_ptr_type, offset, status, error_code); return; } /* - * vim: ts=8 sts=4 sw=4 
noexpandtab + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c index 2aee893332b..757624883b0 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c @@ -1,7 +1,7 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- - * vim: ts=8 sts=4 sw=4 noexpandtab - * - * Copyright (C) 2008 University of Chicago. +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2008 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -24,7 +24,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, int n_filetypes, etype_in_filetype; ADIO_Offset abs_off_in_filetype=0; MPI_Count filetype_size, etype_size, buftype_size; - MPI_Aint filetype_extent, buftype_extent; + MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset off, disp, start_off, initial_off; int flag, st_frd_size, st_n_filetypes; @@ -74,7 +74,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -84,7 +84,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, etype_size = fd->etype_size; bufsize = buftype_size * count; - + pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; if (!buftype_is_contig && filetype_is_contig) { @@ -97,7 +97,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; - off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : + off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : fd->disp + etype_size * offset; file_list_count = 1; @@ -119,10 +119,10 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, /* step through each block in memory, filling memory arrays */ while (b_blks_read < total_blks_to_read) { for (i=0; icount; i++) { - mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = + mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = /* TODO: fix this compiler warning */ ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]); - mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = + mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = flat_buf->blocklens[i]; file_length += flat_buf->blocklens[i]; b_blks_read++; @@ -136,7 +136,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, /* in case last read list call fills max arrays */ if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE; } - err_flag = PVFS_Request_hindexed(mem_list_count, + err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths, mem_offsets, PVFS_BYTE, &mem_req); if (err_flag < 0) break; err_flag = PVFS_Request_contiguous(file_length, @@ -145,7 +145,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); #endif - err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, + err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, file_offset, PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING @@ -164,21 +164,21 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, PVFS_Request_free(&file_req); total_bytes_read += resp_io.total_completed; /* --END ERROR HANDLING-- */ - - /* in the case of error or the last read list call, + + /* in the case of error or the last read list call, * leave here */ if (err_flag || b_blks_read == total_blks_to_read) break; file_offset += file_length; file_length = 0; - } + } } /* for (i=0; icount; i++) */ j++; } /* while (b_blks_read < total_blks_to_read) */ ADIOI_Free(mem_offsets); 
ADIOI_Free(mem_lengths); - if (file_ptr_type == ADIO_INDIVIDUAL) + if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += total_bytes_read; fd->fp_sys_posn = -1; /* set it to null. */ @@ -217,11 +217,11 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, while (!flag) { n_filetypes++; for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + + if (disp + flat_file->indices[i] + ((ADIO_Offset) n_filetypes)*filetype_extent + flat_file->blocklens[i] >= offset) { st_index = i; - frd_size = (int) (disp + flat_file->indices[i] + + frd_size = (int) (disp + flat_file->indices[i] + ((ADIO_Offset) n_filetypes)*filetype_extent + flat_file->blocklens[i] - offset); flag = 1; @@ -235,7 +235,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -247,16 +247,16 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, break; } } - + /* abs. offset in bytes in the file */ - offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + + offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + abs_off_in_filetype; } /* else [file_ptr_type != ADIO_INDIVIDUAL] */ start_off = offset; st_frd_size = frd_size; st_n_filetypes = n_filetypes; - + if (buftype_is_contig && !filetype_is_contig) { /* contiguous in memory, noncontiguous in file. 
should be the most @@ -264,13 +264,13 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, int mem_length=0; intptr_t mem_offset; - + i = 0; j = st_index; n_filetypes = st_n_filetypes; - + mem_list_count = 1; - + /* determine how many blocks in file to read */ f_data_read = ADIOI_MIN(st_frd_size, bufsize); total_blks_to_read = 1; @@ -283,17 +283,17 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, f_data_read += flat_file->blocklens[j]; total_blks_to_read++; if (j<(flat_file->count-1)) j++; - else j = 0; + else j = 0; } - + j = st_index; n_filetypes = st_n_filetypes; n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE; extra_blks = total_blks_to_read%MAX_ARRAY_SIZE; - + mem_offset = (intptr_t)buf; mem_lengths = 0; - + /* if at least one full readlist, allocate file arrays at max array size and don't free until very end */ if (n_read_lists) { @@ -310,7 +310,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, file_lengths = (int32_t*)ADIOI_Malloc(extra_blks* sizeof(int32_t)); } - + /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ for (i=0; iindices[j]; file_lengths[k] = flat_file->blocklens[j]; @@ -346,7 +346,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, } /* --END ERROR HANDLING-- */ - err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, + err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, file_offsets, PVFS_BYTE, &file_req); /* --BEGIN ERROR HANDLING-- */ @@ -366,7 +366,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); #endif - err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0, + err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0, (void *)mem_offset, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING @@ -400,7 +400,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, } for (k=0; kindices[j]; if (k == 
(extra_blks - 1)) { @@ -429,7 +429,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, } /* --END ERROR HANDLING-- */ - err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, + err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, file_offsets, PVFS_BYTE, &file_req); /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0) { @@ -446,7 +446,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); #endif - err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0, + err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0, (void *)mem_offset, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); @@ -457,7 +457,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), - "Error in PVFS_sys_read", 0); + "Error in PVFS_sys_read", 0); goto error_state; } /* --END ERROR HANDLING-- */ @@ -468,7 +468,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, } else { /* noncontiguous in memory as well as in file */ - + ADIOI_Flatten_datatype(datatype); flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; @@ -484,7 +484,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, max_mem_list = 0; max_file_list = 0; - /* run through and file max_file_list and max_mem_list so that you + /* run through and file max_file_list and max_mem_list so that you can allocate the file and memory arrays less than MAX_ARRAY_SIZE if possible */ @@ -492,7 +492,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, k = start_k; new_buffer_read = 0; mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && + while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) { /* find mem_list_count and file_list_count such 
that both are less than MAX_ARRAY_SIZE, the sum of their lengths are @@ -500,9 +500,9 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, read in the next immediate read list is less than bufsize */ if(mem_list_count) { - if((new_buffer_read + flat_buf->blocklens[k] + + if((new_buffer_read + flat_buf->blocklens[k] + size_read) > bufsize) { - end_brd_size = new_buffer_read + + end_brd_size = new_buffer_read + flat_buf->blocklens[k] - (bufsize - size_read); new_buffer_read = bufsize - size_read; } @@ -520,15 +520,15 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, } mem_list_count++; k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ j = start_j; new_file_read = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && + while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_read < new_buffer_read)) { if(file_list_count) { - if((new_file_read + flat_file->blocklens[j]) > + if((new_file_read + flat_file->blocklens[j]) > new_buffer_read) { end_frd_size = new_buffer_read - new_file_read; new_file_read = new_buffer_read; @@ -549,9 +549,9 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_read < new_buffer_read) && + if ((new_file_read < new_buffer_read) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_read = 0; mem_list_count = 0; @@ -580,13 +580,13 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, } /* while (new_buffer_read < new_file_read) */ } /* if ((new_file_read < new_buffer_read) && (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ /* fakes filling the readlist arrays of lengths found above */ k = start_k; j = 
start_j; - for (i=0; iblocklens[k] == end_brd_size) @@ -607,7 +607,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, if (i == (file_list_count - 1)) { if (flat_file->blocklens[j] == end_frd_size) frd_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { frd_size = flat_file->blocklens[j] - end_frd_size; j--; @@ -640,11 +640,11 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, * region and many (700) very small memory regions. both cases caused * problems for this code */ - if ( ( (file_list_count == 1) && + if ( ( (file_list_count == 1) && (new_file_read < flat_file->blocklens[0] ) ) || - ((mem_list_count == 1) && + ((mem_list_count == 1) && (new_buffer_read < flat_buf->blocklens[0]) ) || - ((file_list_count == MAX_ARRAY_SIZE) && + ((file_list_count == MAX_ARRAY_SIZE) && (new_file_read < flat_buf->blocklens[0]) ) || ( (mem_list_count == MAX_ARRAY_SIZE) && (new_buffer_read < flat_file->blocklens[0])) ) @@ -660,7 +660,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int)); file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t)); file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t)); - + size_read = 0; n_filetypes = st_n_filetypes; frd_size = st_frd_size; @@ -673,12 +673,12 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, /* this section calculates mem_list_count and file_list_count and also finds the possibly odd sized last array elements in new_frd_size and new_brd_size */ - + while (size_read < bufsize) { k = start_k; new_buffer_read = 0; mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && + while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) { /* find mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are @@ -686,9 +686,9 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File 
fd, void *buf, int count, read in the next immediate read list is less than bufsize */ if(mem_list_count) { - if((new_buffer_read + flat_buf->blocklens[k] + + if((new_buffer_read + flat_buf->blocklens[k] + size_read) > bufsize) { - end_brd_size = new_buffer_read + + end_brd_size = new_buffer_read + flat_buf->blocklens[k] - (bufsize - size_read); new_buffer_read = bufsize - size_read; } @@ -706,15 +706,15 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, } mem_list_count++; k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ j = start_j; new_file_read = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && + while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_read < new_buffer_read)) { if(file_list_count) { - if((new_file_read + flat_file->blocklens[j]) > + if((new_file_read + flat_file->blocklens[j]) > new_buffer_read) { end_frd_size = new_buffer_read - new_file_read; new_file_read = new_buffer_read; @@ -735,9 +735,9 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_read < new_buffer_read) && + if ((new_file_read < new_buffer_read) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_read = 0; mem_list_count = 0; @@ -766,13 +766,13 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, } /* while (new_buffer_read < new_file_read) */ } /* if ((new_file_read < new_buffer_read) && (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ /* fills the allocated readlist arrays */ k = start_k; j = start_j; - for (i=0; icount) + (int)flat_buf->indices[k]); @@ -800,7 +800,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, k = (k + 
1)%flat_buf->count; } /* for (i=0; iindices[j] + + file_offsets[i] = disp + flat_file->indices[j] + ((ADIO_Offset)n_filetypes) * filetype_extent; if (!i) { file_lengths[0] = frd_size; @@ -811,7 +811,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, file_lengths[i] = end_frd_size; if (flat_file->blocklens[j] == end_frd_size) frd_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { frd_size = flat_file->blocklens[j] - end_frd_size; j--; @@ -825,7 +825,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, n_filetypes++; } } /* for (i=0; iobject_ref, file_req, 0, + err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0, PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); @@ -882,7 +882,7 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, /* when incrementing fp_ind, need to also take into account the file type: * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----| * if we wrote N elements, offset needs to point at beginning of type, not - * at empty region at offset N+1) + * at empty region at offset N+1) * * As we discussed on mpich-discuss in may/june 2009, the code below might * look wierd, but by putting fp_ind at the last byte written, the next @@ -892,10 +892,10 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, fd->fp_ind = file_offsets[file_list_count-1]+ file_lengths[file_list_count-1]; } - + ADIOI_Free(file_offsets); ADIOI_Free(file_lengths); - + if (err_flag == 0) *error_code = MPI_SUCCESS; error_state: @@ -903,11 +903,11 @@ void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); - /* This is a temporary way of filling in status. 
The right way is to - keep track of how much data was actually read and placed in buf + /* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually read and placed in buf by ADIOI_BUFFERED_READ. */ #endif - + if (!buftype_is_contig) ADIOI_Delete_flattened(datatype); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_resize.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_resize.c index db219db0fde..c68e1ec3725 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_resize.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_resize.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -23,8 +23,8 @@ void ADIOI_PVFS2_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) MPI_Comm_rank(fd->comm, &rank); - /* We desginate one node in the communicator to be an 'io_worker' in - * ADIO_Open. This node can perform operations on files and then + /* We desginate one node in the communicator to be an 'io_worker' in + * ADIO_Open. 
This node can perform operations on files and then * inform the other nodes of the result */ /* MPI-IO semantics treat conflicting MPI_File_set_size requests the @@ -33,7 +33,7 @@ void ADIOI_PVFS2_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) * syncronization point is reached */ if (rank == fd->hints->ranklist[0]) { - ret = PVFS_sys_truncate(pvfs_fs->object_ref, + ret = PVFS_sys_truncate(pvfs_fs->object_ref, size, &(pvfs_fs->credentials)); MPI_Bcast(&ret, 1, MPI_INT, fd->hints->ranklist[0], fd->comm); } else { @@ -52,5 +52,5 @@ void ADIOI_PVFS2_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) } /* - * vim: ts=8 sts=4 sw=4 noexpandtab + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_write.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_write.c index 93e142bb687..27f08738e74 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_write.c @@ -1,7 +1,7 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- * vim: ts=8 sts=4 sw=4 noexpandtab * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -118,7 +118,7 @@ int ADIOI_PVFS2_WriteStridedListIO(ADIO_File fd, const void *buf, int count, int ADIOI_PVFS2_WriteStridedDtypeIO(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, + ADIO_Offset offset, ADIO_Status *status, int *error_code) { return ADIOI_PVFS2_StridedDtypeIO(fd, (void *)buf, count, @@ -139,7 +139,7 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, const void *buf, int count, * - new List I/O (from avery) * - classic List I/O (the one that's always been in ROMIO) * I imagine we'll keep Datatype as an optional optimization, and afer a - * release or two promote it to the default + * release or two promote it to the default */ /* a lot of near-duplication from ADIOI_PVFS2_ReadStrided: for @@ -170,7 +170,7 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, const void *buf, int count, } /* Use list I/O in the base case */ if (fd->hints->fs_hints.pvfs2.listio_write == ADIOI_HINT_ENABLE) { - ret = ADIOI_PVFS2_WriteStridedListIO(fd, buf, count, datatype, + ret = ADIOI_PVFS2_WriteStridedListIO(fd, buf, count, datatype, file_ptr_type, offset, status, error_code); return; } diff --git a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c index f51bf7932d5..f99ddc8ebf0 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c +++ b/ompi/mca/io/romio314/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c @@ -1,7 +1,7 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- - * vim: ts=8 sts=4 sw=4 noexpandtab - * - * Copyright (C) 2008 University of Chicago. +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2008 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -57,7 +57,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, MPI_Offset total_bytes_written=0; static char myname[] = "ADIOI_PVFS2_WRITESTRIDED"; - /* note: don't increase this: several parts of PVFS2 now + /* note: don't increase this: several parts of PVFS2 now * assume this limit*/ #define MAX_ARRAY_SIZE 64 @@ -90,7 +90,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -98,7 +98,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, MPI_Type_size_x(datatype, &buftype_size); MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; - + bufsize = buftype_size * count; pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; @@ -112,7 +112,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, ADIOI_Flatten_datatype(datatype); flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; - + if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { off = fd->disp + etype_size * offset; } @@ -135,12 +135,12 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, /* step through each block in memory, filling memory arrays */ while (b_blks_wrote < total_blks_to_write) { for (i=0; icount; i++) { - mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = + mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = /* TODO: fix this warning by casting to an integer that's * the same size as a char * and /then/ casting to * PVFS_size */ ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]); - mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = + mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = flat_buf->blocklens[i]; file_length += flat_buf->blocklens[i]; b_blks_wrote++; @@ -154,7 +154,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, /* in case last write list call fills max 
arrays */ if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE; } - err_flag = PVFS_Request_hindexed(mem_list_count, + err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths, mem_offsets, PVFS_BYTE, &mem_req); /* --BEGIN ERROR HANDLING-- */ @@ -184,17 +184,17 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif - err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, + err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, file_offset, PVFS_BOTTOM, - mem_req, + mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif total_bytes_written += resp_io.total_completed; - - /* in the case of error or the last write list call, + + /* in the case of error or the last write list call, * leave here */ /* --BEGIN ERROR HANDLING-- */ if (err_flag) { @@ -212,14 +212,14 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, file_length = 0; PVFS_Request_free(&mem_req); PVFS_Request_free(&file_req); - } + } } /* for (i=0; icount; i++) */ j++; } /* while (b_blks_wrote < total_blks_to_write) */ ADIOI_Free(mem_offsets); ADIOI_Free(mem_lengths); - if (file_ptr_type == ADIO_INDIVIDUAL) + if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += total_bytes_written; if (!err_flag) *error_code = MPI_SUCCESS; @@ -228,7 +228,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. 
*/ #endif @@ -247,7 +247,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, initial_off = offset; /* for each case - ADIO_Individual pointer or explicit, find offset - (file offset in bytes), n_filetypes (how many filetypes into file + (file offset in bytes), n_filetypes (how many filetypes into file to start), fwr_size (remaining amount of data in present file block), and st_index (start point in terms of blocks in starting filetype) */ @@ -258,11 +258,11 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, while (!flag) { n_filetypes++; for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + + if (disp + flat_file->indices[i] + ((ADIO_Offset) n_filetypes)*filetype_extent + flat_file->blocklens[i] >= offset) { st_index = i; - fwr_size = disp + flat_file->indices[i] + + fwr_size = disp + flat_file->indices[i] + ((ADIO_Offset) n_filetypes)*filetype_extent + flat_file->blocklens[i] - offset; flag = 1; @@ -276,7 +276,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -297,7 +297,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, start_off = offset; st_fwr_size = fwr_size; st_n_filetypes = n_filetypes; - + if (buftype_is_contig && !filetype_is_contig) { /* contiguous in memory, noncontiguous in file. 
should be the most @@ -305,14 +305,14 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, int mem_length; intptr_t mem_offset; - + i = 0; j = st_index; off = offset; n_filetypes = st_n_filetypes; - + mem_list_count = 1; - + /* determine how many blocks in file to write */ f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize); total_blks_to_write = 1; @@ -325,17 +325,17 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, f_data_wrote += flat_file->blocklens[j]; total_blks_to_write++; if (j<(flat_file->count-1)) j++; - else j = 0; + else j = 0; } - + j = st_index; n_filetypes = st_n_filetypes; n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE; extra_blks = total_blks_to_write%MAX_ARRAY_SIZE; - + mem_offset = (intptr_t) buf; mem_length = 0; - + /* if at least one full writelist, allocate file arrays at max array size and don't free until very end */ if (n_write_lists) { @@ -352,7 +352,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, file_lengths = (int32_t*)ADIOI_Malloc(extra_blks* sizeof(int32_t)); } - + /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ for (i=0; iindices[j]; file_lengths[k] = flat_file->blocklens[j]; @@ -389,7 +389,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, } /* --END ERROR HANDLING-- */ - err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, + err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, file_offsets, PVFS_BYTE, &file_req); /* --BEGIN ERROR HANDLING-- */ @@ -409,7 +409,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif - err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, + err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, (void *)mem_offset, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING @@ -443,7 +443,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File 
fd, const void *buf, int count, } for (k=0; kindices[j]; if (k == (extra_blks - 1)) { @@ -473,7 +473,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, } /* --END ERROR HANDLING-- */ - err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, + err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, file_offsets, PVFS_BYTE, &file_req); /* --BEGIN ERROR HANDLING-- */ @@ -491,7 +491,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif - err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, + err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, (void *)mem_offset, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING @@ -511,7 +511,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, PVFS_Request_free(&mem_req); PVFS_Request_free(&file_req); } - } + } else { /* noncontiguous in memory as well as in file */ @@ -530,7 +530,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, max_mem_list = 0; max_file_list = 0; - /* run through and file max_file_list and max_mem_list so that you + /* run through and file max_file_list and max_mem_list so that you can allocate the file and memory arrays less than MAX_ARRAY_SIZE if possible */ @@ -538,7 +538,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, k = start_k; new_buffer_write = 0; mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && + while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) { /* find mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are @@ -546,9 +546,9 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, written in the next immediate write list is less than bufsize */ if(mem_list_count) { - if((new_buffer_write + flat_buf->blocklens[k] 
+ + if((new_buffer_write + flat_buf->blocklens[k] + size_wrote) > bufsize) { - end_bwr_size = new_buffer_write + + end_bwr_size = new_buffer_write + flat_buf->blocklens[k] - (bufsize - size_wrote); new_buffer_write = bufsize - size_wrote; } @@ -566,15 +566,15 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, } mem_list_count++; k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ j = start_j; new_file_write = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && - (new_file_write < new_buffer_write)) { + while ((file_list_count < MAX_ARRAY_SIZE) && + (new_file_write < new_buffer_write)) { if(file_list_count) { - if((new_file_write + flat_file->blocklens[j]) > + if((new_file_write + flat_file->blocklens[j]) > new_buffer_write) { end_fwr_size = new_buffer_write - new_file_write; new_file_write = new_buffer_write; @@ -595,9 +595,9 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_write < new_buffer_write) && + if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_write = 0; mem_list_count = 0; @@ -605,7 +605,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, if(mem_list_count) { if((new_buffer_write + flat_buf->blocklens[k]) > new_file_write) { - end_bwr_size = new_file_write - + end_bwr_size = new_file_write - new_buffer_write; new_buffer_write = new_file_write; k--; @@ -627,13 +627,13 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, } /* while (new_buffer_write < new_file_write) */ } /* if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
(new_buffer_write < bufsize-size_wrote)) */ /* fakes filling the writelist arrays of lengths found above */ k = start_k; j = start_j; - for (i=0; iblocklens[k] == end_bwr_size) @@ -654,7 +654,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, if (i == (file_list_count - 1)) { if (flat_file->blocklens[j] == end_fwr_size) fwr_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { fwr_size = flat_file->blocklens[j] - end_fwr_size; j--; @@ -687,11 +687,11 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, * region and many (700) very small memory regions. both cases caused * problems for this code */ - if ( ( (file_list_count == 1) && + if ( ( (file_list_count == 1) && (new_file_write < flat_file->blocklens[0] ) ) || - ((mem_list_count == 1) && + ((mem_list_count == 1) && (new_buffer_write < flat_buf->blocklens[0]) ) || - ((file_list_count == MAX_ARRAY_SIZE) && + ((file_list_count == MAX_ARRAY_SIZE) && (new_file_write < flat_buf->blocklens[0]) ) || ( (mem_list_count == MAX_ARRAY_SIZE) && (new_buffer_write < flat_file->blocklens[0])) ) @@ -707,7 +707,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int)); file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t)); file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t)); - + size_wrote = 0; n_filetypes = st_n_filetypes; fwr_size = st_fwr_size; @@ -720,12 +720,12 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, /* this section calculates mem_list_count and file_list_count and also finds the possibly odd sized last array elements in new_fwr_size and new_bwr_size */ - + while (size_wrote < bufsize) { k = start_k; new_buffer_write = 0; mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && + while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) { /* find 
mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are @@ -733,9 +733,9 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, written in the next immediate write list is less than bufsize */ if(mem_list_count) { - if((new_buffer_write + flat_buf->blocklens[k] + + if((new_buffer_write + flat_buf->blocklens[k] + size_wrote) > bufsize) { - end_bwr_size = new_buffer_write + + end_bwr_size = new_buffer_write + flat_buf->blocklens[k] - (bufsize - size_wrote); new_buffer_write = bufsize - size_wrote; } @@ -753,15 +753,15 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, } mem_list_count++; k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ j = start_j; new_file_write = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && + while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_write < new_buffer_write)) { if(file_list_count) { - if((new_file_write + flat_file->blocklens[j]) > + if((new_file_write + flat_file->blocklens[j]) > new_buffer_write) { end_fwr_size = new_buffer_write - new_file_write; new_file_write = new_buffer_write; @@ -782,9 +782,9 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_write < new_buffer_write) && + if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_write = 0; mem_list_count = 0; @@ -814,19 +814,19 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, } /* while (new_buffer_write < new_file_write) */ } /* if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < 
bufsize-size_wrote)) */ /* fills the allocated writelist arrays */ k = start_k; j = start_j; - for (i=0; icount) + (int)flat_buf->indices[k]); - + if(!i) { mem_lengths[0] = bwr_size; mem_offsets[0] += flat_buf->blocklens[k] - bwr_size; @@ -851,7 +851,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, k = (k + 1)%flat_buf->count; } /* for (i=0; iindices[j] + + file_offsets[i] = disp + flat_file->indices[j] + ((ADIO_Offset)n_filetypes) * filetype_extent; if (!i) { file_lengths[0] = fwr_size; @@ -862,7 +862,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, file_lengths[i] = end_fwr_size; if (flat_file->blocklens[j] == end_fwr_size) fwr_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { fwr_size = flat_file->blocklens[j] - end_fwr_size; j--; @@ -877,7 +877,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, } } /* for (i=0; iobject_ref, file_req, 0, + err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING @@ -939,7 +939,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, /* when incrementing fp_ind, need to also take into account the file type: * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----| * if we wrote N elements, offset needs to point at beginning of type, not - * at empty region at offset N+1). + * at empty region at offset N+1). * * As we discussed on mpich-discuss in may/june 2009, the code below might * look wierd, but by putting fp_ind at the last byte written, the next @@ -959,7 +959,7 @@ void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. 
The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ #endif diff --git a/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs.c b/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs.c index 929dfd97f75..d022016c57c 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs.h b/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs.h index 9f029f05580..67d7fe7d235 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -15,7 +15,7 @@ void ADIOI_SFS_Open(ADIO_File fd, int *error_code); void ADIOI_SFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int - *error_code); + *error_code); void ADIOI_SFS_Flush(ADIO_File fd, int *error_code); #endif diff --git a/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_fcntl.c index 533e26a77b6..8bef5755863 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -21,17 +21,17 @@ void ADIOI_SFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er switch(flag) { case ADIO_FCNTL_GET_FSIZE: /* On SFS, I find that a write from one process, which changes - the file size, does not automatically make the new file size - visible to other processes. Therefore, a sync-barrier-sync is - needed. (Other processes are able to read the data written + the file size, does not automatically make the new file size + visible to other processes. Therefore, a sync-barrier-sync is + needed. (Other processes are able to read the data written though; only file size is returned incorrectly.) */ fsync(fd->fd_sys); MPI_Barrier(fd->comm); fsync(fd->fd_sys); - + fcntl_struct->fsize = llseek(fd->fd_sys, 0, SEEK_END); - if (fd->fp_sys_posn != -1) + if (fd->fp_sys_posn != -1) llseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); if (fcntl_struct->fsize == -1) { #ifdef MPICH @@ -42,7 +42,7 @@ void ADIOI_SFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er #else /* MPICH-1 */ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname, "I/O Error", "%s", strerror(errno)); - ADIOI_Error(fd, *error_code, myname); + ADIOI_Error(fd, *error_code, myname); #endif } else *error_code = MPI_SUCCESS; diff --git a/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_flush.c b/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_flush.c index 8fc7e358cf6..5a36dcd4fba 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_flush.c +++ b/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_flush.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -18,10 +18,10 @@ void ADIOI_SFS_Flush(ADIO_File fd, int *error_code) *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", "**io %s", strerror(errno)); #elif defined(PRINT_ERR_MSG) - *error_code = MPI_ERR_UNKNOWN; + *error_code = MPI_ERR_UNKNOWN; #else /* MPICH-1 */ *error_code = MPIR_Err_setmsg(MPI_ERR_UNSUPPORTED_OPERATION, 1, myname, (char *) 0, (char *) 0); - ADIOI_Error(fd, *error_code, myname); + ADIOI_Error(fd, *error_code, myname); #endif } diff --git a/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_open.c b/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_open.c index ebeefdcbcba..dd614940b93 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_sfs/ad_sfs_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -50,7 +50,7 @@ void ADIOI_SFS_Open(ADIO_File fd, int *error_code) #else /* MPICH-1 */ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname, "I/O Error", "%s", strerror(errno)); - ADIOI_Error(ADIO_FILE_NULL, *error_code, myname); + ADIOI_Error(ADIO_FILE_NULL, *error_code, myname); #endif } else *error_code = MPI_SUCCESS; diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs.c index 6823468c700..00542b71640 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs.h b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs.h index 1871bde5b50..829620fc5f3 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -23,25 +23,25 @@ void ADIOI_TESTFS_ReadContig(ADIO_File fd, void *buf, int count, void ADIOI_TESTFS_WriteContig(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_TESTFS_IwriteContig(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); -void ADIOI_TESTFS_IreadContig(ADIO_File fd, void *buf, int count, + *error_code); +void ADIOI_TESTFS_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); + *error_code); int ADIOI_TESTFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int *error_code); int ADIOI_TESTFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code); void ADIOI_TESTFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_TESTFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, - int *error_code); -void ADIOI_TESTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, - int *error_code); + int *error_code); +void ADIOI_TESTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, + int *error_code); void ADIOI_TESTFS_WriteStrided(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, @@ 
-68,13 +68,13 @@ void ADIOI_TESTFS_IwriteStrided(ADIO_File fd, const void *buf, int count, *error_code); void ADIOI_TESTFS_Flush(ADIO_File fd, int *error_code); void ADIOI_TESTFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); -ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset, +ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset, int whence, int *error_code); void ADIOI_TESTFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); -void ADIOI_TESTFS_Get_shared_fp(ADIO_File fd, int size, - ADIO_Offset *shared_fp, +void ADIOI_TESTFS_Get_shared_fp(ADIO_File fd, int size, + ADIO_Offset *shared_fp, int *error_code); -void ADIOI_TESTFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, +void ADIOI_TESTFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code); void ADIOI_TESTFS_Delete(const char *filename, int *error_code); diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_close.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_close.c index a1b85e600d0..7c6a9553dfe 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_close.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_close.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -17,6 +17,6 @@ void ADIOI_TESTFS_Close(ADIO_File fd, int *error_code) MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Close called on %s\n", myrank, + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Close called on %s\n", myrank, nprocs, fd->filename); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_delete.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_delete.c index 9a1b6f37e12..5563c3ddd36 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_delete.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_delete.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -16,6 +16,6 @@ void ADIOI_TESTFS_Delete(const char *filename, int *error_code) MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Delete called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Delete called on %s\n", myrank, nprocs, filename); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_done.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_done.c index 2ee3111587e..1cbfa2af14a 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_done.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_done.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -17,7 +17,7 @@ int ADIOI_TESTFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadDone called on ADIO_REQUEST_NULL\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadDone called on ADIO_REQUEST_NULL\n", myrank, nprocs); return 1; } @@ -32,8 +32,8 @@ int ADIOI_TESTFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); - FPRINTF(stdout, - "[%d/%d] ADIOI_TESTFS_WriteDone called on ADIO_REQUEST_NULL\n", + FPRINTF(stdout, + "[%d/%d] ADIOI_TESTFS_WriteDone called on ADIO_REQUEST_NULL\n", myrank, nprocs); return 1; } diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_fcntl.c index 8b1e516d9a2..e0ee8fa0546 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -9,7 +9,7 @@ #include "adioi.h" #include "adio_extern.h" -void ADIOI_TESTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, +void ADIOI_TESTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code) { int myrank, nprocs; @@ -19,7 +19,7 @@ void ADIOI_TESTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Fcntl called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Fcntl called on %s\n", myrank, nprocs, fd->filename); switch(flag) { @@ -40,7 +40,7 @@ void ADIOI_TESTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, default: /* --BEGIN ERROR HANDLING-- */ *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, + myname, __LINE__, MPI_ERR_ARG, "**flag", "**flag %d", flag); return; diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_flush.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_flush.c index 23d559787d3..304a6663828 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_flush.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_flush.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -16,6 +16,6 @@ void ADIOI_TESTFS_Flush(ADIO_File fd, int *error_code) MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Flush called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Flush called on %s\n", myrank, nprocs, fd->filename); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_getsh.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_getsh.c index 2bdb3dceb80..abdf38776b5 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_getsh.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_getsh.c @@ -1,15 +1,15 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_testfs.h" #include "adioi.h" -void ADIOI_TESTFS_Get_shared_fp(ADIO_File fd, int size, - ADIO_Offset *shared_fp, +void ADIOI_TESTFS_Get_shared_fp(ADIO_File fd, int size, + ADIO_Offset *shared_fp, int *error_code) { int myrank, nprocs; @@ -18,6 +18,6 @@ void ADIOI_TESTFS_Get_shared_fp(ADIO_File fd, int size, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Get_shared_fp called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Get_shared_fp called on %s\n", myrank, nprocs, fd->filename); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_hints.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_hints.c index a6c1be9e9fa..ffff0c94939 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_hints.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_hints.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -18,9 +18,9 @@ void ADIOI_TESTFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_SetInfo called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_SetInfo called on %s\n", myrank, nprocs, fd->filename); - FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_SetInfo\n", + FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_SetInfo\n", myrank, nprocs); ADIOI_GEN_SetInfo(fd, users_info, error_code); diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_iread.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_iread.c index 77e862438c6..b402791a186 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_iread.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_iread.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -12,7 +12,7 @@ * * Implemented by immediately calling ReadContig() */ -void ADIOI_TESTFS_IreadContig(ADIO_File fd, void *buf, int count, +void ADIOI_TESTFS_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int *error_code) @@ -26,13 +26,13 @@ void ADIOI_TESTFS_IreadContig(ADIO_File fd, void *buf, int count, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); MPI_Type_size_x(datatype, &typesize); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IreadContig called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IreadContig called on %s\n", myrank, nprocs, fd->filename); - FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_ReadContig\n", + FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_ReadContig\n", myrank, nprocs); len = count * typesize; - ADIOI_TESTFS_ReadContig(fd, buf, len, MPI_BYTE, file_ptr_type, + ADIOI_TESTFS_ReadContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset, &status, error_code); MPIO_Completed_request_create(&fd, len, error_code, request); @@ -50,13 +50,13 @@ void ADIOI_TESTFS_IreadStrided(ADIO_File fd, void *buf, int count, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); MPI_Type_size_x(datatype, &typesize); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IreadStrided called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IreadStrided called on %s\n", myrank, nprocs, fd->filename); - FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_ReadStrided\n", + FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_ReadStrided\n", myrank, nprocs); - ADIOI_TESTFS_ReadStrided(fd, buf, count, datatype, file_ptr_type, - offset, &status, error_code); + ADIOI_TESTFS_ReadStrided(fd, buf, count, datatype, file_ptr_type, + offset, &status, error_code); MPIO_Completed_request_create(&fd, count*typesize, error_code, request); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_iwrite.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_iwrite.c index 
e29c9f6ee03..bfa2b634ab0 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_iwrite.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_iwrite.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -29,13 +29,13 @@ void ADIOI_TESTFS_IwriteContig(ADIO_File fd, const void *buf, int count, MPI_Type_size_x(datatype, &typesize); MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IwriteContig called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IwriteContig called on %s\n", myrank, nprocs, fd->filename); - FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_WriteContig\n", + FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_WriteContig\n", myrank, nprocs); len = count * typesize; - ADIOI_TESTFS_WriteContig(fd, buf, len, MPI_BYTE, file_ptr_type, + ADIOI_TESTFS_WriteContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset, &status, error_code); MPIO_Completed_request_create(&fd, len, error_code, request); @@ -56,12 +56,12 @@ void ADIOI_TESTFS_IwriteStrided(ADIO_File fd, const void *buf, int count, MPI_Comm_rank(fd->comm, &myrank); MPI_Type_size_x(datatype, &typesize); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IwriteStrided called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IwriteStrided called on %s\n", myrank, nprocs, fd->filename); - FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_WriteStrided\n", + FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_WriteStrided\n", myrank, nprocs); - ADIOI_TESTFS_WriteStrided(fd, buf, count, datatype, file_ptr_type, + ADIOI_TESTFS_WriteStrided(fd, buf, count, datatype, file_ptr_type, offset, &status, error_code); MPIO_Completed_request_create(&fd, count*typesize, error_code, request); diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_open.c 
b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_open.c index 6b1595b99ca..90751a2d313 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -18,6 +18,6 @@ void ADIOI_TESTFS_Open(ADIO_File fd, int *error_code) MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Open called on %s\n", myrank, + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Open called on %s\n", myrank, nprocs, fd->filename); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_rdcoll.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_rdcoll.c index 5df94458fdd..16d97591929 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_rdcoll.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_rdcoll.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -10,7 +10,7 @@ void ADIOI_TESTFS_ReadStridedColl(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, + ADIO_Offset offset, ADIO_Status *status, int *error_code) { int myrank, nprocs; @@ -19,9 +19,9 @@ void ADIOI_TESTFS_ReadStridedColl(ADIO_File fd, void *buf, int count, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadStridedColl called on %s\n", - myrank, nprocs, fd->filename); - FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_ReadStridedColl\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadStridedColl called on %s\n", + myrank, nprocs, fd->filename); + FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_ReadStridedColl\n", myrank, nprocs); ADIOI_GEN_ReadStridedColl(fd, buf, count, datatype, file_ptr_type, diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_read.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_read.c index f60a9920f76..aa2f72f3ab4 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_read.c @@ -1,13 +1,13 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 2001 University of Chicago. +/* + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ #include "ad_testfs.h" #include "adioi.h" -void ADIOI_TESTFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_TESTFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) @@ -20,7 +20,7 @@ void ADIOI_TESTFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); MPI_Type_size_x(datatype, &datatype_size); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadContig called on %s\n", myrank, + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadContig called on %s\n", myrank, nprocs, fd->filename); if (file_ptr_type != ADIO_EXPLICIT_OFFSET) { @@ -33,7 +33,7 @@ void ADIOI_TESTFS_ReadContig(ADIO_File fd, void *buf, int count, } FPRINTF(stdout, "[%d/%d] reading (buf = %p, loc = %lld, sz = %lld)\n", - myrank, nprocs, buf, (long long) offset, + myrank, nprocs, buf, (long long) offset, (long long) datatype_size * count); #ifdef HAVE_STATUS_SET_BYTES @@ -52,9 +52,9 @@ void ADIOI_TESTFS_ReadStrided(ADIO_File fd, void *buf, int count, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadStrided called on %s\n", myrank, + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadStrided called on %s\n", myrank, nprocs, fd->filename); - FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_ReadStrided\n", myrank, + FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_ReadStrided\n", myrank, nprocs); ADIOI_GEN_ReadStrided(fd, buf, count, datatype, file_ptr_type, offset, diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_resize.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_resize.c index a4a37eb94bb..30fa9e0853d 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_resize.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_resize.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. 
+ * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -16,6 +16,6 @@ void ADIOI_TESTFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Resize called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Resize called on %s\n", myrank, nprocs, fd->filename); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_seek.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_seek.c index df6d30a5160..a6bca984c02 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_seek.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_seek.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -16,9 +16,9 @@ * * Returns an absolute offset in bytes. The offset passed into the call is in * terms of the etype relative to the filetype, so some calculations are - * necessary. + * necessary. */ -ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset, +ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset, int whence, int *error_code) { int myrank, nprocs; @@ -36,7 +36,7 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_SeekIndividual called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_SeekIndividual called on %s\n", myrank, nprocs, fd->filename); ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); @@ -50,7 +50,7 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset, MPI_Type_extent(fd->filetype, &filetype_extent); MPI_Type_size_x(fd->filetype, &filetype_size); if ( ! 
filetype_size ) { - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return 0; } @@ -59,7 +59,7 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset, n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_setsh.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_setsh.c index 6fc8a04be9f..2b7e7560a46 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_setsh.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_setsh.c @@ -1,14 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "ad_testfs.h" #include "adioi.h" -void ADIOI_TESTFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, +void ADIOI_TESTFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code) { int myrank, nprocs; @@ -17,6 +17,6 @@ void ADIOI_TESTFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Set_shared_fp called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_Set_shared_fp called on %s\n", myrank, nprocs, fd->filename); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_wait.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_wait.c index 6eef9ef7060..ffa4c51cdc3 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_wait.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_wait.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. 
* See COPYRIGHT notice in top-level directory. */ @@ -17,7 +17,7 @@ void ADIOI_TESTFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadComplete called \n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadComplete called \n", myrank, nprocs); /* do something with status set bytes? */ @@ -32,7 +32,7 @@ void ADIOI_TESTFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, int MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteComplete called\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteComplete called\n", myrank, nprocs); /* do something with status_set_bytes? */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_wrcoll.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_wrcoll.c index ac11f5847f3..be87957df33 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_wrcoll.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_wrcoll.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -10,7 +10,7 @@ void ADIOI_TESTFS_WriteStridedColl(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, + ADIO_Offset offset, ADIO_Status *status, int *error_code) { int myrank, nprocs; @@ -19,9 +19,9 @@ void ADIOI_TESTFS_WriteStridedColl(ADIO_File fd, const void *buf, int count, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteStridedColl called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteStridedColl called on %s\n", myrank, nprocs, fd->filename); - FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_WriteStridedColl\n", + FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_WriteStridedColl\n", myrank, nprocs); ADIOI_GEN_WriteStridedColl(fd, buf, count, datatype, file_ptr_type, diff --git a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_write.c b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_write.c index e1a59ff0101..70c9012b7af 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_testfs/ad_testfs_write.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 2001 University of Chicago. +/* + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -20,17 +20,17 @@ void ADIOI_TESTFS_WriteContig(ADIO_File fd, const void *buf, int count, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); MPI_Type_size_x(datatype, &datatype_size); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteContig called on %s\n", myrank, + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteContig called on %s\n", myrank, nprocs, fd->filename); FPRINTF(stdout, "[%d/%d] writing (buf = %p, loc = %lld, sz = %lld)\n", - myrank, nprocs, buf, (long long) offset, + myrank, nprocs, buf, (long long) offset, (long long)datatype_size * (long long)count); if (file_ptr_type != ADIO_EXPLICIT_OFFSET) { fd->fp_ind += datatype_size * count; fd->fp_sys_posn = fd->fp_ind; - FPRINTF(stdout, "[%d/%d] new file position is %lld\n", myrank, + FPRINTF(stdout, "[%d/%d] new file position is %lld\n", myrank, nprocs, (long long) fd->fp_ind); } else { @@ -53,11 +53,11 @@ void ADIOI_TESTFS_WriteStrided(ADIO_File fd, const void *buf, int count, MPI_Comm_size(fd->comm, &nprocs); MPI_Comm_rank(fd->comm, &myrank); - FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteStrided called on %s\n", + FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteStrided called on %s\n", myrank, nprocs, fd->filename); - FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_WriteStrided\n", + FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_WriteStrided\n", myrank, nprocs); - ADIOI_GEN_WriteStrided(fd, buf, count, datatype, file_ptr_type, offset, + ADIOI_GEN_WriteStrided(fd, buf, count, datatype, file_ptr_type, offset, status, error_code); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs.c b/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs.c index 66b183ec56f..4fc330258d4 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. 
* See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs.h b/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs.h index 5ad27439b3e..e8995eb1e69 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -25,7 +25,7 @@ #include #endif -/* Workaround for incomplete set of definitions if __REDIRECT is not +/* Workaround for incomplete set of definitions if __REDIRECT is not defined and large file support is used in aio.h */ #if !defined(__REDIRECT) && defined(__USE_FILE_OFFSET64) #define aiocb aiocb64 @@ -35,23 +35,23 @@ int ADIOI_UFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, int wr, void *handle); void ADIOI_UFS_Open(ADIO_File fd, int *error_code); -void ADIOI_UFS_IwriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_UFS_IwriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); -void ADIOI_UFS_IreadContig(ADIO_File fd, void *buf, int count, + *error_code); +void ADIOI_UFS_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int - *error_code); + *error_code); int ADIOI_UFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int *error_code); int ADIOI_UFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code); void ADIOI_UFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_UFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, - int *error_code); + int *error_code); void ADIOI_UFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int - *error_code); + 
*error_code); #endif diff --git a/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs_open.c b/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs_open.c index 9d5a2a11741..d4c64db4a2b 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_ufs/ad_ufs_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -31,7 +31,7 @@ void ADIOI_UFS_Open(ADIO_File fd, int *error_code) if (fd->access_mode & ADIO_EXCL) amode = amode | O_EXCL; - + #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_open_a, 0, NULL ); #endif diff --git a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs.c b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs.c index b748a8a637a..529792488c1 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs.h b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs.h index d14858d4aa5..506f441ad73 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -23,14 +23,14 @@ typedef struct aiocb64 aiocb64_t; void ADIOI_XFS_Open(ADIO_File fd, int *error_code); void ADIOI_XFS_Close(ADIO_File fd, int *error_code); -void ADIOI_XFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_XFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); -void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code); void ADIOI_XFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); diff --git a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_fcntl.c index 1f19081afca..cf45a6cd908 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_hints.c b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_hints.c index 0fe0e832f4c..93b4f5061a6 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_hints.c +++ b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_hints.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -67,14 +67,14 @@ void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) if (users_info != MPI_INFO_NULL) { value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, value, &flag); if (flag && !strcmp(value, "true")) { ADIOI_Info_set(fd->info, "direct_read", "true"); fd->direct_read = 1; } - ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL, + ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL, value, &flag); if (flag && !strcmp(value, "true")) { ADIOI_Info_set(fd->info, "direct_write", "true"); @@ -83,7 +83,7 @@ void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) ADIOI_Free(value); } - + /* set the values for collective I/O and data sieving parameters */ ADIOI_GEN_SetInfo(fd, users_info, error_code); diff --git a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_open.c b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_open.c index 5c0a9a2b8ae..e0ccafe7f1b 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -9,9 +9,7 @@ #include "ad_xfs.h" #include -#ifdef HAVE_STDDEF_H #include -#endif #ifndef HAVE_LSEEK64 #define lseek64 lseek @@ -80,7 +78,7 @@ void ADIOI_XFS_Open(ADIO_File fd, int *error_code) fd->hints->fs_hints.xfs.write_chunk_sz = st.d_maxiosz; } else { /* - * MPIO_DIRECT_WRITE_CHUNK_SIZE was set. + * MPIO_DIRECT_WRITE_CHUNK_SIZE was set. * Make write_chunk_sz a multiple of d_miniosz. 
*/ factor = write_chunk_sz / fd->d_miniosz; diff --git a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_read.c b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_read.c index c3c237cc15d..b8267313ca0 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_read.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -12,10 +12,10 @@ /* style: allow:free:2 sig:0 */ -static void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len, +static void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len, ADIO_Offset offset, int *err); -void ADIOI_XFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_XFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { @@ -35,11 +35,11 @@ void ADIOI_XFS_ReadContig(ADIO_File fd, void *buf, int count, err = pread(fd->fd_sys, buf, len, offset); else { /* direct I/O enabled */ - /* (1) if mem_aligned && file_aligned + /* (1) if mem_aligned && file_aligned use direct I/O to read up to correct io_size use buffered I/O for remaining */ - if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz)) + if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz)) ADIOI_XFS_Aligned_Mem_File_Read(fd, buf, len, offset, &err); /* (2) if !file_aligned @@ -99,7 +99,7 @@ void ADIOI_XFS_ReadContig(ADIO_File fd, void *buf, int count, } -void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len, +void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len, ADIO_Offset offset, int *err) { int ntimes, rem, newrem, i, size, nbytes; @@ -110,7 +110,7 @@ void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len, use direct I/O to read up to correct io_size, use 
buffered I/O for remaining. */ - if (!(len % fd->d_miniosz) && + if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz) && (len <= read_chunk_sz)) *err = pread(fd->fd_direct, buf, len, offset); else if (len < fd->d_miniosz) @@ -126,17 +126,17 @@ void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len, } if (rem) { if (!(rem % fd->d_miniosz)) - nbytes += pread(fd->fd_direct, + nbytes += pread(fd->fd_direct, ((char *)buf) + ntimes * read_chunk_sz, rem, offset); else { newrem = rem % fd->d_miniosz; size = rem - newrem; if (size) { - nbytes += pread(fd->fd_direct, + nbytes += pread(fd->fd_direct, ((char *)buf) + ntimes * read_chunk_sz, size, offset); offset += size; } - nbytes += pread(fd->fd_sys, + nbytes += pread(fd->fd_sys, ((char *)buf) + ntimes * read_chunk_sz + size, newrem, offset); } } diff --git a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_resize.c b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_resize.c index 8caf8b48bd7..79084eb411b 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_resize.c +++ b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_resize.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -11,7 +11,7 @@ void ADIOI_XFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) { int err; static char myname[] = "ADIOI_XFS_RESIZE"; - + err = ftruncate64(fd->fd_sys, size); if (err == -1) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, diff --git a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_write.c b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_write.c index 9ab82768f76..a56102e31fa 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_xfs/ad_xfs_write.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -16,7 +16,7 @@ static int ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, ADIO_Offset len, ADIO_Offset offset); -void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { @@ -38,7 +38,7 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, if (err < 0) {goto leaving;} } else { /* direct I/O enabled */ - /* (1) if mem_aligned && file_aligned + /* (1) if mem_aligned && file_aligned use direct I/O to write up to correct io_size use buffered I/O for remaining */ @@ -109,7 +109,7 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, static int -ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, ADIO_Offset len, +ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, ADIO_Offset len, ADIO_Offset offset) { unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz; @@ -121,7 +121,7 @@ ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, ADIO_Offset len, use direct I/O to write up to correct io_size, use buffered I/O for remaining. 
*/ - if (!(len % fd->d_miniosz) && + if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz) && (len <= write_chunk_sz)) { nbytes = pwrite(fd->fd_direct, buf, len, offset); if (nbytes < 0) {return -1;} @@ -140,19 +140,19 @@ ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, ADIO_Offset len, } if (rem) { if (!(rem % fd->d_miniosz)) { - nbytes = pwrite(fd->fd_direct, + nbytes = pwrite(fd->fd_direct, ((char *)buf) + ntimes * write_chunk_sz, rem, offset); if (nbytes < 0) {return -1;} } else { newrem = rem % fd->d_miniosz; size = rem - newrem; if (size) { - nbytes = pwrite(fd->fd_direct, + nbytes = pwrite(fd->fd_direct, ((char *)buf) + ntimes * write_chunk_sz, size, offset); offset += size; if (nbytes < 0) {return -1;} } - nbytes = pwrite(fd->fd_sys, + nbytes = pwrite(fd->fd_sys, ((char *)buf) + ntimes * write_chunk_sz + size, newrem, offset); if (nbytes < 0) {return -1;} } diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs.c b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs.c index a0eadfba699..be8b8a94b1c 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs.c +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago. + * Copyright (C) 2003 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -37,6 +37,6 @@ struct ADIOI_Fns_struct ADIO_ZOIDFS_operations = { ADIOI_ZOIDFS_Feature, }; -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs.h b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs.h index d4999b66228..412970fac63 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs.h +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -18,16 +18,16 @@ typedef zoidfs_handle_t ADIOI_ZOIDFS_object; void ADIOI_ZOIDFS_Open(ADIO_File fd, int *error_code); void ADIOI_ZOIDFS_Close(ADIO_File fd, int *error_code); -void ADIOI_ZOIDFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_ZOIDFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); -void ADIOI_ZOIDFS_WriteContig(ADIO_File fd, void *buf, int count, +void ADIOI_ZOIDFS_WriteContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int - *error_code); + *error_code); void ADIOI_ZOIDFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int - *error_code); + *error_code); void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_close.c b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_close.c index 0126783aa37..db7aa622273 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_close.c +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_close.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -20,6 +20,6 @@ void ADIOI_ZOIDFS_Close(ADIO_File fd, int *error_code) *error_code = MPI_SUCCESS; } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_common.c b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_common.c index 018d439aaa2..d2781c41402 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_common.c +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_common.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 2003 University of Chicago. +/* + * Copyright (C) 2003 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -34,7 +34,7 @@ void ADIOI_ZOIDFS_End(int *error_code) *error_code = MPI_SUCCESS; } -int ADIOI_ZOIDFS_End_call(MPI_Comm comm, int keyval, +int ADIOI_ZOIDFS_End_call(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state) { int error_code; @@ -64,10 +64,10 @@ void ADIOI_ZOIDFS_Init(int rank, int *error_code ) 0); return; } - + MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_ZOIDFS_End_call, - &ADIOI_ZOIDFS_Initialized, (void *)0); - /* just like romio does, we make a dummy attribute so we + &ADIOI_ZOIDFS_Initialized, (void *)0); + /* just like romio does, we make a dummy attribute so we * get cleaned up */ MPI_Attr_put(MPI_COMM_SELF, ADIOI_ZOIDFS_Initialized, (void *)0); } @@ -121,6 +121,6 @@ int ADIOI_ZOIDFS_error_convert(int error) } } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_common.h b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_common.h index ba985b49226..487b499e0d4 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_common.h +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_common.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- * vim: ts=8 sts=4 sw=4 noexpandtab * - * 
Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -9,10 +9,10 @@ #define _AD_ZOIDFS_COMMON_H #include "ad_zoidfs.h" -/* The ESTALE problem: +/* The ESTALE problem: * The IO forwarding protocol can respond to any call with ESTALE, which means * the handle upon which that call operates has expired from the metadata - * cache. We thus wrap any zoidfs routine (expr) in this macro. + * cache. We thus wrap any zoidfs routine (expr) in this macro. * * ROMIO stores the filename in the ADIOI_File structrue (fd), so we can always * re-lookup in response to ESTALE */ @@ -36,7 +36,7 @@ void ADIOI_ZOIDFS_Init(int rank, int *error_code ); void ADIOI_ZOIDFS_makeattribs(zoidfs_sattr_t * attribs); void ADIOI_ZOIDFS_End(int *error_code); -int ADIOI_ZOIDFS_End_call(MPI_Comm comm, int keyval, +int ADIOI_ZOIDFS_End_call(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state); int ADIOI_ZOIDFS_error_convert(int error); diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_delete.c b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_delete.c index 87193147a56..cca80210613 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_delete.c +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_delete.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago. + * Copyright (C) 2003 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -17,7 +17,7 @@ void ADIOI_ZOIDFS_Delete(char *filename, int *error_code) ADIOI_ZOIDFS_Init(0, error_code); /* --BEGIN ERROR HANDLING-- */ - if (*error_code != MPI_SUCCESS) + if (*error_code != MPI_SUCCESS) { /* ADIOI_ZOIDFS_INIT handles creating error codes itself */ return; @@ -40,6 +40,6 @@ void ADIOI_ZOIDFS_Delete(char *filename, int *error_code) return; } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_fcntl.c b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_fcntl.c index 22c26714d80..810100574d2 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -55,6 +55,6 @@ void ADIOI_ZOIDFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, } } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_flush.c b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_flush.c index 8ec0b8d5518..06cd30631aa 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_flush.c +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_flush.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -11,12 +11,12 @@ /* we want to be a bit clever here: at scale, if every client sends a * flush request, it will stress the file system with redundant * commit requests. Instead, one process should wait for - * everyone to catch up, do the sync, then broadcast the result. 
+ * everyone to catch up, do the sync, then broadcast the result. */ -void ADIOI_ZOIDFS_Flush(ADIO_File fd, int *error_code) -{ - int ret, rank, dummy=0, dummy_in=0; +void ADIOI_ZOIDFS_Flush(ADIO_File fd, int *error_code) +{ + int ret, rank, dummy=0, dummy_in=0; ADIOI_ZOIDFS_object *zoidfs_obj_ptr; static char myname[] = "ADIOI_ZOIDFS_FLUSH"; @@ -28,7 +28,7 @@ void ADIOI_ZOIDFS_Flush(ADIO_File fd, int *error_code) /* collective call to ensure no outstanding write requests. reduce is * slightly less expensvie than barrier */ - MPI_Reduce(&dummy_in, &dummy, 1, MPI_INT, MPI_SUM, + MPI_Reduce(&dummy_in, &dummy, 1, MPI_INT, MPI_SUM, fd->hints->ranklist[0], fd->comm); if (rank == fd->hints->ranklist[0]) { @@ -47,6 +47,6 @@ void ADIOI_ZOIDFS_Flush(ADIO_File fd, int *error_code) /* --END ERROR HANDLING-- */ } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_io.c b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_io.c index 337c92f4101..35dd552ce78 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_io.c +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_io.c @@ -1,6 +1,6 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- - * - * Copyright (C) 1997 University of Chicago. +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- + * + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -37,12 +37,12 @@ static void ZOIDFS_IOContig(ADIO_File fd, void * buf, int count, if (flag == ZOIDFS_READ) { NO_STALE(ret, fd, zoidfs_obj_ptr, - zoidfs_read(zoidfs_obj_ptr, + zoidfs_read(zoidfs_obj_ptr, 1, &buf, &mem_len, 1, &file_offset, &file_len, ZOIDFS_NO_OP_HINT)); } else { NO_STALE(ret, fd, zoidfs_obj_ptr, - zoidfs_write(zoidfs_obj_ptr, + zoidfs_write(zoidfs_obj_ptr, 1, (const void **)&buf, &mem_len, 1, &file_offset, &file_len, ZOIDFS_NO_OP_HINT)); } @@ -72,12 +72,12 @@ static void ZOIDFS_IOContig(ADIO_File fd, void * buf, int count, return; } -void ADIOI_ZOIDFS_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_ZOIDFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { - ZOIDFS_IOContig(fd, buf, count, datatype, file_ptr_type, + ZOIDFS_IOContig(fd, buf, count, datatype, file_ptr_type, offset, status, ZOIDFS_READ, error_code); } @@ -90,7 +90,7 @@ void ADIOI_ZOIDFS_WriteContig(ADIO_File fd, void *buf, int count, offset, status, ZOIDFS_WRITE, error_code); } - + /* - * vim: ts=8 sts=4 sw=4 noexpandtab + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_open.c b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_open.c index 55fb950a3e8..2d4abff0e92 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_open.c +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- * vim: ts=8 sts=4 sw=4 noexpandtab * - * Copyright (C) 2007 University of Chicago. + * Copyright (C) 2007 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -14,10 +14,10 @@ struct open_status_s { zoidfs_handle_t handle; }; typedef struct open_status_s open_status; - + static void fake_an_open(char *fname, int access_mode, int nr_datafiles, MPI_Offset strip_size, - ADIOI_ZOIDFS_object *zoidfs_ptr, + ADIOI_ZOIDFS_object *zoidfs_ptr, open_status *o_status) { int ret, created; @@ -31,7 +31,7 @@ static void fake_an_open(char *fname, int access_mode, * be careful with ADIO_EXCL. */ if (access_mode & ADIO_CREATE) { - ret = zoidfs_create(NULL, NULL, + ret = zoidfs_create(NULL, NULL, fname, &attribs, &handle, &created, ZOIDFS_NO_OP_HINT); if ((ret == ZFS_OK) && !created && (access_mode & ADIO_EXCL)) { /* lookup should not succeed if opened with EXCL */ @@ -51,7 +51,7 @@ static void fake_an_open(char *fname, int access_mode, /* ADIOI_ZOIDFS_Open: * one process opens (or creates) the file, then broadcasts the result to the - * remaining processors. + * remaining processors. * * ADIO_Open used to perform an optimization when MPI_MODE_CREATE (and before * that, MPI_MODE_EXCL) was set. 
Because ZoidFS handles file lookup and @@ -72,9 +72,9 @@ void ADIOI_ZOIDFS_Open(ADIO_File fd, int *error_code) MPI_Datatype types[2] = {MPI_INT, MPI_BYTE}; int lens[2] = {1, sizeof(ADIOI_ZOIDFS_object)}; MPI_Aint offsets[2]; - + memset(&o_status, 0, sizeof(o_status)); - zoidfs_obj_ptr = (ADIOI_ZOIDFS_object *) + zoidfs_obj_ptr = (ADIOI_ZOIDFS_object *) ADIOI_Malloc(sizeof(ADIOI_ZOIDFS_object)); /* --BEGIN ERROR HANDLING-- */ if (zoidfs_obj_ptr == NULL) { @@ -102,10 +102,10 @@ void ADIOI_ZOIDFS_Open(ADIO_File fd, int *error_code) MPE_Log_event( ADIOI_MPE_open_a, 0, NULL ); #endif if (rank == fd->hints->ranklist[0] && fd->fs_ptr == NULL) { - fake_an_open(fd->filename, fd->access_mode, + fake_an_open(fd->filename, fd->access_mode, fd->hints->striping_factor, fd->hints->striping_unit, - zoidfs_obj_ptr, &o_status); + zoidfs_obj_ptr, &o_status); /* store credentials and object reference in fd */ *zoidfs_obj_ptr = o_status.handle; fd->fs_ptr = zoidfs_obj_ptr; @@ -132,7 +132,7 @@ void ADIOI_ZOIDFS_Open(ADIO_File fd, int *error_code) /* --BEGIN ERROR HANDLING-- */ if (o_status.error != ZFS_OK) - { + { ADIOI_Free(zoidfs_obj_ptr); fd->fs_ptr = NULL; *error_code = MPIO_Err_create_code(MPI_SUCCESS, diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_read_list.c b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_read_list.c index d48a06c1bde..b537fffc3d1 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_read_list.c +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_read_list.c @@ -1,7 +1,7 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- - * vim: ts=8 sts=4 sw=4 noexpandtab - * - * Copyright (C) 2008 University of Chicago. +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2008 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -26,7 +26,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, int n_filetypes, etype_in_filetype; ADIO_Offset abs_off_in_filetype=0; MPI_Count filetype_size, etype_size, buftype_size; - MPI_Aint filetype_extent, buftype_extent; + MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset off, disp, start_off, initial_off; int flag, st_frd_size, st_n_filetypes; @@ -76,7 +76,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -86,7 +86,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, etype_size = fd->etype_size; bufsize = buftype_size * count; - + zoidfs_obj_ptr = (ADIOI_ZOIDFS_object *)fd->fs_ptr; if (!buftype_is_contig && filetype_is_contig) { @@ -99,7 +99,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; - off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : + off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : fd->disp + etype_size * offset; file_list_count = 1; @@ -121,9 +121,9 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, /* step through each block in memory, filling memory arrays */ while (b_blks_read < total_blks_to_read) { for (i=0; icount; i++) { - mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = + mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = buf + j*buftype_extent + flat_buf->indices[i]; - mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = + mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = flat_buf->blocklens[i]; file_lengths += flat_buf->blocklens[i]; b_blks_read++; @@ -159,21 +159,21 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, } total_bytes_read += file_lengths; /* --END ERROR HANDLING-- */ - - /* in the case of error or the last read list call, + + /* in the case of error or the last read list call, * leave here */ if (err_flag || b_blks_read == total_blks_to_read) break; file_offsets += file_lengths; file_lengths = 0; - } + } } /* for (i=0; icount; i++) */ j++; } /* while (b_blks_read < total_blks_to_read) */ ADIOI_Free(mem_offsets); ADIOI_Free(mem_lengths); - if (file_ptr_type == ADIO_INDIVIDUAL) + if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += total_bytes_read; fd->fp_sys_posn = -1; /* set it to null. 
*/ @@ -212,11 +212,11 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, while (!flag) { n_filetypes++; for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + + if (disp + flat_file->indices[i] + ((ADIO_Offset) n_filetypes)*filetype_extent + flat_file->blocklens[i] >= offset) { st_index = i; - frd_size = disp + flat_file->indices[i] + + frd_size = disp + flat_file->indices[i] + ((ADIO_Offset) n_filetypes)*filetype_extent + flat_file->blocklens[i] - offset; flag = 1; @@ -230,7 +230,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -242,16 +242,16 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, break; } } - + /* abs. offset in bytes in the file */ - offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + + offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + abs_off_in_filetype; } /* else [file_ptr_type != ADIO_INDIVIDUAL] */ start_off = offset; st_frd_size = frd_size; st_n_filetypes = n_filetypes; - + if (buftype_is_contig && !filetype_is_contig) { /* contiguous in memory, noncontiguous in file. 
should be the most @@ -260,13 +260,13 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, /* only one memory off-len pair, so no array here */ size_t mem_lengths; size_t mem_offsets; - + i = 0; j = st_index; n_filetypes = st_n_filetypes; - + mem_list_count = 1; - + /* determine how many blocks in file to read */ f_data_read = ADIOI_MIN(st_frd_size, bufsize); total_blks_to_read = 1; @@ -279,17 +279,17 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, f_data_read += flat_file->blocklens[j]; total_blks_to_read++; if (j<(flat_file->count-1)) j++; - else j = 0; + else j = 0; } - + j = st_index; n_filetypes = st_n_filetypes; n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE; extra_blks = total_blks_to_read%MAX_ARRAY_SIZE; - + mem_offsets = (size_t)buf; mem_lengths = 0; - + /* if at least one full readlist, allocate file arrays at max array size and don't free until very end */ if (n_read_lists) { @@ -306,7 +306,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, file_lengths = (uint64_t*)ADIOI_Malloc(extra_blks* sizeof(uint64_t)); } - + /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ for (i=0; iindices[j]; file_lengths[k] = flat_file->blocklens[j]; @@ -366,7 +366,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, } for (k=0; kindices[j]; if (k == (extra_blks - 1)) { @@ -400,7 +400,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_ZOIDFS_error_convert(err_flag), - "Error in zoidfs_read", 0); + "Error in zoidfs_read", 0); goto error_state; } /* --END ERROR HANDLING-- */ @@ -409,7 +409,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, } else { /* noncontiguous in memory as well as in file */ - + ADIOI_Flatten_datatype(datatype); flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; @@ -425,7 +425,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, 
max_mem_list = 0; max_file_list = 0; - /* run through and file max_file_list and max_mem_list so that you + /* run through and file max_file_list and max_mem_list so that you can allocate the file and memory arrays less than MAX_ARRAY_SIZE if possible */ @@ -433,7 +433,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, k = start_k; new_buffer_read = 0; mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && + while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) { /* find mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are @@ -441,9 +441,9 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, read in the next immediate read list is less than bufsize */ if(mem_list_count) { - if((new_buffer_read + flat_buf->blocklens[k] + + if((new_buffer_read + flat_buf->blocklens[k] + size_read) > bufsize) { - end_brd_size = new_buffer_read + + end_brd_size = new_buffer_read + flat_buf->blocklens[k] - (bufsize - size_read); new_buffer_read = bufsize - size_read; } @@ -461,15 +461,15 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, } mem_list_count++; k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ j = start_j; new_file_read = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && + while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_read < new_buffer_read)) { if(file_list_count) { - if((new_file_read + flat_file->blocklens[j]) > + if((new_file_read + flat_file->blocklens[j]) > new_buffer_read) { end_frd_size = new_buffer_read - new_file_read; new_file_read = new_buffer_read; @@ -490,9 +490,9 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_read < new_buffer_read) && + if 
((new_file_read < new_buffer_read) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_read = 0; mem_list_count = 0; @@ -521,13 +521,13 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, } /* while (new_buffer_read < new_file_read) */ } /* if ((new_file_read < new_buffer_read) && (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ /* fakes filling the readlist arrays of lengths found above */ k = start_k; j = start_j; - for (i=0; iblocklens[k] == end_brd_size) @@ -548,7 +548,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, if (i == (file_list_count - 1)) { if (flat_file->blocklens[j] == end_frd_size) frd_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { frd_size = flat_file->blocklens[j] - end_frd_size; j--; @@ -581,11 +581,11 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, * region and many (700) very small memory regions. 
both cases caused * problems for this code */ - if ( ( (file_list_count == 1) && + if ( ( (file_list_count == 1) && (new_file_read < flat_file->blocklens[0] ) ) || - ((mem_list_count == 1) && + ((mem_list_count == 1) && (new_buffer_read < flat_buf->blocklens[0]) ) || - ((file_list_count == MAX_ARRAY_SIZE) && + ((file_list_count == MAX_ARRAY_SIZE) && (new_file_read < flat_buf->blocklens[0]) ) || ( (mem_list_count == MAX_ARRAY_SIZE) && (new_buffer_read < flat_file->blocklens[0])) ) @@ -601,7 +601,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, mem_lengths = (size_t*)ADIOI_Malloc(max_mem_list*sizeof(size_t)); file_offsets = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t)); file_lengths = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t)); - + size_read = 0; n_filetypes = st_n_filetypes; frd_size = st_frd_size; @@ -614,12 +614,12 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, /* this section calculates mem_list_count and file_list_count and also finds the possibly odd sized last array elements in new_frd_size and new_brd_size */ - + while (size_read < bufsize) { k = start_k; new_buffer_read = 0; mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && + while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) { /* find mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are @@ -627,9 +627,9 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, read in the next immediate read list is less than bufsize */ if(mem_list_count) { - if((new_buffer_read + flat_buf->blocklens[k] + + if((new_buffer_read + flat_buf->blocklens[k] + size_read) > bufsize) { - end_brd_size = new_buffer_read + + end_brd_size = new_buffer_read + flat_buf->blocklens[k] - (bufsize - size_read); new_buffer_read = bufsize - size_read; } @@ -647,15 +647,15 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, } mem_list_count++; k = 
(k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ j = start_j; new_file_read = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && + while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_read < new_buffer_read)) { if(file_list_count) { - if((new_file_read + flat_file->blocklens[j]) > + if((new_file_read + flat_file->blocklens[j]) > new_buffer_read) { end_frd_size = new_buffer_read - new_file_read; new_file_read = new_buffer_read; @@ -676,9 +676,9 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_read < new_buffer_read) && + if ((new_file_read < new_buffer_read) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_read = 0; mem_list_count = 0; @@ -707,14 +707,14 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, } /* while (new_buffer_read < new_file_read) */ } /* if ((new_file_read < new_buffer_read) && (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_read < bufsize-size_read)) */ /* fills the allocated readlist arrays */ k = start_k; j = start_j; - for (i=0; icount) + flat_buf->indices[k]; if(!i) { @@ -741,7 +741,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, k = (k + 1)%flat_buf->count; } /* for (i=0; iindices[j] + + file_offsets[i] = disp + flat_file->indices[j] + ((ADIO_Offset)n_filetypes) * filetype_extent; if (!i) { file_lengths[0] = frd_size; @@ -752,7 +752,7 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, file_lengths[i] = end_frd_size; if (flat_file->blocklens[j] == end_frd_size) frd_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { frd_size = flat_file->blocklens[j] - end_frd_size; j--; @@ -799,7 +799,7 
@@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, /* when incrementing fp_ind, need to also take into account the file type: * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----| * if we wrote N elements, offset needs to point at beginning of type, not - * at empty region at offset N+1) + * at empty region at offset N+1) * * As we discussed on mpich-discuss in may/june 2009, the code below might * look wierd, but by putting fp_ind at the last byte written, the next @@ -809,10 +809,10 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, fd->fp_ind = file_offsets[file_list_count-1]+ file_lengths[file_list_count-1]; } - + ADIOI_Free(file_offsets); ADIOI_Free(file_lengths); - + if (err_flag == 0) *error_code = MPI_SUCCESS; error_state: @@ -820,11 +820,11 @@ void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); - /* This is a temporary way of filling in status. The right way is to - keep track of how much data was actually read and placed in buf + /* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually read and placed in buf by ADIOI_BUFFERED_READ. */ #endif - + if (!buftype_is_contig) ADIOI_Delete_flattened(datatype); } diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_resize.c b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_resize.c index 60d2fcaba20..90734ed59d5 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_resize.c +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_resize.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -49,5 +49,5 @@ void ADIOI_ZOIDFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) } /* - * vim: ts=8 sts=4 sw=4 noexpandtab + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_write_list.c b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_write_list.c index 8ca0594fe56..ced3c109553 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_write_list.c +++ b/ompi/mca/io/romio314/romio/adio/ad_zoidfs/ad_zoidfs_write_list.c @@ -1,7 +1,7 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- - * vim: ts=8 sts=4 sw=4 noexpandtab - * - * Copyright (C) 2008 University of Chicago. +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- + * vim: ts=8 sts=4 sw=4 noexpandtab + * + * Copyright (C) 2008 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -90,7 +90,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -98,7 +98,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, MPI_Type_size_x(datatype, &buftype_size); MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; - + bufsize = buftype_size * count; zoidfs_obj_ptr = (ADIOI_ZOIDFS_object*)fd->fs_ptr; @@ -112,7 +112,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, ADIOI_Flatten_datatype(datatype); flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; - + if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { off = fd->disp + etype_size * offset; } @@ -135,11 +135,11 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, /* step through each block in memory, filling memory arrays */ while (b_blks_wrote < total_blks_to_write) { for (i=0; icount; i++) { - mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = - buf + - j*buftype_extent + + 
mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = + buf + + j*buftype_extent + flat_buf->indices[i]; - mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = + mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = flat_buf->blocklens[i]; file_lengths += flat_buf->blocklens[i]; b_blks_wrote++; @@ -157,9 +157,9 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif NO_STALE(err_flag, fd, zoidfs_obj_ptr, - zoidfs_write(zoidfs_obj_ptr, + zoidfs_write(zoidfs_obj_ptr, mem_list_count, - mem_offsets, mem_lengths, + mem_offsets, mem_lengths, 1, &file_offsets, &file_lengths, ZOIDFS_NO_OP_HINT)); /* --BEGIN ERROR HANDLING-- */ @@ -175,8 +175,8 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif total_bytes_written += file_lengths; - - /* in the case of error or the last write list call, + + /* in the case of error or the last write list call, * leave here */ /* --BEGIN ERROR HANDLING-- */ if (err_flag) { @@ -192,14 +192,14 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, file_offsets += file_lengths; file_lengths = 0; - } + } } /* for (i=0; icount; i++) */ j++; } /* while (b_blks_wrote < total_blks_to_write) */ ADIOI_Free(mem_offsets); ADIOI_Free(mem_lengths); - if (file_ptr_type == ADIO_INDIVIDUAL) + if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += total_bytes_written; if (!err_flag) *error_code = MPI_SUCCESS; @@ -208,7 +208,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. 
*/ #endif @@ -227,7 +227,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, initial_off = offset; /* for each case - ADIO_Individual pointer or explicit, find offset - (file offset in bytes), n_filetypes (how many filetypes into file + (file offset in bytes), n_filetypes (how many filetypes into file to start), fwr_size (remaining amount of data in present file block), and st_index (start point in terms of blocks in starting filetype) */ @@ -238,11 +238,11 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, while (!flag) { n_filetypes++; for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + + if (disp + flat_file->indices[i] + ((ADIO_Offset) n_filetypes)*filetype_extent + flat_file->blocklens[i] >= offset) { st_index = i; - fwr_size = disp + flat_file->indices[i] + + fwr_size = disp + flat_file->indices[i] + ((ADIO_Offset) n_filetypes)*filetype_extent + flat_file->blocklens[i] - offset; flag = 1; @@ -256,7 +256,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -277,7 +277,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, start_off = offset; st_fwr_size = fwr_size; st_n_filetypes = n_filetypes; - + if (buftype_is_contig && !filetype_is_contig) { /* contiguous in memory, noncontiguous in file. 
should be the most @@ -286,14 +286,14 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, /* only one memory off-len pair, so no array */ size_t mem_lengths; size_t mem_offsets; - + i = 0; j = st_index; off = offset; n_filetypes = st_n_filetypes; - + mem_list_count = 1; - + /* determine how many blocks in file to write */ f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize); total_blks_to_write = 1; @@ -306,17 +306,17 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, f_data_wrote += flat_file->blocklens[j]; total_blks_to_write++; if (j<(flat_file->count-1)) j++; - else j = 0; + else j = 0; } - + j = st_index; n_filetypes = st_n_filetypes; n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE; extra_blks = total_blks_to_write%MAX_ARRAY_SIZE; - + mem_offsets = (size_t)buf; mem_lengths = 0; - + /* if at least one full writelist, allocate file arrays at max array size and don't free until very end */ if (n_write_lists) { @@ -333,7 +333,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, file_lengths = (uint64_t*)ADIOI_Malloc(extra_blks* sizeof(uint64_t)); } - + /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ for (i=0; iindices[j]; file_lengths[k] = flat_file->blocklens[j]; @@ -362,7 +362,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, NO_STALE(err_flag, fd, zoidfs_obj_ptr, zoidfs_write(zoidfs_obj_ptr, 1, buf, &mem_lengths, - file_list_count, + file_list_count, file_offsets, file_lengths, ZOIDFS_NO_OP_HINT)); #ifdef ADIOI_MPE_LOGGING @@ -394,12 +394,12 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, } for (k=0; kindices[j]; /* XXX: double-check these casts */ if (k == (extra_blks - 1)) { - file_lengths[k] = bufsize + file_lengths[k] = bufsize - mem_lengths - mem_offsets + (size_t)buf; } else file_lengths[k] = flat_file->blocklens[j]; @@ -415,11 +415,11 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( 
ADIOI_MPE_write_a, 0, NULL ); #endif - NO_STALE(err_flag, fd, zoidfs_obj_ptr, - zoidfs_write(zoidfs_obj_ptr, 1, - (const void **)&mem_offsets, + NO_STALE(err_flag, fd, zoidfs_obj_ptr, + zoidfs_write(zoidfs_obj_ptr, 1, + (const void **)&mem_offsets, &mem_lengths, - file_list_count, + file_list_count, file_offsets, file_lengths, ZOIDFS_NO_OP_HINT)); #ifdef ADIOI_MPE_LOGGING @@ -437,7 +437,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, /* --END ERROR HANDLING-- */ total_bytes_written += mem_lengths; } - } + } else { /* noncontiguous in memory as well as in file */ @@ -456,7 +456,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, max_mem_list = 0; max_file_list = 0; - /* run through and file max_file_list and max_mem_list so that you + /* run through and file max_file_list and max_mem_list so that you can allocate the file and memory arrays less than MAX_ARRAY_SIZE if possible */ @@ -464,7 +464,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, k = start_k; new_buffer_write = 0; mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && + while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) { /* find mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are @@ -472,9 +472,9 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, written in the next immediate write list is less than bufsize */ if(mem_list_count) { - if((new_buffer_write + flat_buf->blocklens[k] + + if((new_buffer_write + flat_buf->blocklens[k] + size_wrote) > bufsize) { - end_bwr_size = new_buffer_write + + end_bwr_size = new_buffer_write + flat_buf->blocklens[k] - (bufsize - size_wrote); new_buffer_write = bufsize - size_wrote; } @@ -492,15 +492,15 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, } mem_list_count++; k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while 
((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ j = start_j; new_file_write = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && - (new_file_write < new_buffer_write)) { + while ((file_list_count < MAX_ARRAY_SIZE) && + (new_file_write < new_buffer_write)) { if(file_list_count) { - if((new_file_write + flat_file->blocklens[j]) > + if((new_file_write + flat_file->blocklens[j]) > new_buffer_write) { end_fwr_size = new_buffer_write - new_file_write; new_file_write = new_buffer_write; @@ -521,9 +521,9 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_write < new_buffer_write) && + if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_write = 0; mem_list_count = 0; @@ -531,7 +531,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, if(mem_list_count) { if((new_buffer_write + flat_buf->blocklens[k]) > new_file_write) { - end_bwr_size = new_file_write - + end_bwr_size = new_file_write - new_buffer_write; new_buffer_write = new_file_write; k--; @@ -553,13 +553,13 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, } /* while (new_buffer_write < new_file_write) */ } /* if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ /* fakes filling the writelist arrays of lengths found above */ k = start_k; j = start_j; - for (i=0; iblocklens[k] == end_bwr_size) @@ -580,7 +580,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, if (i == (file_list_count - 1)) { if (flat_file->blocklens[j] == end_fwr_size) fwr_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { fwr_size = flat_file->blocklens[j] - end_fwr_size; j--; 
@@ -613,11 +613,11 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, * region and many (700) very small memory regions. both cases caused * problems for this code */ - if ( ( (file_list_count == 1) && + if ( ( (file_list_count == 1) && (new_file_write < flat_file->blocklens[0] ) ) || - ((mem_list_count == 1) && + ((mem_list_count == 1) && (new_buffer_write < flat_buf->blocklens[0]) ) || - ((file_list_count == MAX_ARRAY_SIZE) && + ((file_list_count == MAX_ARRAY_SIZE) && (new_file_write < flat_buf->blocklens[0]) ) || ( (mem_list_count == MAX_ARRAY_SIZE) && (new_buffer_write < flat_file->blocklens[0])) ) @@ -633,7 +633,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, mem_lengths = (size_t*)ADIOI_Malloc(max_mem_list*sizeof(size_t)); file_offsets = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t)); file_lengths = (uint64_t*)ADIOI_Malloc(max_file_list*sizeof(uint64_t)); - + size_wrote = 0; n_filetypes = st_n_filetypes; fwr_size = st_fwr_size; @@ -646,12 +646,12 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, /* this section calculates mem_list_count and file_list_count and also finds the possibly odd sized last array elements in new_fwr_size and new_bwr_size */ - + while (size_wrote < bufsize) { k = start_k; new_buffer_write = 0; mem_list_count = 0; - while ((mem_list_count < MAX_ARRAY_SIZE) && + while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) { /* find mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are @@ -659,9 +659,9 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, written in the next immediate write list is less than bufsize */ if(mem_list_count) { - if((new_buffer_write + flat_buf->blocklens[k] + + if((new_buffer_write + flat_buf->blocklens[k] + size_wrote) > bufsize) { - end_bwr_size = new_buffer_write + + end_bwr_size = new_buffer_write + flat_buf->blocklens[k] - (bufsize - 
size_wrote); new_buffer_write = bufsize - size_wrote; } @@ -679,15 +679,15 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, } mem_list_count++; k = (k + 1)%flat_buf->count; - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ j = start_j; new_file_write = 0; file_list_count = 0; - while ((file_list_count < MAX_ARRAY_SIZE) && + while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_write < new_buffer_write)) { if(file_list_count) { - if((new_file_write + flat_file->blocklens[j]) > + if((new_file_write + flat_file->blocklens[j]) > new_buffer_write) { end_fwr_size = new_buffer_write - new_file_write; new_file_write = new_buffer_write; @@ -708,9 +708,9 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; - + k = start_k; - if ((new_file_write < new_buffer_write) && + if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_write = 0; mem_list_count = 0; @@ -740,17 +740,17 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, } /* while (new_buffer_write < new_file_write) */ } /* if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) */ - } /* while ((mem_list_count < MAX_ARRAY_SIZE) && + } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ /* fills the allocated writelist arrays */ k = start_k; j = start_j; - for (i=0; icount) + flat_buf->indices[k]; - + if(!i) { mem_lengths[0] = bwr_size; mem_offsets[0] += flat_buf->blocklens[k] - bwr_size; @@ -775,7 +775,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, k = (k + 1)%flat_buf->count; } /* for (i=0; iindices[j] + + file_offsets[i] = disp + flat_file->indices[j] + ((ADIO_Offset)n_filetypes) * filetype_extent; if (!i) { file_lengths[0] = fwr_size; @@ -786,7 +786,7 @@ void 
ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, file_lengths[i] = end_fwr_size; if (flat_file->blocklens[j] == end_fwr_size) fwr_size = flat_file->blocklens[(j+1)% - flat_file->count]; + flat_file->count]; else { fwr_size = flat_file->blocklens[j] - end_fwr_size; j--; @@ -805,9 +805,9 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif NO_STALE(err_flag, fd, zoidfs_obj_ptr, - zoidfs_write(zoidfs_obj_ptr, - mem_list_count, mem_offsets, mem_lengths, - file_list_count, + zoidfs_write(zoidfs_obj_ptr, + mem_list_count, mem_offsets, mem_lengths, + file_list_count, file_offsets, file_lengths, ZOIDFS_NO_OP_HINT)); /* --BEGIN ERROR HANDLING-- */ if (err_flag != ZFS_OK) { @@ -833,7 +833,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, /* when incrementing fp_ind, need to also take into account the file type: * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----| * if we wrote N elements, offset needs to point at beginning of type, not - * at empty region at offset N+1). + * at empty region at offset N+1). * * As we discussed on mpich-discuss in may/june 2009, the code below might * look wierd, but by putting fp_ind at the last byte written, the next @@ -853,7 +853,7 @@ void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. 
*/ #endif diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_aggregate.c b/ompi/mca/io/romio314/romio/adio/common/ad_aggregate.c index 55b55440df5..fe225394c51 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_aggregate.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_aggregate.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997-2001 University of Chicago. +/* + * Copyright (C) 1997-2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -22,14 +22,14 @@ * * The last three of these were originally in ad_read_coll.c, but they are * also shared with ad_write_coll.c. I felt that they were better kept with - * the rest of the shared aggregation code. + * the rest of the shared aggregation code. */ /* Discussion of values available from above: * * ADIO_Offset st_offsets[0..nprocs-1] * ADIO_Offset end_offsets[0..nprocs-1] - * These contain a list of start and end offsets for each process in + * These contain a list of start and end offsets for each process in * the communicator. For example, an access at loc 10, size 10 would * have a start offset of 10 and end offset of 19. * int nprocs @@ -39,15 +39,15 @@ * starting location of "file domain"; region that a given process will * perform aggregation for (i.e. actually do I/O) * ADIO_Offset fd_end[0..nprocs_for_coll-1] - * start + size - 1 roughly, but it can be less, or 0, in the case of + * start + size - 1 roughly, but it can be less, or 0, in the case of * uneven distributions */ /* ADIOI_Calc_aggregator() * - * The intention here is to implement a function which provides basically - * the same functionality as in Rajeev's original version of - * ADIOI_Calc_my_req(). He used a ceiling division approach to assign the + * The intention here is to implement a function which provides basically + * the same functionality as in Rajeev's original version of + * ADIOI_Calc_my_req(). 
He used a ceiling division approach to assign the * file domains, and we use the same approach here when calculating the * location of an offset/len in a specific file domain. Further we assume * this same distribution when calculating the rank_index, which is later @@ -56,7 +56,7 @@ * A better (i.e. more general) approach would be to use the list of file * domains only. This would be slower in the case where the * original ceiling division was used, but it would allow for arbitrary - * distributions of regions to aggregators. We'd need to know the + * distributions of regions to aggregators. We'd need to know the * nprocs_for_coll in that case though, which we don't have now. * * Note a significant difference between this function and Rajeev's old code: @@ -71,9 +71,9 @@ * actually available in this file domain. */ int ADIOI_Calc_aggregator(ADIO_File fd, - ADIO_Offset off, - ADIO_Offset min_off, - ADIO_Offset *len, + ADIO_Offset off, + ADIO_Offset min_off, + ADIO_Offset *len, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end) @@ -107,7 +107,7 @@ int ADIOI_Calc_aggregator(ADIO_File fd, /* remember here that even in Rajeev's original code it was the case that * different aggregators could end up with different amounts of data to * aggregate. here we use fd_end[] to make sure that we know how much - * data this aggregator is working with. + * data this aggregator is working with. * * the +1 is to take into account the end vs. length issue. 
*/ @@ -127,8 +127,8 @@ int ADIOI_Calc_aggregator(ADIO_File fd, void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset *end_offsets, int nprocs, int nprocs_for_coll, ADIO_Offset *min_st_offset_ptr, - ADIO_Offset **fd_start_ptr, ADIO_Offset - **fd_end_ptr, int min_fd_size, + ADIO_Offset **fd_start_ptr, ADIO_Offset + **fd_end_ptr, int min_fd_size, ADIO_Offset *fd_size_ptr, int striping_unit) { @@ -144,7 +144,7 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset #endif #ifdef AGG_DEBUG - FPRINTF(stderr, "ADIOI_Calc_file_domains: %d aggregator(s)\n", + FPRINTF(stderr, "ADIOI_Calc_file_domains: %d aggregator(s)\n", nprocs_for_coll); #endif @@ -162,9 +162,9 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset the file that will be "owned" by each process */ /* partition the total file access range equally among nprocs_for_coll - processes */ + processes */ fd_size = ((max_end_offset - min_st_offset + 1) + nprocs_for_coll - - 1)/nprocs_for_coll; + 1)/nprocs_for_coll; /* ceiling division as in HPF block distribution */ /* Tweak the file domains so that no fd is smaller than a threshold. 
We @@ -176,9 +176,9 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset fd_size = min_fd_size; *fd_start_ptr = (ADIO_Offset *) - ADIOI_Malloc(nprocs_for_coll*sizeof(ADIO_Offset)); + ADIOI_Malloc(nprocs_for_coll*sizeof(ADIO_Offset)); *fd_end_ptr = (ADIO_Offset *) - ADIOI_Malloc(nprocs_for_coll*sizeof(ADIO_Offset)); + ADIOI_Malloc(nprocs_for_coll*sizeof(ADIO_Offset)); fd_start = *fd_start_ptr; fd_end = *fd_end_ptr; @@ -195,21 +195,21 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset end_off = fd_start[0] + fd_size; rem_front = end_off % striping_unit; rem_back = striping_unit - rem_front; - if (rem_front < rem_back) + if (rem_front < rem_back) end_off -= rem_front; - else + else end_off += rem_back; fd_end[0] = end_off - 1; - + /* align fd_end[i] to the nearest file lock boundary */ for (i=1; i 0) { - FPRINTF(stdout, "data needed from %d (count = %d):\n", i, + FPRINTF(stdout, "data needed from %d (count = %d):\n", i, my_req[i].count); for (l=0; l < my_req[i].count; l++) { FPRINTF(stdout, " off[%d] = %lld, len[%d] = %d\n", l, @@ -421,18 +421,18 @@ void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_ -void ADIOI_Calc_others_req(ADIO_File fd, int count_my_req_procs, +void ADIOI_Calc_others_req(ADIO_File fd, int count_my_req_procs, int *count_my_req_per_proc, - ADIOI_Access *my_req, + ADIOI_Access *my_req, int nprocs, int myrank, int *count_others_req_procs_ptr, - ADIOI_Access **others_req_ptr) + ADIOI_Access **others_req_ptr) { /* determine what requests of other processes lie in this process's file domain */ /* count_others_req_procs = number of processes whose requests lie in - this process's file domain (including this process itself) + this process's file domain (including this process itself) count_others_req_per_proc[i] indicates how many separate contiguous requests of proc. i lie in this process's file domain. 
*/ @@ -452,7 +452,7 @@ void ADIOI_Calc_others_req(ADIO_File fd, int count_my_req_procs, count_others_req_per_proc, 1, MPI_INT, fd->comm); *others_req_ptr = (ADIOI_Access *) - ADIOI_Malloc(nprocs*sizeof(ADIOI_Access)); + ADIOI_Malloc(nprocs*sizeof(ADIOI_Access)); others_req = *others_req_ptr; count_others_req_procs = 0; @@ -464,25 +464,25 @@ void ADIOI_Calc_others_req(ADIO_File fd, int count_my_req_procs, others_req[i].lens = ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(ADIO_Offset)); others_req[i].mem_ptrs = (MPI_Aint *) - ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(MPI_Aint)); + ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(MPI_Aint)); count_others_req_procs++; } else others_req[i].count = 0; } - + /* now send the calculated offsets and lengths to respective processes */ requests = (MPI_Request *) - ADIOI_Malloc(1+2*(count_my_req_procs+count_others_req_procs)*sizeof(MPI_Request)); + ADIOI_Malloc(1+2*(count_my_req_procs+count_others_req_procs)*sizeof(MPI_Request)); /* +1 to avoid a 0-size malloc */ j = 0; for (i=0; icomm, &requests[j]); j++; - MPI_Irecv(others_req[i].lens, others_req[i].count, + MPI_Irecv(others_req[i].lens, others_req[i].count, ADIO_OFFSET, i, i+myrank+1, fd->comm, &requests[j]); j++; } @@ -490,10 +490,10 @@ void ADIOI_Calc_others_req(ADIO_File fd, int count_my_req_procs, for (i=0; i < nprocs; i++) { if (my_req[i].count) { - MPI_Isend(my_req[i].offsets, my_req[i].count, + MPI_Isend(my_req[i].offsets, my_req[i].count, ADIO_OFFSET, i, i+myrank, fd->comm, &requests[j]); j++; - MPI_Isend(my_req[i].lens, my_req[i].count, + MPI_Isend(my_req[i].lens, my_req[i].count, ADIO_OFFSET, i, i+myrank+1, fd->comm, &requests[j]); j++; } diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_aggregate_new.c b/ompi/mca/io/romio314/romio/adio/common/ad_aggregate_new.c index ba5af6b0c3a..a01a41c5034 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_aggregate_new.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_aggregate_new.c @@ -1,6 +1,6 @@ /* -*- 
Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 2008 University of Chicago. +/* + * Copyright (C) 2008 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -39,7 +39,7 @@ void ADIOI_Calc_file_realms (ADIO_File fd, ADIO_Offset min_st_offset, { int nprocs_for_coll; int file_realm_calc_type; - + MPI_Datatype *file_realm_types = NULL; ADIO_Offset *file_realm_st_offs = NULL; @@ -49,8 +49,8 @@ void ADIOI_Calc_file_realms (ADIO_File fd, ADIO_Offset min_st_offset, #ifdef DEBUG printf ("ADIOI_Calc_file_realms\n"); #endif - - nprocs_for_coll = fd->hints->cb_nodes; + + nprocs_for_coll = fd->hints->cb_nodes; file_realm_calc_type = fd->hints->cb_fr_type; /* If PFRs are disabled we know these pointers are not allocated */ @@ -85,7 +85,7 @@ void ADIOI_Calc_file_realms (ADIO_File fd, ADIO_Offset min_st_offset, ADIOI_Malloc (nprocs_for_coll * sizeof(ADIO_Offset)); file_realm_types = (MPI_Datatype *) ADIOI_Malloc (nprocs_for_coll * sizeof(MPI_Datatype)); - + if (file_realm_calc_type == ADIOI_FR_AAR) { ADIOI_Calc_file_realms_aar (fd, nprocs_for_coll, fd->hints->cb_pfr, @@ -218,7 +218,7 @@ void ADIOI_Calc_file_realms_fsize (ADIO_File fd, int nprocs_for_coll, MPI_Datatype simpletype; ADIO_Fcntl (fd, ADIO_FCNTL_GET_FSIZE, &fcntl_struct, &error_code); - + /* use impending file size since a write call may lengthen the file */ fsize = ADIOI_MAX (fcntl_struct.fsize, max_end_offset+1); fr_size = (fsize + nprocs_for_coll - 1) / nprocs_for_coll; @@ -274,7 +274,7 @@ int ADIOI_Agg_idx (int rank, ADIO_File fd) { static void align_fr (int fr_size, ADIO_Offset fr_off, int alignment, int *aligned_fr_size, ADIO_Offset *aligned_fr_off) { *aligned_fr_off = fr_off - (fr_off % alignment); - *aligned_fr_size = ((fr_off + fr_size) / alignment) * alignment - + *aligned_fr_size = ((fr_off + fr_size) / alignment) * alignment - *aligned_fr_off; if ((fr_off + fr_size) % alignment) *aligned_fr_size += alignment; diff --git 
a/ompi/mca/io/romio314/romio/adio/common/ad_close.c b/ompi/mca/io/romio314/romio/adio/common/ad_close.c index 7aa5ce07a6c..13b9f7c6019 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_close.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_close.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -43,7 +43,7 @@ void ADIO_Close(ADIO_File fd, int *error_code) } else { *error_code = MPI_SUCCESS; } - + } if (fd->access_mode & ADIO_DELETE_ON_CLOSE) { @@ -101,7 +101,7 @@ void ADIO_Close(ADIO_File fd, int *error_code) MPI_Comm_free(&(fd->comm)); - ADIOI_Free(fd->filename); + ADIOI_Free(fd->filename); MPI_Type_get_envelope(fd->etype, &i, &j, &k, &combiner); if (combiner != MPI_COMBINER_NAMED) MPI_Type_free(&(fd->etype)); diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_coll_build_req_new.c b/ompi/mca/io/romio314/romio/adio/common/ad_coll_build_req_new.c index 2ea97545f99..52dfcba0860 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_coll_build_req_new.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_coll_build_req_new.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -33,7 +33,7 @@ static inline ADIO_Offset view_state_get_cur_sz(view_state *tmp_view_state_p, { flatten_state *tmp_state_p = NULL; switch(op_type) - { + { case TEMP_OFF: tmp_state_p = &(tmp_view_state_p->tmp_state); break; @@ -62,7 +62,7 @@ static inline ADIO_Offset view_state_get_next_len(view_state *tmp_view_state_p, default: fprintf(stderr, "op_type invalid\n"); } - return (ADIO_Offset) + return (ADIO_Offset) tmp_view_state_p->flat_type_p->blocklens[tmp_state_p->idx] - tmp_state_p->cur_reg_off; } @@ -73,7 +73,7 @@ static inline ADIO_Offset view_state_get_next_len(view_state *tmp_view_state_p, * possible later on. */ static inline int view_state_add_region( ADIO_Offset max_sz, - view_state *tmp_view_state_p, + view_state *tmp_view_state_p, ADIO_Offset *st_reg_p, ADIO_Offset *tmp_reg_sz_p, int op_type) @@ -104,23 +104,23 @@ static inline int view_state_add_region( /* Should be looking at some data (or it's a zero len blocklens * (i.e. placeholder). */ - assert(tmp_state_p->cur_reg_off != + assert(tmp_state_p->cur_reg_off != tmp_flat_type_p->blocklens[tmp_state_p->idx]); /* Shouldn't have been called if the view_state is done. */ assert(tmp_state_p->cur_sz != tmp_view_state_p->sz); /* Make sure we are not in a non-zero region in the flat_type */ assert(tmp_flat_type_p->blocklens[tmp_state_p->idx] != 0); - + #ifdef DEBUG3 fprintf(stderr, "view_state:(blocklens[%Ld]=%d,cur_reg_off=%Ld," - "max_sz=%Ld)\n", tmp_state_p->idx, - tmp_flat_type_p->blocklens[tmp_state_p->idx], + "max_sz=%Ld)\n", tmp_state_p->idx, + tmp_flat_type_p->blocklens[tmp_state_p->idx], tmp_state_p->cur_reg_off, max_sz); #endif /* Can it add the whole piece? 
*/ - if (tmp_flat_type_p->blocklens[tmp_state_p->idx] - + if (tmp_flat_type_p->blocklens[tmp_state_p->idx] - tmp_state_p->cur_reg_off <= max_sz) { data_sz = tmp_flat_type_p->blocklens[tmp_state_p->idx] - @@ -133,15 +133,15 @@ static inline int view_state_add_region( { assert(tmp_flat_type_p->blocklens[tmp_state_p->idx] != 0); tmp_state_p->abs_off += data_sz; -#ifdef DEBUG3 +#ifdef DEBUG3 fprintf(stderr, "view_state_add_region: %s contig type " - "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", - off_type_name[op_type], tmp_state_p->abs_off - data_sz, + "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", + off_type_name[op_type], tmp_state_p->abs_off - data_sz, tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz); #endif } else - { + { /* Is this the last region in the datatype? */ if (tmp_state_p->idx == (tmp_flat_type_p->count - 1)) { @@ -151,30 +151,30 @@ static inline int view_state_add_region( tmp_view_state_p->ext; #ifdef DEBUG3 fprintf(stderr, "view_state_add_region: %s last region for type " - "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", - off_type_name[op_type], tmp_state_p->abs_off - data_sz, + "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", + off_type_name[op_type], tmp_state_p->abs_off - data_sz, tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz); #endif } else { - tmp_state_p->abs_off += + tmp_state_p->abs_off += tmp_flat_type_p->indices[tmp_state_p->idx + 1] - (tmp_flat_type_p->indices[tmp_state_p->idx] + tmp_state_p->cur_reg_off); #ifdef DEBUG3 fprintf(stderr, "view_state_add_region: %s inner region type " - "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", - off_type_name[op_type], tmp_state_p->abs_off - + "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n", + off_type_name[op_type], tmp_state_p->abs_off - (tmp_flat_type_p->indices[tmp_state_p->idx + 1] - (tmp_flat_type_p->indices[tmp_state_p->idx] + - tmp_state_p->cur_reg_off)), tmp_state_p->abs_off, + tmp_state_p->cur_reg_off)), 
tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz); #endif } /* Increment idx to next non-zero region in the flat_type */ do { - tmp_state_p->idx = + tmp_state_p->idx = (tmp_state_p->idx + 1) % tmp_flat_type_p->count; } while (tmp_flat_type_p->blocklens[tmp_state_p->idx] == 0); } @@ -186,10 +186,10 @@ static inline int view_state_add_region( tmp_state_p->cur_reg_off += data_sz; tmp_state_p->abs_off += data_sz; tmp_state_p->cur_sz += data_sz; -#ifdef DEBUG3 +#ifdef DEBUG3 fprintf(stderr, "view_state_add_region: %s partial region type " - "(cur_reg_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld\n", - off_type_name[op_type], tmp_state_p->cur_reg_off, + "(cur_reg_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld\n", + off_type_name[op_type], tmp_state_p->cur_reg_off, tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz); #endif } @@ -204,7 +204,7 @@ static inline int view_state_add_region( /* Set up the abs_off, idx, and cur_reg_off of a view_state for the * tmp_state or the cur_state. */ int ADIOI_init_view_state(int file_ptr_type, - int nprocs, + int nprocs, view_state *view_state_arr, int op_type) { @@ -227,7 +227,7 @@ int ADIOI_init_view_state(int file_ptr_type, default: fprintf(stderr, "op_type invalid\n"); } - + tmp_view_p = &(view_state_arr[i]); tmp_flat_type_p = tmp_view_p->flat_type_p; @@ -235,7 +235,7 @@ int ADIOI_init_view_state(int file_ptr_type, tmp_state_p->abs_off = tmp_view_p->fp_ind; else tmp_state_p->abs_off = tmp_view_p->disp; - + tmp_off_used = 0; /* initialize tmp_state idx */ @@ -244,7 +244,7 @@ int ADIOI_init_view_state(int file_ptr_type, if (file_ptr_type == ADIO_EXPLICIT_OFFSET) tmp_state_p->abs_off += tmp_flat_type_p->indices[tmp_state_p->idx]; - /* Initialize the abs_off by moving into the datatype + /* Initialize the abs_off by moving into the datatype * byte_off bytes. Since we only do this in the beginning, we * make the assumption that pieces are added whole until the last * piece which MAY be partial. 
*/ @@ -252,19 +252,19 @@ int ADIOI_init_view_state(int file_ptr_type, { view_state_add_region( tmp_view_p->byte_off - tmp_off_used, - &(view_state_arr[i]), &st_reg, &tmp_reg_sz, + &(view_state_arr[i]), &st_reg, &tmp_reg_sz, op_type); } - + /* Re-initialize the cur_size so that the abs_off was set to * the proper position while the actual size = 0.*/ tmp_state_p->cur_sz = 0; #ifdef DEBUG1 fprintf(stderr, "init_view_state: %s (idx=%d,byte_off=%Ld," - "abs_off=%Ld,reg_off=%Ld,sz=%Ld)\n", off_type_name[op_type], + "abs_off=%Ld,reg_off=%Ld,sz=%Ld)\n", off_type_name[op_type], i, tmp_view_p->byte_off, tmp_state_p->abs_off, tmp_state_p->cur_reg_off, tmp_view_p->sz); -#endif +#endif } return 0; @@ -277,7 +277,7 @@ static inline int get_next_fr_off(ADIO_File fd, ADIO_Offset fr_st_off, MPI_Datatype *fr_type_p, ADIO_Offset *fr_next_off_p, - ADIO_Offset *fr_max_len_p) + ADIO_Offset *fr_max_len_p) { MPI_Aint fr_extent = -1; ADIO_Offset tmp_off, off_rem; @@ -297,7 +297,7 @@ static inline int get_next_fr_off(ADIO_File fd, return 0; } - /* Calculate how many times to loop through the fr_type + /* Calculate how many times to loop through the fr_type * and where the next fr_off is. */ MPI_Type_extent(*fr_type_p, &fr_extent); tmp_off = off - fr_st_off; @@ -315,12 +315,12 @@ static inline int get_next_fr_off(ADIO_File fd, else if (off_rem < fr_node_p->indices[i] + fr_node_p->blocklens[i]) { *fr_next_off_p = off; - *fr_max_len_p = fr_node_p->blocklens[i] - + *fr_max_len_p = fr_node_p->blocklens[i] - (off_rem - fr_node_p->indices[i]); return off; } } - + /* Shouldn't get here. 
*/ fprintf(stderr, "get_next_fr_off: Couldn't find the correct " "location of the next offset for this file realm.\n"); @@ -340,7 +340,7 @@ static inline int find_next_off(ADIO_File fd, ADIO_Offset *cur_reg_max_len_p) { ADIOI_Flatlist_node *tmp_flat_type_p = NULL; - ADIO_Offset tmp_off = -1, fr_next_off = -1, fr_max_len = -1, + ADIO_Offset tmp_off = -1, fr_next_off = -1, fr_max_len = -1, tmp_fr_max_len = -1; int ret = 0; flatten_state *tmp_state_p = NULL; @@ -364,7 +364,7 @@ static inline int find_next_off(ADIO_File fd, default: fprintf(stderr, "op_type invalid\n"); } - + tmp_flat_type_p = view_state_p->flat_type_p; /* Can we use this proc? */ @@ -372,26 +372,26 @@ static inline int find_next_off(ADIO_File fd, tmp_st_off = 0; tmp_reg_sz = 0; /* If the current region is not within the file realm, advance - * the state until it is and calculate the end of the next file + * the state until it is and calculate the end of the next file * realm in fr_max_len. */ ret = get_next_fr_off(fd, - tmp_state_p->abs_off, + tmp_state_p->abs_off, fr_st_off, fr_type_p, &fr_next_off, &fr_max_len); - + while ((tmp_state_p->abs_off < fr_next_off) && (tmp_state_p->cur_sz != view_state_p->sz)) { - + /* While this might appear to be erroneous at first, * view_state_add_region can only add a single piece at a * time. Therefore, it will never overshoot the beginning * of the next file realm. When it finally does enter the * next file realm it will not be able to go beyond its * first piece. 
*/ - + #ifdef DTYPE_SKIP if (tmp_flat_type_p->count > 1) { /* let's see if we can skip whole datatypes */ @@ -418,7 +418,7 @@ static inline int find_next_off(ADIO_File fd, op_type); ret = get_next_fr_off(fd, - tmp_state_p->abs_off, + tmp_state_p->abs_off, fr_st_off, fr_type_p, &fr_next_off, @@ -446,7 +446,7 @@ static inline int find_next_off(ADIO_File fd, * should return a list of MPI_Datatypes that correspond to client * communication into a collective buffer, a list of corresponding * sizes, and an aggregate MPI_Datatype which will be used as a - * filetype in MPI_File_write/read on the aggregator. */ + * filetype in MPI_File_write/read on the aggregator. */ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, view_state *client_file_view_state_arr, MPI_Datatype *client_comm_dtype_arr, @@ -480,21 +480,21 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, memset(client_comm_sz_arr, 0, nprocs*sizeof(ADIO_Offset)); - if ((client_comm_next_off_arr = (ADIO_Offset *) + if ((client_comm_next_off_arr = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset))) == NULL) { fprintf(stderr, "ADIOI_Build_agg_reqs: malloc client_next_off_arr " "failed\n"); return -1; } - + if ((client_ol_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int))) == NULL) { fprintf(stderr, "ADIOI_Build_agg_reqs: " "malloc client_ol_ct_arr failed\n"); return -1; } - if ((client_ol_cur_ct_arr = + if ((client_ol_cur_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int))) == NULL) { fprintf(stderr, "ADIOI_Build_agg_reqs: " @@ -517,9 +517,9 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, /* initialize heap */ ADIOI_Heap_create(&offset_heap, nprocs); offset_heap.size = 0; - + for (j=0; jhints->cb_buffer_size); #endif - + /* We process only contiguous file realm regions if we are * using data sieving. 
Note that we only do this for * writes since reads can be data sieved across each other @@ -595,23 +595,23 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, #ifdef DEBUG1 fprintf(stderr, "ADIOI_Build_agg_reqs: " "Data sieving file realm end changed from " - "%Ld to %Ld\n", ds_fr_end, + "%Ld to %Ld\n", ds_fr_end, cur_off + cur_reg_max_len); #endif break; } } } - + /* Add up to the end of the file realm or the collective * buffer. */ - if (cur_reg_max_len > (fd->hints->cb_buffer_size - + if (cur_reg_max_len > (fd->hints->cb_buffer_size - tmp_coll_buf_sz)) cur_reg_max_len = fd->hints->cb_buffer_size - tmp_coll_buf_sz; view_state_add_region( cur_reg_max_len, - &(client_file_view_state_arr[cur_off_proc]), + &(client_file_view_state_arr[cur_off_proc]), &st_reg, &act_reg_sz, i); switch(i) @@ -621,14 +621,14 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, * the used part of the collective buffer if the * next region is not adjacent to the previous * region. */ - if (client_comm_next_off_arr[cur_off_proc] != + if (client_comm_next_off_arr[cur_off_proc] != tmp_coll_buf_sz) { (client_ol_ct_arr[cur_off_proc])++; } - client_comm_next_off_arr[cur_off_proc] = + client_comm_next_off_arr[cur_off_proc] = tmp_coll_buf_sz + act_reg_sz; - + if (agg_next_off != st_reg) agg_ol_ct++; agg_next_off = st_reg + act_reg_sz; @@ -638,24 +638,24 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, * the next region is not adjacent to the previous * region. 
*/ next_off_idx = client_ol_cur_ct_arr[cur_off_proc]; - if (client_comm_next_off_arr[cur_off_proc] != + if (client_comm_next_off_arr[cur_off_proc] != tmp_coll_buf_sz) { client_disp_arr[cur_off_proc][next_off_idx] = tmp_coll_buf_sz; - client_blk_arr[cur_off_proc][next_off_idx] = + client_blk_arr[cur_off_proc][next_off_idx] = act_reg_sz; (client_ol_cur_ct_arr[cur_off_proc])++; } else { - client_blk_arr[cur_off_proc][next_off_idx - 1] + client_blk_arr[cur_off_proc][next_off_idx - 1] += act_reg_sz; } client_comm_sz_arr[cur_off_proc] += act_reg_sz; client_comm_next_off_arr[cur_off_proc] = tmp_coll_buf_sz + act_reg_sz; - + /* Add to the aggregator filetype if the next * region is not adjacent to the previous * region. */ @@ -667,7 +667,7 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, *agg_dtype_offset_p = st_reg; agg_disp_arr[agg_ol_cur_ct] = st_reg - (MPI_Aint) *agg_dtype_offset_p; - agg_blk_arr[agg_ol_cur_ct] = act_reg_sz; + agg_blk_arr[agg_ol_cur_ct] = act_reg_sz; agg_ol_cur_ct++; } else @@ -675,7 +675,7 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, agg_blk_arr[agg_ol_cur_ct - 1] += act_reg_sz; } agg_next_off = st_reg + act_reg_sz; - + break; default: fprintf(stderr, "ADIOI_Build_agg_reqs: Impossible type\n"); @@ -699,12 +699,12 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, #endif } } - + if (i == TEMP_OFF) { /* Allocate offset-length pairs for creating hindexed * MPI_Datatypes for both the client and the aggregator. 
*/ - if ((client_disp_arr = (MPI_Aint **) + if ((client_disp_arr = (MPI_Aint **) ADIOI_Malloc(nprocs*sizeof(MPI_Aint *))) == NULL) { fprintf(stderr, "ADIOI_Build_agg_reqs: malloc " @@ -718,7 +718,7 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, fprintf(stderr, "ADIOI_Build_agg_reqs: malloc " "client_blk_arr failed\n"); return -1; - } + } for (j = 0; j < nprocs; j++) { if ((client_disp_arr[j] = (MPI_Aint *) ADIOI_Malloc( @@ -728,7 +728,7 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, "client_disp_arr[%d] failed\n", j); return -1; } - if ((client_blk_arr[j] = (int *) + if ((client_blk_arr[j] = (int *) ADIOI_Malloc(client_ol_ct_arr[j]*sizeof(int))) == NULL) { ADIOI_Free(client_disp_arr[j]); @@ -737,21 +737,21 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, return -1; } } - - if (agg_ol_ct > 0) + + if (agg_ol_ct > 0) { if ((agg_disp_arr = (MPI_Aint *) ADIOI_Malloc( agg_ol_ct*sizeof(MPI_Aint))) == NULL) { - fprintf(stderr, + fprintf(stderr, "ADIOI_Build_agg_reqs: malloc disp_arr failed\n"); return -1; } - if ((agg_blk_arr = (int *) + if ((agg_blk_arr = (int *) ADIOI_Malloc(agg_ol_ct*sizeof(int))) == NULL) { ADIOI_Free(agg_disp_arr); - fprintf(stderr, + fprintf(stderr, "ADIOI_Build_agg_reqs: malloc blk_arr failed\n"); return -1; } @@ -759,7 +759,7 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, } ADIOI_Heap_free(&offset_heap); } - + /* Let the clients know if this aggregator is totally finished * with all possible client requests. 
*/ all_done = 1; @@ -788,7 +788,7 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, if (client_ol_cur_ct_arr[i] != client_ol_ct_arr[i]) { fprintf(stderr, "ADIOI_Build_agg_reqs: ERROR Process %d " - "processed only %d out of %d ol pairs\n", i, + "processed only %d out of %d ol pairs\n", i, client_ol_cur_ct_arr[i], client_ol_ct_arr[i]); return -1; @@ -817,13 +817,13 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, fprintf(stderr, "ADIOI_Build_agg_reqs: p %d (off,len) = ", i); for (j = 0; j < client_ol_ct_arr[i]; j++) { - fprintf(stderr, "[%d](%d,%d) ", j, + fprintf(stderr, "[%d](%d,%d) ", j, client_disp_arr[i][j], client_blk_arr[i][j]); } fprintf(stderr, "\n"); } - } + } if (agg_ol_ct) { fprintf(stderr, "ADIOI_Build_agg_reqs:agg_type(off,len)="); for (i = 0; i < agg_ol_ct; i++) @@ -845,7 +845,7 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, if (client_comm_sz_arr[i] > 0) { MPI_Type_hindexed(client_ol_ct_arr[i], client_blk_arr[i], - client_disp_arr[i], MPI_BYTE, + client_disp_arr[i], MPI_BYTE, &(client_comm_dtype_arr[i])); MPI_Type_commit(&(client_comm_dtype_arr[i])); } @@ -864,7 +864,7 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, MPI_Type_contiguous (agg_blk_arr[0], MPI_BYTE, agg_dtype_p); else if (agg_ol_ct > 1) MPI_Type_hindexed(agg_ol_ct, agg_blk_arr, agg_disp_arr, MPI_BYTE, - agg_dtype_p); + agg_dtype_p); MPI_Type_commit(agg_dtype_p); @@ -884,7 +884,7 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, * then call this function, which will generate the comm datatypes for * each aggregator (agg_comm_dtype_arr) in the upcoming * MPI_Alltoallw() */ -int ADIOI_Build_client_reqs(ADIO_File fd, +int ADIOI_Build_client_reqs(ADIO_File fd, int nprocs, view_state *my_mem_view_state_arr, view_state *agg_file_view_state_arr, @@ -928,7 +928,7 @@ int ADIOI_Build_client_reqs(ADIO_File fd, fprintf(stderr, "\n"); } #endif - + if ((agg_mem_next_off_arr = (ADIO_Offset *) ADIOI_Malloc( 
nprocs*sizeof(ADIO_Offset))) == NULL) { @@ -937,7 +937,7 @@ int ADIOI_Build_client_reqs(ADIO_File fd, return -1; } - if ((agg_comm_cur_sz_arr = (ADIO_Offset *) + if ((agg_comm_cur_sz_arr = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset))) == NULL) { fprintf(stderr, "ADIOI_Build_client_reqs: malloc agg_comm_cur_sz_arr" @@ -964,7 +964,7 @@ int ADIOI_Build_client_reqs(ADIO_File fd, if (agg_comm_sz_arr[i] > 0) total_agg_comm_sz += agg_comm_sz_arr[i]; } - + /* On the first pass see how many offset-length pairs are * necessary for each aggregator. Then allocate the correct * amount of offset-length pairs for handling each aggregator's @@ -984,11 +984,11 @@ int ADIOI_Build_client_reqs(ADIO_File fd, { tmp_agg_fr_idx = ADIOI_Agg_idx(j, fd); assert(tmp_agg_fr_idx < fd->hints->cb_nodes); - + /* If this process is not an aggregator or we have * finished all the bytes for this aggregator, move * along. */ - if (tmp_agg_fr_idx < 0 || + if (tmp_agg_fr_idx < 0 || agg_comm_cur_sz_arr[j] == agg_comm_sz_arr[j]) { continue; @@ -1002,9 +1002,9 @@ int ADIOI_Build_client_reqs(ADIO_File fd, &tmp_cur_off, &tmp_cur_reg_max_len); if (tmp_cur_off == -1) - continue; + continue; - if ((cur_off == -1) || + if ((cur_off == -1) || (cur_off > tmp_cur_off)) { cur_off_proc = j; @@ -1014,23 +1014,23 @@ int ADIOI_Build_client_reqs(ADIO_File fd, } assert(cur_off_proc != -1); - + /* Add up to the end of the file realm or as many bytes * are left for this particular aggregator in the client's * filetype */ - if (cur_reg_max_len > agg_comm_sz_arr[cur_off_proc] - + if (cur_reg_max_len > agg_comm_sz_arr[cur_off_proc] - agg_comm_cur_sz_arr[cur_off_proc]) { - cur_reg_max_len = agg_comm_sz_arr[cur_off_proc] - + cur_reg_max_len = agg_comm_sz_arr[cur_off_proc] - agg_comm_cur_sz_arr[cur_off_proc]; } assert(cur_reg_max_len > 0); - + view_state_add_region( cur_reg_max_len, &(agg_file_view_state_arr[cur_off_proc]), &st_reg, &act_reg_sz, i); - + #ifdef DEBUG2 fprintf(stderr, "ADIOI_Build_client_reqs: %s File 
region" " (proc=%d,off=%Ld,sz=%Ld)\n", @@ -1046,7 +1046,7 @@ int ADIOI_Build_client_reqs(ADIO_File fd, tmp_mem_state_p = &(my_mem_view_state_arr[cur_off_proc]); assert(view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz >= view_state_get_cur_sz(tmp_mem_state_p, i)); - while (view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz != + while (view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz != view_state_get_cur_sz(tmp_mem_state_p, i)) { ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1; @@ -1057,7 +1057,7 @@ int ADIOI_Build_client_reqs(ADIO_File fd, &fill_st_reg, &fill_reg_sz, i); } - + /* Based on how large the act_reg_sz 1. Figure out how * many memory offset-length pairs are necessary. 2. Set * the offset-length pairs. */ @@ -1067,7 +1067,7 @@ int ADIOI_Build_client_reqs(ADIO_File fd, view_state_add_region( act_reg_sz - tmp_reg_sz, tmp_mem_state_p, - &agg_mem_st_reg, &agg_mem_act_reg_sz, + &agg_mem_st_reg, &agg_mem_act_reg_sz, i); tmp_reg_sz += agg_mem_act_reg_sz; @@ -1078,19 +1078,19 @@ int ADIOI_Build_client_reqs(ADIO_File fd, agg_mem_st_reg, agg_mem_act_reg_sz); #endif agg_comm_cur_sz_arr[cur_off_proc] += agg_mem_act_reg_sz; - cur_total_agg_comm_sz += agg_mem_act_reg_sz; + cur_total_agg_comm_sz += agg_mem_act_reg_sz; switch(i) { case TEMP_OFF: /* Increment the ol list count a particular * aggregator if next region is not adjacent * to the previous region. */ - if (agg_mem_next_off_arr[cur_off_proc] != + if (agg_mem_next_off_arr[cur_off_proc] != agg_mem_st_reg) { agg_ol_ct_arr[cur_off_proc]++; } - agg_mem_next_off_arr[cur_off_proc] = + agg_mem_next_off_arr[cur_off_proc] = agg_mem_st_reg + agg_mem_act_reg_sz; break; case REAL_OFF: @@ -1098,12 +1098,12 @@ int ADIOI_Build_client_reqs(ADIO_File fd, * map to each aggregator, coaslescing if * possible. 
*/ agg_next_off_idx = agg_ol_cur_ct_arr[cur_off_proc]; - if (agg_mem_next_off_arr[cur_off_proc] != + if (agg_mem_next_off_arr[cur_off_proc] != agg_mem_st_reg) { - agg_disp_arr[cur_off_proc][agg_next_off_idx] = + agg_disp_arr[cur_off_proc][agg_next_off_idx] = agg_mem_st_reg; - agg_blk_arr[cur_off_proc][agg_next_off_idx] = + agg_blk_arr[cur_off_proc][agg_next_off_idx] = agg_mem_act_reg_sz; (agg_ol_cur_ct_arr[cur_off_proc])++; } @@ -1112,7 +1112,7 @@ int ADIOI_Build_client_reqs(ADIO_File fd, agg_blk_arr[cur_off_proc][agg_next_off_idx - 1] += agg_mem_act_reg_sz; } - agg_mem_next_off_arr[cur_off_proc] = + agg_mem_next_off_arr[cur_off_proc] = agg_mem_st_reg + agg_mem_act_reg_sz; break; default: @@ -1121,38 +1121,38 @@ int ADIOI_Build_client_reqs(ADIO_File fd, } } } - + /* On the first pass, allocate the memory structures for * creating the MPI_hindexed type. */ if (i == TEMP_OFF) - { + { /* Allocate offset-length pairs for creating hindexed * MPI_Datatypes for each aggregator */ - if ((agg_disp_arr = (MPI_Aint **) + if ((agg_disp_arr = (MPI_Aint **) ADIOI_Malloc(nprocs*sizeof(MPI_Aint *))) == NULL) { - fprintf(stderr, + fprintf(stderr, "ADIOI_Build_client_reqs: malloc agg_disp_arr failed\n"); return -1; } - if ((agg_blk_arr = (int **) ADIOI_Malloc(nprocs*sizeof(int *))) + if ((agg_blk_arr = (int **) ADIOI_Malloc(nprocs*sizeof(int *))) == NULL) { ADIOI_Free(agg_disp_arr); - fprintf(stderr, + fprintf(stderr, "ADIOI_Build_client_reqs: malloc agg_blk_arr failed\n"); return -1; - } + } for (j = 0; j < nprocs; j++) { - if ((agg_disp_arr[j] = (MPI_Aint *) + if ((agg_disp_arr[j] = (MPI_Aint *) ADIOI_Malloc(agg_ol_ct_arr[j]*sizeof(MPI_Aint))) == NULL) { fprintf(stderr, "ADIOI_Build_client_reqs: malloc " "agg_disp_arr[%d] failed\n", j); return -1; } - if ((agg_blk_arr[j] = (int *) + if ((agg_blk_arr[j] = (int *) ADIOI_Malloc(agg_ol_ct_arr[j]*sizeof(int))) == NULL) { ADIOI_Free(agg_disp_arr[j]); @@ -1224,7 +1224,7 @@ int ADIOI_Build_client_reqs(ADIO_File fd, 
ADIOI_Free(agg_ol_cur_ct_arr); #ifdef AGGREGATION_PROFILE MPE_Log_event (5019, 0, NULL); -#endif +#endif return 0; } /* ADIOI_Build_client_pre_req allows a client to calculate the memtype @@ -1265,12 +1265,12 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, return -1; } - if (agg_file_view_state_p->cur_state.cur_sz == + if (agg_file_view_state_p->cur_state.cur_sz == agg_file_view_state_p->sz || max_pre_req_sz <= 0 || max_ol_ct <= 0) { #ifdef DEBUG1 - fprintf(stderr, + fprintf(stderr, "ADIOI_Build_client_pre_req: Nothing to preprocess\n"); #endif return 0; @@ -1282,13 +1282,13 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, (my_mem_view_state_p->pre_ol_ct >= max_ol_ct)) { #ifdef DEBUG1 - fprintf(stderr, + fprintf(stderr, "ADIOI_Build_client_pre_req: Old values surpass new " "pre_req values\n"); #endif return 0; } - + /* General idea is to first advance the filetype to the file realm * and then the memtype to the filetype. The memtype is advanced * further by peeking at the filetype and then the filetype is @@ -1326,18 +1326,18 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, { cur_sz = my_mem_view_state_p->pre_sz; agg_ol_cur_ct = my_mem_view_state_p->pre_ol_ct; - + /* Copy the old data to the new data, freeing the old * arrays */ - memcpy(my_mem_view_state_p->pre_disp_arr, tmp_disp_arr, + memcpy(my_mem_view_state_p->pre_disp_arr, tmp_disp_arr, my_mem_view_state_p->pre_ol_ct * sizeof(MPI_Aint)); - memcpy(my_mem_view_state_p->pre_blk_arr, tmp_blk_arr, + memcpy(my_mem_view_state_p->pre_blk_arr, tmp_blk_arr, my_mem_view_state_p->pre_ol_ct * sizeof(int)); ADIOI_Free(tmp_disp_arr); ADIOI_Free(tmp_blk_arr); - agg_mem_next_off = + agg_mem_next_off = my_mem_view_state_p->pre_disp_arr[agg_ol_cur_ct - 1] + my_mem_view_state_p->pre_blk_arr[agg_ol_cur_ct - 1]; } @@ -1345,10 +1345,10 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, { cur_sz = 0; } - + /* Max_pre_req_sz may be larger than the amount of data left * to preprocess */ - if (max_pre_req_sz - cur_sz > + if (max_pre_req_sz 
- cur_sz > agg_file_view_state_p->sz - tmp_file_state_p->cur_sz) { max_sz = cur_sz + @@ -1356,12 +1356,12 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, } else max_sz = max_pre_req_sz; - + assert(cur_sz != max_sz); #ifdef DEBUG1 - fprintf(stderr, + fprintf(stderr, "ADIOI_Build_client_pre_req: (cur_sz=%Ld,agg_ol_ct=%d," - "agg_mem_next_off=%Ld,max_sz=%Ld,max_ol_ct=%d)\n", + "agg_mem_next_off=%Ld,max_sz=%Ld,max_ol_ct=%d)\n", cur_sz, agg_ol_ct, agg_mem_next_off, max_sz, max_ol_ct); #endif while (cur_sz < max_sz) @@ -1372,7 +1372,7 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, i, &cur_off, &cur_reg_max_len); - + /* find_next_off may show that the file_view_state is done * even if cur_sz != max_sz since find_next_off may * advance the file_view_state to the end here and realize @@ -1381,7 +1381,7 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, break; assert(cur_off != -1); - + /* Before translating the file regions to memory regions, * we first must advance to the proper point in the * mem_view_state for this aggregator to match the @@ -1417,10 +1417,10 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, * overstepped the min(end of the current piece in the * file view, end of the file realm, data left in * max_sz) */ - - if (cur_reg_max_len > + + if (cur_reg_max_len > view_state_get_next_len(agg_file_view_state_p, i)) - cur_reg_max_len = + cur_reg_max_len = view_state_get_next_len(agg_file_view_state_p, i); if (cur_reg_max_len > max_sz - cur_sz) @@ -1433,20 +1433,20 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, * allowed */ act_reg_sz = 0; exit_loop = 0; - while ((act_reg_sz < cur_reg_max_len) && + while ((act_reg_sz < cur_reg_max_len) && (exit_loop == 0)) { view_state_add_region( cur_reg_max_len - act_reg_sz, my_mem_view_state_p, - &agg_mem_st_reg, &agg_mem_act_reg_sz, + &agg_mem_st_reg, &agg_mem_act_reg_sz, i); act_reg_sz += agg_mem_act_reg_sz; - + #ifdef DEBUG2 fprintf(stderr, "ADIOI_Build_client_pre_req: %s Mem region" "(proc=%d,off=%Ld,sz=%Ld)\n", - 
off_type_name[i], agg_rank, agg_mem_st_reg, + off_type_name[i], agg_rank, agg_mem_st_reg, agg_mem_act_reg_sz); #endif switch(i) @@ -1461,7 +1461,7 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, if (agg_ol_ct == max_ol_ct) exit_loop = 1; } - agg_mem_next_off = + agg_mem_next_off = agg_mem_st_reg + agg_mem_act_reg_sz; break; case REAL_OFF: @@ -1472,10 +1472,10 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, if (agg_mem_next_off != agg_mem_st_reg) { my_mem_view_state_p-> - pre_disp_arr[agg_next_off_idx] = + pre_disp_arr[agg_next_off_idx] = agg_mem_st_reg; my_mem_view_state_p-> - pre_blk_arr[agg_next_off_idx] = + pre_blk_arr[agg_next_off_idx] = agg_mem_act_reg_sz; agg_ol_cur_ct++; if (agg_ol_cur_ct == agg_ol_ct) @@ -1487,7 +1487,7 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, pre_blk_arr[agg_next_off_idx - 1] += agg_mem_act_reg_sz; } - agg_mem_next_off = + agg_mem_next_off = agg_mem_st_reg + agg_mem_act_reg_sz; break; default: @@ -1515,10 +1515,10 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, "view_state_add_region failed to match the memtype\n"); return -1; } - + cur_sz += act_reg_sz; } - + /* On the first pass, allocate the memory structures for * storing the preprocessed information */ if (i == TEMP_OFF) @@ -1531,7 +1531,7 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, (long int)agg_ol_ct * sizeof(MPI_Aint)); return -1; } - if ((my_mem_view_state_p->pre_blk_arr = (int *) + if ((my_mem_view_state_p->pre_blk_arr = (int *) ADIOI_Malloc(agg_ol_ct * sizeof(int))) == NULL) { ADIOI_Free(my_mem_view_state_p->pre_disp_arr); @@ -1559,8 +1559,8 @@ int ADIOI_Build_client_pre_req(ADIO_File fd, "(off,len) = \n", agg_rank, my_mem_view_state_p->pre_sz); for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++) { - fprintf(stderr, "[%d](%d,%d) ", i, - my_mem_view_state_p->pre_disp_arr[i], + fprintf(stderr, "[%d](%d,%d) ", i, + my_mem_view_state_p->pre_disp_arr[i], my_mem_view_state_p->pre_blk_arr[i]); if (i % 5 == 0 && i != 0) fprintf(stderr, "\n"); @@ -1605,7 +1605,7 @@ static int 
process_pre_req(ADIO_File fd, { for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++) { - if ((my_mem_view_state_p->pre_blk_arr[i] + + if ((my_mem_view_state_p->pre_blk_arr[i] + *agg_comm_pre_sz_p) > *agg_comm_sz_p) { has_partial = 1; @@ -1617,39 +1617,39 @@ static int process_pre_req(ADIO_File fd, else if ((my_mem_view_state_p->pre_blk_arr[i] + *agg_comm_pre_sz_p) == *agg_comm_sz_p) { - *agg_comm_pre_sz_p += + *agg_comm_pre_sz_p += my_mem_view_state_p->pre_blk_arr[i]; i++; break; } else - *agg_comm_pre_sz_p += + *agg_comm_pre_sz_p += my_mem_view_state_p->pre_blk_arr[i]; } - + if (has_partial == 1) { - *agg_mem_next_off_p = - my_mem_view_state_p->pre_disp_arr[i - 1] + + *agg_mem_next_off_p = + my_mem_view_state_p->pre_disp_arr[i - 1] + partial_len; } else { - *agg_mem_next_off_p = - my_mem_view_state_p->pre_disp_arr[i - 1] + + *agg_mem_next_off_p = + my_mem_view_state_p->pre_disp_arr[i - 1] + my_mem_view_state_p->pre_blk_arr[i - 1]; } - + *agg_comm_cur_sz_p = *agg_comm_pre_sz_p; *agg_ol_ct_p = i; - + } else /* Use all the precalculated data */ { *agg_comm_pre_sz_p = my_mem_view_state_p->pre_sz; *agg_comm_cur_sz_p = *agg_comm_pre_sz_p; *agg_ol_ct_p = my_mem_view_state_p->pre_ol_ct; - *agg_mem_next_off_p = + *agg_mem_next_off_p = my_mem_view_state_p->pre_disp_arr[ my_mem_view_state_p->pre_ol_ct - 1] + my_mem_view_state_p->pre_blk_arr[ @@ -1669,8 +1669,8 @@ static int process_pre_req(ADIO_File fd, { agg_disp_arr[i] = my_mem_view_state_p->pre_disp_arr[i]; agg_blk_arr[i] = my_mem_view_state_p->pre_blk_arr[i]; - - if ((my_mem_view_state_p->pre_blk_arr[i] + + + if ((my_mem_view_state_p->pre_blk_arr[i] + tmp_agg_comm_pre_sz) > *agg_comm_pre_sz_p) { has_partial = 1; @@ -1678,7 +1678,7 @@ static int process_pre_req(ADIO_File fd, tmp_agg_comm_pre_sz = *agg_comm_pre_sz_p; partial_disp = my_mem_view_state_p->pre_disp_arr[i] + agg_blk_arr[i]; - partial_len = my_mem_view_state_p->pre_blk_arr[i] - + partial_len = my_mem_view_state_p->pre_blk_arr[i] - agg_blk_arr[i]; i++; break; @@ 
-1686,7 +1686,7 @@ static int process_pre_req(ADIO_File fd, else if ((my_mem_view_state_p->pre_blk_arr[i] + tmp_agg_comm_pre_sz) == *agg_comm_pre_sz_p) { - tmp_agg_comm_pre_sz += + tmp_agg_comm_pre_sz += my_mem_view_state_p->pre_blk_arr[i]; i++; break; @@ -1698,15 +1698,15 @@ static int process_pre_req(ADIO_File fd, *agg_mem_next_off_p = agg_disp_arr[i - 1] + agg_blk_arr[i - 1]; *agg_ol_cur_ct_p = i; *agg_comm_cur_sz_p = *agg_comm_pre_sz_p; - - /* Clean up the ol pairs we used */ + + /* Clean up the ol pairs we used */ if ((i < my_mem_view_state_p->pre_ol_ct) || (has_partial == 1)) { - int remain_ol_ct = + int remain_ol_ct = my_mem_view_state_p->pre_ol_ct - i + has_partial; MPI_Aint *new_pre_disp_arr = NULL; int *new_pre_blk_arr = NULL; - + if ((new_pre_disp_arr = (MPI_Aint *) ADIOI_Malloc(remain_ol_ct * sizeof(MPI_Aint))) == NULL) { @@ -1721,14 +1721,14 @@ static int process_pre_req(ADIO_File fd, "new_pre_blk_arr failed\n"); return -1; } - - memcpy(new_pre_disp_arr, + + memcpy(new_pre_disp_arr, &(my_mem_view_state_p->pre_disp_arr[i - has_partial]), remain_ol_ct * sizeof(MPI_Aint)); - memcpy(new_pre_blk_arr, + memcpy(new_pre_blk_arr, &(my_mem_view_state_p->pre_blk_arr[i - has_partial]), remain_ol_ct * sizeof(int)); - + /* Set the partial len of the first piece */ if (has_partial == 1) { @@ -1737,10 +1737,10 @@ static int process_pre_req(ADIO_File fd, new_pre_disp_arr[0] = partial_disp; new_pre_blk_arr[0] = partial_len; } - + ADIOI_Free(my_mem_view_state_p->pre_disp_arr); ADIOI_Free(my_mem_view_state_p->pre_blk_arr); - + my_mem_view_state_p->pre_disp_arr = new_pre_disp_arr; my_mem_view_state_p->pre_blk_arr = new_pre_blk_arr; my_mem_view_state_p->pre_ol_ct = remain_ol_ct; @@ -1750,7 +1750,7 @@ static int process_pre_req(ADIO_File fd, { ADIOI_Free(my_mem_view_state_p->pre_disp_arr); ADIOI_Free(my_mem_view_state_p->pre_blk_arr); - + my_mem_view_state_p->pre_disp_arr = NULL; my_mem_view_state_p->pre_blk_arr = NULL; my_mem_view_state_p->pre_ol_ct = 0; @@ -1760,7 +1760,7 
@@ static int process_pre_req(ADIO_File fd, fprintf(stderr, "process_pre_req: REAL_OFF " "agg_comm_pre_sz=%Ld,agg_comm_cur_sz=%Ld,agg_ol_ct=%d," "agg_ol_cur_ct=%d\n", - *agg_comm_pre_sz_p, *agg_comm_cur_sz_p, *agg_ol_ct_p, + *agg_comm_pre_sz_p, *agg_comm_cur_sz_p, *agg_ol_ct_p, *agg_ol_cur_ct_p); #endif break; @@ -1814,7 +1814,7 @@ int ADIOI_Build_client_req(ADIO_File fd, fprintf(stderr, "ADIOI_Build_client_req:(agg=%d,size_req=%Ld)\n", agg_idx, agg_comm_sz); #endif - + /* On the first pass see how many offset-length pairs are * necessary for each aggregator. Then allocate the correct * amount of offset-length pairs for handling each aggregator's @@ -1859,18 +1859,18 @@ int ADIOI_Build_client_req(ADIO_File fd, &agg_ol_ct, &agg_mem_next_off); } - + while (agg_comm_cur_sz < agg_comm_sz) - { + { find_next_off(fd, agg_file_view_state_p, fr_st_off_arr[agg_idx], &(fr_type_arr[agg_idx]), i, &cur_off, &cur_reg_max_len); - + assert(cur_off != -1); - + /* Add up to the end of the file realm or as many bytes * are left for this particular aggregator in the client's * filetype */ @@ -1879,27 +1879,27 @@ int ADIOI_Build_client_req(ADIO_File fd, cur_reg_max_len = agg_comm_sz - agg_comm_cur_sz; } assert(cur_reg_max_len > 0); - + view_state_add_region( cur_reg_max_len, agg_file_view_state_p, &st_reg, &act_reg_sz, i); - + #ifdef DEBUG2 fprintf(stderr, "ADIOI_Build_client_req: %s File region" " (proc=%d,off=%Ld,sz=%Ld)\n", off_type_name[i], agg_rank, cur_off, act_reg_sz); #endif - + /* Before translating the file regions to memory regions, * we first must advance to the proper point in the * mem_view_state for this aggregator to match the * file_view_state. 
*/ - - assert(tmp_file_state_p->cur_sz - act_reg_sz >= + + assert(tmp_file_state_p->cur_sz - act_reg_sz >= tmp_mem_state_p->cur_sz); - - while (tmp_file_state_p->cur_sz - act_reg_sz != + + while (tmp_file_state_p->cur_sz - act_reg_sz != tmp_mem_state_p->cur_sz) { ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1; @@ -1922,13 +1922,13 @@ int ADIOI_Build_client_req(ADIO_File fd, } #endif view_state_add_region( - tmp_file_state_p->cur_sz - + tmp_file_state_p->cur_sz - act_reg_sz - tmp_mem_state_p->cur_sz, my_mem_view_state_p, &fill_st_reg, &fill_reg_sz, i); } - + /* Based on how large the act_reg_sz is, first figure * out how many memory offset-length pairs are * necessary and then set the offset-length pairs. */ @@ -1938,14 +1938,14 @@ int ADIOI_Build_client_req(ADIO_File fd, view_state_add_region( act_reg_sz - tmp_reg_sz, my_mem_view_state_p, - &agg_mem_st_reg, &agg_mem_act_reg_sz, + &agg_mem_st_reg, &agg_mem_act_reg_sz, i); tmp_reg_sz += agg_mem_act_reg_sz; - + #ifdef DEBUG2 fprintf(stderr, "ADIOI_Build_client_req: %s Mem region" "(off=%Ld,sz=%Ld)\n", - off_type_name[i], agg_mem_st_reg, + off_type_name[i], agg_mem_st_reg, agg_mem_act_reg_sz); #endif agg_comm_cur_sz += agg_mem_act_reg_sz; @@ -1959,7 +1959,7 @@ int ADIOI_Build_client_req(ADIO_File fd, { agg_ol_ct++; } - agg_mem_next_off = + agg_mem_next_off = agg_mem_st_reg + agg_mem_act_reg_sz; break; case REAL_OFF: @@ -1969,9 +1969,9 @@ int ADIOI_Build_client_req(ADIO_File fd, agg_next_off_idx = agg_ol_cur_ct; if (agg_mem_next_off != agg_mem_st_reg) { - agg_disp_arr[agg_next_off_idx] = + agg_disp_arr[agg_next_off_idx] = agg_mem_st_reg; - agg_blk_arr[agg_next_off_idx] = + agg_blk_arr[agg_next_off_idx] = agg_mem_act_reg_sz; agg_ol_cur_ct++; } @@ -1980,7 +1980,7 @@ int ADIOI_Build_client_req(ADIO_File fd, agg_blk_arr[agg_next_off_idx - 1] += agg_mem_act_reg_sz; } - agg_mem_next_off = + agg_mem_next_off = agg_mem_st_reg + agg_mem_act_reg_sz; break; default: @@ -1989,14 +1989,14 @@ int ADIOI_Build_client_req(ADIO_File fd, 
} } } - + /* On the first pass, allocate the memory structures for * creating the MPI_hindexed type. */ if (i == TEMP_OFF) - { + { /* Allocate offset-length pairs for creating hindexed * MPI_Datatypes for each aggregator */ - if ((agg_disp_arr = (MPI_Aint *) + if ((agg_disp_arr = (MPI_Aint *) ADIOI_Malloc(agg_ol_ct * sizeof(MPI_Aint))) == NULL) { fprintf(stderr, "ADIOI_Build_client_req: malloc " @@ -2004,7 +2004,7 @@ int ADIOI_Build_client_req(ADIO_File fd, (long int)agg_ol_ct * sizeof(MPI_Aint)); return -1; } - if ((agg_blk_arr = (int *) + if ((agg_blk_arr = (int *) ADIOI_Malloc(agg_ol_ct * sizeof(int))) == NULL) { ADIOI_Free(agg_disp_arr); @@ -2018,7 +2018,7 @@ int ADIOI_Build_client_req(ADIO_File fd, assert(agg_ol_ct == agg_ol_cur_ct); #ifdef DEBUG1 - fprintf(stderr, + fprintf(stderr, "ADIOI_Build_client_req:(agg=%d,cur_ol_count=%d=ol_count=%d)\n", agg_rank, agg_ol_cur_ct, agg_ol_ct); #endif @@ -2029,7 +2029,7 @@ int ADIOI_Build_client_req(ADIO_File fd, fprintf(stderr, "ADIOI_Build_client_req: p %d (off,len) = ", agg_rank); for (i = 0; i < agg_ol_ct; i++) { - fprintf(stderr, "[%d](%d,%d) ", i, + fprintf(stderr, "[%d](%d,%d) ", i, agg_disp_arr[i], agg_blk_arr[i]); if (i % 5 == 0 && i != 0) fprintf(stderr, "\n"); @@ -2038,7 +2038,7 @@ int ADIOI_Build_client_req(ADIO_File fd, } #endif #ifdef DEBUG1 - fprintf(stderr, + fprintf(stderr, "ADIOI_Build_client_req:(agg=%d,pre_ol_count=%d)\n", agg_idx, my_mem_view_state_p->pre_ol_ct); #endif @@ -2046,12 +2046,12 @@ int ADIOI_Build_client_req(ADIO_File fd, #ifdef DEBUG2 if (my_mem_view_state_p->pre_sz > 0) { - fprintf(stderr, "ADIOI_Build_client_req: p %d pre(off,len) = ", + fprintf(stderr, "ADIOI_Build_client_req: p %d pre(off,len) = ", agg_idx); for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++) { - fprintf(stderr, "[%d](%d,%d) ", i, - my_mem_view_state_p->pre_disp_arr[i], + fprintf(stderr, "[%d](%d,%d) ", i, + my_mem_view_state_p->pre_disp_arr[i], my_mem_view_state_p->pre_blk_arr[i]); if (i % 5 == 0 && i != 0) 
fprintf(stderr, "\n"); @@ -2077,7 +2077,7 @@ int ADIOI_Build_client_req(ADIO_File fd, #ifdef AGGREGATION_PROFILE MPE_Log_event (5019, 0, NULL); -#endif +#endif return 0; } diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_coll_exch_new.c b/ompi/mca/io/romio314/romio/adio/common/ad_coll_exch_new.c index d121589d69e..abe7d74465c 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_coll_exch_new.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_coll_exch_new.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -42,7 +42,7 @@ void ADIOI_Print_flatlist_node(ADIOI_Flatlist_node *flatlist_node_p) fprintf(stderr, "print flatlist node of NULL ptr\n"); return; } - fprintf(stderr, "print flatlist node count = %d (idx,blocklen)\n", + fprintf(stderr, "print flatlist node count = %d (idx,blocklen)\n", (int)flatlist_node_p->count); for (i = 0; i < flatlist_node_p->count; i++) { @@ -64,7 +64,7 @@ ADIOI_Flatlist_node * ADIOI_Add_contig_flattened(MPI_Datatype contig_type) { MPI_Count contig_type_sz = -1; ADIOI_Flatlist_node *flat_node_p = ADIOI_Flatlist; - + /* Add contig type to the end of the list if it doesn't already * exist. 
*/ while (flat_node_p->next) @@ -88,7 +88,7 @@ ADIOI_Flatlist_node * ADIOI_Add_contig_flattened(MPI_Datatype contig_type) { fprintf(stderr, "ADIOI_Flatlist_node: malloc blocklens failed\n"); } - if ((flat_node_p->indices = (ADIO_Offset *) + if ((flat_node_p->indices = (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset))) == NULL) { fprintf(stderr, "ADIOI_Flatlist_node: malloc indices failed\n"); @@ -167,7 +167,7 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, else { flat_file_p = ADIOI_Flatlist; while (flat_file_p->type != fd->filetype) - flat_file_p = flat_file_p->next; + flat_file_p = flat_file_p->next; } disp_off_sz_ext_typesz[0] = fd->fp_ind; @@ -186,14 +186,14 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, /* only aggregators receive data */ if (fd->is_agg) { - recv_count_arr = ADIOI_Calloc(nprocs, + recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t)); recv_req_arr = ADIOI_Malloc (nprocs * sizeof(MPI_Request)); for (i=0; i < nprocs; i++) MPI_Irecv (&recv_count_arr[i], sizeof(amount_and_extra_data_t), MPI_BYTE, i, COUNT_EXCH, fd->comm, &recv_req_arr[i]); } - + /* only send data to aggregators */ send_req_arr = ADIOI_Calloc (fd->hints->cb_nodes, sizeof(MPI_Request)); for (i=0; i < fd->hints->cb_nodes; i++) { @@ -210,7 +210,7 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, } } - + /* Every client has to build mem and file view_states for each aggregator. * We initialize their values here. 
and we also initialize * send_count_arr */ @@ -241,7 +241,7 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, 1, &(my_mem_view_state_arr[tmp_agg_idx]), REAL_OFF); - + memset(&(agg_file_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state)); agg_file_view_state_arr[tmp_agg_idx].fp_ind = disp_off_sz_ext_typesz[0]; @@ -283,10 +283,10 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, fprintf(stderr, "my own flattened filetype: "); ADIOI_Print_flatlist_node(flat_file_p); #endif - + if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { ret = MPI_Alltoall(send_count_arr, sizeof(amount_and_extra_data_t), - MPI_BYTE, + MPI_BYTE, recv_count_arr, sizeof(amount_and_extra_data_t), MPI_BYTE, fd->comm); if (ret != MPI_SUCCESS) @@ -353,22 +353,22 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, if (fd->is_agg) { if (recv_count_arr[i].count > 0) { - if ((client_file_view_state_arr[i].flat_type_p = + if ((client_file_view_state_arr[i].flat_type_p = (ADIOI_Flatlist_node *) ADIOI_Malloc( sizeof(ADIOI_Flatlist_node))) == NULL) { fprintf(stderr, "ADIOI_Exchange_file_views: malloc " "flat_type_p failed\n"); } - client_file_view_state_arr[i].flat_type_p->count = + client_file_view_state_arr[i].flat_type_p->count = recv_count_arr[i].count; - client_file_view_state_arr[i].flat_type_p->indices = - (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, + client_file_view_state_arr[i].flat_type_p->indices = + (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, sizeof(ADIO_Offset)); client_file_view_state_arr[i].flat_type_p->blocklens = - (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, + (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, sizeof(ADIO_Offset)); - + /* Copy the extra data out of the stuff we Alltoall'd */ memcpy (&client_file_view_state_arr[i].fp_ind, &recv_count_arr[i].fp_ind, @@ -379,11 +379,11 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, } } - /* Since ADIOI_Calloc may do 
other things we add the +1 + /* Since ADIOI_Calloc may do other things we add the +1 * to avoid a 0-size malloc */ send_req_arr = (MPI_Request *) ADIOI_Calloc(2*(send_req_arr_sz)+1, sizeof(MPI_Request)); - + j = 0; if (recv_req_arr_sz > 0) { assert (fd->is_agg); @@ -392,7 +392,7 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, for (i = 0; i < nprocs; i++) { if (recv_count_arr[i].count > 0) { MPI_Irecv(client_file_view_state_arr[i].flat_type_p->indices, - recv_count_arr[i].count, ADIO_OFFSET, i, + recv_count_arr[i].count, ADIO_OFFSET, i, INDICES, fd->comm, &recv_req_arr[j]); j++; MPI_Irecv(client_file_view_state_arr[i].flat_type_p->blocklens, @@ -411,7 +411,7 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, send_count_arr[i].count, ADIO_OFFSET, i, INDICES, fd->comm, &send_req_arr[j]); j++; - MPI_Isend(flat_file_p->blocklens, + MPI_Isend(flat_file_p->blocklens, send_count_arr[i].count, ADIO_OFFSET, i, BLOCK_LENS, fd->comm, &send_req_arr[j]); j++; @@ -426,7 +426,7 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, fd->hints->ranklist[i], INDICES, fd->comm, &send_req_arr[j]); j++; - MPI_Isend(flat_file_p->blocklens, + MPI_Isend(flat_file_p->blocklens, send_count_arr[i].count, ADIO_OFFSET, fd->hints->ranklist[i], BLOCK_LENS, fd->comm, &send_req_arr[j]); @@ -435,9 +435,9 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, } } - /* Since ADIOI_Malloc may do other things we add the +1 - * to avoid a 0-size malloc */ - statuses = (MPI_Status *) + /* Since ADIOI_Malloc may do other things we add the +1 + * to avoid a 0-size malloc */ + statuses = (MPI_Status *) ADIOI_Malloc(1 + 2 * ADIOI_MAX(send_req_arr_sz,recv_req_arr_sz) * sizeof(MPI_Status)); @@ -479,19 +479,19 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, client_file_view_state_arr[i].sz, client_file_view_state_arr[i].ext); } - - while (fr_node_p->type != + + while (fr_node_p->type != 
fd->file_realm_types[fd->my_cb_nodes_index]) fr_node_p = fr_node_p->next; assert(fr_node_p != NULL); - - fprintf(stderr, "my file realm (idx=%d,st_off=%Ld) ", + + fprintf(stderr, "my file realm (idx=%d,st_off=%Ld) ", fd->my_cb_nodes_index, fd->file_realm_st_offs[fd->my_cb_nodes_index]); ADIOI_Print_flatlist_node(fr_node_p); } #endif - + #ifdef DEBUG2 if (fd->is_agg == 1) { diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_darray.c b/ompi/mca/io/romio314/romio/adio/common/ad_darray.c index 23715c412f3..0437db828ef 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_darray.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_darray.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -18,11 +18,11 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc MPI_Aint *st_offset); -int ADIO_Type_create_darray(int size, int rank, int ndims, - int *array_of_gsizes, int *array_of_distribs, - int *array_of_dargs, int *array_of_psizes, - int order, MPI_Datatype oldtype, - MPI_Datatype *newtype) +int ADIO_Type_create_darray(int size, int rank, int ndims, + int *array_of_gsizes, int *array_of_distribs, + int *array_of_dargs, int *array_of_psizes, + int order, MPI_Datatype oldtype, + MPI_Datatype *newtype) { MPI_Datatype type_old, type_new=MPI_DATATYPE_NULL, types[3]; int procs, tmp_rank, i, tmp_size, blklens[3], *coords; @@ -52,12 +52,12 @@ int ADIO_Type_create_darray(int size, int rank, int ndims, MPIOI_Type_block(array_of_gsizes, i, ndims, array_of_psizes[i], coords[i], array_of_dargs[i], - order, orig_extent, + order, orig_extent, type_old, &type_new, - st_offsets+i); + st_offsets+i); break; case MPI_DISTRIBUTE_CYCLIC: - MPIOI_Type_cyclic(array_of_gsizes, i, ndims, + MPIOI_Type_cyclic(array_of_gsizes, i, ndims, array_of_psizes[i], coords[i], array_of_dargs[i], 
order, orig_extent, type_old, @@ -65,11 +65,11 @@ int ADIO_Type_create_darray(int size, int rank, int ndims, break; case MPI_DISTRIBUTE_NONE: /* treat it as a block distribution on 1 process */ - MPIOI_Type_block(array_of_gsizes, i, ndims, 1, 0, + MPIOI_Type_block(array_of_gsizes, i, ndims, 1, 0, MPI_DISTRIBUTE_DFLT_DARG, order, - orig_extent, + orig_extent, type_old, &type_new, - st_offsets+i); + st_offsets+i); break; } if (i) MPI_Type_free(&type_old); @@ -94,20 +94,20 @@ int ADIO_Type_create_darray(int size, int rank, int ndims, MPIOI_Type_block(array_of_gsizes, i, ndims, array_of_psizes[i], coords[i], array_of_dargs[i], order, orig_extent, type_old, &type_new, - st_offsets+i); + st_offsets+i); break; case MPI_DISTRIBUTE_CYCLIC: - MPIOI_Type_cyclic(array_of_gsizes, i, ndims, + MPIOI_Type_cyclic(array_of_gsizes, i, ndims, array_of_psizes[i], coords[i], - array_of_dargs[i], order, + array_of_dargs[i], order, orig_extent, type_old, &type_new, st_offsets+i); break; case MPI_DISTRIBUTE_NONE: /* treat it as a block distribution on 1 process */ MPIOI_Type_block(array_of_gsizes, i, ndims, array_of_psizes[i], - coords[i], MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent, - type_old, &type_new, st_offsets+i); + coords[i], MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent, + type_old, &type_new, st_offsets+i); break; } if (i != ndims-1) MPI_Type_free(&type_old); @@ -127,13 +127,13 @@ int ADIO_Type_create_darray(int size, int rank, int ndims, disps[2] = orig_extent; for (i=0; idim; i--) stride *= (MPI_Aint)array_of_gsizes[i]; @@ -211,7 +211,7 @@ static int MPIOI_Type_block(int *array_of_gsizes, int dim, int ndims, int nprocs static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nprocs, int rank, int darg, int order, MPI_Aint orig_extent, MPI_Datatype type_old, MPI_Datatype *type_new, - MPI_Aint *st_offset) + MPI_Aint *st_offset) { /* nprocs = no. 
of processes in dimension dim of grid rank = coordinate of this process in dimension dim */ @@ -227,7 +227,7 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc return MPI_ERR_ARG; } /* --END ERROR HANDLING-- */ - + st_index = rank*blksize; end_index = array_of_gsizes[dim] - 1; @@ -240,7 +240,7 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc count = local_size/blksize; rem = local_size % blksize; - + stride = (MPI_Aint)nprocs*(MPI_Aint)blksize*orig_extent; if (order == MPI_ORDER_FORTRAN) for (i=0; i #endif -/* Workaround for incomplete set of definitions if __REDIRECT is not +/* Workaround for incomplete set of definitions if __REDIRECT is not defined and large file support is used in aio.h */ #if !defined(__REDIRECT) && defined(__USE_FILE_OFFSET64) #define aiocb aiocb64 @@ -40,7 +40,7 @@ * to complete. */ int ADIOI_GEN_IODone(ADIO_Request *request, ADIO_Status *status, - int *error_code) + int *error_code) { return 0; diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_done_fake.c b/ompi/mca/io/romio314/romio/adio/common/ad_done_fake.c index 59da5755e0b..3fffa4555b9 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_done_fake.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_done_fake.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_end.c b/ompi/mca/io/romio314/romio/adio/common/ad_end.c index 066c65c27ed..00725f5f008 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_end.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_end.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -11,7 +11,7 @@ void ADIO_End(int *error_code) { ADIOI_Flatlist_node *curr, *next; ADIOI_Datarep *datarep, *datarep_next; - + /* FPRINTF(stderr, "reached end\n"); */ /* if a default errhandler was set on MPI_FILE_NULL then we need to ensure diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_fcntl.c b/ompi/mca/io/romio314/romio/adio/common/ad_fcntl.c index 62067573dd1..8a4d07352cf 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_fcntl.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_fcntl.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2005 University of Chicago. + * Copyright (C) 2005 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_flush.c b/ompi/mca/io/romio314/romio/adio/common/ad_flush.c index 3ace6778bf4..76f5da7abcc 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_flush.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_flush.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_fstype.c b/ompi/mca/io/romio314/romio/adio/common/ad_fstype.c index c89b560e541..a78f77c9521 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_fstype.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_fstype.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -111,12 +111,12 @@ from the compiler */ #if defined(ROMIO_HAVE_STRUCT_STATVFS_WITH_F_BASETYPE) || \ defined(HAVE_STRUCT_STATFS) || \ - defined(ROMIO_HAVE_STRUCT_STAT_WITH_ST_FSTYPE) + defined(ROMIO_HAVE_STRUCT_STAT_WITH_ST_FSTYPE) #ifndef ROMIO_NTFS #define ROMIO_NEEDS_ADIOPARENTDIR static void ADIO_FileSysType_parentdir(const char *filename, char **dirnamep); #endif -#endif +#endif static void ADIO_FileSysType_prefix(const char *filename, int *fstype, int *error_code); static void ADIO_FileSysType_fncall(const char *filename, int *fstype, @@ -141,13 +141,13 @@ Output Parameters: here. We assume that S_ISLNK is *always* defined as a macro. If that is not universally true, then add a test to the romio configure that trys to link a program that references S_ISLNK */ -#if !defined(S_ISLNK) +#if !defined(S_ISLNK) # if defined(S_IFLNK) /* Check for the link bit */ # define S_ISLNK(mode) ((mode) & S_IFLNK) # else /* no way to check if it is a link, so say false */ -# define S_ISLNK(mode) 0 +# define S_ISLNK(mode) 0 # endif #endif /* !(S_ISLNK) */ @@ -161,7 +161,7 @@ static void ADIO_FileSysType_parentdir(const char *filename, char **dirnamep) int err; char *dir = NULL, *slash; struct stat statbuf; - + err = lstat(filename, &statbuf); if (err || (!S_ISLNK(statbuf.st_mode))) { @@ -221,8 +221,8 @@ Output Parameters: MPI_SUCCESS is stored in the location pointed to by error_code on success. - This function is used by MPI_File_open() and MPI_File_delete() to determine - file system type. Most other functions use the type which is stored when the + This function is used by MPI_File_open() and MPI_File_delete() to determine + file system type. Most other functions use the type which is stored when the file is opened. 
*/ static void ADIO_FileSysType_fncall(const char *filename, int *fstype, int *error_code) @@ -346,6 +346,8 @@ static void ADIO_FileSysType_fncall(const char *filename, int *fstype, int *erro # endif +# ifdef ROMIO_HAVE_STRUCT_STATFS_WITH_F_TYPE + #ifdef ROMIO_GPFS if (fsbuf.f_type == GPFS_SUPER_MAGIC) { *fstype = ADIO_GPFS; @@ -413,6 +415,8 @@ static void ADIO_FileSysType_fncall(const char *filename, int *fstype, int *erro } # endif +# endif /*ROMIO_HAVE_STRUCT_STATFS_WITH_F_TYPE */ + # ifdef ROMIO_UFS /* if UFS support is enabled, default to that */ *fstype = ADIO_UFS; @@ -444,7 +448,7 @@ static void ADIO_FileSysType_fncall(const char *filename, int *fstype, int *erro if(*error_code != MPI_SUCCESS) return; } } - + if (err) { /* --BEGIN ERROR HANDLING-- */ *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, @@ -499,7 +503,7 @@ static void ADIO_FileSysType_fncall_scalable(MPI_Comm comm, const char *filename /* - ADIO_FileSysType_prefix - determines file system type for a file using + ADIO_FileSysType_prefix - determines file system type for a file using a prefix on the file name. upper layer should have already determined that a prefix is present. @@ -552,18 +556,18 @@ static void ADIO_FileSysType_prefix(const char *filename, int *fstype, int *erro else if (!strncmp(filename, "zoidfs:", 7)|| !strncmp(filename, "ZOIDFS:", 7)) { *fstype = ADIO_ZOIDFS; - } - else if (!strncmp(filename, "testfs:", 7) + } + else if (!strncmp(filename, "testfs:", 7) || !strncmp(filename, "TESTFS:", 7)) { *fstype = ADIO_TESTFS; } - else if (!strncmp(filename, "ftp:", 4) + else if (!strncmp(filename, "ftp:", 4) || !strncmp(filename, "gsiftp:", 7)) { *fstype = ADIO_GRIDFTP; } - else if (!strncmp(filename, "lustre:", 7) + else if (!strncmp(filename, "lustre:", 7) || !strncmp(filename, "LUSTRE:", 7)) { *fstype = ADIO_LUSTRE; @@ -599,7 +603,7 @@ Output Parameters: . 
error_code - (pointer to) int holding error code Notes: -This code used to be in MPI_File_open(), but it has been moved into here in +This code used to be in MPI_File_open(), but it has been moved into here in order to clean things up. The goal is to separate all this "did we compile for this fs type" code from the MPI layer and also to introduce the ADIOI_Fns tables in a reasonable way. -- Rob, 06/06/2001 @@ -625,13 +629,13 @@ void ADIO_ResolveFileType(MPI_Comm comm, const char *filename, int *fstype, /* Optimization: we can reduce the 'storm of stats' that result from * thousands of mpi processes determinig file type this way. Let us * have just one process stat the file and broadcast the result to - * everyone else. + * everyone else. * - Note that we will not catch cases like * http://www.mcs.anl.gov/web-mail-archive/lists/mpich-discuss/2007/08/msg00042.html * (edit: now http://lists.mcs.anl.gov/pipermail/mpich-discuss/2007-August/002648.html) * * where file systems are not mounted or available on other processes, - * but we'll catch those a few functions later in ADIO_Open + * but we'll catch those a few functions later in ADIO_Open * - Note that if we have NFS enabled, we might have a situation where, * for example, /home/user/data.out is UFS on one process but NFS on * others, so we won't perform this optimization if NFS is enabled. @@ -674,7 +678,7 @@ void ADIO_ResolveFileType(MPI_Comm comm, const char *filename, int *fstype, } } else { - /* prefix specified; just match via prefix and assume everyone got + /* prefix specified; just match via prefix and assume everyone got * the same thing. * * perhaps we should have this code go through the allreduce as well? 
@@ -846,7 +850,7 @@ void ADIO_ResolveFileType(MPI_Comm comm, const char *filename, int *fstype, #endif } if (file_system == ADIO_LUSTRE) { -#ifndef ROMIO_LUSTRE +#ifndef ROMIO_LUSTRE *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**iofstypeunsupported", 0); return; #else @@ -867,6 +871,6 @@ void ADIO_ResolveFileType(MPI_Comm comm, const char *filename, int *fstype, *fstype = file_system; return; } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_get_sh_fp.c b/ompi/mca/io/romio314/romio/adio/common/ad_get_sh_fp.c index 12133277903..3c933f9e876 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_get_sh_fp.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_get_sh_fp.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -37,25 +37,25 @@ void ADIO_Get_shared_fp(ADIO_File fd, ADIO_Offset incr, ADIO_Offset *shared_fp, if (fd->shared_fp_fd == ADIO_FILE_NULL) { MPI_Comm_dup(MPI_COMM_SELF, &dupcommself); - fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF, dupcommself, - fd->shared_fp_fname, + fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF, dupcommself, + fd->shared_fp_fname, fd->file_system, fd->fns, - ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE, + ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE, 0, MPI_BYTE, MPI_BYTE, - MPI_INFO_NULL, + MPI_INFO_NULL, ADIO_PERM_NULL, error_code); if (*error_code != MPI_SUCCESS) return; ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset)); - ADIO_ReadContig(fd->shared_fp_fd, shared_fp, sizeof(ADIO_Offset), + ADIO_ReadContig(fd->shared_fp_fd, shared_fp, sizeof(ADIO_Offset), MPI_BYTE, ADIO_EXPLICIT_OFFSET, 0, &status, error_code); /* if the file is empty, the above function may return error - (reading beyond end of file). In that case, shared_fp = 0, + (reading beyond end of file). In that case, shared_fp = 0, set above, is the correct value. 
*/ } else { ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset)); - ADIO_ReadContig(fd->shared_fp_fd, shared_fp, sizeof(ADIO_Offset), + ADIO_ReadContig(fd->shared_fp_fd, shared_fp, sizeof(ADIO_Offset), MPI_BYTE, ADIO_EXPLICIT_OFFSET, 0, &status, error_code); if (*error_code != MPI_SUCCESS) { ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset)); @@ -67,7 +67,7 @@ void ADIO_Get_shared_fp(ADIO_File fd, ADIO_Offset incr, ADIO_Offset *shared_fp, new_fp = *shared_fp + incr; - ADIO_WriteContig(fd->shared_fp_fd, &new_fp, sizeof(ADIO_Offset), + ADIO_WriteContig(fd->shared_fp_fd, &new_fp, sizeof(ADIO_Offset), MPI_BYTE, ADIO_EXPLICIT_OFFSET, 0, &status, error_code); done: ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset)); diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_hints.c b/ompi/mca/io/romio314/romio/adio/common/ad_hints.c index 1bc74f1a555..1d226e6c7df 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_hints.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_hints.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -11,7 +11,7 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) { -/* if fd->info is null, create a new info object. +/* if fd->info is null, create a new info object. Initialize fd->info to default values. Initialize fd->hints to default values. Examine the info object passed by the user. 
If it contains values that @@ -58,15 +58,15 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) if (!fd->hints->initialized) { /* buffer size for collective I/O */ - ADIOI_Info_set(info, "cb_buffer_size", ADIOI_CB_BUFFER_SIZE_DFLT); + ADIOI_Info_set(info, "cb_buffer_size", ADIOI_CB_BUFFER_SIZE_DFLT); fd->hints->cb_buffer_size = atoi(ADIOI_CB_BUFFER_SIZE_DFLT); /* default is to let romio automatically decide when to use * collective buffering */ - ADIOI_Info_set(info, "romio_cb_read", "automatic"); + ADIOI_Info_set(info, "romio_cb_read", "automatic"); fd->hints->cb_read = ADIOI_HINT_AUTO; - ADIOI_Info_set(info, "romio_cb_write", "automatic"); + ADIOI_Info_set(info, "romio_cb_write", "automatic"); fd->hints->cb_write = ADIOI_HINT_AUTO; fd->hints->cb_config_list = NULL; @@ -83,7 +83,7 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* hint instructing the use of persistent file realms */ ADIOI_Info_set(info, "romio_cb_pfr", "disable"); fd->hints->cb_pfr = ADIOI_HINT_DISABLE; - + /* hint guiding the assignment of persistent file realms */ ADIOI_Info_set(info, "romio_cb_fr_types", "aar"); fd->hints->cb_fr_type = ADIOI_FR_AAR; @@ -115,9 +115,9 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* default is to let romio automatically decide when to use data * sieving */ - ADIOI_Info_set(info, "romio_ds_read", "automatic"); + ADIOI_Info_set(info, "romio_ds_read", "automatic"); fd->hints->ds_read = ADIOI_HINT_AUTO; - ADIOI_Info_set(info, "romio_ds_write", "automatic"); + ADIOI_Info_set(info, "romio_ds_write", "automatic"); fd->hints->ds_write = ADIOI_HINT_AUTO; /* still to do: tune this a bit for a variety of file systems. 
there's @@ -137,18 +137,18 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* add in user's info if supplied */ if (users_info != MPI_INFO_NULL) { - ADIOI_Info_check_and_install_int(fd, users_info, "cb_buffer_size", + ADIOI_Info_check_and_install_int(fd, users_info, "cb_buffer_size", &(fd->hints->cb_buffer_size), myname, error_code); /* aligning file realms to certain sizes (e.g. stripe sizes) * may benefit I/O performance */ - ADIOI_Info_check_and_install_int(fd, users_info, "romio_cb_fr_alignment", + ADIOI_Info_check_and_install_int(fd, users_info, "romio_cb_fr_alignment", &(fd->hints->cb_fr_alignment), myname, error_code); /* for collective I/O, try to be smarter about when to do data sieving * using a specific threshold for the datatype size/extent * (percentage 0-100%) */ - ADIOI_Info_check_and_install_int(fd, users_info, "romio_cb_ds_threshold", + ADIOI_Info_check_and_install_int(fd, users_info, "romio_cb_ds_threshold", &(fd->hints->cb_ds_threshold), myname, error_code); ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_alltoall", @@ -178,7 +178,7 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_pfr", &(fd->hints->cb_pfr), myname, error_code); - + /* file realm assignment types ADIOI_FR_AAR(0), ADIOI_FR_FSZ(-1), ADIOI_FR_USR_REALMS(-2), all others specify a regular fr size in bytes. probably not the best way... */ @@ -196,7 +196,7 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) ADIOI_Info_set(info, "romio_cb_read", "enable"); fd->hints->cb_read = 1; fd->hints->cb_write = 1; - } + } /* new hints for enabling/disabling data sieving on * reads/writes */ @@ -244,7 +244,7 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) /* Now we use striping unit in common code so we should process hints for it. 
*/ - ADIOI_Info_check_and_install_int(fd, users_info, "striping_unit", + ADIOI_Info_check_and_install_int(fd, users_info, "striping_unit", &(fd->hints->striping_unit), myname, error_code); } diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_init.c b/ompi/mca/io/romio314/romio/adio/common/ad_init.c index 88e75a5251f..b650ad78758 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_init.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_init.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -55,7 +55,7 @@ void ADIO_Init(int *argc, char ***argv, int *error_code) ADIOI_UNREFERENCED_ARG(argc); ADIOI_UNREFERENCED_ARG(argv); - + #ifdef ROMIO_INSIDE_MPICH MPIR_Ext_init(); #endif @@ -69,11 +69,11 @@ void ADIO_Init(int *argc, char ***argv, int *error_code) #if defined(ROMIO_XFS) || defined(ROMIO_LUSTRE) c = getenv("MPIO_DIRECT_READ"); - if (c && (!strcmp(c, "true") || !strcmp(c, "TRUE"))) + if (c && (!strcmp(c, "true") || !strcmp(c, "TRUE"))) ADIOI_Direct_read = 1; else ADIOI_Direct_read = 0; c = getenv("MPIO_DIRECT_WRITE"); - if (c && (!strcmp(c, "true") || !strcmp(c, "TRUE"))) + if (c && (!strcmp(c, "true") || !strcmp(c, "TRUE"))) ADIOI_Direct_write = 1; else ADIOI_Direct_write = 0; #endif @@ -93,7 +93,7 @@ void ADIO_Init(int *argc, char ***argv, int *error_code) MPE_Log_get_state_eventIDs( &ADIOI_MPE_unlock_a, &ADIOI_MPE_unlock_b ); MPE_Log_get_state_eventIDs( &ADIOI_MPE_postwrite_a, &ADIOI_MPE_postwrite_b ); - MPE_Log_get_state_eventIDs( &ADIOI_MPE_openinternal_a, + MPE_Log_get_state_eventIDs( &ADIOI_MPE_openinternal_a, &ADIOI_MPE_openinternal_b); MPE_Log_get_state_eventIDs( &ADIOI_MPE_stat_a, &ADIOI_MPE_stat_b); MPE_Log_get_state_eventIDs( &ADIOI_MPE_iread_a, &ADIOI_MPE_iread_b); diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_io_coll.c 
b/ompi/mca/io/romio314/romio/adio/common/ad_io_coll.c index 7e980e1e1d6..22b2da473de 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_io_coll.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_io_coll.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 2008 University of Chicago. +/* + * Copyright (C) 2008 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -31,7 +31,7 @@ static void post_aggregator_comm (MPI_Comm comm, int rw_type, int nproc, MPI_Request **requests, int *aggregators_client_count_p); -static void post_client_comm (ADIO_File fd, int rw_type, +static void post_client_comm (ADIO_File fd, int rw_type, int agg_rank, void *buf, MPI_Datatype agg_comm_dtype, int agg_alltoallw_count, @@ -141,7 +141,7 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, interleave_count++; /* This is a rudimentary check for interleaving, but should * suffice for the moment. */ - + min_st_offset = ADIOI_MIN(all_st_end_offsets[i*2], min_st_offset); max_end_offset = ADIOI_MAX(all_st_end_offsets[i*2+1], @@ -249,11 +249,11 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, } } #ifdef USE_PRE_REQ - else + else { /* Example use of ADIOI_Build_client_pre_req. 
to an * appropriate section */ - + for (i = 0; i < fd->hints->cb_nodes; i++) { agg_rank = fd->hints->ranklist[(i+myrank)%fd->hints->cb_nodes]; @@ -264,7 +264,7 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, fd, agg_rank, (i+myrank)%fd->hints->cb_nodes, &(my_mem_view_state_arr[agg_rank]), &(agg_file_view_state_arr[agg_rank]), - 2*1024*1024, + 2*1024*1024, 64*1024); #ifdef AGGREGATION_PROFILE MPE_Log_event (5041, 0, NULL); @@ -327,7 +327,7 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, (i+myrank)%fd->hints->cb_nodes, &(my_mem_view_state_arr[agg_rank]), &(agg_file_view_state_arr[agg_rank]), - agg_comm_sz_arr[agg_rank], + agg_comm_sz_arr[agg_rank], &(agg_comm_dtype_arr[agg_rank])); #ifdef AGGREGATION_PROFILE @@ -359,7 +359,7 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, fprintf (stderr, "expecting from [agg](disp,size,cnt)="); for (i=0; i < nprocs; i++) { MPI_Type_size_x (agg_comm_dtype_arr[i], &size); - fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i], + fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i], size, agg_alltoallw_counts[i]); if (i != nprocs - 1) fprintf(stderr, ","); @@ -372,8 +372,8 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, MPI_Type_size_x (client_comm_dtype_arr[i], &size); else size = -1; - - fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i], + + fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i], size, client_alltoallw_counts[i]); if (i != nprocs - 1) fprintf(stderr, ","); @@ -434,7 +434,7 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, fprintf (stderr, "sending to [agg](disp,size,cnt)="); for (i=0; i < nprocs; i++) { MPI_Type_size_x (agg_comm_dtype_arr[i], &size); - fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i], + fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i], size, agg_alltoallw_counts[i]); if (i != nprocs - 1) fprintf(stderr, ","); @@ -446,8 +446,8 @@ void 
ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, MPI_Type_size_x (client_comm_dtype_arr[i], &size); else size = -1; - - fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i], + + fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i], size, client_alltoallw_counts[i]); if (i != nprocs - 1) fprintf(stderr, ","); @@ -458,7 +458,7 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, #ifdef DEBUG fprintf (stderr, "buffered_io_size = %lld\n", buffered_io_size); #endif - + if (clients_agg_count) { client_comm_statuses = ADIOI_Malloc(clients_agg_count * sizeof(MPI_Status)); @@ -484,7 +484,7 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, /* make sure we actually have the data to write out */ agg_comm_statuses = (MPI_Status *) ADIOI_Malloc (aggs_client_count*sizeof(MPI_Status)); - + MPI_Waitall (aggs_client_count, agg_comm_requests, agg_comm_statuses); #ifdef AGGREGATION_PROFILE @@ -598,7 +598,7 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, fd, agg_rank, (i+myrank)%fd->hints->cb_nodes, &(my_mem_view_state_arr[agg_rank]), &(agg_file_view_state_arr[agg_rank]), - 2*1024*1024, + 2*1024*1024, 64*1024); #ifdef AGGREGATION_PROFILE MPE_Log_event (5041, 0, NULL); @@ -606,7 +606,7 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, } } #endif - + /* aggregators pre-post all Irecv's for incoming data from * clients. 
if nothing is needed, agg_comm_requests is not * allocated */ @@ -627,7 +627,7 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, } /* Clean up */ - + if (fd->hints->cb_pfr != ADIOI_HINT_ENABLE) { /* AAR, FSIZE, and User provided uniform File realms */ if (1) { @@ -663,11 +663,11 @@ void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr, } ADIOI_Free (client_file_view_state_arr); ADIOI_Free (cb_buf); - } + } for (i = 0; i 0) MPI_Type_free (&agg_comm_dtype_arr[i]); - + ADIOI_Free (client_comm_sz_arr); ADIOI_Free (client_comm_dtype_arr); ADIOI_Free (my_mem_view_state_arr); @@ -707,9 +707,9 @@ void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype, int filetype_is_contig; ADIO_Offset i, remainder; ADIOI_Flatlist_node *flat_file; - + ADIO_Offset st_byte_off, end_byte_off; - + #ifdef AGGREGATION_PROFILE MPE_Log_event (5000, 0, NULL); #endif @@ -724,12 +724,12 @@ void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype, } ADIOI_Datatype_iscontig (fd->filetype, &filetype_is_contig); - + MPI_Type_size_x (fd->filetype, &filetype_size); MPI_Type_extent (fd->filetype, &filetype_extent); MPI_Type_size_x (fd->etype, &etype_size); MPI_Type_size_x (buftype, &buftype_size); - + total_io = buftype_size * count; if (filetype_is_contig) { @@ -748,7 +748,7 @@ void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype, points at an accessible byte in file. the first accessible byte in the file is not necessarily the first byte, nor is it necessarily the first off/len pair in the filetype. 
*/ - if (file_ptr_type == ADIO_INDIVIDUAL) { + if (file_ptr_type == ADIO_INDIVIDUAL) { st_byte_off = fd->fp_ind; /* find end byte of I/O (may be in middle of an etype) */ @@ -791,7 +791,7 @@ void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype, for (i=0; icount; i++) { sum += flat_file->blocklens[i]; if (sum >= remainder) { - end_byte_off += flat_file->indices[i] + + end_byte_off += flat_file->indices[i] + flat_file->blocklens[i] - sum + remainder - 1; break; } @@ -801,12 +801,12 @@ void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype, } else { /* find starting byte of I/O (must be aligned with an etype) */ - /* byte starting point of starting filetype */ + /* byte starting point of starting filetype */ st_byte_off = fd->disp + ((offset * etype_size) / filetype_size) * filetype_extent; /* number of file viewable bytes into starting filetype */ remainder = (etype_size * offset) % filetype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -819,14 +819,14 @@ void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype, break; } } - + /* find end byte of I/O (may be in middle of an etype) */ /* byte starting point of last filetype */ end_byte_off = fd->disp + (offset * etype_size + total_io) / filetype_size * filetype_extent; /* number of bytes into last filetype */ remainder = (offset * etype_size + total_io) % filetype_size; - + if (!remainder) { /* the last non-zero off/len pair */ for (i=flat_file->count-1; i>=0; i--) { @@ -847,7 +847,7 @@ void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype, for (i=0; icount; i++) { sum += flat_file->blocklens[i]; if (sum >= remainder) { - end_byte_off += flat_file->indices[i] + + end_byte_off += flat_file->indices[i] + flat_file->blocklens[i] - sum + remainder - 1; break; } @@ -855,7 +855,7 @@ void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype, } } } - + *st_offset = st_byte_off; *end_offset = end_byte_off; #ifdef DEBUG @@ 
-874,7 +874,7 @@ void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype, * assume that this uses MPI_BYTE for the etype, and disp is 0 */ void ADIOI_IOFiletype(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, MPI_Datatype custom_ftype, + ADIO_Offset offset, MPI_Datatype custom_ftype, int rdwr, ADIO_Status *status, int *error_code) { MPI_Datatype user_filetype; @@ -1031,7 +1031,7 @@ static void Exch_data_amounts (ADIO_File fd, int nprocs, send_requests = NULL; if (fd->is_agg) { /* only aggregators send data */ - send_requests = ADIOI_Malloc (nprocs * sizeof(MPI_Request)); + send_requests = ADIOI_Malloc (nprocs * sizeof(MPI_Request)); /* post all sends */ for (i = 0; i < nprocs; i++) { @@ -1067,7 +1067,7 @@ static void Exch_data_amounts (ADIO_File fd, int nprocs, } } -static void post_aggregator_comm (MPI_Comm comm, int rw_type, +static void post_aggregator_comm (MPI_Comm comm, int rw_type, int nproc, void *cb_buf, MPI_Datatype *client_comm_dtype_arr, ADIO_Offset *client_comm_sz_arr, @@ -1115,7 +1115,7 @@ static void post_aggregator_comm (MPI_Comm comm, int rw_type, } } -static void post_client_comm (ADIO_File fd, int rw_type, +static void post_client_comm (ADIO_File fd, int rw_type, int agg_rank, void *buf, MPI_Datatype agg_comm_dtype, int agg_alltoallw_count, diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_iopen.c b/ompi/mca/io/romio314/romio/adio/common/ad_iopen.c index 5deeaa2cdeb..37e98e3f92f 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_iopen.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_iopen.c @@ -1,14 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2002 University of Chicago. + * Copyright (C) 2002 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ #include "adio.h" void ADIO_ImmediateOpen(ADIO_File fd, int *error_code) -{ +{ MPI_Comm tmp_comm; tmp_comm = fd->comm; /* some file systems might try to be clever inside their open routine. @@ -18,4 +18,4 @@ void ADIO_ImmediateOpen(ADIO_File fd, int *error_code) fd->is_open = 1; fd->comm = tmp_comm; -} +} diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_iread.c b/ompi/mca/io/romio314/romio/adio/common/ad_iread.c index 74c342ab939..2bc3ad6f16f 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_iread.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_iread.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -30,15 +30,15 @@ * * This code handles two distinct cases. If ROMIO_HAVE_WORKING_AIO is not * defined, then I/O is performed in a blocking manner. Otherwise we post - * an asynchronous I/O operation using the appropriate aio routines. + * an asynchronous I/O operation using the appropriate aio routines. * * In the aio case we rely on ADIOI_GEN_aio(), which is implemented in * common/ad_iwrite.c. */ -void ADIOI_GEN_IreadContig(ADIO_File fd, void *buf, int count, +void ADIOI_GEN_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, MPI_Request *request, - int *error_code) + int *error_code) { MPI_Count len, typesize; int aio_errno = 0; @@ -60,7 +60,7 @@ void ADIOI_GEN_IreadContig(ADIO_File fd, void *buf, int count, return; } /* --END ERROR HANDLING-- */ - + *error_code = MPI_SUCCESS; } #endif @@ -68,7 +68,7 @@ void ADIOI_GEN_IreadContig(ADIO_File fd, void *buf, int count, /* Generic implementation of IreadStrided calls the blocking ReadStrided * immediately. 
*/ -void ADIOI_GEN_IreadStrided(ADIO_File fd, void *buf, int count, +void ADIOI_GEN_IreadStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int *error_code) @@ -80,8 +80,8 @@ void ADIOI_GEN_IreadStrided(ADIO_File fd, void *buf, int count, /* Call the blocking function. It will create an error code * if necessary. */ - ADIO_ReadStrided(fd, buf, count, datatype, file_ptr_type, - offset, &status, error_code); + ADIO_ReadStrided(fd, buf, count, datatype, file_ptr_type, + offset, &status, error_code); if (*error_code == MPI_SUCCESS) { MPI_Type_size_x(datatype, &typesize); diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_iread_fake.c b/ompi/mca/io/romio314/romio/adio/common/ad_iread_fake.c index b5cbd1da271..6a7f0bfde72 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_iread_fake.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_iread_fake.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -11,10 +11,10 @@ /* Generic implementation of IreadContig calls the blocking ReadContig * immediately. */ -void ADIOI_FAKE_IreadContig(ADIO_File fd, void *buf, int count, +void ADIOI_FAKE_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, - int *error_code) + int *error_code) { ADIO_Status status; MPI_Count typesize; @@ -27,8 +27,8 @@ void ADIOI_FAKE_IreadContig(ADIO_File fd, void *buf, int count, * if necessary. 
*/ ADIOI_Assert(len == (int) len); /* the count is an int parm */ - ADIO_ReadContig(fd, buf, (int)len, MPI_BYTE, file_ptr_type, offset, - &status, error_code); + ADIO_ReadContig(fd, buf, (int)len, MPI_BYTE, file_ptr_type, offset, + &status, error_code); if (*error_code != MPI_SUCCESS) { len=0; } @@ -39,7 +39,7 @@ void ADIOI_FAKE_IreadContig(ADIO_File fd, void *buf, int count, /* Generic implementation of IreadStrided calls the blocking ReadStrided * immediately. */ -void ADIOI_FAKE_IreadStrided(ADIO_File fd, void *buf, int count, +void ADIOI_FAKE_IreadStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int *error_code) @@ -51,8 +51,8 @@ void ADIOI_FAKE_IreadStrided(ADIO_File fd, void *buf, int count, /* Call the blocking function. It will create an error code * if necessary. */ - ADIO_ReadStrided(fd, buf, count, datatype, file_ptr_type, - offset, &status, error_code); + ADIO_ReadStrided(fd, buf, count, datatype, file_ptr_type, + offset, &status, error_code); if (*error_code == MPI_SUCCESS) { MPI_Type_size_x(datatype, &typesize); nbytes = (MPI_Offset)count*(MPI_Offset)typesize; diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_iwrite.c b/ompi/mca/io/romio314/romio/adio/common/ad_iwrite.c index 8177718ff80..feecae6a24a 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_iwrite.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_iwrite.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -27,7 +27,7 @@ #include "../../mpi-io/mpioimpl.h" #include "../../mpi-io/mpioprof.h" #include "mpiu_greq.h" -/* Workaround for incomplete set of definitions if __REDIRECT is not +/* Workaround for incomplete set of definitions if __REDIRECT is not defined and large file support is used in aio.h */ #if !defined(__REDIRECT) && defined(__USE_FILE_OFFSET64) #define aiocb aiocb64 @@ -39,7 +39,7 @@ static MPIX_Grequest_class ADIOI_GEN_greq_class = 0; /* ADIOI_GEN_IwriteContig * - * This code handles only the case where ROMIO_HAVE_WORKING_AIO is + * This code handles only the case where ROMIO_HAVE_WORKING_AIO is * defined. We post an asynchronous I/O operations using the appropriate aio * routines. Otherwise, the ADIOI_Fns_struct will point to the FAKE * version. @@ -100,8 +100,8 @@ int ADIOI_GEN_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, /* Use Direct I/O if desired and properly aligned */ if (fd->fns == &ADIO_XFS_operations && ((wr && fd->direct_write) || (!wr && fd->direct_read)) && - !(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) && - !(len % fd->d_miniosz) && (len >= fd->d_miniosz) && + !(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) && + !(len % fd->d_miniosz) && (len >= fd->d_miniosz) && (len <= maxiosz)) { fd_sys = fd->fd_direct; } @@ -146,17 +146,17 @@ int ADIOI_GEN_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, #endif if (err == -1) { - if (errno == EAGAIN || errno == ENOSYS) { + if (errno == EAGAIN || errno == ENOSYS) { /* exceeded the max. no. of outstanding requests. - or, aio routines are not actually implemented + or, aio routines are not actually implemented treat this as a blocking request and return. 
*/ - if (wr) - ADIO_WriteContig(fd, buf, len, MPI_BYTE, - ADIO_EXPLICIT_OFFSET, offset, &status, &error_code); + if (wr) + ADIO_WriteContig(fd, buf, len, MPI_BYTE, + ADIO_EXPLICIT_OFFSET, offset, &status, &error_code); else ADIO_ReadContig(fd, buf, len, MPI_BYTE, - ADIO_EXPLICIT_OFFSET, offset, &status, &error_code); - + ADIO_EXPLICIT_OFFSET, offset, &status, &error_code); + MPIO_Completed_request_create(&fd, len, &error_code, request); if (aiocbp != NULL) ADIOI_Free(aiocbp); if (aio_req != NULL) ADIOI_Free(aio_req); @@ -167,9 +167,9 @@ int ADIOI_GEN_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, } aio_req->aiocbp = aiocbp; if (ADIOI_GEN_greq_class == 0) { - MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn, - ADIOI_GEN_aio_free_fn, MPIU_Greq_cancel_fn, - ADIOI_GEN_aio_poll_fn, ADIOI_GEN_aio_wait_fn, + MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn, + ADIOI_GEN_aio_free_fn, MPIU_Greq_cancel_fn, + ADIOI_GEN_aio_poll_fn, ADIOI_GEN_aio_wait_fn, &ADIOI_GEN_greq_class); } MPIX_Grequest_class_allocate(ADIOI_GEN_greq_class, aio_req, request); @@ -191,11 +191,11 @@ void ADIOI_GEN_IwriteStrided(ADIO_File fd, const void *buf, int count, MPI_Count typesize; MPI_Offset nbytes=0; - /* Call the blocking function. It will create an error code + /* Call the blocking function. It will create an error code * if necessary. 
*/ - ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type, - offset, &status, error_code); + ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type, + offset, &status, error_code); if (*error_code == MPI_SUCCESS) { MPI_Type_size_x(datatype, &typesize); @@ -238,7 +238,7 @@ int ADIOI_GEN_aio_poll_fn(void *extra_state, MPI_Status *status) } /* wait for multiple requests to complete */ -int ADIOI_GEN_aio_wait_fn(int count, void ** array_of_states, +int ADIOI_GEN_aio_wait_fn(int count, void ** array_of_states, double timeout, MPI_Status *status) { const struct aiocb **cblist; @@ -270,13 +270,13 @@ int ADIOI_GEN_aio_wait_fn(int count, void ** array_of_states, do { err = aio_suspend(cblist, count, aio_timer_p); } while (err < 0 && errno == EINTR); - if (err == 0) + if (err == 0) { /* run through the list of requests, and mark all the completed ones as done */ for (i=0; i< count; i++) { /* aio_error returns an ERRNO value */ - if (aio_reqlist[i]->aiocbp == NULL) + if (aio_reqlist[i]->aiocbp == NULL) continue; errno = aio_error(aio_reqlist[i]->aiocbp); if (errno == 0) { @@ -286,15 +286,15 @@ int ADIOI_GEN_aio_wait_fn(int count, void ** array_of_states, if (errcode != MPI_SUCCESS) { errcode = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - "ADIOI_GEN_aio_wait_fn", - __LINE__, MPI_ERR_IO, + "ADIOI_GEN_aio_wait_fn", + __LINE__, MPI_ERR_IO, "**mpi_grequest_complete", 0); } ADIOI_Free(aio_reqlist[i]->aiocbp); aio_reqlist[i]->aiocbp = NULL; cblist[i] = NULL; nr_complete++; - } + } /* TODO: need to handle error conditions somehow*/ } } /* TODO: also need to handle errors here */ @@ -319,7 +319,7 @@ int ADIOI_GEN_aio_free_fn(void *extra_state) } #endif /* working AIO */ -int ADIOI_GEN_aio_query_fn(void *extra_state, MPI_Status *status) +int ADIOI_GEN_aio_query_fn(void *extra_state, MPI_Status *status) { ADIOI_AIO_Request *aio_req; @@ -327,16 +327,16 @@ int ADIOI_GEN_aio_query_fn(void *extra_state, MPI_Status *status) MPI_Status_set_elements_x(status, MPI_BYTE, 
aio_req->nbytes); - /* can never cancel so always true */ - MPI_Status_set_cancelled(status, 0); + /* can never cancel so always true */ + MPI_Status_set_cancelled(status, 0); - /* choose not to return a value for this */ - status->MPI_SOURCE = MPI_UNDEFINED; - /* tag has no meaning for this generalized request */ - status->MPI_TAG = MPI_UNDEFINED; - /* this generalized request never fails */ - return MPI_SUCCESS; + /* choose not to return a value for this */ + status->MPI_SOURCE = MPI_UNDEFINED; + /* tag has no meaning for this generalized request */ + status->MPI_TAG = MPI_UNDEFINED; + /* this generalized request never fails */ + return MPI_SUCCESS; } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_iwrite_fake.c b/ompi/mca/io/romio314/romio/adio/common/ad_iwrite_fake.c index 6210942e663..e92e13a5b7c 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_iwrite_fake.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_iwrite_fake.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -30,7 +30,7 @@ void ADIOI_FAKE_IwriteContig(ADIO_File fd, const void *buf, int count, */ ADIOI_Assert(len == (int) len); /* the count is an int parm */ ADIO_WriteContig(fd, buf, (int)len, MPI_BYTE, file_ptr_type, offset, - &status, error_code); + &status, error_code); if (*error_code == MPI_SUCCESS) { MPI_Type_size_x(datatype, &typesize); nbytes = (MPI_Offset)count*(MPI_Offset)typesize; @@ -52,11 +52,11 @@ void ADIOI_FAKE_IwriteStrided(ADIO_File fd, const void *buf, int count, MPI_Count typesize; MPI_Offset nbytes=0; - /* Call the blocking function. It will create an error code + /* Call the blocking function. It will create an error code * if necessary. 
*/ - ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type, - offset, &status, error_code); + ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type, + offset, &status, error_code); if (*error_code == MPI_SUCCESS) { MPI_Type_size_x(datatype, &typesize); nbytes = (MPI_Offset)count * (MPI_Offset)typesize; diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_open.c b/ompi/mca/io/romio314/romio/adio/common/ad_open.c index 7d69280057d..46da0b2d404 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_open.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -14,14 +14,14 @@ static int is_aggregator(int rank, ADIO_File fd); static int uses_generic_read(ADIO_File fd); static int uses_generic_write(ADIO_File fd); -static int build_cb_config_list(ADIO_File fd, - MPI_Comm orig_comm, MPI_Comm comm, +static int build_cb_config_list(ADIO_File fd, + MPI_Comm orig_comm, MPI_Comm comm, int rank, int procs, int *error_code); MPI_File ADIO_Open(MPI_Comm orig_comm, MPI_Comm comm, const char *filename, int file_system, ADIOI_Fns *ops, - int access_mode, ADIO_Offset disp, MPI_Datatype etype, + int access_mode, ADIO_Offset disp, MPI_Datatype etype, MPI_Datatype filetype, MPI_Info info, int perm, int *error_code) { @@ -126,13 +126,13 @@ MPI_File ADIO_Open(MPI_Comm orig_comm, * (e.g. Blue Gene) more efficent */ fd->io_buf = ADIOI_Malloc(fd->hints->cb_buffer_size); - /* deferred open: + /* deferred open: * we can only do this optimization if 'fd->hints->deferred_open' is set * (which means the user hinted 'no_indep_rw' and collective buffering). * Furthermore, we only do this if our collective read/write routines use * our generic function, and not an fs-specific routine (we can defer opens * only if we use our aggreagation code). 
*/ - if (fd->hints->deferred_open && + if (fd->hints->deferred_open && !(uses_generic_read(fd) \ && uses_generic_write(fd))) { fd->hints->deferred_open = 0; @@ -147,7 +147,7 @@ MPI_File ADIO_Open(MPI_Comm orig_comm, * one else does that right now */ if (fd->hints->ranklist == NULL) { build_cb_config_list(fd, orig_comm, comm, rank, procs, error_code); - if (*error_code != MPI_SUCCESS) + if (*error_code != MPI_SUCCESS) goto fn_exit; } /* for debugging, it can be helpful to see the hints selected */ @@ -177,7 +177,7 @@ MPI_File ADIO_Open(MPI_Comm orig_comm, /* If the file was successfully opened, close it */ if (*error_code == MPI_SUCCESS) { - + /* in the deferred open case, only those who have actually opened the file should close it */ if (fd->hints->deferred_open) { @@ -210,16 +210,16 @@ MPI_File ADIO_Open(MPI_Comm orig_comm, } /* a simple linear search. possible enancement: add a my_cb_nodes_index member - * ( index into cb_nodes, else -1 if not aggregator ) for faster lookups + * ( index into cb_nodes, else -1 if not aggregator ) for faster lookups * * fd->hints->cb_nodes is the number of aggregators * fd->hints->ranklist[] is an array of the ranks of aggregators * - * might want to move this to adio/common/cb_config_list.c + * might want to move this to adio/common/cb_config_list.c */ int is_aggregator(int rank, ADIO_File fd ) { int i; - + if (fd->my_cb_nodes_index == -2) { for (i=0; i< fd->hints->cb_nodes; i++ ) { if ( rank == fd->hints->ranklist[i] ) { @@ -253,8 +253,8 @@ static int uses_generic_write(ADIO_File fd) return 0; } -static int build_cb_config_list(ADIO_File fd, - MPI_Comm orig_comm, MPI_Comm comm, +static int build_cb_config_list(ADIO_File fd, + MPI_Comm orig_comm, MPI_Comm comm, int rank, int procs, int *error_code) { ADIO_cb_name_array array; @@ -282,7 +282,7 @@ static int build_cb_config_list(ADIO_File fd, return 0; } - rank_ct = ADIOI_cb_config_list_parse(fd->hints->cb_config_list, + rank_ct = 
ADIOI_cb_config_list_parse(fd->hints->cb_config_list, array, tmp_ranklist, fd->hints->cb_nodes); @@ -310,6 +310,6 @@ static int build_cb_config_list(ADIO_File fd, return 0; } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_opencoll.c b/ompi/mca/io/romio314/romio/adio/common/ad_opencoll.c index 467a322a340..b1fd33153c4 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_opencoll.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_opencoll.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * * Copyright (C) 2007 UChicago/Argonne LLC * See COPYRIGHT notice in top-level directory. @@ -12,16 +12,16 @@ * * optimization: by having just one process create a file, close it, * then have all N processes open it, we can possibly avoid contention - * for write locks on a directory for some file systems. + * for write locks on a directory for some file systems. * * Happy side-effect: exclusive create (error if file already exists) - * just falls out + * just falls out * * Note: this is not a "scalable open" (c.f. "The impact of file systems - * on MPI-IO scalability"). + * on MPI-IO scalability"). 
*/ - -void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, + +void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, int access_mode, int *error_code) { int orig_amode_excl, orig_amode_wronly; @@ -34,9 +34,9 @@ void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, /* remove delete_on_close flag if set */ if (access_mode & ADIO_DELETE_ON_CLOSE) fd->access_mode = access_mode ^ ADIO_DELETE_ON_CLOSE; - else + else fd->access_mode = access_mode; - + tmp_comm = fd->comm; fd->comm = MPI_COMM_SELF; (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); @@ -44,7 +44,7 @@ void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, MPI_Bcast(error_code, 1, MPI_INT, \ fd->hints->ranklist[0], fd->comm); /* if no error, close the file and reopen normally below */ - if (*error_code == MPI_SUCCESS) + if (*error_code == MPI_SUCCESS) (*(fd->fns->ADIOI_xxx_Close))(fd, error_code); fd->access_mode = access_mode; /* back to original */ @@ -53,10 +53,10 @@ void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, if (*error_code != MPI_SUCCESS) { return; - } + } else { /* turn off CREAT (and EXCL if set) for real multi-processor open */ - access_mode ^= ADIO_CREATE; + access_mode ^= ADIO_CREATE; if (access_mode & ADIO_EXCL) access_mode ^= ADIO_EXCL; } @@ -85,7 +85,7 @@ void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, } } -/* For writing with data sieving, a read-modify-write is needed. If +/* For writing with data sieving, a read-modify-write is needed. If the file is opened for write_only, the read will fail. Therefore, if write_only, open the file as read_write, but record it as write_only in fd, so that get_amode returns the right answer. */ @@ -103,10 +103,10 @@ void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); - /* if error, may be it was due to the change in amode above. - therefore, reopen with access mode provided by the user.*/ - fd->access_mode = orig_amode_wronly; - if (*error_code != MPI_SUCCESS) + /* if error, may be it was due to the change in amode above. 
+ therefore, reopen with access mode provided by the user.*/ + fd->access_mode = orig_amode_wronly; + if (*error_code != MPI_SUCCESS) (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); /* if we turned off EXCL earlier, then we should turn it back on */ @@ -124,6 +124,6 @@ void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_opencoll_failsafe.c b/ompi/mca/io/romio314/romio/adio/common/ad_opencoll_failsafe.c index e7ac8719e82..65c27467eaa 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_opencoll_failsafe.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_opencoll_failsafe.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * * Copyright (C) 2007 UChicago/Argonne LLC * See COPYRIGHT notice in top-level directory. @@ -12,7 +12,7 @@ * as NFS where a create from one client might not be immediately * visible on another */ -void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank, +void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank, int access_mode, int *error_code) { int orig_amode_excl, orig_amode_wronly; @@ -22,7 +22,7 @@ void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank, /* the open should fail if the file exists. Only *1* process * should check this. Otherwise, if all processes try to check * and the file does not exist, one process will create the file - * and others who reach later will return error. */ + * and others who reach later will return error. */ if(rank == fd->hints->ranklist[0]) { fd->access_mode = access_mode; (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); @@ -52,7 +52,7 @@ void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank, return; } -/* For writing with data sieving, a read-modify-write is needed. If +/* For writing with data sieving, a read-modify-write is needed. If the file is opened for write_only, the read will fail. 
Therefore, if write_only, open the file as read_write, but record it as write_only in fd, so that get_amode returns the right answer. */ @@ -66,10 +66,10 @@ void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank, (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); - /* if error, may be it was due to the change in amode above. - therefore, reopen with access mode provided by the user.*/ - fd->access_mode = orig_amode_wronly; - if (*error_code != MPI_SUCCESS) + /* if error, may be it was due to the change in amode above. + therefore, reopen with access mode provided by the user.*/ + fd->access_mode = orig_amode_wronly; + if (*error_code != MPI_SUCCESS) (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); if(*error_code != MPI_SUCCESS) return; @@ -83,6 +83,6 @@ void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank, -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_opencoll_scalable.c b/ompi/mca/io/romio314/romio/adio/common/ad_opencoll_scalable.c index 1c472017f53..97e0fb655a9 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_opencoll_scalable.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_opencoll_scalable.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * * Copyright (C) 2007 UChicago/Argonne LLC * See COPYRIGHT notice in top-level directory. @@ -7,24 +7,24 @@ #include "adio.h" -/* +/* * Scalable open: for file systems capable of having one process - * create/open a file and broadcast the result to everyone else. + * create/open a file and broadcast the result to everyone else. 
* - Does not need one process to create the file * - Does not need special handling for CREATE|EXCL */ -void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank, +void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank, int access_mode, int *error_code) -{ +{ int orig_amode_wronly; /* if we are doing deferred open, non-aggregators should return now */ if (fd->hints->deferred_open && !(fd->is_agg)) { *error_code = MPI_SUCCESS; return; - } - - /* For writing with data sieving, a read-modify-write is needed. If + } + + /* For writing with data sieving, a read-modify-write is needed. If the file is opened for write_only, the read will fail. Therefore, if write_only, open the file as read_write, but record it as write_only in fd, so that get_amode returns the right answer. */ @@ -38,10 +38,10 @@ void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank, (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); - /* if error, may be it was due to the change in amode above. - therefore, reopen with access mode provided by the user.*/ - fd->access_mode = orig_amode_wronly; - if (*error_code != MPI_SUCCESS) + /* if error, may be it was due to the change in amode above. + therefore, reopen with access mode provided by the user.*/ + fd->access_mode = orig_amode_wronly; + if (*error_code != MPI_SUCCESS) (*(fd->fns->ADIOI_xxx_Open))(fd, error_code); /* for deferred open: this process has opened the file (because if we are @@ -50,6 +50,6 @@ void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank, } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_prealloc.c b/ompi/mca/io/romio314/romio/adio/common/ad_prealloc.c index 9462f775aa6..557cff4434d 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_prealloc.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_prealloc.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. 
+ * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -13,7 +13,7 @@ * This naive approach will get the job done, but not in a terribly efficient * manner. */ -void ADIOI_GEN_Prealloc(ADIO_File fd, ADIO_Offset diskspace, int *error_code) +void ADIOI_GEN_Prealloc(ADIO_File fd, ADIO_Offset diskspace, int *error_code) { ADIO_Offset curr_fsize, alloc_size, size, len, done, ntimes; ADIO_Status status; @@ -23,11 +23,11 @@ void ADIOI_GEN_Prealloc(ADIO_File fd, ADIO_Offset diskspace, int *error_code) static char myname[] = "ADIOI_GEN_PREALLOC"; /* will be called by one process only */ - /* On file systems with no preallocation function, we have to - explicitly write - to allocate space. Since there could be holes in the file, - we need to read up to the current file size, write it back, - and then write beyond that depending on how much + /* On file systems with no preallocation function, we have to + explicitly write + to allocate space. Since there could be holes in the file, + we need to read up to the current file size, write it back, + and then write beyond that depending on how much preallocation is needed. 
read/write in sizes of no more than ADIOI_PREALLOC_BUFSZ */ @@ -40,14 +40,14 @@ void ADIOI_GEN_Prealloc(ADIO_File fd, ADIO_Offset diskspace, int *error_code) alloc_size = diskspace; size = ADIOI_MIN(curr_fsize, alloc_size); - + ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ; buf = (char *) ADIOI_Malloc(ADIOI_PREALLOC_BUFSZ); done = 0; for (i=0; i curr_fsize) { - memset(buf, 0, ADIOI_PREALLOC_BUFSZ); + memset(buf, 0, ADIOI_PREALLOC_BUFSZ); size = alloc_size - curr_fsize; ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ; for (i=0; i #endif -void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) @@ -96,7 +96,7 @@ void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count, fd->fp_sys_posn = offset + bytes_xfered; if (file_ptr_type == ADIO_INDIVIDUAL) { - fd->fp_ind += bytes_xfered; + fd->fp_ind += bytes_xfered; } #ifdef HAVE_STATUS_SET_BYTES diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_read_coll.c b/ompi/mca/io/romio314/romio/adio/common/ad_read_coll.c index c5202a3e723..db2a6a6b222 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_read_coll.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_read_coll.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -20,7 +20,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype datatype, int nprocs, int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, - ADIO_Offset *len_list, int contig_access_count, + ADIO_Offset *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, @@ -28,26 +28,26 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, - int *count, int *start_pos, - int *partial_send, - int *recd_from_proc, int nprocs, + int *count, int *start_pos, + int *partial_send, + int *recd_from_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, - ADIO_Offset min_st_offset, + ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, ADIO_Offset *fd_end, - ADIOI_Access *others_req, - int iter, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, + ADIOI_Access *others_req, + int iter, MPI_Aint buftype_extent, int *buf_idx); static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node - *flat_buf, char **recv_buf, ADIO_Offset - *offset_list, ADIO_Offset *len_list, - unsigned *recv_size, + *flat_buf, char **recv_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, + unsigned *recv_size, MPI_Request *requests, MPI_Status *statuses, int *recd_from_proc, int nprocs, - int contig_access_count, - ADIO_Offset min_st_offset, - ADIO_Offset fd_size, ADIO_Offset *fd_start, + int contig_access_count, + ADIO_Offset min_st_offset, + ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, MPI_Aint buftype_extent); @@ -58,15 +58,15 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count, *error_code) { /* Uses a generalized version of the extended two-phase method described - in "An Extended Two-Phase Method for Accessing 
Sections of + in "An Extended Two-Phase Method for Accessing Sections of Out-of-Core Arrays", Rajeev Thakur and Alok Choudhary, - Scientific Programming, (5)4:301--317, Winter 1996. + Scientific Programming, (5)4:301--317, Winter 1996. http://www.mcs.anl.gov/home/thakur/ext2ph.ps */ - ADIOI_Access *my_req; + ADIOI_Access *my_req; /* array of nprocs structures, one for each other process in whose file domain this process's request lies */ - + ADIOI_Access *others_req; /* array of nprocs structures, one for each other process whose request lies in this process's file domain. */ @@ -85,7 +85,7 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count, #endif if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) { - ADIOI_IOStridedColl (fd, buf, count, ADIOI_READ, datatype, + ADIOI_IOStridedColl (fd, buf, count, ADIOI_READ, datatype, file_ptr_type, offset, status, error_code); return; } @@ -108,19 +108,19 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count, ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset, &offset_list, &len_list, &start_offset, - &end_offset, &contig_access_count); - + &end_offset, &contig_access_count); + #ifdef RDCOLL_DEBUG for (i=0; ihints->cb_read == ADIOI_HINT_DISABLE - || (!interleave_count && (fd->hints->cb_read == ADIOI_HINT_AUTO))) + || (!interleave_count && (fd->hints->cb_read == ADIOI_HINT_AUTO))) { /* don't do aggregation */ if (fd->hints->cb_read != ADIOI_HINT_DISABLE) { @@ -187,11 +187,11 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count, */ ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs, nprocs_for_coll, &min_st_offset, - &fd_start, &fd_end, + &fd_start, &fd_end, fd->hints->min_fdomain_size, &fd_size, fd->hints->striping_unit); - /* calculate where the portions of the access requests of this process + /* calculate where the portions of the access requests of this process * are located in terms of the file domains. this could be on the same * process or on other processes. 
this function fills in: * count_my_req_procs - number of processes (including this one) for which @@ -205,7 +205,7 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count, */ ADIOI_Calc_my_req(fd, offset_list, len_list, contig_access_count, min_st_offset, fd_start, fd_end, fd_size, - nprocs, &count_my_req_procs, + nprocs, &count_my_req_procs, &count_my_req_per_proc, &my_req, &buf_idx); @@ -216,13 +216,13 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count, * count_others_req_per_proc[] - number of separate contiguous * requests from proc i lie in this process's file domain. */ - ADIOI_Calc_others_req(fd, count_my_req_procs, - count_my_req_per_proc, my_req, - nprocs, myrank, &count_others_req_procs, - &others_req); + ADIOI_Calc_others_req(fd, count_my_req_procs, + count_my_req_per_proc, my_req, + nprocs, myrank, &count_others_req_procs, + &others_req); - /* my_req[] and count_my_req_per_proc aren't needed at this point, so - * let's free the memory + /* my_req[] and count_my_req_per_proc aren't needed at this point, so + * let's free the memory */ ADIOI_Free(count_my_req_per_proc); for (i=0; ifp_ind : + offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : fd->disp + (ADIO_Offset)etype_size * offset; len_list[0] = 0; *start_offset_ptr = offset_list[0]; *end_offset_ptr = offset_list[0] + len_list[0] - 1; - + return; } if (filetype_is_contig) { - *contig_access_count_ptr = 1; + *contig_access_count_ptr = 1; *offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset)); *len_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset)); /* 2 is for consistency. everywhere I malloc one more than needed */ offset_list = *offset_list_ptr; len_list = *len_list_ptr; - offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : + offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : fd->disp + (ADIO_Offset)etype_size * offset; len_list[0] = (ADIO_Offset)bufcount * (ADIO_Offset)buftype_size; *start_offset_ptr = offset_list[0]; @@ -350,13 +350,13 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype else { /* First calculate what size of offset_list and len_list to allocate */ - + /* filetype already flattened in ADIO_Open or ADIO_Fcntl */ flat_file = ADIOI_Flatlist; while (flat_file->type != fd->filetype) flat_file = flat_file->next; disp = fd->disp; -#ifdef RDCOLL_DEBUG +#ifdef RDCOLL_DEBUG { int ii; DBG_FPRINTF(stderr, "flattened %3lld : ", flat_file->count ); @@ -372,7 +372,7 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; offset -= (ADIO_Offset)n_filetypes * filetype_extent; /* now offset is local to this extent */ - + /* find the block where offset is located, skip blocklens[i]==0 */ for (i=0; icount; i++) { ADIO_Offset dist; @@ -398,7 +398,7 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype n_filetypes = offset / n_etypes_in_filetype; etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -412,7 +412,7 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype } /* abs. 
offset in bytes in the file */ - offset = disp + n_filetypes* (ADIO_Offset)filetype_extent + + offset = disp + n_filetypes* (ADIO_Offset)filetype_extent + abs_off_in_filetype; } @@ -433,7 +433,7 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype /* allocate space for offset_list and len_list */ *offset_list_ptr = (ADIO_Offset *) - ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset)); + ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset)); *len_list_ptr = (ADIO_Offset *) ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset)); /* +1 to avoid a 0-size malloc */ @@ -461,12 +461,12 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/ if (off + frd_size < disp + flat_file->indices[j] + - flat_file->blocklens[j] + + flat_file->blocklens[j] + n_filetypes* (ADIO_Offset)filetype_extent) { off += frd_size; /* did not reach end of contiguous block in filetype. - * no more I/O needed. off is incremented by frd_size. + * no more I/O needed. off is incremented by frd_size. */ } else { @@ -475,10 +475,10 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype while (flat_file->blocklens[j]==0) { j = (j+1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; - /* hit end of flattened filetype; start at beginning + /* hit end of flattened filetype; start at beginning * again */ } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + n_filetypes* (ADIO_Offset)filetype_extent; frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } @@ -505,7 +505,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype int *buf_idx, int *error_code) { /* Read in sizes of no more than coll_bufsize, an info parameter. - Send data to appropriate processes. + Send data to appropriate processes. Place recd. data in user buf. The idea is to reduce the amount of extra memory required for collective I/O. 
If all data were read all at once, which is much @@ -529,7 +529,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype *error_code = MPI_SUCCESS; /* changed below if error */ /* only I/O errors are currently reported */ - + /* calculate the number of reads of size coll_bufsize to be done by each process and the max among all processes. That gives the no. of communication phases as well. @@ -568,11 +568,11 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype ntimes = (int) ((end_loc - st_loc + coll_bufsize)/coll_bufsize); } - MPI_Allreduce(&ntimes, &max_ntimes, 1, MPI_INT, MPI_MAX, fd->comm); + MPI_Allreduce(&ntimes, &max_ntimes, 1, MPI_INT, MPI_MAX, fd->comm); read_buf = fd->io_buf; /* Allocated at open time */ - curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); + curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); /* its use is explained below. calloc initializes to 0. */ count = (int *) ADIOI_Malloc(nprocs * sizeof(int)); @@ -580,7 +580,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype in an iteration. */ partial_send = (int *) ADIOI_Calloc(nprocs, sizeof(int)); - /* if only a portion of the last off-len pair is sent to a process + /* if only a portion of the last off-len pair is sent to a process in a particular iteration, the length sent is stored here. calloc initializes to 0. */ @@ -596,7 +596,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype ADIOI_Fill_user_buffer. initialized to 0 here. 
*/ start_pos = (int *) ADIOI_Malloc(nprocs*sizeof(int)); - /* used to store the starting value of curr_offlen_ptr[i] in + /* used to store the starting value of curr_offlen_ptr[i] in this iteration */ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); @@ -618,9 +618,9 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype /* go through all others_req and check if any are satisfied by the current read */ - /* since MPI guarantees that displacements in filetypes are in + /* since MPI guarantees that displacements in filetypes are in monotonically nondecreasing order, I can maintain a pointer - (curr_offlen_ptr) to + (curr_offlen_ptr) to current off-len pair for each process in others_req and scan further only from there. There is still a problem of filetypes such as: (1, 2, 3 are not process nos. They are just numbers for @@ -630,7 +630,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype 2 -----!---- 3 --!----- - where ! indicates where the current read_size limitation cuts + where ! indicates where the current read_size limitation cuts through the filetype. I resolve this by reading up to !, but filling the communication buffer only for 1. I copy the portion left over for 2 into a tmp_buf for use in the next @@ -639,19 +639,19 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype other end, as only one off-len pair with incomplete data will be sent. I also don't need to send the individual offsets and lens along with the data, as the data is being - sent in a particular order. */ + sent in a particular order. 
*/ - /* off = start offset in the file for the data actually read in - this iteration + /* off = start offset in the file for the data actually read in + this iteration size = size of data read corresponding to off real_off = off minus whatever data was retained in memory from previous iteration for cases like 2, 3 illustrated above real_size = size plus the extra corresponding to real_off - req_off = off in file for a particular contiguous request + req_off = off in file for a particular contiguous request minus what was satisfied in previous iteration req_size = size corresponding to req_off */ - size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); + size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); real_off = off - for_curr_iter; real_size = size + for_curr_iter; @@ -660,7 +660,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype for (i=0; icomm, requests+j); j++; buf_idx[i] += recv_size[i]; @@ -825,19 +825,19 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node else { /* allocate memory for recv_buf and post receives */ recv_buf = (char **) ADIOI_Malloc(nprocs * sizeof(char*)); - for (i=0; i < nprocs; i++) - if (recv_size[i]) recv_buf[i] = + for (i=0; i < nprocs; i++) + if (recv_size[i]) recv_buf[i] = (char *) ADIOI_Malloc(recv_size[i]); j = 0; - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (recv_size[i]) { - MPI_Irecv(recv_buf[i], recv_size[i], MPI_BYTE, i, + MPI_Irecv(recv_buf[i], recv_size[i], MPI_BYTE, i, myrank+i+100*iter, fd->comm, requests+j); j++; #ifdef RDCOLL_DEBUG - DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", - myrank, recv_size[i], myrank+i+100*iter); + DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", + myrank, recv_size[i], myrank+i+100*iter); #endif } } @@ -855,7 +855,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node } ADIOI_Type_create_hindexed_x(count[i], &(others_req[i].lens[start_pos[i]]), - 
&(others_req[i].mem_ptrs[start_pos[i]]), + &(others_req[i].mem_ptrs[start_pos[i]]), MPI_BYTE, &send_type); /* absolute displacement; use MPI_BOTTOM in send */ MPI_Type_commit(&send_type); @@ -868,7 +868,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node } statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+nprocs_recv+1) * \ - sizeof(MPI_Status)); + sizeof(MPI_Status)); /* +1 to avoid a 0-size malloc */ /* wait on the receives */ @@ -881,10 +881,10 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node #endif /* if noncontiguous, to the copies from the recv buffers */ - if (!buftype_is_contig) + if (!buftype_is_contig) ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf, - offset_list, len_list, (unsigned*)recv_size, - requests, statuses, recd_from_proc, + offset_list, len_list, (unsigned*)recv_size, + requests, statuses, recd_from_proc, nprocs, contig_access_count, min_st_offset, fd_size, fd_start, fd_end, buftype_extent); @@ -897,7 +897,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node ADIOI_Free(requests); if (!buftype_is_contig) { - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (recv_size[i]) ADIOI_Free(recv_buf[i]); ADIOI_Free(recv_buf); } @@ -955,14 +955,14 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node } static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node - *flat_buf, char **recv_buf, ADIO_Offset - *offset_list, ADIO_Offset *len_list, - unsigned *recv_size, + *flat_buf, char **recv_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, + unsigned *recv_size, MPI_Request *requests, MPI_Status *statuses, int *recd_from_proc, int nprocs, - int contig_access_count, - ADIO_Offset min_st_offset, - ADIO_Offset fd_size, ADIO_Offset *fd_start, + int contig_access_count, + ADIO_Offset min_st_offset, + ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, MPI_Aint buftype_extent) { @@ -981,9 
+981,9 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node /* curr_from_proc[p] = amount of data recd from proc. p that has already been accounted for so far - done_from_proc[p] = amount of data already recd from proc. p and + done_from_proc[p] = amount of data already recd from proc. p and filled into user buffer in previous iterations - user_buf_idx = current location in user buffer + user_buf_idx = current location in user buffer recv_buf_idx[p] = current location in recv_buf of proc. p */ curr_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); done_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned)); @@ -1000,10 +1000,10 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node flat_buf_sz = flat_buf->blocklens[0]; /* flat_buf_idx = current index into flattened buftype - flat_buf_sz = size of current contiguous component in + flat_buf_sz = size of current contiguous component in flattened buf */ - for (i=0; i done_from_proc[p]) { if (done_from_proc[p] > curr_from_proc[p]) { - size = ADIOI_MIN(curr_from_proc[p] + len - + size = ADIOI_MIN(curr_from_proc[p] + len - done_from_proc[p], recv_size[p]-recv_buf_idx[p]); buf_incr = done_from_proc[p] - curr_from_proc[p]; ADIOI_BUF_INCR @@ -1057,7 +1057,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node rem_len -= len; } } - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (recv_size[i]) recd_from_proc[i] = curr_from_proc[i]; ADIOI_Free(curr_from_proc); diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_read_str.c b/ompi/mca/io/romio314/romio/adio/common/ad_read_str.c index bec361b9d83..dc2ea719adb 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_read_str.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_read_str.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -56,7 +56,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, ADIO_Offset n_filetypes, etype_in_filetype, st_n_filetypes, size_in_filetype; ADIO_Offset abs_off_in_filetype=0, new_frd_size, frd_size=0, st_frd_size; MPI_Count filetype_size, etype_size, buftype_size, partial_read; - MPI_Aint filetype_extent, buftype_extent; + MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off, req_len, sum; ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off; @@ -69,7 +69,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, /* if user has disabled data sieving on reads, use naive * approach instead. */ - ADIOI_GEN_ReadStrided_naive(fd, + ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype, @@ -90,7 +90,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -105,7 +105,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, /* get max_bufsize from the info object. */ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); max_bufsize = atoi(value); ADIOI_Free(value); @@ -119,7 +119,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; - off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : + off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : fd->disp + (ADIO_Offset)etype_size * offset; start_off = off; @@ -132,11 +132,11 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); - ADIO_ReadContig(fd, readbuf, readbuf_len, MPI_BYTE, + ADIO_ReadContig(fd, readbuf, readbuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, readbuf_off, &status1, error_code); if (*error_code != MPI_SUCCESS) return; - for (j=0; jcount; i++) { userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]; @@ -152,7 +152,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off; - ADIOI_Free(readbuf); + ADIOI_Free(readbuf); } else { /* noncontiguous in file */ @@ -194,7 +194,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, n_filetypes = offset / n_etypes_in_filetype; etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -208,7 +208,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, } /* abs. offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; } @@ -223,7 +223,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, offset, status, error_code); if (file_ptr_type == ADIO_INDIVIDUAL) { - /* update MPI-IO file pointer to point to the first byte that + /* update MPI-IO file pointer to point to the first byte that * can be accessed in the fileview. */ fd->fp_ind = offset + bufsize; if (bufsize == frd_size) { @@ -238,10 +238,10 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, + n_filetypes*filetype_extent; } } - fd->fp_sys_posn = -1; /* set it to null. */ + fd->fp_sys_posn = -1; /* set it to null. 
*/ #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -#endif +#endif return; } @@ -287,9 +287,9 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, n_filetypes = st_n_filetypes; frd_size = ADIOI_MIN(st_frd_size, bufsize); while (i_offset < bufsize) { - if (frd_size) { - /* TYPE_UB and TYPE_LB can result in - frd_size = 0. save system call in such cases */ + if (frd_size) { + /* TYPE_UB and TYPE_LB can result in + frd_size = 0. save system call in such cases */ /* lseek(fd->fd_sys, off, SEEK_SET); err = read(fd->fd_sys, ((char *) buf) + i, frd_size);*/ @@ -312,7 +312,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, j = (j+1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent; frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } @@ -356,7 +356,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, j = (j+1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent; new_frd_size = flat_file->blocklens[j]; @@ -385,7 +385,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, brd_size = new_brd_size; } } - + if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); @@ -398,8 +398,8 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to - keep track of how much data was actually read and placed in buf +/* This is a temporary way of filling in status. The right way is to + keep track of how much data was actually read and placed in buf by ADIOI_BUFFERED_READ. 
*/ #endif diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_read_str_naive.c b/ompi/mca/io/romio314/romio/adio/common/ad_read_str_naive.c index d616bd883a1..6ecebda4305 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_read_str_naive.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_read_str_naive.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -23,7 +23,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, ADIO_Offset abs_off_in_filetype=0; MPI_Count bufsize, filetype_size, buftype_size, size_in_filetype; ADIO_Offset etype_size; - MPI_Aint filetype_extent, buftype_extent; + MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset=0, start_off; @@ -39,7 +39,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, buftype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -61,7 +61,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, flat_buf = ADIOI_Flatlist; while (flat_buf->type != buftype) flat_buf = flat_buf->next; - off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : + off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : fd->disp + etype_size * offset; start_off = off; @@ -78,17 +78,17 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, */ for (b_count=0; b_count < count; b_count++) { for (b_index=0; b_index < flat_buf->count; b_index++) { - userbuf_off = (ADIO_Offset)b_count*(ADIO_Offset)buftype_extent + + userbuf_off = (ADIO_Offset)b_count*(ADIO_Offset)buftype_extent + flat_buf->indices[b_index]; req_off = off; req_len = flat_buf->blocklens[b_index]; ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); ADIOI_Assert(req_len == (int) req_len); - ADIO_ReadContig(fd, + ADIO_ReadContig(fd, (char *) buf + userbuf_off, - req_len, - MPI_BYTE, + req_len, + MPI_BYTE, ADIO_EXPLICIT_OFFSET, req_off, &status1, @@ -110,7 +110,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, } else { /* noncontiguous in file */ - int f_index, st_index = 0; + int f_index, st_index = 0; ADIO_Offset st_n_filetypes; ADIO_Offset st_frd_size; int flag; @@ -141,15 +141,15 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, while (!flag) { n_filetypes++; for (f_index=0; f_index < flat_file->count; f_index++) { - if (disp + flat_file->indices[f_index] + - n_filetypes*(ADIO_Offset)filetype_extent + - flat_file->blocklens[f_index] >= start_off) + if (disp + flat_file->indices[f_index] + + n_filetypes*(ADIO_Offset)filetype_extent + + flat_file->blocklens[f_index] >= start_off) { /* this block contains our starting position */ st_index = f_index; - frd_size = disp + flat_file->indices[f_index] + - n_filetypes*(ADIO_Offset)filetype_extent + + frd_size = disp + flat_file->indices[f_index] + + n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[f_index] - start_off; flag = 1; break; @@ -162,7 +162,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, n_filetypes = offset / n_etypes_in_filetype; etype_in_filetype = (int) (offset % 
n_etypes_in_filetype); size_in_filetype = (unsigned)etype_in_filetype * (unsigned)etype_size; - + sum = 0; for (f_index=0; f_index < flat_file->count; f_index++) { sum += flat_file->blocklens[f_index]; @@ -170,21 +170,21 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, st_index = f_index; frd_size = sum - size_in_filetype; abs_off_in_filetype = flat_file->indices[f_index] + - size_in_filetype - + size_in_filetype - (sum - flat_file->blocklens[f_index]); break; } } /* abs. offset in bytes in the file */ - start_off = disp + n_filetypes*(ADIO_Offset)filetype_extent + + start_off = disp + n_filetypes*(ADIO_Offset)filetype_extent + abs_off_in_filetype; } st_frd_size = frd_size; st_n_filetypes = n_filetypes; - /* start_off, st_n_filetypes, st_index, and st_frd_size are + /* start_off, st_n_filetypes, st_index, and st_frd_size are * all calculated at this point */ @@ -205,9 +205,9 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, n_filetypes++; } - off = disp + flat_file->indices[f_index] + + off = disp + flat_file->indices[f_index] + n_filetypes*(ADIO_Offset)filetype_extent; - frd_size = ADIOI_MIN(flat_file->blocklens[f_index], + frd_size = ADIOI_MIN(flat_file->blocklens[f_index], bufsize-(unsigned)userbuf_off); } @@ -239,18 +239,18 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, /* while there is still space in the buffer, read more data */ while (userbuf_off < bufsize) { - if (frd_size) { - /* TYPE_UB and TYPE_LB can result in - frd_size = 0. save system call in such cases */ + if (frd_size) { + /* TYPE_UB and TYPE_LB can result in + frd_size = 0. 
save system call in such cases */ req_off = off; req_len = frd_size; ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); ADIOI_Assert(req_len == (int) req_len); - ADIO_ReadContig(fd, + ADIO_ReadContig(fd, (char *) buf + userbuf_off, - req_len, - MPI_BYTE, + req_len, + MPI_BYTE, ADIO_EXPLICIT_OFFSET, req_off, &status1, @@ -260,7 +260,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, userbuf_off += frd_size; if (off + frd_size < disp + flat_file->indices[f_index] + - flat_file->blocklens[f_index] + + flat_file->blocklens[f_index] + n_filetypes*(ADIO_Offset)filetype_extent) { /* important that this value be correct, as it is @@ -278,9 +278,9 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, f_index = 0; n_filetypes++; } - off = disp + flat_file->indices[f_index] + + off = disp + flat_file->indices[f_index] + n_filetypes*(ADIO_Offset)filetype_extent; - frd_size = ADIOI_MIN(flat_file->blocklens[f_index], + frd_size = ADIOI_MIN(flat_file->blocklens[f_index], bufsize-(unsigned)userbuf_off); } } @@ -313,10 +313,10 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); ADIOI_Assert(req_len == (int) req_len); - ADIO_ReadContig(fd, + ADIO_ReadContig(fd, (char *) buf + userbuf_off, - req_len, - MPI_BYTE, + req_len, + MPI_BYTE, ADIO_EXPLICIT_OFFSET, req_off, &status1, @@ -332,7 +332,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, n_filetypes++; } - off = disp + flat_file->indices[f_index] + + off = disp + flat_file->indices[f_index] + n_filetypes*(ADIO_Offset)filetype_extent; new_frd_size = flat_file->blocklens[f_index]; @@ -362,7 +362,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, } /* unlock the file region if we locked it */ - if ((fd->atomicity) && (fd->file_system != 
ADIO_PIOFS) && + if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2)) { ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); @@ -375,8 +375,8 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, buftype, bufsize); - /* This is a temporary way of filling in status. The right way is to - * keep track of how much data was actually read and placed in buf + /* This is a temporary way of filling in status. The right way is to + * keep track of how much data was actually read and placed in buf */ #endif diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_resize.c b/ompi/mca/io/romio314/romio/adio/common/ad_resize.c index a6be2c17608..0f166cbc2f4 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_resize.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_resize.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -20,7 +20,7 @@ void ADIOI_GEN_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) /* first aggregator performs ftruncate() */ if (rank == fd->hints->ranklist[0]) { - ADIOI_Assert(size == (off_t) size); + ADIOI_Assert(size == (off_t) size); err = ftruncate(fd->fd_sys, (off_t)size); } diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_seek.c b/ompi/mca/io/romio314/romio/adio/common/ad_seek.c index ceaf6f2d62d..b987fe6d023 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_seek.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_seek.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -11,11 +11,11 @@ #include #endif -ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, +ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, int whence, int *error_code) { /* implemented for whence=SEEK_SET only. SEEK_CUR and SEEK_END must - be converted to the equivalent with SEEK_SET before calling this + be converted to the equivalent with SEEK_SET before calling this routine. */ /* offset is in units of etype relative to the filetype */ @@ -46,7 +46,7 @@ ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, /* Since offset relative to the filetype size, we can't do compute the offset when that result is zero. Return zero for the offset for now */ - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return 0; } @@ -54,7 +54,7 @@ ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, n_filetypes = offset / n_etypes_in_filetype; etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -73,7 +73,7 @@ ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, /* * we used to call lseek here and update both fp_ind and fp_sys_posn, but now * we don't seek and only update fp_ind (ROMIO's idea of where we are in the - * file). We leave the system file descriptor and fp_sys_posn alone. + * file). We leave the system file descriptor and fp_sys_posn alone. 
* The fs-specifc ReadContig and WriteContig will seek to the correct place in * the file before reading/writing if the 'offset' parameter doesn't match * fp_sys_posn diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_set_sh_fp.c b/ompi/mca/io/romio314/romio/adio/common/ad_set_sh_fp.c index c55f2efe5a9..b936d205513 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_set_sh_fp.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_set_sh_fp.c @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "adio.h" -/* set the shared file pointer to "offset" etypes relative to the current +/* set the shared file pointer to "offset" etypes relative to the current view */ void ADIOI_NFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code); @@ -25,20 +25,20 @@ void ADIO_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code) if (fd->shared_fp_fd == ADIO_FILE_NULL) { MPI_Comm_dup(MPI_COMM_SELF, &dupcommself); - fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF, dupcommself, - fd->shared_fp_fname, + fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF, dupcommself, + fd->shared_fp_fname, fd->file_system, fd->fns, - ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE, + ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE, 0, MPI_BYTE, MPI_BYTE, - MPI_INFO_NULL, + MPI_INFO_NULL, ADIO_PERM_NULL, error_code); } if (*error_code != MPI_SUCCESS) return; ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset)); - ADIO_WriteContig(fd->shared_fp_fd, &offset, sizeof(ADIO_Offset), + ADIO_WriteContig(fd->shared_fp_fd, &offset, sizeof(ADIO_Offset), MPI_BYTE, ADIO_EXPLICIT_OFFSET, 0, &status, error_code); ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset)); } diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_set_view.c b/ompi/mca/io/romio314/romio/adio/common/ad_set_view.c index 
31aa6c6dd50..2b8ef46b2d1 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_set_view.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_set_view.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -12,13 +12,13 @@ * deferred open easier if we know ADIO_Fcntl will always need a file to really * be open. set_view doesn't modify anything related to the open files. */ -void ADIO_Set_view(ADIO_File fd, ADIO_Offset disp, MPI_Datatype etype, - MPI_Datatype filetype, MPI_Info info, int *error_code) +void ADIO_Set_view(ADIO_File fd, ADIO_Offset disp, MPI_Datatype etype, + MPI_Datatype filetype, MPI_Info info, int *error_code) { int combiner, i, j, k, err, filetype_is_contig; MPI_Datatype copy_etype, copy_filetype; ADIOI_Flatlist_node *flat_file; - /* free copies of old etypes and filetypes and delete flattened + /* free copies of old etypes and filetypes and delete flattened version of filetype if necessary */ MPI_Type_get_envelope(fd->etype, &i, &j, &k, &combiner); @@ -43,7 +43,7 @@ void ADIO_Set_view(ADIO_File fd, ADIO_Offset disp, MPI_Datatype etype, fd->etype = copy_etype; } MPI_Type_get_envelope(filetype, &i, &j, &k, &combiner); - if (combiner == MPI_COMBINER_NAMED) + if (combiner == MPI_COMBINER_NAMED) fd->filetype = filetype; else { MPI_Type_contiguous(1, filetype, ©_filetype); @@ -64,7 +64,7 @@ void ADIO_Set_view(ADIO_File fd, ADIO_Offset disp, MPI_Datatype etype, if (filetype_is_contig) fd->fp_ind = disp; else { flat_file = ADIOI_Flatlist; - while (flat_file->type != fd->filetype) + while (flat_file->type != fd->filetype) flat_file = flat_file->next; for (i=0; icount; i++) { if (flat_file->blocklens[i]) { diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_subarray.c b/ompi/mca/io/romio314/romio/adio/common/ad_subarray.c index c4597646ad6..e7984ac3814 100644 --- 
a/ompi/mca/io/romio314/romio/adio/common/ad_subarray.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_subarray.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -9,11 +9,11 @@ #include "adio_extern.h" int ADIO_Type_create_subarray(int ndims, - int *array_of_sizes, + int *array_of_sizes, int *array_of_subsizes, int *array_of_starts, int order, - MPI_Datatype oldtype, + MPI_Datatype oldtype, MPI_Datatype *newtype) { MPI_Aint extent, disps[3], size; @@ -31,7 +31,7 @@ int ADIO_Type_create_subarray(int ndims, MPI_Type_vector(array_of_subsizes[1], array_of_subsizes[0], array_of_sizes[0], oldtype, &tmp1); - + size = (MPI_Aint)array_of_sizes[0]*extent; for (i=2; i=0; i--) { size *= (MPI_Aint)array_of_sizes[i+1]; @@ -69,7 +69,7 @@ int ADIO_Type_create_subarray(int ndims, tmp1 = tmp2; } } - + /* add displacement and UB */ disps[1] = array_of_starts[ndims-1]; size = 1; @@ -78,18 +78,18 @@ int ADIO_Type_create_subarray(int ndims, disps[1] += size*(MPI_Aint)array_of_starts[i]; } } - + disps[1] *= extent; - + disps[2] = extent; for (i=0; i #endif -/* Workaround for incomplete set of definitions if __REDIRECT is not +/* Workaround for incomplete set of definitions if __REDIRECT is not defined and large file support is used in aio.h */ #if !defined(__REDIRECT) && defined(__USE_FILE_OFFSET64) #define aiocb aiocb64 @@ -40,8 +40,8 @@ * to complete. 
*/ void ADIOI_GEN_IOComplete(ADIO_Request *request, ADIO_Status *status, - int *error_code) + int *error_code) { return; - + } diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_wait_fake.c b/ompi/mca/io/romio314/romio/adio/common/ad_wait_fake.c index 16947baa1fb..72905080c90 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_wait_fake.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_wait_fake.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_write.c b/ompi/mca/io/romio314/romio/adio/common/ad_write.c index 34a31d963f7..2e8692d64b4 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_write.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_write.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -95,7 +95,7 @@ void ADIOI_GEN_WriteContig(ADIO_File fd, const void *buf, int count, fd->fp_sys_posn = offset + bytes_xfered; if (file_ptr_type == ADIO_INDIVIDUAL) { - fd->fp_ind += bytes_xfered; + fd->fp_ind += bytes_xfered; } #ifdef ROMIO_GPFS diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_write_coll.c b/ompi/mca/io/romio314/romio/adio/common/ad_write_coll.c index d585f9e09fa..266f97d6246 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_write_coll.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_write_coll.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -22,31 +22,31 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype ADIO_Offset *fd_start, ADIO_Offset *fd_end, int *buf_idx, int *error_code); static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, - ADIOI_Flatlist_node *flat_buf, ADIO_Offset - *offset_list, ADIO_Offset *len_list, int *send_size, + ADIOI_Flatlist_node *flat_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, - int *count, int *start_pos, int *partial_recv, - int *sent_to_proc, int nprocs, + int *count, int *start_pos, int *partial_recv, + int *sent_to_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, ADIO_Offset *fd_end, - ADIOI_Access *others_req, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, + ADIOI_Access *others_req, int *send_buf_idx, int *curr_to_proc, - int *done_to_proc, int *hole, int iter, + int *done_to_proc, int *hole, int iter, MPI_Aint buftype_extent, int *buf_idx, int *error_code); static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node - *flat_buf, char **send_buf, ADIO_Offset - *offset_list, ADIO_Offset *len_list, int *send_size, - MPI_Request *requests, int *sent_to_proc, - int nprocs, int myrank, + *flat_buf, char **send_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, int *send_size, + MPI_Request *requests, int *sent_to_proc, + int nprocs, int myrank, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, ADIO_Offset *fd_end, - int *send_buf_idx, int *curr_to_proc, - int *done_to_proc, int iter, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, + int *send_buf_idx, int *curr_to_proc, + int *done_to_proc, int iter, MPI_Aint buftype_extent); -void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, +void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, ADIO_Offset *srt_off, int *srt_len, int 
*start_pos, int nprocs, int nprocs_recv, int total_elements); @@ -57,15 +57,15 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count, *error_code) { /* Uses a generalized version of the extended two-phase method described - in "An Extended Two-Phase Method for Accessing Sections of + in "An Extended Two-Phase Method for Accessing Sections of Out-of-Core Arrays", Rajeev Thakur and Alok Choudhary, - Scientific Programming, (5)4:301--317, Winter 1996. + Scientific Programming, (5)4:301--317, Winter 1996. http://www.mcs.anl.gov/home/thakur/ext2ph.ps */ - ADIOI_Access *my_req; + ADIOI_Access *my_req; /* array of nprocs access structures, one for each other process in whose file domain this process's request lies */ - + ADIOI_Access *others_req; /* array of nprocs access structures, one for each other process whose request lies in this process's file domain. */ @@ -80,7 +80,7 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count, ADIO_Offset *len_list = NULL; int old_error, tmp_error; - if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) { + if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) { /* Cast away const'ness as the below function is used for read * and write */ ADIOI_IOStridedColl (fd, (char *) buf, count, ADIOI_WRITE, datatype, @@ -107,12 +107,12 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count, ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset, &offset_list, &len_list, &start_offset, - &end_offset, &contig_access_count); + &end_offset, &contig_access_count); - /* each process communicates its start and end offsets to other + /* each process communicates its start and end offsets to other processes. The result is an array each of start and end offsets stored - in order of process rank. */ - + in order of process rank. 
*/ + st_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset)); end_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset)); @@ -123,7 +123,7 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count, /* are the accesses of different processes interleaved? */ for (i=1; ihints->min_fdomain_size, &fd_size, - fd->hints->striping_unit); + fd->hints->striping_unit); /* calculate what portions of the access requests of this process are @@ -178,22 +178,22 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count, ADIOI_Calc_my_req(fd, offset_list, len_list, contig_access_count, min_st_offset, fd_start, fd_end, fd_size, - nprocs, &count_my_req_procs, + nprocs, &count_my_req_procs, &count_my_req_per_proc, &my_req, - &buf_idx); + &buf_idx); /* based on everyone's my_req, calculate what requests of other processes lie in this process's file domain. count_others_req_procs = number of processes whose requests lie in - this process's file domain (including this process itself) + this process's file domain (including this process itself) count_others_req_per_proc[i] indicates how many separate contiguous requests of proc. i lie in this process's file domain. */ - ADIOI_Calc_others_req(fd, count_my_req_procs, - count_my_req_per_proc, my_req, + ADIOI_Calc_others_req(fd, count_my_req_procs, + count_my_req_per_proc, my_req, nprocs, myrank, - &count_others_req_procs, &others_req); - + &count_others_req_procs, &others_req); + ADIOI_Free(count_my_req_per_proc); for (i=0; i < nprocs; i++) { if (my_req[i].count) { @@ -214,7 +214,7 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count, * it's possible to have those subsequent writes on other processes * race ahead and sneak in before the read-modify-write completes. * We carry out a collective communication at the end here so no one - * can start independent i/o before collective I/O completes. + * can start independent i/o before collective I/O completes. 
* * need to do some gymnastics with the error codes so that if something * went wrong, all processes report error, but if a process has a more @@ -229,12 +229,12 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count, #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_postwrite_a, 0, NULL ); #endif - if (fd->hints->cb_nodes == 1) - MPI_Bcast(error_code, 1, MPI_INT, + if (fd->hints->cb_nodes == 1) + MPI_Bcast(error_code, 1, MPI_INT, fd->hints->ranklist[0], fd->comm); else { tmp_error = *error_code; - MPI_Allreduce(&tmp_error, error_code, 1, MPI_INT, + MPI_Allreduce(&tmp_error, error_code, 1, MPI_INT, MPI_MAX, fd->comm); } #ifdef ADIOI_MPE_LOGGING @@ -277,7 +277,7 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count, bufsize = size * count; MPIR_Status_set_bytes(status, datatype, bufsize); } -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written during collective I/O. */ #endif @@ -293,7 +293,7 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count, * code is created and returned in error_code. */ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype - datatype, int nprocs, + datatype, int nprocs, int myrank, ADIOI_Access *others_req, ADIO_Offset *offset_list, @@ -303,7 +303,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype int *buf_idx, int *error_code) { /* Send data to appropriate processes and write in sizes of no more - than coll_bufsize. + than coll_bufsize. The idea is to reduce the amount of extra memory required for collective I/O. If all data were written all at once, which is much easier, it would require temp space more than the size of user_buf, @@ -334,7 +334,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype That gives the no. of communication phases as well. 
*/ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, + ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); coll_bufsize = atoi(value); ADIOI_Free(value); @@ -364,11 +364,11 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype } MPI_Allreduce(&ntimes, &max_ntimes, 1, MPI_INT, MPI_MAX, - fd->comm); + fd->comm); write_buf = fd->io_buf; - curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); + curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int)); /* its use is explained below. calloc initializes to 0. */ count = (int *) ADIOI_Malloc(nprocs*sizeof(int)); @@ -397,7 +397,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype /* Above three are used in ADIOI_Fill_send_buffer*/ start_pos = (int *) ADIOI_Malloc(nprocs*sizeof(int)); - /* used to store the starting value of curr_offlen_ptr[i] in + /* used to store the starting value of curr_offlen_ptr[i] in this iteration */ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); @@ -427,15 +427,15 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype /* go through all others_req and check which will be satisfied by the current write */ - /* Note that MPI guarantees that displacements in filetypes are in + /* Note that MPI guarantees that displacements in filetypes are in monotonically nondecreasing order and that, for writes, the filetypes cannot specify overlapping regions in the file. This simplifies implementation a bit compared to reads. 
*/ - /* off = start offset in the file for the data to be written in - this iteration + /* off = start offset in the file for the data to be written in + this iteration size = size of data written (bytes) corresponding to off - req_off = off in file for a particular contiguous request + req_off = off in file for a particular contiguous request minus what was satisfied in previous iteration req_size = size corresponding to req_off */ @@ -443,7 +443,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype for (i=0; i < nprocs; i++) count[i] = recv_size[i] = 0; - size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); + size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); for (i=0; i < nprocs; i++) { if (others_req[i].count) { @@ -453,7 +453,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype /* this request may have been partially satisfied in the previous iteration. */ req_off = others_req[i].offsets[j] + - partial_recv[i]; + partial_recv[i]; req_len = others_req[i].lens[j] - partial_recv[i]; partial_recv[i] = 0; @@ -468,10 +468,10 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype if (req_off < off + size) { count[i]++; ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off)); - MPI_Address(write_buf+req_off-off, + MPI_Address(write_buf+req_off-off, &(others_req[i].mem_ptrs[j])); ADIOI_Assert((off + size - req_off) == (int)(off + size - req_off)); - recv_size[i] += (int)(ADIOI_MIN(off + size - req_off, + recv_size[i] += (int)(ADIOI_MIN(off + size - req_off, (unsigned)req_len)); if (off+size-req_off < (unsigned)req_len) @@ -479,9 +479,9 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype partial_recv[i] = (int) (off + size - req_off); /* --BEGIN ERROR HANDLING-- */ - if ((j+1 < others_req[i].count) && + if ((j+1 < others_req[i].count) && (others_req[i].offsets[j+1] < off+size)) - { + { *error_code = 
MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, @@ -501,16 +501,16 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype curr_offlen_ptr[i] = j; } } - - ADIOI_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list, - len_list, send_size, recv_size, off, size, count, - start_pos, partial_recv, - sent_to_proc, nprocs, myrank, + + ADIOI_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list, + len_list, send_size, recv_size, off, size, count, + start_pos, partial_recv, + sent_to_proc, nprocs, myrank, buftype_is_contig, contig_access_count, min_st_offset, fd_size, fd_start, fd_end, others_req, send_buf_idx, curr_to_proc, done_to_proc, &hole, m, buftype_extent, buf_idx, - error_code); + error_code); if (*error_code != MPI_SUCCESS) return; flag = 0; @@ -519,7 +519,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype if (flag) { ADIOI_Assert(size == (int)size); - ADIO_WriteContig(fd, write_buf, (int)size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, + ADIO_WriteContig(fd, write_buf, (int)size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, off, &status, error_code); if (*error_code != MPI_SUCCESS) return; } @@ -532,15 +532,15 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype for (m=ntimes; matomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ requests = (MPI_Request *) - ADIOI_Malloc((nprocs_send+1)*sizeof(MPI_Request)); + ADIOI_Malloc((nprocs_send+1)*sizeof(MPI_Request)); send_req = requests; } else { - requests = (MPI_Request *) - ADIOI_Malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); + requests = (MPI_Request *) + ADIOI_Malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); /* +1 to avoid a 0-size malloc */ /* post receives */ @@ -724,10 +724,10 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, #endif if (buftype_is_contig) { j = 0; - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (send_size[i]) { - MPI_Isend(((char *) buf) + buf_idx[i], 
send_size[i], - MPI_BYTE, i, myrank+i+100*iter, fd->comm, + MPI_Isend(((char *) buf) + buf_idx[i], send_size[i], + MPI_BYTE, i, myrank+i+100*iter, fd->comm, send_req+j); j++; buf_idx[i] += send_size[i]; @@ -736,16 +736,16 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, else if (nprocs_send) { /* buftype is not contig */ send_buf = (char **) ADIOI_Malloc(nprocs*sizeof(char*)); - for (i=0; i < nprocs; i++) - if (send_size[i]) + for (i=0; i < nprocs; i++) + if (send_size[i]) send_buf[i] = (char *) ADIOI_Malloc(send_size[i]); ADIOI_Fill_send_buffer(fd, buf, flat_buf, send_buf, - offset_list, len_list, send_size, + offset_list, len_list, send_size, send_req, - sent_to_proc, nprocs, myrank, + sent_to_proc, nprocs, myrank, contig_access_count, - min_st_offset, fd_size, fd_start, fd_end, + min_st_offset, fd_size, fd_start, fd_end, send_buf_idx, curr_to_proc, done_to_proc, iter, buftype_extent); /* the send is done in ADIOI_Fill_send_buffer */ @@ -766,16 +766,16 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, for (i=0; iatomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+1) * \ - sizeof(MPI_Status)); + sizeof(MPI_Status)); /* +1 to avoid a 0-size malloc */ } else { statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+nprocs_recv+1) * \ - sizeof(MPI_Status)); + sizeof(MPI_Status)); /* +1 to avoid a 0-size malloc */ } @@ -802,7 +802,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, ADIOI_Free(statuses); ADIOI_Free(requests); if (!buftype_is_contig && nprocs_send) { - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (send_size[i]) ADIOI_Free(send_buf[i]); ADIOI_Free(send_buf); } @@ -861,14 +861,14 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node - *flat_buf, char **send_buf, ADIO_Offset - *offset_list, 
ADIO_Offset *len_list, int *send_size, - MPI_Request *requests, int *sent_to_proc, - int nprocs, int myrank, - int contig_access_count, + *flat_buf, char **send_buf, ADIO_Offset + *offset_list, ADIO_Offset *len_list, int *send_size, + MPI_Request *requests, int *sent_to_proc, + int nprocs, int myrank, + int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, - ADIO_Offset *fd_start, ADIO_Offset *fd_end, - int *send_buf_idx, int *curr_to_proc, + ADIO_Offset *fd_start, ADIO_Offset *fd_end, + int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int iter, MPI_Aint buftype_extent) { @@ -881,9 +881,9 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node /* curr_to_proc[p] = amount of data sent to proc. p that has already been accounted for so far - done_to_proc[p] = amount of data already sent to proc. p in + done_to_proc[p] = amount of data already sent to proc. p in previous iterations - user_buf_idx = current location in user buffer + user_buf_idx = current location in user buffer send_buf_idx[p] = current location in send_buf of proc. 
p */ for (i=0; i < nprocs; i++) { @@ -898,10 +898,10 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node flat_buf_sz = flat_buf->blocklens[0]; /* flat_buf_idx = current index into flattened buftype - flat_buf_sz = size of current contiguous component in + flat_buf_sz = size of current contiguous component in flattened buf */ - for (i=0; i done_to_proc[p]) { if (done_to_proc[p] > curr_to_proc[p]) { - size = ADIOI_MIN(curr_to_proc[p] + len - + size = ADIOI_MIN(curr_to_proc[p] + len - done_to_proc[p], send_size[p]-send_buf_idx[p]); buf_incr = done_to_proc[p] - curr_to_proc[p]; ADIOI_BUF_INCR @@ -942,7 +942,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node ADIOI_BUF_COPY } if (send_buf_idx[p] == send_size[p]) { - MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p, + MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p, myrank+p+100*iter, fd->comm, requests+jj); jj++; } @@ -962,13 +962,13 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node rem_len -= len; } } - for (i=0; i < nprocs; i++) + for (i=0; i < nprocs; i++) if (send_size[i]) sent_to_proc[i] = curr_to_proc[i]; } -void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, +void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, ADIO_Offset *srt_off, int *srt_len, int *start_pos, int nprocs, int nprocs_recv, int total_elements) { @@ -998,7 +998,7 @@ void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, heapsize = nprocs_recv; for (i=heapsize/2 - 1; i>=0; i--) { /* Heapify(a, i, heapsize); Algorithm from Cormen et al. pg. 143 - modified for a heap with smallest element at root. I have + modified for a heap with smallest element at root. I have removed the recursion so that there are no function calls. Function calls are too expensive. 
*/ k = i; @@ -1006,12 +1006,12 @@ void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, l = 2*(k+1) - 1; r = 2*(k+1); - if ((l < heapsize) && + if ((l < heapsize) && (*(a[l].off_list) < *(a[k].off_list))) smallest = l; else smallest = k; - if ((r < heapsize) && + if ((r < heapsize) && (*(a[r].off_list) < *(a[smallest].off_list))) smallest = r; @@ -1023,11 +1023,11 @@ void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, a[k].off_list = a[smallest].off_list; a[k].len_list = a[smallest].len_list; a[k].nelem = a[smallest].nelem; - + a[smallest].off_list = tmp.off_list; a[smallest].len_list = tmp.len_list; a[smallest].nelem = tmp.nelem; - + k = smallest; } else break; @@ -1057,12 +1057,12 @@ void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, l = 2*(k+1) - 1; r = 2*(k+1); - if ((l < heapsize) && + if ((l < heapsize) && (*(a[l].off_list) < *(a[k].off_list))) smallest = l; else smallest = k; - if ((r < heapsize) && + if ((r < heapsize) && (*(a[r].off_list) < *(a[smallest].off_list))) smallest = r; @@ -1074,11 +1074,11 @@ void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, a[k].off_list = a[smallest].off_list; a[k].len_list = a[smallest].len_list; a[k].nelem = a[smallest].nelem; - + a[smallest].off_list = tmp.off_list; a[smallest].len_list = tmp.len_list; a[smallest].nelem = tmp.nelem; - + k = smallest; } else break; diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_write_nolock.c b/ompi/mca/io/romio314/romio/adio/common/ad_write_nolock.c index 704819843ed..42b5ff2d3ab 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_write_nolock.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_write_nolock.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -21,7 +21,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, * cannot or do not support client-side buffering * Does not do data sieving optimization * Does contain write-combining optimization for noncontig in memory, contig in - * file + * file */ /* offset is in units of etype relative to the filetype. */ @@ -62,7 +62,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -75,7 +75,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, MPI_Type_size_x(datatype, &buftype_size); MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; - + ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count)); bufsize = buftype_size * count; @@ -112,8 +112,8 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, if (flat_buf->blocklens[i] > combine_buf_remain && combine_buf != combine_buf_ptr) { /* there is data in the buffer; write out the buffer so far */ #ifdef IO_DEBUG - printf("[%d/%d] nc mem c file (0) writing loc = %Ld sz = %Ld\n", - rank, nprocs, off, + printf("[%d/%d] nc mem c file (0) writing loc = %Ld sz = %Ld\n", + rank, nprocs, off, fd->hints->ind_wr_buffer_size-combine_buf_remain); #endif #ifdef ADIOI_MPE_LOGGING @@ -138,8 +138,8 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, * write directly */ #ifdef IO_DEBUG - printf("[%d/%d] nc mem c file (1) writing loc = %Ld sz = %d\n", - rank, nprocs, off, + printf("[%d/%d] nc mem c file (1) writing loc = %Ld sz = %d\n", + rank, nprocs, off, flat_buf->blocklens[i]); #endif ADIOI_Assert(flat_buf->blocklens[i] == (unsigned)flat_buf->blocklens[i]); @@ -171,8 +171,8 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, if (combine_buf_ptr != combine_buf) { /* data left in 
buffer to write */ #ifdef IO_DEBUG - printf("[%d/%d] nc mem c file (2) writing loc = %Ld sz = %Ld\n", - rank, nprocs, off, + printf("[%d/%d] nc mem c file (2) writing loc = %Ld sz = %Ld\n", + rank, nprocs, off, fd->hints->ind_wr_buffer_size-combine_buf_remain); #endif #ifdef ADIOI_MPE_LOGGING @@ -219,11 +219,11 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, int i; n_filetypes++; for (i=0; icount; i++) { - if (disp + flat_file->indices[i] + - n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[i] + if (disp + flat_file->indices[i] + + n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[i] >= offset) { st_index = i; - fwr_size = disp + flat_file->indices[i] + + fwr_size = disp + flat_file->indices[i] + n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[i] - offset; flag = 1; @@ -238,7 +238,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, n_filetypes = offset / n_etypes_in_filetype; etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -265,14 +265,14 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, off = offset; fwr_size = ADIOI_MIN(fwr_size, bufsize); while (i_offset < bufsize) { - if (fwr_size) { - /* TYPE_UB and TYPE_LB can result in - fwr_size = 0. save system call in such cases */ + if (fwr_size) { + /* TYPE_UB and TYPE_LB can result in + fwr_size = 0. 
save system call in such cases */ #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_lseek_a, 0, NULL); #endif #ifdef IO_DEBUG - printf("[%d/%d] c mem nc file writing loc = %Ld sz = %d\n", + printf("[%d/%d] c mem nc file writing loc = %Ld sz = %d\n", rank, nprocs, off, fwr_size); #endif err_lseek = lseek(fd->fd_sys, off, SEEK_SET); @@ -302,7 +302,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, j = 0; n_filetypes++; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent; fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } @@ -325,18 +325,18 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, size = ADIOI_MIN(fwr_size, bwr_size); if (size) { #ifdef IO_DEBUG - printf("[%d/%d] nc mem nc file writing loc = %Ld sz = %d\n", + printf("[%d/%d] nc mem nc file writing loc = %Ld sz = %d\n", rank, nprocs, off, size); #endif #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); #endif lseek(fd->fd_sys, off, SEEK_SET); -#ifdef ADIOI_MPE_LOGGING +#ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); #endif if (err == -1) err_flag = 1; -#ifdef ADIOI_MPE_LOGGING +#ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif ADIOI_Assert(size == (size_t) size); @@ -359,7 +359,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, n_filetypes++; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent; new_fwr_size = flat_file->blocklens[j]; @@ -375,7 +375,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; indx = buftype_extent*(buf_count/flat_buf->count) + - flat_buf->indices[k]; + flat_buf->indices[k]; new_bwr_size = flat_buf->blocklens[k]; if (size != fwr_size) { off += size; @@ -402,7 +402,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void 
*buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ #endif diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_write_str.c b/ompi/mca/io/romio314/romio/adio/common/ad_write_str.c index f3b6c8960b3..624aeb12285 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_write_str.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_write_str.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -122,7 +122,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes; ADIO_Offset n_etypes_in_filetype, abs_off_in_filetype=0; MPI_Count filetype_size, etype_size, buftype_size; - MPI_Aint filetype_extent, buftype_extent; + MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off; @@ -138,7 +138,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, * approach instead. 
*/ - ADIOI_GEN_WriteStrided_naive(fd, + ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype, @@ -160,7 +160,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -184,7 +184,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; - off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : + off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : fd->disp + (ADIO_Offset)etype_size * offset; start_off = off; @@ -194,10 +194,10 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-writebuf_off+1)); /* if atomicity is true, lock the region to be accessed */ - if (fd->atomicity) + if (fd->atomicity) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); - for (j=0; jcount; i++) { userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]; @@ -212,7 +212,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); - if (fd->atomicity) + if (fd->atomicity) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); if (*error_code != MPI_SUCCESS) goto fn_exit; @@ -259,7 +259,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, n_filetypes = offset / n_etypes_in_filetype; etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; @@ -273,7 +273,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, } /* abs. 
offset in bytes in the file */ - offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + + offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; } @@ -293,7 +293,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, offset, status, error_code); if (file_ptr_type == ADIO_INDIVIDUAL) { - /* update MPI-IO file pointer to point to the first byte + /* update MPI-IO file pointer to point to the first byte * that can be accessed in the fileview. */ fd->fp_ind = offset + bufsize; if (bufsize == fwr_size) { @@ -308,10 +308,10 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, + (ADIO_Offset)n_filetypes*filetype_extent; } } - fd->fp_sys_posn = -1; /* set it to null. */ + fd->fp_sys_posn = -1; /* set it to null. */ #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); -#endif +#endif goto fn_exit; } @@ -335,13 +335,13 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, n_filetypes += (j == 0) ? 1 : 0; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent; fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } /* if atomicity is true, lock the region to be accessed */ - if (fd->atomicity) + if (fd->atomicity) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); writebuf_off = 0; @@ -360,9 +360,9 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, n_filetypes = st_n_filetypes; fwr_size = ADIOI_MIN(st_fwr_size, bufsize); while (i_offset < bufsize) { - if (fwr_size) { - /* TYPE_UB and TYPE_LB can result in - fwr_size = 0. save system call in such cases */ + if (fwr_size) { + /* TYPE_UB and TYPE_LB can result in + fwr_size = 0. 
save system call in such cases */ /* lseek(fd->fd_sys, off, SEEK_SET); err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/ @@ -385,9 +385,9 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, j = (j+1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent; - fwr_size = ADIOI_MIN(flat_file->blocklens[j], + fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } } @@ -431,7 +431,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, n_filetypes += (j == 0) ? 1 : 0; } - off = disp + flat_file->indices[j] + + off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent; new_fwr_size = flat_file->blocklens[j]; @@ -447,7 +447,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) + - flat_buf->indices[k]; + flat_buf->indices[k]; new_bwr_size = flat_buf->blocklens[k]; if (size != fwr_size) { off += size; @@ -460,15 +460,15 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, } } - /* write the buffer out finally */ + /* write the buffer out finally */ if (writebuf_len) { ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); - if (!(fd->atomicity)) + if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); if (*error_code != MPI_SUCCESS) goto fn_exit; } - if (fd->atomicity) + if (fd->atomicity) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off; @@ -479,7 +479,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES /* datatypes returning negagive values, probably related to tt 1893 */ MPIR_Status_set_bytes(status, datatype, 
bufsize); -/* This is a temporary way of filling in status. The right way is to +/* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ #endif diff --git a/ompi/mca/io/romio314/romio/adio/common/ad_write_str_naive.c b/ompi/mca/io/romio314/romio/adio/common/ad_write_str_naive.c index f59c02d4278..591c66f6a96 100644 --- a/ompi/mca/io/romio314/romio/adio/common/ad_write_str_naive.c +++ b/ompi/mca/io/romio314/romio/adio/common/ad_write_str_naive.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -17,14 +17,14 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, ADIOI_Flatlist_node *flat_buf, *flat_file; /* bwr == buffer write; fwr == file write */ - ADIO_Offset bwr_size, fwr_size=0, sum, size_in_filetype; + ADIO_Offset bwr_size, fwr_size=0, sum, size_in_filetype; int b_index; MPI_Count bufsize; ADIO_Offset n_etypes_in_filetype; ADIO_Offset size, n_filetypes, etype_in_filetype; ADIO_Offset abs_off_in_filetype=0, req_len; MPI_Count filetype_size, etype_size, buftype_size; - MPI_Aint filetype_extent, buftype_extent; + MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset=0, start_off; @@ -40,7 +40,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, buftype, 0); #endif - *error_code = MPI_SUCCESS; + *error_code = MPI_SUCCESS; return; } @@ -62,7 +62,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, flat_buf = ADIOI_Flatlist; while (flat_buf->type != buftype) flat_buf = flat_buf->next; - off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : + off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : fd->disp + (ADIO_Offset)etype_size * offset; start_off = off; @@ -79,17 +79,17 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, */ for (b_count=0; b_count < count; b_count++) { for (b_index=0; b_index < flat_buf->count; b_index++) { - userbuf_off = (ADIO_Offset)b_count*(ADIO_Offset)buftype_extent + + userbuf_off = (ADIO_Offset)b_count*(ADIO_Offset)buftype_extent + flat_buf->indices[b_index]; req_off = off; req_len = flat_buf->blocklens[b_index]; ADIOI_Assert(req_len == (int) req_len); ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); - ADIO_WriteContig(fd, + ADIO_WriteContig(fd, (char *) buf + userbuf_off, - (int)req_len, - MPI_BYTE, + (int)req_len, + MPI_BYTE, ADIO_EXPLICIT_OFFSET, req_off, &status1, @@ -141,15 +141,15 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, while (!flag) { n_filetypes++; for (f_index=0; f_index < flat_file->count; f_index++) { - if (disp + flat_file->indices[f_index] + - n_filetypes*(ADIO_Offset)filetype_extent + - flat_file->blocklens[f_index] >= start_off) + if (disp + flat_file->indices[f_index] + + n_filetypes*(ADIO_Offset)filetype_extent + + flat_file->blocklens[f_index] >= start_off) { /* this block contains our starting position */ st_index = f_index; - fwr_size = disp + flat_file->indices[f_index] + - n_filetypes*(ADIO_Offset)filetype_extent + + fwr_size = disp + flat_file->indices[f_index] + + n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[f_index] - start_off; flag = 1; break; @@ -162,7 +162,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, n_filetypes = offset / n_etypes_in_filetype; etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (f_index=0; f_index < flat_file->count; f_index++) { sum += 
flat_file->blocklens[f_index]; @@ -170,21 +170,21 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, st_index = f_index; fwr_size = sum - size_in_filetype; abs_off_in_filetype = flat_file->indices[f_index] + - size_in_filetype - + size_in_filetype - (sum - flat_file->blocklens[f_index]); break; } } /* abs. offset in bytes in the file */ - start_off = disp + n_filetypes*(ADIO_Offset)filetype_extent + + start_off = disp + n_filetypes*(ADIO_Offset)filetype_extent + abs_off_in_filetype; } st_fwr_size = fwr_size; st_n_filetypes = n_filetypes; - /* start_off, st_n_filetypes, st_index, and st_fwr_size are + /* start_off, st_n_filetypes, st_index, and st_fwr_size are * all calculated at this point */ @@ -205,9 +205,9 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, n_filetypes++; } - off = disp + flat_file->indices[f_index] + + off = disp + flat_file->indices[f_index] + n_filetypes*(ADIO_Offset)filetype_extent; - fwr_size = ADIOI_MIN(flat_file->blocklens[f_index], + fwr_size = ADIOI_MIN(flat_file->blocklens[f_index], bufsize-(unsigned)userbuf_off); } @@ -239,18 +239,18 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, /* while there is still space in the buffer, write more data */ while (userbuf_off < bufsize) { - if (fwr_size) { - /* TYPE_UB and TYPE_LB can result in - fwr_size = 0. save system call in such cases */ + if (fwr_size) { + /* TYPE_UB and TYPE_LB can result in + fwr_size = 0. 
save system call in such cases */ req_off = off; req_len = fwr_size; ADIOI_Assert(req_len == (int) req_len); ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); - ADIO_WriteContig(fd, + ADIO_WriteContig(fd, (char *) buf + userbuf_off, - (int)req_len, - MPI_BYTE, + (int)req_len, + MPI_BYTE, ADIO_EXPLICIT_OFFSET, req_off, &status1, @@ -260,7 +260,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, userbuf_off += fwr_size; if (off + fwr_size < disp + flat_file->indices[f_index] + - flat_file->blocklens[f_index] + + flat_file->blocklens[f_index] + n_filetypes*(ADIO_Offset)filetype_extent) { /* important that this value be correct, as it is @@ -278,9 +278,9 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, f_index = 0; n_filetypes++; } - off = disp + flat_file->indices[f_index] + + off = disp + flat_file->indices[f_index] + n_filetypes*(ADIO_Offset)filetype_extent; - fwr_size = ADIOI_MIN(flat_file->blocklens[f_index], + fwr_size = ADIOI_MIN(flat_file->blocklens[f_index], bufsize-(unsigned)userbuf_off); } } @@ -313,10 +313,10 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, ADIOI_Assert(req_len == (int) req_len); ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off)); - ADIO_WriteContig(fd, + ADIO_WriteContig(fd, (char *) buf + userbuf_off, - (int)req_len, - MPI_BYTE, + (int)req_len, + MPI_BYTE, ADIO_EXPLICIT_OFFSET, req_off, &status1, @@ -332,7 +332,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, n_filetypes++; } - off = disp + flat_file->indices[f_index] + + off = disp + flat_file->indices[f_index] + n_filetypes*(ADIO_Offset)filetype_extent; new_fwr_size = flat_file->blocklens[f_index]; @@ -374,8 +374,8 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count, #ifdef HAVE_STATUS_SET_BYTES 
MPIR_Status_set_bytes(status, buftype, bufsize); - /* This is a temporary way of filling in status. The right way is to - * keep track of how much data was actually written and placed in buf + /* This is a temporary way of filling in status. The right way is to + * keep track of how much data was actually written and placed in buf */ #endif diff --git a/ompi/mca/io/romio314/romio/adio/common/adi_close.c b/ompi/mca/io/romio314/romio/adio/common/adi_close.c index cdc18737850..e11984259a7 100644 --- a/ompi/mca/io/romio314/romio/adio/common/adi_close.c +++ b/ompi/mca/io/romio314/romio/adio/common/adi_close.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/adio/common/async_list.c b/ompi/mca/io/romio314/romio/adio/common/async_list.c index 239dbe587ba..6e7c7345763 100644 --- a/ompi/mca/io/romio314/romio/adio/common/async_list.c +++ b/ompi/mca/io/romio314/romio/adio/common/async_list.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -23,7 +23,7 @@ ADIOI_Async_node *ADIOI_Malloc_async_node(void) if (!ADIOI_Async_avail_head) { ADIOI_Async_avail_head = (ADIOI_Async_node *) - ADIOI_Malloc(NUM*sizeof(ADIOI_Async_node)); + ADIOI_Malloc(NUM*sizeof(ADIOI_Async_node)); curr = ADIOI_Async_avail_head; for (i=1; inext = ADIOI_Async_avail_head+i; @@ -35,7 +35,7 @@ ADIOI_Async_node *ADIOI_Malloc_async_node(void) /* keep track of malloced area that needs to be freed later */ if (!ADIOI_Malloc_async_tail) { ADIOI_Malloc_async_tail = (ADIOI_Malloc_async *) - ADIOI_Malloc(sizeof(ADIOI_Malloc_async)); + ADIOI_Malloc(sizeof(ADIOI_Malloc_async)); ADIOI_Malloc_async_head = ADIOI_Malloc_async_tail; ADIOI_Malloc_async_head->ptr = ADIOI_Async_avail_head; ADIOI_Malloc_async_head->next = NULL; @@ -94,7 +94,7 @@ void ADIOI_Add_req_to_list(ADIO_Request *request) (*request)->ptr_in_async_list = ADIOI_Async_list_tail; } } - + /* Sets error_code to MPI_SUCCESS on success, creates an error code on * failure. */ @@ -113,7 +113,7 @@ void ADIOI_Complete_async(int *error_code) while (ADIOI_Async_list_head) { request = ADIOI_Async_list_head->request; (*request)->queued = -1; /* ugly internal hack that prevents - ADIOI_xxxComplete from freeing the request object. + ADIOI_xxxComplete from freeing the request object. This is required, because the user will call MPI_Wait later, which would require status to be filled. */ switch ((*request)->optype) { @@ -152,7 +152,7 @@ void ADIOI_Del_req_from_list(ADIO_Request *request) { /* Delete a request that has already been completed from the async list and move it to the list of available nodes. Typically called - from within an ADIO_Test/ADIO_Wait. */ + from within an ADIO_Test/ADIO_Wait. 
*/ ADIOI_Async_node *curr, *prev, *next; diff --git a/ompi/mca/io/romio314/romio/adio/common/byte_offset.c b/ompi/mca/io/romio314/romio/adio/common/byte_offset.c index df0a240849a..b7350f1faa8 100644 --- a/ompi/mca/io/romio314/romio/adio/common/byte_offset.c +++ b/ompi/mca/io/romio314/romio/adio/common/byte_offset.c @@ -1,14 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ #include "adio.h" #include "adio_extern.h" -/* Returns the absolute byte position in the file corresponding to +/* Returns the absolute byte position in the file corresponding to "offset" etypes relative to the current view. */ void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset *disp) @@ -34,7 +34,7 @@ void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset *disp) n_filetypes = offset / n_etypes_in_filetype; etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; - + sum = 0; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; diff --git a/ompi/mca/io/romio314/romio/adio/common/cb_config_list.c b/ompi/mca/io/romio314/romio/adio/common/cb_config_list.c index 626709f8444..468105c5ae0 100644 --- a/ompi/mca/io/romio314/romio/adio/common/cb_config_list.c +++ b/ompi/mca/io/romio314/romio/adio/common/cb_config_list.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2001 University of Chicago. + * Copyright (C) 2001 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -41,15 +41,15 @@ static char *token_ptr; /* internal stuff */ static int get_max_procs(int cb_nodes); -static int match_procs(char *name, int max_per_proc, char *procnames[], +static int match_procs(char *name, int max_per_proc, char *procnames[], char used_procnames[], - int nr_procnames, int ranks[], int nr_ranks, + int nr_procnames, int ranks[], int nr_ranks, int *nr_ranks_allocated); static int match_this_proc(char *name, int cur_proc, int max_matches, char *procnames[], char used_procnames[], - int nr_procnames, int ranks[], + int nr_procnames, int ranks[], int nr_ranks, int nr_ranks_allocated); -static int find_name(char *name, char *procnames[], char used_procnames[], +static int find_name(char *name, char *procnames[], char used_procnames[], int nr_procnames, int start_ind); static int cb_config_list_lex(void); @@ -86,7 +86,7 @@ int ADIOI_cb_bcast_rank_map(ADIO_File fd) return error_code; } } - MPI_Bcast(fd->hints->ranklist, fd->hints->cb_nodes, MPI_INT, 0, + MPI_Bcast(fd->hints->ranklist, fd->hints->cb_nodes, MPI_INT, 0, fd->comm); } /* TEMPORARY -- REMOVE WHEN NO LONGER UPDATING INFO FOR @@ -121,7 +121,7 @@ int ADIOI_cb_bcast_rank_map(ADIO_File fd) * * Returns 0 on success, -1 on failure. * - * NOTE: Needs some work to cleanly handle out of memory cases! + * NOTE: Needs some work to cleanly handle out of memory cases! 
*/ int ADIOI_cb_gather_name_array(MPI_Comm comm, MPI_Comm dupcomm, @@ -135,7 +135,7 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, if (ADIOI_cb_config_list_keyval == MPI_KEYVAL_INVALID) { /* cleaned up by ADIOI_End_call */ - MPI_Keyval_create((MPI_Copy_function *) ADIOI_cb_copy_name_array, + MPI_Keyval_create((MPI_Copy_function *) ADIOI_cb_copy_name_array, (MPI_Delete_function *) ADIOI_cb_delete_name_array, &ADIOI_cb_config_list_keyval, NULL); } @@ -171,7 +171,7 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, procname = array->names; /* simpler to read */ procname_len = (int *) ADIOI_Malloc(commsize * sizeof(int)); - if (procname_len == NULL) { + if (procname_len == NULL) { return -1; } } @@ -181,7 +181,7 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, array->names = NULL; } /* gather lengths first */ - MPI_Gather(&my_procname_len, 1, MPI_INT, + MPI_Gather(&my_procname_len, 1, MPI_INT, procname_len, 1, MPI_INT, 0, dupcomm); if (commrank == 0) { @@ -195,11 +195,11 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, for (i=0; i < commsize; i++) { /* add one to the lengths because we need to count the * terminator, and we are going to use this list of lengths - * again in the gatherv. + * again in the gatherv. */ alloc_size += ++procname_len[i]; } - + procname[0] = ADIOI_Malloc(alloc_size); if (procname[0] == NULL) { return -1; @@ -208,7 +208,7 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, for (i=1; i < commsize; i++) { procname[i] = procname[i-1] + procname_len[i-1]; } - + /* create our list of displacements for the gatherv. 
we're going * to do everything relative to the start of the region allocated * for procname[0] @@ -223,7 +223,7 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, /* now gather strings */ if (commrank == 0) { - MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, + MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, procname[0], procname_len, disp, MPI_CHAR, 0, dupcomm); } @@ -231,7 +231,7 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, /* if we didn't do this, we would need to allocate procname[] * on all processes...which seems a little silly. */ - MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, + MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, NULL, NULL, NULL, MPI_CHAR, 0, dupcomm); } @@ -248,7 +248,7 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, } /* store the attribute; we want to store SOMETHING on all processes - * so that they can all tell if we have gone through this procedure + * so that they can all tell if we have gone through this procedure * or not for the given communicator. * * specifically we put it on both the original comm, so we can find @@ -262,7 +262,7 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, } -/* ADIOI_cb_config_list_parse() - parse the cb_config_list and build the +/* ADIOI_cb_config_list_parse() - parse the cb_config_list and build the * ranklist * * Parameters: @@ -270,9 +270,9 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm, * * Returns number of ranks allocated in parsing, -1 on error. 
*/ -int ADIOI_cb_config_list_parse(char *config_list, +int ADIOI_cb_config_list_parse(char *config_list, ADIO_cb_name_array array, - int ranklist[], + int ranklist[], int cb_nodes) { int token, max_procs, cur_rank = 0, nr_procnames; @@ -349,7 +349,7 @@ int ADIOI_cb_config_list_parse(char *config_list, ADIOI_Free(used_procnames); return cur_rank; } - + if (token == AGG_WILDCARD) { cur_procname_p = NULL; } @@ -385,17 +385,17 @@ int ADIOI_cb_config_list_parse(char *config_list, /* ADIOI_cb_copy_name_array() - attribute copy routine */ -int ADIOI_cb_copy_name_array(MPI_Comm comm, - int keyval, - void *extra, +int ADIOI_cb_copy_name_array(MPI_Comm comm, + int keyval, + void *extra, void *attr_in, - void **attr_out, + void **attr_out, int *flag) { ADIO_cb_name_array array; ADIOI_UNREFERENCED_ARG(comm); - ADIOI_UNREFERENCED_ARG(keyval); + ADIOI_UNREFERENCED_ARG(keyval); ADIOI_UNREFERENCED_ARG(extra); array = (ADIO_cb_name_array) attr_in; @@ -403,15 +403,15 @@ int ADIOI_cb_copy_name_array(MPI_Comm comm, *attr_out = attr_in; *flag = 1; /* make a copy in the new communicator */ - + return MPI_SUCCESS; } /* ADIOI_cb_delete_name_array() - attribute destructor */ -int ADIOI_cb_delete_name_array(MPI_Comm comm, - int keyval, - void *attr_val, +int ADIOI_cb_delete_name_array(MPI_Comm comm, + int keyval, + void *attr_val, void *extra) { ADIO_cb_name_array array; @@ -427,7 +427,7 @@ int ADIOI_cb_delete_name_array(MPI_Comm comm, /* time to free the structures (names, array of ptrs to names, struct) */ if (array->namect) { - /* Note that array->names[i], where i > 0, + /* Note that array->names[i], where i > 0, * are just pointers into the allocated region array->names[0] */ ADIOI_Free(array->names[0]); @@ -439,7 +439,7 @@ int ADIOI_cb_delete_name_array(MPI_Comm comm, } /* match_procs() - given a name (or NULL for wildcard) and a max. 
number - * of aggregator processes (per processor name), this + * of aggregator processes (per processor name), this * matches in the procnames[] array and puts appropriate * ranks in the ranks array. * @@ -456,8 +456,8 @@ int ADIOI_cb_delete_name_array(MPI_Comm comm, * * Returns number of matches. */ -static int match_procs(char *name, - int max_per_proc, +static int match_procs(char *name, + int max_per_proc, char *procnames[], char used_procnames[], int nr_procnames, @@ -466,7 +466,7 @@ static int match_procs(char *name, int *nr_ranks_allocated) { int wildcard_proc, cur_proc, old_nr_allocated, ret; - + /* save this so we can report on progress */ old_nr_allocated = *nr_ranks_allocated; @@ -493,8 +493,8 @@ static int match_procs(char *name, while (nr_ranks - *nr_ranks_allocated > 0) { /* find a name */ - while ((wildcard_proc < nr_procnames) && - (used_procnames[wildcard_proc] != 0)) + while ((wildcard_proc < nr_procnames) && + (used_procnames[wildcard_proc] != 0)) { wildcard_proc++; } @@ -505,7 +505,7 @@ static int match_procs(char *name, } #ifdef CB_CONFIG_LIST_DEBUG - FPRINTF(stderr, "performing wildcard match (*:%d) starting with %s (%d)\n", + FPRINTF(stderr, "performing wildcard match (*:%d) starting with %s (%d)\n", max_per_proc, procnames[wildcard_proc], wildcard_proc); #endif @@ -521,7 +521,7 @@ static int match_procs(char *name, * our while loop. */ ranks[*nr_ranks_allocated] = cur_proc; - *nr_ranks_allocated = *nr_ranks_allocated + 1; + *nr_ranks_allocated = *nr_ranks_allocated + 1; cur_proc++; /* so, to accomplish this we use the match_this_proc() to @@ -534,7 +534,7 @@ static int match_procs(char *name, nr_procnames, ranks, nr_ranks, *nr_ranks_allocated); if (ret > 0) *nr_ranks_allocated = *nr_ranks_allocated + ret; - + /* clean up and point wildcard_proc to the next entry, since * we know that this one is NULL now. 
*/ @@ -556,7 +556,7 @@ static int match_procs(char *name, return *nr_ranks_allocated - old_nr_allocated; } -/* match_this_proc() - find each instance of processor name "name" in +/* match_this_proc() - find each instance of processor name "name" in * the "procnames" array, starting with index "cur_proc" * and add the first "max_matches" into the "ranks" * array. remove all instances of "name" from @@ -581,8 +581,8 @@ static int match_this_proc(char *name, int max_matches, char *procnames[], char used_procnames[], - int nr_procnames, - int ranks[], + int nr_procnames, + int ranks[], int nr_ranks, int nr_ranks_allocated) { @@ -592,11 +592,11 @@ static int match_this_proc(char *name, /* calculate how many ranks we want to allocate */ ranks_remaining = nr_ranks - nr_ranks_allocated; - nr_to_alloc = (max_matches < ranks_remaining) ? + nr_to_alloc = (max_matches < ranks_remaining) ? max_matches : ranks_remaining; while (nr_to_alloc > 0) { - cur_proc = find_name(name, procnames, used_procnames, nr_procnames, + cur_proc = find_name(name, procnames, used_procnames, nr_procnames, cur_proc); if (cur_proc < 0) { /* didn't find it */ @@ -612,14 +612,14 @@ static int match_this_proc(char *name, ranks[nr_ranks_allocated] = cur_proc; nr_ranks_allocated++; used_procnames[cur_proc] = 1; - + cur_proc++; nr_to_alloc--; } - + /* take all other instances of this host out of the list */ while (cur_proc >= 0) { - cur_proc = find_name(name, procnames, used_procnames, nr_procnames, + cur_proc = find_name(name, procnames, used_procnames, nr_procnames, cur_proc); if (cur_proc >= 0) { #ifdef CB_CONFIG_LIST_DEBUG @@ -632,17 +632,17 @@ static int match_this_proc(char *name, } return nr_ranks_allocated - old_nr_allocated; } - + /* find_name() - finds the first entry in procnames[] which matches name, * starting at index start_ind * * Returns an index [0..nr_procnames-1] on success, -1 if not found. 
*/ -static int find_name(char *name, - char *procnames[], +static int find_name(char *name, + char *procnames[], char used_procnames[], - int nr_procnames, + int nr_procnames, int start_ind) { int i; @@ -689,7 +689,7 @@ static int get_max_procs(int cb_nodes) /* strip off next comma (if there is one) */ token = cb_config_list_lex(); if (token != AGG_COMMA && token != AGG_EOS) return -1; - + /* return max_procs */ if (max_procs < 0) return -1; else return max_procs; @@ -708,7 +708,7 @@ static int get_max_procs(int cb_nodes) #define COLON ':' #define COMMA ';' #define DELIMS ":;" -#else +#else /* these tokens work for every other platform */ #define COLON ':' #define COMMA ',' @@ -745,7 +745,7 @@ static int cb_config_list_lex(void) /* it would be a good idea to look at the string and make sure that * it doesn't have any illegal characters in it. in particular we - * should ensure that no one tries to use wildcards with strings + * should ensure that no one tries to use wildcards with strings * (e.g. "ccn*"). */ ADIOI_Strncpy(yylval, token_ptr, slen); diff --git a/ompi/mca/io/romio314/romio/adio/common/eof_offset.c b/ompi/mca/io/romio314/romio/adio/common/eof_offset.c index 0fb3539fc1a..724746317fd 100644 --- a/ompi/mca/io/romio314/romio/adio/common/eof_offset.c +++ b/ompi/mca/io/romio314/romio/adio/common/eof_offset.c @@ -1,14 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ #include "adio.h" #include "adio_extern.h" -/* return the current end of file in etype units relative to the +/* return the current end of file in etype units relative to the current view */ void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset) @@ -26,24 +26,24 @@ void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset) ADIO_Fcntl(fd, ADIO_FCNTL_GET_FSIZE, fcntl_struct, &error_code); fsize = fcntl_struct->fsize; ADIOI_Free(fcntl_struct); - + /* Find the offset in etype units corresponding to eof. - The eof could lie in a hole in the current view, or in the + The eof could lie in a hole in the current view, or in the middle of an etype. In that case the offset will be the offset corresponding to the start of the next etype in the current view.*/ ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); etype_size = fd->etype_size; - if (filetype_is_contig) + if (filetype_is_contig) *eof_offset = (fsize - fd->disp + etype_size - 1)/etype_size; /* ceiling division in case fsize is not a multiple of etype_size;*/ else { /* filetype already flattened in ADIO_Open */ flat_file = ADIOI_Flatlist; - while (flat_file->type != fd->filetype) + while (flat_file->type != fd->filetype) flat_file = flat_file->next; - + MPI_Type_size_x(fd->filetype, &filetype_size); MPI_Type_extent(fd->filetype, &filetype_extent); @@ -55,14 +55,14 @@ void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset) n_filetypes++; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; - if (disp + flat_file->indices[i] + - n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent + + if (disp + flat_file->indices[i] + + n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent + flat_file->blocklens[i] >= fsize) { - if (disp + flat_file->indices[i] + + if (disp + flat_file->indices[i] + n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent >= fsize) sum -= flat_file->blocklens[i]; else { - rem = (disp + flat_file->indices[i] + + rem = (disp + flat_file->indices[i] + n_filetypes* 
ADIOI_AINT_CAST_TO_OFFSET filetype_extent + flat_file->blocklens[i] - fsize); sum -= rem; diff --git a/ompi/mca/io/romio314/romio/adio/common/error.c b/ompi/mca/io/romio314/romio/adio/common/error.c index e990a5e1819..6417b943cb2 100644 --- a/ompi/mca/io/romio314/romio/adio/common/error.c +++ b/ompi/mca/io/romio314/romio/adio/common/error.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -14,7 +14,7 @@ int ADIOI_Error(ADIO_File fd, int error_code, char *string) { char buf[MPI_MAX_ERROR_STRING]; - int myrank, result_len; + int myrank, result_len; MPI_Errhandler err_handler; if (fd == ADIO_FILE_NULL) err_handler = ADIOI_DFLT_ERR_HANDLER; diff --git a/ompi/mca/io/romio314/romio/adio/common/flatten.c b/ompi/mca/io/romio314/romio/adio/common/flatten.c index 86c2875e55e..88590d9719c 100644 --- a/ompi/mca/io/romio314/romio/adio/common/flatten.c +++ b/ompi/mca/io/romio314/romio/adio/common/flatten.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ /* - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -26,10 +26,10 @@ void ADIOI_Flatten_datatype(MPI_Datatype datatype) ADIOI_Flatlist_node *flat, *prev=0; /* check if necessary to flatten. */ - + /* is it entirely contiguous? 
*/ ADIOI_Datatype_iscontig(datatype, &is_contig); - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: is_contig %#X\n",is_contig); #endif if (is_contig) return; @@ -38,7 +38,7 @@ void ADIOI_Flatten_datatype(MPI_Datatype datatype) flat = ADIOI_Flatlist; while (flat) { if (flat->type == datatype) { - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: found datatype %#X\n", datatype); #endif return; @@ -60,7 +60,7 @@ void ADIOI_Flatten_datatype(MPI_Datatype datatype) flat->indices = NULL; flat->count = ADIOI_Count_contiguous_blocks(datatype, &curr_index); -#ifdef FLATTEN_DEBUG +#ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: count %llX, cur_idx = %#llX\n",flat->count,curr_index); #endif /* DBG_FPRINTF(stderr, "%d\n", flat->count);*/ @@ -69,17 +69,17 @@ void ADIOI_Flatten_datatype(MPI_Datatype datatype) flat->blocklens = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset)); flat->indices = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset)); } - + curr_index = 0; #ifdef HAVE_MPIR_TYPE_FLATTEN flatten_idx = (MPI_Aint) flat->count; MPIR_Type_flatten(datatype, flat->indices, flat->blocklens, &flatten_idx); - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: MPIR_Type_flatten\n"); #endif #else ADIOI_Flatten(datatype, flat, 0, &curr_index); - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: ADIOI_Flatten\n"); #endif @@ -89,7 +89,7 @@ void ADIOI_Flatten_datatype(MPI_Datatype datatype) #ifdef FLATTEN_DEBUG { int i; - for (i=0; icount; i++) + for (i=0; icount; i++) DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: i %#X, blocklens %#llX, indices %#llX\n", i, flat->blocklens[i], @@ -104,13 +104,13 @@ void ADIOI_Flatten_datatype(MPI_Datatype datatype) * * Assumption: input datatype is not a basic!!!! 
*/ -void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, +void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, ADIO_Offset st_offset, MPI_Count *curr_index) { int i, k, m, n, basic_num, nonzeroth, is_hindexed_block=0; int combiner, old_combiner, old_is_contig; int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes; - /* By using ADIO_Offset we preserve +/- sign and + /* By using ADIO_Offset we preserve +/- sign and avoid >2G integer arithmetic problems */ ADIO_Offset top_count; MPI_Count j, old_size, prev_index, num; @@ -124,7 +124,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, types = (MPI_Datatype *) ADIOI_Malloc((ntypes+1)*sizeof(MPI_Datatype)); MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types); - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: st_offset %#llX, curr_index %#llX\n",st_offset,*curr_index); DBG_FPRINTF(stderr,"ADIOI_Flatten:: nints %#X, nadds %#X, ntypes %#X\n",nints, nadds, ntypes); for(i=0; i< nints; ++i) @@ -150,11 +150,11 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, switch (combiner) { #ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP case MPI_COMBINER_DUP: - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DUP\n"); #endif MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) ADIOI_Flatten(types[0], flat, st_offset, curr_index); @@ -165,7 +165,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, { int dims = ints[0]; MPI_Datatype stype; - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_SUBARRAY\n"); #endif @@ -186,7 +186,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, { int dims = ints[2]; MPI_Datatype 
dtype; - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY\n"); #endif @@ -200,12 +200,12 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, ints[4*dims+3], /* order */ types[0], &dtype); - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#llX);\n", 0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index); #endif ADIOI_Flatten(dtype, flat, st_offset, curr_index); - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY >ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#llX);\n", 0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index); #endif @@ -214,12 +214,12 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, break; #endif case MPI_COMBINER_CONTIGUOUS: - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_CONTIGUOUS\n"); #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); prev_index = *curr_index; @@ -232,7 +232,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, flat->indices[j] = st_offset; MPI_Type_size_x(types[0], &old_size); flat->blocklens[j] = top_count * old_size; - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]); #endif (*curr_index)++; @@ -248,7 +248,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, for (i=0; iindices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; - #ifdef FLATTEN_DEBUG 
+ #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: derived flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]); #endif j++; @@ -258,13 +258,13 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } break; - case MPI_COMBINER_VECTOR: - #ifdef FLATTEN_DEBUG + case MPI_COMBINER_VECTOR: + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_VECTOR\n"); #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); prev_index = *curr_index; @@ -273,7 +273,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, if (prev_index == *curr_index) { /* simplest case, vector of basic or contiguous types */ - /* By using ADIO_Offset we preserve +/- sign and + /* By using ADIO_Offset we preserve +/- sign and avoid >2G integer arithmetic problems */ ADIO_Offset blocklength = ints[1], stride = ints[2]; j = *curr_index; @@ -288,7 +288,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } else { /* vector of noncontiguous derived types */ - /* By using ADIO_Offset we preserve +/- sign and + /* By using ADIO_Offset we preserve +/- sign and avoid >2G integer arithmetic problems */ ADIO_Offset blocklength = ints[1], stride = ints[2]; @@ -320,14 +320,14 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } break; - case MPI_COMBINER_HVECTOR: - case MPI_COMBINER_HVECTOR_INTEGER: - #ifdef FLATTEN_DEBUG + case MPI_COMBINER_HVECTOR: + case MPI_COMBINER_HVECTOR_INTEGER: + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HVECTOR_INTEGER\n"); #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); prev_index = *curr_index; @@ -336,7 +336,7 @@ void 
ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, if (prev_index == *curr_index) { /* simplest case, vector of basic or contiguous types */ - /* By using ADIO_Offset we preserve +/- sign and + /* By using ADIO_Offset we preserve +/- sign and avoid >2G integer arithmetic problems */ ADIO_Offset blocklength = ints[1]; j = *curr_index; @@ -351,7 +351,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } else { /* vector of noncontiguous derived types */ - /* By using ADIO_Offset we preserve +/- sign and + /* By using ADIO_Offset we preserve +/- sign and avoid >2G integer arithmetic problems */ ADIO_Offset blocklength = ints[1]; @@ -383,20 +383,20 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } break; - case MPI_COMBINER_INDEXED: - #ifdef FLATTEN_DEBUG + case MPI_COMBINER_INDEXED: + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED\n"); #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); MPI_Type_extent(types[0], &old_extent); prev_index = *curr_index; if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) { - /* By using ADIO_Offset we preserve +/- sign and + /* By using ADIO_Offset we preserve +/- sign and avoid >2G integer arithmetic problems */ ADIO_Offset stride = ints[top_count+1]; ADIOI_Flatten(types[0], flat, @@ -407,7 +407,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, /* simplest case, indexed type made up of basic or contiguous types */ j = *curr_index; for (i=j, nonzeroth=i; i2G integer arithmetic problems */ ADIO_Offset blocklength = ints[1+i-j], stride = ints[top_count+1+i-j]; if (blocklength > 0) { @@ -452,7 +452,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, num = *curr_index - prev_index; prev_index = *curr_index; for (m=0, nonzeroth=j; m2G integer arithmetic 
problems */ ADIO_Offset stride = ints[top_count+1+i]-ints[top_count+i]; if (flat->blocklens[j-num] > 0 ) { @@ -490,19 +490,19 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, /* deliberate fall-through */ #endif case MPI_COMBINER_INDEXED_BLOCK: - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED_BLOCK\n"); #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); MPI_Type_extent(types[0], &old_extent); prev_index = *curr_index; if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) { - /* By using ADIO_Offset we preserve +/- sign and + /* By using ADIO_Offset we preserve +/- sign and avoid >2G integer arithmetic problems */ ADIO_Offset stride = ints[1+1]; if (is_hindexed_block) { @@ -518,7 +518,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, /* simplest case, indexed type made up of basic or contiguous types */ j = *curr_index; for (i=j; i2G integer arithmetic problems */ ADIO_Offset blocklength = ints[1]; if (is_hindexed_block) { @@ -577,20 +577,20 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } break; - case MPI_COMBINER_HINDEXED: + case MPI_COMBINER_HINDEXED: case MPI_COMBINER_HINDEXED_INTEGER: - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HINDEXED_INTEGER\n"); #endif top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); prev_index = *curr_index; if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) { - ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index); + ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index); } if (prev_index == *curr_index) { @@ -668,15 +668,15 @@ void 
ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } break; - case MPI_COMBINER_STRUCT: - case MPI_COMBINER_STRUCT_INTEGER: - #ifdef FLATTEN_DEBUG + case MPI_COMBINER_STRUCT: + case MPI_COMBINER_STRUCT_INTEGER: + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_STRUCT_INTEGER\n"); #endif top_count = ints[0]; for (n=0; n2G integer arithmetic problems */ if (ints[1+n] > 0 || types[n] == MPI_LB || types[n] == MPI_UB) { ADIO_Offset blocklength = ints[1+n]; @@ -723,8 +723,8 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } break; - case MPI_COMBINER_RESIZED: - #ifdef FLATTEN_DEBUG + case MPI_COMBINER_RESIZED: + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_RESIZED\n"); #endif @@ -738,7 +738,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, * marker */ flat->blocklens[j] = 0; - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]); #endif @@ -747,7 +747,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, /* handle the datatype */ MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) { @@ -760,7 +760,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, MPI_Type_size_x(types[0], &old_size); flat->blocklens[j] = old_size; - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]); #endif @@ -774,7 +774,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, * constructor of this 
resized type */ flat->blocklens[j] = 0; - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#llX] %#llX, flat->blocklens[%#llX] %#llX\n",1,adds[1],j, flat->indices[j], j, flat->blocklens[j]); #endif @@ -802,7 +802,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, ADIOI_Free(adds); ADIOI_Free(types); - #ifdef FLATTEN_DEBUG + #ifdef FLATTEN_DEBUG DBG_FPRINTF(stderr,"ADIOI_Flatten:: return st_offset %#llX, curr_index %#llX\n",st_offset,*curr_index); #endif @@ -837,7 +837,7 @@ MPI_Count ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, MPI_Count *curr_i #ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP case MPI_COMBINER_DUP: MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) count = ADIOI_Count_contiguous_blocks(types[0], curr_index); @@ -896,7 +896,7 @@ MPI_Count ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, MPI_Count *curr_i case MPI_COMBINER_CONTIGUOUS: top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); prev_index = *curr_index; @@ -904,7 +904,7 @@ MPI_Count ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, MPI_Count *curr_i count = ADIOI_Count_contiguous_blocks(types[0], curr_index); else count = 1; - if (prev_index == *curr_index) + if (prev_index == *curr_index) /* simplest case, made up of basic or contiguous types */ (*curr_index)++; else { @@ -917,10 +917,10 @@ MPI_Count ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, MPI_Count *curr_i case MPI_COMBINER_VECTOR: case MPI_COMBINER_HVECTOR: - case MPI_COMBINER_HVECTOR_INTEGER: + case MPI_COMBINER_HVECTOR_INTEGER: top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, 
&old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); prev_index = *curr_index; @@ -950,12 +950,12 @@ MPI_Count ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, MPI_Count *curr_i } break; - case MPI_COMBINER_INDEXED: + case MPI_COMBINER_INDEXED: case MPI_COMBINER_HINDEXED: case MPI_COMBINER_HINDEXED_INTEGER: top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); prev_index = *curr_index; @@ -991,7 +991,7 @@ MPI_Count ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, MPI_Count *curr_i case MPI_COMBINER_INDEXED_BLOCK: top_count = ints[0]; MPI_Type_get_envelope(types[0], &old_nints, &old_nadds, - &old_ntypes, &old_combiner); + &old_ntypes, &old_combiner); ADIOI_Datatype_iscontig(types[0], &old_is_contig); prev_index = *curr_index; @@ -1019,13 +1019,13 @@ MPI_Count ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, MPI_Count *curr_i } break; - case MPI_COMBINER_STRUCT: - case MPI_COMBINER_STRUCT_INTEGER: + case MPI_COMBINER_STRUCT: + case MPI_COMBINER_STRUCT_INTEGER: top_count = ints[0]; count = 0; for (n=0; ncount - 1); i++) { if ((flat_type->indices[i] + flat_type->blocklens[i] != @@ -1140,7 +1140,7 @@ void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type) j++; opt_indices[j] = flat_type->indices[i + 1]; opt_blocklens[j] = flat_type->blocklens[i + 1]; - } + } } flat_type->count = opt_blocks; ADIOI_Free(flat_type->blocklens); diff --git a/ompi/mca/io/romio314/romio/adio/common/get_fp_posn.c b/ompi/mca/io/romio314/romio/adio/common/get_fp_posn.c index 671dde3eb32..eb176b81ab7 100644 --- a/ompi/mca/io/romio314/romio/adio/common/get_fp_posn.c +++ b/ompi/mca/io/romio314/romio/adio/common/get_fp_posn.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -20,7 +20,7 @@ void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset) int filetype_is_contig; MPI_Aint filetype_extent; ADIO_Offset disp, byte_offset, sum=0, size_in_file, n_filetypes, frd_size; - + ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); etype_size = fd->etype_size; @@ -42,10 +42,10 @@ void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset) n_filetypes++; for (i=0; icount; i++) { sum += flat_file->blocklens[i]; - if (disp + flat_file->indices[i] + - n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent + flat_file->blocklens[i] + if (disp + flat_file->indices[i] + + n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent + flat_file->blocklens[i] >= byte_offset) { - frd_size = disp + flat_file->indices[i] + + frd_size = disp + flat_file->indices[i] + n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent + flat_file->blocklens[i] - byte_offset; sum -= frd_size; diff --git a/ompi/mca/io/romio314/romio/adio/common/greq_fns.c b/ompi/mca/io/romio314/romio/adio/common/greq_fns.c index aff8ff77e5d..5be6f4d167e 100644 --- a/ompi/mca/io/romio314/romio/adio/common/greq_fns.c +++ b/ompi/mca/io/romio314/romio/adio/common/greq_fns.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -25,7 +25,7 @@ void MPIO_Completed_request_create(MPI_File *fh, MPI_Offset bytes, if (*error_code != MPI_SUCCESS) *error_code = MPIO_Err_return_file(*fh, *error_code); /* --END ERROR HANDLING-- */ - MPI_Grequest_start(MPIU_Greq_query_fn, MPIU_Greq_free_fn, + MPI_Grequest_start(MPIU_Greq_query_fn, MPIU_Greq_free_fn, MPIU_Greq_cancel_fn, status, request); MPI_Grequest_complete(*request); } diff --git a/ompi/mca/io/romio314/romio/adio/common/heap-sort.c b/ompi/mca/io/romio314/romio/adio/common/heap-sort.c index 025a1a74c4c..e9b19f5efab 100644 --- a/ompi/mca/io/romio314/romio/adio/common/heap-sort.c +++ b/ompi/mca/io/romio314/romio/adio/common/heap-sort.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2008 University of Chicago. + * Copyright (C) 2008 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -97,7 +97,7 @@ void ADIOI_Heap_extract_min(heap_t *heap, ADIO_Offset* offset, int *proc, ADIO_Offset *reg_max_len) { heap_node_t *nodes; nodes = heap->nodes; - + assert (heap->size > 0); *offset = nodes[0].offset; *proc = nodes[0].proc; @@ -121,7 +121,7 @@ static void print_heap(heap_t *heap) printf ("offsets:\n"); for (i=0; i < heap->size; i++) { printf ("%lld ", heap->nodes[i].offset); - + if ((i+1) == next_level_idx) { printf ("\n"); next_level_idx += (int) exp2(level+1); diff --git a/ompi/mca/io/romio314/romio/adio/common/hint_fns.c b/ompi/mca/io/romio314/romio/adio/common/hint_fns.c index 34c898bdfe5..ef447db6e08 100644 --- a/ompi/mca/io/romio314/romio/adio/common/hint_fns.c +++ b/ompi/mca/io/romio314/romio/adio/common/hint_fns.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * * Copyright (C) 2013 UChicago/Argonne, LLC * See COPYRIGHT notice in top-level directory. 
@@ -9,10 +9,10 @@ #include "hint_fns.h" -int ADIOI_Info_check_and_install_int(ADIO_File fd, MPI_Info info, const char *key, +int ADIOI_Info_check_and_install_int(ADIO_File fd, MPI_Info info, const char *key, int *local_cache, char *funcname, int *error_code) { - int intval, tmp_val, flag; + int intval, tmp_val, flag; char *value; value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); @@ -35,7 +35,7 @@ int ADIOI_Info_check_and_install_int(ADIO_File fd, MPI_Info info, const char *ke /* --BEGIN ERROR HANDLING-- */ if (tmp_val != intval) { MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(funcname, - key, + key, error_code); return -1; } @@ -49,7 +49,7 @@ int ADIOI_Info_check_and_install_int(ADIO_File fd, MPI_Info info, const char *ke return 0; } -int ADIOI_Info_check_and_install_enabled(ADIO_File fd, MPI_Info info, const char *key, +int ADIOI_Info_check_and_install_enabled(ADIO_File fd, MPI_Info info, const char *key, int *local_cache, char *funcname, int *error_code) { int tmp_val, flag; @@ -97,7 +97,7 @@ int ADIOI_Info_check_and_install_enabled(ADIO_File fd, MPI_Info info, const char ADIOI_Free(value); return 0; } -int ADIOI_Info_check_and_install_true(ADIO_File fd, MPI_Info info, const char *key, +int ADIOI_Info_check_and_install_true(ADIO_File fd, MPI_Info info, const char *key, int *local_cache, char *funcname, int *error_code) { int flag, tmp_val; @@ -140,7 +140,7 @@ int ADIOI_Info_check_and_install_true(ADIO_File fd, MPI_Info info, const char *k ADIOI_Free(value); return 0; } -int ADIOI_Info_check_and_install_str(ADIO_File fd, MPI_Info info, const char *key, +int ADIOI_Info_check_and_install_str(ADIO_File fd, MPI_Info info, const char *key, char **local_cache, char *funcname, int *error_code) { int flag; @@ -175,7 +175,7 @@ int ADIOI_Info_check_and_install_str(ADIO_File fd, MPI_Info info, const char *ke } ADIOI_Strncpy(*local_cache, value, len); } - /* if it has been set already, we ignore it the second time. 
+ /* if it has been set already, we ignore it the second time. * otherwise we would get an error if someone used the same * info value with a cb_config_list value in it in a couple * of calls, which would be irritating. */ diff --git a/ompi/mca/io/romio314/romio/adio/common/iscontig.c b/ompi/mca/io/romio314/romio/adio/common/iscontig.c index 3c8aa712f4e..58eead89da9 100644 --- a/ompi/mca/io/romio314/romio/adio/common/iscontig.c +++ b/ompi/mca/io/romio314/romio/adio/common/iscontig.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -80,7 +80,7 @@ void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag) adds = (MPI_Aint *) ADIOI_Malloc((nadds+1)*sizeof(MPI_Aint)); types = (MPI_Datatype *) ADIOI_Malloc((ntypes+1)*sizeof(MPI_Datatype)); MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, - adds, types); + adds, types); ADIOI_Datatype_iscontig(types[0], flag); #ifndef MPISGI @@ -99,7 +99,7 @@ void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag) break; } - /* This function needs more work. It should check for contiguity + /* This function needs more work. It should check for contiguity in other cases as well.*/ } #endif diff --git a/ompi/mca/io/romio314/romio/adio/common/lock.c b/ompi/mca/io/romio314/romio/adio/common/lock.c index 2590d77433a..6fcff9e6900 100644 --- a/ompi/mca/io/romio314/romio/adio/common/lock.c +++ b/ompi/mca/io/romio314/romio/adio/common/lock.c @@ -1,18 +1,18 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ #include "adio.h" #ifdef ROMIO_NTFS -/* This assumes that lock will always remain in the common directory and +/* This assumes that lock will always remain in the common directory and * that the ntfs directory will always be called ad_ntfs. */ #include "..\ad_ntfs\ad_ntfs.h" int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, - ADIO_Offset len) + ADIO_Offset len) { static char myname[] = "ADIOI_Set_lock"; int ret_val, error_code = MPI_SUCCESS; @@ -33,17 +33,17 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, if (cmd == ADIOI_LOCK_CMD) { /*printf("locking %d\n", (int)fd);fflush(stdout);*/ - ret_val = LockFileEx(fd, dwFlags, 0, - ( (DWORD) ( len & (__int64) 0xFFFFFFFF ) ), - ( (DWORD) ( (len >> 32) & (__int64) 0xFFFFFFFF ) ), + ret_val = LockFileEx(fd, dwFlags, 0, + ( (DWORD) ( len & (__int64) 0xFFFFFFFF ) ), + ( (DWORD) ( (len >> 32) & (__int64) 0xFFFFFFFF ) ), &Overlapped); } else { /*printf("unlocking %d\n", (int)fd);fflush(stdout);*/ - ret_val = UnlockFileEx(fd, 0, - ( (DWORD) ( len & (__int64) 0xFFFFFFFF ) ), - ( (DWORD) ( (len >> 32) & (__int64) 0xFFFFFFFF ) ), + ret_val = UnlockFileEx(fd, 0, + ( (DWORD) ( len & (__int64) 0xFFFFFFFF ) ), + ( (DWORD) ( (len >> 32) & (__int64) 0xFFFFFFFF ) ), &Overlapped); } #else @@ -91,7 +91,7 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, } #else int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, - ADIO_Offset len) + ADIO_Offset len) { int err, error_code, err_count = 0, sav_errno; struct flock lock; @@ -99,13 +99,13 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, if (len == 0) return MPI_SUCCESS; - /* Depending on the compiler flags and options, struct flock + /* Depending on the compiler flags and options, struct flock may not be defined with types that are the same size as ADIO_Offsets. 
*/ /* FIXME: This is a temporary hack until we use flock64 where available. It also doesn't fix the broken Solaris header sys/types.h header file, which declars off_t as a UNION ! Configure tests to - see if the off64_t is a union if large file support is requested; + see if the off64_t is a union if large file support is requested; if so, it does not select large file support. */ #ifdef NEEDS_INT_CAST_WITH_FLOCK @@ -131,16 +131,16 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, { if((err_count < 5) || (err_count > 9995)) { - fprintf(stderr, "File locking failed in ADIOI_Set_lock(fd %#X,cmd %s/%#X,type %s/%#X,whence %#X) with return value %#X and errno %#X. Retry (%d).\n", + fprintf(stderr, "File locking failed in ADIOI_Set_lock(fd %#X,cmd %s/%#X,type %s/%#X,whence %#X) with return value %#X and errno %#X. Retry (%d).\n", fd, ((cmd == F_GETLK )? "F_GETLK" : ((cmd == F_SETLK )? "F_SETLK" : ((cmd == F_SETLKW )? "F_SETLKW" : "UNEXPECTED"))), - cmd, + cmd, ((type == F_RDLCK )? "F_RDLCK" : ((type == F_WRLCK )? "F_WRLCK" : ((type == F_UNLCK )? "F_UNLOCK" : "UNEXPECTED"))), - type, + type, whence, err, errno, err_count); perror("ADIOI_Set_lock:"); fprintf(stderr,"ADIOI_Set_lock:offset %#llx, length %#llx\n",(unsigned long long)offset, (unsigned long long)len); @@ -151,7 +151,7 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, } while (err && ((errno == EINTR) || ((errno == EINPROGRESS) && (++err_count < 10000)))); if (err && (errno != EBADF)) { - /* FIXME: This should use the error message system, + /* FIXME: This should use the error message system, especially for MPICH */ FPRINTF(stderr, "This requires fcntl(2) to be implemented. As of 8/25/2011 it is not. 
Generic MPICH Message: File locking failed in ADIOI_Set_lock(fd %X,cmd %s/%X,type %s/%X,whence %X) with return value %X and errno %X.\n" "- If the file system is NFS, you need to use NFS version 3, ensure that the lockd daemon is running on all the machines, and mount the directory with the 'noac' option (no attribute caching).\n" @@ -160,11 +160,11 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, ((cmd == F_GETLK )? "F_GETLK" : ((cmd == F_SETLK )? "F_SETLK" : ((cmd == F_SETLKW )? "F_SETLKW" : "UNEXPECTED"))), - cmd, + cmd, ((type == F_RDLCK )? "F_RDLCK" : ((type == F_WRLCK )? "F_WRLCK" : ((type == F_UNLCK )? "F_UNLOCK" : "UNEXPECTED"))), - type, + type, whence, err, errno); perror("ADIOI_Set_lock:"); FPRINTF(stderr,"ADIOI_Set_lock:offset %llu, length %llu\n",(unsigned long long)offset, (unsigned long long)len); @@ -182,7 +182,7 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, #if (defined(ROMIO_HFS) || defined(ROMIO_XFS)) int ADIOI_Set_lock64(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence, - ADIO_Offset len) + ADIO_Offset len) { int err, error_code; struct flock64 lock; @@ -208,11 +208,11 @@ int ADIOI_Set_lock64(FDTYPE fd, int cmd, int type, ADIO_Offset offset, ((cmd == F_GETLK64 )? "F_GETLK64" : ((cmd == F_SETLK64 )? "F_SETLK64" : ((cmd == F_SETLKW64)? "F_SETLKW64" : "UNEXPECTED")))))), - cmd, + cmd, ((type == F_RDLCK )? "F_RDLCK" : ((type == F_WRLCK )? "F_WRLCK" : ((type == F_UNLCK )? 
"F_UNLOCK" : "UNEXPECTED"))), - type, + type, whence, err, errno); perror("ADIOI_Set_lock64:"); FPRINTF(stderr,"ADIOI_Set_lock:offset %llu, length %llu\n",(unsigned long long)offset, (unsigned long long)len); diff --git a/ompi/mca/io/romio314/romio/adio/common/malloc.c b/ompi/mca/io/romio314/romio/adio/common/malloc.c index 73c5a707050..03e90e38df3 100644 --- a/ompi/mca/io/romio314/romio/adio/common/malloc.c +++ b/ompi/mca/io/romio314/romio/adio/common/malloc.c @@ -1,17 +1,17 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ /* These are routines for allocating and deallocating memory. They should be called as ADIOI_Malloc(size) and - ADIOI_Free(ptr). In adio.h, they are macro-replaced to - ADIOI_Malloc(size,__LINE__,__FILE__) and + ADIOI_Free(ptr). In adio.h, they are macro-replaced to + ADIOI_Malloc(size,__LINE__,__FILE__) and ADIOI_Free(ptr,__LINE__,__FILE__). - Later on, add some tracing and error checking, similar to + Later on, add some tracing and error checking, similar to MPID_trmalloc. */ #include "adio.h" diff --git a/ompi/mca/io/romio314/romio/adio/common/req_malloc.c b/ompi/mca/io/romio314/romio/adio/common/req_malloc.c index 61018e1a684..833507cfdff 100644 --- a/ompi/mca/io/romio314/romio/adio/common/req_malloc.c +++ b/ompi/mca/io/romio314/romio/adio/common/req_malloc.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -22,7 +22,7 @@ struct ADIOI_RequestD *ADIOI_Malloc_request(void) int i; if (!ADIOI_Req_avail_head) { - ADIOI_Req_avail_head = (ADIOI_Req_node *) + ADIOI_Req_avail_head = (ADIOI_Req_node *) ADIOI_Malloc(NUM*sizeof(ADIOI_Req_node)); if (ADIOI_Req_avail_head == NULL) { @@ -40,7 +40,7 @@ struct ADIOI_RequestD *ADIOI_Malloc_request(void) /* keep track of malloced area that needs to be freed later */ if (!ADIOI_Malloc_req_tail) { ADIOI_Malloc_req_tail = (ADIOI_Malloc_req *) - ADIOI_Malloc(sizeof(ADIOI_Malloc_req)); + ADIOI_Malloc(sizeof(ADIOI_Malloc_req)); ADIOI_Malloc_req_head = ADIOI_Malloc_req_tail; ADIOI_Malloc_req_head->ptr = ADIOI_Req_avail_head; ADIOI_Malloc_req_head->next = NULL; @@ -57,7 +57,7 @@ struct ADIOI_RequestD *ADIOI_Malloc_request(void) ptr = ADIOI_Req_avail_head; ADIOI_Req_avail_head = ADIOI_Req_avail_head->next; if (!ADIOI_Req_avail_head) ADIOI_Req_avail_tail = NULL; - + (ptr->reqd).cookie = ADIOI_REQ_COOKIE; return &(ptr->reqd); } @@ -65,7 +65,7 @@ struct ADIOI_RequestD *ADIOI_Malloc_request(void) void ADIOI_Free_request(ADIOI_Req_node *node) { -/* This function could be called as ADIOI_Free_request(ADIO_Request request), +/* This function could be called as ADIOI_Free_request(ADIO_Request request), because request would be a pointer to the first element of ADIOI_Req_node.*/ /* moves this node to available pool. does not actually free it. */ diff --git a/ompi/mca/io/romio314/romio/adio/common/shfp_fname.c b/ompi/mca/io/romio314/romio/adio/common/shfp_fname.c index dfa5bafc2cf..3ad772fddb2 100644 --- a/ompi/mca/io/romio314/romio/adio/common/shfp_fname.c +++ b/ompi/mca/io/romio314/romio/adio/common/shfp_fname.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -17,8 +17,15 @@ #ifdef HAVE_TIME_H #include #endif -/* The following function selects the name of the file to be used to - store the shared file pointer. The shared-file-pointer file is a + +/* + * Open MPI: we have to use internal opal_random() instead of rand(3) + * to prevent pertubing user's randon seed + */ +#include + +/* The following function selects the name of the file to be used to + store the shared file pointer. The shared-file-pointer file is a hidden file in the same directory as the real file being accessed. If the real file is /tmp/thakur/testfile, the shared-file-pointer file will be /tmp/thakur/.testfile.shfp.yyy.xxxx, where yyy @@ -35,20 +42,26 @@ void ADIOI_Shfp_fname(ADIO_File fd, int rank, int *error_code) int len; char *slash, *ptr, tmp[128]; int pid = 0; + opal_rng_buff_t adio_rand_buff; fd->shared_fp_fname = (char *) ADIOI_Malloc(PATH_MAX); if (!rank) { - srand(time(NULL)); - i = rand(); + /* + * Open MPI: we have to use internal opal_random() instead of rand(3) + * to prevent pertubing user's randon seed + */ + opal_srand(&adio_rand_buff,time(NULL)); + i = opal_random(); + pid = (int)getpid(); - + if (ADIOI_Strncpy(fd->shared_fp_fname, fd->filename, PATH_MAX)) { *error_code = ADIOI_Err_create_code("ADIOI_Shfp_fname", fd->filename, ENAMETOOLONG); return; } - + #ifdef ROMIO_NTFS slash = strrchr(fd->filename, '\\'); #else @@ -86,11 +99,11 @@ void ADIOI_Shfp_fname(ADIO_File fd, int rank, int *error_code) return; } } - + ADIOI_Snprintf(tmp, 128, ".shfp.%d.%d", pid, i); /* ADIOI_Strnapp will return non-zero if truncated. 
That's ok */ ADIOI_Strnapp(fd->shared_fp_fname, tmp, PATH_MAX); - + len = (int)strlen(fd->shared_fp_fname); MPI_Bcast(&len, 1, MPI_INT, 0, fd->comm); MPI_Bcast(fd->shared_fp_fname, len+1, MPI_CHAR, 0, fd->comm); diff --git a/ompi/mca/io/romio314/romio/adio/common/status_setb.c b/ompi/mca/io/romio314/romio/adio/common/status_setb.c index ec1e4ff7f69..756e94e38a9 100644 --- a/ompi/mca/io/romio314/romio/adio/common/status_setb.c +++ b/ompi/mca/io/romio314/romio/adio/common/status_setb.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -13,11 +13,11 @@ /* TODO: still needs to handle partial datatypes and situations where the mpi * implementation fills status with something other than bytes (globus2 might * do this) */ -int MPIR_Status_set_bytes(MPI_Status *status, MPI_Datatype datatype, +int MPIR_Status_set_bytes(MPI_Status *status, MPI_Datatype datatype, MPI_Count nbytes) { ADIOI_UNREFERENCED_ARG(datatype); - /* it's ok that ROMIO stores number-of-bytes in status, not + /* it's ok that ROMIO stores number-of-bytes in status, not * count-of-copies, as long as MPI_GET_COUNT knows what to do */ if (status != MPI_STATUS_IGNORE) MPI_Status_set_elements_x(status, MPI_BYTE, nbytes); diff --git a/ompi/mca/io/romio314/romio/adio/common/strfns.c b/ompi/mca/io/romio314/romio/adio/common/strfns.c index 65e5ac24e74..4b8c1ec2568 100644 --- a/ompi/mca/io/romio314/romio/adio/common/strfns.c +++ b/ompi/mca/io/romio314/romio/adio/common/strfns.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -9,7 +9,7 @@ /* style: allow:sprintf:3 sig:0 */ -/* +/* * Below are the "safe" versions of the various string and printf * operations. They are directly taken from MPICH, with MPIU replaced by ADIOI. */ @@ -18,7 +18,7 @@ * ADIOI_Strncpy - Copy at most n character. Stop once a null is reached. * * This is different from strncpy, which null pads so that exactly - * n characters are copied. The strncpy behavior is correct for many + * n characters are copied. The strncpy behavior is correct for many * applications because it guarantees that the string has no uninitialized * data. * @@ -27,7 +27,7 @@ * */ /*@ ADIOI_Strncpy - Copy a string with a maximum length - + Input Parameters: + instr - String to copy - maxlen - Maximum total length of 'outstr' @@ -36,11 +36,11 @@ Output Parameters: . outstr - String to copy into Notes: - This routine is the routine that you wish 'strncpy' was. In copying - 'instr' to 'outstr', it stops when either the end of 'outstr' (the + This routine is the routine that you wish 'strncpy' was. In copying + 'instr' to 'outstr', it stops when either the end of 'outstr' (the null character) is seen or the maximum length 'maxlen' is reached. - Unlike 'strncpy', it does not add enough nulls to 'outstr' after - copying 'instr' in order to move precisely 'maxlen' characters. + Unlike 'strncpy', it does not add enough nulls to 'outstr' after + copying 'instr' in order to move precisely 'maxlen' characters. Thus, this routine may be used anywhere 'strcpy' is used, without any performance cost related to large values of 'maxlen'. 
@@ -57,8 +57,8 @@ int ADIOI_Strncpy( char *dest, const char *src, size_t n ) while (*s_ptr && i-- > 0) { *d_ptr++ = *s_ptr++; } - - if (i > 0) { + + if (i > 0) { *d_ptr = 0; return 0; } @@ -81,7 +81,7 @@ Output Parameters: Notes: This routine is similar to 'strncat' except that the 'maxlen' argument - is the maximum total length of 'outstr', rather than the maximum + is the maximum total length of 'outstr', rather than the maximum number of characters to move from 'instr'. Thus, this routine is easier to use when the declared size of 'instr' is known. @@ -106,21 +106,21 @@ int ADIOI_Strnapp( char *dest, const char *src, size_t n ) /* We allow i >= (not just >) here because the first while decrements i by one more than there are characters, leaving room for the null */ - if (i >= 0) { + if (i >= 0) { *d_ptr = 0; return 0; } else { /* Force the null at the end */ *--d_ptr = 0; - + /* We may want to force an error message here, at least in the debugging version */ return 1; } } -/*@ +/*@ ADIOI_Strdup - Duplicate a string Synopsis: @@ -136,7 +136,7 @@ Input Parameters: null pointer is returned on error, such as out-of-memory. Notes: - Like 'ADIOI_Malloc' and 'ADIOI_Free', this will often be implemented as a + Like 'ADIOI_Malloc' and 'ADIOI_Free', this will often be implemented as a macro but may use 'ADIOI_trstrdup' to provide a tracing version. Module: @@ -159,14 +159,14 @@ char *ADIOI_Strdup( const char *str ) } -/* +/* * We need an snprintf replacement for systems without one */ #ifndef HAVE_SNPRINTF #include /* FIXME: Really need a check for varargs.h vs stdarg.h */ #include -/* +/* * This is an approximate form which is suitable for most uses within * the MPICH code */ @@ -209,7 +209,7 @@ int ADIOI_Snprintf( char *str, size_t size, const char *format, ... 
) while (*p && isdigit(*p)) { width = 10 * width + (*p++ - '0'); } - /* When there is no longer a digit, get the format + /* When there is no longer a digit, get the format character */ nc = *p++; } @@ -236,7 +236,7 @@ int ADIOI_Snprintf( char *str, size_t size, const char *format, ... ) int tmplen = strlen(tmp); /* If a width was specified, pad with spaces on the left (on the right if %-3d given; not implemented yet */ - while (size-- > 0 && width-- > tmplen) + while (size-- > 0 && width-- > tmplen) *out_str++ = ' '; } while (size-- > 0 && *t) { @@ -257,7 +257,7 @@ int ADIOI_Snprintf( char *str, size_t size, const char *format, ... ) int tmplen = strlen(tmp); /* If a width was specified, pad with spaces on the left (on the right if %-3d given; not implemented yet */ - while (size-- > 0 && width-- > tmplen) + while (size-- > 0 && width-- > tmplen) *out_str++ = ' '; } while (size-- > 0 && *t) { @@ -277,7 +277,7 @@ int ADIOI_Snprintf( char *str, size_t size, const char *format, ... ) int tmplen = strlen(tmp); /* If a width was specified, pad with spaces on the left (on the right if %-3d given; not implemented yet */ - while (size-- > 0 && width-- > tmplen) + while (size-- > 0 && width-- > tmplen) *out_str++ = ' '; } while (size-- > 0 && *t) { diff --git a/ompi/mca/io/romio314/romio/adio/common/system_hints.c b/ompi/mca/io/romio314/romio/adio/common/system_hints.c index fd6cba5cb5e..56e9db21446 100644 --- a/ompi/mca/io/romio314/romio/adio/common/system_hints.c +++ b/ompi/mca/io/romio314/romio/adio/common/system_hints.c @@ -1,7 +1,7 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- * vim: ts=8 sts=4 sw=4 noexpandtab * - * Copyright (C) 2007 UChicago/Argonne LLC. + * Copyright (C) 2007 UChicago/Argonne LLC. * See COPYRIGHT notice in top-level directory. 
*/ @@ -15,12 +15,8 @@ #ifdef HAVE_SYS_TYPES_H #include #endif -#ifdef HAVE_STDLIB_H #include -#endif -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_UNISTD_H #include #endif @@ -62,7 +58,7 @@ static int find_file(void) { int fd=-1; char * hintfile; - + hintfile = getenv(ROMIO_HINT_ENV_VAR); if(hintfile) fd = open(hintfile, O_RDONLY); @@ -74,11 +70,11 @@ static int find_file(void) /* parse the file-of-hints. Format is zero or more lines of " \n". * A # in collumn zero is a comment and the line will be ignored. Do our best - * to ignore badly formed lines too. + * to ignore badly formed lines too. * * The caller provides an 'info' object. Each key-value pair found by the * parser will get added to the info object. any keys already set will be left - * alone on the assumption that the caller knows best. + * alone on the assumption that the caller knows best. * * because MPI-IO hints are optional, we can get away with limited error * reporting. @@ -110,19 +106,19 @@ static int file_to_info_all(int fd, MPI_Info info, int rank, MPI_Comm comm) if (token == NULL) goto fn_exit; do { - if ( (key = strtok_r(token, " \t", &pos2)) == NULL) + if ( (key = strtok_r(token, " \t", &pos2)) == NULL) /* malformed line: found no items */ continue; - if (token[0] == '#') + if (token[0] == '#') /* ignore '#'-delimited comments */ continue; - if ( (val = strtok_r(NULL, " \t", &pos2)) == NULL) + if ( (val = strtok_r(NULL, " \t", &pos2)) == NULL) /* malformed line: found key without value */ continue; - if ( (garbage = strtok_r(NULL, " \t", &pos2)) != NULL) + if ( (garbage = strtok_r(NULL, " \t", &pos2)) != NULL) /* malformed line: more than two items */ continue; - + #ifdef SYSHINT_DEBUG printf("found: key=%s val=%s\n", key, val); #endif @@ -155,9 +151,9 @@ void ADIOI_process_system_hints(ADIO_File fd, MPI_Info info) /* given 'info', incorporate any hints in 'sysinfo' that are not already set * into 'new_info'. Caller must free 'new_info' later. 
*/ -void ADIOI_incorporate_system_hints(MPI_Info info, - MPI_Info sysinfo, - MPI_Info *new_info) +void ADIOI_incorporate_system_hints(MPI_Info info, + MPI_Info sysinfo, + MPI_Info *new_info) { int i, nkeys_sysinfo, flag=0; /* must initialize flag to 0 */ @@ -174,7 +170,7 @@ void ADIOI_incorporate_system_hints(MPI_Info info, return; } - if (info == MPI_INFO_NULL) + if (info == MPI_INFO_NULL) MPI_Info_create(new_info); else MPI_Info_dup(info, new_info); @@ -182,7 +178,7 @@ void ADIOI_incorporate_system_hints(MPI_Info info, for (i=0; i 2GB files in some cases... */ struct ADIOI_Hints_struct { @@ -94,7 +94,7 @@ typedef struct ADIOI_Datarep { struct ADIOI_Datarep *next; /* pointer to next datarep */ } ADIOI_Datarep; -/* Values for use with cb_read, cb_write, ds_read, and ds_write +/* Values for use with cb_read, cb_write, ds_read, and ds_write * and some fs-specific hints (IBM xlc, Compaq Tru64 compilers object to a comma after the last item) (that's just wrong) @@ -107,9 +107,9 @@ enum { /* flattened datatypes. Each datatype is stored as a node of a globally accessible linked list. Once attribute caching on a - datatype is available (in MPI-2), that should be used instead. */ + datatype is available (in MPI-2), that should be used instead. */ -typedef struct ADIOI_Fl_node { +typedef struct ADIOI_Fl_node { MPI_Datatype type; MPI_Count count; /* no. 
of contiguous blocks */ ADIO_Offset *blocklens; /* array of contiguous block lengths (bytes)*/ @@ -145,25 +145,25 @@ typedef struct ADIOI_AIO_req_str { struct ADIOI_Fns_struct { void (*ADIOI_xxx_Open) (ADIO_File fd, int *error_code); - void (*ADIOI_xxx_OpenColl) (ADIO_File fd, int rank, + void (*ADIOI_xxx_OpenColl) (ADIO_File fd, int rank, int access_mode, int *error_code); void (*ADIOI_xxx_ReadContig) (ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); void (*ADIOI_xxx_WriteContig) (ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, - ADIO_Offset offset, ADIO_Status *status, int *error_code); + ADIO_Offset offset, ADIO_Status *status, int *error_code); void (*ADIOI_xxx_ReadStridedColl) (ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); void (*ADIOI_xxx_WriteStridedColl) (ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); - ADIO_Offset (*ADIOI_xxx_SeekIndividual) (ADIO_File fd, ADIO_Offset offset, + ADIO_Offset (*ADIOI_xxx_SeekIndividual) (ADIO_File fd, ADIO_Offset offset, int whence, int *error_code); - void (*ADIOI_xxx_Fcntl) (ADIO_File fd, int flag, - ADIO_Fcntl_t *fcntl_struct, int *error_code); - void (*ADIOI_xxx_SetInfo) (ADIO_File fd, MPI_Info users_info, + void (*ADIOI_xxx_Fcntl) (ADIO_File fd, int flag, + ADIO_Fcntl_t *fcntl_struct, int *error_code); + void (*ADIOI_xxx_SetInfo) (ADIO_File fd, MPI_Info users_info, int *error_code); void (*ADIOI_xxx_ReadStrided) (ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, @@ -172,27 +172,27 @@ struct ADIOI_Fns_struct { MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); void (*ADIOI_xxx_Close) (ADIO_File fd, int *error_code); - void 
(*ADIOI_xxx_IreadContig) (ADIO_File fd, void *buf, int count, - MPI_Datatype datatype, int file_ptr_type, + void (*ADIOI_xxx_IreadContig) (ADIO_File fd, void *buf, int count, + MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int *error_code); void (*ADIOI_xxx_IwriteContig) (ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int *error_code); - int (*ADIOI_xxx_ReadDone) (ADIO_Request *request, ADIO_Status *status, - int *error_code); - int (*ADIOI_xxx_WriteDone) (ADIO_Request *request, ADIO_Status *status, + int (*ADIOI_xxx_ReadDone) (ADIO_Request *request, ADIO_Status *status, + int *error_code); + int (*ADIOI_xxx_WriteDone) (ADIO_Request *request, ADIO_Status *status, + int *error_code); + void (*ADIOI_xxx_ReadComplete) (ADIO_Request *request, ADIO_Status *status, int *error_code); - void (*ADIOI_xxx_ReadComplete) (ADIO_Request *request, ADIO_Status *status, - int *error_code); void (*ADIOI_xxx_WriteComplete) (ADIO_Request *request, ADIO_Status *status, - int *error_code); + int *error_code); void (*ADIOI_xxx_IreadStrided) (ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int *error_code); void (*ADIOI_xxx_IwriteStrided) (ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int *error_code); - void (*ADIOI_xxx_Flush) (ADIO_File fd, int *error_code); + void (*ADIOI_xxx_Flush) (ADIO_File fd, int *error_code); void (*ADIOI_xxx_Resize) (ADIO_File fd, ADIO_Offset size, int *error_code); void (*ADIOI_xxx_Delete) (const char *filename, int *error_code); int (*ADIOI_xxx_Feature) (ADIO_File fd, int flag); @@ -209,7 +209,7 @@ struct ADIOI_Fns_struct { * http://stackoverflow.com/questions/3982348/implement-generic-swap-macro-in-c */ #define ADIOI_SWAP(x, y, T) do { T temp##x##y = x; x = y; y = temp##x##y; } while (0); 
-#define ADIOI_PREALLOC_BUFSZ 16777216 /* buffer size used to +#define ADIOI_PREALLOC_BUFSZ 16777216 /* buffer size used to preallocate disk space */ @@ -220,7 +220,7 @@ struct ADIOI_Fns_struct { #define ADIOI_IND_RD_BUFFER_SIZE_DFLT "4194304" /* buffer size for data sieving in independent writes = 512KB. default is smaller than for reads, because write requires read-modify-write - with file locking. If buffer size is large there is more contention + with file locking. If buffer size is large there is more contention for locks. */ #define ADIOI_IND_WR_BUFFER_SIZE_DFLT "524288" /* use one process per processor name by default */ @@ -302,8 +302,8 @@ struct ADIOI_Fns_struct { (*(fd->fns->ADIOI_xxx_Feature))(fd, flag) -/* structure for storing access info of this process's request - from the file domain of other processes, and vice-versa. used +/* structure for storing access info of this process's request + from the file domain of other processes, and vice-versa. used as array of structures indexed by process number. 
*/ typedef struct { ADIO_Offset *offsets; /* array of offsets */ @@ -341,7 +341,7 @@ void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset); void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset *disp); void ADIOI_process_system_hints(ADIO_File fd, MPI_Info info); -void ADIOI_incorporate_system_hints(MPI_Info info, MPI_Info sysinfo, +void ADIOI_incorporate_system_hints(MPI_Info info, MPI_Info sysinfo, MPI_Info *new_info); void ADIOI_Info_print_keyvals(MPI_Info info); @@ -349,20 +349,20 @@ void ADIOI_Info_print_keyvals(MPI_Info info); void ADIOI_GEN_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code); void ADIOI_GEN_Flush(ADIO_File fd, int *error_code); -void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, +void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, int access_mode, int *error_code); -void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank, +void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank, int access_mode, int *error_code); -void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank, +void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank, int access_mode, int *error_code); void ADIOI_GEN_Delete(const char *filename, int *error_code); -void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count, +void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code); int ADIOI_GEN_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, int wr, MPI_Request *request); -void ADIOI_GEN_IreadContig(ADIO_File fd, void *buf, int count, +void ADIOI_GEN_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int *error_code); @@ -391,7 +391,7 @@ int ADIOI_GEN_IODone(ADIO_Request *request, ADIO_Status *status, void ADIOI_GEN_IOComplete(ADIO_Request *request, ADIO_Status *status, int *error_code); int ADIOI_GEN_aio_poll_fn(void *extra_state, ADIO_Status *status); 
-int ADIOI_GEN_aio_wait_fn(int count, void **array_of_states, double timeout, +int ADIOI_GEN_aio_wait_fn(int count, void **array_of_states, double timeout, ADIO_Status *status); int ADIOI_GEN_aio_query_fn(void *extra_state, ADIO_Status *status); int ADIOI_GEN_aio_free_fn(void *extra_state); @@ -422,7 +422,7 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count, ADIO_Offset offset, ADIO_Status *status, int *error_code); void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype - datatype, int file_ptr_type, ADIO_Offset + datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Offset **offset_list_ptr, ADIO_Offset **len_list_ptr, ADIO_Offset *start_offset_ptr, ADIO_Offset *end_offset_ptr, int @@ -430,8 +430,8 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset *end_offsets, int nprocs, int nprocs_for_coll, ADIO_Offset *min_st_offset_ptr, - ADIO_Offset **fd_start_ptr, ADIO_Offset - **fd_end_ptr, int min_fd_size, + ADIO_Offset **fd_start_ptr, ADIO_Offset + **fd_end_ptr, int min_fd_size, ADIO_Offset *fd_size_ptr, int striping_unit); int ADIOI_Calc_aggregator(ADIO_File fd, @@ -441,9 +441,9 @@ int ADIOI_Calc_aggregator(ADIO_File fd, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end); -void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, +void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list, int - contig_access_count, ADIO_Offset + contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIO_Offset fd_size, int nprocs, @@ -451,12 +451,12 @@ void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int **count_my_req_per_proc_ptr, ADIOI_Access **my_req_ptr, int **buf_idx_ptr); -void ADIOI_Calc_others_req(ADIO_File fd, int count_my_req_procs, +void ADIOI_Calc_others_req(ADIO_File fd, int count_my_req_procs, int *count_my_req_per_proc, - ADIOI_Access *my_req, + 
ADIOI_Access *my_req, int nprocs, int myrank, int *count_others_req_procs_ptr, - ADIOI_Access **others_req_ptr); + ADIOI_Access **others_req_ptr); /* KC && AC - New Collective I/O internals*/ @@ -500,7 +500,7 @@ typedef struct view_state int pre_ol_ct; MPI_Aint *pre_disp_arr; int *pre_blk_arr; - + ADIOI_Flatlist_node *flat_type_p; } view_state; @@ -528,7 +528,7 @@ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, view_state *agg_file_view_state_arr, view_state *client_file_view_state_arr); int ADIOI_init_view_state(int file_ptr_type, - int nprocs, + int nprocs, view_state *view_state_arr, int op_type); int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, @@ -537,7 +537,7 @@ int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs, ADIO_Offset *client_comm_sz_arr, ADIO_Offset *agg_dtype_offset_p, MPI_Datatype *agg_dtype_p); -int ADIOI_Build_client_reqs(ADIO_File fd, +int ADIOI_Build_client_reqs(ADIO_File fd, int nprocs, view_state *my_mem_view_state_arr, view_state *agg_file_view_state_arr, @@ -574,7 +574,7 @@ void ADIOI_P2PContigReadAggregation(ADIO_File fd, ADIO_Offset *fd_start, ADIO_Offset *fd_end); -ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, +ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, int whence, int *error_code); void ADIOI_GEN_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); @@ -597,11 +597,11 @@ int ADIOI_Type_create_hindexed_x(int count, int ADIOI_FAKE_IODone(ADIO_Request *request, ADIO_Status *status, int *error_code); -void ADIOI_FAKE_IreadContig(ADIO_File fd, void *buf, int count, +void ADIOI_FAKE_IreadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int *error_code); -void ADIOI_FAKE_IreadStrided(ADIO_File fd, void *buf, int count, +void ADIOI_FAKE_IreadStrided(ADIO_File fd, void *buf, int count, 
MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Request *request, int *error_code); @@ -698,7 +698,7 @@ int MPIOI_File_iread(MPI_File fh, # define ADIOI_WRITE_LOCK(fd, offset, whence, len) \ do {if (((fd)->file_system == ADIO_XFS) || ((fd)->file_system == ADIO_HFS)) \ ADIOI_Set_lock64((fd)->fd_sys, F_SETLKW64, F_WRLCK, offset, whence, len);\ - else ADIOI_Set_lock((fd)->fd_sys, F_SETLKW, F_WRLCK, offset, whence, len); } while (0) + else ADIOI_Set_lock((fd)->fd_sys, F_SETLKW, F_WRLCK, offset, whence, len); } while (0) # define ADIOI_READ_LOCK(fd, offset, whence, len) \ do {if (((fd)->file_system == ADIO_XFS) || ((fd)->file_system == ADIO_HFS)) \ @@ -796,7 +796,7 @@ char *ADIOI_Strdup( const char * ); extern int snprintf( char *, size_t, const char *, ... ) ATTRIBUTE((format(printf,3,4))); #endif #else -int ADIOI_Snprintf( char *str, size_t size, const char *format, ... ) +int ADIOI_Snprintf( char *str, size_t size, const char *format, ... ) ATTRIBUTE((format(printf,3,4))); #endif /* HAVE_SNPRINTF */ @@ -806,7 +806,7 @@ int ADIOI_Snprintf( char *str, size_t size, const char *format, ... 
) # ifdef HAVE_SYSERRLIST extern char *sys_errlist[]; # define strerror(n) sys_errlist[n] -# else +# else # define PRINT_ERR_MSG # endif #endif @@ -848,11 +848,11 @@ int ADIOI_MPE_iwrite_b; #ifdef ROMIO_INSIDE_MPICH /* Assert that this MPI_Aint value can be cast to a ptr value without problem.*/ -/* Basic idea is the value should be unchanged after casting - (no loss of (meaningful) high order bytes in 8 byte MPI_Aint +/* Basic idea is the value should be unchanged after casting + (no loss of (meaningful) high order bytes in 8 byte MPI_Aint to (possible) 4 byte ptr cast) */ /* Should work even on 64bit or old 32bit configs */ - /* Use MPID_Ensure_Aint_fits_in_pointer from mpiutil.h and + /* Use MPID_Ensure_Aint_fits_in_pointer from mpiutil.h and MPI_AINT_CAST_TO_VOID_PTR from configure (mpi.h) */ #include "glue_romio.h" @@ -869,7 +869,7 @@ int ADIOI_MPE_iwrite_b; #define ADIOI_AINT_CAST_TO_VOID_PTR (void*) #define ADIOI_AINT_CAST_TO_LONG_LONG (long long) #define ADIOI_AINT_CAST_TO_OFFSET ADIOI_AINT_CAST_TO_LONG_LONG - #define ADIOI_ENSURE_AINT_FITS_IN_PTR(aint_value) + #define ADIOI_ENSURE_AINT_FITS_IN_PTR(aint_value) #define ADIOI_Assert assert #define MPIR_Upint unsigned long #define MPIU_THREADPRIV_DECL diff --git a/ompi/mca/io/romio314/romio/adio/include/adioi_fs_proto.h b/ompi/mca/io/romio314/romio/adio/include/adioi_fs_proto.h index e3af9170821..5c3f748096a 100644 --- a/ompi/mca/io/romio314/romio/adio/include/adioi_fs_proto.h +++ b/ompi/mca/io/romio314/romio/adio/include/adioi_fs_proto.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/adio/include/mpio_error.h b/ompi/mca/io/romio314/romio/adio/include/mpio_error.h index f9d22b17219..88fdb09273a 100644 --- a/ompi/mca/io/romio314/romio/adio/include/mpio_error.h +++ b/ompi/mca/io/romio314/romio/adio/include/mpio_error.h @@ -59,7 +59,7 @@ #define MPIR_ERR_ASYNC_OUTSTANDING 27 #define MPIR_READ_PERM 29 #define MPIR_PREALLOC_PERM 31 -#define MPIR_ERR_FILETYPE 33 +#define MPIR_ERR_FILETYPE 33 #define MPIR_ERR_NO_NTFS 35 #define MPIR_ERR_NO_TESTFS 36 #define MPIR_ERR_NO_LUSTRE 37 @@ -68,7 +68,7 @@ /* MPI_ERR_COMM */ #ifndef MPIR_ERR_COMM_NULL #define MPIR_ERR_COMM_NULL 3 -#define MPIR_ERR_COMM_INTER 5 +#define MPIR_ERR_COMM_INTER 5 #endif /* MPI_ERR_UNSUPPORTED_DATAREP */ diff --git a/ompi/mca/io/romio314/romio/adio/include/mpipr.h b/ompi/mca/io/romio314/romio/adio/include/mpipr.h index b1dcd9fb449..21f208ed01f 100644 --- a/ompi/mca/io/romio314/romio/adio/include/mpipr.h +++ b/ompi/mca/io/romio314/romio/adio/include/mpipr.h @@ -4,8 +4,8 @@ * See COPYRIGHT in top-level directory. */ /* This file replaces all MPI function names with their PMPI equivalents. - PMPI versions are used by default so that the user can profile - application code without interference from MPI functions used by + PMPI versions are used by default so that the user can profile + application code without interference from MPI functions used by MPI-IO. 
*/ #ifndef USE_MPI_VERSIONS @@ -295,7 +295,7 @@ #undef MPI_Wtime #define MPI_Wtime PMPI_Wtime -/* commented out because these could be macros themselves, as in MPICH +/* commented out because these could be macros themselves, as in MPICH #undef MPI_Type_c2f #define MPI_Type_c2f PMPI_Type_c2f #undef MPI_Type_f2c diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_am.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_am.m4 index 4d3b0de2257..8e425df1902 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_am.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_am.m4 @@ -1,6 +1,6 @@ dnl AM_IGNORE is an extension that tells (a patched) automake not to dnl include the specified AC_SUBST variable in the Makefile.in that -dnl automake generates. We don't use AC_DEFUN, since aclocal will +dnl automake generates. We don't use AC_DEFUN, since aclocal will dnl then complain that AM_IGNORE is a duplicate (if you are using the dnl patched automake/aclocal). m4_ifdef([AM_IGNORE],[],[m4_define([AM_IGNORE],[])]) diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_atomic.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_atomic.m4 index 4e96cfbf366..0bda659a98a 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_atomic.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_atomic.m4 @@ -16,11 +16,11 @@ dnl . HAVE__INTERLOCKEDEXCHANGE - _InterlockedExchange intrinsic is available dnl (IA64) dnl . HAVE_GCC_ASM_SPARC_MEMBAR - gcc __asm__ will issue SPARC architecture dnl memory barrier instruction -dnl . HAVE_SOLARIS_ASM_SPARC_MEMBAR - Solaris asm() will issue SPARC +dnl . HAVE_SOLARIS_ASM_SPARC_MEMBAR - Solaris asm() will issue SPARC dnl architecture memory barrier instruction dnl . 
HAVE_GCC_ASM_SPARC_STBAR - gcc __asm__ will issue stbar dnl - HAVE_SOLARIS_ASM_SPARC_STBAR - Solaris __asm() will issue stbar -dnl +dnl dnl D*/ AC_DEFUN([PAC_C_MEMATOMIC],[ AC_CACHE_CHECK([for x86 mfence instruction using __asm__], @@ -114,9 +114,9 @@ if test "$lac_cv_have___asm_and_x86_lfence" = "yes" ; then AC_DEFINE(HAVE___ASM_AND_X86_LFENCE, 1, [Define if using __asm on a x86 system with the lfence instruction]) fi -dnl +dnl dnl Some compilers, such as pgcc, may require additional arguments. -dnl pgcc may need -Masmkeyword flag. We may want to try this with and +dnl pgcc may need -Masmkeyword flag. We may want to try this with and dnl without adding -Masmkeyword to CFLAGS AC_CACHE_CHECK([for x86 mfence instruction using asm()], @@ -224,4 +224,4 @@ int main(int argc, char **argv){ if test "$pac_cv_solaris_sparc_stbar" = yes ; then AC_DEFINE(HAVE_SOLARIS_ASM_SPARC_STBAR,1,[Define if solaris asm stbar supported]) fi -]) \ No newline at end of file +]) diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_attr_alias.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_attr_alias.m4 index 32f016546dc..128a08844a4 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_attr_alias.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_attr_alias.m4 @@ -17,7 +17,7 @@ AC_PATH_PROGS_FEATURE_CHECK(NM_G, nm, [ # Tru64's nm complains that /dev/null is an invalid object file # # AIX's sed does not accept \+, 1) instead of doing 's|a\+||', do 's|aa*||' - # or 2) instead of 's|A \+B|AB|g', do 's|A *B|AB|g' + # or 2) instead of 's|A \+B|AB|g', do 's|A *B|AB|g' # Check if nm accepts -g case `${ac_path_NM_G} -g /dev/null 2>&1 | sed '1q'` in @@ -108,7 +108,7 @@ mpif_cmblk_t MPIFCMB; /* Do the test in this file instead in the file - where __attribute__((alias)) is used. + where __attribute__((alias)) is used. This is needed for pgcc since pgcc seems to define aliased symbols if they are in the same file. 
*/ @@ -171,7 +171,7 @@ if test "$pac_c_attr_alias_other" = "yes" ; then #if defined(HAVE_STDIO_H) || defined(STDC_HEADERS) #include #endif - + struct mpif_cmblk_t_ { int imember; }; typedef struct mpif_cmblk_t_ mpif_cmblk_t; @@ -239,7 +239,7 @@ if test "$pac_c_attr_alias_main" = "yes" ; then cmp_addr=${addr} fi done - + if test "$diff_addrs" != "yes" ; then dnl echo "Same addresses. Multiple aliases support" AC_MSG_RESULT([${NM_G} says yes]) @@ -348,7 +348,7 @@ dnl the following weird behavour dnl pgf77 -o ftest ftest.f => when $?=0 with zero stderr output dnl pgf77 -o ftest ftest.f dummy.o => when $?=0 with non-zero stderr output. dnl stderr has "ftest.f:". -dnl +dnl # First create a fortran CONFTEST which will be used repeatedly. AC_LANG_PUSH([Fortran]) dnl AC_LANG_PUSH([Fortran 77]) AC_LANG_CONFTEST([ @@ -385,7 +385,7 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([])],[ pac_f2c_alignedn_diffbase=no ]) # Be sure NOT to remove the conftest.f which is still needed for later use. - # rm -f conftest.$ac_ext + # rm -f conftest.$ac_ext # Restore everything in autoconf that has been overwritten PAC_POP_FLAG([ac_link]) # restore previously pushed LIBS diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_bugfix.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_bugfix.m4 index f580d3919ad..1f8704ed15e 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_bugfix.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_bugfix.m4 @@ -17,26 +17,26 @@ dnl Synopsis: dnl PAC_PROG_CHECK_INSTALL_WORKS dnl dnl Output Effect: -dnl Sets the variable 'INSTALL' to the value of 'ac_sh_install' if +dnl Sets the variable 'INSTALL' to the value of 'ac_sh_install' if dnl a file cannot be installed into a local directory with the 'INSTALL' dnl program dnl dnl Notes: -dnl The 'AC_PROG_INSTALL' scripts tries to avoid broken versions of -dnl install by avoiding directories such as '/usr/sbin' where some -dnl systems are known to have bad versions of 'install'. 
Unfortunately, +dnl The 'AC_PROG_INSTALL' scripts tries to avoid broken versions of +dnl install by avoiding directories such as '/usr/sbin' where some +dnl systems are known to have bad versions of 'install'. Unfortunately, dnl this is exactly the sort of test-on-name instead of test-on-capability dnl that 'autoconf' is meant to eliminate. The test in this script -dnl is very simple but has been adequate for working around problems -dnl on Solaris, where the '/usr/sbin/install' program (known by -dnl autoconf to be bad because it is in /usr/sbin) is also reached by a +dnl is very simple but has been adequate for working around problems +dnl on Solaris, where the '/usr/sbin/install' program (known by +dnl autoconf to be bad because it is in /usr/sbin) is also reached by a dnl soft link through /bin, which autoconf believes is good. dnl -dnl No variables are cached to ensure that we do not make a mistake in +dnl No variables are cached to ensure that we do not make a mistake in dnl our choice of install program. dnl dnl The Solaris configure requires the directory name to immediately -dnl follow the '-c' argument, rather than the more common +dnl follow the '-c' argument, rather than the more common dnl.vb dnl args sourcefiles destination-dir dnl.ve diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_cache.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_cache.m4 index 9d02c416387..d0a79822470 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_cache.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_cache.m4 @@ -1,20 +1,20 @@ dnl dnl/*D -dnl AC_CACHE_LOAD - Replacement for autoconf cache load +dnl AC_CACHE_LOAD - Replacement for autoconf cache load dnl dnl Notes: -dnl Caching in autoconf is broken (at least through version 2.13). +dnl Caching in autoconf is broken (at least through version 2.13). dnl The problem is that the cache is read dnl without any check for whether it makes any sense to read it. 
-dnl A common problem is a build on a shared file system; connecting to +dnl A common problem is a build on a shared file system; connecting to dnl a different computer and then building within the same directory will dnl lead to at best error messages from configure and at worse a build that dnl is wrong but fails only at run time (e.g., wrong datatype sizes used). dnl Later versions of autoconf do include some checks for changes in the dnl environment that impact the choices, but still misses problems with dnl multiple different systems. -dnl -dnl This fixes that by requiring the user to explicitly enable caching +dnl +dnl This fixes that by requiring the user to explicitly enable caching dnl before the cache file will be loaded. dnl dnl To use this version of 'AC_CACHE_LOAD', you need to include @@ -26,7 +26,7 @@ dnl command causes configure to keep track of the system being configured dnl in a config.system file; if the current system matches the value stored dnl in that file (or there is neither a config.cache nor config.system file), dnl configure will enable caching. In order to ensure that the configure -dnl tests make sense, the values of CC, F77, F90, and CXX are also included +dnl tests make sense, the values of CC, F77, F90, and CXX are also included dnl in the config.system file. In addition, the value of PATH is included dnl to ensure that changes in the PATH that might select a different version dnl of a program with the same name (such as a native make versus gnumake) @@ -34,7 +34,7 @@ dnl are detected. dnl dnl Bugs: dnl This does not work with the Cygnus configure because the enable arguments -dnl are processed *after* AC_CACHE_LOAD (!). To address this, we avoid +dnl are processed *after* AC_CACHE_LOAD (!). To address this, we avoid dnl changing the value of enable_cache, and use real_enable_cache, duplicating dnl the "notgiven" value. 
dnl @@ -46,14 +46,14 @@ dnl See Also: dnl PAC_ARG_CACHING dnl D*/ define([AC_CACHE_LOAD], -[if test "$CONFIGURE_DEBUG_CACHE" = yes ; then +[if test "$CONFIGURE_DEBUG_CACHE" = yes ; then oldopts="$-" clearMinusX=no - set -x - if test "$oldopts" != "$-" ; then + set -x + if test "$oldopts" != "$-" ; then clearMinusX=yes fi -fi +fi if test "X$cache_system" = "X" ; then # A default file name, just in case cache_system="config.system" @@ -66,13 +66,13 @@ if test "X$cache_system" = "X" ; then test "x$cache_system" = "x$cache_file" && cache_system="config.system" # else # We must *not* set enable_cache to no because we need to know if -# enable_cache was not set. +# enable_cache was not set. # enable_cache=no fi fi dnl dnl The "action-if-not-given" part of AC_ARG_ENABLE is not executed until -dnl after the AC_CACHE_LOAD is executed (!). Thus, the value of +dnl after the AC_CACHE_LOAD is executed (!). Thus, the value of dnl enable_cache if neither --enable-cache or --disable-cache is selected dnl is null. Just in case autoconf ever fixes this, we test both cases. dnl @@ -81,12 +81,12 @@ dnl change which versions of programs are found (such as vendor make dnl or GNU make). dnl # -# Get a test value and flag whether we should remove/replace the +# Get a test value and flag whether we should remove/replace the # cache_system file (do so unless cache_system_ok is yes) # FC and F77 should be synonyms. Save both in case # We include the xxxFLAGS in case the user is using the flags to change -# the language (either input or output) of the compiler. E.g., -# using -xarch=v9 on Solaris to select 64 bit output or using -D_BSD_SOURCE +# the language (either input or output) of the compiler. E.g., +# using -xarch=v9 on Solaris to select 64 bit output or using -D_BSD_SOURCE # with gcc to get different header files on input. 
cleanargs=`echo "$CC $F77 $FC $CXX $F90 $CFLAGS $FFLAGS $CXXFLAGS $F90FLAGS $PATH" | tr '"' ' '` if uname -srm >/dev/null 2>&1 ; then @@ -112,7 +112,7 @@ if test "X$real_enable_cache" = "Xnotgiven" ; then elif test ! -f "$cache_system" -a -n "$cache_system_text" ; then # remove the cache file because it may not correspond to our # system - if test "$cache_file" != "/dev/null" ; then + if test "$cache_file" != "/dev/null" ; then rm -f $cache_file fi real_enable_cache="yes" @@ -157,7 +157,7 @@ if test "$clearMinusX" = yes ; then fi ]) dnl -dnl/*D +dnl/*D dnl PAC_ARG_CACHING - Enable caching of results from a configure execution dnl dnl Synopsis: @@ -165,7 +165,7 @@ dnl PAC_ARG_CACHING dnl dnl Output Effects: dnl Adds '--enable-cache' and '--disable-cache' to the command line arguments -dnl accepted by 'configure'. +dnl accepted by 'configure'. dnl dnl See Also: dnl AC_CACHE_LOAD @@ -194,7 +194,7 @@ AC_DEFUN([PAC_CACHE_CLEAN],[ else echo "not updating unwritable cache $cache_file" fi - fi + fi rm -f confcache if test "$DEBUG_AUTOCONF_CACHE" = "yes" ; then echo "Results of cleaned cache file:" @@ -207,20 +207,20 @@ AC_DEFUN([PAC_CACHE_CLEAN],[ dnl/*D dnl PAC_SUBDIR_CACHE - Create a cache file before ac_output for subdirectory dnl configures. -dnl +dnl dnl Synopsis: dnl PAC_SUBDIR_CACHE(when) dnl dnl Input Parameter: dnl . when - Indicates when the cache should be created (optional) dnl If 'always', create a new cache file. This option -dnl should be used if any of the cache parameters (such as +dnl should be used if any of the cache parameters (such as dnl CFLAGS or LDFLAGS) may have changed. dnl dnl Output Effects: -dnl +dnl dnl Create a cache file before ac_output so that subdir configures don't -dnl make mistakes. +dnl make mistakes. dnl We can't use OUTPUT_COMMANDS to remove the cache file, because those dnl commands are executed *before* the subdir configures. 
dnl @@ -232,9 +232,9 @@ if test "x$1" = "xalways" -o \( "$cache_file" = "/dev/null" -a "X$real_enable_ca # lost cache_file=`pwd`/$$conf.cache touch $cache_file - dnl + dnl dnl For Autoconf 2.52+, we should ensure that the environment is set - dnl for the cache. Make sure that we get the values and set the + dnl for the cache. Make sure that we get the values and set the dnl xxx_set variables properly ac_cv_env_CC_set=set ac_cv_env_CC_value=$CC @@ -320,7 +320,7 @@ fi ]) dnl dnl The following three macros support the sharing of configure results -dnl by configure scripts, including ones that are not run with +dnl by configure scripts, including ones that are not run with dnl AC_CONFIG_SUBDIRS (the cachefiles managed by --enable-cache can dnl only be used with AC_CONFIG_SUBDIRS; creating a autoconf-style dnl cachefile before the the end of the autoconf process will often @@ -335,7 +335,7 @@ AC_ARG_ENABLE(base-cache, if test "$enable_base_cache" = "default" ; then if test "$CONF_USE_CACHEFILE" = yes ; then enable_base_cache=yes - else + else enable_base_cache=no fi fi diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_cc.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_cc.m4 index 92e7ee109bc..fc801f1709f 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_cc.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_cc.m4 @@ -27,13 +27,13 @@ dnl dnl If no actions are specified, a working value is added to 'COPTIONS' dnl dnl Notes: -dnl This is now careful to check that the output is different, since +dnl This is now careful to check that the output is different, since dnl some compilers are noisy. -dnl +dnl dnl We are extra careful to prototype the functions in case compiler options dnl that complain about poor code are in effect. dnl -dnl Because this is a long script, we have ensured that you can pass a +dnl Because this is a long script, we have ensured that you can pass a dnl variable containing the option name as the first argument. 
dnl dnl D*/ @@ -68,11 +68,11 @@ AC_MSG_RESULT([$pac_result]) dnl Delete the conftest created by AC_LANG_CONFTEST. rm -f conftest.$ac_ext -# gcc 4.2.4 on 32-bit does not complain about the -Wno-type-limits option -# even though it doesn't support it. However, when another warning is -# triggered, it gives an error that the option is not recognized. So we +# gcc 4.2.4 on 32-bit does not complain about the -Wno-type-limits option +# even though it doesn't support it. However, when another warning is +# triggered, it gives an error that the option is not recognized. So we # need to test with a conftest file that will generate warnings. -# +# # add an extra switch, pac_c_check_compiler_option_prototest, to # disable this test just in case some new compiler does not like it. # @@ -155,13 +155,13 @@ dnl PAC_C_OPTIMIZATION([action if found]) dnl dnl Output Effect: dnl Adds options to 'COPTIONS' if no other action is specified -dnl +dnl dnl Notes: dnl This is a temporary standin for compiler optimization. dnl It should try to match known systems to known compilers (checking, of dnl course), and then falling back to some common defaults. dnl Note that many compilers will complain about -g and aggressive -dnl optimization. +dnl optimization. 
dnl D*/ AC_DEFUN([PAC_C_OPTIMIZATION],[ for copt in "-O4 -Ofast" "-Ofast" "-fast" "-O3" "-xO3" "-O" ; do @@ -221,18 +221,18 @@ return 0; } ],pac_cv_prog_c_unaligned_doubles="yes",pac_cv_prog_c_unaligned_doubles="no", pac_cv_prog_c_unaligned_doubles="unknown")]) -ifelse($1,,,if test "X$pac_cv_prog_c_unaligned_doubles" = "yes" ; then +ifelse($1,,,if test "X$pac_cv_prog_c_unaligned_doubles" = "yes" ; then $1 fi) -ifelse($2,,,if test "X$pac_cv_prog_c_unaligned_doubles" = "no" ; then +ifelse($2,,,if test "X$pac_cv_prog_c_unaligned_doubles" = "no" ; then $2 fi) -ifelse($3,,,if test "X$pac_cv_prog_c_unaligned_doubles" = "unknown" ; then +ifelse($3,,,if test "X$pac_cv_prog_c_unaligned_doubles" = "unknown" ; then $3 fi) ]) -dnl/*D +dnl/*D dnl PAC_PROG_C_WEAK_SYMBOLS - Test whether C supports weak alias symbols. dnl dnl Synopsis @@ -254,7 +254,7 @@ dnl sets the shell variable pac_cv_attr_weak to yes. dnl Also checks for __attribute__((weak_import)) which is supported by dnl Apple in Mac OSX (at least in Darwin). Note that this provides only dnl weak symbols, not weak aliases -dnl +dnl dnl D*/ AC_DEFUN([PAC_PROG_C_WEAK_SYMBOLS],[ pragma_extra_message="" @@ -273,7 +273,7 @@ int Foo(int a) { return a; } # only within a single object file! This tests that case. # Note that there is an extern int PFoo declaration before the # pragma. Some compilers require this in order to make the weak symbol -# externally visible. +# externally visible. if test "$has_pragma_weak" = yes ; then PAC_COMPLINK_IFELSE([ AC_LANG_SOURCE([ @@ -315,7 +315,7 @@ return Foo(0);} ]) fi dnl -if test -z "$pac_cv_prog_c_weak_symbols" ; then +if test -z "$pac_cv_prog_c_weak_symbols" ; then AC_TRY_LINK([ extern int PFoo(int); #pragma _HP_SECONDARY_DEF Foo PFoo @@ -404,14 +404,14 @@ correctly set error code when a fatal error occurs]) fi ]) -dnl/*D +dnl/*D dnl PAC_PROG_C_MULTIPLE_WEAK_SYMBOLS - Test whether C and the dnl linker allow multiple weak symbols. 
dnl dnl Synopsis dnl PAC_PROG_C_MULTIPLE_WEAK_SYMBOLS(action-if-true,action-if-false) dnl -dnl +dnl dnl D*/ AC_DEFUN([PAC_PROG_C_MULTIPLE_WEAK_SYMBOLS],[ AC_CACHE_CHECK([for multiple weak symbol support], @@ -451,10 +451,10 @@ dnl pac_cc_strict_flags contains the strict flags. dnl dnl -std=c89 is used to select the C89 version of the ANSI/ISO C standard. dnl As of this writing, many C compilers still accepted only this version, -dnl not the later C99 version. When all compilers accept C99, this +dnl not the later C99 version. When all compilers accept C99, this dnl should be changed to the appropriate standard level. Note that we've -dnl had trouble with gcc 2.95.3 accepting -std=c89 but then trying to -dnl compile program with a invalid set of options +dnl had trouble with gcc 2.95.3 accepting -std=c89 but then trying to +dnl compile program with a invalid set of options dnl (-D __STRICT_ANSI__-trigraphs) AC_DEFUN([PAC_CC_STRICT],[ export enable_strict_done @@ -469,14 +469,14 @@ if test "$enable_strict_done" != "yes" ; then # warning at 256k. # # These were added to reduce warnings: - # -Wno-missing-field-initializers -- We want to allow a struct to be - # initialized to zero using "struct x y = {0};" and not require + # -Wno-missing-field-initializers -- We want to allow a struct to be + # initialized to zero using "struct x y = {0};" and not require # each field to be initialized individually. # -Wno-unused-parameter -- For portability, some parameters go unused - # when we have different implementations of functions for + # when we have different implementations of functions for # different platforms - # -Wno-unused-label -- We add fn_exit: and fn_fail: on all functions, - # but fn_fail may not be used if the function doesn't return an + # -Wno-unused-label -- We add fn_exit: and fn_fail: on all functions, + # but fn_fail may not be used if the function doesn't return an # error. 
# -Wno-sign-compare -- read() and write() return bytes read/written # as a signed value, but we often compare this to size_t (or @@ -484,10 +484,10 @@ if test "$enable_strict_done" != "yes" ; then # -Wno-format-zero-length -- this warning is irritating and useless, since # a zero-length format string is very well defined # These were removed to reduce warnings: - # -Wcast-qual -- Sometimes we need to cast "volatile char*" to + # -Wcast-qual -- Sometimes we need to cast "volatile char*" to # "char*", e.g., for memcpy. # -Wpadded -- We catch struct padding with asserts when we need to - # -Wredundant-decls -- Having redundant declarations is benign and the + # -Wredundant-decls -- Having redundant declarations is benign and the # code already has some. # -Waggregate-return -- This seems to be a performance-related warning # aggregate return values are legal in ANSI C, but they may be returned @@ -498,7 +498,7 @@ if test "$enable_strict_done" != "yes" ; then # -Wdeclaration-after-statement -- This is a C89 # requirement. When compiling with C99, this should be # disabled. - # -Wfloat-equal -- There are places in hwloc that set a float var to 0, then + # -Wfloat-equal -- There are places in hwloc that set a float var to 0, then # compare it to 0 later to see if it was updated. Also when using strtod() # one needs to compare the return value with 0 to see whether a conversion # was performed. @@ -519,11 +519,11 @@ if test "$enable_strict_done" != "yes" ; then # However, since Intel compiler currently does not include -Wtype-limits # in -Wextra, -Wtype-limits was added to handle warnings with the Intel # compiler. - # -Wno-type-limits -- There are places where we compare an unsigned to - # a constant that happens to be zero e.g., if x is unsigned and + # -Wno-type-limits -- There are places where we compare an unsigned to + # a constant that happens to be zero e.g., if x is unsigned and # MIN_VAL is zero, we'd like to do "MPIU_Assert(x >= MIN_VAL);". 
- # Note this option is not supported by gcc 4.2. This needs to be added - # after most other warning flags, so that we catch a gcc bug on 32-bit + # Note this option is not supported by gcc 4.2. This needs to be added + # after most other warning flags, so that we catch a gcc bug on 32-bit # that doesn't give a warning that this is unsupported, unless another # warning is triggered, and then if gives an error. # the embedded newlines in this string are safe because we evaluate each @@ -660,11 +660,11 @@ fi ]) dnl/*D -dnl PAC_ARG_STRICT - Add --enable-strict to configure. +dnl PAC_ARG_STRICT - Add --enable-strict to configure. dnl dnl Synopsis: dnl PAC_ARG_STRICT -dnl +dnl dnl Output effects: dnl Adds '--enable-strict' to the command line. dnl @@ -685,7 +685,7 @@ dnl four dnl eight dnl dnl In addition, a "Could not determine alignment" and a "error!" -dnl return is possible. +dnl return is possible. AC_DEFUN([PAC_C_MAX_INTEGER_ALIGN],[ AC_CACHE_CHECK([for max C struct integer alignment], pac_cv_c_max_integer_align,[ @@ -816,7 +816,7 @@ dnl eight dnl sixteen dnl dnl In addition, a "Could not determine alignment" and a "error!" -dnl return is possible. +dnl return is possible. AC_DEFUN([PAC_C_MAX_FP_ALIGN],[ AC_CACHE_CHECK([for max C struct floating point alignment], pac_cv_c_max_fp_align,[ @@ -930,7 +930,7 @@ dnl four dnl eight dnl dnl In addition, a "Could not determine alignment" and a "error!" -dnl return is possible. +dnl return is possible. AC_DEFUN([PAC_C_MAX_DOUBLE_FP_ALIGN],[ AC_CACHE_CHECK([for max C struct alignment of structs with doubles], pac_cv_c_max_double_fp_align,[ @@ -1232,7 +1232,7 @@ dnl Synopsis: dnl PAC_FUNC_NEEDS_DECL(headerfiles,funcname) dnl dnl Output Effect: -dnl Sets 'NEEDS__DECL' if 'funcname' is not declared by the +dnl Sets 'NEEDS__DECL' if 'funcname' is not declared by the dnl headerfiles. dnl dnl Approach: @@ -1269,8 +1269,8 @@ dnl #endif dnl If *not*, define __attribute__(a) as null dnl dnl We start by requiring Gcc. 
Some other compilers accept __attribute__ -dnl but generate warning messages, or have different interpretations -dnl (which seems to make __attribute__ just as bad as #pragma) +dnl but generate warning messages, or have different interpretations +dnl (which seems to make __attribute__ just as bad as #pragma) dnl For example, the Intel icc compiler accepts __attribute__ and dnl __attribute__((pure)) but generates warnings for __attribute__((format...)) dnl @@ -1308,7 +1308,7 @@ AC_COMPILE_IFELSE([ ],[ if ${AR-ar} ${AR_FLAGS-cr} libconftest.a conftest.$OBJEXT >/dev/null 2>&1 ; then if ${RANLIB-:} libconftest.a >/dev/null 2>&1 ; then - # Anything less than sleep 10, and Mac OS/X (Darwin) + # Anything less than sleep 10, and Mac OS/X (Darwin) # will claim that install works because ranlib won't complain sleep 10 libinstall="$INSTALL_DATA" @@ -1325,7 +1325,7 @@ int main(int argc, char **argv){ return foo(0); } # Success! Install works ac_cv_prog_install_breaks_libs=no ],[ - # Failure! Does install -p work? + # Failure! Does install -p work? rm -f libconftest1.a if ${libinstall} -p libconftest.a libconftest1.a >/dev/null 2>&1 ; then AC_LINK_IFELSE([],[ @@ -1366,7 +1366,7 @@ AC_SUBST(RANLIB_AFTER_INSTALL) # These tests check not only that the compiler defines some symbol, such # as __FUNCTION__, but that the symbol correctly names the function. 
# -# Defines +# Defines # HAVE__FUNC__ (if __func__ defined) # HAVE_CAP__FUNC__ (if __FUNC__ defined) # HAVE__FUNCTION__ (if __FUNCTION__ defined) diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_coverage.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_coverage.m4 index ce949d40a65..4b7cf0881ad 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_coverage.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_coverage.m4 @@ -44,7 +44,7 @@ if test "$enable_coverage" = "yes" ; then fi fi # Add similar options for g77 so that the Fortran tests will also - # + # if test "$enable_f77" = yes ; then if test "$ac_cv_f77_compiler_gnu" = "yes" ; then FFLAGS="$FFLAGS -fprofile-arcs -ftest-coverage" @@ -67,14 +67,14 @@ if test "$enable_coverage" = "yes" ; then AC_MSG_WARN([--enable-coverage only supported for GFORTRAN]) fi fi - # On some platforms (e.g., Mac Darwin), we must also *link* + # On some platforms (e.g., Mac Darwin), we must also *link* # with the -fprofile-args -ftest-coverage option. 
AC_MSG_CHECKING([whether compilation with coverage analysis enabled works]) AC_LINK_IFELSE([AC_LANG_SOURCE([int main(int argc, char **argv){return 1;}])], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) AC_MSG_ERROR([Unable to link programs when coverage analysis enabled])]) - + # Test for the routines that we need to use to ensure that the # data files are (usually) written out # FIXME: Some versions of Linux provide usleep, but it rounds times diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_cxx.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_cxx.m4 index 72febd8ab9d..2edf6235389 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_cxx.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_cxx.m4 @@ -78,7 +78,7 @@ AC_DEFUN([AX_CXX_NAMESPACE_STD], AC_CACHE_CHECK(whether the compiler implements the namespace std, ac_cv_cxx_namespace_std, [ac_cv_cxx_namespace_std=no -if test "$ac_cv_cxx_namespaces" = yes ; then +if test "$ac_cv_cxx_namespaces" = yes ; then AC_LANG_SAVE AC_LANG_CPLUSPLUS AC_TRY_COMPILE([ @@ -106,13 +106,13 @@ dnl dnl If no actions are specified, a working value is added to 'CXXOPTIONS' dnl dnl Notes: -dnl This is now careful to check that the output is different, since +dnl This is now careful to check that the output is different, since dnl some compilers are noisy. -dnl +dnl dnl We are extra careful to prototype the functions in case compiler options dnl that complain about poor code are in effect. dnl -dnl Because this is a long script, we have ensured that you can pass a +dnl Because this is a long script, we have ensured that you can pass a dnl variable containing the option name as the first argument. 
dnl D*/ AC_DEFUN([PAC_CXX_CHECK_COMPILER_OPTION],[ diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_f77.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_f77.m4 index 92d310b0f21..e7a8feff72e 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_f77.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_f77.m4 @@ -13,7 +13,7 @@ PAC_POP_FLAG([FFLAGS]) dnl dnl/*D dnl PAC_PROG_F77_NAME_MANGLE - Determine how the Fortran compiler mangles -dnl names +dnl names dnl dnl Synopsis: dnl PAC_PROG_F77_NAME_MANGLE([action]) @@ -31,20 +31,20 @@ dnl mixed -> mixed_ F77_NAME_MIXED_USCORE dnl mixed -> UPPER@STACK_SIZE F77_NAME_UPPER_STDCALL dnl.ve dnl If an action is specified, it is executed instead. -dnl +dnl dnl Notes: dnl We assume that if lower -> lower (any underscore), upper -> upper with the -dnl same underscore behavior. Previous versions did this by -dnl compiling a Fortran program and running strings -a over it. Depending on -dnl strings is a bad idea, so instead we try compiling and linking with a +dnl same underscore behavior. Previous versions did this by +dnl compiling a Fortran program and running strings -a over it. Depending on +dnl strings is a bad idea, so instead we try compiling and linking with a dnl C program, since that is why we are doing this anyway. A similar approach -dnl is used by FFTW, though without some of the cases we check (specifically, +dnl is used by FFTW, though without some of the cases we check (specifically, dnl mixed name mangling). 
STD_CALL not only specifies a particular name dnl mangling convention (adding the size of the calling stack into the function dnl name, but also the stack management convention (callee cleans the stack, dnl and arguments are pushed onto the stack from right to left) dnl -dnl One additional problem is that some Fortran implementations include +dnl One additional problem is that some Fortran implementations include dnl references to the runtime (like pgf90_compiled for the pgf90 compiler dnl used as the "Fortran 77" compiler). This is not yet solved. dnl @@ -104,7 +104,7 @@ if test "$pac_found" != "yes" ; then AC_LANG_PROGRAM([],[ call my_name(0)]) ],[ pac_found=yes - ]) + ]) AC_LANG_POP([Fortran 77]) LIBS="$saved_LIBS" rm -f cconftest.$OBJEXT @@ -163,7 +163,7 @@ name_scheme="`echo $pac_cv_prog_f77_name_mangle | sed 's% %_%g'`" # Turn lowercase into uppercase. name_scheme="`echo $name_scheme | sed -e 'y%abcdefghijklmnopqrstuvwxyz%ABCDEFGHIJKLMNOPQRSTUVWXYZ%'`" F77_NAME_MANGLE="F77_NAME_${name_scheme}" -AC_DEFINE_UNQUOTED([$F77_NAME_MANGLE]) +AC_DEFINE_UNQUOTED([$F77_NAME_MANGLE]) AC_SUBST(F77_NAME_MANGLE) if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then AC_MSG_WARN([Unknown Fortran naming scheme]) @@ -208,11 +208,11 @@ dnl Sets SIZEOF_F77_uctype to the size if bytes of type. dnl If type is unknown, the size is set to 0. dnl If cross-compiling, the value cross-size is used (it may be a variable) dnl For example 'PAC_PROG_F77_CHECK_SIZEOF(real)' defines -dnl 'SIZEOF_F77_REAL' to 4 on most systems. The variable +dnl 'SIZEOF_F77_REAL' to 4 on most systems. The variable dnl 'pac_cv_sizeof_f77_' (e.g., 'pac_cv_sizeof_f77_real') is also set to -dnl the size of the type. +dnl the size of the type. dnl If the corresponding variable is already set, that value is used. 
-dnl If the name has an '*' in it (e.g., 'integer*4'), the defined name +dnl If the name has an '*' in it (e.g., 'integer*4'), the defined name dnl replaces that with an underscore (e.g., 'SIZEOF_F77_INTEGER_4'). dnl dnl Notes: @@ -263,7 +263,7 @@ static int isize_val=0; void cisize_(char *,char*); void isize_(void); void cisize_(char *i1p, char *i2p) -{ +{ isize_val = (int)(i2p - i1p); } ],[ @@ -398,7 +398,7 @@ dnl Notes: dnl Check whether '!' may be used to begin comments in Fortran. dnl dnl This macro requires a version of autoconf `after` 2.13; the 'acgeneral.m4' -dnl file contains an error in the handling of Fortran programs in +dnl file contains an error in the handling of Fortran programs in dnl 'AC_TRY_COMPILE' (fixed in our local version). dnl dnl D*/ @@ -434,13 +434,13 @@ dnl dnl If no actions are specified, a working value is added to 'FOPTIONS' dnl dnl Notes: -dnl This is now careful to check that the output is different, since +dnl This is now careful to check that the output is different, since dnl some compilers are noisy. -dnl +dnl dnl We are extra careful to prototype the functions in case compiler options dnl that complain about poor code are in effect. dnl -dnl Because this is a long script, we have ensured that you can pass a +dnl Because this is a long script, we have ensured that you can pass a dnl variable containing the option name as the first argument. dnl D*/ AC_DEFUN([PAC_F77_CHECK_COMPILER_OPTION],[ @@ -519,10 +519,10 @@ dnl PAC_PROG_F77_LIBRARY_DIR_FLAG - Determine the flag used to indicate dnl the directories to find libraries in dnl dnl Notes: -dnl Many compilers accept '-Ldir' just like most C compilers. -dnl Unfortunately, some (such as some HPUX Fortran compilers) do not, +dnl Many compilers accept '-Ldir' just like most C compilers. +dnl Unfortunately, some (such as some HPUX Fortran compilers) do not, dnl and require instead either '-Wl,-L,dir' or something else. This -dnl command attempts to determine what is accepted. 
The flag is +dnl command attempts to determine what is accepted. The flag is dnl placed into 'F77_LIBDIR_LEADER'. dnl dnl D*/ @@ -574,19 +574,19 @@ if test "X$pac_cv_prog_f77_library_dir_flag" != "Xnone" ; then fi ]) dnl -dnl/*D +dnl/*D dnl PAC_PROG_F77_HAS_INCDIR - Check whether Fortran accepts -Idir flag dnl dnl Syntax: dnl PAC_PROG_F77_HAS_INCDIR(directory,action-if-true,action-if-false) dnl dnl Output Effect: -dnl Sets 'F77_INCDIR' to the flag used to choose the directory. +dnl Sets 'F77_INCDIR' to the flag used to choose the directory. dnl dnl Notes: dnl This refers to the handling of the common Fortran include extension, dnl not to the use of '#include' with the C preprocessor. -dnl If directory does not exist, it will be created. In that case, the +dnl If directory does not exist, it will be created. In that case, the dnl directory should be a direct descendant of the current directory. dnl dnl D*/ @@ -628,7 +628,7 @@ fi dnl dnl/*D dnl PAC_PROG_F77_ALLOWS_UNUSED_EXTERNALS - Check whether the Fortran compiler -dnl allows unused and undefined functions to be listed in an external +dnl allows unused and undefined functions to be listed in an external dnl statement dnl dnl Syntax: @@ -659,12 +659,12 @@ else ifelse([$2],[],[:],[$2]) fi ]) -dnl PAC_PROG_F77_RUN_PROC_FROM_C( c main program, fortran routine, -dnl [action-if-works], [action-if-fails], +dnl PAC_PROG_F77_RUN_PROC_FROM_C( c main program, fortran routine, +dnl [action-if-works], [action-if-fails], dnl [cross-action] ) dnl Fortran routine MUST be named ftest unless you include code dnl to select the appropriate Fortran name. -dnl +dnl AC_DEFUN([PAC_PROG_F77_RUN_PROC_FROM_C],[ AC_REQUIRE([AC_HEADER_STDC]) AC_REQUIRE([AC_F77_LIBRARY_LDFLAGS]) @@ -706,18 +706,18 @@ AC_LANG_POP([Fortran 77]) ]) dnl PAC_PROG_F77_IN_C_LIBS dnl -dnl Find the essential libraries that are needed to use the C linker to -dnl create a program that includes a trival Fortran code. 
+dnl Find the essential libraries that are needed to use the C linker to +dnl create a program that includes a trival Fortran code. dnl dnl For example, all pgf90 compiled objects include a reference to the dnl symbol pgf90_compiled, found in libpgf90 . dnl -dnl There is an additional problem. To *run* programs, we may need +dnl There is an additional problem. To *run* programs, we may need dnl additional arguments; e.g., if shared libraries are used. Even dnl with autoconf 2.52, the autoconf macro to find the library arguments dnl doesn't handle this, either by detecting the use of -rpath or dnl by trying to *run* a trivial program. It only checks for *linking*. -dnl +dnl dnl AC_DEFUN([PAC_PROG_F77_IN_C_LIBS],[ AC_REQUIRE([AC_HEADER_STDC]) @@ -788,7 +788,7 @@ fi ]) dnl dnl Test to see if we should use C or Fortran to link programs whose -dnl main program is in Fortran. We may find that neither work because +dnl main program is in Fortran. We may find that neither work because dnl we need special libraries in each case. dnl AC_DEFUN([PAC_PROG_F77_LINKER_WITH_C],[ @@ -905,7 +905,7 @@ AC_LINK_IFELSE([],[ pac_libs="" pac_other="" for name in $FLIBS ; do - case $name in + case $name in -l*) pac_libs="$pac_libs $name" ;; -L*) pac_ldirs="$pac_ldirs $name" ;; *) pac_other="$pac_other $name" ;; @@ -927,7 +927,7 @@ AC_LANG_PUSH([C]) ]) dnl dnl Test for extra libraries needed when linking C routines that use -dnl stdio with Fortran. This test was created for OSX, which +dnl stdio with Fortran. This test was created for OSX, which dnl sometimes requires -lSystemStubs. If another library is needed, dnl add it to F77_OTHER_LIBS dnl @@ -991,7 +991,7 @@ dnl Endof ac_cache_check if test "$pac_cv_prog_f77_and_c_stdio_libs" != "none" \ -a "$pac_cv_prog_f77_and_c_stdio_libs" != "unknown" ; then F77_OTHER_LIBS="$F77_OTHER_LIBS $pac_cv_prog_f77_and_c_stdio_libs" -fi +fi ]) dnl dnl Check that the FLIBS determined by AC_F77_LIBRARY_LDFLAGS is valid. 
@@ -1205,7 +1205,7 @@ AC_LANG_POP([Fortran 77]) AC_MSG_RESULT([$pac_cv_f77_accepts_F]) ]) dnl -dnl /*D +dnl /*D dnl PAC_PROG_F77_CRAY_POINTER - Check if Fortran 77 supports Cray-style pointer. dnl If so, set pac_cv_prog_f77_has_pointer to yes dnl and find out if any extra compiler flag is @@ -1325,16 +1325,16 @@ dnl PAC_F77_LOGICALS_IN_C(MPI_FINT) dnl dnl where MPI_FINT is the C type for Fortran integer. dnl -dnl Use a Fortran main program. This simplifies some steps, -dnl since getting all of the Fortran libraries (including shared -dnl libraries that are not in the default library search path) can -dnl be tricky. Specifically, The PROG_F77_RUN_PROC_FROM_C failed with +dnl Use a Fortran main program. This simplifies some steps, +dnl since getting all of the Fortran libraries (including shared +dnl libraries that are not in the default library search path) can +dnl be tricky. Specifically, The PROG_F77_RUN_PROC_FROM_C failed with dnl some installations of the Portland group compiler. dnl dnl We'd also like to check other values for .TRUE. and .FALSE. to see dnl if the compiler allows (or uses) more than one value (some DEC compilers, -dnl for example, used the high (sign) bit to indicate true and false; the -dnl rest of the bits were ignored. For now, we'll assume that there are +dnl for example, used the high (sign) bit to indicate true and false; the +dnl rest of the bits were ignored. For now, we'll assume that there are dnl unique true and false values. dnl AC_DEFUN([PAC_F77_LOGICALS_IN_C],[ @@ -1416,14 +1416,14 @@ if test -n "$true_val" -a -n "$false_val" ; then fi ]) dnl/*D -dnl PAC_PROG_F77_MISMATCHED_ARGS([option],[AllOnly]) - Determine whether the -dnl Fortran compiler allows routines to be called with different -dnl argument types. 
If not, attempts to determine a command-line argument -dnl that permits such use +dnl PAC_PROG_F77_MISMATCHED_ARGS([option],[AllOnly]) - Determine whether the +dnl Fortran compiler allows routines to be called with different +dnl argument types. If not, attempts to determine a command-line argument +dnl that permits such use dnl (The Fortran standard prohibits this usage) dnl dnl option is set to the compiler option to use. -dnl if AllOnly is yes (literal, not variable with value), then only consider +dnl if AllOnly is yes (literal, not variable with value), then only consider dnl options that turn off checking dnl for all routines dnl @@ -1470,7 +1470,7 @@ if test "X$pac_cv_prog_f77_mismatched_args" = X ; then FFLAGS="$save_FFLAGS" if test "$testok" = yes ; then break ; fi done - if test "$testok" = yes ; then + if test "$testok" = yes ; then pac_cv_prog_f77_mismatched_args_parm="$flags" pac_cv_prog_f77_mismatched_args="yes, with $pac_cv_prog_f77_mismatched_args_parm" fi @@ -1479,7 +1479,7 @@ if test "X$pac_cv_prog_f77_mismatched_args" = X ; then fi AC_MSG_RESULT($pac_cv_prog_f77_mismatched_args) if test "$pac_cv_prog_f77_mismatched_args" = no ; then - AC_MSG_ERROR([The Fortran compiler $F77 will not compile files that call + AC_MSG_ERROR([The Fortran compiler $F77 will not compile files that call the same routine with arguments of different types.]) fi diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_f77old.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_f77old.m4 index 5d6e2d9e70a..71d1ae4023e 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_f77old.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_f77old.m4 @@ -15,7 +15,7 @@ dnl.ve dnl If 'F77_GETARG' has a value, then that value and the values for these dnl other symbols will be used instead. If no approach is found, all of these dnl variables will have empty values. 
-dnl If no other approach works and a file 'f77argdef' is in the directory, +dnl If no other approach works and a file 'f77argdef' is in the directory, dnl that file will be sourced for the values of the above four variables. dnl dnl In most cases, you should add F77_GETARG_FFLAGS to the FFLAGS variable @@ -24,13 +24,13 @@ dnl performed on the compiler version that will be used. dnl dnl 'AC_SUBST' is called for all six variables. dnl -dnl One complication is that on systems with multiple Fortran compilers, +dnl One complication is that on systems with multiple Fortran compilers, dnl some libraries used by one Fortran compiler may have been (mis)placed dnl in a common location. We have had trouble with libg2c in particular. dnl To work around this, we test whether iargc etc. work first. This dnl will catch most systems and will speed up the tests. dnl -dnl Next, the libraries are only added if they are needed to complete a +dnl Next, the libraries are only added if they are needed to complete a dnl link; they aren''t added just because they exist. dnl dnl f77argdef @@ -55,11 +55,11 @@ AC_CACHE_VAL(pac_cv_prog_f77_cmdarg, f77_getargdecl="${F77_GETARGDECL:-external getarg}" f77_getarg="${F77_GETARG:-call GETARG(i,s)}" f77_iargc="${F77_IARGC:-IARGC()}" - # + # # Grumble. The Absoft Fortran compiler computes i - i as 0 and then # 1.0 / 0 at compile time, even though the code may never be executed. # What we need is a way to generate an error, so the second usage of i - # was replaced with f77_iargc. + # was replaced with f77_iargc. 
cat > conftest.f <, but this caused other + # (we experimented with using a , but this caused other # problems because we need in the IFS) trial_LIBS="0 -lU77 -lPEPCF90" if test "$NOG2C" != "1" ; then @@ -149,7 +149,7 @@ $libs" # The -N109 was used for getarg before we realized that GETARG # was necessary with the (non standard conforming) Absoft compiler # (Fortran is monocase; Absoft uses mixedcase by default) - # The -f is used by Absoft and is the compiler switch that folds + # The -f is used by Absoft and is the compiler switch that folds # symbolic names to lower case. Without this option, the compiler # considers upper- and lower-case letters to be unique. # The -YEXT_NAMES=LCS will cause external names to be output as lower @@ -157,14 +157,14 @@ $libs" # The first line is ", the space is important # To make the Absoft f77 and f90 work together, we need to prefer the # upper case versions of the arguments. They also require libU77. - # -YCFRL=1 causes Absoft f90 to work with g77 and similar (f2c-based) + # -YCFRL=1 causes Absoft f90 to work with g77 and similar (f2c-based) # Fortran compilers # # Problem: The Intel efc compiler hangs when presented with -N109 . # The only real fix for this is to detect this compiler and exclude # the test. We may want to reorganize these tests so that if we # can compile code without special options, we never look for them. 
- # + # using_intel_efc="no" pac_test_msg=`$F77 -V 2>&1 | grep 'Intel(R) Fortran Itanium'` if test "$pac_test_msg" != "" ; then @@ -194,10 +194,10 @@ $libs" PAC_F77_CHECK_COMPILER_OPTION($flag,opt_ok=yes,opt_ok=no) fi if test "$opt_ok" = "yes" ; then - if test "$flag" = " " -o "$flag" = "000" ; then - fflag="" - else - fflag="$flag" + if test "$flag" = " " -o "$flag" = "000" ; then + fflag="" + else + fflag="$flag" fi # discard options that don't allow mixed-case name matching cat > conftest.f </dev/null 2>&1 ; then found_answer="yes" fi - else + else found_answer="yes" fi fi @@ -358,9 +358,9 @@ EOF fi done done - IFS="$save_IFS" + IFS="$save_IFS" rm -f conftest.* - trial=`expr $trial + 1` + trial=`expr $trial + 1` done fi pac_cv_F77_GETARGDECL="$F77_GETARGDECL" @@ -368,7 +368,7 @@ pac_cv_F77_IARGC="$F77_IARGC" pac_cv_F77_GETARG="$F77_GETARG" pac_cv_FXX_MODULE="$FXX_MODULE" ]) -if test "$found_cached" = "yes" ; then +if test "$found_cached" = "yes" ; then AC_MSG_RESULT([$pac_cv_prog_f77_cmdarg]) elif test -z "$pac_cv_F77_IARGC" ; then AC_MSG_WARN([Could not find a way to access the command line from Fortran 77]) diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_fc.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_fc.m4 index 870a5bea101..609ccb33266 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_fc.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_fc.m4 @@ -33,7 +33,7 @@ dnl ifc - An older Intel compiler dnl fc - A compiler on some unknown system. This has been removed because dnl it may also be the name of a command for something other than dnl the Fortran compiler (e.g., fc=file system check!) 
-dnl gfortran - The GNU Fortran compiler (not the same as g95) +dnl gfortran - The GNU Fortran compiler (not the same as g95) dnl gfc - An alias for gfortran recommended in cygwin installations dnl NOTE: this macro suffers from a basically intractable "expanded before it dnl was required" problem when libtool is also used @@ -58,7 +58,7 @@ AC_COMPILE_IFELSE([ ],[ AC_MSG_RESULT([f90]) ],[ - ac_fc_srcext="f" + ac_fc_srcext="f" AC_COMPILE_IFELSE([ AC_LANG_PROGRAM() ],[ @@ -101,12 +101,12 @@ AC_LINK_IFELSE([ rm -f work.pc work.pcl AC_LANG_POP(Fortran) dnl cross_compiling no longer maintained by autoconf as part of the -dnl AC_LANG changes. If we set it here, a later AC_LANG may not +dnl AC_LANG changes. If we set it here, a later AC_LANG may not dnl restore it (in the case where one compiler claims to be a cross compiler dnl and another does not) dnl cross_compiling=$pac_cv_prog_f90_cross ]) -dnl/*D +dnl/*D dnl PAC_PROG_FC_INT_KIND - Determine kind parameter for an integer with dnl the specified number of bytes. dnl @@ -124,7 +124,7 @@ else AC_LANG_PUSH(Fortran) AC_MSG_CHECKING([for Fortran 90 integer kind for $2-byte integers]) # Convert bytes to digits - case $2 in + case $2 in 1) sellen=2 ;; 2) sellen=4 ;; 4) sellen=8 ;; @@ -134,7 +134,7 @@ else esac # Check for cached value eval testval=\$"pac_cv_prog_fc_int_kind_$sellen" - if test -n "$testval" ; then + if test -n "$testval" ; then AC_MSG_RESULT([$testval (cached)]) $1=$testval else @@ -170,7 +170,7 @@ dnl ------------------------------------------------------------------------ dnl Special characteristics that have no autoconf counterpart but that dnl we need as part of the Fortran 90 support. To distinquish these, they dnl have a [PAC] prefix. -dnl +dnl dnl At least one version of the Cray compiler needs the option -em to dnl generate a separate module file, rather than including the module dnl information in the object (.o) file. 
@@ -226,7 +226,7 @@ AC_COMPILE_IFELSE([ pac_MOD="" fi fi - if test -z "$pac_MOD" ; then + if test -z "$pac_MOD" ; then pac_cv_fc_module_ext="unknown" else pac_cv_fc_module_ext=$pac_MOD @@ -304,7 +304,7 @@ AC_COMPILE_IFELSE([],[ mv $pac_module conftestdir # Remove any temporary files, and hide the work.pc file # (if the compiler generates them) - if test -f work.pc ; then + if test -f work.pc ; then mv -f work.pc conftest.pc fi rm -f work.pcl @@ -343,7 +343,7 @@ if test "X$pac_cv_fc_module_incflag" = "X" ; then AC_MSG_RESULT([-cl,filename where filename contains a list of files and directories]) FC_WORK_FILES_ARG="-cl,mpimod.pcl" FCMODINCSPEC="-cl,/mod.pcl" - else + else # The version of the Intel compiler that I have refuses to let # you put the "work catalog" list anywhere but the current directory. pac_cv_fc_module_incflag="Unavailable!" @@ -351,12 +351,12 @@ if test "X$pac_cv_fc_module_incflag" = "X" ; then else # Early versions of the Intel ifc compiler required a *file* # containing the names of files that contained the names of the - # + # # -cl,filename.pcl # filename.pcl contains # fullpathname.pc - # The "fullpathname.pc" is generated, I believe, when a module is - # compiled. + # The "fullpathname.pc" is generated, I believe, when a module is + # compiled. # Intel compilers use a wierd system: -cl,filename.pcl . If no file is # specified, work.pcl and work.pc are created. However, if you specify # a file, it must contain the name of a file ending in .pc . Ugh! 
@@ -415,7 +415,7 @@ AC_COMPILE_IFELSE([],[ rm -f "$pac_module" # Remove any temporary files, and hide the work.pc file # (if the compiler generates them) - if test -f work.pc ; then + if test -f work.pc ; then mv -f work.pc conftest.pc fi rm -f work.pcl @@ -478,7 +478,7 @@ AC_SUBST([FCMODOUTFLAG],[$pac_cv_fc_module_outflag]) dnl dnl PAC_FC_AND_F77_COMPATIBLE([action-if-true],[action-if-false]) dnl -dnl Determine whether object files compiled with Fortran 77 can be +dnl Determine whether object files compiled with Fortran 77 can be dnl linked to Fortran 90 main programs. dnl dnl The test uses a name that includes an underscore unless the 3rd @@ -543,7 +543,7 @@ dnl ]) dnl dnl -dnl /*D +dnl /*D dnl PAC_PROG_FC_CRAY_POINTER - Check if Fortran supports Cray-style pointer. dnl If so, set pac_cv_prog_fc_has_pointer to yes dnl and find out if any extra compiler flag is @@ -650,7 +650,7 @@ AC_LANG_POP(C) dnl if test "$pac_cv_prog_fc_and_c_stdio_libs" != none -a \ "$pac_cv_prog_fc_and_c_stdio_libs" != unknown ; then - FC_OTHER_LIBS="$FC_OTHER_LIBS $pac_cv_prog_fc_and_c_stdio_libs" + FC_OTHER_LIBS="$FC_OTHER_LIBS $pac_cv_prog_fc_and_c_stdio_libs" fi ]) dnl @@ -668,13 +668,13 @@ dnl dnl If no actions are specified, a working value is added to 'FCOPTIONS' dnl dnl Notes: -dnl This is now careful to check that the output is different, since +dnl This is now careful to check that the output is different, since dnl some compilers are noisy. -dnl +dnl dnl We are extra careful to prototype the functions in case compiler options dnl that complain about poor code are in effect. dnl -dnl Because this is a long script, we have ensured that you can pass a +dnl Because this is a long script, we have ensured that you can pass a dnl variable containing the option name as the first argument. 
dnl D*/ AC_DEFUN([PAC_FC_CHECK_COMPILER_OPTION],[ @@ -817,14 +817,14 @@ for arg in --version -V -v ; do rm -f conftest.txt PAC_RUNLOG([$FC $arg conftest.txt 2>&1]) # Ignore the return code, because some compilers set the - # return code to zero on invalid arguments and some to + # return code to zero on invalid arguments and some to # non-zero on success (with no files to compile) if test -f conftest.txt ; then if grep 'Portland Group' conftest.txt >/dev/null 2>&1 ; then pac_cv_fc_vendor=pgi elif grep 'Sun Workshop' conftest.txt >/dev/null 2>&1 ; then pac_cv_fc_vendor=sun - elif grep 'Sun Fortran 9' conftest.txt >/dev/null 2>&1 ; then + elif grep 'Sun Fortran 9' conftest.txt >/dev/null 2>&1 ; then pac_cv_fc_vendor=sun elif grep 'Absoft' conftest.txt >/dev/null 2>&1 ; then pac_cv_fc_vendor=absoft diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_make.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_make.m4 index 147e92e9c2e..2c900e810b6 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_make.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_make.m4 @@ -72,7 +72,7 @@ fi ])dnl dnl/*D -dnl PAC_PROG_MAKE_ALLOWS_COMMENTS - Check whether comments are allowed in +dnl PAC_PROG_MAKE_ALLOWS_COMMENTS - Check whether comments are allowed in dnl shell commands in a makefile dnl dnl Synopsis: @@ -105,7 +105,7 @@ ALL: pac_str=`$MAKE -f conftest 2>&1` # This is needed for Mac OSX 10.5 rm -rf conftest.dSYM -rm -f conftest +rm -f conftest if test "$pac_str" != "success" ; then pac_cv_prog_make_allows_comments="no" else @@ -135,7 +135,7 @@ dnl or dnl.vb dnl .PATH: . ${srcdir} dnl.ve -dnl +dnl dnl Notes: dnl The test checks that the path works with implicit targets (some makes dnl support only explicit targets with 'VPATH' or 'PATH'). 
@@ -166,7 +166,7 @@ VPATH=.:conftestdir @echo \$< EOF ac_out=`$MAKE -f conftest 2>&1 | grep 'conftestdir/a.c'` -if test -n "$ac_out" ; then +if test -n "$ac_out" ; then pac_cv_prog_make_vpath="VPATH" else rm -f conftest @@ -177,7 +177,7 @@ all: a.o @echo \$< EOF ac_out=`$MAKE -f conftest 2>&1 | grep 'conftestdir/a.c'` - if test -n "$ac_out" ; then + if test -n "$ac_out" ; then pac_cv_prog_make_vpath=".PATH" else pac_cv_prog_make_vpath="neither VPATH nor .PATH works" @@ -226,7 +226,7 @@ EOF pac_str=`$MAKE -f conftest 2>&1` # This is needed for Mac OSX 10.5 rm -rf conftest.dSYM -rm -f conftest +rm -f conftest if test "$pac_str" = "XX" ; then pac_cv_prog_make_set_cflags="no" else @@ -241,7 +241,7 @@ fi ])dnl dnl/*D -dnl PAC_PROG_MAKE_CLOCK_SKEW - Check whether there is a problem with +dnl PAC_PROG_MAKE_CLOCK_SKEW - Check whether there is a problem with dnl clock skew in suing make. dnl dnl Effect: @@ -277,7 +277,7 @@ fi ]) dnl/*D -dnl PAC_PROG_MAKE - Checks for the varieties of MAKE, including support for +dnl PAC_PROG_MAKE - Checks for the varieties of MAKE, including support for dnl VPATH dnl dnl Synopsis: @@ -292,10 +292,10 @@ dnl This macro uses 'PAC_PROG_MAKE_INCLUDE', dnl 'PAC_PROG_MAKE_ALLOWS_COMMENTS', 'PAC_PROG_MAKE_VPATH', and dnl 'PAC_PROG_MAKE_SET_CFLAGS'. See those commands for details about their dnl actions. -dnl +dnl dnl It may call 'AC_PROG_MAKE_SET', which sets 'SET_MAKE' to 'MAKE = @MAKE@' dnl if the make program does not set the value of make, otherwise 'SET_MAKE' -dnl is set to empty; if the make program echos the directory name, then +dnl is set to empty; if the make program echos the directory name, then dnl 'SET_MAKE' is set to 'MAKE = $MAKE'. 
dnl D*/ AC_DEFUN([PAC_PROG_MAKE],[ diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_mpi.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_mpi.m4 index f0013a9bd18..c90fb49065c 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_mpi.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_mpi.m4 @@ -1,5 +1,5 @@ dnl -dnl/*D +dnl/*D dnl PAC_LIB_MPI - Check for MPI library dnl dnl Synopsis: @@ -9,7 +9,7 @@ dnl Output Effect: dnl dnl Notes: dnl Currently, only checks for lib mpi and mpi.h. Later, we will add -dnl MPI_Pcontrol prototype (const int or not?). +dnl MPI_Pcontrol prototype (const int or not?). dnl dnl Prerequisites: dnl autoconf version 2.13 (for AC_SEARCH_LIBS) @@ -21,7 +21,7 @@ dnl MPI-2 Spawn? dnl MPI-2 RMA? dnl PAC_LIB_MPI([found text],[not found text]) AC_DEFUN([PAC_LIB_MPI],[ -dnl Set the prereq to 2.50 to avoid having +dnl Set the prereq to 2.50 to avoid having AC_PREREQ(2.50) if test "X$pac_lib_mpi_is_building" != "Xyes" ; then # Use CC if TESTCC is defined @@ -55,7 +55,7 @@ ifelse($1,,,[$1]) dnl This should also set MPIRUN. dnl dnl/*D -dnl PAC_ARG_MPI_TYPES - Add command-line switches for different MPI +dnl PAC_ARG_MPI_TYPES - Add command-line switches for different MPI dnl environments dnl dnl Synopsis: @@ -73,11 +73,11 @@ dnl is given, that type is used as if '--with-' was given. dnl dnl Sets 'CC', 'F77', 'TESTCC', 'TESTF77', and 'MPILIBNAME'. Does `not` dnl perform an AC_SUBST for these values. -dnl Also sets 'MPIBOOT' and 'MPIUNBOOT'. These are used to specify +dnl Also sets 'MPIBOOT' and 'MPIUNBOOT'. These are used to specify dnl programs that may need to be run before and after running MPI programs. dnl For example, 'MPIBOOT' may start demons necessary to run MPI programs and dnl 'MPIUNBOOT' will stop those demons. 
-dnl +dnl dnl The two forms of the compilers are to allow for tests of the compiler dnl when the MPI version of the compiler creates executables that cannot dnl be run on the local system (for example, the IBM SP, where executables @@ -87,21 +87,21 @@ dnl the size of data types). dnl dnl Historical note: dnl Some common autoconf tests, such as AC_CHECK_SIZEOF, used to require -dnl running a program. But some MPI compilers (often really compilation +dnl running a program. But some MPI compilers (often really compilation dnl scripts) produced programs that could only be run with special commands, dnl such as a batch submission system. To allow these test programs to be -dnl run, a separate set of compiler variables, TESTCC, TESTF77, etc., +dnl run, a separate set of compiler variables, TESTCC, TESTF77, etc., dnl were defined. However, in later versions of autoconf, it both became -dnl unnecessary to run programs for tests such as AC_CHECK_SIZEOF and +dnl unnecessary to run programs for tests such as AC_CHECK_SIZEOF and dnl it became necessary to define CC etc. before invoking AC_PROG_CC (and dnl the othe language compilers), because those commands now do much, much dnl more than just determining the compiler. dnl dnl To address the change, we still define the TESTCC etc. compilers where dnl possible to allow the use of AC_TRY_RUN when required, but we define -dnl the CC etc variables and do not define ac_cv_prog_CC etc., as these -dnl cause autoconf to skip all of the other initialization code that -dnl AC_PROG_CC etc. runs. Note also that this command must occur before +dnl the CC etc variables and do not define ac_cv_prog_CC etc., as these +dnl cause autoconf to skip all of the other initialization code that +dnl AC_PROG_CC etc. runs. Note also that this command must occur before dnl AC_PROG_CC (or anything that might cause AC_PROG_CC to be invoked). 
dnl dnl See also: @@ -160,7 +160,7 @@ dnl dnl Because autoconf insists on moving code to the beginning of dnl certain definitions, it is *not possible* to define a single command dnl that selects compilation scripts and also check for other options. -dnl Thus, this needs to be divided into +dnl Thus, this needs to be divided into dnl MPI_FIND_COMPILER_SCRIPTS dnl which can fail (i.e., not find a script), and dnl MPI_FIND_COMPILERS @@ -183,7 +183,7 @@ AC_ARG_VAR([MPIF77],[Name and absolute path of program used to compile MPI progr AC_ARG_VAR([MPICXX],[Name and absolute path of program used to compile MPI programs in C++]) AC_ARG_VAR([MPIF90],[Name and absolute path of program used to compile MPI programs in F90]) # -# Check for things that will cause trouble. For example, +# Check for things that will cause trouble. For example, # if MPICC is defined but does not contain a / or \, then PATH_PROG will # ignore the value if test -n "$MPICC" ; then @@ -192,7 +192,7 @@ changequote(<<,>>) [\\/]* | ?:[\\/]*) changequote([,]) # Ok, PATH_PROG will figure it out - ;; + ;; *) AC_MSG_ERROR([MPICC must be set to an absolute path if it is set]) esac @@ -203,7 +203,7 @@ changequote(<<,>>) [\\/]* | ?:[\\/]*) changequote([,]) # Ok, PATH_PROG will figure it out - ;; + ;; *) AC_MSG_ERROR([MPICXX must be set to an absolute path if it is set]) esac @@ -214,7 +214,7 @@ changequote(<<,>>) [\\/]* | ?:[\\/]*) changequote([,]) # Ok, PATH_PROG will figure it out - ;; + ;; *) AC_MSG_ERROR([MPIF77 must be set to an absolute path if it is set]) esac @@ -225,7 +225,7 @@ changequote(<<,>>) [\\/]* | ?:[\\/]*) changequote([,]) # Ok, PATH_PROG will figure it out - ;; + ;; *) AC_MSG_ERROR([MPIF90 must be set to an absolute path if it is set]) esac @@ -233,12 +233,12 @@ fi case $ac_mpi_type in mpich) - dnl + dnl dnl This isn't correct. 
It should try to get the underlying compiler dnl from the mpicc and mpif77 scripts or mpireconfig if test "X$pac_lib_mpi_is_building" != "Xyes" ; then PAC_PUSH_FLAG([PATH]) - if test "$with_mpich" != "yes" -a "$with_mpich" != "no" ; then + if test "$with_mpich" != "yes" -a "$with_mpich" != "no" ; then # Look for commands; if not found, try adding bin to the # path if test ! -x $with_mpich/mpicc -a -x $with_mpich/bin/mpicc ; then @@ -249,21 +249,21 @@ case $ac_mpi_type in AC_PATH_PROG(MPICC,mpicc) if test -z "$TESTCC" ; then TESTCC=${CC-cc} ; fi CC="$MPICC" - # Note that autoconf may unconditionally change the value of + # Note that autoconf may unconditionally change the value of # CC (!) in some other command. Thus, we define CCMASTER CCMASTER=$CC # Force autoconf to respect this choice ac_ct_CC=$CC # to permit configure codes to recover the correct CC. This - # is an ugly not-quite-correct workaround for the fact that + # is an ugly not-quite-correct workaround for the fact that # does not want you to change the C compiler once you have set it - # (But since it does so unconditionally, it silently creates + # (But since it does so unconditionally, it silently creates # bogus output files.) AC_PATH_PROG(MPIF77,mpif77) if test -z "$TESTF77" ; then TESTF77=${F77-f77} ; fi F77="$MPIF77" AC_PATH_PROG(MPIFC,mpif90) - if test -z "$TESTFC" ; then TESTFC=${FC-f90} ; fi + if test -z "$TESTFC" ; then TESTFC=${FC-f90} ; fi FC="$MPIFC" AC_PATH_PROG(MPICXX,mpiCC) if test -z "$TESTCXX" ; then TESTCXX=${CXX-CC} ; fi @@ -275,7 +275,7 @@ case $ac_mpi_type in AC_PATH_PROG(MPIUNBOOT,mpichstop) PAC_POP_FLAG([PATH]) MPILIBNAME="mpich" - else + else # All of the above should have been passed in the environment! : fi @@ -289,7 +289,7 @@ case $ac_mpi_type in dnl This isn't correct. 
It should try to get the underlying compiler dnl from the mpicc and mpif77 scripts or mpireconfig PAC_PUSH_FLAG([PATH]) - if test "$with_mpich" != "yes" -a "$with_mpich" != "no" ; then + if test "$with_mpich" != "yes" -a "$with_mpich" != "no" ; then # Look for commands; if not found, try adding bin to the path if test ! -x $with_lammpi/mpicc -a -x $with_lammpi/bin/mpicc ; then with_lammpi="$with_lammpi/bin" @@ -330,7 +330,7 @@ case $ac_mpi_type in if test "$enable_f90" != no ; then AC_CHECK_PROGS(MPIXLF90,mpxlf90 mpfort) if test -z "$TESTFC" ; then TESTFC=${FC-xlf90}; fi - if test "X$MPIXLF90" != "X" ; then + if test "X$MPIXLF90" != "X" ; then FC="$MPIXLF90" else FC="$MPXLF -qlanglvl=90ext -qfree=f90" @@ -405,7 +405,7 @@ esac ]) AC_DEFUN([PAC_MPI_FIND_COMPILERS],[ -# Tell autoconf to determine properties of the compilers (these are the +# Tell autoconf to determine properties of the compilers (these are the # compilers for MPI programs) PAC_PROG_CC if test "$enable_f77" != no -a "$enable_fortran" != no ; then @@ -468,14 +468,14 @@ case $ac_mpi_type in AC_CHECK_LIB(mpi,MPI_Init) if test "$ac_cv_lib_mpi_MPI_Init" = "yes" ; then MPILIBNAME="mpi" - fi + fi ;; generic) AC_SEARCH_LIBS(MPI_Init,mpi mpich mpich) if test "$ac_cv_lib_mpi_MPI_Init" = "yes" ; then MPILIBNAME="mpi" - fi + fi ;; *) @@ -502,8 +502,8 @@ AC_TRY_LINK([#include "mpi.h"], [MPI_Request request;MPI_Fint a;a = MPI_Request_c2f(request);], pac_cv_mpi_f2c="yes",pac_cv_mpi_f2c="no") ]) -if test "$pac_cv_mpi_f2c" = "yes" ; then - AC_DEFINE(HAVE_MPI_F2C,1,[Define if MPI has F2C]) +if test "$pac_cv_mpi_f2c" = "yes" ; then + AC_DEFINE(HAVE_MPI_F2C,1,[Define if MPI has F2C]) fi ]) dnl diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_romio.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_romio.m4 index 39dd76dab20..db731d54dea 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_romio.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_romio.m4 @@ -1,5 +1,5 @@ dnl -dnl This files contains additional macros 
for using autoconf to +dnl This files contains additional macros for using autoconf to dnl build configure scripts. dnl dnl Almost all of this file is taken from the aclocal.m4 of MPICH @@ -19,21 +19,21 @@ AC_DEFUN([PAC_GET_FORTNAMES],[ EOF $F77 $FFLAGS -c confftest.f > /dev/null 2>&1 if test ! -s confftest.$OBJEXT ; then - AC_MSG_WARN([Unable to test Fortran compiler. Compiling a test + AC_MSG_WARN([Unable to test Fortran compiler. Compiling a test program failed to produce an object file]) NOF77=1 elif test -z "$FORTRANNAMES" ; then # MAC OS X (and probably FreeBSD need strings - (not strings -a) # Cray doesn't accept -a ... allstrings="-a" - if test $arch_CRAY ; then - allstrings="" + if test $arch_CRAY ; then + allstrings="" elif strings - confftest.$OBJEXT < /dev/null >/dev/null 2>&1 ; then allstrings="-" elif strings -a confftest.$OBJEXT < /dev/null >/dev/null 2>&1 ; then allstrings="-a" fi - + nameform1=`strings $allstrings confftest.$OBJEXT | grep mpir_init_fop_ | head -1` nameform2=`strings $allstrings confftest.$OBJEXT | grep MPIR_INIT_FOP | head -1` nameform3=`strings $allstrings confftest.$OBJEXT | grep mpir_init_fop | head -1` @@ -47,8 +47,8 @@ program failed to produce an object file]) echo "Fortran externals have a trailing underscore and are lowercase" FORTRANNAMES="FORTRANUNDERSCORE" elif test -n "$nameform2" ; then - echo "Fortran externals are uppercase" - FORTRANNAMES="FORTRANCAPS" + echo "Fortran externals are uppercase" + FORTRANNAMES="FORTRANCAPS" elif test -n "$nameform3" ; then echo "Fortran externals are lower case" FORTRANNAMES="FORTRANNOUNDERSCORE" @@ -76,7 +76,7 @@ if test -n "$arch_IRIX"; then dnl For example dnl IRIX_5_4400 (IRIX 5.x, using MIPS 4400) osversion=`uname -r | sed 's/\..*//'` - dnl Note that we need to allow brackets here, so we briefly turn off + dnl Note that we need to allow brackets here, so we briefly turn off dnl the macro quotes changequote(,)dnl dnl Get the second field (looking for 6.1) @@ -111,7 +111,7 @@ if 
test -n "$arch_IRIX"; then fi AC_MSG_RESULT($cputype) dnl echo "checking for osversion and cputype" - dnl cputype may contain R4400, R2000A/R3000, or something else. + dnl cputype may contain R4400, R2000A/R3000, or something else. dnl We may eventually need to look at it. if test -z "$osversion" ; then AC_MSG_RESULT([Could not determine OS version. Please send]) @@ -123,9 +123,9 @@ if test -n "$arch_IRIX"; then true elif test $osversion = 6 ; then true - else + else AC_MSG_RESULT([Could not recognize the version of IRIX (got $osversion). -ROMIO knows about versions 4, 5 and 6; the version being returned from +ROMIO knows about versions 4, 5 and 6; the version being returned from uname -r is $osversion. Please send]) uname -a 2>&1 hinv 2>&1 @@ -138,7 +138,7 @@ uname -r is $osversion. Please send]) changequote(,)dnl cputype=`echo $cputype | sed -e 's%.*/%%' -e 's/R//' | tr -d "[A-Z]"` changequote([,])dnl - case $cputype in + case $cputype in 3000) ;; 4000) ;; 4400) ;; @@ -150,7 +150,7 @@ uname -r is $osversion. Please send]) *) AC_MSG_WARN([Unexpected IRIX/MIPS chipset $cputype. 
Please send the output]) uname -a 2>&1 - hinv 2>&1 + hinv 2>&1 AC_MSG_WARN([to romio-maint@mcs.anl.gov ROMIO will continue and assume that the cputype is compatible with a MIPS 4400 processor.]) @@ -172,7 +172,7 @@ define(PAC_TEST_MPI,[ main(int argc, char **argv) { MPI_Init(&argc,&argv); - MPI_Finalize(); + MPI_Finalize(); } EOF rm -f conftest$EXEEXT @@ -225,10 +225,10 @@ define(PAC_MPI_LONG_LONG_INT,[ #include "mpi.h" main(int argc, char **argv) { - long long i; + long long i; MPI_Init(&argc,&argv); MPI_Send(&i, 1, MPI_LONG_LONG_INT, 0, 0, MPI_COMM_WORLD); - MPI_Finalize(); + MPI_Finalize(); } EOF rm -f conftest$EXEEXT @@ -247,7 +247,7 @@ dnl define(PAC_LONG_LONG_64,[ if test -n "$longlongsize" ; then if test "$longlongsize" = 8 ; then - echo "defining MPI_Offset as long long in C and integer*8 in Fortran" + echo "defining MPI_Offset as long long in C and integer*8 in Fortran" AC_DEFINE(HAVE_LONG_LONG_64,,[Define if long long is 64 bits]) DEFINE_MPI_OFFSET="typedef long long MPI_Offset;" FORTRAN_MPI_OFFSET="integer*8" @@ -260,8 +260,8 @@ if test -n "$longlongsize" ; then LL="\%d" MPI_OFFSET_KIND1="!" MPI_OFFSET_KIND2="!" - else - echo "defining MPI_Offset as long in C and integer in Fortran" + else + echo "defining MPI_Offset as long in C and integer in Fortran" DEFINE_MPI_OFFSET="typedef long MPI_Offset;" FORTRAN_MPI_OFFSET="integer" LL="\%ld" @@ -274,14 +274,14 @@ else if test "$longlongsize" = 8 ; then PAC_TEST_LONG_LONG() else - echo "defining MPI_Offset as long in C and integer in Fortran" + echo "defining MPI_Offset as long in C and integer in Fortran" DEFINE_MPI_OFFSET="typedef long MPI_Offset;" FORTRAN_MPI_OFFSET="integer" LL="\%ld" MPI_OFFSET_KIND1="!" MPI_OFFSET_KIND2="!" fi - else + else dnl check if longlong is not supported or only its size cannot be determined dnl because the program cannot be run. 
rm -f ltest.c @@ -297,14 +297,14 @@ EOF if test -x conftest$EXEEXT ; then echo "assuming size of long long is 8bytes; use '-longlongsize' to indicate otherwise" rm -f conftest$EXEEXT ltest.c - echo "defining MPI_Offset as long long in C and integer*8 in Fortran" + echo "defining MPI_Offset as long long in C and integer*8 in Fortran" AC_DEFINE(HAVE_LONG_LONG_64,,[Define if long long is 64 bits]) DEFINE_MPI_OFFSET="typedef long long MPI_Offset;" FORTRAN_MPI_OFFSET="integer*8" LL="\%lld" - else + else echo "assuming long long is not available; use '-longlongsize' to indicate otherwise" - echo "defining MPI_Offset as long in C and integer in Fortran" + echo "defining MPI_Offset as long in C and integer in Fortran" DEFINE_MPI_OFFSET="typedef long MPI_Offset;" FORTRAN_MPI_OFFSET="integer" LL="\%ld" @@ -326,7 +326,7 @@ define(PAC_MPI_INFO,[ MPI_Info info; MPI_Init(&argc,&argv); MPI_Info_create(&info); - MPI_Finalize(); + MPI_Finalize(); } EOF rm -f conftest$EXEEXT @@ -363,7 +363,7 @@ define(PAC_MPI_DARRAY_SUBARRAY,[ MPI_Init(&argc,&argv); MPI_Type_create_darray(i, i, i, &i, &i, &i, &i, i, MPI_INT, &t); MPI_Type_create_subarray(i, &i, &i, &i, i, MPI_INT, &t); - MPI_Finalize(); + MPI_Finalize(); } EOF rm -f conftest$EXEEXT @@ -480,7 +480,7 @@ define(PAC_TEST_MPI_SGI_type_is_contig,[ MPI_Init(&argc,&argv); i = MPI_SGI_type_is_contig(type); - MPI_Finalize(); + MPI_Finalize(); } EOF rm -f conftest$EXEEXT @@ -507,7 +507,7 @@ define(PAC_TEST_MPI_COMBINERS,[ MPI_Init(&argc,&argv); i = MPI_COMBINER_STRUCT; - MPI_Finalize(); + MPI_Finalize(); } EOF rm -f conftest$EXEEXT @@ -581,7 +581,7 @@ fi KINDVAL="" if $FC -o conftest$EXEEXT conftest.$ac_f90ext >/dev/null 2>&1 ; then ./conftest$EXEEXT >/dev/null 2>&1 - if test -s conftest.out ; then + if test -s conftest.out ; then KINDVAL=`cat conftest.out` fi fi @@ -624,7 +624,7 @@ EOF dnl dnl dnl PAC_GET_XFS_MEMALIGN -dnl +dnl dnl define(PAC_GET_XFS_MEMALIGN, [AC_MSG_CHECKING([for memory alignment needed for direct I/O]) @@ -634,7 +634,7 @@ 
AC_TEST_PROGRAM([#include #include #include #include -main() { +main() { struct dioattr st; int fd = open("/tmp/romio_tmp.bin", O_RDWR | O_CREAT, 0644); FILE *f=fopen("memalignval","w"); @@ -704,7 +704,7 @@ fi KINDVAL="" if $FC -o kind$EXEEXT kind.f >/dev/null 2>&1 ; then ./kind >/dev/null 2>&1 - if test -s k.out ; then + if test -s k.out ; then KINDVAL=`cat k.out` fi fi @@ -773,7 +773,7 @@ define(PAC_TEST_MPIR_STATUS_SET_BYTES,[ MPI_Init(&argc,&argv); MPIR_Status_set_bytes(status,type,err); - MPI_Finalize(); + MPI_Finalize(); } EOF rm -f conftest$EXEEXT diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_shl.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_shl.m4 index f974bfa2261..f13222944e0 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_shl.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_shl.m4 @@ -1,7 +1,7 @@ dnl dnl Definitions for creating shared libraries dnl -dnl The purpose of these definitions is to provide common support for +dnl The purpose of these definitions is to provide common support for dnl shared libraries, with *or without* the use of the GNU Libtool package. dnl For many of our important platforms, the Libtool approach is overkill, dnl and can be particularly painful for developers. @@ -9,30 +9,30 @@ dnl dnl To use libtool, you need macros that are defined by libtool for libtool dnl Don't even think about the consequences of this for updating and for dnl using user-versions of libtool :( -dnl +dnl dnl !!!!!!!!!!!!!!!!!!!!! dnl libtool requires ac 2.50 !!!!!!!!!!!!!!!!! -dnl +dnl dnl builtin(include,libtool.m4) dnl dnl/*D dnl PAC_ARG_SHAREDLIBS - Add --enable-sharedlibs=kind to configure. -dnl +dnl dnl Synopsis: dnl PAC_ARG_SHAREDLIBS dnl dnl Output effects: dnl Adds '--enable-sharedlibs=kind' to the command line. 
If this is enabled, -dnl then based on the value of 'kind', programs are selected for the +dnl then based on the value of 'kind', programs are selected for the dnl names 'CC_SHL' and 'CC_LINK_SHL' that configure will substitute for in dnl 'Makefile.in's. These symbols are generated by 'simplemake' when dnl shared library support is selected. -dnl The variable 'C_LINKPATH_SHL' is set to the option to specify the +dnl The variable 'C_LINKPATH_SHL' is set to the option to specify the dnl path to search at runtime for libraries (-rpath in gcc/GNU ld). dnl This can be turned off with --disable-rpath , which is appropriate dnl for libraries and for executables that may be installed in different dnl locations. -dnl The variable 'SHLIB_EXT' is set to the extension used by shared +dnl The variable 'SHLIB_EXT' is set to the extension used by shared dnl libraries; under most forms of Unix, this is 'so'; under Mac OS/X, this dnl is 'dylib', and under Windows (including cygwin), this is 'dll'. dnl @@ -40,7 +40,7 @@ dnl Supported values of 'kind' include \: dnl+ gcc - Use gcc to create both shared objects and libraries dnl. osx-gcc - Use gcc on Mac OS/X to create both shared objects and dnl libraries -dnl. solaris-cc - Use native Solaris cc to create shared objects and +dnl. solaris-cc - Use native Solaris cc to create shared objects and dnl libraries dnl. cygwin-gcc - Use gcc on Cygwin to create shared objects and libraries dnl- none - The same as '--disable-sharedlibs' @@ -105,7 +105,7 @@ C_LINKPATH_SHL="" SHLIB_EXT=unknown SHLIB_FROM_LO=no SHLIB_INSTALL='$(INSTALL_PROGRAM)' -case "$enable_sharedlibs" in +case "$enable_sharedlibs" in no|none) ;; gcc-osx|osx-gcc) @@ -114,7 +114,7 @@ case "$enable_sharedlibs" in CC_SHL='${CC} -fPIC' # No way in osx to specify the location of the shared libraries at link # time (see the code in createshlib in mpich/src/util) - # As of 10.5, -Wl,-rpath,dirname should work . The dirname + # As of 10.5, -Wl,-rpath,dirname should work . 
The dirname # must be a single directory, not a colon-separated list (use multiple # -Wl,-rpath,path for each of the paths in the list). However, os x # apparently records the library full path, so rpath isn't as useful @@ -148,13 +148,13 @@ case "$enable_sharedlibs" in C_LINKPATH_SHL="-Wl,-rpath," fi SHLIB_EXT=so - # We need to test that this isn't osx. The following is a + # We need to test that this isn't osx. The following is a # simple hack osname=`uname -s` - case $osname in + case $osname in *Darwin*|*darwin*) AC_MSG_ERROR([You must specify --enable-sharedlibs=osx-gcc for Mac OS/X]) - ;; + ;; *CYGWIN*|*cygwin*) AC_MSG_ERROR([You must specify --enable-sharedlibs=cygwin-gcc for Cygwin]) ;; @@ -172,7 +172,7 @@ case "$enable_sharedlibs" in C_LINKPATH_SHL="" SHLIB_EXT="dll" enable_sharedlibs="cygwin-gcc" - ;; + ;; libtool) # set TRY_LIBTOOL to yes to experiment with libtool. You are on your @@ -180,7 +180,7 @@ case "$enable_sharedlibs" in if test "$TRY_LIBTOOL" != yes ; then AC_MSG_ERROR([Creating shared libraries using libtool not yet supported]) else - # Using libtool requires a heavy-weight process to test for + # Using libtool requires a heavy-weight process to test for # various stuff that libtool needs. Without this, you'll get a # bizarre error message about libtool being unable to find # configure.in or configure.ac (!) @@ -205,7 +205,7 @@ case "$enable_sharedlibs" in # we are building under cygwin sysname=`uname -s | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ` isCygwin=no - case "$sysname" in + case "$sysname" in *CYGWIN*) isCygwin=yes ;; esac if test "$isCygwin" = yes ; then @@ -269,7 +269,7 @@ dnl Other, such as solaris-cc *) AC_MSG_ERROR([Unknown value $enable_sharedlibs for enable-sharedlibs. 
Values should be gcc or osx-gcc]) enable_sharedlibs=no - ;; + ;; esac # Check for the shared-library extension PAC_CC_SHLIB_EXT @@ -286,7 +286,7 @@ dnl /*D dnl PAC_xx_SHAREDLIBS - Get compiler and linker for shared libraries dnl These routines may be used to determine the compiler and the dnl linker to be used in creating shared libraries -dnl Rather than set predefined variable names, they set an argument +dnl Rather than set predefined variable names, they set an argument dnl (if provided) dnl dnl Synopsis @@ -300,11 +300,11 @@ ifelse($1,,[ AC_CHECK_PROG(pac_prog,gcc,yes,no) # If we are gcc but OS X, set the special type # We need a similar setting for cygwin - if test "$pac_prog" = yes ; then + if test "$pac_prog" = yes ; then osname=`uname -s` - case $osname in + case $osname in *Darwin*|*darwin*) pac_kinds=gcc-osx - ;; + ;; *) pac_kinds=gcc ;; esac @@ -314,7 +314,7 @@ ifelse($1,,[ if test "$pac_prog" = yes ; then pac_kinds="$pac_kinds libtool" ; fi ]) for pac_arg in $pac_kinds ; do - case $pac_arg in + case $pac_arg in gcc) # For example, include the libname as ${LIBNAME_SHL} #C_LINK_SHL='${CC} -shared -Wl,-h,' @@ -363,10 +363,10 @@ ifelse($3,,C_LINK_SHL=$pac_clink_sharedlibs,$3=$pac_clink_sharedlibs) ifelse($4,,SHAREDLIB_TYPE=$pac_type_sharedlibs,$4=$pac_type_sharedlibs) ]) -dnl This macro ensures that all of the necessary substitutions are +dnl This macro ensures that all of the necessary substitutions are dnl made by any subdirectory configure (which may simply SUBST the dnl necessary values rather than trying to determine them from scratch) -dnl This is a more robust (and, in the case of libtool, only +dnl This is a more robust (and, in the case of libtool, only dnl managable) method. AC_DEFUN([PAC_CC_SUBDIR_SHLIBS],[ AC_SUBST(CC_SHL) @@ -395,9 +395,9 @@ AC_DEFUN([PAC_CC_SHLIB_EXT],[ # clean steps that look for libfoo.$SHLIB_EXT . 
if test "$SHLIB_EXT" = "unknown" ; then osname=`uname -s` - case $osname in + case $osname in *Darwin*|*darwin*) SHLIB_EXT=dylib - ;; + ;; *CYGWIN*|*cygwin*) SHLIB_EXT=dll ;; *Linux*|*LINUX*|*SunOS*) SHLIB_EXT=so diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_subcfg.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_subcfg.m4 index 53fb78e3d9e..970ae223052 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_subcfg.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_subcfg.m4 @@ -30,7 +30,7 @@ dnl to subconfigure as "precious" appropriately. The precious variable dnl can be created in the following ways: dnl 1) implicit declaration through use of autoconf macros, like dnl AC_PROG_CC (declares CC/CFLAGS/CPPFLAGS/LIBS/LDFLAGS), or -dnl AC_PROG_F77 (declares F77/FFLAGS/FLIBS) ... +dnl AC_PROG_F77 (declares F77/FFLAGS/FLIBS) ... dnl which are in turns invoked by other subconfigure. dnl When in doubt, check "ac_precious_var" in the calling configure. dnl 2) explicit "precious" declaration through AC_ARG_VAR. @@ -54,7 +54,7 @@ AC_DEFUN([PAC_CONFIG_SUBDIR_ARGS],[ # Adapted for MPICH from the autoconf-2.67 implementation of # AC_CONFIG_SUBDIRS. Search for "MPICH note:" for relevant commentary and # local modifications. - + # Remove --cache-file, --srcdir, and --disable-option-checking arguments # so they do not pile up. Otherwise relative paths (like --srcdir=.. from # make distcheck) will be incorrect. @@ -115,7 +115,7 @@ AC_DEFUN([PAC_CONFIG_SUBDIR_ARGS],[ AS_VAR_APPEND([pac_sub_configure_args], [" '$pac_arg'"]) ;; esac done - + # Always prepend --prefix to ensure using the same prefix # in subdir configurations. 
# MPICH note: see tt#983 for an example of why this is necessary @@ -124,22 +124,22 @@ AC_DEFUN([PAC_CONFIG_SUBDIR_ARGS],[ *\'*) pac_arg=`AS_ECHO(["$pac_arg"]) | sed "s/'/'\\\\\\\\''/g"` ;; esac pac_sub_configure_args="'$pac_arg' $pac_sub_configure_args" - + # Pass --silent if test "$silent" = yes; then pac_sub_configure_args="--silent $pac_sub_configure_args" fi - + # Always prepend --disable-option-checking to silence warnings, since # different subdirs can have different --enable and --with options. pac_sub_configure_args="--disable-option-checking $pac_sub_configure_args" - + pac_popdir=`pwd` - + # Do not complain, so a configure script can configure whichever # parts of a large source tree are present. test -d "$srcdir/$pac_dir" || continue - + # MPICH note: modified to remove the internal "_AS_*" macro usage, also # msg is already printed at top dnl _AS_ECHO_LOG([$pac_msg]) diff --git a/ompi/mca/io/romio314/romio/confdb/aclocal_util.m4 b/ompi/mca/io/romio314/romio/confdb/aclocal_util.m4 index 575a1444630..6faec25ab1c 100644 --- a/ompi/mca/io/romio314/romio/confdb/aclocal_util.m4 +++ b/ompi/mca/io/romio314/romio/confdb/aclocal_util.m4 @@ -131,7 +131,7 @@ AC_SUBST(MKDIR_P) ]) dnl Test for a clean VPATH directory. Provide this command with the names -dnl of all of the generated files that might cause problems +dnl of all of the generated files that might cause problems dnl (Makefiles won't cause problems because there's no VPATH usage for them) dnl dnl Synopsis @@ -151,12 +151,12 @@ if test ! 
-s $srcdir/conftest$$ ; then pac_header="" ifdef([AC_LIST_HEADER],[pac_header=AC_LIST_HEADER]) for file in config.status $pac_header $1 ; do - if test -f $srcdir/$file ; then + if test -f $srcdir/$file ; then pac_dirtyfiles="$pac_dirtyfiles $file" fi done ifelse($2,,,[ - for dir in $2 ; do + for dir in $2 ; do if test -d $srcdir/$dir ; then pac_dirtydirs="$pac_dirtydirs $dir" fi diff --git a/ompi/mca/io/romio314/romio/configure.ac b/ompi/mca/io/romio314/romio/configure.ac index 4d51a3ef79a..436b7832cbf 100644 --- a/ompi/mca/io/romio314/romio/configure.ac +++ b/ompi/mca/io/romio314/romio/configure.ac @@ -213,14 +213,14 @@ known_filesystems="m4_join([ ],known_filesystems_m4)" # Defaults AC_ARG_ENABLE(aio,[ --enable-aio - Request use of asynchronous I/O routines (default)], -[ +[ if test "x$enableval" = "xno" ; then disable_aio=yes else disable_aio=no fi ], disable_aio=no) -AC_ARG_ENABLE(echo, +AC_ARG_ENABLE(echo, [--enable-echo - Turn on strong echoing. The default is enable=no.] ,set -x) AC_ARG_ENABLE(f77, [--enable-f77 - Turn on support for Fortran 77 (default)],,enable_f77=yes) @@ -274,9 +274,9 @@ if test -z "$ARCH" -a -x $srcdir/util/tarch ; then ARCH=`$srcdir/util/tarch | sed s/-/_/g` if test -z "$ARCH" ; then AC_MSG_RESULT([Unknown!]) - AC_MSG_ERROR([Error: Could not guess target architecture, you must -set an architecture type with the environment variable ARCH]) - fi + AC_MSG_ERROR([Error: Could not guess target architecture, you must +set an architecture type with the environment variable ARCH]) + fi eval "arch_$ARCH=1" AC_MSG_RESULT($ARCH) fi @@ -291,7 +291,7 @@ fi # # # Find the home directory if not specified -if test "X$srcdir" != "X." -a -s $srcdir/mpi-io/Makefile.in ; then +if test "X$srcdir" != "X." 
-a -s $srcdir/mpi-io/Makefile.in ; then ROMIO_HOME_TRIAL=$srcdir else # Take advantage of autoconf2 features @@ -327,7 +327,7 @@ AC_SUBST(mandir) if test -z "$docdir" ; then docdir='${prefix}/doc' ; fi AC_SUBST(docdir) if test -z "$htmldir" ; then htmldir='${prefix}/www' ; fi -AC_SUBST(htmldir) +AC_SUBST(htmldir) # If we are building within a known MPI implementation, we must avoid the @@ -375,7 +375,7 @@ if test $WITHIN_KNOWN_MPI_IMPL = no ; then # compile command. Some compilers complain if it's only -I MPI_INCLUDE_DIR=. fi -else +else MPI_INCLUDE_DIR=. fi # @@ -397,22 +397,22 @@ else USER_FFLAGS="$FFLAGS -O" fi # -# Here begin the architecture-specific tests. +# Here begin the architecture-specific tests. # -------------------------------------------------------------------------- -# We must first select the C and Fortran compilers. Because of the +# We must first select the C and Fortran compilers. Because of the # way that the PROG_CC autoconf macro works (and all of the macros that # require it, including CHECK_HEADERS), that macro must occur exactly -# once in the configure.ac file, at least as of autoconf 2.57 . +# once in the configure.ac file, at least as of autoconf 2.57 . # Unfortunately, this requirement is not enforced. To handle this, # we first case on the architecture; then use PROG_CC, then case on the # architecture again for any arch-specific features. We also set the # C_DEBUG_FLAG and F77_DEBUG_FLAG in case debugging is selected. -# +# # For the MPICH and MPICH configures, the compilers will already be # selected, so most of the compiler-selection code will be bypassed. 
# -------------------------------------------------------------------------- # For historical reasons -if test -z "$FC" ; then +if test -z "$FC" ; then FC=$F77 fi # @@ -467,7 +467,7 @@ PAC_GET_SPECIAL_SYSTEM_INFO AC_HAVE_FUNCS(memalign) # -# Question: Should ROMIO under MPICH ignore the Fortran tests, since +# Question: Should ROMIO under MPICH ignore the Fortran tests, since # MPICH provides all of the Fortran interface routines? # if test $NOF77 = 0 ; then @@ -483,7 +483,7 @@ if test $NOF77 = 0 ; then if test ! -d test ; then mkdir test ; fi ln -s $MPI_INCLUDE_DIR/mpif.h test fi -else +else F77=":" fi # @@ -557,7 +557,7 @@ if test "$ac_cv_sizeof_long_long" != 0 ; then MPI_OFFSET_KIND1="!" MPI_OFFSET_KIND2="!" else - echo "defining MPI_Offset as long in C and integer in Fortran" + echo "defining MPI_Offset as long in C and integer in Fortran" MPI_OFFSET_TYPE="long" DEFINE_MPI_OFFSET="typedef long MPI_Offset;" FORTRAN_MPI_OFFSET="integer" @@ -566,7 +566,7 @@ if test "$ac_cv_sizeof_long_long" != 0 ; then MPI_OFFSET_KIND2="!" fi else - echo "defining MPI_Offset as long in C and integer in Fortran" + echo "defining MPI_Offset as long in C and integer in Fortran" MPI_OFFSET_TYPE="long" DEFINE_MPI_OFFSET="typedef long MPI_Offset;" FORTRAN_MPI_OFFSET="integer" @@ -585,7 +585,7 @@ if test -n "$ac_cv_sizeof_long_long"; then fi fi # -if test -n "$OFFSET_KIND" -a "A$MPI_OFFSET_KIND1" = "A!" ; then +if test -n "$OFFSET_KIND" -a "A$MPI_OFFSET_KIND1" = "A!" ; then MPI_OFFSET_KIND1=" INTEGER MPI_OFFSET_KIND" MPI_OFFSET_KIND2=" PARAMETER (MPI_OFFSET_KIND=$OFFSET_KIND)" MPI_OFFSET_KIND_VAL=$OFFSET_KIND @@ -650,7 +650,7 @@ else MPI_FINFO2="!" MPI_FINFO3="!" MPI_FINFO4="!" 
-fi +fi # if test -n "$mpi_sgi"; then dnl if test -z "$HAVE_MPI_INFO" ; then @@ -661,8 +661,8 @@ dnl fi PAC_TEST_MPI_HAVE_OFFSET_KIND fi # -# check if darray and subarray constructors are defined in the MPI -# implementation +# check if darray and subarray constructors are defined in the MPI +# implementation if test $WITHIN_KNOWN_MPI_IMPL = no ; then PAC_MPI_DARRAY_SUBARRAY fi @@ -680,7 +680,7 @@ if test $FROM_MPICH = yes ; then MPI_FARRAY5="!" MPI_FARRAY6="!" MPI_FARRAY7="!" -fi +fi # Check to see if weak symbols work correctly if test "$enable_weak_symbols" = "yes" ; then @@ -729,7 +729,7 @@ AC_MSG_CHECKING([whether struct flock compatible with MPI_Offset]) AC_TRY_COMPILE([#include ], [struct flock l; $MPI_OFFSET_TYPE a=1; - l.l_start = a; + l.l_start = a; l.l_len = a; ],pac_cv_struct_flock_and_mpi_offset=yes,pac_cv_struct_flock_and_mpi_offset=no) AC_MSG_RESULT($pac_cv_struct_flock_and_mpi_offset) @@ -740,7 +740,7 @@ if test "$pac_cv_struct_flock_and_mpi_offset" = no ; then AC_TRY_COMPILE([#include ], [struct flock l; int a=1; - l.l_start = a; + l.l_start = a; l.l_len = a; ],pac_cv_struct_flock_and_int=yes,pac_cv_struct_flock_and_int=no) AC_MSG_RESULT($pac_cv_struct_flock_and_int) @@ -766,15 +766,15 @@ if test -n "$FILE_SYSTEM" ; then # if multiple filesystems are passed in, they are '+'-delimited # we could set the IFS to tokenize FILE_SYSTEM, but the FILE_SYSTEM env var # is used in multiple places in the build system: get rid of the '+'s so we - # can use the 'for x in $FILE_SYSTEM ...' idiom + # can use the 'for x in $FILE_SYSTEM ...' idiom FILE_SYSTEM=`echo $FILE_SYSTEM|sed -e 's/\+/ /g'` for x in $FILE_SYSTEM do found=no # We could also do test -d "ad_$y" to test for known file systems - # based on having access to the adio code. Then adding a file + # based on having access to the adio code. 
Then adding a file # system would not require changing configure to change known_filesystems - for y in $known_filesystems ; do + for y in $known_filesystems ; do if test $x = $y ; then found=yes eval "file_system_`echo $x`=1" @@ -905,7 +905,7 @@ fi if test -n "$file_system_lustre"; then AC_CHECK_HEADERS(lustre/lustre_user.h, AC_DEFINE(ROMIO_LUSTRE,1,[Define for ROMIO with LUSTRE]), - AC_MSG_ERROR([LUSTRE support requested but cannot find lustre/lustre_user.h header file]) + AC_MSG_ERROR([LUSTRE support requested but cannot find lustre/lustre_user.h header file]) ) fi @@ -922,7 +922,7 @@ if test -n "$file_system_xfs"; then #include #include #include -int main(int argc, char **argv) { +int main(int argc, char **argv) { struct dioattr st; int fd = open("/tmp/romio_tmp.bin", O_RDWR | O_CREAT, 0644); FILE *f=fopen("confmemalignval","w"); @@ -1006,7 +1006,7 @@ if test -n "$file_system_pvfs2"; then AC_CHECK_HEADERS(pvfs2.h, AC_DEFINE(ROMIO_PVFS2,1,[Define for ROMIO with PVFS2]) AC_DEFINE(HAVE_PVFS2_SUPER_MAGIC, 1, [Define if PVFS2_SUPER_MAGIC defined.]), - AC_MSG_ERROR([PVFS2 support requested but cannot find pvfs2.h header file]) + AC_MSG_ERROR([PVFS2 support requested but cannot find pvfs2.h header file]) ) fi @@ -1016,13 +1016,13 @@ if test -n "$file_system_pvfs2"; then AC_LANG_SOURCE([ #include #include "pvfs2.h" - int main(int argc, char **argv) { + int main(int argc, char **argv) { PVFS_object_ref ref; PVFS_sys_attr attr; - PVFS_sys_create(NULL, ref, attr, NULL, NULL, NULL, NULL); + PVFS_sys_create(NULL, ref, attr, NULL, NULL, NULL, NULL); return 0; } ])], - , AC_DEFINE(HAVE_PVFS2_CREATE_WITHOUT_LAYOUT, 1, + , AC_DEFINE(HAVE_PVFS2_CREATE_WITHOUT_LAYOUT, 1, [Define if PVFS_sys_create does not have layout parameter]) ) fi @@ -1094,14 +1094,14 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = # Check that aio is available (many systems appear to have aio # either installed improperly or turned off). 
- # The test is the following: if not cross compiling, try to run a + # The test is the following: if not cross compiling, try to run a # program that includes a *reference* to aio_write but does not call it # If the libraries are not set up correctly, then this will fail. AC_MSG_CHECKING([whether aio routines can be used]) - # Include aio.h and the aiocb struct (since we'll need these to - # actually use the aio_write interface). Note that this will - # fail for some pre-POSIX implementations of the aio interface + # Include aio.h and the aiocb struct (since we'll need these to + # actually use the aio_write interface). Note that this will + # fail for some pre-POSIX implementations of the aio interface # (an old IBM interface needs an fd argument as well) AC_TRY_RUN([ #include @@ -1131,7 +1131,7 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = if test "$aio_runs" != "no" ; then AC_DEFINE(ROMIO_HAVE_WORKING_AIO, 1, Define if AIO calls seem to work) fi - + # now about that old IBM interface... # modern AIO interfaces have the file descriptor in the aiocb structure, # and will set ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_FILDES. 
Old IBM @@ -1164,7 +1164,7 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = ], aio_two_arg_write=yes AC_MSG_RESULT(yes), - aio_two_arg_write=no + aio_two_arg_write=no AC_MSG_RESULT(no), aio_two_arg_write=no AC_MSG_RESULT(no: cannot test when cross-compiling) @@ -1196,7 +1196,7 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = ], aio_two_arg_suspend=yes AC_MSG_RESULT(yes), - aio_two_arg_suspend=no + aio_two_arg_suspend=no AC_MSG_RESULT(no), aio_two_arg_suspend=no AC_MSG_RESULT(no: cannot test when cross compiling) @@ -1298,7 +1298,7 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = AC_MSG_RESULT(yes) AC_DEFINE(ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_REQPRIO, 1, [Define if aiocb has aio_reqprio member]), AC_MSG_RESULT(no) - ) + ) AC_MSG_CHECKING(for aio_sigevent member of aiocb structure) AC_TRY_COMPILE([ #ifdef HAVE_SIGNAL_H @@ -1322,7 +1322,7 @@ if test "$have_aio_h" = "yes" -o "$have_sys_aio_h" = "yes" -o "x$disable_aio" = AC_DEFINE(ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_SIGEVENT, 1, [Define if aiocb has aio_sigevent member]), AC_MSG_RESULT(no) ) - + fi # End of aio-related tests @@ -1352,19 +1352,61 @@ AC_TRY_COMPILE([ pac_cv_have_statfs=yes,pac_cv_have_statfs=no ) AC_MSG_RESULT($pac_cv_have_statfs) -# At this point, we could check for whether defining +# At this point, we could check for whether defining # __SWORD_TYPE as sizet_t or int/long (size of pointer) # would help. 
FIXME if test "$pac_cv_have_statfs" = yes ; then AC_DEFINE(HAVE_STRUCT_STATFS,1,[Define if struct statfs can be compiled]) fi - + +AC_MSG_CHECKING([for f_type member of statfs structure]) +AC_TRY_COMPILE([ +#ifdef HAVE_SYS_VFS_H +#include +#endif +#ifdef HAVE_SYS_STATVFS_H +#include +#endif +#ifdef HAVE_SYS_PARAM_H +#include +#endif +#ifdef HAVE_SYS_MOUNT_H +#include +#endif +#ifdef HAVE_STRING_H +#include +#endif + ],[ + struct statfs f; + memset(&f, 0, sizeof(f)); + f.f_type = 0; + ], + pac_cv_have_statfs_f_type=yes, + pac_cv_have_statfs_f_type=no +) +AC_MSG_RESULT($pac_cv_have_statfs_f_type) +if test $pac_cv_have_statfs_f_type = yes ; then + AC_DEFINE(ROMIO_HAVE_STRUCT_STATFS_WITH_F_TYPE, 1,[Define if statfs has f_type]) +fi + AC_MSG_CHECKING([for f_fstypename member of statfs structure]) AC_TRY_COMPILE([ +#ifdef HAVE_SYS_VFS_H +#include +#endif +#ifdef HAVE_SYS_STATVFS_H +#include +#endif +#ifdef HAVE_SYS_PARAM_H #include +#endif +#ifdef HAVE_SYS_MOUNT_H #include +#endif +#ifdef HAVE_STRING_H #include +#endif ],[ struct statfs f; memset(&f, 0, sizeof(f)); @@ -1446,7 +1488,7 @@ AC_CHECK_TYPE([blksize_t],[],[AC_DEFINE_UNQUOTED([blksize_t],[__blksize_t],[Prov #endif]] ) # -# Check for large file support. Make sure that we can use the off64_t +# Check for large file support. Make sure that we can use the off64_t # type (in some cases, it is an array, and the ROMIO code isn't prepared for # that). # @@ -1524,7 +1566,7 @@ AC_ARG_VAR([master_top_srcdir],[set by the MPICH configure to indicate the MPICH AC_ARG_VAR([master_top_builddir],[set by the MPICH configure to indicate the MPICH build root]) # The master_top_srcdir is the location of the source for the building -# package. This is used only as part of the MPICH build, including +# package. 
This is used only as part of the MPICH build, including # the documentation targets mandoc, htmldoc, and latexdoc if test -z "$master_top_srcdir" ; then if test "$FROM_MPICH" = yes ; then @@ -1557,7 +1599,7 @@ if test "$FROM_MPICH" = no ; then LIBNAME="$top_build_dir/lib/libmpio.a" fi # - if test ! -d $top_build_dir/lib ; then + if test ! -d $top_build_dir/lib ; then mkdir $top_build_dir/lib fi else @@ -1589,7 +1631,7 @@ AC_SUBST(DOCTEXT) if test $NOF77 = 1 ; then F77=":" else - FORTRAN_TEST="fperf fcoll_test fmisc pfcoll_test" + FORTRAN_TEST="fperf fcoll_test fmisc pfcoll_test" fi # if test $WITHIN_KNOWN_MPI_IMPL = no ; then @@ -1601,7 +1643,7 @@ fi # if test "$MPI_INCLUDE_DIR" = "." ; then ROMIO_INCLUDE="-I../include" -else +else ROMIO_INCLUDE="-I../include -I$MPI_INCLUDE_DIR" fi # @@ -1634,11 +1676,11 @@ if test $FROM_OMPI = yes ; then elif test $FROM_LAM = yes ; then # LAM does have the status set bytes functionality AC_DEFINE(HAVE_STATUS_SET_BYTES,1,[Define if have MPIR_Status_set_bytes]) - + # Used in the tests/ subdirectory for after ROMIO is built TEST_CC=mpicc TEST_F77=mpifort - MPIRUN=mpirun + MPIRUN=mpirun MPI_LIB= ROMIO_INCLUDE= USER_CFLAGS= @@ -1656,7 +1698,7 @@ elif test $FROM_MPICH = yes ; then # CC="$CC -I${use_top_srcdir}/src/include -I${top_build_dir}/src/include" # TEST_CC="$CC" # MPI_LIB="$LIBNAME" - # To allow ROMIO to work with the LIBTOOL scripts, we want to + # To allow ROMIO to work with the LIBTOOL scripts, we want to # work directly with the CC, not the mpicc, compiler. # Note that in the "FROM_MPICH" case, the CPPFLAGS and INCLUDES are already # properly set @@ -1720,7 +1762,8 @@ fi AC_CHECK_FUNCS(lstat) if test "$ac_cv_func_lstat" = "yes" ; then # Do we need to declare lstat? 
- PAC_FUNC_NEEDS_DECL([#include ],lstat) + PAC_FUNC_NEEDS_DECL([#include + #include ],lstat) fi AC_CHECK_FUNCS(readlink) if test "$ac_cv_func_readlink" = "yes" ; then @@ -1794,7 +1837,7 @@ CFLAGS="$CFLAGS -DHAVE_ROMIOCONF_H" if test -n "$MPIOF_H_INCLUDED"; then F77MPIOINC="" -else +else F77MPIOINC="include 'mpiof.h'" fi @@ -1928,7 +1971,7 @@ AC_SUBST(FORTRAN_TEST) #if test ! -d mpi-io ; then mkdir mpi-io ; fi #if test ! -d mpi-io/glue ; then mkdir mpi-io/glue ; fi -# Create makefiles for all of the adio devices. Only the ones that +# Create makefiles for all of the adio devices. Only the ones that # are active will be called by the top level ROMIO make AC_OUTPUT_COMMANDS([chmod 755 util/romioinstall test/runtests]) AC_CONFIG_FILES([ diff --git a/ompi/mca/io/romio314/romio/doc/README b/ompi/mca/io/romio314/romio/doc/README index b61e73ee204..9f72cc10a14 100644 --- a/ompi/mca/io/romio314/romio/doc/README +++ b/ompi/mca/io/romio314/romio/doc/README @@ -1,7 +1,7 @@ The ROMIO Users Guide is in the file users-guide.ps.gz. The book ``Using MPI-2: Advanced Features of the Message-Passing -Interface,'' by William Gropp, Ewing Lusk, and Rajeev Thakur, +Interface,'' by William Gropp, Ewing Lusk, and Rajeev Thakur, MIT Press, 1999, provides a tutorial introduction to all aspects of MPI-2, including I/O. It has lots of example programs. @@ -9,18 +9,18 @@ The following papers related to ROMIO are available online: * Rajeev Thakur, William Gropp, and Ewing Lusk, ``Optimizing Noncontiguous Accesses in MPI-IO,'' Parallel Computing, (28)1:83--105, -January 2002. +January 2002. http://www.mcs.anl.gov/~thakur/papers/mpi-io-noncontig.ps * R. Thakur, W. Gropp, and E. Lusk, ``On Implementing MPI-IO Portably and with High Performance,'' in Proc. of the Sixth Workshop on I/O in -Parallel and Distributed Systems, May 1999. +Parallel and Distributed Systems, May 1999. http://www.mcs.anl.gov/~thakur/papers/mpio-impl.ps * R. Thakur, W. Gropp, and E. 
Lusk, ``Data Sieving and Collective I/O in ROMIO,'' in Proc. of the 7th Symposium on the Frontiers of Massively Parallel Computation, February 1999, pp. 182--189. -http://www.mcs.anl.gov/~thakur/papers/romio-coll.ps +http://www.mcs.anl.gov/~thakur/papers/romio-coll.ps * R. Thakur, W. Gropp, and E. Lusk, ``A Case for Using MPI's Derived Datatypes to Improve I/O Performance,'' in Proc. of SC98: High @@ -29,7 +29,7 @@ http://www.mcs.anl.gov/~thakur/dtype * R. Thakur, W. Gropp, and E. Lusk, ``An Abstract-Device Interface for Implementing Portable Parallel-I/O Interfaces,'' in Proc. of the 6th -Symposium on the Frontiers of Massively Parallel Computation, +Symposium on the Frontiers of Massively Parallel Computation, October 1996, pp. 180-187. http://www.mcs.anl.gov/~thakur/papers/adio.ps diff --git a/ompi/mca/io/romio314/romio/doc/pubs.bib b/ompi/mca/io/romio314/romio/doc/pubs.bib index 5799bc305cf..946f6f2b541 100644 --- a/ompi/mca/io/romio314/romio/doc/pubs.bib +++ b/ompi/mca/io/romio314/romio/doc/pubs.bib @@ -145,10 +145,10 @@ @InProceedings{lee:rfs } @InProceedings{yu:bgl-io, - author = {Hao Yu and R. K. Sahoo and C. Howson and George. Almasi and - J. G. Castanos and M. Gupta and Jose. E. Moreira and J. J. Parker and - T. E. Engelsiepen and Robert Ross and Rajeev Thakur and Robert Latham - and W. D. Gropp}, + author = {Hao Yu and R. K. Sahoo and C. Howson and George. Almasi and + J. G. Castanos and M. Gupta and Jose. E. Moreira and J. J. Parker and + T. E. Engelsiepen and Robert Ross and Rajeev Thakur and Robert Latham + and W. D. 
Gropp}, title = {High Performance File {I/O} for the {BlueGene/L} Supercomputer}, booktitle = {Proceedings of the 12th International Symposium on High-Performance Computer Architecture (HPCA-12)}, month = {February}, @@ -260,7 +260,7 @@ @InProceedings{yu:lustre-joining title = {Exploiting {Lustre} File Joining for Effective Collective {IO}}, booktitle = {Seventh IEEE International Symposium on Cluster Computing and the Grid (CCGrid 2007)}, month = {May}, - year = {2007}, + year = {2007}, } @InProceedings{yu:opal, @@ -333,7 +333,7 @@ @article{liao:cooperative_caching_mpi_journal title = {Cooperative Client-side File Caching for {MPI} Applications}, journal = {International Journal of High Performance Computing Applications}, volume = {21}, - number = {2}, + number = {2}, pages = {144-154}, month = {May}, year = {2007} @@ -341,10 +341,10 @@ @article{liao:cooperative_caching_mpi_journal @InProceedings{liao:client_cache_eval, author = {Wei-keng Liao and Avery Ching and Kenin Coloma and Alok Choudhary and Lee Ward}, - title = {An Implementation and Evaluation of Client-side File Caching for {MPI-IO}}, - booktitle = {Proceedings of the 21st International Parallel and Distributed + title = {An Implementation and Evaluation of Client-side File Caching for {MPI-IO}}, + booktitle = {Proceedings of the 21st International Parallel and Distributed Processing Symposium (IPDPS), Long Beach, California}, - month = {March}, + month = {March}, year = {2007} } @@ -384,7 +384,7 @@ @Inbook{liao:atomicity_overlap title = {{MPI} Atomicity and Concurrent Overlapping {I/O}}, booktitle = {High Performance Computing: Paradigm and Infrastructure}, pages = {203-218}, - chapter = {10}, + chapter = {10}, month = {November}, year = {2005}, publisher = {John Wiley & Sons Inc}, @@ -396,7 +396,7 @@ @InProceedings{liao:cooperative_write-behind title = {{Cooperative Write-Behind Data Buffering for MPI I/O}}, booktitle = {Proceedings of the 12th European Parallel Virtual Machine and Message Passing 
Interface Conference (EURO PVM/MPI), Sorrento (Naples), Italy}, month = {September}, - year = {2005} + year = {2005} } @InProceedings{liao:app_aware_caching, diff --git a/ompi/mca/io/romio314/romio/doc/romio.bib b/ompi/mca/io/romio314/romio/doc/romio.bib index bb94a2d2ea1..8cbb37f128a 100644 --- a/ompi/mca/io/romio314/romio/doc/romio.bib +++ b/ompi/mca/io/romio314/romio/doc/romio.bib @@ -8,17 +8,17 @@ @Book{grop99a @Misc{mpi97a, author = "{Message Passing Interface Forum}", - title = "{{MPI-2}: Extensions to the Message-Passing Interface}", + title = "{{MPI-2}: Extensions to the Message-Passing Interface}", note = "{\tt http://www.mpi-forum.org/docs/docs.html}", year = {July 1997} } @InProceedings{thak96e, author = {Rajeev Thakur and William Gropp and Ewing Lusk}, - title = {An Abstract-Device Interface for Implementing Portable + title = {An Abstract-Device Interface for Implementing Portable Parallel-{I/O} Interfaces}, booktitle = {Proceedings of the 6th Symposium on the Frontiers of - Massively Parallel Computation}, + Massively Parallel Computation}, publisher = {IEEE Computer Society Press}, pages = {180--187}, month = {October}, @@ -28,8 +28,8 @@ @InProceedings{thak96e @InProceedings{thak99b, author = {Rajeev Thakur and William Gropp and Ewing Lusk}, title = {On Implementing {MPI-IO} Portably and with High Performance}, - booktitle = {Proceedings of the 6th Workshop on I/O in Parallel and - Distributed Systems}, + booktitle = {Proceedings of the 6th Workshop on I/O in Parallel and + Distributed Systems}, pages = {23--32}, year = {1999}, month = {May}, diff --git a/ompi/mca/io/romio314/romio/doc/source-guide.tex b/ompi/mca/io/romio314/romio/doc/source-guide.tex index 96fa7ca58d9..ec151bfeeab 100644 --- a/ompi/mca/io/romio314/romio/doc/source-guide.tex +++ b/ompi/mca/io/romio314/romio/doc/source-guide.tex @@ -61,7 +61,7 @@ %% \newcommand{\ls}[1] - {\dimen0=\fontdimen6\the\font + {\dimen0=\fontdimen6\the\font \lineskip=#1\dimen0 
\advance\lineskip.5\fontdimen5\the\font \advance\lineskip-\dimen0 diff --git a/ompi/mca/io/romio314/romio/doc/users-guide.tex b/ompi/mca/io/romio314/romio/doc/users-guide.tex index 3715431b718..3eebdbd21e6 100644 --- a/ompi/mca/io/romio314/romio/doc/users-guide.tex +++ b/ompi/mca/io/romio314/romio/doc/users-guide.tex @@ -21,7 +21,7 @@ \rule{1.75in}{.01in} \\ -\vskip 1.3in +\vskip 1.3in {\Large\bf Users Guide for ROMIO: A High-Performance, \\ [1ex] Portable MPI-IO Implementation} \\ [4ex] by \\ [2ex] @@ -60,7 +60,7 @@ %% \newcommand{\ls}[1] - {\dimen0=\fontdimen6\the\font + {\dimen0=\fontdimen6\the\font \lineskip=#1\dimen0 \advance\lineskip.5\fontdimen5\the\font \advance\lineskip-\dimen0 @@ -98,10 +98,10 @@ ROMIO version~1.2.4 on various machines. \end{abstract} -\section{Introduction} +\section{Introduction} ROMIO\footnote{\tt http://www.mcs.anl.gov/romio} is a -high-performance, portable implementation of MPI-IO (the I/O chapter in +high-performance, portable implementation of MPI-IO (the I/O chapter in MPI~\cite{mpi97a}). This document describes how to install and use ROMIO version~1.2.4 on various machines. @@ -143,7 +143,7 @@ \section{General Information} This version of ROMIO is included in MPICH 1.2.4; an earlier version is included in at least the following MPI implementations: LAM, HP -MPI, SGI MPI, and NEC MPI. +MPI, SGI MPI, and NEC MPI. Note that proper I/O error codes and classes are returned and the status variable is filled only when used with MPICH revision 1.2.1 or later. @@ -163,7 +163,7 @@ \section{General Information} Please read the limitations of this version of ROMIO that are listed in Section~\ref{sec:limit} of this document (e.g., restriction to homogeneous -environments). +environments). \subsection{ROMIO Optimizations} \label{sec:opt} @@ -179,8 +179,8 @@ \subsection{ROMIO Optimizations} of data. The added network cost of performing an I/O operation across the network, as in parallel I/O systems, is often high because of latency. 
Thus, this naive approach typically performs very poorly because of -the overhead of multiple operations. -% +the overhead of multiple operations. +% In the data sieving technique, a number of noncontiguous regions are accessed by reading a block of data containing all of the regions, including the unwanted data between them (called ``holes''). The regions @@ -229,7 +229,7 @@ \subsection{Hints} \item \texttt{ind\_wr\_buffer\_size} -- Controls the size (in bytes) of the intermediate buffer used by ROMIO when performing data sieving during write operations. Default is \texttt{524288} (512~Kbytes). -\item \texttt{romio\_ds\_read} -- +\item \texttt{romio\_ds\_read} -- Determines when ROMIO will choose to perform data sieving. Valid values are \texttt{enable}, \texttt{disable}, or \texttt{automatic}. Default value is \texttt{automatic}. In \texttt{automatic} mode ROMIO @@ -256,8 +256,8 @@ \subsection{Hints} \texttt{automatic}, ROMIO will use heuristics to determine when to enable the optimization. \item \texttt{romio\_cb\_write} -- Controls when collective buffering is -applied to collective write operations. Valid values are -\texttt{enable}, \texttt{disable}, and \texttt{automatic}. Default is +applied to collective write operations. Valid values are +\texttt{enable}, \texttt{disable}, and \texttt{automatic}. Default is \texttt{automatic}. See the description of \texttt{romio\_cb\_read} for an explanation of the values. \item \texttt{romio\_no\_indep\_rw} -- This hint controls when ``deferred @@ -265,7 +265,7 @@ \subsection{Hints} performing any file operation on non-aggregator nodes. The application is expected to use only collective operations. This is discussed in further detail below. -\item \texttt{cb\_config\_list} -- Provides explicit control over +\item \texttt{cb\_config\_list} -- Provides explicit control over aggregators. This is discussed in further detail below. 
\end{itemize} @@ -286,7 +286,7 @@ \subsection{Hints} cb_config_list => hostspec [ ',' cb_config_list ] hostspec => hostname [ ':' maxprocesses ] hostname => - | '*' + | '*' maxprocesses => | '*' \end{verbatim} @@ -353,13 +353,13 @@ \subsection{Hints} that they are listed in \texttt{cb\_config\_list}. The following hint controls the deferred open feature of romio and are also -applicable to all file system types: +applicable to all file system types: \begin{itemize} -\item \texttt{romio\_no\_indep\_rw} -- If the application plans on performing only +\item \texttt{romio\_no\_indep\_rw} -- If the application plans on performing only collecitve operations and this hint is set to ``true'', then ROMIO can have just the aggregators open a file. The \texttt{cb\_config\_list} and \texttt{cb\_nodes} hints can be given to further control which nodes are - aggregators. + aggregators. \end{itemize} For PVFS, PIOFS, and PFS: @@ -414,7 +414,7 @@ \subsubsection{Hints for PVFS (v1)} \subsubsection{Hints for PVFS (v2)} \label{sec:hints_pvfs} -The PVFS v2 file system has many tuning parameters. +The PVFS v2 file system has many tuning parameters. \begin{itemize} \item dtype i/o \end{itemize} @@ -450,7 +450,7 @@ \subsubsection{Hints for Lustre} collective write performance for some kinds of workloads. So, to avoid this, we define the \texttt{romio\_lustre\_ds\_in\_coll} hint to disable the read-modify-write step in collective I/O. This optimization is distinct from the one in -independent I/O (controlled by \texttt{romio\_ds\_read} and +independent I/O (controlled by \texttt{romio\_ds\_read} and \texttt{romio\_ds\_write}). 
\end{itemize} @@ -466,7 +466,7 @@ \subsubsection{Hints for PANFS (Panasas)} \begin{itemize} \item \texttt{panfs\_layout\_type} Specifies the layout of a file: 2 = RAID0 -3 = RAID5 Parity Stripes +3 = RAID5 Parity Stripes \item \texttt{panfs\_layout\_stripe\_unit} The size of the stripe unit in bytes @@ -484,7 +484,7 @@ \subsubsection{Hints for PANFS (Panasas)} \item \texttt{panfs\_layout\_visit\_policy} If the layout type is RAID5 Parity Stripes, the policy used to determine the parity stripe a given file offset is -written to: 1 = Round Robin +written to: 1 = Round Robin \end{itemize} PanFS supports the ``concurrent write'' (CW) mode, where groups of @@ -508,7 +508,7 @@ \subsubsection{Hints for PANFS (Panasas)} Below is an example PanFS layout using the following parameters: \begin{verbatim} - + - panfs_layout_type = 3 - panfs_layout_total_num_comps = 100 - panfs_layout_parity_stripe_width = 10 @@ -557,7 +557,7 @@ \subsubsection{Systemwide Hints} ROMIO will look for these hints in the file \texttt{/etc/romio-hints}. A user can set the environment variable \texttt{ROMIO\_HINTS} to the name of a file -which ROMIO will use instead. +which ROMIO will use instead. \subsection{Using ROMIO on NFS} @@ -578,17 +578,17 @@ \subsection{Using ROMIO on NFS} The following are some instructions we received from Ian Wells of HP for setting the {\tt noac} option on NFS. We have not tried them -ourselves. We are including them here because you may find +ourselves. We are including them here because you may find them useful. Note that some of the steps may be specific to HP systems, and you may need root permission to execute some of the -commands. +commands. -\begin{verbatim} +\begin{verbatim} >1. 
first confirm you are running nfs version 3 > >rpcnfo -p `hostname` | grep nfs > - >ie + >ie > goedel >rpcinfo -p goedel | grep nfs > 100003 2 udp 2049 nfs > 100003 3 udp 2049 nfs @@ -600,11 +600,11 @@ \subsection{Using ROMIO on NFS} > Here is an example of a correct fstab entry for /epm1: > > ie grep epm1 /etc/fstab - > + > > ROOOOT 11>grep epm1 /etc/fstab > gershwin:/epm1 /rmt/gershwin/epm1 nfs bg,intr,noac 0 0 > - > if the noac option is not present, add it + > if the noac option is not present, add it > and then remount this directory > on each of the machines that will be used to share MPIO files > @@ -615,7 +615,7 @@ \subsection{Using ROMIO on NFS} > >3. Confirm that the directory is mounted noac: > - >ROOOOT >grep gershwin /etc/mnttab + >ROOOOT >grep gershwin /etc/mnttab >gershwin:/epm1 /rmt/gershwin/epm1 nfs >noac,acregmin=0,acregmax=0,acdirmin=0,acdirmax=0 0 0 899911504 \end{verbatim} @@ -626,7 +626,7 @@ \subsubsection{ROMIO, NFS, and Synchronization} the disk before replying that an operation is complete. This means that the actual I/O cost on the server side cannot be hidden with caching, etc. when this option is selected. - + In the ``async'' mode the server can get the data into a buffer (and perhaps put it in the write queue; this depends on the implementation) and reply right away. Obviously if the server were to go down after the @@ -706,7 +706,7 @@ \section{Installation Instructions} Since ROMIO is included in MPICH, LAM, HP MPI, SGI MPI, and NEC MPI, you don't need to install it separately if you are using any of these MPI implementations. If you are using some other MPI, you -can configure and build ROMIO as follows: +can configure and build ROMIO as follows: Untar the tar file as \begin{verbatim} @@ -731,12 +731,12 @@ \section{Installation Instructions} filename}''. The {\tt configure} script by default configures ROMIO for the file -systems most likely +systems most likely to be used on the given machine. 
If you wish, you can explicitly specify the file systems by using the ``{\tt -file\_system}'' option to configure. Multiple file systems can be specified by using `+' as a separator, e.g., \\ \hspace*{.4in} {\tt ./configure -file\_system=xfs+nfs} \\ -For the entire list of options to configure, do\\ +For the entire list of options to configure, do\\ \hspace*{.4in} {\tt ./configure -h | more} \\ After building a specific version, you can install it in a particular directory with \\ @@ -773,7 +773,7 @@ \subsection{Configuring for Linux and Large Files } compiled with a different size of {\tt off\_t}). The latter exposes the gnu libc functions open64(), write64(), read64(), etc. ROMIO does not make use of the 64 bit system calls directly at this time, but we -add this flag for good measure. +add this flag for good measure. If your linux system is relatively new, there is an excellent chance it is running kernel 2.4.0 or newer and glibc-2.2.0 or newer. Add the @@ -804,7 +804,7 @@ \section{Testing ROMIO} \section{Compiling and Running MPI-IO Programs} If ROMIO is not already included in the MPI implementation, you need to include the file {\tt mpio.h} for C or {\tt mpiof.h} for Fortran in -your MPI-IO program. +your MPI-IO program. Note that on HP machines running HPUX and on NEC SX-4, you need to compile Fortran programs with {\tt mpifort}, because {\tt mpif77} does @@ -891,10 +891,10 @@ \section{Usage Tips} \hspace*{.4in} {\tt setenv MPI\_TYPE\_MAX 65536}\\ Use a larger number if you still get the error message. \item If a Fortran program uses a file handle created using ROMIO's C -interface, or vice versa, you must use the functions {\tt MPI\_File\_c2f} +interface, or vice versa, you must use the functions {\tt MPI\_File\_c2f} or {\tt MPI\_File\_f2c} (see \S~4.12.4 in~\cite{mpi97a}). Such a situation occurs, for example, if a Fortran program uses an I/O -library written in C +library written in C with MPI-IO calls. 
Similar functions {\tt MPIO\_Request\_f2c} and {\tt MPIO\_Request\_c2f} are also provided. \item For Fortran programs on the Intel Paragon, you may need @@ -902,7 +902,7 @@ \section{Usage Tips} statement, e.g., \\ \hspace*{.4in} {\tt include '/usr/local/mpich/include/mpif.h'}\\ instead of \\ -\hspace*{.4in} {\tt include 'mpif.h'}\\ +\hspace*{.4in} {\tt include 'mpif.h'}\\ This is because the {\tt -I} option to the Paragon Fortran compiler {\tt if77} doesn't work correctly. It always looks in the default directories first and, @@ -1007,15 +1007,15 @@ \subsection{Major Changes in Version 1.0.2} components of the MPI I/O chapter not yet implemented are file interoperability and error handling. -\item Added support for using ``direct I/O'' on SGI's XFS file system. +\item Added support for using ``direct I/O'' on SGI's XFS file system. Direct I/O is an optional feature of XFS in which data is moved - directly between the user's buffer and the storage devices, bypassing - the file-system cache. This can improve performance significantly on + directly between the user's buffer and the storage devices, bypassing + the file-system cache. This can improve performance significantly on systems with high disk bandwidth. Without high disk bandwidth, regular I/O (that uses the file-system cache) perfoms better. ROMIO, therefore, does not use direct I/O by default. The user can turn on direct I/O (separately for reading and writing) either by - using environment variables or by using MPI's hints mechanism (info). + using environment variables or by using MPI's hints mechanism (info). To use the environment-variables method, do \begin{verbatim} setenv MPIO_DIRECT_READ TRUE @@ -1032,7 +1032,7 @@ \subsection{Major Changes in Version 1.0.2} process in the MPI job. This is not guaranteed by the MPI Standard, but it works with SGI's MPI and the {\tt ch\_shmem} device of MPICH. 
-\item Added support (new ADIO device, {\tt ad\_pvfs}) for the PVFS parallel +\item Added support (new ADIO device, {\tt ad\_pvfs}) for the PVFS parallel file system for Linux clusters, developed at Clemson University (see {\tt http://www.parl.clemson.edu/pvfs}). To use it, you must first install PVFS and then when configuring ROMIO, specify @@ -1048,13 +1048,13 @@ \subsection{Major Changes in Version 1.0.2} \item Uses weak symbols (where available) for building the profiling version, i.e., the PMPI routines. As a result, the size of the library is reduced - considerably. + considerably. \item The Makefiles use {\em virtual paths} if supported by the make utility. GNU {\tt make} supports it, for example. This feature allows you to untar the distribution in some directory, say a slow NFS directory, - and compile the library (create the .o files) in another + and compile the library (create the .o files) in another directory, say on a faster local disk. For example, if the tar file has been untarred in an NFS directory called {\tt /home/thakur/romio}, one can compile it in a different directory, say {\tt /tmp/thakur}, as @@ -1067,7 +1067,7 @@ \subsection{Major Changes in Version 1.0.2} The .o files will be created in {\tt /tmp/thakur}; the library will be created in\newline {\tt /home/thakur/romio/lib/\$ARCH/libmpio.a}. This method works only if the {\tt make} utility supports {\em - virtual paths}. + virtual paths}. If the default {\tt make} utility does not, you can install GNU {\tt make} which does, and specify it to {\tt configure} as \begin{verbatim} @@ -1078,8 +1078,8 @@ \subsection{Major Changes in Version 1.0.2} \item This version is included in MPICH 1.2.0. If you are using MPICH, you need not download ROMIO separately; it gets built as part of MPICH. - The previous version of ROMIO is included in LAM, HP MPI, SGI MPI, and - NEC MPI. 
NEC has also implemented the MPI-IO functions missing + The previous version of ROMIO is included in LAM, HP MPI, SGI MPI, and + NEC MPI. NEC has also implemented the MPI-IO functions missing in ROMIO, and therefore NEC MPI has a complete implementation of MPI-IO. \end{itemize} @@ -1095,7 +1095,7 @@ \subsection{Major Changes in Version 1.0.1} \item New devices {\tt ad\_hfs} for HP HFS file system and {\tt ad\_xfs} for SGI XFS file system. -\item Users no longer need to prefix the filename with the type of +\item Users no longer need to prefix the filename with the type of file system; ROMIO determines the file-system type on its own. \item Added support for 64-bit file sizes on IBM PIOFS, SGI XFS, @@ -1104,11 +1104,11 @@ \subsection{Major Changes in Version 1.0.1} \item {\tt MPI\_Offset} is an 8-byte integer on machines that support 8-byte integers. It is of type {\tt long long} in C and {\tt integer*8} in Fortran. With a Fortran 90 compiler, you can use either -{\tt integer*8} or {\tt integer(kind=MPI\_OFFSET\_KIND)}. -If you {\tt printf} an {\tt MPI\_Offset} in C, remember to use {\tt \%lld} -or {\tt \%ld} as required by your compiler. (See what is used in the test +{\tt integer*8} or {\tt integer(kind=MPI\_OFFSET\_KIND)}. +If you {\tt printf} an {\tt MPI\_Offset} in C, remember to use {\tt \%lld} +or {\tt \%ld} as required by your compiler. (See what is used in the test program {\tt romio/test/misc.c}). -On some machines, ROMIO detects at configure time that {\tt long long} is +On some machines, ROMIO detects at configure time that {\tt long long} is either not supported by the C compiler or it doesn't work properly. In such cases, configure sets {\tt MPI\_Offset} to {\tt long} in C and {\tt integer} in Fortran. This happens on Intel Paragon, Sun4, and FreeBSD. @@ -1124,7 +1124,7 @@ \subsection{Major Changes in Version 1.0.1} \texttt{striping\_unit} (on PFS and PIOFS), \texttt{start\_iodevice} (on PFS and PIOFS), and \texttt{pfs\_svr\_buf} (on PFS only). 
- + \end{itemize} \newpage @@ -1134,7 +1134,7 @@ \subsection{Major Changes in Version 1.0.1} %\bibliography{/homes/thakur/tex/bib/papers,/homes/robl/projects/papers/pario} % this is the pared-down one containing only those references used in % users-guide.tex -% to regenerate, uncomment the full databases above, then run +% to regenerate, uncomment the full databases above, then run % ~gropp/bin/citetags users-guide.tex | sort | uniq | \ % ~gropp/bin/citefind - /homes/thakur/tex/bib/papers.bib \ % /homes/robl/projects/papers/pario diff --git a/ompi/mca/io/romio314/romio/include/mpio.h.in b/ompi/mca/io/romio314/romio/include/mpio.h.in index e6c066e678f..0e3de89b9f4 100644 --- a/ompi/mca/io/romio314/romio/include/mpio.h.in +++ b/ompi/mca/io/romio314/romio/include/mpio.h.in @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -36,7 +36,7 @@ typedef struct ADIOI_FileD *MPI_File; @DEFINE_HAVE_MPI_GREQUEST@ #ifndef HAVE_MPI_GREQUEST -typedef struct ADIOI_RequestD *MPIO_Request; +typedef struct ADIOI_RequestD *MPIO_Request; #else #define MPIO_Request MPI_Request #define MPIO_USES_MPI_REQUEST @@ -54,7 +54,7 @@ typedef struct ADIOI_RequestD *MPIO_Request; this definition. 
*/ #ifndef HAVE_MPI_DATAREP_FUNCTIONS #define HAVE_MPI_DATAREP_FUNCTIONS -typedef int (MPI_Datarep_conversion_function)(void *, MPI_Datatype, int, +typedef int (MPI_Datarep_conversion_function)(void *, MPI_Datatype, int, void *, MPI_Offset, void *); typedef int (MPI_Datarep_extent_function)(MPI_Datatype datatype, MPI_Aint *, void *); @@ -65,7 +65,7 @@ typedef int (MPI_Datarep_extent_function)(MPI_Datatype datatype, MPI_Aint *, @NEEDS_MPI_FINT@ #endif #ifdef NEEDS_MPI_FINT -typedef int MPI_Fint; +typedef int MPI_Fint; #endif #ifndef HAVE_MPI_INFO @@ -81,7 +81,7 @@ typedef int MPI_Fint; #define MPI_MODE_RDONLY 2 /* ADIO_RDONLY */ #define MPI_MODE_RDWR 8 /* ADIO_RDWR */ #define MPI_MODE_WRONLY 4 /* ADIO_WRONLY */ -#define MPI_MODE_CREATE 1 /* ADIO_CREATE */ +#define MPI_MODE_CREATE 1 /* ADIO_CREATE */ #define MPI_MODE_EXCL 64 /* ADIO_EXCL */ #define MPI_MODE_DELETE_ON_CLOSE 16 /* ADIO_DELETE_ON_CLOSE */ #define MPI_MODE_UNIQUE_OPEN 32 /* ADIO_UNIQUE_OPEN */ @@ -123,7 +123,7 @@ typedef int MPI_Fint; /* MPI-IO function prototypes */ -/* The compiler must support ANSI C style prototypes, otherwise +/* The compiler must support ANSI C style prototypes, otherwise long long constants (e.g. 0) may get passed as ints. */ #ifndef HAVE_PRAGMA_HP_SEC_DEF @@ -161,8 +161,8 @@ int MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, const void *buf, int c MPICH_ATTR_POINTER_WITH_TYPE_TAG(3,5); /* nonblocking calls currently use MPIO_Request, because generalized - requests not yet implemented. For the same reason, MPIO_Test and - MPIO_Wait are used to test and wait on nonblocking I/O requests */ + requests not yet implemented. 
For the same reason, MPIO_Test and + MPIO_Wait are used to test and wait on nonblocking I/O requests */ int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPIO_Request *request) MPICH_ATTR_POINTER_WITH_TYPE_TAG(3,5); int MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, const void *buf, int count, @@ -180,8 +180,8 @@ int MPI_File_write_all(MPI_File fh, const void *buf, int count, MPI_Datatype dat MPI_Status *status) MPICH_ATTR_POINTER_WITH_TYPE_TAG(2,4); /* nonblocking calls currently use MPIO_Request, because generalized - requests not yet implemented. For the same reason, MPIO_Test and - MPIO_Wait are used to test and wait on nonblocking I/O requests */ + requests not yet implemented. For the same reason, MPIO_Test and + MPIO_Wait are used to test and wait on nonblocking I/O requests */ int MPI_File_iread(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPIO_Request *request) MPICH_ATTR_POINTER_WITH_TYPE_TAG(2,4); @@ -357,9 +357,9 @@ int PMPI_File_set_info(MPI_File, MPI_Info); int PMPI_File_get_info(MPI_File, MPI_Info *); /* Section 9.3 */ -int PMPI_File_set_view(MPI_File, MPI_Offset, +int PMPI_File_set_view(MPI_File, MPI_Offset, MPI_Datatype, MPI_Datatype, const char *, MPI_Info); -int PMPI_File_get_view(MPI_File, MPI_Offset *, +int PMPI_File_get_view(MPI_File, MPI_Offset *, MPI_Datatype *, MPI_Datatype *, char *); /* Section 9.4.2 */ @@ -377,8 +377,8 @@ int PMPI_File_write_at_all(MPI_File, MPI_Offset, const void *, MPICH_ATTR_POINTER_WITH_TYPE_TAG(3,5); /* nonblocking calls currently use MPIO_Request, because generalized - requests not yet implemented. For the same reason, MPIO_Test and - MPIO_Wait are used to test and wait on nonblocking I/O requests */ + requests not yet implemented. 
For the same reason, MPIO_Test and + MPIO_Wait are used to test and wait on nonblocking I/O requests */ int PMPI_File_iread_at(MPI_File, MPI_Offset, void *, int, MPI_Datatype, MPIO_Request *) @@ -398,8 +398,8 @@ int PMPI_File_write_all(MPI_File, const void *, int, MPI_Datatype, MPI_Status *) MPICH_ATTR_POINTER_WITH_TYPE_TAG(2,4); /* nonblocking calls currently use MPIO_Request, because generalized - requests not yet implemented. For the same reason, MPIO_Test and - MPIO_Wait are used to test and wait on nonblocking I/O requests */ + requests not yet implemented. For the same reason, MPIO_Test and + MPIO_Wait are used to test and wait on nonblocking I/O requests */ int PMPI_File_iread(MPI_File, void *, int, MPI_Datatype, MPIO_Request *) MPICH_ATTR_POINTER_WITH_TYPE_TAG(2,4); @@ -415,7 +415,7 @@ int PMPI_File_read_shared(MPI_File, void *, int, MPI_Datatype, MPI_Status *) MPICH_ATTR_POINTER_WITH_TYPE_TAG(2,4); int PMPI_File_write_shared(MPI_File, const void *, int, MPI_Datatype, MPI_Status *) MPICH_ATTR_POINTER_WITH_TYPE_TAG(2,4); -int PMPI_File_iread_shared(MPI_File, void *, int, +int PMPI_File_iread_shared(MPI_File, void *, int, MPI_Datatype, MPIO_Request *) MPICH_ATTR_POINTER_WITH_TYPE_TAG(2,4); int PMPI_File_iwrite_shared(MPI_File, const void *, int, @@ -474,11 +474,11 @@ int PMPI_File_get_errhandler( MPI_File, MPI_Errhandler * ); #ifndef HAVE_MPI_DARRAY_SUBARRAY /* Section 4.14.4 */ -int PMPI_Type_create_subarray(int, int *, int *, int *, int, +int PMPI_Type_create_subarray(int, int *, int *, int *, int, MPI_Datatype, MPI_Datatype *); /* Section 4.14.5 */ -int PMPI_Type_create_darray(int, int, int, int *, int *, +int PMPI_Type_create_darray(int, int, int, int *, int *, int *, int *, int, MPI_Datatype, MPI_Datatype *); #endif diff --git a/ompi/mca/io/romio314/romio/include/mpiof.h.in b/ompi/mca/io/romio314/romio/include/mpiof.h.in index fa945807bdc..cfb26c5d355 100644 --- a/ompi/mca/io/romio314/romio/include/mpiof.h.in +++ 
b/ompi/mca/io/romio314/romio/include/mpiof.h.in @@ -1,9 +1,9 @@ -! -! Copyright (C) 1997 University of Chicago. +! +! Copyright (C) 1997 University of Chicago. ! See COPYRIGHT notice in top-level directory. ! -! -! user include file for Fortran MPI-IO programs +! +! user include file for Fortran MPI-IO programs ! INTEGER MPI_MODE_RDONLY, MPI_MODE_RDWR, MPI_MODE_WRONLY INTEGER MPI_MODE_DELETE_ON_CLOSE, MPI_MODE_UNIQUE_OPEN diff --git a/ompi/mca/io/romio314/romio/mpi-io/close.c b/ompi/mca/io/romio314/romio/mpi-io/close.c index 160b6615d6e..cfad507369a 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/close.c +++ b/ompi/mca/io/romio314/romio/mpi-io/close.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/delete.c b/ompi/mca/io/romio314/romio/mpi-io/delete.c index 7335136d835..caec75bd835 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/delete.c +++ b/ompi/mca/io/romio314/romio/mpi-io/delete.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -41,7 +41,7 @@ int MPI_File_delete(ROMIO_CONST char *filename, MPI_Info info) ADIOI_Fns *fsops; #ifdef MPI_hpux int fl_xmpi; - + HPMP_IO_START(fl_xmpi, BLKMPIFILEDELETE, TRDTBLOCK, MPI_FILE_NULL, MPI_DATATYPE_NULL, -1); #endif /* MPI_hpux */ @@ -54,14 +54,14 @@ int MPI_File_delete(ROMIO_CONST char *filename, MPI_Info info) if (error_code != MPI_SUCCESS) goto fn_exit; /* resolve file system type from file name; this is a collective call */ - ADIO_ResolveFileType(MPI_COMM_SELF, filename, &file_system, &fsops, + ADIO_ResolveFileType(MPI_COMM_SELF, filename, &file_system, &fsops, &error_code); /* --BEGIN ERROR HANDLING-- */ if (error_code != MPI_SUCCESS) { /* ADIO_ResolveFileType() will print as informative a message as it - * possibly can or call MPIR_Err_setmsg. We just need to propagate + * possibly can or call MPIR_Err_setmsg. We just need to propagate * the error up. In the PRINT_ERR_MSG case MPI_Abort has already * been called as well, so we probably didn't even make it this far. */ @@ -84,7 +84,7 @@ int MPI_File_delete(ROMIO_CONST char *filename, MPI_Info info) if (error_code != MPI_SUCCESS) error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code); /* --END ERROR HANDLING-- */ - + #ifdef MPI_hpux HPMP_IO_END(fl_xmpi, MPI_FILE_NULL, MPI_DATATYPE_NULL, -1); #endif /* MPI_hpux */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/file_c2f.c b/ompi/mca/io/romio314/romio/mpi-io/file_c2f.c index 4eaae9fafa0..4d2032bc8f5 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/file_c2f.c +++ b/ompi/mca/io/romio314/romio/mpi-io/file_c2f.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/mpi-io/file_f2c.c b/ompi/mca/io/romio314/romio/mpi-io/file_f2c.c index b6b4896b389..fded33df357 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/file_f2c.c +++ b/ompi/mca/io/romio314/romio/mpi-io/file_f2c.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/closef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/closef.c index aebdf56a611..514d64c1285 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/closef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/closef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/deletef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/deletef.c index 660129f7605..d44092d4db1 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/deletef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/deletef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/fsyncf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/fsyncf.c index b52245256b8..52e905aec99 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/fsyncf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/fsyncf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -95,7 +95,7 @@ FORTRAN_API void FORT_CALL mpi_file_sync_(MPI_Fint *fh, MPI_Fint *ierr ); FORTRAN_API void FORT_CALL mpi_file_sync_(MPI_Fint *fh, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_sync(fh_c); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_amodef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_amodef.c index 2427f068310..81e77d534a1 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_amodef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_amodef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -95,7 +95,7 @@ FORTRAN_API void FORT_CALL mpi_file_get_amode_(MPI_Fint *fh, MPI_Fint *amode, MP FORTRAN_API void FORT_CALL mpi_file_get_amode_(MPI_Fint *fh, MPI_Fint *amode, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_get_amode(fh_c, amode); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_atomf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_atomf.c index 98bf557e34b..3aa79475fc0 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_atomf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_atomf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -95,7 +95,7 @@ FORTRAN_API void FORT_CALL mpi_file_get_atomicity_(MPI_Fint *fh, MPI_Fint *flag, FORTRAN_API void FORT_CALL mpi_file_get_atomicity_(MPI_Fint *fh, MPI_Fint *flag, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_get_atomicity(fh_c, flag); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_bytofff.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_bytofff.c index fb38b7be4ca..3e28c26fc48 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_bytofff.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_bytofff.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -95,7 +95,7 @@ FORTRAN_API void FORT_CALL mpi_file_get_byte_offset_(MPI_Fint *fh, MPI_Offset *o FORTRAN_API void FORT_CALL mpi_file_get_byte_offset_(MPI_Fint *fh,MPI_Offset *offset, MPI_Offset *disp, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_get_byte_offset(fh_c,*offset,disp); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_errhf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_errhf.c index 96901ad7766..4ec1936c2e9 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_errhf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_errhf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -96,7 +96,7 @@ FORTRAN_API void FORT_CALL mpi_file_get_errhandler_(MPI_Fint *fh, MPI_Fint *err_ { MPI_File fh_c; MPI_Errhandler err_handler_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_get_errhandler(fh_c, &err_handler_c); *err_handler = MPI_Errhandler_c2f(err_handler_c); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_extentf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_extentf.c index 9a11d3c946f..363118b773d 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_extentf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_extentf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -100,7 +100,7 @@ void mpi_file_get_type_extent_(MPI_Fint *fh,MPI_Fint *datatype, MPI_File fh_c; MPI_Datatype datatype_c; MPI_Aint extent_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -118,7 +118,7 @@ FORTRAN_API void FORT_CALL mpi_file_get_type_extent_(MPI_Fint *fh,MPI_Datatype * { MPI_File fh_c; MPI_Aint extent_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_get_type_extent(fh_c,*datatype, &extent_c); *(MPI_Aint*)extent = extent_c; /* Have to assume it's really an MPI_Aint?*/ diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_groupf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_groupf.c index bb8c9a9cba7..abb27073847 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_groupf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_groupf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -109,7 +109,7 @@ FORTRAN_API void FORT_CALL mpi_file_get_group_(MPI_Fint *fh,MPI_Group *group, MP FORTRAN_API void FORT_CALL mpi_file_get_group_(MPI_Fint *fh,MPI_Group *group, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_get_group(fh_c, group); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_infof.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_infof.c index eb2ba551a45..3771727e697 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_infof.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_infof.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -96,7 +96,7 @@ FORTRAN_API void FORT_CALL mpi_file_get_info_(MPI_Fint *fh, MPI_Fint *info_used, { MPI_File fh_c; MPI_Info info_used_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_get_info(fh_c, &info_used_c); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_posn_shf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_posn_shf.c index 8f5bff5c06e..a6a1bbc3e0f 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_posn_shf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_posn_shf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -90,13 +90,13 @@ extern FORTRAN_API void FORT_CALL mpi_file_get_position_shared_( MPI_Fint *, MPI #endif /* Prototype to keep compiler happy */ -FORTRAN_API void FORT_CALL mpi_file_get_position_shared_(MPI_Fint *fh, MPI_Offset *offset, +FORTRAN_API void FORT_CALL mpi_file_get_position_shared_(MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *ierr ); FORTRAN_API void FORT_CALL mpi_file_get_position_shared_(MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_get_position_shared(fh_c, offset); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_posnf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_posnf.c index e21f37674a7..b97190cfcbd 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_posnf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_posnf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -95,7 +95,7 @@ FORTRAN_API void FORT_CALL mpi_file_get_position_(MPI_Fint *fh, MPI_Offset *offs FORTRAN_API void FORT_CALL mpi_file_get_position_(MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_get_position(fh_c, offset); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_sizef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_sizef.c index 71ff17bb562..343704bb577 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_sizef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_sizef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -95,7 +95,7 @@ FORTRAN_API void FORT_CALL mpi_file_get_size_(MPI_Fint *fh, MPI_Offset *size, MP FORTRAN_API void FORT_CALL mpi_file_get_size_(MPI_Fint *fh, MPI_Offset *size, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_get_size(fh_c, size); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_viewf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_viewf.c index 8a4bc3d7fb8..6022379bb10 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/get_viewf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/get_viewf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -127,7 +127,7 @@ void mpi_file_get_view_(MPI_Fint *fh,MPI_Offset *disp,MPI_Fint *etype, /* this should be flagged as an error. */ *ierr = MPI_ERR_UNKNOWN; } - + *etype = MPI_Type_c2f(etype_c); *filetype = MPI_Type_c2f(filetype_c); ADIOI_Free(tmprep); @@ -159,7 +159,7 @@ FORTRAN_API void FORT_CALL mpi_file_get_view_( MPI_Fint *fh, MPI_Offset *disp, M FPRINTF(stderr, "MPI_File_get_view: datarep is an invalid address\n"); MPI_Abort(MPI_COMM_WORLD, 1); } - + tmprep = (char *) ADIOI_Malloc((MPI_MAX_DATAREP_STRING+1) * sizeof(char)); fh_c = MPI_File_f2c(*fh); etype_c = MPI_Type_f2c(*etype); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/iotestf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/iotestf.c index 2a2c96c886b..346fdead50b 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/iotestf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/iotestf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -94,7 +94,7 @@ FORTRAN_API void FORT_CALL mpio_test_(MPI_Fint *request,MPI_Fint *flag,MPI_Statu FORTRAN_API void FORT_CALL mpio_test_(MPI_Fint *request,MPI_Fint *flag,MPI_Status *status, MPI_Fint *ierr ) { MPIO_Request req_c; - + req_c = MPIO_Request_f2c(*request); *ierr = MPIO_Test(&req_c,flag,status); *request = MPIO_Request_c2f(req_c); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/iowaitf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/iowaitf.c index a87ca6a6d91..c4c551914fb 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/iowaitf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/iowaitf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -94,7 +94,7 @@ FORTRAN_API void FORT_CALL mpio_wait_(MPI_Fint *request,MPI_Status *status, MPI_ FORTRAN_API void FORT_CALL mpio_wait_(MPI_Fint *request,MPI_Status *status, MPI_Fint *ierr ) { MPIO_Request req_c; - + req_c = MPIO_Request_f2c(*request); *ierr = MPIO_Wait(&req_c, status); *request = MPIO_Request_c2f(req_c); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/iread_atf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/iread_atf.c index c6a687dcee0..5d983c98a0a 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/iread_atf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/iread_atf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -102,7 +102,7 @@ void mpi_file_iread_at_(MPI_Fint *fh,MPI_Offset *offset,void *buf, MPI_File fh_c; MPIO_Request req_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -122,7 +122,7 @@ FORTRAN_API void FORT_CALL mpi_file_iread_at_(MPI_Fint *fh,MPI_Offset *offset,vo { MPI_File fh_c; MPIO_Request req_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_iread_at(fh_c,*offset,buf,*count,*datatype,&req_c); *request = MPIO_Request_c2f(req_c); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/iread_shf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/iread_shf.c index 84dc3753c4a..9262240effe 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/iread_shf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/iread_shf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -100,7 +100,7 @@ void mpi_file_iread_shared_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_File fh_c; MPIO_Request req_c; MPI_Datatype datatype_c; - + datatype_c = MPI_Type_f2c(*datatype); fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_iread_shared(fh_c,buf,*count,datatype_c,&req_c); @@ -116,7 +116,7 @@ FORTRAN_API void FORT_CALL mpi_file_iread_shared_(MPI_Fint *fh,void *buf,MPI_Fin { MPI_File fh_c; MPIO_Request req_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_iread_shared(fh_c,buf,*count,*datatype,&req_c); *request = MPIO_Request_c2f(req_c); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/ireadf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/ireadf.c index ae9fbaf3bfd..15e05546589 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/ireadf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/ireadf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -100,7 +100,7 @@ void mpi_file_iread_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_File fh_c; MPIO_Request req_c; MPI_Datatype datatype_c; - + datatype_c = MPI_Type_f2c(*datatype); fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_iread(fh_c,buf,*count,datatype_c,&req_c); @@ -116,7 +116,7 @@ FORTRAN_API void FORT_CALL mpi_file_iread_(MPI_Fint *fh,void *buf,MPI_Fint *coun { MPI_File fh_c; MPIO_Request req_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_iread(fh_c,buf,*count,*datatype,&req_c); *request = MPIO_Request_c2f(req_c); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/iwrite_atf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/iwrite_atf.c index b326fc3d1a7..752e95938c8 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/iwrite_atf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/iwrite_atf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -102,7 +102,7 @@ void mpi_file_iwrite_at_(MPI_Fint *fh,MPI_Offset *offset,void *buf, MPI_File fh_c; MPIO_Request req_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -121,7 +121,7 @@ FORTRAN_API void FORT_CALL mpi_file_iwrite_at_(MPI_Fint *fh,MPI_Offset *offset,v { MPI_File fh_c; MPIO_Request req_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_iwrite_at(fh_c,*offset,buf,*count,*datatype,&req_c); *request = MPIO_Request_c2f(req_c); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/iwrite_shf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/iwrite_shf.c index 90ea68777b6..d5233b41ca6 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/iwrite_shf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/iwrite_shf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -99,7 +99,7 @@ void mpi_file_iwrite_shared_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_File fh_c; MPIO_Request req_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -109,14 +109,14 @@ void mpi_file_iwrite_shared_(MPI_Fint *fh,void *buf,MPI_Fint *count, #else /* Prototype to keep compiler happy */ FORTRAN_API void FORT_CALL mpi_file_iwrite_shared_(MPI_Fint *fh,void *buf,MPI_Fint *count, - MPI_Datatype *datatype,MPI_Fint *request, MPI_Fint *ierr ); + MPI_Datatype *datatype,MPI_Fint *request, MPI_Fint *ierr ); FORTRAN_API void FORT_CALL mpi_file_iwrite_shared_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Datatype *datatype,MPI_Fint *request, MPI_Fint *ierr ) { MPI_File fh_c; MPIO_Request req_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_iwrite_shared(fh_c,buf,*count,*datatype,&req_c); *request = MPIO_Request_c2f(req_c); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/iwritef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/iwritef.c index 4410c011a14..4ea2d686b6e 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/iwritef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/iwritef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -100,7 +100,7 @@ void mpi_file_iwrite_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_File fh_c; MPIO_Request req_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -117,7 +117,7 @@ FORTRAN_API void FORT_CALL mpi_file_iwrite_(MPI_Fint *fh,void *buf,MPI_Fint *cou { MPI_File fh_c; MPIO_Request req_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_iwrite(fh_c,buf,*count,*datatype,&req_c); *request = MPIO_Request_c2f(req_c); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/openf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/openf.c index ea1a5d1c47e..1f5bb50f73a 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/openf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/openf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/preallocf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/preallocf.c index 515aa4a9125..6130bf086e0 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/preallocf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/preallocf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -95,7 +95,7 @@ FORTRAN_API void FORT_CALL mpi_file_preallocate_(MPI_Fint *fh,MPI_Offset *size, FORTRAN_API void FORT_CALL mpi_file_preallocate_(MPI_Fint *fh,MPI_Offset *size, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_preallocate(fh_c,*size); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/rd_atallbf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/rd_atallbf.c index 5fff4e9e7ec..379e9b8273b 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/rd_atallbf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/rd_atallbf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -114,7 +114,7 @@ FORTRAN_API void FORT_CALL mpi_file_read_at_all_begin_(MPI_Fint *fh,MPI_Offset * MPI_Fint *count,MPI_Fint *datatype, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read_at_all_begin(fh_c,*offset,buf,*count,(MPI_Datatype) *datatype); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/rd_atallef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/rd_atallef.c index 8a3441126dd..702e8f81a20 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/rd_atallef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/rd_atallef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -95,7 +95,7 @@ FORTRAN_API void FORT_CALL mpi_file_read_at_all_end_(MPI_Fint *fh,void *buf,MPI_ FORTRAN_API void FORT_CALL mpi_file_read_at_all_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read_at_all_end(fh_c,buf,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allbf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allbf.c index 5708a03c8dc..4370cbb6f90 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allbf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allbf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -99,7 +99,7 @@ void mpi_file_read_all_begin_(MPI_Fint *fh,void *buf,MPI_Fint *count, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -113,7 +113,7 @@ FORTRAN_API void FORT_CALL mpi_file_read_all_begin_(MPI_Fint *fh,void *buf,MPI_F FORTRAN_API void FORT_CALL mpi_file_read_all_begin_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr ){ MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read_all_begin(fh_c,buf,*count,(MPI_Datatype) *datatype); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allef.c index 776b182d640..3fbc321f8b6 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -90,13 +90,13 @@ extern FORTRAN_API void FORT_CALL mpi_file_read_all_end_( MPI_Fint *, void*, MPI #endif /* Prototype to keep compiler happy */ -FORTRAN_API void FORT_CALL mpi_file_read_all_end_(MPI_Fint *fh,void *buf,MPI_Status *status, +FORTRAN_API void FORT_CALL mpi_file_read_all_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ); FORTRAN_API void FORT_CALL mpi_file_read_all_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read_all_end(fh_c,buf,status); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allf.c index 99e1229b7f1..ae0d8563dd4 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_allf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -99,7 +99,7 @@ void mpi_file_read_all_(MPI_Fint *fh,void *buf,MPI_Fint *count, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -113,7 +113,7 @@ FORTRAN_API void FORT_CALL mpi_file_read_all_(MPI_Fint *fh,void *buf,MPI_Fint *c FORTRAN_API void FORT_CALL mpi_file_read_all_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ){ MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read_all(fh_c,buf,*count,(MPI_Datatype)*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_atallf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_atallf.c index b9f30a00562..cf059a33eb3 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_atallf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_atallf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -118,7 +118,7 @@ FORTRAN_API void FORT_CALL mpi_file_read_at_all_(MPI_Fint *fh,MPI_Offset *offset MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read_at_all(fh_c,*offset,buf,*count,(MPI_Datatype)*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_atf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_atf.c index 2602e399bf6..c6e00bb274f 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_atf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_atf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -99,7 +99,7 @@ void mpi_file_read_at_(MPI_Fint *fh,MPI_Offset *offset,void *buf, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -114,7 +114,7 @@ FORTRAN_API void FORT_CALL mpi_file_read_at_(MPI_Fint *fh,MPI_Offset *offset,voi MPI_Fint *count,MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read_at(fh_c,*offset,buf,*count,(MPI_Datatype)*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordbf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordbf.c index 74389174f4c..ca1d03e298e 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordbf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordbf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -99,7 +99,7 @@ void mpi_file_read_ordered_begin_(MPI_Fint *fh,void *buf,MPI_Fint *count, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -113,7 +113,7 @@ FORTRAN_API void FORT_CALL mpi_file_read_ordered_begin_(MPI_Fint *fh,void *buf,M FORTRAN_API void FORT_CALL mpi_file_read_ordered_begin_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype,MPI_Fint *ierr ){ MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read_ordered_begin(fh_c,buf,*count,(MPI_Datatype)*datatype); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordef.c index 63425c8ad33..83b7c9476c5 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -90,13 +90,13 @@ extern FORTRAN_API void FORT_CALL mpi_file_read_ordered_end_( MPI_Fint *, void*, #endif /* Prototype to keep compiler happy */ -FORTRAN_API void FORT_CALL mpi_file_read_ordered_end_(MPI_Fint *fh,void *buf,MPI_Status *status, +FORTRAN_API void FORT_CALL mpi_file_read_ordered_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ); FORTRAN_API void FORT_CALL mpi_file_read_ordered_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read_ordered_end(fh_c,buf,status); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordf.c index a45ae1282ff..9b8633224c2 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_ordf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -99,7 +99,7 @@ void mpi_file_read_ordered_(MPI_Fint *fh,void *buf,MPI_Fint *count, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -113,7 +113,7 @@ FORTRAN_API void FORT_CALL mpi_file_read_ordered_(MPI_Fint *fh,void *buf,MPI_Fin FORTRAN_API void FORT_CALL mpi_file_read_ordered_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ){ MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read_ordered(fh_c,buf,*count,(MPI_Datatype)*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_shf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_shf.c index 937e54fb95e..098bf242fdf 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/read_shf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/read_shf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -98,7 +98,7 @@ void mpi_file_read_shared_(MPI_Fint *fh,void *buf,MPI_Fint *count, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -107,12 +107,12 @@ void mpi_file_read_shared_(MPI_Fint *fh,void *buf,MPI_Fint *count, #else /* Prototype to keep compiler happy */ FORTRAN_API void FORT_CALL mpi_file_read_shared_(MPI_Fint *fh,void *buf,MPI_Fint *count, - MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ); + MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ); FORTRAN_API void FORT_CALL mpi_file_read_shared_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read_shared(fh_c,buf,*count,(MPI_Datatype)*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/readf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/readf.c index c802739139e..4600e591bc3 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/readf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/readf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -99,7 +99,7 @@ void mpi_file_read_(MPI_Fint *fh,void *buf,MPI_Fint *count, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -114,7 +114,7 @@ FORTRAN_API void FORT_CALL mpi_file_read_(MPI_Fint *fh,void *buf,MPI_Fint *count MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_read(fh_c,buf,*count,(MPI_Datatype)*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/seek_shf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/seek_shf.c index 158e626c0e7..74a65df92c8 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/seek_shf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/seek_shf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -90,13 +90,13 @@ extern FORTRAN_API void FORT_CALL mpi_file_seek_shared_( MPI_Fint *, MPI_Offset #endif /* Prototype to keep compiler happy */ -FORTRAN_API void FORT_CALL mpi_file_seek_shared_(MPI_Fint *fh,MPI_Offset *offset,MPI_Fint *whence, +FORTRAN_API void FORT_CALL mpi_file_seek_shared_(MPI_Fint *fh,MPI_Offset *offset,MPI_Fint *whence, MPI_Fint *ierr ); FORTRAN_API void FORT_CALL mpi_file_seek_shared_(MPI_Fint *fh,MPI_Offset *offset,MPI_Fint *whence, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_seek_shared(fh_c,*offset,*whence); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/seekf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/seekf.c index 8e125d1849c..d05bd7789e3 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/seekf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/seekf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -95,7 +95,7 @@ FORTRAN_API void FORT_CALL mpi_file_seek_(MPI_Fint *fh,MPI_Offset *offset,MPI_Fi FORTRAN_API void FORT_CALL mpi_file_seek_(MPI_Fint *fh,MPI_Offset *offset,MPI_Fint *whence, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_seek(fh_c,*offset,*whence); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/set_atomf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/set_atomf.c index c4388950da5..2c8aa6ce6df 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/set_atomf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/set_atomf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -95,7 +95,7 @@ FORTRAN_API void FORT_CALL mpi_file_set_atomicity_(MPI_Fint *fh,MPI_Fint *flag, FORTRAN_API void FORT_CALL mpi_file_set_atomicity_(MPI_Fint *fh,MPI_Fint *flag, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_set_atomicity(fh_c,*flag); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/set_errhf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/set_errhf.c index c622660898c..7f440192087 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/set_errhf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/set_errhf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -96,7 +96,7 @@ FORTRAN_API void FORT_CALL mpi_file_set_errhandler_(MPI_Fint *fh, MPI_Fint *err_ { MPI_File fh_c; MPI_Errhandler err_handler_c; - + fh_c = MPI_File_f2c(*fh); err_handler_c = MPI_Errhandler_f2c(*err_handler); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/set_infof.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/set_infof.c index 85c05e8f1da..12731b273d8 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/set_infof.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/set_infof.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -96,7 +96,7 @@ FORTRAN_API void FORT_CALL mpi_file_set_info_(MPI_Fint *fh, MPI_Fint *info, MPI_ { MPI_File fh_c; MPI_Info info_c; - + fh_c = MPI_File_f2c(*fh); info_c = MPI_Info_f2c(*info); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/set_sizef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/set_sizef.c index c595f2b8550..cda35300572 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/set_sizef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/set_sizef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -95,7 +95,7 @@ FORTRAN_API void FORT_CALL mpi_file_set_size_(MPI_Fint *fh,MPI_Offset *size, MPI FORTRAN_API void FORT_CALL mpi_file_set_size_(MPI_Fint *fh,MPI_Offset *size, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_set_size(fh_c,*size); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/set_viewf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/set_viewf.c index 97151d978fb..6bc7d931646 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/set_viewf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/set_viewf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -104,10 +104,10 @@ void mpi_file_set_view_(MPI_Fint *fh,MPI_Offset *disp,MPI_Fint *etype, { char *newstr; MPI_File fh_c; - int i, real_len; + int i, real_len; MPI_Datatype etype_c, filetype_c; MPI_Info info_c; - + etype_c = MPI_Type_f2c(*etype); filetype_c = MPI_Type_f2c(*filetype); info_c = MPI_Info_f2c(*info); @@ -127,9 +127,9 @@ void mpi_file_set_view_(MPI_Fint *fh,MPI_Offset *disp,MPI_Fint *etype, newstr = (char *) ADIOI_Malloc((real_len+1)*sizeof(char)); ADIOI_Strncpy(newstr, datarep, real_len); newstr[real_len] = '\0'; - + fh_c = MPI_File_f2c(*fh); - + *ierr = MPI_File_set_view(fh_c,*disp,etype_c,filetype_c,newstr,info_c); ADIOI_Free(newstr); @@ -152,9 +152,9 @@ FORTRAN_API void FORT_CALL mpi_file_set_view_( MPI_Fint *fh, MPI_Offset *disp, M #endif char *newstr; MPI_File fh_c; - int i, real_len; + int i, real_len; MPI_Info info_c; - + info_c = MPI_Info_f2c(*info); /* strip trailing blanks in datarep */ @@ -172,9 +172,9 @@ FORTRAN_API void FORT_CALL mpi_file_set_view_( MPI_Fint *fh, MPI_Offset *disp, M newstr = (char *) ADIOI_Malloc((real_len+1)*sizeof(char)); ADIOI_Strncpy(newstr, datarep, real_len); newstr[real_len] = '\0'; - + fh_c = MPI_File_f2c(*fh); - + *ierr = 
MPI_File_set_view(fh_c,*disp,*etype,*filetype,newstr,info_c); ADIOI_Free(newstr); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/wr_atallbf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/wr_atallbf.c index 836dd378aee..36e32f0d449 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/wr_atallbf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/wr_atallbf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -100,7 +100,7 @@ void mpi_file_write_at_all_begin_(MPI_Fint *fh,MPI_Offset *offset,void *buf, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -115,7 +115,7 @@ FORTRAN_API void FORT_CALL mpi_file_write_at_all_begin_(MPI_Fint *fh,MPI_Offset MPI_Fint *count,MPI_Fint *datatype, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write_at_all_begin(fh_c,*offset,buf,*count,(MPI_Datatype)*datatype); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/wr_atallef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/wr_atallef.c index 7426c48bf2f..f190a3538b1 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/wr_atallef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/wr_atallef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -90,13 +90,13 @@ extern FORTRAN_API void FORT_CALL mpi_file_write_at_all_end_( MPI_Fint *, void*, #endif /* Prototype to keep compiler happy */ -FORTRAN_API void FORT_CALL mpi_file_write_at_all_end_(MPI_Fint *fh,void *buf,MPI_Status *status, +FORTRAN_API void FORT_CALL mpi_file_write_at_all_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ); FORTRAN_API void FORT_CALL mpi_file_write_at_all_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write_at_all_end(fh_c,buf,status); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allbf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allbf.c index e47d3cd2008..680d0925f14 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allbf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allbf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -97,7 +97,7 @@ void mpi_file_write_all_begin_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr ){ MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -110,7 +110,7 @@ FORTRAN_API void FORT_CALL mpi_file_write_all_begin_(MPI_Fint *fh,void *buf,MPI_ FORTRAN_API void FORT_CALL mpi_file_write_all_begin_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr ){ MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write_all_begin(fh_c,buf,*count,(MPI_Datatype)*datatype); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allef.c index 9ccc1eb8db9..3ae1018f399 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -90,11 +90,11 @@ extern FORTRAN_API void FORT_CALL mpi_file_write_all_end_( MPI_Fint *, void*, MP #endif /* Prototype to keep compiler happy */ -FORTRAN_API void FORT_CALL mpi_file_write_all_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ); +FORTRAN_API void FORT_CALL mpi_file_write_all_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ); FORTRAN_API void FORT_CALL mpi_file_write_all_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ){ MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write_all_end(fh_c,buf,status); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allf.c index bddcac15092..f4e57f3b22c 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_allf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -98,7 +98,7 @@ void mpi_file_write_all_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ){ MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -112,7 +112,7 @@ FORTRAN_API void FORT_CALL mpi_file_write_all_(MPI_Fint *fh,void *buf,MPI_Fint * FORTRAN_API void FORT_CALL mpi_file_write_all_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ){ MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write_all(fh_c,buf,*count,*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_atallf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_atallf.c index 030e7ae359d..1e377a7905f 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_atallf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_atallf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -101,7 +101,7 @@ void mpi_file_write_at_all_(MPI_Fint *fh,MPI_Offset *offset,void *buf, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -118,7 +118,7 @@ FORTRAN_API void FORT_CALL mpi_file_write_at_all_(MPI_Fint *fh,MPI_Offset *offse MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write_at_all(fh_c,*offset,buf,*count,*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_atf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_atf.c index 8ba429dcc6b..e7b66404252 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_atf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_atf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -101,7 +101,7 @@ void mpi_file_write_at_(MPI_Fint *fh,MPI_Offset *offset,void *buf, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -118,7 +118,7 @@ FORTRAN_API void FORT_CALL mpi_file_write_at_(MPI_Fint *fh,MPI_Offset *offset,vo MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write_at(fh_c,*offset,buf,*count,*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordbf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordbf.c index 41b9accfbef..d3f1226f55d 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordbf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordbf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -98,7 +98,7 @@ void mpi_file_write_ordered_begin_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr ){ MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -112,7 +112,7 @@ FORTRAN_API void FORT_CALL mpi_file_write_ordered_begin_(MPI_Fint *fh,void *buf, FORTRAN_API void FORT_CALL mpi_file_write_ordered_begin_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr ){ MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write_ordered_begin(fh_c,buf,*count,*datatype); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordef.c index a979f70eee9..8eaea853446 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -90,12 +90,12 @@ extern FORTRAN_API void FORT_CALL mpi_file_write_ordered_end_( MPI_Fint *, void* #endif /* Prototype to keep compiler happy */ -FORTRAN_API void FORT_CALL mpi_file_write_ordered_end_(MPI_Fint *fh,void *buf,MPI_Status *status, +FORTRAN_API void FORT_CALL mpi_file_write_ordered_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ); FORTRAN_API void FORT_CALL mpi_file_write_ordered_end_(MPI_Fint *fh,void *buf,MPI_Status *status, MPI_Fint *ierr ){ MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write_ordered_end(fh_c,buf,status); diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordf.c index fe96406d9bb..4c779bfffe0 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_ordf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -98,7 +98,7 @@ void mpi_file_write_ordered_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ){ MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -112,7 +112,7 @@ FORTRAN_API void FORT_CALL mpi_file_write_ordered_(MPI_Fint *fh,void *buf,MPI_Fi FORTRAN_API void FORT_CALL mpi_file_write_ordered_(MPI_Fint *fh,void *buf,MPI_Fint *count, MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ){ MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write_ordered(fh_c,buf,*count,*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_shf.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_shf.c index 30d80e0ee6d..4aa0138c90a 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/write_shf.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/write_shf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -99,7 +99,7 @@ void mpi_file_write_shared_(MPI_Fint *fh,void *buf,MPI_Fint *count, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -114,7 +114,7 @@ FORTRAN_API void FORT_CALL mpi_file_write_shared_(MPI_Fint *fh,void *buf,MPI_Fin MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write_shared(fh_c, buf,*count,*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fortran/writef.c b/ompi/mca/io/romio314/romio/mpi-io/fortran/writef.c index 9b07ba095fa..7bab1687815 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fortran/writef.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fortran/writef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -99,7 +99,7 @@ void mpi_file_write_(MPI_Fint *fh,void *buf,MPI_Fint *count, { MPI_File fh_c; MPI_Datatype datatype_c; - + fh_c = MPI_File_f2c(*fh); datatype_c = MPI_Type_f2c(*datatype); @@ -114,7 +114,7 @@ FORTRAN_API void FORT_CALL mpi_file_write_(MPI_Fint *fh,void *buf,MPI_Fint *coun MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr ) { MPI_File fh_c; - + fh_c = MPI_File_f2c(*fh); *ierr = MPI_File_write(fh_c, buf,*count,*datatype,status); } diff --git a/ompi/mca/io/romio314/romio/mpi-io/fsync.c b/ompi/mca/io/romio314/romio/mpi-io/fsync.c index 0e4f1b964ad..3f457f7a57e 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/fsync.c +++ b/ompi/mca/io/romio314/romio/mpi-io/fsync.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -69,7 +69,7 @@ int MPI_File_sync(MPI_File fh) #ifdef MPI_hpux HPMP_IO_END(fl_xmpi, adio_fh, MPI_DATATYPE_NULL, -1); #endif /* MPI_hpux */ - + fn_exit: MPIU_THREAD_CS_EXIT(ALLFUNC,); return error_code; diff --git a/ompi/mca/io/romio314/romio/mpi-io/get_amode.c b/ompi/mca/io/romio314/romio/mpi-io/get_amode.c index 4770c839598..3ff5077ae22 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/get_amode.c +++ b/ompi/mca/io/romio314/romio/mpi-io/get_amode.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -41,7 +41,7 @@ int MPI_File_get_amode(MPI_File fh, int *amode) int error_code=MPI_SUCCESS; static char myname[] = "MPI_FILE_GET_AMODE"; ADIO_File adio_fh; - + adio_fh = MPIO_File_resolve(fh); /* --BEGIN ERROR HANDLING-- */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/get_atom.c b/ompi/mca/io/romio314/romio/mpi-io/get_atom.c index 1a9eae1389e..306c31ce89b 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/get_atom.c +++ b/ompi/mca/io/romio314/romio/mpi-io/get_atom.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -41,7 +41,7 @@ int MPI_File_get_atomicity(MPI_File fh, int *flag) int error_code; ADIO_File adio_fh; static char myname[] = "MPI_FILE_GET_ATOMICITY"; - + adio_fh = MPIO_File_resolve(fh); /* --BEGIN ERROR HANDLING-- */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/get_bytoff.c b/ompi/mca/io/romio314/romio/mpi-io/get_bytoff.c index f5cb452e5a8..7376e7af5f6 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/get_bytoff.c +++ b/ompi/mca/io/romio314/romio/mpi-io/get_bytoff.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -27,7 +27,7 @@ int MPI_File_get_byte_offset(MPI_File fh, MPI_Offset offset, MPI_Offset *disp) _ #endif /*@ - MPI_File_get_byte_offset - Returns the absolute byte position in + MPI_File_get_byte_offset - Returns the absolute byte position in the file corresponding to "offset" etypes relative to the current view diff --git a/ompi/mca/io/romio314/romio/mpi-io/get_errh.c b/ompi/mca/io/romio314/romio/mpi-io/get_errh.c index 5a4d9ee4845..6915218fde3 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/get_errh.c +++ b/ompi/mca/io/romio314/romio/mpi-io/get_errh.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/get_extent.c b/ompi/mca/io/romio314/romio/mpi-io/get_extent.c index 31a841c0706..103f8815c62 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/get_extent.c +++ b/ompi/mca/io/romio314/romio/mpi-io/get_extent.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/mpi-io/get_group.c b/ompi/mca/io/romio314/romio/mpi-io/get_group.c index 4d82ed8cb1f..34293c98b10 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/get_group.c +++ b/ompi/mca/io/romio314/romio/mpi-io/get_group.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -26,7 +26,7 @@ int MPI_File_get_group(MPI_File fh, MPI_Group *group) __attribute__((weak,alias( #endif /*@ - MPI_File_get_group - Returns the group of processes that + MPI_File_get_group - Returns the group of processes that opened the file Input Parameters: diff --git a/ompi/mca/io/romio314/romio/mpi-io/get_info.c b/ompi/mca/io/romio314/romio/mpi-io/get_info.c index 1d0b397411b..7685c9f04ad 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/get_info.c +++ b/ompi/mca/io/romio314/romio/mpi-io/get_info.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/get_posn.c b/ompi/mca/io/romio314/romio/mpi-io/get_posn.c index 49d8e4919bd..1d5c59f600f 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/get_posn.c +++ b/ompi/mca/io/romio314/romio/mpi-io/get_posn.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -27,7 +27,7 @@ int MPI_File_get_position(MPI_File fh, MPI_Offset *offset) __attribute__((weak,a #endif /*@ - MPI_File_get_position - Returns the current position of the + MPI_File_get_position - Returns the current position of the individual file pointer in etype units relative to the current view diff --git a/ompi/mca/io/romio314/romio/mpi-io/get_posn_sh.c b/ompi/mca/io/romio314/romio/mpi-io/get_posn_sh.c index c6172c9d734..d0bfa44a4d8 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/get_posn_sh.c +++ b/ompi/mca/io/romio314/romio/mpi-io/get_posn_sh.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -26,7 +26,7 @@ int MPI_File_get_position_shared(MPI_File fh, MPI_Offset *offset) __attribute__( #endif /*@ - MPI_File_get_position_shared - Returns the current position of the + MPI_File_get_position_shared - Returns the current position of the shared file pointer in etype units relative to the current view Input Parameters: diff --git a/ompi/mca/io/romio314/romio/mpi-io/get_size.c b/ompi/mca/io/romio314/romio/mpi-io/get_size.c index d7836c2791e..ad4b8223a79 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/get_size.c +++ b/ompi/mca/io/romio314/romio/mpi-io/get_size.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/get_view.c b/ompi/mca/io/romio314/romio/mpi-io/get_view.c index f2d288952c2..e4a7e947bca 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/get_view.c +++ b/ompi/mca/io/romio314/romio/mpi-io/get_view.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -62,7 +62,7 @@ int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, MPI_Datatype *etype, if (datarep <= (char *) 0) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_ARG, + myname, __LINE__, MPI_ERR_ARG, "**iodatarepnomem", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; @@ -70,14 +70,14 @@ int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, MPI_Datatype *etype, /* --END ERROR HANDLING-- */ *disp = adio_fh->disp; - ADIOI_Strncpy(datarep, + ADIOI_Strncpy(datarep, (adio_fh->is_external32 ? "external32": "native"), MPI_MAX_DATAREP_STRING); MPI_Type_get_envelope(adio_fh->etype, &i, &j, &k, &combiner); if (combiner == MPI_COMBINER_NAMED) *etype = adio_fh->etype; else { /* FIXME: It is wrong to use MPI_Type_contiguous; the user could choose to - re-implement MPI_Type_contiguous in an unexpected way. Either use + re-implement MPI_Type_contiguous in an unexpected way. Either use MPIR_Barrier_impl as in MPICH or PMPI_Type_contiguous */ MPI_Type_contiguous(1, adio_fh->etype, ©_etype); diff --git a/ompi/mca/io/romio314/romio/mpi-io/glue/default/mpio_err.c b/ompi/mca/io/romio314/romio/mpi-io/glue/default/mpio_err.c index fbfc8a2c971..11c50bf5aae 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/glue/default/mpio_err.c +++ b/ompi/mca/io/romio314/romio/mpi-io/glue/default/mpio_err.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 2004 University of Chicago. +/* + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/mpi-io/glue/default/mpio_file.c b/ompi/mca/io/romio314/romio/mpi-io/glue/default/mpio_file.c index 7a43b01b946..51df8784b13 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/glue/default/mpio_file.c +++ b/ompi/mca/io/romio314/romio/mpi-io/glue/default/mpio_file.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2004 University of Chicago. + * Copyright (C) 2004 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -43,7 +43,7 @@ extern int ADIOI_Ftable_max; MPI_File MPIO_File_f2c(MPI_Fint fh) { #ifndef INT_LT_POINTER - return (MPI_File) ((void *) fh); + return (MPI_File) ((void *) fh); /* the extra cast is to get rid of a compiler warning on Exemplar. The warning is because MPI_File points to a structure containing longlongs, which may be 8-byte aligned. But MPI_Fint itself @@ -71,15 +71,15 @@ MPI_Fint MPIO_File_c2f(MPI_File fh) if (!ADIOI_Ftable) { ADIOI_Ftable_max = 1024; ADIOI_Ftable = (MPI_File *) - ADIOI_Malloc(ADIOI_Ftable_max*sizeof(MPI_File)); - ADIOI_Ftable_ptr = 0; /* 0 can't be used though, because + ADIOI_Malloc(ADIOI_Ftable_max*sizeof(MPI_File)); + ADIOI_Ftable_ptr = 0; /* 0 can't be used though, because MPI_FILE_NULL=0 */ for (i=0; icookie != ADIOI_REQ_COOKIE)) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, diff --git a/ompi/mca/io/romio314/romio/mpi-io/iotestall.c b/ompi/mca/io/romio314/romio/mpi-io/iotestall.c index d88aeb77667..c6db21802f3 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/iotestall.c +++ b/ompi/mca/io/romio314/romio/mpi-io/iotestall.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 2003 University of Chicago. + * Copyright (C) 2003 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -31,7 +31,7 @@ int MPIO_Testall(int count, MPIO_Request requests[], int *flag, MPI_Status statuses[]) { - int done, i, err; + int done, i, err; MPIU_THREADPRIV_DECL; MPIU_THREAD_CS_ENTER(ALLFUNC,); @@ -40,10 +40,10 @@ int MPIO_Testall(int count, MPIO_Request requests[], int *flag, goto fn_exit; } - /* This is actually very difficult to do. We can't use MPIO_Test, + /* This is actually very difficult to do. We can't use MPIO_Test, since we must change the requests only if *ALL* requests are complete */ - /* FIXME: THIS IS NOT CORRECT (see above). But most applications won't + /* FIXME: THIS IS NOT CORRECT (see above). But most applications won't care */ done = 1; for (i=0; iatomicity)) ADIO_IreadContig(adio_fh, buf, count, datatype, file_ptr_type, - off, request, &error_code); + off, request, &error_code); else { /* to maintain strict atomicity semantics with other concurrent operations, lock (exclusive) and call blocking routine */ @@ -151,7 +151,7 @@ int MPIOI_File_iread(MPI_File fh, MPI_Offset offset, int file_ptr_type, void *bu } } else ADIO_IreadStrided(adio_fh, buf, count, datatype, file_ptr_type, - offset, request, &error_code); + offset, request, &error_code); fn_exit: MPIU_THREAD_CS_EXIT(ALLFUNC,); diff --git a/ompi/mca/io/romio314/romio/mpi-io/iread_at.c b/ompi/mca/io/romio314/romio/mpi-io/iread_at.c index 29bda8462f1..29fd7cf31c2 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/iread_at.c +++ b/ompi/mca/io/romio314/romio/mpi-io/iread_at.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -45,7 +45,7 @@ Output Parameters: .N fortran @*/ -int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, +int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPIO_Request *request) { int error_code; diff --git a/ompi/mca/io/romio314/romio/mpi-io/iread_sh.c b/ompi/mca/io/romio314/romio/mpi-io/iread_sh.c index b6713463fb1..ca1bc17a7cd 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/iread_sh.c +++ b/ompi/mca/io/romio314/romio/mpi-io/iread_sh.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -110,7 +110,7 @@ int MPI_File_iread_shared(MPI_File fh, void *buf, int count, } ADIO_ReadContig(adio_fh, buf, count, datatype, ADIO_EXPLICIT_OFFSET, - off, &status, &error_code); + off, &status, &error_code); if (adio_fh->file_system != ADIO_NFS) { diff --git a/ompi/mca/io/romio314/romio/mpi-io/iwrite.c b/ompi/mca/io/romio314/romio/mpi-io/iwrite.c index cd4f69fbc50..fc8dd44544e 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/iwrite.c +++ b/ompi/mca/io/romio314/romio/mpi-io/iwrite.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -118,7 +118,7 @@ int MPIOI_File_iwrite(MPI_File fh, ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); ADIOI_Datatype_iscontig(adio_fh->filetype, &filetype_is_contig); - + ADIOI_TEST_DEFERRED(adio_fh, myname, &error_code); if (buftype_is_contig && filetype_is_contig) { @@ -144,7 +144,7 @@ int MPIOI_File_iwrite(MPI_File fh, } ADIO_WriteContig(adio_fh, buf, count, datatype, file_ptr_type, off, - &status, &error_code); + &status, &error_code); if (ADIO_Feature(adio_fh, ADIO_LOCKS) ) { @@ -153,7 +153,7 @@ int MPIOI_File_iwrite(MPI_File fh, if (error_code == MPI_SUCCESS) { nbytes = count * datatype_size; } - + MPIO_Completed_request_create(&adio_fh, nbytes, &error_code, request); } } diff --git a/ompi/mca/io/romio314/romio/mpi-io/iwrite_at.c b/ompi/mca/io/romio314/romio/mpi-io/iwrite_at.c index 10862bd0334..ee536754239 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/iwrite_at.c +++ b/ompi/mca/io/romio314/romio/mpi-io/iwrite_at.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -47,7 +47,7 @@ Output Parameters: #endif int MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, ROMIO_CONST void *buf, - int count, MPI_Datatype datatype, + int count, MPI_Datatype datatype, MPIO_Request *request) { int error_code; diff --git a/ompi/mca/io/romio314/romio/mpi-io/iwrite_sh.c b/ompi/mca/io/romio314/romio/mpi-io/iwrite_sh.c index c436221c14f..458202d5ac2 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/iwrite_sh.c +++ b/ompi/mca/io/romio314/romio/mpi-io/iwrite_sh.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -92,7 +92,7 @@ int MPI_File_iwrite_shared(MPI_File fh, ROMIO_CONST void *buf, int count, off = adio_fh->disp + adio_fh->etype_size * shared_fp; if (!(adio_fh->atomicity)) ADIO_IwriteContig(adio_fh, buf, count, datatype, ADIO_EXPLICIT_OFFSET, - off, request, &error_code); + off, request, &error_code); else { /* to maintain strict atomicity semantics with other concurrent operations, lock (exclusive) and call blocking routine */ @@ -101,7 +101,7 @@ int MPI_File_iwrite_shared(MPI_File fh, ROMIO_CONST void *buf, int count, ADIOI_WRITE_LOCK(adio_fh, off, SEEK_SET, bufsize); ADIO_WriteContig(adio_fh, buf, count, datatype, ADIO_EXPLICIT_OFFSET, - off, &status, &error_code); + off, &status, &error_code); if (adio_fh->file_system != ADIO_NFS) ADIOI_UNLOCK(adio_fh, off, SEEK_SET, bufsize); @@ -111,7 +111,7 @@ int MPI_File_iwrite_shared(MPI_File fh, ROMIO_CONST void *buf, int count, } else ADIO_IwriteStrided(adio_fh, buf, count, datatype, ADIO_EXPLICIT_OFFSET, - shared_fp, request, &error_code); + shared_fp, request, &error_code); fn_exit: MPIU_THREAD_CS_EXIT(ALLFUNC,); diff --git a/ompi/mca/io/romio314/romio/mpi-io/mpich_fileutil.c b/ompi/mca/io/romio314/romio/mpi-io/mpich_fileutil.c index b853b588fa7..7f35430c704 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/mpich_fileutil.c +++ b/ompi/mca/io/romio314/romio/mpi-io/mpich_fileutil.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. 
*/ @@ -14,13 +14,13 @@ */ int MPIR_ROMIO_Get_file_errhand( MPI_File, MPI_Errhandler * ); int MPIR_ROMIO_Set_file_errhand( MPI_File, MPI_Errhandler ); -void MPIR_Get_file_error_routine( MPI_Errhandler, - void (**)(MPI_File *, int *, ...), +void MPIR_Get_file_error_routine( MPI_Errhandler, + void (**)(MPI_File *, int *, ...), int * ); /* These next two routines are used to allow MPICH to access/set the error handers in the MPI_File structure until MPICH knows about the - file structure, and to handle the errhandler structure, which + file structure, and to handle the errhandler structure, which includes a reference count. Not currently used. */ int MPIR_ROMIO_Set_file_errhand( MPI_File file_ptr, MPI_Errhandler e ) { @@ -30,7 +30,7 @@ int MPIR_ROMIO_Set_file_errhand( MPI_File file_ptr, MPI_Errhandler e ) return MPI_ERR_FILE; } /* --END ERROR HANDLING-- */ - else + else file_ptr->err_handler = e; return 0; } @@ -49,7 +49,7 @@ int MPIR_ROMIO_Get_file_errhand( MPI_File file_ptr, MPI_Errhandler *e ) } /* --END ERROR HANDLING-- */ else { - if (file_ptr->err_handler == MPI_ERRORS_RETURN) + if (file_ptr->err_handler == MPI_ERRORS_RETURN) *e = 0; else *e = file_ptr->err_handler; diff --git a/ompi/mca/io/romio314/romio/mpi-io/mpioimpl.h b/ompi/mca/io/romio314/romio/mpi-io/mpioimpl.h index 8f67bcea70c..a73561acb2b 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/mpioimpl.h +++ b/ompi/mca/io/romio314/romio/mpi-io/mpioimpl.h @@ -1,13 +1,13 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ /* header file for MPI-IO implementation. 
not intended to be - user-visible */ + user-visible */ #ifndef MPIOIMPL_INCLUDE #define MPIOIMPL_INCLUDE @@ -31,8 +31,8 @@ #else /* not ROMIO_INSIDE_MPICH */ /* Any MPI implementation that wishes to follow the thread-safety and - error reporting features provided by MPICH must implement these - four functions. Defining these as empty should not change the behavior + error reporting features provided by MPICH must implement these + four functions. Defining these as empty should not change the behavior of correct programs */ #define MPIU_THREAD_CS_ENTER(x,y) #define MPIU_THREAD_CS_EXIT(x,y) diff --git a/ompi/mca/io/romio314/romio/mpi-io/mpioprof.h b/ompi/mca/io/romio314/romio/mpi-io/mpioprof.h index 108fd6441ed..672642028b5 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/mpioprof.h +++ b/ompi/mca/io/romio314/romio/mpi-io/mpioprof.h @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ -/* - This header file converts all MPI_ names into PMPI_ names, for +/* + This header file converts all MPI_ names into PMPI_ names, for building the profiling interface */ @@ -63,7 +63,7 @@ #undef MPI_File_read #define MPI_File_read PMPI_File_read #undef MPI_File_read_all -#define MPI_File_read_all PMPI_File_read_all +#define MPI_File_read_all PMPI_File_read_all #undef MPI_File_write #define MPI_File_write PMPI_File_write #undef MPI_File_write_all diff --git a/ompi/mca/io/romio314/romio/mpi-io/mpir-mpioinit.c b/ompi/mca/io/romio314/romio/mpi-io/mpir-mpioinit.c index be78b45e989..914b8d53890 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/mpir-mpioinit.c +++ b/ompi/mca/io/romio314/romio/mpi-io/mpir-mpioinit.c @@ -28,8 +28,8 @@ void MPIR_MPIOInit(int * error_code) { /* --BEGIN ERROR HANDLING-- */ if (!flag) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, - MPIR_ERR_RECOVERABLE, myname, __LINE__, + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_OTHER, "**initialized", 0); *error_code = MPIO_Err_return_file(MPI_FILE_NULL, *error_code); return; @@ -37,7 +37,7 @@ void MPIR_MPIOInit(int * error_code) { /* --END ERROR HANDLING-- */ MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval, - (void *) 0); + (void *) 0); /* put a dummy attribute on MPI_COMM_SELF, because we want the delete function to be called when MPI_COMM_SELF is freed. 
Clarified @@ -51,6 +51,6 @@ void MPIR_MPIOInit(int * error_code) { } *error_code = MPI_SUCCESS; } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/mpiu_external32.c b/ompi/mca/io/romio314/romio/mpi-io/mpiu_external32.c index ce2253bb4bd..39b4949f582 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/mpiu_external32.c +++ b/ompi/mca/io/romio314/romio/mpi-io/mpiu_external32.c @@ -41,7 +41,7 @@ int MPIU_write_external32_conversion_fn (const void *userbuf, MPI_Datatype datat else { void *tmp_buf = NULL; - tmp_buf = ADIOI_Malloc(bytes); + tmp_buf = ADIOI_Malloc(bytes); if (!tmp_buf) { mpi_errno = MPI_ERR_NO_MEM; @@ -99,7 +99,7 @@ int MPIU_read_external32_conversion_fn(void *userbuf, MPI_Datatype datatype, else { void *tmp_buf = NULL; - tmp_buf = ADIOI_Malloc(bytes); + tmp_buf = ADIOI_Malloc(bytes); if (!tmp_buf) { mpi_errno = MPI_ERR_NO_MEM; @@ -167,6 +167,6 @@ int MPIU_external32_buffer_setup(const void * buf, int count, MPI_Datatype type, } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/mpiu_greq.c b/ompi/mca/io/romio314/romio/mpi-io/mpiu_greq.c index cb6e9d2ed49..77589e0ea48 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/mpiu_greq.c +++ b/ompi/mca/io/romio314/romio/mpi-io/mpiu_greq.c @@ -36,7 +36,7 @@ int MPIU_Greq_free_fn(void *extra_state) { /* frees the memory allocated in MPIO_Completed_request_create */ ADIOI_Free(extra_state); - + return MPI_SUCCESS; } int MPIU_Greq_cancel_fn(void *extra_state, int complete) diff --git a/ompi/mca/io/romio314/romio/mpi-io/open.c b/ompi/mca/io/romio314/romio/mpi-io/open.c index a2a68c95f22..4174dad10ec 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/open.c +++ b/ompi/mca/io/romio314/romio/mpi-io/open.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -71,7 +71,7 @@ int MPI_File_open(MPI_Comm comm, ROMIO_CONST char *filename, int amode, if (error_code || flag) { error_code = MPIO_Err_create_code(error_code, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_COMM, + myname, __LINE__, MPI_ERR_COMM, "**commnotintra", 0); goto fn_fail; } @@ -80,16 +80,16 @@ int MPI_File_open(MPI_Comm comm, ROMIO_CONST char *filename, int amode, ((amode&MPI_MODE_WRONLY)?1:0) != 1 ) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_AMODE, + myname, __LINE__, MPI_ERR_AMODE, "**fileamodeone", 0); goto fn_fail; } - if ((amode & MPI_MODE_RDONLY) && + if ((amode & MPI_MODE_RDONLY) && ((amode & MPI_MODE_CREATE) || (amode & MPI_MODE_EXCL))) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_AMODE, + myname, __LINE__, MPI_ERR_AMODE, "**fileamoderead", 0); goto fn_fail; } @@ -97,7 +97,7 @@ int MPI_File_open(MPI_Comm comm, ROMIO_CONST char *filename, int amode, if ((amode & MPI_MODE_RDWR) && (amode & MPI_MODE_SEQUENTIAL)) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_AMODE, + myname, __LINE__, MPI_ERR_AMODE, "**fileamodeseq", 0); goto fn_fail; } @@ -132,7 +132,7 @@ int MPI_File_open(MPI_Comm comm, ROMIO_CONST char *filename, int amode, if (error_code != MPI_SUCCESS) { /* ADIO_ResolveFileType() will print as informative a message as it - * possibly can or call MPIO_Err_setmsg. We just need to propagate + * possibly can or call MPIO_Err_setmsg. We just need to propagate * the error up. 
*/ goto fn_fail; @@ -149,7 +149,7 @@ int MPI_File_open(MPI_Comm comm, ROMIO_CONST char *filename, int amode, filename = tmp + 1; } -/* use default values for disp, etype, filetype */ +/* use default values for disp, etype, filetype */ *fh = ADIO_Open(comm, dupcomm, filename, file_system, fsops, amode, 0, MPI_BYTE, MPI_BYTE, info, ADIO_PERM_NULL, &error_code); @@ -163,10 +163,10 @@ int MPI_File_open(MPI_Comm comm, ROMIO_CONST char *filename, int amode, /* if MPI_MODE_SEQUENTIAL requested, file systems cannot do explicit offset * or independent file pointer accesses, leaving not much else aside from * shared file pointer accesses. */ - if ( !ADIO_Feature((*fh), ADIO_SHARED_FP) && (amode & MPI_MODE_SEQUENTIAL)) + if ( !ADIO_Feature((*fh), ADIO_SHARED_FP) && (amode & MPI_MODE_SEQUENTIAL)) { - error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, + error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, + myname, __LINE__, MPI_ERR_UNSUPPORTED_OPERATION, "**iosequnsupported", 0); ADIO_Close(*fh, &error_code); @@ -176,7 +176,7 @@ int MPI_File_open(MPI_Comm comm, ROMIO_CONST char *filename, int amode, /* determine name of file that will hold the shared file pointer */ /* can't support shared file pointers on a file system that doesn't support file locking. */ - if ((error_code == MPI_SUCCESS) && + if ((error_code == MPI_SUCCESS) && ADIO_Feature((*fh), ADIO_SHARED_FP)) { MPI_Comm_rank(dupcomm, &rank); ADIOI_Shfp_fname(*fh, rank, &error_code); @@ -184,7 +184,7 @@ int MPI_File_open(MPI_Comm comm, ROMIO_CONST char *filename, int amode, goto fn_fail; /* if MPI_MODE_APPEND, set the shared file pointer to end of file. - indiv. file pointer already set to end of file in ADIO_Open. + indiv. file pointer already set to end of file in ADIO_Open. Here file view is just bytes. 
*/ if ((*fh)->access_mode & MPI_MODE_APPEND) { if (rank == (*fh)->hints->ranklist[0]) /* only one person need set the sharedfp */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/prealloc.c b/ompi/mca/io/romio314/romio/mpi-io/prealloc.c index 3853f8da662..e1cf7f91fd4 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/prealloc.c +++ b/ompi/mca/io/romio314/romio/mpi-io/prealloc.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -92,7 +92,7 @@ int MPI_File_preallocate(MPI_File fh, MPI_Offset size) /* --END ERROR HANDLING-- */ } MPI_Barrier(adio_fh->comm); - + #ifdef MPI_hpux HPMP_IO_END(fl_xmpi, adio_fh, MPI_DATATYPE_NULL, -1); #endif /* MPI_hpux */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/rd_atallb.c b/ompi/mca/io/romio314/romio/mpi-io/rd_atallb.c index a7120b3da21..c6cc5e5cd5e 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/rd_atallb.c +++ b/ompi/mca/io/romio314/romio/mpi-io/rd_atallb.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/rd_atalle.c b/ompi/mca/io/romio314/romio/mpi-io/rd_atalle.c index 207d5ba3698..69eafa21b73 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/rd_atalle.c +++ b/ompi/mca/io/romio314/romio/mpi-io/rd_atalle.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/mpi-io/read.c b/ompi/mca/io/romio314/romio/mpi-io/read.c index ff109bd4f68..cff5a323179 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/read.c +++ b/ompi/mca/io/romio314/romio/mpi-io/read.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -155,7 +155,7 @@ int MPIOI_File_read(MPI_File fh, } ADIO_ReadContig(adio_fh, xbuf, count, datatype, file_ptr_type, - off, status, &error_code); + off, status, &error_code); if ((adio_fh->atomicity) && ADIO_Feature(adio_fh, ADIO_LOCKS)) { ADIOI_UNLOCK(adio_fh, off, SEEK_SET, bufsize); diff --git a/ompi/mca/io/romio314/romio/mpi-io/read_all.c b/ompi/mca/io/romio314/romio/mpi-io/read_all.c index 72ea518e199..0761e454f83 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/read_all.c +++ b/ompi/mca/io/romio314/romio/mpi-io/read_all.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/read_allb.c b/ompi/mca/io/romio314/romio/mpi-io/read_allb.c index 3f0fd90728d..3bc0304f06f 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/read_allb.c +++ b/ompi/mca/io/romio314/romio/mpi-io/read_allb.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -85,7 +85,7 @@ int MPIOI_File_read_all_begin(MPI_File fh, goto fn_exit; } /* --END ERROR HANDLING-- */ - + MPI_Type_size_x(datatype, &datatype_size); /* --BEGIN ERROR HANDLING-- */ @@ -95,7 +95,7 @@ int MPIOI_File_read_all_begin(MPI_File fh, if (adio_fh->split_coll_count) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_IO, + myname, __LINE__, MPI_ERR_IO, "**iosplitcoll", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; diff --git a/ompi/mca/io/romio314/romio/mpi-io/read_alle.c b/ompi/mca/io/romio314/romio/mpi-io/read_alle.c index a86c465b123..823f815da52 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/read_alle.c +++ b/ompi/mca/io/romio314/romio/mpi-io/read_alle.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -69,7 +69,7 @@ int MPIOI_File_read_all_end(MPI_File fh, if (!(adio_fh->split_coll_count)) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_IO, + myname, __LINE__, MPI_ERR_IO, "**iosplitcollnone", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; diff --git a/ompi/mca/io/romio314/romio/mpi-io/read_at.c b/ompi/mca/io/romio314/romio/mpi-io/read_at.c index 53dc8c42e3a..732741ba536 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/read_at.c +++ b/ompi/mca/io/romio314/romio/mpi-io/read_at.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/mpi-io/read_atall.c b/ompi/mca/io/romio314/romio/mpi-io/read_atall.c index 2bed66be4d6..17efe4f2ae5 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/read_atall.c +++ b/ompi/mca/io/romio314/romio/mpi-io/read_atall.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -45,7 +45,7 @@ Output Parameters: .N fortran @*/ int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf, - int count, MPI_Datatype datatype, + int count, MPI_Datatype datatype, MPI_Status *status) { int error_code; diff --git a/ompi/mca/io/romio314/romio/mpi-io/read_ord.c b/ompi/mca/io/romio314/romio/mpi-io/read_ord.c index 5bfe77ce390..9bcc5981104 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/read_ord.c +++ b/ompi/mca/io/romio314/romio/mpi-io/read_ord.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -77,7 +77,7 @@ int MPI_File_read_ordered(MPI_File fh, void *buf, int count, MPI_Comm_rank(adio_fh->comm, &myrank); incr = (count*datatype_size)/adio_fh->etype_size; - + /* Use a message as a 'token' to order the operations */ source = myrank - 1; dest = myrank + 1; diff --git a/ompi/mca/io/romio314/romio/mpi-io/read_ordb.c b/ompi/mca/io/romio314/romio/mpi-io/read_ordb.c index 32971f12133..f3019ed766f 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/read_ordb.c +++ b/ompi/mca/io/romio314/romio/mpi-io/read_ordb.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -62,7 +62,7 @@ int MPI_File_read_ordered_begin(MPI_File fh, void *buf, int count, if (adio_fh->split_coll_count) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_IO, + myname, __LINE__, MPI_ERR_IO, "**iosplitcoll", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; diff --git a/ompi/mca/io/romio314/romio/mpi-io/read_orde.c b/ompi/mca/io/romio314/romio/mpi-io/read_orde.c index 5bf853ed7ca..88a5a88e64e 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/read_orde.c +++ b/ompi/mca/io/romio314/romio/mpi-io/read_orde.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -55,7 +55,7 @@ int MPI_File_read_ordered_end(MPI_File fh, void *buf, MPI_Status *status) if (!(adio_fh->split_coll_count)) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_IO, + myname, __LINE__, MPI_ERR_IO, "**iosplitcollnone", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; diff --git a/ompi/mca/io/romio314/romio/mpi-io/read_sh.c b/ompi/mca/io/romio314/romio/mpi-io/read_sh.c index 8f2ed6d114f..288593261d9 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/read_sh.c +++ b/ompi/mca/io/romio314/romio/mpi-io/read_sh.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -119,14 +119,14 @@ int MPI_File_read_shared(MPI_File fh, void *buf, int count, off = adio_fh->disp + adio_fh->etype_size * shared_fp; /* if atomic mode requested, lock (exclusive) the region, because there - could be a concurrent noncontiguous request. On NFS, locking + could be a concurrent noncontiguous request. 
On NFS, locking is done in the ADIO_ReadContig.*/ if ((adio_fh->atomicity) && (adio_fh->file_system != ADIO_NFS)) ADIOI_WRITE_LOCK(adio_fh, off, SEEK_SET, bufsize); ADIO_ReadContig(adio_fh, xbuf, count, datatype, ADIO_EXPLICIT_OFFSET, - off, status, &error_code); + off, status, &error_code); if ((adio_fh->atomicity) && (adio_fh->file_system != ADIO_NFS)) ADIOI_UNLOCK(adio_fh, off, SEEK_SET, bufsize); diff --git a/ompi/mca/io/romio314/romio/mpi-io/register_datarep.c b/ompi/mca/io/romio314/romio/mpi-io/register_datarep.c index b6f1d68e9da..a0a4df14f68 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/register_datarep.c +++ b/ompi/mca/io/romio314/romio/mpi-io/register_datarep.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -28,7 +28,7 @@ int MPI_Register_datarep(const char *datarep, MPI_Datarep_conversion_function *r #endif /*@ - MPI_Register_datarep - Register functions for user-defined data + MPI_Register_datarep - Register functions for user-defined data representations Input Parameters: @@ -45,12 +45,12 @@ Input Parameters: Notes: This function allows the user to provide routines to convert data from an external representation, used within a file, and the native representation, - used within the CPU. There is one predefined data representation, + used within the CPU. There is one predefined data representation, 'external32'. Please consult the MPI-2 standard for details on this function. 
.N fortran - + @*/ int MPI_Register_datarep(ROMIO_CONST char *datarep, MPI_Datarep_conversion_function *read_conversion_fn, diff --git a/ompi/mca/io/romio314/romio/mpi-io/seek.c b/ompi/mca/io/romio314/romio/mpi-io/seek.c index c4ed20f2232..a9e92569ced 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/seek.c +++ b/ompi/mca/io/romio314/romio/mpi-io/seek.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/seek_sh.c b/ompi/mca/io/romio314/romio/mpi-io/seek_sh.c index 772852c9d07..17320da8664 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/seek_sh.c +++ b/ompi/mca/io/romio314/romio/mpi-io/seek_sh.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -108,7 +108,7 @@ int MPI_File_seek_shared(MPI_File fh, MPI_Offset offset, int whence) error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, myname, __LINE__, - MPI_ERR_INTERN, + MPI_ERR_INTERN, "**iosharedfailed", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; @@ -163,7 +163,7 @@ int MPI_File_seek_shared(MPI_File fh, MPI_Offset offset, int whence) error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, myname, __LINE__, - MPI_ERR_INTERN, + MPI_ERR_INTERN, "**iosharedfailed", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; diff --git a/ompi/mca/io/romio314/romio/mpi-io/set_atom.c b/ompi/mca/io/romio314/romio/mpi-io/set_atom.c index 20121167041..246b5d7feda 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/set_atom.c +++ b/ompi/mca/io/romio314/romio/mpi-io/set_atom.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -60,7 +60,7 @@ int MPI_File_set_atomicity(MPI_File fh, int flag) /* --BEGIN ERROR HANDLING-- */ if (tmp_flag != flag) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_ARG, + myname, __LINE__, MPI_ERR_ARG, "**notsame", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; diff --git a/ompi/mca/io/romio314/romio/mpi-io/set_errh.c b/ompi/mca/io/romio314/romio/mpi-io/set_errh.c index 793f4804868..a591dde64b1 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/set_errh.c +++ b/ompi/mca/io/romio314/romio/mpi-io/set_errh.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/mpi-io/set_info.c b/ompi/mca/io/romio314/romio/mpi-io/set_info.c index 1d95f2cc306..a57a88f3e2a 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/set_info.c +++ b/ompi/mca/io/romio314/romio/mpi-io/set_info.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/set_size.c b/ompi/mca/io/romio314/romio/mpi-io/set_size.c index 74c3f6cd18c..b76b903062c 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/set_size.c +++ b/ompi/mca/io/romio314/romio/mpi-io/set_size.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -88,7 +88,7 @@ int MPI_File_set_size(MPI_File fh, MPI_Offset size) ADIO_Resize(adio_fh, size, &error_code); /* TODO: what to do with error code? */ - + /* --BEGIN ERROR HANDLING-- */ if (error_code != MPI_SUCCESS) error_code = MPIO_Err_return_file(adio_fh, error_code); diff --git a/ompi/mca/io/romio314/romio/mpi-io/set_view.c b/ompi/mca/io/romio314/romio/mpi-io/set_view.c index 4a820a88e37..ce1dc9b0c34 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/set_view.c +++ b/ompi/mca/io/romio314/romio/mpi-io/set_view.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -58,7 +58,7 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, if ((disp < 0) && (disp != MPI_DISPLACEMENT_CURRENT)) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_ARG, + myname, __LINE__, MPI_ERR_ARG, "**iobaddisp", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; @@ -95,7 +95,7 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, (disp != MPI_DISPLACEMENT_CURRENT)) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_ARG, + myname, __LINE__, MPI_ERR_ARG, "**iodispifseq", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; @@ -105,7 +105,7 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, !(adio_fh->access_mode & MPI_MODE_SEQUENTIAL)) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_ARG, + myname, __LINE__, MPI_ERR_ARG, "**iodispifseq", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; @@ -135,7 +135,7 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, - MPI_ERR_UNSUPPORTED_DATAREP, + MPI_ERR_UNSUPPORTED_DATAREP, "**unsupporteddatarep",0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; @@ -167,11 +167,11 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, if (ADIO_Feature(adio_fh, ADIO_SHARED_FP) && (adio_fh->shared_fp_fd != ADIO_FILE_NULL)) { - /* only one process needs to set it to zero, but I don't want to - create the shared-file-pointer file if shared file pointers have - not been used so far. Therefore, every process that has already - opened the shared-file-pointer file sets the shared file pointer - to zero. 
If the file was not opened, the value is automatically + /* only one process needs to set it to zero, but I don't want to + create the shared-file-pointer file if shared file pointers have + not been used so far. Therefore, every process that has already + opened the shared-file-pointer file sets the shared file pointer + to zero. If the file was not opened, the value is automatically zero. Note that shared file pointer is stored as no. of etypes relative to the current view, whereas indiv. file pointer is stored in bytes. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/wr_atallb.c b/ompi/mca/io/romio314/romio/mpi-io/wr_atallb.c index 83eb28bc886..18023d73b31 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/wr_atallb.c +++ b/ompi/mca/io/romio314/romio/mpi-io/wr_atallb.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/wr_atalle.c b/ompi/mca/io/romio314/romio/mpi-io/wr_atalle.c index fa2ab514231..cac4931488a 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/wr_atalle.c +++ b/ompi/mca/io/romio314/romio/mpi-io/wr_atalle.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi-io/write.c b/ompi/mca/io/romio314/romio/mpi-io/write.c index 899991d4897..c9f3197aca5 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/write.c +++ b/ompi/mca/io/romio314/romio/mpi-io/write.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -72,7 +72,7 @@ int MPIOI_File_write(MPI_File fh, MPI_Datatype datatype, char *myname, MPI_Status *status) -{ +{ int error_code, buftype_is_contig, filetype_is_contig; MPI_Count datatype_size; ADIO_Offset off, bufsize; @@ -128,7 +128,7 @@ int MPIOI_File_write(MPI_File fh, xbuf = buf; if (adio_fh->is_external32) { error_code = MPIU_external32_buffer_setup(buf, count, datatype, &e32buf); - if (error_code != MPI_SUCCESS) + if (error_code != MPI_SUCCESS) goto fn_exit; xbuf = e32buf; @@ -157,7 +157,7 @@ int MPIOI_File_write(MPI_File fh, } ADIO_WriteContig(adio_fh, xbuf, count, datatype, file_ptr_type, - off, status, &error_code); + off, status, &error_code); if ((adio_fh->atomicity) && ADIO_Feature(adio_fh, ADIO_LOCKS)) { diff --git a/ompi/mca/io/romio314/romio/mpi-io/write_all.c b/ompi/mca/io/romio314/romio/mpi-io/write_all.c index ab77a95b61f..d0cd8ed7ecc 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/write_all.c +++ b/ompi/mca/io/romio314/romio/mpi-io/write_all.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -112,7 +112,7 @@ int MPIOI_File_write_all(MPI_File fh, xbuf = buf; if (adio_fh->is_external32) { error_code = MPIU_external32_buffer_setup(buf, count, datatype, &e32buf); - if (error_code != MPI_SUCCESS) + if (error_code != MPI_SUCCESS) goto fn_exit; xbuf = e32buf; diff --git a/ompi/mca/io/romio314/romio/mpi-io/write_allb.c b/ompi/mca/io/romio314/romio/mpi-io/write_allb.c index 4678477c3bd..be5977cb45b 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/write_allb.c +++ b/ompi/mca/io/romio314/romio/mpi-io/write_allb.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -89,7 +89,7 @@ int MPIOI_File_write_all_begin(MPI_File fh, if (adio_fh->split_coll_count) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_IO, + myname, __LINE__, MPI_ERR_IO, "**iosplitcoll", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; @@ -108,7 +108,7 @@ int MPIOI_File_write_all_begin(MPI_File fh, xbuf = buf; if (adio_fh->is_external32) { error_code = MPIU_external32_buffer_setup(buf, count, datatype, &e32buf); - if (error_code != MPI_SUCCESS) + if (error_code != MPI_SUCCESS) goto fn_exit; xbuf = e32buf; diff --git a/ompi/mca/io/romio314/romio/mpi-io/write_alle.c b/ompi/mca/io/romio314/romio/mpi-io/write_alle.c index 8b999a1eb53..79be7975600 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/write_alle.c +++ b/ompi/mca/io/romio314/romio/mpi-io/write_alle.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -69,7 +69,7 @@ int MPIOI_File_write_all_end(MPI_File fh, if (!(adio_fh->split_coll_count)) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_IO, + myname, __LINE__, MPI_ERR_IO, "**iosplitcollnone", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; diff --git a/ompi/mca/io/romio314/romio/mpi-io/write_at.c b/ompi/mca/io/romio314/romio/mpi-io/write_at.c index 4362322b67b..cc4a2af16e1 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/write_at.c +++ b/ompi/mca/io/romio314/romio/mpi-io/write_at.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/mpi-io/write_atall.c b/ompi/mca/io/romio314/romio/mpi-io/write_atall.c index 6f815cdd4cb..300fadffee0 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/write_atall.c +++ b/ompi/mca/io/romio314/romio/mpi-io/write_atall.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -45,7 +45,7 @@ Output Parameters: .N fortran @*/ int MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, ROMIO_CONST void *buf, - int count, MPI_Datatype datatype, + int count, MPI_Datatype datatype, MPI_Status *status) { int error_code; diff --git a/ompi/mca/io/romio314/romio/mpi-io/write_ord.c b/ompi/mca/io/romio314/romio/mpi-io/write_ord.c index 60c14f3e803..72d8b085bbd 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/write_ord.c +++ b/ompi/mca/io/romio314/romio/mpi-io/write_ord.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
* */ @@ -92,7 +92,7 @@ int MPI_File_write_ordered(MPI_File fh, ROMIO_CONST void *buf, int count, /* --BEGIN ERROR HANDLING-- */ if (error_code != MPI_SUCCESS) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, - myname, __LINE__, MPI_ERR_INTERN, + myname, __LINE__, MPI_ERR_INTERN, "**iosharedfailed", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; @@ -104,7 +104,7 @@ int MPI_File_write_ordered(MPI_File fh, ROMIO_CONST void *buf, int count, xbuf = buf; if (adio_fh->is_external32) { error_code = MPIU_external32_buffer_setup(buf, count, datatype, &e32buf); - if (error_code != MPI_SUCCESS) + if (error_code != MPI_SUCCESS) goto fn_exit; xbuf = e32buf; diff --git a/ompi/mca/io/romio314/romio/mpi-io/write_ordb.c b/ompi/mca/io/romio314/romio/mpi-io/write_ordb.c index 77bbab030ee..aaacc9ec1a6 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/write_ordb.c +++ b/ompi/mca/io/romio314/romio/mpi-io/write_ordb.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -64,7 +64,7 @@ int MPI_File_write_ordered_begin(MPI_File fh, ROMIO_CONST void *buf, int count, if (adio_fh->split_coll_count) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_IO, + myname, __LINE__, MPI_ERR_IO, "**iosplitcoll", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; @@ -98,7 +98,7 @@ int MPI_File_write_ordered_begin(MPI_File fh, ROMIO_CONST void *buf, int count, if (error_code != MPI_SUCCESS) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, - myname, __LINE__, MPI_ERR_INTERN, + myname, __LINE__, MPI_ERR_INTERN, "**iosharedfailed", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; @@ -110,7 +110,7 @@ int MPI_File_write_ordered_begin(MPI_File fh, ROMIO_CONST void *buf, int count, xbuf = buf; if (adio_fh->is_external32) { error_code = MPIU_external32_buffer_setup(buf, count, datatype, &e32buf); - if (error_code != MPI_SUCCESS) + if (error_code != MPI_SUCCESS) goto fn_exit; xbuf = e32buf; diff --git a/ompi/mca/io/romio314/romio/mpi-io/write_orde.c b/ompi/mca/io/romio314/romio/mpi-io/write_orde.c index a0a6eea461c..34617a96df2 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/write_orde.c +++ b/ompi/mca/io/romio314/romio/mpi-io/write_orde.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -55,7 +55,7 @@ int MPI_File_write_ordered_end(MPI_File fh, ROMIO_CONST void *buf, MPI_Status *s if (!(adio_fh->split_coll_count)) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_IO, + myname, __LINE__, MPI_ERR_IO, "**iosplitcollnone", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; diff --git a/ompi/mca/io/romio314/romio/mpi-io/write_sh.c b/ompi/mca/io/romio314/romio/mpi-io/write_sh.c index fc2b7b23de1..00c90449f5a 100644 --- a/ompi/mca/io/romio314/romio/mpi-io/write_sh.c +++ b/ompi/mca/io/romio314/romio/mpi-io/write_sh.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -95,7 +95,7 @@ int MPI_File_write_shared(MPI_File fh, ROMIO_CONST void *buf, int count, if (error_code != MPI_SUCCESS) { error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, - myname, __LINE__, MPI_ERR_INTERN, + myname, __LINE__, MPI_ERR_INTERN, "**iosharedfailed", 0); error_code = MPIO_Err_return_file(adio_fh, error_code); goto fn_exit; @@ -105,7 +105,7 @@ int MPI_File_write_shared(MPI_File fh, ROMIO_CONST void *buf, int count, xbuf = buf; if (adio_fh->is_external32) { error_code = MPIU_external32_buffer_setup(buf, count, datatype, &e32buf); - if (error_code != MPI_SUCCESS) + if (error_code != MPI_SUCCESS) goto fn_exit; xbuf = e32buf; @@ -118,14 +118,14 @@ int MPI_File_write_shared(MPI_File fh, ROMIO_CONST void *buf, int count, off = adio_fh->disp + adio_fh->etype_size * shared_fp; /* if atomic mode requested, lock (exclusive) the region, because there - could be a concurrent noncontiguous request. On NFS, locking is + could be a concurrent noncontiguous request. 
On NFS, locking is done in the ADIO_WriteContig.*/ if ((adio_fh->atomicity) && (adio_fh->file_system != ADIO_NFS)) ADIOI_WRITE_LOCK(adio_fh, off, SEEK_SET, bufsize); ADIO_WriteContig(adio_fh, xbuf, count, datatype, ADIO_EXPLICIT_OFFSET, - off, status, &error_code); + off, status, &error_code); if ((adio_fh->atomicity) && (adio_fh->file_system != ADIO_NFS)) ADIOI_UNLOCK(adio_fh, off, SEEK_SET, bufsize); diff --git a/ompi/mca/io/romio314/romio/mpi2-other/array/Makefile.in b/ompi/mca/io/romio314/romio/mpi2-other/array/Makefile.in index 41a366e398b..89cc636189c 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/array/Makefile.in +++ b/ompi/mca/io/romio314/romio/mpi2-other/array/Makefile.in @@ -44,7 +44,7 @@ all: $(LIBNAME) profile: $(MPIO_REAL_POBJECTS) $(AR) $(LIBNAME) $(MPIO_REAL_POBJECTS) - $(RANLIB) $(LIBNAME) + $(RANLIB) $(LIBNAME) @if [ "@ENABLE_SHLIB@" != "none" ] ; then \ $(MAKE) P$(SHLIBNAME).la ;\ fi @@ -52,10 +52,10 @@ profile: $(MPIO_REAL_POBJECTS) $(LIBNAME): $(MPIO_OBJECTS) $(AR) $(LIBNAME) $(MPIO_OBJECTS) - $(RANLIB) $(LIBNAME) + $(RANLIB) $(LIBNAME) MPIO_LOOBJECTS = $(MPIO_OBJECTS:.o=.lo) -$(SHLIBNAME).la: $(MPIO_LOOBJECTS) +$(SHLIBNAME).la: $(MPIO_LOOBJECTS) $(AR) $(SHLIBNAME).la $(MPIO_LOOBJECTS) # -------------------------------------------------------------------------- @@ -69,7 +69,7 @@ P$(SHLIBNAME).la: $(MPIO_TMP_LOPOBJECTS) clean: @rm -f *.o *.lo *.gcno *.gcda *.bb *.bbg - @rm -f ${srcdir}/*.gcno ${srcdir}/*.gcda + @rm -f ${srcdir}/*.gcno ${srcdir}/*.gcda @rm -f ${srcdir}/*.bb ${srcdir}/*.bbg # Rules for the profiling objects diff --git a/ompi/mca/io/romio314/romio/mpi2-other/array/darray.c b/ompi/mca/io/romio314/romio/mpi2-other/array/darray.c index 6918c5ec708..0e157a521f7 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/array/darray.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/array/darray.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -42,11 +42,11 @@ Output Parameters: .N fortran @*/ -int MPI_Type_create_darray(int size, int rank, int ndims, - int *array_of_gsizes, int *array_of_distribs, - int *array_of_dargs, int *array_of_psizes, - int order, MPI_Datatype oldtype, - MPI_Datatype *newtype) +int MPI_Type_create_darray(int size, int rank, int ndims, + int *array_of_gsizes, int *array_of_distribs, + int *array_of_dargs, int *array_of_psizes, + int order, MPI_Datatype oldtype, + MPI_Datatype *newtype) { int err, error_code; int i; @@ -108,7 +108,7 @@ int MPI_Type_create_darray(int size, int rank, int ndims, /* array_of_distribs checked below */ - if ((array_of_dargs[i] != MPI_DISTRIBUTE_DFLT_DARG) && + if ((array_of_dargs[i] != MPI_DISTRIBUTE_DFLT_DARG) && (array_of_dargs[i] <= 0)) { error_code = MPIO_Err_create_code(MPI_SUCCESS, @@ -161,7 +161,7 @@ int MPI_Type_create_darray(int size, int rank, int ndims, MPI_Type_extent(oldtype, &orig_extent); -/* check if MPI_Aint is large enough for size of global array. +/* check if MPI_Aint is large enough for size of global array. if not, complain. 
*/ size_with_aint = orig_extent; diff --git a/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/Makefile.in b/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/Makefile.in index 63c8fe5e02a..00a8de8184f 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/Makefile.in +++ b/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/Makefile.in @@ -29,16 +29,16 @@ all: $(LIBNAME) profile: $(MPIO_REAL_PFOBJECTS) $(AR) $(LIBNAME) $(MPIO_REAL_PFOBJECTS) - $(RANLIB) $(LIBNAME) + $(RANLIB) $(LIBNAME) @rm -f _*.o $(LIBNAME): $(MPIO_FOBJECTS) $(AR) $(LIBNAME) $(MPIO_FOBJECTS) - $(RANLIB) $(LIBNAME) + $(RANLIB) $(LIBNAME) -clean: +clean: @rm -f *.o *.lo *.gcno *.gcda *.bb *.bbg - @rm -f ${srcdir}/*.gcno ${srcdir}/*.gcda + @rm -f ${srcdir}/*.gcno ${srcdir}/*.gcda @rm -f ${srcdir}/*.bb ${srcdir}/*.bbg # Rules for the profiling objects diff --git a/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/darrayf.c b/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/darrayf.c index 7a16bbb3cc8..5048250068e 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/darrayf.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/darrayf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/subarrayf.c b/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/subarrayf.c index 7a2e54a530b..176c06f9e5b 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/subarrayf.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/array/fortran/subarrayf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/mpi2-other/array/subarray.c b/ompi/mca/io/romio314/romio/mpi2-other/array/subarray.c index 69974c75056..11ac7f07387 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/array/subarray.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/array/subarray.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 1997 University of Chicago. +/* + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -38,9 +38,9 @@ Output Parameters: .N fortran @*/ -int MPI_Type_create_subarray(int ndims, int *array_of_sizes, +int MPI_Type_create_subarray(int ndims, int *array_of_sizes, int *array_of_subsizes, int *array_of_starts, - int order, MPI_Datatype oldtype, + int order, MPI_Datatype oldtype, MPI_Datatype *newtype) { MPI_Aint extent, size_with_aint; @@ -130,7 +130,7 @@ int MPI_Type_create_subarray(int ndims, int *array_of_sizes, MPI_Type_extent(oldtype, &extent); -/* check if MPI_Aint is large enough for size of global array. +/* check if MPI_Aint is large enough for size of global array. if not, complain. 
*/ size_with_aint = extent; diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/Makefile.in b/ompi/mca/io/romio314/romio/mpi2-other/info/Makefile.in index b77f08e2b3a..52087ca8196 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/Makefile.in +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/Makefile.in @@ -51,7 +51,7 @@ all: $(LIBNAME) profile: $(MPIO_REAL_POBJECTS) $(AR) $(LIBNAME) $(MPIO_REAL_POBJECTS) - $(RANLIB) $(LIBNAME) + $(RANLIB) $(LIBNAME) @if [ "@ENABLE_SHLIB@" != "none" ] ; then \ $(MAKE) P$(SHLIBNAME).la ;\ fi @@ -59,10 +59,10 @@ profile: $(MPIO_REAL_POBJECTS) $(LIBNAME): $(MPIO_OBJECTS) $(AR) $(LIBNAME) $(MPIO_OBJECTS) - $(RANLIB) $(LIBNAME) + $(RANLIB) $(LIBNAME) MPIO_LOOBJECTS = $(MPIO_OBJECTS:.o=.lo) -$(SHLIBNAME).la: $(MPIO_LOOBJECTS) +$(SHLIBNAME).la: $(MPIO_LOOBJECTS) $(AR) $(SHLIBNAME).la $(MPIO_LOOBJECTS) # -------------------------------------------------------------------------- @@ -76,7 +76,7 @@ P$(SHLIBNAME).la: $(MPIO_TMP_LOPOBJECTS) clean: @rm -f *.o *.lo *.gcno *.gcda *.bb *.bbg - @rm -f ${srcdir}/*.gcno ${srcdir}/*.gcda + @rm -f ${srcdir}/*.gcno ${srcdir}/*.gcda @rm -f ${srcdir}/*.bb ${srcdir}/*.bbg diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/Makefile.in b/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/Makefile.in index c84769c5467..3b9a2df643e 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/Makefile.in +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/Makefile.in @@ -32,16 +32,16 @@ all: $(LIBNAME) profile: $(MPIO_REAL_POBJECTS) $(AR) $(LIBNAME) $(MPIO_REAL_POBJECTS) - $(RANLIB) $(LIBNAME) + $(RANLIB) $(LIBNAME) @rm -f _*.o $(LIBNAME): $(MPIO_OBJECTS) $(AR) $(LIBNAME) $(MPIO_OBJECTS) - $(RANLIB) $(LIBNAME) + $(RANLIB) $(LIBNAME) clean: @rm -f *.o *.lo *.gcno *.gcda *.bb *.bbg - @rm -f ${srcdir}/*.gcno ${srcdir}/*.gcda + @rm -f ${srcdir}/*.gcno ${srcdir}/*.gcda @rm -f ${srcdir}/*.bb ${srcdir}/*.bbg # Rules for the profiling objects diff --git 
a/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/info_createf.c b/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/info_createf.c index 5d01ce93d02..1568ba51e50 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/info_createf.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/info_createf.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/info_deletef.c b/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/info_deletef.c index f7a7bb4d58e..92a2496caf3 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/info_deletef.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/fortran/info_deletef.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -97,7 +97,7 @@ void mpi_info_delete_(MPI_Fint *info, char *key, int *ierr, int keylen) /* strip leading and trailing blanks in key */ lead_blanks = 0; - for (i=0; icookie != MPIR_INFO_COOKIE)) + if ((info <= (MPI_Info) 0) || (info->cookie != MPIR_INFO_COOKIE)) return (MPI_Fint) 0; if (!MPIR_Infotable) { MPIR_Infotable_max = 1024; MPIR_Infotable = (MPI_Info *) - ADIOI_Malloc(MPIR_Infotable_max*sizeof(MPI_Info)); - MPIR_Infotable_ptr = 0; /* 0 can't be used though, because + ADIOI_Malloc(MPIR_Infotable_max*sizeof(MPI_Info)); + MPIR_Infotable_ptr = 0; /* 0 can't be used though, because MPI_INFO_NULL=0 */ for (i=0; ikey = 0; (*info)->value = 0; (*info)->next = 0; - /* this is the first structure in this linked list. it is + /* this is the first structure in this linked list. it is always kept empty. new (key,value) pairs are added after it. 
*/ fn_exit: diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/info_delete.c b/ompi/mca/io/romio314/romio/mpi2-other/info/info_delete.c index 06ec27ee8dd..7ee0941591d 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/info_delete.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/info_delete.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -63,7 +63,7 @@ int MPI_Info_delete(MPI_Info info, char *key) while (curr) { if (!strcmp(curr->key, key)) { - ADIOI_Free(curr->key); + ADIOI_Free(curr->key); ADIOI_Free(curr->value); prev->next = curr->next; ADIOI_Free(curr); @@ -76,7 +76,7 @@ int MPI_Info_delete(MPI_Info info, char *key) if (!done) { FPRINTF(stderr, "MPI_Info_delete: key not defined in info\n"); - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(MPI_COMM_WORLD, 1); } return MPI_SUCCESS; diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/info_dup.c b/ompi/mca/io/romio314/romio/mpi2-other/info/info_dup.c index a1c1a9969fc..08b593d8335 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/info_dup.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/info_dup.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -58,7 +58,7 @@ int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo) curr_new->key = ADIOI_Strdup(curr_old->key); curr_new->value = ADIOI_Strdup(curr_old->value); curr_new->next = 0; - + curr_old = curr_old->next; } diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/info_f2c.c b/ompi/mca/io/romio314/romio/mpi2-other/info/info_f2c.c index 49d8ae5eec2..ec9e2b67920 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/info_f2c.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/info_f2c.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/info_free.c b/ompi/mca/io/romio314/romio/mpi2-other/info/info_free.c index e682ce0cd58..b9238c7e081 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/info_free.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/info_free.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/info_get.c b/ompi/mca/io/romio314/romio/mpi2-other/info/info_get.c index ca4274672b5..05125c585fa 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/info_get.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/info_get.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/info_getnks.c b/ompi/mca/io/romio314/romio/mpi2-other/info/info_getnks.c index fd20826950f..da728d7cb6c 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/info_getnks.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/info_getnks.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/info_getnth.c b/ompi/mca/io/romio314/romio/mpi2-other/info/info_getnth.c index fa325c41cc0..bcb608248f2 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/info_getnth.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/info_getnth.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/info_getvln.c b/ompi/mca/io/romio314/romio/mpi2-other/info/info_getvln.c index b484929b720..363e4dfbc9c 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/info_getvln.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/info_getvln.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. + * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/mpi2-other/info/info_set.c b/ompi/mca/io/romio314/romio/mpi2-other/info/info_set.c index e60d89d79e5..b52ebc8bfed 100644 --- a/ompi/mca/io/romio314/romio/mpi2-other/info/info_set.c +++ b/ompi/mca/io/romio314/romio/mpi2-other/info/info_set.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * - * Copyright (C) 1997 University of Chicago. 
+ * Copyright (C) 1997 University of Chicago. * See COPYRIGHT notice in top-level directory. */ @@ -77,7 +77,7 @@ int MPI_Info_set(MPI_Info info, char *key, char *value) while (curr) { if (!strcmp(curr->key, key)) { - ADIOI_Free(curr->value); + ADIOI_Free(curr->value); curr->value = ADIOI_Strdup(value); break; } diff --git a/ompi/mca/io/romio314/romio/test-internal/file_realms_test.c b/ompi/mca/io/romio314/romio/test-internal/file_realms_test.c index b848259e0ef..72cef99d255 100644 --- a/ompi/mca/io/romio314/romio/test-internal/file_realms_test.c +++ b/ompi/mca/io/romio314/romio/test-internal/file_realms_test.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 2008 University of Chicago. +/* + * Copyright (C) 2008 University of Chicago. * See COPYRIGHT notice in top-level directory. */ diff --git a/ompi/mca/io/romio314/romio/test-internal/heap_test.c b/ompi/mca/io/romio314/romio/test-internal/heap_test.c index 2f0041df368..083e1429dd1 100644 --- a/ompi/mca/io/romio314/romio/test-internal/heap_test.c +++ b/ompi/mca/io/romio314/romio/test-internal/heap_test.c @@ -49,7 +49,7 @@ int main(int argc, char **argv) { int test_type = RANDOM; test_params_t predefined_tests[PREDEF_TESTS]; test_params_t test; - + /* parse args */ adding_elements = 0; curr_add_idx = 0; @@ -260,10 +260,10 @@ int run_test(test_params_t *test) { int i, j, k, err_flag = 0; int curr_insert_idx = 0; int curr_extract_idx = 0; - + create_heap(&myheap, test->heap_size); myheap.size = 0; - + extracted = (ADIO_Offset *) malloc(test->heap_size * sizeof(ADIO_Offset)); for (i=0; i < test->action_arr_sz; i++) { for (j=0; jaction_count_arr[i]; j++) { @@ -275,7 +275,7 @@ int run_test(test_params_t *test) { myheap.nodes[k].offset = test->offsets[k]; myheap.nodes[k].proc = k; } - build_heap(&myheap); + build_heap(&myheap); break; case INSERT: ADIOI_Heap_insert(&myheap, test->offsets[curr_insert_idx], @@ -341,7 +341,7 @@ void init_predefined_test(test_params_t 
*params, int index) { strcpy(params->name, "TEST 1"); params->heap_size = 15; params->action_arr_sz = 3; - + /* allocate space */ params->action_arr = (int *) malloc (params->action_arr_sz*sizeof(int)); @@ -399,7 +399,7 @@ void init_predefined_test(test_params_t *params, int index) { strcpy(params->name, "TEST 1"); params->heap_size = 15; params->action_arr_sz = 3; - + /* allocate space */ params->action_arr = (int *) malloc (params->action_arr_sz*sizeof(int)); @@ -409,7 +409,7 @@ void init_predefined_test(test_params_t *params, int index) { if (params->verify) params->correct_order = (ADIO_Offset *) malloc(params->heap_size*sizeof(ADIO_Offset)); - + /* Set values */ params->offsets[0] = 65; params->offsets[1] = 53; diff --git a/ompi/mca/io/romio314/romio/test-internal/io_bounds_test.c b/ompi/mca/io/romio314/romio/test-internal/io_bounds_test.c index f7514764e42..a4d70a4cfb5 100644 --- a/ompi/mca/io/romio314/romio/test-internal/io_bounds_test.c +++ b/ompi/mca/io/romio314/romio/test-internal/io_bounds_test.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* - * Copyright (C) 2008 University of Chicago. +/* + * Copyright (C) 2008 University of Chicago. * See COPYRIGHT notice in top-level directory. 
*/ @@ -40,7 +40,7 @@ int main (int argc, char **argv) { MPI_Init (&argc, &argv); MPI_Comm_rank (MPI_COMM_WORLD, &rank); - + if (argc != 1) { if (!rank) { printf ("Use only one process\n"); @@ -116,7 +116,7 @@ int run_test (test_param_t *test) { MPI_File_seek (fh, test->offset, MPI_SEEK_SET); ADIOI_Calc_bounds ((ADIO_File) fh, test->count, MPI_BYTE, ADIO_INDIVIDUAL, - test->offset, &st_offset, &end_offset); + test->offset, &st_offset, &end_offset); ind_err = 0; if (st_offset != test->correct_st_offset) { @@ -173,7 +173,7 @@ int print_usage () " io_bounds_test -A -T \n"); } -int print_test_params (test_param_t *test) +int print_test_params (test_param_t *test) { int i; printf ( @@ -181,7 +181,7 @@ int print_test_params (test_param_t *test) "bytes: %d\n" "Filetype [n](disp, lens, type):\n", test->offset, test->count); - + for (i=0; itype_count; i++) { printf ( " [%d](%lld, %d, ", diff --git a/ompi/mca/io/romio314/romio/test/.codingcheck b/ompi/mca/io/romio314/romio/test/.codingcheck index 0d0a655fa81..823ebd28ea7 100644 --- a/ompi/mca/io/romio314/romio/test/.codingcheck +++ b/ompi/mca/io/romio314/romio/test/.codingcheck @@ -1,10 +1,10 @@ # -# We allow these routines for the romio test programs -%romioTestRoutines = ( +# We allow these routines for the romio test programs +%romioTestRoutines = ( 'printf' => sys, 'fprintf' => sys , 'sprintf' => sys, 'vprintf' => sys, - 'strcpy' => sys, 'strncpy' => sys, 'strcat' => sys, + 'strcpy' => sys, 'strncpy' => sys, 'strcat' => sys, 'malloc' => sys, 'free' => sys, 'calloc' => sys, 'strdup' => sys, - 'assert' => sys, 'snprintf' => sys, + 'assert' => sys, 'snprintf' => sys, ); if (defined(&PushAllowFuncNames)) { &PushAllowFuncNames( "romioTestRoutines", "tree", "add" ); diff --git a/ompi/mca/io/romio314/romio/test/Makefile.am b/ompi/mca/io/romio314/romio/test/Makefile.am index caa5be91e7a..74940cd8a90 100644 --- a/ompi/mca/io/romio314/romio/test/Makefile.am +++ b/ompi/mca/io/romio314/romio/test/Makefile.am @@ -26,7 +26,7 @@ CTESTS 
= simple perf async coll_test coll_perf misc file_info excl large_array \ atomicity noncontig i_noncontig noncontig_coll split_coll shared_fp \ large_file psimple error status noncontig_coll2 aggregation1 aggregation2 \ async-multiple ordered_fp hindexed external32 types_with_zeros darray_read -FTESTS = fcoll_test fperf fmisc pfcoll_test +FTESTS = fcoll_test fperf fmisc pfcoll_test noinst_PROGRAMS = $(CTESTS) diff --git a/ompi/mca/io/romio314/romio/test/Mfile.in b/ompi/mca/io/romio314/romio/test/Mfile.in index c2e1c298259..6b432ac10df 100644 --- a/ompi/mca/io/romio314/romio/test/Mfile.in +++ b/ompi/mca/io/romio314/romio/test/Mfile.in @@ -24,7 +24,7 @@ srcdir = @srcdir@ @VPATH@ PROFLIB = -CFLAGS = @CFLAGS@ @DEFS@ $(OPTFLAGS) +CFLAGS = @CFLAGS@ @DEFS@ $(OPTFLAGS) CCFLAGS = $(CFLAGS) FFLAGS = $(OPTFLAGS) @FFLAGS@ # Use LIBS to add any special libraries for C programs @@ -32,18 +32,18 @@ LIBS = @LIB_PATH@ @LIB_LIST@ # Use FLIBS to add any special libraries for Fortran programs FLIBS = @FLIB_PATH@ @LIB_LIST@ @F77EXTRALIBS@ EXECS = $(CTESTS) $(FTESTS) -OTHEREXECS = +OTHEREXECS = CTESTS = simple perf async coll_test coll_perf misc file_info excl \ large_array \ atomicity noncontig i_noncontig noncontig_coll split_coll shared_fp \ large_file psimple error status noncontig_coll2 -FTESTS = fcoll_test fperf fmisc pfcoll_test +FTESTS = fcoll_test fperf fmisc pfcoll_test default: $(EXECS) # # Note that runtests builds the executables as required -testing: +testing: -./runtests $(TESTARGS) all: testing @@ -62,7 +62,7 @@ fcoll_test: fcoll_test.f fmisc: fmisc.f $(F77) $(USER_FFLAGS) -o fmisc fmisc.f $(FLIBS) -pfcoll_test: pfcoll_test.f +pfcoll_test: pfcoll_test.f $(F77) $(USER_FFLAGS) -o pfcoll_test pfcoll_test.f $(FLIBS) # diff --git a/ompi/mca/io/romio314/romio/test/README b/ompi/mca/io/romio314/romio/test/README index dbab8e393d4..15a24804b42 100644 --- a/ompi/mca/io/romio314/romio/test/README +++ b/ompi/mca/io/romio314/romio/test/README @@ -1,9 +1,9 @@ -This directory contains 
a few example programs. +This directory contains a few example programs. Each program takes the filename as a command-line argument -"-fname filename". +"-fname filename". -If you are using "mpirun" to run an MPI program, you can run the +If you are using "mpirun" to run an MPI program, you can run the program "simple" with two processes as follows: mpirun -np 2 simple -fname test @@ -22,9 +22,9 @@ perf.c: A simple read and write performance test. Each process writes reads it back. For a different access size, change the value of SIZE in the code. The bandwidth is reported for two cases: (1) without including MPI_File_sync and (2) including - MPI_File_sync. - -async.c: This program is the same as simple.c, except that it uses + MPI_File_sync. + +async.c: This program is the same as simple.c, except that it uses asynchronous I/O. coll_test.c: This program tests the use of collective I/O. It writes @@ -42,8 +42,8 @@ coll_perf.c: Measures the I/O bandwidth for writing/reading a 3D misc.c: Tests various miscellaneous MPI-IO functions -atomicity.c: Tests whether atomicity semantics are satisfied for - overlapping accesses in atomic mode. The probability of detecting +atomicity.c: Tests whether atomicity semantics are satisfied for + overlapping accesses in atomic mode. The probability of detecting errors is higher if you run it on 8 or more processes. large_file.c: Tests access to large files. Writes a 4-Gbyte file and @@ -53,20 +53,20 @@ large_file.c: Tests access to large files. Writes a 4-Gbyte file and large_array.c: Tests writing and reading a 4-Gbyte distributed array using the distributed array datatype constructor. Works only on file systems that support 64-bit file sizes and MPI implementations - that support 64-bit MPI_Aint. + that support 64-bit MPI_Aint. 
-file_info.c: Tests the setting and retrieval of hints via +file_info.c: Tests the setting and retrieval of hints via MPI_File_set_info and MPI_File_get_info excl.c: Tests MPI_File_open with MPI_MODE_EXCL -noncontig.c: Tests noncontiguous accesses in memory and file using +noncontig.c: Tests noncontiguous accesses in memory and file using independent I/O. Run it on two processes only. noncontig_coll.c: Same as noncontig.c, but uses collective I/O -noncontig_coll2.c: Same as noncontig_coll.c, but exercises the - cb_config_list hint and aggregation handling more. +noncontig_coll2.c: Same as noncontig_coll.c, but exercises the + cb_config_list hint and aggregation handling more. i_noncontig.c: Same as noncontig.c, but uses nonblocking I/O @@ -78,7 +78,7 @@ fperf.f: Fortran version of perf.c fcoll_test.f: Fortran version of coll_test.c -pfcoll_test.f: Same as fcoll_test.f but uses the PMPI versions of +pfcoll_test.f: Same as fcoll_test.f but uses the PMPI versions of all MPI routines fmisc.f: Fortran version of misc.c diff --git a/ompi/mca/io/romio314/romio/test/aggregation1.c b/ompi/mca/io/romio314/romio/test/aggregation1.c index dee42d60a5c..9b4a76acd16 100644 --- a/ompi/mca/io/romio314/romio/test/aggregation1.c +++ b/ompi/mca/io/romio314/romio/test/aggregation1.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * (C) 2007 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. 
*/ @@ -15,7 +15,7 @@ #include #define NUM_OBJS 4 -#define OBJ_SIZE 1048576 +#define OBJ_SIZE 1048576 extern char *optarg; extern int optind, opterr, optopt; @@ -29,7 +29,7 @@ Usage( int line ) { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); if ( rank == 0 ) { - fprintf( stderr, + fprintf( stderr, "Usage (line %d): %s [-d] [-h] -f filename\n" "\t-d for debugging\n" "\t-h to turn on the hints to force collective aggregation\n", @@ -61,7 +61,7 @@ print_hints( int rank, MPI_File *mfh ) { for( i = 0; i < nkeys; i++ ) { MPI_Info_get_nthkey( info, i, key ); printf( "%35s -> ", key ); - MPI_Info_get( info, key, 1024, value, &dummy_int ); + MPI_Info_get( info, key, 1024, value, &dummy_int ); printf( "%s\n", value ); } MPI_Info_free(&info); @@ -93,10 +93,10 @@ write_file( char *target, int rank, MPI_Info *info ) { buffer = malloc(OBJ_SIZE); if ( debug ) printf( "%d writing file %s\n", rank, target ); - - if( (mpi_ret = MPI_File_open(MPI_COMM_WORLD, target, + + if( (mpi_ret = MPI_File_open(MPI_COMM_WORLD, target, MPI_MODE_WRONLY | MPI_MODE_CREATE, *info, &wfh ) ) - != MPI_SUCCESS ) + != MPI_SUCCESS ) { fatal_error( mpi_ret, NULL, "open for write" ); } @@ -106,7 +106,7 @@ write_file( char *target, int rank, MPI_Info *info ) { fill_buffer( buffer, OBJ_SIZE, rank, offset ); if ( debug ) printf( "%s", buffer ); if ( (mpi_ret = MPI_File_write_at_all( wfh, offset, buffer, OBJ_SIZE, - MPI_CHAR, &mpi_stat ) ) != MPI_SUCCESS ) + MPI_CHAR, &mpi_stat ) ) != MPI_SUCCESS ) { fatal_error( mpi_ret, &mpi_stat, "write" ); } @@ -125,7 +125,7 @@ static int reduce_corruptions( int corrupt_blocks ) { int mpi_ret; int sum; - if ( ( mpi_ret = MPI_Reduce( &corrupt_blocks, &sum, 1, + if ( ( mpi_ret = MPI_Reduce( &corrupt_blocks, &sum, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD ) ) != MPI_SUCCESS ) { fatal_error( mpi_ret, NULL, "MPI_Reduce" ); @@ -145,9 +145,9 @@ read_file( char *target, int rank, MPI_Info *info, int *corrupt_blocks ) { verify_buf = (char *)malloc(OBJ_SIZE); if ( debug ) printf( "%d 
reading file %s\n", rank, target ); - - if( (mpi_ret = MPI_File_open(MPI_COMM_WORLD, target, - MPI_MODE_RDONLY, *info, &rfh ) ) != MPI_SUCCESS ) + + if( (mpi_ret = MPI_File_open(MPI_COMM_WORLD, target, + MPI_MODE_RDONLY, *info, &rfh ) ) != MPI_SUCCESS ) { fatal_error( mpi_ret, NULL, "open for read" ); } @@ -157,7 +157,7 @@ read_file( char *target, int rank, MPI_Info *info, int *corrupt_blocks ) { fill_buffer( verify_buf, OBJ_SIZE, rank, offset ); if ( debug ) printf( "Expecting %s", buffer ); if ( (mpi_ret = MPI_File_read_at_all( rfh, offset, buffer, OBJ_SIZE, - MPI_CHAR, &mpi_stat ) ) != MPI_SUCCESS ) + MPI_CHAR, &mpi_stat ) ) != MPI_SUCCESS ) { fatal_error( mpi_ret, &mpi_stat, "read" ); } @@ -182,9 +182,9 @@ read_file( char *target, int rank, MPI_Info *info, int *corrupt_blocks ) { static void set_hints( MPI_Info *info ) { - MPI_Info_set( *info, "romio_cb_write", "enable" ); - MPI_Info_set( *info, "romio_no_indep_rw", "1" ); - MPI_Info_set( *info, "cb_nodes", "1" ); + MPI_Info_set( *info, "romio_cb_write", "enable" ); + MPI_Info_set( *info, "romio_no_indep_rw", "1" ); + MPI_Info_set( *info, "cb_nodes", "1" ); MPI_Info_set( *info, "cb_buffer_size", "4194304" ); } @@ -199,7 +199,7 @@ set_hints( MPI_Info *info, char *hints ) { val = strtok( NULL, delimiter ); if ( debug ) printf( "HINT: %s = %s\n", key, val ); if ( ! val ) { - Usage( __LINE__ ); + Usage( __LINE__ ); } MPI_Info_set( *info, key, val ); key = strtok( NULL, delimiter ); @@ -208,7 +208,7 @@ set_hints( MPI_Info *info, char *hints ) { } */ -int +int main( int argc, char *argv[] ) { int nproc = 1, rank = 0; char *target = NULL; diff --git a/ompi/mca/io/romio314/romio/test/aggregation2.c b/ompi/mca/io/romio314/romio/test/aggregation2.c index a35ebe0caf5..8108b466e42 100644 --- a/ompi/mca/io/romio314/romio/test/aggregation2.c +++ b/ompi/mca/io/romio314/romio/test/aggregation2.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * (C) 2007 by Argonne National Laboratory. 
* See COPYRIGHT in top-level directory. */ @@ -29,7 +29,7 @@ static void handle_error(int errcode, const char *str) MPI_Abort(MPI_COMM_WORLD, 1); } -int main(int argc, char ** argv) +int main(int argc, char ** argv) { MPI_Info info = MPI_INFO_NULL; MPI_File fh; @@ -51,19 +51,19 @@ int main(int argc, char ** argv) } off = rank*sizeof(buffer); - errcode = MPI_File_open(MPI_COMM_WORLD, argv[1], + errcode = MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_WRONLY|MPI_MODE_CREATE, info, &fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open"); - errcode = MPI_File_write_at_all(fh, off, buffer, BUFSIZE, + errcode = MPI_File_write_at_all(fh, off, buffer, BUFSIZE, MPI_INT, &status); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_write_at_all"); errcode = MPI_File_close(&fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close"); - errcode = MPI_File_open(MPI_COMM_WORLD, argv[1], + errcode = MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDONLY, info, &fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open"); - errcode = MPI_File_read_at_all(fh, off, buf2, BUFSIZE, + errcode = MPI_File_read_at_all(fh, off, buf2, BUFSIZE, MPI_INT, &status); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_read_at_all"); errcode = MPI_File_close(&fh); diff --git a/ompi/mca/io/romio314/romio/test/async-multiple.c b/ompi/mca/io/romio314/romio/test/async-multiple.c index ec9726ce55d..f52c95a4daf 100644 --- a/ompi/mca/io/romio314/romio/test/async-multiple.c +++ b/ompi/mca/io/romio314/romio/test/async-multiple.c @@ -1,6 +1,6 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- * vim: ts=8 sts=4 sw=4 noexpandtab - * + * * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. */ @@ -14,11 +14,11 @@ /* Uses asynchronous I/O. Each process writes to separate files and reads them back. 
The file name is taken as a command-line argument, - and the process rank is appended to it.*/ + and the process rank is appended to it.*/ void handle_error(int errcode, const char *str); -void handle_error(int errcode, const char *str) +void handle_error(int errcode, const char *str) { char msg[MPI_MAX_ERROR_STRING]; int resultlen; @@ -39,7 +39,7 @@ int main(int argc, char **argv) MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); -/* process 0 takes the file name as a command-line argument and +/* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!rank) { i = 1; @@ -74,14 +74,14 @@ int main(int argc, char **argv) strcpy(tmp, filename); sprintf(filename, "%s.%d", tmp, rank); - errcode = MPI_File_open(MPI_COMM_SELF, filename, + errcode = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); if (errcode != MPI_SUCCESS) { handle_error(errcode, "MPI_File_open"); } MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL); - for (i=0; i /* tests whether atomicity semantics are satisfied for overlapping accesses - in atomic mode. The probability of detecting errors is higher if you run + in atomic mode. The probability of detecting errors is higher if you run it on 8 or more processes. */ /* The file name is taken as a command-line argument. 
*/ @@ -30,7 +30,7 @@ int main(int argc, char **argv) MPI_Comm_rank(MPI_COMM_WORLD, &mynod); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); -/* process 0 takes the file name as a command-line argument and +/* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!mynod) { i = 1; @@ -63,7 +63,7 @@ int main(int argc, char **argv) /* initialize file to all zeros */ if (!mynod) { MPI_File_delete(filename, MPI_INFO_NULL); - MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | + MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); for (i=0; i - */ + * yet :> + */ #include @@ -16,10 +16,10 @@ #include #include -#define CHECK(fn) {int errcode; errcode = (fn); if (errcode != MPI_SUCCESS) handle_error(errcode, NULL); } +#define CHECK(fn) {int errcode; errcode = (fn); if (errcode != MPI_SUCCESS) handle_error(errcode, NULL); } -static void handle_error(int errcode, char *str) +static void handle_error(int errcode, char *str) { char msg[MPI_MAX_ERROR_STRING]; int resultlen; @@ -28,7 +28,7 @@ static void handle_error(int errcode, char *str) MPI_Abort(MPI_COMM_WORLD, 1); } -static void typestats(MPI_Datatype type) +static void typestats(MPI_Datatype type) { MPI_Aint lb, extent; MPI_Count size; @@ -41,7 +41,7 @@ static void typestats(MPI_Datatype type) } -static int verify_type(char *filename, MPI_Datatype type, +static int verify_type(char *filename, MPI_Datatype type, int64_t expected_extent, int do_coll) { int rank, canary; @@ -53,10 +53,10 @@ static int verify_type(char *filename, MPI_Datatype type, MPI_Comm_rank(MPI_COMM_WORLD, &rank); - CHECK( MPI_File_open(MPI_COMM_WORLD, filename, + CHECK( MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE|MPI_MODE_RDWR, MPI_INFO_NULL, &fh)); - CHECK( MPI_File_set_view(fh, rank*sizeof(int), - MPI_BYTE, type, "native", MPI_INFO_NULL)); + CHECK( MPI_File_set_view(fh, rank*sizeof(int), + MPI_BYTE, type, "native", MPI_INFO_NULL)); MPI_Type_size_x(type, 
&tsize); @@ -69,14 +69,14 @@ static int verify_type(char *filename, MPI_Datatype type, CHECK( MPI_File_write_at(fh, tsize, &canary, 1, MPI_INT, &status)); } - CHECK( MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", - MPI_INFO_NULL)); + CHECK( MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", + MPI_INFO_NULL)); if (do_coll) { - CHECK( MPI_File_read_at_all(fh, expected_extent/sizeof(int)+rank, + CHECK( MPI_File_read_at_all(fh, expected_extent/sizeof(int)+rank, &compare, 1, MPI_INT, &status)); } else { - CHECK( MPI_File_read_at(fh, expected_extent/sizeof(int)+rank, + CHECK( MPI_File_read_at(fh, expected_extent/sizeof(int)+rank, &compare, 1, MPI_INT, &status)); } @@ -92,7 +92,7 @@ static int verify_type(char *filename, MPI_Datatype type, } else { if (rank == 0) MPI_File_delete(filename, MPI_INFO_NULL); } - + return (toterrs); } @@ -105,14 +105,14 @@ static int testtype(char *filename, MPI_Datatype type, int64_t expected_extent) MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (!rank) typestats(type); - ret = verify_type(filename, type, expected_extent, nocollective); + ret = verify_type(filename, type, expected_extent, nocollective); if (ret) { errs++; fprintf(stderr, "type %d failed indep\n", type); - } else + } else if (!rank) printf("indep: OK "); - ret = verify_type(filename, type, expected_extent, collective); + ret = verify_type(filename, type, expected_extent, collective); if (ret) { errs++; fprintf(stderr, "type %d failed collective\n", type); @@ -133,7 +133,7 @@ int main(int argc, char **argv) int subs[2]; int starts[2]; - MPI_Datatype baseindex, indexed1G, indexed3G, indexed6G; + MPI_Datatype baseindex, indexed1G, indexed3G, indexed6G; MPI_Datatype subarray1G, subarray3G, subarray6G; int ret, rank; @@ -166,7 +166,7 @@ int main(int argc, char **argv) MPI_Type_contiguous(6144, baseindex, &indexed6G); MPI_Type_commit(&indexed6G); - /* TODO: + /* TODO: * - add a darray test * - add a test with crazy extents */ sizes[0] = 1024*16; @@ -174,17 +174,17 @@ int main(int 
argc, char **argv) subs[0] = subs[1] = 256; starts[0] = starts[1] = 0; - MPI_Type_create_subarray(ndims, sizes, subs, starts, + MPI_Type_create_subarray(ndims, sizes, subs, starts, MPI_ORDER_C, MPI_INT, &subarray1G); MPI_Type_commit(&subarray1G); sizes[1] = 1024*16*3; - MPI_Type_create_subarray(ndims, sizes, subs, starts, + MPI_Type_create_subarray(ndims, sizes, subs, starts, MPI_ORDER_C, MPI_INT, &subarray3G); MPI_Type_commit(&subarray3G); sizes[1] = 1024*16*6; - MPI_Type_create_subarray(ndims, sizes, subs, starts, + MPI_Type_create_subarray(ndims, sizes, subs, starts, MPI_ORDER_C, MPI_INT, &subarray6G); MPI_Type_commit(&subarray6G); @@ -202,11 +202,11 @@ int main(int argc, char **argv) ret = testtype(argv[1], subarray6G, (int64_t)1024*1024*1024*6); if(!ret && !rank) fprintf(stderr, " No Errors\n"); - + MPI_Finalize(); return (-ret); } -/* - * vim: ts=8 sts=4 sw=4 noexpandtab +/* + * vim: ts=8 sts=4 sw=4 noexpandtab */ diff --git a/ompi/mca/io/romio314/romio/test/coll_perf.c b/ompi/mca/io/romio314/romio/test/coll_perf.c index 0f9042d5ff5..f576e85d5a9 100644 --- a/ompi/mca/io/romio314/romio/test/coll_perf.c +++ b/ompi/mca/io/romio314/romio/test/coll_perf.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. */ @@ -14,7 +14,7 @@ block-distributed array to a file corresponding to the global array in row-major (C) order. Note that the file access pattern is noncontiguous. - + Array size 128^3. 
For other array sizes, change array_of_gsizes below.*/ @@ -35,7 +35,7 @@ int main(int argc, char **argv) MPI_Comm_rank(MPI_COMM_WORLD, &mynod); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); -/* process 0 takes the file name as a command-line argument and +/* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!mynod) { i = 1; @@ -91,7 +91,7 @@ int main(int argc, char **argv) /* to eliminate paging effects, do the operations once but don't time them */ - MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, + MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL); MPI_File_write_all(fh, buf, bufcount, MPI_INT, &status); @@ -102,7 +102,7 @@ int main(int argc, char **argv) MPI_Barrier(MPI_COMM_WORLD); /* now time write_all */ - MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, + MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL); @@ -124,8 +124,8 @@ int main(int argc, char **argv) MPI_Barrier(MPI_COMM_WORLD); /* now time read_all */ - MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, - MPI_INFO_NULL, &fh); + MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, + MPI_INFO_NULL, &fh); MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL); MPI_Barrier(MPI_COMM_WORLD); diff --git a/ompi/mca/io/romio314/romio/test/coll_test.c b/ompi/mca/io/romio314/romio/test/coll_test.c index 4efc4d25b02..b7aeda221bc 100644 --- a/ompi/mca/io/romio314/romio/test/coll_test.c +++ b/ompi/mca/io/romio314/romio/test/coll_test.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. 
*/ @@ -17,7 +17,7 @@ /* The file name is taken as a command-line argument. */ /* Note that the file access pattern is noncontiguous. */ - + void handle_error(int errcode, const char *str); void handle_error(int errcode, const char *str) @@ -49,7 +49,7 @@ int main(int argc, char **argv) MPI_Comm_rank(MPI_COMM_WORLD, &mynod); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); -/* process 0 takes the file name as a command-line argument and +/* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!mynod) { i = 1; @@ -94,7 +94,7 @@ int main(int argc, char **argv) for (i=0; i #include #include -/* +/* * the new defered open code made some changes to the way we manage CREAT|EXCL, * so test out that code path */ @@ -19,7 +19,7 @@ struct options { typedef struct options options; -void handle_error(int errcode, char *str) +void handle_error(int errcode, char *str) { char msg[MPI_MAX_ERROR_STRING]; int resultlen; @@ -28,7 +28,7 @@ void handle_error(int errcode, char *str) MPI_Abort(MPI_COMM_WORLD, 1); } -void parse_args(int argc, char ** argv, int rank, options *opts) +void parse_args(int argc, char ** argv, int rank, options *opts) { int i, len=0; if (rank == 0) { diff --git a/ompi/mca/io/romio314/romio/test/darray_read.c b/ompi/mca/io/romio314/romio/test/darray_read.c index e4eb709e8e1..418ae4c9ea0 100644 --- a/ompi/mca/io/romio314/romio/test/darray_read.c +++ b/ompi/mca/io/romio314/romio/test/darray_read.c @@ -19,9 +19,9 @@ static void handle_error(int errcode, const char *str) } -int main(int argc, char *argv[]) -{ - int i, j, nerrors=0, total_errors=0; +int main(int argc, char *argv[]) +{ + int i, j, nerrors=0, total_errors=0; int rank, size; int bpos; @@ -43,7 +43,7 @@ int main(int argc, char *argv[]) int tsize, nelem; MPI_File dfile; - + MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size); @@ -121,4 +121,4 @@ int main(int argc, char *argv[]) exit(total_errors); -} +} diff --git a/ompi/mca/io/romio314/romio/test/error.c 
b/ompi/mca/io/romio314/romio/test/error.c index ee24e964596..c7aaca61da8 100644 --- a/ompi/mca/io/romio314/romio/test/error.c +++ b/ompi/mca/io/romio314/romio/test/error.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. */ @@ -29,7 +29,7 @@ int main(int argc, char **argv) } #endif -/* process 0 takes the file name as a command-line argument and +/* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!rank) { i = 1; @@ -53,7 +53,7 @@ int main(int argc, char **argv) filename = (char *) malloc(len+10); MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD); } - + /* each process opens a separate file called filename.'myrank' */ tmp = (char *) malloc(len+10); strcpy(tmp, filename); @@ -61,11 +61,11 @@ int main(int argc, char **argv) err = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE+MPI_MODE_RDWR, MPI_INFO_NULL, &fh); - err = MPI_File_set_view(fh, -1, MPI_BYTE, MPI_BYTE, "native", + err = MPI_File_set_view(fh, -1, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); /* disp is deliberately passed as -1 */ - /* This test is designed for ROMIO specifically and tests for a + /* This test is designed for ROMIO specifically and tests for a specific error message */ if (err != MPI_SUCCESS) { MPI_Error_string(err, string, &len); @@ -102,5 +102,5 @@ int main(int argc, char **argv) } MPI_Finalize(); - return 0; + return 0; } diff --git a/ompi/mca/io/romio314/romio/test/excl.c b/ompi/mca/io/romio314/romio/test/excl.c index e48bb3deee5..5deef15779a 100644 --- a/ompi/mca/io/romio314/romio/test/excl.c +++ b/ompi/mca/io/romio314/romio/test/excl.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. 
*/ @@ -20,7 +20,7 @@ int main(int argc, char **argv) MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); -/* process 0 takes the file name as a command-line argument and +/* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!rank) { i = 1; @@ -44,13 +44,13 @@ int main(int argc, char **argv) filename = (char *) malloc(len+10); MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD); } - + if (!rank) MPI_File_delete(filename, MPI_INFO_NULL); MPI_Barrier(MPI_COMM_WORLD); /* this open should succeed */ - err = MPI_File_open(MPI_COMM_WORLD, filename, + err = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_EXCL | MPI_MODE_RDWR, MPI_INFO_NULL , &fh); if (err != MPI_SUCCESS) { errs++; @@ -61,7 +61,7 @@ int main(int argc, char **argv) MPI_Barrier(MPI_COMM_WORLD); /* this open should fail */ - err = MPI_File_open(MPI_COMM_WORLD, filename, + err = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_EXCL | MPI_MODE_RDWR, MPI_INFO_NULL , &fh); if (err == MPI_SUCCESS) { errs++; @@ -80,5 +80,5 @@ int main(int argc, char **argv) free(filename); MPI_Finalize(); - return 0; + return 0; } diff --git a/ompi/mca/io/romio314/romio/test/external32.c b/ompi/mca/io/romio314/romio/test/external32.c index bca3555ae86..17a886cf0da 100644 --- a/ompi/mca/io/romio314/romio/test/external32.c +++ b/ompi/mca/io/romio314/romio/test/external32.c @@ -1,6 +1,6 @@ /* * This code was written by Intel Corporation. Copyright (C) 2011-2012 Intel Corporation. - * Intel provides this material to Argonne National Laboratory subject to + * Intel provides this material to Argonne National Laboratory subject to * Software Grant and Corporate Contributor License Agreement dated February 8, 2012. * * See COPYRIGHT in top-level directory. 
@@ -60,14 +60,14 @@ int main( int argc, char* argv[] ) { for( i = 0; i < 3; i++ ) { /* Open file */ - CHECK(MPI_File_open( MPI_COMM_WORLD, TEST_FILENAME, + CHECK(MPI_File_open( MPI_COMM_WORLD, TEST_FILENAME, MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fileh ) ); /* Set view */ CHECK(MPI_File_set_view( fileh, 0, MPI_INT, MPI_INT, datarep[i], MPI_INFO_NULL )); /* Write into file */ - CHECK(MPI_File_write_at( fileh, (MPI_Offset)rank, (void*)&sample_i, 1, + CHECK(MPI_File_write_at( fileh, (MPI_Offset)rank, (void*)&sample_i, 1, MPI_INT, MPI_STATUS_IGNORE )); /* Close file */ diff --git a/ompi/mca/io/romio314/romio/test/fcoll_test.f.in b/ompi/mca/io/romio314/romio/test/fcoll_test.f.in index 9c2b172218d..57551d5a6a3 100644 --- a/ompi/mca/io/romio314/romio/test/fcoll_test.f.in +++ b/ompi/mca/io/romio314/romio/test/fcoll_test.f.in @@ -1,5 +1,5 @@ -! -*- Mode: Fortran; -*- -! +! -*- Mode: Fortran; -*- +! ! (C) 2001 by Argonne National Laboratory. ! See COPYRIGHT in top-level directory. ! @@ -11,7 +11,7 @@ ! Fortran equivalent of coll_test.c - integer FILESIZE + integer FILESIZE parameter (FILESIZE=32*32*32*4) ! A 32^3 array. For other array sizes, change FILESIZE above and @@ -22,7 +22,7 @@ ! back, and checks that the data read is correct. ! Note that the file access pattern is noncontiguous. - + integer newtype, i, ndims, array_of_gsizes(3) integer order, intsize, nprocs, j, array_of_distribs(3) integer array_of_dargs(3), array_of_psizes(3) @@ -39,7 +39,7 @@ call MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr) call MPI_COMM_RANK(MPI_COMM_WORLD, mynod, ierr) -! process 0 takes the file name as a command-line argument and +! process 0 takes the file name as a command-line argument and ! broadcasts it to other processes if (mynod .eq. 0) then @@ -61,14 +61,14 @@ @F77GETARG@ call MPI_BCAST(str, 1024, MPI_CHARACTER, 0, & & MPI_COMM_WORLD, ierr) - else + else call MPI_BCAST(str, 1024, MPI_CHARACTER, 0, & & MPI_COMM_WORLD, ierr) end if ! 
create the distributed array filetype - + ndims = 3 order = MPI_ORDER_FORTRAN @@ -96,12 +96,12 @@ call MPI_TYPE_COMMIT(newtype, ierr) -! initialize writebuf +! initialize writebuf call MPI_TYPE_SIZE(newtype, bufcount, ierr) call MPI_TYPE_SIZE(MPI_INTEGER, intsize, ierr) bufcount = bufcount/intsize - do i=1, bufcount + do i=1, bufcount writebuf(i) = 1 end do @@ -132,7 +132,7 @@ call MPI_FILE_OPEN(MPI_COMM_WORLD, str, & & MPI_MODE_CREATE+MPI_MODE_RDWR, MPI_INFO_NULL, fh, ierr) - disp = 0 + disp = 0 call MPI_FILE_SET_VIEW(fh, disp, MPI_INTEGER, newtype, "native", & & MPI_INFO_NULL, ierr) call MPI_FILE_WRITE_ALL(fh, writebuf, bufcount, MPI_INTEGER, & @@ -143,7 +143,7 @@ call MPI_FILE_OPEN(MPI_COMM_WORLD, str, & & MPI_MODE_CREATE+MPI_MODE_RDWR, MPI_INFO_NULL, fh, ierr) - + call MPI_FILE_SET_VIEW(fh, disp, MPI_INTEGER, newtype, "native", & & MPI_INFO_NULL, ierr) call MPI_FILE_READ_ALL(fh, readbuf, bufcount, MPI_INTEGER, & @@ -161,7 +161,7 @@ call MPI_TYPE_FREE(newtype, ierr) call MPI_Allreduce( errs, toterrs, 1, MPI_INTEGER, MPI_SUM, & - $ MPI_COMM_WORLD, ierr ) + $ MPI_COMM_WORLD, ierr ) if (mynod .eq. 0) then if( toterrs .gt. 0 ) then print *, 'Found ', toterrs, ' errors' diff --git a/ompi/mca/io/romio314/romio/test/file_info.c b/ompi/mca/io/romio314/romio/test/file_info.c index fba618974fc..066915f88d2 100644 --- a/ompi/mca/io/romio314/romio/test/file_info.c +++ b/ompi/mca/io/romio314/romio/test/file_info.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. 
*/ @@ -44,10 +44,10 @@ hint_defaults UFS_DEFAULTS = { }; hint_defaults BLUEGENE_DEFAULTS = { - .cb_buffer_size = 16777216, - .ind_rd_buffer_size = 4194304, - .ind_wr_buffer_size = 4194304, - .romio_cb_read = "enable", + .cb_buffer_size = 16777216, + .ind_rd_buffer_size = 4194304, + .ind_wr_buffer_size = 4194304, + .romio_cb_read = "enable", .romio_cb_write = "enable", .cb_config_list = NULL}; @@ -76,7 +76,7 @@ int main(int argc, char **argv) MPI_Comm_rank(MPI_COMM_WORLD, &mynod); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); -/* process 0 takes the file name as a command-line argument and +/* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!mynod) { i = 1; @@ -119,7 +119,7 @@ int main(int argc, char **argv) /* open the file with MPI_INFO_NULL */ - ret = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, + ret = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); if (ret != MPI_SUCCESS) handle_error(ret, "MPI_File_open"); @@ -132,8 +132,8 @@ int main(int argc, char **argv) MPI_Info_get_nthkey(info_used, i, key); MPI_Info_get(info_used, key, MPI_MAX_INFO_VAL-1, value, &flag); #ifdef INFO_DEBUG - if (!mynod) - fprintf(stderr, "Process %d, Default: key = %s, value = %s\n", mynod, + if (!mynod) + fprintf(stderr, "Process %d, Default: key = %s, value = %s\n", mynod, key, value); #endif if (!strcmp("striping_factor", key)) { @@ -249,13 +249,13 @@ int main(int argc, char **argv) MPI_Info_set(info, "ind_wr_buffer_size", "1048576"); -/* The following three hints related to file striping are accepted only - on Intel PFS and IBM PIOFS file systems and are ignored elsewhere. - They can be specified only at file-creation time; if specified later +/* The following three hints related to file striping are accepted only + on Intel PFS and IBM PIOFS file systems and are ignored elsewhere. 
+ They can be specified only at file-creation time; if specified later they will be ignored. */ /* number of I/O devices across which the file will be striped. - accepted only if 0 < value < default_striping_factor; + accepted only if 0 < value < default_striping_factor; ignored otherwise */ if (default_striping_factor - 1 > 0) { sprintf(value, "%d", default_striping_factor-1); @@ -275,18 +275,18 @@ int main(int argc, char **argv) #endif /* the I/O device number from which to start striping the file. - accepted only if 0 <= value < default_striping_factor; + accepted only if 0 <= value < default_striping_factor; ignored otherwise */ sprintf(value, "%d", default_striping_factor-2); MPI_Info_set(info, "start_iodevice", value); -/* The following hint about PFS server buffering is accepted only on - Intel PFS. It can be specified anytime. */ +/* The following hint about PFS server buffering is accepted only on + Intel PFS. It can be specified anytime. */ MPI_Info_set(info, "pfs_svr_buf", "true"); /* open the file and set new info */ - ret = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, + ret = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); if (ret != MPI_SUCCESS) handle_error(ret, "MPI_File_open"); @@ -298,8 +298,8 @@ int main(int argc, char **argv) for (i=0; i #include -/* Writes a 4-Gbyte distributed array, reads it back, and then deletes the +/* Writes a 4-Gbyte distributed array, reads it back, and then deletes the file. Uses collective I/O. */ /* The file name is taken as a command-line argument. */ /* Run it only on a machine with sufficient memory and a file system on which ROMIO supports large files, i.e., PIOFS, XFS, SFS, and HFS */ -/* This program will work only if the MPI implementation defines MPI_Aint +/* This program will work only if the MPI implementation defines MPI_Aint as a 64-bit integer. 
*/ - + int main(int argc, char **argv) { MPI_Datatype newtype; @@ -35,7 +35,7 @@ int main(int argc, char **argv) MPI_Comm_rank(MPI_COMM_WORLD, &mynod); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); -/* process 0 takes the file name as a command-line argument and +/* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!mynod) { i = 1; @@ -78,7 +78,7 @@ int main(int argc, char **argv) for (i=0; i #include -/* writes a file of size 4 Gbytes and reads it back. +/* writes a file of size 4 Gbytes and reads it back. should be run on one process only*/ /* The file name is taken as a command-line argument. */ -/* Can be used only on file systems on which ROMIO supports large files, +/* Can be used only on file systems on which ROMIO supports large files, i.e., PIOFS, XFS, SFS, and HFS. */ - + #define SIZE 1048576*4 /* no. of long longs in each write/read */ #define NTIMES 128 /* no. of writes/reads */ @@ -63,7 +63,7 @@ int main(int argc, char **argv) for (i=0; irefct = 1; + array->refct = 1; if (commrank == 0) { /* process 0 keeps the real list */ @@ -102,7 +102,7 @@ int cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array *arrayp) procname = array->names; /* simpler to read */ procname_len = (int *) ADIOI_Malloc(commsize * sizeof(int)); - if (procname_len == NULL) { + if (procname_len == NULL) { return -1; } } @@ -112,7 +112,7 @@ int cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array *arrayp) array->names = NULL; } /* gather lengths first */ - MPI_Gather(&my_procname_len, 1, MPI_INT, + MPI_Gather(&my_procname_len, 1, MPI_INT, procname_len, 1, MPI_INT, 0, comm); if (commrank == 0) { @@ -125,7 +125,7 @@ int cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array *arrayp) for (i=0; i < commsize; i++) { /* add one to the lengths because we need to count the * terminator, and we are going to use this list of lengths - * again in the gatherv. + * again in the gatherv. 
*/ procname_len[i]++; procname[i] = malloc(procname_len[i]); @@ -133,12 +133,12 @@ int cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array *arrayp) return -1; } } - + /* create our list of displacements for the gatherv. we're going * to do everything relative to the start of the region allocated * for procname[0] * - * I suppose it is theoretically possible that the distance between + * I suppose it is theoretically possible that the distance between * malloc'd regions could be more than will fit in an int. We don't * cover that case. */ @@ -152,7 +152,7 @@ int cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array *arrayp) /* now gather strings */ if (commrank == 0) { - MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, + MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, procname[0], procname_len, disp, MPI_CHAR, 0, comm); } @@ -160,7 +160,7 @@ int cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array *arrayp) /* if we didn't do this, we would need to allocate procname[] * on all processes...which seems a little silly. 
*/ - MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, + MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, NULL, NULL, NULL, MPI_CHAR, 0, comm); } @@ -195,7 +195,7 @@ void default_str(int mynod, int len, ADIO_cb_name_array array, char *dest) } MPI_Bcast(dest, len, MPI_CHAR, 0, MPI_COMM_WORLD); } -void reverse_str(int mynod, int len, ADIO_cb_name_array array, char *dest) +void reverse_str(int mynod, int len, ADIO_cb_name_array array, char *dest) { char *ptr; int i, p; @@ -261,10 +261,10 @@ int main(int argc, char **argv) MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &mynod); + MPI_Comm_rank(MPI_COMM_WORLD, &mynod); + - - /* process 0 takes the file name as a command-line argument and + /* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!mynod) { i = 1; @@ -323,7 +323,7 @@ int main(int argc, char **argv) errs += test_file(filename, mynod, nprocs, cb_config_string, "collective w/ hinting: default order", verbose); /* reverse order */ - reverse_str(mynod, cb_config_len, array, cb_config_string); + reverse_str(mynod, cb_config_len, array, cb_config_string); errs += test_file(filename, mynod, nprocs, cb_config_string, "collective w/ hinting: reverse order", verbose); /* reverse, every other */ @@ -335,7 +335,7 @@ int main(int argc, char **argv) errs += test_file(filename, mynod, nprocs, cb_config_string, "collective w/ hinting: permutation2", verbose); MPI_Allreduce(&errs, &sum_errs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - + if (!mynod) { if (sum_errs) fprintf(stderr, "Found %d error cases\n", sum_errs); else printf(" No Errors\n"); @@ -348,7 +348,7 @@ int main(int argc, char **argv) #define SEEDER(x,y,z) ((x)*1000000 + (y) + (x)*(z)) -int test_file(char *filename, int mynod, int nprocs, char * cb_hosts, const char *msg, int verbose) +int test_file(char *filename, int mynod, int nprocs, char * cb_hosts, const char *msg, int verbose) { MPI_Datatype typevec, 
newtype, t[3]; int *buf, i, b[3], errcode, errors=0; @@ -394,7 +394,7 @@ int test_file(char *filename, int mynod, int nprocs, char * cb_hosts, const char } MPI_Barrier(MPI_COMM_WORLD); - errcode = MPI_File_open(MPI_COMM_WORLD, filename, + errcode = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); if (errcode != MPI_SUCCESS) { handle_error(errcode, "MPI_File_open"); @@ -418,7 +418,7 @@ int test_file(char *filename, int mynod, int nprocs, char * cb_hosts, const char } /* the verification for N compute nodes is tricky. Say we have 3 - * processors. + * processors. * process 0 sees: 0 -1 -1 3 -1 -1 ... * process 1 sees: -1 34 -1 -1 37 -1 ... * process 2 sees: -1 -1 68 -1 -1 71 ... */ @@ -436,7 +436,7 @@ int test_file(char *filename, int mynod, int nprocs, char * cb_hosts, const char for(/* 'i' set in above loop */; i #include -/* A simple performance test. The file name is taken as a +/* A simple performance test. The file name is taken as a command-line argument. 
*/ #define SIZE (1048576*4) /* read/write size per node in bytes */ @@ -26,7 +26,7 @@ int main(int argc, char **argv) MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &mynod); -/* process 0 takes the file name as a command-line argument and +/* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!mynod) { i = 1; @@ -56,7 +56,7 @@ int main(int argc, char **argv) buf = (int *) malloc(SIZE); for (j=0; j ${bfile}.tout if diff -b $bfile.tout $stdfile > /dev/null ; then true - elif [ -s $stdfile2 ] ; then + elif [ -s $stdfile2 ] ; then # check for alternate in case configuration has fewer datatypes if diff -b $bfile.tout $stdfile2 > /dev/null ; then true @@ -150,7 +150,7 @@ RunTest() { rm -rf $FILENAME* echo "**** Testing ${1}.c ****" mpirun -np 4 ./${1} -fname $FILENAME "$@" >>${1}.out 2>1 - CheckOutput ${1} + CheckOutput ${1} CleanExe ${1} } @@ -194,13 +194,13 @@ $mpirun -np 4 ./async-multiple -fname $FILENAME CleanExe async-multiple # OutTime -if [ $subset_only -eq 0 ] ; then +if [ $subset_only -eq 0 ] ; then testfiles="$testfiles atomicity.out" \rm -f atomicity.out MakeExe atomicity \rm -f $FILENAME* echo '**** Testing atomicity.c ****' - # Atomicity test recommends at least 8 processes (separate processors + # Atomicity test recommends at least 8 processes (separate processors # even better) $mpirun -np 4 ./atomicity -fname $FILENAME # CheckOutput atomicity @@ -300,7 +300,7 @@ CleanExe hindexed # -offm 4 -hints romio_cb_write enable -fname $FILENAME #CleanExe write_all_test OutTime -if [ $subset_only -eq 0 ] ; then +if [ $subset_only -eq 0 ] ; then testfiles="$testfiles misc.out" \rm -f misc.out MakeExe misc @@ -387,7 +387,7 @@ echo '**** Testing darray_read ****' $mpirun -np 4 ./darray_read $FILENAME CleanExe darray_read # -if [ @NOF77@ = 0 ] ; then +if [ @NOF77@ = 0 ] ; then echo "" echo "FORTRAN TESTS" OutTime @@ -439,7 +439,7 @@ fi # nodiff=1 # for file in $testfiles ; do # 
stdfile="${srcdir}/std/`basename $file .out`.std" -# # if basename is sendrecv or isndrcv, then we may want to test +# # if basename is sendrecv or isndrcv, then we may want to test # # with .std2 as well. We should really separate out the long double # # tests ... # if [ -s $stdfile ] ; then @@ -450,7 +450,7 @@ fi # grep -v 'FORTRAN STOP' ${file} > ${bfile} # if diff -b $bfile $stdfile > /dev/null ; then # true -# elif [ -s $stdfile2 ] ; then +# elif [ -s $stdfile2 ] ; then # # check for alternate in case configuration has fewer datatypes # if diff -b $bfile $stdfile2 > /dev/null ; then # true @@ -476,4 +476,4 @@ fi # echo "-- No differences found; test successful" # fi exit 0 - + diff --git a/ompi/mca/io/romio314/romio/test/shared_fp.c b/ompi/mca/io/romio314/romio/test/shared_fp.c index df41bdd818d..26f3770c8bc 100644 --- a/ompi/mca/io/romio314/romio/test/shared_fp.c +++ b/ompi/mca/io/romio314/romio/test/shared_fp.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ -/* +/* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. 
*/ @@ -12,7 +12,7 @@ void handle_error(int errcode, const char *str); -void handle_error(int errcode, const char *str) +void handle_error(int errcode, const char *str) { char msg[MPI_MAX_ERROR_STRING]; int resultlen; @@ -34,7 +34,7 @@ int main(int argc, char **argv) MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); -/* process 0 takes the file name as a command-line argument and +/* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!rank) { i = 1; @@ -58,7 +58,7 @@ int main(int argc, char **argv) filename = (char *) malloc(len+10); MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD); } - + buf = (int *) malloc(COUNT * sizeof(int)); MPI_Comm_rank(MPI_COMM_WORLD, &rank); @@ -66,7 +66,7 @@ int main(int argc, char **argv) for (i=0; i /* - * This program tests to see if fcntl returns success when asked to + * This program tests to see if fcntl returns success when asked to * establish a file lock. This test is intended for use on file systems * such as NFS that may not implement file locks. ROMIO makes use * of file locks to implement certain operations, and may not work - * properly if file locks are not available. + * properly if file locks are not available. * * This is a simple test and has at least two limitations: - * - * 1. Some implementations of NFS are known to return success for + * + * 1. Some implementations of NFS are known to return success for * setting a file lock when in fact no lock has been set. This * test will not detect such erroneous implementations of NFS * * 2. Some implementations will hang (enter and wait indefinitately) * within the fcntl call. This program will also hang in that case. - * Under normal conditions, this program should only take a few seconds to + * Under normal conditions, this program should only take a few seconds to * run. 
* * The program prints a message showing the success or failure of @@ -48,7 +48,7 @@ int main( int argc, char *argv[] ) filename = "conftest.dat"; } - + lock.l_type = F_WRLCK; lock.l_start = 0; lock.l_whence = SEEK_SET; diff --git a/ompi/mca/io/romio314/romio/util/romioinstall.in b/ompi/mca/io/romio314/romio/util/romioinstall.in index 95a63092315..e80027a2a57 100644 --- a/ompi/mca/io/romio314/romio/util/romioinstall.in +++ b/ompi/mca/io/romio314/romio/util/romioinstall.in @@ -2,7 +2,7 @@ # # This script needs to be built by configure. # -# This is a script to install ROMIO. It can be invoked with +# This is a script to install ROMIO. It can be invoked with # make install # # (if you used -prefix at configure time) or, @@ -47,8 +47,8 @@ failmode=soft # Shell procedures to copy files and create directories # -# We could use install, but install is too different and too hard to -# test. So here are the routines to copy file, make directories, and +# We could use install, but install is too different and too hard to +# test. So here are the routines to copy file, make directories, and # replace #...# and @...@ in files CP=cp # @@ -72,14 +72,14 @@ elif [ -d $1 ] ; then echo ">>> $1 is a directory; not copied <<<" errs=`expr $errs + 1` if [ $failmode = "hard" ] ; then exit 1 ; fi -elif [ ! -f $1 ] ; then +elif [ ! -f $1 ] ; then echo "**File $1 does not exist (or is not a regular file)!" 
errs=`expr $errs + 1` if [ $failmode = "hard" ] ; then exit 1 ; fi else if [ $verbose = 1 ] ; then echo "Copying $1 to $dest" ; fi # We don't delete the file in the event that we are copying the - # file over itself (we SHOULD check for that separately, by checking + # file over itself (we SHOULD check for that separately, by checking # that directories are distinct) #if [ -f $dest ] ; then $Show rm -f $dest ; fi $Show $CP $1 $dest @@ -111,7 +111,7 @@ CopyFileP() { # for arg in "$@" ; do - case "$arg" in + case "$arg" in -prefix=*) PREFIX=`echo $arg | sed -e 's/-prefix=//'` prefix=$PREFIX @@ -132,7 +132,7 @@ if test -z "$prefix" ; then fi # Uninstall filename -if [ -z "$UNINSTALLFILE" ] ; then +if [ -z "$UNINSTALLFILE" ] ; then UNINSTALLFILE="$sbindir/romiouninstall" MkDir `dirname $UNINSTALLFILE` fi @@ -162,13 +162,13 @@ if test "$WANT_INSTALL" = "1"; then #cp -r $ROMIO_HOME/lib $PREFIX #chmod 755 $PREFIX/lib for file in $TOP_BUILD_DIR/lib/* ; do - if [ -f $file ] ; then - CopyFileP $file $libdir + if [ -f $file ] ; then + CopyFileP $file $libdir fi done - # Romio also copies directories in the lib directory. + # Romio also copies directories in the lib directory. 
- if [ -z "$mandir" ] ; then + if [ -z "$mandir" ] ; then mandir=$PREFIX/man fi echo "copying directory $ROMIO_HOME/man to $mandir" @@ -182,7 +182,7 @@ if test "$WANT_INSTALL" = "1"; then echo "copying directory $ROMIO_HOME/test to $exampledir" MkDir $exampledir MkDir $exampledir/std - for file in $ROMIO_HOME/test/std/* ; do + for file in $ROMIO_HOME/test/std/* ; do CopyFile $file $exampledir/std done CopyFile test/Makefile $exampledir @@ -201,9 +201,9 @@ if test "$WANT_INSTALL" = "1"; then NEWLIB=`echo $TMPNEWLIB | sed 's/\//\\\\\//g'` sed -e 5s/INCLUDE_DIR[\ ]*=\ [/a-z0-9.A-Z_-]*/INCLUDE_DIR\ =\ $NEWINC/ \ -e 6s/LIBS[\ ]*=\ [/a-z0-9.A-Z_-]*/LIBS\ =\ $NEWLIB/ $exampledir/Makefile\ - > $exampledir/.romiotmp + > $exampledir/.romiotmp mv $exampledir/.romiotmp $exampledir/Makefile - + # MkDir $sbindir echo "rm -f $UNINSTALLFILE" >> $UNINSTALLFILE diff --git a/ompi/mca/io/romio314/romio/util/tarch b/ompi/mca/io/romio314/romio/util/tarch index ccccf4d514a..bc077f31a7c 100755 --- a/ompi/mca/io/romio314/romio/util/tarch +++ b/ompi/mca/io/romio314/romio/util/tarch @@ -55,7 +55,7 @@ for LARCH in $ARCHLIST ; do case $LARCH in SUPER-UX) FARCH=SX4; break ;; AIX|RIOS) FARCH=rs6000; break ;; - HP-UX) + HP-UX) if [ -a /dev/kmem ] ; then FARCH=hpux ; else @@ -68,9 +68,9 @@ for LARCH in $ARCHLIST ; do GARCH=$LARCH ;; sun4*) Version=`$UNAME -r` - # In "improving" SunOS, the useful feature of "substr" was withdrawn - # from expr. Can't let the users have life too easy, can we? This - # means that we can't just use + # In "improving" SunOS, the useful feature of "substr" was withdrawn + # from expr. Can't let the users have life too easy, can we? This + # means that we can't just use # set MajorVersion = `expr substr $Version 1 1` # because it won't work on Solaris systems. 
The following should work # on both: @@ -93,8 +93,8 @@ for LARCH in $ARCHLIST ; do break ;; mips|dec-5000) FARCH=dec5000 ; break ;; - next) FARCH=NeXT ; break ;; - KSR1|KSR2) FARCH=ksr ; break ;; + next) FARCH=NeXT ; break ;; + KSR1|KSR2) FARCH=ksr ; break ;; FreeBSD) FARCH=freebsd ; break ;; OpenBSD) FARCH=openbsd ; break ;; NetBSD) FARCH=netbsd ; break ;; diff --git a/ompi/mca/io/romio314/src/io_romio314.h b/ompi/mca/io/romio314/src/io_romio314.h index 93be7e65bef..86fd9b062a7 100644 --- a/ompi/mca/io/romio314/src/io_romio314.h +++ b/ompi/mca/io/romio314/src/io_romio314.h @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -62,12 +64,12 @@ typedef struct mca_io_romio314_data_t mca_io_romio314_data_t; */ /* Section 9.2 */ int mca_io_romio314_file_open (struct ompi_communicator_t *comm, - char *filename, + const char *filename, int amode, struct ompi_info_t *info, ompi_file_t *fh); int mca_io_romio314_file_close (struct ompi_file_t *fh); -int mca_io_romio314_file_delete (char *filename, +int mca_io_romio314_file_delete (const char *filename, struct ompi_info_t *info); int mca_io_romio314_file_set_size (struct ompi_file_t *fh, MPI_Offset size); @@ -87,7 +89,7 @@ int mca_io_romio314_file_set_view (struct ompi_file_t *fh, MPI_Offset disp, struct ompi_datatype_t *etype, struct ompi_datatype_t *filetype, - char *datarep, + const char *datarep, struct ompi_info_t *info); int mca_io_romio314_file_get_view (struct ompi_file_t *fh, MPI_Offset * disp, @@ -110,13 +112,13 @@ int mca_io_romio314_file_read_at_all (struct ompi_file_t *fh, ompi_status_public_t * status); int mca_io_romio314_file_write_at (struct ompi_file_t *fh, MPI_Offset offset, - void *buf, + const void *buf, int count, 
struct ompi_datatype_t *datatype, ompi_status_public_t * status); int mca_io_romio314_file_write_at_all (struct ompi_file_t *fh, MPI_Offset offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status); @@ -128,7 +130,7 @@ int mca_io_romio314_file_iread_at (struct ompi_file_t *fh, ompi_request_t **request); int mca_io_romio314_file_iwrite_at (struct ompi_file_t *fh, MPI_Offset offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); @@ -145,12 +147,12 @@ int mca_io_romio314_file_read_all (struct ompi_file_t *fh, struct ompi_datatype_t *datatype, ompi_status_public_t * status); int mca_io_romio314_file_write (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status); int mca_io_romio314_file_write_all (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status); @@ -160,7 +162,7 @@ int mca_io_romio314_file_iread (struct ompi_file_t *fh, struct ompi_datatype_t *datatype, ompi_request_t **request); int mca_io_romio314_file_iwrite (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); @@ -180,7 +182,7 @@ int mca_io_romio314_file_read_shared (struct ompi_file_t *fh, struct ompi_datatype_t *datatype, ompi_status_public_t * status); int mca_io_romio314_file_write_shared (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status); @@ -190,7 +192,7 @@ int mca_io_romio314_file_iread_shared (struct ompi_file_t *fh, struct ompi_datatype_t *datatype, ompi_request_t **request); int mca_io_romio314_file_iwrite_shared (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); @@ -200,7 +202,7 @@ int 
mca_io_romio314_file_read_ordered (struct ompi_file_t *fh, struct ompi_datatype_t *datatype, ompi_status_public_t * status); int mca_io_romio314_file_write_ordered (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status); @@ -221,11 +223,11 @@ int mca_io_romio314_file_read_at_all_end (struct ompi_file_t *fh, ompi_status_public_t * status); int mca_io_romio314_file_write_at_all_begin (struct ompi_file_t *fh, MPI_Offset offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype); int mca_io_romio314_file_write_at_all_end (struct ompi_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t * status); int mca_io_romio314_file_read_all_begin (struct ompi_file_t *fh, void *buf, @@ -235,11 +237,11 @@ int mca_io_romio314_file_read_all_end (struct ompi_file_t *fh, void *buf, ompi_status_public_t * status); int mca_io_romio314_file_write_all_begin (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype); int mca_io_romio314_file_write_all_end (struct ompi_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t * status); int mca_io_romio314_file_read_ordered_begin (struct ompi_file_t *fh, void *buf, @@ -249,11 +251,11 @@ int mca_io_romio314_file_read_ordered_end (struct ompi_file_t *fh, void *buf, ompi_status_public_t * status); int mca_io_romio314_file_write_ordered_begin (struct ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype); int mca_io_romio314_file_write_ordered_end (struct ompi_file_t *fh, - void *buf, + const void *buf, struct ompi_status_public_t * status); /* Section 9.5.1 */ diff --git a/ompi/mca/io/romio314/src/io_romio314_component.c b/ompi/mca/io/romio314/src/io_romio314_component.c index 329f73767f6..60954575760 100644 --- a/ompi/mca/io/romio314/src/io_romio314_component.c +++ b/ompi/mca/io/romio314/src/io_romio314_component.c @@ -48,13 +48,13 @@ static 
const struct mca_io_base_module_2_0_0_t * static int file_unquery(struct ompi_file_t *file, struct mca_io_base_file_t *private_data); -static int delete_query(char *filename, struct ompi_info_t *info, +static int delete_query(const char *filename, struct ompi_info_t *info, struct mca_io_base_delete_t **private_data, bool *usable, int *priorty); -static int delete_select(char *filename, struct ompi_info_t *info, +static int delete_select(const char *filename, struct ompi_info_t *info, struct mca_io_base_delete_t *private_data); -static int register_datarep(char *, +static int register_datarep(const char *, MPI_Datarep_conversion_function*, MPI_Datarep_conversion_function*, MPI_Datarep_extent_function*, @@ -222,7 +222,7 @@ static int file_unquery(struct ompi_file_t *file, } -static int delete_query(char *filename, struct ompi_info_t *info, +static int delete_query(const char *filename, struct ompi_info_t *info, struct mca_io_base_delete_t **private_data, bool *usable, int *priority) { @@ -234,7 +234,7 @@ static int delete_query(char *filename, struct ompi_info_t *info, } -static int delete_select(char *filename, struct ompi_info_t *info, +static int delete_select(const char *filename, struct ompi_info_t *info, struct mca_io_base_delete_t *private_data) { int ret; @@ -248,7 +248,7 @@ static int delete_select(char *filename, struct ompi_info_t *info, static int -register_datarep(char * datarep, +register_datarep(const char * datarep, MPI_Datarep_conversion_function* read_fn, MPI_Datarep_conversion_function* write_fn, MPI_Datarep_extent_function* extent_fn, diff --git a/ompi/mca/io/romio314/src/io_romio314_file_open.c b/ompi/mca/io/romio314/src/io_romio314_file_open.c index 66b82bfb9cd..acb7d91ef8b 100644 --- a/ompi/mca/io/romio314/src/io_romio314_file_open.c +++ b/ompi/mca/io/romio314/src/io_romio314_file_open.c @@ -1,19 +1,21 @@ /* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ * - * Additional copyrights may follow + * Additional copyrights may follow * - * $HEADER$ + * $HEADER$ */ #include "ompi_config.h" @@ -27,7 +29,7 @@ int mca_io_romio314_file_open (ompi_communicator_t *comm, - char *filename, + const char *filename, int amode, ompi_info_t *info, ompi_file_t *fh) @@ -182,7 +184,7 @@ mca_io_romio314_file_set_view (ompi_file_t *fh, MPI_Offset disp, struct ompi_datatype_t *etype, struct ompi_datatype_t *filetype, - char *datarep, + const char *datarep, ompi_info_t *info) { int ret; diff --git a/ompi/mca/io/romio314/src/io_romio314_file_write.c b/ompi/mca/io/romio314/src/io_romio314_file_write.c index 49b7d970521..628cfd2e592 100644 --- a/ompi/mca/io/romio314/src/io_romio314_file_write.c +++ b/ompi/mca/io/romio314/src/io_romio314_file_write.c @@ -1,19 +1,21 @@ /* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ * - * Additional copyrights may follow + * Additional copyrights may follow * - * $HEADER$ + * $HEADER$ */ #include "ompi_config.h" @@ -25,7 +27,7 @@ int mca_io_romio314_file_write_at (ompi_file_t *fh, MPI_Offset offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status) @@ -48,7 +50,7 @@ mca_io_romio314_file_write_at (ompi_file_t *fh, int mca_io_romio314_file_write_at_all (ompi_file_t *fh, MPI_Offset offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status) @@ -71,7 +73,7 @@ mca_io_romio314_file_write_at_all (ompi_file_t *fh, int mca_io_romio314_file_iwrite_at (ompi_file_t *fh, MPI_Offset offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request) @@ -95,7 +97,7 @@ mca_io_romio314_file_iwrite_at (ompi_file_t *fh, int mca_io_romio314_file_write (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status) @@ -115,7 +117,7 @@ mca_io_romio314_file_write (ompi_file_t *fh, int mca_io_romio314_file_write_all (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status) @@ -135,7 +137,7 @@ mca_io_romio314_file_write_all (ompi_file_t *fh, int mca_io_romio314_file_iwrite (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request) @@ -156,7 +158,7 @@ mca_io_romio314_file_iwrite (ompi_file_t *fh, int mca_io_romio314_file_write_shared (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status) @@ -176,7 +178,7 @@ mca_io_romio314_file_write_shared (ompi_file_t *fh, int mca_io_romio314_file_iwrite_shared (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request) @@ -196,7 +198,7 @@ 
mca_io_romio314_file_iwrite_shared (ompi_file_t *fh, int mca_io_romio314_file_write_ordered (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t * status) @@ -217,7 +219,7 @@ mca_io_romio314_file_write_ordered (ompi_file_t *fh, int mca_io_romio314_file_write_at_all_begin (ompi_file_t *fh, MPI_Offset offset, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype) { @@ -235,7 +237,7 @@ mca_io_romio314_file_write_at_all_begin (ompi_file_t *fh, int mca_io_romio314_file_write_at_all_end (ompi_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t * status) { int ret; @@ -252,7 +254,7 @@ mca_io_romio314_file_write_at_all_end (ompi_file_t *fh, int mca_io_romio314_file_write_all_begin (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype) { @@ -270,7 +272,7 @@ mca_io_romio314_file_write_all_begin (ompi_file_t *fh, int mca_io_romio314_file_write_all_end (ompi_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t * status) { int ret; @@ -286,7 +288,7 @@ mca_io_romio314_file_write_all_end (ompi_file_t *fh, int mca_io_romio314_file_write_ordered_begin (ompi_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype) { @@ -304,7 +306,7 @@ mca_io_romio314_file_write_ordered_begin (ompi_file_t *fh, int mca_io_romio314_file_write_ordered_end (ompi_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t * status) { int ret; diff --git a/ompi/mca/io/romio314/src/io_romio314_module.c b/ompi/mca/io/romio314/src/io_romio314_module.c index cb46168ae4c..3a40046cbdf 100644 --- a/ompi/mca/io/romio314/src/io_romio314_module.c +++ b/ompi/mca/io/romio314/src/io_romio314_module.c @@ -59,6 +59,8 @@ mca_io_base_module_2_0_0_t mca_io_romio314_module = { mca_io_romio314_file_write_at_all, mca_io_romio314_file_iread_at, mca_io_romio314_file_iwrite_at, + NULL, /* iread_at_all */ + NULL, /* iwrite_at_all */ /* 
non-indexed IO operations */ mca_io_romio314_file_read, @@ -67,6 +69,8 @@ mca_io_base_module_2_0_0_t mca_io_romio314_module = { mca_io_romio314_file_write_all, mca_io_romio314_file_iread, mca_io_romio314_file_iwrite, + NULL, /* iread_all */ + NULL, /* iwrite_all */ mca_io_romio314_file_seek, mca_io_romio314_file_get_position, diff --git a/ompi/mca/mca.h b/ompi/mca/mca.h index d297c453eac..f9e4ea341a4 100644 --- a/ompi/mca/mca.h +++ b/ompi/mca/mca.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,13 +14,13 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** - * @file +/** + * @file * * Top-level interface for \em all orte MCA components. */ diff --git a/ompi/mca/mtl/Makefile.am b/ompi/mca/mtl/Makefile.am index 5a9ac3fe577..310261b8f5f 100644 --- a/ompi/mca/mtl/Makefile.am +++ b/ompi/mca/mtl/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2006 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/mtl/base/Makefile.am b/ompi/mca/mtl/base/Makefile.am index 73bfda0c6c7..fb63092ea20 100644 --- a/ompi/mca/mtl/base/Makefile.am +++ b/ompi/mca/mtl/base/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2006 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/mtl/base/base.h b/ompi/mca/mtl/base/base.h index 4f553711104..37db84c9fd8 100644 --- a/ompi/mca/mtl/base/base.h +++ b/ompi/mca/mtl/base/base.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,21 +6,24 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #ifndef MCA_MTL_BASE_H #define MCA_MTL_BASE_H + #include "ompi_config.h" +#include "opal/mca/base/mca_base_framework.h" #include "ompi/mca/mca.h" #include "ompi/mca/mtl/mtl.h" @@ -32,9 +36,10 @@ BEGIN_C_DECLS OMPI_DECLSPEC extern mca_mtl_base_component_t* ompi_mtl_base_selected_component; - + OMPI_DECLSPEC int ompi_mtl_base_select(bool enable_progress_threads, - bool enable_mpi_threads); + bool enable_mpi_threads, + int *priority); OMPI_DECLSPEC extern mca_base_framework_t ompi_mtl_base_framework; diff --git a/ompi/mca/mtl/base/mtl_base_datatype.h b/ompi/mca/mtl/base/mtl_base_datatype.h index 0d515e2b8ad..41559245745 100644 --- a/ompi/mca/mtl/base/mtl_base_datatype.h +++ b/ompi/mca/mtl/base/mtl_base_datatype.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,7 +39,7 @@ ompi_mtl_datatype_pack(struct opal_convertor_t *convertor, uint32_t iov_count = 1; #if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT) - if (convertor->pDesc && + if (convertor->pDesc && !(convertor->flags & CONVERTOR_COMPLETED) && opal_datatype_is_contiguous_memory_layout(convertor->pDesc, convertor->count)) { @@ -63,9 +63,9 @@ ompi_mtl_datatype_pack(struct opal_convertor_t *convertor, if (NULL == iov.iov_base) return OMPI_ERR_OUT_OF_RESOURCE; *freeAfter = true; } - + opal_convertor_pack( convertor, &iov, &iov_count, buffer_len ); - + *buffer = iov.iov_base; return OMPI_SUCCESS; @@ -89,7 +89,7 @@ ompi_mtl_datatype_recv_buf(struct opal_convertor_t *convertor, *buffer = malloc(*buffer_len); *free_on_error = true; } else { - *buffer = convertor->pBaseBuf + + *buffer = convertor->pBaseBuf + convertor->use_desc->desc[convertor->use_desc->used].end_loop.first_elem_disp; } return OMPI_SUCCESS; diff --git a/ompi/mca/mtl/base/mtl_base_frame.c b/ompi/mca/mtl/base/mtl_base_frame.c index d49a8e5c508..ea5784304a6 100644 --- a/ompi/mca/mtl/base/mtl_base_frame.c +++ b/ompi/mca/mtl/base/mtl_base_frame.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -48,12 +49,14 @@ mca_mtl_base_module_t *ompi_mtl = NULL; * need to reexamine this at a later time. */ int -ompi_mtl_base_select(bool enable_progress_threads, - bool enable_mpi_threads) +ompi_mtl_base_select (bool enable_progress_threads, + bool enable_mpi_threads, + int *priority) { int ret = OMPI_ERR_NOT_FOUND; mca_mtl_base_component_t *best_component = NULL; mca_mtl_base_module_t *best_module = NULL; + int best_priority; /* * Select the best component @@ -61,12 +64,13 @@ ompi_mtl_base_select(bool enable_progress_threads, if( OPAL_SUCCESS != mca_base_select("mtl", ompi_mtl_base_framework.framework_output, &ompi_mtl_base_framework.framework_components, (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { + (mca_base_component_t **) &best_component, + &best_priority) ) { /* notify caller that no available component found */ return ret; } - opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, + opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, "select: initializing %s component %s", best_component->mtl_version.mca_type_name, best_component->mtl_version.mca_component_name ); @@ -81,15 +85,16 @@ ompi_mtl_base_select(bool enable_progress_threads, "select: init returned success"); ompi_mtl_base_selected_component = best_component; ompi_mtl = best_module; + *priority = best_priority; ret = OMPI_SUCCESS; } /* All done */ if (NULL == ompi_mtl) { - opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, + opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, "select: no component selected"); } else { - opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, + opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, "select: component %s selected", ompi_mtl_base_selected_component-> mtl_version.mca_component_name ); diff --git a/ompi/mca/mtl/configure.m4 b/ompi/mca/mtl/configure.m4 index 
b3ae21dcde1..3ba838b4ae2 100644 --- a/ompi/mca/mtl/configure.m4 +++ b/ompi/mca/mtl/configure.m4 @@ -3,9 +3,9 @@ # Copyright (c) 2013 Sandia National Laboratories. All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/mtl/mtl.h b/ompi/mca/mtl/mtl.h index 5a0c0cb33eb..f703250b4e7 100644 --- a/ompi/mca/mtl/mtl.h +++ b/ompi/mca/mtl/mtl.h @@ -6,9 +6,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -48,7 +48,7 @@ struct ompi_request_t; struct opal_convertor_t; struct mca_mtl_base_module_t; - + struct mca_mtl_request_t { /** pointer to associated ompi_request_t */ struct ompi_request_t *ompi_req; @@ -56,6 +56,12 @@ struct mca_mtl_request_t { }; typedef struct mca_mtl_request_t mca_mtl_request_t; + +/** + * MTL module flags + */ +#define MCA_MTL_BASE_FLAG_REQUIRE_WORLD 0x00000001 + /** * Initialization routine for MTL component * @@ -75,18 +81,18 @@ typedef struct mca_mtl_request_t mca_mtl_request_t; * user and the component must be capable of coping * with threads. If the component can cope with * MPI_THREAD_MULTIPLE, enable_mpi_thread_multiple - * should be set to true. Otherwise, it is assumed + * should be set to true. Otherwise, it is assumed * that only THREAD_FUNNELLED and THREAD_SERIALIZED * can be used. * @param enable_mpi_thread_multiple (OUT) Component does / does not * support MPI_THREAD_MULTIPLE. This variable only - * needs to be set if enable_mpi_threads is true. + * needs to be set if enable_mpi_threads is true. * Otherwise, the return value will be ignored. 
* * @retval NULL component can not operate on the current machine * @retval non-NULL component interface function */ -typedef struct mca_mtl_base_module_t* +typedef struct mca_mtl_base_module_t* (*mca_mtl_base_component_init_fn_t)(bool enable_progress_threads, bool enable_mpi_threads); @@ -101,25 +107,25 @@ typedef struct mca_mtl_base_component_2_0_0_t mca_mtl_base_component_t; /** - * MCA->MTL Clean up any resources held by MTL module - * + * MCA->MTL Clean up any resources held by MTL module + * * Opposite of module_init. Called when communication will no longer * be necessary. ussually this is during MPI_FINALIZE, but it can be * earlier if the component was not selected to run. Assuming * module_init was called, finalize will always be called before the * component_close function is called. - * + * * @param mtl (IN) MTL module returned from call to initialize * * @retval OMPI_SUCCESS cleanup finished successfully * @retval other failure during cleanup - * + * */ typedef int (*mca_mtl_base_module_finalize_fn_t)(struct mca_mtl_base_module_t* mtl); /** - * PML->MTL notification of change in the process list. + * PML->MTL notification of change in the process list. 
* * The mca_mtl_base_module_add_procs_fn_t() is used by the PML to * notify the MTL that new processes are connected to the current @@ -144,7 +150,7 @@ typedef int (*mca_mtl_base_module_finalize_fn_t)(struct mca_mtl_base_module_t* m * @retval other failure during setup */ typedef int (*mca_mtl_base_module_add_procs_fn_t)( - struct mca_mtl_base_module_t* mtl, + struct mca_mtl_base_module_t* mtl, size_t nprocs, struct ompi_proc_t** procs); @@ -166,7 +172,7 @@ typedef int (*mca_mtl_base_module_add_procs_fn_t)( * @return Status indicating if cleanup was successful */ typedef int (*mca_mtl_base_module_del_procs_fn_t)( - struct mca_mtl_base_module_t* mtl, + struct mca_mtl_base_module_t* mtl, size_t nprocs, struct ompi_proc_t** procs); @@ -185,7 +191,7 @@ typedef int (*mca_mtl_base_module_del_procs_fn_t)( * @param comm (IN) Communicator used for operation * @param dest (IN) Destination rank for send (relative to comm) * @param tag (IN) MPI tag used for sending. See note below. - * @param convertor (IN) Datatype convertor describing send datatype. + * @param convertor (IN) Datatype convertor describing send datatype. * Already prepared for send. * @param mode (IN) Mode for send operation * @@ -202,7 +208,7 @@ typedef int (*mca_mtl_base_module_del_procs_fn_t)( * if a negative tag is used. */ typedef int (*mca_mtl_base_module_send_fn_t)( - struct mca_mtl_base_module_t* mtl, + struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t *comm, int dest, int tag, @@ -229,11 +235,11 @@ typedef int (*mca_mtl_base_module_send_fn_t)( * @param comm (IN) Communicator used for operation * @param dest (IN) Destination rank for send (relative to comm) * @param tag (IN) MPI tag used for sending. See note below. - * @param convertor (IN) Datatype convertor describing send datatype. + * @param convertor (IN) Datatype convertor describing send datatype. * Already prepared for send. 
* @param mode (IN) Mode for send operation (see pml.h) - * @param blocking (IN) True if the call originated from a blocking - * call, but the PML decided to use a + * @param blocking (IN) True if the call originated from a blocking + * call, but the PML decided to use a * non-blocking operation, likely for * internal performance decisions This is an * optimization flag and is not needed for @@ -250,7 +256,7 @@ typedef int (*mca_mtl_base_module_send_fn_t)( * if a negative tag is used. */ typedef int (*mca_mtl_base_module_isend_fn_t)( - struct mca_mtl_base_module_t* mtl, + struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t *comm, int dest, int tag, @@ -277,7 +283,7 @@ typedef int (*mca_mtl_base_module_isend_fn_t)( * @param comm (IN) Communicator used for operation * @param src (IN) Source rank for send (relative to comm) * @param tag (IN) MPI tag used for sending. See note below. - * @param convertor (IN) Datatype convertor describing receive datatype. + * @param convertor (IN) Datatype convertor describing receive datatype. * Already prepared for receive. * @param mtl_request (IN) Pointer to mtl_request. The ompi_req field * will be populated with an initialized @@ -310,7 +316,7 @@ typedef int (*mca_mtl_base_module_irecv_fn_t)( * @param src (IN) Source rank for send (relative to comm) * @param tag (IN) MPI tag used for sending. See note below. * @param flag (OUT) true if message available, false otherwise - * @param status (OUT) Status structure for information on + * @param status (OUT) Status structure for information on * available message * * \note While MPI does not allow users to specify negative tags, they @@ -320,7 +326,7 @@ typedef int (*mca_mtl_base_module_irecv_fn_t)( * against negative tags. 
*/ typedef int (*mca_mtl_base_module_iprobe_fn_t)( - struct mca_mtl_base_module_t* mtl, + struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t *comm, int src, int tag, @@ -357,7 +363,7 @@ typedef int (*mca_mtl_base_module_improbe_fn_t)(struct mca_mtl_base_module_t *mt * */ typedef int (*mca_mtl_base_module_cancel_fn_t)( - struct mca_mtl_base_module_t* mtl, + struct mca_mtl_base_module_t* mtl, mca_mtl_request_t *mtl_request, int flag); diff --git a/ompi/mca/mtl/mxm/help-mtl-mxm.txt b/ompi/mca/mtl/mxm/help-mtl-mxm.txt index 99e086c6817..32a06782c62 100644 --- a/ompi/mca/mtl/mxm/help-mtl-mxm.txt +++ b/ompi/mca/mtl/mxm/help-mtl-mxm.txt @@ -16,18 +16,18 @@ the environment). [unable to create endpoint] MXM was unable to create an endpoint. Please make sure that the network link is -active on the node and the hardware is functioning. +active on the node and the hardware is functioning. Error: %s [unable to extract endpoint ptl address] -MXM was unable to read settings for endpoint +MXM was unable to read settings for endpoint PTL ID: %d Error: %s [unable to extract endpoint address] -MXM was unable to read settings for endpoint +MXM was unable to read settings for endpoint Error: %s diff --git a/ompi/mca/mtl/mxm/mtl_mxm.c b/ompi/mca/mtl/mxm/mtl_mxm.c index fc7155822c7..9db70a0d7b7 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm.c +++ b/ompi/mca/mtl/mxm/mtl_mxm.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (C) 2001-2011 Mellanox Technologies Ltd. ALL RIGHTS RESERVED. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. 
All rights @@ -177,7 +177,7 @@ static int ompi_mtl_mxm_send_ep_address(void *address, size_t address_len) /* Send address length */ sprintf(modex_name, "%s-len", modex_component_name); - OPAL_MODEX_SEND_STRING(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND_STRING(rc, OPAL_PMIX_GLOBAL, modex_name, &address_len, sizeof(address_len)); if (OMPI_SUCCESS != rc) { MXM_ERROR("failed to send address length"); @@ -192,7 +192,7 @@ static int ompi_mtl_mxm_send_ep_address(void *address, size_t address_len) while (modex_buf_size) { sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id); modex_cur_size = (modex_buf_size < modex_max_size) ? modex_buf_size : modex_max_size; - OPAL_MODEX_SEND_STRING(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND_STRING(rc, OPAL_PMIX_GLOBAL, modex_name, modex_buf_ptr, modex_cur_size); if (OMPI_SUCCESS != rc) { MXM_ERROR("Open MPI couldn't distribute EP connection details"); @@ -232,7 +232,7 @@ static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address /* Receive address length */ sprintf(modex_name, "%s-len", modex_component_name); - OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super, + OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name, (char**)&address_len_buf_ptr, &modex_cur_size); if (OMPI_SUCCESS != rc) { @@ -253,7 +253,7 @@ static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address modex_buf_size = 0; while (modex_buf_size < *address_len_p) { sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id); - OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super, + OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name, (char**)&modex_buf_ptr, &modex_cur_size); if (OMPI_SUCCESS != rc) { @@ -304,27 +304,30 @@ int ompi_mtl_mxm_module_init(void) } #endif - if (NULL == (procs = ompi_proc_world(&totps))) { - MXM_ERROR("Unable to obtain process list"); - return OMPI_ERROR; - } + totps = ompi_proc_world_size (); if (totps < 
(size_t)ompi_mtl_mxm.mxm_np) { MXM_VERBOSE(1, "MXM support will be disabled because of total number " "of processes (%lu) is less than the minimum set by the " "mtl_mxm_np MCA parameter (%u)", totps, ompi_mtl_mxm.mxm_np); - free(procs); return OMPI_ERR_NOT_SUPPORTED; } MXM_VERBOSE(1, "MXM support enabled"); if (ORTE_NODE_RANK_INVALID == (lr = ompi_process_info.my_node_rank)) { MXM_ERROR("Unable to obtain local node rank"); - free(procs); return OMPI_ERROR; } nlps = ompi_process_info.num_local_peers + 1; + /* local procs are always allocated. if that ever changes this will need to + * be modified. */ + procs = ompi_proc_get_allocated (&totps); + if (NULL == procs) { + MXM_ERROR("Unable to obtain process list"); + return OMPI_ERROR; + } + for (proc = 0; proc < totps; proc++) { if (OPAL_PROC_ON_LOCAL_NODE(procs[proc]->super.proc_flags)) { mxlr = max(mxlr, procs[proc]->super.proc_name.vpid); @@ -386,6 +389,9 @@ int ompi_mtl_mxm_module_init(void) /* Register the MXM progress function */ opal_progress_register(ompi_mtl_mxm_progress); + + ompi_mtl_mxm.super.mtl_flags |= MCA_MTL_BASE_FLAG_REQUIRE_WORLD; + #if MXM_API >= MXM_VERSION(2,0) if (ompi_mtl_mxm.using_mem_hooks) { @@ -595,14 +601,8 @@ int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, size_t i; #if MXM_API >= MXM_VERSION(3,1) - if (ompi_mtl_mxm.bulk_disconnect) { - size_t nprocs_world; - ompi_proc_t **procs; - procs = ompi_proc_world(&nprocs_world); - if (nprocs == nprocs_world) { - mxm_ep_powerdown(ompi_mtl_mxm.ep); - } - free(procs); + if (ompi_mtl_mxm.bulk_disconnect && nprocs == ompi_proc_world_size ()) { + mxm_ep_powerdown(ompi_mtl_mxm.ep); } #endif diff --git a/ompi/mca/mtl/mxm/mtl_mxm_component.c b/ompi/mca/mtl/mxm/mtl_mxm_component.c index 9b5e9b66179..dba2bd045ef 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_component.c +++ b/ompi/mca/mtl/mxm/mtl_mxm_component.c @@ -16,6 +16,7 @@ #include "opal/util/show_help.h" #include "ompi/proc/proc.h" #include "opal/memoryhooks/memory.h" +#include 
"opal/mca/memory/base/base.h" #include "ompi/runtime/mpiruntime.h" #include "mtl_mxm.h" @@ -130,7 +131,8 @@ static int ompi_mtl_mxm_component_register(void) free(runtime_version); #endif - param_priority = 100; + /* set high enought to defeat ob1's default */ + param_priority = 30; (void) mca_base_component_var_register (c, "priority", "Priority of the MXM MTL component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -191,14 +193,15 @@ static int ompi_mtl_mxm_component_open(void) cur_ver = mxm_get_version(); if (cur_ver != MXM_API) { MXM_VERBOSE(1, - "WARNING: OMPI was compiled with MXM version %d.%d but version %ld.%ld detected.", + "WARNING: OMPI was compiled with MXM version %d.%d but version %ld.%ld detected.", MXM_VERNO_MAJOR, - MXM_VERNO_MINOR, + MXM_VERNO_MINOR, (cur_ver >> MXM_MAJOR_BIT) & 0xff, (cur_ver >> MXM_MINOR_BIT) & 0xff); } #if MXM_API >= MXM_VERSION(2,0) + (void)mca_base_framework_open(&opal_memory_base_framework, 0); /* Register memory hooks */ if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & @@ -280,6 +283,7 @@ static int ompi_mtl_mxm_component_close(void) #if MXM_API >= MXM_VERSION(2,0) mxm_config_free_ep_opts(ompi_mtl_mxm.mxm_ep_opts); mxm_config_free_context_opts(ompi_mtl_mxm.mxm_ctx_opts); + mca_base_framework_close(&opal_memory_base_framework); #else mxm_config_free(ompi_mtl_mxm.mxm_ep_opts); mxm_config_free(ompi_mtl_mxm.mxm_ctx_opts); diff --git a/ompi/mca/mtl/mxm/mtl_mxm_endpoint.c b/ompi/mca/mtl/mxm/mtl_mxm_endpoint.c index cb8df9d0221..6fe543a0002 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_endpoint.c +++ b/ompi/mca/mtl/mxm/mtl_mxm_endpoint.c @@ -14,7 +14,7 @@ #include "mtl_mxm.h" #include "mtl_mxm_types.h" -#include "mtl_mxm_endpoint.h" +#include "mtl_mxm_endpoint.h" /* * Initialize state of the endpoint instance. 
diff --git a/ompi/mca/mtl/mxm/mtl_mxm_send.c b/ompi/mca/mtl/mxm/mtl_mxm_send.c index 9d72621c219..0f5c0c9b42f 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_send.c +++ b/ompi/mca/mtl/mxm/mtl_mxm_send.c @@ -16,7 +16,7 @@ #include "mtl_mxm_request.h" #include "ompi/mca/mtl/base/mtl_base_datatype.h" -static inline __opal_attribute_always_inline__ +static inline __opal_attribute_always_inline__ size_t ompi_mtl_mxm_stream_pack(opal_convertor_t *convertor, void *buffer, size_t length, size_t offset) { @@ -58,7 +58,7 @@ static inline __opal_attribute_always_inline__ int size_t *buffer_len = &mxm_send_req->base.data.buffer.length; #if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT) - if (convertor->pDesc && + if (convertor->pDesc && opal_datatype_is_contiguous_memory_layout(convertor->pDesc, convertor->count)) { mxm_send_req->base.data.buffer.ptr = convertor->pBaseBuf; @@ -211,7 +211,7 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl, mxm_send_req->opcode = MXM_REQ_OP_SEND; if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) { mxm_send_req->base.flags |= MXM_REQ_FLAG_SEND_SYNC; - } + } #else #if defined(MXM_REQ_SEND_FLAG_REENTRANT) mxm_send_req->flags = MXM_REQ_SEND_FLAG_REENTRANT; diff --git a/ompi/mca/mtl/ofi/Makefile.am b/ompi/mca/mtl/ofi/Makefile.am index 7312ba4fdf0..7f81b4545fa 100644 --- a/ompi/mca/mtl/ofi/Makefile.am +++ b/ompi/mca/mtl/ofi/Makefile.am @@ -11,7 +11,7 @@ EXTRA_DIST = post_configure.sh -AM_CPPFLAGS = $(ompi_mtl_ofi_CPPFLAGS) +AM_CPPFLAGS = $(ompi_mtl_ofi_CPPFLAGS) $(opal_common_libfabric_CPPFLAGS) dist_ompidata_DATA = help-mtl-ofi.txt @@ -43,8 +43,12 @@ mca_mtl_ofi_la_SOURCES = $(mtl_ofi_sources) mca_mtl_ofi_la_LDFLAGS = \ $(ompi_mtl_ofi_LDFLAGS) \ -module -avoid-version -mca_mtl_ofi_la_LIBADD = $(ompi_mtl_ofi_LIBS) +mca_mtl_ofi_la_LIBADD = $(ompi_mtl_ofi_LIBS) \ + $(OPAL_TOP_BUILDDIR)/opal/mca/common/libfabric/lib@OPAL_LIB_PREFIX@mca_common_libfabric.la noinst_LTLIBRARIES = $(component_noinst) libmca_mtl_ofi_la_SOURCES = $(mtl_ofi_sources) -libmca_mtl_ofi_la_LDFLAGS = 
-module -avoid-version +libmca_mtl_ofi_la_LDFLAGS = \ + $(ompi_mtl_ofi_LDFLAGS) \ + -module -avoid-version +libmca_mtl_ofi_la_LIBADD = $(ompi_mtl_ofi_LIBS) diff --git a/ompi/mca/mtl/ofi/configure.m4 b/ompi/mca/mtl/ofi/configure.m4 index 39bf83ad99a..627298dcda6 100644 --- a/ompi/mca/mtl/ofi/configure.m4 +++ b/ompi/mca/mtl/ofi/configure.m4 @@ -23,7 +23,10 @@ AC_DEFUN([MCA_ompi_mtl_ofi_POST_CONFIG], [ AC_DEFUN([MCA_ompi_mtl_ofi_CONFIG],[ AC_CONFIG_FILES([ompi/mca/mtl/ofi/Makefile]) - OPAL_CHECK_LIBFABRIC([ompi_mtl_ofi], + # ensure we already ran the common libfabric config + AC_REQUIRE([MCA_opal_common_libfabric_CONFIG]) + + AS_IF([test "$opal_common_libfabric_happy" = "yes"], [$1], [$2]) ])dnl diff --git a/ompi/mca/mtl/ofi/help-mtl-ofi.txt b/ompi/mca/mtl/ofi/help-mtl-ofi.txt index 84752d9d391..2338d548f01 100644 --- a/ompi/mca/mtl/ofi/help-mtl-ofi.txt +++ b/ompi/mca/mtl/ofi/help-mtl-ofi.txt @@ -1,6 +1,6 @@ # -*- text -*- # -# Copyright (c) 2013-2014 Intel, Inc. All rights reserved +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved # # $COPYRIGHT$ # @@ -8,12 +8,3 @@ # # $HEADER$ # -[ofi init] -Initialization of OFI library failed. - - Error: %s -# -[debug level] -Unable to set OFI debug level. 
- - Error: %s diff --git a/ompi/mca/mtl/ofi/mtl_ofi.c b/ompi/mca/mtl/ofi/mtl_ofi.c index 1f4abb72ba8..ed6aae6bc44 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi.c +++ b/ompi/mca/mtl/ofi/mtl_ofi.c @@ -110,6 +110,15 @@ ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl, */ for (i = 0; i < nprocs; ++i) { endpoint = OBJ_NEW(mca_mtl_ofi_endpoint_t); + if (NULL == endpoint) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: mtl/ofi: could not allocate endpoint" + " structure\n", + __FILE__, __LINE__); + ret = OMPI_ERROR; + goto bail; + } + endpoint->mtl_ofi_module = &ompi_mtl_ofi; endpoint->peer_fiaddr = fi_addrs[i]; diff --git a/ompi/mca/mtl/ofi/mtl_ofi.h b/ompi/mca/mtl/ofi/mtl_ofi.h index 48cdee2747a..d5cd1f8ace7 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi.h +++ b/ompi/mca/mtl/ofi/mtl_ofi.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -33,29 +33,35 @@ #include "ompi/mca/mtl/base/mtl_base_datatype.h" #include "ompi/message/message.h" -#include "mtl_ofi.h" #include "mtl_ofi_types.h" #include "mtl_ofi_request.h" #include "mtl_ofi_endpoint.h" #include "mtl_ofi_compat.h" +#define MTL_OFI_RETRY_UNTIL_DONE(FUNC) \ + do { \ + do { \ + ret = FUNC; \ + if(OPAL_LIKELY(0 == ret)) {break;} \ + } while(-FI_EAGAIN == ret); \ + } while(0); + BEGIN_C_DECLS extern mca_mtl_ofi_module_t ompi_mtl_ofi; extern mca_base_framework_t ompi_mtl_base_framework; -extern int ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl, - size_t nprocs, - struct ompi_proc_t **procs); - extern int ompi_mtl_ofi_del_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, struct ompi_proc_t **procs); +int ompi_mtl_ofi_progress_no_inline(void); + __opal_attribute_always_inline__ static inline int ompi_mtl_ofi_progress(void) { - int ret, count = 0; + ssize_t ret; + int count = 0; struct fi_cq_tagged_entry wc = { 0 }; struct fi_cq_err_entry error = { 0 }; ompi_mtl_ofi_request_t 
*ofi_req = NULL; @@ -75,7 +81,7 @@ ompi_mtl_ofi_progress(void) ret = ofi_req->event_callback(&wc, ofi_req); if (OMPI_SUCCESS != ret) { opal_output(ompi_mtl_base_framework.framework_output, - "Error returned by request event callback: %d", + "Error returned by request event callback: %zd", ret); abort(); } @@ -88,9 +94,10 @@ ompi_mtl_ofi_progress(void) ret = fi_cq_readerr(ompi_mtl_ofi.cq, &error, 0); - if (ret) { + if (0 > ret) { opal_output(ompi_mtl_base_framework.framework_output, - "Error returned from fi_cq_readerr: %d", ret); + "Error returned from fi_cq_readerr: %zd", ret); + abort(); } assert(error.op_context); @@ -99,7 +106,7 @@ ompi_mtl_ofi_progress(void) ret = ofi_req->error_callback(&error, ofi_req); if (OMPI_SUCCESS != ret) { opal_output(ompi_mtl_base_framework.framework_output, - "Error returned by request error callback: %d", + "Error returned by request error callback: %zd", ret); abort(); } @@ -115,42 +122,7 @@ ompi_mtl_ofi_progress(void) /* MTL interface functions */ -__opal_attribute_always_inline__ static inline int -ompi_mtl_ofi_finalize(struct mca_mtl_base_module_t *mtl) -{ - opal_progress_unregister(ompi_mtl_ofi_progress); - - /** - * Close all the OFI objects - */ - if (fi_close((fid_t)ompi_mtl_ofi.ep)) { - opal_output(ompi_mtl_base_framework.framework_output, - "fi_close failed: %s", strerror(errno)); - abort(); - } - if (fi_close((fid_t)ompi_mtl_ofi.cq)) { - opal_output(ompi_mtl_base_framework.framework_output, - "fi_close failed: %s", strerror(errno)); - abort(); - } - if (fi_close((fid_t)ompi_mtl_ofi.av)) { - opal_output(ompi_mtl_base_framework.framework_output, - "fi_close failed: %s", strerror(errno)); - abort(); - } - if (fi_close((fid_t)ompi_mtl_ofi.domain)) { - opal_output(ompi_mtl_base_framework.framework_output, - "fi_close failed: %s", strerror(errno)); - abort(); - } - if (fi_close((fid_t)ompi_mtl_ofi.fabric)) { - opal_output(ompi_mtl_base_framework.framework_output, - "fi_close failed: %s", strerror(errno)); - abort(); - } - - 
return OMPI_SUCCESS; -} +int ompi_mtl_ofi_finalize(struct mca_mtl_base_module_t *mtl); __opal_attribute_always_inline__ static inline int ompi_mtl_ofi_get_error(int error_num) @@ -182,7 +154,7 @@ ompi_mtl_ofi_send_error_callback(struct fi_cq_err_entry *error, ompi_mtl_ofi_request_t *ofi_req) { switch(error->err) { - case FI_EMSGSIZE: + case FI_ETRUNC: ofi_req->status.MPI_ERROR = MPI_ERR_TRUNCATE; break; default: @@ -249,10 +221,10 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, mca_pml_base_send_mode_t mode, ompi_mtl_ofi_request_t *ofi_req) { - int ret; + int ompi_ret; void *start; size_t length; - ssize_t ret_length; + ssize_t ret; bool free_after; uint64_t match_bits; ompi_proc_t *ompi_proc = NULL; @@ -260,10 +232,10 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */ ompi_proc = ompi_comm_peer_lookup(comm, dest); - endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); - ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); - if (OMPI_SUCCESS != ret) return ret; + ompi_ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); + if (OMPI_SUCCESS != ompi_ret) return ompi_ret; ofi_req->buffer = (free_after) ? 
start : NULL; ofi_req->length = length; @@ -279,19 +251,19 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, ofi_req->completion_count = 2; MTL_OFI_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag, MTL_OFI_SYNC_SEND); - ret_length = fi_trecv(ompi_mtl_ofi.ep, - NULL, - 0, - NULL, - endpoint->peer_fiaddr, - match_bits | MTL_OFI_SYNC_SEND_ACK, - 0, /* Exact match, no ignore bits */ - (void *) &ack_req->ctx); - if (OPAL_UNLIKELY(ret_length < 0)) { + MTL_OFI_RETRY_UNTIL_DONE(fi_trecv(ompi_mtl_ofi.ep, + NULL, + 0, + NULL, + endpoint->peer_fiaddr, + match_bits | MTL_OFI_SYNC_SEND_ACK, + 0, /* Exact match, no ignore bits */ + (void *) &ack_req->ctx)); + if (OPAL_UNLIKELY(0 > ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: fi_trecv failed: %s(%zd)", - __FILE__, __LINE__, - strerror(errno), ret_length); + __FILE__, __LINE__, fi_strerror(-ret), ret); + free(ack_req); return ompi_mtl_ofi_get_error(ret); } } else { @@ -301,32 +273,35 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, } if (ompi_mtl_ofi.max_inject_size >= length) { - ret_length = fi_tinject(ompi_mtl_ofi.ep, - start, - length, - endpoint->peer_fiaddr, - match_bits); - if (OPAL_UNLIKELY(0 > ret_length)) { + MTL_OFI_RETRY_UNTIL_DONE(fi_tinject(ompi_mtl_ofi.ep, + start, + length, + endpoint->peer_fiaddr, + match_bits)); + if (OPAL_UNLIKELY(0 > ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: fi_tinject failed: %zd", - __FILE__, __LINE__, ret_length); + "%s:%d: fi_tinject failed: %s(%zd)", + __FILE__, __LINE__, fi_strerror(-ret), ret); + if (ack_req) { + fi_cancel((fid_t)ompi_mtl_ofi.ep, &ack_req->ctx); + free(ack_req); + } return ompi_mtl_ofi_get_error(ret); } ofi_req->event_callback(NULL,ofi_req); } else { - ret_length = fi_tsend(ompi_mtl_ofi.ep, - start, - length, - NULL, - endpoint->peer_fiaddr, - match_bits, - (void *) &ofi_req->ctx); - - if (OPAL_UNLIKELY(0 > ret_length)) { + 
MTL_OFI_RETRY_UNTIL_DONE(fi_tsend(ompi_mtl_ofi.ep, + start, + length, + NULL, + endpoint->peer_fiaddr, + match_bits, + (void *) &ofi_req->ctx)); + if (OPAL_UNLIKELY(0 > ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: fi_tsend failed: %zd", - __FILE__, __LINE__, ret_length); + "%s:%d: fi_tsend failed: %s(%zd)", + __FILE__, __LINE__, fi_strerror(-ret), ret); return ompi_mtl_ofi_get_error(ret); } } @@ -422,8 +397,8 @@ __opal_attribute_always_inline__ static inline int ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc, ompi_mtl_ofi_request_t *ofi_req) { - int ret; - ssize_t ret_length; + int ompi_ret; + ssize_t ret; ompi_proc_t *ompi_proc = NULL; mca_mtl_ofi_endpoint_t *endpoint = NULL; int src; @@ -453,14 +428,14 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc, * Unpack data into recv buffer if necessary. */ if (OPAL_UNLIKELY(ofi_req->buffer)) { - ret = ompi_mtl_datatype_unpack(ofi_req->convertor, - ofi_req->buffer, - wc->len); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + ompi_ret = ompi_mtl_datatype_unpack(ofi_req->convertor, + ofi_req->buffer, + wc->len); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: ompi_mtl_datatype_unpack failed: %d", - __FILE__, __LINE__, ret); - status->MPI_ERROR = ret; + __FILE__, __LINE__, ompi_ret); + status->MPI_ERROR = ompi_ret; } } @@ -484,24 +459,23 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc, * If the recv request was posted for any source, * we need to extract the source's actual address. 
*/ - if (!ofi_req->remote_addr) { + if (ompi_mtl_ofi.any_addr == ofi_req->remote_addr) { src = MTL_OFI_GET_SOURCE(wc->tag); - ompi_proc = ompi_comm_peer_lookup(ofi_req->comm, src ); - endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + ompi_proc = ompi_comm_peer_lookup(ofi_req->comm, src); + endpoint = ompi_mtl_ofi_get_endpoint(ofi_req->mtl, ompi_proc); ofi_req->remote_addr = endpoint->peer_fiaddr; } - ret_length = fi_tsend(ompi_mtl_ofi.ep, - NULL, - 0, - NULL, - ofi_req->remote_addr, - wc->tag | MTL_OFI_SYNC_SEND_ACK, - (void *) &ofi_req->ctx); - - if (OPAL_UNLIKELY(ret_length < 0)) { + MTL_OFI_RETRY_UNTIL_DONE(fi_tsend(ompi_mtl_ofi.ep, + NULL, + 0, + NULL, + ofi_req->remote_addr, + wc->tag | MTL_OFI_SYNC_SEND_ACK, + (void *) &ofi_req->ctx)); + if (OPAL_UNLIKELY(0 > ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: fi_tsend failed: %zd", - __FILE__, __LINE__, ret_length); + "%s:%d: fi_tsend failed: %s(%zd)", + __FILE__, __LINE__, fi_strerror(-ret), ret); status->MPI_ERROR = OMPI_ERROR; } } else { @@ -524,11 +498,13 @@ ompi_mtl_ofi_recv_error_callback(struct fi_cq_err_entry *error, status->MPI_TAG = MTL_OFI_GET_TAG(ofi_req->match_bits); status->MPI_SOURCE = MTL_OFI_GET_SOURCE(ofi_req->match_bits); - /* FIXME: This could be done on a single line... 
*/ switch (error->err) { - case FI_EMSGSIZE: + case FI_ETRUNC: status->MPI_ERROR = MPI_ERR_TRUNCATE; break; + case FI_ECANCELED: + status->_cancelled = true; + break; default: status->MPI_ERROR = MPI_ERR_INTERN; } @@ -545,8 +521,8 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl, struct opal_convertor_t *convertor, mca_mtl_request_t *mtl_request) { - int ret = OMPI_SUCCESS; - ssize_t ret_length; + int ompi_ret = OMPI_SUCCESS; + ssize_t ret; uint64_t match_bits, mask_bits; fi_addr_t remote_addr; ompi_proc_t *ompi_proc = NULL; @@ -558,7 +534,7 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl, if (MPI_ANY_SOURCE != src) { ompi_proc = ompi_comm_peer_lookup(comm, src); - endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); remote_addr = endpoint->peer_fiaddr; } else { remote_addr = ompi_mtl_ofi.any_addr; @@ -566,9 +542,12 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl, MTL_OFI_SET_RECV_BITS(match_bits, mask_bits, comm->c_contextid, src, tag); - ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; + ompi_ret = ompi_mtl_datatype_recv_buf(convertor, + &start, + &length, + &free_after); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) { + return ompi_ret; } ofi_req->type = OMPI_MTL_OFI_RECV; @@ -583,22 +562,21 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl, ofi_req->remote_addr = remote_addr; ofi_req->match_bits = match_bits; - ret_length = fi_trecv(ompi_mtl_ofi.ep, - start, - length, - NULL, - remote_addr, - match_bits, - mask_bits, - (void *)&ofi_req->ctx); - - if (OPAL_UNLIKELY(ret_length < 0)) { + MTL_OFI_RETRY_UNTIL_DONE(fi_trecv(ompi_mtl_ofi.ep, + start, + length, + NULL, + remote_addr, + match_bits, + mask_bits, + (void *)&ofi_req->ctx)); + if (OPAL_UNLIKELY(0 > ret)) { if (NULL != ofi_req->buffer) { free(ofi_req->buffer); } opal_output_verbose(1, ompi_mtl_base_framework.framework_output, 
"%s:%d: fi_trecv failed: %s(%zd)", - __FILE__, __LINE__, strerror(errno), ret_length); + __FILE__, __LINE__, fi_strerror(-ret), ret); return ompi_mtl_ofi_get_error(ret); } @@ -638,11 +616,13 @@ ompi_mtl_ofi_mrecv_error_callback(struct fi_cq_err_entry *error, status->MPI_TAG = MTL_OFI_GET_TAG(ofi_req->match_bits); status->MPI_SOURCE = MTL_OFI_GET_SOURCE(ofi_req->match_bits); - /* FIXME: This could be done on a single line... */ switch (error->err) { - case FI_EMSGSIZE: + case FI_ETRUNC: status->MPI_ERROR = MPI_ERR_TRUNCATE; break; + case FI_ECANCELED: + status->_cancelled = true; + break; default: status->MPI_ERROR = MPI_ERR_INTERN; } @@ -667,12 +647,16 @@ ompi_mtl_ofi_imrecv(struct mca_mtl_base_module_t *mtl, bool free_after; struct iovec iov; struct fi_msg_tagged msg; - int ret; + int ompi_ret; + ssize_t ret; uint64_t msgflags = FI_CLAIM; - ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; + ompi_ret = ompi_mtl_datatype_recv_buf(convertor, + &start, + &length, + &free_after); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) { + return ompi_ret; } ofi_req->type = OMPI_MTL_OFI_RECV; @@ -698,12 +682,12 @@ ompi_mtl_ofi_imrecv(struct mca_mtl_base_module_t *mtl, msg.context = (void *)&ofi_req->ctx; msg.data = 0; - ret = fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags); - if (ret < 0) { + MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags)); + if (OPAL_UNLIKELY(0 > ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: unexpected return code from fi_trecvmsg: %d", - __FILE__, __LINE__, ret); - return ompi_mtl_ofi_get_error(-ret); + "%s:%d: fi_trecvmsg failed: %s(%zd)", + __FILE__, __LINE__, fi_strerror(-ret), ret); + return ompi_mtl_ofi_get_error(ret); } return OMPI_SUCCESS; @@ -753,7 +737,7 @@ ompi_mtl_ofi_iprobe(struct mca_mtl_base_module_t *mtl, mca_mtl_ofi_endpoint_t *endpoint = NULL; fi_addr_t remote_proc = 0; uint64_t match_bits, mask_bits; - 
int ret; + ssize_t ret; struct fi_msg_tagged msg; uint64_t msgflags = FI_PEEK; @@ -762,7 +746,7 @@ ompi_mtl_ofi_iprobe(struct mca_mtl_base_module_t *mtl, */ if (MPI_ANY_SOURCE != src) { ompi_proc = ompi_comm_peer_lookup( comm, src ); - endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); remote_proc = endpoint->peer_fiaddr; } @@ -791,18 +775,18 @@ ompi_mtl_ofi_iprobe(struct mca_mtl_base_module_t *mtl, ofi_req.completion_count = 1; ofi_req.match_state = 0; - ret = fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags); - if (ret < 0 && -FI_ENOMSG == ret) { + MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags)); + if (-FI_ENOMSG == ret) { /** * The search request completed but no matching message was found. */ *flag = 0; return OMPI_SUCCESS; - } else if (ret < 0) { + } else if (OPAL_UNLIKELY(0 > ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: unexpected return code from fi_trecvmsg: %d", - __FILE__, __LINE__, ret); - return ompi_mtl_ofi_get_error(-ret); + "%s:%d: fi_trecvmsg failed: %s(%zd)", + __FILE__, __LINE__, fi_strerror(-ret), ret); + return ompi_mtl_ofi_get_error(ret); } while (0 < ofi_req.completion_count) { @@ -833,7 +817,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl, mca_mtl_ofi_endpoint_t *endpoint = NULL; fi_addr_t remote_proc = 0; uint64_t match_bits, mask_bits; - int ret; + ssize_t ret; struct fi_msg_tagged msg; uint64_t msgflags = FI_PEEK | FI_CLAIM; @@ -847,7 +831,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl, */ if (MPI_ANY_SOURCE != src) { ompi_proc = ompi_comm_peer_lookup( comm, src ); - endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); remote_proc = endpoint->peer_fiaddr; } @@ -876,18 +860,20 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl, ofi_req->completion_count = 1; ofi_req->match_state = 0; - ret = 
fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags); - if (ret < 0 && -FI_ENOMSG == ret) { + MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags)); + if (-FI_ENOMSG == ret) { /** * The search request completed but no matching message was found. */ *matched = 0; + free(ofi_req); return OMPI_SUCCESS; - } else if (ret < 0) { + } else if (OPAL_UNLIKELY(0 > ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: unexpected return code from fi_trecvmsg: %d", - __FILE__, __LINE__, ret); - return ompi_mtl_ofi_get_error(-ret); + "%s:%d: fi_trecvmsg failed: %s(%zd)", + __FILE__, __LINE__, fi_strerror(-ret), ret); + free(ofi_req); + return ompi_mtl_ofi_get_error(ret); } while (0 < ofi_req->completion_count) { @@ -912,6 +898,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl, } else { (*message) = MPI_MESSAGE_NULL; + free(ofi_req); } return OMPI_SUCCESS; @@ -944,10 +931,16 @@ ompi_mtl_ofi_cancel(struct mca_mtl_base_module_t *mtl, ret = fi_cancel((fid_t)ompi_mtl_ofi.ep, &ofi_req->ctx); if (0 == ret) { /** - * The request was successfully cancelled. + * Wait for the request to be cancelled. + */ + while (!ofi_req->super.ompi_req->req_status._cancelled) { + opal_progress(); + } + } else { + /** + * Could not cancel the request. 
*/ - ofi_req->super.ompi_req->req_status._cancelled = true; - ofi_req->super.completion_callback(&ofi_req->super); + ofi_req->super.ompi_req->req_status._cancelled = false; } } break; @@ -973,7 +966,6 @@ ompi_mtl_ofi_del_comm(struct mca_mtl_base_module_t *mtl, return OMPI_SUCCESS; } - END_C_DECLS #endif /* MTL_OFI_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/mca/mtl/ofi/mtl_ofi_compat.h b/ompi/mca/mtl/ofi/mtl_ofi_compat.h index f8fe3a7b894..1a878418436 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_compat.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_compat.h @@ -21,14 +21,14 @@ #if (OPAL_MAJOR_VERSION >= 2) #include "opal/mca/pmix/pmix.h" +#include "opal/mca/pmix/pmix_types.h" #define OFI_COMPAT_MODEX_RECV(ret, mtl_version, proc, ep_name, size) \ - OPAL_MODEX_RECV((ret), (mtl_version), &(proc)->super, (ep_name), (size)); + OPAL_MODEX_RECV((ret), (mtl_version), &(proc)->super.proc_name, (ep_name), (size)); #define OFI_COMPAT_MODEX_SEND(ret, mtl_version, ep_name, namelen) \ OPAL_MODEX_SEND((ret), \ - PMIX_SYNC_REQD, \ - PMIX_GLOBAL, \ + OPAL_PMIX_GLOBAL, \ (mtl_version), \ (ep_name)[0], \ (namelen)); diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index c16dc966cd5..d9d3a10b47d 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -1,9 +1,9 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -13,6 +13,7 @@ */ #include "mtl_ofi.h" +#include "opal/util/argv.h" static int ompi_mtl_ofi_component_open(void); static int ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority); @@ -24,6 +25,32 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, bool enable_mpi_threads); static int param_priority; +static char *prov_include; +static char *prov_exclude; +static int control_progress; +static int data_progress; + +/* + * Enumerators + */ + +enum { + MTL_OFI_PROG_AUTO=1, + MTL_OFI_PROG_MANUAL, + MTL_OFI_PROG_UNKNOWN, +}; + +mca_base_var_enum_value_t control_prog_type[] = { + {MTL_OFI_PROG_AUTO, "auto"}, + {MTL_OFI_PROG_MANUAL, "manual"}, + {0, NULL} +}; + +mca_base_var_enum_value_t data_prog_type[] = { + {MTL_OFI_PROG_AUTO, "auto"}, + {MTL_OFI_PROG_MANUAL, "manual"}, + {0, NULL} +}; mca_mtl_ofi_component_t mca_mtl_ofi_component = { { @@ -53,21 +80,65 @@ mca_mtl_ofi_component_t mca_mtl_ofi_component = { static int ompi_mtl_ofi_component_register(void) { - ompi_mtl_ofi.provider_name = NULL; - (void) mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, - "provider", - "Name of OFI provider to use", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_4, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_mtl_ofi.provider_name); - param_priority = 10; /* for now give a lower priority than the psm mtl */ + int ret; + mca_base_var_enum_t *new_enum = NULL; + + param_priority = 25; /* for now give a lower priority than the psm mtl */ + mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, + "priority", "Priority of the OFI MTL component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &param_priority); + + prov_include = "psm,psm2,gni"; + mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, + "provider_include", + "Comma-delimited list of OFI providers that are considered for use (e.g., \"psm,psm2\"; an empty value means that all providers
will be considered). Mutually exclusive with mtl_ofi_provider_exclude.", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_1, + MCA_BASE_VAR_SCOPE_READONLY, + &prov_include); + + prov_exclude = NULL; + mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, + "provider_exclude", + "Comma-delimited list of OFI providers that are not considered for use (default: \"sockets,mxm\"; empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_include.", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_1, + MCA_BASE_VAR_SCOPE_READONLY, + &prov_exclude); + + ret = mca_base_var_enum_create ("control_prog_type", control_prog_type, &new_enum); + if (OPAL_SUCCESS != ret) { + return ret; + } + + control_progress = MTL_OFI_PROG_MANUAL; mca_base_component_var_register (&mca_mtl_ofi_component.super.mtl_version, - "priority", "Priority of the OFI MTL component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &param_priority); + "control_progress", + "Specify control progress model (default: manual). Set to auto for auto progress.", + MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, + &control_progress); + OBJ_RELEASE(new_enum); + + ret = mca_base_var_enum_create ("data_prog_type", data_prog_type, &new_enum); + if (OPAL_SUCCESS != ret) { + return ret; + } + + data_progress = MTL_OFI_PROG_AUTO; + mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, + "data_progress", + "Specify data progress model (default: auto).
Set to manual for manual progress.", + MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, + &data_progress); + OBJ_RELEASE(new_enum); + return OMPI_SUCCESS; } @@ -84,6 +155,21 @@ ompi_mtl_ofi_component_open(void) ompi_mtl_ofi.cq = NULL; ompi_mtl_ofi.ep = NULL; + /** + * Sanity check: provider_include and provider_exclude must be mutually + * exclusive + */ + if (OMPI_SUCCESS != + mca_base_var_check_exclusive("ompi", + mca_mtl_ofi_component.super.mtl_version.mca_type_name, + mca_mtl_ofi_component.super.mtl_version.mca_component_name, + "provider_include", + mca_mtl_ofi_component.super.mtl_version.mca_type_name, + mca_mtl_ofi_component.super.mtl_version.mca_component_name, + "provider_exclude")) { + return OMPI_ERR_NOT_AVAILABLE; + } + return OMPI_SUCCESS; } @@ -101,6 +187,78 @@ ompi_mtl_ofi_component_close(void) return OMPI_SUCCESS; } +int +ompi_mtl_ofi_progress_no_inline(void) +{ + return ompi_mtl_ofi_progress(); +} + +static int +is_in_list(char **list, char *item) +{ + int i = 0; + + if ((NULL == list) || (NULL == item)) { + return 0; + } + + while (NULL != list[i]) { + if (0 == strncmp(item, list[i], strlen(item))) { + return 1; + } else { + i++; + } + } + + return 0; +} + +static struct fi_info* +select_ofi_provider(struct fi_info *providers) +{ + char **include_list = NULL; + char **exclude_list = NULL; + struct fi_info *prov = providers; + + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: mtl:ofi:provider_include = \"%s\"\n", + __FILE__, __LINE__, prov_include); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: mtl:ofi:provider_exclude = \"%s\"\n", + __FILE__, __LINE__, prov_exclude); + + if (NULL != prov_include) { + include_list = opal_argv_split(prov_include, ','); + while ((NULL != prov) && + (!is_in_list(include_list, prov->fabric_attr->prov_name))) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: mtl:ofi: \"%s\" not in include 
list\n", + __FILE__, __LINE__, + prov->fabric_attr->prov_name); + prov = prov->next; + } + } else if (NULL != prov_exclude) { + exclude_list = opal_argv_split(prov_exclude, ','); + while ((NULL != prov) && + (is_in_list(exclude_list, prov->fabric_attr->prov_name))) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: mtl:ofi: \"%s\" in exclude list\n", + __FILE__, __LINE__, + prov->fabric_attr->prov_name); + prov = prov->next; + } + } + + opal_argv_free(include_list); + opal_argv_free(exclude_list); + + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: mtl:ofi:prov: %s\n", + __FILE__, __LINE__, + (prov ? prov->fabric_attr->prov_name : "none")); + + return prov; +} static mca_mtl_base_module_t* ompi_mtl_ofi_component_init(bool enable_progress_threads, @@ -122,6 +280,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, * ep_type: reliable datagram operation * caps: Capabilities required from the provider. * Tag matching is specified to implement MPI semantics. + * msg_order: Guarantee that messages with same tag are ordered. 
*/ hints = fi_allocinfo(); if (!hints) { @@ -130,23 +289,29 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, __FILE__, __LINE__); goto error; } - hints->mode = FI_CONTEXT; - hints->ep_attr->type = FI_EP_RDM; /* Reliable datagram */ - hints->caps = FI_TAGGED; /* Tag matching interface */ + hints->mode = FI_CONTEXT; + hints->ep_attr->type = FI_EP_RDM; /* Reliable datagram */ + hints->caps = FI_TAGGED; /* Tag matching interface */ + hints->tx_attr->msg_order = FI_ORDER_SAS; + hints->rx_attr->msg_order = FI_ORDER_SAS; - /** - * Refine filter for additional capabilities - * threading: Disable locking - * control_progress: enable async progress - */ - hints->domain_attr->threading = FI_THREAD_ENDPOINT; - hints->domain_attr->control_progress = FI_PROGRESS_AUTO; - if (NULL != ompi_mtl_ofi.provider_name) { - hints->fabric_attr->prov_name = strdup(ompi_mtl_ofi.provider_name); + hints->domain_attr->threading = FI_THREAD_UNSPEC; + + if (MTL_OFI_PROG_AUTO == control_progress) { + hints->domain_attr->control_progress = FI_PROGRESS_AUTO; } else { - hints->fabric_attr->prov_name = NULL; + hints->domain_attr->control_progress = FI_PROGRESS_MANUAL; } + if (MTL_OFI_PROG_MANUAL == data_progress) { + hints->domain_attr->data_progress = FI_PROGRESS_MANUAL; + } else { + hints->domain_attr->data_progress = FI_PROGRESS_AUTO; + } + + hints->domain_attr->resource_mgmt = FI_RM_ENABLED; + hints->domain_attr->av_type = FI_AV_MAP; + /** * FI_VERSION provides binary backward and forward compatibility support * Specify the version of OFI is coded to, the provider will select struct @@ -173,10 +338,16 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, } /** - * Here we elect to use the first provider from the list. - * Further filtering could be done at this point (e.g. name). + * Select a provider from the list returned by fi_getinfo(). 
*/ - prov = providers; + prov = select_ofi_provider(providers); + if (!prov) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: select_ofi_provider: no provider found\n", + __FILE__, __LINE__); + goto error; + } + /** * Open fabric @@ -251,9 +422,10 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, /** * The remote fi_addr will be stored in the ofi_endpoint struct. - * So, we use the AV in "map" mode. */ + av_attr.type = FI_AV_MAP; + ret = fi_av_open(ompi_mtl_ofi.domain, &av_attr, &ompi_mtl_ofi.av, NULL); if (ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, @@ -338,7 +510,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, /** * Activate progress callback. */ - ret = opal_progress_register(ompi_mtl_ofi_progress); + ret = opal_progress_register(ompi_mtl_ofi_progress_no_inline); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: opal_progress_register failed: %d\n", @@ -373,5 +545,42 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, return NULL; } +int +ompi_mtl_ofi_finalize(struct mca_mtl_base_module_t *mtl) +{ + opal_progress_unregister(ompi_mtl_ofi_progress_no_inline); + + /** + * * Close all the OFI objects + * */ + if (fi_close((fid_t)ompi_mtl_ofi.ep)) { + opal_output(ompi_mtl_base_framework.framework_output, + "fi_close failed: %s", strerror(errno)); + abort(); + } + if (fi_close((fid_t)ompi_mtl_ofi.cq)) { + opal_output(ompi_mtl_base_framework.framework_output, + "fi_close failed: %s", strerror(errno)); + abort(); + } + if (fi_close((fid_t)ompi_mtl_ofi.av)) { + opal_output(ompi_mtl_base_framework.framework_output, + "fi_close failed: %s", strerror(errno)); + abort(); + } + if (fi_close((fid_t)ompi_mtl_ofi.domain)) { + opal_output(ompi_mtl_base_framework.framework_output, + "fi_close failed: %s", strerror(errno)); + abort(); + } + if (fi_close((fid_t)ompi_mtl_ofi.fabric)) { + opal_output(ompi_mtl_base_framework.framework_output, + 
"fi_close failed: %s", strerror(errno)); + abort(); + } + + return OMPI_SUCCESS; +} + diff --git a/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h b/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h index 2799d495b58..788d0919168 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_endpoint.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -11,10 +11,12 @@ #ifndef OMPI_MTL_OFI_ENDPOINT_H #define OMPI_MTL_OFI_ENDPOINT_H -#include "mtl_ofi.h" - BEGIN_C_DECLS +extern int ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl, + size_t nprocs, + struct ompi_proc_t **procs); + OBJ_CLASS_DECLARATION(mca_mtl_ofi_endpoint_t); /** @@ -35,7 +37,15 @@ struct mca_mtl_ofi_endpoint_t { }; typedef struct mca_mtl_ofi_endpoint_t mca_mtl_ofi_endpoint_t; -OBJ_CLASS_DECLARATION(mca_mtl_ofi_endpoint); + +static inline mca_mtl_ofi_endpoint_t *ompi_mtl_ofi_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc) +{ + if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) { + ompi_mtl_ofi_add_procs(mtl, 1, &ompi_proc); + } + + return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; +} END_C_DECLS #endif diff --git a/ompi/mca/mtl/ofi/mtl_ofi_request.h b/ompi/mca/mtl/ofi/mtl_ofi_request.h index ee544073cc7..5e2faad6456 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_request.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_request.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. 
All rights reserved * * $COPYRIGHT$ * @@ -55,6 +55,10 @@ struct ompi_mtl_ofi_request_t { /* lookup source of an ANY_SOURCE Recv */ struct ompi_communicator_t *comm; + /** Reference to the MTL used to lookup */ + /* source of an ANY_SOURCE Recv */ + struct mca_mtl_base_module_t* mtl; + /** Pack buffer */ void *buffer; diff --git a/ompi/mca/mtl/ofi/mtl_ofi_types.h b/ompi/mca/mtl/ofi/mtl_ofi_types.h index e56a4398965..1b1bdb1e1c5 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_types.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_types.h @@ -84,7 +84,7 @@ typedef struct mca_mtl_ofi_component_t { { \ match_bits = contextid; \ match_bits = (match_bits << 16); \ - match_bits |= source; \ + match_bits |= (uint64_t)source; \ match_bits = (match_bits << 32); \ match_bits |= (MTL_OFI_TAG_MASK & tag) | type; \ } @@ -106,7 +106,7 @@ typedef struct mca_mtl_ofi_component_t { match_bits = (match_bits << 32); \ mask_bits |= MTL_OFI_SOURCE_MASK; \ } else { \ - match_bits |= source; \ + match_bits |= (uint64_t)source; \ match_bits = (match_bits << 32); \ } \ \ diff --git a/ompi/mca/mtl/portals4/Makefile.am b/ompi/mca/mtl/portals4/Makefile.am index 7294515f752..1693ff435d7 100644 --- a/ompi/mca/mtl/portals4/Makefile.am +++ b/ompi/mca/mtl/portals4/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -13,9 +13,9 @@ # Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved. # Copyright (c) 2014 Intel, Inc. 
All rights reserved # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/mtl/portals4/configure.m4 b/ompi/mca/mtl/portals4/configure.m4 index 260051d7984..23ecbebd504 100644 --- a/ompi/mca/mtl/portals4/configure.m4 +++ b/ompi/mca/mtl/portals4/configure.m4 @@ -6,16 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Sandia National Laboratories. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,7 +26,7 @@ AC_DEFUN([MCA_ompi_mtl_portals4_POST_CONFIG], [ AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([PORTALS4])]) ])dnl -# MCA_mtl_portals4_CONFIG(action-if-can-compile, +# MCA_mtl_portals4_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_mtl_portals4_CONFIG],[ diff --git a/ompi/mca/mtl/portals4/mtl_portals4.c b/ompi/mca/mtl/portals4/mtl_portals4.c index f7e14b6d1b8..2d25c8db7dd 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.c +++ b/ompi/mca/mtl/portals4/mtl_portals4.c @@ -22,6 +22,7 @@ #include +#include "ompi/communicator/communicator.h" #include "ompi/proc/proc.h" #include "ompi/mca/mtl/mtl.h" #include "opal/class/opal_list.h" @@ -85,9 +86,11 @@ portals4_init_interface(void) /* Create send and long message (read) portal table entries */ ret = PtlPTAlloc(ompi_mtl_portals4.ni_h, - PTL_PT_ONLY_USE_ONCE | - PTL_PT_ONLY_TRUNCATE | - PTL_PT_FLOWCTRL, +#if OMPI_MTL_PORTALS4_FLOW_CONTROL + PTL_PT_FLOWCTRL | +#endif + PTL_PT_ONLY_USE_ONCE | + 
PTL_PT_ONLY_TRUNCATE, ompi_mtl_portals4.recv_eq_h, REQ_RECV_TABLE_ID, &ompi_mtl_portals4.recv_idx); @@ -132,7 +135,7 @@ portals4_init_interface(void) ret = PtlMDBind(ompi_mtl_portals4.ni_h, &md, - &ompi_mtl_portals4.zero_md_h); + &ompi_mtl_portals4.zero_md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", @@ -163,9 +166,9 @@ portals4_init_interface(void) me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; - me.options = PTL_ME_OP_PUT | + me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_LINK_DISABLE | - PTL_ME_EVENT_COMM_DISABLE | + PTL_ME_EVENT_COMM_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; if (ompi_mtl_portals4.use_logical) { me.match_id.rank = PTL_RANK_ANY; @@ -174,8 +177,8 @@ portals4_init_interface(void) me.match_id.phys.pid = PTL_PID_ANY; } me.match_bits = MTL_PORTALS4_LONG_MSG; - me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK | - MTL_PORTALS4_SOURCE_MASK | + me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK | + MTL_PORTALS4_SOURCE_MASK | MTL_PORTALS4_TAG_MASK; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx, @@ -241,47 +244,28 @@ portals4_init_interface(void) return OMPI_ERROR; } -int -ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, - size_t nprocs, - struct ompi_proc_t** procs) +static int +create_maptable(size_t nprocs, + ompi_proc_t **procs) { - int ret, me; + int ret; size_t i; - bool new_found = false; ptl_process_t *maptable; - if (ompi_mtl_portals4.use_logical) { - maptable = malloc(sizeof(ptl_process_t) * nprocs); - if (NULL == maptable) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: malloc failed\n", - __FILE__, __LINE__); - return OMPI_ERR_OUT_OF_RESOURCE; - } + maptable = malloc(sizeof(ptl_process_t) * nprocs); + if (NULL == maptable) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: malloc failed\n", + __FILE__, __LINE__); + return OMPI_ERR_OUT_OF_RESOURCE; } - /* Get the list 
of ptl_process_id_t from the runtime and copy into structure */ - for (i = 0 ; i < nprocs ; ++i) { + for (i=0;isuper.proc_arch != ompi_proc_local()->super.proc_arch) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Portals 4 MTL does not support heterogeneous operations."); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Proc %s architecture %x, mine %x.", - OMPI_NAME_PRINT(&procs[i]->super.proc_name), - procs[i]->super.proc_arch, ompi_proc_local()->super.proc_arch); - return OMPI_ERR_NOT_SUPPORTED; - } - OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version, - &procs[i]->super, (uint8_t**)&modex_id, &size); + &procs[i]->super.proc_name, (uint8_t**)&modex_id, &size); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: ompi_modex_recv failed: %d\n", @@ -294,40 +278,161 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, return OMPI_ERR_BAD_PARAM; } - if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) { - ptl_process_t *peer_id; - peer_id = malloc(sizeof(ptl_process_t)); - if (NULL == peer_id) { + maptable[i].phys.pid = modex_id->phys.pid; + maptable[i].phys.nid = modex_id->phys.nid; + opal_output_verbose(50, ompi_mtl_base_framework.framework_output, + "logical: global rank=%d pid=%x nid=%x\n", + (int)i, maptable[i].phys.pid, maptable[i].phys.nid); + } + + ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: logical mapping failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "logical mapping OK\n"); + + free(maptable); + + return OMPI_SUCCESS; +} + +static int +create_endpoint(ompi_proc_t *proc) +{ + ptl_process_t *endpoint; + + endpoint = malloc(sizeof(ptl_process_t)); + if (NULL == endpoint) { + opal_output_verbose(1, 
ompi_mtl_base_framework.framework_output, + "%s:%d: malloc failed: %s\n", + __FILE__, __LINE__, strerror(errno)); + return OMPI_ERR_OUT_OF_RESOURCE; + } else { + if (ompi_mtl_portals4.use_logical) { + endpoint->phys.nid = 0; + endpoint->phys.pid = 0; + endpoint->rank = proc->super.proc_name.vpid; + } else { + int ret; + ptl_process_t *modex_id; + size_t size; + + OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version, + &proc->super.proc_name, (uint8_t**)&modex_id, &size); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: ompi_modex_recv failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } else if (sizeof(ptl_process_t) != size) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: malloc failed: %d\n", + "%s:%d: ompi_modex_recv failed (size mismatch): %d\n", __FILE__, __LINE__, ret); - return OMPI_ERR_OUT_OF_RESOURCE; + return OMPI_ERR_BAD_PARAM; } - if (ompi_mtl_portals4.use_logical) { - peer_id->rank = i; - maptable[i].phys.pid = modex_id->phys.pid; - maptable[i].phys.nid = modex_id->phys.nid; - opal_output_verbose(50, ompi_mtl_base_framework.framework_output, - "logical: global rank=%d pid=%d nid=%d\n", - (int)i, maptable[i].phys.pid, maptable[i].phys.nid); - } else { - *peer_id = *modex_id; + + *endpoint = *modex_id; + } + } + + proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = endpoint; + + return OMPI_SUCCESS; +} + +ompi_proc_t * +ompi_mtl_portals4_get_proc_group(struct ompi_group_t *group, int rank) +{ + int ret; + + ompi_proc_t *proc = ompi_group_peer_lookup (group, rank); + if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) { + ret = create_endpoint(proc); + if (OMPI_SUCCESS != ret) { + return NULL; + } +#if 0 + } else { + /* + * sanity check + */ + int ret; + ptl_process_t *modex_id; + size_t size; + + OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version, + &proc->super.proc_name, (uint8_t**)&modex_id, &size); + + ptl_process_t *peer 
= (ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]; + if (ompi_mtl_portals4.use_logical) { + if ((size_t)peer->rank != proc->super.proc_name.vpid) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: existing peer and rank don't match\n", + __FILE__, __LINE__); + return OMPI_ERROR; } + } + else if (peer->phys.nid != modex_id->phys.nid || + peer->phys.pid != modex_id->phys.pid) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: existing peer and modex peer don't match\n", + __FILE__, __LINE__); + return OMPI_ERROR; + } +#endif + } - procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = peer_id; + return proc; +} - new_found = true; +static int +add_endpoints(size_t nprocs, + ompi_proc_t **procs) +{ + int ret; + size_t i; + + /* Get the list of ptl_process_id_t from the runtime and copy into structure */ + for (i = 0 ; i < nprocs ; ++i) { + if (procs[i]->super.proc_arch != ompi_proc_local()->super.proc_arch) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Portals 4 MTL does not support heterogeneous operations."); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Proc %s architecture %x, mine %x.", + OMPI_NAME_PRINT(&procs[i]->super.proc_name), + procs[i]->super.proc_arch, ompi_proc_local()->super.proc_arch); + return OMPI_ERR_NOT_SUPPORTED; + } + + if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) { + ret = create_endpoint(procs[i]); + if (OMPI_SUCCESS != ret) { + return ret; + } +#if 0 } else { + /* + * sanity check + */ + int ret; + ptl_process_t *modex_id; + size_t size; + + OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version, + &procs[i]->super.proc_name, (uint8_t**)&modex_id, &size); + ptl_process_t *proc = (ptl_process_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]; if (ompi_mtl_portals4.use_logical) { - if ((size_t)proc->rank != i) { + if ((size_t)proc->rank != 
procs[i]->super.proc_name.vpid) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: existing peer and rank don't match\n", __FILE__, __LINE__); return OMPI_ERROR; } - maptable[i].phys.pid = modex_id->phys.pid; - maptable[i].phys.nid = modex_id->phys.nid; } else if (proc->phys.nid != modex_id->phys.nid || proc->phys.pid != modex_id->phys.pid) { @@ -336,45 +441,82 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, __FILE__, __LINE__); return OMPI_ERROR; } +#endif } } - if (ompi_mtl_portals4.use_logical) { - ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable); - if (OMPI_SUCCESS != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: logical mapping failed: %d\n", - __FILE__, __LINE__, ret); - return ret; - } - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "logical mapping OK\n"); - free(maptable); - } + return OMPI_SUCCESS; +} - portals4_init_interface(); +#define NEED_ALL_PROCS (ompi_mtl_portals4.use_logical || ompi_mtl_portals4.use_flowctl) - /* activate progress callback */ - ret = opal_progress_register(ompi_mtl_portals4_progress); +int +ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, + size_t nprocs, + struct ompi_proc_t** procs) +{ + int ret; + + /* + * The PML handed us a list of procs that need Portals4 + * peer info. Complete those procs here. 
+ */ + ret = add_endpoints(nprocs, + procs); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: opal_progress_register failed: %d\n", + "%s:%d: add_endpoints failed: %d\n", __FILE__, __LINE__, ret); return ret; } + if (1 == ompi_mtl_portals4.need_init) { + if (1 == ompi_mtl_portals4.use_logical) { + ret = create_maptable(nprocs, procs); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: ompi_mtl_portals4_add_procs::create_maptable() failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + } + + /* + * This is the first time through here. Initialize + * Portals4 and register the progress thread. + */ + portals4_init_interface(); + + /* activate progress callback */ + ret = opal_progress_register(ompi_mtl_portals4_progress); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: opal_progress_register failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + #if OMPI_MTL_PORTALS4_FLOW_CONTROL - if (new_found) { - ret = ompi_mtl_portals4_flowctl_add_procs(me, nprocs, procs); + opal_output_verbose(50, ompi_mtl_base_framework.framework_output, + "add_procs() - me=%d\n", ompi_proc_local_proc->super.proc_name.vpid); + + opal_output_verbose(50, ompi_mtl_base_framework.framework_output, + "add_procs() - adding flowctl procs\n"); + + ret = ompi_mtl_portals4_flowctl_add_procs(ompi_proc_local_proc->super.proc_name.vpid, + nprocs, + procs); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: flowctl_add_procs failed: %d\n", __FILE__, __LINE__, ret); return ret; } - } #endif + ompi_mtl_portals4.need_init = 0; + } + return OMPI_SUCCESS; } @@ -382,10 +524,13 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, int ompi_mtl_portals4_del_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, - struct ompi_proc_t** procs) + struct ompi_proc_t** procs) { 
size_t i; + opal_output_verbose(50, ompi_mtl_base_framework.framework_output, + "del_procs() - enter\n"); + for (i = 0 ; i < nprocs ; ++i) { if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) { free(procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); @@ -393,6 +538,9 @@ ompi_mtl_portals4_del_procs(struct mca_mtl_base_module_t *mtl, } } + opal_output_verbose(50, ompi_mtl_base_framework.framework_output, + "del_procs() - exit\n"); + return OMPI_SUCCESS; } @@ -408,15 +556,31 @@ ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl) #endif ompi_mtl_portals4_recv_short_fini(); - PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h); - PtlMDRelease(ompi_mtl_portals4.zero_md_h); - PtlMDRelease(ompi_mtl_portals4.send_md_h); + if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) { + PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h); + } + if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) { + PtlMDRelease(ompi_mtl_portals4.zero_md_h); + } + if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) { + PtlMDRelease(ompi_mtl_portals4.send_md_h); + } + if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) { + PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx); + } + if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) { + PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx); + } + if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) { + PtlEQFree(ompi_mtl_portals4.send_eq_h); + } + if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) { + PtlEQFree(ompi_mtl_portals4.recv_eq_h); + } + if (!PtlHandleIsEqual(ompi_mtl_portals4.ni_h, PTL_INVALID_HANDLE)) { + PtlNIFini(ompi_mtl_portals4.ni_h); + } - PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx); - PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx); - PtlEQFree(ompi_mtl_portals4.send_eq_h); - PtlEQFree(ompi_mtl_portals4.recv_eq_h); - 
PtlNIFini(ompi_mtl_portals4.ni_h); PtlFini(); return OMPI_SUCCESS; diff --git a/ompi/mca/mtl/portals4/mtl_portals4.h b/ompi/mca/mtl/portals4/mtl_portals4.h index 77b9b4de6d4..82975f6219d 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.h +++ b/ompi/mca/mtl/portals4/mtl_portals4.h @@ -26,9 +26,12 @@ #include "opal/class/opal_free_list.h" #include "opal/class/opal_list.h" #include "opal/datatype/opal_convertor.h" +#include "ompi/proc/proc.h" #include "ompi/mca/mtl/mtl.h" #include "ompi/mca/mtl/base/base.h" +#include "ompi/communicator/communicator.h" + #include "mtl_portals4_flowctl.h" BEGIN_C_DECLS @@ -38,19 +41,30 @@ struct mca_mtl_portals4_send_request_t; struct mca_mtl_portals4_module_t { mca_mtl_base_module_t base; + /* add_procs() can get called multiple times. this prevents multiple calls to portals4_init_interface(). */ + int32_t need_init; + /* Use the logical to physical table to accelerate portals4 adressing: 1 (true) : 0 (false) */ - int use_logical; + int32_t use_logical; + + /* Process_id */ + ptl_process_t ptl_process_id; + /* Use flow control: 1 (true) : 0 (false) */ + int32_t use_flowctl; + + /** Short limit; Size limit for short messages */ + uint64_t short_limit; /** Eager limit; messages greater than this use a rendezvous protocol */ - unsigned long long eager_limit; + uint64_t eager_limit; /** Size of short message blocks */ - unsigned long long recv_short_size; + uint64_t recv_short_size; /** Number of short message blocks which should be created during startup */ - int recv_short_num; + uint32_t recv_short_num; /** Length of the send event queues */ - int send_queue_size; + uint32_t send_queue_size; /** Length of the receive event queues */ - int recv_queue_size; + uint32_t recv_queue_size; /** Protocol for long message transfer */ enum { eager, rndv } protocol; @@ -59,6 +73,8 @@ struct mca_mtl_portals4_module_t { /** Network interface handle for matched interface */ ptl_handle_ni_t ni_h; + /** Limit given by portals after NIInit */ + uint64_t 
max_msg_size_mtl; /** Uid for current user */ ptl_uid_t uid; @@ -209,14 +225,37 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4; #define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \ (0 != (MTL_PORTALS4_SYNC_MSG & hdr_data)) +/* mtl-portals4 helpers */ +OMPI_DECLSPEC ompi_proc_t * +ompi_mtl_portals4_get_proc_group(struct ompi_group_t *group, int rank); + +static inline ptl_process_t +ompi_mtl_portals4_get_peer_group(struct ompi_group_t *group, int rank) +{ + return *((ptl_process_t*)(ompi_mtl_portals4_get_proc_group(group, rank)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4])); +} + +static inline ompi_proc_t * +ompi_mtl_portals4_get_proc(struct ompi_communicator_t *comm, int rank) +{ + return ompi_mtl_portals4_get_proc_group(comm->c_remote_group, rank); +} + +static inline ptl_process_t +ompi_mtl_portals4_get_peer(struct ompi_communicator_t *comm, int rank) +{ + return *((ptl_process_t*)(ompi_mtl_portals4_get_proc(comm, rank)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4])); +} + + /* MTL interface functions */ extern int ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl); -extern int ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t* mtl, +extern int ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t* mtl, size_t nprocs, struct ompi_proc_t** procs); -extern int ompi_mtl_portals4_del_procs(struct mca_mtl_base_module_t* mtl, +extern int ompi_mtl_portals4_del_procs(struct mca_mtl_base_module_t* mtl, size_t nprocs, struct ompi_proc_t** procs); diff --git a/ompi/mca/mtl/portals4/mtl_portals4_cancel.c b/ompi/mca/mtl/portals4/mtl_portals4_cancel.c index 70a5110793d..1e2e5a43ddf 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_cancel.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_cancel.c @@ -20,7 +20,7 @@ ompi_mtl_portals4_cancel(struct mca_mtl_base_module_t* mtl, mca_mtl_request_t *mtl_request, int flag) { - ompi_mtl_portals4_base_request_t *base_request = + ompi_mtl_portals4_base_request_t *base_request = (ompi_mtl_portals4_base_request_t*) 
mtl_request; int ret; @@ -31,7 +31,7 @@ ompi_mtl_portals4_cancel(struct mca_mtl_base_module_t* mtl, case portals4_req_recv: { - ompi_mtl_portals4_recv_request_t *recvreq = + ompi_mtl_portals4_recv_request_t *recvreq = (ompi_mtl_portals4_recv_request_t*) base_request; /* Cancel receive requests if not yet matched (otherwise, diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index 1c797d19dba..3509efa03be 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -39,8 +39,8 @@ static int ompi_mtl_portals4_component_register(void); static int ompi_mtl_portals4_component_open(void); static int ompi_mtl_portals4_component_close(void); static int ompi_mtl_portals4_component_query(mca_base_module_t **module, int *priority); -static mca_mtl_base_module_t* -ompi_mtl_portals4_component_init(bool enable_progress_threads, +static mca_mtl_base_module_t* +ompi_mtl_portals4_component_init(bool enable_progress_threads, bool enable_mpi_threads); OMPI_MODULE_DECLSPEC extern mca_mtl_base_component_2_0_0_t mca_mtl_portals4_component; @@ -100,6 +100,18 @@ ompi_mtl_portals4_component_register(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, ¶m_priority); + ompi_mtl_portals4.short_limit = 2 * 1024; + (void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version, + "short_limit", + "Size limit for short messages", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, + NULL, + 0, + 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_mtl_portals4.short_limit); + ompi_mtl_portals4.eager_limit = 2 * 1024; (void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version, @@ -173,6 +185,19 @@ ompi_mtl_portals4_component_register(void) OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, &ompi_mtl_portals4.protocol); + + ompi_mtl_portals4.max_msg_size_mtl = PTL_SIZE_MAX; + (void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version, + 
"max_msg_size", + "Max size supported by portals4 (above that, a message is cut into messages less than that size)", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, + NULL, + 0, + 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_mtl_portals4.max_msg_size_mtl); + OBJ_RELEASE(new_enum); if (0 > ret) { return OMPI_ERR_NOT_SUPPORTED; @@ -184,7 +209,7 @@ ompi_mtl_portals4_component_register(void) static int ompi_mtl_portals4_component_open(void) { - ompi_mtl_portals4.base.mtl_request_size = + ompi_mtl_portals4.base.mtl_request_size = sizeof(ompi_mtl_portals4_request_t) - sizeof(struct mca_mtl_request_t); @@ -197,24 +222,30 @@ ompi_mtl_portals4_component_open(void) #endif ); opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Eager limit: %d", (int) + "Max message size: %lu", (unsigned long) + ompi_mtl_portals4.max_msg_size_mtl); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Short limit: %d", (int) + ompi_mtl_portals4.short_limit); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Eager limit: %d", (int) ompi_mtl_portals4.eager_limit); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Short receive blocks: %d", + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Short receive blocks: %d", ompi_mtl_portals4.recv_short_num); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "Send queue size: %d", ompi_mtl_portals4.send_queue_size); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "Recv queue size: %d", ompi_mtl_portals4.recv_queue_size); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Long protocol: %s", + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Long protocol: %s", (ompi_mtl_portals4.protocol == eager) ? "Eager" : (ompi_mtl_portals4.protocol == rndv) ? 
"Rendezvous" : "Other"); OBJ_CONSTRUCT(&ompi_mtl_portals4.fl_message, opal_free_list_t); opal_free_list_init(&ompi_mtl_portals4.fl_message, - sizeof(ompi_mtl_portals4_message_t) + + sizeof(ompi_mtl_portals4_message_t) + ompi_mtl_portals4.eager_limit, opal_cache_line_size, OBJ_CLASS(ompi_mtl_portals4_message_t), @@ -224,23 +255,40 @@ ompi_mtl_portals4_component_open(void) ompi_mtl_portals4.send_eq_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE; - + ompi_mtl_portals4.send_md_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.long_overflow_me_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL; ompi_mtl_portals4.read_idx = (ptl_pt_index_t) ~0UL; + ompi_mtl_portals4.need_init=1; + +#if OMPI_MTL_PORTALS4_FLOW_CONTROL + ompi_mtl_portals4.use_flowctl=1; +#else + ompi_mtl_portals4.use_flowctl=0; +#endif + return OMPI_SUCCESS; } +#define NEED_ALL_PROCS (ompi_mtl_portals4.use_logical || ompi_mtl_portals4.use_flowctl) + static int ompi_mtl_portals4_component_query(mca_base_module_t **module, int *priority) { /* * assume if portals4 MTL was compiled, the user wants it */ - + *priority = param_priority; *module = (mca_base_module_t *)&ompi_mtl_portals4.base; + + if (NEED_ALL_PROCS) { + /* let the pml know we need add_procs to be calls with all the + * procs in the job */ + ompi_mtl_portals4.base.mtl_flags |= MCA_MTL_BASE_FLAG_REQUIRE_WORLD; + } + return OMPI_SUCCESS; } @@ -260,6 +308,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, { int ret; ptl_process_t id; + ptl_ni_limits_t actual_limits; if (enable_mpi_threads && ompi_mpi_thread_multiple) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, @@ -281,13 +330,13 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, PTL_NI_LOGICAL | PTL_NI_MATCHING, PTL_PID_ANY, NULL, - NULL, + &actual_limits, &ompi_mtl_portals4.ni_h); else ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_PHYSICAL | PTL_NI_MATCHING, 
PTL_PID_ANY, NULL, - NULL, + &actual_limits, &ompi_mtl_portals4.ni_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, @@ -296,6 +345,30 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, goto error; } + if (actual_limits.max_msg_size < ompi_mtl_portals4.max_msg_size_mtl) + ompi_mtl_portals4.max_msg_size_mtl = actual_limits.max_msg_size; + OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output, + "Due to portals4 and user configuration messages will not go over the size of %lu", ompi_mtl_portals4.max_msg_size_mtl)); + + if (ompi_comm_rank(MPI_COMM_WORLD) == 0) { + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_entries=%d", actual_limits.max_entries); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_unexpected_headers=%d", actual_limits.max_unexpected_headers); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_mds=%d", actual_limits.max_mds); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_eqs=%d", actual_limits.max_eqs); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_cts=%d", actual_limits.max_cts); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_pt_index=%d", actual_limits.max_pt_index); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_iovecs=%d", actual_limits.max_iovecs); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_list_size=%d", actual_limits.max_list_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_triggered_ops=%d", actual_limits.max_triggered_ops); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_msg_size=%ld", actual_limits.max_msg_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_atomic_size=%ld", actual_limits.max_atomic_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, 
"max_fetch_atomic_size=%ld", actual_limits.max_fetch_atomic_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_waw_ordered_size=%ld", actual_limits.max_waw_ordered_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_war_ordered_size=%ld", actual_limits.max_war_ordered_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_volatile_size=%ld", actual_limits.max_volatile_size); + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "features=%u", actual_limits.features); + } + ret = PtlGetUid(ompi_mtl_portals4.ni_h, &ompi_mtl_portals4.uid); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, @@ -313,7 +386,11 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, goto error; } - OPAL_MODEX_SEND(ret, PMIX_SYNC_REQD, PMIX_GLOBAL, + ompi_mtl_portals4.ptl_process_id = id; + OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, + "PtlGetPhysId rank=%x nid=%x pid=%x\n", id.rank, id.phys.nid, id.phys.pid)); + + OPAL_MODEX_SEND(ret, OPAL_PMIX_GLOBAL, &mca_mtl_portals4_component.mtl_version, &id, sizeof(id)); if (OMPI_SUCCESS != ret) { @@ -327,6 +404,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, "My nid,pid = %x,%x", id.phys.nid, id.phys.pid)); + ompi_mtl_portals4.base.mtl_max_tag = MTL_PORTALS4_MAX_TAG; return &ompi_mtl_portals4.base; error: @@ -471,7 +549,7 @@ ompi_mtl_portals4_progress(void) } #if OMPI_MTL_PORTALS4_FLOW_CONTROL - if (OPAL_UNLIKELY(0 == count && + if (OPAL_UNLIKELY(0 == count && 0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) { ompi_mtl_portals4_pending_list_progress(); } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_endpoint.h b/ompi/mca/mtl/portals4/mtl_portals4_endpoint.h index 41d27246a52..2c135cc126f 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_endpoint.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_endpoint.h @@ -20,9 +20,21 @@ #ifndef 
OMPI_MTL_PORTALS_ENDPOINT_H #define OMPI_MTL_PORTALS_ENDPOINT_H +#include "ompi/mca/mtl/portals4/mtl_portals4.h" + struct mca_mtl_base_endpoint_t { ptl_process_t ptl_proc; }; typedef struct mca_mtl_base_endpoint_t mca_mtl_base_endpoint_t; +static inline mca_mtl_base_endpoint_t * +ompi_mtl_portals4_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc) +{ + if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4])) { + ompi_mtl_portals4_add_procs (mtl, 1, &ompi_proc); + } + + return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]; +} + #endif diff --git a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c index b66524e81a6..ee9d055d8ac 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.c @@ -70,6 +70,13 @@ ompi_mtl_portals4_flowctl_init(void) goto error; } + if (ompi_mtl_portals4.flowctl_idx != REQ_FLOWCTL_TABLE_ID) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n", + __FILE__, __LINE__, ompi_mtl_portals4.flowctl_idx); + goto error; + } + ret = PtlCTAlloc(ompi_mtl_portals4.ni_h, &ompi_mtl_portals4.flowctl.trigger_ct_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { @@ -93,8 +100,8 @@ ompi_mtl_portals4_flowctl_init(void) } me.ignore_bits = 0; - me.options = PTL_ME_OP_PUT | - PTL_ME_ACK_DISABLE | + me.options = PTL_ME_OP_PUT | + PTL_ME_ACK_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_EVENT_COMM_DISABLE | @@ -125,8 +132,8 @@ ompi_mtl_portals4_flowctl_init(void) __FILE__, __LINE__, ret); goto error; } - me.options = PTL_ME_OP_PUT | - PTL_ME_ACK_DISABLE | + me.options = PTL_ME_OP_PUT | + PTL_ME_ACK_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_EVENT_CT_COMM; @@ -154,8 +161,8 @@ ompi_mtl_portals4_flowctl_init(void) __FILE__, __LINE__, ret); goto error; } - me.options = PTL_ME_OP_PUT | - 
PTL_ME_ACK_DISABLE | + me.options = PTL_ME_OP_PUT | + PTL_ME_ACK_DISABLE | PTL_ME_EVENT_COMM_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | @@ -184,8 +191,8 @@ ompi_mtl_portals4_flowctl_init(void) __FILE__, __LINE__, ret); goto error; } - me.options = PTL_ME_OP_PUT | - PTL_ME_ACK_DISABLE | + me.options = PTL_ME_OP_PUT | + PTL_ME_ACK_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_EVENT_CT_COMM; @@ -219,15 +226,16 @@ ompi_mtl_portals4_flowctl_init(void) int ompi_mtl_portals4_flowctl_fini(void) { - PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx); - PtlCTFree(ompi_mtl_portals4.flowctl.trigger_ct_h); - PtlMEUnlink(ompi_mtl_portals4.flowctl.trigger_me_h); - PtlCTFree(ompi_mtl_portals4.flowctl.alert_ct_h); - PtlMEUnlink(ompi_mtl_portals4.flowctl.alert_me_h); - PtlCTFree(ompi_mtl_portals4.flowctl.fanin_ct_h); - PtlMEUnlink(ompi_mtl_portals4.flowctl.fanin_me_h); - PtlCTFree(ompi_mtl_portals4.flowctl.fanout_ct_h); - PtlMEUnlink(ompi_mtl_portals4.flowctl.fanout_me_h); + PtlMEUnlink(ompi_mtl_portals4.flowctl.trigger_me_h); + PtlCTFree(ompi_mtl_portals4.flowctl.trigger_ct_h); + PtlMEUnlink(ompi_mtl_portals4.flowctl.alert_me_h); + PtlCTFree(ompi_mtl_portals4.flowctl.alert_ct_h); + PtlMEUnlink(ompi_mtl_portals4.flowctl.fanin_me_h); + PtlCTFree(ompi_mtl_portals4.flowctl.fanin_ct_h); + PtlMEUnlink(ompi_mtl_portals4.flowctl.fanout_me_h); + PtlCTFree(ompi_mtl_portals4.flowctl.fanout_ct_h); + + PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx); return OMPI_SUCCESS; } @@ -263,7 +271,7 @@ ompi_mtl_portals4_flowctl_add_procs(size_t me, ompi_mtl_portals4.flowctl.root = *((ptl_process_t*) procs[0]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); if (false == ompi_mtl_portals4.flowctl.i_am_root) { - ompi_mtl_portals4.flowctl.parent = + ompi_mtl_portals4.flowctl.parent = *((ptl_process_t*) procs[(me - 1) / 2]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); } ompi_mtl_portals4.flowctl.me = @@ -276,7 +284,7 
@@ ompi_mtl_portals4_flowctl_add_procs(size_t me, ompi_mtl_portals4.flowctl.num_children++; if (ompi_mtl_portals4.use_logical) ompi_mtl_portals4.flowctl.children[i].rank = tmp; - else ompi_mtl_portals4.flowctl.children[i] = + else ompi_mtl_portals4.flowctl.children[i] = *((ptl_process_t*) procs[tmp]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); } } @@ -290,9 +298,7 @@ ompi_mtl_portals4_flowctl_trigger(void) { int ret; - if (false == ompi_mtl_portals4.flowctl.flowctl_active) { - ompi_mtl_portals4.flowctl.flowctl_active = true; - + if (true == OPAL_ATOMIC_CMPSET_32(&ompi_mtl_portals4.flowctl.flowctl_active, false, true)) { /* send trigger to root */ ret = PtlPut(ompi_mtl_portals4.zero_md_h, 0, @@ -319,9 +325,9 @@ ompi_mtl_portals4_flowctl_trigger(void) static int seqnum_compare(opal_list_item_t **ap, opal_list_item_t **bp) { - ompi_mtl_portals4_pending_request_t *a = + ompi_mtl_portals4_pending_request_t *a = (ompi_mtl_portals4_pending_request_t*) *ap; - ompi_mtl_portals4_pending_request_t *b = + ompi_mtl_portals4_pending_request_t *b = (ompi_mtl_portals4_pending_request_t*) *bp; if (a->ptl_request->opcount > b->ptl_request->opcount) { @@ -365,7 +371,7 @@ start_recover(void) } /* drain all pending sends */ - while (ompi_mtl_portals4.flowctl.send_slots != + while (ompi_mtl_portals4.flowctl.send_slots != ompi_mtl_portals4.flowctl.max_send_slots) { opal_progress(); } @@ -473,7 +479,7 @@ setup_alarm(uint32_t epoch) goto cleanup; } } - + cleanup: return ret; } @@ -490,7 +496,7 @@ setup_barrier(uint32_t epoch) ct.success = ompi_mtl_portals4.flowctl.epoch_counter * ompi_mtl_portals4.flowctl.num_procs; ct.failure = 0; - ret = PtlTriggeredCTSet(ompi_mtl_portals4.flowctl.trigger_ct_h, + ret = PtlTriggeredCTSet(ompi_mtl_portals4.flowctl.trigger_ct_h, ct, ompi_mtl_portals4.flowctl.fanin_ct_h, epoch * (ompi_mtl_portals4.flowctl.num_children + 1)); @@ -591,15 +597,15 @@ flowctl_fanout_callback(ptl_event_t *ev, } gettimeofday(&tv, NULL); - if (((tv.tv_sec * 1000000 + 
tv.tv_usec) - - (ompi_mtl_portals4.flowctl.tv.tv_sec * 1000000 + ompi_mtl_portals4.flowctl.tv.tv_usec)) + if (((tv.tv_sec * 1000000 + tv.tv_usec) - + (ompi_mtl_portals4.flowctl.tv.tv_sec * 1000000 + ompi_mtl_portals4.flowctl.tv.tv_usec)) < 1000000 * ompi_mtl_portals4.flowctl.backoff_count) { usleep(++ompi_mtl_portals4.flowctl.backoff_count); } else { ompi_mtl_portals4.flowctl.backoff_count = 0; } ompi_mtl_portals4.flowctl.tv = tv; - + ompi_mtl_portals4_pending_list_progress(); OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, diff --git a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h index 102659a8a23..7cc634b669b 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_flowctl.h @@ -34,7 +34,7 @@ OBJ_CLASS_DECLARATION(ompi_mtl_portals4_pending_request_t); struct ompi_mtl_portals4_flowctl_t { - bool flowctl_active; + int32_t flowctl_active; int32_t send_slots; int32_t max_send_slots; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_probe.c b/ompi/mca/mtl/portals4/mtl_portals4_probe.c index ee761237c2a..5f2a991cfe7 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_probe.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_probe.c @@ -22,16 +22,17 @@ #include "ompi/message/message.h" #include "mtl_portals4.h" +#include "mtl_portals4_endpoint.h" #include "mtl_portals4_request.h" #include "mtl_portals4_message.h" static int completion_fn(ptl_event_t *ev, ompi_mtl_portals4_base_request_t *ptl_base_request) { - ompi_mtl_portals4_probe_request_t *ptl_request = + ompi_mtl_portals4_probe_request_t *ptl_request = (ompi_mtl_portals4_probe_request_t*) ptl_base_request; - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "%s:%d: completion_fn: %d %d", __FILE__, __LINE__, ev->type, ev->ni_fail_type); @@ -78,7 +79,7 @@ ompi_mtl_portals4_iprobe(struct mca_mtl_base_module_t* mtl, remote_proc.rank = src; } else 
{ ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); - remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc)); } MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, @@ -156,7 +157,7 @@ ompi_mtl_portals4_improbe(struct mca_mtl_base_module_t *mtl, remote_proc.rank = src; } else { ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); - remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc)); } MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index b3096974be0..387aa53be02 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -29,6 +29,7 @@ #include "ompi/message/message.h" #include "mtl_portals4.h" +#include "mtl_portals4_endpoint.h" #include "mtl_portals4_request.h" #include "mtl_portals4_recv_short.h" #include "mtl_portals4_message.h" @@ -38,25 +39,9 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target, ptl_match_bits_t match_bits, ptl_size_t remote_offset, ompi_mtl_portals4_recv_request_t *request) { - ptl_md_t md; - int ret; - - /* FIX ME: This needs to be on the send eq... 
*/ - md.start = start; - md.length = length; - md.options = 0; - md.eq_handle = ompi_mtl_portals4.send_eq_h; - md.ct_handle = PTL_CT_NONE; - - ret = PtlMDBind(ompi_mtl_portals4.ni_h, - &md, - &request->md_h); - if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlMDBind failed: %d", - __FILE__, __LINE__, ret); - return OMPI_ERR_OUT_OF_RESOURCE; - } + int ret, i; + ptl_size_t rest = length, asked = 0, frag_size; + int32_t pending_reply; #if OMPI_MTL_PORTALS4_FLOW_CONTROL while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { @@ -65,20 +50,29 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target, } #endif - ret = PtlGet(request->md_h, - 0, - md.length, - target, - ompi_mtl_portals4.read_idx, - match_bits, - remote_offset, - request); - if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PtlGet failed: %d", - __FILE__, __LINE__, ret); - PtlMDRelease(request->md_h); - return OMPI_ERR_OUT_OF_RESOURCE; + request->pending_reply = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl; + pending_reply = request->pending_reply; + + for (i = 0 ; i < pending_reply ; i++) { + OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "GET (fragment %d/%d) send", + i + 1, pending_reply)); + frag_size = (OPAL_UNLIKELY(rest > ompi_mtl_portals4.max_msg_size_mtl)) ? 
ompi_mtl_portals4.max_msg_size_mtl : rest; + ret = PtlGet(ompi_mtl_portals4.send_md_h, + (ptl_size_t) start + i * ompi_mtl_portals4.max_msg_size_mtl, + frag_size, + target, + ompi_mtl_portals4.read_idx, + match_bits, + remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl, + request); + if (OPAL_UNLIKELY(PTL_OK != ret)) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: PtlGet failed: %d", + __FILE__, __LINE__, ret); + return OMPI_ERR_OUT_OF_RESOURCE; + } + rest -= frag_size; + asked += frag_size; } return OMPI_SUCCESS; @@ -91,7 +85,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, ompi_mtl_portals4_base_request_t* ptl_base_request) { int ret; - ompi_mtl_portals4_recv_request_t* ptl_request = + ompi_mtl_portals4_recv_request_t* ptl_request = (ompi_mtl_portals4_recv_request_t*) ptl_base_request; size_t msg_length = 0; @@ -109,6 +103,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); + ret = PTL_FAIL; goto callback_error; } @@ -117,35 +112,39 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data); ptl_request->super.super.ompi_req->req_status.MPI_SOURCE = MTL_PORTALS4_GET_SOURCE(ev->match_bits); - ptl_request->super.super.ompi_req->req_status.MPI_TAG = + ptl_request->super.super.ompi_req->req_status.MPI_TAG = MTL_PORTALS4_GET_TAG(ev->match_bits); if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "truncate expected: %ld %ld", + "truncate expected: %ld %ld", msg_length, ptl_request->delivery_len); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; } + if (ev->mlength < msg_length) + OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "Truncated message, some PtlGet are required (protocol = %d)", + 
ompi_mtl_portals4.protocol)); + #if OPAL_ENABLE_DEBUG ptl_request->hdr_data = ev->hdr_data; #endif - if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) { - /* If it's not a short message and we're doing rndv, we + ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; + if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && msg_length > ev->mlength) { + /* If it's not a short message and we're doing rndv and the message is not complete, we only have the first part of the message. Issue the get to pull the second part of the message. */ - ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit, + ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength, ((msg_length > ptl_request->delivery_len) ? - ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit, + ptl_request->delivery_len : msg_length) - ev->mlength, ev->initiator, ev->hdr_data, - ompi_mtl_portals4.eager_limit, + ev->mlength, ptl_request); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); goto callback_error; } - } else { /* If we're either using the eager protocol or were a short message, all data has been received, so complete @@ -159,8 +158,6 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, __FILE__, __LINE__, ret); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret; } - ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Recv %lu (0x%lx) completed, expected", ptl_request->opcount, ptl_request->hdr_data)); @@ -177,17 +174,19 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); - PtlMDRelease(ptl_request->md_h); + ret = PTL_FAIL; goto callback_error; } /* set the received length in the status, now that we 
know - excatly how much data was sent. */ - ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; - if (ompi_mtl_portals4.protocol == rndv) { - ptl_request->super.super.ompi_req->req_status._ucount += - ompi_mtl_portals4.eager_limit; + exactly how much data was sent. */ + ptl_request->super.super.ompi_req->req_status._ucount += ev->mlength; + + ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1); + if (ret > 0) { + return OMPI_SUCCESS; } + assert(ptl_request->pending_reply == 0); #if OMPI_MTL_PORTALS4_FLOW_CONTROL OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); @@ -198,8 +197,8 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, three protocols. mlength is only correct for eager, and delivery_len is the length of the buffer, not the length of the send. */ - ret = ompi_mtl_datatype_unpack(ptl_request->convertor, - ptl_request->delivery_ptr, + ret = ompi_mtl_datatype_unpack(ptl_request->convertor, + ptl_request->delivery_ptr, ptl_request->super.super.ompi_req->req_status._ucount); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, @@ -207,16 +206,15 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, __FILE__, __LINE__, ret); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret; } - PtlMDRelease(ptl_request->md_h); - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, - "Recv %lu (0x%lx) completed, reply", - ptl_request->opcount, ptl_request->hdr_data)); + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, + "Recv %lu (0x%lx) completed , reply (pending_reply: %d)", + ptl_request->opcount, ptl_request->hdr_data, ptl_request->pending_reply)); ptl_request->super.super.completion_callback(&ptl_request->super.super); break; case PTL_EVENT_PUT_OVERFLOW: - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Recv %lu (0x%lx) got put_overflow event", 
ptl_request->opcount, ev->hdr_data)); @@ -224,6 +222,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); + ret = PTL_FAIL; goto callback_error; } @@ -232,11 +231,11 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data); ptl_request->super.super.ompi_req->req_status.MPI_SOURCE = MTL_PORTALS4_GET_SOURCE(ev->match_bits); - ptl_request->super.super.ompi_req->req_status.MPI_TAG = + ptl_request->super.super.ompi_req->req_status.MPI_TAG = MTL_PORTALS4_GET_TAG(ev->match_bits); if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "truncate unexpected: %ld %ld %d", + "truncate unexpected: %ld %ld %d", msg_length, ptl_request->delivery_len, MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; @@ -258,7 +257,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, iov.iov_len = ev->mlength; max_data = iov.iov_len; - ret = opal_convertor_unpack(ptl_request->convertor, + ret = opal_convertor_unpack(ptl_request->convertor, &iov, &iov_count, &max_data ); if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); @@ -271,7 +270,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, } /* if it's a sync, send the ack */ if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) { - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Recv %lu (0x%lx) sending sync ack", ptl_request->opcount, ptl_request->hdr_data)); ret = PtlPut(ompi_mtl_portals4.zero_md_h, @@ -292,23 +291,22 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, } } - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, 
"Recv %lu (0x%lx) completed, unexpected short (0x%lx)", ptl_request->opcount, ptl_request->hdr_data, (long) ev->start)); ptl_request->super.super.completion_callback(&ptl_request->super.super); } else { - if (ev->mlength > 0) { - /* if rndv or triggered, copy the eager part to the right place */ - memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength); - } - ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength, - ((msg_length > ptl_request->delivery_len) ? - ptl_request->delivery_len : msg_length) - ev->mlength, + /* For long messages in the overflow list, ev->mlength = 0 */ + ptl_request->super.super.ompi_req->req_status._ucount = 0; + + ret = read_msg((char*) ptl_request->delivery_ptr, + (msg_length > ptl_request->delivery_len) ? + ptl_request->delivery_len : msg_length, ev->initiator, ev->hdr_data, - ev->mlength, + 0, ptl_request); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); @@ -331,7 +329,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, return OMPI_SUCCESS; callback_error: - ptl_request->super.super.ompi_req->req_status.MPI_ERROR = + ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ompi_mtl_portals4_get_error(ret); ptl_request->super.super.completion_callback(&ptl_request->super.super); return OMPI_SUCCESS; @@ -349,7 +347,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, ptl_match_bits_t match_bits, ignore_bits; int ret = OMPI_SUCCESS; ptl_process_t remote_proc; - ompi_mtl_portals4_recv_request_t *ptl_request = + ompi_mtl_portals4_recv_request_t *ptl_request = (ompi_mtl_portals4_recv_request_t*) mtl_request; void *start; size_t length; @@ -367,7 +365,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, remote_proc.rank = src; } else { ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); - remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + remote_proc = *((ptl_process_t*) 
ompi_mtl_portals4_get_endpoint (mtl, ompi_proc)); } MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, @@ -390,11 +388,12 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, ptl_request->delivery_len = length; ptl_request->req_started = false; ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; + ptl_request->pending_reply = 0; OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx)\n", ptl_request->opcount, - remote_proc.phys.nid, remote_proc.phys.pid, + remote_proc.phys.nid, remote_proc.phys.pid, (int64_t)length, match_bits, ignore_bits, (unsigned long) ptl_request)); me.start = start; @@ -402,11 +401,11 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; - me.options = - PTL_ME_OP_PUT | - PTL_ME_USE_ONCE | + me.options = + PTL_ME_OP_PUT | + PTL_ME_USE_ONCE | PTL_ME_EVENT_UNLINK_DISABLE; - if (length <= ompi_mtl_portals4.eager_limit) { + if (length <= ompi_mtl_portals4.short_limit) { me.options |= PTL_ME_EVENT_LINK_DISABLE; } me.match_id = remote_proc; @@ -430,13 +429,13 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, /* if a long message, spin until we either have a comm event or a link event, guaranteeing progress for long unexpected messages. 
*/ - if (length > ompi_mtl_portals4.eager_limit) { + if (length > ompi_mtl_portals4.short_limit) { while (true != ptl_request->req_started) { ompi_mtl_portals4_progress(); } } - return OMPI_SUCCESS; + return OMPI_SUCCESS; } @@ -446,13 +445,13 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl, struct ompi_message_t **message, struct mca_mtl_request_t *mtl_request) { - ompi_mtl_portals4_recv_request_t *ptl_request = + ompi_mtl_portals4_recv_request_t *ptl_request = (ompi_mtl_portals4_recv_request_t*) mtl_request; void *start; size_t length; bool free_after; int ret; - ompi_mtl_portals4_message_t *ptl_message = + ompi_mtl_portals4_message_t *ptl_message = (ompi_mtl_portals4_message_t*) (*message)->req_ptr; ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after); @@ -471,6 +470,7 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl, ptl_request->delivery_ptr = start; ptl_request->delivery_len = length; ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; + ptl_request->pending_reply = 0; OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Mrecv %lu of length %ld (0x%lx)\n", diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c b/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c index b6b28691586..7f7223e7797 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Sandia National Laboratories. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,7 +37,8 @@ static int ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev, ompi_mtl_portals4_base_request_t* ptl_base_request) { - ompi_mtl_portals4_recv_short_request_t *ptl_request = + int ret = OMPI_SUCCESS; + ompi_mtl_portals4_recv_short_request_t *ptl_request = (ompi_mtl_portals4_recv_short_request_t*) ptl_base_request; ompi_mtl_portals4_recv_short_block_t *block = ptl_request->block; @@ -59,10 +60,10 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev, opal_list_remove_item(&ompi_mtl_portals4.recv_short_blocks, &block->base); OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); - ompi_mtl_portals4_recv_short_block_free(block); + ret = ompi_mtl_portals4_recv_short_block_free(block); } else { OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); - ompi_mtl_portals4_activate_block(block); + ret = ompi_mtl_portals4_activate_block(block); } break; @@ -85,6 +86,7 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev, break; case PTL_EVENT_AUTO_UNLINK: + block->me_h = PTL_INVALID_HANDLE; #if OMPI_ENABLE_THREAD_MULTIPLE OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex); switch (block->status) { @@ -99,12 +101,12 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev, opal_list_remove_item(&ompi_mtl_portals4.recv_short_blocks, &block->base); OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); - ompi_mtl_portals4_recv_short_block_free(block); + ret = ompi_mtl_portals4_recv_short_block_free(block); } else { OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output, "mtl:portals4 PTL_EVENT_AUTO_UNLINK received after PTL_EVENT_AUTO_FREE")); - ompi_mtl_portals4_activate_block(block); + ret = ompi_mtl_portals4_activate_block(block); } break; @@ -150,11 +152,11 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev, break; } - return OMPI_SUCCESS; + return ret; } -static 
ompi_mtl_portals4_recv_short_block_t* +static ompi_mtl_portals4_recv_short_block_t* ompi_mtl_portals4_recv_short_block_alloc(bool release_on_free) { ompi_mtl_portals4_recv_short_block_t *block; @@ -206,11 +208,12 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block) me.start = block->start; me.length = ompi_mtl_portals4.recv_short_size; me.ct_handle = PTL_CT_NONE; - me.min_free = ompi_mtl_portals4.eager_limit; + me.min_free = ompi_mtl_portals4.short_limit; me.uid = ompi_mtl_portals4.uid; - me.options = - PTL_ME_OP_PUT | - PTL_ME_MANAGE_LOCAL | + me.options = + PTL_ME_OP_PUT | + PTL_ME_EVENT_COMM_DISABLE | + PTL_ME_MANAGE_LOCAL | PTL_ME_MAY_ALIGN; if (ompi_mtl_portals4.use_logical) { me.match_id.rank = PTL_RANK_ANY; @@ -244,24 +247,25 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block) int ompi_mtl_portals4_recv_short_init(void) { - int i; + int ret = OMPI_SUCCESS; + uint32_t i; OBJ_CONSTRUCT(&ompi_mtl_portals4.short_block_mutex, opal_mutex_t); OBJ_CONSTRUCT(&(ompi_mtl_portals4.recv_short_blocks), opal_list_t); /* create the recv blocks */ for (i = 0 ; i < ompi_mtl_portals4.recv_short_num ; ++i) { - ompi_mtl_portals4_recv_short_block_t *block = + ompi_mtl_portals4_recv_short_block_t *block = ompi_mtl_portals4_recv_short_block_alloc(false); if (OPAL_UNLIKELY(NULL == block)) { return OMPI_ERR_OUT_OF_RESOURCE; } opal_list_append(&ompi_mtl_portals4.recv_short_blocks, &block->base); - ompi_mtl_portals4_activate_block(block); + ret = ompi_mtl_portals4_activate_block(block); } - return OMPI_SUCCESS; + return ret; } @@ -269,35 +273,45 @@ int ompi_mtl_portals4_recv_short_fini(void) { opal_list_item_t *item; + int ret = OMPI_SUCCESS; OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex); while (NULL != (item = opal_list_remove_first(&ompi_mtl_portals4.recv_short_blocks))) { - ompi_mtl_portals4_recv_short_block_t *block = + ompi_mtl_portals4_recv_short_block_t *block = (ompi_mtl_portals4_recv_short_block_t*) item; - 
ompi_mtl_portals4_recv_short_block_free(block); + ret = ompi_mtl_portals4_recv_short_block_free(block); + ompi_mtl_portals4.active_recv_short_blocks--; } OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); - return OMPI_SUCCESS; + return ret; } int ompi_mtl_portals4_recv_short_link(int count) { + int ret = OMPI_SUCCESS; int active = ompi_mtl_portals4.active_recv_short_blocks; int i; if (active < count) { for (i = 0 ; i < (count - active) ; ++i) { - ompi_mtl_portals4_recv_short_block_t *block = - ompi_mtl_portals4_recv_short_block_alloc(false); + ompi_mtl_portals4_recv_short_block_t *block = + ompi_mtl_portals4_recv_short_block_alloc(true); if (NULL == block) { return OMPI_ERR_OUT_OF_RESOURCE; } - ompi_mtl_portals4_activate_block(block); + OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex); + opal_list_append(&ompi_mtl_portals4.recv_short_blocks, + &block->base); + OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output, + "recv_short_link: total=%d active=%d", + (int) opal_list_get_size(&ompi_mtl_portals4.recv_short_blocks), ompi_mtl_portals4.active_recv_short_blocks)); + OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); + ret = ompi_mtl_portals4_activate_block(block); } } - - return OMPI_SUCCESS; + + return ret; } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv_short.h b/ompi/mca/mtl/portals4/mtl_portals4_recv_short.h index 0c5c08e5eba..5aab083bedb 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv_short.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv_short.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2010 Sandia National Laboratories. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/mtl/portals4/mtl_portals4_request.h b/ompi/mca/mtl/portals4/mtl_portals4_request.h index 7a90ff46537..e187bce765e 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_request.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_request.h @@ -52,6 +52,8 @@ struct ompi_mtl_portals4_isend_request_t { #if OMPI_MTL_PORTALS4_FLOW_CONTROL struct ompi_mtl_portals4_pending_request_t *pending; #endif + ptl_size_t length; + int32_t pending_get; uint32_t event_count; }; typedef struct ompi_mtl_portals4_isend_request_t ompi_mtl_portals4_isend_request_t; @@ -68,12 +70,12 @@ typedef struct ompi_mtl_portals4_send_request_t ompi_mtl_portals4_send_request_t struct ompi_mtl_portals4_recv_request_t { ompi_mtl_portals4_base_request_t super; void *buffer_ptr; - ptl_handle_md_t md_h; ptl_handle_me_t me_h; struct opal_convertor_t *convertor; void *delivery_ptr; size_t delivery_len; volatile bool req_started; + int32_t pending_reply; #if OPAL_ENABLE_DEBUG uint64_t opcount; ptl_hdr_data_t hdr_data; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_send.c b/ompi/mca/mtl/portals4/mtl_portals4_send.c index 7d6ac1f2388..6393b9a465b 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_send.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_send.c @@ -28,6 +28,7 @@ #include "ompi/mca/mtl/base/mtl_base_datatype.h" #include "mtl_portals4.h" +#include "mtl_portals4_endpoint.h" #include "mtl_portals4_request.h" #if OMPI_MTL_PORTALS4_FLOW_CONTROL #include "mtl_portals4_flowctl.h" @@ -35,17 +36,40 @@ static inline int -ompi_mtl_portals4_callback(ptl_event_t *ev, +ompi_mtl_portals4_callback(ptl_event_t *ev, ompi_mtl_portals4_base_request_t* ptl_base_request, bool *complete) { int retval = OMPI_SUCCESS, ret, val, add = 1; - ompi_mtl_portals4_isend_request_t* ptl_request = + ompi_mtl_portals4_isend_request_t* ptl_request = (ompi_mtl_portals4_isend_request_t*) 
ptl_base_request; + if (PTL_EVENT_GET == ev->type) { + ret = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1); + if (ret > 0) { + /* wait for other gets */ + OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "PTL_EVENT_GET received now pending_get=%d",ret)); + return retval; + } + assert(ptl_request->pending_get == 0); + + /* last get received */ + OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "PTL_EVENT_GET: PtlMEUnlink is called ptl_request->me_h=%d (pending get=%d)", ptl_request->me_h, ret)); + + if (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE)) { + ret = PtlMEUnlink(ptl_request->me_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: send callback PtlMEUnlink returned %d", + __FILE__, __LINE__, ret); + } + ptl_request->me_h = PTL_INVALID_HANDLE; + } + } + #if OMPI_MTL_PORTALS4_FLOW_CONTROL if (OPAL_UNLIKELY(ev->ni_fail_type == PTL_NI_PT_DISABLED)) { - ompi_mtl_portals4_pending_request_t *pending = + ompi_mtl_portals4_pending_request_t *pending = ptl_request->pending; OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output, @@ -65,9 +89,10 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, "%s:%d: send callback PtlMEUnlink returned %d", __FILE__, __LINE__, ret); } + ptl_request->me_h = PTL_INVALID_HANDLE; } - opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, + opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends, &pending->super.super); OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); ompi_mtl_portals4_flowctl_trigger(); @@ -88,11 +113,40 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, "send %lu got event of type %d", ptl_request->opcount, ev->type)); + /* First put achieved successfully (In the Priority List), so it may be necessary to decrement the number of pending get + * If the protocol is eager, just decrement pending_get + * Else (the protocol is rndv), decrement pending_get only if length % max_msg_size <= eager_limit + * 
(This is the case where the eager part saves one get) + */ if ((PTL_EVENT_ACK == ev->type) && (PTL_PRIORITY_LIST == ev->ptl_list) && - (eager == ompi_mtl_portals4.protocol) && + (0 < ptl_request->pending_get)) { + + if ((eager == ompi_mtl_portals4.protocol) || + (ptl_request->length % ompi_mtl_portals4.max_msg_size_mtl <= ompi_mtl_portals4.eager_limit)) { + val = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1); + } + if (0 == val) { + add = 2; /* We don't have to wait for any get, so we have to add an extra count to cause the message to complete */ + if (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE)) { + ret = PtlMEUnlink(ptl_request->me_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: send callback PtlMEUnlink returned %d", + __FILE__, __LINE__, ret); + } + ptl_request->me_h = PTL_INVALID_HANDLE; + } + } + } + + if ((PTL_EVENT_ACK == ev->type) && + (PTL_PRIORITY_LIST == ev->ptl_list) && + (ev->mlength == ptl_request->length) && (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE))) { - /* long expected messages with the eager protocol won't see a + /* long expected messages with the eager protocol + (and also with the rndv protocol if the length + is less than or equal to eager_limit) won't see a get event to complete the message. Give them an extra count to cause the message to complete with just the SEND and ACK events and remove the ME.
(we wait for the counter @@ -104,10 +158,10 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, "%s:%d: send callback PtlMEUnlink returned %d", __FILE__, __LINE__, ret); } + ptl_request->me_h = PTL_INVALID_HANDLE; add++; } val = OPAL_THREAD_ADD32((int32_t*)&ptl_request->event_count, add); - assert(val <= 3); if (val == 3) { @@ -129,7 +183,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, } #endif } - + return retval; } @@ -140,7 +194,7 @@ ompi_mtl_portals4_send_callback(ptl_event_t *ev, { bool complete = false; int ret; - ompi_mtl_portals4_send_request_t* ptl_request = + ompi_mtl_portals4_send_request_t* ptl_request = (ompi_mtl_portals4_send_request_t*) ptl_base_request; ret = ompi_mtl_portals4_callback(ev, ptl_base_request, &complete); @@ -160,7 +214,7 @@ ompi_mtl_portals4_isend_callback(ptl_event_t *ev, { bool complete = false; int ret; - ompi_mtl_portals4_isend_request_t* ptl_request = + ompi_mtl_portals4_isend_request_t* ptl_request = (ompi_mtl_portals4_isend_request_t*) ptl_base_request; ret = ompi_mtl_portals4_callback(ev, ptl_base_request, &complete); @@ -174,7 +228,7 @@ ompi_mtl_portals4_isend_callback(ptl_event_t *ev, static inline int -ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, +ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, void *start, int length, int contextid, int tag, int localrank, ptl_process_t ptl_proc, @@ -185,11 +239,12 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, ptl_me_t me; ptl_hdr_data_t hdr_data; - MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, + MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, MTL_PORTALS4_SHORT_MSG); - MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, + MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) ? 
1 : 0); + ptl_request->me_h = PTL_INVALID_HANDLE; if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) { me.start = NULL; @@ -197,9 +252,9 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; - me.options = - PTL_ME_OP_PUT | - PTL_ME_USE_ONCE | + me.options = + PTL_ME_OP_PUT | + PTL_ME_USE_ONCE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; me.match_id = ptl_proc; @@ -216,6 +271,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d", __FILE__, __LINE__, ret); + ptl_request->me_h = PTL_INVALID_HANDLE; return ompi_mtl_portals4_get_error(ret); } @@ -224,7 +280,6 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, ptl_request->opcount, hdr_data, match_bits)); } else { ptl_request->event_count = 1; - ptl_request->me_h = PTL_INVALID_HANDLE; OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Send %lu short send with hdr_data 0x%lx (0x%lx)", @@ -235,6 +290,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, "Send %lu, start: %p", ptl_request->opcount, start)); + ptl_request->pending_get = 0; ret = PtlPut(ompi_mtl_portals4.send_md_h, (ptl_size_t) start, length, @@ -251,16 +307,17 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, __FILE__, __LINE__, ret); if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) { PtlMEUnlink(ptl_request->me_h); + ptl_request->me_h = PTL_INVALID_HANDLE; } return ompi_mtl_portals4_get_error(ret); } - + return OMPI_SUCCESS; } static inline int ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag, - int localrank, + int localrank, ptl_process_t ptl_proc, ompi_mtl_portals4_isend_request_t *ptl_request) { @@ -270,7 +327,7 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag, ptl_hdr_data_t hdr_data; ptl_size_t put_length; - 
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, + MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, MTL_PORTALS4_LONG_MSG); MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, 0); @@ -280,9 +337,8 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag, me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; - me.options = - PTL_ME_OP_GET | - PTL_ME_USE_ONCE | + me.options = + PTL_ME_OP_GET | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; me.match_id = ptl_proc; @@ -306,8 +362,32 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag, "Send %lu long send with hdr_data 0x%lx (0x%lx)", ptl_request->opcount, hdr_data, match_bits)); - put_length = (rndv == ompi_mtl_portals4.protocol) ? - (ptl_size_t) ompi_mtl_portals4.eager_limit : (ptl_size_t) length; + if (rndv == ompi_mtl_portals4.protocol) { + ptl_size_t min = (OPAL_LIKELY (ompi_mtl_portals4.eager_limit < ompi_mtl_portals4.max_msg_size_mtl)) ? + ompi_mtl_portals4.eager_limit : + ompi_mtl_portals4.max_msg_size_mtl; + if ((ptl_size_t) length > (ptl_size_t) min) { + OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, + "msg truncated by %ld", length - min)); + put_length = (ptl_size_t) min; + } + else + put_length = (ptl_size_t) length; + } else { // eager protocol + if (length > ompi_mtl_portals4.max_msg_size_mtl) + put_length = (ptl_size_t) ompi_mtl_portals4.max_msg_size_mtl; + else + put_length = (ptl_size_t) length; + } + + /* We have to wait for some GET events. 
+ If the first put falls in overflow list, the number of GET events is equal to: + (length - 1) / ompi_mtl_portals4.max_msg_size_mtl + 1 + else we will re-calculate this number when we receive the first ACK event (with remote overflow list) + */ + + ptl_request->pending_get = (length - 1) / ompi_mtl_portals4.max_msg_size_mtl + 1; + OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "pending_get=%d", ptl_request->pending_get)); ret = PtlPut(ompi_mtl_portals4.send_md_h, (ptl_size_t) start, @@ -323,7 +403,8 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag, opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPut failed: %d", __FILE__, __LINE__, ret); - PtlMEUnlink(ptl_request->me_h); + PtlMEUnlink(ptl_request->me_h); + ptl_request->me_h = PTL_INVALID_HANDLE; return ompi_mtl_portals4_get_error(ret); } @@ -354,7 +435,7 @@ ompi_mtl_portals4_pending_list_progress() } pending = (ompi_mtl_portals4_pending_request_t*) item; - if (pending->length <= ompi_mtl_portals4.eager_limit) { + if (pending->length <= ompi_mtl_portals4.short_limit) { ret = ompi_mtl_portals4_short_isend(pending->mode, pending->start, pending->length, @@ -405,7 +486,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, ptl_proc.rank = dest; } else { ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest); - ptl_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + ptl_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc)); } ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); @@ -413,6 +494,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1); ptl_request->buffer_ptr = (free_after) ?
start : NULL; + ptl_request->length = length; ptl_request->event_count = 0; OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, @@ -460,7 +542,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, return OMPI_SUCCESS; } #endif - if (length <= ompi_mtl_portals4.eager_limit) { + if (length <= ompi_mtl_portals4.short_limit) { ret = ompi_mtl_portals4_short_isend(mode, start, length, @@ -478,7 +560,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, ptl_proc, ptl_request); } - + return ret; } diff --git a/ompi/mca/mtl/psm/Makefile.am b/ompi/mca/mtl/psm/Makefile.am index 8f7690c04be..816309f753b 100644 --- a/ompi/mca/mtl/psm/Makefile.am +++ b/ompi/mca/mtl/psm/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2006 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/mtl/psm/help-mtl-psm.txt b/ompi/mca/mtl/psm/help-mtl-psm.txt index 7ca60a5396f..9572b48ca47 100644 --- a/ompi/mca/mtl/psm/help-mtl-psm.txt +++ b/ompi/mca/mtl/psm/help-mtl-psm.txt @@ -19,7 +19,7 @@ Unable to set PSM debug level. # [unable to open endpoint] PSM was unable to open an endpoint. Please make sure that the network link is -active on the node and the hardware is functioning. +active on the node and the hardware is functioning. 
Error: %s # diff --git a/ompi/mca/mtl/psm/mtl_psm.c b/ompi/mca/mtl/psm/mtl_psm.c index 004cbe1ea32..856589523a5 100644 --- a/ompi/mca/mtl/psm/mtl_psm.c +++ b/ompi/mca/mtl/psm/mtl_psm.c @@ -39,14 +39,14 @@ mca_mtl_psm_module_t ompi_mtl_psm = { /* NTH: PSM supports 16 bit context ids */ .mtl_max_contextid = (1UL << 16) - 1, .mtl_max_tag = (1UL << 30), /* must allow negatives */ - + .mtl_add_procs = ompi_mtl_psm_add_procs, .mtl_del_procs = ompi_mtl_psm_del_procs, .mtl_finalize = ompi_mtl_psm_finalize, - + .mtl_send = ompi_mtl_psm_send, .mtl_isend = ompi_mtl_psm_isend, - + .mtl_irecv = ompi_mtl_psm_irecv, .mtl_iprobe = ompi_mtl_psm_iprobe, .mtl_imrecv = ompi_mtl_psm_imrecv, @@ -55,12 +55,12 @@ mca_mtl_psm_module_t ompi_mtl_psm = { .mtl_cancel = ompi_mtl_psm_cancel, .mtl_add_comm = ompi_mtl_psm_add_comm, .mtl_del_comm = ompi_mtl_psm_del_comm - } + } }; static psm_error_t -ompi_mtl_psm_errhandler(psm_ep_t ep, const psm_error_t error, +ompi_mtl_psm_errhandler(psm_ep_t ep, const psm_error_t error, const char *error_string, psm_error_token_t token) { switch (error) { @@ -87,7 +87,7 @@ ompi_mtl_psm_errhandler(psm_ep_t ep, const psm_error_t error, int ompi_mtl_psm_progress( void ); -int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { +int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { psm_error_t err; psm_ep_t ep; /* endpoint handle */ psm_mq_t mq; @@ -101,7 +101,7 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { generated_key = getenv("OMPI_MCA_orte_precondition_transports"); memset(uu, 0, sizeof(psm_uuid_t)); - + if (!generated_key || (strlen(generated_key) != 33) || sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) { @@ -110,7 +110,7 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { generated_key ? 
"could not be parsed from" : "not present in", ompi_process_info.nodename); return OMPI_ERROR; - + } /* Handle our own errors for opening endpoints */ @@ -123,7 +123,7 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { setenv("MPI_LOCALRANKID", env_string, 0); snprintf(env_string, sizeof(env_string), "%d", num_local_procs); setenv("MPI_LOCALNRANKS", env_string, 0); - + /* Setup the endpoint options. */ bzero((void*) &ep_opt, sizeof(ep_opt)); ep_opt.timeout = ompi_mtl_psm.connect_timeout * 1e9; @@ -132,10 +132,10 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { ep_opt.shm_mbytes = -1; /* Choose PSM defaults */ ep_opt.sendbufs_num = -1; /* Choose PSM defaults */ -#if PSM_VERNO >= 0x0101 +#if PSM_VERNO >= 0x0101 ep_opt.network_pkey = ompi_mtl_psm.ib_pkey; #endif - + #if PSM_VERNO >= 0x0107 ep_opt.port = ompi_mtl_psm.ib_port; ep_opt.outsl = ompi_mtl_psm.ib_service_level; @@ -157,9 +157,9 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { /* Future errors are handled by the default error handler */ psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT); - - err = psm_mq_init(ep, - 0xffff000000000000ULL, + + err = psm_mq_init(ep, + 0xffff000000000000ULL, NULL, 0, &mq); @@ -174,24 +174,24 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { ompi_mtl_psm.epid = epid; ompi_mtl_psm.mq = mq; - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, - &mca_mtl_psm_component.super.mtl_version, - &ompi_mtl_psm.epid, + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, + &mca_mtl_psm_component.super.mtl_version, + &ompi_mtl_psm.epid, sizeof(psm_epid_t)); if (OMPI_SUCCESS != rc) { - opal_output(0, "Open MPI couldn't send PSM epid to head node process"); + opal_output(0, "Open MPI couldn't send PSM epid to head node process"); return OMPI_ERROR; } /* register the psm progress function */ opal_progress_register(ompi_mtl_psm_progress); - + return OMPI_SUCCESS; } int -ompi_mtl_psm_finalize(struct 
mca_mtl_base_module_t* mtl) { +ompi_mtl_psm_finalize(struct mca_mtl_base_module_t* mtl) { psm_error_t err; opal_progress_unregister(ompi_mtl_psm_progress); @@ -199,21 +199,21 @@ ompi_mtl_psm_finalize(struct mca_mtl_base_module_t* mtl) { /* free resources */ err = psm_mq_finalize(ompi_mtl_psm.mq); if (err) { - opal_output(0, "Error in psm_mq_finalize (error %s)\n", + opal_output(0, "Error in psm_mq_finalize (error %s)\n", psm_error_get_string(err)); return OMPI_ERROR; } err = psm_ep_close(ompi_mtl_psm.ep, PSM_EP_CLOSE_GRACEFUL, 1*1e9); if (err) { - opal_output(0, "Error in psm_ep_close (error %s)\n", + opal_output(0, "Error in psm_ep_close (error %s)\n", psm_error_get_string(err)); return OMPI_ERROR; } err = psm_finalize(); if (err) { - opal_output(0, "Error in psm_finalize (error %s)\n", + opal_output(0, "Error in psm_finalize (error %s)\n", psm_error_get_string(err)); return OMPI_ERROR; } @@ -255,7 +255,7 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, struct ompi_proc_t** procs) { - int i,j; + int i,j; int rc; psm_epid_t *epids_in = NULL; int *mask_in = NULL; @@ -265,7 +265,7 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, size_t size; int proc_errors[PSM_ERROR_LAST] = { 0 }; int timeout_in_secs; - + assert(mtl == &ompi_mtl_psm.super); rc = OMPI_ERR_OUT_OF_RESOURCE; @@ -295,8 +295,8 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, continue; } - OPAL_MODEX_RECV(rc, &mca_mtl_psm_component.super.mtl_version, - &procs[i]->super, (void**)&epid, &size); + OPAL_MODEX_RECV(rc, &mca_mtl_psm_component.super.mtl_version, + &procs[i]->super.proc_name, (void**)&epid, &size); if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t)) { rc = OMPI_ERROR; goto bail; @@ -331,7 +331,7 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, errstr = (char *) ompi_mtl_psm_connect_error_msg(thiserr); if (proc_errors[thiserr] == 0) { proc_errors[thiserr] = 1; - opal_output(0, "PSM EP connect error (%s):", + opal_output(0, "PSM EP 
connect error (%s):", errstr ? errstr : "unknown connect error"); for (j = 0; j < (int) nprocs; j++) { if (errs_out[j] == thiserr) { @@ -350,14 +350,14 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, * user. PSM prints the error and the offending endpoint's hostname * and exits with -1 */ psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT); - + /* Fill in endpoint data */ - for (i = 0; i < (int) nprocs; i++) { + for (i = 0; i < (int) nprocs; i++) { if (0 == mask_in[i]) { continue; } - mca_mtl_psm_endpoint_t *endpoint = + mca_mtl_psm_endpoint_t *endpoint = (mca_mtl_psm_endpoint_t *) OBJ_NEW(mca_mtl_psm_endpoint_t); endpoint->peer_epid = epids_in[i]; endpoint->peer_addr = epaddrs_out[i]; @@ -366,7 +366,7 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, rc = OMPI_SUCCESS; } - + bail: if (epids_in != NULL) { free(epids_in); @@ -409,7 +409,7 @@ ompi_mtl_psm_del_comm(struct mca_mtl_base_module_t *mtl, } -int ompi_mtl_psm_progress( void ) { +int ompi_mtl_psm_progress( void ) { psm_error_t err; mca_mtl_psm_request_t* mtl_psm_request; psm_mq_status_t psm_status; @@ -423,7 +423,7 @@ int ompi_mtl_psm_progress( void ) { } else if (err != PSM_OK) { goto error; } - + completed++; err = psm_mq_test(&req, &psm_status); @@ -434,7 +434,7 @@ int ompi_mtl_psm_progress( void ) { mtl_psm_request = (mca_mtl_psm_request_t*) psm_status.context; if (mtl_psm_request->type == OMPI_MTL_PSM_IRECV) { - ompi_mtl_datatype_unpack(mtl_psm_request->convertor, + ompi_mtl_datatype_unpack(mtl_psm_request->convertor, mtl_psm_request->buf, psm_status.msg_length); @@ -442,11 +442,11 @@ int ompi_mtl_psm_progress( void ) { PSM_GET_MQRANK(psm_status.msg_tag); mtl_psm_request->super.ompi_req->req_status.MPI_TAG = PSM_GET_MQUTAG(psm_status.msg_tag); - mtl_psm_request->super.ompi_req->req_status._ucount = + mtl_psm_request->super.ompi_req->req_status._ucount = psm_status.nbytes; } - - if(mtl_psm_request->type == OMPI_MTL_PSM_ISEND) { + + if(mtl_psm_request->type == 
OMPI_MTL_PSM_ISEND) { if (mtl_psm_request->free_after) { free(mtl_psm_request->buf); } @@ -454,15 +454,15 @@ int ompi_mtl_psm_progress( void ) { switch (psm_status.error_code) { case PSM_OK: - mtl_psm_request->super.ompi_req->req_status.MPI_ERROR = + mtl_psm_request->super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; break; case PSM_MQ_TRUNCATION: - mtl_psm_request->super.ompi_req->req_status.MPI_ERROR = + mtl_psm_request->super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; break; default: - mtl_psm_request->super.ompi_req->req_status.MPI_ERROR = + mtl_psm_request->super.ompi_req->req_status.MPI_ERROR = MPI_ERR_INTERN; } @@ -471,7 +471,7 @@ int ompi_mtl_psm_progress( void ) { } while (1); - error: + error: opal_show_help("help-mtl-psm.txt", "error polling network", true, psm_error_get_string(err)); diff --git a/ompi/mca/mtl/psm/mtl_psm.h b/ompi/mca/mtl/psm/mtl_psm.h index edc34d41ba4..52a590b3d35 100644 --- a/ompi/mca/mtl/psm/mtl_psm.h +++ b/ompi/mca/mtl/psm/mtl_psm.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,6 +11,8 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,6 +26,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/mca/mtl/mtl.h" #include "ompi/mca/mtl/base/base.h" +#include "ompi/proc/proc.h" #include "opal/datatype/opal_convertor.h" #include #include @@ -31,23 +35,23 @@ BEGIN_C_DECLS /* MTL interface functions */ -extern int ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t* mtl, +extern int ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t* mtl, size_t nprocs, struct ompi_proc_t** procs); - -extern int ompi_mtl_psm_del_procs(struct mca_mtl_base_module_t* mtl, + +extern int ompi_mtl_psm_del_procs(struct mca_mtl_base_module_t* mtl, size_t nprocs, struct ompi_proc_t** procs); int -ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl, +ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t* comm, int dest, int tag, struct opal_convertor_t *convertor, mca_pml_base_send_mode_t mode); -extern int ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl, +extern int ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t* comm, int dest, int tag, @@ -62,9 +66,9 @@ extern int ompi_mtl_psm_irecv(struct mca_mtl_base_module_t* mtl, int tag, struct opal_convertor_t *convertor, struct mca_mtl_request_t *mtl_request); - - -extern int ompi_mtl_psm_iprobe(struct mca_mtl_base_module_t* mtl, + + +extern int ompi_mtl_psm_iprobe(struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t *comm, int src, int tag, @@ -85,7 +89,7 @@ extern int ompi_mtl_psm_improbe(struct mca_mtl_base_module_t *mtl, struct ompi_status_public_t *status); extern int ompi_mtl_psm_cancel(struct mca_mtl_base_module_t* mtl, - struct mca_mtl_request_t *mtl_request, + struct mca_mtl_request_t *mtl_request, int flag); extern int ompi_mtl_psm_add_comm(struct mca_mtl_base_module_t *mtl, @@ -93,13 +97,13 @@ extern int ompi_mtl_psm_add_comm(struct mca_mtl_base_module_t *mtl, extern int ompi_mtl_psm_del_comm(struct mca_mtl_base_module_t *mtl, struct 
ompi_communicator_t *comm); - + extern int ompi_mtl_psm_finalize(struct mca_mtl_base_module_t* mtl); int ompi_mtl_psm_module_init(int local_rank, int num_local_procs); - - + + END_C_DECLS #endif /* MTL_PSM_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/mca/mtl/psm/mtl_psm_cancel.c b/ompi/mca/mtl/psm/mtl_psm_cancel.c index 8c22673d6ce..c797598c04d 100644 --- a/ompi/mca/mtl/psm/mtl_psm_cancel.c +++ b/ompi/mca/mtl/psm/mtl_psm_cancel.c @@ -21,34 +21,34 @@ #include "mtl_psm.h" #include "mtl_psm_request.h" -int ompi_mtl_psm_cancel(struct mca_mtl_base_module_t* mtl, - struct mca_mtl_request_t *mtl_request, +int ompi_mtl_psm_cancel(struct mca_mtl_base_module_t* mtl, + struct mca_mtl_request_t *mtl_request, int flag) { psm_error_t err; psm_mq_status_t status; - - mca_mtl_psm_request_t *mtl_psm_request = - (mca_mtl_psm_request_t*) mtl_request; - + + mca_mtl_psm_request_t *mtl_psm_request = + (mca_mtl_psm_request_t*) mtl_request; + /* PSM does not support canceling sends */ if(OMPI_MTL_PSM_ISEND == mtl_psm_request->type) { return OMPI_SUCCESS; } - err = psm_mq_cancel(&mtl_psm_request->psm_request); - if(PSM_OK == err) { + err = psm_mq_cancel(&mtl_psm_request->psm_request); + if(PSM_OK == err) { err = psm_mq_test(&mtl_psm_request->psm_request, &status); - if(PSM_OK == err) { + if(PSM_OK == err) { mtl_request->ompi_req->req_status._cancelled = true; mtl_psm_request->super.completion_callback(&mtl_psm_request->super); return OMPI_SUCCESS; - } else { + } else { return OMPI_ERROR; } - } else if(PSM_MQ_INCOMPLETE == err) { - return OMPI_SUCCESS; - } - + } else if(PSM_MQ_INCOMPLETE == err) { + return OMPI_SUCCESS; + } + return OMPI_ERROR; } diff --git a/ompi/mca/mtl/psm/mtl_psm_component.c b/ompi/mca/mtl/psm/mtl_psm_component.c index b55d56626b9..ef72bc6e344 100644 --- a/ompi/mca/mtl/psm/mtl_psm_component.c +++ b/ompi/mca/mtl/psm/mtl_psm_component.c @@ -13,7 +13,7 @@ * Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. 
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2016 Intel Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,6 +37,7 @@ #include #include #include +#include static int param_priority; @@ -45,7 +46,7 @@ static int ompi_mtl_psm_component_close(void); static int ompi_mtl_psm_component_query(mca_base_module_t **module, int *priority); static int ompi_mtl_psm_component_register(void); -static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads, +static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads, bool enable_mpi_threads ); mca_mtl_psm_component_t mca_mtl_psm_component = { @@ -53,7 +54,7 @@ mca_mtl_psm_component_t mca_mtl_psm_component = { { /* First, the mca_base_component_t struct containing meta * information about the component itself */ - + .mtl_version = { MCA_MTL_BASE_VERSION_2_0_0, @@ -81,16 +82,17 @@ static mca_base_var_enum_value_t path_query_values[] = { {0, NULL} }; #endif - + static int ompi_mtl_psm_component_register(void) { #if PSM_VERNO >= 0x010d mca_base_var_enum_t *new_enum; #endif - - param_priority = 100; + + /* set priority high enough to beat ob1's default */ + param_priority = 30; (void) mca_base_component_var_register (&mca_mtl_psm_component.super.mtl_version, "priority", "Priority of the PSM MTL component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -138,7 +140,7 @@ ompi_mtl_psm_component_register(void) NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &ompi_mtl_psm.ib_service_level); - + ompi_mtl_psm.ib_pkey = 0x7fffUL; (void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version, "ib_pkey", "Infiniband partition key", @@ -146,7 +148,7 @@ ompi_mtl_psm_component_register(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &ompi_mtl_psm.ib_pkey); - + #if PSM_VERNO >= 0x010d ompi_mtl_psm.ib_service_id = 
0x1000117500000000ull; (void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version, @@ -176,7 +178,7 @@ static int ompi_mtl_psm_component_open(void) { struct stat st; - + if (ompi_mtl_psm.ib_service_level < 0) { ompi_mtl_psm.ib_service_level = 0; } else if (ompi_mtl_psm.ib_service_level > 15) { @@ -184,12 +186,41 @@ ompi_mtl_psm_component_open(void) } /* Component available only if Truescale hardware is present */ - if (0 == stat("/dev/ipath", &st)) { - return OMPI_SUCCESS; + if (0 != stat("/dev/ipath", &st)) { + return OPAL_ERR_NOT_AVAILABLE; } - else { + + /* Component available only if at least one qib port is ACTIVE */ + bool foundOnlineQibPort = false; + size_t i; + char portState[128]; + FILE *devFile; + glob_t globbuf; + globbuf.gl_offs = 0; + if (glob("/sys/class/infiniband/qib*/ports/*/state", + GLOB_DOOFFS, NULL, &globbuf) != 0) { return OPAL_ERR_NOT_AVAILABLE; } + + for (i=0;i < globbuf.gl_pathc; i++) { + devFile = fopen(globbuf.gl_pathv[i], "r"); + fgets(portState, sizeof(portState), devFile); + fclose(devFile); + + if (strstr(portState, "ACTIVE") != NULL) { + /* Found at least one ACTIVE port */ + foundOnlineQibPort = true; + break; + } + } + + globfree(&globbuf); + + if (!foundOnlineQibPort) { + return OPAL_ERR_NOT_AVAILABLE; + } + + return OMPI_SUCCESS; } static int @@ -253,7 +284,7 @@ ompi_mtl_psm_component_init(bool enable_progress_threads, int num_total_procs = 0; /* Compute the total number of processes on this host and our local rank - * on that node. We need to provide PSM with these values so it can + * on that node. We need to provide PSM with these values so it can * allocate hardware contexts appropriately across processes. 
*/ if (OMPI_SUCCESS != get_num_local_procs(&num_local_procs)) { @@ -271,11 +302,11 @@ ompi_mtl_psm_component_init(bool enable_progress_threads, return NULL; } - + #if PSM_VERNO >= 0x010c /* Set infinipath debug level */ - err = psm_setopt(PSM_COMPONENT_CORE, 0, PSM_CORE_OPT_DEBUG, - (const void*) &ompi_mtl_psm.debug_level, + err = psm_setopt(PSM_COMPONENT_CORE, 0, PSM_CORE_OPT_DEBUG, + (const void*) &ompi_mtl_psm.debug_level, sizeof(unsigned)); if (err) { /* Non fatal error. Can continue */ @@ -284,9 +315,9 @@ ompi_mtl_psm_component_init(bool enable_progress_threads, psm_error_get_string(err)); } #endif - + if (getenv("PSM_DEVICES") == NULL) { - /* Only allow for shm and ipath devices in 2.0 and earlier releases + /* Only allow for shm and ipath devices in 2.0 and earlier releases * (unless the user overrides the setting). */ if (PSM_VERNO >= 0x0104) { @@ -304,7 +335,7 @@ ompi_mtl_psm_component_init(bool enable_progress_threads, } } } - + err = psm_init(&verno_major, &verno_minor); if (err) { opal_show_help("help-mtl-psm.txt", @@ -312,23 +343,23 @@ ompi_mtl_psm_component_init(bool enable_progress_threads, psm_error_get_string(err)); return NULL; } - + /* Complete PSM initialization */ ompi_mtl_psm_module_init(local_rank, num_local_procs); - ompi_mtl_psm.super.mtl_request_size = - sizeof(mca_mtl_psm_request_t) - + ompi_mtl_psm.super.mtl_request_size = + sizeof(mca_mtl_psm_request_t) - sizeof(struct mca_mtl_request_t); /* don't register the err handler until we know we will be active */ err = psm_error_register_handler(NULL /* no ep */, PSM_ERRHANDLER_NOP); if (err) { - opal_output(0, "Error in psm_error_register_handler (error %s)\n", + opal_output(0, "Error in psm_error_register_handler (error %s)\n", psm_error_get_string(err)); return NULL; } - + return &ompi_mtl_psm.super; } diff --git a/ompi/mca/mtl/psm/mtl_psm_endpoint.c b/ompi/mca/mtl/psm/mtl_psm_endpoint.c index a58c90d68d3..992777a7252 100644 --- a/ompi/mca/mtl/psm/mtl_psm_endpoint.c +++ 
b/ompi/mca/mtl/psm/mtl_psm_endpoint.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +24,7 @@ #include "ompi/types.h" #include "mtl_psm.h" #include "mtl_psm_types.h" -#include "mtl_psm_endpoint.h" +#include "mtl_psm_endpoint.h" /* * Initialize state of the endpoint instance. @@ -47,8 +47,8 @@ static void mca_mtl_psm_endpoint_destruct(mca_mtl_psm_endpoint_t* endpoint) OBJ_CLASS_INSTANCE( - mca_mtl_psm_endpoint_t, - opal_list_item_t, - mca_mtl_psm_endpoint_construct, + mca_mtl_psm_endpoint_t, + opal_list_item_t, + mca_mtl_psm_endpoint_construct, mca_mtl_psm_endpoint_destruct); diff --git a/ompi/mca/mtl/psm/mtl_psm_endpoint.h b/ompi/mca/mtl/psm/mtl_psm_endpoint.h index 0270493b89e..b08e9fdbc48 100644 --- a/ompi/mca/mtl/psm/mtl_psm_endpoint.h +++ b/ompi/mca/mtl/psm/mtl_psm_endpoint.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -40,10 +43,10 @@ OBJ_CLASS_DECLARATION(mca_mtl_psm_endpoint_t); struct mca_mtl_psm_endpoint_t { opal_list_item_t super; - + struct mca_mtl_psm_module_t* mtl_psm_module; /**< MTL instance that created this connection */ - + psm_epid_t peer_epid; /**< The unique epid for the opened port */ @@ -54,5 +57,14 @@ struct mca_mtl_psm_endpoint_t { typedef struct mca_mtl_psm_endpoint_t mca_mtl_psm_endpoint_t; OBJ_CLASS_DECLARATION(mca_mtl_psm_endpoint); +static inline mca_mtl_psm_endpoint_t *ompi_mtl_psm_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc) +{ + if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) { + ompi_mtl_psm_add_procs (mtl, 1, &ompi_proc); + } + + return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; +} + END_C_DECLS #endif diff --git a/ompi/mca/mtl/psm/mtl_psm_probe.c b/ompi/mca/mtl/psm/mtl_psm_probe.c index eaaf7d000be..c0728c207c5 100644 --- a/ompi/mca/mtl/psm/mtl_psm_probe.c +++ b/ompi/mca/mtl/psm/mtl_psm_probe.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved. * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -26,7 +26,7 @@ #include "ompi/communicator/communicator.h" -int ompi_mtl_psm_iprobe(struct mca_mtl_base_module_t* mtl, +int ompi_mtl_psm_iprobe(struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t *comm, int src, int tag, @@ -42,7 +42,7 @@ int ompi_mtl_psm_iprobe(struct mca_mtl_base_module_t* mtl, err = psm_mq_iprobe(ompi_mtl_psm.mq, mqtag, tagsel, &mqstat); if (err == PSM_OK) { *flag = 1; - if(MPI_STATUS_IGNORE != status) { + if(MPI_STATUS_IGNORE != status) { status->MPI_SOURCE = PSM_GET_MQRANK(mqstat.msg_tag); status->MPI_TAG = PSM_GET_MQUTAG(mqstat.msg_tag); status->_ucount = mqstat.nbytes; @@ -58,7 +58,7 @@ int ompi_mtl_psm_iprobe(struct mca_mtl_base_module_t* mtl, status->MPI_ERROR = MPI_ERR_INTERN; } } - + return OMPI_SUCCESS; } else if (err == PSM_MQ_INCOMPLETE) { diff --git a/ompi/mca/mtl/psm/mtl_psm_recv.c b/ompi/mca/mtl/psm/mtl_psm_recv.c index 3a4c830b46a..b345ae19aa9 100644 --- a/ompi/mca/mtl/psm/mtl_psm_recv.c +++ b/ompi/mca/mtl/psm/mtl_psm_recv.c @@ -36,18 +36,18 @@ ompi_mtl_psm_irecv(struct mca_mtl_base_module_t* mtl, struct opal_convertor_t *convertor, struct mca_mtl_request_t *mtl_request) { - int ret; + int ret; psm_error_t err; mca_mtl_psm_request_t * mtl_psm_request = (mca_mtl_psm_request_t*) mtl_request; uint64_t mqtag; uint64_t tagsel; size_t length; - + ret = ompi_mtl_datatype_recv_buf(convertor, &mtl_psm_request->buf, - &length, + &length, &mtl_psm_request->free_after); - + if (OMPI_SUCCESS != ret) return ret; mtl_psm_request->length = length; @@ -59,7 +59,7 @@ ompi_mtl_psm_irecv(struct mca_mtl_base_module_t* mtl, #if 0 printf("recv bits: 0x%016llx 0x%016llx\n", mqtag, tagsel); #endif - err = psm_mq_irecv(ompi_mtl_psm.mq, + err = psm_mq_irecv(ompi_mtl_psm.mq, mqtag, tagsel, 0, @@ -67,7 +67,7 @@ ompi_mtl_psm_irecv(struct mca_mtl_base_module_t* mtl, length, mtl_psm_request, &mtl_psm_request->psm_request); - + if (err) { opal_show_help("help-mtl-psm.txt", "error posting receive", 
true, diff --git a/ompi/mca/mtl/psm/mtl_psm_request.h b/ompi/mca/mtl/psm/mtl_psm_request.h index 98662e64631..2188c9096f9 100644 --- a/ompi/mca/mtl/psm/mtl_psm_request.h +++ b/ompi/mca/mtl/psm/mtl_psm_request.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,7 +28,7 @@ typedef enum { OMPI_MTL_PSM_IRECV } mca_mtl_psm_request_type_t; -struct mca_mtl_psm_request_t { +struct mca_mtl_psm_request_t { struct mca_mtl_request_t super; mca_mtl_psm_request_type_t type; psm_mq_req_t psm_request; @@ -37,7 +37,7 @@ struct mca_mtl_psm_request_t { size_t length; struct opal_convertor_t *convertor; bool free_after; -}; +}; typedef struct mca_mtl_psm_request_t mca_mtl_psm_request_t; #endif diff --git a/ompi/mca/mtl/psm/mtl_psm_send.c b/ompi/mca/mtl/psm/mtl_psm_send.c index 4f53da54938..c30801b1fbd 100644 --- a/ompi/mca/mtl/psm/mtl_psm_send.c +++ b/ompi/mca/mtl/psm/mtl_psm_send.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,6 +11,8 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,7 +31,7 @@ #include "ompi/mca/mtl/base/mtl_base_datatype.h" int -ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl, +ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t* comm, int dest, int tag, @@ -42,18 +45,18 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl, int ret; size_t length; ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest ); - mca_mtl_psm_endpoint_t* psm_endpoint = (mca_mtl_psm_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + mca_mtl_psm_endpoint_t* psm_endpoint = ompi_mtl_psm_get_endpoint (mtl, ompi_proc); assert(mtl == &ompi_mtl_psm.super); mqtag = PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag); - - ret = ompi_mtl_datatype_pack(convertor, + + ret = ompi_mtl_datatype_pack(convertor, &mtl_psm_request.buf, - &length, + &length, &mtl_psm_request.free_after); - + mtl_psm_request.length = length; mtl_psm_request.convertor = convertor; mtl_psm_request.type = OMPI_MTL_PSM_ISEND; @@ -78,7 +81,7 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl, } int -ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl, +ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t* comm, int dest, int tag, @@ -94,16 +97,16 @@ ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl, mca_mtl_psm_request_t * mtl_psm_request = (mca_mtl_psm_request_t*) mtl_request; size_t length; ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest ); - mca_mtl_psm_endpoint_t* psm_endpoint = (mca_mtl_psm_endpoint_t*)ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; + mca_mtl_psm_endpoint_t* psm_endpoint = ompi_mtl_psm_get_endpoint (mtl, ompi_proc); assert(mtl == &ompi_mtl_psm.super); mqtag = PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag); - - ret = ompi_mtl_datatype_pack(convertor, + + ret = ompi_mtl_datatype_pack(convertor, &mtl_psm_request->buf, - &length, + &length, &mtl_psm_request->free_after); mtl_psm_request->length= 
length; @@ -114,7 +117,7 @@ ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl, if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) flags |= PSM_MQ_FLAG_SENDSYNC; - + psm_error = psm_mq_isend(ompi_mtl_psm.mq, psm_endpoint->peer_addr, flags, @@ -123,6 +126,6 @@ ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl, length, mtl_psm_request, &mtl_psm_request->psm_request); - + return psm_error == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR; } diff --git a/ompi/mca/mtl/psm/mtl_psm_types.h b/ompi/mca/mtl/psm/mtl_psm_types.h index 8516b688e97..9c31f8bd041 100644 --- a/ompi/mca/mtl/psm/mtl_psm_types.h +++ b/ompi/mca/mtl/psm/mtl_psm_types.h @@ -27,27 +27,27 @@ #include "ompi/mca/mtl/mtl.h" #include "ompi/mca/mtl/base/base.h" -#include "mtl_psm_endpoint.h" +#include "mtl_psm_endpoint.h" #include "psm.h" BEGIN_C_DECLS -/** +/** * MTL Module Interface */ -struct mca_mtl_psm_module_t { +struct mca_mtl_psm_module_t { mca_mtl_base_module_t super; /**< base MTL interface */ int32_t connect_timeout; - + int32_t debug_level; int32_t ib_unit; int32_t ib_port; int32_t ib_service_level; uint64_t ib_pkey; - + #if PSM_VERNO >= 0x010d unsigned long long ib_service_id; /* use int instead of psm_path_res_t so we can register this with @@ -59,19 +59,19 @@ struct mca_mtl_psm_module_t { psm_mq_t mq; psm_epid_t epid; psm_epaddr_t epaddr; -}; +}; typedef struct mca_mtl_psm_module_t mca_mtl_psm_module_t; extern mca_mtl_psm_module_t ompi_mtl_psm; -struct mca_mtl_psm_component_t { - mca_mtl_base_component_2_0_0_t super; /**< base MTL component */ +struct mca_mtl_psm_component_t { + mca_mtl_base_component_2_0_0_t super; /**< base MTL component */ }; typedef struct mca_mtl_psm_component_t mca_mtl_psm_component_t; OMPI_DECLSPEC extern mca_mtl_psm_component_t mca_mtl_psm_component; - + #define PSM_MAKE_MQTAG(ctxt,rank,utag) \ ( (((ctxt)&0xffffULL)<<48)| (((rank)&0xffffULL)<<32)| \ (((utag)&0xffffffffULL)) ) diff --git a/ompi/mca/mtl/psm2/Makefile.am b/ompi/mca/mtl/psm2/Makefile.am new file mode 100644 index 
00000000000..fa3c5201bb6 --- /dev/null +++ b/ompi/mca/mtl/psm2/Makefile.am @@ -0,0 +1,61 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST = post_configure.sh + +AM_CPPFLAGS = $(mtl_psm2_CPPFLAGS) + +dist_ompidata_DATA = help-mtl-psm2.txt + +mtl_psm2_sources = \ + mtl_psm2.c \ + mtl_psm2.h \ + mtl_psm2_cancel.c \ + mtl_psm2_component.c \ + mtl_psm2_endpoint.c \ + mtl_psm2_endpoint.h \ + mtl_psm2_probe.c \ + mtl_psm2_recv.c \ + mtl_psm2_request.h \ + mtl_psm2_send.c \ + mtl_psm2_types.h + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). 
+ +if MCA_BUILD_ompi_mtl_psm2_DSO +component_noinst = +component_install = mca_mtl_psm2.la +else +component_noinst = libmca_mtl_psm2.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_mtl_psm2_la_SOURCES = $(mtl_psm2_sources) +mca_mtl_psm2_la_LIBADD = $(mtl_psm2_LIBS) +mca_mtl_psm2_la_LDFLAGS = -module -avoid-version $(mtl_psm2_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_mtl_psm2_la_SOURCES = $(mtl_psm2_sources) +libmca_mtl_psm2_la_LIBADD = $(mtl_psm2_LIBS) +libmca_mtl_psm2_la_LDFLAGS = -module -avoid-version $(mtl_psm2_LDFLAGS) diff --git a/ompi/mca/mtl/psm2/configure.m4 b/ompi/mca/mtl/psm2/configure.m4 new file mode 100644 index 00000000000..c72c5fd03f6 --- /dev/null +++ b/ompi/mca/mtl/psm2/configure.m4 @@ -0,0 +1,49 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013 Sandia National Laboratories. All rights reserved. +# Copyright (c) 2014 Intel Corporation. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_ompi_mtl_psm2_POST_CONFIG(will_build) +# ---------------------------------------- +# Only require the tag if we're actually going to be built +AC_DEFUN([MCA_ompi_mtl_psm2_POST_CONFIG], [ + AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([MTL])]) +])dnl + +# MCA_mtl_psm2_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_ompi_mtl_psm2_CONFIG],[ + AC_CONFIG_FILES([ompi/mca/mtl/psm2/Makefile]) + + OMPI_CHECK_PSM2([mtl_psm2], + [mtl_psm2_happy="yes"], + [mtl_psm2_happy="no"]) + + AS_IF([test "$mtl_psm2_happy" = "yes"], + [$1], + [$2]) + + # substitute in the things needed to build psm2 + AC_SUBST([mtl_psm2_CFLAGS]) + AC_SUBST([mtl_psm2_CPPFLAGS]) + AC_SUBST([mtl_psm2_LDFLAGS]) + AC_SUBST([mtl_psm2_LIBS]) +])dnl diff --git a/ompi/mca/mtl/psm2/help-mtl-psm2.txt b/ompi/mca/mtl/psm2/help-mtl-psm2.txt new file mode 100644 index 00000000000..16c5116a2f9 --- /dev/null +++ b/ompi/mca/mtl/psm2/help-mtl-psm2.txt @@ -0,0 +1,44 @@ +# -*- text -*- +# +# Copyright (C) 2009. QLogic Corporation. All rights reserved. +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +[psm2 init] +Initialization of PSM2 library failed. + + Error: %s +# +[debug level] +Unable to set PSM2 debug level. + + Error: %s +# +[unable to open endpoint] +PSM2 was unable to open an endpoint. Please make sure that the network link is +active on the node and the hardware is functioning. + + Error: %s +# +[no uuid present] +Error obtaining unique transport key from ORTE (orte_precondition_transports %s +the environment). + + Local host: %s +# +[error polling network] +Error %s occurred in attempting to make network progress (psm2_mq_ipeek). +# +[error posting receive] +Unable to post application receive buffer (psm2_mq_irecv or psm2_mq_imrecv). 
+ + Error: %s + Buffer: %p + Length: %d +# +[path query mechanism unknown] +Unknown path record query mechanism %s. Supported mechanisms are %s. diff --git a/ompi/mca/mtl/psm2/mtl_psm2.c b/ompi/mca/mtl/psm2/mtl_psm2.c new file mode 100644 index 00000000000..55d0dde4e18 --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2.c @@ -0,0 +1,463 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2010 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/pmix/pmix.h" +#include "ompi/mca/mtl/mtl.h" +#include "ompi/mca/mtl/base/mtl_base_datatype.h" +#include "opal/util/show_help.h" +#include "ompi/proc/proc.h" + +#include "mtl_psm2.h" +#include "mtl_psm2_types.h" +#include "mtl_psm2_endpoint.h" +#include "mtl_psm2_request.h" + +mca_mtl_psm2_module_t ompi_mtl_psm2 = { + .super = { + /* NTH: PSM2 supports 16 bit context ids */ + .mtl_max_contextid = (1UL << 16) - 1, + .mtl_max_tag = (1UL << 30), /* must allow negatives */ + + .mtl_add_procs = ompi_mtl_psm2_add_procs, + .mtl_del_procs = ompi_mtl_psm2_del_procs, + .mtl_finalize = ompi_mtl_psm2_finalize, + + .mtl_send = ompi_mtl_psm2_send, + .mtl_isend = ompi_mtl_psm2_isend, + + .mtl_irecv = ompi_mtl_psm2_irecv, + .mtl_iprobe = ompi_mtl_psm2_iprobe, + .mtl_imrecv = ompi_mtl_psm2_imrecv, + .mtl_improbe = ompi_mtl_psm2_improbe, + + .mtl_cancel = ompi_mtl_psm2_cancel, + .mtl_add_comm = ompi_mtl_psm2_add_comm, + .mtl_del_comm = ompi_mtl_psm2_del_comm + } +}; + +static +psm2_error_t +ompi_mtl_psm2_errhandler(psm2_ep_t ep, const psm2_error_t error, + const char *error_string, psm2_error_token_t token) +{ + switch (error) { + /* We don't want PSM2 to default to exiting when the following errors occur */ + case PSM2_EP_DEVICE_FAILURE: + case PSM2_EP_NO_DEVICE: + case PSM2_EP_NO_PORTS_AVAIL: + case PSM2_EP_NO_NETWORK: + case PSM2_EP_INVALID_UUID_KEY: + opal_show_help("help-mtl-psm2.txt", + "unable to open endpoint", true, + psm2_error_get_string(error)); + break; + + /* We can't handle any other errors than the ones above */ + default: + opal_output(0, "Open MPI detected an unexpected PSM2 error in opening " + "an endpoint: %s\n", error_string); + return psm2_error_defer(token); + break; + } + return error; +} + +int ompi_mtl_psm2_progress( void ); + +int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) { + psm2_error_t err; + psm2_ep_t ep; 
/* endpoint handle */ + psm2_mq_t mq; + psm2_epid_t epid; /* unique lid+port identifier */ + psm2_uuid_t unique_job_key; + struct psm2_ep_open_opts ep_opt; + unsigned long long *uu = (unsigned long long *) unique_job_key; + char *generated_key; + char env_string[256]; + int rc; + + generated_key = getenv("OMPI_MCA_orte_precondition_transports"); + memset(uu, 0, sizeof(psm2_uuid_t)); + + if (!generated_key || (strlen(generated_key) != 33) || + sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) + { + opal_show_help("help-mtl-psm2.txt", + "no uuid present", true, + generated_key ? "could not be parsed from" : + "not present in", ompi_process_info.nodename); + return OMPI_ERROR; + + } + + /* Handle our own errors for opening endpoints */ + psm2_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler); + + /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM2 can allocate hardware + * contexts correctly. + */ + snprintf(env_string, sizeof(env_string), "%d", local_rank); + setenv("MPI_LOCALRANKID", env_string, 0); + snprintf(env_string, sizeof(env_string), "%d", num_local_procs); + setenv("MPI_LOCALNRANKS", env_string, 0); + + /* Setup the endpoint options. 
*/ + psm2_ep_open_opts_get_defaults(&ep_opt); + ep_opt.timeout = ompi_mtl_psm2.connect_timeout * 1e9; + ep_opt.affinity = PSM2_EP_OPEN_AFFINITY_SKIP; /* do not let PSM2 set affinity */ + + /* Open PSM2 endpoint */ + err = psm2_ep_open(unique_job_key, &ep_opt, &ep, &epid); + if (err) { + opal_show_help("help-mtl-psm2.txt", + "unable to open endpoint", true, + psm2_error_get_string(err)); + return OMPI_ERROR; + } + + /* Future errors are handled by the default error handler */ + psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT); + + err = psm2_mq_init(ep, + 0xffff000000000000ULL, + NULL, + 0, + &mq); + if (err) { + opal_show_help("help-mtl-psm2.txt", + "psm2 init", true, + psm2_error_get_string(err)); + return OMPI_ERROR; + } + + ompi_mtl_psm2.ep = ep; + ompi_mtl_psm2.epid = epid; + ompi_mtl_psm2.mq = mq; + + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, + &mca_mtl_psm2_component.super.mtl_version, + &ompi_mtl_psm2.epid, + sizeof(psm2_epid_t)); + + if (OMPI_SUCCESS != rc) { + opal_output(0, "Open MPI couldn't send PSM2 epid to head node process"); + return OMPI_ERROR; + } + + + /* register the psm2 progress function */ + opal_progress_register(ompi_mtl_psm2_progress); + + return OMPI_SUCCESS; +} + +int +ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl) { + psm2_error_t err; + + opal_progress_unregister(ompi_mtl_psm2_progress); + + /* free resources */ + err = psm2_mq_finalize(ompi_mtl_psm2.mq); + if (err) { + opal_output(0, "Error in psm2_mq_finalize (error %s)\n", + psm2_error_get_string(err)); + return OMPI_ERROR; + } + + err = psm2_ep_close(ompi_mtl_psm2.ep, PSM2_EP_CLOSE_GRACEFUL, 1*1e9); + if (err) { + opal_output(0, "Error in psm2_ep_close (error %s)\n", + psm2_error_get_string(err)); + return OMPI_ERROR; + } + + err = psm2_finalize(); + if (err) { + opal_output(0, "Error in psm2_finalize (error %s)\n", + psm2_error_get_string(err)); + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + +static +const char * 
+ompi_mtl_psm2_connect_error_msg(psm2_error_t err) +{ + switch (err) { /* See if we expect the error */ + case PSM2_EPID_UNREACHABLE: + case PSM2_EPID_INVALID_NODE: + case PSM2_EPID_INVALID_MTU: + case PSM2_EPID_INVALID_UUID_KEY: + case PSM2_EPID_INVALID_VERSION: + case PSM2_EPID_INVALID_CONNECT: + return psm2_error_get_string(err); + break; + case PSM2_EPID_UNKNOWN: + return "Connect status could not be determined " + "because of other errors"; + default: + return NULL; + } +} + +#ifndef min +# define min(a,b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef max +# define max(a,b) ((a) > (b) ? (a) : (b)) +#endif + +int +ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl, + size_t nprocs, + struct ompi_proc_t** procs) +{ + int i,j; + int rc; + psm2_epid_t *epids_in = NULL; + int *mask_in = NULL; + psm2_epid_t *epid; + psm2_epaddr_t *epaddrs_out = NULL; + psm2_error_t *errs_out = NULL, err; + size_t size; + int proc_errors[PSM2_ERROR_LAST] = { 0 }; + int timeout_in_secs; + + assert(mtl == &ompi_mtl_psm2.super); + rc = OMPI_ERR_OUT_OF_RESOURCE; + + errs_out = (psm2_error_t *) malloc(nprocs * sizeof(psm2_error_t)); + if (errs_out == NULL) { + goto bail; + } + epids_in = (psm2_epid_t *) malloc(nprocs * sizeof(psm2_epid_t)); + if (epids_in == NULL) { + goto bail; + } + mask_in = (int *) malloc(nprocs * sizeof(int)); + if (mask_in == NULL) { + goto bail; + } + epaddrs_out = (psm2_epaddr_t *) malloc(nprocs * sizeof(psm2_epaddr_t)); + if (epaddrs_out == NULL) { + goto bail; + } + rc = OMPI_SUCCESS; + + /* Get the epids for all the processes from modex */ + for (i = 0; i < (int) nprocs; i++) { + if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) { + /* Already connected: don't connect again */ + mask_in[i] = 0; + continue; + } + + OPAL_MODEX_RECV(rc, &mca_mtl_psm2_component.super.mtl_version, + &procs[i]->super.proc_name, (void**)&epid, &size); + if (rc != OMPI_SUCCESS || size != sizeof(psm2_epid_t)) { + return OMPI_ERROR; + } + epids_in[i] = *epid; + 
mask_in[i] = 1; + } + + timeout_in_secs = max(ompi_mtl_psm2.connect_timeout, 0.5 * nprocs); + + psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_NOP); + + err = psm2_ep_connect(ompi_mtl_psm2.ep, + nprocs, + epids_in, + mask_in, + errs_out, + epaddrs_out, + timeout_in_secs * 1e9); + if (err) { + char *errstr = (char *) ompi_mtl_psm2_connect_error_msg(err); + if (errstr == NULL) { + opal_output(0, "PSM2 returned unhandled/unknown connect error: %s\n", + psm2_error_get_string(err)); + } + for (i = 0; i < (int) nprocs; i++) { + if (0 == mask_in[i]) { + continue; + } + + psm2_error_t thiserr = errs_out[i]; + errstr = (char *) ompi_mtl_psm2_connect_error_msg(thiserr); + if (proc_errors[thiserr] == 0) { + proc_errors[thiserr] = 1; + opal_output(0, "PSM2 EP connect error (%s):", + errstr ? errstr : "unknown connect error"); + for (j = 0; j < (int) nprocs; j++) { + if (errs_out[j] == thiserr) { + opal_output(0, " %s", (NULL == procs[j]->super.proc_hostname) ? + "unknown" : procs[j]->super.proc_hostname); + } + } + opal_output(0, "\n"); + } + } + + rc = OMPI_ERROR; + } + else { + /* Default error handling is enabled, errors will not be returned to + * user. 
PSM2 prints the error and the offending endpoint's hostname + * and exits with -1 */ + psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT); + + /* Fill in endpoint data */ + for (i = 0; i < (int) nprocs; i++) { + if (0 == mask_in[i]) { + continue; + } + + mca_mtl_psm2_endpoint_t *endpoint = + (mca_mtl_psm2_endpoint_t *) OBJ_NEW(mca_mtl_psm2_endpoint_t); + endpoint->peer_epid = epids_in[i]; + endpoint->peer_addr = epaddrs_out[i]; + procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint; + } + + rc = OMPI_SUCCESS; + } + +bail: + if (epids_in != NULL) { + free(epids_in); + } + if (mask_in != NULL) { + free(mask_in); + } + if (errs_out != NULL) { + free(errs_out); + } + if (epaddrs_out != NULL) { + free(epaddrs_out); + } + + return rc; +} + +int +ompi_mtl_psm2_del_procs(struct mca_mtl_base_module_t *mtl, + size_t nprocs, + struct ompi_proc_t** procs) +{ + return OMPI_SUCCESS; +} + + +int +ompi_mtl_psm2_add_comm(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm) +{ + return OMPI_SUCCESS; +} + + +int +ompi_mtl_psm2_del_comm(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm) +{ + return OMPI_SUCCESS; +} + + +int ompi_mtl_psm2_progress( void ) { + psm2_error_t err; + mca_mtl_psm2_request_t* mtl_psm2_request; + psm2_mq_status2_t psm2_status; + psm2_mq_req_t req; + int completed = 1; + + do { + err = psm2_mq_ipeek2(ompi_mtl_psm2.mq, &req, NULL); + if (err == PSM2_MQ_INCOMPLETE) { + return completed; + } else if (err != PSM2_OK) { + goto error; + } + + completed++; + + err = psm2_mq_test2(&req, &psm2_status); + if (err != PSM2_OK) { + goto error; + } + + mtl_psm2_request = (mca_mtl_psm2_request_t*) psm2_status.context; + + if (mtl_psm2_request->type == OMPI_mtl_psm2_IRECV) { + + mtl_psm2_request->super.ompi_req->req_status.MPI_SOURCE = + psm2_status.msg_tag.tag1; + mtl_psm2_request->super.ompi_req->req_status.MPI_TAG = + psm2_status.msg_tag.tag0; + mtl_psm2_request->super.ompi_req->req_status._ucount = + 
psm2_status.nbytes; + + ompi_mtl_datatype_unpack(mtl_psm2_request->convertor, + mtl_psm2_request->buf, + psm2_status.msg_length); + } + + if(mtl_psm2_request->type == OMPI_mtl_psm2_ISEND) { + if (mtl_psm2_request->free_after) { + free(mtl_psm2_request->buf); + } + } + + switch (psm2_status.error_code) { + case PSM2_OK: + mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR = + OMPI_SUCCESS; + break; + case PSM2_MQ_TRUNCATION: + mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR = + MPI_ERR_TRUNCATE; + break; + default: + mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR = + MPI_ERR_INTERN; + } + + mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super); + + } + while (1); + + error: + opal_show_help("help-mtl-psm2.txt", + "error polling network", true, + psm2_error_get_string(err)); + return 1; +} diff --git a/ompi/mca/mtl/psm2/mtl_psm2.h b/ompi/mca/mtl/psm2/mtl_psm2.h new file mode 100644 index 00000000000..44152656bf2 --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2.h @@ -0,0 +1,110 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MTL_PSM2_H_HAS_BEEN_INCLUDED +#define MTL_PSM2_H_HAS_BEEN_INCLUDED + +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/mtl/mtl.h" +#include "ompi/mca/mtl/base/base.h" +#include "ompi/proc/proc.h" +#include "opal/datatype/opal_convertor.h" +#include +#include + +BEGIN_C_DECLS + + +/* MTL interface functions */ +extern int ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t* mtl, + size_t nprocs, + struct ompi_proc_t** procs); + +extern int ompi_mtl_psm2_del_procs(struct mca_mtl_base_module_t* mtl, + size_t nprocs, + struct ompi_proc_t** procs); + +int +ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t* comm, + int dest, + int tag, + struct opal_convertor_t *convertor, + mca_pml_base_send_mode_t mode); + +extern int ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t* comm, + int dest, + int tag, + struct opal_convertor_t *convertor, + mca_pml_base_send_mode_t mode, + bool blocking, + mca_mtl_request_t * mtl_request); + +extern int ompi_mtl_psm2_irecv(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + struct opal_convertor_t *convertor, + struct mca_mtl_request_t *mtl_request); + + +extern int ompi_mtl_psm2_iprobe(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + int *flag, + struct ompi_status_public_t *status); + +extern int ompi_mtl_psm2_imrecv(struct mca_mtl_base_module_t* mtl, + struct opal_convertor_t *convertor, + struct ompi_message_t **message, + struct mca_mtl_request_t *mtl_request); + +extern int ompi_mtl_psm2_improbe(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + int *matched, + struct ompi_message_t **message, + struct ompi_status_public_t *status); + +extern int ompi_mtl_psm2_cancel(struct mca_mtl_base_module_t* mtl, + struct mca_mtl_request_t *mtl_request, + int 
flag); + +extern int ompi_mtl_psm2_add_comm(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm); + +extern int ompi_mtl_psm2_del_comm(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm); + +extern int ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl); + +int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs); + + + +END_C_DECLS + +#endif /* MTL_PSM2_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/mca/mtl/psm2/mtl_psm2_cancel.c b/ompi/mca/mtl/psm2/mtl_psm2_cancel.c new file mode 100644 index 00000000000..22a8f827c83 --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2_cancel.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2015 Intel, Inc. 
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "mtl_psm2.h" +#include "mtl_psm2_request.h" + +int ompi_mtl_psm2_cancel(struct mca_mtl_base_module_t* mtl, + struct mca_mtl_request_t *mtl_request, + int flag) { + + psm2_error_t err; + psm2_mq_status_t status; + + mca_mtl_psm2_request_t *mtl_psm2_request = + (mca_mtl_psm2_request_t*) mtl_request; + + /* PSM2 does not support canceling sends */ + if(OMPI_mtl_psm2_ISEND == mtl_psm2_request->type) { + return OMPI_SUCCESS; + } + + err = psm2_mq_cancel(&mtl_psm2_request->psm2_request); + if(PSM2_OK == err) { + err = psm2_mq_test(&mtl_psm2_request->psm2_request, &status); + if(PSM2_OK == err) { + mtl_request->ompi_req->req_status._cancelled = true; + mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super); + return OMPI_SUCCESS; + } else { + return OMPI_ERROR; + } + } else if(PSM2_MQ_INCOMPLETE == err) { + return OMPI_SUCCESS; + } + + return OMPI_ERROR; +} diff --git a/ompi/mca/mtl/psm2/mtl_psm2_component.c b/ompi/mca/mtl/psm2/mtl_psm2_component.c new file mode 100644 index 00000000000..26bccd22049 --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2_component.c @@ -0,0 +1,253 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. + * All rights reserved. 
+ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/mca/event/event.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "ompi/proc/proc.h" + +#include "mtl_psm2.h" +#include "mtl_psm2_types.h" +#include "mtl_psm2_request.h" + +#include "psm2.h" + +#include +#include +#include +#include + +static int param_priority; + +static int ompi_mtl_psm2_component_open(void); +static int ompi_mtl_psm2_component_close(void); +static int ompi_mtl_psm2_component_query(mca_base_module_t **module, int *priority); +static int ompi_mtl_psm2_component_register(void); + +static mca_mtl_base_module_t* ompi_mtl_psm2_component_init( bool enable_progress_threads, + bool enable_mpi_threads ); + +mca_mtl_psm2_component_t mca_mtl_psm2_component = { + + { + /* First, the mca_base_component_t struct containing meta + * information about the component itself */ + + .mtl_version = { + MCA_MTL_BASE_VERSION_2_0_0, + + .mca_component_name = "psm2", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + .mca_open_component = ompi_mtl_psm2_component_open, + .mca_close_component = ompi_mtl_psm2_component_close, + .mca_query_component = ompi_mtl_psm2_component_query, + .mca_register_component_params = ompi_mtl_psm2_component_register, + }, + .mtl_data = { + /* The component is not checkpoint ready */ + MCA_BASE_METADATA_PARAM_NONE + }, + + .mtl_init = ompi_mtl_psm2_component_init, + } +}; + +static int +ompi_mtl_psm2_component_register(void) +{ + ompi_mtl_psm2.connect_timeout = 180; + (void) mca_base_component_var_register(&mca_mtl_psm2_component.super.mtl_version, + "connect_timeout", + "PSM2 connection timeout value in seconds", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_mtl_psm2.connect_timeout); + + /* set priority high enough to beat ob1's 
default (also set higher than psm) */ + param_priority = 40; + (void) mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version, + "priority", "Priority of the PSM2 MTL component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &param_priority); + + return OMPI_SUCCESS; +} + +static int +ompi_mtl_psm2_component_open(void) +{ + glob_t globbuf; + globbuf.gl_offs = 0; + + /* Component available only if Omni-Path hardware is present */ + if ((glob("/dev/hfi1_[0-9]", GLOB_DOOFFS, NULL, &globbuf) != 0) && + (glob("/dev/hfi1_[0-9][0-9]", GLOB_APPEND, NULL, &globbuf) != 0)) { + return OPAL_ERR_NOT_AVAILABLE; + } + + globfree(&globbuf); + + /* Component available only if at least one hfi1 port is ACTIVE */ + bool foundOnlineHfi1Port = false; + size_t i; + char portState[128]; + FILE *devFile; + if (glob("/sys/class/infiniband/hfi1_*/ports/*/state", + GLOB_DOOFFS, NULL, &globbuf) != 0) { + return OPAL_ERR_NOT_AVAILABLE; + } + + for (i=0;i < globbuf.gl_pathc; i++) { + devFile = fopen(globbuf.gl_pathv[i], "r"); + if (NULL == devFile) { + continue; /* unreadable state file: it cannot show an ACTIVE port */ + } + char *stateLine = fgets(portState, sizeof(portState), devFile); + fclose(devFile); + if (NULL != stateLine && strstr(portState, "ACTIVE") != NULL) { + foundOnlineHfi1Port = true; /* Found at least one ACTIVE port */ + break; + } + } + globfree(&globbuf); + + if (!foundOnlineHfi1Port) { + return OPAL_ERR_NOT_AVAILABLE; + } + + return OMPI_SUCCESS; +} + +static int +ompi_mtl_psm2_component_query(mca_base_module_t **module, int *priority) +{ + /* + * if we get here it means that PSM2 is available so give high priority + */ + + *priority = param_priority; + *module = (mca_base_module_t *)&ompi_mtl_psm2.super; + return OMPI_SUCCESS; +} + +static int +ompi_mtl_psm2_component_close(void) +{ + return OMPI_SUCCESS; +} + +static int +get_num_total_procs(int *out_ntp) +{ + *out_ntp = (int)ompi_process_info.num_procs; + return OMPI_SUCCESS; +} + +static int +get_num_local_procs(int *out_nlp) +{ + /* num_local_peers does not include us in + * its calculation, 
so adjust for that */ + *out_nlp = (int)(1 + ompi_process_info.num_local_peers); + return OMPI_SUCCESS; +} + +static int +get_local_rank(int *out_rank) +{ + ompi_node_rank_t my_node_rank; + + *out_rank = 0; + + if (OMPI_NODE_RANK_INVALID == (my_node_rank = + ompi_process_info.my_node_rank)) { + return OMPI_ERROR; + } + *out_rank = (int)my_node_rank; + return OMPI_SUCCESS; +} + +static mca_mtl_base_module_t * +ompi_mtl_psm2_component_init(bool enable_progress_threads, + bool enable_mpi_threads) +{ + psm2_error_t err; + int verno_major = PSM2_VERNO_MAJOR; + int verno_minor = PSM2_VERNO_MINOR; + int local_rank = -1, num_local_procs = 0; + int num_total_procs = 0; + + /* Compute the total number of processes on this host and our local rank + * on that node. We need to provide PSM2 with these values so it can + * allocate hardware contexts appropriately across processes. + */ + if (OMPI_SUCCESS != get_num_local_procs(&num_local_procs)) { + opal_output(0, "Cannot determine number of local processes. " + "Cannot continue.\n"); + return NULL; + } + if (OMPI_SUCCESS != get_local_rank(&local_rank)) { + opal_output(0, "Cannot determine local rank. Cannot continue.\n"); + return NULL; + } + if (OMPI_SUCCESS != get_num_total_procs(&num_total_procs)) { + opal_output(0, "Cannot determine total number of processes. 
" + "Cannot continue.\n"); + return NULL; + } + + err = psm2_error_register_handler(NULL /* no ep */, + PSM2_ERRHANDLER_NOP); + if (err) { + opal_output(0, "Error in psm2_error_register_handler (error %s)\n", + psm2_error_get_string(err)); + return NULL; + } + + if (num_local_procs == num_total_procs) { + setenv("PSM2_DEVICES", "self,shm", 0); + } + + err = psm2_init(&verno_major, &verno_minor); + if (err) { + opal_show_help("help-mtl-psm2.txt", + "psm2 init", true, + psm2_error_get_string(err)); + return NULL; + } + + /* Complete PSM2 initialization; do not hand back a module whose init failed */ + if (OMPI_SUCCESS != ompi_mtl_psm2_module_init(local_rank, num_local_procs)) { + return NULL; + } + ompi_mtl_psm2.super.mtl_request_size = + sizeof(mca_mtl_psm2_request_t) - sizeof(struct mca_mtl_request_t); + + return &ompi_mtl_psm2.super; +} diff --git a/ompi/mca/mtl/psm2/mtl_psm2_endpoint.c b/ompi/mca/mtl/psm2/mtl_psm2_endpoint.c new file mode 100644 index 00000000000..76d17815f8f --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2_endpoint.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include +#include +#include "ompi/types.h" +#include "mtl_psm2.h" +#include "mtl_psm2_types.h" +#include "mtl_psm2_endpoint.h" + +/* + * Initialize state of the endpoint instance. 
+ * + */ + +static void mca_mtl_psm2_endpoint_construct(mca_mtl_psm2_endpoint_t* endpoint) +{ + endpoint->mtl_psm2_module = NULL; +} + +/* + * Destroy an endpoint + * + */ + +static void mca_mtl_psm2_endpoint_destruct(mca_mtl_psm2_endpoint_t* endpoint) +{ +} + + +OBJ_CLASS_INSTANCE( + mca_mtl_psm2_endpoint_t, + opal_list_item_t, + mca_mtl_psm2_endpoint_construct, + mca_mtl_psm2_endpoint_destruct); diff --git a/ompi/mca/mtl/psm2/mtl_psm2_endpoint.h b/ompi/mca/mtl/psm2/mtl_psm2_endpoint.h new file mode 100644 index 00000000000..d90ca227d98 --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2_endpoint.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_MTL_PSM2_ENDPOINT_H +#define MCA_MTL_PSM2_ENDPOINT_H + +#include "opal/class/opal_list.h" +#include "opal/mca/event/event.h" +#include "ompi/mca/mtl/mtl.h" +#include "mtl_psm2.h" + +#include "psm2.h" + +BEGIN_C_DECLS + +OBJ_CLASS_DECLARATION(mca_mtl_psm2_endpoint_t); + +/** + * An abstraction that represents a connection to an endpoint process. + * An instance of mca_mtl_psm2_endpoint_t is associated w/ each process + * and MTL pair at startup. 
However, connections to the endpoint + * are established dynamically on an as-needed basis: + */ + +struct mca_mtl_psm2_endpoint_t { + opal_list_item_t super; + + struct mca_mtl_psm2_module_t* mtl_psm2_module; + /**< MTL instance that created this connection */ + + psm2_epid_t peer_epid; + /**< The unique epid for the opened port */ + + psm2_epaddr_t peer_addr; + /**< The connected endpoint handle*/ +}; + +typedef struct mca_mtl_psm2_endpoint_t mca_mtl_psm2_endpoint_t; +OBJ_CLASS_DECLARATION(mca_mtl_psm2_endpoint); + +static inline mca_mtl_psm2_endpoint_t *ompi_mtl_psm2_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc) +{ + if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) { + ompi_mtl_psm2_add_procs (mtl, 1, &ompi_proc); + } + + return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; +} + +END_C_DECLS +#endif diff --git a/ompi/mca/mtl/psm2/mtl_psm2_probe.c b/ompi/mca/mtl/psm2/mtl_psm2_probe.c new file mode 100644 index 00000000000..b81317507be --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2_probe.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2010 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2013-2015 Intel, Inc. 
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "mtl_psm2.h" +#include "mtl_psm2_types.h" +#include "psm2.h" +#include "ompi/communicator/communicator.h" +#include "ompi/message/message.h" + + +int ompi_mtl_psm2_iprobe(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + int *flag, + struct ompi_status_public_t *status) +{ + psm2_mq_tag_t mqtag, tagsel; + psm2_mq_status2_t mqstat; + psm2_error_t err; + + PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + + err = psm2_mq_iprobe2(ompi_mtl_psm2.mq, + PSM2_MQ_ANY_ADDR, &mqtag, &tagsel, &mqstat); + if (err == PSM2_OK) { + *flag = 1; + if(MPI_STATUS_IGNORE != status) { + status->MPI_SOURCE = mqstat.msg_tag.tag1; + status->MPI_TAG = mqstat.msg_tag.tag0; + status->_ucount = mqstat.nbytes; + + switch (mqstat.error_code) { + case PSM2_OK: + status->MPI_ERROR = OMPI_SUCCESS; + break; + case PSM2_MQ_TRUNCATION: + status->MPI_ERROR = MPI_ERR_TRUNCATE; + break; + default: + status->MPI_ERROR = MPI_ERR_INTERN; + } + } + + return OMPI_SUCCESS; + } + else if (err == PSM2_MQ_INCOMPLETE) { + *flag = 0; + return OMPI_SUCCESS; + } + else + return OMPI_ERROR; +} + + +int +ompi_mtl_psm2_improbe(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + int *matched, + struct ompi_message_t **message, + struct ompi_status_public_t *status) +{ + struct ompi_message_t* msg; + psm2_mq_tag_t mqtag, tagsel; + psm2_mq_status2_t mqstat; + psm2_mq_req_t mqreq; + psm2_error_t err; + + PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + + err = psm2_mq_improbe2(ompi_mtl_psm2.mq, + PSM2_MQ_ANY_ADDR, &mqtag, &tagsel, &mqreq, &mqstat); + if (err == PSM2_OK) { + + if(MPI_STATUS_IGNORE != status) { + status->MPI_SOURCE = mqstat.msg_tag.tag1; + status->MPI_TAG = mqstat.msg_tag.tag0; + status->_ucount = mqstat.nbytes; + + switch (mqstat.error_code) { + case PSM2_OK: 
+ status->MPI_ERROR = OMPI_SUCCESS; + break; + case PSM2_MQ_TRUNCATION: + status->MPI_ERROR = MPI_ERR_TRUNCATE; + break; + default: + status->MPI_ERROR = MPI_ERR_INTERN; + } + } + + msg = ompi_message_alloc(); + if(NULL == msg) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + msg->comm = comm; + msg->req_ptr = mqreq; + msg->peer = mqstat.msg_tag.tag1; + msg->count = mqstat.nbytes; + + *message = msg; + *matched = 1; + return OMPI_SUCCESS; + } else if(err == PSM2_MQ_INCOMPLETE) { + *matched = 0; + *message = MPI_MESSAGE_NULL; + return OMPI_SUCCESS; + } else { + return OMPI_ERROR; + } +} diff --git a/ompi/mca/mtl/psm2/mtl_psm2_recv.c b/ompi/mca/mtl/psm2/mtl_psm2_recv.c new file mode 100644 index 00000000000..a62e3db3bb6 --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2_recv.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. 
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "ompi/communicator/communicator.h" +#include "ompi/message/message.h" +#include "opal/datatype/opal_convertor.h" +#include "ompi/mca/mtl/base/mtl_base_datatype.h" +#include "opal/util/show_help.h" + +#include "mtl_psm2.h" +#include "mtl_psm2_types.h" +#include "mtl_psm2_request.h" + +int +ompi_mtl_psm2_irecv(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + struct opal_convertor_t *convertor, + struct mca_mtl_request_t *mtl_request) +{ + int ret; + psm2_error_t err; + mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request; + psm2_mq_tag_t mqtag; + psm2_mq_tag_t tagsel; + size_t length; + + ret = ompi_mtl_datatype_recv_buf(convertor, + &mtl_psm2_request->buf, + &length, + &mtl_psm2_request->free_after); + + if (OMPI_SUCCESS != ret) return ret; + + mtl_psm2_request->length = length; + mtl_psm2_request->convertor = convertor; + mtl_psm2_request->type = OMPI_mtl_psm2_IRECV; + + PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + + err = psm2_mq_irecv2(ompi_mtl_psm2.mq, + PSM2_MQ_ANY_ADDR, + &mqtag, + &tagsel, + 0, + mtl_psm2_request->buf, + length, + mtl_psm2_request, + &mtl_psm2_request->psm2_request); + + if (err) { + opal_show_help("help-mtl-psm2.txt", + "error posting receive", true, + psm2_error_get_string(err), + mtl_psm2_request->buf, length); + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + + +int +ompi_mtl_psm2_imrecv(struct mca_mtl_base_module_t* mtl, + struct opal_convertor_t *convertor, + struct ompi_message_t **message, + struct mca_mtl_request_t *mtl_request) +{ + mca_mtl_psm2_request_t *mtl_psm2_request = + (mca_mtl_psm2_request_t*) mtl_request; + size_t length; + psm2_error_t err; + int ret; + + mtl_psm2_request->psm2_request = + (psm2_mq_req_t)(*message)->req_ptr; + + ret = ompi_mtl_datatype_recv_buf(convertor, + 
&mtl_psm2_request->buf, + &length, + &mtl_psm2_request->free_after); + + if (OMPI_SUCCESS != ret) return ret; + + mtl_psm2_request->length = length; + mtl_psm2_request->convertor = convertor; + mtl_psm2_request->type = OMPI_mtl_psm2_IRECV; + + + err = psm2_mq_imrecv(ompi_mtl_psm2.mq, 0, + mtl_psm2_request->buf, length, mtl_psm2_request, + &mtl_psm2_request->psm2_request); + + if(err) { + opal_show_help("help-mtl-psm2.txt", + "error posting receive", true, + psm2_error_get_string(err), + mtl_psm2_request->buf, length); + return OMPI_ERROR; + } + + *message = MPI_MESSAGE_NULL; + return OMPI_SUCCESS; +} diff --git a/ompi/mca/mtl/psm2/mtl_psm2_request.h b/ompi/mca/mtl/psm2/mtl_psm2_request.h new file mode 100644 index 00000000000..7e6410afb37 --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2_request.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2015 Intel, Inc. 
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_MTL_PSM2_REQUEST_H +#define OMPI_MTL_PSM2_REQUEST_H + +#include "opal/datatype/opal_convertor.h" + + +typedef enum { + OMPI_mtl_psm2_ISEND, + OMPI_mtl_psm2_IRECV +} mca_mtl_psm2_request_type_t; + +struct mca_mtl_psm2_request_t { + struct mca_mtl_request_t super; + mca_mtl_psm2_request_type_t type; + psm2_mq_req_t psm2_request; + /* psm2_segment_t psm2_segment[1]; */ + void *buf; + size_t length; + struct opal_convertor_t *convertor; + bool free_after; +}; +typedef struct mca_mtl_psm2_request_t mca_mtl_psm2_request_t; + +#endif diff --git a/ompi/mca/mtl/psm2/mtl_psm2_send.c b/ompi/mca/mtl/psm2/mtl_psm2_send.c new file mode 100644 index 00000000000..d4ed8136bf6 --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2_send.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. 
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/communicator/communicator.h" +#include "opal/datatype/opal_convertor.h" + +#include "mtl_psm2.h" +#include "mtl_psm2_types.h" +#include "mtl_psm2_request.h" +#include "ompi/mca/mtl/base/mtl_base_datatype.h" + +int +ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t* comm, + int dest, + int tag, + struct opal_convertor_t *convertor, + mca_pml_base_send_mode_t mode) +{ + psm2_error_t err; + mca_mtl_psm2_request_t mtl_psm2_request; + psm2_mq_tag_t mqtag; + uint32_t flags = 0; + int ret; + size_t length; + ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest ); + mca_mtl_psm2_endpoint_t* psm2_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc); + + assert(mtl == &ompi_mtl_psm2.super); + + PSM2_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag); + + ret = ompi_mtl_datatype_pack(convertor, + &mtl_psm2_request.buf, + &length, + &mtl_psm2_request.free_after); + + + mtl_psm2_request.length = length; + mtl_psm2_request.convertor = convertor; + mtl_psm2_request.type = OMPI_mtl_psm2_ISEND; + + if (OMPI_SUCCESS != ret) return ret; + + if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) + flags |= PSM2_MQ_FLAG_SENDSYNC; + + err = psm2_mq_send2(ompi_mtl_psm2.mq, + psm2_endpoint->peer_addr, + flags, + &mqtag, + mtl_psm2_request.buf, + length); + + if (mtl_psm2_request.free_after) { + free(mtl_psm2_request.buf); + } + + return err == PSM2_OK ? 
OMPI_SUCCESS : OMPI_ERROR; +} + +int +ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t* comm, + int dest, + int tag, + struct opal_convertor_t *convertor, + mca_pml_base_send_mode_t mode, + bool blocking, + mca_mtl_request_t * mtl_request) +{ + psm2_error_t psm2_error; + psm2_mq_tag_t mqtag; + uint32_t flags = 0; + int ret; + mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request; + size_t length; + ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest ); + mca_mtl_psm2_endpoint_t* psm2_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc); + + assert(mtl == &ompi_mtl_psm2.super); + + PSM2_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag); + + + ret = ompi_mtl_datatype_pack(convertor, + &mtl_psm2_request->buf, + &length, + &mtl_psm2_request->free_after); + + mtl_psm2_request->length= length; + mtl_psm2_request->convertor = convertor; + mtl_psm2_request->type = OMPI_mtl_psm2_ISEND; + + if (OMPI_SUCCESS != ret) return ret; + + if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) + flags |= PSM2_MQ_FLAG_SENDSYNC; + + psm2_error = psm2_mq_isend2(ompi_mtl_psm2.mq, + psm2_endpoint->peer_addr, + flags, + &mqtag, + mtl_psm2_request->buf, + length, + mtl_psm2_request, + &mtl_psm2_request->psm2_request); + + return psm2_error == PSM2_OK ? OMPI_SUCCESS : OMPI_ERROR; +} diff --git a/ompi/mca/mtl/psm2/mtl_psm2_types.h b/ompi/mca/mtl/psm2/mtl_psm2_types.h new file mode 100644 index 00000000000..31f0deb7ca1 --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2_types.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2011 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MTL_PSM2_TYPES_H_HAS_BEEN_INCLUDED +#define MTL_PSM2_TYPES_H_HAS_BEEN_INCLUDED + +#include "ompi_config.h" +#include "mtl_psm2.h" + +#include "ompi/communicator/communicator.h" + +#include "ompi/mca/mtl/mtl.h" +#include "ompi/mca/mtl/base/base.h" +#include "mtl_psm2_endpoint.h" + +#include "psm2.h" + + +BEGIN_C_DECLS + +/** + * MTL Module Interface + */ +struct mca_mtl_psm2_module_t { + mca_mtl_base_module_t super; /**< base MTL interface */ + + int32_t connect_timeout; + + psm2_ep_t ep; + psm2_mq_t mq; + psm2_epid_t epid; + psm2_epaddr_t epaddr; +}; + +typedef struct mca_mtl_psm2_module_t mca_mtl_psm2_module_t; + +extern mca_mtl_psm2_module_t ompi_mtl_psm2; + +struct mca_mtl_psm2_component_t { + mca_mtl_base_component_2_0_0_t super; /**< base MTL component */ +}; +typedef struct mca_mtl_psm2_component_t mca_mtl_psm2_component_t; + +OMPI_DECLSPEC extern mca_mtl_psm2_component_t mca_mtl_psm2_component; + +#define PSM2_MAKE_MQTAG(ctxt,rank,utag,tag) \ + do { \ + (tag).tag0 = utag; \ + (tag).tag1 = rank; \ + (tag).tag2 = ctxt; \ + } while (0) + +#define PSM2_MAKE_TAGSEL(user_rank, user_tag, user_ctxt, tag, _tagsel) \ + do { \ + (tag).tag0 = user_tag; \ + (tag).tag1 = user_rank; \ + (tag).tag2 = user_ctxt; \ + (_tagsel).tag0 = 0xffffffffULL; \ + (_tagsel).tag1 = 0xffffffffULL; \ + (_tagsel).tag2 = 0xffffffffULL; \ + if((user_tag) == MPI_ANY_TAG) \ + { \ + (_tagsel).tag0 = 0x80000000ULL; \ + (tag).tag0 = 0x00000000ULL; \ + } \ + if((user_rank) == MPI_ANY_SOURCE) \ + { \ + (_tagsel).tag1 = 0x00000000ULL; \ + } \ + } while (0) + +END_C_DECLS + +#endif /* 
MTL_PSM2_TYPES_H_HAS_BEEN_INCLUDED */ diff --git a/opal/mca/reachable/base/owner.txt b/ompi/mca/mtl/psm2/owner.txt similarity index 100% rename from opal/mca/reachable/base/owner.txt rename to ompi/mca/mtl/psm2/owner.txt diff --git a/ompi/mca/mtl/psm2/post_configure.sh b/ompi/mca/mtl/psm2/post_configure.sh new file mode 100644 index 00000000000..c47eb335b2f --- /dev/null +++ b/ompi/mca/mtl/psm2/post_configure.sh @@ -0,0 +1 @@ +DIRECT_CALL_HEADER="ompi/mca/mtl/psm2/mtl_psm2.h" diff --git a/ompi/mca/op/Makefile.am b/ompi/mca/op/Makefile.am index c90f533e0b8..8c392f1dbec 100644 --- a/ompi/mca/op/Makefile.am +++ b/ompi/mca/op/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/op/base/Makefile.include b/ompi/mca/op/base/Makefile.include index 65cf72f54bd..6b857193e66 100644 --- a/ompi/mca/op/base/Makefile.include +++ b/ompi/mca/op/base/Makefile.include @@ -6,20 +6,18 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # -dist_ompidata_DATA = base/help-mca-op-base.txt - headers += \ base/base.h \ base/functions.h diff --git a/ompi/mca/op/base/base.h b/ompi/mca/op/base/base.h index 2aed7fc50a2..f26992c23a5 100644 --- a/ompi/mca/op/base/base.h +++ b/ompi/mca/op/base/base.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -37,7 +37,7 @@ #include "opal/mca/base/base.h" #include "ompi/mca/op/op.h" -BEGIN_C_DECLS +BEGIN_C_DECLS typedef struct ompi_op_base_selected_module_t { opal_list_item_t super; diff --git a/ompi/mca/op/base/functions.h b/ompi/mca/op/base/functions.h index 9df49a75372..b5d37d70a4b 100644 --- a/ompi/mca/op/base/functions.h +++ b/ompi/mca/op/base/functions.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,9 +33,9 @@ BEGIN_C_DECLS * Globals holding all the "base" function pointers, indexed by op and * datatype. */ -OMPI_DECLSPEC extern ompi_op_base_handler_fn_t +OMPI_DECLSPEC extern ompi_op_base_handler_fn_t ompi_op_base_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX]; -OMPI_DECLSPEC extern ompi_op_base_3buff_handler_fn_t +OMPI_DECLSPEC extern ompi_op_base_3buff_handler_fn_t ompi_op_base_3buff_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX]; END_C_DECLS diff --git a/ompi/mca/op/base/help-mca-op-base.txt b/ompi/mca/op/base/help-mca-op-base.txt deleted file mode 100644 index 1ab8bbf71f5..00000000000 --- a/ompi/mca/op/base/help-mca-op-base.txt +++ /dev/null @@ -1,37 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI MCA op-specific -# error messages. -# -[op-select:none-available] -Although some op components are available on your system, none of -them said that they could be used for a new MPI_Op ("%s"). - -This is extremely unusual -- a basic function should always be able to -be selected for any MPI_Op. As such, this likely means that something -else is wrong with either your Open MPI installation or your system. 
-# -[op-unselect:failed-finalize] -A op module failed to finalize properly when a MPI_Op that was -using it was destroyed. - -This is somewhat unusual: the module itself may be at fault, or this -may be a symptom of another issue (e.g., a memory problem). -# diff --git a/ompi/mca/op/base/op_base_find_available.c b/ompi/mca/op/base/op_base_find_available.c index 50ef4e2b506..3fdaf86f18c 100644 --- a/ompi/mca/op/base/op_base_find_available.c +++ b/ompi/mca/op/base/op_base_find_available.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -142,7 +142,7 @@ static int init_query(const mca_base_component_t * c, * Query a specific component, op v2.0.0 */ static int init_query_1_0_0(const mca_base_component_t * component, - bool enable_progress_threads, + bool enable_progress_threads, bool enable_mpi_threads) { ompi_op_base_component_1_0_0_t *op = diff --git a/ompi/mca/op/base/op_base_frame.c b/ompi/mca/op/base/op_base_frame.c index 35b53f56824..90167300851 100644 --- a/ompi/mca/op/base/op_base_frame.c +++ b/ompi/mca/op/base/op_base_frame.c @@ -5,24 +5,22 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" -#ifdef HAVE_STRING_H #include <string.h> -#endif #include "opal/util/output.h" #include "ompi/mca/mca.h" @@ -56,9 +54,9 @@ static void module_constructor_1_0_0(ompi_op_base_module_1_0_0_t *m) memset(&(m->opm_3buff_fns), 0, sizeof(m->opm_3buff_fns)); } -OBJ_CLASS_INSTANCE(ompi_op_base_module_t, opal_object_t, +OBJ_CLASS_INSTANCE(ompi_op_base_module_t, opal_object_t, module_constructor, NULL); -OBJ_CLASS_INSTANCE(ompi_op_base_module_1_0_0_t, opal_object_t, +OBJ_CLASS_INSTANCE(ompi_op_base_module_1_0_0_t, opal_object_t, module_constructor_1_0_0, NULL); MCA_BASE_FRAMEWORK_DECLARE(ompi, op, NULL, NULL, NULL, NULL, diff --git a/ompi/mca/op/base/op_base_functions.c b/ompi/mca/op/base/op_base_functions.c index ee79a3bb26b..99a248f85c7 100644 --- a/ompi/mca/op/base/op_base_functions.c +++ b/ompi/mca/op/base/op_base_functions.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -1370,7 +1370,7 @@ LOC_FUNC_3BUF(minloc, long_double_int, <) (OMPI_OP_FLAGS_INTRINSIC | OMPI_OP_FLAGS_ASSOC | \ OMPI_OP_FLAGS_FLOAT_ASSOC | OMPI_OP_FLAGS_COMMUTE) -ompi_op_base_handler_fn_t ompi_op_base_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX] = +ompi_op_base_handler_fn_t ompi_op_base_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX] = { /* Corresponds to MPI_OP_NULL */ [OMPI_OP_BASE_FORTRAN_NULL] = { @@ -1457,7 +1457,7 @@ ompi_op_base_handler_fn_t ompi_op_base_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OM }; -ompi_op_base_3buff_handler_fn_t ompi_op_base_3buff_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX] = +ompi_op_base_3buff_handler_fn_t ompi_op_base_3buff_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX] = { /* Corresponds to MPI_OP_NULL */ [OMPI_OP_BASE_FORTRAN_NULL] = { diff --git a/ompi/mca/op/base/op_base_op_select.c b/ompi/mca/op/base/op_base_op_select.c index 5586882fb78..8312414703d 100644 --- a/ompi/mca/op/base/op_base_op_select.c +++ b/ompi/mca/op/base/op_base_op_select.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -57,17 +57,17 @@ typedef struct avail_op_t { /* * Local functions */ -static opal_list_t *check_components(opal_list_t *components, +static opal_list_t *check_components(opal_list_t *components, ompi_op_t *op); -static int check_one_component(ompi_op_t *op, +static int check_one_component(ompi_op_t *op, const mca_base_component_t *component, ompi_op_base_module_1_0_0_t **module); -static int query(const mca_base_component_t *component, +static int query(const mca_base_component_t *component, ompi_op_t *op, int *priority, ompi_op_base_module_1_0_0_t **module); -static int query_1_0_0(const ompi_op_base_component_1_0_0_t *op_component, +static int query_1_0_0(const ompi_op_base_component_1_0_0_t *op_component, ompi_op_t *op, int *priority, ompi_op_base_module_1_0_0_t **module); @@ -94,7 +94,7 @@ int ompi_op_base_op_select(ompi_op_t *op) /* Announce */ opal_output_verbose(10, ompi_op_base_framework.framework_output, - "op:base:op_select: new op: %s", + "op:base:op_select: new op: %s", op->o_name); /* Make a module for all the base functions so that other modules @@ -110,7 +110,7 @@ int ompi_op_base_op_select(ompi_op_t *op) memset(&op->o_func, 0, sizeof(op->o_func)); memset(&op->o_3buff_intrinsic, 0, sizeof(op->o_3buff_intrinsic)); for (i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) { - op->o_func.intrinsic.fns[i] = + op->o_func.intrinsic.fns[i] = ompi_op_base_functions[op->o_f_to_c_index][i]; op->o_func.intrinsic.modules[i] = module; OBJ_RETAIN(module); @@ -126,14 +126,14 @@ int ompi_op_base_op_select(ompi_op_t *op) /* Check for any components that want to run. It's not an error if there are none; we'll just use all the base functions in this case. 
*/ - opal_output_verbose(10, ompi_op_base_framework.framework_output, + opal_output_verbose(10, ompi_op_base_framework.framework_output, "op:base:op_select: Checking all available components"); selectable = check_components(&ompi_op_base_framework.framework_components, op); /* Do the selection loop. The selectable list is in priority order; lowest priority first. */ for (item = opal_list_remove_first(selectable); - NULL != item; + NULL != item; item = opal_list_remove_first(selectable)) { avail_op_t *avail = (avail_op_t*) item; @@ -162,7 +162,7 @@ int ompi_op_base_op_select(ompi_op_t *op) /* 3-buffer variants */ if (NULL != avail->ao_module->opm_3buff_fns[i]) { OBJ_RELEASE(op->o_func.intrinsic.modules[i]); - op->o_3buff_intrinsic.fns[i] = + op->o_3buff_intrinsic.fns[i] = avail->ao_module->opm_3buff_fns[i]; op->o_3buff_intrinsic.modules[i] = avail->ao_module; OBJ_RETAIN(avail->ao_module); @@ -224,7 +224,7 @@ static int avail_op_compare(opal_list_item_t **itema, * only those who returned that they want to run, and put them in * priority order (lowest to highest). 
*/ -static opal_list_t *check_components(opal_list_t *components, +static opal_list_t *check_components(opal_list_t *components, ompi_op_t *op) { int priority; @@ -233,7 +233,7 @@ static opal_list_t *check_components(opal_list_t *components, ompi_op_base_module_1_0_0_t *module; opal_list_t *selectable; avail_op_t *avail; - + /* Make a list of the components that query successfully */ selectable = OBJ_NEW(opal_list_t); @@ -251,7 +251,7 @@ static opal_list_t *check_components(opal_list_t *components, avail = OBJ_NEW(avail_op_t); avail->ao_priority = priority; avail->ao_module = module; - + opal_list_append(selectable, (opal_list_item_t*)avail); } } @@ -266,7 +266,7 @@ static opal_list_t *check_components(opal_list_t *components, /* * Check a single component */ -static int check_one_component(ompi_op_t *op, +static int check_one_component(ompi_op_t *op, const mca_base_component_t *component, ompi_op_base_module_1_0_0_t **module) { @@ -277,13 +277,13 @@ static int check_one_component(ompi_op_t *op, if (OMPI_SUCCESS == err) { priority = (priority < 100) ? 
priority : 100; - opal_output_verbose(10, ompi_op_base_framework.framework_output, - "op:base:op_select: component available: %s, priority: %d", + opal_output_verbose(10, ompi_op_base_framework.framework_output, + "op:base:op_select: component available: %s, priority: %d", component->mca_component_name, priority); } else { priority = -1; - opal_output_verbose(10, ompi_op_base_framework.framework_output, + opal_output_verbose(10, ompi_op_base_framework.framework_output, "op:base:op_select: component not available: %s", component->mca_component_name); } @@ -300,19 +300,19 @@ static int check_one_component(ompi_op_t *op, * Take any version of a op module, query it, and return the right * module struct */ -static int query(const mca_base_component_t *component, - ompi_op_t *op, +static int query(const mca_base_component_t *component, + ompi_op_t *op, int *priority, ompi_op_base_module_1_0_0_t **module) { *module = NULL; if (1 == component->mca_type_major_version && 0 == component->mca_type_minor_version && 0 == component->mca_type_release_version) { - const ompi_op_base_component_1_0_0_t *op100 = + const ompi_op_base_component_1_0_0_t *op100 = (ompi_op_base_component_1_0_0_t *) component; return query_1_0_0(op100, op, priority, module); - } + } /* Unknown op API version -- return error */ diff --git a/ompi/mca/op/base/owner.txt b/ompi/mca/op/base/owner.txt index e6150b6b0fc..e6967790514 100644 --- a/ompi/mca/op/base/owner.txt +++ b/ompi/mca/op/base/owner.txt @@ -4,4 +4,4 @@ # status: e.g. active, maintenance, unmaintained # owner: project -status: active +status: unmaintained diff --git a/ompi/mca/op/example/Makefile.am b/ompi/mca/op/example/Makefile.am index 3499b39a4ae..62626c10976 100644 --- a/ompi/mca/op/example/Makefile.am +++ b/ompi/mca/op/example/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -71,7 +71,7 @@ mcacomponent_LTLIBRARIES = $(component) mca_op_example_la_SOURCES = $(component_sources) mca_op_example_la_LDFLAGS = -module -avoid-version -# Specific information for static builds. +# Specific information for static builds. # # Note that we *must* "noinst"; the upper-layer Makefile.am's will # slurp in the resulting .la library into libmpi. diff --git a/ompi/mca/op/example/README.txt b/ompi/mca/op/example/README.txt index 3a4706eb5c3..af4d75d58a2 100644 --- a/ompi/mca/op/example/README.txt +++ b/ompi/mca/op/example/README.txt @@ -39,7 +39,7 @@ may not be worthwhile to use the hardware unless the amount of data to be processed is "big enough" (meaning that the cost of the registration and/or copy-in/copy-out is ameliorated) or the memory to be processed is already registered or is otherwise local to the the -accelerator hardware. +accelerator hardware. Hence, at run-time, the module may choose to use the accelerator hardware or fail over to a "basic" version of the operation. This @@ -96,7 +96,7 @@ Now your component should be fully functional (although entirely renamed as "foo" instead of "example"). You can go to the top-level OMPI directory and run "autogen.pl" (which will find your component and att it to the configure/build process) and then "configure ..." -and "make ..." as normal. +and "make ..." as normal. shell$ cd (top_ompi_dir) shell$ ./autogen.pl @@ -109,7 +109,7 @@ shell$ make install # ...lots of output... After you have installed Open MPI, running "ompi_info" should show -your "foo" component in the output. 
+your "foo" component in the output. shell$ ompi_info | grep op: MCA op: example (MCA v2.0, API v1.0, Component v1.4) @@ -121,5 +121,5 @@ the output of autogen.pl, configure, and make to ensure that "foo" was found, configured, and built successfully. Once ompi_info sees your component, start editing the "foo" component -files in a meaningful way. +files in a meaningful way. diff --git a/ompi/mca/op/example/configure.m4 b/ompi/mca/op/example/configure.m4 index 12c8c124ac3..a15970ccdc2 100644 --- a/ompi/mca/op/example/configure.m4 +++ b/ompi/mca/op/example/configure.m4 @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -23,7 +23,7 @@ # This script must define (via AC_DEFUN) an m4 macro named # MCA___CONFIG that executes either $1 if the # component wants to build itself, or $2 if the component does not -# want to build itself. +# want to build itself. # Do *NOT* invoke AC_MSG_ERROR, or any other macro that will abort # configure, except upon catastrophic error. For example, it *is* a @@ -40,7 +40,7 @@ AC_DEFUN([MCA_ompi_op_example_CONFIG],[ AC_CONFIG_FILES([ompi/mca/op/example/Makefile]) # Add checks here for any necessary header files and/or libraries - # that must be present to compile your component. + # that must be present to compile your component. 
# This example performs a fairly simple test (checking for the # "struct sockaddr_in" C type), just for the sake of showing you @@ -48,9 +48,9 @@ AC_DEFUN([MCA_ompi_op_example_CONFIG],[ # of the test. # check for sockaddr_in (a good sign we have TCP) - AC_CHECK_TYPES([struct sockaddr_in], + AC_CHECK_TYPES([struct sockaddr_in], [$1], - [$2], + [$2], [AC_INCLUDES_DEFAULT #ifdef HAVE_NETINET_IN_H #include diff --git a/ompi/mca/op/example/op_example.h b/ompi/mca/op/example/op_example.h index c70d0b50a59..eaaba98534d 100644 --- a/ompi/mca/op/example/op_example.h +++ b/ompi/mca/op/example/op_example.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -85,7 +85,7 @@ OBJ_CLASS_DECLARATION(ompi_op_example_module_bxor_t); * itself can cache additional information after that that can be used * by both the component and modules. */ -OMPI_DECLSPEC extern ompi_op_example_component_t +OMPI_DECLSPEC extern ompi_op_example_component_t mca_op_example_component; /** diff --git a/ompi/mca/op/example/op_example_component.c b/ompi/mca/op/example/op_example_component.c index 750227838a5..43a1e6d71f2 100644 --- a/ompi/mca/op/example/op_example_component.c +++ b/ompi/mca/op/example/op_example_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -132,7 +132,7 @@ static int example_component_register(void) containing the major.minor.release version number from the libfoo support library (see configure.m4 for how we got these C macros). */ - asprintf(&str, "%s.%s.%s", + asprintf(&str, "%s.%s.%s", OP_EXAMPLE_LIBFOO_VERSION_MAJOR, OP_EXAMPLE_LIBFOO_VERSION_MINOR, OP_EXAMPLE_LIBFOO_VERSION_RELEASE); diff --git a/ompi/mca/op/example/op_example_module_bxor.c b/ompi/mca/op/example/op_example_module_bxor.c index aa83f8e1360..23a90ede488 100644 --- a/ompi/mca/op/example/op_example_module_bxor.c +++ b/ompi/mca/op/example/op_example_module_bxor.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -109,7 +109,7 @@ static OBJ_CLASS_INSTANCE(module_bxor_t, /** * Bxor function for C int */ -static void bxor_int(void *in, void *out, int *count, +static void bxor_int(void *in, void *out, int *count, ompi_datatype_t **type, ompi_op_base_module_t *module) { module_bxor_t *m = (module_bxor_t*) module; @@ -143,7 +143,7 @@ static void bxor_int(void *in, void *out, int *count, /** * Bxor function for C long */ -static void bxor_long(void *in, void *out, int *count, +static void bxor_long(void *in, void *out, int *count, ompi_datatype_t **type, ompi_op_base_module_t *module) { module_bxor_t *m = (module_bxor_t*) module; @@ -157,7 +157,7 @@ static void bxor_long(void *in, void *out, int *count, /** * Bxor function for Fortran INTEGER */ -static void bxor_integer(void *in, void *out, int *count, +static void bxor_integer(void *in, void *out, int *count, ompi_datatype_t **type, ompi_op_base_module_t *module) { module_bxor_t *m = (module_bxor_t*) module; @@ -193,7 +193,7 @@ ompi_op_base_module_t *ompi_op_example_setup_bxor(ompi_op_t *op) /* C int */ module->super.opm_fns[OMPI_OP_BASE_TYPE_INT] = bxor_int; module->fallback_int = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_INT]; - module->fallback_int_module = + module->fallback_int_module = op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_INT]; /* If you cache a fallback function, you *must* RETAIN (i.e., increase the refcount) its module so that the module knows that @@ -203,15 +203,15 @@ ompi_op_base_module_t *ompi_op_example_setup_bxor(ompi_op_t *op) /* C long */ module->super.opm_fns[OMPI_OP_BASE_TYPE_LONG] = bxor_long; module->fallback_long = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_LONG]; - module->fallback_long_module = + module->fallback_long_module = op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_LONG]; OBJ_RETAIN(module->fallback_long_module); /* Fortran INTEGER */ module->super.opm_fns[OMPI_OP_BASE_TYPE_INTEGER] = bxor_integer; - 
module->fallback_integer = + module->fallback_integer = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_INTEGER]; - module->fallback_integer_module = + module->fallback_integer_module = op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_INTEGER]; OBJ_RETAIN(module->fallback_integer_module); diff --git a/ompi/mca/op/example/op_example_module_max.c b/ompi/mca/op/example/op_example_module_max.c index 9eb11add42d..4c43ecf22a4 100644 --- a/ompi/mca/op/example/op_example_module_max.c +++ b/ompi/mca/op/example/op_example_module_max.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -118,7 +118,7 @@ static OBJ_CLASS_INSTANCE(module_max_t, /** * Max function for C float */ -static void max_float(void *in, void *out, int *count, +static void max_float(void *in, void *out, int *count, ompi_datatype_t **type, ompi_op_base_module_t *module) { module_max_t *m = (module_max_t*) module; @@ -152,7 +152,7 @@ static void max_float(void *in, void *out, int *count, /** * Max function for C double */ -static void max_double(void *in, void *out, int *count, +static void max_double(void *in, void *out, int *count, ompi_datatype_t **type, ompi_op_base_module_t *module) { module_max_t *m = (module_max_t*) module; @@ -166,7 +166,7 @@ static void max_double(void *in, void *out, int *count, /** * Max function for Fortran REAL */ -static void max_real(void *in, void *out, int *count, +static void max_real(void *in, void *out, int *count, ompi_datatype_t **type, ompi_op_base_module_t *module) { module_max_t *m = (module_max_t*) module; @@ -180,8 +180,8 @@ static void max_real(void *in, void *out, int *count, /** * Max function for Fortran DOUBLE PRECISION */ -static void max_double_precision(void *in, void *out, int *count, - ompi_datatype_t **type, +static void max_double_precision(void *in, void *out, int *count, + ompi_datatype_t **type, ompi_op_base_module_t *module) { module_max_t *m = (module_max_t*) module; @@ -189,7 +189,7 @@ static void max_double_precision(void *in, void *out, int *count, /* Just another example function -- similar to max_int() */ - m->fallback_double_precision(in, out, count, type, + m->fallback_double_precision(in, out, count, type, m->fallback_double_precision_module); } @@ -215,7 +215,7 @@ ompi_op_base_module_t *ompi_op_example_setup_max(ompi_op_t *op) /* C float */ module->super.opm_fns[OMPI_OP_BASE_TYPE_FLOAT] = max_float; module->fallback_float = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_FLOAT]; - module->fallback_float_module = + 
module->fallback_float_module = op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_FLOAT]; /* If you cache a fallback function, you *must* RETAIN (i.e., increase the refcount) its module so that the module knows that @@ -224,9 +224,9 @@ ompi_op_base_module_t *ompi_op_example_setup_max(ompi_op_t *op) /* Fortran REAL */ module->super.opm_fns[OMPI_OP_BASE_TYPE_REAL] = max_real; - module->fallback_real = + module->fallback_real = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_REAL]; - module->fallback_real_module = + module->fallback_real_module = op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_REAL]; OBJ_RETAIN(module->fallback_real_module); @@ -235,18 +235,18 @@ ompi_op_base_module_t *ompi_op_example_setup_max(ompi_op_t *op) if (mca_op_example_component.double_supported) { /* C double */ module->super.opm_fns[OMPI_OP_BASE_TYPE_DOUBLE] = max_double; - module->fallback_double = + module->fallback_double = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_DOUBLE]; - module->fallback_double_module = + module->fallback_double_module = op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_DOUBLE]; OBJ_RETAIN(module->fallback_double_module); - + /* Fortran DOUBLE PRECISION */ - module->super.opm_fns[OMPI_OP_BASE_TYPE_DOUBLE_PRECISION] = + module->super.opm_fns[OMPI_OP_BASE_TYPE_DOUBLE_PRECISION] = max_double_precision; - module->fallback_double_precision = + module->fallback_double_precision = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_DOUBLE_PRECISION]; - module->fallback_double_precision_module = + module->fallback_double_precision_module = op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_DOUBLE_PRECISION]; OBJ_RETAIN(module->fallback_double_precision_module); } diff --git a/ompi/mca/op/example/owner.txt b/ompi/mca/op/example/owner.txt index 0cc0384f0eb..2d23c9be654 100644 --- a/ompi/mca/op/example/owner.txt +++ b/ompi/mca/op/example/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. 
active, maintenance, unmaintained # -owner: CISCO +owner: project status: maintenance diff --git a/ompi/mca/op/op.h b/ompi/mca/op/op.h index e27950fb45d..a4006b20c61 100644 --- a/ompi/mca/op/op.h +++ b/ompi/mca/op/op.h @@ -6,20 +6,20 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2007-2008 UT-Battelle, LLC * Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -243,7 +243,7 @@ struct ompi_op_base_module_1_0_0_t; typedef struct ompi_op_base_module_1_0_0_t ompi_op_base_module_t; /** - * Typedef for 2-buffer op functions. + * Typedef for 2-buffer op functions. * * We don't use MPI_User_function because this would create a * confusing dependency loop between this file and mpi.h. So this is @@ -305,7 +305,7 @@ typedef int (*ompi_op_base_component_init_query_fn_t) * not wish to run or return an error during module_enable(). 
* * @param[in] op The MPI_Op being created - * @param[out] priority Priority setting for component on + * @param[out] priority Priority setting for component on * this op * * @returns An initialized module structure if the component can @@ -385,7 +385,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_op_base_module_1_0_0_t); /** * Struct that is used in op.h to hold all the function pointers and * pointers to the corresopnding modules (so that we can properly - * RETAIN/RELEASE them) + * RETAIN/RELEASE them) */ typedef struct ompi_op_base_op_fns_1_0_0_t { ompi_op_base_handler_fn_1_0_0_t fns[OMPI_OP_BASE_TYPE_MAX]; @@ -397,7 +397,7 @@ typedef ompi_op_base_op_fns_1_0_0_t ompi_op_base_op_fns_t; /** * Struct that is used in op.h to hold all the function pointers and * pointers to the corresopnding modules (so that we can properly - * RETAIN/RELEASE them) + * RETAIN/RELEASE them) */ typedef struct ompi_op_base_op_3buff_fns_1_0_0_t { ompi_op_base_3buff_handler_fn_1_0_0_t fns[OMPI_OP_BASE_TYPE_MAX]; diff --git a/ompi/mca/op/x86/.opal_unignore b/ompi/mca/op/x86/.opal_unignore deleted file mode 100644 index 814285c7e50..00000000000 --- a/ompi/mca/op/x86/.opal_unignore +++ /dev/null @@ -1 +0,0 @@ -jsquyres diff --git a/ompi/mca/op/x86/Makefile.am b/ompi/mca/op/x86/Makefile.am deleted file mode 100644 index 947f09f13b5..00000000000 --- a/ompi/mca/op/x86/Makefile.am +++ /dev/null @@ -1,54 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - op_x86.h \ - op_x86_component.c \ - op_x86_module_sum.c - -if MCA_BUILD_ompi_op_x86_DSO -lib = -lib_sources = -component = mca_op_x86.la -component_sources = $(sources) -else -lib = libmca_op_x86.la -lib_sources = $(sources) -component = -component_sources = -endif - -# Specific information for DSO builds. -# -# The DSO should install itself in $(ompilibdir) (by default, -# $prefix/lib/openmpi). - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component) -mca_op_x86_la_SOURCES = $(component_sources) -mca_op_x86_la_LDFLAGS = -module -avoid-version - -# Specific information for static builds. -# -# Note that we *must* "noinst"; the upper-layer Makefile.am's will -# slurp in the resulting .la library into libmpi. - -noinst_LTLIBRARIES = $(lib) -libmca_op_x86_la_SOURCES = $(lib_sources) -libmca_op_x86_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/op/x86/configure.m4 b/ompi/mca/op/x86/configure.m4 deleted file mode 100644 index 4f5665be0e8..00000000000 --- a/ompi/mca/op/x86/configure.m4 +++ /dev/null @@ -1,34 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_op_x86_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_op_x86_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/op/x86/Makefile]) - - # check for sockaddr_in (a good sign we have TCP) - AC_CHECK_TYPES([struct sockaddr_in], - [$1], - [$2], - [AC_INCLUDES_DEFAULT -#ifdef HAVE_NETINET_IN_H -#include -#endif]) -])dnl diff --git a/ompi/mca/op/x86/op_x86.h b/ompi/mca/op/x86/op_x86.h deleted file mode 100644 index 7a8a51ccc83..00000000000 --- a/ompi/mca/op/x86/op_x86.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_OP_X86_EXPORT_H -#define MCA_OP_X86_EXPORT_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/class/opal_object.h" - -#include "ompi/mca/op/op.h" - -BEGIN_C_DECLS - -/** - * Flags for each hardware type - */ -typedef enum { - OP_X86_HW_FLAGS_MMX = 1, - OP_X86_HW_FLAGS_MMX2 = 2, - OP_X86_HW_FLAGS_SSE = 4, - OP_X86_HW_FLAGS_SSE2 = 8, - OP_X86_HW_FLAGS_SSE3 = 16 -} op_x86_hw_flags_t; - -/** - * Derive a struct from the base op component struct, allowing us to - * cache some component-specific information on our well-known - * component struct. 
- */ -typedef struct { - /** The base op component struct */ - ompi_op_base_component_1_0_0_t super; - - /* What hardware do we have? */ - op_x86_hw_flags_t oxc_hw_flags; -} ompi_op_x86_component_t; - -/** - * Derive a struct from the base op module struct, allowing us to - * cache some module-specific information for SUM. - */ -typedef struct { - ompi_op_base_module_1_0_0_t super; - - /* JMS need anything here? */ -} ompi_op_x86_module_sum_t; - -OBJ_CLASS_DECLARATION(ompi_op_x86_module_sum_t); - -/** - * Well-known component instance - */ -OMPI_DECLSPEC extern ompi_op_x86_component_t mca_op_x86_component; - -/** - * Setup for MPI_MAX and return a module. - */ -OMPI_DECLSPEC ompi_op_base_module_t *ompi_op_x86_setup_sum(ompi_op_t *op); - -END_C_DECLS - -#endif /* MCA_OP_X86_EXPORT_H */ diff --git a/ompi/mca/op/x86/op_x86_component.c b/ompi/mca/op/x86/op_x86_component.c deleted file mode 100644 index 67cabee6141..00000000000 --- a/ompi/mca/op/x86/op_x86_component.c +++ /dev/null @@ -1,266 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** @file - * - * This is the "x86" component source code. 
It contains the - * well-known struct that OMPI will dlsym() (or equivalent) for to - * find how to access the rest of the component and any modules that - * are created. - */ - -#include "ompi_config.h" - -#include "opal/util/output.h" -#include "opal/mca/base/mca_base_var.h" - -#include "ompi/constants.h" -#include "ompi/op/op.h" -#include "ompi/mca/op/op.h" -#include "ompi/mca/op/base/base.h" -#include "ompi/mca/op/x86/op_x86.h" - -static int x86_component_open(void); -static int x86_component_close(void); -static int x86_component_init_query(bool enable_progress_threads, - bool enable_mpi_threads); -static struct ompi_op_base_module_1_0_0_t * - x86_component_op_query(struct ompi_op_t *op, int *priority); -static int x86_component_register(void); - -ompi_op_x86_component_t mca_op_x86_component = { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - { - .opc_version = { - OMPI_OP_BASE_VERSION_1_0_0, - - .mca_component_name = "x86", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_open_component = x86_component_open, - .mca_close_component = x86_component_close, - .mca_register_component_params = x86_component_register, - }, - .opc_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .opc_init_query = x86_component_init_query, - .opc_op_query = x86_component_op_query, - }, - - /* Now comes the x86-component-specific data. In this case, - we'll just leave it blank, defaulting all the values to - 0/false/whatever. We'll fill them in with meaningful values - during _component_init_query(). */ -}; - -/* - * Component open - */ -static int x86_component_open(void) -{ - opal_output(ompi_op_base_framework.framework_output, "x86 component open"); - - /* A first level check to see if x86 is even available in this - process. E.g., you may want to do a first-order check to see - if hardware is available. 
If so, return OMPI_SUCCESS. If not, - return anything other than OMPI_SUCCESS and the component will - silently be ignored. - - Note that if this function returns non-OMPI_SUCCESS, then this - component won't even be shown in ompi_info output (which is - probably not what you want). - */ - - return OMPI_SUCCESS; -} - - -/* - * Component close - */ -static int x86_component_close(void) -{ - opal_output(ompi_op_base_framework.framework_output, "x86 component close"); - - /* If x86 was opened successfully, close it (i.e., release any - resources that may have been allocated on this component). - Note that _component_close() will always be called at the end - of the process, so it may have been after any/all of the other - component functions have been invoked (and possibly even after - modules have been created and/or destroyed). */ - - return OMPI_SUCCESS; -} - - -/* - * Probe the hardware and see what we have - */ -static void hardware_probe(void) -{ - /* ... JMS fill in here ... */ -} - -static bool x86_mmx_available; -static bool x86_mmx2_available; -static bool x86_sse_available; -static bool x86_sse2_available; -static bool x86_sse3_available; - -/* - * Register MCA params. 
- */ -static int x86_component_register(void) -{ - opal_output(ompi_op_base_framework.framework_output, "x86 component register"); - - /* Probe the hardware and see what we have */ - hardware_probe(); - - x86_mmx_available = (0 != (mca_op_x86_component.oxc_hw_flags & OP_X86_HW_FLAGS_MMX)); - (void) mca_base_component_var_register(&mca_op_x86_component.super.opc_version, - "mmx_available", "Whether the hardware supports MMX or not", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &x86_mmx_available); - - x86_mmx2_available = (0 != (mca_op_x86_component.oxc_hw_flags & OP_X86_HW_FLAGS_MMX2)); - (void) mca_base_component_var_register(&mca_op_x86_component.super.opc_version, - "mmx2_available", "Whether the hardware supports MMX2 or not", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &x86_mmx2_available); - - x86_sse_available = (0 != (mca_op_x86_component.oxc_hw_flags & OP_X86_HW_FLAGS_SSE)); - (void) mca_base_component_var_register(&mca_op_x86_component.super.opc_version, - "sse_available", "Whether the hardware supports SSE or not", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &x86_sse_available); - - x86_sse2_available = (0 != (mca_op_x86_component.oxc_hw_flags & OP_X86_HW_FLAGS_SSE2)); - (void) mca_base_component_var_register(&mca_op_x86_component.super.opc_version, - "sse2_available", "Whether the hardware supports SSE2 or not", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &x86_sse2_available); - - x86_sse3_available = (0 != (mca_op_x86_component.oxc_hw_flags & OP_X86_HW_FLAGS_SSE3)); - (void) mca_base_component_var_register(&mca_op_x86_component.super.opc_version, - "sse3_available", "Whether the hardware supports SSE3 or not", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, - 
MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &x86_sse3_available); - - return OMPI_SUCCESS; -} - - -/* - * Query whether this component wants to be used in this process. - */ -static int x86_component_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - opal_output(ompi_op_base_framework.framework_output, "x86 component init query"); - - /* If we have any hardware and we're not threaded, success */ - if (0 != mca_op_x86_component.oxc_hw_flags && !enable_mpi_threads) { - return OMPI_SUCCESS; - } - return OMPI_ERR_NOT_SUPPORTED; -} - - -/* - * Query whether this component can be used for a specific op - */ -static struct ompi_op_base_module_1_0_0_t * - x86_component_op_query(struct ompi_op_t *op, int *priority) -{ - ompi_op_base_module_t *module = NULL; - - opal_output(ompi_op_base_framework.framework_output, "x86 component op query"); - - /* Sanity check -- although the framework should never invoke the - _component_op_query() on non-intrinsic MPI_Op's, we'll put a - check here just to be sure. */ - if (0 == (OMPI_OP_FLAGS_INTRINSIC & op->o_flags)) { - opal_output(0, "x86 component op query: not an intrinsic MPI_Op -- skipping"); - return NULL; - } - - /* What follows is an x86 of how to determine whether your - component supports the queried MPI_Op. You can do this lots of - different ways; this is but one x86. */ - - /* Note that we *do* have the hardware; _component_init_query() - would not have returned OMPI_SUCCESS if we didn't have the - hardware (and therefore this function would never have been - called). So we don't need to check for the hardware again. - Instead, we need to do finer-grained checks (e.g., do we - support this op, and if so, what datatypes are supported?). - - So check to see whether this MPI_Op operation is supported on - the hardware that this component supports (which may involve - querying the hardware to see what it is capable of). 
- - You can see what operation is being requested by checking the - "op->o_f_to_c_index" value against the OMPI_OP_BASE_FORTRAN_* - enums. See ompi/mca/op/op.h for a full list of the - OMPI_OP_BASE_FORTRAN_* enums. - - In this x86 component, we support MAX and BXOR. */ - switch (op->o_f_to_c_index) { - case OMPI_OP_BASE_FORTRAN_SUM: - /* Corresponds to MPI_SUM */ - module = ompi_op_x86_setup_sum(op); - break; - } - - /* If we got a module from above, we'll return it. Otherwise, - we'll return NULL, indicating that this component does not want - to be considered for selection for this MPI_Op. Note that the - "setup" functions each returned a *x86* component pointer - (vs. a *base* component pointer -- where an *x86* component - is a base component plus some other module-specific cached - information), so we have to cast it to the right pointer type - before returning. */ - if (NULL != module) { - *priority = 25; - } - return (ompi_op_base_module_1_0_0_t *) module; -} diff --git a/ompi/mca/op/x86/op_x86_module_sum.c b/ompi/mca/op/x86/op_x86_module_sum.c deleted file mode 100644 index 0dfedc92076..00000000000 --- a/ompi/mca/op/x86/op_x86_module_sum.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2010 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** @file - * - * This is the sum module source code. 
It contains the "setup" - * functions that will create a module for the MPI_SUM MPI_Op. - */ - -#include "ompi_config.h" - -#include "opal/class/opal_object.h" -#include "opal/util/output.h" - -#include "ompi/constants.h" -#include "ompi/op/op.h" -#include "ompi/mca/op/op.h" -#include "ompi/mca/op/base/base.h" -#include "ompi/mca/op/x86/op_x86.h" - -/** - * SUM module struct, including local cached info - */ -typedef struct { - ompi_op_base_module_1_0_0_t super; - - /* Fallback function pointers and modules. Only doing a few types - to begin with... will fill in others once we have figured out - the basics of the assembly stuff. */ - ompi_op_base_handler_fn_t fallback_float; - ompi_op_base_module_t *fallback_float_module; - - ompi_op_base_handler_fn_t fallback_int16_t; - ompi_op_base_module_t *fallback_int16_t_module; - ompi_op_base_handler_fn_t fallback_int32_t; - ompi_op_base_module_t *fallback_int32_t_module; - ompi_op_base_handler_fn_t fallback_int64_t; - ompi_op_base_module_t *fallback_int64_t_module; -} module_sum_t; - -/** - * Sum module constructor - */ -static void module_sum_constructor(module_sum_t *m) -{ - m->fallback_float = NULL; - m->fallback_float_module = NULL; - - m->fallback_int16_t = NULL; - m->fallback_int16_t_module = NULL; - m->fallback_int32_t = NULL; - m->fallback_int32_t_module = NULL; - m->fallback_int64_t = NULL; - m->fallback_int64_t_module = NULL; -} - -/** - * Sum module destructor - */ -static void module_sum_destructor(module_sum_t *m) -{ - m->fallback_float = (ompi_op_base_handler_fn_t) 0xdeadbeef; - m->fallback_float_module = (ompi_op_base_module_t*) 0xdeadbeef; - - m->fallback_int16_t = (ompi_op_base_handler_fn_t) 0xdeadbeef; - m->fallback_int16_t_module = (ompi_op_base_module_t*) 0xdeadbeef; - m->fallback_int32_t = (ompi_op_base_handler_fn_t) 0xdeadbeef; - m->fallback_int32_t_module = (ompi_op_base_module_t*) 0xdeadbeef; - m->fallback_int64_t = (ompi_op_base_handler_fn_t) 0xdeadbeef; - m->fallback_int64_t_module = 
(ompi_op_base_module_t*) 0xdeadbeef; -} - -/** - * Setup the class for the sum module, listing: - * - the name of the class - * - the "parent" of the class - * - function pointer for the constructor (or NULL) - * - function pointer for the destructor (or NULL) - */ -static OBJ_CLASS_INSTANCE(module_sum_t, - ompi_op_base_module_t, - module_sum_constructor, - module_sum_destructor); - -/** - * Sum function for C float - */ -static void sum_float(void *in, void *out, int *count, - ompi_datatype_t **type, ompi_op_base_module_t *module) -{ - module_sum_t *m = (module_sum_t*) module; - - /* Be chatty to the output, just so that we can see that this - function was called */ - opal_output(0, "In x86 sum float function"); -} - -/** - * Sum function for C int16_t - */ -static void sum_int16_t(void *in, void *out, int *count, - ompi_datatype_t **type, ompi_op_base_module_t *module) -{ - module_sum_t *m = (module_sum_t*) module; - opal_output(0, "In x86 sum int16_t function"); -} - -/** - * Sum function for C int32_t - */ -static void sum_int32_t(void *in, void *out, int *count, - ompi_datatype_t **type, ompi_op_base_module_t *module) -{ - module_sum_t *m = (module_sum_t*) module; - opal_output(0, "In x86 sum int function"); -} - -/** - * Sum function for C int64_t - */ -static void sum_int64_t(void *in, void *out, int *count, - ompi_datatype_t **type, ompi_op_base_module_t *module) -{ - module_sum_t *m = (module_sum_t*) module; - opal_output(0, "In x86 sum int function"); -} - -/** - * Setup function for MPI_SUM. If we get here, we can assume that a) - * the hardware is present, b) the MPI thread scenario is what we - * want, and c) the SUM operation is supported. So this function's - * job is to create a module and fill in function pointers for the - * functions that this hardware supports. 
- */ -ompi_op_base_module_t *ompi_op_x86_setup_sum(ompi_op_t *op) -{ - module_sum_t *module = OBJ_NEW(module_sum_t); - - /* JMS It might be better to set function pointers here based on - the hardware (MMX*, SSE@) -- i.e., make first layer decision of - which will be used. I don't know if that's Right, though, - because we might want to dispatch to different hardware based - on the size of the operation...? Just recording the idea - here... */ - - /* Commenting out everything for the moment, just so that we can - focus on the hardware detection piece first. */ -#if 0 - /* C float */ - module->super.opm_fns[OMPI_OP_BASE_TYPE_FLOAT] = sum_float; - module->fallback_float = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_FLOAT]; - module->fallback_float_module = - op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_FLOAT]; - /* If you cache a fallback function, you *must* RETAIN (i.e., - increase the refcount) its module so that the module knows that - it is being used and won't be freed/destructed. */ - OBJ_RETAIN(module->fallback_float_module); - - /* C int16_t */ - module->super.opm_fns[OMPI_OP_BASE_TYPE_INT16_T] = sum_int16_t; - module->fallback_int16_t = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_INT16_T]; - module->fallback_int16_t_module = - op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_INT16_T]; - /* If you cache a fallback function, you *must* RETAIN (i.e., - increase the refcount) its module so that the module knows that - it is being used and won't be freed/destructed. */ - OBJ_RETAIN(module->fallback_int16_t_module); - - /* C int32_t */ - module->super.opm_fns[OMPI_OP_BASE_TYPE_INT32_T] = sum_int32_t; - module->fallback_int32_t = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_INT32_T]; - module->fallback_int32_t_module = - op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_INT32_T]; - /* If you cache a fallback function, you *must* RETAIN (i.e., - increase the refcount) its module so that the module knows that - it is being used and won't be freed/destructed. 
*/ - OBJ_RETAIN(module->fallback_int32_t_module); - - /* C int64_t */ - module->super.opm_fns[OMPI_OP_BASE_TYPE_INT64_T] = sum_int64_t; - module->fallback_int64_t = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_INT64_T]; - module->fallback_int64_t_module = - op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_INT64_T]; - /* If you cache a fallback function, you *must* RETAIN (i.e., - increase the refcount) its module so that the module knows that - it is being used and won't be freed/destructed. */ - OBJ_RETAIN(module->fallback_int64_t_module); -#endif - - return (ompi_op_base_module_t*) module; -} diff --git a/ompi/mca/op/x86/owner.txt b/ompi/mca/op/x86/owner.txt deleted file mode 100644 index 0cc0384f0eb..00000000000 --- a/ompi/mca/op/x86/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: CISCO -status: maintenance diff --git a/ompi/mca/osc/Makefile.am b/ompi/mca/osc/Makefile.am index 5f0add5bd75..464d64bff4c 100644 --- a/ompi/mca/osc/Makefile.am +++ b/ompi/mca/osc/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/osc/base/Makefile.am b/ompi/mca/osc/base/Makefile.am index a79b091af3b..311fbe0394e 100644 --- a/ompi/mca/osc/base/Makefile.am +++ b/ompi/mca/osc/base/Makefile.am @@ -3,17 +3,20 @@ # All rights reserved. # Copyright (c) 2004-2005 The Trustees of the University of Tennessee. 
# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # +dist_ompidata_DATA = base/help-mca-osc-base.txt + headers += \ base/base.h \ base/osc_base_obj_convert.h diff --git a/ompi/mca/osc/base/base.h b/ompi/mca/osc/base/base.h index 680942e6015..bb368be82b9 100644 --- a/ompi/mca/osc/base/base.h +++ b/ompi/mca/osc/base/base.h @@ -3,14 +3,14 @@ * All rights reserved. * Copyright (c) 2004-2006 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: diff --git a/ompi/mca/osc/base/help-mca-osc-base.txt b/ompi/mca/osc/base/help-mca-osc-base.txt new file mode 100644 index 00000000000..abe658fd625 --- /dev/null +++ b/ompi/mca/osc/base/help-mca-osc-base.txt @@ -0,0 +1,18 @@ +# -*- text -*- +# +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English help file for Open MPI MCA osc-specific +# error messages. +# +[unsupported-dt] +Unsupported datatype and op combination used in a one-sided operation. 
+ + Datatype : %s + Operation: %s + Rank : %d diff --git a/ompi/mca/osc/base/osc_base_frame.c b/ompi/mca/osc/base/osc_base_frame.c index fe4751f6206..24a6a9dc126 100644 --- a/ompi/mca/osc/base/osc_base_frame.c +++ b/ompi/mca/osc/base/osc_base_frame.c @@ -3,16 +3,16 @@ * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -65,7 +65,7 @@ ompi_osc_base_finalize(void) opal_list_item_t* item; /* Finalize all available modules */ - while (NULL != + while (NULL != (item = opal_list_remove_first(&ompi_osc_base_framework.framework_components))) { ompi_osc_base_component_t *component = (ompi_osc_base_component_t*) ((mca_base_component_list_item_t*) item)->cli_component; diff --git a/ompi/mca/osc/base/osc_base_init.c b/ompi/mca/osc/base/osc_base_init.c index 57d42081349..1e0cba6629a 100644 --- a/ompi/mca/osc/base/osc_base_init.c +++ b/ompi/mca/osc/base/osc_base_init.c @@ -4,16 +4,16 @@ * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/osc/base/osc_base_obj_convert.c b/ompi/mca/osc/base/osc_base_obj_convert.c index a5c3a694ecb..9d3082c08f3 100644 --- a/ompi/mca/osc/base/osc_base_obj_convert.c +++ b/ompi/mca/osc/base/osc_base_obj_convert.c @@ -4,21 +4,22 @@ * All rights reserved. * Copyright (c) 2004-2006 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,6 +32,7 @@ #include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_convertor_internal.h" #include "opal/datatype/opal_datatype_prototypes.h" +#include "opal/util/show_help.h" #include "ompi/op/op.h" #include "ompi/datatype/ompi_datatype.h" @@ -43,7 +45,7 @@ int ompi_osc_base_get_primitive_type_info(ompi_datatype_t *datatype, - ompi_datatype_t **prim_datatype, + ompi_datatype_t **prim_datatype, uint32_t *prim_count) { ompi_datatype_t *primitive_datatype = NULL; @@ -76,6 +78,24 @@ int ompi_osc_base_process_op (void *outbuf, void *inbuf, size_t inbuflen, return OMPI_ERR_NOT_SUPPORTED; } + /* TODO: Remove the following check when support is added. 
+ * See the following issue for the current state: + * https://github.com/open-mpi/ompi/issues/1666 + */ + if(MPI_MINLOC == op || MPI_MAXLOC == op) { + if(MPI_SHORT_INT == datatype || + MPI_DOUBLE_INT == datatype || + MPI_LONG_INT == datatype || + MPI_LONG_DOUBLE_INT == datatype) { + ompi_communicator_t *comm = &ompi_mpi_comm_world.comm; + opal_show_help("help-mca-osc-base.txt", "unsupported-dt", true, + datatype->name, + op->o_name, + comm->c_my_rank); + ompi_mpi_abort(comm, -1); + } + } + if (ompi_datatype_is_predefined(datatype)) { ompi_op_reduce(op, inbuf, outbuf, count, datatype); } else { @@ -134,7 +154,7 @@ int ompi_osc_base_process_op (void *outbuf, void *inbuf, size_t inbuflen, return OMPI_SUCCESS; } -int ompi_osc_base_sndrcv_op (void *origin, int32_t origin_count, +int ompi_osc_base_sndrcv_op (const void *origin, int32_t origin_count, struct ompi_datatype_t *origin_dt, void *target, int32_t target_count, struct ompi_datatype_t *target_dt, @@ -152,7 +172,7 @@ int ompi_osc_base_sndrcv_op (void *origin, int32_t origin_count, bool done; if (ompi_datatype_is_predefined(origin_dt) && origin_dt == target_dt) { - ompi_op_reduce(op, origin, target, origin_count, origin_dt); + ompi_op_reduce(op, (void *)origin, target, origin_count, origin_dt); return OMPI_SUCCESS; } diff --git a/ompi/mca/osc/base/osc_base_obj_convert.h b/ompi/mca/osc/base/osc_base_obj_convert.h index 7d36c33b324..c6514bbbe00 100644 --- a/ompi/mca/osc/base/osc_base_obj_convert.h +++ b/ompi/mca/osc/base/osc_base_obj_convert.h @@ -3,14 +3,16 @@ * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -74,7 +76,7 @@ static inline ompi_op_t * ompi_osc_base_op_create(int op_id) { - ompi_op_t *op = MPI_Op_f2c(op_id); + ompi_op_t *op = PMPI_Op_f2c(op_id); OBJ_RETAIN(op); return op; } @@ -96,7 +98,7 @@ ompi_osc_base_op_create(int op_id) * @retval OMPI_SUCCESS Success */ OMPI_DECLSPEC int ompi_osc_base_get_primitive_type_info(ompi_datatype_t *datatype, - ompi_datatype_t **prim_datatype, + ompi_datatype_t **prim_datatype, uint32_t *prim_count); @@ -117,7 +119,7 @@ OMPI_DECLSPEC int ompi_osc_base_process_op(void *outbuf, int count, ompi_op_t *op); -OMPI_DECLSPEC int ompi_osc_base_sndrcv_op(void *origin, +OMPI_DECLSPEC int ompi_osc_base_sndrcv_op(const void *origin, int32_t origin_count, struct ompi_datatype_t *origin_dt, void *target, diff --git a/ompi/mca/osc/base/owner.txt b/ompi/mca/osc/base/owner.txt index e6150b6b0fc..48ac538cbb0 100644 --- a/ompi/mca/osc/base/owner.txt +++ b/ompi/mca/osc/base/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: project +owner: LANL status: active diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index e852f68e0d5..61ae2880036 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -4,22 +4,24 @@ * All rights reserved. * Copyright (c) 2004-2011 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2010 Cisco Systems, Inc. 
All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** * @file - * + * * One-sided Communication interface * * Interface for implementing the one-sided communication chapter of @@ -32,9 +34,7 @@ #include "opal_config.h" -#ifdef HAVE_STDDEF_H #include -#endif #include "ompi/mca/mca.h" @@ -127,13 +127,13 @@ typedef int (*ompi_osc_base_component_query_fn_t)(struct ompi_win_t *win, * memory can be associated with this window. The module should be * ready for use immediately upon return of this function, and the * module is responsible for providing any required collective - * synchronization before the end of the call. + * synchronization before the end of the call. * * @note The comm is the communicator specified from the user, so * normal internal usage rules apply. In other words, if you need * communication for the life of the window, you should call * comm_dup() during this function. 
- * + * * @param[in/out] win The window handle, already filled in by MPI_WIN_CREATE() * @param[in] info An info structure with hints from the user * regarding the usage of the component @@ -183,7 +183,7 @@ typedef int (*ompi_osc_base_module_win_shared_query_fn_t)(struct ompi_win_t *win size_t *size, int *disp_unit, void *baseptr); typedef int (*ompi_osc_base_module_win_attach_fn_t)(struct ompi_win_t *win, void *base, size_t size); -typedef int (*ompi_osc_base_module_win_detach_fn_t)(struct ompi_win_t *win, void *base); +typedef int (*ompi_osc_base_module_win_detach_fn_t)(struct ompi_win_t *win, const void *base); /** * Free resources associated with win @@ -203,7 +203,7 @@ typedef int (*ompi_osc_base_module_win_detach_fn_t)(struct ompi_win_t *win, void typedef int (*ompi_osc_base_module_free_fn_t)(struct ompi_win_t *win); -typedef int (*ompi_osc_base_module_put_fn_t)(void *origin_addr, +typedef int (*ompi_osc_base_module_put_fn_t)(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -223,7 +223,7 @@ typedef int (*ompi_osc_base_module_get_fn_t)(void *origin_addr, struct ompi_win_t *win); -typedef int (*ompi_osc_base_module_accumulate_fn_t)(void *origin_addr, +typedef int (*ompi_osc_base_module_accumulate_fn_t)(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -233,15 +233,15 @@ typedef int (*ompi_osc_base_module_accumulate_fn_t)(void *origin_addr, struct ompi_op_t *op, struct ompi_win_t *win); -typedef int (*ompi_osc_base_module_compare_and_swap_fn_t)(void *origin_addr, - void *compare_addr, +typedef int (*ompi_osc_base_module_compare_and_swap_fn_t)(const void *origin_addr, + const void *compare_addr, void *result_addr, struct ompi_datatype_t *dt, int target, OPAL_PTRDIFF_TYPE target_disp, struct ompi_win_t *win); -typedef int (*ompi_osc_base_module_fetch_and_op_fn_t)(void *origin_addr, +typedef int (*ompi_osc_base_module_fetch_and_op_fn_t)(const void *origin_addr, void *result_addr, 
struct ompi_datatype_t *dt, int target, @@ -249,20 +249,20 @@ typedef int (*ompi_osc_base_module_fetch_and_op_fn_t)(void *origin_addr, struct ompi_op_t *op, struct ompi_win_t *win); -typedef int (*ompi_osc_base_module_get_accumulate_fn_t)(void *origin_addr, - int origin_count, +typedef int (*ompi_osc_base_module_get_accumulate_fn_t)(const void *origin_addr, + int origin_count, struct ompi_datatype_t *origin_datatype, - void *result_addr, - int result_count, + void *result_addr, + int result_count, struct ompi_datatype_t *result_datatype, - int target_rank, - OPAL_PTRDIFF_TYPE target_disp, + int target_rank, + OPAL_PTRDIFF_TYPE target_disp, int target_count, - struct ompi_datatype_t *target_datatype, - struct ompi_op_t *op, + struct ompi_datatype_t *target_datatype, + struct ompi_op_t *op, struct ompi_win_t *win); -typedef int (*ompi_osc_base_module_rput_fn_t)(void *origin_addr, +typedef int (*ompi_osc_base_module_rput_fn_t)(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -283,7 +283,7 @@ typedef int (*ompi_osc_base_module_rget_fn_t)(void *origin_addr, struct ompi_request_t **request); -typedef int (*ompi_osc_base_module_raccumulate_fn_t)(void *origin_addr, +typedef int (*ompi_osc_base_module_raccumulate_fn_t)(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -294,17 +294,17 @@ typedef int (*ompi_osc_base_module_raccumulate_fn_t)(void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); -typedef int (*ompi_osc_base_module_rget_accumulate_fn_t)(void *origin_addr, - int origin_count, +typedef int (*ompi_osc_base_module_rget_accumulate_fn_t)(const void *origin_addr, + int origin_count, struct ompi_datatype_t *origin_datatype, - void *result_addr, - int result_count, + void *result_addr, + int result_count, struct ompi_datatype_t *result_datatype, - int target_rank, - OPAL_PTRDIFF_TYPE target_disp, + int target_rank, + OPAL_PTRDIFF_TYPE target_disp, int 
target_count, - struct ompi_datatype_t *target_datatype, - struct ompi_op_t *op, + struct ompi_datatype_t *target_datatype, + struct ompi_op_t *op, struct ompi_win_t *win, struct ompi_request_t **request); @@ -374,7 +374,7 @@ struct ompi_osc_base_module_3_0_0_t { ompi_osc_base_module_win_attach_fn_t osc_win_attach; ompi_osc_base_module_win_detach_fn_t osc_win_detach; - ompi_osc_base_module_free_fn_t osc_free; + ompi_osc_base_module_free_fn_t osc_free; ompi_osc_base_module_put_fn_t osc_put; ompi_osc_base_module_get_fn_t osc_get; diff --git a/ompi/mca/osc/portals4/Makefile.am b/ompi/mca/osc/portals4/Makefile.am index 5d01899a095..73b7ed9d5ff 100644 --- a/ompi/mca/osc/portals4/Makefile.am +++ b/ompi/mca/osc/portals4/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2011 Sandia National Laboratories. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/osc/portals4/configure.m4 b/ompi/mca/osc/portals4/configure.m4 index e48cefee99f..1b33b6607d7 100644 --- a/ompi/mca/osc/portals4/configure.m4 +++ b/ompi/mca/osc/portals4/configure.m4 @@ -2,9 +2,9 @@ # # Copyright (c) 2011 Sandia National Laboratories. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -15,7 +15,7 @@ AC_DEFUN([MCA_ompi_osc_portals4_POST_CONFIG], [ AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([PORTALS4])]) ])dnl -# MCA_osc_portals4_CONFIG(action-if-can-compile, +# MCA_osc_portals4_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_osc_portals4_CONFIG],[ diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index c0f93accb07..b35c0ed9053 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -1,12 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2013 Sandia National Laboratories. 
All rights reserved. + * Copyright (c) 2011-2017 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -17,6 +19,12 @@ #include "ompi/group/group.h" #include "ompi/communicator/communicator.h" +#include "ompi/mca/mtl/portals4/mtl_portals4.h" + +#define REQ_OSC_TABLE_ID 4 + +#define OSC_PORTALS4_IOVEC_MAX 64 + #define OSC_PORTALS4_MB_DATA 0x0000000000000000ULL #define OSC_PORTALS4_MB_CONTROL 0x1000000000000000ULL @@ -47,6 +55,11 @@ struct ompi_osc_portals4_component_t { ptl_size_t matching_atomic_max; ptl_size_t matching_fetch_atomic_max; ptl_size_t matching_atomic_ordered_size; + ptl_size_t ptl_max_msg_size; /* max size given by portals (cf PtlNIInit) */ + bool no_locks; + ptl_uid_t uid; + opal_mutex_t lock; + opal_condition_t cond; opal_free_list_t requests; /* request free list for the r* communication variants */ }; @@ -76,6 +89,7 @@ struct ompi_osc_portals4_module_t { ptl_handle_ni_t ni_h; /* network interface used by this window */ ptl_pt_index_t pt_idx; /* portal table index used by this window (this will be same across window) */ ptl_handle_ct_t ct_h; /* Counting event handle used for completion in this window */ + int ct_link; /* PTL_EVENT_LINK flag */ ptl_handle_md_t md_h; /* memory descriptor describing all of memory used by this window */ ptl_handle_md_t req_md_h; /* memory descriptor with event completion used by this window */ ptl_handle_me_t data_me_h; /* data match list entry (MB are CID | OSC_PORTALS4_MB_DATA) */ @@ -83,6 +97,11 @@ struct ompi_osc_portals4_module_t { int64_t opcount; ptl_match_bits_t match_bits; /* match bits for module. Same as cid for comm in most cases. 
*/ + ptl_iovec_t *origin_iovec_list; /* list of memory segments that compose the noncontiguous region */ + ptl_handle_md_t origin_iovec_md_h; /* memory descriptor describing a noncontiguous region in this window */ + ptl_iovec_t *result_iovec_list; /* list of memory segments that compose the noncontiguous region */ + ptl_handle_md_t result_iovec_md_h; /* memory descriptor describing a noncontiguous region in this window */ + ptl_size_t atomic_max; /* max size of atomic messages. Will guarantee ordering IF ordering requested */ ptl_size_t fetch_atomic_max; /* max size of fetchatomic messages. Will guarantee ordering IF ordering requested */ @@ -116,11 +135,11 @@ get_displacement(ompi_osc_portals4_module_t *module, int ompi_osc_portals4_attach(struct ompi_win_t *win, void *base, size_t len); -int ompi_osc_portals4_detach(struct ompi_win_t *win, void *base); +int ompi_osc_portals4_detach(struct ompi_win_t *win, const void *base); int ompi_osc_portals4_free(struct ompi_win_t *win); -int ompi_osc_portals4_put(void *origin_addr, +int ompi_osc_portals4_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -138,7 +157,7 @@ int ompi_osc_portals4_get(void *origin_addr, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); -int ompi_osc_portals4_accumulate(void *origin_addr, +int ompi_osc_portals4_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -148,15 +167,15 @@ int ompi_osc_portals4_accumulate(void *origin_addr, struct ompi_op_t *op, struct ompi_win_t *win); -int ompi_osc_portals4_compare_and_swap(void *origin_addr, - void *compare_addr, +int ompi_osc_portals4_compare_and_swap(const void *origin_addr, + const void *compare_addr, void *result_addr, struct ompi_datatype_t *dt, int target, OPAL_PTRDIFF_TYPE target_disp, struct ompi_win_t *win); -int ompi_osc_portals4_fetch_and_op(void *origin_addr, +int ompi_osc_portals4_fetch_and_op(const void *origin_addr, void 
*result_addr, struct ompi_datatype_t *dt, int target, @@ -164,20 +183,20 @@ int ompi_osc_portals4_fetch_and_op(void *origin_addr, struct ompi_op_t *op, struct ompi_win_t *win); -int ompi_osc_portals4_get_accumulate(void *origin_addr, - int origin_count, +int ompi_osc_portals4_get_accumulate(const void *origin_addr, + int origin_count, struct ompi_datatype_t *origin_datatype, - void *result_addr, - int result_count, + void *result_addr, + int result_count, struct ompi_datatype_t *result_datatype, - int target_rank, - MPI_Aint target_disp, + int target_rank, + OPAL_PTRDIFF_TYPE target_disp, int target_count, - struct ompi_datatype_t *target_datatype, - struct ompi_op_t *op, + struct ompi_datatype_t *target_datatype, + struct ompi_op_t *op, struct ompi_win_t *win); -int ompi_osc_portals4_rput(void *origin_addr, +int ompi_osc_portals4_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -197,7 +216,7 @@ int ompi_osc_portals4_rget(void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); -int ompi_osc_portals4_raccumulate(void *origin_addr, +int ompi_osc_portals4_raccumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -208,17 +227,17 @@ int ompi_osc_portals4_raccumulate(void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); -int ompi_osc_portals4_rget_accumulate(void *origin_addr, - int origin_count, +int ompi_osc_portals4_rget_accumulate(const void *origin_addr, + int origin_count, struct ompi_datatype_t *origin_datatype, - void *result_addr, - int result_count, + void *result_addr, + int result_count, struct ompi_datatype_t *result_datatype, - int target_rank, - MPI_Aint target_disp, + int target_rank, + OPAL_PTRDIFF_TYPE target_disp, int target_count, - struct ompi_datatype_t *target_datatype, - struct ompi_op_t *op, + struct ompi_datatype_t *target_datatype, + struct ompi_op_t *op, struct ompi_win_t *win, struct ompi_request_t 
**request); @@ -288,17 +307,15 @@ ompi_osc_portals4_complete_all(ompi_osc_portals4_module_t *module) } static inline ptl_process_t -ompi_osc_portals4_get_peer(ompi_osc_portals4_module_t *module, int rank) +ompi_osc_portals4_get_peer_group(struct ompi_group_t *group, int rank) { - ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, rank); - return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + return ompi_mtl_portals4_get_peer_group(group, rank); } static inline ptl_process_t -ompi_osc_portals4_get_peer_group(struct ompi_group_t *group, int rank) +ompi_osc_portals4_get_peer(ompi_osc_portals4_module_t *module, int rank) { - ompi_proc_t *proc = ompi_group_get_proc_ptr(group, rank); - return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + return ompi_osc_portals4_get_peer_group(module->comm->c_remote_group, rank); } #endif diff --git a/ompi/mca/osc/portals4/osc_portals4_active_target.c b/ompi/mca/osc/portals4/osc_portals4_active_target.c index 656aa990b0a..92b605fb15f 100644 --- a/ompi/mca/osc/portals4/osc_portals4_active_target.c +++ b/ompi/mca/osc/portals4/osc_portals4_active_target.c @@ -1,9 +1,9 @@ /* * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -15,8 +15,6 @@ #include "osc_portals4.h" -#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h" - int ompi_osc_portals4_fence(int assert, struct ompi_win_t *win) diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index 4a096b41037..3b197f9708c 100644 --- a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -1,12 +1,14 @@ /* - * Copyright (c) 2011-2013 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2011-2017 Sandia National Laboratories. All rights reserved. 
* Copyright (c) 2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -19,8 +21,6 @@ #include "osc_portals4.h" #include "osc_portals4_request.h" -#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h" - static int ompi_osc_portals4_get_op(struct ompi_op_t *op, ptl_op_t *ptl_op) @@ -178,400 +178,2614 @@ ompi_osc_portals4_get_dt(struct ompi_datatype_t *dt, ptl_datatype_t *ptl_dt) return 0; } +static ptl_size_t +number_of_fragments(ptl_size_t length, ptl_size_t maxlength) +{ + ptl_size_t nb_frag = length == 0 ? 1 : (length - 1) / maxlength + 1; + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d : %ld fragment(s)", __FUNCTION__, __LINE__, nb_frag)); + return nb_frag; +} -int -ompi_osc_portals4_rput(void *origin_addr, - int origin_count, - struct ompi_datatype_t *origin_dt, - int target, - OPAL_PTRDIFF_TYPE target_disp, - int target_count, - struct ompi_datatype_t *target_dt, - struct ompi_win_t *win, - struct ompi_request_t **ompi_req) +/* put in segments no larger than segment_length */ +static int +segmentedPut(int64_t *opcount, + ptl_handle_md_t md_h, + ptl_size_t origin_offset, + ptl_size_t put_length, + ptl_size_t segment_length, + ptl_ack_req_t ack_req, + ptl_process_t target_id, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + ptl_size_t target_offset, + void *user_ptr, + ptl_hdr_data_t hdr_data) { int ret; - ompi_osc_portals4_request_t *request; - ompi_osc_portals4_module_t *module = - (ompi_osc_portals4_module_t*) win->w_osc_module; - ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; - size_t offset; - - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "rput: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", - (unsigned long) 
origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, - target_count, target_dt->name, - (unsigned long) win)); - - OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request); - if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - *ompi_req = &request->super; - - offset = get_displacement(module, target) * target_disp; + ptl_size_t bytes_put = 0; + + do { + opal_atomic_add_64(opcount, 1); + + ptl_size_t frag_length = MIN(put_length, segment_length); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "Put size : %lu/%lu, offset:%lu", frag_length, put_length, bytes_put)); + ret = PtlPut(md_h, + origin_offset + bytes_put, + frag_length, + ack_req, + target_id, + pt_index, + match_bits, + target_offset + bytes_put, + user_ptr, + hdr_data); + if (PTL_OK != ret) { + opal_atomic_add_64(opcount, -1); + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d PtlPut failed with return value %d", + __FUNCTION__, __LINE__, ret); + return ret; + } + put_length -= frag_length; + bytes_put += frag_length; + } while (put_length); + return PTL_OK; +} - if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) || - !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { - OMPI_OSC_PORTALS4_REQUEST_RETURN(request); - opal_output(ompi_osc_base_framework.framework_output, - "MPI_Rput: transfer of non-contiguous memory is not currently supported.\n"); - return OMPI_ERR_NOT_SUPPORTED; - } else { - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected = 1; - ret = ompi_datatype_type_size(origin_dt, &length); - if (OMPI_SUCCESS != ret) { - OMPI_OSC_PORTALS4_REQUEST_RETURN(request); +/* get in segments no larger than segment_length */ +static int +segmentedGet(int64_t *opcount, + ptl_handle_md_t md_h, + ptl_size_t origin_offset, + ptl_size_t get_length, + ptl_size_t segment_length, + ptl_process_t target_id, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + ptl_size_t target_offset, + 
void *user_ptr) +{ + int ret; + ptl_size_t bytes_gotten = 0; + + do { + opal_atomic_add_64(opcount, 1); + + ptl_size_t frag_length = MIN(get_length, segment_length); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "Get size : %lu/%lu, offset:%lu", frag_length, get_length, bytes_gotten)); + + ret = PtlGet(md_h, + (ptl_size_t) origin_offset + bytes_gotten, + frag_length, + target_id, + pt_index, + match_bits, + target_offset + bytes_gotten, + user_ptr); + if (PTL_OK != ret) { + opal_atomic_add_64(opcount, -1); + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d PtlGet failed with return value %d", + __FUNCTION__, __LINE__, ret); return ret; } - length *= origin_count; - ret = PtlPut(module->req_md_h, - (ptl_size_t) origin_addr, - length, - PTL_ACK_REQ, - peer, - module->pt_idx, - module->match_bits, - offset, - request, - 0); - if (OMPI_SUCCESS != ret) { - OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + get_length -= frag_length; + bytes_gotten += frag_length; + } while (get_length); + return PTL_OK; +} + +/* atomic op in segments no larger than segment_length */ +static int +segmentedAtomic(int64_t *opcount, + ptl_handle_md_t md_h, + ptl_size_t origin_offset, + ptl_size_t length, + ptl_size_t segment_length, + ptl_process_t target_id, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + ptl_size_t target_offset, + void *user_ptr, + ptl_op_t ptl_op, + ptl_datatype_t ptl_dt) +{ + int ret; + ptl_size_t sent = 0; + + do { + opal_atomic_add_64(opcount, 1); + + ptl_size_t frag_length = MIN(length, segment_length); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "Atomic size : %lu/%lu, offset:%lu", frag_length, length, sent)); + ret = PtlAtomic(md_h, + (ptl_size_t) origin_offset + sent, + frag_length, + PTL_ACK_REQ, + target_id, + pt_index, + match_bits, + target_offset + sent, + user_ptr, + 0, + ptl_op, + ptl_dt); + if (PTL_OK != ret) { + opal_atomic_add_64(opcount, -1); + opal_output_verbose(1, 
ompi_osc_base_framework.framework_output, + "%s:%d PtlAtomic failed with return value %d", + __FUNCTION__, __LINE__, ret); return ret; } - } + length -= frag_length; + sent += frag_length; + } while (length); + return PTL_OK; +} - return OMPI_SUCCESS; +/* atomic op in segments no larger than segment_length */ +static int +segmentedFetchAtomic(int64_t *opcount, + ptl_handle_md_t result_md_h, + ptl_size_t result_offset, + ptl_handle_md_t origin_md_h, + ptl_size_t origin_offset, + ptl_size_t length, + ptl_size_t segment_length, + ptl_process_t target_id, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + ptl_size_t target_offset, + void *user_ptr, + ptl_op_t ptl_op, + ptl_datatype_t ptl_dt) +{ + int ret; + ptl_size_t sent = 0; + + do { + opal_atomic_add_64(opcount, 1); + + ptl_size_t frag_length = MIN(length, segment_length); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "Atomic size : %lu/%lu, offset:%lu", frag_length, length, sent)); + ret = PtlFetchAtomic(result_md_h, + result_offset + sent, + origin_md_h, + origin_offset + sent, + frag_length, + target_id, + pt_index, + match_bits, + target_offset + sent, + user_ptr, + 0, + ptl_op, + ptl_dt); + if (PTL_OK != ret) { + opal_atomic_add_64(opcount, -1); + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d PtlFetchAtomic failed with return value %d", + __FUNCTION__, __LINE__, ret); + return ret; + } + length -= frag_length; + sent += frag_length; + } while (length); + return PTL_OK; } +/* swap in segments no larger than segment_length */ +static int +segmentedSwap(int64_t *opcount, + ptl_handle_md_t result_md_h, + ptl_size_t result_offset, + ptl_handle_md_t origin_md_h, + ptl_size_t origin_offset, + ptl_size_t length, + ptl_size_t segment_length, + ptl_process_t target_id, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + ptl_size_t target_offset, + void *user_ptr, + ptl_datatype_t ptl_dt) +{ + int ret; + ptl_size_t sent = 0; + + do { + 
opal_atomic_add_64(opcount, 1); + + ptl_size_t frag_length = MIN(length, segment_length); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "Swap size : %lu/%lu, offset:%lu", frag_length, length, sent)); + ret = PtlSwap(result_md_h, + result_offset + sent, + origin_md_h, + (ptl_size_t) origin_offset + sent, + frag_length, + target_id, + pt_index, + match_bits, + target_offset + sent, + user_ptr, + 0, + NULL, + PTL_SWAP, + ptl_dt); + if (PTL_OK != ret) { + opal_atomic_add_64(opcount, -1); + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d PtlSwap failed with return value %d", + __FUNCTION__, __LINE__, ret); + return ret; + } + length -= frag_length; + sent += frag_length; + } while (length); + return PTL_OK; +} -int -ompi_osc_portals4_rget(void *origin_addr, - int origin_count, - struct ompi_datatype_t *origin_dt, - int target, - OPAL_PTRDIFF_TYPE target_disp, - int target_count, - struct ompi_datatype_t *target_dt, - struct ompi_win_t *win, - struct ompi_request_t **ompi_req) +static int +create_iov_list(const void *address, + int count, + ompi_datatype_t *datatype, + ptl_iovec_t **ptl_iovec, + ptl_size_t *ptl_iovec_count) { + struct iovec iov[OSC_PORTALS4_IOVEC_MAX]; + opal_convertor_t convertor; + uint32_t iov_count; + uint32_t iov_index, ptl_iovec_index; + /* needed for opal_convertor_raw but not used */ + size_t size; int ret; - ompi_osc_portals4_request_t *request; - ompi_osc_portals4_module_t *module = - (ompi_osc_portals4_module_t*) win->w_osc_module; - ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; - size_t offset; + bool done; - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", - (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, - target_count, target_dt->name, - (unsigned long) win)); + OBJ_CONSTRUCT(&convertor, opal_convertor_t); + ret = 
opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &datatype->super, count, + address, 0, &convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } - OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request); - if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - *ompi_req = &request->super; - offset = get_displacement(module, target) * target_disp; + *ptl_iovec_count = 0; + ptl_iovec_index = 0; + do { + /* decode segments of the data */ + iov_count = OSC_PORTALS4_IOVEC_MAX; + iov_index = 0; - if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) || - !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { - OMPI_OSC_PORTALS4_REQUEST_RETURN(request); - opal_output(ompi_osc_base_framework.framework_output, - "MPI_Rget: transfer of non-contiguous memory is not currently supported.\n"); - return OMPI_ERR_NOT_SUPPORTED; - } else { - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected = 1; - ret = ompi_datatype_type_size(origin_dt, &length); - if (OMPI_SUCCESS != ret) { - OMPI_OSC_PORTALS4_REQUEST_RETURN(request); - return ret; - } - length *= origin_count; - ret = PtlGet(module->req_md_h, - (ptl_size_t) origin_addr, - length, - peer, - module->pt_idx, - module->match_bits, - offset, - request); - if (OMPI_SUCCESS != ret) { - OMPI_OSC_PORTALS4_REQUEST_RETURN(request); - return ret; + /* opal_convertor_raw returns done when it has reached the end of the data */ + done = opal_convertor_raw (&convertor, iov, &iov_count, &size); + + *ptl_iovec_count += iov_count; + *ptl_iovec = (ptl_iovec_t *)realloc(*ptl_iovec, *ptl_iovec_count * sizeof(ptl_iovec_t)); + + while (iov_index != iov_count) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "adding iov[%d].[%p,%lu] to ptl_iovec", iov_index, iov[iov_index].iov_base, iov[iov_index].iov_len)); + (*ptl_iovec)[ptl_iovec_index].iov_base = iov[iov_index].iov_base; + (*ptl_iovec)[ptl_iovec_index].iov_len = iov[iov_index].iov_len; + + 
ptl_iovec_index++; + iov_index++; } - } + + assert(*ptl_iovec_count == ptl_iovec_index); + } while (!done); return OMPI_SUCCESS; -} +} -int -ompi_osc_portals4_raccumulate(void *origin_addr, - int origin_count, - struct ompi_datatype_t *origin_dt, - int target, - OPAL_PTRDIFF_TYPE target_disp, - int target_count, - struct ompi_datatype_t *target_dt, - struct ompi_op_t *op, - struct ompi_win_t *win, - struct ompi_request_t **ompi_req) +/* get from a contiguous remote to an iovec local */ +static int +get_to_iovec(ompi_osc_portals4_module_t *module, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + void *user_ptr) { int ret; - ompi_osc_portals4_request_t *request; - ompi_osc_portals4_module_t *module = - (ompi_osc_portals4_module_t*) win->w_osc_module; - ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; - size_t offset; - ptl_op_t ptl_op; - ptl_datatype_t ptl_dt; - - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "raccumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s 0x%lx", - (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, - target_count, target_dt->name, - op->o_name, - (unsigned long) win)); + size_t size; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptl_md_t md; + + if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { + PtlMDRelease(module->origin_iovec_md_h); + free(module->origin_iovec_list); + module->origin_iovec_md_h = PTL_INVALID_HANDLE; + module->origin_iovec_list = NULL; + } - OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request); - if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - *ompi_req = &request->super; + ptl_size_t iovec_count=0; + create_iov_list( + origin_address, + origin_count, + origin_datatype, + &module->origin_iovec_list, + &iovec_count); - 
offset = get_displacement(module, target) * target_disp; + ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ompi_datatype_type_size(origin_datatype, &size); + length = size * origin_count; - if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) || - !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { - OMPI_OSC_PORTALS4_REQUEST_RETURN(request); - opal_output(ompi_osc_base_framework.framework_output, - "MPI_Raccumulate: transfer of non-contiguous memory is not currently supported.\n"); - return OMPI_ERR_NOT_SUPPORTED; + md.start = module->origin_iovec_list; + md.length = iovec_count; + if (user_ptr) { + md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; } else { - ptl_size_t md_offset; + md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } + md.eq_handle = mca_osc_portals4_component.matching_eq_h; + md.ct_handle = module->ct_h; + ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlMDBind(iovec) failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } - ret = ompi_datatype_type_size(origin_dt, &length); - if (OMPI_SUCCESS != ret) { - OMPI_OSC_PORTALS4_REQUEST_RETURN(request); - return ret; - } - length *= origin_count; - sent = 0; + opal_atomic_add_64(&module->opcount, 1); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", + __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount)); + ret = 
PtlGet(module->origin_iovec_md_h, + (ptl_size_t) origin_lb, + length, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + user_ptr); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d PtlGet() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + opal_atomic_add_64(&module->opcount, -1); + return ret; + } - md_offset = (ptl_size_t) origin_addr; + return OMPI_SUCCESS; +} - do { - size_t msg_length = MIN(module->atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected++; +/* get to an iovec MD from a contiguous target using fragments no larger + * than max_fetch_atomic_size to guarantee atomic writes at the origin */ +static int +atomic_get_to_iovec(ompi_osc_portals4_module_t *module, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + void *user_ptr) +{ + int ret; + size_t size; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptl_md_t md; + + if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { + PtlMDRelease(module->origin_iovec_md_h); + free(module->origin_iovec_list); + module->origin_iovec_md_h = PTL_INVALID_HANDLE; + module->origin_iovec_list = NULL; + } - if (MPI_REPLACE == op) { - ret = PtlPut(module->req_md_h, - md_offset + sent, - msg_length, - PTL_ACK_REQ, - peer, - module->pt_idx, - module->match_bits, - offset + sent, - request, - 0); - } else { - ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + ptl_size_t iovec_count=0; + create_iov_list( + origin_address, + origin_count, + origin_datatype, + &module->origin_iovec_list, + &iovec_count); - ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; + ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent); + 
if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ompi_datatype_type_size(origin_datatype, &size); + length = size * origin_count; - ret = PtlAtomic(module->req_md_h, - offset + sent, - msg_length, - PTL_ACK_REQ, - peer, - module->pt_idx, - module->match_bits, - offset + sent, - request, - 0, - ptl_op, - ptl_dt); - } - if (OMPI_SUCCESS != ret) { - OMPI_OSC_PORTALS4_REQUEST_RETURN(request); - return ret; - } - sent += msg_length; - } while (sent < length); + md.start = module->origin_iovec_list; + md.length = iovec_count; + if (user_ptr) { + md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } else { + md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } + md.eq_handle = mca_osc_portals4_component.matching_eq_h; + md.ct_handle = module->ct_h; + ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlMDBind(iovec) failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", + __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount)); + ret = segmentedGet(&module->opcount, + module->origin_iovec_md_h, + (ptl_size_t) origin_lb, + length, + module->fetch_atomic_max, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + user_ptr); + if (PTL_OK != ret) { + return ret; } return OMPI_SUCCESS; } - -int -ompi_osc_portals4_rget_accumulate(void *origin_addr, - int origin_count, - struct ompi_datatype_t *origin_dt, - void *result_addr, - int result_count, - struct ompi_datatype_t 
*result_dt, - int target, - MPI_Aint target_disp, - int target_count, - struct ompi_datatype_t *target_dt, - struct ompi_op_t *op, - struct ompi_win_t *win, - struct ompi_request_t **ompi_req) +/* put from an iovec MD into a contiguous target */ +static int +put_from_iovec(ompi_osc_portals4_module_t *module, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + void *user_ptr) { int ret; - ompi_osc_portals4_request_t *request; - ompi_osc_portals4_module_t *module = - (ompi_osc_portals4_module_t*) win->w_osc_module; - ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; - size_t offset; - ptl_op_t ptl_op; - ptl_datatype_t ptl_dt; - - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx", - (unsigned long) origin_addr, origin_count, - origin_dt->name, (unsigned long) result_addr, - result_count, result_dt->name, - target, (int) target_disp, - target_count, target_dt->name, - op->o_name, - (unsigned long) win)); + size_t size; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptl_md_t md; + + if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { + PtlMDRelease(module->origin_iovec_md_h); + free(module->origin_iovec_list); + module->origin_iovec_md_h = PTL_INVALID_HANDLE; + module->origin_iovec_list = NULL; + } - OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request); - if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - *ompi_req = &request->super; + ptl_size_t iovec_count=0; + create_iov_list( + origin_address, + origin_count, + origin_datatype, + &module->origin_iovec_list, + &iovec_count); - offset = get_displacement(module, target) * target_disp; + ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent); + if (OMPI_SUCCESS != 
ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ompi_datatype_type_size(origin_datatype, &size); + length = size * origin_count; - if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) || - !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count) || - !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { - OMPI_OSC_PORTALS4_REQUEST_RETURN(request); - opal_output(ompi_osc_base_framework.framework_output, - "MPI_Rget_accumulate: transfer of non-contiguous memory is not currently supported.\n"); - return OMPI_ERR_NOT_SUPPORTED; + md.start = module->origin_iovec_list; + md.length = iovec_count; + if (user_ptr) { + md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; } else { - sent = 0; + md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } + md.eq_handle = mca_osc_portals4_component.matching_eq_h; + md.ct_handle = module->ct_h; + ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlMDBind(iovec) failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } - if (MPI_REPLACE == op) { - ptl_size_t result_md_offset, origin_md_offset; + opal_atomic_add_64(&module->opcount, 1); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)", + __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount)); + ret = PtlPut(module->origin_iovec_md_h, + (ptl_size_t) origin_lb, + length, + PTL_ACK_REQ, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + user_ptr, + 0); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, 
ompi_osc_base_framework.framework_output, + "%s,%d PtlPut() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + opal_atomic_add_64(&module->opcount, -1); + return ret; + } + + return OMPI_SUCCESS; +} + +/* put from an iovec MD into a contiguous target using fragments no larger + * than max_atomic_size to guarantee atomic writes at the target */ +static int +atomic_put_from_iovec(ompi_osc_portals4_module_t *module, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + void *user_ptr) +{ + int ret; + size_t size; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptl_md_t md; + + if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { + PtlMDRelease(module->origin_iovec_md_h); + free(module->origin_iovec_list); + module->origin_iovec_md_h = PTL_INVALID_HANDLE; + module->origin_iovec_list = NULL; + } + + ptl_size_t iovec_count=0; + create_iov_list( + origin_address, + origin_count, + origin_datatype, + &module->origin_iovec_list, + &iovec_count); + + ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ompi_datatype_type_size(origin_datatype, &size); + length = size * origin_count; + + md.start = module->origin_iovec_list; + md.length = iovec_count; + if (user_ptr) { + md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } else { + md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } + md.eq_handle = mca_osc_portals4_component.matching_eq_h; + md.ct_handle = module->ct_h; + ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h); + if (PTL_OK != ret) { + opal_output_verbose(1, 
ompi_osc_base_framework.framework_output, + "%s:%d: PtlMDBind(iovec) failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)", + __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount)); + ret = segmentedPut(&module->opcount, + module->origin_iovec_md_h, + (ptl_size_t) origin_lb, + length, + module->atomic_max, + PTL_ACK_REQ, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + NULL, + 0); + if (OMPI_SUCCESS != ret) { + return ret; + } + + return OMPI_SUCCESS; +} + +/* perform atomic operation on iovec local and contiguous remote */ +static int +atomic_from_iovec(ompi_osc_portals4_module_t *module, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + struct ompi_op_t *op, + void *user_ptr) +{ + int ret; + size_t size; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptl_md_t md; + ptl_op_t ptl_op; + ptl_datatype_t ptl_dt; + + if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { + PtlMDRelease(module->origin_iovec_md_h); + free(module->origin_iovec_list); + module->origin_iovec_md_h = PTL_INVALID_HANDLE; + module->origin_iovec_list = NULL; + } + + ptl_size_t iovec_count=0; + create_iov_list( + origin_address, + origin_count, + origin_datatype, + &module->origin_iovec_list, + &iovec_count); + + ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + ret = ompi_osc_portals4_get_op(op, &ptl_op); + if (OMPI_SUCCESS != ret) { + 
opal_output(ompi_osc_base_framework.framework_output, + "operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + + ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ompi_datatype_type_size(origin_datatype, &size); + length = size * origin_count; + + md.start = module->origin_iovec_list; + md.length = iovec_count; + if (user_ptr) { + md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } else { + md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } + md.eq_handle = mca_osc_portals4_component.matching_eq_h; + md.ct_handle = module->ct_h; + ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlMDBind(iovec) failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + + ret = segmentedAtomic(&module->opcount, + module->origin_iovec_md_h, + (ptl_size_t) origin_lb, + length, + module->atomic_max, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + user_ptr, + ptl_op, + ptl_dt); + if (OMPI_SUCCESS != ret) { + return ret; + } + + return OMPI_SUCCESS; +} + +/* perform atomic operation on iovec local and contiguous remote */ +static int +swap_to_iovec(ompi_osc_portals4_module_t *module, + const void *result_address, + int result_count, + ompi_datatype_t *result_datatype, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + void *user_ptr) +{ + int ret; + size_t size; + ptl_size_t iovec_count=0; + OPAL_PTRDIFF_TYPE length, result_lb, origin_lb, 
target_lb, extent; + ptl_md_t md; + ptl_datatype_t ptl_dt; + + if (module->result_iovec_md_h != PTL_INVALID_HANDLE) { + PtlMDRelease(module->result_iovec_md_h); + free(module->result_iovec_list); + module->result_iovec_md_h = PTL_INVALID_HANDLE; + module->result_iovec_list = NULL; + } + + create_iov_list( + result_address, + result_count, + result_datatype, + &module->result_iovec_list, + &iovec_count); + + md.start = module->result_iovec_list; + md.length = iovec_count; + if (user_ptr) { + md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } else { + md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } + md.eq_handle = mca_osc_portals4_component.matching_eq_h; + md.ct_handle = module->ct_h; + ret = PtlMDBind(module->ni_h, &md, &module->result_iovec_md_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlMDBind(iovec) failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + + if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { + PtlMDRelease(module->origin_iovec_md_h); + free(module->origin_iovec_list); + module->origin_iovec_md_h = PTL_INVALID_HANDLE; + module->origin_iovec_list = NULL; + } + + create_iov_list( + origin_address, + origin_count, + origin_datatype, + &module->origin_iovec_list, + &iovec_count); + + md.start = module->origin_iovec_list; + md.length = iovec_count; + md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + md.eq_handle = mca_osc_portals4_component.matching_eq_h; + md.ct_handle = module->ct_h; + ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlMDBind(iovec) failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + + ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt); + if (OMPI_SUCCESS != ret) { + 
opal_output(ompi_osc_base_framework.framework_output, + "datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + + ret = ompi_datatype_get_true_extent(result_datatype, &result_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ompi_datatype_type_size(origin_datatype, &size); + length = size * origin_count; + + ret = segmentedSwap(&module->opcount, + module->result_iovec_md_h, + (ptl_size_t) result_lb, + module->origin_iovec_md_h, + (ptl_size_t) origin_lb, + length, + module->fetch_atomic_max, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + user_ptr, + ptl_dt); + if (OMPI_SUCCESS != ret) { + return ret; + } + + return OMPI_SUCCESS; +} + +/* perform fetch atomic operation on iovec local and contiguous remote */ +static int +fetch_atomic_to_iovec(ompi_osc_portals4_module_t *module, + const void *result_address, + int result_count, + ompi_datatype_t *result_datatype, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + struct ompi_op_t *op, + void *user_ptr) +{ + int ret; + size_t size; + ptl_size_t iovec_count=0; + OPAL_PTRDIFF_TYPE length, result_lb, origin_lb, target_lb, extent; + ptl_md_t md; + ptl_op_t ptl_op; + ptl_datatype_t ptl_dt; + + if (module->result_iovec_md_h != PTL_INVALID_HANDLE) { + PtlMDRelease(module->result_iovec_md_h); + free(module->result_iovec_list); + module->result_iovec_md_h = PTL_INVALID_HANDLE; + module->result_iovec_list = NULL; + } + + create_iov_list( + result_address, + result_count, + result_datatype, + &module->result_iovec_list, + &iovec_count); + + 
md.start = module->result_iovec_list; + md.length = iovec_count; + if (user_ptr) { + md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } else { + md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + } + md.eq_handle = mca_osc_portals4_component.matching_eq_h; + md.ct_handle = module->ct_h; + ret = PtlMDBind(module->ni_h, &md, &module->result_iovec_md_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlMDBind(iovec) failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + + if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { + PtlMDRelease(module->origin_iovec_md_h); + free(module->origin_iovec_list); + module->origin_iovec_md_h = PTL_INVALID_HANDLE; + module->origin_iovec_list = NULL; + } + + create_iov_list( + origin_address, + origin_count, + origin_datatype, + &module->origin_iovec_list, + &iovec_count); + + md.start = module->origin_iovec_list; + md.length = iovec_count; + md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + md.eq_handle = mca_osc_portals4_component.matching_eq_h; + md.ct_handle = module->ct_h; + ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlMDBind(iovec) failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + + ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + ret = ompi_osc_portals4_get_op(op, &ptl_op); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + + ret = ompi_datatype_get_true_extent(result_datatype, &result_lb, 
&extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ompi_datatype_type_size(origin_datatype, &size); + length = size * origin_count; + + ret = segmentedFetchAtomic(&module->opcount, + module->result_iovec_md_h, + (ptl_size_t) result_lb, + module->origin_iovec_md_h, + (ptl_size_t) origin_lb, + length, + module->fetch_atomic_max, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + user_ptr, + ptl_op, + ptl_dt); + if (OMPI_SUCCESS != ret) { + return ret; + } + + return OMPI_SUCCESS; +} + +/* + * Derived from ompi_osc_rdma_master_noncontig() + */ + +/* put in the largest chunks possible given the noncontiguous restriction */ +static int +put_to_noncontig(int64_t *opcount, + ptl_handle_md_t md_h, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + void *user_ptr) +{ + struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX]; + opal_convertor_t origin_convertor, target_convertor; + uint32_t origin_iov_count, target_iov_count; + uint32_t origin_iov_index, target_iov_index; + /* needed for opal_convertor_raw but not used */ + size_t origin_size, target_size, rdma_len; + size_t max_rdma_len = mca_osc_portals4_component.ptl_max_msg_size; + int ret; + bool done; + + /* prepare convertors for the source and target. these convertors will be used to determine the + * contiguous segments within the source and target. 
*/ + OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count, + (void*)origin_address, 0, &origin_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + OBJ_CONSTRUCT(&target_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count, + (void *)NULL, 0, &target_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + origin_iov_index = 0; + origin_iov_count = 0; + + do { + /* decode segments of the remote data */ + target_iov_count = OSC_PORTALS4_IOVEC_MAX; + target_iov_index = 0; + + /* opal_convertor_raw returns done when it has reached the end of the data */ + done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size); + + /* loop on the target segments until we have exhaused the decoded source data */ + while (target_iov_index != target_iov_count) { + if (origin_iov_index == origin_iov_count) { + /* decode segments of the target buffer */ + origin_iov_count = OSC_PORTALS4_IOVEC_MAX; + origin_iov_index = 0; + (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size); + } + + /* we already checked that the target was large enough. this should be impossible */ + assert (0 != origin_iov_count); + + /* determine how much to transfer in this operation */ + rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); + + opal_atomic_add_64(opcount, 1); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "performing rdma on contiguous region. 
local: %p, remote: %p, len: %lu", + origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base, + (unsigned long) target_iovec[target_iov_index].iov_len)); + + ret = PtlPut(md_h, + (ptl_size_t)origin_iovec[origin_iov_index].iov_base, + rdma_len, + PTL_ACK_REQ, + peer, + pt_index, + match_bits, + offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, + user_ptr, + 0); + if (OPAL_UNLIKELY(PTL_OK != ret)) { + opal_atomic_add_64(opcount, -1); + return ret; + } + + /* adjust io vectors */ + origin_iovec[origin_iov_index].iov_len -= rdma_len; + target_iovec[target_iov_index].iov_len -= rdma_len; + origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len); + target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len); + + origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len); + target_iov_index += (0 == target_iovec[target_iov_index].iov_len); + } + } while (!done); + + /* clean up convertors */ + opal_convertor_cleanup (&origin_convertor); + OBJ_DESTRUCT(&origin_convertor); + opal_convertor_cleanup (&target_convertor); + OBJ_DESTRUCT(&target_convertor); + + return OMPI_SUCCESS; +} + +/* put in fragments no larger than max_atomic_size to guarantee atomic writes at the target */ +static int +atomic_put_to_noncontig(ompi_osc_portals4_module_t *module, + ptl_handle_md_t md_h, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + void *user_ptr) +{ + struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX]; + opal_convertor_t origin_convertor, target_convertor; + uint32_t origin_iov_count, target_iov_count; + uint32_t origin_iov_index, target_iov_index; + /* needed for opal_convertor_raw but not used */ + size_t 
origin_size, target_size, rdma_len; + size_t max_rdma_len = module->atomic_max; + int ret; + bool done; + + /* prepare convertors for the source and target. these convertors will be used to determine the + * contiguous segments within the source and target. */ + OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count, + (void*)origin_address, 0, &origin_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + OBJ_CONSTRUCT(&target_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count, + (void *)NULL, 0, &target_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + origin_iov_index = 0; + origin_iov_count = 0; + + do { + /* decode segments of the remote data */ + target_iov_count = OSC_PORTALS4_IOVEC_MAX; + target_iov_index = 0; + + /* opal_convertor_raw returns done when it has reached the end of the data */ + done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size); + + /* loop on the target segments until we have exhaused the decoded source data */ + while (target_iov_index != target_iov_count) { + if (origin_iov_index == origin_iov_count) { + /* decode segments of the target buffer */ + origin_iov_count = OSC_PORTALS4_IOVEC_MAX; + origin_iov_index = 0; + (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size); + } + + /* we already checked that the target was large enough. 
this should be impossible */ + assert (0 != origin_iov_count); + + /* determine how much to transfer in this operation */ + rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); + + opal_atomic_add_64(&module->opcount, 1); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", + origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base, + (unsigned long) target_iovec[target_iov_index].iov_len)); + + ret = PtlPut(md_h, + (ptl_size_t)origin_iovec[origin_iov_index].iov_base, + rdma_len, + PTL_ACK_REQ, + peer, + pt_index, + match_bits, + offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, + user_ptr, + 0); + if (OPAL_UNLIKELY(PTL_OK != ret)) { + opal_atomic_add_64(&module->opcount, -1); + return ret; + } + + /* adjust io vectors */ + origin_iovec[origin_iov_index].iov_len -= rdma_len; + target_iovec[target_iov_index].iov_len -= rdma_len; + origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len); + target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len); + + origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len); + target_iov_index += (0 == target_iovec[target_iov_index].iov_len); + } + } while (!done); + + return OMPI_SUCCESS; +} + +/* perform atomic operation on (non)contiguous local and noncontiguous remote */ +static int +atomic_to_noncontig(ompi_osc_portals4_module_t *module, + ptl_handle_md_t md_h, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + struct ompi_op_t *op, + void *user_ptr) +{ + struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], 
target_iovec[OSC_PORTALS4_IOVEC_MAX]; + opal_convertor_t origin_convertor, target_convertor; + uint32_t origin_iov_count, target_iov_count; + uint32_t origin_iov_index, target_iov_index; + ptl_op_t ptl_op; + ptl_datatype_t ptl_dt; + /* needed for opal_convertor_raw but not used */ + size_t origin_size, target_size, atomic_len; + int ret; + bool done; + + /* prepare convertors for the source and target. these convertors will be used to determine the + * contiguous segments within the source and target. */ + OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count, + (void*)origin_address, 0, &origin_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + OBJ_CONSTRUCT(&target_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count, + (void *)NULL, 0, &target_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + ret = ompi_osc_portals4_get_op(op, &ptl_op); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + + origin_iov_index = 0; + origin_iov_count = 0; + + do { + /* decode segments of the remote data */ + target_iov_count = OSC_PORTALS4_IOVEC_MAX; + target_iov_index = 0; + + /* opal_convertor_raw returns done when it has reached the end of the data */ + done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size); + + /* loop on the target segments until we have exhaused the decoded source data */ + while (target_iov_index != target_iov_count) { + if 
(origin_iov_index == origin_iov_count) { + /* decode segments of the target buffer */ + origin_iov_count = OSC_PORTALS4_IOVEC_MAX; + origin_iov_index = 0; + (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size); + } + + /* we already checked that the target was large enough. this should be impossible */ + assert (0 != origin_iov_count); + + /* determine how much to transfer in this operation */ + atomic_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), module->atomic_max); + + opal_atomic_add_64(&module->opcount, 1); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", + origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base, + (unsigned long) target_iovec[target_iov_index].iov_len)); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Atomic", __FUNCTION__, __LINE__)); + ret = PtlAtomic(md_h, + (ptl_size_t)origin_iovec[origin_iov_index].iov_base, + atomic_len, + PTL_ACK_REQ, + peer, + pt_index, + match_bits, + offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, + user_ptr, + 0, + ptl_op, + ptl_dt); + if (OPAL_UNLIKELY(PTL_OK != ret)) { + opal_atomic_add_64(&module->opcount, -1); + return ret; + } + + /* adjust io vectors */ + origin_iovec[origin_iov_index].iov_len -= atomic_len; + target_iovec[target_iov_index].iov_len -= atomic_len; + origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + atomic_len); + target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + atomic_len); + + origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len); + target_iov_index += (0 == target_iovec[target_iov_index].iov_len); + } + } while (!done); + + return OMPI_SUCCESS; +} + +/* get from a noncontiguous remote to an (non)contiguous local */ 
+static int +get_from_noncontig(int64_t *opcount, + ptl_handle_md_t md_h, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + void *user_ptr) +{ + struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX]; + opal_convertor_t origin_convertor, target_convertor; + uint32_t origin_iov_count, target_iov_count; + uint32_t origin_iov_index, target_iov_index; + /* needed for opal_convertor_raw but not used */ + size_t origin_size, target_size, rdma_len; + size_t max_rdma_len = mca_osc_portals4_component.ptl_max_msg_size; + int ret; + bool done; + + /* prepare convertors for the source and target. these convertors will be used to determine the + * contiguous segments within the source and target. */ + OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count, + (void*)origin_address, 0, &origin_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + OBJ_CONSTRUCT(&target_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count, + (void *)NULL, 0, &target_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + origin_iov_index = 0; + origin_iov_count = 0; + + do { + /* decode segments of the remote data */ + target_iov_count = OSC_PORTALS4_IOVEC_MAX; + target_iov_index = 0; + + /* opal_convertor_raw returns done when it has reached the end of the data */ + done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size); + + /* loop on the target segments until we have exhaused the decoded source data */ + while (target_iov_index != target_iov_count) { + if (origin_iov_index == origin_iov_count) { 
+ /* decode segments of the target buffer */ + origin_iov_count = OSC_PORTALS4_IOVEC_MAX; + origin_iov_index = 0; + (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size); + } + + /* we already checked that the target was large enough. this should be impossible */ + assert (0 != origin_iov_count); + + /* determine how much to transfer in this operation */ + rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); + + opal_atomic_add_64(opcount, 1); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", + origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base, + (unsigned long) target_iovec[target_iov_index].iov_len)); + + ret = PtlGet(md_h, + (ptl_size_t)origin_iovec[origin_iov_index].iov_base, + rdma_len, + peer, + pt_index, + match_bits, + offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, + user_ptr); + if (OPAL_UNLIKELY(PTL_OK != ret)) { + opal_atomic_add_64(opcount, -1); + return ret; + } + + /* adjust io vectors */ + origin_iovec[origin_iov_index].iov_len -= rdma_len; + target_iovec[target_iov_index].iov_len -= rdma_len; + origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len); + target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len); + + origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len); + target_iov_index += (0 == target_iovec[target_iov_index].iov_len); + } + } while (!done); + + return OMPI_SUCCESS; +} + +/* get from a noncontiguous remote to an (non)contiguous local */ +static int +atomic_get_from_noncontig(ompi_osc_portals4_module_t *module, + ptl_handle_md_t md_h, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + 
ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + void *user_ptr) +{ + struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX]; + opal_convertor_t origin_convertor, target_convertor; + uint32_t origin_iov_count, target_iov_count; + uint32_t origin_iov_index, target_iov_index; + /* needed for opal_convertor_raw but not used */ + size_t origin_size, target_size, rdma_len; + size_t max_rdma_len = module->fetch_atomic_max; + int ret; + bool done; + + /* prepare convertors for the source and target. these convertors will be used to determine the + * contiguous segments within the source and target. */ + OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count, + (void*)origin_address, 0, &origin_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + OBJ_CONSTRUCT(&target_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count, + (void *)NULL, 0, &target_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + origin_iov_index = 0; + origin_iov_count = 0; + + do { + /* decode segments of the remote data */ + target_iov_count = OSC_PORTALS4_IOVEC_MAX; + target_iov_index = 0; + + /* opal_convertor_raw returns done when it has reached the end of the data */ + done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size); + + /* loop on the target segments until we have exhaused the decoded source data */ + while (target_iov_index != target_iov_count) { + if (origin_iov_index == origin_iov_count) { + /* decode segments of the target buffer */ + origin_iov_count = OSC_PORTALS4_IOVEC_MAX; + origin_iov_index = 0; + (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size); + } + + /* 
we already checked that the target was large enough. this should be impossible */ + assert (0 != origin_iov_count); + + /* determine how much to transfer in this operation */ + rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); + + opal_atomic_add_64(&module->opcount, 1); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", + origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base, + (unsigned long) target_iovec[target_iov_index].iov_len)); + + ret = PtlGet(md_h, + (ptl_size_t)origin_iovec[origin_iov_index].iov_base, + rdma_len, + peer, + pt_index, + match_bits, + offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, + user_ptr); + if (OPAL_UNLIKELY(PTL_OK != ret)) { + opal_atomic_add_64(&module->opcount, -1); + return ret; + } + + /* adjust io vectors */ + origin_iovec[origin_iov_index].iov_len -= rdma_len; + target_iovec[target_iov_index].iov_len -= rdma_len; + origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len); + target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len); + + origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len); + target_iov_index += (0 == target_iovec[target_iov_index].iov_len); + } + } while (!done); + + return OMPI_SUCCESS; +} + +/* swap from a noncontiguous remote to an (non)contiguous local */ +static int +swap_from_noncontig(ompi_osc_portals4_module_t *module, + ptl_handle_md_t result_md_h, + const void *result_address, + int result_count, + ompi_datatype_t *result_datatype, + ptl_handle_md_t origin_md_h, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + 
ptl_match_bits_t match_bits, + void *user_ptr) +{ + struct iovec result_iovec[OSC_PORTALS4_IOVEC_MAX], origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX]; + opal_convertor_t result_convertor, origin_convertor, target_convertor; + uint32_t result_iov_count, origin_iov_count, target_iov_count; + uint32_t result_iov_index, origin_iov_index, target_iov_index; + /* needed for opal_convertor_raw but not used */ + size_t result_size, origin_size, target_size, rdma_len; + size_t max_rdma_len = module->fetch_atomic_max; + ptl_datatype_t ptl_dt; + + int ret; + bool done; + + /* prepare convertors for the result, source and target. these convertors will be used to determine the + * contiguous segments within the source and target. */ + OBJ_CONSTRUCT(&result_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &result_datatype->super, result_count, + (void*)result_address, 0, &result_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count, + (void*)origin_address, 0, &origin_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + OBJ_CONSTRUCT(&target_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count, + (void *)NULL, 0, &target_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + + result_iov_index = 0; + result_iov_count = 0; + origin_iov_index = 0; + origin_iov_count = 0; + + do { + /* decode segments of the remote data */ + target_iov_count = 
OSC_PORTALS4_IOVEC_MAX; + target_iov_index = 0; + + /* opal_convertor_raw returns done when it has reached the end of the data */ + done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size); + + /* loop on the target segments until we have exhaused the decoded source data */ + while (target_iov_index != target_iov_count) { + if (result_iov_index == result_iov_count) { + /* decode segments of the target buffer */ + result_iov_count = OSC_PORTALS4_IOVEC_MAX; + result_iov_index = 0; + (void) opal_convertor_raw (&result_convertor, result_iovec, &result_iov_count, &result_size); + } + if (origin_iov_index == origin_iov_count) { + /* decode segments of the target buffer */ + origin_iov_count = OSC_PORTALS4_IOVEC_MAX; + origin_iov_index = 0; + (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size); + } + + /* we already checked that the target was large enough. this should be impossible */ + assert (0 != result_iov_count); + assert (0 != origin_iov_count); + + /* determine how much to transfer in this operation */ + rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); + + opal_atomic_add_64(&module->opcount, 1); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "performing swap on contiguous region. 
result: %p origin: %p, target: %p, len: %lu", + result_iovec[result_iov_index].iov_base, + origin_iovec[origin_iov_index].iov_base, + target_iovec[target_iov_index].iov_base, + (unsigned long) target_iovec[target_iov_index].iov_len)); + + ret = PtlSwap(result_md_h, + (ptl_size_t)result_iovec[result_iov_index].iov_base, + origin_md_h, + (ptl_size_t)origin_iovec[origin_iov_index].iov_base, + rdma_len, + peer, + pt_index, + match_bits, + offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, + user_ptr, + 0, + NULL, + PTL_SWAP, + ptl_dt); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d PtlSwap failed with return value %d", + __FUNCTION__, __LINE__, ret); + opal_atomic_add_64(&module->opcount, -1); + return ret; + } + + /* adjust io vectors */ + result_iovec[result_iov_index].iov_len -= rdma_len; + origin_iovec[origin_iov_index].iov_len -= rdma_len; + target_iovec[target_iov_index].iov_len -= rdma_len; + result_iovec[result_iov_index].iov_base = (void *)((intptr_t) result_iovec[result_iov_index].iov_base + rdma_len); + origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len); + target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len); + + result_iov_index += (0 == result_iovec[result_iov_index].iov_len); + origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len); + target_iov_index += (0 == target_iovec[target_iov_index].iov_len); + } + } while (!done); + + return OMPI_SUCCESS; +} + +/* swap from a noncontiguous remote to an (non)contiguous local */ +static int +fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module, + ptl_handle_md_t result_md_h, + const void *result_address, + int result_count, + ompi_datatype_t *result_datatype, + ptl_handle_md_t origin_md_h, + const void *origin_address, + int origin_count, + ompi_datatype_t *origin_datatype, + ptl_process_t peer, + int 
target_count, + ompi_datatype_t *target_datatype, + size_t offset, + ptl_pt_index_t pt_index, + ptl_match_bits_t match_bits, + struct ompi_op_t *op, + void *user_ptr) +{ + struct iovec result_iovec[OSC_PORTALS4_IOVEC_MAX], origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX]; + opal_convertor_t result_convertor, origin_convertor, target_convertor; + uint32_t result_iov_count, origin_iov_count, target_iov_count; + uint32_t result_iov_index, origin_iov_index, target_iov_index; + /* needed for opal_convertor_raw but not used */ + size_t result_size, origin_size, target_size, rdma_len; + size_t max_rdma_len = module->fetch_atomic_max; + ptl_op_t ptl_op; + ptl_datatype_t ptl_dt; + + int ret; + bool done; + + /* prepare convertors for the result, source and target. these convertors will be used to determine the + * contiguous segments within the source and target. */ + OBJ_CONSTRUCT(&result_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &result_datatype->super, result_count, + (void*)result_address, 0, &result_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count, + (void*)origin_address, 0, &origin_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + OBJ_CONSTRUCT(&target_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count, + (void *)NULL, 0, &target_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + ret = ompi_osc_portals4_get_op(op, 
&ptl_op); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + + result_iov_index = 0; + result_iov_count = 0; + origin_iov_index = 0; + origin_iov_count = 0; + + do { + /* decode segments of the remote data */ + target_iov_count = OSC_PORTALS4_IOVEC_MAX; + target_iov_index = 0; + + /* opal_convertor_raw returns done when it has reached the end of the data */ + done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size); + + /* loop on the target segments until we have exhausted the decoded target data */ + while (target_iov_index != target_iov_count) { + if (result_iov_index == result_iov_count) { + /* decode segments of the result buffer */ + result_iov_count = OSC_PORTALS4_IOVEC_MAX; + result_iov_index = 0; + (void) opal_convertor_raw (&result_convertor, result_iovec, &result_iov_count, &result_size); + } + if (origin_iov_index == origin_iov_count) { + /* decode segments of the origin buffer */ + origin_iov_count = OSC_PORTALS4_IOVEC_MAX; + origin_iov_index = 0; + (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size); + } + + /* we already checked that the target was large enough. this should be impossible */ + assert (0 != result_iov_count); + assert (0 != origin_iov_count); + + /* determine how much to transfer in this operation */ + rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len); + + opal_atomic_add_64(&module->opcount, 1); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "performing swap on contiguous region. 
result: %p origin: %p, target: %p, len: %lu", + result_iovec[result_iov_index].iov_base, + origin_iovec[origin_iov_index].iov_base, + target_iovec[target_iov_index].iov_base, + (unsigned long) target_iovec[target_iov_index].iov_len)); + + ret = PtlFetchAtomic(result_md_h, + (ptl_size_t)result_iovec[result_iov_index].iov_base, + origin_md_h, + (ptl_size_t)origin_iovec[origin_iov_index].iov_base, + rdma_len, + peer, + pt_index, + match_bits, + offset + (ptl_size_t)target_iovec[target_iov_index].iov_base, + user_ptr, + 0, + ptl_op, + ptl_dt); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d PtlFetchAtomic failed with return value %d", + __FUNCTION__, __LINE__, ret); + opal_atomic_add_64(&module->opcount, -1); + return ret; + } + + /* adjust io vectors */ + result_iovec[result_iov_index].iov_len -= rdma_len; + origin_iovec[origin_iov_index].iov_len -= rdma_len; + target_iovec[target_iov_index].iov_len -= rdma_len; + result_iovec[result_iov_index].iov_base = (void *)((intptr_t) result_iovec[result_iov_index].iov_base + rdma_len); + origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len); + target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len); + + result_iov_index += (0 == result_iovec[result_iov_index].iov_len); + origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len); + target_iov_index += (0 == target_iovec[target_iov_index].iov_len); + } + } while (!done); + + return OMPI_SUCCESS; +} + +int +ompi_osc_portals4_rput(const void *origin_addr, + int origin_count, + struct ompi_datatype_t *origin_dt, + int target, + OPAL_PTRDIFF_TYPE target_disp, + int target_count, + struct ompi_datatype_t *target_dt, + struct ompi_win_t *win, + struct ompi_request_t **ompi_req) +{ + int ret; + ompi_osc_portals4_request_t *request; + ompi_osc_portals4_module_t *module = + (ompi_osc_portals4_module_t*) 
win->w_osc_module; + ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); + size_t size, offset; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rput: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", + (unsigned long) origin_addr, origin_count, + origin_dt->name, target, (unsigned long) target_disp, + target_count, target_dt->name, + (unsigned long) win)); + + OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request); + if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + *ompi_req = &request->super; + + offset = get_displacement(module, target) * target_disp; + + if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { + ret = put_to_noncontig(&module->opcount, + module->req_md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + request); + if (PTL_OK != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d put_to_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) { + ret = put_from_iovec(module, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + request); + if (PTL_OK != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d put_from_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else { + ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + 
ompi_datatype_type_size(origin_dt, &size); + length = size * origin_count; + + request->ops_expected += number_of_fragments(length, mca_osc_portals4_component.ptl_max_msg_size); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d RPut(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)", + __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount)); + ret = segmentedPut(&module->opcount, + module->req_md_h, + (ptl_size_t) origin_addr + origin_lb, + length, + mca_osc_portals4_component.ptl_max_msg_size, + PTL_ACK_REQ, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + request, + 0); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + } + + return OMPI_SUCCESS; +} + + +int +ompi_osc_portals4_rget(void *origin_addr, + int origin_count, + struct ompi_datatype_t *origin_dt, + int target, + OPAL_PTRDIFF_TYPE target_disp, + int target_count, + struct ompi_datatype_t *target_dt, + struct ompi_win_t *win, + struct ompi_request_t **ompi_req) +{ + int ret; + ompi_osc_portals4_request_t *request; + ompi_osc_portals4_module_t *module = + (ompi_osc_portals4_module_t*) win->w_osc_module; + ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); + size_t offset, size; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rget: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", + (unsigned long) origin_addr, origin_count, + origin_dt->name, target, (unsigned long) target_disp, + target_count, target_dt->name, + (unsigned long) win)); + + OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request); + if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + *ompi_req = &request->super; + + offset = get_displacement(module, target) * target_disp; + + if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { + ret = 
get_from_noncontig(&module->opcount, + module->req_md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + request); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d get_from_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) { + ret = get_to_iovec(module, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + request); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d get_to_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else { + ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ompi_datatype_type_size(origin_dt, &size); + length = size * origin_count; + + request->ops_expected += number_of_fragments(length, mca_osc_portals4_component.ptl_max_msg_size); + + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d RGet", __FUNCTION__, __LINE__)); + ret = segmentedGet(&module->opcount, + module->req_md_h, + (ptl_size_t) origin_addr + origin_lb, + length, + mca_osc_portals4_component.ptl_max_msg_size, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + request); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + } + + return OMPI_SUCCESS; +} + + +int +ompi_osc_portals4_raccumulate(const void *origin_addr, + int origin_count, + struct ompi_datatype_t *origin_dt, + int target, + OPAL_PTRDIFF_TYPE target_disp, + int target_count, + struct ompi_datatype_t *target_dt, + struct ompi_op_t *op, + struct 
ompi_win_t *win, + struct ompi_request_t **ompi_req) +{ + int ret; + ompi_osc_portals4_request_t *request; + ompi_osc_portals4_module_t *module = + (ompi_osc_portals4_module_t*) win->w_osc_module; + ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); + size_t offset, size; + ptl_op_t ptl_op; + ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb, extent; + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "raccumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s 0x%lx", + (unsigned long) origin_addr, origin_count, + origin_dt->name, target, (unsigned long) target_disp, + target_count, target_dt->name, + op->o_name, + (unsigned long) win)); + + OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request); + if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + *ompi_req = &request->super; + + offset = get_displacement(module, target) * target_disp; + + if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { + if (MPI_REPLACE == op) { + ret = atomic_put_to_noncontig(module, + module->req_md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + request); + if (PTL_OK != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d atomic_put_to_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else { + ret = atomic_to_noncontig(module, + module->req_md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + op, + request); + if (PTL_OK != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d atomic_to_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } + } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) { + if 
(MPI_REPLACE == op) { + ret = atomic_put_from_iovec(module, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + request); + if (PTL_OK != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d atomic_put_from_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else { + ret = atomic_from_iovec(module, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + op, + request); + if (PTL_OK != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d atomic_from_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } + } else { + ptl_size_t md_offset; + + ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ompi_datatype_type_size(origin_dt, &size); + length = size * origin_count; + sent = 0; + + md_offset = (ptl_size_t) origin_addr; + + request->ops_expected += number_of_fragments(length, module->atomic_max); + + if (MPI_REPLACE == op) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Put", __FUNCTION__, __LINE__)); + ret = segmentedPut(&module->opcount, + module->req_md_h, + md_offset + origin_lb, + length, + module->atomic_max, + PTL_ACK_REQ, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + request, + 0); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + } else { + ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); + if (OMPI_SUCCESS != ret) { + 
OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + opal_output(ompi_osc_base_framework.framework_output, + "datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + ret = ompi_osc_portals4_get_op(op, &ptl_op); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + opal_output(ompi_osc_base_framework.framework_output, + "operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + do { + size_t msg_length = MIN(module->atomic_max, length - sent); + + (void)opal_atomic_add_64(&module->opcount, 1); + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Atomic", __FUNCTION__, __LINE__)); + ret = PtlAtomic(module->req_md_h, + md_offset + sent + origin_lb, + msg_length, + PTL_ACK_REQ, + peer, + module->pt_idx, + module->match_bits, + offset + sent + target_lb, + request, + 0, + ptl_op, + ptl_dt); + if (OMPI_SUCCESS != ret) { + (void)opal_atomic_add_64(&module->opcount, -1); + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + sent += msg_length; + } while (sent < length); + } + } + + return OMPI_SUCCESS; +} + + +int +ompi_osc_portals4_rget_accumulate(const void *origin_addr, + int origin_count, + struct ompi_datatype_t *origin_dt, + void *result_addr, + int result_count, + struct ompi_datatype_t *result_dt, + int target, + OPAL_PTRDIFF_TYPE target_disp, + int target_count, + struct ompi_datatype_t *target_dt, + struct ompi_op_t *op, + struct ompi_win_t *win, + struct ompi_request_t **ompi_req) +{ + int ret; + ompi_osc_portals4_request_t *request; + ompi_osc_portals4_module_t *module = + (ompi_osc_portals4_module_t*) win->w_osc_module; + ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); + size_t target_offset, size; + ptl_op_t ptl_op; + ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, result_lb, extent; + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 
0x%lx", + (unsigned long) origin_addr, origin_count, + origin_dt->name, (unsigned long) result_addr, + result_count, result_dt->name, + target, (unsigned long) target_disp, + target_count, target_dt->name, + op->o_name, + (unsigned long) win)); + + OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request); + if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + *ompi_req = &request->super; + + target_offset = get_displacement(module, target) * target_disp; + + if (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { + if (MPI_REPLACE == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rget_accumulate: MPI_REPLACE non-contiguous target")); + ret = swap_from_noncontig(module, + module->req_md_h, + result_addr, + result_count, + result_dt, + module->md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + request); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d swap_from_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + } else if (MPI_NO_OP == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rget_accumulate: MPI_NO_OP non-contiguous target")); + ret = atomic_get_from_noncontig(module, + module->req_md_h, + result_addr, + result_count, + result_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + request); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d atomic_get_from_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + } else { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rget_accumulate: other-op non-contiguous target")); + ret = 
fetch_atomic_from_noncontig(module, + module->req_md_h, + result_addr, + result_count, + result_dt, + module->md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + op, + request); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d fetch_atomic_from_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + } + } else if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) || + (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count))) { + if (MPI_REPLACE == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rget_accumulate: MPI_REPLACE non-contiguous origin/result")); + ret = swap_to_iovec(module, + result_addr, + result_count, + result_dt, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + request); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d swap_to_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + } else if (MPI_NO_OP == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rget_accumulate: MPI_NO_OP non-contiguous origin/result")); + ret = atomic_get_to_iovec(module, + result_addr, + result_count, + result_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + request); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d atomic_get_to_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + } else { + OPAL_OUTPUT_VERBOSE((50, 
ompi_osc_base_framework.framework_output, + "rget_accumulate: other-op non-contiguous origin/result")); + ret = fetch_atomic_to_iovec(module, + result_addr, + result_count, + result_dt, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + op, + request); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d fetch_atomic_to_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + } + } else { + if (MPI_REPLACE == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rget_accumulate: MPI_REPLACE contiguous")); + ptl_size_t result_md_offset, origin_md_offset; + + ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ompi_datatype_type_size(origin_dt, &size); + length = size * origin_count; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "datatype is not currently supported"); OMPI_OSC_PORTALS4_REQUEST_RETURN(request); - return ret; + return OMPI_ERR_NOT_SUPPORTED; } - length *= origin_count; result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; - do { - size_t msg_length = MIN(module->fetch_atomic_max, length - sent); + request->ops_expected += number_of_fragments(length, module->fetch_atomic_max); - (void)opal_atomic_add_64(&module->opcount, 
1); - request->ops_expected++; - - ret = PtlSwap(module->req_md_h, - result_md_offset + sent, - module->md_h, - origin_md_offset + sent, - msg_length, - peer, - module->pt_idx, - module->match_bits, - offset + sent, - request, - 0, - NULL, - PTL_SWAP, - ptl_dt); - sent += msg_length; - } while (sent < length); + ret = segmentedSwap(&module->opcount, + module->req_md_h, + result_md_offset + result_lb, + module->md_h, + origin_md_offset + origin_lb, + length, + module->fetch_atomic_max, + peer, + module->pt_idx, + module->match_bits, + target_offset + target_lb, + request, + ptl_dt); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } } else if (MPI_NO_OP == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rget_accumulate: MPI_NO_OP contiguous")); ptl_size_t md_offset; - ret = ompi_datatype_type_size(target_dt, &length); + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } - length *= target_count; + ompi_datatype_type_size(target_dt, &size); + length = size * target_count; md_offset = (ptl_size_t) result_addr; - do { - size_t msg_length = MIN(module->fetch_atomic_max, length - sent); - - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected++; - - ret = PtlGet(module->req_md_h, - md_offset + sent, - msg_length, - peer, - module->pt_idx, - module->match_bits, - offset + sent, - request); - sent += msg_length; - } while (sent < length); + request->ops_expected += number_of_fragments(length, module->fetch_atomic_max); + + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__)); + ret = segmentedGet(&module->opcount, + module->req_md_h, + (ptl_size_t) md_offset 
+ result_lb, + length, + module->fetch_atomic_max, + peer, + module->pt_idx, + module->match_bits, + target_offset + target_lb, + request); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } } else { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "rget_accumulate: other-op contiguous")); ptl_size_t result_md_offset, origin_md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } + ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent); if (OMPI_SUCCESS != ret) { OMPI_OSC_PORTALS4_REQUEST_RETURN(request); return ret; } - length *= origin_count; + ompi_datatype_type_size(origin_dt, &size); + length = size * origin_count; result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "datatype is not currently supported"); + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return OMPI_ERR_NOT_SUPPORTED; + } ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; - - do { - size_t msg_length = MIN(module->fetch_atomic_max, length - sent); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "operation is not currently supported"); + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return OMPI_ERR_NOT_SUPPORTED; + } - (void)opal_atomic_add_64(&module->opcount, 1); - request->ops_expected++; - - ret = PtlFetchAtomic(module->req_md_h, - result_md_offset + sent, - module->md_h, - origin_md_offset + sent, - 
msg_length, - peer, - module->pt_idx, - module->match_bits, - offset + sent, - request, - 0, - ptl_op, - ptl_dt); - sent += msg_length; - } while (sent < length); - } - if (OMPI_SUCCESS != ret) { - OMPI_OSC_PORTALS4_REQUEST_RETURN(request); - return ret; + request->ops_expected += number_of_fragments(length, module->fetch_atomic_max); + + ret = segmentedFetchAtomic(&module->opcount, + module->req_md_h, + result_md_offset + result_lb, + module->md_h, + origin_md_offset + origin_lb, + length, + module->fetch_atomic_max, + peer, + module->pt_idx, + module->match_bits, + target_offset + target_lb, + request, + ptl_op, + ptl_dt); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_PORTALS4_REQUEST_RETURN(request); + return ret; + } } } @@ -580,7 +2794,7 @@ ompi_osc_portals4_rget_accumulate(void *origin_addr, int -ompi_osc_portals4_put(void *origin_addr, +ompi_osc_portals4_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -593,40 +2807,82 @@ ompi_osc_portals4_put(void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; - size_t offset; + size_t offset, size; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", + "put: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, (unsigned long) win)); offset = get_displacement(module, target) * target_disp; - if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) || - !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { - opal_output(ompi_osc_base_framework.framework_output, - "MPI_Put: transfer of non-contiguous memory is not currently supported.\n"); - 
return OMPI_ERR_NOT_SUPPORTED; + if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { + ret = put_to_noncontig(&module->opcount, + module->md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d put_to_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) { + ret = put_from_iovec(module, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d put_from_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } } else { - (void)opal_atomic_add_64(&module->opcount, 1); - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent); if (OMPI_SUCCESS != ret) { return ret; } - length *= origin_count; - ret = PtlPut(module->md_h, - (ptl_size_t) origin_addr, - length, - PTL_ACK_REQ, - peer, - module->pt_idx, - module->match_bits, - offset, - NULL, - 0); + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ompi_datatype_type_size(origin_dt, &size); + length = size * origin_count; + + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)", + __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount)); + ret = segmentedPut(&module->opcount, + module->md_h, + (ptl_size_t) origin_addr + origin_lb, + length, + mca_osc_portals4_component.ptl_max_msg_size, + PTL_ACK_REQ, + peer, + 
module->pt_idx, + module->match_bits, + offset + target_lb, + NULL, + 0); if (OMPI_SUCCESS != ret) { return ret; } @@ -650,38 +2906,79 @@ ompi_osc_portals4_get(void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length; - size_t offset; + size_t offset, size; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "get: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx", + "get: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) target_disp, target_count, target_dt->name, (unsigned long) win)); offset = get_displacement(module, target) * target_disp; - if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) || - !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { - opal_output(ompi_osc_base_framework.framework_output, - "MPI_Get: transfer of non-contiguous memory is not currently supported.\n"); - return OMPI_ERR_NOT_SUPPORTED; + if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { + ret = get_from_noncontig(&module->opcount, + module->md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d get_from_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) { + ret = get_to_iovec(module, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + 
"%s,%d get_to_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } } else { - (void)opal_atomic_add_64(&module->opcount, 1); - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent); if (OMPI_SUCCESS != ret) { return ret; } - length *= origin_count; - ret = PtlGet(module->md_h, - (ptl_size_t) origin_addr, - length, - peer, - module->pt_idx, - module->match_bits, - offset, - NULL); + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ompi_datatype_type_size(origin_dt, &size); + length = size * origin_count; + + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); + ret = segmentedGet(&module->opcount, + module->md_h, + (ptl_size_t) origin_addr + origin_lb, + length, + mca_osc_portals4_component.ptl_max_msg_size, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + NULL); if (OMPI_SUCCESS != ret) { return ret; } @@ -692,7 +2989,7 @@ ompi_osc_portals4_get(void *origin_addr, int -ompi_osc_portals4_accumulate(void *origin_addr, +ompi_osc_portals4_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -706,78 +3003,175 @@ ompi_osc_portals4_accumulate(void *origin_addr, ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; - size_t offset; + size_t offset, size; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb, extent; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx", + "accumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", (unsigned long) origin_addr, origin_count, - origin_dt->name, target, (int) target_disp, + origin_dt->name, target, (unsigned long) 
target_disp, target_count, target_dt->name, op->o_name, (unsigned long) win)); offset = get_displacement(module, target) * target_disp; - if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) || - !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { - opal_output(ompi_osc_base_framework.framework_output, - "MPI_Accumulate: transfer of non-contiguous memory is not currently supported.\n"); - return OMPI_ERR_NOT_SUPPORTED; + if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { + if (MPI_REPLACE == op) { + ret = atomic_put_to_noncontig(module, + module->md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d atomic_put_to_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else { + ret = atomic_to_noncontig(module, + module->md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + op, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d atomic_to_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } + } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) { + if (MPI_REPLACE == op) { + ret = atomic_put_from_iovec(module, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + module->pt_idx, + module->match_bits, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d atomic_put_from_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else { + ret = atomic_from_iovec(module, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + offset, + 
module->pt_idx, + module->match_bits, + op, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d atomic_from_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } } else { ptl_size_t md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); if (OMPI_SUCCESS != ret) { return ret; } - length *= origin_count; + ompi_datatype_type_size(origin_dt, &size); + length = size * origin_count; sent = 0; md_offset = (ptl_size_t) origin_addr; - do { - size_t msg_length = MIN(module->atomic_max, length - sent); - (void)opal_atomic_add_64(&module->opcount, 1); - - if (MPI_REPLACE == op) { - ret = PtlPut(module->md_h, - md_offset + sent, - msg_length, - PTL_ACK_REQ, - peer, - module->pt_idx, - module->match_bits, - offset + sent, - NULL, - 0); - } else { - ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (MPI_REPLACE == op) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Put", __FUNCTION__, __LINE__)); + ret = segmentedPut(&module->opcount, + module->md_h, + md_offset + origin_lb, + length, + module->atomic_max, + PTL_ACK_REQ, + peer, + module->pt_idx, + module->match_bits, + offset + target_lb, + NULL, + 0); + if (OMPI_SUCCESS != ret) { + return ret; + } + } else { + ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + ret = ompi_osc_portals4_get_op(op, &ptl_op); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } + do { + size_t msg_length = 
MIN(module->atomic_max, length - sent); - ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; + (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Atomic", __FUNCTION__, __LINE__)); ret = PtlAtomic(module->md_h, - md_offset + sent, + md_offset + sent + origin_lb, msg_length, PTL_ACK_REQ, peer, module->pt_idx, module->match_bits, - offset + sent, + offset + sent + target_lb, NULL, 0, ptl_op, ptl_dt); - } - if (OMPI_SUCCESS != ret) { - return ret; - } - sent += msg_length; - } while (sent < length); + if (OMPI_SUCCESS != ret) { + (void)opal_atomic_add_64(&module->opcount, -1); + return ret; + } + sent += msg_length; + } while (sent < length); + } } return OMPI_SUCCESS; @@ -785,150 +3179,318 @@ ompi_osc_portals4_accumulate(void *origin_addr, int -ompi_osc_portals4_get_accumulate(void *origin_addr, - int origin_count, +ompi_osc_portals4_get_accumulate(const void *origin_addr, + int origin_count, struct ompi_datatype_t *origin_dt, - void *result_addr, - int result_count, + void *result_addr, + int result_count, struct ompi_datatype_t *result_dt, - int target, - MPI_Aint target_disp, + int target, + OPAL_PTRDIFF_TYPE target_disp, int target_count, struct ompi_datatype_t *target_dt, - struct ompi_op_t *op, + struct ompi_op_t *op, struct ompi_win_t *win) { int ret; ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); - size_t length, sent; - size_t offset; + size_t target_offset, size; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; + OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, result_lb, extent; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx", + "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", (unsigned long) origin_addr, origin_count, origin_dt->name, 
(unsigned long) result_addr, result_count, result_dt->name, - target, (int) target_disp, + target, (unsigned long) target_disp, target_count, target_dt->name, op->o_name, (unsigned long) win)); - offset = get_displacement(module, target) * target_disp; + target_offset = get_displacement(module, target) * target_disp; - /* we don't support non-contiguous buffers. but if the count is 0, we don't care if buffer is non-contiguous. */ - if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) || - (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count)) || - (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count))) { - opal_output(ompi_osc_base_framework.framework_output, - "MPI_Get_accumulate: transfer of non-contiguous memory is not currently supported.\n"); - return OMPI_ERR_NOT_SUPPORTED; + if (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) { + if (MPI_REPLACE == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "get_accumulate: MPI_REPLACE non-contiguous target")); + ret = swap_from_noncontig(module, + module->md_h, + result_addr, + result_count, + result_dt, + module->md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d swap_from_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else if (MPI_NO_OP == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "get_accumulate: MPI_NO_OP non-contiguous target")); + ret = atomic_get_from_noncontig(module, + module->md_h, + result_addr, + result_count, + result_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + NULL); + if (PTL_OK != ret) { + 
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d atomic_get_from_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "get_accumulate: other-op non-contiguous target")); + ret = fetch_atomic_from_noncontig(module, + module->md_h, + result_addr, + result_count, + result_dt, + module->md_h, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + op, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d fetch_atomic_from_noncontig() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } + } else if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) || + (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count))) { + if (MPI_REPLACE == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "get_accumulate: MPI_REPLACE non-contiguous origin/result")); + ret = swap_to_iovec(module, + result_addr, + result_count, + result_dt, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d swap_to_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else if (MPI_NO_OP == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "get_accumulate: MPI_NO_OP non-contiguous origin/result")); + ret = atomic_get_to_iovec(module, + result_addr, + result_count, + result_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + 
"%s,%d atomic_get_to_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } else { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "get_accumulate: other-op non-contiguous origin/result")); + ret = fetch_atomic_to_iovec(module, + result_addr, + result_count, + result_dt, + origin_addr, + origin_count, + origin_dt, + peer, + target_count, + target_dt, + target_offset, + module->pt_idx, + module->match_bits, + op, + NULL); + if (PTL_OK != ret) { + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d fetch_atomic_to_iovec() failed: ret = %d", + __FUNCTION__, __LINE__, ret)); + return ret; + } + } } else { - sent = 0; if (MPI_REPLACE == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "get_accumulate: MPI_REPLACE contiguous")); ptl_size_t result_md_offset, origin_md_offset; - ret = ompi_datatype_type_size(origin_dt, &length); + ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent); if (OMPI_SUCCESS != ret) { return ret; } - length *= origin_count; + ompi_datatype_type_size(origin_dt, &size); + length = size * origin_count; + + ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Get_accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; - do { - size_t msg_length = MIN(module->fetch_atomic_max, length - sent); - - (void)opal_atomic_add_64(&module->opcount, 1); - - ret = PtlSwap(module->md_h, - result_md_offset + sent, - module->md_h, - origin_md_offset + sent, - msg_length, - peer, - module->pt_idx, - 
module->match_bits, - offset + sent, - NULL, - 0, - NULL, - PTL_SWAP, - ptl_dt); - sent += msg_length; - } while (sent < length); + ret = segmentedSwap(&module->opcount, + module->md_h, + result_md_offset + result_lb, + module->md_h, + origin_md_offset + origin_lb, + length, + module->fetch_atomic_max, + peer, + module->pt_idx, + module->match_bits, + target_offset + target_lb, + NULL, + ptl_dt); + if (OMPI_SUCCESS != ret) { + return ret; + } } else if (MPI_NO_OP == op) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "get_accumulate: MPI_NO_OP contiguous")); ptl_size_t md_offset; - ret = ompi_datatype_type_size(target_dt, &length); + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent); if (OMPI_SUCCESS != ret) { return ret; } - length *= target_count; + ompi_datatype_type_size(target_dt, &size); + length = size * target_count; md_offset = (ptl_size_t) result_addr; - do { - size_t msg_length = MIN(module->fetch_atomic_max, length - sent); - - (void)opal_atomic_add_64(&module->opcount, 1); - - ret = PtlGet(module->md_h, - md_offset + sent, - msg_length, - peer, - module->pt_idx, - module->match_bits, - offset + sent, - NULL); - sent += msg_length; - } while (sent < length); + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__)); + ret = segmentedGet(&module->opcount, + module->md_h, + (ptl_size_t) md_offset + result_lb, + length, + module->fetch_atomic_max, + peer, + module->pt_idx, + module->match_bits, + target_offset + target_lb, + NULL); + if (OMPI_SUCCESS != ret) { + return ret; + } } else { - ptl_size_t result_md_offset, origin_md_offset; + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "get_accumulate: other-op contiguous")); + ptl_size_t result_md_offset, origin_md_offset; - ret = ompi_datatype_type_size(origin_dt, 
&length); + ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent); + if (OMPI_SUCCESS != ret) { + return ret; + } + ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent); if (OMPI_SUCCESS != ret) { return ret; } - length *= origin_count; + ompi_datatype_type_size(origin_dt, &size); + length = size * origin_count; result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Get_accumulate: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; - - - do { - size_t msg_length = MIN(module->fetch_atomic_max, length - sent); - - (void)opal_atomic_add_64(&module->opcount, 1); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Get_accumulate: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } - ret = PtlFetchAtomic(module->md_h, - result_md_offset + sent, - module->md_h, - origin_md_offset + sent, - msg_length, - peer, - module->pt_idx, - module->match_bits, - offset + sent, - NULL, - 0, - ptl_op, - ptl_dt); - sent += msg_length; - } while (sent < length); - } - if (OMPI_SUCCESS != ret) { - return ret; + ret = segmentedFetchAtomic(&module->opcount, + module->md_h, + result_md_offset + result_lb, + module->md_h, + origin_md_offset + origin_lb, + length, + module->fetch_atomic_max, + peer, + module->pt_idx, + module->match_bits, + target_offset + target_lb, + NULL, + ptl_op, + ptl_dt); + if (OMPI_SUCCESS != ret) { + return ret; + } } } @@ -937,8 +3499,8 @@ ompi_osc_portals4_get_accumulate(void *origin_addr, int -ompi_osc_portals4_compare_and_swap(void 
*origin_addr, - void *compare_addr, +ompi_osc_portals4_compare_and_swap(const void *origin_addr, + const void *compare_addr, void *result_addr, struct ompi_datatype_t *dt, int target, @@ -955,28 +3517,34 @@ ompi_osc_portals4_compare_and_swap(void *origin_addr, ptl_size_t result_md_offset, origin_md_offset; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %d, 0x%lx", - (unsigned long) origin_addr, + "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %lu, 0x%lx", + (unsigned long) origin_addr, (unsigned long) compare_addr, (unsigned long) result_addr, - dt->name, target, (int) target_disp, + dt->name, target, (unsigned long) target_disp, (unsigned long) win)); ret = ompi_osc_portals4_get_dt(dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Compare_and_swap: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } offset = get_displacement(module, target) * target_disp; ret = ompi_datatype_type_size(dt, &length); if (OMPI_SUCCESS != ret) return ret; - assert(length < module->fetch_atomic_max); + assert(length <= module->fetch_atomic_max); result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, + "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->md_h, result_md_offset, module->md_h, @@ -1000,7 +3568,7 @@ ompi_osc_portals4_compare_and_swap(void *origin_addr, int -ompi_osc_portals4_fetch_and_op(void *origin_addr, +ompi_osc_portals4_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, @@ -1018,24 +3586,26 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr, ptl_datatype_t ptl_dt; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %d, %s, 
0x%lx", - (unsigned long) origin_addr, + "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %lu, %s, 0x%lx", + (unsigned long) origin_addr, (unsigned long) result_addr, - dt->name, target, (int) target_disp, + dt->name, target, (unsigned long) target_disp, op->o_name, (unsigned long) win)); ret = ompi_osc_portals4_get_dt(dt, &ptl_dt); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + opal_output(ompi_osc_base_framework.framework_output, + "MPI_Fetch_and_op: datatype is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } offset = get_displacement(module, target) * target_disp; ret = ompi_datatype_type_size(dt, &length); if (OMPI_SUCCESS != ret) return ret; - assert(length < module->fetch_atomic_max); - - (void)opal_atomic_add_64(&module->opcount, 1); + assert(length <= module->fetch_atomic_max); if (MPI_REPLACE == op) { ptl_size_t result_md_offset, origin_md_offset; @@ -1043,6 +3613,9 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr, result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; + (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Swap", __FUNCTION__, __LINE__)); ret = PtlSwap(module->md_h, result_md_offset, module->md_h, @@ -1062,6 +3635,9 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr, md_offset = (ptl_size_t) result_addr; + (void)opal_atomic_add_64(&module->opcount, 1); + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d Get", __FUNCTION__, __LINE__)); ret = PtlGet(module->md_h, md_offset, length, @@ -1071,14 +3647,21 @@ ompi_osc_portals4_fetch_and_op(void *origin_addr, offset, NULL); } else { - ptl_size_t result_md_offset, origin_md_offset; + ptl_size_t result_md_offset, origin_md_offset; + (void)opal_atomic_add_64(&module->opcount, 1); ret = ompi_osc_portals4_get_op(op, &ptl_op); - if (OMPI_SUCCESS != ret) return ret; + if (OMPI_SUCCESS != ret) { + 
opal_output(ompi_osc_base_framework.framework_output, + "MPI_Fetch_and_op: operation is not currently supported"); + return OMPI_ERR_NOT_SUPPORTED; + } result_md_offset = (ptl_size_t) result_addr; origin_md_offset = (ptl_size_t) origin_addr; + OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output, + "%s,%d FetchAtomic", __FUNCTION__, __LINE__)); ret = PtlFetchAtomic(module->md_h, result_md_offset, module->md_h, diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index 5f30d4e4b50..984a74c9d43 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -1,12 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2013 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2011-2017 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -196,7 +198,7 @@ progress_callback(void) "%s:%d: PtlEQGet reported dropped event", __FILE__, __LINE__); goto process; - } else if (PTL_EQ_EMPTY) { + } else if (PTL_EQ_EMPTY == ret) { return 0; } else { opal_output_verbose(1, ompi_osc_base_framework.framework_output, @@ -216,15 +218,18 @@ progress_callback(void) count++; if (NULL != ev.user_ptr) { - /* can't disable send events, but they don't count in ops */ - if (ev.type == PTL_EVENT_SEND) continue; + /* be sure that we receive the PTL_EVENT_LINK */ + if (ev.type == PTL_EVENT_LINK) { + *(int *)ev.user_ptr = *(int *)ev.user_ptr + 1; + opal_condition_broadcast(&mca_osc_portals4_component.cond); + continue; + } + req = (ompi_osc_portals4_request_t*) ev.user_ptr; opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength); ops = opal_atomic_add_32(&req->ops_committed, 1); if (ops == req->ops_expected) { - OPAL_THREAD_LOCK(&ompi_request_lock); ompi_request_complete(&req->super, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); } } } @@ -243,7 +248,7 @@ component_open(void) static int component_register(void) { - bool ompi_osc_portals4_no_locks = false; + mca_osc_portals4_component.no_locks = false; (void) mca_base_component_var_register(&mca_osc_portals4_component.super.osc_version, "no_locks", "Enable optimizations available only if MPI_LOCK is " @@ -252,7 +257,19 @@ component_register(void) MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &ompi_osc_portals4_no_locks); + &mca_osc_portals4_component.no_locks); + + mca_osc_portals4_component.ptl_max_msg_size = PTL_SIZE_MAX; + (void) mca_base_component_var_register(&mca_osc_portals4_component.super.osc_version, + "max_msg_size", + "Max size supported by portals4 (above that, a message is cut into messages less than that size)", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, + NULL, + 0, + 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + 
&mca_osc_portals4_component.ptl_max_msg_size); return OMPI_SUCCESS; } @@ -287,9 +304,14 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) /* BWB: FIX ME: Need to make sure our ID matches with the MTL... */ + if (mca_osc_portals4_component.ptl_max_msg_size > actual.max_msg_size) + mca_osc_portals4_component.ptl_max_msg_size = actual.max_msg_size; + OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, + "max_size = %lu", mca_osc_portals4_component.ptl_max_msg_size)); + mca_osc_portals4_component.matching_atomic_max = actual.max_atomic_size; mca_osc_portals4_component.matching_fetch_atomic_max = actual.max_fetch_atomic_size; - mca_osc_portals4_component.matching_atomic_ordered_size = + mca_osc_portals4_component.matching_atomic_ordered_size = MAX(actual.max_waw_ordered_size, actual.max_war_ordered_size); ret = PtlEQAlloc(mca_osc_portals4_component.matching_ni_h, @@ -305,7 +327,7 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) ret = PtlPTAlloc(mca_osc_portals4_component.matching_ni_h, 0, mca_osc_portals4_component.matching_eq_h, - 4, + REQ_OSC_TABLE_ID, &mca_osc_portals4_component.matching_pt_idx); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, @@ -314,6 +336,13 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) return ret; } + if (mca_osc_portals4_component.matching_pt_idx != REQ_OSC_TABLE_ID) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n", + __FILE__, __LINE__, mca_osc_portals4_component.matching_pt_idx); + return ret; + } + OBJ_CONSTRUCT(&mca_osc_portals4_component.requests, opal_free_list_t); ret = opal_free_list_init (&mca_osc_portals4_component.requests, sizeof(ompi_osc_portals4_request_t), @@ -339,7 +368,7 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) } -static int +static int component_finalize(void) { 
PtlNIFini(mca_osc_portals4_component.matching_ni_h); @@ -353,8 +382,18 @@ component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct ompi_info_t *info, int flavor) { + int ret; + if (MPI_WIN_FLAVOR_SHARED == flavor) return -1; + ret = PtlGetUid(mca_osc_portals4_component.matching_ni_h, &mca_osc_portals4_component.uid); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: PtlGetUid failed: %d\n", + __FILE__, __LINE__, ret); + return OMPI_ERROR; + } + return 20; } @@ -379,7 +418,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; /* fill in the function pointer part */ - memcpy(module, &ompi_osc_portals4_module_template, + memcpy(module, &ompi_osc_portals4_module_template, sizeof(ompi_osc_base_module_t)); /* fill in our part */ @@ -404,8 +443,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit /* share everyone's displacement units. Only do an allgather if strictly necessary, since it requires O(p) state. 
*/ tmp = disp_unit; - ret = module->comm->c_coll.coll_bcast(&tmp, 1, MPI_INT, 0, - module->comm, + ret = module->comm->c_coll.coll_bcast(&tmp, 1, MPI_INT, 0, + module->comm, module->comm->c_coll.coll_bcast_module); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, @@ -440,7 +479,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit __FILE__, __LINE__, ret); goto error; } - + md.start = 0; md.length = PTL_SIZE_MAX; md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; @@ -456,7 +495,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit md.start = 0; md.length = PTL_SIZE_MAX; - md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; + md.options = PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; md.eq_handle = mca_osc_portals4_component.matching_eq_h; md.ct_handle = module->ct_h; ret = PtlMDBind(module->ni_h, &md, &module->req_md_h); @@ -467,6 +506,11 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit goto error; } + module->origin_iovec_list = NULL; + module->origin_iovec_md_h = PTL_INVALID_HANDLE; + module->result_iovec_list = NULL; + module->result_iovec_md_h = PTL_INVALID_HANDLE; + if (MPI_WIN_FLAVOR_DYNAMIC == flavor) { me.start = 0; me.length = PTL_SIZE_MAX; @@ -475,7 +519,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit me.length = size; } me.ct_handle = PTL_CT_NONE; - me.uid = PTL_UID_ANY; + me.uid = mca_osc_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; @@ -486,7 +530,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->pt_idx, &me, PTL_PRIORITY_LIST, - NULL, + &module->ct_link, &module->data_me_h); if (PTL_OK != ret) { opal_output_verbose(1, 
ompi_osc_base_framework.framework_output, @@ -498,7 +542,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit me.start = &module->state; me.length = sizeof(module->state); me.ct_handle = PTL_CT_NONE; - me.uid = PTL_UID_ANY; + me.uid = mca_osc_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; @@ -509,7 +553,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->pt_idx, &me, PTL_PRIORITY_LIST, - NULL, + &module->ct_link, &module->control_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, @@ -546,7 +590,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->passive_target_access_epoch = false; -#if OPAL_ASSEMBLY_ARCH == OPAL_AMD64 || OPAL_ASSEMBLY_ARCH == OPAL_IA32 +#if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 || OPAL_ASSEMBLY_ARCH == OPAL_IA32 *model = MPI_WIN_UNIFIED; #else *model = MPI_WIN_SEPARATE; @@ -557,6 +601,13 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit PtlAtomicSync(); /* Make sure that everyone's ready to receive. 
*/ + OPAL_THREAD_LOCK(&mca_osc_portals4_component.lock); + while (module->ct_link != 2) { + opal_condition_wait(&mca_osc_portals4_component.cond, + &mca_osc_portals4_component.lock); + } + OPAL_THREAD_UNLOCK(&mca_osc_portals4_component.lock); + module->comm->c_coll.coll_barrier(module->comm, module->comm->c_coll.coll_barrier_module); @@ -583,7 +634,7 @@ ompi_osc_portals4_attach(struct ompi_win_t *win, void *base, size_t len) int -ompi_osc_portals4_detach(struct ompi_win_t *win, void *base) +ompi_osc_portals4_detach(struct ompi_win_t *win, const void *base) { return OMPI_SUCCESS; } @@ -601,8 +652,17 @@ ompi_osc_portals4_free(struct ompi_win_t *win) module->comm->c_coll.coll_barrier_module); /* cleanup */ + PtlMEUnlink(module->control_me_h); PtlMEUnlink(module->data_me_h); PtlMDRelease(module->md_h); + if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { + PtlMDRelease(module->origin_iovec_md_h); + free(module->origin_iovec_list); + } + if (module->result_iovec_md_h != PTL_INVALID_HANDLE) { + PtlMDRelease(module->result_iovec_md_h); + free(module->result_iovec_list); + } PtlMDRelease(module->req_md_h); PtlCTFree(module->ct_h); if (NULL != module->disp_units) free(module->disp_units); diff --git a/ompi/mca/osc/portals4/osc_portals4_passive_target.c b/ompi/mca/osc/portals4/osc_portals4_passive_target.c index 2a3a55110e0..b39d4d904fe 100644 --- a/ompi/mca/osc/portals4/osc_portals4_passive_target.c +++ b/ompi/mca/osc/portals4/osc_portals4_passive_target.c @@ -4,9 +4,9 @@ * of Tennessee Research Foundation. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -18,8 +18,6 @@ #include "osc_portals4.h" -#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h" - enum locktype_t { lock_nocheck, lock_exclusive, @@ -133,7 +131,7 @@ lk_add64(ompi_osc_portals4_module_t *module, static inline int -start_exclusive(ompi_osc_portals4_module_t *module, +start_exclusive(ompi_osc_portals4_module_t *module, int target) { int64_t result; @@ -151,7 +149,7 @@ start_exclusive(ompi_osc_portals4_module_t *module, static inline int -end_exclusive(ompi_osc_portals4_module_t *module, +end_exclusive(ompi_osc_portals4_module_t *module, int target) { int ret; @@ -162,7 +160,7 @@ end_exclusive(ompi_osc_portals4_module_t *module, static inline int -start_shared(ompi_osc_portals4_module_t *module, +start_shared(ompi_osc_portals4_module_t *module, int target) { int64_t result; @@ -186,7 +184,7 @@ start_shared(ompi_osc_portals4_module_t *module, static inline int -end_shared(ompi_osc_portals4_module_t *module, +end_shared(ompi_osc_portals4_module_t *module, int target) { int64_t result; @@ -205,7 +203,7 @@ ompi_osc_portals4_lock(int lock_type, { ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; - ompi_osc_portals4_outstanding_lock_t* lock; + ompi_osc_portals4_outstanding_lock_t* lock; int ret; module->passive_target_access_epoch = true; @@ -283,7 +281,7 @@ ompi_osc_portals4_lock_all(int assert, { ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; - ompi_osc_portals4_outstanding_lock_t* lock; + ompi_osc_portals4_outstanding_lock_t* lock; int ret = OMPI_SUCCESS; module->passive_target_access_epoch = true; diff --git a/ompi/mca/osc/portals4/osc_portals4_request.c b/ompi/mca/osc/portals4/osc_portals4_request.c index a6ba1aeb153..19461ec0580 100644 --- a/ompi/mca/osc/portals4/osc_portals4_request.c +++ b/ompi/mca/osc/portals4/osc_portals4_request.c @@ -1,9 +1,9 @@ /* * Copyright (c) 2011 Sandia National 
Laboratories. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,7 +26,7 @@ request_cancel(struct ompi_request_t *request, int complete) static int request_free(struct ompi_request_t **ompi_req) { - ompi_osc_portals4_request_t *request = + ompi_osc_portals4_request_t *request = (ompi_osc_portals4_request_t*) *ompi_req; if (true != request->super.req_complete) { diff --git a/ompi/mca/osc/portals4/osc_portals4_request.h b/ompi/mca/osc/portals4/osc_portals4_request.h index 0b7a9945fef..ae1be6f44d2 100644 --- a/ompi/mca/osc/portals4/osc_portals4_request.h +++ b/ompi/mca/osc/portals4/osc_portals4_request.h @@ -4,9 +4,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,6 +33,7 @@ OBJ_CLASS_DECLARATION(ompi_osc_portals4_request_t); req->super.req_mpi_object.win = win; \ req->super.req_complete = false; \ req->super.req_state = OMPI_REQUEST_ACTIVE; \ + req->super.req_status.MPI_ERROR = MPI_SUCCESS; \ req->ops_expected = 0; \ req->ops_committed = 0; \ } while (0) diff --git a/ompi/mca/osc/pt2pt/Makefile.am b/ompi/mca/osc/pt2pt/Makefile.am index 83bdb33e6c5..17d08ff50e1 100644 --- a/ompi/mca/osc/pt2pt/Makefile.am +++ b/ompi/mca/osc/pt2pt/Makefile.am @@ -32,7 +32,9 @@ pt2pt_sources = \ osc_pt2pt_request.h \ osc_pt2pt_request.c \ osc_pt2pt_active_target.c \ - osc_pt2pt_passive_target.c + osc_pt2pt_passive_target.c \ + osc_pt2pt_sync.h \ + osc_pt2pt_sync.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h b/ompi/mca/osc/pt2pt/osc_pt2pt.h index b7a9520eea3..bbb35f55629 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h @@ -8,10 +8,13 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
- * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,23 +30,24 @@ #include "opal/class/opal_free_list.h" #include "opal/class/opal_hash_table.h" #include "opal/threads/threads.h" -#include "opal/mca/btl/btl.h" +#include "opal/util/output.h" #include "ompi/win/win.h" +#include "ompi/info/info.h" #include "ompi/communicator/communicator.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/request/request.h" #include "ompi/mca/osc/osc.h" #include "ompi/mca/osc/base/base.h" -#include "opal/mca/btl/btl.h" -#include "ompi/mca/bml/bml.h" #include "ompi/memchecker.h" #include "osc_pt2pt_header.h" +#include "osc_pt2pt_sync.h" BEGIN_C_DECLS struct ompi_osc_pt2pt_frag_t; +struct ompi_osc_pt2pt_receive_t; struct ompi_osc_pt2pt_component_t { /** Extend the basic osc component interface */ @@ -58,6 +62,9 @@ struct ompi_osc_pt2pt_component_t { /** module count */ int module_count; + /** number of buffers per window */ + int receive_count; + /** free list of ompi_osc_pt2pt_frag_t structures */ opal_free_list_t frags; @@ -73,16 +80,34 @@ struct ompi_osc_pt2pt_component_t { /** List of operations that need to be processed */ opal_list_t pending_operations; + /** List of receives to be processed */ + opal_list_t pending_receives; + + /** Lock for pending_receives */ + opal_mutex_t pending_receives_lock; + /** Is the progress function enabled? 
*/ bool progress_enable; }; typedef struct ompi_osc_pt2pt_component_t ompi_osc_pt2pt_component_t; +enum { + /** peer has sent an unexpected post message (no matching start) */ + OMPI_OSC_PT2PT_PEER_FLAG_UNEX = 1, + /** eager sends are active on this peer */ + OMPI_OSC_PT2PT_PEER_FLAG_EAGER = 2, + /** peer has been locked (on-demand locking for lock_all) */ + OMPI_OSC_PT2PT_PEER_FLAG_LOCK = 4, +}; + struct ompi_osc_pt2pt_peer_t { /** make this an opal object */ opal_object_t super; + /** rank of this peer */ + int rank; + /** pointer to the current send fragment for each outgoing target */ struct ompi_osc_pt2pt_frag_t *active_frag; @@ -92,24 +117,60 @@ struct ompi_osc_pt2pt_peer_t { /** fragments queued to this target */ opal_list_t queued_frags; - /** number of acks pending. New requests can not be sent out if there are - * acks pending (to fulfill the ordering constraints of accumulate) */ - uint32_t num_acks_pending; - /** number of fragments incomming (negative - expected, positive - unsynchronized) */ int32_t passive_incoming_frag_count; - /** peer is in an access epoch */ - bool access_epoch; - - /** eager sends are active to this peer */ - bool eager_send_active; + /** peer flags */ + volatile int32_t flags; }; typedef struct ompi_osc_pt2pt_peer_t ompi_osc_pt2pt_peer_t; OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_peer_t); -#define SEQ_INVALID 0xFFFFFFFFFFFFFFFFULL +static inline bool ompi_osc_pt2pt_peer_locked (ompi_osc_pt2pt_peer_t *peer) +{ + return !!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_LOCK); +} + +static inline bool ompi_osc_pt2pt_peer_unex (ompi_osc_pt2pt_peer_t *peer) +{ + return !!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_UNEX); +} + +static inline bool ompi_osc_pt2pt_peer_eager_active (ompi_osc_pt2pt_peer_t *peer) +{ + return !!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER); +} + +static inline void ompi_osc_pt2pt_peer_set_flag (ompi_osc_pt2pt_peer_t *peer, int32_t flag, bool value) +{ + int32_t peer_flags, new_flags; + do { + peer_flags = peer->flags; + 
if (value) { + new_flags = peer_flags | flag; + } else { + new_flags = peer_flags & ~flag; + } + } while (!OPAL_ATOMIC_CMPSET_32 (&peer->flags, peer_flags, new_flags)); +} + +static inline void ompi_osc_pt2pt_peer_set_locked (ompi_osc_pt2pt_peer_t *peer, bool value) +{ + ompi_osc_pt2pt_peer_set_flag (peer, OMPI_OSC_PT2PT_PEER_FLAG_LOCK, value); +} + +static inline void ompi_osc_pt2pt_peer_set_unex (ompi_osc_pt2pt_peer_t *peer, bool value) +{ + ompi_osc_pt2pt_peer_set_flag (peer, OMPI_OSC_PT2PT_PEER_FLAG_UNEX, value); +} + +static inline void ompi_osc_pt2pt_peer_set_eager_active (ompi_osc_pt2pt_peer_t *peer, bool value) +{ + ompi_osc_pt2pt_peer_set_flag (peer, OMPI_OSC_PT2PT_PEER_FLAG_EAGER, value); +} + +OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_peer_t); /** Module structure. Exactly one of these is associated with each PT2PT window */ @@ -120,6 +181,9 @@ struct ompi_osc_pt2pt_module_t { /** window should have accumulate ordering... */ bool accumulate_ordering; + /** no locks info key value */ + bool no_locks; + /** pointer to free on cleanup (may be NULL) */ void *free_after; @@ -139,54 +203,39 @@ struct ompi_osc_pt2pt_module_t { /** condition variable associated with lock */ opal_condition_t cond; - /** lock for atomic window updates from reductions */ - opal_mutex_t acc_lock; + /** hash table of peer objects */ + opal_hash_table_t peer_hash; - /** peer data */ - ompi_osc_pt2pt_peer_t *peers; + /** lock protecting peer_hash */ + opal_mutex_t peer_lock; /** Nmber of communication fragments started for this epoch, by peer. Not in peer data to make fence more manageable. */ uint32_t *epoch_outgoing_frag_count; /** cyclic counter for a unique tage for long messages. 
*/ - unsigned int tag_counter; + uint32_t tag_counter; /* Number of outgoing fragments that have completed since the begining of time */ - uint32_t outgoing_frag_count; + volatile uint32_t outgoing_frag_count; /* Next outgoing fragment count at which we want a signal on cond */ - uint32_t outgoing_frag_signal_count; + volatile uint32_t outgoing_frag_signal_count; /* Number of incoming fragments that have completed since the begining of time */ - uint32_t active_incoming_frag_count; + volatile uint32_t active_incoming_frag_count; /* Next incoming buffer count at which we want a signal on cond */ - uint32_t active_incoming_frag_signal_count; - - /* Number of flush ack requests send since beginning of time */ - uint64_t flush_ack_requested_count; - /* Number of flush ack replies received since beginning of - time. cond should be signalled on every flush reply - received. */ - uint64_t flush_ack_received_count; + volatile uint32_t active_incoming_frag_signal_count; /** Number of targets locked/being locked */ unsigned int passive_target_access_epoch; - /** start sending data eagerly */ - bool active_eager_send_active; - - /** Indicates the window is in an all access epoch (fence, lock_all) */ - bool all_access_epoch; + /** Indicates the window is in a pcsw or all access (fence, lock_all) epoch */ + ompi_osc_pt2pt_sync_t all_sync; /* ********************* PWSC data ************************ */ struct ompi_group_t *pw_group; - struct ompi_group_t *sc_group; - - /** Number of "ping" messages from the remote post group we've - received */ - int32_t num_post_msgs; /** Number of "count" messages from the remote complete group we've received */ @@ -205,33 +254,50 @@ struct ompi_osc_pt2pt_module_t { opal_list_t locks_pending; /** origin side list of locks currently outstanding */ - opal_list_t outstanding_locks; + opal_hash_table_t outstanding_locks; - uint64_t lock_serial_number; + /** receive fragments */ + struct ompi_osc_pt2pt_receive_t *recv_frags; - unsigned char 
*incoming_buffer; - ompi_request_t *frag_request; + /** number of receive fragments */ + unsigned int recv_frag_count; /* enforce accumulate semantics */ opal_atomic_lock_t accumulate_lock; opal_list_t pending_acc; - /* enforce pscw matching */ - /** list of unmatched post messages */ - opal_list_t pending_posts; - /** Lock for garbage collection lists */ opal_mutex_t gc_lock; - /** List of requests that need to be freed */ - opal_list_t request_gc; - /** List of buffers that need to be freed */ opal_list_t buffer_gc; }; typedef struct ompi_osc_pt2pt_module_t ompi_osc_pt2pt_module_t; OMPI_MODULE_DECLSPEC extern ompi_osc_pt2pt_component_t mca_osc_pt2pt_component; +static inline ompi_osc_pt2pt_peer_t *ompi_osc_pt2pt_peer_lookup (ompi_osc_pt2pt_module_t *module, + int rank) +{ + ompi_osc_pt2pt_peer_t *peer = NULL; + (void) opal_hash_table_get_value_uint32 (&module->peer_hash, rank, (void **) &peer); + + if (OPAL_UNLIKELY(NULL == peer)) { + OPAL_THREAD_LOCK(&module->peer_lock); + (void) opal_hash_table_get_value_uint32 (&module->peer_hash, rank, (void **) &peer); + + if (NULL == peer) { + peer = OBJ_NEW(ompi_osc_pt2pt_peer_t); + peer->rank = rank; + + (void) opal_hash_table_set_value_uint32 (&module->peer_hash, rank, (void *) peer); + } + OPAL_THREAD_UNLOCK(&module->peer_lock); + } + + return peer; +} + + struct ompi_osc_pt2pt_pending_t { opal_list_item_t super; ompi_osc_pt2pt_module_t *module; @@ -241,16 +307,25 @@ struct ompi_osc_pt2pt_pending_t { typedef struct ompi_osc_pt2pt_pending_t ompi_osc_pt2pt_pending_t; OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_pending_t); +struct ompi_osc_pt2pt_receive_t { + opal_list_item_t super; + ompi_osc_pt2pt_module_t *module; + ompi_request_t *pml_request; + void *buffer; +}; +typedef struct ompi_osc_pt2pt_receive_t ompi_osc_pt2pt_receive_t; +OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_receive_t); + #define GET_MODULE(win) ((ompi_osc_pt2pt_module_t*) win->w_osc_module) extern bool ompi_osc_pt2pt_no_locks; int ompi_osc_pt2pt_attach(struct 
ompi_win_t *win, void *base, size_t len); -int ompi_osc_pt2pt_detach(struct ompi_win_t *win, void *base); +int ompi_osc_pt2pt_detach(struct ompi_win_t *win, const void *base); int ompi_osc_pt2pt_free(struct ompi_win_t *win); -int ompi_osc_pt2pt_put(void *origin_addr, +int ompi_osc_pt2pt_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -259,34 +334,34 @@ int ompi_osc_pt2pt_put(void *origin_addr, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); -int ompi_osc_pt2pt_accumulate(void *origin_addr, - int origin_count, - struct ompi_datatype_t *origin_dt, - int target, - OPAL_PTRDIFF_TYPE target_disp, - int target_count, - struct ompi_datatype_t *target_dt, - struct ompi_op_t *op, - struct ompi_win_t *win); +int ompi_osc_pt2pt_accumulate(const void *origin_addr, + int origin_count, + struct ompi_datatype_t *origin_dt, + int target, + OPAL_PTRDIFF_TYPE target_disp, + int target_count, + struct ompi_datatype_t *target_dt, + struct ompi_op_t *op, + struct ompi_win_t *win); int ompi_osc_pt2pt_get(void *origin_addr, - int origin_count, - struct ompi_datatype_t *origin_dt, - int target, - OPAL_PTRDIFF_TYPE target_disp, - int target_count, - struct ompi_datatype_t *target_dt, - struct ompi_win_t *win); + int origin_count, + struct ompi_datatype_t *origin_dt, + int target, + OPAL_PTRDIFF_TYPE target_disp, + int target_count, + struct ompi_datatype_t *target_dt, + struct ompi_win_t *win); -int ompi_osc_pt2pt_compare_and_swap(void *origin_addr, - void *compare_addr, +int ompi_osc_pt2pt_compare_and_swap(const void *origin_addr, + const void *compare_addr, void *result_addr, struct ompi_datatype_t *dt, int target, OPAL_PTRDIFF_TYPE target_disp, struct ompi_win_t *win); -int ompi_osc_pt2pt_fetch_and_op(void *origin_addr, +int ompi_osc_pt2pt_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, @@ -294,7 +369,7 @@ int ompi_osc_pt2pt_fetch_and_op(void *origin_addr, struct ompi_op_t *op, 
struct ompi_win_t *win); -int ompi_osc_pt2pt_get_accumulate(void *origin_addr, +int ompi_osc_pt2pt_get_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype, void *result_addr, @@ -307,7 +382,7 @@ int ompi_osc_pt2pt_get_accumulate(void *origin_addr, struct ompi_op_t *op, struct ompi_win_t *win); -int ompi_osc_pt2pt_rput(void *origin_addr, +int ompi_osc_pt2pt_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -327,7 +402,7 @@ int ompi_osc_pt2pt_rget(void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); -int ompi_osc_pt2pt_raccumulate(void *origin_addr, +int ompi_osc_pt2pt_raccumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -338,7 +413,7 @@ int ompi_osc_pt2pt_raccumulate(void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); -int ompi_osc_pt2pt_rget_accumulate(void *origin_addr, +int ompi_osc_pt2pt_rget_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype, void *result_addr, @@ -355,7 +430,10 @@ int ompi_osc_pt2pt_rget_accumulate(void *origin_addr, int ompi_osc_pt2pt_fence(int assert, struct ompi_win_t *win); /* received a post message */ -int osc_pt2pt_incoming_post (ompi_osc_pt2pt_module_t *module, int source); +void osc_pt2pt_incoming_post (ompi_osc_pt2pt_module_t *module, int source); + +/* received a complete message */ +void osc_pt2pt_incoming_complete (ompi_osc_pt2pt_module_t *module, int source, int frag_count); int ompi_osc_pt2pt_start(struct ompi_group_t *group, int assert, @@ -404,13 +482,7 @@ int ompi_osc_pt2pt_component_irecv(ompi_osc_pt2pt_module_t *module, int tag, struct ompi_communicator_t *comm); -int ompi_osc_pt2pt_component_isend(ompi_osc_pt2pt_module_t *module, - void *buf, - size_t count, - struct ompi_datatype_t *datatype, - int dest, - int tag, - struct ompi_communicator_t *comm); +int ompi_osc_pt2pt_lock_remote 
(ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_sync_t *lock); /** * ompi_osc_pt2pt_progress_pending_acc: @@ -449,7 +521,8 @@ static inline void mark_incoming_completion (ompi_osc_pt2pt_module_t *module, in opal_condition_broadcast(&module->cond); } } else { - ompi_osc_pt2pt_peer_t *peer = module->peers + source; + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, source); + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "mark_incoming_completion marking passive incoming complete. source = %d, count = %d", source, (int) peer->passive_incoming_frag_count + 1)); @@ -565,7 +638,7 @@ static inline void osc_pt2pt_copy_on_recv (void *target, void *source, size_t so * buffer. The copy is done with a convertor generated from proc, * datatype, and count. */ -static inline void osc_pt2pt_copy_for_send (void *target, size_t target_len, void *source, ompi_proc_t *proc, +static inline void osc_pt2pt_copy_for_send (void *target, size_t target_len, const void *source, ompi_proc_t *proc, int count, ompi_datatype_t *datatype) { opal_convertor_t convertor; @@ -586,41 +659,25 @@ static inline void osc_pt2pt_copy_for_send (void *target, size_t target_len, voi } /** - * osc_pt2pt_request_gc_clean: + * osc_pt2pt_gc_clean: * * @short Release finished PML requests and accumulate buffers. * - * @long This function exists because it is not possible to free a PML request - * or buffer from a request completion callback. We instead put requests - * and buffers on the module's garbage collection lists and release then - * at a later time. + * @long This function exists because it is not possible to free a buffer from + * a request completion callback. We instead put requests and buffers on the + * module's garbage collection lists and release them at a later time.
*/ static inline void osc_pt2pt_gc_clean (ompi_osc_pt2pt_module_t *module) { - ompi_request_t *request; opal_list_item_t *item; OPAL_THREAD_LOCK(&module->gc_lock); - - while (NULL != (request = (ompi_request_t *) opal_list_remove_first (&module->request_gc))) { - OPAL_THREAD_UNLOCK(&module->gc_lock); - ompi_request_free (&request); - OPAL_THREAD_LOCK(&module->gc_lock); - } - while (NULL != (item = opal_list_remove_first (&module->buffer_gc))) { OBJ_RELEASE(item); } - OPAL_THREAD_UNLOCK(&module->gc_lock); } -static inline void osc_pt2pt_gc_add_request (ompi_osc_pt2pt_module_t *module, ompi_request_t *request) -{ - OPAL_THREAD_SCOPED_LOCK(&module->gc_lock, - opal_list_append (&module->request_gc, (opal_list_item_t *) request)); -} - static inline void osc_pt2pt_gc_add_buffer (ompi_osc_pt2pt_module_t *module, opal_list_item_t *buffer) { OPAL_THREAD_SCOPED_LOCK(&module->gc_lock, @@ -639,24 +696,49 @@ static inline void osc_pt2pt_add_pending (ompi_osc_pt2pt_pending_t *pending) /** * get_tag: * - * @short Get a send/recv tag for large memory operations. + * @short Get a send/recv base tag for large memory operations. * * @param[in] module - OSC PT2PT module * - * @long This function aquires a 16-bit tag for use with large memory operations. The + * @long This function acquires a 16-bit tag for use with large memory operations. The * tag will be odd or even depending on if this is in a passive target access - * or not. + * or not. An actual tag that will be passed to PML send/recv function is given + * by tag_to_target or tag_to_origin function depending on the communication + * direction. */ static inline int get_tag(ompi_osc_pt2pt_module_t *module) { /* the LSB of the tag is used be the receiver to determine if the message is a passive or active target (ie, where to mark completion). 
*/ - int tmp = module->tag_counter + !!(module->passive_target_access_epoch); + int32_t tmp = OPAL_THREAD_ADD32((volatile int32_t *) &module->tag_counter, 4); + return (tmp & OSC_PT2PT_FRAG_MASK) | !!(module->passive_target_access_epoch); +} - module->tag_counter = (module->tag_counter + 2) & OSC_PT2PT_FRAG_MASK; +/** + * tag_to_target: + * + * @short Get a tag used for PML send/recv communication from an origin to a target. + * + * @param[in] tag - base tag given by get_tag function. + */ +static inline int tag_to_target(int tag) +{ + /* (returned_tag >> 1) & 0x1 == 0 */ + return tag + 0; +} - return tmp; +/** + * tag_to_origin: + * + * @short Get a tag used for PML send/recv communication from a target to an origin. + * + * @param[in] tag - base tag given by get_tag function. + */ +static inline int tag_to_origin(int tag) +{ + /* (returned_tag >> 1) & 0x1 == 1 */ + return tag + 2; } /** @@ -702,6 +784,16 @@ static inline int ompi_osc_pt2pt_accumulate_trylock (ompi_osc_pt2pt_module_t *mo return opal_atomic_trylock (&module->accumulate_lock); } +/** + * @brief check if this process is in a passive target access epoch + * + * @param[in] module osc pt2pt module + */ +static inline bool ompi_osc_pt2pt_in_passive_epoch (ompi_osc_pt2pt_module_t *module) +{ + return 0 != module->passive_target_access_epoch; +} + /** * ompi_osc_pt2pt_accumulate_unlock: * @@ -720,9 +812,142 @@ static inline void ompi_osc_pt2pt_accumulate_unlock (ompi_osc_pt2pt_module_t *mo } } -static inline bool ompi_osc_pt2pt_check_access_epoch (ompi_osc_pt2pt_module_t *module, int rank) +/** + * Find the first outstanding lock of the target. + * + * @param[in] module osc pt2pt module + * @param[in] target target rank + * @param[out] peer peer object associated with the target + * + * @returns an outstanding lock on success + * + * This function looks for an outstanding lock to the target. If a lock exists it is returned.
+ */ +static inline ompi_osc_pt2pt_sync_t *ompi_osc_pt2pt_module_lock_find (ompi_osc_pt2pt_module_t *module, int target, + ompi_osc_pt2pt_peer_t **peer) +{ + ompi_osc_pt2pt_sync_t *outstanding_lock = NULL; + + (void) opal_hash_table_get_value_uint32 (&module->outstanding_locks, (uint32_t) target, (void **) &outstanding_lock); + if (NULL != outstanding_lock && peer) { + *peer = outstanding_lock->peer_list.peer; + } + + return outstanding_lock; +} + +/** + * Add an outstanding lock + * + * @param[in] module osc pt2pt module + * @param[in] lock lock object + * + * This function inserts a lock object into the list of outstanding locks. The caller must be holding the module + * lock. + */ +static inline void ompi_osc_pt2pt_module_lock_insert (struct ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock) +{ + (void) opal_hash_table_set_value_uint32 (&module->outstanding_locks, (uint32_t) lock->sync.lock.target, (void *) lock); +} + + +/** + * Remove an outstanding lock + * + * @param[in] module osc pt2pt module + * @param[in] lock lock object + * + * This function removes a lock object from the list of outstanding locks. The caller must be holding the module + * lock. + */ +static inline void ompi_osc_pt2pt_module_lock_remove (struct ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock) +{ + + (void) opal_hash_table_remove_value_uint32 (&module->outstanding_locks, (uint32_t) lock->sync.lock.target); +} + +/** + * Lookup a synchronization object associated with the target + * + * @param[in] module osc pt2pt module + * @param[in] target target rank + * @param[out] peer peer object + * + * @returns NULL if the target is not locked, fenced, or part of a pscw sync + * @returns synchronization object on success + * + * This function returns the synchronization object associated with an access epoch for + * the target. If the target is not part of any current access epoch then NULL is returned.
+ */ +static inline ompi_osc_pt2pt_sync_t *ompi_osc_pt2pt_module_sync_lookup (ompi_osc_pt2pt_module_t *module, int target, + struct ompi_osc_pt2pt_peer_t **peer) +{ + ompi_osc_pt2pt_peer_t *tmp; + + if (NULL == peer) { + peer = &tmp; + } + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "osc/pt2pt: looking for synchronization object for target %d", target)); + + switch (module->all_sync.type) { + case OMPI_OSC_PT2PT_SYNC_TYPE_NONE: + if (!module->no_locks) { + return ompi_osc_pt2pt_module_lock_find (module, target, peer); + } + + return NULL; + case OMPI_OSC_PT2PT_SYNC_TYPE_FENCE: + case OMPI_OSC_PT2PT_SYNC_TYPE_LOCK: + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "osc/pt2pt: found fence/lock_all access epoch for target %d", target)); + + /* fence epoch is now active */ + module->all_sync.epoch_active = true; + *peer = ompi_osc_pt2pt_peer_lookup (module, target); + if (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK == module->all_sync.type && !ompi_osc_pt2pt_peer_locked (*peer)) { + (void) ompi_osc_pt2pt_lock_remote (module, target, &module->all_sync); + } + + return &module->all_sync; + case OMPI_OSC_PT2PT_SYNC_TYPE_PSCW: + if (ompi_osc_pt2pt_sync_pscw_peer (module, target, peer)) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "osc/pt2pt: found PSCW access epoch target for %d", target)); + return &module->all_sync; + } + } + + return NULL; +} + +/** + * @brief check if an access epoch is active + * + * @param[in] module osc pt2pt module + * + * @returns true if any type of access epoch is active + * @returns false otherwise + * + * This function is used to check for conflicting access epochs. 
+ */ +static inline bool ompi_osc_pt2pt_access_epoch_active (ompi_osc_pt2pt_module_t *module) { - return module->all_access_epoch || module->peers[rank].access_epoch; + return (module->all_sync.epoch_active || ompi_osc_pt2pt_in_passive_epoch (module)); +} + +static inline bool ompi_osc_pt2pt_peer_sends_active (ompi_osc_pt2pt_module_t *module, int rank) +{ + ompi_osc_pt2pt_sync_t *sync; + ompi_osc_pt2pt_peer_t *peer; + + sync = ompi_osc_pt2pt_module_sync_lookup (module, rank, &peer); + if (!sync) { + return false; + } + + return sync->eager_send_active || ompi_osc_pt2pt_peer_eager_active (peer); } END_C_DECLS diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c index 5e5e5d414a1..55917ca65ae 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c @@ -8,9 +8,9 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010-2016 IBM Corporation. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -35,74 +35,95 @@ #include "ompi/mca/osc/base/base.h" /** - * ompi_osc_pt2pt_pending_post_t: + * compare_ranks: * - * Describes a post operation that was encountered outside its - * matching start operation. + * @param[in] ptra Pointer to integer item + * @param[in] ptrb Pointer to integer item + * + * @returns 0 if *ptra == *ptrb + * @returns -1 if *ptra < *ptrb + * @returns 1 otherwise + * + * This function is used to sort the rank list. It can be removed if + * groups are always in order. 
*/ -struct ompi_osc_pt2pt_pending_post_t { - opal_list_item_t super; - int rank; -}; -typedef struct ompi_osc_pt2pt_pending_post_t ompi_osc_pt2pt_pending_post_t; -OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_pending_post_t); - -OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_pending_post_t, opal_list_item_t, NULL, NULL); - -static bool group_contains_proc (ompi_group_t *group, ompi_proc_t *proc) +static int compare_ranks (const void *ptra, const void *ptrb) { - int group_size = ompi_group_size (group); + int a = *((int *) ptra); + int b = *((int *) ptrb); - for (int i = 0 ; i < group_size ; ++i) { - ompi_proc_t *group_proc = ompi_group_peer_lookup (group, i); - - /* it is safe to compare procs by pointer */ - if (group_proc == proc) { - return true; - } + if (a < b) { + return -1; + } else if (a > b) { + return 1; } - return false; + return 0; } -static int* -get_comm_ranks(ompi_osc_pt2pt_module_t *module, - ompi_group_t *sub_group) +/** + * ompi_osc_pt2pt_get_peers: + * + * @param[in] module - OSC PT2PT module + * @param[in] sub_group - Group with ranks to translate + * + * @returns an array of retained peer objects (sorted by rank) on success or NULL on failure + * + * Translate the ranks given in {sub_group} into ranks in the + * communicator used to create {module} and look up the peer object for each.
+ */ +static ompi_osc_pt2pt_peer_t **ompi_osc_pt2pt_get_peers (ompi_osc_pt2pt_module_t *module, ompi_group_t *sub_group) { - int *ranks1 = NULL, *ranks2 = NULL; - bool success = false; - int i, ret; - - ranks1 = malloc(sizeof(int) * ompi_group_size(sub_group)); - if (NULL == ranks1) goto cleanup; - ranks2 = malloc(sizeof(int) * ompi_group_size(sub_group)); - if (NULL == ranks2) goto cleanup; + int size = ompi_group_size(sub_group); + ompi_osc_pt2pt_peer_t **peers; + int *ranks1, *ranks2; + int ret; + + ranks1 = calloc (size, sizeof(int)); + ranks2 = calloc (size, sizeof(int)); + peers = calloc (size, sizeof (ompi_osc_pt2pt_peer_t *)); + if (NULL == ranks1 || NULL == ranks2 || NULL == peers) { + free (ranks1); + free (ranks2); + free (peers); + return NULL; + } - for (i = 0 ; i < ompi_group_size(sub_group) ; ++i) { + for (int i = 0 ; i < size ; ++i) { ranks1[i] = i; } - ret = ompi_group_translate_ranks(sub_group, - ompi_group_size(sub_group), - ranks1, - module->comm->c_local_group, - ranks2); - if (OMPI_SUCCESS != ret) goto cleanup; - - success = true; + ret = ompi_group_translate_ranks (sub_group, size, ranks1, module->comm->c_local_group, + ranks2); + free (ranks1); + if (OMPI_SUCCESS != ret) { + free (ranks2); + free (peers); + return NULL; + } - cleanup: - if (NULL != ranks1) free(ranks1); - if (!success) { - if (NULL != ranks2) free(ranks2); - ranks2 = NULL; + qsort (ranks2, size, sizeof (int), compare_ranks); + for (int i = 0 ; i < size ; ++i) { + peers[i] = ompi_osc_pt2pt_peer_lookup (module, ranks2[i]); + OBJ_RETAIN(peers[i]); } + free (ranks2); - return ranks2; + return peers; } -int -ompi_osc_pt2pt_fence(int assert, ompi_win_t *win) +static void ompi_osc_pt2pt_release_peers (ompi_osc_pt2pt_peer_t **peers, int npeers) +{ + if (peers) { + for (int i = 0 ; i < npeers ; ++i) { + OBJ_RELEASE(peers[i]); + } + + free (peers); + } +} + +int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win) { ompi_osc_pt2pt_module_t *module = GET_MODULE(win); uint32_t 
incoming_reqs; @@ -112,18 +133,21 @@ ompi_osc_pt2pt_fence(int assert, ompi_win_t *win) "osc pt2pt: fence start")); /* can't enter an active target epoch when in a passive target epoch */ - if (module->passive_target_access_epoch) { + if (ompi_osc_pt2pt_in_passive_epoch (module)) { + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, + "osc pt2pt: could not enter fence. already in an access epoch")); return OMPI_ERR_RMA_SYNC; } /* active sends are now active (we will close the epoch if NOSUCCEED is specified) */ if (0 == (assert & MPI_MODE_NOSUCCEED)) { - module->active_eager_send_active = true; - module->all_access_epoch = true; + module->all_sync.type = OMPI_OSC_PT2PT_SYNC_TYPE_FENCE; + module->all_sync.eager_send_active = true; } /* short-circuit the noprecede case */ if (0 != (assert & MPI_MODE_NOPRECEDE)) { + module->comm->c_coll.coll_barrier (module->comm, module->comm->c_coll.coll_barrier_module); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "osc pt2pt: fence end (short circuit)")); return ret; @@ -168,9 +192,11 @@ ompi_osc_pt2pt_fence(int assert, ompi_win_t *win) if (assert & MPI_MODE_NOSUCCEED) { /* as specified in MPI-3 p 438 3-5 the fence can end an epoch. it isn't explicitly * stated that MPI_MODE_NOSUCCEED ends the epoch but it is a safe assumption. 
*/ - module->active_eager_send_active = false; - module->all_access_epoch = false; + ompi_osc_pt2pt_sync_reset (&module->all_sync); } + + module->all_sync.epoch_active = false; + opal_condition_broadcast (&module->cond); OPAL_THREAD_UNLOCK(&module->lock); @@ -181,124 +207,134 @@ ompi_osc_pt2pt_fence(int assert, ompi_win_t *win) } -int -ompi_osc_pt2pt_start(ompi_group_t *group, - int assert, - ompi_win_t *win) +int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win) { ompi_osc_pt2pt_module_t *module = GET_MODULE(win); - ompi_osc_pt2pt_pending_post_t *pending_post, *next; - int group_size; - int *ranks; + ompi_osc_pt2pt_sync_t *sync = &module->all_sync; OPAL_THREAD_LOCK(&module->lock); - /* ensure we're not already in a start or passive target. we can no check for all - * access here due to fence */ - if (NULL != module->sc_group || module->passive_target_access_epoch) { + /* check if we are already in an access epoch */ + if (ompi_osc_pt2pt_access_epoch_active (module)) { OPAL_THREAD_UNLOCK(&module->lock); return OMPI_ERR_RMA_SYNC; } - /* save the group */ - OBJ_RETAIN(group); - ompi_group_increment_proc_count(group); + /* mark all procs in this group as being in an access epoch */ + sync->num_peers = ompi_group_size (group); + sync->sync.pscw.group = group; - module->sc_group = group; + /* haven't processed any post messages yet */ + sync->sync_expected = sync->num_peers; - /* mark all procs in this group as being in an access epoch */ - group_size = ompi_group_size (module->sc_group); + /* If the previous epoch was from Fence, then eager_send_active is still + * set to true at this time, but it shoulnd't be true until we get our + * incoming Posts. So reset to 'false' for this new epoch. 
+ */ + sync->eager_send_active = false; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_start entering with group size %d...", - group_size)); + sync->num_peers)); - ranks = get_comm_ranks(module, module->sc_group); - if (NULL == ranks) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_PSCW; - for (int i = 0 ; i < group_size ; ++i) { - /* when the post comes in we will be in an access epoch with this proc */ - module->peers[ranks[i]].access_epoch = true; - } + /* prevent us from entering a passive-target, fence, or another pscw access epoch until + * the matching complete is called */ + sync->epoch_active = true; - free (ranks); + /* save the group */ + OBJ_RETAIN(group); - OPAL_LIST_FOREACH_SAFE(pending_post, next, &module->pending_posts, ompi_osc_pt2pt_pending_post_t) { - ompi_proc_t *pending_proc = ompi_comm_peer_lookup (module->comm, pending_post->rank); + if (0 == ompi_group_size (group)) { + /* nothing more to do. this is an empty start epoch */ + sync->eager_send_active = true; + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_SUCCESS; + } - if (group_contains_proc (module->sc_group, pending_proc)) { - ompi_osc_pt2pt_peer_t *peer = module->peers + pending_post->rank; + opal_atomic_wmb (); - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "Consumed unexpected post message from %d", - pending_post->rank)); - ++module->num_post_msgs; - peer->eager_send_active = true; + /* translate the group ranks into the communicator */ + sync->peer_list.peers = ompi_osc_pt2pt_get_peers (module, group); + if (NULL == sync->peer_list.peers) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_OUT_OF_RESOURCE; + } - opal_list_remove_item (&module->pending_posts, &pending_post->super); - OBJ_RELEASE(pending_post); + if (!(assert & MPI_MODE_NOCHECK)) { + OPAL_THREAD_LOCK(&sync->lock); + for (int i = 0 ; i < sync->num_peers ; ++i) { + ompi_osc_pt2pt_peer_t *peer = sync->peer_list.peers[i]; + + 
if (ompi_osc_pt2pt_peer_unex (peer)) { + /* the peer already sent a post message for this pscw access epoch */ + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "found unexpected post from %d", + peer->rank)); + OPAL_THREAD_ADD32 (&sync->sync_expected, -1); + ompi_osc_pt2pt_peer_set_unex (peer, false); + } } + OPAL_THREAD_UNLOCK(&sync->lock); + } else { + sync->sync_expected = 0; } - /* disable eager sends until we've receved the proper number of - post messages, at which time we know all our peers are ready to - receive messages. */ - module->active_eager_send_active = false; - - /* possible we've already received a couple in messages, so - add however many we're going to wait for */ - module->num_post_msgs -= ompi_group_size(module->sc_group); - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "num_post_msgs = %d", module->num_post_msgs)); + "post messages still needed: %d", sync->sync_expected)); /* if we've already received all the post messages, we can eager send. Otherwise, eager send will be enabled when numb_post_messages reaches 0 */ - if (0 == module->num_post_msgs) { - module->active_eager_send_active = true; + if (0 == sync->sync_expected) { + sync->eager_send_active = true; } OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "ompi_osc_pt2pt_start complete")); + "ompi_osc_pt2pt_start complete. 
eager sends active: %d", + sync->eager_send_active)); OPAL_THREAD_UNLOCK(&module->lock); return OMPI_SUCCESS; } -int -ompi_osc_pt2pt_complete(ompi_win_t *win) +int ompi_osc_pt2pt_complete (ompi_win_t *win) { ompi_osc_pt2pt_module_t *module = GET_MODULE(win); - ompi_osc_pt2pt_header_complete_t complete_req; - ompi_osc_pt2pt_peer_t *peer; + ompi_osc_pt2pt_sync_t *sync = &module->all_sync; + int my_rank = ompi_comm_rank (module->comm); + ompi_osc_pt2pt_peer_t **peers; int ret = OMPI_SUCCESS; - int i; - int *ranks = NULL; ompi_group_t *group; + size_t group_size; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete entering...")); - if (NULL == module->sc_group) { + OPAL_THREAD_LOCK(&module->lock); + if (OMPI_OSC_PT2PT_SYNC_TYPE_PSCW != sync->type) { + OPAL_THREAD_UNLOCK(&module->lock); return OMPI_ERR_RMA_SYNC; } - ranks = get_comm_ranks(module, module->sc_group); - if (NULL == ranks) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + /* wait for all the post messages */ + ompi_osc_pt2pt_sync_wait (sync); - OPAL_THREAD_LOCK(&module->lock); + /* phase 1 cleanup sync object */ + group = sync->sync.pscw.group; + group_size = sync->num_peers; + + peers = sync->peer_list.peers; + + /* need to reset the sync here to avoid processing incorrect post messages */ + ompi_osc_pt2pt_sync_reset (sync); - /* wait for all the post messages */ - while (0 != module->num_post_msgs) { - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "waiting for post messages. num_post_msgs = %d", module->num_post_msgs)); - opal_condition_wait(&module->cond, &module->lock); - } OPAL_THREAD_UNLOCK(&module->lock); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "ompi_osc_pt2pt_complete sending complete messages")); + "ompi_osc_pt2pt_complete all posts received. sending complete messages...")); /* for each process in group, send a control message with number of updates coming, then start all the requests. 
Note that the @@ -307,25 +343,28 @@ ompi_osc_pt2pt_complete(ompi_win_t *win) At the same time, clean out the outgoing count for the next round. */ - for (i = 0 ; i < ompi_group_size(module->sc_group) ; ++i) { - ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, ranks[i]); - if (ompi_proc_local() == proc) { + for (size_t i = 0 ; i < group_size ; ++i) { + ompi_osc_pt2pt_header_complete_t complete_req; + int rank = peers[i]->rank; + + if (my_rank == rank) { /* shortcut for self */ - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete self complete")); - module->num_complete_msgs++; + osc_pt2pt_incoming_complete (module, rank, 0); continue; } complete_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE; complete_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG + complete_req.frag_count = module->epoch_outgoing_frag_count[rank]; +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT +#if OPAL_ENABLE_DEBUG complete_req.padding[0] = 0; complete_req.padding[1] = 0; #endif - complete_req.frag_count = module->epoch_outgoing_frag_count[ranks[i]]; - osc_pt2pt_hton(&complete_req, proc); + osc_pt2pt_hton(&complete_req, ompi_comm_peer_lookup (module->comm, rank)); +#endif - peer = module->peers + ranks[i]; + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, rank); /* XXX -- TODO -- since fragment are always delivered in order we do not need to count anything but long * requests. once that is done this can be removed. */ @@ -335,66 +374,58 @@ ompi_osc_pt2pt_complete(ompi_win_t *win) OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete sending complete message to %d. 
frag_count: %u", - ranks[i], complete_req.frag_count)); + rank, complete_req.frag_count)); + ret = ompi_osc_pt2pt_control_send (module, rank, &complete_req, + sizeof(ompi_osc_pt2pt_header_complete_t)); + if (OMPI_SUCCESS != ret) { + break; + } - peer->access_epoch = false; + ret = ompi_osc_pt2pt_frag_flush_target (module, rank); + if (OMPI_SUCCESS != ret) { + break; + } - ret = ompi_osc_pt2pt_control_send (module, ranks[i], &complete_req, - sizeof(ompi_osc_pt2pt_header_complete_t)); - if (OMPI_SUCCESS != ret) goto cleanup; + /* zero the fragment counts here to ensure they are zerod */ + module->epoch_outgoing_frag_count[rank] = 0; + } - ret = ompi_osc_pt2pt_frag_flush_target (module, ranks[i]); - if (OMPI_SUCCESS != ret) goto cleanup; + if (peers) { + /* release our reference to peers in this group */ + ompi_osc_pt2pt_release_peers (peers, group_size); } - OPAL_THREAD_LOCK(&module->lock); - /* zero the fragment counts here to ensure they are zerod */ - for (i = 0 ; i < ompi_group_size(module->sc_group) ; ++i) { - peer = module->peers + ranks[i]; - module->epoch_outgoing_frag_count[ranks[i]] = 0; - peer->eager_send_active = false; + if (OMPI_SUCCESS != ret) { + return ret; } + OPAL_THREAD_LOCK(&module->lock); /* wait for outgoing requests to complete. Don't wait for incoming, as we're only completing the access epoch, not the exposure epoch */ while (module->outgoing_frag_count != module->outgoing_frag_signal_count) { opal_condition_wait(&module->cond, &module->lock); } - /* phase 1 cleanup group */ - group = module->sc_group; - module->sc_group = NULL; - /* unlock here, as group cleanup can take a while... 
*/ OPAL_THREAD_UNLOCK(&module->lock); /* phase 2 cleanup group */ - ompi_group_decrement_proc_count(group); OBJ_RELEASE(group); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete complete")); - free (ranks); return OMPI_SUCCESS; - - cleanup: - if (NULL != ranks) free(ranks); - - return ret; } -int -ompi_osc_pt2pt_post(ompi_group_t *group, - int assert, - ompi_win_t *win) +int ompi_osc_pt2pt_post (ompi_group_t *group, int assert, ompi_win_t *win) { - int *ranks; int ret = OMPI_SUCCESS; ompi_osc_pt2pt_module_t *module = GET_MODULE(win); ompi_osc_pt2pt_header_post_t post_req; + ompi_osc_pt2pt_peer_t **peers; /* can't check for all access epoch here due to fence */ if (module->pw_group) { @@ -405,17 +436,17 @@ ompi_osc_pt2pt_post(ompi_group_t *group, "ompi_osc_pt2pt_post entering with group size %d...", ompi_group_size (group))); - /* save the group */ - OBJ_RETAIN(group); - ompi_group_increment_proc_count(group); - - OPAL_THREAD_LOCK(&(module->lock)); + OPAL_THREAD_LOCK(&module->lock); /* ensure we're not already in a post */ if (NULL != module->pw_group) { OPAL_THREAD_UNLOCK(&(module->lock)); return OMPI_ERR_RMA_SYNC; } + + /* save the group */ + OBJ_RETAIN(group); + module->pw_group = group; /* Update completion counter. 
Can't have received any completion @@ -425,18 +456,26 @@ ompi_osc_pt2pt_post(ompi_group_t *group, OPAL_THREAD_UNLOCK(&(module->lock)); + if ((assert & MPI_MODE_NOCHECK) || 0 == ompi_group_size (group)) { + return OMPI_SUCCESS; + } + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "sending post messages")); - ranks = get_comm_ranks(module, module->pw_group); - if (NULL == ranks) { + /* translate group ranks into the communicator */ + peers = ompi_osc_pt2pt_get_peers (module, module->pw_group); + if (OPAL_UNLIKELY(NULL == peers)) { return OMPI_ERR_OUT_OF_RESOURCE; } /* send a hello counter to everyone in group */ for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) { - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "Sending post message to rank %d", ranks[i])); - ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, ranks[i]); + ompi_osc_pt2pt_peer_t *peer = peers[i]; + int rank = peer->rank; + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "Sending post message to rank %d", rank)); + ompi_proc_t *proc = ompi_comm_peer_lookup (module->comm, rank); /* shortcut for self */ if (ompi_proc_local() == proc) { @@ -447,26 +486,24 @@ ompi_osc_pt2pt_post(ompi_group_t *group, post_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_POST; post_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; - post_req.windx = ompi_comm_get_cid(module->comm); osc_pt2pt_hton(&post_req, proc); /* we don't want to send any data, since we're the exposure epoch only, so use an unbuffered send */ - ret = ompi_osc_pt2pt_control_send_unbuffered(module, ranks[i], &post_req, - sizeof(ompi_osc_pt2pt_header_post_t)); + ret = ompi_osc_pt2pt_control_send_unbuffered(module, rank, &post_req, + sizeof(ompi_osc_pt2pt_header_post_t)); if (OMPI_SUCCESS != ret) { break; } } - free (ranks); + ompi_osc_pt2pt_release_peers (peers, ompi_group_size(module->pw_group)); return ret; } -int -ompi_osc_pt2pt_wait(ompi_win_t *win) +int ompi_osc_pt2pt_wait (ompi_win_t 
*win) { ompi_osc_pt2pt_module_t *module = GET_MODULE(win); ompi_group_t *group; @@ -481,9 +518,10 @@ ompi_osc_pt2pt_wait(ompi_win_t *win) OPAL_THREAD_LOCK(&module->lock); while (0 != module->num_complete_msgs || module->active_incoming_frag_count != module->active_incoming_frag_signal_count) { - OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, - "num_complete_msgs = %d, active_incoming_frag_count = %d, active_incoming_frag_signal_count = %d", - module->num_complete_msgs, module->active_incoming_frag_count, module->active_incoming_frag_signal_count)); + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "num_complete_msgs = %d, " + "active_incoming_frag_count = %d, active_incoming_frag_signal_count = %d", + module->num_complete_msgs, module->active_incoming_frag_count, + module->active_incoming_frag_signal_count)); opal_condition_wait(&module->cond, &module->lock); } @@ -491,7 +529,6 @@ ompi_osc_pt2pt_wait(ompi_win_t *win) module->pw_group = NULL; OPAL_THREAD_UNLOCK(&module->lock); - ompi_group_decrement_proc_count(group); OBJ_RELEASE(group); OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, @@ -501,9 +538,7 @@ ompi_osc_pt2pt_wait(ompi_win_t *win) } -int -ompi_osc_pt2pt_test(ompi_win_t *win, - int *flag) +int ompi_osc_pt2pt_test (ompi_win_t *win, int *flag) { ompi_osc_pt2pt_module_t *module = GET_MODULE(win); ompi_group_t *group; @@ -531,7 +566,6 @@ ompi_osc_pt2pt_test(ompi_win_t *win, OPAL_THREAD_UNLOCK(&(module->lock)); - ompi_group_decrement_proc_count(group); OBJ_RELEASE(group); return OMPI_SUCCESS; @@ -542,41 +576,45 @@ ompi_osc_pt2pt_test(ompi_win_t *win, return ret; } -int osc_pt2pt_incoming_post (ompi_osc_pt2pt_module_t *module, int source) +void osc_pt2pt_incoming_complete (ompi_osc_pt2pt_module_t *module, int source, int frag_count) { - ompi_proc_t *source_proc = ompi_comm_peer_lookup (module->comm, source); - ompi_osc_pt2pt_peer_t *peer = module->peers + source; + OPAL_OUTPUT_VERBOSE((50, 
ompi_osc_base_framework.framework_output, + "osc pt2pt: process_complete got complete message from %d. expected fragment count %d. " + "current signal count %d. current incomming count: %d. expected complete msgs: %d", + source, frag_count, module->active_incoming_frag_signal_count, + module->active_incoming_frag_count, module->num_complete_msgs)); - OPAL_THREAD_LOCK(&module->lock); + /* the current fragment is not part of the frag_count so we need to add it here */ + OPAL_THREAD_ADD32((int32_t *) &module->active_incoming_frag_signal_count, frag_count); - /* verify that this proc is part of the current start group */ - if (!module->sc_group || !group_contains_proc (module->sc_group, source_proc)) { - ompi_osc_pt2pt_pending_post_t *pending_post = OBJ_NEW(ompi_osc_pt2pt_pending_post_t); + if (0 == OPAL_THREAD_ADD32((int32_t *) &module->num_complete_msgs, 1)) { + opal_condition_broadcast (&module->cond); + } +} - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "received unexpected post message from %d. module->sc_group = %p, size = %d", - source, (void*)module->sc_group, module->sc_group ? 
ompi_group_size (module->sc_group) : 0)); +void osc_pt2pt_incoming_post (ompi_osc_pt2pt_module_t *module, int source) +{ + ompi_osc_pt2pt_sync_t *sync = &module->all_sync; - pending_post->rank = source; + OPAL_THREAD_LOCK(&sync->lock); - opal_list_append (&module->pending_posts, &pending_post->super); + /* verify that this proc is part of the current start group */ + if (!ompi_osc_pt2pt_sync_pscw_peer (module, source, NULL)) { + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, source); - OPAL_THREAD_UNLOCK(&module->lock); - return OMPI_SUCCESS; - } + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "received unexpected post message from %d for future PSCW synchronization", + source)); - assert (!peer->eager_send_active); - peer->eager_send_active = true; + ompi_osc_pt2pt_peer_set_unex (peer, true); + OPAL_THREAD_UNLOCK(&sync->lock); + } else { + OPAL_THREAD_UNLOCK(&sync->lock); - module->num_post_msgs++; - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "received post message. num_post_msgs = %d", module->num_post_msgs)); + ompi_osc_pt2pt_sync_expected (sync); - if (0 == module->num_post_msgs) { - module->active_eager_send_active = true; + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "received post message for PSCW synchronization. post messages still needed: %d", + sync->sync_expected)); } - opal_condition_broadcast (&module->cond); - OPAL_THREAD_UNLOCK(&module->lock); - - return OMPI_SUCCESS; } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c index 1ba8e287a50..3973cf88d9b 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c @@ -8,12 +8,14 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2014 Los Alamos National Security, LLC. 
All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,12 +23,6 @@ * $HEADER$ */ -#include "ompi_config.h" -#include "mpi.h" - -#include -#include - #include "osc_pt2pt.h" #include "osc_pt2pt_request.h" #include "osc_pt2pt_header.h" @@ -35,32 +31,62 @@ #include "opal_stdint.h" #include "ompi/memchecker.h" -#include "ompi/mca/pml/pml.h" #include "ompi/mca/osc/base/osc_base_obj_convert.h" -#include "ompi/mca/osc/base/base.h" + +#include /* progress an OSC request */ +static int ompi_osc_pt2pt_comm_complete (ompi_request_t *request) +{ + ompi_osc_pt2pt_module_t *module = + (ompi_osc_pt2pt_module_t*) request->req_complete_cb_data; + + OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, + "isend_completion_cb called")); + + mark_outgoing_completion(module); + + ompi_request_free (&request); + + return 1; +} + static int ompi_osc_pt2pt_req_comm_complete (ompi_request_t *request) { ompi_osc_pt2pt_request_t *pt2pt_request = (ompi_osc_pt2pt_request_t *) request->req_complete_cb_data; - ompi_osc_pt2pt_module_t *module = pt2pt_request->module; OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_req_comm_complete called tag = %d", request->req_status.MPI_TAG)); - mark_outgoing_completion (module); + /* update the cbdata for ompi_osc_pt2pt_comm_complete */ + request->req_complete_cb_data = pt2pt_request->module; if (0 == OPAL_THREAD_ADD32(&pt2pt_request->outstanding_requests, -1)) { ompi_osc_pt2pt_request_complete (pt2pt_request, request->req_status.MPI_ERROR); } - /* 
put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); + return ompi_osc_pt2pt_comm_complete (request); +} - return OMPI_SUCCESS; +static inline int ompi_osc_pt2pt_data_isend (ompi_osc_pt2pt_module_t *module, const void *buf, + size_t count, ompi_datatype_t *datatype, int dest, + int tag, ompi_osc_pt2pt_request_t *request) +{ + /* increment the outgoing send count */ + ompi_osc_signal_outgoing (module, dest, 1); + + if (NULL != request) { + ++request->outstanding_requests; + return ompi_osc_pt2pt_isend_w_cb (buf, count, datatype, dest, tag, module->comm, + ompi_osc_pt2pt_req_comm_complete, request); + } + + return ompi_osc_pt2pt_isend_w_cb (buf, count, datatype, dest, tag, module->comm, + ompi_osc_pt2pt_comm_complete, module); } + static int ompi_osc_pt2pt_dt_send_complete (ompi_request_t *request) { ompi_datatype_t *datatype = (ompi_datatype_t *) request->req_complete_cb_data; @@ -69,41 +95,31 @@ static int ompi_osc_pt2pt_dt_send_complete (ompi_request_t *request) OBJ_RELEASE(datatype); OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.lock); - opal_hash_table_get_value_uint32(&mca_osc_pt2pt_component.modules, - ompi_comm_get_cid(request->req_mpi_object.comm), - (void **) &module); + (void) opal_hash_table_get_value_uint32(&mca_osc_pt2pt_component.modules, + ompi_comm_get_cid(request->req_mpi_object.comm), + (void **) &module); OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.lock); assert (NULL != module); - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); + ompi_request_free (&request); - return OMPI_SUCCESS; + return 1; } /* self communication optimizations */ -static inline int ompi_osc_pt2pt_put_self (void *source, int source_count, ompi_datatype_t *source_datatype, - OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, - ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) +static inline int ompi_osc_pt2pt_put_self (ompi_osc_pt2pt_sync_t 
*pt2pt_sync, const void *source, int source_count, + ompi_datatype_t *source_datatype, OPAL_PTRDIFF_TYPE target_disp, int target_count, + ompi_datatype_t *target_datatype, ompi_osc_pt2pt_module_t *module, + ompi_osc_pt2pt_request_t *request) { void *target = (unsigned char*) module->baseptr + ((unsigned long) target_disp * module->disp_unit); int ret; /* if we are in active target mode wait until all post messages arrive */ - if (module->sc_group && !module->active_eager_send_active) { - OPAL_THREAD_LOCK(&module->lock); - while (0 != module->num_post_msgs) { - opal_condition_wait(&module->cond, &module->lock); - } - OPAL_THREAD_UNLOCK(&module->lock); - } - - if (!(module->passive_target_access_epoch || module->active_eager_send_active)) { - return OMPI_ERR_RMA_SYNC; - } + ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); - ret = ompi_datatype_sndrcv (source, source_count, source_datatype, + ret = ompi_datatype_sndrcv ((void *)source, source_count, source_datatype, target, target_count, target_datatype); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; @@ -116,26 +132,16 @@ static inline int ompi_osc_pt2pt_put_self (void *source, int source_count, ompi_ return OMPI_SUCCESS; } -static inline int ompi_osc_pt2pt_get_self (void *target, int target_count, ompi_datatype_t *target_datatype, - OPAL_PTRDIFF_TYPE source_disp, int source_count, ompi_datatype_t *source_datatype, - ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) +static inline int ompi_osc_pt2pt_get_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, void *target, int target_count, ompi_datatype_t *target_datatype, + OPAL_PTRDIFF_TYPE source_disp, int source_count, ompi_datatype_t *source_datatype, + ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) { void *source = (unsigned char*) module->baseptr + ((unsigned long) source_disp * module->disp_unit); int ret; /* if we are in active target mode wait until all post messages arrive */ - if (module->sc_group && 
!module->active_eager_send_active) { - OPAL_THREAD_LOCK(&module->lock); - while (0 != module->num_post_msgs) { - opal_condition_wait(&module->cond, &module->lock); - } - OPAL_THREAD_UNLOCK(&module->lock); - } - - if (!(module->passive_target_access_epoch || module->active_eager_send_active)) { - return OMPI_ERR_RMA_SYNC; - } + ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); ret = ompi_datatype_sndrcv (source, source_count, source_datatype, target, target_count, target_datatype); @@ -150,24 +156,14 @@ static inline int ompi_osc_pt2pt_get_self (void *target, int target_count, ompi_ return OMPI_SUCCESS; } -static inline int ompi_osc_pt2pt_cas_self (void *source, void *compare, void *result, ompi_datatype_t *datatype, - OPAL_PTRDIFF_TYPE target_disp, ompi_osc_pt2pt_module_t *module) +static inline int ompi_osc_pt2pt_cas_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, const void *source, const void *compare, void *result, + ompi_datatype_t *datatype, OPAL_PTRDIFF_TYPE target_disp, ompi_osc_pt2pt_module_t *module) { void *target = (unsigned char*) module->baseptr + ((unsigned long) target_disp * module->disp_unit); /* if we are in active target mode wait until all post messages arrive */ - if (module->sc_group && !module->active_eager_send_active) { - OPAL_THREAD_LOCK(&module->lock); - while (0 != module->num_post_msgs) { - opal_condition_wait(&module->cond, &module->lock); - } - OPAL_THREAD_UNLOCK(&module->lock); - } - - if (!(module->passive_target_access_epoch || module->active_eager_send_active)) { - return OMPI_ERR_RMA_SYNC; - } + ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); ompi_osc_pt2pt_accumulate_lock (module); @@ -182,33 +178,23 @@ static inline int ompi_osc_pt2pt_cas_self (void *source, void *compare, void *re return OMPI_SUCCESS; } -static inline int ompi_osc_pt2pt_acc_self (void *source, int source_count, ompi_datatype_t *source_datatype, - OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, - ompi_op_t *op, ompi_osc_pt2pt_module_t 
*module, ompi_osc_pt2pt_request_t *request) +static inline int ompi_osc_pt2pt_acc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, const void *source, int source_count, ompi_datatype_t *source_datatype, + OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, + ompi_op_t *op, ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) { void *target = (unsigned char*) module->baseptr + ((unsigned long) target_disp * module->disp_unit); int ret; /* if we are in active target mode wait until all post messages arrive */ - if (module->sc_group && !module->active_eager_send_active) { - OPAL_THREAD_LOCK(&module->lock); - while (0 != module->num_post_msgs) { - opal_condition_wait(&module->cond, &module->lock); - } - OPAL_THREAD_UNLOCK(&module->lock); - } - - if (!(module->passive_target_access_epoch || module->active_eager_send_active)) { - return OMPI_ERR_RMA_SYNC; - } + ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); ompi_osc_pt2pt_accumulate_lock (module); if (&ompi_mpi_op_replace.op != op) { ret = ompi_osc_base_sndrcv_op (source, source_count, source_datatype, target, target_count, target_datatype, op); } else { - ret = ompi_datatype_sndrcv (source, source_count, source_datatype, target, target_count, target_datatype); + ret = ompi_datatype_sndrcv ((void *)source, source_count, source_datatype, target, target_count, target_datatype); } ompi_osc_pt2pt_accumulate_unlock (module); @@ -226,27 +212,17 @@ static inline int ompi_osc_pt2pt_acc_self (void *source, int source_count, ompi_ return OMPI_SUCCESS; } -static inline int ompi_osc_pt2pt_gacc_self (void *source, int source_count, ompi_datatype_t *source_datatype, - void *result, int result_count, ompi_datatype_t *result_datatype, - OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, - ompi_op_t *op, ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) +static inline int ompi_osc_pt2pt_gacc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, const void 
*source, int source_count, ompi_datatype_t *source_datatype, + void *result, int result_count, ompi_datatype_t *result_datatype, + OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, + ompi_op_t *op, ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) { void *target = (unsigned char*) module->baseptr + ((unsigned long) target_disp * module->disp_unit); int ret; - /* if we are in active target mode wait until all post messages arrive */ - if (module->sc_group && !module->active_eager_send_active) { - OPAL_THREAD_LOCK(&module->lock); - while (0 != module->num_post_msgs) { - opal_condition_wait(&module->cond, &module->lock); - } - OPAL_THREAD_UNLOCK(&module->lock); - } - - if (!(module->passive_target_access_epoch || module->active_eager_send_active)) { - return OMPI_ERR_RMA_SYNC; - } + OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_gacc_self: starting local " + "get accumulate")); ompi_osc_pt2pt_accumulate_lock (module); @@ -264,7 +240,7 @@ static inline int ompi_osc_pt2pt_gacc_self (void *source, int source_count, ompi if (&ompi_mpi_op_replace.op != op) { ret = ompi_osc_base_sndrcv_op (source, source_count, source_datatype, target, target_count, target_datatype, op); } else { - ret = ompi_datatype_sndrcv (source, source_count, source_datatype, target, target_count, target_datatype); + ret = ompi_datatype_sndrcv ((void *)source, source_count, source_datatype, target, target_count, target_datatype); } } @@ -277,6 +253,9 @@ static inline int ompi_osc_pt2pt_gacc_self (void *source, int source_count, ompi ompi_osc_pt2pt_accumulate_unlock (module); + OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_gacc_self: local get " + "accumulate complete")); + if (request) { /* NTH: is it ok to use an ompi error code here? 
*/ ompi_osc_pt2pt_request_complete (request, ret); @@ -286,7 +265,7 @@ static inline int ompi_osc_pt2pt_gacc_self (void *source, int source_count, ompi } /* end: self communication optimizations */ -static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count, +static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, struct ompi_datatype_t *target_dt, @@ -296,6 +275,7 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count, ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, target); ompi_osc_pt2pt_frag_t *frag; ompi_osc_pt2pt_header_put_t *header; + ompi_osc_pt2pt_sync_t *pt2pt_sync; size_t ddt_len, payload_len, frag_len; bool is_long_datatype = false; bool is_long_msg = false; @@ -309,7 +289,8 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count, origin_dt->name, target, (int) target_disp, target_count, target_dt->name, win->w_name)); - if (!ompi_osc_pt2pt_check_access_epoch (module, target)) { + pt2pt_sync = ompi_osc_pt2pt_module_sync_lookup (module, target, NULL); + if (OPAL_UNLIKELY(NULL == pt2pt_sync)) { return OMPI_ERR_RMA_SYNC; } @@ -324,9 +305,9 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count, /* optimize self communication. TODO: optimize local communication */ if (ompi_comm_rank (module->comm) == target) { - return ompi_osc_pt2pt_put_self (origin_addr, origin_count, origin_dt, - target_disp, target_count, target_dt, - module, request); + return ompi_osc_pt2pt_put_self (pt2pt_sync, origin_addr, origin_count, origin_dt, + target_disp, target_count, target_dt, + module, request); } /* Compute datatype and payload lengths. 
Note that the datatype description @@ -335,14 +316,14 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count, payload_len = origin_dt->super.size * origin_count; frag_len = sizeof(ompi_osc_pt2pt_header_put_t) + ddt_len + payload_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { frag_len = sizeof(ompi_osc_pt2pt_header_put_t) + ddt_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { /* allocate space for the header plus space to store ddt_len */ frag_len = sizeof(ompi_osc_pt2pt_header_put_t) + 8; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -354,16 +335,18 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count, tag = get_tag(module); } - /* flush will be called at the end of this function. make sure the post message has - * arrived. */ - if ((is_long_msg || request) && module->sc_group) { - OPAL_THREAD_LOCK(&module->lock); - while (0 != module->num_post_msgs) { - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "waiting for post messages. 
num_post_msgs = %d", module->num_post_msgs)); - opal_condition_wait(&module->cond, &module->lock); + if (is_long_msg) { + /* wait for eager sends to be active before starting a long put */ + if (pt2pt_sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK) { + OPAL_THREAD_LOCK(&pt2pt_sync->lock); + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target); + while (!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER)) { + opal_condition_wait(&pt2pt_sync->cond, &pt2pt_sync->lock); + } + OPAL_THREAD_UNLOCK(&pt2pt_sync->lock); + } else { + ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); } - OPAL_THREAD_UNLOCK(&module->lock); } OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, @@ -389,9 +372,9 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count, OBJ_RETAIN(target_dt); - ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, target, - tag, module->comm, ompi_osc_pt2pt_dt_send_complete, - target_dt); + ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, + target, tag_to_target(tag), module->comm, + ompi_osc_pt2pt_dt_send_complete, target_dt); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; } @@ -420,18 +403,8 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count, header->tag = tag; osc_pt2pt_hton(header, proc); - /* increase the outgoing signal count */ - ompi_osc_signal_outgoing (module, target, 1); - - if (request) { - request->outstanding_requests = 1; - ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_dt, - target, tag, module->comm, ompi_osc_pt2pt_req_comm_complete, - request); - } else { - ret = ompi_osc_pt2pt_component_isend (module,origin_addr, origin_count, origin_dt, target, tag, - module->comm); - } + ret = ompi_osc_pt2pt_data_isend (module,origin_addr, origin_count, origin_dt, + target, tag_to_target(tag), request); } } while (0); @@ -439,18 +412,11 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int 
origin_count, header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_VALID; } - ret = ompi_osc_pt2pt_frag_finish(module, frag); - - if (request || is_long_msg) { - /* need to flush now in case the caller decides to wait on the request */ - ompi_osc_pt2pt_frag_flush_target (module, target); - } - - return ret; + return ompi_osc_pt2pt_frag_finish(module, frag); } int -ompi_osc_pt2pt_put(void *origin_addr, int origin_count, +ompi_osc_pt2pt_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, @@ -463,7 +429,7 @@ ompi_osc_pt2pt_put(void *origin_addr, int origin_count, static int -ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count, +ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, @@ -478,6 +444,7 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count, bool is_long_msg = false; ompi_osc_pt2pt_frag_t *frag; ompi_osc_pt2pt_header_acc_t *header; + ompi_osc_pt2pt_sync_t *pt2pt_sync; size_t ddt_len, payload_len, frag_len; char *ptr; const void *packed_ddt; @@ -490,7 +457,8 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count, target_count, target_dt->name, op->o_name, win->w_name)); - if (!ompi_osc_pt2pt_check_access_epoch (module, target)) { + pt2pt_sync = ompi_osc_pt2pt_module_sync_lookup (module, target, NULL); + if (OPAL_UNLIKELY(NULL == pt2pt_sync)) { return OMPI_ERR_RMA_SYNC; } @@ -505,9 +473,9 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count, /* optimize the self case. 
TODO: optimize the local case */ if (ompi_comm_rank (module->comm) == target) { - return ompi_osc_pt2pt_acc_self (origin_addr, origin_count, origin_dt, - target_disp, target_count, target_dt, - op, module, request); + return ompi_osc_pt2pt_acc_self (pt2pt_sync, origin_addr, origin_count, origin_dt, + target_disp, target_count, target_dt, + op, module, request); } /* Compute datatype and payload lengths. Note that the datatype description @@ -516,14 +484,14 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count, payload_len = origin_dt->super.size * origin_count; frag_len = sizeof(*header) + ddt_len + payload_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, true); if (OMPI_SUCCESS != ret) { frag_len = sizeof(*header) + ddt_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, !request); if (OMPI_SUCCESS != ret) { /* allocate space for the header plus space to store ddt_len */ frag_len = sizeof(*header) + 8; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, !request); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -535,16 +503,18 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count, tag = get_tag (module); } - /* flush will be called at the end of this function. make sure the post message has - * arrived. */ - if ((is_long_msg || request) && module->sc_group) { - OPAL_THREAD_LOCK(&module->lock); - while (0 != module->num_post_msgs) { - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "waiting for post messages. 
num_post_msgs = %d", module->num_post_msgs)); - opal_condition_wait(&module->cond, &module->lock); + if (is_long_msg) { + /* wait for synchronization before posting a long message */ + if (pt2pt_sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK) { + OPAL_THREAD_LOCK(&pt2pt_sync->lock); + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target); + while (!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER)) { + opal_condition_wait(&pt2pt_sync->cond, &pt2pt_sync->lock); + } + OPAL_THREAD_UNLOCK(&pt2pt_sync->lock); + } else { + ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); } - OPAL_THREAD_UNLOCK(&module->lock); } header = (ompi_osc_pt2pt_header_acc_t*) ptr; @@ -567,9 +537,9 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count, OBJ_RETAIN(target_dt); - ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, target, - tag, module->comm, ompi_osc_pt2pt_dt_send_complete, - target_dt); + ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, + target, tag_to_target(tag), module->comm, + ompi_osc_pt2pt_dt_send_complete, target_dt); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; } @@ -601,18 +571,8 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count, OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "acc: starting long accumulate with tag %d", tag)); - /* increment the outgoing send count */ - ompi_osc_signal_outgoing (module, target, 1); - - if (request) { - request->outstanding_requests = 1; - ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_dt, - target, tag, module->comm, ompi_osc_pt2pt_req_comm_complete, - request); - } else { - ret = ompi_osc_pt2pt_component_isend (module, origin_addr, origin_count, origin_dt, target, tag, - module->comm); - } + ret = ompi_osc_pt2pt_data_isend (module, origin_addr, origin_count, origin_dt, + target, tag_to_target(tag), request); } } while (0); @@ -624,18 +584,11 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, 
int origin_count, header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_VALID; } - ret = ompi_osc_pt2pt_frag_finish(module, frag); - - if (is_long_msg || request) { - /* need to flush now in case the caller decides to wait on the request */ - ompi_osc_pt2pt_frag_flush_target (module, target); - } - - return ret; + return ompi_osc_pt2pt_frag_finish(module, frag); } int -ompi_osc_pt2pt_accumulate(void *origin_addr, int origin_count, +ompi_osc_pt2pt_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, @@ -647,7 +600,7 @@ ompi_osc_pt2pt_accumulate(void *origin_addr, int origin_count, target_dt, op, win, NULL); } -int ompi_osc_pt2pt_compare_and_swap (void *origin_addr, void *compare_addr, +int ompi_osc_pt2pt_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr, struct ompi_datatype_t *dt, int target, OPAL_PTRDIFF_TYPE target_disp, struct ompi_win_t *win) @@ -656,6 +609,7 @@ int ompi_osc_pt2pt_compare_and_swap (void *origin_addr, void *compare_addr, ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, target); ompi_osc_pt2pt_frag_t *frag; ompi_osc_pt2pt_header_cswap_t *header; + ompi_osc_pt2pt_sync_t *pt2pt_sync; size_t ddt_len, payload_len, frag_len; ompi_osc_pt2pt_request_t *request; const void *packed_ddt; @@ -668,21 +622,19 @@ int ompi_osc_pt2pt_compare_and_swap (void *origin_addr, void *compare_addr, (unsigned long) result_addr, dt->name, target, (int) target_disp, win->w_name)); - if (!ompi_osc_pt2pt_check_access_epoch (module, target)) { + pt2pt_sync = ompi_osc_pt2pt_module_sync_lookup (module, target, NULL); + if (OPAL_UNLIKELY(NULL == pt2pt_sync)) { return OMPI_ERR_RMA_SYNC; } /* optimize self case. 
TODO: optimize local case */ if (ompi_comm_rank (module->comm) == target) { - return ompi_osc_pt2pt_cas_self (origin_addr, compare_addr, result_addr, dt, target_disp, - module); + return ompi_osc_pt2pt_cas_self (pt2pt_sync, origin_addr, compare_addr, result_addr, dt, target_disp, + module); } /* compare-and-swaps are always request based, so that we know where to land the data */ OMPI_OSC_PT2PT_REQUEST_ALLOC(win, request); - if (NULL == request) { - return OMPI_ERR_OUT_OF_RESOURCE; - } request->type = OMPI_OSC_PT2PT_HDR_TYPE_CSWAP; request->origin_addr = origin_addr; @@ -697,8 +649,13 @@ int ompi_osc_pt2pt_compare_and_swap (void *origin_addr, void *compare_addr, /* we need to send both the origin and compare buffers */ payload_len = dt->super.size * 2; + ret = ompi_datatype_get_pack_description(dt, &packed_ddt); + if (OMPI_SUCCESS != ret) { + return ret; + } + frag_len = sizeof(ompi_osc_pt2pt_header_cswap_t) + ddt_len + payload_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, false); if (OMPI_SUCCESS != ret) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -715,7 +672,6 @@ int ompi_osc_pt2pt_compare_and_swap (void *origin_addr, void *compare_addr, osc_pt2pt_hton(header, proc); ptr += sizeof(ompi_osc_pt2pt_header_cswap_t); - ret = ompi_datatype_get_pack_description(dt, &packed_ddt); memcpy((unsigned char*) ptr, packed_ddt, ddt_len); ptr += ddt_len; @@ -725,19 +681,18 @@ int ompi_osc_pt2pt_compare_and_swap (void *origin_addr, void *compare_addr, osc_pt2pt_copy_for_send (ptr, dt->super.size, compare_addr, proc, 1, dt); request->outstanding_requests = 1; - ret = ompi_osc_pt2pt_irecv_w_cb (result_addr, 1, dt, target, tag, module->comm, + ret = ompi_osc_pt2pt_irecv_w_cb (result_addr, 1, dt, + target, tag_to_origin(tag), module->comm, NULL, ompi_osc_pt2pt_req_comm_complete, request); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; } - ret = 
ompi_osc_pt2pt_frag_finish(module, frag); - - return ret; + return ompi_osc_pt2pt_frag_finish (module, frag); } -int ompi_osc_pt2pt_fetch_and_op(void *origin_addr, void *result_addr, +int ompi_osc_pt2pt_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, OPAL_PTRDIFF_TYPE target_disp, struct ompi_op_t *op, struct ompi_win_t *win) @@ -746,7 +701,7 @@ int ompi_osc_pt2pt_fetch_and_op(void *origin_addr, void *result_addr, target, target_disp, 1, dt, op, win); } -int ompi_osc_pt2pt_rput(void *origin_addr, int origin_count, +int ompi_osc_pt2pt_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, struct ompi_datatype_t *target_dt, @@ -762,9 +717,6 @@ int ompi_osc_pt2pt_rput(void *origin_addr, int origin_count, target_count, target_dt->name, win->w_name)); OMPI_OSC_PT2PT_REQUEST_ALLOC(win, pt2pt_request); - if (NULL == pt2pt_request) { - return OMPI_ERR_OUT_OF_RESOURCE; - } /* short-circuit case */ if (0 == origin_count || 0 == target_count) { @@ -802,6 +754,7 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co bool is_long_datatype = false; ompi_osc_pt2pt_frag_t *frag; ompi_osc_pt2pt_header_get_t *header; + ompi_osc_pt2pt_sync_t *pt2pt_sync; size_t ddt_len, frag_len; char *ptr; const void *packed_ddt; @@ -813,15 +766,13 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co origin_dt->name, target, (int) target_disp, target_count, target_dt->name, win->w_name)); - if (!ompi_osc_pt2pt_check_access_epoch (module, target)) { + pt2pt_sync = ompi_osc_pt2pt_module_sync_lookup (module, target, NULL); + if (OPAL_UNLIKELY(NULL == pt2pt_sync)) { return OMPI_ERR_RMA_SYNC; } /* gets are always request based, so that we know where to land the data */ OMPI_OSC_PT2PT_REQUEST_ALLOC(win, pt2pt_request); - if (NULL == pt2pt_request) { - return OMPI_ERR_OUT_OF_RESOURCE; - } pt2pt_request->internal 
= release_req; @@ -835,9 +786,9 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co /* optimize self communication. TODO: optimize local communication */ if (ompi_comm_rank (module->comm) == target) { *request = &pt2pt_request->super; - return ompi_osc_pt2pt_get_self (origin_addr, origin_count, origin_dt, - target_disp, target_count, target_dt, - module, pt2pt_request); + return ompi_osc_pt2pt_get_self (pt2pt_sync, origin_addr, origin_count, origin_dt, + target_disp, target_count, target_dt, + module, pt2pt_request); } pt2pt_request->type = OMPI_OSC_PT2PT_HDR_TYPE_GET; @@ -851,11 +802,11 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co ddt_len = ompi_datatype_pack_description_length(target_dt); frag_len = sizeof(ompi_osc_pt2pt_header_get_t) + ddt_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, release_req); if (OMPI_SUCCESS != ret) { /* allocate space for the header plus space to store ddt_len */ - frag_len = sizeof(ompi_osc_pt2pt_header_put_t) + 8; - ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr); + frag_len = sizeof(ompi_osc_pt2pt_header_get_t) + 8; + ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, release_req); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -868,14 +819,9 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co /* for bookkeeping the get is "outgoing" */ ompi_osc_signal_outgoing (module, target, 1); - /* flush will be called at the end of this function. make sure the post message has - * arrived. */ - if (!release_req && module->sc_group) { - while (0 != module->num_post_msgs) { - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "waiting for post messages. 
num_post_msgs = %d", module->num_post_msgs)); - opal_condition_wait(&module->cond, &module->lock); - } + if (!release_req) { + /* wait for epoch to begin before starting rget operation */ + ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); } header = (ompi_osc_pt2pt_header_get_t*) ptr; @@ -900,9 +846,9 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co OBJ_RETAIN(target_dt); - ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, target, - tag, module->comm, ompi_osc_pt2pt_dt_send_complete, - target_dt); + ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, + target, tag_to_target(tag), module->comm, + ompi_osc_pt2pt_dt_send_complete, target_dt); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; } @@ -916,8 +862,9 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co /* TODO -- store the request somewhere so we can cancel it on error */ pt2pt_request->outstanding_requests = 1; - ret = ompi_osc_pt2pt_irecv_w_cb (origin_addr, origin_count, origin_dt, target, tag, - module->comm, NULL, ompi_osc_pt2pt_req_comm_complete, pt2pt_request); + ret = ompi_osc_pt2pt_irecv_w_cb (origin_addr, origin_count, origin_dt, + target, tag_to_origin(tag), module->comm, + NULL, ompi_osc_pt2pt_req_comm_complete, pt2pt_request); } while (0); if (OMPI_SUCCESS == ret) { @@ -925,14 +872,7 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co *request = &pt2pt_request->super; } - ret = ompi_osc_pt2pt_frag_finish(module, frag); - - if (!release_req) { - /* need to flush now in case the caller decides to wait on the request */ - ompi_osc_pt2pt_frag_flush_target (module, target); - } - - return ret; + return ompi_osc_pt2pt_frag_finish(module, frag); } int ompi_osc_pt2pt_rget (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, @@ -956,7 +896,7 @@ int ompi_osc_pt2pt_get (void *origin_addr, int origin_count, struct ompi_datatyp target_count, target_dt, 
win, true, &request); } -int ompi_osc_pt2pt_raccumulate(void *origin_addr, int origin_count, +int ompi_osc_pt2pt_raccumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -973,9 +913,6 @@ int ompi_osc_pt2pt_raccumulate(void *origin_addr, int origin_count, win->w_name)); OMPI_OSC_PT2PT_REQUEST_ALLOC(win, pt2pt_request); - if (NULL == pt2pt_request) { - return OMPI_ERR_OUT_OF_RESOURCE; - } /* short-circuit case */ if (0 == origin_count || 0 == target_count) { @@ -1001,7 +938,7 @@ int ompi_osc_pt2pt_raccumulate(void *origin_addr, int origin_count, static inline -int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count, +int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype, void *result_addr, int result_count, struct ompi_datatype_t *result_datatype, @@ -1017,6 +954,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count bool is_long_msg = false; ompi_osc_pt2pt_frag_t *frag; ompi_osc_pt2pt_header_acc_t *header; + ompi_osc_pt2pt_sync_t *pt2pt_sync; size_t ddt_len, payload_len, frag_len; char *ptr; const void *packed_ddt; @@ -1030,15 +968,13 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count target_rank, (int) target_disp, target_count, target_datatype->name, op->o_name, win->w_name)); - if (!ompi_osc_pt2pt_check_access_epoch (module, target_rank)) { + pt2pt_sync = ompi_osc_pt2pt_module_sync_lookup (module, target_rank, NULL); + if (OPAL_UNLIKELY(NULL == pt2pt_sync)) { return OMPI_ERR_RMA_SYNC; } /* get_accumulates are always request based, so that we know where to land the data */ OMPI_OSC_PT2PT_REQUEST_ALLOC(win, pt2pt_request); - if (OPAL_UNLIKELY(NULL == pt2pt_request)) { - return OMPI_ERR_OUT_OF_RESOURCE; - } pt2pt_request->internal = release_req; @@ 
-1049,13 +985,18 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count return OMPI_SUCCESS; } + if (!release_req) { + /* wait for epoch to begin before starting operation */ + ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); + } + /* optimize the self case. TODO: optimize the local case */ if (ompi_comm_rank (module->comm) == target_rank) { *request = &pt2pt_request->super; - return ompi_osc_pt2pt_gacc_self (origin_addr, origin_count, origin_datatype, - result_addr, result_count, result_datatype, - target_disp, target_count, target_datatype, - op, module, pt2pt_request); + return ompi_osc_pt2pt_gacc_self (pt2pt_sync, origin_addr, origin_count, origin_datatype, + result_addr, result_count, result_datatype, + target_disp, target_count, target_datatype, + op, module, pt2pt_request); } pt2pt_request->type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC; @@ -1075,14 +1016,14 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count } frag_len = sizeof(*header) + ddt_len + payload_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr, false, release_req); if (OMPI_SUCCESS != ret) { frag_len = sizeof(*header) + ddt_len; - ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr, true, release_req); if (OMPI_SUCCESS != ret) { /* allocate space for the header plus space to store ddt_len */ frag_len = sizeof(*header) + 8; - ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr, true, release_req); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -1102,18 +1043,6 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count /* increment the number of outgoing fragments */ 
ompi_osc_signal_outgoing (module, target_rank, pt2pt_request->outstanding_requests); - /* flush will be called at the end of this function. make sure the post message has - * arrived. */ - if (!release_req && module->sc_group) { - OPAL_THREAD_LOCK(&module->lock); - while (0 != module->num_post_msgs) { - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "waiting for post messages. num_post_msgs = %d", module->num_post_msgs)); - opal_condition_wait(&module->cond, &module->lock); - } - OPAL_THREAD_UNLOCK(&module->lock); - } - header = (ompi_osc_pt2pt_header_acc_t *) ptr; header->base.flags = 0; header->len = frag_len; @@ -1136,9 +1065,9 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count OBJ_RETAIN(target_datatype); - ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, target_rank, - tag, module->comm, ompi_osc_pt2pt_dt_send_complete, - target_datatype); + ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, + target_rank, tag_to_target(tag), module->comm, + ompi_osc_pt2pt_dt_send_complete, target_datatype); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; } @@ -1150,8 +1079,9 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count ptr += ddt_len; } - ret = ompi_osc_pt2pt_irecv_w_cb (result_addr, result_count, result_datatype, target_rank, tag, - module->comm, NULL, ompi_osc_pt2pt_req_comm_complete, pt2pt_request); + ret = ompi_osc_pt2pt_irecv_w_cb (result_addr, result_count, result_datatype, + target_rank, tag_to_origin(tag), module->comm, + NULL, ompi_osc_pt2pt_req_comm_complete, pt2pt_request); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; } @@ -1168,8 +1098,9 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG; osc_pt2pt_hton(header, proc); - ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_datatype, target_rank, - tag, module->comm, 
ompi_osc_pt2pt_req_comm_complete, pt2pt_request); + ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_datatype, + target_rank, tag_to_target(tag), module->comm, + ompi_osc_pt2pt_req_comm_complete, pt2pt_request); } } while (0); @@ -1178,17 +1109,10 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count *request = (ompi_request_t *) pt2pt_request; } - ret = ompi_osc_pt2pt_frag_finish(module, frag); - - if (!release_req) { - /* need to flush now in case the caller decides to wait on the request */ - ompi_osc_pt2pt_frag_flush_target (module, target_rank); - } - - return ret; + return ompi_osc_pt2pt_frag_finish(module, frag); } -int ompi_osc_pt2pt_get_accumulate(void *origin_addr, int origin_count, +int ompi_osc_pt2pt_get_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, void *result_addr, int result_count, struct ompi_datatype_t *result_dt, @@ -1205,7 +1129,7 @@ int ompi_osc_pt2pt_get_accumulate(void *origin_addr, int origin_count, } -int ompi_osc_pt2pt_rget_accumulate(void *origin_addr, int origin_count, +int ompi_osc_pt2pt_rget_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, void *result_addr, int result_count, struct ompi_datatype_t *result_dt, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c index 4c2d06d74f8..e41a8306b7d 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c @@ -14,7 +14,7 @@ * Copyright (c) 2006-2008 University of Houston. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -28,25 +28,12 @@ #include #include "osc_pt2pt.h" -#include "osc_pt2pt_data_move.h" #include "osc_pt2pt_frag.h" #include "osc_pt2pt_request.h" +#include "osc_pt2pt_data_move.h" -#include "opal/threads/condition.h" -#include "opal/threads/mutex.h" -#include "opal/util/arch.h" -#include "opal/align.h" -#include "opal/mca/btl/btl.h" - -#include "ompi/info/info.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/osc/osc.h" -#include "ompi/mca/osc/base/base.h" #include "ompi/mca/osc/base/osc_base_obj_convert.h" -#include "opal/mca/btl/btl.h" -#include "ompi/mca/pml/pml.h" -static int component_open(void); static int component_register(void); static int component_init(bool enable_progress_threads, bool enable_mpi_threads); static int component_finalize(void); @@ -64,7 +51,6 @@ ompi_osc_pt2pt_component_t mca_osc_pt2pt_component = { .mca_component_name = "pt2pt", MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION), - .mca_open_component = component_open, .mca_register_component_params = component_register, }, .osc_data = { @@ -128,53 +114,15 @@ bool ompi_osc_pt2pt_no_locks = false; /* look up parameters for configuring this window. The code first looks in the info structure passed by the user, then through mca parameters. 
*/ -static bool -check_config_value_bool(char *key, ompi_info_t *info) +static bool check_config_value_bool(char *key, ompi_info_t *info, bool result) { - char *value_string; - int value_len, ret, flag, param; - const bool *flag_value; - bool result; - - ret = ompi_info_get_valuelen(info, key, &value_len, &flag); - if (OMPI_SUCCESS != ret) goto info_not_found; - if (flag == 0) goto info_not_found; - value_len++; + int flag; - value_string = (char*)malloc(sizeof(char) * value_len + 1); /* Should malloc 1 char for NUL-termination */ - if (NULL == value_string) goto info_not_found; - - ret = ompi_info_get(info, key, value_len, value_string, &flag); - if (OMPI_SUCCESS != ret) { - free(value_string); - goto info_not_found; - } - assert(flag != 0); - ret = ompi_info_value_to_bool(value_string, &result); - free(value_string); - if (OMPI_SUCCESS != ret) goto info_not_found; + (void) ompi_info_get_bool (info, key, &result, &flag); return result; - - info_not_found: - param = mca_base_var_find("ompi", "osc", "pt2pt", key); - if (0 > param) return false; - - ret = mca_base_var_get_value(param, &flag_value, NULL, NULL); - if (OMPI_SUCCESS != ret) return false; - - return flag_value[0]; } - -static int -component_open(void) -{ - return OMPI_SUCCESS; -} - - -static int -component_register(void) +static int component_register (void) { ompi_osc_pt2pt_no_locks = false; (void) mca_base_component_var_register(&mca_osc_pt2pt_component.super.osc_version, @@ -194,44 +142,62 @@ component_register(void) NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_pt2pt_component.buffer_size); + mca_osc_pt2pt_component.receive_count = 4; + (void) mca_base_component_var_register (&mca_osc_pt2pt_component.super.osc_version, "receive_count", + "Number of receives to post for each window for incoming fragments " + "(default: 4)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, OPAL_INFO_LVL_4, + MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_pt2pt_component.receive_count); + return OMPI_SUCCESS; } 
static int component_progress (void) { - int count = opal_list_get_size (&mca_osc_pt2pt_component.pending_operations); + int pending_count = opal_list_get_size (&mca_osc_pt2pt_component.pending_operations); + int recv_count = opal_list_get_size (&mca_osc_pt2pt_component.pending_receives); ompi_osc_pt2pt_pending_t *pending, *next; - if (0 == count) { - return 0; + if (recv_count) { + for (int i = 0 ; i < recv_count ; ++i) { + OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.pending_receives_lock); + ompi_osc_pt2pt_receive_t *recv = (ompi_osc_pt2pt_receive_t *) opal_list_remove_first (&mca_osc_pt2pt_component.pending_receives); + OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.pending_receives_lock); + if (NULL == recv) { + break; + } + + (void) ompi_osc_pt2pt_process_receive (recv); + } } /* process one incoming request */ - OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.pending_operations_lock); - OPAL_LIST_FOREACH_SAFE(pending, next, &mca_osc_pt2pt_component.pending_operations, ompi_osc_pt2pt_pending_t) { - int ret; - - switch (pending->header.base.type) { - case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ: - ret = ompi_osc_pt2pt_process_flush (pending->module, pending->source, - &pending->header.flush); - break; - case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ: - ret = ompi_osc_pt2pt_process_unlock (pending->module, pending->source, - &pending->header.unlock); - break; - default: - /* shouldn't happen */ - assert (0); - abort (); - } - - if (OMPI_SUCCESS == ret) { - opal_list_remove_item (&mca_osc_pt2pt_component.pending_operations, &pending->super); - OBJ_RELEASE(pending); - } + if (pending_count) { + OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.pending_operations_lock); + OPAL_LIST_FOREACH_SAFE(pending, next, &mca_osc_pt2pt_component.pending_operations, ompi_osc_pt2pt_pending_t) { + int ret; + + switch (pending->header.base.type) { + case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ: + ret = ompi_osc_pt2pt_process_flush (pending->module, pending->source, + &pending->header.flush); + break; + case 
OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ: + ret = ompi_osc_pt2pt_process_unlock (pending->module, pending->source, + &pending->header.unlock); + break; + default: + /* shouldn't happen */ + assert (0); + abort (); + } + + if (OMPI_SUCCESS == ret) { + opal_list_remove_item (&mca_osc_pt2pt_component.pending_operations, &pending->super); + OBJ_RELEASE(pending); + } + } + OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.pending_operations_lock); } - OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.pending_operations_lock); return 1; } @@ -245,6 +211,8 @@ component_init(bool enable_progress_threads, OBJ_CONSTRUCT(&mca_osc_pt2pt_component.lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_osc_pt2pt_component.pending_operations, opal_list_t); OBJ_CONSTRUCT(&mca_osc_pt2pt_component.pending_operations_lock, opal_mutex_t); + OBJ_CONSTRUCT(&mca_osc_pt2pt_component.pending_receives, opal_list_t); + OBJ_CONSTRUCT(&mca_osc_pt2pt_component.pending_receives_lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_osc_pt2pt_component.modules, opal_hash_table_t); @@ -305,6 +273,8 @@ component_finalize(void) OBJ_DESTRUCT(&mca_osc_pt2pt_component.requests); OBJ_DESTRUCT(&mca_osc_pt2pt_component.pending_operations); OBJ_DESTRUCT(&mca_osc_pt2pt_component.pending_operations_lock); + OBJ_DESTRUCT(&mca_osc_pt2pt_component.pending_receives); + OBJ_DESTRUCT(&mca_osc_pt2pt_component.pending_receives_lock); return OMPI_SUCCESS; } @@ -346,15 +316,25 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit /* initialize the objects, so that always free in cleanup */ OBJ_CONSTRUCT(&module->lock, opal_mutex_t); OBJ_CONSTRUCT(&module->cond, opal_condition_t); - OBJ_CONSTRUCT(&module->acc_lock, opal_mutex_t); OBJ_CONSTRUCT(&module->locks_pending, opal_list_t); OBJ_CONSTRUCT(&module->locks_pending_lock, opal_mutex_t); - OBJ_CONSTRUCT(&module->outstanding_locks, opal_list_t); + OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t); OBJ_CONSTRUCT(&module->pending_acc, opal_list_t); - 
OBJ_CONSTRUCT(&module->pending_posts, opal_list_t); - OBJ_CONSTRUCT(&module->request_gc, opal_list_t); OBJ_CONSTRUCT(&module->buffer_gc, opal_list_t); OBJ_CONSTRUCT(&module->gc_lock, opal_mutex_t); + OBJ_CONSTRUCT(&module->all_sync, ompi_osc_pt2pt_sync_t); + OBJ_CONSTRUCT(&module->peer_hash, opal_hash_table_t); + OBJ_CONSTRUCT(&module->peer_lock, opal_mutex_t); + + ret = opal_hash_table_init (&module->outstanding_locks, 64); + if (OPAL_SUCCESS != ret) { + goto cleanup; + } + + ret = opal_hash_table_init (&module->peer_hash, 128); + if (OPAL_SUCCESS != ret) { + goto cleanup; + } /* options */ /* FIX ME: should actually check this value... */ @@ -388,17 +368,6 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit /* record my displacement unit. Always resolved at target */ module->disp_unit = disp_unit; - /* peer data */ - module->peers = calloc(ompi_comm_size(comm), sizeof(ompi_osc_pt2pt_peer_t)); - if (NULL == module->peers) { - ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; - goto cleanup; - } - - for (int i = 0 ; i < ompi_comm_size (comm) ; ++i) { - OBJ_CONSTRUCT(module->peers + i, ompi_osc_pt2pt_peer_t); - } - /* peer op count data */ module->epoch_outgoing_frag_count = calloc (ompi_comm_size(comm), sizeof(uint32_t)); if (NULL == module->epoch_outgoing_frag_count) { @@ -408,18 +377,16 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit /* the statement below (from Brian) does not seem correct so disable active target on the * window. if this end up being incorrect please revert this one change */ - module->active_eager_send_active = false; #if 0 /* initially, we're in that pseudo-fence state, so we allow eager sends (yay for Fence). Other protocols will disable before they start their epochs, so this isn't a problem. 
*/ - module->active_eager_send_active = true; + module->all_sync.type = OMPI_OSC_PT2PT_SYNC_TYPE_FENCE; + module->all_sync.eager_send_active = true; #endif /* lock data */ - if (check_config_value_bool("no_locks", info)) { - win->w_flags |= OMPI_WIN_NO_LOCKS; - } + module->no_locks = check_config_value_bool ("no_locks", info, ompi_osc_pt2pt_no_locks); /* update component data */ OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.lock); @@ -439,11 +406,6 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit /* sync memory - make sure all initialization completed */ opal_atomic_mb(); - module->incoming_buffer = malloc (mca_osc_pt2pt_component.buffer_size + sizeof (ompi_osc_pt2pt_frag_header_t)); - if (OPAL_UNLIKELY(NULL == module->incoming_buffer)) { - goto cleanup; - } - ret = ompi_osc_pt2pt_frag_start_receive (module); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { goto cleanup; @@ -460,6 +422,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit mca_osc_pt2pt_component.progress_enable = true; } + if (module->no_locks) { + win->w_flags |= OMPI_WIN_NO_LOCKS; + } + OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "done creating pt2pt window %d", ompi_comm_get_cid(module->comm))); @@ -499,10 +465,33 @@ ompi_osc_pt2pt_get_info(struct ompi_win_t *win, struct ompi_info_t **info_used) OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_pending_t, opal_list_item_t, NULL, NULL); +static void ompi_osc_pt2pt_receive_construct (ompi_osc_pt2pt_receive_t *recv) +{ + recv->buffer = NULL; + recv->pml_request = NULL; +} + +static void ompi_osc_pt2pt_receive_destruct (ompi_osc_pt2pt_receive_t *recv) +{ + free (recv->buffer); + if (recv->pml_request && MPI_REQUEST_NULL != recv->pml_request) { + recv->pml_request->req_complete_cb = NULL; + ompi_request_cancel (recv->pml_request); + ompi_request_free (&recv->pml_request); + } +} + +OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_receive_t, opal_list_item_t, + ompi_osc_pt2pt_receive_construct, + 
ompi_osc_pt2pt_receive_destruct); + static void ompi_osc_pt2pt_peer_construct (ompi_osc_pt2pt_peer_t *peer) { OBJ_CONSTRUCT(&peer->queued_frags, opal_list_t); OBJ_CONSTRUCT(&peer->lock, opal_mutex_t); + peer->active_frag = NULL; + peer->passive_incoming_frag_count = 0; + peer->flags = 0; } static void ompi_osc_pt2pt_peer_destruct (ompi_osc_pt2pt_peer_t *peer) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c index 4f1de661ea8..059c83be450 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c @@ -8,12 +8,13 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,26 +22,19 @@ * $HEADER$ */ -#include "ompi_config.h" - #include "osc_pt2pt.h" #include "osc_pt2pt_header.h" #include "osc_pt2pt_data_move.h" #include "osc_pt2pt_frag.h" #include "osc_pt2pt_request.h" -#include "opal/threads/condition.h" -#include "opal/threads/mutex.h" #include "opal/util/arch.h" -#include "opal/util/output.h" #include "opal/sys/atomic.h" #include "opal/align.h" -#include "opal/mca/btl/btl.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/pml_base_sendreq.h" #include "opal/mca/btl/btl.h" -#include "ompi/mca/osc/base/base.h" #include "ompi/mca/osc/base/osc_base_obj_convert.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" @@ -83,10 +77,6 @@ static void osc_pt2pt_accumulate_data_destructor (osc_pt2pt_accumulate_data_t *a if (acc_data->datatype) { OBJ_RELEASE(acc_data->datatype); } - - if (acc_data->op) { - OBJ_RELEASE(acc_data->op); - } } OBJ_CLASS_DECLARATION(osc_pt2pt_accumulate_data_t); @@ -218,13 +208,13 @@ static inline int datatype_buffer_length (ompi_datatype_t *datatype, int count) * to a target) before this is sent. 
*/ int ompi_osc_pt2pt_control_send (ompi_osc_pt2pt_module_t *module, int target, - void *data, size_t len) + void *data, size_t len) { ompi_osc_pt2pt_frag_t *frag; char *ptr; int ret; - ret = ompi_osc_pt2pt_frag_alloc(module, target, len, &frag, &ptr); + ret = ompi_osc_pt2pt_frag_alloc(module, target, len, &frag, &ptr, false, true); if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { memcpy (ptr, data, len); @@ -248,10 +238,8 @@ static int ompi_osc_pt2pt_control_send_unbuffered_cb (ompi_request_t *request) /* free the temporary buffer */ free (ctx); - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); - - return OMPI_SUCCESS; + ompi_request_free (&request); + return 1; } /** @@ -409,7 +397,7 @@ static inline int process_put_long(ompi_osc_pt2pt_module_t* module, int source, ret = ompi_osc_pt2pt_component_irecv (module, target, put_header->count, datatype, source, - put_header->tag, + tag_to_target(put_header->tag), module->comm); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output, @@ -447,10 +435,8 @@ static int osc_pt2pt_incoming_req_complete (ompi_request_t *request) mark_incoming_completion (module, rank); - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); - - return OMPI_SUCCESS; + ompi_request_free (&request); + return 1; } struct osc_pt2pt_get_post_send_cb_data_t { @@ -470,10 +456,8 @@ static int osc_pt2pt_get_post_send_cb (ompi_request_t *request) /* mark this as a completed "incoming" request */ mark_incoming_completion (module, rank); - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); - - return OMPI_SUCCESS; + ompi_request_free (&request); + return 1; } /** @@ -494,6 +478,7 @@ static int osc_pt2pt_get_post_send (ompi_osc_pt2pt_module_t *module, void *sourc ompi_datatype_t *datatype, int peer, int tag) { struct osc_pt2pt_get_post_send_cb_data_t *data; + int ret; data = 
malloc (sizeof (*data)); if (OPAL_UNLIKELY(NULL == data)) { @@ -505,8 +490,14 @@ static int osc_pt2pt_get_post_send (ompi_osc_pt2pt_module_t *module, void *sourc * in an active target epoch) */ data->peer = (tag & 0x1) ? peer : MPI_PROC_NULL; - return ompi_osc_pt2pt_isend_w_cb (source, count, datatype, peer, tag, module->comm, + /* data will be freed by the callback */ + ret = ompi_osc_pt2pt_isend_w_cb (source, count, datatype, peer, tag, module->comm, osc_pt2pt_get_post_send_cb, (void *) data); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + free (data); + } + + return ret; } /** @@ -538,7 +529,8 @@ static inline int process_get (ompi_osc_pt2pt_module_t* module, int target, } /* send get data */ - ret = osc_pt2pt_get_post_send (module, source, get_header->count, datatype, target, get_header->tag); + ret = osc_pt2pt_get_post_send (module, source, get_header->count, datatype, + target, tag_to_origin(get_header->tag)); OBJ_RELEASE(datatype); @@ -644,7 +636,6 @@ static int osc_pt2pt_accumulate_allocate (ompi_osc_pt2pt_module_t *module, int p acc_data->datatype = datatype; OBJ_RETAIN(datatype); acc_data->op = op; - OBJ_RETAIN(op); acc_data->request_count = request_count; *acc_data_out = acc_data; @@ -702,9 +693,7 @@ static int accumulate_cb (ompi_request_t *request) osc_pt2pt_gc_add_buffer (module, &acc_data->super); } - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); - + ompi_request_free (&request); return ret; } @@ -712,7 +701,7 @@ static int accumulate_cb (ompi_request_t *request) static int ompi_osc_pt2pt_acc_op_queue (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_header_t *header, int source, char *data, size_t data_len, ompi_datatype_t *datatype) { - ompi_osc_pt2pt_peer_t *peer = module->peers + source; + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, source); osc_pt2pt_pending_acc_t *pending_acc; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, @@ -774,13 +763,11 @@ static 
int replace_cb (ompi_request_t *request) mark_incoming_completion (module, rank); - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); - /* unlock the accumulate lock */ ompi_osc_pt2pt_accumulate_unlock (module); - return OMPI_SUCCESS; + ompi_request_free (&request); + return 1; } /** @@ -813,8 +800,6 @@ static int ompi_osc_pt2pt_acc_start (ompi_osc_pt2pt_module_t *module, int source ret = osc_pt2pt_accumulate_buffer (target, data, data_len, proc, acc_header->count, datatype, op); - OBJ_RELEASE(op); - ompi_osc_pt2pt_accumulate_unlock (module); return ret; @@ -854,9 +839,9 @@ static int ompi_osc_pt2pt_acc_long_start (ompi_osc_pt2pt_module_t *module, int s do { if (op == &ompi_mpi_op_replace.op) { - ret = ompi_osc_pt2pt_irecv_w_cb (target, acc_header->count, datatype, source, - acc_header->tag, module->comm, NULL, - replace_cb, module); + ret = ompi_osc_pt2pt_irecv_w_cb (target, acc_header->count, datatype, + source, tag_to_target(acc_header->tag), module->comm, + NULL, replace_cb, module); break; } @@ -877,21 +862,20 @@ static int ompi_osc_pt2pt_acc_long_start (ompi_osc_pt2pt_module_t *module, int s } ret = osc_pt2pt_accumulate_allocate (module, source, target, buffer, buflen, proc, acc_header->count, - datatype, op, 1, &acc_data); + datatype, op, 1, &acc_data); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { free (buffer); break; } - ret = ompi_osc_pt2pt_irecv_w_cb (buffer, primitive_count, primitive_datatype, source, - acc_header->tag, module->comm, NULL, accumulate_cb, acc_data); + ret = ompi_osc_pt2pt_irecv_w_cb (buffer, primitive_count, primitive_datatype, + source, tag_to_target(acc_header->tag), module->comm, + NULL, accumulate_cb, acc_data); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OBJ_RELEASE(acc_data); } } while (0); - OBJ_RELEASE(op); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { ompi_osc_pt2pt_accumulate_unlock (module); } @@ -934,15 +918,14 @@ static int ompi_osc_pt2pt_gacc_start (ompi_osc_pt2pt_module_t *module, 
int sourc break; } - ret = ompi_osc_pt2pt_isend_w_cb (target, acc_header->count, datatype, source, acc_header->tag, - module->comm, accumulate_cb, acc_data); + ret = ompi_osc_pt2pt_isend_w_cb (target, acc_header->count, datatype, + source, tag_to_origin(acc_header->tag), module->comm, + accumulate_cb, acc_data); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OBJ_RELEASE(acc_data); } } while (0); - OBJ_RELEASE(op); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { ompi_osc_pt2pt_accumulate_unlock (module); } @@ -985,12 +968,6 @@ static int ompi_osc_gacc_long_start (ompi_osc_pt2pt_module_t *module, int source buflen = datatype_buffer_length (datatype, acc_header->count); do { - buffer = malloc (buflen); - if (OPAL_UNLIKELY(NULL == buffer)) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - break; - } - ret = ompi_osc_base_get_primitive_type_info (datatype, &primitive_datatype, &primitive_count); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; @@ -998,21 +975,30 @@ static int ompi_osc_gacc_long_start (ompi_osc_pt2pt_module_t *module, int source primitive_count *= acc_header->count; + buffer = malloc (buflen); + if (OPAL_UNLIKELY(NULL == buffer)) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + break; + } + ret = osc_pt2pt_accumulate_allocate (module, source, target, buffer, buflen, proc, acc_header->count, - datatype, op, 2, &acc_data); + datatype, op, 2, &acc_data); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + free (buffer); break; } - ret = ompi_osc_pt2pt_irecv_w_cb (buffer, acc_header->count, datatype, source, acc_header->tag, - module->comm, &recv_request, accumulate_cb, acc_data); + ret = ompi_osc_pt2pt_irecv_w_cb (buffer, acc_header->count, datatype, + source, tag_to_target(acc_header->tag), module->comm, + &recv_request, accumulate_cb, acc_data); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OBJ_RELEASE(acc_data); break; } - ret = ompi_osc_pt2pt_isend_w_cb (target, primitive_count, primitive_datatype, source, acc_header->tag, - module->comm, accumulate_cb, acc_data); + ret = 
ompi_osc_pt2pt_isend_w_cb (target, primitive_count, primitive_datatype, + source, tag_to_origin(acc_header->tag), module->comm, + accumulate_cb, acc_data); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { /* cancel the receive and free the accumulate data */ ompi_request_cancel (recv_request); @@ -1021,8 +1007,6 @@ static int ompi_osc_gacc_long_start (ompi_osc_pt2pt_module_t *module, int source } } while (0); - OBJ_RELEASE(op); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { ompi_osc_pt2pt_accumulate_unlock (module); } @@ -1066,8 +1050,8 @@ static int ompi_osc_pt2pt_cswap_start (ompi_osc_pt2pt_module_t *module, int sour do { /* no reason to do a non-blocking send here */ - ret = MCA_PML_CALL(send(target, 1, datatype, source, cswap_header->tag, MCA_PML_BASE_SEND_STANDARD, - module->comm)); + ret = MCA_PML_CALL(send(target, 1, datatype, source, tag_to_origin(cswap_header->tag), + MCA_PML_BASE_SEND_STANDARD, module->comm)); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; } @@ -1337,20 +1321,8 @@ static inline int process_cswap (ompi_osc_pt2pt_module_t *module, int source, static inline int process_complete (ompi_osc_pt2pt_module_t *module, int source, ompi_osc_pt2pt_header_complete_t *complete_header) { - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "osc pt2pt: process_complete got complete message from %d. expected fragment count %d. " - "current signal count %d. 
current incomming count: %d", - source, complete_header->frag_count, module->active_incoming_frag_signal_count, - module->active_incoming_frag_count)); - /* the current fragment is not part of the frag_count so we need to add it here */ - OPAL_THREAD_ADD32((int32_t *) &module->active_incoming_frag_signal_count, - complete_header->frag_count + 1); - - - if (0 == OPAL_THREAD_ADD32((int32_t *) &module->num_complete_msgs, 1)) { - opal_condition_broadcast (&module->cond); - } + osc_pt2pt_incoming_complete (module, source, complete_header->frag_count + 1); return sizeof (*complete_header); } @@ -1361,7 +1333,7 @@ static inline int process_complete (ompi_osc_pt2pt_module_t *module, int source, static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source, ompi_osc_pt2pt_header_flush_t *flush_header) { - ompi_osc_pt2pt_peer_t *peer = module->peers + source; + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, source); int ret; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, @@ -1395,7 +1367,7 @@ static inline int process_flush (ompi_osc_pt2pt_module_t *module, int source, static inline int process_unlock (ompi_osc_pt2pt_module_t *module, int source, ompi_osc_pt2pt_header_unlock_t *unlock_header) { - ompi_osc_pt2pt_peer_t *peer = module->peers + source; + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, source); int ret; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, @@ -1453,13 +1425,11 @@ static int process_large_datatype_request_cb (ompi_request_t *request) return OMPI_ERROR; } - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); - /* free the datatype buffer */ osc_pt2pt_gc_add_buffer (module, &ddt_buffer->super); - return OMPI_SUCCESS; + ompi_request_free (&request); + return 1; } /** @@ -1527,8 +1497,9 @@ static int process_large_datatype_request (ompi_osc_pt2pt_module_t *module, int memcpy (ddt_buffer->header, header, header_len); ret 
= ompi_osc_pt2pt_irecv_w_cb ((void *)((uintptr_t) ddt_buffer->header + header_len), - ddt_len, MPI_BYTE, source, tag, module->comm, NULL, - process_large_datatype_request_cb, ddt_buffer); + ddt_len, MPI_BYTE, + source, tag_to_target(tag), module->comm, + NULL, process_large_datatype_request_cb, ddt_buffer); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OBJ_RELEASE(ddt_buffer); return ret; @@ -1575,12 +1546,6 @@ static inline int process_frag (ompi_osc_pt2pt_module_t *module, ret = process_acc_long (module, frag->source, &header->acc); break; - case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ: - ret = ompi_osc_pt2pt_process_lock(module, frag->source, &header->lock); - if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { - ret = sizeof (header->lock); - } - break; case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ: ret = process_unlock(module, frag->source, &header->unlock); break; @@ -1633,13 +1598,38 @@ static inline int process_frag (ompi_osc_pt2pt_module_t *module, /* dispatch for callback on message completion */ static int ompi_osc_pt2pt_callback (ompi_request_t *request) { - ompi_osc_pt2pt_module_t *module = (ompi_osc_pt2pt_module_t *) request->req_complete_cb_data; - ompi_osc_pt2pt_header_t *base_header = - (ompi_osc_pt2pt_header_t *) module->incoming_buffer; - size_t incoming_length = request->req_status._ucount; - int source = request->req_status.MPI_SOURCE; + ompi_osc_pt2pt_receive_t *recv = (ompi_osc_pt2pt_receive_t *) request->req_complete_cb_data; + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "received pt2pt fragment")); - OPAL_THREAD_UNLOCK(&ompi_request_lock); + /* to avoid deep recursion from complete -> start -> complete -> ... we simply put this + * request on a list and let it be processed by opal_progress().
*/ + OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.pending_receives_lock); + opal_list_append (&mca_osc_pt2pt_component.pending_receives, &recv->super); + OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.pending_receives_lock); + + return OMPI_SUCCESS; +} + +static int ompi_osc_pt2pt_receive_repost (ompi_osc_pt2pt_receive_t *recv) +{ + /* wait until the request has been marked as complete */ + ompi_request_wait_completion (recv->pml_request); + + /* ompi_request_complete clears the callback */ + recv->pml_request->req_complete_cb = ompi_osc_pt2pt_callback; + recv->pml_request->req_complete_cb_data = (void *) recv; + + return MCA_PML_CALL(start(1, &recv->pml_request)); +} + +int ompi_osc_pt2pt_process_receive (ompi_osc_pt2pt_receive_t *recv) +{ + ompi_osc_pt2pt_module_t *module = (ompi_osc_pt2pt_module_t *) recv->module; + ompi_osc_pt2pt_header_t *base_header = (ompi_osc_pt2pt_header_t *) recv->buffer; + size_t incoming_length = recv->pml_request->req_status._ucount; + int source = recv->pml_request->req_status.MPI_SOURCE; + int rc; assert(incoming_length >= sizeof(ompi_osc_pt2pt_header_base_t)); @@ -1653,10 +1643,14 @@ static int ompi_osc_pt2pt_callback (ompi_request_t *request) process_frag(module, (ompi_osc_pt2pt_frag_header_t *) base_header); /* only data fragments should be included in the completion counters */ - mark_incoming_completion (module, (base_header->base.flags & OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET) ? source : MPI_PROC_NULL); + mark_incoming_completion (module, (base_header->base.flags & OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET) ? 
+ source : MPI_PROC_NULL); break; case OMPI_OSC_PT2PT_HDR_TYPE_POST: - (void) osc_pt2pt_incoming_post (module, source); + osc_pt2pt_incoming_post (module, source); + break; + case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ: + ompi_osc_pt2pt_process_lock(module, source, (ompi_osc_pt2pt_header_lock_t *) base_header); break; case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_ACK: ompi_osc_pt2pt_process_lock_ack(module, (ompi_osc_pt2pt_header_lock_ack_t *) base_header); @@ -1678,61 +1672,56 @@ static int ompi_osc_pt2pt_callback (ompi_request_t *request) osc_pt2pt_gc_clean (module); - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); - ompi_osc_pt2pt_frag_start_receive (module); - - OPAL_THREAD_LOCK(&ompi_request_lock); + rc = ompi_osc_pt2pt_receive_repost (recv); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "finished posting receive request")); + "finished posting receive request. rc: %d", rc)); return OMPI_SUCCESS; } int ompi_osc_pt2pt_frag_start_receive (ompi_osc_pt2pt_module_t *module) { - return ompi_osc_pt2pt_irecv_w_cb (module->incoming_buffer, mca_osc_pt2pt_component.buffer_size + sizeof (ompi_osc_pt2pt_frag_header_t), - MPI_BYTE, OMPI_ANY_SOURCE, OSC_PT2PT_FRAG_TAG, module->comm, &module->frag_request, - ompi_osc_pt2pt_callback, module); -} + int rc; -int ompi_osc_pt2pt_component_irecv (ompi_osc_pt2pt_module_t *module, void *buf, - size_t count, struct ompi_datatype_t *datatype, - int src, int tag, struct ompi_communicator_t *comm) -{ - return ompi_osc_pt2pt_irecv_w_cb (buf, count, datatype, src, tag, comm, NULL, - osc_pt2pt_incoming_req_complete, module); -} - - -static int -isend_completion_cb(ompi_request_t *request) -{ - ompi_osc_pt2pt_module_t *module = - (ompi_osc_pt2pt_module_t*) request->req_complete_cb_data; + module->recv_frag_count = mca_osc_pt2pt_component.receive_count; + if (0 == module->recv_frag_count) { + module->recv_frag_count = 1; + } - OPAL_OUTPUT_VERBOSE((10, 
ompi_osc_base_framework.framework_output, - "isend_completion_cb called")); + module->recv_frags = malloc (sizeof (module->recv_frags[0]) * module->recv_frag_count); + if (NULL == module->recv_frags) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - mark_outgoing_completion(module); + for (unsigned int i = 0 ; i < module->recv_frag_count ; ++i) { + OBJ_CONSTRUCT(module->recv_frags + i, ompi_osc_pt2pt_receive_t); + module->recv_frags[i].module = module; + module->recv_frags[i].buffer = malloc (mca_osc_pt2pt_component.buffer_size + sizeof (ompi_osc_pt2pt_frag_header_t)); + if (NULL == module->recv_frags[i].buffer) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); + rc = ompi_osc_pt2pt_irecv_w_cb (module->recv_frags[i].buffer, mca_osc_pt2pt_component.buffer_size + sizeof (ompi_osc_pt2pt_frag_header_t), + MPI_BYTE, OMPI_ANY_SOURCE, OSC_PT2PT_FRAG_TAG, module->comm, &module->recv_frags[i].pml_request, + ompi_osc_pt2pt_callback, module->recv_frags + i); + if (OMPI_SUCCESS != rc) { + return rc; + } + } return OMPI_SUCCESS; } - -int ompi_osc_pt2pt_component_isend (ompi_osc_pt2pt_module_t *module, void *buf, - size_t count, struct ompi_datatype_t *datatype, - int dest, int tag, struct ompi_communicator_t *comm) +int ompi_osc_pt2pt_component_irecv (ompi_osc_pt2pt_module_t *module, void *buf, + size_t count, struct ompi_datatype_t *datatype, + int src, int tag, struct ompi_communicator_t *comm) { - return ompi_osc_pt2pt_isend_w_cb (buf, count, datatype, dest, tag, comm, - isend_completion_cb, module); + return ompi_osc_pt2pt_irecv_w_cb (buf, count, datatype, src, tag, comm, NULL, + osc_pt2pt_incoming_req_complete, module); } -int ompi_osc_pt2pt_isend_w_cb (void *ptr, int count, ompi_datatype_t *datatype, int target, int tag, +int ompi_osc_pt2pt_isend_w_cb (const void *ptr, int count, ompi_datatype_t *datatype, int target, int tag, ompi_communicator_t *comm, ompi_request_complete_fn_t cb, void 
*ctx) { ompi_request_t *request; @@ -1742,7 +1731,7 @@ int ompi_osc_pt2pt_isend_w_cb (void *ptr, int count, ompi_datatype_t *datatype, "osc pt2pt: ompi_osc_pt2pt_isend_w_cb sending %d bytes to %d with tag %d", count, target, tag)); - ret = MCA_PML_CALL(isend_init(ptr, count, datatype, target, tag, + ret = MCA_PML_CALL(isend_init((void *)ptr, count, datatype, target, tag, MCA_PML_BASE_SEND_STANDARD, comm, &request)); if (OMPI_SUCCESS != ret) { OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, @@ -1762,27 +1751,31 @@ int ompi_osc_pt2pt_irecv_w_cb (void *ptr, int count, ompi_datatype_t *datatype, ompi_communicator_t *comm, ompi_request_t **request_out, ompi_request_complete_fn_t cb, void *ctx) { - ompi_request_t *request; + ompi_request_t *dummy; int ret; + if (NULL == request_out) { + request_out = &dummy; + } + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "osc pt2pt: ompi_osc_pt2pt_irecv_w_cb receiving %d bytes from %d with tag %d", count, target, tag)); - ret = MCA_PML_CALL(irecv_init(ptr, count, datatype, target, tag, comm, &request)); + ret = MCA_PML_CALL(irecv_init(ptr, count, datatype, target, tag, comm, request_out)); if (OMPI_SUCCESS != ret) { OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "error posting receive. 
ret = %d", ret)); return ret; } - request->req_complete_cb = cb; - request->req_complete_cb_data = ctx; - if (request_out) { - *request_out = request; - } + (*request_out)->req_complete_cb = cb; + (*request_out)->req_complete_cb_data = ctx; - ret = MCA_PML_CALL(start(1, &request)); + ret = MCA_PML_CALL(start(1, request_out)); + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "osc pt2pt: pml start returned %d", ret)); return ret; } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.h b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.h index cd79a39a86a..8468c406630 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.h @@ -11,6 +11,8 @@ * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -66,7 +68,7 @@ int ompi_osc_pt2pt_control_send_unbuffered (ompi_osc_pt2pt_module_t *module, * be called with the associated request. The context specified in ctx will be stored in * the req_completion_cb_data member of the ompi_request_t for use by the callback. */ -int ompi_osc_pt2pt_isend_w_cb (void *ptr, int count, ompi_datatype_t *datatype, int target, int tag, +int ompi_osc_pt2pt_isend_w_cb (const void *ptr, int count, ompi_datatype_t *datatype, int target, int tag, ompi_communicator_t *comm, ompi_request_complete_fn_t cb, void *ctx); /** @@ -142,4 +144,16 @@ void ompi_osc_pt2pt_process_flush_ack (ompi_osc_pt2pt_module_t *module, int sour */ int ompi_osc_pt2pt_frag_start_receive (ompi_osc_pt2pt_module_t *module); +/** + * ompi_osc_pt2pt_process_receive: + * + * @short Report a receive request + * + * @param[in] recv - Receive structure + * + * @long This function reposts a receive request. 
This function should not be called from + * a pml request callback as it can lead to deep recursion during heavy load. + */ +int ompi_osc_pt2pt_process_receive (ompi_osc_pt2pt_receive_t *recv); + #endif diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c index 0e0c588bef2..95d27b8b2b7 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c @@ -12,17 +12,12 @@ * $HEADER$ */ -#include "ompi_config.h" - -#include "opal/class/opal_list.h" -#include "ompi/mca/osc/base/base.h" -#include "ompi/mca/pml/pml.h" - #include "osc_pt2pt.h" #include "osc_pt2pt_frag.h" #include "osc_pt2pt_data_move.h" -static void ompi_osc_pt2pt_frag_constructor (ompi_osc_pt2pt_frag_t *frag){ +static void ompi_osc_pt2pt_frag_constructor (ompi_osc_pt2pt_frag_t *frag) +{ frag->buffer = frag->super.ptr; } @@ -42,11 +37,9 @@ static int frag_send_cb (ompi_request_t *request) mark_outgoing_completion(module); opal_free_list_return (&mca_osc_pt2pt_component.frags, &frag->super); + ompi_request_free (&request); - /* put this request on the garbage colletion list */ - osc_pt2pt_gc_add_request (module, request); - - return OMPI_SUCCESS; + return 1; } static int frag_send (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_frag_t *frag) @@ -68,7 +61,7 @@ static int frag_send (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_frag_t *fr int ompi_osc_pt2pt_frag_start (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_frag_t *frag) { - ompi_osc_pt2pt_peer_t *peer = module->peers + frag->target; + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, frag->target); int ret; assert(0 == frag->pending && peer->active_frag != frag); @@ -79,7 +72,7 @@ int ompi_osc_pt2pt_frag_start (ompi_osc_pt2pt_module_t *module, /* if eager sends are not active, can't send yet, so buffer and get out... 
*/ - if (!(peer->eager_send_active || module->all_access_epoch) || opal_list_get_size (&peer->queued_frags)) { + if (!ompi_osc_pt2pt_peer_sends_active (module, frag->target) || opal_list_get_size (&peer->queued_frags)) { OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "queuing fragment to peer %d", frag->target)); OPAL_THREAD_SCOPED_LOCK(&peer->lock, @@ -97,9 +90,9 @@ int ompi_osc_pt2pt_frag_start (ompi_osc_pt2pt_module_t *module, return ret; } -static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, int target) +static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_peer_t *peer) { - ompi_osc_pt2pt_frag_t *active_frag = module->peers[target].active_frag; + ompi_osc_pt2pt_frag_t *active_frag = peer->active_frag; int ret = OMPI_SUCCESS; if (NULL == active_frag) { @@ -108,16 +101,16 @@ static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, in } OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "osc pt2pt: flushing active fragment to target %d. pending: %d", target, - active_frag->pending)); + "osc pt2pt: flushing active fragment to target %d. 
pending: %d", + active_frag->target, active_frag->pending)); - if (opal_atomic_cmpset (&module->peers[target].active_frag, active_frag, NULL)) { + if (opal_atomic_cmpset (&peer->active_frag, active_frag, NULL)) { if (0 != OPAL_THREAD_ADD32(&active_frag->pending, -1)) { /* communication going on while synchronizing; this is an rma usage bug */ return OMPI_ERR_RMA_SYNC; } - ompi_osc_signal_outgoing (module, target, 1); + ompi_osc_signal_outgoing (module, active_frag->target, 1); ret = frag_send (module, active_frag); } @@ -126,7 +119,7 @@ static int ompi_osc_pt2pt_flush_active_frag (ompi_osc_pt2pt_module_t *module, in int ompi_osc_pt2pt_frag_flush_target (ompi_osc_pt2pt_module_t *module, int target) { - ompi_osc_pt2pt_peer_t *peer = module->peers + target; + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target); ompi_osc_pt2pt_frag_t *frag; int ret = OMPI_SUCCESS; @@ -150,7 +143,7 @@ int ompi_osc_pt2pt_frag_flush_target (ompi_osc_pt2pt_module_t *module, int targe } /* flush the active frag */ - ret = ompi_osc_pt2pt_flush_active_frag (module, target); + ret = ompi_osc_pt2pt_flush_active_frag (module, peer); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "osc pt2pt: frag flush target %d finished", target)); @@ -158,45 +151,23 @@ int ompi_osc_pt2pt_frag_flush_target (ompi_osc_pt2pt_module_t *module, int targe return ret; } - int ompi_osc_pt2pt_frag_flush_all (ompi_osc_pt2pt_module_t *module) { int ret = OMPI_SUCCESS; - ompi_osc_pt2pt_frag_t *frag; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "osc pt2pt: frag flush all begin")); - /* try to start all the queued frags */ + /* try to start frags queued to all peers */ for (int i = 0 ; i < ompi_comm_size (module->comm) ; ++i) { - ompi_osc_pt2pt_peer_t *peer = module->peers + i; - - while (NULL != (frag = ((ompi_osc_pt2pt_frag_t *) opal_list_remove_first (&peer->queued_frags)))) { - ret = frag_send(module, frag); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - 
break; - } - } - - /* XXX -- TODO -- better error handling */ + ret = ompi_osc_pt2pt_frag_flush_target (module, i); if (OMPI_SUCCESS != ret) { - return ret; - } - } - - OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "osc pt2pt: flushing all active fragments")); - - /* flush the active frag */ - for (int i = 0 ; i < ompi_comm_size(module->comm) ; ++i) { - ret = ompi_osc_pt2pt_flush_active_frag (module, i); - if (OMPI_SUCCESS != ret) { - return ret; + break; } } OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "osc pt2pt: frag flush all done")); + "osc pt2pt: frag flush all done. ret: %d", ret)); return ret; } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h index 7417f7bc001..42ef305f9c0 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -33,26 +33,86 @@ struct ompi_osc_pt2pt_frag_t { char *top; /* Number of operations which have started writing into the frag, but not yet completed doing so */ - int32_t pending; + volatile int32_t pending; + int32_t pending_long_sends; ompi_osc_pt2pt_frag_header_t *header; ompi_osc_pt2pt_module_t *module; }; typedef struct ompi_osc_pt2pt_frag_t ompi_osc_pt2pt_frag_t; OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_frag_t); -extern int ompi_osc_pt2pt_frag_start(ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_frag_t *buffer); -extern int ompi_osc_pt2pt_frag_flush_target(ompi_osc_pt2pt_module_t *module, int target); -extern int ompi_osc_pt2pt_frag_flush_all(ompi_osc_pt2pt_module_t *module); +int ompi_osc_pt2pt_frag_start(ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_frag_t *buffer); +int ompi_osc_pt2pt_frag_flush_target(ompi_osc_pt2pt_module_t *module, int target); +int ompi_osc_pt2pt_frag_flush_all(ompi_osc_pt2pt_module_t *module); + +static inline int ompi_osc_pt2pt_frag_finish (ompi_osc_pt2pt_module_t *module, + ompi_osc_pt2pt_frag_t* buffer) +{ + opal_atomic_wmb (); + if (0 == OPAL_THREAD_ADD32(&buffer->pending, -1)) { + opal_atomic_mb (); + return ompi_osc_pt2pt_frag_start(module, buffer); + } + + return OMPI_SUCCESS; +} + +static inline ompi_osc_pt2pt_frag_t *ompi_osc_pt2pt_frag_alloc_non_buffered (ompi_osc_pt2pt_module_t *module, + ompi_osc_pt2pt_peer_t *peer, + size_t request_len) +{ + ompi_osc_pt2pt_frag_t *curr; + + /* to ensure ordering flush the buffer on the peer */ + curr = peer->active_frag; + if (NULL != curr && opal_atomic_cmpset (&peer->active_frag, curr, NULL)) { + /* If there's something pending, the pending finish will + start the buffer. Otherwise, we need to start it now. 
*/ + int ret = ompi_osc_pt2pt_frag_finish (module, curr); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return NULL; + } + } + + curr = (ompi_osc_pt2pt_frag_t *) opal_free_list_get (&mca_osc_pt2pt_component.frags); + if (OPAL_UNLIKELY(NULL == curr)) { + return NULL; + } + + curr->target = peer->rank; + + curr->header = (ompi_osc_pt2pt_frag_header_t*) curr->buffer; + curr->top = (char*) (curr->header + 1); + curr->remain_len = mca_osc_pt2pt_component.buffer_size; + curr->module = module; + curr->pending = 1; + + curr->header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_FRAG; + curr->header->base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; + if (module->passive_target_access_epoch) { + curr->header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET; + } + curr->header->source = ompi_comm_rank(module->comm); + curr->header->num_ops = 1; + + return curr; +} /* - * Note: module lock must be held during this operation + * Note: this function takes the module lock + * + * buffered sends will cache the fragment on the peer object associated with the + * target. unbuffered-sends will cause the target fragment to be flushed and + * will not be cached on the peer. this causes the fragment to be flushed as + * soon as it is sent. this allows request-based rma fragments to be completed + * so MPI_Test/MPI_Wait/etc will work as expected. */ static inline int ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, int target, size_t request_len, ompi_osc_pt2pt_frag_t **buffer, - char **ptr) + char **ptr, bool long_send, bool buffered) { + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target); ompi_osc_pt2pt_frag_t *curr; - int ret; /* osc pt2pt headers can have 64-bit values. 
these will need to be aligned * on an 8-byte boundary on some architectures so we up align the allocation @@ -63,53 +123,33 @@ static inline int ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, in return OMPI_ERR_OUT_OF_RESOURCE; } + OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output, + "attempting to allocate buffer for %lu bytes to target %d. long send: %d, " + "buffered: %d", (unsigned long) request_len, target, long_send, buffered)); + OPAL_THREAD_LOCK(&module->lock); - curr = module->peers[target].active_frag; - if (NULL == curr || curr->remain_len < request_len) { - opal_free_list_item_t *item = NULL; - - if (NULL != curr) { - curr->remain_len = 0; - module->peers[target].active_frag = NULL; - opal_atomic_mb (); - - /* If there's something pending, the pending finish will - start the buffer. Otherwise, we need to start it now. */ - if (0 == OPAL_THREAD_ADD32(&curr->pending, -1)) { - ret = ompi_osc_pt2pt_frag_start(module, curr); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } + if (buffered) { + curr = peer->active_frag; + if (NULL == curr || curr->remain_len < request_len || (long_send && curr->pending_long_sends == 32)) { + curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len); + if (OPAL_UNLIKELY(NULL == curr)) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_OUT_OF_RESOURCE; } - } - item = opal_free_list_get (&mca_osc_pt2pt_component.frags); - if (OPAL_UNLIKELY(NULL == item)) { - return OMPI_ERR_OUT_OF_RESOURCE; + curr->pending_long_sends = long_send; + peer->active_frag = curr; + } else { + OPAL_THREAD_ADD32(&curr->header->num_ops, 1); + curr->pending_long_sends += long_send; } - curr = module->peers[target].active_frag = - (ompi_osc_pt2pt_frag_t*) item; - - curr->target = target; - - curr->header = (ompi_osc_pt2pt_frag_header_t*) curr->buffer; - curr->top = (char*) (curr->header + 1); - curr->remain_len = mca_osc_pt2pt_component.buffer_size; - curr->module = module; 
- curr->pending = 1; - curr->header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_FRAG; - curr->header->base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; - if (module->passive_target_access_epoch) { - curr->header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET; - } - curr->header->source = ompi_comm_rank(module->comm); - curr->header->num_ops = 0; - curr->header->windx = ompi_comm_get_cid(module->comm); - - if (curr->remain_len < request_len) { + OPAL_THREAD_ADD32(&curr->pending, 1); + } else { + curr = ompi_osc_pt2pt_frag_alloc_non_buffered (module, peer, request_len); + if (OPAL_UNLIKELY(NULL == curr)) { OPAL_THREAD_UNLOCK(&module->lock); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + return OMPI_ERR_OUT_OF_RESOURCE; } } @@ -118,24 +158,8 @@ static inline int ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, in curr->top += request_len; curr->remain_len -= request_len; - OPAL_THREAD_UNLOCK(&module->lock); - - OPAL_THREAD_ADD32(&curr->pending, 1); - OPAL_THREAD_ADD32(&curr->header->num_ops, 1); - return OMPI_SUCCESS; -} - - -/* - * Note: module lock must be held for this operation - */ -static inline int ompi_osc_pt2pt_frag_finish(ompi_osc_pt2pt_module_t *module, - ompi_osc_pt2pt_frag_t* buffer) -{ - if (0 == OPAL_THREAD_ADD32(&buffer->pending, -1)) { - return ompi_osc_pt2pt_frag_start(module, buffer); - } + OPAL_THREAD_UNLOCK(&module->lock); return OMPI_SUCCESS; } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_header.h b/ompi/mca/osc/pt2pt/osc_pt2pt_header.h index 6b5f00d2cec..f979d9bf61e 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_header.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_header.h @@ -8,7 +8,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. @@ -118,7 +118,6 @@ typedef struct ompi_osc_pt2pt_header_cswap_t ompi_osc_pt2pt_header_cswap_t; struct ompi_osc_pt2pt_header_post_t { ompi_osc_pt2pt_header_base_t base; - uint16_t windx; }; typedef struct ompi_osc_pt2pt_header_post_t ompi_osc_pt2pt_header_post_t; @@ -134,7 +133,6 @@ typedef struct ompi_osc_pt2pt_header_lock_t ompi_osc_pt2pt_header_lock_t; struct ompi_osc_pt2pt_header_lock_ack_t { ompi_osc_pt2pt_header_base_t base; - uint16_t windx; uint32_t source; uint64_t lock_ptr; }; @@ -166,7 +164,7 @@ struct ompi_osc_pt2pt_header_flush_t { uint8_t padding[2]; #endif uint32_t frag_count; - uint64_t serial_number; + uint64_t lock_ptr; }; typedef struct ompi_osc_pt2pt_header_flush_t ompi_osc_pt2pt_header_flush_t; @@ -175,13 +173,12 @@ struct ompi_osc_pt2pt_header_flush_ack_t { #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT uint8_t padding[6]; #endif - uint64_t serial_number; + uint64_t lock_ptr; }; typedef struct ompi_osc_pt2pt_header_flush_ack_t ompi_osc_pt2pt_header_flush_ack_t; struct ompi_osc_pt2pt_frag_header_t { ompi_osc_pt2pt_header_base_t base; - uint16_t windx; /* cid of communicator backing window (our window id) */ uint32_t source; /* rank in window of source process */ int32_t num_ops; /* number of operations in this buffer */ uint32_t pad; /* ensure the fragment header is a multiple of 8 bytes */ @@ -208,16 +205,14 @@ typedef union ompi_osc_pt2pt_header_t ompi_osc_pt2pt_header_t; #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT #define MCA_OSC_PT2PT_FRAG_HDR_NTOH(h) \ - (h).windx = ntohs((h).windx); \ (h).source = ntohl((h).source); \ (h).num_ops = ntohl((h).num_ops); \ (h).pad = ntohl((h).pad); #define MCA_OSC_PT2PT_FRAG_HDR_HTON(h) \ - (h).windx = htons((h).windx); \ (h).source = htonl((h).source); \ (h).num_ops = htonl((h).num_ops); \ (h).pad = htonl((h).pad); - + #define MCA_OSC_PT2PT_PUT_HDR_NTOH(h) \ (h).tag = ntohs((h).tag); \ (h).count = ntohl((h).count); \ @@ -254,34 +249,24 @@ typedef 
union ompi_osc_pt2pt_header_t ompi_osc_pt2pt_header_t; (h).op = htonl((h).op); #define MCA_OSC_PT2PT_LOCK_HDR_NTOH(h) \ - (h).lock_type = ntohl((h).lock_type); \ - (h).lock_ptr = ntoh64((h).lock_ptr) + (h).lock_type = ntohl((h).lock_type) #define MCA_OSC_PT2PT_LOCK_HDR_HTON(h) \ - (h).lock_type = htonl((h).lock_type); \ - (h).lock_ptr = hton64((h).lock_ptr) + (h).lock_type = htonl((h).lock_type) #define MCA_OSC_PT2PT_UNLOCK_HDR_NTOH(h) \ (h).lock_type = ntohl((h).lock_type); \ - (h).lock_ptr = ntoh64((h).lock_ptr); \ (h).frag_count = ntohl((h).frag_count) #define MCA_OSC_PT2PT_UNLOCK_HDR_HTON(h) \ (h).lock_type = htonl((h).lock_type); \ - (h).lock_ptr = hton64((h).lock_ptr); \ (h).frag_count = htonl((h).frag_count) #define MCA_OSC_PT2PT_LOCK_ACK_HDR_NTOH(h) \ - (h).windx = ntohs((h).windx); \ - (h).source = ntohl((h).source); \ - (h).lock_ptr = ntoh64((h).lock_ptr) + (h).source = ntohl((h).source) #define MCA_OSC_PT2PT_LOCK_ACK_HDR_HTON(h) \ - (h).windx = htonl((h).windx); \ - (h).source= htonl((h).source); \ - (h).lock_ptr = hton64((h).lock_ptr) + (h).source= htonl((h).source) -#define MCA_OSC_PT2PT_UNLOCK_ACK_HDR_NTOH(h) \ - (h).lock_ptr = ntoh64((h).lock_ptr); -#define MCA_OSC_PT2PT_UNLOCK_ACK_HDR_HTON(h) \ - (h).lock_ptr = hton64((h).lock_ptr); +#define MCA_OSC_PT2PT_UNLOCK_ACK_HDR_NTOH(h) +#define MCA_OSC_PT2PT_UNLOCK_ACK_HDR_HTON(h) #define MCA_OSC_PT2PT_COMPLETE_HDR_NTOH(h) \ (h).frag_count = ntohl((h).frag_count) @@ -289,21 +274,15 @@ typedef union ompi_osc_pt2pt_header_t ompi_osc_pt2pt_header_t; (h).frag_count = htonl((h).frag_count) #define MCA_OSC_PT2PT_FLUSH_HDR_NTOH(h) \ - (h).frag_count = ntohl((h).frag_count); \ - (h).serial_number = ntoh64((h).serial_number) + (h).frag_count = ntohl((h).frag_count) #define MCA_OSC_PT2PT_FLUSH_HDR_HTON(h) \ - (h).frag_count = htonl((h).frag_count); \ - (h).serial_number = ntoh64((h).serial_number) - -#define MCA_OSC_PT2PT_FLUSH_ACK_HDR_NTOH(h) \ - (h).serial_number = ntoh64((h).serial_number) -#define 
MCA_OSC_PT2PT_FLUSH_ACK_HDR_HTON(h) \ - (h).serial_number = ntoh64((h).serial_number) - -#define MCA_OSC_PT2PT_POST_HDR_NTOH(h) \ - (h).windx = ntohs((h).windx) -#define MCA_OSC_PT2PT_POST_HDR_HTON(h) \ - (h).windx = htons((h).windx) + (h).frag_count = htonl((h).frag_count) + +#define MCA_OSC_PT2PT_FLUSH_ACK_HDR_NTOH(h) +#define MCA_OSC_PT2PT_FLUSH_ACK_HDR_HTON(h) + +#define MCA_OSC_PT2PT_POST_HDR_NTOH(h) +#define MCA_OSC_PT2PT_POST_HDR_HTON(h) #define MCA_OSC_PT2PT_CSWAP_HDR_NTOH(h) \ (h).tag = ntohs((h).tag); \ diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_module.c b/ompi/mca/osc/pt2pt/osc_pt2pt_module.c index 51a4c2d79ad..0280f4738ca 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_module.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_module.c @@ -8,9 +8,11 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -18,37 +20,29 @@ * $HEADER$ */ -#include "ompi_config.h" - #include "osc_pt2pt.h" -#include "opal/threads/mutex.h" -#include "opal/mca/btl/btl.h" -#include "ompi/win/win.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/osc/base/base.h" -#include "mpi.h" - -int -ompi_osc_pt2pt_attach(struct ompi_win_t *win, void *base, size_t len) +int ompi_osc_pt2pt_attach(struct ompi_win_t *win, void *base, size_t len) { return OMPI_SUCCESS; } int -ompi_osc_pt2pt_detach(struct ompi_win_t *win, void *base) +ompi_osc_pt2pt_detach(struct ompi_win_t *win, const void *base) { return OMPI_SUCCESS; } -int -ompi_osc_pt2pt_free(ompi_win_t *win) +int ompi_osc_pt2pt_free(ompi_win_t *win) { int ret = OMPI_SUCCESS; ompi_osc_pt2pt_module_t *module = GET_MODULE(win); + ompi_osc_pt2pt_peer_t *peer; + uint32_t key; + void *node; if (NULL == module) { return OMPI_SUCCESS; @@ -61,8 +55,8 @@ ompi_osc_pt2pt_free(ompi_win_t *win) /* finish with a barrier */ if (ompi_group_size(win->w_group) > 1) { - ret = module->comm->c_coll.coll_barrier(module->comm, - module->comm->c_coll.coll_barrier_module); + (void) module->comm->c_coll.coll_barrier (module->comm, + module->comm->c_coll.coll_barrier_module); } /* remove from component information */ @@ -76,42 +70,45 @@ ompi_osc_pt2pt_free(ompi_win_t *win) OBJ_DESTRUCT(&module->outstanding_locks); OBJ_DESTRUCT(&module->locks_pending); OBJ_DESTRUCT(&module->locks_pending_lock); - OBJ_DESTRUCT(&module->acc_lock); OBJ_DESTRUCT(&module->cond); OBJ_DESTRUCT(&module->lock); + OBJ_DESTRUCT(&module->all_sync); /* it is erroneous to close a window with active operations on it so we should * probably produce an error here instead of cleaning up */ OPAL_LIST_DESTRUCT(&module->pending_acc); - OPAL_LIST_DESTRUCT(&module->pending_posts); osc_pt2pt_gc_clean (module); - OPAL_LIST_DESTRUCT(&module->request_gc); OPAL_LIST_DESTRUCT(&module->buffer_gc); OBJ_DESTRUCT(&module->gc_lock); - if (NULL != 
module->peers) { - for (int i = 0 ; i < ompi_comm_size (module->comm) ; ++i) { - OBJ_DESTRUCT(module->peers + i); + ret = opal_hash_table_get_first_key_uint32 (&module->peer_hash, &key, (void **) &peer, &node); + while (OPAL_SUCCESS == ret) { + OBJ_RELEASE(peer); + ret = opal_hash_table_get_next_key_uint32 (&module->peer_hash, &key, (void **) &peer, node, + &node); + } + + OBJ_DESTRUCT(&module->peer_hash); + OBJ_DESTRUCT(&module->peer_lock); + + if (NULL != module->recv_frags) { + for (unsigned int i = 0 ; i < module->recv_frag_count ; ++i) { + OBJ_DESTRUCT(module->recv_frags + i); } - free(module->peers); + free (module->recv_frags); } if (NULL != module->epoch_outgoing_frag_count) free(module->epoch_outgoing_frag_count); - if (NULL != module->frag_request) { - module->frag_request->req_complete_cb = NULL; - ompi_request_cancel (module->frag_request); - ompi_request_free (&module->frag_request); - } if (NULL != module->comm) { ompi_comm_free(&module->comm); } - if (NULL != module->incoming_buffer) free (module->incoming_buffer); + if (NULL != module->free_after) free(module->free_after); free (module); - return ret; + return OMPI_SUCCESS; } diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c index 9de24ae67cc..1f3bf4eb628 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c @@ -8,12 +8,12 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010-2016 IBM Corporation. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -37,7 +37,7 @@ #include "opal/include/opal_stdint.h" static bool ompi_osc_pt2pt_lock_try_acquire (ompi_osc_pt2pt_module_t* module, int source, int lock_type, - uint64_t serial_number); + uint64_t lock_ptr); /* target-side tracking of a lock request */ struct ompi_osc_pt2pt_pending_lock_t { @@ -50,113 +50,55 @@ typedef struct ompi_osc_pt2pt_pending_lock_t ompi_osc_pt2pt_pending_lock_t; OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_pending_lock_t, opal_list_item_t, NULL, NULL); - -/* origin-side tracking of a lock request */ -struct ompi_osc_pt2pt_outstanding_lock_t { - opal_list_item_t super; - int target; - int assert; - bool flushing; - int32_t lock_acks_expected; - int32_t unlock_acks_expected; - int32_t flush_acks_expected; - uint64_t serial_number; - int32_t type; -}; -typedef struct ompi_osc_pt2pt_outstanding_lock_t ompi_osc_pt2pt_outstanding_lock_t; -OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_outstanding_lock_t, opal_list_item_t, - NULL, NULL); - static int ompi_osc_activate_next_lock (ompi_osc_pt2pt_module_t *module); static inline int queue_lock (ompi_osc_pt2pt_module_t *module, int requestor, int lock_type, uint64_t lock_ptr); -static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_outstanding_lock_t *lock, +static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock, int target); - -/** - * Find the first outstanding lock to a target. - * - * @param[in] module - OSC PT2PT module - * @param[in] target - Target rank - * - * @returns an outstanding lock on success - * - * This function traverses the outstanding_locks list in the module - * looking for a lock that matches target. The caller must hold the - * module lock. 
- */ -static inline ompi_osc_pt2pt_outstanding_lock_t *find_outstanding_lock_st (ompi_osc_pt2pt_module_t *module, int target) -{ - ompi_osc_pt2pt_outstanding_lock_t *outstanding_lock, *lock = NULL; - - OPAL_LIST_FOREACH(outstanding_lock, &module->outstanding_locks, ompi_osc_pt2pt_outstanding_lock_t) { - if (outstanding_lock->target == target) { - lock = outstanding_lock; - break; - } - } - - return lock; -} - -static inline ompi_osc_pt2pt_outstanding_lock_t *find_outstanding_lock (ompi_osc_pt2pt_module_t *module, int target) -{ - ompi_osc_pt2pt_outstanding_lock_t *lock; - - OPAL_THREAD_LOCK(&module->lock); - lock = find_outstanding_lock_st (module, target); - OPAL_THREAD_UNLOCK(&module->lock); - - return lock; -} - -static inline ompi_osc_pt2pt_outstanding_lock_t *find_outstanding_lock_by_serial (ompi_osc_pt2pt_module_t *module, uint64_t serial_number) -{ - ompi_osc_pt2pt_outstanding_lock_t *outstanding_lock, *lock = NULL; - - OPAL_THREAD_LOCK(&module->lock); - OPAL_LIST_FOREACH(outstanding_lock, &module->outstanding_locks, ompi_osc_pt2pt_outstanding_lock_t) { - if (outstanding_lock->serial_number == serial_number) { - lock = outstanding_lock; - break; - } - } - OPAL_THREAD_UNLOCK(&module->lock); - - return lock; -} - -static inline int ompi_osc_pt2pt_lock_self (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_outstanding_lock_t *lock) +static inline int ompi_osc_pt2pt_lock_self (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock) { const int my_rank = ompi_comm_rank (module->comm); + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank); + int lock_type = lock->sync.lock.type; bool acquired = false; - acquired = ompi_osc_pt2pt_lock_try_acquire (module, my_rank, lock->type, (uint64_t) (uintptr_t) lock); + assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); + + (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + + acquired = ompi_osc_pt2pt_lock_try_acquire (module, my_rank, lock_type, (uint64_t) (uintptr_t) lock); if (!acquired) { /* 
queue the lock */ - queue_lock (module, my_rank, lock->type, (uint64_t) (uintptr_t) lock); + queue_lock (module, my_rank, lock_type, (uint64_t) (uintptr_t) lock); /* If locking local, can't be non-blocking according to the standard. We need to wait for the ack here. */ - OPAL_THREAD_LOCK(&module->lock); - while (lock->lock_acks_expected) { - opal_condition_wait(&module->cond, &module->lock); - } - OPAL_THREAD_UNLOCK(&module->lock); + ompi_osc_pt2pt_sync_wait_expected (lock); } + ompi_osc_pt2pt_peer_set_locked (peer, true); + ompi_osc_pt2pt_peer_set_eager_active (peer, true); + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "local lock aquired")); return OMPI_SUCCESS; } -static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_outstanding_lock_t *lock) +static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock) { + const int my_rank = ompi_comm_rank (module->comm); + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, my_rank); + int lock_type = lock->sync.lock.type; + + (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + + assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_unlock_self: unlocking myself. 
lock state = %d", module->lock_status)); - if (MPI_LOCK_EXCLUSIVE == lock->type) { + if (MPI_LOCK_EXCLUSIVE == lock_type) { OPAL_THREAD_ADD32(&module->lock_status, 1); ompi_osc_activate_next_lock (module); } else if (0 == OPAL_THREAD_ADD32(&module->lock_status, -1)) { @@ -166,15 +108,30 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module, /* need to ensure we make progress */ opal_progress(); - OPAL_THREAD_ADD32(&lock->unlock_acks_expected, -1); + ompi_osc_pt2pt_peer_set_locked (peer, false); + ompi_osc_pt2pt_peer_set_eager_active (peer, false); + + ompi_osc_pt2pt_sync_expected (lock); } -static inline int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_outstanding_lock_t *lock) +int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_sync_t *lock) { + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target); + int lock_type = lock->sync.lock.type; ompi_osc_pt2pt_header_lock_t lock_req; int ret; + OPAL_THREAD_LOCK(&peer->lock); + if (ompi_osc_pt2pt_peer_locked (peer)) { + OPAL_THREAD_UNLOCK(&peer->lock); + return OMPI_SUCCESS; + } + + (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + + assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); + /* generate a lock request */ lock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ; lock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET; @@ -182,26 +139,33 @@ static inline int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, i lock_req.padding[0] = 0; lock_req.padding[1] = 0; #endif - lock_req.lock_type = lock->type; + lock_req.lock_type = lock_type; lock_req.lock_ptr = (uint64_t) (uintptr_t) lock; OSC_PT2PT_HTON(&lock_req, module, target); - ret = ompi_osc_pt2pt_control_send (module, target, &lock_req, sizeof (lock_req)); + ret = ompi_osc_pt2pt_control_send_unbuffered (module, target, &lock_req, sizeof (lock_req)); if (OPAL_UNLIKELY(OMPI_SUCCESS != 
ret)) { - return ret; + OPAL_THREAD_ADD32(&lock->sync_expected, -1); + } else { + ompi_osc_pt2pt_peer_set_locked (peer, true); } - /* make sure the request gets sent, so we can start eager sending... */ - ret = ompi_osc_pt2pt_frag_flush_target (module, target); + OPAL_THREAD_UNLOCK(&peer->lock); return ret; } -static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_outstanding_lock_t *lock) +static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_sync_t *lock) { - ompi_osc_pt2pt_peer_t *peer = module->peers + target; - ompi_osc_pt2pt_header_unlock_t unlock_req; int32_t frag_count = opal_atomic_swap_32 ((int32_t *) module->epoch_outgoing_frag_count + target, -1); + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target); + int lock_type = lock->sync.lock.type; + ompi_osc_pt2pt_header_unlock_t unlock_req; + int ret; + + (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + + assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); unlock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ; unlock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET; @@ -210,7 +174,7 @@ static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module, unlock_req.padding[1] = 0; #endif unlock_req.frag_count = frag_count; - unlock_req.lock_type = lock->type; + unlock_req.lock_type = lock_type; unlock_req.lock_ptr = (uint64_t) (uintptr_t) lock; OSC_PT2PT_HTON(&unlock_req, module, target); @@ -225,20 +189,32 @@ static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module, unlock_req.frag_count)); /* send control message with unlock request and count */ - return ompi_osc_pt2pt_control_send (module, target, &unlock_req, sizeof (unlock_req)); + ret = ompi_osc_pt2pt_control_send (module, target, &unlock_req, sizeof (unlock_req)); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + 
ompi_osc_pt2pt_peer_set_locked (peer, false); + ompi_osc_pt2pt_peer_set_eager_active (peer, false); + + return ompi_osc_pt2pt_frag_flush_target(module, target); } -static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_outstanding_lock_t *lock) +static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_sync_t *lock) { - ompi_osc_pt2pt_peer_t *peer = module->peers + target; + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target); ompi_osc_pt2pt_header_flush_t flush_req; int32_t frag_count = opal_atomic_swap_32 ((int32_t *) module->epoch_outgoing_frag_count + target, -1); int ret; + (void) OPAL_THREAD_ADD32(&lock->sync_expected, 1); + + assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); + flush_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ; flush_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET; flush_req.frag_count = frag_count; - flush_req.serial_number = lock->serial_number; + flush_req.lock_ptr = (uint64_t) (uintptr_t) lock; /* XXX -- TODO -- since fragment are always delivered in order we do not need to count anything but long * requests. once that is done this can be removed. */ @@ -262,17 +238,16 @@ static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module, return ompi_osc_pt2pt_frag_flush_target (module, target); } -static int ompi_osc_pt2pt_lock_internal_execute (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_outstanding_lock_t *lock) +static int ompi_osc_pt2pt_lock_internal_execute (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock) { int my_rank = ompi_comm_rank (module->comm); - int target = lock->target; - int assert = lock->assert; + int target = lock->sync.lock.target; + int assert = lock->sync.lock.assert; int ret; - if (0 == (assert & MPI_MODE_NOCHECK)) { - lock->lock_acks_expected = (-1 == target) ? 
ompi_comm_size (module->comm) : 1; - lock->unlock_acks_expected = lock->lock_acks_expected; + assert (lock->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK); + if (0 == (assert & MPI_MODE_NOCHECK)) { if (my_rank != target && target != -1) { ret = ompi_osc_pt2pt_lock_remote (module, target, lock); } else { @@ -284,19 +259,9 @@ static int ompi_osc_pt2pt_lock_internal_execute (ompi_osc_pt2pt_module_t *module return ret; } - if (-1 == target) { - for (int i = 0 ; i < ompi_comm_size(module->comm) ; ++i) { - if (my_rank == i) { - continue; - } - - ret = ompi_osc_pt2pt_lock_remote (module, i, lock); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - } - - } + /* for lock_all there is nothing more to do. we will lock peers on demand */ + } else { + lock->eager_send_active = true; } return OMPI_SUCCESS; @@ -305,63 +270,91 @@ static int ompi_osc_pt2pt_lock_internal_execute (ompi_osc_pt2pt_module_t *module static int ompi_osc_pt2pt_lock_internal (int lock_type, int target, int assert, ompi_win_t *win) { ompi_osc_pt2pt_module_t *module = GET_MODULE(win); - ompi_osc_pt2pt_outstanding_lock_t *lock; - ompi_osc_pt2pt_peer_t *peer = NULL; + ompi_osc_pt2pt_sync_t *lock; int ret = OMPI_SUCCESS; - if (-1 != target) { - peer = module->peers + target; + /* Check if no_locks is set. TODO: we also need to track whether we are in an + * active target epoch. Fence can make this tricky to track. */ + if (-1 == target) { + if (module->all_sync.epoch_active) { + OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output, "osc/pt2pt: attempted " + "to lock all when active target epoch is %s and lock all epoch is %s. type %d", + (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK != module->all_sync.type && module->all_sync.epoch_active) ? + "active" : "inactive", + (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK == module->all_sync.type) ? 
"active" : "inactive", + module->all_sync.type)); + return OMPI_ERR_RMA_SYNC; + } + } else { + if (module->all_sync.epoch_active && (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK != module->all_sync.type || MPI_LOCK_EXCLUSIVE == lock_type)) { + /* impossible to get an exclusive lock while holding a global shared lock or in an active + * target access epoch */ + return OMPI_ERR_RMA_SYNC; + } } /* Check if no_locks is set. TODO: we also need to track whether we are in an * active target epoch. Fence can make this tricky to track. */ - if (module->sc_group) { + if (module->all_sync.epoch_active || (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK == module->all_sync.type && + (MPI_LOCK_EXCLUSIVE == lock_type || -1 == target))) { + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc pt2pt: attempted " + "to acquire a lock on %d with type %d when active sync is %s and lock " + "all epoch is %s", target, lock_type, module->all_sync.epoch_active ? "active" : "inactive", + (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK == module->all_sync.type && + (MPI_LOCK_EXCLUSIVE == lock_type || -1 == target)) ? 
"active" : "inactive")); return OMPI_ERR_RMA_SYNC; } + if (OMPI_OSC_PT2PT_SYNC_TYPE_FENCE == module->all_sync.type) { + /* if no communication has occurred during a fence epoch then we can enter a lock epoch + * just need to clear the all access epoch */ + module->all_sync.type = OMPI_OSC_PT2PT_SYNC_TYPE_NONE; + } + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc pt2pt: lock %d %d", target, lock_type)); /* create lock item */ - lock = OBJ_NEW(ompi_osc_pt2pt_outstanding_lock_t); - if (OPAL_UNLIKELY(NULL == lock)) { - return OMPI_ERR_OUT_OF_RESOURCE; + if (-1 != target) { + lock = ompi_osc_pt2pt_sync_allocate (module); + if (OPAL_UNLIKELY(NULL == lock)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + lock->peer_list.peer = ompi_osc_pt2pt_peer_lookup (module, target); + } else { + lock = &module->all_sync; } - lock->target = target; - lock->lock_acks_expected = 0; - lock->unlock_acks_expected = 0; - lock->serial_number = OPAL_THREAD_ADD64((int64_t *) &module->lock_serial_number, 1); - lock->type = lock_type; - lock->assert = assert; + lock->type = OMPI_OSC_PT2PT_SYNC_TYPE_LOCK; + lock->sync.lock.target = target; + lock->sync.lock.type = lock_type; + lock->sync.lock.assert = assert; + lock->num_peers = (-1 == target) ? ompi_comm_size (module->comm) : 1; + lock->sync_expected = 0; /* delay all eager sends until we've heard back.. 
*/ OPAL_THREAD_LOCK(&module->lock); /* check for conflicting lock */ - if (find_outstanding_lock_st (module, target)) { - OBJ_RELEASE(lock); + if (ompi_osc_pt2pt_module_lock_find (module, target, NULL)) { + ompi_osc_pt2pt_sync_return (lock); OPAL_THREAD_UNLOCK(&module->lock); return OMPI_ERR_RMA_CONFLICT; } - /* when the lock ack returns we will be in an access epoch with this peer/all peers (target = -1) */ - if (-1 == target) { - module->all_access_epoch = true; - } else { - peer->access_epoch = true; - } - ++module->passive_target_access_epoch; - opal_list_append(&module->outstanding_locks, &lock->super); + ompi_osc_pt2pt_module_lock_insert (module, lock); + OPAL_THREAD_UNLOCK(&module->lock); ret = ompi_osc_pt2pt_lock_internal_execute (module, lock); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - OPAL_THREAD_SCOPED_LOCK(&module->lock, - opal_list_remove_item(&module->outstanding_locks, &lock->super)); - OBJ_RELEASE(lock); + OPAL_THREAD_SCOPED_LOCK(&module->lock, ompi_osc_pt2pt_module_lock_remove (module, lock)); + if (&module->all_sync != lock) { + ompi_osc_pt2pt_sync_return (lock); + } } return ret; @@ -370,20 +363,15 @@ static int ompi_osc_pt2pt_lock_internal (int lock_type, int target, int assert, static int ompi_osc_pt2pt_unlock_internal (int target, ompi_win_t *win) { ompi_osc_pt2pt_module_t *module = GET_MODULE(win); - ompi_osc_pt2pt_outstanding_lock_t *lock = NULL; + ompi_osc_pt2pt_sync_t *lock = NULL; int my_rank = ompi_comm_rank (module->comm); - ompi_osc_pt2pt_peer_t *peer = NULL; int ret = OMPI_SUCCESS; - if (-1 != target) { - peer = module->peers + target; - } - OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_unlock_internal: unlocking target %d", target)); OPAL_THREAD_LOCK(&module->lock); - lock = find_outstanding_lock_st (module, target); + lock = ompi_osc_pt2pt_module_lock_find (module, target, NULL); if (OPAL_UNLIKELY(NULL == lock)) { OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, 
"ompi_osc_pt2pt_unlock: target %d is not locked in window %s", @@ -392,80 +380,77 @@ static int ompi_osc_pt2pt_unlock_internal (int target, ompi_win_t *win) return OMPI_ERR_RMA_SYNC; } - opal_list_remove_item (&module->outstanding_locks, &lock->super); - OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_unlock_internal: lock acks still expected: %d", - lock->lock_acks_expected)); + lock->sync_expected)); /* wait until ack has arrived from target */ - while (lock->lock_acks_expected) { - opal_condition_wait(&module->cond, &module->lock); - } + ompi_osc_pt2pt_sync_wait_expected (lock); OPAL_THREAD_UNLOCK(&module->lock); OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_unlock_internal: all lock acks received")); - if (lock->assert & MPI_MODE_NOCHECK) { - /* flush instead */ - ompi_osc_pt2pt_flush_lock (module, lock, target); - } else if (my_rank != target) { - if (-1 == target) { - /* send unlock messages to all of my peers */ - for (int i = 0 ; i < ompi_comm_size(module->comm) ; ++i) { - if (my_rank == i) { - continue; + if (!(lock->sync.lock.assert & MPI_MODE_NOCHECK)) { + if (my_rank != target) { + if (-1 == target) { + /* send unlock messages to all of my peers */ + for (int i = 0 ; i < ompi_comm_size(module->comm) ; ++i) { + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, i); + + if (my_rank == i || !ompi_osc_pt2pt_peer_locked (peer)) { + continue; + } + + ret = ompi_osc_pt2pt_unlock_remote (module, i, lock); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } } - ret = ompi_osc_pt2pt_unlock_remote (module, i, lock); + ompi_osc_pt2pt_unlock_self (module, lock); + } else { + ret = ompi_osc_pt2pt_unlock_remote (module, target, lock); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; } } - ompi_osc_pt2pt_unlock_self (module, lock); - } else { - ret = ompi_osc_pt2pt_unlock_remote (module, target, lock); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; + /* wait 
for unlock acks. this signals remote completion of fragments */ + ompi_osc_pt2pt_sync_wait_expected (lock); + + /* It is possible for the unlock to finish too early before the data + * is actually present in the recv buffer (for non-contiguous datatypes) + * So make sure to wait for all of the fragments to arrive. + */ + OPAL_THREAD_LOCK(&module->lock); + while (module->outgoing_frag_count < module->outgoing_frag_signal_count) { + opal_condition_wait(&module->cond, &module->lock); } - } + OPAL_THREAD_UNLOCK(&module->lock); - /* start all sendreqs to target */ - if (-1 == target) { - ret = ompi_osc_pt2pt_frag_flush_all (module); + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, + "ompi_osc_pt2pt_unlock: unlock of %d complete", target)); } else { - ret = ompi_osc_pt2pt_frag_flush_target(module, target); - } - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - - /* wait for unlock acks. this signals remote completion of fragments */ - OPAL_THREAD_LOCK(&module->lock); - while (lock->unlock_acks_expected) { - opal_condition_wait(&module->cond, &module->lock); + ompi_osc_pt2pt_unlock_self (module, lock); } - OPAL_THREAD_UNLOCK(&module->lock); - - OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, - "ompi_osc_pt2pt_unlock: unlock of %d complete", target)); } else { - ompi_osc_pt2pt_unlock_self (module, lock); + /* flush instead */ + ompi_osc_pt2pt_flush_lock (module, lock, target); } OPAL_THREAD_LOCK(&module->lock); - if (-1 != target) { - peer->access_epoch = false; + ompi_osc_pt2pt_module_lock_remove (module, lock); + + if (-1 != lock->sync.lock.target) { + ompi_osc_pt2pt_sync_return (lock); } else { - module->all_access_epoch = false; + ompi_osc_pt2pt_sync_reset (lock); } + --module->passive_target_access_epoch; - OPAL_THREAD_UNLOCK(&module->lock); - OBJ_RELEASE(lock); + OPAL_THREAD_UNLOCK(&module->lock); return ret; } @@ -500,7 +485,7 @@ int ompi_osc_pt2pt_sync (struct ompi_win_t *win) return OMPI_SUCCESS; } -static int 
ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_outstanding_lock_t *lock, +static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_sync_t *lock, int target) { int ret; @@ -508,20 +493,7 @@ static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_ /* wait until ack has arrived from target, since we need to be able to eager send before we can transfer all the data... */ - OPAL_THREAD_LOCK(&module->lock); - while (lock->lock_acks_expected && lock->flushing) { - opal_condition_wait(&module->cond, &module->lock); - } - - lock->flushing = true; - - if (-1 == target) { - lock->flush_acks_expected = ompi_comm_size(module->comm) - 1; - } else { - lock->flush_acks_expected = 1; - } - - OPAL_THREAD_UNLOCK(&module->lock); + ompi_osc_pt2pt_sync_wait_expected (lock); if (-1 == target) { /* NTH: no local flush */ @@ -536,7 +508,6 @@ static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_ } } } else { - /* send control message with flush request and count */ ret = ompi_osc_pt2pt_flush_remote (module, target, lock); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { @@ -544,16 +515,9 @@ static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_ } } - /* wait for all the requests and the flush ack (meaning remote completion) */ - OPAL_THREAD_LOCK(&module->lock); - while (lock->flush_acks_expected) { - opal_condition_wait(&module->cond, &module->lock); - } - - lock->flushing = false; - opal_condition_broadcast(&module->cond); - - OPAL_THREAD_UNLOCK(&module->lock); + /* wait for all flush acks (meaning remote completion) */ + ompi_osc_pt2pt_sync_wait_expected (lock); + opal_condition_broadcast (&module->cond); return OMPI_SUCCESS; } @@ -561,7 +525,7 @@ static int ompi_osc_pt2pt_flush_lock (ompi_osc_pt2pt_module_t *module, ompi_osc_ int ompi_osc_pt2pt_flush (int target, struct ompi_win_t *win) { ompi_osc_pt2pt_module_t *module = GET_MODULE(win); - 
ompi_osc_pt2pt_outstanding_lock_t *lock; + ompi_osc_pt2pt_sync_t *lock; int ret; assert (0 <= target); @@ -580,10 +544,14 @@ int ompi_osc_pt2pt_flush (int target, struct ompi_win_t *win) return OMPI_SUCCESS; } - lock = find_outstanding_lock (module, target); + OPAL_THREAD_LOCK(&module->lock); + lock = ompi_osc_pt2pt_module_lock_find (module, target, NULL); if (NULL == lock) { - lock = find_outstanding_lock (module, -1); + if (OMPI_OSC_PT2PT_SYNC_TYPE_LOCK == module->all_sync.type) { + lock = &module->all_sync; + } } + OPAL_THREAD_UNLOCK(&module->lock); if (OPAL_UNLIKELY(NULL == lock)) { OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_flush: target %d is not locked in window %s", @@ -600,12 +568,12 @@ int ompi_osc_pt2pt_flush (int target, struct ompi_win_t *win) int ompi_osc_pt2pt_flush_all (struct ompi_win_t *win) { ompi_osc_pt2pt_module_t *module = GET_MODULE(win); - ompi_osc_pt2pt_outstanding_lock_t *lock; - int ret = OMPI_SUCCESS; + ompi_osc_pt2pt_sync_t *lock; + int target, ret; + void *node; /* flush is only allowed from within a passive target epoch */ - if (OPAL_UNLIKELY(!module->passive_target_access_epoch || - 0 == opal_list_get_size (&module->outstanding_locks))) { + if (OPAL_UNLIKELY(!module->passive_target_access_epoch)) { OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_flush_all: no targets are locked in window %s", win->w_name)); @@ -616,11 +584,22 @@ int ompi_osc_pt2pt_flush_all (struct ompi_win_t *win) "ompi_osc_pt2pt_flush_all entering...")); /* flush all locks */ - OPAL_LIST_FOREACH(lock, &module->outstanding_locks, ompi_osc_pt2pt_outstanding_lock_t) { - ret = ompi_osc_pt2pt_flush_lock (module, lock, lock->target); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - break; - } + ret = opal_hash_table_get_first_key_uint32 (&module->outstanding_locks, (uint32_t *) &target, + (void **) &lock, &node); + if (OPAL_SUCCESS == ret) { + do { + ret = ompi_osc_pt2pt_flush_lock (module, lock, 
lock->sync.lock.target); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + break; + } + + ret = opal_hash_table_get_next_key_uint32 (&module->outstanding_locks, (uint32_t *) &target, + (void **) &lock, node, &node); + if (OPAL_SUCCESS != ret) { + ret = OPAL_SUCCESS; + break; + } + } while (1); } OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, @@ -686,7 +665,7 @@ int ompi_osc_pt2pt_flush_local_all (struct ompi_win_t *win) static inline int activate_lock (ompi_osc_pt2pt_module_t *module, int requestor, uint64_t lock_ptr) { - ompi_osc_pt2pt_outstanding_lock_t *lock; + ompi_osc_pt2pt_sync_t *lock; if (ompi_comm_rank (module->comm) != requestor) { ompi_osc_pt2pt_header_lock_ack_t lock_ack; @@ -694,7 +673,6 @@ static inline int activate_lock (ompi_osc_pt2pt_module_t *module, int requestor, lock_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_LOCK_ACK; lock_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; lock_ack.source = ompi_comm_rank(module->comm); - lock_ack.windx = ompi_comm_get_cid(module->comm); lock_ack.lock_ptr = lock_ptr; OSC_PT2PT_HTON(&lock_ack, module, requestor); @@ -710,15 +688,13 @@ static inline int activate_lock (ompi_osc_pt2pt_module_t *module, int requestor, OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc pt2pt: releasing local lock")); - lock = (ompi_osc_pt2pt_outstanding_lock_t *) (uintptr_t) lock_ptr; + lock = (ompi_osc_pt2pt_sync_t *) (uintptr_t) lock_ptr; if (OPAL_UNLIKELY(NULL == lock)) { OPAL_OUTPUT_VERBOSE((5, ompi_osc_base_framework.framework_output, "lock could not be located")); } - if (0 == OPAL_THREAD_ADD32(&lock->lock_acks_expected, -1)) { - opal_condition_broadcast (&module->cond); - } + ompi_osc_pt2pt_sync_expected (lock); return OMPI_SUCCESS; } @@ -828,65 +804,51 @@ int ompi_osc_pt2pt_process_lock (ompi_osc_pt2pt_module_t* module, int source, /* initiator-side function called when the target acks the lock request. 
*/ void ompi_osc_pt2pt_process_lock_ack (ompi_osc_pt2pt_module_t *module, - ompi_osc_pt2pt_header_lock_ack_t *lock_ack_header) + ompi_osc_pt2pt_header_lock_ack_t *lock_ack_header) { - ompi_osc_pt2pt_peer_t *peer = module->peers + lock_ack_header->source; - ompi_osc_pt2pt_outstanding_lock_t *lock; + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, lock_ack_header->source); + ompi_osc_pt2pt_sync_t *lock; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_process_lock_ack: processing lock ack from %d for lock %" PRIu64, lock_ack_header->source, lock_ack_header->lock_ptr)); - lock = (ompi_osc_pt2pt_outstanding_lock_t *) (uintptr_t) lock_ack_header->lock_ptr; + lock = (ompi_osc_pt2pt_sync_t *) (uintptr_t) lock_ack_header->lock_ptr; assert (NULL != lock); - /* no need to hold the lock to set this */ - peer->eager_send_active = true; - if (0 == OPAL_THREAD_ADD32(&lock->lock_acks_expected, -1)) { - opal_condition_broadcast(&module->cond); - } + ompi_osc_pt2pt_peer_set_eager_active (peer, true); - opal_condition_broadcast(&module->cond); + ompi_osc_pt2pt_sync_expected (lock); } void ompi_osc_pt2pt_process_flush_ack (ompi_osc_pt2pt_module_t *module, int source, ompi_osc_pt2pt_header_flush_ack_t *flush_ack_header) { - ompi_osc_pt2pt_outstanding_lock_t *lock; + ompi_osc_pt2pt_sync_t *lock; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, - "ompi_osc_pt2pt_process_flush_ack: processing flush ack from %d for lock %" PRIu64, - source, flush_ack_header->serial_number)); + "ompi_osc_pt2pt_process_flush_ack: processing flush ack from %d for lock 0x%" PRIx64, + source, flush_ack_header->lock_ptr)); - /* NTH: need to verify that this will work as expected */ - lock = find_outstanding_lock_by_serial (module, flush_ack_header->serial_number); + lock = (ompi_osc_pt2pt_sync_t *) (uintptr_t) flush_ack_header->lock_ptr; assert (NULL != lock); - if (0 == OPAL_THREAD_ADD32(&lock->flush_acks_expected, -1)) { - 
opal_condition_broadcast(&module->cond); - } - - opal_condition_broadcast(&module->cond); + ompi_osc_pt2pt_sync_expected (lock); } void ompi_osc_pt2pt_process_unlock_ack (ompi_osc_pt2pt_module_t *module, int source, - ompi_osc_pt2pt_header_unlock_ack_t *unlock_ack_header) + ompi_osc_pt2pt_header_unlock_ack_t *unlock_ack_header) { - ompi_osc_pt2pt_peer_t *peer = module->peers + source; - ompi_osc_pt2pt_outstanding_lock_t *lock; + ompi_osc_pt2pt_sync_t *lock; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_process_unlock_ack: processing unlock ack from %d", source)); /* NTH: need to verify that this will work as expected */ - lock = (ompi_osc_pt2pt_outstanding_lock_t *) (intptr_t) unlock_ack_header->lock_ptr; + lock = (ompi_osc_pt2pt_sync_t *) (intptr_t) unlock_ack_header->lock_ptr; assert (NULL != lock); - peer->eager_send_active = false; - - if (0 == OPAL_THREAD_ADD32(&lock->unlock_acks_expected, -1)) { - opal_condition_broadcast(&module->cond); - } + ompi_osc_pt2pt_sync_expected (lock); } /** @@ -902,12 +864,14 @@ void ompi_osc_pt2pt_process_unlock_ack (ompi_osc_pt2pt_module_t *module, int sou * active a pending lock if the lock becomes free. 
*/ int ompi_osc_pt2pt_process_unlock (ompi_osc_pt2pt_module_t *module, int source, - ompi_osc_pt2pt_header_unlock_t *unlock_header) + ompi_osc_pt2pt_header_unlock_t *unlock_header) { + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, source); ompi_osc_pt2pt_header_unlock_ack_t unlock_ack; - ompi_osc_pt2pt_peer_t *peer = module->peers + source; int ret; + assert (NULL != peer); + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_process_unlock entering (passive_incoming_frag_count: %d)...", peer->passive_incoming_frag_count)); @@ -949,11 +913,13 @@ int ompi_osc_pt2pt_process_unlock (ompi_osc_pt2pt_module_t *module, int source, } int ompi_osc_pt2pt_process_flush (ompi_osc_pt2pt_module_t *module, int source, - ompi_osc_pt2pt_header_flush_t *flush_header) + ompi_osc_pt2pt_header_flush_t *flush_header) { - ompi_osc_pt2pt_peer_t *peer = module->peers + source; + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, source); ompi_osc_pt2pt_header_flush_ack_t flush_ack; + assert (NULL != peer); + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_process_flush entering (passive_incoming_frag_count: %d)...", peer->passive_incoming_frag_count)); @@ -965,7 +931,7 @@ int ompi_osc_pt2pt_process_flush (ompi_osc_pt2pt_module_t *module, int source, flush_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK; flush_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; - flush_ack.serial_number = flush_header->serial_number; + flush_ack.lock_ptr = flush_header->lock_ptr; OSC_PT2PT_HTON(&flush_ack, module, source); return ompi_osc_pt2pt_control_send_unbuffered (module, source, &flush_ack, sizeof (flush_ack)); diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_request.c b/ompi/mca/osc/pt2pt/osc_pt2pt_request.c index eddccf5b426..0f817d00d70 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_request.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_request.c @@ -3,6 +3,9 @@ * Copyright (c) 2011-2012 Sandia National Laboratories. 
All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,7 +35,7 @@ request_free(struct ompi_request_t **ompi_req) ompi_osc_pt2pt_request_t *request = (ompi_osc_pt2pt_request_t*) *ompi_req; - if (true != request->super.req_complete) { + if (REQUEST_COMPLETED != request->super.req_complete) { return MPI_ERR_REQUEST; } @@ -51,6 +54,7 @@ request_construct(ompi_osc_pt2pt_request_t *request) request->super.req_status._cancelled = 0; request->super.req_free = request_free; request->super.req_cancel = request_cancel; + request->outstanding_requests = 0; } OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_request_t, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_request.h b/ompi/mca/osc/pt2pt/osc_pt2pt_request.h index 1c0db9e1234..dee5c86892d 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_request.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_request.h @@ -1,8 +1,10 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -22,7 +24,7 @@ struct ompi_osc_pt2pt_request_t { ompi_request_t super; int type; - void *origin_addr; + const void *origin_addr; int origin_count; struct ompi_datatype_t *origin_dt; ompi_osc_pt2pt_module_t* module; @@ -55,6 +57,7 @@ OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_request_t); #define OMPI_OSC_PT2PT_REQUEST_RETURN(req) \ do { \ OMPI_REQUEST_FINI(&(req)->super); \ + (req)->outstanding_requests = 0; \ opal_free_list_return (&mca_osc_pt2pt_component.requests, \ (opal_free_list_item_t *) (req)); \ } while (0) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c new file mode 100644 index 00000000000..7e289148016 --- /dev/null +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c @@ -0,0 +1,93 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "osc_pt2pt.h" +#include "osc_pt2pt_sync.h" + +static void ompi_osc_pt2pt_sync_constructor (ompi_osc_pt2pt_sync_t *sync) +{ + sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_NONE; + sync->eager_send_active = false; + sync->epoch_active = false; + OBJ_CONSTRUCT(&sync->lock, opal_mutex_t); + OBJ_CONSTRUCT(&sync->cond, opal_condition_t); +} + +static void ompi_osc_pt2pt_sync_destructor (ompi_osc_pt2pt_sync_t *sync) +{ + OBJ_DESTRUCT(&sync->lock); + OBJ_DESTRUCT(&sync->cond); +} + +OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_sync_t, opal_free_list_item_t, + ompi_osc_pt2pt_sync_constructor, + ompi_osc_pt2pt_sync_destructor); + +ompi_osc_pt2pt_sync_t *ompi_osc_pt2pt_sync_allocate (struct ompi_osc_pt2pt_module_t *module) +{ + ompi_osc_pt2pt_sync_t *sync; + + /* module is not used yet */ + (void) module; + + sync = OBJ_NEW (ompi_osc_pt2pt_sync_t); + if (OPAL_UNLIKELY(NULL == sync)) { + return NULL; + } + + sync->module = module; + return sync; +} + +void ompi_osc_pt2pt_sync_return 
(ompi_osc_pt2pt_sync_t *sync) +{ + OBJ_RELEASE(sync); +} + +static inline bool ompi_osc_pt2pt_sync_array_peer (int rank, ompi_osc_pt2pt_peer_t **peers, size_t nranks, + struct ompi_osc_pt2pt_peer_t **peer) +{ + int mid = nranks / 2; + + /* base cases */ + if (0 == nranks || (1 == nranks && peers[0]->rank != rank)) { + if (peer) { + *peer = NULL; + } + return false; + } else if (peers[0]->rank == rank) { + if (peer) { + *peer = peers[0]; + } + return true; + } + + if (peers[mid]->rank > rank) { + return ompi_osc_pt2pt_sync_array_peer (rank, peers, mid, peer); + } + + return ompi_osc_pt2pt_sync_array_peer (rank, peers + mid, nranks - mid, peer); +} + +bool ompi_osc_pt2pt_sync_pscw_peer (ompi_osc_pt2pt_module_t *module, int target, struct ompi_osc_pt2pt_peer_t **peer) +{ + ompi_osc_pt2pt_sync_t *pt2pt_sync = &module->all_sync; + + /* check synchronization type */ + if (OMPI_OSC_PT2PT_SYNC_TYPE_PSCW != pt2pt_sync->type) { + if (peer) { + *peer = NULL; + } + return false; + } + + return ompi_osc_pt2pt_sync_array_peer (target, pt2pt_sync->peer_list.peers, pt2pt_sync->num_peers, peer); +} diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h new file mode 100644 index 00000000000..87bd1c45ad2 --- /dev/null +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h @@ -0,0 +1,184 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_OSC_PT2PT_SYNC_H +#define OMPI_OSC_PT2PT_SYNC_H + +#include "ompi_config.h" +#include "opal/class/opal_free_list.h" +#include "opal/threads/threads.h" + +enum ompi_osc_pt2pt_sync_type_t { + /** default value */ + OMPI_OSC_PT2PT_SYNC_TYPE_NONE, + /** lock access epoch */ + OMPI_OSC_PT2PT_SYNC_TYPE_LOCK, + /** fence access epoch */ + OMPI_OSC_PT2PT_SYNC_TYPE_FENCE, + /* post-start-complete-wait access epoch */ + OMPI_OSC_PT2PT_SYNC_TYPE_PSCW, +}; +typedef enum ompi_osc_pt2pt_sync_type_t ompi_osc_pt2pt_sync_type_t; + +struct ompi_osc_pt2pt_module_t; +struct ompi_osc_pt2pt_peer_t; + +/** + * @brief synchronization object + * + * This structure holds information about an access epoch. + */ +struct ompi_osc_pt2pt_sync_t { + opal_free_list_item_t super; + + struct ompi_osc_pt2pt_module_t *module; + + /** synchronization type */ + ompi_osc_pt2pt_sync_type_t type; + + /** synchronization data */ + union { + /** lock specific synchronization data */ + struct { + /** lock target rank (-1 for all) */ + int target; + /** lock type: MPI_LOCK_SHARED, MPI_LOCK_EXCLUSIVE */ + int type; + /** assert specified at lock acquire time */ + int assert; + } lock; + /** post/start/complete/wait specific synchronization data */ + struct { + /** group passed to ompi_osc_pt2pt_start */ + ompi_group_t *group; + } pscw; + } sync; + + /** array of peers for this sync */ + union { + /** multiple peers (lock all, pscw, fence) */ + struct ompi_osc_pt2pt_peer_t **peers; + /** single peer (targeted lock) */ + struct ompi_osc_pt2pt_peer_t *peer; + } peer_list; + + /** number of peers */ + int num_peers; + + /** number of synchronization messages expected */ + volatile int32_t sync_expected; + + /** eager sends are active to all peers in this access epoch */ + volatile bool eager_send_active; + + /** communication has started on this epoch */ + bool epoch_active; + + /** lock to protect sync structure members 
*/ + opal_mutex_t lock; + + /** condition variable for changes in the sync object */ + opal_condition_t cond; +}; +typedef struct ompi_osc_pt2pt_sync_t ompi_osc_pt2pt_sync_t; + +OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_sync_t); + +/** + * @brief allocate a new synchronization object + * + * @param[in] module osc pt2pt module + * + * @returns NULL on failure + * @returns a new synchronization object on success + */ +ompi_osc_pt2pt_sync_t *ompi_osc_pt2pt_sync_allocate (struct ompi_osc_pt2pt_module_t *module); + +/** + * @brief release a synchronization object + * + * @param[in] pt2pt_sync synchronization object allocated by ompi_osc_pt2pt_sync_allocate() + */ +void ompi_osc_pt2pt_sync_return (ompi_osc_pt2pt_sync_t *pt2pt_sync); + +/** + * Check if the target is part of a PSCW access epoch + * + * @param[in] module osc pt2pt module + * @param[in] target target rank + * @param[out] peer peer object + * + * @returns false if the window is not in a PSCW access epoch or the peer is not + * in the group passed to MPI_Win_start + * @returns true otherwise + * + * This function verifies the target is part of an active PSCW access epoch. 
+ */ +bool ompi_osc_pt2pt_sync_pscw_peer (struct ompi_osc_pt2pt_module_t *module, int target, struct ompi_osc_pt2pt_peer_t **peer); + +/** + * Wait until the access epoch has started (eager sends are active) + */ +static inline void ompi_osc_pt2pt_sync_wait (ompi_osc_pt2pt_sync_t *sync) +{ + OPAL_THREAD_LOCK(&sync->lock); + while (!sync->eager_send_active) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "waiting for access epoch to start")); + opal_condition_wait(&sync->cond, &sync->lock); + } + OPAL_THREAD_UNLOCK(&sync->lock); + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "access epoch ready")); +} + +/** + * Wait until all expected synchronization messages have been received + */ +static inline void ompi_osc_pt2pt_sync_wait_expected (ompi_osc_pt2pt_sync_t *sync) +{ + OPAL_THREAD_LOCK(&sync->lock); + while (sync->sync_expected) { + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "waiting for %d syncronization messages", + sync->sync_expected)); + opal_condition_wait(&sync->cond, &sync->lock); + } + OPAL_THREAD_UNLOCK(&sync->lock); + + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, + "all synchronization messages received")); +} + +static inline void ompi_osc_pt2pt_sync_expected (ompi_osc_pt2pt_sync_t *sync) +{ + int32_t new_value = OPAL_THREAD_ADD32 (&sync->sync_expected, -1); + if (0 == new_value) { + OPAL_THREAD_LOCK(&sync->lock); + if (!(sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK && sync->num_peers > 1)) { + sync->eager_send_active = true; + } + opal_condition_broadcast (&sync->cond); + OPAL_THREAD_UNLOCK(&sync->lock); + } +} + +static inline void ompi_osc_pt2pt_sync_reset (ompi_osc_pt2pt_sync_t *sync) +{ + sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_NONE; + sync->eager_send_active = false; + sync->epoch_active = 0; + sync->peer_list.peers = NULL; + sync->sync.pscw.group = NULL; +} + +#endif /* OMPI_OSC_PT2PT_SYNC_H */ diff --git a/ompi/mca/osc/pt2pt/owner.txt 
b/ompi/mca/osc/pt2pt/owner.txt index e6150b6b0fc..48ac538cbb0 100644 --- a/ompi/mca/osc/pt2pt/owner.txt +++ b/ompi/mca/osc/pt2pt/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: project +owner: LANL status: active diff --git a/ompi/mca/osc/rdma/Makefile.am b/ompi/mca/osc/rdma/Makefile.am new file mode 100644 index 00000000000..80082a0e711 --- /dev/null +++ b/ompi/mca/osc/rdma/Makefile.am @@ -0,0 +1,64 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University. +# All rights reserved. +# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. +# All rights reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +rdma_sources = \ + osc_rdma.h \ + osc_rdma_module.c \ + osc_rdma_comm.h \ + osc_rdma_comm.c \ + osc_rdma_accumulate.c \ + osc_rdma_accumulate.h \ + osc_rdma_component.c \ + osc_rdma_frag.h \ + osc_rdma_frag.c \ + osc_rdma_request.h \ + osc_rdma_request.c \ + osc_rdma_active_target.h \ + osc_rdma_active_target.c \ + osc_rdma_passive_target.h \ + osc_rdma_passive_target.c \ + osc_rdma_lock.h \ + osc_rdma_peer.h \ + osc_rdma_peer.c \ + osc_rdma_dynamic.h \ + osc_rdma_dynamic.c \ + osc_rdma_sync.h \ + osc_rdma_sync.c \ + osc_rdma_types.h + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+ +if MCA_BUILD_ompi_osc_rdma_DSO +component_noinst = +component_install = mca_osc_rdma.la +else +component_noinst = libmca_osc_rdma.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_osc_rdma_la_SOURCES = $(rdma_sources) +mca_osc_rdma_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_osc_rdma_la_SOURCES = $(rdma_sources) +libmca_osc_rdma_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/osc/rdma/configure.m4 b/ompi/mca/osc/rdma/configure.m4 new file mode 100644 index 00000000000..47bd082413c --- /dev/null +++ b/ompi/mca/osc/rdma/configure.m4 @@ -0,0 +1,26 @@ +# -*- shell-script -*- +# +# Copyright (c) 2013 Sandia National Laboratories. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_ompi_osc_rdma_POST_CONFIG(will_build) +# ---------------------------------------- +# Only require the tag if we're actually going to be built, since bml +# is one of the ones frequently disabled for large installs. +AC_DEFUN([MCA_ompi_osc_rdma_POST_CONFIG], [ + AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([BML])]) +])dnl + +# MCA_ompi_osc_rdma_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +# We can always build, unless we were explicitly disabled. +AC_DEFUN([MCA_ompi_osc_rdma_CONFIG],[ + AC_CONFIG_FILES([ompi/mca/osc/rdma/Makefile]) + [$1] +])dnl diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h new file mode 100644 index 00000000000..4f62e8242b4 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma.h @@ -0,0 +1,522 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2006 The Trustees of the University of Tennessee. + * All rights reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_OSC_RDMA_H +#define OMPI_OSC_RDMA_H + +#include "ompi_config.h" +#include "opal/class/opal_free_list.h" +#include "opal/class/opal_hash_table.h" +#include "opal/threads/threads.h" +#include "opal/util/output.h" + +#include "opal/mca/shmem/shmem.h" +#include "opal/mca/shmem/base/base.h" + +#include "ompi/win/win.h" +#include "ompi/communicator/communicator.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/request/request.h" +#include "ompi/mca/osc/osc.h" +#include "ompi/mca/osc/base/base.h" +#include "opal/mca/btl/btl.h" +#include "ompi/memchecker.h" +#include "ompi/op/op.h" +#include "opal/align.h" + +#include "osc_rdma_types.h" +#include "osc_rdma_sync.h" + +#include "osc_rdma_peer.h" + +#include "opal_stdint.h" + +/** + * @brief osc rdma component structure + */ +struct ompi_osc_rdma_component_t { + /** Extend the basic osc component interface */ + ompi_osc_base_component_t super; + + /** lock access to modules */ + opal_mutex_t lock; + + /** cid -> module mapping */ + opal_hash_table_t modules; + + /** free list of ompi_osc_rdma_frag_t structures */ + opal_free_list_t frags; + + /** Free list of requests */ + opal_free_list_t requests; + + /** RDMA component buffer size */ + unsigned int buffer_size; + + /** aggregation limit */ + unsigned int aggregation_limit; + + /** List of requests that need to be freed */ + opal_list_t request_gc; + + /** List of buffers that need to be freed */ + opal_list_t buffer_gc; + + 
/** Maximum number of segments that can be attached to a dynamic window */ + unsigned int max_attach; + + /** Default value of the no_locks info key for new windows */ + bool no_locks; + + /** Accumulate operations will only operate on a single intrinsic datatype */ + bool acc_single_intrinsic; + + /** Use network AMOs when available */ + bool acc_use_amo; + + /** Priority of the osc/rdma component */ + unsigned int priority; + + /** aggregation free list */ + opal_free_list_t aggregate; +}; +typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t; + +struct ompi_osc_rdma_frag_t; + +/** + * @brief osc rdma module structure + * + * Each MPI window is associated with a single osc module. This struct + * stores the data relevant to the osc/rdma component. + */ +struct ompi_osc_rdma_module_t { + /** Extend the basic osc module interface */ + ompi_osc_base_module_t super; + + /** pointer back to MPI window */ + struct ompi_win_t *win; + + /** Mutex lock protecting module data */ + opal_mutex_t lock; + + + /* window configuration */ + + /** value of same_disp_unit info key for this window */ + bool same_disp_unit; + + /** value of same_size info key for this window */ + bool same_size; + + /** passive-target synchronization will not be used in this window */ + bool no_locks; + /** per-window acc_single_intrinsic setting (presumably mirrors the component field of the same name: accumulates only operate on a single intrinsic datatype -- TODO confirm) */ + bool acc_single_intrinsic; + /** per-window acc_use_amo setting (presumably mirrors the component field of the same name: use network AMOs when available -- TODO confirm) */ + bool acc_use_amo; + + /** flavor of this window */ + int flavor; + + /** size of local window */ + size_t size; + + /** Local displacement unit. */ + int disp_unit; + + + /** global leader */ + ompi_osc_rdma_peer_t *leader; + + /** pointer to free on cleanup (may be NULL) */ + void *free_after; + + /** local state structure (shared memory) */ + ompi_osc_rdma_state_t *state; + + /** node-level communication data (shared memory) */ + unsigned char *node_comm_info; + + /* only relevant on the lowest rank on each node (shared memory) */ + ompi_osc_rdma_rank_data_t *rank_array; + + + /** communicator created with this window. 
The cid of this communicator is the key used + * in the component's modules mapping. */ + ompi_communicator_t *comm; + + /* temporary communicators for window initialization */ + ompi_communicator_t *local_leaders; + ompi_communicator_t *shared_comm; + + /** node id of this rank */ + int node_id; + + /** number of nodes */ + int node_count; + + /** handle valid for local state (valid for local data for MPI_Win_allocate) */ + mca_btl_base_registration_handle_t *state_handle; + + /** registration handle for the window base (only used for MPI_Win_create) */ + mca_btl_base_registration_handle_t *base_handle; + + /** size of a region */ + size_t region_size; + + /** size of the state structure */ + size_t state_size; + + /** offset in the shared memory segment where the state array starts */ + size_t state_offset; + + /* ********************* sync data ************************ */ + + /** global sync object (PSCW, fence, lock all) */ + ompi_osc_rdma_sync_t all_sync; + + /** current group associated with the pscw exposure epoch */ + struct ompi_group_t *pw_group; + + /** list of unmatched post messages */ + opal_list_t pending_posts; + + /* ********************* LOCK data ************************ */ + + /** number of outstanding locks */ + osc_rdma_counter_t passive_target_access_epoch; + + /** origin side list of locks currently outstanding */ + opal_hash_table_t outstanding_locks; + + /** array of locks (small jobs) */ + ompi_osc_rdma_sync_t **outstanding_lock_array; + + + /* ******************* peer storage *********************** */ + + /** hash table of allocated peers */ + opal_hash_table_t peer_hash; + + /** array of allocated peers (small jobs) */ + ompi_osc_rdma_peer_t **peer_array; + + /** lock for peer hash table/array */ + opal_mutex_t peer_lock; + + + /** BTL in use */ + struct mca_btl_base_module_t *selected_btl; + + /** registered fragment used for locally buffered RDMA transfers */ + struct ompi_osc_rdma_frag_t *rdma_frag; + + /** registration handles for dynamically attached regions. 
These are not stored + * in the state structure as it is entirely local. */ + ompi_osc_rdma_handle_t *dynamic_handles; + + /** shared memory segment. this segment holds this node's portion of the rank -> node + * mapping array, node communication data (node_comm_info), state for all local ranks, + * and data for all local ranks (MPI_Win_allocate only) */ + void *segment_base; + + /** opal shared memory structure for the shared memory segment */ + opal_shmem_ds_t seg_ds; + + + /* performance values */ + + /** number of times a put had to be retried */ + unsigned long put_retry_count; + + /** number of times a get had to be retried */ + unsigned long get_retry_count; +}; +typedef struct ompi_osc_rdma_module_t ompi_osc_rdma_module_t; +OMPI_MODULE_DECLSPEC extern ompi_osc_rdma_component_t mca_osc_rdma_component; + +#define GET_MODULE(win) ((ompi_osc_rdma_module_t*) win->w_osc_module) + +int ompi_osc_rdma_free (struct ompi_win_t *win); + + +/* peer functions */ + +/** + * @brief cache a peer object + * + * @param[in] module osc rdma module + * @param[in] peer peer object to cache + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERR_OUT_OF_RESOURCE on failure + */ +int ompi_osc_module_add_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer); + +/** + * @brief check if a peer object is cached for a remote rank + * + * @param[in] module osc rdma module + * @param[in] peer_id remote peer rank + * + * @returns peer object on success + * @returns NULL if a peer object is not cached for the peer + */ +static inline ompi_osc_rdma_peer_t *ompi_osc_module_get_peer (ompi_osc_rdma_module_t *module, int peer_id) +{ + if (NULL == module->peer_array) { + ompi_osc_rdma_peer_t *peer = NULL; + (void) opal_hash_table_get_value_uint32 (&module->peer_hash, peer_id, (void **) &peer); + return peer; + } + + return module->peer_array[peer_id]; +} + +/** + * @brief get the peer object for a remote rank + * + * @param[in] module osc rdma module + * @param[in] peer_id remote 
peer rank + */ +static inline ompi_osc_rdma_peer_t *ompi_osc_rdma_module_peer (ompi_osc_rdma_module_t *module, int peer_id) +{ + ompi_osc_rdma_peer_t *peer; + + peer = ompi_osc_module_get_peer (module, peer_id); + if (NULL != peer) { + return peer; + } + + return ompi_osc_rdma_peer_lookup (module, peer_id); +} + +/** + * @brief check if this process is in a passive target access epoch + * + * @param[in] module osc rdma module + * + * @returns true if the module's passive_target_access_epoch counter is non-zero + * @returns false otherwise + */ +static inline bool ompi_osc_rdma_in_passive_epoch (ompi_osc_rdma_module_t *module) +{ + return 0 != module->passive_target_access_epoch; +} + +/** + * @brief register a memory region with the selected BTL + * + * @param[in] module osc rdma module + * @param[in] endpoint btl endpoint handed to the BTL registration function + * @param[in] ptr base address of the region + * @param[in] size size of the region in bytes + * @param[in] flags registration flags handed to the BTL registration function + * @param[out] handle registration handle. set to NULL if the selected BTL does not provide + * a btl_register_mem function + * @param[in] line line number of the caller (reported on failure) + * @param[in] file file name of the caller (reported on failure) + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERR_OUT_OF_RESOURCE if the BTL could not register the region + * + * Use the ompi_osc_rdma_register() macro, which supplies line and file. + */ +static inline int _ompi_osc_rdma_register (ompi_osc_rdma_module_t *module, struct mca_btl_base_endpoint_t *endpoint, void *ptr, + size_t size, uint32_t flags, mca_btl_base_registration_handle_t **handle, int line, const char *file) +{ + if (module->selected_btl->btl_register_mem) { + /* %lu needs an unsigned long and %p a void pointer: cast explicitly, as the failure message below does */ + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "registering segment with btl. range: %p - %p (%lu bytes)", + ptr, (void *) ((char *) ptr + size), (unsigned long) size); + + *handle = module->selected_btl->btl_register_mem (module->selected_btl, endpoint, ptr, size, flags); + if (OPAL_UNLIKELY(NULL == *handle)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "failed to register pointer with selected BTL. base: %p, " + "size: %lu. file: %s, line: %d", ptr, (unsigned long) size, file, line); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } else { + *handle = NULL; + } + + return OMPI_SUCCESS; +} + +#define ompi_osc_rdma_register(...) _ompi_osc_rdma_register(__VA_ARGS__, __LINE__, __FILE__) + +/** + * @brief release a registration handle + * + * @param[in] module osc rdma module + * @param[in] handle registration handle (NULL handles are ignored) + * @param[in] line line number of the caller (currently unused) + * @param[in] file file name of the caller (currently unused) + */ +static inline void _ompi_osc_rdma_deregister (ompi_osc_rdma_module_t *module, mca_btl_base_registration_handle_t *handle, int line, const char *file) +{ + if (handle) { + module->selected_btl->btl_deregister_mem (module->selected_btl, handle); + } +} + +#define ompi_osc_rdma_deregister(...) 
_ompi_osc_rdma_deregister(__VA_ARGS__, __LINE__, __FILE__) +/** progress outstanding communication by calling opal_progress(). the module argument is currently unused */ +static inline void ompi_osc_rdma_progress (ompi_osc_rdma_module_t *module) { + opal_progress (); +} + +/** + * Find the first outstanding lock of the target. + * + * @param[in] module osc rdma module + * @param[in] target target rank + * @param[out] peer peer object associated with the target (may be NULL. only written + * when a lock is found) + * + * @returns an outstanding lock on success + * @returns NULL if no lock on the target is outstanding + * + * This function looks for an outstanding lock to the target. If a lock exists it is returned. + * The lock array is used when it has been allocated, otherwise the hash table is searched. + */ +static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_lock_find (ompi_osc_rdma_module_t *module, int target, + ompi_osc_rdma_peer_t **peer) +{ + ompi_osc_rdma_sync_t *outstanding_lock = NULL; + + if (OPAL_LIKELY(NULL != module->outstanding_lock_array)) { + outstanding_lock = module->outstanding_lock_array[target]; + } else { + (void) opal_hash_table_get_value_uint32 (&module->outstanding_locks, (uint32_t) target, (void **) &outstanding_lock); + } + + if (NULL != outstanding_lock && peer) { + *peer = outstanding_lock->peer_list.peer; + } + + return outstanding_lock; +} + +/** + * Add an outstanding lock + * + * @param[in] module osc rdma module + * @param[in] lock lock object + * + * This function inserts a lock object into the set of outstanding locks, keyed by the + * lock's target rank. The caller must be holding the module lock. + */ +static inline void ompi_osc_rdma_module_lock_insert (struct ompi_osc_rdma_module_t *module, ompi_osc_rdma_sync_t *lock) +{ + if (OPAL_LIKELY(NULL != module->outstanding_lock_array)) { + module->outstanding_lock_array[lock->sync.lock.target] = lock; + } else { + (void) opal_hash_table_set_value_uint32 (&module->outstanding_locks, (uint32_t) lock->sync.lock.target, (void *) lock); + } +} + + +/** + * Remove an outstanding lock + * + * @param[in] module osc rdma module + * @param[in] lock lock object + * + * This function removes a lock object from the set of outstanding locks. The caller must be holding the module + * lock. 
+ */ +static inline void ompi_osc_rdma_module_lock_remove (struct ompi_osc_rdma_module_t *module, ompi_osc_rdma_sync_t *lock) +{ + if (OPAL_LIKELY(NULL != module->outstanding_lock_array)) { + module->outstanding_lock_array[lock->sync.lock.target] = NULL; + } else { + (void) opal_hash_table_remove_value_uint32 (&module->outstanding_locks, (uint32_t) lock->sync.lock.target); + } +} + +/** + * Lookup a synchronization object associated with the target + * + * @param[in] module osc rdma module + * @param[in] target target rank + * @param[out] peer peer object. must not be NULL: it is written unconditionally when the + * window is in a fence or lock_all epoch + * + * @returns NULL if the target is not locked, fenced, or part of a pscw sync + * @returns synchronization object on success + * + * This function returns the synchronization object associated with an access epoch for + * the target. If the target is not part of any current access epoch then NULL is returned. + * + * Side effect: when the global sync object is of type fence or lock (lock_all) this lookup + * marks its epoch as active (all_sync.epoch_active). + */ +static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_sync_lookup (ompi_osc_rdma_module_t *module, int target, struct ompi_osc_rdma_peer_t **peer) +{ + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "looking for synchronization object for target %d", target); + + switch (module->all_sync.type) { + case OMPI_OSC_RDMA_SYNC_TYPE_NONE: + if (!module->no_locks) { + return ompi_osc_rdma_module_lock_find (module, target, peer); + } + + return NULL; + case OMPI_OSC_RDMA_SYNC_TYPE_FENCE: + case OMPI_OSC_RDMA_SYNC_TYPE_LOCK: + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found fence/lock_all access epoch for target %d", target); + + /* the fence/lock_all epoch is now active */ + module->all_sync.epoch_active = true; + *peer = ompi_osc_rdma_module_peer (module, target); + + return &module->all_sync; + case OMPI_OSC_RDMA_SYNC_TYPE_PSCW: + if (ompi_osc_rdma_sync_pscw_peer (module, target, peer)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found PSCW access epoch target for %d", target); + return &module->all_sync; + } + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no access epoch found for target %d", target); + + return NULL; 
+} + +/** + * @brief complete all outstanding rdma operations on a synchronization object + * + * @param[in] sync synchronization object + * + * Aggregations attached to the sync object are flushed first. opal_progress() is then + * called (at least once) until sync->outstanding_rdma is zero. + */ +static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t *sync) +{ + ompi_osc_rdma_aggregation_t *aggregation, *next; + + if (opal_list_get_size (&sync->aggregations)) { + OPAL_THREAD_SCOPED_LOCK(&sync->lock, + OPAL_LIST_FOREACH_SAFE(aggregation, next, &sync->aggregations, ompi_osc_rdma_aggregation_t) { + ompi_osc_rdma_peer_aggregate_flush (aggregation->peer); + }); + } + + do { + opal_progress (); + } while (sync->outstanding_rdma); +} + +/** + * @brief check if an access epoch is active + * + * @param[in] module osc rdma module + * + * @returns true if any type of access epoch is active + * @returns false otherwise + * + * This function is used to check for conflicting access epochs. + */ +static inline bool ompi_osc_rdma_access_epoch_active (ompi_osc_rdma_module_t *module) +{ + return (module->all_sync.epoch_active || ompi_osc_rdma_in_passive_epoch (module)); +} +/** + * @brief return an aggregation object to the component's free list + * + * @param[in] aggregation aggregation object + * + * If the aggregation is attached to a sync object it is first removed from that + * object's aggregation list. + */ +static inline void ompi_osc_rdma_aggregation_return (ompi_osc_rdma_aggregation_t *aggregation) +{ + if (aggregation->sync) { + opal_list_remove_item (&aggregation->sync->aggregations, (opal_list_item_t *) aggregation); + } + + opal_free_list_return(&mca_osc_rdma_component.aggregate, (opal_free_list_item_t *) aggregation); +} + +/** + * @brief check if a return code is an out-of-resource error + * + * @param[in] rc opal return code + * + * @returns true if rc is OPAL_ERR_OUT_OF_RESOURCE or OPAL_ERR_TEMP_OUT_OF_RESOURCE + * @returns false otherwise + */ +__opal_attribute_always_inline__ +static inline bool ompi_osc_rdma_oor (int rc) +{ + /* check for OPAL_SUCCESS first to short-circuit the statement in the common case */ + return (OPAL_SUCCESS != rc && (OPAL_ERR_OUT_OF_RESOURCE == rc || OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc)); +} + +#endif /* OMPI_OSC_RDMA_H */ diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c new file mode 100644 index 00000000000..2d0cf71a9c7 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c @@ -0,0 +1,1250 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2016 Los Alamos 
National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "osc_rdma_accumulate.h" +#include "osc_rdma_request.h" +#include "osc_rdma_comm.h" +#include "opal/util/show_help.h" + +#include "ompi/mca/osc/base/osc_base_obj_convert.h" + +static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count, ompi_datatype_t *source_datatype, + void *result_buffer, int result_count, ompi_datatype_t *result_datatype, + ompi_osc_rdma_peer_t *peer, uint64_t target_address, + mca_btl_base_registration_handle_t *target_handle, int target_count, + ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_module_t *module, + ompi_osc_rdma_request_t *request) +{ + int ret = OMPI_SUCCESS; + + do { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing accumulate with local region(s)"); + + if (!ompi_osc_rdma_peer_is_exclusive (peer)) { + (void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); + } + + if (NULL != result_buffer) { + /* get accumulate */ + + ret = ompi_datatype_sndrcv ((void *) (intptr_t) target_address, target_count, target_datatype, + result_buffer, result_count, result_datatype); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + break; + } + } + + if (&ompi_mpi_op_no_op.op != op) { + if (&ompi_mpi_op_replace.op != op) { + ret = ompi_osc_base_sndrcv_op (source_buffer, source_count, source_datatype, (void *) (intptr_t) target_address, + target_count, target_datatype, op); + } else { + ret = ompi_datatype_sndrcv (source_buffer, source_count, source_datatype, (void *) (intptr_t) target_address, + target_count, target_datatype); + } + } + + if (!ompi_osc_rdma_peer_is_exclusive (peer)) { + (void) ompi_osc_rdma_lock_release_exclusive (module, peer, 
offsetof (ompi_osc_rdma_state_t, accumulate_lock)); + } + } while (0); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "local accumulate failed with ompi error code %d", ret); + return ret; + } + + if (request) { + /* NTH: is it ok to use an ompi error code here? */ + ompi_osc_rdma_request_complete (request, ret); + } + + return ret; +} + +static inline int ompi_osc_rdma_cas_local (const void *source_addr, const void *compare_addr, void *result_addr, + ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer, + uint64_t target_address, mca_btl_base_registration_handle_t *target_handle, + ompi_osc_rdma_module_t *module) +{ + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing compare-and-swap with local regions"); + + ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); + + memcpy (result_addr, (void *) (uintptr_t) target_address, datatype->super.size); + + if (0 == memcmp (compare_addr, result_addr, datatype->super.size)) { + memcpy ((void *) (uintptr_t) target_address, source_addr, datatype->super.size); + } + + ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); + + return OMPI_SUCCESS; +} + +/* completion of an accumulate put */ +static void ompi_osc_rdma_acc_put_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, mca_btl_base_registration_handle_t *local_handle, + void *context, void *data, int status) +{ + ompi_osc_rdma_request_t *request = (ompi_osc_rdma_request_t *) context; + ompi_osc_rdma_sync_t *sync = request->sync; + ompi_osc_rdma_peer_t *peer = request->peer; + + OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "remote accumulate (put/get) complete on " + "sync %p. local address %p. 
opal status %d", (void *) sync, local_address, status); + + ompi_osc_rdma_frag_complete (request->frag); + ompi_osc_rdma_request_complete (request, status); + + if (!ompi_osc_rdma_peer_is_exclusive (peer)) { + (void) ompi_osc_rdma_lock_release_exclusive (sync->module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); + } + + ompi_osc_rdma_sync_rdma_dec (sync); + peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING; +} + +/* completion of an accumulate get operation */ +static void ompi_osc_rdma_acc_get_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, mca_btl_base_registration_handle_t *local_handle, + void *context, void *data, int status) +{ + ompi_osc_rdma_request_t *request = (ompi_osc_rdma_request_t *) context; + intptr_t source = (intptr_t) local_address + request->offset; + ompi_osc_rdma_sync_t *sync = request->sync; + ompi_osc_rdma_module_t *module = sync->module; + + assert (OMPI_SUCCESS == status); + + OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "remote accumulate get complete on sync %p. " + "status %d. request type %d", (void *) sync, status, request->type); + + if (OMPI_SUCCESS == status && OMPI_OSC_RDMA_TYPE_GET_ACC == request->type) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unpacking get accumulate result into user buffer"); + if (NULL == request->result_addr) { + /* result buffer is not necessarily contiguous. 
use the opal datatype engine to + * copy the data over in this case */ + struct iovec iov = {.iov_base = (void *) source, request->len}; + uint32_t iov_count = 1; + size_t size = request->len; + + opal_convertor_unpack (&request->convertor, &iov, &iov_count, &size); + opal_convertor_cleanup (&request->convertor); + } else { + /* copy contiguous data to the result buffer */ + ompi_datatype_sndrcv ((void *) source, request->len, MPI_BYTE, request->result_addr, + request->result_count, request->result_dt); + } + + if (&ompi_mpi_op_no_op.op == request->op) { + /* this is a no-op. nothing more to do except release resources and the accumulate lock */ + ompi_osc_rdma_acc_put_complete (btl, endpoint, local_address, local_handle, context, data, status); + + return; + } + } + + /* accumulate the data */ + if (&ompi_mpi_op_replace.op != request->op) { + ompi_op_reduce (request->op, request->origin_addr, (void *) source, request->origin_count, request->origin_dt); + } else { + memcpy ((void *) source, request->origin_addr, request->len); + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "putting locally accumulated result into target window"); + + /* initiate the put of the accumulated data */ + status = module->selected_btl->btl_put (module->selected_btl, endpoint, (void *) source, + request->target_address, local_handle, + (mca_btl_base_registration_handle_t *) request->ctx, + request->len, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_acc_put_complete, + request, NULL); + /* TODO -- we can do better. 
probably should queue up the next step and handle it in progress */ + assert (OPAL_SUCCESS == status); +} + +static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const void *source, int source_count, ompi_datatype_t *source_datatype, + void *result, int result_count, ompi_datatype_t *result_datatype, + ompi_osc_rdma_peer_t *peer, uint64_t target_address, + mca_btl_base_registration_handle_t *target_handle, int target_count, + ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_request_t *request) +{ + ompi_osc_rdma_module_t *module = sync->module; + const size_t btl_alignment_mask = ALIGNMENT_MASK(module->selected_btl->btl_get_alignment); + unsigned long len = target_count * target_datatype->super.size; + ompi_osc_rdma_frag_t *frag = NULL; + unsigned long aligned_len, offset; + char *ptr = NULL; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating accumulate on contiguous region of %lu bytes to remote address %" PRIx64 + ", sync %p", len, target_address, (void *) sync); + + offset = target_address & btl_alignment_mask;; + aligned_len = (len + offset + btl_alignment_mask) & ~btl_alignment_mask; + + ret = ompi_osc_rdma_frag_alloc (module, aligned_len, &frag, &ptr); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_WARN, "could not allocate a temporary buffer for accumulate"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + OPAL_THREAD_LOCK(&module->lock); + /* to ensure order wait until the previous accumulate completes */ + while (ompi_osc_rdma_peer_is_accumulating (peer)) { + OPAL_THREAD_UNLOCK(&module->lock); + ompi_osc_rdma_progress (module); + OPAL_THREAD_LOCK(&module->lock); + } + + peer->flags |= OMPI_OSC_RDMA_PEER_ACCUMULATING; + OPAL_THREAD_UNLOCK(&module->lock); + + if (!ompi_osc_rdma_peer_is_exclusive (peer)) { + (void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); + } + + /* set up the request */ + request->frag = frag; + 
request->origin_addr = (void *) source; + request->origin_dt = source_datatype; + request->origin_count = source_count; + request->ctx = (void *) target_handle; + request->result_addr = result; + request->result_count = result_count; + request->result_dt = result_datatype; + request->offset = (ptrdiff_t) target_address & btl_alignment_mask; + request->target_address = target_address; + request->len = len; + request->op = op; + request->sync = sync; + + ompi_osc_rdma_sync_rdma_inc (sync); + + if (&ompi_mpi_op_replace.op != op || OMPI_OSC_RDMA_TYPE_GET_ACC == request->type) { + /* align the target address */ + target_address = target_address & ~btl_alignment_mask; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating btl get. local: %p (handle %p), remote: 0x%" PRIx64 + " (handle %p)", ptr, (void *) frag->handle, target_address, (void *) target_handle); + + ret = module->selected_btl->btl_get (module->selected_btl, peer->data_endpoint, ptr, + target_address, frag->handle, target_handle, aligned_len, + 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_acc_get_complete, + request, NULL); + } else { + /* copy the put accumulate data */ + memcpy (ptr, source, len); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating btl put. 
local: %p (handle %p), remote: 0x%" PRIx64 + " (handle %p)", ptr, (void *) frag->handle, target_address, (void *) target_handle); + + ret = module->selected_btl->btl_put (module->selected_btl, peer->data_endpoint, ptr, + target_address, frag->handle, target_handle, len, 0, + MCA_BTL_NO_ORDER, ompi_osc_rdma_acc_put_complete, + request, NULL); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) { + return OMPI_SUCCESS; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "accumulate btl operation failed with opal error code %d", ret); + + if (!ompi_osc_rdma_peer_is_exclusive (peer)) { + (void) ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); + } + + ompi_osc_rdma_cleanup_rdma (sync, frag, NULL, NULL); + + return ret; +}
+
+/**
+ * Schedule an accumulate / get-accumulate for arbitrary datatypes.
+ *
+ * A fast path hands the whole operation to ompi_osc_rdma_gacc_contig() when
+ * every datatype involved is predefined and the target data fits in
+ * buffer_size / 8 bytes. Otherwise the source and target are decoded into
+ * contiguous segments with opal_convertor_raw() and one sub-request is issued
+ * per segment pair (each at most buffer_size / 8 bytes).
+ *
+ * @param source_datatype  may be NULL (MPI_NO_OP with MPI_Get_accumulate)
+ * @param result_datatype  NULL for a plain accumulate
+ * @param request          optional parent request; it is kept from completing
+ *                         until all sub-requests have been started
+ *
+ * @return OMPI_SUCCESS once everything is scheduled, MPI_ERR_TYPE when the
+ *         source and target primitive types differ, or the first error
+ *         reported by a helper.
+ */
+static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const void *source_addr, int source_count,
+                                             ompi_datatype_t *source_datatype, void *result_addr, int result_count,
+                                             ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
+                                             mca_btl_base_registration_handle_t *target_handle, int target_count,
+                                             ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_request_t *request)
+{
+    ompi_osc_rdma_module_t *module = sync->module;
+    struct iovec source_iovec[OMPI_OSC_RDMA_DECODE_MAX], target_iovec[OMPI_OSC_RDMA_DECODE_MAX];
+    const size_t acc_limit = (mca_osc_rdma_component.buffer_size >> 3);
+    uint32_t source_primitive_count, target_primitive_count;
+    opal_convertor_t source_convertor, target_convertor;
+    uint32_t source_iov_count, target_iov_count;
+    uint32_t source_iov_index, target_iov_index;
+    ompi_datatype_t *source_primitive, *target_primitive;
+    /* needed for opal_convertor_raw but not used */
+    size_t source_size, target_size;
+    ompi_osc_rdma_request_t *subreq;
+    size_t result_position, acc_len;
+    ptrdiff_t lb, extent;
+    bool done;
+    int ret;
+
+    (void) ompi_datatype_get_extent (target_datatype, &lb, &extent);
+    target_address += lb;
+
+    /* fast path for accumulate on built-in types */
+    if (OPAL_LIKELY((!source_count || ompi_datatype_is_predefined (source_datatype)) &&
+                    ompi_datatype_is_predefined (target_datatype) &&
+                    (!result_count || ompi_datatype_is_predefined (result_datatype)) &&
+                    (target_datatype->super.size * target_count <= acc_limit))) {
+        if (NULL == request) {
+            OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, request);
+            request->internal = true;
+        }
+
+        request->type = result_datatype ? OMPI_OSC_RDMA_TYPE_GET_ACC : OMPI_OSC_RDMA_TYPE_ACC;
+
+        if (source_datatype) {
+            (void) ompi_datatype_get_extent (source_datatype, &lb, &extent);
+            source_addr = (void *)((intptr_t) source_addr + lb);
+        }
+
+        if (result_datatype) {
+            (void) ompi_datatype_get_extent (result_datatype, &lb, &extent);
+            result_addr = (void *)((intptr_t) result_addr + lb);
+        }
+
+        ret = ompi_osc_rdma_gacc_contig (sync, source_addr, source_count, source_datatype, result_addr,
+                                         result_count, result_datatype, peer, target_address,
+                                         target_handle, target_count, target_datatype, op,
+                                         request);
+        if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
+            return OMPI_SUCCESS;
+        }
+
+        /* the fast path failed: undo the lb adjustments, the convertors will handle the lb */
+        if (source_datatype) {
+            (void) ompi_datatype_get_extent (source_datatype, &lb, &extent);
+            source_addr = (void *)((intptr_t) source_addr - lb);
+        }
+
+        if (result_datatype) {
+            (void) ompi_datatype_get_extent (result_datatype, &lb, &extent);
+            result_addr = (void *)((intptr_t) result_addr - lb);
+        }
+    }
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "scheduling accumulate on non-contiguous datatype(s)");
+
+    /* the convertor will handle lb from here */
+    (void) ompi_datatype_get_extent (target_datatype, &lb, &extent);
+    target_address -= lb;
+
+    /* get the primitive datatype info */
+    ret = ompi_osc_base_get_primitive_type_info (target_datatype, &target_primitive, &target_primitive_count);
+    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+        /* target datatype is not made up of a single basic datatype */
+        return ret;
+    }
+
+    if (source_datatype) {
+        ret = ompi_osc_base_get_primitive_type_info (source_datatype, &source_primitive, &source_primitive_count);
+        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+            /* source datatype is not made up of a single basic datatype */
+            return ret;
+        }
+
+        if (OPAL_UNLIKELY(source_primitive != target_primitive)) {
+            return MPI_ERR_TYPE;
+        }
+    }
+
+    /* prepare convertors for the source and target. these convertors will be used to determine the
+     * contiguous segments within the source and target. */
+    /* the source may be NULL if using MPI_OP_NO_OP with MPI_Get_accumulate */
+    if (source_datatype) {
+        OBJ_CONSTRUCT(&source_convertor, opal_convertor_t);
+        ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &source_datatype->super, source_count, source_addr,
+                                                        0, &source_convertor);
+        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+            /* do not leak the constructed convertor */
+            OBJ_DESTRUCT(&source_convertor);
+            return ret;
+        }
+    }
+
+    /* target_datatype can never be NULL */
+    OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
+    ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
+                                                    (void *) (intptr_t) target_address, 0, &target_convertor);
+    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+        OBJ_DESTRUCT(&target_convertor);
+        if (source_datatype) {
+            opal_convertor_cleanup (&source_convertor);
+            OBJ_DESTRUCT(&source_convertor);
+        }
+        return ret;
+    }
+
+    if (request) {
+        /* keep the request from completing until all the transfers have started */
+        request->outstanding_requests = 1;
+    }
+
+    target_iov_index = 0;
+    target_iov_count = 0;
+    result_position = 0;
+
+    do {
+        /* decode segments of the source data */
+        source_iov_count = OMPI_OSC_RDMA_DECODE_MAX;
+        source_iov_index = 0;
+        /* opal_convertor_raw returns done when it has reached the end of the data */
+        if (!source_datatype) {
+            /* no source data: use one unbounded dummy segment so the target drives the sizes */
+            done = true;
+            source_iovec[0].iov_len = (size_t) -1;
+            source_iovec[0].iov_base = NULL;
+            source_iov_count = 1;
+        } else {
+            done = opal_convertor_raw (&source_convertor, source_iovec, &source_iov_count, &source_size);
+        }
+
+        /* loop on the target segments until we have exhausted the decoded source data */
+        while (source_iov_index != source_iov_count) {
+            if (target_iov_index == target_iov_count) {
+                /* decode segments of the target buffer */
+                target_iov_count = OMPI_OSC_RDMA_DECODE_MAX;
+                target_iov_index = 0;
+                (void) opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
+            }
+
+            /* we already checked that the target was large enough. this should be impossible */
+            assert (0 != target_iov_count);
+
+            /* determine how much to put in this operation */
+            acc_len = min(target_iovec[target_iov_index].iov_len, source_iovec[source_iov_index].iov_len);
+            acc_len = min(acc_len, acc_limit);
+
+            /* set up the sub-request for this segment */
+            OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, subreq);
+            subreq->internal = true;
+            subreq->parent_request = request;
+            if (request) {
+                (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1);
+            }
+
+            if (result_datatype) {
+                /* prepare a convertor for this part of the result */
+                opal_convertor_copy_and_prepare_for_recv (ompi_mpi_local_convertor, &result_datatype->super, result_count,
+                                                          result_addr, 0, &subreq->convertor);
+                opal_convertor_set_position (&subreq->convertor, &result_position);
+                subreq->type = OMPI_OSC_RDMA_TYPE_GET_ACC;
+            } else {
+                subreq->type = OMPI_OSC_RDMA_TYPE_ACC;
+            }
+
+            /* retry with the SAME sub-request while resources are exhausted. allocating a new
+             * sub-request (and bumping outstanding_requests again) on every retry would leak the
+             * previous one and keep the parent from ever completing.
+             * NOTE(review): assumes gacc_contig leaves subreq usable after an out-of-resource
+             * failure -- confirm against its (not visible here) early exits. */
+            do {
+                ret = ompi_osc_rdma_gacc_contig (sync, source_iovec[source_iov_index].iov_base, acc_len / target_primitive->super.size,
+                                                 target_primitive, NULL, 0, NULL, peer, (uint64_t) (intptr_t) target_iovec[target_iov_index].iov_base,
+                                                 target_handle, acc_len / target_primitive->super.size, target_primitive, op, subreq);
+                if (OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != ret)) {
+                    break;
+                }
+
+                /* progress and try again */
+                ompi_osc_rdma_progress (module);
+            } while (1);
+
+            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+                /* something bad happened. need to figure out how to handle these errors */
+                goto release_convertors;
+            }
+
+            /* adjust io vectors */
+            target_iovec[target_iov_index].iov_len -= acc_len;
+            source_iovec[source_iov_index].iov_len -= acc_len;
+            target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + acc_len);
+            source_iovec[source_iov_index].iov_base = (void *)((intptr_t) source_iovec[source_iov_index].iov_base + acc_len);
+            result_position += acc_len;
+
+            /* without a source datatype the dummy segment is consumed after a single operation */
+            source_iov_index += !source_datatype || (0 == source_iovec[source_iov_index].iov_len);
+            target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
+        }
+    } while (!done);
+
+    if (request) {
+        /* release our reference so the request can complete */
+        (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1);
+    }
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished scheduling rdma on non-contiguous datatype(s)");
+
+    ret = OMPI_SUCCESS;
+
+release_convertors:
+    /* shared by the success and error paths so the convertors are never leaked */
+    if (source_datatype) {
+        opal_convertor_cleanup (&source_convertor);
+        OBJ_DESTRUCT(&source_convertor);
+    }
+
+    opal_convertor_cleanup (&target_convertor);
+    OBJ_DESTRUCT(&target_convertor);
+
+    return ret;
+}
+
+static void ompi_osc_rdma_cas_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, mca_btl_base_registration_handle_t *local_handle, + void *context, void *data, int status) +{ + ompi_osc_rdma_sync_t *sync = (ompi_osc_rdma_sync_t *) context; + ompi_osc_rdma_frag_t *frag = (ompi_osc_rdma_frag_t *) data; + void *result_addr = (void *)(intptr_t) ((int64_t *) local_address)[1]; + size_t size = ((int64_t *) local_address)[2]; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic compare-and-swap complete.
result: 0x%" PRIx64, + *((int64_t *) local_address)); + + /* copy the result */ + memcpy (result_addr, local_address, size); + + ompi_osc_rdma_sync_rdma_dec (sync); + ompi_osc_rdma_frag_complete (frag); +}
+
+/**
+ * Compare-and-swap implemented with the btl's atomic cswap.
+ *
+ * Only 8-byte values, or 4-byte values when the btl advertises
+ * MCA_BTL_ATOMIC_SUPPORTS_32BIT, are handled.
+ *
+ * A 24-byte fragment is used as the local buffer: slot 0 receives the fetched
+ * value, slots 1 and 2 carry the user's result pointer and the value size to
+ * ompi_osc_rdma_cas_atomic_complete().
+ *
+ * @return OMPI_SUCCESS when the operation was started or completed,
+ *         OMPI_ERR_NOT_SUPPORTED for unsupported sizes, otherwise the error
+ *         from the fragment allocation or the btl.
+ */
+static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const void *source_addr, const void *compare_addr,
+                                            void *result_addr, ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer,
+                                            uint64_t target_address, mca_btl_base_registration_handle_t *target_handle)
+{
+    ompi_osc_rdma_module_t *module = sync->module;
+    /* the MCA_BTL_ATOMIC_SUPPORTS_* bits are tested against btl_atomic_flags by the other
+     * atomic paths in this file; use the same field here instead of btl_flags */
+    const int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
+    const size_t size = datatype->super.size;
+    ompi_osc_rdma_frag_t *frag = NULL;
+    int64_t compare, source;
+    int ret, flags;
+    char *ptr;
+
+    if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags))) {
+        return OMPI_ERR_NOT_SUPPORTED;
+    }
+
+    compare = (8 == size) ? ((int64_t *) compare_addr)[0] : ((int32_t *) compare_addr)[0];
+    source = (8 == size) ? ((int64_t *) source_addr)[0] : ((int32_t *) source_addr)[0];
+    flags = (4 == size) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
+
+    /* log the values loaded above; reading 8 bytes through the user pointers would run past
+     * the end of a 4-byte operand */
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating compare-and-swap using %d-bit btl atomics. compare: 0x%"
+                     PRIx64 ", origin: 0x%" PRIx64, (int) size * 8, (uint64_t) compare, (uint64_t) source);
+
+    ret = ompi_osc_rdma_frag_alloc (module, 24, &frag, &ptr);
+    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+        return ret;
+    }
+
+    /* store the destination and size in the temporary buffer */
+    ((int64_t *) ptr)[1] = (intptr_t) result_addr;
+    ((int64_t *) ptr)[2] = size;
+
+    ompi_osc_rdma_sync_rdma_inc (sync);
+
+    do {
+        ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->data_endpoint, ptr, target_address,
+                                                      frag->handle, target_handle, compare, source, flags, MCA_BTL_NO_ORDER,
+                                                      ompi_osc_rdma_cas_atomic_complete, sync, frag);
+
+        ompi_osc_rdma_progress (module);
+    } while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
+
+    if (OPAL_SUCCESS != ret) {
+        /* the callback will not run for this operation: undo the bookkeeping here */
+        ompi_osc_rdma_sync_rdma_dec (sync);
+
+        if (1 == ret) {
+            /* a return of 1 is handled as immediate completion: the fetched value is read
+             * directly from the local buffer */
+            memcpy (result_addr, ptr, size);
+            ret = OMPI_SUCCESS;
+        }
+
+        ompi_osc_rdma_frag_complete (frag);
+    }
+
+    return ret;
+}
+
+static inline void ompi_osc_rdma_fetch_and_op_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, mca_btl_base_registration_handle_t *local_handle, + void *context, void *data, int status) +{ + ompi_osc_rdma_sync_t *sync = (ompi_osc_rdma_sync_t *) context; + ompi_osc_rdma_frag_t *frag = (ompi_osc_rdma_frag_t *) data; + void *result_addr = (void *)(intptr_t) ((int64_t *) local_address)[1]; + ompi_osc_rdma_request_t *req = (ompi_osc_rdma_request_t *) (intptr_t) ((int64_t *) local_address)[2]; + size_t size = ((int64_t *) local_address)[3]; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic fetch-and-op complete.
result: 0x%" PRIx64, + *((int64_t *) local_address)); + + /* copy the result */ + if (result_addr) { + memcpy (result_addr, local_address, size); + } + + ompi_osc_rdma_sync_rdma_dec (sync); + ompi_osc_rdma_frag_complete (frag); + if (req) { + ompi_osc_rdma_request_complete (req, status); + } +}
+
+/* translation from MPI op type to btl atomic op. ops without an entry are left at 0 and
+ * are rejected by the "0 == ompi_osc_rdma_op_mapping[...]" checks in the atomic paths. */
+static int ompi_osc_rdma_op_mapping[OMPI_OP_NUM_OF_TYPES] = {
+    [OMPI_OP_MAX] = MCA_BTL_ATOMIC_MAX,
+    [OMPI_OP_MIN] = MCA_BTL_ATOMIC_MIN,
+    [OMPI_OP_SUM] = MCA_BTL_ATOMIC_ADD,
+    [OMPI_OP_BAND] = MCA_BTL_ATOMIC_AND,
+    [OMPI_OP_BOR] = MCA_BTL_ATOMIC_OR,
+    [OMPI_OP_BXOR] = MCA_BTL_ATOMIC_XOR,
+    [OMPI_OP_LAND] = MCA_BTL_ATOMIC_LAND,
+    [OMPI_OP_LOR] = MCA_BTL_ATOMIC_LOR,
+    [OMPI_OP_LXOR] = MCA_BTL_ATOMIC_LXOR,
+    [OMPI_OP_REPLACE] = MCA_BTL_ATOMIC_SWAP,
+};
+
+/**
+ * Fetch-and-op on a single element using the btl's atomic fetching operation.
+ *
+ * @param result_addr  where to store the fetched value; may be NULL (this is
+ *                     how ompi_osc_rdma_acc_single_atomic() falls back to
+ *                     this function)
+ * @param extent       element size; only 8, or 4 with 32-bit btl support
+ * @param req          optional request completed when the operation finishes
+ *
+ * @return OMPI_SUCCESS if started/completed, OMPI_ERR_NOT_SUPPORTED when the
+ *         size, datatype or op cannot be mapped to a btl atomic, otherwise
+ *         the error from the fragment allocation or the btl.
+ */
+static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const void *origin_addr, void *result_addr, ompi_datatype_t *dt,
+                                              ptrdiff_t extent, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
+                                              mca_btl_base_registration_handle_t *target_handle, ompi_op_t *op, ompi_osc_rdma_request_t *req)
+{
+    ompi_osc_rdma_module_t *module = sync->module;
+    int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
+    ompi_osc_rdma_frag_t *frag = NULL;
+    int ret, btl_op, flags;
+    char *ptr = NULL;
+    int64_t origin;
+
+    if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
+        (!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
+        !ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
+        return OMPI_ERR_NOT_SUPPORTED;
+    }
+
+    flags = (4 == extent) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
+    if (OMPI_DATATYPE_FLAG_DATA_FLOAT & dt->super.flags) {
+        flags |= MCA_BTL_ATOMIC_FLAG_FLOAT;
+    }
+
+    btl_op = ompi_osc_rdma_op_mapping[op->op_type];
+
+    /* load the operand with its real width before logging it; an 8-byte read of a 4-byte
+     * operand would run past the end of the user's object */
+    origin = (8 == extent) ? ((int64_t *) origin_addr)[0] : ((int32_t *) origin_addr)[0];
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating fetch-and-op using %d-bit btl atomics. origin: 0x%" PRIx64,
+                     (4 == extent) ? 32 : 64, (uint64_t) origin);
+
+    ret = ompi_osc_rdma_frag_alloc (module, 32, &frag, &ptr);
+    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+        return ret;
+    }
+
+    /* store the destination, request, and extent in the temporary buffer for the callback */
+    ((int64_t *) ptr)[1] = (intptr_t) result_addr;
+    ((int64_t *) ptr)[2] = (intptr_t) req;
+    ((int64_t *) ptr)[3] = extent;
+
+    ompi_osc_rdma_sync_rdma_inc (sync);
+
+    do {
+        ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->data_endpoint, ptr, target_address,
+                                                    frag->handle, target_handle, btl_op, origin, flags,
+                                                    MCA_BTL_NO_ORDER, ompi_osc_rdma_fetch_and_op_atomic_complete,
+                                                    sync, frag);
+
+        ompi_osc_rdma_progress (module);
+    } while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
+
+    if (OPAL_SUCCESS != ret) {
+        ompi_osc_rdma_sync_rdma_dec (sync);
+
+        if (OPAL_LIKELY(1 == ret)) {
+            /* handled as immediate completion. result_addr is NULL when the caller only
+             * wants the accumulate, exactly as in the completion callback. */
+            if (result_addr) {
+                memcpy (result_addr, ptr, extent);
+            }
+            if (req) {
+                ompi_osc_rdma_request_complete (req, OMPI_SUCCESS);
+            }
+            ret = OPAL_SUCCESS;
+        }
+
+        ompi_osc_rdma_frag_complete (frag);
+    }
+
+    return ret;
+}
+
+/**
+ * Fetch-and-op on a single element (at most 8 bytes) emulated with a
+ * read / modify / 64-bit compare-and-swap loop on the enclosing 8-byte word.
+ *
+ * The function blocks until the swap has succeeded or an error occurred.
+ * slot 0 of the 16-byte fragment holds the new value (and receives the value
+ * returned by the swap), slot 1 holds the value that was read.
+ *
+ * @param result_addr  receives the previous target value; may be NULL
+ * @param req          optional request, completed on success
+ *
+ * @return OMPI_SUCCESS, OMPI_ERR_NOT_SUPPORTED if extent > 8, or the first
+ *         error from a helper / the btl.
+ */
+static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const void *origin_addr, void *result_addr, ompi_datatype_t *dt,
+                                           ptrdiff_t extent, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
+                                           mca_btl_base_registration_handle_t *target_handle, ompi_op_t *op, ompi_osc_rdma_request_t *req)
+{
+    ompi_osc_rdma_module_t *module = sync->module;
+    ompi_osc_rdma_frag_t *frag = NULL;
+    uint64_t address, offset;
+    uint64_t origin_value = 0;
+    char *ptr = NULL;
+    int ret;
+
+    if (extent > 8) {
+        return OMPI_ERR_NOT_SUPPORTED;
+    }
+
+    /* align the address. the user should not call with an unaligned address so don't need to range check here */
+    address = target_address & ~7;
+    offset = target_address & ~address;
+
+    /* for the trace message only: copy no more than extent bytes, and tolerate a NULL origin
+     * (the origin buffer is not meaningful with MPI_NO_OP) */
+    if (NULL != origin_addr && extent > 0) {
+        memcpy (&origin_value, origin_addr, extent);
+    }
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating fetch-and-op using compare-and-swap. origin: 0x%" PRIx64,
+                     origin_value);
+
+    ret = ompi_osc_rdma_frag_alloc (module, 16, &frag, &ptr);
+    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+        return ret;
+    }
+
+    do {
+        volatile bool complete = false;
+
+        /* read the current value of the enclosing 8-byte word */
+        ret = ompi_osc_get_data_blocking (module, peer->data_endpoint, address, target_handle, ptr, 8);
+        if (OMPI_SUCCESS != ret) {
+            ompi_osc_rdma_frag_complete (frag);
+            return ret;
+        }
+
+        /* remember what was read: it is both the compare value and the fetched result */
+        ((int64_t *) ptr)[1] = ((int64_t *) ptr)[0];
+
+        /* build the new value in slot 0. MPI_REPLACE overwrites the element, MPI_NO_OP must
+         * leave it untouched, every other op is reduced into it. */
+        if (&ompi_mpi_op_replace.op == op) {
+            memcpy (ptr + offset, origin_addr, extent);
+        } else if (&ompi_mpi_op_no_op.op != op) {
+            ompi_op_reduce (op, (void *) origin_addr, ptr + offset, 1, dt);
+        }
+
+        do {
+            ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->data_endpoint, ptr, address,
+                                                          frag->handle, target_handle, ((int64_t *) ptr)[1],
+                                                          ((int64_t *) ptr)[0], 0, MCA_BTL_NO_ORDER,
+                                                          ompi_osc_rdma_atomic_complete, (void *) &complete, NULL);
+
+            ompi_osc_rdma_progress (module);
+        } while (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
+
+        if (1 == ret) {
+            /* treat 1 as immediate completion, as the other atomic paths in this file do.
+             * returning it to the caller would look like a failure and the operation would
+             * be applied a second time by the fallback path. */
+            complete = true;
+            ret = OMPI_SUCCESS;
+        }
+
+        if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
+            break;
+        }
+
+        while (!complete) {
+            ompi_osc_rdma_progress (module);
+        }
+
+        /* the swap took effect iff the value it returned equals the value we compared against */
+        if (((int64_t *) ptr)[1] == ((int64_t *) ptr)[0]) {
+            break;
+        }
+    } while (1);
+
+    if (OMPI_SUCCESS == ret) {
+        if (result_addr) {
+            memcpy (result_addr, ptr + 8 + offset, extent);
+        }
+
+        /* the operation is already finished; without this a request-based caller would
+         * hand the user a request that never completes */
+        if (req) {
+            ompi_osc_rdma_request_complete (req, OMPI_SUCCESS);
+        }
+    }
+
+    ompi_osc_rdma_frag_complete (frag);
+
+    return ret;
+}
+
+/* btl completion callback for ompi_osc_rdma_acc_single_atomic(): context is the sync object,
+ * data is the optional request. */
+static void ompi_osc_rdma_acc_single_atomic_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
+                                                      void *local_address, mca_btl_base_registration_handle_t *local_handle,
+                                                      void *context, void *data, int status)
+{
+    ompi_osc_rdma_sync_t *sync = (ompi_osc_rdma_sync_t *) context;
+    ompi_osc_rdma_request_t *req = (ompi_osc_rdma_request_t *) data;
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "atomic accumulate complete");
+
+    ompi_osc_rdma_sync_rdma_dec (sync);
+    if (req) {
+        ompi_osc_rdma_request_complete (req, status);
+    }
+}
+
+/**
+ * Accumulate a single element using a non-fetching btl atomic operation.
+ *
+ * Falls back to ompi_osc_rdma_fetch_and_op_atomic() (with a NULL result
+ * buffer) when the btl does not set MCA_BTL_FLAGS_ATOMIC_OPS.
+ *
+ * @param extent  element size; only 8, or 4 with 32-bit btl support
+ * @param req     optional request completed when the operation finishes
+ *
+ * @return OMPI_SUCCESS if started/completed, OMPI_ERR_NOT_SUPPORTED when the
+ *         size, datatype or op cannot be mapped to a btl atomic, otherwise
+ *         the btl error.
+ */
+static int ompi_osc_rdma_acc_single_atomic (ompi_osc_rdma_sync_t *sync, const void *origin_addr, ompi_datatype_t *dt, ptrdiff_t extent,
+                                            ompi_osc_rdma_peer_t *peer, uint64_t target_address, mca_btl_base_registration_handle_t *target_handle,
+                                            ompi_op_t *op, ompi_osc_rdma_request_t *req)
+{
+    ompi_osc_rdma_module_t *module = sync->module;
+    int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
+    int ret, btl_op, flags;
+    int64_t origin;
+
+    if (!(module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) {
+        /* btl put atomics not supported or disabled. fall back on fetch-and-op */
+        return ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, NULL, dt, extent, peer, target_address, target_handle, op, req);
+    }
+
+    if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
+        (!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
+        !ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
+        return OMPI_ERR_NOT_SUPPORTED;
+    }
+
+    origin = (8 == extent) ? ((uint64_t *) origin_addr)[0] : ((uint32_t *) origin_addr)[0];
+
+    /* set the appropriate flags for this atomic */
+    flags = (4 == extent) ? MCA_BTL_ATOMIC_FLAG_32BIT : 0;
+    if (OMPI_DATATYPE_FLAG_DATA_FLOAT & dt->super.flags) {
+        flags |= MCA_BTL_ATOMIC_FLAG_FLOAT;
+    }
+
+    btl_op = ompi_osc_rdma_op_mapping[op->op_type];
+
+    /* report the width actually used and the operand loaded above; an 8-byte read through
+     * origin_addr would run past the end of a 4-byte operand */
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating accumulate using %d-bit btl atomics. origin: 0x%" PRIx64,
+                     (4 == extent) ? 32 : 64, (uint64_t) origin);
+
+    ompi_osc_rdma_sync_rdma_inc (sync);
+
+    do {
+        ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->data_endpoint, target_address,
+                                                   target_handle, btl_op, origin, flags, MCA_BTL_NO_ORDER,
+                                                   ompi_osc_rdma_acc_single_atomic_complete, sync, req);
+
+        ompi_osc_rdma_progress (module);
+    } while (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == ret || OPAL_ERR_TEMP_OUT_OF_RESOURCE == ret));
+
+    if (OPAL_SUCCESS != ret) {
+        /* the callback will not run for this operation: undo the bookkeeping here */
+        ompi_osc_rdma_sync_rdma_dec (sync);
+        if (1 == ret) {
+            /* handled as immediate completion */
+            if (req) {
+                ompi_osc_rdma_request_complete (req, OMPI_SUCCESS);
+            }
+            ret = OMPI_SUCCESS;
+        }
+    }
+
+    return ret;
+}
+
+/** + * ompi_osc_rdma_cas_get_complete: + * Note: This function will not work as is in a heterogeneous environment. + */ +static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, mca_btl_base_registration_handle_t *local_handle, + void *context, void *data, int status) +{ + ompi_osc_rdma_request_t *request = (ompi_osc_rdma_request_t *) context; + ompi_osc_rdma_sync_t *sync = request->sync; + ompi_osc_rdma_module_t *module = sync->module; + intptr_t source = (intptr_t) local_address + request->offset; + ompi_osc_rdma_frag_t *frag = request->frag; + ompi_osc_rdma_peer_t *peer = request->peer; + int ret; + + OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "remote compare-and-swap get complete on sync %p. " + "status %d", (void *) sync, status); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != status)) { + return; + } + + /* copy data to the user buffer (for gacc) */ + memcpy (request->result_addr, (void *) source, request->len); + + if (0 == memcmp ((void *) source, request->compare_addr, request->len)) { + /* the target and compare buffers match.
write the source to the target */ + memcpy ((void *) source, request->origin_addr, request->len); + + ret = module->selected_btl->btl_put (module->selected_btl, peer->data_endpoint, local_address, + request->target_address, local_handle, + (mca_btl_base_registration_handle_t *) request->ctx, + request->len, 0, MCA_BTL_NO_ORDER, + ompi_osc_rdma_acc_put_complete, request, NULL); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "could not start put to complete accumulate operation. opal return code " + "%d", ret); + } + + /* TODO -- we can do better. probably should queue up the next step and handle it in progress */ + assert (OPAL_SUCCESS == ret); + + return; + } + + /* this is a no-op. nothing more to do except release the accumulate lock */ + ompi_osc_rdma_frag_complete (frag); + + if (!ompi_osc_rdma_peer_is_exclusive (peer)) { + (void) ompi_osc_rdma_lock_release_exclusive (module, request->peer, + offsetof (ompi_osc_rdma_state_t, accumulate_lock)); + } + + /* the request is now complete and the outstanding rdma operation is complete */ + ompi_osc_rdma_request_complete (request, status); + + ompi_osc_rdma_sync_rdma_dec (sync); + peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING; +}
+
+/**
+ * Compare-and-swap implemented with plain RDMA: get the target data into a
+ * fragment, then compare (and, on a match, put the new value) from
+ * ompi_osc_rdma_cas_get_complete().
+ *
+ * Ordering with other accumulates to the same peer is enforced with the
+ * OMPI_OSC_RDMA_PEER_ACCUMULATING flag and, for non-exclusive peers, the
+ * remote accumulate lock. Both are released by the completion callbacks, or
+ * here when the get cannot be started.
+ *
+ * @return OMPI_SUCCESS once the get is started, otherwise the btl error.
+ */
+static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr, const void *compare_addr, void *result_addr,
+                            ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
+                            mca_btl_base_registration_handle_t *target_handle)
+{
+    ompi_osc_rdma_module_t *module = sync->module;
+    const size_t btl_alignment_mask = ALIGNMENT_MASK(module->selected_btl->btl_get_alignment);
+    unsigned long offset, aligned_len, len = datatype->super.size;
+    ompi_osc_rdma_frag_t *frag = NULL;
+    ompi_osc_rdma_request_t *request;
+    char *ptr = NULL;
+    int ret;
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating compare-and-swap using RDMA on %lu bytes to remote address %" PRIx64
+                     ", sync %p", len, target_address, (void *) sync);
+
+    OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, request);
+
+    request->internal = true;
+    request->type = OMPI_OSC_RDMA_TYPE_CSWAP;
+    request->sync = sync;
+
+    OPAL_THREAD_LOCK(&module->lock);
+    /* to ensure order wait until the previous accumulate completes */
+    while (ompi_osc_rdma_peer_is_accumulating (peer)) {
+        OPAL_THREAD_UNLOCK(&module->lock);
+        ompi_osc_rdma_progress (module);
+        OPAL_THREAD_LOCK(&module->lock);
+    }
+    peer->flags |= OMPI_OSC_RDMA_PEER_ACCUMULATING;
+    OPAL_THREAD_UNLOCK(&module->lock);
+
+    /* the get covers the aligned region that contains the target element */
+    offset = target_address & btl_alignment_mask;
+    aligned_len = (len + offset + btl_alignment_mask) & ~btl_alignment_mask;
+
+    do {
+        ret = ompi_osc_rdma_frag_alloc (module, aligned_len, &frag, &ptr);
+        if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
+            break;
+        }
+
+        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_WARN, "could not allocate an rdma fragment for compare-and-swap");
+        ompi_osc_rdma_progress (module);
+    } while (1);
+
+    if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
+        (void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
+    }
+
+    /* set up the request */
+    request->frag = frag;
+    request->origin_addr = (void *) source_addr;
+    request->ctx = (void *) target_handle;
+    request->result_addr = result_addr;
+    request->compare_addr = compare_addr;
+    request->result_dt = datatype;
+    request->offset = (ptrdiff_t) offset;
+    request->target_address = target_address;
+    request->len = len;
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "RDMA compare-and-swap initiating btl get");
+
+    /* count the operation before it is issued: the completion callback decrements the
+     * counter and may run as soon as the get is posted */
+    ompi_osc_rdma_sync_rdma_inc (sync);
+
+    do {
+        ret = module->selected_btl->btl_get (module->selected_btl, peer->data_endpoint, ptr,
+                                             target_address, frag->handle, target_handle,
+                                             aligned_len, 0, MCA_BTL_NO_ORDER,
+                                             ompi_osc_rdma_cas_get_complete, request, NULL);
+        if (OPAL_LIKELY(OPAL_SUCCESS == ret)) {
+            break;
+        }
+
+        if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE != ret && OPAL_ERR_TEMP_OUT_OF_RESOURCE != ret)) {
+            /* the callback will never run: release everything acquired above */
+            ompi_osc_rdma_sync_rdma_dec (sync);
+
+            if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
+                (void) ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
+            }
+            ompi_osc_rdma_frag_complete (frag);
+            OMPI_OSC_RDMA_REQUEST_RETURN(request);
+
+            /* otherwise the next accumulate to this peer would wait on the flag forever */
+            OPAL_THREAD_LOCK(&module->lock);
+            peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING;
+            OPAL_THREAD_UNLOCK(&module->lock);
+
+            return ret;
+        }
+
+        ompi_osc_rdma_progress (module);
+    } while (1);
+
+    return OMPI_SUCCESS;
+}
+
+
+int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr, + ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp, + ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_peer_t *peer; + mca_btl_base_registration_handle_t *target_handle; + ompi_osc_rdma_sync_t *sync; + uint64_t target_address; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "cswap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %d, %s", + (unsigned long) origin_addr, (unsigned long) compare_addr, (unsigned long) result_addr, + dt->name, target_rank, (int) target_disp, win->w_name); + + sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer); + if (OPAL_UNLIKELY(NULL == sync)) { + return OMPI_ERR_RMA_SYNC; + } + + ret = osc_rdma_get_remote_segment (module, peer, target_disp, dt->super.size, &target_address, &target_handle); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + return ret; + } + + if (win->w_acc_ops <= OMPI_WIN_ACCUMULATE_OPS_SAME_OP) { + /* the user has indicated that they will only use the same op (or same op and no op) + * for operations on overlapping memory ranges. that indicates it is safe to go ahead + * and use network atomic operations.
*/ + ret = ompi_osc_rdma_cas_atomic (sync, origin_addr, compare_addr, result_addr, dt, + peer, target_address, target_handle); + if (OMPI_SUCCESS == ret) { + return OMPI_SUCCESS; + } + } + + if (ompi_osc_rdma_peer_local_base (peer)) { + return ompi_osc_rdma_cas_local (origin_addr, compare_addr, result_addr, dt, + peer, target_address, target_handle, module); + } + + return cas_rdma (sync, origin_addr, compare_addr, result_addr, dt, peer, target_address, + target_handle); +}
+
+
+/* show the "unsupported-dt" help message and abort the job if datatype is one of the pair
+ * types that this component does not yet handle with MPI_MINLOC / MPI_MAXLOC. */
+static inline void ompi_osc_rdma_abort_on_unsupported_pair_type (ompi_datatype_t *datatype, ompi_op_t *op)
+{
+    if (MPI_SHORT_INT == datatype ||
+        MPI_DOUBLE_INT == datatype ||
+        MPI_LONG_INT == datatype ||
+        MPI_LONG_DOUBLE_INT == datatype) {
+        ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
+        opal_show_help("help-mca-osc-base.txt", "unsupported-dt", true,
+                       datatype->name,
+                       op->o_name,
+                       comm->c_my_rank);
+        ompi_mpi_abort(comm, -1);
+    }
+}
+
+/**
+ * Common implementation behind accumulate, get_accumulate, fetch_and_op and
+ * their request-based variants.
+ *
+ * The implementations are tried in this order: btl atomics (single element,
+ * when enabled on the module), compare-and-swap emulation, the local/self
+ * path, and finally the generic RDMA path.
+ *
+ * @param result_addr  NULL for a plain accumulate
+ * @param request      optional user-visible request (NULL for blocking calls)
+ *
+ * @return OMPI_SUCCESS or the error from the segment lookup / selected path.
+ */
+static inline
+int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count,
+                                            ompi_datatype_t *origin_datatype, void *result_addr, int result_count,
+                                            ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer,
+                                            int target_rank, MPI_Aint target_disp, int target_count,
+                                            ompi_datatype_t *target_datatype, ompi_op_t *op,
+                                            ompi_osc_rdma_request_t *request)
+{
+    ompi_osc_rdma_module_t *module = sync->module;
+    mca_btl_base_registration_handle_t *target_handle;
+    uint64_t target_address;
+    ptrdiff_t lb, extent;
+    int ret;
+
+    /* short-circuit case. note that origin_count may be 0 if op is MPI_NO_OP */
+    if ((result_addr && 0 == result_count) || 0 == target_count) {
+        if (request) {
+            ompi_osc_rdma_request_complete (request, MPI_SUCCESS);
+        }
+
+        return OMPI_SUCCESS;
+    }
+
+    /* TODO: Remove the following check when support is added.
+     * See the following issue for the current state:
+     * https://github.com/open-mpi/ompi/issues/1666
+     */
+    if (MPI_MINLOC == op || MPI_MAXLOC == op) {
+        ompi_osc_rdma_abort_on_unsupported_pair_type (origin_datatype, op);
+        ompi_osc_rdma_abort_on_unsupported_pair_type (result_datatype, op);
+        ompi_osc_rdma_abort_on_unsupported_pair_type (target_datatype, op);
+    }
+
+    (void) ompi_datatype_get_extent (origin_datatype, &lb, &extent);
+
+    ret = osc_rdma_get_remote_segment (module, peer, target_disp, extent * target_count, &target_address, &target_handle);
+    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+        return ret;
+    }
+
+    if (module->acc_single_intrinsic && extent <= 8) {
+        if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) {
+            /* try the btl atomics first; without a result buffer the non-fetching form is enough */
+            if (NULL == result_addr) {
+                ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, extent, peer, target_address,
+                                                       target_handle, op, request);
+            } else {
+                ret = ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, result_addr, origin_datatype, extent, peer, target_address,
+                                                         target_handle, op, request);
+            }
+
+            if (OMPI_SUCCESS == ret) {
+                return OMPI_SUCCESS;
+            }
+        }
+
+        ret = ompi_osc_rdma_fetch_and_op_cas (sync, origin_addr, result_addr, origin_datatype, extent, peer, target_address,
+                                              target_handle, op, request);
+        if (OMPI_SUCCESS == ret) {
+            return OMPI_SUCCESS;
+        }
+    }
+
+    if (ompi_osc_rdma_peer_local_base (peer)) {
+        /* local/self optimization */
+        return ompi_osc_rdma_gacc_local (origin_addr, origin_count, origin_datatype, result_addr, result_count,
+                                         result_datatype, peer, target_address, target_handle, target_count,
+                                         target_datatype, op, module, request);
+    }
+
+    return ompi_osc_rdma_gacc_master (sync, origin_addr, origin_count, origin_datatype, result_addr, result_count,
+                                      result_datatype, peer, target_address, target_handle, target_count,
+                                      target_datatype, op, request);
+}
+
+/* blocking get-accumulate entry point: look up the synchronization object for the target and
+ * hand off to the common implementation without a request. returns OMPI_ERR_RMA_SYNC when
+ * no synchronization object is found for target_rank. */
+int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype,
+                                  void *result_addr, int result_count, ompi_datatype_t *result_datatype,
+                                  int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype,
+                                  ompi_op_t *op, ompi_win_t *win)
+{
+    ompi_osc_rdma_module_t *module = GET_MODULE(win);
+    ompi_osc_rdma_peer_t *peer;
+    ompi_osc_rdma_sync_t *sync;
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "get_acc: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
+                     (unsigned long) origin_addr, origin_count, origin_datatype->name,
+                     (unsigned long) result_addr, result_count, result_datatype->name, target_rank,
+                     (unsigned long) target_disp, target_count, target_datatype->name, op->o_name,
+                     win->w_name);
+
+    sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
+    if (OPAL_UNLIKELY(NULL == sync)) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, origin_count, origin_datatype,
+                                                   result_addr, result_count, result_datatype,
+                                                   peer, target_rank, target_disp, target_count,
+                                                   target_datatype, op, NULL);
+}
+
+
+int ompi_osc_rdma_rget_accumulate
(const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, + void *result_addr, int result_count, ompi_datatype_t *result_datatype, + int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype, + ompi_op_t *op, ompi_win_t *win, ompi_request_t **request) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_peer_t *peer; + ompi_osc_rdma_request_t *rdma_request; + ompi_osc_rdma_sync_t *sync; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "rget_acc: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s", + (unsigned long) origin_addr, origin_count, origin_datatype->name, + (unsigned long) result_addr, result_count, result_datatype->name, target_rank, + (unsigned long) target_disp, target_count, target_datatype->name, op->o_name, + win->w_name); + + sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer); + if (OPAL_UNLIKELY(NULL == sync)) { + return OMPI_ERR_RMA_SYNC; + } + + OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, rdma_request); + + ret = ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, origin_count, origin_datatype, result_addr, + result_count, result_datatype, peer, target_rank, target_disp, + target_count, target_datatype, op, rdma_request); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OMPI_OSC_RDMA_REQUEST_RETURN(rdma_request); + return ret; + } + + *request = &rdma_request->super; + + return OMPI_SUCCESS; +}
+
+/* request-based accumulate: allocates a request, runs the common implementation without a
+ * result buffer and hands the request back through *request on success. the request is
+ * returned to its pool when the operation could not be started. */
+int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
+                               OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
+                               ompi_win_t *win, ompi_request_t **request)
+{
+    ompi_osc_rdma_module_t *module = GET_MODULE(win);
+    ompi_osc_rdma_request_t *rdma_request;
+    ompi_osc_rdma_sync_t *sync;
+    ompi_osc_rdma_peer_t *peer;
+    int ret;
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "racc: 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
+                     (unsigned long) origin_addr, origin_count, origin_datatype->name, target_rank,
+                     (unsigned long) target_disp, target_count, target_datatype->name, op->o_name, win->w_name);
+
+    sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
+    if (OPAL_UNLIKELY(NULL == sync)) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, rdma_request);
+
+    ret = ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, origin_count, origin_datatype, NULL, 0,
+                                                  NULL, peer, target_rank, target_disp, target_count, target_datatype,
+                                                  op, rdma_request);
+    if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
+        *request = &rdma_request->super;
+        return OMPI_SUCCESS;
+    }
+
+    OMPI_OSC_RDMA_REQUEST_RETURN(rdma_request);
+    return ret;
+}
+
+/* blocking accumulate: the common implementation is run without a result buffer and
+ * without a request. */
+int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank,
+                              OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op,
+                              ompi_win_t *win)
+{
+    ompi_osc_rdma_module_t *module = GET_MODULE(win);
+    ompi_osc_rdma_sync_t *sync;
+    ompi_osc_rdma_peer_t *peer;
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "acc: 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
+                     (unsigned long) origin_addr, origin_count, origin_datatype->name, target_rank,
+                     (unsigned long) target_disp, target_count, target_datatype->name, op->o_name, win->w_name);
+
+    sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
+    if (OPAL_LIKELY(NULL != sync)) {
+        return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, origin_count, origin_datatype, NULL, 0,
+                                                       NULL, peer, target_rank, target_disp, target_count, target_datatype,
+                                                       op, NULL);
+    }
+
+    return OMPI_ERR_RMA_SYNC;
+}
+
+
+/* fetch-and-op: a get-accumulate of exactly one element where origin, result and target all
+ * use the datatype dt. */
+int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, ompi_datatype_t *dt, int target_rank,
+                                OPAL_PTRDIFF_TYPE target_disp, ompi_op_t *op, ompi_win_t *win)
+{
+    ompi_osc_rdma_module_t *module = GET_MODULE(win);
+    ompi_osc_rdma_sync_t *sync;
+    ompi_osc_rdma_peer_t *peer;
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fop: %p, %s, %d, %lu, %s, %s", result_addr, dt->name,
+                     target_rank, (unsigned long) target_disp, op->o_name, win->w_name);
+
+    sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer);
+    if (OPAL_LIKELY(NULL != sync)) {
+        return ompi_osc_rdma_rget_accumulate_internal (sync, origin_addr, 1, dt, result_addr, 1, dt, peer,
+                                                       target_rank, target_disp, 1, dt, op, NULL);
+    }
+
+    return OMPI_ERR_RMA_SYNC;
+}
diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.h b/ompi/mca/osc/rdma/osc_rdma_accumulate.h new file mode 100644 index 00000000000..7ab370ab2b8 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.h @@ -0,0 +1,43 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OSC_RDMA_ACCUMULATE_H) +#define OSC_RDMA_ACCUMULATE_H + +#include "osc_rdma.h" + +int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr, + ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp, + ompi_win_t *win); + +int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank, + OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, + ompi_win_t *win); + +int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, ompi_datatype_t *dt, int target_rank, + OPAL_PTRDIFF_TYPE target_disp, ompi_op_t *op, ompi_win_t *win); + +int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, + void *result_addr, int result_count, ompi_datatype_t *result_datatype, + int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype, + ompi_op_t *op, ompi_win_t *win); + +int
ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank, + OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, + ompi_win_t *win, ompi_request_t **request); + +int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, + void *result_addr, int result_count, ompi_datatype_t *result_datatype, + int target_rank, MPI_Aint target_disp, int target_count, ompi_datatype_t *target_datatype, + ompi_op_t *op, ompi_win_t *win, ompi_request_t **request); + + +#endif /* OSC_RDMA_ACCUMULATE_H */ diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c new file mode 100644 index 00000000000..11338489cb8 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c @@ -0,0 +1,641 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "osc_rdma.h" +#include "osc_rdma_frag.h" +#include "osc_rdma_active_target.h" + +#include "mpi.h" +#include "opal/threads/mutex.h" +#include "ompi/communicator/communicator.h" +#include "ompi/mca/osc/base/base.h" + +/** + * ompi_osc_rdma_pending_post_t: + * + * Describes a post operation that was encountered outside it's + * matching start operation. + */ +struct ompi_osc_rdma_pending_post_t { + opal_list_item_t super; + int rank; +}; +typedef struct ompi_osc_rdma_pending_post_t ompi_osc_rdma_pending_post_t; + +static OBJ_CLASS_INSTANCE(ompi_osc_rdma_pending_post_t, opal_list_item_t, NULL, NULL); + +/** + * Dummy completion function for atomic operations + */ +void ompi_osc_rdma_atomic_complete (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, mca_btl_base_registration_handle_t *local_handle, + void *context, void *data, int status) +{ + volatile bool *atomic_complete = (volatile bool *) context; + + if (atomic_complete) { + *atomic_complete = true; + } +} + +/** + * compare_ranks: + * + * @param[in] ptra Pointer to integer item + * @param[in] ptrb Pointer to integer item + * + * @returns 0 if *ptra == *ptrb + * @returns -1 if *ptra < *ptrb + * @returns 1 otherwise + * + * This function is used to sort the rank list. It can be removed if + * groups are always in order. + */ +static int compare_ranks (const void *ptra, const void *ptrb) +{ + int a = *((int *) ptra); + int b = *((int *) ptrb); + + if (a < b) { + return -1; + } else if (a > b) { + return 1; + } + + return 0; +} + +/** + * ompi_osc_rdma_get_comm_ranks: + * + * @param[in] module - OSC RDMA module + * @param[in] sub_group - Group with ranks to translate + * + * @returns an array of translated ranks on success or NULL on failure + * + * Translate the ranks given in {sub_group} into ranks in the + * communicator used to create {module}. 
+ */ +static ompi_osc_rdma_peer_t **ompi_osc_rdma_get_peers (ompi_osc_rdma_module_t *module, ompi_group_t *sub_group) +{ + int size = ompi_group_size(sub_group); + ompi_osc_rdma_peer_t **peers; + int *ranks1, *ranks2; + int ret; + + ranks1 = calloc (size, sizeof(int)); + ranks2 = calloc (size, sizeof(int)); + peers = calloc (size, sizeof (ompi_osc_rdma_peer_t *)); + if (NULL == ranks1 || NULL == ranks2 || NULL == peers) { + free (ranks1); + free (ranks2); + free (peers); + return NULL; + } + + for (int i = 0 ; i < size ; ++i) { + ranks1[i] = i; + } + + ret = ompi_group_translate_ranks (sub_group, size, ranks1, module->comm->c_local_group, + ranks2); + free (ranks1); + if (OMPI_SUCCESS != ret) { + free (ranks2); + free (peers); + return NULL; + } + + qsort (ranks2, size, sizeof (int), compare_ranks); + for (int i = 0 ; i < size ; ++i) { + peers[i] = ompi_osc_rdma_module_peer (module, ranks2[i]); + if (NULL == peers[i]) { + free (peers); + peers = NULL; + break; + } + + OBJ_RETAIN(peers[i]); + } + free (ranks2); + + return peers; +} + +static void ompi_osc_rdma_release_peers (ompi_osc_rdma_peer_t **peers, int npeers) +{ + for (int i = 0 ; i < npeers ; ++i) { + OBJ_RELEASE(peers[i]); + } + + free (peers); +} + +static void ompi_osc_rdma_handle_post (ompi_osc_rdma_module_t *module, int rank, ompi_osc_rdma_peer_t **peers, int npeers) { + ompi_osc_rdma_state_t *state = module->state; + ompi_osc_rdma_pending_post_t *pending_post; + + /* look for the posting peer in the group */ + for (int j = 0 ; j < npeers ; ++j) { + if (rank == peers[j]->rank) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "got expected post from %d. still expecting posts from %d processes", + rank, (int) (npeers - state->num_post_msgs - 1)); + ++state->num_post_msgs; + return; + } + } + + /* post does not belong to this start epoch. save it for later */ + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "got unexpected post from %d . 
queueing for later", rank); + pending_post = OBJ_NEW(ompi_osc_rdma_pending_post_t); + pending_post->rank = rank; + OPAL_THREAD_SCOPED_LOCK(&module->lock, opal_list_append (&module->pending_posts, &pending_post->super)); +} + +int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_peer_t **peers; + int my_rank = ompi_comm_rank (module->comm); + ompi_osc_rdma_state_t *state = module->state; + volatile bool atomic_complete; + ompi_osc_rdma_frag_t *frag; + osc_rdma_counter_t *temp; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "post: %p, %d, %s", (void*) group, assert, win->w_name); + + /* check if we are already in a post epoch */ + if (module->pw_group) { + return OMPI_ERR_RMA_SYNC; + } + + /* save the group */ + OBJ_RETAIN(group); + + OPAL_THREAD_LOCK(&module->lock); + + /* ensure we're not already in a post */ + if (NULL != module->pw_group) { + OPAL_THREAD_UNLOCK(&(module->lock)); + return OMPI_ERR_RMA_SYNC; + } + module->pw_group = group; + + /* Update completion counter. Can't have received any completion + messages yet; complete won't send a completion header until + we've sent a post header. 
*/ + state->num_complete_msgs = 0; + OPAL_THREAD_UNLOCK(&module->lock); + + /* allocate a temporary buffer for atomic response */ + ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp); + + if ((assert & MPI_MODE_NOCHECK) || 0 == ompi_group_size (group)) { + return OMPI_SUCCESS; + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* translate group ranks into the communicator */ + peers = ompi_osc_rdma_get_peers (module, module->pw_group); + if (OPAL_UNLIKELY(NULL == peers)) { + ompi_osc_rdma_frag_complete (frag); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "sending post messages"); + + /* send a hello counter to everyone in group */ + for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) { + ompi_osc_rdma_peer_t *peer = peers[i]; + uint64_t target = (uint64_t) (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, post_index); + int post_index; + + if (peer->rank == my_rank) { + ompi_osc_rdma_handle_post (module, my_rank, NULL, 0); + continue; + } + + /* get a post index */ + atomic_complete = false; + if (!ompi_osc_rdma_peer_local_state (peer)) { + do { + ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, target, frag->handle, + peer->state_handle, MCA_BTL_ATOMIC_ADD, 1, 0, MCA_BTL_NO_ORDER, + ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL); + assert (OPAL_SUCCESS >= ret); + + if (OMPI_SUCCESS == ret) { + while (!atomic_complete) { + ompi_osc_rdma_progress (module); + } + + break; + } + + ompi_osc_rdma_progress (module); + } while (1); + } else { + *temp = ompi_osc_rdma_counter_add ((osc_rdma_counter_t *) (intptr_t) target, 1) - 1; + } + post_index = (*temp) & (OMPI_OSC_RDMA_POST_PEER_MAX - 1); + + target = (uint64_t) (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, post_peers) + + sizeof (osc_rdma_counter_t) * post_index; + + do { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attempting to 
post to index %d @ rank %d", post_index, peer->rank); + + /* try to post. if the value isn't 0 then another rank is occupying this index */ + if (!ompi_osc_rdma_peer_local_state (peer)) { + atomic_complete = false; + ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->state_endpoint, temp, target, frag->handle, peer->state_handle, + 0, 1 + (int64_t) my_rank, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, + (void *) &atomic_complete, NULL); + assert (OPAL_SUCCESS >= ret); + + if (OMPI_SUCCESS == ret) { + while (!atomic_complete) { + ompi_osc_rdma_progress (module); + } + } else { + ompi_osc_rdma_progress (module); + continue; + } + + } else { + *temp = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t *) target, 0, 1 + (osc_rdma_counter_t) my_rank); + } + + if (OPAL_LIKELY(0 == *temp)) { + break; + } + + /* prevent circular wait by checking for post messages received */ + for (int j = 0 ; j < OMPI_OSC_RDMA_POST_PEER_MAX ; ++j) { + /* no post at this index (yet) */ + if (0 == state->post_peers[j]) { + continue; + } + + ompi_osc_rdma_handle_post (module, state->post_peers[j] - 1, NULL, 0); + state->post_peers[j] = 0; + } + + usleep (100); + } while (1); + } + + ompi_osc_rdma_frag_complete (frag); + + ompi_osc_rdma_release_peers (peers, ompi_group_size(module->pw_group)); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "post complete"); + + return OMPI_SUCCESS; +} + +int ompi_osc_rdma_start_atomic (ompi_group_t *group, int assert, ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_pending_post_t *pending_post, *next; + ompi_osc_rdma_state_t *state = module->state; + ompi_osc_rdma_sync_t *sync = &module->all_sync; + int group_size = ompi_group_size (group); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "start: %p, %d, %s", (void*) group, assert, + win->w_name); + + OPAL_THREAD_LOCK(&module->lock); + + /* check if we are already in an access epoch */ + if (ompi_osc_rdma_access_epoch_active (module)) { + 
OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_RMA_SYNC; + } + + /* mark all procs in this group as being in an access epoch */ + sync->num_peers = ompi_group_size (group); + sync->sync.pscw.group = group; + + /* haven't processed any post messaes yet */ + state->num_post_msgs = 0; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "start group size %d", sync->num_peers); + + if (0 == ompi_group_size (group)) { + /* nothing more to do. this is an empty start epoch */ + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_SUCCESS; + } + + opal_atomic_wmb (); + + sync->type = OMPI_OSC_RDMA_SYNC_TYPE_PSCW; + + /* prevent us from entering a passive-target, fence, or another pscw access epoch until + * the matching complete is called */ + sync->epoch_active = true; + + /* translate the group ranks into the communicator */ + sync->peer_list.peers = ompi_osc_rdma_get_peers (module, group); + if (NULL == sync->peer_list.peers) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* save the group */ + OBJ_RETAIN(group); + + if (!(assert & MPI_MODE_NOCHECK)) { + /* look through list of pending posts */ + OPAL_LIST_FOREACH_SAFE(pending_post, next, &module->pending_posts, ompi_osc_rdma_pending_post_t) { + for (int i = 0 ; i < group_size ; ++i) { + ompi_osc_rdma_peer_t *peer = sync->peer_list.peers[i]; + + if (pending_post->rank == peer->rank) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found queued post from %d. still expecting posts " + "from %d processes", peer->rank, (int) (group_size - state->num_post_msgs - 1)); + opal_list_remove_item (&module->pending_posts, &pending_post->super); + OBJ_RELEASE(pending_post); + /* only one thread can process post messages so there is no need of atomics here */ + ++state->num_post_msgs; + break; + } + } + } + + /* wait for all post messages to arrive */ + while (state->num_post_msgs != group_size) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "waiting for post messages. 
have %d of %d", + (int) state->num_post_msgs, group_size); + for (int i = 0 ; i < OMPI_OSC_RDMA_POST_PEER_MAX ; ++i) { + /* no post at this index (yet) */ + if (0 == state->post_peers[i]) { + continue; + } + + ompi_osc_rdma_handle_post (module, state->post_peers[i] - 1, sync->peer_list.peers, group_size); + state->post_peers[i] = 0; + } + + ompi_osc_rdma_progress (module); + } + } else { + state->num_post_msgs = group_size; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "start complete"); + + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_SUCCESS; +} + +int ompi_osc_rdma_complete_atomic (ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_sync_t *sync = &module->all_sync; + ompi_osc_rdma_frag_t *frag = NULL; + ompi_osc_rdma_peer_t **peers; + void *scratch_lock = NULL; + ompi_group_t *group; + int group_size, ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "complete: %s", win->w_name); + + OPAL_THREAD_LOCK(&module->lock); + if (OMPI_OSC_RDMA_SYNC_TYPE_PSCW != sync->type) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_RMA_SYNC; + } + + /* phase 1 cleanup sync object */ + group = sync->sync.pscw.group; + group_size = sync->num_peers; + sync->type = OMPI_OSC_RDMA_SYNC_TYPE_NONE; + sync->epoch_active = false; + + /* phase 2 cleanup group */ + OBJ_RELEASE(group); + + peers = sync->peer_list.peers; + if (NULL == peers) { + /* empty peer list */ + OPAL_THREAD_UNLOCK(&(module->lock)); + OBJ_RELEASE(group); + return OMPI_SUCCESS; + } + + sync->peer_list.peers = NULL; + + OPAL_THREAD_UNLOCK(&(module->lock)); + + ompi_osc_rdma_sync_rdma_complete (sync); + + if (!(MCA_BTL_FLAGS_ATOMIC_OPS & module->selected_btl->btl_flags)) { + /* need a temporary buffer for performing fetching atomics */ + ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &scratch_lock); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + return ret; + } + } + + /* for each process in the group increment their number of complete messages */ + for (int i = 
0 ; i < group_size ; ++i) { + ompi_osc_rdma_peer_t *peer = peers[i]; + intptr_t target = (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, num_complete_msgs); + + if (!ompi_osc_rdma_peer_local_state (peer)) { + do { + if (MCA_BTL_FLAGS_ATOMIC_OPS & module->selected_btl->btl_flags) { + ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, target, peer->state_handle, + MCA_BTL_ATOMIC_ADD, 1, 0, MCA_BTL_NO_ORDER, + ompi_osc_rdma_atomic_complete, NULL, NULL); + } else { + /* don't care about the read value so use the scratch lock */ + ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, scratch_lock, + target, frag->handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, 1, + 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, NULL, NULL); + } + + if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { + break; + } + } while (1); + } else { + (void) ompi_osc_rdma_counter_add ((osc_rdma_counter_t *) target, 1); + } + } + + if (frag) { + ompi_osc_rdma_frag_complete (frag); + } + + /* release our reference to peers in this group */ + ompi_osc_rdma_release_peers (peers, group_size); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "complete complete"); + + return OMPI_SUCCESS; +} + +int ompi_osc_rdma_wait_atomic (ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_state_t *state = module->state; + ompi_group_t *group; + int group_size; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "wait: %s", win->w_name); + + OPAL_THREAD_LOCK(&module->lock); + if (NULL == module->pw_group) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no matching post"); + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_RMA_SYNC; + } + + group_size = ompi_group_size (module->pw_group); + OPAL_THREAD_UNLOCK(&module->lock); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "waiting on complete message. 
have %d of %d", + (int) state->num_complete_msgs, group_size); + + while (group_size != state->num_complete_msgs) { + ompi_osc_rdma_progress (module); + opal_atomic_mb (); + } + + OPAL_THREAD_LOCK(&module->lock); + state->num_complete_msgs = 0; + group = module->pw_group; + module->pw_group = NULL; + OPAL_THREAD_UNLOCK(&module->lock); + + OBJ_RELEASE(group); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "wait complete"); + + return OMPI_SUCCESS; +} + + +int ompi_osc_rdma_test_atomic (ompi_win_t *win, int *flag) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_state_t *state = module->state; + ompi_group_t *group; + int group_size; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "test: %s", win->w_name); + + OPAL_THREAD_LOCK(&module->lock); + if (NULL == module->pw_group) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no matching post"); + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_RMA_SYNC; + } + + group_size = ompi_group_size (module->pw_group); + + *flag = (group_size == state->num_complete_msgs); + OPAL_THREAD_UNLOCK(&module->lock); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "checking on complete message. 
have %d of %d", + (int) state->num_complete_msgs, group_size); + + if (!*flag) { + ompi_osc_rdma_progress (module); + return OMPI_SUCCESS; + } + + state->num_complete_msgs = 0; + + OPAL_THREAD_LOCK(&(module->lock)); + group = module->pw_group; + module->pw_group = NULL; + OPAL_THREAD_UNLOCK(&(module->lock)); + + OBJ_RELEASE(group); + + return OMPI_SUCCESS; +} + +int ompi_osc_rdma_fence_atomic (int assert, ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + int ret = OMPI_SUCCESS; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fence: %d, %s", assert, win->w_name); + + /* can't enter an active target epoch while a lock is active */ + if (ompi_osc_rdma_in_passive_epoch (module) || module->pw_group) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "can not start fence epoch due to conflicting epoch"); + return OMPI_ERR_RMA_SYNC; + } + + OPAL_THREAD_LOCK(&module->lock); + + /* active sends are now active (we will close the epoch if NOSUCCEED is specified) */ + if (0 == (assert & MPI_MODE_NOSUCCEED)) { + module->all_sync.type = OMPI_OSC_RDMA_SYNC_TYPE_FENCE; + module->all_sync.num_peers = ompi_comm_size (module->comm); + /* NTH: should add a fast access array for peers here later. for now just use the + * hash table. */ + } + + /* technically it is possible to enter a lock epoch (which will close the fence epoch) if + * no communication has occurred. this flag will be set on the next put, get, accumulate, etc. */ + module->all_sync.epoch_active = false; + + /* short-circuit the noprecede case */ + if (0 != (assert & MPI_MODE_NOPRECEDE)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fence complete (short circuit)"); + /* no communication can occur until a peer has entered the same fence epoch. for now + * a barrier is used to ensure this is the case. 
*/ + ret = module->comm->c_coll.coll_barrier(module->comm, module->comm->c_coll.coll_barrier_module); + OPAL_THREAD_UNLOCK(&module->lock); + return ret; + } + + ompi_osc_rdma_sync_rdma_complete (&module->all_sync); + + /* ensure all writes to my memory are complete */ + ret = module->comm->c_coll.coll_barrier(module->comm, module->comm->c_coll.coll_barrier_module); + + if (assert & MPI_MODE_NOSUCCEED) { + /* as specified in MPI-3 p 438 3-5 the fence can end an epoch. it isn't explicitly + * stated that MPI_MODE_NOSUCCEED ends the epoch but it is a safe assumption. */ + module->all_sync.type = OMPI_OSC_RDMA_SYNC_TYPE_NONE; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "fence complete"); + + OPAL_THREAD_UNLOCK(&module->lock); + + return ret; +} diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.h b/ompi/mca/osc/rdma/osc_rdma_active_target.h new file mode 100644 index 00000000000..605d006c029 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.h @@ -0,0 +1,42 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OSC_RDMA_ACTIVE_TARGET_H) +#define OSC_RDMA_ACTIVE_TARGET_H + +#include "osc_rdma.h" +#include "osc_rdma_sync.h" +#include "osc_rdma_lock.h" + +int ompi_osc_rdma_fence_atomic (int assert, struct ompi_win_t *win); + +int ompi_osc_rdma_start_atomic (struct ompi_group_t *group, + int assert, struct ompi_win_t *win); +int ompi_osc_rdma_complete_atomic (struct ompi_win_t *win); + +int ompi_osc_rdma_post_atomic (struct ompi_group_t *group, + int assert, struct ompi_win_t *win); + +int ompi_osc_rdma_wait_atomic (struct ompi_win_t *win); + +int ompi_osc_rdma_test_atomic (struct ompi_win_t *win, int *flag); + +#endif /* OSC_RDMA_ACTIVE_TARGET_H */ diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c new file mode 100644 index 00000000000..7efde7c39be --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -0,0 +1,954 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "osc_rdma_comm.h" +#include "osc_rdma_sync.h" +#include "osc_rdma_request.h" +#include "osc_rdma_dynamic.h" + +#include "ompi/mca/osc/base/osc_base_obj_convert.h" +#include "opal/align.h" + +static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t source_address, + mca_btl_base_registration_handle_t *source_handle, void *target_buffer, size_t size, + ompi_osc_rdma_request_t *request); + +static void ompi_osc_get_data_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, mca_btl_base_registration_handle_t *local_handle, + void *context, void *data, int status) +{ + assert (OPAL_SUCCESS == status); + ((bool *) context)[0] = true; +} + +int ompi_osc_get_data_blocking (ompi_osc_rdma_module_t *module, struct mca_btl_base_endpoint_t *endpoint, + uint64_t source_address, mca_btl_base_registration_handle_t *source_handle, + void *data, size_t len) +{ + mca_btl_base_registration_handle_t *local_handle = NULL; + ompi_osc_rdma_frag_t *frag = NULL; + volatile bool read_complete = false; + char *ptr = data; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "reading state data from endpoint %p. 
source: 0x%" PRIx64 ", len: %lu", + (void *) endpoint, source_address, (unsigned long) len); + + if (module->selected_btl->btl_register_mem && len >= module->selected_btl->btl_get_local_registration_threshold) { + ret = ompi_osc_rdma_frag_alloc (module, len, &frag, &ptr); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "error allocating temporary buffer"); + return ret; + } + + local_handle = frag->handle; + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocated temporary buffer %p in fragment %p", ptr, + (void *) frag); + } + + assert (!(source_address & ALIGNMENT_MASK(module->selected_btl->btl_get_alignment))); + + do { + ret = module->selected_btl->btl_get (module->selected_btl, endpoint, ptr, source_address, + local_handle, source_handle, len, 0, MCA_BTL_NO_ORDER, + ompi_osc_get_data_complete, (void *) &read_complete, NULL); + if (OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != ret)) { + break; + } + + ompi_osc_rdma_progress (module); + } while (1); + + if (OPAL_UNLIKELY(OMPI_SUCCESS > ret)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "btl get failed with opal error code %d", ret); + + if (frag) { + ompi_osc_rdma_frag_complete (frag); + } + + return ret; + } + + /* block until the callback is called */ + while (!read_complete) { + ompi_osc_rdma_progress (module); + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished reading state data from endpoint %p", (void *) endpoint); + + opal_memchecker_base_mem_defined (ptr, len); + + if (frag) { + memcpy (data, ptr, len); + + /* done with the fragment */ + ompi_osc_rdma_frag_complete (frag); + } + + return OMPI_SUCCESS; +} + +/** + * @brief function signature for the rdma transfer function used by ompi_osc_rdma_master_noncontig() + * + * @param[in] peer peer object for remote peer + * @param[in] remote_address base of remote region (destination for put, source for get) + * @param[in] remote_handle btl registration handle for remote region (must be valid for the entire region) + * 
@param[in] local_address base of local region (source for put, destination for get) + * @param[in] size number of bytes to transfer + * @param[in] module osc rdma module + * @param[in] request osc rdma request if used (can be NULL) + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERR_OUT_OF_RESOURCE on temporary error + * @returns other OMPI error on fatal error + * + * This function does the work of scheduling a contiguous transfer between the local and remote regions. + */ +typedef int (*ompi_osc_rdma_fn_t) (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t remote_address, + mca_btl_base_registration_handle_t *remote_handle, void *local_address, size_t size, + ompi_osc_rdma_request_t *request); + +/** + * @brief break down rdma transaction into contiguous regions + * + * @param[in] local_address base of local region (source for put, destination for get) + * @param[in] local_count number of elements in local region + * @param[in] local_datatype datatype of local region + * @param[in] peer peer object for remote peer + * @param[in] remote_address base of remote region (destination for put, source for get) + * @param[in] remote_handle btl registration handle for remote region (must be valid for the entire region) + * @param[in] remote_count number of elements in remote region + * @param[in] remote_datatype datatype of remote region + * @param[in] module osc rdma module + * @param[in] request osc rdma request if used (can be NULL) + * @param[in] max_rdma_len maximum length of an rdma request (usually btl limitation) + * @param[in] rdma_fn function to use for contiguous rdma operations + * @param[in] alloc_reqs true if rdma_fn requires a valid request object (any allocated objects will be marked internal) + * + * This function does the work of breaking a non-contiguous rdma transfer into contiguous components. It will + * continue to submit rdma transfers until the entire region is transferred or a fatal error occurs. 
+ */ +static int ompi_osc_rdma_master_noncontig (ompi_osc_rdma_sync_t *sync, void *local_address, int local_count, ompi_datatype_t *local_datatype, + ompi_osc_rdma_peer_t *peer, uint64_t remote_address, + mca_btl_base_registration_handle_t *remote_handle, int remote_count, + ompi_datatype_t *remote_datatype, ompi_osc_rdma_request_t *request, const size_t max_rdma_len, + const ompi_osc_rdma_fn_t rdma_fn, const bool alloc_reqs) +{ + ompi_osc_rdma_module_t *module = sync->module; + struct iovec local_iovec[OMPI_OSC_RDMA_DECODE_MAX], remote_iovec[OMPI_OSC_RDMA_DECODE_MAX]; + opal_convertor_t local_convertor, remote_convertor; + uint32_t local_iov_count, remote_iov_count; + uint32_t local_iov_index, remote_iov_index; + /* needed for opal_convertor_raw but not used */ + size_t local_size, remote_size, rdma_len; + ompi_osc_rdma_request_t *subreq; + int ret; + bool done; + + subreq = NULL; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "scheduling rdma on non-contiguous datatype(s)"); + + /* prepare convertors for the source and target. these convertors will be used to determine the + * contiguous segments within the source and target. 
*/ + OBJ_CONSTRUCT(&remote_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &remote_datatype->super, remote_count, + (void *) (intptr_t) remote_address, 0, &remote_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + OBJ_CONSTRUCT(&local_convertor, opal_convertor_t); + ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &local_datatype->super, local_count, + local_address, 0, &local_convertor); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + if (request) { + /* keep the request from completing until all the transfers have started */ + request->outstanding_requests = 1; + } + + local_iov_index = 0; + local_iov_count = 0; + + do { + /* decode segments of the remote data */ + remote_iov_count = OMPI_OSC_RDMA_DECODE_MAX; + remote_iov_index = 0; + + /* opal_convertor_raw returns done when it has reached the end of the data */ + done = opal_convertor_raw (&remote_convertor, remote_iovec, &remote_iov_count, &remote_size); + + /* loop on the target segments until we have exhausted the decoded source data */ + while (remote_iov_index != remote_iov_count) { + if (local_iov_index == local_iov_count) { + /* decode segments of the target buffer */ + local_iov_count = OMPI_OSC_RDMA_DECODE_MAX; + local_iov_index = 0; + (void) opal_convertor_raw (&local_convertor, local_iovec, &local_iov_count, &local_size); + } + + /* we already checked that the target was large enough. 
this should be impossible */ + assert (0 != local_iov_count); + + /* determine how much to transfer in this operation */ + rdma_len = min(min(local_iovec[local_iov_index].iov_len, remote_iovec[remote_iov_index].iov_len), max_rdma_len); + + /* execute the get */ + if (!subreq && alloc_reqs) { + OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, subreq); + subreq->internal = true; + subreq->type = OMPI_OSC_RDMA_TYPE_RDMA; + subreq->parent_request = request; + + if (request) { + (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + } + } else if (!alloc_reqs) { + subreq = request; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing rdma on contiguous region. local: %p, remote: %p, len: %lu", + local_iovec[local_iov_index].iov_base, remote_iovec[remote_iov_index].iov_base, + (unsigned long) remote_iovec[remote_iov_index].iov_len); + + ret = rdma_fn (sync, peer, (uint64_t) (intptr_t) remote_iovec[remote_iov_index].iov_base, remote_handle, + local_iovec[local_iov_index].iov_base, rdma_len, subreq); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + if (OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE != ret)) { + if (request) { + (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + } + + if (alloc_reqs) { + OMPI_OSC_RDMA_REQUEST_RETURN(subreq); + } + + /* something bad happened. 
need to figure out best way to handle rma errors */ + return ret; + } + + /* progress and try again */ + ompi_osc_rdma_progress (module); + continue; + } + subreq = NULL; + + /* adjust io vectors */ + local_iovec[local_iov_index].iov_len -= rdma_len; + remote_iovec[remote_iov_index].iov_len -= rdma_len; + local_iovec[local_iov_index].iov_base = (void *)((intptr_t) local_iovec[local_iov_index].iov_base + rdma_len); + remote_iovec[remote_iov_index].iov_base = (void *)((intptr_t) remote_iovec[remote_iov_index].iov_base + rdma_len); + + local_iov_index += (0 == local_iovec[local_iov_index].iov_len); + remote_iov_index += (0 == remote_iovec[remote_iov_index].iov_len); + } + } while (!done); + + if (request) { + /* release our reference so the request can complete */ + if (1 == request->outstanding_requests) { + ompi_osc_rdma_request_complete (request, OMPI_SUCCESS); + } + + (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished scheduling rdma on non-contiguous datatype(s)"); + + /* clean up convertors */ + opal_convertor_cleanup (&local_convertor); + OBJ_DESTRUCT(&local_convertor); + opal_convertor_cleanup (&remote_convertor); + OBJ_DESTRUCT(&remote_convertor); + + return OMPI_SUCCESS; +} + +static inline int ompi_osc_rdma_master (ompi_osc_rdma_sync_t *sync, void *local_address, int local_count, + ompi_datatype_t *local_datatype, ompi_osc_rdma_peer_t *peer, + uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle, + int remote_count, ompi_datatype_t *remote_datatype, + ompi_osc_rdma_request_t *request, const size_t max_rdma_len, + const ompi_osc_rdma_fn_t rdma_fn, const bool alloc_reqs) +{ + size_t rdma_len; + ptrdiff_t lb, extent; + int ret; + + rdma_len = local_datatype->super.size * local_count; + + /* fast path for contiguous rdma */ + if (OPAL_LIKELY(ompi_datatype_is_contiguous_memory_layout (local_datatype, local_count) && + ompi_datatype_is_contiguous_memory_layout 
(remote_datatype, remote_count) && + rdma_len <= max_rdma_len)) { + if (NULL == request && alloc_reqs) { + ompi_osc_rdma_module_t *module = sync->module; + OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, request); + request->internal = true; + request->type = OMPI_OSC_RDMA_TYPE_RDMA; + } + + /* ignore failure here */ + (void) ompi_datatype_get_true_extent (local_datatype, &lb, &extent); + local_address = (void *)((intptr_t) local_address + lb); + + (void) ompi_datatype_get_true_extent (remote_datatype, &lb, &extent); + remote_address += lb; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing rdma on contiguous region. local: %p, " + "remote: 0x%lx, length: %lu", local_address, (unsigned long) remote_address, + rdma_len); + + do { + ret = rdma_fn (sync, peer, remote_address, remote_handle, local_address, rdma_len, request); + if (OPAL_LIKELY(OPAL_SUCCESS == ret)) { + return OMPI_SUCCESS; + } + + ompi_osc_rdma_progress (sync->module); + } while (1); + } + + return ompi_osc_rdma_master_noncontig (sync, local_address, local_count, local_datatype, peer, remote_address, + remote_handle, remote_count, remote_datatype, request, + max_rdma_len, rdma_fn, alloc_reqs); +} + +static int ompi_osc_rdma_copy_local (const void *source, int source_count, ompi_datatype_t *source_datatype, + void *target, int target_count, ompi_datatype_t *target_datatype, + ompi_osc_rdma_request_t *request) +{ + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing local copy from %p -> %p", source, target); + + opal_atomic_mb (); + ret = ompi_datatype_sndrcv (source, source_count, source_datatype, target, target_count, target_datatype); + + if (request) { + ompi_osc_rdma_request_complete (request, ret); + } + + return ret; +} + +static void ompi_osc_rdma_put_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, mca_btl_base_registration_handle_t *local_handle, + void *context, void *data, int status) +{ + ompi_osc_rdma_sync_t *sync = 
(ompi_osc_rdma_sync_t *) context; + ompi_osc_rdma_frag_t *frag = (ompi_osc_rdma_frag_t *) data; + ompi_osc_rdma_request_t *request = NULL; + + assert (OPAL_SUCCESS == status); + + /* the lowest bit is used as a flag indicating this put operation has a request */ + if ((intptr_t) context & 0x1) { + request = (ompi_osc_rdma_request_t *) ((intptr_t) context & ~1); + sync = request->sync; + + /* NTH -- TODO: better error handling */ + ompi_osc_rdma_request_complete (request, status); + } + + OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "btl put complete on sync %p. local " + "address %p. opal status %d", (void *) sync, local_address, status); + + if (frag) { + ompi_osc_rdma_frag_complete (frag); + } else { + ompi_osc_rdma_deregister (sync->module, local_handle); + } + + ompi_osc_rdma_sync_rdma_dec (sync); +} + +static void ompi_osc_rdma_aggregate_put_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, mca_btl_base_registration_handle_t *local_handle, + void *context, void *data, int status) +{ + ompi_osc_rdma_aggregation_t *aggregation = (ompi_osc_rdma_aggregation_t *) context; + ompi_osc_rdma_sync_t *sync = aggregation->sync; + ompi_osc_rdma_frag_t *frag = aggregation->frag; + + assert (OPAL_SUCCESS == status); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "aggregate put complete %p on sync %p. local address %p. 
status %d", + (void *) aggregation, (void *) sync, local_address, status); + + ompi_osc_rdma_frag_complete (frag); + ompi_osc_rdma_aggregation_return (aggregation); + + /* make sure the aggregation is returned before marking the operation as complete */ + opal_atomic_wmb (); + + ompi_osc_rdma_sync_rdma_dec (sync); +} + +static int ompi_osc_rdma_put_real (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t target_address, + mca_btl_base_registration_handle_t *target_handle, void *ptr, + mca_btl_base_registration_handle_t *local_handle, size_t size, + mca_btl_base_rdma_completion_fn_t cb, void *context, void *cbdata) { + ompi_osc_rdma_module_t *module = sync->module; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating btl put of %lu bytes to remote address %" PRIx64 ", sync " + "object %p...", (unsigned long) size, target_address, (void *) sync); + + /* flag outstanding rma requests */ + ompi_osc_rdma_sync_rdma_inc (sync); + + do { + ret = module->selected_btl->btl_put (module->selected_btl, peer->data_endpoint, ptr, target_address, + local_handle, target_handle, size, 0, MCA_BTL_NO_ORDER, + cb, context, cbdata); + if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) { + return OMPI_SUCCESS; + } + + ++module->put_retry_count; + + if (OPAL_ERR_OUT_OF_RESOURCE != ret && OPAL_ERR_TEMP_OUT_OF_RESOURCE != ret) { + break; + } + + /* spin a bit on progress */ + for (int i = 0 ; i < 10 ; ++i) { + ompi_osc_rdma_progress (module); + } + } while (1); + + OSC_RDMA_VERBOSE(10, "btl put failed with opal error code %d", ret); + + return ret; +} + +static void ompi_osc_rdma_aggregate_append (ompi_osc_rdma_aggregation_t *aggregation, ompi_osc_rdma_request_t *request, + void *source_buffer, size_t size) +{ + size_t offset = aggregation->buffer_used; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "appending %lu bytes of data from %p to aggregate fragment %p with start " + "address 0x%lx", (unsigned long) size, source_buffer, (void *) aggregation, + (unsigned long) 
aggregation->target_address); + + memcpy (aggregation->buffer + offset, source_buffer, size); + + aggregation->buffer_used += size; + + if (request) { + /* the local buffer is now available */ + ompi_osc_rdma_request_complete (request, 0); + } +} + +static int ompi_osc_rdma_aggregate_alloc (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t target_address, + mca_btl_base_registration_handle_t *target_handle, void *source_buffer, size_t size, + ompi_osc_rdma_request_t *request, int type) +{ + ompi_osc_rdma_module_t *module = sync->module; + ompi_osc_rdma_aggregation_t *aggregation; + int ret; + + aggregation = (ompi_osc_rdma_aggregation_t *) opal_free_list_get (&mca_osc_rdma_component.aggregate); + if (OPAL_UNLIKELY(NULL == aggregation)) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + ret = ompi_osc_rdma_frag_alloc (module, mca_osc_rdma_component.aggregation_limit, &aggregation->frag, + &aggregation->buffer); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + opal_free_list_return(&mca_osc_rdma_component.aggregate, (opal_free_list_item_t *) aggregation); + return ret; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocated new aggregate fragment %p for target %d", (void *) aggregation, + peer->rank); + + peer->aggregate = aggregation; + + aggregation->target_address = target_address; + aggregation->target_handle = target_handle; + aggregation->buffer_size = mca_osc_rdma_component.aggregation_limit; + aggregation->sync = sync; + aggregation->peer = peer; + aggregation->type = type; + aggregation->buffer_used = 0; + + ompi_osc_rdma_aggregate_append (aggregation, request, source_buffer, size); + + opal_list_append (&sync->aggregations, (opal_list_item_t *) aggregation); + + return OMPI_SUCCESS; +} + +static int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t target_address, + mca_btl_base_registration_handle_t *target_handle, void *source_buffer, size_t size, + ompi_osc_rdma_request_t *request) +{ + 
ompi_osc_rdma_module_t *module = sync->module; + ompi_osc_rdma_aggregation_t *aggregation = peer->aggregate; + mca_btl_base_registration_handle_t *local_handle = NULL; + ompi_osc_rdma_frag_t *frag = NULL; + char *ptr = source_buffer; + void *cbcontext; + int ret; + + if (aggregation) { + if (size <= (aggregation->buffer_size - aggregation->buffer_used) && (target_handle == aggregation->target_handle) && + (target_address == aggregation->target_address + aggregation->buffer_used)) { + assert (OMPI_OSC_RDMA_TYPE_PUT == aggregation->type); + ompi_osc_rdma_aggregate_append (aggregation, request, source_buffer, size); + return OMPI_SUCCESS; + } + + /* can't aggregate this operation. flush the previous segment */ + ret = ompi_osc_rdma_peer_aggregate_flush (peer); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + } + + if (size <= (mca_osc_rdma_component.aggregation_limit >> 2)) { + ret = ompi_osc_rdma_aggregate_alloc (sync, peer, target_address, target_handle, source_buffer, size, request, + OMPI_OSC_RDMA_TYPE_PUT); + if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { + if (request) { + + } + return ret; + } + } + + if (module->selected_btl->btl_register_mem && size > module->selected_btl->btl_put_local_registration_threshold) { + ret = ompi_osc_rdma_frag_alloc (module, size, &frag, &ptr); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + ret = ompi_osc_rdma_register (module, peer->data_endpoint, source_buffer, size, 0, &local_handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + } else { + memcpy (ptr, source_buffer, size); + local_handle = frag->handle; + } + } + + /* increment the outstanding request counter in the request object */ + if (request) { + (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + cbcontext = (void *) ((intptr_t) request | 1); + request->sync = sync; + } else { + cbcontext = (void *) sync; + } + + ret = ompi_osc_rdma_put_real (sync, peer, target_address, target_handle, ptr, local_handle, size, 
ompi_osc_rdma_put_complete, + cbcontext, frag); + if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) { + return OMPI_SUCCESS; + } + + ompi_osc_rdma_cleanup_rdma (sync, frag, local_handle, request); + + return ret; +} + +static void ompi_osc_rdma_get_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, mca_btl_base_registration_handle_t *local_handle, + void *context, void *data, int status) +{ + ompi_osc_rdma_request_t *request = (ompi_osc_rdma_request_t *) context; + intptr_t source = (intptr_t) local_address + request->offset; + ompi_osc_rdma_frag_t *frag = (ompi_osc_rdma_frag_t *) data; + ompi_osc_rdma_sync_t *sync = request->sync; + void *origin_addr = request->origin_addr; + + OSC_RDMA_VERBOSE(status ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_TRACE, "btl get complete on sync %p. local " + "address %p. origin %p. opal status %d", (void *) sync, local_address, origin_addr, status); + + assert (OPAL_SUCCESS == status); + + if (request->buffer || NULL != frag) { + if (OPAL_LIKELY(OMPI_SUCCESS == status)) { + memcpy (origin_addr, (void *) source, request->len); + } + } + + if (NULL != frag) { + ompi_osc_rdma_frag_complete (frag); + } else { + ompi_osc_rdma_deregister (sync->module, local_handle); + } + + ompi_osc_rdma_sync_rdma_dec (sync); + + ompi_osc_rdma_request_complete (request, status); +} + +int ompi_osc_rdma_peer_aggregate_flush (ompi_osc_rdma_peer_t *peer) +{ + ompi_osc_rdma_aggregation_t *aggregation = peer->aggregate; + int ret; + + if (NULL == aggregation) { + return OMPI_SUCCESS; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flusing aggregate fragment %p", (void *) aggregation); + + assert (OMPI_OSC_RDMA_TYPE_PUT == aggregation->type); + + ret = ompi_osc_rdma_put_real (aggregation->sync, peer, aggregation->target_address, aggregation->target_handle, + aggregation->buffer, aggregation->frag->handle, aggregation->buffer_used, + ompi_osc_rdma_aggregate_put_complete, (void *) aggregation, NULL); + + 
peer->aggregate = NULL; + + if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) { + return OMPI_SUCCESS; + } + + ompi_osc_rdma_cleanup_rdma (aggregation->sync, aggregation->frag, NULL, NULL); + + ompi_osc_rdma_aggregation_return (aggregation); + + return ret; + +} + +static int ompi_osc_rdma_get_partial (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t source_address, + mca_btl_base_registration_handle_t *source_handle, void *target_buffer, size_t size, + ompi_osc_rdma_request_t *request) { + ompi_osc_rdma_module_t *module = sync->module; + ompi_osc_rdma_request_t *subreq; + int ret; + + OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, subreq); + subreq->internal = true; + subreq->type = OMPI_OSC_RDMA_TYPE_RDMA; + subreq->parent_request = request; + (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, 1); + + ret = ompi_osc_rdma_get_contig (sync, peer, source_address, source_handle, target_buffer, size, subreq); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OMPI_OSC_RDMA_REQUEST_RETURN(subreq); + (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + } + + return ret; +} + +static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t source_address, + mca_btl_base_registration_handle_t *source_handle, void *target_buffer, size_t size, + ompi_osc_rdma_request_t *request) +{ + ompi_osc_rdma_module_t *module = sync->module; + const size_t btl_alignment_mask = ALIGNMENT_MASK(module->selected_btl->btl_get_alignment); + mca_btl_base_registration_handle_t *local_handle = NULL; + ompi_osc_rdma_frag_t *frag = NULL; + osc_rdma_size_t aligned_len; + osc_rdma_base_t aligned_source_base, aligned_source_bound; + char *ptr = target_buffer; + int ret; + + aligned_source_base = source_address & ~btl_alignment_mask; + aligned_source_bound = (source_address + size + btl_alignment_mask) & ~btl_alignment_mask; + aligned_len = aligned_source_bound - aligned_source_base; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating get of 
%lu bytes from remote ptr %" PRIx64 " to local ptr %p", + size, source_address, target_buffer); + + if ((module->selected_btl->btl_register_mem && size > module->selected_btl->btl_get_local_registration_threshold) || + (((uint64_t) target_buffer | size | source_address) & btl_alignment_mask)) { + + ret = ompi_osc_rdma_frag_alloc (module, aligned_len, &frag, &ptr); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + if (OMPI_ERR_VALUE_OUT_OF_BOUNDS == ret) { + /* region is too large for a buffered read */ + size_t subsize; + + if ((source_address & btl_alignment_mask) && (source_address & btl_alignment_mask) == ((intptr_t) target_buffer & btl_alignment_mask)) { + /* remote region has the same alignment but the base is not aligned. perform a small + * buffered get of the beginning of the remote region */ + aligned_source_base = OPAL_ALIGN(source_address, module->selected_btl->btl_get_alignment, osc_rdma_base_t); + subsize = (size_t) (aligned_source_base - source_address); + + ret = ompi_osc_rdma_get_partial (sync, peer, source_address, source_handle, target_buffer, subsize, request); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + source_address += subsize; + target_buffer = (void *) ((intptr_t) target_buffer + subsize); + size -= subsize; + + aligned_len = aligned_source_bound - aligned_source_base; + } + + if (!(((uint64_t) target_buffer | source_address) & btl_alignment_mask) && + (size & btl_alignment_mask)) { + /* remote region bases are aligned but the bounds are not. 
perform a + * small buffered get of the end of the remote region */ + aligned_len = size & ~btl_alignment_mask; + subsize = size - aligned_len; + size = aligned_len; + ret = ompi_osc_rdma_get_partial (sync, peer, source_address + aligned_len, source_handle, + (void *) ((intptr_t) target_buffer + aligned_len), subsize, request); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + } + /* (remaining) user request is now correctly aligned */ + } + + if ((((uint64_t) target_buffer | size | source_address) & btl_alignment_mask)) { + /* local and remote alignments differ */ + request->buffer = ptr = malloc (aligned_len); + } else { + ptr = target_buffer; + } + + if (NULL != ptr) { + (void) ompi_osc_rdma_register (module, peer->data_endpoint, ptr, aligned_len, MCA_BTL_REG_FLAG_LOCAL_WRITE, + &local_handle); + } + + if (OPAL_UNLIKELY(NULL == local_handle)) { + free (request->buffer); + request->buffer = NULL; + return ret; + } + } else { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "using internal buffer %p in fragment %p for get of size %lu bytes, source address 0x%lx", + ptr, (void *) frag, (unsigned long) aligned_len, (unsigned long) aligned_source_base); + local_handle = frag->handle; + } + } + + request->offset = source_address - aligned_source_base; + request->len = size; + request->origin_addr = target_buffer; + request->sync = sync; + + ompi_osc_rdma_sync_rdma_inc (sync); + + do { + ret = module->selected_btl->btl_get (module->selected_btl, peer->data_endpoint, ptr, aligned_source_base, local_handle, + source_handle, aligned_len, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_get_complete, + request, frag); + if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) { + return OMPI_SUCCESS; + } + + ++module->get_retry_count; + + if (OPAL_ERR_OUT_OF_RESOURCE != ret && OPAL_ERR_TEMP_OUT_OF_RESOURCE != ret) { + break; + } + + /* spin a bit on progress */ + for (int i = 0 ; i < 10 ; ++i) { + ompi_osc_rdma_progress (module); + } + } while (1); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, 
"btl get failed with opal error code %d", ret); + + ompi_osc_rdma_cleanup_rdma (sync, frag, local_handle, request); + + return ret; +} + +static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count, + ompi_datatype_t *origin_datatype, ompi_osc_rdma_peer_t *peer, + OPAL_PTRDIFF_TYPE target_disp, int target_count, + ompi_datatype_t *target_datatype, ompi_osc_rdma_request_t *request) +{ + ompi_osc_rdma_module_t *module = sync->module; + mca_btl_base_registration_handle_t *target_handle; + uint64_t target_address; + int ret; + + /* short-circuit case */ + if (0 == origin_count || 0 == target_count) { + if (request) { + ompi_osc_rdma_request_complete (request, MPI_SUCCESS); + } + + return OMPI_SUCCESS; + } + + ret = osc_rdma_get_remote_segment (module, peer, target_disp, target_datatype->super.size * target_count, + &target_address, &target_handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + /* optimize communication with peers that we can do direct load and store operations on */ + if (ompi_osc_rdma_peer_local_base (peer)) { + return ompi_osc_rdma_copy_local (origin_addr, origin_count, origin_datatype, (void *) (intptr_t) target_address, + target_count, target_datatype, request); + } + + return ompi_osc_rdma_master (sync, (void *) origin_addr, origin_count, origin_datatype, peer, target_address, target_handle, + target_count, target_datatype, request, module->selected_btl->btl_put_limit, + ompi_osc_rdma_put_contig, false); +} + +static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, + ompi_osc_rdma_peer_t *peer, OPAL_PTRDIFF_TYPE source_disp, int source_count, + ompi_datatype_t *source_datatype, ompi_osc_rdma_request_t *request) +{ + ompi_osc_rdma_module_t *module = sync->module; + mca_btl_base_registration_handle_t *source_handle; + uint64_t source_address; + int ret; + + /* short-circuit case */ + if (0 == 
origin_count || 0 == source_count) { + if (request) { + ompi_osc_rdma_request_complete (request, MPI_SUCCESS); + } + + return OMPI_SUCCESS; + } + + ret = osc_rdma_get_remote_segment (module, peer, source_disp, source_datatype->super.size * source_count, + &source_address, &source_handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + /* optimize self/local communication */ + if (ompi_osc_rdma_peer_local_base (peer)) { + return ompi_osc_rdma_copy_local ((void *) (intptr_t) source_address, source_count, source_datatype, + origin_addr, origin_count, origin_datatype, request); + } + + return ompi_osc_rdma_master (sync, origin_addr, origin_count, origin_datatype, peer, source_address, + source_handle, source_count, source_datatype, request, + module->selected_btl->btl_get_limit, ompi_osc_rdma_get_contig, true); +} +int ompi_osc_rdma_put (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, + int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count, + ompi_datatype_t *target_datatype, ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_peer_t *peer; + ompi_osc_rdma_sync_t *sync; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "put: 0x%lx, %d, %s, %d, %d, %d, %s, %s", (unsigned long) origin_addr, + origin_count, origin_datatype->name, target_rank, (int) target_disp, target_count, + target_datatype->name, win->w_name); + + sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer); + if (OPAL_UNLIKELY(NULL == sync)) { + return OMPI_ERR_RMA_SYNC; + } + + return ompi_osc_rdma_put_w_req (sync, origin_addr, origin_count, origin_datatype, peer, target_disp, + target_count, target_datatype, NULL); +} + +int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, + int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count, + ompi_datatype_t *target_datatype, ompi_win_t *win, + ompi_request_t **request) +{ + ompi_osc_rdma_module_t *module = 
GET_MODULE(win); + ompi_osc_rdma_peer_t *peer; + ompi_osc_rdma_request_t *rdma_request; + ompi_osc_rdma_sync_t *sync; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "rput: 0x%lx, %d, %s, %d, %d, %d, %s, %s", (unsigned long) origin_addr, origin_count, + origin_datatype->name, target_rank, (int) target_disp, target_count, target_datatype->name, win->w_name); + + sync = ompi_osc_rdma_module_sync_lookup (module, target_rank, &peer); + if (OPAL_UNLIKELY(NULL == sync)) { + return OMPI_ERR_RMA_SYNC; + } + + OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, rdma_request); + + rdma_request->type = OMPI_OSC_RDMA_TYPE_PUT; + + ret = ompi_osc_rdma_put_w_req (sync, origin_addr, origin_count, origin_datatype, peer, target_disp, + target_count, target_datatype, rdma_request); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_RDMA_REQUEST_RETURN(rdma_request); + return ret; + } + + *request = (ompi_request_t *) rdma_request; + + return OMPI_SUCCESS; +} + +int ompi_osc_rdma_get (void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, + int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count, + ompi_datatype_t *source_datatype, ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_peer_t *peer; + ompi_osc_rdma_sync_t *sync; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "get: 0x%lx, %d, %s, %d, %d, %d, %s, %s", (unsigned long) origin_addr, + origin_count, origin_datatype->name, source_rank, (int) source_disp, source_count, + source_datatype->name, win->w_name); + + sync = ompi_osc_rdma_module_sync_lookup (module, source_rank, &peer); + if (OPAL_UNLIKELY(NULL == sync)) { + return OMPI_ERR_RMA_SYNC; + } + + return ompi_osc_rdma_get_w_req (sync, origin_addr, origin_count, origin_datatype, peer, + source_disp, source_count, source_datatype, NULL); +} + +int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, + int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count, + ompi_datatype_t 
*source_datatype, ompi_win_t *win, + ompi_request_t **request) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_peer_t *peer; + ompi_osc_rdma_request_t *rdma_request; + ompi_osc_rdma_sync_t *sync; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "rget: 0x%lx, %d, %s, %d, %d, %d, %s, %s", (unsigned long) origin_addr, + origin_count, origin_datatype->name, source_rank, (int) source_disp, source_count, + source_datatype->name, win->w_name); + + sync = ompi_osc_rdma_module_sync_lookup (module, source_rank, &peer); + if (OPAL_UNLIKELY(NULL == sync)) { + return OMPI_ERR_RMA_SYNC; + } + + OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, rdma_request); + + rdma_request->type = OMPI_OSC_RDMA_TYPE_GET; + ret = ompi_osc_rdma_get_w_req (sync, origin_addr, origin_count, origin_datatype, peer, + source_disp, source_count, source_datatype, rdma_request); + if (OMPI_SUCCESS != ret) { + OMPI_OSC_RDMA_REQUEST_RETURN(rdma_request); + return ret; + } + + *request = (ompi_request_t *) rdma_request; + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.h b/ompi/mca/osc/rdma/osc_rdma_comm.h new file mode 100644 index 00000000000..e9b048c56ee --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_comm.h @@ -0,0 +1,135 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OMPI_OSC_RDMA_COMM_H) +#define OMPI_OSC_RDMA_COMM_H + +#include "osc_rdma_dynamic.h" +#include "osc_rdma_request.h" +#include "osc_rdma_sync.h" +#include "osc_rdma_lock.h" + +#define OMPI_OSC_RDMA_DECODE_MAX 64 + +#define min(a,b) ((a) < (b) ? (a) : (b)) +#define ALIGNMENT_MASK(x) ((x) ? 
(x) - 1 : 0) + +/* helper functions */ +static inline void ompi_osc_rdma_cleanup_rdma (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_frag_t *frag, + mca_btl_base_registration_handle_t *handle, ompi_osc_rdma_request_t *request) +{ + if (frag) { + ompi_osc_rdma_frag_complete (frag); + } else { + ompi_osc_rdma_deregister (sync->module, handle); + } + + if (request) { + (void) OPAL_THREAD_ADD32 (&request->outstanding_requests, -1); + } + + ompi_osc_rdma_sync_rdma_dec (sync); +} + +/** + * @brief find a remote segment associate with the memory region + * + * @param[in] module osc rdma module + * @param[in] peer peer object for remote peer + * @param[in] target_disp displacement in remote region + * @param[in] length length of remote region + * @param[out] remote_address remote address + * @param[out] remote_handle btl handle for remote region (valid over entire region) + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERR_RMA_RANGE if the address range is not valid at the remote window + * @returns other OMPI error on error + */ +static inline int osc_rdma_get_remote_segment (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, OPAL_PTRDIFF_TYPE target_disp, + size_t length, uint64_t *remote_address, mca_btl_base_registration_handle_t **remote_handle) +{ + ompi_osc_rdma_region_t *region; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "getting remote address for peer %d target_disp %lu. 
peer flags: 0x%x", + peer->rank, (unsigned long) target_disp, peer->flags); + + if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor) { + ret = ompi_osc_rdma_find_dynamic_region (module, peer, (uint64_t) target_disp, length, ®ion); + if (OMPI_SUCCESS != ret) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "could not retrieve region for %" PRIx64 " from window rank %d", + (uint64_t) target_disp, peer->rank); + return ret; + } + + *remote_address = (uint64_t) target_disp; + *remote_handle = (mca_btl_base_registration_handle_t *) region->btl_handle_data; + } else { + ompi_osc_rdma_peer_extended_t *ex_peer = (ompi_osc_rdma_peer_extended_t *) peer; + int disp_unit = (module->same_disp_unit) ? module->disp_unit : ex_peer->disp_unit; + size_t size = (module->same_size) ? module->size : (size_t) ex_peer->size; + + *remote_address = ex_peer->super.base + disp_unit * target_disp; + if (OPAL_UNLIKELY(*remote_address + length > (ex_peer->super.base + size))) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "remote address range 0x%" PRIx64 " - 0x%" PRIx64 + " is out of range. Valid address range is 0x%" PRIx64 " - 0x%" PRIx64 " (%" PRIu64 " bytes)", + *remote_address, *remote_address + length, ex_peer->super.base, ex_peer->super.base + size, + (uint64_t) size); + return OMPI_ERR_RMA_RANGE; + } + + *remote_handle = ex_peer->super.base_handle; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "remote address: 0x%" PRIx64 ", handle: %p", *remote_address, (void *) *remote_handle); + + return OMPI_SUCCESS; +} + +/* prototypes for implementations of MPI RMA window functions. 
these will be called from the + * mpi interface (ompi/mpi/c) */ +int ompi_osc_rdma_put (const void *origin_addr, int origin_count, ompi_datatype_t *origin_dt, + int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, + ompi_datatype_t *target_dt, ompi_win_t *win); + +int ompi_osc_rdma_get (void *origin_addr, int origin_count, ompi_datatype_t *origin_dt, + int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, + ompi_datatype_t *target_dt, ompi_win_t *win); + +int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, ompi_datatype_t *origin_dt, + int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, + ompi_datatype_t *target_dt, ompi_win_t *win, + ompi_request_t **request); + +int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *origin_dt, + int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, + ompi_datatype_t *target_dt, ompi_win_t *win, + ompi_request_t **request); + +/** + * @brief read data from a remote memory region (blocking) + * + * @param[in] module osc rdma module + * @param[in] endpoint btl endpoint + * @param[in] source_address remote address to read from + * @param[in] source_handle btl registration handle for remote region (must be valid for the entire region) + * @param[in] data local buffer to store to + * @param[in] len number of bytes to read + * + * This is an internal function for reading data from a remote peer. It is used to read peer and state + * data that is stored on the remote peer. The peer object does not have to be fully initialized to + * work. Only the btl endpoint is needed. 
+ */ +int ompi_osc_get_data_blocking (ompi_osc_rdma_module_t *module, struct mca_btl_base_endpoint_t *endpoint, + uint64_t source_address, mca_btl_base_registration_handle_t *source_handle, + void *data, size_t len); + +#endif /* OMPI_OSC_RDMA_COMM_H */ diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c new file mode 100644 index 00000000000..4a99f1a49e5 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -0,0 +1,1251 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2008 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2006-2008 University of Houston. All rights reserved. + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012-2015 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "osc_rdma.h" +#include "osc_rdma_frag.h" +#include "osc_rdma_request.h" +#include "osc_rdma_active_target.h" +#include "osc_rdma_passive_target.h" +#include "osc_rdma_comm.h" +#include "osc_rdma_dynamic.h" +#include "osc_rdma_accumulate.h" + +#include "opal/threads/mutex.h" +#include "opal/util/arch.h" +#include "opal/util/argv.h" +#include "opal/align.h" +#if OPAL_CUDA_SUPPORT +#include "opal/datatype/opal_datatype_cuda.h" +#endif /* OPAL_CUDA_SUPPORT */ + +#include "ompi/info/info.h" +#include "ompi/communicator/communicator.h" +#include "ompi/mca/osc/osc.h" +#include "ompi/mca/osc/base/base.h" +#include "ompi/mca/osc/base/osc_base_obj_convert.h" +#include "ompi/mca/pml/pml.h" +#include "opal/mca/btl/base/base.h" +#include "opal/mca/base/mca_base_pvar.h" +#include "ompi/mca/bml/base/base.h" + +static int ompi_osc_rdma_component_register (void); +static int ompi_osc_rdma_component_init (bool enable_progress_threads, bool enable_mpi_threads); +static int ompi_osc_rdma_component_finalize (void); +static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, size_t size, int disp_unit, + struct ompi_communicator_t *comm, struct ompi_info_t *info, + int flavor); +static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, size_t size, int disp_unit, + struct ompi_communicator_t *comm, struct ompi_info_t *info, + int flavor, int *model); + +static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *info); +static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct ompi_info_t **info_used); + +static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl); + +static char *ompi_osc_rdma_btl_names; + +ompi_osc_rdma_component_t mca_osc_rdma_component = { + .super = { + .osc_version = { + OMPI_OSC_BASE_VERSION_3_0_0, + .mca_component_name = 
"rdma", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + .mca_register_component_params = ompi_osc_rdma_component_register + }, + .osc_data = { + /* The component is not checkpoint ready */ + MCA_BASE_METADATA_PARAM_NONE + }, + .osc_init = ompi_osc_rdma_component_init, + .osc_query = ompi_osc_rdma_component_query, + .osc_select = ompi_osc_rdma_component_select, + .osc_finalize = ompi_osc_rdma_component_finalize + } +}; + +ompi_osc_base_module_t ompi_osc_rdma_module_rdma_template = { + .osc_win_attach = ompi_osc_rdma_attach, + .osc_win_detach = ompi_osc_rdma_detach, + .osc_free = ompi_osc_rdma_free, + + .osc_put = ompi_osc_rdma_put, + .osc_get = ompi_osc_rdma_get, + .osc_accumulate = ompi_osc_rdma_accumulate, + .osc_compare_and_swap = ompi_osc_rdma_compare_and_swap, + .osc_fetch_and_op = ompi_osc_rdma_fetch_and_op, + .osc_get_accumulate = ompi_osc_rdma_get_accumulate, + + .osc_rput = ompi_osc_rdma_rput, + .osc_rget = ompi_osc_rdma_rget, + .osc_raccumulate = ompi_osc_rdma_raccumulate, + .osc_rget_accumulate = ompi_osc_rdma_rget_accumulate, + + .osc_fence = ompi_osc_rdma_fence_atomic, + + .osc_start = ompi_osc_rdma_start_atomic, + .osc_complete = ompi_osc_rdma_complete_atomic, + .osc_post = ompi_osc_rdma_post_atomic, + .osc_wait = ompi_osc_rdma_wait_atomic, + .osc_test = ompi_osc_rdma_test_atomic, + + .osc_lock = ompi_osc_rdma_lock_atomic, + .osc_unlock = ompi_osc_rdma_unlock_atomic, + .osc_lock_all = ompi_osc_rdma_lock_all_atomic, + .osc_unlock_all = ompi_osc_rdma_unlock_all_atomic, + + .osc_sync = ompi_osc_rdma_sync, + .osc_flush = ompi_osc_rdma_flush, + .osc_flush_all = ompi_osc_rdma_flush_all, + .osc_flush_local = ompi_osc_rdma_flush_local, + .osc_flush_local_all = ompi_osc_rdma_flush_local_all, + + .osc_set_info = ompi_osc_rdma_set_info, + .osc_get_info = ompi_osc_rdma_get_info +}; + +/* look up parameters for configuring this window. 
The code first + looks in the info structure passed by the user, then it checks + for a matching MCA variable. */ +static bool check_config_value_bool (char *key, ompi_info_t *info) +{ + int ret, flag, param; + bool result = false; + const bool *flag_value = &result; + + ret = ompi_info_get_bool (info, key, &result, &flag); + if (OMPI_SUCCESS == ret && flag) { + return result; + } + + param = mca_base_var_find("ompi", "osc", "rdma", key); + if (0 <= param) { + (void) mca_base_var_get_value(param, &flag_value, NULL, NULL); + } + + return flag_value[0]; +} + +static int ompi_osc_rdma_pvar_read (const struct mca_base_pvar_t *pvar, void *value, void *obj) +{ + ompi_win_t *win = (ompi_win_t *) obj; + ompi_osc_rdma_module_t *module = GET_MODULE(win); + int offset = (int) (intptr_t) pvar->ctx; + + memcpy (value, (char *) module + offset, sizeof (unsigned long)); + + return OMPI_SUCCESS; +} + +static int ompi_osc_rdma_component_register (void) +{ + mca_osc_rdma_component.no_locks = false; + (void) mca_base_component_var_register(&mca_osc_rdma_component.super.osc_version, + "no_locks", "Enable optimizations available only if MPI_LOCK is " + "not used. 
Info key of same name overrides this value (default: false)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.no_locks); + + mca_osc_rdma_component.acc_single_intrinsic = false; + (void) mca_base_component_var_register(&mca_osc_rdma_component.super.osc_version, "acc_single_intrinsic", + "Enable optimizations for MPI_Fetch_and_op, MPI_Accumulate, etc for codes " + "that will not use anything more than a single predefined datatype (default: false)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.acc_single_intrinsic); + + mca_osc_rdma_component.acc_use_amo = true; + (void) mca_base_component_var_register(&mca_osc_rdma_component.super.osc_version, "acc_use_amo", + "Enable the use of network atomic memory operations when using single " + "intrinsic optimizations. If not set network compare-and-swap will be " + "used instread (default: true)", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.acc_use_amo); + + mca_osc_rdma_component.buffer_size = 32768; + (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "buffer_size", + "Size of temporary buffers (default: 32k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, + NULL, 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, + &mca_osc_rdma_component.buffer_size); + + mca_osc_rdma_component.max_attach = 32; + (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "max_attach", + "Maximum number of buffers that can be attached to a dynamic window. 
" + "Keep in mind that each attached buffer will use a potentially limited " + "resource (default: 32)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, + OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.max_attach); + + mca_osc_rdma_component.aggregation_limit = 1024; + (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "aggregation_limit", + "Maximum size of an aggregated put/get. Messages are aggregated for consecutive" + "put and get operations. In some cases this may lead to higher latency but " + "should also lead to higher bandwidth utilization. Set to 0 to disable (default:" + " 1k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.aggregation_limit); + + mca_osc_rdma_component.priority = 90; + (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "priority", + "Priority of the osc/rdma component (default: 90)", + MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.priority); + + ompi_osc_rdma_btl_names = "openib,ugni"; + (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "btls", + "Comma-delimited list of BTL component names to allow without verifying " + "connectivity. 
Do not add a BTL to to this list unless it can reach all " + "processes in any communicator used with an MPI window (default: openib,ugni)", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_btl_names); + + + /* register performance variables */ + + (void) mca_base_component_pvar_register (&mca_osc_rdma_component.super.osc_version, "put_retry_count", + "Number of times put transaction were retried due to resource limitations", + OPAL_INFO_LVL_4, MCA_BASE_PVAR_CLASS_COUNTER, MCA_BASE_VAR_TYPE_UNSIGNED_LONG, + NULL, MCA_BASE_VAR_BIND_MPI_WIN, MCA_BASE_PVAR_FLAG_CONTINUOUS, + ompi_osc_rdma_pvar_read, NULL, NULL, + (void *) (intptr_t) offsetof (ompi_osc_rdma_module_t, put_retry_count)); + + (void) mca_base_component_pvar_register (&mca_osc_rdma_component.super.osc_version, "get_retry_count", + "Number of times get transaction were retried due to resource limitations", + OPAL_INFO_LVL_4, MCA_BASE_PVAR_CLASS_COUNTER, MCA_BASE_VAR_TYPE_UNSIGNED_LONG, + NULL, MCA_BASE_VAR_BIND_MPI_WIN, MCA_BASE_PVAR_FLAG_CONTINUOUS, + ompi_osc_rdma_pvar_read, NULL, NULL, + (void *) (intptr_t) offsetof (ompi_osc_rdma_module_t, get_retry_count)); + + return OMPI_SUCCESS; +} + +static int ompi_osc_rdma_component_init (bool enable_progress_threads, + bool enable_mpi_threads) +{ + int ret; + + OBJ_CONSTRUCT(&mca_osc_rdma_component.lock, opal_mutex_t); + OBJ_CONSTRUCT(&mca_osc_rdma_component.request_gc, opal_list_t); + OBJ_CONSTRUCT(&mca_osc_rdma_component.buffer_gc, opal_list_t); + OBJ_CONSTRUCT(&mca_osc_rdma_component.modules, opal_hash_table_t); + + opal_hash_table_init(&mca_osc_rdma_component.modules, 2); + + OBJ_CONSTRUCT(&mca_osc_rdma_component.frags, opal_free_list_t); + ret = opal_free_list_init (&mca_osc_rdma_component.frags, + sizeof(ompi_osc_rdma_frag_t), 8, + OBJ_CLASS(ompi_osc_rdma_frag_t), + mca_osc_rdma_component.buffer_size, 8, + 4, -1, 4, NULL, 0, NULL, NULL, NULL); + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, 
ompi_osc_base_framework.framework_output, + "%s:%d: opal_free_list_init_new failed: %d", + __FILE__, __LINE__, ret); + return ret; + } + + OBJ_CONSTRUCT(&mca_osc_rdma_component.requests, opal_free_list_t); + ret = opal_free_list_init (&mca_osc_rdma_component.requests, + sizeof(ompi_osc_rdma_request_t), 8, + OBJ_CLASS(ompi_osc_rdma_request_t), 0, 0, + 0, -1, 32, NULL, 0, NULL, NULL, NULL); + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: opal_free_list_init failed: %d\n", + __FILE__, __LINE__, ret); + } + + OBJ_CONSTRUCT(&mca_osc_rdma_component.aggregate, opal_free_list_t); + + if (!enable_mpi_threads && mca_osc_rdma_component.aggregation_limit) { + ret = opal_free_list_init (&mca_osc_rdma_component.aggregate, + sizeof(ompi_osc_rdma_aggregation_t), 8, + OBJ_CLASS(ompi_osc_rdma_aggregation_t), 0, 0, + 32, 128, 32, NULL, 0, NULL, NULL, NULL); + + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: opal_free_list_init failed: %d\n", + __FILE__, __LINE__, ret); + } + } else { + /* only enable put aggregation when not using threads */ + mca_osc_rdma_component.aggregation_limit = 0; + } + + return ret; +} + + +int ompi_osc_rdma_component_finalize (void) +{ + size_t num_modules; + + if (0 != (num_modules = opal_hash_table_get_size(&mca_osc_rdma_component.modules))) { + opal_output(ompi_osc_base_framework.framework_output, "WARNING: There were %d Windows created but " + "not freed.", (int) num_modules); + } + + OBJ_DESTRUCT(&mca_osc_rdma_component.frags); + OBJ_DESTRUCT(&mca_osc_rdma_component.modules); + OBJ_DESTRUCT(&mca_osc_rdma_component.lock); + OBJ_DESTRUCT(&mca_osc_rdma_component.requests); + OBJ_DESTRUCT(&mca_osc_rdma_component.request_gc); + OBJ_DESTRUCT(&mca_osc_rdma_component.buffer_gc); + OBJ_DESTRUCT(&mca_osc_rdma_component.aggregate); + + return OMPI_SUCCESS; +} + + +static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, size_t 
size, int disp_unit, + struct ompi_communicator_t *comm, struct ompi_info_t *info, + int flavor) +{ + + if (MPI_WIN_FLAVOR_SHARED == flavor) { + return -1; + } + +#if OPAL_CUDA_SUPPORT + /* GPU buffers are not supported by the rdma component */ + if (MPI_WIN_FLAVOR_CREATE == flavor) { + if (opal_cuda_check_bufs(*base, NULL)) { + return -1; + } + } +#endif /* OPAL_CUDA_SUPPORT */ + + if (OMPI_SUCCESS != ompi_osc_rdma_query_btls (comm, NULL)) { + return -1; + } + + + return mca_osc_rdma_component.priority; +} + +#define RANK_ARRAY_COUNT(module) ((ompi_comm_size ((module)->comm) + (module)->node_count - 1) / (module)->node_count) + +static int ompi_osc_rdma_initialize_region (ompi_osc_rdma_module_t *module, void **base, size_t size) { + ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *) module->state->regions; + int ret; + + /* store displacement unit */ + module->state->disp_unit = module->disp_unit; + + /* store region info */ + module->state->region_count = 1; + region->base = (osc_rdma_base_t) (intptr_t) *base; + region->len = size; + + if (module->selected_btl->btl_register_mem && size) { + if (MPI_WIN_FLAVOR_ALLOCATE != module->flavor || NULL == module->state_handle) { + ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, *base, size, MCA_BTL_REG_FLAG_ACCESS_ANY, + &module->base_handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + memcpy (region->btl_handle_data, module->base_handle, module->selected_btl->btl_registration_handle_size); + } else { + memcpy (region->btl_handle_data, module->state_handle, module->selected_btl->btl_registration_handle_size); + } + } + + return OMPI_SUCCESS; +} + +static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, size_t size) +{ + size_t total_size, local_rank_array_size, leader_peer_data_size; + ompi_osc_rdma_peer_t *my_peer; + int ret, my_rank; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocating private internal state"); + + my_rank = 
ompi_comm_rank (module->comm); + + local_rank_array_size = sizeof (ompi_osc_rdma_rank_data_t) * RANK_ARRAY_COUNT(module); + leader_peer_data_size = module->region_size * module->node_count; + + /* allocate anything that will be accessed remotely in the same region. this cuts down on the number of + * registration handles needed to access this data. */ + total_size = module->state_size + local_rank_array_size + leader_peer_data_size; + + if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + total_size += size; + } + + /* the local data is ordered as follows: rank array (leader, offset mapping), state, leader peer data, and base + * (if using MPI_Win_allocate). In this case the leader peer data array does not need to be stored in the same + * segment but placing it there simplifies the peer data fetch and cleanup code. */ + + module->rank_array = calloc (total_size, 1); + if (OPAL_UNLIKELY(NULL == module->rank_array)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + module->state_offset = local_rank_array_size; + + module->state = (ompi_osc_rdma_state_t *) ((intptr_t) module->rank_array + module->state_offset); + module->node_comm_info = (unsigned char *) ((intptr_t) module->state + module->state_size); + + if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + *base = (void *) ((intptr_t) module->node_comm_info + leader_peer_data_size); + } + + /* just go ahead and register the whole segment */ + ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, module->rank_array, total_size, + MCA_BTL_REG_FLAG_ACCESS_ANY, &module->state_handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor) { + ret = ompi_osc_rdma_initialize_region (module, base, size); + if (OMPI_SUCCESS != ret) { + return ret; + } + } + + ret = ompi_osc_rdma_new_peer (module, my_rank, &my_peer); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + ret = ompi_osc_module_add_peer (module, my_peer); + if (OPAL_UNLIKELY(OPAL_SUCCESS != 
ret)) { + OBJ_RELEASE(my_peer); + return ret; + } + + module->free_after = module->rank_array; + my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE; + my_peer->state = (uint64_t) (uintptr_t) module->state; + + if (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB) { + /* all peers are local or it is safe to mix cpu and nic atomics */ + my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE; + } else { + /* use my endpoint handle to modify the peer's state */ + my_peer->state_handle = module->state_handle; + my_peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, my_rank); + } + + if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor) { + ompi_osc_rdma_peer_extended_t *ex_peer = (ompi_osc_rdma_peer_extended_t *) my_peer; + + ex_peer->super.base = (intptr_t) *base; + + if (!module->same_size) { + ex_peer->size = size; + } + + if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + ex_peer->super.base_handle = module->state_handle; + } + } + + return OMPI_SUCCESS; +} + +struct _local_data { + int rank; + size_t size; +}; + +static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, size_t size) +{ + ompi_communicator_t *shared_comm; + unsigned long offset, total_size; + unsigned long state_base, data_base; + int local_rank, local_size, ret; + size_t local_rank_array_size, leader_peer_data_size; + int my_rank = ompi_comm_rank (module->comm); + int global_size = ompi_comm_size (module->comm); + ompi_osc_rdma_region_t *state_region; + int my_base_offset = 0; + struct _local_data *temp; + char *data_file; + + shared_comm = module->shared_comm; + + local_rank = ompi_comm_rank (shared_comm); + local_size = ompi_comm_size (shared_comm); + + if (1 == local_size) { + /* no point using a shared segment if there are no other processes on this node */ + return allocate_state_single (module, base, size); + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocating shared internal state"); + + local_rank_array_size = sizeof (ompi_osc_rdma_rank_data_t) * 
RANK_ARRAY_COUNT (module); + leader_peer_data_size = module->region_size * module->node_count; + + /* calculate base offsets */ + module->state_offset = state_base = local_rank_array_size + module->region_size; + data_base = state_base + leader_peer_data_size + module->state_size * local_size; + + do { + temp = calloc (local_size, sizeof (temp[0])); + if (NULL == temp) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + break; + } + + temp[local_rank].rank = my_rank; + temp[local_rank].size = size; + + /* gather the local sizes and ranks */ + ret = shared_comm->c_coll.coll_allgather (MPI_IN_PLACE, sizeof (*temp), MPI_BYTE, temp, sizeof (*temp), + MPI_BYTE, shared_comm, shared_comm->c_coll.coll_allgather_module); + if (OMPI_SUCCESS != ret) { + break; + } + + total_size = data_base; + + if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + for (int i = 0 ; i < local_size ; ++i) { + if (local_rank == i) { + my_base_offset = total_size; + } + total_size += temp[i].size; + } + } + + /* allocate the shared memory segment */ + ret = asprintf (&data_file, "%s"OPAL_PATH_SEP"window_%d.%s", + ompi_process_info.job_session_dir, ompi_comm_get_cid (module->comm), + ompi_process_info.nodename); + if (0 > ret) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + break; + } + + if (0 == local_rank) { + /* allocate enough space for the state + data for all local ranks */ + ret = opal_shmem_segment_create (&module->seg_ds, data_file, total_size); + free (data_file); + if (OPAL_SUCCESS != ret) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create shared memory segment"); + break; + } + } + + ret = module->comm->c_coll.coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0, + shared_comm, shared_comm->c_coll.coll_bcast_module); + if (OMPI_SUCCESS != ret) { + break; + } + + module->segment_base = opal_shmem_segment_attach (&module->seg_ds); + if (NULL == module->segment_base) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to attach to the shared memory segment"); + ret = OPAL_ERROR; + break; 
+ } + + if (size && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + *base = (void *)((intptr_t) module->segment_base + my_base_offset); + } + + module->rank_array = (ompi_osc_rdma_rank_data_t *) module->segment_base; + /* put local state region data after the rank array */ + state_region = (ompi_osc_rdma_region_t *) ((uintptr_t) module->segment_base + local_rank_array_size); + module->state = (ompi_osc_rdma_state_t *) ((uintptr_t) module->segment_base + state_base + module->state_size * local_rank); + + /* all local ranks share the array containing the peer data of leader ranks */ + module->node_comm_info = (unsigned char *) ((uintptr_t) module->segment_base + state_base + module->state_size * local_size); + + /* initialize my state */ + memset (module->state, 0, module->state_size); + + if (0 == local_rank) { + /* just go ahead and register the whole segment */ + ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, module->segment_base, total_size, MCA_BTL_REG_FLAG_ACCESS_ANY, + &module->state_handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + break; + } + + state_region->base = (intptr_t) module->segment_base; + if (module->state_handle) { + memcpy (state_region->btl_handle_data, module->state_handle, module->selected_btl->btl_registration_handle_size); + } + } + + if (MPI_WIN_FLAVOR_CREATE == module->flavor) { + ret = ompi_osc_rdma_initialize_region (module, base, size); + if (OMPI_SUCCESS != ret) { + break; + } + } + + /* barrier to make sure all ranks have attached */ + shared_comm->c_coll.coll_barrier(shared_comm, shared_comm->c_coll.coll_barrier_module); + + /* unlink the shared memory backing file */ + if (0 == local_rank) { + opal_shmem_unlink (&module->seg_ds); + } + + if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *) module->state->regions; + module->state->disp_unit = module->disp_unit; + module->state->region_count = 1; + region->base = state_region->base + my_base_offset; + 
region->len = size; + if (module->selected_btl->btl_register_mem) { + memcpy (region->btl_handle_data, state_region->btl_handle_data, module->selected_btl->btl_registration_handle_size); + } + } + + /* barrier to make sure all ranks have attached */ + shared_comm->c_coll.coll_barrier(shared_comm, shared_comm->c_coll.coll_barrier_module); + + offset = data_base; + for (int i = 0 ; i < local_size ; ++i) { + ompi_osc_rdma_peer_extended_t *ex_peer; + ompi_osc_rdma_state_t *peer_state; + ompi_osc_rdma_peer_t *peer; + int peer_rank = temp[i].rank; + + ret = ompi_osc_rdma_new_peer (module, peer_rank, &peer); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + break; + } + + ex_peer = (ompi_osc_rdma_peer_extended_t *) peer; + + /* peer state local pointer */ + peer_state = (ompi_osc_rdma_state_t *) ((uintptr_t) module->segment_base + state_base + module->state_size * i); + + if (local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB)) { + /* all peers are local or it is safe to mix cpu and nic atomics */ + peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE; + peer->state = (osc_rdma_counter_t) peer_state; + } else { + /* use my endpoint handle to modify the peer's state */ + if (module->selected_btl->btl_register_mem) { + peer->state_handle = (mca_btl_base_registration_handle_t *) state_region->btl_handle_data; + } + peer->state = (osc_rdma_counter_t) ((uintptr_t) state_region->base + state_base + module->state_size * i); + peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, temp[0].rank); + } + + /* finish setting up the local peer structure */ + if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor) { + if (!module->same_disp_unit) { + ex_peer->disp_unit = peer_state->disp_unit; + } + + if (!module->same_size) { + ex_peer->size = temp[i].size; + } + + if (my_rank == peer_rank) { + peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE; + } + + if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + if (temp[i].size) { + ex_peer->super.base = 
state_region->base + offset; + offset += temp[i].size; + } else { + ex_peer->super.base = 0; + } + } + + ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions; + + ex_peer->super.base = peer_region->base; + if (module->selected_btl->btl_register_mem) { + ex_peer->super.base_handle = (mca_btl_base_registration_handle_t *) peer_region->btl_handle_data; + } + } + + ompi_osc_module_add_peer (module, peer); + } + } while (0); + + free (temp); + + return ret; +} + +static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl) +{ + struct mca_btl_base_module_t **possible_btls = NULL; + int comm_size = ompi_comm_size (comm); + int rc = OMPI_SUCCESS, max_btls = 0; + unsigned int selected_latency = INT_MAX; + struct mca_btl_base_module_t *selected_btl = NULL; + mca_btl_base_selected_module_t *item; + int *btl_counts = NULL; + char **btls_to_use; + void *tmp; + + btls_to_use = opal_argv_split (ompi_osc_rdma_btl_names, ','); + if (btls_to_use) { + /* rdma and atomics are only supported with BTLs at the moment */ + OPAL_LIST_FOREACH(item, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) { + for (int i = 0 ; btls_to_use[i] ; ++i) { + if (0 != strcmp (btls_to_use[i], item->btl_module->btl_component->btl_version.mca_component_name)) { + continue; + } + + if ((item->btl_module->btl_flags & (MCA_BTL_FLAGS_RDMA)) == MCA_BTL_FLAGS_RDMA && + (item->btl_module->btl_flags & (MCA_BTL_FLAGS_ATOMIC_FOPS | MCA_BTL_FLAGS_ATOMIC_OPS))) { + if (!selected_btl || item->btl_module->btl_latency < selected_btl->btl_latency) { + selected_btl = item->btl_module; + } + } + } + } + + opal_argv_free (btls_to_use); + } + + if (btl) { + *btl = selected_btl; + } + + if (NULL != selected_btl) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "selected btl: %s", + selected_btl->btl_component->btl_version.mca_component_name); + return OMPI_SUCCESS; + } + + for (int i = 0 ; i < comm_size ; ++i) { + ompi_proc_t *proc = 
ompi_comm_peer_lookup (comm, i); + mca_bml_base_endpoint_t *endpoint; + int num_btls, prev_max; + + endpoint = mca_bml_base_get_endpoint (proc); + if (NULL == endpoint) { + /* can't continue if some peer is unreachable */ + rc = OMPI_ERR_UNREACH; + break; + } + + num_btls = mca_bml_base_btl_array_get_size (&endpoint->btl_rdma); + if (0 == num_btls) { + rc = OMPI_ERR_NOT_AVAILABLE; + /* at least one rank doesn't have an RDMA capable btl */ + break; + } + + prev_max = max_btls; + + max_btls = (max_btls > num_btls) ? max_btls : num_btls; + + tmp = realloc (possible_btls, sizeof (void *) * max_btls); + if (NULL == tmp) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + break; + } + possible_btls = tmp; + + for (int j = prev_max ; j < max_btls ; ++j) { + possible_btls[j] = NULL; + } + + tmp = realloc (btl_counts, sizeof (int) * max_btls); + if (NULL == tmp) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + break; + } + btl_counts = tmp; + + for (int i_btl = 0 ; i_btl < num_btls ; ++i_btl) { + /* for this implementation we need only compare-and-swap and fetch-and-add */ + if ((endpoint->btl_rdma.bml_btls[i_btl].btl->btl_flags & (MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS)) == + (MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS) && (endpoint->btl_rdma.bml_btls[i_btl].btl->btl_atomic_flags & + MCA_BTL_ATOMIC_SUPPORTS_ADD)) { + for (int j = 0 ; j < max_btls ; ++j) { + if (endpoint->btl_rdma.bml_btls[i_btl].btl == possible_btls[j]) { + ++btl_counts[j]; + break; + } else if (NULL == possible_btls[j]) { + possible_btls[j] = endpoint->btl_rdma.bml_btls[i_btl].btl; + btl_counts[j] = 1; + break; + } + } + } + } + } + + if (OMPI_SUCCESS != rc) { + free (possible_btls); + free (btl_counts); + + /* no btl = no rdma/atomics */ + return OMPI_ERR_NOT_AVAILABLE; + } + + for (int i = 0 ; i < max_btls ; ++i) { + if (NULL == possible_btls[i]) { + break; + } + + if (btl_counts[i] == comm_size && possible_btls[i]->btl_latency < selected_latency) { + selected_btl = possible_btls[i]; + selected_latency = 
possible_btls[i]->btl_latency; + } + } + + free (possible_btls); + free (btl_counts); + + if (btl) { + *btl = selected_btl; + } + + if (NULL == selected_btl) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no suitable btls found"); + /* no btl = no rdma/atomics */ + return OMPI_ERR_NOT_AVAILABLE; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "selected btl: %s", + selected_btl->btl_component->btl_version.mca_component_name); + + return OMPI_SUCCESS; +} + +static int ompi_osc_rdma_share_data (ompi_osc_rdma_module_t *module) +{ + ompi_osc_rdma_region_t *my_data; + int ret, global_result; + int my_rank = ompi_comm_rank (module->comm); + int comm_size = ompi_comm_size (module->comm); + ompi_osc_rdma_rank_data_t *temp; + + do { + temp = malloc (sizeof (*temp) * comm_size); + if (NULL == temp) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + break; + } + + /* fill in rank -> node translation */ + temp[my_rank].node_id = module->node_id; + temp[my_rank].rank = ompi_comm_rank (module->shared_comm); + + ret = module->comm->c_coll.coll_allgather (MPI_IN_PLACE, 1, MPI_2INT, temp, 1, MPI_2INT, + module->comm, module->comm->c_coll.coll_allgather_module); + if (OMPI_SUCCESS != ret) { + break; + } + + if (0 == ompi_comm_rank (module->shared_comm)) { + /* fill in my part of the node array */ + my_data = (ompi_osc_rdma_region_t *) ((intptr_t) module->node_comm_info + ompi_comm_rank (module->local_leaders) * + module->region_size); + + my_data->base = (uint64_t) (intptr_t) module->rank_array; + /* store my rank in the length field */ + my_data->len = (osc_rdma_size_t) my_rank; + + if (module->selected_btl->btl_register_mem) { + memcpy (my_data->btl_handle_data, module->state_handle, module->selected_btl->btl_registration_handle_size); + } + + /* gather state data at each node leader */ + if (ompi_comm_size (module->local_leaders) > 1) { + ret = module->local_leaders->c_coll.coll_allgather (MPI_IN_PLACE, module->region_size, MPI_BYTE, module->node_comm_info, + module->region_size, MPI_BYTE, 
module->local_leaders, + module->local_leaders->c_coll.coll_allgather_module); + if (OMPI_SUCCESS != ret) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "leader allgather failed with ompi error code %d", ret); + break; + } + } + + int base_rank = ompi_comm_rank (module->local_leaders) * ((comm_size + module->node_count - 1) / module->node_count); + + /* fill in the local part of the rank -> node map */ + for (int i = 0 ; i < RANK_ARRAY_COUNT(module) ; ++i) { + int save_rank = base_rank + i; + if (save_rank >= comm_size) { + break; + } + + module->rank_array[i] = temp[save_rank]; + } + } + } while (0); + + /* release the scratch buffer outside the loop so the error paths that break out early do not leak it (temp is NULL if the allocation failed and free (NULL) is a no-op) */ + free (temp); + + ret = module->comm->c_coll.coll_allreduce (&ret, &global_result, 1, MPI_INT, MPI_MIN, module->comm, + module->comm->c_coll.coll_allreduce_module); + + if (OMPI_SUCCESS != ret) { + global_result = ret; + } + + /* none of these communicators are needed anymore so free them now */ + if (MPI_COMM_NULL != module->local_leaders) { + ompi_comm_free (&module->local_leaders); + } + + if (MPI_COMM_NULL != module->shared_comm) { + ompi_comm_free (&module->shared_comm); + } + + return global_result; +} + +static int ompi_osc_rdma_create_groups (ompi_osc_rdma_module_t *module) +{ + int comm_rank, ret, local_rank; + int values[2] = {0, 0}; + + /* create a shared communicator to handle communication about the local segment */ + ret = ompi_comm_split_type (module->comm, MPI_COMM_TYPE_SHARED, 0, NULL, &module->shared_comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create a shared memory communicator. error code %d", ret); + return ret; + } + + local_rank = ompi_comm_rank (module->shared_comm); + + comm_rank = ompi_comm_rank (module->comm); + + ret = ompi_comm_split (module->comm, (0 == local_rank) ? 0 : MPI_UNDEFINED, comm_rank, &module->local_leaders, + false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create local leaders communicator.
error code %d", ret); + return ret; + } + + if (0 == local_rank) { + values[0] = ompi_comm_size (module->local_leaders); + values[1] = ompi_comm_rank (module->local_leaders); + } + + if (ompi_comm_size (module->shared_comm) > 1) { + ret = module->shared_comm->c_coll.coll_bcast (values, 2, MPI_INT, 0, module->shared_comm, + module->shared_comm->c_coll.coll_bcast_module); + if (OMPI_SUCCESS != ret) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to broadcast local data. error code %d", ret); + return ret; + } + } + + module->node_count = values[0]; + module->node_id = values[1]; + + return OMPI_SUCCESS; +} + +/** + * @brief check the displacement unit and size against peers + * + * @param[in] module osc rdma module + * @param[in] disp_unit the displacement unit for this process + * @param[in] size the window size for this process + * + * This function checks if all ranks have the same displacement unit or size and sets the appropriate + * flags on the module. + */ +static int ompi_osc_rdma_check_parameters (ompi_osc_rdma_module_t *module, int disp_unit, size_t size) +{ + long values[4]; + int ret; + + if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor || (module->same_size && module->same_disp_unit)) { + /* done */ + return OMPI_SUCCESS; + } + + /* check displacements and sizes */ + values[0] = disp_unit; + values[1] = -disp_unit; + values[2] = size; + values[3] = -(ssize_t) size; + + ret = module->comm->c_coll.coll_allreduce (MPI_IN_PLACE, values, 4, MPI_LONG, MPI_MIN, module->comm, + module->comm->c_coll.coll_allreduce_module); + if (OMPI_SUCCESS != ret) { + return ret; + } + + if (values[0] == -values[1]) { + /* same displacement */ + module->same_disp_unit = true; + } + + if (values[2] == -values[3]) { + /* same size */ + module->same_size = true; + } + + return OMPI_SUCCESS; +} + + +static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, size_t size, int disp_unit, + struct ompi_communicator_t *comm, struct ompi_info_t *info, + int flavor, 
int *model) +{ + ompi_osc_rdma_module_t *module = NULL; + int world_size = ompi_comm_size (comm); + int init_limit = 256; + int ret; + char *name; + + /* the osc/sm component is the exclusive provider for support for shared + * memory windows */ + if (MPI_WIN_FLAVOR_SHARED == flavor) { + return OMPI_ERR_NOT_SUPPORTED; + } + + /* create module structure with all fields initialized to zero */ + module = (ompi_osc_rdma_module_t *) calloc (1, sizeof (ompi_osc_rdma_module_t)); + if (NULL == module) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* initialize the objects, so that always free in cleanup */ + OBJ_CONSTRUCT(&module->lock, opal_recursive_mutex_t); + OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t); + OBJ_CONSTRUCT(&module->pending_posts, opal_list_t); + OBJ_CONSTRUCT(&module->peer_lock, opal_mutex_t); + OBJ_CONSTRUCT(&module->all_sync, ompi_osc_rdma_sync_t); + + module->same_disp_unit = check_config_value_bool ("same_disp_unit", info); + module->same_size = check_config_value_bool ("same_size", info); + module->no_locks = check_config_value_bool ("no_locks", info); + module->acc_single_intrinsic = check_config_value_bool ("ompi_single_accumulate", info); + module->acc_use_amo = mca_osc_rdma_component.acc_use_amo; + + module->all_sync.module = module; + + module->flavor = flavor; + module->win = win; + module->disp_unit = disp_unit; + module->size = size; + + /* set the module so we properly cleanup */ + win->w_osc_module = (ompi_osc_base_module_t*) module; + + if (!module->no_locks) { + if (world_size > init_limit) { + ret = opal_hash_table_init (&module->outstanding_locks, init_limit); + if (OPAL_SUCCESS != ret) { + ompi_osc_rdma_free (win); + return ret; + } + } else { + module->outstanding_lock_array = calloc (world_size, sizeof (module->outstanding_lock_array[0])); + if (NULL == module->outstanding_lock_array) { + ompi_osc_rdma_free (win); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + } + + ret = ompi_comm_dup(comm, &module->comm); + if 
(OMPI_SUCCESS != ret) { + ompi_osc_rdma_free (win); + return ret; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "creating osc/rdma window of flavor %d with id %d", + flavor, ompi_comm_get_cid(module->comm)); + + /* peer data */ + if (world_size > init_limit) { + OBJ_CONSTRUCT(&module->peer_hash, opal_hash_table_t); + ret = opal_hash_table_init (&module->peer_hash, init_limit); + } else { + module->peer_array = calloc (world_size, sizeof (ompi_osc_rdma_peer_t *)); + if (NULL == module->peer_array) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + } + } + + if (OPAL_SUCCESS != ret) { + ompi_osc_rdma_free (win); + return ret; + } + + /* find rdma capable endpoints */ + ret = ompi_osc_rdma_query_btls (module->comm, &module->selected_btl); + if (OMPI_SUCCESS != ret) { + ompi_osc_rdma_free (win); + return ret; + } + + /* calculate and store various structure sizes */ + + module->region_size = module->selected_btl->btl_registration_handle_size + sizeof (ompi_osc_rdma_region_t); + + module->state_size = sizeof (ompi_osc_rdma_state_t); + + if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor) { + module->state_size += module->region_size; + } else { + module->state_size += mca_osc_rdma_component.max_attach * module->region_size; + } + + /* fill in the function pointer part */ + memcpy(&module->super, &ompi_osc_rdma_module_rdma_template, sizeof(module->super)); + + ret = ompi_osc_rdma_check_parameters (module, disp_unit, size); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + ompi_osc_rdma_free (win); + return ret; + } + + ret = ompi_osc_rdma_create_groups (module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + ompi_osc_rdma_free (win); + return ret; + } + + /* fill in our part */ + ret = allocate_state_shared (module, base, size); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to allocate internal state"); + ompi_osc_rdma_free (win); + return ret; + } + + if (MPI_WIN_FLAVOR_DYNAMIC == flavor) { + /* allocate space to store local btl handles for 
attached regions */ + module->dynamic_handles = (ompi_osc_rdma_handle_t *) calloc (mca_osc_rdma_component.max_attach, + sizeof (module->dynamic_handles[0])); + if (NULL == module->dynamic_handles) { + ompi_osc_rdma_free (win); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + /* lock data */ + if (module->no_locks) { + win->w_flags |= OMPI_WIN_NO_LOCKS; + } + + if (module->same_size) { + win->w_flags |= OMPI_WIN_SAME_SIZE; + } + + if (module->same_disp_unit) { + win->w_flags |= OMPI_WIN_SAME_DISP; + } + + /* update component data */ + OPAL_THREAD_LOCK(&mca_osc_rdma_component.lock); + ret = opal_hash_table_set_value_uint32(&mca_osc_rdma_component.modules, + ompi_comm_get_cid(module->comm), + module); + OPAL_THREAD_UNLOCK(&mca_osc_rdma_component.lock); + if (OMPI_SUCCESS != ret) { + ompi_osc_rdma_free (win); + return ret; + } + + /* fill in window information */ + *model = MPI_WIN_UNIFIED; + win->w_osc_module = (ompi_osc_base_module_t*) module; + asprintf(&name, "rdma window %d", ompi_comm_get_cid(module->comm)); + ompi_win_set_name(win, name); + free(name); + + /* sync memory - make sure all initialization completed */ + opal_atomic_mb(); + + ret = ompi_osc_rdma_share_data (module); + if (OMPI_SUCCESS != ret) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to share window data with peers"); + ompi_osc_rdma_free (win); + } else { + /* for now the leader is always rank 0 in the communicator */ + module->leader = ompi_osc_rdma_module_peer (module, 0); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %d", + ompi_comm_get_cid(module->comm)); + } + + return ret; +} + + +static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *info) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + bool temp; + + temp = check_config_value_bool ("no_locks", info); + if (temp && !module->no_locks) { + /* clean up the lock hash. 
it is up to the user to ensure no lock is + * outstanding from this process when setting the info key */ + OBJ_DESTRUCT(&module->outstanding_locks); + OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t); + + module->no_locks = true; + win->w_flags |= OMPI_WIN_NO_LOCKS; + } else if (!temp && module->no_locks) { + int world_size = ompi_comm_size (module->comm); + int init_limit = world_size > 256 ? 256 : world_size; + int ret; + + ret = opal_hash_table_init (&module->outstanding_locks, init_limit); + if (OPAL_SUCCESS != ret) { + return ret; + } + + module->no_locks = false; + win->w_flags &= ~OMPI_WIN_NO_LOCKS; + } + + /* enforce collectiveness... */ + return module->comm->c_coll.coll_barrier(module->comm, + module->comm->c_coll.coll_barrier_module); +} + + +static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct ompi_info_t **info_used) +{ + ompi_info_t *info = OBJ_NEW(ompi_info_t); + + if (NULL == info) { + return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + } + + *info_used = info; + + return OMPI_SUCCESS; +} + +OBJ_CLASS_INSTANCE(ompi_osc_rdma_aggregation_t, opal_list_item_t, NULL, NULL); diff --git a/ompi/mca/osc/rdma/osc_rdma_dynamic.c b/ompi/mca/osc/rdma/osc_rdma_dynamic.c new file mode 100644 index 00000000000..c1c21581e94 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_dynamic.c @@ -0,0 +1,397 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "osc_rdma_comm.h" +#include "osc_rdma_lock.h" + +#include "mpi.h" + +#include "opal/util/sys_limits.h" + +/** + * ompi_osc_rdma_find_region_containing: + * + * @param[in] regions sorted list of regions + * @param[in] min_index minimum index to search (call with 0) + * @param[in] max_index maximum index to search (call with length - 1) + * @param[in] base base of region to search for + * @param[in] bound bound of region to search for + * @param[in] region_size size of an ompi_osc_rdma_region_t object + * @param[out] region_index index of region if found (may be NULL) + * + * @returns an index on success or -1 on failure + * + * This function searches through a sorted list of rdma regions {regions} and finds + * the region that contains the region specified by {base} and {bound}. If a + * matching region is found the index of that region is returned else the function + * returns -1. + */ +static inline ompi_osc_rdma_region_t *ompi_osc_rdma_find_region_containing (ompi_osc_rdma_region_t *regions, int min_index, + int max_index, intptr_t base, intptr_t bound, + size_t region_size, int *region_index) +{ + int mid_index = (max_index + min_index) >> 1; + ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *)((intptr_t) regions + mid_index * region_size); + intptr_t region_bound; + + if (min_index > max_index) { + return NULL; + } + + region_bound = (intptr_t) (region->base + region->len); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "checking memory region %p-%p against %p-%p (index %d) (min_index = %d, max_index = %d)", + (void *) base, (void *) bound, (void *) region->base, (void *)(region->base + region->len), mid_index, + min_index, max_index); + + if (region->base > base) { + return ompi_osc_rdma_find_region_containing (regions, min_index, mid_index-1, base, bound, region_size, region_index); + } else if (bound <= region_bound) { + if (region_index) { + *region_index = 
mid_index; + } + + return region; + } + + return ompi_osc_rdma_find_region_containing (regions, mid_index+1, max_index, base, bound, region_size, region_index); +} + +/* binary search for insertion point */ +static ompi_osc_rdma_region_t *find_insertion_point (ompi_osc_rdma_region_t *regions, int min_index, int max_index, intptr_t base, + size_t region_size, int *region_index) +{ + int mid_index = (max_index + min_index) >> 1; + ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *)((intptr_t) regions + mid_index * region_size); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "find_insertion_point (%d, %d, %lx, %lu)\n", min_index, max_index, base, region_size); + + if (max_index < min_index) { + *region_index = min_index; + return (ompi_osc_rdma_region_t *)((intptr_t) regions + min_index * region_size); + } + + if (region->base > base) { + return find_insertion_point (regions, min_index, mid_index-1, base, region_size, region_index); + } else { + return find_insertion_point (regions, mid_index+1, max_index, base, region_size, region_index); + } +} + +int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + const int my_rank = ompi_comm_rank (module->comm); + ompi_osc_rdma_peer_t *my_peer = ompi_osc_rdma_module_peer (module, my_rank); + ompi_osc_rdma_region_t *region; + osc_rdma_counter_t region_count; + osc_rdma_counter_t region_id; + void *bound; + intptr_t page_size = opal_getpagesize (); + int region_index; + int ret; + + if (module->flavor != MPI_WIN_FLAVOR_DYNAMIC) { + return OMPI_ERR_RMA_FLAVOR; + } + + if (0 == len) { + /* shot-circuit 0-byte case */ + return OMPI_SUCCESS; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach: %s, %p, %lu", win->w_name, base, (unsigned long) len); + + OPAL_THREAD_LOCK(&module->lock); + + region_count = module->state->region_count & 0xffffffffL; + region_id = module->state->region_count >> 32; + + if (region_count == 
mca_osc_rdma_component.max_attach) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_RMA_ATTACH; + } + + /* it is wasteful to register less than a page. this may allow the remote side to access more + * memory but the MPI standard covers this by calling the calling behavior erroneous */ + bound = (void *)OPAL_ALIGN((intptr_t) base + len, page_size, intptr_t); + base = (void *)((intptr_t) base & ~(page_size - 1)); + len = (size_t)((intptr_t) bound - (intptr_t) base); + + /* see if a matching region already exists */ + region = ompi_osc_rdma_find_region_containing ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1, (intptr_t) base, + (intptr_t) bound, module->region_size, &region_index); + if (NULL != region) { + ++module->dynamic_handles[region_index].refcnt; + OPAL_THREAD_UNLOCK(&module->lock); + /* no need to invalidate remote caches */ + return OMPI_SUCCESS; + } + + /* region is in flux */ + module->state->region_count = -1; + opal_atomic_wmb (); + + ompi_osc_rdma_lock_acquire_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock)); + + /* do a binary search for where the region should be inserted */ + if (region_count) { + region = find_insertion_point ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1, (intptr_t) base, + module->region_size, &region_index); + + if (region_index < region_count) { + memmove ((void *) ((intptr_t) region + module->region_size), region, (region_count - region_index) * module->region_size); + + if (module->selected_btl->btl_register_mem) { + memmove (module->dynamic_handles + region_index + 1, module->dynamic_handles + region_index, + (region_count - region_index) * sizeof (module->dynamic_handles[0])); + } + } + } else { + region_index = 0; + region = (ompi_osc_rdma_region_t *) module->state->regions; + } + + region->base = (intptr_t) base; + region->len = len; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "attaching dynamic memory region {%p, %p} at index %d", + base,
(void *)((intptr_t) base + len), region_index); + + if (module->selected_btl->btl_register_mem) { + mca_btl_base_registration_handle_t *handle; + + ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, (void *) region->base, region->len, MCA_BTL_REG_FLAG_ACCESS_ANY, + &handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_RMA_ATTACH; + } + + memcpy (region->btl_handle_data, handle, module->selected_btl->btl_registration_handle_size); + module->dynamic_handles[region_index].btl_handle = handle; + } else { + module->dynamic_handles[region_index].btl_handle = NULL; + } + + module->dynamic_handles[region_index].refcnt = 1; + +#if OPAL_ENABLE_DEBUG + for (int i = 0 ; i < region_count + 1 ; ++i) { + region = (ompi_osc_rdma_region_t *) ((intptr_t) module->state->regions + i * module->region_size); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, " dynamic region %d: {%p, %lu}", i, + (void *) region->base, (unsigned long) region->len); + } +#endif + + opal_atomic_mb (); + /* the region state has changed */ + module->state->region_count = ((region_id + 1) << 32) | (region_count + 1); + + ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock)); + OPAL_THREAD_UNLOCK(&module->lock); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach complete"); + + return OMPI_SUCCESS; +} + + +int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + const int my_rank = ompi_comm_rank (module->comm); + ompi_osc_rdma_peer_dynamic_t *my_peer = (ompi_osc_rdma_peer_dynamic_t *) ompi_osc_rdma_module_peer (module, my_rank); + osc_rdma_counter_t region_count, region_id; + ompi_osc_rdma_region_t *region; + int region_index; + + if (module->flavor != MPI_WIN_FLAVOR_DYNAMIC) { + return OMPI_ERR_WIN; + } + + OPAL_THREAD_LOCK(&module->lock); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach: %s, %p", win->w_name, base); + + /* the 
upper 4 bytes of the region count are an instance counter */ + region_count = module->state->region_count & 0xffffffffL; + region_id = module->state->region_count >> 32; + + region = ompi_osc_rdma_find_region_containing ((ompi_osc_rdma_region_t *) module->state->regions, 0, + region_count - 1, (intptr_t) base, (intptr_t) base + 1, + module->region_size, &region_index); + if (NULL == region) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "could not find dynamic memory region starting at %p", base); + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERROR; + } + + if (--module->dynamic_handles[region_index].refcnt > 0) { + OPAL_THREAD_UNLOCK(&module->lock); + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach complete"); + return OMPI_SUCCESS; + } + + /* lock the region so it can't change while a peer is reading it */ + ompi_osc_rdma_lock_acquire_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock)); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "detaching dynamic memory region {%p, %p} from index %d", + base, (void *)((intptr_t) base + region->len), region_index); + + if (module->selected_btl->btl_register_mem) { + ompi_osc_rdma_deregister (module, module->dynamic_handles[region_index].btl_handle); + + if (region_index < region_count - 1) { + /* shift whole handle entries (btl handle and refcnt), not just pointer-sized slots */ + memmove (module->dynamic_handles + region_index, module->dynamic_handles + region_index + 1, + (region_count - region_index - 1) * sizeof (module->dynamic_handles[0])); + } + + memset (module->dynamic_handles + region_count - 1, 0, sizeof (module->dynamic_handles[0])); + } + + if (region_index < region_count - 1) { + memmove (region, (void *)((intptr_t) region + module->region_size), + (region_count - region_index - 1) * module->region_size); + } + + module->state->region_count = ((region_id + 1) << 32) | (region_count - 1); + + ompi_osc_rdma_lock_release_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock)); + + OPAL_THREAD_UNLOCK(&module->lock); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE,
"detach complete"); + + return OMPI_SUCCESS; +} + +/** + * @brief refresh the local view of the dynamic memory region + * + * @param[in] module osc rdma module + * @param[in] peer peer object to refresh + * + * This function does the work of keeping the local view of a remote peer in sync with what is attached + * to the remote window. It is called on every address translation since there is no way (currently) to + * detect that the attached regions have changed. To reduce the amount of data read we first read the + * region count (which contains an id). If that hasn't changed the region data is not updated. If the + * list of attached regions has changed then all valid regions are read from the peer while holding + * their region lock. + */ +static int ompi_osc_rdma_refresh_dynamic_region (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_dynamic_t *peer) { + osc_rdma_counter_t region_count, region_id; + uint64_t source_address; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "refreshing dynamic memory regions for target %d", peer->super.rank); + + /* this loop is meant to prevent us from reading data while the remote side is in attach */ + do { + osc_rdma_counter_t remote_value; + + source_address = (uint64_t)(intptr_t) peer->super.state + offsetof (ompi_osc_rdma_state_t, region_count); + ret = ompi_osc_get_data_blocking (module, peer->super.state_endpoint, source_address, peer->super.state_handle, + &remote_value, sizeof (remote_value)); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + region_id = remote_value >> 32; + region_count = remote_value & 0xffffffffl; + /* check if the region is changing */ + } while (0xffffffffl == region_count); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "target region: id 0x%lx, count 0x%lx (cached: 0x%x, 0x%x)", + (unsigned long) region_id, (unsigned long) region_count, peer->region_id, peer->region_count); + + if (0 == region_count) { + return OMPI_ERR_RMA_RANGE; + } + + /* check if the cached copy is 
out of date */ + OPAL_THREAD_LOCK(&module->lock); + + if (peer->region_id != region_id) { + unsigned region_len = module->region_size * region_count; + void *temp; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "dynamic memory cache is out of data. reloading from peer"); + + /* allocate only enough space for the remote regions */ + temp = realloc (peer->regions, region_len); + if (NULL == temp) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_OUT_OF_RESOURCE; + } + peer->regions = temp; + + /* lock the region */ + ompi_osc_rdma_lock_acquire_shared (module, &peer->super, 1, offsetof (ompi_osc_rdma_state_t, regions_lock), + OMPI_OSC_RDMA_LOCK_EXCLUSIVE); + + source_address = (uint64_t)(intptr_t) peer->super.state + offsetof (ompi_osc_rdma_state_t, regions); + ret = ompi_osc_get_data_blocking (module, peer->super.state_endpoint, source_address, peer->super.state_handle, + peer->regions, region_len); + + /* release the region lock before checking the result so a failed read does not leave the peer's regions_lock held */ + ompi_osc_rdma_lock_release_shared (module, &peer->super, -1, offsetof (ompi_osc_rdma_state_t, regions_lock)); + + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + /* the cached region id is left unchanged so the next refresh reloads the regions */ + OPAL_THREAD_UNLOCK(&module->lock); + return ret; + } + + /* update cached region ids */ + peer->region_id = region_id; + peer->region_count = region_count; + } + + OPAL_THREAD_UNLOCK(&module->lock); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished refreshing dynamic memory regions for target %d", peer->super.rank); + + return OMPI_SUCCESS; +} + +int ompi_osc_rdma_find_dynamic_region (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, uint64_t base, size_t len, + ompi_osc_rdma_region_t **region) +{ + ompi_osc_rdma_peer_dynamic_t *dy_peer = (ompi_osc_rdma_peer_dynamic_t *) peer; + intptr_t bound = (intptr_t) base + len; + ompi_osc_rdma_region_t *regions; + int ret, region_count; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "locating dynamic memory region matching: {%" PRIx64 ", %" PRIx64 "}" + " (len %lu)", base, base + len, (unsigned long) len); + + if
(!ompi_osc_rdma_peer_local_state (peer)) { + ret = ompi_osc_rdma_refresh_dynamic_region (module, dy_peer); + if (OMPI_SUCCESS != ret) { + return ret; + } + + regions = dy_peer->regions; + region_count = dy_peer->region_count; + } else { + ompi_osc_rdma_state_t *peer_state = (ompi_osc_rdma_state_t *) peer->state; + regions = (ompi_osc_rdma_region_t *) peer_state->regions; + region_count = peer_state->region_count; + } + + *region = ompi_osc_rdma_find_region_containing (regions, 0, region_count - 1, (intptr_t) base, bound, module->region_size, NULL); + if (!*region) { + return OMPI_ERR_RMA_RANGE; + } + + /* round a matching region */ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/osc/rdma/osc_rdma_dynamic.h b/ompi/mca/osc/rdma/osc_rdma_dynamic.h new file mode 100644 index 00000000000..632fc5eebe5 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_dynamic.h @@ -0,0 +1,60 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "osc_rdma.h" + +/** + * @brief attach a region to a window + * + * @param[in] win mpi window + * @param[in] base base pointer of region + * @param[in] len region size + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERR_RMA_FLAVOR if the window is not a dynamic window + * @returns OMPI_ERR_RMA_ATTACH if the region could not be attached + * + * This function attaches a region to the local window. After this call + * completes the region will be available for RMA access by all peers in + * the window. 
+ */ +int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len); + +/** + * @brief detach a region from a window + * + * @param[in] win mpi window + * @param[in] base base pointer of region specified to ompi_osc_rdma_attach() + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERR_RMA_FLAVOR if the window is not a dynamic window + * @returns OMPI_ERROR if the region is not attached + * + * This function requires that a region with the same base has been attached + * using the ompi_osc_rdma_attach() function. + */ +int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base); + +/** + * @brief find dynamic region associated with a peer, base, and len + * + * @param[in] module osc rdma module + * @param[in] peer peer object for remote peer + * @param[in] base base pointer for region + * @param[in] len length of region + * @param[out] region region structure for the region + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERR_OUT_OF_RESOURCE on resource failure + * @returns OMPI_ERR_RMA_RANGE if no region matches + */ +int ompi_osc_rdma_find_dynamic_region (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, uint64_t base, size_t len, + ompi_osc_rdma_region_t **region); diff --git a/ompi/mca/osc/rdma/osc_rdma_frag.c b/ompi/mca/osc/rdma/osc_rdma_frag.c new file mode 100644 index 00000000000..ca9144fd2f0 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_frag.c @@ -0,0 +1,16 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "osc_rdma.h" +#include "osc_rdma_frag.h" + +OBJ_CLASS_INSTANCE(ompi_osc_rdma_frag_t, opal_free_list_item_t, NULL, NULL); diff --git a/ompi/mca/osc/rdma/osc_rdma_frag.h b/ompi/mca/osc/rdma/osc_rdma_frag.h new file mode 100644 index 00000000000..e9636a24d25 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_frag.h @@ -0,0 +1,123 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OSC_RDMA_FRAG_H +#define OSC_RDMA_FRAG_H + +#include "osc_rdma.h" +#include "opal/align.h" + +/** Communication buffer for packing messages */ +struct ompi_osc_rdma_frag_t { + opal_free_list_item_t super; + + /* start of unused space */ + unsigned char *top; + + /* space remaining in buffer */ + uint32_t remain_len; + /* Number of operations which have started writing into the frag, but not yet completed doing so */ + int32_t pending; + + ompi_osc_rdma_module_t *module; + mca_btl_base_registration_handle_t *handle; +}; +typedef struct ompi_osc_rdma_frag_t ompi_osc_rdma_frag_t; +OBJ_CLASS_DECLARATION(ompi_osc_rdma_frag_t); + + +static inline void ompi_osc_rdma_frag_complete (ompi_osc_rdma_frag_t *frag) +{ + if (0 == OPAL_THREAD_ADD32(&frag->pending, -1)) { + opal_atomic_rmb (); + + ompi_osc_rdma_deregister (frag->module, frag->handle); + frag->handle = NULL; + + opal_free_list_return (&mca_osc_rdma_component.frags, (opal_free_list_item_t *) frag); + } +} + +/* + * Note: module lock must be held during this operation + */ +static inline int ompi_osc_rdma_frag_alloc (ompi_osc_rdma_module_t *module, size_t request_len, + ompi_osc_rdma_frag_t **buffer, char **ptr) +{ + ompi_osc_rdma_frag_t *curr; + int ret; + + /* ensure all buffers are 8-byte 
aligned */ + request_len = OPAL_ALIGN(request_len, 8, size_t); + + if (request_len > (mca_osc_rdma_component.buffer_size >> 1)) { + return OMPI_ERR_VALUE_OUT_OF_BOUNDS; + } + + OPAL_THREAD_LOCK(&module->lock); + curr = module->rdma_frag; + if (OPAL_UNLIKELY(NULL == curr || curr->remain_len < request_len)) { + if (NULL == curr || (NULL != curr && curr->pending > 1)) { + opal_free_list_item_t *item = NULL; + + /* release the initial reference to the buffer */ + module->rdma_frag = NULL; + + if (curr) { + ompi_osc_rdma_frag_complete (curr); + } + + item = opal_free_list_get (&mca_osc_rdma_component.frags); + if (OPAL_UNLIKELY(NULL == item)) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + curr = module->rdma_frag = (ompi_osc_rdma_frag_t *) item; + + curr->handle = NULL; + curr->pending = 1; + curr->module = module; + } + + curr->top = curr->super.ptr; + curr->remain_len = mca_osc_rdma_component.buffer_size; + + if (curr->remain_len < request_len) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + } + } + + if (!curr->handle && module->selected_btl->btl_register_mem) { + ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, curr->super.ptr, mca_osc_rdma_component.buffer_size, + MCA_BTL_REG_FLAG_ACCESS_ANY, &curr->handle); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OPAL_THREAD_UNLOCK(&module->lock); + return ret; + } + } + + + *ptr = (char *) curr->top; + *buffer = curr; + + curr->top += request_len; + curr->remain_len -= request_len; + OPAL_THREAD_ADD32(&curr->pending, 1); + + OPAL_THREAD_UNLOCK(&module->lock); + + return OMPI_SUCCESS; +} + +#endif diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h new file mode 100644 index 00000000000..5583711ef28 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -0,0 +1,352 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. 
All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OMPI_OSC_RDMA_LOCK_H) +#define OMPI_OSC_RDMA_LOCK_H + +#include "osc_rdma_types.h" +#include "osc_rdma_frag.h" + +static inline int ompi_osc_rdma_trylock_local (volatile ompi_osc_rdma_lock_t *lock) +{ + return !ompi_osc_rdma_lock_cmpset (lock, 0, OMPI_OSC_RDMA_LOCK_EXCLUSIVE); +} + +static inline void ompi_osc_rdma_unlock_local (volatile ompi_osc_rdma_lock_t *lock) +{ + (void) ompi_osc_rdma_lock_add (lock, -OMPI_OSC_RDMA_LOCK_EXCLUSIVE); +} + +/** + * Dummy completion function for atomic operations + */ +void ompi_osc_rdma_atomic_complete (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + void *local_address, mca_btl_base_registration_handle_t *local_handle, + void *context, void *data, int status); + +__opal_attribute_always_inline__ +static inline int ompi_osc_rdma_lock_btl_fop (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, uint64_t address, + int op, ompi_osc_rdma_lock_t operand, ompi_osc_rdma_lock_t *result) +{ + volatile bool atomic_complete = false; + ompi_osc_rdma_frag_t *frag = NULL; + ompi_osc_rdma_lock_t *temp = NULL; + int ret; + + /* spin until the btl has accepted the operation */ + do { + if (NULL == frag) { + ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp); + } + if (NULL != frag) { + ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, (intptr_t) address, + frag->handle, peer->state_handle, op, operand, 0, + MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, + NULL); + } + + if (OPAL_LIKELY(!ompi_osc_rdma_oor(ret))) { + break; + } + ompi_osc_rdma_progress (module); + } while (1); + + if (OPAL_SUCCESS == ret) { + while (!atomic_complete) { + ompi_osc_rdma_progress (module); + } + } else if (1 == ret) { + ret = OMPI_SUCCESS; + } + + if (NULL != frag) { + if (result) { + *result = *temp; + } + 
ompi_osc_rdma_frag_complete (frag);
+    }
+
+    return ret;
+}
+/* apply {op} with {operand} to the lock at {address} on {peer}; uses the fetching variant when the btl lacks MCA_BTL_FLAGS_ATOMIC_OPS */
+__opal_attribute_always_inline__
+static inline int ompi_osc_rdma_lock_btl_op (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, uint64_t address,
+                                             int op, ompi_osc_rdma_lock_t operand)
+{
+    volatile bool atomic_complete = false;
+    int ret;
+
+    if (!(module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) {
+        return ompi_osc_rdma_lock_btl_fop (module, peer, address, op, operand, NULL);
+    }
+
+    /* spin until the btl has accepted the operation */
+    do {
+        ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, (intptr_t) address, peer->state_handle,
+                                                   op, operand, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete,
+                                                   (void *) &atomic_complete, NULL);
+
+        if (OPAL_LIKELY(!ompi_osc_rdma_oor(ret))) {
+            break;
+        }
+        ompi_osc_rdma_progress (module);
+    } while (1);
+
+    if (OPAL_SUCCESS == ret) {
+        while (!atomic_complete) {
+            ompi_osc_rdma_progress (module);
+        }
+    } else if (1 == ret) { /* a return of 1 is mapped to success without waiting on the callback flag */
+        ret = OMPI_SUCCESS;
+    }
+
+    return ret;
+}
+/* compare-and-swap {compare} -> {value} on the lock at {address} on {peer}; the local result buffer is copied to {result} when it is non-NULL */
+__opal_attribute_always_inline__
+static inline int ompi_osc_rdma_lock_btl_cswap (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, uint64_t address,
+                                                ompi_osc_rdma_lock_t compare, ompi_osc_rdma_lock_t value, ompi_osc_rdma_lock_t *result)
+{
+    volatile bool atomic_complete = false;
+    ompi_osc_rdma_frag_t *frag = NULL;
+    ompi_osc_rdma_lock_t *temp = NULL;
+    int ret;
+
+    /* spin until the btl has accepted the operation */
+    do {
+        if (NULL == frag) {
+            ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp);
+        }
+        if (NULL != frag) {
+            ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->state_endpoint, temp, address, frag->handle,
+                                                          peer->state_handle, compare, value, 0, 0, ompi_osc_rdma_atomic_complete,
+                                                          (void *) &atomic_complete, NULL);
+        }
+
+        if (OPAL_LIKELY(!ompi_osc_rdma_oor(ret))) {
+            break;
+        }
+        ompi_osc_rdma_progress (module);
+    } while (1);
+
+    if (OPAL_SUCCESS == ret) {
+        while (!atomic_complete) {
+            ompi_osc_rdma_progress (module);
+        }
+    } else if (1 == ret) { /* a return of 1 is mapped to success without waiting on the callback flag */
+        ret = OMPI_SUCCESS;
+    }
+
+    if (NULL != frag) {
+        if (result) { /* test the pointer, not the caller's current value, so a NULL result is safe and a 0 value is still overwritten */
+            *result = *temp;
+        }
+        ompi_osc_rdma_frag_complete (frag);
+    }
+
+    return ret;
+}
+
+/**
+ * ompi_osc_rdma_lock_release_shared:
+ *
+ * @param[in] module - osc/rdma module
+ * @param[in] peer   - peer object
+ * @param[in] value  - value to add to the lock (negative of the acquire increment)
+ * @param[in] offset - offset of lock in remote peer's state segment
+ *
+ * @returns OMPI_SUCCESS on success and another ompi error code on failure
+ *
+ * This function releases a shared lock by atomically adding {value} to it. The value provided in
+ * {value} should be the negative of the one used for ompi_osc_rdma_lock_acquire_shared.
+ * It is erroneous to release a shared lock not held by the calling process.
+ */
+static inline int ompi_osc_rdma_lock_release_shared (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer,
+                                                     ompi_osc_rdma_lock_t value, ptrdiff_t offset)
+{
+    uint64_t lock = (uint64_t) (intptr_t) peer->state + offset;
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing shared lock %" PRIx64 " on peer %d. value 0x%lx", lock,
+                     peer->rank, (unsigned long) value);
+
+    if (!ompi_osc_rdma_peer_local_state (peer)) {
+        return ompi_osc_rdma_lock_btl_op (module, peer, lock, MCA_BTL_ATOMIC_ADD, value);
+    }
+
+    (void) ompi_osc_rdma_lock_add ((volatile ompi_osc_rdma_lock_t *) lock, value);
+
+    return OMPI_SUCCESS;
+}
+
+/**
+ * ompi_osc_rdma_lock_acquire_shared:
+ *
+ * @param[in] module - osc rdma module
+ * @param[in] peer   - owner of lock
+ * @param[in] value  - increment value
+ * @param[in] offset - offset of lock in remote peer's state segment
+ * @param[in] check  - check value for success
+ *
+ * @returns OMPI_SUCCESS on success and another ompi error code on failure
+ *
+ * This function increments a remote shared lock and checks it against the
+ * check value in {check}.
If any of the bits in the prior counter value + * match those in {check} the function decrements the value and tries again. + */ +static inline int ompi_osc_rdma_lock_acquire_shared (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, + ompi_osc_rdma_lock_t value, ptrdiff_t offset, + ompi_osc_rdma_lock_t check) +{ + uint64_t lock = (uint64_t) peer->state + offset; + ompi_osc_rdma_lock_t lock_state; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "acquiring shared lock %" PRIx64 " on peer %d. value 0x%lx", lock, + peer->rank, (unsigned long) value); + + /* spin until the lock has been acquired */ + if (!ompi_osc_rdma_peer_local_state (peer)) { + do { + ret = ompi_osc_rdma_lock_btl_fop (module, peer, lock, MCA_BTL_ATOMIC_ADD, value, &lock_state); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "failed to increment shared lock. opal error code %d", ret); + return ret; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "shared lock incremented. old value 0x%lx", (unsigned long) lock_state); + + if (!(lock_state & check)) { + break; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "another peer has exclusive access to lock"); + + /* NTH: i think this is correct. backoff! */ + ompi_osc_rdma_lock_release_shared (module, peer, -value, offset); + ompi_osc_rdma_progress (module); + } while (1); + } else { + do { + lock_state = ompi_osc_rdma_lock_add ((volatile ompi_osc_rdma_lock_t *) lock, value); + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "local shared lock incremented. 
old value 0x%lx", + (unsigned long) lock_state); + if (!(lock_state & check)) { + break; + } + + (void) ompi_osc_rdma_lock_add ((volatile ompi_osc_rdma_lock_t *) lock, -value); + ompi_osc_rdma_progress (module); + } while (1); + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "shared lock acquired"); + + return OMPI_SUCCESS; +} + +/** + * ompi_osc_rdma_lock_try_acquire_exclusive: + * + * @param[in] module - osc/rdma module + * @param[in] peer - peer object + * @param[in] offset - offset of lock in peer's state structure + * + * @returns 0 on success, 1 on failure + * + * This function attempts to obtain an exclusive lock at {offset} in a peer's state. + */ +static inline int ompi_osc_rdma_lock_try_acquire_exclusive (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, + ptrdiff_t offset) +{ + uint64_t lock = (uint64_t) (uintptr_t) peer->state + offset; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "trying to acquire exclusive lock %" PRIx64 " on peer %d", lock, + peer->rank); + + if (!ompi_osc_rdma_peer_local_state (peer)) { + /* set the temporary value so we can detect success. 
note that a lock should never be -1 */
+        ompi_osc_rdma_lock_t lock_state = -1;
+
+        ret = ompi_osc_rdma_lock_btl_cswap (module, peer, lock, 0, OMPI_OSC_RDMA_LOCK_EXCLUSIVE, &lock_state);
+        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+            return ret;
+        }
+
+#if OPAL_ENABLE_DEBUG
+        if (0 == lock_state) {
+            OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "exclusive lock acquired");
+        } else {
+            OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "could not acquire exclusive lock");
+        }
+#endif
+
+        return lock_state != 0;
+    }
+
+    return ompi_osc_rdma_trylock_local ((int64_t *)(intptr_t) lock);
+}
+
+/**
+ * ompi_osc_rdma_lock_acquire_exclusive:
+ *
+ * @param[in] module - osc/rdma module
+ * @param[in] peer   - peer object
+ * @param[in] offset - offset into the remote peer's state segment
+ *
+ * @returns OMPI_SUCCESS on success or another ompi error code on failure
+ *
+ * This function obtains an exclusive lock at {offset} in a peer's state, retrying while the lock is busy.
+ */
+static inline int ompi_osc_rdma_lock_acquire_exclusive (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer,
+                                                        ptrdiff_t offset)
+{
+    int ret;
+
+    while (1 == (ret = ompi_osc_rdma_lock_try_acquire_exclusive (module, peer, offset))) { /* 1 means busy: progress and retry; 0 or an error code ends the loop */
+        ompi_osc_rdma_progress (module);
+    }
+
+    return ret;
+}
+
+/**
+ * ompi_osc_rdma_lock_release_exclusive:
+ *
+ * @param[in] peer   - peer to unlock
+ * @param[in] offset - offset into the remote peer's state segment
+ *
+ * @returns OMPI_SUCCESS on success or another ompi error code on failure
+ *
+ * This function unlocks the lock at {offset} in the remote peer's state
+ * structure. It is illegal to call this function unless this process
+ * holds the lock.
+ */ +static inline int ompi_osc_rdma_lock_release_exclusive (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, + ptrdiff_t offset) +{ + uint64_t lock = (uint64_t) (intptr_t) peer->state + offset; + int ret = OMPI_SUCCESS; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing exclusive lock %" PRIx64 " on peer %d", lock, peer->rank); + + if (!ompi_osc_rdma_peer_local_state (peer)) { + ret = ompi_osc_rdma_lock_btl_op (module, peer, lock, MCA_BTL_ATOMIC_ADD, -OMPI_OSC_RDMA_LOCK_EXCLUSIVE); + } else { + ompi_osc_rdma_unlock_local ((volatile ompi_osc_rdma_lock_t *)(intptr_t) lock); + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "exclusive lock released"); + + return ret; +} + +#endif /* OMPI_OSC_RDMA_LOCK_H */ diff --git a/ompi/mca/osc/rdma/osc_rdma_module.c b/ompi/mca/osc/rdma/osc_rdma_module.c new file mode 100644 index 00000000000..5a8272e8700 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_module.c @@ -0,0 +1,144 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "osc_rdma.h"
+#include "osc_rdma_lock.h"
+
+#include "mpi.h"
+/* cache {peer} in the module: in the peer hash when no peer array exists, otherwise in the array slot for its rank */
+int ompi_osc_module_add_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer)
+{
+    int ret = OMPI_SUCCESS;
+
+    if (NULL == module->peer_array) {
+        ret = opal_hash_table_set_value_uint32 (&module->peer_hash, peer->rank, (void *) peer);
+    } else {
+        module->peer_array[peer->rank] = peer;
+    }
+
+    return ret;
+}
+/* tear down the osc/rdma module attached to {win}: deregister memory, release cached peers, free communicators and the module */
+int ompi_osc_rdma_free(ompi_win_t *win)
+{
+    int ret = OMPI_SUCCESS;
+    ompi_osc_rdma_module_t *module = GET_MODULE(win);
+    ompi_osc_rdma_peer_t *peer;
+    uint32_t key;
+    void *node;
+
+    if (NULL == module) {
+        return OMPI_SUCCESS;
+    }
+
+    if (NULL != module->comm) {
+        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                            "rdma component destroying window with id %d",
+                            ompi_comm_get_cid(module->comm));
+
+        /* finish with a barrier */
+        if (ompi_group_size(win->w_group) > 1) {
+            (void) module->comm->c_coll.coll_barrier (module->comm,
+                                                      module->comm->c_coll.coll_barrier_module);
+        }
+
+        /* remove from component information */
+        OPAL_THREAD_LOCK(&mca_osc_rdma_component.lock);
+        opal_hash_table_remove_value_uint32(&mca_osc_rdma_component.modules,
+                                            ompi_comm_get_cid(module->comm));
+        OPAL_THREAD_UNLOCK(&mca_osc_rdma_component.lock);
+    }
+
+    win->w_osc_module = NULL;
+
+    if (module->state) {
+        int region_count = module->state->region_count & 0xffffffffL;
+        if (NULL != module->dynamic_handles) {
+            for (int i = 0 ; i < region_count ; ++i) {
+                ompi_osc_rdma_deregister (module, module->dynamic_handles[i].btl_handle);
+            }
+
+            free (module->dynamic_handles);
+        }
+    }
+
+    OBJ_DESTRUCT(&module->outstanding_locks);
+    OBJ_DESTRUCT(&module->lock);
+    OBJ_DESTRUCT(&module->peer_lock);
+    OBJ_DESTRUCT(&module->all_sync);
+
+    ompi_osc_rdma_deregister (module, module->state_handle);
+    ompi_osc_rdma_deregister (module, module->base_handle);
+
+    OPAL_LIST_DESTRUCT(&module->pending_posts);
+
+    if (NULL != module->rdma_frag) {
+        ompi_osc_rdma_deregister (module, module->rdma_frag->handle);
+    }
+
+    /* remove all cached peers */
+    if (NULL == module->peer_array) {
+        ret = opal_hash_table_get_first_key_uint32 (&module->peer_hash, &key, (void **) &peer, &node);
+        while (OPAL_SUCCESS == ret) {
+            OBJ_RELEASE(peer);
+            ret = opal_hash_table_get_next_key_uint32 (&module->peer_hash, &key, (void **) &peer,
+                                                       node, &node);
+        }
+
+        OBJ_DESTRUCT(&module->peer_hash);
+    } else {
+        for (int i = 0 ; i < ompi_comm_size (module->comm) ; ++i) { /* peer_array is indexed by rank, so visit every rank, not only those below ours */
+            if (NULL != module->peer_array[i]) {
+                OBJ_RELEASE(module->peer_array[i]);
+            }
+        }
+
+        free (module->peer_array);
+    }
+
+    if (NULL != module->outstanding_lock_array) {
+        free (module->outstanding_lock_array);
+    }
+
+    if (module->local_leaders && MPI_COMM_NULL != module->local_leaders) {
+        ompi_comm_free (&module->local_leaders);
+    }
+
+    if (module->shared_comm && MPI_COMM_NULL != module->shared_comm) {
+        ompi_comm_free (&module->shared_comm);
+    }
+
+    if (module->comm && MPI_COMM_NULL != module->comm) {
+        ompi_comm_free (&module->comm);
+    }
+
+    if (NULL != module->free_after) {
+        free(module->free_after);
+    }
+
+    if (module->segment_base) {
+        opal_shmem_segment_detach (&module->seg_ds);
+        module->segment_base = NULL;
+    }
+
+    free (module);
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/osc/rdma/osc_rdma_passive_target.c b/ompi/mca/osc/rdma/osc_rdma_passive_target.c
new file mode 100644
index 00000000000..6358020f984
--- /dev/null
+++ b/ompi/mca/osc/rdma/osc_rdma_passive_target.c
@@ -0,0 +1,377 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved. + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "osc_rdma_passive_target.h" +#include "osc_rdma_comm.h" + +#include "mpi.h" + + +int ompi_osc_rdma_sync (struct ompi_win_t *win) +{ + ompi_osc_rdma_progress (GET_MODULE(win)); + return OMPI_SUCCESS; +} + +int ompi_osc_rdma_flush (int target, struct ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_sync_t *lock; + ompi_osc_rdma_peer_t *peer; + + assert (0 <= target); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush: %d, %s", target, win->w_name); + + OPAL_THREAD_LOCK(&module->lock); + + lock = ompi_osc_rdma_module_sync_lookup (module, target, &peer); + if (OPAL_UNLIKELY(NULL == lock || OMPI_OSC_RDMA_SYNC_TYPE_LOCK != lock->type)) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "flush: target %d is not locked in window %s", + target, win->w_name); + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_RMA_SYNC; + } + OPAL_THREAD_UNLOCK(&module->lock); + + /* finish all outstanding fragments */ + ompi_osc_rdma_sync_rdma_complete (lock); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush on target %d complete", target); + + return OMPI_SUCCESS; +} + + +int ompi_osc_rdma_flush_all (struct ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_sync_t *lock; + int ret = OMPI_SUCCESS; + uint32_t key; + void *node; + + /* flush is only allowed from within a passive target epoch */ + if (!ompi_osc_rdma_in_passive_epoch (module)) { + return OMPI_ERR_RMA_SYNC; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush_all: %s", win->w_name); + + /* globally complete all outstanding rdma requests */ + if (OMPI_OSC_RDMA_SYNC_TYPE_LOCK == 
module->all_sync.type) { + ompi_osc_rdma_sync_rdma_complete (&module->all_sync); + } + + /* flush all locks */ + ret = opal_hash_table_get_first_key_uint32 (&module->outstanding_locks, &key, (void **) &lock, &node); + while (OPAL_SUCCESS == ret) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "flushing lock %p", (void *) lock); + ompi_osc_rdma_sync_rdma_complete (lock); + ret = opal_hash_table_get_next_key_uint32 (&module->outstanding_locks, &key, (void **) &lock, + node, &node); + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush_all complete"); + + return OPAL_SUCCESS; +} + + +int ompi_osc_rdma_flush_local (int target, struct ompi_win_t *win) +{ + return ompi_osc_rdma_flush (target, win); +} + + +int ompi_osc_rdma_flush_local_all (struct ompi_win_t *win) +{ + return ompi_osc_rdma_flush_all (win); +} + +/* locking via atomics */ +static inline int ompi_osc_rdma_lock_atomic_internal (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, + ompi_osc_rdma_sync_t *lock) +{ + int ret; + + if (MPI_LOCK_EXCLUSIVE == lock->sync.lock.type) { + do { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "incrementing global exclusive lock"); + /* lock the master lock. 
this requires no rank has a global shared lock */ + ret = ompi_osc_rdma_lock_acquire_shared (module, module->leader, 1, offsetof (ompi_osc_rdma_state_t, global_lock), 0xffffffff00000000L); + if (OMPI_SUCCESS != ret) { + ompi_osc_rdma_progress (module); + continue; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "acquiring exclusive lock on peer"); + ret = ompi_osc_rdma_lock_try_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, local_lock)); + if (ret) { + /* release the global lock */ + ompi_osc_rdma_lock_release_shared (module, module->leader, -1, offsetof (ompi_osc_rdma_state_t, global_lock)); + ompi_osc_rdma_progress (module); + continue; + } + + peer->flags |= OMPI_OSC_RDMA_PEER_EXCLUSIVE; + break; + } while (1); + } else { + do { + /* go right to the target to acquire a shared lock */ + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "incrementing global shared lock"); + ret = ompi_osc_rdma_lock_acquire_shared (module, peer, 1, offsetof (ompi_osc_rdma_state_t, local_lock), + OMPI_OSC_RDMA_LOCK_EXCLUSIVE); + if (OMPI_SUCCESS == ret) { + return OMPI_SUCCESS; + } + + ompi_osc_rdma_progress (module); + } while (1); + } + + return OMPI_SUCCESS; +} + +static inline int ompi_osc_rdma_unlock_atomic_internal (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, + ompi_osc_rdma_sync_t *lock) +{ + if (MPI_LOCK_EXCLUSIVE == lock->sync.lock.type) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing exclusive lock on peer"); + ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, local_lock)); + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "decrementing global exclusive lock"); + ompi_osc_rdma_lock_release_shared (module, module->leader, -1, offsetof (ompi_osc_rdma_state_t, global_lock)); + peer->flags &= ~OMPI_OSC_RDMA_PEER_EXCLUSIVE; + } else { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "decrementing global shared lock"); + ompi_osc_rdma_lock_release_shared (module, peer, -1, offsetof (ompi_osc_rdma_state_t, local_lock)); + } 
+ + return OMPI_SUCCESS; +} + +int ompi_osc_rdma_lock_atomic (int lock_type, int target, int assert, ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_peer_t *peer = ompi_osc_rdma_module_peer (module, target); + ompi_osc_rdma_sync_t *lock; + int ret = OMPI_SUCCESS; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock: %d, %d, %d, %s", lock_type, target, assert, win->w_name); + + if (module->no_locks) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "attempted to lock with no_locks set"); + return OMPI_ERR_RMA_SYNC; + } + + if (module->all_sync.epoch_active && (OMPI_OSC_RDMA_SYNC_TYPE_LOCK != module->all_sync.type || MPI_LOCK_EXCLUSIVE == lock_type)) { + /* impossible to get an exclusive lock while holding a global shared lock or in a active + * target access epoch */ + return OMPI_ERR_RMA_SYNC; + } + + /* clear the global sync object (in case MPI_Win_fence was called) */ + module->all_sync.type = OMPI_OSC_RDMA_SYNC_TYPE_NONE; + + /* create lock item */ + lock = ompi_osc_rdma_sync_allocate (module); + if (OPAL_UNLIKELY(NULL == lock)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + lock->type = OMPI_OSC_RDMA_SYNC_TYPE_LOCK; + lock->sync.lock.target = target; + lock->sync.lock.type = lock_type; + lock->sync.lock.assert = assert; + + lock->peer_list.peer = peer; + lock->num_peers = 1; + OBJ_RETAIN(peer); + + if (0 == (assert & MPI_MODE_NOCHECK)) { + ret = ompi_osc_rdma_lock_atomic_internal (module, peer, lock); + } + + if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { + ++module->passive_target_access_epoch; + + opal_atomic_wmb (); + + OPAL_THREAD_SCOPED_LOCK(&module->lock, ompi_osc_rdma_module_lock_insert (module, lock)); + } else { + OBJ_RELEASE(lock); + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock %d complete", target); + + return ret; +} + + +int ompi_osc_rdma_unlock_atomic (int target, ompi_win_t *win) +{ + ompi_osc_rdma_module_t *module = GET_MODULE(win); + ompi_osc_rdma_peer_t *peer; + ompi_osc_rdma_sync_t *lock; + int ret = 
OMPI_SUCCESS;
+
+    OPAL_THREAD_LOCK(&module->lock);
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock: %d, %s", target, win->w_name);
+
+    lock = ompi_osc_rdma_module_lock_find (module, target, &peer);
+    if (OPAL_UNLIKELY(NULL == lock)) {
+        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "target %d is not locked in window %s",
+                         target, win->w_name);
+        OPAL_THREAD_UNLOCK(&module->lock);
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    ompi_osc_rdma_module_lock_remove (module, lock);
+
+    /* finish all outstanding fragments */
+    ompi_osc_rdma_sync_rdma_complete (lock);
+
+    if (!(lock->sync.lock.assert & MPI_MODE_NOCHECK)) {
+        ret = ompi_osc_rdma_unlock_atomic_internal (module, peer, lock);
+    }
+
+    /* release our reference to this peer */
+    OBJ_RELEASE(peer);
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock %d complete", target);
+
+    --module->passive_target_access_epoch;
+
+    opal_atomic_wmb ();
+
+    OPAL_THREAD_UNLOCK(&module->lock);
+
+    /* delete the lock */
+    ompi_osc_rdma_sync_return (lock);
+
+    return ret;
+}
+/* start a lock-all (shared) access epoch on {win}; fails with OMPI_ERR_RMA_SYNC when no_locks is set or an epoch is already active */
+int ompi_osc_rdma_lock_all_atomic (int assert, struct ompi_win_t *win)
+{
+    ompi_osc_rdma_module_t *module = GET_MODULE(win);
+    ompi_osc_rdma_sync_t *lock;
+    int ret = OMPI_SUCCESS;
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock_all: %d, %s", assert, win->w_name);
+
+    if (module->no_locks) {
+        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "attempted to lock with no_locks set");
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    OPAL_THREAD_LOCK(&module->lock);
+    if (module->all_sync.epoch_active) {
+        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "attempted lock_all when active target epoch is %s "
+                         "and lock all epoch is %s",
+                         (OMPI_OSC_RDMA_SYNC_TYPE_LOCK != module->all_sync.type && module->all_sync.epoch_active) ?
+                         "active" : "inactive",
+                         (OMPI_OSC_RDMA_SYNC_TYPE_LOCK == module->all_sync.type) ? "active" : "inactive");
+        OPAL_THREAD_UNLOCK(&module->lock);
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    /* set up lock */
+    lock = &module->all_sync;
+
+    lock->type = OMPI_OSC_RDMA_SYNC_TYPE_LOCK;
+    lock->sync.lock.target = -1;
+    lock->sync.lock.type = MPI_LOCK_SHARED;
+    lock->sync.lock.assert = assert;
+    lock->num_peers = ompi_comm_size (module->comm);
+
+    lock->epoch_active = true;
+    /* NTH: TODO -- like fence it might be a good idea to create an array to access all peers
+     * without having to access the hash table. Such a change would likely increase performance
+     * at the expense of memory usage. Ex. if a window has 1M peers then 8MB per process would
+     * be needed for this array. */
+
+    if (0 == (assert & MPI_MODE_NOCHECK)) {
+        /* increment the global shared lock */
+        ret = ompi_osc_rdma_lock_acquire_shared (module, module->leader, 0x0000000100000000UL,
+                                                 offsetof(ompi_osc_rdma_state_t, global_lock),
+                                                 0x00000000ffffffffUL);
+    }
+
+    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { /* failure is the unexpected path: undo the lock-all setup above */
+        lock->type = OMPI_OSC_RDMA_SYNC_TYPE_NONE;
+        lock->num_peers = 0;
+        lock->epoch_active = false;
+    } else {
+        ++module->passive_target_access_epoch;
+    }
+
+    opal_atomic_wmb ();
+
+    OPAL_THREAD_UNLOCK(&module->lock);
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock_all complete");
+
+    return ret;
+}
+
+int ompi_osc_rdma_unlock_all_atomic (struct ompi_win_t *win)
+{
+    ompi_osc_rdma_module_t *module = GET_MODULE(win);
+    ompi_osc_rdma_sync_t *lock;
+
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock_all: %s", win->w_name);
+
+    OPAL_THREAD_LOCK(&module->lock);
+
+    lock = &module->all_sync;
+    if (OMPI_OSC_RDMA_SYNC_TYPE_LOCK != lock->type) {
+        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "not locked in window %s", win->w_name);
+        OPAL_THREAD_UNLOCK(&module->lock);
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    /* finish all outstanding fragments */
+    ompi_osc_rdma_sync_rdma_complete (lock);
+
+    if (0 == (lock->sync.lock.assert & MPI_MODE_NOCHECK)) {
+        /* decrement the master lock shared count */
+        (void)
ompi_osc_rdma_lock_release_shared (module, module->leader, -0x0000000100000000UL, offsetof (ompi_osc_rdma_state_t, global_lock)); + } + + lock->type = OMPI_OSC_RDMA_SYNC_TYPE_NONE; + lock->num_peers = 0; + lock->epoch_active = false; + + --module->passive_target_access_epoch; + + opal_atomic_wmb (); + + OPAL_THREAD_UNLOCK(&module->lock); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock_all complete"); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/osc/rdma/osc_rdma_passive_target.h b/ompi/mca/osc/rdma/osc_rdma_passive_target.h new file mode 100644 index 00000000000..ee4a1f91281 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_passive_target.h @@ -0,0 +1,131 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OSC_RDMA_PASSIVE_TARGET_H) +#define OSC_RDMA_PASSIVE_TARGET_H + +#include "osc_rdma.h" +#include "osc_rdma_sync.h" +#include "osc_rdma_lock.h" + +/** + * @brief lock the target in the window using network/cpu atomics + * + * @param[in] lock_type mpi lock type (MPI_LOCK_SHARED, MPI_LOCK_EXCLUSIVE) + * @param[in] target target process + * @param[in] assert asserts + * @param[in] win mpi window + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERR_RMA_SYNC if there is a conflicting RMA epoch + */ +int ompi_osc_rdma_lock_atomic (int lock_type, int target, int assert, ompi_win_t *win); + +/** + * @brief unlock the target in the window using network/cpu atomics + * + * @param[in] target target process + * @param[in] win mpi window + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERR_RMA_SYNC if the target is not locked + */ +int ompi_osc_rdma_unlock_atomic (int target, ompi_win_t *win); + +/** + * @brief lock all targets in window using network/cpu atomics + * + * @param[in] assert asserts + * @param[in] win mpi window + * + * @returns 
OMPI_SUCCESS on success
+ * @returns OMPI_ERR_RMA_SYNC if there is a conflicting RMA epoch
+ */
+int ompi_osc_rdma_lock_all_atomic (int assert, struct ompi_win_t *win);
+
+/**
+ * @brief unlock all targets in window using network/cpu atomics
+ *        (ends the epoch started by ompi_osc_rdma_lock_all_atomic())
+ *
+ * @param[in] win        mpi window
+ *
+ * @returns OMPI_SUCCESS on success
+ * @returns OMPI_ERR_RMA_SYNC if the window is not in a lock all access epoch
+ */
+int ompi_osc_rdma_unlock_all_atomic (struct ompi_win_t *win);
+
+/**
+ * @brief synchronize the public and private copies of the window
+ *
+ * @param[in] win        mpi window
+ *
+ * @returns OMPI_SUCCESS on success
+ *
+ * Just acts as a memory barrier since this module only supports a unified memory
+ * model.
+ */
+int ompi_osc_rdma_sync (struct ompi_win_t *win);
+
+/**
+ * @brief flush rdma transactions to a target
+ *
+ * @param[in] target     target process
+ * @param[in] win        mpi window
+ *
+ * @returns OMPI_SUCCESS on success
+ * @returns OMPI_ERR_RMA_SYNC if the target is not locked
+ */
+int ompi_osc_rdma_flush (int target, struct ompi_win_t *win);
+
+/**
+ * @brief flush rdma transactions to all target(s)
+ *
+ * @param[in] win        mpi window
+ *
+ * @returns OMPI_SUCCESS on success
+ * @returns OMPI_ERR_RMA_SYNC if no processes are locked
+ *
+ * osc/rdma does not make a distinction between local and remote rma
+ * completion. this could change in a future release as small messages
+ * may be internally buffered.
+ */ +int ompi_osc_rdma_flush_local (int target, struct ompi_win_t *win); + +/** + * @brief flush rdma transactions to all target(s) (local completion) + * + * @param[in] win mpi window + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERR_RMA_SYNC if no processes are locked + * + * osc/rdma does not make a distinction between local and remote rma + * completion. this could change in a future release as small messages + * may be internally buffered. + */ +int ompi_osc_rdma_flush_local_all (struct ompi_win_t *win); + +#endif diff --git a/ompi/mca/osc/rdma/osc_rdma_peer.c b/ompi/mca/osc/rdma/osc_rdma_peer.c new file mode 100644 index 00000000000..7d7967ef66a --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_peer.c @@ -0,0 +1,341 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#ifdef HAVE_ALLOCA_H +#include <alloca.h> +#endif + +#include "osc_rdma_comm.h" + +#include "ompi/mca/bml/base/base.h" + +#define NODE_ID_TO_RANK(module, peer_data, node_id) ((int)(peer_data)->len) + +/** + * @brief find the btl endpoint for a process + * + * @param[in] module osc rdma module + * @param[in] peer_id process rank in the module communicator + * + * @returns NULL on error + * @returns btl endpoint on success + */ +struct mca_btl_base_endpoint_t *ompi_osc_rdma_peer_btl_endpoint (struct ompi_osc_rdma_module_t *module, int peer_id) +{ + ompi_proc_t *proc = ompi_comm_peer_lookup (module->comm, peer_id); + mca_bml_base_endpoint_t *bml_endpoint; + int num_btls; + + /* for now just use the bml to get the btl endpoint */ + bml_endpoint = mca_bml_base_get_endpoint (proc); + + num_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma); + + for (int btl_index = 0 ; btl_index < num_btls ; ++btl_index) { + if (bml_endpoint->btl_rdma.bml_btls[btl_index].btl == module->selected_btl) { + return bml_endpoint->btl_rdma.bml_btls[btl_index].btl_endpoint; + } + } + + /* very unlikely.
if this happened the btl selection process is broken */ + return NULL; +} + +int ompi_osc_rdma_new_peer (struct ompi_osc_rdma_module_t *module, int peer_id, ompi_osc_rdma_peer_t **peer_out) { + struct mca_btl_base_endpoint_t *endpoint; + ompi_osc_rdma_peer_t *peer; + + *peer_out = NULL; + + endpoint = ompi_osc_rdma_peer_btl_endpoint (module, peer_id); + if (OPAL_UNLIKELY(NULL == endpoint)) { + return OMPI_ERR_UNREACH; + } + + if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor) { + peer = (ompi_osc_rdma_peer_t *) OBJ_NEW(ompi_osc_rdma_peer_dynamic_t); + } else if (module->same_size && module->same_disp_unit) { + /* use a smaller peer object when same_size and same_disp_unit are set */ + peer = (ompi_osc_rdma_peer_t *) OBJ_NEW(ompi_osc_rdma_peer_basic_t); + } else { + peer = (ompi_osc_rdma_peer_t *) OBJ_NEW(ompi_osc_rdma_peer_extended_t); + } + + peer->data_endpoint = endpoint; /* NOTE(review): the OBJ_NEW result is dereferenced here without a NULL check -- confirm allocation failure is handled elsewhere */ + peer->rank = peer_id; + + *peer_out = peer; + + return OMPI_SUCCESS; +} + +/** + * @brief finish initializing a peer object + * + * @param[in] module osc rdma module + * @param[in] peer peer object to set up + * + * This function reads the registration handle and state pointer from the peer that holds that data. If necessary + * it will then read information about the peer from its state data structure. This information includes the + * displacement unit, base pointer, window size, and registration handle (if applicable).
+ */ +static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer) +{ + ompi_osc_rdma_peer_extended_t *ex_peer = (ompi_osc_rdma_peer_extended_t *) peer; + uint64_t peer_data_size; + uint64_t peer_data_offset, array_pointer; + struct mca_btl_base_endpoint_t *array_endpoint; + ompi_osc_rdma_region_t *array_peer_data, *node_peer_data; + ompi_osc_rdma_rank_data_t rank_data; + int registration_handle_size = 0; + int node_id, node_rank, array_index; + int ret, disp_unit, comm_size; + char *peer_data; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "configuring peer for rank %d", peer->rank); + + if (module->selected_btl->btl_register_mem) { + registration_handle_size = module->selected_btl->btl_registration_handle_size; + } + + comm_size = ompi_comm_size (module->comm); + + /* each node is responsible for holding a part of the rank -> node/local rank mapping array. this code + * calculates the node and offset the mapping can be found. once the mapping has been read the state + * part of the peer structure can be initialized. 
*/ + node_id = (peer->rank * module->node_count) / comm_size; + array_peer_data = (ompi_osc_rdma_region_t *) ((intptr_t) module->node_comm_info + node_id * module->region_size); + + /* the node leader rank is stored in the length field */ + node_rank = NODE_ID_TO_RANK(module, array_peer_data, node_id); + array_index = peer->rank % ((comm_size + module->node_count - 1) / module->node_count); + + array_pointer = array_peer_data->base + array_index * sizeof (rank_data); + + /* lookup the btl endpoint needed to retrieve the mapping */ + array_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, node_rank); + if (OPAL_UNLIKELY(NULL == array_endpoint)) { + return OMPI_ERR_UNREACH; + } + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "reading region data for %d from rank: %d, index: %d, pointer: 0x%" PRIx64 + ", size: %lu", peer->rank, node_rank, array_index, array_pointer, sizeof (rank_data)); + + ret = ompi_osc_get_data_blocking (module, array_endpoint, array_pointer, (mca_btl_base_registration_handle_t *) array_peer_data->btl_handle_data, + &rank_data, sizeof (rank_data)); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + /* initialize the state part of the peer object. NTH: for now the state data is for every node is stored on + * every node. this gives a good balance of code complexity and memory usage at this time. we take advantage + * of this by re-using the endpoint and pointer stored in the node_comm_info array. 
*/ + node_peer_data = (ompi_osc_rdma_region_t *) ((intptr_t) module->node_comm_info + rank_data.node_id * module->region_size); + + peer->state = node_peer_data->base + module->state_offset + module->state_size * rank_data.rank; + + if (registration_handle_size) { + peer->state_handle = (mca_btl_base_registration_handle_t *) node_peer_data->btl_handle_data; + } + + peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, NODE_ID_TO_RANK(module, node_peer_data, rank_data.node_id)); + if (OPAL_UNLIKELY(NULL == peer->state_endpoint)) { + return OMPI_ERR_UNREACH; + } + + /* nothing more to do for dynamic memory windows */ + if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor) { + return OMPI_SUCCESS; + } + + /* read window data from the target rank */ + if (module->same_disp_unit) { + /* do not bother reading the displacement unit as it is already known */ + peer_data_offset = offsetof (ompi_osc_rdma_state_t, regions); + } else { + peer_data_offset = offsetof (ompi_osc_rdma_state_t, disp_unit); + } + + peer_data_size = module->state_size - peer_data_offset; + peer_data = alloca (peer_data_size); + + /* read window data from the end of the target's state structure */ + ret = ompi_osc_get_data_blocking (module, peer->state_endpoint, peer->state + peer_data_offset, peer->state_handle, + peer_data, peer_data_size); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + if (!module->same_disp_unit) { + /* unpack displacement */ + memcpy (&ex_peer->disp_unit, peer_data, sizeof (ex_peer->disp_unit)); /* NOTE(review): the state's disp_unit field is declared int64_t but only sizeof (int) bytes are copied -- looks byte-order dependent, confirm */ + peer_data += offsetof (ompi_osc_rdma_state_t, regions) - offsetof (ompi_osc_rdma_state_t, disp_unit); + disp_unit = ex_peer->disp_unit; + } else { + disp_unit = module->disp_unit; + } + + ompi_osc_rdma_region_t *base_region = (ompi_osc_rdma_region_t *) peer_data; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "peer %d: remote base region: 0x%" PRIx64 ", size: %" PRId64 + ", flags: 0x%x, disp_unit: %d", peer->rank, base_region->base, base_region->len, + peer->flags,
disp_unit); + + if (ompi_osc_rdma_peer_local_base (peer)) { + /* for now we store the local address in the standard place. do no overwrite it */ + return OMPI_SUCCESS; + } + + ex_peer->super.base = base_region->base; + + /* save size and base */ + if (!module->same_size) { + ex_peer->size = base_region->len; + } + + if (base_region->len) { + if (registration_handle_size) { + ex_peer->super.base_handle = malloc (registration_handle_size); + if (OPAL_UNLIKELY(NULL == ex_peer->super.base_handle)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + peer->flags |= OMPI_OSC_RDMA_PEER_BASE_FREE; + + memcpy (ex_peer->super.base_handle, base_region->btl_handle_data, registration_handle_size); + } + + if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + ex_peer->super.super.data_endpoint = ex_peer->super.super.state_endpoint; + } + } + + return OMPI_SUCCESS; +} + +/** + * @brief lookup (or allocate) a peer for a rank (internal) + * + * @param[in] module osc rdma module + * @param[in] peer_id rank of remote peer (in module communicator) + * + * @returns peer object on success + * @returns NULL on error + * + * This is an internal function for looking up or allocating a peer object for a window rank. This + * function requires the peer lock to be held and is only expected to be called from itself or + * the ompi_osc_rdma_peer_lookup() helper function. 
+ */ +static struct ompi_osc_rdma_peer_t *ompi_osc_rdma_peer_lookup_internal (struct ompi_osc_rdma_module_t *module, int peer_id) +{ + ompi_osc_rdma_peer_t *peer; + int ret; + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "looking up peer data for rank %d", peer_id); + + peer = ompi_osc_module_get_peer (module, peer_id); + if (NULL != peer) { + return peer; + } + + ret = ompi_osc_rdma_new_peer (module, peer_id, &peer); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return NULL; + } + + ret = ompi_osc_rdma_peer_setup (module, peer); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + OBJ_RELEASE(peer); + return NULL; + } + + ret = ompi_osc_module_add_peer (module, peer); + if (OPAL_SUCCESS != ret) { + /* out of memory */ + OBJ_RELEASE(peer); + return NULL; + } + + /* ensure the peer hash is updated before we drop the lock */ + opal_atomic_wmb (); + + return peer; +} + +struct ompi_osc_rdma_peer_t *ompi_osc_rdma_peer_lookup (struct ompi_osc_rdma_module_t *module, int peer_id) +{ + struct ompi_osc_rdma_peer_t *peer; + + opal_mutex_lock (&module->peer_lock); + peer = ompi_osc_rdma_peer_lookup_internal (module, peer_id); + opal_mutex_unlock (&module->peer_lock); + + return peer; +} + + +/******* peer objects *******/ + +static void ompi_osc_rdma_peer_construct (ompi_osc_rdma_peer_t *peer) +{ + memset ((char *) peer + sizeof (peer->super), 0, sizeof (*peer) - sizeof (peer->super)); +} + +static void ompi_osc_rdma_peer_destruct (ompi_osc_rdma_peer_t *peer) +{ + if (peer->state_handle && (peer->flags & OMPI_OSC_RDMA_PEER_STATE_FREE)) { + free (peer->state_handle); + } +} + +OBJ_CLASS_INSTANCE(ompi_osc_rdma_peer_t, opal_object_t, + ompi_osc_rdma_peer_construct, + ompi_osc_rdma_peer_destruct); + +static void ompi_osc_rdma_peer_basic_construct (ompi_osc_rdma_peer_basic_t *peer) +{ + memset ((char *) peer + sizeof (peer->super), 0, sizeof (*peer) - sizeof (peer->super)); +} + +static void ompi_osc_rdma_peer_basic_destruct (ompi_osc_rdma_peer_basic_t *peer) +{ + if (peer->base_handle && 
(peer->super.flags & OMPI_OSC_RDMA_PEER_BASE_FREE)) { + free (peer->base_handle); + } +} + +OBJ_CLASS_INSTANCE(ompi_osc_rdma_peer_basic_t, ompi_osc_rdma_peer_t, + ompi_osc_rdma_peer_basic_construct, + ompi_osc_rdma_peer_basic_destruct); + +OBJ_CLASS_INSTANCE(ompi_osc_rdma_peer_extended_t, ompi_osc_rdma_peer_basic_t, + NULL, NULL); + +static void ompi_osc_rdma_peer_dynamic_construct (ompi_osc_rdma_peer_dynamic_t *peer) +{ + memset ((char *) peer + sizeof (peer->super), 0, sizeof (*peer) - sizeof (peer->super)); +} + +static void ompi_osc_rdma_peer_dynamic_destruct (ompi_osc_rdma_peer_dynamic_t *peer) +{ + if (peer->regions) { + free (peer->regions); + } +} + +OBJ_CLASS_INSTANCE(ompi_osc_rdma_peer_dynamic_t, ompi_osc_rdma_peer_t, + ompi_osc_rdma_peer_dynamic_construct, + ompi_osc_rdma_peer_dynamic_destruct); diff --git a/ompi/mca/osc/rdma/osc_rdma_peer.h b/ompi/mca/osc/rdma/osc_rdma_peer.h new file mode 100644 index 00000000000..6716733a43a --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_peer.h @@ -0,0 +1,225 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_OSC_RDMA_PEER_H +#define OMPI_OSC_RDMA_PEER_H + +#include "osc_rdma_types.h" + +struct ompi_osc_rdma_module_t; + +/** + * @brief osc rdma peer object + * + * This object is used as a cache for information associated with a peer. 
+ */ +struct ompi_osc_rdma_peer_t { + opal_object_t super; + + /** rdma data endpoint for this peer */ + struct mca_btl_base_endpoint_t *data_endpoint; + + /** endpoint for reading/modifying peer state */ + struct mca_btl_base_endpoint_t *state_endpoint; + + /** remote peer's state pointer */ + osc_rdma_base_t state; + + /** registration handle associated with the state */ + mca_btl_base_registration_handle_t *state_handle; + + /** rank of this peer in the window */ + int rank; + + /** peer flags */ + int flags; + + /** aggregation support */ + ompi_osc_rdma_aggregation_t *aggregate; +}; +typedef struct ompi_osc_rdma_peer_t ompi_osc_rdma_peer_t; + +/** + * @brief peer object used when using dynamic windows + */ +struct ompi_osc_rdma_peer_dynamic_t { + ompi_osc_rdma_peer_t super; + + /** last region id seen for this peer */ + uint32_t region_id; + + /** number of regions in the regions array */ + uint32_t region_count; + + /** cached array of attached regions for this peer */ + struct ompi_osc_rdma_region_t *regions; +}; + +typedef struct ompi_osc_rdma_peer_dynamic_t ompi_osc_rdma_peer_dynamic_t; + +/** + * @brief basic peer object for non-dynamic windows used when all peers + * have the same displacement unit and size + */ +struct ompi_osc_rdma_peer_basic_t { + ompi_osc_rdma_peer_t super; + + /** remote peer's base pointer */ + osc_rdma_base_t base; + + /** local pointer to peer's base */ + osc_rdma_base_t local_base; + + /** registration handle associated with the base */ + mca_btl_base_registration_handle_t *base_handle; +}; + +typedef struct ompi_osc_rdma_peer_basic_t ompi_osc_rdma_peer_basic_t; + +/** + * @brief peer object used when no assumption can be made about the + * peer's displacement unit or size + */ +struct ompi_osc_rdma_peer_extended_t { + ompi_osc_rdma_peer_basic_t super; + + /** remote peer's region size */ + osc_rdma_size_t size; + + /** displacement unit */ + int disp_unit; +}; + +typedef struct ompi_osc_rdma_peer_extended_t 
ompi_osc_rdma_peer_extended_t; + +/** + * @brief object class declarations + */ +OBJ_CLASS_DECLARATION(ompi_osc_rdma_peer_t); +OBJ_CLASS_DECLARATION(ompi_osc_rdma_peer_dynamic_t); +OBJ_CLASS_DECLARATION(ompi_osc_rdma_peer_basic_t); +OBJ_CLASS_DECLARATION(ompi_osc_rdma_peer_extended_t); + +/** + * @brief used to identify the node and local rank of a peer + */ +struct ompi_osc_rdma_rank_data_t { + /** index of node in node_comm_info array */ + unsigned int node_id; + /** local rank of process */ + unsigned int rank; +}; +typedef struct ompi_osc_rdma_rank_data_t ompi_osc_rdma_rank_data_t; + +enum { + /** peer is locked for exclusive access */ + OMPI_OSC_RDMA_PEER_EXCLUSIVE = 0x01, + /** peer's base is accessible with direct loads/stores */ + OMPI_OSC_RDMA_PEER_LOCAL_BASE = 0x02, + /** peer state is local */ + OMPI_OSC_RDMA_PEER_LOCAL_STATE = 0x04, + /** currently accumulating on peer */ + OMPI_OSC_RDMA_PEER_ACCUMULATING = 0x08, + /** peer is in an active access epoch (pscw) */ + OMPI_OSC_RDMA_PEER_ACCESS_ACTIVE_EPOCH = 0x10, + /** peer state handle should be freed */ + OMPI_OSC_RDMA_PEER_STATE_FREE = 0x20, + /** peer base handle should be freed */ + OMPI_OSC_RDMA_PEER_BASE_FREE = 0x40, +}; + +/** + * @brief allocate a peer object and initialize some of its structures + * + * @param[in] module osc rdma module + * @param[in] peer_id peer's rank in the communicator + * @param[out] peer_out new peer object + * + * The type of the object returned depends on the window settings. For example for a dynamic window + * this will return a peer of type \ref ompi_osc_rdma_peer_dynamic_t. + */ +int ompi_osc_rdma_new_peer (struct ompi_osc_rdma_module_t *module, int peer_id, ompi_osc_rdma_peer_t **peer_out); + +/** + * @brief lookup (or allocate) a peer + * + * @param[in] module osc rdma module + * @param[in] peer_id peer's rank in the communicator + * + * This function is used by the ompi_osc_rdma_module_peer() inline function to allocate a peer object.
It is not + * intended to be called from anywhere else. + */ +struct ompi_osc_rdma_peer_t *ompi_osc_rdma_peer_lookup (struct ompi_osc_rdma_module_t *module, int peer_id); + +/** + * @brief flush queued aggregated operation + * + * @param[in] peer osc rdma peer + */ +int ompi_osc_rdma_peer_aggregate_flush (ompi_osc_rdma_peer_t *peer); + +/** + * @brief lookup the btl endpoint for a peer + * + * @param[in] module osc rdma module + * @param[in] peer_id peer's rank in the communicator + * + * @returns btl endpoint for the peer on success + * @returns NULL on failure + */ +struct mca_btl_base_endpoint_t *ompi_osc_rdma_peer_btl_endpoint (struct ompi_osc_rdma_module_t *module, int peer_id); + +/** + * @brief check if this process holds an exclusive lock on a peer + * + * @param[in] peer peer object to check + */ +static inline bool ompi_osc_rdma_peer_is_exclusive (ompi_osc_rdma_peer_t *peer) +{ + return !!(peer->flags & OMPI_OSC_RDMA_PEER_EXCLUSIVE); +} + +/** + * @brief check if this process is currently accumulating on a peer + * + * @param[in] peer peer object to check + */ +static inline bool ompi_osc_rdma_peer_is_accumulating (ompi_osc_rdma_peer_t *peer) +{ + return !!(peer->flags & OMPI_OSC_RDMA_PEER_ACCUMULATING); +} + +/** + * @brief check if the peer's base pointer is local to this process + * + * @param[in] peer peer object to check + */ +static inline bool ompi_osc_rdma_peer_local_base (ompi_osc_rdma_peer_t *peer) +{ + return !!(peer->flags & OMPI_OSC_RDMA_PEER_LOCAL_BASE); +} + +/** + * @brief check if the peer's state pointer is local to this process + * + * @param[in] peer peer object to check + * + * The OMPI_OSC_RDMA_PEER_LOCAL_STATE flag will only be set if either 1) we + * will not be mixing btl atomics and cpu atomics, or 2) it is safe to mix + * btl and cpu atomics. 
+ */ +static inline bool ompi_osc_rdma_peer_local_state (ompi_osc_rdma_peer_t *peer) +{ + return !!(peer->flags & OMPI_OSC_RDMA_PEER_LOCAL_STATE); +} + + +#endif /* OMPI_OSC_RDMA_PEER_H */ diff --git a/ompi/mca/osc/rdma/osc_rdma_request.c b/ompi/mca/osc/rdma/osc_rdma_request.c new file mode 100644 index 00000000000..625b4d380ed --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_request.c @@ -0,0 +1,80 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2011-2012 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/request/request.h" +#include "ompi/mca/osc/osc.h" +#include "ompi/mca/osc/base/base.h" +#include "ompi/mca/osc/base/osc_base_obj_convert.h" + +#include "osc_rdma.h" +#include "osc_rdma_request.h" + +static int request_cancel(struct ompi_request_t *request, int complete) +{ + return MPI_ERR_REQUEST; +} + +static int request_free(struct ompi_request_t **ompi_req) +{ + ompi_osc_rdma_request_t *request = + (ompi_osc_rdma_request_t*) *ompi_req; + + if( !REQUEST_COMPLETE(&request->super) ) { /* only a completed request may be handed back to the request free list */ + return MPI_ERR_REQUEST; + } + + OMPI_OSC_RDMA_REQUEST_RETURN(request); + + *ompi_req = MPI_REQUEST_NULL; + + return OMPI_SUCCESS; +} + +static int request_complete (struct ompi_request_t *request) +{ + ompi_osc_rdma_request_t *parent_request = ((ompi_osc_rdma_request_t *) request)->parent_request; + + if (parent_request && 0 == OPAL_THREAD_ADD32 (&parent_request->outstanding_requests, -1)) { + ompi_osc_rdma_request_complete (parent_request, OMPI_SUCCESS); + } + + return OMPI_SUCCESS; +} + +static void request_construct(ompi_osc_rdma_request_t *request) +{ + request->super.req_type = OMPI_REQUEST_WIN; +
request->super.req_status._cancelled = 0; + request->super.req_free = request_free; + request->super.req_cancel = request_cancel; + request->super.req_complete_cb = request_complete; + request->parent_request = NULL; + request->buffer = NULL; + request->internal = false; + request->outstanding_requests = 0; + OBJ_CONSTRUCT(&request->convertor, opal_convertor_t); +} + +static void request_destruct(ompi_osc_rdma_request_t *request) +{ + OBJ_DESTRUCT(&request->convertor); +} + +OBJ_CLASS_INSTANCE(ompi_osc_rdma_request_t, + ompi_request_t, + request_construct, + request_destruct); diff --git a/ompi/mca/osc/rdma/osc_rdma_request.h b/ompi/mca/osc/rdma/osc_rdma_request.h new file mode 100644 index 00000000000..3cec365a7aa --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_request.h @@ -0,0 +1,111 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_OSC_RDMA_REQUEST_H +#define OMPI_OSC_RDMA_REQUEST_H + +#include "osc_rdma.h" + +enum ompi_osc_rdma_request_type_t { + OMPI_OSC_RDMA_TYPE_GET, + OMPI_OSC_RDMA_TYPE_PUT, + OMPI_OSC_RDMA_TYPE_RDMA, + OMPI_OSC_RDMA_TYPE_ACC, + OMPI_OSC_RDMA_TYPE_GET_ACC, + OMPI_OSC_RDMA_TYPE_CSWAP, +}; +typedef enum ompi_osc_rdma_request_type_t ompi_osc_rdma_request_type_t; + +struct ompi_osc_rdma_request_t { + ompi_request_t super; + + ompi_osc_rdma_peer_t *peer; + + ompi_osc_rdma_request_type_t type; + void *origin_addr; + int origin_count; + struct ompi_datatype_t *origin_dt; + + void *result_addr; + int result_count; + struct ompi_datatype_t *result_dt; + + const void *compare_addr; + + ompi_op_t *op; + + ompi_osc_rdma_module_t *module; + int32_t outstanding_requests; + bool internal; + + ptrdiff_t offset; + size_t len; + void *ctx; + void *frag; + + uint64_t target_address; + + struct ompi_osc_rdma_request_t *parent_request; + /* used for non-contiguous get accumulate operations */ + opal_convertor_t convertor; + + /** synchronization object */ + struct ompi_osc_rdma_sync_t *sync; + void *buffer; +}; +typedef struct ompi_osc_rdma_request_t ompi_osc_rdma_request_t; +OBJ_CLASS_DECLARATION(ompi_osc_rdma_request_t); + +/* REQUEST_ALLOC is only called from "top-level" functions (rdma_rput, + rdma_rget, etc.), so it's ok to spin here... 
*/ +#define OMPI_OSC_RDMA_REQUEST_ALLOC(rmodule, rpeer, req) \ + do { \ + opal_free_list_item_t *item; \ + do { \ + item = opal_free_list_get (&mca_osc_rdma_component.requests); \ + if (NULL == item) { \ + ompi_osc_rdma_progress (rmodule); \ + } \ + } while (NULL == item); \ + req = (ompi_osc_rdma_request_t*) item; \ + OMPI_REQUEST_INIT(&req->super, false); \ + req->super.req_mpi_object.win = (rmodule)->win; /* use the macro argument, not a caller-scope variable named module */ \ + req->super.req_state = OMPI_REQUEST_ACTIVE; \ + req->module = (rmodule); \ + req->peer = (rpeer); \ + } while (0) + +#define OMPI_OSC_RDMA_REQUEST_RETURN(req) \ + do { \ + OMPI_REQUEST_FINI(&(req)->super); \ + free ((req)->buffer); \ + (req)->buffer = NULL; \ + (req)->parent_request = NULL; \ + (req)->internal = false; \ + (req)->outstanding_requests = 0; \ + opal_free_list_return (&mca_osc_rdma_component.requests, \ + (opal_free_list_item_t *) (req)); \ + } while (0) + +static inline void ompi_osc_rdma_request_complete (ompi_osc_rdma_request_t *request, int mpi_error) +{ + if (!request->internal) { + request->super.req_status.MPI_ERROR = mpi_error; + + /* mark the request complete at the mpi level */ + ompi_request_complete (&request->super, true); + } else { + OMPI_OSC_RDMA_REQUEST_RETURN (request); + } +} + +#endif /* OMPI_OSC_RDMA_REQUEST_H */ diff --git a/ompi/mca/osc/rdma/osc_rdma_sync.c b/ompi/mca/osc/rdma/osc_rdma_sync.c new file mode 100644 index 00000000000..dca7e328d89 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_sync.c @@ -0,0 +1,83 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "osc_rdma.h" +#include "osc_rdma_sync.h" + +static void ompi_osc_rdma_sync_constructor (ompi_osc_rdma_sync_t *rdma_sync) +{ + rdma_sync->type = OMPI_OSC_RDMA_SYNC_TYPE_NONE; + rdma_sync->epoch_active = false; + rdma_sync->outstanding_rdma = 0; + OBJ_CONSTRUCT(&rdma_sync->aggregations, opal_list_t); + OBJ_CONSTRUCT(&rdma_sync->lock, opal_mutex_t); +} + +static void ompi_osc_rdma_sync_destructor (ompi_osc_rdma_sync_t *rdma_sync) +{ + OBJ_DESTRUCT(&rdma_sync->aggregations); + OBJ_DESTRUCT(&rdma_sync->lock); +} + +OBJ_CLASS_INSTANCE(ompi_osc_rdma_sync_t, opal_object_t, ompi_osc_rdma_sync_constructor, + ompi_osc_rdma_sync_destructor); + +ompi_osc_rdma_sync_t *ompi_osc_rdma_sync_allocate (struct ompi_osc_rdma_module_t *module) +{ + ompi_osc_rdma_sync_t *rdma_sync; + + rdma_sync = OBJ_NEW (ompi_osc_rdma_sync_t); + if (OPAL_UNLIKELY(NULL == rdma_sync)) { + return NULL; + } + + rdma_sync->module = module; + return rdma_sync; +} + +void ompi_osc_rdma_sync_return (ompi_osc_rdma_sync_t *rdma_sync) +{ + OBJ_RELEASE(rdma_sync); +} + +static inline bool ompi_osc_rdma_sync_array_peer (int rank, ompi_osc_rdma_peer_t **peers, size_t nranks, + struct ompi_osc_rdma_peer_t **peer) +{ + int mid = nranks / 2; + + /* base cases */ + if (0 == nranks || (1 == nranks && peers[0]->rank != rank)) { + *peer = NULL; + return false; + } else if (peers[0]->rank == rank) { + *peer = peers[0]; + return true; + } + + if (peers[mid]->rank > rank) { + return ompi_osc_rdma_sync_array_peer (rank, peers, mid, peer); + } + + return ompi_osc_rdma_sync_array_peer (rank, peers + mid, nranks - mid, peer); +} + +bool ompi_osc_rdma_sync_pscw_peer (ompi_osc_rdma_module_t *module, int target, struct ompi_osc_rdma_peer_t **peer) +{ + ompi_osc_rdma_sync_t *rdma_sync = &module->all_sync; + + /* check synchronization type */ + if (OMPI_OSC_RDMA_SYNC_TYPE_PSCW != rdma_sync->type) { + *peer = NULL; + return false; + } + + return 
ompi_osc_rdma_sync_array_peer (target, rdma_sync->peer_list.peers, rdma_sync->num_peers, peer); +} diff --git a/ompi/mca/osc/rdma/osc_rdma_sync.h b/ompi/mca/osc/rdma/osc_rdma_sync.h new file mode 100644 index 00000000000..c4ffbbd4c3c --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_sync.h @@ -0,0 +1,158 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OSC_RDMA_SYNC_H) +#define OSC_RDMA_SYNC_H + +#include "osc_rdma_types.h" +#include "opal/class/opal_object.h" +#include "opal/threads/threads.h" + +/** + * @brief synchronization types + */ +enum ompi_osc_rdma_sync_type_t { + /** default value */ + OMPI_OSC_RDMA_SYNC_TYPE_NONE, + /** lock access epoch */ + OMPI_OSC_RDMA_SYNC_TYPE_LOCK, + /** fence access epoch */ + OMPI_OSC_RDMA_SYNC_TYPE_FENCE, + /* post-start-complete-wait access epoch */ + OMPI_OSC_RDMA_SYNC_TYPE_PSCW, +}; +typedef enum ompi_osc_rdma_sync_type_t ompi_osc_rdma_sync_type_t; + +struct ompi_osc_rdma_module_t; + +/** + * @brief synchronization object + * + * This structure holds information about an access epoch. + */ +struct ompi_osc_rdma_sync_t { + opal_object_t super; + + /** osc rdma module */ + struct ompi_osc_rdma_module_t *module; + + /** synchronization type */ + ompi_osc_rdma_sync_type_t type; + + /** synchronization data */ + union { + /** lock specific synchronization data */ + struct { + /** lock target rank (-1 for all) */ + int target; + + /** lock type: MPI_LOCK_SHARED, MPI_LOCK_EXCLUSIVE */ + int16_t type; + + /** assert specified at lock acquire time. at this time Open MPI + * only uses 5-bits for asserts. if this number goes over 16 this + * will need to be changed to accomodate. 
*/ + int16_t assert; + } lock; + + /** post/start/complete/wait specific synchronization data */ + struct { + /** group passed to ompi_osc_rdma_start */ + ompi_group_t *group; + } pscw; + } sync; + + /** array of peers for this sync */ + union { + /** multiple peers (lock all, pscw, fence) */ + struct ompi_osc_rdma_peer_t **peers; + /** single peer (targeted lock) */ + struct ompi_osc_rdma_peer_t *peer; + } peer_list; + + /** number of peers */ + int num_peers; + + /** communication has started on this epoch */ + bool epoch_active; + + /** outstanding rdma operations on epoch */ + osc_rdma_counter_t outstanding_rdma; + + /** aggregated operations in this epoch */ + opal_list_t aggregations; + + /** lock to protect sync structure members */ + opal_mutex_t lock; +}; +typedef struct ompi_osc_rdma_sync_t ompi_osc_rdma_sync_t; + +OBJ_CLASS_DECLARATION(ompi_osc_rdma_sync_t); + +/** + * @brief allocate a new synchronization object + * + * @param[in] module osc rdma module + * + * @returns NULL on failure + * @returns a new synchronization object on success + */ +ompi_osc_rdma_sync_t *ompi_osc_rdma_sync_allocate (struct ompi_osc_rdma_module_t *module); + +/** + * @brief release a synchronization object + * + * @param[in] rdma_sync synchronization object allocated by ompi_osc_rdma_sync_allocate() + */ +void ompi_osc_rdma_sync_return (ompi_osc_rdma_sync_t *rdma_sync); + +/** + * Check if the target is part of a PSCW access epoch + * + * @param[in] module osc rdma module + * @param[in] target target rank + * @param[out] peer peer object + * + * @returns false if the window is not in a PSCW access epoch or the peer is not + * in the group passed to MPI_Win_start + * @returns true otherwise + * + * This functions verifies the target is part of an active PSCW access epoch. 
+ */ +bool ompi_osc_rdma_sync_pscw_peer (struct ompi_osc_rdma_module_t *module, int target, struct ompi_osc_rdma_peer_t **peer); + +/** + * @brief increment the outstanding rdma operation counter (atomic) + * + * @param[in] rdma_sync osc rdma synchronization object + */ +static inline void ompi_osc_rdma_sync_rdma_inc (ompi_osc_rdma_sync_t *rdma_sync) +{ + ompi_osc_rdma_counter_add (&rdma_sync->outstanding_rdma, 1); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "inc: there are %ld outstanding rdma operations", + (long) rdma_sync->outstanding_rdma); /* the counter is a signed type; the cast matches the %ld conversion */ +} + +/** + * @brief decrement the outstanding rdma operation counter (atomic) + * + * @param[in] rdma_sync osc rdma synchronization object + */ +static inline void ompi_osc_rdma_sync_rdma_dec (ompi_osc_rdma_sync_t *rdma_sync) +{ + ompi_osc_rdma_counter_add (&rdma_sync->outstanding_rdma, -1); + + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "dec: there are %ld outstanding rdma operations", + (long) rdma_sync->outstanding_rdma); /* the counter is a signed type; the cast matches the %ld conversion */ +} + +#endif /* OSC_RDMA_SYNC_H */ diff --git a/ompi/mca/osc/rdma/osc_rdma_types.h b/ompi/mca/osc/rdma/osc_rdma_types.h new file mode 100644 index 00000000000..123238d0209 --- /dev/null +++ b/ompi/mca/osc/rdma/osc_rdma_types.h @@ -0,0 +1,210 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_OSC_RDMA_TYPES_H +#define OMPI_OSC_RDMA_TYPES_H + +#include "ompi_config.h" + +/* forward declarations of some other component types */ +struct ompi_osc_rdma_frag_t; +struct ompi_osc_rdma_sync_t; +struct ompi_osc_rdma_peer_t; + +#if OPAL_HAVE_ATOMIC_MATH_64 + +typedef int64_t osc_rdma_base_t; +typedef int64_t osc_rdma_size_t; +typedef int64_t osc_rdma_counter_t; + +#define ompi_osc_rdma_counter_add opal_atomic_add_64 + +#else + +typedef int32_t osc_rdma_base_t; +typedef int32_t osc_rdma_size_t; +typedef int32_t osc_rdma_counter_t; + +#define ompi_osc_rdma_counter_add opal_atomic_add_32 + +#endif + +#if OPAL_HAVE_ATOMIC_MATH_64 + +#define OMPI_OSC_RDMA_LOCK_EXCLUSIVE 0x8000000000000000l + +typedef int64_t ompi_osc_rdma_lock_t; + +static inline int64_t ompi_osc_rdma_lock_add (volatile int64_t *p, int64_t value) +{ + int64_t new; + + opal_atomic_mb (); + new = opal_atomic_add_64 (p, value) - value; + opal_atomic_mb (); + + return new; +} + +static inline int ompi_osc_rdma_lock_cmpset (volatile int64_t *p, int64_t comp, int64_t value) +{ + int ret; + + opal_atomic_mb (); + ret = opal_atomic_cmpset_64 (p, comp, value); + opal_atomic_mb (); + + return ret; +} + +#else + +#define OMPI_OSC_RDMA_LOCK_EXCLUSIVE 0x80000000l + +typedef int32_t ompi_osc_rdma_lock_t; + +static inline int32_t ompi_osc_rdma_lock_add (volatile int32_t *p, int32_t value) +{ + int32_t new; + + opal_atomic_mb (); + /* opal_atomic_add_32 differs from normal atomics in that is returns the new value */ + new = opal_atomic_add_32 (p, value) - value; + opal_atomic_mb (); + + return new; +} + +static inline int ompi_osc_rdma_lock_cmpset (volatile int32_t *p, int32_t comp, int32_t value) +{ + int ret; + + opal_atomic_mb (); + ret = opal_atomic_cmpset_32 (p, comp, value); + opal_atomic_mb (); + + return ret; +} + +#endif /* OPAL_HAVE_ATOMIC_MATH_64 */ + +/** + * @brief structure describing a window memory region + 
*/ +struct ompi_osc_rdma_region_t { + /** base of the region */ + osc_rdma_base_t base; + /** length (in bytes) of the region */ + osc_rdma_size_t len; + /** BTL segment for the region (may be empty) */ + unsigned char btl_handle_data[]; +}; +typedef struct ompi_osc_rdma_region_t ompi_osc_rdma_region_t; + +/** + * @brief data handle for dynamic memory regions + * + * This structure holds the btl handle (if one exists) and the + * reference count for a dynamically attached region. The reference + * count is used to keep track of the number of times a memory + * region associated with a page (or set of pages) has been attached. + */ +struct ompi_osc_rdma_handle_t { + /** btl handle for the memory region */ + mca_btl_base_registration_handle_t *btl_handle; + /** number of attaches assocated with this region */ + int refcnt; +}; +typedef struct ompi_osc_rdma_handle_t ompi_osc_rdma_handle_t; + +/** + * @brief number of state buffers that can be used for storing + * post messages. + * + * This value was chosen because post exposure epochs are expected to be + * small relative to the size of the communicator. The value is constant + * and not exposed as an MCA variable to keep the layout of the + * \ref ompi_osc_rdma_state_t structure simple. + */ +#define OMPI_OSC_RDMA_POST_PEER_MAX 32 + +/** + * @brief window state structure + * + * This structure holds the information relevant to the window state + * of a peer. The structure synchronization data and includes useful + * information that can be remotely read by other peers in the window. + */ +struct ompi_osc_rdma_state_t { + /** used when rdma is in use to handle excusive locks and global shared locks (lock_all) */ + ompi_osc_rdma_lock_t global_lock; + /** lock state for this node. 
the top bit indicates if a exclusive lock exists and the + * remaining bits count the number of shared locks */ + ompi_osc_rdma_lock_t local_lock; + /** lock for the accumulate state to ensure ordering and consistency */ + ompi_osc_rdma_lock_t accumulate_lock; + /** current index to post to. compare-and-swap must be used to ensure + * the index is free */ + osc_rdma_counter_t post_index; + /** post buffers */ + osc_rdma_counter_t post_peers[OMPI_OSC_RDMA_POST_PEER_MAX]; + /** counter for number of post messages received */ + osc_rdma_counter_t num_post_msgs; + /** counter for number of complete messages received */ + osc_rdma_counter_t num_complete_msgs; + /** lock for the region state to ensure consistency */ + ompi_osc_rdma_lock_t regions_lock; + /** displacement unit for this process */ + int64_t disp_unit; + /** number of attached regions. this count will be 1 in non-dynamic regions */ + osc_rdma_counter_t region_count; + /** attached memory regions */ + unsigned char regions[]; +}; +typedef struct ompi_osc_rdma_state_t ompi_osc_rdma_state_t; + +struct ompi_osc_rdma_aggregation_t { + opal_list_item_t super; + + /** associated peer */ + struct ompi_osc_rdma_peer_t *peer; + + /** aggregation buffer frag */ + struct ompi_osc_rdma_frag_t *frag; + + /** synchronization object */ + struct ompi_osc_rdma_sync_t *sync; + + /** aggregation buffer */ + char *buffer; + + /** target for the operation */ + osc_rdma_base_t target_address; + + /** handle for target memory address */ + mca_btl_base_registration_handle_t *target_handle; + + /** buffer size */ + size_t buffer_size; + + /** buffer used */ + size_t buffer_used; + + /** type */ + int type; +}; +typedef struct ompi_osc_rdma_aggregation_t ompi_osc_rdma_aggregation_t; + +OBJ_CLASS_DECLARATION(ompi_osc_rdma_aggregation_t); + +#define OSC_RDMA_VERBOSE(x, ...) 
OPAL_OUTPUT_VERBOSE((x, ompi_osc_base_framework.framework_output, __VA_ARGS__)) + +#endif /* OMPI_OSC_RDMA_TYPES_H */ diff --git a/ompi/mca/osc/rdma/owner.txt b/ompi/mca/osc/rdma/owner.txt index c766c3ebc75..48ac538cbb0 100644 --- a/ompi/mca/osc/rdma/owner.txt +++ b/ompi/mca/osc/rdma/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: ? -status: dead? +owner: LANL +status: active diff --git a/ompi/mca/osc/sm/Makefile.am b/ompi/mca/osc/sm/Makefile.am index 5b648e5d9a0..8a5a8284d2c 100644 --- a/ompi/mca/osc/sm/Makefile.am +++ b/ompi/mca/osc/sm/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2011 Sandia National Laboratories. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index 2e5e254f4c3..7c058465b07 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -38,7 +38,6 @@ struct ompi_osc_sm_lock_t { typedef struct ompi_osc_sm_lock_t ompi_osc_sm_lock_t; struct ompi_osc_sm_node_state_t { - int32_t post_count; int32_t complete_count; ompi_osc_sm_lock_t lock; opal_atomic_lock_t accumulate_lock; @@ -81,17 +80,20 @@ struct ompi_osc_sm_module_t { ompi_osc_sm_global_state_t *global_state; ompi_osc_sm_node_state_t *my_node_state; ompi_osc_sm_node_state_t *node_states; + uint64_t **posts; + + opal_mutex_t lock; }; typedef struct ompi_osc_sm_module_t ompi_osc_sm_module_t; int ompi_osc_sm_shared_query(struct ompi_win_t *win, int rank, size_t *size, int *disp_unit, void *baseptr); int ompi_osc_sm_attach(struct ompi_win_t *win, void *base, size_t len); -int ompi_osc_sm_detach(struct ompi_win_t *win, void *base); +int ompi_osc_sm_detach(struct ompi_win_t *win, const void *base); int ompi_osc_sm_free(struct ompi_win_t *win); -int ompi_osc_sm_put(void *origin_addr, +int ompi_osc_sm_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int 
target, @@ -109,7 +111,7 @@ int ompi_osc_sm_get(void *origin_addr, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); -int ompi_osc_sm_accumulate(void *origin_addr, +int ompi_osc_sm_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -119,15 +121,15 @@ int ompi_osc_sm_accumulate(void *origin_addr, struct ompi_op_t *op, struct ompi_win_t *win); -int ompi_osc_sm_compare_and_swap(void *origin_addr, - void *compare_addr, +int ompi_osc_sm_compare_and_swap(const void *origin_addr, + const void *compare_addr, void *result_addr, struct ompi_datatype_t *dt, int target, OPAL_PTRDIFF_TYPE target_disp, struct ompi_win_t *win); -int ompi_osc_sm_fetch_and_op(void *origin_addr, +int ompi_osc_sm_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, @@ -135,7 +137,7 @@ int ompi_osc_sm_fetch_and_op(void *origin_addr, struct ompi_op_t *op, struct ompi_win_t *win); -int ompi_osc_sm_get_accumulate(void *origin_addr, +int ompi_osc_sm_get_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype, void *result_addr, @@ -148,7 +150,7 @@ int ompi_osc_sm_get_accumulate(void *origin_addr, struct ompi_op_t *op, struct ompi_win_t *win); -int ompi_osc_sm_rput(void *origin_addr, +int ompi_osc_sm_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -168,7 +170,7 @@ int ompi_osc_sm_rget(void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); -int ompi_osc_sm_raccumulate(void *origin_addr, +int ompi_osc_sm_raccumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -179,7 +181,7 @@ int ompi_osc_sm_raccumulate(void *origin_addr, struct ompi_win_t *win, struct ompi_request_t **request); -int ompi_osc_sm_rget_accumulate(void *origin_addr, +int ompi_osc_sm_rget_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_datatype, 
void *result_addr, diff --git a/ompi/mca/osc/sm/osc_sm_active_target.c b/ompi/mca/osc/sm/osc_sm_active_target.c index 95df572d616..003dec6ca2e 100644 --- a/ompi/mca/osc/sm/osc_sm_active_target.c +++ b/ompi/mca/osc/sm/osc_sm_active_target.c @@ -1,15 +1,15 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,6 +22,74 @@ #include "osc_sm.h" +/** + * compare_ranks: + * + * @param[in] ptra Pointer to integer item + * @param[in] ptrb Pointer to integer item + * + * @returns 0 if *ptra == *ptrb + * @returns -1 if *ptra < *ptrb + * @returns 1 otherwise + * + * This function is used to sort the rank list. It can be removed if + * groups are always in order. + */ +static int compare_ranks (const void *ptra, const void *ptrb) +{ + int a = *((int *) ptra); + int b = *((int *) ptrb); + + if (a < b) { + return -1; + } else if (a > b) { + return 1; + } + + return 0; +} + +/** + * ompi_osc_pt2pt_get_comm_ranks: + * + * @param[in] module - OSC PT2PT module + * @param[in] sub_group - Group with ranks to translate + * + * @returns an array of translated ranks on success or NULL on failure + * + * Translate the ranks given in {sub_group} into ranks in the + * communicator used to create {module}. 
+ */ +static int *ompi_osc_sm_group_ranks (ompi_group_t *group, ompi_group_t *sub_group) +{ + int size = ompi_group_size(sub_group); + int *ranks1, *ranks2; + int ret; + + ranks1 = calloc (size, sizeof(int)); + ranks2 = calloc (size, sizeof(int)); + if (NULL == ranks1 || NULL == ranks2) { + free (ranks1); + free (ranks2); + return NULL; + } + + for (int i = 0 ; i < size ; ++i) { + ranks1[i] = i; + } + + ret = ompi_group_translate_ranks (sub_group, size, ranks1, group, ranks2); + free (ranks1); + if (OMPI_SUCCESS != ret) { + free (ranks2); + return NULL; + } + + qsort (ranks2, size, sizeof (int), compare_ranks); + + return ranks2; +} + int ompi_osc_sm_fence(int assert, struct ompi_win_t *win) @@ -54,7 +122,6 @@ ompi_osc_sm_fence(int assert, struct ompi_win_t *win) } } - int ompi_osc_sm_start(struct ompi_group_t *group, int assert, @@ -62,20 +129,43 @@ ompi_osc_sm_start(struct ompi_group_t *group, { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; + int my_rank = ompi_comm_rank (module->comm); + + OBJ_RETAIN(group); + + if (!OPAL_ATOMIC_CMPSET_PTR(&module->start_group, NULL, group)) { + OBJ_RELEASE(group); + return OMPI_ERR_RMA_SYNC; + } if (0 == (assert & MPI_MODE_NOCHECK)) { int size; - OBJ_RETAIN(group); - module->start_group = group; + int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group); + if (NULL == ranks) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + size = ompi_group_size(module->start_group); - while (module->my_node_state->post_count != size) { - opal_progress(); - opal_atomic_mb(); - } - } else { - module->start_group = NULL; + for (int i = 0 ; i < size ; ++i) { + int rank_byte = ranks[i] >> 6; + uint64_t old, rank_bit = ((uint64_t) 1) << (ranks[i] & 0x3f); + + /* wait for rank to post */ + while (!(module->posts[my_rank][rank_byte] & rank_bit)) { + opal_progress(); + opal_atomic_mb(); + } + + opal_atomic_rmb (); + + do { + old = module->posts[my_rank][rank_byte]; + } while (!opal_atomic_cmpset_64 ((int64_t *) 
module->posts[my_rank] + rank_byte, old, old ^ rank_bit)); + } + + free (ranks); } opal_atomic_mb(); @@ -88,30 +178,33 @@ ompi_osc_sm_complete(struct ompi_win_t *win) { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; - int gsize, csize; + ompi_group_t *group; + int gsize; /* ensure all memory operations have completed */ opal_atomic_mb(); - if (NULL != module->start_group) { - module->my_node_state->post_count = 0; - opal_atomic_mb(); + group = module->start_group; + if (NULL == group || !OPAL_ATOMIC_CMPSET_PTR(&module->start_group, group, NULL)) { + return OMPI_ERR_RMA_SYNC; + } - gsize = ompi_group_size(module->start_group); - csize = ompi_comm_size(module->comm); - for (int i = 0 ; i < gsize ; ++i) { - for (int j = 0 ; j < csize ; ++j) { - if (ompi_group_peer_lookup(module->start_group, i) == - ompi_comm_peer_lookup(module->comm, j)) { - (void)opal_atomic_add_32(&module->node_states[j].complete_count, 1); - } - } - } + opal_atomic_mb(); - OBJ_RELEASE(module->start_group); - module->start_group = NULL; + int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group); + if (NULL == ranks) { + return OMPI_ERR_OUT_OF_RESOURCE; } + gsize = ompi_group_size(group); + for (int i = 0 ; i < gsize ; ++i) { + (void) opal_atomic_add_32(&module->node_states[ranks[i]].complete_count, 1); + } + + free (ranks); + + OBJ_RELEASE(group); + opal_atomic_mb(); return OMPI_SUCCESS; } @@ -124,29 +217,45 @@ ompi_osc_sm_post(struct ompi_group_t *group, { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; - int gsize, csize; + int my_rank = ompi_comm_rank (module->comm); + int my_byte = my_rank >> 6; + uint64_t my_bit = ((uint64_t) 1) << (my_rank & 0x3f); + int gsize; + + OPAL_THREAD_LOCK(&module->lock); + + if (NULL != module->post_group) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_RMA_SYNC; + } + + module->post_group = group; + + OBJ_RETAIN(group); if (0 == (assert & MPI_MODE_NOCHECK)) { - OBJ_RETAIN(group); - 
module->post_group = group; + int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group); + if (NULL == ranks) { + return OMPI_ERR_OUT_OF_RESOURCE; + } module->my_node_state->complete_count = 0; opal_atomic_mb(); gsize = ompi_group_size(module->post_group); - csize = ompi_comm_size(module->comm); for (int i = 0 ; i < gsize ; ++i) { - for (int j = 0 ; j < csize ; ++j) { - if (ompi_group_peer_lookup(module->post_group, i) == - ompi_comm_peer_lookup(module->comm, j)) { - (void)opal_atomic_add_32(&module->node_states[j].post_count, 1); - } - } + (void) opal_atomic_add_64 ((int64_t *) module->posts[ranks[i]] + my_byte, my_bit); } - } else { - module->post_group = NULL; + + opal_atomic_wmb (); + + free (ranks); + + opal_progress (); } + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_SUCCESS; } @@ -156,19 +265,29 @@ ompi_osc_sm_wait(struct ompi_win_t *win) { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; + ompi_group_t *group; - if (NULL != module->post_group) { - int size = ompi_group_size(module->post_group); + OPAL_THREAD_LOCK(&module->lock); - while (module->my_node_state->complete_count != size) { - opal_progress(); - opal_atomic_mb(); - } + if (NULL == module->post_group) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_RMA_SYNC; + } - OBJ_RELEASE(module->post_group); - module->post_group = NULL; + group = module->post_group; + + int size = ompi_group_size (group); + + while (module->my_node_state->complete_count != size) { + opal_progress(); + opal_atomic_mb(); } + OBJ_RELEASE(group); + module->post_group = NULL; + + OPAL_THREAD_UNLOCK(&module->lock); + /* ensure all memory operations have completed */ opal_atomic_mb(); @@ -183,19 +302,25 @@ ompi_osc_sm_test(struct ompi_win_t *win, ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; - if (NULL != module->post_group) { - int size = ompi_group_size(module->post_group); + OPAL_THREAD_LOCK(&module->lock); - if 
(module->my_node_state->complete_count == size) { - OBJ_RELEASE(module->post_group); - module->post_group = NULL; - *flag = 1; - } + if (NULL == module->post_group) { + OPAL_THREAD_UNLOCK(&module->lock); + return OMPI_ERR_RMA_SYNC; + } + + int size = ompi_group_size(module->post_group); + + if (module->my_node_state->complete_count == size) { + OBJ_RELEASE(module->post_group); + module->post_group = NULL; + *flag = 1; } else { - opal_atomic_mb(); *flag = 0; } + OPAL_THREAD_UNLOCK(&module->lock); + /* ensure all memory operations have completed */ opal_atomic_mb(); diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index f0d3ee87cd3..e6f3da44e68 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -3,10 +3,12 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -19,7 +21,7 @@ #include "osc_sm.h" int -ompi_osc_sm_rput(void *origin_addr, +ompi_osc_sm_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -43,7 +45,7 @@ ompi_osc_sm_rput(void *origin_addr, remote_address = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp; - ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt, + ret = ompi_datatype_sndrcv((void *)origin_addr, origin_count, origin_dt, remote_address, target_count, target_dt); if (OMPI_SUCCESS != ret) { return ret; @@ -99,7 +101,7 @@ ompi_osc_sm_rget(void *origin_addr, int -ompi_osc_sm_raccumulate(void *origin_addr, +ompi_osc_sm_raccumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -127,7 +129,7 @@ ompi_osc_sm_raccumulate(void *origin_addr, opal_atomic_lock(&module->node_states[target].accumulate_lock); if (op == &ompi_mpi_op_replace.op) { - ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt, + ret = ompi_datatype_sndrcv((void *)origin_addr, origin_count, origin_dt, remote_address, target_count, target_dt); } else { ret = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt, @@ -147,17 +149,17 @@ ompi_osc_sm_raccumulate(void *origin_addr, int -ompi_osc_sm_rget_accumulate(void *origin_addr, - int origin_count, +ompi_osc_sm_rget_accumulate(const void *origin_addr, + int origin_count, struct ompi_datatype_t *origin_dt, - void *result_addr, - int result_count, + void *result_addr, + int result_count, struct ompi_datatype_t *result_dt, - int target, - MPI_Aint target_disp, + int target, + MPI_Aint target_disp, int target_count, - struct ompi_datatype_t *target_dt, - struct ompi_op_t *op, + struct ompi_datatype_t *target_dt, + struct ompi_op_t *op, struct ompi_win_t *win, struct ompi_request_t **ompi_req) { @@ -183,7 +185,7 @@ ompi_osc_sm_rget_accumulate(void *origin_addr, if 
(OMPI_SUCCESS != ret || op == &ompi_mpi_op_no_op.op) goto done; if (op == &ompi_mpi_op_replace.op) { - ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt, + ret = ompi_datatype_sndrcv((void *)origin_addr, origin_count, origin_dt, remote_address, target_count, target_dt); } else { ret = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt, @@ -204,7 +206,7 @@ ompi_osc_sm_rget_accumulate(void *origin_addr, int -ompi_osc_sm_put(void *origin_addr, +ompi_osc_sm_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -227,7 +229,7 @@ ompi_osc_sm_put(void *origin_addr, remote_address = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp; - ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt, + ret = ompi_datatype_sndrcv((void *)origin_addr, origin_count, origin_dt, remote_address, target_count, target_dt); return ret; @@ -266,7 +268,7 @@ ompi_osc_sm_get(void *origin_addr, int -ompi_osc_sm_accumulate(void *origin_addr, +ompi_osc_sm_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, @@ -293,7 +295,7 @@ ompi_osc_sm_accumulate(void *origin_addr, opal_atomic_lock(&module->node_states[target].accumulate_lock); if (op == &ompi_mpi_op_replace.op) { - ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt, + ret = ompi_datatype_sndrcv((void *)origin_addr, origin_count, origin_dt, remote_address, target_count, target_dt); } else { ret = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt, @@ -307,17 +309,17 @@ ompi_osc_sm_accumulate(void *origin_addr, int -ompi_osc_sm_get_accumulate(void *origin_addr, - int origin_count, +ompi_osc_sm_get_accumulate(const void *origin_addr, + int origin_count, struct ompi_datatype_t *origin_dt, - void *result_addr, - int result_count, + void *result_addr, + int result_count, struct ompi_datatype_t *result_dt, - int target, - MPI_Aint target_disp, + int target, + MPI_Aint target_disp, 
int target_count, struct ompi_datatype_t *target_dt, - struct ompi_op_t *op, + struct ompi_op_t *op, struct ompi_win_t *win) { int ret; @@ -342,7 +344,7 @@ ompi_osc_sm_get_accumulate(void *origin_addr, if (OMPI_SUCCESS != ret || op == &ompi_mpi_op_no_op.op) goto done; if (op == &ompi_mpi_op_replace.op) { - ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt, + ret = ompi_datatype_sndrcv((void *)origin_addr, origin_count, origin_dt, remote_address, target_count, target_dt); } else { ret = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt, @@ -358,8 +360,8 @@ ompi_osc_sm_get_accumulate(void *origin_addr, int -ompi_osc_sm_compare_and_swap(void *origin_addr, - void *compare_addr, +ompi_osc_sm_compare_and_swap(const void *origin_addr, + const void *compare_addr, void *result_addr, struct ompi_datatype_t *dt, int target, @@ -373,7 +375,7 @@ ompi_osc_sm_compare_and_swap(void *origin_addr, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "compare_and_swap: 0x%lx, %s, %d, %d, 0x%lx", - (unsigned long) origin_addr, + (unsigned long) origin_addr, dt->name, target, (int) target_disp, (unsigned long) win)); @@ -398,7 +400,7 @@ ompi_osc_sm_compare_and_swap(void *origin_addr, int -ompi_osc_sm_fetch_and_op(void *origin_addr, +ompi_osc_sm_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, @@ -412,7 +414,7 @@ ompi_osc_sm_fetch_and_op(void *origin_addr, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "fetch_and_op: 0x%lx, %s, %d, %d, %s, 0x%lx", - (unsigned long) origin_addr, + (unsigned long) origin_addr, dt->name, target, (int) target_disp, op->o_name, (unsigned long) win)); @@ -429,7 +431,7 @@ ompi_osc_sm_fetch_and_op(void *origin_addr, if (op == &ompi_mpi_op_replace.op) { ompi_datatype_copy_content_same_ddt(dt, 1, (char*) remote_address, (char*) origin_addr); } else { - ompi_op_reduce(op, origin_addr, remote_address, 1, dt); + ompi_op_reduce(op, (void *)origin_addr, 
remote_address, 1, dt); } done: diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 95688a47efe..e5288c2b4a4 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -4,11 +4,13 @@ * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -19,6 +21,7 @@ #include "ompi/mca/osc/base/osc_base_obj_convert.h" #include "ompi/request/request.h" #include "opal/util/sys_limits.h" +#include "opal/include/opal/align.h" #include "osc_sm.h" @@ -56,45 +59,45 @@ ompi_osc_sm_component_t mca_osc_sm_component = { ompi_osc_sm_module_t ompi_osc_sm_module_template = { { - ompi_osc_sm_shared_query, - - ompi_osc_sm_attach, - ompi_osc_sm_detach, - ompi_osc_sm_free, - - ompi_osc_sm_put, - ompi_osc_sm_get, - ompi_osc_sm_accumulate, - ompi_osc_sm_compare_and_swap, - ompi_osc_sm_fetch_and_op, - ompi_osc_sm_get_accumulate, - - ompi_osc_sm_rput, - ompi_osc_sm_rget, - ompi_osc_sm_raccumulate, - ompi_osc_sm_rget_accumulate, - - ompi_osc_sm_fence, - - ompi_osc_sm_start, - ompi_osc_sm_complete, - ompi_osc_sm_post, - ompi_osc_sm_wait, - ompi_osc_sm_test, - - ompi_osc_sm_lock, - ompi_osc_sm_unlock, - ompi_osc_sm_lock_all, - ompi_osc_sm_unlock_all, - - ompi_osc_sm_sync, - ompi_osc_sm_flush, - ompi_osc_sm_flush_all, - ompi_osc_sm_flush_local, - ompi_osc_sm_flush_local_all, - - ompi_osc_sm_set_info, - ompi_osc_sm_get_info + .osc_win_shared_query = ompi_osc_sm_shared_query, + + .osc_win_attach = ompi_osc_sm_attach, + .osc_win_detach = ompi_osc_sm_detach, + .osc_free = ompi_osc_sm_free, + + .osc_put = ompi_osc_sm_put, + .osc_get 
= ompi_osc_sm_get, + .osc_accumulate = ompi_osc_sm_accumulate, + .osc_compare_and_swap = ompi_osc_sm_compare_and_swap, + .osc_fetch_and_op = ompi_osc_sm_fetch_and_op, + .osc_get_accumulate = ompi_osc_sm_get_accumulate, + + .osc_rput = ompi_osc_sm_rput, + .osc_rget = ompi_osc_sm_rget, + .osc_raccumulate = ompi_osc_sm_raccumulate, + .osc_rget_accumulate = ompi_osc_sm_rget_accumulate, + + .osc_fence = ompi_osc_sm_fence, + + .osc_start = ompi_osc_sm_start, + .osc_complete = ompi_osc_sm_complete, + .osc_post = ompi_osc_sm_post, + .osc_wait = ompi_osc_sm_wait, + .osc_test = ompi_osc_sm_test, + + .osc_lock = ompi_osc_sm_lock, + .osc_unlock = ompi_osc_sm_unlock, + .osc_lock_all = ompi_osc_sm_lock_all, + .osc_unlock_all = ompi_osc_sm_unlock_all, + + .osc_sync = ompi_osc_sm_sync, + .osc_flush = ompi_osc_sm_flush, + .osc_flush_all = ompi_osc_sm_flush_all, + .osc_flush_local = ompi_osc_sm_flush_local, + .osc_flush_local_all = ompi_osc_sm_flush_local_all, + + .osc_set_info = ompi_osc_sm_set_info, + .osc_get_info = ompi_osc_sm_get_info } }; @@ -113,7 +116,7 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads) } -static int +static int component_finalize(void) { /* clean up requests free list */ @@ -125,17 +128,13 @@ component_finalize(void) static int check_win_ok(ompi_communicator_t *comm, int flavor) { - int i; - if (! 
(MPI_WIN_FLAVOR_SHARED == flavor || MPI_WIN_FLAVOR_ALLOCATE == flavor) ) { return OMPI_ERR_NOT_SUPPORTED; } - for (i = 0 ; i < ompi_comm_size(comm) ; ++i) { - if (!OPAL_PROC_ON_LOCAL_NODE(ompi_comm_peer_lookup(comm, i)->super.proc_flags)) { - return OMPI_ERR_RMA_SHARED; - } + if (ompi_group_have_remote_peers (comm->c_local_group)) { + return OMPI_ERR_RMA_SHARED; } return OMPI_SUCCESS; @@ -165,6 +164,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit int flavor, int *model) { ompi_osc_sm_module_t *module = NULL; + int comm_size = ompi_comm_size (comm); int ret = OMPI_ERROR; if (OMPI_SUCCESS != (ret = check_win_ok(comm, flavor))) { @@ -176,8 +176,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit calloc(1, sizeof(ompi_osc_sm_module_t)); if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + OBJ_CONSTRUCT(&module->lock, opal_mutex_t); + /* fill in the function pointer part */ - memcpy(module, &ompi_osc_sm_module_template, + memcpy(module, &ompi_osc_sm_module_template, sizeof(ompi_osc_base_module_t)); /* need our communicator for collectives in next phase */ @@ -187,7 +189,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->flavor = flavor; /* create the segment */ - if (1 == ompi_comm_size(comm)) { + if (1 == comm_size) { module->segment_base = NULL; module->sizes = malloc(sizeof(size_t)); if (NULL == module->sizes) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; @@ -202,13 +204,15 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit if (NULL == module->global_state) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; module->node_states = malloc(sizeof(ompi_osc_sm_node_state_t)); if (NULL == module->node_states) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - + module->posts = calloc (1, sizeof(module->posts[0]) + sizeof (uint64_t)); + if (NULL == module->posts) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + module->posts[0] = (uint64_t *) (module->posts + 
1); } else { unsigned long total, *rbuf; - char *data_file; int i, flag; size_t pagesize; - size_t state_size; + size_t state_size; + size_t posts_size, post_size = (comm_size + 63) / 64; OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output, "allocating shared memory region of size %ld\n", (long) size)); @@ -216,7 +220,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit /* get the pagesize */ pagesize = opal_getpagesize(); - rbuf = malloc(sizeof(unsigned long) * ompi_comm_size(module->comm)); + rbuf = malloc(sizeof(unsigned long) * comm_size); if (NULL == rbuf) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; module->noncontig = false; @@ -237,25 +241,30 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit if (OMPI_SUCCESS != ret) return ret; total = 0; - for (i = 0 ; i < ompi_comm_size(module->comm) ; ++i) { + for (i = 0 ; i < comm_size ; ++i) { total += rbuf[i]; } - if (asprintf(&data_file, "%s"OPAL_PATH_SEP"shared_window_%d.%s", - ompi_process_info.job_session_dir, - ompi_comm_get_cid(module->comm), - ompi_process_info.nodename) < 0) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - /* user opal/shmem directly to create a shared memory segment */ - state_size = sizeof(ompi_osc_sm_global_state_t) + sizeof(ompi_osc_sm_node_state_t) * ompi_comm_size(module->comm); - if (0 == ompi_comm_rank (module->comm)) { - ret = opal_shmem_segment_create (&module->seg_ds, data_file, total + pagesize + state_size); - if (OPAL_SUCCESS != ret) { - goto error; - } - } + state_size = sizeof(ompi_osc_sm_global_state_t) + sizeof(ompi_osc_sm_node_state_t) * comm_size; + state_size += OPAL_ALIGN_PAD_AMOUNT(state_size, 64); + posts_size = comm_size * post_size * sizeof (uint64_t); + posts_size += OPAL_ALIGN_PAD_AMOUNT(posts_size, 64); + if (0 == ompi_comm_rank (module->comm)) { + char *data_file; + if (asprintf(&data_file, "%s"OPAL_PATH_SEP"shared_window_%d.%s", + ompi_process_info.proc_session_dir, + 
ompi_comm_get_cid(module->comm), + ompi_process_info.nodename) < 0) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + ret = opal_shmem_segment_create (&module->seg_ds, data_file, total + pagesize + state_size + posts_size); + free(data_file); + if (OPAL_SUCCESS != ret) { + goto error; + } + } ret = module->comm->c_coll.coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0, module->comm, module->comm->c_coll.coll_bcast_module); @@ -268,15 +277,23 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit goto error; } - module->sizes = malloc(sizeof(size_t) * ompi_comm_size(module->comm)); + module->sizes = malloc(sizeof(size_t) * comm_size); if (NULL == module->sizes) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - module->bases = malloc(sizeof(void*) * ompi_comm_size(module->comm)); + module->bases = malloc(sizeof(void*) * comm_size); if (NULL == module->bases) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + module->posts = calloc (comm_size, sizeof (module->posts[0])); + if (NULL == module->posts) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - module->global_state = (ompi_osc_sm_global_state_t *) (module->segment_base); + /* set module->posts[0] first to ensure 64-bit alignment */ + module->posts[0] = (uint64_t *) (module->segment_base); + module->global_state = (ompi_osc_sm_global_state_t *) (module->posts[0] + comm_size * post_size); module->node_states = (ompi_osc_sm_node_state_t *) (module->global_state + 1); - for (i = 0, total = state_size ; i < ompi_comm_size(module->comm) ; ++i) { + for (i = 0, total = state_size + posts_size ; i < comm_size ; ++i) { + if (i > 0) { + module->posts[i] = module->posts[i - 1] + post_size; + } + module->sizes[i] = rbuf[i]; if (module->sizes[i]) { module->bases[i] = ((char *) module->segment_base) + total; @@ -298,7 +315,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit opal_atomic_init(&module->my_node_state->accumulate_lock, OPAL_ATOMIC_UNLOCKED); /* share everyone's displacement 
units. */ - module->disp_units = malloc(sizeof(int) * ompi_comm_size(module->comm)); + module->disp_units = malloc(sizeof(int) * comm_size); ret = module->comm->c_coll.coll_allgather(&disp_unit, 1, MPI_INT, module->disp_units, 1, MPI_INT, module->comm, @@ -311,7 +328,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit /* initialize synchronization code */ module->my_sense = 1; - module->outstanding_locks = calloc(ompi_comm_size(module->comm), sizeof(enum ompi_osc_sm_locktype_t)); + module->outstanding_locks = calloc(comm_size, sizeof(enum ompi_osc_sm_locktype_t)); if (NULL == module->outstanding_locks) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; @@ -348,7 +365,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit } module->global_state->use_barrier_for_fence = 0; module->global_state->sense = module->my_sense; - module->global_state->count = ompi_comm_size(module->comm); + module->global_state->count = comm_size; pthread_mutexattr_destroy(&mattr); } else { module->global_state->use_barrier_for_fence = 1; @@ -369,8 +386,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit return OMPI_SUCCESS; error: - if (NULL != module->comm) ompi_comm_free(&module->comm); - if (NULL != module) free(module); + win->w_osc_module = &module->super; + ompi_osc_sm_free (win); return ret; } @@ -424,7 +441,7 @@ ompi_osc_sm_attach(struct ompi_win_t *win, void *base, size_t len) int -ompi_osc_sm_detach(struct ompi_win_t *win, void *base) +ompi_osc_sm_detach(struct ompi_win_t *win, const void *base) { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; @@ -457,12 +474,19 @@ ompi_osc_sm_free(struct ompi_win_t *win) free(module->node_states); free(module->global_state); free(module->bases[0]); - free(module->bases); - free(module->sizes); } + free(module->disp_units); + free(module->outstanding_locks); + free(module->sizes); + free(module->bases); + + free 
(module->posts); /* cleanup */ ompi_comm_free(&module->comm); + + OBJ_DESTRUCT(&module->lock); + free(module); return OMPI_SUCCESS; @@ -491,7 +515,7 @@ ompi_osc_sm_get_info(struct ompi_win_t *win, struct ompi_info_t **info_used) if (NULL == info) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; if (module->flavor == MPI_WIN_FLAVOR_SHARED) { - ompi_info_set(info, "blocking_fence", + ompi_info_set(info, "blocking_fence", (1 == module->global_state->use_barrier_for_fence) ? "true" : "false"); ompi_info_set(info, "alloc_shared_noncontig", (module->noncontig) ? "true" : "false"); diff --git a/ompi/mca/osc/sm/osc_sm_passive_target.c b/ompi/mca/osc/sm/osc_sm_passive_target.c index ffe93fdb5d7..889ac829dd1 100644 --- a/ompi/mca/osc/sm/osc_sm_passive_target.c +++ b/ompi/mca/osc/sm/osc_sm_passive_target.c @@ -5,9 +5,9 @@ * reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -55,10 +55,10 @@ lk_fetch32(ompi_osc_sm_module_t *module, static inline int -start_exclusive(ompi_osc_sm_module_t *module, +start_exclusive(ompi_osc_sm_module_t *module, int target) { - uint32_t me = lk_fetch_add32(module, target, + uint32_t me = lk_fetch_add32(module, target, offsetof(ompi_osc_sm_lock_t, counter), 1); while (me != lk_fetch32(module, target, @@ -71,7 +71,7 @@ start_exclusive(ompi_osc_sm_module_t *module, static inline int -end_exclusive(ompi_osc_sm_module_t *module, +end_exclusive(ompi_osc_sm_module_t *module, int target) { lk_add32(module, target, offsetof(ompi_osc_sm_lock_t, write), 1); @@ -82,10 +82,10 @@ end_exclusive(ompi_osc_sm_module_t *module, static inline int -start_shared(ompi_osc_sm_module_t *module, +start_shared(ompi_osc_sm_module_t *module, int target) { - uint32_t me = lk_fetch_add32(module, target, + uint32_t me = lk_fetch_add32(module, target, offsetof(ompi_osc_sm_lock_t, counter), 1); while (me != lk_fetch32(module, target, @@ -100,7 +100,7 @@ start_shared(ompi_osc_sm_module_t 
*module, static inline int -end_shared(ompi_osc_sm_module_t *module, +end_shared(ompi_osc_sm_module_t *module, int target) { lk_add32(module, target, offsetof(ompi_osc_sm_lock_t, write), 1); diff --git a/ompi/mca/osc/sm/owner.txt b/ompi/mca/osc/sm/owner.txt index af11a265bb0..52961b5d12f 100644 --- a/ompi/mca/osc/sm/owner.txt +++ b/ompi/mca/osc/sm/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: ? -status: maintenance? +owner: LANL +status: maintenance diff --git a/ompi/mca/pml/Makefile.am b/ompi/mca/pml/Makefile.am index 231762066ec..2c05ec3d562 100644 --- a/ompi/mca/pml/Makefile.am +++ b/ompi/mca/pml/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -35,4 +35,4 @@ endif include base/Makefile.am distclean-local: - rm -f base/static-components.h + rm -f base/static-components.h diff --git a/ompi/mca/pml/base/Makefile.am b/ompi/mca/pml/base/Makefile.am index a5b8245a1cf..ab2848b21ab 100644 --- a/ompi/mca/pml/base/Makefile.am +++ b/ompi/mca/pml/base/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -31,5 +31,4 @@ libmca_pml_la_SOURCES += \ base/pml_base_recvreq.c \ base/pml_base_request.c \ base/pml_base_select.c \ - base/pml_base_sendreq.c \ - base/pml_base_ft.c + base/pml_base_sendreq.c diff --git a/ompi/mca/pml/base/base.h b/ompi/mca/pml/base/base.h index 0ccd5859227..39bc6c2dc5c 100644 --- a/ompi/mca/pml/base/base.h +++ b/ompi/mca/pml/base/base.h @@ -6,15 +6,15 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -38,7 +38,7 @@ BEGIN_C_DECLS /* * This is the base priority for a PML wrapper component - * If there exists more than one then it is undefined + * If there exists more than one then it is undefined * which one is picked. 
*/ #define PML_SELECT_WRAPPER_PRIORITY -128 @@ -62,8 +62,6 @@ OMPI_DECLSPEC int mca_pml_base_pml_check_selected(const char *my_pml, OMPI_DECLSPEC int mca_pml_base_finalize(void); -OMPI_DECLSPEC int mca_pml_base_ft_event(int state); - /* * Globals */ diff --git a/ompi/mca/pml/base/pml_base_bsend.c b/ompi/mca/pml/base/pml_base_bsend.c index 66dbeab7c81..8b56312ef2f 100644 --- a/ompi/mca/pml/base/pml_base_bsend.c +++ b/ompi/mca/pml/base/pml_base_bsend.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,20 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,7 +33,7 @@ #include "ompi/mca/pml/base/pml_base_request.h" #include "ompi/mca/pml/base/pml_base_sendreq.h" #include "ompi/mca/pml/base/pml_base_bsend.h" -#include "opal/mca/mpool/mpool.h" +#include "opal/mca/mpool/mpool.h" #ifdef HAVE_UNISTD_H #include @@ -35,28 +41,25 @@ static opal_mutex_t mca_pml_bsend_mutex; /* lock for thread safety */ static opal_condition_t mca_pml_bsend_condition; /* condition variable to block on detach */ -static mca_allocator_base_component_t* mca_pml_bsend_allocator_component; +static mca_allocator_base_component_t* mca_pml_bsend_allocator_component; static mca_allocator_base_module_t* mca_pml_bsend_allocator; /* sub-allocator to manage users buffer */ static size_t mca_pml_bsend_usersize; /* user provided buffer size */ -static unsigned char *mca_pml_bsend_userbase; /* user provided buffer base */ -static unsigned char *mca_pml_bsend_base; /* adjusted base of user buffer */ -static unsigned char *mca_pml_bsend_addr; /* current offset into user buffer */ +unsigned char *mca_pml_bsend_userbase = NULL; /* user provided buffer base */ +unsigned char *mca_pml_bsend_base = NULL; /* adjusted base of user buffer */ +unsigned char *mca_pml_bsend_addr = NULL; /* current offset into user buffer */ static size_t mca_pml_bsend_size; /* adjusted size of user buffer */ static size_t mca_pml_bsend_count; /* number of outstanding requests */ static size_t mca_pml_bsend_pagesz; /* mmap page size */ static int mca_pml_bsend_pagebits; /* number of bits in pagesz */ -static int32_t mca_pml_bsend_init = 0; +static int32_t mca_pml_bsend_init = 0; /* defined in pml_base_open.c */ extern char *ompi_pml_base_bsend_allocator_name; /* - * Routine to return pages to sub-allocator as needed + * Routine to return pages to sub-allocator as needed */ -static void* mca_pml_bsend_alloc_segment( - struct mca_mpool_base_module_t* module, - size_t* size_inout, - 
mca_mpool_base_registration_t** registration) +static void* mca_pml_bsend_alloc_segment(void *ctx, size_t *size_inout) { void *addr; size_t size = *size_inout; @@ -64,11 +67,10 @@ static void* mca_pml_bsend_alloc_segment( return NULL; } /* allocate all that is left */ - size = mca_pml_bsend_size - (mca_pml_bsend_addr - mca_pml_bsend_base); + size = mca_pml_bsend_size - (mca_pml_bsend_addr - mca_pml_bsend_base); addr = mca_pml_bsend_addr; mca_pml_bsend_addr += size; *size_inout = size; - if (NULL != registration) *registration = NULL; return addr; } @@ -107,7 +109,7 @@ int mca_pml_base_bsend_init(bool thread_safe) */ int mca_pml_base_bsend_fini(void) { - if(OPAL_THREAD_ADD32(&mca_pml_bsend_init,-1) > 0) + if(OPAL_THREAD_ADD32(&mca_pml_bsend_init,-1) > 0) return OMPI_SUCCESS; if(NULL != mca_pml_bsend_allocator) @@ -152,7 +154,7 @@ int mca_pml_base_bsend_attach(void* addr, int size) */ mca_pml_bsend_userbase = (unsigned char*)addr; mca_pml_bsend_usersize = size; - /* + /* * Align to pointer boundaries. The bsend overhead is large enough * to account for this. Compute any alignment that needs to be done. 
*/ @@ -168,7 +170,7 @@ int mca_pml_base_bsend_attach(void* addr, int size) } /* - * User-level call to detach buffer + * User-level call to detach buffer */ int mca_pml_base_bsend_detach(void* addr, int* size) { @@ -183,7 +185,7 @@ int mca_pml_base_bsend_detach(void* addr, int* size) /* wait on any pending requests */ while(mca_pml_bsend_count != 0) opal_condition_wait(&mca_pml_bsend_condition, &mca_pml_bsend_mutex); - + /* free resources associated with the allocator */ mca_pml_bsend_allocator->alc_finalize(mca_pml_bsend_allocator); mca_pml_bsend_allocator = NULL; @@ -203,10 +205,10 @@ int mca_pml_base_bsend_detach(void* addr, int* size) mca_pml_bsend_count = 0; OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex); return OMPI_SUCCESS; -} +} - -/* + +/* * pack send buffer into buffer */ @@ -230,14 +232,14 @@ int mca_pml_base_bsend_request_start(ompi_request_t* request) /* allocate a buffer to hold packed message */ sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc( - mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0, NULL); + mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0); if(NULL == sendreq->req_addr) { /* release resources when request is freed */ sendreq->req_base.req_pml_complete = true; OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex); return OMPI_ERR_BUFFER; } - + OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex); /* The convertor is already initialized in the begining so we just have to @@ -247,25 +249,25 @@ int mca_pml_base_bsend_request_start(ompi_request_t* request) iov.iov_len = sendreq->req_bytes_packed; iov_count = 1; max_data = iov.iov_len; - if((rc = opal_convertor_pack( &sendreq->req_base.req_convertor, - &iov, - &iov_count, + if((rc = opal_convertor_pack( &sendreq->req_base.req_convertor, + &iov, + &iov_count, &max_data )) < 0) { return OMPI_ERROR; } - + /* setup convertor to point to packed buffer (at position zero) */ opal_convertor_prepare_for_send( &sendreq->req_base.req_convertor, &(ompi_mpi_packed.dt.super), max_data, sendreq->req_addr ); /* increment 
count of pending requests */ mca_pml_bsend_count++; } - + return OMPI_SUCCESS; } -/* +/* * allocate buffer */ @@ -285,7 +287,7 @@ int mca_pml_base_bsend_request_alloc(ompi_request_t* request) /* allocate a buffer to hold packed message */ sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc( - mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0, NULL); + mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0); if(NULL == sendreq->req_addr) { /* release resources when request is freed */ sendreq->req_base.req_pml_complete = true; @@ -299,11 +301,11 @@ int mca_pml_base_bsend_request_alloc(ompi_request_t* request) /* increment count of pending requests */ mca_pml_bsend_count++; OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex); - + return OMPI_SUCCESS; } -/* +/* * allocate buffer */ @@ -319,7 +321,7 @@ void* mca_pml_base_bsend_request_alloc_buf( size_t length ) /* allocate a buffer to hold packed message */ buf = mca_pml_bsend_allocator->alc_alloc( - mca_pml_bsend_allocator, length, 0, NULL); + mca_pml_bsend_allocator, length, 0); if(NULL == buf) { /* release resources when request is freed */ OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex); @@ -332,13 +334,13 @@ void* mca_pml_base_bsend_request_alloc_buf( size_t length ) /* increment count of pending requests */ mca_pml_bsend_count++; OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex); - + return buf; } /* - * Request completed - free buffer and decrement pending count + * Request completed - free buffer and decrement pending count */ int mca_pml_base_bsend_request_free(void* addr) { @@ -347,7 +349,7 @@ int mca_pml_base_bsend_request_free(void* addr) /* free buffer */ mca_pml_bsend_allocator->alc_free(mca_pml_bsend_allocator, addr); - + /* decrement count of buffered requests */ if(--mca_pml_bsend_count == 0) opal_condition_signal(&mca_pml_bsend_condition); @@ -355,17 +357,17 @@ int mca_pml_base_bsend_request_free(void* addr) OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex); return OMPI_SUCCESS; } - + /* - * Request completed - free buffer and 
decrement pending count + * Request completed - free buffer and decrement pending count */ int mca_pml_base_bsend_request_fini(ompi_request_t* request) { mca_pml_base_send_request_t* sendreq = (mca_pml_base_send_request_t*)request; - if(sendreq->req_bytes_packed == 0 || - sendreq->req_addr == NULL || + if(sendreq->req_bytes_packed == 0 || + sendreq->req_addr == NULL || sendreq->req_addr == sendreq->req_base.req_addr) return OMPI_SUCCESS; @@ -373,7 +375,7 @@ int mca_pml_base_bsend_request_fini(ompi_request_t* request) OPAL_THREAD_LOCK(&mca_pml_bsend_mutex); /* free buffer */ - mca_pml_bsend_allocator->alc_free(mca_pml_bsend_allocator, sendreq->req_addr); + mca_pml_bsend_allocator->alc_free(mca_pml_bsend_allocator, (void *)sendreq->req_addr); sendreq->req_addr = sendreq->req_base.req_addr; /* decrement count of buffered requests */ @@ -383,5 +385,5 @@ int mca_pml_base_bsend_request_fini(ompi_request_t* request) OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex); return OMPI_SUCCESS; } - + diff --git a/ompi/mca/pml/base/pml_base_bsend.h b/ompi/mca/pml/base/pml_base_bsend.h index 2ceabbb6ed1..7bdddb019b7 100644 --- a/ompi/mca/pml/base/pml_base_bsend.h +++ b/ompi/mca/pml/base/pml_base_bsend.h @@ -5,14 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -35,7 +36,11 @@ OMPI_DECLSPEC int mca_pml_base_bsend_request_alloc(ompi_request_t*); OMPI_DECLSPEC int mca_pml_base_bsend_request_start(ompi_request_t*); OMPI_DECLSPEC int mca_pml_base_bsend_request_fini(ompi_request_t*); OMPI_DECLSPEC void* mca_pml_base_bsend_request_alloc_buf( size_t length ); -OMPI_DECLSPEC int mca_pml_base_bsend_request_free(void* addr); +OMPI_DECLSPEC int mca_pml_base_bsend_request_free(void* addr); + +extern unsigned char *mca_pml_bsend_userbase; /* user provided buffer base */ +extern unsigned char *mca_pml_bsend_base; /* adjusted base of user buffer */ +extern unsigned char *mca_pml_bsend_addr; /* current offset into user buffer */ END_C_DECLS diff --git a/ompi/mca/pml/base/pml_base_frame.c b/ompi/mca/pml/base/pml_base_frame.c index 84186f2434c..2d02f347a41 100644 --- a/ompi/mca/pml/base/pml_base_frame.c +++ b/ompi/mca/pml/base/pml_base_frame.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,9 +16,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,9 +26,7 @@ #include "ompi_config.h" #include -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNIST_H */ @@ -50,7 +48,7 @@ #include "ompi/mca/pml/base/static-components.h" -int mca_pml_base_progress(void) +int mca_pml_base_progress(void) { return OMPI_SUCCESS; } @@ -87,13 +85,13 @@ mca_pml_base_component_t mca_pml_base_selected_component = {{0}}; opal_pointer_array_t mca_pml_base_pml = {{0}}; char *ompi_pml_base_bsend_allocator_name = NULL; -#if !MCA_ompi_pml_DIRECT_CALL && OPAL_ENABLE_FT_CR == 1 +#if !MCA_ompi_pml_DIRECT_CALL static char *ompi_pml_base_wrapper = NULL; #endif static int mca_pml_base_register(mca_base_register_flag_t flags) { -#if !MCA_ompi_pml_DIRECT_CALL && OPAL_ENABLE_FT_CR == 1 +#if !MCA_ompi_pml_DIRECT_CALL int var_id; #endif @@ -104,7 +102,7 @@ static int mca_pml_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_SCOPE_READONLY, &ompi_pml_base_bsend_allocator_name); -#if !MCA_ompi_pml_DIRECT_CALL && OPAL_ENABLE_FT_CR == 1 +#if !MCA_ompi_pml_DIRECT_CALL ompi_pml_base_wrapper = NULL; var_id = mca_base_var_register("ompi", "pml", "base", "wrapper", "Use a Wrapper component around the selected PML component", @@ -125,7 +123,7 @@ int mca_pml_base_finalize(void) { return OMPI_SUCCESS; } - + static int mca_pml_base_close(void) { int i, j; @@ -148,7 +146,7 @@ static int mca_pml_base_close(void) OBJ_DESTRUCT(&mca_pml_base_recv_requests); mca_pml.pml_progress = mca_pml_base_progress; - + /* Free all the strings in the array */ j = opal_pointer_array_get_size(&mca_pml_base_pml); for (i = 0; i < j; ++i) { @@ -182,7 +180,7 @@ static int mca_pml_base_open(mca_base_open_flag_t flags) /* Open up all available components */ - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != mca_base_framework_components_open(&ompi_pml_base_framework, flags)) { return OMPI_ERROR; } @@ -196,7 +194,7 @@ static int mca_pml_base_open(mca_base_open_flag_t flags) 
* Right now our selection of BTLs is completely broken. If we have * multiple PMLs that use BTLs than we will open all BTLs several times, leading to * undefined behaviors. The simplest solution, at least until we - * figure out the correct way to do it, is to force a default PML that + * figure out the correct way to do it, is to force a default PML that * uses BTLs and any other PMLs that do not in the mca_pml_base_pml array. */ @@ -213,23 +211,13 @@ static int mca_pml_base_open(mca_base_open_flag_t flags) if( (NULL == default_pml || NULL == default_pml[0] || 0 == strlen(default_pml[0])) || (default_pml[0][0] == '^') ) { - opal_pointer_array_add(&mca_pml_base_pml, strdup("ob1")); + opal_pointer_array_add(&mca_pml_base_pml, strdup("ob1")); opal_pointer_array_add(&mca_pml_base_pml, strdup("yalla")); opal_pointer_array_add(&mca_pml_base_pml, strdup("cm")); } else { opal_pointer_array_add(&mca_pml_base_pml, strdup(default_pml[0])); } } -#if OPAL_ENABLE_FT_CR == 1 - /* - * Which PML Wrapper component to use, if any - * - NULL or "" = No wrapper - * - ow. select that specific wrapper component - */ - if( NULL != ompi_pml_base_wrapper) { - opal_pointer_array_add(&mca_pml_base_pml, ompi_pml_base_wrapper); - } -#endif #endif diff --git a/ompi/mca/pml/base/pml_base_ft.c b/ompi/mca/pml/base/pml_base_ft.c deleted file mode 100644 index a08400e3ad4..00000000000 --- a/ompi/mca/pml/base/pml_base_ft.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/types.h" - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/base.h" - -#include "ompi/mca/bml/base/base.h" - -int mca_pml_base_ft_event(int state) -{ - int ret; - -#if 0 - opal_output(0, "pml:base: ft_event: Called (%d)!!\n", state); -#endif - - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - /* Call the BML - * BML is expected to call ft_event in - * - BTL(s) - * - MPool(s) - */ - if( OMPI_SUCCESS != (ret = mca_bml.bml_ft_event(state))) { - opal_output(0, "pml:base: ft_event: BML ft_event function failed: %d\n", - ret); - } - - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/pml/base/pml_base_recvreq.c b/ompi/mca/pml/base/pml_base_recvreq.c index 6ad6fa3dc23..54d53c63bda 100644 --- a/ompi/mca/pml/base/pml_base_recvreq.c +++ b/ompi/mca/pml/base/pml_base_recvreq.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ @@ -33,7 +33,7 @@ OBJ_CLASS_INSTANCE( mca_pml_base_recv_request_construct, mca_pml_base_recv_request_destruct ); - + static void mca_pml_base_recv_request_construct(mca_pml_base_recv_request_t* request) { diff --git a/ompi/mca/pml/base/pml_base_recvreq.h b/ompi/mca/pml/base/pml_base_recvreq.h index b7866ead440..687b756e03e 100644 --- a/ompi/mca/pml/base/pml_base_recvreq.h +++ b/ompi/mca/pml/base/pml_base_recvreq.h @@ -2,18 +2,18 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2010 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -78,7 +78,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_base_recv_request_t); (request)->req_base.req_sequence = 0; \ (request)->req_base.req_datatype = datatype; \ /* What about req_type ? 
*/ \ - (request)->req_base.req_pml_complete = OPAL_INT_TO_BOOL(persistent); \ + (request)->req_base.req_pml_complete = false; \ (request)->req_base.req_free_called = false; \ } /** @@ -99,11 +99,11 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_base_recv_request_t); (request)->req_ompi.req_status._ucount = 0; \ (request)->req_ompi.req_status._cancelled = 0; \ \ - (request)->req_ompi.req_complete = false; \ + (request)->req_ompi.req_complete = REQUEST_PENDING; \ (request)->req_ompi.req_state = OMPI_REQUEST_ACTIVE; \ } while (0) -/** +/** * Return a receive request. Handle the release of the communicator and the * attached datatype. * diff --git a/ompi/mca/pml/base/pml_base_request.c b/ompi/mca/pml/base/pml_base_request.c index c1794c3b483..c073957bf5d 100644 --- a/ompi/mca/pml/base/pml_base_request.c +++ b/ompi/mca/pml/base/pml_base_request.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/pml/base/pml_base_request.h b/ompi/mca/pml/base/pml_base_request.h index 1530723957c..90a86505e07 100644 --- a/ompi/mca/pml/base/pml_base_request.h +++ b/ompi/mca/pml/base/pml_base_request.h @@ -3,20 +3,22 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -47,24 +49,24 @@ OMPI_DECLSPEC extern opal_free_list_t mca_pml_base_recv_requests; */ /* * The following include pulls in shared typedefs with debugger plugins. - * For more information on why we do this see the Notice to developers + * For more information on why we do this see the Notice to developers * comment at the top of the ompi_msgq_dll.c file. 
*/ #include "pml_base_request_dbg.h" /** - * Base type for PML P2P requests + * Base type for PML P2P requests */ struct mca_pml_base_request_t { /* START: These fields have to match the definition of the mca_pml_cm_request_t */ ompi_request_t req_ompi; /**< base request */ - volatile bool req_pml_complete; /**< flag indicating if the pt-2-pt layer is done with this request */ + volatile int32_t req_pml_complete; /**< flag indicating if the pt-2-pt layer is done with this request */ + volatile int32_t req_free_called; /**< flag indicating if the user has freed this request */ mca_pml_base_request_type_t req_type; /**< MPI request type - used for test */ struct ompi_communicator_t *req_comm; /**< communicator pointer */ struct ompi_datatype_t *req_datatype; /**< pointer to data type */ - volatile bool req_free_called; /**< flag indicating if the user has freed this request */ opal_convertor_t req_convertor; /**< always need the convertor */ /* END: These field have to match the definition of the mca_pml_cm_request_t */ diff --git a/ompi/mca/pml/base/pml_base_request_dbg.h b/ompi/mca/pml/base/pml_base_request_dbg.h index 312d8e08274..4644a89c6c4 100644 --- a/ompi/mca/pml/base/pml_base_request_dbg.h +++ b/ompi/mca/pml/base/pml_base_request_dbg.h @@ -3,9 +3,9 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #ifndef MCA_PML_BASE_REQUEST_DBG_H @@ -13,7 +13,7 @@ /* * This file contains definitions used by both OMPI and debugger plugins. - * For more information on why we do this see the Notice to developers + * For more information on why we do this see the Notice to developers * comment at the top of the ompi_msgq_dll.c file. 
*/ diff --git a/ompi/mca/pml/base/pml_base_select.c b/ompi/mca/pml/base/pml_base_select.c index 62717bf4740..ee41c8eda82 100644 --- a/ompi/mca/pml/base/pml_base_select.c +++ b/ompi/mca/pml/base/pml_base_select.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. @@ -23,9 +23,7 @@ #include "ompi_config.h" -#ifdef HAVE_STRING_H #include -#endif #include "opal/class/opal_list.h" #include "opal/util/output.h" @@ -69,10 +67,6 @@ int mca_pml_base_select(bool enable_progress_threads, opal_list_t opened; opened_component_t *om = NULL; bool found_pml; -#if OPAL_ENABLE_FT_CR == 1 - mca_pml_base_component_t *wrapper_component = NULL; - int wrapper_priority = -1; -#endif /* Traverse the list of available components; call their init functions. 
*/ @@ -136,19 +130,6 @@ int mca_pml_base_select(bool enable_progress_threads, opal_output_verbose( 10, ompi_pml_base_framework.framework_output, "select: init returned priority %d", priority ); -#if OPAL_ENABLE_FT_CR == 1 - /* Determine if this is the wrapper component */ - if( priority <= PML_SELECT_WRAPPER_PRIORITY) { - opal_output_verbose( 10, ompi_pml_base_framework.framework_output, - "pml:select: Wrapper Component: Component %s was determined to be a Wrapper PML with priority %d", - component->pmlm_version.mca_component_name, priority ); - wrapper_priority = priority; - wrapper_component = component; - continue; - } - /* Otherwise determine if this is the best component */ - else -#endif if (priority > best_priority) { best_priority = priority; best_component = component; @@ -202,11 +183,7 @@ int mca_pml_base_select(bool enable_progress_threads, item = opal_list_remove_first(&opened)) { om = (opened_component_t *) item; - if (om->om_component != best_component -#if OPAL_ENABLE_FT_CR == 1 - && om->om_component != wrapper_component -#endif - ) { + if (om->om_component != best_component) { /* Finalize */ if (NULL != om->om_component->pmlm_finalize) { @@ -226,21 +203,6 @@ int mca_pml_base_select(bool enable_progress_threads, } OBJ_DESTRUCT( &opened ); -#if OPAL_ENABLE_FT_CR == 1 - /* Remove the wrapper component from the ompi_pml_base_framework.framework_components list - * so we don't unload it prematurely in the next call - */ - if( NULL != wrapper_component ) { - OPAL_LIST_FOREACH(cli, &ompi_pml_base_framework.framework_components, mca_base_component_list_item_t) { - component = (mca_pml_base_component_t *) cli->cli_component; - - if( component == wrapper_component ) { - opal_list_remove_item(&ompi_pml_base_framework.framework_components, item); - } - } - } -#endif - /* Save the winner */ mca_pml_base_selected_component = *best_component; @@ -257,32 +219,6 @@ int mca_pml_base_select(bool enable_progress_threads, &ompi_pml_base_framework.framework_components, 
(mca_base_component_t *) best_component); -#if OPAL_ENABLE_FT_CR == 1 - /* If we have a wrapper then initalize it */ - if( NULL != wrapper_component ) { - priority = PML_SELECT_WRAPPER_PRIORITY; - opal_output_verbose( 10, ompi_pml_base_framework.framework_output, - "pml:select: Wrapping: Component %s [%d] is being wrapped by component %s [%d]", - mca_pml_base_selected_component.pmlm_version.mca_component_name, - best_priority, - wrapper_component->pmlm_version.mca_component_name, - wrapper_priority ); - - /* Ask the wrapper commponent to wrap around the currently - * selected component. Indicated by the priority value provided - * this will cause the wrapper to do something different this time around - */ - module = wrapper_component->pmlm_init(&priority, - enable_progress_threads, - enable_mpi_threads); - /* Replace with the wrapper */ - best_component = wrapper_component; - mca_pml_base_selected_component = *best_component; - best_module = module; - mca_pml = *best_module; - } -#endif - /* register the winner's callback */ if( NULL != mca_pml.pml_progress ) { opal_progress_register(mca_pml.pml_progress); @@ -314,7 +250,7 @@ mca_pml_base_pml_selected(const char *name) { int rc; - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, &pml_base_component, name, strlen(name) + 1); + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &pml_base_component, name, strlen(name) + 1); return rc; } @@ -345,7 +281,7 @@ mca_pml_base_pml_check_selected(const char *my_pml, /* get the name of the PML module selected by rank=0 */ OPAL_MODEX_RECV(ret, &pml_base_component, - &procs[0]->super, (void**) &remote_pml, &size); + &procs[0]->super.proc_name, (void**) &remote_pml, &size); /* if this key wasn't found, then just assume all is well... 
*/ if (OMPI_SUCCESS != ret) { diff --git a/ompi/mca/pml/base/pml_base_sendreq.c b/ompi/mca/pml/base/pml_base_sendreq.c index d7a4ffcf11e..fabd711129b 100644 --- a/ompi/mca/pml/base/pml_base_sendreq.c +++ b/ompi/mca/pml/base/pml_base_sendreq.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" diff --git a/ompi/mca/pml/base/pml_base_sendreq.h b/ompi/mca/pml/base/pml_base_sendreq.h index 349269fefd9..95d442f043d 100644 --- a/ompi/mca/pml/base/pml_base_sendreq.h +++ b/ompi/mca/pml/base/pml_base_sendreq.h @@ -2,18 +2,20 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -31,11 +33,11 @@ BEGIN_C_DECLS /** - * Base type for send requests + * Base type for send requests */ struct mca_pml_base_send_request_t { mca_pml_base_request_t req_base; /**< base request type - common data structure for use by wait/test */ - void *req_addr; /**< pointer to send buffer - may not be application buffer */ + const void *req_addr; /**< pointer to send buffer - may not be application buffer */ size_t req_bytes_packed; /**< packed size of a message given the datatype and count */ mca_pml_base_send_mode_t req_send_mode; /**< type of send */ }; @@ -79,14 +81,14 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t ); (request)->req_base.req_ompi.req_mpi_object.comm = comm; \ (request)->req_addr = addr; \ (request)->req_send_mode = mode; \ - (request)->req_base.req_addr = addr; \ + (request)->req_base.req_addr = (void *)addr; \ (request)->req_base.req_count = count; \ (request)->req_base.req_datatype = datatype; \ (request)->req_base.req_peer = (int32_t)peer; \ (request)->req_base.req_tag = (int32_t)tag; \ (request)->req_base.req_comm = comm; \ /* (request)->req_base.req_proc is set on request allocation */ \ - (request)->req_base.req_pml_complete = OPAL_INT_TO_BOOL(persistent); \ + (request)->req_base.req_pml_complete = false; \ (request)->req_base.req_free_called = false; \ (request)->req_base.req_ompi.req_status._cancelled = 0; \ (request)->req_bytes_packed = 0; \ @@ -117,7 +119,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t ); #define MCA_PML_BASE_SEND_START( request ) \ do { \ (request)->req_pml_complete = false; \ - (request)->req_ompi.req_complete = false; \ + (request)->req_ompi.req_complete = REQUEST_PENDING; \ (request)->req_ompi.req_state = OMPI_REQUEST_ACTIVE; \ (request)->req_ompi.req_status._cancelled = 0; \ } while (0) diff --git a/ompi/mca/pml/bfo/.opal_ignore b/ompi/mca/pml/bfo/.opal_ignore deleted file mode 100644 
index e69de29bb2d..00000000000 diff --git a/ompi/mca/pml/bfo/Makefile.am b/ompi/mca/pml/bfo/Makefile.am deleted file mode 100644 index c11a8fd4035..00000000000 --- a/ompi/mca/pml/bfo/Makefile.am +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = -DPML_BFO=1 - -dist_ompidata_DATA = \ - help-mpi-pml-bfo.txt - -EXTRA_DIST = post_configure.sh - -bfo_sources = \ - pml_bfo.c \ - pml_bfo.h \ - pml_bfo_comm.c \ - pml_bfo_comm.h \ - pml_bfo_component.c \ - pml_bfo_component.h \ - pml_bfo_failover.c \ - pml_bfo_failover.h \ - pml_bfo_hdr.h \ - pml_bfo_iprobe.c \ - pml_bfo_irecv.c \ - pml_bfo_isend.c \ - pml_bfo_progress.c \ - pml_bfo_rdma.c \ - pml_bfo_rdma.h \ - pml_bfo_rdmafrag.c \ - pml_bfo_rdmafrag.h \ - pml_bfo_recvfrag.c \ - pml_bfo_recvfrag.h \ - pml_bfo_recvreq.c \ - pml_bfo_recvreq.h \ - pml_bfo_sendreq.c \ - pml_bfo_sendreq.h \ - pml_bfo_start.c - -# If we have CUDA support requested, build the CUDA file also -if OPAL_cuda_support -bfo_sources += \ - pml_bfo_cuda.c -endif - -if MCA_BUILD_ompi_pml_bfo_DSO -component_noinst = -component_install = mca_pml_bfo.la -else -component_noinst = libmca_pml_bfo.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_pml_bfo_la_SOURCES = $(bfo_sources) 
-mca_pml_bfo_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pml_bfo_la_SOURCES = $(bfo_sources) -libmca_pml_bfo_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/pml/bfo/README b/ompi/mca/pml/bfo/README deleted file mode 100644 index 1351f12c653..00000000000 --- a/ompi/mca/pml/bfo/README +++ /dev/null @@ -1,340 +0,0 @@ -Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - -BFO DESIGN DOCUMENT -This document describes the use and design of the bfo. In addition, -there is a section at the end explaining why this functionality was -not merged into the ob1 PML. - -1. GENERAL USAGE -First, one has to configure the failover code into the openib BTL so -that bfo will work correctly. To do this: -configure --enable-btl-openib-failover. - -Then, when running one needs to select the bfo PML explicitly. -mpirun --mca pml bfo - -Note that one needs to both configure with --enable-btl-openib-failover -and run with --mca pml bfo to get the failover support. If one of -these two steps is skipped, then the MPI job will just abort in the -case of an error like it normally does with the ob1 PML. - -2. GENERAL FUNCTION -The bfo failover feature requires two or more openib BTLs in use. In -normal operation, it will stripe the communication over the multiple -BTLs. When an error is detected, it will stop using the BTL that -incurred the error and continue the communication over the remaining -BTL. Once a BTL has been mapped out, it cannot be used by the job -again, even if the underlying fabric becomes functional again. Only -new jobs started after the fabric comes back up will use both BTLs. - -The bfo works in conjunction with changes that were made in the openib -BTL. As noted above, those changes need to be configured into the -BTL for everything to work properly. - -The bfo only fails over between openib BTLs. It cannot failover from -an openib BTL to TCP, for example. - -3. 
GENERAL DESIGN -The bfo (Btl FailOver) PML was designed to work in clusters that have -multiple openib BTLs. It was designed to be lightweight so as to -avoid any adverse effects on latency. To that end, there is no -tracking of fragments or messages in the bfo PML. Rather, it depends -on the underlying BTL to notify it of each fragment that has an error. -The bfo then decides what needs to be done based on the type of -fragment that gets an error. - -No additional sequence numbers were introduced in the bfo. Instead, -it makes use of the sequence numbers that exist in the MATCH, RNDV and -RGET fragment header. In that way, duplicate fragments that have -MATCH information in them can be detected. Other fragments, like PUT -and ACK, are never retransmitted so it does not matter that they do -not have sequence numbers. The FIN header was a special case in that -it was changed to include the MATCH header so that the tag, source, -and context fields could be used to check for duplicate FINs. - -Note that the assumption is that the underlying BTL will always issue -a callback with an error flag when it thinks a fragment has an error. -This means that even after an error is detected on a BTL, the BTL -continues to be checked for any other messages that may also complete -with an error. This is potentially a unique characteristic of the -openib BTL when running over RC connections that allows the BFO to -work properly. - -One scenario that is particularly difficult to handle is the case -where a fragment has an error but the message actually makes it to the -other side. It is because of this that all fragments need to be -checked to make sure they are not a duplicate. This scenario also -complicates some of the rendezvous protocols as the two sides may not -agree where the problem occurred. For example, one can imagine a -sender getting an error on a final FIN message, but the FIN message -actually arrives at the other side. 
The receiver thinks the -communication is done and moves on. The sender thinks there was a -problem, and that the communication needs to restart. - -It is also important to note that a message cannot signal a successful -completion and *not* make it to the receiver. This would probably cause -the bfo to hang. - -4. ERRORS -Errors are detected in the openib BTL layer and propagated to the PML -layer. Typically, the errors occur while polling the completion -queue, but can happen in other areas as well. When an error occurs, -an additional callback is called so the PML can map out the connection -for future sending. Then the callback associated with the fragment is -called, but with the error field set to OMPI_ERROR. This way, the PML -knows that this fragment may not have made it to the remote side. - -The first callback into the PML is via the mca_pml_bfo_error_handler() -callback and the PML uses this to remove a connection for future -sending. If the error_proc_t field is NULL, then the entire BTL is -removed for any future communication. If the error_proc_t is not -NULL, then the BTL is only removed for the connection associated with -the error_proc_t. - -The second callback is the standard one for a completion event, and -this can trigger various activities in the PML. The regular callback -function is called but the status is set to OMPI_ERROR. The PML layer -detects this and calls some failover specific routines depending on -the type of fragment that got the error. - - -5. RECOVERY OF MATCH FRAGMENTS -Note: For a general description of how the various fragments interact, -see Appendix 1 at the end of this document. - -In the case of a MATCH fragment, the fragment is simply resent. Care -has to be taken with a MATCH fragment that is sent via the standard -interface and one that is sent via the sendi interface. In the -standard send, the send request is still available and is therefore -reset reused to send the MATCH fragment. 
In the case of the sendi -fragment, the send request is gone, so the fragment is regenerated -from the information contained within the fragment. - -6. RECOVERY OF RNDV or LARGE MESSAGE RDMA -In the case of a large message RDMA transfer or a RNDV transfer where -the message consists of several fragments, the restart is a little -more complicated. This includes fragments like RNDV, PUT, RGET, FRAG, -FIN, and RDMA write and RDMA read completions. In most cases, the -requests associated with these fragments are reset and restarted. - -First, it should be pointed out that a new variable was added to the -send and receive requests. This variable tracks outstanding send -events that have not yet received their completion events. This new -variable is used so that a request is not restarted until all the -outstanding events have completed. If one does not wait for the -outstanding events to complete, then one may restart a request and -then a completion event will happen on the wrong request. - -There is a second variable added to each request and that is one that -shows whether the request is already in an error state. When a request -reaches the state that it has an error flagged on it and the outstanding -completion events are down to zero, it can start the restart dance -as described below. - -7. SPECIAL CASE FOR FIN FRAGMENT -Like the MATCH fragment, the FIN message is also simply resent. Like -the sendi MATCH fragment, there may be no request associated with the -FIN message when it gets an error, so the fragment is recreated from -the information in the fragment. The FIN fragment was modified to -have additional information like what is in a MATCH fragment including -the context, source, and tag. In this way, we can figure out if the -FIN message is a duplicate on the receiving side. - -8. RESTART DANCE -When the bfo determines that there are no outstanding completion events, -a restart dance is initiated. 
There are four new PML message types that -have been created to participate in the dance. - 1. RNDVRESTARTNOTIFY - 2. RECVERRNOTIFY - 3. RNDVRESTARTACK - 4. RNDVRESTARTNACK - -When the send request is in an error state and the outstanding -completion events is zero, RNDVRESTARTNOTIFY is sent from the sender -to the receiver to let it know that the communication needs to be -restarted. Upon receipt of the RNDVRESTARTNOTIFY, the receiver first -checks to make sure that it is still pointing to a valid receiver -request. If so, it marks the receive request in error. It then -checks to see if there are any outstanding completion events on the -receiver. If there are no outstanding completion events, the receiver -sends the RNDVRESTARTACK. If there are outstanding completion events, -then the RNDVRESTARTACK gets sent later when a completion event occurs -that brings the outstanding event count to zero. - -In the case that the receiver determines that it is no longer looking -at a valid receive request, which means the request is complete, the -receiver responds with a RNDVRESTARTNACK. While rare, this case can -happen for example, when a final FRAG message triggers an error on the -sender, but actually makes it to the receiver. - -The RECVERRNOTIFY fragment is used so the receiver can let the sender -sender know that it had an error. The sender then waits for all of -its completion events, and then sends a RNDVRESTARTNOTIFY. - -All the handling of these new messages is contained in the -pml_bfo_failover files. - -9. BTL SUPPORT -The openib BTL also supplies a lot of support for the bfo PML. First, -fragments can be stored in the BTL during normal operation if -resources become scarce. This means that when an error is detected in -the BTL, it needs to scour its internal queues for fragments that are -destined for the BTL and error them out. The function -error_out_all_pending_frags() takes care of this functionality. 
And -some of the fragments stored can be coalesced, so care has to be taken -to tease out each message from a coalesced fragment. - -There is also some special code in the BTL to handle some strange -occurrences that were observed in the BTL. First, there are times -where only one half of the connection gets an error. This can result -in a mismatch between what the PML thinks is available to it and can -cause hangs. Therefore, when a BTL detects an error, it sends a -special message down the working BTL connection to tell the remote -side that it needs to be brought down as well. - -Secondly, it has been observed that a message can get stuck in the -eager RDMA connection between two BTLs. In this case, an error is -detected on one side, but the other side never sees the message. -Therefore, a special message is sent to the other side telling it to -move along in the eager RDMA connection. This is all somewhat -confusing. See the code in the btl_openib_failover.c file for the -details. - -10. MERGING -Every effort was made to try and merge the bfo PML into the ob1 PML. -The idea was that any upgrades to the ob1 PML would automatically make -it into the bfo PML and this would enhance maintainability of all the -code. However, it was deemed that this merging would cause more -problems than it would solve. What was attempted and why the -conclusion was made are documented here. - -One can look at the bfo and easily see the differences between it and -ob1. All the bfo specific code is surrounded by #if PML_BFO. In -addition, there are two additional files in the bfo, -pml_bfo_failover.c and pml_bfo_failover.h. - -To merge them, the following was attempted. First, add all the code -in #if regions into the ob1 PML. As of this writing, there are 73 -#ifs that would have to be added into ob1. - -Secondly, remove almost all the pml_bfo files and replace them with -links to the ob1 files. 
- -Third, create a new header file that did name shifting of all the -functions so that ob1 and bfo could live together. This also included -having to create macros for the names of header files as well. To -help illustrate the name shifting issue, here is what the file might -look like in the bfo directory. - -/* Need macros for the header files as they are different in the - * different PMLs */ -#define PML "bfo" -#define PML_OB1_H "pml_bfo.h" -#define PML_OB1_COMM_H "pml_bfo_comm.h" -#define PML_OB1_COMPONENT_H "pml_bfo_component.h" -#define PML_OB1_HDR_H "pml_bfo_hdr.h" -#define PML_OB1_RDMA_H "pml_bfo_rdma.h" -#define PML_OB1_RDMAFRAG_H "pml_bfo_rdmafrag.h" -#define PML_OB1_RECVFRAG_H "pml_bfo_recvfrag.h" -#define PML_OB1_RECVREQ_H "pml_bfo_recvreq.h" -#define PML_OB1_SENDREQ_H "pml_bfo_sendreq.h" - -/* Name shifting of functions from ob1 to bfo (incomplete list) */ -#define mca_pml_ob1 mca_pml_bfo -#define mca_pml_ob1_t mca_pml_bfo_t -#define mca_pml_ob1_component mca_pml_bfo_component -#define mca_pml_ob1_add_procs mca_pml_bfo_add_procs -#define mca_pml_ob1_del_procs mca_pml_bfo_del_procs -#define mca_pml_ob1_enable mca_pml_bfo_enable -#define mca_pml_ob1_progress mca_pml_bfo_progress -#define mca_pml_ob1_add_comm mca_pml_bfo_add_comm -#define mca_pml_ob1_del_comm mca_pml_bfo_del_comm -#define mca_pml_ob1_irecv_init mca_pml_bfo_irecv_init -#define mca_pml_ob1_irecv mca_pml_bfo_irecv -#define mca_pml_ob1_recv mca_pml_bfo_recv -#define mca_pml_ob1_isend_init mca_pml_bfo_isend_init -#define mca_pml_ob1_isend mca_pml_bfo_isend -#define mca_pml_ob1_send mca_pml_bfo_send -#define mca_pml_ob1_iprobe mca_pml_bfo_iprobe -[...and much more ...] - -The pml_bfo_hdr.h file was not a link because the changes in it were -so extensive. Also the Makefile was kept separate so it could include -the additional failover files as well as add a compile directive that -would force the files to be compiled as bfo instead of ob1. 
- -After these changes were made, several independent developers reviewed -the results and concluded that making these changes would have too -much of a negative impact on ob1 maintenance. First, the code became -much harder to read with all the additional #ifdefs. Secondly, the -possibility of adding other features, like csum, to ob1 would only -make this issue even worse. Therefore, it was decided to keep the bfo -PML separate from ob1. - -11. UTILITIES -In an ideal world, any bug fixes that are made in the ob1 PML would -also be made in the csum and the bfo PMLs. However, that does not -always happen. Therefore, there are two new utilities added to the -contrib directory. - -check-ob1-revision.pl -check-ob1-pml-diffs.pl - -The first one can be run to see if ob1 has changed from its last known -state. Here is an example. - - machine =>check-ob1-revision.pl -Running svn diff -r24138 ../ompi/mca/pml/ob1 -No new changes detected in ob1. Everything is fine. - -If there are differences, then one needs to review them and potentially -add them to the bfo (and csum also if one feels like it). -After that, bump up the value in the script to the latest value. - -The second script allows one to see the differences between the ob1 -and bfo PML. Here is an example. - - machine =>check-ob1-pml-diffs.pl - -Starting script to check differences between bfo and ob1... -Files Compared: pml_ob1.c and pml_bfo.c -No differences encountered -Files Compared: pml_ob1.h and pml_bfo.h -[...snip...] -Files Compared: pml_ob1_start.c and pml_bfo_start.c -No differences encountered - -There is a lot more in the script that tells how it is used. - - -Appendix 1: SIMPLE OVERVIEW OF COMMUNICATION PROTOCOLS -The drawings below attempt to describe some of the general flow of -fragments in the various protocols that are supported in the PMLs. -The "read" and "write" are actual RDMA actions and do not pertain to -fragments that are sent. 
As can be inferred, they use FIN messages to -indicate their completion. - - -MATCH PROTOCOL -sender >->->-> MATCH >->->-> receiver - -SEND WITH MULTIPLE FRAGMENTS -sender >->->-> RNDV >->->-> receiver - <-<-<-< ACK <-<-<-< - >->->-> FRAG >->->-> - >->->-> FRAG >->->-> - >->->-> FRAG >->->-> - -RDMA PUT -sender >->->-> RNDV >->->-> receiver - <-<-<-< PUT <-<-<-< - <-<-<-< PUT <-<-<-< - >->->-> write >->->-> - >->->-> FIN >->->-> - >->->-> write >->->-> - >->->-> FIN >->->-> - -RMA GET -sender >->->-> RGET >->->-> receiver - <-<-<-< read <-<-<-< - <-<-<-< FIN <-<-<-< \ No newline at end of file diff --git a/ompi/mca/pml/bfo/configure.m4 b/ompi/mca/pml/bfo/configure.m4 deleted file mode 100644 index dbf0a3bacf2..00000000000 --- a/ompi/mca/pml/bfo/configure.m4 +++ /dev/null @@ -1,27 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2013 Sandia National Laboratories. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_ompi_pml_bfo_POST_CONFIG(will_build) -# ---------------------------------------- -# The BFO PML requires a BML endpoint tag to compile, so require it. -# Require in POST_CONFIG instead of CONFIG so that we only require it -# if we're not disabled. -AC_DEFUN([MCA_ompi_pml_bfo_POST_CONFIG], [ - AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([BML])]) -])dnl - -# MCA_ompi_pml_bfo_CONFIG(action-if-can-compile, -# [action-if-cant-compile]) -# ------------------------------------------------ -# We can always build, unless we were explicitly disabled. -AC_DEFUN([MCA_ompi_pml_bfo_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/pml/bfo/Makefile]) - [$1] -])dnl diff --git a/ompi/mca/pml/bfo/help-mpi-pml-bfo.txt b/ompi/mca/pml/bfo/help-mpi-pml-bfo.txt deleted file mode 100644 index b3c44ec80ec..00000000000 --- a/ompi/mca/pml/bfo/help-mpi-pml-bfo.txt +++ /dev/null @@ -1,20 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -[eager_limit_too_small] -The "eager limit" MCA parameter in the %s BTL was set to a value which -is too low for Open MPI to function properly. Please re-run your job -with a higher eager limit value for this BTL; the exact MCA parameter -name and its corresponding minimum value is shown below. - - Local host: %s - BTL name: %s - BTL eager limit value: %d (set via btl_%s_eager_limit) - BTL eager limit minimum: %d - MCA parameter name: btl_%s_eager_limit diff --git a/ompi/mca/pml/bfo/owner.txt b/ompi/mca/pml/bfo/owner.txt deleted file mode 100644 index 9e43c5910a8..00000000000 --- a/ompi/mca/pml/bfo/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ? -status: unmaintained diff --git a/ompi/mca/pml/bfo/pml_bfo.c b/ompi/mca/pml/bfo/pml_bfo.c deleted file mode 100644 index 47c2ad2019f..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo.c +++ /dev/null @@ -1,875 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2006-2008 University of Houston. All rights reserved. - * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. 
- * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include - -#include "opal/class/opal_bitmap.h" -#include "opal/util/output.h" -#include "opal/util/show_help.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" -#include "opal/mca/pmix/pmix.h" - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/runtime/ompi_cr.h" - -#include "pml_bfo.h" -#include "pml_bfo_component.h" -#include "pml_bfo_comm.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_recvfrag.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_rdmafrag.h" -#if PML_BFO -#include "pml_bfo_failover.h" -#endif /* PML_BFO */ - -mca_pml_bfo_t mca_pml_bfo = { - { - mca_pml_bfo_add_procs, - mca_pml_bfo_del_procs, - mca_pml_bfo_enable, - mca_pml_bfo_progress, - mca_pml_bfo_add_comm, - mca_pml_bfo_del_comm, - mca_pml_bfo_irecv_init, - mca_pml_bfo_irecv, - mca_pml_bfo_recv, - mca_pml_bfo_isend_init, - mca_pml_bfo_isend, - mca_pml_bfo_send, - mca_pml_bfo_iprobe, - mca_pml_bfo_probe, - mca_pml_bfo_start, - mca_pml_bfo_improbe, - mca_pml_bfo_mprobe, - mca_pml_bfo_imrecv, - mca_pml_bfo_mrecv, - mca_pml_bfo_dump, - mca_pml_bfo_ft_event, - 65535, - INT_MAX - } -}; - - -void mca_pml_bfo_error_handler( struct mca_btl_base_module_t* btl, - int32_t flags, ompi_proc_t* errproc, - char* btlinfo ); - -int mca_pml_bfo_enable(bool enable) -{ - if( false == enable ) { - return OMPI_SUCCESS; - } - - OBJ_CONSTRUCT(&mca_pml_bfo.lock, opal_mutex_t); - - /* fragments */ - OBJ_CONSTRUCT(&mca_pml_bfo.rdma_frags, ompi_free_list_t); - ompi_free_list_init_new( &mca_pml_bfo.rdma_frags, - sizeof(mca_pml_bfo_rdma_frag_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_rdma_frag_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL 
); - - OBJ_CONSTRUCT(&mca_pml_bfo.recv_frags, ompi_free_list_t); - - ompi_free_list_init_new( &mca_pml_bfo.recv_frags, - sizeof(mca_pml_bfo_recv_frag_t) + mca_pml_bfo.unexpected_limit, - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_recv_frag_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL ); - - OBJ_CONSTRUCT(&mca_pml_bfo.pending_pckts, ompi_free_list_t); - ompi_free_list_init_new( &mca_pml_bfo.pending_pckts, - sizeof(mca_pml_bfo_pckt_pending_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_pckt_pending_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL ); - - - OBJ_CONSTRUCT(&mca_pml_bfo.buffers, ompi_free_list_t); - OBJ_CONSTRUCT(&mca_pml_bfo.send_ranges, ompi_free_list_t); - ompi_free_list_init_new( &mca_pml_bfo.send_ranges, - sizeof(mca_pml_bfo_send_range_t) + - (mca_pml_bfo.max_send_per_range - 1) * sizeof(mca_pml_bfo_com_btl_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_send_range_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL ); - - /* pending operations */ - OBJ_CONSTRUCT(&mca_pml_bfo.send_pending, opal_list_t); - OBJ_CONSTRUCT(&mca_pml_bfo.recv_pending, opal_list_t); - OBJ_CONSTRUCT(&mca_pml_bfo.pckt_pending, opal_list_t); - OBJ_CONSTRUCT(&mca_pml_bfo.rdma_pending, opal_list_t); - /* missing communicator pending list */ - OBJ_CONSTRUCT(&mca_pml_bfo.non_existing_communicator_pending, opal_list_t); - - /** - * If we get here this is the PML who get selected for the run. We - * should get ownership for the send and receive requests list, and - * initialize them with the size of our own requests. 
- */ - ompi_free_list_init_new( &mca_pml_base_send_requests, - sizeof(mca_pml_bfo_send_request_t) + - (mca_pml_bfo.max_rdma_per_request - 1) * - sizeof(mca_pml_bfo_com_btl_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_send_request_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL ); - - ompi_free_list_init_new( &mca_pml_base_recv_requests, - sizeof(mca_pml_bfo_recv_request_t) + - (mca_pml_bfo.max_rdma_per_request - 1) * - sizeof(mca_pml_bfo_com_btl_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_recv_request_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL ); - - mca_pml_bfo.enabled = true; - return OMPI_SUCCESS; -} - -int mca_pml_bfo_add_comm(ompi_communicator_t* comm) -{ - /* allocate pml specific comm data */ - mca_pml_bfo_comm_t* pml_comm = OBJ_NEW(mca_pml_bfo_comm_t); - opal_list_item_t *item, *next_item; - mca_pml_bfo_recv_frag_t* frag; - mca_pml_bfo_comm_proc_t* pml_proc; - mca_pml_bfo_match_hdr_t* hdr; - int i; - - if (NULL == pml_comm) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* should never happen, but it was, so check */ - if (comm->c_contextid > mca_pml_bfo.super.pml_max_contextid) { - OBJ_RELEASE(pml_comm); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - mca_pml_bfo_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count); - comm->c_pml_comm = pml_comm; - - for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) { - pml_comm->procs[i].ompi_proc = ompi_group_peer_lookup(comm->c_remote_group,i); - OBJ_RETAIN(pml_comm->procs[i].ompi_proc); - } - /* Grab all related messages from the non_existing_communicator pending queue */ - for( item = opal_list_get_first(&mca_pml_bfo.non_existing_communicator_pending); - item != opal_list_get_end(&mca_pml_bfo.non_existing_communicator_pending); - item = next_item ) { - frag = (mca_pml_bfo_recv_frag_t*)item; - next_item = opal_list_get_next(item); - hdr 
= &frag->hdr.hdr_match; - - /* Is this fragment for the current communicator ? */ - if( frag->hdr.hdr_match.hdr_ctx != comm->c_contextid ) - continue; - - /* As we now know we work on a fragment for this communicator - * we should remove it from the - * non_existing_communicator_pending list. */ - opal_list_remove_item( &mca_pml_bfo.non_existing_communicator_pending, - item ); - - add_fragment_to_unexpected: - - /* We generate the MSG_ARRIVED event as soon as the PML is aware - * of a matching fragment arrival. Independing if it is received - * on the correct order or not. This will allow the tools to - * figure out if the messages are not received in the correct - * order (if multiple network interfaces). - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* There is no matching to be done, and no lock to be held on the communicator as - * we know at this point that the communicator has not yet been returned to the user. - * The only required protection is around the non_existing_communicator_pending queue. - * We just have to push the fragment into the unexpected list of the corresponding - * proc, or into the out-of-order (cant_match) list. - */ - pml_proc = &(pml_comm->procs[hdr->hdr_src]); - - if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) { - /* We're now expecting the next sequence number. */ - pml_proc->expected_sequence++; - opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag ); - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - /* And now the ugly part. As some fragments can be inserted in the cant_match list, - * every time we succesfully add a fragment in the unexpected list we have to make - * sure the next one is not in the cant_match. Otherwise, we will endup in a deadlock - * situation as the cant_match is only checked when a new fragment is received from - * the network. 
- */ - for(frag = (mca_pml_bfo_recv_frag_t *)opal_list_get_first(&pml_proc->frags_cant_match); - frag != (mca_pml_bfo_recv_frag_t *)opal_list_get_end(&pml_proc->frags_cant_match); - frag = (mca_pml_bfo_recv_frag_t *)opal_list_get_next(frag)) { - hdr = &frag->hdr.hdr_match; - /* If the message has the next expected seq from that proc... */ - if(hdr->hdr_seq != pml_proc->expected_sequence) - continue; - - opal_list_remove_item(&pml_proc->frags_cant_match, (opal_list_item_t*)frag); - goto add_fragment_to_unexpected; - } - } else { - opal_list_append( &pml_proc->frags_cant_match, (opal_list_item_t*)frag ); - } - } - return OMPI_SUCCESS; -} - -int mca_pml_bfo_del_comm(ompi_communicator_t* comm) -{ - mca_pml_bfo_comm_t* pml_comm = comm->c_pml_comm; - int i; - - for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) { - OBJ_RELEASE(pml_comm->procs[i].ompi_proc); - } - OBJ_RELEASE(comm->c_pml_comm); - comm->c_pml_comm = NULL; - return OMPI_SUCCESS; -} - - -/* - * For each proc setup a datastructure that indicates the BTLs - * that can be used to reach the destination. - * - */ - -int mca_pml_bfo_add_procs(ompi_proc_t** procs, size_t nprocs) -{ - opal_bitmap_t reachable; - int rc; - opal_list_item_t *item; - - if(nprocs == 0) - return OMPI_SUCCESS; - - OBJ_CONSTRUCT(&reachable, opal_bitmap_t); - rc = opal_bitmap_init(&reachable, (int)nprocs); - if(OMPI_SUCCESS != rc) - return rc; - - /* - * JJH: Disable this in FT enabled builds since - * we use a wrapper PML. It will cause this check to - * return failure as all processes will return the wrapper PML - * component in use instead of the wrapped PML component underneath. 
- */ -#if OPAL_ENABLE_FT_CR == 0 - /* make sure remote procs are using the same PML as us */ - if (OMPI_SUCCESS != (rc = mca_pml_base_pml_check_selected("bfo", - procs, - nprocs))) { - return rc; - } -#endif - - rc = mca_bml.bml_add_procs( nprocs, - procs, - &reachable ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - /* Check that values supplied by all initialized btls will work - for us. Note that this is the list of all initialized BTLs, - not the ones used for the just added procs. This is a little - overkill and inaccurate, as we may end up not using the BTL in - question and all add_procs calls after the first one are - duplicating an already completed check. But the final - initialization of the PML occurs before the final - initialization of the BTLs, and iterating through the in-use - BTLs requires iterating over the procs, as the BML does not - expose all currently in use btls. */ - - for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ; - item != opal_list_get_end(&mca_btl_base_modules_initialized) ; - item = opal_list_get_next(item)) { - mca_btl_base_selected_module_t *sm = - (mca_btl_base_selected_module_t*) item; - if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_bfo_hdr_t)) { - opal_show_help("help-mpi-pml-bfo.txt", "eager_limit_too_small", - true, - sm->btl_component->btl_version.mca_component_name, - ompi_process_info.nodename, - sm->btl_component->btl_version.mca_component_name, - sm->btl_module->btl_eager_limit, - sm->btl_component->btl_version.mca_component_name, - sizeof(mca_pml_bfo_hdr_t), - sm->btl_component->btl_version.mca_component_name); - rc = OMPI_ERR_BAD_PARAM; - goto cleanup_and_return; - } - } - - - /* TODO: Move these callback registration to another place */ - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_MATCH, - mca_pml_bfo_recv_frag_callback_match, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDV, - 
mca_pml_bfo_recv_frag_callback_rndv, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RGET, - mca_pml_bfo_recv_frag_callback_rget, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_ACK, - mca_pml_bfo_recv_frag_callback_ack, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_FRAG, - mca_pml_bfo_recv_frag_callback_frag, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_PUT, - mca_pml_bfo_recv_frag_callback_put, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_FIN, - mca_pml_bfo_recv_frag_callback_fin, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - -#if PML_BFO - rc = mca_pml_bfo_register_callbacks(); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; -#endif /* PML_BFO */ - /* register error handlers */ - rc = mca_bml.bml_register_error((mca_btl_base_module_error_cb_fn_t)mca_pml_bfo_error_handler); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - cleanup_and_return: - OBJ_DESTRUCT(&reachable); - - return rc; -} - -/* - * iterate through each proc and notify any PTLs associated - * with the proc that it is/has gone away - */ - -int mca_pml_bfo_del_procs(ompi_proc_t** procs, size_t nprocs) -{ - return mca_bml.bml_del_procs(nprocs, procs); -} - -/* - * diagnostics - */ - -int mca_pml_bfo_dump(struct ompi_communicator_t* comm, int verbose) -{ - struct mca_pml_comm_t* pml_comm = comm->c_pml_comm; - int i; - - /* iterate through all procs on communicator */ - for( i = 0; i < (int)pml_comm->num_procs; i++ ) { - mca_pml_bfo_comm_proc_t* proc = &pml_comm->procs[i]; - mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - size_t n; - - opal_output(0, "[Rank %d]\n", i); - /* 
dump all receive queues */ - - /* dump all btls */ - for(n=0; nbtl_eager.arr_size; n++) { - mca_bml_base_btl_t* bml_btl = &ep->btl_eager.bml_btls[n]; - bml_btl->btl->btl_dump(bml_btl->btl, bml_btl->btl_endpoint, verbose); - } - } - return OMPI_SUCCESS; -} - -static void mca_pml_bfo_fin_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - -#if PML_BFO - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { - mca_pml_bfo_repost_fin(des); - return; - } - MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION(bml_btl, btl, des); -#endif /* PML_BFO */ - /* check for pending requests */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/** - * Send an FIN to the peer. If we fail to send this ack (no more available - * fragments or the send failed) this function automatically add the FIN - * to the list of pending FIN, Which guarantee that the FIN will be sent - * later. 
- */ -int mca_pml_bfo_send_fin( ompi_proc_t* proc, - mca_bml_base_btl_t* bml_btl, - opal_ptr_t hdr_des, - uint8_t order, -#if PML_BFO - uint32_t status, - uint16_t seq, - uint8_t restartseq, - uint16_t ctx, uint32_t src) -#else /* PML_BFO */ - uint32_t status ) -#endif /* PML_BFO */ -{ - mca_btl_base_descriptor_t* fin; - mca_pml_bfo_fin_hdr_t* hdr; - int rc; - - mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_bfo_fin_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - - if(NULL == fin) { - MCA_PML_BFO_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status); - return OMPI_ERR_OUT_OF_RESOURCE; - } - fin->des_cbfunc = mca_pml_bfo_fin_completion; - fin->des_cbdata = NULL; - - /* fill in header */ - hdr = (mca_pml_bfo_fin_hdr_t*)fin->des_local->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FIN; - hdr->hdr_des = hdr_des; - hdr->hdr_fail = status; -#if PML_BFO - fin->des_cbdata = proc; - hdr->hdr_match.hdr_seq = seq; - hdr->hdr_match.hdr_ctx = ctx; - hdr->hdr_match.hdr_src = src; - hdr->hdr_match.hdr_common.hdr_flags = restartseq; /* use unused hdr_flags field */ -#endif /* PML_BFO */ - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_FIN, proc); - - /* queue request */ - rc = mca_bml_base_send( bml_btl, - fin, - MCA_PML_BFO_HDR_TYPE_FIN ); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); - } - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, fin); - MCA_PML_BFO_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status); - return OMPI_ERR_OUT_OF_RESOURCE; -} - -void mca_pml_bfo_process_pending_packets(mca_bml_base_btl_t* bml_btl) -{ - mca_pml_bfo_pckt_pending_t *pckt; - int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_bfo.pckt_pending); - - for(i = 0; i < s; i++) { - mca_bml_base_btl_t *send_dst = NULL; - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - pckt = (mca_pml_bfo_pckt_pending_t*) - 
opal_list_remove_first(&mca_pml_bfo.pckt_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - if(NULL == pckt) - break; - if(pckt->bml_btl != NULL && - pckt->bml_btl->btl == bml_btl->btl) { - send_dst = pckt->bml_btl; - } else { - mca_bml_base_endpoint_t* endpoint = - (mca_bml_base_endpoint_t*) pckt->proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - send_dst = mca_bml_base_btl_array_find( - &endpoint->btl_eager, bml_btl->btl); - } - if(NULL == send_dst) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.pckt_pending, - (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - switch(pckt->hdr.hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_ACK: - rc = mca_pml_bfo_recv_request_ack_send_btl(pckt->proc, - send_dst, - pckt->hdr.hdr_ack.hdr_src_req.lval, - pckt->hdr.hdr_ack.hdr_dst_req.pval, - pckt->hdr.hdr_ack.hdr_send_offset, - pckt->hdr.hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_NORDMA); - if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.pckt_pending, - (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - return; - } - break; - case MCA_PML_BFO_HDR_TYPE_FIN: - rc = mca_pml_bfo_send_fin(pckt->proc, send_dst, - pckt->hdr.hdr_fin.hdr_des, - pckt->order, -#if PML_BFO - pckt->hdr.hdr_fin.hdr_fail, - pckt->hdr.hdr_fin.hdr_match.hdr_seq, - pckt->hdr.hdr_fin.hdr_match.hdr_common.hdr_flags, - pckt->hdr.hdr_fin.hdr_match.hdr_ctx, - pckt->hdr.hdr_fin.hdr_match.hdr_src); -#else /* PML_BFO */ - pckt->hdr.hdr_fin.hdr_fail); -#endif /* PML_BFO */ - if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { - return; - } - break; - default: - opal_output(0, "[%s:%d] wrong header type\n", - __FILE__, __LINE__); - break; - } - /* We're done with this packet, return it back to the free list */ - MCA_PML_BFO_PCKT_PENDING_RETURN(pckt); - } -} - -void mca_pml_bfo_process_pending_rdma(void) -{ - mca_pml_bfo_rdma_frag_t* frag; - int32_t i, rc, s = 
(int32_t)opal_list_get_size(&mca_pml_bfo.rdma_pending); - - for(i = 0; i < s; i++) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - frag = (mca_pml_bfo_rdma_frag_t*) - opal_list_remove_first(&mca_pml_bfo.rdma_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - if(NULL == frag) - break; - if(frag->rdma_state == MCA_PML_BFO_RDMA_PUT) { - frag->retries++; - rc = mca_pml_bfo_send_request_put_frag(frag); - } else { - rc = mca_pml_bfo_recv_request_get_frag(frag); - } - if(OMPI_ERR_OUT_OF_RESOURCE == rc) - break; - } -} - - -void mca_pml_bfo_error_handler( - struct mca_btl_base_module_t* btl, int32_t flags, - ompi_proc_t* errproc, char* btlinfo ) { -#if PML_BFO - if (flags & MCA_BTL_ERROR_FLAGS_NONFATAL) { - mca_pml_bfo_failover_error_handler(btl, flags, errproc, btlinfo); - return; - } -#endif /* PML_BFO */ - ompi_rte_abort(-1, NULL); -} - -#if OPAL_ENABLE_FT_CR == 0 -int mca_pml_bfo_ft_event( int state ) { - return OMPI_SUCCESS; -} -#else -int mca_pml_bfo_ft_event( int state ) -{ - static bool first_continue_pass = false; - ompi_proc_t** procs = NULL; - size_t num_procs; - int ret, p; - - if(OPAL_CRS_CHECKPOINT == state) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1); - opal_pmix.fence(NULL, 0); - } - - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0); - } - else if(OPAL_CRS_CONTINUE == state) { - first_continue_pass = !first_continue_pass; - - if( !first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0); - opal_pmix.fence(NULL, 0); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2); - } - - if (opal_cr_continue_like_restart && !first_continue_pass) { - /* - * Get a list of processes - */ - procs = ompi_proc_all(&num_procs); - if(NULL == procs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* - * Refresh the proc structure, and publish our proc info in the modex. - * NOTE: Do *not* call ompi_proc_finalize as there are many places in - * the code that point to indv. procs in this strucutre. 
For our - * needs here we only need to fix up the modex, bml and pml - * references. - */ - if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { - opal_output(0, - "pml:bfo: ft_event(Restart): proc_refresh Failed %d", - ret); - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free (procs); - return ret; - } - } - } - else if(OPAL_CRS_RESTART_PRE == state ) { - /* Nothing here */ - } - else if(OPAL_CRS_RESTART == state ) { - /* - * Get a list of processes - */ - procs = ompi_proc_all(&num_procs); - if(NULL == procs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* - * Clean out the modex information since it is invalid now. - * ompi_rte_purge_proc_attrs(); - * This happens at the ORTE level, so doing it again here will cause - * some issues with socket caching. - */ - - - /* - * Refresh the proc structure, and publish our proc info in the modex. - * NOTE: Do *not* call ompi_proc_finalize as there are many places in - * the code that point to indv. procs in this strucutre. For our - * needs here we only need to fix up the modex, bml and pml - * references. 
- */ - if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { - opal_output(0, - "pml:bfo: ft_event(Restart): proc_refresh Failed %d", - ret); - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free (procs); - return ret; - } - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - /* Call the BML - * BML is expected to call ft_event in - * - BTL(s) - * - MPool(s) - */ - if( OMPI_SUCCESS != (ret = mca_bml.bml_ft_event(state))) { - opal_output(0, "pml:base: ft_event: BML ft_event function failed: %d\n", - ret); - } - - if(OPAL_CRS_CHECKPOINT == state) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P1); - - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR0); - /* JJH Cannot barrier here due to progress engine -- ompi_rte_barrier();*/ - } - } - else if(OPAL_CRS_CONTINUE == state) { - if( !first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1); - opal_pmix.fence(NULL, 0); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3); - } - - if (opal_cr_continue_like_restart && !first_continue_pass) { - /* - * Exchange the modex information once again. - * BTLs will have republished their modex information. - */ - opal_pmix.fence(NULL, 0); - - /* - * Startup the PML stack now that the modex is running again - * Add the new procs (BTLs redo modex recv's) - */ - if( OMPI_SUCCESS != (ret = mca_pml_bfo_add_procs(procs, num_procs) ) ) { - opal_output(0, "pml:bfo: ft_event(Restart): Failed in add_procs (%d)", ret); - return ret; - } - - /* Is this barrier necessary ? 
JJH */ - opal_pmix.fence(NULL, 0); - - if( NULL != procs ) { - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free(procs); - procs = NULL; - } - } - if( !first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2); - opal_pmix.fence(NULL, 0); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1); - } - } - else if(OPAL_CRS_RESTART_PRE == state ) { - /* Nothing here */ - } - else if(OPAL_CRS_RESTART == state ) { - /* - * Exchange the modex information once again. - * BTLs will have republished their modex information. - */ - opal_pmix.fence(NULL, 0); - - /* - * Startup the PML stack now that the modex is running again - * Add the new procs (BTLs redo modex recv's) - */ - if( OMPI_SUCCESS != (ret = mca_pml_bfo_add_procs(procs, num_procs) ) ) { - opal_output(0, "pml:bfo: ft_event(Restart): Failed in add_procs (%d)", ret); - return ret; - } - - /* Is this barrier necessary ? JJH */ - opal_pmix.fence(NULL, 0); - - if( NULL != procs ) { - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free(procs); - procs = NULL; - } - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OMPI_SUCCESS; -} -#endif /* OPAL_ENABLE_FT_CR */ - -int mca_pml_bfo_com_btl_comp(const void *v1, const void *v2) -{ - const mca_pml_bfo_com_btl_t *b1 = (const mca_pml_bfo_com_btl_t *) v1; - const mca_pml_bfo_com_btl_t *b2 = (const mca_pml_bfo_com_btl_t *) v2; - - if(b1->bml_btl->btl_weight < b2->bml_btl->btl_weight) - return 1; - if(b1->bml_btl->btl_weight > b2->bml_btl->btl_weight) - return -1; - - return 0; -} - diff --git a/ompi/mca/pml/bfo/pml_bfo.h b/ompi/mca/pml/bfo/pml_bfo.h deleted file mode 100644 index 80d8b30e459..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo.h +++ /dev/null @@ -1,362 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_BFO_H -#define MCA_PML_BFO_H - -#include "ompi_config.h" -#include "opal/class/ompi_free_list.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/pml_base_request.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/datatype/ompi_datatype.h" -#include "pml_bfo_hdr.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/proc/proc.h" -#include "opal/mca/allocator/base/base.h" - -BEGIN_C_DECLS - -/** - * BFO PML module - */ - -struct mca_pml_bfo_t { - mca_pml_base_module_t super; - - int priority; - int free_list_num; /* initial size of free list */ - int free_list_max; /* maximum size of free list */ - int free_list_inc; /* number of elements to grow free list */ - unsigned int send_pipeline_depth; - unsigned int recv_pipeline_depth; - unsigned int rdma_put_retries_limit; - int max_rdma_per_request; - int max_send_per_range; - bool leave_pinned; - int leave_pinned_pipeline; - - /* lock queue access */ - opal_mutex_t lock; - - /* free lists */ - ompi_free_list_t rdma_frags; - ompi_free_list_t recv_frags; - ompi_free_list_t pending_pckts; - ompi_free_list_t buffers; - ompi_free_list_t send_ranges; - - /* list of pending operations */ - opal_list_t pckt_pending; - opal_list_t send_pending; - opal_list_t recv_pending; - opal_list_t rdma_pending; - /* List of pending fragments without a matching 
communicator */ - opal_list_t non_existing_communicator_pending; - bool enabled; - char* allocator_name; - mca_allocator_base_module_t* allocator; - unsigned int unexpected_limit; -}; -typedef struct mca_pml_bfo_t mca_pml_bfo_t; - -extern mca_pml_bfo_t mca_pml_bfo; -extern int mca_pml_bfo_output; - -/* - * PML interface functions. - */ - -extern int mca_pml_bfo_add_comm( - struct ompi_communicator_t* comm -); - -extern int mca_pml_bfo_del_comm( - struct ompi_communicator_t* comm -); - -extern int mca_pml_bfo_add_procs( - struct ompi_proc_t **procs, - size_t nprocs -); - -extern int mca_pml_bfo_del_procs( - struct ompi_proc_t **procs, - size_t nprocs -); - -extern int mca_pml_bfo_enable( bool enable ); - -extern int mca_pml_bfo_progress(void); - -extern int mca_pml_bfo_iprobe( int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_probe( int dst, - int tag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_improbe( int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - struct ompi_message_t **message, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_mprobe( int dst, - int tag, - struct ompi_communicator_t* comm, - struct ompi_message_t **message, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_isend_init( void *buf, - size_t count, - ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_bfo_isend( void *buf, - size_t count, - ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_bfo_send( void *buf, - size_t count, - ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm ); - -extern int 
mca_pml_bfo_irecv_init( void *buf, - size_t count, - ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_bfo_irecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_bfo_recv( void *buf, - size_t count, - ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_imrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - struct ompi_request_t **request ); - -extern int mca_pml_bfo_mrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_dump( struct ompi_communicator_t* comm, - int verbose ); - -extern int mca_pml_bfo_start( size_t count, - ompi_request_t** requests ); - -extern int mca_pml_bfo_ft_event( int state ); - -END_C_DECLS - -struct mca_pml_bfo_pckt_pending_t { - ompi_free_list_item_t super; - ompi_proc_t* proc; - mca_pml_bfo_hdr_t hdr; - struct mca_bml_base_btl_t *bml_btl; - uint8_t order; -}; -typedef struct mca_pml_bfo_pckt_pending_t mca_pml_bfo_pckt_pending_t; -OBJ_CLASS_DECLARATION(mca_pml_bfo_pckt_pending_t); - -#define MCA_PML_BFO_PCKT_PENDING_ALLOC(pckt) \ -do { \ - ompi_free_list_item_t* item; \ - OMPI_FREE_LIST_WAIT_MT(&mca_pml_bfo.pending_pckts, item); \ - pckt = (mca_pml_bfo_pckt_pending_t*)item; \ -} while (0) - -#define MCA_PML_BFO_PCKT_PENDING_RETURN(pckt) \ -do { \ - /* return packet */ \ - OMPI_FREE_LIST_RETURN_MT(&mca_pml_bfo.pending_pckts, \ - (ompi_free_list_item_t*)pckt); \ -} while(0) - -#define MCA_PML_BFO_ADD_FIN_TO_PENDING(P, D, B, O, S) \ - do { \ - mca_pml_bfo_pckt_pending_t *_pckt; \ - \ - MCA_PML_BFO_PCKT_PENDING_ALLOC(_pckt); \ - _pckt->hdr.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FIN; \ - 
_pckt->hdr.hdr_fin.hdr_des = (D); \ - _pckt->hdr.hdr_fin.hdr_fail = (S); \ - _pckt->proc = (P); \ - _pckt->bml_btl = (B); \ - _pckt->order = (O); \ - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); \ - opal_list_append(&mca_pml_bfo.pckt_pending, \ - (opal_list_item_t*)_pckt); \ - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); \ - } while(0) - - -int mca_pml_bfo_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, -#if PML_BFO - opal_ptr_t hdr_des, uint8_t order, uint32_t status, - uint16_t seq, uint8_t reqseq, uint16_t ctx, uint32_t src); -#else /* PML_BFO */ - opal_ptr_t hdr_des, uint8_t order, uint32_t status); -#endif /* PML_BFO */ - -/* This function tries to resend FIN/ACK packets from pckt_pending queue. - * Packets are added to the queue when sending of FIN or ACK is failed due to - * resource unavailability. bml_btl passed to the function doesn't represents - * packet's destination, it represents BTL on which resource was freed, so only - * this BTL should be considered for resending packets */ -void mca_pml_bfo_process_pending_packets(mca_bml_base_btl_t* bml_btl); - -/* This function retries failed PUT/GET operations on frag. When RDMA operation - * cannot be accomplished for some reason, frag is put on the rdma_pending list. - * Later the operation is retried. 
The destination of RDMA operation is stored - * inside the frag structure */ -void mca_pml_bfo_process_pending_rdma(void); - -#define MCA_PML_BFO_PROGRESS_PENDING(bml_btl) \ - do { \ - if(opal_list_get_size(&mca_pml_bfo.pckt_pending)) \ - mca_pml_bfo_process_pending_packets(bml_btl); \ - if(opal_list_get_size(&mca_pml_bfo.recv_pending)) \ - mca_pml_bfo_recv_request_process_pending(); \ - if(opal_list_get_size(&mca_pml_bfo.send_pending)) \ - mca_pml_bfo_send_request_process_pending(bml_btl); \ - if(opal_list_get_size(&mca_pml_bfo.rdma_pending)) \ - mca_pml_bfo_process_pending_rdma(); \ - } while (0) - -/* - * Compute the total number of bytes on supplied descriptor - */ -static inline int mca_pml_bfo_compute_segment_length (size_t seg_size, void *segments, size_t count, - size_t hdrlen) { - size_t i, length; - - for (i = 0, length = -hdrlen ; i < count ; ++i) { - mca_btl_base_segment_t *segment = - (mca_btl_base_segment_t *)((char *) segments + i * seg_size); - - length += segment->seg_len; - } - - return length; -} - -static inline int mca_pml_bfo_compute_segment_length_base (mca_btl_base_segment_t *segments, - size_t count, size_t hdrlen) { - size_t i, length; - - for (i = 0, length = -hdrlen ; i < count ; ++i) { - length += segments[i].seg_len; - } - - return length; -} - -/* represent BTL chosen for sending request */ -struct mca_pml_bfo_com_btl_t { - mca_bml_base_btl_t *bml_btl; - struct mca_mpool_base_registration_t* btl_reg; - size_t length; -}; -typedef struct mca_pml_bfo_com_btl_t mca_pml_bfo_com_btl_t; - -int mca_pml_bfo_com_btl_comp(const void *v1, const void *v2); - -/* Calculate what percentage of a message to send through each BTL according to - * relative weight */ -static inline void -mca_pml_bfo_calc_weighted_length( mca_pml_bfo_com_btl_t *btls, int num_btls, size_t size, - double weight_total ) -{ - int i; - size_t length_left; - - /* shortcut for common case for only one BTL */ - if( OPAL_LIKELY(1 == num_btls) ) { - btls[0].length = size; - 
return; - } - - /* sort BTLs according of their weights so BTLs with smaller weight will - * not hijack all of the traffic */ - qsort( btls, num_btls, sizeof(mca_pml_bfo_com_btl_t), - mca_pml_bfo_com_btl_comp ); - - for(length_left = size, i = 0; i < num_btls; i++) { - mca_bml_base_btl_t* bml_btl = btls[i].bml_btl; - size_t length = 0; - if( OPAL_UNLIKELY(0 != length_left) ) { - length = (length_left > bml_btl->btl->btl_eager_limit)? - ((size_t)(size * (bml_btl->btl_weight / weight_total))) : - length_left; - - if(length > length_left) - length = length_left; - length_left -= length; - } - btls[i].length = length; - } - - /* account for rounding errors */ - btls[0].length += length_left; -} - -#endif diff --git a/ompi/mca/pml/bfo/pml_bfo_comm.c b/ompi/mca/pml/bfo/pml_bfo_comm.c deleted file mode 100644 index 6e00f6ca2ee..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_comm.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include - -#include "pml_bfo.h" -#include "pml_bfo_comm.h" - - - -static void mca_pml_bfo_comm_proc_construct(mca_pml_bfo_comm_proc_t* proc) -{ - proc->expected_sequence = 1; - proc->ompi_proc = NULL; - proc->send_sequence = 0; - OBJ_CONSTRUCT(&proc->frags_cant_match, opal_list_t); - OBJ_CONSTRUCT(&proc->specific_receives, opal_list_t); - OBJ_CONSTRUCT(&proc->unexpected_frags, opal_list_t); -} - - -static void mca_pml_bfo_comm_proc_destruct(mca_pml_bfo_comm_proc_t* proc) -{ - OBJ_DESTRUCT(&proc->frags_cant_match); - OBJ_DESTRUCT(&proc->specific_receives); - OBJ_DESTRUCT(&proc->unexpected_frags); -} - - -static OBJ_CLASS_INSTANCE( - mca_pml_bfo_comm_proc_t, - opal_object_t, - mca_pml_bfo_comm_proc_construct, - mca_pml_bfo_comm_proc_destruct); - - -static void mca_pml_bfo_comm_construct(mca_pml_bfo_comm_t* comm) -{ - OBJ_CONSTRUCT(&comm->wild_receives, opal_list_t); - OBJ_CONSTRUCT(&comm->matching_lock, opal_mutex_t); - comm->recv_sequence = 0; - comm->procs = NULL; - comm->last_probed = 0; - comm->num_procs = 0; -} - - -static void mca_pml_bfo_comm_destruct(mca_pml_bfo_comm_t* comm) -{ - size_t i; - for(i=0; inum_procs; i++) - OBJ_DESTRUCT((&comm->procs[i])); - if(NULL != comm->procs) - free(comm->procs); - OBJ_DESTRUCT(&comm->wild_receives); - OBJ_DESTRUCT(&comm->matching_lock); -} - - -OBJ_CLASS_INSTANCE( - mca_pml_bfo_comm_t, - opal_object_t, - mca_pml_bfo_comm_construct, - mca_pml_bfo_comm_destruct); - - -int mca_pml_bfo_comm_init_size(mca_pml_bfo_comm_t* comm, size_t size) -{ - size_t i; - - /* send message sequence-number support - sender side */ - comm->procs = (mca_pml_bfo_comm_proc_t*)malloc(sizeof(mca_pml_bfo_comm_proc_t)*size); - if(NULL == comm->procs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - for(i=0; iprocs+i, mca_pml_bfo_comm_proc_t); - } - comm->num_procs = size; - return OMPI_SUCCESS; -} - - diff --git a/ompi/mca/pml/bfo/pml_bfo_comm.h 
b/ompi/mca/pml/bfo/pml_bfo_comm.h deleted file mode 100644 index 06273ce9e93..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_comm.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PML_BFO_COMM_H -#define MCA_PML_BFO_COMM_H - -#include "opal/threads/mutex.h" -#include "opal/class/opal_list.h" -#include "ompi/proc/proc.h" -BEGIN_C_DECLS - - -struct mca_pml_bfo_comm_proc_t { - opal_object_t super; - uint16_t expected_sequence; /**< send message sequence number - receiver side */ - struct ompi_proc_t* ompi_proc; -#if OPAL_ENABLE_MULTI_THREADS - volatile int32_t send_sequence; /**< send side sequence number */ -#else - int32_t send_sequence; /**< send side sequence number */ -#endif - opal_list_t frags_cant_match; /**< out-of-order fragment queues */ - opal_list_t specific_receives; /**< queues of unmatched specific receives */ - opal_list_t unexpected_frags; /**< unexpected fragment queues */ -}; -typedef struct mca_pml_bfo_comm_proc_t mca_pml_bfo_comm_proc_t; - - -/** - * Cached on ompi_communicator_t to hold queues/state - * used by the PML<->PTL interface for matching logic. 
- */ -struct mca_pml_comm_t { - opal_object_t super; -#if OPAL_ENABLE_MULTI_THREADS - volatile uint32_t recv_sequence; /**< recv request sequence number - receiver side */ -#else - uint32_t recv_sequence; /**< recv request sequence number - receiver side */ -#endif - opal_mutex_t matching_lock; /**< matching lock */ - opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */ - mca_pml_bfo_comm_proc_t* procs; - size_t num_procs; - size_t last_probed; -}; -typedef struct mca_pml_comm_t mca_pml_bfo_comm_t; - -OBJ_CLASS_DECLARATION(mca_pml_bfo_comm_t); - - -/** - * Initialize an instance of mca_pml_bfo_comm_t based on the communicator size. - * - * @param comm Instance of mca_pml_bfo_comm_t - * @param size Size of communicator - * @return OMPI_SUCCESS or error status on failure. - */ - -extern int mca_pml_bfo_comm_init_size(mca_pml_bfo_comm_t* comm, size_t size); - -END_C_DECLS -#endif - diff --git a/ompi/mca/pml/bfo/pml_bfo_component.c b/ompi/mca/pml/bfo/pml_bfo_component.c deleted file mode 100644 index 274f5fb0475..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_component.c +++ /dev/null @@ -1,274 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "opal/mca/event/event.h" -#include "mpi.h" -#include "ompi/runtime/params.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "pml_bfo.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_rdmafrag.h" -#include "pml_bfo_recvfrag.h" -#include "ompi/mca/bml/base/base.h" -#include "pml_bfo_component.h" -#include "opal/mca/allocator/base/base.h" -#include "opal/runtime/opal_params.h" - -OBJ_CLASS_INSTANCE( mca_pml_bfo_pckt_pending_t, - ompi_free_list_item_t, - NULL, - NULL ); - -static int mca_pml_bfo_component_register(void); -static int mca_pml_bfo_component_open(void); -static int mca_pml_bfo_component_close(void); -static mca_pml_base_module_t* -mca_pml_bfo_component_init( int* priority, bool enable_progress_threads, - bool enable_mpi_threads ); -static int mca_pml_bfo_component_fini(void); -int mca_pml_bfo_output = 0; -static int mca_pml_bfo_verbose = 0; - -mca_pml_base_component_2_0_0_t mca_pml_bfo_component = { - - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .pmlm_version = { - MCA_PML_BASE_VERSION_2_0_0, - - .mca_component_name = "bfo", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_open_component = mca_pml_bfo_component_open, - .mca_close_component = mca_pml_bfo_component_close, - .mca_register_component_params = mca_pml_bfo_component_register, - }, - .pmlm_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .pmlm_init = mca_pml_bfo_component_init, - .pmlm_finalize = mca_pml_bfo_component_fini, -}; - -void *mca_pml_bfo_seg_alloc( struct mca_mpool_base_module_t* mpool, - size_t* size, - 
mca_mpool_base_registration_t** registration); - -void mca_pml_bfo_seg_free( struct mca_mpool_base_module_t* mpool, - void* segment ); - -static inline int mca_pml_bfo_param_register_int( - const char* param_name, - int default_value, - int *storage) -{ - *storage = default_value; - (void) mca_base_component_var_register(&mca_pml_bfo_component.pmlm_version, param_name, - NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - - return *storage; -} - -static inline unsigned int mca_pml_bfo_param_register_uint( - const char* param_name, - unsigned int default_value, - unsigned int *storage) -{ - *storage = default_value; - (void) mca_base_component_var_register(&mca_pml_bfo_component.pmlm_version, param_name, - NULL, MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - - return *storage; -} - -static int mca_pml_bfo_component_register(void) -{ - int default_priority; - -#if PML_BFO - default_priority = 5; -#else /* PML_BFO */ - default_priority = 20; - mca_pml_bfo_param_register_int("priority", 20); -#endif /* PML_BFO */ - - (void) mca_pml_bfo_param_register_int("verbose", 0, &mca_pml_bfo_verbose); - (void) mca_pml_bfo_param_register_int("free_list_num", 4, &mca_pml_bfo.free_list_num); - (void) mca_pml_bfo_param_register_int("free_list_max", -1, &mca_pml_bfo.free_list_max); - (void) mca_pml_bfo_param_register_int("free_list_inc", 64, &mca_pml_bfo.free_list_inc); - (void) mca_pml_bfo_param_register_int("priority", default_priority, &mca_pml_bfo.priority); - (void) mca_pml_bfo_param_register_uint("send_pipeline_depth", 3, &mca_pml_bfo.send_pipeline_depth); - (void) mca_pml_bfo_param_register_uint("recv_pipeline_depth", 4, &mca_pml_bfo.recv_pipeline_depth); - (void) mca_pml_bfo_param_register_uint("rdma_put_retries_limit", 5, &mca_pml_bfo.rdma_put_retries_limit); - (void) mca_pml_bfo_param_register_int("max_rdma_per_request", 4, &mca_pml_bfo.max_rdma_per_request); - 
(void) mca_pml_bfo_param_register_int("max_send_per_range", 4, &mca_pml_bfo.max_send_per_range); - (void) mca_pml_bfo_param_register_uint("unexpected_limit", 128, &mca_pml_bfo.unexpected_limit); - - mca_pml_bfo.allocator_name = "bucket"; - (void) mca_base_component_var_register(&mca_pml_bfo_component.pmlm_version, - "allocator", - "Name of allocator component for unexpected messages", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_pml_bfo.allocator_name); - - return OMPI_SUCCESS; -} - -static int mca_pml_bfo_component_open(void) -{ - mca_pml_bfo_output = opal_output_open(NULL); - opal_output_set_verbosity(mca_pml_bfo_output, mca_pml_bfo_verbose); - - mca_pml_bfo.enabled = false; - return mca_base_framework_open(&ompi_bml_base_framework, 0); -} - - -static int mca_pml_bfo_component_close(void) -{ - int rc; - - if (OMPI_SUCCESS != (rc = mca_base_framework_close(&ompi_bml_base_framework))) { - return rc; - } - opal_output_close(mca_pml_bfo_output); - - return OMPI_SUCCESS; -} - - -static mca_pml_base_module_t* -mca_pml_bfo_component_init( int* priority, - bool enable_progress_threads, - bool enable_mpi_threads ) -{ - mca_allocator_base_component_t* allocator_component; - - opal_output_verbose( 10, mca_pml_bfo_output, - "in bfo, my priority is %d\n", mca_pml_bfo.priority); - - if((*priority) > mca_pml_bfo.priority) { - *priority = mca_pml_bfo.priority; - return NULL; - } - *priority = mca_pml_bfo.priority; - - allocator_component = mca_allocator_component_lookup( mca_pml_bfo.allocator_name ); - if(NULL == allocator_component) { - opal_output(0, "mca_pml_bfo_component_init: can't find allocator: %s\n", mca_pml_bfo.allocator_name); - return NULL; - } - - mca_pml_bfo.allocator = allocator_component->allocator_init(true, - mca_pml_bfo_seg_alloc, - mca_pml_bfo_seg_free, NULL); - if(NULL == mca_pml_bfo.allocator) { - opal_output(0, "mca_pml_bfo_component_init: unable to initialize allocator\n"); - return NULL; - } - - - 
if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads, - enable_mpi_threads)) { - return NULL; - } - - /* Set this here (vs in component_open()) because - opal_leave_pinned* may have been set after MCA params were - read (e.g., by the openib btl) */ - mca_pml_bfo.leave_pinned = (1 == opal_leave_pinned); - mca_pml_bfo.leave_pinned_pipeline = (int) opal_leave_pinned_pipeline; - - return &mca_pml_bfo.super; -} - -int mca_pml_bfo_component_fini(void) -{ - int rc; - - /* Shutdown BML */ - if(OMPI_SUCCESS != (rc = mca_bml.bml_finalize())) - return rc; - - if(!mca_pml_bfo.enabled) - return OMPI_SUCCESS; /* never selected.. return success.. */ - mca_pml_bfo.enabled = false; /* not anymore */ - - OBJ_DESTRUCT(&mca_pml_bfo.rdma_pending); - OBJ_DESTRUCT(&mca_pml_bfo.pckt_pending); - OBJ_DESTRUCT(&mca_pml_bfo.recv_pending); - OBJ_DESTRUCT(&mca_pml_bfo.send_pending); - OBJ_DESTRUCT(&mca_pml_bfo.non_existing_communicator_pending); - OBJ_DESTRUCT(&mca_pml_bfo.buffers); - OBJ_DESTRUCT(&mca_pml_bfo.pending_pckts); - OBJ_DESTRUCT(&mca_pml_bfo.recv_frags); - OBJ_DESTRUCT(&mca_pml_bfo.rdma_frags); - OBJ_DESTRUCT(&mca_pml_bfo.lock); - - if(OMPI_SUCCESS != (rc = mca_pml_bfo.allocator->alc_finalize(mca_pml_bfo.allocator))) { - return rc; - } - -#if 0 - if (mca_pml_base_send_requests.fl_num_allocated != - mca_pml_base_send_requests.super.opal_list_length) { - opal_output(0, "bfo send requests: %d allocated %d returned\n", - mca_pml_base_send_requests.fl_num_allocated, - mca_pml_base_send_requests.super.opal_list_length); - } - if (mca_pml_base_recv_requests.fl_num_allocated != - mca_pml_base_recv_requests.super.opal_list_length) { - opal_output(0, "bfo recv requests: %d allocated %d returned\n", - mca_pml_base_recv_requests.fl_num_allocated, - mca_pml_base_recv_requests.super.opal_list_length); - } -#endif - - return OMPI_SUCCESS; -} - -void *mca_pml_bfo_seg_alloc( struct mca_mpool_base_module_t* mpool, - size_t* size, - mca_mpool_base_registration_t** registration) { - return 
malloc(*size); -} - -void mca_pml_bfo_seg_free( struct mca_mpool_base_module_t* mpool, - void* segment ) { - free(segment); -} diff --git a/ompi/mca/pml/bfo/pml_bfo_component.h b/ompi/mca/pml/bfo/pml_bfo_component.h deleted file mode 100644 index 2fd08d018eb..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_component.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_BFO_COMPONENT_H -#define MCA_PML_BFO_COMPONENT_H - -BEGIN_C_DECLS - -/* - * PML module functions. - */ -OMPI_MODULE_DECLSPEC extern mca_pml_base_component_2_0_0_t mca_pml_bfo_component; - -END_C_DECLS - -#endif diff --git a/ompi/mca/pml/bfo/pml_bfo_cuda.c b/ompi/mca/pml/bfo/pml_bfo_cuda.c deleted file mode 100644 index 3bb3c9537fc..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_cuda.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. 
- * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 NVIDIA Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "opal/prefetch.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "pml_bfo.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_rdmafrag.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_sendreq.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/memchecker.h" - -size_t mca_pml_bfo_rdma_cuda_btls( - mca_bml_base_endpoint_t* bml_endpoint, - unsigned char* base, - size_t size, - mca_pml_bfo_com_btl_t* rdma_btls); - -int mca_pml_bfo_cuda_need_buffers(void * rreq, - mca_btl_base_module_t* btl); - -/** - * Handle the CUDA buffer. - */ -int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size) { - int rc; -#if OPAL_CUDA_SUPPORT_41 - sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA; - if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) { - unsigned char *base; - opal_convertor_get_current_pointer( &sendreq->req_send.req_base.req_convertor, (void**)&base ); - /* Set flag back */ - sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA; - if( 0 != (sendreq->req_rdma_cnt = (uint32_t)mca_pml_bfo_rdma_cuda_btls( - sendreq->req_endpoint, - base, - sendreq->req_send.req_bytes_packed, - sendreq->req_rdma))) { - rc = mca_pml_bfo_send_request_start_rdma(sendreq, bml_btl, - sendreq->req_send.req_bytes_packed); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - mca_pml_bfo_free_rdma_resources(sendreq); - } - } else { - if (bml_btl->btl_flags & MCA_BTL_FLAGS_CUDA_PUT) { - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, - MCA_PML_BFO_HDR_FLAGS_CONTIG); - } else { - rc = 
mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, 0); - } - } - } else { - /* Do not send anything with first rendezvous message as copying GPU - * memory into RNDV message is expensive. */ - sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA; - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0); - } -#else - /* Just do the rendezvous but set initial data to be sent to zero */ - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0); -#endif /* OPAL_CUDA_SUPPORT_41 */ - return rc; -} - - - -size_t mca_pml_bfo_rdma_cuda_btls( - mca_bml_base_endpoint_t* bml_endpoint, - unsigned char* base, - size_t size, - mca_pml_bfo_com_btl_t* rdma_btls) -{ - int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); - double weight_total = 0; - int num_btls_used = 0, n; - - /* shortcut when there are no rdma capable btls */ - if(num_btls == 0) { - return 0; - } - - /* check to see if memory is registered */ - for(n = 0; n < num_btls && num_btls_used < mca_pml_bfo.max_rdma_per_request; - n++) { - mca_bml_base_btl_t* bml_btl = - mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n); - - if (bml_btl->btl_flags & MCA_BTL_FLAGS_CUDA_GET) { - mca_mpool_base_registration_t* reg = NULL; - mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool; - - if( NULL != btl_mpool ) { - /* register the memory */ - btl_mpool->mpool_register(btl_mpool, base, size, 0, ®); - } - - if(NULL == reg) - continue; - - rdma_btls[num_btls_used].bml_btl = bml_btl; - rdma_btls[num_btls_used].btl_reg = reg; - weight_total += bml_btl->btl_weight; - num_btls_used++; - } - } - - /* if we don't use leave_pinned and all BTLs that already have this memory - * registered amount to less then half of available bandwidth - fall back to - * pipeline protocol */ - if(0 == num_btls_used || (!mca_pml_bfo.leave_pinned && weight_total < 0.5)) - return 0; - - mca_pml_bfo_calc_weighted_length(rdma_btls, num_btls_used, size, - weight_total); - - return 
num_btls_used; -} - -int mca_pml_bfo_cuda_need_buffers(void * rreq, - mca_btl_base_module_t* btl) -{ - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)rreq; - if ((recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) && - (btl->btl_flags & MCA_BTL_FLAGS_CUDA_GET)) { - recvreq->req_recv.req_base.req_convertor.flags &= ~CONVERTOR_CUDA; - if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) { - recvreq->req_recv.req_base.req_convertor.flags |= CONVERTOR_CUDA; - return true; - } else { - recvreq->req_recv.req_base.req_convertor.flags |= CONVERTOR_CUDA; - return false; - } - } - return true; -} - diff --git a/ompi/mca/pml/bfo/pml_bfo_failover.c b/ompi/mca/pml/bfo/pml_bfo_failover.c deleted file mode 100644 index 25e6228954d..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_failover.c +++ /dev/null @@ -1,2187 +0,0 @@ -/* - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * Functions that implement failover capabilities. To utilize the - * failover feature, one needs to configure the library with - * --enable-openib-failover. Then the system that is being used - * must have two or more openib BTLs in use. When an error occurs, - * the BTL will call into this PML to map out the offending BTL and - * continue using the one that is still working. - * Most of the differences between the ob1 PML and the bfo PML are - * contained in this file. 
- */ - -#include "ompi_config.h" - -#include -#include - -#include "opal/class/opal_bitmap.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/mca/pml/base/base.h" -#include "pml_bfo.h" -#include "pml_bfo_component.h" -#include "pml_bfo_comm.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_recvfrag.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_rdmafrag.h" -#include "pml_bfo_failover.h" -#include "ompi/mca/bml/base/base.h" - -#include "ompi/runtime/ompi_cr.h" - -static void mca_pml_bfo_error_pending_packets(mca_btl_base_module_t* btl, - mca_bml_base_endpoint_t* ep); - -/** - * When running with failover enabled, check the PML sequence numbers - * to see if we have received a duplicate message. This check is done - * for for all MATCH fragments. It is also done for RNDV and RGET - * fragments that do not have the MCA_PML_BFO_HDR_FLAGS_RESTART flag - * set. - * We set the window size to half the total range of sequence numbers. - * We only enter this code when the seq_num is not the expected one. - * A few more notes on the algorithm used here. In normal operation, - * the expected value will either be equal to or less than the - * sequence number of the header. This is because we are using this - * sequence number to detect packets arriving prior to them being - * expected. If we determine that expected is less than header, then - * make sure this is not a rollover case. We do that by adding the - * maxnum to the expected. 
- * @param proc Pointer to proc from where message came - * @param hdr Pointer to header of message - */ -bool mca_pml_bfo_is_duplicate_msg(mca_pml_bfo_comm_proc_t* proc, - mca_pml_bfo_match_hdr_t *hdr) -{ - const int window = 32768; - const int maxnum = 65536; - mca_pml_bfo_recv_frag_t *frag; - -#if 0 - opal_output(0, "checking dup, exp=%d, act=%d, type=%d, cant_match=%d\n", - (uint16_t)proc->expected_sequence, - hdr->hdr_seq, hdr->hdr_common.hdr_type, - opal_list_get_size(&proc->frags_cant_match)); -#endif - - /* Few cases near end of values where expected may equal 65535 and - * an out of order shows up that may equal something like 1. */ - if (OPAL_UNLIKELY((uint16_t)proc->expected_sequence > hdr->hdr_seq)) { - if (((uint16_t)proc->expected_sequence - hdr->hdr_seq) < window) { - opal_output_verbose(20, mca_pml_bfo_output, - "%s:%d: frag duplicated, exp=%d, act=%d, type=%d\n", - __FILE__, __LINE__, (uint16_t)proc->expected_sequence, - hdr->hdr_seq, hdr->hdr_common.hdr_type); - return true; - } - } else { - /* This is the normal flow through this code. We also need to - * use the maxnum to ensure that we handle cases where the - * expected number has rolled over but then a duplicate message - * shows up that is greater than it. */ - if ((((uint16_t)proc->expected_sequence + maxnum) - hdr->hdr_seq) < window) { - opal_output_verbose(20, mca_pml_bfo_output, - "%s:%d: frag duplicated, exp=%d, act=%d, type=%d\n", - __FILE__, __LINE__, (uint16_t)proc->expected_sequence, - hdr->hdr_seq, hdr->hdr_common.hdr_type); - return true; - } - } - - /* Need to explicitly check against any out of order fragments. Unfortunately, we - * always have to do this since we can get a duplicate out of order fragment. 
*/ - if(OPAL_UNLIKELY(opal_list_get_size(&proc->frags_cant_match) > 0)) { - for(frag = (mca_pml_bfo_recv_frag_t*)opal_list_get_first(&proc->frags_cant_match); - frag != (mca_pml_bfo_recv_frag_t*)opal_list_get_end(&proc->frags_cant_match); - frag = (mca_pml_bfo_recv_frag_t*)opal_list_get_next(frag)) - { - mca_pml_bfo_match_hdr_t* mhdr = &frag->hdr.hdr_match; - - if(mhdr->hdr_seq == hdr->hdr_seq) { - opal_output_verbose(20, mca_pml_bfo_output, - "%s:%d: frag duplicated on frags_cant_match list, seq=%d, type=%d\n", - __FILE__, __LINE__, hdr->hdr_seq, hdr->hdr_common.hdr_type); - return true; - } - } - } - - return false; -} - -/** - * This function checks to see if we have received a duplicate FIN - * message. This is done by first pulling the pointer of the request - * that the FIN message is pointing to from the message. We then - * check the various fields in the request to the fields in the header - * and make sure they match. If they do not, then the request must - * have been recycled already and this is a duplicate FIN message. We - * have to do this check on every FIN message that we receive. - */ -bool mca_pml_bfo_is_duplicate_fin(mca_pml_bfo_hdr_t* hdr, mca_btl_base_descriptor_t* rdma, - mca_btl_base_module_t* btl) -{ - mca_pml_base_request_t* basereq; - /* When running with failover enabled, need to ensure that this - * is not a duplicate FIN message. */ - if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) { - /* The first check is to make sure the descriptor is pointing - * to a valid request. The descriptor may be pointing to NULL - * if it was freed and not reused yet. 
*/ - if (NULL == rdma->des_cbdata) { - opal_output_verbose(20, mca_pml_bfo_output, - "FIN: received: dropping because not pointing to valid descriptor " - "PML=%d CTX=%d SRC=%d RQS=%d", - hdr->hdr_fin.hdr_match.hdr_seq, - hdr->hdr_fin.hdr_match.hdr_ctx, - hdr->hdr_fin.hdr_match.hdr_src, - hdr->hdr_fin.hdr_match.hdr_common.hdr_flags); - return true; - } - - basereq = (mca_pml_base_request_t*)rdma->des_cbdata; - /* Now we know the descriptor is pointing to a non-null request. - * Does it match what we expect? To make sure the receiver request - * matches the FIN message, check the context number, source of the - * message, and MPI sequence number. Then make sure that it also - * matches the internal sequencing number of the requests. We need - * to look at the type of request we are pointing at to figure out - * what fields to access. */ - if (basereq->req_type == MCA_PML_REQUEST_RECV) { - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)basereq; - if ((hdr->hdr_fin.hdr_match.hdr_ctx != - recvreq->req_recv.req_base.req_comm->c_contextid) || - (hdr->hdr_fin.hdr_match.hdr_src != - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE) || - (hdr->hdr_fin.hdr_match.hdr_seq != (uint16_t)recvreq->req_msgseq)) { - opal_output_verbose(5, mca_pml_bfo_output, - "FIN: received on receiver: dropping because no match " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " - "RQS:exp=%d,act=%d, dst_req=%p", - (uint16_t)recvreq->req_msgseq, hdr->hdr_fin.hdr_match.hdr_seq, - recvreq->req_recv.req_base.req_comm->c_contextid, - hdr->hdr_fin.hdr_match.hdr_ctx, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_fin.hdr_match.hdr_src, - recvreq->req_restartseq, - hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, - (void *)recvreq); - return true; - } - if (hdr->hdr_fin.hdr_match.hdr_common.hdr_flags != recvreq->req_restartseq) { - opal_output_verbose(5, mca_pml_bfo_output, - "FIN: received on receiver: dropping because old " - "PML:exp=%d,act=%d 
CTX:exp=%d,act=%d SRC:exp=%d,act=%d " - "RQS:exp=%d,act=%d, dst_req=%p", - (uint16_t)recvreq->req_msgseq, hdr->hdr_fin.hdr_match.hdr_seq, - recvreq->req_recv.req_base.req_comm->c_contextid, - hdr->hdr_fin.hdr_match.hdr_ctx, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_fin.hdr_match.hdr_src, - recvreq->req_restartseq, - hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, - (void *)recvreq); - return true; - } - } else if (basereq->req_type == MCA_PML_REQUEST_SEND) { - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)basereq; - if ((hdr->hdr_fin.hdr_match.hdr_ctx != - sendreq->req_send.req_base.req_comm->c_contextid) || - (hdr->hdr_fin.hdr_match.hdr_src != - sendreq->req_send.req_base.req_peer) || - (hdr->hdr_fin.hdr_match.hdr_seq != - (uint16_t)sendreq->req_send.req_base.req_sequence)) { - uint16_t seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - opal_output_verbose(5, mca_pml_bfo_output, - "FIN: received on sender: dropping because no match " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " - "RQS:exp=%d,act=%d, dst_req=%p", - seq, hdr->hdr_fin.hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, - hdr->hdr_fin.hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, - hdr->hdr_fin.hdr_match.hdr_src, - sendreq->req_restartseq, - hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, - (void *)sendreq); - return true; - } - if (hdr->hdr_fin.hdr_match.hdr_common.hdr_flags != sendreq->req_restartseq) { - uint16_t seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - opal_output_verbose(5, mca_pml_bfo_output, - "FIN: received on sender: dropping because old " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " - "RQS:exp=%d,act=%d, dst_req=%p", - seq, hdr->hdr_fin.hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, - hdr->hdr_fin.hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, - hdr->hdr_fin.hdr_match.hdr_src, - sendreq->req_restartseq, - 
hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, - (void *)sendreq); - return true; - } - } else { - /* We can get here if the descriptor has been reused, but - * not as an RDMA descriptor. In that case, the callback - * function has been set to something else. Clearly the - * descriptor we are interested is gone, so just drop the - * FIN message. */ - opal_output_verbose(5, mca_pml_bfo_output, - "FIN: received: dropping because descriptor has been reused " - "PML=%d CTX=%d SRC=%d RQS=%d rdma->des_flags=%d", - hdr->hdr_fin.hdr_match.hdr_seq, hdr->hdr_fin.hdr_match.hdr_ctx, - hdr->hdr_fin.hdr_match.hdr_src, hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, - rdma->des_flags); - return true; - } - } - return false; -} - -/** - * Repost a FIN message if we get an error on the completion event. - */ -void mca_pml_bfo_repost_fin(struct mca_btl_base_descriptor_t* des) { - /* In the error case, we will repost the FIN message. I had - * considered restarting the request. The problem is that the - * request may be already complete when we detect that a FIN - * message got an error on its completion event. For example, with - * the PUT protocol, if the RDMA writes succeed and all the data - * has been sent, then the request is marked as complete and can be - * freed. Therefore, an error on the FIN message has no request to - * refer back to. So, we will just repost it. However, we are also - * faced with the case where the FIN message has an error but it - * actually makes it to the other side. In that case we are now - * sending a FIN message to a non-existent request on the receiver - * side. To handle that, we have added the match information to - * the FIN message. That way, we can check on the receiving side - * to ensure that it is pointing to a valid request. 
*/ - mca_pml_bfo_fin_hdr_t* hdr; - mca_bml_base_endpoint_t* bml_endpoint; - ompi_proc_t *proc; - mca_bml_base_btl_t* bml_btl; - - proc = (ompi_proc_t*) des->des_cbdata; - bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - hdr = (mca_pml_bfo_fin_hdr_t*)des->des_local->seg_addr.pval; - - opal_output_verbose(20, mca_pml_bfo_output, - "REPOST: BFO_HDR_TYPE_FIN: seq=%d,myrank=%d,peer=%d,hdr->hdr_fail=%d,src=%d", - hdr->hdr_match.hdr_seq, OMPI_PROC_MY_NAME->vpid, OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid, - hdr->hdr_fail, hdr->hdr_match.hdr_src); - - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - - /* Reconstruct the fin for sending on the other BTL */ - mca_pml_bfo_send_fin(proc, bml_btl, - hdr->hdr_des, MCA_BTL_NO_ORDER, - hdr->hdr_fail, hdr->hdr_match.hdr_seq, - hdr->hdr_match.hdr_common.hdr_flags, - hdr->hdr_match.hdr_ctx, hdr->hdr_match.hdr_src); - return; -} - -/** - * This function is called when a RNDV or RGET is received with the - * FLAGS_RESTART flag set. This means this message already has a - * receive request already associated with it. - */ -mca_pml_bfo_recv_request_t* mca_pml_bfo_get_request(mca_pml_bfo_match_hdr_t *hdr) { - mca_pml_bfo_recv_request_t *match = NULL; - mca_pml_bfo_rendezvous_hdr_t * rhdr = (mca_pml_bfo_rendezvous_hdr_t *) hdr; - match = (mca_pml_bfo_recv_request_t *) rhdr->hdr_dst_req.pval; - - /* Check to see if we have received a duplicate RNDV (or RGET). This can - * occur because we got an error when we reposted the RNDV. Therefore, - * we make sure that the request has not completed from underneath us - * and been recycled. Secondly, make sure we are not getting it a - * second time for the same request. 
*/ - if ((rhdr->hdr_match.hdr_ctx != match->req_recv.req_base.req_comm->c_contextid) || - (rhdr->hdr_match.hdr_src != match->req_recv.req_base.req_ompi.req_status.MPI_SOURCE) || - (rhdr->hdr_match.hdr_seq != (uint16_t)match->req_msgseq) || - (rhdr->hdr_restartseq == match->req_restartseq)) { - if (hdr->hdr_common.hdr_type == MCA_PML_BFO_HDR_TYPE_RNDV) { - opal_output_verbose(20, mca_pml_bfo_output, - "RNDV: received with RESTART flag: duplicate, dropping " - "PML:exp=%d,act=%d RQS=%d, src_req=%p, dst_req=%p, peer=%d", - match->req_msgseq, rhdr->hdr_match.hdr_seq, match->req_restartseq, - match->remote_req_send.pval, (void *)match, - match->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } else { - opal_output_verbose(20, mca_pml_bfo_output, - "RGET: received with RESTART flag: duplicate, dropping " - "PML:exp=%d,act=%d RQS=%d, src_req=%p, dst_req=%p, peer=%d", - match->req_msgseq, rhdr->hdr_match.hdr_seq, match->req_restartseq, - match->remote_req_send.pval, (void *)match, - match->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } - return NULL; - } - - mca_pml_bfo_recv_request_reset(match); - if (hdr->hdr_common.hdr_type == MCA_PML_BFO_HDR_TYPE_RNDV) { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDV: received with RESTART flag: restarting recv, " - "PML:exp=%d,act=%d RQS(new)=%d, src_req=%p, dst_req=%p, peer=%d", - match->req_msgseq, rhdr->hdr_match.hdr_seq, match->req_restartseq, - match->remote_req_send.pval, (void *)match, - match->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "RGET: received with RESTART flag: restarting recv, " - "PML:exp=%d,act=%d RQS(new)=%d, src_req=%p, dst_req=%p, peer=%d", - match->req_msgseq, rhdr->hdr_match.hdr_seq, match->req_restartseq, - match->remote_req_send.pval, (void *)match, - match->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } - return match; -} - -/** - * Callback for when a RNDVRESTARTNOTIFY message is received. 
A - * RNDVRESTARTNOTIFY message is sent from the sender to the receiver - * telling the receiver that the message is going to be started over. - * The receiver first makes sure that the request being pointed to is - * still valid. If it is not, that means the receiver must have - * completed the request and therefore we need to send a NACK back to - * the sender. The receiver then makes sure this is not a duplicate - * message. If it is a duplicate, it will just drop it. Otherwise, - * it will then send a RNDVRESTARTACK message if there are no - * outstanding events on the receiver. Otherwise, it will just change - * the state of the request and wait for another event to send the - * RNDVRESTARTACK to the sender. - */ -void mca_pml_bfo_recv_frag_callback_rndvrestartnotify(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_recv_request_t* recvreq; - ompi_proc_t* ompi_proc; - ompi_process_name_t orte_proc; - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY); - recvreq = (mca_pml_bfo_recv_request_t*)hdr->hdr_restart.hdr_dst_req.pval; - - /* Check to see if the receive request is still valid. If the - * request is recycled, that means the original request must have - * completed and we therefore need to send a NACK back to the sender. - * Note that when the request is gone, we need to pull some information - * off the header so that we can figure out where to send the NACK - * message back to. 
*/ - if ((hdr->hdr_match.hdr_ctx != recvreq->req_recv.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE) || - (hdr->hdr_match.hdr_seq != (uint16_t)recvreq->req_msgseq)) { - orte_proc.jobid = hdr->hdr_restart.hdr_jobid; - orte_proc.vpid = hdr->hdr_restart.hdr_vpid; - - ompi_proc = ompi_proc_find(&orte_proc); - opal_output_verbose(20, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: received: does not match request, sending NACK back " - "PML:req=%d,hdr=%d CTX:req=%d,hdr=%d SRC:req=%d,hdr=%d " - "RQS:req=%d,hdr=%d src_req=%p, dst_req=%p, peer=%d, hdr->hdr_jobid=%d, " - "hdr->hdr_vpid=%d, proc_hostname=%s", - (uint16_t)recvreq->req_msgseq, hdr->hdr_match.hdr_seq, - recvreq->req_recv.req_base.req_comm->c_contextid, hdr->hdr_match.hdr_ctx, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_match.hdr_src, recvreq->req_restartseq, - hdr->hdr_restart.hdr_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_restart.hdr_jobid, hdr->hdr_restart.hdr_vpid, - (NULL == ompi_proc->super.proc_hostname) ? "unknown" : ompi_proc->super.proc_hostname); - mca_pml_bfo_recv_request_rndvrestartnack(des, ompi_proc, false); - return; - } - - /* We know that we have the correct receive request. Make sure this is not - * a duplicate RNDVRESTARTNOTIFY on this request. 
*/ - if (hdr->hdr_restart.hdr_restartseq == recvreq->req_restartseq) { - opal_output_verbose(20, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: received duplicate: dropping RNDVRESTARTNOTIFY " - "message PML:req=%d,hdr=%d CTX:req=%d,hdr=%d SRC:req=%d,hdr=%d " - "RQS:req=%d,hdr=%d src_req=%p, dst_req=%p, peer=%d", - (uint16_t)recvreq->req_msgseq, hdr->hdr_match.hdr_seq, - recvreq->req_recv.req_base.req_comm->c_contextid, hdr->hdr_match.hdr_ctx, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_match.hdr_src, recvreq->req_restartseq, - hdr->hdr_restart.hdr_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - return; - } - - /* Increment restart number. */ - recvreq->req_restartseq++; - recvreq->req_errstate |= RECVREQ_RNDVRESTART_RECVED; - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: received: outstanding receive events=%d, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_events, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - if (0 == recvreq->req_events) { - mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY, - OMPI_SUCCESS, btl); - } - - return; -} - -/** - * Callback for when a RNDVRESTARTACK message is received. This - * message is sent from the receiver to the sender to acknowledge - * the receipt of the RNDVRESTARTNOTIFY message. At this point, - * the sender can reset the send request and restart the message. 
- */ -void mca_pml_bfo_recv_frag_callback_rndvrestartack(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_send_request_t* sendreq; - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK); - sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_restart.hdr_src_req.pval; - - /* Check to see if we have received a duplicate message. The - * first three comparisons make sure that we are not looking at a - * recycled request. The last check makes sure we are not getting - * a duplicate message for this specific request. All of this is - * needed because the receiver might get an error and repost the - * RNDVRESTARTACK message, but the RNDVRESTARTACK was actually received. */ - if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_peer) || - (hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence) || - (hdr->hdr_restart.hdr_restartseq != sendreq->req_restartseq)) { - opal_output_verbose(20, mca_pml_bfo_output, - "RNDVRESTARTACK: received: does not match request, dropping " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d EXP:exp=%d,act=%d " - "src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, hdr->hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, hdr->hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, hdr->hdr_match.hdr_src, - sendreq->req_restartseq, hdr->hdr_restart.hdr_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - return; - } - - sendreq->req_restart++; - if (2 == sendreq->req_restart) { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTACK: received: restarting send " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - 
hdr->hdr_match.hdr_seq, hdr->hdr_restart.hdr_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - mca_pml_bfo_send_request_restart(sendreq, false, 0); - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTACK received: waiting for RNDVRESTARTNOTIFY completion " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - hdr->hdr_match.hdr_seq, hdr->hdr_restart.hdr_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - } - return; -} - - -/** - * Callback for when a RECVERRNOTIFY message is received. This message - * is sent from the receiver to the sender and tells the sender that - * the receiver has seen an error. This will trigger the sender - * to start the request restart sequence. - */ -void mca_pml_bfo_recv_frag_callback_recverrnotify(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_send_request_t* sendreq; - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY); - sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_restart.hdr_src_req.pval; - - /* First make sure that this message is pointing to a valid request. - * This can be determined if the communicator context, the source of - * the message, and the MPI sequence number all match. 
*/ - if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_peer) || - (hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence)) { - opal_output_verbose(20, mca_pml_bfo_output, - "RECVERRNOTIFY: received: does not match request, dropping " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d RQS:exp=%d,act=%d " - "src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, hdr->hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, hdr->hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, hdr->hdr_match.hdr_src, - sendreq->req_restartseq, hdr->hdr_restart.hdr_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - return; - } - - /* If a good ACK was never received, then the first ACK received - * might be a RECVERRNOTIFY message. In that case, the sendreq does not - * have a valid req_recv pointer in it. Therefore, check for that - * case and update the field in the sendreq if necessary. */ - if (NULL == sendreq->req_recv.pval) { - sendreq->req_recv = hdr->hdr_restart.hdr_dst_req; - } - - /* Now check to see a restart needs to be issued. The request - * sequence number in the header is compared against the current - * request sequence number in the send request. If the header - * sequence number is greater than or equal to the send request - * number, then a rndvrestartnotify is issued. There are some cases - * where a few extra rndvrestartnotifys are issued. That is OK as - * it will all work itself out. The idea is to prevent many - * restarts unnecessarily. This still allows multiple restarts to - * happen. It could be that sometime later another error occurs - * which initiates a restart. That is OK as it will have the new - * sequence number and all is well. 
*/ - if (hdr->hdr_restart.hdr_restartseq >= sendreq->req_restartseq) { - assert(sendreq->req_send.req_base.req_ompi.req_state == OMPI_REQUEST_ACTIVE); - sendreq->req_error++; - opal_output_verbose(30, mca_pml_bfo_output, - "RECVERRNOTIFY: received: sendreq has error, outstanding events=%d, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - sendreq->req_events, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - - if (0 == sendreq->req_events) { - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, - MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY, - OMPI_SUCCESS, btl); - } - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "RECVERRNOTIFY: received: error has already been noted, ignoring " - "PML:exp=%d,act=%d RQS:exp=%d,act=%d src_req=%p, dst_req=%p, peer=%d", - sendreq->req_restartseq, hdr->hdr_restart.hdr_restartseq, - (uint16_t)sendreq->req_send.req_base.req_sequence, hdr->hdr_match.hdr_seq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - } - return; -} - -/** - * Callback for when a RNDVRESTARTNACK message is received. This message - * is sent from the receiver to the sender and tells the sender that - * the receiver has already completed the message and there is nothing - * else to be done. The sender should then just make the send request - * complete. 
- */ -void mca_pml_bfo_recv_frag_callback_rndvrestartnack(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_send_request_t* sendreq; - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK); - sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_restart.hdr_src_req.pval; - - /* Not convinced a RNDVRESTARTNACK that does not match a request can - * happen, but have the check in here anyways for now */ - if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_peer) || - (hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence) || - (hdr->hdr_restart.hdr_restartseq != sendreq->req_restartseq)) { - opal_output_verbose(20, mca_pml_bfo_output, - "RNDVRESTARTNACK: received: does not match request, dropping " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d EXP:exp=%d,act=%d " - "src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, hdr->hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, hdr->hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, hdr->hdr_match.hdr_src, - sendreq->req_restartseq, hdr->hdr_restart.hdr_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - return; - } - - opal_output_verbose(20, mca_pml_bfo_output, - "RNDVRESTARTNACK: received: marking send request as complete " - "PML=%d CTX=%d SRC=%d EXP=%d " - "src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_peer, sendreq->req_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - /* Mark the sender complete. This data exchange is over. 
*/ - send_request_pml_complete(sendreq); - return; -} - - -/** - * This function gets called when failover is enabled and an error - * occurs during the rendezvous protocol. A message is sent to the - * receiving side notifying the request that the communication is - * going to be starting over. However, none of the information in the - * send request is reset yet, so that any in flight fragments can - * still find a home. Information in the send request gets reset when - * the completion event for this send occurs AND an ACK has been - * received back from the receiver. - */ -void mca_pml_bfo_send_request_rndvrestartnotify(mca_pml_bfo_send_request_t* sendreq, - bool repost, mca_btl_base_tag_t tag, - int status, mca_btl_base_module_t* btl) -{ - mca_btl_base_descriptor_t* des; - mca_pml_bfo_restart_hdr_t* restart; - int rc; - mca_bml_base_btl_t* bml_btl; - ompi_proc_t* proc = (ompi_proc_t*)sendreq->req_send.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - /* If this message is not a repost, then update the sequence number. */ - if (!repost) { - /* Bump up the rendezvous request sequence number. */ - sendreq->req_restartseq++; - } - - assert(0 == sendreq->req_events); - assert(0 != bml_endpoint->btl_eager.arr_size); - - /* In the case that this is started because the receiver has - * sent us a message, then attempt to use a different BTL than the - * error message was received on. This may potentially tickle the - * error sooner if this side has not seen it yet. */ - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - if (bml_btl->btl == btl) { - /* If there is more than one BTL left, then we will get a - * different one. If there is only one, we will just get - * the same one back again. That is OK. 
*/ - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - } - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_restart_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - if( OPAL_UNLIKELY(NULL == des) ) { - opal_output(0, "%s:%d Our of resources, cannot proceed", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - /* fill out header */ - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - restart->hdr_match.hdr_common.hdr_flags = 0; - restart->hdr_match.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY; - restart->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - restart->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - restart->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - restart->hdr_restartseq = sendreq->req_restartseq; - restart->hdr_src_req.pval = sendreq; - restart->hdr_dst_req = sendreq->req_recv; - restart->hdr_dst_rank = sendreq->req_send.req_base.req_peer; /* Needed for NACKs */ - restart->hdr_jobid = OMPI_PROC_MY_NAME->jobid; - restart->hdr_vpid = OMPI_PROC_MY_NAME->vpid; - - bfo_hdr_hton(restart, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY, proc); - - /* initialize descriptor */ - des->des_cbfunc = mca_pml_bfo_rndvrestartnotify_completion; - - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: sent: PML=%d, RQS(new)=%d, CTX=%d, SRC=%d, " - "src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_restartseq, - restart->hdr_match.hdr_ctx, restart->hdr_match.hdr_src, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY); - if( OPAL_UNLIKELY( rc < 0 ) ) { - opal_output(0, "[%s:%d] Cannot send rndvrestartnotify message", __FILE__, __LINE__); - ompi_rte_abort(-1, 
NULL); - } - -} - -/** - * This function restarts a RNDV send request. When this is called, - * all the fields in the send request are reset and the send is - * started over. The sendreq->req_restartseq will be non-zero which will - * trigger a special flag in the RNDV header which indicates the match - * has already happened on the receiving side. - */ -void mca_pml_bfo_send_request_restart(mca_pml_bfo_send_request_t* sendreq, - bool repost, mca_btl_base_tag_t tag) -{ - size_t offset = 0; - opal_list_item_t *first_item; - opal_list_item_t *last_item; - mca_bml_base_endpoint_t* endpoint; - size_t i; - - /* If the tag is something valid, it was a repost. We could also - * check the repost field as well. Maybe I can drop the - * repost and have the tag double as it. */ - switch (tag) { - case MCA_PML_BFO_HDR_TYPE_RNDV: - opal_output_verbose(30, mca_pml_bfo_output, - "RNDV: completion failed, reset and repost: PML=%d, RQS=%d, " - "CTX=%d, SRC=%d, src_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_restartseq, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_comm->c_my_rank, (void *)sendreq, - sendreq->req_send.req_base.req_peer); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - opal_output_verbose(30, mca_pml_bfo_output, - "RGET: completion failed, reset and repost: PML=%d, RQS=%d, " - "CTX=%d, SRC=%d, src_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_restartseq, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_comm->c_my_rank, (void *)sendreq, - sendreq->req_send.req_base.req_peer); - break; - default: - break; - } - - /* Return mpool resources, they get reacquired when request starts over. 
*/ - mca_pml_bfo_free_rdma_resources(sendreq); - - /* Release any memory in use if this is a buffered send */ - if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED && - sendreq->req_send.req_addr != sendreq->req_send.req_base.req_addr) { - mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq); - } - - /* Clear out any unsent send ranges. Recreate the functionality - * from the get_send_range() and get_next_send_range() functions. */ - OPAL_THREAD_LOCK(&sendreq->req_send_range_lock); - first_item = opal_list_get_begin(&sendreq->req_send_ranges); - last_item = opal_list_get_last(&sendreq->req_send_ranges); - while (first_item != last_item) { - opal_list_remove_item(&sendreq->req_send_ranges, last_item); - OMPI_FREE_LIST_RETURN_MT(&mca_pml_bfo.send_ranges, (ompi_free_list_item_t *)last_item); - last_item = opal_list_get_last(&sendreq->req_send_ranges); - } - OPAL_THREAD_UNLOCK(&sendreq->req_send_range_lock); - - /* Reset the converter to the beginning. */ - opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, - &offset); - - /* Bump up internal sequence number to handle possible duplicate - * RNDV messages. In the case of reposting a RNDV message, do not - * increment the value. That way, a duplicate message can be - * detected. */ - if (!repost) { - sendreq->req_restartseq++; - } - - /* This code here is essentially the same is mca_pml_bfo_send_request_start() - * but with a few modifications since we are restarting the request, not - * starting entirely from scratch. */ - endpoint = (mca_bml_base_endpoint_t*)sendreq->req_send.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - sendreq->req_endpoint = endpoint; - sendreq->req_state = 0; - sendreq->req_lock = 0; - sendreq->req_pipeline_depth = 0; - sendreq->req_bytes_delivered = 0; - sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE; - - /* Note that we do not reset the following three items. - * They stay with their original values. 
- * sendreq->req_send.req_base.req_sequence - * sendreq->req_restartseq - * sendreq->req_recv.pval - */ - sendreq->req_restart = 0; /* reset in case we restart again */ - sendreq->req_error = 0; /* clear error state */ - sendreq->req_events = 0; /* clear events, probably 0 anyways */ - - MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base ); - - for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) { - mca_bml_base_btl_t* bml_btl; - int rc; - - /* select a btl */ - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - rc = mca_pml_bfo_send_request_start_btl(sendreq, bml_btl); - if(OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc)) - return; - } - add_request_to_send_pending(sendreq, MCA_PML_BFO_SEND_PENDING_START, true); -} - -/** - * This function will repost a match fragment. This function has to - * handle the case where there may not be a request associated with - * the fragment and just use the information in the fragment to - * repost the send. - */ -void mca_pml_bfo_repost_match_fragment(struct mca_btl_base_descriptor_t* des) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - struct mca_bml_base_endpoint_t* endpoint; - int rc; - size_t offset = 0; - - /* At this point a determination has to be made whether the - * BFO_HDR_TYPE_MATCH fragment was sent via the sendi interface or - * via the regular send interface. This is important because if it - * was sent via the sendi interface, then the request associated - * with it has already been completed and released. This can be - * determined by looking at the des->des_flags field of the - * descriptor. If the ALWAYS_CALLBACK flag is set then it is known - * that there is a valid send request associated with the fragment - * and it can be used to extricate information. 
If ALWAYS_CALLBACK - * is not set, then the endpoint information is in the callback - * data field and where to resend the fragment can be determined - * from the fragment. */ - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - endpoint = sendreq->req_endpoint; - opal_output_verbose(30, mca_pml_bfo_output, - "MATCH: repost: src_req=%p", - (void *)sendreq); - } else { - endpoint = des->des_cbdata; - opal_output_verbose(30, mca_pml_bfo_output, - "MATCH: repost: des=%p (sendi fragment)", - (void *)des); - } - - assert(0 != endpoint->btl_eager.arr_size); - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - /* Reset the converter to the beginning if the message is - * not a zero-length message. In the case of zero-length - * message, the convertor is not being used. */ - if (0 != sendreq->req_send.req_bytes_packed) { - opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, - &offset); - } - rc = mca_pml_bfo_send_request_start_btl(sendreq, bml_btl); - if (OMPI_SUCCESS == rc) { - return; - } else if (OMPI_ERR_OUT_OF_RESOURCE == rc) { - opal_output_verbose(30, mca_pml_bfo_output, - "Warning: delaying reposting of BFO_HDR_TYPE_MATCH, btls=%d", - (int)sendreq->req_endpoint->btl_eager.arr_size); - add_request_to_send_pending(sendreq, MCA_PML_BFO_SEND_PENDING_START, true); - return; - } else { - opal_output(0, "%s:%d FATAL ERROR, cannot repost BFO_HDR_TYPE_MATCH", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - } else { - /* No send request available so alloc and repost explicitly */ - mca_btl_base_descriptor_t* newdes = NULL; - mca_btl_base_segment_t* oldseg; - mca_btl_base_segment_t* newseg; - - oldseg = des->des_local; - /* The alloc routine must be called with the MCA_BTL_NO_ORDER - * flag so that the allocation routine works. The allocation - * will fill in the order flag in the descriptor. 
*/ - mca_bml_base_alloc( bml_btl, &newdes, - MCA_BTL_NO_ORDER, - oldseg->seg_len, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if (OPAL_UNLIKELY(NULL == newdes)) { - opal_output(0, "%s:%d FATAL ERROR, cannot repost BFO_HDR_TYPE_MATCH", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - newseg = newdes->des_local; - /* Copy over all the data that is actually sent over the wire */ - memcpy(newseg->seg_addr.pval, oldseg->seg_addr.pval, oldseg->seg_len); - newseg->seg_len = oldseg->seg_len; - - /* This call will either return OMPI_SUCCESS or OMPI_ERROR. The - * OMPI_SUCCESS only says that the send request can be freed. - * It may be that the message was queued up in the BTL. */ - rc = mca_bml_base_send(bml_btl, newdes, MCA_PML_BFO_HDR_TYPE_MATCH); - - /* Some BTLs will set the CALLBACK flag but we do not want that - * as there is no longer a request associated with this descriptor. - * Therefore, always make sure it is cleared. */ - newdes->des_flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK; - - if( OPAL_LIKELY( rc >= 0 )) { - /* Just let the normal flow of data free whatever needs - * to be freed */ - return; - } else { - opal_output(0, "%s:%d FATAL ERROR, cannot repost BFO_HDR_TYPE_MATCH", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - } - /* No need to free any descriptors. The BTLs take care of it since - * we originally allocated with MCA_BTL_DES_FLAGS_BTL_OWNERSHIP. */ -} - -/** - * Completion callback for rndvrestartnotify completion event. If the - * RNDVRESTARTACK has already been received, then reset and restart. - * Otherwise, just update the state and let the RNDVRESTARTACK trigger - * the reset and restart. 
- */ -void -mca_pml_bfo_rndvrestartnotify_completion(mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status) -{ - mca_pml_bfo_restart_hdr_t* restart; - mca_pml_bfo_send_request_t* sendreq; - - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - sendreq = (mca_pml_bfo_send_request_t*) restart->hdr_src_req.pval; - - /* Need to resend this message in the case that it fails */ - if( OPAL_UNLIKELY((OMPI_SUCCESS != status))) { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: completion failed: repost " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - /* Repost the message and indicate it is a repost, not a new one. No need - * to check the req_events as this is the only possible outstanding send - * event when we have posted this message. We also know the sendreq is still - * available because nothing can proceed until this completion event happens - * successfully as we track the req_restart value. */ - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, true, - MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY, - status, btl); - return; - } - - /* The req_restart value is incremented to indicate completion of - * the RNDVRESTARTNOTIFY message. Then (typically) the arrival of the - * ACK message will cause the request to reset and restart. Need to - * make sure that RNDVRESTARTNOTIFY callback has been called as well as - * the ACK back from the receiver prior to resetting and restarting - * the request. This is needed in case we get an error on the - * RNDVRESTARTNOTIFY message, but it actually makes it over. We want - * to make sure the send request has not restarted yet. So, keep a - * counter that counts to 2. 
*/ - sendreq->req_restart++; - if (2 == sendreq->req_restart) { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: completion: restarting request " - "PML=%d, RQS=%d, CTX=%d, src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_recv.pval, (void *)sendreq, - sendreq->req_send.req_base.req_peer); - mca_pml_bfo_send_request_restart(sendreq, false, 0); - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: completion: waiting for ack " - "PML=%d, RQS=%d, CTX=%d, src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_recv.pval, (void *)sendreq, - sendreq->req_send.req_base.req_peer); - } -} - -/** - * This function is called when an error is detected on a completion - * event on the receiving side. This can come from a ACK, PUT, RDMA - * read (GET) or RECVERRNOTIFY completion event. When this happens, check - * the state of the request and decide if the sender needs be notified - * that a problem was seen. If no RECVERRNOTIFY message has been sent and - * no RNDVRESTARTNOTIFY has been received from the sender, then send a - * message telling the sender an error was seen. 
- */ -void mca_pml_bfo_recv_request_recverrnotify(mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_tag_t tag, int status) -{ - mca_btl_base_descriptor_t* des; - mca_pml_bfo_restart_hdr_t* restart; - ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - mca_bml_base_btl_t* bml_btl; - int rc; - - assert(0 != bml_endpoint->btl_eager.arr_size); - - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_restart_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - if( OPAL_UNLIKELY(NULL == des) ) { - opal_output(0, "%s:%d Out of resources, cannot proceed", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - /* fill out header */ - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - restart->hdr_match.hdr_common.hdr_flags = 0; - restart->hdr_match.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY; - restart->hdr_match.hdr_ctx = recvreq->req_recv.req_base.req_comm->c_contextid; - restart->hdr_match.hdr_src = recvreq->req_recv.req_base.req_comm->c_my_rank; - restart->hdr_match.hdr_seq = (uint16_t)recvreq->req_msgseq; - restart->hdr_restartseq = recvreq->req_restartseq; - restart->hdr_src_req = recvreq->remote_req_send; - restart->hdr_dst_req.pval = recvreq; - - bfo_hdr_hton(restart, MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY, proc); - - /* initialize descriptor */ - des->des_cbfunc = mca_pml_bfo_recv_restart_completion; - - opal_output_verbose(30, mca_pml_bfo_output, - "RECVERRNOTIFY: sending to sender, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d, btl=%p", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, - (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - (void 
*)bml_btl->btl); - - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY); - if( OPAL_UNLIKELY( rc < 0 ) ) { - opal_output(0, "[%s:%d] Cannot send recverrnotify message", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - /* Prevent future error messages on this request */ - recvreq->req_errstate |= RECVREQ_RECVERRSENT; -} - -/** - * This function is called when it may be time to send a RNDVRESTARTACK - * message back to the sending side. This can happen because we - * received a RNDVRESTARTNOTIFY message from the sender. This can - * also happen if we have noticed that the request has received the - * RNDVRESTARTNOTIFY message, but has not yet sent out the RNDVRESTARTACK - * because there were still some pending receive events on the request. - * That means we can enter this routine from a completion event on a ACK, - * PUT, or RDMA read as well as from the receipt of a RNDVRESTARTNOTIFY - * message. If all is good, we sent the RNDVRESTARTACK message back to - * the sender. Then sometime later a message will arrive telling us - * to reset and restart the receive request. - */ -void mca_pml_bfo_recv_request_rndvrestartack(mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_tag_t tag, int status, - mca_btl_base_module_t* btl) -{ - mca_btl_base_descriptor_t* des; - mca_pml_bfo_restart_hdr_t* restart; - ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - mca_bml_base_btl_t* bml_btl; - int rc; - - assert((recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED) == RECVREQ_RNDVRESTART_RECVED); - assert((recvreq->req_errstate & RECVREQ_RNDVRESTART_ACKED) == 0); - assert(0 != bml_endpoint->btl_eager.arr_size); - - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - - /* Attempt to use a different BTL than the error message was - * received on. 
This may potentially tickle the error sooner if - * this side has not seen it yet. */ - if (bml_btl->btl == btl) { - /* If there is more than one BTL left, then we will get a - * different one. If there is only one, we will just get - * the same one back again. That is OK. */ - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - } - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_restart_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - if( OPAL_UNLIKELY(NULL == des) ) { - opal_output(0, "%s:%d Out of resources, cannot proceed", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - /* fill out header */ - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - restart->hdr_match.hdr_common.hdr_flags = 0; - restart->hdr_match.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK; - restart->hdr_match.hdr_ctx = recvreq->req_recv.req_base.req_comm->c_contextid; - restart->hdr_match.hdr_src = recvreq->req_recv.req_base.req_comm->c_my_rank; - restart->hdr_match.hdr_seq = (uint16_t)recvreq->req_msgseq; - restart->hdr_restartseq = recvreq->req_restartseq; - restart->hdr_src_req = recvreq->remote_req_send; - restart->hdr_dst_req.pval = recvreq; - - bfo_hdr_hton(restart, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK, proc); - - /* initialize descriptor */ - des->des_cbfunc = mca_pml_bfo_recv_restart_completion; - des->des_cbdata = (void *)proc; - - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTACK: due to PML tag=%d completion, sending to " - "sender, PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, " - "peer=%d, btl=%p", - tag, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, status, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - (void *)bml_btl->btl); - - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK); - if( OPAL_UNLIKELY( 
rc < 0 ) ) { - opal_output(0, "[%s:%d] Cannot send rndvrestartack message", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - /* Move to the next state so we do not send anymore ACKs */ - recvreq->req_errstate &= ~RECVREQ_RNDVRESTART_RECVED; - recvreq->req_errstate |= RECVREQ_RNDVRESTART_ACKED; -} - -/** - * Called after the receipt of a RNDVRESTARTNOTIFY message to a request - * that no longer matches. This can happen if the sender detected an - * error, but the receiver actually received all the data. Therefore - * send a NACK back instead of the ACK so that the sender can complete - * its request. This happens very rarely. Note that we need to make - * use of the hdr_dst_rank that we received from the notify message. - * This is so the sending side make sure the message matches a valid - * request on the sending side. - */ -void mca_pml_bfo_recv_request_rndvrestartnack(mca_btl_base_descriptor_t* olddes, - ompi_proc_t* ompi_proc, bool repost) -{ - mca_btl_base_segment_t* segments; - mca_pml_bfo_restart_hdr_t* hdr; /* hdr of NOTIFY message */ - mca_pml_bfo_restart_hdr_t* nack; /* hdr of NACK message */ - mca_btl_base_descriptor_t* des; - mca_bml_base_endpoint_t* bml_endpoint; - mca_bml_base_btl_t* bml_btl; - int rc; - - if (repost) { - /* In the case where we are reposting the NACK, the information - * is in the src area, since we are reposting a send. In addition, - * we get the ompi_proc from the old descriptor. 
*/ - ompi_proc = olddes->des_cbdata; - } - - segments = olddes->des_local; - hdr = (mca_pml_bfo_restart_hdr_t*)segments->seg_addr.pval; - - bml_endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - assert(0 != bml_endpoint->btl_eager.arr_size); - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_restart_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - if( OPAL_UNLIKELY(NULL == des) ) { - opal_output(0, "%s:%d Out of resources, cannot proceed", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - /* fill out header */ - nack = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - nack->hdr_match.hdr_common.hdr_flags = 0; - nack->hdr_match.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK; - nack->hdr_match.hdr_ctx = hdr->hdr_match.hdr_ctx; - nack->hdr_match.hdr_src = hdr->hdr_dst_rank; /* Receiver rank */ - nack->hdr_match.hdr_seq = hdr->hdr_match.hdr_seq; - nack->hdr_restartseq = hdr->hdr_restartseq; - nack->hdr_src_req = hdr->hdr_src_req; - nack->hdr_dst_req.pval = 0; - - bfo_hdr_hton(nack, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK, ompi_proc); - - /* Initialize descriptor. Save away ompi_proc in case we need - * to respost this fragmnet. 
*/ - des->des_cbfunc = mca_pml_bfo_recv_restart_completion; - des->des_cbdata = ompi_proc; - - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNACK: sending to sender, " - "PML=%d, RQS=%d, CTX=%d, SRC=%d, peer=%d", - nack->hdr_match.hdr_seq, nack->hdr_restartseq, - nack->hdr_match.hdr_ctx, nack->hdr_match.hdr_src, - OMPI_CAST_RTE_NAME(&ompi_proc->super.proc_name)->vpid); - - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK); - if( OPAL_UNLIKELY( rc < 0 ) ) { - opal_output(0, "[%s:%d] Cannot send rndvrestartnack message", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } -} - - -/** - * Reset all the receive request fields to match what a request - * looks like when it is first started. This gets called when - * the rendezvous/rget message is being restarted. - */ -void mca_pml_bfo_recv_request_reset(mca_pml_bfo_recv_request_t* match) { - int i; - - assert(true != match->req_recv.req_base.req_pml_complete); - - /* Free up any resources that were reserved for this receive. This - * was copied from the receive completion code. */ - for(i = 0; i < (int)match->req_rdma_cnt; i++) { - mca_mpool_base_registration_t* btl_reg = match->req_rdma[i].btl_reg; - if( NULL != btl_reg && btl_reg->mpool != NULL) { - btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg ); - } - } - match->req_rdma_cnt = 0; - - /* This code is mostly copied from mca_pml_bfo_recv_req_start. - * Note 1: Leave req_bytes_expected as the original value. No - * need to adjust this as it is set when convertor is created. - * Note 2: Leave req_bytes_delivered as the original value. - * This is created when the convertor is created and represents - * the expected bytes from the user. 
*/ - assert(0 == match->req_events); - match->req_errstate = 0; - match->req_lock = 0; - match->req_pipeline_depth = 0; - match->req_bytes_received = 0; - match->req_rdma_idx = 0; - match->req_rdma_offset = 0; - match->req_send_offset = 0; - match->req_pending = false; - match->req_ack_sent = false; - match->req_restartseq++; - - /* These really should not need to be set, but this matches some - * of the initialization within MCA_PML_BASE_RECV_START. */ - match->req_recv.req_base.req_pml_complete = false; - match->req_recv.req_base.req_ompi.req_complete = false; - match->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; - - /* Reset the convertor */ - opal_convertor_set_position(&match->req_recv.req_base.req_convertor, - &match->req_rdma_offset); - return; -} - -/* - * Completion callback for RNDVRESTARTACK, RNDVRESTARTNACK and RECVERRNOTIFY. - */ -void mca_pml_bfo_recv_restart_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - if(OPAL_UNLIKELY(OMPI_SUCCESS != status)) { - mca_pml_bfo_common_hdr_t* common = des->des_local->seg_addr.pval; - mca_pml_bfo_restart_hdr_t* restart; /* RESTART header */ - mca_pml_bfo_recv_request_t* recvreq; - - switch (common->hdr_type) { - case MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK: - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - recvreq = (mca_pml_bfo_recv_request_t*) restart->hdr_dst_req.pval; - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTACK: completion failed: try again " - "PML:req=%d,hdr=%d RQS:req=%d,hdr=%d CTX:req=%d,hdr=%d " - "src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, restart->hdr_match.hdr_seq, - recvreq->req_restartseq, restart->hdr_restartseq, - recvreq->req_recv.req_base.req_comm->c_contextid, - restart->hdr_match.hdr_ctx, - recvreq->remote_req_send.pval, - (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - /* Adjust the states back to avoid 
assert errors */ - recvreq->req_errstate &= ~RECVREQ_RNDVRESTART_ACKED; - recvreq->req_errstate |= RECVREQ_RNDVRESTART_RECVED; - mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK, - status, btl); - break; - case MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK: - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNACK: completion failed: try again " - "des=%p ", (void *)des); - /* Just blast it again. No request associated with it. */ - mca_pml_bfo_recv_request_rndvrestartnack(des, NULL, true); - break; - case MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY: - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - recvreq = (mca_pml_bfo_recv_request_t*) restart->hdr_dst_req.pval; - /* With just two BTLs, this should never happen as we are - * typically sending the RECVERRNOTIFY message on the - * working BTL. But, just in case, if we get an error, - * send it again. */ - opal_output_verbose(30, mca_pml_bfo_output, - "RECVERRNOTIFY: completion failed: try again, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, - (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY, - status); - break; - default: - opal_output(0, "[%s:%d] Unknown callback error", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - } -} - -/* - * Remove a btl for future communication on an endpoint. - */ -void mca_pml_bfo_map_out_btl(struct mca_btl_base_module_t* btl, - ompi_proc_t *errproc, char *btlname) -{ - mca_bml_base_endpoint_t* ep; - bool remove = false; - int i; - - ep = (mca_bml_base_endpoint_t*)errproc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - /* The bml_del_proc_btl function does not indicate if it - * actually removed a btl, so let me check up front. This is - * done so that we can only print out messages when a btl is - * actually going to be removed. 
These arrays are small so it - * is OK to walk through all of them even though it may be - * redundant. */ - for( i = 0; i < (int)ep->btl_eager.arr_size; i++ ) { - if( ep->btl_eager.bml_btls[i].btl == btl ) { - remove = true; - } - } - for( i = 0; i < (int)ep->btl_send.arr_size; i++ ) { - if( ep->btl_send.bml_btls[i].btl == btl ) { - remove = true; - } - } - for( i = 0; i < (int)ep->btl_rdma.arr_size; i++ ) { - if( ep->btl_rdma.bml_btls[i].btl == btl ) { - remove = true; - } - } - - if (true == remove) { - mca_bml.bml_del_proc_btl(errproc, btl); - - opal_output_verbose(10, mca_pml_bfo_output, - "BTL %s error: rank=%d mapping out %s " - "to rank=%d on node=%s \n", - btl->btl_component->btl_version.mca_component_name, - OMPI_PROC_MY_NAME->vpid, - btlname, OMPI_CAST_RTE_NAME(&errproc->super.proc_name)->vpid, - (NULL == errproc->super.proc_hostname) ? "unknown" : errproc->super.proc_hostname); - - /* Need to search for any pending packets associated - * with this endpoint and remove them. We may also - * have to restarts depending on the state of the - * requests. */ - mca_pml_bfo_error_pending_packets(btl, ep); - - if ((ep->btl_eager.arr_size == 0) && - (ep->btl_send.arr_size == 0) && - (ep->btl_rdma.arr_size == 0)) { - opal_output(0, "%s:%d: No more interfaces, aborting", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - } -} - -void mca_pml_bfo_failover_error_handler(struct mca_btl_base_module_t* btl, - int32_t flags, ompi_proc_t *errproc, char *btlname) -{ - ompi_proc_t** procs; - size_t p, num_procs; - - /* If we are in here, we know that the we were called - * with the flags == MCA_BTL_ERROR_FLAGS_NONFATAL so no - * need to check it in here. 
*/ - assert(flags & MCA_BTL_ERROR_FLAGS_NONFATAL); - - procs = ompi_proc_all(&num_procs); - - if(NULL == procs) { - opal_output(0, "%s:%d: Out of memory, giving up.", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - if (NULL == btlname) { - btlname = "unknown"; - } - - /* If the process to map out is not specified then map out the - * entire BTL. Otherwise, only map out the BTL for the specific - * remote process. */ - if (NULL == errproc) { - for( p = 0; p < num_procs; p++ ) { - mca_pml_bfo_map_out_btl(btl, procs[p], btlname); - } - } else { - mca_pml_bfo_map_out_btl(btl, errproc, btlname); - } - free(procs); -} - -/** - * This function is called since when we are mapping out a BML. This - * will walk through the four PML lists and dispatch with the - * fragments/requests. There are four different lists and each one is - * handled slighty differently. In all cases, we first see if the - * message is associated with the endpoint that is being mapped out. - * If not, then just leave it alone and put it back on the list. If - * it is associated with the endpoint, then a each list handles it - * slighlty differently. Also, in some cases, we actually adjust the - * pointers to the BMLs in the messages as they may have changed when - * the BML is mapped out. That is because this is called after we - * have mapped out the offending BML and adjusted the array of - * available BMLs. - */ -static void mca_pml_bfo_error_pending_packets(mca_btl_base_module_t* btl, - mca_bml_base_endpoint_t* ep) { - int32_t i, s; - - /* The pckt_pending list contains both ACK and FIN messages. - * ACKs can be sent over any BTL associated with the endpoint. - * Therefore, the bml_btl entry for ACKS is NULL and they do - * not need to be adjusted. It is also worth noting that - * the ACK will be the only outstanding message associated - * with a request so we can just let nature takes it course. 
- * - * FIN messages do have a BML associated with them, but they - * can also be sent over any BTL. Therefore, adjust the bml - * pointer in the pckt to ensure it points at a valid BML. - */ - - s = (int32_t)opal_list_get_size(&mca_pml_bfo.pckt_pending); - for(i = 0; i < s; i++) { - mca_pml_bfo_pckt_pending_t *pckt; - opal_output_verbose(0, mca_pml_bfo_output, - "INFO: pckt_pending list has %d entries", s); -#if 1 - /* TODO: Error out until code is tested */ - opal_output_verbose(0, mca_pml_bfo_output, - "%s:%d: Support not implemented, aborting", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - pckt = (mca_pml_bfo_pckt_pending_t*) - opal_list_remove_first(&mca_pml_bfo.pckt_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - /* My guess is that this can happen in the threaded - * case where the other thread removed some packets - * after we determined the size of the list. */ - if(NULL == pckt) - break; - - /* If there is no bml stored on the packet, then just - * put it back on the list as there is nothing to adjust. - * This appears to be true with ACK packets. */ - if (NULL == pckt->bml_btl) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.pckt_pending, - (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - /* Now see if this endpoint matches the one we are mapping - * out. If so, adjust the bml entry so to ensure it is - * not pointing at a stale bml. We do not really care - * which BML it is pointing at as long as it is valid. - * In either case, then put entry back on the list. 
*/ - if (pckt->proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] == ep) { - opal_output_verbose(15, mca_pml_bfo_output, - "INFO: Found matching pckt on pckt_pending list, adjusting bml"); - pckt->bml_btl = mca_bml_base_btl_array_get_next(&ep->btl_eager); - } - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.pckt_pending, - (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - } - - /* This next list holds rdma fragments. We need to walk through - * the list and see if any are associated with the endpoint - * we are mapping out. If not, then just put back on the - * list. If they are, then we need to error them out. One issue - * is that we need to deal with the case where there may be more - * then one pending rdma fragment for a request. */ - s = (int32_t)opal_list_get_size(&mca_pml_bfo.rdma_pending); - for(i = 0; i < s; i++) { - mca_pml_bfo_rdma_frag_t* frag; - mca_pml_bfo_send_request_t* sendreq; - mca_pml_bfo_recv_request_t* recvreq; - opal_output_verbose(0, mca_pml_bfo_output, - "INFO: rdma_pending list has %d entries", s); -#if 1 - /* TODO: Error out until code is tested */ - opal_output_verbose(0, mca_pml_bfo_output, - "%s:%d: Support not implemented, aborting", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - frag = (mca_pml_bfo_rdma_frag_t*) - opal_list_remove_first(&mca_pml_bfo.rdma_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - /* My guess is that this can happen in the threaded - * case where the other thread removed some packets - * after we determined the size of the list. */ - if(NULL == frag) - break; - - /* Check to see if it matches our endpoint. If it does, - * then check if it matches the BTL that is being mapped - * out. If it does not, then just readjust the BML pointer. - * If it does, then we need to do something with it. 
*/ - if (frag->rdma_ep != ep) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.rdma_pending, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - /* If we are here, then we know we are working on the same - * endpoint. Now check the BTL. */ - if (frag->rdma_btl != btl) { - opal_output_verbose(15, mca_pml_bfo_output, - "INFO: Found matching frag on rdma_pending list, adjusting bml"); - /* The BTL this RDMA is associated with is not the - * one that is getting mapped out, so just adjust the - * BML pointer and put back on the list. */ - frag->rdma_bml = mca_bml_base_btl_array_find(&ep->btl_rdma, frag->rdma_btl); - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.rdma_pending, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - /* Now we call the restart routine. This is just like if we got - * a completion event after calling an RDMA write. This will - * take care of figuring out if we need to restart the request - * or wait for any outstanding events to complete. 
*/ - if(frag->rdma_state == MCA_PML_BFO_RDMA_PUT) { - opal_output_verbose(15, mca_pml_bfo_output, - "INFO: Found matching PUT frag on rdma_pending list, restarting"); - sendreq = frag->rdma_req; - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, - MCA_PML_BFO_HDR_TYPE_PUT, 2, btl); - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); - } else { - opal_output_verbose(15, mca_pml_bfo_output, - "INFO: Found matching RGET frag on rdma_pending list, sending reqerror"); - /* This is just like what we do on an rget completion event */ - recvreq = (mca_pml_bfo_recv_request_t*)frag->rdma_req; - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, 2); - - /* See if the request has received a RNDVRESTARTNOTIFY */ - if( OPAL_UNLIKELY(recvreq->req_errstate)) { - if (recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED) { - mca_pml_bfo_recv_request_rndvrestartack(recvreq, - MCA_PML_BFO_HDR_TYPE_RGET, - 2, btl); - } - } - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); - } - } - - s = opal_list_get_size(&mca_pml_bfo.send_pending); - /* Look for pending events on our endpoint */ - for(i = 0; i < s; i++) { - mca_pml_bfo_send_request_t* sendreq; - ompi_proc_t* proc; - mca_bml_base_endpoint_t* bml_endpoint; - opal_output_verbose(0, mca_pml_bfo_output, - "INFO: send_pending list has %d entries", s); -#if 1 - /* TODO: Error out until code is tested */ - opal_output_verbose(0, mca_pml_bfo_output, - "%s:%d: Support not implemented, aborting", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - sendreq = (mca_pml_bfo_send_request_t*) - opal_list_remove_first(&mca_pml_bfo.send_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - /* My guess is that this can happen in the threaded - * case where the other thread removed some packets - * after we determined the size of the list. 
*/ - if(NULL == sendreq) - break; - - proc = (ompi_proc_t*)sendreq->req_send.req_base.req_proc; - bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - /* Check to see if it matches our endpoint. If it does not, - * then just put it back on the list as there is nothing - * we need to do with it. */ - if (bml_endpoint != ep) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.send_pending, - (opal_list_item_t*)sendreq); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - switch(sendreq->req_pending) { - case MCA_PML_BFO_SEND_PENDING_SCHEDULE: - /* If this send request is using the endpoint that received - * the error, then let us error it out. In the case - * where there is only one fragment left to be scheduled - * and it would have gone over the good BTL, this is - * not necessary. But, we will use simplicity here - * and assume that some of the fragments are still - * scheduled to go over the broken BTL. */ - sendreq->req_error++; - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, - MCA_PML_BFO_HDR_TYPE_FRAG, 2, btl); - break; - case MCA_PML_BFO_SEND_PENDING_START: - /* If the request has not even started, then just put it back - * on the list. Nothing else to do with it. 
*/ - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.send_pending, - (opal_list_item_t*)sendreq); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - break; - default: - opal_output(0, "[%s:%d] wrong send request type\n", - __FILE__, __LINE__); - break; - } - } - - s = (int)opal_list_get_size(&mca_pml_bfo.recv_pending); - for(i = 0; i < s; i++) { - mca_pml_bfo_recv_request_t* recvreq; - ompi_proc_t* proc; - mca_bml_base_endpoint_t* bml_endpoint; - opal_output_verbose(0, mca_pml_bfo_output, - "INFO: recv_pending list has %d entries", s); -#if 1 - /* TODO: Error out until code is tested */ - opal_output_verbose(0, mca_pml_bfo_output, - "%s:%d: Support not implemented, aborting", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - recvreq = (mca_pml_bfo_recv_request_t*) - opal_list_remove_first(&mca_pml_bfo.recv_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - /* My guess is that this can happen in the threaded - * case where the other thread removed some packets - * after we determined the size of the list. */ - if(NULL == recvreq) - break; - - proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - if (bml_endpoint != ep) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.recv_pending, - (opal_list_item_t*)recvreq); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_PUT, 2); - } -} - -/** - * Call each time we get a completion event on ACK or PUT message. - * These types of messages are receive control type messages. This - * function is only called if the underlying BTL supports failover. - * Otherwise, there is no need for this check. 
- */ -void mca_pml_bfo_check_recv_ctl_completion_status(mca_btl_base_module_t* btl, - struct mca_btl_base_descriptor_t* des, - int status) -{ - mca_pml_bfo_common_hdr_t * common = des->des_local->seg_addr.pval; - mca_pml_bfo_rdma_hdr_t* hdr; /* PUT header */ - struct mca_btl_base_descriptor_t* rdma_des; - mca_pml_bfo_recv_request_t* recvreq; - - if(OPAL_UNLIKELY(OMPI_SUCCESS != status)) { - switch (common->hdr_type) { - case MCA_PML_BFO_HDR_TYPE_ACK: - recvreq = des->des_cbdata; - - /* Record the error. Send RECVERRNOTIFY if necessary. */ - if (recvreq->req_errstate) { - opal_output_verbose(30, mca_pml_bfo_output, - "ACK: completion failed, error already seen, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "ACK: completion failed, sending RECVERRNOTIFY to sender, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_ACK, status); - } - break; - - case MCA_PML_BFO_HDR_TYPE_PUT: - hdr = (mca_pml_bfo_rdma_hdr_t*)des->des_local->seg_addr.pval; - rdma_des = hdr->hdr_des.pval; - recvreq = des->des_cbdata; - if ((NULL != rdma_des->des_cbdata) && (recvreq == rdma_des->des_cbdata)) { - /* We now record the error, send the RECVERRNOTIFY if - * necessary, and free the descriptor. Prior to this, - * we want to ensure that we have not reached the case - * where the PUT message actually made it over and we - * have already received a FIN back. We first check to - * see if the RDMA descriptor cbdata is pointing to - * NULL. 
If it is, this means that the PUT message must - * have made it over and a corresponding FIN already - * made it back and freed the RDMA descriptor. Second, - * if it is non-null, we make sure that it is pointing - * to the same request as the PUT descriptor is. If - * it is not, again we assume that the FIN came back - * and freed it. And we can count on the fact that the - * recvreq has not been freed or reused as it is held - * until this very completion event occurs. */ - if (recvreq->req_errstate) { - opal_output_verbose(30, mca_pml_bfo_output, - "PUT: completion failed, error already seen, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "PUT: completion failed, sending RECVERRNOTIFY to sender, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_PUT, status); - } -#if 0 - /* TODO: Add descriptor to receive request so it can - * be freed only when receive request is freed and - * only if needed. 
*/ - btl->btl_free(btl, rdma_des); -#endif - } - break; - default: - ompi_rte_abort(-1, NULL); - } - } - - switch (common->hdr_type) { - case MCA_PML_BFO_HDR_TYPE_ACK: - recvreq = des->des_cbdata; - recvreq->req_events--; - assert(recvreq->req_events >= 0); - if(OPAL_UNLIKELY (recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED)) { - opal_output_verbose(30, mca_pml_bfo_output, - "ACK: completion: recvreq in error, outstanding events=%d " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d", - recvreq->req_events, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, status, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - if (0 == recvreq->req_events) { - mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_ACK, - status, btl); - } - return; - } - recv_request_pml_complete_check(recvreq); - break; - case MCA_PML_BFO_HDR_TYPE_PUT: - recvreq = des->des_cbdata; - recvreq->req_events--; - assert(recvreq->req_events >= 0); - if(OPAL_UNLIKELY(recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED)) { - opal_output_verbose(30, mca_pml_bfo_output, - "PUT: completion: recvreq in error, outstanding events=%d " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d", - recvreq->req_events, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, status, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - if (0 == recvreq->req_events) { - mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_PUT, - status, btl); - } - return; - } - recv_request_pml_complete_check(recvreq); - break; - } -} - -/** - * Register four functions to handle extra PML message types that - * are utilized when a failover occurs. - */ -int mca_pml_bfo_register_callbacks(void) { - int rc; - /* The following four functions are utilized when failover - * support for openib is enabled. 
*/ - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY, - mca_pml_bfo_recv_frag_callback_rndvrestartnotify, - NULL ); - if(OMPI_SUCCESS != rc) - return rc; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK, - mca_pml_bfo_recv_frag_callback_rndvrestartack, - NULL ); - if(OMPI_SUCCESS != rc) - return rc; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK, - mca_pml_bfo_recv_frag_callback_rndvrestartnack, - NULL ); - if(OMPI_SUCCESS != rc) - return rc; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY, - mca_pml_bfo_recv_frag_callback_recverrnotify, - NULL ); - if(OMPI_SUCCESS != rc) - return rc; - - return rc; -} - -/** - * Update a few fields when we are restarting either a RNDV or - * RGET type message. - */ -void mca_pml_bfo_update_rndv_fields(mca_pml_bfo_hdr_t* hdr, - mca_pml_bfo_send_request_t* sendreq, char *type) -{ - hdr->hdr_common.hdr_flags |= MCA_PML_BFO_HDR_FLAGS_RESTART; - hdr->hdr_rndv.hdr_dst_req = sendreq->req_recv; - hdr->hdr_rndv.hdr_restartseq = sendreq->req_restartseq; - opal_output_verbose(30, mca_pml_bfo_output, - "%s: restarting: PML=%d, RQS=%d, CTX=%d, SRC=%d, " - "src_req=%p, dst_req=%p, peer=%d", - type, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_comm->c_my_rank, (void *)sendreq, - sendreq->req_recv.pval, sendreq->req_send.req_base.req_peer); -} - -/** - * The following set of functions are all called when it is determined - * that the cached bml_btl->btl does not match the btl handed back - * by the callback function. This means that the bml_btl array has - * been shuffled and the bml_btl matching the btl has to be found - * back. If it cannot be found, then just find a different one to - * use. 
- */ -void mca_pml_bfo_update_eager_bml_btl_recv_ctl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - struct mca_btl_base_descriptor_t* des) -{ - if ((*bml_btl)->btl != btl) { - mca_pml_bfo_common_hdr_t * common = des->des_local->seg_addr.pval; - mca_pml_bfo_ack_hdr_t* ack; /* ACK header */ - mca_pml_bfo_recv_request_t* recvreq = NULL; - char *type = NULL; - - switch (common->hdr_type) { - case MCA_PML_BFO_HDR_TYPE_ACK: - ack = (mca_pml_bfo_ack_hdr_t*)des->des_local->seg_addr.pval; - recvreq = (mca_pml_bfo_recv_request_t*) ack->hdr_dst_req.pval; - type = "ACK"; - break; - case MCA_PML_BFO_HDR_TYPE_PUT: - recvreq = des->des_cbdata; - type = "PUT"; - break; - default: - /* In theory, this can never happen. */ - opal_output(0, "%s:%d FATAL ERROR, unknown header (hdr=%d)", - __FILE__, __LINE__, common->hdr_type); - ompi_rte_abort(-1, NULL); - } - - mca_pml_bfo_find_recvreq_eager_bml_btl(bml_btl, btl, recvreq, type); - } -} - -void mca_pml_bfo_find_sendreq_eager_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_send_request_t* sendreq, - char* type) -{ - if ((*bml_btl)->btl != btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML does not match BTL, find it back, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - *bml_btl = mca_bml_base_btl_array_find(&sendreq->req_endpoint->btl_eager, btl); - if (NULL == *bml_btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML is gone, find another one, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - *bml_btl = mca_bml_base_btl_array_get_next(&sendreq->req_endpoint->btl_eager); - } - } -} - -void 
mca_pml_bfo_find_sendreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_send_request_t* sendreq, - char* type) -{ - if ((*bml_btl)->btl != btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML does not match BTL, find it back, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - *bml_btl = mca_bml_base_btl_array_find(&sendreq->req_endpoint->btl_rdma, btl); - if (NULL == *bml_btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML is gone, find another one, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - *bml_btl = mca_bml_base_btl_array_get_next(&sendreq->req_endpoint->btl_rdma); - } - } -} - -void mca_pml_bfo_find_recvreq_eager_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_recv_request_t* recvreq, - char* type) -{ - if ((*bml_btl)->btl != btl) { - ompi_proc_t *proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML does not match BTL, find it back, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - *bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl); - if (NULL == *bml_btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML is gone, find another one, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, 
recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - *bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - } - } -} - -void mca_pml_bfo_find_recvreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_recv_request_t* recvreq, - char* type) -{ - if ((*bml_btl)->btl != btl) { - ompi_proc_t *proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML does not match BTL, find it back, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - *bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); - if (NULL == *bml_btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML is gone, find another one, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - *bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma); - } - } -} - -/** - * The completion event for the RNDV message has returned with an - * error. We know that the send request we are looking at is valid - * because it cannot be completed until the sendreq->req_state value - * reaches 0. And for the sendreq->req_state to reach 0, the - * completion event on the RNDV message must occur. So, we do not - * bother checking whether the send request is valid, because we know - * it is, but we put a few asserts in for good measure. We then check - * a few fields in the request to decide what to do. 
If the - * sendreq->req_error is set, that means that something has happend - * already to the request and we do not want to restart it. - * Presumably, we may have received a RECVERRNOTIFY message from the - * receiver. We also check the sendreq->req_acked field to see if it - * has been acked. If it has, then again we do not restart everything - * because obviously the RNDV message has made it to the other side. - */ -bool mca_pml_bfo_rndv_completion_status_error(struct mca_btl_base_descriptor_t* des, - mca_pml_bfo_send_request_t* sendreq) -{ - assert(((mca_pml_bfo_hdr_t*)((des)->des_local->seg_addr.pval))->hdr_match.hdr_ctx == - (sendreq)->req_send.req_base.req_comm->c_contextid); - assert(((mca_pml_bfo_hdr_t*)((des)->des_local->seg_addr.pval))->hdr_match.hdr_src == - (sendreq)->req_send.req_base.req_comm->c_my_rank); - assert(((mca_pml_bfo_hdr_t*)((des)->des_local->seg_addr.pval))->hdr_match.hdr_seq == - (uint16_t)(sendreq)->req_send.req_base.req_sequence); - if ((!(sendreq)->req_error) && (NULL == (sendreq)->req_recv.pval)) { - (sendreq)->req_events--; - /* Assume RNDV did not make it, so restart from the beginning. */ - mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RNDV); - return true; - } - return false; -} - -/** - * Check to see if an error has occurred on this send request. If it has - * and there are no outstanding events, then we can start the restart dance. 
- */ -void mca_pml_bfo_completion_sendreq_has_error(mca_pml_bfo_send_request_t* sendreq, - int status, - mca_btl_base_module_t* btl, - int type, - char *description) -{ - opal_output_verbose(30, mca_pml_bfo_output, - "%s: completion: sendreq has error, outstanding events=%d, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d", - description, - sendreq->req_events, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - status, sendreq->req_send.req_base.req_peer); - if (0 == sendreq->req_events) { - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, - type, status, btl); - } -} - -/* If we get an error on the RGET message, then first make sure that - * header matches the send request that we are pointing to. This is - * necessary, because even though the sending side got an error, the - * RGET may have made it to the receiving side and the message transfer - * may have completed. This would then mean the send request has been - * completed and perhaps in use by another communication. So there is - * no need to restart this request. Therefore, ensure that we are - * looking at the same request that the header thinks we are looking - * at. If not, then there is nothing else to be done. 
*/ -void mca_pml_bfo_send_ctl_completion_status_error(struct mca_btl_base_descriptor_t* des) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_pml_bfo_hdr_t* hdr = des->des_local->seg_addr.pval; - switch (hdr->hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_RGET: - if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_comm->c_my_rank) || - (hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence)) { - opal_output_verbose(30, mca_pml_bfo_output, - "RGET: completion event: dropping because no valid request " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " - "RQS:exp=%d,act=%d, dst_req=%p", - (uint16_t)sendreq->req_send.req_base.req_sequence, - hdr->hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, - hdr->hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_comm->c_my_rank, - hdr->hdr_match.hdr_src, - sendreq->req_restartseq, hdr->hdr_rndv.hdr_restartseq, - (void *)sendreq); - return; - } - mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RGET); - return; - default: - opal_output(0, "%s:%d FATAL ERROR, unknown header (hdr=%d)", - __FILE__, __LINE__, hdr->hdr_common.hdr_type); - ompi_rte_abort(-1, NULL); - } -} diff --git a/ompi/mca/pml/bfo/pml_bfo_failover.h b/ompi/mca/pml/bfo/pml_bfo_failover.h deleted file mode 100644 index dbfc9ab2451..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_failover.h +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * Functions that implement failover capabilities. 
- */ - -#ifndef MCA_PML_BFO_FAILOVER_H -#define MCA_PML_BFO_FAILOVER_H - -#include "opal/mca/btl/btl.h" -#include "pml_bfo_hdr.h" - -BEGIN_C_DECLS - -bool mca_pml_bfo_is_duplicate_msg(mca_pml_bfo_comm_proc_t* proc, - mca_pml_bfo_match_hdr_t *hdr); -bool mca_pml_bfo_is_duplicate_fin(mca_pml_bfo_hdr_t* hdr, mca_btl_base_descriptor_t* rdma, - mca_btl_base_module_t* btl); - -mca_pml_bfo_recv_request_t* mca_pml_bfo_get_request(mca_pml_bfo_match_hdr_t *hdr); - -void mca_pml_bfo_send_request_restart(mca_pml_bfo_send_request_t* sendreq, - bool repost, mca_btl_base_tag_t tag); -void mca_pml_bfo_send_request_rndvrestartnotify(mca_pml_bfo_send_request_t* sendreq, - bool repost, mca_btl_base_tag_t tag, int status, - mca_btl_base_module_t* btl); - -void -mca_pml_bfo_rndvrestartnotify_completion(mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status); -void -mca_pml_bfo_check_recv_ctl_completion_status(mca_btl_base_module_t* btl, - struct mca_btl_base_descriptor_t* des, - int status); - -/* Reset a receive request to the beginning */ -void mca_pml_bfo_recv_request_reset(mca_pml_bfo_recv_request_t* recvreq); -/* Notify sender that receiver detected an error */ -void mca_pml_bfo_recv_request_recverrnotify(mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_tag_t tag, int status); -/* Ack the RNDVRESTARTNOTIFY message */ -void mca_pml_bfo_recv_request_rndvrestartack(mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_tag_t tag, int status, - mca_btl_base_module_t* btl); -/* Nack the RNDVRESTARTNOTIFY message */ -void mca_pml_bfo_recv_request_rndvrestartnack(mca_btl_base_descriptor_t* olddes, - ompi_proc_t* ompi_proc, bool repost); - -void mca_pml_bfo_recv_restart_completion(mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status); -void mca_pml_bfo_failover_error_handler(struct mca_btl_base_module_t* btl, - int32_t flags, ompi_proc_t *errproc, char 
*btlname); -void mca_pml_bfo_repost_match_fragment(struct mca_btl_base_descriptor_t* des); -void mca_pml_bfo_repost_fin(struct mca_btl_base_descriptor_t* des); - -void mca_pml_bfo_map_out_btl(struct mca_btl_base_module_t* btl, - ompi_proc_t *errproc, char *btlname); - -extern void mca_pml_bfo_map_out( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -int mca_pml_bfo_register_callbacks(void); - -void mca_pml_bfo_update_rndv_fields(mca_pml_bfo_hdr_t* hdr, - mca_pml_bfo_send_request_t*, char *type); - -void mca_pml_bfo_update_bml_btl(mca_bml_base_btl_t** bml_btl, mca_btl_base_module_t* btl, - struct mca_btl_base_descriptor_t* des); - -void mca_pml_bfo_find_recvreq_eager_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_recv_request_t* recvreq, - char* type); - -void mca_pml_bfo_find_sendreq_eager_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_send_request_t* sendreq, - char* type); - -void mca_pml_bfo_find_sendreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_send_request_t* sendreq, - char* type); - -void mca_pml_bfo_update_eager_bml_btl_recv_ctl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - struct mca_btl_base_descriptor_t* des); -void mca_pml_bfo_find_recvreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_recv_request_t* recvreq, - char* type); - -bool mca_pml_bfo_rndv_completion_status_error(struct mca_btl_base_descriptor_t* des, - mca_pml_bfo_send_request_t* sendreq); -void mca_pml_bfo_send_ctl_completion_status_error(struct mca_btl_base_descriptor_t* des); - - -void mca_pml_bfo_completion_sendreq_has_error(mca_pml_bfo_send_request_t* sendreq, - int status, - mca_btl_base_module_t* btl, - int type, - char *description); -/** - * Four new callbacks for the four new message types. 
- */ -extern void mca_pml_bfo_recv_frag_callback_rndvrestartnotify( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -extern void mca_pml_bfo_recv_frag_callback_rndvrestartack( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -extern void mca_pml_bfo_recv_frag_callback_rndvrestartnack( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -/** - * A bunch of macros to help isolate failover code from regular ob1 code. - */ - -/* Drop any ACK fragments if request is in error state. Do not want - * to initiate any more activity. */ -#define MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq) \ - if( OPAL_UNLIKELY((sendreq)->req_error)) { \ - opal_output_verbose(20, mca_pml_bfo_output, \ - "ACK: received: dropping because request in error, " \ - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \ - (uint16_t)(sendreq)->req_send.req_base.req_sequence, \ - (sendreq)->req_restartseq, \ - (void *)(sendreq), (sendreq)->req_recv.pval, \ - (sendreq)->req_send.req_base.req_peer); \ - return; \ - } - -/* Drop any FRAG fragments if request is in error state. Do not want - * to initiate any more activity. 
*/ -#define MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq) \ - if( OPAL_UNLIKELY((recvreq)->req_errstate)) { \ - opal_output_verbose(20, mca_pml_bfo_output, \ - "FRAG: received: dropping because request in error, " \ - "PML=%d, src_req=%p, dst_req=%p, peer=%d, offset=%d", \ - (uint16_t)(recvreq)->req_msgseq, \ - (recvreq)->remote_req_send.pval, \ - (void *)(recvreq), \ - (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, \ - (int)hdr->hdr_frag.hdr_frag_offset); \ - return; \ - } - -/* Drop any PUT fragments if request is in error state. Do not want - * to initiate any more activity. */ -#define MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq) \ - if( OPAL_UNLIKELY((sendreq)->req_error)) { \ - opal_output_verbose(20, mca_pml_bfo_output, \ - "PUT: received: dropping because request in error, " \ - "PML=%d, src_req=%p, dst_req=%p, peer=%d", \ - (uint16_t)(sendreq)->req_send.req_base.req_sequence, \ - (void *)(sendreq), (sendreq)->req_recv.pval, \ - (sendreq)->req_send.req_base.req_peer); \ - return; \ - } - -/** - * Macros for pml_bfo_recvreq.c file. - */ - -/* This can happen if a FIN message arrives after the request was - * marked in error. So, just drop the message. Note that the status - * field is not being checked. That is because the status field is the - * value returned in the FIN hdr.hdr_fail field and may be used for - * other things. Note that we allow the various fields to be updated - * in case this actually completes the request and the sending side - * thinks it is done. 
*/ -#define MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq) \ - if( OPAL_UNLIKELY((recvreq)->req_errstate)) { \ - opal_output_verbose(20, mca_pml_bfo_output, \ - "FIN: received on broken request, skipping, " \ - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \ - (recvreq)->req_msgseq, (recvreq)->req_restartseq, \ - (recvreq)->remote_req_send.pval, (void *)(recvreq), \ - (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \ - /* Even though in error, it still might complete. */ \ - recv_request_pml_complete_check(recvreq); \ - return; \ - } - -#define MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq) \ - if ((recvreq)->req_errstate) { \ - opal_output_verbose(30, mca_pml_bfo_output, \ - "RDMA read: completion failed, error already seen, " \ - "PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", \ - (recvreq)->req_msgseq, (recvreq)->req_restartseq, \ - (unsigned long)(recvreq)->remote_req_send.pval, \ - (unsigned long)(recvreq), \ - (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \ - return; \ - } else { \ - opal_output_verbose(30, mca_pml_bfo_output, \ - "RDMA read: completion failed, sending RECVERRNOTIFY to " \ - "sender, PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", \ - (recvreq)->req_msgseq, (recvreq)->req_restartseq, \ - (unsigned long)(recvreq)->remote_req_send.pval, \ - (unsigned long)(recvreq), \ - (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \ - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, status); \ - } - -#define MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl) \ - /* See if the request has received a RNDVRESTARTNOTIFY */ \ - if( OPAL_UNLIKELY(recvreq->req_errstate)) { \ - if (recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED) { \ - opal_output_verbose(30, mca_pml_bfo_output, \ - "RDMA read: completion: recvreq has error, outstanding events=%d " \ - "PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, status=%d, peer=%d", \ - recvreq->req_events, 
recvreq->req_msgseq, recvreq->req_restartseq, \ - (unsigned long)recvreq->remote_req_send.pval, \ - (unsigned long)recvreq, status, \ - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \ - if (0 == recvreq->req_events) { \ - mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, \ - status, btl); \ - } \ - } \ - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \ - return; \ - } - -/** - * Macros for pml_bfo_sendreq.c file. - */ - -/* This macro is called on the sending side after receiving - * a PUT message. There is a chance that this PUT message - * has shown up and is attempting to modify the state of - * the req_state, but the req_state is no longer being tracked - * because the RNDV message has turned into a RGET message - * because it got an error on the RNDV completion. - */ -#define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) \ - if (sendreq->req_state == -1) { \ - OPAL_THREAD_ADD32(&sendreq->req_state, 1); \ - } - -/* Now check the error state. This request can be in error if the - * RNDV message made it over, but the receiver got an error trying to - * send the ACK back and therefore sent a RECVERRNOTIFY message. In - * that case, we want to start the restart dance as the receiver has - * matched this message already. Only restart if there are no - * outstanding events on send request. */ -#define MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \ - if( OPAL_UNLIKELY ((sendreq)->req_error)) { \ - mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \ - btl, type, description); \ - return; \ - } - -/** - * This macro is called within the frag completion function in two - * places. It is called to see if any errors occur prior to the - * completion event on the frag. 
It is then called a second time - * after the scheduling routine is called as the scheduling routine - * may have detected that a BTL that was cached on the request had - * been removed and therefore marked the request in error. In that - * case, the scheduling of fragments can no longer proceed properly, - * and if there are no outstanding events, iniated the restart dance. - */ -#define MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \ - if( OPAL_UNLIKELY((sendreq)->req_error)) { \ - mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \ - btl, type, description); \ - return; \ - } - -/* This can happen if a FIN message arrives after the request was - * marked in error. So, just drop the message. Note that the status - * field is not checked here. That is because that is the value - * returned in the FIN hdr.hdr_fail field and may be used for other - * things. */ -#define MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des) \ - if( OPAL_UNLIKELY(sendreq->req_error)) { \ - opal_output_verbose(30, mca_pml_bfo_output, \ - "FIN: received on broken request, skipping, " \ - "PML=%d, src_req=%lx, dst_req=%lx, peer=%d", \ - (uint16_t)sendreq->req_send.req_base.req_sequence, \ - (unsigned long)sendreq, (unsigned long)sendreq->req_recv.pval, \ - sendreq->req_send.req_base.req_peer); \ - btl->btl_free(btl, des); \ - return; \ - } - - -/* Check if there has been an error on the send request when we get - * a completion event on the RDMA write. 
*/ -#define MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl) \ - if ( OPAL_UNLIKELY(sendreq->req_error)) { \ - mca_pml_bfo_completion_sendreq_has_error(sendreq, status, btl, \ - MCA_PML_BFO_HDR_TYPE_PUT, "RDMA write"); \ - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \ - return; \ - } - -#define MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, type) \ - if (0 < sendreq->req_restartseq) { \ - mca_pml_bfo_update_rndv_fields(hdr, sendreq, type); \ - } - -/* If a bml_btl gets mapped out, then we need to adjust it based - * on the btl from the callback function. These macros are called on - * every callback to make sure things are copacetic. - */ -#define MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION(bml_btl, btl, des) \ - if (bml_btl->btl != btl) { \ - ompi_proc_t *proc = (ompi_proc_t*) des->des_cbdata; \ - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; \ - bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl); \ - } -#define MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, type) \ - if (bml_btl->btl != btl) { \ - mca_pml_bfo_find_sendreq_eager_bml_btl(&bml_btl, btl, sendreq, type); \ - } -#define MCA_PML_BFO_CHECK_SENDREQ_RDMA_BML_BTL(bml_btl, btl, sendreq, type) \ - if (bml_btl->btl != btl) { \ - mca_pml_bfo_find_sendreq_rdma_bml_btl(&bml_btl, btl, sendreq, type); \ - } - -#define MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL(bml_btl, btl, recvreq, type) \ - if (bml_btl->btl != btl) { \ - mca_pml_bfo_find_recvreq_eager_bml_btl(&bml_btl, btl, recvreq, type); \ - } - -#define MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, type) \ - if (bml_btl->btl != btl) { \ - mca_pml_bfo_find_recvreq_rdma_bml_btl(&bml_btl, btl, recvreq, type); \ - } - -#define MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des) \ - if (bml_btl->btl != btl) { \ - mca_pml_bfo_update_eager_bml_btl_recv_ctl(&bml_btl, btl, des); \ - } - -#define 
MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl) \ - if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { \ - opal_output_verbose(30, mca_pml_bfo_output, \ - "PUT received: no matching BTL to RDMA write to, oustanding " \ - "events=%d, PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \ - sendreq->req_events, \ - (uint16_t)sendreq->req_send.req_base.req_sequence, \ - sendreq->req_restartseq, (void *)sendreq, \ - sendreq->req_recv.pval, sendreq->req_send.req_base.req_peer); \ - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \ - sendreq->req_error++; \ - if (0 == sendreq->req_events) { \ - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, \ - MCA_PML_BFO_HDR_TYPE_PUT, \ - OMPI_ERROR, btl); \ - } \ - return; \ - } - -/* This macro checks to see if the cached number of BTLs in the - * send request still matches the value from the endpoint. - * If it does not, this means that a BTL was removed from the - * available list. In this case, start the request over. - */ -#define MCA_PML_BFO_CHECK_FOR_REMOVED_BTL(sendreq, range) \ - if ((int)mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->btl_send) \ - != range->range_btl_cnt) { \ - sendreq->req_error++; \ - return OMPI_ERROR; \ - } - - -END_C_DECLS - -#endif diff --git a/ompi/mca/pml/bfo/pml_bfo_hdr.h b/ompi/mca/pml/bfo/pml_bfo_hdr.h deleted file mode 100644 index b8c8d145245..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_hdr.h +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PML_BFO_HEADER_H -#define MCA_PML_BFO_HEADER_H - -#include "ompi_config.h" -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif - -#include "opal/types.h" -#include "opal/util/arch.h" -#include "opal/mca/btl/btl.h" -#include "ompi/proc/proc.h" - -#define MCA_PML_BFO_HDR_TYPE_MATCH (MCA_BTL_TAG_PML + 1) -#define MCA_PML_BFO_HDR_TYPE_RNDV (MCA_BTL_TAG_PML + 2) -#define MCA_PML_BFO_HDR_TYPE_RGET (MCA_BTL_TAG_PML + 3) -#define MCA_PML_BFO_HDR_TYPE_ACK (MCA_BTL_TAG_PML + 4) -#define MCA_PML_BFO_HDR_TYPE_NACK (MCA_BTL_TAG_PML + 5) -#define MCA_PML_BFO_HDR_TYPE_FRAG (MCA_BTL_TAG_PML + 6) -#define MCA_PML_BFO_HDR_TYPE_GET (MCA_BTL_TAG_PML + 7) -#define MCA_PML_BFO_HDR_TYPE_PUT (MCA_BTL_TAG_PML + 8) -#define MCA_PML_BFO_HDR_TYPE_FIN (MCA_BTL_TAG_PML + 9) -#if PML_BFO -#define MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY (MCA_BTL_TAG_PML + 10) -#define MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK (MCA_BTL_TAG_PML + 11) -#define MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK (MCA_BTL_TAG_PML + 12) -#define MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY (MCA_BTL_TAG_PML + 13) -#endif /* PML_BFO */ - -#define MCA_PML_BFO_HDR_FLAGS_ACK 1 /* is an ack required */ -#define MCA_PML_BFO_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */ -#define MCA_PML_BFO_HDR_FLAGS_PIN 4 /* is user buffer pinned */ -#define MCA_PML_BFO_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */ -#define MCA_PML_BFO_HDR_FLAGS_NORDMA 16 /* rest will be send by copy-in-out */ -#if PML_BFO -#define MCA_PML_BFO_HDR_FLAGS_RESTART 32 /* restart RNDV because of error */ -#endif /* PML_BFO */ - -/** - * Common hdr attributes - must be first element in each hdr type - */ -struct mca_pml_bfo_common_hdr_t { - uint8_t hdr_type; /**< type of envelope */ - uint8_t hdr_flags; /**< flags indicating how fragment should be processed */ -}; -typedef struct mca_pml_bfo_common_hdr_t mca_pml_bfo_common_hdr_t; - 
-#define MCA_PML_BFO_COMMON_HDR_NTOH(h) -#define MCA_PML_BFO_COMMON_HDR_HTON(h) - -/** - * Header definition for the first fragment, contains the - * attributes required to match the corresponding posted receive. - */ -struct mca_pml_bfo_match_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */ - uint16_t hdr_ctx; /**< communicator index */ - int32_t hdr_src; /**< source rank */ - int32_t hdr_tag; /**< user tag */ - uint16_t hdr_seq; /**< message sequence number */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[2]; /**< explicitly pad to 16 bytes. Compilers seem to already prefer to do this, but make it explicit just in case */ -#endif -}; -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT -#define OMPI_PML_BFO_MATCH_HDR_LEN 16 -#else -#define OMPI_PML_BFO_MATCH_HDR_LEN 14 -#endif - -typedef struct mca_pml_bfo_match_hdr_t mca_pml_bfo_match_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_MATCH_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ -} while(0) -#else -#define MCA_PML_BFO_MATCH_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -#define MCA_PML_BFO_MATCH_HDR_NTOH(h) \ -do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_ctx = ntohs((h).hdr_ctx); \ - (h).hdr_src = ntohl((h).hdr_src); \ - (h).hdr_tag = ntohl((h).hdr_tag); \ - (h).hdr_seq = ntohs((h).hdr_seq); \ -} while (0) - -#define MCA_PML_BFO_MATCH_HDR_HTON(h) \ -do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_MATCH_HDR_FILL(h); \ - (h).hdr_ctx = htons((h).hdr_ctx); \ - (h).hdr_src = htonl((h).hdr_src); \ - (h).hdr_tag = htonl((h).hdr_tag); \ - (h).hdr_seq = htons((h).hdr_seq); \ -} while (0) - -/** - * Header definition for the first fragment when an acknowledgment - * is required. This could be the first fragment of a large message - * or a short message that requires an ack (synchronous). 
- */ -struct mca_pml_bfo_rendezvous_hdr_t { - mca_pml_bfo_match_hdr_t hdr_match; - uint64_t hdr_msg_length; /**< message length */ - opal_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */ -#if PML_BFO - opal_ptr_t hdr_dst_req; /**< pointer to dst req */ - uint8_t hdr_restartseq; /**< restart sequence */ -#endif /* PML_BFO */ -}; -typedef struct mca_pml_bfo_rendezvous_hdr_t mca_pml_bfo_rendezvous_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_RNDV_HDR_FILL(h) \ - MCA_PML_BFO_MATCH_HDR_FILL((h).hdr_match) -#else -#define MCA_PML_BFO_RNDV_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -/* Note that hdr_src_req is not put in network byte order because it - is never processed by the receiver, other than being copied into - the ack header */ -#define MCA_PML_BFO_RNDV_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_MATCH_HDR_NTOH((h).hdr_match); \ - (h).hdr_msg_length = ntoh64((h).hdr_msg_length); \ - } while (0) - -#define MCA_PML_BFO_RNDV_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_MATCH_HDR_HTON((h).hdr_match); \ - MCA_PML_BFO_RNDV_HDR_FILL(h); \ - (h).hdr_msg_length = hton64((h).hdr_msg_length); \ - } while (0) - -/** - * Header definition for a combined rdma rendezvous/get - */ -struct mca_pml_bfo_rget_hdr_t { - mca_pml_bfo_rendezvous_hdr_t hdr_rndv; - uint32_t hdr_seg_cnt; /**< number of segments for rdma */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[4]; -#endif - opal_ptr_t hdr_des; /**< source descriptor */ -}; -typedef struct mca_pml_bfo_rget_hdr_t mca_pml_bfo_rget_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_RGET_HDR_FILL(h) \ -do { \ - MCA_PML_BFO_RNDV_HDR_FILL((h).hdr_rndv); \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ - (h).hdr_padding[2] = 0; \ - (h).hdr_padding[3] = 0; \ -} while(0) -#else -#define MCA_PML_BFO_RGET_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - 
-#define MCA_PML_BFO_RGET_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_RNDV_HDR_NTOH((h).hdr_rndv); \ - (h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \ - } while (0) - -#define MCA_PML_BFO_RGET_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_RNDV_HDR_HTON((h).hdr_rndv); \ - MCA_PML_BFO_RGET_HDR_FILL(h); \ - (h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \ - } while (0) - -/** - * Header for subsequent fragments. - */ -struct mca_pml_bfo_frag_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[6]; -#endif - uint64_t hdr_frag_offset; /**< offset into message */ - opal_ptr_t hdr_src_req; /**< pointer to source request */ - opal_ptr_t hdr_dst_req; /**< pointer to matched receive */ -}; -typedef struct mca_pml_bfo_frag_hdr_t mca_pml_bfo_frag_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_FRAG_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ - (h).hdr_padding[2] = 0; \ - (h).hdr_padding[3] = 0; \ - (h).hdr_padding[4] = 0; \ - (h).hdr_padding[5] = 0; \ -} while(0) -#else -#define MCA_PML_BFO_FRAG_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -#define MCA_PML_BFO_FRAG_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \ - } while (0) - -#define MCA_PML_BFO_FRAG_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_FRAG_HDR_FILL(h); \ - (h).hdr_frag_offset = hton64((h).hdr_frag_offset); \ - } while (0) - -/** - * Header used to acknowledgment outstanding fragment(s). 
- */ - -struct mca_pml_bfo_ack_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[6]; -#endif - opal_ptr_t hdr_src_req; /**< source request */ - opal_ptr_t hdr_dst_req; /**< matched receive request */ - uint64_t hdr_send_offset; /**< starting point of copy in/out */ -}; -typedef struct mca_pml_bfo_ack_hdr_t mca_pml_bfo_ack_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_ACK_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ - (h).hdr_padding[2] = 0; \ - (h).hdr_padding[3] = 0; \ - (h).hdr_padding[4] = 0; \ - (h).hdr_padding[5] = 0; \ -} while (0) -#else -#define MCA_PML_BFO_ACK_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -/* Note that the request headers are not put in NBO because the - src_req is already in receiver's byte order and the dst_req is not - used by the receiver for anything other than backpointers in return - headers */ -#define MCA_PML_BFO_ACK_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_send_offset = ntoh64((h).hdr_send_offset); \ - } while (0) - -#define MCA_PML_BFO_ACK_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_ACK_HDR_FILL(h); \ - (h).hdr_send_offset = hton64((h).hdr_send_offset); \ - } while (0) - -/** - * Header used to initiate an RDMA operation. - */ - -struct mca_pml_bfo_rdma_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[2]; /** two to pad out the hdr to a 4 byte alignment. 
hdr_req will then be 8 byte aligned after 4 for hdr_seg_cnt */ -#endif - uint32_t hdr_seg_cnt; /**< number of segments for rdma */ - opal_ptr_t hdr_req; /**< destination request */ -#if PML_BFO - opal_ptr_t hdr_dst_req; /**< pointer to destination request */ -#endif /* PML_BFO */ - opal_ptr_t hdr_des; /**< source descriptor */ - uint64_t hdr_rdma_offset; /**< current offset into user buffer */ - mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */ -}; -typedef struct mca_pml_bfo_rdma_hdr_t mca_pml_bfo_rdma_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_RDMA_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ -} while(0) -#else -#define MCA_PML_BFO_RDMA_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -#define MCA_PML_BFO_RDMA_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \ - (h).hdr_rdma_offset = ntoh64((h).hdr_rdma_offset); \ - } while (0) - -#define MCA_PML_BFO_RDMA_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_RDMA_HDR_FILL(h); \ - (h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \ - (h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \ - } while (0) - -/** - * Header used to complete an RDMA operation. - */ - -struct mca_pml_bfo_fin_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[2]; -#endif -#if PML_BFO - /* Match info is needed to check for duplicate FIN messages. 
*/ - mca_pml_bfo_match_hdr_t hdr_match; -#endif /* PML_BFO */ - uint32_t hdr_fail; /**< RDMA operation failed */ - opal_ptr_t hdr_des; /**< completed descriptor */ -}; -typedef struct mca_pml_bfo_fin_hdr_t mca_pml_bfo_fin_hdr_t; - -#if PML_BFO -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_FIN_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ - MCA_PML_BFO_MATCH_HDR_FILL((h).hdr_match); \ -} while (0) -#else -#define MCA_PML_BFO_FIN_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -#define MCA_PML_BFO_FIN_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - MCA_PML_BFO_MATCH_HDR_NTOH((h).hdr_match); \ - } while (0) - -#define MCA_PML_BFO_FIN_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_MATCH_HDR_HTON((h).hdr_match); \ - MCA_PML_BFO_FIN_HDR_FILL(h); \ - } while (0) -#else /* PML_BFO */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_FIN_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ -} while (0) -#else -#define MCA_PML_BFO_FIN_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -#define MCA_PML_BFO_FIN_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - } while (0) - -#define MCA_PML_BFO_FIN_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_FIN_HDR_FILL(h); \ - } while (0) -#endif /* PML_BFO */ - -#if PML_BFO -/** - * Header used to restart a rendezvous request. 
- */ -struct mca_pml_bfo_restart_hdr_t { - mca_pml_bfo_match_hdr_t hdr_match; /**< needed to avoid duplicate messages */ - uint8_t hdr_restartseq; /**< restart sequence */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[3]; -#endif - opal_ptr_t hdr_src_req; /**< source request */ - opal_ptr_t hdr_dst_req; /**< matched receive request */ - int32_t hdr_dst_rank; /**< needed to send NACK */ - uint32_t hdr_jobid; /**< needed to send NACK */ - uint32_t hdr_vpid; /**< needed to send NACK */ -}; -typedef struct mca_pml_bfo_restart_hdr_t mca_pml_bfo_restart_hdr_t; - -/* Only need to put parts of the restart header in NBO. No need - to do hdr_src_req and hdr_dst_req as they are only used on the - by the process that originated them. */ -#define MCA_PML_BFO_RESTART_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_MATCH_HDR_NTOH((h).hdr_match); \ - (h).hdr_dst_rank = ntohl((h).hdr_dst_rank); \ - (h).hdr_jobid = ntohl((h).hdr_jobid); \ - (h).hdr_vpid = ntohl((h).hdr_vpid); \ - } while (0) - -#define MCA_PML_BFO_RESTART_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_MATCH_HDR_HTON((h).hdr_match); \ - (h).hdr_dst_rank = htonl((h).hdr_dst_rank); \ - (h).hdr_jobid = htonl((h).hdr_jobid); \ - (h).hdr_vpid = htonl((h).hdr_vpid); \ - } while (0) - -#endif /* PML_BFO */ -/** - * Union of defined hdr types. 
- */ -union mca_pml_bfo_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; - mca_pml_bfo_match_hdr_t hdr_match; - mca_pml_bfo_rendezvous_hdr_t hdr_rndv; - mca_pml_bfo_rget_hdr_t hdr_rget; - mca_pml_bfo_frag_hdr_t hdr_frag; - mca_pml_bfo_ack_hdr_t hdr_ack; - mca_pml_bfo_rdma_hdr_t hdr_rdma; - mca_pml_bfo_fin_hdr_t hdr_fin; -#if PML_BFO - mca_pml_bfo_restart_hdr_t hdr_restart; -#endif /* PML_BFO */ -}; -typedef union mca_pml_bfo_hdr_t mca_pml_bfo_hdr_t; - -#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT -static inline __opal_attribute_always_inline__ void -bfo_hdr_ntoh(mca_pml_bfo_hdr_t *hdr, const uint8_t hdr_type) -{ - if(!(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_NBO)) - return; - - switch(hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - MCA_PML_BFO_MATCH_HDR_NTOH(hdr->hdr_match); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - MCA_PML_BFO_RNDV_HDR_NTOH(hdr->hdr_rndv); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - MCA_PML_BFO_RGET_HDR_NTOH(hdr->hdr_rget); - break; - case MCA_PML_BFO_HDR_TYPE_ACK: - MCA_PML_BFO_ACK_HDR_NTOH(hdr->hdr_ack); - break; - case MCA_PML_BFO_HDR_TYPE_FRAG: - MCA_PML_BFO_FRAG_HDR_NTOH(hdr->hdr_frag); - break; - case MCA_PML_BFO_HDR_TYPE_PUT: - MCA_PML_BFO_RDMA_HDR_NTOH(hdr->hdr_rdma); - break; - case MCA_PML_BFO_HDR_TYPE_FIN: - MCA_PML_BFO_FIN_HDR_NTOH(hdr->hdr_fin); - break; - default: - assert(0); - break; - } -} -#else -#define bfo_hdr_ntoh(h, t) do{}while(0) -#endif - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT -#define bfo_hdr_hton(h, t, p) \ - bfo_hdr_hton_intr((mca_pml_bfo_hdr_t*)h, t, p) -static inline __opal_attribute_always_inline__ void -bfo_hdr_hton_intr(mca_pml_bfo_hdr_t *hdr, const uint8_t hdr_type, - const ompi_proc_t *proc) -{ -#ifdef WORDS_BIGENDIAN - hdr->hdr_common.hdr_flags |= MCA_PML_BFO_HDR_FLAGS_NBO; -#else - - if(!(proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN)) - return; - - hdr->hdr_common.hdr_flags |= MCA_PML_BFO_HDR_FLAGS_NBO; - switch(hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - 
MCA_PML_BFO_MATCH_HDR_HTON(hdr->hdr_match); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - MCA_PML_BFO_RNDV_HDR_HTON(hdr->hdr_rndv); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - MCA_PML_BFO_RGET_HDR_HTON(hdr->hdr_rget); - break; - case MCA_PML_BFO_HDR_TYPE_ACK: - MCA_PML_BFO_ACK_HDR_HTON(hdr->hdr_ack); - break; - case MCA_PML_BFO_HDR_TYPE_FRAG: - MCA_PML_BFO_FRAG_HDR_HTON(hdr->hdr_frag); - break; - case MCA_PML_BFO_HDR_TYPE_PUT: - MCA_PML_BFO_RDMA_HDR_HTON(hdr->hdr_rdma); - break; - case MCA_PML_BFO_HDR_TYPE_FIN: - MCA_PML_BFO_FIN_HDR_HTON(hdr->hdr_fin); - break; - default: - assert(0); - break; - } -#endif -} -#else -#define bfo_hdr_hton(h, t, p) do{}while(0) -#endif -#endif diff --git a/ompi/mca/pml/bfo/pml_bfo_iprobe.c b/ompi/mca/pml/bfo/pml_bfo_iprobe.c deleted file mode 100644 index 1f07c2425b6..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_iprobe.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/request/request.h" -#include "ompi/message/message.h" -#include "pml_bfo_recvreq.h" - - -int mca_pml_bfo_iprobe(int src, - int tag, - struct ompi_communicator_t *comm, - int *matched, ompi_status_public_t * status) -{ - int rc = OMPI_SUCCESS; - mca_pml_bfo_recv_request_t recvreq; - - OBJ_CONSTRUCT( &recvreq, mca_pml_bfo_recv_request_t ); - recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; - recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_IPROBE; - - MCA_PML_BFO_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, true); - MCA_PML_BFO_RECV_REQUEST_START(&recvreq); - - if( recvreq.req_recv.req_base.req_ompi.req_complete == true ) { - if( NULL != status ) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; - } - rc = recvreq.req_recv.req_base.req_ompi.req_status.MPI_ERROR; - *matched = 1; - } else { - *matched = 0; - opal_progress(); - } - MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv ); - return rc; -} - - -int mca_pml_bfo_probe(int src, - int tag, - struct ompi_communicator_t *comm, - ompi_status_public_t * status) -{ - int rc = OMPI_SUCCESS; - mca_pml_bfo_recv_request_t recvreq; - - OBJ_CONSTRUCT( &recvreq, mca_pml_bfo_recv_request_t ); - recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; - recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_PROBE; - - MCA_PML_BFO_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, true); - MCA_PML_BFO_RECV_REQUEST_START(&recvreq); - - ompi_request_wait_completion(&recvreq.req_recv.req_base.req_ompi); - rc = recvreq.req_recv.req_base.req_ompi.req_status.MPI_ERROR; - if (NULL != status) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; - } - - MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv ); - return rc; -} - - -int -mca_pml_bfo_improbe(int src, - int tag, - struct ompi_communicator_t *comm, - int *matched, - 
struct ompi_message_t **message, - ompi_status_public_t * status) -{ - int rc = OMPI_SUCCESS; - mca_pml_bfo_recv_request_t *recvreq; - - *message = ompi_message_alloc(); - if (NULL == *message) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - - MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq); - if (NULL == recvreq) - return OMPI_ERR_OUT_OF_RESOURCE; - recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_IMPROBE; - - /* initialize the request enough to probe and get the status */ - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, NULL, 0, &ompi_mpi_char.dt, - src, tag, comm, false); - MCA_PML_BFO_RECV_REQUEST_START(recvreq); - - if( recvreq->req_recv.req_base.req_ompi.req_complete == true ) { - if( NULL != status ) { - *status = recvreq->req_recv.req_base.req_ompi.req_status; - } - *matched = 1; - - (*message)->comm = comm; - (*message)->req_ptr = recvreq; - (*message)->peer = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; - (*message)->count = recvreq->req_recv.req_base.req_ompi.req_status._ucount; - - rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; - } else { - *matched = 0; - - /* we only free if we didn't match, because we're going to - translate the request into a receive request later on if it - was matched */ - MCA_PML_BFO_RECV_REQUEST_RETURN( recvreq ); - ompi_message_return(*message); - *message = MPI_MESSAGE_NULL; - - opal_progress(); - } - - return rc; -} - - -int -mca_pml_bfo_mprobe(int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_message_t **message, - ompi_status_public_t * status) -{ - int rc = OMPI_SUCCESS; - mca_pml_bfo_recv_request_t *recvreq; - - *message = ompi_message_alloc(); - if (NULL == *message) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - - MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq); - if (NULL == recvreq) - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_MPROBE; - - /* initialize the request enough to probe and get the status */ - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, NULL, 0, 
&ompi_mpi_char.dt, - src, tag, comm, false); - MCA_PML_BFO_RECV_REQUEST_START(recvreq); - - ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi); - rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; - - if( NULL != status ) { - *status = recvreq->req_recv.req_base.req_ompi.req_status; - } - - (*message)->comm = comm; - (*message)->req_ptr = recvreq; - (*message)->peer = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; - (*message)->count = recvreq->req_recv.req_base.req_ompi.req_status._ucount; - - return rc; -} diff --git a/ompi/mca/pml/bfo/pml_bfo_irecv.c b/ompi/mca/pml/bfo/pml_bfo_irecv.c deleted file mode 100644 index 0278a93a921..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_irecv.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/request/request.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_recvfrag.h" -#include "ompi/peruse/peruse-internal.h" -#include "ompi/message/message.h" - -int mca_pml_bfo_irecv_init(void *addr, - size_t count, - ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_request_t **request) -{ - mca_pml_bfo_recv_request_t *recvreq; - MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq); - if (NULL == recvreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, true); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &((recvreq)->req_recv.req_base), - PERUSE_RECV); - - *request = (ompi_request_t *) recvreq; - return OMPI_SUCCESS; -} - -int mca_pml_bfo_irecv(void *addr, - size_t count, - ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_request_t **request) -{ - mca_pml_bfo_recv_request_t *recvreq; - MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq); - if (NULL == recvreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &((recvreq)->req_recv.req_base), - PERUSE_RECV); - - MCA_PML_BFO_RECV_REQUEST_START(recvreq); - *request = (ompi_request_t *) recvreq; - return OMPI_SUCCESS; -} - - -int mca_pml_bfo_recv(void *addr, - size_t count, - ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - ompi_status_public_t * status) -{ - int rc; - mca_pml_bfo_recv_request_t *recvreq; - MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq); - if (NULL == recvreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - 
&((recvreq)->req_recv.req_base), - PERUSE_RECV); - - MCA_PML_BFO_RECV_REQUEST_START(recvreq); - ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi); - - if (NULL != status) { /* return status */ - *status = recvreq->req_recv.req_base.req_ompi.req_status; - } - rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; - ompi_request_free( (ompi_request_t**)&recvreq ); - return rc; -} - - -int -mca_pml_bfo_imrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - struct ompi_request_t **request ) -{ - mca_pml_bfo_recv_frag_t* frag; - mca_pml_bfo_recv_request_t *recvreq; - mca_pml_bfo_hdr_t *hdr; - int src, tag; - ompi_communicator_t *comm; - mca_pml_bfo_comm_proc_t* proc; - mca_pml_bfo_comm_t* bfo_comm; - uint64_t seq; - - /* get the request from the message and the frag from the request - before we overwrite everything */ - recvreq = (mca_pml_bfo_recv_request_t*) (*message)->req_ptr; - frag = (mca_pml_bfo_recv_frag_t*) recvreq->req_recv.req_base.req_addr; - src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; - tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG; - comm = (*message)->comm; - bfo_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm; - seq = recvreq->req_recv.req_base.req_sequence; - - /* make the request a recv request again */ - /* The old request kept pointers to comm and the char datatype. - We're about to release those, but need to make sure comm - doesn't go out of scope (we don't care about the char datatype - anymore). So retain comm, then release the frag, then reinit - the frag (which will retain comm), then release comm (but the - frag still has it's ref, so it'll stay in scope). Make - sense? 
*/ - OBJ_RETAIN(comm); - MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv); - recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, - buf, - count, datatype, - src, tag, comm, false); - OBJ_RELEASE(comm); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &((recvreq)->req_recv.req_base), - PERUSE_RECV); - - /* init/re-init the request */ - recvreq->req_lock = 0; - recvreq->req_pipeline_depth = 0; - recvreq->req_bytes_received = 0; - /* What about req_rdma_cnt ? */ - recvreq->req_rdma_idx = 0; - recvreq->req_pending = false; - recvreq->req_ack_sent = false; - - MCA_PML_BASE_RECV_START(&recvreq->req_recv.req_base); - - /* Note - sequence number already assigned */ - recvreq->req_recv.req_base.req_sequence = seq; - - proc = &bfo_comm->procs[recvreq->req_recv.req_base.req_peer]; - recvreq->req_recv.req_base.req_proc = proc->ompi_proc; - prepare_recv_req_converter(recvreq); - - /* we can't go through the match, since we already have the match. 
- Cheat and do what REQUEST_START does, but without the frag - search */ - hdr = (mca_pml_bfo_hdr_t*)frag->segments->seg_addr.pval; - switch(hdr->hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - mca_pml_bfo_recv_request_progress_match(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - mca_pml_bfo_recv_request_progress_rndv(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - mca_pml_bfo_recv_request_progress_rget(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - default: - assert(0); - } - MCA_PML_BFO_RECV_FRAG_RETURN(frag); - - ompi_message_return(*message); - *message = MPI_MESSAGE_NULL; - *request = (ompi_request_t *) recvreq; - - return OMPI_SUCCESS; -} - - -int -mca_pml_bfo_mrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - ompi_status_public_t* status ) -{ - mca_pml_bfo_recv_frag_t* frag; - mca_pml_bfo_recv_request_t *recvreq; - mca_pml_bfo_hdr_t *hdr; - int src, tag, rc; - ompi_communicator_t *comm; - mca_pml_bfo_comm_proc_t* proc; - mca_pml_bfo_comm_t* bfo_comm; - uint64_t seq; - - /* get the request from the message and the frag from the request - before we overwrite everything */ - comm = (*message)->comm; - recvreq = (mca_pml_bfo_recv_request_t*) (*message)->req_ptr; - frag = (mca_pml_bfo_recv_frag_t*) recvreq->req_recv.req_base.req_addr; - src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; - tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG; - seq = recvreq->req_recv.req_base.req_sequence; - bfo_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm; - - /* make the request a recv request again */ - /* The old request kept pointers to comm and the char datatype. - We're about to release those, but need to make sure comm - doesn't go out of scope (we don't care about the char datatype - anymore). 
So retain comm, then release the frag, then reinit - the frag (which will retain comm), then release comm (but the - frag still has it's ref, so it'll stay in scope). Make - sense? */ - OBJ_RETAIN(comm); - MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv); - recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, - buf, - count, datatype, - src, tag, comm, false); - OBJ_RELEASE(comm); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &((recvreq)->req_recv.req_base), - PERUSE_RECV); - - /* init/re-init the request */ - recvreq->req_lock = 0; - recvreq->req_pipeline_depth = 0; - recvreq->req_bytes_received = 0; - recvreq->req_rdma_cnt = 0; - recvreq->req_rdma_idx = 0; - recvreq->req_pending = false; - - MCA_PML_BASE_RECV_START(&recvreq->req_recv.req_base); - - /* Note - sequence number already assigned */ - recvreq->req_recv.req_base.req_sequence = seq; - - proc = &bfo_comm->procs[recvreq->req_recv.req_base.req_peer]; - recvreq->req_recv.req_base.req_proc = proc->ompi_proc; - prepare_recv_req_converter(recvreq); - - /* we can't go through the match, since we already have the match. 
- Cheat and do what REQUEST_START does, but without the frag - search */ - hdr = (mca_pml_bfo_hdr_t*)frag->segments->seg_addr.pval; - switch(hdr->hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - mca_pml_bfo_recv_request_progress_match(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - mca_pml_bfo_recv_request_progress_rndv(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - mca_pml_bfo_recv_request_progress_rget(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - default: - assert(0); - } - - ompi_message_return(*message); - *message = MPI_MESSAGE_NULL; - ompi_request_wait_completion(&(recvreq->req_recv.req_base.req_ompi)); - - MCA_PML_BFO_RECV_FRAG_RETURN(frag); - - if (NULL != status) { /* return status */ - *status = recvreq->req_recv.req_base.req_ompi.req_status; - } - rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; - ompi_request_free( (ompi_request_t**)&recvreq ); - return rc; -} - diff --git a/ompi/mca/pml/bfo/pml_bfo_isend.c b/ompi/mca/pml/bfo/pml_bfo_isend.c deleted file mode 100644 index bd14fac91b6..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_isend.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_bfo.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_recvreq.h" -#include "ompi/peruse/peruse-internal.h" - -int mca_pml_bfo_isend_init(void *buf, - size_t count, - ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm, - ompi_request_t ** request) -{ - mca_pml_bfo_send_request_t *sendreq = NULL; - MCA_PML_BFO_SEND_REQUEST_ALLOC(comm, dst, sendreq); - if (NULL == sendreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, true); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &(sendreq)->req_send.req_base, - PERUSE_SEND); - - *request = (ompi_request_t *) sendreq; - return OMPI_SUCCESS; -} - - -int mca_pml_bfo_isend(void *buf, - size_t count, - ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm, - ompi_request_t ** request) -{ - int rc; - mca_pml_bfo_send_request_t *sendreq = NULL; - - MCA_PML_BFO_SEND_REQUEST_ALLOC(comm, dst, sendreq); - if (NULL == sendreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &(sendreq)->req_send.req_base, - PERUSE_SEND); - - MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc); - *request = (ompi_request_t *) sendreq; - return rc; -} - - -int mca_pml_bfo_send(void *buf, - size_t count, - ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm) -{ - int rc; - mca_pml_bfo_send_request_t *sendreq; - - MCA_PML_BFO_SEND_REQUEST_ALLOC(comm, dst, sendreq); - if (NULL == sendreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - 
comm, sendmode, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &(sendreq)->req_send.req_base, - PERUSE_SEND); - - MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc); - if (rc != OMPI_SUCCESS) { - MCA_PML_BFO_SEND_REQUEST_RETURN( sendreq ); - return rc; - } - - ompi_request_wait_completion(&sendreq->req_send.req_base.req_ompi); - - rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR; - ompi_request_free( (ompi_request_t**)&sendreq ); - return rc; -} diff --git a/ompi/mca/pml/bfo/pml_bfo_progress.c b/ompi/mca/pml/bfo/pml_bfo_progress.c deleted file mode 100644 index 07c92125d02..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_progress.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_bfo.h" -#include "pml_bfo_sendreq.h" -#include "ompi/mca/bml/base/base.h" - -int mca_pml_bfo_progress(void) -{ - int i, queue_length = opal_list_get_size(&mca_pml_bfo.send_pending); - int j, completed_requests = 0; - bool send_succedded; - - if( OPAL_LIKELY(0 == queue_length) ) - return 0; - - for( i = 0; i < queue_length; i++ ) { - mca_pml_bfo_send_pending_t pending_type = MCA_PML_BFO_SEND_PENDING_NONE; - mca_pml_bfo_send_request_t* sendreq; - mca_bml_base_endpoint_t* endpoint; - - sendreq = get_request_from_send_pending(&pending_type); - if(OPAL_UNLIKELY(NULL == sendreq)) - break; - - switch(pending_type) { - case MCA_PML_BFO_SEND_PENDING_NONE: - assert(0); - return 0; - case MCA_PML_BFO_SEND_PENDING_SCHEDULE: - if( mca_pml_bfo_send_request_schedule_exclusive(sendreq) == - OMPI_ERR_OUT_OF_RESOURCE ) { - return 0; - } - completed_requests++; - break; - case MCA_PML_BFO_SEND_PENDING_START: - endpoint = sendreq->req_endpoint; - send_succedded = false; - for(j = 0; j < (int)mca_bml_base_btl_array_get_size(&endpoint->btl_eager); j++) { - mca_bml_base_btl_t* bml_btl; - int rc; - - /* select a btl */ - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - rc = mca_pml_bfo_send_request_start_btl(sendreq, bml_btl); - if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) { - send_succedded = true; - completed_requests++; - break; - } - } - if( false == send_succedded ) { - add_request_to_send_pending(sendreq, MCA_PML_BFO_SEND_PENDING_START, true); - } - } - } - return completed_requests; -} - diff --git a/ompi/mca/pml/bfo/pml_bfo_rdma.c b/ompi/mca/pml/bfo/pml_bfo_rdma.c deleted file mode 100644 index bad66e8fd21..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_rdma.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/bml/bml.h" -#include "opal/mca/mpool/mpool.h" -#include "pml_bfo.h" -#include "pml_bfo_rdma.h" - -/* Use this registration if no registration needed for a BTL instead of NULL. - * This will help other code to distinguish case when memory is not registered - * from case when registration is not needed */ -static mca_mpool_base_registration_t pml_bfo_dummy_reg; - -/* - * Check to see if memory is registered or can be registered. Build a - * set of registrations on the request. 
- */ - -size_t mca_pml_bfo_rdma_btls( - mca_bml_base_endpoint_t* bml_endpoint, - unsigned char* base, - size_t size, - mca_pml_bfo_com_btl_t* rdma_btls) -{ - int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); - double weight_total = 0; - int num_btls_used = 0, n; - - /* shortcut when there are no rdma capable btls */ - if(num_btls == 0) { - return 0; - } - - /* check to see if memory is registered */ - for(n = 0; n < num_btls && num_btls_used < mca_pml_bfo.max_rdma_per_request; - n++) { - mca_bml_base_btl_t* bml_btl = - mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, - (bml_endpoint->btl_rdma_index + n) % num_btls); - mca_mpool_base_registration_t* reg = &pml_bfo_dummy_reg; - mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool; - - if( NULL != btl_mpool ) { - if(!mca_pml_bfo.leave_pinned) { - /* look through existing registrations */ - btl_mpool->mpool_find(btl_mpool, base, size, ®); - } else { - /* register the memory */ - btl_mpool->mpool_register(btl_mpool, base, size, 0, ®); - } - - if(NULL == reg) - continue; - } - - rdma_btls[num_btls_used].bml_btl = bml_btl; - rdma_btls[num_btls_used].btl_reg = reg; - weight_total += bml_btl->btl_weight; - num_btls_used++; - } - - /* if we don't use leave_pinned and all BTLs that already have this memory - * registered amount to less then half of available bandwidth - fall back to - * pipeline protocol */ - if(0 == num_btls_used || (!mca_pml_bfo.leave_pinned && weight_total < 0.5)) - return 0; - - mca_pml_bfo_calc_weighted_length(rdma_btls, num_btls_used, size, - weight_total); - - bml_endpoint->btl_rdma_index = (bml_endpoint->btl_rdma_index + 1) % num_btls; - return num_btls_used; -} - -size_t mca_pml_bfo_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint, - size_t size, - mca_pml_bfo_com_btl_t* rdma_btls ) -{ - int i, num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); - double weight_total = 0; - - for(i = 0; i < num_btls && i < 
mca_pml_bfo.max_rdma_per_request; i++) { - rdma_btls[i].bml_btl = - mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma); - if(NULL != rdma_btls[i].bml_btl->btl->btl_mpool) - rdma_btls[i].btl_reg = NULL; - else - rdma_btls[i].btl_reg = &pml_bfo_dummy_reg; - - weight_total += rdma_btls[i].bml_btl->btl_weight; - } - - mca_pml_bfo_calc_weighted_length(rdma_btls, i, size, weight_total); - - return i; -} diff --git a/ompi/mca/pml/bfo/pml_bfo_rdma.h b/ompi/mca/pml/bfo/pml_bfo_rdma.h deleted file mode 100644 index 8572682d36a..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_rdma.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_BFO_RDMA_H -#define MCA_PML_BFO_RDMA_H - -struct mca_bml_base_endpoint_t; - -/* - * Of the set of available btls that support RDMA, - * find those that already have registrations - or - * register if required (for leave_pinned option) - */ -size_t mca_pml_bfo_rdma_btls(struct mca_bml_base_endpoint_t* endpoint, - unsigned char* base, size_t size, struct mca_pml_bfo_com_btl_t* btls); - -/* Choose RDMA BTLs to use for sending of a request by pipeline protocol. 
- * Calculate number of bytes to send through each BTL according to available - * bandwidth */ -size_t mca_pml_bfo_rdma_pipeline_btls(struct mca_bml_base_endpoint_t* endpoint, - size_t size, mca_pml_bfo_com_btl_t* rdma_btls); -#endif - diff --git a/ompi/mca/pml/bfo/pml_bfo_rdmafrag.c b/ompi/mca/pml/bfo/pml_bfo_rdmafrag.c deleted file mode 100644 index b99e30a8de5..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_rdmafrag.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_bfo.h" -#include "pml_bfo_rdmafrag.h" - - -OBJ_CLASS_INSTANCE( - mca_pml_bfo_rdma_frag_t, - ompi_free_list_item_t, - NULL, - NULL); diff --git a/ompi/mca/pml/bfo/pml_bfo_rdmafrag.h b/ompi/mca/pml/bfo/pml_bfo_rdmafrag.h deleted file mode 100644 index 832597a5a23..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_rdmafrag.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_BFO_RDMAFRAG_H -#define MCA_PML_BFO_RDMAFRAG_H - -#include "pml_bfo_hdr.h" - -BEGIN_C_DECLS - -typedef enum { - MCA_PML_BFO_RDMA_PUT, - MCA_PML_BFO_RDMA_GET -} mca_pml_bfo_rdma_state_t; - -struct mca_pml_bfo_rdma_frag_t { - ompi_free_list_item_t super; - mca_bml_base_btl_t* rdma_bml; -#if PML_BFO - mca_btl_base_module_t* rdma_btl; -#endif /* PML_BFO */ - mca_pml_bfo_hdr_t rdma_hdr; - mca_pml_bfo_rdma_state_t rdma_state; - size_t rdma_length; - uint8_t rdma_segs[MCA_BTL_SEG_MAX_SIZE * MCA_BTL_DES_MAX_SEGMENTS]; - void *rdma_req; - struct mca_bml_base_endpoint_t* rdma_ep; - opal_convertor_t convertor; - mca_mpool_base_registration_t* reg; - uint32_t retries; -}; -typedef struct mca_pml_bfo_rdma_frag_t mca_pml_bfo_rdma_frag_t; - -OBJ_CLASS_DECLARATION(mca_pml_bfo_rdma_frag_t); - - -#define MCA_PML_BFO_RDMA_FRAG_ALLOC(frag) \ -do { \ - ompi_free_list_item_t* item; \ - OMPI_FREE_LIST_WAIT_MT(&mca_pml_bfo.rdma_frags, item); \ - frag = (mca_pml_bfo_rdma_frag_t*)item; \ -} while(0) - -#define MCA_PML_BFO_RDMA_FRAG_RETURN(frag) \ -do { \ - /* return fragment */ \ - OMPI_FREE_LIST_RETURN_MT(&mca_pml_bfo.rdma_frags, \ - (ompi_free_list_item_t*)frag); \ -} while(0) - - -END_C_DECLS - -#endif - diff --git a/ompi/mca/pml/bfo/pml_bfo_recvfrag.c b/ompi/mca/pml/bfo/pml_bfo_recvfrag.c deleted file mode 100644 index 6ef8f4f58d1..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_recvfrag.c +++ /dev/null @@ -1,743 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. 
All rights - * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2006-2008 University of Houston. All rights reserved. - * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - */ - -#include "ompi_config.h" - -#include "opal/class/opal_list.h" -#include "opal/threads/mutex.h" -#include "opal/prefetch.h" - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/peruse/peruse-internal.h" -#include "ompi/memchecker.h" - -#include "pml_bfo.h" -#include "pml_bfo_comm.h" -#include "pml_bfo_recvfrag.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_hdr.h" -#if PML_BFO -#include "pml_bfo_failover.h" -#endif /* PML_BFO */ - -OBJ_CLASS_INSTANCE( mca_pml_bfo_buffer_t, - ompi_free_list_item_t, - NULL, - NULL ); - -OBJ_CLASS_INSTANCE( mca_pml_bfo_recv_frag_t, - opal_list_item_t, - NULL, - NULL ); - -/** - * Static functions. - */ - -/** - * Append a unexpected descriptor to a queue. This function will allocate and - * initialize the fragment (if necessary) and then will add it to the specified - * queue. The allocated fragment is not returned to the caller. - */ -static void -append_frag_to_list(opal_list_t *queue, mca_btl_base_module_t *btl, - mca_pml_bfo_match_hdr_t *hdr, mca_btl_base_segment_t* segments, - size_t num_segments, mca_pml_bfo_recv_frag_t* frag) -{ - if(NULL == frag) { - MCA_PML_BFO_RECV_FRAG_ALLOC(frag); - MCA_PML_BFO_RECV_FRAG_INIT(frag, hdr, segments, num_segments, btl); - } - opal_list_append(queue, (opal_list_item_t*)frag); -} - -/** - * Match incoming recv_frags against posted receives. 
- * Supports out of order delivery. - * - * @param frag_header (IN) Header of received recv_frag. - * @param frag_desc (IN) Received recv_frag descriptor. - * @param match_made (OUT) Flag indicating wether a match was made. - * @param additional_matches (OUT) List of additional matches - * @return OMPI_SUCCESS or error status on failure. - */ -static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl, - mca_pml_bfo_match_hdr_t *hdr, - mca_btl_base_segment_t* segments, - size_t num_segments, - int type); - -static mca_pml_bfo_recv_request_t* -match_one(mca_btl_base_module_t *btl, - mca_pml_bfo_match_hdr_t *hdr, mca_btl_base_segment_t* segments, - size_t num_segments, ompi_communicator_t *comm_ptr, - mca_pml_bfo_comm_proc_t *proc, - mca_pml_bfo_recv_frag_t* frag); - -void mca_pml_bfo_recv_frag_callback_match(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_match_hdr_t* hdr = (mca_pml_bfo_match_hdr_t*)segments->seg_addr.pval; - ompi_communicator_t *comm_ptr; - mca_pml_bfo_recv_request_t *match = NULL; - mca_pml_bfo_comm_t *comm; - mca_pml_bfo_comm_proc_t *proc; - size_t num_segments = des->des_local_count; - size_t bytes_received = 0; - - assert(num_segments <= MCA_BTL_DES_MAX_SEGMENTS); - - if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_BFO_MATCH_HDR_LEN) ) { - return; - } - bfo_hdr_ntoh(((mca_pml_bfo_hdr_t*) hdr), MCA_PML_BFO_HDR_TYPE_MATCH); - - /* communicator pointer */ - comm_ptr = ompi_comm_lookup(hdr->hdr_ctx); - if(OPAL_UNLIKELY(NULL == comm_ptr)) { - /* This is a special case. A message for a not yet existing - * communicator can happens. Instead of doing a matching we - * will temporarily add it the a pending queue in the PML. - * Later on, when the communicator is completely instantiated, - * this pending queue will be searched and all matching fragments - * moved to the right communicator. 
- */ - append_frag_to_list( &mca_pml_bfo.non_existing_communicator_pending, - btl, hdr, segments, num_segments, NULL ); - return; - } - comm = (mca_pml_bfo_comm_t *)comm_ptr->c_pml_comm; - - /* source sequence number */ - proc = &comm->procs[hdr->hdr_src]; - - /* We generate the MSG_ARRIVED event as soon as the PML is aware - * of a matching fragment arrival. Independing if it is received - * on the correct order or not. This will allow the tools to - * figure out if the messages are not received in the correct - * order (if multiple network interfaces). - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* get next expected message sequence number - if threaded - * run, lock to make sure that if another thread is processing - * a frag from the same message a match is made only once. - * Also, this prevents other posted receives (for a pair of - * end points) from being processed, and potentially "loosing" - * the fragment. - */ - OPAL_THREAD_LOCK(&comm->matching_lock); - - /* get sequence number of next message that can be processed */ - if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) || - (opal_list_get_size(&proc->frags_cant_match) > 0 ))) { - goto slow_path; - } - - /* This is the sequence number we were expecting, so we can try - * matching it to already posted receives. - */ - - /* We're now expecting the next sequence number. */ - proc->expected_sequence++; - - /* We generate the SEARCH_POSTED_QUEUE only when the message is - * received in the correct sequence. Otherwise, we delay the event - * generation until we reach the correct sequence number. - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, NULL); - - /* The match is over. 
We generate the SEARCH_POSTED_Q_END here, - * before going into the mca_pml_bfo_check_cantmatch_for_match so - * we can make a difference for the searching time for all - * messages. - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* release matching lock before processing fragment */ - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - if(OPAL_LIKELY(match)) { - bytes_received = segments->seg_len - OMPI_PML_BFO_MATCH_HDR_LEN; - match->req_recv.req_bytes_packed = bytes_received; - - MCA_PML_BFO_RECV_REQUEST_MATCHED(match, hdr); - if(match->req_bytes_expected > 0) { - struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; - uint32_t iov_count = 1; - - /* - * Make user buffer accessable(defined) before unpacking. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - match->req_recv.req_base.req_addr, - match->req_recv.req_base.req_count, - match->req_recv.req_base.req_datatype); - ); - - iov[0].iov_len = bytes_received; - iov[0].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments->seg_addr.pval + - OMPI_PML_BFO_MATCH_HDR_LEN); - while (iov_count < num_segments) { - bytes_received += segments[iov_count].seg_len; - iov[iov_count].iov_len = segments[iov_count].seg_len; - iov[iov_count].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments[iov_count].seg_addr.pval); - iov_count++; - } - opal_convertor_unpack( &match->req_recv.req_base.req_convertor, - iov, - &iov_count, - &bytes_received ); - match->req_bytes_received = bytes_received; - /* - * Unpacking finished, make the user buffer unaccessable again. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - match->req_recv.req_base.req_addr, - match->req_recv.req_base.req_count, - match->req_recv.req_base.req_datatype); - ); - } - - /* no need to check if complete we know we are.. 
*/ - /* don't need a rmb as that is for checking */ - recv_request_pml_complete(match); - } - return; - - slow_path: - OPAL_THREAD_UNLOCK(&comm->matching_lock); -#if PML_BFO - if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) { - return; - } -#endif /* PML_BFO */ - mca_pml_bfo_recv_frag_match(btl, hdr, segments, - num_segments, MCA_PML_BFO_HDR_TYPE_MATCH); -} - - -void mca_pml_bfo_recv_frag_callback_rndv(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RNDV); - mca_pml_bfo_recv_frag_match(btl, &hdr->hdr_match, segments, - des->des_local_count, MCA_PML_BFO_HDR_TYPE_RNDV); - return; -} - -void mca_pml_bfo_recv_frag_callback_rget(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RGET); - mca_pml_bfo_recv_frag_match(btl, &hdr->hdr_match, segments, - des->des_local_count, MCA_PML_BFO_HDR_TYPE_RGET); - return; -} - - - -void mca_pml_bfo_recv_frag_callback_ack(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_send_request_t* sendreq; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_ACK); - sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_ack.hdr_src_req.pval; - 
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req; -#if PML_BFO - MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq); -#endif /* PML_BFO */ - - /* if the request should be delivered entirely by copy in/out - * then throttle sends */ - if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_NORDMA) - sendreq->req_throttle_sends = true; - - mca_pml_bfo_send_request_copy_in_out(sendreq, - hdr->hdr_ack.hdr_send_offset, - sendreq->req_send.req_bytes_packed - - hdr->hdr_ack.hdr_send_offset); - - if (sendreq->req_state != 0) { - /* Typical receipt of an ACK message causes req_state to be - * decremented. However, a send request that started as an - * RGET request can become a RNDV. For example, when the - * receiver determines that its receive buffer is not - * contiguous and therefore cannot support the RGET - * protocol. A send request that started with the RGET - * protocol has req_state == 0 and as such should not be - * decremented. - */ - OPAL_THREAD_ADD32(&sendreq->req_state, -1); - } - - if(send_request_pml_complete_check(sendreq) == false) - mca_pml_bfo_send_request_schedule(sendreq); - - return; -} - -void mca_pml_bfo_recv_frag_callback_frag(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_recv_request_t* recvreq; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FRAG); - recvreq = (mca_pml_bfo_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval; -#if PML_BFO - MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq); -#endif /* PML_BFO */ - mca_pml_bfo_recv_request_progress_frag(recvreq,btl,segments,des->des_local_count); - - return; -} - - -void mca_pml_bfo_recv_frag_callback_put(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - 
mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_send_request_t* sendreq; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_PUT); - sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_rdma.hdr_req.pval; -#if PML_BFO - MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq); -#endif /* PML_BFO */ - mca_pml_bfo_send_request_put(sendreq,btl,&hdr->hdr_rdma); - - return; -} - - -void mca_pml_bfo_recv_frag_callback_fin(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_btl_base_descriptor_t* rdma; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FIN); - rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval; -#if PML_BFO - if (true == mca_pml_bfo_is_duplicate_fin(hdr, rdma, btl)) { - return; - } -#endif /* PML_BFO */ - rdma->des_cbfunc(btl, NULL, rdma, - hdr->hdr_fin.hdr_fail ? 
OMPI_ERROR : OMPI_SUCCESS); - - return; -} - - - -#define PML_MAX_SEQ ~((mca_pml_sequence_t)0); - -static inline mca_pml_bfo_recv_request_t* get_posted_recv(opal_list_t *queue) -{ - if(opal_list_get_size(queue) == 0) - return NULL; - - return (mca_pml_bfo_recv_request_t*)opal_list_get_first(queue); -} - -static inline mca_pml_bfo_recv_request_t* get_next_posted_recv( - opal_list_t *queue, - mca_pml_bfo_recv_request_t* req) -{ - opal_list_item_t *i = opal_list_get_next((opal_list_item_t*)req); - - if(opal_list_get_end(queue) == i) - return NULL; - - return (mca_pml_bfo_recv_request_t*)i; -} - -static mca_pml_bfo_recv_request_t *match_incomming( - mca_pml_bfo_match_hdr_t *hdr, mca_pml_bfo_comm_t *comm, - mca_pml_bfo_comm_proc_t *proc) -{ - mca_pml_bfo_recv_request_t *specific_recv, *wild_recv; - mca_pml_sequence_t wild_recv_seq, specific_recv_seq; - int tag = hdr->hdr_tag; - - specific_recv = get_posted_recv(&proc->specific_receives); - wild_recv = get_posted_recv(&comm->wild_receives); - - wild_recv_seq = wild_recv ? - wild_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ; - specific_recv_seq = specific_recv ? 
- specific_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ; - - /* they are equal only if both are PML_MAX_SEQ */ - while(wild_recv_seq != specific_recv_seq) { - mca_pml_bfo_recv_request_t **match; - opal_list_t *queue; - int req_tag; - mca_pml_sequence_t *seq; - - if (OPAL_UNLIKELY(wild_recv_seq < specific_recv_seq)) { - match = &wild_recv; - queue = &comm->wild_receives; - seq = &wild_recv_seq; - } else { - match = &specific_recv; - queue = &proc->specific_receives; - seq = &specific_recv_seq; - } - - req_tag = (*match)->req_recv.req_base.req_tag; - if(req_tag == tag || (req_tag == OMPI_ANY_TAG && tag >= 0)) { - opal_list_remove_item(queue, (opal_list_item_t*)(*match)); - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, - &((*match)->req_recv.req_base), PERUSE_RECV); - return *match; - } - - *match = get_next_posted_recv(queue, *match); - *seq = (*match) ? (*match)->req_recv.req_base.req_sequence : PML_MAX_SEQ; - } - - return NULL; -} - -static mca_pml_bfo_recv_request_t* -match_one(mca_btl_base_module_t *btl, - mca_pml_bfo_match_hdr_t *hdr, mca_btl_base_segment_t* segments, - size_t num_segments, ompi_communicator_t *comm_ptr, - mca_pml_bfo_comm_proc_t *proc, - mca_pml_bfo_recv_frag_t* frag) -{ - mca_pml_bfo_recv_request_t *match; - mca_pml_bfo_comm_t *comm = (mca_pml_bfo_comm_t *)comm_ptr->c_pml_comm; - - do { - match = match_incomming(hdr, comm, proc); - - /* if match found, process data */ - if(OPAL_LIKELY(NULL != match)) { - match->req_recv.req_base.req_proc = proc->ompi_proc; - - if(OPAL_UNLIKELY(MCA_PML_REQUEST_PROBE == match->req_recv.req_base.req_type)) { - /* complete the probe */ - mca_pml_bfo_recv_request_matched_probe(match, btl, segments, - num_segments); - /* attempt to match actual request */ - continue; - } else if (MCA_PML_REQUEST_MPROBE == match->req_recv.req_base.req_type) { - /* create a receive frag and associate it with the - request, which is then completed so that it can be - restarted later during mrecv */ - 
mca_pml_bfo_recv_frag_t *tmp; - if(NULL == frag) { - MCA_PML_BFO_RECV_FRAG_ALLOC(tmp); - MCA_PML_BFO_RECV_FRAG_INIT(tmp, hdr, segments, num_segments, btl); - } else { - tmp = frag; - } - - match->req_recv.req_base.req_addr = tmp; - mca_pml_bfo_recv_request_matched_probe(match, btl, segments, - num_segments); - /* this frag is already processed, so we want to break out - of the loop and not end up back on the unexpected queue. */ - return NULL; - } - - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_MSG_MATCH_POSTED_REQ, - &(match->req_recv.req_base), PERUSE_RECV); - return match; - } - - /* if no match found, place on unexpected queue */ - append_frag_to_list(&proc->unexpected_frags, btl, hdr, segments, - num_segments, frag); - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - return NULL; - } while(true); -} - -static mca_pml_bfo_recv_frag_t* check_cantmatch_for_match(mca_pml_bfo_comm_proc_t *proc) -{ - mca_pml_bfo_recv_frag_t *frag; - - /* search the list for a fragment from the send with sequence - * number next_msg_seq_expected - */ - for(frag = (mca_pml_bfo_recv_frag_t*)opal_list_get_first(&proc->frags_cant_match); - frag != (mca_pml_bfo_recv_frag_t*)opal_list_get_end(&proc->frags_cant_match); - frag = (mca_pml_bfo_recv_frag_t*)opal_list_get_next(frag)) - { - mca_pml_bfo_match_hdr_t* hdr = &frag->hdr.hdr_match; - /* - * If the message has the next expected seq from that proc... - */ - if(hdr->hdr_seq != proc->expected_sequence) - continue; - - opal_list_remove_item(&proc->frags_cant_match, (opal_list_item_t*)frag); - return frag; - } - - return NULL; -} - -/** - * RCS/CTS receive side matching - * - * @param hdr list of parameters needed for matching - * This list is also embeded in frag, - * but this allows to save a memory copy when - * a match is made in this routine. (IN) - * @param frag pointer to receive fragment which we want - * to match (IN/OUT). If a match is not made, - * hdr is copied to frag. 
- * @param match_made parameter indicating if we matched frag/ - * hdr (OUT) - * @param additional_matches if a match is made with frag, we - * may be able to match fragments that previously - * have arrived out-of-order. If this is the - * case, the associated fragment descriptors are - * put on this list for further processing. (OUT) - * - * @return OMPI error code - * - * This routine is used to try and match a newly arrived message fragment - * to pre-posted receives. The following assumptions are made - * - fragments are received out of order - * - for long messages, e.g. more than one fragment, a RTS/CTS algorithm - * is used. - * - 2nd and greater fragments include a receive descriptor pointer - * - fragments may be dropped - * - fragments may be corrupt - * - this routine may be called simultaneously by more than one thread - */ -static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl, - mca_pml_bfo_match_hdr_t *hdr, - mca_btl_base_segment_t* segments, - size_t num_segments, - int type) -{ - /* local variables */ - uint16_t next_msg_seq_expected, frag_msg_seq; - ompi_communicator_t *comm_ptr; - mca_pml_bfo_recv_request_t *match = NULL; - mca_pml_bfo_comm_t *comm; - mca_pml_bfo_comm_proc_t *proc; - mca_pml_bfo_recv_frag_t* frag = NULL; - - /* communicator pointer */ - comm_ptr = ompi_comm_lookup(hdr->hdr_ctx); - if(OPAL_UNLIKELY(NULL == comm_ptr)) { - /* This is a special case. A message for a not yet existing - * communicator can happens. Instead of doing a matching we - * will temporarily add it the a pending queue in the PML. - * Later on, when the communicator is completely instantiated, - * this pending queue will be searched and all matching fragments - * moved to the right communicator. 
- */ - append_frag_to_list( &mca_pml_bfo.non_existing_communicator_pending, - btl, hdr, segments, num_segments, NULL ); - return OMPI_SUCCESS; - } - comm = (mca_pml_bfo_comm_t *)comm_ptr->c_pml_comm; - - /* source sequence number */ - frag_msg_seq = hdr->hdr_seq; - proc = &comm->procs[hdr->hdr_src]; - - /** - * We generate the MSG_ARRIVED event as soon as the PML is aware of a matching - * fragment arrival. Independing if it is received on the correct order or not. - * This will allow the tools to figure out if the messages are not received in the - * correct order (if multiple network interfaces). - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* get next expected message sequence number - if threaded - * run, lock to make sure that if another thread is processing - * a frag from the same message a match is made only once. - * Also, this prevents other posted receives (for a pair of - * end points) from being processed, and potentially "loosing" - * the fragment. - */ - OPAL_THREAD_LOCK(&comm->matching_lock); - -#if PML_BFO - if(OPAL_UNLIKELY(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_RESTART)) { - if (NULL == (match = mca_pml_bfo_get_request(hdr))) { - return OMPI_SUCCESS; - } - } else { -#endif /* PML_BFO */ - /* get sequence number of next message that can be processed */ - next_msg_seq_expected = (uint16_t)proc->expected_sequence; - if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected)) - goto wrong_seq; - - /* - * This is the sequence number we were expecting, - * so we can try matching it to already posted - * receives. - */ - -out_of_order_match: - /* We're now expecting the next sequence number. */ - proc->expected_sequence++; - - /** - * We generate the SEARCH_POSTED_QUEUE only when the message is received - * in the correct sequence. Otherwise, we delay the event generation until - * we reach the correct sequence number. 
- */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, frag); - - /** - * The match is over. We generate the SEARCH_POSTED_Q_END here, before going - * into the mca_pml_bfo_check_cantmatch_for_match so we can make a difference - * for the searching time for all messages. - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* release matching lock before processing fragment */ - OPAL_THREAD_UNLOCK(&comm->matching_lock); - -#if PML_BFO - } -#endif /* PML_BFO */ - if(OPAL_LIKELY(match)) { - switch(type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - mca_pml_bfo_recv_request_progress_match(match, btl, segments, num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - mca_pml_bfo_recv_request_progress_rndv(match, btl, segments, num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - mca_pml_bfo_recv_request_progress_rget(match, btl, segments, num_segments); - break; - } - - if(OPAL_UNLIKELY(frag)) - MCA_PML_BFO_RECV_FRAG_RETURN(frag); - } - - /* - * Now that new message has arrived, check to see if - * any fragments on the c_c_frags_cant_match list - * may now be used to form new matchs - */ - if(OPAL_UNLIKELY(opal_list_get_size(&proc->frags_cant_match) > 0)) { - OPAL_THREAD_LOCK(&comm->matching_lock); - if((frag = check_cantmatch_for_match(proc))) { - hdr = &frag->hdr.hdr_match; - segments = frag->segments; - num_segments = frag->num_segments; - btl = frag->btl; - type = hdr->hdr_common.hdr_type; - goto out_of_order_match; - } - OPAL_THREAD_UNLOCK(&comm->matching_lock); - } - - return OMPI_SUCCESS; -wrong_seq: - /* - * This message comes after the next expected, so it - * is ahead of sequence. Save it for later. 
- */ -#if PML_BFO - if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) { - return OMPI_SUCCESS; - } -#endif /* PML_BFO */ - append_frag_to_list(&proc->frags_cant_match, btl, hdr, segments, - num_segments, NULL); - OPAL_THREAD_UNLOCK(&comm->matching_lock); - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/bfo/pml_bfo_recvfrag.h b/ompi/mca/pml/bfo/pml_bfo_recvfrag.h deleted file mode 100644 index 3c975cb810b..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_recvfrag.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_BFO_RECVFRAG_H -#define MCA_PML_BFO_RECVFRAG_H - -#include "pml_bfo_hdr.h" - -BEGIN_C_DECLS - -struct mca_pml_bfo_buffer_t { - size_t len; - void * addr; -}; -typedef struct mca_pml_bfo_buffer_t mca_pml_bfo_buffer_t; - - -struct mca_pml_bfo_recv_frag_t { - ompi_free_list_item_t super; - mca_pml_bfo_hdr_t hdr; - size_t num_segments; - mca_btl_base_module_t* btl; - mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS]; - mca_pml_bfo_buffer_t buffers[MCA_BTL_DES_MAX_SEGMENTS]; - unsigned char addr[1]; -}; -typedef struct mca_pml_bfo_recv_frag_t mca_pml_bfo_recv_frag_t; - -OBJ_CLASS_DECLARATION(mca_pml_bfo_recv_frag_t); - - -#define MCA_PML_BFO_RECV_FRAG_ALLOC(frag) \ -do { \ - ompi_free_list_item_t* item; \ - OMPI_FREE_LIST_WAIT_MT(&mca_pml_bfo.recv_frags, item); \ - frag = (mca_pml_bfo_recv_frag_t*)item; \ -} while(0) - - -#define MCA_PML_BFO_RECV_FRAG_INIT(frag, hdr, segs, cnt, btl ) \ -do { \ - size_t i, _size; \ - mca_btl_base_segment_t* macro_segments = frag->segments; \ - mca_pml_bfo_buffer_t* buffers = frag->buffers; \ - unsigned char* _ptr = (unsigned char*)frag->addr; \ - /* init recv_frag */ \ - frag->btl = btl; \ - frag->hdr = *(mca_pml_bfo_hdr_t*)hdr; \ - frag->num_segments = 1; \ - _size = segs[0].seg_len; \ - for( i = 1; i < cnt; i++ ) { \ - _size += segs[i].seg_len; \ - } \ - /* copy over data */ \ - if(_size <= mca_pml_bfo.unexpected_limit ) { \ - macro_segments[0].seg_addr.pval = frag->addr; \ - } else { \ - buffers[0].len = _size; \ - buffers[0].addr = (char*) \ - mca_pml_bfo.allocator->alc_alloc( mca_pml_bfo.allocator, \ - buffers[0].len, \ - 0, NULL); \ - _ptr = (unsigned char*)(buffers[0].addr); \ - macro_segments[0].seg_addr.pval = buffers[0].addr; \ - } \ - macro_segments[0].seg_len = _size; \ - for( i = 0; i < cnt; i++ ) { \ - memcpy( _ptr, segs[i].seg_addr.pval, segs[i].seg_len); \ - _ptr += segs[i].seg_len; \ - } \ - } 
while(0) - - -#define MCA_PML_BFO_RECV_FRAG_RETURN(frag) \ -do { \ - if( frag->segments[0].seg_len > mca_pml_bfo.unexpected_limit ) { \ - /* return buffers */ \ - mca_pml_bfo.allocator->alc_free( mca_pml_bfo.allocator, \ - frag->buffers[0].addr ); \ - } \ - frag->num_segments = 0; \ - \ - /* return recv_frag */ \ - OMPI_FREE_LIST_RETURN_MT(&mca_pml_bfo.recv_frags, \ - (ompi_free_list_item_t*)frag); \ - } while(0) - - -/** - * Callback from BTL on receipt of a recv_frag (match). - */ - -extern void mca_pml_bfo_recv_frag_callback_match( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -/** - * Callback from BTL on receipt of a recv_frag (rndv). - */ - -extern void mca_pml_bfo_recv_frag_callback_rndv( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (rget). - */ - -extern void mca_pml_bfo_recv_frag_callback_rget( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -/** - * Callback from BTL on receipt of a recv_frag (ack). - */ - -extern void mca_pml_bfo_recv_frag_callback_ack( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (frag). - */ - -extern void mca_pml_bfo_recv_frag_callback_frag( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (put). - */ - -extern void mca_pml_bfo_recv_frag_callback_put( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (fin). 
- */ - -extern void mca_pml_bfo_recv_frag_callback_fin( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - - -END_C_DECLS - -#endif - diff --git a/ompi/mca/pml/bfo/pml_bfo_recvreq.c b/ompi/mca/pml/bfo/pml_bfo_recvreq.c deleted file mode 100644 index bce99353a2d..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_recvreq.c +++ /dev/null @@ -1,1163 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/util/arch.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/bml/bml.h" -#include "pml_bfo_comm.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_recvfrag.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_rdmafrag.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/memchecker.h" - -#if OPAL_CUDA_SUPPORT -int mca_pml_bfo_cuda_need_buffers(mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl); -#endif /* OPAL_CUDA_SUPPORT */ -#if PML_BFO -#include "pml_bfo_failover.h" -#endif /* PML_BFO */ - -void mca_pml_bfo_recv_request_process_pending(void) -{ - mca_pml_bfo_recv_request_t* recvreq; - int rc, i, s = (int)opal_list_get_size(&mca_pml_bfo.recv_pending); - - for(i = 0; i < s; i++) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - recvreq = (mca_pml_bfo_recv_request_t*) - opal_list_remove_first(&mca_pml_bfo.recv_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - if( OPAL_UNLIKELY(NULL == recvreq) ) - break; - recvreq->req_pending = false; - rc = mca_pml_bfo_recv_request_schedule_exclusive(recvreq, NULL); - if(OMPI_ERR_OUT_OF_RESOURCE == rc) - break; - } -} - -static int mca_pml_bfo_recv_request_free(struct ompi_request_t** request) -{ - mca_pml_bfo_recv_request_t* recvreq = *(mca_pml_bfo_recv_request_t**)request; - - assert( false == recvreq->req_recv.req_base.req_free_called ); - - OPAL_THREAD_LOCK(&ompi_request_lock); - recvreq->req_recv.req_base.req_free_called = true; - - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, - &(recvreq->req_recv.req_base), PERUSE_RECV ); - - if( true == recvreq->req_recv.req_base.req_pml_complete ) { - /* make buffer defined when the request is compeleted, - and before releasing the objects. 
*/ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - - MCA_PML_BFO_RECV_REQUEST_RETURN( recvreq ); - } - - OPAL_THREAD_UNLOCK(&ompi_request_lock); - *request = MPI_REQUEST_NULL; - return OMPI_SUCCESS; -} - -static int mca_pml_bfo_recv_request_cancel(struct ompi_request_t* ompi_request, int complete) -{ - mca_pml_bfo_recv_request_t* request = (mca_pml_bfo_recv_request_t*)ompi_request; - mca_pml_bfo_comm_t* comm = request->req_recv.req_base.req_comm->c_pml_comm; - - if( true == ompi_request->req_complete ) { /* way to late to cancel this one */ - /* - * Receive request completed, make user buffer accessable. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - request->req_recv.req_base.req_addr, - request->req_recv.req_base.req_count, - request->req_recv.req_base.req_datatype); - ); - return OMPI_SUCCESS; - } - - /* The rest should be protected behind the match logic lock */ - OPAL_THREAD_LOCK(&comm->matching_lock); - if( OMPI_ANY_TAG == ompi_request->req_status.MPI_TAG ) { /* the match has not been already done */ - if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) { - opal_list_remove_item( &comm->wild_receives, (opal_list_item_t*)request ); - } else { - mca_pml_bfo_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer; - opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request); - } - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, - &(request->req_recv.req_base), PERUSE_RECV ); - /** - * As now the PML is done with this request we have to force the pml_complete - * to true. Otherwise, the request will never be freed. 
- */ - request->req_recv.req_base.req_pml_complete = true; - } - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - OPAL_THREAD_LOCK(&ompi_request_lock); - ompi_request->req_status._cancelled = true; - /* This macro will set the req_complete to true so the MPI Test/Wait* functions - * on this request will be able to complete. As the status is marked as - * cancelled the cancel state will be detected. - */ - MCA_PML_BFO_RECV_REQUEST_MPI_COMPLETE(request); - OPAL_THREAD_UNLOCK(&ompi_request_lock); - /* - * Receive request cancelled, make user buffer accessable. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - request->req_recv.req_base.req_addr, - request->req_recv.req_base.req_count, - request->req_recv.req_base.req_datatype); - ); - return OMPI_SUCCESS; -} - -static void mca_pml_bfo_recv_request_construct(mca_pml_bfo_recv_request_t* request) -{ - request->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; - request->req_recv.req_base.req_ompi.req_free = mca_pml_bfo_recv_request_free; - request->req_recv.req_base.req_ompi.req_cancel = mca_pml_bfo_recv_request_cancel; - request->req_rdma_cnt = 0; - OBJ_CONSTRUCT(&request->lock, opal_mutex_t); -} - -OBJ_CLASS_INSTANCE( - mca_pml_bfo_recv_request_t, - mca_pml_base_recv_request_t, - mca_pml_bfo_recv_request_construct, - NULL); - - -/* - * Release resources. 
- */ - -static void mca_pml_bfo_recv_ctl_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - -#if PML_BFO - if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) { - mca_pml_bfo_check_recv_ctl_completion_status(btl, des, status); - } - MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des); -#endif /* PML_BFO */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/* - * Put operation has completed remotely - update request status - */ - -static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)des->des_cbdata; - size_t bytes_received = 0; - - if( OPAL_LIKELY(status == OMPI_SUCCESS) ) { - bytes_received = mca_pml_bfo_compute_segment_length (btl->btl_seg_size, - (void *) des->des_remote, - des->des_remote_count, 0); - } - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1); - -#if PML_BFO - btl->btl_free(btl, des); - MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq); - MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL(bml_btl, btl, recvreq, "PUT"); -#else /* PML_BFO */ - mca_bml_base_free(bml_btl, des); -#endif /* PML_BFO */ - - /* check completion status */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); - if(recv_request_pml_complete_check(recvreq) == false && - recvreq->req_rdma_offset < recvreq->req_send_offset) { - /* schedule additional rdma operations */ - mca_pml_bfo_recv_request_schedule(recvreq, bml_btl); - } - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/* - * - */ - -int mca_pml_bfo_recv_request_ack_send_btl( - ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, - uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset, - bool 
nordma) -{ - mca_btl_base_descriptor_t* des; - mca_pml_bfo_ack_hdr_t* ack; - int rc; - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_ack_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* fill out header */ - ack = (mca_pml_bfo_ack_hdr_t*)des->des_local->seg_addr.pval; - ack->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_ACK; - ack->hdr_common.hdr_flags = nordma ? MCA_PML_BFO_HDR_FLAGS_NORDMA : 0; - ack->hdr_src_req.lval = hdr_src_req; - ack->hdr_dst_req.pval = hdr_dst_req; - ack->hdr_send_offset = hdr_send_offset; - - bfo_hdr_hton(ack, MCA_PML_BFO_HDR_TYPE_ACK, proc); - - /* initialize descriptor */ - des->des_cbfunc = mca_pml_bfo_recv_ctl_completion; -#if PML_BFO - des->des_cbdata = hdr_dst_req; -#endif /* PML_BFO */ - - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_ACK); - if( OPAL_LIKELY( rc >= 0 ) ) { -#if PML_BFO - if ((bml_btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) && - (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) { - ((mca_pml_bfo_recv_request_t *)hdr_dst_req)->req_events++; - } -#endif /* PML_BFO */ - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des); - return OMPI_ERR_OUT_OF_RESOURCE; -} - -static int mca_pml_bfo_recv_request_ack( - mca_pml_bfo_recv_request_t* recvreq, - mca_pml_bfo_rendezvous_hdr_t* hdr, - size_t bytes_received) -{ - ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = NULL; - - bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - /* by default copy everything */ - recvreq->req_send_offset = bytes_received; - if(hdr->hdr_msg_length > bytes_received) { - size_t rdma_num = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); - /* - * lookup request buffer to determine if memory is already - * registered. 
- */ - - if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == 0 && - hdr->hdr_match.hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_CONTIG && - rdma_num != 0) { - unsigned char *base; - opal_convertor_get_current_pointer( &recvreq->req_recv.req_base.req_convertor, (void**)&(base) ); - - if(hdr->hdr_match.hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_PIN) - recvreq->req_rdma_cnt = mca_pml_bfo_rdma_btls(bml_endpoint, - base, recvreq->req_recv.req_bytes_packed, - recvreq->req_rdma ); - else - recvreq->req_rdma_cnt = 0; - - /* memory is already registered on both sides */ - if (recvreq->req_rdma_cnt != 0) { - recvreq->req_send_offset = hdr->hdr_msg_length; - /* are rdma devices available for long rdma protocol */ - } else if(bml_endpoint->btl_send_limit < hdr->hdr_msg_length) { - /* use convertor to figure out the rdma offset for this request */ - recvreq->req_send_offset = hdr->hdr_msg_length - - bml_endpoint->btl_pipeline_send_length; - - if(recvreq->req_send_offset < bytes_received) - recvreq->req_send_offset = bytes_received; - - /* use converter to figure out the rdma offset for this - * request */ - opal_convertor_set_position(&recvreq->req_recv.req_base.req_convertor, - &recvreq->req_send_offset); - - recvreq->req_rdma_cnt = - mca_pml_bfo_rdma_pipeline_btls(bml_endpoint, - recvreq->req_send_offset - bytes_received, - recvreq->req_rdma); - } - } - /* nothing to send by copy in/out - no need to ack */ - if(recvreq->req_send_offset == hdr->hdr_msg_length) - return OMPI_SUCCESS; - } - /* let know to shedule function there is no need to put ACK flag */ - recvreq->req_ack_sent = true; - return mca_pml_bfo_recv_request_ack_send(proc, hdr->hdr_src_req.lval, - recvreq, recvreq->req_send_offset, - recvreq->req_send_offset == bytes_received); -} - -/** - * Return resources used by the RDMA - */ - -static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int 
status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - mca_pml_bfo_rdma_frag_t* frag = (mca_pml_bfo_rdma_frag_t*)des->des_cbdata; - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)frag->rdma_req; - -#if PML_BFO - if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) { - recvreq->req_events--; - } -#endif /* PML_BFO */ - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { -#if PML_BFO - MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq); -#else /* PML_BFO */ - /* TSW - FIX */ - OMPI_ERROR_LOG(status); - ompi_rte_abort(-1, NULL); -#endif /* PML_BFO */ - } -#if PML_BFO - MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl); - MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, "RDMA write"); -#endif /* PML_BFO */ - - mca_pml_bfo_send_fin(recvreq->req_recv.req_base.req_proc, - bml_btl, - frag->rdma_hdr.hdr_rget.hdr_des, -#if PML_BFO - des->order, 0, (uint16_t)recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->req_recv.req_base.req_comm->c_contextid, - recvreq->req_recv.req_base.req_comm->c_my_rank); -#else /* PML_BFO */ - des->order, 0); -#endif /* PML_BFO */ - - /* is receive request complete */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); - recv_request_pml_complete_check(recvreq); - - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); - - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - - -/* - * - */ -int mca_pml_bfo_recv_request_get_frag( mca_pml_bfo_rdma_frag_t* frag ) -{ - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)frag->rdma_req; - mca_bml_base_btl_t* bml_btl = frag->rdma_bml; - mca_btl_base_descriptor_t* descriptor; - size_t save_size = frag->rdma_length; - int rc; - - /* prepare descriptor */ - mca_bml_base_prepare_dst( bml_btl, - NULL, - &recvreq->req_recv.req_base.req_convertor, - MCA_BTL_NO_ORDER, - 0, - &frag->rdma_length, - MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK | - 
MCA_BTL_DES_FLAGS_GET, - &descriptor ); - if( OPAL_UNLIKELY(NULL == descriptor) ) { - frag->rdma_length = save_size; - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.rdma_pending, (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - descriptor->des_remote = (mca_btl_base_segment_t *) frag->rdma_segs; - descriptor->des_remote_count = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt; - descriptor->des_cbfunc = mca_pml_bfo_rget_completion; - descriptor->des_cbdata = frag; - - PERUSE_TRACE_COMM_OMPI_EVENT(PERUSE_COMM_REQ_XFER_CONTINUE, - &(recvreq->req_recv.req_base), - frag->rdma_length, PERUSE_RECV); - - /* queue up get request */ - rc = mca_bml_base_get(bml_btl,descriptor); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - if(OMPI_ERR_OUT_OF_RESOURCE == rc) { - mca_bml_base_free(bml_btl, descriptor); - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.rdma_pending, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } else { - OMPI_ERROR_LOG(rc); - ompi_rte_abort(-1, NULL); - } - } -#if PML_BFO - if ((bml_btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) && - (descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) { - recvreq->req_events++; - } -#endif /* PML_BFO */ - - return OMPI_SUCCESS; -} - - - - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. 
- */ - -void mca_pml_bfo_recv_request_progress_frag( mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_received, data_offset = 0; - size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_BFO_RECV_REQUEST_UNPACK */ - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - bytes_received = mca_pml_bfo_compute_segment_length_base (segments, num_segments, - sizeof(mca_pml_bfo_frag_hdr_t)); - data_offset = hdr->hdr_frag.hdr_frag_offset; - /* - * Make user buffer accessable(defined) before unpacking. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - MCA_PML_BFO_RECV_REQUEST_UNPACK( recvreq, - segments, - num_segments, - sizeof(mca_pml_bfo_frag_hdr_t), - data_offset, - bytes_received, - bytes_delivered ); - /* - * Unpacking finished, make the user buffer unaccessable again. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); - /* check completion status */ - if(recv_request_pml_complete_check(recvreq) == false && - recvreq->req_rdma_offset < recvreq->req_send_offset) { - /* schedule additional rdma operations */ - mca_pml_bfo_recv_request_schedule(recvreq, NULL); - } -} - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. 
- */ - -void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - mca_pml_bfo_rget_hdr_t* hdr = (mca_pml_bfo_rget_hdr_t*)segments->seg_addr.pval; - mca_bml_base_endpoint_t* bml_endpoint = NULL; - mca_pml_bfo_rdma_frag_t* frag; - size_t i, size = 0; - - recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length; - -#if PML_BFO - recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req; -#endif /* PML_BFO */ - MCA_PML_BFO_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match); - - /* if receive buffer is not contiguous we can't just RDMA read into it, so - * fall back to copy in/out protocol. It is a pity because buffer on the - * sender side is already registered. We need to be smarter here, perhaps - * do couple of RDMA reads */ - if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) { -#if OPAL_CUDA_SUPPORT - if (mca_pml_bfo_cuda_need_buffers(recvreq, btl)) { - mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, 0); - return; - } -#else /* OPAL_CUDA_SUPPORT */ - mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, 0); - return; -#endif /* OPAL_CUDA_SUPPORT */ - } - - MCA_PML_BFO_RDMA_FRAG_ALLOC(frag); - if( OPAL_UNLIKELY(NULL == frag) ) { - /* GLB - FIX */ - OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE); - ompi_rte_abort(-1, NULL); - } - - /* lookup bml datastructures */ - bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs)); - - /* allocate/initialize a fragment */ - memmove (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt); - - for(i = 0; i < hdr->hdr_seg_cnt; i++) { - mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *)(frag->rdma_segs + i * btl->btl_seg_size); - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - if 
((recvreq->req_recv.req_base.req_proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN) != - (ompi_proc_local()->super.proc_arch & OPAL_ARCH_ISBIGENDIAN)) { - size += opal_swap_bytes4(seg->seg_len); - } else -#endif - { - size += seg->seg_len; - } - } -#if PML_BFO - frag->rdma_btl = btl; -#endif /* PML_BFO */ - frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); -#if OPAL_CUDA_SUPPORT - if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { - if (recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) { - /* Check to see if this is a CUDA get */ - if (btl->btl_flags & MCA_BTL_FLAGS_CUDA_GET) { - frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_send, btl); - } - if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { - opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - } else { - /* Just default back to send and receive. Must be mix of GPU and HOST memory. */ - mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, 0); - return; - } - } -#else /* OPAL_CUDA_SUPPORT */ - if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { - opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } -#endif /* OPAL_CUDA_SUPPORT */ - frag->rdma_hdr.hdr_rget = *hdr; - frag->rdma_req = recvreq; - frag->rdma_ep = bml_endpoint; - frag->rdma_length = size; - frag->rdma_state = MCA_PML_BFO_RDMA_GET; - frag->reg = NULL; - - mca_pml_bfo_recv_request_get_frag(frag); - return; -} - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. 
- */ - -void mca_pml_bfo_recv_request_progress_rndv( mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_received; - size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_BFO_RECV_REQUEST_UNPACK */ - size_t data_offset = 0; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - bytes_received = mca_pml_bfo_compute_segment_length_base (segments, num_segments, - sizeof(mca_pml_bfo_rendezvous_hdr_t)); - - recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length; - recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req; - recvreq->req_rdma_offset = bytes_received; - MCA_PML_BFO_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_match); - mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, bytes_received); - /** - * The PUT protocol do not attach any data to the original request. - * Therefore, we might want to avoid unpacking if there is nothing to - * unpack. 
- */ - if( 0 < bytes_received ) { - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - MCA_PML_BFO_RECV_REQUEST_UNPACK( recvreq, - segments, - num_segments, - sizeof(mca_pml_bfo_rendezvous_hdr_t), - data_offset, - bytes_received, - bytes_delivered ); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - } - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); - /* check completion status */ - if(recv_request_pml_complete_check(recvreq) == false && - recvreq->req_rdma_offset < recvreq->req_send_offset) { - /* schedule additional rdma operations */ - mca_pml_bfo_recv_request_schedule(recvreq, NULL); - } -} - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. - */ -void mca_pml_bfo_recv_request_progress_match( mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_received, data_offset = 0; - size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_BFO_RECV_REQUEST_UNPACK */ - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - bytes_received = mca_pml_bfo_compute_segment_length_base (segments, num_segments, - OMPI_PML_BFO_MATCH_HDR_LEN); - - recvreq->req_recv.req_bytes_packed = bytes_received; - - MCA_PML_BFO_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_match); - /* - * Make user buffer accessable(defined) before unpacking. 
- */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - MCA_PML_BFO_RECV_REQUEST_UNPACK( recvreq, - segments, - num_segments, - OMPI_PML_BFO_MATCH_HDR_LEN, - data_offset, - bytes_received, - bytes_delivered); - /* - * Unpacking finished, make the user buffer unaccessable again. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - - /* - * No need for atomic here, as we know there is only one fragment - * for this request. - */ - recvreq->req_bytes_received += bytes_received; - recv_request_pml_complete(recvreq); -} - - -/** - * Handle completion of a probe request - */ - -void mca_pml_bfo_recv_request_matched_probe( mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_packed = 0; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - switch(hdr->hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - bytes_packed = mca_pml_bfo_compute_segment_length_base (segments, num_segments, - OMPI_PML_BFO_MATCH_HDR_LEN); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - case MCA_PML_BFO_HDR_TYPE_RGET: - bytes_packed = hdr->hdr_rndv.hdr_msg_length; - break; - } - - /* set completion status */ - recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_match.hdr_tag; - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_match.hdr_src; - recvreq->req_bytes_received = bytes_packed; - recvreq->req_bytes_expected = bytes_packed; - - recv_request_pml_complete(recvreq); -} - - -/* - * Schedule RDMA protocol. 
- * -*/ - -int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq, - mca_bml_base_btl_t *start_bml_btl ) -{ - mca_bml_base_btl_t* bml_btl; - int num_tries = recvreq->req_rdma_cnt, num_fail = 0; - size_t i, prev_bytes_remaining = 0; - size_t bytes_remaining = recvreq->req_send_offset - - recvreq->req_rdma_offset; - - /* if starting bml_btl is provided schedule next fragment on it first */ - if(start_bml_btl != NULL) { - for(i = 0; i < recvreq->req_rdma_cnt; i++) { - if(recvreq->req_rdma[i].bml_btl != start_bml_btl) - continue; - /* something left to be send? */ - if( OPAL_LIKELY(recvreq->req_rdma[i].length) ) - recvreq->req_rdma_idx = i; - break; - } - } - - while(bytes_remaining > 0 && - recvreq->req_pipeline_depth < mca_pml_bfo.recv_pipeline_depth) { - size_t size, seg_size; - mca_pml_bfo_rdma_hdr_t* hdr; - mca_btl_base_descriptor_t* dst; - mca_btl_base_descriptor_t* ctl; - mca_mpool_base_registration_t * reg = NULL; - mca_btl_base_module_t* btl; - int rc, rdma_idx; - - if(prev_bytes_remaining == bytes_remaining) { - if(++num_fail == num_tries) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - if(false == recvreq->req_pending) { - opal_list_append(&mca_pml_bfo.recv_pending, - (opal_list_item_t*)recvreq); - recvreq->req_pending = true; - } - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } - } else { - num_fail = 0; - prev_bytes_remaining = bytes_remaining; - } - - do { - rdma_idx = recvreq->req_rdma_idx; - bml_btl = recvreq->req_rdma[rdma_idx].bml_btl; - reg = recvreq->req_rdma[rdma_idx].btl_reg; - size = recvreq->req_rdma[rdma_idx].length; - if(++recvreq->req_rdma_idx >= recvreq->req_rdma_cnt) - recvreq->req_rdma_idx = 0; - } while(!size); - btl = bml_btl->btl; - - /* makes sure that we don't exceed BTL max rdma size - * if memory is not pinned already */ - if( (NULL == reg) && (btl->btl_rdma_pipeline_frag_size != 0) && - (size > btl->btl_rdma_pipeline_frag_size)) { - size = btl->btl_rdma_pipeline_frag_size; - } - - 
/* take lock to protect converter against concurrent access - * from unpack */ - OPAL_THREAD_LOCK(&recvreq->lock); - opal_convertor_set_position( &recvreq->req_recv.req_base.req_convertor, - &recvreq->req_rdma_offset ); - - /* prepare a descriptor for RDMA */ - mca_bml_base_prepare_dst(bml_btl, reg, - &recvreq->req_recv.req_base.req_convertor, - MCA_BTL_NO_ORDER, 0, &size, MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_FLAGS_PUT, &dst); - OPAL_THREAD_UNLOCK(&recvreq->lock); - - if(OPAL_UNLIKELY(dst == NULL)) { - continue; - } - - dst->des_cbfunc = mca_pml_bfo_put_completion; - dst->des_cbdata = recvreq; - - seg_size = btl->btl_seg_size * dst->des_local_count; - - /* prepare a descriptor for rdma control message */ - mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof(mca_pml_bfo_rdma_hdr_t) + seg_size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - - if( OPAL_UNLIKELY(NULL == ctl) ) { - mca_bml_base_free(bml_btl,dst); - continue; - } - ctl->des_cbfunc = mca_pml_bfo_recv_ctl_completion; -#if PML_BFO - ctl->des_cbdata = recvreq; -#endif /* PML_BFO */ - - /* fill in rdma header */ - hdr = (mca_pml_bfo_rdma_hdr_t*)ctl->des_local->seg_addr.pval; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_PUT; - hdr->hdr_common.hdr_flags = - (!recvreq->req_ack_sent) ? 
MCA_PML_BFO_HDR_TYPE_ACK : 0; - hdr->hdr_req = recvreq->remote_req_send; -#if PML_BFO - hdr->hdr_dst_req.pval = recvreq; /* only needed in the first put message */ -#endif /* PML_BFO */ - hdr->hdr_des.pval = dst; - hdr->hdr_rdma_offset = recvreq->req_rdma_offset; - hdr->hdr_seg_cnt = dst->des_local_count; - - /* copy segments */ - memmove (hdr + 1, dst->des_local, seg_size); - - if(!recvreq->req_ack_sent) - recvreq->req_ack_sent = true; - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_PUT, recvreq->req_recv.req_base.req_proc); - - PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE, - &(recvreq->req_recv.req_base), size, - PERUSE_RECV); - - /* send rdma request to peer */ - rc = mca_bml_base_send(bml_btl, ctl, MCA_PML_BFO_HDR_TYPE_PUT); - if( OPAL_LIKELY( rc >= 0 ) ) { -#if PML_BFO - if ((btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) && - (ctl->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) { - recvreq->req_events++; - } -#endif /* PML_BFO */ - /* update request state */ - recvreq->req_rdma_offset += size; - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1); - recvreq->req_rdma[rdma_idx].length -= size; - bytes_remaining -= size; - } else { - mca_bml_base_free(bml_btl,ctl); - mca_bml_base_free(bml_btl,dst); - } - } - - return OMPI_SUCCESS; -} - -#define IS_PROB_REQ(R) \ - ((MCA_PML_REQUEST_IPROBE == (R)->req_recv.req_base.req_type) || \ - (MCA_PML_REQUEST_PROBE == (R)->req_recv.req_base.req_type) || \ - (MCA_PML_REQUEST_IMPROBE == (R)->req_recv.req_base.req_type) || \ - (MCA_PML_REQUEST_MPROBE == (R)->req_recv.req_base.req_type)) -#define IS_MPROB_REQ(R) \ - ((MCA_PML_REQUEST_IMPROBE == (R)->req_recv.req_base.req_type) || \ - (MCA_PML_REQUEST_MPROBE == (R)->req_recv.req_base.req_type)) - -static inline void append_recv_req_to_queue(opal_list_t *queue, - mca_pml_bfo_recv_request_t *req) -{ - if(OPAL_UNLIKELY(req->req_recv.req_base.req_type == MCA_PML_REQUEST_IPROBE || - req->req_recv.req_base.req_type == MCA_PML_REQUEST_IMPROBE)) - return; - - 
opal_list_append(queue, (opal_list_item_t*)req); - - /** - * We don't want to generate this kind of event for MPI_Probe. Hopefully, - * the compiler will optimize out the empty if loop in the case where PERUSE - * support is not required by the user. - */ - if(req->req_recv.req_base.req_type != MCA_PML_REQUEST_PROBE || - req->req_recv.req_base.req_type != MCA_PML_REQUEST_MPROBE) { - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_INSERT_IN_POSTED_Q, - &(req->req_recv.req_base), PERUSE_RECV); - } -} - -/* - * this routine tries to match a posted receive. If a match is found, - * it places the request in the appropriate matched receive list. This - * function has to be called with the communicator matching lock held. -*/ -static mca_pml_bfo_recv_frag_t* -recv_req_match_specific_proc( const mca_pml_bfo_recv_request_t *req, - mca_pml_bfo_comm_proc_t *proc ) -{ - opal_list_t* unexpected_frags = &proc->unexpected_frags; - opal_list_item_t *i; - mca_pml_bfo_recv_frag_t* frag; - int tag = req->req_recv.req_base.req_tag; - - if(opal_list_get_size(unexpected_frags) == 0) - return NULL; - - if( OMPI_ANY_TAG == tag ) { - for (i = opal_list_get_first(unexpected_frags); - i != opal_list_get_end(unexpected_frags); - i = opal_list_get_next(i)) { - frag = (mca_pml_bfo_recv_frag_t*)i; - - if( frag->hdr.hdr_match.hdr_tag >= 0 ) - return frag; - } - } else { - for (i = opal_list_get_first(unexpected_frags); - i != opal_list_get_end(unexpected_frags); - i = opal_list_get_next(i)) { - frag = (mca_pml_bfo_recv_frag_t*)i; - - if( frag->hdr.hdr_match.hdr_tag == tag ) - return frag; - } - } - return NULL; -} - -/* - * this routine is used to try and match a wild posted receive - where - * wild is determined by the value assigned to the source process -*/ -static mca_pml_bfo_recv_frag_t* -recv_req_match_wild( mca_pml_bfo_recv_request_t* req, - mca_pml_bfo_comm_proc_t **p) -{ - mca_pml_bfo_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; - mca_pml_bfo_comm_proc_t* proc = comm->procs; - 
size_t i; - - /* - * Loop over all the outstanding messages to find one that matches. - * There is an outer loop over lists of messages from each - * process, then an inner loop over the messages from the - * process. - * - * In order to avoid starvation do this in a round-robin fashion. - */ - for (i = comm->last_probed + 1; i < comm->num_procs; i++) { - mca_pml_bfo_recv_frag_t* frag; - - /* loop over messages from the current proc */ - if((frag = recv_req_match_specific_proc(req, &proc[i]))) { - *p = &proc[i]; - comm->last_probed = i; - req->req_recv.req_base.req_proc = proc[i].ompi_proc; - prepare_recv_req_converter(req); - return frag; /* match found */ - } - } - for (i = 0; i <= comm->last_probed; i++) { - mca_pml_bfo_recv_frag_t* frag; - - /* loop over messages from the current proc */ - if((frag = recv_req_match_specific_proc(req, &proc[i]))) { - *p = &proc[i]; - comm->last_probed = i; - req->req_recv.req_base.req_proc = proc[i].ompi_proc; - prepare_recv_req_converter(req); - return frag; /* match found */ - } - } - - *p = NULL; - return NULL; -} - - -void mca_pml_bfo_recv_req_start(mca_pml_bfo_recv_request_t *req) -{ - mca_pml_bfo_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; - mca_pml_bfo_comm_proc_t* proc; - mca_pml_bfo_recv_frag_t* frag; - opal_list_t *queue; - mca_pml_bfo_hdr_t* hdr; - - /* init/re-init the request */ - req->req_lock = 0; - req->req_pipeline_depth = 0; - req->req_bytes_received = 0; - req->req_bytes_expected = 0; - /* What about req_rdma_cnt ? */ -#if PML_BFO - req->req_rdma_cnt = 0; - req->req_events = 0; - req->req_restartseq = 0; - req->req_errstate = 0; -#endif /* PML_BFO */ - req->req_rdma_idx = 0; - req->req_pending = false; - req->req_ack_sent = false; - - MCA_PML_BASE_RECV_START(&req->req_recv.req_base); - - OPAL_THREAD_LOCK(&comm->matching_lock); - /** - * The laps of time between the ACTIVATE event and the SEARCH_UNEX one include - * the cost of the request lock. 
- */ - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_BEGIN, - &(req->req_recv.req_base), PERUSE_RECV); - - /* assign sequence number */ - req->req_recv.req_base.req_sequence = comm->recv_sequence++; - - /* attempt to match posted recv */ - if(req->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) { - frag = recv_req_match_wild(req, &proc); - queue = &comm->wild_receives; -#if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT - /* As we are in a homogeneous environment we know that all remote - * architectures are exactly the same as the local one. Therefore, - * we can safely construct the convertor based on the proc - * information of rank 0. - */ - if( NULL == frag ) { - req->req_recv.req_base.req_proc = ompi_proc_local_proc; - prepare_recv_req_converter(req); - } -#endif /* !OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ - } else { - proc = &comm->procs[req->req_recv.req_base.req_peer]; - req->req_recv.req_base.req_proc = proc->ompi_proc; - frag = recv_req_match_specific_proc(req, proc); - queue = &proc->specific_receives; - /* wild cardrecv will be prepared on match */ - prepare_recv_req_converter(req); - } - - if(OPAL_UNLIKELY(NULL == frag)) { - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_END, - &(req->req_recv.req_base), PERUSE_RECV); - /* We didn't find any matches. Record this irecv so we can match - it when the message comes in. 
*/ - append_recv_req_to_queue(queue, req); - req->req_match_received = false; - OPAL_THREAD_UNLOCK(&comm->matching_lock); - } else { - if(OPAL_LIKELY(!IS_PROB_REQ(req))) { - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_MATCH_UNEX, - &(req->req_recv.req_base), PERUSE_RECV); - - hdr = (mca_pml_bfo_hdr_t*)frag->segments->seg_addr.pval; - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_REMOVE_FROM_UNEX_Q, - req->req_recv.req_base.req_comm, - hdr->hdr_match.hdr_src, - hdr->hdr_match.hdr_tag, - PERUSE_RECV); - - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_END, - &(req->req_recv.req_base), PERUSE_RECV); - - opal_list_remove_item(&proc->unexpected_frags, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - switch(hdr->hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - mca_pml_bfo_recv_request_progress_match(req, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - mca_pml_bfo_recv_request_progress_rndv(req, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - mca_pml_bfo_recv_request_progress_rget(req, frag->btl, frag->segments, - frag->num_segments); - break; - default: - assert(0); - } - - MCA_PML_BFO_RECV_FRAG_RETURN(frag); - - } else if (OPAL_UNLIKELY(IS_MPROB_REQ(req))) { - /* Remove the fragment from the match list, as it's now - matched. Stash it somewhere in the request (which, - yes, is a complete hack), where it will be plucked out - during the end of mprobe. 
The request will then be - "recreated" as a receive request, and the frag will be - restarted with this request during mrecv */ - opal_list_remove_item(&proc->unexpected_frags, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - req->req_recv.req_base.req_addr = frag; - mca_pml_bfo_recv_request_matched_probe(req, frag->btl, - frag->segments, frag->num_segments); - - } else { - OPAL_THREAD_UNLOCK(&comm->matching_lock); - mca_pml_bfo_recv_request_matched_probe(req, frag->btl, - frag->segments, frag->num_segments); - } - } -} diff --git a/ompi/mca/pml/bfo/pml_bfo_recvreq.h b/ompi/mca/pml/bfo/pml_bfo_recvreq.h deleted file mode 100644 index 4aa05d1dbda..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_recvreq.h +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef OMPI_PML_BFO_RECV_REQUEST_H -#define OMPI_PML_BFO_RECV_REQUEST_H - -#include "pml_bfo.h" -#include "pml_bfo_rdma.h" -#include "pml_bfo_rdmafrag.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/pml/bfo/pml_bfo_comm.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/mca/pml/base/pml_base_recvreq.h" -#if PML_BFO -#define RECVREQ_RECVERRSENT 0x01 -#define RECVREQ_RNDVRESTART_RECVED 0x02 -#define RECVREQ_RNDVRESTART_ACKED 0x04 -#endif /* PML_BFO */ - -BEGIN_C_DECLS - -struct mca_pml_bfo_recv_request_t { - mca_pml_base_recv_request_t req_recv; - opal_ptr_t remote_req_send; -#if PML_BFO - int32_t req_msgseq; /* PML sequence number */ - int32_t req_events; /* number of outstanding events on request */ - int32_t req_restartseq; /* sequence number of restarted request */ - int32_t req_errstate; /* state of request if in error */ -#endif /* PML_BFO */ - int32_t req_lock; - size_t req_pipeline_depth; - size_t req_bytes_received; /**< amount of data transferred into the user buffer */ - size_t req_bytes_expected; /**< local size of the data as suggested by the user */ - size_t req_rdma_offset; - size_t req_send_offset; - uint32_t req_rdma_cnt; - uint32_t req_rdma_idx; - bool req_pending; - bool req_ack_sent; /**< whether ack was sent to the sender */ - bool req_match_received; /**< Prevent request to be completed prematurely */ - opal_mutex_t lock; - mca_pml_bfo_com_btl_t req_rdma[1]; -}; -typedef struct mca_pml_bfo_recv_request_t mca_pml_bfo_recv_request_t; - -OBJ_CLASS_DECLARATION(mca_pml_bfo_recv_request_t); - -static inline bool lock_recv_request(mca_pml_bfo_recv_request_t *recvreq) -{ - return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1; -} - -static inline bool unlock_recv_request(mca_pml_bfo_recv_request_t *recvreq) -{ - return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0; -} - -/** - * Allocate a recv request from the modules free list. 
- * - * @param rc (OUT) OMPI_SUCCESS or error status on failure. - * @return Receive request. - */ -#define MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq) \ -do { \ - ompi_free_list_item_t* item; \ - OMPI_FREE_LIST_GET_MT(&mca_pml_base_recv_requests, item); \ - recvreq = (mca_pml_bfo_recv_request_t*)item; \ -} while(0) - - -/** - * Initialize a receive request with call parameters. - * - * @param request (IN) Receive request. - * @param addr (IN) User buffer. - * @param count (IN) Number of elements of indicated datatype. - * @param datatype (IN) User defined datatype. - * @param src (IN) Source rank w/in the communicator. - * @param tag (IN) User defined tag. - * @param comm (IN) Communicator. - * @param persistent (IN) Is this a ersistent request. - */ -#define MCA_PML_BFO_RECV_REQUEST_INIT( request, \ - addr, \ - count, \ - datatype, \ - src, \ - tag, \ - comm, \ - persistent) \ -do { \ - MCA_PML_BASE_RECV_REQUEST_INIT( &(request)->req_recv, \ - addr, \ - count, \ - datatype, \ - src, \ - tag, \ - comm, \ - persistent); \ -} while(0) - -/** - * Mark the request as completed at MPI level for internal purposes. - * - * @param recvreq (IN) Receive request. - */ -#define MCA_PML_BFO_RECV_REQUEST_MPI_COMPLETE( recvreq ) \ - do { \ - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \ - &(recvreq->req_recv.req_base), PERUSE_RECV ); \ - ompi_request_complete( &(recvreq->req_recv.req_base.req_ompi), true ); \ - } while (0) - -/* - * Free the PML receive request - */ -#define MCA_PML_BFO_RECV_REQUEST_RETURN(recvreq) \ - { \ - MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \ - OMPI_FREE_LIST_RETURN_MT( &mca_pml_base_recv_requests, \ - (ompi_free_list_item_t*)(recvreq)); \ - } - -/** - * Complete receive request. Request structure cannot be accessed after calling - * this function any more. - * - * @param recvreq (IN) Receive request. 
- */ -static inline void -recv_request_pml_complete(mca_pml_bfo_recv_request_t *recvreq) -{ - size_t i; - - assert(false == recvreq->req_recv.req_base.req_pml_complete); - - if(recvreq->req_recv.req_bytes_packed > 0) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, - &recvreq->req_recv.req_base, PERUSE_RECV ); - } - - for(i = 0; i < recvreq->req_rdma_cnt; i++) { - mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[i].btl_reg; - if( NULL != btl_reg && btl_reg->mpool != NULL) { - btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg ); - } - } - recvreq->req_rdma_cnt = 0; -#if PML_BFO - recvreq->req_msgseq -= 100; -#endif /* PML_BFO */ - - OPAL_THREAD_LOCK(&ompi_request_lock); - if(true == recvreq->req_recv.req_base.req_free_called) { - if( MPI_SUCCESS != recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR ) { - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST); - } - MCA_PML_BFO_RECV_REQUEST_RETURN(recvreq); - } else { - /* initialize request status */ - recvreq->req_recv.req_base.req_pml_complete = true; - recvreq->req_recv.req_base.req_ompi.req_status._ucount = - recvreq->req_bytes_received; - if (recvreq->req_recv.req_bytes_packed > recvreq->req_bytes_expected) { - recvreq->req_recv.req_base.req_ompi.req_status._ucount = - recvreq->req_recv.req_bytes_packed; - recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR = - MPI_ERR_TRUNCATE; - } - MCA_PML_BFO_RECV_REQUEST_MPI_COMPLETE(recvreq); - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); -} - -static inline bool -recv_request_pml_complete_check(mca_pml_bfo_recv_request_t *recvreq) -{ -#if OPAL_ENABLE_MULTI_THREADS - opal_atomic_rmb(); -#endif - if(recvreq->req_match_received && - recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed && -#if PML_BFO - (0 == recvreq->req_events) && lock_recv_request(recvreq)) { -#else /* PML_BFO */ - lock_recv_request(recvreq)) { -#endif /* PML_BFO */ - recv_request_pml_complete(recvreq); - return true; - } - - return false; -} - -extern 
void mca_pml_bfo_recv_req_start(mca_pml_bfo_recv_request_t *req); -#define MCA_PML_BFO_RECV_REQUEST_START(r) mca_pml_bfo_recv_req_start(r) - -static inline void prepare_recv_req_converter(mca_pml_bfo_recv_request_t *req) -{ - if( req->req_recv.req_base.req_datatype->super.size | req->req_recv.req_base.req_count ) { - opal_convertor_copy_and_prepare_for_recv( - req->req_recv.req_base.req_proc->super.proc_convertor, - &(req->req_recv.req_base.req_datatype->super), - req->req_recv.req_base.req_count, - req->req_recv.req_base.req_addr, - 0, - &req->req_recv.req_base.req_convertor); - opal_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor, - &req->req_bytes_expected); - } -} - -#define MCA_PML_BFO_RECV_REQUEST_MATCHED(request, hdr) \ - recv_req_matched(request, hdr) - -static inline void recv_req_matched(mca_pml_bfo_recv_request_t *req, - mca_pml_bfo_match_hdr_t *hdr) -{ - req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src; - req->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag; - req->req_match_received = true; -#if PML_BFO - req->req_msgseq = hdr->hdr_seq; -#endif /* PML_BFO */ -#if OPAL_ENABLE_MULTI_THREADS - opal_atomic_wmb(); -#endif - if(req->req_recv.req_bytes_packed > 0) { -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - if(MPI_ANY_SOURCE == req->req_recv.req_base.req_peer) { - /* non wildcard prepared during post recv */ - prepare_recv_req_converter(req); - } -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_XFER_BEGIN, - &req->req_recv.req_base, PERUSE_RECV); - } -} - - -/** - * - */ - -#define MCA_PML_BFO_RECV_REQUEST_UNPACK( request, \ - segments, \ - num_segments, \ - seg_offset, \ - data_offset, \ - bytes_received, \ - bytes_delivered) \ -do { \ - bytes_delivered = 0; \ - if(request->req_recv.req_bytes_packed > 0) { \ - struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; \ - uint32_t iov_count = 0; \ - size_t max_data = bytes_received; \ - size_t n, offset = seg_offset; \ - 
mca_btl_base_segment_t* segment = segments; \ - \ - OPAL_THREAD_LOCK(&request->lock); \ - for( n = 0; n < num_segments; n++, segment++ ) { \ - if(offset >= segment->seg_len) { \ - offset -= segment->seg_len; \ - } else { \ - iov[iov_count].iov_len = segment->seg_len - offset; \ - iov[iov_count].iov_base = (IOVBASE_TYPE*) \ - ((unsigned char*)segment->seg_addr.pval + offset); \ - iov_count++; \ - offset = 0; \ - } \ - } \ - PERUSE_TRACE_COMM_OMPI_EVENT (PERUSE_COMM_REQ_XFER_CONTINUE, \ - &(recvreq->req_recv.req_base), max_data, \ - PERUSE_RECV); \ - opal_convertor_set_position( &(request->req_recv.req_base.req_convertor), \ - &data_offset ); \ - opal_convertor_unpack( &(request)->req_recv.req_base.req_convertor, \ - iov, \ - &iov_count, \ - &max_data ); \ - bytes_delivered = max_data; \ - OPAL_THREAD_UNLOCK(&request->lock); \ - } \ -} while (0) - - -/** - * - */ - -void mca_pml_bfo_recv_request_progress_match( - mca_pml_bfo_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_bfo_recv_request_progress_frag( - mca_pml_bfo_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_bfo_recv_request_progress_rndv( - mca_pml_bfo_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_bfo_recv_request_progress_rget( - mca_pml_bfo_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_bfo_recv_request_matched_probe( - mca_pml_bfo_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -int mca_pml_bfo_recv_request_schedule_once( - mca_pml_bfo_recv_request_t* req, mca_bml_base_btl_t* start_bml_btl); - -static inline int 
mca_pml_bfo_recv_request_schedule_exclusive( - mca_pml_bfo_recv_request_t* req, - mca_bml_base_btl_t* start_bml_btl) -{ - int rc; - - do { - rc = mca_pml_bfo_recv_request_schedule_once(req, start_bml_btl); - if(rc == OMPI_ERR_OUT_OF_RESOURCE) - break; - } while(!unlock_recv_request(req)); - - if(OMPI_SUCCESS == rc) - recv_request_pml_complete_check(req); - - return rc; -} - -static inline void mca_pml_bfo_recv_request_schedule( - mca_pml_bfo_recv_request_t* req, - mca_bml_base_btl_t* start_bml_btl) -{ - if(!lock_recv_request(req)) - return; - - (void)mca_pml_bfo_recv_request_schedule_exclusive(req, start_bml_btl); -} - -#define MCA_PML_BFO_ADD_ACK_TO_PENDING(P, S, D, O) \ - do { \ - mca_pml_bfo_pckt_pending_t *_pckt; \ - \ - MCA_PML_BFO_PCKT_PENDING_ALLOC(_pckt); \ - _pckt->hdr.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_ACK; \ - _pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \ - _pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \ - _pckt->hdr.hdr_ack.hdr_send_offset = (O); \ - _pckt->proc = (P); \ - _pckt->bml_btl = NULL; \ - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); \ - opal_list_append(&mca_pml_bfo.pckt_pending, \ - (opal_list_item_t*)_pckt); \ - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); \ - } while(0) - -int mca_pml_bfo_recv_request_ack_send_btl(ompi_proc_t* proc, - mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req, - uint64_t hdr_rdma_offset, bool nordma); - -static inline int mca_pml_bfo_recv_request_ack_send(ompi_proc_t* proc, - uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset, - bool nordma) -{ - size_t i; - mca_bml_base_btl_t* bml_btl; - mca_bml_base_endpoint_t* endpoint = - (mca_bml_base_endpoint_t*)proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) { - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - if(mca_pml_bfo_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req, - hdr_dst_req, hdr_send_offset, nordma) == OMPI_SUCCESS) - return 
OMPI_SUCCESS; - } - - MCA_PML_BFO_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req, - hdr_send_offset); - - return OMPI_ERR_OUT_OF_RESOURCE; -} - -int mca_pml_bfo_recv_request_get_frag(mca_pml_bfo_rdma_frag_t* frag); - -/* This function tries to continue recvreq that stuck due to resource - * unavailability. Recvreq is added to recv_pending list if scheduling of put - * operation cannot be accomplished for some reason. */ -void mca_pml_bfo_recv_request_process_pending(void); - -END_C_DECLS - -#endif - diff --git a/ompi/mca/pml/bfo/pml_bfo_sendreq.c b/ompi/mca/pml/bfo/pml_bfo_sendreq.c deleted file mode 100644 index 8f5624db29e..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_sendreq.c +++ /dev/null @@ -1,1404 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "opal/prefetch.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "pml_bfo.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_rdmafrag.h" -#include "pml_bfo_recvreq.h" -#if PML_BFO -#include "pml_bfo_failover.h" -#endif /* PML_BFO */ -#include "ompi/mca/bml/base/base.h" -#include "ompi/memchecker.h" - -OBJ_CLASS_INSTANCE(mca_pml_bfo_send_range_t, ompi_free_list_item_t, - NULL, NULL); - -void mca_pml_bfo_send_request_process_pending(mca_bml_base_btl_t *bml_btl) -{ - int rc, i, s = opal_list_get_size(&mca_pml_bfo.send_pending); - - /* advance pending requests */ - for(i = 0; i < s; i++) { - mca_pml_bfo_send_pending_t pending_type = MCA_PML_BFO_SEND_PENDING_NONE; - mca_pml_bfo_send_request_t* sendreq; - mca_bml_base_btl_t *send_dst; - - sendreq = get_request_from_send_pending(&pending_type); - if(OPAL_UNLIKELY(NULL == sendreq)) - break; - - switch(pending_type) { - case MCA_PML_BFO_SEND_PENDING_SCHEDULE: - rc = mca_pml_bfo_send_request_schedule_exclusive(sendreq); - if(OMPI_ERR_OUT_OF_RESOURCE == rc) { - return; - } - break; - case MCA_PML_BFO_SEND_PENDING_START: - send_dst = mca_bml_base_btl_array_find( - &sendreq->req_endpoint->btl_eager, bml_btl->btl); - if (NULL == send_dst) { - /* Put request back onto pending list and try next one. */ - add_request_to_send_pending(sendreq, - MCA_PML_BFO_SEND_PENDING_START, true); - } else { - rc = mca_pml_bfo_send_request_start_btl(sendreq, send_dst); - if (OMPI_ERR_OUT_OF_RESOURCE == rc) { - /* No more resources on this btl so prepend to the pending - * list to minimize reordering and give up for now. 
*/ - add_request_to_send_pending(sendreq, - MCA_PML_BFO_SEND_PENDING_START, false); - return; - } - } - break; - default: - opal_output(0, "[%s:%d] wrong send request type\n", - __FILE__, __LINE__); - break; - } - } -} - -/* - * The free call mark the final stage in a request life-cycle. Starting from this - * point the request is completed at both PML and user level, and can be used - * for others p2p communications. Therefore, in the case of the BFO PML it should - * be added to the free request list. - */ -static int mca_pml_bfo_send_request_free(struct ompi_request_t** request) -{ - mca_pml_bfo_send_request_t* sendreq = *(mca_pml_bfo_send_request_t**)request; - - assert( false == sendreq->req_send.req_base.req_free_called ); - - OPAL_THREAD_LOCK(&ompi_request_lock); - sendreq->req_send.req_base.req_free_called = true; - - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, - &(sendreq->req_send.req_base), PERUSE_SEND ); - - if( true == sendreq->req_send.req_base.req_pml_complete ) { - /* make buffer defined when the request is compeleted, - and before releasing the objects. 
*/ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - - MCA_PML_BFO_SEND_REQUEST_RETURN( sendreq ); - } - - OPAL_THREAD_UNLOCK(&ompi_request_lock); - - *request = MPI_REQUEST_NULL; - return OMPI_SUCCESS; -} - -static int mca_pml_bfo_send_request_cancel(struct ompi_request_t* request, int complete) -{ - /* we dont cancel send requests by now */ - return OMPI_SUCCESS; -} - -static void mca_pml_bfo_send_request_construct(mca_pml_bfo_send_request_t* req) -{ - req->req_send.req_base.req_type = MCA_PML_REQUEST_SEND; - req->req_send.req_base.req_ompi.req_free = mca_pml_bfo_send_request_free; - req->req_send.req_base.req_ompi.req_cancel = mca_pml_bfo_send_request_cancel; - req->req_rdma_cnt = 0; - req->req_throttle_sends = false; - OBJ_CONSTRUCT(&req->req_send_ranges, opal_list_t); - OBJ_CONSTRUCT(&req->req_send_range_lock, opal_mutex_t); -} - -static void mca_pml_bfo_send_request_destruct(mca_pml_bfo_send_request_t* req) -{ - OBJ_DESTRUCT(&req->req_send_ranges); - OBJ_DESTRUCT(&req->req_send_range_lock); -} - -OBJ_CLASS_INSTANCE( mca_pml_bfo_send_request_t, - mca_pml_base_send_request_t, - mca_pml_bfo_send_request_construct, - mca_pml_bfo_send_request_destruct ); - -/** - * Completion of a short message - nothing left to schedule. 
- */ - -static inline void -mca_pml_bfo_match_completion_free_request( mca_bml_base_btl_t* bml_btl, - mca_pml_bfo_send_request_t* sendreq ) -{ - if( sendreq->req_send.req_bytes_packed > 0 ) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN, - &(sendreq->req_send.req_base), PERUSE_SEND ); - } - - /* signal request completion */ - send_request_pml_complete(sendreq); - - /* check for pending requests */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -static void -mca_pml_bfo_match_completion_free( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { -#if PML_BFO - mca_pml_bfo_repost_match_fragment(des); - return; -#else /* PML_BFO */ - /* TSW - FIX */ - opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif /* PML_BFO */ - } -#if PML_BFO - MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "MATCH"); -#endif /* PML_BFO */ - mca_pml_bfo_match_completion_free_request( bml_btl, sendreq ); -} - -static inline void -mca_pml_bfo_rndv_completion_request( mca_bml_base_btl_t* bml_btl, - mca_pml_bfo_send_request_t* sendreq, - size_t req_bytes_delivered ) -{ - if( sendreq->req_send.req_bytes_packed > 0 ) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN, - &(sendreq->req_send.req_base), PERUSE_SEND ); - } - - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); - - /* advance the request */ - OPAL_THREAD_ADD32(&sendreq->req_state, -1); - - send_request_pml_complete_check(sendreq); - - /* check for pending requests */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/* - * Completion of the first fragment of a long message that - * requires an acknowledgement - */ -static void 
-mca_pml_bfo_rndv_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - size_t req_bytes_delivered; - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { -#if PML_BFO - if (true == mca_pml_bfo_rndv_completion_status_error(des, sendreq)) - return; -#else /* PML_BFO */ - /* TSW - FIX */ - opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif /* PML_BFO */ - } -#if PML_BFO - sendreq->req_events--; - MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, - MCA_PML_BFO_HDR_TYPE_RNDV, "RNDV"); -#endif /* PML_BFO */ - - /* count bytes of user data actually delivered. As the rndv completion only - * happens in one thread, the increase of the req_bytes_delivered does not - * have to be atomic. - */ - req_bytes_delivered = mca_pml_bfo_compute_segment_length (btl->btl_seg_size, - (void *) des->des_local, - des->des_local_count, - sizeof(mca_pml_bfo_rendezvous_hdr_t)); - -#if PML_BFO - MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "RNDV"); -#endif /* PML_BFO */ - mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered ); -} - - -/** - * Completion of a get request. 
- */ - -static void -mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - size_t req_bytes_delivered; -#if PML_BFO - MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des); -#endif /* PML_BFO */ - - /* count bytes of user data actually delivered and check for request completion */ - req_bytes_delivered = mca_pml_bfo_compute_segment_length (btl->btl_seg_size, - (void *) des->des_local, - des->des_local_count, 0); - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); - - send_request_pml_complete_check(sendreq); - /* free the descriptor */ -#if PML_BFO - btl->btl_free(btl, des); - MCA_PML_BFO_CHECK_SENDREQ_RDMA_BML_BTL(bml_btl, btl, sendreq, "RGET"); -#else /* PML_BFO */ - mca_bml_base_free(bml_btl, des); -#endif /* PML_BFO */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - - -/** - * Completion of a control message - return resources. - */ - -static void -mca_pml_bfo_send_ctl_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - -#if PML_BFO - if(OPAL_UNLIKELY(OMPI_SUCCESS != status)) { - mca_pml_bfo_send_ctl_completion_status_error(des); - return; - } - MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, des->des_cbdata, "RGET"); -#endif /* PML_BFO */ - /* check for pending requests */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/** - * Completion of additional fragments of a large message - may need - * to schedule additional fragments. 
- */ - -static void -mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - size_t req_bytes_delivered; -#if PML_BFO - sendreq->req_events--; -#endif /* PML_BFO */ - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { -#if PML_BFO - sendreq->req_error++; -#else /* PML_BFO */ - /* TSW - FIX */ - opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif /* PML_BFO */ - } - - /* count bytes of user data actually delivered */ - req_bytes_delivered = mca_pml_bfo_compute_segment_length (btl->btl_seg_size, - (void *) des->des_local, - des->des_local_count, - sizeof(mca_pml_bfo_frag_hdr_t)); - - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, -1); - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); - -#if PML_BFO - MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, - MCA_PML_BFO_HDR_TYPE_FRAG, "FRAG"); -#endif /* PML_BFO */ - if(send_request_pml_complete_check(sendreq) == false) { - mca_pml_bfo_send_request_schedule(sendreq); -#if PML_BFO - MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, - MCA_PML_BFO_HDR_TYPE_FRAG, - "FRAG (BTL removal)"); -#endif /* PML_BFO */ - } - - /* check for pending requests */ -#if PML_BFO - MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "FRAG"); -#endif /* PML_BFO */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/** - * Buffer the entire message and mark as complete. 
- */ - -int mca_pml_bfo_send_request_start_buffered( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size) -{ - mca_btl_base_descriptor_t* des; - mca_btl_base_segment_t* segment; - mca_pml_bfo_hdr_t* hdr; - struct iovec iov; - unsigned int iov_count; - size_t max_data, req_bytes_delivered; - int rc; - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_rendezvous_hdr_t) + size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_local; - - /* pack the data into the BTL supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + - sizeof(mca_pml_bfo_rendezvous_hdr_t)); - iov.iov_len = size; - iov_count = 1; - max_data = size; - if((rc = opal_convertor_pack( &sendreq->req_send.req_base.req_convertor, - &iov, - &iov_count, - &max_data)) < 0) { - mca_bml_base_free(bml_btl, des); - return rc; - } - req_bytes_delivered = max_data; - - /* build rendezvous header */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDV; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV(buffered)"); -#endif /* PML_BFO */ - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV, - sendreq->req_send.req_base.req_proc); - - /* update lengths */ - segment->seg_len = sizeof(mca_pml_bfo_rendezvous_hdr_t) + max_data; - - des->des_cbfunc = mca_pml_bfo_rndv_completion; - 
des->des_cbdata = sendreq; - - /* buffer the remainder of the message */ - rc = mca_pml_base_bsend_request_alloc((ompi_request_t*)sendreq); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - mca_bml_base_free(bml_btl, des); - return rc; - } - - iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)sendreq->req_send.req_addr) + max_data); - iov.iov_len = max_data = sendreq->req_send.req_bytes_packed - max_data; - - if((rc = opal_convertor_pack( &sendreq->req_send.req_base.req_convertor, - &iov, - &iov_count, - &max_data)) < 0) { - mca_bml_base_free(bml_btl, des); - return rc; - } - - /* re-init convertor for packed data */ - opal_convertor_prepare_for_send( &sendreq->req_send.req_base.req_convertor, - &(ompi_mpi_byte.dt.super), - sendreq->req_send.req_bytes_packed, - sendreq->req_send.req_addr ); - - /* wait for ack and completion */ - sendreq->req_state = 2; - - /* request is complete at mpi level */ - OPAL_THREAD_LOCK(&ompi_request_lock); - MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); - - /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RNDV); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered); - } -#if PML_BFO - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - sendreq->req_events++; - } -#endif /* PML_BFO */ - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des ); - return rc; -} - - -/** - * We work on a buffered request with a size smaller than the eager size - * or the BTL is not able to send the data IN_PLACE. Request a segment - * that is used for initial hdr and any eager data. This is used only - * from the _START macro. 
- */ -int mca_pml_bfo_send_request_start_copy( mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size ) -{ - mca_btl_base_descriptor_t* des = NULL; - mca_btl_base_segment_t* segment; - mca_pml_bfo_hdr_t* hdr; - struct iovec iov; - unsigned int iov_count; - size_t max_data = size; - int rc; - - if(NULL != bml_btl->btl->btl_sendi) { - mca_pml_bfo_match_hdr_t match; - match.hdr_common.hdr_flags = 0; - match.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_MATCH; - match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - match.hdr_tag = sendreq->req_send.req_base.req_tag; - match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - - bfo_hdr_hton(&match, MCA_PML_BFO_HDR_TYPE_MATCH, - sendreq->req_send.req_base.req_proc); - - /* try to send immediately */ - rc = mca_bml_base_sendi( bml_btl, &sendreq->req_send.req_base.req_convertor, - &match, OMPI_PML_BFO_MATCH_HDR_LEN, - size, MCA_BTL_NO_ORDER, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, - MCA_PML_BFO_HDR_TYPE_MATCH, - &des); - if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) { -#if PML_BFO - /* Needed in case of failover */ - if (NULL != des) { - des->des_cbfunc = mca_pml_bfo_match_completion_free; - des->des_cbdata = sendreq->req_endpoint; - } -#endif /* PML_BFO */ - /* signal request completion */ - send_request_pml_complete(sendreq); - - /* check for pending requests */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); - return OMPI_SUCCESS; - } - } else { - /* allocate descriptor */ - mca_bml_base_alloc( bml_btl, &des, - MCA_BTL_NO_ORDER, - OMPI_PML_BFO_MATCH_HDR_LEN + size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - } - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - segment = des->des_local; - - if(size > 0) { - /* pack the data into the supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + - 
OMPI_PML_BFO_MATCH_HDR_LEN); - iov.iov_len = size; - iov_count = 1; - /* - * Before copy the user buffer, make the target part - * accessible. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - (void)opal_convertor_pack( &sendreq->req_send.req_base.req_convertor, - &iov, &iov_count, &max_data ); - /* - * Packing finished, make the user buffer unaccessable. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - } - - - /* build match header */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_MATCH; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_MATCH, - sendreq->req_send.req_base.req_proc); - - /* update lengths */ - segment->seg_len = OMPI_PML_BFO_MATCH_HDR_LEN + max_data; - - /* short message */ - des->des_cbdata = sendreq; - des->des_cbfunc = mca_pml_bfo_match_completion_free; - - /* send */ - rc = mca_bml_base_send_status(bml_btl, des, MCA_PML_BFO_HDR_TYPE_MATCH); - if( OPAL_LIKELY( rc >= OMPI_SUCCESS ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - mca_pml_bfo_match_completion_free_request( bml_btl, sendreq ); - } - return OMPI_SUCCESS; - } - if (OMPI_ERR_RESOURCE_BUSY == rc) { - /* No more resources. 
Allow the upper level to queue the send */ - rc = OMPI_ERR_OUT_OF_RESOURCE; - } - - mca_bml_base_free (bml_btl, des); - - return rc; -} - -/** - * BTL can send directly from user buffer so allow the BTL - * to prepare the segment list. Start sending a small message. - */ - -int mca_pml_bfo_send_request_start_prepare( mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size ) -{ - mca_btl_base_descriptor_t* des; - mca_btl_base_segment_t* segment; - mca_pml_bfo_hdr_t* hdr; - int rc; - - /* prepare descriptor */ - mca_bml_base_prepare_src( bml_btl, - NULL, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - OMPI_PML_BFO_MATCH_HDR_LEN, - &size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, - &des ); - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_local; - - /* build match header */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_MATCH; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_MATCH, - sendreq->req_send.req_base.req_proc); - - /* short message */ - des->des_cbfunc = mca_pml_bfo_match_completion_free; - des->des_cbdata = sendreq; - - /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_MATCH); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - mca_pml_bfo_match_completion_free_request( bml_btl, sendreq ); - } - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des ); - return rc; -} - - -/** - * We have contigous data that is registered - schedule across - * available nics. 
- */ - -int mca_pml_bfo_send_request_start_rdma( mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size ) -{ - /* - * When req_rdma array is constructed the first element of the array always - * assigned different btl in round robin fashion (if there are more than - * one RDMA capable BTLs). This way round robin distribution of RDMA - * operation is achieved. - */ - - mca_btl_base_descriptor_t* des, *src = NULL; - mca_btl_base_segment_t* segment; - mca_pml_bfo_hdr_t* hdr; - bool need_local_cb = false; - int rc; - - bml_btl = sendreq->req_rdma[0].bml_btl; - if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) { - mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg; - size_t seg_size; - size_t old_position = sendreq->req_send.req_base.req_convertor.bConverted; - - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - /* prepare source descriptor/segment(s) */ - /* PML owns this descriptor and will free it in */ - /* get_completion */ - mca_bml_base_prepare_src( bml_btl, - reg, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - 0, - &size, - MCA_BTL_DES_FLAGS_GET, - &src ); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - if( OPAL_UNLIKELY(NULL == src) ) { - opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, - &old_position); - return OMPI_ERR_OUT_OF_RESOURCE; - } - src->des_cbfunc = mca_pml_bfo_rget_completion; - src->des_cbdata = sendreq; - - seg_size = bml_btl->btl->btl_seg_size * src->des_local_count; - - /* allocate space for get hdr + segment list */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_rget_hdr_t) + seg_size, - 
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if( OPAL_UNLIKELY(NULL == des) ) { - opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor, - &old_position ); - mca_bml_base_free(bml_btl, src); - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_local; - - /* build match header */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = MCA_PML_BFO_HDR_FLAGS_CONTIG|MCA_PML_BFO_HDR_FLAGS_PIN; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RGET; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RGET"); -#endif /* PML_BFO */ - hdr->hdr_rget.hdr_des.pval = src; - hdr->hdr_rget.hdr_seg_cnt = src->des_local_count; - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RGET, - sendreq->req_send.req_base.req_proc); - - /* copy segment data */ - memmove (&hdr->hdr_rget + 1, src->des_local, seg_size); - - des->des_cbfunc = mca_pml_bfo_send_ctl_completion; - - /** - * Well, it's a get so we will not know when the peer get the data anyway. - * If we generate the PERUSE event here, at least we will know when do we - * sent the GET message ... 
- */ - if( sendreq->req_send.req_bytes_packed > 0 ) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN, - &(sendreq->req_send.req_base), PERUSE_SEND ); - } - - } else { - - /* allocate a rendezvous header - dont eager send any data - * receiver will schedule rdma put(s) of the entire message - */ - - mca_bml_base_alloc(bml_btl, &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_rendezvous_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if( OPAL_UNLIKELY(NULL == des)) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_local; - - /* build hdr */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = MCA_PML_BFO_HDR_FLAGS_CONTIG|MCA_PML_BFO_HDR_FLAGS_PIN; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDV; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV"); -#endif /* PML_BFO */ - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV, - sendreq->req_send.req_base.req_proc); - - /* update lengths with number of bytes actually packed */ - segment->seg_len = sizeof(mca_pml_bfo_rendezvous_hdr_t); - - /* first fragment of a long message */ - des->des_cbfunc = mca_pml_bfo_rndv_completion; - need_local_cb = true; - - /* wait for ack and completion */ - sendreq->req_state = 2; - } - - des->des_cbdata = sendreq; - - /* send */ - rc = mca_bml_base_send(bml_btl, des, hdr->hdr_common.hdr_type); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) && (true == need_local_cb)) { - mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, 0 ); - } -#if PML_BFO - if (MCA_PML_BFO_HDR_TYPE_RNDV == 
hdr->hdr_common.hdr_type) { - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - sendreq->req_events++; - } - } -#endif /* PML_BFO */ - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des); - if (NULL != src) { - mca_bml_base_free (bml_btl, src); - } - - return rc; -} - - -/** - * Rendezvous is required. Not doing rdma so eager send up to - * the btls eager limit. - */ - -int mca_pml_bfo_send_request_start_rndv( mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size, - int flags ) -{ - mca_btl_base_descriptor_t* des; - mca_btl_base_segment_t* segment; - mca_pml_bfo_hdr_t* hdr; - int rc; - - /* prepare descriptor */ - if(size == 0) { - mca_bml_base_alloc( bml_btl, - &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_rendezvous_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ); - } else { - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - mca_bml_base_prepare_src( bml_btl, - NULL, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_rendezvous_hdr_t), - &size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, - &des ); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - } - - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_local; - - /* build hdr */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDV; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = 
(uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV"); -#endif /* PML_BFO */ - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV, - sendreq->req_send.req_base.req_proc); - - /* first fragment of a long message */ - des->des_cbdata = sendreq; - des->des_cbfunc = mca_pml_bfo_rndv_completion; - - /* wait for ack and completion */ - sendreq->req_state = 2; - - /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RNDV); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, size ); - } -#if PML_BFO - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - sendreq->req_events++; - } -#endif /* PML_BFO */ - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des ); - return rc; -} - -void mca_pml_bfo_send_request_copy_in_out( mca_pml_bfo_send_request_t *sendreq, - uint64_t send_offset, - uint64_t send_length ) -{ - mca_pml_bfo_send_range_t *sr; - ompi_free_list_item_t *i; - mca_bml_base_endpoint_t* bml_endpoint = sendreq->req_endpoint; - int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); - int n; - double weight_total = 0; - - if( OPAL_UNLIKELY(0 == send_length) ) - return; - - OMPI_FREE_LIST_WAIT_MT(&mca_pml_bfo.send_ranges, i); - - sr = (mca_pml_bfo_send_range_t*)i; - - sr->range_send_offset = send_offset; - sr->range_send_length = send_length; - sr->range_btl_idx = 0; - - for(n = 0; n < num_btls && n < mca_pml_bfo.max_send_per_range; n++) { - sr->range_btls[n].bml_btl = - mca_bml_base_btl_array_get_next(&bml_endpoint->btl_send); - weight_total += sr->range_btls[n].bml_btl->btl_weight; - } - - sr->range_btl_cnt = n; - mca_pml_bfo_calc_weighted_length(sr->range_btls, n, send_length, - weight_total); - - OPAL_THREAD_LOCK(&sendreq->req_send_range_lock); - 
opal_list_append(&sendreq->req_send_ranges, (opal_list_item_t*)sr); - OPAL_THREAD_UNLOCK(&sendreq->req_send_range_lock); -} - -static inline mca_pml_bfo_send_range_t * -get_send_range_nolock(mca_pml_bfo_send_request_t* sendreq) -{ - opal_list_item_t *item; - - item = opal_list_get_first(&sendreq->req_send_ranges); - - if(opal_list_get_end(&sendreq->req_send_ranges) == item) - return NULL; - - return (mca_pml_bfo_send_range_t*)item; -} - -static inline mca_pml_bfo_send_range_t * -get_send_range(mca_pml_bfo_send_request_t* sendreq) -{ - mca_pml_bfo_send_range_t *range; - - OPAL_THREAD_LOCK(&sendreq->req_send_range_lock); - range = get_send_range_nolock(sendreq); - OPAL_THREAD_UNLOCK(&sendreq->req_send_range_lock); - - return range; -} - -static inline mca_pml_bfo_send_range_t * -get_next_send_range(mca_pml_bfo_send_request_t* sendreq, - mca_pml_bfo_send_range_t *range) -{ - OPAL_THREAD_LOCK(&sendreq->req_send_range_lock); - opal_list_remove_item(&sendreq->req_send_ranges, (opal_list_item_t *)range); - OMPI_FREE_LIST_RETURN_MT(&mca_pml_bfo.send_ranges, &range->base); - range = get_send_range_nolock(sendreq); - OPAL_THREAD_UNLOCK(&sendreq->req_send_range_lock); - - return range; -} - -/** - * Schedule pipeline of send descriptors for the given request. - * Up to the rdma threshold. If this is a send based protocol, - * the rdma threshold is the end of the message. Otherwise, schedule - * fragments up to the threshold to overlap initial registration/setup - * costs of the rdma. Only one thread can be inside this function. 
- */ - -int -mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t* sendreq) -{ - size_t prev_bytes_remaining = 0; - mca_pml_bfo_send_range_t *range; - int num_fail = 0; - - /* check pipeline_depth here before attempting to get any locks */ - if(true == sendreq->req_throttle_sends && - sendreq->req_pipeline_depth >= mca_pml_bfo.send_pipeline_depth) - return OMPI_SUCCESS; - - range = get_send_range(sendreq); - - while(range && (false == sendreq->req_throttle_sends || - sendreq->req_pipeline_depth < mca_pml_bfo.send_pipeline_depth)) { - mca_pml_bfo_frag_hdr_t* hdr; - mca_btl_base_descriptor_t* des; - int rc, btl_idx; - size_t size, offset, data_remaining = 0; - mca_bml_base_btl_t* bml_btl; - - assert(range->range_send_length != 0); -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_REMOVED_BTL(sendreq, range); -#endif /* PML_BFO */ - - if(prev_bytes_remaining == range->range_send_length) - num_fail++; - else - num_fail = 0; - - prev_bytes_remaining = range->range_send_length; - - if( OPAL_UNLIKELY(num_fail == range->range_btl_cnt) ) { - assert(sendreq->req_pending == MCA_PML_BFO_SEND_PENDING_NONE); - add_request_to_send_pending(sendreq, - MCA_PML_BFO_SEND_PENDING_SCHEDULE, true); - /* Note that request remains locked. 
send_request_process_pending() - * function will call shedule_exclusive() directly without taking - * the lock */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - -cannot_pack: - do { - btl_idx = range->range_btl_idx; - if(++range->range_btl_idx == range->range_btl_cnt) - range->range_btl_idx = 0; - } while(!range->range_btls[btl_idx].length); - - bml_btl = range->range_btls[btl_idx].bml_btl; - /* If there is a remaining data from another BTL that was too small - * for converter to pack then send it through another BTL */ - range->range_btls[btl_idx].length += data_remaining; - size = range->range_btls[btl_idx].length; - - /* makes sure that we don't exceed BTL max send size */ - if(bml_btl->btl->btl_max_send_size != 0) { - size_t max_send_size = bml_btl->btl->btl_max_send_size - - sizeof(mca_pml_bfo_frag_hdr_t); - - if (size > max_send_size) { - size = max_send_size; - } - } - - /* pack into a descriptor */ - offset = (size_t)range->range_send_offset; - opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, - &offset); - range->range_send_offset = (uint64_t)offset; - - data_remaining = size; - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - mca_bml_base_prepare_src(bml_btl, NULL, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_frag_hdr_t), - &size, MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK, &des); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - - if( OPAL_UNLIKELY(des == NULL || size == 0) ) { - if(des) { - /* Converter can't pack this chunk. 
Append to another chunk - * from other BTL */ - mca_bml_base_free(bml_btl, des); - range->range_btls[btl_idx].length -= data_remaining; - goto cannot_pack; - } - continue; - } - - des->des_cbfunc = mca_pml_bfo_frag_completion; - des->des_cbdata = sendreq; - - /* setup header */ - hdr = (mca_pml_bfo_frag_hdr_t*)des->des_local->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FRAG; - hdr->hdr_frag_offset = range->range_send_offset; - hdr->hdr_src_req.pval = sendreq; - hdr->hdr_dst_req = sendreq->req_recv; - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_FRAG, - sendreq->req_send.req_base.req_proc); - -#if OMPI_WANT_PERUSE - PERUSE_TRACE_COMM_OMPI_EVENT(PERUSE_COMM_REQ_XFER_CONTINUE, - &(sendreq->req_send.req_base), size, PERUSE_SEND); -#endif /* OMPI_WANT_PERUSE */ - - /* initiate send - note that this may complete before the call returns */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_FRAG); - if( OPAL_LIKELY(rc >= 0) ) { - /* update state */ - range->range_btls[btl_idx].length -= size; - range->range_send_length -= size; - range->range_send_offset += size; - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, 1); - if(range->range_send_length == 0) { - range = get_next_send_range(sendreq, range); - prev_bytes_remaining = 0; - } -#if PML_BFO - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - sendreq->req_events++; - } -#endif /* PML_BFO */ - } else { - mca_bml_base_free(bml_btl,des); - } - } - - return OMPI_SUCCESS; -} - - -/** - * An RDMA put operation has completed: - * (1) Update request status and if required set completed - * (2) Send FIN control message to the destination - */ - -static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_bfo_rdma_frag_t* frag = (mca_pml_bfo_rdma_frag_t*)des->des_cbdata; - mca_pml_bfo_send_request_t* sendreq = 
(mca_pml_bfo_send_request_t*)frag->rdma_req; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { -#if PML_BFO - sendreq->req_error++; -#else /* PML_BFO */ - /* TSW - FIX */ - OMPI_ERROR_LOG(status); - ompi_rte_abort(-1, NULL); -#endif /* PML_BFO */ - } -#if PML_BFO - sendreq->req_events--; - MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl); - MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "RDMA write"); -#endif /* PML_BFO */ - - mca_pml_bfo_send_fin(sendreq->req_send.req_base.req_proc, - bml_btl, - frag->rdma_hdr.hdr_rdma.hdr_des, -#if PML_BFO - des->order, 0, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_comm->c_my_rank); -#else /* PML_BFO */ - des->order, 0); -#endif /* PML_BFO */ - - /* check for request completion */ - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); - - send_request_pml_complete_check(sendreq); - - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); - - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -int mca_pml_bfo_send_request_put_frag( mca_pml_bfo_rdma_frag_t* frag ) -{ - mca_mpool_base_registration_t* reg = NULL; - mca_bml_base_btl_t* bml_btl = frag->rdma_bml; - mca_btl_base_descriptor_t* des; - size_t save_size = frag->rdma_length; - int rc; - - /* setup descriptor */ - mca_bml_base_prepare_src( bml_btl, - reg, - &frag->convertor, - MCA_BTL_NO_ORDER, - 0, - &frag->rdma_length, - MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_FLAGS_PUT, - &des ); - - if( OPAL_UNLIKELY(NULL == des) ) { - if(frag->retries < mca_pml_bfo.rdma_put_retries_limit) { - size_t offset = (size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset; - frag->rdma_length = save_size; - opal_convertor_set_position(&frag->convertor, &offset); - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.rdma_pending, 
(opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - } else { - mca_pml_bfo_send_request_t *sendreq = - (mca_pml_bfo_send_request_t*)frag->rdma_req; - - /* tell receiver to unregister memory */ - mca_pml_bfo_send_fin(sendreq->req_send.req_base.req_proc, - bml_btl, frag->rdma_hdr.hdr_rdma.hdr_des, -#if PML_BFO - MCA_BTL_NO_ORDER, 1, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_comm->c_my_rank); -#else /* PML_BFO */ - MCA_BTL_NO_ORDER, 1); -#endif /* PML_BFO */ - - /* send fragment by copy in/out */ - mca_pml_bfo_send_request_copy_in_out(sendreq, - frag->rdma_hdr.hdr_rdma.hdr_rdma_offset, frag->rdma_length); - /* if a pointer to a receive request is not set it means that - * ACK was not yet received. Don't schedule sends before ACK */ - if(NULL != sendreq->req_recv.pval) - mca_pml_bfo_send_request_schedule(sendreq); - } - return OMPI_ERR_OUT_OF_RESOURCE; - } - - des->des_remote = (mca_btl_base_segment_t *) frag->rdma_segs; - des->des_remote_count = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt; - des->des_cbfunc = mca_pml_bfo_put_completion; - des->des_cbdata = frag; - - PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE, - &(((mca_pml_bfo_send_request_t*)frag->rdma_req)->req_send.req_base), save_size, PERUSE_SEND ); - - rc = mca_bml_base_put(bml_btl, des); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - mca_bml_base_free(bml_btl, des); - frag->rdma_length = save_size; - if(OMPI_ERR_OUT_OF_RESOURCE == rc) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.rdma_pending, (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } else { - /* TSW - FIX */ - OMPI_ERROR_LOG(rc); - ompi_rte_abort(-1, NULL); - } - } -#if PML_BFO - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - ((mca_pml_bfo_send_request_t*)frag->rdma_req)->req_events++; - } -#endif /* PML_BFO */ - 
return OMPI_SUCCESS; -} - -/** - * Receiver has scheduled an RDMA operation: - * (1) Allocate an RDMA fragment to maintain the state of the operation - * (2) Call BTL prepare_src to pin/prepare source buffers - * (3) Queue the RDMA put - */ - -void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq, - mca_btl_base_module_t* btl, - mca_pml_bfo_rdma_hdr_t* hdr ) -{ - mca_bml_base_endpoint_t *bml_endpoint = sendreq->req_endpoint; - mca_pml_bfo_rdma_frag_t* frag; - size_t i, size = 0; - - if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) { - OPAL_THREAD_ADD32(&sendreq->req_state, -1); - } -#if PML_BFO - MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq); - sendreq->req_recv = hdr->hdr_dst_req; /* only needed once, but it is OK */ -#endif /* PML_BFO */ - - MCA_PML_BFO_RDMA_FRAG_ALLOC(frag); - if( OPAL_UNLIKELY(NULL == frag) ) { - /* TSW - FIX */ - OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE); - ompi_rte_abort(-1, NULL); - } - - assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs)); - - /* setup fragment */ - memmove (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt); - - for( i = 0; i < hdr->hdr_seg_cnt; i++ ) { - mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *) ((uintptr_t)(frag->rdma_segs) + i * btl->btl_seg_size); - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - if ((sendreq->req_send.req_base.req_proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN) != - (ompi_proc_local()->super.proc_arch & OPAL_ARCH_ISBIGENDIAN)) { - size += opal_swap_bytes4(seg->seg_len); - } else -#endif - { - size += seg->seg_len; - } - } - - frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl); - frag->rdma_btl = btl; /* in case frag ends up on pending */ -#endif /* PML_BFO */ - frag->rdma_hdr.hdr_rdma = *hdr; - frag->rdma_req = sendreq; - frag->rdma_ep = bml_endpoint; - frag->rdma_length = size; - frag->rdma_state = MCA_PML_BFO_RDMA_PUT; - frag->reg = 
NULL; - frag->retries = 0; - - /* lookup the corresponding registration */ - for(i=0; ireq_rdma_cnt; i++) { - if(sendreq->req_rdma[i].bml_btl == frag->rdma_bml) { - frag->reg = sendreq->req_rdma[i].btl_reg; - break; - } - } - - /* RDMA writes may proceed in parallel to send and to each other, so - * create clone of the convertor for each RDMA fragment - */ - size = hdr->hdr_rdma_offset; - opal_convertor_clone_with_position(&sendreq->req_send.req_base.req_convertor, - &frag->convertor, 0, &size); - - mca_pml_bfo_send_request_put_frag(frag); -} - diff --git a/ompi/mca/pml/bfo/pml_bfo_sendreq.h b/ompi/mca/pml/bfo/pml_bfo_sendreq.h deleted file mode 100644 index cd8419c6ce4..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_sendreq.h +++ /dev/null @@ -1,498 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OMPI_PML_BFO_SEND_REQUEST_H -#define OMPI_PML_BFO_SEND_REQUEST_H - -#include "opal/mca/btl/btl.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/datatype/opal_convertor.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "pml_bfo_comm.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_rdma.h" -#include "pml_bfo_rdmafrag.h" -#include "ompi/mca/bml/bml.h" - -BEGIN_C_DECLS - -typedef enum { - MCA_PML_BFO_SEND_PENDING_NONE, - MCA_PML_BFO_SEND_PENDING_SCHEDULE, - MCA_PML_BFO_SEND_PENDING_START -} mca_pml_bfo_send_pending_t; - -struct mca_pml_bfo_send_request_t { - mca_pml_base_send_request_t req_send; - mca_bml_base_endpoint_t* req_endpoint; - opal_ptr_t req_recv; -#if PML_BFO - int32_t req_events; /* number of outstanding events on request */ - int32_t req_restartseq; /* sequence number of restarted request */ - int32_t req_restart; /* state of restarted request */ - int32_t req_error; /* non-zero when error has occurred on request */ -#endif /* PML_BFO */ - int32_t req_state; - int32_t req_lock; - bool req_throttle_sends; - size_t req_pipeline_depth; - size_t req_bytes_delivered; - uint32_t req_rdma_cnt; - mca_pml_bfo_send_pending_t req_pending; - opal_mutex_t req_send_range_lock; - opal_list_t req_send_ranges; - mca_pml_bfo_com_btl_t req_rdma[1]; -}; -typedef struct mca_pml_bfo_send_request_t mca_pml_bfo_send_request_t; - -OBJ_CLASS_DECLARATION(mca_pml_bfo_send_request_t); - -struct mca_pml_bfo_send_range_t { - ompi_free_list_item_t base; - uint64_t range_send_offset; - uint64_t range_send_length; - int range_btl_idx; - int range_btl_cnt; - mca_pml_bfo_com_btl_t range_btls[1]; -}; -typedef struct mca_pml_bfo_send_range_t mca_pml_bfo_send_range_t; -OBJ_CLASS_DECLARATION(mca_pml_bfo_send_range_t); - -static inline bool lock_send_request(mca_pml_bfo_send_request_t *sendreq) -{ - return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1; -} - -static inline bool 
unlock_send_request(mca_pml_bfo_send_request_t *sendreq) -{ - return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0; -} - -static inline void -add_request_to_send_pending(mca_pml_bfo_send_request_t* sendreq, - const mca_pml_bfo_send_pending_t type, - const bool append) -{ - opal_list_item_t *item = (opal_list_item_t*)sendreq; - - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - sendreq->req_pending = type; - if(append) - opal_list_append(&mca_pml_bfo.send_pending, item); - else - opal_list_prepend(&mca_pml_bfo.send_pending, item); - - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); -} - -static inline mca_pml_bfo_send_request_t* -get_request_from_send_pending(mca_pml_bfo_send_pending_t *type) -{ - mca_pml_bfo_send_request_t *sendreq; - - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - sendreq = (mca_pml_bfo_send_request_t*) - opal_list_remove_first(&mca_pml_bfo.send_pending); - if(sendreq) { - *type = sendreq->req_pending; - sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE; - } - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - return sendreq; -} - -#define MCA_PML_BFO_SEND_REQUEST_ALLOC( comm, \ - dst, \ - sendreq) \ - { \ - ompi_proc_t *proc = ompi_comm_peer_lookup( comm, dst ); \ - ompi_free_list_item_t* item; \ - \ - sendreq = NULL; \ - if( OPAL_LIKELY(NULL != proc) ) { \ - OMPI_FREE_LIST_WAIT_MT(&mca_pml_base_send_requests, item); \ - sendreq = (mca_pml_bfo_send_request_t*)item; \ - sendreq->req_send.req_base.req_proc = proc; \ - } \ - } - - -#define MCA_PML_BFO_SEND_REQUEST_INIT( sendreq, \ - buf, \ - count, \ - datatype, \ - dst, \ - tag, \ - comm, \ - sendmode, \ - persistent) \ - { \ - MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \ - buf, \ - count, \ - datatype, \ - dst, \ - tag, \ - comm, \ - sendmode, \ - persistent, \ - 0); /* convertor_flags */ \ - (sendreq)->req_recv.pval = NULL; \ - } - - -static inline void mca_pml_bfo_free_rdma_resources(mca_pml_bfo_send_request_t* sendreq) -{ - size_t r; - - /* return mpool resources */ - for(r = 0; r < sendreq->req_rdma_cnt; r++) { - 
mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg; - if( NULL != reg && reg->mpool != NULL ) { - reg->mpool->mpool_deregister(reg->mpool, reg); - } - } - sendreq->req_rdma_cnt = 0; -} - - -/** - * Start a send request. - */ - -#define MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc) \ - do { \ - rc = mca_pml_bfo_send_request_start(sendreq); \ - } while (0) - - -/* - * Mark a send request as completed at the MPI level. - */ - -#define MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, with_signal) \ -do { \ - (sendreq)->req_send.req_base.req_ompi.req_status.MPI_SOURCE = \ - (sendreq)->req_send.req_base.req_comm->c_my_rank; \ - (sendreq)->req_send.req_base.req_ompi.req_status.MPI_TAG = \ - (sendreq)->req_send.req_base.req_tag; \ - (sendreq)->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \ - (sendreq)->req_send.req_base.req_ompi.req_status._ucount = \ - (sendreq)->req_send.req_bytes_packed; \ - ompi_request_complete( &((sendreq)->req_send.req_base.req_ompi), (with_signal) ); \ - \ - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \ - &(sendreq->req_send.req_base), PERUSE_SEND); \ -} while(0) - -/* - * Release resources associated with a request - */ - -#define MCA_PML_BFO_SEND_REQUEST_RETURN(sendreq) \ - do { \ - /* Let the base handle the reference counts */ \ - MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \ - OMPI_FREE_LIST_RETURN_MT( &mca_pml_base_send_requests, \ - (ompi_free_list_item_t*)sendreq); \ - } while(0) - - -/* - * The PML has completed a send request. Note that this request - * may have been orphaned by the user or have already completed - * at the MPI level. - * This function will never be called directly from the upper level, as it - * should only be an internal call to the PML. 
- * - */ -static inline void -send_request_pml_complete(mca_pml_bfo_send_request_t *sendreq) -{ - assert(false == sendreq->req_send.req_base.req_pml_complete); - - if(sendreq->req_send.req_bytes_packed > 0) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, - &(sendreq->req_send.req_base), PERUSE_SEND); - } - - /* return mpool resources */ - mca_pml_bfo_free_rdma_resources(sendreq); - - if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED && - sendreq->req_send.req_addr != sendreq->req_send.req_base.req_addr) { - mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq); - } - - OPAL_THREAD_LOCK(&ompi_request_lock); - if(false == sendreq->req_send.req_base.req_ompi.req_complete) { - /* Should only be called for long messages (maybe synchronous) */ - MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, true); - } else { - if( MPI_SUCCESS != sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR ) { - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST); - } - } - sendreq->req_send.req_base.req_pml_complete = true; -#if PML_BFO - sendreq->req_send.req_base.req_sequence -= 100; -#endif /* PML_BFO */ - - if(sendreq->req_send.req_base.req_free_called) { - MCA_PML_BFO_SEND_REQUEST_RETURN(sendreq); - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); -} - -/* returns true if request was completed on PML level */ -static inline bool -send_request_pml_complete_check(mca_pml_bfo_send_request_t *sendreq) -{ -#if OPAL_ENABLE_MULTI_THREADS - opal_atomic_rmb(); -#endif - /* if no more events are expected for the request and the whole message is - * already sent and send fragment scheduling isn't running in another - * thread then complete the request on PML level. 
From now on, if user - * called free on this request, the request structure can be reused for - * another request or if the request is persistent it can be restarted */ - if(sendreq->req_state == 0 && - sendreq->req_bytes_delivered >= sendreq->req_send.req_bytes_packed - && lock_send_request(sendreq)) { - send_request_pml_complete(sendreq); - return true; - } - - return false; -} - -/** - * Schedule additional fragments - */ -int -mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t*); - -static inline int -mca_pml_bfo_send_request_schedule_exclusive(mca_pml_bfo_send_request_t* sendreq) -{ - int rc; - do { - rc = mca_pml_bfo_send_request_schedule_once(sendreq); - if(rc == OMPI_ERR_OUT_OF_RESOURCE) - break; - } while(!unlock_send_request(sendreq)); - - if(OMPI_SUCCESS == rc) - send_request_pml_complete_check(sendreq); - - return rc; -} - -static inline void -mca_pml_bfo_send_request_schedule(mca_pml_bfo_send_request_t* sendreq) -{ - /* - * Only allow one thread in this routine for a given request. - * However, we cannot block callers on a mutex, so simply keep track - * of the number of times the routine has been called and run through - * the scheduling logic once for every call. 
- */ - - if(!lock_send_request(sendreq)) - return; - - mca_pml_bfo_send_request_schedule_exclusive(sendreq); -} - -#if OPAL_CUDA_SUPPORT -int mca_pml_bfo_send_request_start_cuda( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); -#endif /* OPAL_CUDA_SUPPORT */ - -/** - * Start the specified request - */ - -int mca_pml_bfo_send_request_start_buffered( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_bfo_send_request_start_copy( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_bfo_send_request_start_prepare( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_bfo_send_request_start_rdma( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_bfo_send_request_start_rndv( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size, - int flags); - -static inline int -mca_pml_bfo_send_request_start_btl( mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl ) -{ - size_t size = sendreq->req_send.req_bytes_packed; - mca_btl_base_module_t* btl = bml_btl->btl; - size_t eager_limit = btl->btl_eager_limit - sizeof(mca_pml_bfo_hdr_t); - int rc; - - if( OPAL_LIKELY(size <= eager_limit) ) { - switch(sendreq->req_send.req_send_mode) { - case MCA_PML_BASE_SEND_SYNCHRONOUS: - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, 0); - break; - case MCA_PML_BASE_SEND_BUFFERED: - rc = mca_pml_bfo_send_request_start_copy(sendreq, bml_btl, size); - break; - case MCA_PML_BASE_SEND_COMPLETE: - rc = mca_pml_bfo_send_request_start_prepare(sendreq, bml_btl, size); - break; - default: - if (size != 0 && bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) { - rc = mca_pml_bfo_send_request_start_prepare(sendreq, bml_btl, size); - } else { - rc = mca_pml_bfo_send_request_start_copy(sendreq, bml_btl, size); - } - break; 
- } - } else { - size = eager_limit; - if(OPAL_UNLIKELY(btl->btl_rndv_eager_limit < eager_limit)) - size = btl->btl_rndv_eager_limit; - if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { - rc = mca_pml_bfo_send_request_start_buffered(sendreq, bml_btl, size); - } else if - (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) { - unsigned char *base; - opal_convertor_get_current_pointer( &sendreq->req_send.req_base.req_convertor, (void**)&base ); - - if( 0 != (sendreq->req_rdma_cnt = (uint32_t)mca_pml_bfo_rdma_btls( - sendreq->req_endpoint, - base, - sendreq->req_send.req_bytes_packed, - sendreq->req_rdma))) { - rc = mca_pml_bfo_send_request_start_rdma(sendreq, bml_btl, - sendreq->req_send.req_bytes_packed); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - mca_pml_bfo_free_rdma_resources(sendreq); - } - } else { - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, - MCA_PML_BFO_HDR_FLAGS_CONTIG); - } - } else { -#if OPAL_CUDA_SUPPORT - if (sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) { - return mca_pml_bfo_send_request_start_cuda(sendreq, bml_btl, size); - } -#endif /* OPAL_CUDA_SUPPORT */ - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, 0); - } - } - - return rc; -} - -static inline int -mca_pml_bfo_send_request_start( mca_pml_bfo_send_request_t* sendreq ) -{ - mca_pml_bfo_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; - mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) - sendreq->req_send.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - size_t i; - - if( OPAL_UNLIKELY(endpoint == NULL) ) { - return OMPI_ERR_UNREACH; - } - - sendreq->req_endpoint = endpoint; - sendreq->req_state = 0; - sendreq->req_lock = 0; - sendreq->req_pipeline_depth = 0; - sendreq->req_bytes_delivered = 0; - sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE; - sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( - 
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); -#if PML_BFO - sendreq->req_restartseq = 0; /* counts up restarts */ - sendreq->req_restart = 0; /* reset in case we restart again */ - sendreq->req_error = 0; /* clear error state */ - sendreq->req_events = 0; /* clear events, probably 0 anyways */ -#endif /* PML_BFO */ - - MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base ); - - for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) { - mca_bml_base_btl_t* bml_btl; - int rc; - - /* select a btl */ - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - rc = mca_pml_bfo_send_request_start_btl(sendreq, bml_btl); - if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc) ) - return rc; - } - add_request_to_send_pending(sendreq, MCA_PML_BFO_SEND_PENDING_START, true); - - return OMPI_SUCCESS; -} - -/** - * Initiate a put scheduled by the receiver. - */ - -void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq, - mca_btl_base_module_t* btl, - mca_pml_bfo_rdma_hdr_t* hdr ); - -int mca_pml_bfo_send_request_put_frag(mca_pml_bfo_rdma_frag_t* frag); - -/* This function tries to continue sendreq that was stuck because of resource - * unavailability. A sendreq may be added to send_pending list if there is no - * resource to send initial packet or there is not resource to schedule data - * for sending. The reason the sendreq was added to the list is stored inside - * sendreq struct and appropriate operation is retried when resource became - * available. 
bml_btl passed to the function doesn't represents sendreq - * destination, it represents BTL on which resource was freed, so only this BTL - * should be considered for sending packets */ -void mca_pml_bfo_send_request_process_pending(mca_bml_base_btl_t *bml_btl); - -void mca_pml_bfo_send_request_copy_in_out(mca_pml_bfo_send_request_t *sendreq, - uint64_t send_offset, uint64_t send_length); - -END_C_DECLS - -#endif /* OMPI_PML_BFO_SEND_REQUEST_H */ diff --git a/ompi/mca/pml/bfo/pml_bfo_start.c b/ompi/mca/pml/bfo/pml_bfo_start.c deleted file mode 100644 index f9c166d6a45..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_start.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_bfo.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_sendreq.h" -#include "ompi/memchecker.h" - - -int mca_pml_bfo_start(size_t count, ompi_request_t** requests) -{ - int rc; - size_t i; - bool reuse_old_request = true; - - for(i=0; ireq_type) { - continue; - } - - /* If the persistent request is currently active - obtain the - * request lock and verify the status is incomplete. if the - * pml layer has not completed the request - mark the request - * as free called - so that it will be freed when the request - * completes - and create a new request. 
- */ - - reuse_old_request = true; - switch(pml_request->req_ompi.req_state) { - case OMPI_REQUEST_INACTIVE: - if(pml_request->req_pml_complete == true) - break; - /* otherwise fall through */ - case OMPI_REQUEST_ACTIVE: { - - ompi_request_t *request; - OPAL_THREAD_LOCK(&ompi_request_lock); - if (pml_request->req_pml_complete == false) { - /* free request after it completes */ - pml_request->req_free_called = true; - } else { - /* can reuse the existing request */ - OPAL_THREAD_UNLOCK(&ompi_request_lock); - break; - } - - reuse_old_request = false; - /* allocate a new request */ - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: { - mca_pml_base_send_mode_t sendmode = - ((mca_pml_base_send_request_t*)pml_request)->req_send_mode; - rc = mca_pml_bfo_isend_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - sendmode, - pml_request->req_comm, - &request); - break; - } - case MCA_PML_REQUEST_RECV: - rc = mca_pml_bfo_irecv_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - pml_request->req_comm, - &request); - break; - default: - rc = OMPI_ERR_REQUEST; - break; - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); - if(OMPI_SUCCESS != rc) - return rc; - pml_request = (mca_pml_base_request_t*)request; - requests[i] = request; - break; - } - default: - return OMPI_ERR_REQUEST; - } - - /* start the request */ - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: - { - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)pml_request; - MEMCHECKER( - memchecker_call(&opal_memchecker_base_isdefined, - pml_request->req_addr, pml_request->req_count, - pml_request->req_datatype); - ); - if( reuse_old_request && (sendreq->req_send.req_bytes_packed != 0) ) { - size_t offset = 0; - /** - * Reset the convertor in case we're dealing with the original - * request, which when completed do not 
reset the convertor. - */ - opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor, - &offset ); - } - MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc); - if(rc != OMPI_SUCCESS) - return rc; - break; - } - case MCA_PML_REQUEST_RECV: - { - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)pml_request; - MCA_PML_BFO_RECV_REQUEST_START(recvreq); - break; - } - default: - return OMPI_ERR_REQUEST; - } - } - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/bfo/post_configure.sh b/ompi/mca/pml/bfo/post_configure.sh deleted file mode 100644 index 77a7d52608a..00000000000 --- a/ompi/mca/pml/bfo/post_configure.sh +++ /dev/null @@ -1 +0,0 @@ -DIRECT_CALL_HEADER="ompi/mca/pml/bfo/pml_bfo.h" diff --git a/ompi/mca/pml/cm/Makefile.am b/ompi/mca/pml/cm/Makefile.am index 80acfd31e1f..28ad04fb5dc 100644 --- a/ompi/mca/pml/cm/Makefile.am +++ b/ompi/mca/pml/cm/Makefile.am @@ -1,13 +1,13 @@ # # Copyright (c) 2004-2006 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/pml/cm/pml_cm.h b/ompi/mca/pml/cm/pml_cm.h index 96e8ecc3bde..ba055c474ea 100644 --- a/ompi/mca/pml/cm/pml_cm.h +++ b/ompi/mca/pml/cm/pml_cm.h @@ -4,6 +4,8 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -14,6 +16,10 @@ #ifndef PML_CM_H #define PML_CM_H +#ifdef HAVE_ALLOCA_H +#include +#endif + #include "ompi_config.h" #include "ompi/request/request.h" #include "ompi/mca/pml/pml.h" @@ -36,11 +42,11 @@ BEGIN_C_DECLS struct mca_mtl_request_t; -/* Array of send completion callback - one per send type - * These are called internally by the library when the send - * is completed from its perspective. +/* Array of send completion callback - one per send type + * These are called internally by the library when the send + * is completed from its perspective. */ -extern void (*send_completion_callbacks[]) +extern void (*send_completion_callbacks[]) (struct mca_mtl_request_t *mtl_request); struct ompi_pml_cm_t { @@ -48,7 +54,6 @@ struct ompi_pml_cm_t { int free_list_num; int free_list_max; int free_list_inc; - int default_priority; }; typedef struct ompi_pml_cm_t ompi_pml_cm_t; extern ompi_pml_cm_t ompi_pml_cm; @@ -77,13 +82,13 @@ mca_pml_cm_irecv_init(void *addr, #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT ompi_proc_t* ompi_proc; #endif - + MCA_PML_CM_HVY_RECV_REQUEST_ALLOC(recvreq); if( OPAL_UNLIKELY(NULL == recvreq) ) return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_CM_HVY_RECV_REQUEST_INIT(recvreq, ompi_proc, comm, tag, src, - datatype, addr, count, true); - + + MCA_PML_CM_HVY_RECV_REQUEST_INIT(recvreq, ompi_proc, comm, tag, src, + datatype, addr, count, true); + *request = (ompi_request_t*) recvreq; return OMPI_SUCCESS; @@ -106,7 +111,7 @@ mca_pml_cm_irecv(void *addr, MCA_PML_CM_THIN_RECV_REQUEST_ALLOC(recvreq); if( OPAL_UNLIKELY(NULL == recvreq) ) return OMPI_ERR_OUT_OF_RESOURCE; - + MCA_PML_CM_THIN_RECV_REQUEST_INIT(recvreq, ompi_proc, comm, @@ -114,7 +119,7 @@ mca_pml_cm_irecv(void *addr, datatype, addr, count); - + MCA_PML_CM_THIN_RECV_REQUEST_START(recvreq, comm, tag, src, ret); if( OPAL_LIKELY(OMPI_SUCCESS == ret) ) *request = (ompi_request_t*) recvreq; @@ -122,7 +127,7 @@ mca_pml_cm_irecv(void *addr, return ret; } 
-__opal_attribute_always_inline__ static inline void +__opal_attribute_always_inline__ static inline void mca_pml_cm_recv_fast_completion(struct mca_mtl_request_t *mtl_request) { // Do nothing! @@ -207,7 +212,7 @@ mca_pml_cm_recv(void *addr, } __opal_attribute_always_inline__ static inline int -mca_pml_cm_isend_init(void* buf, +mca_pml_cm_isend_init(const void* buf, size_t count, ompi_datatype_t* datatype, int dst, @@ -220,20 +225,26 @@ mca_pml_cm_isend_init(void* buf, #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT ompi_proc_t* ompi_proc; #endif - + MCA_PML_CM_HVY_SEND_REQUEST_ALLOC(sendreq, comm, dst, ompi_proc); if (OPAL_UNLIKELY(NULL == sendreq)) return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_CM_HVY_SEND_REQUEST_INIT(sendreq, ompi_proc, comm, tag, dst, + + MCA_PML_CM_HVY_SEND_REQUEST_INIT(sendreq, ompi_proc, comm, tag, dst, datatype, sendmode, true, false, buf, count); - + + /* Work around a leak in start by marking this request as complete. The + * problem occured because we do not have a way to differentiate an + * inital request and an incomplete pml request in start. This line + * allows us to detect this state. 
*/ + sendreq->req_send.req_base.req_pml_complete = true; + *request = (ompi_request_t*) sendreq; return OMPI_SUCCESS; } __opal_attribute_always_inline__ static inline int -mca_pml_cm_isend(void* buf, +mca_pml_cm_isend(const void* buf, size_t count, ompi_datatype_t* datatype, int dst, @@ -243,68 +254,68 @@ mca_pml_cm_isend(void* buf, ompi_request_t** request) { int ret; - - if(sendmode == MCA_PML_BASE_SEND_BUFFERED ) { + + if(sendmode == MCA_PML_BASE_SEND_BUFFERED ) { mca_pml_cm_hvy_send_request_t* sendreq; #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT ompi_proc_t* ompi_proc = NULL; #endif - + MCA_PML_CM_HVY_SEND_REQUEST_ALLOC(sendreq, comm, dst, ompi_proc); if (OPAL_UNLIKELY(NULL == sendreq)) return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_CM_HVY_SEND_REQUEST_INIT(sendreq, - ompi_proc, - comm, - tag, - dst, + + MCA_PML_CM_HVY_SEND_REQUEST_INIT(sendreq, + ompi_proc, + comm, + tag, + dst, datatype, sendmode, false, false, - buf, + buf, count); - + MCA_PML_CM_HVY_SEND_REQUEST_START( sendreq, ret); - + if (OPAL_LIKELY(OMPI_SUCCESS == ret)) *request = (ompi_request_t*) sendreq; - } else { + } else { mca_pml_cm_thin_send_request_t* sendreq; #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT ompi_proc_t* ompi_proc = NULL; #endif MCA_PML_CM_THIN_SEND_REQUEST_ALLOC(sendreq, comm, dst, ompi_proc); if (OPAL_UNLIKELY(NULL == sendreq)) return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_CM_THIN_SEND_REQUEST_INIT(sendreq, - ompi_proc, - comm, - tag, - dst, + + MCA_PML_CM_THIN_SEND_REQUEST_INIT(sendreq, + ompi_proc, + comm, + tag, + dst, datatype, sendmode, - buf, + buf, count); - + MCA_PML_CM_THIN_SEND_REQUEST_START( - sendreq, + sendreq, comm, tag, dst, sendmode, - false, + false, ret); - + if (OPAL_LIKELY(OMPI_SUCCESS == ret)) *request = (ompi_request_t*) sendreq; - + } - + return ret; } __opal_attribute_always_inline__ static inline int -mca_pml_cm_send(void *buf, +mca_pml_cm_send(const void *buf, size_t count, ompi_datatype_t* datatype, int dst, @@ -315,17 +326,17 @@ mca_pml_cm_send(void *buf, int ret = 
OMPI_ERROR; ompi_proc_t * ompi_proc; - if(sendmode == MCA_PML_BASE_SEND_BUFFERED) { + if(sendmode == MCA_PML_BASE_SEND_BUFFERED) { mca_pml_cm_hvy_send_request_t *sendreq; MCA_PML_CM_HVY_SEND_REQUEST_ALLOC(sendreq, comm, dst, ompi_proc); if (OPAL_UNLIKELY(NULL == sendreq)) return OMPI_ERR_OUT_OF_RESOURCE; - + MCA_PML_CM_HVY_SEND_REQUEST_INIT(sendreq, ompi_proc, comm, tag, - dst, + dst, datatype, sendmode, false, @@ -337,18 +348,18 @@ mca_pml_cm_send(void *buf, MCA_PML_CM_HVY_SEND_REQUEST_RETURN(sendreq); return ret; } - + ompi_request_free( (ompi_request_t**)&sendreq ); - } else { + } else { opal_convertor_t convertor; OBJ_CONSTRUCT(&convertor, opal_convertor_t); #if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT) if (opal_datatype_is_contiguous_memory_layout(&datatype->super, count)) { - + convertor.remoteArch = ompi_mpi_local_convertor->remoteArch; convertor.flags = ompi_mpi_local_convertor->flags; convertor.master = ompi_mpi_local_convertor->master; - + convertor.local_size = count * datatype->super.size; convertor.pBaseBuf = (unsigned char*)buf + datatype->super.true_lb; convertor.count = count; @@ -362,16 +373,16 @@ mca_pml_cm_send(void *buf, &datatype->super, count, buf, 0, &convertor); } - - ret = OMPI_MTL_CALL(send(ompi_mtl, - comm, - dst, - tag, + + ret = OMPI_MTL_CALL(send(ompi_mtl, + comm, + dst, + tag, &convertor, sendmode)); OBJ_DESTRUCT(&convertor); } - + return ret; } @@ -456,7 +467,7 @@ mca_pml_cm_imrecv(void *buf, MCA_PML_CM_THIN_RECV_REQUEST_ALLOC(recvreq); if( OPAL_UNLIKELY(NULL == recvreq) ) return OMPI_ERR_OUT_OF_RESOURCE; - + MCA_PML_CM_THIN_RECV_REQUEST_INIT(recvreq, ompi_proc, comm, @@ -464,7 +475,7 @@ mca_pml_cm_imrecv(void *buf, datatype, buf, count); - + MCA_PML_CM_THIN_RECV_REQUEST_MATCHED_START(recvreq, message, ret); if( OPAL_LIKELY(OMPI_SUCCESS == ret) ) *request = (ompi_request_t*) recvreq; @@ -491,13 +502,13 @@ mca_pml_cm_mrecv(void *buf, MCA_PML_CM_THIN_RECV_REQUEST_INIT(recvreq, ompi_proc, - comm, + comm, (*message)->peer, datatype, buf, 
count); - - MCA_PML_CM_THIN_RECV_REQUEST_MATCHED_START(recvreq, + + MCA_PML_CM_THIN_RECV_REQUEST_MATCHED_START(recvreq, message, ret); if( OPAL_UNLIKELY(OMPI_SUCCESS != ret) ) { MCA_PML_CM_THIN_RECV_REQUEST_RETURN(recvreq); diff --git a/ompi/mca/pml/cm/pml_cm_component.c b/ompi/mca/pml/cm/pml_cm_component.c index f5e932f7d0a..72f79312cf9 100644 --- a/ompi/mca/pml/cm/pml_cm_component.c +++ b/ompi/mca/pml/cm/pml_cm_component.c @@ -61,7 +61,7 @@ mca_pml_base_component_2_0_0_t mca_pml_cm_component = { .pmlm_finalize = mca_pml_cm_component_fini, }; -/* Array of send completion callback - one per send type +/* Array of send completion callback - one per send type * These are called internally by the library when the send * is completed from its perspective. */ @@ -101,14 +101,6 @@ mca_pml_cm_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_pml_cm.free_list_inc); - ompi_pml_cm.default_priority = 10; - (void) mca_base_component_var_register(&mca_pml_cm_component.pmlm_version, "priority", - "CM PML selection priority", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_pml_cm.default_priority); - return OPAL_SUCCESS; } @@ -124,7 +116,7 @@ mca_pml_cm_component_open(void) ret = OPAL_ERR_NOT_AVAILABLE; } } - + return ret; } @@ -143,36 +135,25 @@ mca_pml_cm_component_init(int* priority, { int ret; - if((*priority) > ompi_pml_cm.default_priority) { - *priority = ompi_pml_cm.default_priority; - return NULL; - } - *priority = ompi_pml_cm.default_priority; - opal_output_verbose( 10, 0, + *priority = -1; + + opal_output_verbose( 10, 0, "in cm pml priority is %d\n", *priority); /* find a useable MTL */ - ret = ompi_mtl_base_select(enable_progress_threads, enable_mpi_threads); - if (OMPI_SUCCESS != ret) { - *priority = -1; + ret = ompi_mtl_base_select(enable_progress_threads, enable_mpi_threads, priority); + if (OMPI_SUCCESS != ret) { return NULL; - } else 
if((strcmp(ompi_mtl_base_selected_component->mtl_version.mca_component_name, "psm") == 0) || - (strcmp(ompi_mtl_base_selected_component->mtl_version.mca_component_name, "mxm") == 0) || - (strcmp(ompi_mtl_base_selected_component->mtl_version.mca_component_name, "ofi") == 0) || - (strcmp(ompi_mtl_base_selected_component->mtl_version.mca_component_name, "portals4") == 0)) { - /* - * If MTL is MXM or PSM then up our priority - * For every other communication layer having MTLs and BTLs, the user/admin - * may still select PML/ob1 (BTLs) or PML/cm (MTLs) if preferable for the app/site. - */ - *priority = 30; } - + if (ompi_mtl->mtl_flags & MCA_MTL_BASE_FLAG_REQUIRE_WORLD) { + ompi_pml_cm.super.pml_flags |= MCA_PML_BASE_FLAG_REQUIRE_WORLD; + } + /* update our tag / context id max values based on MTL information */ ompi_pml_cm.super.pml_max_contextid = ompi_mtl->mtl_max_contextid; ompi_pml_cm.super.pml_max_tag = ompi_mtl->mtl_max_tag; - + return &ompi_pml_cm.super; } diff --git a/ompi/mca/pml/cm/pml_cm_component.h b/ompi/mca/pml/cm/pml_cm_component.h index 559707042de..fc5cf233b72 100644 --- a/ompi/mca/pml/cm/pml_cm_component.h +++ b/ompi/mca/pml/cm/pml_cm_component.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** - * @file + * @file */ #ifndef MCA_PML_CM_COMPONENT_H diff --git a/ompi/mca/pml/cm/pml_cm_recvreq.c b/ompi/mca/pml/cm/pml_cm_recvreq.c index a2eeac13d62..707666c6aac 100644 --- a/ompi/mca/pml/cm/pml_cm_recvreq.c +++ b/ompi/mca/pml/cm/pml_cm_recvreq.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. @@ -24,25 +24,22 @@ static int mca_pml_cm_recv_request_free(struct ompi_request_t** request) { - mca_pml_cm_request_t* recvreq = *(mca_pml_cm_request_t**)request; - + mca_pml_cm_request_t* recvreq = *(mca_pml_cm_request_t**)request; + assert( false == recvreq->req_free_called ); - - OPAL_THREAD_LOCK(&ompi_request_lock); + recvreq->req_free_called = true; if( true == recvreq->req_pml_complete ) { if( MCA_PML_CM_REQUEST_RECV_THIN == recvreq->req_pml_type ) { MCA_PML_CM_THIN_RECV_REQUEST_RETURN((mca_pml_cm_hvy_recv_request_t*)recvreq ); } else { MCA_PML_CM_HVY_RECV_REQUEST_RETURN((mca_pml_cm_hvy_recv_request_t*)recvreq ); - } + } } - OPAL_THREAD_UNLOCK(&ompi_request_lock); - *request = MPI_REQUEST_NULL; return OMPI_SUCCESS; -} +} void mca_pml_cm_recv_request_completion(struct mca_mtl_request_t *mtl_request) @@ -56,7 +53,7 @@ void mca_pml_cm_recv_request_completion(struct mca_mtl_request_t *mtl_request) } } -static void +static void mca_pml_cm_recv_request_construct(mca_pml_cm_thin_recv_request_t* recvreq) { recvreq->req_base.req_ompi.req_free = mca_pml_cm_recv_request_free; diff --git a/ompi/mca/pml/cm/pml_cm_recvreq.h b/ompi/mca/pml/cm/pml_cm_recvreq.h index 1a9501eaafd..9dd3319cfec 100644 --- a/ompi/mca/pml/cm/pml_cm_recvreq.h +++ b/ompi/mca/pml/cm/pml_cm_recvreq.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. @@ -44,7 +44,7 @@ struct mca_pml_cm_hvy_recv_request_t { size_t req_bytes_packed; /**< packed size of a message given the datatype and count */ bool req_blocking; mca_mtl_request_t req_mtl; /**< the mtl specific memory. This field should be the last in the struct */ -}; +}; typedef struct mca_pml_cm_hvy_recv_request_t mca_pml_cm_hvy_recv_request_t; OBJ_CLASS_DECLARATION(mca_pml_cm_hvy_recv_request_t); @@ -297,7 +297,7 @@ do { \ do { \ ompi_request_complete( &(recvreq->req_base.req_ompi), true ); \ } while (0) - + /** * Return a recv request to the modules free list. @@ -308,15 +308,12 @@ do { \ do { \ assert( false == recvreq->req_base.req_pml_complete ); \ \ - OPAL_THREAD_LOCK(&ompi_request_lock); \ - \ if( true == recvreq->req_base.req_free_called ) { \ MCA_PML_CM_THIN_RECV_REQUEST_RETURN( recvreq ); \ } else { \ recvreq->req_base.req_pml_complete = true; \ ompi_request_complete( &(recvreq->req_base.req_ompi), true ); \ } \ - OPAL_THREAD_UNLOCK(&ompi_request_lock); \ } while(0) @@ -331,8 +328,6 @@ do { \ do { \ assert( false == recvreq->req_base.req_pml_complete ); \ \ - OPAL_THREAD_LOCK(&ompi_request_lock); \ - \ if( true == recvreq->req_base.req_free_called ) { \ MCA_PML_CM_HVY_RECV_REQUEST_RETURN( recvreq ); \ } else { \ @@ -345,7 +340,6 @@ do { \ recvreq->req_base.req_pml_complete = true; \ ompi_request_complete( &(recvreq->req_base.req_ompi), true ); \ } \ - OPAL_THREAD_UNLOCK(&ompi_request_lock); \ } while(0) diff --git a/ompi/mca/pml/cm/pml_cm_request.c b/ompi/mca/pml/cm/pml_cm_request.c index feab34e23fc..df029595970 100644 --- a/ompi/mca/pml/cm/pml_cm_request.c +++ b/ompi/mca/pml/cm/pml_cm_request.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. @@ -22,16 +22,16 @@ #include "pml_cm_request.h" -static void mca_pml_cm_request_construct( mca_pml_cm_request_t* req) { +static void mca_pml_cm_request_construct( mca_pml_cm_request_t* req) { OBJ_CONSTRUCT(&req->req_convertor, opal_convertor_t); req->req_ompi.req_type = OMPI_REQUEST_PML; } -static void mca_pml_cm_request_destruct( mca_pml_cm_request_t* req) { +static void mca_pml_cm_request_destruct( mca_pml_cm_request_t* req) { OBJ_DESTRUCT(&req->req_convertor); } -OBJ_CLASS_INSTANCE(mca_pml_cm_request_t, - ompi_request_t, +OBJ_CLASS_INSTANCE(mca_pml_cm_request_t, + ompi_request_t, mca_pml_cm_request_construct, mca_pml_cm_request_destruct); diff --git a/ompi/mca/pml/cm/pml_cm_request.h b/ompi/mca/pml/cm/pml_cm_request.h index 3451da375bc..f0605f94a12 100644 --- a/ompi/mca/pml/cm/pml_cm_request.h +++ b/ompi/mca/pml/cm/pml_cm_request.h @@ -2,10 +2,10 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. 
@@ -36,17 +36,17 @@ typedef enum { } mca_pml_cm_request_type_t; /** - * Base type for PML CM P2P requests + * Base type for PML CM P2P requests */ -struct mca_pml_cm_request_t { +struct mca_pml_cm_request_t { /* START: These fields have to match the definition of the mca_pml_base_request_t */ ompi_request_t req_ompi; /**< base request */ - volatile bool req_pml_complete; /**< flag indicating if the pt-2-pt layer is done with this request */ + volatile int32_t req_pml_complete; /**< flag indicating if the pt-2-pt layer is done with this request */ + volatile int32_t req_free_called; /**< flag indicating if the user has freed this request */ mca_pml_cm_request_type_t req_pml_type; struct ompi_communicator_t *req_comm; /**< communicator pointer */ struct ompi_datatype_t *req_datatype; /**< pointer to data type */ - volatile bool req_free_called; /**< flag indicating if the user has freed this request */ opal_convertor_t req_convertor; /**< convertor that describes the memory layout */ /* END: These fields have to match the definition of the mca_pml_base_request_t */ }; diff --git a/ompi/mca/pml/cm/pml_cm_sendreq.c b/ompi/mca/pml/cm/pml_cm_sendreq.c index abb4f33fc84..8d0f3bad90f 100644 --- a/ompi/mca/pml/cm/pml_cm_sendreq.c +++ b/ompi/mca/pml/cm/pml_cm_sendreq.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. 
@@ -34,8 +34,7 @@ mca_pml_cm_send_request_free(struct ompi_request_t** request) { mca_pml_cm_send_request_t* sendreq = *(mca_pml_cm_send_request_t**)request; assert( false == sendreq->req_base.req_free_called ); - - OPAL_THREAD_LOCK(&ompi_request_lock); + sendreq->req_base.req_free_called = true; if( true == sendreq->req_base.req_pml_complete ) { if( MCA_PML_CM_REQUEST_SEND_THIN == sendreq->req_base.req_pml_type ) { @@ -44,9 +43,7 @@ mca_pml_cm_send_request_free(struct ompi_request_t** request) MCA_PML_CM_HVY_SEND_REQUEST_RETURN( ((mca_pml_cm_hvy_send_request_t*) sendreq) ); } } - - OPAL_THREAD_UNLOCK(&ompi_request_lock); - + *request = MPI_REQUEST_NULL; return OMPI_SUCCESS; } @@ -54,7 +51,7 @@ mca_pml_cm_send_request_free(struct ompi_request_t** request) void mca_pml_cm_send_request_completion(struct mca_mtl_request_t *mtl_request) { - mca_pml_cm_send_request_t *base_request = + mca_pml_cm_send_request_t *base_request = (mca_pml_cm_send_request_t*) mtl_request->ompi_req; if( MCA_PML_CM_REQUEST_SEND_THIN == base_request->req_base.req_pml_type ) { MCA_PML_CM_THIN_SEND_REQUEST_PML_COMPLETE(((mca_pml_cm_thin_send_request_t*) base_request)); @@ -70,17 +67,17 @@ static void mca_pml_cm_send_request_construct(mca_pml_cm_hvy_send_request_t* sen sendreq->req_send.req_base.req_ompi.req_cancel = mca_pml_cm_cancel; } -OBJ_CLASS_INSTANCE(mca_pml_cm_send_request_t, - mca_pml_cm_request_t, +OBJ_CLASS_INSTANCE(mca_pml_cm_send_request_t, + mca_pml_cm_request_t, NULL, NULL); -OBJ_CLASS_INSTANCE(mca_pml_cm_thin_send_request_t, - mca_pml_cm_send_request_t, - mca_pml_cm_send_request_construct, +OBJ_CLASS_INSTANCE(mca_pml_cm_thin_send_request_t, + mca_pml_cm_send_request_t, + mca_pml_cm_send_request_construct, NULL); -OBJ_CLASS_INSTANCE(mca_pml_cm_hvy_send_request_t, +OBJ_CLASS_INSTANCE(mca_pml_cm_hvy_send_request_t, mca_pml_cm_send_request_t, mca_pml_cm_send_request_construct, NULL); diff --git a/ompi/mca/pml/cm/pml_cm_sendreq.h b/ompi/mca/pml/cm/pml_cm_sendreq.h index 
d699c2a25ba..d0c22a9cea2 100644 --- a/ompi/mca/pml/cm/pml_cm_sendreq.h +++ b/ompi/mca/pml/cm/pml_cm_sendreq.h @@ -6,12 +6,14 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,7 +31,7 @@ #include "ompi/mca/mtl/mtl.h" #include "opal/prefetch.h" -struct mca_pml_cm_send_request_t { +struct mca_pml_cm_send_request_t { mca_pml_cm_request_t req_base; mca_pml_base_send_mode_t req_send_mode; }; @@ -37,7 +39,7 @@ typedef struct mca_pml_cm_send_request_t mca_pml_cm_send_request_t; OBJ_CLASS_DECLARATION(mca_pml_cm_send_request_t); -struct mca_pml_cm_thin_send_request_t { +struct mca_pml_cm_thin_send_request_t { mca_pml_cm_send_request_t req_send; mca_mtl_request_t req_mtl; /**< the mtl specific memory. 
This field should be the last in the struct */ }; @@ -47,7 +49,7 @@ OBJ_CLASS_DECLARATION(mca_pml_cm_thin_send_request_t); struct mca_pml_cm_hvy_send_request_t { mca_pml_cm_send_request_t req_send; - void *req_addr; /**< pointer to application buffer */ + const void *req_addr; /**< pointer to application buffer */ size_t req_count; /**< count of user datatype elements */ int32_t req_peer; /**< peer process - rank w/in this communicator */ int32_t req_tag; /**< user defined tag */ @@ -314,7 +316,7 @@ do { \ #define MCA_PML_CM_SEND_REQUEST_START_SETUP(req_send) \ do { \ (req_send)->req_base.req_pml_complete = false; \ - (req_send)->req_base.req_ompi.req_complete = false; \ + (req_send)->req_base.req_ompi.req_complete = REQUEST_PENDING; \ (req_send)->req_base.req_ompi.req_state = \ OMPI_REQUEST_ACTIVE; \ (req_send)->req_base.req_ompi.req_status._cancelled = 0; \ @@ -365,7 +367,7 @@ do { \ } \ } \ } while(0); - + #define MCA_PML_CM_HVY_SEND_REQUEST_START(sendreq, ret) \ do { \ @@ -394,7 +396,7 @@ do { /* * The PML has completed a send request. Note that this request * may have been orphaned by the user or have already completed - * at the MPI level. + * at the MPI level. * This macro will never be called directly from the upper level, as it should * only be an internal call to the PML. */ @@ -407,8 +409,7 @@ do { mca_pml_base_bsend_request_free(sendreq->req_buff); \ } \ \ - OPAL_THREAD_LOCK(&ompi_request_lock); \ - if( false == sendreq->req_send.req_base.req_ompi.req_complete ) { \ + if( !REQUEST_COMPLETE(&sendreq->req_send.req_base.req_ompi)) { \ /* Should only be called for long messages (maybe synchronous) */ \ ompi_request_complete(&(sendreq->req_send.req_base.req_ompi), true); \ } \ @@ -424,7 +425,6 @@ do { &offset); \ } \ } \ - OPAL_THREAD_UNLOCK(&ompi_request_lock); \ } while (0) @@ -445,7 +445,7 @@ do { /* * The PML has completed a send request. Note that this request * may have been orphaned by the user or have already completed - * at the MPI level. 
+ * at the MPI level. * This macro will never be called directly from the upper level, as it should * only be an internal call to the PML. */ @@ -453,8 +453,7 @@ do { do { \ assert( false == sendreq->req_send.req_base.req_pml_complete ); \ \ - OPAL_THREAD_LOCK(&ompi_request_lock); \ - if( false == sendreq->req_send.req_base.req_ompi.req_complete ) { \ + if( !REQUEST_COMPLETE(&sendreq->req_send.req_base.req_ompi)) { \ /* Should only be called for long messages (maybe synchronous) */ \ ompi_request_complete(&(sendreq->req_send.req_base.req_ompi), true); \ } \ @@ -463,10 +462,9 @@ do { \ if( sendreq->req_send.req_base.req_free_called ) { \ MCA_PML_CM_THIN_SEND_REQUEST_RETURN( sendreq ); \ } \ - OPAL_THREAD_UNLOCK(&ompi_request_lock); \ } while (0) - - + + /* * Release resources associated with a request */ diff --git a/ompi/mca/pml/cm/pml_cm_start.c b/ompi/mca/pml/cm/pml_cm_start.c index b86891043df..326449961df 100644 --- a/ompi/mca/pml/cm/pml_cm_start.c +++ b/ompi/mca/pml/cm/pml_cm_start.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,11 +6,13 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,87 +35,50 @@ int mca_pml_cm_start(size_t count, ompi_request_t** requests) { int rc; - size_t i; - for (i = 0 ; i < count ; i++) { - mca_pml_cm_request_t *pml_request = - (mca_pml_cm_request_t*)requests[i]; - if (OMPI_REQUEST_PML != requests[i]->req_type) { - continue; - } - if (NULL == pml_request) { + + for (size_t i = 0 ; i < count ; i++) { + mca_pml_cm_request_t *pml_request = (mca_pml_cm_request_t*)requests[i]; + if (OMPI_REQUEST_PML != requests[i]->req_type || NULL == pml_request) { continue; } - /* If the persistent request is currebtly active - obtain the - * request lock and verify the status is incomplete. if the - * pml layer has not completed the request - mark the request - * as free called - so that it will be freed when the request - * completes - and create a new request. - */ - switch (pml_request->req_ompi.req_state) { - case OMPI_REQUEST_INACTIVE: - if (pml_request->req_pml_complete == true) - break; - - case OMPI_REQUEST_ACTIVE: { - /* otherwise fall through */ - ompi_request_t *request; - - OPAL_THREAD_LOCK(&ompi_request_lock); - if (pml_request->req_pml_complete == false) { - /* free request after it completes */ - pml_request->req_free_called = true; - } else { - /* can reuse the existing request */ - OPAL_THREAD_UNLOCK(&ompi_request_lock); - break; - } - - /* allocate a new request */ - switch (pml_request->req_pml_type) { - case MCA_PML_CM_REQUEST_SEND_HEAVY: { - mca_pml_cm_hvy_send_request_t* sendreq = (mca_pml_cm_hvy_send_request_t*) pml_request; - rc = mca_pml_cm_isend_init( sendreq->req_addr, - sendreq->req_count, - sendreq->req_send.req_base.req_datatype, - sendreq->req_peer, - sendreq->req_tag, - sendreq->req_send.req_send_mode, - sendreq->req_send.req_base.req_comm, - &request ); - break; - } - case MCA_PML_CM_REQUEST_RECV_HEAVY: { - mca_pml_cm_hvy_recv_request_t* recvreq = (mca_pml_cm_hvy_recv_request_t*) pml_request; - rc = mca_pml_cm_irecv_init( recvreq->req_addr, - 
recvreq->req_count, - recvreq->req_base.req_datatype, - recvreq->req_peer, - recvreq->req_tag, - recvreq->req_base.req_comm, - &request ); - break; - } - default: - rc = OMPI_ERR_REQUEST; - break; - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); - if(OMPI_SUCCESS != rc) - return rc; - pml_request = (mca_pml_cm_request_t*)request; - requests[i] = request; - break; - } - default: - return OMPI_ERR_REQUEST; - } - + /* start the request */ switch (pml_request->req_pml_type) { case MCA_PML_CM_REQUEST_SEND_HEAVY: { mca_pml_cm_hvy_send_request_t* sendreq = (mca_pml_cm_hvy_send_request_t*)pml_request; + if (!sendreq->req_send.req_base.req_pml_complete) { + ompi_request_t *request; + + /* buffered sends can be mpi complete and pml incomplete. to support this + * case we need to allocate a new request. */ + rc = mca_pml_cm_isend_init (sendreq->req_addr, + sendreq->req_count, + sendreq->req_send.req_base.req_datatype, + sendreq->req_peer, + sendreq->req_tag, + sendreq->req_send.req_send_mode, + sendreq->req_send.req_base.req_comm, + &request); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + /* copy the callback and callback data to the new requests */ + request->req_complete_cb = pml_request->req_ompi.req_complete_cb; + request->req_complete_cb_data = pml_request->req_ompi.req_complete_cb_data; + + /* ensure the old request gets released */ + pml_request->req_free_called = true; + + sendreq = (mca_pml_cm_hvy_send_request_t *) request; + requests[i] = request; + } + + /* reset the completion flag */ + sendreq->req_send.req_base.req_pml_complete = false; + MCA_PML_CM_HVY_SEND_REQUEST_START(sendreq, rc); if(rc != OMPI_SUCCESS) return rc; diff --git a/ompi/mca/pml/configure.m4 b/ompi/mca/pml/configure.m4 index 49efbc6b933..b8d5b8f9c99 100644 --- a/ompi/mca/pml/configure.m4 +++ b/ompi/mca/pml/configure.m4 @@ -3,9 +3,9 @@ # Copyright (c) 2013 Sandia National Laboratories. All rights reserved. 
# # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/pml/crcpw/Makefile.am b/ompi/mca/pml/crcpw/Makefile.am deleted file mode 100644 index 381c37fd35a..00000000000 --- a/ompi/mca/pml/crcpw/Makefile.am +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -crcpw_sources = \ - pml_crcpw.h \ - pml_crcpw_component.c \ - pml_crcpw_module.c - -if MCA_BUILD_ompi_pml_crcpw_DSO -component_noinst = -component_install = mca_pml_crcpw.la -else -component_noinst = libmca_pml_crcpw.la -component_install = -endif - - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_pml_crcpw_la_SOURCES = $(crcpw_sources) -mca_pml_crcpw_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pml_crcpw_la_SOURCES = $(crcpw_sources) -libmca_pml_crcpw_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/pml/crcpw/configure.m4 b/ompi/mca/pml/crcpw/configure.m4 deleted file mode 100644 index b09529be4f5..00000000000 --- a/ompi/mca/pml/crcpw/configure.m4 +++ /dev/null @@ -1,28 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_pml_crcpw_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_pml_crcpw_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/pml/crcpw/Makefile]) - - # If we don't want FT, don't compile this component - AS_IF([test "$opal_want_ft_cr" = "1"], - [$1], - [$2]) -])dnl diff --git a/ompi/mca/pml/crcpw/owner.txt b/ompi/mca/pml/crcpw/owner.txt deleted file mode 100644 index 8ad5fc38ed2..00000000000 --- a/ompi/mca/pml/crcpw/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: IU? -status: unmaintained diff --git a/ompi/mca/pml/crcpw/pml_crcpw.h b/ompi/mca/pml/crcpw/pml_crcpw.h deleted file mode 100644 index 5f6b8008362..00000000000 --- a/ompi/mca/pml/crcpw/pml_crcpw.h +++ /dev/null @@ -1,149 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_CRCPW_H -#define MCA_PML_CRCPW_H - -#include "ompi_config.h" - -#include "opal/class/opal_free_list.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/pml_base_request.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" - -BEGIN_C_DECLS - - /** - * CRCPW PML module - */ - struct mca_pml_crcpw_component_t { - mca_pml_base_component_t super; - int verbose; - int priority; - int output_handle; - bool pml_crcp_wrapped; - }; - typedef struct mca_pml_crcpw_component_t mca_pml_crcpw_component_t; - OMPI_MODULE_DECLSPEC extern mca_pml_crcpw_component_t mca_pml_crcpw_component; - - struct mca_pml_crcpw_module_t { - mca_pml_base_module_t super; - mca_pml_base_component_t wrapped_pml_component; - mca_pml_base_module_t wrapped_pml_module; - }; - typedef struct mca_pml_crcpw_module_t mca_pml_crcpw_module_t; - extern mca_pml_crcpw_module_t mca_pml_crcpw_module; - - /* Free list of PML states */ - OMPI_MODULE_DECLSPEC extern opal_free_list_t pml_state_list; - OMPI_MODULE_DECLSPEC extern bool pml_crcpw_is_finalized; - - /* - * PML module functions. - */ - int mca_pml_crcpw_component_open(void); - int mca_pml_crcpw_component_close(void); - - mca_pml_base_module_t* mca_pml_crcpw_component_init(int *priority, - bool enable_progress_threads, - bool enable_mpi_threads - ); - - int mca_pml_crcpw_component_finalize(void); - - /* - * PML interface functions. 
- */ - int mca_pml_crcpw_enable( bool enable ); - - int mca_pml_crcpw_add_comm( struct ompi_communicator_t* comm ); - int mca_pml_crcpw_del_comm( struct ompi_communicator_t* comm ); - - int mca_pml_crcpw_add_procs( struct ompi_proc_t **procs, size_t nprocs ); - int mca_pml_crcpw_del_procs( struct ompi_proc_t **procs, size_t nprocs ); - - int mca_pml_crcpw_progress(void); - - int mca_pml_crcpw_iprobe(int dst, int tag, struct ompi_communicator_t* comm, int *matched, ompi_status_public_t* status ); - - int mca_pml_crcpw_probe( int dst, int tag, struct ompi_communicator_t* comm, ompi_status_public_t* status ); - - - int mca_pml_crcpw_improbe( int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - struct ompi_message_t **message, - ompi_status_public_t* status ); - - int mca_pml_crcpw_mprobe( int dst, - int tag, - struct ompi_communicator_t* comm, - struct ompi_message_t **message, - ompi_status_public_t* status ); - - int mca_pml_crcpw_isend_init( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ); - - int mca_pml_crcpw_isend( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ); - - int mca_pml_crcpw_send( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm ); - - int mca_pml_crcpw_irecv_init( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct ompi_request_t **request); - - int mca_pml_crcpw_irecv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct ompi_request_t **request ); - - int mca_pml_crcpw_recv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, 
ompi_status_public_t* status); - - int mca_pml_crcpw_imrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - struct ompi_request_t **request ); - - int mca_pml_crcpw_mrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - ompi_status_public_t* status ); - - int mca_pml_crcpw_dump( struct ompi_communicator_t* comm, int verbose ); - - int mca_pml_crcpw_start( size_t count, ompi_request_t** requests ); - - int mca_pml_crcpw_ft_event(int state); - -END_C_DECLS - -#endif /* MCA_PML_CRCPW_H */ diff --git a/ompi/mca/pml/crcpw/pml_crcpw_component.c b/ompi/mca/pml/crcpw/pml_crcpw_component.c deleted file mode 100644 index 64bbfe455d1..00000000000 --- a/ompi/mca/pml/crcpw/pml_crcpw_component.c +++ /dev/null @@ -1,192 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "opal/runtime/opal.h" -#include "opal/util/output.h" -#include "opal/mca/event/event.h" -#include "opal/mca/btl/base/base.h" - -#include "mpi.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/mca/pml/crcpw/pml_crcpw.h" -#include "ompi/mca/bml/base/base.h" - -static int mca_pml_crcpw_component_register(void); - -mca_pml_crcpw_component_t mca_pml_crcpw_component = { - { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .pmlm_version = { - MCA_PML_BASE_VERSION_2_0_0, - - .mca_component_name = "crcpw", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_open_component = mca_pml_crcpw_component_open, - .mca_close_component = mca_pml_crcpw_component_close, - .mca_register_component_params = mca_pml_crcpw_component_register, - }, - .pmlm_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .pmlm_init = mca_pml_crcpw_component_init, - .pmlm_finalize = mca_pml_crcpw_component_finalize, - }, - /* Verbosity */ - 0, - /* Priority */ - PML_SELECT_WRAPPER_PRIORITY, - /* Are we being used as a wrapper? 
*/ - false -}; - -opal_free_list_t pml_state_list; -bool pml_crcpw_is_finalized = false; - -static int mca_pml_crcpw_component_register(void) -{ - /* - * Register some MCA parameters - */ - mca_pml_crcpw_component.priority = PML_SELECT_WRAPPER_PRIORITY; - (void) mca_base_component_var_register(&mca_pml_crcpw_component.super.pmlm_version, "priority", - "Priority of the PML crcpw component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_pml_crcpw_component.priority); - - mca_pml_crcpw_component.verbose = 0; - (void) mca_base_component_var_register(&mca_pml_crcpw_component.super.pmlm_version, "verbose", - "Verbose level for the PML crcpw component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_pml_crcpw_component.verbose); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_component_open(void) -{ - opal_output_verbose( 10, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_open: Open"); - - mca_pml_crcpw_component.output_handle = opal_output_open(NULL); - if ( 0 != mca_pml_crcpw_component.verbose) { - opal_output_set_verbosity(mca_pml_crcpw_component.output_handle, - mca_pml_crcpw_component.verbose); - } - - /* - * Debug Output - */ - opal_output_verbose(10, mca_pml_crcpw_component.output_handle, - "pml:crcpw: open()"); - opal_output_verbose(20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: open: priority = %d", - mca_pml_crcpw_component.priority); - opal_output_verbose(20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: open: verbosity = %d", - mca_pml_crcpw_component.verbose); - - return OMPI_SUCCESS; -} - - -int mca_pml_crcpw_component_close(void) -{ - opal_output_verbose( 20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_close: Close"); - - return OMPI_SUCCESS; -} - - -mca_pml_base_module_t* mca_pml_crcpw_component_init(int* priority, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* We use the 
PML_SELECT_WRAPPER_PRIORITY to indicate when this - * component should wrap around what is already selected - * If it is not set to this seminal value, then we are doing a - * normal selection operation - */ - if(*priority == PML_SELECT_WRAPPER_PRIORITY ) { - opal_output_verbose( 20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_init: Wrap the selected component %s", - mca_pml_base_selected_component.pmlm_version.mca_component_name); - - mca_pml_crcpw_module.wrapped_pml_component = mca_pml_base_selected_component; - mca_pml_crcpw_module.wrapped_pml_module = mca_pml; - mca_pml_crcpw_component.pml_crcp_wrapped = true; - - opal_output_verbose( 20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_init: Initalize Wrapper"); - - OBJ_CONSTRUCT(&pml_state_list, opal_free_list_t); - opal_free_list_init (&pml_state_list, - sizeof(ompi_crcp_base_pml_state_t), - opal_cache_line_size, - OBJ_CLASS(ompi_crcp_base_pml_state_t), - 0,opal_cache_line_size, - 5, /* Initial number */ - -1, /* Max = Unlimited */ - 64, /* Increment by */ - NULL, 0, NULL, NULL, NULL); - } - else { - opal_output_verbose( 20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_init: Priority %d", - mca_pml_crcpw_component.priority); - } - - - *priority = mca_pml_crcpw_component.priority; - - pml_crcpw_is_finalized = false; - - return &mca_pml_crcpw_module.super; -} - -int mca_pml_crcpw_component_finalize(void) -{ - opal_output_verbose( 20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_finalize: Finalize"); - - OBJ_DESTRUCT(&pml_state_list); - - pml_crcpw_is_finalized = true; - - if(mca_pml_crcpw_component.pml_crcp_wrapped) { - return mca_pml_crcpw_module.wrapped_pml_component.pmlm_finalize(); - } - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/crcpw/pml_crcpw_module.c b/ompi/mca/pml/crcpw/pml_crcpw_module.c deleted file mode 100644 index 51ed4442213..00000000000 --- a/ompi/mca/pml/crcpw/pml_crcpw_module.c +++ /dev/null @@ -1,842 +0,0 @@ -/* 
-*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include - -#include "opal/mca/btl/base/base.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/crcpw/pml_crcpw.h" -#include "ompi/mca/bml/base/base.h" - -#include "opal/class/opal_free_list.h" - -mca_pml_crcpw_module_t mca_pml_crcpw_module = { - { - mca_pml_crcpw_add_procs, - mca_pml_crcpw_del_procs, - mca_pml_crcpw_enable, - mca_pml_crcpw_progress, - mca_pml_crcpw_add_comm, - mca_pml_crcpw_del_comm, - mca_pml_crcpw_irecv_init, - mca_pml_crcpw_irecv, - mca_pml_crcpw_recv, - mca_pml_crcpw_isend_init, - mca_pml_crcpw_isend, - mca_pml_crcpw_send, - mca_pml_crcpw_iprobe, - mca_pml_crcpw_probe, - mca_pml_crcpw_start, - mca_pml_crcpw_improbe, - mca_pml_crcpw_mprobe, - mca_pml_crcpw_imrecv, - mca_pml_crcpw_mrecv, - mca_pml_crcpw_dump, - mca_pml_crcpw_ft_event, - - 32768, - INT_MAX - } -}; - -#define PML_CRCP_STATE_ALLOC(pml_state) \ -do { \ - if( !pml_crcpw_is_finalized ) { \ - pml_state = (ompi_crcp_base_pml_state_t*) \ - opal_free_list_wait (&pml_state_list); \ - } \ -} while(0); - -#define PML_CRCP_STATE_RETURN(pml_state) \ -do { \ - if( !pml_crcpw_is_finalized ) { \ - opal_free_list_return (&pml_state_list, \ - 
(opal_free_list_item_t*)pml_state); \ - } \ -} while(0); - -int mca_pml_crcpw_enable(bool enable) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - if( OPAL_UNLIKELY(NULL == ompi_crcp.pml_enable) ) { - return mca_pml_crcpw_module.wrapped_pml_module.pml_enable(enable); - } - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_enable(enable, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_enable(enable) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_enable(enable, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_add_comm(ompi_communicator_t* comm) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - if( OPAL_UNLIKELY(NULL == ompi_crcp.pml_add_comm) ) { - return mca_pml_crcpw_module.wrapped_pml_module.pml_add_comm(comm); - } - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_add_comm(comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = 
mca_pml_crcpw_module.wrapped_pml_module.pml_add_comm(comm) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_add_comm(comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_del_comm(ompi_communicator_t* comm) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - if( OPAL_UNLIKELY(NULL == ompi_crcp.pml_del_comm) ) { - return mca_pml_crcpw_module.wrapped_pml_module.pml_del_comm(comm); - } - - PML_CRCP_STATE_ALLOC(pml_state); - if( NULL == pml_state ) { - return mca_pml_crcpw_module.wrapped_pml_module.pml_del_comm(comm); - } - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_del_comm(comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_del_comm(comm) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_del_comm(comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_add_procs(ompi_proc_t** procs, size_t nprocs) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = 
&(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_add_procs(procs, nprocs, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_add_procs(procs, nprocs) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_add_procs(procs, nprocs, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_del_procs(ompi_proc_t** procs, size_t nprocs) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_del_procs(procs, nprocs, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_del_procs(procs, nprocs) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_del_procs(procs, nprocs, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_iprobe(int dst, int tag, struct ompi_communicator_t* comm, int *matched, 
ompi_status_public_t* status ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_iprobe(dst, tag, comm, matched, status, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_DONE == pml_state->state) { - goto CLEANUP; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_iprobe(dst, tag, comm, matched, status) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_iprobe(dst, tag, comm, matched, status, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - CLEANUP: - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_probe( int dst, int tag, struct ompi_communicator_t* comm, ompi_status_public_t* status ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_probe(dst, tag, comm, status, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_DONE == pml_state->state) { - goto CLEANUP; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = 
mca_pml_crcpw_module.wrapped_pml_module.pml_probe(dst, tag, comm, status) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_probe(dst, tag, comm, status, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - CLEANUP: - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_isend_init( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_isend_init(buf, count, datatype, dst, tag, mode, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_isend_init(buf, count, datatype, dst, tag, mode, comm, request) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_isend_init(buf, count, datatype, dst, tag, mode, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_isend( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request 
) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_isend(buf, count, datatype, dst, tag, mode, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_isend(buf, count, datatype, dst, tag, mode, comm, request) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_isend(buf, count, datatype, dst, tag, mode, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - opal_cr_stall_check = false; - OPAL_CR_TEST_CHECKPOINT_READY(); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_send( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_send(buf, count, datatype, dst, tag, mode, comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = 
mca_pml_crcpw_module.wrapped_pml_module.pml_send(buf, count, datatype, dst, tag, mode, comm) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_send(buf, count, datatype, dst, tag, mode, comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - opal_cr_stall_check = false; - OPAL_CR_TEST_CHECKPOINT_READY(); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_irecv_init( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct ompi_request_t **request) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_irecv_init(buf, count, datatype, src, tag, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_irecv_init(buf, count, datatype, src, tag, comm, request) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_irecv_init(buf, count, datatype, src, tag, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_irecv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct 
ompi_request_t **request ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_irecv(buf, count, datatype, src, tag, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_DONE == pml_state->state) { - goto CLEANUP; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_irecv(buf, count, datatype, src, tag, comm, request) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_irecv(buf, count, datatype, src, tag, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - CLEANUP: - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_recv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, ompi_status_public_t* given_status) -{ - int ret = OMPI_SUCCESS, actual_ret = OMPI_SUCCESS; - ompi_status_public_t* status = NULL; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - if( given_status == NULL) { - status = (ompi_status_public_t*)malloc(sizeof(ompi_status_public_t)); - } - else { - status = given_status; - } - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_recv(buf, count, datatype, src, tag, comm, status, 
pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_DONE == pml_state->state) { - goto CLEANUP; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (actual_ret = mca_pml_crcpw_module.wrapped_pml_module.pml_recv(buf, count, datatype, src, tag, comm, status) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_recv(buf, count, datatype, src, tag, comm, status, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( given_status == NULL) { - free(status); - } - - CLEANUP: - PML_CRCP_STATE_RETURN(pml_state); - - opal_cr_stall_check = false; - OPAL_CR_TEST_CHECKPOINT_READY(); - - return actual_ret; -} - -int mca_pml_crcpw_dump( struct ompi_communicator_t* comm, int verbose ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_dump(comm, verbose, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_dump(comm, verbose) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_dump(comm, verbose, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; 
-} - -int mca_pml_crcpw_progress(void) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - if( OPAL_LIKELY(NULL == ompi_crcp.pml_progress) ) { - return mca_pml_crcpw_module.wrapped_pml_module.pml_progress(); - } - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_progress(pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_progress() ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_progress(pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_start( size_t count, ompi_request_t** requests ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_start(count, requests, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_DONE == pml_state->state) { - goto CLEANUP; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_start(count, requests) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - 
pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_start(count, requests, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - CLEANUP: - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - - -int -mca_pml_crcpw_improbe(int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - struct ompi_message_t **message, - ompi_status_public_t* status) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - - -int -mca_pml_crcpw_mprobe(int dst, - int tag, - struct ompi_communicator_t* comm, - struct ompi_message_t **message, - ompi_status_public_t* status) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - - -int -mca_pml_crcpw_imrecv(void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - struct ompi_request_t **request) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - - -int -mca_pml_crcpw_mrecv(void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - ompi_status_public_t* status) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - - -int mca_pml_crcpw_ft_event(int state) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_ft_event(state, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state && - NULL != mca_pml_crcpw_module.wrapped_pml_module.pml_ft_event) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_ft_event(state) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_ft_event(state, 
pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/pml/example/Makefile.am b/ompi/mca/pml/example/Makefile.am index ee7868ae2af..b1cb203e84b 100644 --- a/ompi/mca/pml/example/Makefile.am +++ b/ompi/mca/pml/example/Makefile.am @@ -5,13 +5,13 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/pml/example/pml_example.c b/ompi/mca/pml/example/pml_example.c index 799cf234cfc..ef12f3c7e42 100644 --- a/ompi/mca/pml/example/pml_example.c +++ b/ompi/mca/pml/example/pml_example.c @@ -40,7 +40,7 @@ mca_pml_example_t mca_pml_example = { mca_pml_example_mprobe, mca_pml_example_imrecv, mca_pml_example_mrecv, - mca_pml_example_ft_event, + NULL, 32768, (0x7fffffff) @@ -76,23 +76,3 @@ int mca_pml_example_del_procs(ompi_proc_t** procs, size_t nprocs) { return OMPI_SUCCESS; } - -int mca_pml_example_ft_event(int state) { - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/pml/example/pml_example.h b/ompi/mca/pml/example/pml_example.h index 14573e8cc2e..2fe52a3e180 100644 --- a/ompi/mca/pml/example/pml_example.h +++ b/ompi/mca/pml/example/pml_example.h @@ -6,6 +6,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2011 Sandia National Laboratories. 
All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -147,9 +149,6 @@ extern int mca_pml_example_progress(void); extern int mca_pml_example_start( size_t count, ompi_request_t** requests ); -extern int mca_pml_example_ft_event(int state); - - END_C_DECLS #endif /* PML_EXAMPLE_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/mca/pml/ob1/Makefile.am b/ompi/mca/pml/ob1/Makefile.am index 4a6d3459df7..4609a29484e 100644 --- a/ompi/mca/pml/ob1/Makefile.am +++ b/ompi/mca/pml/ob1/Makefile.am @@ -13,9 +13,9 @@ # Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -46,7 +46,7 @@ ob1_sources = \ pml_ob1_recvreq.h \ pml_ob1_sendreq.c \ pml_ob1_sendreq.h \ - pml_ob1_start.c + pml_ob1_start.c # If we have CUDA support requested, build the CUDA file also if OPAL_cuda_support diff --git a/ompi/mca/pml/ob1/configure.m4 b/ompi/mca/pml/ob1/configure.m4 index 8020463da87..a8a8ad1e040 100644 --- a/ompi/mca/pml/ob1/configure.m4 +++ b/ompi/mca/pml/ob1/configure.m4 @@ -2,9 +2,9 @@ # # Copyright (c) 2013 Sandia National Laboratories. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -17,7 +17,7 @@ AC_DEFUN([MCA_ompi_pml_ob1_POST_CONFIG], [ AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([BML])]) ])dnl -# MCA_ompi_pml_ob1_CONFIG(action-if-can-compile, +# MCA_ompi_pml_ob1_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ # We can always build, unless we were explicitly disabled. 
diff --git a/ompi/mca/pml/ob1/help-mpi-pml-ob1.txt b/ompi/mca/pml/ob1/help-mpi-pml-ob1.txt index 46bbf95af26..b03cedd5fd8 100644 --- a/ompi/mca/pml/ob1/help-mpi-pml-ob1.txt +++ b/ompi/mca/pml/ob1/help-mpi-pml-ob1.txt @@ -2,9 +2,9 @@ # # Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # [eager_limit_too_small] @@ -17,7 +17,7 @@ name and its corresponding minimum value is shown below. BTL name: %s BTL eager limit value: %d (set via btl_%s_eager_limit) BTL eager limit minimum: %d - MCA parameter name: btl_%s_eager_limit + MCA parameter name: btl_%s_eager_limit # [cuda_eager_limit_too_small] The "CUDA eager limit" MCA parameter in the %s BTL was set to a value which @@ -29,7 +29,7 @@ name and its corresponding minimum value is shown below. BTL name: %s BTL CUDA eager limit value: %d (set via btl_%s_cuda_eager_limit) BTL CUDA eager limit minimum: %d - MCA parameter name: btl_%s_cuda_eager_limit + MCA parameter name: btl_%s_cuda_eager_limit # [cuda_rdma_limit_too_small] The "CUDA rdma limit" MCA parameter in the %s BTL was set to a value which @@ -41,4 +41,4 @@ name and its corresponding minimum value is shown below. BTL name: %s BTL CUDA rndv limit value: %d (set via btl_%s_cuda_rdma_limit) BTL CUDA rndv limit minimum: %d - MCA parameter name: btl_%s_cuda_rdma_limit + MCA parameter name: btl_%s_cuda_rdma_limit diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index e0c19459e24..ad320b68719 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -16,7 +16,8 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -41,7 +42,6 @@ #include "ompi/mca/pml/base/base.h" #include "ompi/mca/bml/base/base.h" #include "opal/mca/pmix/pmix.h" -#include "ompi/runtime/ompi_cr.h" #include "pml_ob1.h" #include "pml_ob1_component.h" @@ -74,16 +74,16 @@ mca_pml_ob1_t mca_pml_ob1 = { mca_pml_ob1_imrecv, mca_pml_ob1_mrecv, mca_pml_ob1_dump, - mca_pml_ob1_ft_event, + NULL, 65535, INT_MAX } }; #if OPAL_CUDA_SUPPORT -void mca_pml_ob1_cuda_add_ipc_support(struct mca_btl_base_module_t* btl, - int32_t flags, ompi_proc_t* errproc, - char* btlinfo); +extern void mca_pml_ob1_cuda_add_ipc_support(struct mca_btl_base_module_t* btl, + int32_t flags, ompi_proc_t* errproc, + char* btlinfo); #endif /* OPAL_CUDA_SUPPORT */ void mca_pml_ob1_error_handler( struct mca_btl_base_module_t* btl, @@ -109,7 +109,7 @@ int mca_pml_ob1_enable(bool enable) mca_pml_ob1.free_list_max, mca_pml_ob1.free_list_inc, NULL, 0, NULL, NULL, NULL); - + OBJ_CONSTRUCT(&mca_pml_ob1.recv_frags, opal_free_list_t); opal_free_list_init ( &mca_pml_ob1.recv_frags, @@ -121,7 +121,7 @@ int mca_pml_ob1_enable(bool enable) mca_pml_ob1.free_list_max, mca_pml_ob1.free_list_inc, NULL, 0, NULL, NULL, NULL); - + OBJ_CONSTRUCT(&mca_pml_ob1.pending_pckts, opal_free_list_t); opal_free_list_init ( &mca_pml_ob1.pending_pckts, sizeof(mca_pml_ob1_pckt_pending_t), @@ -191,11 +191,9 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) { /* allocate pml specific comm data */ mca_pml_ob1_comm_t* pml_comm = OBJ_NEW(mca_pml_ob1_comm_t); - opal_list_item_t *item, *next_item; - mca_pml_ob1_recv_frag_t* frag; + mca_pml_ob1_recv_frag_t *frag, *next_frag; mca_pml_ob1_comm_proc_t* pml_proc; mca_pml_ob1_match_hdr_t* hdr; - int i; if (NULL == pml_comm) { return OMPI_ERR_OUT_OF_RESOURCE; @@ -210,16 +208,8 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) mca_pml_ob1_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count); comm->c_pml_comm = pml_comm; - for( i = 0; i < comm->c_remote_group->grp_proc_count; 
i++ ) { - pml_comm->procs[i].ompi_proc = ompi_group_peer_lookup(comm->c_remote_group,i); - OBJ_RETAIN(pml_comm->procs[i].ompi_proc); - } /* Grab all related messages from the non_existing_communicator pending queue */ - for( item = opal_list_get_first(&mca_pml_ob1.non_existing_communicator_pending); - item != opal_list_get_end(&mca_pml_ob1.non_existing_communicator_pending); - item = next_item ) { - frag = (mca_pml_ob1_recv_frag_t*)item; - next_item = opal_list_get_next(item); + OPAL_LIST_FOREACH_SAFE(frag, next_frag, &mca_pml_ob1.non_existing_communicator_pending, mca_pml_ob1_recv_frag_t) { hdr = &frag->hdr.hdr_match; /* Is this fragment for the current communicator ? */ @@ -229,8 +219,8 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) /* As we now know we work on a fragment for this communicator * we should remove it from the * non_existing_communicator_pending list. */ - opal_list_remove_item( &mca_pml_ob1.non_existing_communicator_pending, - item ); + opal_list_remove_item (&mca_pml_ob1.non_existing_communicator_pending, + (opal_list_item_t *) frag); add_fragment_to_unexpected: @@ -249,7 +239,7 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) * We just have to push the fragment into the unexpected list of the corresponding * proc, or into the out-of-order (cant_match) list. */ - pml_proc = &(pml_comm->procs[hdr->hdr_src]); + pml_proc = mca_pml_ob1_peer_lookup(comm, hdr->hdr_src); if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) { /* We're now expecting the next sequence number. 
*/ @@ -283,12 +273,6 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) int mca_pml_ob1_del_comm(ompi_communicator_t* comm) { - mca_pml_ob1_comm_t* pml_comm = comm->c_pml_comm; - int i; - - for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) { - OBJ_RELEASE(pml_comm->procs[i].ompi_proc); - } OBJ_RELEASE(comm->c_pml_comm); comm->c_pml_comm = NULL; return OMPI_SUCCESS; @@ -303,9 +287,9 @@ int mca_pml_ob1_del_comm(ompi_communicator_t* comm) int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) { + mca_btl_base_selected_module_t *sm; opal_bitmap_t reachable; int rc; - opal_list_item_t *item; if(nprocs == 0) return OMPI_SUCCESS; @@ -315,21 +299,6 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) if(OMPI_SUCCESS != rc) return rc; - /* - * JJH: Disable this in FT enabled builds since - * we use a wrapper PML. It will cause this check to - * return failure as all processes will return the wrapper PML - * component in use instead of the wrapped PML component underneath. - */ -#if OPAL_ENABLE_FT_CR == 0 - /* make sure remote procs are using the same PML as us */ - if (OMPI_SUCCESS != (rc = mca_pml_base_pml_check_selected("ob1", - procs, - nprocs))) { - return rc; - } -#endif - rc = mca_bml.bml_add_procs( nprocs, procs, &reachable ); @@ -347,14 +316,10 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) BTLs requires iterating over the procs, as the BML does not expose all currently in use btls. 
*/ - for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ; - item != opal_list_get_end(&mca_btl_base_modules_initialized) ; - item = opal_list_get_next(item)) { - mca_btl_base_selected_module_t *sm = - (mca_btl_base_selected_module_t*) item; + OPAL_LIST_FOREACH(sm, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) { if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_ob1_hdr_t)) { opal_show_help("help-mpi-pml-ob1.txt", "eager_limit_too_small", - true, + true, sm->btl_component->btl_version.mca_component_name, ompi_process_info.nodename, sm->btl_component->btl_version.mca_component_name, @@ -376,7 +341,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) if (0 != sm->btl_module->btl_cuda_eager_limit) { if (sm->btl_module->btl_cuda_eager_limit < sizeof(mca_pml_ob1_hdr_t)) { opal_show_help("help-mpi-pml-ob1.txt", "cuda_eager_limit_too_small", - true, + true, sm->btl_component->btl_version.mca_component_name, ompi_process_info.nodename, sm->btl_component->btl_version.mca_component_name, @@ -394,7 +359,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) } else { if (sm->btl_module->btl_cuda_rdma_limit < sm->btl_module->btl_cuda_eager_limit) { opal_show_help("help-mpi-pml-ob1.txt", "cuda_rdma_limit_too_small", - true, + true, sm->btl_component->btl_version.mca_component_name, ompi_process_info.nodename, sm->btl_component->btl_version.mca_component_name, @@ -416,7 +381,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) NULL ); if(OMPI_SUCCESS != rc) goto cleanup_and_return; - + rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_RNDV, mca_pml_ob1_recv_frag_callback_rndv, NULL ); @@ -428,19 +393,19 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) NULL ); if(OMPI_SUCCESS != rc) goto cleanup_and_return; - + rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_ACK, mca_pml_ob1_recv_frag_callback_ack, NULL ); if(OMPI_SUCCESS != rc) goto cleanup_and_return; - + rc = mca_bml.bml_register( 
MCA_PML_OB1_HDR_TYPE_FRAG, mca_pml_ob1_recv_frag_callback_frag, NULL ); if(OMPI_SUCCESS != rc) goto cleanup_and_return; - + rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_PUT, mca_pml_ob1_recv_frag_callback_put, NULL ); @@ -452,12 +417,12 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) NULL ); if(OMPI_SUCCESS != rc) goto cleanup_and_return; - + /* register error handlers */ rc = mca_bml.bml_register_error(mca_pml_ob1_error_handler); if(OMPI_SUCCESS != rc) goto cleanup_and_return; - + cleanup_and_return: OBJ_DESTRUCT(&reachable); @@ -558,7 +523,7 @@ static void mca_pml_ob1_dump_frag_list(opal_list_t* queue, bool is_req) else snprintf(ctag, 64, "%d", req->req_tag); opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - (void*) req, cpeer, ctag, + (void*) req, cpeer, ctag, (void*) req->req_addr, req->req_count, (0 != req->req_count ? req->req_datatype->name : "N/A"), (void*) req->req_datatype, @@ -589,13 +554,19 @@ int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose) /* iterate through all procs on communicator */ for( i = 0; i < (int)pml_comm->num_procs; i++ ) { - mca_pml_ob1_comm_proc_t* proc = &pml_comm->procs[i]; - mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; + mca_pml_ob1_comm_proc_t* proc = pml_comm->procs[i]; + + if (NULL == proc) { + continue; + } + + mca_bml_base_endpoint_t* ep = mca_bml_base_get_endpoint(proc->ompi_proc); size_t n; opal_output(0, "[Rank %d] expected_seq %d ompi_proc %p send_seq %d\n", - i, proc->expected_sequence, (void*) proc->ompi_proc, + i, proc->expected_sequence, (void*) proc->ompi_proc, proc->send_sequence); + /* dump all receive queues */ if( opal_list_get_size(&proc->specific_receives) ) { opal_output(0, "expected specific receives\n"); @@ -623,8 +594,8 @@ static void mca_pml_ob1_fin_completion( mca_btl_base_module_t* btl, struct mca_btl_base_descriptor_t* des, int status ) { - - 
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; + + mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; /* check for pending requests */ MCA_PML_OB1_PROGRESS_PENDING(bml_btl); @@ -688,11 +659,11 @@ void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl) OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); if(NULL == pckt) break; - if(pckt->bml_btl != NULL && + if(pckt->bml_btl != NULL && pckt->bml_btl->btl == bml_btl->btl) { send_dst = pckt->bml_btl; } else { - mca_bml_base_endpoint_t* endpoint = + mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) pckt->proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; send_dst = mca_bml_base_btl_array_find( &endpoint->btl_eager, bml_btl->btl); @@ -729,6 +700,7 @@ void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl) pckt->order, pckt->status); if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { + MCA_PML_OB1_PCKT_PENDING_RETURN(pckt); return; } break; @@ -770,7 +742,7 @@ void mca_pml_ob1_process_pending_rdma(void) void mca_pml_ob1_error_handler( struct mca_btl_base_module_t* btl, int32_t flags, - opal_proc_t* errproc, char* btlinfo ) { + opal_proc_t* errproc, char* btlinfo ) { #if OPAL_CUDA_SUPPORT if (flags & MCA_BTL_ERROR_FLAGS_ADD_CUDA_IPC) { mca_pml_ob1_cuda_add_ipc_support(btl, flags, (struct ompi_proc_t*)errproc, btlinfo); @@ -780,209 +752,6 @@ void mca_pml_ob1_error_handler( ompi_rte_abort(-1, btlinfo); } -#if OPAL_ENABLE_FT_CR == 0 -int mca_pml_ob1_ft_event( int state ) { - return OMPI_SUCCESS; -} -#else -int mca_pml_ob1_ft_event( int state ) -{ - static bool first_continue_pass = false; - ompi_proc_t** procs = NULL; - size_t num_procs; - int ret, p; - - if(OPAL_CRS_CHECKPOINT == state) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1); - opal_pmix.fence(NULL, 0); - } - - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0); - } - else if(OPAL_CRS_CONTINUE == state) { - first_continue_pass = !first_continue_pass; - - if( 
!first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0); - opal_pmix.fence(NULL, 0); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2); - } - - if (opal_cr_continue_like_restart && !first_continue_pass) { - /* - * Get a list of processes - */ - procs = ompi_proc_all(&num_procs); - if(NULL == procs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* - * Refresh the proc structure, and publish our proc info in the modex. - * NOTE: Do *not* call ompi_proc_finalize as there are many places in - * the code that point to indv. procs in this strucutre. For our - * needs here we only need to fix up the modex, bml and pml - * references. - */ - if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { - opal_output(0, - "pml:ob1: ft_event(Restart): proc_refresh Failed %d", - ret); - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free (procs); - return ret; - } - } - } - else if(OPAL_CRS_RESTART_PRE == state ) { - /* Nothing here */ - } - else if(OPAL_CRS_RESTART == state ) { - /* - * Get a list of processes - */ - procs = ompi_proc_all(&num_procs); - if(NULL == procs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* - * Clean out the modex information since it is invalid now. - * ompi_rte_purge_proc_attrs(); - * This happens at the ORTE level, so doing it again here will cause - * some issues with socket caching. - */ - - - /* - * Refresh the proc structure, and publish our proc info in the modex. - * NOTE: Do *not* call ompi_proc_finalize as there are many places in - * the code that point to indv. procs in this strucutre. For our - * needs here we only need to fix up the modex, bml and pml - * references. 
- */ - if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { - opal_output(0, - "pml:ob1: ft_event(Restart): proc_refresh Failed %d", - ret); - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free (procs); - return ret; - } - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - /* Call the BML - * BML is expected to call ft_event in - * - BTL(s) - * - MPool(s) - */ - if( OMPI_SUCCESS != (ret = mca_bml.bml_ft_event(state))) { - opal_output(0, "pml:base: ft_event: BML ft_event function failed: %d\n", - ret); - } - - if(OPAL_CRS_CHECKPOINT == state) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P1); - - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR0); - /* JJH Cannot barrier here due to progress engine -- ompi_rte_barrier();*/ - } - } - else if(OPAL_CRS_CONTINUE == state) { - if( !first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1); - opal_pmix.fence(NULL, 0); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3); - } - - if (opal_cr_continue_like_restart && !first_continue_pass) { - opal_pmix.fence(NULL, 0); - - /* - * Startup the PML stack now that the modex is running again - * Add the new procs (BTLs redo modex recv's) - */ - if( OMPI_SUCCESS != (ret = mca_pml_ob1_add_procs(procs, num_procs) ) ) { - opal_output(0, "pml:ob1: ft_event(Restart): Failed in add_procs (%d)", ret); - return ret; - } - - /* Is this barrier necessary ? JJH */ - opal_pmix.fence(NULL, 0); - - if( NULL != procs ) { - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free(procs); - procs = NULL; - } - } - if( !first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2); - opal_pmix.fence(NULL, 0); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1); - } - } - else if(OPAL_CRS_RESTART_PRE == state ) { - /* Nothing here */ - } - else if(OPAL_CRS_RESTART == state ) { - /* - * Exchange the modex information once again. 
- * BTLs will have republished their modex information. - */ - opal_pmix.fence(NULL, 0); - - /* - * Startup the PML stack now that the modex is running again - * Add the new procs (BTLs redo modex recv's) - */ - if( OMPI_SUCCESS != (ret = mca_pml_ob1_add_procs(procs, num_procs) ) ) { - opal_output(0, "pml:ob1: ft_event(Restart): Failed in add_procs (%d)", ret); - return ret; - } - - /* Is this barrier necessary ? JJH */ - opal_pmix.fence(NULL, 0); - - if( NULL != procs ) { - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free(procs); - procs = NULL; - } - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OMPI_SUCCESS; -} -#endif /* OPAL_ENABLE_FT_CR */ - int mca_pml_ob1_com_btl_comp(const void *v1, const void *v2) { const mca_pml_ob1_com_btl_t *b1 = (const mca_pml_ob1_com_btl_t *) v1; diff --git a/ompi/mca/pml/ob1/pml_ob1.h b/ompi/mca/pml/ob1/pml_ob1.h index a564e90d264..3762e51856d 100644 --- a/ompi/mca/pml/ob1/pml_ob1.h +++ b/ompi/mca/pml/ob1/pml_ob1.h @@ -3,25 +3,27 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** - * @file + * @file */ #ifndef MCA_PML_OB1_H @@ -47,7 +49,7 @@ BEGIN_C_DECLS */ struct mca_pml_ob1_t { - mca_pml_base_module_t super; + mca_pml_base_module_t super; int priority; int free_list_num; /* initial size of free list */ @@ -58,9 +60,8 @@ struct mca_pml_ob1_t { size_t rdma_retries_limit; int max_rdma_per_request; int max_send_per_range; - bool leave_pinned; - int leave_pinned_pipeline; - + bool use_all_rdma; + /* lock queue access */ opal_mutex_t lock; @@ -78,16 +79,16 @@ struct mca_pml_ob1_t { opal_list_t rdma_pending; /* List of pending fragments without a matching communicator */ opal_list_t non_existing_communicator_pending; - bool enabled; + bool enabled; char* allocator_name; - mca_allocator_base_module_t* allocator; + mca_allocator_base_module_t* allocator; unsigned int unexpected_limit; }; -typedef struct mca_pml_ob1_t mca_pml_ob1_t; +typedef struct mca_pml_ob1_t mca_pml_ob1_t; extern mca_pml_ob1_t mca_pml_ob1; extern int mca_pml_ob1_output; - +extern bool mca_pml_ob1_matching_protection; /* * PML interface functions. 
*/ @@ -138,7 +139,7 @@ extern int mca_pml_ob1_mprobe( int dst, struct ompi_message_t **message, ompi_status_public_t* status ); -extern int mca_pml_ob1_isend_init( void *buf, +extern int mca_pml_ob1_isend_init( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, @@ -147,7 +148,7 @@ extern int mca_pml_ob1_isend_init( void *buf, struct ompi_communicator_t* comm, struct ompi_request_t **request ); -extern int mca_pml_ob1_isend( void *buf, +extern int mca_pml_ob1_isend( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, @@ -156,7 +157,7 @@ extern int mca_pml_ob1_isend( void *buf, struct ompi_communicator_t* comm, struct ompi_request_t **request ); -extern int mca_pml_ob1_send( void *buf, +extern int mca_pml_ob1_send( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, @@ -206,8 +207,6 @@ extern int mca_pml_ob1_dump( struct ompi_communicator_t* comm, extern int mca_pml_ob1_start( size_t count, ompi_request_t** requests ); -extern int mca_pml_ob1_ft_event( int state ); - /** * We will use these requests to hold on a traditionally allocated * requests in order to allow the parallel debugger full access to the @@ -259,8 +258,26 @@ do { \ OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); \ } while(0) +#define OB1_MATCHING_LOCK(lock) \ + do { \ + if( mca_pml_ob1_matching_protection ) { \ + opal_mutex_lock(lock); \ + } \ + else { OPAL_THREAD_LOCK(lock); } \ + } while(0) + + +#define OB1_MATCHING_UNLOCK(lock) \ + do { \ + if( mca_pml_ob1_matching_protection ) { \ + opal_mutex_unlock(lock); \ + } \ + else { OPAL_THREAD_UNLOCK(lock); } \ + } while(0) + + -int mca_pml_ob1_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, +int mca_pml_ob1_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, opal_ptr_t hdr_frag, uint64_t size, uint8_t order, int status); /* This function tries to resend FIN/ACK packets from pckt_pending queue. 
diff --git a/ompi/mca/pml/ob1/pml_ob1_comm.c b/ompi/mca/pml/ob1/pml_ob1_comm.c index 8c157220268..4333c11e6ae 100644 --- a/ompi/mca/pml/ob1/pml_ob1_comm.c +++ b/ompi/mca/pml/ob1/pml_ob1_comm.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -40,20 +40,22 @@ static void mca_pml_ob1_comm_proc_destruct(mca_pml_ob1_comm_proc_t* proc) OBJ_DESTRUCT(&proc->frags_cant_match); OBJ_DESTRUCT(&proc->specific_receives); OBJ_DESTRUCT(&proc->unexpected_frags); + if (proc->ompi_proc) { + OBJ_RELEASE(proc->ompi_proc); + } } -static OBJ_CLASS_INSTANCE( - mca_pml_ob1_comm_proc_t, - opal_object_t, - mca_pml_ob1_comm_proc_construct, - mca_pml_ob1_comm_proc_destruct); +OBJ_CLASS_INSTANCE(mca_pml_ob1_comm_proc_t, opal_object_t, + mca_pml_ob1_comm_proc_construct, + mca_pml_ob1_comm_proc_destruct); static void mca_pml_ob1_comm_construct(mca_pml_ob1_comm_t* comm) { OBJ_CONSTRUCT(&comm->wild_receives, opal_list_t); OBJ_CONSTRUCT(&comm->matching_lock, opal_mutex_t); + OBJ_CONSTRUCT(&comm->proc_lock, opal_mutex_t); comm->recv_sequence = 0; comm->procs = NULL; comm->last_probed = 0; @@ -63,13 +65,19 @@ static void mca_pml_ob1_comm_construct(mca_pml_ob1_comm_t* comm) static void mca_pml_ob1_comm_destruct(mca_pml_ob1_comm_t* comm) { - size_t i; - for(i=0; inum_procs; i++) - OBJ_DESTRUCT((&comm->procs[i])); - if(NULL != comm->procs) + if (NULL != comm->procs) { + for (size_t i = 0; i < comm->num_procs; ++i) { + if (comm->procs[i]) { + OBJ_RELEASE(comm->procs[i]); + } + } + free(comm->procs); + } + OBJ_DESTRUCT(&comm->wild_receives); 
OBJ_DESTRUCT(&comm->matching_lock); + OBJ_DESTRUCT(&comm->proc_lock); } @@ -80,18 +88,13 @@ OBJ_CLASS_INSTANCE( mca_pml_ob1_comm_destruct); -int mca_pml_ob1_comm_init_size(mca_pml_ob1_comm_t* comm, size_t size) +int mca_pml_ob1_comm_init_size (mca_pml_ob1_comm_t* comm, size_t size) { - size_t i; - /* send message sequence-number support - sender side */ - comm->procs = (mca_pml_ob1_comm_proc_t*)malloc(sizeof(mca_pml_ob1_comm_proc_t)*size); + comm->procs = (mca_pml_ob1_comm_proc_t **) calloc(size, sizeof (mca_pml_ob1_comm_proc_t *)); if(NULL == comm->procs) { return OMPI_ERR_OUT_OF_RESOURCE; } - for(i=0; iprocs+i, mca_pml_ob1_comm_proc_t); - } comm->num_procs = size; return OMPI_SUCCESS; } diff --git a/ompi/mca/pml/ob1/pml_ob1_comm.h b/ompi/mca/pml/ob1/pml_ob1_comm.h index 84aa3231d1a..10839eb3eaa 100644 --- a/ompi/mca/pml/ob1/pml_ob1_comm.h +++ b/ompi/mca/pml/ob1/pml_ob1_comm.h @@ -1,18 +1,21 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -24,6 +27,7 @@ #include "opal/threads/mutex.h" #include "opal/class/opal_list.h" #include "ompi/proc/proc.h" +#include "ompi/communicator/communicator.h" BEGIN_C_DECLS @@ -42,21 +46,19 @@ struct mca_pml_ob1_comm_proc_t { }; typedef struct mca_pml_ob1_comm_proc_t mca_pml_ob1_comm_proc_t; +OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_proc_t); /** * Cached on ompi_communicator_t to hold queues/state - * used by the PML<->PTL interface for matching logic. + * used by the PML<->PTL interface for matching logic. */ struct mca_pml_comm_t { opal_object_t super; -#if OPAL_ENABLE_MULTI_THREADS volatile uint32_t recv_sequence; /**< recv request sequence number - receiver side */ -#else - uint32_t recv_sequence; /**< recv request sequence number - receiver side */ -#endif opal_mutex_t matching_lock; /**< matching lock */ opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */ - mca_pml_ob1_comm_proc_t* procs; + opal_mutex_t proc_lock; + mca_pml_ob1_comm_proc_t **procs; size_t num_procs; size_t last_probed; }; @@ -64,12 +66,30 @@ typedef struct mca_pml_comm_t mca_pml_ob1_comm_t; OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_t); +static inline mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_lookup (struct ompi_communicator_t *comm, int rank) +{ + mca_pml_ob1_comm_t *pml_comm = (mca_pml_ob1_comm_t *)comm->c_pml_comm; + + if (OPAL_UNLIKELY(NULL == pml_comm->procs[rank])) { + OPAL_THREAD_LOCK(&pml_comm->proc_lock); + if (NULL == pml_comm->procs[rank]) { + mca_pml_ob1_comm_proc_t* proc = OBJ_NEW(mca_pml_ob1_comm_proc_t); + proc->ompi_proc = ompi_comm_peer_lookup (comm, rank); + OBJ_RETAIN(proc->ompi_proc); + opal_atomic_wmb (); + pml_comm->procs[rank] = proc; + } + OPAL_THREAD_UNLOCK(&pml_comm->proc_lock); + } + + return pml_comm->procs[rank]; +} /** * Initialize an instance of mca_pml_ob1_comm_t based on the communicator size. 
* * @param comm Instance of mca_pml_ob1_comm_t - * @param size Size of communicator + * @param size Size of communicator * @return OMPI_SUCCESS or error status on failure. */ diff --git a/ompi/mca/pml/ob1/pml_ob1_component.c b/ompi/mca/pml/ob1/pml_ob1_component.c index 2f37468ee20..6557bc20371 100644 --- a/ompi/mca/pml/ob1/pml_ob1_component.c +++ b/ompi/mca/pml/ob1/pml_ob1_component.c @@ -3,21 +3,21 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,11 +33,12 @@ #include "pml_ob1_recvreq.h" #include "pml_ob1_rdmafrag.h" #include "pml_ob1_recvfrag.h" -#include "ompi/mca/bml/base/base.h" +#include "ompi/mca/bml/base/base.h" #include "pml_ob1_component.h" #include "opal/mca/allocator/base/base.h" #include "opal/mca/base/mca_base_pvar.h" #include "opal/runtime/opal_params.h" +#include "opal/mca/btl/base/base.h" OBJ_CLASS_INSTANCE( mca_pml_ob1_pckt_pending_t, opal_free_list_item_t, @@ -53,6 +54,7 @@ mca_pml_ob1_component_init( int* priority, bool enable_progress_threads, static int mca_pml_ob1_component_fini(void); int mca_pml_ob1_output = 0; static int mca_pml_ob1_verbose = 0; +bool mca_pml_ob1_matching_protection = false; mca_pml_base_component_2_0_0_t mca_pml_ob1_component = { /* First, the mca_base_component_t struct containing meta @@ -60,7 +62,7 @@ mca_pml_base_component_2_0_0_t mca_pml_ob1_component = { .pmlm_version = { MCA_PML_BASE_VERSION_2_0_0, - + .mca_component_name = "ob1", .mca_component_major_version = OMPI_MAJOR_VERSION, .mca_component_minor_version = OMPI_MINOR_VERSION, @@ -78,12 +80,9 @@ mca_pml_base_component_2_0_0_t mca_pml_ob1_component = { .pmlm_finalize = mca_pml_ob1_component_fini, }; -void *mca_pml_ob1_seg_alloc( struct mca_mpool_base_module_t* mpool, - size_t* size, - mca_mpool_base_registration_t** registration); - -void mca_pml_ob1_seg_free( struct mca_mpool_base_module_t* mpool, - void* segment ); +void *mca_pml_ob1_seg_alloc (void *ctx, size_t* size); + +void mca_pml_ob1_seg_free (void *ctx, void *segment); static inline int mca_pml_ob1_param_register_int( const char* param_name, @@ -144,9 +143,12 @@ static int mca_pml_ob1_get_unex_msgq_size (const struct mca_base_pvar_t *pvar, v int i; for (i = 0 ; i < comm_size ; ++i) { - pml_proc = pml_comm->procs + i; - - values[i] = opal_list_get_size (&pml_proc->unexpected_frags); + pml_proc = pml_comm->procs[i]; + if (pml_proc) { + values[i] = 
opal_list_get_size (&pml_proc->unexpected_frags); + } else { + values[i] = 0; + } } return OMPI_SUCCESS; @@ -162,9 +164,13 @@ static int mca_pml_ob1_get_posted_recvq_size (const struct mca_base_pvar_t *pvar int i; for (i = 0 ; i < comm_size ; ++i) { - pml_proc = pml_comm->procs + i; + pml_proc = pml_comm->procs[i]; - values[i] = opal_list_get_size (&pml_proc->specific_receives); + if (pml_proc) { + values[i] = opal_list_get_size (&pml_proc->specific_receives); + } else { + values[i] = 0; + } } return OMPI_SUCCESS; @@ -191,7 +197,13 @@ static int mca_pml_ob1_component_register(void) mca_pml_ob1_param_register_int("max_send_per_range", 4, &mca_pml_ob1.max_send_per_range); mca_pml_ob1_param_register_uint("unexpected_limit", 128, &mca_pml_ob1.unexpected_limit); - + + mca_pml_ob1.use_all_rdma = false; + (void) mca_base_component_var_register(&mca_pml_ob1_component.pmlm_version, "use_all_rdma", + "Use all available RDMA btls for the RDMA and RDMA pipeline protocols " + "(default: false)", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_GROUP, &mca_pml_ob1.use_all_rdma); + mca_pml_ob1.allocator_name = "bucket"; (void) mca_base_component_var_register(&mca_pml_ob1_component.pmlm_version, "allocator", "Name of allocator component for unexpected messages", @@ -218,7 +230,7 @@ static int mca_pml_ob1_component_open(void) mca_pml_ob1_output = opal_output_open(NULL); opal_output_set_verbosity(mca_pml_ob1_output, mca_pml_ob1_verbose); - mca_pml_ob1.enabled = false; + mca_pml_ob1.enabled = false; return mca_base_framework_open(&ompi_bml_base_framework, 0); } @@ -237,7 +249,7 @@ static int mca_pml_ob1_component_close(void) static mca_pml_base_module_t* -mca_pml_ob1_component_init( int* priority, +mca_pml_ob1_component_init( int* priority, bool enable_progress_threads, bool enable_mpi_threads ) { @@ -246,10 +258,6 @@ mca_pml_ob1_component_init( int* priority, opal_output_verbose( 10, mca_pml_ob1_output, "in ob1, my priority is %d\n", mca_pml_ob1.priority); - 
if((*priority) > mca_pml_ob1.priority) { - *priority = mca_pml_ob1.priority; - return NULL; - } *priority = mca_pml_ob1.priority; allocator_component = mca_allocator_component_lookup( mca_pml_ob1.allocator_name ); @@ -266,16 +274,26 @@ mca_pml_ob1_component_init( int* priority, return NULL; } - if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads, + if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads, enable_mpi_threads)) { return NULL; } - /* Set this here (vs in component_open()) because - opal_leave_pinned* may have been set after MCA params were - read (e.g., by the openib btl) */ - mca_pml_ob1.leave_pinned = (1 == opal_leave_pinned); - mca_pml_ob1.leave_pinned_pipeline = (int) opal_leave_pinned_pipeline; + /* check if any btls do not support dynamic add_procs */ + mca_btl_base_selected_module_t* selected_btl; + OPAL_LIST_FOREACH(selected_btl, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) { + mca_btl_base_module_t *btl = selected_btl->btl_module; + + if (btl->btl_flags & MCA_BTL_FLAGS_BTL_PROGRESS_THREAD_ENABLED) { + mca_pml_ob1_matching_protection = true; + } + + if (btl->btl_flags & MCA_BTL_FLAGS_SINGLE_ADD_PROCS) { + mca_pml_ob1.super.pml_flags |= MCA_PML_BASE_FLAG_REQUIRE_WORLD; + break; + } + + } return &mca_pml_ob1.super; } @@ -289,9 +307,21 @@ int mca_pml_ob1_component_fini(void) return rc; if(!mca_pml_ob1.enabled) - return OMPI_SUCCESS; /* never selected.. return success.. */ + return OMPI_SUCCESS; /* never selected.. return success.. */ mca_pml_ob1.enabled = false; /* not anymore */ + /* return the static receive/send requests to the respective free list and + * let the free list handle destruction. 
*/ + if( NULL != mca_pml_ob1_recvreq ) { + opal_free_list_return (&mca_pml_base_recv_requests, (opal_free_list_item_t *) mca_pml_ob1_recvreq); + mca_pml_ob1_recvreq = NULL; + } + + if( NULL != mca_pml_ob1_sendreq ) { + opal_free_list_return (&mca_pml_base_send_requests, (opal_free_list_item_t *) mca_pml_ob1_sendreq); + mca_pml_ob1_sendreq = NULL; + } + OBJ_DESTRUCT(&mca_pml_ob1.rdma_pending); OBJ_DESTRUCT(&mca_pml_ob1.pckt_pending); OBJ_DESTRUCT(&mca_pml_ob1.recv_pending); @@ -304,15 +334,6 @@ int mca_pml_ob1_component_fini(void) OBJ_DESTRUCT(&mca_pml_ob1.lock); OBJ_DESTRUCT(&mca_pml_ob1.send_ranges); - if( NULL != mca_pml_ob1_recvreq ) { - OBJ_DESTRUCT(mca_pml_ob1_recvreq); - mca_pml_ob1_recvreq = NULL; - } - if( NULL != mca_pml_ob1_sendreq ) { - OBJ_DESTRUCT(mca_pml_ob1_sendreq); - mca_pml_ob1_sendreq = NULL; - } - if( NULL != mca_pml_ob1.allocator ) { (void)mca_pml_ob1.allocator->alc_finalize(mca_pml_ob1.allocator); mca_pml_ob1.allocator = NULL; @@ -336,13 +357,12 @@ int mca_pml_ob1_component_fini(void) return OMPI_SUCCESS; } -void *mca_pml_ob1_seg_alloc( struct mca_mpool_base_module_t* mpool, - size_t* size, - mca_mpool_base_registration_t** registration) { +void *mca_pml_ob1_seg_alloc (void *ctx, size_t *size) +{ return malloc(*size); } -void mca_pml_ob1_seg_free( struct mca_mpool_base_module_t* mpool, - void* segment ) { +void mca_pml_ob1_seg_free (void *ctx, void *segment) +{ free(segment); } diff --git a/ompi/mca/pml/ob1/pml_ob1_component.h b/ompi/mca/pml/ob1/pml_ob1_component.h index 5021da20beb..c72ccc79538 100644 --- a/ompi/mca/pml/ob1/pml_ob1_component.h +++ b/ompi/mca/pml/ob1/pml_ob1_component.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** - * @file + * @file */ #ifndef MCA_PML_OB1_COMPONENT_H diff --git a/ompi/mca/pml/ob1/pml_ob1_cuda.c b/ompi/mca/pml/ob1/pml_ob1_cuda.c index 2be7afa8d6d..84f6464a753 100644 --- a/ompi/mca/pml/ob1/pml_ob1_cuda.c +++ b/ompi/mca/pml/ob1/pml_ob1_cuda.c @@ -6,27 +6,28 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" #include "opal/prefetch.h" +#include "opal/runtime/opal_params.h" #include "opal/mca/btl/btl.h" -#include "opal/mca/mpool/mpool.h" +#include "opal/mca/mpool/mpool.h" #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" #include "pml_ob1.h" @@ -56,14 +57,13 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq, mca_bml_base_btl_t* bml_btl, size_t size) { int rc; -#if OPAL_CUDA_SUPPORT_41 #if OPAL_CUDA_GDR_SUPPORT /* With some BTLs, switch to RNDV from RGET at large messages */ - if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) && + if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) && (sendreq->req_send.req_bytes_packed > (bml_btl->btl->btl_cuda_rdma_limit - sizeof(mca_pml_ob1_hdr_t)))) { return mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0); } -#endif /* OPAL_CUDA_GDR_SUPPORT */ +#endif /* OPAL_CUDA_GDR_SUPPORT */ sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA; if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) { @@ -95,14 +95,10 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq, sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA; rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0); } -#else - /* Just do the rendezvous but set initial data to be sent to zero */ - rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0); -#endif /* OPAL_CUDA_SUPPORT_41 */ return rc; } - + size_t mca_pml_ob1_rdma_cuda_btls( mca_bml_base_endpoint_t* bml_endpoint, @@ -119,7 +115,7 @@ size_t mca_pml_ob1_rdma_cuda_btls( return 0; } - /* check to see if memory is registered */ + /* check to see if memory is registered */ for(n = 0; n < num_btls && num_btls_used < mca_pml_ob1.max_rdma_per_request; n++) { mca_bml_base_btl_t* bml_btl = @@ -148,7 +144,7 @@ size_t mca_pml_ob1_rdma_cuda_btls( /* 
if we don't use leave_pinned and all BTLs that already have this memory * registered amount to less then half of available bandwidth - fall back to * pipeline protocol */ - if(0 == num_btls_used || (!mca_pml_ob1.leave_pinned && weight_total < 0.5)) + if(0 == num_btls_used || (!opal_leave_pinned && weight_total < 0.5)) return 0; mca_pml_ob1_calc_weighted_length(rdma_btls, num_btls_used, size, @@ -158,11 +154,10 @@ size_t mca_pml_ob1_rdma_cuda_btls( } int mca_pml_ob1_cuda_need_buffers(void * rreq, - mca_btl_base_module_t* btl) + mca_btl_base_module_t* btl) { mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)rreq; - mca_bml_base_endpoint_t* bml_endpoint = - (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; + mca_bml_base_endpoint_t* bml_endpoint = mca_bml_base_get_endpoint (recvreq->req_recv.req_base.req_proc); mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_send, btl); /* A btl could be in the rdma list but not in the send list so check there also */ @@ -193,7 +188,7 @@ int mca_pml_ob1_cuda_need_buffers(void * rreq, * future. */ void mca_pml_ob1_cuda_add_ipc_support(struct mca_btl_base_module_t* btl, int32_t flags, ompi_proc_t* errproc, char* btlinfo) -{ +{ mca_bml_base_endpoint_t* ep; int btl_verbose_stream = 0; int i; diff --git a/ompi/mca/pml/ob1/pml_ob1_hdr.h b/ompi/mca/pml/ob1/pml_ob1_hdr.h index 7bfa86a53ca..8f8f2ecf4fd 100644 --- a/ompi/mca/pml/ob1/pml_ob1_hdr.h +++ b/ompi/mca/pml/ob1/pml_ob1_hdr.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
@@ -14,9 +14,9 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -56,7 +56,7 @@ #define MCA_PML_OB1_HDR_FLAGS_SIGNAL 32 /* message can be optionally signalling */ /** - * Common hdr attributes - must be first element in each hdr type + * Common hdr attributes - must be first element in each hdr type */ struct mca_pml_ob1_common_hdr_t { uint8_t hdr_type; /**< type of envelope */ @@ -71,11 +71,11 @@ static inline void mca_pml_ob1_common_hdr_prepare (mca_pml_ob1_common_hdr_t *hdr hdr->hdr_flags = hdr_flags; } -#define MCA_PML_OB1_COMMON_HDR_NTOH(h) -#define MCA_PML_OB1_COMMON_HDR_HTON(h) +#define MCA_PML_OB1_COMMON_HDR_NTOH(h) +#define MCA_PML_OB1_COMMON_HDR_HTON(h) /** - * Header definition for the first fragment, contains the + * Header definition for the first fragment, contains the * attributes required to match the corresponding posted receive. */ struct mca_pml_ob1_match_hdr_t { @@ -92,7 +92,7 @@ struct mca_pml_ob1_match_hdr_t { #define OMPI_PML_OB1_MATCH_HDR_LEN 16 #else #define OMPI_PML_OB1_MATCH_HDR_LEN 14 -#endif +#endif typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t; @@ -126,7 +126,7 @@ do { \ (h).hdr_src = htonl((h).hdr_src); \ (h).hdr_tag = htonl((h).hdr_tag); \ (h).hdr_seq = htons((h).hdr_seq); \ -} while (0) +} while (0) /** * Header definition for the first fragment when an acknowledgment @@ -162,7 +162,7 @@ static inline void mca_pml_ob1_rendezvous_hdr_prepare (mca_pml_ob1_rendezvous_hd do { \ MCA_PML_OB1_MATCH_HDR_HTON((h).hdr_match); \ (h).hdr_msg_length = hton64((h).hdr_msg_length); \ - } while (0) + } while (0) /** * Header definition for a combined rdma rendezvous/get @@ -208,7 +208,7 @@ static inline void mca_pml_ob1_rget_hdr_prepare (mca_pml_ob1_rget_hdr_t *hdr, ui do { \ MCA_PML_OB1_RNDV_HDR_HTON((h).hdr_rndv); \ (h).hdr_src_ptr = hton64((h).hdr_src_ptr); \ - } while (0) + } while (0) /** * Header for 
subsequent fragments. @@ -305,7 +305,7 @@ static inline void mca_pml_ob1_ack_hdr_prepare (mca_pml_ob1_ack_hdr_t *hdr, uint MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \ (h).hdr_send_offset = hton64((h).hdr_send_offset); \ (h).hdr_send_size = hton64((h).hdr_send_size); \ - } while (0) + } while (0) /** * Header used to initiate an RDMA operation. @@ -363,7 +363,7 @@ static inline void mca_pml_ob1_rdma_hdr_prepare (mca_pml_ob1_rdma_hdr_t *hdr, ui (h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \ (h).hdr_dst_ptr = hton64((h).hdr_dst_ptr); \ (h).hdr_dst_size = hton64((h).hdr_dst_size); \ - } while (0) + } while (0) /** * Header used to complete an RDMA operation. diff --git a/ompi/mca/pml/ob1/pml_ob1_iprobe.c b/ompi/mca/pml/ob1/pml_ob1_iprobe.c index f57ce2389df..914474c6668 100644 --- a/ompi/mca/pml/ob1/pml_ob1_iprobe.c +++ b/ompi/mca/pml/ob1/pml_ob1_iprobe.c @@ -2,19 +2,19 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011-2012 Sandia National Laboratories. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,10 +36,10 @@ int mca_pml_ob1_iprobe(int src, recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_IPROBE; - MCA_PML_OB1_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, true); + MCA_PML_OB1_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, false); MCA_PML_OB1_RECV_REQUEST_START(&recvreq); - if( recvreq.req_recv.req_base.req_ompi.req_complete == true ) { + if( REQUEST_COMPLETE( &(recvreq.req_recv.req_base.req_ompi)) ) { if( NULL != status ) { *status = recvreq.req_recv.req_base.req_ompi.req_status; } @@ -66,7 +66,7 @@ int mca_pml_ob1_probe(int src, recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_PROBE; - MCA_PML_OB1_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, true); + MCA_PML_OB1_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, false); MCA_PML_OB1_RECV_REQUEST_START(&recvreq); ompi_request_wait_completion(&recvreq.req_recv.req_base.req_ompi); @@ -84,7 +84,7 @@ int mca_pml_ob1_improbe(int src, int tag, struct ompi_communicator_t *comm, - int *matched, + int *matched, struct ompi_message_t **message, ompi_status_public_t * status) { @@ -102,11 +102,11 @@ mca_pml_ob1_improbe(int src, recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_IMPROBE; /* initialize the request enough to probe and get the status */ - MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, NULL, 0, &ompi_mpi_char.dt, + MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, false); MCA_PML_OB1_RECV_REQUEST_START(recvreq); - if( recvreq->req_recv.req_base.req_ompi.req_complete == true ) { + if( REQUEST_COMPLETE( &(recvreq->req_recv.req_base.req_ompi)) ) { if( NULL != status ) { *status = recvreq->req_recv.req_base.req_ompi.req_status; } @@ -127,7 +127,7 @@ 
mca_pml_ob1_improbe(int src, MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq ); ompi_message_return(*message); *message = MPI_MESSAGE_NULL; - + opal_progress(); } @@ -156,7 +156,7 @@ mca_pml_ob1_mprobe(int src, recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_MPROBE; /* initialize the request enough to probe and get the status */ - MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, NULL, 0, &ompi_mpi_char.dt, + MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, false); MCA_PML_OB1_RECV_REQUEST_START(recvreq); diff --git a/ompi/mca/pml/ob1/pml_ob1_irecv.c b/ompi/mca/pml/ob1/pml_ob1_irecv.c index 5efa0897578..3e471398feb 100644 --- a/ompi/mca/pml/ob1/pml_ob1_irecv.c +++ b/ompi/mca/pml/ob1/pml_ob1_irecv.c @@ -3,18 +3,20 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,10 +30,15 @@ #include "pml_ob1_recvfrag.h" #include "ompi/peruse/peruse-internal.h" #include "ompi/message/message.h" -#if HAVE_ALLOCA_H -#include -#endif /* HAVE_ALLOCA_H */ - +#include "ompi/memchecker.h" + +/** + * Single usage request. As we allow recursive calls to recv + * (from the request completion callback), we cannot rely on + * using a global request. Thus, once a recv acquires ownership + * this global request, it should set it to NULL to prevent + * the reuse until the first user completes. + */ mca_pml_ob1_recv_request_t *mca_pml_ob1_recvreq = NULL; int mca_pml_ob1_irecv_init(void *addr, @@ -47,6 +54,7 @@ int mca_pml_ob1_irecv_init(void *addr, if (NULL == recvreq) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, addr, count, datatype, src, tag, comm, true); @@ -72,6 +80,7 @@ int mca_pml_ob1_irecv(void *addr, if (NULL == recvreq) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, addr, count, datatype, src, tag, comm, false); @@ -97,20 +106,18 @@ int mca_pml_ob1_recv(void *addr, mca_pml_ob1_recv_request_t *recvreq = NULL; int rc; -#if !OPAL_ENABLE_MULTI_THREADS +#if !OMPI_ENABLE_THREAD_MULTIPLE recvreq = mca_pml_ob1_recvreq; + mca_pml_ob1_recvreq = NULL; if( OPAL_UNLIKELY(NULL == recvreq) ) -#endif /* !OPAL_ENABLE_MULTI_THREADS */ +#endif /* !OMPI_ENABLE_THREAD_MULTIPLE */ { MCA_PML_OB1_RECV_REQUEST_ALLOC(recvreq); if (NULL == recvreq) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; -#if !OPAL_ENABLE_MULTI_THREADS - mca_pml_ob1_recvreq = recvreq; -#endif /* !OPAL_ENABLE_MULTI_THREADS */ } - OBJ_CONSTRUCT(recvreq, mca_pml_ob1_recv_request_t); + recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, addr, count, datatype, src, tag, comm, false); @@ -126,8 +133,28 @@ int 
mca_pml_ob1_recv(void *addr, } rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; - MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv); - OBJ_DESTRUCT(recvreq); + + if (recvreq->req_recv.req_base.req_pml_complete) { + /* make buffer defined when the request is compeleted, + and before releasing the objects. */ + MEMCHECKER( + memchecker_call(&opal_memchecker_base_mem_defined, + recvreq->req_recv.req_base.req_addr, + recvreq->req_recv.req_base.req_count, + recvreq->req_recv.req_base.req_datatype); + ); + } + +#if OMPI_ENABLE_THREAD_MULTIPLE + MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq); +#else + if( NULL != mca_pml_ob1_recvreq ) { + MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq); + } else { + mca_pml_ob1_recv_request_fini (recvreq); + mca_pml_ob1_recvreq = recvreq; + } +#endif return rc; } @@ -146,7 +173,6 @@ mca_pml_ob1_imrecv( void *buf, int src, tag; ompi_communicator_t *comm; mca_pml_ob1_comm_proc_t* proc; - mca_pml_ob1_comm_t* ob1_comm; uint64_t seq; /* get the request from the message and the frag from the request @@ -156,7 +182,6 @@ mca_pml_ob1_imrecv( void *buf, src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG; comm = (*message)->comm; - ob1_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm; seq = recvreq->req_recv.req_base.req_sequence; /* make the request a recv request again */ @@ -194,7 +219,7 @@ mca_pml_ob1_imrecv( void *buf, /* Note - sequence number already assigned */ recvreq->req_recv.req_base.req_sequence = seq; - proc = &ob1_comm->procs[recvreq->req_recv.req_base.req_peer]; + proc = mca_pml_ob1_peer_lookup (comm, recvreq->req_recv.req_base.req_peer); recvreq->req_recv.req_base.req_proc = proc->ompi_proc; prepare_recv_req_converter(recvreq); @@ -241,7 +266,6 @@ mca_pml_ob1_mrecv( void *buf, int src, tag, rc; ompi_communicator_t *comm; mca_pml_ob1_comm_proc_t* proc; - mca_pml_ob1_comm_t* ob1_comm; uint64_t seq; /* get the request from the message and the frag from 
the request @@ -252,7 +276,6 @@ mca_pml_ob1_mrecv( void *buf, src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG; seq = recvreq->req_recv.req_base.req_sequence; - ob1_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm; /* make the request a recv request again */ /* The old request kept pointers to comm and the char datatype. @@ -288,7 +311,7 @@ mca_pml_ob1_mrecv( void *buf, /* Note - sequence number already assigned */ recvreq->req_recv.req_base.req_sequence = seq; - proc = &ob1_comm->procs[recvreq->req_recv.req_base.req_peer]; + proc = mca_pml_ob1_peer_lookup (comm, recvreq->req_recv.req_base.req_peer); recvreq->req_recv.req_base.req_proc = proc->ompi_proc; prepare_recv_req_converter(recvreq); diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c index 982eb1a2df7..1e96cdcc78a 100644 --- a/ompi/mca/pml/ob1/pml_ob1_isend.c +++ b/ompi/mca/pml/ob1/pml_ob1_isend.c @@ -28,13 +28,17 @@ #include "pml_ob1_sendreq.h" #include "pml_ob1_recvreq.h" #include "ompi/peruse/peruse-internal.h" -#if HAVE_ALLOCA_H -#include -#endif /* HAVE_ALLOCA_H */ +/** + * Single usage request. As we allow recursive calls (as an + * example from the request completion callback), we cannot rely + * on using a global request. Thus, once a send acquires ownership + * of this global request, it should set it to NULL to prevent + * the reuse until the first user completes. + */ mca_pml_ob1_send_request_t *mca_pml_ob1_sendreq = NULL; -int mca_pml_ob1_isend_init(void *buf, +int mca_pml_ob1_isend_init(const void *buf, size_t count, ompi_datatype_t * datatype, int dst, @@ -59,12 +63,18 @@ int mca_pml_ob1_isend_init(void *buf, &(sendreq)->req_send.req_base, PERUSE_SEND); + /* Work around a leak in start by marking this request as complete. The + * problem occured because we do not have a way to differentiate an + * inital request and an incomplete pml request in start. 
This line + * allows us to detect this state. */ + sendreq->req_send.req_base.req_pml_complete = true; + *request = (ompi_request_t *) sendreq; return OMPI_SUCCESS; } /* try to get a small message out on to the wire quickly */ -static inline int mca_pml_ob1_send_inline (void *buf, size_t count, +static inline int mca_pml_ob1_send_inline (const void *buf, size_t count, ompi_datatype_t * datatype, int dst, int tag, int16_t seqn, ompi_proc_t *dst_proc, mca_bml_base_endpoint_t* endpoint, @@ -120,7 +130,7 @@ static inline int mca_pml_ob1_send_inline (void *buf, size_t count, return (int) size; } -int mca_pml_ob1_isend(void *buf, +int mca_pml_ob1_isend(const void *buf, size_t count, ompi_datatype_t * datatype, int dst, @@ -129,15 +139,18 @@ int mca_pml_ob1_isend(void *buf, ompi_communicator_t * comm, ompi_request_t ** request) { - mca_pml_ob1_comm_t* ob1_comm = comm->c_pml_comm; + mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst); mca_pml_ob1_send_request_t *sendreq = NULL; - ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst); - mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) - dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; + ompi_proc_t *dst_proc = ob1_proc->ompi_proc; + mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc); int16_t seqn; int rc; - seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1); + if (OPAL_UNLIKELY(NULL == endpoint)) { + return OMPI_ERR_UNREACH; + } + + seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) { rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc, @@ -171,7 +184,7 @@ int mca_pml_ob1_isend(void *buf, return rc; } -int mca_pml_ob1_send(void *buf, +int mca_pml_ob1_send(const void *buf, size_t count, ompi_datatype_t * datatype, int dst, @@ -179,14 +192,17 @@ int mca_pml_ob1_send(void *buf, mca_pml_base_send_mode_t sendmode, ompi_communicator_t * comm) { - 
mca_pml_ob1_comm_t* ob1_comm = comm->c_pml_comm; - ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst); - mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) - dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; + mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst); + ompi_proc_t *dst_proc = ob1_proc->ompi_proc; + mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc); mca_pml_ob1_send_request_t *sendreq = NULL; int16_t seqn; int rc; + if (OPAL_UNLIKELY(NULL == endpoint)) { + return OMPI_ERR_UNREACH; + } + if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == sendmode)) { /* large buffered sends *need* a real request so use isend instead */ ompi_request_t *brequest; @@ -196,16 +212,12 @@ int mca_pml_ob1_send(void *buf, return rc; } - /* free the request and return. don't care if it completes now */ + ompi_request_wait_completion (brequest); ompi_request_free (&brequest); return OMPI_SUCCESS; } - if (OPAL_UNLIKELY(NULL == endpoint)) { - return OMPI_ERR_UNREACH; - } - - seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1); + seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); /** * The immediate send will not have a request, so they are @@ -220,19 +232,16 @@ int mca_pml_ob1_send(void *buf, } } -#if !OPAL_ENABLE_MULTI_THREADS +#if !OMPI_ENABLE_THREAD_MULTIPLE sendreq = mca_pml_ob1_sendreq; + mca_pml_ob1_sendreq = NULL; if( OPAL_UNLIKELY(NULL == sendreq) ) -#endif /* !OPAL_ENABLE_MULTI_THREADS */ +#endif /* !OMPI_ENABLE_THREAD_MULTIPLE */ { MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq); if (NULL == sendreq) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; -#if !OPAL_ENABLE_MULTI_THREADS - mca_pml_ob1_sendreq = sendreq; -#endif /* !OPAL_ENABLE_MULTI_THREADS */ } - OBJ_CONSTRUCT(sendreq, mca_pml_ob1_send_request_t); sendreq->req_send.req_base.req_proc = dst_proc; sendreq->rdma_frag = NULL; @@ -252,9 +261,18 @@ int mca_pml_ob1_send(void *buf, 
ompi_request_wait_completion(&sendreq->req_send.req_base.req_ompi); rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR; - MCA_PML_BASE_SEND_REQUEST_FINI(&sendreq->req_send); } - OBJ_DESTRUCT(sendreq); + +#if OMPI_ENABLE_THREAD_MULTIPLE + MCA_PML_OB1_SEND_REQUEST_RETURN(sendreq); +#else + if( NULL != mca_pml_ob1_sendreq ) { + MCA_PML_OB1_SEND_REQUEST_RETURN(sendreq); + } else { + mca_pml_ob1_send_request_fini (sendreq); + mca_pml_ob1_sendreq = sendreq; + } +#endif return rc; } diff --git a/ompi/mca/pml/ob1/pml_ob1_progress.c b/ompi/mca/pml/ob1/pml_ob1_progress.c index 8c61ac51835..ea283293e3b 100644 --- a/ompi/mca/pml/ob1/pml_ob1_progress.c +++ b/ompi/mca/pml/ob1/pml_ob1_progress.c @@ -6,14 +6,14 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +21,7 @@ #include "pml_ob1.h" #include "pml_ob1_sendreq.h" -#include "ompi/mca/bml/base/base.h" +#include "ompi/mca/bml/base/base.h" #if OPAL_CUDA_SUPPORT #include "opal/mca/common/cuda/common_cuda.h" #include "pml_ob1_recvreq.h" @@ -69,7 +69,7 @@ int mca_pml_ob1_progress(void) for(j = 0; j < (int)mca_bml_base_btl_array_get_size(&endpoint->btl_eager); j++) { mca_bml_base_btl_t* bml_btl; int rc; - + /* select a btl */ bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); rc = mca_pml_ob1_send_request_start_btl(sendreq, bml_btl); diff --git a/ompi/mca/pml/ob1/pml_ob1_rdma.c b/ompi/mca/pml/ob1/pml_ob1_rdma.c index c2c9bbbe89d..8cae10b0322 100644 --- a/ompi/mca/pml/ob1/pml_ob1_rdma.c +++ b/ompi/mca/pml/ob1/pml_ob1_rdma.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +26,13 @@ #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/bml/bml.h" -#include "opal/mca/mpool/mpool.h" +#include "opal/mca/mpool/mpool.h" +#include "opal/runtime/opal_params.h" #include "pml_ob1.h" #include "pml_ob1_rdma.h" /* - * Check to see if memory is registered or can be registered. Build a + * Check to see if memory is registered or can be registered. Build a * set of registrations on the request. 
*/ @@ -42,6 +43,7 @@ size_t mca_pml_ob1_rdma_btls( mca_pml_ob1_com_btl_t* rdma_btls) { int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); + int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager); double weight_total = 0; int num_btls_used = 0; @@ -50,19 +52,35 @@ size_t mca_pml_ob1_rdma_btls( return 0; } - /* check to see if memory is registered */ + /* check to see if memory is registered */ for (int n = 0; n < num_btls && num_btls_used < mca_pml_ob1.max_rdma_per_request; n++) { mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, (bml_endpoint->btl_rdma_index + n) % num_btls); mca_btl_base_registration_handle_t *reg_handle = NULL; mca_btl_base_module_t *btl = bml_btl->btl; + /* NTH: go ahead and use an rdma btl if is the only one */ + bool ignore = !mca_pml_ob1.use_all_rdma; + + /* do not use rdma btls that are not in the eager list. this is necessary to avoid using + * btls that exist on the endpoint only to support RMA. 
*/ + for (int i = 0 ; i < num_eager_btls && ignore ; ++i) { + mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, i); + if (eager_btl->btl_endpoint == bml_btl->btl_endpoint) { + ignore = false; + break; + } + } + + if (ignore) { + continue; + } if (btl->btl_register_mem) { /* do not use the RDMA protocol with this btl if 1) leave pinned is disabled, * 2) the btl supports put, and 3) the fragment is larger than the minimum * pipeline size specified by the BTL */ - if (!mca_pml_ob1.leave_pinned && (btl->btl_flags & MCA_BTL_FLAGS_PUT) && + if (!opal_leave_pinned && (btl->btl_flags & MCA_BTL_FLAGS_PUT) && size > btl->btl_min_rdma_pipeline_size) { continue; } @@ -85,7 +103,7 @@ size_t mca_pml_ob1_rdma_btls( /* if we don't use leave_pinned and all BTLs that already have this memory * registered amount to less then half of available bandwidth - fall back to * pipeline protocol */ - if (0 == num_btls_used || (!mca_pml_ob1.leave_pinned && weight_total < 0.5)) + if (0 == num_btls_used || (!opal_leave_pinned && weight_total < 0.5)) return 0; mca_pml_ob1_calc_weighted_length(rdma_btls, num_btls_used, size, @@ -95,22 +113,66 @@ size_t mca_pml_ob1_rdma_btls( return num_btls_used; } +size_t mca_pml_ob1_rdma_pipeline_btls_count (mca_bml_base_endpoint_t* bml_endpoint) +{ + int num_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma); + int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager); + int rdma_count = 0; + + for(int i = 0; i < num_btls && i < mca_pml_ob1.max_rdma_per_request; ++i) { + mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma); + /* NTH: go ahead and use an rdma btl if is the only one */ + bool ignore = !mca_pml_ob1.use_all_rdma; + + for (int i = 0 ; i < num_eager_btls && ignore ; ++i) { + mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, i); + if (eager_btl->btl_endpoint == bml_btl->btl_endpoint) { + ignore = 
false; + break; + } + } + + if (!ignore) { + ++rdma_count; + } + } + + return rdma_count; +} + size_t mca_pml_ob1_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint, size_t size, mca_pml_ob1_com_btl_t* rdma_btls ) { - int i, num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); + int num_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma); + int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager); double weight_total = 0; + int rdma_count = 0; + + for(int i = 0; i < num_btls && i < mca_pml_ob1.max_rdma_per_request; i++) { + mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma); + /* NTH: go ahead and use an rdma btl if is the only one */ + bool ignore = !mca_pml_ob1.use_all_rdma; + + for (int i = 0 ; i < num_eager_btls && ignore ; ++i) { + mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, i); + if (eager_btl->btl_endpoint == bml_btl->btl_endpoint) { + ignore = false; + break; + } + } + + if (ignore) { + continue; + } - for(i = 0; i < num_btls && i < mca_pml_ob1.max_rdma_per_request; i++) { - rdma_btls[i].bml_btl = - mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma); - rdma_btls[i].btl_reg = NULL; + rdma_btls[rdma_count].bml_btl = bml_btl; + rdma_btls[rdma_count++].btl_reg = NULL; - weight_total += rdma_btls[i].bml_btl->btl_weight; + weight_total += bml_btl->btl_weight; } - mca_pml_ob1_calc_weighted_length(rdma_btls, i, size, weight_total); + mca_pml_ob1_calc_weighted_length (rdma_btls, rdma_count, size, weight_total); - return i; + return rdma_count; } diff --git a/ompi/mca/pml/ob1/pml_ob1_rdma.h b/ompi/mca/pml/ob1/pml_ob1_rdma.h index 3ed0655795f..7729043abe6 100644 --- a/ompi/mca/pml/ob1/pml_ob1_rdma.h +++ b/ompi/mca/pml/ob1/pml_ob1_rdma.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and 
Technology @@ -9,6 +10,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -18,7 +21,7 @@ /** * @file */ - + #ifndef MCA_PML_OB1_RDMA_H #define MCA_PML_OB1_RDMA_H @@ -37,5 +40,8 @@ size_t mca_pml_ob1_rdma_btls(struct mca_bml_base_endpoint_t* endpoint, * bandwidth */ size_t mca_pml_ob1_rdma_pipeline_btls(struct mca_bml_base_endpoint_t* endpoint, size_t size, mca_pml_ob1_com_btl_t* rdma_btls); + +size_t mca_pml_ob1_rdma_pipeline_btls_count (mca_bml_base_endpoint_t* bml_endpoint); + #endif diff --git a/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h b/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h index ea15b67300b..70a390d8073 100644 --- a/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h +++ b/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h @@ -6,22 +6,22 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** * @file */ - + #ifndef MCA_PML_OB1_RDMAFRAG_H #define MCA_PML_OB1_RDMAFRAG_H diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index dd43e1483c8..5f3f8fdc484 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -3,10 +3,10 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -18,9 +18,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -83,29 +83,29 @@ append_frag_to_list(opal_list_t *queue, mca_btl_base_module_t *btl, } /** - * Match incoming recv_frags against posted receives. + * Match incoming recv_frags against posted receives. * Supports out of order delivery. - * + * * @param frag_header (IN) Header of received recv_frag. * @param frag_desc (IN) Received recv_frag descriptor. * @param match_made (OUT) Flag indicating wether a match was made. - * @param additional_matches (OUT) List of additional matches + * @param additional_matches (OUT) List of additional matches * @return OMPI_SUCCESS or error status on failure. 
*/ -static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, +static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, mca_pml_ob1_match_hdr_t *hdr, mca_btl_base_segment_t* segments, size_t num_segments, int type); - + static mca_pml_ob1_recv_request_t* match_one(mca_btl_base_module_t *btl, mca_pml_ob1_match_hdr_t *hdr, mca_btl_base_segment_t* segments, size_t num_segments, ompi_communicator_t *comm_ptr, mca_pml_ob1_comm_proc_t *proc, mca_pml_ob1_recv_frag_t* frag); - -void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, + +void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* des, void* cbdata ) @@ -120,12 +120,12 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, size_t bytes_received = 0; assert(num_segments <= MCA_BTL_DES_MAX_SEGMENTS); - + if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_OB1_MATCH_HDR_LEN) ) { return; } ob1_hdr_ntoh(((mca_pml_ob1_hdr_t*) hdr), MCA_PML_OB1_HDR_TYPE_MATCH); - + /* communicator pointer */ comm_ptr = ompi_comm_lookup(hdr->hdr_ctx); if(OPAL_UNLIKELY(NULL == comm_ptr)) { @@ -141,10 +141,10 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, return; } comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm; - + /* source sequence number */ - proc = &comm->procs[hdr->hdr_src]; - + proc = mca_pml_ob1_peer_lookup (comm_ptr, hdr->hdr_src); + /* We generate the MSG_ARRIVED event as soon as the PML is aware * of a matching fragment arrival. Independing if it is received * on the correct order or not. This will allow the tools to @@ -153,7 +153,7 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, */ PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr, hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - + /* get next expected message sequence number - if threaded * run, lock to make sure that if another thread is processing * a frag from the same message a match is made only once. 
@@ -161,18 +161,18 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, * end points) from being processed, and potentially "loosing" * the fragment. */ - OPAL_THREAD_LOCK(&comm->matching_lock); - + OB1_MATCHING_LOCK(&comm->matching_lock); + /* get sequence number of next message that can be processed */ if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) || (opal_list_get_size(&proc->frags_cant_match) > 0 ))) { goto slow_path; } - + /* This is the sequence number we were expecting, so we can try * matching it to already posted receives. */ - + /* We're now expecting the next sequence number. */ proc->expected_sequence++; @@ -182,9 +182,9 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, */ PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr, hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - + match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, NULL); - + /* The match is over. We generate the SEARCH_POSTED_Q_END here, * before going into the mca_pml_ob1_check_cantmatch_for_match so * we can make a difference for the searching time for all @@ -192,19 +192,19 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, */ PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr, hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - + /* release matching lock before processing fragment */ - OPAL_THREAD_UNLOCK(&comm->matching_lock); + OB1_MATCHING_UNLOCK(&comm->matching_lock); if(OPAL_LIKELY(match)) { bytes_received = segments->seg_len - OMPI_PML_OB1_MATCH_HDR_LEN; match->req_recv.req_bytes_packed = bytes_received; - + MCA_PML_OB1_RECV_REQUEST_MATCHED(match, hdr); - if(match->req_bytes_expected > 0) { + if(match->req_bytes_expected > 0) { struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; uint32_t iov_count = 1; - + /* * Make user buffer accessable(defined) before unpacking. 
*/ @@ -214,7 +214,7 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, match->req_recv.req_base.req_count, match->req_recv.req_base.req_datatype); ); - + iov[0].iov_len = bytes_received; iov[0].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments->seg_addr.pval + OMPI_PML_OB1_MATCH_HDR_LEN); @@ -239,28 +239,28 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, match->req_recv.req_base.req_datatype); ); } - + /* no need to check if complete we know we are.. */ /* don't need a rmb as that is for checking */ recv_request_pml_complete(match); } return; - + slow_path: - OPAL_THREAD_UNLOCK(&comm->matching_lock); + OB1_MATCHING_UNLOCK(&comm->matching_lock); mca_pml_ob1_recv_frag_match(btl, hdr, segments, num_segments, MCA_PML_OB1_HDR_TYPE_MATCH); } -void mca_pml_ob1_recv_frag_callback_rndv(mca_btl_base_module_t* btl, +void mca_pml_ob1_recv_frag_callback_rndv(mca_btl_base_module_t* btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* des, void* cbdata ) { mca_btl_base_segment_t* segments = des->des_segments; mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval; - + if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) { return; } @@ -270,14 +270,14 @@ void mca_pml_ob1_recv_frag_callback_rndv(mca_btl_base_module_t* btl, return; } -void mca_pml_ob1_recv_frag_callback_rget(mca_btl_base_module_t* btl, +void mca_pml_ob1_recv_frag_callback_rget(mca_btl_base_module_t* btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* des, void* cbdata ) { mca_btl_base_segment_t* segments = des->des_segments; mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval; - + if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) { return; } @@ -287,9 +287,9 @@ void mca_pml_ob1_recv_frag_callback_rget(mca_btl_base_module_t* btl, return; } - -void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl, + +void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl, 
mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* des, void* cbdata ) @@ -298,7 +298,7 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl, mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval; mca_pml_ob1_send_request_t* sendreq; size_t size; - + if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) { return; } @@ -360,7 +360,7 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl, return; } -void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl, +void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* des, void* cbdata ) { @@ -383,7 +383,7 @@ void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl, /* This will trigger the opal_convertor_pack to start asynchronous copy. */ mca_pml_ob1_recv_request_frag_copy_start(recvreq,btl,segments,des->des_segment_count,des); - + /* Let BTL know that it CANNOT free the frag */ des->des_flags |= MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC; @@ -397,34 +397,34 @@ void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl, } -void mca_pml_ob1_recv_frag_callback_put(mca_btl_base_module_t* btl, +void mca_pml_ob1_recv_frag_callback_put(mca_btl_base_module_t* btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* des, void* cbdata ) { mca_btl_base_segment_t* segments = des->des_segments; mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval; mca_pml_ob1_send_request_t* sendreq; - + if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) { return; } - + ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_PUT); sendreq = (mca_pml_ob1_send_request_t*)hdr->hdr_rdma.hdr_req.pval; mca_pml_ob1_send_request_put(sendreq,btl,&hdr->hdr_rdma); - + return; } -void mca_pml_ob1_recv_frag_callback_fin(mca_btl_base_module_t* btl, +void mca_pml_ob1_recv_frag_callback_fin(mca_btl_base_module_t* btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* des, void* cbdata ) { 
mca_btl_base_segment_t* segments = des->des_segments; mca_pml_ob1_fin_hdr_t* hdr = (mca_pml_ob1_fin_hdr_t *) segments->seg_addr.pval; mca_pml_ob1_rdma_frag_t *frag; - + if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_fin_hdr_t)) ) { return; } @@ -618,10 +618,10 @@ static mca_pml_ob1_recv_frag_t* check_cantmatch_for_match(mca_pml_ob1_comm_proc_ * - fragments may be corrupt * - this routine may be called simultaneously by more than one thread */ -static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, +static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, mca_pml_ob1_match_hdr_t *hdr, mca_btl_base_segment_t* segments, - size_t num_segments, + size_t num_segments, int type) { /* local variables */ @@ -650,7 +650,7 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, /* source sequence number */ frag_msg_seq = hdr->hdr_seq; - proc = &comm->procs[hdr->hdr_src]; + proc = mca_pml_ob1_peer_lookup (comm_ptr, hdr->hdr_src); /** * We generate the MSG_ARRIVED event as soon as the PML is aware of a matching @@ -662,13 +662,13 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); /* get next expected message sequence number - if threaded - * run, lock to make sure that if another thread is processing + * run, lock to make sure that if another thread is processing * a frag from the same message a match is made only once. * Also, this prevents other posted receives (for a pair of * end points) from being processed, and potentially "loosing" * the fragment. 
*/ - OPAL_THREAD_LOCK(&comm->matching_lock); + OB1_MATCHING_LOCK(&comm->matching_lock); /* get sequence number of next message that can be processed */ next_msg_seq_expected = (uint16_t)proc->expected_sequence; @@ -704,7 +704,7 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); /* release matching lock before processing fragment */ - OPAL_THREAD_UNLOCK(&comm->matching_lock); + OB1_MATCHING_UNLOCK(&comm->matching_lock); if(OPAL_LIKELY(match)) { switch(type) { @@ -718,18 +718,18 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, mca_pml_ob1_recv_request_progress_rget(match, btl, segments, num_segments); break; } - + if(OPAL_UNLIKELY(frag)) MCA_PML_OB1_RECV_FRAG_RETURN(frag); } - - /* + + /* * Now that new message has arrived, check to see if * any fragments on the c_c_frags_cant_match list * may now be used to form new matchs */ if(OPAL_UNLIKELY(opal_list_get_size(&proc->frags_cant_match) > 0)) { - OPAL_THREAD_LOCK(&comm->matching_lock); + OB1_MATCHING_LOCK(&comm->matching_lock); if((frag = check_cantmatch_for_match(proc))) { hdr = &frag->hdr.hdr_match; segments = frag->segments; @@ -738,7 +738,7 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, type = hdr->hdr_common.hdr_type; goto out_of_order_match; } - OPAL_THREAD_UNLOCK(&comm->matching_lock); + OB1_MATCHING_UNLOCK(&comm->matching_lock); } return OMPI_SUCCESS; @@ -749,7 +749,7 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, */ append_frag_to_list(&proc->frags_cant_match, btl, hdr, segments, num_segments, NULL); - OPAL_THREAD_UNLOCK(&comm->matching_lock); + OB1_MATCHING_UNLOCK(&comm->matching_lock); return OMPI_SUCCESS; } diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.h b/ompi/mca/pml/ob1/pml_ob1_recvfrag.h index 82f90b29f0f..80bcef1501f 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.h +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of 
Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -58,7 +58,7 @@ do { \ opal_free_list_wait (&mca_pml_ob1.recv_frags); \ } while(0) - + #define MCA_PML_OB1_RECV_FRAG_INIT(frag, hdr, segs, cnt, btl ) \ do { \ size_t i, _size; \ @@ -81,7 +81,7 @@ do { \ buffers[0].addr = (char*) \ mca_pml_ob1.allocator->alc_alloc( mca_pml_ob1.allocator, \ buffers[0].len, \ - 0, NULL); \ + 0); \ _ptr = (unsigned char*)(buffers[0].addr); \ macro_segments[0].seg_addr.pval = buffers[0].addr; \ } \ @@ -112,16 +112,16 @@ do { \ * Callback from BTL on receipt of a recv_frag (match). */ -extern void mca_pml_ob1_recv_frag_callback_match( mca_btl_base_module_t *btl, +extern void mca_pml_ob1_recv_frag_callback_match( mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* descriptor, void* cbdata ); - + /** * Callback from BTL on receipt of a recv_frag (rndv). */ -extern void mca_pml_ob1_recv_frag_callback_rndv( mca_btl_base_module_t *btl, +extern void mca_pml_ob1_recv_frag_callback_rndv( mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* descriptor, void* cbdata ); @@ -129,7 +129,7 @@ extern void mca_pml_ob1_recv_frag_callback_rndv( mca_btl_base_module_t *btl, * Callback from BTL on receipt of a recv_frag (rget). 
*/ -extern void mca_pml_ob1_recv_frag_callback_rget( mca_btl_base_module_t *btl, +extern void mca_pml_ob1_recv_frag_callback_rget( mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* descriptor, void* cbdata ); @@ -138,7 +138,7 @@ extern void mca_pml_ob1_recv_frag_callback_rget( mca_btl_base_module_t *btl, * Callback from BTL on receipt of a recv_frag (ack). */ -extern void mca_pml_ob1_recv_frag_callback_ack( mca_btl_base_module_t *btl, +extern void mca_pml_ob1_recv_frag_callback_ack( mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* descriptor, void* cbdata ); @@ -146,7 +146,7 @@ extern void mca_pml_ob1_recv_frag_callback_ack( mca_btl_base_module_t *btl, * Callback from BTL on receipt of a recv_frag (frag). */ -extern void mca_pml_ob1_recv_frag_callback_frag( mca_btl_base_module_t *btl, +extern void mca_pml_ob1_recv_frag_callback_frag( mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* descriptor, void* cbdata ); @@ -154,7 +154,7 @@ extern void mca_pml_ob1_recv_frag_callback_frag( mca_btl_base_module_t *btl, * Callback from BTL on receipt of a recv_frag (put). */ -extern void mca_pml_ob1_recv_frag_callback_put( mca_btl_base_module_t *btl, +extern void mca_pml_ob1_recv_frag_callback_put( mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* descriptor, void* cbdata ); @@ -162,12 +162,12 @@ extern void mca_pml_ob1_recv_frag_callback_put( mca_btl_base_module_t *btl, * Callback from BTL on receipt of a recv_frag (fin). 
*/ -extern void mca_pml_ob1_recv_frag_callback_fin( mca_btl_base_module_t *btl, +extern void mca_pml_ob1_recv_frag_callback_fin( mca_btl_base_module_t *btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* descriptor, void* cbdata ); - + END_C_DECLS #endif diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 30b5df4a6aa..f4cfb67736f 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -3,10 +3,10 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -19,24 +19,24 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" -#include "opal/mca/mpool/mpool.h" +#include "opal/mca/mpool/mpool.h" #include "opal/util/arch.h" #include "ompi/mca/pml/pml.h" -#include "ompi/mca/bml/bml.h" +#include "ompi/mca/bml/bml.h" #include "pml_ob1_comm.h" #include "pml_ob1_recvreq.h" #include "pml_ob1_recvfrag.h" #include "pml_ob1_sendreq.h" #include "pml_ob1_rdmafrag.h" -#include "ompi/mca/bml/base/base.h" +#include "ompi/mca/bml/base/base.h" #include "ompi/memchecker.h" #if OPAL_CUDA_SUPPORT #include "opal/datatype/opal_datatype_cuda.h" @@ -69,50 +69,50 @@ void mca_pml_ob1_recv_request_process_pending(void) static int mca_pml_ob1_recv_request_free(struct ompi_request_t** request) { - mca_pml_ob1_recv_request_t* recvreq = *(mca_pml_ob1_recv_request_t**)request; + mca_pml_ob1_recv_request_t* recvreq = *(mca_pml_ob1_recv_request_t**)request; - assert( false == recvreq->req_recv.req_base.req_free_called ); + if(false == recvreq->req_recv.req_base.req_free_called){ - OPAL_THREAD_LOCK(&ompi_request_lock); - recvreq->req_recv.req_base.req_free_called = true; + recvreq->req_recv.req_base.req_free_called = true; + PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, + &(recvreq->req_recv.req_base), PERUSE_RECV ); - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, - &(recvreq->req_recv.req_base), PERUSE_RECV ); + if( true == recvreq->req_recv.req_base.req_pml_complete ) { + /* make buffer defined when the request is compeleted, + and before releasing the objects. */ + MEMCHECKER( + memchecker_call(&opal_memchecker_base_mem_defined, + recvreq->req_recv.req_base.req_addr, + recvreq->req_recv.req_base.req_count, + recvreq->req_recv.req_base.req_datatype); + ); - if( true == recvreq->req_recv.req_base.req_pml_complete ) { - /* make buffer defined when the request is compeleted, - and before releasing the objects. 
*/ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); + MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq ); + } - MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq ); } - - OPAL_THREAD_UNLOCK(&ompi_request_lock); *request = MPI_REQUEST_NULL; return OMPI_SUCCESS; -} +} static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request, int complete) { mca_pml_ob1_recv_request_t* request = (mca_pml_ob1_recv_request_t*)ompi_request; - mca_pml_ob1_comm_t* comm = request->req_recv.req_base.req_comm->c_pml_comm; + ompi_communicator_t *comm = request->req_recv.req_base.req_comm; + mca_pml_ob1_comm_t *ob1_comm = comm->c_pml_comm; + /* The rest should be protected behind the match logic lock */ + OB1_MATCHING_LOCK(&ob1_comm->matching_lock); if( true == request->req_match_received ) { /* way to late to cancel this one */ + OB1_MATCHING_UNLOCK(&ob1_comm->matching_lock); assert( OMPI_ANY_TAG != ompi_request->req_status.MPI_TAG ); /* not matched isn't it */ return OMPI_SUCCESS; } - /* The rest should be protected behind the match logic lock */ - OPAL_THREAD_LOCK(&comm->matching_lock); if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) { - opal_list_remove_item( &comm->wild_receives, (opal_list_item_t*)request ); + opal_list_remove_item( &ob1_comm->wild_receives, (opal_list_item_t*)request ); } else { - mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer; + mca_pml_ob1_comm_proc_t* proc = mca_pml_ob1_peer_lookup (comm, request->req_recv.req_base.req_peer); opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request); } PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, @@ -122,16 +122,14 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request, * to true. Otherwise, the request will never be freed. 
*/ request->req_recv.req_base.req_pml_complete = true; - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - OPAL_THREAD_LOCK(&ompi_request_lock); + OB1_MATCHING_UNLOCK(&ob1_comm->matching_lock); + ompi_request->req_status._cancelled = true; /* This macro will set the req_complete to true so the MPI Test/Wait* functions * on this request will be able to complete. As the status is marked as * cancelled the cancel state will be detected. */ MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE(request); - OPAL_THREAD_UNLOCK(&ompi_request_lock); /* * Receive request cancelled, make user buffer accessible. */ @@ -228,10 +226,10 @@ int mca_pml_ob1_recv_request_ack_send_btl( /* allocate descriptor */ mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_ack_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | + MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK | MCA_BTL_DES_FLAGS_SIGNAL); if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; + return OMPI_ERR_OUT_OF_RESOURCE; } /* fill out header */ @@ -249,26 +247,26 @@ int mca_pml_ob1_recv_request_ack_send_btl( return OMPI_SUCCESS; } mca_bml_base_free(bml_btl, des); - return OMPI_ERR_OUT_OF_RESOURCE; + return OMPI_ERR_OUT_OF_RESOURCE; } static int mca_pml_ob1_recv_request_ack( mca_pml_ob1_recv_request_t* recvreq, - mca_pml_ob1_rendezvous_hdr_t* hdr, + mca_pml_ob1_rendezvous_hdr_t* hdr, size_t bytes_received) { ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; mca_bml_base_endpoint_t* bml_endpoint = NULL; - bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; + bml_endpoint = mca_bml_base_get_endpoint (proc); /* by default copy everything */ recvreq->req_send_offset = bytes_received; if(hdr->hdr_msg_length > bytes_received) { - size_t rdma_num = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); + size_t rdma_num = mca_pml_ob1_rdma_pipeline_btls_count (bml_endpoint); /* * lookup 
request buffer to determine if memory is already - * registered. + * registered. */ if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == 0 && @@ -276,7 +274,7 @@ static int mca_pml_ob1_recv_request_ack( rdma_num != 0) { unsigned char *base; opal_convertor_get_current_pointer( &recvreq->req_recv.req_base.req_convertor, (void**)&(base) ); - + if(hdr->hdr_match.hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_PIN) recvreq->req_rdma_cnt = mca_pml_ob1_rdma_btls(bml_endpoint, base, recvreq->req_recv.req_bytes_packed, @@ -290,7 +288,7 @@ static int mca_pml_ob1_recv_request_ack( /* are rdma devices available for long rdma protocol */ } else if(bml_endpoint->btl_send_limit < hdr->hdr_msg_length) { /* use convertor to figure out the rdma offset for this request */ - recvreq->req_send_offset = hdr->hdr_msg_length - + recvreq->req_send_offset = hdr->hdr_msg_length - bml_endpoint->btl_pipeline_send_length; if(recvreq->req_send_offset < bytes_received) @@ -413,7 +411,7 @@ static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag) return OMPI_ERR_OUT_OF_RESOURCE; } ctl->des_cbfunc = mca_pml_ob1_recv_ctl_completion; - + /* fill in rdma header */ hdr = (mca_pml_ob1_rdma_hdr_t *) ctl->des_segments->seg_addr.pval; mca_pml_ob1_rdma_hdr_prepare (hdr, (!recvreq->req_ack_sent) ? MCA_PML_OB1_HDR_TYPE_ACK : 0, @@ -485,7 +483,7 @@ int mca_pml_ob1_recv_request_get_frag (mca_pml_ob1_rdma_frag_t *frag) /* * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. + * received and actually delivered to the application. 
*/ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq, @@ -526,7 +524,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq recvreq->req_recv.req_base.req_count, recvreq->req_recv.req_base.req_datatype); ); - + OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); /* check completion status */ if(recv_request_pml_complete_check(recvreq) == false && @@ -542,7 +540,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq * mca_pml_ob1_recv_request_progress_frag function. This fires off * the asynchronous copy and returns. Unused fields in the descriptor * are used to pass extra information for when the asynchronous copy - * completes. No memchecker support in this function as copies are + * completes. No memchecker support in this function as copies are * happening asynchronously. */ void mca_pml_ob1_recv_request_frag_copy_start( mca_pml_ob1_recv_request_t* recvreq, @@ -600,7 +598,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl, size_t bytes_received = (size_t) (intptr_t) des->des_cbdata; OPAL_OUTPUT((-1, "frag_copy_finished (delivered=%d), frag=%p", (int)bytes_received, (void *)des)); - /* Call into the BTL so it can free the descriptor. At this point, it is + /* Call into the BTL so it can free the descriptor. At this point, it is * known that the data has been copied out of the descriptor. */ des->des_cbfunc(NULL, NULL, des, 0); @@ -617,7 +615,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl, /* * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. + * received and actually delivered to the application. 
*/ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq, @@ -636,9 +634,10 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq bytes_remaining = hdr->hdr_rndv.hdr_msg_length; recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length; recvreq->req_send_offset = 0; + recvreq->req_rdma_offset = 0; MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match); - + /* if receive buffer is not contiguous we can't just RDMA read into it, so * fall back to copy in/out protocol. It is a pity because buffer on the * sender side is already registered. We need to be smarter here, perhaps @@ -652,9 +651,9 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq return; } } - + /* lookup bml datastructures */ - bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; + bml_endpoint = mca_bml_base_get_endpoint (recvreq->req_recv.req_base.req_proc); rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); #if OPAL_CUDA_SUPPORT @@ -770,7 +769,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq /* * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. + * received and actually delivered to the application. */ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq, @@ -834,12 +833,12 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq opal_cuda_set_copy_function_async(&recvreq->req_recv.req_base.req_convertor, strm); } #endif - + } /* * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. + * received and actually delivered to the application. 
*/ void mca_pml_ob1_recv_request_progress_match( mca_pml_ob1_recv_request_t* recvreq, mca_btl_base_module_t* btl, @@ -854,7 +853,7 @@ void mca_pml_ob1_recv_request_progress_match( mca_pml_ob1_recv_request_t* recvre OMPI_PML_OB1_MATCH_HDR_LEN); recvreq->req_recv.req_bytes_packed = bytes_received; - + MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_match); /* * Make user buffer accessable(defined) before unpacking. @@ -881,7 +880,7 @@ void mca_pml_ob1_recv_request_progress_match( mca_pml_ob1_recv_request_t* recvre recvreq->req_recv.req_base.req_count, recvreq->req_recv.req_base.req_datatype); ); - + /* * No need for atomic here, as we know there is only one fragment * for this request. @@ -933,7 +932,7 @@ void mca_pml_ob1_recv_request_matched_probe( mca_pml_ob1_recv_request_t* recvreq int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq, mca_bml_base_btl_t *start_bml_btl ) { - mca_bml_base_btl_t* bml_btl; + mca_bml_base_btl_t* bml_btl; int num_tries = recvreq->req_rdma_cnt, num_fail = 0; size_t i, prev_bytes_remaining = 0; size_t bytes_remaining = recvreq->req_send_offset - @@ -1052,10 +1051,6 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq, static inline void append_recv_req_to_queue(opal_list_t *queue, mca_pml_ob1_recv_request_t *req) { - if(OPAL_UNLIKELY(req->req_recv.req_base.req_type == MCA_PML_REQUEST_IPROBE || - req->req_recv.req_base.req_type == MCA_PML_REQUEST_IMPROBE)) - return; - opal_list_append(queue, (opal_list_item_t*)req); /** @@ -1079,8 +1074,11 @@ static mca_pml_ob1_recv_frag_t* recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req, mca_pml_ob1_comm_proc_t *proc ) { + if (NULL == proc) { + return NULL; + } + opal_list_t* unexpected_frags = &proc->unexpected_frags; - opal_list_item_t *i; mca_pml_ob1_recv_frag_t* frag; int tag = req->req_recv.req_base.req_tag; @@ -1088,20 +1086,12 @@ recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req, return NULL; if( 
OMPI_ANY_TAG == tag ) { - for (i = opal_list_get_first(unexpected_frags); - i != opal_list_get_end(unexpected_frags); - i = opal_list_get_next(i)) { - frag = (mca_pml_ob1_recv_frag_t*)i; - + OPAL_LIST_FOREACH(frag, unexpected_frags, mca_pml_ob1_recv_frag_t) { if( frag->hdr.hdr_match.hdr_tag >= 0 ) return frag; } } else { - for (i = opal_list_get_first(unexpected_frags); - i != opal_list_get_end(unexpected_frags); - i = opal_list_get_next(i)) { - frag = (mca_pml_ob1_recv_frag_t*)i; - + OPAL_LIST_FOREACH(frag, unexpected_frags, mca_pml_ob1_recv_frag_t) { if( frag->hdr.hdr_match.hdr_tag == tag ) return frag; } @@ -1118,7 +1108,7 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req, mca_pml_ob1_comm_proc_t **p) { mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; - mca_pml_ob1_comm_proc_t* proc = comm->procs; + mca_pml_ob1_comm_proc_t **procp = comm->procs; size_t i; /* @@ -1133,10 +1123,10 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req, mca_pml_ob1_recv_frag_t* frag; /* loop over messages from the current proc */ - if((frag = recv_req_match_specific_proc(req, &proc[i]))) { - *p = &proc[i]; + if((frag = recv_req_match_specific_proc(req, procp[i]))) { + *p = procp[i]; comm->last_probed = i; - req->req_recv.req_base.req_proc = proc[i].ompi_proc; + req->req_recv.req_base.req_proc = procp[i]->ompi_proc; prepare_recv_req_converter(req); return frag; /* match found */ } @@ -1145,10 +1135,10 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req, mca_pml_ob1_recv_frag_t* frag; /* loop over messages from the current proc */ - if((frag = recv_req_match_specific_proc(req, &proc[i]))) { - *p = &proc[i]; + if((frag = recv_req_match_specific_proc(req, procp[i]))) { + *p = procp[i]; comm->last_probed = i; - req->req_recv.req_base.req_proc = proc[i].ompi_proc; + req->req_recv.req_base.req_proc = procp[i]->ompi_proc; prepare_recv_req_converter(req); return frag; /* match found */ } @@ -1161,7 +1151,8 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* 
req, void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) { - mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; + ompi_communicator_t *comm = req->req_recv.req_base.req_comm; + mca_pml_ob1_comm_t *ob1_comm = comm->c_pml_comm; mca_pml_ob1_comm_proc_t* proc; mca_pml_ob1_recv_frag_t* frag; opal_list_t *queue; @@ -1179,7 +1170,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) MCA_PML_BASE_RECV_START(&req->req_recv.req_base); - OPAL_THREAD_LOCK(&comm->matching_lock); + OB1_MATCHING_LOCK(&ob1_comm->matching_lock); /** * The laps of time between the ACTIVATE event and the SEARCH_UNEX one include * the cost of the request lock. @@ -1188,12 +1179,12 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) &(req->req_recv.req_base), PERUSE_RECV); /* assign sequence number */ - req->req_recv.req_base.req_sequence = comm->recv_sequence++; + req->req_recv.req_base.req_sequence = ob1_comm->recv_sequence++; /* attempt to match posted recv */ if(req->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) { frag = recv_req_match_wild(req, &proc); - queue = &comm->wild_receives; + queue = &ob1_comm->wild_receives; #if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT /* As we are in a homogeneous environment we know that all remote * architectures are exactly the same as the local one. 
Therefore, @@ -1206,11 +1197,11 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) } #endif /* !OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ } else { - proc = &comm->procs[req->req_recv.req_base.req_peer]; + proc = mca_pml_ob1_peer_lookup (comm, req->req_recv.req_base.req_peer); req->req_recv.req_base.req_proc = proc->ompi_proc; frag = recv_req_match_specific_proc(req, proc); queue = &proc->specific_receives; - /* wild cardrecv will be prepared on match */ + /* wildcard recv will be prepared on match */ prepare_recv_req_converter(req); } @@ -1219,9 +1210,11 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) &(req->req_recv.req_base), PERUSE_RECV); /* We didn't find any matches. Record this irecv so we can match it when the message comes in. */ - append_recv_req_to_queue(queue, req); + if(OPAL_LIKELY(req->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE && + req->req_recv.req_base.req_type != MCA_PML_REQUEST_IMPROBE)) + append_recv_req_to_queue(queue, req); req->req_match_received = false; - OPAL_THREAD_UNLOCK(&comm->matching_lock); + OB1_MATCHING_UNLOCK(&ob1_comm->matching_lock); } else { if(OPAL_LIKELY(!IS_PROB_REQ(req))) { PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_MATCH_UNEX, @@ -1239,8 +1232,8 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) opal_list_remove_item(&proc->unexpected_frags, (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&comm->matching_lock); - + OB1_MATCHING_UNLOCK(&ob1_comm->matching_lock); + switch(hdr->hdr_common.hdr_type) { case MCA_PML_OB1_HDR_TYPE_MATCH: mca_pml_ob1_recv_request_progress_match(req, frag->btl, frag->segments, @@ -1257,7 +1250,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) default: assert(0); } - + MCA_PML_OB1_RECV_FRAG_RETURN(frag); } else if (OPAL_UNLIKELY(IS_MPROB_REQ(req))) { @@ -1269,14 +1262,14 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) restarted with this request during mrecv */ opal_list_remove_item(&proc->unexpected_frags, 
(opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&comm->matching_lock); + OB1_MATCHING_UNLOCK(&ob1_comm->matching_lock); req->req_recv.req_base.req_addr = frag; mca_pml_ob1_recv_request_matched_probe(req, frag->btl, frag->segments, frag->num_segments); } else { - OPAL_THREAD_UNLOCK(&comm->matching_lock); + OB1_MATCHING_UNLOCK(&ob1_comm->matching_lock); mca_pml_ob1_recv_request_matched_probe(req, frag->btl, frag->segments, frag->num_segments); } diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h index c9b75ecf2cf..6d575693237 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.h @@ -3,10 +3,10 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -17,9 +17,9 @@ * and Technology (RIST). All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -123,21 +123,26 @@ do { \ */ #define MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE( recvreq ) \ do { \ - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \ - &(recvreq->req_recv.req_base), PERUSE_RECV ); \ + PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \ + &(recvreq->req_recv.req_base), PERUSE_RECV ); \ ompi_request_complete( &(recvreq->req_recv.req_base.req_ompi), true ); \ } while (0) +static inline void mca_pml_ob1_recv_request_fini (mca_pml_ob1_recv_request_t *recvreq) +{ + MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv); + if ((recvreq)->local_handle) { + mca_bml_base_deregister_mem (recvreq->rdma_bml, recvreq->local_handle); + recvreq->local_handle = NULL; + } +} + /* * Free the PML receive request */ #define MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq) \ { \ - MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \ - if ((recvreq)->local_handle) { \ - mca_bml_base_deregister_mem ((recvreq)->rdma_bml, (recvreq)->local_handle); \ - (recvreq)->local_handle = NULL; \ - } \ + mca_pml_ob1_recv_request_fini (recvreq); \ opal_free_list_return (&mca_pml_base_recv_requests, \ (opal_free_list_item_t*)(recvreq)); \ } @@ -153,47 +158,48 @@ recv_request_pml_complete(mca_pml_ob1_recv_request_t *recvreq) { size_t i; - assert(false == recvreq->req_recv.req_base.req_pml_complete); + if(false == recvreq->req_recv.req_base.req_pml_complete){ - if(recvreq->req_recv.req_bytes_packed > 0) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, - &recvreq->req_recv.req_base, PERUSE_RECV ); - } + if(recvreq->req_recv.req_bytes_packed > 0) { + PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, + &recvreq->req_recv.req_base, PERUSE_RECV ); + } - for(i = 0; i < recvreq->req_rdma_cnt; i++) { - struct mca_btl_base_registration_handle_t *handle = recvreq->req_rdma[i].btl_reg; - mca_bml_base_btl_t *bml_btl = recvreq->req_rdma[i].bml_btl; + for(i = 0; i < recvreq->req_rdma_cnt; i++) { + struct 
mca_btl_base_registration_handle_t *handle = recvreq->req_rdma[i].btl_reg; + mca_bml_base_btl_t *bml_btl = recvreq->req_rdma[i].bml_btl; - if (NULL != handle) { - mca_bml_base_deregister_mem (bml_btl, handle); + if (NULL != handle) { + mca_bml_base_deregister_mem (bml_btl, handle); + } } - } - recvreq->req_rdma_cnt = 0; + recvreq->req_rdma_cnt = 0; - OPAL_THREAD_LOCK(&ompi_request_lock); - if(true == recvreq->req_recv.req_base.req_free_called) { - if( MPI_SUCCESS != recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR ) { - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST); - } - MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq); - } else { - /* initialize request status */ - recvreq->req_recv.req_base.req_pml_complete = true; - recvreq->req_recv.req_base.req_ompi.req_status._ucount = - recvreq->req_bytes_received; - if (recvreq->req_recv.req_bytes_packed > recvreq->req_bytes_expected) { + + if(true == recvreq->req_recv.req_base.req_free_called) { + if( MPI_SUCCESS != recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR ) { + ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST); + } + MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq); + } else { + /* initialize request status */ + recvreq->req_recv.req_base.req_pml_complete = true; recvreq->req_recv.req_base.req_ompi.req_status._ucount = - recvreq->req_recv.req_bytes_packed; - recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR = - MPI_ERR_TRUNCATE; + recvreq->req_bytes_received; + if (recvreq->req_recv.req_bytes_packed > recvreq->req_bytes_expected) { + recvreq->req_recv.req_base.req_ompi.req_status._ucount = + recvreq->req_recv.req_bytes_packed; + recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR = + MPI_ERR_TRUNCATE; + } + if (OPAL_UNLIKELY(recvreq->local_handle)) { + mca_bml_base_deregister_mem (recvreq->rdma_bml, recvreq->local_handle); + recvreq->local_handle = NULL; + } + MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE(recvreq); } - if (OPAL_UNLIKELY(recvreq->local_handle)) { - 
mca_bml_base_deregister_mem (recvreq->rdma_bml, recvreq->local_handle); - recvreq->local_handle = NULL; - } - MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE(recvreq); + } - OPAL_THREAD_UNLOCK(&ompi_request_lock); } static inline bool @@ -330,7 +336,7 @@ void mca_pml_ob1_recv_request_frag_copy_start( size_t num_segments, mca_btl_base_descriptor_t* des); -void mca_pml_ob1_recv_request_frag_copy_finished(struct mca_btl_base_module_t* btl, +void mca_pml_ob1_recv_request_frag_copy_finished(struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* ep, struct mca_btl_base_descriptor_t* des, int status ); @@ -428,8 +434,9 @@ static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc, { size_t i; mca_bml_base_btl_t* bml_btl; - mca_bml_base_endpoint_t* endpoint = - (mca_bml_base_endpoint_t*)proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; + mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (proc); + + assert (NULL != endpoint); for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) { bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index 832a0c50249..57ff6fd3dc1 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -3,30 +3,30 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" #include "opal/prefetch.h" -#include "opal/mca/mpool/mpool.h" +#include "opal/mca/mpool/mpool.h" #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" #include "pml_ob1.h" @@ -37,6 +37,7 @@ #include "ompi/mca/bml/base/base.h" #include "ompi/memchecker.h" + OBJ_CLASS_INSTANCE(mca_pml_ob1_send_range_t, opal_free_list_item_t, NULL, NULL); @@ -97,31 +98,26 @@ void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl) static int mca_pml_ob1_send_request_free(struct ompi_request_t** request) { mca_pml_ob1_send_request_t* sendreq = *(mca_pml_ob1_send_request_t**)request; - - assert( false == sendreq->req_send.req_base.req_free_called ); - - OPAL_THREAD_LOCK(&ompi_request_lock); - sendreq->req_send.req_base.req_free_called = true; + if(false == sendreq->req_send.req_base.req_free_called) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, + sendreq->req_send.req_base.req_free_called = true; + PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, &(sendreq->req_send.req_base), PERUSE_SEND ); - if( true == sendreq->req_send.req_base.req_pml_complete ) { - /* make buffer defined when the request is compeleted, - and before releasing the objects. 
*/ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - - MCA_PML_OB1_SEND_REQUEST_RETURN( sendreq ); + if( true == sendreq->req_send.req_base.req_pml_complete ) { + /* make buffer defined when the request is compeleted, + and before releasing the objects. */ + MEMCHECKER( + memchecker_call(&opal_memchecker_base_mem_defined, + sendreq->req_send.req_base.req_addr, + sendreq->req_send.req_base.req_count, + sendreq->req_send.req_base.req_datatype); + ); + + MCA_PML_OB1_SEND_REQUEST_RETURN( sendreq ); + } + *request = MPI_REQUEST_NULL; } - - OPAL_THREAD_UNLOCK(&ompi_request_lock); - - *request = MPI_REQUEST_NULL; return OMPI_SUCCESS; } @@ -163,7 +159,7 @@ OBJ_CLASS_INSTANCE( mca_pml_ob1_send_request_t, */ static inline void -mca_pml_ob1_match_completion_free_request( mca_bml_base_btl_t* bml_btl, +mca_pml_ob1_match_completion_free_request( mca_bml_base_btl_t* bml_btl, mca_pml_ob1_send_request_t* sendreq ) { if( sendreq->req_send.req_bytes_packed > 0 ) { @@ -179,13 +175,13 @@ mca_pml_ob1_match_completion_free_request( mca_bml_base_btl_t* bml_btl, } static void -mca_pml_ob1_match_completion_free( struct mca_btl_base_module_t* btl, +mca_pml_ob1_match_completion_free( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* ep, struct mca_btl_base_descriptor_t* des, int status ) { mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; + mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; /* check completion status */ if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { @@ -218,7 +214,7 @@ mca_pml_ob1_rndv_completion_request( mca_bml_base_btl_t* bml_btl, } /* - * Completion of the first fragment of a long message that + * Completion of the first fragment of a long message that * requires an acknowledgement */ 
static void @@ -281,7 +277,7 @@ mca_pml_ob1_send_ctl_completion( mca_btl_base_module_t* btl, struct mca_btl_base_descriptor_t* des, int status ) { - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; + mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; /* check for pending requests */ MCA_PML_OB1_PROGRESS_PENDING(bml_btl); @@ -376,18 +372,18 @@ int mca_pml_ob1_send_request_start_buffered( int rc; /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, + mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_rendezvous_hdr_t) + size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL); if( OPAL_UNLIKELY(NULL == des) ) { return OMPI_ERR_OUT_OF_RESOURCE; - } + } segment = des->des_segments; /* pack the data into the BTL supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + + iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + sizeof(mca_pml_ob1_rendezvous_hdr_t)); iov.iov_len = size; iov_count = 1; @@ -441,14 +437,12 @@ int mca_pml_ob1_send_request_start_buffered( &(ompi_mpi_byte.dt.super), sendreq->req_send.req_bytes_packed, sendreq->req_send.req_addr ); - + /* wait for ack and completion */ sendreq->req_state = 2; /* request is complete at mpi level */ - OPAL_THREAD_LOCK(&ompi_request_lock); MCA_PML_OB1_SEND_REQUEST_MPI_COMPLETE(sendreq, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); /* send */ rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RNDV); @@ -501,9 +495,6 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) { /* signal request completion */ send_request_pml_complete(sendreq); - - /* check for pending requests */ - MCA_PML_OB1_PROGRESS_PENDING(bml_btl); return OMPI_SUCCESS; } @@ -511,7 +502,7 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, if (size > 0 && NULL != des) { 
MCA_PML_OB1_SEND_REQUEST_RESET(sendreq); } - } else { + } else { /* allocate descriptor */ mca_bml_base_alloc( bml_btl, &des, MCA_BTL_NO_ORDER, @@ -531,7 +522,7 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, iov.iov_len = size; iov_count = 1; /* - * Before copy the user buffer, make the target part + * Before copy the user buffer, make the target part * accessible. */ MEMCHECKER( @@ -553,7 +544,7 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, ); } - + /* build match header */ hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; mca_pml_ob1_match_hdr_prepare (&hdr->hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, @@ -573,7 +564,7 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, /* send */ rc = mca_bml_base_send_status(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); - if( OPAL_LIKELY( rc >= OMPI_SUCCESS ) ) { + if( OPAL_LIKELY( rc >= OPAL_SUCCESS ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_match_completion_free_request( bml_btl, sendreq ); } @@ -632,8 +623,8 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq, des->des_cbdata = sendreq; /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); - if( OPAL_LIKELY( rc >= 0 ) ) { + rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); + if( OPAL_LIKELY( rc >= OPAL_SUCCESS ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_match_completion_free_request( bml_btl, sendreq ); } @@ -694,9 +685,6 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, frag->cbfunc = mca_pml_ob1_rget_completion; /* do not store the local handle in the fragment. 
it will be released by mca_pml_ob1_free_rdma_resources */ - /* save the fragment for get->put fallback */ - sendreq->rdma_frag = frag; - reg_size = bml_btl->btl->btl_registration_handle_size; /* allocate space for get hdr + segment list */ @@ -705,9 +693,13 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, MCA_BTL_DES_FLAGS_SIGNAL); if( OPAL_UNLIKELY(NULL == des) ) { /* NTH: no need to reset the converter here. it will be reset before it is retried */ + MCA_PML_OB1_RDMA_FRAG_RETURN(frag); return OMPI_ERR_OUT_OF_RESOURCE; } + /* save the fragment for get->put fallback */ + sendreq->rdma_frag = frag; + /* build match header */ hdr = (mca_pml_ob1_rget_hdr_t *) des->des_segments->seg_addr.pval; /* TODO -- Add support for multiple segments for get */ @@ -762,11 +754,11 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, /* prepare descriptor */ if(size == 0) { - mca_bml_base_alloc( bml_btl, - &des, + mca_bml_base_alloc( bml_btl, + &des, MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_rendezvous_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ); + MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ); } else { MEMCHECKER( memchecker_call(&opal_memchecker_base_mem_defined, @@ -774,7 +766,7 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, sendreq->req_send.req_base.req_count, sendreq->req_send.req_base.req_datatype); ); - mca_bml_base_prepare_src( bml_btl, + mca_bml_base_prepare_src( bml_btl, &sendreq->req_send.req_base.req_convertor, MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_rendezvous_hdr_t), @@ -792,7 +784,7 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, if( OPAL_UNLIKELY(NULL == des) ) { return OMPI_ERR_OUT_OF_RESOURCE; - } + } segment = des->des_segments; /* build hdr */ @@ -911,7 +903,7 @@ get_next_send_range(mca_pml_ob1_send_request_t* sendreq, int mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) -{ 
+{ size_t prev_bytes_remaining = 0; mca_pml_ob1_send_range_t *range; int num_fail = 0; @@ -941,7 +933,7 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) prev_bytes_remaining = range->range_send_length; if( OPAL_UNLIKELY(num_fail == range->range_btl_cnt) ) { - assert(sendreq->req_pending == MCA_PML_OB1_SEND_PENDING_NONE); + /*TODO : assert(sendreq->req_pending == MCA_PML_OB1_SEND_PENDING_NONE); */ add_request_to_send_pending(sendreq, MCA_PML_OB1_SEND_PENDING_SCHEDULE, true); /* Note that request remains locked. send_request_process_pending() @@ -965,17 +957,25 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) /* makes sure that we don't exceed BTL max send size */ if(bml_btl->btl->btl_max_send_size != 0) { +#if OPAL_CUDA_SUPPORT + size_t max_send_size; + if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) && (bml_btl->btl->btl_cuda_max_send_size != 0)) { + max_send_size = bml_btl->btl->btl_cuda_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t); + } else { + max_send_size = bml_btl->btl->btl_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t); + } +#else /* OPAL_CUDA_SUPPORT */ size_t max_send_size = bml_btl->btl->btl_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t); - +#endif /* OPAL_CUDA_SUPPORT */ if (size > max_send_size) { size = max_send_size; } } - + /* pack into a descriptor */ offset = (size_t)range->range_send_offset; - opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, + opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, &offset); range->range_send_offset = (uint64_t)offset; @@ -1004,7 +1004,7 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) mca_bml_base_free(bml_btl, des); range->range_btls[btl_idx].length -= data_remaining; goto cannot_pack; - } + } continue; } @@ -1062,13 +1062,13 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) range = get_next_send_range(sendreq, range); 
prev_bytes_remaining = 0; } - } else { + } else { mca_bml_base_free(bml_btl,des); } } return OMPI_SUCCESS; -} +} /** @@ -1117,7 +1117,7 @@ static void mca_pml_ob1_put_completion (mca_btl_base_module_t* btl, struct mca_b /* check completion status */ if( OPAL_UNLIKELY(OMPI_SUCCESS == status) ) { - /* TODO -- readd ordering */ + /* TODO -- read ordering */ mca_pml_ob1_send_fin (sendreq->req_send.req_base.req_proc, bml_btl, frag->rdma_hdr.hdr_rdma.hdr_frag, frag->rdma_length, 0, 0); @@ -1137,7 +1137,7 @@ static void mca_pml_ob1_put_completion (mca_btl_base_module_t* btl, struct mca_b } int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t *frag ) -{ +{ mca_pml_ob1_send_request_t *sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req; mca_btl_base_registration_handle_t *local_handle = NULL; mca_bml_base_btl_t *bml_btl = frag->rdma_bml; @@ -1186,7 +1186,7 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t *frag ) * Receiver has scheduled an RDMA operation: * (1) Allocate an RDMA fragment to maintain the state of the operation * (2) Call BTL prepare_src to pin/prepare source buffers - * (3) Queue the RDMA put + * (3) Queue the RDMA put */ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq, @@ -1196,7 +1196,7 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq, mca_bml_base_endpoint_t *bml_endpoint = sendreq->req_endpoint; mca_pml_ob1_rdma_frag_t* frag; - if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) { + if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) { OPAL_THREAD_ADD32(&sendreq->req_state, -1); } @@ -1222,7 +1222,7 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq, frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); frag->rdma_hdr.hdr_rdma = *hdr; - frag->rdma_req = sendreq; + frag->rdma_req = sendreq; frag->rdma_length = hdr->hdr_dst_size; frag->rdma_state = MCA_PML_OB1_RDMA_PUT; frag->remote_address = hdr->hdr_dst_ptr; diff --git 
a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index e606b8c99e2..d9fa0c852f2 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -3,10 +3,10 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,7 +31,7 @@ #include "pml_ob1_hdr.h" #include "pml_ob1_rdma.h" #include "pml_ob1_rdmafrag.h" -#include "ompi/mca/bml/bml.h" +#include "ompi/mca/bml/bml.h" BEGIN_C_DECLS @@ -50,9 +50,9 @@ struct mca_pml_ob1_send_request_t { bool req_throttle_sends; size_t req_pipeline_depth; size_t req_bytes_delivered; - uint32_t req_rdma_cnt; + uint32_t req_rdma_cnt; mca_pml_ob1_send_pending_t req_pending; - opal_mutex_t req_send_range_lock; + opal_mutex_t req_send_range_lock; opal_list_t req_send_ranges; mca_pml_ob1_rdma_frag_t *rdma_frag; /** The size of this array is set from mca_pml_ob1.max_rdma_per_request */ @@ -182,7 +182,7 @@ static inline void mca_pml_ob1_free_rdma_resources (mca_pml_ob1_send_request_t* /** - * Start a send request. + * Start a send request. 
*/ #define MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc) \ @@ -209,24 +209,29 @@ do { (sendreq)->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \ (sendreq)->req_send.req_base.req_ompi.req_status._ucount = \ (sendreq)->req_send.req_bytes_packed; \ - ompi_request_complete( &((sendreq)->req_send.req_base.req_ompi), (with_signal) ); \ - \ PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \ &(sendreq->req_send.req_base), PERUSE_SEND); \ + \ + ompi_request_complete( &((sendreq)->req_send.req_base.req_ompi), (with_signal) ); \ } while(0) +static inline void mca_pml_ob1_send_request_fini (mca_pml_ob1_send_request_t *sendreq) +{ + /* Let the base handle the reference counts */ + MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); + if (sendreq->rdma_frag) { + MCA_PML_OB1_RDMA_FRAG_RETURN (sendreq->rdma_frag); + sendreq->rdma_frag = NULL; + } +} + /* * Release resources associated with a request */ #define MCA_PML_OB1_SEND_REQUEST_RETURN(sendreq) \ do { \ - /* Let the base handle the reference counts */ \ - MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \ - if (sendreq->rdma_frag) { \ - MCA_PML_OB1_RDMA_FRAG_RETURN (sendreq->rdma_frag); \ - sendreq->rdma_frag = NULL; \ - } \ + mca_pml_ob1_send_request_fini (sendreq); \ opal_free_list_return ( &mca_pml_base_send_requests, \ (opal_free_list_item_t*)sendreq); \ } while(0) @@ -243,36 +248,35 @@ do { static inline void send_request_pml_complete(mca_pml_ob1_send_request_t *sendreq) { - assert(false == sendreq->req_send.req_base.req_pml_complete); + if(false == sendreq->req_send.req_base.req_pml_complete) { + if(sendreq->req_send.req_bytes_packed > 0) { + PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, + &(sendreq->req_send.req_base), PERUSE_SEND); + } - if(sendreq->req_send.req_bytes_packed > 0) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, - &(sendreq->req_send.req_base), PERUSE_SEND); - } + /* return mpool resources */ + mca_pml_ob1_free_rdma_resources(sendreq); - /* return mpool 
resources */ - mca_pml_ob1_free_rdma_resources(sendreq); + if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED && + sendreq->req_send.req_addr != sendreq->req_send.req_base.req_addr) { + mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq); + } - if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED && - sendreq->req_send.req_addr != sendreq->req_send.req_base.req_addr) { - mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq); - } + if (!sendreq->req_send.req_base.req_free_called) { + sendreq->req_send.req_base.req_pml_complete = true; - OPAL_THREAD_LOCK(&ompi_request_lock); - if(false == sendreq->req_send.req_base.req_ompi.req_complete) { - /* Should only be called for long messages (maybe synchronous) */ - MCA_PML_OB1_SEND_REQUEST_MPI_COMPLETE(sendreq, true); - } else { - if( MPI_SUCCESS != sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR ) { - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST); + if( !REQUEST_COMPLETE( &((sendreq->req_send).req_base.req_ompi)) ) { + /* Should only be called for long messages (maybe synchronous) */ + MCA_PML_OB1_SEND_REQUEST_MPI_COMPLETE(sendreq, true); + } else { + if( MPI_SUCCESS != sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR ) { + ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST); + } + } + } else { + MCA_PML_OB1_SEND_REQUEST_RETURN(sendreq); } } - sendreq->req_send.req_base.req_pml_complete = true; - - if(sendreq->req_send.req_base.req_free_called) { - MCA_PML_OB1_SEND_REQUEST_RETURN(sendreq); - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); } /* returns true if request was completed on PML level */ @@ -298,12 +302,12 @@ send_request_pml_complete_check(mca_pml_ob1_send_request_t *sendreq) } /** - * Schedule additional fragments + * Schedule additional fragments */ int mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t*); -static inline int +static inline int mca_pml_ob1_send_request_schedule_exclusive(mca_pml_ob1_send_request_t* sendreq) { 
int rc; @@ -337,7 +341,7 @@ mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq) #if OPAL_CUDA_SUPPORT int mca_pml_ob1_send_request_start_cuda( - mca_pml_ob1_send_request_t* sendreq, + mca_pml_ob1_send_request_t* sendreq, mca_bml_base_btl_t* bml_btl, size_t size); #endif /* OPAL_CUDA_SUPPORT */ @@ -475,16 +479,16 @@ mca_pml_ob1_send_request_start_seq (mca_pml_ob1_send_request_t* sendreq, mca_bml static inline int mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq ) { - mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) - sendreq->req_send.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; + mca_bml_base_endpoint_t *endpoint = mca_bml_base_get_endpoint (sendreq->req_send.req_base.req_proc); + ompi_communicator_t *comm = sendreq->req_send.req_base.req_comm; + mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, sendreq->req_send.req_base.req_peer); int32_t seqn; if (OPAL_UNLIKELY(NULL == endpoint)) { return OMPI_ERR_UNREACH; } - seqn = OPAL_THREAD_ADD32(&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence, 1); + seqn = OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); return mca_pml_ob1_send_request_start_seq (sendreq, endpoint, seqn); } diff --git a/ompi/mca/pml/ob1/pml_ob1_start.c b/ompi/mca/pml/ob1/pml_ob1_start.c index 2203a6b545f..ebc4050d4de 100644 --- a/ompi/mca/pml/ob1/pml_ob1_start.c +++ b/ompi/mca/pml/ob1/pml_ob1_start.c @@ -1,20 +1,23 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,89 +32,26 @@ int mca_pml_ob1_start(size_t count, ompi_request_t** requests) { int rc; - size_t i; - bool reuse_old_request = true; - for(i=0; ireq_type) { + if (NULL == pml_request || OMPI_REQUEST_PML != requests[i]->req_type) { continue; } - /* If the persistent request is currently active - obtain the - * request lock and verify the status is incomplete. if the - * pml layer has not completed the request - mark the request - * as free called - so that it will be freed when the request + /* If the persistent request is currently active - verify the status + * is incomplete. if the pml layer has not completed the request - mark + * the request as free called - so that it will be freed when the request * completes - and create a new request. 
*/ - reuse_old_request = true; - switch(pml_request->req_ompi.req_state) { - case OMPI_REQUEST_INACTIVE: - if(pml_request->req_pml_complete == true) - break; - /* otherwise fall through */ - case OMPI_REQUEST_ACTIVE: { - - ompi_request_t *request; - OPAL_THREAD_LOCK(&ompi_request_lock); - if (pml_request->req_pml_complete == false) { - /* free request after it completes */ - pml_request->req_free_called = true; - } else { - /* can reuse the existing request */ - OPAL_THREAD_UNLOCK(&ompi_request_lock); - break; - } - - reuse_old_request = false; - /* allocate a new request */ - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: { - mca_pml_base_send_mode_t sendmode = - ((mca_pml_base_send_request_t*)pml_request)->req_send_mode; - rc = mca_pml_ob1_isend_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - sendmode, - pml_request->req_comm, - &request); - break; - } - case MCA_PML_REQUEST_RECV: - rc = mca_pml_ob1_irecv_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - pml_request->req_comm, - &request); - break; - default: - rc = OMPI_ERR_REQUEST; - break; - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); - if(OMPI_SUCCESS != rc) - return rc; - pml_request = (mca_pml_base_request_t*)request; - requests[i] = request; - break; - } - default: - return OMPI_ERR_REQUEST; - } +#if OPAL_ENABLE_MULTI_THREADS + opal_atomic_rmb(); +#endif /* start the request */ switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: + case MCA_PML_REQUEST_SEND: { mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)pml_request; MEMCHECKER( @@ -119,15 +59,46 @@ int mca_pml_ob1_start(size_t count, ompi_request_t** requests) pml_request->req_addr, pml_request->req_count, pml_request->req_datatype); ); - if( reuse_old_request && (sendreq->req_send.req_bytes_packed != 0) ) { + + if 
(!pml_request->req_pml_complete) { + ompi_request_t *request; + + /* buffered sends can be mpi complete and pml incomplete. to support this + * case we need to allocate a new request. */ + rc = mca_pml_ob1_isend_init (pml_request->req_addr, + pml_request->req_count, + pml_request->req_datatype, + pml_request->req_peer, + pml_request->req_tag, + sendreq->req_send.req_send_mode, + pml_request->req_comm, + &request); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + /* copy the callback and callback data to the new requests */ + request->req_complete_cb = pml_request->req_ompi.req_complete_cb; + request->req_complete_cb_data = pml_request->req_ompi.req_complete_cb_data; + + /* ensure the old request gets released */ + pml_request->req_free_called = true; + + sendreq = (mca_pml_ob1_send_request_t *) request; + requests[i] = request; + } else if (sendreq->req_send.req_bytes_packed != 0) { size_t offset = 0; /** * Reset the convertor in case we're dealing with the original * request, which when completed do not reset the convertor. */ - opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor, - &offset ); + opal_convertor_set_position (&sendreq->req_send.req_base.req_convertor, + &offset); } + + /* reset the completion flag */ + pml_request->req_pml_complete = false; + MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc); if(rc != OMPI_SUCCESS) return rc; diff --git a/ompi/mca/pml/pml.h b/ompi/mca/pml/pml.h index a62f4c35c20..0b70da841b8 100644 --- a/ompi/mca/pml/pml.h +++ b/ompi/mca/pml/pml.h @@ -6,22 +6,24 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** * @file - * + * * P2P Management Layer (PML) * * An MCA component type that provides the P2P interface functionality @@ -49,7 +51,7 @@ * make downcalls into the PML to provide the initial list of * processes (ompi_proc_t instances), and notification of changes * (add/delete). - * + * * The PML module must select the set of BTL components that are to be * used to reach a given destination. These should be cached on a PML * specific data structure that is hung off the ompi_proc_t. @@ -59,7 +61,7 @@ * over the available BTLs. * */ - + #ifndef MCA_PML_H #define MCA_PML_H @@ -79,7 +81,7 @@ struct ompi_proc_t; /** * MCA->PML Called by MCA framework to initialize the component. - * + * * @param priority (OUT) Relative priority or ranking used by MCA to * selected a component. * @@ -91,8 +93,8 @@ struct ompi_proc_t; * indicates whether multiple threads may invoke this component * simultaneously or not. */ -typedef struct mca_pml_base_module_1_0_0_t * (*mca_pml_base_component_init_fn_t)( - int *priority, +typedef struct mca_pml_base_module_1_0_1_t * (*mca_pml_base_component_init_fn_t)( + int *priority, bool enable_progress_threads, bool enable_mpi_threads); @@ -138,7 +140,7 @@ typedef int (*mca_pml_base_module_add_procs_fn_t)(struct ompi_proc_t **procs, si * @param nprocs Size of process array * @return OMPI_SUCCESS or failure status. * - * Provides a notification to the PML that processes have + * Provides a notification to the PML that processes have * gone away, and provides the PML the opportunity to cleanup * any data cached on the ompi_proc_t data structure. 
*/ @@ -159,8 +161,8 @@ typedef int (*mca_pml_base_module_enable_fn_t)( * For non-threaded case, provides MCA the opportunity to * progress outstanding requests on all btls. * - * * @return Count of "completions", a metric of - * how many items where completed in the call + * * @return Count of "completions", a metric of + * how many items where completed in the call * to progress. */ typedef int (*mca_pml_base_module_progress_fn_t)(void); @@ -194,7 +196,7 @@ typedef int (*mca_pml_base_module_add_comm_fn_t)(struct ompi_communicator_t* com typedef int (*mca_pml_base_module_del_comm_fn_t)(struct ompi_communicator_t* comm); /** - * Initialize a persistent receive request. + * Initialize a persistent receive request. * * @param buf (IN) User buffer. * @param count (IN) Number of elements of the specified datatype. @@ -206,17 +208,17 @@ typedef int (*mca_pml_base_module_del_comm_fn_t)(struct ompi_communicator_t* com * @return OMPI_SUCCESS or failure status. */ typedef int (*mca_pml_base_module_irecv_init_fn_t)( - void *buf, - size_t count, - struct ompi_datatype_t *datatype, + void *buf, + size_t count, + struct ompi_datatype_t *datatype, int src, - int tag, + int tag, struct ompi_communicator_t* comm, - struct ompi_request_t **request + struct ompi_request_t **request ); /** - * Post a receive request. + * Post a receive request. * * @param buf (IN) User buffer. * @param count (IN) Number of elements of the specified datatype. @@ -245,7 +247,7 @@ typedef int (*mca_pml_base_module_imrecv_fn_t)( ); /** - * Post a receive and wait for completion. + * Post a receive and wait for completion. * * @param buf (IN) User buffer * @param count (IN) Number of elements of the specified datatype @@ -274,7 +276,7 @@ typedef int (*mca_pml_base_module_mrecv_fn_t)( ); /** - * Initialize a persistent send request. + * Initialize a persistent send request. * * @param buf (IN) User buffer. * @param count (IN) Number of elements of the specified datatype. 
@@ -287,7 +289,7 @@ typedef int (*mca_pml_base_module_mrecv_fn_t)( * @return OMPI_SUCCESS or failure status. */ typedef int (*mca_pml_base_module_isend_init_fn_t)( - void *buf, + const void *buf, size_t count, struct ompi_datatype_t *datatype, int dst, @@ -299,7 +301,7 @@ typedef int (*mca_pml_base_module_isend_init_fn_t)( /** - * Post a send request. + * Post a send request. * * @param buf (IN) User buffer. * @param count (IN) Number of elements of the specified datatype. @@ -312,7 +314,7 @@ typedef int (*mca_pml_base_module_isend_init_fn_t)( * @return OMPI_SUCCESS or failure status. */ typedef int (*mca_pml_base_module_isend_fn_t)( - void *buf, + const void *buf, size_t count, struct ompi_datatype_t *datatype, int dst, @@ -336,7 +338,7 @@ typedef int (*mca_pml_base_module_isend_fn_t)( * @return OMPI_SUCCESS or failure status. */ typedef int (*mca_pml_base_module_send_fn_t)( - void *buf, + const void *buf, size_t count, struct ompi_datatype_t *datatype, int dst, @@ -412,7 +414,7 @@ typedef int (*mca_pml_base_module_mprobe_fn_t)( /** * Cancel pending operation. - * + * * @param request (IN) Request * @return OMPI_SUCCESS or failure status. * @@ -424,7 +426,7 @@ typedef int (*mca_pml_base_module_cancel_fn_t)( /** * Has a request been cancelled? - * + * * @param request (IN) Request * @return OMPI_SUCCESS or failure status. * @@ -477,13 +479,18 @@ typedef int (*mca_pml_base_module_dump_fn_t)( */ typedef int (*mca_pml_base_module_ft_event_fn_t) (int status); - +/** + * pml module flags + */ +/** PML requires requires all procs in the job on the first call to + * add_procs */ +#define MCA_PML_BASE_FLAG_REQUIRE_WORLD 0x00000001 /** * PML instance. 
*/ -struct mca_pml_base_module_1_0_0_t { +struct mca_pml_base_module_1_0_1_t { /* downcalls from MCA to PML */ mca_pml_base_module_add_procs_fn_t pml_add_procs; @@ -517,9 +524,10 @@ struct mca_pml_base_module_1_0_0_t { /* maximum constant sizes */ uint32_t pml_max_contextid; int pml_max_tag; + int pml_flags; }; -typedef struct mca_pml_base_module_1_0_0_t mca_pml_base_module_1_0_0_t; -typedef mca_pml_base_module_1_0_0_t mca_pml_base_module_t; +typedef struct mca_pml_base_module_1_0_1_t mca_pml_base_module_1_0_1_t; +typedef mca_pml_base_module_1_0_1_t mca_pml_base_module_t; /* * Macro for use in components that are of type pml @@ -544,6 +552,10 @@ typedef mca_pml_base_module_1_0_0_t mca_pml_base_module_t; OMPI_DECLSPEC extern mca_pml_base_module_t mca_pml; +static inline bool mca_pml_base_requires_world (void) +{ + return !!(mca_pml.pml_flags & MCA_PML_BASE_FLAG_REQUIRE_WORLD); +} END_C_DECLS #endif /* MCA_PML_H */ diff --git a/ompi/mca/pml/pml_constants.h b/ompi/mca/pml/pml_constants.h index f074378550e..5992b97f81a 100644 --- a/ompi/mca/pml/pml_constants.h +++ b/ompi/mca/pml/pml_constants.h @@ -6,20 +6,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ - + #ifndef MCA_PML_CONSTANTS_H #define MCA_PML_CONSTANTS_H diff --git a/ompi/mca/pml/ucx/Makefile.am b/ompi/mca/pml/ucx/Makefile.am new file mode 100644 index 00000000000..0fdd85e2723 --- /dev/null +++ b/ompi/mca/pml/ucx/Makefile.am @@ -0,0 +1,45 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +AM_CPPFLAGS = $(pml_ucx_CPPFLAGS) + +local_sources = \ + pml_ucx.h \ + pml_ucx.c \ + pml_ucx_request.h \ + pml_ucx_request.c \ + pml_ucx_datatype.h \ + pml_ucx_datatype.c \ + pml_ucx_freelist.h \ + pml_ucx_component.c + +if MCA_BUILD_ompi_pml_ucx_DSO +component_noinst = +component_install = mca_pml_ucx.la +else +component_noinst = libmca_pml_ucx.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pml_ucx_la_SOURCES = $(local_sources) +mca_pml_ucx_la_LIBADD = $(pml_ucx_LIBS) +mca_pml_ucx_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pml_ucx_la_SOURCES = $(local_sources) +libmca_pml_ucx_la_LIBADD = $(pml_ucx_LIBS) +libmca_pml_ucx_la_LDFLAGS = -module -avoid-version + diff --git a/ompi/mca/pml/ucx/configure.m4 b/ompi/mca/pml/ucx/configure.m4 new file mode 100644 index 00000000000..9ee0273b398 --- /dev/null +++ b/ompi/mca/pml/ucx/configure.m4 @@ -0,0 +1,30 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +AC_DEFUN([MCA_ompi_pml_ucx_POST_CONFIG], [ + AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([PML])]) +]) + +AC_DEFUN([MCA_ompi_pml_ucx_CONFIG], [ + AC_CONFIG_FILES([ompi/mca/pml/ucx/Makefile]) + + OMPI_CHECK_UCX([pml_ucx], + [pml_ucx_happy="yes"], + [pml_ucx_happy="no"]) + + AS_IF([test "$pml_ucx_happy" = "yes"], + [$1], + [$2]) + + # substitute in the things needed to build ucx + AC_SUBST([pml_ucx_CPPFLAGS]) + AC_SUBST([pml_ucx_LDFLAGS]) + AC_SUBST([pml_ucx_LIBS]) +]) diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c new file mode 100644 index 00000000000..cf4b49f8304 --- /dev/null +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -0,0 +1,939 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. + * Copyright (c) 2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "pml_ucx.h" + +#include "opal/runtime/opal.h" +#include "opal/mca/pmix/pmix.h" +#include "ompi/message/message.h" +#include "ompi/mca/pml/base/pml_base_bsend.h" +#include "pml_ucx_request.h" + +#include + + +#define PML_UCX_TRACE_SEND(_msg, _buf, _count, _datatype, _dst, _tag, _mode, _comm, ...) \ + PML_UCX_VERBOSE(8, _msg " buf %p count %zu type '%s' dst %d tag %d mode %s comm %d '%s'", \ + __VA_ARGS__, \ + (_buf), (_count), (_datatype)->name, (_dst), (_tag), \ + mca_pml_ucx_send_mode_name(_mode), (_comm)->c_contextid, \ + (_comm)->c_name); + +#define PML_UCX_TRACE_RECV(_msg, _buf, _count, _datatype, _src, _tag, _comm, ...) 
\ + PML_UCX_VERBOSE(8, _msg " buf %p count %zu type '%s' src %d tag %d comm %d '%s'", \ + __VA_ARGS__, \ + (_buf), (_count), (_datatype)->name, (_src), (_tag), \ + (_comm)->c_contextid, (_comm)->c_name); + +#define PML_UCX_TRACE_PROBE(_msg, _src, _tag, _comm) \ + PML_UCX_VERBOSE(8, _msg " src %d tag %d comm %d '%s'", \ + _src, (_tag), (_comm)->c_contextid, (_comm)->c_name); + +#define PML_UCX_TRACE_MRECV(_msg, _buf, _count, _datatype, _message) \ + PML_UCX_VERBOSE(8, _msg " buf %p count %zu type '%s' msg *%p=%p (%p)", \ + (_buf), (_count), (_datatype)->name, (void*)(_message), \ + (void*)*(_message), (*(_message))->req_ptr); + +#define MODEX_KEY "pml-ucx" + +mca_pml_ucx_module_t ompi_pml_ucx = { + { + mca_pml_ucx_add_procs, + mca_pml_ucx_del_procs, + mca_pml_ucx_enable, + NULL, + mca_pml_ucx_add_comm, + mca_pml_ucx_del_comm, + mca_pml_ucx_irecv_init, + mca_pml_ucx_irecv, + mca_pml_ucx_recv, + mca_pml_ucx_isend_init, + mca_pml_ucx_isend, + mca_pml_ucx_send, + mca_pml_ucx_iprobe, + mca_pml_ucx_probe, + mca_pml_ucx_start, + mca_pml_ucx_improbe, + mca_pml_ucx_mprobe, + mca_pml_ucx_imrecv, + mca_pml_ucx_mrecv, + mca_pml_ucx_dump, + NULL, /* FT */ + 1ul << (PML_UCX_TAG_BITS - 1), + 1ul << (PML_UCX_CONTEXT_BITS), + }, + NULL, /* ucp_context */ + NULL /* ucp_worker */ +}; + +static int mca_pml_ucx_send_worker_address(void) +{ + ucp_address_t *address; + ucs_status_t status; + size_t addrlen; + int rc; + + status = ucp_worker_get_address(ompi_pml_ucx.ucp_worker, &address, &addrlen); + if (UCS_OK != status) { + PML_UCX_ERROR("Failed to get worker address"); + return OMPI_ERROR; + } + + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, + &mca_pml_ucx_component.pmlm_version, (void*)address, addrlen); + if (OMPI_SUCCESS != rc) { + PML_UCX_ERROR("Open MPI couldn't distribute EP connection details"); + return OMPI_ERROR; + } + + ucp_worker_release_address(ompi_pml_ucx.ucp_worker, address); + + return OMPI_SUCCESS; +} + +static int mca_pml_ucx_recv_worker_address(ompi_proc_t *proc, + 
ucp_address_t **address_p, + size_t *addrlen_p) +{ + int ret; + + *address_p = NULL; + OPAL_MODEX_RECV(ret, &mca_pml_ucx_component.pmlm_version, &proc->super.proc_name, + (void**)address_p, addrlen_p); + if (ret < 0) { + PML_UCX_ERROR("Failed to receive EP address"); + } + return ret; +} + +int mca_pml_ucx_open(void) +{ + ucp_context_attr_t attr; + ucp_params_t params; + ucp_config_t *config; + ucs_status_t status; + + PML_UCX_VERBOSE(1, "mca_pml_ucx_open"); + + /* Read options */ + status = ucp_config_read("MPI", NULL, &config); + if (UCS_OK != status) { + return OMPI_ERROR; + } + + /* Initialize UCX context */ + params.field_mask = UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_REQUEST_SIZE | + UCP_PARAM_FIELD_REQUEST_INIT | + UCP_PARAM_FIELD_REQUEST_CLEANUP | + UCP_PARAM_FIELD_TAG_SENDER_MASK; + params.features = UCP_FEATURE_TAG; + params.request_size = sizeof(ompi_request_t); + params.request_init = mca_pml_ucx_request_init; + params.request_cleanup = mca_pml_ucx_request_cleanup; + params.tag_sender_mask = PML_UCX_SPECIFIC_SOURCE_MASK; + + status = ucp_init(¶ms, config, &ompi_pml_ucx.ucp_context); + ucp_config_release(config); + + if (UCS_OK != status) { + return OMPI_ERROR; + } + + /* Query UCX attributes */ + attr.field_mask = UCP_ATTR_FIELD_REQUEST_SIZE; + status = ucp_context_query(ompi_pml_ucx.ucp_context, &attr); + if (UCS_OK != status) { + ucp_cleanup(ompi_pml_ucx.ucp_context); + ompi_pml_ucx.ucp_context = NULL; + return OMPI_ERROR; + } + + ompi_pml_ucx.request_size = attr.request_size; + + return OMPI_SUCCESS; +} + +int mca_pml_ucx_close(void) +{ + PML_UCX_VERBOSE(1, "mca_pml_ucx_close"); + + if (ompi_pml_ucx.ucp_context != NULL) { + ucp_cleanup(ompi_pml_ucx.ucp_context); + ompi_pml_ucx.ucp_context = NULL; + } + return OMPI_SUCCESS; +} + +int mca_pml_ucx_init(void) +{ + ucp_worker_params_t params; + ucs_status_t status; + int rc; + + PML_UCX_VERBOSE(1, "mca_pml_ucx_init"); + + /* TODO check MPI thread mode */ + params.field_mask = 
UCP_WORKER_PARAM_FIELD_THREAD_MODE; + params.thread_mode = UCS_THREAD_MODE_SINGLE; + + status = ucp_worker_create(ompi_pml_ucx.ucp_context, ¶ms, + &ompi_pml_ucx.ucp_worker); + if (UCS_OK != status) { + return OMPI_ERROR; + } + + rc = mca_pml_ucx_send_worker_address(); + if (rc < 0) { + return rc; + } + + /* Initialize the free lists */ + OBJ_CONSTRUCT(&ompi_pml_ucx.persistent_reqs, mca_pml_ucx_freelist_t); + OBJ_CONSTRUCT(&ompi_pml_ucx.convs, mca_pml_ucx_freelist_t); + + /* Create a completed request to be returned from isend */ + OBJ_CONSTRUCT(&ompi_pml_ucx.completed_send_req, ompi_request_t); + mca_pml_ucx_completed_request_init(&ompi_pml_ucx.completed_send_req); + + opal_progress_register(mca_pml_ucx_progress); + + PML_UCX_VERBOSE(2, "created ucp context %p, worker %p", + (void *)ompi_pml_ucx.ucp_context, + (void *)ompi_pml_ucx.ucp_worker); + return OMPI_SUCCESS; +} + +int mca_pml_ucx_cleanup(void) +{ + PML_UCX_VERBOSE(1, "mca_pml_ucx_cleanup"); + + opal_progress_unregister(mca_pml_ucx_progress); + + ompi_pml_ucx.completed_send_req.req_state = OMPI_REQUEST_INVALID; + OMPI_REQUEST_FINI(&ompi_pml_ucx.completed_send_req); + OBJ_DESTRUCT(&ompi_pml_ucx.completed_send_req); + + OBJ_DESTRUCT(&ompi_pml_ucx.convs); + OBJ_DESTRUCT(&ompi_pml_ucx.persistent_reqs); + + if (ompi_pml_ucx.ucp_worker) { + ucp_worker_destroy(ompi_pml_ucx.ucp_worker); + ompi_pml_ucx.ucp_worker = NULL; + } + + return OMPI_SUCCESS; +} + +ucp_ep_h mca_pml_ucx_add_proc(ompi_communicator_t *comm, int dst) +{ + ucp_ep_params_t ep_params; + ucp_address_t *address; + ucs_status_t status; + size_t addrlen; + ucp_ep_h ep; + int ret; + + ompi_proc_t *proc0 = ompi_comm_peer_lookup(comm, 0); + ompi_proc_t *proc_peer = ompi_comm_peer_lookup(comm, dst); + + /* Note, mca_pml_base_pml_check_selected, doesn't use 3rd argument */ + if (OMPI_SUCCESS != (ret = mca_pml_base_pml_check_selected("ucx", + &proc0, + dst))) { + return NULL; + } + + ret = mca_pml_ucx_recv_worker_address(proc_peer, &address, &addrlen); + if 
(ret < 0) { + PML_UCX_ERROR("Failed to receive worker address from proc: %d", proc_peer->super.proc_name.vpid); + return NULL; + } + + PML_UCX_VERBOSE(2, "connecting to proc. %d", proc_peer->super.proc_name.vpid); + + ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; + ep_params.address = address; + + status = ucp_ep_create(ompi_pml_ucx.ucp_worker, &ep_params, &ep); + free(address); + if (UCS_OK != status) { + PML_UCX_ERROR("Failed to connect to proc: %d, %s", proc_peer->super.proc_name.vpid, + ucs_status_string(status)); + return NULL; + } + + proc_peer->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = ep; + + return ep; +} + +int mca_pml_ucx_add_procs(struct ompi_proc_t **procs, size_t nprocs) +{ + ucp_ep_params_t ep_params; + ucp_address_t *address; + ucs_status_t status; + ompi_proc_t *proc; + size_t addrlen; + ucp_ep_h ep; + size_t i; + int ret; + + if (OMPI_SUCCESS != (ret = mca_pml_base_pml_check_selected("ucx", + procs, + nprocs))) { + return ret; + } + + for (i = 0; i < nprocs; ++i) { + proc = procs[(i + OMPI_PROC_MY_NAME->vpid) % nprocs]; + + ret = mca_pml_ucx_recv_worker_address(proc, &address, &addrlen); + if (ret < 0) { + PML_UCX_ERROR("Failed to receive worker address from proc: %d", + proc->super.proc_name.vpid); + return ret; + } + + if (proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML]) { + PML_UCX_VERBOSE(3, "already connected to proc. %d", proc->super.proc_name.vpid); + continue; + } + + PML_UCX_VERBOSE(2, "connecting to proc. 
%d", proc->super.proc_name.vpid); + + ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; + ep_params.address = address; + + status = ucp_ep_create(ompi_pml_ucx.ucp_worker, &ep_params, &ep); + free(address); + + if (UCS_OK != status) { + PML_UCX_ERROR("Failed to connect to proc: %d, %s", proc->super.proc_name.vpid, + ucs_status_string(status)); + return OMPI_ERROR; + } + + proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = ep; + } + + return OMPI_SUCCESS; +} + +static void mca_pml_ucx_waitall(void **reqs, size_t *count_p) +{ + ucs_status_t status; + size_t i; + + PML_UCX_VERBOSE(2, "waiting for %d disconnect requests", (int)*count_p); + for (i = 0; i < *count_p; ++i) { + do { + opal_progress(); + status = ucp_request_test(reqs[i], NULL); + } while (status == UCS_INPROGRESS); + if (status != UCS_OK) { + PML_UCX_ERROR("disconnect request failed: %s", + ucs_status_string(status)); + } + ucp_request_free(reqs[i]); + reqs[i] = NULL; + } + + *count_p = 0; +} + +int mca_pml_ucx_del_procs(struct ompi_proc_t **procs, size_t nprocs) +{ + ompi_proc_t *proc; + size_t num_reqs, max_reqs; + void *dreq, **dreqs; + ucp_ep_h ep; + size_t i; + + max_reqs = ompi_pml_ucx.num_disconnect; + if (max_reqs > nprocs) { + max_reqs = nprocs; + } + + dreqs = malloc(sizeof(*dreqs) * max_reqs); + if (dreqs == NULL) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + num_reqs = 0; + + for (i = 0; i < nprocs; ++i) { + proc = procs[(i + OMPI_PROC_MY_NAME->vpid) % nprocs]; + ep = proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML]; + if (ep == NULL) { + continue; + } + + PML_UCX_VERBOSE(2, "disconnecting from rank %d", proc->super.proc_name.vpid); + dreq = ucp_disconnect_nb(ep); + if (dreq != NULL) { + if (UCS_PTR_IS_ERR(dreq)) { + PML_UCX_ERROR("ucp_disconnect_nb(%d) failed: %s", + proc->super.proc_name.vpid, + ucs_status_string(UCS_PTR_STATUS(dreq))); + } else { + dreqs[num_reqs++] = dreq; + } + } + + proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = NULL; + + if ((int)num_reqs >= 
ompi_pml_ucx.num_disconnect) { + mca_pml_ucx_waitall(dreqs, &num_reqs); + } + } + + mca_pml_ucx_waitall(dreqs, &num_reqs); + free(dreqs); + + opal_pmix.fence(NULL, 0); + + return OMPI_SUCCESS; +} + +int mca_pml_ucx_enable(bool enable) +{ + PML_UCX_FREELIST_INIT(&ompi_pml_ucx.persistent_reqs, + mca_pml_ucx_persistent_request_t, + 128, -1, 128); + PML_UCX_FREELIST_INIT(&ompi_pml_ucx.convs, + mca_pml_ucx_convertor_t, + 128, -1, 128); + return OMPI_SUCCESS; +} + +int mca_pml_ucx_progress(void) +{ + ucp_worker_progress(ompi_pml_ucx.ucp_worker); + return OMPI_SUCCESS; +} + +int mca_pml_ucx_add_comm(struct ompi_communicator_t* comm) +{ + return OMPI_SUCCESS; +} + +int mca_pml_ucx_del_comm(struct ompi_communicator_t* comm) +{ + return OMPI_SUCCESS; +} + +int mca_pml_ucx_irecv_init(void *buf, size_t count, ompi_datatype_t *datatype, + int src, int tag, struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + mca_pml_ucx_persistent_request_t *req; + + req = (mca_pml_ucx_persistent_request_t *)PML_UCX_FREELIST_GET(&ompi_pml_ucx.persistent_reqs); + if (req == NULL) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + PML_UCX_TRACE_RECV("irecv_init request *%p=%p", buf, count, datatype, src, + tag, comm, (void*)request, (void*)req); + + req->ompi.req_state = OMPI_REQUEST_INACTIVE; + req->flags = 0; + req->buffer = buf; + req->count = count; + req->datatype = mca_pml_ucx_get_datatype(datatype); + + PML_UCX_MAKE_RECV_TAG(req->tag, req->recv.tag_mask, tag, src, comm); + + *request = &req->ompi; + return OMPI_SUCCESS; +} + +int mca_pml_ucx_irecv(void *buf, size_t count, ompi_datatype_t *datatype, + int src, int tag, struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + ucp_tag_t ucp_tag, ucp_tag_mask; + ompi_request_t *req; + + PML_UCX_TRACE_RECV("irecv request *%p", buf, count, datatype, src, tag, comm, + (void*)request); + + PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm); + req = (ompi_request_t*)ucp_tag_recv_nb(ompi_pml_ucx.ucp_worker, 
buf, count, + mca_pml_ucx_get_datatype(datatype), + ucp_tag, ucp_tag_mask, + mca_pml_ucx_recv_completion); + if (UCS_PTR_IS_ERR(req)) { + PML_UCX_ERROR("ucx recv failed: %s", ucs_status_string(UCS_PTR_STATUS(req))); + return OMPI_ERROR; + } + + PML_UCX_VERBOSE(8, "got request %p", (void*)req); + *request = req; + return OMPI_SUCCESS; +} + +int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src, + int tag, struct ompi_communicator_t* comm, + ompi_status_public_t* mpi_status) +{ + ucp_tag_t ucp_tag, ucp_tag_mask; + ucp_tag_recv_info_t info; + ucs_status_t status; + void *req; + + PML_UCX_TRACE_RECV("%s", buf, count, datatype, src, tag, comm, "recv"); + + PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm); + req = (char *)alloca(ompi_pml_ucx.request_size) + ompi_pml_ucx.request_size; + status = ucp_tag_recv_nbr(ompi_pml_ucx.ucp_worker, buf, count, + mca_pml_ucx_get_datatype(datatype), + ucp_tag, ucp_tag_mask, req); + + ucp_worker_progress(ompi_pml_ucx.ucp_worker); + for (;;) { + status = ucp_request_test(req, &info); + if (status != UCS_INPROGRESS) { + mca_pml_ucx_set_recv_status_safe(mpi_status, status, &info); + return OMPI_SUCCESS; + } + opal_progress(); + } +} + +static inline const char *mca_pml_ucx_send_mode_name(mca_pml_base_send_mode_t mode) +{ + switch (mode) { + case MCA_PML_BASE_SEND_SYNCHRONOUS: + return "sync"; + case MCA_PML_BASE_SEND_COMPLETE: + return "complete"; + case MCA_PML_BASE_SEND_BUFFERED: + return "buffered"; + case MCA_PML_BASE_SEND_READY: + return "ready"; + case MCA_PML_BASE_SEND_STANDARD: + return "standard"; + case MCA_PML_BASE_SEND_SIZE: + return "size"; + default: + return "unknown"; + } +} + +int mca_pml_ucx_isend_init(const void *buf, size_t count, ompi_datatype_t *datatype, + int dst, int tag, mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + mca_pml_ucx_persistent_request_t *req; + ucp_ep_h ep; + + req = (mca_pml_ucx_persistent_request_t 
*)PML_UCX_FREELIST_GET(&ompi_pml_ucx.persistent_reqs); + if (req == NULL) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + PML_UCX_TRACE_SEND("isend_init request *%p=%p", buf, count, datatype, dst, + tag, mode, comm, (void*)request, (void*)req) + + ep = mca_pml_ucx_get_ep(comm, dst); + if (OPAL_UNLIKELY(NULL == ep)) { + PML_UCX_ERROR("Failed to get ep for rank %d", dst); + return OMPI_ERROR; + } + + req->ompi.req_state = OMPI_REQUEST_INACTIVE; + req->flags = MCA_PML_UCX_REQUEST_FLAG_SEND; + req->buffer = (void *)buf; + req->count = count; + req->tag = PML_UCX_MAKE_SEND_TAG(tag, comm); + req->send.mode = mode; + req->send.ep = ep; + if (MCA_PML_BASE_SEND_BUFFERED == mode) { + req->ompi_datatype = datatype; + OBJ_RETAIN(datatype); + } else { + req->datatype = mca_pml_ucx_get_datatype(datatype); + } + + *request = &req->ompi; + return OMPI_SUCCESS; +} + +static int +mca_pml_ucx_bsend(ucp_ep_h ep, const void *buf, size_t count, + ompi_datatype_t *datatype, uint64_t pml_tag) +{ + ompi_request_t *req; + void *packed_data; + size_t packed_length; + size_t offset; + uint32_t iov_count; + struct iovec iov; + opal_convertor_t opal_conv; + + OBJ_CONSTRUCT(&opal_conv, opal_convertor_t); + opal_convertor_copy_and_prepare_for_send(ompi_proc_local_proc->super.proc_convertor, + &datatype->super, count, buf, 0, + &opal_conv); + opal_convertor_get_packed_size(&opal_conv, &packed_length); + + packed_data = mca_pml_base_bsend_request_alloc_buf(packed_length); + if (OPAL_UNLIKELY(NULL == packed_data)) { + OBJ_DESTRUCT(&opal_conv); + PML_UCX_ERROR("bsend: failed to allocate buffer"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + iov_count = 1; + iov.iov_base = packed_data; + iov.iov_len = packed_length; + + PML_UCX_VERBOSE(8, "bsend of packed buffer %p len %d", packed_data, packed_length); + offset = 0; + opal_convertor_set_position(&opal_conv, &offset); + if (0 > opal_convertor_pack(&opal_conv, &iov, &iov_count, &packed_length)) { + mca_pml_base_bsend_request_free(packed_data); + 
OBJ_DESTRUCT(&opal_conv); + PML_UCX_ERROR("bsend: failed to pack user datatype"); + return OMPI_ERROR; + } + + OBJ_DESTRUCT(&opal_conv); + + req = (ompi_request_t*)ucp_tag_send_nb(ep, packed_data, packed_length, + ucp_dt_make_contig(1), pml_tag, + mca_pml_ucx_bsend_completion); + if (NULL == req) { + /* request was completed in place */ + mca_pml_base_bsend_request_free(packed_data); + return OMPI_SUCCESS; + } + + if (OPAL_UNLIKELY(UCS_PTR_IS_ERR(req))) { + mca_pml_base_bsend_request_free(packed_data); + PML_UCX_ERROR("ucx bsend failed: %s", ucs_status_string(UCS_PTR_STATUS(req))); + return OMPI_ERROR; + } + + req->req_complete_cb_data = packed_data; + return OMPI_SUCCESS; +} + +int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, + int dst, int tag, mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request) +{ + ompi_request_t *req; + ucp_ep_h ep; + + PML_UCX_TRACE_SEND("i%ssend request *%p", + buf, count, datatype, dst, tag, mode, comm, + mode == MCA_PML_BASE_SEND_BUFFERED ? 
"b" : "", + (void*)request) + + /* TODO special care to sync/buffered send */ + + ep = mca_pml_ucx_get_ep(comm, dst); + if (OPAL_UNLIKELY(NULL == ep)) { + PML_UCX_ERROR("Failed to get ep for rank %d", dst); + return OMPI_ERROR; + } + + /* Special care to sync/buffered send */ + if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == mode)) { + *request = &ompi_pml_ucx.completed_send_req; + return mca_pml_ucx_bsend(ep, buf, count, datatype, + PML_UCX_MAKE_SEND_TAG(tag, comm)); + } + + req = (ompi_request_t*)ucp_tag_send_nb(ep, buf, count, + mca_pml_ucx_get_datatype(datatype), + PML_UCX_MAKE_SEND_TAG(tag, comm), + mca_pml_ucx_send_completion); + if (req == NULL) { + PML_UCX_VERBOSE(8, "returning completed request"); + *request = &ompi_pml_ucx.completed_send_req; + return OMPI_SUCCESS; + } else if (!UCS_PTR_IS_ERR(req)) { + PML_UCX_VERBOSE(8, "got request %p", (void*)req); + *request = req; + return OMPI_SUCCESS; + } else { + PML_UCX_ERROR("ucx send failed: %s", ucs_status_string(UCS_PTR_STATUS(req))); + return OMPI_ERROR; + } +} + +int mca_pml_ucx_send(const void *buf, size_t count, ompi_datatype_t *datatype, int dst, + int tag, mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm) +{ + ompi_request_t *req; + ucp_ep_h ep; + + PML_UCX_TRACE_SEND("%s", buf, count, datatype, dst, tag, mode, comm, + mode == MCA_PML_BASE_SEND_BUFFERED ? 
"bsend" : "send"); + + ep = mca_pml_ucx_get_ep(comm, dst); + if (OPAL_UNLIKELY(NULL == ep)) { + PML_UCX_ERROR("Failed to get ep for rank %d", dst); + return OMPI_ERROR; + } + + /* Special care to sync/buffered send */ + if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == mode)) { + return mca_pml_ucx_bsend(ep, buf, count, datatype, + PML_UCX_MAKE_SEND_TAG(tag, comm)); + } + + req = (ompi_request_t*)ucp_tag_send_nb(ep, buf, count, + mca_pml_ucx_get_datatype(datatype), + PML_UCX_MAKE_SEND_TAG(tag, comm), + mca_pml_ucx_send_completion); + if (OPAL_LIKELY(req == NULL)) { + return OMPI_SUCCESS; + } else if (!UCS_PTR_IS_ERR(req)) { + PML_UCX_VERBOSE(8, "got request %p", (void*)req); + ucp_worker_progress(ompi_pml_ucx.ucp_worker); + ompi_request_wait(&req, MPI_STATUS_IGNORE); + return OMPI_SUCCESS; + } else { + PML_UCX_ERROR("ucx send failed: %s", ucs_status_string(UCS_PTR_STATUS(req))); + return OMPI_ERROR; + } +} +/* Non-blocking probe: builds the receive tag/mask from (src, tag, comm) and calls ucp_tag_probe_nb once with 0 as its fourth argument. On a hit sets *matched = 1 and fills mpi_status (skipped for MPI_STATUS_IGNORE); on a miss calls opal_progress() once and sets *matched = 0. Always returns OMPI_SUCCESS. */ +int mca_pml_ucx_iprobe(int src, int tag, struct ompi_communicator_t* comm, + int *matched, ompi_status_public_t* mpi_status) +{ + ucp_tag_t ucp_tag, ucp_tag_mask; + ucp_tag_recv_info_t info; + ucp_tag_message_h ucp_msg; + + PML_UCX_TRACE_PROBE("iprobe", src, tag, comm); + + PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm); + ucp_msg = ucp_tag_probe_nb(ompi_pml_ucx.ucp_worker, ucp_tag, ucp_tag_mask, + 0, &info); + if (ucp_msg != NULL) { + *matched = 1; + mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); + } else { + opal_progress(); + *matched = 0; + } + return OMPI_SUCCESS; +} +/* Blocking probe: performs the same lookup as mca_pml_ucx_iprobe in a loop, calling opal_progress() between attempts, until a message matches; then fills mpi_status and returns OMPI_SUCCESS. The loop has no other exit. */ +int mca_pml_ucx_probe(int src, int tag, struct ompi_communicator_t* comm, + ompi_status_public_t* mpi_status) +{ + ucp_tag_t ucp_tag, ucp_tag_mask; + ucp_tag_recv_info_t info; + ucp_tag_message_h ucp_msg; + + PML_UCX_TRACE_PROBE("probe", src, tag, comm); + + PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm); + for (;;) { + ucp_msg = ucp_tag_probe_nb(ompi_pml_ucx.ucp_worker, ucp_tag, ucp_tag_mask, + 0, &info); + if (ucp_msg != NULL) { + 
mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); + return OMPI_SUCCESS; + } + + opal_progress(); + } +} +/* Non-blocking matched probe: calls ucp_tag_probe_nb with 1 as its fourth argument. On a hit wraps the UCX message handle in a new ompi_message_t via PML_UCX_MESSAGE_NEW (which makes this function return OMPI_ERR_OUT_OF_RESOURCE if the allocation fails), stores it in *message, sets *matched = 1 and fills mpi_status. On a miss calls opal_progress() and sets *matched = 0. */ +int mca_pml_ucx_improbe(int src, int tag, struct ompi_communicator_t* comm, + int *matched, struct ompi_message_t **message, + ompi_status_public_t* mpi_status) +{ + ucp_tag_t ucp_tag, ucp_tag_mask; + ucp_tag_recv_info_t info; + ucp_tag_message_h ucp_msg; + + PML_UCX_TRACE_PROBE("improbe", src, tag, comm); + + PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm); + ucp_msg = ucp_tag_probe_nb(ompi_pml_ucx.ucp_worker, ucp_tag, ucp_tag_mask, + 1, &info); + if (ucp_msg != NULL) { + PML_UCX_MESSAGE_NEW(comm, ucp_msg, &info, message); + PML_UCX_VERBOSE(8, "got message %p (%p)", (void*)*message, (void*)ucp_msg); + *matched = 1; + mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); + } else { + opal_progress(); + *matched = 0; + } + return OMPI_SUCCESS; +} +/* Blocking matched probe: loops like mca_pml_ucx_probe but passes 1 as the fourth ucp_tag_probe_nb argument and returns the matched message through *message (allocated by PML_UCX_MESSAGE_NEW). */ +int mca_pml_ucx_mprobe(int src, int tag, struct ompi_communicator_t* comm, + struct ompi_message_t **message, + ompi_status_public_t* mpi_status) +{ + ucp_tag_t ucp_tag, ucp_tag_mask; + ucp_tag_recv_info_t info; + ucp_tag_message_h ucp_msg; + + PML_UCX_TRACE_PROBE("mprobe", src, tag, comm); + + PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm); + for (;;) { + ucp_msg = ucp_tag_probe_nb(ompi_pml_ucx.ucp_worker, ucp_tag, ucp_tag_mask, + 1, &info); + if (ucp_msg != NULL) { + PML_UCX_MESSAGE_NEW(comm, ucp_msg, &info, message); + PML_UCX_VERBOSE(8, "got message %p (%p)", (void*)*message, (void*)ucp_msg); + mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); + return OMPI_SUCCESS; + } + + opal_progress(); + } +} +/* Non-blocking receive of a previously matched message: posts ucp_tag_msg_recv_nb on the handle stored in (*message)->req_ptr. On a UCX error logs and returns OMPI_ERROR, leaving *message untouched; otherwise releases the message (which sets *message to NULL) and returns the UCX request through *request. */ +int mca_pml_ucx_imrecv(void *buf, size_t count, ompi_datatype_t *datatype, + struct ompi_message_t **message, + struct ompi_request_t **request) +{ + ompi_request_t *req; + + PML_UCX_TRACE_MRECV("imrecv", buf, count, datatype, message); + + req = (ompi_request_t*)ucp_tag_msg_recv_nb(ompi_pml_ucx.ucp_worker, buf, count, + mca_pml_ucx_get_datatype(datatype), + 
(*message)->req_ptr, + mca_pml_ucx_recv_completion); + if (UCS_PTR_IS_ERR(req)) { + PML_UCX_ERROR("ucx msg recv failed: %s", ucs_status_string(UCS_PTR_STATUS(req))); + return OMPI_ERROR; + } + + PML_UCX_VERBOSE(8, "got request %p", (void*)req); + PML_UCX_MESSAGE_RELEASE(message); + *request = req; + return OMPI_SUCCESS; +} +/* Blocking counterpart of mca_pml_ucx_imrecv: posts the same receive, releases the message, then waits on the request with ompi_request_wait(&req, status). The return value of ompi_request_wait is not checked. */ +int mca_pml_ucx_mrecv(void *buf, size_t count, ompi_datatype_t *datatype, + struct ompi_message_t **message, + ompi_status_public_t* status) +{ + ompi_request_t *req; + + PML_UCX_TRACE_MRECV("mrecv", buf, count, datatype, message); + + req = (ompi_request_t*)ucp_tag_msg_recv_nb(ompi_pml_ucx.ucp_worker, buf, count, + mca_pml_ucx_get_datatype(datatype), + (*message)->req_ptr, + mca_pml_ucx_recv_completion); + if (UCS_PTR_IS_ERR(req)) { + PML_UCX_ERROR("ucx msg recv failed: %s", ucs_status_string(UCS_PTR_STATUS(req))); + return OMPI_ERROR; + } + + PML_UCX_MESSAGE_RELEASE(message); + + ompi_request_wait(&req, status); + return OMPI_SUCCESS; +} +/* Starts each persistent request in requests[0..count): NULL entries and entries whose req_type is not OMPI_REQUEST_PML are skipped. Each remaining request is marked active and reset, then re-posted as a buffered send, a tagged send or a tagged receive using the parameters stored in the persistent request. Returns at the first failure, leaving later entries unstarted. */ +int mca_pml_ucx_start(size_t count, ompi_request_t** requests) +{ + mca_pml_ucx_persistent_request_t *preq; + ompi_request_t *tmp_req; + size_t i; + int rc; + + for (i = 0; i < count; ++i) { + preq = (mca_pml_ucx_persistent_request_t *)requests[i]; + + if ((preq == NULL) || (OMPI_REQUEST_PML != preq->ompi.req_type)) { + /* Skip irrelevant requests */ + continue; + } + + PML_UCX_ASSERT(preq->ompi.req_state != OMPI_REQUEST_INVALID); + preq->ompi.req_state = OMPI_REQUEST_ACTIVE; + mca_pml_ucx_request_reset(&preq->ompi); + + if (preq->flags & MCA_PML_UCX_REQUEST_FLAG_SEND) { + if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == preq->send.mode)) { + PML_UCX_VERBOSE(8, "start bsend request %p", (void*)preq); + rc = mca_pml_ucx_bsend(preq->send.ep, preq->buffer, preq->count, + preq->ompi_datatype, preq->tag); + if (OMPI_SUCCESS != rc) { + return rc; + } + /* pretend that we got immediate completion */ + tmp_req = NULL; + } else { + PML_UCX_VERBOSE(8, "start send request %p", (void*)preq); + tmp_req = 
(ompi_request_t*)ucp_tag_send_nb(preq->send.ep, preq->buffer, + preq->count, preq->datatype, + preq->tag, + mca_pml_ucx_psend_completion); + } + } else { + PML_UCX_VERBOSE(8, "start recv request %p", (void*)preq); + tmp_req = (ompi_request_t*)ucp_tag_recv_nb(ompi_pml_ucx.ucp_worker, + preq->buffer, preq->count, + preq->datatype, preq->tag, + preq->recv.tag_mask, + mca_pml_ucx_precv_completion); + } + + if (tmp_req == NULL) { + /* Only send can complete immediately */ + PML_UCX_ASSERT(preq->flags & MCA_PML_UCX_REQUEST_FLAG_SEND); + + PML_UCX_VERBOSE(8, "send completed immediately, completing persistent request %p", + (void*)preq); + mca_pml_ucx_set_send_status(&preq->ompi.req_status, UCS_OK); + ompi_request_complete(&preq->ompi, true); + } else if (!UCS_PTR_IS_ERR(tmp_req)) { + if (REQUEST_COMPLETE(tmp_req)) { + /* tmp_req is already completed */ + PML_UCX_VERBOSE(8, "completing persistent request %p", (void*)preq); + mca_pml_ucx_persistent_request_complete(preq, tmp_req); + } else { + /* tmp_req would be completed by callback and trigger completion + * of preq */ + PML_UCX_VERBOSE(8, "temporary request %p will complete persistent request %p", + (void*)tmp_req, (void*)preq); + tmp_req->req_complete_cb_data = preq; + preq->tmp_req = tmp_req; + } + } else { + PML_UCX_ERROR("ucx %s failed: %s", + (preq->flags & MCA_PML_UCX_REQUEST_FLAG_SEND) ? "send" : "recv", + ucs_status_string(UCS_PTR_STATUS(tmp_req))); + return OMPI_ERROR; + } + } + + return OMPI_SUCCESS; +} +/* Dump hook: does nothing and returns OMPI_SUCCESS; both arguments are unused. */ +int mca_pml_ucx_dump(struct ompi_communicator_t* comm, int verbose) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/pml/ucx/pml_ucx.h b/ompi/mca/pml/ucx/pml_ucx.h new file mode 100644 index 00000000000..44320b2a48e --- /dev/null +++ b/ompi/mca/pml/ucx/pml_ucx.h @@ -0,0 +1,153 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_UCX_H_ +#define PML_UCX_H_ + +#include "ompi_config.h" +#include "ompi/request/request.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/pml/base/base.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/communicator/communicator.h" +#include "ompi/request/request.h" + +#include <ucp/api/ucp.h> +#include "pml_ucx_freelist.h" + +/* Forward typedefs: the module struct is defined below, the persistent request in pml_ucx_request.h and the convertor in pml_ucx_datatype.h. */ +typedef struct mca_pml_ucx_module mca_pml_ucx_module_t; +typedef struct pml_ucx_persistent_request mca_pml_ucx_persistent_request_t; +typedef struct pml_ucx_convertor mca_pml_ucx_convertor_t; + +/* + * TODO version check + */ +/* State of the UCX PML module; the instance used throughout the component is the global ompi_pml_ucx declared below. */ +struct mca_pml_ucx_module { + mca_pml_base_module_t super; + + /* UCX global objects */ + ucp_context_h ucp_context; + ucp_worker_h ucp_worker; + + /* Requests */ + mca_pml_ucx_freelist_t persistent_reqs; + ompi_request_t completed_send_req; + size_t request_size; + int num_disconnect; + + /* Converters pool */ + mca_pml_ucx_freelist_t convs; + + int priority; + int verbose; + int output; +}; +/* Component descriptor (defined in pml_ucx_component.c) and the global module instance. */ +extern mca_pml_base_component_2_0_0_t mca_pml_ucx_component; +extern mca_pml_ucx_module_t ompi_pml_ucx; + + +/* Debugging */ +#define PML_UCX_ENABLE_DEBUG OPAL_ENABLE_DEBUG +#if PML_UCX_ENABLE_DEBUG +# define PML_UCX_MAX_VERBOSE 9 +# define PML_UCX_ASSERT(_x) assert(_x) +#else +# define PML_UCX_MAX_VERBOSE 2 +# define PML_UCX_ASSERT(_x) +#endif +/* Two-level stringification so that PML_UCX_QUOTE(__LINE__) yields the line number as a string literal. */ +#define _PML_UCX_QUOTE(_x) \ + # _x +#define PML_UCX_QUOTE(_x) \ + _PML_UCX_QUOTE(_x) +/* Logs at verbosity 0 on ompi_pml_ucx.output, prefixed with file:line and " Error: ". The first variadic argument must be a string literal because it is joined to the prefix by literal concatenation. */ +#define PML_UCX_ERROR(...) \ + opal_output_verbose(0, ompi_pml_ucx.output, \ + __FILE__ ":" PML_UCX_QUOTE(__LINE__) \ + " Error: " __VA_ARGS__) +/* Logs only when _level exceeds neither PML_UCX_MAX_VERBOSE nor ompi_pml_ucx.verbose. NOTE: expands to a bare if { } block rather than a do { } while (0) statement, so do not use it as the unbraced body of an if/else. */ +#define PML_UCX_VERBOSE(_level, ... 
) \ + if (((_level) <= PML_UCX_MAX_VERBOSE) && ((_level) <= ompi_pml_ucx.verbose)) { \ + opal_output_verbose(_level, ompi_pml_ucx.output, \ + __FILE__ ":" PML_UCX_QUOTE(__LINE__) " " \ + __VA_ARGS__); \ + } +/* Open/close/init/cleanup entry points invoked by the component callbacks in pml_ucx_component.c. */ +int mca_pml_ucx_open(void); +int mca_pml_ucx_close(void); +int mca_pml_ucx_init(void); +int mca_pml_ucx_cleanup(void); +/* Peer management; mca_pml_ucx_add_proc returns a ucp_ep_h for rank dst of comm. */ +ucp_ep_h mca_pml_ucx_add_proc(ompi_communicator_t *comm, int dst); +int mca_pml_ucx_add_procs(struct ompi_proc_t **procs, size_t nprocs); +int mca_pml_ucx_del_procs(struct ompi_proc_t **procs, size_t nprocs); + +int mca_pml_ucx_enable(bool enable); +int mca_pml_ucx_progress(void); + +int mca_pml_ucx_add_comm(struct ompi_communicator_t* comm); +int mca_pml_ucx_del_comm(struct ompi_communicator_t* comm); +/* Receive entry points: persistent-init, non-blocking and blocking variants. */ +int mca_pml_ucx_irecv_init(void *buf, size_t count, ompi_datatype_t *datatype, + int src, int tag, struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +int mca_pml_ucx_irecv(void *buf, size_t count, ompi_datatype_t *datatype, + int src, int tag, struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src, + int tag, struct ompi_communicator_t* comm, + ompi_status_public_t* status); +/* Send entry points: persistent-init, non-blocking and blocking variants; mode carries the send mode (e.g. MCA_PML_BASE_SEND_BUFFERED). */ +int mca_pml_ucx_isend_init(const void *buf, size_t count, ompi_datatype_t *datatype, + int dst, int tag, mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, + int dst, int tag, mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +int mca_pml_ucx_send(const void *buf, size_t count, ompi_datatype_t *datatype, int dst, + int tag, mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm); +/* Probe and matched-receive entry points. */ +int mca_pml_ucx_iprobe(int src, int tag, struct ompi_communicator_t* comm, + int *matched, ompi_status_public_t* status); + +int mca_pml_ucx_probe(int src, int tag, 
struct ompi_communicator_t* comm, + ompi_status_public_t* status); + +int mca_pml_ucx_improbe(int src, int tag, struct ompi_communicator_t* comm, + int *matched, struct ompi_message_t **message, + ompi_status_public_t* status); + +int mca_pml_ucx_mprobe(int src, int tag, struct ompi_communicator_t* comm, + struct ompi_message_t **message, + ompi_status_public_t* status); + +int mca_pml_ucx_imrecv(void *buf, size_t count, ompi_datatype_t *datatype, + struct ompi_message_t **message, + struct ompi_request_t **request); + +int mca_pml_ucx_mrecv(void *buf, size_t count, ompi_datatype_t *datatype, + struct ompi_message_t **message, + ompi_status_public_t* status); +/* Starts the count persistent requests in requests. */ +int mca_pml_ucx_start(size_t count, ompi_request_t** requests); + +int mca_pml_ucx_dump(struct ompi_communicator_t* comm, int verbose); + + +#endif /* PML_UCX_H_ */ diff --git a/ompi/mca/pml/ucx/pml_ucx_component.c b/ompi/mca/pml/ucx/pml_ucx_component.c new file mode 100644 index 00000000000..528bfd871ed --- /dev/null +++ b/ompi/mca/pml/ucx/pml_ucx_component.c @@ -0,0 +1,114 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "pml_ucx.h" + +/* Forward declarations of the component callbacks wired into mca_pml_ucx_component below. */ +static int mca_pml_ucx_component_register(void); +static int mca_pml_ucx_component_open(void); +static int mca_pml_ucx_component_close(void); + +static mca_pml_base_module_t* +mca_pml_ucx_component_init(int* priority, bool enable_progress_threads, + bool enable_mpi_threads); +static int mca_pml_ucx_component_fini(void); + +/* PML component descriptor: names the component "ucx" and registers the open, close, register, init and finalize callbacks defined in this file. */ +mca_pml_base_component_2_0_0_t mca_pml_ucx_component = { + + /* First, the mca_base_component_t struct containing meta + * information about the component itself */ + { + MCA_PML_BASE_VERSION_2_0_0, + + "ucx", /* MCA component name */ + OMPI_MAJOR_VERSION, /* MCA component major version */ + OMPI_MINOR_VERSION, /* MCA component minor version */ + OMPI_RELEASE_VERSION, /* MCA component release version */ + mca_pml_ucx_component_open, /* component open */ + mca_pml_ucx_component_close, /* component close */ + NULL, + mca_pml_ucx_component_register, + }, + { + /* This component is not checkpoint ready */ + MCA_BASE_METADATA_PARAM_NONE + }, + + mca_pml_ucx_component_init, /* component init */ + mca_pml_ucx_component_fini /* component finalize */ +}; +/* Registers the "verbose" (default 0), "priority" (default 5) and "num_disconnect" (default 1) integer MCA variables, each bound to the matching ompi_pml_ucx field. The registration results are explicitly discarded; always returns 0. */ +static int mca_pml_ucx_component_register(void) +{ + ompi_pml_ucx.verbose = 0; + (void) mca_base_component_var_register(&mca_pml_ucx_component.pmlm_version, "verbose", + "Verbose level of the UCX component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_LOCAL, + &ompi_pml_ucx.verbose); + + ompi_pml_ucx.priority = 5; + (void) mca_base_component_var_register(&mca_pml_ucx_component.pmlm_version, "priority", + "Priority of the UCX component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &ompi_pml_ucx.priority); + + ompi_pml_ucx.num_disconnect = 1; + (void) mca_base_component_var_register(&mca_pml_ucx_component.pmlm_version, "num_disconnect", + "How many disconnects to do in parallel", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + 
OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &ompi_pml_ucx.num_disconnect); + return 0; +} +/* Opens an output stream, applies the configured verbosity to it, then delegates to mca_pml_ucx_open(). */ +static int mca_pml_ucx_component_open(void) +{ + ompi_pml_ucx.output = opal_output_open(NULL); + opal_output_set_verbosity(ompi_pml_ucx.output, ompi_pml_ucx.verbose); + return mca_pml_ucx_open(); +} +/* Calls mca_pml_ucx_close(); on failure returns its code and leaves the output stream open, otherwise closes the stream and returns 0. */ +static int mca_pml_ucx_component_close(void) +{ + int rc; + + rc = mca_pml_ucx_close(); + if (rc != 0) { + return rc; + } + + opal_output_close(ompi_pml_ucx.output); + return 0; +} +/* Initializes the module via mca_pml_ucx_init(); returns NULL on failure, otherwise reports ompi_pml_ucx.priority through *priority and returns the module. Neither thread flag is read. */ +static mca_pml_base_module_t* +mca_pml_ucx_component_init(int* priority, bool enable_progress_threads, + bool enable_mpi_threads) +{ + int ret; + + if ( (ret = mca_pml_ucx_init()) != 0) { + return NULL; + } + + *priority = ompi_pml_ucx.priority; + return &ompi_pml_ucx.super; +} +/* Finalizes the component by delegating to mca_pml_ucx_cleanup(). */ +static int mca_pml_ucx_component_fini(void) +{ + return mca_pml_ucx_cleanup(); +} + diff --git a/ompi/mca/pml/ucx/pml_ucx_datatype.c b/ompi/mca/pml/ucx/pml_ucx_datatype.c new file mode 100644 index 00000000000..488642fcda8 --- /dev/null +++ b/ompi/mca/pml/ucx/pml_ucx_datatype.c @@ -0,0 +1,157 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "pml_ucx_datatype.h" + +#include "ompi/runtime/mpiruntime.h" + +#include <inttypes.h> + +/* UCX generic-datatype start_pack callback: takes a convertor from the ompi_pml_ucx.convs free list, retains the datatype passed as context, and prepares the convertor for sending count elements from buffer. Returns the convertor as the opaque pack state. NOTE(review): the free-list result is used without a NULL check. */ +static void* pml_ucx_generic_datatype_start_pack(void *context, const void *buffer, + size_t count) +{ + ompi_datatype_t *datatype = context; + mca_pml_ucx_convertor_t *convertor; + + convertor = (mca_pml_ucx_convertor_t *)PML_UCX_FREELIST_GET(&ompi_pml_ucx.convs); + + OBJ_RETAIN(datatype); + convertor->datatype = datatype; + opal_convertor_copy_and_prepare_for_send(ompi_proc_local_proc->super.proc_convertor, + &datatype->super, count, buffer, 0, + &convertor->opal_conv); + return convertor; +} +/* start_unpack callback: same steps as start_pack, but prepares the convertor for receiving into buffer. */ +static void* pml_ucx_generic_datatype_start_unpack(void *context, void *buffer, + size_t count) +{ + ompi_datatype_t *datatype = context; + mca_pml_ucx_convertor_t *convertor; + + convertor = (mca_pml_ucx_convertor_t *)PML_UCX_FREELIST_GET(&ompi_pml_ucx.convs); + + OBJ_RETAIN(datatype); + convertor->datatype = datatype; + opal_convertor_copy_and_prepare_for_recv(ompi_proc_local_proc->super.proc_convertor, + &datatype->super, count, buffer, 0, + &convertor->opal_conv); + return convertor; +} +/* packed_size callback: returns the packed size reported by the convertor. */ +static size_t pml_ucx_generic_datatype_packed_size(void *state) +{ + mca_pml_ucx_convertor_t *convertor = state; + size_t size; + + opal_convertor_get_packed_size(&convertor->opal_conv, &size); + return size; +} +/* pack callback: positions the convertor at offset and packs up to max_length bytes into dest; returns the length reported back by opal_convertor_pack. */ +static size_t pml_ucx_generic_datatype_pack(void *state, size_t offset, + void *dest, size_t max_length) +{ + mca_pml_ucx_convertor_t *convertor = state; + uint32_t iov_count; + struct iovec iov; + size_t length; + + iov_count = 1; + iov.iov_base = dest; + iov.iov_len = max_length; + + opal_convertor_set_position(&convertor->opal_conv, &offset); + length = max_length; + opal_convertor_pack(&convertor->opal_conv, &iov, &iov_count, &length); + return length; +} +/* unpack callback: positions the convertor at offset and unpacks length bytes from src. The convertor return value is ignored; always returns UCS_OK. */ +static ucs_status_t pml_ucx_generic_datatype_unpack(void *state, size_t offset, + const void *src, size_t length) +{ + mca_pml_ucx_convertor_t 
*convertor = state; + + uint32_t iov_count; + struct iovec iov; + + iov_count = 1; + iov.iov_base = (void*)src; + iov.iov_len = length; + + opal_convertor_set_position(&convertor->opal_conv, &offset); + opal_convertor_unpack(&convertor->opal_conv, &iov, &iov_count, &length); + return UCS_OK; +} +/* finish callback: cleans up the convertor, drops the datatype reference taken in start_pack/start_unpack, and returns the convertor to the free list. */ +static void pml_ucx_generic_datatype_finish(void *state) +{ + mca_pml_ucx_convertor_t *convertor = state; + + opal_convertor_cleanup(&convertor->opal_conv); + OBJ_RELEASE(convertor->datatype); + PML_UCX_FREELIST_RETURN(&ompi_pml_ucx.convs, &convertor->super); +} +/* Callback table handed to ucp_dt_create_generic() in mca_pml_ucx_init_datatype(). */ +static ucp_generic_dt_ops_t pml_ucx_generic_datatype_ops = { + .start_pack = pml_ucx_generic_datatype_start_pack, + .start_unpack = pml_ucx_generic_datatype_start_unpack, + .packed_size = pml_ucx_generic_datatype_packed_size, + .pack = pml_ucx_generic_datatype_pack, + .unpack = pml_ucx_generic_datatype_unpack, + .finish = pml_ucx_generic_datatype_finish +}; +/* Creates the UCX datatype for an OMPI datatype and caches it in datatype->pml_data: a contiguous UCX type when the datatype is flagged contiguous with no gaps and has lower bound 0, otherwise a generic type backed by the callbacks above. Calls ompi_mpi_abort() if UCX cannot create the generic type. */ +ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype) +{ + ucp_datatype_t ucp_datatype; + ucs_status_t status; + ptrdiff_t lb; + size_t size; + + ompi_datatype_type_lb(datatype, &lb); + + if ((datatype->super.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && + (datatype->super.flags & OPAL_DATATYPE_FLAG_NO_GAPS) && + (lb == 0)) + { + ompi_datatype_type_size(datatype, &size); + PML_UCX_ASSERT(size > 0); + datatype->pml_data = ucp_dt_make_contig(size); + return datatype->pml_data; + } + + status = ucp_dt_create_generic(&pml_ucx_generic_datatype_ops, + datatype, &ucp_datatype); + if (status != UCS_OK) { + PML_UCX_ERROR("Failed to create UCX datatype for %s", datatype->name); + ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1); + } + + PML_UCX_VERBOSE(7, "created generic UCX datatype 0x%"PRIx64, ucp_datatype); + /* TODO put this on a list to be destroyed later */ + + datatype->pml_data = ucp_datatype; + return ucp_datatype; +} +/* Free-list item constructor: constructs the embedded opal_convertor_t. */ +static void mca_pml_ucx_convertor_construct(mca_pml_ucx_convertor_t *convertor) +{ + OBJ_CONSTRUCT(&convertor->opal_conv, 
opal_convertor_t); +} +/* Free-list item destructor: destructs the embedded opal_convertor_t. */ +static void mca_pml_ucx_convertor_destruct(mca_pml_ucx_convertor_t *convertor) +{ + OBJ_DESTRUCT(&convertor->opal_conv); +} +/* Class definition for mca_pml_ucx_convertor_t with parent class opal_free_list_item_t. */ +OBJ_CLASS_INSTANCE(mca_pml_ucx_convertor_t, + opal_free_list_item_t, + mca_pml_ucx_convertor_construct, + mca_pml_ucx_convertor_destruct); diff --git a/ompi/mca/pml/ucx/pml_ucx_datatype.h b/ompi/mca/pml/ucx/pml_ucx_datatype.h new file mode 100644 index 00000000000..79dce36cc8e --- /dev/null +++ b/ompi/mca/pml/ucx/pml_ucx_datatype.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_UCX_DATATYPE_H_ +#define PML_UCX_DATATYPE_H_ + +#include "pml_ucx.h" + +/* Free-list item pairing an OPAL convertor with the datatype it was prepared for. */ +struct pml_ucx_convertor { + opal_free_list_item_t super; + ompi_datatype_t *datatype; + opal_convertor_t opal_conv; +}; + + +ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype); + +OBJ_CLASS_DECLARATION(mca_pml_ucx_convertor_t); + +/* Returns the UCX datatype cached in datatype->pml_data; a cached value of 0 means it has not been created yet, in which case mca_pml_ucx_init_datatype() creates it. */ +static inline ucp_datatype_t mca_pml_ucx_get_datatype(ompi_datatype_t *datatype) +{ + ucp_datatype_t ucp_type = datatype->pml_data; + + if (OPAL_LIKELY(ucp_type != 0)) { + return ucp_type; + } + + return mca_pml_ucx_init_datatype(datatype); +} + +#endif /* PML_UCX_DATATYPE_H_ */ diff --git a/ompi/mca/pml/ucx/pml_ucx_freelist.h b/ompi/mca/pml/ucx/pml_ucx_freelist.h new file mode 100644 index 00000000000..8c16d6e5a4c --- /dev/null +++ b/ompi/mca/pml/ucx/pml_ucx_freelist.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_UCX_FREELIST_H_ +#define PML_UCX_FREELIST_H_ + +#include "ompi_config.h" +#include "opal/class/opal_free_list.h" + +/* Thin aliases over the opal_free_list type and functions. */ +#define mca_pml_ucx_freelist_t opal_free_list_t +/* Fetches an item from _freelist via opal_free_list_get(). */ +#define PML_UCX_FREELIST_GET(_freelist) \ + opal_free_list_get (_freelist) +/* Hands _item back to _freelist via opal_free_list_return(). */ +#define PML_UCX_FREELIST_RETURN(_freelist, _item) \ + opal_free_list_return(_freelist, _item) +/* Initializes _fl for items of type/class _type, forwarding _initial, _max and _batch; all other opal_free_list_init() arguments are fixed here. NOTE(review): the literal 8 is presumably the item alignment - confirm against opal_free_list_init(). */ +#define PML_UCX_FREELIST_INIT(_fl, _type, _initial, _max, _batch) \ + opal_free_list_init(_fl, sizeof(_type), 8, OBJ_CLASS(_type), \ + 0, 0, _initial, _max, _batch, NULL, 0, NULL, NULL, NULL) + + +#endif /* PML_UCX_FREELIST_H_ */ diff --git a/ompi/mca/pml/ucx/pml_ucx_request.c b/ompi/mca/pml/ucx/pml_ucx_request.c new file mode 100644 index 00000000000..28415f107e5 --- /dev/null +++ b/ompi/mca/pml/ucx/pml_ucx_request.c @@ -0,0 +1,232 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. + * Copyright (c) 2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "pml_ucx_request.h" +#include "ompi/mca/pml/base/pml_base_bsend.h" +#include "ompi/message/message.h" +#include <inttypes.h> + +/* req_free hook for non-persistent requests: sets *rptr to MPI_REQUEST_NULL, resets the request to pending and hands it back to UCX with ucp_request_free(). */ +static int mca_pml_ucx_request_free(ompi_request_t **rptr) +{ + ompi_request_t *req = *rptr; + + PML_UCX_VERBOSE(9, "free request *%p=%p", (void*)rptr, (void*)req); + + *rptr = MPI_REQUEST_NULL; + mca_pml_ucx_request_reset(req); + ucp_request_free(req); + return OMPI_SUCCESS; +} +/* req_cancel hook for non-persistent requests: asks UCX to cancel the request; flag is unused. */ +static int mca_pml_ucx_request_cancel(ompi_request_t *req, int flag) +{ + ucp_request_cancel(ompi_pml_ucx.ucp_worker, req); + return OMPI_SUCCESS; +} +/* UCX send-completion callback: translates the UCX status into req_status and marks the OMPI request complete. */ +void mca_pml_ucx_send_completion(void *request, ucs_status_t status) +{ + ompi_request_t *req = request; + + PML_UCX_VERBOSE(8, "send request %p completed with status %s", (void*)req, + ucs_status_string(status)); + + mca_pml_ucx_set_send_status(&req->req_status, status); + PML_UCX_ASSERT( !(REQUEST_COMPLETE(req))); + ompi_request_complete(req, true); +} +/* UCX completion callback for buffered sends: frees the bsend buffer stored in req_complete_cb_data, records the status and then frees the request itself via mca_pml_ucx_request_free(). */ +void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status) +{ + ompi_request_t *req = request; + + PML_UCX_VERBOSE(8, "bsend request %p buffer %p completed with status %s", (void*)req, + req->req_complete_cb_data, ucs_status_string(status)); + mca_pml_base_bsend_request_free(req->req_complete_cb_data); + req->req_complete_cb_data = NULL; + mca_pml_ucx_set_send_status(&req->req_status, status); + PML_UCX_ASSERT( !(REQUEST_COMPLETE(req))); + mca_pml_ucx_request_free(&req); +} +/* UCX receive-completion callback: fills req_status from status and info, then marks the OMPI request complete. */ +void mca_pml_ucx_recv_completion(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info) +{ + ompi_request_t *req = request; + + PML_UCX_VERBOSE(8, "receive request %p completed with status %s tag %"PRIx64" len %zu", + (void*)req, ucs_status_string(status), info->sender_tag, + info->length); + + mca_pml_ucx_set_recv_status(&req->req_status, status, info); + PML_UCX_ASSERT( !(REQUEST_COMPLETE(req))); + ompi_request_complete(req, true); +} +/* Breaks the link between a persistent request and its temporary UCX request by clearing both back-pointers. */ +static void 
mca_pml_ucx_persistent_request_detach(mca_pml_ucx_persistent_request_t *preq, + ompi_request_t *tmp_req) +{ + tmp_req->req_complete_cb_data = NULL; + preq->tmp_req = NULL; +} +/* Completes preq with the status of tmp_req, detaches the two, then resets tmp_req and frees it with ucp_request_free(). */ +void mca_pml_ucx_persistent_request_complete(mca_pml_ucx_persistent_request_t *preq, + ompi_request_t *tmp_req) +{ + preq->ompi.req_status = tmp_req->req_status; + ompi_request_complete(&preq->ompi, true); + mca_pml_ucx_persistent_request_detach(preq, tmp_req); + mca_pml_ucx_request_reset(tmp_req); + ucp_request_free(tmp_req); +} +/* Shared tail of the persistent completion callbacks: marks tmp_req complete and, if a persistent request is already attached through req_complete_cb_data, completes that one too. */ +static inline void mca_pml_ucx_preq_completion(ompi_request_t *tmp_req) +{ + mca_pml_ucx_persistent_request_t *preq; + + ompi_request_complete(tmp_req, false); + preq = (mca_pml_ucx_persistent_request_t*)tmp_req->req_complete_cb_data; + if (preq != NULL) { + PML_UCX_ASSERT(preq->tmp_req != NULL); + mca_pml_ucx_persistent_request_complete(preq, tmp_req); + } +} +/* UCX completion callback for the temporary request of a persistent send. */ +void mca_pml_ucx_psend_completion(void *request, ucs_status_t status) +{ + ompi_request_t *tmp_req = request; + + PML_UCX_VERBOSE(8, "persistent send request %p completed with status %s", + (void*)tmp_req, ucs_status_string(status)); + + mca_pml_ucx_set_send_status(&tmp_req->req_status, status); + mca_pml_ucx_preq_completion(tmp_req); +} +/* UCX completion callback for the temporary request of a persistent receive. */ +void mca_pml_ucx_precv_completion(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info) +{ + ompi_request_t *tmp_req = request; + + PML_UCX_VERBOSE(8, "persistent receive request %p completed with status %s tag %"PRIx64" len %zu", + (void*)tmp_req, ucs_status_string(status), info->sender_tag, + info->length); + + mca_pml_ucx_set_recv_status(&tmp_req->req_status, status, info); + mca_pml_ucx_preq_completion(tmp_req); +} +/* Initializes the OMPI part of a request: type OMPI_REQUEST_PML, the given state and free/cancel hooks, and a NULL req_complete_cb_data. */ +static void mca_pml_ucx_request_init_common(ompi_request_t* ompi_req, + bool req_persistent, + ompi_request_state_t state, + ompi_request_free_fn_t req_free, + ompi_request_cancel_fn_t req_cancel) +{ + OMPI_REQUEST_INIT(ompi_req, req_persistent); + ompi_req->req_type = OMPI_REQUEST_PML; + ompi_req->req_state = state; + ompi_req->req_free 
= req_free; + ompi_req->req_cancel = req_cancel; + /* This field is used to attach persistent request to a temporary req. + * Receive (ucp_tag_recv_nb) may call completion callback + * before the field is set. If the field is not NULL then mca_pml_ucx_preq_completion() + * will try to complete bogus persistent request. + */ + ompi_req->req_complete_cb_data = NULL; +} +/* Constructs an ompi_request_t in place at request and initializes it as an active, non-persistent PML request. */ +void mca_pml_ucx_request_init(void *request) +{ + ompi_request_t* ompi_req = request; + OBJ_CONSTRUCT(ompi_req, ompi_request_t); + mca_pml_ucx_request_init_common(ompi_req, false, OMPI_REQUEST_ACTIVE, + mca_pml_ucx_request_free, + mca_pml_ucx_request_cancel); +} +/* Inverse of mca_pml_ucx_request_init: invalidates, finalizes and destructs the request. */ +void mca_pml_ucx_request_cleanup(void *request) +{ + ompi_request_t* ompi_req = request; + ompi_req->req_state = OMPI_REQUEST_INVALID; + OMPI_REQUEST_FINI(ompi_req); + OBJ_DESTRUCT(ompi_req); +} +/* req_free hook for persistent requests: frees any attached temporary request, releases the datatype reference for buffered sends, returns the request to the persistent_reqs free list and sets *rptr to MPI_REQUEST_NULL. */ +static int mca_pml_ucx_persistent_request_free(ompi_request_t **rptr) +{ + mca_pml_ucx_persistent_request_t* preq = (mca_pml_ucx_persistent_request_t*)*rptr; + ompi_request_t *tmp_req = preq->tmp_req; + + preq->ompi.req_state = OMPI_REQUEST_INVALID; + if (tmp_req != NULL) { + mca_pml_ucx_persistent_request_detach(preq, tmp_req); + ucp_request_free(tmp_req); + } + if ((preq->flags & MCA_PML_UCX_REQUEST_FLAG_SEND) && + (MCA_PML_BASE_SEND_BUFFERED == preq->send.mode)) { + OBJ_RELEASE(preq->ompi_datatype); + } + PML_UCX_FREELIST_RETURN(&ompi_pml_ucx.persistent_reqs, &preq->ompi.super); + *rptr = MPI_REQUEST_NULL; + return OMPI_SUCCESS; +} +/* req_cancel hook for persistent requests: cancels the attached temporary request, if any; flag is unused. */ +static int mca_pml_ucx_persistent_request_cancel(ompi_request_t *req, int flag) +{ + mca_pml_ucx_persistent_request_t* preq = (mca_pml_ucx_persistent_request_t*)req; + + if (preq->tmp_req != NULL) { + ucp_request_cancel(ompi_pml_ucx.ucp_worker, preq->tmp_req); + } + return OMPI_SUCCESS; +} +/* Class constructor: initializes the request as inactive and persistent, with no temporary request attached. */ +static void mca_pml_ucx_persistent_request_construct(mca_pml_ucx_persistent_request_t* req) +{ + mca_pml_ucx_request_init_common(&req->ompi, true, OMPI_REQUEST_INACTIVE, + mca_pml_ucx_persistent_request_free, + 
mca_pml_ucx_persistent_request_cancel); + req->tmp_req = NULL; +} +/* Class destructor: invalidates and finalizes the embedded OMPI request. */ +static void mca_pml_ucx_persistent_request_destruct(mca_pml_ucx_persistent_request_t* req) +{ + req->ompi.req_state = OMPI_REQUEST_INVALID; + OMPI_REQUEST_FINI(&req->ompi); +} +/* Class definition for mca_pml_ucx_persistent_request_t with parent class ompi_request_t. */ +OBJ_CLASS_INSTANCE(mca_pml_ucx_persistent_request_t, + ompi_request_t, + mca_pml_ucx_persistent_request_construct, + mca_pml_ucx_persistent_request_destruct); +/* req_free hook for the pre-completed request: only sets *rptr to MPI_REQUEST_NULL. */ +static int mca_pml_completed_request_free(struct ompi_request_t** rptr) +{ + *rptr = MPI_REQUEST_NULL; + return OMPI_SUCCESS; +} +/* req_cancel hook for the pre-completed request: nothing to cancel. */ +static int mca_pml_completed_request_cancel(struct ompi_request_t* ompi_req, int flag) +{ + return OMPI_SUCCESS; +} +/* Initializes ompi_req as a request that is complete from the start, using free/cancel hooks that never call into UCX. */ +void mca_pml_ucx_completed_request_init(ompi_request_t *ompi_req) +{ + mca_pml_ucx_request_init_common(ompi_req, false, OMPI_REQUEST_ACTIVE, + mca_pml_completed_request_free, + mca_pml_completed_request_cancel); + ompi_request_complete(ompi_req, false); + +} + diff --git a/ompi/mca/pml/ucx/pml_ucx_request.h b/ompi/mca/pml/ucx/pml_ucx_request.h new file mode 100644 index 00000000000..5aa657eccbd --- /dev/null +++ b/ompi/mca/pml/ucx/pml_ucx_request.h @@ -0,0 +1,201 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * Copyright (c) 2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_UCX_REQUEST_H_ +#define PML_UCX_REQUEST_H_ + +#include "pml_ucx.h" +#include "pml_ucx_datatype.h" + +/* Bits stored in pml_ucx_persistent_request.flags. */ +enum { + MCA_PML_UCX_REQUEST_FLAG_SEND = (1 << 0), /* Persistent send */ + MCA_PML_UCX_REQUEST_FLAG_FREE_CALLED = (1 << 1), + MCA_PML_UCX_REQUEST_FLAG_COMPLETED = (1 << 2) +}; + +/* + * UCX tag structure: + * + * 01234567 01234567 01234567 01234567 01234567 01234567 01234567 01234567 + * | | + * message tag (24) | source rank (24) | context id (16) + * | | + */ +#define PML_UCX_TAG_BITS 24 +#define PML_UCX_RANK_BITS 24 +#define PML_UCX_CONTEXT_BITS 16 +#define PML_UCX_ANY_SOURCE_MASK 0x800000000000fffful +#define PML_UCX_SPECIFIC_SOURCE_MASK 0x800000fffffffffful +#define PML_UCX_TAG_MASK 0x7fffff0000000000ul + +/* Packs (MPI tag, the caller's rank in _comm, context id of _comm) into a 64-bit UCX tag using the layout described above. */ +#define PML_UCX_MAKE_SEND_TAG(_tag, _comm) \ + ((((uint64_t) (_tag) ) << (PML_UCX_RANK_BITS + PML_UCX_CONTEXT_BITS)) | \ + (((uint64_t)(_comm)->c_my_rank ) << PML_UCX_CONTEXT_BITS) | \ + ((uint64_t)(_comm)->c_contextid)) + +/* Builds the tag and mask for a receive or probe: MPI_ANY_SOURCE leaves the rank bits out of the mask, MPI_ANY_TAG leaves the tag bits out; the top bit and the context id bits are always in the mask. Expands to a braced block that assigns to _ucp_tag and _ucp_tag_mask. */ +#define PML_UCX_MAKE_RECV_TAG(_ucp_tag, _ucp_tag_mask, _tag, _src, _comm) \ + { \ + if ((_src) == MPI_ANY_SOURCE) { \ + _ucp_tag_mask = PML_UCX_ANY_SOURCE_MASK; \ + } else { \ + _ucp_tag_mask = PML_UCX_SPECIFIC_SOURCE_MASK; \ + } \ + \ + _ucp_tag = (((uint64_t)(_src) & UCS_MASK(PML_UCX_RANK_BITS)) << PML_UCX_CONTEXT_BITS) | \ + (_comm)->c_contextid; \ + \ + if ((_tag) != MPI_ANY_TAG) { \ + _ucp_tag_mask |= PML_UCX_TAG_MASK; \ + _ucp_tag |= ((uint64_t)(_tag)) << (PML_UCX_RANK_BITS + PML_UCX_CONTEXT_BITS); \ + } \ + } +/* Extracts the source-rank field from a UCX tag. */ +#define PML_UCX_TAG_GET_SOURCE(_tag) \ + (((_tag) >> PML_UCX_CONTEXT_BITS) & UCS_MASK(PML_UCX_RANK_BITS)) + +/* Extracts the MPI tag field by shifting out the rank and context bits; no mask is applied, so the result depends on the signedness of _tag. */ +#define PML_UCX_TAG_GET_MPI_TAG(_tag) \ + ((_tag) >> (PML_UCX_CONTEXT_BITS + PML_UCX_RANK_BITS)) + +/* Allocates an ompi_message_t for a matched UCX message and stores it in *_message. NOTE: on allocation failure this executes a return of OMPI_ERR_OUT_OF_RESOURCE in the calling function. */ +#define PML_UCX_MESSAGE_NEW(_comm, _ucp_msg, _info, _message) \ + { \ + struct ompi_message_t *msg = ompi_message_alloc(); \ + if (msg == NULL) { \ + /* TODO release UCP message */ \ + return OMPI_ERR_OUT_OF_RESOURCE; 
\ + } \ + \ + msg->comm = (_comm); \ + msg->req_ptr = (_ucp_msg); \ + msg->peer = PML_UCX_TAG_GET_SOURCE((_info)->sender_tag); \ + msg->count = (_info)->length; \ + *(_message) = msg; \ + } + +/* Returns *_message to the message pool and sets *_message to NULL. */ +#define PML_UCX_MESSAGE_RELEASE(_message) \ + { \ + ompi_message_return(*(_message)); \ + *(_message) = NULL; \ + } + +/* Persistent request: the OMPI request plus the parameters needed to re-post the operation each time it is started. The anonymous union holds the OMPI datatype for buffered sends and the UCX datatype otherwise. */ +struct pml_ucx_persistent_request { + ompi_request_t ompi; + ompi_request_t *tmp_req; + unsigned flags; + void *buffer; + size_t count; + union { + ucp_datatype_t datatype; + ompi_datatype_t *ompi_datatype; + }; + ucp_tag_t tag; + struct { + mca_pml_base_send_mode_t mode; + ucp_ep_h ep; + } send; + struct { + ucp_tag_t tag_mask; + } recv; +}; + +/* UCX completion callbacks and request helpers implemented in pml_ucx_request.c. */ +void mca_pml_ucx_send_completion(void *request, ucs_status_t status); + +void mca_pml_ucx_recv_completion(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info); + +void mca_pml_ucx_psend_completion(void *request, ucs_status_t status); + +void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status); + +void mca_pml_ucx_precv_completion(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info); + +void mca_pml_ucx_persistent_request_complete(mca_pml_ucx_persistent_request_t *preq, + ompi_request_t *tmp_req); + +void mca_pml_ucx_completed_request_init(ompi_request_t *ompi_req); + +void mca_pml_ucx_request_init(void *request); + +void mca_pml_ucx_request_cleanup(void *request); + +/* Returns the UCX endpoint cached on the peer proc for rank dst of comm, creating it with mca_pml_ucx_add_proc() when the cached pointer is NULL. May still return NULL if that call fails. */ +static inline ucp_ep_h mca_pml_ucx_get_ep(ompi_communicator_t *comm, int dst) +{ + ucp_ep_h ep = ompi_comm_peer_lookup(comm,dst)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML]; + if (OPAL_UNLIKELY(NULL == ep)) { + ep = mca_pml_ucx_add_proc(comm, dst); + } + + return ep; +} +/* Marks the request as pending again so it can be completed anew. */ +static inline void mca_pml_ucx_request_reset(ompi_request_t *req) +{ + req->req_complete = REQUEST_PENDING; +} +/* Maps a UCX send status onto an MPI status: UCS_OK sets MPI_SUCCESS and clears the cancelled flag, UCS_ERR_CANCELED sets the cancelled flag, anything else sets MPI_ERR_INTERN. Declared inline like the other helpers in this header so that including files which never call it do not get an unused-function warning. */ +static inline void mca_pml_ucx_set_send_status(ompi_status_public_t* mpi_status, + ucs_status_t status) +{ + if (OPAL_LIKELY(status == UCS_OK)) { + mpi_status->MPI_ERROR = MPI_SUCCESS; + mpi_status->_cancelled = false; + } else if 
(status == UCS_ERR_CANCELED) { + mpi_status->_cancelled = true; + } else { + mpi_status->MPI_ERROR = MPI_ERR_INTERN; + } +} +/* Maps a UCX receive completion onto an MPI status. On UCS_OK fills source, tag, cancelled flag and byte count from info->sender_tag and info->length; truncation maps to MPI_ERR_TRUNCATE, cancellation sets the cancelled flag, anything else maps to MPI_ERR_INTERN. mpi_status must not be MPI_STATUS_IGNORE. */ +static inline void mca_pml_ucx_set_recv_status(ompi_status_public_t* mpi_status, + ucs_status_t ucp_status, + const ucp_tag_recv_info_t *info) +{ + int64_t tag; + + if (OPAL_LIKELY(ucp_status == UCS_OK)) { + tag = info->sender_tag; + mpi_status->MPI_ERROR = MPI_SUCCESS; + mpi_status->MPI_SOURCE = PML_UCX_TAG_GET_SOURCE(tag); + mpi_status->MPI_TAG = PML_UCX_TAG_GET_MPI_TAG(tag); + mpi_status->_cancelled = false; + mpi_status->_ucount = info->length; + } else if (ucp_status == UCS_ERR_MESSAGE_TRUNCATED) { + mpi_status->MPI_ERROR = MPI_ERR_TRUNCATE; + } else if (ucp_status == UCS_ERR_CANCELED) { + mpi_status->_cancelled = true; + } else { + mpi_status->MPI_ERROR = MPI_ERR_INTERN; + } +} +/* Same as mca_pml_ucx_set_recv_status, but does nothing when mpi_status is MPI_STATUS_IGNORE. */ +static inline void mca_pml_ucx_set_recv_status_safe(ompi_status_public_t* mpi_status, + ucs_status_t ucp_status, + const ucp_tag_recv_info_t *info) +{ + if (mpi_status != MPI_STATUS_IGNORE) { + mca_pml_ucx_set_recv_status(mpi_status, ucp_status, info); + } +} + +OBJ_CLASS_DECLARATION(mca_pml_ucx_persistent_request_t); + + +#endif /* PML_UCX_REQUEST_H_ */ diff --git a/ompi/mca/pml/v/Makefile.am b/ompi/mca/pml/v/Makefile.am index 3eac8182dc3..c7c51db30c3 100644 --- a/ompi/mca/pml/v/Makefile.am +++ b/ompi/mca/pml/v/Makefile.am @@ -3,9 +3,9 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/pml/v/owner.txt b/ompi/mca/pml/v/owner.txt index b46973d4c9a..c47a2d510b1 100644 --- a/ompi/mca/pml/v/owner.txt +++ b/ompi/mca/pml/v/owner.txt @@ -4,4 +4,4 @@ # status: e.g. active, maintenance, unmaintained # owner: UTK -status: maintenance? 
+status: maintenance diff --git a/ompi/mca/pml/v/pml_v_output.c b/ompi/mca/pml/v/pml_v_output.c index 671ff51e4c2..4d9102a822a 100644 --- a/ompi/mca/pml/v/pml_v_output.c +++ b/ompi/mca/pml/v/pml_v_output.c @@ -17,32 +17,30 @@ #if defined(HAVE_UNISTD_H) #include #endif -#if defined(HAVE_STRING_H) #include -#endif int pml_v_output_open(char *output, int verbosity) { opal_output_stream_t lds; - char hostname[32] = "NA"; - + char hostname[OPAL_MAXHOSTNAMELEN] = "NA"; + OBJ_CONSTRUCT(&lds, opal_output_stream_t); if(!output) { - mca_pml_v.output = 0; - } + mca_pml_v.output = 0; + } else { if(!strcmp(output, "stdout")) { lds.lds_want_stdout = true; - } + } else if(!strcmp(output, "stderr")) { lds.lds_want_stderr = true; } - else + else { lds.lds_want_file = true; lds.lds_file_suffix = output; } lds.lds_is_debugging = true; - gethostname(hostname, 32); + gethostname(hostname, sizeof(hostname)); asprintf(&lds.lds_prefix, "[%s:%05d] pml_v: ", hostname, getpid()); lds.lds_verbose_level = verbosity; mca_pml_v.output = opal_output_open(&lds); diff --git a/ompi/mca/pml/v/pml_v_output.h b/ompi/mca/pml/v/pml_v_output.h index a2d2967ea1c..13c9c1e4821 100644 --- a/ompi/mca/pml/v/pml_v_output.h +++ b/ompi/mca/pml/v/pml_v_output.h @@ -32,7 +32,7 @@ static inline void V_OUTPUT_ERR(const char *fmt, ... ) assert(-1 != ret); opal_output(0, "%s", str); free(str); - va_end(list); + va_end(list); } /* Tricky stuff to define V_OUTPUT and V_OUTPUT_VERBOSE with variadic arguments @@ -48,7 +48,7 @@ static inline void V_OUTPUT_ERR(const char *fmt, ... ) OPAL_OUTPUT((pml_v_output, ARGS)) # define V_OUTPUT_VERBOSE(V, ARGS...) \ OPAL_OUTPUT_VERBOSE((V, mca_pml_v.output, ARGS)) - + #elif OPAL_ENABLE_DEBUG /* No variadic macros available... So sad */ static inline void V_OUTPUT(const char* fmt, ... 
) __opal_attribute_format__(__printf__, 1, 2); diff --git a/ompi/mca/pml/yalla/Makefile.am b/ompi/mca/pml/yalla/Makefile.am index a0f64874766..0ca79ef7dd7 100644 --- a/ompi/mca/pml/yalla/Makefile.am +++ b/ompi/mca/pml/yalla/Makefile.am @@ -1,5 +1,7 @@ # -# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (c) 2001-2014 Mellanox Technologies Ltd. ALL RIGHTS RESERVED. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -36,10 +38,10 @@ mcacomponentdir = $(ompilibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_pml_yalla_la_SOURCES = $(local_sources) mca_pml_yalla_la_LIBADD = $(pml_yalla_LIBS) -mca_pml_yalla_la_LDFLAGS = -module -avoid-version +mca_pml_yalla_la_LDFLAGS = -module -avoid-version $(pml_yalla_LDFLAGS) noinst_LTLIBRARIES = $(component_noinst) libmca_pml_yalla_la_SOURCES = $(local_sources) libmca_pml_yalla_la_LIBADD = $(pml_yalla_LIBS) -libmca_pml_yalla_la_LDFLAGS = -module -avoid-version +libmca_pml_yalla_la_LDFLAGS = -module -avoid-version $(pml_yalla_LDFLAGS) diff --git a/ompi/mca/pml/yalla/pml_yalla.c b/ompi/mca/pml/yalla/pml_yalla.c index a7c22497314..f16cdda5151 100644 --- a/ompi/mca/pml/yalla/pml_yalla.c +++ b/ompi/mca/pml/yalla/pml_yalla.c @@ -1,5 +1,9 @@ /* - * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. + * Copyright (C) 2001-2011 Mellanox Technologies Ltd. ALL RIGHTS RESERVED. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -7,11 +11,17 @@ * $HEADER$ */ +#ifdef HAVE_ALLOCA_H +#include +#endif + #include "pml_yalla.h" #include "pml_yalla_request.h" #include "opal/runtime/opal.h" #include "opal/memoryhooks/memory.h" +#include "opal/util/opal_environ.h" +#include "opal/mca/memory/base/base.h" #include "opal/mca/pmix/pmix.h" #include "ompi/mca/pml/base/pml_base_bsend.h" #include "ompi/message/message.h" @@ -68,7 +78,7 @@ static int send_ep_address(void) return OMPI_ERROR; } - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_pml_yalla_component.pmlm_version, address, addrlen); if (OMPI_SUCCESS != rc) { PML_YALLA_ERROR("Open MPI couldn't distribute EP connection details"); @@ -82,7 +92,7 @@ static int recv_ep_address(ompi_proc_t *proc, void **address_p, size_t *addrlen_ { int rc; - OPAL_MODEX_RECV(rc, &mca_pml_yalla_component.pmlm_version, &proc->super, + OPAL_MODEX_RECV(rc, &mca_pml_yalla_component.pmlm_version, &proc->super.proc_name, address_p, addrlen_p); if (rc < 0) { PML_YALLA_ERROR("Failed to receive EP address"); @@ -103,24 +113,26 @@ int mca_pml_yalla_open(void) PML_YALLA_VERBOSE(1, "mca_pml_yalla_open"); + (void)mca_base_framework_open(&opal_memory_base_framework, 0); + /* Set memory hooks */ if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & opal_mem_hooks_support_level())) { PML_YALLA_VERBOSE(1, "enabling on-demand memory mapping"); - opal_setenv("MXM_PML_MEM_ON_DEMAND_MAP", "y", false, &environ); + opal_setenv("MXM_MPI_MEM_ON_DEMAND_MAP", "y", false, &environ); ompi_pml_yalla.using_mem_hooks = 1; } else { PML_YALLA_VERBOSE(1, "disabling on-demand memory mapping"); ompi_pml_yalla.using_mem_hooks = 0; } - opal_setenv("MXM_PML_SINGLE_THREAD", ompi_mpi_thread_multiple ? "n" : "y", + opal_setenv("MXM_MPI_SINGLE_THREAD", ompi_mpi_thread_multiple ? 
"n" : "y", false, &environ); /* Read options */ error = mxm_config_read_opts(&ompi_pml_yalla.ctx_opts, &ompi_pml_yalla.ep_opts, - "PML", NULL, 0); + "MPI", NULL, 0); if (MXM_OK != error) { return OMPI_ERROR; } @@ -147,6 +159,7 @@ int mca_pml_yalla_close(void) mxm_cleanup(ompi_pml_yalla.mxm_context); ompi_pml_yalla.mxm_context = NULL; } + mca_base_framework_close(&opal_memory_base_framework); return 0; } @@ -178,6 +191,8 @@ int mca_pml_yalla_init(void) OBJ_CONSTRUCT(&ompi_pml_yalla.convs, mca_pml_yalla_freelist_t); opal_progress_register(mca_pml_yalla_progress); + + ompi_pml_yalla.super.pml_flags |= MCA_PML_BASE_FLAG_REQUIRE_WORLD; PML_YALLA_VERBOSE(2, "created mxm context %p ep %p", (void *)ompi_pml_yalla.mxm_context, (void *)ompi_pml_yalla.mxm_ep); @@ -381,7 +396,7 @@ int mca_pml_yalla_recv(void *buf, size_t count, ompi_datatype_t *datatype, int s return OMPI_SUCCESS; } -int mca_pml_yalla_isend_init(void *buf, size_t count, ompi_datatype_t *datatype, +int mca_pml_yalla_isend_init(const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request) @@ -448,7 +463,7 @@ static int mca_pml_yalla_bsend(mxm_send_req_t *mxm_sreq) return OMPI_SUCCESS; } -int mca_pml_yalla_isend(void *buf, size_t count, ompi_datatype_t *datatype, +int mca_pml_yalla_isend(const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request) @@ -467,10 +482,8 @@ int mca_pml_yalla_isend(void *buf, size_t count, ompi_datatype_t *datatype, if (mode == MCA_PML_BASE_SEND_BUFFERED) { rc = mca_pml_yalla_bsend(&sreq->mxm); - OPAL_THREAD_LOCK(&ompi_request_lock); sreq->super.ompi.req_status.MPI_ERROR = rc; ompi_request_complete(&sreq->super.ompi, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); *request = &sreq->super.ompi; return rc; } @@ -484,7 +497,7 @@ int mca_pml_yalla_isend(void *buf, 
size_t count, ompi_datatype_t *datatype, return OMPI_SUCCESS; } -int mca_pml_yalla_send(void *buf, size_t count, ompi_datatype_t *datatype, int dst, +int mca_pml_yalla_send(const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm) { @@ -697,10 +710,8 @@ int mca_pml_yalla_start(size_t count, ompi_request_t** requests) if (req->flags & MCA_PML_YALLA_REQUEST_FLAG_BSEND) { PML_YALLA_VERBOSE(8, "start bsend request %p", (void *)sreq); rc = mca_pml_yalla_bsend(&sreq->mxm); - OPAL_THREAD_LOCK(&ompi_request_lock); sreq->super.ompi.req_status.MPI_ERROR = rc; ompi_request_complete(&sreq->super.ompi, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); if (OMPI_SUCCESS != rc) { return rc; } diff --git a/ompi/mca/pml/yalla/pml_yalla.h b/ompi/mca/pml/yalla/pml_yalla.h index 0284a88f9ce..b69fd7b7d55 100644 --- a/ompi/mca/pml/yalla/pml_yalla.h +++ b/ompi/mca/pml/yalla/pml_yalla.h @@ -107,17 +107,17 @@ int mca_pml_yalla_recv(void *buf, size_t count, ompi_datatype_t *datatype, int s int tag, struct ompi_communicator_t* comm, ompi_status_public_t* status); -int mca_pml_yalla_isend_init(void *buf, size_t count, ompi_datatype_t *datatype, +int mca_pml_yalla_isend_init(const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request); -int mca_pml_yalla_isend(void *buf, size_t count, ompi_datatype_t *datatype, +int mca_pml_yalla_isend(const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request); -int mca_pml_yalla_send(void *buf, size_t count, ompi_datatype_t *datatype, int dst, +int mca_pml_yalla_send(const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm); diff --git 
a/ompi/mca/pml/yalla/pml_yalla_request.c b/ompi/mca/pml/yalla/pml_yalla_request.c index eff05a57dbb..b8f5ccca05c 100644 --- a/ompi/mca/pml/yalla/pml_yalla_request.c +++ b/ompi/mca/pml/yalla/pml_yalla_request.c @@ -49,11 +49,9 @@ static int mca_pml_yalla_send_request_free(ompi_request_t **request) PML_YALLA_VERBOSE(9, "free send request *%p=%p", (void *)request, (void *)*request); - OPAL_THREAD_LOCK(&ompi_request_lock); if (mca_pml_yalla_check_request_state(req)) { mca_pml_yalla_request_release(req, &ompi_pml_yalla.send_reqs); } - OPAL_THREAD_UNLOCK(&ompi_request_lock); *request = MPI_REQUEST_NULL; return OMPI_SUCCESS; @@ -64,7 +62,7 @@ static int mca_pml_yalla_send_request_cancel(ompi_request_t *request, int flag) mca_pml_yalla_send_request_t *sreq = (mca_pml_yalla_send_request_t*)request; mxm_error_t error; - if (request->req_complete) { + if (REQUEST_COMPLETE(request)) { /* * This might be a buffered send request which has completed anyway, so * we cannot cancel it anymore. Just hope for the best. 
@@ -90,11 +88,9 @@ static int mca_pml_yalla_recv_request_free(ompi_request_t **request) PML_YALLA_VERBOSE(9, "free receive request *%p=%p", (void *)request, (void *)*request); - OPAL_THREAD_LOCK(&ompi_request_lock); if (mca_pml_yalla_check_request_state(req)) { mca_pml_yalla_request_release(req, &ompi_pml_yalla.recv_reqs); } - OPAL_THREAD_UNLOCK(&ompi_request_lock); *request = MPI_REQUEST_NULL; return OMPI_SUCCESS; @@ -178,13 +174,11 @@ static void mca_pml_yalla_send_completion_cb(void *context) PML_YALLA_VERBOSE(8, "send request %p completed with status %s", (void *)sreq, mxm_error_string(sreq->mxm.base.error)); - OPAL_THREAD_LOCK(&ompi_request_lock); ompi_request_complete(&sreq->super.ompi, true); if (sreq->super.flags & MCA_PML_YALLA_REQUEST_FLAG_FREE_CALLED) { PML_YALLA_VERBOSE(7, "release request %p because free was already called", (void *)sreq); mca_pml_yalla_request_release(&sreq->super, &ompi_pml_yalla.send_reqs); } - OPAL_THREAD_UNLOCK(&ompi_request_lock); } static void mca_pml_yalla_bsend_completion_cb(void *context) @@ -211,13 +205,11 @@ static void mca_pml_yalla_recv_completion_cb(void *context) rreq->mxm.tag, rreq->mxm.tag_mask, rreq->mxm.completion.actual_len); - OPAL_THREAD_LOCK(&ompi_request_lock); ompi_request_complete(&rreq->super.ompi, true); if (rreq->super.flags & MCA_PML_YALLA_REQUEST_FLAG_FREE_CALLED) { PML_YALLA_VERBOSE(7, "release request %p because free was already called", (void *)rreq); mca_pml_yalla_request_release(&rreq->super, &ompi_pml_yalla.recv_reqs); } - OPAL_THREAD_UNLOCK(&ompi_request_lock); } static void mca_pml_yalla_send_request_construct(mca_pml_yalla_send_request_t* sreq) diff --git a/ompi/mca/pml/yalla/pml_yalla_request.h b/ompi/mca/pml/yalla/pml_yalla_request.h index 915bfe51ccb..6a4ed2d59a1 100644 --- a/ompi/mca/pml/yalla/pml_yalla_request.h +++ b/ompi/mca/pml/yalla/pml_yalla_request.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (C) Mellanox Technologies Ltd. 2001-2011. 
ALL RIGHTS RESERVED. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -53,7 +53,7 @@ void mca_pml_yalla_init_reqs(void); #define PML_YALLA_RESET_OMPI_REQ(_ompi_req, _state) \ { \ (_ompi_req)->req_state = _state; \ - (_ompi_req)->req_complete = false; \ + (_ompi_req)->req_complete = REQUEST_PENDING; \ (_ompi_req)->req_status._cancelled = false; \ } @@ -121,7 +121,7 @@ void mca_pml_yalla_init_reqs(void); #define PML_YALLA_FREE_BLOCKING_MXM_REQ(_req) \ { \ - if ((_req)->completed_cb != NULL) { \ + if ((_req)->data_type == MXM_REQ_DATA_STREAM) { \ mca_pml_yalla_convertor_free((mca_pml_yalla_convertor_t*)((_req)->context)); \ } \ } @@ -184,6 +184,7 @@ void mca_pml_yalla_init_reqs(void); (_mpi_status)->MPI_ERROR = OMPI_SUCCESS; \ break; \ case MXM_ERR_CANCELED: \ + (_mpi_status)->MPI_ERROR = OMPI_SUCCESS; \ (_mpi_status)->_cancelled = true; \ break; \ case MXM_ERR_MESSAGE_TRUNCATED: \ diff --git a/ompi/mca/pubsub/Makefile.am b/ompi/mca/pubsub/Makefile.am deleted file mode 100644 index 0ad7b4a83d7..00000000000 --- a/ompi/mca/pubsub/Makefile.am +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_pubsub.la -libmca_pubsub_la_SOURCES = - -# local files -headers = pubsub.h -libmca_pubsub_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ompidir = $(ompiincludedir)/$(subdir) -nobase_ompi_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/ompi/mca/pubsub/base/Makefile.am b/ompi/mca/pubsub/base/Makefile.am deleted file mode 100644 index 93e159113f0..00000000000 --- a/ompi/mca/pubsub/base/Makefile.am +++ /dev/null @@ -1,28 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h - -libmca_pubsub_la_SOURCES += \ - base/pubsub_base_frame.c \ - base/pubsub_base_select.c \ - base/pubsub_base_null_fns.c - diff --git a/ompi/mca/pubsub/base/base.h b/ompi/mca/pubsub/base/base.h deleted file mode 100644 index 7bfd648e586..00000000000 --- a/ompi/mca/pubsub/base/base.h +++ /dev/null @@ -1,54 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef OMPI_MCA_PUBSUB_BASE_H -#define OMPI_MCA_PUBSUB_BASE_H - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include "ompi/mca/pubsub/pubsub.h" - -/* - * Global functions for MCA overall PUBSUB - */ - -BEGIN_C_DECLS - -/* - * MCA framework - */ -OMPI_DECLSPEC extern mca_base_framework_t ompi_pubsub_base_framework; -/* - * Select an available component. - */ -OMPI_DECLSPEC int ompi_pubsub_base_select(void); - -/* NULL functions */ -OMPI_DECLSPEC int ompi_pubsub_base_null_publish(const char *service, ompi_info_t *info, const char *port); -OMPI_DECLSPEC int ompi_pubsub_base_null_unpublish(const char *service, ompi_info_t *info); -OMPI_DECLSPEC char* ompi_pubsub_base_null_lookup(const char *service, ompi_info_t *info); - -/* useful globals */ -OMPI_DECLSPEC extern ompi_pubsub_base_module_t ompi_pubsub; - -END_C_DECLS - -#endif /* OMPI_MCA_PUBSUB_BASE_H */ diff --git a/ompi/mca/pubsub/base/owner.txt b/ompi/mca/pubsub/base/owner.txt deleted file mode 100644 index 4ad6f408ca3..00000000000 --- a/ompi/mca/pubsub/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/ompi/mca/pubsub/base/pubsub_base_frame.c b/ompi/mca/pubsub/base/pubsub_base_frame.c deleted file mode 100644 index 5270666ee47..00000000000 --- a/ompi/mca/pubsub/base/pubsub_base_frame.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - - -#include "ompi/mca/pubsub/pubsub.h" -#include "ompi/mca/pubsub/base/base.h" - -#include "ompi/mca/pubsub/base/static-components.h" - -/* - * Globals - */ -OMPI_DECLSPEC ompi_pubsub_base_module_t ompi_pubsub={ - NULL, - ompi_pubsub_base_null_publish, - ompi_pubsub_base_null_unpublish, - ompi_pubsub_base_null_lookup, - NULL -}; - -static int ompi_pubsub_base_close(void) -{ - /* Close the selected component */ - if( NULL != ompi_pubsub.finalize ) { - ompi_pubsub.finalize(); - } - - return mca_base_framework_components_close(&ompi_pubsub_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, - * or the one that was specifically requested via a MCA parameter. 
- */ -static int ompi_pubsub_base_open(mca_base_open_flag_t flags) -{ - /* Open up all available components */ - return mca_base_framework_components_open(&ompi_pubsub_base_framework, flags); -} - -MCA_BASE_FRAMEWORK_DECLARE(ompi, pubsub, "OMPI Publish-Subscribe Subsystem", NULL, - ompi_pubsub_base_open, ompi_pubsub_base_close, - mca_pubsub_base_static_components, 0); diff --git a/ompi/mca/pubsub/base/pubsub_base_null_fns.c b/ompi/mca/pubsub/base/pubsub_base_null_fns.c deleted file mode 100644 index 158b9c32cb0..00000000000 --- a/ompi/mca/pubsub/base/pubsub_base_null_fns.c +++ /dev/null @@ -1,38 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/pubsub/pubsub.h" -#include "ompi/mca/pubsub/base/base.h" - -int ompi_pubsub_base_null_publish(const char *service, ompi_info_t *info, const char *port) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_pubsub_base_null_unpublish(const char *service, ompi_info_t *info) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -char* ompi_pubsub_base_null_lookup(const char *service, ompi_info_t *info) -{ - return NULL; -} diff --git a/ompi/mca/pubsub/base/pubsub_base_select.c b/ompi/mca/pubsub/base/pubsub_base_select.c deleted file mode 100644 index e6d01f35ea4..00000000000 --- a/ompi/mca/pubsub/base/pubsub_base_select.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "opal/mca/base/mca_base_component_repository.h" - -#include "ompi/mca/pubsub/pubsub.h" -#include "ompi/mca/pubsub/base/base.h" - - -int ompi_pubsub_base_select(void) -{ - int ret; - ompi_pubsub_base_component_t *best_component = NULL; - ompi_pubsub_base_module_t *best_module = NULL; - - /* - * Select the best component - */ - if( OPAL_SUCCESS != (ret = mca_base_select("pubsub", ompi_pubsub_base_framework.framework_output, - &ompi_pubsub_base_framework.framework_components, - (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component))) { - /* it is okay not to find any executable components */ - if (OMPI_ERR_NOT_FOUND == ret) { - ret = OPAL_SUCCESS; - } - goto cleanup; - } - - /* Save the winner */ - ompi_pubsub = *best_module; - - /* init the selected module */ - if (NULL != ompi_pubsub.init) { - ret = ompi_pubsub.init(); - } - - cleanup: - return ret; -} diff --git a/ompi/mca/pubsub/orte/Makefile.am b/ompi/mca/pubsub/orte/Makefile.am deleted file mode 100644 index 3b976a7ec5a..00000000000 --- a/ompi/mca/pubsub/orte/Makefile.am +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_ompidata_DATA = help-ompi-pubsub-orte.txt - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_ompi_pubsub_orte_DSO -component_noinst = -component_install = mca_pubsub_orte.la -else -component_noinst = libmca_pubsub_orte.la -component_install = -endif - -local_sources = \ - pubsub_orte.c \ - pubsub_orte.h \ - pubsub_orte_component.c - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_pubsub_orte_la_SOURCES = $(local_sources) -mca_pubsub_orte_la_LDFLAGS = -module -avoid-version $(pubsub_orte_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pubsub_orte_la_SOURCES = $(local_sources) -libmca_pubsub_orte_la_LIBADD = $(pubsub_orte_LIBS) -libmca_pubsub_orte_la_LDFLAGS = -module -avoid-version $(pubsub_orte_LDFLAGS) - diff --git a/ompi/mca/pubsub/orte/configure.m4 b/ompi/mca/pubsub/orte/configure.m4 deleted file mode 100644 index 11103a85d61..00000000000 --- a/ompi/mca/pubsub/orte/configure.m4 +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2011 Los Alamos National Security, LLC. -# All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_pubsub_orte_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_pubsub_orte_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/pubsub/orte/Makefile]) - - AC_ARG_WITH([orte], - AC_HELP_STRING([--with-orte], - [Use ORTE run-time environment (default: yes)])) - AS_IF([test "$with_orte" != "no"], - [$1], - [$2]) -]) diff --git a/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt b/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt deleted file mode 100644 index b00908c65fb..00000000000 --- a/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt +++ /dev/null @@ -1,43 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. 
-# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open MPI. -# -[pubsub-orte:no-server] -Process rank %ld attempted to %s a global ompi_server that -could not be contacted. This is typically caused by either not -specifying the contact info for the server, or by the server not -currently executing. If you did specify the contact info for a -server, please check to see that the server is running and start -it again (or have your sys admin start it) if it isn't. - -[pubsub-orte:unknown-order] -Process rank %ld attempted to lookup a value but provided an -unrecognized order parameter. Order parameters are used to tell Open -MPI if it should first look for the requested value locally (i.e., from -the current job) or from a global ompi_server. Accepted order -parameters are "local" and "global", respectively. - -[pubsub-orte:too-many-orders] -Process rank %ld attempted to lookup a value but provided too many -order parameters (%ld found). Order parameters are used to tell -Open MPI if it should first look for the requested value locally -(i.e., from the current job) or from a global ompi_server. Accepted -order parameters are "local" and "global", respectively, and each can -only be specified once. diff --git a/ompi/mca/pubsub/orte/owner.txt b/ompi/mca/pubsub/orte/owner.txt deleted file mode 100644 index 4ad6f408ca3..00000000000 --- a/ompi/mca/pubsub/orte/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/ompi/mca/pubsub/orte/pubsub_orte.c b/ompi/mca/pubsub/orte/pubsub_orte.c deleted file mode 100644 index 72f877b20ac..00000000000 --- a/ompi/mca/pubsub/orte/pubsub_orte.c +++ /dev/null @@ -1,582 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include -#include -#include - -#include "opal/util/show_help.h" -#include "opal/util/argv.h" -#include "opal/dss/dss.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/routed/routed.h" -#include "orte/util/name_fns.h" -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_data_server.h" - -#include "ompi/info/info.h" -#include "ompi/mca/rte/rte.h" - -#include "ompi/mca/pubsub/base/base.h" -#include "pubsub_orte.h" - -/* Establish contact with the server - * - * NOTE: we do not do this automatically during init to avoid - * forcing every process to pay the time penalty during MPI_Init - * when only a few, if any, will ever call pub/lookup/unpub. In - * addition, those that -do- call these functions may well only - * use local (as opposed to global) storage, and hence will have - * no need to talk to the server, even though a sys admin may - * have set one up. So we do a lazy setup of the server contact - * info - it only gets setup the first time we call a function - * that wants to talk to the global server - */ -static bool server_setup=false; - -static void setup_server(void) -{ - opal_buffer_t buf; - int rc; - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: setting up server at URI %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == mca_pubsub_orte_component.server_uri) ? 
"NULL" : mca_pubsub_orte_component.server_uri)); - - /* flag setup as completed so we only pass through here once */ - server_setup = true; - - if (NULL == mca_pubsub_orte_component.server_uri) { - /* if the contact info for the server is NULL, then there - * is nothing we can do - there is no path to the server - */ - mca_pubsub_orte_component.server_found = false; - return; - } - - /* init the route to the server - init_routes wants a buffer - * passed to it, so we have to package the server's contact - * info into a buffer - */ - OBJ_CONSTRUCT(&buf, opal_buffer_t); - opal_dss.pack(&buf, &mca_pubsub_orte_component.server_uri, 1, OPAL_STRING); - /* extract the server's name so we have its jobid */ - if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(mca_pubsub_orte_component.server_uri, - &mca_pubsub_orte_component.server, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - mca_pubsub_orte_component.server_found = false; - return; - } - /* init routes to the server's job */ - if (ORTE_SUCCESS != (rc = orte_routed.init_routes(mca_pubsub_orte_component.server.jobid, &buf))) { - ORTE_ERROR_LOG(rc); - mca_pubsub_orte_component.server_found = false; - OBJ_DESTRUCT(&buf); - return; - } - OBJ_DESTRUCT(&buf); - - /* flag the server as found */ - mca_pubsub_orte_component.server_found = true; - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: server %s setup", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&mca_pubsub_orte_component.server))); -} - -/* - * Init the module - */ -static int init(void) -{ - return OMPI_SUCCESS; -} - -/* - * publish the port_name for the specified service_name. This will - * be published under our process name, so only we will be allowed - * to remove it later. 
- */ -static int publish ( const char *service_name, ompi_info_t *info, const char *port_name ) -{ - int rc, ret, flag; - bool global_scope = false; - orte_process_name_t *info_host; - opal_buffer_t *buf; - orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_PUBLISH; - orte_std_cntr_t cnt; - orte_rml_recv_cb_t xfer; - bool unique=false; - - ompi_info_get_bool(info, "ompi_global_scope", &global_scope, &flag); - - if (0 == flag) { - /* scope was not defined - see if server exists */ - if (!server_setup) { - setup_server(); - } - if (mca_pubsub_orte_component.server_found) { - /* server was found - use it as our default store */ - info_host = &mca_pubsub_orte_component.server; - global_scope = true; - } else { - /* server was not found - use our HNP as default store */ - info_host = ORTE_PROC_MY_HNP; - } - } else if (!global_scope) { - /* if the scope is not global, then store the value on the HNP */ - info_host = ORTE_PROC_MY_HNP; - } else { - /* has the server been setup yet? */ - if (!server_setup) { - setup_server(); - } - /* store the value on the global ompi_server, but error - * if that server wasn't contacted - */ - if (!mca_pubsub_orte_component.server_found) { - opal_show_help("help-ompi-pubsub-orte.txt", "pubsub-orte:no-server", - true, (long)ORTE_PROC_MY_NAME->vpid, "publish to"); - return OMPI_ERR_NOT_FOUND; - } - info_host = &mca_pubsub_orte_component.server; - } - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: publishing service %s scope %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - service_name, global_scope ? 
"Global" : "Local")); - - ompi_info_get_bool(info, "ompi_unique", &unique, &flag); - if (0 == flag) { - /* uniqueness not specified - overwrite by default */ - unique = false; - } - - /* construct the buffer */ - buf = OBJ_NEW(opal_buffer_t); - - /* pack the publish command */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* pack the service name */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &service_name, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* pack the port name */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &port_name, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* pack the uniqueness flag */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &unique, 1, OPAL_BOOL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* send the data */ - if (0 > (rc = orte_rml.send_buffer_nb(info_host, buf, - ORTE_RML_TAG_DATA_SERVER, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* get the answer */ - OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t); - xfer.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DATA_CLIENT, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - OMPI_WAIT_FOR_COMPLETION(xfer.active); - - /* unpack the result */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer.data, &ret, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - } - rc = ret; - OBJ_DESTRUCT(&xfer); - -CLEANUP: - return rc; -} - -enum { NONE, LOCAL, GLOBAL }; - -static char* lookup ( const char *service_name, ompi_info_t *info ) -{ - orte_process_name_t *info_host; - opal_buffer_t *buf; - orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_LOOKUP; - orte_std_cntr_t cnt=0; - char *port_name=NULL; - int ret, rc, flag, i; - char value[256], **tokens, *ptr; - int lookup[2] = { GLOBAL, LOCAL }; - size_t 
num_tokens; - orte_rml_recv_cb_t xfer; - - /* Look in the MPI_Info (ompi_info_t*) for the key - * "ompi_lookup_order". Acceptable values are: - * - * - "local" -- only check the local scope - * - "global" -- only check the global scope - * - "local,global" -- check the local scope first, then check the - * global scope - * - "global,local" -- check the global scope first, then check the - * local scope - * - * Give a little leeway in terms of whitespace in the value. - * - * The lookup[2] array will contain the results: lookup[0] is the - * first scope to check, lookup[1] is the 2nd. Either value may - * be NONE, LOCAL, or GLOBAL. If both are NONE, clearly that's an - * error. :-) - */ - ompi_info_get(info, "ompi_lookup_order", sizeof(value) - 1, value, &flag); - if (flag) { - ptr = &value[0]; - while (isspace(*ptr) && (ptr - value) < (int)sizeof(value)) { - ++ptr; - } - if (ptr - value < (int)sizeof(value)) { - tokens = opal_argv_split(ptr, ','); - if (NULL != tokens) { - if ((num_tokens = opal_argv_count(tokens)) > 2) { - /* too many values in the comma-delimited list */ - opal_show_help("help-ompi-pubsub-orte.txt", - "pubsub-orte:too-many-orders", - true, (long)ORTE_PROC_MY_NAME->vpid, - (long)num_tokens); - opal_argv_free(tokens); - return NULL; - } - for (i = 0; i < 2; ++i) { - if (NULL != tokens[i]) { - if (0 == strcasecmp(tokens[i], "local")) { - lookup[i] = LOCAL; - } else if (0 == strcasecmp(tokens[i], "global")) { - lookup[i] = GLOBAL; - } else { - /* unrecognized value -- that's an error */ - opal_show_help("help-ompi-pubsub-orte.txt", - "pubsub-orte:unknown-order", - true, (long)ORTE_PROC_MY_NAME->vpid); - opal_argv_free(tokens); - return NULL; - } - } else { - lookup[i] = NONE; - } - } - opal_argv_free(tokens); - } - } - - if (NONE == lookup[0]) { - /* if the user provided an info key, then we at least must - * be given one place to look - */ - opal_show_help("help-ompi-pubsub-orte.txt", - "pubsub-orte:unknown-order", - true, 
(long)ORTE_PROC_MY_NAME->vpid); - return NULL; - } - - } else { - /* if no info key was provided, then we default to the global - * server IF it is active - */ - if (!server_setup) { - setup_server(); - } - lookup[1] = NONE; - if (mca_pubsub_orte_component.server_found) { - lookup[0] = GLOBAL; - } else { - /* global server was not found - just look local */ - lookup[0] = LOCAL; - } - } - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: lookup service %s scope %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - service_name, lookup[0])); - - /* go find the value */ - for (i=0; i < 2; i++) { - if (LOCAL == lookup[i]) { - /* if the scope is local, then lookup the value on the HNP */ - info_host = ORTE_PROC_MY_HNP; - } else if (GLOBAL == lookup[i]) { - /* has the server been setup yet? */ - if (!server_setup) { - setup_server(); - } - /* lookup the value on the global ompi_server, but error - * if that server wasn't contacted - */ - if (!mca_pubsub_orte_component.server_found) { - opal_show_help("help-ompi-pubsub-orte.txt", - "pubsub-orte:no-server", - true, (long)ORTE_PROC_MY_NAME->vpid, - "lookup from"); - return NULL; - } - info_host = &mca_pubsub_orte_component.server; - } else if (NONE == lookup[i]) { - continue; - } else { - /* unknown host! 
*/ - opal_show_help("help-ompi-pubsub-orte.txt", - "pubsub-orte:unknown-order", - true, (long)ORTE_PROC_MY_NAME->vpid); - return NULL; - } - - /* go look it up */ - /* construct the buffer */ - buf = OBJ_NEW(opal_buffer_t); - - /* pack the lookup command */ - if (OPAL_SUCCESS != (ret = opal_dss.pack(buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* pack the service name */ - if (OPAL_SUCCESS != (ret = opal_dss.pack(buf, &service_name, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* send the cmd */ - if (0 > (ret = orte_rml.send_buffer_nb(info_host, buf, - ORTE_RML_TAG_DATA_SERVER, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* get the answer */ - OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t); - xfer.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DATA_CLIENT, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - OMPI_WAIT_FOR_COMPLETION(xfer.active); - - /* unpack the return code */ - cnt = 1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(&xfer.data, &rc, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(ret); - goto CLEANUP; - } - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: lookup returned status %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc)); - - if (ORTE_SUCCESS == rc) { - /* the server was able to lookup the port - unpack the port name */ - cnt=1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(&xfer.data, &port_name, &cnt, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - OBJ_DESTRUCT(&xfer); - goto CLEANUP; - } - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: lookup returned port %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == port_name) ? 
"NULL" : port_name)); - - if (NULL != port_name) { - /* got an answer - return it */ - OBJ_DESTRUCT(&xfer); - return port_name; - } - } - - /* if we didn't get a port_name, then continue */ - OBJ_DESTRUCT(&xfer); - } - - /* only get here if we tried both options and failed - since the - * buffer will already have been cleaned up, just return - */ - CLEANUP: - return NULL; -} - -/* - * delete the entry. Only the process who has published - * the service_name has the right to remove this - * service - the server will verify and report the result - */ -static int unpublish ( const char *service_name, ompi_info_t *info ) -{ - int rc, ret, flag; - bool global_scope; - orte_process_name_t *info_host; - opal_buffer_t *buf; - orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_UNPUBLISH; - orte_std_cntr_t cnt; - orte_rml_recv_cb_t xfer; - - ompi_info_get_bool(info, "ompi_global_scope", &global_scope, &flag); - - if (0 == flag) { - /* scope was not defined - see if server exists */ - if (!server_setup) { - setup_server(); - } - if (mca_pubsub_orte_component.server_found) { - /* server was found - use it as our default store */ - info_host = &mca_pubsub_orte_component.server; - global_scope = true; - } else { - /* server was not found - use our HNP as default store */ - info_host = ORTE_PROC_MY_HNP; - } - } else if (!global_scope) { - /* if the scope is not global, then unpublish the value from the HNP */ - info_host = ORTE_PROC_MY_HNP; - } else { - /* has the server been setup yet? 
*/ - if (!server_setup) { - setup_server(); - } - /* unpublish the value from the global ompi_server, but error - * if that server wasn't contacted - */ - if (!mca_pubsub_orte_component.server_found) { - opal_show_help("help-ompi-pubsub-orte.txt", "pubsub-orte:no-server", - true, (long)ORTE_PROC_MY_NAME->vpid, "unpublish from"); - return OMPI_ERR_NOT_FOUND; - } - info_host = &mca_pubsub_orte_component.server; - } - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: unpublish service %s scope %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - service_name, global_scope ? "Global" : "Local")); - - /* construct the buffer */ - buf = OBJ_NEW(opal_buffer_t); - - /* pack the unpublish command */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* pack the service name */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &service_name, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* send the command */ - if (0 > (rc = orte_rml.send_buffer_nb(info_host, buf, ORTE_RML_TAG_DATA_SERVER, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* get the answer */ - OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t); - xfer.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - OMPI_WAIT_FOR_COMPLETION(xfer.active); - - /* unpack the result */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer.data, &ret, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&xfer); - goto CLEANUP; - } - OBJ_DESTRUCT(&xfer); - rc = ret; - -CLEANUP: - return rc; -} - - -/* - * finalize the module - */ -static int finalize(void) -{ - return OMPI_SUCCESS; -} - -/* - * instantiate the module - */ -ompi_pubsub_base_module_t ompi_pubsub_orte_module = { - init, - publish, - unpublish, - 
lookup, - finalize -}; - - diff --git a/ompi/mca/pubsub/orte/pubsub_orte.h b/ompi/mca/pubsub/orte/pubsub_orte.h deleted file mode 100644 index 8f4acdff0df..00000000000 --- a/ompi/mca/pubsub/orte/pubsub_orte.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2006 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OMPI_PUBSUB_ORTE_H -#define OMPI_PUBSUB_ORTE_H - -#include "ompi_config.h" - -#include "orte/types.h" - -#include "ompi/mca/pubsub/pubsub.h" - -BEGIN_C_DECLS - -/* - * Extend the pubsub component to hold some useful - * values for this component - */ -typedef struct { - ompi_pubsub_base_component_t super; - orte_process_name_t server; - char *server_uri; - bool server_found; -} ompi_pubsub_orte_component_t; - -/* access to module */ -extern ompi_pubsub_base_module_t ompi_pubsub_orte_module; - -/* access to component so we can get to the locally - * global values - */ -OMPI_MODULE_DECLSPEC extern ompi_pubsub_orte_component_t mca_pubsub_orte_component; - -END_C_DECLS - -#endif /* OMPI_PUBSUB_ORTE_H */ diff --git a/ompi/mca/pubsub/orte/pubsub_orte_component.c b/ompi/mca/pubsub/orte/pubsub_orte_component.c deleted file mode 100644 index 7638fcbb35a..00000000000 --- a/ompi/mca/pubsub/orte/pubsub_orte_component.c +++ /dev/null @@ -1,95 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include "pubsub_orte.h" - -static int pubsub_orte_component_register(void); -static int pubsub_orte_component_open(void); -static int pubsub_orte_component_close(void); -static int pubsub_orte_component_query(mca_base_module_t **module, int *priority); - -static int my_priority = 50; - -ompi_pubsub_orte_component_t mca_pubsub_orte_component = { - { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .base_version = { - OMPI_PUBSUB_BASE_VERSION_2_0_0, - - .mca_component_name = "orte", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_open_component = pubsub_orte_component_open, - .mca_close_component = pubsub_orte_component_close, - .mca_query_component = pubsub_orte_component_query, - .mca_register_component_params = pubsub_orte_component_register, - }, - .base_data = { - /* This component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - } -}; - -static int pubsub_orte_component_register(void) -{ - my_priority = 50; - (void) mca_base_component_var_register(&mca_pubsub_orte_component.super.base_version, - "priority", "Priority of the pubsub pmi component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &my_priority); - - mca_pubsub_orte_component.server_uri = NULL; - (void) 
mca_base_component_var_register(&mca_pubsub_orte_component.super.base_version, - "server", "Contact info for ompi_server for publish/subscribe operations", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_pubsub_orte_component.server_uri); - - return OMPI_SUCCESS; -} - -static int pubsub_orte_component_open(void) -{ - return OMPI_SUCCESS; -} - -static int pubsub_orte_component_close(void) -{ - return OMPI_SUCCESS; -} - -static int pubsub_orte_component_query(mca_base_module_t **module, int *priority) -{ - mca_pubsub_orte_component.server_found = false; - - *priority = my_priority; - *module = (mca_base_module_t *) &ompi_pubsub_orte_module; - return OMPI_SUCCESS; -} diff --git a/ompi/mca/pubsub/pmi/Makefile.am b/ompi/mca/pubsub/pmi/Makefile.am deleted file mode 100644 index 60d264d7f24..00000000000 --- a/ompi/mca/pubsub/pmi/Makefile.am +++ /dev/null @@ -1,40 +0,0 @@ -# -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(pubsub_pmi_CPPFLAGS) - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_ompi_pubsub_pmi_DSO -component_noinst = -component_install = mca_pubsub_pmi.la -else -component_noinst = libmca_pubsub_pmi.la -component_install = -endif - -local_sources = \ - pubsub_pmi.c \ - pubsub_pmi.h \ - pubsub_pmi_component.c - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_pubsub_pmi_la_SOURCES = $(local_sources) -mca_pubsub_pmi_la_LDFLAGS = -module -avoid-version $(pubsub_pmi_LDFLAGS) -mca_pubsub_pmi_la_LIBADD = $(pubsub_pmi_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pubsub_pmi_la_SOURCES = $(local_sources) -libmca_pubsub_pmi_la_LIBADD = $(pubsub_pmi_LIBS) -libmca_pubsub_pmi_la_LDFLAGS = -module -avoid-version $(pubsub_pmi_LDFLAGS) - diff --git a/ompi/mca/pubsub/pmi/configure.m4 b/ompi/mca/pubsub/pmi/configure.m4 deleted file mode 100644 index 8e69b82cd55..00000000000 --- a/ompi/mca/pubsub/pmi/configure.m4 +++ /dev/null @@ -1,27 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# MCA_ompi_pubsub_pmi_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_pubsub_pmi_CONFIG], [ - AC_CONFIG_FILES([ompi/mca/pubsub/pmi/Makefile]) - - OPAL_CHECK_PMI([pubsub_pmi], [pubsub_pmi_good=1], [pubsub_pmi_good=0]) - - # Evaluate succeed / fail - AS_IF([test "$pubsub_pmi_good" = 1], - [$1], - [$2]) - - # set build flags to use in makefile - AC_SUBST([pubsub_pmi_CPPFLAGS]) - AC_SUBST([pubsub_pmi_LDFLAGS]) - AC_SUBST([pubsub_pmi_LIBS]) - -]) diff --git a/ompi/mca/pubsub/pmi/owner.txt b/ompi/mca/pubsub/pmi/owner.txt deleted file mode 100644 index 4ad6f408ca3..00000000000 --- a/ompi/mca/pubsub/pmi/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/ompi/mca/pubsub/pmi/pubsub_pmi.c b/ompi/mca/pubsub/pmi/pubsub_pmi.c deleted file mode 100644 index 09de30bceb7..00000000000 --- a/ompi/mca/pubsub/pmi/pubsub_pmi.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - - -#include "opal/mca/pmix/pmix.h" - -#include "ompi/info/info.h" -#include "ompi/mca/rte/rte.h" -#include "ompi/mca/pubsub/base/base.h" -#include "pubsub_pmi.h" - -/* - * Init the module - */ -static int init(void) -{ - // did the pmix.init in the component - return OMPI_SUCCESS; -} - -/* - * publish the port_name for the specified service_name. - */ -static int publish(const char *service_name, ompi_info_t *info, const char *port_name) -{ - pmix_info_t *p; - opal_list_t xfer; - ompi_info_entry_t *ie; - int rc; - - /* transfer the ompi_info_t data to an array of pmix_info_t structs */ - OBJ_CONSTRUCT(&xfer, opal_list_t); - OPAL_LIST_FOREACH(ie, &info->super, ompi_info_entry_t) { - p = OBJ_NEW(pmix_info_t); - strncpy(p->key, ie->ie_key, PMIX_MAX_INFO_KEY); - strncpy(p->value, ie->ie_value, PMIX_MAX_INFO_VAL); - opal_list_append(&xfer, &p->super); - } - - rc = opal_pmix.publish(service_name, &xfer, port_name); - OPAL_LIST_DESTRUCT(&xfer); - return rc; -} - -static char* lookup(const char *service_name, ompi_info_t *info) -{ - char port[PMIX_MAX_VALLEN], *ret; - pmix_info_t *p; - opal_list_t xfer; - ompi_info_entry_t *ie; - int rc; - - /* transfer the ompi_info_t data to an array of pmix_info_t structs */ - OBJ_CONSTRUCT(&xfer, opal_list_t); - OPAL_LIST_FOREACH(ie, &info->super, ompi_info_entry_t) { - p = OBJ_NEW(pmix_info_t); - strncpy(p->key, 
ie->ie_key, PMIX_MAX_INFO_KEY); - strncpy(p->value, ie->ie_value, PMIX_MAX_INFO_VAL); - opal_list_append(&xfer, &p->super); - } - rc = opal_pmix.lookup(service_name, &xfer, port, PMIX_MAX_VALLEN); - OPAL_LIST_DESTRUCT(&xfer); - - /* in error case port will be set to NULL - * this is what our callers expect to see - * In future maybe some error handling need? - */ - if( rc != OPAL_SUCCESS ){ - // improve error processing - return NULL; - } - ret = strdup(port); - return ret; -} - -/* - * delete the entry */ -static int unpublish(const char *service_name, ompi_info_t *info) -{ - pmix_info_t *p; - opal_list_t xfer; - ompi_info_entry_t *ie; - int rc; - - /* transfer the ompi_info_t data to an array of pmix_info_t structs */ - OBJ_CONSTRUCT(&xfer, opal_list_t); - OPAL_LIST_FOREACH(ie, &info->super, ompi_info_entry_t) { - p = OBJ_NEW(pmix_info_t); - strncpy(p->key, ie->ie_key, PMIX_MAX_INFO_KEY); - strncpy(p->value, ie->ie_value, PMIX_MAX_INFO_VAL); - opal_list_append(&xfer, &p->super); - } - rc = opal_pmix.unpublish(service_name, &xfer); - OPAL_LIST_DESTRUCT(&xfer); - return rc; -} - - -/* - * finalize the module - */ -static int finalize(void) -{ - return OMPI_SUCCESS; -} - -/* - * instantiate the module - */ -ompi_pubsub_base_module_t ompi_pubsub_pmi_module = { - init, - publish, - unpublish, - lookup, - finalize -}; diff --git a/ompi/mca/pubsub/pmi/pubsub_pmi.h b/ompi/mca/pubsub/pmi/pubsub_pmi.h deleted file mode 100644 index b3a6c608a0e..00000000000 --- a/ompi/mca/pubsub/pmi/pubsub_pmi.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OMPI_PUBSUB_PMI_H -#define OMPI_PUBSUB_PMI_H - -#include "ompi/mca/pubsub/pubsub.h" - -BEGIN_C_DECLS - -/* access to module */ -extern ompi_pubsub_base_module_t ompi_pubsub_pmi_module; - -/* access to component */ -OMPI_MODULE_DECLSPEC extern ompi_pubsub_base_component_t mca_pubsub_pmi_component; - -END_C_DECLS - -#endif /* OMPI_PUBSUB_PMI_H */ diff --git a/ompi/mca/pubsub/pmi/pubsub_pmi_component.c b/ompi/mca/pubsub/pmi/pubsub_pmi_component.c deleted file mode 100644 index bb03bc7b6c0..00000000000 --- a/ompi/mca/pubsub/pmi/pubsub_pmi_component.c +++ /dev/null @@ -1,90 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/runtime/opal_params.h" -#include "opal/mca/pmix/pmix.h" - -#include "ompi/constants.h" -#include "ompi/mca/rte/rte.h" - -#include "pubsub_pmi.h" - -static int pubsub_pmi_component_register(void); -static int pubsub_pmi_component_open(void); -static int pubsub_pmi_component_close(void); -static int pubsub_pmi_component_query(mca_base_module_t **module, int *priority); - -static int my_priority = 100; /* must be above "orte" component */ - -ompi_pubsub_base_component_t mca_pubsub_pmi_component = { - .base_version = { - OMPI_PUBSUB_BASE_VERSION_2_0_0, - - .mca_component_name = "pmi", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_open_component = pubsub_pmi_component_open, - .mca_close_component = pubsub_pmi_component_close, - .mca_query_component = pubsub_pmi_component_query, - .mca_register_component_params = pubsub_pmi_component_register, - }, - 
.base_data = { - /* This component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int pubsub_pmi_component_register(void) -{ - my_priority = 100; - (void) mca_base_component_var_register(&mca_pubsub_pmi_component.base_version, - "priority", "Priority of the pubsub pmi component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &my_priority); - - return OMPI_SUCCESS; -} - -static int pubsub_pmi_component_open(void) -{ - return OMPI_SUCCESS; -} - -static int pubsub_pmi_component_close(void) -{ - if (NULL != opal_pmix.finalize) { - opal_pmix.finalize(); - } - return OMPI_SUCCESS; -} - -static int pubsub_pmi_component_query(mca_base_module_t **module, int *priority) -{ - if (NULL != opal_pmix.init) { - - if (OPAL_SUCCESS == opal_pmix.init()) { - *priority = my_priority; - *module = (mca_base_module_t *)&ompi_pubsub_pmi_module; - return OMPI_SUCCESS; - } - } - - /* we can't run */ - *priority = -1; - *module = NULL; - return OMPI_ERROR; -} diff --git a/ompi/mca/pubsub/pubsub.h b/ompi/mca/pubsub/pubsub.h deleted file mode 100644 index 72972ddf13c..00000000000 --- a/ompi/mca/pubsub/pubsub.h +++ /dev/null @@ -1,108 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Dynamic Process Management Interface - * - */ - -#ifndef OMPI_MCA_PUBSUB_H -#define OMPI_MCA_PUBSUB_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" - - -#include "ompi/info/info.h" - -BEGIN_C_DECLS - -/* - * Initialize a module - */ -typedef int (*ompi_pubsub_base_module_init_fn_t)(void); - -/* - * Publish a data item - */ -typedef int (*ompi_pubsub_base_module_publish_fn_t)(const char *service, ompi_info_t *info, const char *port); - -/* - * Unpublish a data item - */ -typedef int (*ompi_pubsub_base_module_unpublish_fn_t)(const char *service, ompi_info_t *info); - -/* - * Lookup a data item - */ -typedef char* (*ompi_pubsub_base_module_lookup_fn_t)(const char *service, ompi_info_t *info); - -/* - * Finalize a module - */ -typedef int (*ompi_pubsub_base_module_finalize_fn_t)(void); - -/** -* Structure for PUBSUB modules - */ -struct ompi_pubsub_base_module_1_0_0_t { - /** Initialization Function */ - ompi_pubsub_base_module_init_fn_t init; - /* Publish */ - ompi_pubsub_base_module_publish_fn_t publish; - /* Unpublish */ - ompi_pubsub_base_module_unpublish_fn_t unpublish; - /* Lookup */ - ompi_pubsub_base_module_lookup_fn_t lookup; - /* finalize */ - ompi_pubsub_base_module_finalize_fn_t finalize; -}; -typedef struct ompi_pubsub_base_module_1_0_0_t ompi_pubsub_base_module_1_0_0_t; -typedef struct ompi_pubsub_base_module_1_0_0_t ompi_pubsub_base_module_t; - -OMPI_DECLSPEC extern ompi_pubsub_base_module_t ompi_pubsub; - - -/** - * Structure for PUBSUB components. 
- */ -struct ompi_pubsub_base_component_2_0_0_t { - /** MCA base component */ - mca_base_component_t base_version; - /** MCA base data */ - mca_base_component_data_t base_data; -}; -typedef struct ompi_pubsub_base_component_2_0_0_t ompi_pubsub_base_component_2_0_0_t; -typedef struct ompi_pubsub_base_component_2_0_0_t ompi_pubsub_base_component_t; - -/** - * Macro for use in components that are of type PUBSUB - */ -#define OMPI_PUBSUB_BASE_VERSION_2_0_0 \ - OMPI_MCA_BASE_VERSION_2_1_0("pubsub", 2, 0, 0) - - -END_C_DECLS - -#endif /* OMPI_MCA_PUBSUB_H */ diff --git a/ompi/mca/rte/Makefile.am b/ompi/mca/rte/Makefile.am index 7d455934266..a6c7658178b 100644 --- a/ompi/mca/rte/Makefile.am +++ b/ompi/mca/rte/Makefile.am @@ -1,9 +1,9 @@ # -# Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/rte/base/Makefile.am b/ompi/mca/rte/base/Makefile.am index 9561f768dd0..aa7534be2e4 100644 --- a/ompi/mca/rte/base/Makefile.am +++ b/ompi/mca/rte/base/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/rte/base/base.h b/ompi/mca/rte/base/base.h index db3f4ee7062..a4f3b5b5afe 100644 --- a/ompi/mca/rte/base/base.h +++ b/ompi/mca/rte/base/base.h @@ -4,9 +4,9 @@ * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/rte/base/rte_base_frame.c b/ompi/mca/rte/base/rte_base_frame.c index bd6d436d6c3..c08d911b070 100644 --- a/ompi/mca/rte/base/rte_base_frame.c +++ b/ompi/mca/rte/base/rte_base_frame.c @@ -3,9 +3,9 @@ * All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/rte/configure.m4 b/ompi/mca/rte/configure.m4 index ed5b9697480..3ccf2aefa5f 100644 --- a/ompi/mca/rte/configure.m4 +++ b/ompi/mca/rte/configure.m4 @@ -4,9 +4,9 @@ dnl Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/ompi/mca/rte/orte/Makefile.am b/ompi/mca/rte/orte/Makefile.am index f5c1f210a5c..faaa72f5f69 100644 --- a/ompi/mca/rte/orte/Makefile.am +++ b/ompi/mca/rte/orte/Makefile.am @@ -3,9 +3,9 @@ # All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -29,10 +29,6 @@ libmca_rte_orte_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la man_pages = mpirun.1 mpiexec.1 ompi-ps.1 ompi-clean.1 ompi-top.1 ompi-server.1 ompi-dvm.1 ompi-submit.1 -if WANT_FT -man_pages += ompi-checkpoint.1 ompi-restart.1 -endif - if OPAL_INSTALL_BINARIES nodist_man_MANS = $(man_pages) @@ -45,11 +41,6 @@ install-exec-hook: (cd $(DESTDIR)$(bindir); rm -f ompi-server$(EXEEXT); $(LN_S) orte-server$(EXEEXT) ompi-server$(EXEEXT)) (cd $(DESTDIR)$(bindir); rm -f ompi-dvm$(EXEEXT); $(LN_S) orte-dvm$(EXEEXT) ompi-dvm$(EXEEXT)) (cd $(DESTDIR)$(bindir); rm -f ompi-submit$(EXEEXT); $(LN_S) orte-submit$(EXEEXT) ompi-submit$(EXEEXT)) -if WANT_FT - (cd $(DESTDIR)$(bindir); rm -f ompi-checkpoint$(EXEEXT); $(LN_S) orte-checkpoint$(EXEEXT) ompi-checkpoint$(EXEEXT)) - (cd $(DESTDIR)$(bindir); rm -f ompi-restart$(EXEEXT); $(LN_S) orte-restart$(EXEEXT) ompi-restart$(EXEEXT)) - (cd $(DESTDIR)$(bindir); rm -f ompi-migrate$(EXEEXT); $(LN_S) orte-migrate$(EXEEXT) ompi-migrate$(EXEEXT)) -endif uninstall-local: rm -f $(DESTDIR)$(bindir)/mpirun$(EXEEXT) \ @@ -60,11 +51,6 @@ uninstall-local: 
$(DESTDIR)$(bindir)/ompi-server$(EXEEXT) \ $(DESTDIR)$(bindir)/ompi-dvm$(EXEEXT) \ $(DESTDIR)$(bindir)/ompi-submit$(EXEEXT) -if WANT_FT - rm -f $(DESTDIR)$(bindir)/ompi-checkpoint$(EXEEXT) \ - $(DESTDIR)$(bindir)/ompi-restart$(EXEEXT) \ - $(DESTDIR)$(bindir)/ompi-migrate$(EXEEXT) -endif endif # OPAL_INSTALL_BINARIES diff --git a/ompi/mca/rte/orte/configure.m4 b/ompi/mca/rte/orte/configure.m4 index fd3b95dad50..ab8a15df302 100644 --- a/ompi/mca/rte/orte/configure.m4 +++ b/ompi/mca/rte/orte/configure.m4 @@ -4,9 +4,9 @@ # Copyright (c) 2013 Sandia National Laboratories. All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -28,7 +28,7 @@ AC_DEFUN([MCA_ompi_rte_orte_POST_CONFIG],[ AM_CONDITIONAL([OMPI_RTE_ORTE], [test $1 = 1]) ])dnl -# MCA_rte_orte_CONFIG([action-if-can-compile], +# MCA_rte_orte_CONFIG([action-if-can-compile], # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_rte_orte_CONFIG],[ diff --git a/ompi/mca/rte/orte/rte_orte.h b/ompi/mca/rte/orte/rte_orte.h index b99638ddfa8..037f1387b6e 100644 --- a/ompi/mca/rte/orte/rte_orte.h +++ b/ompi/mca/rte/orte/rte_orte.h @@ -1,14 +1,14 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * When this component is used, this file is included in the rest of @@ -50,12 +50,18 @@ typedef orte_ns_cmp_bitmask_t ompi_rte_cmp_bitmask_t; #define OMPI_PROC_MY_NAME ORTE_PROC_MY_NAME #define OMPI_NAME_PRINT(a) ORTE_NAME_PRINT((const orte_process_name_t*)a) #define ompi_rte_compare_name_fields(a, b, c) orte_util_compare_name_fields(a, (const orte_process_name_t*)(b), (const orte_process_name_t*)(c)) +#define ompi_rte_convert_string_to_process_name(a,b) orte_util_convert_string_to_process_name(a,b) +#define ompi_rte_convert_process_name_to_string(a,b) orte_util_convert_process_name_to_string(a,b) #define OMPI_NAME_WILDCARD ORTE_NAME_WILDCARD #define OMPI_NODE_RANK_INVALID ORTE_NODE_RANK_INVALID #define OMPI_LOCAL_RANK_INVALID ORTE_LOCAL_RANK_INVALID #define OMPI_RTE_CMP_JOBID ORTE_NS_CMP_JOBID #define OMPI_RTE_CMP_VPID ORTE_NS_CMP_VPID #define OMPI_RTE_CMP_ALL ORTE_NS_CMP_ALL +#define OMPI_LOCAL_JOBID(jobid) ORTE_LOCAL_JOBID(jobid) +#define OMPI_JOB_FAMILY(jobid) ORTE_JOB_FAMILY(jobid) +#define OMPI_CONSTRUCT_JOBID(family,local) ORTE_CONSTRUCT_JOBID(family,local) + /* This is the DSS tag to serialize a proc name */ #define OMPI_NAME ORTE_NAME #define OMPI_PROCESS_NAME_HTON ORTE_PROCESS_NAME_HTON @@ -78,7 +84,7 @@ typedef orte_local_rank_t ompi_local_rank_t; #define ompi_rte_proc_is_bound orte_proc_is_bound /* Error handling objects and operations */ -OMPI_DECLSPEC void __opal_attribute_noreturn__ +OMPI_DECLSPEC void __opal_attribute_noreturn__ ompi_rte_abort(int error_code, char *fmt, ...); #define ompi_rte_abort_peers(a, b, c) orte_errmgr.abort_peers(a, b, c) #define OMPI_RTE_ERRHANDLER_FIRST ORTE_ERRMGR_CALLBACK_FIRST @@ -94,27 +100,6 @@ typedef orte_error_t ompi_rte_error_report_t; #define ompi_rte_finalize() orte_finalize() OMPI_DECLSPEC void ompi_rte_wait_for_debugger(void); -#define OMPI_DB_HOSTNAME ORTE_DB_HOSTNAME -#define OMPI_DB_LOCALITY ORTE_DB_LOCALITY -#define 
OMPI_DB_GLOBAL_RANK ORTE_DB_GLOBAL_RANK - -/* Communications */ -typedef orte_rml_tag_t ompi_rml_tag_t; -#define ompi_rte_send_buffer_nb(a, b, c, d, e) orte_rml.send_buffer_nb(a, b, c, d, e) -#define ompi_rte_recv_buffer_nb(a, b, c, d, e) orte_rml.recv_buffer_nb(a, b, c, d, e) -#define ompi_rte_recv_cancel(a, b) orte_rml.recv_cancel(a, b) -#define ompi_rte_parse_uris(a, b, c) orte_rml_base_parse_uris(a, b, c) -#define ompi_rte_send_cbfunc orte_rml_send_callback - -/* Communication tags */ -/* carry over the INVALID def */ -#define OMPI_RML_TAG_INVALID ORTE_RML_TAG_INVALID -/* define a starting point to avoid conflicts */ -#define OMPI_RML_TAG_BASE ORTE_RML_TAG_MAX - -#define OMPI_RML_PERSISTENT ORTE_RML_PERSISTENT -#define OMPI_RML_NON_PERSISTENT ORTE_RML_NON_PERSISTENT - typedef struct { ompi_rte_component_t super; opal_mutex_t lock; @@ -136,8 +121,6 @@ static inline orte_process_name_t * OMPI_CAST_RTE_NAME(opal_process_name_t * nam } #endif -#define ompi_direct_modex_cutoff orte_direct_modex_cutoff - END_C_DECLS #endif /* MCA_OMPI_RTE_ORTE_H */ diff --git a/ompi/mca/rte/orte/rte_orte_component.c b/ompi/mca/rte/orte/rte_orte_component.c index dd8f9daee17..1c6817b0bfe 100644 --- a/ompi/mca/rte/orte/rte_orte_component.c +++ b/ompi/mca/rte/orte/rte_orte_component.c @@ -1,14 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -83,6 +83,7 @@ static int rte_orte_close(void) { opal_mutex_lock(&mca_rte_orte_component.lock); OPAL_LIST_DESTRUCT(&mca_rte_orte_component.modx_reqs); + opal_mutex_unlock(&mca_rte_orte_component.lock); OBJ_DESTRUCT(&mca_rte_orte_component.lock); return OMPI_SUCCESS; diff --git a/ompi/mca/rte/orte/rte_orte_module.c b/ompi/mca/rte/orte/rte_orte_module.c index 56fe8660a45..ce35edfda46 100644 --- a/ompi/mca/rte/orte/rte_orte_module.c +++ b/ompi/mca/rte/orte/rte_orte_module.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2012-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. @@ -19,7 +19,6 @@ #include "opal/util/argv.h" #include "opal/util/proc.h" #include "opal/util/opal_getcwd.h" -#include "opal/mca/dstore/dstore.h" #include "opal/mca/pmix/pmix.h" #include "opal/threads/threads.h" #include "opal/class/opal_list.h" @@ -56,7 +55,7 @@ extern ompi_rte_orte_component_t mca_rte_orte_component; void ompi_rte_abort(int error_code, char *fmt, ...) { va_list arglist; - + /* If there was a message, output it */ va_start(arglist, fmt); if( NULL != fmt ) { @@ -66,7 +65,7 @@ void ompi_rte_abort(int error_code, char *fmt, ...) free( buffer ); } va_end(arglist); - + /* if I am a daemon or the HNP... 
*/ if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { /* whack my local procs */ @@ -113,8 +112,8 @@ void ompi_rte_wait_for_debugger(void) if (1 == MPIR_being_debugged) { debugger = 1; } - - if (!debugger) { + + if (!debugger && NULL == getenv("ORTE_TEST_DEBUGGER_ATTACH")) { /* if not, just return */ return; } @@ -142,7 +141,7 @@ void ompi_rte_wait_for_debugger(void) if (0 != ORTE_PROC_MY_NAME->vpid) { return; } - + /* VPID 0 waits for a message from the HNP */ OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t); xfer.active = true; @@ -153,4 +152,4 @@ void ompi_rte_wait_for_debugger(void) /* let the MPI progress engine run while we wait */ OMPI_WAIT_FOR_COMPLETION(xfer.active); } -} +} diff --git a/ompi/mca/rte/rte.h b/ompi/mca/rte/rte.h index 60e2e259eb8..519e3dac12d 100644 --- a/ompi/mca/rte/rte.h +++ b/ompi/mca/rte/rte.h @@ -6,7 +6,7 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow */ @@ -34,7 +34,7 @@ * is desired. * 3. OMPI_NAME_PRINT - a macro that prints a process name when given * a pointer to ompi_process_name_t. The output format is to be - * a single string representing the name. This function should + * a single string representing the name. This function should * be thread-safe for multiple threads to call simultaneously. * 4. OMPI_PROC_MY_NAME - a pointer to a global variable containing * the ompi_process_name_t for this process. Typically, this is @@ -55,7 +55,7 @@ * d. OMPI_RTE_CMP_ALL * 7. uint64_t ompi_rte_hash_name(name) - return a string hash uniquely * representing the ompi_process_name passed in. - * 8. OMPI_NAME - an Opal DSS constant for a handler already registered + * 8. OMPI_NAME - an Opal DSS constant for a handler already registered * to serialize/deserialize an ompi_process_name_t structure. * * (b) Collective objects and operations @@ -64,7 +64,7 @@ * following fields: * a. id (ORTE type: int32_t) * b. 
bool active - * flag that user can poll on to know when collective + * flag that user can poll on to know when collective * has completed - set to false just prior to * calling user callback function, if provided * 2. ompi_rte_modex - a function that performs an exchange of endpoint information @@ -86,7 +86,7 @@ * a. app_num - * b. pid - this process's pid. Should be same as getpid(). * c. num_procs - Number of processes in this job (ie, MCW) - * d. my_node_rank - relative rank on local node to other peers this run-time + * d. my_node_rank - relative rank on local node to other peers this run-time * instance knows about. If doing dynamics, this may be something * different than my_local_rank, but will be my_local_rank in a * static job. @@ -96,19 +96,19 @@ * g. peer_modex - a collective id for the modex operation * h. peer_init_barrier - a collective id for the barrier during MPI_Init * i. peer_fini_barrier - a collective id for the barrier during MPI_Finalize - * j. job_session_dir - + * j. job_session_dir - * k. proc_session_dir - * l. nodename - a string representation for the name of the node this * process is on * m. cpuset - * 2. ompi_process_info - a global instance of the ompi_process_t structure. - * 3. ompi_rte_proc_is_bound - global boolean that will be true if the runtime bound + * 3. ompi_rte_proc_is_bound - global boolean that will be true if the runtime bound * the process to a particular core or set of cores and is false otherwise. * * (d) Error handling objects and operations - * 1. void ompi_rte_abort(int err_code, char *fmt, ...) - Abort the current + * 1. void ompi_rte_abort(int err_code, char *fmt, ...) - Abort the current * process with the specified error code and message. - * 2. int ompi_rte_abort_peers(ompi_process_name_t *procs, size_t nprocs) - + * 2. int ompi_rte_abort_peers(ompi_process_name_t *procs, size_t nprocs) - * Abort the specified list of peers * 3. OMPI_ERROR_LOG(rc) - print error message regarding the given return code * 4. 
ompi_rte_register_errhandler - register a callback function for the RTE @@ -136,17 +136,17 @@ * The implementation of this function must store a COPY of the data * provided - the data is NOT guaranteed to be valid after return * from the call. - * 3. ompi_rte_db_fetch - - * NOTE: Fetch accepts an 'ompi_proc_t'. + * 3. ompi_rte_db_fetch - + * NOTE: Fetch accepts an 'ompi_proc_t'. * int ompi_rte_db_fetch(const struct ompi_proc_t *proc, * const char *key, - * void **data, + * void **data, * opal_data_type_t type); * 4. ompi_rte_db_fetch_pointer - - * NOTE: Fetch accepts an 'ompi_proc_t'. + * NOTE: Fetch accepts an 'ompi_proc_t'. * int ompi_rte_db_fetch_pointer(const struct ompi_proc_t *proc, * const char *key, - * void **data, + * void **data, * opal_data_type_t type); * 5. Pre-defined db keys (with associated values after rte_init) * a. OMPI_DB_HOSTNAME @@ -196,39 +196,6 @@ END_C_DECLS BEGIN_C_DECLS -/* Each RTE is required to define a DB key for identifying the node - * upon which a process resides, and for providing this information - * for each process - * - * #define OMPI_RTE_NODE_ID - */ - -/* Communication tags */ -#define OMPI_RML_TAG_UDAPL OMPI_RML_TAG_BASE+1 -#define OMPI_RML_TAG_OPENIB OMPI_RML_TAG_BASE+2 -#define OMPI_RML_TAG_XOPENIB OMPI_RML_TAG_BASE+3 -#define OMPI_RML_TAG_COMM_CID_INTRA OMPI_RML_TAG_BASE+4 -#define OMPI_RML_TAG_XOOB OMPI_RML_TAG_BASE+5 -#define OMPI_RML_TAG_SM_BACK_FILE_CREATED OMPI_RML_TAG_BASE+6 -#define OMPI_CRCP_COORD_BOOKMARK_TAG OMPI_RML_TAG_BASE+7 -#define OMPI_COMM_JOIN_TAG OMPI_RML_TAG_BASE+8 - -/* support for shared memory collectives */ -#define OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED OMPI_RML_TAG_BASE+9 -/* common sm component query result index */ -#define OMPI_RML_TAG_COMMON_SM_COMP_INDEX OMPI_RML_TAG_BASE+10 - -/* OFACM RML TAGs */ -#define OMPI_RML_TAG_OFACM OMPI_RML_TAG_BASE+11 -#define OMPI_RML_TAG_XOFACM OMPI_RML_TAG_BASE+12 - -#define OMPI_RML_PCONNECT_TAG OMPI_RML_TAG_BASE+13 - -#define 
OMPI_RML_TAG_USNIC_CONNECTIVITY OMPI_RML_TAG_BASE+14 -#define OMPI_RML_TAG_USNIC_CONNECTIVITY_REPLY OMPI_RML_TAG_BASE+15 - -#define OMPI_RML_TAG_DYNAMIC OMPI_RML_TAG_BASE+200 - /* * MCA Framework */ @@ -240,27 +207,27 @@ OMPI_DECLSPEC extern mca_base_framework_t ompi_rte_base_framework; * progress while waiting, so we loop over opal_progress, letting * the RTE progress thread move the RTE along */ -#define OMPI_WAIT_FOR_COMPLETION(flg) \ - do { \ - opal_output_verbose(1, ompi_rte_base_framework.framework_output, \ - "%s waiting on RTE event at %s:%d", \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__); \ - while ((flg)) { \ - opal_progress(); \ - } \ +#define OMPI_WAIT_FOR_COMPLETION(flg) \ + do { \ + opal_output_verbose(1, ompi_rte_base_framework.framework_output, \ + "%s waiting on RTE event at %s:%d", \ + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ + __FILE__, __LINE__); \ + while ((flg)) { \ + opal_progress(); \ + } \ }while(0); -#define OMPI_LAZY_WAIT_FOR_COMPLETION(flg) \ - do { \ - opal_output_verbose(1, ompi_rte_base_framework.framework_output, \ - "%s lazy waiting on RTE event at %s:%d", \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__); \ - while ((flg)) { \ - opal_progress(); \ - usleep(100); \ - } \ +#define OMPI_LAZY_WAIT_FOR_COMPLETION(flg) \ + do { \ + opal_output_verbose(1, ompi_rte_base_framework.framework_output, \ + "%s lazy waiting on RTE event at %s:%d", \ + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ + __FILE__, __LINE__); \ + while ((flg)) { \ + opal_progress(); \ + usleep(100); \ + } \ }while(0); typedef struct { diff --git a/ompi/mca/sbgp/Makefile.am b/ompi/mca/sbgp/Makefile.am deleted file mode 100644 index 6e62a802e9f..00000000000 --- a/ompi/mca/sbgp/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -# main library setup -noinst_LTLIBRARIES = libmca_sbgp.la -libmca_sbgp_la_SOURCES = - -# header setup -nobase_ompi_HEADERS = -nobase_nodist_ompi_HEADERS = - -# local files -headers = sbgp.h -libmca_sbgp_la_SOURCES += $(headers) $(nodist_headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -nobase_ompi_HEADERS += $(headers) -nobase_nodist_ompi_HEADERS += $(nodist_headers) -ompidir = $(ompiincludedir)/ompi/mca/sbgp -else -ompidir = $(includedir) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/ompi/mca/sbgp/base/Makefile.am b/ompi/mca/sbgp/base/Makefile.am deleted file mode 100644 index c64efb79709..00000000000 --- a/ompi/mca/sbgp/base/Makefile.am +++ /dev/null @@ -1,17 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -headers += \ - base/base.h -libmca_sbgp_la_SOURCES += \ - base/sbgp_base_frame.c \ - base/sbgp_base_init.c diff --git a/ompi/mca/sbgp/base/base.h b/ompi/mca/sbgp/base/base.h deleted file mode 100644 index 8b803127861..00000000000 --- a/ompi/mca/sbgp/base/base.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_SBGP_BASE_H -#define MCA_SBGP_BASE_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/mca_base_framework.h" -/* - * Global functions for SBGP - */ - -/* components in use */ -OMPI_MODULE_DECLSPEC extern opal_list_t mca_sbgp_base_components_in_use; -OMPI_MODULE_DECLSPEC extern int mca_sbgp_base_components_in_use_inited; -OMPI_DECLSPEC extern char *ompi_sbgp_subgroups_string; - -BEGIN_C_DECLS - -/* - * MCA Framework - */ -OMPI_DECLSPEC extern mca_base_framework_t ompi_sbgp_base_framework; - -/* select a component */ -OMPI_DECLSPEC int mca_sbgp_base_init(bool, bool); - -/* subgrouping component and key value */ -struct sbgp_base_component_keyval_t { - mca_base_component_list_item_t component; - char *key_value; -}; -typedef struct sbgp_base_component_keyval_t sbgp_base_component_keyval_t; -OBJ_CLASS_DECLARATION(sbgp_base_component_keyval_t); - -END_C_DECLS - -#endif /* MCA_SBGP_BASE_H */ diff --git a/ompi/mca/sbgp/base/owner.txt b/ompi/mca/sbgp/base/owner.txt deleted file mode 100644 index 55663d3bb8a..00000000000 --- a/ompi/mca/sbgp/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ? -status: ? diff --git a/ompi/mca/sbgp/base/sbgp_base_close.c b/ompi/mca/sbgp/base/sbgp_base_close.c deleted file mode 100644 index cc7dd26c4e7..00000000000 --- a/ompi/mca/sbgp/base/sbgp_base_close.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" - -#include - -#include "ompi/constants.h" -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/mca/sbgp/base/base.h" -#include "ompi/include/ompi/constants.h" - - -int mca_sbgp_base_close(void) -{ - - /* Close all remaining available modules */ - - mca_base_components_close(ompi_sbgp_base_framework.framework_output, - &mca_sbgp_base_components_opened, NULL); - - /* Close the framework output */ - opal_output_close (ompi_sbgp_base_framework.framework_output); - ompi_sbgp_base_framework.framework_output = -1; - - /* All done */ - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/sbgp/base/sbgp_base_frame.c b/ompi/mca/sbgp/base/sbgp_base_frame.c deleted file mode 100644 index a0091e35328..00000000000 --- a/ompi/mca/sbgp/base/sbgp_base_frame.c +++ /dev/null @@ -1,205 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include - -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNIST_H */ -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/mca/sbgp/base/base.h" -#include "ompi/include/ompi/constants.h" -#include "opal/util/argv.h" - -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. 
- */ - -#include "ompi/mca/sbgp/base/static-components.h" - -/* -** * Global variables -** */ -opal_list_t mca_sbgp_base_components_in_use = {{0}}; -int mca_sbgp_base_components_in_use_inited=0; -OMPI_DECLSPEC char *ompi_sbgp_subgroups_string = NULL; - -static void mca_sbgp_base_destruct (mca_sbgp_base_module_t *module) -{ - /* free the list of ranks */ - if(module->group_list ) { - free(module->group_list); - module->group_list=NULL; - } -} - -OBJ_CLASS_INSTANCE(mca_sbgp_base_module_t, - opal_object_t, - NULL, - mca_sbgp_base_destruct); - -OBJ_CLASS_INSTANCE(sbgp_base_component_keyval_t, - mca_base_component_list_item_t, - NULL, - NULL); - -/* get list of subgrouping coponents to use */ -static int ompi_sbgp_set_components_to_use(opal_list_t *sbgp_components_avail, - opal_list_t *sbgp_components_in_use) -{ - /* local variables */ - const mca_base_component_t *component; - mca_base_component_list_item_t *cli; - sbgp_base_component_keyval_t *clj; - char **subgroups_requested = NULL, **sbgp_string = NULL; - char *sbgp_component, *sbgp_key; - const char *component_name; - int i, sbgp_size = 0, - sbgp_string_size = 0, - rc = OMPI_SUCCESS; - - /* split the list of requested subgroups */ - subgroups_requested = opal_argv_split(ompi_sbgp_subgroups_string, ','); - if(NULL == subgroups_requested) { - return OMPI_ERROR; - } - sbgp_size = opal_argv_count (subgroups_requested); - - /* Initialize list */ - OBJ_CONSTRUCT(sbgp_components_in_use, opal_list_t); - - /* loop over list of components requested */ - for (i = 0; i < sbgp_size; i++) { - /* get key-value */ - sbgp_string = opal_argv_split(subgroups_requested[i], ':'); - if (NULL == sbgp_string) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - break; - } - - sbgp_string_size = opal_argv_count (sbgp_string); - if (sbgp_string_size < 1 || sbgp_string_size > 2) { - opal_output(ompi_sbgp_base_framework.framework_output, - "Requested SBGP configuration is illegal %s", - subgroups_requested[i]); - opal_argv_free (sbgp_string); - rc = 
OMPI_ERROR; - break; - } - - /* it is garanteed that sbgp_string[1] will either be NULL (count = 1) or a string */ - sbgp_key = sbgp_string[1]; - sbgp_component = sbgp_string[0]; - - /* loop over discovered components */ - OPAL_LIST_FOREACH(cli, sbgp_components_avail, mca_base_component_list_item_t) { - component = cli->cli_component; - component_name = component->mca_component_name; - - /* key_value[0] has the component name, and key_value[1], if - ** it is not NULL, has the key_value associated with this - ** instance of the compoenent - */ - - if (0 == strcmp (component_name, sbgp_component)) { - /* found selected component */ - clj = OBJ_NEW(sbgp_base_component_keyval_t); - if (NULL == clj) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - opal_argv_free (sbgp_string); - goto exit_ERROR; - } - /* fprintf(stderr,"sbgp selecting %s %s\n", sbgp_component, component_name); */ - - clj->component.cli_component = component; - if (NULL != sbgp_key) { - clj->key_value = strdup(sbgp_key); - } else { - clj->key_value = NULL; - } - opal_list_append(sbgp_components_in_use, (opal_list_item_t *)clj); - break; - } - } - - opal_argv_free (sbgp_string); - } - - /* Note: Need to add error checking to make sure all requested functions - ** were found */ - - /* - ** release resources - ** */ - exit_ERROR: - opal_argv_free (subgroups_requested); - - return rc; -} - -static int mca_sbgp_base_register(mca_base_register_flag_t flags) -{ - /* get list of sub-grouping functions to use */ - ompi_sbgp_subgroups_string = "basesmsocket,basesmuma,ibnet,p2p"; - (void) mca_base_var_register("ompi", "sbgp", "base", "subgroups_string", - "Default set of subgroup operations to apply ", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_LOCAL, - &ompi_sbgp_subgroups_string); - - return OMPI_SUCCESS; -} - -static int mca_sbgp_base_close(void) -{ - opal_list_item_t *item; - - while (NULL != (item = opal_list_remove_first (&mca_sbgp_base_components_in_use))) { - OBJ_RELEASE(item); - } 
- - OBJ_DESTRUCT(&mca_sbgp_base_components_in_use); - - return mca_base_framework_components_close(&ompi_sbgp_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. - */ -static int mca_sbgp_base_open(mca_base_open_flag_t flags) -{ - int ret; - - if (OMPI_SUCCESS != (ret = mca_base_framework_components_open(&ompi_sbgp_base_framework, flags))) { - return ret; - } - - ret = ompi_sbgp_set_components_to_use(&ompi_sbgp_base_framework.framework_components, - &mca_sbgp_base_components_in_use); - - return ret; -} - -MCA_BASE_FRAMEWORK_DECLARE(ompi, sbgp, "OMPI Subgroup Subsystem", mca_sbgp_base_register, - mca_sbgp_base_open, mca_sbgp_base_close, - mca_sbgp_base_static_components, 0); - diff --git a/ompi/mca/sbgp/base/sbgp_base_init.c b/ompi/mca/sbgp/base/sbgp_base_init.c deleted file mode 100644 index d1f66da9b53..00000000000 --- a/ompi/mca/sbgp/base/sbgp_base_init.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/mca/sbgp/base/base.h" -#include "ompi/include/ompi/constants.h" - -int mca_sbgp_base_init(bool enable_progress_threads, bool enable_mpi_threads) -{ - mca_sbgp_base_component *sbgp_component = NULL; - mca_base_component_list_item_t *cli; - opal_list_item_t *item; - int ret; - - /* loop over component initialization functions */ - for (item = opal_list_get_first((opal_list_t *) &mca_sbgp_base_components_in_use); - opal_list_get_end((opal_list_t *) &mca_sbgp_base_components_in_use) != item; - item = opal_list_get_next(item)) { - - cli = (mca_base_component_list_item_t *) item; - sbgp_component = (mca_sbgp_base_component *)cli->cli_component; - - ret = sbgp_component->sbgp_init_query(true, true); - if( OMPI_SUCCESS != ret) { - return ret; - } - } - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/sbgp/basesmsocket/Makefile.am b/ompi/mca/sbgp/basesmsocket/Makefile.am deleted file mode 100644 index d08cd5c14c4..00000000000 --- a/ompi/mca/sbgp/basesmsocket/Makefile.am +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - sbgp_basesmsocket.h \ - sbgp_basesmsocket_component.c \ - sbgp_basesmsocket_module.c - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -component_noinst = -component_install = -if MCA_BUILD_ompi_sbgp_basesmsocket_DSO -component_install += mca_sbgp_basesmsocket.la -else -component_noinst += libmca_sbgp_basesmsocket.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sbgp_basesmsocket_la_SOURCES = $(sources) -mca_sbgp_basesmsocket_la_LDFLAGS = -module -avoid-version -mca_sbgp_basesmsocket_la_LIBADD = - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sbgp_basesmsocket_la_SOURCES =$(sources) -libmca_sbgp_basesmsocket_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/sbgp/basesmsocket/configure.m4 b/ompi/mca/sbgp/basesmsocket/configure.m4 deleted file mode 100644 index afc86046737..00000000000 --- a/ompi/mca/sbgp/basesmsocket/configure.m4 +++ /dev/null @@ -1,19 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2012 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# MCA_sbgp_basesmsocket_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_sbgp_basesmsocket_CONFIG], [ - AC_CONFIG_FILES([ompi/mca/sbgp/basesmsocket/Makefile]) - - AS_IF([test "$OPAL_HAVE_HWLOC" = 1], - [$1], - [$2]) -]) diff --git a/ompi/mca/sbgp/basesmsocket/owner.txt b/ompi/mca/sbgp/basesmsocket/owner.txt deleted file mode 100644 index 55663d3bb8a..00000000000 --- a/ompi/mca/sbgp/basesmsocket/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ? -status: ? 
diff --git a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket.h b/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket.h deleted file mode 100644 index 739f913335e..00000000000 --- a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_basesmsocket_EXPORT_H -#define MCA_BCOL_basesmsocket_EXPORT_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/mca/sbgp/base/base.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" -#include "ompi/proc/proc.h" -#include "opal/util/output.h" - -BEGIN_C_DECLS - -#ifdef HAVE_SCHED_YIELD -# include -# define SPIN sched_yield() -#else /* no switch available */ -# define SPIN -#endif - -#define BASESMSOCKET_VERBOSE(level, ...) \ - do { \ - OPAL_OUTPUT_VERBOSE((ompi_sbgp_base_framework.framework_output, level, \ - __VA_ARGS__)); \ - } while(0); - -/** - * Structure to hold the basic shared memory coll component. First it holds the - * base coll component, and then holds a bunch of - * sm-coll-component-specific stuff (e.g., current MCA param - * values). 
- */ -struct mca_sbgp_basesmsocket_component_t { - /** Base coll component */ - mca_sbgp_base_component_2_0_0_t super; -}; - -/** - * Convenience typedef - */ -typedef struct mca_sbgp_basesmsocket_component_t - mca_sbgp_basesmsocket_component_t; - - -/* -** Base sub-group module -**/ - -struct mca_sbgp_basesmsocket_module_t { - /** Collective modules all inherit from opal_object */ - mca_sbgp_base_module_t super; - -}; -typedef struct mca_sbgp_basesmsocket_module_t mca_sbgp_basesmsocket_module_t; -OBJ_CLASS_DECLARATION(mca_sbgp_basesmsocket_module_t); - -/** -* Global component instance -*/ -OMPI_MODULE_DECLSPEC extern mca_sbgp_basesmsocket_component_t mca_sbgp_basesmsocket_component; - - -END_C_DECLS - -#endif /* MCA_BCOL_basesmsocket_EXPORT_H */ diff --git a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c b/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c deleted file mode 100644 index 79307da19fa..00000000000 --- a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c +++ /dev/null @@ -1,305 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" - -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#ifdef HAVE_FCNTL_H -#include -#endif - -#include "opal/mca/hwloc/hwloc.h" -#include "opal/mca/hwloc/base/base.h" -#include "opal/dss/dss_internal.h" -#include "opal/class/opal_object.h" - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "sbgp_basesmsocket.h" - -#include "ompi/patterns/comm/coll_ops.h" - - -/* - * Public string showing the coll ompi_sm V2 component version number - */ -const char *mca_sbgp_basesmsocket_component_version_string = - "Open MPI sbgp - basesmsocket collective MCA component version " OMPI_VERSION; - - -/* - * Local functions - */ - -static int basesmsocket_register(void); -static int basesmsocket_open(void); -static int basesmsocket_close(void); -static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, - struct ompi_communicator_t *comm, - char *key, - void *output_data - ); -static int mca_sbgp_basesmsocket_init_query(bool enable_progress_threads, - bool enable_mpi_threads); -/*----end local functions ----*/ - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_sbgp_basesmsocket_component_t mca_sbgp_basesmsocket_component = { - - /* First, fill in the super */ - - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .sbgp_version = { - MCA_SBGP_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "basesmsocket", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = basesmsocket_open, - .mca_close_component = basesmsocket_close, - 
.mca_register_component_params = basesmsocket_register, - }, - - .sbgp_init_query = mca_sbgp_basesmsocket_init_query, - .select_procs = mca_sbgp_basesmsocket_select_procs, - .priority = 0, - } -}; - -/* - * Register the component - */ -static int basesmsocket_register(void) -{ - mca_sbgp_basesmsocket_component_t *cs = &mca_sbgp_basesmsocket_component; - - cs->super.priority = 90; - (void) mca_base_component_var_register(&mca_sbgp_basesmsocket_component.super.sbgp_version, - "priority", "Priority for the sbgp basesmsocket component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cs->super.priority); - - return OMPI_SUCCESS; -} - -/* - * Open the component - */ -static int basesmsocket_open(void) -{ - return OMPI_SUCCESS; -} - -/* - * Close the component - */ -static int basesmsocket_close(void) -{ - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_sbgp_basesmsocket_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* at this stage there is no reason to disaulify this component */ - - /* done */ - return OMPI_SUCCESS; -} - -#if 0 -/* NTH: this is no longer used but may be used if we can determine the binding policy*/ -static int mca_sbgp_map_to_logical_socket_id(int *socket) -{ - int ret = OMPI_SUCCESS; - hwloc_obj_t obj; - hwloc_obj_t first_pu_object; - hwloc_bitmap_t good; - int pu_os_index = -1, my_logical_socket_id = -1; - int this_pus_logical_socket_id = -1; - - *socket = my_logical_socket_id; - - /* bozo check */ - if (NULL == opal_hwloc_topology) { - return OPAL_ERR_NOT_INITIALIZED; - } - - good = hwloc_bitmap_alloc(); - if (NULL == good) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* get this process' CPU binding */ - if( 0 != hwloc_get_cpubind(opal_hwloc_topology,good, 0)){ - /* report some error */ - BASESMSOCKET_VERBOSE(10, "The global variable opal_hwloc_topology appears not to have been 
initialized\n"); - hwloc_bitmap_free(good); - return OMPI_ERROR; - } - - /* find the first logical PU object in the hwloc tree */ - first_pu_object = hwloc_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU, 0); - - - /* get the next bit in the bitmap (note: if pu_os_index == -1, then the - * first bit is returned - */ - /* traverse the hwloc tree */ - while( -1 != (pu_os_index = hwloc_bitmap_next(good, pu_os_index) ) ) { - /* Traverse all PUs in the machine in logical order, in the simple case - * there should only be a single PU that this process is bound to, right? - * - */ - for( obj = first_pu_object; obj != NULL; obj = obj->next_cousin ) {/* WTF is a "next_cousin" ? */ - /* is this PU the same as the bit I pulled off the mask? */ - if( obj->os_index == (unsigned int) pu_os_index) { - /* Then I found it, break out of for loop */ - break; - } - } - - if( NULL != obj) { - /* if we found the PU, then go upward in the tree - * looking for the enclosing socket - */ - while( (NULL != obj) && ( HWLOC_OBJ_SOCKET != obj->type) ){ - obj = obj->parent; - } - - if( NULL == obj ) { - /* then we couldn't find an enclosing socket, report this */ - } else { - /* We found the enclosing socket */ - if( -1 == my_logical_socket_id ){ - /* this is the first PU that I'm bound to */ - this_pus_logical_socket_id = obj->logical_index; - my_logical_socket_id = this_pus_logical_socket_id; - } else { - /* this is not the first PU that I'm bound to. - * Seems I'm bound to more than a single PU. Question - * is, am I bound to the same socket?? - */ - /* in order to get rid of the compiler warning, I had to cast - * "this_pus_logical_socket_id", at a glance this seems ok, - * but if subgrouping problems arise, maybe look here. 
I shall - * tag this line with the "mark of the beast" for grepability - * 666 - */ - if( (unsigned int) this_pus_logical_socket_id != obj->logical_index ){ - /* 666 */ - /* Then we're bound to more than one socket...fail */ - this_pus_logical_socket_id = -1; - my_logical_socket_id = -1; - break; - } - } - } - - } - - /* end while */ - } - *socket = my_logical_socket_id; - hwloc_bitmap_free(good); - - return ret; - -} -#endif - -/* This routine is used to find the list of procs that run on the -** same host as the calling process. -*/ - -static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, - struct ompi_communicator_t *comm, - char *key, - void *output_data - ) -{ - /* local variables */ - mca_sbgp_basesmsocket_module_t *module; - int proc, cnt, n_local_peers; - - /* initialize data */ - for (proc = 0, n_local_peers = 0 ; proc < n_procs_in ; ++proc) { - if (OPAL_PROC_ON_LOCAL_SOCKET(procs[proc]->super.proc_flags)) { - n_local_peers++; - } - } - - /* we need to return a module even if there is only one local peer. 
this - * covers the case where there may be a basesmsocket module on one rank - * but not another */ - if (0 == n_local_peers) { - return NULL; - } - - /* create a new module */ - module = OBJ_NEW(mca_sbgp_basesmsocket_module_t); - if (!module) { - return NULL; - } - - module->super.group_size = n_local_peers; - module->super.group_comm = comm; - module->super.group_list = NULL; - module->super.group_net = OMPI_SBGP_SOCKET; - - /* allocate memory and fill in the group_list */ - module->super.group_list = (int *) calloc (n_local_peers, sizeof(int)); - if (NULL == module->super.group_list) { - OBJ_RELEASE(module); - return NULL; - } - - for (proc = 0, cnt = 0 ; proc < n_procs_in ; ++proc) { - if (OPAL_PROC_ON_LOCAL_SOCKET(procs[proc]->super.proc_flags)) { - module->super.group_list[cnt++] = proc; - } - } - - /* Return the module */ - return (mca_sbgp_base_module_t *) module; -} diff --git a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_module.c b/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_module.c deleted file mode 100644 index 7f075eecdd8..00000000000 --- a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_module.c +++ /dev/null @@ -1,35 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket.h" - -OBJ_CLASS_INSTANCE(mca_sbgp_basesmsocket_module_t, - mca_sbgp_base_module_t, NULL, NULL); diff --git a/ompi/mca/sbgp/basesmuma/Makefile.am b/ompi/mca/sbgp/basesmuma/Makefile.am deleted file mode 100644 index 1547f8e9505..00000000000 --- a/ompi/mca/sbgp/basesmuma/Makefile.am +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - sbgp_basesmuma.h \ - sbgp_basesmuma_component.c \ - sbgp_basesmuma_module.c - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_sbgp_basesmuma_DSO -component_install += mca_sbgp_basesmuma.la -else -component_noinst += libmca_sbgp_basesmuma.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. 
- -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sbgp_basesmuma_la_SOURCES = $(sources) -mca_sbgp_basesmuma_la_LDFLAGS = -module -avoid-version -mca_sbgp_basesmuma_la_LIBADD = - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sbgp_basesmuma_la_SOURCES =$(sources) -libmca_sbgp_basesmuma_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/sbgp/basesmuma/owner.txt b/ompi/mca/sbgp/basesmuma/owner.txt deleted file mode 100644 index 55663d3bb8a..00000000000 --- a/ompi/mca/sbgp/basesmuma/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ? -status: ? diff --git a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma.h b/ompi/mca/sbgp/basesmuma/sbgp_basesmuma.h deleted file mode 100644 index efe501e0466..00000000000 --- a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#ifndef MCA_BCOL_basesmuma_EXPORT_H -#define MCA_BCOL_basesmuma_EXPORT_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" -#include "ompi/proc/proc.h" - -BEGIN_C_DECLS - -#ifdef HAVE_SCHED_YIELD -# include -# define SPIN sched_yield() -#else /* no switch available */ -# define SPIN -#endif - - - /** - * Structure to hold the basic shared memory coll component. First it holds the - * base coll component, and then holds a bunch of - * sm-coll-component-specific stuff (e.g., current MCA param - * values). 
- */ - struct mca_sbgp_basesmuma_component_t { - /** Base coll component */ - mca_sbgp_base_component_2_0_0_t super; - - }; - - /** - * Convenience typedef - */ - typedef struct mca_sbgp_basesmuma_component_t - mca_sbgp_basesmuma_component_t; - - - /* - ** Base sub-group module - **/ - - struct mca_sbgp_basesmuma_module_t { - /** Collective modules all inherit from opal_object */ - mca_sbgp_base_module_t super; - - }; - typedef struct mca_sbgp_basesmuma_module_t mca_sbgp_basesmuma_module_t; - OBJ_CLASS_DECLARATION(mca_sbgp_basesmuma_module_t); - - /** - * Global component instance - */ - OMPI_MODULE_DECLSPEC extern mca_sbgp_basesmuma_component_t mca_sbgp_basesmuma_component; - - -END_C_DECLS - -#endif /* MCA_BCOL_basesmuma_EXPORT_H */ diff --git a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_component.c b/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_component.c deleted file mode 100644 index 4c6e2328600..00000000000 --- a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_component.c +++ /dev/null @@ -1,208 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "sbgp_basesmuma.h" - - -/* - * Public string showing the coll ompi_sm V2 component version number - */ -const char *mca_sbgp_basesmuma_component_version_string = - "Open MPI sbgp - basesmuma collective MCA component version " OMPI_VERSION; - - -/* - * Local functions - */ - -static int basesmuma_register(void); -static int basesmuma_open(void); -static int basesmuma_close(void); -static mca_sbgp_base_module_t *mca_sbgp_basesmuma_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, struct ompi_communicator_t *comm, char *key, void *output_data); - -static int mca_sbgp_basesmuma_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_sbgp_basesmuma_component_t mca_sbgp_basesmuma_component = { - - /* First, fill in the super */ - - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .sbgp_version = { - MCA_SBGP_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "basesmuma", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open, close, and register functions */ - - .mca_open_component = basesmuma_open, - .mca_close_component = basesmuma_close, - .mca_register_component_params = basesmuma_register, - }, - .sbgp_init_query = mca_sbgp_basesmuma_init_query, - .select_procs = mca_sbgp_basesmuma_select_procs, - .priority = 0, - } -}; - -/* - * Register the component - */ -static int basesmuma_register(void) -{ - mca_sbgp_basesmuma_component_t *cs = 
&mca_sbgp_basesmuma_component; - - /* set component priority */ - cs->super.priority = 90; - (void) mca_base_component_var_register(&cs->super.sbgp_version, - "priority", "Priority of the sbgp basesmuma", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &cs->super.priority); - return OMPI_SUCCESS; -} - -/* - * Open the component - */ -static int basesmuma_open(void) -{ - return OMPI_SUCCESS; -} - - -/* - * Close the component - */ -static int basesmuma_close(void) -{ - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_sbgp_basesmuma_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* at this stage there is no reason to disaulify this component */ - - /* done */ - return OMPI_SUCCESS; -} - -/* This routine is used to find the list of procs that run on the -** same host as the calling process. -*/ -static mca_sbgp_base_module_t *mca_sbgp_basesmuma_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, - struct ompi_communicator_t *comm, - char *key, - void *output_data - ) -{ - /* local variables */ - int cnt,proc,local,last_local_proc; - mca_sbgp_basesmuma_module_t *module; - - module=OBJ_NEW(mca_sbgp_basesmuma_module_t); - if (!module ) { - return NULL; - } - module->super.group_size=0; - module->super.group_comm = comm; - module->super.group_list = NULL; - module->super.group_net = OMPI_SBGP_MUMA; - for (proc = 0, cnt = 0, last_local_proc = 0 ; proc < n_procs_in ; ++proc) { - local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->super.proc_flags); - if (local) { - last_local_proc = proc; - cnt++; - } - } - /* if no other local procs found skip to end */ - - if( 2 > cnt ) { - /* There's always at least one - namely myself */ - assert(1 == cnt); - module->super.group_size = 1; - module->super.group_list = (int *) malloc (sizeof (int)); - module->super.group_list[0] = last_local_proc; - /* let ml handle 
this case */ - goto OneLocalPeer; - } - - /* generate list of local ranks */ - module->super.group_size=cnt; - if( cnt > 0 ) { - module->super.group_list=(int *)malloc(sizeof(int)*cnt); - if(NULL == module->super.group_list){ - goto Error; - } - } - - for (proc = 0, cnt = 0 ; proc < n_procs_in ; ++proc) { - local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->super.proc_flags); - if( local ) { - module->super.group_list[cnt++] = proc; - } - } -OneLocalPeer: - /* successful completion */ - return (mca_sbgp_base_module_t *)module; - - /* return with error */ - -Error: - - /* clean up */ - if( NULL != module->super.group_list ) { - free(module->super.group_list); - module->super.group_list=NULL; - } - - OBJ_RELEASE(module); - - return NULL; -} diff --git a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_module.c b/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_module.c deleted file mode 100644 index 79028c4e25a..00000000000 --- a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_module.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/sbgp/basesmuma/sbgp_basesmuma.h" - -/* - * Local functions - */ -static void -mca_sbgp_basesmuma_module_construct(mca_sbgp_basesmuma_module_t *module) -{ -} - -static void -mca_sbgp_basesmuma_module_destruct(mca_sbgp_basesmuma_module_t *module) -{ - /* done */ -} - -OBJ_CLASS_INSTANCE(mca_sbgp_basesmuma_module_t, - mca_sbgp_base_module_t, - mca_sbgp_basesmuma_module_construct, - mca_sbgp_basesmuma_module_destruct); diff --git a/ompi/mca/sbgp/ibnet/.opal_ignore b/ompi/mca/sbgp/ibnet/.opal_ignore deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/ompi/mca/sbgp/ibnet/Makefile.am b/ompi/mca/sbgp/ibnet/Makefile.am deleted file mode 100644 index df6b2bb17ae..00000000000 --- a/ompi/mca/sbgp/ibnet/Makefile.am +++ /dev/null @@ -1,55 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(sbgp_ibnet_CPPFLAGS) $(btl_openib_CPPFLAGS) - -sources = \ - sbgp_ibnet.h \ - sbgp_ibnet_mca.h \ - sbgp_ibnet_mca.c \ - sbgp_ibnet_component.c \ - sbgp_ibnet_module.c - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_sbgp_ibnet_DSO -component_install += mca_sbgp_ibnet.la -else -component_noinst += libmca_sbgp_ibnet.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. 
- -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sbgp_ibnet_la_SOURCES = $(sources) -mca_sbgp_ibnet_la_LDFLAGS = -module -avoid-version $(sbgp_ibnet_LDFLAGS) $(btl_openib_LDFLAGS) -mca_sbgp_ibnet_la_LIBADD = $(sbgp_ibnet_LIBS) $(btl_openib_LIBS) \ - $(OMPI_TOP_BUILDDIR)/ompi/mca/common/verbs/libmca_common_verbs.la \ - $(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofacm/libmca_common_ofacm.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sbgp_ibnet_la_SOURCES =$(sources) -libmca_sbgp_ibnet_la_LDFLAGS = -module -avoid-version - -$(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofacm/libmca_common_ofacm.la: foo.c - cd $(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofacm && $(MAKE) - -$(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofautils/libmca_common_ofautils.la: foo.c - cd $(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofautils && $(MAKE) - -foo.c: diff --git a/ompi/mca/sbgp/ibnet/configure.m4 b/ompi/mca/sbgp/ibnet/configure.m4 deleted file mode 100644 index 40124f0cefb..00000000000 --- a/ompi/mca/sbgp/ibnet/configure.m4 +++ /dev/null @@ -1,38 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_ompi_sbgp_ibnet_CONFIG([should_build]) -# ------------------------------------------ -# AC_DEFUN([MCA_ompi_sbgp_ibnet_POST_CONFIG], [ -# ]) - - -# MCA_ompi_sbgp_ibnet_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_ompi_sbgp_ibnet_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/sbgp/ibnet/Makefile]) - sbgp_ofa_happy="no" - sbgp_mlnx_ofed_happy="no" - - OPAL_CHECK_OPENFABRICS([sbgp_ibnet], [sbgp_ofa_happy="yes"]) - OPAL_CHECK_MLNX_OPENFABRICS([sbgp_ibnet], [sbgp_mlnx_ofed_happy="yes"]) - - AS_IF([test "$sbgp_ofa_happy" = "yes" -a "$sbgp_mlnx_ofed_happy" = "yes"], - [$1], - [$2]) - - # substitute in the things needed to build iboffload - AC_SUBST([sbgp_ibnet_CFLAGS]) - AC_SUBST([sbgp_ibnet_CPPFLAGS]) - AC_SUBST([sbgp_ibnet_LDFLAGS]) - AC_SUBST([sbgp_ibnet_LIBS]) -])dnl diff --git a/ompi/mca/sbgp/ibnet/owner.txt b/ompi/mca/sbgp/ibnet/owner.txt deleted file mode 100644 index 55663d3bb8a..00000000000 --- a/ompi/mca/sbgp/ibnet/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ? -status: ? diff --git a/ompi/mca/sbgp/ibnet/sbgp_ibnet.h b/ompi/mca/sbgp/ibnet/sbgp_ibnet.h deleted file mode 100644 index f29ffc33db7..00000000000 --- a/ompi/mca/sbgp/ibnet/sbgp_ibnet.h +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#ifndef MCA_BCOL_ibnet_EXPORT_H -#define MCA_BCOL_ibnet_EXPORT_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "infiniband/verbs.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/common/ofacm/connect.h" - -BEGIN_C_DECLS - -#ifdef HAVE_SCHED_YIELD -# include -# define SPIN sched_yield() -#else /* no switch available */ -# define SPIN -#endif - -typedef enum { - OFFLOAD_CONNECTX_B0, - OFFLOAD_DISABLE -} coll_offload_support; - -/** - * Structure to hold the basic shared memory coll component. First it holds the - * base coll component, and then holds a bunch of - * sm-coll-component-specific stuff (e.g., current MCA param - * values). - */ -struct mca_sbgp_ibnet_component_t { - /** Base coll component */ - mca_sbgp_base_component_2_0_0_t super; - - /** Enable disable verbose mode */ - int verbose; - - /* Maximum allowed number of subroups */ - int max_sbgps; - /* Enable disable default subnet id warning */ - bool warn_default_gid_prefix; - bool warn_nonexistent_if; - /* IB MTU requested by user */ - int mtu; /** MTU on this port */ - /** IB partition definition */ - int pkey_val; - /* Keeping hca data */ - char *if_include; - char **if_include_list; - char *if_exclude; - char **if_exclude_list; - /** Dummy argv-style list; a copy of names from the - if_[in|ex]clude list that we use for error checking (to ensure - that they all exist) */ - char **if_list; - /** List of iboffload devices that have at list one active port */ - opal_list_t devices; - int curr_max_group_id; - uint32_t total_active_ports; -}; - -/** - * Convenience typedef - */ -typedef struct mca_sbgp_ibnet_component_t -mca_sbgp_ibnet_component_t; - -/* IB port OBJ*/ -struct mca_sbgp_ibnet_port_t { - uint16_t id; /** Port number */ - int stat; /** Port status - Active,Init,etc.. 
*/ - enum ibv_mtu mtu; /** MTU on this port */ - coll_offload_support coll_offload; /** Collectives offload mode */ - uint64_t subnet_id; /** Sunnet id for the port */ - /* uint8_t src_path_bits; */ - uint16_t lid; - uint16_t lmc; - /** Array of the peer's CPCs available on this port */ - uint32_t num_cpcs; - bool used; - ompi_common_ofacm_base_module_data_t *pm_cpc_data; - ompi_common_ofacm_base_module_t *local_cpc; /* selected cpc*/ - ompi_common_ofacm_base_module_data_t *remote_cpc_data; /* data for remote cpc */ -}; - -typedef struct mca_sbgp_ibnet_port_t mca_sbgp_ibnet_port_t; - -typedef enum { - MCA_SBGP_IBNET_NONE = 0, - MCA_SBGP_IBNET_NODE_LEADER = 1<<0, - MCA_SBGP_IBNET_SOCKET_LEADER = 1<<1, - MCA_SBGP_IBNET_SWITCH_LEADER = 1<<2 -} mca_sbgp_ibnet_duty_t; - -typedef enum { - MCA_SBGP_IBNET_ALL_NET, - MCA_SBGP_IBNET_NODE_NET, - MCA_SBGP_IBNET_NONE_NET -} mca_sbgp_ibnet_mode_t; - -struct mca_sbgp_ibnet_proc_t { - opal_list_item_t super; - ompi_proc_t *ompi_proc; /* Ompi proc pointer */ - int ompi_proc_index; /* Index of the proc in array */ - uint32_t rank; /* vpid, remote proc rank */ - uint32_t num_ports; /* number of remote ports */ - int *use_port; /* the size of this array is equal to number of cgroups that points to this proc. - Each cgroup has own index "I". The array keep remote port number that ne need to use - for cgroup "I" - use_port[I]. We need it for iboffload module */ - mca_sbgp_ibnet_port_t *remote_ports_info; /* the array keeps remote port information */ - mca_sbgp_ibnet_duty_t duty; /* Socket leader, Node leader, switch leader, etc. 
*/ -}; - -typedef struct mca_sbgp_ibnet_proc_t mca_sbgp_ibnet_proc_t; -OBJ_CLASS_DECLARATION(mca_sbgp_ibnet_proc_t); - -/* Device OBJ */ -struct mca_sbgp_ibnet_device_t { - opal_list_item_t super; - struct ibv_device* ib_dev; /* pointer to device, from device list */ - int device_index; /* device index in device list */ - struct ibv_device_attr ib_dev_attr; /* attributes of the device */ - int num_act_ports; - int num_allowed_ports; - struct mca_sbgp_ibnet_port_t *ports; - /* CPC stuff */ - ompi_common_ofacm_base_module_t **cpcs; /* Array of CPCs */ - uint8_t num_cpcs; /* Number of elements in cpc array */ -}; - -typedef struct mca_sbgp_ibnet_device_t mca_sbgp_ibnet_device_t; -OBJ_CLASS_DECLARATION(mca_sbgp_ibnet_device_t); - -struct mca_sbgp_ibnet_connection_group_info_t { - int device_index; /* device index in device list */ - uint32_t port; /* port number */ - /* Used for detect number of a port to communicate with remote proc, - index in use_port arrray in the mca_sbgp_ibnet_proc_t structure */ - uint32_t index; - /* array of procs connected with this group */ - uint32_t num_procs; - opal_pointer_array_t *ibnet_procs; -}; -typedef struct mca_sbgp_ibnet_connection_group_info_t - mca_sbgp_ibnet_connection_group_info_t; - -/* - ** Base sub-group module - **/ -struct mca_sbgp_ibnet_module_t { - /** Collective modules all inherit from opal_object */ - mca_sbgp_base_module_t super; - int group_id; - /* opal_pointer_array_t *ibnet_procs; */ - /* number of connection groups */ - int num_cgroups; - /* - * Array of connection groups. There are same procs in these groups, - * but they were created over different ports (and different devices maybe). 
- */ - mca_sbgp_ibnet_connection_group_info_t *cgroups; - mca_sbgp_ibnet_mode_t mode; /* working mode of the module, it is ALL by default */ -}; -typedef struct mca_sbgp_ibnet_module_t mca_sbgp_ibnet_module_t; -OBJ_CLASS_DECLARATION(mca_sbgp_ibnet_module_t); - -/* Error and verbose prints */ - -static inline int mca_sbgp_ibnet_err(const char* fmt, ...) -{ - va_list list; - int ret; - - va_start(list, fmt); - ret = vfprintf(stderr, fmt, list); - va_end(list); - return ret; -} - -#define IBNET_ERROR(args) \ - do { \ - mca_sbgp_ibnet_err("[%s]%s[%s:%d:%s] IBNET ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_sbgp_ibnet_err args; \ - mca_sbgp_ibnet_err("\n"); \ - } while(0); - -#if OPAL_ENABLE_DEBUG -#define IBNET_VERBOSE(level, args) \ - do { \ - if(mca_sbgp_ibnet_component.verbose >= level) { \ - mca_sbgp_ibnet_err("[%s]%s[%s:%d:%s] IBNET ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_sbgp_ibnet_err args; \ - mca_sbgp_ibnet_err("\n"); \ - } \ - } while(0); -#else -#define IBNET_VERBOSE(level, args) -#endif - -#define MCA_SBGP_IBNET_PKEY_MASK 0x7fff - -/* Error and verbose prints - end */ - -/* This routine is used to find the list of procs that run on the - ** same host as the calling process. 
- */ -mca_sbgp_base_module_t *mca_sbgp_ibnet_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, struct ompi_communicator_t *comm, char *key, void *output_data); - -/** - * Global component instance - */ -OMPI_MODULE_DECLSPEC extern mca_sbgp_ibnet_component_t mca_sbgp_ibnet_component; - - -END_C_DECLS - -#endif /* MCA_BCOL_ibnet_EXPORT_H */ diff --git a/ompi/mca/sbgp/ibnet/sbgp_ibnet_component.c b/ompi/mca/sbgp/ibnet/sbgp_ibnet_component.c deleted file mode 100644 index 15df331ad30..00000000000 --- a/ompi/mca/sbgp/ibnet/sbgp_ibnet_component.c +++ /dev/null @@ -1,600 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include "infiniband/verbs.h" -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "opal/util/argv.h" -#include "opal/include/opal/types.h" -#include "opal_stdint.h" -#include "sbgp_ibnet.h" -#include "sbgp_ibnet_mca.h" -#include "ompi/mca/common/ofacm/base.h" -#include "ompi/mca/common/ofacm/connect.h" -#include "ompi/mca/common/verbs/common_verbs.h" - -/* - * Public string showing the coll ompi_sm V2 component version number - */ -const char *mca_sbgp_ibnet_component_version_string = - "Open MPI sbgp - ibnet collective MCA component version " OMPI_VERSION; - -/* - * Local functions - */ - -static int mca_sbgp_ibnet_open(void); -static int mca_sbgp_ibnet_close(void); -static int mca_sbgp_ibnet_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_sbgp_ibnet_component_t mca_sbgp_ibnet_component = { - - /* First, fill in the super */ - - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .sbgp_version = { - MCA_SBGP_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "ibnet", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = mca_sbgp_ibnet_open, - .mca_close_component = mca_sbgp_ibnet_close, - .mca_register_component_params = mca_sbgp_ibnet_register_params, - }, - - .sbgp_init_query = mca_sbgp_ibnet_init_query, - .select_procs =mca_sbgp_ibnet_select_procs, - .priority = 0, - }, - - /* verbose mode */ - false, - - /* Maximum allowed number of subroups*/ - 0, - - /* Enable disable default subnet id warning */ - 
false, - false, - - /* IB MTU requested by user */ - 0, - - /* IB partition definition */ - 0, - - /* Keeping hca data */ - NULL, - NULL, - NULL, - NULL, - - /** Dummy argv-style list; a copy of names from the - if_[in|ex]clude list that we use for error checking (to ensure - that they all exist) */ - NULL, -}; - -static int mca_sbgp_ibnet_dummy_init_query( - bool enable_progress_threads, bool enable_mpi_threads) -{ - return OMPI_SUCCESS; -} - -/* - * Open the component - */ -static int mca_sbgp_ibnet_open(void) -{ - /* local variables */ - mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component; - - mca_sbgp_ibnet_component.pkey_val &= SBGP_IBNET_IB_PKEY_MASK; - - cs->total_active_ports = 0; - cs->curr_max_group_id = 100; - - OBJ_CONSTRUCT(&cs->devices, opal_list_t); - - return OMPI_SUCCESS; -} - -/* - * Close the component - */ -static int mca_sbgp_ibnet_close(void) -{ - mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component; - - OBJ_DESTRUCT(&cs->devices); - - return OMPI_SUCCESS; -} - -static void mca_sbgp_ibnet_device_constructor - (mca_sbgp_ibnet_device_t *device) -{ - /* Init OFACM stuf */ - device->ib_dev = NULL; - device->device_index = -1; - device->num_act_ports = 0; - memset(&device->ib_dev_attr, 0, sizeof(struct ibv_device_attr)); - device->cpcs= NULL; - device->num_cpcs = 0; - device->ports = NULL; -} - -static void mca_sbgp_ibnet_device_destructor - (mca_sbgp_ibnet_device_t *device) -{ - /* release memory */ - if (NULL != device->ports) { - free(device->ports); - } -} - -OBJ_CLASS_INSTANCE(mca_sbgp_ibnet_device_t, - opal_list_item_t, - mca_sbgp_ibnet_device_constructor, - mca_sbgp_ibnet_device_destructor); - -static int -get_port_list(mca_sbgp_ibnet_device_t *device, int *allowed_ports) -{ - char *name; - const char *dev_name; - int i, j, k, num_ports = 0; - - dev_name = ibv_get_device_name(device->ib_dev); - name = (char*) malloc(strlen(dev_name) + 4); - if (NULL == name) { - return 0; - } - - num_ports = 0; - if (NULL != 
mca_sbgp_ibnet_component.if_include_list) { - /* If only the device name is given (eg. mtdevice0,mtdevice1) use all - ports */ - i = 0; - - while (mca_sbgp_ibnet_component.if_include_list[i]) { - if (0 == strcmp(dev_name, - mca_sbgp_ibnet_component.if_include_list[i])) { - num_ports = device->ib_dev_attr.phys_port_cnt; - - IBNET_VERBOSE(10, ("if_include_list - %s.\n", mca_sbgp_ibnet_component.if_include_list[i])); - goto done; - } - ++i; - } - - /* Include only requested ports on the device */ - for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) { - sprintf(name, "%s:%d", dev_name, i); - - for (j = 0; - NULL != mca_sbgp_ibnet_component.if_include_list[j]; ++j) { - if (0 == strcmp(name, - mca_sbgp_ibnet_component.if_include_list[j])) { - - IBNET_VERBOSE(10, ("Allowed port %d: idx %d; if_include_list - %s\n", - i, num_ports, mca_sbgp_ibnet_component.if_include_list[j])); - - allowed_ports[num_ports++] = i; - break; - } - } - } - } else if (NULL != mca_sbgp_ibnet_component.if_exclude_list) { - /* If only the device name is given (eg. mtdevice0,mtdevice1) exclude - all ports */ - i = 0; - while (mca_sbgp_ibnet_component.if_exclude_list[i]) { - if (0 == strcmp(dev_name, - mca_sbgp_ibnet_component.if_exclude_list[i])) { - num_ports = 0; - goto done; - } - ++i; - } - /* Exclude the specified ports on this device */ - for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) { - sprintf(name,"%s:%d",dev_name,i); - for (j = 0; - NULL != mca_sbgp_ibnet_component.if_exclude_list[j]; ++j) { - if (0 == strcmp(name, - mca_sbgp_ibnet_component.if_exclude_list[j])) { - /* If found, set a sentinel value */ - j = -1; - break; - } - } - /* If we didn't find it, it's ok to include in the list */ - if (-1 != j) { - allowed_ports[num_ports++] = i; - } - } - } else { - /* Assume that all ports are allowed. num_ports will be adjusted - below to reflect whether this is true or not. 
*/ - for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) { - allowed_ports[num_ports++] = i; - } - } - -done: - - /* Remove the following from the error-checking if_list: - - bare device name - - device name suffixed with port number */ - if (NULL != mca_sbgp_ibnet_component.if_list) { - for (i = 0; NULL != mca_sbgp_ibnet_component.if_list[i]; ++i) { - /* Look for raw device name */ - if (0 == strcmp(mca_sbgp_ibnet_component.if_list[i], dev_name)) { - j = opal_argv_count(mca_sbgp_ibnet_component.if_list); - opal_argv_delete(&j, &(mca_sbgp_ibnet_component.if_list), - i, 1); - --i; - } - } - - for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) { - sprintf(name, "%s:%d", dev_name, i); - for (j = 0; NULL != mca_sbgp_ibnet_component.if_list[j]; ++j) { - if (0 == strcmp(mca_sbgp_ibnet_component.if_list[j], name)) { - k = opal_argv_count(mca_sbgp_ibnet_component.if_list); - opal_argv_delete(&k, &(mca_sbgp_ibnet_component.if_list), - j, 1); - --j; - break; - } - } - } - } - - free(name); - - return num_ports; -} - -static int ibnet_init_port(struct mca_sbgp_ibnet_device_t *device, - int port_index, struct ibv_port_attr *ib_port_attr, - struct ibv_context *ib_dev_context) -{ - union ibv_gid gid; - struct mca_sbgp_ibnet_port_t *p = &device->ports[port_index]; - - /* Set port data */ - p->lmc = (1 << ib_port_attr->lmc); - p->lid = ib_port_attr->lid; - p->stat = ib_port_attr->state; - p->mtu = ib_port_attr->active_mtu; - - IBNET_VERBOSE(10, ("Setting port data (%s:%d) lid=%d, lmc=%d, stat=%d, mtu=%d\n", - ibv_get_device_name(device->ib_dev), p->id, p->lid, - p->lmc, p->stat, p->mtu)); - - if (0 != ibv_query_gid(ib_dev_context, p->id, 0, &gid)) { - IBNET_ERROR(("ibv_query_gid failed (%s:%d)\n", - ibv_get_device_name(device->ib_dev), p->id)); - return OMPI_ERR_NOT_FOUND; - } - /* set subnet data */ - p->subnet_id = ntoh64(gid.global.subnet_prefix); - -/* p->subnet_id = gid.global.subnet_prefix; */ - - IBNET_VERBOSE(10, ("my IB-only subnet_id for HCA %d %s port %d is 
%lx\n" PRIx64, - gid.global.subnet_prefix,ibv_get_device_name(device->ib_dev), p->id, p->subnet_id)); - - return OMPI_SUCCESS; -} - -/* Find active port */ -static mca_sbgp_ibnet_device_t* ibnet_load_ports(struct ibv_device *ib_dev, int device_index) -{ - struct ibv_context *ib_dev_context = NULL; - mca_sbgp_ibnet_device_t *device = NULL; - int *allowed_ports = NULL; - int rc, port_cnt, port, i, ret, p = 0; - -#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE) - if (IBV_TRANSPORT_IB != ib_dev->transport_type) { - IBNET_VERBOSE(10, ("Skipping non IB device %s", - ibv_get_device_name(ib_dev))); - goto error; - } -#endif - - device = OBJ_NEW(mca_sbgp_ibnet_device_t); - device->ib_dev = ib_dev; - device->device_index = device_index; - ib_dev_context = ibv_open_device(ib_dev); - - if(NULL == ib_dev_context) { - IBNET_ERROR(("Error obtaining device context for %s errno says %s", - ibv_get_device_name(device->ib_dev), strerror(errno))); - goto error; - } - - if(ibv_query_device(ib_dev_context, &device->ib_dev_attr)) { - IBNET_ERROR(("error obtaining device attributes for %s errno says %s", - ibv_get_device_name(ib_dev), strerror(errno))); - goto error; - } - - allowed_ports = (int *) calloc(device->ib_dev_attr.phys_port_cnt, sizeof(int)); - if (NULL == allowed_ports) { - goto error; - } - - port_cnt = get_port_list(device, allowed_ports); - if (0 == port_cnt) { - goto error; - } - -#if OPAL_ENABLE_DEBUG - for (i = 0; i < port_cnt; ++i) { - IBNET_VERBOSE(10, ("allowed port %d with idx %d.\n", allowed_ports[i], i)); - } -#endif - - device->num_allowed_ports = port_cnt; - device->ports = (mca_sbgp_ibnet_port_t *) calloc(port_cnt, sizeof(mca_sbgp_ibnet_port_t)); - if (NULL == device->ports) { - goto error; - } - - /* Note ports are 1 based (i >= 1) */ - for(port = 0; port < port_cnt; port++) { - struct ibv_port_attr ib_port_attr; - - i = allowed_ports[port]; - if(ibv_query_port(ib_dev_context, i, &ib_port_attr)){ - IBNET_ERROR(("Error getting port attributes for device %s " - 
"port number %d errno says %s", - ibv_get_device_name(device->ib_dev), i, strerror(errno))); - continue; - } - - if(IBV_PORT_ACTIVE == ib_port_attr.state) { - /* Pasha: Need to think how we want to handle MTUs - if (ib_port_attr.active_mtu < mca_bcol_iboffload_component.mtu){ - device->mtu = ib_port_attr.active_mtu; - } - */ - /* start to put port info */ - device->ports[p].id = i; - device->ports[p].stat = ib_port_attr.state; - device->ports[p].mtu = ib_port_attr.active_mtu; - - device->ports[p].used = true; - - if (0 == mca_sbgp_ibnet_component.pkey_val) { - ret = ibnet_init_port(device, p, &ib_port_attr, ib_dev_context); - if (OMPI_SUCCESS != ret) { - IBNET_ERROR(("Device %s " - "port number %d , failed to init port, errno says %s", - ibv_get_device_name(device->ib_dev), - i, strerror(errno))); - continue; - } - } else { - uint16_t pkey,j; - device->ports[p].used = false; - - for (j = 0; j < device->ib_dev_attr.max_pkeys; j++) { - if(ibv_query_pkey(ib_dev_context, i, j, &pkey)){ - IBNET_ERROR(("error getting pkey for index %d, device %s " - "port number %d errno says %s", - j, ibv_get_device_name(device->ib_dev), i, strerror(errno))); - continue; - } - - pkey = ntohs(pkey) & MCA_SBGP_IBNET_PKEY_MASK; - if (pkey == (uint32_t) mca_sbgp_ibnet_component.pkey_val){ - ret = ibnet_init_port(device, p, &ib_port_attr, ib_dev_context); - if (OMPI_SUCCESS != ret) { - IBNET_ERROR(("Device %s " - "port number %d , failed to init port, errno says %s", - ibv_get_device_name(device->ib_dev), - i, strerror(errno))); - continue; - } - } - } - } - - p++; /* One port was loaded, go to the next one */ - } - } - - device->num_act_ports = p; - /* Update total number of active ports */ - mca_sbgp_ibnet_component.total_active_ports += p; - - if (0 != device->num_act_ports) { - ompi_common_ofacm_base_dev_desc_t dev; - /* Init dev */ - dev.ib_dev = ib_dev; - dev.ib_dev_context = ib_dev_context; - dev.capabilities = 0; - - rc = ompi_common_ofacm_base_select_for_local_port( - &dev, 
&device->cpcs, (int *)&device->num_cpcs); - /* If we get NOT_SUPPORTED, then no CPC was found for this - port. But that's not a fatal error -- just keep going; - let's see if we find any usable openib modules or not. */ - if (OMPI_SUCCESS != rc) { - /* All others *are* fatal. Note that we already did a - show_help in the lower layer */ - IBNET_VERBOSE(10, ("Device %s, no CPC found", - ibv_get_device_name(device->ib_dev))); - goto error; - } - } - - /* we do not continue to use the device we just collect data, - * so close it for now. We will open it later in iboffload coll*/ - if(ibv_close_device(ib_dev_context)) { - IBNET_ERROR(("Device %s, failed to close the device %s", - ibv_get_device_name(device->ib_dev), strerror(errno))); - } - - if (0 == device->num_act_ports) { - goto error; - } - - /* Pasha - I do not like the error flow here */ - free(allowed_ports); - - return device; - -error: - - if (NULL != allowed_ports) { - free(allowed_ports); - } - - OBJ_DESTRUCT(device); - - return NULL; -} - -/* Create list of IB hca that have active port */ -static int ibnet_load_devices(void) -{ - int num_devs, i; - struct ibv_device **ib_devs = NULL; - - mca_sbgp_ibnet_device_t *device = NULL; - mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component; - - IBNET_VERBOSE(7, ("Entering to ibnet_load_devices")); - - /* Get list of devices */ - ib_devs = ompi_ibv_get_device_list(&num_devs); - - if(0 == num_devs || NULL == ib_devs) { - IBNET_VERBOSE(10, ("No ib devices found")); - /* No hca error*/ - opal_show_help("help-mpi-btl-base.txt", "btl:no-nics", true); - return OMPI_ERROR; - } - - for (i = 0; i < num_devs; i++) { - device = ibnet_load_ports(ib_devs[i], i); - if (NULL != device) { - IBNET_VERBOSE(10, ("Device %s was appended to device list with index %d.\n", - ibv_get_device_name(device->ib_dev), i)); - opal_list_append(&cs->devices, - (opal_list_item_t *) device); - } - } - - if (opal_list_is_empty(&cs->devices)) { - /* No relevand devices were found, return error */ 
- IBNET_ERROR(("No active devices found")); - return OMPI_ERROR; - /* Maybe need to add error here*/ - } - - ompi_ibv_free_device_list(ib_devs); - - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_sbgp_ibnet_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - int rc, list_count = 0; - - /* Parse the include and exclude lists, checking for errors */ - mca_sbgp_ibnet_component.if_list = NULL; - mca_sbgp_ibnet_component.if_include_list = NULL; - mca_sbgp_ibnet_component.if_exclude_list = NULL; - - IBNET_VERBOSE(7, ("Calling mca_sbgp_ibnet_init_query")); - - if (NULL != mca_sbgp_ibnet_component.if_include) { - list_count++; - } - - if (NULL != mca_sbgp_ibnet_component.if_exclude) { - list_count++; - } - - if (list_count > 1) { - IBNET_ERROR(("Bad --mca (if_include, if_exclude) parameters !")); - return OMPI_ERROR; - } else if (NULL != mca_sbgp_ibnet_component.if_include) { - mca_sbgp_ibnet_component.if_include_list = - opal_argv_split(mca_sbgp_ibnet_component.if_include, ','); - mca_sbgp_ibnet_component.if_list = - opal_argv_copy(mca_sbgp_ibnet_component.if_include_list); - } else if (NULL != mca_sbgp_ibnet_component.if_exclude) { - mca_sbgp_ibnet_component.if_exclude_list = - opal_argv_split(mca_sbgp_ibnet_component.if_exclude, ','); - mca_sbgp_ibnet_component.if_list = - opal_argv_copy(mca_sbgp_ibnet_component.if_exclude_list); - } - - /* Init CPC components */ - rc = ompi_common_ofacm_base_init(); - if (OMPI_SUCCESS != rc) { - return rc; - } - - /* Load all devices and active ports */ - rc = ibnet_load_devices(); - if (OMPI_SUCCESS != rc) { - return rc; - } - - mca_sbgp_ibnet_component.super.sbgp_init_query = - mca_sbgp_ibnet_dummy_init_query; - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.c b/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.c deleted file mode 100644 index a9c2553c0ef..00000000000 --- 
a/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/common/ofacm/base.h" - -#include "sbgp_ibnet.h" -#include "sbgp_ibnet_mca.h" - -/* - * Local flags - */ -enum { - REGINT_NEG_ONE_OK = 0x01, - REGINT_GE_ZERO = 0x02, - REGINT_GE_ONE = 0x04, - REGINT_NONZERO = 0x08, - REGINT_MAX = 0x88 -}; - -enum { - REGSTR_EMPTY_OK = 0x01, - - REGSTR_MAX = 0x88 -}; - -static mca_base_var_enum_value_t mtu_values[] = { - {IBV_MTU_512, "256B"}, - {IBV_MTU_512, "512B"}, - {IBV_MTU_1024, "1k"}, - {IBV_MTU_2048, "2k"}, - {IBV_MTU_4096, "4k"}, - {0, NULL} -}; - -/* - * utility routine for string parameter registration - */ -static int reg_string(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - const char* default_value, char **storage, - int flags) -{ - int index; - - /* the MCA variable system will not change this value */ - *storage = (char *) default_value; - index = mca_base_component_var_register(&mca_sbgp_ibnet_component.super.sbgp_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "sbgp", "ibnet", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -/* 
- * utility routine for integer parameter registration - */ -static int reg_int(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - int default_value, int *storage, int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_sbgp_ibnet_component.super.sbgp_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_INT, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "sbgp", "ibnet", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) { - return OMPI_SUCCESS; - } - - if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) || - (0 != (flags & REGINT_GE_ONE) && *storage < 1) || - (0 != (flags & REGINT_NONZERO) && 0 == *storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -/* - * utility routine for boolean parameter registration - */ -static int reg_bool(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - bool default_value, bool *storage) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_sbgp_ibnet_component.super.sbgp_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "sbgp", "ibnet", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - return OMPI_SUCCESS; -} - -int mca_sbgp_ibnet_register_params(void) -{ - mca_base_var_enum_t *new_enum; - char *msg; - int ret, tmp; - - ret = OMPI_SUCCESS; - -#define CHECK(expr) do { \ - tmp = (expr); \ - if (OMPI_SUCCESS != tmp) ret = tmp; \ - } while (0) - - /* register openib component parameters */ - - 
CHECK(reg_int("priority", NULL, - "IB offload component priority" - "(from 0(low) to 90 (high))", 90, &mca_sbgp_ibnet_component.super.priority, 0)); - - CHECK(reg_int("verbose", NULL, - "Output some verbose IB offload BTL information " - "(0 = no output, nonzero = output)", 0, &mca_sbgp_ibnet_component.verbose, 0)); - - CHECK(reg_bool("warn_default_gid_prefix", NULL, - "Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)", - true, &mca_sbgp_ibnet_component.warn_default_gid_prefix)); - CHECK(reg_bool("warn_nonexistent_if", NULL, - "Warn if non-existent devices and/or ports are specified in the sbgp_ibnet_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)", - true, &mca_sbgp_ibnet_component.warn_nonexistent_if)); - - CHECK(reg_int("max_sbgps", NULL, - "Maximum allowed number of subroups", - 100, &mca_sbgp_ibnet_component.max_sbgps, 0)); - - CHECK(reg_int("pkey", "ib_pkey_val", - "OpenFabrics partition key (pkey) value. " - "Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB paritition key value (0x7fff)", - 0, &mca_sbgp_ibnet_component.pkey_val, 0)); - mca_sbgp_ibnet_component.pkey_val &= SBGP_IBNET_IB_PKEY_MASK; - - asprintf(&msg, "OpenFabrics MTU, in bytes (if not specified in INI files). 
Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes", - IBV_MTU_256, - IBV_MTU_512, - IBV_MTU_1024, - IBV_MTU_2048, - IBV_MTU_4096); - if (NULL == msg) { - /* Don't try to recover from this */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - - CHECK(mca_base_var_enum_create("sbgp_ibnet_mtu", mtu_values, &new_enum)); - if (OPAL_SUCCESS != ret) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - mca_sbgp_ibnet_component.mtu = IBV_MTU_1024; - ret = mca_base_component_var_register(&mca_sbgp_ibnet_component.super.sbgp_version, - "mtu", msg, MCA_BASE_VAR_TYPE_INT, new_enum, - 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &mca_sbgp_ibnet_component.mtu); - OBJ_RELEASE(new_enum); - free(msg); - - if (0 > ret) { - return ret; - } - - (void) mca_base_var_register_synonym(ret, "ompi", "sbgp", "ibnet", "ib_mtu", - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - - CHECK(reg_string("if_include", NULL, - "Comma-delimited list of devices/ports to be used (e.g. \"mthca0,mthca1:2\"; empty value means to use all ports found). Mutually exclusive with sbgp_ibnet_if_exclude.", - NULL, &mca_sbgp_ibnet_component.if_include, - 0)); - - CHECK(reg_string("if_exclude", NULL, - "Comma-delimited list of device/ports to be excluded (empty value means to not exclude any ports). Mutually exclusive with sbgp_ibnet_if_include.", - NULL, &mca_sbgp_ibnet_component.if_exclude, - 0)); - - /* Register any MCA params for the connect pseudo-components */ - if (OMPI_SUCCESS == ret) { - ret = ompi_common_ofacm_base_register(&mca_sbgp_ibnet_component.super.sbgp_version); - } - - return ret; -} diff --git a/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.h b/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.h deleted file mode 100644 index 58fd8adcb2c..00000000000 --- a/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - /** @file */ - -#ifndef MCA_SBGP_IBNET_MCA_H -#define MCA_SBGP_IBNET_MCA_H - -#include -#include "ompi_config.h" - -#define SBGP_IBNET_IB_PKEY_MASK 0x7fff - -int mca_sbgp_ibnet_register_params(void); - -#endif diff --git a/ompi/mca/sbgp/ibnet/sbgp_ibnet_module.c b/ompi/mca/sbgp/ibnet/sbgp_ibnet_module.c deleted file mode 100644 index fa5d54d1716..00000000000 --- a/ompi/mca/sbgp/ibnet/sbgp_ibnet_module.c +++ /dev/null @@ -1,1029 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/sbgp/ibnet/sbgp_ibnet.h" -#include "ompi/mca/common/ofacm/base.h" -#include "ompi/mca/common/ofacm/connect.h" -#include "ompi/patterns/comm/coll_ops.h" -/* - * Unused -static int ibnet_module_enable(mca_sbgp_base_module_t *module, - struct ompi_communicator_t *comm); - -*/ - -/* - * Local functions - */ -static void -mca_sbgp_ibnet_module_construct(mca_sbgp_ibnet_module_t *module) -{ - module->cgroups = NULL; - module->group_id = 0; -} - -static void -mca_sbgp_ibnet_module_destruct(mca_sbgp_ibnet_module_t *module) -{ - -} - -OBJ_CLASS_INSTANCE(mca_sbgp_ibnet_module_t, - mca_sbgp_base_module_t, - mca_sbgp_ibnet_module_construct, - mca_sbgp_ibnet_module_destruct); - -static void -mca_sbgp_ibnet_proc_construct(mca_sbgp_ibnet_proc_t *proc) -{ - /* done */ - proc->ompi_proc = 0; - proc->num_ports = 0; - proc->use_port = NULL; - proc->remote_ports_info = NULL; - proc->duty = MCA_SBGP_IBNET_NONE; -} - -static void -mca_sbgp_ibnet_proc_destruct(mca_sbgp_ibnet_proc_t *proc) -{ - /* done */ - if (NULL != proc->remote_ports_info) { - 
free(proc->remote_ports_info); - /* Pasha: need to check if we need - * to release some data from inside of the proc*/ - } - - if (NULL != proc->use_port) { - free(proc->use_port); - } -} - -OBJ_CLASS_INSTANCE(mca_sbgp_ibnet_proc_t, - opal_list_item_t, - mca_sbgp_ibnet_proc_construct, - mca_sbgp_ibnet_proc_destruct); - - -/* Pack all data to gather buffer */ -static int pack_gather_sbuff(char* sbuffer) -{ - int port, cpc; - coll_offload_support coll_offload_flag = OFFLOAD_CONNECTX_B0; /**< Pasha: add query for collectives offload support */ - - char* pack_ptr = sbuffer; - - mca_sbgp_ibnet_device_t *device = NULL; - uint32_t my_rank = ompi_process_info.my_name.vpid; - opal_list_t *devices = &mca_sbgp_ibnet_component.devices; - - /* Message format: - * - my rank (uint32_t) - * - number of active ports (uint32_t) - * - for each active port: - * + lid (uint16_t) - * + subnetid (uint64_t) - * + mtu (uint32_t) - * + colloffload (uint8_t) - * + num of cpcs (uint8_t) - * + for each cpc: (uint8_t) - * * cpc index (uint8_t) - * * cpc priority (uint8_t) - * * cpc buffer len (uint8_t) - * * cpc buffer (byte * buffer_len) - * - */ - - /* Start to put data */ - - /* Pack my rank , I need it because allgather doesn't work as expected */ - IBNET_VERBOSE(10, ("Send pack rank = %d\n", my_rank)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint32_t))); - - memcpy(pack_ptr, &my_rank, sizeof(uint32_t)); - pack_ptr += sizeof(uint32_t); - - /* Put number of ports that we send */ - IBNET_VERBOSE(10, ("Send pack num of ports = %d\n", mca_sbgp_ibnet_component.total_active_ports)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint32_t))); - - memcpy(pack_ptr, &mca_sbgp_ibnet_component.total_active_ports, sizeof(uint32_t)); - pack_ptr += sizeof(uint32_t); - - /* Go through list of device and build the message*/ - for (device = (mca_sbgp_ibnet_device_t *) opal_list_get_first(devices); - device != (mca_sbgp_ibnet_device_t *) opal_list_get_end(devices); - device = 
(mca_sbgp_ibnet_device_t *) opal_list_get_next((opal_list_item_t *)device)) { - for (port = 0; port < device->num_allowed_ports; ++port) { - if (!device->ports[port].used) { - continue; - } - - /* put port num */ - IBNET_VERBOSE(10, ("Send pack port num = %d\n", device->ports[port].id)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint16_t))); - - memcpy(pack_ptr, &device->ports[port].id, sizeof(uint16_t)); - pack_ptr += sizeof(uint16_t); - - /* put lid */ - IBNET_VERBOSE(10, ("Send pack lid = %d\n", device->ports[port].lid)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint16_t))); - - memcpy(pack_ptr, &device->ports[port].lid, sizeof(uint16_t)); - pack_ptr += sizeof(uint16_t); - - /* put subnetid */ - IBNET_VERBOSE(10, ("Send pack subnet id = %lx\n", device->ports[port].subnet_id)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint64_t))); - - memcpy(pack_ptr, &device->ports[port].subnet_id, sizeof(uint64_t)); - pack_ptr += sizeof(uint64_t); - - /* put default mtu */ - IBNET_VERBOSE(10, ("Send pack MTU = %d\n", device->ports[port].mtu)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint32_t))); - - memcpy(pack_ptr, &device->ports[port].mtu, sizeof(uint32_t)); - pack_ptr += sizeof(uint32_t); - - /* collectives offload support */ - IBNET_VERBOSE(10, ("Send pack collectives offload = %d\n", OFFLOAD_CONNECTX_B0)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint8_t))); - - /* Pasha: add query for collectives offload support */ - memcpy(pack_ptr, &coll_offload_flag, sizeof(uint8_t)); - pack_ptr += sizeof(uint8_t); - - /* number of cpcs for this port */ - IBNET_VERBOSE(10, ("Send pack number of cpcs = %d\n", device->num_cpcs)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint8_t))); - - memcpy(pack_ptr, &device->num_cpcs, sizeof(uint8_t)); - pack_ptr += sizeof(uint8_t); - - for (cpc = 0; cpc < device->num_cpcs; cpc++) { - uint8_t cpc_index; - uint8_t cpc_buflen; - - /* cpc index */ - cpc_index = 
ompi_common_ofacm_base_get_cpc_index(device->cpcs[cpc]->data.cbm_component); - - IBNET_VERBOSE(10, ("Send pack cpc index = %d\n", cpc_index)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint8_t))); - - memcpy(pack_ptr, &cpc_index, sizeof(uint8_t)); - pack_ptr += sizeof(uint8_t); - - /* cpc priority */ - IBNET_VERBOSE(10, ("Send pack cpc priority = %d\n", - device->cpcs[cpc]->data.cbm_priority)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint8_t))); - - memcpy(pack_ptr, &device->cpcs[cpc]->data.cbm_priority, sizeof(uint8_t)); - pack_ptr += sizeof(uint8_t); - - /* cpc buffer length in bytes */ - cpc_buflen = device->cpcs[cpc]->data.cbm_modex_message_len; - - IBNET_VERBOSE(10, ("Send pack cpc message len = %d\n", cpc_buflen)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint8_t))); - - memcpy(pack_ptr, &cpc_buflen, sizeof(uint8_t)); - pack_ptr += sizeof(uint8_t); - - /* cpc buffer */ - if (0 != cpc_buflen) { - IBNET_VERBOSE(10, ("Send pack cpc buffer len = %d\n", cpc_buflen)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint8_t))); - - memcpy(pack_ptr, device->cpcs[cpc]->data.cbm_modex_message, cpc_buflen); - pack_ptr += (size_t) cpc_buflen; - } - } - } - } - - return OMPI_SUCCESS; -} - -/* Translation vpid to ompi_proc */ -static int vpid_to_proc(ompi_vpid_t vpid, - struct ompi_proc_t ** procs, int n_procs_in, ompi_proc_t** out_proc) -{ - int i; - for (i = 0; i < n_procs_in; i++) { - if (vpid == procs[i]->proc_name.vpid) { - *out_proc = procs[i]; - return i; - } - } - - return OMPI_ERROR; -} - -static int unpack_and_load_gather_rbuff(char *rbuffer, int max_sent_bytes, - struct ompi_proc_t ** procs, int n_procs_in, opal_list_t *peers_data) -{ - - int i; - char* unpack_ptr; - - /* Message format: - * - my rank (uint32_t) - * - number of active ports (uint32_t) - * - for each active port: - * + lid (uint16_t) - * + subnetid (uint64_t) - * + mtu (uint32_t) - * + colloffload (uint8_t) - * + num of cpcs (uint8_t) - * + for each cpc: 
(uint8_t) - * * cpc index (uint8_t) - * * cpc priority (uint8_t) - * * cpc buffer len (uint8_t) - * * cpc buffer (byte*buffer_len) - * - */ - - /* Start to unpack data */ - for(i = 0; i < n_procs_in; i++) { - uint32_t p; - mca_sbgp_ibnet_proc_t *ibnet_proc; - - unpack_ptr = rbuffer + (size_t) (i * max_sent_bytes); - - /* create new proc */ - ibnet_proc = OBJ_NEW(mca_sbgp_ibnet_proc_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint32_t))); - IBNET_VERBOSE(10, ("Recive remote rank %d\n", ibnet_proc->rank)); - - memcpy(&ibnet_proc->rank, unpack_ptr, sizeof(uint32_t)); - unpack_ptr += sizeof(uint32_t); - - /* set back pointer to ompi_proc */ - ibnet_proc->ompi_proc_index = - vpid_to_proc(ibnet_proc->rank, procs, - n_procs_in, &ibnet_proc->ompi_proc); - if (OMPI_ERROR == ibnet_proc->ompi_proc_index) { - return OMPI_ERROR; - } - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint32_t))); - IBNET_VERBOSE(10, ("Recive number of ports %d\n", ibnet_proc->num_ports)); - - memcpy(&ibnet_proc->num_ports, unpack_ptr, sizeof(uint32_t)); - unpack_ptr += sizeof(uint32_t); - - /* prepare place for port data*/ - ibnet_proc->remote_ports_info = calloc(ibnet_proc->num_ports, sizeof(mca_sbgp_ibnet_port_t)); - if (NULL == ibnet_proc->remote_ports_info) { - return OMPI_ERROR; - } - - /* load the data */ - for(p = 0; p < ibnet_proc->num_ports; p++) { - mca_sbgp_ibnet_port_t *port = &ibnet_proc->remote_ports_info[p]; - uint32_t cpc; - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint16_t))); - IBNET_VERBOSE(10, ("Recive id %d\n", port->id)); - - memcpy(&port->id, unpack_ptr, sizeof(uint16_t)); - unpack_ptr += sizeof(uint16_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint16_t))); - IBNET_VERBOSE(10, ("Recive lid %d\n", port->lid)); - - memcpy(&port->lid, unpack_ptr, sizeof(uint16_t)); - unpack_ptr += sizeof(uint16_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, 
sizeof(uint64_t))); - IBNET_VERBOSE(10, ("Recive subnet id %lx\n", port->subnet_id)); - - memcpy(&port->subnet_id, unpack_ptr, sizeof(uint64_t)); - unpack_ptr += sizeof(uint64_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint32_t))); - IBNET_VERBOSE(10, ("Recive mtu %d\n", port->mtu)); - - memcpy(&port->mtu, unpack_ptr, sizeof(uint32_t)); - unpack_ptr += sizeof(uint32_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint8_t))); - IBNET_VERBOSE(10, ("Recive offload %d\n", port->coll_offload)); - - memcpy(&port->coll_offload, unpack_ptr, sizeof(uint8_t)); - unpack_ptr += sizeof(uint8_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint8_t))); - IBNET_VERBOSE(10, ("Recive number of cpcs %d\n", port->num_cpcs)); - - memcpy(&port->num_cpcs, unpack_ptr, sizeof(uint8_t)); - unpack_ptr += sizeof(uint8_t); - - port->pm_cpc_data = calloc(port->num_cpcs, - sizeof(ompi_common_ofacm_base_module_data_t)); - if (NULL == port->pm_cpc_data) { - return OMPI_ERROR; - } - - /* load cpc data */ - for (cpc = 0; cpc < port->num_cpcs; cpc++) { - ompi_common_ofacm_base_module_data_t *cpc_data = - &port->pm_cpc_data[cpc]; - uint8_t cpc_index = -1; - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint8_t))); - IBNET_VERBOSE(10, ("Recive cpc index %d\n", cpc_index)); - - memcpy(&cpc_index, unpack_ptr, sizeof(uint8_t)); - unpack_ptr += sizeof(uint8_t); - - cpc_data->cbm_component = - ompi_common_ofacm_base_get_cpc_byindex(cpc_index); - if (NULL == cpc_data->cbm_component) { - IBNET_VERBOSE(10, ("Failed to resolve cpc index %d\n", cpc_index)); - return OMPI_ERROR; - } - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint8_t))); - IBNET_VERBOSE(10, ("Recive priority %d\n", cpc_data->cbm_priority)); - - memcpy(&cpc_data->cbm_priority, unpack_ptr, sizeof(uint8_t)); - unpack_ptr += sizeof(uint8_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, 
sizeof(uint8_t))); - IBNET_VERBOSE(10, ("Recive cpc message len %d\n", cpc_data->cbm_modex_message_len)); - - memcpy(&cpc_data->cbm_modex_message_len, unpack_ptr, sizeof(uint8_t)); - unpack_ptr += sizeof(uint8_t); - - if (0 != cpc_data->cbm_modex_message_len) { - int cpc_buflen = cpc_data->cbm_modex_message_len; - - IBNET_VERBOSE(10, ("Recive cpc message data with len %d\n", cpc_buflen)); - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, cpc_buflen, cpc_buflen)); - - memcpy(&cpc_data->cbm_modex_message, unpack_ptr, cpc_buflen); - unpack_ptr += (size_t) cpc_buflen; - } - } - } - - /* Put the new proc to the list */ - opal_list_append(peers_data, (opal_list_item_t*) ibnet_proc); - } - - assert((uint32_t) n_procs_in == opal_list_get_size(peers_data)); - return OMPI_SUCCESS; -} - -static int cmp_cgroups(const void *p1, const void *p2) -{ - mca_sbgp_ibnet_connection_group_info_t *g1 = - (mca_sbgp_ibnet_connection_group_info_t *)p1; - mca_sbgp_ibnet_connection_group_info_t *g2 = - (mca_sbgp_ibnet_connection_group_info_t *)p2; - return (g2->num_procs - g1->num_procs); -} - -static int set_ibnet_proc_on_cgroup( - mca_sbgp_ibnet_connection_group_info_t *cgroup, - mca_sbgp_ibnet_proc_t *ibnet_proc, - mca_sbgp_ibnet_device_t *device, - mca_sbgp_ibnet_module_t *module) -{ - uint32_t p; - int k, rc, p_indx; /* port index in array of device */ - - for (p_indx = 0; p_indx < device->num_allowed_ports; ++p_indx) { - if (cgroup->port == device->ports[p_indx].id) { - break; - } - } - - assert(device->num_act_ports > p_indx); - - if (NULL == ibnet_proc->use_port) { - ibnet_proc->use_port = calloc(module->num_cgroups, sizeof(int)); - if (NULL == ibnet_proc->use_port) { - IBNET_ERROR(("Failed to allocate use_port array.")); - return OMPI_ERROR; - } - } - - IBNET_VERBOSE(10, ("Local port is %d, idx - %d.\n", - device->ports[p_indx].id, p_indx)); - - for(p = 0; p < ibnet_proc->num_ports; p++) { - if (device->ports[p_indx].subnet_id == - ibnet_proc->remote_ports_info[p].subnet_id) 
{ - ompi_common_ofacm_base_module_t *local_cpc = NULL; - ompi_common_ofacm_base_module_data_t *remote_cpc_data = NULL; - /* check if we have matching cpc on both sides */ - if (OMPI_SUCCESS != - ompi_common_ofacm_base_find_match(device->cpcs, - device->num_cpcs, - ibnet_proc->remote_ports_info[p].pm_cpc_data, - ibnet_proc->remote_ports_info[p].num_cpcs, - &local_cpc, - &remote_cpc_data)) { - /* Failed to match, can not use the port */ - IBNET_VERBOSE(10, ("Failed to match, can not use the port - %d.\n", p + 1)); - continue; - } - - for (k = 0; k < module->num_cgroups && ((p + 1) != (uint32_t) ibnet_proc->use_port[k]); ++k) - ; - - if (k < module->num_cgroups) { - /* The port in use - another connection group use it */ - continue; - } - - /* It means that connection group 'cgroup' communicates with - this proc over its own remote port */ - ibnet_proc->use_port[cgroup->index] = p + 1; - /* if it is no group array we need to create it*/ - if(OPAL_UNLIKELY(NULL == cgroup->ibnet_procs)) { - cgroup->ibnet_procs = OBJ_NEW(opal_pointer_array_t); - rc = opal_pointer_array_init(cgroup->ibnet_procs, 10, INT_MAX, 10); - if (OPAL_SUCCESS != rc) { - IBNET_ERROR(("Failed to allocate opal_pointer_array")); - return OMPI_ERROR; - } - } - - IBNET_VERBOSE(10, ("Device idx %d, local port idx %d; " - "adding rank %d to the module %p, rem port %d", - device->device_index, p_indx, ibnet_proc->rank, - module, ibnet_proc->remote_ports_info[p].id)); - /* No need to remove: opal_list_remove_item(peers_data, (opal_list_item_t*)ibnet_proc); */ - rc = opal_pointer_array_set_item(cgroup->ibnet_procs, - /* num_selected, */ cgroup->num_procs, - (void *) ibnet_proc); - if (OPAL_SUCCESS != rc) { - IBNET_ERROR( ("Failed to set rank %d to index %d", - ibnet_proc->rank, 1 + cgroup->num_procs)); - return OMPI_ERROR; - } - - /* put selected cpc data to this proc */ - ibnet_proc->remote_ports_info[p].local_cpc = local_cpc; - ibnet_proc->remote_ports_info[p].remote_cpc_data = remote_cpc_data; - - 
++cgroup->num_procs; - /* we done for the proc, go to next one */ - break; - } - } - - return OMPI_SUCCESS; -} - -static int setup_cgroup_all( - mca_sbgp_ibnet_connection_group_info_t *cgroup, - mca_sbgp_ibnet_device_t *device, - mca_sbgp_ibnet_module_t *module, - opal_list_t *peers_data) -{ - int rc; - mca_sbgp_ibnet_proc_t *ibnet_proc = NULL; - - for (ibnet_proc = (mca_sbgp_ibnet_proc_t *) opal_list_get_first(peers_data); - ibnet_proc != (mca_sbgp_ibnet_proc_t *) opal_list_get_end(peers_data); - ibnet_proc = (mca_sbgp_ibnet_proc_t *) - opal_list_get_next((opal_list_item_t *)ibnet_proc)) { - - rc = set_ibnet_proc_on_cgroup(cgroup, ibnet_proc, device, module); - if (OMPI_SUCCESS != rc) { - return rc; - } - } - - return OMPI_SUCCESS; -} - -static int setup_cgroup_node(mca_sbgp_ibnet_connection_group_info_t *cgroup, mca_sbgp_ibnet_device_t *device, - mca_sbgp_ibnet_module_t *module, opal_list_t *peers_data) -{ - int rc, local = 0; - mca_sbgp_ibnet_proc_t *ibnet_proc = NULL; - - for (ibnet_proc = (mca_sbgp_ibnet_proc_t *)opal_list_get_first(peers_data); - ibnet_proc != (mca_sbgp_ibnet_proc_t *)opal_list_get_end(peers_data); - ibnet_proc = (mca_sbgp_ibnet_proc_t *) - opal_list_get_next((opal_list_item_t *)ibnet_proc)) { - - local = OPAL_PROC_ON_LOCAL_NODE(ibnet_proc->ompi_proc->super.proc_flags); - if (0 == local) { - /* the remote process resides on different node */ - continue; - } - - /* the process resides on the same machine */ - rc = set_ibnet_proc_on_cgroup(cgroup, ibnet_proc, device, module); - if (OMPI_SUCCESS != rc) { - return rc; - } - } - - return OMPI_SUCCESS; -} - -/* The function should be the heart of the ibnet component. - * Main purpose: - * The function should run over list of all peers and select only "reachable" peers. - * Peer that have subnet_id equal to subnet id that I have on my ports is reachable. - * All peers that have the same number of active ports on the same subnet maybe grouped - * to subgroup? 
- * Need to think more about the select logic on this stage I just return list of all - * procs - */ -static int select_procs(mca_sbgp_ibnet_module_t *module, opal_list_t *peers_data) -{ - mca_sbgp_ibnet_device_t *device = NULL; - mca_sbgp_ibnet_proc_t *ibnet_proc = NULL; - mca_sbgp_ibnet_connection_group_info_t *cgroup = NULL; - - uint32_t p = 0; - int i = 0, j, rc = OMPI_SUCCESS; - int num_grouped = 0, - groups_to_use = 1; - - mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component; - - IBNET_VERBOSE(10, ("Start to select procs.\n")); - - module->num_cgroups = 0; - for (device = (mca_sbgp_ibnet_device_t *) opal_list_get_first(&cs->devices); - device != (mca_sbgp_ibnet_device_t *) opal_list_get_end(&cs->devices); - device = (mca_sbgp_ibnet_device_t *) - opal_list_get_next((opal_list_item_t *) device)) { - module->num_cgroups += device->num_act_ports; - IBNET_VERBOSE(10, ("Device num %d with index %d num of active ports %d\n", - ++i, device->device_index, device->num_act_ports)); - } - - module->cgroups = calloc(module->num_cgroups, - sizeof(mca_sbgp_ibnet_connection_group_info_t)); - - if (NULL == module->cgroups) { - IBNET_ERROR(("Failed to allocate cgroups")); - goto select_error; - } - - IBNET_VERBOSE(10, ("Num of cgroups - %d.\n", module->num_cgroups)); - - /* 1. 
Run over all active ports and build connection group - * for each one */ - for (device = (mca_sbgp_ibnet_device_t *) opal_list_get_first(&cs->devices); - device != (mca_sbgp_ibnet_device_t *) opal_list_get_end(&cs->devices); - device = (mca_sbgp_ibnet_device_t *) - opal_list_get_next((opal_list_item_t *)device)) { - /* run over active ports on the device */ - for(j = 0; j < device->num_act_ports; j++) { - cgroup = &module->cgroups[num_grouped]; - - /* Init cgroups structs */ - cgroup->device_index = device->device_index; - cgroup->index = num_grouped; - cgroup->port = device->ports[j].id; - cgroup->num_procs = 0; - - /* Setup comunication group */ - switch(module->mode) { - case MCA_SBGP_IBNET_ALL_NET: - rc = setup_cgroup_all(cgroup, device, module, peers_data); - break; - case MCA_SBGP_IBNET_NODE_NET: - rc = setup_cgroup_node(cgroup, device, module, peers_data); - break; - default: - rc = OMPI_ERROR; - IBNET_ERROR(("Module mode is unknow, fatal error")); - } - - if (OMPI_SUCCESS != rc) { - IBNET_ERROR(("Failed to setup cgroup.")); - goto select_error; - } - - if (0 != cgroup->num_procs) { - ++num_grouped; - } - } - } - - if (0 == num_grouped) { - /* No connection group was found */ - IBNET_ERROR(("No connection group was found.")); - goto select_error; - } - - /* If we have more than one single cgroup, - * we need to return groups that connects - * to exactly the same peers - */ - if (num_grouped > 1) { - - /* 2. Sort connection groups by size */ - qsort(module->cgroups, num_grouped, - sizeof(mca_sbgp_ibnet_connection_group_info_t), - cmp_cgroups); - - /* 3. What is the number of groups with maximal size */ - /* The first is Maximal */ - for (groups_to_use = 1; groups_to_use < num_grouped; groups_to_use++) { - if (module->cgroups[0].num_procs != module->cgroups[groups_to_use].num_procs) { - break; - } - } - - /* Ishai - It looks that noone is uses this groups_to_use value. In any case there is a bug in it. */ - /* 4. 
Check that all the maximal size groups are - * connect to the same peers, if not we just use FIRST cgroup */ - if (groups_to_use > 1) { - /* we need to check that all groups connects - * the same set of peers. */ - for (j = groups_to_use - 1; j > 0; j--) { - for (p = 0; p < module->cgroups[0].num_procs; p++) { - /* compare proc by proc....*/ - if (opal_pointer_array_get_item(module->cgroups[0].ibnet_procs, p) != - opal_pointer_array_get_item(module->cgroups[j].ibnet_procs, p)) { - /* peers are not equal, ignore this group and go to the next one */ - groups_to_use--; - if (j != groups_to_use) { - /* it was not the last group, swap last and this one */ - mca_sbgp_ibnet_connection_group_info_t tmp = module->cgroups[j]; - module->cgroups[j] = module->cgroups[groups_to_use]; - module->cgroups[groups_to_use] = tmp; - } - - break; /* go to the next group */ - } - } - } - } - } - /* updating sgroup number */ - module->num_cgroups = groups_to_use; - /* put array of ranks and size */ - - module->super.group_size = module->cgroups[0].num_procs; - module->super.group_list = (int *) calloc(module->super.group_size, sizeof(int)); - if (NULL == module->super.group_list) { - IBNET_ERROR(("Failed to allocate memory for group list")); - goto select_error; - } - - for (i = 0; i < module->super.group_size; i++) { - ibnet_proc = (mca_sbgp_ibnet_proc_t *) - opal_pointer_array_get_item(module->cgroups[0].ibnet_procs, i); - - assert(NULL != ibnet_proc); - IBNET_VERBOSE(10, ("Adding rank %d to group list", ibnet_proc->rank)); - - module->super.group_list[i] = ibnet_proc->ompi_proc_index; - } - - /* Let proc with lowest index be a leader of the subgroup */ - ibnet_proc = (mca_sbgp_ibnet_proc_t *) - opal_pointer_array_get_item(module->cgroups[0].ibnet_procs, 0); - - assert(NULL != ibnet_proc); - ibnet_proc->duty = MCA_SBGP_IBNET_NODE_LEADER; - -#if OPAL_ENABLE_DEBUG - IBNET_VERBOSE(10, ("Ibnet module: size - %d, num_cgroups - %d.\n", - module->super.group_size, module->num_cgroups)); - - for 
(i = 0; i < module->num_cgroups; ++i) { - IBNET_VERBOSE(10, ("cgroup %d uses port %d.\n", - i + 1, module->cgroups[i].port)); - } -#endif - - return OMPI_SUCCESS; - -select_error: - if (NULL != module->cgroups) { - for (i = 0; i < num_grouped; i++) { - if (NULL != module->cgroups[i].ibnet_procs) { - /* Ishai: When do we destruct it if the fucntion was successful - only at the end of the process? */ - OBJ_DESTRUCT(module->cgroups[i].ibnet_procs); - } - } - - free(module->cgroups); - } - - if (0 != module->super.group_size && - NULL != module->super.group_list) { - free(module->super.group_list); - } - - for (ibnet_proc = (mca_sbgp_ibnet_proc_t *) opal_list_get_first(peers_data); - ibnet_proc != (mca_sbgp_ibnet_proc_t *) opal_list_get_end(peers_data); - ibnet_proc = (mca_sbgp_ibnet_proc_t *) - opal_list_get_next((opal_list_item_t *) ibnet_proc)) { - if (NULL != ibnet_proc->use_port) { - free(ibnet_proc->use_port); - } - } - - return rc; -} - -/* This routine is used to find the list of procs that run on the -** same host as the calling process. 
-*/ - -#define IBNET_ALL "all" -#define IBNET_NODE "node" - -static int key2mode(char *key) -{ - if (NULL == key) { - IBNET_VERBOSE(6, ("key is NULL, return MCA_SBGP_IBNET_ALL")); - return MCA_SBGP_IBNET_ALL_NET; - } - if (strlen(IBNET_ALL) == strlen(key) && - 0 == strncmp(IBNET_ALL, key, strlen(IBNET_ALL))) { - IBNET_VERBOSE(6, ("key is MCA_SBGP_IBNET_ALL")); - return MCA_SBGP_IBNET_ALL_NET; - } - if (strlen(IBNET_NODE) == strlen(key) && - 0 == strncmp(IBNET_NODE, key, strlen(IBNET_NODE))) { - IBNET_VERBOSE(6, ("key is NODE")); - return MCA_SBGP_IBNET_NODE_NET; - } - - IBNET_VERBOSE(6, ("key was not detected, return MCA_SBGP_IBNET_NONE")); - return MCA_SBGP_IBNET_NONE_NET; -} - -static int mca_sbgp_ibnet_calc_sbuff_size(void) -{ - int bytes_tosend = 0, port, cpc; - mca_sbgp_ibnet_device_t *device; - - opal_list_t *devices = &mca_sbgp_ibnet_component.devices; - - bytes_tosend += sizeof(uint32_t); /* OPAL_UINT32 rank */ - bytes_tosend += sizeof(uint32_t); /* OPAL_UINT32 num of active ports */ - - /* Go through list of device and build the message*/ - for (device = (mca_sbgp_ibnet_device_t *) opal_list_get_first(devices); - device != (mca_sbgp_ibnet_device_t *) opal_list_get_end(devices); - device = (mca_sbgp_ibnet_device_t *) opal_list_get_next((opal_list_item_t *) device)) { - for (port = 0; port < device->num_allowed_ports; ++port) { - if (!device->ports[port].used) { - continue; - } - - /* OPAL_UINT16 port num */ - bytes_tosend += sizeof(uint16_t); - - /* OPAL_UINT16 lid */ - bytes_tosend += sizeof(uint16_t); - - /* OPAL_UINT64 subnetid */ - bytes_tosend += sizeof(uint64_t); - - /* OPAL_UINT32 default mtu */ - bytes_tosend += sizeof(uint32_t); - - /* OPAL_UINT8 collectives offload support */ - bytes_tosend += sizeof(uint8_t); - - /* OPAL_UINT8 number of cpcs for this port */ - bytes_tosend += sizeof(uint8_t); - - for (cpc = 0; cpc < device->num_cpcs; ++cpc) { - /* OPAL_UINT8 cpc index */ - bytes_tosend += sizeof(uint8_t); - - /* OPAL_UINT8 cpc priority */ - 
bytes_tosend += sizeof(uint8_t); - - /* cpc buffer length (OPAL_UINT8) in bytes */ - bytes_tosend += device->cpcs[cpc]->data.cbm_modex_message_len; - bytes_tosend += sizeof(uint8_t); - } - } - } - - return bytes_tosend; -} - -mca_sbgp_base_module_t *mca_sbgp_ibnet_select_procs(struct ompi_proc_t **procs, - int n_procs_in, - struct ompi_communicator_t *comm, - char *key, - void *output_data - ) -{ - /* local variables */ - opal_list_t peers_data; - mca_sbgp_ibnet_module_t *module; - - uint32_t rc; - char *sbuff = NULL, *rbuff = NULL; - - int *sbgp_procs_ranks = NULL, *ranks_in_comm = NULL; - int i, my_rank_in_group = -1, my_rank, num_bytes_tosend; - - struct mca_sbgp_ibnet_proc_t *ibnet_proc = NULL; - mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component; - - /* Create the module */ - module = OBJ_NEW(mca_sbgp_ibnet_module_t); - if (OPAL_UNLIKELY(NULL == module)) { - return NULL; - } - - module->num_cgroups = 0; - module->cgroups = NULL; - module->mode = key2mode(key); - - if (OPAL_UNLIKELY(MCA_SBGP_IBNET_NONE_NET == module->mode)) { - goto Error_module; - } - - module->super.group_size = 0; - module->super.group_list = NULL; - module->super.group_comm = comm; - module->super.group_net = OMPI_SBGP_IBCX2; - - ranks_in_comm = (int *) malloc(n_procs_in * sizeof(int)); - if (OPAL_UNLIKELY(NULL == ranks_in_comm)) { - IBNET_ERROR(("Cannot allocate memory.\n")); - goto Error; - } - - my_rank = ompi_comm_rank(&ompi_mpi_comm_world.comm); - - for (i = 0; i < n_procs_in; i++) { - ranks_in_comm[i] = procs[i]->proc_name.vpid; - if (my_rank == ranks_in_comm[i]) { - my_rank_in_group = i; - } - } - - /* Prepare send data */ - num_bytes_tosend = mca_sbgp_ibnet_calc_sbuff_size(); - - rc = comm_allreduce_pml(&num_bytes_tosend, - &num_bytes_tosend, 1, - MPI_INT, my_rank_in_group, - MPI_MAX, n_procs_in, - ranks_in_comm, &ompi_mpi_comm_world.comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - goto Error; - } - - IBNET_VERBOSE(10, ("The size of the send buff is %d\n", 
num_bytes_tosend)); - - assert(num_bytes_tosend > 0); - - /* Allocate send/recv buffers for allgather comunication */ - sbuff = (char *) malloc(num_bytes_tosend); - rbuff = (char *) malloc(num_bytes_tosend * n_procs_in); - if (OPAL_UNLIKELY(NULL == sbuff || NULL == rbuff)) { - IBNET_ERROR(("Failed to allocate buffers for send/recv ibnet allgather\n")); - goto Error; - } - - rc = pack_gather_sbuff(sbuff); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - goto Error; - } - - rc = comm_allgather_pml((void *) sbuff, (void *) rbuff, - num_bytes_tosend, MPI_BYTE, - my_rank_in_group, n_procs_in, - ranks_in_comm, &ompi_mpi_comm_world.comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBNET_ERROR(("Allgather call failed.\n")); - goto Error; - } - - /* Prepare list for arraving data */ - OBJ_CONSTRUCT(&peers_data, opal_list_t); - - /* Load the data to peers data */ - rc = unpack_and_load_gather_rbuff(rbuff, num_bytes_tosend, - procs, n_procs_in, &peers_data); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - goto Error; - } - - /* Select logic */ - rc = select_procs(module, &peers_data); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - goto Error; - } - - /* Put group id */ - sbgp_procs_ranks = (int *) malloc(module->super.group_size * - sizeof(int)); - if (OPAL_UNLIKELY(NULL == sbgp_procs_ranks)) { - IBNET_ERROR(("Cannot allocate memory.\n")); - goto Error; - } - - for (i = 0; i < module->super.group_size; ++i) { - ibnet_proc = (struct mca_sbgp_ibnet_proc_t *) - opal_pointer_array_get_item( - module->cgroups[0].ibnet_procs, i); - - sbgp_procs_ranks[i] = ibnet_proc->ompi_proc->proc_name.vpid; - if (my_rank == sbgp_procs_ranks[i]) { - my_rank_in_group = i; - } - - } - - assert(my_rank_in_group >= 0); - - rc = comm_allreduce_pml(&cs->curr_max_group_id, - &cs->curr_max_group_id, 1, - MPI_INT, my_rank_in_group, - MPI_MAX, module->super.group_size, - sbgp_procs_ranks, &ompi_mpi_comm_world.comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - goto Error; - } - - module->group_id = 
cs->curr_max_group_id; - cs->curr_max_group_id++; - - /* successful completion */ - /* clean up the temporary structures */ - OBJ_DESTRUCT(&peers_data); - - free(sbuff); - free(rbuff); - - free(ranks_in_comm); - free(sbgp_procs_ranks); - - IBNET_VERBOSE(10, ("Return ibnet module.\n")); - return (mca_sbgp_base_module_t *) module; - - /* return with error */ -Error: - /* clean up */ - if(NULL != module->super.group_list) { - free(module->super.group_list); - module->super.group_list = NULL; - } - - /* clean up the temporary structures */ - OBJ_DESTRUCT(&peers_data); - - if (NULL != sbgp_procs_ranks) { - free(sbgp_procs_ranks); - } - - if (NULL != ranks_in_comm) { - free(ranks_in_comm); - } - - if (NULL != sbuff) { - free(sbuff); - } - - if (NULL != rbuff) { - free(rbuff); - } - -Error_module: - OBJ_RELEASE(module); - - return NULL; -} diff --git a/ompi/mca/sbgp/p2p/Makefile.am b/ompi/mca/sbgp/p2p/Makefile.am deleted file mode 100644 index b4941a63765..00000000000 --- a/ompi/mca/sbgp/p2p/Makefile.am +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - sbgp_p2p.h \ - sbgp_p2p_component.c \ - sbgp_p2p_module.c - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_sbgp_p2p_DSO -component_install += mca_sbgp_p2p.la -else -component_noinst += libmca_sbgp_p2p.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. 
- -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sbgp_p2p_la_SOURCES = $(sources) -mca_sbgp_p2p_la_LDFLAGS = -module -avoid-version -mca_sbgp_p2p_la_LIBADD = - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sbgp_p2p_la_SOURCES =$(sources) -libmca_sbgp_p2p_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/sbgp/p2p/configure.m4 b/ompi/mca/sbgp/p2p/configure.m4 deleted file mode 100644 index 820602aa6ad..00000000000 --- a/ompi/mca/sbgp/p2p/configure.m4 +++ /dev/null @@ -1,27 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2013 Sandia National Laboratories. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_ompi_sbgp_p2p_POST_CONFIG(will_build) -# ---------------------------------------- -# The p2p sbgp requires a BML endpoint tag to compile, so require it. -# Require in POST_CONFIG instead of CONFIG so that we only require it -# if we're not disabled. -AC_DEFUN([MCA_ompi_sbgp_p2p_POST_CONFIG], [ - AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([BML])]) -])dnl - -# MCA_ompi_sbgp_p2p_CONFIG(action-if-can-compile, -# [action-if-cant-compile]) -# ------------------------------------------------ -# We can always build, unless we were explicitly disabled. -AC_DEFUN([MCA_ompi_sbgp_p2p_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/sbgp/p2p/Makefile]) - [$1] -])dnl diff --git a/ompi/mca/sbgp/p2p/owner.txt b/ompi/mca/sbgp/p2p/owner.txt deleted file mode 100644 index 55663d3bb8a..00000000000 --- a/ompi/mca/sbgp/p2p/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ? -status: ? diff --git a/ompi/mca/sbgp/p2p/sbgp_p2p.h b/ompi/mca/sbgp/p2p/sbgp_p2p.h deleted file mode 100644 index f8fa5fc1942..00000000000 --- a/ompi/mca/sbgp/p2p/sbgp_p2p.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. 
All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#ifndef MCA_BCOL_p2p_EXPORT_H -#define MCA_BCOL_p2p_EXPORT_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" -#include "ompi/proc/proc.h" - -BEGIN_C_DECLS - -#ifdef HAVE_SCHED_YIELD -# include -# define SPIN sched_yield() -#else /* no switch available */ -# define SPIN -#endif - - - /** - * Structure to hold the basic shared memory coll component. First it holds the - * base coll component, and then holds a bunch of - * sm-coll-component-specific stuff (e.g., current MCA param - * values). - */ - struct mca_sbgp_p2p_component_t { - /** Base coll component */ - mca_sbgp_base_component_2_0_0_t super; - - }; - - /** - * Convenience typedef - */ - typedef struct mca_sbgp_p2p_component_t - mca_sbgp_p2p_component_t; - - - /* - ** Base sub-group module - **/ - - struct mca_sbgp_p2p_module_t { - /** Collective modules all inherit from opal_object */ - mca_sbgp_base_module_t super; - - }; - typedef struct mca_sbgp_p2p_module_t mca_sbgp_p2p_module_t; - OBJ_CLASS_DECLARATION(mca_sbgp_p2p_module_t); - - /* This routine is used to find the list of procs that run on the - ** same host as the calling process. 
- */ - /* - struct mca_sbgp_base_module_t *mca_sbgp_p2p_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, char *key, void *output_data); - */ - - /** - * Global component instance - */ - OMPI_MODULE_DECLSPEC extern mca_sbgp_p2p_component_t mca_sbgp_p2p_component; - - -END_C_DECLS - -#endif /* MCA_BCOL_p2p_EXPORT_H */ diff --git a/ompi/mca/sbgp/p2p/sbgp_p2p_component.c b/ompi/mca/sbgp/p2p/sbgp_p2p_component.c deleted file mode 100644 index 2fd93da4041..00000000000 --- a/ompi/mca/sbgp/p2p/sbgp_p2p_component.c +++ /dev/null @@ -1,224 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "sbgp_p2p.h" -#include "ompi/mca/bml/bml.h" - - -/* - * Public string showing the coll ompi_sm V2 component version number - */ -const char *mca_sbgp_p2p_component_version_string = - "Open MPI sbgp - p2p collective MCA component version " OMPI_VERSION; - - -/* - * Local functions - */ - -static int p2p_register(void); -static int p2p_open(void); -static int p2p_close(void); -static mca_sbgp_base_module_t * mca_sbgp_p2p_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, struct ompi_communicator_t *comm, char *key, void *output_data); - -static int mca_sbgp_p2p_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_sbgp_p2p_component_t 
mca_sbgp_p2p_component = { - - - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .sbgp_version = { - MCA_SBGP_BASE_VERSION_2_0_0, - /* Component name and version */ - - .mca_component_name = "p2p", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = p2p_open, - .mca_close_component = p2p_close, - .mca_register_component_params = p2p_register, - }, - - .sbgp_init_query = mca_sbgp_p2p_init_query, - .select_procs = mca_sbgp_p2p_select_procs, - .priority = 0, - } - -}; - -static int p2p_register(void) -{ - mca_sbgp_p2p_component_t *cs = &mca_sbgp_p2p_component; - cs->super.priority = 90; - (void) mca_base_component_var_register(&cs->super.sbgp_version, - "priority", "Priority for the sbgp p2p component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &cs->super.priority); - - return OMPI_SUCCESS; -} - -/* - * Open the component - */ -static int p2p_open(void) -{ - return OMPI_SUCCESS; -} - - -/* - * Close the component - */ -static int p2p_close(void) -{ - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_sbgp_p2p_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* at this stage there is no reason to disaulify this component */ - - /* done */ - return OMPI_SUCCESS; -} -/* This routine is used to find the list of procs that run on the -** same host as the calling process. 
-*/ -static mca_sbgp_base_module_t * mca_sbgp_p2p_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, - struct ompi_communicator_t *comm, - char *key, - void *output_data - ) -{ - /* local variables */ - int cnt, proc, my_rank; - mca_sbgp_p2p_module_t *module; - - /* find my rank in the group */ - for (my_rank = -1, proc = 0 ; proc < n_procs_in ; ++proc) { - if (ompi_proc_local() == procs[proc]) { - my_rank = proc; - } - } - - /* I am not in the list - so will form no local subgroup */ - if (0 > my_rank) { - return NULL; - } - - module = OBJ_NEW(mca_sbgp_p2p_module_t); - if (!module ) { - return NULL; - } - - module->super.group_size = 0; - module->super.group_comm = comm; - module->super.group_net = OMPI_SBGP_P2P; - - /* allocate resources */ - module->super.group_list = (int *) calloc (n_procs_in, sizeof (int)); - if (NULL == module->super.group_list) { - goto Error; - } - - for (cnt = 0, proc = 0 ; proc < n_procs_in ; ++proc) { -#if defined(OMPI_PROC_ENDPOINT_TAG_BML) - mca_bml_base_endpoint_t* endpoint = - (mca_bml_base_endpoint_t*) procs[proc]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; -#endif - - if (my_rank == proc || !key) { - module->super.group_list[cnt++] = proc; - continue; - } - -#if defined(OMPI_PROC_ENDPOINT_TAG_BML) - if (NULL != endpoint) { - int num_btls = mca_bml_base_btl_array_get_size(&(endpoint->btl_eager)); - /* loop over btls */ - - for (int i_btl = 0 ; i_btl < num_btls ; ++i_btl) { - /* I am checking for specific btl */ - if (strcmp(endpoint->btl_eager.bml_btls[i_btl].btl-> - btl_component->btl_version.mca_component_name, key)) { - module->super.group_list[cnt++] = proc; - break; - } - } - } -#endif - } - - if (0 == cnt) { - goto Error; - } - - module->super.group_size = cnt; - module->super.group_list = (int *) realloc (module->super.group_list, sizeof (int) * cnt); - if (NULL == module->super.group_list) { - /* Shouldn't ever happen */ - goto Error; - } - - /* successful return */ - return (mca_sbgp_base_module_t *)module; - - 
/* return with error */ -Error: - /* clean up */ - if (NULL != module->super.group_list) { - free (module->super.group_list); - module->super.group_list = NULL; - } - OBJ_RELEASE(module); - - return NULL; -} diff --git a/ompi/mca/sbgp/p2p/sbgp_p2p_module.c b/ompi/mca/sbgp/p2p/sbgp_p2p_module.c deleted file mode 100644 index 40a1c104bb3..00000000000 --- a/ompi/mca/sbgp/p2p/sbgp_p2p_module.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/sbgp/p2p/sbgp_p2p.h" - -/* - * Local functions - */ -static void -mca_sbgp_p2p_module_construct(mca_sbgp_p2p_module_t *module) -{ -} - -static void -mca_sbgp_p2p_module_destruct(mca_sbgp_p2p_module_t *module) -{ - /* done */ -} - - -OBJ_CLASS_INSTANCE(mca_sbgp_p2p_module_t, - mca_sbgp_base_module_t, - mca_sbgp_p2p_module_construct, - mca_sbgp_p2p_module_destruct); diff --git a/ompi/mca/sbgp/sbgp.h b/ompi/mca/sbgp/sbgp.h deleted file mode 100644 index e6235e57b80..00000000000 --- a/ompi/mca/sbgp/sbgp.h +++ /dev/null @@ -1,137 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_SBGP_H -#define MCA_SBGP_H - -#include "ompi_config.h" -#include "opal/class/opal_list.h" -#include "ompi/mca/mca.h" -#include "ompi/communicator/communicator.h" - -#include "opal/util/show_help.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/** - * List of supported network types - */ - -typedef int (*mca_sbgp_component_init_query_fn_t) - (bool enable_progress_threads, bool enable_mpi_threads); - -typedef enum { - OMPI_SBGP_MUMA = 1 << 0, /* Muma */ - OMPI_SBGP_SOCKET = 1 << 1, /* CPU socket */ - OMPI_SBGP_P2P = 1 << 2, /* Point 2 point networks */ - OMPI_SBGP_IBCX2 = 1 << 3, /* Infiniband ConnextX2 */ - OMPI_SBGP_IB = 1 << 4 /* Infiniband */ -} mca_sbgp_net_type; - -/* - * Interface function for routine that will extract subgroups - * - * @param procs (IN) List of mpi processes to filter - * @param n_procs_in (IN) Number of input processes - * @param key (IN) optional key - * @param output_data (OUT) component specific output - * @return module, NULL if one is not created. - * - */ - -struct mca_sbgp_base_module_2_0_0_t { - - /** Collective modules all inherit from opal_object */ - opal_object_t super; - /* group size */ - int group_size; - - /* largest power of 2 in group */ - int pow_2; - - /* number of levels in the tree */ - int n_levels_pow2; - - /* my index in the group list, - * pointer to my rank */ - int my_index; - /* List of ranks. - * Actually we return to ML array of - * indexes to ompi_proc. - * And ML is responsible to replace - * the indexes to ranks */ - int *group_list; - /* pointer to *father* communicator, - * Not sure if we really need it now. I know my rank via my index, - * and ompi_proc I can cache on sbgp module. 
- * For ib I do not need it */ - struct ompi_communicator_t *group_comm; - /* network supported by this groups */ - mca_sbgp_net_type group_net; - - /*FIXME: - * I don't know where to add the use_hdl flag since the - * mca_bcol_basesmuma_comm_query takes just two input parameters. - */ - bool use_hdl; - -}; -typedef struct mca_sbgp_base_module_2_0_0_t mca_sbgp_base_module_2_0_0_t; -typedef struct mca_sbgp_base_module_2_0_0_t mca_sbgp_base_module_t; -/* typedef mca_sbgp_base_module_2_0_0_t mca_sbgp_base_module_t; */ -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_sbgp_base_module_t); - -typedef mca_sbgp_base_module_t *(*mca_sbgp_create_subgroup_fn_t)( - struct ompi_proc_t ** procs, int n_procs_in, - struct ompi_communicator_t *comm, char *key, - void *output_data - ); - -/** - * Subgrouping component interface - * - * Component interface for the sub-gorup framework. A public - * instance of this structure, called - * mca_sbgp_[component_name]_component, must exist in any sub-group - * component. 
- */ -struct mca_sbgp_base_component_2_0_0_t { - /** Base component description */ - mca_base_component_t sbgp_version; - - /** Sbgp component init query function */ - mca_sbgp_component_init_query_fn_t sbgp_init_query; - - /** process selection function */ - mca_sbgp_create_subgroup_fn_t select_procs; - - /** priority */ - int priority; - -}; -typedef struct mca_sbgp_base_component_2_0_0_t mca_sbgp_base_component_2_0_0_t; -typedef struct mca_sbgp_base_component_2_0_0_t mca_sbgp_base_component; - - -/* -* Macro for use in components that are of type coll -*/ -#define MCA_SBGP_BASE_VERSION_2_0_0 \ - OMPI_MCA_BASE_VERSION_2_1_0("sbgp", 2, 0, 0) - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* MCA_SBGP_H */ diff --git a/ompi/mca/sharedfp/Makefile.am b/ompi/mca/sharedfp/Makefile.am index 00ff8d39c4f..afe551c1329 100644 --- a/ompi/mca/sharedfp/Makefile.am +++ b/ompi/mca/sharedfp/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/sharedfp/addproc/.opal_ignore b/ompi/mca/sharedfp/addproc/.opal_ignore deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/ompi/mca/sharedfp/addproc/.opal_unignore b/ompi/mca/sharedfp/addproc/.opal_unignore deleted file mode 100644 index debe198de7d..00000000000 --- a/ompi/mca/sharedfp/addproc/.opal_unignore +++ /dev/null @@ -1 +0,0 @@ -gabriel diff --git a/ompi/mca/sharedfp/addproc/Makefile.am b/ompi/mca/sharedfp/addproc/Makefile.am deleted file mode 100644 index ba5eab9eb90..00000000000 --- a/ompi/mca/sharedfp/addproc/Makefile.am +++ /dev/null @@ -1,63 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2013 University of Houston. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_ompi_sharedfp_addproc_DSO -component_noinst = -component_install = mca_sharedfp_addproc.la -else -component_noinst = libmca_sharedfp_addproc.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sharedfp_addproc_la_SOURCES = $(sources) -mca_sharedfp_addproc_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sharedfp_addproc_la_SOURCES = $(sources) -libmca_sharedfp_addproc_la_LDFLAGS = -module -avoid-version - -# Source files - -#IMPORTANT: Update here when adding new source code files to the library -sources = \ - sharedfp_addproc.h \ - sharedfp_addproc.c \ - sharedfp_addproc_component.c \ - sharedfp_addproc_seek.c \ - sharedfp_addproc_request_position.c \ - sharedfp_addproc_write.c \ - sharedfp_addproc_iwrite.c \ - sharedfp_addproc_read.c \ - sharedfp_addproc_iread.c \ - sharedfp_addproc_file_open.c - -#The additional process is spawned by executing this executable -bin_PROGRAMS = mca_sharedfp_addproc_control - -mca_sharedfp_addproc_control_SOURCES = \ - sharedfp_addproc_control.h \ - sharedfp_addproc_control.c - -mca_sharedfp_addproc_control_LDADD = $(top_builddir)/ompi/libmpi.la diff --git a/ompi/mca/sharedfp/addproc/owner.txt b/ompi/mca/sharedfp/addproc/owner.txt deleted file mode 100644 index f886026a69e..00000000000 --- a/ompi/mca/sharedfp/addproc/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: UH -status: maintenance diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc.c b/ompi/mca/sharedfp/addproc/sharedfp_addproc.c deleted file mode 100644 index 3e528dd0032..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. Since linkers generally pull in symbols by object fules, - * keeping these symbols as the only symbols in this file prevents - * utility programs such as "ompi_info" from having to import entire - * modules just to query their version and parameters - */ - -#include "ompi_config.h" -#include "mpi.h" -#include "ompi/mca/sharedfp/sharedfp.h" -#include "ompi/mca/sharedfp/addproc/sharedfp_addproc.h" - -/* - * ******************************************************************* - * ************************ actions structure ************************ - * ******************************************************************* - */ - /* IMPORTANT: Update here when adding sharedfp component interface functions*/ -static mca_sharedfp_base_module_1_0_0_t addproc = { - mca_sharedfp_addproc_module_init, /* initalise after being selected */ - mca_sharedfp_addproc_module_finalize, /* close a module on a communicator */ - mca_sharedfp_addproc_seek, - mca_sharedfp_addproc_get_position, - mca_sharedfp_addproc_read, - mca_sharedfp_addproc_read_ordered, - mca_sharedfp_addproc_read_ordered_begin, - mca_sharedfp_addproc_read_ordered_end, - mca_sharedfp_addproc_iread, - mca_sharedfp_addproc_write, - mca_sharedfp_addproc_write_ordered, - mca_sharedfp_addproc_write_ordered_begin, - mca_sharedfp_addproc_write_ordered_end, - mca_sharedfp_addproc_iwrite, - mca_sharedfp_addproc_file_open, - mca_sharedfp_addproc_file_close 
-}; -/* - * ******************************************************************* - * ************************* structure ends ************************** - * ******************************************************************* - */ - -int mca_sharedfp_addproc_component_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* Nothing to do */ - - return OMPI_SUCCESS; -} - -struct mca_sharedfp_base_module_1_0_0_t * - mca_sharedfp_addproc_component_file_query - (mca_io_ompio_file_t *fh, int *priority) { - *priority = mca_sharedfp_addproc_priority; - - /*test, and update priority*/ - - return &addproc; -} - -int mca_sharedfp_addproc_component_file_unquery (mca_io_ompio_file_t *file) -{ - /* This function might be needed for some purposes later. for now it - * does not have anything to do since there are no steps which need - * to be undone if this module is not selected */ - - return OMPI_SUCCESS; -} - -int mca_sharedfp_addproc_module_init (mca_io_ompio_file_t *file) -{ - return OMPI_SUCCESS; -} - - -int mca_sharedfp_addproc_module_finalize (mca_io_ompio_file_t *file) -{ - return OMPI_SUCCESS; -} diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc.h b/ompi/mca/sharedfp/addproc/sharedfp_addproc.h deleted file mode 100644 index 14672830304..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_SHAREDFP_ADDPROC_H -#define MCA_SHAREDFP_ADDPROC_H - -#include "ompi_config.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/sharedfp/sharedfp.h" -#include "ompi/mca/io/ompio/io_ompio.h" -#include - -BEGIN_C_DECLS - -int mca_sharedfp_addproc_component_init_query(bool enable_progress_threads, - bool enable_mpi_threads); -struct mca_sharedfp_base_module_1_0_0_t * - mca_sharedfp_addproc_component_file_query (mca_io_ompio_file_t *file, int *priority); -int mca_sharedfp_addproc_component_file_unquery (mca_io_ompio_file_t *file); - -int mca_sharedfp_addproc_module_init (mca_io_ompio_file_t *file); -int mca_sharedfp_addproc_module_finalize (mca_io_ompio_file_t *file); - -extern int mca_sharedfp_addproc_priority; -extern int mca_sharedfp_addproc_verbose; -#if 0 -extern char[MPI_MAX_HOSTNAME_LEN] mca_sharedfp_addproc_control_host; -#endif - -OMPI_MODULE_DECLSPEC extern mca_sharedfp_base_component_2_0_0_t mca_sharedfp_addproc_component; -/* - * ****************************************************************** - * ********* functions which are implemented in this module ********* - * ****************************************************************** - */ -/*IMPORANT: Update here when implementing functions from sharedfp API*/ - -int mca_sharedfp_addproc_seek (mca_io_ompio_file_t *fh, - OMPI_MPI_OFFSET_TYPE offset, int whence); -int mca_sharedfp_addproc_get_position (mca_io_ompio_file_t *fh, - OMPI_MPI_OFFSET_TYPE * offset); -int mca_sharedfp_addproc_file_open (struct ompi_communicator_t *comm, - char* filename, - int amode, - struct ompi_info_t *info, - mca_io_ompio_file_t *fh); -int mca_sharedfp_addproc_file_close (mca_io_ompio_file_t *fh); -int mca_sharedfp_addproc_read (mca_io_ompio_file_t *fh, - void *buf, int count, MPI_Datatype datatype, MPI_Status *status); -int mca_sharedfp_addproc_read_ordered (mca_io_ompio_file_t *fh, - void *buf, int count, struct ompi_datatype_t 
*datatype, - ompi_status_public_t *status - ); -int mca_sharedfp_addproc_read_ordered_begin (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype); -int mca_sharedfp_addproc_read_ordered_end (mca_io_ompio_file_t *fh, - void *buf, - ompi_status_public_t *status); -int mca_sharedfp_addproc_iread (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_request_t **request); -int mca_sharedfp_addproc_write (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_status_public_t *status); -int mca_sharedfp_addproc_write_ordered (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_status_public_t *status); -int mca_sharedfp_addproc_write_ordered_begin (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype); -int mca_sharedfp_addproc_write_ordered_end (mca_io_ompio_file_t *fh, - void *buf, - ompi_status_public_t *status); -int mca_sharedfp_addproc_iwrite (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_request_t **request); -/****************************************************/ -/*The following are structures and definitions * - * copied over directly from uhio codebase */ -/****************************************************/ - -/*This structure will hang off of the mca_sharedfp_base_data_t's - *selected_module_data attribute - */ -struct mca_sharedfp_addproc_data -{ - MPI_Comm intercom; -}; - -typedef struct mca_sharedfp_addproc_data addproc_data; - - -int mca_sharedfp_addproc_request_position (struct mca_sharedfp_base_data_t * sh, - int bytes_requested, - OMPI_MPI_OFFSET_TYPE * offset); - -#define DO_ACK 0 /* To be set by the Environment Variable*/ -#define REQUEST_TAG 99 -#define ACK_TAG 1 -#define OFFSET_TAG 98 -#define END_TAG 97 - -#define SEEK_END_TAG 91 -#define SEEK_SET_TAG 92 -#define SEEK_CUR_TAG 93 -#define GET_POSITION_TAG 94 - 
-#define NUM_OF_SPAWNS 1 - -struct list { - - int procNo; - long numBytesArrAddr; - struct list *Next; -}; - -struct Stat { - int tag; - int source; - long* recvBuff; -}; - - -double uhio_shared_gettime(void); - -typedef struct list node; -typedef struct Stat statusStruct; - -/* - * ****************************************************************** - * ************ functions implemented in this module end ************ - * ****************************************************************** - */ - -END_C_DECLS - -#endif /* MCA_SHAREDFP_ADDPROC_H */ diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_component.c b/ompi/mca/sharedfp/addproc/sharedfp_addproc_component.c deleted file mode 100644 index 0b66d83bac0..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_component.c +++ /dev/null @@ -1,104 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. 
- */ - -#include "ompi_config.h" -#include "sharedfp_addproc.h" -#include "mpi.h" - -/* - * Public string showing the sharedfp addproc component version number - */ -const char *mca_sharedfp_addproc_component_version_string = - "OMPI/MPI addproc SHAREDFP MCA component version " OMPI_VERSION; - -/* - * Global variables - */ -int mca_sharedfp_addproc_priority=1; -int mca_sharedfp_addproc_verbose=0; -#if 0 -char[MPI_MAX_HOSTNAME_LEN] mca_sharedfp_addproc_control_host; -#endif - -static int addproc_register(void); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ -mca_sharedfp_base_component_2_0_0_t mca_sharedfp_addproc_component = { - - /* First, the mca_component_t struct containing meta information - about the component itself */ - - .sharedfpm_version = { - MCA_SHAREDFP_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "addproc", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_register_component_params = addproc_register, - }, - .sharedfpm_data = { - /* This component is checkpointable */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - .sharedfpm_init_query = mca_sharedfp_addproc_component_init_query, /* get thread level */ - .sharedfpm_file_query = mca_sharedfp_addproc_component_file_query, /* get priority and actions */ - .sharedfpm_file_unquery = mca_sharedfp_addproc_component_file_unquery, /* undo what was done by previous function */ -}; - - -static int addproc_register(void) -{ - mca_sharedfp_addproc_priority = 1; - (void) mca_base_component_var_register(&mca_sharedfp_addproc_component.sharedfpm_version, - "priority", "Priority of the addproc sharedfp component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &mca_sharedfp_addproc_priority); - mca_sharedfp_addproc_verbose = 0; - (void) 
mca_base_component_var_register(&mca_sharedfp_addproc_component.sharedfpm_version, - "verbose", "Verbosity of the addproc sharedfp component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &mca_sharedfp_addproc_verbose); - - -#if 0 - memset (mca_sharedfp_addproc_control_host, 0, MPI_MAX_HOSTNAME_LEN); - (void) mca_base_component_var_register(&mca_sharedfp_addproc_component.sharedfpm_version, - "control_host", "Name of the host where to spawn the control process(default:none)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &mca_sharedfp_addproc_control_host); - -#endif - return OMPI_SUCCESS; -} diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_control.c b/ompi/mca/sharedfp/addproc/sharedfp_addproc_control.c deleted file mode 100644 index f3c7b06b461..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_control.c +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "sharedfp_addproc_control.h" - -/* #define PRINT_TAG 1 */ -void nodeDelete(node **front, node **rear) -{ - node *delNode; - if ((*front) == NULL && (*rear)==NULL) { - printf("The queue is empty\n"); - } - else { - delNode = *front; - if (*front == *rear) { - *rear = NULL; - } - (*front) = (*front)->Next; - - free(delNode); - } - - return; -} - -void nodeInsert(node **front, node **rear, int procNo, long numBytesArrAddr) -{ - node *newNode; - newNode = (node*)malloc(sizeof(node)); - - newNode->Next = NULL; - newNode->procNo = procNo; - newNode->numBytesArrAddr = numBytesArrAddr; - - - if ((*front == NULL) && (*rear == NULL)) { - *front = newNode; - *rear = newNode; -#if 0 - printf("Front and rear both NULL\n"); -#endif - fflush(stdout); - } - else { - (*rear)->Next = newNode; - *rear=newNode; -#if 0 - printf("Front and rear both not NULL\n"); -#endif - fflush(stdout); - } - - return; -} - -int Check_Request_Offset(int tag_received) -{ -#if 0 - printf("Tag received %d\n",tag_received); -#endif - - if (tag_received == REQUEST_TAG) { -#if 0 - printf("Return from Check_Request_Offset\n"); -#endif - return 1; - } - - - return 0; -} - -int Check_Acknowledgement(int tag_received) -{ - if (tag_received == ACK_TAG) - return 1; - - return 0; -} - -int End_control_shared_request(int tag_received) -{ - if (tag_received == END_TAG) - return 1; - - - return 0; -} - - -int main(int argc, char **argv) -{ - long recvBuff; - long offsetValue; - long endoffile; - int size; - int tag_received; - int END_FLAG = 0; - - int recvcount = 1; - MPI_Status status; - MPI_Comm parentComm; - static MPI_Offset offset = 0; - - /*statusStruct arr;*/ - - node *rear, *front; - rear = front = NULL; - -#if 0 - printf("addproc_control: MPI_INIT\n"); fflush(stdout); -#endif - MPI_Init(&argc,&argv); - -#if 0 - printf("addproc_control: MPI_Comm_size\n"); fflush(stdout); -#endif - MPI_Comm_size(MPI_COMM_WORLD,&size); - 
- - endoffile = 0; - -#if 0 - printf("addproc_control: start listening\n"); fflush(stdout); -#endif - while(!END_FLAG) { - - /* Receive request from other processes */ - MPI_Comm_get_parent(&parentComm); - - MPI_Recv(&recvBuff,recvcount,OMPI_OFFSET_DATATYPE,MPI_ANY_SOURCE,MPI_ANY_TAG,parentComm,&status); - tag_received = status.MPI_TAG; - - switch (tag_received) - { - - case REQUEST_TAG: -#if 0 - printf("addproc_control: Offset requested by the process %d\n",status.MPI_SOURCE); fflush(stdout); -#endif - /* Insert the node into the linked list */ - nodeInsert(&front,&rear,status.MPI_SOURCE,recvBuff); - break; - case END_TAG: -#if 0 - printf("addproc_control: End Control tag received\n"); fflush(stdout); -#endif - END_FLAG = 1; - break; - case SEEK_SET_TAG: - offset = recvBuff; - MPI_Send(&offset,1,OMPI_OFFSET_DATATYPE,status.MPI_SOURCE,SEEK_SET_TAG,parentComm); -#if 0 - printf("addproc_control: Seek set tag received\n"); fflush(stdout); -#endif - break; - case SEEK_CUR_TAG: -#if 0 - printf("addproc_control: Seek CUR Tag received\n"); fflush(stdout); -#endif - /*set the pointer to the offset*/ - offset += recvBuff; - MPI_Send(&offset,1,OMPI_OFFSET_DATATYPE,status.MPI_SOURCE,SEEK_CUR_TAG,parentComm); - break; - case SEEK_END_TAG: -#if 0 - printf("addproc_control: Seek END TAG received\n"); fflush(stdout); -#endif - offset = endoffile; - offset += recvBuff; - MPI_Send(&offset,1,OMPI_OFFSET_DATATYPE,status.MPI_SOURCE,SEEK_END_TAG,parentComm); - break; - case GET_POSITION_TAG: -#if 0 - printf("\naddproc_control: Get Position tag received\n"); fflush(stdout); -#endif - /*Send the offset as requested*/ - MPI_Send(&offset,1,OMPI_OFFSET_DATATYPE,status.MPI_SOURCE,GET_POSITION_TAG,parentComm); - break; - default: - printf("addproc_control: Unknown tag received\n"); fflush(stdout); - break; - } - - while (front != NULL) { - - offsetValue = offset; - - offset += front->numBytesArrAddr; - - /* Store the end of file */ - if (endoffile < offset) - endoffile = offset; - - - /* 
MPI_Send to the correct process */ - - MPI_Send(&offsetValue,1,OMPI_OFFSET_DATATYPE, front->procNo, OFFSET_TAG, - parentComm); - nodeDelete(&front,&rear); - - } - - } /* End of while(1) loop */ - -#if 0 - printf("addproc_control: finalizing mpi...\n"); fflush(stdout); -#endif - MPI_Finalize(); - -#if 0 - printf("addproc_control: Exiting...\n"); -#endif - return 0; -} diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_control.h b/ompi/mca/sharedfp/addproc/sharedfp_addproc_control.h deleted file mode 100644 index 40072e57f40..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_control.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 University of Houston. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_SHAREDFP_addproc_control_H -#define MCA_SHAREDFP_addproc_control_H - -#include -#include "mpi.h" -#include "sharedfp_addproc.h" - -BEGIN_C_DECLS - -void nodeDelete(node **front, node **rear); -void nodeInsert(node **front, node **rear, int procNo, long numBytesArrAddr); -int Check_Request_Offset(int tag_received); -int Check_Acknowledgement(int tag_received); -int End_control_shared_request(int tag_received); - -END_C_DECLS - -#endif /* MCA_SHAREDFP_addproc_control_H */ diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c b/ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c deleted file mode 100644 index 07abc1113d1..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "sharedfp_addproc.h" - -#include "mpi.h" -#include "ompi/constants.h" -#include "ompi/mca/sharedfp/sharedfp.h" -#include "ompi/mca/pml/pml.h" - -#include -#include -#include "ompi/mca/sharedfp/base/base.h" - -int mca_sharedfp_addproc_file_open (struct ompi_communicator_t *comm, - char* filename, - int amode, - struct ompi_info_t *info, - mca_io_ompio_file_t *fh) -{ - int ret = OMPI_SUCCESS, err; - int rank; - struct mca_sharedfp_base_data_t* sh; - mca_io_ompio_file_t * shfileHandle; - MPI_Comm newInterComm; - struct mca_sharedfp_addproc_data * addproc_data = NULL; - - /*-------------------------------------------------*/ - /*Open the same file again without shared file pointer*/ - /*-------------------------------------------------*/ - shfileHandle = (mca_io_ompio_file_t *)malloc(sizeof(mca_io_ompio_file_t)); - ret = ompio_io_ompio_file_open(comm,filename,amode,info,shfileHandle,false); - if ( OMPI_SUCCESS != ret) { - printf( "mca_sharedfp_addproc_file_open: Error during file open\n"); - return ret; - } - - /*Memory is allocated here for the sh structure*/ - if ( mca_sharedfp_addproc_verbose ) { - printf( "mca_sharedfp_addproc_file_open: malloc f_sharedfp_ptr struct\n"); - } - sh = (struct mca_sharedfp_base_data_t*)malloc(sizeof(struct mca_sharedfp_base_data_t)); - if ( NULL == sh ){ - printf( "mca_sharedfp_addproc_file_open: Error, unable to malloc f_sharedfp_ptr struct\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /*Populate the sh file structure based on the implementation*/ - sh->sharedfh = shfileHandle; /* Shared file pointer*/ - sh->global_offset = 0; /* Global Offset*/ - sh->comm = comm; /* Communicator*/ - sh->selected_module_data = NULL; - - rank = ompi_comm_rank ( sh->comm ); - - if ( mca_sharedfp_addproc_verbose ) { - printf( "mca_sharedfp_addproc_file_open: START spawn by rank=%d\n",rank); - } - - /*Spawn a new process which will 
maintain the offsets for this file open*/ - ret = MPI_Comm_spawn("mca_sharedfp_addproc_control", MPI_ARGV_NULL, 1, MPI_INFO_NULL, - 0, sh->comm, &newInterComm, &err); - if ( OMPI_SUCCESS != ret ) { - printf( "mca_sharedfp_addproc_file_open: error spawning control process ret=%d\n", - ret); - } - - /*If spawning successful*/ - if (newInterComm) { - addproc_data = (struct mca_sharedfp_addproc_data*)malloc(sizeof(struct mca_sharedfp_addproc_data)); - if ( NULL == addproc_data ){ - printf( "mca_sharedfp_addproc_file_open: Error, unable to malloc addproc_data struct\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /*Store the new Intercommunicator*/ - addproc_data->intercom = newInterComm; - - /*save the addproc data*/ - sh->selected_module_data = addproc_data; - /*remember the shared file handle*/ - fh->f_sharedfp_data = sh; - } - else{ - printf( "mca_sharedfp_addproc_file_open: DONE spawn by rank=%d, errcode[success=%d, err=%d]=%d\n", - rank, MPI_SUCCESS, MPI_ERR_SPAWN, ret); - ret = OMPI_ERROR; - } - - return ret; -} - -int mca_sharedfp_addproc_file_close (mca_io_ompio_file_t *fh) -{ - struct mca_sharedfp_base_data_t *sh=NULL; - int err = OMPI_SUCCESS; - long sendBuff = 0; - int count = 1; - int rank; - struct mca_sharedfp_addproc_data * addproc_data = NULL; - - if ( NULL == fh->f_sharedfp_data){ - /* Can happen with lazy initialization of the sharedfp structures */ - if ( mca_sharedfp_addproc_verbose ) { - printf( "sharedfp_addproc_file_close - shared file pointer structure not initialized\n"); - } - return OMPI_SUCCESS; - } - sh = fh->f_sharedfp_data; - - rank = ompi_comm_rank ( sh->comm ); - - /* Make sure that all processes are ready to release the - ** shared file pointer resources - */ - sh->comm->c_coll.coll_barrier(sh->comm, sh->comm->c_coll.coll_barrier_module ); - - addproc_data = (struct mca_sharedfp_addproc_data*)(sh->selected_module_data); - - if (addproc_data) { - /*tell additional proc to stop listening*/ - if(0 == rank){ - MCA_PML_CALL(send( &sendBuff, 
count, OMPI_OFFSET_DATATYPE, 0, END_TAG, - MCA_PML_BASE_SEND_STANDARD, addproc_data->intercom)); - } - - /* Free intercommunicator */ - if(addproc_data->intercom){ - ompi_comm_free(&(addproc_data->intercom)); - } - free(addproc_data); - } - - /* Close the main file opened by this component*/ - err = ompio_io_ompio_file_close(sh->sharedfh); - - /*free shared file pointer data struct*/ - free(sh); - return err; -} diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_iread.c b/ompi/mca/sharedfp/addproc/sharedfp_addproc_iread.c deleted file mode 100644 index 5b1ef44e166..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_iread.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "sharedfp_addproc.h" - -#include "mpi.h" -#include "ompi/constants.h" -#include "ompi/mca/sharedfp/sharedfp.h" - -int mca_sharedfp_addproc_iread(mca_io_ompio_file_t *fh, - void *buf, - int count, - ompi_datatype_t *datatype, - MPI_Request * request) -{ - int ret = OMPI_SUCCESS; - OMPI_MPI_OFFSET_TYPE offset = 0; - long bytesRequested = 0; - size_t numofBytes; - struct mca_sharedfp_base_data_t *sh = NULL; - - if(NULL == fh->f_sharedfp_data){ - opal_output(0, "sharedfp_addproc_iread - shared file pointer structure not initialized correctly\n"); - return OMPI_ERROR; - } - - /* Calculate the number of bytes to write */ - opal_datatype_type_size ( &datatype->super ,&numofBytes); - bytesRequested = count * numofBytes; - - if ( mca_sharedfp_addproc_verbose ){ - printf("mca_sharedfp_addproc_iread: Bytes Requested is %ld\n",bytesRequested); - } - /* Retrieve the shared file data struct */ - sh = fh->f_sharedfp_data; - - /*Request to the additional process for the offset*/ - ret = mca_sharedfp_addproc_request_position(sh,bytesRequested,&offset); - if( OMPI_SUCCESS == ret ){ - if ( mca_sharedfp_addproc_verbose ){ - printf("mca_sharedfp_addproc_iread: Offset received is %lld\n",offset); - } - /* Read from the file */ - ret = ompio_io_ompio_file_iread_at ( sh->sharedfh, offset, buf, count, datatype, request); - } - - return ret; -} -int mca_sharedfp_addproc_read_ordered_begin(mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype) -{ - opal_output(0,"mca_sharedfp_addproc_read_ordered_begin: NOT IMPLEMENTED\n"); - return OMPI_ERROR; - -} - - -int mca_sharedfp_addproc_read_ordered_end(mca_io_ompio_file_t *fh, - void *buf, - ompi_status_public_t *status) -{ - opal_output(0,"mca_sharedfp_addproc_read_ordered_end: NOT IMPLEMENTED\n"); - return OMPI_ERROR; - -} diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_iwrite.c 
b/ompi/mca/sharedfp/addproc/sharedfp_addproc_iwrite.c deleted file mode 100644 index b846652db1a..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_iwrite.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "sharedfp_addproc.h" - -#include "mpi.h" -#include "ompi/constants.h" -#include "ompi/mca/sharedfp/sharedfp.h" - -int mca_sharedfp_addproc_iwrite(mca_io_ompio_file_t *fh, - void *buf, - int count, - ompi_datatype_t *datatype, - MPI_Request * request) -{ - int ret = OMPI_SUCCESS; - OMPI_MPI_OFFSET_TYPE offset = 0; - long bytesRequested = 0; - size_t numofBytes; - struct mca_sharedfp_base_data_t *sh = NULL; - - if(NULL == fh->f_sharedfp_data){ - opal_output(0, "sharedfp_addproc_iwrite: shared file pointer structure not initialized correctly\n"); - return OMPI_ERROR; - } - - /* Calculate the number of bytes to write */ - opal_datatype_type_size ( &datatype->super, &numofBytes); - bytesRequested = count * numofBytes; - - /* Retrieve the shared file data struct */ - sh = fh->f_sharedfp_data; - - if ( mca_sharedfp_addproc_verbose ){ - printf("sharedfp_addproc_iwrite: Bytes Requested is %ld\n",bytesRequested); - } - /* Request the offset to write bytesRequested bytes */ - ret = mca_sharedfp_addproc_request_position(sh,bytesRequested,&offset); - if ( OMPI_SUCCESS == ret ) { - if ( 
mca_sharedfp_addproc_verbose ){ - printf("sharedfp_addproc_iwrite: Offset received is %lld\n",offset); - } - /* Write to the file */ - ret = ompio_io_ompio_file_iwrite_at(sh->sharedfh,offset,buf,count,datatype,request); - } - - return ret; -} - -int mca_sharedfp_addproc_write_ordered_begin(mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype) -{ - opal_output(0,"mca_sharedfp_addproc_write_ordered_begin: NOT IMPLEMENTED\n"); - return OMPI_ERROR; -} - - -int mca_sharedfp_addproc_write_ordered_end(mca_io_ompio_file_t *fh, - void *buf, - ompi_status_public_t *status) -{ - opal_output(0,"mca_sharedfp_addproc_write_ordered_end: NOT IMPLEMENTED\n"); - return OMPI_ERROR; -} diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_read.c b/ompi/mca/sharedfp/addproc/sharedfp_addproc_read.c deleted file mode 100644 index f59ce94dd36..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_read.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "sharedfp_addproc.h" - -#include "mpi.h" -#include "ompi/constants.h" -#include "ompi/mca/sharedfp/sharedfp.h" - -int mca_sharedfp_addproc_read ( mca_io_ompio_file_t *fh, - void *buf, int count, MPI_Datatype datatype, MPI_Status *status) -{ - int ret = OMPI_SUCCESS; - OMPI_MPI_OFFSET_TYPE offset = 0; - long bytesRequested = 0; - size_t numofBytes; - struct mca_sharedfp_base_data_t *sh = NULL; - mca_sharedfp_base_module_t * shared_fp_base_module = NULL; - - if(NULL == fh->f_sharedfp_data){ - if ( mca_sharedfp_addproc_verbose ) { - printf("sharedfp_addproc_read: opening the shared file pointer file\n"); - } - shared_fp_base_module = fh->f_sharedfp; - - ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, - fh->f_filename, - fh->f_amode, - fh->f_info, - fh); - if ( OMPI_SUCCESS != ret ) { - opal_output(0,"sharedfp_addproc_read - error opening the shared file pointer\n"); - return ret; - } - } - - /* Calculate the number of bytes to write */ - opal_datatype_type_size ( &datatype->super ,&numofBytes); - bytesRequested = count * numofBytes; - - if ( mca_sharedfp_addproc_verbose ){ - printf("mca_sharedfp_addproc_read: Bytes Requested is %ld\n", bytesRequested); - } - /* Retrieve the shared file data struct */ - sh = fh->f_sharedfp_data; - - /*Request to the additional process for the offset*/ - ret = mca_sharedfp_addproc_request_position(sh,bytesRequested,&offset); - if( OMPI_SUCCESS == ret ){ - if ( mca_sharedfp_addproc_verbose ){ - printf("mca_sharedfp_addproc_read: Offset received is %lld\n",offset); - } - /* Read from the file */ - ret = ompio_io_ompio_file_read_at(sh->sharedfh,offset,buf,count,datatype,status); - } - - return ret; -} - -int mca_sharedfp_addproc_read_ordered (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_status_public_t *status) -{ - int ret = OMPI_SUCCESS; - OMPI_MPI_OFFSET_TYPE 
offset = 0, offsetReceived = 0; - long sendBuff = 0; - long *buff=NULL; - long offsetBuff, bytesRequested = 0; - size_t numofBytes; - int rank, size, i; - struct mca_sharedfp_base_data_t *sh = NULL; - mca_sharedfp_base_module_t * shared_fp_base_module = NULL; - - if(NULL == fh->f_sharedfp_data){ - if ( mca_sharedfp_addproc_verbose ) { - printf("sharedfp_addproc_read_ordered: opening the shared file pointer file\n"); - } - shared_fp_base_module = fh->f_sharedfp; - - ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, - fh->f_filename, - fh->f_amode, - fh->f_info, - fh); - if ( OMPI_SUCCESS != ret ) { - opal_output(0,"sharedfp_addproc_read_ordered - error opening the shared file pointer\n"); - return ret; - } - } - - - /*Retrieve the new communicator*/ - sh = fh->f_sharedfp_data; - - /* Calculate the number of bytes to read*/ - opal_datatype_type_size ( &datatype->super, &numofBytes); - sendBuff = count * numofBytes; - - /* Get the ranks in the communicator */ - rank = ompi_comm_rank ( sh->comm); - size = ompi_comm_size ( sh->comm); - - if ( 0 == rank ) { - buff = (long*)malloc(sizeof(long) * size); - if ( NULL == buff ) - return OMPI_ERR_OUT_OF_RESOURCE; - } - - ret = sh->comm->c_coll.coll_gather( &sendBuff, 1, OMPI_OFFSET_DATATYPE, - buff, 1, OMPI_OFFSET_DATATYPE, 0, sh->comm, - sh->comm->c_coll.coll_gather_module); - if ( OMPI_SUCCESS != ret ) { - goto exit; - } - - /* All the counts are present now in the recvBuff. 
- The size of recvBuff is sizeof_newComm - */ - if ( 0 == rank ) { - for (i = 0; i < size ; i ++) { - if ( mca_sharedfp_addproc_verbose ){ - printf("sharedfp_addproc_read_ordered: Buff is %ld\n",buff[i]); - } - bytesRequested += buff[i]; - - if ( mca_sharedfp_addproc_verbose ){ - printf("sharedfp_addproc_read_ordered: Bytes requested are %ld\n",bytesRequested); - } - } - - /* Request the offset to read bytesRequested bytes - ** only the root process needs to do the request, - ** since the root process will then tell the other - ** processes at what offset they should read their - ** share of the data. - */ - ret = mca_sharedfp_addproc_request_position(sh,bytesRequested,&offsetReceived); - if( OMPI_SUCCESS != ret ){ - goto exit; - } - if ( mca_sharedfp_addproc_verbose ){ - printf("sharedfp_addproc_read_ordered: Offset received is %lld\n",offsetReceived); - } - buff[0] += offsetReceived; - - - for (i = 1 ; i < size; i++) { - buff[i] += buff[i-1]; - } - } - - /* Scatter the results to the other processes*/ - ret = sh->comm->c_coll.coll_scatter ( buff, 1, OMPI_OFFSET_DATATYPE, &offsetBuff, - 1, OMPI_OFFSET_DATATYPE, 0, sh->comm, - sh->comm->c_coll.coll_scatter_module ); - if ( OMPI_SUCCESS != ret ) { - goto exit; - } - - /*Each process now has its own individual offset in recvBUFF*/ - offset = offsetBuff - sendBuff; - - if ( mca_sharedfp_addproc_verbose ){ - printf("sharedfp_addproc_read_ordered: Offset returned is %lld\n",offset); - } - - /* read from the file */ - ret = ompio_io_ompio_file_read_at_all(sh->sharedfh,offset,buf,count,datatype,status); - -exit: - if ( NULL != buff ) { - free ( buff ); - } - - return ret; -} diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_request_position.c b/ompi/mca/sharedfp/addproc/sharedfp_addproc_request_position.c deleted file mode 100644 index 4fdd741407d..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_request_position.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana 
University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "sharedfp_addproc.h" - -#include "mpi.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/sharedfp/sharedfp.h" - -int mca_sharedfp_addproc_request_position(struct mca_sharedfp_base_data_t * sh, - int bytes_requested, - OMPI_MPI_OFFSET_TYPE *offset) -{ - int ret = OMPI_SUCCESS; - OMPI_MPI_OFFSET_TYPE position = 0; - long sendBuff = bytes_requested ; - int count = 1; - - - struct mca_sharedfp_addproc_data * addproc_data = sh->selected_module_data; - - *offset = 0; - - ret = MCA_PML_CALL(send( &sendBuff, count, OMPI_OFFSET_DATATYPE, 0, REQUEST_TAG, - MCA_PML_BASE_SEND_STANDARD, addproc_data->intercom)); - if ( OMPI_SUCCESS != ret ) { - return ret; - } - ret = MCA_PML_CALL(recv( &position, count, OMPI_OFFSET_DATATYPE, 0, OFFSET_TAG, - addproc_data->intercom, MPI_STATUS_IGNORE)); - - *offset = position; - return ret; -} - -int mca_sharedfp_addproc_get_position(mca_io_ompio_file_t *fh, - OMPI_MPI_OFFSET_TYPE * offset) -{ - int ret = OMPI_SUCCESS; - struct mca_sharedfp_base_data_t *sh = NULL; - - if(NULL == fh->f_sharedfp_data){ - opal_output(0, "sharedfp_addproc_get_position - shared file pointer structure not initialized correctly\n"); - return OMPI_ERROR; - } - - /* Retrieve the shared file data struct*/ - sh = fh->f_sharedfp_data; - - /* Requesting the offset to write 0 bytes, - ** returns the current 
offset w/o updating it - */ - ret = mca_sharedfp_addproc_request_position(sh, 0, offset); - - return ret; -} diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_seek.c b/ompi/mca/sharedfp/addproc/sharedfp_addproc_seek.c deleted file mode 100644 index c3b5f0e0fe9..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_seek.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "sharedfp_addproc.h" - -#include "mpi.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/sharedfp/sharedfp.h" - -int -mca_sharedfp_addproc_seek (mca_io_ompio_file_t *fh, - OMPI_MPI_OFFSET_TYPE offset, int whence) -{ - int rank; - int ret = OMPI_SUCCESS; - OMPI_MPI_OFFSET_TYPE position = 0; - struct mca_sharedfp_base_data_t *sh = NULL; - struct mca_sharedfp_addproc_data * addproc_data = sh->selected_module_data; - long buff = 0; - - if(NULL == fh->f_sharedfp_data){ - opal_output(0, "sharedfp_addproc_write_ordered - shared file pointer structure not initialized correctly\n"); - return OMPI_ERROR; - } - - sh = fh->f_sharedfp_data; - rank = ompi_comm_rank ( sh->comm ); - buff = offset; - - - /* This is a collective call, - * only one process needs to communicate with the */ - if(0 == rank){ - ret = MCA_PML_CALL(send ( &buff, 1, OMPI_OFFSET_DATATYPE, 0, whence, - MCA_PML_BASE_SEND_STANDARD, 
- addproc_data->intercom)); - if ( OMPI_SUCCESS != ret ) { - return OMPI_ERROR; - } - ret = MCA_PML_CALL(recv(&position, 1, OMPI_OFFSET_DATATYPE, 0, whence, - addproc_data->intercom, MPI_STATUS_IGNORE)); - if ( OMPI_SUCCESS != ret ) { - return OMPI_ERROR; - } - - } - ret = sh->comm->c_coll.coll_barrier(sh->comm, sh->comm->c_coll.coll_barrier_module); - - return ret; -} diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_write.c b/ompi/mca/sharedfp/addproc/sharedfp_addproc_write.c deleted file mode 100644 index ab6e530921a..00000000000 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_write.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "sharedfp_addproc.h" - -#include "mpi.h" -#include "ompi/constants.h" -#include "ompi/mca/sharedfp/sharedfp.h" - -int mca_sharedfp_addproc_write (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_status_public_t *status) -{ - int ret = OMPI_SUCCESS; - OMPI_MPI_OFFSET_TYPE offset = 0; - long bytesRequested = 0; - size_t numofBytes; - struct mca_sharedfp_base_data_t *sh = NULL; - mca_sharedfp_base_module_t * shared_fp_base_module = NULL; - - if(NULL == fh->f_sharedfp_data){ - if ( mca_sharedfp_addproc_verbose ) { - printf("sharedfp_addproc_write: opening the shared file pointer file\n"); - } - shared_fp_base_module = fh->f_sharedfp; - - ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, - fh->f_filename, - fh->f_amode, - fh->f_info, - fh); - if ( OMPI_SUCCESS != ret ) { - opal_output(0,"sharedfp_addproc_write - error opening the shared file pointer\n"); - return ret; - } - } - - /* Calculate the number of bytes to write*/ - opal_datatype_type_size ( &datatype->super, &numofBytes); - bytesRequested = count * numofBytes; - - /*Retrieve the shared file data structure */ - sh = fh->f_sharedfp_data; - - if ( mca_sharedfp_addproc_verbose ){ - printf("sharedfp_addproc_write: sharedfp_addproc_write: Bytes Requested is %ld\n",bytesRequested); - } - - /*Request the offset to write bytesRequested bytes*/ - ret = mca_sharedfp_addproc_request_position( sh, bytesRequested, &offset); - if ( OMPI_SUCCESS == ret ) { - if ( mca_sharedfp_addproc_verbose ){ - printf("sharedfp_addproc_write: Offset received is %lld\n",offset); - } - /* Write to the file */ - ret = ompio_io_ompio_file_write_at(sh->sharedfh,offset,buf,count,datatype,status); - } - - return ret; -} - -int mca_sharedfp_addproc_write_ordered (mca_io_ompio_file_t *fh, - void *buf, - int count, - struct ompi_datatype_t *datatype, - ompi_status_public_t 
*status) -{ - int ret = OMPI_SUCCESS; - OMPI_MPI_OFFSET_TYPE offset = 0, offsetReceived = 0; - long sendBuff = 0; - long *buff=NULL; - long offsetBuff; - long bytesRequested = 0; - int recvcnt = 1, sendcnt = 1; - size_t numofBytes; - int rank, size, i; - struct mca_sharedfp_base_data_t *sh = NULL; - mca_sharedfp_base_module_t * shared_fp_base_module = NULL; - - if ( NULL == fh->f_sharedfp_data){ - if ( mca_sharedfp_addproc_verbose ) { - printf("sharedfp_addproc_write_ordered: opening the shared file pointer\n"); - } - shared_fp_base_module = fh->f_sharedfp; - - ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, - fh->f_filename, - fh->f_amode, - fh->f_info, - fh); - if ( OMPI_SUCCESS != ret ) { - opal_output(0,"sharedfp_addproc_write_ordered - error opening the shared file pointer\n"); - return ret; - } - } - - /*Retrieve the shared file pointer structure*/ - sh = fh->f_sharedfp_data; - - /* Calculate the number of bytes to write*/ - opal_datatype_type_size ( &datatype->super, &numofBytes); - sendBuff = count * numofBytes; - - /* Get the ranks in the communicator */ - rank = ompi_comm_rank ( sh->comm ); - size = ompi_comm_size ( sh->comm ); - - if ( 0 == rank ) { - buff = (long*)malloc(sizeof(long) * size); - if ( NULL == buff ) - return OMPI_ERR_OUT_OF_RESOURCE; - } - - ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, buff, - recvcnt, OMPI_OFFSET_DATATYPE, 0, sh->comm, - sh->comm->c_coll.coll_gather_module); - if( OMPI_SUCCESS != ret ){ - goto exit; - } - - /* All the counts are present now in the recvBuff. 
- The size of recvBuff is sizeof_newComm - */ - if ( 0 == rank ) { - for (i = 0; i < size ; i ++) { - bytesRequested += buff[i]; - - if ( mca_sharedfp_addproc_verbose ){ - printf("sharedfp_addproc_write_ordered: Bytes requested are %ld\n",bytesRequested); - } - } - - /* Request the offset to write bytesRequested bytes - ** only the root process needs to do the request, - ** since the root process will then tell the other - ** processes at what offset they should write their - ** share of the data. - */ - ret = mca_sharedfp_addproc_request_position(sh,bytesRequested,&offsetReceived); - if( OMPI_SUCCESS != ret ){ - goto exit; - } - if ( mca_sharedfp_addproc_verbose ){ - printf("sharedfp_addproc_write_ordered: Offset received is %lld\n",offsetReceived); - } - buff[0] += offsetReceived; - - for (i = 1 ; i < size; i++) { - buff[i] += buff[i-1]; - } - } - - /* Scatter the results to the other processes*/ - ret = sh->comm->c_coll.coll_scatter ( buff, sendcnt, OMPI_OFFSET_DATATYPE, &offsetBuff, - recvcnt, OMPI_OFFSET_DATATYPE, 0, sh->comm, - sh->comm->c_coll.coll_scatter_module ); - if( OMPI_SUCCESS != ret ){ - goto exit; - } - - /*Each process now has its own individual offset in recvBUFF*/ - offset = offsetBuff - sendBuff; - - if ( mca_sharedfp_addproc_verbose ){ - printf("sharedfp_addproc_write_ordered: Offset returned is %lld\n",offset); - } - - /* write to the file */ - ret = ompio_io_ompio_file_write_at_all(sh->sharedfh,offset,buf,count,datatype,status); - -exit: - if ( NULL != buff ) { - free ( buff ); - } - return ret; -} diff --git a/ompi/mca/sharedfp/base/Makefile.am b/ompi/mca/sharedfp/base/Makefile.am index 84101ed3858..419d0de87e1 100644 --- a/ompi/mca/sharedfp/base/Makefile.am +++ b/ompi/mca/sharedfp/base/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2011 University of Houston. All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/sharedfp/base/base.h b/ompi/mca/sharedfp/base/base.h index a7492401a67..216b85d8b50 100644 --- a/ompi/mca/sharedfp/base/base.h +++ b/ompi/mca/sharedfp/base/base.h @@ -5,21 +5,21 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** +/** * @file * * MCA sharedfp base framework public interface functions. 
diff --git a/ompi/mca/sharedfp/base/sharedfp_base_file_select.c b/ompi/mca/sharedfp/base/sharedfp_base_file_select.c index 6a8ba6fc34b..9bc9c275c43 100644 --- a/ompi/mca/sharedfp/base/sharedfp_base_file_select.c +++ b/ompi/mca/sharedfp/base/sharedfp_base_file_select.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2013 University of Houston. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,7 +31,7 @@ #include "ompi/mca/io/ompio/io_ompio.h" /* - * This structure is needed so that we can close the modules + * This structure is needed so that we can close the modules * which are not selected but were opened. mca_base_modules_close * which does this job for us requires a opal_list_t which contains * these modules @@ -59,20 +59,20 @@ static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL); * 4. Select the module with the highest priority * 5. Call the init function on the selected module so that it does the * right setup for the file - * 6. Call finalize on all the other modules which returned + * 6. 
Call finalize on all the other modules which returned * their module but were unfortunate to not get selected - */ + */ int mca_sharedfp_base_file_select (struct mca_io_ompio_file_t *file, - mca_base_component_t *preferred) + mca_base_component_t *preferred) { - int priority; - int best_priority; - opal_list_item_t *item; + int priority; + int best_priority; + opal_list_item_t *item; mca_base_component_list_item_t *cli; - mca_sharedfp_base_component_t *component; + mca_sharedfp_base_component_t *component; mca_sharedfp_base_component_t *best_component; - mca_sharedfp_base_module_t *module; + mca_sharedfp_base_module_t *module; opal_list_t queried; queried_module_t *om; char *str; @@ -82,22 +82,22 @@ int mca_sharedfp_base_file_select (struct mca_io_ompio_file_t *file, provided then it should be used (if possible) */ if (NULL != preferred) { - + /* We have a preferred component. Check if it is available and if so, whether it wants to run */ - + str = &(preferred->mca_component_name[0]); - + opal_output_verbose(10, ompi_sharedfp_base_framework.framework_output, "sharedfp:base:file_select: Checking preferred component: %s", str); - - /* query the component for its priority and get its module + + /* query the component for its priority and get its module structure. This is necessary to proceed */ - + component = (mca_sharedfp_base_component_t *)preferred; module = component->sharedfpm_file_query (file, &priority); - if (NULL != module && + if (NULL != module && NULL != module->sharedfp_module_init) { /* this query seems to have returned something legitimate @@ -110,7 +110,7 @@ int mca_sharedfp_base_file_select (struct mca_io_ompio_file_t *file, file->f_sharedfp_component = preferred; return module->sharedfp_module_init(file); - } + } /* His preferred component is present, but is unable to * run. This is not a good sign. 
We should try selecting * some other component We let it fall through and select @@ -127,7 +127,7 @@ int mca_sharedfp_base_file_select (struct mca_io_ompio_file_t *file, * All we need to do is to go through the list of available * components and find the one which has the highest priority and * use that for this file - */ + */ best_component = NULL; best_priority = -1; @@ -149,21 +149,21 @@ int mca_sharedfp_base_file_select (struct mca_io_ompio_file_t *file, } else { /* * call the query function and see what it returns - */ + */ module = component->sharedfpm_file_query (file, &priority); if (NULL == module || NULL == module->sharedfp_module_init) { /* * query did not return any action which can be used - */ + */ opal_output_verbose(10, ompi_sharedfp_base_framework.framework_output, "select: query returned failure"); } else { opal_output_verbose(10, ompi_sharedfp_base_framework.framework_output, "select: query returned priority %d", priority); - /* + /* * is this the best component we have found till now? */ if (priority > best_priority) { @@ -180,8 +180,8 @@ int mca_sharedfp_base_file_select (struct mca_io_ompio_file_t *file, return OMPI_ERR_OUT_OF_RESOURCE; } om->om_component = component; - om->om_module = module; - opal_list_append(&queried, (opal_list_item_t *)om); + om->om_module = module; + opal_list_append(&queried, (opal_list_item_t *)om); } /* end else of if (NULL == module) */ } /* end else of if (NULL == component->sharedfpm_init) */ } /* end for ... end of traversal */ @@ -207,7 +207,7 @@ int mca_sharedfp_base_file_select (struct mca_io_ompio_file_t *file, * returned their priorities from the query. We now have to * unquery() those components which have not been selected and * init() the component which was selected - */ + */ for (item = opal_list_remove_first(&queried); NULL != item; item = opal_list_remove_first(&queried)) { @@ -221,7 +221,7 @@ int mca_sharedfp_base_file_select (struct mca_io_ompio_file_t *file, * defined. 
Whereever a function pointer is null in the * module structure we need to fill it in with the base * structure function pointers. This is yet to be done - */ + */ /* * We don return here coz we still need to go through and @@ -251,7 +251,7 @@ int mca_sharedfp_base_file_select (struct mca_io_ompio_file_t *file, } /* if not best component */ OBJ_RELEASE(om); } /* traversing through the entire list */ - + opal_output_verbose(10, ompi_sharedfp_base_framework.framework_output, "select: component %s selected", best_component->sharedfpm_version.mca_component_name); diff --git a/ompi/mca/sharedfp/base/sharedfp_base_file_unselect.c b/ompi/mca/sharedfp/base/sharedfp_base_file_unselect.c index 8464e2e402b..8d799ccefa4 100644 --- a/ompi/mca/sharedfp/base/sharedfp_base_file_unselect.c +++ b/ompi/mca/sharedfp/base/sharedfp_base_file_unselect.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/sharedfp/base/sharedfp_base_find_available.c b/ompi/mca/sharedfp/base/sharedfp_base_find_available.c index 6529632ebdf..54d5f2fe2da 100644 --- a/ompi/mca/sharedfp/base/sharedfp_base_find_available.c +++ b/ompi/mca/sharedfp/base/sharedfp_base_find_available.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -40,15 +40,15 @@ static int init_query_2_0_0(const mca_base_component_t *component, mca_base_component_list_item_t *entry, bool enable_progress_threads, bool enable_mpi_threads); - + int mca_sharedfp_base_find_available(bool enable_progress_threads, bool enable_mpi_threads) { opal_list_item_t *item, *next; mca_base_component_list_item_t *cli; - /* The list of components which we should check is already present - in ompi_sharedfp_base_framework.framework_components, which was established in + /* The list of components which we should check is already present + in ompi_sharedfp_base_framework.framework_components, which was established in mca_sharedfp_base_open */ item = opal_list_get_first(&ompi_sharedfp_base_framework.framework_components); @@ -56,7 +56,7 @@ int mca_sharedfp_base_find_available(bool enable_progress_threads, next = opal_list_get_next(item); cli = (mca_base_component_list_item_t*)item; - /* Now for this entry, we have to determine the thread level. Call + /* Now for this entry, we have to determine the thread level. 
Call a subroutine to do the job for us */ if (OMPI_SUCCESS != init_query(cli->cli_component, cli, @@ -81,15 +81,15 @@ int mca_sharedfp_base_find_available(bool enable_progress_threads, /* All done */ return OMPI_SUCCESS; } - - + + static int init_query(const mca_base_component_t *m, mca_base_component_list_item_t *entry, bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { int ret; - + opal_output_verbose(10, ompi_sharedfp_base_framework.framework_output, "sharedfp:find_available: querying sharedfp component %s", m->mca_component_name); @@ -117,7 +117,7 @@ static int init_query(const mca_base_component_t *m, m->mca_component_name); if (NULL != m->mca_close_component) { m->mca_close_component(); - } + } } else { opal_output_verbose(10, ompi_sharedfp_base_framework.framework_output, "sharedfp:find_avalable: sharedfp component %s is available", @@ -132,11 +132,11 @@ static int init_query(const mca_base_component_t *m, static int init_query_2_0_0(const mca_base_component_t *component, mca_base_component_list_item_t *entry, bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { - mca_sharedfp_base_component_2_0_0_t *sharedfp = + mca_sharedfp_base_component_2_0_0_t *sharedfp = (mca_sharedfp_base_component_2_0_0_t *) component; - + return sharedfp->sharedfpm_init_query(enable_progress_threads, enable_mpi_threads); } diff --git a/ompi/mca/sharedfp/base/sharedfp_base_frame.c b/ompi/mca/sharedfp/base/sharedfp_base_frame.c index b3d73f3499f..366e063005a 100644 --- a/ompi/mca/sharedfp/base/sharedfp_base_frame.c +++ b/ompi/mca/sharedfp/base/sharedfp_base_frame.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/sharedfp/configure.m4 b/ompi/mca/sharedfp/configure.m4 index 7ffd5384ed7..9859df4226d 100644 --- a/ompi/mca/sharedfp/configure.m4 +++ b/ompi/mca/sharedfp/configure.m4 @@ -1,22 +1,28 @@ # -*- shell-script -*- # -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # MCA_ompi_sharedfp_CONFIG(project_name, framework_name) # ------------------------------------------- -AC_DEFUN([MCA_ompi_sharedfp_CONFIG], +AC_DEFUN([MCA_ompi_sharedfp_CONFIG], [ - # An AC-ARG-ENABLE for mpi-io was set in ompi/mca/io/configure.m4. - # If it's no, we shouldn't bother building anything in fcoll. 
- AS_IF([test "$enable_mpi_io" != "no"], - [want_mpi_io=1], - [want_mpi_io=0]) - MCA_CONFIGURE_FRAMEWORK([$1], [$2], [$want_mpi_io]) + OPAL_VAR_SCOPE_PUSH([want_io_ompio]) + + AS_IF([test "$enable_mpi_io" != "no" && + test "$enable_io_ompio" != "no"], + [want_io_ompio=1], + [want_io_ompio=0]) + + MCA_CONFIGURE_FRAMEWORK([$1], [$2], [$want_io_ompio]) + + OPAL_VAR_SCOPE_POP ]) diff --git a/ompi/mca/sharedfp/individual/Makefile.am b/ompi/mca/sharedfp/individual/Makefile.am index dbe77ef9141..36c090604c0 100644 --- a/ompi/mca/sharedfp/individual/Makefile.am +++ b/ompi/mca/sharedfp/individual/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual.c b/ompi/mca/sharedfp/individual/sharedfp_individual.c index 83a44df5ff0..262e3aeefa3 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -26,6 +26,7 @@ #include "ompi_config.h" #include "mpi.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" #include "ompi/mca/sharedfp/individual/sharedfp_individual.h" /* @@ -84,14 +85,16 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_individual_component_file if ( amode & MPI_MODE_WRONLY || amode & MPI_MODE_RDWR ) { wronly_flag=true; if ( mca_sharedfp_individual_verbose ) { - printf("mca_sharedfp_individual_component_file_query: " - "MPI_MODE_WRONLY[true=%d,false=%d]=%d\n",true,false,wronly_flag); + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_individual_component_file_query: " + "MPI_MODE_WRONLY[true=%d,false=%d]=%d\n",true,false,wronly_flag); } } else { wronly_flag=false; if ( mca_sharedfp_individual_verbose ) { - printf("mca_sharedfp_individual_component_file_query: Can not run!, " - "MPI_MODE_WRONLY[true=%d,false=%d]=%d\n",true,false,wronly_flag); + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_individual_component_file_query: Can not run!, " + "MPI_MODE_WRONLY[true=%d,false=%d]=%d\n",true,false,wronly_flag); } } @@ -102,27 +105,30 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_individual_component_file valuelen = MPI_MAX_INFO_VAL; ompi_info_get ( info,"OMPIO_SHAREDFP_RELAXED_ORDERING", valuelen, value, &flag); if ( flag ) { - if ( mca_sharedfp_individual_verbose ) { - printf("mca_sharedfp_individual_component_file_query: " - "OMPIO_SHAREDFP_RELAXED_ORDERING=%s\n",value); + if ( mca_sharedfp_individual_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_individual_component_file_query: " + "OMPIO_SHAREDFP_RELAXED_ORDERING=%s\n",value); } /* flag - Returns true if key defined, false if not (boolean). 
*/ relaxed_order_flag=true; - } - else { - if ( mca_sharedfp_individual_verbose ) { - printf("mca_sharedfp_individual_component_file_query: " - "OMPIO_SHAREDFP_RELAXED_ORDERING MPI_Info key not set. " - "Set this key in order to increase this component's priority value.\n"); + } + else { + if ( mca_sharedfp_individual_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_individual_component_file_query: " + "OMPIO_SHAREDFP_RELAXED_ORDERING MPI_Info key not set. " + "Set this key in order to increase this component's priority value.\n"); } } - } + } else { if ( mca_sharedfp_individual_verbose ) { - printf("mca_sharedfp_individual_component_file_query: " - "OMPIO_SHAREDFP_RELAXED_ORDERING MPI_Info key not set, " - "got MPI_INFO_NULL. Set this key in order to increase " - "this component's priority value.\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_individual_component_file_query: " + "OMPIO_SHAREDFP_RELAXED_ORDERING MPI_Info key not set, " + "got MPI_INFO_NULL. Set this key in order to increase " + "this component's priority value.\n"); } } @@ -133,7 +139,7 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_individual_component_file */ if ( wronly_flag && relaxed_order_flag){ *priority=mca_sharedfp_individual_priority; - } + } else { *priority=1; } diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual.h b/ompi/mca/sharedfp/individual/sharedfp_individual.h index 7339d3ccc2c..8c0516897ae 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual.h +++ b/ompi/mca/sharedfp/individual/sharedfp_individual.h @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -50,10 +52,10 @@ OMPI_MODULE_DECLSPEC extern mca_sharedfp_base_component_2_0_0_t mca_sharedfp_ind int mca_sharedfp_individual_get_position(mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE * offset); -int mca_sharedfp_individual_seek (mca_io_ompio_file_t *fh, +int mca_sharedfp_individual_seek (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, int whence); int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, - char* filename, + const char* filename, int amode, struct ompi_info_t *info, mca_io_ompio_file_t *fh); @@ -76,24 +78,24 @@ int mca_sharedfp_individual_iread (mca_io_ompio_file_t *fh, struct ompi_datatype_t *datatype, ompi_request_t **request); int mca_sharedfp_individual_write (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); int mca_sharedfp_individual_write_ordered (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); int mca_sharedfp_individual_write_ordered_begin (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype); int mca_sharedfp_individual_write_ordered_end (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t *status); int mca_sharedfp_individual_iwrite (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); @@ -139,9 +141,9 @@ mca_sharedfp_individual_header_record* mca_sharedfp_individual_insert_headnode(v int mca_sharedfp_individual_collaborate_data(struct mca_sharedfp_base_data_t *sh); int mca_sharedfp_individual_get_timestamps_and_reclengths(double **buff, long **rec_length, MPI_Offset **offbuff,struct mca_sharedfp_base_data_t *sh); int mca_sharedfp_individual_create_buff(double **ts,MPI_Offset **off,int totalnodes,int size); -int 
mca_sharedfp_individual_sort_timestamps(double **ts,MPI_Offset **off, int totalnodes); +int mca_sharedfp_individual_sort_timestamps(double **ts,MPI_Offset **off, int **ranks, int totalnodes); MPI_Offset mca_sharedfp_individual_assign_globaloffset(MPI_Offset **offsetbuff,int totalnodes,struct mca_sharedfp_base_data_t *sh); -int mca_sharedfp_individual_getoffset(double timestamp, double *ts, int totalnodes); +int mca_sharedfp_individual_getoffset(double timestamp, double *ts, int *ranks, int myrank, int totalnodes); /*int mca_sharedfp_individual_cleanup(double *ts, int* rnk, MPI_Offset *off);*/ int mca_sharedfp_individual_insert_metadata(int functype,long recordlength,struct mca_sharedfp_base_data_t *sh ); diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_collaborate_data.c b/ompi/mca/sharedfp/individual/sharedfp_individual_collaborate_data.c index 4e7c3c785db..024a7edd0f6 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_collaborate_data.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_collaborate_data.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +24,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" #include "ompi/mca/io/ompio/io_ompio.h" #include @@ -33,11 +34,12 @@ int mca_sharedfp_individual_collaborate_data(struct mca_sharedfp_base_data_t *sh { int ret = OMPI_SUCCESS; mca_sharedfp_individual_header_record *headnode = NULL; - char *buff=NULL; + char *buff=NULL; MPI_Comm comm; int rank, size; int nodesoneachprocess = 0; - int idx = 0,i = 0; + int idx=0,i=0,j=0, l=0; + int *ranks = NULL; double *timestampbuff = NULL; OMPI_MPI_OFFSET_TYPE *offsetbuff = NULL; int *countbuff = NULL; @@ -47,6 +49,7 @@ int mca_sharedfp_individual_collaborate_data(struct mca_sharedfp_base_data_t *sh OMPI_MPI_OFFSET_TYPE *local_off = NULL; int totalnodes = 0; ompi_status_public_t status; + int recordlength=0; comm = sh->comm; @@ -55,8 +58,8 @@ int mca_sharedfp_individual_collaborate_data(struct mca_sharedfp_base_data_t *sh headnode = (mca_sharedfp_individual_header_record*)sh->selected_module_data; if ( NULL == headnode) { - opal_output(0, "sharedfp_individual_collaborate_data: headnode is NULL but file is open\n"); - return OMPI_ERROR; + opal_output(0, "sharedfp_individual_collaborate_data: headnode is NULL but file is open\n"); + return OMPI_ERROR; } /* Number of nodes on each process is the sum of records @@ -65,7 +68,8 @@ int mca_sharedfp_individual_collaborate_data(struct mca_sharedfp_base_data_t *sh nodesoneachprocess = headnode->numofrecordsonfile + headnode->numofrecords; if ( mca_sharedfp_individual_verbose ) { - printf("Nodes of each process = %d\n",nodesoneachprocess); + opal_output(ompi_sharedfp_base_framework.framework_output, + "Nodes of each process = %d\n",nodesoneachprocess); } countbuff = (int*)malloc(size * sizeof(int)); @@ -87,17 +91,17 @@ int mca_sharedfp_individual_collaborate_data(struct mca_sharedfp_base_data_t *sh goto exit; } - comm->c_coll.coll_allgather ( 
&nodesoneachprocess, 1, MPI_INT, - countbuff, 1, MPI_INT, comm, + comm->c_coll.coll_allgather ( &nodesoneachprocess, 1, MPI_INT, + countbuff, 1, MPI_INT, comm, comm->c_coll.coll_allgather_module ); if ( mca_sharedfp_individual_verbose) { for (i = 0; i < size ; i++) { - printf("sharedfp_individual_collaborate_data: Countbuff[%d] = %d\n", i, countbuff[i]); + opal_output(ompi_sharedfp_base_framework.framework_output,"sharedfp_individual_collaborate_data: Countbuff[%d] = %d\n", i, countbuff[i]); } } - if ( nodesoneachprocess == 0) { + if ( 0 == nodesoneachprocess ) { ind_ts[0] = 0; ind_recordlength[0] = 0; local_off[0] = 0; @@ -106,14 +110,26 @@ int mca_sharedfp_individual_collaborate_data(struct mca_sharedfp_base_data_t *sh for(i = 0; i < size; i++) { displ[i] = totalnodes; if ( mca_sharedfp_individual_verbose ) { - printf("sharedfp_individual_collaborate_data: displ[%d] = %d\n",i,displ[i]); - } + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_collaborate_data: displ[%d] = %d\n",i,displ[i]); + } totalnodes = totalnodes + countbuff[i]; } - if (totalnodes <= 0 ) { + if (totalnodes <= 0 ) { + goto exit; + } + + ranks = (int *) malloc ( totalnodes * sizeof(int)); + if ( NULL == ranks ) { + ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } + for ( l=0, i=0; ic_coll.coll_allgatherv ( ind_recordlength, countbuff[rank], OMPI_OFFSET_DATATYPE, offsetbuff, countbuff, displ, OMPI_OFFSET_DATATYPE, comm, comm->c_coll.coll_allgatherv_module ); if ( OMPI_SUCCESS != ret ) { goto exit; } - - ret = mca_sharedfp_individual_sort_timestamps(×tampbuff, &offsetbuff,totalnodes); + + ret = mca_sharedfp_individual_sort_timestamps(×tampbuff, &offsetbuff, &ranks, totalnodes); if ( OMPI_SUCCESS != ret ) { goto exit; } - + sh->global_offset = mca_sharedfp_individual_assign_globaloffset ( &offsetbuff, totalnodes, sh); - - buff = (char * ) malloc( ind_recordlength[0] * 1.2 ); + + recordlength = ind_recordlength[0] * 1.2; + buff = (char * ) malloc( recordlength ); if ( 
NULL == buff ) { ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - + for (i = 0; i < nodesoneachprocess ; i++) { + if ( ind_recordlength[i] > recordlength ) { + recordlength = ind_recordlength[i] * 1.2; + buff = (char *) realloc ( buff, recordlength ); + if ( NULL == buff ) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + /*Read from the local data file*/ ompio_io_ompio_file_read_at ( headnode->datafilehandle, local_off[i], buff, ind_recordlength[i], MPI_BYTE, &status); - - idx = mca_sharedfp_individual_getoffset(ind_ts[i],timestampbuff,totalnodes); - + + idx = mca_sharedfp_individual_getoffset(ind_ts[i],timestampbuff, ranks, rank, totalnodes); + if ( mca_sharedfp_individual_verbose ) { - printf("sharedfp_individual_collaborate_data: Process %d writing %ld bytes to main file \n", - rank,ind_recordlength[i]); - } - + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_collaborate_data: Process %d writing %ld bytes to main file at position" + "%lld (%d)\n", rank, ind_recordlength[i], offsetbuff[idx], idx); + } + /*Write into main data file*/ ompio_io_ompio_file_write_at( sh->sharedfh, offsetbuff[idx], buff, ind_recordlength[i], MPI_BYTE, &status); - - } + + } exit: if ( NULL != countbuff ) { @@ -192,6 +219,9 @@ int mca_sharedfp_individual_collaborate_data(struct mca_sharedfp_base_data_t *sh if ( NULL != buff ) { free ( buff ); } + if ( NULL != ranks ) { + free ( ranks ); + } return ret; } @@ -212,7 +242,7 @@ int mca_sharedfp_individual_get_timestamps_and_reclengths ( double **buff, long currnode = headnode->next; if ( mca_sharedfp_individual_verbose ) { - printf("Num is %d\n",num); + opal_output(ompi_sharedfp_base_framework.framework_output,"Num is %d\n",num); } if ( 0 == num ) { @@ -235,8 +265,9 @@ int mca_sharedfp_individual_get_timestamps_and_reclengths ( double **buff, long } if ( mca_sharedfp_individual_verbose ) { - printf("sharedfp_individual_get_timestamps_and_reclengths: Numofrecords on file %d\n", - 
headnode->numofrecordsonfile); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_get_timestamps_and_reclengths: Numofrecords on file %d\n", + headnode->numofrecordsonfile); } if (headnode->numofrecordsonfile > 0) { @@ -252,9 +283,10 @@ int mca_sharedfp_individual_get_timestamps_and_reclengths ( double **buff, long metaoffset = metaoffset + sizeof(struct mca_sharedfp_individual_record2); - if ( mca_sharedfp_individual_verbose ) { - printf("sharedfp_individual_get_timestamps_and_reclengths: Ctr = %d\n",ctr); - } + if ( mca_sharedfp_individual_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_get_timestamps_and_reclengths: Ctr = %d\n",ctr); + } ctr++; } @@ -266,9 +298,9 @@ int mca_sharedfp_individual_get_timestamps_and_reclengths ( double **buff, long /* Add the records from the linked list */ currnode = headnode->next; while (currnode) { - if ( mca_sharedfp_individual_verbose ) { - printf("Ctr = %d\n",ctr); - } + if ( mca_sharedfp_individual_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output,"Ctr = %d\n",ctr); + } /* Some error over here..need to check this code again */ /*while(headnode->next != NULL)*/ @@ -279,9 +311,10 @@ int mca_sharedfp_individual_get_timestamps_and_reclengths ( double **buff, long ctr = ctr + 1; headnode->next = currnode->next; - if ( mca_sharedfp_individual_verbose ) { - printf("sharedfp_individual_get_timestamps_and_reclengths: node deleted from the metadatalinked list\n"); - } + if ( mca_sharedfp_individual_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_get_timestamps_and_reclengths: node deleted from the metadatalinked list\n"); + } free(currnode); currnode = headnode->next; @@ -292,7 +325,7 @@ int mca_sharedfp_individual_get_timestamps_and_reclengths ( double **buff, long headnode->numofrecords = 0; exit: - + return ret; } @@ -316,7 +349,7 @@ int mca_sharedfp_individual_create_buff(double 
**ts,MPI_Offset **off,int totaln } /*Sort the timestamp buffer*/ -int mca_sharedfp_individual_sort_timestamps(double **ts, MPI_Offset **off, int totalnodes) +int mca_sharedfp_individual_sort_timestamps(double **ts, MPI_Offset **off, int **ranks, int totalnodes) { int i = 0; @@ -324,7 +357,7 @@ int mca_sharedfp_individual_sort_timestamps(double **ts, MPI_Offset **off, int int flag = 1; double tempts = 0.0; OMPI_MPI_OFFSET_TYPE tempoffset = 0; - + int temprank = 0; for (i= 1; (i <= totalnodes)&&(flag) ; i++) { flag = 0; @@ -340,6 +373,11 @@ int mca_sharedfp_individual_sort_timestamps(double **ts, MPI_Offset **off, int *(*off + j) = *(*off + j + 1); *(*off + j + 1) = tempoffset; + /*swap ranks*/ + temprank = *(*ranks + j); + *(*ranks + j) = *(*ranks + j + 1); + *(*ranks + j + 1) = temprank; + flag = 1; } } @@ -358,7 +396,7 @@ MPI_Offset mca_sharedfp_individual_assign_globaloffset(MPI_Offset **offsetbuff, for (i = 0; i < totalnodes; i++) { temp = *(*offsetbuff + i); - + if (i == 0) { *(*offsetbuff + i ) = sh->global_offset; } @@ -373,15 +411,16 @@ MPI_Offset mca_sharedfp_individual_assign_globaloffset(MPI_Offset **offsetbuff, } -int mca_sharedfp_individual_getoffset(double timestamp, double *ts, int totalnodes) +int mca_sharedfp_individual_getoffset(double timestamp, double *ts, int *ranks, int myrank, int totalnodes) { int i = 0; int notfound = 1; + while (notfound) { - if (ts[i] == timestamp) + if (ts[i] == timestamp && ranks[i] == myrank ) break; - + i++; if (i == totalnodes) { diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_component.c b/ompi/mca/sharedfp/individual/sharedfp_individual_component.c index 3e9adcc9be3..93fda2ff485 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_component.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c b/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c index 01dac1eceb2..d7e86f08e2d 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,7 +31,7 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, - char* filename, + const char* filename, int amode, struct ompi_info_t *info, mca_io_ompio_file_t *fh) @@ -49,6 +51,11 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, /*Open the same file again without shared file pointer*/ /*-------------------------------------------------*/ shfileHandle = (mca_io_ompio_file_t *) malloc ( sizeof(mca_io_ompio_file_t)); + if ( NULL == shfileHandle ) { + opal_output(0, "mca_sharedfp_individual_file_open: unable to allocate memory\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + err = ompio_io_ompio_file_open ( comm, filename, amode, info, shfileHandle, false); if ( OMPI_SUCCESS != err ) { opal_output(0, "mca_sharedfp_individual_file_open: Error during file open\n"); @@ -59,11 +66,12 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, if ( NULL == sh ){ opal_output(0, "mca_sharedfp_individual_file_open: Error, unable to malloc " "f_sharedfp_ptr struct\n"); + free ( shfileHandle ); return OMPI_ERR_OUT_OF_RESOURCE; } - + rank = ompi_comm_rank ( comm ); - + /*Populate the sh file structure based on the implementation*/ sh->sharedfh = shfileHandle; /* Shared file pointer*/ sh->global_offset = 0; /* Global Offset*/ @@ -78,21 +86,39 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, /* NOTE: Open the data file without shared file pointer */ /*--------------------------------------------------------*/ if ( mca_sharedfp_individual_verbose ) { - printf("mca_sharedfp_individual_file_open: open data file.\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_individual_file_open: open data file.\n"); } /* data filename created by appending .data.$rank to the original filename*/ len = strlen (filename ) + 64; datafilename = (char*)malloc( len ); + if ( NULL == datafilename ) { 
+ opal_output(0, "mca_sharedfp_individual_file_open: unable to allocate memory\n"); + free ( shfileHandle ); + free ( sh ); + return OMPI_ERR_OUT_OF_RESOURCE; + } snprintf(datafilename, len, "%s%s%d",filename,".data.",rank); datafilehandle = (mca_io_ompio_file_t *)malloc(sizeof(mca_io_ompio_file_t)); + if ( NULL == datafilehandle ) { + opal_output(0, "mca_sharedfp_individual_file_open: unable to allocate memory\n"); + free ( shfileHandle ); + free ( sh ); + free ( datafilename ); + return OMPI_ERR_OUT_OF_RESOURCE; + } err = ompio_io_ompio_file_open(MPI_COMM_SELF, datafilename, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, datafilehandle, false); if ( OMPI_SUCCESS != err) { opal_output(0, "mca_sharedfp_individual_file_open: Error during datafile file open\n"); + free (shfileHandle ); + free (sh); + free (datafilename); + free (datafilehandle); return err; } @@ -101,19 +127,43 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, /* NOTE: Open the meta file without shared file pointer */ /*----------------------------------------------------------*/ if ( mca_sharedfp_individual_verbose ) { - printf("mca_sharedfp_individual_file_open: metadata file.\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_individual_file_open: metadata file.\n"); } /* metadata filename created by appending .metadata.$rank to the original filename*/ - metadatafilename = (char*) malloc ( len ); + metadatafilename = (char*) malloc ( len ); + if ( NULL == metadatafilename ) { + free (shfileHandle ); + free (sh); + free (datafilename); + free (datafilehandle); + opal_output(0, "mca_sharedfp_individual_file_open: Error during memory allocation\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } snprintf ( metadatafilename, len, "%s%s%d", filename, ".metadata.",rank); metadatafilehandle = (mca_io_ompio_file_t *)malloc(sizeof(mca_io_ompio_file_t)); + if ( NULL == metadatafilehandle ) { + free (shfileHandle ); + free (sh); + 
free (datafilename); + free (datafilehandle); + free (metadatafilename); + opal_output(0, "mca_sharedfp_individual_file_open: Error during memory allocation\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } err = ompio_io_ompio_file_open ( MPI_COMM_SELF,metadatafilename, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, metadatafilehandle, false); if ( OMPI_SUCCESS != err) { opal_output(0, "mca_sharedfp_individual_file_open: Error during metadatafile file open\n"); + free (shfileHandle ); + free (sh); + free (datafilename); + free (datafilehandle); + free (metadatafilename); + free (metadatafilehandle); return err; } @@ -140,7 +190,8 @@ int mca_sharedfp_individual_file_close (mca_io_ompio_file_t *fh) if ( NULL == fh->f_sharedfp_data ){ if ( mca_sharedfp_individual_verbose ) { - printf("sharedfp_inidividual_file_close - shared file pointer structure not initialized\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_inidividual_file_close - shared file pointer structure not initialized\n"); } return OMPI_SUCCESS; } @@ -195,16 +246,16 @@ mca_sharedfp_individual_header_record* mca_sharedfp_individual_insert_headnode ( if (!headnode) return NULL; } - + headnode->numofrecords = 0; /* No records in the linked list */ headnode->numofrecordsonfile = 0; /* No records in the metadatafile for this file */ - + headnode->datafile_offset = 0; headnode->metadatafile_offset = 0; - + headnode->metafile_start_offset = 0; headnode->datafile_start_offset = 0; - + headnode->metadatafilehandle = 0; headnode->datafilehandle = 0; headnode->next = NULL; diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_gettime.c b/ompi/mca/sharedfp/individual/sharedfp_individual_gettime.c index 93735783125..3176e66acc7 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_gettime.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_gettime.c @@ -23,6 +23,6 @@ double mca_sharedfp_individual_gettime(void) gettimeofday(×tamp,NULL); seconds = 
(double)timestamp.tv_sec; microsec = ((double)timestamp.tv_usec)/((double)1000000.0); - + return (seconds+microsec); } diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_insert_metadata.c b/ompi/mca/sharedfp/individual/sharedfp_individual_insert_metadata.c index cffba81bc91..191c4e1bf3f 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_insert_metadata.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_insert_metadata.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +24,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" #include #include @@ -44,14 +45,15 @@ int mca_sharedfp_individual_insert_metadata(int functype,long recordlength,struc if ( mca_sharedfp_individual_verbose ) { - printf("sharedfp_individual_insert_metadata: Headnode->numofrecords = %d\n", - headnode->numofrecords); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_insert_metadata: Headnode->numofrecords = %d\n", + headnode->numofrecords); } /* Check if the maximum limit is reached for the records in the linked list*/ if (headnode->numofrecords == MAX_METADATA_RECORDS) { /* Entire linked list is now deleted and a new file*/ ret = mca_sharedfp_individual_write_metadata_file(sh); - headnode->next = NULL; + headnode->next = NULL; } /* Allocate a new Node */ @@ -70,13 +72,13 @@ int mca_sharedfp_individual_insert_metadata(int functype,long recordlength,struc newnode->localposition = headnode->datafile_offset; /* Datafile offset*/ newnode->recordlength = recordlength; newnode->next = NULL; - - if ( headnode->next == NULL) { + + if ( headnode->next == NULL) { 
/*headnode allocated but no further metadata node is allocated*/ headnode->next = newnode; } - else { + else { /*We need to append the new node*/ tempnode = headnode->next; @@ -113,12 +115,17 @@ int mca_sharedfp_individual_write_metadata_file(struct mca_sharedfp_base_data_t buff.recordlength = current->recordlength; if ( mca_sharedfp_individual_verbose ) { - printf("sharedfp_individual_write_metadata_file: Buff recordid %ld\n",buff.recordid); - printf("sharedfp_individual_write_metadata_file: Buff timestamp %f\n", buff.timestamp); - printf("sharedfp_individual_write_metadata_file: Buff localposition %lld\n",buff.localposition); - printf("sharedfp_individual_write_metadata_file: Buff recordlength %ld\n",buff.recordlength); - printf("sharedfp_individual_write_metadata_file: Size of buff %ld\n",sizeof(buff)); - } + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_write_metadata_file: Buff recordid %ld\n",buff.recordid); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_write_metadata_file: Buff timestamp %f\n", buff.timestamp); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_write_metadata_file: Buff localposition %lld\n",buff.localposition); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_write_metadata_file: Buff recordlength %ld\n",buff.recordlength); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_write_metadata_file: Size of buff %ld\n",sizeof(buff)); + } headnode->next = current->next; free(current); diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_iwrite.c b/ompi/mca/sharedfp/individual/sharedfp_individual_iwrite.c index f2663f70470..223d6703d4e 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_iwrite.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_iwrite.c @@ -9,7 +9,9 @@ * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,9 +26,10 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" int mca_sharedfp_individual_iwrite(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, ompi_datatype_t *datatype, MPI_Request * request) @@ -40,7 +43,8 @@ int mca_sharedfp_individual_iwrite(mca_io_ompio_file_t *fh, if(fh->f_sharedfp_data==NULL){ if ( mca_sharedfp_individual_verbose ) { - printf("mca_sharedfp_individual_iwrite: opening the shared file pointer\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_individual_iwrite: opening the shared file pointer\n"); } shared_fp_base_module = fh->f_sharedfp; @@ -69,11 +73,11 @@ int mca_sharedfp_individual_iwrite(mca_io_ompio_file_t *fh, /*Insert metadata record into a queue*/ ret = mca_sharedfp_individual_insert_metadata(OMPI_FILE_WRITE_SHARED,totalbytes,sh); - + /*Write the data into individual file*/ ret = ompio_io_ompio_file_iwrite_at ( headnode->datafilehandle, headnode->datafile_offset, - buf, count, datatype, request); + buf, count, datatype, request); if ( OMPI_SUCCESS != ret ) { opal_output(0,"sharedfp_individual_iwrite: Error while iwriting the datafile \n"); return ret; @@ -86,18 +90,137 @@ int mca_sharedfp_individual_iwrite(mca_io_ompio_file_t *fh, } int mca_sharedfp_individual_write_ordered_begin(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype) { - opal_output(0,"mca_sharedfp_individual_write_ordered_begin: NOT IMPLEMENTED\n"); - return OMPI_ERROR; + int ret = 
OMPI_SUCCESS; + int size = 0, rank = 0; + int i = 0; + size_t numofbytes = 0; + size_t totalbytes = 0; + OMPI_MPI_OFFSET_TYPE *offbuff=NULL; + OMPI_MPI_OFFSET_TYPE global_offset = 0; + OMPI_MPI_OFFSET_TYPE prev_offset = 0; + OMPI_MPI_OFFSET_TYPE temp = 0, offset = 0; + mca_sharedfp_individual_header_record *headnode = NULL; + struct mca_sharedfp_base_data_t *sh = NULL; + mca_sharedfp_base_module_t * shared_fp_base_module = NULL; + + if(fh->f_sharedfp_data==NULL){ + if ( mca_sharedfp_individual_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_write_ordered_begin - opening the shared file pointer\n"); + } + shared_fp_base_module = fh->f_sharedfp; + + ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, + fh->f_filename, + fh->f_amode, + fh->f_info, + fh); + if ( OMPI_SUCCESS != ret ) { + opal_output(0,"sharedfp_individual_write_ordered_begin - error opening the shared file pointer\n"); + return ret; + } + } + + if ( true == fh->f_split_coll_in_use ) { + opal_output(0, "Only one split collective I/O operation allowed per file handle at any given point in time!\n"); + return MPI_ERR_REQUEST; + } + + /*Retrieve the sharedfp data structures*/ + sh = fh->f_sharedfp_data; + rank = ompi_comm_rank ( sh->comm ); + size = ompi_comm_size ( sh->comm ); + + /* Calculate the number of bytes of data that needs to be written*/ + opal_datatype_type_size ( &datatype->super, &numofbytes); + totalbytes = count * numofbytes; + + headnode = (mca_sharedfp_individual_header_record*)sh->selected_module_data; + if ( NULL == headnode) { + opal_output (0, "sharedfp_individual_write_ordered_begin: headnode is NULL but file is open\n"); + return OMPI_ERROR; + } + + /* Data from all the metadata is combined and written to the main file */ + ret = mca_sharedfp_individual_collaborate_data ( sh ); + if ( OMPI_SUCCESS != ret) { + return ret; + } + + if ( 0 == rank ) { + offbuff = (OMPI_MPI_OFFSET_TYPE *)malloc ( sizeof(OMPI_MPI_OFFSET_TYPE) * 
size); + if (NULL == offbuff ) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + /*collect the total bytes to be written*/ + sh->comm->c_coll.coll_gather ( &totalbytes, 1, OMPI_OFFSET_DATATYPE, + offbuff, 1, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_gather_module ); + + if ( 0 == rank ) { + prev_offset = offbuff[0]; + offbuff[0] = sh->global_offset; + + for (i = 1; i < size ; i++){ + temp = offbuff[i]; + offbuff[i] = offbuff[i - 1] + prev_offset; + prev_offset = temp; + } + + for (i = 0; i < size; i++){ + global_offset = offbuff[size - 1] + prev_offset; + } + } + + + /* Scatter the results to the other processes */ + ret = sh->comm->c_coll.coll_scatter ( offbuff, 1, OMPI_OFFSET_DATATYPE, + &offset, 1, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_scatter_module ); + if ( OMPI_SUCCESS != ret ) { + opal_output(0,"sharedfp_individual_write_ordered_begin: Error in scattering offsets \n"); + goto exit; + } + + ret = sh->comm->c_coll.coll_bcast ( &global_offset, 1, OMPI_OFFSET_DATATYPE, + 0, sh->comm, sh->comm->c_coll.coll_bcast_module ); + if ( OMPI_SUCCESS != ret ) { + opal_output(0,"sharedfp_individual_write_ordered_begin: Error while bcasting global offset \n"); + goto exit; + } + + sh->global_offset = global_offset; + + /*use file_write_at_all to ensure the order*/ + ret = ompio_io_ompio_file_iwrite_at_all(sh->sharedfh,offset, buf,count,datatype, + &fh->f_split_coll_req); + fh->f_split_coll_in_use = true; + if ( OMPI_SUCCESS != ret ) { + opal_output(0,"sharedfp_individual_write_ordered_begin: Error while writing the datafile \n"); + } + +exit: + if ( NULL != offbuff ) { + free ( offbuff); + } + + return ret; } int mca_sharedfp_individual_write_ordered_end(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t *status) { - opal_output(0,"mca_sharedfp_individual_write_ordered_end: NOT IMPLEMENTED\n"); - return OMPI_ERROR; + int ret = OMPI_SUCCESS; + ret = ompi_request_wait ( &fh->f_split_coll_req, status ); + + /* 
remove the flag again */ + fh->f_split_coll_in_use = false; + return ret; } diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_read.c b/ompi/mca/sharedfp/individual/sharedfp_individual_read.c index 27446002f45..a14ae8323d7 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_read.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_read.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 University of Houston. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,6 +24,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" int mca_sharedfp_individual_read ( mca_io_ompio_file_t *fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status) diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_seek.c b/ompi/mca/sharedfp/individual/sharedfp_individual_seek.c index fcbf0d993e7..7a6071e3c5d 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_seek.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_seek.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 University of Houston. 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_write.c b/ompi/mca/sharedfp/individual/sharedfp_individual_write.c index f7e4cb47905..6f921f4656e 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_write.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_write.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,9 +26,10 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" int mca_sharedfp_individual_write (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -40,7 +43,7 @@ int mca_sharedfp_individual_write (mca_io_ompio_file_t *fh, if ( NULL == fh->f_sharedfp_data ) { if ( mca_sharedfp_individual_verbose ) { - printf("sharedfp_individual_write: opening the shared file pointer file\n"); + opal_output(ompi_sharedfp_base_framework.framework_output,"sharedfp_individual_write: opening the shared file pointer file\n"); } shared_fp_base_module = fh->f_sharedfp; @@ -84,7 +87,7 @@ int mca_sharedfp_individual_write (mca_io_ompio_file_t *fh, } int mca_sharedfp_individual_write_ordered (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -104,7 +107,8 @@ int mca_sharedfp_individual_write_ordered (mca_io_ompio_file_t *fh, if(fh->f_sharedfp_data==NULL){ if ( mca_sharedfp_individual_verbose ) { - printf("sharedfp_individual_write - opening the shared file pointer\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_individual_write_ordered - opening the shared file pointer\n"); } shared_fp_base_module = fh->f_sharedfp; @@ -114,7 +118,7 @@ int mca_sharedfp_individual_write_ordered (mca_io_ompio_file_t *fh, fh->f_info, fh); if ( OMPI_SUCCESS != ret ) { - opal_output(0,"sharedfp_individual_write - error opening the shared file pointer\n"); + opal_output(0,"sharedfp_individual_write_ordered - error opening the shared file pointer\n"); return ret; } } @@ -133,7 +137,7 @@ int mca_sharedfp_individual_write_ordered (mca_io_ompio_file_t *fh, opal_output (0, "sharedfp_individual_write_ordered: headnode is NULL but file is open\n"); return OMPI_ERROR; 
} - + /* Data from all the metadata is combined and written to the main file */ ret = mca_sharedfp_individual_collaborate_data ( sh ); if ( OMPI_SUCCESS != ret) { @@ -146,25 +150,25 @@ int mca_sharedfp_individual_write_ordered (mca_io_ompio_file_t *fh, return OMPI_ERR_OUT_OF_RESOURCE; } } - + /*collect the total bytes to be written*/ - sh->comm->c_coll.coll_gather ( &totalbytes, 1, OMPI_OFFSET_DATATYPE, + sh->comm->c_coll.coll_gather ( &totalbytes, 1, OMPI_OFFSET_DATATYPE, offbuff, 1, OMPI_OFFSET_DATATYPE, 0, sh->comm, sh->comm->c_coll.coll_gather_module ); - + if ( 0 == rank ) { prev_offset = offbuff[0]; offbuff[0] = sh->global_offset; - + for (i = 1; i < size ; i++){ temp = offbuff[i]; offbuff[i] = offbuff[i - 1] + prev_offset; prev_offset = temp; } - + for (i = 0; i < size; i++){ global_offset = offbuff[size - 1] + prev_offset; - } + } } @@ -176,8 +180,8 @@ int mca_sharedfp_individual_write_ordered (mca_io_ompio_file_t *fh, opal_output(0,"sharedfp_individual_write_ordered: Error in scattering offsets \n"); goto exit; } - - ret = sh->comm->c_coll.coll_bcast ( &global_offset, 1, OMPI_OFFSET_DATATYPE, + + ret = sh->comm->c_coll.coll_bcast ( &global_offset, 1, OMPI_OFFSET_DATATYPE, 0, sh->comm, sh->comm->c_coll.coll_bcast_module ); if ( OMPI_SUCCESS != ret ) { opal_output(0,"sharedfp_individual_write_ordered: Error while bcasting global offset \n"); @@ -185,7 +189,7 @@ int mca_sharedfp_individual_write_ordered (mca_io_ompio_file_t *fh, } sh->global_offset = global_offset; - + /*use file_write_at_all to ensure the order*/ ret = ompio_io_ompio_file_write_at_all(sh->sharedfh,offset, buf,count,datatype,status); if ( OMPI_SUCCESS != ret ) { @@ -196,6 +200,6 @@ int mca_sharedfp_individual_write_ordered (mca_io_ompio_file_t *fh, if ( NULL != offbuff ) { free ( offbuff); } - + return ret; } diff --git a/ompi/mca/sharedfp/lockedfile/Makefile.am b/ompi/mca/sharedfp/lockedfile/Makefile.am index 5b7d6ac2192..c0ea5abdd51 100644 --- a/ompi/mca/sharedfp/lockedfile/Makefile.am +++ 
b/ompi/mca/sharedfp/lockedfile/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.c index 3c8f1fdc2d0..06c433cffcb 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.c @@ -70,7 +70,7 @@ int mca_sharedfp_lockedfile_component_init_query(bool enable_progress_threads, /* Nothing to do */ return OMPI_SUCCESS; -} +} struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_lockedfile_component_file_query(mca_io_ompio_file_t *fh, int *priority) { struct flock lock; @@ -132,11 +132,11 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_lockedfile_component_file err = fcntl(fd, F_SETLKW, &lock); opal_output(ompi_sharedfp_base_framework.framework_output, "mca_sharedfp_lockedfile_component_file_query: returned err=%d, for fd=%d\n",err,fd); - + if (err) { - opal_output(ompi_sharedfp_base_framework.framework_output, + opal_output(ompi_sharedfp_base_framework.framework_output, "mca_sharedfp_lockedfile_component_file_query: Failed to set a file lock on %s %s\n", filename, strerror(errno) ); - opal_output(ompi_sharedfp_base_framework.framework_output, + opal_output(ompi_sharedfp_base_framework.framework_output, "err=%d, errno=%d, EOPNOTSUPP=%d, EINVAL=%d, ENOSYS=%d, EACCES=%d, EAGAIN=%d, EBADF=%d\n", err, errno, EOPNOTSUPP, EINVAL, ENOSYS, EACCES, EAGAIN, EBADF); @@ -147,7 
+147,7 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_lockedfile_component_file } else { - opal_output(ompi_sharedfp_base_framework.framework_output, + opal_output(ompi_sharedfp_base_framework.framework_output, "mca_sharedfp_lockedfile_component_file_query: fcntl claims success in setting a file lock on %s\n", filename ); has_file_lock_support=true; @@ -170,7 +170,7 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_lockedfile_component_file } int mca_sharedfp_lockedfile_component_file_unquery (mca_io_ompio_file_t *file) -{ +{ /* This function might be needed for some purposes later. for now it * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h index 3e90e6639a5..2774ef5ee66 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,12 +50,12 @@ OMPI_MODULE_DECLSPEC extern mca_sharedfp_base_component_2_0_0_t mca_sharedfp_loc */ /*IMPORANT: Update here when implementing functions from sharedfp API*/ -int mca_sharedfp_lockedfile_seek (mca_io_ompio_file_t *fh, +int mca_sharedfp_lockedfile_seek (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, int whence); int mca_sharedfp_lockedfile_get_position (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE * offset); int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, - char* filename, + const char* filename, int amode, struct ompi_info_t *info, mca_io_ompio_file_t *fh); @@ -77,24 +79,24 @@ int mca_sharedfp_lockedfile_iread (mca_io_ompio_file_t *fh, struct ompi_datatype_t *datatype, ompi_request_t **request); int mca_sharedfp_lockedfile_write (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); int mca_sharedfp_lockedfile_write_ordered (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); int mca_sharedfp_lockedfile_write_ordered_begin (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype); int mca_sharedfp_lockedfile_write_ordered_end (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t *status); int mca_sharedfp_lockedfile_iwrite (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_component.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_component.c index 18e0696fe90..3d142c4e25f 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_component.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The 
University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c index 4687a70c9fe..c40348d848b 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2017 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,16 +25,18 @@ #include "mpi.h" #include "ompi/constants.h" +#include "ompi/group/group.h" +#include "ompi/proc/proc.h" #include "ompi/mca/sharedfp/sharedfp.h" #include "ompi/mca/sharedfp/base/base.h" #ifdef HAVE_SYS_STAT_H -#include +#include #endif #include int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, - char* filename, + const char* filename, int amode, struct ompi_info_t *info, mca_io_ompio_file_t *fh) @@ -42,7 +46,8 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, int handle, rank; struct mca_sharedfp_lockedfile_data * module_data = NULL; struct mca_sharedfp_base_data_t* sh; - mca_io_ompio_file_t * shfileHandle; + mca_io_ompio_file_t * shfileHandle, *ompio_fh; + mca_io_ompio_data_t *data; /*------------------------------------------------------------*/ /*Open the same file again without shared file pointer support*/ @@ -53,11 +58,23 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, opal_output(0, "mca_sharedfp_lockedfile_file_open: Error during file open\n"); return err; } + shfileHandle->f_fh = fh->f_fh; + data = (mca_io_ompio_data_t *) fh->f_fh->f_io_selected_data; + ompio_fh = &data->ompio_fh; + + err = mca_io_ompio_set_view_internal (shfileHandle, + ompio_fh->f_disp, + ompio_fh->f_etype, + ompio_fh->f_orig_filetype, + ompio_fh->f_datarep, + MPI_INFO_NULL); + /*Memory is allocated here for the sh structure*/ sh = (struct mca_sharedfp_base_data_t*)malloc(sizeof(struct mca_sharedfp_base_data_t)); if ( NULL == sh){ opal_output(0, "mca_sharedfp_lockedfile_file_open: Error, unable to malloc f_sharedfp_ptr struct\n"); + free ( shfileHandle); return OMPI_ERR_OUT_OF_RESOURCE; } /*Populate the sh file structure based on the implementation*/ @@ -70,18 +87,37 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, /*Open a new file which will maintain the pointer for this file open*/ if 
( mca_sharedfp_lockedfile_verbose ) { - printf("mca_sharedfp_lockedfile_file_open: open locked file.\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_lockedfile_file_open: open locked file.\n"); } module_data = (struct mca_sharedfp_lockedfile_data*)malloc(sizeof(struct mca_sharedfp_lockedfile_data)); if ( NULL == module_data ) { - printf("mca_sharedfp_lockedfile_file_open: Error, unable to malloc lockedfile_data struct\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_lockedfile_file_open: Error, unable to malloc lockedfile_data struct\n"); + free (shfileHandle); + free (sh); return OMPI_ERR_OUT_OF_RESOURCE; } - lockedfilename = (char*)malloc(sizeof(char) * (strlen(filename) + 64)); - sprintf(lockedfilename,"%s%s",filename,".lockedfile"); + opal_jobid_t masterjobid; + if ( 0 == comm->c_my_rank ) { + ompi_proc_t *masterproc = ompi_group_peer_lookup(comm->c_local_group, 0 ); + masterjobid = OMPI_CAST_RTE_NAME(&masterproc->super.proc_name)->jobid; + } + comm->c_coll.coll_bcast ( &masterjobid, 1, MPI_UNSIGNED, 0, comm, + comm->c_coll.coll_bcast_module ); + + size_t filenamelen = strlen(filename) + 16; + lockedfilename = (char*)malloc(sizeof(char) * filenamelen); + if ( NULL == lockedfilename ) { + free (shfileHandle); + free (sh); + free (module_data); + return OMPI_ERR_OUT_OF_RESOURCE; + } + snprintf(lockedfilename, filenamelen, "%s-%u%s",filename,masterjobid,".lock"); module_data->filename = lockedfilename; /*-------------------------------------------------*/ @@ -100,7 +136,9 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, handle = open ( lockedfilename, O_RDWR, 0644 ); if ( -1 == handle ) { - printf("[%d]mca_sharedfp_lockedfile_file_open: Error during file open\n", rank); + opal_output(0, "[%d]mca_sharedfp_lockedfile_file_open: Error during file open\n", rank); + free (shfileHandle); + free (sh); free(module_data); return OMPI_ERROR; } @@ -111,7 +149,7 @@ int 
mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, sh->selected_module_data = module_data; /*remember the shared file handle*/ fh->f_sharedfp_data = sh; - + comm->c_coll.coll_barrier ( comm, comm->c_coll.coll_barrier_module ); return err; @@ -127,7 +165,7 @@ int mca_sharedfp_lockedfile_file_close (mca_io_ompio_file_t *fh) if ( fh->f_sharedfp_data==NULL){ /* Can happen with lazy_open being set */ if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_file_close - shared file pointer structure not initialized\n"); + opal_output(0, "sharedfp_lockedfile_file_close - shared file pointer structure not initialized\n"); } return OMPI_SUCCESS; } diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_iread.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_iread.c index 6a828fe9565..70df7914f60 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_iread.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_iread.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +24,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" #include "ompi/mca/io/ompio/io_ompio.h" int mca_sharedfp_lockedfile_iread(mca_io_ompio_file_t *fh, @@ -41,7 +42,8 @@ int mca_sharedfp_lockedfile_iread(mca_io_ompio_file_t *fh, if ( NULL == fh->f_sharedfp_data ) { if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_iread: opening the shared file pointer\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_iread: opening the shared file pointer\n"); } shared_fp_base_module = fh->f_sharedfp; @@ -61,7 +63,8 @@ int mca_sharedfp_lockedfile_iread(mca_io_ompio_file_t *fh, bytesRequested = count * numofBytes; if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_iread - Bytes Requested is %ld\n",bytesRequested); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_iread - Bytes Requested is %ld\n",bytesRequested); } @@ -70,9 +73,12 @@ int mca_sharedfp_lockedfile_iread(mca_io_ompio_file_t *fh, /*Request the offset to write bytesRequested bytes*/ ret = mca_sharedfp_lockedfile_request_position(sh,bytesRequested,&offset); + offset /= sh->sharedfh->f_etype_size; + if ( -1 != ret ) { if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_iread - Offset received is %lld\n",offset); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_iread - Offset received is %lld\n",offset); } /* Read the file */ @@ -87,8 +93,127 @@ int mca_sharedfp_lockedfile_read_ordered_begin(mca_io_ompio_file_t *fh, int count, struct ompi_datatype_t *datatype) { - opal_output(0,"mca_sharedfp_lockedfile_write_ordered_begin: NOT IMPLEMENTED\n"); - return OMPI_ERROR; + int ret = OMPI_SUCCESS; + mca_sharedfp_base_module_t * shared_fp_base_module=NULL; + OMPI_MPI_OFFSET_TYPE offset = 0; + long sendBuff = 0; + long 
*buff=NULL; + long offsetBuff; + OMPI_MPI_OFFSET_TYPE offsetReceived = 0; + long bytesRequested = 0; + int recvcnt = 1, sendcnt = 1; + size_t numofBytes; + int rank, size, i; + struct mca_sharedfp_base_data_t *sh = NULL; + + if(fh->f_sharedfp_data==NULL){ + if ( mca_sharedfp_lockedfile_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_read_ordered_begin: opening the shared file pointer\n"); + } + shared_fp_base_module = fh->f_sharedfp; + + ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, + fh->f_filename, + fh->f_amode, + fh->f_info, + fh); + if ( OMPI_SUCCESS != ret ) { + opal_output(0,"sharedfp_lockedfile_read_ordered_begin - error opening the shared file pointer\n"); + return ret; + } + } + + + if ( true == fh->f_split_coll_in_use ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "Only one split collective I/O operation allowed per file handle at any given point in time!\n"); + return MPI_ERR_REQUEST; + } + + /*Retrieve the new communicator*/ + sh = fh->f_sharedfp_data; + + /* Calculate the number of bytes to write*/ + opal_datatype_type_size ( &datatype->super, &numofBytes); + sendBuff = count * numofBytes; + + /* Get the ranks in the communicator */ + rank = ompi_comm_rank ( sh->comm ); + size = ompi_comm_size ( sh->comm ); + + if ( 0 == rank ) { + buff = (long*) malloc (sizeof(long) * size); + if ( NULL == buff ) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, buff, recvcnt, + OMPI_OFFSET_DATATYPE, 0, sh->comm, + sh->comm->c_coll.coll_gather_module ); + if ( OMPI_SUCCESS != ret ) { + goto exit; + } + + /* All the counts are present now in the recvBuff. 
+ The size of recvBuff is sizeof_newComm + */ + if (rank == 0) { + for ( i = 0; i < size ; i ++) { + bytesRequested += buff[i]; + if ( mca_sharedfp_lockedfile_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_read_ordered_begin: Bytes requested are %ld\n",bytesRequested); + } + } + + /*Request the offset to write bytesRequested bytes + only the root process needs to do the request, + since the root process will then tell the other + processes at what offset they should write their + share of the data. + */ + ret = mca_sharedfp_lockedfile_request_position(sh,bytesRequested,&offsetReceived); + if ( OMPI_SUCCESS != ret ){ + goto exit; + } + if ( mca_sharedfp_lockedfile_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_read_ordered_begin: Offset received is %lld\n",offsetReceived); + } + buff[0] += offsetReceived; + for (i = 1 ; i < size; i++) { + buff[i] += buff[i-1]; + } + } + + /* Scatter the results to the other processes*/ + ret = sh->comm->c_coll.coll_scatter ( buff, sendcnt, OMPI_OFFSET_DATATYPE, + &offsetBuff, recvcnt, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_scatter_module ); + if ( OMPI_SUCCESS != ret ) { + goto exit; + } + + /*Each process now has its own individual offset*/ + offset = offsetBuff - sendBuff; + offset /= sh->sharedfh->f_etype_size; + + if ( mca_sharedfp_lockedfile_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_read_ordered_begin: Offset returned is %lld\n",offset); + } + + ret = ompio_io_ompio_file_iread_at_all ( sh->sharedfh, offset, buf, count, datatype, &fh->f_split_coll_req ); + fh->f_split_coll_in_use = true; + +exit: + if ( NULL != buff ) { + free ( buff); + } + + return ret; } @@ -96,6 +221,10 @@ int mca_sharedfp_lockedfile_read_ordered_end(mca_io_ompio_file_t *fh, void *buf, ompi_status_public_t *status) { - opal_output(0,"mca_sharedfp_lockedfile_write_ordered_end: NOT 
IMPLEMENTED\n"); - return OMPI_ERROR; + int ret = OMPI_SUCCESS; + ret = ompi_request_wait ( &fh->f_split_coll_req, status ); + + /* remove the flag again */ + fh->f_split_coll_in_use = false; + return ret; } diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_iwrite.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_iwrite.c index 49f1917b140..c12f60f6a66 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_iwrite.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_iwrite.c @@ -9,7 +9,9 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,10 +26,11 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" #include "ompi/mca/io/ompio/io_ompio.h" int mca_sharedfp_lockedfile_iwrite(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, ompi_datatype_t *datatype, MPI_Request * request) @@ -41,7 +44,8 @@ int mca_sharedfp_lockedfile_iwrite(mca_io_ompio_file_t *fh, if(fh->f_sharedfp_data==NULL){ if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_iwrite: opening the shared file pointer\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_iwrite: opening the shared file pointer\n"); } shared_fp_base_module = fh->f_sharedfp; @@ -60,7 +64,8 @@ int mca_sharedfp_lockedfile_iwrite(mca_io_ompio_file_t *fh, opal_datatype_type_size ( &datatype->super, &numofBytes); bytesRequested = count * numofBytes; if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_iwrite: Bytes Requested is 
%ld\n",bytesRequested); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_iwrite: Bytes Requested is %ld\n",bytesRequested); } /*Retrieve the shared file data struct*/ @@ -68,9 +73,12 @@ int mca_sharedfp_lockedfile_iwrite(mca_io_ompio_file_t *fh, /*Request the offset to write bytesRequested bytes*/ ret = mca_sharedfp_lockedfile_request_position(sh,bytesRequested,&offset); + offset /= sh->sharedfh->f_etype_size; + if ( -1 != ret) { if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_iwrite: Offset received is %lld\n",offset); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_iwrite: Offset received is %lld\n",offset); } /* Write to the file */ @@ -81,20 +89,142 @@ int mca_sharedfp_lockedfile_iwrite(mca_io_ompio_file_t *fh, } int mca_sharedfp_lockedfile_write_ordered_begin(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype) { - opal_output(0,"mca_sharedfp_lockedfile_write_ordered_begin: NOT IMPLEMENTED\n"); - return OMPI_ERROR; + int ret = OMPI_SUCCESS; + mca_sharedfp_base_module_t * shared_fp_base_module=NULL; + OMPI_MPI_OFFSET_TYPE offset = 0; + long sendBuff = 0; + long *buff=NULL; + long offsetBuff; + OMPI_MPI_OFFSET_TYPE offsetReceived = 0; + long bytesRequested = 0; + int recvcnt = 1, sendcnt = 1; + size_t numofBytes; + int rank, size, i; + struct mca_sharedfp_base_data_t *sh = NULL; + + if(fh->f_sharedfp_data==NULL){ + if ( mca_sharedfp_lockedfile_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_write_ordered_begin: opening the shared file pointer\n"); + } + shared_fp_base_module = fh->f_sharedfp; + + ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, + fh->f_filename, + fh->f_amode, + fh->f_info, + fh); + if ( OMPI_SUCCESS != ret ) { + opal_output(0,"sharedfp_lockedfile_write_ordered_begin - error opening the shared file pointer\n"); + return ret; + } + } + + + if ( 
true == fh->f_split_coll_in_use ) { + opal_output(0, "Only one split collective I/O operation allowed per file handle at any given point in time!\n"); + return MPI_ERR_REQUEST; + } + + /*Retrieve the new communicator*/ + sh = fh->f_sharedfp_data; + + /* Calculate the number of bytes to write*/ + opal_datatype_type_size ( &datatype->super, &numofBytes); + sendBuff = count * numofBytes; + + /* Get the ranks in the communicator */ + rank = ompi_comm_rank ( sh->comm ); + size = ompi_comm_size ( sh->comm ); + + if ( 0 == rank ) { + buff = (long*) malloc (sizeof(long) * size); + if ( NULL == buff ) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, buff, recvcnt, + OMPI_OFFSET_DATATYPE, 0, sh->comm, + sh->comm->c_coll.coll_gather_module ); + if ( OMPI_SUCCESS != ret ) { + goto exit; + } + + /* All the counts are present now in the recvBuff. + The size of recvBuff is sizeof_newComm + */ + if (rank == 0) { + for ( i = 0; i < size ; i ++) { + bytesRequested += buff[i]; + if ( mca_sharedfp_lockedfile_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_write_ordered_begin: Bytes requested are %ld\n",bytesRequested); + } + } + + /*Request the offset to write bytesRequested bytes + only the root process needs to do the request, + since the root process will then tell the other + processes at what offset they should write their + share of the data. 
+ */ + ret = mca_sharedfp_lockedfile_request_position(sh,bytesRequested,&offsetReceived); + if ( OMPI_SUCCESS != ret ){ + goto exit; + } + if ( mca_sharedfp_lockedfile_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_write_ordered_begin: Offset received is %lld\n",offsetReceived); + } + buff[0] += offsetReceived; + for (i = 1 ; i < size; i++) { + buff[i] += buff[i-1]; + } + } + + /* Scatter the results to the other processes*/ + ret = sh->comm->c_coll.coll_scatter ( buff, sendcnt, OMPI_OFFSET_DATATYPE, + &offsetBuff, recvcnt, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_scatter_module ); + if ( OMPI_SUCCESS != ret ) { + goto exit; + } + + /*Each process now has its own individual offset*/ + offset = offsetBuff - sendBuff; + offset /= sh->sharedfh->f_etype_size; + + if ( mca_sharedfp_lockedfile_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_write_ordered_begin: Offset returned is %lld\n",offset); + } + + ret = ompio_io_ompio_file_iwrite_at_all ( sh->sharedfh, offset, buf, count, datatype, &fh->f_split_coll_req ); + fh->f_split_coll_in_use = true; + +exit: + if ( NULL != buff ) { + free ( buff); + } + + return ret; } int mca_sharedfp_lockedfile_write_ordered_end(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t *status) { - opal_output(0,"mca_sharedfp_lockedfile_write_ordered_end: NOT IMPLEMENTED\n"); - return OMPI_ERROR; + int ret = OMPI_SUCCESS; + ret = ompi_request_wait ( &fh->f_split_coll_req, status ); + + /* remove the flag again */ + fh->f_split_coll_in_use = false; + return ret; } diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_read.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_read.c index 745e0a66263..cd7df48e575 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_read.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_read.c @@ -9,7 +9,7 @@ * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +24,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" int mca_sharedfp_lockedfile_read ( mca_io_ompio_file_t *fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status) @@ -37,7 +38,8 @@ int mca_sharedfp_lockedfile_read ( mca_io_ompio_file_t *fh, if ( fh->f_sharedfp_data == NULL ) { if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_read: opening the shared file pointer\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_read: opening the shared file pointer\n"); } shared_fp_base_module = fh->f_sharedfp; @@ -57,7 +59,8 @@ int mca_sharedfp_lockedfile_read ( mca_io_ompio_file_t *fh, bytesRequested = count * numofBytes; if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_read: Bytes Requested is %ld\n",bytesRequested); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_read: Bytes Requested is %ld\n",bytesRequested); } /*Retrieve the shared file data struct*/ @@ -65,9 +68,12 @@ int mca_sharedfp_lockedfile_read ( mca_io_ompio_file_t *fh, /*Request the offset to write bytesRequested bytes*/ ret = mca_sharedfp_lockedfile_request_position(sh,bytesRequested,&offset); + offset /= sh->sharedfh->f_etype_size; + if (-1 != ret ) { if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_read: Offset received is %lld\n",offset); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_read: Offset received is %lld\n",offset); } /* Read the file */ @@ -98,7 +104,8 @@ int mca_sharedfp_lockedfile_read_ordered (mca_io_ompio_file_t *fh, 
if ( fh->f_sharedfp_data == NULL){ if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_read_ordered: opening the shared file pointer\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_read_ordered: opening the shared file pointer\n"); } shared_fp_base_module = fh->f_sharedfp; @@ -130,7 +137,7 @@ int mca_sharedfp_lockedfile_read_ordered (mca_io_ompio_file_t *fh, return OMPI_ERR_OUT_OF_RESOURCE; } - ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, + ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, buff, recvcnt, OMPI_OFFSET_DATATYPE, 0, sh->comm, sh->comm->c_coll.coll_gather_module ); if ( OMPI_SUCCESS != ret ) { @@ -144,7 +151,8 @@ int mca_sharedfp_lockedfile_read_ordered (mca_io_ompio_file_t *fh, for (i = 0; i < size ; i ++) { bytesRequested += buff[i]; if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_read_ordered: Bytes requested are %ld\n",bytesRequested); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_read_ordered: Bytes requested are %ld\n",bytesRequested); } } @@ -159,7 +167,8 @@ int mca_sharedfp_lockedfile_read_ordered (mca_io_ompio_file_t *fh, goto exit; } if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_read_ordered: Offset received is %lld\n",offsetReceived); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_read_ordered: Offset received is %lld\n",offsetReceived); } buff[0] += offsetReceived; @@ -175,9 +184,11 @@ int mca_sharedfp_lockedfile_read_ordered (mca_io_ompio_file_t *fh, /*Each process now has its own individual offset in recvBUFF*/ offset = offsetBuff - sendBuff; + offset /= sh->sharedfh->f_etype_size; if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_read_ordered: Offset returned is %lld\n",offset); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_read_ordered: 
Offset returned is %lld\n",offset); } /* read to the file */ diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_request_position.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_request_position.c index 3420712b3b4..8edfa2bf8c3 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_request_position.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_request_position.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +24,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" /*Use fcntl to lock the hidden file which stores the current position*/ #include @@ -62,13 +63,14 @@ int mca_sharedfp_lockedfile_request_position(struct mca_sharedfp_base_data_t * s /* Aquire an exclusive lock */ if (fcntl(fd, F_SETLKW, &fl) == -1) { - printf("sharedfp_lockedfile_request_position: errorr acquiring lock: fcntl(%d,F_SETLKW,&fl)\n",fd); - printf("sharedfp_lockedfile_request_position: error(%i): %s", errno, strerror(errno)); + opal_output(0,"sharedfp_lockedfile_request_position: errorr acquiring lock: fcntl(%d,F_SETLKW,&fl)\n",fd); + opal_output(0,"sharedfp_lockedfile_request_position: error(%i): %s", errno, strerror(errno)); return OMPI_ERROR; } else{ if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_request_position: Success: acquired lock.for fd: %d\n",fd); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_request_position: Success: acquired lock.for fd: %d\n",fd); } } @@ -76,14 +78,16 @@ int mca_sharedfp_lockedfile_request_position(struct mca_sharedfp_base_data_t * s lseek ( fd, 0, SEEK_SET ); read ( fd, &buf, 
sizeof(OMPI_MPI_OFFSET_TYPE)); if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_request_position: Read last_offset=%lld! ret=%d\n",buf, ret); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_request_position: Read last_offset=%lld! ret=%d\n",buf, ret); } /* increment the position */ position = buf + bytes_requested; if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_request_position: old_offset=%lld, bytes_requested=%d, new offset=%lld!\n", - buf,bytes_requested,position); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_request_position: old_offset=%lld, bytes_requested=%d, new offset=%lld!\n", + buf,bytes_requested,position); } /* write to the file */ @@ -92,7 +96,8 @@ int mca_sharedfp_lockedfile_request_position(struct mca_sharedfp_base_data_t * s /* unlock the file */ if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_request_position: Releasing lock..."); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_request_position: Releasing lock..."); } /* NOTE: We thought we could reuse the flock struct @@ -108,13 +113,14 @@ int mca_sharedfp_lockedfile_request_position(struct mca_sharedfp_base_data_t * s fl.l_pid = getpid(); if (fcntl(fd, F_SETLK, &fl) == -1) { - printf("sharedfp_lockedfile_request_position:failed to release lock for fd: %d\n",fd); - printf("error(%i): %s", errno, strerror(errno)); + opal_output(0,"sharedfp_lockedfile_request_position:failed to release lock for fd: %d\n",fd); + opal_output(0,"error(%i): %s", errno, strerror(errno)); return OMPI_ERROR; } else { if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_request_position: released lock.for fd: %d\n",fd); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_request_position: released lock.for fd: %d\n",fd); } } diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_seek.c 
b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_seek.c index 1cfdce9ead2..83279be719f 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_seek.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_seek.c @@ -92,7 +92,7 @@ mca_sharedfp_lockedfile_seek (mca_io_ompio_file_t *fh, offset = end_position + offset; opal_output(ompi_sharedfp_base_framework.framework_output, "MPI_SEEK_END: file_get_size=%lld\n",end_position); - + if ( offset < 0){ opal_output(0,"sharedfp_lockedfile_seek - MPI_SEEK_CUR, offset must be > 0, got offset=%lld.\n",offset); ret = -1; @@ -140,7 +140,8 @@ mca_sharedfp_lockedfile_seek (mca_io_ompio_file_t *fh, *-------------------- */ if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_seek: Releasing lock..."); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_seek: Releasing lock..."); } fl.l_type = F_UNLCK; /* set to unlock same region */ fl.l_whence = SEEK_SET; diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_write.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_write.c index 22c89251084..eb44cefb9a2 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_write.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_write.c @@ -9,7 +9,9 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,9 +26,10 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" int mca_sharedfp_lockedfile_write (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -40,7 +43,8 @@ int mca_sharedfp_lockedfile_write (mca_io_ompio_file_t *fh, if ( NULL == fh->f_sharedfp_data ){ if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_write - opening the shared file pointer\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_write - opening the shared file pointer\n"); } shared_fp_base_module = fh->f_sharedfp; @@ -59,7 +63,8 @@ int mca_sharedfp_lockedfile_write (mca_io_ompio_file_t *fh, opal_datatype_type_size( &datatype->super, &numofBytes); bytesRequested = count * numofBytes; if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_write: Bytes Requested is %ld\n",bytesRequested); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_write: Bytes Requested is %ld\n",bytesRequested); } /*Retrieve the shared file data struct*/ @@ -67,9 +72,12 @@ int mca_sharedfp_lockedfile_write (mca_io_ompio_file_t *fh, /* Request the offset to write bytesRequested bytes */ ret = mca_sharedfp_lockedfile_request_position ( sh, bytesRequested, &offset); + offset /= sh->sharedfh->f_etype_size; + if (-1 != ret ) { if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_write: Offset received is %lld\n",offset); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_write: Offset received is %lld\n",offset); } /* Write to the file */ ret = ompio_io_ompio_file_write_at ( sh->sharedfh, offset, buf, count, datatype, status); @@ -79,7 +87,7 @@ int mca_sharedfp_lockedfile_write (mca_io_ompio_file_t *fh, } int mca_sharedfp_lockedfile_write_ordered 
(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -100,7 +108,8 @@ int mca_sharedfp_lockedfile_write_ordered (mca_io_ompio_file_t *fh, if( NULL == fh->f_sharedfp_data ) { if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_write_ordered - opening the shared file pointer\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_write_ordered - opening the shared file pointer\n"); } shared_fp_base_module = fh->f_sharedfp; @@ -133,8 +142,8 @@ int mca_sharedfp_lockedfile_write_ordered (mca_io_ompio_file_t *fh, } } - ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, buff, recvcnt, - OMPI_OFFSET_DATATYPE, 0, sh->comm, + ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, buff, recvcnt, + OMPI_OFFSET_DATATYPE, 0, sh->comm, sh->comm->c_coll.coll_gather_module ); if ( OMPI_SUCCESS != ret ) { goto exit; @@ -147,7 +156,8 @@ int mca_sharedfp_lockedfile_write_ordered (mca_io_ompio_file_t *fh, for ( i = 0; i < size ; i ++) { bytesRequested += buff[i]; if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_write_ordered: Bytes requested are %ld\n",bytesRequested); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_write_ordered: Bytes requested are %ld\n",bytesRequested); } } @@ -162,7 +172,8 @@ int mca_sharedfp_lockedfile_write_ordered (mca_io_ompio_file_t *fh, goto exit; } if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_write_ordered: Offset received is %lld\n",offsetReceived); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_write_ordered: Offset received is %lld\n",offsetReceived); } buff[0] += offsetReceived; for (i = 1 ; i < size; i++) { @@ -180,9 +191,11 @@ int mca_sharedfp_lockedfile_write_ordered (mca_io_ompio_file_t *fh, /*Each process now has its own individual offset*/ 
offset = offsetBuff - sendBuff; + offset /= sh->sharedfh->f_etype_size; if ( mca_sharedfp_lockedfile_verbose ) { - printf("sharedfp_lockedfile_write_ordered: Offset returned is %lld\n",offset); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_lockedfile_write_ordered: Offset returned is %lld\n",offset); } /* write to the file */ diff --git a/ompi/mca/sharedfp/sharedfp.h b/ompi/mca/sharedfp/sharedfp.h index 824c674a3fe..1c370c00f3d 100644 --- a/ompi/mca/sharedfp/sharedfp.h +++ b/ompi/mca/sharedfp/sharedfp.h @@ -13,6 +13,8 @@ * Copyright (c) 2008-2013 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -70,7 +72,7 @@ struct ompi_file_t; * **************** component struct ******************************* */ -typedef int (*mca_sharedfp_base_component_init_query_1_0_0_fn_t) +typedef int (*mca_sharedfp_base_component_init_query_1_0_0_fn_t) (bool enable_progress_threads, bool enable_mpi_threads); @@ -120,28 +122,28 @@ typedef int (*mca_sharedfp_base_module_get_position_fn_t)( struct mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE * offset); typedef int (*mca_sharedfp_base_module_write_fn_t)( struct mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); typedef int (*mca_sharedfp_base_module_write_ordered_fn_t)( struct mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); typedef int (*mca_sharedfp_base_module_write_ordered_begin_fn_t)( struct mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype); typedef int (*mca_sharedfp_base_module_write_ordered_end_fn_t)( struct mca_io_ompio_file_t *fh, - void *buf, + const 
void *buf, ompi_status_public_t *status); typedef int (*mca_sharedfp_base_module_iwrite_fn_t)( struct mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t ** request); @@ -173,7 +175,7 @@ typedef int (*mca_sharedfp_base_module_read_ordered_end_fn_t)( void *buf, ompi_status_public_t *status); typedef int (*mca_sharedfp_base_module_file_open_fn_t)( - struct ompi_communicator_t *comm, char *filename, int amode, + struct ompi_communicator_t *comm, const char *filename, int amode, struct ompi_info_t *info, struct mca_io_ompio_file_t *fh); typedef int (*mca_sharedfp_base_module_file_close_fn_t)(struct mca_io_ompio_file_t *fh); diff --git a/ompi/mca/sharedfp/sm/Makefile.am b/ompi/mca/sharedfp/sm/Makefile.am index 8c5d21a9b52..2783a9ad679 100644 --- a/ompi/mca/sharedfp/sm/Makefile.am +++ b/ompi/mca/sharedfp/sm/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008 University of Houston. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -52,4 +52,4 @@ sources = \ sharedfp_sm_iwrite.c \ sharedfp_sm_read.c \ sharedfp_sm_iread.c \ - sharedfp_sm_file_open.c + sharedfp_sm_file_open.c diff --git a/ompi/mca/sharedfp/sm/configure.m4 b/ompi/mca/sharedfp/sm/configure.m4 new file mode 100644 index 00000000000..7224f15a93a --- /dev/null +++ b/ompi/mca/sharedfp/sm/configure.m4 @@ -0,0 +1,38 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2012 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2008-2015 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_sharedfp_sm_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_ompi_sharedfp_sm_CONFIG],[ + AC_CONFIG_FILES([ompi/mca/sharedfp/sm/Makefile]) + + sharedfp_sm_happy=no + AC_CHECK_HEADER([semaphore.h], + [AC_CHECK_FUNCS([sem_open],[sharedfp_sm_happy=yes],[])]) + + AC_CHECK_HEADER([semaphore.h], + [AC_CHECK_FUNCS([sem_init],[sharedfp_sm_happy=yes],[])]) + + AS_IF([test "$sharedfp_sm_happy" = "yes"], + [$1], + [$2]) +])dnl diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm.c b/ompi/mca/sharedfp/sm/sharedfp_sm.c index bef74b72329..bc28839e6a7 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2013 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -63,9 +63,9 @@ int mca_sharedfp_sm_component_init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ - + return OMPI_SUCCESS; -} +} struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_sm_component_file_query(mca_io_ompio_file_t *fh, int *priority) { @@ -81,16 +81,16 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_sm_component_file_query(m ** original test copied from mca/coll/sm/coll_sm_module.c: */ ompi_group_t *group = comm->c_local_group; - + for (i = 0; i < size; ++i) { - proc = ompi_group_peer_lookup(group,i); - if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)){ - opal_output(ompi_sharedfp_base_framework.framework_output, - "mca_sharedfp_sm_component_file_query: Disqualifying myself: (%d/%s) " - "not all processes are on the same node.", - comm->c_contextid, comm->c_name); - return NULL; - } + proc = ompi_group_peer_lookup(group,i); + if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)){ + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_component_file_query: Disqualifying myself: (%d/%s) " + "not all processes are on the same node.", + comm->c_contextid, comm->c_name); + return NULL; + } } /* This module can run */ *priority = mca_sharedfp_sm_priority; @@ -98,9 +98,9 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_sm_component_file_query(m } int mca_sharedfp_sm_component_file_unquery (mca_io_ompio_file_t *file) -{ +{ /* This function might be needed for some purposes later. 
for now it - * does not have anything to do since there are no steps which need + * does not have anything to do since there are no steps which need * to be undone if this module is not selected */ return OMPI_SUCCESS; @@ -111,7 +111,7 @@ int mca_sharedfp_sm_module_init (mca_io_ompio_file_t *file) return OMPI_SUCCESS; } - + int mca_sharedfp_sm_module_finalize (mca_io_ompio_file_t *file) { return OMPI_SUCCESS; diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm.h b/ompi/mca/sharedfp/sm/sharedfp_sm.h index b308c0e33e0..4f62e2997b3 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm.h +++ b/ompi/mca/sharedfp/sm/sharedfp_sm.h @@ -10,6 +10,9 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -47,12 +50,12 @@ OMPI_MODULE_DECLSPEC extern mca_sharedfp_base_component_2_0_0_t mca_sharedfp_sm_ * ****************************************************************** */ /*IMPORANT: Update here when implementing functions from sharedfp API*/ -int mca_sharedfp_sm_seek (mca_io_ompio_file_t *fh, +int mca_sharedfp_sm_seek (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, int whence); int mca_sharedfp_sm_get_position (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE * offset); int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, - char* filename, + const char* filename, int amode, struct ompi_info_t *info, mca_io_ompio_file_t *fh); @@ -76,33 +79,32 @@ int mca_sharedfp_sm_iread (mca_io_ompio_file_t *fh, struct ompi_datatype_t *datatype, ompi_request_t **request); int mca_sharedfp_sm_write (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); int 
mca_sharedfp_sm_write_ordered (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status); int mca_sharedfp_sm_write_ordered_begin (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype); int mca_sharedfp_sm_write_ordered_end (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t *status); int mca_sharedfp_sm_iwrite (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); /*--------------------------------------------------------------* *Structures and definitions only for this component *--------------------------------------------------------------*/ - -struct sm_offset{ - sem_t *mutex; /* the mutex: a Posix memory-based unnamed semaphore */ +struct mca_sharedfp_sm_offset{ + sem_t mutex; /* the mutex: a POSIX memory-based unnamed semaphore */ long long offset; /* and the shared file pointer offset */ }; @@ -111,10 +113,13 @@ struct sm_offset{ */ struct mca_sharedfp_sm_data { - struct sm_offset * sm_offset_ptr; + struct mca_sharedfp_sm_offset * sm_offset_ptr; /*save filename so that we can remove the file on close*/ char * sm_filename; - sem_t *mutex; /* the mutex: a Posix memory-based named semaphore */ + /* The mutex: it will either point to a POSIX memory-based named + semaphore, or it will point to the a POSIX memory-based unnamed + semaphore located in sm_offset_ptr->mutex. */ + sem_t *mutex; char *sem_name; /* Name of the semaphore */ }; diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_component.c b/ompi/mca/sharedfp/sm/sharedfp_sm_component.c index 3f629da8a28..9abb8e387f2 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_component.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_component.c @@ -6,17 +6,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -38,7 +38,7 @@ const char *mca_sharedfp_sm_component_version_string = /* * Global variables */ -int mca_sharedfp_sm_priority=10; +int mca_sharedfp_sm_priority=30; int mca_sharedfp_sm_verbose=0; static int sm_register(void); @@ -72,7 +72,7 @@ mca_sharedfp_base_component_2_0_0_t mca_sharedfp_sm_component = { static int sm_register(void) { - mca_sharedfp_sm_priority = 10; + mca_sharedfp_sm_priority = 30; (void) mca_base_component_var_register(&mca_sharedfp_sm_component.sharedfpm_version, "priority", "Priority of the sm sharedfp component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index d6c15a92ad4..0c713b393d5 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -9,10 +9,11 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2015 University of Houston. All rights reserved. + * Copyright (c) 2013-2017 University of Houston. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,6 +35,8 @@ #include "mpi.h" #include "ompi/constants.h" +#include "ompi/group/group.h" +#include "ompi/proc/proc.h" #include "ompi/mca/sharedfp/sharedfp.h" #include "ompi/mca/sharedfp/base/base.h" @@ -43,7 +46,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, - char* filename, + const char* filename, int amode, struct ompi_info_t *info, mca_io_ompio_file_t *fh) @@ -51,11 +54,12 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, int err = OMPI_SUCCESS; struct mca_sharedfp_base_data_t* sh; struct mca_sharedfp_sm_data * sm_data = NULL; - mca_io_ompio_file_t * shfileHandle; + mca_io_ompio_file_t * shfileHandle, *ompio_fh; char * filename_basename; char * sm_filename; - struct sm_offset * sm_offset_ptr; - struct sm_offset sm_offset; + struct mca_sharedfp_sm_offset * sm_offset_ptr; + struct mca_sharedfp_sm_offset sm_offset; + mca_io_ompio_data_t *data; int sm_fd; int rank; @@ -63,38 +67,55 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, /*Open the same file again without shared file pointer*/ /*----------------------------------------------------*/ shfileHandle = (mca_io_ompio_file_t *)malloc(sizeof(mca_io_ompio_file_t)); + if ( NULL == shfileHandle ) { + opal_output(0, "mca_sharedfp_sm_file_open: Error during memory allocation\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } err = ompio_io_ompio_file_open(comm,filename,amode,info,shfileHandle,false); if ( OMPI_SUCCESS != err) { opal_output(0, "mca_sharedfp_sm_file_open: Error during file open\n"); + free (shfileHandle); return err; } + shfileHandle->f_fh = fh->f_fh; + data = (mca_io_ompio_data_t *) fh->f_fh->f_io_selected_data; + ompio_fh = &data->ompio_fh; + + err = mca_io_ompio_set_view_internal (shfileHandle, + ompio_fh->f_disp, + ompio_fh->f_etype, + ompio_fh->f_orig_filetype, + ompio_fh->f_datarep, + MPI_INFO_NULL); /*Memory is allocated here for the sh structure*/ if ( 
mca_sharedfp_sm_verbose ) { - printf( "mca_sharedfp_sm_file_open: malloc f_sharedfp_ptr struct\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_file_open: malloc f_sharedfp_ptr struct\n"); } sh = (struct mca_sharedfp_base_data_t*)malloc(sizeof(struct mca_sharedfp_base_data_t)); if ( NULL == sh ) { - opal_output(0, "mca_sharedfp_sm_file_open: Error, unable to malloc f_sharedfp_ptr struct\n"); - free(shfileHandle); - return OMPI_ERR_OUT_OF_RESOURCE; + opal_output(0, "mca_sharedfp_sm_file_open: Error, unable to malloc f_sharedfp_ptr struct\n"); + free(shfileHandle); + return OMPI_ERR_OUT_OF_RESOURCE; } /*Populate the sh file structure based on the implementation*/ - sh->sharedfh = shfileHandle; /* Shared file pointer*/ - sh->global_offset = 0; /* Global Offset*/ - sh->comm = comm; /* Communicator*/ + sh->sharedfh = shfileHandle; /* Shared file pointer*/ + sh->global_offset = 0; /* Global Offset*/ + sh->comm = comm; /* Communicator*/ sh->selected_module_data = NULL; rank = ompi_comm_rank ( sh->comm ); /*Open a shared memory segment which will hold the shared file pointer*/ if ( mca_sharedfp_sm_verbose ) { - printf( "mca_sharedfp_sm_file_open: allocatge shared memory segment.\n"); + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_file_open: allocatge shared memory segment.\n"); } - + sm_data = (struct mca_sharedfp_sm_data*) malloc ( sizeof(struct mca_sharedfp_sm_data)); if ( NULL == sm_data ){ opal_output(0, "mca_sharedfp_sm_file_open: Error, unable to malloc sm_data struct\n"); @@ -112,7 +133,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, ** overwriting each other, e.g. 
orte_process_info.proc_session_dir */ /*sprintf(sm_filename,"%s%s",filename,".sm");*/ - filename_basename = basename(filename); + filename_basename = basename((void *)filename); sm_filename = (char*) malloc( sizeof(char) * (strlen(filename_basename)+64) ); if (NULL == sm_filename) { free(sm_data); @@ -120,88 +141,96 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, free(shfileHandle); return OMPI_ERR_OUT_OF_RESOURCE; } - sprintf(sm_filename,"/tmp/OMPIO_sharedfp_sm_%s%s",filename_basename,".sm"); + opal_jobid_t masterjobid; + if ( 0 == comm->c_my_rank ) { + ompi_proc_t *masterproc = ompi_group_peer_lookup(comm->c_local_group, 0 ); + masterjobid = OMPI_CAST_RTE_NAME(&masterproc->super.proc_name)->jobid; + } + comm->c_coll.coll_bcast ( &masterjobid, 1, MPI_UNSIGNED, 0, comm, + comm->c_coll.coll_bcast_module ); + + sprintf(sm_filename,"/tmp/OMPIO_%s_%d_%s",filename_basename, masterjobid, ".sm"); /* open shared memory file, initialize to 0, map into memory */ sm_fd = open(sm_filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if ( sm_fd == -1){ /*error opening file*/ - printf("mca_sharedfp_sm_file_open: Error, unable to open file for mmap: %s\n",sm_filename); + opal_output(0,"mca_sharedfp_sm_file_open: Error, unable to open file for mmap: %s\n",sm_filename); free(sm_filename); free(sm_data); free(sh); free(shfileHandle); - return OMPI_ERROR; + return OMPI_ERROR; } - free(sm_filename); sm_data->sm_filename = sm_filename; - + /*TODO: is it necessary to write to the file first?*/ if( 0 == rank ){ - memset ( &sm_offset, 0, sizeof (struct sm_offset )); - write ( sm_fd, &sm_offset, sizeof(struct sm_offset)); + memset ( &sm_offset, 0, sizeof (struct mca_sharedfp_sm_offset )); + write ( sm_fd, &sm_offset, sizeof(struct mca_sharedfp_sm_offset)); } comm->c_coll.coll_barrier (comm, comm->c_coll.coll_barrier_module ); - + /*the file has been written to, now we can map*/ - sm_offset_ptr = mmap(NULL, sizeof(struct sm_offset), PROT_READ | PROT_WRITE, 
- MAP_SHARED, sm_fd, 0); - + sm_offset_ptr = mmap(NULL, sizeof(struct mca_sharedfp_sm_offset), PROT_READ | PROT_WRITE, + MAP_SHARED, sm_fd, 0); + close(sm_fd); - + if ( sm_offset_ptr==MAP_FAILED){ - err = OMPI_ERROR; - printf("mca_sharedfp_sm_file_open: Error, unable to mmap file: %s\n",sm_filename); - printf("%s\n", strerror(errno)); + err = OMPI_ERROR; + opal_output(0, "mca_sharedfp_sm_file_open: Error, unable to mmap file: %s\n",sm_filename); + opal_output(0, "%s\n", strerror(errno)); free(sm_filename); free(sm_data); free(sh); free(shfileHandle); - return OMPI_ERROR; + return OMPI_ERROR; } /* Initialize semaphore so that is shared between processes. */ /* the semaphore is shared by keeping it in the shared memory segment */ -#ifdef OMPIO_SHAREDFP_USE_UNNAMED_SEMAPHORES - if(sem_init(&sm_offset_ptr->mutex, 1, 1) != -1){ +#if defined(HAVE_SEM_OPEN) + +#if defined (__APPLE__) + sm_data->sem_name = (char*) malloc( sizeof(char) * 32); + snprintf(sm_data->sem_name,31,"OMPIO_%s",filename_basename); #else - sm_data->sem_name = (char*) malloc( sizeof(char) * (strlen(filename_basename)+32) ); - sprintf(sm_data->sem_name,"OMPIO_sharedfp_sem_%s",filename_basename); + sm_data->sem_name = (char*) malloc( sizeof(char) * 253); + snprintf(sm_data->sem_name,252,"OMPIO_%s",filename_basename); +#endif if( (sm_data->mutex = sem_open(sm_data->sem_name, O_CREAT, 0644, 1)) != SEM_FAILED ) { +#elif defined(HAVE_SEM_INIT) + sm_data->mutex = &sm_offset_ptr->mutex; + if(sem_init(&sm_offset_ptr->mutex, 1, 1) != -1){ #endif - /*If opening was successful*/ - /*Store the new file handle*/ - sm_data->sm_offset_ptr = sm_offset_ptr; - /* Assign the sm_data to sh->selected_module_data*/ - sh->selected_module_data = sm_data; - /*remember the shared file handle*/ - fh->f_sharedfp_data = sh; - - /*write initial zero*/ - if(rank==0){ - MPI_Offset position=0; - -#ifdef OMPIO_SHAREDFP_USE_UNNAMED_SEMAPHORES - sem_wait(sm_offset_ptr->mutex); - sm_offset_ptr->offset=position; - 
sem_post(sm_offset_ptr->mutex); -#else - sem_wait(sm_data->mutex); - sm_offset_ptr->offset=position; - sem_post(sm_data->mutex); -#endif - } + /*If opening was successful*/ + /*Store the new file handle*/ + sm_data->sm_offset_ptr = sm_offset_ptr; + /* Assign the sm_data to sh->selected_module_data*/ + sh->selected_module_data = sm_data; + /*remember the shared file handle*/ + fh->f_sharedfp_data = sh; + + /*write initial zero*/ + if(rank==0){ + MPI_Offset position=0; + + sem_wait(sm_data->mutex); + sm_offset_ptr->offset=position; + sem_post(sm_data->mutex); + } }else{ free(sm_filename); - free(sm_data); - free(sh); - free(shfileHandle); - munmap(sm_offset_ptr, sizeof(struct sm_offset)); - err = OMPI_ERROR; + free(sm_data); + free(sh); + free(shfileHandle); + munmap(sm_offset_ptr, sizeof(struct mca_sharedfp_sm_offset)); + err = OMPI_ERROR; } comm->c_coll.coll_barrier (comm, comm->c_coll.coll_barrier_module ); @@ -218,9 +247,10 @@ int mca_sharedfp_sm_file_close (mca_io_ompio_file_t *fh) struct mca_sharedfp_sm_data * file_data=NULL; if( NULL == fh->f_sharedfp_data ){ - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_file_close: shared file pointer structure not initialized\n"); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_file_close: shared file pointer structure not initialized\n"); + } return OMPI_SUCCESS; } sh = fh->f_sharedfp_data; @@ -236,14 +266,14 @@ int mca_sharedfp_sm_file_close (mca_io_ompio_file_t *fh) /*Close sm handle*/ if (file_data->sm_offset_ptr) { /* destroy semaphore */ -#ifdef OMPIO_SHAREDFP_USE_UNNAMED_SEMAPHORES - sem_destroy(file_data->sm_offset_ptr->mutex); -#else - sem_unlink (file_data->sem_name); - free (file_data->sem_name); +#if defined(HAVE_SEM_OPEN) + sem_unlink (file_data->sem_name); + free (file_data->sem_name); +#elif defined(HAVE_SEM_INIT) + sem_destroy(&file_data->sm_offset_ptr->mutex); #endif /*Release the shared memory segment.*/ - 
munmap(file_data->sm_offset_ptr,sizeof(struct sm_offset)); + munmap(file_data->sm_offset_ptr,sizeof(struct mca_sharedfp_sm_offset)); /*Q: Do we need to delete the file? */ remove(file_data->sm_filename); } diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_get_position.c b/ompi/mca/sharedfp/sm/sharedfp_sm_get_position.c index 0ff66c5d5ff..35e8b609bae 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_get_position.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_get_position.c @@ -37,7 +37,7 @@ mca_sharedfp_sm_get_position(mca_io_ompio_file_t *fh, if(fh->f_sharedfp_data==NULL){ opal_output(ompi_sharedfp_base_framework.framework_output, - "sharedfp_sm_write - opening the shared file pointer\n"); + "sharedfp_sm_write - opening the shared file pointer\n"); shared_fp_base_module = fh->f_sharedfp; ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_iread.c b/ompi/mca/sharedfp/sm/sharedfp_sm_iread.c index 9d04a4e5fdc..17652cfa87b 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_iread.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_iread.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,12 +24,13 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" int mca_sharedfp_sm_iread(mca_io_ompio_file_t *fh, - void *buf, - int count, - ompi_datatype_t *datatype, - MPI_Request * request) + void *buf, + int count, + ompi_datatype_t *datatype, + MPI_Request * request) { int ret = OMPI_SUCCESS; OMPI_MPI_OFFSET_TYPE offset = 0; @@ -39,9 +40,10 @@ int mca_sharedfp_sm_iread(mca_io_ompio_file_t *fh, mca_sharedfp_base_module_t * shared_fp_base_module = NULL; if( NULL == fh->f_sharedfp_data){ - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_iread: opening the shared file pointer\n"); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_iread: opening the shared file pointer\n"); + } shared_fp_base_module = fh->f_sharedfp; ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, @@ -63,15 +65,18 @@ int mca_sharedfp_sm_iread(mca_io_ompio_file_t *fh, sh = fh->f_sharedfp_data; if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_iread: Bytes Requested is %ld\n",bytesRequested); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_iread: Bytes Requested is %ld\n",bytesRequested); } /*Request the offset to write bytesRequested bytes*/ ret = mca_sharedfp_sm_request_position(sh,bytesRequested,&offset); + offset /= sh->sharedfh->f_etype_size; if ( -1 != ret ) { - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_iread: Offset received is %lld\n",offset); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_iread: Offset received is %lld\n",offset); + } /* Read the file */ ret = ompio_io_ompio_file_iread_at(sh->sharedfh,offset,buf,count,datatype,request); } @@ -84,15 +89,140 @@ int mca_sharedfp_sm_read_ordered_begin(mca_io_ompio_file_t *fh, int count, struct ompi_datatype_t 
*datatype) { - opal_output(0,"mca_sharedfp_sm_read_ordered_begin: NOT IMPLEMENTED\n"); - return OMPI_ERROR; + int ret = OMPI_SUCCESS; + OMPI_MPI_OFFSET_TYPE offset = 0; + long sendBuff = 0; + long *buff=NULL; + long offsetBuff; + OMPI_MPI_OFFSET_TYPE offsetReceived = 0; + long bytesRequested = 0; + int recvcnt = 1, sendcnt = 1; + size_t numofBytes; + int rank, size, i; + struct mca_sharedfp_base_data_t *sh = NULL; + mca_sharedfp_base_module_t * shared_fp_base_module = NULL; + + if ( NULL == fh->f_sharedfp_data){ + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_read_ordered_begin: opening the shared file pointer\n"); + } + shared_fp_base_module = fh->f_sharedfp; + + ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, + fh->f_filename, + fh->f_amode, + fh->f_info, + fh); + if ( OMPI_SUCCESS != ret ) { + opal_output(0,"sharedfp_sm_read_ordered_begin - error opening the shared file pointer\n"); + return ret; + } + } + if ( true == fh->f_split_coll_in_use ) { + opal_output(0,"Only one split collective I/O operation allowed per file handle at any given point in time!\n"); + return MPI_ERR_REQUEST; + } + + + + /*Retrieve the new communicator*/ + sh = fh->f_sharedfp_data; + + /* Calculate the number of bytes to read*/ + opal_datatype_type_size ( &datatype->super, &numofBytes); + sendBuff = count * numofBytes; + + /* Get the ranks in the communicator */ + rank = ompi_comm_rank ( sh->comm ); + size = ompi_comm_size ( sh->comm ); + + if ( 0 == rank ) { + buff = (long*)malloc(sizeof(long) * size); + if ( NULL == buff ) + return OMPI_ERR_OUT_OF_RESOURCE; + } + + ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, + buff, recvcnt, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_gather_module ); + if( OMPI_SUCCESS != ret){ + goto exit; + } + + /* All the counts are present now in the recvBuff. 
+ ** The size of recvBuff is sizeof_newComm + */ + if ( 0 == rank ) { + for (i = 0; i < size ; i ++) { + bytesRequested += buff[i]; + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_read_ordered_begin: Bytes requested are %ld\n", + bytesRequested); + } + } + + /* Request the offset to read bytesRequested bytes + ** only the root process needs to do the request, + ** since the root process will then tell the other + ** processes at what offset they should read their + ** share of the data. + */ + ret = mca_sharedfp_sm_request_position(sh,bytesRequested,&offsetReceived); + if( OMPI_SUCCESS != ret){ + goto exit; + } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_read_ordered_begin: Offset received is %lld\n",offsetReceived); + } + + buff[0] += offsetReceived; + for (i = 1 ; i < size; i++) { + buff[i] += buff[i-1]; + } + } + + /* Scatter the results to the other processes*/ + ret = sh->comm->c_coll.coll_scatter ( buff, sendcnt, OMPI_OFFSET_DATATYPE, + &offsetBuff, recvcnt, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_scatter_module ); + if( OMPI_SUCCESS != ret){ + goto exit; + } + + /*Each process now has its own individual offset in recvBUFF*/ + offset = offsetBuff - sendBuff; + offset /= sh->sharedfh->f_etype_size; + + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_read_ordered_begin: Offset returned is %lld\n",offset); + } + + /* read to the file */ + ret = ompio_io_ompio_file_iread_at_all(sh->sharedfh,offset,buf,count,datatype, + &fh->f_split_coll_req); + fh->f_split_coll_in_use = true; + +exit: + if ( NULL != buff ) { + free ( buff ); + } + + return ret; } int mca_sharedfp_sm_read_ordered_end(mca_io_ompio_file_t *fh, - void *buf, - ompi_status_public_t *status) + void *buf, + ompi_status_public_t *status) { - 
opal_output(0,"mca_sharedfp_sm_read_ordered_end: NOT IMPLEMENTED\n"); - return OMPI_ERROR; + int ret = OMPI_SUCCESS; + ret = ompi_request_wait ( &fh->f_split_coll_req, status ); + + /* remove the flag again */ + fh->f_split_coll_in_use = false; + return ret; } diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_iwrite.c b/ompi/mca/sharedfp/sm/sharedfp_sm_iwrite.c index 81eae8b8610..6527c888c7c 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_iwrite.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_iwrite.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,9 +26,10 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" int mca_sharedfp_sm_iwrite(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, ompi_datatype_t *datatype, MPI_Request * request) @@ -39,20 +42,21 @@ int mca_sharedfp_sm_iwrite(mca_io_ompio_file_t *fh, mca_sharedfp_base_module_t * shared_fp_base_module = NULL; if( NULL == fh->f_sharedfp_data){ - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_iwrite - opening the shared file pointer\n"); - } - shared_fp_base_module = fh->f_sharedfp; - - ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, - fh->f_filename, - fh->f_amode, - fh->f_info, - fh); - if ( OMPI_SUCCESS != ret ) { - opal_output(0,"sharedfp_sm_iwrite - error opening the shared file pointer\n"); - return ret; - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_iwrite - opening the shared file pointer\n"); + } + shared_fp_base_module = fh->f_sharedfp; + + ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, + fh->f_filename, + fh->f_amode, + 
fh->f_info, + fh); + if ( OMPI_SUCCESS != ret ) { + opal_output(0,"sharedfp_sm_iwrite - error opening the shared file pointer\n"); + return ret; + } } /* Calculate the number of bytes to write */ @@ -63,15 +67,18 @@ int mca_sharedfp_sm_iwrite(mca_io_ompio_file_t *fh, sh = fh->f_sharedfp_data; if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_iwrite: Bytes Requested is %ld\n",bytesRequested); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_iwrite: Bytes Requested is %ld\n",bytesRequested); } /* Request the offset to write bytesRequested bytes */ ret = mca_sharedfp_sm_request_position(sh,bytesRequested,&offset); + offset /= sh->sharedfh->f_etype_size; - if ( -1 != ret ) { - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_iwrite: Offset received is %lld\n",offset); - } + if ( -1 != ret ) { + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_iwrite: Offset received is %lld\n",offset); + } /* Write to the file */ ret = ompio_io_ompio_file_iwrite_at(sh->sharedfh,offset,buf,count,datatype,request); } @@ -81,19 +88,144 @@ int mca_sharedfp_sm_iwrite(mca_io_ompio_file_t *fh, } int mca_sharedfp_sm_write_ordered_begin(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype) { - opal_output(0,"mca_sharedfp_sm_write_ordered_begin: NOT IMPLEMENTED\n"); - return OMPI_ERROR; + int ret = OMPI_SUCCESS; + OMPI_MPI_OFFSET_TYPE offset = 0; + long sendBuff = 0; + long *buff=NULL; + long offsetBuff; + OMPI_MPI_OFFSET_TYPE offsetReceived = 0; + long bytesRequested = 0; + int recvcnt = 1, sendcnt = 1; + size_t numofBytes; + int rank, size, i; + struct mca_sharedfp_base_data_t *sh = NULL; + mca_sharedfp_base_module_t * shared_fp_base_module = NULL; + + if ( NULL == fh->f_sharedfp_data){ + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_write_ordered_begin: opening the shared file 
pointer\n"); + } + shared_fp_base_module = fh->f_sharedfp; + + ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, + fh->f_filename, + fh->f_amode, + fh->f_info, + fh); + if ( OMPI_SUCCESS != ret ) { + opal_output(0,"sharedfp_sm_write_ordered_begin - error opening the shared file pointer\n"); + return ret; + } + } + if ( true == fh->f_split_coll_in_use ) { + opal_output(0, "Only one split collective I/O operation allowed per file handle at any given point in time!\n"); + return MPI_ERR_REQUEST; + } + + + + /*Retrieve the new communicator*/ + sh = fh->f_sharedfp_data; + + /* Calculate the number of bytes to read*/ + opal_datatype_type_size ( &datatype->super, &numofBytes); + sendBuff = count * numofBytes; + + /* Get the ranks in the communicator */ + rank = ompi_comm_rank ( sh->comm ); + size = ompi_comm_size ( sh->comm ); + + if ( 0 == rank ) { + buff = (long*)malloc(sizeof(long) * size); + if ( NULL == buff ) + return OMPI_ERR_OUT_OF_RESOURCE; + } + + ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, + buff, recvcnt, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_gather_module ); + if( OMPI_SUCCESS != ret){ + goto exit; + } + + /* All the counts are present now in the recvBuff. + ** The size of recvBuff is sizeof_newComm + */ + if ( 0 == rank ) { + for (i = 0; i < size ; i ++) { + bytesRequested += buff[i]; + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_write_ordered_begin: Bytes requested are %ld\n", + bytesRequested); + } + } + + /* Request the offset to read bytesRequested bytes + ** only the root process needs to do the request, + ** since the root process will then tell the other + ** processes at what offset they should read their + ** share of the data. 
+ */ + ret = mca_sharedfp_sm_request_position(sh,bytesRequested,&offsetReceived); + if( OMPI_SUCCESS != ret){ + goto exit; + } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_write_ordered_begin: Offset received is %lld\n",offsetReceived); + } + + buff[0] += offsetReceived; + for (i = 1 ; i < size; i++) { + buff[i] += buff[i-1]; + } + } + + /* Scatter the results to the other processes*/ + ret = sh->comm->c_coll.coll_scatter ( buff, sendcnt, OMPI_OFFSET_DATATYPE, + &offsetBuff, recvcnt, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_scatter_module ); + if( OMPI_SUCCESS != ret){ + goto exit; + } + + /*Each process now has its own individual offset in recvBUFF*/ + offset = offsetBuff - sendBuff; + offset /= sh->sharedfh->f_etype_size; + + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_write_ordered_begin: Offset returned is %lld\n",offset); + } + + /* read to the file */ + ret = ompio_io_ompio_file_iwrite_at_all(sh->sharedfh,offset,buf,count,datatype, + &fh->f_split_coll_req); + fh->f_split_coll_in_use = true; + +exit: + if ( NULL != buff ) { + free ( buff ); + } + + return ret; } int mca_sharedfp_sm_write_ordered_end(mca_io_ompio_file_t *fh, - void *buf, + const void *buf, ompi_status_public_t *status) { - opal_output(0,"mca_sharedfp_sm_write_ordered_end: NOT IMPLEMENTED\n"); - return OMPI_ERROR; + int ret = OMPI_SUCCESS; + ret = ompi_request_wait ( &fh->f_split_coll_req, status ); + + /* remove the flag again */ + fh->f_split_coll_in_use = false; + return ret; } diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_read.c b/ompi/mca/sharedfp/sm/sharedfp_sm_read.c index 04cc34e69e9..8f2cd77426a 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_read.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_read.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +24,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" int mca_sharedfp_sm_read ( mca_io_ompio_file_t *fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status) @@ -36,9 +37,10 @@ int mca_sharedfp_sm_read ( mca_io_ompio_file_t *fh, mca_sharedfp_base_module_t * shared_fp_base_module = NULL; if( NULL == fh->f_sharedfp_data){ - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_read - opening the shared file pointer\n"); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_read - opening the shared file pointer\n"); + } shared_fp_base_module = fh->f_sharedfp; ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, @@ -60,16 +62,19 @@ int mca_sharedfp_sm_read ( mca_io_ompio_file_t *fh, sh = fh->f_sharedfp_data; if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_read: Bytes Requested is %ld\n",bytesRequested); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_read: Bytes Requested is %ld\n",bytesRequested); } /*Request the offset to write bytesRequested bytes*/ ret = mca_sharedfp_sm_request_position(sh,bytesRequested,&offset); + offset /= sh->sharedfh->f_etype_size; if ( -1 != ret ) { - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_read: Offset received is %lld\n",offset); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_read: Offset received is %lld\n",offset); + } /* Read the file */ ret = ompio_io_ompio_file_read_at(sh->sharedfh,offset,buf,count,datatype,status); @@ -98,9 +103,10 @@ int mca_sharedfp_sm_read_ordered (mca_io_ompio_file_t *fh, mca_sharedfp_base_module_t * 
shared_fp_base_module = NULL; if ( NULL == fh->f_sharedfp_data){ - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_read_ordered: opening the shared file pointer\n"); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_read_ordered: opening the shared file pointer\n"); + } shared_fp_base_module = fh->f_sharedfp; ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, @@ -132,10 +138,10 @@ int mca_sharedfp_sm_read_ordered (mca_io_ompio_file_t *fh, } ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, - buff, recvcnt, OMPI_OFFSET_DATATYPE, 0, - sh->comm, sh->comm->c_coll.coll_gather_module ); + buff, recvcnt, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_gather_module ); if( OMPI_SUCCESS != ret){ - goto exit; + goto exit; } /* All the counts are present now in the recvBuff. @@ -143,25 +149,27 @@ int mca_sharedfp_sm_read_ordered (mca_io_ompio_file_t *fh, */ if ( 0 == rank ) { for (i = 0; i < size ; i ++) { - bytesRequested += buff[i]; - if ( mca_sharedfp_sm_verbose ) { - printf("mca_sharedfp_sm_read_ordered: Bytes requested are %ld\n",bytesRequested); - } + bytesRequested += buff[i]; + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_read_ordered: Bytes requested are %ld\n",bytesRequested); + } } /* Request the offset to read bytesRequested bytes - ** only the root process needs to do the request, - ** since the root process will then tell the other - ** processes at what offset they should read their - ** share of the data. - */ + ** only the root process needs to do the request, + ** since the root process will then tell the other + ** processes at what offset they should read their + ** share of the data. 
+ */ ret = mca_sharedfp_sm_request_position(sh,bytesRequested,&offsetReceived); if( OMPI_SUCCESS != ret){ - goto exit; + goto exit; + } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_read_ordered: Offset received is %lld\n",offsetReceived); } - if ( mca_sharedfp_sm_verbose ) { - printf("mca_sharedfp_sm_read_ordered: Offset received is %lld\n",offsetReceived); - } buff[0] += offsetReceived; for (i = 1 ; i < size; i++) { @@ -171,16 +179,19 @@ int mca_sharedfp_sm_read_ordered (mca_io_ompio_file_t *fh, /* Scatter the results to the other processes*/ ret = sh->comm->c_coll.coll_scatter ( buff, sendcnt, OMPI_OFFSET_DATATYPE, - &offsetBuff, recvcnt, OMPI_OFFSET_DATATYPE, 0, - sh->comm, sh->comm->c_coll.coll_scatter_module ); + &offsetBuff, recvcnt, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_scatter_module ); if( OMPI_SUCCESS != ret){ - goto exit; + goto exit; } /*Each process now has its own individual offset in recvBUFF*/ offset = offsetBuff - sendBuff; + offset /= sh->sharedfh->f_etype_size; + if ( mca_sharedfp_sm_verbose ) { - printf("mca_sharedfp_sm_read_ordered: Offset returned is %lld\n",offset); + opal_output(ompi_sharedfp_base_framework.framework_output, + "mca_sharedfp_sm_read_ordered: Offset returned is %lld\n",offset); } /* read to the file */ @@ -188,7 +199,7 @@ int mca_sharedfp_sm_read_ordered (mca_io_ompio_file_t *fh, exit: if ( NULL != buff ) { - free ( buff ); + free ( buff ); } return ret; diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_request_position.c b/ompi/mca/sharedfp/sm/sharedfp_sm_request_position.c index 6414c792097..0764f46ec37 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_request_position.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_request_position.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013-2015 University of Houston. All rights reserved. 
+ * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +25,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" /*use a semaphore to lock the shared memory*/ #include @@ -36,51 +38,49 @@ int mca_sharedfp_sm_request_position(struct mca_sharedfp_base_data_t * sh, OMPI_MPI_OFFSET_TYPE position = 0; OMPI_MPI_OFFSET_TYPE old_offset; struct mca_sharedfp_sm_data * sm_data = sh->selected_module_data; - struct sm_offset * sm_offset_ptr = NULL; + struct mca_sharedfp_sm_offset * sm_offset_ptr = NULL; int rank = ompi_comm_rank ( sh->comm); *offset = 0; if ( mca_sharedfp_sm_verbose ) { - printf("Aquiring lock, rank=%d...",rank); + opal_output(ompi_sharedfp_base_framework.framework_output, + "Aquiring lock, rank=%d...",rank); } sm_offset_ptr = sm_data->sm_offset_ptr; /* Aquire an exclusive lock */ -#ifdef OMPIO_SHAREDFP_USE_UNNAMED_SEMAPHORES - sem_wait(sm_offset_ptr->mutex); -#else sem_wait(sm_data->mutex); -#endif if ( mca_sharedfp_sm_verbose ) { - printf("Succeeded! Acquired sm lock.for rank=%d\n",rank); + opal_output(ompi_sharedfp_base_framework.framework_output, + "Succeeded! 
Acquired sm lock.for rank=%d\n",rank); } old_offset=sm_offset_ptr->offset; if ( mca_sharedfp_sm_verbose ) { - printf("Read last_offset=%lld!\n",old_offset); + opal_output(ompi_sharedfp_base_framework.framework_output, + "Read last_offset=%lld!\n",old_offset); } position = old_offset + bytes_requested; if ( mca_sharedfp_sm_verbose ) { - printf("old_offset=%lld, bytes_requested=%d, new offset=%lld!\n",old_offset,bytes_requested,position); + opal_output(ompi_sharedfp_base_framework.framework_output, + "old_offset=%lld, bytes_requested=%d, new offset=%lld!\n",old_offset,bytes_requested,position); } sm_offset_ptr->offset=position; if ( mca_sharedfp_sm_verbose ) { - printf("Releasing sm lock...rank=%d",rank); + opal_output(ompi_sharedfp_base_framework.framework_output, + "Releasing sm lock...rank=%d",rank); } -#ifdef OMPIO_SHAREDFP_USE_UNNAMED_SEMAPHORES - sem_post(sm_offset_ptr->mutex); -#else sem_post(sm_data->mutex); -#endif if ( mca_sharedfp_sm_verbose ) { - printf("Released lock! released lock.for rank=%d\n",rank); + opal_output(ompi_sharedfp_base_framework.framework_output, + "Released lock! released lock.for rank=%d\n",rank); } *offset = old_offset; diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_seek.c b/ompi/mca/sharedfp/sm/sharedfp_sm_seek.c index a3d475101ab..7284291f06d 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_seek.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_seek.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +25,7 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" /*use a semaphore to lock the shared memory location*/ #include @@ -38,12 +40,13 @@ mca_sharedfp_sm_seek (mca_io_ompio_file_t *fh, struct mca_sharedfp_base_data_t *sh = NULL; mca_sharedfp_base_module_t * shared_fp_base_module = NULL; struct mca_sharedfp_sm_data * sm_data = NULL; - struct sm_offset * sm_offset_ptr = NULL; + struct mca_sharedfp_sm_offset * sm_offset_ptr = NULL; if( NULL == fh->f_sharedfp_data ) { - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_seek: opening the shared file pointer\n"); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_seek: opening the shared file pointer\n"); + } shared_fp_base_module = fh->f_sharedfp; ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, @@ -67,40 +70,44 @@ mca_sharedfp_sm_seek (mca_io_ompio_file_t *fh, opal_output(0,"sharedfp_sm_seek - MPI_SEEK_SET, offset must be > 0, got offset=%lld.\n",offset); ret = -1; } - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_seek: MPI_SEEK_SET new_offset=%lld\n",offset); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_seek: MPI_SEEK_SET new_offset=%lld\n",offset); + } } - else if( MPI_SEEK_CUR == whence){ + else if( MPI_SEEK_CUR == whence){ OMPI_MPI_OFFSET_TYPE current_position; ret = mca_sharedfp_sm_get_position ( fh, ¤t_position); - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_seek: MPI_SEEK_CUR: curr=%lld, offset=%lld, call status=%d\n", - current_position,offset,status); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_seek: MPI_SEEK_CUR: curr=%lld, offset=%lld, call status=%d\n", + current_position,offset,status); + } offset = current_position + offset; - if 
( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_seek: MPI_SEEK_CUR: new_offset=%lld\n",offset); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_seek: MPI_SEEK_CUR: new_offset=%lld\n",offset); + } if(offset < 0){ opal_output(0,"sharedfp_sm_seek - MPI_SEEK_CURE, offset must be > 0, got offset=%lld.\n",offset); ret = -1; } } - else if( MPI_SEEK_END == whence){ + else if( MPI_SEEK_END == whence){ end_position=0; ompio_io_ompio_file_get_size(sh->sharedfh,&end_position); offset = end_position + offset; - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_seek: MPI_SEEK_END: file_get_size=%lld\n",end_position); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_seek: MPI_SEEK_END: file_get_size=%lld\n",end_position); + } if(offset < 0){ opal_output(0,"sharedfp_sm_seek - MPI_SEEK_CUR, offset must be > 0, got offset=%lld.\n",offset); ret = -1; } } - else { + else { opal_output(0,"sharedfp_sm_seek - whence=%i is not supported\n",whence); ret = -1; } @@ -114,31 +121,26 @@ mca_sharedfp_sm_seek (mca_io_ompio_file_t *fh, /*-------------------*/ /*lock the file */ /*--------------------*/ - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_seek: Aquiring lock, rank=%d...",rank); fflush(stdout); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_seek: Aquiring lock, rank=%d...",rank); fflush(stdout); + } /* Aquire an exclusive lock */ sm_offset_ptr = sm_data->sm_offset_ptr; -#ifdef OMPIO_SHAREDFP_USE_UNNAMED_SEMAPHORES - sem_wait(sm_offset_ptr->mutex); -#else - sem_wait(sm_data->mutex); -#endif + sem_wait(sm_data->mutex); - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_seek: Success! Acquired sm lock.for rank=%d\n",rank); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_seek: Success! 
Acquired sm lock.for rank=%d\n",rank); + } sm_offset_ptr->offset=offset; - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_seek: Releasing sm lock...rank=%d",rank); fflush(stdout); - } -#ifdef OMPIO_SHAREDFP_USE_UNNAMED_SEMAPHORES - sem_post(sm_offset_ptr->mutex); -#else - sem_post(sm_data->mutex); -#endif + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_seek: Releasing sm lock...rank=%d",rank); fflush(stdout); + } + sem_post(sm_data->mutex); } /* since we are only letting process 0, update the current pointer diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_write.c b/ompi/mca/sharedfp/sm/sharedfp_sm_write.c index 15cd325a0e8..fd0ffab6784 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_write.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_write.c @@ -9,7 +9,9 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,9 +26,10 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" int mca_sharedfp_sm_write (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -39,9 +42,10 @@ int mca_sharedfp_sm_write (mca_io_ompio_file_t *fh, mca_sharedfp_base_module_t * shared_fp_base_module = NULL; if( NULL == fh->f_sharedfp_data ){ - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_write: opening the shared file pointer\n"); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_write: opening the shared file pointer\n"); + } shared_fp_base_module = fh->f_sharedfp; ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, @@ -63,15 +67,18 @@ int mca_sharedfp_sm_write (mca_io_ompio_file_t *fh, sh = fh->f_sharedfp_data; if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_write: Requested is %ld\n",bytesRequested); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_write: Requested is %ld\n",bytesRequested); } /*Request the offset to write bytesRequested bytes*/ ret = mca_sharedfp_sm_request_position(sh,bytesRequested,&offset); + offset /= sh->sharedfh->f_etype_size; if ( -1 != ret ) { - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_write: fset received is %lld\n",offset); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_write: fset received is %lld\n",offset); + } /* Write to the file*/ ret = ompio_io_ompio_file_write_at(sh->sharedfh,offset,buf,count,datatype,status); @@ -81,7 +88,7 @@ int mca_sharedfp_sm_write (mca_io_ompio_file_t *fh, } int mca_sharedfp_sm_write_ordered (mca_io_ompio_file_t *fh, - void *buf, + const void *buf, int count, struct ompi_datatype_t *datatype, ompi_status_public_t *status) @@ -102,9 
+109,10 @@ int mca_sharedfp_sm_write_ordered (mca_io_ompio_file_t *fh, mca_sharedfp_base_module_t * shared_fp_base_module = NULL; if( NULL == fh->f_sharedfp_data){ - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_write_ordered: opening the shared file pointer\n"); - } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_write_ordered: opening the shared file pointer\n"); + } shared_fp_base_module = fh->f_sharedfp; ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm, @@ -136,11 +144,11 @@ int mca_sharedfp_sm_write_ordered (mca_io_ompio_file_t *fh, return OMPI_ERR_OUT_OF_RESOURCE; } - ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, - buff, recvcnt, OMPI_OFFSET_DATATYPE, 0, - sh->comm, sh->comm->c_coll.coll_gather_module ); + ret = sh->comm->c_coll.coll_gather ( &sendBuff, sendcnt, OMPI_OFFSET_DATATYPE, + buff, recvcnt, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_gather_module ); if ( OMPI_SUCCESS != ret ) { - goto exit; + goto exit; } /* All the counts are present now in the recvBuff. @@ -148,25 +156,27 @@ int mca_sharedfp_sm_write_ordered (mca_io_ompio_file_t *fh, */ if ( 0 == rank ) { for (i = 0; i < size ; i ++) { - bytesRequested += buff[i]; - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_write_ordered: Bytes requested are %ld\n",bytesRequested); - } - } + bytesRequested += buff[i]; + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_write_ordered: Bytes requested are %ld\n",bytesRequested); + } + } /* Request the offset to write bytesRequested bytes - ** only the root process needs to do the request, - ** since the root process will then tell the other - ** processes at what offset they should write their - ** share of the data. 
- */ + ** only the root process needs to do the request, + ** since the root process will then tell the other + ** processes at what offset they should write their + ** share of the data. + */ ret = mca_sharedfp_sm_request_position(sh,bytesRequested,&offsetReceived); if( OMPI_SUCCESS != ret){ - goto exit; + goto exit; + } + if ( mca_sharedfp_sm_verbose ) { + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_write_ordered: Offset received is %lld\n",offsetReceived); } - if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_write_ordered: Offset received is %lld\n",offsetReceived); - } buff[0] += offsetReceived; for (i = 1 ; i < size; i++) { @@ -176,26 +186,28 @@ int mca_sharedfp_sm_write_ordered (mca_io_ompio_file_t *fh, /* Scatter the results to the other processes*/ ret = sh->comm->c_coll.coll_scatter ( buff, sendcnt, OMPI_OFFSET_DATATYPE, - &offsetBuff, recvcnt, OMPI_OFFSET_DATATYPE, 0, - sh->comm, sh->comm->c_coll.coll_scatter_module ); + &offsetBuff, recvcnt, OMPI_OFFSET_DATATYPE, 0, + sh->comm, sh->comm->c_coll.coll_scatter_module ); if ( OMPI_SUCCESS != ret ) { - goto exit; + goto exit; } /* Each process now has its own individual offset */ offset = offsetBuff - sendBuff; + offset /= sh->sharedfh->f_etype_size; if ( mca_sharedfp_sm_verbose ) { - printf("sharedfp_sm_write_ordered: Offset returned is %lld\n",offset); + opal_output(ompi_sharedfp_base_framework.framework_output, + "sharedfp_sm_write_ordered: Offset returned is %lld\n",offset); } /* write to the file */ ret = ompio_io_ompio_file_write_at_all(sh->sharedfh,offset,buf,count,datatype,status); exit: if ( NULL != buff ) { - free ( buff ); + free ( buff ); } - + return ret; } diff --git a/ompi/mca/topo/Makefile.am b/ompi/mca/topo/Makefile.am index 351495346ca..8f395d955a3 100644 --- a/ompi/mca/topo/Makefile.am +++ b/ompi/mca/topo/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. 
All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/topo/base/Makefile.am b/ompi/mca/topo/base/Makefile.am index 4ef077dc621..812de6db316 100644 --- a/ompi/mca/topo/base/Makefile.am +++ b/ompi/mca/topo/base/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2013 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012-2013 Inria. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mca/topo/base/base.h b/ompi/mca/topo/base/base.h index 26fd0013cd5..5e05a8009d4 100644 --- a/ompi/mca/topo/base/base.h +++ b/ompi/mca/topo/base/base.h @@ -5,20 +5,20 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. 
All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -67,90 +67,90 @@ mca_topo_base_find_available(bool enable_progress_threads, * cart_map() for their topology components. But they can implement * these glue functions if they want. * - * These glue functions + * These glue functions */ OMPI_DECLSPEC int mca_topo_base_cart_create(mca_topo_base_module_t *topo_module, ompi_communicator_t* old_comm, int ndims, - int *dims, - int *periods, + const int *dims, + const int *periods, bool reorder, ompi_communicator_t** comm_topo); OMPI_DECLSPEC int -mca_topo_base_cart_coords(ompi_communicator_t *comm, - int rank, +mca_topo_base_cart_coords(ompi_communicator_t *comm, + int rank, int maxdims, int *coords); OMPI_DECLSPEC int -mca_topo_base_cartdim_get(ompi_communicator_t *comm, +mca_topo_base_cartdim_get(ompi_communicator_t *comm, int *ndims); OMPI_DECLSPEC int -mca_topo_base_cart_get(ompi_communicator_t *comm, - int maxdims, +mca_topo_base_cart_get(ompi_communicator_t *comm, + int maxdims, int *dims, - int *periods, + int *periods, int *coords); OMPI_DECLSPEC int mca_topo_base_cart_map(ompi_communicator_t * comm, int ndims, - int *dims, int *periods, int *newrank); + const int *dims, const int *periods, int *newrank); OMPI_DECLSPEC int -mca_topo_base_cart_rank(ompi_communicator_t *comm, - int *coords, +mca_topo_base_cart_rank(ompi_communicator_t *comm, + const int *coords, int *rank); OMPI_DECLSPEC int -mca_topo_base_cart_shift(ompi_communicator_t *comm, - int direction, +mca_topo_base_cart_shift(ompi_communicator_t *comm, + int direction, int disp, - int *rank_source, + int *rank_source, int *rank_dest); 
- + OMPI_DECLSPEC int -mca_topo_base_cart_sub(ompi_communicator_t *comm, - int *remain_dims, +mca_topo_base_cart_sub(ompi_communicator_t *comm, + const int *remain_dims, ompi_communicator_t **new_comm); - + OMPI_DECLSPEC int -mca_topo_base_graphdims_get(ompi_communicator_t *comm, +mca_topo_base_graphdims_get(ompi_communicator_t *comm, int *nodes, int *nedges); - + OMPI_DECLSPEC int mca_topo_base_graph_create(mca_topo_base_module_t *topo_module, ompi_communicator_t* old_comm, int nnodes, - int *index, - int *edges, + const int *index, + const int *edges, bool reorder, ompi_communicator_t** new_comm); OMPI_DECLSPEC int -mca_topo_base_graph_get(ompi_communicator_t *comm, - int maxindex, - int maxedges, - int *index, +mca_topo_base_graph_get(ompi_communicator_t *comm, + int maxindex, + int maxedges, + int *index, int *edges); OMPI_DECLSPEC int mca_topo_base_graph_map(ompi_communicator_t * comm, int nnodes, - int *index, int *edges, int *newrank); + const int *index, const int *edges, int *newrank); OMPI_DECLSPEC int -mca_topo_base_graph_neighbors(ompi_communicator_t *comm, +mca_topo_base_graph_neighbors(ompi_communicator_t *comm, int rank, - int maxneighbors, + int maxneighbors, int *neighbors); OMPI_DECLSPEC int -mca_topo_base_graph_neighbors_count(ompi_communicator_t *comm, +mca_topo_base_graph_neighbors_count(ompi_communicator_t *comm, int rank, int *nneighbors); @@ -160,26 +160,26 @@ mca_topo_base_graph_neighbors_count(ompi_communicator_t *comm, */ OMPI_DECLSPEC int mca_topo_base_dist_graph_distribute(mca_topo_base_module_t* module, - ompi_communicator_t *comm, - int n, int nodes[], - int degrees[], int targets[], - int weights[], + ompi_communicator_t *comm, + int n, const int nodes[], + const int degrees[], const int targets[], + const int weights[], mca_topo_base_comm_dist_graph_2_2_0_t** ptopo); OMPI_DECLSPEC int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, - ompi_communicator_t *old_comm, - int n, int nodes[], - int degrees[], int targets[], 
int weights[], - ompi_info_t *info, int reorder, + ompi_communicator_t *old_comm, + int n, const int nodes[], + const int degrees[], const int targets[], const int weights[], + ompi_info_t *info, int reorder, ompi_communicator_t **new_comm); OMPI_DECLSPEC int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, ompi_communicator_t *old_comm, - int indegree, int sources[], - int sourceweights[], int outdegree, - int destinations[], int destweights[], + int indegree, const int sources[], + const int sourceweights[], int outdegree, + const int destinations[], const int destweights[], ompi_info_t *info, int reorder, ompi_communicator_t **comm_dist_graph); @@ -189,7 +189,7 @@ mca_topo_base_dist_graph_neighbors(ompi_communicator_t *comm, int sources[], int sourceweights[], int maxoutdegree, int destinations[], int destweights[]); - + OMPI_DECLSPEC int mca_topo_base_dist_graph_neighbors_count(ompi_communicator_t *comm, int *inneighbors, int *outneighbors, int *weighted); diff --git a/ompi/mca/topo/base/topo_base_cart_coords.c b/ompi/mca/topo/base/topo_base_cart_coords.c index 63df2a302cb..888e9206e27 100644 --- a/ompi/mca/topo/base/topo_base_cart_coords.c +++ b/ompi/mca/topo/base/topo_base_cart_coords.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,7 +33,7 @@ * coordinates of specified process (integer) * * @retval MPI_SUCCESS - */ + */ int mca_topo_base_cart_coords(ompi_communicator_t* comm, int rank, @@ -44,12 +44,12 @@ int mca_topo_base_cart_coords(ompi_communicator_t* comm, /* * loop computing the co-ordinates - */ + */ d = comm->c_topo->mtc.cart->dims; remprocs = ompi_comm_size(comm); - for (i = 0; - (i < comm->c_topo->mtc.cart->ndims) && (i < maxdims); + for (i = 0; + (i < comm->c_topo->mtc.cart->ndims) && (i < maxdims); ++i, ++d) { dim = *d; remprocs /= dim; diff --git a/ompi/mca/topo/base/topo_base_cart_create.c b/ompi/mca/topo/base/topo_base_cart_create.c index 6d1c732579b..c4737096d5c 100644 --- a/ompi/mca/topo/base/topo_base_cart_create.c +++ b/ompi/mca/topo/base/topo_base_cart_create.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -17,9 +17,9 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -45,17 +45,17 @@ * Open MPI currently ignores the 'reorder' flag. 
* * @retval OMPI_SUCCESS - */ + */ int mca_topo_base_cart_create(mca_topo_base_module_t *topo, ompi_communicator_t* old_comm, int ndims, - int *dims, - int *periods, + const int *dims, + const int *periods, bool reorder, ompi_communicator_t** comm_topo) { - int nprocs = 1, i, *p, new_rank, num_procs, ret; + int nprocs = 1, i, new_rank, num_procs, ret; ompi_communicator_t *new_comm; ompi_proc_t **topo_procs = NULL; mca_topo_base_comm_cart_2_2_0_t* cart; @@ -65,12 +65,11 @@ int mca_topo_base_cart_create(mca_topo_base_module_t *topo, assert(topo->type == OMPI_COMM_CART); /* Calculate the number of processes in this grid */ - p = dims; - for (i = 0; i < ndims; ++i, ++p) { - if(*p <= 0) { + for (i = 0; i < ndims; ++i) { + if(dims[i] <= 0) { return OMPI_ERROR; } - nprocs *= *p; + nprocs *= dims[i]; } /* check for the error condition */ @@ -82,7 +81,7 @@ int mca_topo_base_cart_create(mca_topo_base_module_t *topo, if (nprocs < num_procs) { num_procs = nprocs; } - + if (new_rank > (nprocs-1)) { ndims = 0; new_rank = MPI_UNDEFINED; @@ -112,7 +111,7 @@ int mca_topo_base_cart_create(mca_topo_base_module_t *topo, return OMPI_ERR_OUT_OF_RESOURCE; } memcpy(cart->periods, periods, ndims * sizeof(int)); - + cart->coords = (int*)malloc(sizeof(int) * ndims); if (NULL == cart->coords) { OBJ_RELEASE(cart); diff --git a/ompi/mca/topo/base/topo_base_cart_get.c b/ompi/mca/topo/base/topo_base_cart_get.c index 4d3c8df8ca5..f0c143c8ed3 100644 --- a/ompi/mca/topo/base/topo_base_cart_get.c +++ b/ompi/mca/topo/base/topo_base_cart_get.c @@ -5,24 +5,22 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" -#ifdef HAVE_STRING_H #include -#endif #include "ompi/mca/topo/base/base.h" #include "ompi/communicator/communicator.h" diff --git a/ompi/mca/topo/base/topo_base_cart_map.c b/ompi/mca/topo/base/topo_base_cart_map.c index c6da20a0068..64faa6612eb 100644 --- a/ompi/mca/topo/base/topo_base_cart_map.c +++ b/ompi/mca/topo/base/topo_base_cart_map.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,30 +29,30 @@ * @param ndims number of dimensions of cartesian structure (integer) * @param dims integer array of size 'ndims' specifying the number of * processes in each coordinate direction - * @param periods logical array of size 'ndims' specifying the + * @param periods logical array of size 'ndims' specifying the * periodicity specification in each coordinate direction - * @param newrank reordered rank of the calling process; 'MPI_UNDEFINED' + * @param newrank reordered rank of the calling process; 'MPI_UNDEFINED' * if calling process does not belong to grid (integer) * - * @retval MPI_SUCCESS - * @retval MPI_ERR_DIMS + * @retval MPI_SUCCESS + * @retval MPI_ERR_DIMS */ int mca_topo_base_cart_map(ompi_communicator_t* comm, int ndims, - int *dims, int *periods, int *newrank) + const int *dims, const int *periods, int *newrank) { - int nprocs, rank, size, i, *p; + int nprocs, rank, size, i; /* * Compute the # of processes in the grid. */ nprocs = 1; - for (i = 0, p = dims; i < ndims; ++i, ++p) { - if (*p <= 0) { + for (i = 0 ; i < ndims; ++i) { + if (dims[i] <= 0) { return MPI_ERR_DIMS; } - nprocs *= *p; + nprocs *= dims[i]; } /* * Check that number of processes <= size of communicator. diff --git a/ompi/mca/topo/base/topo_base_cart_rank.c b/ompi/mca/topo/base/topo_base_cart_rank.c index eb0b00150ba..677e0792f1c 100644 --- a/ompi/mca/topo/base/topo_base_cart_rank.c +++ b/ompi/mca/topo/base/topo_base_cart_rank.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -35,10 +37,10 @@ * @retval MPI_ERR_COMM * @retval MPI_ERR_TOPOLOGY * @retval MPI_ERR_ARG - */ + */ -int mca_topo_base_cart_rank(ompi_communicator_t* comm, - int *coords, +int mca_topo_base_cart_rank(ompi_communicator_t* comm, + const int *coords, int *rank) { int prank; @@ -47,7 +49,6 @@ int mca_topo_base_cart_rank(ompi_communicator_t* comm, int factor; int i; int *d; - int *c; /* * Loop over coordinates computing the rank. @@ -57,11 +58,10 @@ int mca_topo_base_cart_rank(ompi_communicator_t* comm, i = comm->c_topo->mtc.cart->ndims - 1; d = comm->c_topo->mtc.cart->dims + i; - c = coords + i; - for (; i >= 0; --i, --c, --d) { + for (; i >= 0; --i, --d) { dim = *d; - ord = *c; + ord = coords[i]; /* Per MPI-2.1 7.5.4 (description of MPI_CART_RANK), if the dimension is periodic and the coordinate is outside of 0 <= coord(i) < dim, then normalize it. If the dimension is not diff --git a/ompi/mca/topo/base/topo_base_cart_shift.c b/ompi/mca/topo/base/topo_base_cart_shift.c index 63c67138cc6..f3e68de6228 100644 --- a/ompi/mca/topo/base/topo_base_cart_shift.c +++ b/ompi/mca/topo/base/topo_base_cart_shift.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,7 +36,7 @@ * Cartesian mesh. * * @retval MPI_SUCCESS - */ + */ int mca_topo_base_cart_shift(ompi_communicator_t* comm, int direction, int disp, diff --git a/ompi/mca/topo/base/topo_base_cart_sub.c b/ompi/mca/topo/base/topo_base_cart_sub.c index 71f1b7bb45a..40176090fff 100644 --- a/ompi/mca/topo/base/topo_base_cart_sub.c +++ b/ompi/mca/topo/base/topo_base_cart_sub.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -17,9 +17,9 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,10 +30,10 @@ /* * function - partitions a communicator into subgroups which * form lower-dimensional cartesian subgrids - * + * * @param comm communicator with cartesian structure (handle) - * @param remain_dims the 'i'th entry of 'remain_dims' specifies whether - * the 'i'th dimension is kept in the subgrid (true) + * @param remain_dims the 'i'th entry of 'remain_dims' specifies whether + * the 'i'th dimension is kept in the subgrid (true) * or is dropped (false) (logical vector) * @param new_comm communicator containing the subgrid that includes the * calling process (handle) @@ -41,16 +41,16 @@ * @retval MPI_SUCCESS * @retval MPI_ERR_TOPOLOGY * @retval MPI_ERR_COMM - */ + */ int mca_topo_base_cart_sub (ompi_communicator_t* comm, - int *remain_dims, + const int *remain_dims, ompi_communicator_t** new_comm) { struct ompi_communicator_t *temp_comm; mca_topo_base_comm_cart_2_2_0_t *old_cart; int errcode, colour, key, colfactor, keyfactor; int ndim, dim, i; - int *d, *dorig = NULL, *dold, *c, *r, *p, *porig = NULL, *pold; + int *d, *dorig = NULL, *dold, *c, *p, *porig = NULL, *pold; mca_topo_base_module_t* topo; mca_topo_base_comm_cart_2_2_0_t* cart; @@ -67,11 +67,10 @@ int mca_topo_base_cart_sub (ompi_communicator_t* comm, i = old_cart->ndims - 1; d = old_cart->dims + i; c = comm->c_topo->mtc.cart->coords + i; - r = remain_dims + i; - for (; i >= 0; --i, --d, --c, --r) { + for (; i >= 0; --i, --d, --c) { dim = *d; - if (*r == 0) { + if (remain_dims[i] == 0) { colour += colfactor * (*c); colfactor *= dim; } else { @@ -94,7 +93,7 @@ int mca_topo_base_cart_sub (ompi_communicator_t* comm, /* Fill the communicator with topology information. 
*/ if (temp_comm != MPI_COMM_NULL) { - + assert( NULL == temp_comm->c_topo ); if (OMPI_SUCCESS != (errcode = mca_topo_base_comm_select(temp_comm, comm->c_topo, @@ -110,9 +109,8 @@ int mca_topo_base_cart_sub (ompi_communicator_t* comm, /* Copy the periods */ porig = p = (int*)malloc(ndim * sizeof(int)); pold = old_cart->periods; - r = remain_dims; - for (i = 0; i < old_cart->ndims; ++i, ++dold, ++pold, ++r) { - if (*r) { + for (i = 0; i < old_cart->ndims; ++i, ++dold, ++pold) { + if (remain_dims[i]) { *d++ = *dold; *p++ = *pold; } diff --git a/ompi/mca/topo/base/topo_base_cartdim_get.c b/ompi/mca/topo/base/topo_base_cartdim_get.c index e566c218dbf..a8094555f4b 100644 --- a/ompi/mca/topo/base/topo_base_cartdim_get.c +++ b/ompi/mca/topo/base/topo_base_cartdim_get.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/topo/base/topo_base_comm_select.c b/ompi/mca/topo/base/topo_base_comm_select.c index 63d0ac418d3..400961c8e93 100644 --- a/ompi/mca/topo/base/topo_base_comm_select.c +++ b/ompi/mca/topo/base/topo_base_comm_select.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -66,7 +66,7 @@ static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL); * 3. The query function returns a module and its priority. * 4. Select the module with the highest priority. * 5. OBJ_RELEASE all the "losing" modules. - */ + */ int mca_topo_base_comm_select(const ompi_communicator_t* comm, mca_topo_base_module_t* preferred_module, mca_topo_base_module_t** selected_module, @@ -74,11 +74,11 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, { int priority; int best_priority; - opal_list_item_t *item; + opal_list_item_t *item; mca_base_component_list_item_t *cli; - mca_topo_base_component_t *component; + mca_topo_base_component_t *component; mca_topo_base_component_t *best_component; - mca_topo_base_module_t *module; + mca_topo_base_module_t *module; opal_list_t queried; queried_module_t *om; int err = MPI_SUCCESS; @@ -97,12 +97,12 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, /* We have a preferred module. Check if it is available and if so, whether it wants to run */ - + opal_output_verbose(10, ompi_topo_base_framework.framework_output, "topo:base:comm_select: Checking preferred component: %s", preferred_module->topo_component->topoc_version.mca_component_name); - /* query the component for its priority and get its module + /* query the component for its priority and get its module structure. 
This is necessary to proceed */ component = (mca_topo_base_component_t *)preferred_module->topo_component; module = component->topoc_comm_query(comm, &priority, type); @@ -116,7 +116,7 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, *selected_module = module; module->topo_component = component; return OMPI_SUCCESS; - } + } /* If we get here, the preferred component is present, but is unable to run. This is not a good sign. We should try selecting some other component. We let it fall through @@ -132,7 +132,7 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, * All we need to do is to go through the list of available * components and find the one which has the highest priority and * use that for this communicator - */ + */ best_component = NULL; best_priority = -1; @@ -154,20 +154,20 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, } else { /* * call the query function and see what it returns - */ + */ module = component->topoc_comm_query(comm, &priority, type); if (NULL == module) { /* * query did not return any action which can be used - */ + */ opal_output_verbose(10, ompi_topo_base_framework.framework_output, "select: query returned failure"); } else { opal_output_verbose(10, ompi_topo_base_framework.framework_output, "select: query returned priority %d", priority); - /* + /* * is this the best component we have found till now? */ if (priority > best_priority) { @@ -184,8 +184,8 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, return OMPI_ERR_OUT_OF_RESOURCE; } om->om_component = component; - om->om_module = module; - opal_list_append(&queried, (opal_list_item_t *)om); + om->om_module = module; + opal_list_append(&queried, (opal_list_item_t *)om); } /* end else of if (NULL == module) */ } /* end else of if (NULL == component->init) */ } /* end for ... end of traversal */ @@ -206,7 +206,7 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, * returned their priorities from the query. 
We now have to * unquery() those components which have not been selected and * init() the component which was selected - */ + */ for (item = opal_list_remove_first(&queried); NULL != item; item = opal_list_remove_first(&queried)) { @@ -220,7 +220,7 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, * defined. Whereever a function pointer is null in the * module structure we need to fill it in with the base * structure function pointers. This is yet to be done - */ + */ fill_null_pointers(type, om->om_module); om->om_module->topo_component = best_component; *selected_module = om->om_module; @@ -233,7 +233,7 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, } OBJ_RELEASE(om); } /* traversing through the entire list */ - + opal_output_verbose(10, ompi_topo_base_framework.framework_output, "select: component %s selected", best_component->topoc_version.mca_component_name); @@ -248,7 +248,7 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, * a check for the common minimum funtions being implemented by the * module. */ -static void fill_null_pointers(int type, mca_topo_base_module_t *module) +static void fill_null_pointers(int type, mca_topo_base_module_t *module) { if( OMPI_COMM_CART == type ) { if (NULL == module->topo.cart.cart_coords) { diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create.c b/ompi/mca/topo/base/topo_base_dist_graph_create.c index dc676e3b016..e92adf5d93e 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create.c @@ -10,6 +10,7 @@ * Copyright (c) 2011-2013 Université Bordeaux 1 * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
*/ #include "ompi_config.h" @@ -32,10 +33,10 @@ typedef struct _dist_graph_elem { } mca_topo_base_dist_graph_elem_t; int mca_topo_base_dist_graph_distribute(mca_topo_base_module_t* module, - ompi_communicator_t *comm, - int n, int nodes[], - int degrees[], int targets[], - int weights[], + ompi_communicator_t *comm, + int n, const int nodes[], + const int degrees[], const int targets[], + const int weights[], mca_topo_base_comm_dist_graph_2_2_0_t** ptopo) { int i, j, err, count, left_over, pending_reqs, current_pos, index, csize; @@ -115,7 +116,7 @@ int mca_topo_base_dist_graph_distribute(mca_topo_base_module_t* module, err = comm->c_coll.coll_reduce_scatter_block( MPI_IN_PLACE, idx, 2, (ompi_datatype_t*)&ompi_mpi_int, MPI_SUM, comm, - comm->c_coll.coll_allreduce_module); + comm->c_coll.coll_reduce_scatter_block_module); /** * At this point in the indexes array we have: * - idx[0].in total number of IN edges @@ -279,11 +280,11 @@ int mca_topo_base_dist_graph_distribute(mca_topo_base_module_t* module, } int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, - ompi_communicator_t *comm_old, - int n, int nodes[], - int degrees[], int targets[], - int weights[], - ompi_info_t *info, int reorder, + ompi_communicator_t *comm_old, + int n, const int nodes[], + const int degrees[], const int targets[], + const int weights[], + ompi_info_t *info, int reorder, ompi_communicator_t **newcomm) { int err; @@ -303,9 +304,9 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, return OMPI_ERR_OUT_OF_RESOURCE; } err = mca_topo_base_dist_graph_distribute(module, - comm_old, + comm_old, n, nodes, - degrees, targets, + degrees, targets, weights, &topo); if( OMPI_SUCCESS != err ) { @@ -318,7 +319,7 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, must be set before invoking ompi_comm_enable */ rank = ompi_comm_rank(comm_old); if(OMPI_GROUP_IS_DENSE(comm_old->c_local_group)) { - memcpy(topo_procs, + memcpy(topo_procs, 
comm_old->c_local_group->grp_proc_pointers, num_procs * sizeof(ompi_proc_t *)); } else { diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c index 67026af1cfc..6d3d9406339 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c @@ -8,7 +8,7 @@ * reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Université Bordeaux 1 - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. */ @@ -21,11 +21,11 @@ int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, ompi_communicator_t *comm_old, - int indegree, int sources[], - int sourceweights[], + int indegree, const int sources[], + const int sourceweights[], int outdegree, - int destinations[], - int destweights[], + const int destinations[], + const int destweights[], ompi_info_t *info, int reorder, ompi_communicator_t **newcomm) { @@ -101,7 +101,7 @@ int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, if( MPI_UNWEIGHTED != destweights ) { if( NULL != topo->outw ) free(topo->outw); } - free(topo); + OBJ_RELEASE(topo); } ompi_comm_free(newcomm); return err; diff --git a/ompi/mca/topo/base/topo_base_dist_graph_neighbors.c b/ompi/mca/topo/base/topo_base_dist_graph_neighbors.c index dfe4aa89135..af49fd7dd74 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_neighbors.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_neighbors.c @@ -8,7 +8,7 @@ * reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Universite Bordeaux 1 - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
*/ @@ -19,7 +19,7 @@ #include "ompi/mca/topo/base/base.h" -int mca_topo_base_dist_graph_neighbors(ompi_communicator_t *comm, +int mca_topo_base_dist_graph_neighbors(ompi_communicator_t *comm, int maxindegree, int sources[], int sourceweights[], int maxoutdegree, int destinations[], @@ -40,13 +40,13 @@ int mca_topo_base_dist_graph_neighbors(ompi_communicator_t *comm, for (i = 0; i < maxindegree; ++i) { sources[i] = dg->in[i]; - if (NULL != dg->inw) { + if (MPI_UNWEIGHTED != sourceweights && NULL != dg->inw) { sourceweights[i] = dg->inw[i]; } } for (i = 0; i < maxoutdegree; ++i) { destinations[i] = dg->out[i]; - if (NULL != dg->outw) { + if (MPI_UNWEIGHTED != destweights && NULL != dg->outw) { destweights[i] = dg->outw[i]; } } diff --git a/ompi/mca/topo/base/topo_base_dist_graph_neighbors_count.c b/ompi/mca/topo/base/topo_base_dist_graph_neighbors_count.c index 0b206e70ad3..36f758c0b94 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_neighbors_count.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_neighbors_count.c @@ -14,7 +14,7 @@ #include "ompi/mca/topo/base/base.h" -int mca_topo_base_dist_graph_neighbors_count(ompi_communicator_t *comm, +int mca_topo_base_dist_graph_neighbors_count(ompi_communicator_t *comm, int *inneighbors, int *outneighbors, int *weighted) { diff --git a/ompi/mca/topo/base/topo_base_find_available.c b/ompi/mca/topo/base/topo_base_find_available.c index b1ff5e3a9ad..64a831c4cef 100644 --- a/ompi/mca/topo/base/topo_base_find_available.c +++ b/ompi/mca/topo/base/topo_base_find_available.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
@@ -13,9 +13,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -41,15 +41,15 @@ static int init_query_2_2_0(const mca_base_component_t *component, mca_base_component_list_item_t *entry, bool enable_progress_threads, bool enable_mpi_threads); - + int mca_topo_base_find_available(bool enable_progress_threads, bool enable_mpi_threads) { opal_list_item_t *item, *next; mca_base_component_list_item_t *cli; - /* The list of components which we should check is already present - in ompi_topo_base_framework.framework_components, which was established in + /* The list of components which we should check is already present + in ompi_topo_base_framework.framework_components, which was established in mca_topo_base_open */ item = opal_list_get_first(&ompi_topo_base_framework.framework_components); @@ -57,7 +57,7 @@ int mca_topo_base_find_available(bool enable_progress_threads, next = opal_list_get_next(item); cli = (mca_base_component_list_item_t*)item; - /* Now for this entry, we have to determine the thread level. Call + /* Now for this entry, we have to determine the thread level. 
Call a subroutine to do the job for us */ if (OMPI_SUCCESS != init_query(cli->cli_component, cli, @@ -83,15 +83,15 @@ int mca_topo_base_find_available(bool enable_progress_threads, /* All done */ return OMPI_SUCCESS; } - - + + static int init_query(const mca_base_component_t *m, mca_base_component_list_item_t *entry, bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { int ret; - + opal_output_verbose(10, ompi_topo_base_framework.framework_output, "topo:find_available: querying topo component %s", m->mca_component_name); @@ -122,7 +122,7 @@ static int init_query(const mca_base_component_t *m, m->mca_component_name); if (NULL != m->mca_close_component) { m->mca_close_component(); - } + } } else { opal_output_verbose(10, ompi_topo_base_framework.framework_output, "topo:find_avalable: topo component %s is available", @@ -138,11 +138,11 @@ static int init_query(const mca_base_component_t *m, static int init_query_2_2_0(const mca_base_component_t *component, mca_base_component_list_item_t *entry, bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { - mca_topo_base_component_2_2_0_t *topo = + mca_topo_base_component_2_2_0_t *topo = (mca_topo_base_component_2_2_0_t *) component; - + return topo->topoc_init_query(enable_progress_threads, enable_mpi_threads); } diff --git a/ompi/mca/topo/base/topo_base_frame.c b/ompi/mca/topo/base/topo_base_frame.c index 78ffbd4eb92..062786f9308 100644 --- a/ompi/mca/topo/base/topo_base_frame.c +++ b/ompi/mca/topo/base/topo_base_frame.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
- * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -57,7 +57,7 @@ OBJ_CLASS_INSTANCE(mca_topo_base_module_t, opal_object_t, mca_topo_base_module_construct, mca_topo_base_module_destruct); -static int mca_topo_base_close(void) +static int mca_topo_base_close(void) { return mca_base_framework_components_close(&ompi_topo_base_framework, NULL); } @@ -66,7 +66,7 @@ static int mca_topo_base_close(void) * Function for finding and opening either all the MCA topo components, or * the one that specifically requested via a MCA parameter. */ -static int mca_topo_base_open(mca_base_open_flag_t flags) +static int mca_topo_base_open(mca_base_open_flag_t flags) { return mca_base_framework_components_open(&ompi_topo_base_framework, flags); } diff --git a/ompi/mca/topo/base/topo_base_graph_create.c b/ompi/mca/topo/base/topo_base_graph_create.c index 77a1be61d7a..e2998bd1858 100644 --- a/ompi/mca/topo/base/topo_base_graph_create.c +++ b/ompi/mca/topo/base/topo_base_graph_create.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -43,8 +43,8 @@ int mca_topo_base_graph_create(mca_topo_base_module_t *topo, ompi_communicator_t* old_comm, int nnodes, - int *index, - int *edges, + const int *index, + const int *edges, bool reorder, ompi_communicator_t** comm_topo) { @@ -105,7 +105,7 @@ int mca_topo_base_graph_create(mca_topo_base_module_t *topo, return OMPI_ERR_OUT_OF_RESOURCE; } if(OMPI_GROUP_IS_DENSE(old_comm->c_local_group)) { - memcpy(topo_procs, + memcpy(topo_procs, old_comm->c_local_group->grp_proc_pointers, num_procs * sizeof(ompi_proc_t *)); } else { @@ -140,7 +140,7 @@ int mca_topo_base_graph_create(mca_topo_base_module_t *topo, } return ret; } - + *comm_topo = new_comm; if( MPI_UNDEFINED == new_rank ) { diff --git a/ompi/mca/topo/base/topo_base_graph_get.c b/ompi/mca/topo/base/topo_base_graph_get.c index bc905f42fe1..f6c1a7dd083 100644 --- a/ompi/mca/topo/base/topo_base_graph_get.c +++ b/ompi/mca/topo/base/topo_base_graph_get.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,7 +32,7 @@ * @param edges array of integers containing the graph structure * * @retval MPI_SUCCESS - */ + */ int mca_topo_base_graph_get(ompi_communicator_t* comm, int maxindex, @@ -53,10 +53,10 @@ int mca_topo_base_graph_get(ompi_communicator_t* comm, p = comm->c_topo->mtc.graph->edges; - for (i = 0; - (i < comm->c_topo->mtc.graph->index[nprocs-1]) && (i < maxedges); + for (i = 0; + (i < comm->c_topo->mtc.graph->index[nprocs-1]) && (i < maxedges); ++i, ++p) { - + *edges++ = *p; } diff --git a/ompi/mca/topo/base/topo_base_graph_map.c b/ompi/mca/topo/base/topo_base_graph_map.c index 70cc91f4f40..7cd738ba51e 100644 --- a/ompi/mca/topo/base/topo_base_graph_map.c +++ b/ompi/mca/topo/base/topo_base_graph_map.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,7 +31,7 @@ * @param index integer array specifying the graph structure * @param edges integer array specifying the graph structure * @param newrank reordered rank of the calling process; 'MPI_UNDEFINED' - * if the calling process does not belong to + * if the calling process does not belong to * graph (integer) * * @retval MPI_SUCCESS @@ -38,7 +40,7 @@ int mca_topo_base_graph_map(ompi_communicator_t * comm, int nnodes, - int *index, int *edges, int *newrank) + const int *index, const int *edges, int *newrank) { int myrank; diff --git a/ompi/mca/topo/base/topo_base_graph_neighbors.c b/ompi/mca/topo/base/topo_base_graph_neighbors.c index ee2a6fe2023..40a4b3723dd 100644 --- a/ompi/mca/topo/base/topo_base_graph_neighbors.c +++ b/ompi/mca/topo/base/topo_base_graph_neighbors.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/topo/base/topo_base_graph_neighbors_count.c b/ompi/mca/topo/base/topo_base_graph_neighbors_count.c index 27c115299e4..02331d12503 100644 --- a/ompi/mca/topo/base/topo_base_graph_neighbors_count.c +++ b/ompi/mca/topo/base/topo_base_graph_neighbors_count.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,9 +30,9 @@ * @param comm communicator with graph topology (handle) * @param rank rank of process in group of 'comm' (integer) * @param nneighbors number of neighbors of specified process (integer) - * + * * @retval MPI_SUCCESS - */ + */ int mca_topo_base_graph_neighbors_count (ompi_communicator_t* comm, int rank, diff --git a/ompi/mca/topo/base/topo_base_graphdims_get.c b/ompi/mca/topo/base/topo_base_graphdims_get.c index 7c84da94b1b..e34cb2b68ad 100644 --- a/ompi/mca/topo/base/topo_base_graphdims_get.c +++ b/ompi/mca/topo/base/topo_base_graphdims_get.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,11 +26,11 @@ /* * function - Retrieves graph topology information associated with a * communicator - * + * * @param comm - communicator for group with graph structure (handle) * @param nodes - number of nodes in graph (integer) * @param nedges - number of edges in graph (integer) - * + * * @retval MPI_SUCCESS * @retval MPI_ERR_TOPOLOGY * @retval MPI_ERR_COMM @@ -42,7 +42,7 @@ int mca_topo_base_graphdims_get (ompi_communicator_t* comm, { mca_topo_base_comm_graph_2_2_0_t* graph = comm->c_topo->mtc.graph; *nodes = ompi_comm_size(comm); - *nedges = graph->index[*nodes -1]; + *nedges = graph->index[*nodes -1]; return MPI_SUCCESS; } diff --git a/ompi/mca/topo/base/topo_base_lazy_init.c b/ompi/mca/topo/base/topo_base_lazy_init.c index 42dc376f756..c66462a851c 100644 --- a/ompi/mca/topo/base/topo_base_lazy_init.c +++ b/ompi/mca/topo/base/topo_base_lazy_init.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,7 +34,7 @@ it's safe to invoke this function multiple times). We do this because most MPI apps don't use MPI topology functions, so we might as well not load them unless we have to. */ -int mca_topo_base_lazy_init(void) +int mca_topo_base_lazy_init(void) { int err; @@ -43,7 +43,7 @@ int mca_topo_base_lazy_init(void) * Register and open all available components, giving them a chance to access the MCA parameters. 
*/ - err = mca_base_framework_open (&ompi_topo_base_framework, MCA_BASE_REGISTER_DEFAULT); + err = mca_base_framework_open (&ompi_topo_base_framework, MCA_BASE_OPEN_DEFAULT); if (OMPI_SUCCESS != err) { return err; } diff --git a/ompi/mca/topo/basic/.windows b/ompi/mca/topo/basic/.windows deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/ompi/mca/topo/basic/Makefile.am b/ompi/mca/topo/basic/Makefile.am index 75693d9fa65..4e6da4f4fe7 100644 --- a/ompi/mca/topo/basic/Makefile.am +++ b/ompi/mca/topo/basic/Makefile.am @@ -4,15 +4,14 @@ # reserved. # Copyright (c) 2011-2013 INRIA. All rights reserved. # Copyright (c) 2011-2013 Université Bordeaux 1 +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # -EXTRA_DIST = .windows - sources = \ topo_basic.h \ topo_basic_component.c @@ -23,7 +22,7 @@ sources = \ if MCA_BUILD_ompi_topo_basic_DSO lib = -lib_sources = +lib_sources = component = mca_topo_basic.la component_sources = $(sources) else diff --git a/ompi/mca/topo/basic/topo_basic.h b/ompi/mca/topo/basic/topo_basic.h index 460a111ba47..006005bffcd 100644 --- a/ompi/mca/topo/basic/topo_basic.h +++ b/ompi/mca/topo/basic/topo_basic.h @@ -7,9 +7,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/topo/basic/topo_basic_component.c b/ompi/mca/topo/basic/topo_basic_component.c index b0bcc6ffd61..83f26519273 100644 --- a/ompi/mca/topo/basic/topo_basic_component.c +++ b/ompi/mca/topo/basic/topo_basic_component.c @@ -10,9 +10,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +22,7 @@ /* * Public string showing the topo basic module version number */ -const char *mca_topo_basic_component_version_string = +const char *mca_topo_basic_component_version_string = "Open MPI basic topology MCA component version" OMPI_VERSION; /* @@ -35,7 +35,7 @@ comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type); /* * Public component structure */ -mca_topo_basic_component_t mca_topo_basic_component = +mca_topo_basic_component_t mca_topo_basic_component = { .topoc_version = { MCA_TOPO_BASE_VERSION_2_2_0, @@ -60,7 +60,7 @@ static int init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ return OMPI_SUCCESS; -} +} static struct mca_topo_base_module_t * diff --git a/ompi/mca/topo/example/Makefile.am b/ompi/mca/topo/example/Makefile.am index a5c80c0b01b..190bdf0dc8a 100644 --- a/ompi/mca/topo/example/Makefile.am +++ b/ompi/mca/topo/example/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2013 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Inria. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -31,7 +31,7 @@ sources = \ if MCA_BUILD_ompi_topo_example_DSO lib = -lib_sources = +lib_sources = component = mca_topo_example.la component_sources = $(sources) else diff --git a/ompi/mca/topo/example/topo_example.h b/ompi/mca/topo/example/topo_example.h index ca7b24315fd..9d14e89d1d9 100644 --- a/ompi/mca/topo/example/topo_example.h +++ b/ompi/mca/topo/example/topo_example.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -42,10 +42,10 @@ BEGIN_C_DECLS /* * Public component instance */ -OMPI_MODULE_DECLSPEC extern mca_topo_base_component_2_2_0_t +OMPI_MODULE_DECLSPEC extern mca_topo_base_component_2_2_0_t mca_topo_example_component; -/* +/* * A unique module class for the module so that we can both cache * module-specific information on the module and have a * module-specific constructor and destructor. 
@@ -62,7 +62,7 @@ OBJ_CLASS_DECLARATION(mca_topo_example_module_t); /* * Module functions - */ + */ int mca_topo_example_cart_map(struct ompi_communicator_t *comm, int ndims, @@ -79,8 +79,8 @@ int mca_topo_example_graph_map(struct ompi_communicator_t *comm, * ****************************************************************** * ************ functions implemented in this module end ************ * ****************************************************************** - */ - + */ + END_C_DECLS #endif /* MCA_TOPO_EXAMPLE_H */ diff --git a/ompi/mca/topo/example/topo_example_cart_map.c b/ompi/mca/topo/example/topo_example_cart_map.c index eb693142a67..4c78a75348b 100644 --- a/ompi/mca/topo/example/topo_example_cart_map.c +++ b/ompi/mca/topo/example/topo_example_cart_map.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ + */ #include "ompi_config.h" #include "ompi/mca/topo/example/topo_example.h" @@ -30,13 +30,13 @@ * @param ndims number of dimensions of cartesian structure (integer) * @param dims integer array of size 'ndims' specifying the number of * processes in each coordinate direction - * @param periods logical array of size 'ndims' specifying the + * @param periods logical array of size 'ndims' specifying the * periodicity specification in each coordinate direction - * @param newrank reordered rank of the calling process; 'MPI_UNDEFINED' + * @param newrank reordered rank of the calling process; 'MPI_UNDEFINED' * if calling process does not belong to grid (integer) * - * @retval MPI_SUCCESS - * @retval MPI_ERR_DIMS + * @retval MPI_SUCCESS + * @retval MPI_ERR_DIMS */ int mca_topo_example_cart_map (ompi_communicator_t* comm, diff --git a/ompi/mca/topo/example/topo_example_component.c b/ompi/mca/topo/example/topo_example_component.c index 2e09f3e3a33..3874a89b7bc 100644 --- a/ompi/mca/topo/example/topo_example_component.c +++ b/ompi/mca/topo/example/topo_example_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,7 +27,7 @@ /* * Public string showing the topo example module version number */ -const char *mca_topo_example_component_version_string = +const char *mca_topo_example_component_version_string = "Open MPI example topology MCA component version" OMPI_VERSION; /* @@ -40,7 +40,7 @@ comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type); /* * Public component structure */ -mca_topo_base_component_2_2_0_t mca_topo_example_component = +mca_topo_base_component_2_2_0_t mca_topo_example_component = { .topoc_version = { MCA_TOPO_BASE_VERSION_2_2_0, @@ -64,9 +64,9 @@ mca_topo_base_component_2_2_0_t mca_topo_example_component = static int init_query(bool enable_progress_threads, bool enable_mpi_threads) { /* Nothing to do */ - + return OMPI_SUCCESS; -} +} static struct mca_topo_base_module_t * diff --git a/ompi/mca/topo/example/topo_example_graph_map.c b/ompi/mca/topo/example/topo_example_graph_map.c index dd7b2d0b551..e2fa095babd 100644 --- a/ompi/mca/topo/example/topo_example_graph_map.c +++ b/ompi/mca/topo/example/topo_example_graph_map.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ + */ #include "ompi_config.h" #include "ompi/mca/topo/example/topo_example.h" @@ -31,12 +31,12 @@ * @param index integer array specifying the graph structure * @param edges integer array specifying the graph structure * @param newrank reordered rank of the calling process; 'MPI_UNDEFINED' - * if the calling process does not belong to + * if the calling process does not belong to * graph (integer) * * @retval MPI_SUCCESS * @retval MPI_UNDEFINED - */ + */ int mca_topo_example_graph_map (ompi_communicator_t* comm, int nnodes, @@ -51,8 +51,8 @@ int mca_topo_example_graph_map (ompi_communicator_t* comm, * component, start with a rank that is size-my_initial_rank. */ myrank = ompi_comm_size(comm) - 1 - ompi_comm_rank(comm); - *newrank = + *newrank = ((0 > myrank) || (myrank >= nnodes)) ? MPI_UNDEFINED : myrank; - + return OMPI_SUCCESS; } diff --git a/ompi/mca/topo/example/topo_example_module.c b/ompi/mca/topo/example/topo_example_module.c index 3f347273297..965c2aaf945 100644 --- a/ompi/mca/topo/example/topo_example_module.c +++ b/ompi/mca/topo/example/topo_example_module.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mca/topo/topo.h b/ompi/mca/topo/topo.h index e33cb66d250..d4460793b30 100644 --- a/ompi/mca/topo/topo.h +++ b/ompi/mca/topo/topo.h @@ -6,20 +6,20 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,11 +32,11 @@ #include "opal/mca/base/base.h" #include "ompi/communicator/communicator.h" -/* Forward reference to ompi_proc_t */ +/* Forward reference to ompi_proc_t */ struct ompi_proc_t; typedef struct mca_topo_base_module_t mca_topo_base_module_t; -/* +/* * Initial component query, called during mca_topo_base_open. */ typedef int (*mca_topo_base_component_init_query_2_2_0_fn_t) @@ -48,19 +48,19 @@ typedef int (*mca_topo_base_component_init_query_2_2_0_fn_t) * creation. 
*/ typedef struct mca_topo_base_module_t* -(*mca_topo_base_component_comm_query_2_2_0_fn_t) +(*mca_topo_base_component_comm_query_2_2_0_fn_t) (const ompi_communicator_t *comm, int *priority, uint32_t type); /* * Structure for topo v2.1.0 components.This is chained to MCA v2.0.0 - */ + */ typedef struct mca_topo_base_component_2_2_0_t { mca_base_component_t topoc_version; mca_base_component_data_t topoc_data; mca_topo_base_component_init_query_2_2_0_fn_t topoc_init_query; mca_topo_base_component_comm_query_2_2_0_fn_t topoc_comm_query; -} mca_topo_base_component_2_2_0_t; +} mca_topo_base_component_2_2_0_t; typedef mca_topo_base_component_2_2_0_t mca_topo_base_component_t; /* @@ -135,7 +135,7 @@ typedef mca_topo_base_comm_cgd_union_2_2_0_t mca_topo_base_comm_cgd_union_t; * topology function was called upon is provided as well, in order to have * a valid medium for messaging. In return from the *_create functions, * a new group of processes is expected one containing all processes in - * the local_group of the new communicator. Once this information + * the local_group of the new communicator. Once this information * returned the new communicator will be fully initialized and activated. 
*/ @@ -144,10 +144,10 @@ typedef mca_topo_base_comm_cgd_union_2_2_0_t mca_topo_base_comm_cgd_union_t; */ /* Back end for MPI_CART_COORDS */ -typedef int (*mca_topo_base_module_cart_coords_fn_t) - (struct ompi_communicator_t *comm, - int rank, - int maxdims, +typedef int (*mca_topo_base_module_cart_coords_fn_t) + (struct ompi_communicator_t *comm, + int rank, + int maxdims, int *coords); /* Back end for MPI_CART_CREATE */ @@ -155,17 +155,17 @@ typedef int (*mca_topo_base_module_cart_create_fn_t) (mca_topo_base_module_t *topo_module, ompi_communicator_t* old_comm, int ndims, - int *dims, - int *periods, + const int *dims, + const int *periods, bool reorder, ompi_communicator_t** comm_topo); /* Back end for MPI_CART_GET */ typedef int (*mca_topo_base_module_cart_get_fn_t) - (struct ompi_communicator_t *comm, - int maxdims, + (struct ompi_communicator_t *comm, + int maxdims, int *dims, - int *periods, + int *periods, int *coords); /* Back end for MPI_CARTDIM_GET */ @@ -175,30 +175,30 @@ typedef int (*mca_topo_base_module_cartdim_get_fn_t) /* Back end for MPI_CART_MAP */ typedef int (*mca_topo_base_module_cart_map_fn_t) - (struct ompi_communicator_t *comm, - int ndims, - int *dims, - int *periods, + (struct ompi_communicator_t *comm, + int ndims, + const int *dims, + const int *periods, int *newrank); /* Back end for MPI_CART_RANK */ typedef int (*mca_topo_base_module_cart_rank_fn_t) - (struct ompi_communicator_t *comm, - int *coords, + (struct ompi_communicator_t *comm, + const int *coords, int *rank); /* Back end for MPI_CART_SHIFT */ typedef int (*mca_topo_base_module_cart_shift_fn_t) - (struct ompi_communicator_t *comm, - int direction, + (struct ompi_communicator_t *comm, + int direction, int disp, - int *rank_source, + int *rank_source, int *rank_dest); /* Back end for MPI_CART_SUB */ typedef int (*mca_topo_base_module_cart_sub_fn_t) - (struct ompi_communicator_t *comm, - int *remain_dims, + (struct ompi_communicator_t *comm, + const int *remain_dims, struct 
ompi_communicator_t ** new_comm); /* Back end for MPI_GRAPH_CREATE */ @@ -206,64 +206,64 @@ typedef int (*mca_topo_base_module_graph_create_fn_t) (mca_topo_base_module_t *topo_module, ompi_communicator_t* old_comm, int nnodes, - int *index, - int *edges, + const int *index, + const int *edges, bool reorder, ompi_communicator_t** new_comm); /* Back end for MPI_GRAPH_GET */ typedef int (*mca_topo_base_module_graph_get_fn_t) - (struct ompi_communicator_t *comm, - int maxindex, - int maxedges, - int *index, + (struct ompi_communicator_t *comm, + int maxindex, + int maxedges, + int *index, int *edges); /* Back end for MPI_GRAPH_MAP */ typedef int (*mca_topo_base_module_graph_map_fn_t) - (struct ompi_communicator_t *comm, - int nnodes, - int *index, - int *edges, + (struct ompi_communicator_t *comm, + int nnodes, + const int *index, + const int *edges, int *newrank); /* Back end for MPI_GRAPHDIMS_GET */ typedef int (*mca_topo_base_module_graphdims_get_fn_t) - (struct ompi_communicator_t *comm, - int *nnodes, + (struct ompi_communicator_t *comm, + int *nnodes, int *nnedges); /* Back end for MPI_GRAPH_NEIGHBORS */ typedef int (*mca_topo_base_module_graph_neighbors_fn_t) - (struct ompi_communicator_t *comm, - int rank, - int maxneighbors, + (struct ompi_communicator_t *comm, + int rank, + int maxneighbors, int *neighbors); /* Back end for MPI_GRAPH_NEIGHBORS_COUNT */ typedef int (*mca_topo_base_module_graph_neighbors_count_fn_t) - (struct ompi_communicator_t *comm, - int rank, + (struct ompi_communicator_t *comm, + int rank, int *nneighbors); /* Back end for MPI_DIST_GRAPH_CREATE */ typedef int (*mca_topo_base_module_dist_graph_create_fn_t) (struct mca_topo_base_module_t* module, - struct ompi_communicator_t *old_comm, - int n, int nodes[], - int degrees[], int targets[], int weights[], - struct ompi_info_t *info, int reorder, + struct ompi_communicator_t *old_comm, + int n, const int nodes[], + const int degrees[], const int targets[], const int weights[], + struct 
ompi_info_t *info, int reorder, struct ompi_communicator_t **new_comm); /* Back end for MPI_DIST_GRAPH_CREATE_ADJACENT */ typedef int (*mca_topo_base_module_dist_graph_create_adjacent_fn_t) (struct mca_topo_base_module_t* module, ompi_communicator_t *comm_old, - int indegree, int sources[], - int sourceweights[], + int indegree, const int sources[], + const int sourceweights[], int outdegree, - int destinations[], - int destweights[], + const int destinations[], + const int destweights[], struct ompi_info_t *info, int reorder, ompi_communicator_t **comm_dist_graph); @@ -274,7 +274,7 @@ typedef int (*mca_topo_base_module_dist_graph_neighbors_fn_t) int sources[], int sourceweights[], int maxoutdegree, int destinations[], int destweights[]); - + /* Back end for MPI_DIST_GRAPH_NEIGHBORS_COUNT */ typedef int (*mca_topo_base_module_dist_graph_neighbors_count_fn_t) (struct ompi_communicator_t *comm, diff --git a/ompi/mca/vprotocol/Makefile.am b/ompi/mca/vprotocol/Makefile.am index fe3e2c32352..da01c34eb11 100644 --- a/ompi/mca/vprotocol/Makefile.am +++ b/ompi/mca/vprotocol/Makefile.am @@ -4,9 +4,9 @@ # reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -28,4 +28,4 @@ endif include base/Makefile.am distclean-local: - rm -f base/static-components.h + rm -f base/static-components.h diff --git a/ompi/mca/vprotocol/base/Makefile.am b/ompi/mca/vprotocol/base/Makefile.am index 1690782ec67..f21ef94e8ca 100644 --- a/ompi/mca/vprotocol/base/Makefile.am +++ b/ompi/mca/vprotocol/base/Makefile.am @@ -3,9 +3,9 @@ # of Tennessee Research Foundation. All rights # reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -17,4 +17,4 @@ libmca_vprotocol_la_SOURCES += \ base/vprotocol_base.c \ base/vprotocol_base_select.c \ base/vprotocol_base_parasite.c \ - base/vprotocol_base_request.c + base/vprotocol_base_request.c diff --git a/ompi/mca/vprotocol/base/base.h b/ompi/mca/vprotocol/base/base.h index 76235899130..c976244751a 100644 --- a/ompi/mca/vprotocol/base/base.h +++ b/ompi/mca/vprotocol/base/base.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of the University of Tennessee. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -17,6 +17,7 @@ #include "ompi_config.h" #include "ompi/constants.h" #include "ompi/mca/mca.h" +#include "opal/mca/base/mca_base_framework.h" #include "ompi/mca/vprotocol/vprotocol.h" BEGIN_C_DECLS @@ -38,37 +39,37 @@ OMPI_DECLSPEC extern mca_pml_v_t mca_pml_v; */ OMPI_DECLSPEC extern mca_base_framework_t ompi_vprotocol_base_framework; -/* this needs to be called before vprotocol is opened. this replaces the +/* this needs to be called before vprotocol is opened. 
this replaces the need for a unique open function */ void mca_vprotocol_base_set_include_list(char *vprotocol_include_list); /* select a component */ -OMPI_DECLSPEC int mca_vprotocol_base_select(bool enable_progress_threads, +OMPI_DECLSPEC int mca_vprotocol_base_select(bool enable_progress_threads, bool enable_mpi_threads); OMPI_DECLSPEC int mca_vprotocol_base_parasite(void); - + OMPI_DECLSPEC extern char *mca_vprotocol_base_include_list; OMPI_DECLSPEC extern mca_vprotocol_base_component_t mca_vprotocol_component; OMPI_DECLSPEC extern mca_vprotocol_base_module_t mca_vprotocol; - + /* Macro for use in components that are of type vprotocol */ #define MCA_VPROTOCOL_BASE_VERSION_2_0_0 \ OMPI_MCA_BASE_VERSION_2_1_0("vprotocol", 2, 0, 0) - -/* Macro to mark an invalid component version (0.0.0). Any component showing - * that version number will be ignored. + +/* Macro to mark an invalid component version (0.0.0). Any component showing + * that version number will be ignored. */ #define MCA_VPROTOCOL_BASE_VERSION_0_0_0 \ /* vprotocol v0.0 is chained to MCA v2.0 */ \ OMPI_MCA_BASE_VERSION_2_1_0("vprotocol", 0, 0, 0) - + #define mca_vprotocol_base_selected() ( \ 0 != mca_vprotocol_component.pmlm_version.mca_type_major_version \ ) - + END_C_DECLS #endif /* __INCLUDE_VPROTOCOL_BASE_H_ */ diff --git a/ompi/mca/vprotocol/base/vprotocol_base.c b/ompi/mca/vprotocol/base/vprotocol_base.c index f8f8e77d219..262255d5cd2 100644 --- a/ompi/mca/vprotocol/base/vprotocol_base.c +++ b/ompi/mca/vprotocol/base/vprotocol_base.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of the University of Tennessee. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -21,9 +21,9 @@ char *mca_vprotocol_base_include_list = NULL; mca_pml_v_t mca_pml_v = {-1, 0, 0}; -/* Load any vprotocol MCA component and call open function of all those +/* Load any vprotocol MCA component and call open function of all those * components. - * + * * Also fill the mca_vprotocol_base_include_list with components that exists */ diff --git a/ompi/mca/vprotocol/base/vprotocol_base_parasite.c b/ompi/mca/vprotocol/base/vprotocol_base_parasite.c index 034b319882e..699972f4136 100644 --- a/ompi/mca/vprotocol/base/vprotocol_base_parasite.c +++ b/ompi/mca/vprotocol/base/vprotocol_base_parasite.c @@ -13,35 +13,35 @@ #include "vprotocol_base_request.h" int mca_vprotocol_base_parasite(void) { - if(mca_vprotocol.add_procs) + if(mca_vprotocol.add_procs) mca_pml.pml_add_procs = mca_vprotocol.add_procs; - if(mca_vprotocol.del_procs) + if(mca_vprotocol.del_procs) mca_pml.pml_del_procs = mca_vprotocol.del_procs; if(mca_vprotocol.progress) mca_pml.pml_progress = mca_vprotocol.progress; - if(mca_vprotocol.add_comm) + if(mca_vprotocol.add_comm) mca_pml.pml_add_comm = mca_vprotocol.add_comm; - if(mca_vprotocol.del_comm) + if(mca_vprotocol.del_comm) mca_pml.pml_del_comm = mca_vprotocol.del_comm; if(mca_vprotocol.irecv_init) mca_pml.pml_irecv_init = mca_vprotocol.irecv_init; - if(mca_vprotocol.irecv) + if(mca_vprotocol.irecv) mca_pml.pml_irecv = mca_vprotocol.irecv; - if(mca_vprotocol.recv) + if(mca_vprotocol.recv) mca_pml.pml_recv = mca_vprotocol.recv; - if(mca_vprotocol.isend_init) + if(mca_vprotocol.isend_init) mca_pml.pml_isend_init = mca_vprotocol.isend_init; - if(mca_vprotocol.isend) + if(mca_vprotocol.isend) mca_pml.pml_isend = mca_vprotocol.isend; - if(mca_vprotocol.send) + if(mca_vprotocol.send) mca_pml.pml_send = mca_vprotocol.send; - if(mca_vprotocol.iprobe) + if(mca_vprotocol.iprobe) mca_pml.pml_iprobe = mca_vprotocol.iprobe; - if(mca_vprotocol.probe) + if(mca_vprotocol.probe) mca_pml.pml_probe = mca_vprotocol.probe; - if(mca_vprotocol.start) + 
if(mca_vprotocol.start) mca_pml.pml_start = mca_vprotocol.start; - if(mca_vprotocol.dump) + if(mca_vprotocol.dump) mca_pml.pml_dump = mca_vprotocol.dump; if(mca_vprotocol.wait) ompi_request_functions.req_wait = mca_vprotocol.wait; diff --git a/ompi/mca/vprotocol/base/vprotocol_base_request.c b/ompi/mca/vprotocol/base/vprotocol_base_request.c index 5a672d5240a..a4357103fb8 100644 --- a/ompi/mca/vprotocol/base/vprotocol_base_request.c +++ b/ompi/mca/vprotocol/base/vprotocol_base_request.c @@ -15,19 +15,19 @@ #include "vprotocol_base_request.h" #include "ompi/mca/pml/v/pml_v_output.h" -int mca_vprotocol_base_request_parasite(void) +int mca_vprotocol_base_request_parasite(void) { int ret; - + if(mca_vprotocol.req_recv_class) { opal_free_list_t pml_fl_save = mca_pml_base_recv_requests; - mca_pml_v.host_pml_req_recv_size = + mca_pml_v.host_pml_req_recv_size = pml_fl_save.fl_frag_class->cls_sizeof; V_OUTPUT_VERBOSE(300, "req_rebuild: recv\tsize %lu+%lu\talignment=%lu", (unsigned long) mca_pml_v.host_pml_req_recv_size, (unsigned long) mca_vprotocol.req_recv_class->cls_sizeof, (unsigned long) pml_fl_save.fl_frag_alignment); - mca_vprotocol.req_recv_class->cls_parent = - pml_fl_save.fl_frag_class; - mca_vprotocol.req_recv_class->cls_sizeof += + mca_vprotocol.req_recv_class->cls_parent = + pml_fl_save.fl_frag_class; + mca_vprotocol.req_recv_class->cls_sizeof += pml_fl_save.fl_frag_class->cls_sizeof; /* rebuild the requests free list with the right size */ OBJ_DESTRUCT(&mca_pml_base_recv_requests); @@ -42,8 +42,8 @@ int mca_vprotocol_base_request_parasite(void) pml_fl_save.fl_max_to_alloc, pml_fl_save.fl_num_per_alloc, pml_fl_save.fl_mpool, - pml_fl_save.fl_mpool_reg_flags, - 0, + pml_fl_save.fl_rcache_reg_flags, + pml_fl_save.fl_rcache, pml_fl_save.item_init, pml_fl_save.ctx); if(OMPI_SUCCESS != ret) return ret; @@ -51,12 +51,12 @@ int mca_vprotocol_base_request_parasite(void) if(mca_vprotocol.req_send_class) { opal_free_list_t pml_fl_save = mca_pml_base_send_requests; - 
mca_pml_v.host_pml_req_send_size = + mca_pml_v.host_pml_req_send_size = pml_fl_save.fl_frag_class->cls_sizeof; V_OUTPUT_VERBOSE(300, "req_rebuild: send\tsize %lu+%lu\talignment=%lu", (unsigned long) mca_pml_v.host_pml_req_send_size, (unsigned long) mca_vprotocol.req_send_class->cls_sizeof, (unsigned long) pml_fl_save.fl_frag_alignment); - mca_vprotocol.req_send_class->cls_parent = - pml_fl_save.fl_frag_class; - mca_vprotocol.req_send_class->cls_sizeof += + mca_vprotocol.req_send_class->cls_parent = + pml_fl_save.fl_frag_class; + mca_vprotocol.req_send_class->cls_sizeof += pml_fl_save.fl_frag_class->cls_sizeof; /* rebuild the requests free list with the right size */ OBJ_DESTRUCT(&mca_pml_base_send_requests); @@ -71,12 +71,12 @@ int mca_vprotocol_base_request_parasite(void) pml_fl_save.fl_max_to_alloc, pml_fl_save.fl_num_per_alloc, pml_fl_save.fl_mpool, - pml_fl_save.fl_mpool_reg_flags, - 0, + pml_fl_save.fl_rcache_reg_flags, + pml_fl_save.fl_rcache, pml_fl_save.item_init, pml_fl_save.ctx); if(OMPI_SUCCESS != ret) return ret; } return OMPI_SUCCESS; - + } diff --git a/ompi/mca/vprotocol/base/vprotocol_base_request.h b/ompi/mca/vprotocol/base/vprotocol_base_request.h index 0262de95c29..231dc44d491 100644 --- a/ompi/mca/vprotocol/base/vprotocol_base_request.h +++ b/ompi/mca/vprotocol/base/vprotocol_base_request.h @@ -19,23 +19,23 @@ BEGIN_C_DECLS /** Rebuild the PML requests pools to make room for extra space at end of each - * request. - * The extra data is allocated in each requests so that it can hold instances - * of the req_recv_class and req_send_class fields of the + * request. + * The extra data is allocated in each requests so that it can hold instances + * of the req_recv_class and req_send_class fields of the * mca_vprotocol_base_module_t. If those fields are NULL the requests are not * recreated. 
* @return OMPI_SUCCESS or failure status */ -OMPI_DECLSPEC int mca_vprotocol_base_request_parasite(void); +OMPI_DECLSPEC int mca_vprotocol_base_request_parasite(void); + - /** Gives the actual address of the protocol specific part of a recv request. * @param req (IN) the address of an ompi_request. * @return address of the custom vprotocol data associated with the request. */ #define VPROTOCOL_RECV_FTREQ(req) \ (((uintptr_t) req) + mca_pml_v.host_pml_req_recv_size) - + /** Gives the address of the real request associated with a protocol specific * send request. * @param ftreq (IN) the address of a protocol specific request. @@ -44,7 +44,7 @@ OMPI_DECLSPEC int mca_vprotocol_base_request_parasite(void); #define VPROTOCOL_RECV_REQ(ftreq) \ ((mca_pml_base_recv_request_t *) \ (((uintptr_t) ftreq) - mca_pml_v.host_pml_req_send_size)) - + /** Gives the actual address of the protocol specific part of a send request. * @param req (IN) the address of an ompi_request. * @return address of the custom vprotocol data associated with the request. @@ -60,8 +60,8 @@ OMPI_DECLSPEC int mca_vprotocol_base_request_parasite(void); #define VPROTOCOL_SEND_REQ(ftreq) \ ((mca_pml_base_send_request_t *) \ (((uintptr_t) ftreq) - mca_pml_v.host_pml_req_send_size)) - -/** Unified macro to get the actual address of the protocol specific part of + +/** Unified macro to get the actual address of the protocol specific part of * an send - or - recv request. * @param request (IN) the address of an ompi_request. * @return address of the custom vprotocol data associated with the request. 
@@ -76,7 +76,7 @@ OMPI_DECLSPEC int mca_vprotocol_base_request_parasite(void); : VPROTOCOL_RECV_FTREQ(req) \ ) \ ) - + END_C_DECLS #endif /* __INCLUDE_VPROTOCOL_REQUEST_H_ */ diff --git a/ompi/mca/vprotocol/base/vprotocol_base_select.c b/ompi/mca/vprotocol/base/vprotocol_base_select.c index 99baa9606e9..47e8ea6190d 100644 --- a/ompi/mca/vprotocol/base/vprotocol_base_select.c +++ b/ompi/mca/vprotocol/base/vprotocol_base_select.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2004-2007 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -13,9 +13,7 @@ #include "ompi_config.h" -#ifdef HAVE_STRING_H #include -#endif #include "base.h" #include "ompi/mca/mca.h" @@ -24,7 +22,7 @@ mca_vprotocol_base_module_t mca_vprotocol = {0}; mca_vprotocol_base_component_t mca_vprotocol_component = { - {MCA_VPROTOCOL_BASE_VERSION_0_0_0} /* Initialized with invalid version */ + {MCA_VPROTOCOL_BASE_VERSION_0_0_0} /* Initialized with invalid version */ }; typedef struct opened_component_t { @@ -43,7 +41,7 @@ typedef struct opened_component_t { * will have all of its function pointers saved and returned to the * caller. */ -int mca_vprotocol_base_select(bool enable_progress_threads, +int mca_vprotocol_base_select(bool enable_progress_threads, bool enable_mpi_threads) { int priority = 0, best_priority = -1; @@ -53,20 +51,20 @@ int mca_vprotocol_base_select(bool enable_progress_threads, mca_vprotocol_base_module_t *module = NULL, *best_module = NULL; opal_list_t opened; opened_component_t *om = NULL; - + /* Traverse the list of available components; call their init functions. 
*/ OBJ_CONSTRUCT(&opened, opal_list_t); - OPAL_LIST_FOREACH(cli, &ompi_vprotocol_base_framework.framework_components, mca_base_component_list_item_t) + OPAL_LIST_FOREACH(cli, &ompi_vprotocol_base_framework.framework_components, mca_base_component_list_item_t) { component = (mca_vprotocol_base_component_t *) cli->cli_component; if (NULL == mca_vprotocol_base_include_list) { continue; - } + } V_OUTPUT_VERBOSE(500, "vprotocol select: initializing %s component %s", component->pmlm_version.mca_type_name, component->pmlm_version.mca_component_name); - if(strcmp(component->pmlm_version.mca_component_name, + if(strcmp(component->pmlm_version.mca_component_name, mca_vprotocol_base_include_list)) { V_OUTPUT_VERBOSE(500, "This component is not in the include list: skipping %s", component->pmlm_version.mca_component_name); continue; @@ -79,37 +77,37 @@ int mca_vprotocol_base_select(bool enable_progress_threads, if (NULL == module) { V_OUTPUT_VERBOSE(2, "vprotocol select: init returned failure for component %s", component->pmlm_version.mca_component_name); continue; - } + } V_OUTPUT_VERBOSE(500, "vprotocol select: component %s init returned priority %d", component->pmlm_version.mca_component_name, priority); - if (priority > best_priority) + if (priority > best_priority) { best_priority = priority; best_component = component; best_module = module; } - + om = (opened_component_t *) malloc(sizeof(opened_component_t)); if (NULL == om) return OMPI_ERR_OUT_OF_RESOURCE; OBJ_CONSTRUCT(om, opal_list_item_t); om->om_component = component; opal_list_append(&opened, (opal_list_item_t*) om); } - + /* Finished querying all components. Check for the bozo case. 
*/ if (NULL == best_component) { V_OUTPUT_VERBOSE(2, "vprotocol select: no protocol has returned a positive priority, fault tolerance is OFF"); - } - else + } + else { /* Save the winner */ mca_vprotocol_component = *best_component; mca_vprotocol = *best_module; } - + /* Finalize all non-selected components */ for (item = opal_list_remove_first(&opened); NULL != item; - item = opal_list_remove_first(&opened)) + item = opal_list_remove_first(&opened)) { om = (opened_component_t *) item; if (om->om_component != best_component) { @@ -125,17 +123,17 @@ int mca_vprotocol_base_select(bool enable_progress_threads, OBJ_DESTRUCT(om); free(om); } - - mca_base_components_close(mca_pml_v.output, - &ompi_vprotocol_base_framework.framework_components, + + mca_base_components_close(mca_pml_v.output, + &ompi_vprotocol_base_framework.framework_components, (mca_base_component_t *) best_component); - + /* All done */ - if(best_component != NULL) + if(best_component != NULL) { V_OUTPUT_VERBOSE(500, "vprotocol select: component %s selected", mca_vprotocol_component.pmlm_version.mca_component_name); return OMPI_SUCCESS; } - else + else return OMPI_ERR_NOT_FOUND; } diff --git a/ompi/mca/vprotocol/example/Makefile.am b/ompi/mca/vprotocol/example/Makefile.am index 792904559eb..fff5e295ef3 100644 --- a/ompi/mca/vprotocol/example/Makefile.am +++ b/ompi/mca/vprotocol/example/Makefile.am @@ -2,9 +2,9 @@ # Copyright (c) 2004-2007 The Trustees of the University of Tennessee. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -13,7 +13,7 @@ # (for static builds). 
if MCA_BUILD_ompi_vprotocol_example_DSO -component_noinst = +component_noinst = component_install = mca_vprotocol_example.la else component_noinst = libmca_vprotocol_example.la @@ -32,16 +32,16 @@ local_sources = \ vprotocol_example_send.c \ vprotocol_example_probe.c \ vprotocol_example_wait.h \ - vprotocol_example_wait.c + vprotocol_example_wait.c mcacomponentdir = $(ompilibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_vprotocol_example_la_SOURCES = $(local_sources) -mca_vprotocol_example_la_LIBADD = +mca_vprotocol_example_la_LIBADD = mca_vprotocol_example_la_CFLAGS = mca_vprotocol_example_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) libmca_vprotocol_example_la_SOURCES = $(local_sources) -libmca_vprotocol_example_la_LIBADD = +libmca_vprotocol_example_la_LIBADD = libmca_vprotocol_example_la_CFLAGS = libmca_vprotocol_example_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/vprotocol/example/vprotocol_example.c b/ompi/mca/vprotocol/example/vprotocol_example.c index f672ef1e95a..54bc78a3508 100644 --- a/ompi/mca/vprotocol/example/vprotocol_example.c +++ b/ompi/mca/vprotocol/example/vprotocol_example.c @@ -11,15 +11,15 @@ #include "ompi_config.h" #include "vprotocol_example.h" -mca_vprotocol_example_module_t mca_vprotocol_example = +mca_vprotocol_example_module_t mca_vprotocol_example = { - { + { /* mca_pml_base_module_add_procs_fn_t */ mca_vprotocol_example_add_procs, /* mca_pml_base_module_del_procs_fn_t */ mca_vprotocol_example_del_procs, /* mca_pml_base_module_enable_fn_t */ mca_vprotocol_example_enable, /* mca_pml_base_module_progress_fn_t */ mca_vprotocol_example_progress, - /* mca_pml_base_module_add_comm_fn_t */ mca_vprotocol_example_add_comm, + /* mca_pml_base_module_add_comm_fn_t */ mca_vprotocol_example_add_comm, /* mca_pml_base_module_del_comm_fn_t */ mca_vprotocol_example_del_comm, /* mca_pml_base_module_irecv_init_fn_t */ mca_vprotocol_example_irecv_init, /* mca_pml_base_module_irecv_fn_t */ 
mca_vprotocol_example_irecv, @@ -29,10 +29,10 @@ mca_vprotocol_example_module_t mca_vprotocol_example = /* mca_pml_base_module_send_fn_t */ mca_vprotocol_example_send, /* mca_pml_base_module_iprobe_fn_t */ mca_vprotocol_example_iprobe, /* mca_pml_base_module_probe_fn_t */ mca_vprotocol_example_probe, - /* mca_pml_base_module_start_fn_t */ mca_vprotocol_example_start, + /* mca_pml_base_module_start_fn_t */ mca_vprotocol_example_start, /* mca_pml_base_module_dump_fn_t */ mca_vprotocol_example_dump, - + /* opal_class_t * */ NULL, }, /** diff --git a/ompi/mca/vprotocol/example/vprotocol_example.h b/ompi/mca/vprotocol/example/vprotocol_example.h index 7b44a7212e6..98b119851a1 100644 --- a/ompi/mca/vprotocol/example/vprotocol_example.h +++ b/ompi/mca/vprotocol/example/vprotocol_example.h @@ -34,11 +34,11 @@ OMPI_DECLSPEC int mca_vprotocol_example_progress(void); OMPI_DECLSPEC int mca_vprotocol_example_add_comm(struct ompi_communicator_t* comm); OMPI_DECLSPEC int mca_vprotocol_example_del_comm(struct ompi_communicator_t* comm); -OMPI_DECLSPEC int mca_vprotocol_example_irecv_init(void *buf, - size_t count, - struct ompi_datatype_t *datatype, +OMPI_DECLSPEC int mca_vprotocol_example_irecv_init(void *buf, + size_t count, + struct ompi_datatype_t *datatype, int src, - int tag, + int tag, struct ompi_communicator_t* comm, struct ompi_request_t **request ); OMPI_DECLSPEC int mca_vprotocol_example_irecv(void *addr, diff --git a/ompi/mca/vprotocol/example/vprotocol_example_comm.c b/ompi/mca/vprotocol/example/vprotocol_example_comm.c index 647728913d7..bd14319e580 100644 --- a/ompi/mca/vprotocol/example/vprotocol_example_comm.c +++ b/ompi/mca/vprotocol/example/vprotocol_example_comm.c @@ -7,7 +7,7 @@ * * $HEADER$ */ - + #include "../pml_v.h" #include "vprotocol_example.h" diff --git a/ompi/mca/vprotocol/example/vprotocol_example_component.c b/ompi/mca/vprotocol/example/vprotocol_example_component.c index 96caacea9ab..610171bb647 100644 --- 
a/ompi/mca/vprotocol/example/vprotocol_example_component.c +++ b/ompi/mca/vprotocol/example/vprotocol_example_component.c @@ -29,7 +29,7 @@ static int mca_vprotocol_example_component_finalize(void); static int _priority; -mca_pml_v_protocol_base_component_2_0_0_t mca_vprotocol_example_component = +mca_pml_v_protocol_base_component_2_0_0_t mca_vprotocol_example_component = { /* First, the mca_base_component_t struct containing meta * information about the component itself */ @@ -53,7 +53,7 @@ mca_pml_v_protocol_base_component_2_0_0_t mca_vprotocol_example_component = /** MCA level functions */ - + int mca_vprotocol_example_component_open(void) { _priority = mca_param_register_int( "priority", -1); @@ -69,7 +69,7 @@ int mca_vprotocol_example_component_close(void) /** VPROTOCOL level functions (same as PML one) */ - + mca_pml_v_protocol_base_module_t *mca_vprotocol_example_component_init( int* priority, bool enable_progress_threads, bool enable_mpi_threads) @@ -78,28 +78,28 @@ mca_pml_v_protocol_base_module_t *mca_vprotocol_example_component_init( int* pri *priority = _priority; /** - * Some protocols requires sanity check about thread support (those making piecewise deterministic assumption) + * Some protocols requires sanity check about thread support (those making piecewise deterministic assumption) if(enable_mpi_threads) { OPAL_OUTPUT_VERBOSE( mca_pml_v_verbose, mca_pml_v_output, "vprotocol_example.init: threads are enabled, and not supported by vprotocol example fault tolerant layer, will not load")); return NULL; } */ - + /** * Insert your own protocol initialization here */ return &mca_vprotocol_example.super; } - + int mca_vprotocol_example_component_finalize(void) { V_OUTPUT_VERBOSE(10, "vprotocol_example_finalize"); - + /** * Insert your own garbage collecting here */ - + return OMPI_SUCCESS; } diff --git a/ompi/mca/vprotocol/example/vprotocol_example_probe.c b/ompi/mca/vprotocol/example/vprotocol_example_probe.c index 69ede2cfd5c..87b6de4caee 100644 --- 
a/ompi/mca/vprotocol/example/vprotocol_example_probe.c +++ b/ompi/mca/vprotocol/example/vprotocol_example_probe.c @@ -19,11 +19,11 @@ int mca_vprotocol_example_probe( int src, int tag, V_OUTPUT_VERBOSE(50, "mca_vprotocol_example_probe(%d, %d, %d)", src, tag, comm->c_contextid); return mca_pml_v.host_pml.pml_probe(src, tag, comm, status); } - + int mca_vprotocol_example_iprobe( int src, int tag, struct ompi_communicator_t *comm, int *matched, ompi_status_public_t * status ) -{ +{ V_OUTPUT_VERBOSE(60, "mca_vprotocol_example_iprobe(%d, %d, %d)", src, tag, comm->c_contextid); return mca_pml_v.host_pml.pml_iprobe(src, tag, comm, matched, status); } diff --git a/ompi/mca/vprotocol/example/vprotocol_example_recv.c b/ompi/mca/vprotocol/example/vprotocol_example_recv.c index 0506ad5af0f..349e340270a 100644 --- a/ompi/mca/vprotocol/example/vprotocol_example_recv.c +++ b/ompi/mca/vprotocol/example/vprotocol_example_recv.c @@ -15,22 +15,22 @@ #include "../pml_v.h" #include "vprotocol_example.h" -int mca_vprotocol_example_irecv_init(void *addr, - size_t count, - struct ompi_datatype_t *datatype, +int mca_vprotocol_example_irecv_init(void *addr, + size_t count, + struct ompi_datatype_t *datatype, int src, - int tag, + int tag, struct ompi_communicator_t* comm, struct ompi_request_t **request ) { int ret; - + ret = mca_pml_v.host_pml.pml_irecv_init(addr, count, datatype, src, tag, comm, request); V_OUTPUT_VERBOSE(50, "posted\tirecv_init %ld\tcomm %d\tfrom %d\ttag %d\tsize %ld", ((mca_pml_base_request_t *)*request)->req_sequence, comm->c_contextid, src, tag, (long) count); return ret; } -int mca_vprotocol_example_irecv(void *addr, +int mca_vprotocol_example_irecv(void *addr, size_t count, ompi_datatype_t * datatype, int src, @@ -39,7 +39,7 @@ int mca_vprotocol_example_irecv(void *addr, struct ompi_request_t **request) { int ret; - + ret = mca_pml_v.host_pml.pml_irecv(addr, count, datatype, src, tag, comm, request); V_OUTPUT_VERBOSE(50, "posted\tirecv %ld\tcomm %d\tfrom %d\ttag 
%d\tsize %ld", ((mca_pml_base_request_t *)*request)->req_sequence, comm->c_contextid, src, tag, (long) count); return ret; @@ -54,12 +54,12 @@ int mca_vprotocol_example_recv(void *addr, ompi_status_public_t * status ) { int ret; - V_OUTPUT_VERBOSE(50, "posted\trecv \tcomm %d\tfrom %d\ttag %d\tsize %ld", comm->c_contextid, src, tag, (long) count); + V_OUTPUT_VERBOSE(50, "posted\trecv \tcomm %d\tfrom %d\ttag %d\tsize %ld", comm->c_contextid, src, tag, (long) count); ret = mca_pml_v.host_pml.pml_recv(addr, count, datatype, src, tag, comm, status); # ifdef OPAL_ENABLE_DEBUG if(status) V_OUTPUT_VERBOSE(75, "deliver\trecv \tcomm %d\tfrom %d(%d)\ttag %d(%d)\tsize %ld(%ld)\tstatus %d", comm->c_contextid, src, status->MPI_SOURCE, tag, status->MPI_TAG, (long) count, (long) status->_ucount, status->MPI_ERROR); - else + else V_OUTPUT_VERBOSE(75, "deliver\trecv \tcomm %d\tfrom %d\ttag %d\tsize %ld", comm->c_contextid, src, tag, (long) count); # endif return ret; diff --git a/ompi/mca/vprotocol/example/vprotocol_example_send.c b/ompi/mca/vprotocol/example/vprotocol_example_send.c index 22c769ddc78..28540e8bc58 100644 --- a/ompi/mca/vprotocol/example/vprotocol_example_send.c +++ b/ompi/mca/vprotocol/example/vprotocol_example_send.c @@ -12,12 +12,12 @@ #include "../pml_v.h" #include "vprotocol_example.h" -int mca_vprotocol_example_isend_init(void *addr, - size_t count, - struct ompi_datatype_t *datatype, +int mca_vprotocol_example_isend_init(void *addr, + size_t count, + struct ompi_datatype_t *datatype, int dst, - int tag, - mca_pml_base_send_mode_t sendmode, + int tag, + mca_pml_base_send_mode_t sendmode, struct ompi_communicator_t* comm, struct ompi_request_t **request ) { @@ -25,12 +25,12 @@ int mca_vprotocol_example_isend_init(void *addr, return mca_pml_v.host_pml.pml_isend_init(addr, count, datatype, dst, tag, sendmode, comm, request); } -int mca_vprotocol_example_isend(void *addr, +int mca_vprotocol_example_isend(void *addr, size_t count, ompi_datatype_t * datatype, int 
dst, int tag, - mca_pml_base_send_mode_t sendmode, + mca_pml_base_send_mode_t sendmode, struct ompi_communicator_t *comm, struct ompi_request_t **request) { diff --git a/ompi/mca/vprotocol/example/vprotocol_example_start.c b/ompi/mca/vprotocol/example/vprotocol_example_start.c index 77c5c14229e..e9a6d25d2d5 100644 --- a/ompi/mca/vprotocol/example/vprotocol_example_start.c +++ b/ompi/mca/vprotocol/example/vprotocol_example_start.c @@ -13,7 +13,7 @@ #include "vprotocol_example_start.h" OMPI_DECLSPEC int mca_vprotocol_example_start(size_t count, ompi_request_t **requests) -{ +{ V_OUTPUT_VERBOSE(50, "starting %ld requests", (long) count); return mca_pml_v.host_pml.pml_start(count, requests); } diff --git a/ompi/mca/vprotocol/pessimist/Makefile.am b/ompi/mca/vprotocol/pessimist/Makefile.am index ec6e8d6e569..9a1305b1f06 100644 --- a/ompi/mca/vprotocol/pessimist/Makefile.am +++ b/ompi/mca/vprotocol/pessimist/Makefile.am @@ -2,9 +2,9 @@ # Copyright (c) 2004-2007 The Trustees of the University of Tennessee. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -38,7 +38,7 @@ local_sources = \ vprotocol_pessimist_sender_based.c if MCA_BUILD_ompi_vprotocol_pessimist_DSO -component_noinst = +component_noinst = component_install = mca_vprotocol_pessimist.la else component_noinst = libmca_vprotocol_pessimist.la diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c index 55040f4e023..b3ba707c746 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c @@ -10,9 +10,9 @@ #include "ompi_config.h" #include "vprotocol_pessimist.h" -mca_vprotocol_pessimist_module_t mca_vprotocol_pessimist = +mca_vprotocol_pessimist_module_t mca_vprotocol_pessimist = { - { + { /* mca_pml_base_module_add_procs_fn_t */ NULL, /* mca_pml_base_module_del_procs_fn_t */ NULL, /* mca_pml_base_module_enable_fn_f */ mca_vprotocol_pessimist_enable, @@ -31,7 +31,7 @@ mca_vprotocol_pessimist_module_t mca_vprotocol_pessimist = /* mca_pml_base_module_send_fn_t */ mca_vprotocol_pessimist_send, /* mca_pml_base_module_iprobe_fn_t */ mca_vprotocol_pessimist_iprobe, /* mca_pml_base_module_probe_fn_t */ mca_vprotocol_pessimist_probe, - /* mca_pml_base_module_start_fn_t */ mca_vprotocol_pessimist_start, + /* mca_pml_base_module_start_fn_t */ mca_vprotocol_pessimist_start, /* mca_pml_base_module_dump_fn_t */ mca_vprotocol_pessimist_dump, /* ompi_request_test_fn_t */ mca_vprotocol_pessimist_test, @@ -42,7 +42,7 @@ mca_vprotocol_pessimist_module_t mca_vprotocol_pessimist = /* ompi_request_waitany_fn_t */ mca_vprotocol_pessimist_wait_any, /* ompi_request_waitall_fn_t */ NULL, /* ompi_request_waitsome_fn_t */ mca_vprotocol_pessimist_wait_some, - + /* opal_class_t * */ OBJ_CLASS(mca_vprotocol_pessimist_recv_request_t), /* opal_class_t * */ OBJ_CLASS(mca_vprotocol_pessimist_send_request_t), }, diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.h 
b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.h index 1d5e72bfd8c..fec59cd6e34 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.h @@ -4,6 +4,8 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,10 +43,10 @@ typedef struct mca_vprotocol_pessimist_module_t { /* space for allocating events */ opal_free_list_t events_pool; - + /* Sender Based repository */ vprotocol_pessimist_sender_based_t sender_based; - + /* replay mode variables */ bool replay; opal_list_t replay_events; @@ -77,7 +79,7 @@ int mca_vprotocol_pessimist_recv(void *addr, struct ompi_communicator_t *comm, ompi_status_public_t * status ); -int mca_vprotocol_pessimist_isend(void *buf, +int mca_vprotocol_pessimist_isend(const void *buf, size_t count, ompi_datatype_t* datatype, int dst, @@ -85,7 +87,7 @@ int mca_vprotocol_pessimist_isend(void *buf, mca_pml_base_send_mode_t sendmode, ompi_communicator_t* comm, ompi_request_t** request ); -int mca_vprotocol_pessimist_send(void *buf, +int mca_vprotocol_pessimist_send(const void *buf, size_t count, ompi_datatype_t* datatype, int dst, diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_component.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_component.c index 8a226855c84..6b944574cb8 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_component.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_component.c @@ -31,7 +31,7 @@ static int _sender_based_size; static int _event_buffer_size; static char *_mmap_file_name; -mca_vprotocol_base_component_2_0_0_t mca_vprotocol_pessimist_component = +mca_vprotocol_base_component_2_0_0_t mca_vprotocol_pessimist_component = { /* First, the mca_base_component_t struct containing meta * information 
about the component itself */ @@ -114,7 +114,7 @@ static int mca_vprotocol_pessimist_component_close(void) static mca_vprotocol_base_module_t *mca_vprotocol_pessimist_component_init( int* priority, bool enable_progress_threads, bool enable_mpi_threads) -{ +{ V_OUTPUT_VERBOSE(500, "vprotocol_pessimist: component_init"); *priority = _priority; @@ -139,16 +139,16 @@ static mca_vprotocol_base_module_t *mca_vprotocol_pessimist_component_init( int* _free_list_max, _free_list_inc, NULL, 0, NULL, NULL, NULL); - mca_vprotocol_pessimist.event_buffer_max_length = + mca_vprotocol_pessimist.event_buffer_max_length = _event_buffer_size / sizeof(vprotocol_pessimist_mem_event_t); mca_vprotocol_pessimist.event_buffer_length = 0; - mca_vprotocol_pessimist.event_buffer = + mca_vprotocol_pessimist.event_buffer = (vprotocol_pessimist_mem_event_t *) malloc(_event_buffer_size); mca_vprotocol_pessimist.el_comm = MPI_COMM_NULL; - + return &mca_vprotocol_pessimist.super; } - + static int mca_vprotocol_pessimist_component_finalize(void) { V_OUTPUT_VERBOSE(500, "vprotocol_pessimist_finalize"); @@ -162,7 +162,7 @@ static int mca_vprotocol_pessimist_component_finalize(void) int mca_vprotocol_pessimist_enable(bool enable) { if(enable) { int ret; - if((ret = vprotocol_pessimist_sender_based_init(_mmap_file_name, + if((ret = vprotocol_pessimist_sender_based_init(_mmap_file_name, _sender_based_size)) != OMPI_SUCCESS) return ret; } diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_event.h b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_event.h index 830838e9637..748acf89e92 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_event.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_event.h @@ -20,7 +20,7 @@ BEGIN_C_DECLS -/* Make sure -Wformat is happy... */ +/* Make sure -Wformat is happy... 
*/ typedef uint64_t vprotocol_pessimist_clock_t; #define PRIpclock PRIx64 diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c index 7ed2b5801b1..ce3c6b172b6 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c @@ -2,7 +2,8 @@ * Copyright (c) 2004-2011 The Trustees of the University of Tennessee. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -12,81 +13,61 @@ #include "ompi_config.h" #include "vprotocol_pessimist_eventlog.h" - -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/pubsub/pubsub.h" +#include "opal/mca/pmix/pmix.h" +#include "ompi/dpm/dpm.h" int vprotocol_pessimist_event_logger_connect(int el_rank, ompi_communicator_t **el_comm) { int rc; - opal_buffer_t *buffer; char *port; - ompi_process_name_t el_proc; - char *hnp_uri, *rml_uri; - ompi_rml_tag_t el_tag; - char name[MPI_MAX_PORT_NAME]; int rank; vprotocol_pessimist_clock_t connect_info[2]; - - snprintf(name, MPI_MAX_PORT_NAME, VPROTOCOL_EVENT_LOGGER_NAME_FMT, el_rank); - port = ompi_pubsub.lookup(name, MPI_INFO_NULL); - if(NULL == port) - { + opal_list_t results; + opal_pmix_pdata_t *pdat; + + OBJ_CONSTRUCT(&results, opal_list_t); + pdat = OBJ_NEW(opal_pmix_pdata_t); + asprintf(&pdat->value.key, VPROTOCOL_EVENT_LOGGER_NAME_FMT, el_rank); + opal_list_append(&results, &pdat->super); + + rc = opal_pmix.lookup(&results, NULL); + if (OPAL_SUCCESS != rc || + OPAL_STRING != pdat->value.type || + NULL == pdat->value.data.string) { + OPAL_LIST_DESTRUCT(&results); return OMPI_ERR_NOT_FOUND; } + port = strdup(pdat->value.data.string); + OPAL_LIST_DESTRUCT(&results); V_OUTPUT_VERBOSE(45, "Found port < %s >", port); - - /* separate the string into the HNP and RML URI and 
tag */ - if (OMPI_SUCCESS != (rc = ompi_dpm.parse_port(port, &hnp_uri, &rml_uri, &el_tag))) { - OMPI_ERROR_LOG(rc); - return rc; - } - /* extract the originating proc's name */ - if (OMPI_SUCCESS != (rc = ompi_rte_parse_uris(rml_uri, &el_proc, NULL))) { - OMPI_ERROR_LOG(rc); - free(rml_uri); free(hnp_uri); - return rc; - } - /* make sure we can route rml messages to the destination */ - if (OMPI_SUCCESS != (rc = ompi_dpm.route_to_port(hnp_uri, &el_proc))) { - OMPI_ERROR_LOG(rc); - free(rml_uri); free(hnp_uri); - return rc; - } - free(rml_uri); free(hnp_uri); - - /* Send an rml message to tell the remote end to wake up and jump into - * connect/accept */ - buffer = OBJ_NEW(opal_buffer_t); - ompi_rte_send_buffer_nb(&el_proc, buffer, el_tag+1, NULL, NULL); - rc = ompi_dpm.connect_accept(MPI_COMM_SELF, 0, port, true, el_comm); + rc = ompi_dpm_connect_accept(MPI_COMM_SELF, 0, port, true, el_comm); if(OMPI_SUCCESS != rc) { OMPI_ERROR_LOG(rc); } - + /* Send Rank, receive max buffer size and max_clock back */ - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - rc = mca_pml_v.host_pml.pml_send(&rank, 1, MPI_INTEGER, 0, + rank = ompi_comm_rank(&ompi_mpi_comm_world.comm); + rc = mca_pml_v.host_pml.pml_send(&rank, 1, MPI_INTEGER, 0, VPROTOCOL_PESSIMIST_EVENTLOG_NEW_CLIENT_CMD, - MCA_PML_BASE_SEND_STANDARD, + MCA_PML_BASE_SEND_STANDARD, mca_vprotocol_pessimist.el_comm); if(OPAL_UNLIKELY(MPI_SUCCESS != rc)) OMPI_ERRHANDLER_INVOKE(mca_vprotocol_pessimist.el_comm, rc, __FILE__ ": failed sending event logger handshake"); - rc = mca_pml_v.host_pml.pml_recv(&connect_info, 2, MPI_UNSIGNED_LONG_LONG, + rc = mca_pml_v.host_pml.pml_recv(&connect_info, 2, MPI_UNSIGNED_LONG_LONG, 0, VPROTOCOL_PESSIMIST_EVENTLOG_NEW_CLIENT_CMD, mca_vprotocol_pessimist.el_comm, MPI_STATUS_IGNORE); if(OPAL_UNLIKELY(MPI_SUCCESS != rc)) \ OMPI_ERRHANDLER_INVOKE(mca_vprotocol_pessimist.el_comm, rc, \ - __FILE__ ": failed receiving event logger handshake"); - + __FILE__ ": failed receiving event logger handshake"); + 
return rc; } int vprotocol_pessimist_event_logger_disconnect(ompi_communicator_t *el_comm) { - ompi_dpm.disconnect(el_comm); + ompi_dpm_disconnect(el_comm); return OMPI_SUCCESS; } @@ -102,21 +83,21 @@ void vprotocol_pessimist_matching_replay(int *src) { event = (mca_vprotocol_pessimist_event_t *) opal_list_get_next(event)) { vprotocol_pessimist_matching_event_t *mevent; - - if(VPROTOCOL_PESSIMIST_EVENT_TYPE_MATCHING != event->type) continue; + + if(VPROTOCOL_PESSIMIST_EVENT_TYPE_MATCHING != event->type) continue; mevent = &(event->u_event.e_matching); if(mevent->reqid == mca_vprotocol_pessimist.clock) { /* this is the event to replay */ V_OUTPUT_VERBOSE(70, "pessimist: replay\tmatch\t%"PRIpclock"\trecv is forced from %d", mevent->reqid, mevent->src); (*src) = mevent->src; - opal_list_remove_item(&mca_vprotocol_pessimist.replay_events, + opal_list_remove_item(&mca_vprotocol_pessimist.replay_events, (opal_list_item_t *) event); VPESSIMIST_EVENT_RETURN(event); - } + } #if OPAL_ENABLE_DEBUG - else if(mevent->reqid > max) - max = mevent->reqid; + else if(mevent->reqid > max) + max = mevent->reqid; } /* not forcing a ANY SOURCE event whose recieve clock is lower than max * is a bug indicating we have missed an event during logging ! 
*/ @@ -127,7 +108,7 @@ void vprotocol_pessimist_matching_replay(int *src) { } void vprotocol_pessimist_delivery_replay(size_t n, ompi_request_t **reqs, - int *outcount, int *index, + int *outcount, int *index, ompi_status_public_t *status) { mca_vprotocol_pessimist_event_t *event; @@ -135,7 +116,7 @@ void vprotocol_pessimist_delivery_replay(size_t n, ompi_request_t **reqs, event != (mca_vprotocol_pessimist_event_t *) opal_list_get_end(&mca_vprotocol_pessimist.replay_events); event = (mca_vprotocol_pessimist_event_t *) opal_list_get_next(event)) { - vprotocol_pessimist_delivery_event_t *devent; + vprotocol_pessimist_delivery_event_t *devent; if(VPROTOCOL_PESSIMIST_EVENT_TYPE_DELIVERY != event->type) continue; devent = &(event->u_event.e_delivery); diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.h b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.h index eb5c2e1787d..af0631812b3 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.h @@ -32,7 +32,7 @@ int vprotocol_pessimist_event_logger_disconnect(ompi_communicator_t *el_comm); */ /** Adds a matching event for this request in the event list for any ANY_SOURCE - * recv. This event have to be updated later by + * recv. This event have to be updated later by * VPROTOCOL_PESSIMIST_MATCHING_LOG_FINALIZE * req (IN/OUT): posted RECV request (mca_pml_base_request_t *) * VPESSIMIST_REQ(req) is updated to keep track of the associated event @@ -111,17 +111,17 @@ static inline void vprotocol_pessimist_matching_log_finish(ompi_request_t *req) } while(0) -/* This function sends any pending event to the Event Logger. All available +/* This function sends any pending event to the Event Logger. All available * events are merged into a single message (if small enough). 
*/ -static inline void vprotocol_pessimist_event_flush(void) +static inline void vprotocol_pessimist_event_flush(void) { if(OPAL_UNLIKELY(!opal_list_is_empty(&mca_vprotocol_pessimist.pending_events))) { mca_vprotocol_pessimist_event_t *event; mca_vprotocol_pessimist_event_t *prv_event; - - for(event = + + for(event = (mca_vprotocol_pessimist_event_t *) opal_list_get_first(&mca_vprotocol_pessimist.pending_events); event != @@ -152,7 +152,7 @@ static inline void vprotocol_pessimist_event_flush(void) mca_vprotocol_pessimist.event_buffer_max_length) __VPROTOCOL_PESSIMIST_SEND_BUFFER(); assert(mca_vprotocol_pessimist.event_buffer_length < mca_vprotocol_pessimist.event_buffer_max_length); - prv_event = (mca_vprotocol_pessimist_event_t *) + prv_event = (mca_vprotocol_pessimist_event_t *) opal_list_remove_item(&mca_vprotocol_pessimist.pending_events, (opal_list_item_t *) event); VPESSIMIST_EVENT_RETURN(event); @@ -163,44 +163,44 @@ static inline void vprotocol_pessimist_event_flush(void) } /** Replay matching order according to event list during recovery - * src (IN/OUT): the requested source. If it is ANY_SOURCE it is changed to - * the matched source at first run. - * comm (IN): the communicator's context id is used to know the next unique + * src (IN/OUT): the requested source. If it is ANY_SOURCE it is changed to + * the matched source at first run. 
+ * comm (IN): the communicator's context id is used to know the next unique * request id that will be allocated by PML */ #define VPROTOCOL_PESSIMIST_MATCHING_REPLAY(src) do { \ if(mca_vprotocol_pessimist.replay && ((src) == MPI_ANY_SOURCE)) \ vprotocol_pessimist_matching_replay(&(src)); \ -} while(0) +} while(0) void vprotocol_pessimist_matching_replay(int *src); /******************************************************************************* - * WAIT/TEST-SOME/ANY & PROBES + * WAIT/TEST-SOME/ANY & PROBES */ /** Store the delivered request after a non deterministic delivery * req (IN): the delivered request (pml_base_request_t *) - */ + */ static inline void vprotocol_pessimist_delivery_log(ompi_request_t *req) { mca_vprotocol_pessimist_event_t *event; vprotocol_pessimist_delivery_event_t *devent; - + if(req == NULL) - { + { /* No request delivered to this probe, we need to count howmany times */ V_OUTPUT_VERBOSE(70, "pessimist:\tlog\tdeliver\t%"PRIpclock"\tnone", mca_vprotocol_pessimist.clock); event = (mca_vprotocol_pessimist_event_t*) opal_list_get_last(&mca_vprotocol_pessimist.pending_events); if(event->type == VPROTOCOL_PESSIMIST_EVENT_TYPE_DELIVERY && event->u_event.e_delivery.reqid == 0) - { + { /* consecutive probes not delivering anything are merged */ event->u_event.e_delivery.probeid = mca_vprotocol_pessimist.clock++; } else - { - /* Previous event is not a failed probe, lets create a new + { + /* Previous event is not a failed probe, lets create a new "failed probe" event (reqid=0) then */ VPESSIMIST_DELIVERY_EVENT_NEW(event); devent = &(event->u_event.e_delivery); @@ -211,7 +211,7 @@ static inline void vprotocol_pessimist_delivery_log(ompi_request_t *req) } } else - { + { /* A request have been delivered, log which one it is */ V_OUTPUT_VERBOSE(70, "pessimist:\tlog\tdeliver\t%"PRIpclock"\treq %"PRIpclock, mca_vprotocol_pessimist.clock, VPESSIMIST_FTREQ(req)->reqid); VPESSIMIST_DELIVERY_EVENT_NEW(event); @@ -228,7 +228,7 @@ static inline void 
vprotocol_pessimist_delivery_log(ompi_request_t *req) * n (IN): the number of input requests * reqs (IN): the set of considered requests (pml_base_request_t *) * outcount (OUT): number of delivered requests - * i (OUT): index(es) of the delivered request + * i (OUT): index(es) of the delivered request * status (OUT): status of the delivered request */ #define VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(n, reqs, outcount, i, status) do {\ diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog_protocol.h b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog_protocol.h index 96999901129..34ff5efd372 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog_protocol.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog_protocol.h @@ -22,7 +22,7 @@ typedef enum { VPROTOCOL_PESSIMIST_EVENTLOG_CLOSE_SERVER_CMD, VPROTOCOL_PESSIMIST_EVENTLOG_SAVE_SERVER_CMD, VPROTOCOL_PESSIMIST_EVENTLOG_LOAD_SERVER_CMD, - + VPROTOCOL_PESSIMIST_EVENTLOG_NEW_CLIENT_CMD, VPROTOCOL_PESSIMIST_EVENTLOG_QUIT_CLIENT_CMD, diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_probe.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_probe.c index 40c19b6b359..f9bd38b6d8f 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_probe.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_probe.c @@ -23,16 +23,16 @@ static inline int replay_probe(int src, int tag, static inline void log_probe(int ret, int src, int tag, struct ompi_communicator_t *comm, ompi_status_public_t * status); - + /******************************************************************************* * MPI level functions */ int mca_vprotocol_pessimist_iprobe( int src, int tag, struct ompi_communicator_t *comm, int *matched, ompi_status_public_t * status ) -{ +{ int ret; - + if(mca_vprotocol_pessimist.replay) { ret = replay_iprobe(src, tag, comm, matched, status); @@ -50,7 +50,7 @@ int mca_vprotocol_pessimist_probe( int src, int tag, ompi_status_public_t * status ) { int ret; - + 
if(mca_vprotocol_pessimist.replay) { ret = replay_probe(src, tag, comm, status); @@ -72,7 +72,7 @@ static inline int replay_iprobe(int src, int tag, { return OMPI_ERROR; } - + static inline void log_iprobe(int ret, int src, int tag, struct ompi_communicator_t *comm, int *matched, ompi_status_public_t * status) diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_proc.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_proc.c index e8344133cb9..1b7b9d90499 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_proc.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_proc.c @@ -13,7 +13,7 @@ int mca_vprotocol_pessimist_add_procs(struct ompi_proc_t **procs, size_t nprocs) { - /* TODO: for each proc, retrieve post send of sender based request, post recieve of list + /* TODO: for each proc, retrieve post send of sender based request, post recieve of list block any other communications until we are up. To be determined how i manage to send (or resend) data to failed nodes */ return mca_pml_v.host_pml.pml_add_procs(procs, nprocs); diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_progress.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_progress.c index 256edd20126..3b993ce61dc 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_progress.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_progress.c @@ -17,7 +17,7 @@ int mca_vprotocol_pessimist_progress(void) { int ret; - + printf("PROGRESS\n"); /* First let the real progress take place */ ret = mca_pml_v.host_pml.pml_progress(); diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_request.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_request.c index 5c4b90104fc..7408157e63d 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_request.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_request.c @@ -15,16 +15,16 @@ static void vprotocol_pessimist_request_construct(mca_pml_base_request_t *req); 
-OBJ_CLASS_INSTANCE(mca_vprotocol_pessimist_recv_request_t, mca_pml_base_request_t, +OBJ_CLASS_INSTANCE(mca_vprotocol_pessimist_recv_request_t, mca_pml_base_request_t, vprotocol_pessimist_request_construct, NULL); -OBJ_CLASS_INSTANCE(mca_vprotocol_pessimist_send_request_t, mca_pml_base_request_t, +OBJ_CLASS_INSTANCE(mca_vprotocol_pessimist_send_request_t, mca_pml_base_request_t, vprotocol_pessimist_request_construct, NULL); static void vprotocol_pessimist_request_construct(mca_pml_base_request_t *req) { mca_vprotocol_pessimist_request_t *ftreq; - + ftreq = VPESSIMIST_FTREQ(req); V_OUTPUT_VERBOSE(250, "pessimist:\treq\tnew\treq=%p\tPreq=%p (aligned to %p)", (void *) req, (void *) ftreq, (void *) &ftreq->pml_req_free); req->req_ompi.req_status.MPI_SOURCE = -1; /* no matching made flag */ @@ -38,7 +38,7 @@ static void vprotocol_pessimist_request_construct(mca_pml_base_request_t *req) int mca_vprotocol_pessimist_request_free(ompi_request_t **req) { - mca_pml_base_request_t *pmlreq = (mca_pml_base_request_t *) *req; + mca_pml_base_request_t *pmlreq = (mca_pml_base_request_t *) *req; V_OUTPUT_VERBOSE(50, "pessimist:\treq\tfree\t%"PRIpclock"\tpeer %d\ttag %d\tsize %lu", VPESSIMIST_FTREQ(pmlreq)->reqid, pmlreq->req_peer, pmlreq->req_tag, (unsigned long) pmlreq->req_count); vprotocol_pessimist_matching_log_finish(*req); pmlreq->req_ompi.req_status.MPI_SOURCE = -1; /* no matching made flag */ diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_request.h b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_request.h index 27f6781bdc9..23f5e7ab903 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_request.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_request.h @@ -38,10 +38,10 @@ OBJ_CLASS_DECLARATION(mca_vprotocol_pessimist_send_request_t); #define VPESSIMIST_RECV_FTREQ(req) \ ((mca_vprotocol_pessimist_recv_request_t *) VPROTOCOL_RECV_FTREQ(req)) - + #define VPESSIMIST_SEND_FTREQ(req) \ ((mca_vprotocol_pessimist_send_request_t *) 
VPROTOCOL_SEND_FTREQ(req)) - + #define VPESSIMIST_FTREQ_INIT(req) do { \ VPESSIMIST_FTREQ(req)->reqid = mca_vprotocol_pessimist.clock++; \ } while(0) diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_send.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_send.c index ddcc1503ca7..7fcc7bc59ea 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_send.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_send.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2004-2007 The Trustees of the University of Tennessee. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -12,7 +14,7 @@ #include "vprotocol_pessimist.h" #include "vprotocol_pessimist_sender_based.h" -int mca_vprotocol_pessimist_isend(void *buf, +int mca_vprotocol_pessimist_isend(const void *buf, size_t count, ompi_datatype_t* datatype, int dst, @@ -22,19 +24,19 @@ int mca_vprotocol_pessimist_isend(void *buf, ompi_request_t** request ) { int ret; - + V_OUTPUT_VERBOSE(50, "pessimist:\tisend\tposted\t%"PRIpclock"\tto %d\ttag %d\tsize %lu", mca_vprotocol_pessimist.clock, dst, tag, (unsigned long) count); vprotocol_pessimist_event_flush(); - ret = mca_pml_v.host_pml.pml_isend(buf, count, datatype, dst, tag, sendmode, + ret = mca_pml_v.host_pml.pml_isend(buf, count, datatype, dst, tag, sendmode, comm, request); VPESSIMIST_FTREQ_INIT(*request); vprotocol_pessimist_sender_based_copy_start(*request); return ret; } -int mca_vprotocol_pessimist_send(void *buf, +int mca_vprotocol_pessimist_send(const void *buf, size_t count, ompi_datatype_t* datatype, int dst, @@ -45,11 +47,11 @@ int mca_vprotocol_pessimist_send(void *buf, ompi_request_t *request = MPI_REQUEST_NULL; int rc; - V_OUTPUT_VERBOSE(50, "pessimist:\tsend\tposted\t%"PRIpclock"\tto %d\ttag %d\tsize %lu", + V_OUTPUT_VERBOSE(50, "pessimist:\tsend\tposted\t%"PRIpclock"\tto %d\ttag %d\tsize %lu", 
mca_vprotocol_pessimist.clock, dst, tag, (unsigned long) count); vprotocol_pessimist_event_flush(); - mca_pml_v.host_pml.pml_isend(buf, count, datatype, dst, tag, sendmode, + mca_pml_v.host_pml.pml_isend(buf, count, datatype, dst, tag, sendmode, comm, &request); VPESSIMIST_FTREQ_INIT(request); vprotocol_pessimist_sender_based_copy_start(request); diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c index 747e70927fb..f9d5772c8f0 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c @@ -29,7 +29,7 @@ static int sb_mmap_file_open(const char *path) sb.sb_fd = open(path, O_CREAT | O_TRUNC | O_RDWR, 0600); if(-1 == sb.sb_fd) { - V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_init: open (%s): %s", + V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_init: open (%s): %s", path, strerror(errno)); return OPAL_ERR_FILE_OPEN_FAILURE; } @@ -40,10 +40,10 @@ static void sb_mmap_file_close(void) { int ret = close(sb.sb_fd); if(-1 == ret) - V_OUTPUT_ERR("pml_v: protocol_pessimist: sender_based_finalize: close (%d): %s", + V_OUTPUT_ERR("pml_v: protocol_pessimist: sender_based_finalize: close (%d): %s", sb.sb_fd, strerror(errno)); } - + static void sb_mmap_alloc(void) { #ifndef MAP_NOCACHE @@ -51,18 +51,18 @@ static void sb_mmap_alloc(void) #endif if(-1 == ftruncate(sb.sb_fd, sb.sb_offset + sb.sb_length)) { - V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: ftruncate: %s", + V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: ftruncate: %s", strerror(errno)); close(sb.sb_fd); ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE); } - sb.sb_addr = (uintptr_t) mmap((void *) sb.sb_addr, sb.sb_length, - PROT_WRITE | PROT_READ, - MAP_PRIVATE | MAP_NOCACHE, sb.sb_fd, + sb.sb_addr = (uintptr_t) mmap((void *) sb.sb_addr, sb.sb_length, + PROT_WRITE | PROT_READ, + MAP_PRIVATE | 
MAP_NOCACHE, sb.sb_fd, sb.sb_offset); if(((uintptr_t) -1) == sb.sb_addr) { - V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: mmap: %s", + V_OUTPUT_ERR("pml_v: vprotocol_pessimist: sender_based_alloc: mmap: %s", strerror(errno)); close(sb.sb_fd); ompi_mpi_abort(MPI_COMM_NULL, MPI_ERR_NO_SPACE); @@ -73,17 +73,17 @@ static void sb_mmap_free(void) { int ret = munmap((void *) sb.sb_addr, sb.sb_length); if(-1 == ret) - V_OUTPUT_ERR("pml_v: protocol_pessimsit: sender_based_finalize: munmap (%p): %s", + V_OUTPUT_ERR("pml_v: protocol_pessimsit: sender_based_finalize: munmap (%p): %s", (void *) sb.sb_addr, strerror(errno)); } -int vprotocol_pessimist_sender_based_init(const char *mmapfile, size_t size) +int vprotocol_pessimist_sender_based_init(const char *mmapfile, size_t size) { char *path; #ifdef SB_USE_CONVERTOR_METHOD mca_pml_base_send_request_t pml_req; - sb.sb_conv_to_pessimist_offset = (uintptr_t) VPROTOCOL_SEND_REQ(NULL) - - ((uintptr_t) &pml_req.req_base.req_convertor - + sb.sb_conv_to_pessimist_offset = (uintptr_t) VPROTOCOL_SEND_REQ(NULL) - + ((uintptr_t) &pml_req.req_base.req_convertor - (uintptr_t) &pml_req); V_OUTPUT_VERBOSE(500, "pessimist: conv_to_pessimist_offset: %p", (void *) sb.sb_conv_to_pessimist_offset); #endif @@ -95,11 +95,11 @@ int vprotocol_pessimist_sender_based_init(const char *mmapfile, size_t size) #ifdef SB_USE_PROGRESS_METHOD OBJ_CONSTRUCT(&sb.sb_sendreq, opal_list_t); #endif - - asprintf(&path, "%s"OPAL_PATH_SEP"%s", ompi_process_info.proc_session_dir, + + asprintf(&path, "%s"OPAL_PATH_SEP"%s", ompi_process_info.proc_session_dir, mmapfile); if(OPAL_SUCCESS != sb_mmap_file_open(path)) - return OPAL_ERR_FILE_OPEN_FAILURE; + return OPAL_ERR_FILE_OPEN_FAILURE; free(path); return OMPI_SUCCESS; } @@ -112,7 +112,7 @@ void vprotocol_pessimist_sender_based_finalize(void) } -/** Manage mmap floating window, allocating enough memory for the message to be +/** Manage mmap floating window, allocating enough memory for the message to be * 
asynchronously copied to disk. */ void vprotocol_pessimist_sender_based_alloc(size_t len) @@ -123,11 +123,11 @@ void vprotocol_pessimist_sender_based_alloc(size_t len) else ompi_comm_dup(MPI_COMM_SELF, &sb.sb_comm, 1); #endif - + /* Take care of alignement of sb_offset */ sb.sb_offset += sb.sb_cursor - sb.sb_addr; sb.sb_cursor = sb.sb_offset % sb.sb_pagesize; - sb.sb_offset -= sb.sb_cursor; + sb.sb_offset -= sb.sb_cursor; /* Adjusting sb_length for the largest application message to fit */ len += sb.sb_cursor + sizeof(vprotocol_pessimist_sender_based_header_t); @@ -137,10 +137,10 @@ void vprotocol_pessimist_sender_based_alloc(size_t len) sb.sb_available = sb.sb_length - sb.sb_cursor; sb_mmap_alloc(); - + sb.sb_cursor += sb.sb_addr; /* set absolute addr of sender_based buffer */ V_OUTPUT_VERBOSE(30, "pessimist:\tsb\tgrow\toffset %llu\tlength %llu\tbase %p\tcursor %p", (unsigned long long) sb.sb_offset, (unsigned long long) sb.sb_length, (void *) sb.sb_addr, (void *) sb.sb_cursor); -} +} #undef sb @@ -153,7 +153,7 @@ int32_t vprotocol_pessimist_sender_based_convertor_advance(opal_convertor_t* pCo unsigned int i; size_t pending_length; mca_vprotocol_pessimist_send_request_t *ftreq; - + ftreq = VPESSIMIST_CONV_REQ(pConvertor); pConvertor->flags = ftreq->sb.conv_flags; pConvertor->fAdvance = ftreq->sb.conv_advance; diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h index c1a21c9ca7d..b639b47b394 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h @@ -20,7 +20,7 @@ BEGIN_C_DECLS -/** Prepare for using the sender based storage +/** Prepare for using the sender based storage */ int vprotocol_pessimist_sender_based_init(const char *mmapfile, size_t size); @@ -28,7 +28,7 @@ int vprotocol_pessimist_sender_based_init(const char *mmapfile, size_t size); */ void 
vprotocol_pessimist_sender_based_finalize(void); -/** Manage mmap floating window, allocating enough memory for the message to be +/** Manage mmap floating window, allocating enough memory for the message to be * asynchronously copied to disk. */ void vprotocol_pessimist_sender_based_alloc(size_t len); @@ -92,7 +92,7 @@ int32_t vprotocol_pessimist_sender_based_convertor_advance(opal_convertor_t*, * progress method */ #elif defined(SB_USE_PROGRESS_METHOD) -static inline void __SENDER_BASED_METHOD_COPY(mca_pml_base_send_request_t *req) +static inline void __SENDER_BASED_METHOD_COPY(mca_pml_base_send_request_t *req) { if(req->req_bytes_packed) { @@ -103,12 +103,12 @@ static inline void __SENDER_BASED_METHOD_COPY(mca_pml_base_send_request_t *req) } } -static inline int vprotocol_pessimist_sb_progress_req(mca_pml_base_send_request_t *req) +static inline int vprotocol_pessimist_sb_progress_req(mca_pml_base_send_request_t *req) { mca_vprotocol_pessimist_request_t *ftreq = VPESSIMIST_SEND_FTREQ(req); size_t max_data = 0; - - if(ftreq->sb.bytes_progressed < req->req_bytes_packed) + + if(ftreq->sb.bytes_progressed < req->req_bytes_packed) { opal_convertor_t conv; unsigned int iov_count = 1; @@ -117,11 +117,11 @@ static inline int vprotocol_pessimist_sb_progress_req(mca_pml_base_send_request_ max_data = req->req_bytes_packed - ftreq->sb.bytes_progressed; iov.iov_len = max_data; iov.iov_base = (IOVBASE_TYPE *) (ftreq->sb.cursor + position); - + V_OUTPUT_VERBOSE(80, "pessimist:\tsb\tprgress\t%"PRIpclock"\tsize %lu from position %lu", ftreq->reqid, max_data, position); opal_convertor_clone_with_position(&req->req_base.req_convertor, &conv, 0, &position ); - opal_convertor_pack(&conv, &iov, &iov_count, &max_data); + opal_convertor_pack(&conv, &iov, &iov_count, &max_data); ftreq->sb.bytes_progressed += max_data; } return max_data; @@ -130,15 +130,15 @@ static inline int vprotocol_pessimist_sb_progress_req(mca_pml_base_send_request_ static inline int 
vprotocol_pessimist_sb_progress_all_reqs(void) { int ret = 0; - + /* progress any waiting Sender Based copy */ if(!opal_list_is_empty(&mca_vprotocol_pessimist.sender_based.sb_sendreq)) { - mca_vprotocol_pessimist_request_t *ftreq = (mca_vprotocol_pessimist_request_t *) + mca_vprotocol_pessimist_request_t *ftreq = (mca_vprotocol_pessimist_request_t *) opal_list_remove_first(&mca_vprotocol_pessimist.sender_based.sb_sendreq); if(vprotocol_pessimist_sb_progress_req(VPROTOCOL_SEND_REQ(ftreq))) ret = 1; - opal_list_append(&mca_vprotocol_pessimist.sender_based.sb_sendreq, + opal_list_append(&mca_vprotocol_pessimist.sender_based.sb_sendreq, &ftreq->list_item); } return ret; @@ -163,7 +163,7 @@ static inline void __SENDER_BASED_METHOD_FLUSH(ompi_request_t *req) #endif /* SB_USE_*_METHOD */ -/** Copy data associated to a pml_base_send_request_t to the sender based +/** Copy data associated to a pml_base_send_request_t to the sender based * message payload buffer */ static inline void vprotocol_pessimist_sender_based_copy_start(ompi_request_t *req) @@ -171,24 +171,24 @@ static inline void vprotocol_pessimist_sender_based_copy_start(ompi_request_t *r vprotocol_pessimist_sender_based_header_t *sbhdr; mca_vprotocol_pessimist_request_t *ftreq = VPESSIMIST_SEND_FTREQ(req); mca_pml_base_send_request_t *pmlreq = (mca_pml_base_send_request_t *) req; - + /* Allocate enough sender-based space to hold the message */ - if(mca_vprotocol_pessimist.sender_based.sb_available < - pmlreq->req_bytes_packed + + if(mca_vprotocol_pessimist.sender_based.sb_available < + pmlreq->req_bytes_packed + sizeof(vprotocol_pessimist_sender_based_header_t)) { vprotocol_pessimist_sender_based_alloc(pmlreq->req_bytes_packed); } - + /* Copy message header to the sender-based space */ /* /!\ This is NOT thread safe */ ftreq->sb.cursor = mca_vprotocol_pessimist.sender_based.sb_cursor; #if 1 mca_vprotocol_pessimist.sender_based.sb_cursor += - sizeof(vprotocol_pessimist_sender_based_header_t) + + 
sizeof(vprotocol_pessimist_sender_based_header_t) + pmlreq->req_bytes_packed; - mca_vprotocol_pessimist.sender_based.sb_available -= - sizeof(vprotocol_pessimist_sender_based_header_t) + + mca_vprotocol_pessimist.sender_based.sb_available -= + sizeof(vprotocol_pessimist_sender_based_header_t) + pmlreq->req_bytes_packed; #endif sbhdr = (vprotocol_pessimist_sender_based_header_t *) ftreq->sb.cursor; @@ -199,13 +199,13 @@ static inline void vprotocol_pessimist_sender_based_copy_start(ompi_request_t *r sbhdr->sequence = pmlreq->req_base.req_sequence; ftreq->sb.cursor += sizeof(vprotocol_pessimist_sender_based_header_t); V_OUTPUT_VERBOSE(70, "pessimist:\tsb\tsend\t%"PRIpclock"\tsize %lu (+%lu header)", VPESSIMIST_FTREQ(req)->reqid, (long unsigned)pmlreq->req_bytes_packed, (long unsigned)sizeof(vprotocol_pessimist_sender_based_header_t)); - + /* Use one of the previous data copy method */ __SENDER_BASED_METHOD_COPY(pmlreq); } /** Ensure sender based is finished before allowing user to touch send buffer - */ + */ #define vprotocol_pessimist_sender_based_flush(REQ) __SENDER_BASED_METHOD_FLUSH(REQ) END_C_DECLS diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h index 465c3ddd8b5..c00dfff70e3 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h @@ -16,26 +16,26 @@ BEGIN_C_DECLS -/* There is several different ways of packing the data to the sender-based - * buffer. Just pick one. +/* There is several different ways of packing the data to the sender-based + * buffer. Just pick one. 
*/ -#define SB_USE_PACK_METHOD +#define SB_USE_PACK_METHOD #undef SB_USE_PROGRESS_METHOD #undef SB_USE_CONVERTOR_METHOD -typedef struct vprotocol_pessimist_sender_based_t +typedef struct vprotocol_pessimist_sender_based_t { int sb_pagesize; /* size of memory pages on this architecture */ #ifdef SB_USE_CONVERTOR_METHOD uintptr_t sb_conv_to_pessimist_offset; /* end of request from req_conv */ #endif - int sb_fd; /* file descriptor of mapped file */ + int sb_fd; /* file descriptor of mapped file */ off_t sb_offset; /* offset in mmaped file */ uintptr_t sb_addr; /* base address of mmaped segment */ size_t sb_length; /* length of mmaped segment */ uintptr_t sb_cursor; /* current pointer to writeable memory */ size_t sb_available; /* available space before end of segment */ - + #ifdef SB_USE_PROGRESS_METHOD opal_list_t sb_sendreq; /* requests that needs to be progressed */ #endif @@ -44,13 +44,13 @@ typedef struct vprotocol_pessimist_sender_based_t typedef struct vprotocol_pessimist_sender_based_header_t { size_t size; - int dst; + int dst; int tag; uint32_t contextid; vprotocol_pessimist_clock_t sequence; } vprotocol_pessimist_sender_based_header_t; -typedef struct vprotocol_pessimist_sender_based_request_t +typedef struct vprotocol_pessimist_sender_based_request_t { uintptr_t cursor; size_t bytes_progressed; diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_start.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_start.c index aede724c2cc..504c2eab3c7 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_start.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_start.c @@ -12,7 +12,7 @@ #include "vprotocol_pessimist.h" OMPI_DECLSPEC int mca_vprotocol_pessimist_start(size_t count, ompi_request_t **requests) -{ +{ int ret; size_t i; @@ -20,7 +20,7 @@ OMPI_DECLSPEC int mca_vprotocol_pessimist_start(size_t count, ompi_request_t **r { mca_pml_base_request_t *pml_request = (mca_pml_base_request_t *) requests[i]; if(NULL == pml_request) continue; - 
+ switch(pml_request->req_type) { case MCA_PML_REQUEST_RECV : @@ -28,20 +28,20 @@ OMPI_DECLSPEC int mca_vprotocol_pessimist_start(size_t count, ompi_request_t **r /* It's a persistent recv request, first, see if we have to enforce matching order */ VPROTOCOL_PESSIMIST_MATCHING_REPLAY(pml_request->req_peer); break; - + case MCA_PML_REQUEST_SEND : V_OUTPUT_VERBOSE(50, "pessimist:\tstart\tsend\t%"PRIpclock"\tto %d\ttag %d\tsize %lu", mca_vprotocol_pessimist.clock, pml_request->req_peer, pml_request->req_tag, (long) pml_request->req_count); - /* It's a persistent send request, first, check if we are waiting ack - * for some older events */ + /* It's a persistent send request, first, check if we are waiting ack + * for some older events */ break; - + default: V_OUTPUT_VERBOSE(50, "pessimist:\tstart\twrong %d\t%"PRIpclock"\tfrom %d\ttag %d\tsize %lu", pml_request->req_type, mca_vprotocol_pessimist.clock, pml_request->req_peer, pml_request->req_tag, (long) pml_request->req_count); return OMPI_ERR_REQUEST; } } ret = mca_pml_v.host_pml.pml_start(count, requests); - + /* restore requests status */ return ret; } diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_wait.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_wait.c index f446af4699d..7dce3f92375 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_wait.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_wait.c @@ -29,13 +29,13 @@ static int vprotocol_pessimist_request_no_free(ompi_request_t **req) { int mca_vprotocol_pessimist_test(ompi_request_t ** rptr, int *completed, - ompi_status_public_t * status) + ompi_status_public_t * status) { int ret; int index; - + VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(1, rptr, completed, &index, status); - + ret = mca_pml_v.host_request_fns.req_test(rptr, completed, status); if(completed) vprotocol_pessimist_delivery_log(*rptr); @@ -45,7 +45,7 @@ int mca_vprotocol_pessimist_test(ompi_request_t ** rptr, int *completed, } int 
mca_vprotocol_pessimist_test_all(size_t count, ompi_request_t ** requests, - int *completed, + int *completed, ompi_status_public_t * statuses) { int ret; @@ -53,8 +53,8 @@ int mca_vprotocol_pessimist_test_all(size_t count, ompi_request_t ** requests, /* /!\ this is not correct until I upgrade DELIVERY_REPLAY to manage several requests at once */ VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(1, requests, completed, &index, statuses); - - ret = mca_pml_v.host_request_fns.req_test_all(count, requests, completed, + + ret = mca_pml_v.host_request_fns.req_test_all(count, requests, completed, statuses); #if 0 /* This is not correct :/ */ @@ -75,11 +75,11 @@ int mca_vprotocol_pessimist_test_any(size_t count, ompi_request_t ** requests, { int ret; size_t i; - + VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(count, requests, completed, index, status); - + PREPARE_REQUESTS_WITH_NO_FREE(count, requests); - + /* Call the real one to do the job */ ret = mca_pml_v.host_request_fns.req_test_any(count, requests, index, completed, status); @@ -118,20 +118,20 @@ int mca_vprotocol_pessimist_wait_any(size_t count, ompi_request_t ** requests, int ret; size_t i; int dummy; - + VPROTOCOL_PESSIMIST_DELIVERY_REPLAY(count, requests, &dummy, index, status); - + PREPARE_REQUESTS_WITH_NO_FREE(count, requests); - + /* Call the real one to do the job */ ret = mca_pml_v.host_request_fns.req_wait_any(count, requests, index, status); - + /* Parse the result */ for(i = 0; i < count; i++) { ompi_request_t *req = requests[i]; if(req == MPI_REQUEST_NULL) continue; - + /* Restore requests and store they've been delivered */ req->req_free = mca_vprotocol_pessimist_request_free; if(i == (size_t) *index) @@ -161,10 +161,10 @@ int mca_vprotocol_pessimist_test_some(size_t count, ompi_request_t ** requests, } int mca_vprotocol_pessimist_wait_some(size_t count, ompi_request_t ** requests, - int *outcount, int *indexes, + int *outcount, int *indexes, ompi_status_public_t * statuses) { - int ret; + int ret; ret = 
mca_vprotocol_pessimist_wait_any(count, requests, indexes, statuses); if(MPI_UNDEFINED == *indexes) *outcount = 0; else *outcount = 1; diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_wait.h b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_wait.h index 28a00203df6..c6ba2fd89c4 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_wait.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_wait.h @@ -23,7 +23,7 @@ int mca_vprotocol_pessimist_test(ompi_request_t ** rptr, int *completed, ompi_status_public_t * status); int mca_vprotocol_pessimist_test_all(size_t count, ompi_request_t ** requests, - int *completed, + int *completed, ompi_status_public_t * statuses); int mca_vprotocol_pessimist_test_any(size_t count, ompi_request_t ** requests, @@ -34,11 +34,11 @@ int mca_vprotocol_pessimist_test_some(size_t count, ompi_request_t ** requests, int * outcount, int * indices, ompi_status_public_t * statuses); -int mca_vprotocol_pessimist_wait_any(size_t count, ompi_request_t ** requests, +int mca_vprotocol_pessimist_wait_any(size_t count, ompi_request_t ** requests, int *index, ompi_status_public_t * status); -int mca_vprotocol_pessimist_wait_some(size_t count, ompi_request_t ** requests, - int *outcount, int *indexes, +int mca_vprotocol_pessimist_wait_some(size_t count, ompi_request_t ** requests, + int *outcount, int *indexes, ompi_status_public_t * statuses); END_C_DECLS diff --git a/ompi/mca/vprotocol/vprotocol.h b/ompi/mca/vprotocol/vprotocol.h index f4a33b5ff1e..ff87be2a81f 100644 --- a/ompi/mca/vprotocol/vprotocol.h +++ b/ompi/mca/vprotocol/vprotocol.h @@ -13,14 +13,14 @@ #define __INCLUDE_VPROTOCOL_H_ #include "ompi_config.h" -#include "ompi/mca/mca.h" +#include "ompi/mca/mca.h" #include "ompi/mca/pml/pml.h" #include "ompi/request/request.h" BEGIN_C_DECLS - + /* PML_V->PROTOCOL Called by MCA_PML_V framework to initialize the component. - * + * * @param priority (OUT) Relative priority or ranking used by MCA to * select a component. 
* @@ -32,8 +32,8 @@ BEGIN_C_DECLS * indicates whether multiple threads may invoke this component * simultaneously or not. */ -typedef struct mca_vprotocol_base_module_2_0_0_t * - (*mca_vprotocol_base_component_init_fn_t)(int *priority, +typedef struct mca_vprotocol_base_module_2_0_0_t * + (*mca_vprotocol_base_component_init_fn_t)(int *priority, bool enable_progress_threads, bool enable_mpi_threads); @@ -56,7 +56,7 @@ typedef mca_vprotocol_base_component_2_0_0_t mca_vprotocol_base_component_t; */ typedef struct mca_vprotocol_base_module_2_0_0_t { - /* PML module stuff */ + /* PML module stuff */ mca_pml_base_module_add_procs_fn_t add_procs; mca_pml_base_module_del_procs_fn_t del_procs; mca_pml_base_module_enable_fn_t enable; @@ -82,7 +82,7 @@ typedef struct mca_vprotocol_base_module_2_0_0_t ompi_request_wait_any_fn_t wait_any; ompi_request_wait_all_fn_t wait_all; ompi_request_wait_some_fn_t wait_some; - + /* Custom requests classes to add extra data at end of pml requests */ opal_class_t * req_recv_class; opal_class_t * req_send_class; diff --git a/ompi/message/Makefile.am b/ompi/message/Makefile.am index 4eaa7b5b53d..8fc7c07e4cd 100644 --- a/ompi/message/Makefile.am +++ b/ompi/message/Makefile.am @@ -6,15 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2011 Sandia National Laboratories. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -23,5 +24,5 @@ headers += \ message/message.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ message/message.c diff --git a/ompi/message/message.c b/ompi/message/message.c index 86b76a46889..deb0a4697f9 100644 --- a/ompi/message/message.c +++ b/ompi/message/message.c @@ -7,9 +7,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -56,12 +56,12 @@ ompi_message_init(void) ompi_message_null.message.req_ptr = NULL; ompi_message_null.message.count = 0; - ompi_message_null.message.m_f_to_c_index = + ompi_message_null.message.m_f_to_c_index = opal_pointer_array_add(&ompi_message_f_to_c_table, &ompi_message_null); OBJ_CONSTRUCT(&ompi_message_no_proc, ompi_message_t); ompi_message_no_proc.message.m_f_to_c_index = - opal_pointer_array_add(&ompi_message_f_to_c_table, + opal_pointer_array_add(&ompi_message_f_to_c_table, &ompi_message_no_proc); if (1 != ompi_message_no_proc.message.m_f_to_c_index) { return OMPI_ERR_NOT_FOUND; diff --git a/ompi/message/message.h b/ompi/message/message.h index f4de252fab9..60778ebed1a 100644 --- a/ompi/message/message.h +++ b/ompi/message/message.h @@ -5,9 +5,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mpi/Makefile.am b/ompi/mpi/Makefile.am index 87e9af8e993..60940b6e7f2 100644 --- a/ompi/mpi/Makefile.am +++ b/ompi/mpi/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpi/c/Makefile.am b/ompi/mpi/c/Makefile.am index a62c2c72d60..cbca901d614 100644 --- a/ompi/mpi/c/Makefile.am +++ b/ompi/mpi/c/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2013 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -15,24 +15,22 @@ # Copyright (c) 2012-2013 Inria. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # SUBDIRS = profile -# -# OMPI_PRPOFILING_DEFINES flag s enabled when we want our MPI_* symbols -# to be replaced by PMPI_*. In other words, this flag decides -# whether "profile/defines.h" is included or not. "profile/defines.h" -# replaces all MPI_* symbols with PMPI_* symbols. In this directory, -# we need it to be 0 -# +# OMPI_BUILD_MPI_PROFILING is enabled when we want our generated MPI_* symbols +# to be replaced by PMPI_*. +# In this directory, we need it to be 0 -AM_CPPFLAGS = -DOMPI_PROFILING_DEFINES=0 +AM_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=0 # # The top directory always builds MPI_* bindings. 
The bottom directory @@ -406,10 +404,14 @@ libmpi_c_mpi_la_SOURCES += \ file_get_type_extent.c \ file_get_view.c \ file_iread_at.c \ + file_iread_at_all.c \ file_iread.c \ + file_iread_all.c \ file_iread_shared.c \ file_iwrite_at.c \ + file_iwrite_at_all.c \ file_iwrite.c \ + file_iwrite_all.c \ file_iwrite_shared.c \ file_open.c \ file_preallocate.c \ diff --git a/ompi/mpi/c/abort.c b/ompi/mpi/c/abort.c index 13746d942c1..a3328f90c3e 100644 --- a/ompi/mpi/c/abort.c +++ b/ompi/mpi/c/abort.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,25 +28,22 @@ #include "ompi/memchecker.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Abort = PMPI_Abort #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Abort PMPI_Abort #endif static const char FUNC_NAME[] = "MPI_Abort"; -int MPI_Abort(MPI_Comm comm, int errorcode) +int MPI_Abort(MPI_Comm comm, int errorcode) { MEMCHECKER( memchecker_comm(comm); ); - OPAL_CR_ABORT_LIBRARY(); - /* Don't even bother checking comm and errorcode values for errors */ @@ -53,7 +52,7 @@ int MPI_Abort(MPI_Comm comm, int errorcode) } opal_show_help("help-mpi-api.txt", "mpi-abort", true, - ompi_comm_rank(comm), + ompi_comm_rank(comm), ('\0' != comm->c_name[0]) ? comm->c_name : "", errorcode); return ompi_mpi_abort(comm, errorcode); diff --git a/ompi/mpi/c/accumulate.c b/ompi/mpi/c/accumulate.c index ced8c5fff6a..4266ef261c8 100644 --- a/ompi/mpi/c/accumulate.c +++ b/ompi/mpi/c/accumulate.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystmes, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -32,19 +34,18 @@ #include "ompi/datatype/ompi_datatype_internal.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Accumulate = PMPI_Accumulate #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Accumulate PMPI_Accumulate #endif static const char FUNC_NAME[] = "MPI_Accumulate"; int MPI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, - MPI_Datatype target_datatype, MPI_Op op, MPI_Win win) + MPI_Datatype target_datatype, MPI_Op op, MPI_Win win) { int rc; ompi_win_t *ompi_win = (ompi_win_t*) win; @@ -71,7 +72,7 @@ int MPI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype origi rc = MPI_ERR_OP; } else if (!ompi_op_is_intrinsic(op)) { rc = MPI_ERR_OP; - } else if ( target_disp < 0 ) { + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { rc = MPI_ERR_DISP; } else { OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, origin_datatype, origin_count); @@ -124,16 +125,13 @@ int MPI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype origi return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - - /* XXX -- CONST -- do not cast away const -- update mca/osc */ - rc = ompi_win->w_osc_module->osc_accumulate((void *) origin_addr, + rc = ompi_win->w_osc_module->osc_accumulate(origin_addr, origin_count, origin_datatype, - target_rank, - target_disp, + target_rank, + target_disp, target_count, - target_datatype, + target_datatype, op, win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/add_error_class.c b/ompi/mpi/c/add_error_class.c index 79803bc9ed7..7e8d71306f0 100644 --- a/ompi/mpi/c/add_error_class.c +++ b/ompi/mpi/c/add_error_class.c @@ -5,15 +5,17 @@ * Copyright (c) 
2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,24 +28,21 @@ #include "ompi/communicator/communicator.h" #include "ompi/attribute/attribute.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Add_error_class = PMPI_Add_error_class #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Add_error_class PMPI_Add_error_class #endif static const char FUNC_NAME[] = "MPI_Add_error_class"; -int MPI_Add_error_class(int *errorclass) +int MPI_Add_error_class(int *errorclass) { int err_class; int rc; - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -52,23 +51,23 @@ int MPI_Add_error_class(int *errorclass) MPI_ERR_ARG, FUNC_NAME); } } - + err_class = ompi_mpi_errclass_add(); if ( 0 > err_class ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME); } - - /* + + /* ** Update the attribute value. 
See the comments ** in attribute/attribute.c and attribute/attribute_predefined.c - ** why we have to call the fortran attr_set function + ** why we have to call the fortran attr_set function */ - rc = ompi_attr_set_fortran_mpi1 (COMM_ATTR, + rc = ompi_attr_set_fortran_mpi1 (COMM_ATTR, MPI_COMM_WORLD, &MPI_COMM_WORLD->c_keyhash, - MPI_LASTUSEDCODE, + MPI_LASTUSEDCODE, ompi_mpi_errcode_lastused, true); if ( MPI_SUCCESS != rc ) { diff --git a/ompi/mpi/c/add_error_code.c b/ompi/mpi/c/add_error_code.c index fce5e38a436..17e691025ee 100644 --- a/ompi/mpi/c/add_error_code.c +++ b/ompi/mpi/c/add_error_code.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,12 +28,11 @@ #include "ompi/errhandler/errcode.h" #include "ompi/attribute/attribute.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Add_error_code = PMPI_Add_error_code #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Add_error_code PMPI_Add_error_code #endif static const char FUNC_NAME[] = "MPI_Add_error_code"; @@ -42,16 +43,14 @@ int MPI_Add_error_code(int errorclass, int *errorcode) int code; int rc; - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( ompi_mpi_errcode_is_invalid(errorclass) ) return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); - - if ( !ompi_mpi_errnum_is_class ( errorclass) ) + + if ( !ompi_mpi_errnum_is_class ( errorclass) ) return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); @@ -60,28 +59,28 @@ int MPI_Add_error_code(int errorclass, int *errorcode) MPI_ERR_ARG, FUNC_NAME); } } - + code = ompi_mpi_errcode_add ( errorclass); if ( 0 > code ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME); } - /* + /* ** Update the attribute value. 
See the comments ** in attribute/attribute.c and attribute/attribute_predefined.c - ** why we have to call the fortran attr_set function + ** why we have to call the fortran attr_set function */ - rc = ompi_attr_set_fortran_mpi1 (COMM_ATTR, + rc = ompi_attr_set_fortran_mpi1 (COMM_ATTR, MPI_COMM_WORLD, &MPI_COMM_WORLD->c_keyhash, - MPI_LASTUSEDCODE, + MPI_LASTUSEDCODE, ompi_mpi_errcode_lastused, true); if ( MPI_SUCCESS != rc ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, rc, FUNC_NAME); } - + *errorcode = code; return MPI_SUCCESS; } diff --git a/ompi/mpi/c/add_error_string.c b/ompi/mpi/c/add_error_string.c index fe982446e8e..fdc6b364b68 100644 --- a/ompi/mpi/c/add_error_string.c +++ b/ompi/mpi/c/add_error_string.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,12 +28,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/errhandler/errcode.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Add_error_string = PMPI_Add_error_string #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Add_error_string PMPI_Add_error_string #endif static const char FUNC_NAME[] = "MPI_Add_error_string"; @@ -41,8 +42,6 @@ int MPI_Add_error_string(int errorcode, const char *string) { int rc; - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -54,7 +53,7 @@ int MPI_Add_error_string(int errorcode, const char *string) return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); - if ( MPI_MAX_ERROR_STRING < (strlen(string)+1) ) + if ( MPI_MAX_ERROR_STRING < (strlen(string)+1) ) return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } diff --git a/ompi/mpi/c/address.c b/ompi/mpi/c/address.c index 37e18a71f9b..2f4cb2448d4 100644 --- a/ompi/mpi/c/address.c +++ b/ompi/mpi/c/address.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Address = PMPI_Address #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Address PMPI_Address #endif static const char FUNC_NAME[] = "MPI_Address"; @@ -38,8 +39,6 @@ static const char FUNC_NAME[] = "MPI_Address"; int MPI_Address(void *location, MPI_Aint *address) { - OPAL_CR_NOOP_PROGRESS(); - if( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == location || NULL == address) { diff --git a/ompi/mpi/c/allgather.c b/ompi/mpi/c/allgather.c index 4be47551125..a0089659932 100644 --- a/ompi/mpi/c/allgather.c +++ b/ompi/mpi/c/allgather.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,12 @@ * Copyright (c) 2010 University of Houston. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,12 +33,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Allgather = PMPI_Allgather #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Allgather PMPI_Allgather #endif static const char FUNC_NAME[] = "MPI_Allgather"; @@ -59,8 +60,8 @@ int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, memchecker_comm(comm); /* check whether the actual send buffer is defined. */ if (MPI_IN_PLACE == sendbuf) { - memchecker_call(&opal_memchecker_base_isdefined, - (char *)(recvbuf)+rank*ext, + memchecker_call(&opal_memchecker_base_isdefined, + (char *)(recvbuf)+rank*ext, recvcount, recvtype); } else { memchecker_datatype(sendtype); @@ -94,7 +95,7 @@ int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, /* Do we need to do anything? Everyone had to give the same send signature, which means that everyone must have given a sendcount > 0 if there's anything to send for the intra-communicator - case. If we're doing IN_PLACE, however, check recvcount, + case. If we're doing IN_PLACE, however, check recvcount, not sendcount. 
*/ if ( OMPI_COMM_IS_INTRA(comm) ) { if ((MPI_IN_PLACE != sendbuf && 0 == sendcount) || @@ -113,12 +114,9 @@ int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_allgather((void *) sendbuf, sendcount, sendtype, + err = comm->c_coll.coll_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, comm->c_coll.coll_allgather_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/allgatherv.c b/ompi/mpi/c/allgatherv.c index 44a912b9180..2a80883d7a4 100644 --- a/ompi/mpi/c/allgatherv.c +++ b/ompi/mpi/c/allgatherv.c @@ -6,18 +6,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 University of Houston. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,12 +33,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Allgatherv = PMPI_Allgatherv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Allgatherv PMPI_Allgatherv #endif static const char FUNC_NAME[] = "MPI_Allgatherv"; @@ -64,7 +65,7 @@ int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, (char *)(recvbuf)+displs[i]*ext, recvcounts[i], recvtype); } - + /* check whether the actual send buffer is defined. */ if (MPI_IN_PLACE == sendbuf) { memchecker_call(&opal_memchecker_base_isdefined, @@ -84,7 +85,7 @@ int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (MPI_IN_PLACE == recvbuf) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); @@ -108,13 +109,13 @@ int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COUNT, FUNC_NAME); } } - + if (NULL == displs) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_BUFFER, FUNC_NAME); } } - /* Do we need to do anything? Everyone had to give the same + /* Do we need to do anything? Everyone had to give the same signature, which means that everyone must have given a sum(recvounts) > 0 if there's anything to do. 
*/ @@ -129,17 +130,14 @@ int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, } } /* There is no rule that can be applied for inter-communicators, since - recvcount(s)=0 only indicates that the processes in the other group + recvcount(s)=0 only indicates that the processes in the other group do not send anything, sendcount=0 only indicates that I do not send - anything. However, other processes in my group might very well send + anything. However, other processes in my group might very well send something */ - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_allgatherv((void *) sendbuf, sendcount, sendtype, + err = comm->c_coll.coll_allgatherv(sendbuf, sendcount, sendtype, recvbuf, (int *) recvcounts, (int *) displs, recvtype, comm, comm->c_coll.coll_allgatherv_module); diff --git a/ompi/mpi/c/alloc_mem.c b/ompi/mpi/c/alloc_mem.c index dbcef2acada..9c653c9d99a 100644 --- a/ompi/mpi/c/alloc_mem.c +++ b/ompi/mpi/c/alloc_mem.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +34,11 @@ #include "ompi/info/info.h" #include "opal/mca/mpool/mpool.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Alloc_mem = PMPI_Alloc_mem #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Alloc_mem PMPI_Alloc_mem #endif static const char FUNC_NAME[] = "MPI_Alloc_mem"; @@ -42,6 +46,8 @@ static const char FUNC_NAME[] = "MPI_Alloc_mem"; int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr) { + char info_value[MPI_MAX_INFO_VAL + 1]; + char *mpool_hints = NULL; if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -53,11 +59,11 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr) FUNC_NAME); } } - + /* Per these threads: - http://www.open-mpi.org/community/lists/devel/2007/07/1977.php - http://www.open-mpi.org/community/lists/devel/2007/07/1979.php + http://www.open-mpi.org/community/lists/devel/2007/07/1977.php + http://www.open-mpi.org/community/lists/devel/2007/07/1979.php If you call MPI_ALLOC_MEM with a size of 0, you get NULL back .*/ @@ -66,12 +72,18 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr) return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); + if (MPI_INFO_NULL != info) { + int flag; + (void) ompi_info_get (info, "mpool_hints", MPI_MAX_INFO_VAL, info_value, &flag); + if (flag) { + mpool_hints = info_value; + } + } - *((void **) baseptr) = mca_mpool_base_alloc((size_t) size, (struct opal_info_t*)info); - OPAL_CR_EXIT_LIBRARY(); + *((void **) baseptr) = mca_mpool_base_alloc ((size_t) size, (struct opal_info_t*)info, + mpool_hints); if (NULL == *((void **) baseptr)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, FUNC_NAME); } diff --git a/ompi/mpi/c/allreduce.c b/ompi/mpi/c/allreduce.c index 3b32264b02e..39c79e10c31 
100644 --- a/ompi/mpi/c/allreduce.c +++ b/ompi/mpi/c/allreduce.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,29 +32,28 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Allreduce = PMPI_Allreduce #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Allreduce PMPI_Allreduce #endif static const char FUNC_NAME[] = "MPI_Allreduce"; int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int err; MEMCHECKER( memchecker_datatype(datatype); memchecker_comm(comm); - + /* check whether receive buffer is defined. */ memchecker_call(&opal_memchecker_base_isaddressable, recvbuf, count, datatype); - + /* check whether the actual send buffer is defined. 
*/ if (MPI_IN_PLACE == sendbuf) { memchecker_call(&opal_memchecker_base_isdefined, recvbuf, count, datatype); @@ -70,7 +71,7 @@ int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (MPI_OP_NULL == op) { err = MPI_ERR_OP; @@ -79,12 +80,12 @@ int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, free(msg); return ret; } else if( MPI_IN_PLACE == recvbuf ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_BUFFER, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_BUFFER, FUNC_NAME); - } else if( (sendbuf == recvbuf) && + } else if( (sendbuf == recvbuf) && (MPI_BOTTOM != sendbuf) && (count > 1) ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_BUFFER, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_BUFFER, FUNC_NAME); } else { OMPI_CHECK_DATATYPE_FOR_SEND(err, datatype, count); @@ -95,18 +96,15 @@ int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, /* MPI-1, p114, says that each process must supply at least one element. But at least the Pallas benchmarks call MPI_REDUCE with a count of 0. So be sure to handle it. 
*/ - + if (0 == count) { return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ OBJ_RETAIN(op); - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_allreduce((void *) sendbuf, recvbuf, count, + err = comm->c_coll.coll_allreduce(sendbuf, recvbuf, count, datatype, op, comm, comm->c_coll.coll_allreduce_module); OBJ_RELEASE(op); diff --git a/ompi/mpi/c/alltoall.c b/ompi/mpi/c/alltoall.c index be39433ba12..859180f36a4 100644 --- a/ompi/mpi/c/alltoall.c +++ b/ompi/mpi/c/alltoall.c @@ -6,19 +6,19 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,20 +32,19 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Alltoall = PMPI_Alltoall #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Alltoall PMPI_Alltoall #endif static const char FUNC_NAME[] = "MPI_Alltoall"; int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - MPI_Comm comm) + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm) { size_t sendtype_size, recvtype_size; int err; @@ -73,10 +72,10 @@ int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (MPI_IN_PLACE == recvbuf) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } else { OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcount); @@ -104,11 +103,8 @@ int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_alltoall((void *) sendbuf, sendcount, sendtype, + err = comm->c_coll.coll_alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, comm->c_coll.coll_alltoall_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/alltoallv.c b/ompi/mpi/c/alltoallv.c index 90451cb2e7a..56cd2a8cce4 100644 --- a/ompi/mpi/c/alltoallv.c +++ 
b/ompi/mpi/c/alltoallv.c @@ -6,19 +6,19 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,12 +32,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Alltoallv = PMPI_Alltoallv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Alltoallv PMPI_Alltoallv #endif static const char FUNC_NAME[] = "MPI_Alltoallv"; @@ -46,7 +45,7 @@ static const char FUNC_NAME[] = "MPI_Alltoallv"; int MPI_Alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], - MPI_Datatype recvtype, MPI_Comm comm) + MPI_Datatype recvtype, MPI_Comm comm) { int i, size, err; @@ -85,7 +84,7 @@ int MPI_Alltoallv(const void *sendbuf, const int sendcounts[], err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } @@ -120,12 +119,9 @@ int MPI_Alltoallv(const void 
*sendbuf, const int sendcounts[], } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_alltoallv((void *) sendbuf, (int *) sendcounts, (int *) sdispls, sendtype, - recvbuf, (int *) recvcounts, (int *) rdispls, recvtype, + err = comm->c_coll.coll_alltoallv(sendbuf, sendcounts, sdispls, sendtype, + recvbuf, recvcounts, rdispls, recvtype, comm, comm->c_coll.coll_alltoallv_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/alltoallw.c b/ompi/mpi/c/alltoallw.c index 8d7c26b6458..266903b43bb 100644 --- a/ompi/mpi/c/alltoallw.c +++ b/ompi/mpi/c/alltoallw.c @@ -6,19 +6,19 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,12 +32,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Alltoallw = PMPI_Alltoallw #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Alltoallw PMPI_Alltoallw #endif static const char FUNC_NAME[] = "MPI_Alltoallw"; @@ -48,8 +47,8 @@ int MPI_Alltoallw(const void *sendbuf, const int sendcounts[], void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm) { - int i, size, err; - + int i, size, err; + MEMCHECKER( ptrdiff_t recv_ext; ptrdiff_t send_ext; @@ -80,7 +79,7 @@ int MPI_Alltoallw(const void *sendbuf, const int sendcounts[], err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } @@ -115,12 +114,9 @@ int MPI_Alltoallw(const void *sendbuf, const int sendcounts[], } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_alltoallw((void *) sendbuf, (int *) sendcounts, (int *) sdispls, (ompi_datatype_t **) sendtypes, - recvbuf, (int *) recvcounts, (int *) rdispls, (ompi_datatype_t **) recvtypes, + err = comm->c_coll.coll_alltoallw(sendbuf, sendcounts, sdispls, (ompi_datatype_t **) sendtypes, + recvbuf, recvcounts, rdispls, (ompi_datatype_t **) recvtypes, comm, comm->c_coll.coll_alltoallw_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/attr_delete.c b/ompi/mpi/c/attr_delete.c index 3059d01706e..4bb14614e94 100644 --- a/ompi/mpi/c/attr_delete.c +++ b/ompi/mpi/c/attr_delete.c @@ -5,15 +5,17 @@ * Copyright (c) 
2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,12 +29,11 @@ #include "ompi/attribute/attribute.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Attr_delete = PMPI_Attr_delete #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Attr_delete PMPI_Attr_delete #endif static const char FUNC_NAME[] = "MPI_Attr_delete"; @@ -49,16 +50,15 @@ int MPI_Attr_delete(MPI_Comm comm, int keyval) if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - ret = ompi_attr_delete(COMM_ATTR, comm, comm->c_keyhash, keyval, + ret = ompi_attr_delete(COMM_ATTR, comm, comm->c_keyhash, keyval, false); - OMPI_ERRHANDLER_RETURN(ret, comm, MPI_ERR_OTHER, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(ret, comm, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/attr_fn.c b/ompi/mpi/c/attr_fn.c index 95da43f2117..667085e50de 100644 --- a/ompi/mpi/c/attr_fn.c +++ b/ompi/mpi/c/attr_fn.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -61,9 +61,9 @@ int OMPI_C_MPI_TYPE_NULL_DELETE_FN( MPI_Datatype datatype, int type_keyval, return MPI_SUCCESS; } -int OMPI_C_MPI_TYPE_NULL_COPY_FN( MPI_Datatype datatype, int type_keyval, +int OMPI_C_MPI_TYPE_NULL_COPY_FN( MPI_Datatype datatype, int type_keyval, void* extra_state, - void* attribute_val_in, + void* attribute_val_in, void* attribute_val_out, int* flag ) { @@ -71,7 +71,7 @@ int OMPI_C_MPI_TYPE_NULL_COPY_FN( MPI_Datatype datatype, int type_keyval, return MPI_SUCCESS; } -int OMPI_C_MPI_TYPE_DUP_FN( MPI_Datatype datatype, int type_keyval, +int OMPI_C_MPI_TYPE_DUP_FN( MPI_Datatype datatype, int type_keyval, void* extra_state, void* attribute_val_in, void* attribute_val_out, int* flag ) @@ -88,7 +88,7 @@ int OMPI_C_MPI_WIN_NULL_DELETE_FN( MPI_Win window, int win_keyval, return MPI_SUCCESS; } -int OMPI_C_MPI_WIN_NULL_COPY_FN( MPI_Win window, int win_keyval, +int OMPI_C_MPI_WIN_NULL_COPY_FN( MPI_Win window, int win_keyval, void* extra_state, void* attribute_val_in, void* attribute_val_out, int* flag ) @@ -113,7 +113,7 @@ int OMPI_C_MPI_COMM_NULL_DELETE_FN( MPI_Comm comm, int comm_keyval, return MPI_SUCCESS; } -int OMPI_C_MPI_COMM_NULL_COPY_FN( MPI_Comm comm, int comm_keyval, +int OMPI_C_MPI_COMM_NULL_COPY_FN( MPI_Comm comm, int comm_keyval, void* extra_state, void* attribute_val_in, void* attribute_val_out, int* flag ) diff --git a/ompi/mpi/c/attr_get.c b/ompi/mpi/c/attr_get.c index e782623b97d..57ee9aedfad 100644 --- a/ompi/mpi/c/attr_get.c +++ b/ompi/mpi/c/attr_get.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/attribute/attribute.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Attr_get = PMPI_Attr_get #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Attr_get PMPI_Attr_get #endif static const char FUNC_NAME[] = "MPI_Attr_get"; @@ -47,12 +48,11 @@ int MPI_Attr_get(MPI_Comm comm, int keyval, void *attribute_val, int *flag) if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ((NULL == attribute_val) || (NULL == flag)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); /* This stuff is very confusing. Be sure to see src/attribute/attribute.c for a lengthy comment explaining Open diff --git a/ompi/mpi/c/attr_put.c b/ompi/mpi/c/attr_put.c index b14fbad6ec3..4d36cea732e 100644 --- a/ompi/mpi/c/attr_put.c +++ b/ompi/mpi/c/attr_put.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,17 +29,16 @@ #include "ompi/attribute/attribute.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Attr_put = PMPI_Attr_put #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Attr_put PMPI_Attr_put #endif static const char FUNC_NAME[] = "MPI_Attr_put"; -int MPI_Attr_put(MPI_Comm comm, int keyval, void *attribute_val) +int MPI_Attr_put(MPI_Comm comm, int keyval, void *attribute_val) { int ret; @@ -48,16 +49,15 @@ int MPI_Attr_put(MPI_Comm comm, int keyval, void *attribute_val) if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - ret = ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash, + ret = ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash, keyval, attribute_val, false); - - OMPI_ERRHANDLER_RETURN(ret, comm, MPI_ERR_OTHER, FUNC_NAME); + + OMPI_ERRHANDLER_RETURN(ret, comm, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/barrier.c b/ompi/mpi/c/barrier.c index 8f5cccd7368..3affecaa1a0 100644 --- a/ompi/mpi/c/barrier.c +++ b/ompi/mpi/c/barrier.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,25 +26,24 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Barrier = PMPI_Barrier #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Barrier PMPI_Barrier #endif static const char FUNC_NAME[] = "MPI_Barrier"; -int MPI_Barrier(MPI_Comm comm) +int MPI_Barrier(MPI_Comm comm) { int err = MPI_SUCCESS; - + MEMCHECKER( memchecker_comm(comm); ); - + /* Error checking */ if (MPI_PARAM_CHECK) { @@ -52,8 +53,6 @@ int MPI_Barrier(MPI_Comm comm) } } - OPAL_CR_ENTER_LIBRARY(); - /* Intracommunicators: Only invoke the back-end coll module barrier function if there's more than one process in the communicator */ @@ -61,7 +60,7 @@ int MPI_Barrier(MPI_Comm comm) if (ompi_comm_size(comm) > 1) { err = comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module); } - } + } /* Intercommunicators -- always invoke, because, by definition, there's always at least 2 processes in an intercommunicator. */ diff --git a/ompi/mpi/c/bcast.c b/ompi/mpi/c/bcast.c index 8044862d66b..2503769e400 100644 --- a/ompi/mpi/c/bcast.c +++ b/ompi/mpi/c/bcast.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Bcast = PMPI_Bcast #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Bcast PMPI_Bcast #endif static const char FUNC_NAME[] = "MPI_Bcast"; @@ -50,7 +51,7 @@ int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, memchecker_call(&opal_memchecker_base_isdefined, buffer, count, datatype); } /* check whether receive buffer is addressable. */ - memchecker_call(&opal_memchecker_base_isaddressable, buffer, count, datatype); + memchecker_call(&opal_memchecker_base_isaddressable, buffer, count, datatype); } else { if (MPI_ROOT == root) { /* check whether root's send buffer is defined. 
*/ @@ -66,7 +67,7 @@ int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } @@ -84,7 +85,7 @@ int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, if ((root >= ompi_comm_size(comm)) || (root < 0)) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ROOT, FUNC_NAME); } - } + } /* Errors for intercommunicators */ @@ -93,7 +94,7 @@ int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, MPI_ROOT == root || MPI_PROC_NULL == root)) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ROOT, FUNC_NAME); } - } + } } /* If there's only one node, or if the count is 0, we're done */ @@ -101,9 +102,7 @@ int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, if ((OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) <= 1) || 0 == count) { return MPI_SUCCESS; - } - - OPAL_CR_ENTER_LIBRARY(); + } /* Invoke the coll component to perform the back-end operation */ diff --git a/ompi/mpi/c/bindings.h b/ompi/mpi/c/bindings.h index 903382cce4f..8c8533cfd7b 100644 --- a/ompi/mpi/c/bindings.h +++ b/ompi/mpi/c/bindings.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,11 +24,6 @@ #include "mpi.h" #include "ompi/datatype/ompi_datatype.h" -/* This library needs to be here so that we can define - * the OPAL_CR_* checks - */ -#include "opal/runtime/opal_cr.h" - BEGIN_C_DECLS /* If compiling in the profile directory, then we don't have weak @@ -60,7 +55,7 @@ BEGIN_C_DECLS /* XXX Fix flags else if( ompi_datatype_is_overlapped((DDT)) ) (RC) = MPI_ERR_TYPE; */ \ else if( !opal_datatype_is_valid(&((DDT)->super)) ) (RC) = MPI_ERR_TYPE; \ } while (0) - + #define OMPI_CHECK_DATATYPE_FOR_ONE_SIDED( RC, DDT, COUNT ) \ do { \ /*(RC) = MPI_SUCCESS; */ \ @@ -72,7 +67,7 @@ BEGIN_C_DECLS } while(0) -/* This macro has to be used to check the correctness of the user buffer depending on the datatype. +/* This macro has to be used to check the correctness of the user buffer depending on the datatype. * This macro expects that the DDT parameter is a valid pointer to an ompi datatype object. */ #define OMPI_CHECK_USER_BUFFER(RC, BUFFER, DDT, COUNT) \ @@ -92,7 +87,7 @@ BEGIN_C_DECLS } \ } \ } while (0) - + END_C_DECLS #endif /* OMPI_C_BINDINGS_H */ diff --git a/ompi/mpi/c/bsend.c b/ompi/mpi/c/bsend.c index 10ef6c0399b..ca924b26fe5 100644 --- a/ompi/mpi/c/bsend.c +++ b/ompi/mpi/c/bsend.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "ompi/mca/pml/base/pml_base_bsend.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Bsend = PMPI_Bsend #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Bsend PMPI_Bsend #endif static const char FUNC_NAME[] = "MPI_Bsend"; @@ -75,9 +76,7 @@ int MPI_Bsend(const void *buf, int count, MPI_Datatype type, int dest, int tag, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(send((void *) buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm)); + rc = MCA_PML_CALL(send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/bsend_init.c b/ompi/mpi/c/bsend_init.c index c5e57bbdf5d..65fea969230 100644 --- a/ompi/mpi/c/bsend_init.c +++ b/ompi/mpi/c/bsend_init.c @@ -3,20 +3,22 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "ompi/mca/pml/base/pml_base_bsend.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Bsend_init = PMPI_Bsend_init #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Bsend_init PMPI_Bsend_init #endif static const char FUNC_NAME[] = "MPI_Bsend_init"; @@ -63,7 +64,7 @@ int MPI_Bsend_init(const void *buf, int count, MPI_Datatype type, rc = MPI_ERR_TYPE; } else if (tag < 0 || tag > mca_pml.pml_max_tag) { rc = MPI_ERR_TAG; - } else if (ompi_comm_peer_invalid(comm, dest) && + } else if (ompi_comm_peer_invalid(comm, dest) && (MPI_PROC_NULL != dest)) { rc = MPI_ERR_RANK; } else if (request == NULL) { @@ -78,20 +79,17 @@ int MPI_Bsend_init(const void *buf, int count, MPI_Datatype type, ompi_request_t */ (*request)->req_type = OMPI_REQUEST_NOOP; (*request)->req_status = ompi_request_empty.req_status; - (*request)->req_complete = true; + (*request)->req_complete = REQUEST_COMPLETED; (*request)->req_state = OMPI_REQUEST_INACTIVE; (*request)->req_persistent = true; (*request)->req_free = ompi_request_persistent_proc_null_free; return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* * Here, we just initialize the request -- memchecker should set the buffer in MPI_Start. 
*/ - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(isend_init((void *) buf, count, type, dest, tag, + rc = MCA_PML_CALL(isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm, request)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/buffer_attach.c b/ompi/mpi/c/buffer_attach.c index 6400ca2b733..73ca80c5219 100644 --- a/ompi/mpi/c/buffer_attach.c +++ b/ompi/mpi/c/buffer_attach.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,18 +27,17 @@ #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/pml_base_bsend.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Buffer_attach = PMPI_Buffer_attach #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Buffer_attach PMPI_Buffer_attach #endif static const char FUNC_NAME[] = "MPI_Buffer_attach"; -int MPI_Buffer_attach(void *buffer, int size) +int MPI_Buffer_attach(void *buffer, int size) { int ret = OMPI_SUCCESS; @@ -47,10 +48,8 @@ int MPI_Buffer_attach(void *buffer, int size) } } - OPAL_CR_ENTER_LIBRARY(); ret = mca_pml_base_bsend_attach(buffer, size); - OPAL_CR_EXIT_LIBRARY(); return ret; } diff --git a/ompi/mpi/c/buffer_detach.c b/ompi/mpi/c/buffer_detach.c index 68f171f3b4e..62b6d1e0a66 100644 --- a/ompi/mpi/c/buffer_detach.c +++ b/ompi/mpi/c/buffer_detach.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,18 +27,17 @@ #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/pml_base_bsend.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Buffer_detach = PMPI_Buffer_detach #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Buffer_detach PMPI_Buffer_detach #endif static const char FUNC_NAME[] = "MPI_Buffer_detach"; -int MPI_Buffer_detach(void *buffer, int *size) +int MPI_Buffer_detach(void *buffer, int *size) { int ret = OMPI_SUCCESS; @@ -47,9 +48,7 @@ int MPI_Buffer_detach(void *buffer, int *size) } } - OPAL_CR_ENTER_LIBRARY(); ret = mca_pml_base_bsend_detach(buffer, size); - OPAL_CR_EXIT_LIBRARY(); return ret; } diff --git a/ompi/mpi/c/cancel.c b/ompi/mpi/c/cancel.c index b2e8c6cd6b9..f71a4ca32a1 100644 --- a/ompi/mpi/c/cancel.c +++ b/ompi/mpi/c/cancel.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,18 +29,17 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Cancel = PMPI_Cancel #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Cancel PMPI_Cancel #endif static const char FUNC_NAME[] = "MPI_Cancel"; -int MPI_Cancel(MPI_Request *request) +int MPI_Cancel(MPI_Request *request) { int rc; @@ -49,9 +50,9 @@ int MPI_Cancel(MPI_Request *request) if ( MPI_PARAM_CHECK ) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == request || NULL == *request || + if (NULL == request || NULL == *request || MPI_REQUEST_NULL == *request) { - OMPI_ERRHANDLER_RETURN(MPI_ERR_REQUEST, MPI_COMM_WORLD, + OMPI_ERRHANDLER_RETURN(MPI_ERR_REQUEST, MPI_COMM_WORLD, MPI_ERR_REQUEST, FUNC_NAME); } } @@ -60,7 +61,6 @@ int MPI_Cancel(MPI_Request *request) return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_request_cancel(*request); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/cart_coords.c b/ompi/mpi/c/cart_coords.c index 8fb587125ae..fcc6ca105fb 100644 --- a/ompi/mpi/c/cart_coords.c +++ b/ompi/mpi/c/cart_coords.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos Nat Security, LLC. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,17 +31,16 @@ #include "ompi/group/group.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Cart_coords = PMPI_Cart_coords #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Cart_coords PMPI_Cart_coords #endif static const char FUNC_NAME[] = "MPI_Cart_coords"; -int MPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[]) +int MPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[]) { int err; @@ -54,7 +55,7 @@ int MPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[]) return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } - if (OMPI_COMM_IS_INTER(comm)) { + if (OMPI_COMM_IS_INTER(comm)) { return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_COMM, FUNC_NAME); } @@ -72,10 +73,8 @@ int MPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[]) return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_TOPOLOGY, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); err = comm->c_topo->topo.cart.cart_coords(comm, rank, maxdims, coords); - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/cart_create.c b/ompi/mpi/c/cart_create.c index da4ea608c7d..86c419679a2 100644 --- a/ompi/mpi/c/cart_create.c +++ b/ompi/mpi/c/cart_create.c @@ -6,18 +6,20 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "ompi/mca/topo/base/base.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Cart_create = PMPI_Cart_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Cart_create PMPI_Cart_create #endif static const char FUNC_NAME[] = "MPI_Cart_create"; @@ -58,7 +59,7 @@ int MPI_Cart_create(MPI_Comm old_comm, int ndims, const int dims[], return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (OMPI_COMM_IS_INTER(old_comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } if (ndims < 0) { @@ -87,9 +88,9 @@ int MPI_Cart_create(MPI_Comm old_comm, int ndims, const int dims[], } } - /* - * everything seems to be alright with the communicator, we can go - * ahead and select a topology module for this purpose and create + /* + * everything seems to be alright with the communicator, we can go + * ahead and select a topology module for this purpose and create * the new graph communicator */ if (OMPI_SUCCESS != (err = mca_topo_base_comm_select(old_comm, @@ -99,13 +100,10 @@ int MPI_Cart_create(MPI_Comm old_comm, int ndims, const int dims[], return err; } - /* Now let that topology module rearrange procs/ranks if it wants to */ - /* XXX -- CONST -- do not cast away 
const -- update mca/topo */ + /* Now let that topology module rearrange procs/ranks if it wants to */ err = topo->topo.cart.cart_create(topo, old_comm, - ndims, (int *) dims, (int *) periods, + ndims, dims, periods, (0 == reorder) ? false : true, comm_cart); - OPAL_CR_EXIT_LIBRARY(); - if (MPI_SUCCESS != err) { OBJ_RELEASE(topo); return OMPI_ERRHANDLER_INVOKE(old_comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/cart_get.c b/ompi/mpi/c/cart_get.c index e3dd1ebfd50..93242234d9a 100644 --- a/ompi/mpi/c/cart_get.c +++ b/ompi/mpi/c/cart_get.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos Nat Security, LLC. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,18 +30,17 @@ #include "ompi/mca/topo/topo.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Cart_get = PMPI_Cart_get #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Cart_get PMPI_Cart_get #endif static const char FUNC_NAME[] = "MPI_Cart_get"; int MPI_Cart_get(MPI_Comm comm, int maxdims, int dims[], - int periods[], int coords[]) + int periods[], int coords[]) { int err; @@ -54,7 +55,7 @@ int MPI_Cart_get(MPI_Comm comm, int maxdims, int dims[], return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } - if ((0 > maxdims) || (0 < maxdims && + if ((0 > maxdims) || (0 < maxdims && ((NULL == dims) || (NULL == periods) || (NULL == coords)))) { return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_ARG, @@ -66,10 +67,8 @@ int MPI_Cart_get(MPI_Comm comm, int maxdims, int dims[], return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_TOPOLOGY, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); err = comm->c_topo->topo.cart.cart_get(comm, maxdims, dims, periods, coords); - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/cart_map.c b/ompi/mpi/c/cart_map.c index dbb4b7708c3..62bb04909d2 100644 --- a/ompi/mpi/c/cart_map.c +++ b/ompi/mpi/c/cart_map.c @@ -6,18 +6,20 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "ompi/mca/topo/topo.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Cart_map = PMPI_Cart_map #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Cart_map PMPI_Cart_map #endif static const char FUNC_NAME[] = "MPI_Cart_map"; @@ -57,7 +58,7 @@ int MPI_Cart_map(MPI_Comm comm, int ndims, const int dims[], return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } - if (OMPI_COMM_IS_INTER(comm)) { + if (OMPI_COMM_IS_INTER(comm)) { return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_COMM, FUNC_NAME); } @@ -67,19 +68,15 @@ int MPI_Cart_map(MPI_Comm comm, int ndims, const int dims[], } } - OPAL_CR_ENTER_LIBRARY(); - if(!OMPI_COMM_IS_CART(comm)) { - /* In case the communicator has no topo-module attached to + /* In case the communicator has no topo-module attached to it, we just return the "default" value suggested by MPI: newrank = rank */ *newrank = ompi_comm_rank(comm); } else { - /* XXX -- CONST -- do not cast away const -- update mca/topo */ - err = comm->c_topo->topo.cart.cart_map(comm, ndims, (int *) dims, - (int *) periods, newrank); + err = comm->c_topo->topo.cart.cart_map(comm, ndims, dims, + periods, newrank); } - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/cart_rank.c b/ompi/mpi/c/cart_rank.c index 51bfb49c5f1..979dbd22434 100644 --- a/ompi/mpi/c/cart_rank.c +++ b/ompi/mpi/c/cart_rank.c @@ -6,20 +6,20 @@ * Copyright (c) 2004-2013 The University of 
Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -32,12 +32,11 @@ #include "ompi/mca/topo/topo.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Cart_rank = PMPI_Cart_rank #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Cart_rank PMPI_Cart_rank #endif static const char FUNC_NAME[] = "MPI_Cart_rank"; @@ -58,7 +57,7 @@ int MPI_Cart_rank(MPI_Comm comm, const int coords[], int *rank) return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } - if (OMPI_COMM_IS_INTER(comm)) { + if (OMPI_COMM_IS_INTER(comm)) { return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_COMM, FUNC_NAME); } @@ -85,7 +84,7 @@ int MPI_Cart_rank(MPI_Comm comm, const int coords[], int *rank) dimension i is not periodic */ for (i = 0; i < cart->ndims; ++i) { if (!cart->periods[i] && - (coords[i] < 0 || + (coords[i] < 0 || coords[i] >= cart->dims[i])) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } @@ -98,11 +97,8 @@ int MPI_Cart_rank(MPI_Comm comm, const int coords[], int *rank) FUNC_NAME); } } - 
OPAL_CR_ENTER_LIBRARY(); - /* XXX -- CONST -- do not cast away const -- update mca/topo */ - err = comm->c_topo->topo.cart.cart_rank(comm, (int *) coords, rank); - OPAL_CR_EXIT_LIBRARY(); + err = comm->c_topo->topo.cart.cart_rank(comm, coords, rank); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/cart_shift.c b/ompi/mpi/c/cart_shift.c index c568de15aea..99d07693114 100644 --- a/ompi/mpi/c/cart_shift.c +++ b/ompi/mpi/c/cart_shift.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,19 +29,18 @@ #include "ompi/mca/topo/topo.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Cart_shift = PMPI_Cart_shift #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Cart_shift PMPI_Cart_shift #endif static const char FUNC_NAME[] = "MPI_Cart_shift"; int MPI_Cart_shift(MPI_Comm comm, int direction, int disp, - int *rank_source, int *rank_dest) + int *rank_source, int *rank_dest) { int err; @@ -54,7 +55,7 @@ int MPI_Cart_shift(MPI_Comm comm, int direction, int disp, return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } - if (OMPI_COMM_IS_INTER(comm)) { + if (OMPI_COMM_IS_INTER(comm)) { return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_COMM, FUNC_NAME); } @@ -72,11 +73,9 @@ int MPI_Cart_shift(MPI_Comm comm, int direction, int disp, return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_TOPOLOGY, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); /* call the function */ err = comm->c_topo->topo.cart.cart_shift(comm, direction, disp, rank_source, rank_dest); - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/cart_sub.c b/ompi/mpi/c/cart_sub.c index 8c24ece100e..d05f4d6f80e 100644 --- a/ompi/mpi/c/cart_sub.c +++ b/ompi/mpi/c/cart_sub.c @@ -6,20 +6,20 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -32,12 +32,11 @@ #include "ompi/mca/topo/topo.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Cart_sub = PMPI_Cart_sub #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Cart_sub PMPI_Cart_sub #endif static const char FUNC_NAME[] = "MPI_Cart_sub"; @@ -58,7 +57,7 @@ int MPI_Cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *new_comm) return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } - if (OMPI_COMM_IS_INTER(comm)) { + if (OMPI_COMM_IS_INTER(comm)) { return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_COMM, FUNC_NAME); } @@ -73,11 +72,8 @@ int MPI_Cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *new_comm) return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_TOPOLOGY, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* XXX -- CONST -- do not cast away const -- update mca/topo */ - err = comm->c_topo->topo.cart.cart_sub(comm, (int *) remain_dims, new_comm); - OPAL_CR_EXIT_LIBRARY(); + err = comm->c_topo->topo.cart.cart_sub(comm, remain_dims, new_comm); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/cartdim_get.c b/ompi/mpi/c/cartdim_get.c index b99ed0257a1..17e1fbaead4 100644 --- a/ompi/mpi/c/cartdim_get.c +++ b/ompi/mpi/c/cartdim_get.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,19 +29,18 @@ #include "ompi/mca/topo/topo.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Cartdim_get = PMPI_Cartdim_get #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Cartdim_get PMPI_Cartdim_get #endif static const char FUNC_NAME[] = "MPI_Cartdim_get"; -int MPI_Cartdim_get(MPI_Comm comm, int *ndims) +int MPI_Cartdim_get(MPI_Comm comm, int *ndims) { int err; @@ -53,7 +54,7 @@ int MPI_Cartdim_get(MPI_Comm comm, int *ndims) return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } - if (OMPI_COMM_IS_INTER(comm)) { + if (OMPI_COMM_IS_INTER(comm)) { return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_COMM, FUNC_NAME); } @@ -67,10 +68,8 @@ int MPI_Cartdim_get(MPI_Comm comm, int *ndims) return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_TOPOLOGY, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); err = comm->c_topo->topo.cart.cartdim_get(comm, ndims); - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/close_port.c b/ompi/mpi/c/close_port.c index 873ce157ae6..1bb1d633123 100644 --- a/ompi/mpi/c/close_port.c +++ b/ompi/mpi/c/close_port.c @@ -6,16 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research 
Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,15 +28,14 @@ #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Close_port = PMPI_Close_port #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Close_port PMPI_Close_port #endif static const char FUNC_NAME[] = "MPI_Close_port"; @@ -43,17 +45,15 @@ int MPI_Close_port(const char *port_name) { int ret; - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( NULL == port_name ) - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } - ret = ompi_dpm.close_port(port_name); - + ret = ompi_dpm_close_port(port_name); + OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_accept.c b/ompi/mpi/c/comm_accept.c index 36686369500..10eab8d381e 100644 --- a/ompi/mpi/c/comm_accept.c +++ b/ompi/mpi/c/comm_accept.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,13 @@ * Copyright (c) 2008 University of Houston, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,22 +31,21 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_accept = PMPI_Comm_accept #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_accept PMPI_Comm_accept #endif static const char FUNC_NAME[] = "MPI_Comm_accept"; int MPI_Comm_accept(const char *port_name, MPI_Info info, int root, - MPI_Comm comm, MPI_Comm *newcomm) + MPI_Comm comm, MPI_Comm *newcomm) { int rank, rc; bool send_first=false; /* we receive first */ @@ -57,7 +59,7 @@ int MPI_Comm_accept(const char *port_name, MPI_Info info, int root, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } if ( OMPI_COMM_IS_INTER(comm)) { @@ -65,11 +67,11 @@ int MPI_Comm_accept(const char *port_name, MPI_Info info, int root, FUNC_NAME); } if ( (0 > root) || (ompi_comm_size(comm) <= root) ) { - return 
OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } if ( NULL == newcomm ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } if (NULL == info || ompi_info_is_freed(info)) { @@ -77,29 +79,28 @@ int MPI_Comm_accept(const char *port_name, MPI_Info info, int root, FUNC_NAME); } } - + rank = ompi_comm_rank ( comm ); if ( MPI_PARAM_CHECK ) { if ( rank == root ) { - if ( NULL == port_name ) - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + if ( NULL == port_name ) + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } } - /* parse info object. no prefedined values for this function in MPI-2 + /* parse info object. no prefedined values for this function in MPI-2 * so lets ignore it for the moment. * if ( rank == root && MPI_INFO_NULL != info ) { * } */ - OPAL_CR_ENTER_LIBRARY(); if ( rank == root ) { - rc = ompi_dpm.connect_accept (comm, root, port_name, send_first, + rc = ompi_dpm_connect_accept (comm, root, port_name, send_first, &newcomp); } else { - rc = ompi_dpm.connect_accept (comm, root, NULL, send_first, + rc = ompi_dpm_connect_accept (comm, root, NULL, send_first, &newcomp); } diff --git a/ompi/mpi/c/comm_c2f.c b/ompi/mpi/c/comm_c2f.c index d8f19cdb5a7..794cc3c90f1 100644 --- a/ompi/mpi/c/comm_c2f.c +++ b/ompi/mpi/c/comm_c2f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,25 +28,22 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_c2f = PMPI_Comm_c2f #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_c2f PMPI_Comm_c2f #endif static const char FUNC_NAME[] = "MPI_Comm_c2f"; -MPI_Fint MPI_Comm_c2f(MPI_Comm comm) +MPI_Fint MPI_Comm_c2f(MPI_Comm comm) { MEMCHECKER( memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/comm_call_errhandler.c b/ompi/mpi/c/comm_call_errhandler.c index dfc7bdbfb64..dd8bae5c6d9 100644 --- a/ompi/mpi/c/comm_call_errhandler.c +++ b/ompi/mpi/c/comm_call_errhandler.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,14 +26,13 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_call_errhandler = PMPI_Comm_call_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_call_errhandler PMPI_Comm_call_errhandler #endif - + static const char FUNC_NAME[] = "MPI_Comm_call_errhandler"; @@ -42,8 +43,6 @@ int MPI_Comm_call_errhandler(MPI_Comm comm, int errorcode) memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { diff --git a/ompi/mpi/c/comm_compare.c b/ompi/mpi/c/comm_compare.c index 20f9961460a..757344a9e98 100644 --- a/ompi/mpi/c/comm_compare.c +++ b/ompi/mpi/c/comm_compare.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_compare = PMPI_Comm_compare #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_compare PMPI_Comm_compare #endif static const char FUNC_NAME[] = "MPI_Comm_compare"; @@ -49,19 +50,18 @@ int MPI_Comm_compare(MPI_Comm comm1, MPI_Comm comm2, int *result) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm1) || ompi_comm_invalid(comm2)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } if ( NULL == result ) { - return OMPI_ERRHANDLER_INVOKE(comm1, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm1, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_comm_compare ( (ompi_communicator_t*)comm1, + rc = ompi_comm_compare ( (ompi_communicator_t*)comm1, (ompi_communicator_t*)comm2, result); OMPI_ERRHANDLER_RETURN ( rc, comm1, rc, FUNC_NAME); diff --git a/ompi/mpi/c/comm_connect.c b/ompi/mpi/c/comm_connect.c index 1dcce1d7a08..d331d928c24 100644 --- a/ompi/mpi/c/comm_connect.c +++ b/ompi/mpi/c/comm_connect.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,13 @@ * Copyright (c) 2008 University of Houston. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,22 +31,21 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_connect = PMPI_Comm_connect #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_connect PMPI_Comm_connect #endif static const char FUNC_NAME[] = "MPI_Comm_connect"; int MPI_Comm_connect(const char *port_name, MPI_Info info, int root, - MPI_Comm comm, MPI_Comm *newcomm) + MPI_Comm comm, MPI_Comm *newcomm) { int rank, rc; bool send_first=true; /* yes, we are the active part in this game */ @@ -57,7 +59,7 @@ int MPI_Comm_connect(const char *port_name, MPI_Info info, int root, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } if ( OMPI_COMM_IS_INTER(comm)) { @@ -65,11 +67,11 @@ int MPI_Comm_connect(const char *port_name, MPI_Info info, int root, FUNC_NAME); } if ( (0 > root) || (ompi_comm_size(comm) <= root) ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } if ( NULL == newcomm ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } if (NULL == info || ompi_info_is_freed(info)) { @@ -77,12 +79,12 @@ int MPI_Comm_connect(const char *port_name, MPI_Info info, int root, FUNC_NAME); } } - + rank = ompi_comm_rank ( comm ); if ( 
MPI_PARAM_CHECK ) { if ( rank == root ) { - if ( NULL == port_name ) - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + if ( NULL == port_name ) + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } } @@ -94,17 +96,16 @@ int MPI_Comm_connect(const char *port_name, MPI_Info info, int root, * } */ - OPAL_CR_ENTER_LIBRARY(); if ( rank == root ) { - rc = ompi_dpm.connect_accept (comm, root, port_name, send_first, + rc = ompi_dpm_connect_accept (comm, root, port_name, send_first, &newcomp); } else { - rc = ompi_dpm.connect_accept (comm, root, NULL, send_first, + rc = ompi_dpm_connect_accept (comm, root, NULL, send_first, &newcomp); - } - + } + *newcomm = newcomp; OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_create.c b/ompi/mpi/c/comm_create.c index f18a7384e8a..89b0b0d97f8 100644 --- a/ompi/mpi/c/comm_create.c +++ b/ompi/mpi/c/comm_create.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2008 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,19 +27,18 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_create = PMPI_Comm_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_create PMPI_Comm_create #endif static const char FUNC_NAME[] = "MPI_Comm_create"; int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm) { - + int rc; MEMCHECKER( @@ -46,23 +47,22 @@ int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm) { if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - + if (ompi_comm_invalid (comm)) - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); - + if ( MPI_GROUP_NULL == group || NULL == group ) - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_GROUP, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_GROUP, FUNC_NAME); - + if ( NULL == newcomm ) - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_comm_create ( (ompi_communicator_t*)comm, (ompi_group_t*)group, + rc = ompi_comm_create ( (ompi_communicator_t*)comm, (ompi_group_t*)group, (ompi_communicator_t**)newcomm ); OMPI_ERRHANDLER_RETURN ( rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_create_errhandler.c b/ompi/mpi/c/comm_create_errhandler.c index 448ea6e03eb..dba9aa1c1f6 100644 --- a/ompi/mpi/c/comm_create_errhandler.c +++ b/ompi/mpi/c/comm_create_errhandler.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,12 +26,11 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_create_errhandler = PMPI_Comm_create_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_create_errhandler PMPI_Comm_create_errhandler #endif @@ -37,7 +38,7 @@ static const char FUNC_NAME[] = "MPI_Comm_create_errhandler"; int MPI_Comm_create_errhandler(MPI_Comm_errhandler_function *function, - MPI_Errhandler *errhandler) + MPI_Errhandler *errhandler) { int err = MPI_SUCCESS; @@ -46,18 +47,17 @@ int MPI_Comm_create_errhandler(MPI_Comm_errhandler_function *function, if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == function || + if (NULL == function || NULL == errhandler) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); /* Create and cache the errhandler. Sets a refcount of 1. 
*/ - *errhandler = + *errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_C); diff --git a/ompi/mpi/c/comm_create_group.c b/ompi/mpi/c/comm_create_group.c index a09c22b4a71..e8e9a697ec3 100644 --- a/ompi/mpi/c/comm_create_group.c +++ b/ompi/mpi/c/comm_create_group.c @@ -13,6 +13,8 @@ * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/mca/pml/pml.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_create_group = PMPI_Comm_create_group #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_create_group PMPI_Comm_create_group #endif static const char FUNC_NAME[] = "MPI_Comm_create_group"; @@ -72,7 +73,6 @@ int MPI_Comm_create_group (MPI_Comm comm, MPI_Group group, int tag, MPI_Comm *ne return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_comm_create_group ((ompi_communicator_t *) comm, (ompi_group_t *) group, tag, (ompi_communicator_t **) newcomm); diff --git a/ompi/mpi/c/comm_create_keyval.c b/ompi/mpi/c/comm_create_keyval.c index 06292d6e915..d8b88bbebd5 100644 --- a/ompi/mpi/c/comm_create_keyval.c +++ b/ompi/mpi/c/comm_create_keyval.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/attribute/attribute.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_create_keyval = PMPI_Comm_create_keyval #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_create_keyval PMPI_Comm_create_keyval #endif static const char FUNC_NAME[] = "MPI_Comm_create_keyval"; @@ -48,17 +49,16 @@ int MPI_Comm_create_keyval(MPI_Comm_copy_attr_function *comm_copy_attr_fn, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ((NULL == comm_copy_attr_fn) || (NULL == comm_delete_attr_fn) || (NULL == comm_keyval)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*)comm_copy_attr_fn; del_fn.attr_communicator_delete_fn = comm_delete_attr_fn; - ret = ompi_attr_create_keyval(COMM_ATTR, copy_fn, + ret = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, comm_keyval, extra_state, 0, NULL); OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, MPI_ERR_OTHER, FUNC_NAME); diff --git a/ompi/mpi/c/comm_delete_attr.c b/ompi/mpi/c/comm_delete_attr.c index 6b11f959260..75f740ddd1e 100644 --- a/ompi/mpi/c/comm_delete_attr.c +++ b/ompi/mpi/c/comm_delete_attr.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,18 +28,17 @@ #include "ompi/attribute/attribute.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_delete_attr = PMPI_Comm_delete_attr #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_delete_attr PMPI_Comm_delete_attr #endif static const char FUNC_NAME[] = "MPI_Comm_delete_attr"; -int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval) +int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval) { int ret; @@ -48,15 +49,14 @@ int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval) if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - ret = ompi_attr_delete(COMM_ATTR, comm, comm->c_keyhash, comm_keyval, + ret = ompi_attr_delete(COMM_ATTR, comm, comm->c_keyhash, comm_keyval, false); - OMPI_ERRHANDLER_RETURN(ret, comm, MPI_ERR_OTHER, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(ret, comm, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_disconnect.c b/ompi/mpi/c/comm_disconnect.c index a39e9aa6106..ead4917b461 100644 --- a/ompi/mpi/c/comm_disconnect.c +++ b/ompi/mpi/c/comm_disconnect.c @@ -5,15 +5,18 @@ * Copyright (c) 2004-2005 The 
University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,21 +28,20 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_disconnect = PMPI_Comm_disconnect #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_disconnect PMPI_Comm_disconnect #endif -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" static const char FUNC_NAME[] = "MPI_Comm_disconnect"; -int MPI_Comm_disconnect(MPI_Comm *comm) +int MPI_Comm_disconnect(MPI_Comm *comm) { int ret = MPI_SUCCESS; @@ -51,18 +53,17 @@ int MPI_Comm_disconnect(MPI_Comm *comm) OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( ompi_comm_invalid (*comm)) - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } - + if (MPI_COMM_WORLD == *comm || MPI_COMM_SELF == *comm ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); if ( OMPI_COMM_IS_DYNAMIC(*comm)) { - if (OMPI_SUCCESS != ompi_dpm.disconnect (*comm)) { + if (OMPI_SUCCESS != ompi_dpm_disconnect (*comm)) { ret = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } } @@ -72,6 
+73,5 @@ int MPI_Comm_disconnect(MPI_Comm *comm) ompi_comm_free(comm); - OPAL_CR_EXIT_LIBRARY(); return ret; } diff --git a/ompi/mpi/c/comm_dup.c b/ompi/mpi/c/comm_dup.c index ecffd8e33ff..39b6780c626 100644 --- a/ompi/mpi/c/comm_dup.c +++ b/ompi/mpi/c/comm_dup.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2008 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,17 +28,16 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_dup = PMPI_Comm_dup #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_dup PMPI_Comm_dup #endif static const char FUNC_NAME[] = "MPI_Comm_dup"; -int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm) +int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm) { int rc=MPI_SUCCESS; @@ -49,15 +50,14 @@ int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm) OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid (comm)) - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); - + if ( NULL == newcomm ) - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, 
FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_comm_dup ( comm, newcomm ); OMPI_ERRHANDLER_RETURN ( rc, comm, rc, FUNC_NAME); diff --git a/ompi/mpi/c/comm_dup_with_info.c b/ompi/mpi/c/comm_dup_with_info.c index ec765a5870a..cb3b6257ba5 100644 --- a/ompi/mpi/c/comm_dup_with_info.c +++ b/ompi/mpi/c/comm_dup_with_info.c @@ -14,6 +14,8 @@ * Copyright (c) 2006-2008 University of Houston. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_dup_with_info = PMPI_Comm_dup_with_info #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_dup_with_info PMPI_Comm_dup_with_info #endif static const char FUNC_NAME[] = "MPI_Comm_dup_with_info"; @@ -64,7 +65,6 @@ int MPI_Comm_dup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm) FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_comm_dup_with_info (comm, info, newcomm); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); diff --git a/ompi/mpi/c/comm_f2c.c b/ompi/mpi/c/comm_f2c.c index 8bedc9a49e9..a0bd6be2acb 100644 --- a/ompi/mpi/c/comm_f2c.c +++ b/ompi/mpi/c/comm_f2c.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. 
All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,23 +28,20 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/mpi/fortran/base/fint_2_int.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_f2c = PMPI_Comm_f2c #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_f2c PMPI_Comm_f2c #endif static const char FUNC_NAME[] = "MPI_Comm_f2c"; -MPI_Comm MPI_Comm_f2c(MPI_Fint comm) +MPI_Comm MPI_Comm_f2c(MPI_Fint comm) { int o_index= OMPI_FINT_2_INT(comm); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } @@ -55,6 +54,6 @@ MPI_Comm MPI_Comm_f2c(MPI_Fint comm) o_index >= opal_pointer_array_get_size(&ompi_comm_f_to_c_table)) { return NULL; } - + return (MPI_Comm)opal_pointer_array_get_item(&ompi_comm_f_to_c_table, o_index); } diff --git a/ompi/mpi/c/comm_free.c b/ompi/mpi/c/comm_free.c index 1f3bdfc089d..c195613f22f 100644 --- a/ompi/mpi/c/comm_free.c +++ b/ompi/mpi/c/comm_free.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,18 +26,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_free = PMPI_Comm_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_free PMPI_Comm_free #endif static const char FUNC_NAME[] = "MPI_Comm_free"; -int MPI_Comm_free(MPI_Comm *comm) +int MPI_Comm_free(MPI_Comm *comm) { int ret; @@ -45,22 +46,20 @@ int MPI_Comm_free(MPI_Comm *comm) if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - + if ( NULL == *comm || MPI_COMM_WORLD == *comm || ompi_comm_invalid (*comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (MPI_COMM_SELF == *comm) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_SELF, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_SELF, MPI_ERR_COMM, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - ret = ompi_comm_free ( comm ); + ret = ompi_comm_free ( comm ); OMPI_ERRHANDLER_CHECK(ret, *comm, ret, FUNC_NAME); - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/comm_free_keyval.c b/ompi/mpi/c/comm_free_keyval.c index 37b9604b6e8..eda9c29cc39 100644 --- a/ompi/mpi/c/comm_free_keyval.c +++ b/ompi/mpi/c/comm_free_keyval.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,18 +26,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/attribute/attribute.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_free_keyval = PMPI_Comm_free_keyval #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_free_keyval PMPI_Comm_free_keyval #endif static const char FUNC_NAME[] = "MPI_Comm_free_keyval"; -int MPI_Comm_free_keyval(int *comm_keyval) +int MPI_Comm_free_keyval(int *comm_keyval) { int ret; @@ -44,12 +45,11 @@ int MPI_Comm_free_keyval(int *comm_keyval) if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == comm_keyval) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); ret = ompi_attr_free_keyval(COMM_ATTR, comm_keyval, 0); diff --git a/ompi/mpi/c/comm_get_attr.c b/ompi/mpi/c/comm_get_attr.c index fd8dec9ee1c..32b387cc9e0 100644 --- a/ompi/mpi/c/comm_get_attr.c +++ b/ompi/mpi/c/comm_get_attr.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,12 +28,11 @@ #include "ompi/attribute/attribute.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_get_attr = PMPI_Comm_get_attr #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_get_attr PMPI_Comm_get_attr #endif static const char FUNC_NAME[] = "MPI_Comm_get_attr"; @@ -51,20 +52,19 @@ int MPI_Comm_get_attr(MPI_Comm comm, int comm_keyval, if ((NULL == attribute_val) || (NULL == flag)) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } else if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (MPI_KEYVAL_INVALID == comm_keyval) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_KEYVAL, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); /* This stuff is very confusing. Be sure to see src/attribute/attribute.c for a lengthy comment explaining Open MPI attribute behavior. */ - ret = ompi_attr_get_c(comm->c_keyhash, comm_keyval, + ret = ompi_attr_get_c(comm->c_keyhash, comm_keyval, (void**)attribute_val, flag); - OMPI_ERRHANDLER_RETURN(ret, comm, MPI_ERR_OTHER, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(ret, comm, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_get_errhandler.c b/ompi/mpi/c/comm_get_errhandler.c index 5249b24d3d9..f36d7f5c7d2 100644 --- a/ompi/mpi/c/comm_get_errhandler.c +++ b/ompi/mpi/c/comm_get_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +30,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_get_errhandler = PMPI_Comm_get_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_get_errhandler PMPI_Comm_get_errhandler #endif @@ -46,8 +50,6 @@ int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *errhandler) memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { @@ -65,7 +67,7 @@ int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *errhandler) error_handler became atomic. */ do { tmp = comm->error_handler; - } while (!OPAL_ATOMIC_CMPSET(&(comm->error_handler), tmp, tmp)); + } while (!OPAL_ATOMIC_CMPSET_PTR(&(comm->error_handler), tmp, tmp)); /* Retain the errhandler, corresponding to object refcount decrease in errhandler_free.c. */ diff --git a/ompi/mpi/c/comm_get_info.c b/ompi/mpi/c/comm_get_info.c index 7ce4ef2c50b..e7adfb77e73 100644 --- a/ompi/mpi/c/comm_get_info.c +++ b/ompi/mpi/c/comm_get_info.c @@ -1,10 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -18,12 +20,11 @@ #include #include -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_get_info = PMPI_Comm_get_info #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_get_info PMPI_Comm_get_info #endif static const char FUNC_NAME[] = "MPI_Comm_get_info"; @@ -31,8 +32,6 @@ static const char FUNC_NAME[] = "MPI_Comm_get_info"; int MPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used) { - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info_used) { @@ -40,7 +39,7 @@ int MPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used) FUNC_NAME); } if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } } diff --git a/ompi/mpi/c/comm_get_name.c b/ompi/mpi/c/comm_get_name.c index 98173569657..0f067460295 100644 --- a/ompi/mpi/c/comm_get_name.c +++ b/ompi/mpi/c/comm_get_name.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,34 +31,31 @@ #include "opal/threads/mutex.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_get_name = PMPI_Comm_get_name #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_get_name PMPI_Comm_get_name #endif static const char FUNC_NAME[] = "MPI_Comm_get_name"; -int MPI_Comm_get_name(MPI_Comm comm, char *name, int *length) +int MPI_Comm_get_name(MPI_Comm comm, char *name, int *length) { MEMCHECKER( memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( ompi_comm_invalid ( comm ) ) - return OMPI_ERRHANDLER_INVOKE ( MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE ( MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); - if ( NULL == name || NULL == length ) - return OMPI_ERRHANDLER_INVOKE ( comm, MPI_ERR_ARG, + if ( NULL == name || NULL == length ) + return OMPI_ERRHANDLER_INVOKE ( comm, MPI_ERR_ARG, FUNC_NAME); } #ifdef USE_MUTEX_FOR_COMMS @@ -67,7 +66,7 @@ int MPI_Comm_get_name(MPI_Comm comm, char *name, int *length) - name[*resultlen] == '\0' - and therefore (*resultlen) cannot be > (MPI_MAX_OBJECT_NAME-1) - The Fortran API version will pad to the right if necessary. + The Fortran API version will pad to the right if necessary. Note that comm->c_name is guaranteed to be \0-terminated and able to completely fit into MPI_MAX_OBJECT_NAME bytes (i.e., diff --git a/ompi/mpi/c/comm_get_parent.c b/ompi/mpi/c/comm_get_parent.c index cbc73651a80..85d4a73e7f8 100644 --- a/ompi/mpi/c/comm_get_parent.c +++ b/ompi/mpi/c/comm_get_parent.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -23,34 +25,31 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_get_parent = PMPI_Comm_get_parent #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_get_parent PMPI_Comm_get_parent #endif static const char FUNC_NAME[] = "MPI_Comm_get_parent"; -int MPI_Comm_get_parent(MPI_Comm *parent) +int MPI_Comm_get_parent(MPI_Comm *parent) { - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( NULL == parent ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } /* - * ompi_mpi_comm_parent is MPI_COMM_NULL, in case this - * world has not been spawned by another MPI job. + * ompi_mpi_comm_parent is MPI_COMM_NULL, in case this + * world has not been spawned by another MPI job. * This is also the return value required by MPI-2. */ diff --git a/ompi/mpi/c/comm_group.c b/ompi/mpi/c/comm_group.c index d36a330e6b3..4fdb681857f 100644 --- a/ompi/mpi/c/comm_group.c +++ b/ompi/mpi/c/comm_group.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_group = PMPI_Comm_group #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_group PMPI_Comm_group #endif static const char FUNC_NAME[] = "MPI_Comm_group"; @@ -49,15 +50,14 @@ int MPI_Comm_group(MPI_Comm comm, MPI_Group *group) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( ompi_comm_invalid (comm) ) - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); - if ( NULL == group ) - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + if ( NULL == group ) + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } /* end if ( MPI_PARAM_CHECK) */ - OPAL_CR_ENTER_LIBRARY(); rc = ompi_comm_group ( (ompi_communicator_t*)comm, (ompi_group_t**)group ); OMPI_ERRHANDLER_RETURN ( rc, comm, rc, FUNC_NAME); diff --git a/ompi/mpi/c/comm_idup.c b/ompi/mpi/c/comm_idup.c index 9edaa8f0be1..4493ed9ba96 100644 --- a/ompi/mpi/c/comm_idup.c +++ b/ompi/mpi/c/comm_idup.c @@ -14,6 +14,8 @@ * Copyright (c) 2006-2008 University of Houston. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_idup = PMPI_Comm_idup #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_idup PMPI_Comm_idup #endif static const char FUNC_NAME[] = "MPI_Comm_idup"; @@ -60,7 +61,6 @@ int MPI_Comm_idup(MPI_Comm comm, MPI_Comm *newcomm, MPI_Request *request) FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_comm_idup (comm, newcomm, request); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); diff --git a/ompi/mpi/c/comm_join.c b/ompi/mpi/c/comm_join.c index 0733ee7b4d8..82141ac53d3 100644 --- a/ompi/mpi/c/comm_join.c +++ b/ompi/mpi/c/comm_join.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -38,15 +40,14 @@ #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_join = PMPI_Comm_join #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_join PMPI_Comm_join #endif static const char FUNC_NAME[] = "MPI_Comm_join"; @@ -54,12 +55,11 @@ static const char FUNC_NAME[] = "MPI_Comm_join"; static int ompi_socket_send (int fd, char *buf, int len ); static int ompi_socket_recv (int fd, char *buf, int len ); -int MPI_Comm_join(int fd, MPI_Comm *intercomm) +int MPI_Comm_join(int fd, MPI_Comm *intercomm) { int rc; uint32_t len, rlen, llen, lrlen; int send_first=0; - char *rport; ompi_process_name_t rname, tmp_name; ompi_communicator_t *newcomp; @@ -69,19 +69,12 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( NULL == intercomm ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - /* open a port using the specified tag */ - if (OMPI_SUCCESS != (rc = ompi_dpm.open_port(port_name, OMPI_COMM_JOIN_TAG))) { - OPAL_CR_EXIT_LIBRARY(); - return rc; - } - /* send my process name */ tmp_name = *OMPI_PROC_MY_NAME; OMPI_PROCESS_NAME_HTON(tmp_name); @@ -98,7 +91,6 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) } else if (OMPI_PROC_MY_NAME->vpid == rname.vpid) { /* joining to myself is not allowed */ *intercomm = MPI_COMM_NULL; - OPAL_CR_EXIT_LIBRARY(); return MPI_ERR_INTERN; } else { send_first = false; @@ -107,34 +99,32 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) send_first = true; } - /* sendrecv port-name through the 
socket connection. - Need to determine somehow how to avoid a potential deadlock - here. */ - llen = (uint32_t)(strlen(port_name)+1); - len = htonl(llen); - - ompi_socket_send( fd, (char *) &len, sizeof(uint32_t)); - ompi_socket_recv (fd, (char *) &rlen, sizeof(uint32_t)); - - lrlen = ntohl(rlen); - rport = (char *) malloc (lrlen); - if ( NULL == rport ) { - *intercomm = MPI_COMM_NULL; - OPAL_CR_EXIT_LIBRARY(); - return MPI_ERR_INTERN; - } + /* ensure the port name is NULL terminated */ + memset(port_name, 0, MPI_MAX_PORT_NAME); - /* Assumption: socket_send should not block, even if the socket + /* Assumption: socket_send should not block, even if the socket is not configured to be non-blocking, because the message length are so short. */ - ompi_socket_send (fd, port_name, llen); - ompi_socket_recv (fd, rport, lrlen); - - /* use the port we received to connect/accept */ - rc = ompi_dpm.connect_accept (MPI_COMM_SELF, 0, rport, send_first, &newcomp); - - - free ( rport ); + + /* we will only use the send_first proc's port name, + * so pass it to the recv_first participant */ + if (send_first) { + /* open a port */ + if (OMPI_SUCCESS != (rc = ompi_dpm_open_port(port_name))) { + return rc; + } + llen = (uint32_t)(strlen(port_name)+1); + len = htonl(llen); + ompi_socket_send( fd, (char *) &len, sizeof(uint32_t)); + ompi_socket_send (fd, port_name, llen); + } else { + ompi_socket_recv (fd, (char *) &rlen, sizeof(uint32_t)); + lrlen = ntohl(rlen); + ompi_socket_recv (fd, port_name, lrlen); + } + + /* use the port to connect/accept */ + rc = ompi_dpm_connect_accept (MPI_COMM_SELF, 0, port_name, send_first, &newcomp); *intercomm = newcomp; OMPI_ERRHANDLER_RETURN (rc, MPI_COMM_SELF, rc, FUNC_NAME); @@ -148,13 +138,13 @@ static int ompi_socket_send (int fd, char *buf, int len ) ssize_t a; char *c_ptr; int ret = OMPI_SUCCESS; - + num = len; c_ptr = buf; do { s_num = (size_t) num; - a = write ( fd, c_ptr, s_num ); + a = write ( fd, c_ptr, s_num ); if ( a == -1 ) { if ( errno == 
EINTR ) { /* Catch EINTR on, mainly on IBM RS6000 */ @@ -172,7 +162,7 @@ static int ompi_socket_send (int fd, char *buf, int len ) #endif else { /* Another error occured */ - fprintf (stderr,"ompi_socket_send: error while writing to socket" + fprintf (stderr,"ompi_socket_send: error while writing to socket" " error:%s", strerror (errno) ); return MPI_ERR_OTHER; } @@ -180,7 +170,7 @@ static int ompi_socket_send (int fd, char *buf, int len ) num -= a; c_ptr += a; } while ( num > 0 ); - + if ( num < 0 ) { fprintf (stderr, "ompi_socket_send: more data written then available"); @@ -197,13 +187,13 @@ static int ompi_socket_recv (int fd, char *buf, int len ) ssize_t a; char *c_ptr; int ret = MPI_SUCCESS; - + num = len; c_ptr = buf; do { s_num = (size_t ) num; - a = read ( fd, c_ptr, s_num ); + a = read ( fd, c_ptr, s_num ); if ( a == -1 ) { if ( errno == EINTR ) { /* Catch EINTR on, mainly on IBM RS6000 */ @@ -221,7 +211,7 @@ static int ompi_socket_recv (int fd, char *buf, int len ) #endif else { /* Another error occured */ - fprintf (stderr,"ompi_socket_recv: error while reading from socket" + fprintf (stderr,"ompi_socket_recv: error while reading from socket" " error:%s", strerror (errno) ); return MPI_ERR_OTHER; } diff --git a/ompi/mpi/c/comm_rank.c b/ompi/mpi/c/comm_rank.c index bf8b991d18c..542e02b01b9 100644 --- a/ompi/mpi/c/comm_rank.c +++ b/ompi/mpi/c/comm_rank.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,34 +27,31 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_rank = PMPI_Comm_rank #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_rank PMPI_Comm_rank #endif static const char FUNC_NAME[] = "MPI_Comm_rank"; -int MPI_Comm_rank(MPI_Comm comm, int *rank) +int MPI_Comm_rank(MPI_Comm comm, int *rank) { MEMCHECKER( memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid (comm)) - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); if ( NULL == rank ) - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_remote_group.c b/ompi/mpi/c/comm_remote_group.c index d2f8f4e641d..d5c35333b82 100644 --- a/ompi/mpi/c/comm_remote_group.c +++ b/ompi/mpi/c/comm_remote_group.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,48 +28,44 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_remote_group = PMPI_Comm_remote_group #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_remote_group PMPI_Comm_remote_group #endif static const char FUNC_NAME[] = "MPI_Comm_remote_group"; -int MPI_Comm_remote_group(MPI_Comm comm, MPI_Group *group) +int MPI_Comm_remote_group(MPI_Comm comm, MPI_Group *group) { MEMCHECKER( memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } if ( NULL == group ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } } - if ( OMPI_COMM_IS_INTER(comm) ) { + if ( OMPI_COMM_IS_INTER(comm) ) { OBJ_RETAIN(comm->c_remote_group); } else { - return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_COMM, FUNC_NAME); } *group = (MPI_Group) comm->c_remote_group; - ompi_group_increment_proc_count(*group); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/comm_remote_size.c b/ompi/mpi/c/comm_remote_size.c index 12d3d90ced0..438940e86f6 100644 --- a/ompi/mpi/c/comm_remote_size.c +++ b/ompi/mpi/c/comm_remote_size.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_remote_size = PMPI_Comm_remote_size #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_remote_size PMPI_Comm_remote_size #endif @@ -42,13 +43,11 @@ int MPI_Comm_remote_size(MPI_Comm comm, int *size) { memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_set_attr.c b/ompi/mpi/c/comm_set_attr.c index f26c4147f06..c1c7cb1ea7e 100644 --- a/ompi/mpi/c/comm_set_attr.c +++ b/ompi/mpi/c/comm_set_attr.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,18 +28,17 @@ #include "ompi/attribute/attribute.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_set_attr = PMPI_Comm_set_attr #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_set_attr PMPI_Comm_set_attr #endif static const char FUNC_NAME[] = "MPI_Comm_set_attr"; -int MPI_Comm_set_attr(MPI_Comm comm, int comm_keyval, void *attribute_val) +int MPI_Comm_set_attr(MPI_Comm comm, int comm_keyval, void *attribute_val) { int ret; @@ -48,14 +49,13 @@ int MPI_Comm_set_attr(MPI_Comm comm, int comm_keyval, void *attribute_val) if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - ret = ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash, + ret = ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash, comm_keyval, attribute_val, false); - OMPI_ERRHANDLER_RETURN(ret, comm, MPI_ERR_OTHER, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(ret, comm, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_set_errhandler.c b/ompi/mpi/c/comm_set_errhandler.c index 8213702042e..d44a00aa19a 100644 --- a/ompi/mpi/c/comm_set_errhandler.c +++ b/ompi/mpi/c/comm_set_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,14 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,18 +29,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_set_errhandler = PMPI_Comm_set_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_set_errhandler PMPI_Comm_set_errhandler #endif static const char FUNC_NAME[] = "MPI_Comm_set_errhandler"; -int MPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler) +int MPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler) { MPI_Errhandler tmp; @@ -44,8 +48,6 @@ int MPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler) memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { @@ -68,9 +70,7 @@ int MPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler) /* Ditch the old errhandler, and decrement its refcount. On 64 bits environments we have to make sure the reading of the error_handler became atomic. 
*/ - do { - tmp = comm->error_handler; - } while (!OPAL_ATOMIC_CMPSET(&(comm->error_handler), tmp, errhandler)); + tmp = OPAL_ATOMIC_SWAP_PTR(&comm->error_handler, errhandler); OBJ_RELEASE(tmp); /* All done */ diff --git a/ompi/mpi/c/comm_set_info.c b/ompi/mpi/c/comm_set_info.c index cf02e66ec26..00e951fede6 100644 --- a/ompi/mpi/c/comm_set_info.c +++ b/ompi/mpi/c/comm_set_info.c @@ -1,10 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -18,12 +20,11 @@ #include #include -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_set_info = PMPI_Comm_set_info #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_set_info PMPI_Comm_set_info #endif static const char FUNC_NAME[] = "MPI_Comm_set_info"; @@ -31,8 +32,6 @@ static const char FUNC_NAME[] = "MPI_Comm_set_info"; int MPI_Comm_set_info(MPI_Comm comm, MPI_Info info) { - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || @@ -41,7 +40,7 @@ int MPI_Comm_set_info(MPI_Comm comm, MPI_Info info) FUNC_NAME); } if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } } diff --git a/ompi/mpi/c/comm_set_name.c b/ompi/mpi/c/comm_set_name.c index 77effc02dd2..a1b96b350fc 100644 --- a/ompi/mpi/c/comm_set_name.c +++ b/ompi/mpi/c/comm_set_name.c @@ -13,6 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,12 +33,11 @@ #include "ompi/totalview.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_set_name = PMPI_Comm_set_name #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_set_name PMPI_Comm_set_name #endif static const char FUNC_NAME[] = "MPI_Comm_set_name"; @@ -64,7 +65,6 @@ int MPI_Comm_set_name(MPI_Comm comm, const char *name) } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_comm_set_name (comm, name ); /* -- Tracing information for new communicator name -- */ @@ -72,10 +72,6 @@ int MPI_Comm_set_name(MPI_Comm comm, const char *name) /* Force TotalView DLL to take note of this name setting */ ++ompi_tv_comm_sequence_number; -#endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" #endif OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_size.c b/ompi/mpi/c/comm_size.c index d995753ea52..16eaec27aca 100644 --- a/ompi/mpi/c/comm_size.c +++ b/ompi/mpi/c/comm_size.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,30 +29,27 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_size = PMPI_Comm_size #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_size PMPI_Comm_size #endif static const char FUNC_NAME[] = "MPI_Comm_size"; -int MPI_Comm_size(MPI_Comm comm, int *size) +int MPI_Comm_size(MPI_Comm comm, int *size) { MEMCHECKER( memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_spawn.c b/ompi/mpi/c/comm_spawn.c index 71ec9a42fe3..9a5d303dec7 100644 --- a/ompi/mpi/c/comm_spawn.c +++ b/ompi/mpi/c/comm_spawn.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,13 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,15 +31,14 @@ #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_spawn = PMPI_Comm_spawn #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_spawn PMPI_Comm_spawn #endif static const char FUNC_NAME[] = "MPI_Comm_spawn"; @@ -51,16 +53,16 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf ompi_communicator_t *newcomp=NULL; char port_name[MPI_MAX_PORT_NAME]; bool non_mpi = false; - + MEMCHECKER( memchecker_comm(comm); ); - + if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } if ( OMPI_COMM_IS_INTER(comm)) { @@ -68,7 +70,7 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf FUNC_NAME); } if ( (0 > root) || (ompi_comm_size(comm) <= root) ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } if ( NULL == intercomm ) { @@ -76,7 +78,7 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf FUNC_NAME); } } - + rank = ompi_comm_rank ( comm ); if ( MPI_PARAM_CHECK ) { if ( rank == root ) { @@ -97,19 +99,18 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf /* initialize the port name to avoid problems */ memset(port_name, 0, MPI_MAX_PORT_NAME); - + /* See if the info key "ompi_non_mpi" was set to true */ if (rank == root) { ompi_info_get_bool(info, "ompi_non_mpi", &non_mpi, 
&flag); } - OPAL_CR_ENTER_LIBRARY(); if ( rank == root ) { if (!non_mpi) { /* Open a port. The port_name is passed as an environment variable to the children. */ - if (OMPI_SUCCESS != (rc = ompi_dpm.open_port (port_name, OMPI_RML_TAG_INVALID))) { + if (OMPI_SUCCESS != (rc = ompi_dpm_open_port (port_name))) { goto error; } } else if (1 < ompi_comm_size(comm)) { @@ -117,7 +118,7 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf rc = OMPI_ERR_NOT_SUPPORTED; goto error; } - if (OMPI_SUCCESS != (rc = ompi_dpm.spawn (1, &command, &argv, &maxprocs, + if (OMPI_SUCCESS != (rc = ompi_dpm_spawn (1, &command, &argv, &maxprocs, &info, port_name))) { goto error; } @@ -126,17 +127,16 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf if (non_mpi) { newcomp = MPI_COMM_NULL; } else { - rc = ompi_dpm.connect_accept (comm, root, port_name, send_first, &newcomp); + rc = ompi_dpm_connect_accept (comm, root, port_name, send_first, &newcomp); } error: - OPAL_CR_EXIT_LIBRARY(); /* close the port */ if (rank == root && !non_mpi) { - ompi_dpm.close_port(port_name); + ompi_dpm_close_port(port_name); } - + /* set error codes */ if (MPI_ERRCODES_IGNORE != array_of_errcodes) { for ( i=0; i < maxprocs; i++ ) { diff --git a/ompi/mpi/c/comm_spawn_multiple.c b/ompi/mpi/c/comm_spawn_multiple.c index f9b91d3748a..93452e5dfa7 100644 --- a/ompi/mpi/c/comm_spawn_multiple.c +++ b/ompi/mpi/c/comm_spawn_multiple.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,13 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. 
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,15 +31,14 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_spawn_multiple = PMPI_Comm_spawn_multiple #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_spawn_multiple PMPI_Comm_spawn_multiple #endif static const char FUNC_NAME[] = "MPI_Comm_spawn_multiple"; @@ -56,12 +58,12 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o MEMCHECKER( memchecker_comm(comm); ); - + if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } if ( OMPI_COMM_IS_INTER(comm)) { @@ -74,7 +76,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } } - + rank = ompi_comm_rank ( comm ); if ( MPI_PARAM_CHECK ) { if ( rank == root ) { @@ -91,7 +93,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_INFO, FUNC_NAME); } for (i = 0; i < count; ++i) { - if (NULL == array_of_info[i] || + if (NULL == array_of_info[i] || ompi_info_is_freed(array_of_info[i])) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INFO, FUNC_NAME); @@ -124,7 
+126,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o if ( 0 > array_of_maxprocs[i] ) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - } + } } } @@ -142,14 +144,13 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o /* initialize the port name to avoid problems */ memset(port_name, 0, MPI_MAX_PORT_NAME); - - OPAL_CR_ENTER_LIBRARY(); + if ( rank == root ) { if (!non_mpi) { /* Open a port. The port_name is passed as an environment variable to the children. */ - if (OMPI_SUCCESS != (rc = ompi_dpm.open_port (port_name, OMPI_RML_TAG_INVALID))) { + if (OMPI_SUCCESS != (rc = ompi_dpm_open_port (port_name))) { goto error; } } else if (1 < ompi_comm_size(comm)) { @@ -157,7 +158,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o rc = OMPI_ERR_NOT_SUPPORTED; goto error; } - if (OMPI_SUCCESS != (rc = ompi_dpm.spawn(count, (const char **) array_of_commands, + if (OMPI_SUCCESS != (rc = ompi_dpm_spawn(count, (const char **) array_of_commands, array_of_argv, array_of_maxprocs, array_of_info, port_name))) { goto error; @@ -167,17 +168,16 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o if (non_mpi) { newcomp = MPI_COMM_NULL; } else { - rc = ompi_dpm.connect_accept (comm, root, port_name, send_first, &newcomp); + rc = ompi_dpm_connect_accept (comm, root, port_name, send_first, &newcomp); } error: - OPAL_CR_EXIT_LIBRARY(); - + /* close the port */ if (rank == root && !non_mpi) { - ompi_dpm.close_port(port_name); + ompi_dpm_close_port(port_name); } - + /* set array of errorcodes */ if (MPI_ERRCODES_IGNORE != array_of_errcodes) { if (NULL != newcomp) { diff --git a/ompi/mpi/c/comm_split.c b/ompi/mpi/c/comm_split.c index 42aaea668b3..6eb4cf5c623 100644 --- a/ompi/mpi/c/comm_split.c +++ b/ompi/mpi/c/comm_split.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research 
Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_split = PMPI_Comm_split #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_split PMPI_Comm_split #endif static const char FUNC_NAME[] = "MPI_Comm_split"; @@ -48,24 +49,23 @@ int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( ompi_comm_invalid ( comm )) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } if ( color < 0 && MPI_UNDEFINED != color ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - + if ( NULL == newcomm ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_comm_split ( (ompi_communicator_t*)comm, color, key, + rc = ompi_comm_split ( (ompi_communicator_t*)comm, color, key, (ompi_communicator_t**)newcomm, false); OMPI_ERRHANDLER_RETURN ( rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_split_type.c b/ompi/mpi/c/comm_split_type.c index 1fdece79a0f..8c6f80da80c 100644 
--- a/ompi/mpi/c/comm_split_type.c +++ b/ompi/mpi/c/comm_split_type.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,18 +30,17 @@ #include "ompi/info/info.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_split_type = PMPI_Comm_split_type #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_split_type PMPI_Comm_split_type #endif static const char FUNC_NAME[] = "MPI_Comm_split_type"; -int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, +int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info, MPI_Comm *newcomm) { int rc; @@ -52,7 +53,7 @@ int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( ompi_comm_invalid ( comm )) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } @@ -62,36 +63,35 @@ int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, } if ( MPI_COMM_TYPE_SHARED != split_type && // Same as OMPI_COMM_TYPE_NODE - OMPI_COMM_TYPE_CLUSTER != split_type && - OMPI_COMM_TYPE_CU != split_type && - OMPI_COMM_TYPE_HOST 
!= split_type && - OMPI_COMM_TYPE_BOARD != split_type && + OMPI_COMM_TYPE_CLUSTER != split_type && + OMPI_COMM_TYPE_CU != split_type && + OMPI_COMM_TYPE_HOST != split_type && + OMPI_COMM_TYPE_BOARD != split_type && OMPI_COMM_TYPE_NODE != split_type && // Same as MPI_COMM_TYPE_SHARED - OMPI_COMM_TYPE_NUMA != split_type && - OMPI_COMM_TYPE_SOCKET != split_type && - OMPI_COMM_TYPE_L3CACHE != split_type && - OMPI_COMM_TYPE_L2CACHE != split_type && - OMPI_COMM_TYPE_L1CACHE != split_type && - OMPI_COMM_TYPE_CORE != split_type && - OMPI_COMM_TYPE_HWTHREAD != split_type && + OMPI_COMM_TYPE_NUMA != split_type && + OMPI_COMM_TYPE_SOCKET != split_type && + OMPI_COMM_TYPE_L3CACHE != split_type && + OMPI_COMM_TYPE_L2CACHE != split_type && + OMPI_COMM_TYPE_L1CACHE != split_type && + OMPI_COMM_TYPE_CORE != split_type && + OMPI_COMM_TYPE_HWTHREAD != split_type && MPI_UNDEFINED != split_type ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - + if ( NULL == newcomm ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); if( (MPI_COMM_SELF == comm) && (MPI_UNDEFINED == split_type) ) { *newcomm = MPI_COMM_NULL; rc = MPI_SUCCESS; } else { - rc = ompi_comm_split_type( (ompi_communicator_t*)comm, split_type, key, info, + rc = ompi_comm_split_type( (ompi_communicator_t*)comm, split_type, key, info, (ompi_communicator_t**)newcomm); } OMPI_ERRHANDLER_RETURN ( rc, comm, rc, FUNC_NAME); diff --git a/ompi/mpi/c/comm_test_inter.c b/ompi/mpi/c/comm_test_inter.c index 29c31132557..141a90d3478 100644 --- a/ompi/mpi/c/comm_test_inter.c +++ b/ompi/mpi/c/comm_test_inter.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Comm_test_inter = PMPI_Comm_test_inter #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Comm_test_inter PMPI_Comm_test_inter #endif static const char FUNC_NAME[] = "MPI_Comm_test_inter"; @@ -42,18 +43,16 @@ int MPI_Comm_test_inter(MPI_Comm comm, int *flag) { memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - + if ( ompi_comm_invalid ( comm ) ) { return OMPI_ERRHANDLER_INVOKE ( MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } - + if ( NULL == flag ) { - return OMPI_ERRHANDLER_INVOKE ( comm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE ( comm, MPI_ERR_ARG, FUNC_NAME); } } diff --git a/ompi/mpi/c/compare_and_swap.c b/ompi/mpi/c/compare_and_swap.c index 95531538e6f..9a93eb6d177 100644 --- a/ompi/mpi/c/compare_and_swap.c +++ b/ompi/mpi/c/compare_and_swap.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,16 +6,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,19 +33,18 @@ #include "ompi/mca/osc/osc.h" #include "ompi/datatype/ompi_datatype.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Compare_and_swap = PMPI_Compare_and_swap #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Compare_and_swap PMPI_Compare_and_swap #endif static const char FUNC_NAME[] = "MPI_Compare_and_swap"; -int MPI_Compare_and_swap(void *origin_addr, void *compare_addr, void *result_addr, - MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Win win) +int MPI_Compare_and_swap(const void *origin_addr, const void *compare_addr, void *result_addr, + MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Win win) { int rc; @@ -54,7 +58,7 @@ int MPI_Compare_and_swap(void *origin_addr, void *compare_addr, void *result_add } else if (ompi_win_peer_invalid(win, target_rank) && (MPI_PROC_NULL != target_rank)) { rc = MPI_ERR_RANK; - } else if ( target_disp < 0 ) { + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { rc = MPI_ERR_DISP; } else { OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, datatype, 1); @@ -64,7 +68,6 @@ int MPI_Compare_and_swap(void *origin_addr, void 
*compare_addr, void *result_add if (MPI_PROC_NULL == target_rank) return MPI_SUCCESS; - OPAL_CR_ENTER_LIBRARY(); rc = win->w_osc_module->osc_compare_and_swap(origin_addr, compare_addr, result_addr, datatype, target_rank, target_disp, win); diff --git a/ompi/mpi/c/dims_create.c b/ompi/mpi/c/dims_create.c index e91c3611be5..d7f2d30b294 100644 --- a/ompi/mpi/c/dims_create.c +++ b/ompi/mpi/c/dims_create.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Dims_create = PMPI_Dims_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Dims_create PMPI_Dims_create #endif static const char FUNC_NAME[] = "MPI_Dims_create"; @@ -59,8 +60,6 @@ int MPI_Dims_create(int nnodes, int ndims, int dims[]) int *p; int err; - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -70,7 +69,7 @@ int MPI_Dims_create(int nnodes, int ndims, int dims[]) } if (1 > ndims) { - return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, + return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_DIMS, FUNC_NAME); } @@ -162,7 +161,7 @@ assignnodes(int ndim, int nfactor, int *pfacts, int **pdims) int f; int *p; int *pmin; - + if (0 >= ndim) { return MPI_ERR_DIMS; } @@ -177,7 +176,7 @@ assignnodes(int ndim, int nfactor, int *pfacts, int **pdims) for (i = 0, p = bins; i < ndim; ++i, ++p) { *p = 1; } - + /* Loop assigning factors from the highest to the lowest */ for (j = nfactor - 1; j >= 0; --j) { f = pfacts[j]; @@ -190,7 +189,7 @@ assignnodes(int ndim, int nfactor, int *pfacts, int **pdims) } *pmin *= f; } - + /* Sort dimensions in decreasing order (O(n^2) for now) */ for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) { for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) { diff --git a/ompi/mpi/c/dist_graph_create.c b/ompi/mpi/c/dist_graph_create.c index b2bc6c2a804..efb3eb1857f 100644 --- a/ompi/mpi/c/dist_graph_create.c +++ b/ompi/mpi/c/dist_graph_create.c @@ -6,6 +6,11 @@ * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). 
All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow * */ @@ -19,12 +24,11 @@ #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Dist_graph_create = PMPI_Dist_graph_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Dist_graph_create PMPI_Dist_graph_create #endif static const char FUNC_NAME[] = "MPI_Dist_graph_create"; @@ -43,10 +47,10 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[], if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm_old)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (OMPI_COMM_IS_INTER(comm_old)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (n < 0 || NULL == newcomm) { return OMPI_ERRHANDLER_INVOKE(comm_old, MPI_ERR_ARG, FUNC_NAME); @@ -80,12 +84,11 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[], /* Ensure there is a topo attached to this communicator */ if(OMPI_SUCCESS != (err = mca_topo_base_comm_select(comm_old, NULL, &topo, OMPI_COMM_DIST_GRAPH))) { - return OMPI_ERRHANDLER_INVOKE(comm_old, err, FUNC_NAME); + return OMPI_ERRHANDLER_INVOKE(comm_old, err, FUNC_NAME); } - /* XXX -- CONST -- do not cast away const -- update mca/topo */ - err = topo->topo.dist_graph.dist_graph_create(topo, comm_old, n, (int *) sources, (int *) degrees, - (int *) destinations, (int *) weights, info, + err = topo->topo.dist_graph.dist_graph_create(topo, comm_old, n, sources, degrees, + destinations, weights, info, reorder, newcomm); OMPI_ERRHANDLER_RETURN(err, comm_old, err, FUNC_NAME); } diff --git a/ompi/mpi/c/dist_graph_create_adjacent.c 
b/ompi/mpi/c/dist_graph_create_adjacent.c index f74d1595344..bf2f2cfa979 100644 --- a/ompi/mpi/c/dist_graph_create_adjacent.c +++ b/ompi/mpi/c/dist_graph_create_adjacent.c @@ -10,8 +10,13 @@ * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ * - * Author(s): Torsten Hoefler + * Additional copyrights may follow + * + * Author(s): Torsten Hoefler * */ @@ -25,12 +30,11 @@ #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Dist_graph_create_adjacent = PMPI_Dist_graph_create_adjacent #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Dist_graph_create_adjacent PMPI_Dist_graph_create_adjacent #endif static const char FUNC_NAME[] = "MPI_Dist_graph_create_adjacent"; @@ -91,13 +95,12 @@ int MPI_Dist_graph_create_adjacent(MPI_Comm comm_old, /* Ensure there is a topo attached to this communicator */ if(OMPI_SUCCESS != (err = mca_topo_base_comm_select(comm_old, NULL, &topo, OMPI_COMM_DIST_GRAPH))) { - return OMPI_ERRHANDLER_INVOKE(comm_old, err, FUNC_NAME); + return OMPI_ERRHANDLER_INVOKE(comm_old, err, FUNC_NAME); } - /* XXX -- CONST -- do not cast away const -- update mca/topo */ err = topo->topo.dist_graph.dist_graph_create_adjacent(topo, comm_old, indegree, - (int *) sources, (int *) sourceweights, outdegree, - (int *) destinations, (int *) destweights, info, + sources, sourceweights, outdegree, + destinations, destweights, info, reorder, comm_dist_graph); OMPI_ERRHANDLER_RETURN(err, comm_old, err, FUNC_NAME); } diff --git a/ompi/mpi/c/dist_graph_neighbors.c b/ompi/mpi/c/dist_graph_neighbors.c index a33db656e9b..e5819adb309 100644 --- a/ompi/mpi/c/dist_graph_neighbors.c +++ 
b/ompi/mpi/c/dist_graph_neighbors.c @@ -7,6 +7,11 @@ * reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow * */ @@ -20,12 +25,11 @@ #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Dist_graph_neighbors = PMPI_Dist_graph_neighbors #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Dist_graph_neighbors PMPI_Dist_graph_neighbors #endif static const char FUNC_NAME[] = "MPI_Dist_graph_neighbors"; @@ -45,11 +49,11 @@ int MPI_Dist_graph_neighbors(MPI_Comm comm, int maxindegree, if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (maxindegree < 0 || maxoutdegree < 0 || - (maxindegree > 0 && - (NULL == sources || NULL == sourceweights)) || + (maxindegree > 0 && + (NULL == sources || NULL == sourceweights)) || (maxoutdegree > 0 && (NULL == destinations || NULL == destweights))) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); diff --git a/ompi/mpi/c/dist_graph_neighbors_count.c b/ompi/mpi/c/dist_graph_neighbors_count.c index daa216339ab..dfc3ddc582e 100644 --- a/ompi/mpi/c/dist_graph_neighbors_count.c +++ b/ompi/mpi/c/dist_graph_neighbors_count.c @@ -7,6 +7,11 @@ * reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow * */ #include @@ -22,12 +27,11 @@ #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Dist_graph_neighbors_count = PMPI_Dist_graph_neighbors_count #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Dist_graph_neighbors_count PMPI_Dist_graph_neighbors_count #endif static const char FUNC_NAME[] = "MPI_Dist_graph_neighbors_count"; @@ -45,9 +49,9 @@ int MPI_Dist_graph_neighbors_count(MPI_Comm comm, int *inneighbors, if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); - } else if (NULL == inneighbors || NULL == outneighbors || + } else if (NULL == inneighbors || NULL == outneighbors || NULL == weighted) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } diff --git a/ompi/mpi/c/errhandler_c2f.c b/ompi/mpi/c/errhandler_c2f.c index 399527f0533..7c0519ec5e8 100644 --- a/ompi/mpi/c/errhandler_c2f.c +++ b/ompi/mpi/c/errhandler_c2f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/mpi/fortran/base/fint_2_int.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Errhandler_c2f = PMPI_Errhandler_c2f #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Errhandler_c2f PMPI_Errhandler_c2f #endif static const char FUNC_NAME[] = "MPI_Errhandler_c2f"; @@ -38,8 +39,6 @@ static const char FUNC_NAME[] = "MPI_Errhandler_c2f"; MPI_Fint MPI_Errhandler_c2f(MPI_Errhandler errhandler) { - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { diff --git a/ompi/mpi/c/errhandler_create.c b/ompi/mpi/c/errhandler_create.c index d9d56f68fe4..cae93f98f40 100644 --- a/ompi/mpi/c/errhandler_create.c +++ b/ompi/mpi/c/errhandler_create.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,15 +24,13 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Errhandler_create = PMPI_Errhandler_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Errhandler_create PMPI_Errhandler_create #endif - int MPI_Errhandler_create(MPI_Handler_function *function, MPI_Errhandler *errhandler) { @@ -38,5 +38,5 @@ int MPI_Errhandler_create(MPI_Handler_function *function, /* This is a deprecated -- just turn around and call the real function */ - return MPI_Comm_create_errhandler(function, errhandler); + return PMPI_Comm_create_errhandler(function, errhandler); } diff --git a/ompi/mpi/c/errhandler_f2c.c b/ompi/mpi/c/errhandler_f2c.c index 2c6bc5031cf..bf4dce1994f 100644 --- a/ompi/mpi/c/errhandler_f2c.c +++ b/ompi/mpi/c/errhandler_f2c.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/mpi/fortran/base/fint_2_int.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Errhandler_f2c = PMPI_Errhandler_f2c #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Errhandler_f2c PMPI_Errhandler_f2c #endif static const char FUNC_NAME[] = "MPI_Errhandler_f2c"; @@ -40,20 +41,18 @@ MPI_Errhandler MPI_Errhandler_f2c(MPI_Fint errhandler_f) { int eh_index = OMPI_FINT_2_INT(errhandler_f); - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } - + /* Per MPI-2:4.12.4, do not invoke an error handler if we get an invalid fortran handle. If we get an invalid fortran handle, return an invalid C handle. */ - if (eh_index < 0 || - eh_index >= + if (eh_index < 0 || + eh_index >= opal_pointer_array_get_size(&ompi_errhandler_f_to_c_table)) { return NULL; } diff --git a/ompi/mpi/c/errhandler_free.c b/ompi/mpi/c/errhandler_free.c index 2a6ab8b1cf6..c021e25e483 100644 --- a/ompi/mpi/c/errhandler_free.c +++ b/ompi/mpi/c/errhandler_free.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,12 +25,11 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Errhandler_free = PMPI_Errhandler_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Errhandler_free PMPI_Errhandler_free #endif static const char FUNC_NAME[] = "MPI_Errhandler_free"; @@ -37,8 +38,6 @@ static const char FUNC_NAME[] = "MPI_Errhandler_free"; int MPI_Errhandler_free(MPI_Errhandler *errhandler) { - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { @@ -48,7 +47,7 @@ int MPI_Errhandler_free(MPI_Errhandler *errhandler) actually free the underlying intrinsic object). This is ugly but necessary -- see below. */ if (NULL == errhandler || - (ompi_errhandler_is_intrinsic(*errhandler) && + (ompi_errhandler_is_intrinsic(*errhandler) && 1 == (*errhandler)->super.obj_reference_count)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, "MPI_Errhandler_free"); @@ -68,7 +67,7 @@ int MPI_Errhandler_free(MPI_Errhandler *errhandler) MPI_Finalize(); return 0; } - + So decrease the refcount here. */ OBJ_RELEASE(*errhandler); diff --git a/ompi/mpi/c/errhandler_get.c b/ompi/mpi/c/errhandler_get.c index e91c1be380a..a289ef2c63e 100644 --- a/ompi/mpi/c/errhandler_get.c +++ b/ompi/mpi/c/errhandler_get.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,25 +26,22 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Errhandler_get = PMPI_Errhandler_get #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Errhandler_get PMPI_Errhandler_get #endif static const char FUNC_NAME[] = "MPI_Errhandler_get"; -int MPI_Errhandler_get(MPI_Comm comm, MPI_Errhandler *errhandler) +int MPI_Errhandler_get(MPI_Comm comm, MPI_Errhandler *errhandler) { MEMCHECKER( memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } @@ -50,5 +49,5 @@ int MPI_Errhandler_get(MPI_Comm comm, MPI_Errhandler *errhandler) /* This is a deprecated -- just turn around and call the real function */ - return MPI_Comm_get_errhandler(comm, errhandler); + return PMPI_Comm_get_errhandler(comm, errhandler); } diff --git a/ompi/mpi/c/errhandler_set.c b/ompi/mpi/c/errhandler_set.c index 244927b6a58..7d572b92a30 100644 --- a/ompi/mpi/c/errhandler_set.c +++ b/ompi/mpi/c/errhandler_set.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Errhandler_set = PMPI_Errhandler_set #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Errhandler_set PMPI_Errhandler_set #endif static const char FUNC_NAME[] = "MPI_Errhandler_set"; @@ -41,8 +42,6 @@ int MPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler errhandler) memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } @@ -50,5 +49,5 @@ int MPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler errhandler) /* This is a deprecated -- just turn around and call the real function */ - return MPI_Comm_set_errhandler(comm, errhandler); + return PMPI_Comm_set_errhandler(comm, errhandler); } diff --git a/ompi/mpi/c/error_class.c b/ompi/mpi/c/error_class.c index b9472573cd5..94ae92c89a5 100644 --- a/ompi/mpi/c/error_class.c +++ b/ompi/mpi/c/error_class.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,22 +26,19 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/errhandler/errcode.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Error_class = PMPI_Error_class #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Error_class PMPI_Error_class #endif static const char FUNC_NAME[] = "MPI_Error_class"; -int MPI_Error_class(int errorcode, int *errorclass) +int MPI_Error_class(int errorcode, int *errorclass) { - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -48,8 +47,8 @@ int MPI_Error_class(int errorcode, int *errorclass) FUNC_NAME); } } - - + + *errorclass = ompi_mpi_errcode_get_class(errorcode); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/error_string.c b/ompi/mpi/c/error_string.c index 3b4c7dab57c..1acd153ef97 100644 --- a/ompi/mpi/c/error_string.c +++ b/ompi/mpi/c/error_string.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,23 +27,20 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/errhandler/errcode.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Error_string = PMPI_Error_string #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Error_string PMPI_Error_string #endif static const char FUNC_NAME[] = "MPI_Error_string"; -int MPI_Error_string(int errorcode, char *string, int *resultlen) +int MPI_Error_string(int errorcode, char *string, int *resultlen) { char *tmpstring; - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -50,10 +49,10 @@ int MPI_Error_string(int errorcode, char *string, int *resultlen) FUNC_NAME); } } - + tmpstring = ompi_mpi_errnum_get_string (errorcode); strncpy(string, tmpstring, MPI_MAX_ERROR_STRING); *resultlen = (int)strlen(string); - + return MPI_SUCCESS; } diff --git a/ompi/mpi/c/exscan.c b/ompi/mpi/c/exscan.c index e145b443bc9..d8498193022 100644 --- a/ompi/mpi/c/exscan.c +++ b/ompi/mpi/c/exscan.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,19 +31,18 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Exscan = PMPI_Exscan #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Exscan PMPI_Exscan #endif static const char FUNC_NAME[] = "MPI_Exscan"; int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int err; @@ -56,7 +57,7 @@ int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } @@ -82,13 +83,10 @@ int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ OBJ_RETAIN(op); - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_exscan((void *) sendbuf, recvbuf, count, + err = comm->c_coll.coll_exscan(sendbuf, recvbuf, count, datatype, op, comm, comm->c_coll.coll_exscan_module); OBJ_RELEASE(op); diff --git a/ompi/mpi/c/fetch_and_op.c b/ompi/mpi/c/fetch_and_op.c index 128e750670b..32c2fdc2fba 100644 --- a/ompi/mpi/c/fetch_and_op.c +++ b/ompi/mpi/c/fetch_and_op.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,16 +6,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,19 +33,18 @@ #include "ompi/mca/osc/osc.h" #include "ompi/datatype/ompi_datatype.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Fetch_and_op = PMPI_Fetch_and_op #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Fetch_and_op PMPI_Fetch_and_op #endif static const char FUNC_NAME[] = "MPI_Fetch_and_op"; -int MPI_Fetch_and_op(void *origin_addr, void *result_addr, MPI_Datatype datatype, - int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win) +int MPI_Fetch_and_op(const void *origin_addr, void *result_addr, MPI_Datatype datatype, + int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win) { int rc; @@ -54,7 +58,7 @@ int MPI_Fetch_and_op(void *origin_addr, void *result_addr, MPI_Datatype datatype } else if (ompi_win_peer_invalid(win, target_rank) && (MPI_PROC_NULL != target_rank)) { rc = MPI_ERR_RANK; - } else if ( target_disp < 0 ) { + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { rc = MPI_ERR_DISP; } else { OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, datatype, 1); @@ -64,8 +68,6 @@ int MPI_Fetch_and_op(void *origin_addr, void *result_addr, MPI_Datatype datatype if (MPI_PROC_NULL == 
target_rank) return MPI_SUCCESS; - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_fetch_and_op(origin_addr, result_addr, datatype, target_rank, target_disp, op, win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); diff --git a/ompi/mpi/c/file_c2f.c b/ompi/mpi/c/file_c2f.c index c563e547b25..bafdf662662 100644 --- a/ompi/mpi/c/file_c2f.c +++ b/ompi/mpi/c/file_c2f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_c2f = PMPI_File_c2f #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_c2f PMPI_File_c2f #endif static const char FUNC_NAME[] = "MPI_File_c2f"; @@ -39,8 +40,6 @@ static const char FUNC_NAME[] = "MPI_File_c2f"; MPI_Fint MPI_File_c2f(MPI_File file) { - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -56,6 +55,6 @@ MPI_Fint MPI_File_c2f(MPI_File file) return OMPI_INT_2_FINT(-1); } } - + return OMPI_INT_2_FINT(file->f_f_to_c_index); } diff --git a/ompi/mpi/c/file_call_errhandler.c b/ompi/mpi/c/file_call_errhandler.c index 6ca285e4bca..1f6783a1938 100644 --- a/ompi/mpi/c/file_call_errhandler.c +++ 
b/ompi/mpi/c/file_call_errhandler.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,22 +26,19 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_call_errhandler = PMPI_File_call_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_call_errhandler PMPI_File_call_errhandler #endif static const char FUNC_NAME[] = "MPI_File_call_errhandler"; -int MPI_File_call_errhandler(MPI_File fh, int errorcode) +int MPI_File_call_errhandler(MPI_File fh, int errorcode) { - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { diff --git a/ompi/mpi/c/file_close.c b/ompi/mpi/c/file_close.c index 76cc57c8cc6..ce11ba11bd2 100644 --- a/ompi/mpi/c/file_close.c +++ b/ompi/mpi/c/file_close.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,18 +25,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_close = PMPI_File_close #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_close PMPI_File_close #endif static const char FUNC_NAME[] = "MPI_File_close"; -int MPI_File_close(MPI_File *fh) +int MPI_File_close(MPI_File *fh) { int rc; @@ -51,8 +52,6 @@ int MPI_File_close(MPI_File *fh) } } - OPAL_CR_ENTER_LIBRARY(); - /* Release the MPI_File; the destructor releases the component, zeroes out fiels, etc. */ diff --git a/ompi/mpi/c/file_create_errhandler.c b/ompi/mpi/c/file_create_errhandler.c index d7fb3de578a..81094d3cf1f 100644 --- a/ompi/mpi/c/file_create_errhandler.c +++ b/ompi/mpi/c/file_create_errhandler.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_create_errhandler = PMPI_File_create_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_create_errhandler PMPI_File_create_errhandler #endif static const char FUNC_NAME[] = "MPI_File_create_errhandler"; @@ -44,18 +45,16 @@ int MPI_File_create_errhandler(MPI_File_errhandler_function *function, if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == function || + if (NULL == function || NULL == errhandler) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, "MPI_File_create_errhandler"); } } - OPAL_CR_ENTER_LIBRARY(); - /* Create and cache the errhandler. Sets a refcount of 1. */ - *errhandler = + *errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_C); diff --git a/ompi/mpi/c/file_delete.c b/ompi/mpi/c/file_delete.c index 6915e3f0456..04abd2be595 100644 --- a/ompi/mpi/c/file_delete.c +++ b/ompi/mpi/c/file_delete.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/mca/io/io.h" #include "ompi/mca/io/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_delete = PMPI_File_delete #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_delete PMPI_File_delete #endif static const char FUNC_NAME[] = "MPI_File_delete"; @@ -65,19 +66,16 @@ int MPI_File_delete(const char *filename, MPI_Info info) /* The io framework is only initialized lazily. If it hasn't already been initialized, do so now (note that MPI_FILE_OPEN and MPI_FILE_DELETE are the only two places that it will be - initialized). We might want to add a check to see if the + initialized). We might want to add a check to see if the framework is open instead of just incrementing the open count. */ if (OMPI_SUCCESS != (rc = mca_base_framework_open(&ompi_io_base_framework, 0))) { return OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Since there is no MPI_File handle associated with this function, the MCA has to do a selection and perform the action */ - /* XXX -- CONST -- do not cast away const -- update mca/io */ - rc = mca_io_base_delete((char *) filename, info); + rc = mca_io_base_delete(filename, info); OMPI_ERRHANDLER_RETURN(rc, MPI_FILE_NULL, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_f2c.c b/ompi/mpi/c/file_f2c.c index 3dc3c742d8e..3640a2a9ae5 100644 --- a/ompi/mpi/c/file_f2c.c +++ b/ompi/mpi/c/file_f2c.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,23 +28,20 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_f2c = PMPI_File_f2c #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_f2c PMPI_File_f2c #endif static const char FUNC_NAME[] = "MPI_File_f2c"; -MPI_File MPI_File_f2c(MPI_Fint file_f) +MPI_File MPI_File_f2c(MPI_Fint file_f) { int file_index = OMPI_FINT_2_INT(file_f); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } @@ -52,7 +51,7 @@ MPI_File MPI_File_f2c(MPI_Fint file_f) return an invalid C handle. */ if (file_index < 0 || - file_index >= + file_index >= opal_pointer_array_get_size(&ompi_file_f_to_c_table)) { return NULL; } diff --git a/ompi/mpi/c/file_get_amode.c b/ompi/mpi/c/file_get_amode.c index 6dd37b96dfa..f000948862a 100644 --- a/ompi/mpi/c/file_get_amode.c +++ b/ompi/mpi/c/file_get_amode.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_get_amode = PMPI_File_get_amode #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_get_amode PMPI_File_get_amode #endif static const char FUNC_NAME[] = "MPI_File_get_amode"; @@ -51,8 +52,6 @@ int MPI_File_get_amode(MPI_File fh, int *amode) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -67,6 +66,6 @@ int MPI_File_get_amode(MPI_File fh, int *amode) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_get_atomicity.c b/ompi/mpi/c/file_get_atomicity.c index 78c448c1cc5..e1424643c03 100644 --- a/ompi/mpi/c/file_get_atomicity.c +++ b/ompi/mpi/c/file_get_atomicity.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_get_atomicity = PMPI_File_get_atomicity #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_get_atomicity PMPI_File_get_atomicity #endif static const char FUNC_NAME[] = "MPI_File_get_atomicity"; @@ -51,8 +52,6 @@ int MPI_File_get_atomicity(MPI_File fh, int *flag) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -67,6 +66,6 @@ int MPI_File_get_atomicity(MPI_File fh, int *flag) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_get_byte_offset.c b/ompi/mpi/c/file_get_byte_offset.c index 5c09db19f62..92ab09d4523 100644 --- a/ompi/mpi/c/file_get_byte_offset.c +++ b/ompi/mpi/c/file_get_byte_offset.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_get_byte_offset = PMPI_File_get_byte_offset #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_get_byte_offset PMPI_File_get_byte_offset #endif static const char FUNC_NAME[] = "MPI_File_get_byte_offset"; @@ -52,8 +53,6 @@ int MPI_File_get_byte_offset(MPI_File fh, MPI_Offset offset, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -68,6 +67,6 @@ int MPI_File_get_byte_offset(MPI_File fh, MPI_Offset offset, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_get_errhandler.c b/ompi/mpi/c/file_get_errhandler.c index 49eb8c7e60e..b969e9c9207 100644 --- a/ompi/mpi/c/file_get_errhandler.c +++ b/ompi/mpi/c/file_get_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. 
All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,23 +30,20 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_get_errhandler = PMPI_File_get_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_get_errhandler PMPI_File_get_errhandler #endif static const char FUNC_NAME[] = "MPI_File_get_errhandler"; -int MPI_File_get_errhandler( MPI_File file, MPI_Errhandler *errhandler) +int MPI_File_get_errhandler( MPI_File file, MPI_Errhandler *errhandler) { MPI_Errhandler tmp; - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { @@ -64,7 +66,7 @@ int MPI_File_get_errhandler( MPI_File file, MPI_Errhandler *errhandler) error_handler became atomic. */ do { tmp = file->error_handler; - } while (!OPAL_ATOMIC_CMPSET(&(file->error_handler), tmp, tmp)); + } while (!OPAL_ATOMIC_CMPSET_PTR(&(file->error_handler), tmp, tmp)); /* Retain the errhandler, corresponding to object refcount decrease in errhandler_free.c. */ diff --git a/ompi/mpi/c/file_get_group.c b/ompi/mpi/c/file_get_group.c index 3a802b3e158..3e09186377b 100644 --- a/ompi/mpi/c/file_get_group.c +++ b/ompi/mpi/c/file_get_group.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_get_group = PMPI_File_get_group #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_get_group PMPI_File_get_group #endif static const char FUNC_NAME[] = "MPI_File_get_group"; @@ -51,8 +52,6 @@ int MPI_File_get_group(MPI_File fh, MPI_Group *group) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Does not need to invoke a back-end io function */ rc = ompi_comm_group (fh->f_comm, group); diff --git a/ompi/mpi/c/file_get_info.c b/ompi/mpi/c/file_get_info.c index b2b2447cc1d..5248fa37cfd 100644 --- a/ompi/mpi/c/file_get_info.c +++ b/ompi/mpi/c/file_get_info.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_get_info = PMPI_File_get_info #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_get_info PMPI_File_get_info #endif static const char FUNC_NAME[] = "MPI_File_get_info"; @@ -51,8 +52,6 @@ int MPI_File_get_info(MPI_File fh, MPI_Info *info_used) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -67,6 +66,6 @@ int MPI_File_get_info(MPI_File fh, MPI_Info *info_used) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_get_position.c b/ompi/mpi/c/file_get_position.c index e161b1f4c0c..1f0d44c9c7e 100644 --- a/ompi/mpi/c/file_get_position.c +++ b/ompi/mpi/c/file_get_position.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_get_position = PMPI_File_get_position #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_get_position PMPI_File_get_position #endif static const char FUNC_NAME[] = "MPI_File_get_position"; @@ -51,8 +52,6 @@ int MPI_File_get_position(MPI_File fh, MPI_Offset *offset) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -67,6 +66,6 @@ int MPI_File_get_position(MPI_File fh, MPI_Offset *offset) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_get_position_shared.c b/ompi/mpi/c/file_get_position_shared.c index e8bdd26337e..944d3dd1008 100644 --- a/ompi/mpi/c/file_get_position_shared.c +++ b/ompi/mpi/c/file_get_position_shared.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_get_position_shared = PMPI_File_get_position_shared #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_get_position_shared PMPI_File_get_position_shared #endif static const char FUNC_NAME[] = "MPI_File_get_position_shared"; @@ -51,8 +52,6 @@ int MPI_File_get_position_shared(MPI_File fh, MPI_Offset *offset) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -67,6 +66,6 @@ int MPI_File_get_position_shared(MPI_File fh, MPI_Offset *offset) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_get_size.c b/ompi/mpi/c/file_get_size.c index fa103179cc5..e9d94efed19 100644 --- a/ompi/mpi/c/file_get_size.c +++ b/ompi/mpi/c/file_get_size.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_get_size = PMPI_File_get_size #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_get_size PMPI_File_get_size #endif static const char FUNC_NAME[] = "MPI_File_get_size"; @@ -51,8 +52,6 @@ int MPI_File_get_size(MPI_File fh, MPI_Offset *size) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -67,6 +66,6 @@ int MPI_File_get_size(MPI_File fh, MPI_Offset *size) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_get_type_extent.c b/ompi/mpi/c/file_get_type_extent.c index 1e5beccd9c9..79d1b9ec705 100644 --- a/ompi/mpi/c/file_get_type_extent.c +++ b/ompi/mpi/c/file_get_type_extent.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/file/file.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_get_type_extent = PMPI_File_get_type_extent #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_get_type_extent PMPI_File_get_type_extent #endif static const char FUNC_NAME[] = "MPI_File_get_type_extent"; @@ -57,8 +58,6 @@ int MPI_File_get_type_extent(MPI_File fh, MPI_Datatype datatype, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -73,6 +72,6 @@ int MPI_File_get_type_extent(MPI_File fh, MPI_Datatype datatype, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_get_view.c b/ompi/mpi/c/file_get_view.c index 2780762ea85..ac828b3e125 100644 --- a/ompi/mpi/c/file_get_view.c +++ b/ompi/mpi/c/file_get_view.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_get_view = PMPI_File_get_view #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_get_view PMPI_File_get_view #endif static const char FUNC_NAME[] = "MPI_File_get_view"; @@ -55,8 +56,6 @@ int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -71,6 +70,6 @@ int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_iread.c b/ompi/mpi/c/file_iread.c index e8b54937be3..029d81151c5 100644 --- a/ompi/mpi/c/file_iread.c +++ b/ompi/mpi/c/file_iread.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,18 +30,17 @@ #include "ompi/mca/io/base/io_base_request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_iread = PMPI_File_iread #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_iread PMPI_File_iread #endif static const char FUNC_NAME[] = "MPI_File_iread"; -int MPI_File_iread(MPI_File fh, void *buf, int count, +int MPI_File_iread(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Request *request) { int rc; @@ -47,7 +48,7 @@ int MPI_File_iread(MPI_File fh, void *buf, int count, MEMCHECKER( memchecker_datatype(datatype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -64,8 +65,6 @@ int MPI_File_iread(MPI_File fh, void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: diff --git a/ompi/mpi/c/file_iread_all.c b/ompi/mpi/c/file_iread_all.c new file mode 100644 index 00000000000..f76da8c27d3 --- /dev/null +++ b/ompi/mpi/c/file_iread_all.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 University of Houston. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/file/file.h" +#include "ompi/mca/io/io.h" +#include "ompi/mca/io/base/io_base_request.h" +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_File_iread_all = PMPI_File_iread_all +#endif +#define MPI_File_iread_all PMPI_File_iread_all +#endif + +static const char FUNC_NAME[] = "MPI_File_iread_all"; + + +int MPI_File_iread_all(MPI_File fh, void *buf, int count, + MPI_Datatype datatype, MPI_Request *request) +{ + int rc; + + MEMCHECKER( + memchecker_datatype(datatype); + ); + + if (MPI_PARAM_CHECK) { + rc = MPI_SUCCESS; + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (ompi_file_invalid(fh)) { + fh = MPI_FILE_NULL; + rc = MPI_ERR_FILE; + } else if (count < 0) { + rc = MPI_ERR_COUNT; + } else if (NULL == request) { + rc = MPI_ERR_REQUEST; + } else { + OMPI_CHECK_DATATYPE_FOR_RECV(rc, datatype, count); + } + OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); + } + + /* Call the back-end io component function */ + switch (fh->f_io_version) { + case MCA_IO_BASE_V_2_0_0: + rc = fh->f_io_selected_module.v2_0_0. + io_module_file_iread_all(fh, buf, count, datatype, request); + break; + + default: + rc = MPI_ERR_INTERN; + break; + } + + /* All done */ + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/file_iread_at.c b/ompi/mpi/c/file_iread_at.c index 9fac64e4d8a..b10df7a7451 100644 --- a/ompi/mpi/c/file_iread_at.c +++ b/ompi/mpi/c/file_iread_at.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,12 +30,11 @@ #include "ompi/mca/io/base/io_base_request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_iread_at = PMPI_File_iread_at #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_iread_at PMPI_File_iread_at #endif static const char FUNC_NAME[] = "MPI_File_iread_at"; @@ -47,7 +48,7 @@ int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf, MEMCHECKER( memchecker_datatype(datatype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -64,13 +65,11 @@ int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: rc = fh->f_io_selected_module.v2_0_0. 
- io_module_file_iread_at(fh, offset, buf, count, datatype, + io_module_file_iread_at(fh, offset, buf, count, datatype, request); break; @@ -80,6 +79,6 @@ int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_iread_at_all.c b/ompi/mpi/c/file_iread_at_all.c new file mode 100644 index 00000000000..909a4e33263 --- /dev/null +++ b/ompi/mpi/c/file_iread_at_all.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/file/file.h" +#include "ompi/mca/io/io.h" +#include "ompi/mca/io/base/io_base_request.h" +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_File_iread_at_all = PMPI_File_iread_at_all +#endif +#define MPI_File_iread_at_all PMPI_File_iread_at_all +#endif + +static const char FUNC_NAME[] = "MPI_File_iread_at_all"; + + +int MPI_File_iread_at_all(MPI_File fh, MPI_Offset offset, void *buf, + int count, MPI_Datatype datatype, MPI_Request *request) +{ + int rc; + + MEMCHECKER( + memchecker_datatype(datatype); + ); + + if (MPI_PARAM_CHECK) { + rc = MPI_SUCCESS; + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (ompi_file_invalid(fh)) { + fh = MPI_FILE_NULL; + rc = MPI_ERR_FILE; + } else if (count < 0) { + rc = MPI_ERR_COUNT; + } else if (NULL == request) { + rc = MPI_ERR_REQUEST; + } else { + OMPI_CHECK_DATATYPE_FOR_RECV(rc, datatype, count); + } + OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); + } + + /* Call the back-end io component function */ + switch (fh->f_io_version) { + case MCA_IO_BASE_V_2_0_0: + rc = fh->f_io_selected_module.v2_0_0. + io_module_file_iread_at_all(fh, offset, buf, count, datatype, + request); + break; + + default: + rc = MPI_ERR_INTERN; + break; + } + + /* All done */ + + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/file_iread_shared.c b/ompi/mpi/c/file_iread_shared.c index d30c7b3206c..6c49f751e43 100644 --- a/ompi/mpi/c/file_iread_shared.c +++ b/ompi/mpi/c/file_iread_shared.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,12 +30,11 @@ #include "ompi/mca/io/base/io_base_request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_iread_shared = PMPI_File_iread_shared #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_iread_shared PMPI_File_iread_shared #endif static const char FUNC_NAME[] = "MPI_File_iread_shared"; @@ -47,7 +48,7 @@ int MPI_File_iread_shared(MPI_File fh, void *buf, int count, MEMCHECKER( memchecker_datatype(datatype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -64,8 +65,6 @@ int MPI_File_iread_shared(MPI_File fh, void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: @@ -79,6 +78,6 @@ int MPI_File_iread_shared(MPI_File fh, void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_iwrite.c b/ompi/mpi/c/file_iwrite.c index 58205181949..e2f04db2f83 100644 --- a/ompi/mpi/c/file_iwrite.c +++ b/ompi/mpi/c/file_iwrite.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,12 +33,11 @@ #include "ompi/mca/io/base/io_base_request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_iwrite = PMPI_File_iwrite #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_iwrite PMPI_File_iwrite #endif static const char FUNC_NAME[] = "MPI_File_iwrite"; @@ -68,14 +69,11 @@ int MPI_File_iwrite(MPI_File fh, const void *buf, int count, MPI_Datatype OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_iwrite(fh, (void *) buf, count, datatype, request); + io_module_file_iwrite(fh, buf, count, datatype, request); break; default: diff --git a/ompi/mpi/c/file_iwrite_all.c b/ompi/mpi/c/file_iwrite_all.c new file mode 100644 index 00000000000..ddae3c3b0fe --- /dev/null +++ b/ompi/mpi/c/file_iwrite_all.c @@ -0,0 +1,87 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/file/file.h" +#include "ompi/mca/io/io.h" +#include "ompi/mca/io/base/io_base_request.h" +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_File_iwrite_all = PMPI_File_iwrite_all +#endif +#define MPI_File_iwrite_all PMPI_File_iwrite_all +#endif + +static const char FUNC_NAME[] = "MPI_File_iwrite_all"; + + +int MPI_File_iwrite_all(MPI_File fh, const void *buf, int count, MPI_Datatype + datatype, MPI_Request *request) +{ + int rc; + + MEMCHECKER( + memchecker_datatype(datatype); + memchecker_call(&opal_memchecker_base_isdefined, buf, count, datatype); + ); + + if (MPI_PARAM_CHECK) { + rc = MPI_SUCCESS; + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (ompi_file_invalid(fh)) { + fh = MPI_FILE_NULL; + rc = MPI_ERR_FILE; + } else if (count < 0) { + rc = MPI_ERR_COUNT; + } else if (NULL == request) { + rc = MPI_ERR_REQUEST; + } else { + OMPI_CHECK_DATATYPE_FOR_SEND(rc, datatype, count); + } + OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); + } + + /* Call the back-end io 
component function */ + switch (fh->f_io_version) { + case MCA_IO_BASE_V_2_0_0: + rc = fh->f_io_selected_module.v2_0_0. + io_module_file_iwrite_all(fh, buf, count, datatype, request); + break; + + default: + rc = MPI_ERR_INTERN; + break; + } + + /* All done */ + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/file_iwrite_at.c b/ompi/mpi/c/file_iwrite_at.c index 5185d77571e..7dca089212d 100644 --- a/ompi/mpi/c/file_iwrite_at.c +++ b/ompi/mpi/c/file_iwrite_at.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,19 +33,18 @@ #include "ompi/mca/io/base/io_base_request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_iwrite_at = PMPI_File_iwrite_at #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_iwrite_at PMPI_File_iwrite_at #endif static const char FUNC_NAME[] = "MPI_File_iwrite_at"; int MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, const void *buf, - int count, MPI_Datatype datatype, + int count, MPI_Datatype datatype, MPI_Request *request) { int rc; @@ -69,14 +70,11 @@ int MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, const void *buf, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_iwrite_at(fh, offset, (void *) buf, count, datatype, + io_module_file_iwrite_at(fh, offset, buf, count, datatype, request); break; @@ -86,6 +84,6 @@ int MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, const void *buf, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_iwrite_at_all.c b/ompi/mpi/c/file_iwrite_at_all.c new file mode 100644 index 00000000000..d81cfcc5ff4 --- /dev/null +++ b/ompi/mpi/c/file_iwrite_at_all.c @@ -0,0 +1,90 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/file/file.h" +#include "ompi/mca/io/io.h" +#include "ompi/mca/io/base/io_base_request.h" +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_File_iwrite_at_all = PMPI_File_iwrite_at_all +#endif +#define MPI_File_iwrite_at_all PMPI_File_iwrite_at_all +#endif + +static const char FUNC_NAME[] = "MPI_File_iwrite_at_all"; + + +int MPI_File_iwrite_at_all(MPI_File fh, MPI_Offset offset, const void *buf, + int count, MPI_Datatype datatype, + MPI_Request *request) +{ + int rc; + + MEMCHECKER( + memchecker_datatype(datatype); + memchecker_call(&opal_memchecker_base_isdefined, buf, count, datatype); + ); + + if (MPI_PARAM_CHECK) { + rc = MPI_SUCCESS; + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (ompi_file_invalid(fh)) { + fh = MPI_FILE_NULL; + rc = MPI_ERR_FILE; + } else if (count < 0) { + rc = MPI_ERR_COUNT; + } else if (NULL == request) { + rc = MPI_ERR_REQUEST; + } else { + OMPI_CHECK_DATATYPE_FOR_SEND(rc, datatype, count); + } + OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); + } + + /* Call the back-end io component function */ + switch (fh->f_io_version) { + case MCA_IO_BASE_V_2_0_0: + rc = 
fh->f_io_selected_module.v2_0_0. + io_module_file_iwrite_at_all(fh, offset, buf, count, datatype, + request); + break; + + default: + rc = MPI_ERR_INTERN; + break; + } + + /* All done */ + + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/file_iwrite_shared.c b/ompi/mpi/c/file_iwrite_shared.c index 102e06f822e..90af5affa15 100644 --- a/ompi/mpi/c/file_iwrite_shared.c +++ b/ompi/mpi/c/file_iwrite_shared.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,12 +33,11 @@ #include "ompi/mca/io/base/io_base_request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_iwrite_shared = PMPI_File_iwrite_shared #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_iwrite_shared PMPI_File_iwrite_shared #endif static const char FUNC_NAME[] = "MPI_File_iwrite_shared"; @@ -67,14 +68,11 @@ int MPI_File_iwrite_shared(MPI_File fh, const void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_iwrite_shared(fh, (void *) buf, count, datatype, request); + io_module_file_iwrite_shared(fh, buf, count, datatype, request); break; default: @@ -83,6 +81,6 @@ int MPI_File_iwrite_shared(MPI_File fh, const void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_open.c b/ompi/mpi/c/file_open.c index 3cdf3cccc6b..a243e75589d 100644 --- a/ompi/mpi/c/file_open.c +++ b/ompi/mpi/c/file_open.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,12 +34,11 @@ #include "ompi/mca/io/base/base.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_open = PMPI_File_open #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_open PMPI_File_open #endif static const char FUNC_NAME[] = "MPI_File_open"; @@ -65,7 +66,7 @@ int MPI_File_open(MPI_Comm comm, const char *filename, int amode, return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_COMM, FUNC_NAME); } - + } /* Note that MPI-2:9.7 (p265 in the ps; p261 in the pdf) says that @@ -85,13 +86,10 @@ int MPI_File_open(MPI_Comm comm, const char *filename, int amode, return OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Create an empty MPI_File handle */ *fh = MPI_FILE_NULL; - /* XXX -- CONST -- do not cast away const -- update mca/io */ - rc = ompi_file_open(comm, (char *) filename, amode, info, fh); + rc = ompi_file_open(comm, filename, amode, info, fh); /* Creating the file handle also selects a component to use, creates a module, and calls file_open() on the module. So diff --git a/ompi/mpi/c/file_preallocate.c b/ompi/mpi/c/file_preallocate.c index 23f764f35e1..0f13c4d91ed 100644 --- a/ompi/mpi/c/file_preallocate.c +++ b/ompi/mpi/c/file_preallocate.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_preallocate = PMPI_File_preallocate #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_preallocate PMPI_File_preallocate #endif static const char FUNC_NAME[] = "MPI_File_preallocate"; @@ -49,8 +50,6 @@ int MPI_File_preallocate(MPI_File fh, MPI_Offset size) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -65,6 +64,6 @@ int MPI_File_preallocate(MPI_File fh, MPI_Offset size) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read.c b/ompi/mpi/c/file_read.c index df8eeed62cd..4df5785a4e4 100644 --- a/ompi/mpi/c/file_read.c +++ b/ompi/mpi/c/file_read.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,18 +28,17 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read = PMPI_File_read #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read PMPI_File_read #endif static const char FUNC_NAME[] = "MPI_File_read"; -int MPI_File_read(MPI_File fh, void *buf, int count, +int MPI_File_read(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status) { int rc; @@ -45,7 +46,7 @@ int MPI_File_read(MPI_File fh, void *buf, int count, MEMCHECKER( memchecker_datatype(datatype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -60,8 +61,6 @@ int MPI_File_read(MPI_File fh, void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -76,6 +75,6 @@ int MPI_File_read(MPI_File fh, void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read_all.c b/ompi/mpi/c/file_read_all.c index b6fbe84f5a7..9d8d7cdc170 100644 --- a/ompi/mpi/c/file_read_all.c +++ b/ompi/mpi/c/file_read_all.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read_all = PMPI_File_read_all #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read_all PMPI_File_read_all #endif static const char FUNC_NAME[] = "MPI_File_read_all"; @@ -60,8 +61,6 @@ int MPI_File_read_all(MPI_File fh, void *buf, int count, MPI_Datatype OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -76,6 +75,6 @@ int MPI_File_read_all(MPI_File fh, void *buf, int count, MPI_Datatype } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read_all_begin.c b/ompi/mpi/c/file_read_all_begin.c index 9ba2781b47f..c818623f877 100644 --- a/ompi/mpi/c/file_read_all_begin.c +++ b/ompi/mpi/c/file_read_all_begin.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,26 +28,25 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read_all_begin = PMPI_File_read_all_begin #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read_all_begin PMPI_File_read_all_begin #endif static const char FUNC_NAME[] = "MPI_File_read_all_begin"; int MPI_File_read_all_begin(MPI_File fh, void *buf, int count, - MPI_Datatype datatype) + MPI_Datatype datatype) { int rc; MEMCHECKER( memchecker_datatype(datatype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -60,8 +61,6 @@ int MPI_File_read_all_begin(MPI_File fh, void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -76,6 +75,6 @@ int MPI_File_read_all_begin(MPI_File fh, void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read_all_end.c b/ompi/mpi/c/file_read_all_end.c index 06d4058f50a..0fa6d7a2a9e 100644 --- a/ompi/mpi/c/file_read_all_end.c +++ b/ompi/mpi/c/file_read_all_end.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read_all_end = PMPI_File_read_all_end #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read_all_end PMPI_File_read_all_end #endif static const char FUNC_NAME[] = "MPI_File_read_all_end"; @@ -49,7 +50,6 @@ int MPI_File_read_all_end(MPI_File fh, void *buf, MPI_Status *status) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); /* Call the back-end io component function */ @@ -65,6 +65,6 @@ int MPI_File_read_all_end(MPI_File fh, void *buf, MPI_Status *status) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read_at.c b/ompi/mpi/c/file_read_at.c index 8ada29e79f5..87eb9d3ec2d 100644 --- a/ompi/mpi/c/file_read_at.c +++ b/ompi/mpi/c/file_read_at.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read_at = PMPI_File_read_at #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read_at PMPI_File_read_at #endif static const char FUNC_NAME[] = "MPI_File_read_at"; @@ -45,7 +46,7 @@ int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, MEMCHECKER( memchecker_datatype(datatype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -60,8 +61,6 @@ int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -76,6 +75,6 @@ int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read_at_all.c b/ompi/mpi/c/file_read_at_all.c index 6f13146d176..9f988da1db6 100644 --- a/ompi/mpi/c/file_read_at_all.c +++ b/ompi/mpi/c/file_read_at_all.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,19 +28,18 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read_at_all = PMPI_File_read_at_all #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read_at_all PMPI_File_read_at_all #endif static const char FUNC_NAME[] = "MPI_File_read_at_all"; int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf, - int count, MPI_Datatype datatype, + int count, MPI_Datatype datatype, MPI_Status *status) { int rc; @@ -46,7 +47,7 @@ int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf, MEMCHECKER( memchecker_datatype(datatype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -61,14 +62,12 @@ int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: rc = fh->f_io_selected_module.v2_0_0. - io_module_file_read_at_all(fh, offset, buf, count, datatype, + io_module_file_read_at_all(fh, offset, buf, count, datatype, status); break; @@ -78,6 +77,6 @@ int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read_at_all_begin.c b/ompi/mpi/c/file_read_at_all_begin.c index 9a1c1f1e2ea..99b4a512311 100644 --- a/ompi/mpi/c/file_read_at_all_begin.c +++ b/ompi/mpi/c/file_read_at_all_begin.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read_at_all_begin = PMPI_File_read_at_all_begin #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read_at_all_begin PMPI_File_read_at_all_begin #endif static const char FUNC_NAME[] = "MPI_File_read_at_all_begin"; @@ -45,7 +46,7 @@ int MPI_File_read_at_all_begin(MPI_File fh, MPI_Offset offset, void *buf, MEMCHECKER( memchecker_datatype(datatype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -60,8 +61,6 @@ int MPI_File_read_at_all_begin(MPI_File fh, MPI_Offset offset, void *buf, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -76,6 +75,6 @@ int MPI_File_read_at_all_begin(MPI_File fh, MPI_Offset offset, void *buf, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read_at_all_end.c b/ompi/mpi/c/file_read_at_all_end.c index 4a8b71f0ae7..b83ebdc265d 100644 --- a/ompi/mpi/c/file_read_at_all_end.c +++ b/ompi/mpi/c/file_read_at_all_end.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read_at_all_end = PMPI_File_read_at_all_end #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read_at_all_end PMPI_File_read_at_all_end #endif static const char FUNC_NAME[] = "MPI_File_read_at_all_end"; @@ -49,8 +50,6 @@ int MPI_File_read_at_all_end(MPI_File fh, void *buf, MPI_Status *status) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -65,6 +64,6 @@ int MPI_File_read_at_all_end(MPI_File fh, void *buf, MPI_Status *status) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read_ordered.c b/ompi/mpi/c/file_read_ordered.c index 5cd29e66aec..b784e411ed6 100644 --- a/ompi/mpi/c/file_read_ordered.c +++ b/ompi/mpi/c/file_read_ordered.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/file/file.h" #include "ompi/datatype/ompi_datatype.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read_ordered = PMPI_File_read_ordered #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read_ordered PMPI_File_read_ordered #endif static const char FUNC_NAME[] = "MPI_File_read_ordered"; @@ -55,8 +56,6 @@ int MPI_File_read_ordered(MPI_File fh, void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -71,6 +70,6 @@ int MPI_File_read_ordered(MPI_File fh, void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read_ordered_begin.c b/ompi/mpi/c/file_read_ordered_begin.c index eb925672bab..d9a339f3fac 100644 --- a/ompi/mpi/c/file_read_ordered_begin.c +++ b/ompi/mpi/c/file_read_ordered_begin.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read_ordered_begin = PMPI_File_read_ordered_begin #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read_ordered_begin PMPI_File_read_ordered_begin #endif static const char FUNC_NAME[] = "MPI_File_read_ordered_begin"; @@ -45,7 +46,7 @@ int MPI_File_read_ordered_begin(MPI_File fh, void *buf, int count, MEMCHECKER( memchecker_datatype(datatype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -60,8 +61,6 @@ int MPI_File_read_ordered_begin(MPI_File fh, void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -76,6 +75,6 @@ int MPI_File_read_ordered_begin(MPI_File fh, void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read_ordered_end.c b/ompi/mpi/c/file_read_ordered_end.c index 22a2d07daf5..179932cc07d 100644 --- a/ompi/mpi/c/file_read_ordered_end.c +++ b/ompi/mpi/c/file_read_ordered_end.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read_ordered_end = PMPI_File_read_ordered_end #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read_ordered_end PMPI_File_read_ordered_end #endif static const char FUNC_NAME[] = "MPI_File_read_ordered_end"; @@ -49,8 +50,6 @@ int MPI_File_read_ordered_end(MPI_File fh, void *buf, MPI_Status *status) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -65,6 +64,6 @@ int MPI_File_read_ordered_end(MPI_File fh, void *buf, MPI_Status *status) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_read_shared.c b/ompi/mpi/c/file_read_shared.c index 76ccc732bc4..e2eda745c6e 100644 --- a/ompi/mpi/c/file_read_shared.c +++ b/ompi/mpi/c/file_read_shared.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_read_shared = PMPI_File_read_shared #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_read_shared PMPI_File_read_shared #endif static const char FUNC_NAME[] = "MPI_File_read_shared"; @@ -45,7 +46,7 @@ int MPI_File_read_shared(MPI_File fh, void *buf, int count, MEMCHECKER( memchecker_datatype(datatype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -60,8 +61,6 @@ int MPI_File_read_shared(MPI_File fh, void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -76,6 +75,6 @@ int MPI_File_read_shared(MPI_File fh, void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_seek.c b/ompi/mpi/c/file_seek.c index 867ebb74ad8..0055b530d2b 100644 --- a/ompi/mpi/c/file_seek.c +++ b/ompi/mpi/c/file_seek.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,18 +26,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_seek = PMPI_File_seek #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_seek PMPI_File_seek #endif static const char FUNC_NAME[] = "MPI_File_seek"; -int MPI_File_seek(MPI_File fh, MPI_Offset offset, int whence) +int MPI_File_seek(MPI_File fh, MPI_Offset offset, int whence) { int rc; @@ -52,8 +53,6 @@ int MPI_File_seek(MPI_File fh, MPI_Offset offset, int whence) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -68,6 +67,6 @@ int MPI_File_seek(MPI_File fh, MPI_Offset offset, int whence) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_seek_shared.c b/ompi/mpi/c/file_seek_shared.c index 360cf9444b0..b96e5503de0 100644 --- a/ompi/mpi/c/file_seek_shared.c +++ b/ompi/mpi/c/file_seek_shared.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_seek_shared = PMPI_File_seek_shared #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_seek_shared PMPI_File_seek_shared #endif static const char FUNC_NAME[] = "MPI_File_seek_shared"; @@ -52,8 +53,6 @@ int MPI_File_seek_shared(MPI_File fh, MPI_Offset offset, int whence) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -68,6 +67,6 @@ int MPI_File_seek_shared(MPI_File fh, MPI_Offset offset, int whence) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_set_atomicity.c b/ompi/mpi/c/file_set_atomicity.c index 3973ec417e1..670021adc53 100644 --- a/ompi/mpi/c/file_set_atomicity.c +++ b/ompi/mpi/c/file_set_atomicity.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_set_atomicity = PMPI_File_set_atomicity #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_set_atomicity PMPI_File_set_atomicity #endif static const char FUNC_NAME[] = "MPI_File_set_atomicity"; @@ -49,8 +50,6 @@ int MPI_File_set_atomicity(MPI_File fh, int flag) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -65,6 +64,6 @@ int MPI_File_set_atomicity(MPI_File fh, int flag) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_set_errhandler.c b/ompi/mpi/c/file_set_errhandler.c index ac1c24d9e12..190ea4cdb20 100644 --- a/ompi/mpi/c/file_set_errhandler.c +++ b/ompi/mpi/c/file_set_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,23 +29,20 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_set_errhandler = PMPI_File_set_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_set_errhandler PMPI_File_set_errhandler #endif static const char FUNC_NAME[] = "MPI_File_set_errhandler"; -int MPI_File_set_errhandler( MPI_File file, MPI_Errhandler errhandler) +int MPI_File_set_errhandler( MPI_File file, MPI_Errhandler errhandler) { MPI_Errhandler tmp; - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { @@ -55,7 +57,7 @@ int MPI_File_set_errhandler( MPI_File file, MPI_Errhandler errhandler) FUNC_NAME); } else if (NULL == errhandler || MPI_ERRHANDLER_NULL == errhandler || - (OMPI_ERRHANDLER_TYPE_FILE != errhandler->eh_mpi_object_type && + (OMPI_ERRHANDLER_TYPE_FILE != errhandler->eh_mpi_object_type && OMPI_ERRHANDLER_TYPE_PREDEFINED != errhandler->eh_mpi_object_type) ) { return OMPI_ERRHANDLER_INVOKE(file, MPI_ERR_ARG, FUNC_NAME); } @@ -67,9 +69,7 @@ int MPI_File_set_errhandler( MPI_File file, MPI_Errhandler errhandler) /* Ditch the old errhandler, and decrement its refcount. On 64 bits environments we have to make sure the reading of the error_handler became atomic. */ - do { - tmp = file->error_handler; - } while (!OPAL_ATOMIC_CMPSET(&(file->error_handler), tmp, errhandler)); + tmp = OPAL_ATOMIC_SWAP_PTR (&file->error_handler, errhandler); OBJ_RELEASE(tmp); /* All done */ diff --git a/ompi/mpi/c/file_set_info.c b/ompi/mpi/c/file_set_info.c index 9e07d868a7e..57d7f81e0e9 100644 --- a/ompi/mpi/c/file_set_info.c +++ b/ompi/mpi/c/file_set_info.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/info/info.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_set_info = PMPI_File_set_info #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_set_info PMPI_File_set_info #endif static const char FUNC_NAME[] = "MPI_File_set_info"; @@ -50,8 +51,6 @@ int MPI_File_set_info(MPI_File fh, MPI_Info info) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -66,6 +65,6 @@ int MPI_File_set_info(MPI_File fh, MPI_Info info) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_set_size.c b/ompi/mpi/c/file_set_size.c index 522f181f114..a966915e97c 100644 --- a/ompi/mpi/c/file_set_size.c +++ b/ompi/mpi/c/file_set_size.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. 
All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_set_size = PMPI_File_set_size #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_set_size PMPI_File_set_size #endif static const char FUNC_NAME[] = "MPI_File_set_size"; @@ -49,8 +50,6 @@ int MPI_File_set_size(MPI_File fh, MPI_Offset size) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { @@ -65,6 +64,6 @@ int MPI_File_set_size(MPI_File fh, MPI_Offset size) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_set_view.c b/ompi/mpi/c/file_set_view.c index f6372466ccd..79f62da01a5 100644 --- a/ompi/mpi/c/file_set_view.c +++ b/ompi/mpi/c/file_set_view.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,12 +32,11 @@ #include "ompi/file/file.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_set_view = PMPI_File_set_view #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_set_view PMPI_File_set_view #endif static const char FUNC_NAME[] = "MPI_File_set_view"; @@ -49,7 +50,7 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, MEMCHECKER( memchecker_datatype(etype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -65,15 +66,12 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_set_view(fh, disp, etype, filetype, (char *) datarep, info); + io_module_file_set_view(fh, disp, etype, filetype, datarep, info); break; default: @@ -82,6 +80,6 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_sync.c b/ompi/mpi/c/file_sync.c index 2d63dc4d774..558c81d2b32 100644 --- a/ompi/mpi/c/file_sync.c +++ b/ompi/mpi/c/file_sync.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_sync = PMPI_File_sync #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_sync PMPI_File_sync #endif static const char FUNC_NAME[] = "MPI_File_sync"; @@ -49,7 +50,6 @@ int MPI_File_sync(MPI_File fh) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); /* Call the back-end io component function */ @@ -65,6 +65,6 @@ int MPI_File_sync(MPI_File fh) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write.c b/ompi/mpi/c/file_write.c index 36daaefe9e4..8bf967d1964 100644 --- a/ompi/mpi/c/file_write.c +++ b/ompi/mpi/c/file_write.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write = PMPI_File_write #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write PMPI_File_write #endif static const char FUNC_NAME[] = "MPI_File_write"; @@ -64,15 +65,12 @@ int MPI_File_write(MPI_File fh, const void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_write(fh, (void *) buf, count, datatype, status); + io_module_file_write(fh, buf, count, datatype, status); break; default: @@ -81,6 +79,6 @@ int MPI_File_write(MPI_File fh, const void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write_all.c b/ompi/mpi/c/file_write_all.c index 25ee4dd594f..5feb488e254 100644 --- a/ompi/mpi/c/file_write_all.c +++ b/ompi/mpi/c/file_write_all.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write_all = PMPI_File_write_all #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write_all PMPI_File_write_all #endif static const char FUNC_NAME[] = "MPI_File_write_all"; @@ -64,15 +65,12 @@ int MPI_File_write_all(MPI_File fh, const void *buf, int count, MPI_Datatype OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_write_all(fh, (void *) buf, count, datatype, status); + io_module_file_write_all(fh, buf, count, datatype, status); break; default: @@ -81,6 +79,6 @@ int MPI_File_write_all(MPI_File fh, const void *buf, int count, MPI_Datatype } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write_all_begin.c b/ompi/mpi/c/file_write_all_begin.c index aa71776ad0a..0dd82cfdc2b 100644 --- a/ompi/mpi/c/file_write_all_begin.c +++ b/ompi/mpi/c/file_write_all_begin.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write_all_begin = PMPI_File_write_all_begin #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write_all_begin PMPI_File_write_all_begin #endif static const char FUNC_NAME[] = "MPI_File_write_all_begin"; @@ -64,15 +65,12 @@ int MPI_File_write_all_begin(MPI_File fh, const void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_write_all_begin(fh, (void *) buf, count, datatype); + io_module_file_write_all_begin(fh, buf, count, datatype); break; default: @@ -81,6 +79,6 @@ int MPI_File_write_all_begin(MPI_File fh, const void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write_all_end.c b/ompi/mpi/c/file_write_all_end.c index 3f554db1868..01368c70b8a 100644 --- a/ompi/mpi/c/file_write_all_end.c +++ b/ompi/mpi/c/file_write_all_end.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,12 +29,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write_all_end = PMPI_File_write_all_end #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write_all_end PMPI_File_write_all_end #endif static const char FUNC_NAME[] = "MPI_File_write_all_end"; @@ -52,15 +53,12 @@ int MPI_File_write_all_end(MPI_File fh, const void *buf, MPI_Status *status) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. 
- io_module_file_write_all_end(fh, (void *) buf, status); + io_module_file_write_all_end(fh, buf, status); break; default: @@ -69,6 +67,6 @@ int MPI_File_write_all_end(MPI_File fh, const void *buf, MPI_Status *status) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write_at.c b/ompi/mpi/c/file_write_at.c index 8841b7d591a..0c7596c93b6 100644 --- a/ompi/mpi/c/file_write_at.c +++ b/ompi/mpi/c/file_write_at.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write_at = PMPI_File_write_at #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write_at PMPI_File_write_at #endif static const char FUNC_NAME[] = "MPI_File_write_at"; @@ -65,15 +66,12 @@ int MPI_File_write_at(MPI_File fh, MPI_Offset offset, const void *buf, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_write_at(fh, offset, (void *) buf, count, datatype, status); + io_module_file_write_at(fh, offset, buf, count, datatype, status); break; default: @@ -82,6 +80,6 @@ int MPI_File_write_at(MPI_File fh, MPI_Offset offset, const void *buf, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write_at_all.c b/ompi/mpi/c/file_write_at_all.c index eaa59b2d88a..1974cbd8dbc 100644 --- a/ompi/mpi/c/file_write_at_all.c +++ b/ompi/mpi/c/file_write_at_all.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write_at_all = PMPI_File_write_at_all #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write_at_all PMPI_File_write_at_all #endif static const char FUNC_NAME[] = "MPI_File_write_at_all"; @@ -65,15 +66,12 @@ int MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, const void *buf, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_write_at_all(fh, offset, (void *) buf, count, datatype, + io_module_file_write_at_all(fh, offset, buf, count, datatype, status); break; @@ -83,6 +81,6 @@ int MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, const void *buf, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write_at_all_begin.c b/ompi/mpi/c/file_write_at_all_begin.c index 8635e4bde8d..086bdab8a2c 100644 --- a/ompi/mpi/c/file_write_at_all_begin.c +++ b/ompi/mpi/c/file_write_at_all_begin.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write_at_all_begin = PMPI_File_write_at_all_begin #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write_at_all_begin PMPI_File_write_at_all_begin #endif static const char FUNC_NAME[] = "MPI_File_write_at_all_begin"; @@ -64,15 +65,12 @@ int MPI_File_write_at_all_begin(MPI_File fh, MPI_Offset offset, const void *buf, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_write_at_all_begin(fh, offset, (void *) buf, count, + io_module_file_write_at_all_begin(fh, offset, buf, count, datatype); break; @@ -82,6 +80,6 @@ int MPI_File_write_at_all_begin(MPI_File fh, MPI_Offset offset, const void *buf, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write_at_all_end.c b/ompi/mpi/c/file_write_at_all_end.c index 6b0e534ed94..22fa297a841 100644 --- a/ompi/mpi/c/file_write_at_all_end.c +++ b/ompi/mpi/c/file_write_at_all_end.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,12 +29,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write_at_all_end = PMPI_File_write_at_all_end #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write_at_all_end PMPI_File_write_at_all_end #endif static const char FUNC_NAME[] = "MPI_File_write_at_all_end"; @@ -52,15 +53,12 @@ int MPI_File_write_at_all_end(MPI_File fh, const void *buf, MPI_Status *status) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. 
- io_module_file_write_at_all_end(fh, (void *) buf, status); + io_module_file_write_at_all_end(fh, buf, status); break; default: @@ -69,6 +67,6 @@ int MPI_File_write_at_all_end(MPI_File fh, const void *buf, MPI_Status *status) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write_ordered.c b/ompi/mpi/c/file_write_ordered.c index a638847fcd4..39dd105c367 100644 --- a/ompi/mpi/c/file_write_ordered.c +++ b/ompi/mpi/c/file_write_ordered.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write_ordered = PMPI_File_write_ordered #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write_ordered PMPI_File_write_ordered #endif static const char FUNC_NAME[] = "MPI_File_write_ordered"; @@ -64,15 +65,12 @@ int MPI_File_write_ordered(MPI_File fh, const void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_write_ordered(fh, (void *) buf, count, datatype, status); + io_module_file_write_ordered(fh, buf, count, datatype, status); break; default: @@ -81,6 +79,6 @@ int MPI_File_write_ordered(MPI_File fh, const void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write_ordered_begin.c b/ompi/mpi/c/file_write_ordered_begin.c index d0a605de38b..32b76d78016 100644 --- a/ompi/mpi/c/file_write_ordered_begin.c +++ b/ompi/mpi/c/file_write_ordered_begin.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. 
All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write_ordered_begin = PMPI_File_write_ordered_begin #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write_ordered_begin PMPI_File_write_ordered_begin #endif static const char FUNC_NAME[] = "MPI_File_write_ordered_begin"; @@ -49,7 +50,7 @@ int MPI_File_write_ordered_begin(MPI_File fh, const void *buf, int count, memchecker_datatype(datatype); memchecker_call(&opal_memchecker_base_isdefined, buf, count, datatype); ); - + if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -64,15 +65,12 @@ int MPI_File_write_ordered_begin(MPI_File fh, const void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. 
- io_module_file_write_ordered_begin(fh, (void *) buf, count, datatype); + io_module_file_write_ordered_begin(fh, buf, count, datatype); break; default: @@ -81,6 +79,6 @@ int MPI_File_write_ordered_begin(MPI_File fh, const void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write_ordered_end.c b/ompi/mpi/c/file_write_ordered_end.c index 8cd41716398..9369788e48b 100644 --- a/ompi/mpi/c/file_write_ordered_end.c +++ b/ompi/mpi/c/file_write_ordered_end.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,12 +29,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write_ordered_end = PMPI_File_write_ordered_end #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write_ordered_end PMPI_File_write_ordered_end #endif static const char FUNC_NAME[] = "MPI_File_write_ordered_end"; @@ -52,15 +53,12 @@ int MPI_File_write_ordered_end(MPI_File fh, const void *buf, MPI_Status *status) OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_write_ordered_end(fh, (void *) buf, status); + io_module_file_write_ordered_end(fh, buf, status); break; default: @@ -69,6 +67,6 @@ int MPI_File_write_ordered_end(MPI_File fh, const void *buf, MPI_Status *status) } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_write_shared.c b/ompi/mpi/c/file_write_shared.c index 286c10fa805..239be859170 100644 --- a/ompi/mpi/c/file_write_shared.c +++ b/ompi/mpi/c/file_write_shared.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. 
All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_File_write_shared = PMPI_File_write_shared #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_File_write_shared PMPI_File_write_shared #endif static const char FUNC_NAME[] = "MPI_File_write_shared"; @@ -64,15 +65,12 @@ int MPI_File_write_shared(MPI_File fh, const void *buf, int count, OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - /* XXX -- CONST -- do not cast away const -- update mca/io */ rc = fh->f_io_selected_module.v2_0_0. - io_module_file_write_shared(fh, (void *) buf, count, datatype, status); + io_module_file_write_shared(fh, buf, count, datatype, status); break; default: @@ -81,6 +79,6 @@ int MPI_File_write_shared(MPI_File fh, const void *buf, int count, } /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/finalize.c b/ompi/mpi/c/finalize.c index 7c69eb481d3..71f4338864e 100644 --- a/ompi/mpi/c/finalize.c +++ b/ompi/mpi/c/finalize.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,12 +24,11 @@ #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Finalize = PMPI_Finalize #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Finalize PMPI_Finalize #endif static const char FUNC_NAME[] = "MPI_Finalize"; @@ -35,8 +36,6 @@ static const char FUNC_NAME[] = "MPI_Finalize"; int MPI_Finalize(void) { - OPAL_CR_FINALIZE_LIBRARY(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } diff --git a/ompi/mpi/c/finalized.c b/ompi/mpi/c/finalized.c index ed0a244ef8b..728378c17f3 100644 --- a/ompi/mpi/c/finalized.c +++ b/ompi/mpi/c/finalized.c @@ -5,14 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,22 +26,27 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Finalized = PMPI_Finalized #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Finalized PMPI_Finalized #endif static const char FUNC_NAME[] = "MPI_Finalized"; -int MPI_Finalized(int *flag) +int MPI_Finalized(int *flag) { MPI_Comm null = NULL; - OPAL_CR_NOOP_PROGRESS(); + /* We must obtain the lock to guarantee consistent values of + ompi_mpi_initialized and ompi_mpi_finalized. Note, too, that + this lock is held for the bulk of the duration of + ompi_mpi_init() and ompi_mpi_finalize(), so when we get the + lock, we are guaranteed that some other thread is not part way + through initialization or finalization. */ + opal_mutex_lock(&ompi_mpi_bootstrap_mutex); if (MPI_PARAM_CHECK) { if (NULL == flag) { @@ -48,17 +56,19 @@ int MPI_Finalized(int *flag) MPI_Finalize) or not */ if (ompi_mpi_initialized && !ompi_mpi_finalized) { + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } else { + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); return OMPI_ERRHANDLER_INVOKE(null, MPI_ERR_ARG, FUNC_NAME); } } } - /* Pretty simple */ - *flag = ompi_mpi_finalized; + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); + return MPI_SUCCESS; } diff --git a/ompi/mpi/c/free_mem.c b/ompi/mpi/c/free_mem.c index e842ce1c635..a0145d3081c 100644 --- a/ompi/mpi/c/free_mem.c +++ b/ompi/mpi/c/free_mem.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved.
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,12 +29,11 @@ #include "ompi/errhandler/errhandler.h" #include "opal/mca/mpool/mpool.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Free_mem = PMPI_Free_mem #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Free_mem PMPI_Free_mem #endif static const char FUNC_NAME[] = "MPI_Free_mem"; @@ -40,21 +41,18 @@ static const char FUNC_NAME[] = "MPI_Free_mem"; int MPI_Free_mem(void *baseptr) { - OPAL_CR_ENTER_LIBRARY(); /* Per these threads: - http://www.open-mpi.org/community/lists/devel/2007/07/1977.php - http://www.open-mpi.org/community/lists/devel/2007/07/1979.php + http://www.open-mpi.org/community/lists/devel/2007/07/1977.php + http://www.open-mpi.org/community/lists/devel/2007/07/1979.php If you call MPI_ALLOC_MEM with a size of 0, you get NULL back. So don't consider a NULL==baseptr an error. */ if (NULL != baseptr && OMPI_SUCCESS != mca_mpool_base_free(baseptr)) { - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, FUNC_NAME); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/gather.c b/ompi/mpi/c/gather.c index ed788a077a8..c7fbbe678f9 100644 --- a/ompi/mpi/c/gather.c +++ b/ompi/mpi/c/gather.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,10 +15,12 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -31,12 +33,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Gather = PMPI_Gather #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Gather PMPI_Gather #endif static const char FUNC_NAME[] = "MPI_Gather"; @@ -44,7 +45,7 @@ static const char FUNC_NAME[] = "MPI_Gather"; int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, - int root, MPI_Comm comm) + int root, MPI_Comm comm) { int err; @@ -60,14 +61,14 @@ int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, if(ompi_comm_rank(comm) == root) { /* check whether root's send buffer is defined. */ if (MPI_IN_PLACE == sendbuf) { - memchecker_call(&opal_memchecker_base_isdefined, - (char *)(recvbuf)+rank*ext, + memchecker_call(&opal_memchecker_base_isdefined, + (char *)(recvbuf)+rank*ext, recvcount, recvtype); } else { memchecker_datatype(sendtype); memchecker_call(&opal_memchecker_base_isdefined, sendbuf, sendcount, sendtype); } - + memchecker_datatype(recvtype); /* check whether root's receive buffer is addressable. 
*/ memchecker_call(&opal_memchecker_base_isaddressable, recvbuf, recvcount, recvtype); @@ -88,12 +89,12 @@ int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, } } ); - + if (MPI_PARAM_CHECK) { err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if ((ompi_comm_rank(comm) != root && MPI_IN_PLACE == sendbuf) || (ompi_comm_rank(comm) == root && MPI_IN_PLACE == recvbuf)) { @@ -123,7 +124,7 @@ int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, if (ompi_comm_rank(comm) == root) { if (MPI_DATATYPE_NULL == recvtype || NULL == recvtype) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TYPE, FUNC_NAME); + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TYPE, FUNC_NAME); } if (recvcount < 0) { @@ -157,7 +158,7 @@ int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, } if (MPI_DATATYPE_NULL == recvtype || NULL == recvtype) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TYPE, FUNC_NAME); + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TYPE, FUNC_NAME); } } } @@ -165,20 +166,17 @@ int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, /* Do we need to do anything? 
*/ - if ((0 == sendcount && MPI_ROOT != root && + if ((0 == sendcount && MPI_ROOT != root && (ompi_comm_rank(comm) != root || (ompi_comm_rank(comm) == root && MPI_IN_PLACE != sendbuf))) || - (ompi_comm_rank(comm) == root && MPI_IN_PLACE == sendbuf && - 0 == recvcount) || + (ompi_comm_rank(comm) == root && MPI_IN_PLACE == sendbuf && + 0 == recvcount) || (0 == recvcount && (MPI_ROOT == root || MPI_PROC_NULL == root))) { return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_gather((void *) sendbuf, sendcount, sendtype, recvbuf, + err = comm->c_coll.coll_gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, comm->c_coll.coll_gather_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/gatherv.c b/ompi/mpi/c/gatherv.c index b3b384ac44d..a320b9fc227 100644 --- a/ompi/mpi/c/gatherv.c +++ b/ompi/mpi/c/gatherv.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Gatherv = PMPI_Gatherv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Gatherv PMPI_Gatherv #endif static const char FUNC_NAME[] = "MPI_Gatherv"; @@ -42,16 +43,14 @@ static const char FUNC_NAME[] = "MPI_Gatherv"; int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], - MPI_Datatype recvtype, int root, MPI_Comm comm) + MPI_Datatype recvtype, int root, MPI_Comm comm) { int i, size, err; MEMCHECKER( - int rank; ptrdiff_t ext; size = ompi_comm_remote_size(comm); - rank = ompi_comm_rank(comm); ompi_datatype_type_extent(recvtype, &ext); memchecker_comm(comm); @@ -68,7 +67,7 @@ int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, memchecker_datatype(sendtype); memchecker_call(&opal_memchecker_base_isdefined, sendbuf, sendcount, sendtype); } - + memchecker_datatype(recvtype); /* check whether root's receive buffer is addressable. */ for (i = 0; i < size; i++) { @@ -91,7 +90,7 @@ int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, recvcounts[i], recvtype); } } else if (MPI_PROC_NULL != root) { - memchecker_datatype(sendtype); + memchecker_datatype(sendtype); /* check whether send buffer is defined. 
*/ memchecker_call(&opal_memchecker_base_isdefined, sendbuf, sendcount, sendtype); } @@ -102,7 +101,7 @@ int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if ((ompi_comm_rank(comm) != root && MPI_IN_PLACE == sendbuf) || (ompi_comm_rank(comm) == root && MPI_IN_PLACE == recvbuf)) { @@ -181,19 +180,16 @@ int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, if (recvcounts[i] < 0) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COUNT, FUNC_NAME); } else if (MPI_DATATYPE_NULL == recvtype || NULL == recvtype) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TYPE, FUNC_NAME); + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TYPE, FUNC_NAME); } } } } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_gatherv((void *) sendbuf, sendcount, sendtype, recvbuf, - (int *) recvcounts, (int *) displs, + err = comm->c_coll.coll_gatherv(sendbuf, sendcount, sendtype, recvbuf, + recvcounts, displs, recvtype, root, comm, comm->c_coll.coll_gatherv_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/get.c b/ompi/mpi/c/get.c index 8f1937334ea..cf3fb74c51a 100644 --- a/ompi/mpi/c/get.c +++ b/ompi/mpi/c/get.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,14 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,12 +31,11 @@ #include "ompi/mca/osc/osc.h" #include "ompi/datatype/ompi_datatype.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Get = PMPI_Get #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Get PMPI_Get #endif static const char FUNC_NAME[] = "MPI_Get"; @@ -40,7 +44,7 @@ static const char FUNC_NAME[] = "MPI_Get"; int MPI_Get(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, - MPI_Datatype target_datatype, MPI_Win win) + MPI_Datatype target_datatype, MPI_Win win) { int rc; @@ -56,7 +60,7 @@ int MPI_Get(void *origin_addr, int origin_count, } else if (ompi_win_peer_invalid(win, target_rank) && (MPI_PROC_NULL != target_rank)) { rc = MPI_ERR_RANK; - } else if ( target_disp < 0 ) { + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { rc = MPI_ERR_DISP; } else { OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, origin_datatype, origin_count); @@ -69,8 +73,6 @@ int MPI_Get(void *origin_addr, int origin_count, if (MPI_PROC_NULL == target_rank) return MPI_SUCCESS; - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_get(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win); diff --git a/ompi/mpi/c/get_accumulate.c 
b/ompi/mpi/c/get_accumulate.c index bc498b49f85..a739dd6099e 100644 --- a/ompi/mpi/c/get_accumulate.c +++ b/ompi/mpi/c/get_accumulate.c @@ -12,8 +12,10 @@ * All rights reserved. * Copyright (c) 2009 Sun Microsystmes, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,12 +35,11 @@ #include "ompi/datatype/ompi_datatype_internal.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Get_accumulate = PMPI_Get_accumulate #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Get_accumulate PMPI_Get_accumulate #endif static const char FUNC_NAME[] = "MPI_Get_accumulate"; @@ -73,7 +74,7 @@ int MPI_Get_accumulate(const void *origin_addr, int origin_count, MPI_Datatype o rc = MPI_ERR_OP; } else if (!ompi_op_is_intrinsic(op)) { rc = MPI_ERR_OP; - } else if ( target_disp < 0 ) { + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { rc = MPI_ERR_DISP; } else { /* the origin datatype is meaningless when using MPI_OP_NO_OP */ @@ -131,10 +132,7 @@ int MPI_Get_accumulate(const void *origin_addr, int origin_count, MPI_Datatype o return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - - /* XXX -- TODO: do not cast away the const */ - rc = ompi_win->w_osc_module->osc_get_accumulate((void *) origin_addr, + rc = ompi_win->w_osc_module->osc_get_accumulate(origin_addr, origin_count, origin_datatype, result_addr, diff --git a/ompi/mpi/c/get_address.c b/ompi/mpi/c/get_address.c index e37f9497a1b..229f4eeee4b 100644 --- a/ompi/mpi/c/get_address.c +++ 
b/ompi/mpi/c/get_address.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Get_address = PMPI_Get_address #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Get_address PMPI_Get_address #endif static const char FUNC_NAME[] = "MPI_Get_address"; @@ -39,11 +40,9 @@ static const char FUNC_NAME[] = "MPI_Get_address"; int MPI_Get_address(const void *location, MPI_Aint *address) { - OPAL_CR_NOOP_PROGRESS(); - if( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == location || NULL == address) { + if (NULL == address) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } diff --git a/ompi/mpi/c/get_count.c b/ompi/mpi/c/get_count.c index 2994f32f7c6..c6dd07103b3 100644 --- a/ompi/mpi/c/get_count.c +++ b/ompi/mpi/c/get_count.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Get_count = PMPI_Get_count #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Get_count PMPI_Get_count #endif static const char FUNC_NAME[] = "MPI_Get_count"; @@ -46,8 +47,6 @@ int MPI_Get_count(const MPI_Status *status, MPI_Datatype datatype, int *count) size_t size = 0, internal_count; int rc = MPI_SUCCESS; - OPAL_CR_NOOP_PROGRESS(); - MEMCHECKER( if (status != MPI_STATUSES_IGNORE) { /* diff --git a/ompi/mpi/c/get_elements.c b/ompi/mpi/c/get_elements.c index 76fd3c3b6e6..ade78ef2ade 100644 --- a/ompi/mpi/c/get_elements.c +++ b/ompi/mpi/c/get_elements.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,12 +32,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Get_elements = PMPI_Get_elements #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Get_elements PMPI_Get_elements #endif static const char FUNC_NAME[] = "MPI_Get_elements"; @@ -45,8 +46,6 @@ int MPI_Get_elements(const MPI_Status *status, MPI_Datatype datatype, int *count size_t internal_count; int ret; - OPAL_CR_NOOP_PROGRESS(); - MEMCHECKER( if (status != MPI_STATUSES_IGNORE) { /* @@ -62,7 +61,7 @@ int MPI_Get_elements(const MPI_Status *status, MPI_Datatype datatype, int *count if (MPI_PARAM_CHECK) { int err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == status || MPI_STATUSES_IGNORE == status || + if (NULL == status || MPI_STATUSES_IGNORE == status || MPI_STATUS_IGNORE == status || NULL == count) { err = MPI_ERR_ARG; } else if (NULL == datatype || MPI_DATATYPE_NULL == datatype) { diff --git a/ompi/mpi/c/get_elements_x.c b/ompi/mpi/c/get_elements_x.c index 6cd2fc19be6..880938dd817 100644 --- a/ompi/mpi/c/get_elements_x.c +++ b/ompi/mpi/c/get_elements_x.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,12 +32,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Get_elements_x = PMPI_Get_elements_x #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Get_elements_x PMPI_Get_elements_x #endif static const char FUNC_NAME[] = "MPI_Get_elements_x"; @@ -45,8 +46,6 @@ int MPI_Get_elements_x(const MPI_Status *status, MPI_Datatype datatype, MPI_Coun size_t internal_count; int ret; - OPAL_CR_NOOP_PROGRESS(); - MEMCHECKER( if (status != MPI_STATUSES_IGNORE) { /* @@ -62,7 +61,7 @@ int MPI_Get_elements_x(const MPI_Status *status, MPI_Datatype datatype, MPI_Coun if (MPI_PARAM_CHECK) { int err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == status || MPI_STATUSES_IGNORE == status || + if (NULL == status || MPI_STATUSES_IGNORE == status || MPI_STATUS_IGNORE == status || NULL == count) { err = MPI_ERR_ARG; } else if (NULL == datatype || MPI_DATATYPE_NULL == datatype) { diff --git a/ompi/mpi/c/get_library_version.c b/ompi/mpi/c/get_library_version.c index 9bf5decc556..a225b329d98 100644 --- a/ompi/mpi/c/get_library_version.c +++ b/ompi/mpi/c/get_library_version.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,30 +26,27 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Get_library_version = PMPI_Get_library_version #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Get_library_version PMPI_Get_library_version #endif static const char FUNC_NAME[] = "MPI_Get_library_version"; -int MPI_Get_library_version(char *version, int *resultlen) +int MPI_Get_library_version(char *version, int *resultlen) { int len_left; MPI_Comm null = MPI_COMM_NULL; char *ptr, tmp[MPI_MAX_LIBRARY_VERSION_STRING]; - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { /* Per MPI-3, this function can be invoked before MPI_INIT, so we don't invoke the normal MPI_ERR_INIT_FINALIZE() macro here */ - + if (NULL == version || NULL == resultlen) { /* Note that we have to check and see if we have previously called MPI_INIT or not. 
If so, use the @@ -74,16 +73,11 @@ int MPI_Get_library_version(char *version, int *resultlen) len_left = sizeof(tmp); memset(tmp, 0, MPI_MAX_LIBRARY_VERSION_STRING); - snprintf(tmp, MPI_MAX_LIBRARY_VERSION_STRING, "Open MPI v%d.%d", - OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION); + snprintf(tmp, MPI_MAX_LIBRARY_VERSION_STRING, "Open MPI v%d.%d.%d", + OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION); ptr += strlen(tmp); len_left -= strlen(tmp); - if (OMPI_RELEASE_VERSION > 0) { - snprintf(ptr, len_left, ".%d", OMPI_RELEASE_VERSION); - ptr = tmp + strlen(tmp); - len_left = MPI_MAX_LIBRARY_VERSION_STRING - strlen(tmp); - } if (NULL != OMPI_GREEK_VERSION && strlen(OMPI_GREEK_VERSION) > 0) { snprintf(ptr, len_left, "%s", OMPI_GREEK_VERSION); ptr = tmp + strlen(tmp); @@ -116,7 +110,7 @@ int MPI_Get_library_version(char *version, int *resultlen) snprintf(ptr, len_left, ", %s", OMPI_RELEASE_DATE); ptr = tmp + strlen(tmp); len_left = MPI_MAX_LIBRARY_VERSION_STRING - strlen(tmp); - } + } memcpy(version, tmp, strlen(tmp) + 1); *resultlen = strlen(tmp) + 1; diff --git a/ompi/mpi/c/get_processor_name.c b/ompi/mpi/c/get_processor_name.c index 1a4a5682e07..280566da843 100644 --- a/ompi/mpi/c/get_processor_name.c +++ b/ompi/mpi/c/get_processor_name.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,29 +30,26 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Get_processor_name = PMPI_Get_processor_name #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Get_processor_name PMPI_Get_processor_name #endif static const char FUNC_NAME[] = "MPI_Get_processor_name"; -int MPI_Get_processor_name(char *name, int *resultlen) +int MPI_Get_processor_name(char *name, int *resultlen) { - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( NULL == name ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } if ( NULL == resultlen ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } @@ -65,7 +64,7 @@ int MPI_Get_processor_name(char *name, int *resultlen) Guard against gethostname() returning a *really long* hostname and not null-terminating the string. The Fortran API version will pad to the right if necessary. */ - gethostname(name, MPI_MAX_PROCESSOR_NAME - 1); + gethostname(name, (MPI_MAX_PROCESSOR_NAME - 1)); name[MPI_MAX_PROCESSOR_NAME - 1] = '\0'; *resultlen = (int) strlen(name); diff --git a/ompi/mpi/c/get_version.c b/ompi/mpi/c/get_version.c index b5957e12908..5c33086520a 100644 --- a/ompi/mpi/c/get_version.c +++ b/ompi/mpi/c/get_version.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -23,28 +25,25 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Get_version = PMPI_Get_version #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Get_version PMPI_Get_version #endif static const char FUNC_NAME[] = "MPI_Get_version"; -int MPI_Get_version(int *version, int *subversion) +int MPI_Get_version(int *version, int *subversion) { MPI_Comm null = NULL; - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { /* Per MPI-2:3.1, this function can be invoked before MPI_INIT, so we don't invoke the normal MPI_ERR_INIT_FINALIZE() macro here */ - + if (NULL == version || NULL == subversion) { /* Note that we have to check and see if we have previously called MPI_INIT or not. If so, use the diff --git a/ompi/mpi/c/graph_create.c b/ompi/mpi/c/graph_create.c index 1221368cb09..91a7ecccc8f 100644 --- a/ompi/mpi/c/graph_create.c +++ b/ompi/mpi/c/graph_create.c @@ -6,18 +6,20 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "ompi/mca/topo/base/base.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Graph_create = PMPI_Graph_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Graph_create PMPI_Graph_create #endif static const char FUNC_NAME[] = "MPI_Graph_create"; @@ -58,7 +59,7 @@ int MPI_Graph_create(MPI_Comm old_comm, int nnodes, const int indx[], return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (OMPI_COMM_IS_INTER(old_comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } if (nnodes < 0) { @@ -86,10 +87,9 @@ int MPI_Graph_create(MPI_Comm old_comm, int nnodes, const int indx[], FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* - * everything seems to be alright with the communicator, we can go - * ahead and select a topology module for this purpose and create + /* + * everything seems to be alright with the communicator, we can go + * ahead and select a topology module for this purpose and create * the new graph communicator */ if (OMPI_SUCCESS != (err = mca_topo_base_comm_select(old_comm, @@ -99,18 +99,15 @@ int MPI_Graph_create(MPI_Comm old_comm, int nnodes, const int indx[], return err; } - /* Now let that topology module rearrange procs/ranks if it wants to */ - /* XXX -- CONST -- do not cast away const -- update mca/topo 
*/ + /* Now let that topology module rearrange procs/ranks if it wants to */ err = topo->topo.graph.graph_create(topo, old_comm, - nnodes, (int *) indx, (int *) edges, + nnodes, indx, edges, (0 == reorder) ? false : true, comm_graph); - OPAL_CR_EXIT_LIBRARY(); - if (MPI_SUCCESS != err) { OBJ_RELEASE(topo); return OMPI_ERRHANDLER_INVOKE(old_comm, err, FUNC_NAME); } - + /* All done */ return MPI_SUCCESS; } diff --git a/ompi/mpi/c/graph_get.c b/ompi/mpi/c/graph_get.c index 1768932baf8..c96d5027d42 100644 --- a/ompi/mpi/c/graph_get.c +++ b/ompi/mpi/c/graph_get.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos Nat Security, LLC. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,19 +30,18 @@ #include "ompi/mca/topo/topo.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Graph_get = PMPI_Graph_get #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Graph_get PMPI_Graph_get #endif static const char FUNC_NAME[] = "MPI_Graph_get"; int MPI_Graph_get(MPI_Comm comm, int maxindx, int maxedges, - int indx[], int edges[]) + int indx[], int edges[]) { int err; @@ -68,11 +69,9 @@ int MPI_Graph_get(MPI_Comm comm, int maxindx, int maxedges, return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_TOPOLOGY, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); /* call the function */ err = comm->c_topo->topo.graph.graph_get(comm, maxindx, maxedges, indx, edges); - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/graph_map.c b/ompi/mpi/c/graph_map.c index 769c4a404d8..a208bc840ed 100644 --- a/ompi/mpi/c/graph_map.c +++ b/ompi/mpi/c/graph_map.c @@ -6,18 +6,20 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,19 +32,18 @@ #include "ompi/mca/topo/topo.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Graph_map = PMPI_Graph_map #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Graph_map PMPI_Graph_map #endif static const char FUNC_NAME[] = "MPI_Graph_map"; int MPI_Graph_map(MPI_Comm comm, int nnodes, const int indx[], const int edges[], - int *newrank) + int *newrank) { int err = MPI_SUCCESS; @@ -67,18 +68,15 @@ int MPI_Graph_map(MPI_Comm comm, int nnodes, const int indx[], const int edges[] } } - OPAL_CR_ENTER_LIBRARY(); if(!OMPI_COMM_IS_GRAPH(comm)) { - /* In case the communicator has no topo-module attached to + /* In case the communicator has no topo-module attached to it, we just return the "default" value suggested by MPI: newrank = rank */ *newrank = ompi_comm_rank(comm); } else { - /* XXX -- CONST -- do not cast away const -- update mca/topo */ - err = comm->c_topo->topo.graph.graph_map(comm, nnodes, (int *) indx, (int *) edges, newrank); + err = comm->c_topo->topo.graph.graph_map(comm, nnodes, indx, edges, newrank); } - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/graph_neighbors.c b/ompi/mpi/c/graph_neighbors.c index 1e47cc233e4..867a841baa0 100644 --- a/ompi/mpi/c/graph_neighbors.c +++ b/ompi/mpi/c/graph_neighbors.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos Nat Security, LLC. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,19 +30,18 @@ #include "ompi/mca/topo/topo.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Graph_neighbors = PMPI_Graph_neighbors #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Graph_neighbors PMPI_Graph_neighbors #endif static const char FUNC_NAME[] = "MPI_Graph_neighbors"; int MPI_Graph_neighbors(MPI_Comm comm, int rank, int maxneighbors, - int neighbors[]) + int neighbors[]) { int err; @@ -73,11 +74,9 @@ int MPI_Graph_neighbors(MPI_Comm comm, int rank, int maxneighbors, return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_TOPOLOGY, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); /* call the function */ err = comm->c_topo->topo.graph.graph_neighbors(comm, rank, maxneighbors, neighbors); - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/graph_neighbors_count.c b/ompi/mpi/c/graph_neighbors_count.c index ba8970b8c07..db1fb11dc7c 100644 --- a/ompi/mpi/c/graph_neighbors_count.c +++ b/ompi/mpi/c/graph_neighbors_count.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,18 +29,17 @@ #include "ompi/mca/topo/topo.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Graph_neighbors_count = PMPI_Graph_neighbors_count #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Graph_neighbors_count PMPI_Graph_neighbors_count #endif static const char FUNC_NAME[] = "MPI_Graph_neighbors_count"; -int MPI_Graph_neighbors_count(MPI_Comm comm, int rank, int *nneighbors) +int MPI_Graph_neighbors_count(MPI_Comm comm, int rank, int *nneighbors) { int err; @@ -71,10 +72,8 @@ int MPI_Graph_neighbors_count(MPI_Comm comm, int rank, int *nneighbors) return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_TOPOLOGY, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); err = comm->c_topo->topo.graph.graph_neighbors_count(comm, rank, nneighbors); - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/graphdims_get.c b/ompi/mpi/c/graphdims_get.c index 66dcbe8a2f7..8af76da95ee 100644 --- a/ompi/mpi/c/graphdims_get.c +++ b/ompi/mpi/c/graphdims_get.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,18 +29,17 @@ #include "ompi/mca/topo/topo.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Graphdims_get = PMPI_Graphdims_get #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Graphdims_get PMPI_Graphdims_get #endif static const char FUNC_NAME[] = "MPI_Graphdims_get"; -int MPI_Graphdims_get(MPI_Comm comm, int *nnodes, int *nedges) +int MPI_Graphdims_get(MPI_Comm comm, int *nnodes, int *nedges) { int err; @@ -67,10 +68,8 @@ int MPI_Graphdims_get(MPI_Comm comm, int *nnodes, int *nedges) return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_TOPOLOGY, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); err = comm->c_topo->topo.graph.graphdims_get(comm, nnodes, nedges); - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/grequest_complete.c b/ompi/mpi/c/grequest_complete.c index 25f3d3e2aac..cda1f9d432f 100644 --- a/ompi/mpi/c/grequest_complete.c +++ b/ompi/mpi/c/grequest_complete.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,18 +28,17 @@ #include "ompi/request/grequest.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Grequest_complete = PMPI_Grequest_complete #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Grequest_complete PMPI_Grequest_complete #endif static const char FUNC_NAME[] = "MPI_Grequest_complete"; -int MPI_Grequest_complete(MPI_Request request) +int MPI_Grequest_complete(MPI_Request request) { int rc = MPI_SUCCESS; @@ -55,8 +56,6 @@ int MPI_Grequest_complete(MPI_Request request) OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_grequest_complete(request); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME); } diff --git a/ompi/mpi/c/grequest_start.c b/ompi/mpi/c/grequest_start.c index 9b867e4d313..aed597bb90b 100644 --- a/ompi/mpi/c/grequest_start.c +++ b/ompi/mpi/c/grequest_start.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/request/grequest.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Grequest_start = PMPI_Grequest_start #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Grequest_start PMPI_Grequest_start #endif static const char FUNC_NAME[] = "MPI_Grequest_start"; @@ -39,7 +40,7 @@ static const char FUNC_NAME[] = "MPI_Grequest_start"; int MPI_Grequest_start(MPI_Grequest_query_function *query_fn, MPI_Grequest_free_function *free_fn, MPI_Grequest_cancel_function *cancel_fn, - void *extra_state, MPI_Request *request) + void *extra_state, MPI_Request *request) { int rc; @@ -51,8 +52,6 @@ int MPI_Grequest_start(MPI_Grequest_query_function *query_fn, } } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_grequest_start(query_fn,free_fn,cancel_fn,extra_state,request); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/group_c2f.c b/ompi/mpi/c/group_c2f.c index b50cc6b74eb..0c4022548ff 100644 --- a/ompi/mpi/c/group_c2f.c +++ b/ompi/mpi/c/group_c2f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,21 +27,18 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_c2f = PMPI_Group_c2f #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_c2f PMPI_Group_c2f #endif static const char FUNC_NAME[] = "MPI_Group_c2f"; -MPI_Fint MPI_Group_c2f(MPI_Group group) +MPI_Fint MPI_Group_c2f(MPI_Group group) { - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/group_compare.c b/ompi/mpi/c/group_compare.c index 164b4d5a0b4..ef4a85cc01c 100644 --- a/ompi/mpi/c/group_compare.c +++ b/ompi/mpi/c/group_compare.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 University of Houston. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,12 +30,11 @@ #include "ompi/communicator/communicator.h" #include "ompi/proc/proc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_compare = PMPI_Group_compare #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_compare PMPI_Group_compare #endif static const char FUNC_NAME[] = "MPI_Group_compare"; @@ -56,8 +57,6 @@ int MPI_Group_compare(MPI_Group group1, MPI_Group group2, int *result) { } } - OPAL_CR_NOOP_PROGRESS(); - return_value = ompi_group_compare((ompi_group_t *)group1, (ompi_group_t *)group2, result); return return_value; diff --git a/ompi/mpi/c/group_difference.c b/ompi/mpi/c/group_difference.c index b9a8df11c6a..b28da0ed858 100644 --- a/ompi/mpi/c/group_difference.c +++ b/ompi/mpi/c/group_difference.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,12 +28,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_difference = PMPI_Group_difference #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_difference PMPI_Group_difference #endif static const char FUNC_NAME[] = "MPI_Group_difference"; @@ -53,8 +54,6 @@ int MPI_Group_difference(MPI_Group group1, MPI_Group group2, } } - OPAL_CR_ENTER_LIBRARY(); - err = ompi_group_difference ( group1, group2, new_group ); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME ); } diff --git a/ompi/mpi/c/group_excl.c b/ompi/mpi/c/group_excl.c index 0afb2ccfa91..3b8901960de 100644 --- a/ompi/mpi/c/group_excl.c +++ b/ompi/mpi/c/group_excl.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos Nat Security, LLC. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,19 +28,18 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_excl = PMPI_Group_excl #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_excl PMPI_Group_excl #endif static const char FUNC_NAME[] = "MPI_Group_excl"; int MPI_Group_excl(MPI_Group group, int n, const int ranks[], - MPI_Group *new_group) + MPI_Group *new_group) { ompi_group_t *group_pointer = (ompi_group_t *)group; int i, err, group_size; @@ -50,7 +49,7 @@ int MPI_Group_excl(MPI_Group group, int n, const int ranks[], OMPI_ERR_INIT_FINALIZE(FUNC_NAME); /* verify that group is valid group */ - if ( (MPI_GROUP_NULL == group) || (NULL == group) || + if ( (MPI_GROUP_NULL == group) || (NULL == group) || (NULL == new_group) ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_GROUP, FUNC_NAME); @@ -74,15 +73,13 @@ int MPI_Group_excl(MPI_Group group, int n, const int ranks[], } } /* end if( MPI_CHECK_ARGS) */ - + if ( n == group_size ) { *new_group = MPI_GROUP_EMPTY; OBJ_RETAIN(MPI_GROUP_EMPTY); return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - err = ompi_group_excl ( group, n, ranks, new_group ); - OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME ); + OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME ); } diff --git a/ompi/mpi/c/group_f2c.c b/ompi/mpi/c/group_f2c.c index 0c3202652a0..d6ee5c68f50 100644 --- a/ompi/mpi/c/group_f2c.c +++ b/ompi/mpi/c/group_f2c.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_f2c = PMPI_Group_f2c #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_f2c PMPI_Group_f2c #endif static const char FUNC_NAME[] = "MPI_Group_f2c"; @@ -41,8 +42,6 @@ MPI_Group MPI_Group_f2c(MPI_Fint group_f) { int group_index = OMPI_FINT_2_INT(group_f); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } diff --git a/ompi/mpi/c/group_free.c b/ompi/mpi/c/group_free.c index 374aac9156f..f8f46211133 100644 --- a/ompi/mpi/c/group_free.c +++ b/ompi/mpi/c/group_free.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_free = PMPI_Group_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_free PMPI_Group_free #endif static const char FUNC_NAME[] = "MPI_Group_free"; @@ -65,11 +66,9 @@ int MPI_Group_free(MPI_Group *group) } - OPAL_CR_ENTER_LIBRARY(); ret = ompi_group_free ( group); OMPI_ERRHANDLER_CHECK(ret, MPI_COMM_WORLD, ret, FUNC_NAME); - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/group_incl.c b/ompi/mpi/c/group_incl.c index baab6c5a4f7..21adfbcd156 100644 --- a/ompi/mpi/c/group_incl.c +++ b/ompi/mpi/c/group_incl.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,12 +29,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_incl = PMPI_Group_incl #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_incl PMPI_Group_incl #endif static const char FUNC_NAME[] = "MPI_Group_incl"; @@ -69,20 +68,18 @@ int MPI_Group_incl(MPI_Group group, int n, const int ranks[], MPI_Group *new_gro for (i = 0; i < n; i++) { if ((ranks[i] < 0) || (ranks[i] >= group_size)){ - return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_RANK, + return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_RANK, FUNC_NAME); } } } /* end if( MPI_CHECK_ARGS) */ - + if ( 0 == n ) { *new_group = MPI_GROUP_EMPTY; OBJ_RETAIN(MPI_GROUP_EMPTY); return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - err = ompi_group_incl(group,n,ranks,new_group); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD,err,FUNC_NAME); } diff --git a/ompi/mpi/c/group_intersection.c b/ompi/mpi/c/group_intersection.c index 71828287684..9be11908f8c 100644 --- a/ompi/mpi/c/group_intersection.c +++ b/ompi/mpi/c/group_intersection.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,25 +28,24 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_intersection = PMPI_Group_intersection #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_intersection PMPI_Group_intersection #endif static const char FUNC_NAME[] = "MPI_Group_intersection"; int MPI_Group_intersection(MPI_Group group1, MPI_Group group2, - MPI_Group *new_group) + MPI_Group *new_group) { int err; if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - + /* verify that groups are valid */ if ( (MPI_GROUP_NULL == group1) || (MPI_GROUP_NULL == group2) || ( NULL == group1) || (NULL == group2) || @@ -54,8 +55,6 @@ int MPI_Group_intersection(MPI_Group group1, MPI_Group group2, } } - OPAL_CR_ENTER_LIBRARY(); - err = ompi_group_intersection ( group1, group2, new_group ); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME ); } diff --git a/ompi/mpi/c/group_range_excl.c b/ompi/mpi/c/group_range_excl.c index adc674fefae..1b6c6a8f326 100644 --- a/ompi/mpi/c/group_range_excl.c +++ b/ompi/mpi/c/group_range_excl.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,36 +28,35 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_range_excl = PMPI_Group_range_excl #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_range_excl PMPI_Group_range_excl #endif static const char FUNC_NAME[] = "MPI_Group_range_excl"; int MPI_Group_range_excl(MPI_Group group, int n_triplets, int ranges[][3], - MPI_Group *new_group) + MPI_Group *new_group) { int err, i, group_size, indx; int * elements_int_list; - + /* can't act on NULL group */ if( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( (MPI_GROUP_NULL == group) || (NULL == group) || (NULL == new_group) ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_GROUP, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_GROUP, FUNC_NAME); } group_size = ompi_group_size ( group ); elements_int_list = (int *) malloc(sizeof(int) * (group_size+1)); if (NULL == elements_int_list) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OTHER, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OTHER, FUNC_NAME); } for (i = 0; i < group_size; i++) { @@ -85,7 +86,7 @@ int MPI_Group_range_excl(MPI_Group group, int n_triplets, int ranges[][3], } elements_int_list[indx] = i; } - } else if (ranges[i][0] > ranges[i][1]) { + } else if (ranges[i][0] > ranges[i][1]) { if (ranges[i][2] > 0) { goto error_rank; } @@ -106,12 +107,10 @@ int MPI_Group_range_excl(MPI_Group group, int n_triplets, int ranges[][3], elements_int_list[indx] = i; } } - + free (elements_int_list); } - OPAL_CR_ENTER_LIBRARY(); - err = 
ompi_group_range_excl(group,n_triplets,ranges,new_group); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD,err,FUNC_NAME); diff --git a/ompi/mpi/c/group_range_incl.c b/ompi/mpi/c/group_range_incl.c index 1d7d9ab6b5d..8dcadbd1d15 100644 --- a/ompi/mpi/c/group_range_incl.c +++ b/ompi/mpi/c/group_range_incl.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,19 +28,18 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_range_incl = PMPI_Group_range_incl #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_range_incl PMPI_Group_range_incl #endif static const char FUNC_NAME[] = "MPI_Group_range_incl"; int MPI_Group_range_incl(MPI_Group group, int n_triplets, int ranges[][3], - MPI_Group *new_group) + MPI_Group *new_group) { int err, i,indx; int group_size; @@ -47,7 +48,7 @@ int MPI_Group_range_incl(MPI_Group group, int n_triplets, int ranges[][3], /* can't act on NULL group */ if( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - + if ( (MPI_GROUP_NULL == group) || (NULL == group) || (NULL == new_group) ) { return 
OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_GROUP, @@ -111,8 +112,6 @@ int MPI_Group_range_incl(MPI_Group group, int n_triplets, int ranges[][3], free ( elements_int_list); } - OPAL_CR_ENTER_LIBRARY(); - err = ompi_group_range_incl ( group, n_triplets, ranges, new_group ); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME ); diff --git a/ompi/mpi/c/group_rank.c b/ompi/mpi/c/group_rank.c index 44408af6cda..d88f1c6a35d 100644 --- a/ompi/mpi/c/group_rank.c +++ b/ompi/mpi/c/group_rank.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,21 +27,18 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_rank = PMPI_Group_rank #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_rank PMPI_Group_rank #endif static const char FUNC_NAME[] = "MPI_Group_rank"; -int MPI_Group_rank(MPI_Group group, int *rank) +int MPI_Group_rank(MPI_Group group, int *rank) { - OPAL_CR_NOOP_PROGRESS(); - /* error checking */ if( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/group_size.c b/ompi/mpi/c/group_size.c index d0e59d96bb0..ba62cbf453a 100644 --- a/ompi/mpi/c/group_size.c +++ b/ompi/mpi/c/group_size.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,22 +27,19 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_size = PMPI_Group_size #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_size PMPI_Group_size #endif static const char FUNC_NAME[] = "MPI_Group_size"; -int MPI_Group_size(MPI_Group group, int *size) +int MPI_Group_size(MPI_Group group, int *size) { - OPAL_CR_NOOP_PROGRESS(); - /* error checking */ if( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/group_translate_ranks.c b/ompi/mpi/c/group_translate_ranks.c index 00ed1e06406..ecebaf1b739 100644 --- a/ompi/mpi/c/group_translate_ranks.c +++ b/ompi/mpi/c/group_translate_ranks.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,19 +30,18 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_translate_ranks = PMPI_Group_translate_ranks #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_translate_ranks PMPI_Group_translate_ranks #endif static const char FUNC_NAME[] = "MPI_Group_translate_ranks"; int MPI_Group_translate_ranks(MPI_Group group1, int n_ranks, const int ranks1[], - MPI_Group group2, int ranks2[]) + MPI_Group group2, int ranks2[]) { int err; @@ -50,15 +51,15 @@ int MPI_Group_translate_ranks(MPI_Group group1, int n_ranks, const int ranks1[], if ((MPI_GROUP_NULL == group1) || (MPI_GROUP_NULL == group2) || (NULL == group1) || (NULL == group2)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_GROUP, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_GROUP, FUNC_NAME); } if (n_ranks < 0) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_GROUP, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_GROUP, FUNC_NAME); } if (n_ranks > 0 && ((NULL == ranks1) || (NULL == ranks2 ))) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_GROUP, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_GROUP, FUNC_NAME); } } @@ -67,8 +68,6 @@ int MPI_Group_translate_ranks(MPI_Group group1, int n_ranks, const int ranks1[], return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - err = ompi_group_translate_ranks ( group1, n_ranks, ranks1, group2, ranks2 ); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME ); diff --git a/ompi/mpi/c/group_union.c b/ompi/mpi/c/group_union.c index 7adaaf6d628..16df6dd452f 100644 --- a/ompi/mpi/c/group_union.c +++ b/ompi/mpi/c/group_union.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * 
of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,18 +28,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Group_union = PMPI_Group_union #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Group_union PMPI_Group_union #endif static const char FUNC_NAME[] = "MPI_Group_union"; -int MPI_Group_union(MPI_Group group1, MPI_Group group2, MPI_Group *new_group) +int MPI_Group_union(MPI_Group group1, MPI_Group group2, MPI_Group *new_group) { int err; @@ -48,14 +49,12 @@ int MPI_Group_union(MPI_Group group1, MPI_Group group2, MPI_Group *new_group) if ((MPI_GROUP_NULL == group1) || (MPI_GROUP_NULL == group2) || (NULL == group1) || (NULL == group2) || (NULL == new_group)) { - return + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_GROUP, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - err = ompi_group_union ( group1, group2, new_group ); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME ); } diff --git a/ompi/mpi/c/iallgather.c b/ompi/mpi/c/iallgather.c index f9bec0ee422..490d76d130c 100644 --- a/ompi/mpi/c/iallgather.c +++ b/ompi/mpi/c/iallgather.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and 
The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,11 +14,12 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,12 +33,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Iallgather = PMPI_Iallgather #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Iallgather PMPI_Iallgather #endif static const char FUNC_NAME[] = "MPI_Iallgather"; @@ -60,8 +60,8 @@ int MPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, memchecker_comm(comm); /* check whether the actual send buffer is defined. 
*/ if (MPI_IN_PLACE == sendbuf) { - memchecker_call(&opal_memchecker_base_isdefined, - (char *)(recvbuf)+rank*ext, + memchecker_call(&opal_memchecker_base_isdefined, + (char *)(recvbuf)+rank*ext, recvcount, recvtype); } else { memchecker_datatype(sendtype); @@ -92,11 +92,8 @@ int MPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_iallgather((void *) sendbuf, sendcount, sendtype, + err = comm->c_coll.coll_iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, comm->c_coll.coll_iallgather_module); diff --git a/ompi/mpi/c/iallgatherv.c b/ompi/mpi/c/iallgatherv.c index e8f4efba061..4c6cc0f72fe 100644 --- a/ompi/mpi/c/iallgatherv.c +++ b/ompi/mpi/c/iallgatherv.c @@ -6,18 +6,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 University of Houston. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,12 +33,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Iallgatherv = PMPI_Iallgatherv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Iallgatherv PMPI_Iallgatherv #endif static const char FUNC_NAME[] = "MPI_Iallgatherv"; @@ -64,7 +65,7 @@ int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, (char *)(recvbuf)+displs[i]*ext, recvcounts[i], recvtype); } - + /* check whether the actual send buffer is defined. */ if (MPI_IN_PLACE == sendbuf) { memchecker_call(&opal_memchecker_base_isdefined, @@ -84,7 +85,7 @@ int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (MPI_IN_PLACE == recvbuf) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); @@ -108,18 +109,15 @@ int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COUNT, FUNC_NAME); } } - + if (NULL == displs) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_BUFFER, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_iallgatherv((void *) sendbuf, sendcount, sendtype, - recvbuf, (int *) recvcounts, (int *) displs, + err = comm->c_coll.coll_iallgatherv(sendbuf, sendcount, sendtype, + recvbuf, recvcounts, displs, recvtype, comm, request, comm->c_coll.coll_iallgatherv_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git 
a/ompi/mpi/c/iallreduce.c b/ompi/mpi/c/iallreduce.c index 218eed3dcf1..ed07838703e 100644 --- a/ompi/mpi/c/iallreduce.c +++ b/ompi/mpi/c/iallreduce.c @@ -6,16 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,12 +33,11 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Iallreduce = PMPI_Iallreduce #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Iallreduce PMPI_Iallreduce #endif static const char FUNC_NAME[] = "MPI_Iallreduce"; @@ -92,13 +94,19 @@ int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); + /* MPI standard says that reductions have to have a count of at least 1, + * but some benchmarks (e.g., IMB) calls this function with a count of 0. + * So handle that case. 
+ */ + if (0 == count) { + *request = &ompi_request_empty; + return MPI_SUCCESS; + } /* Invoke the coll component to perform the back-end operation */ OBJ_RETAIN(op); - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_iallreduce((void *) sendbuf, recvbuf, count, datatype, + err = comm->c_coll.coll_iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request, comm->c_coll.coll_iallreduce_module); OBJ_RELEASE(op); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/ialltoall.c b/ompi/mpi/c/ialltoall.c index 9fe00f97be9..71613ddfe35 100644 --- a/ompi/mpi/c/ialltoall.c +++ b/ompi/mpi/c/ialltoall.c @@ -14,7 +14,7 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -33,12 +33,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ialltoall = PMPI_Ialltoall #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ialltoall PMPI_Ialltoall #endif static const char FUNC_NAME[] = "MPI_Ialltoall"; @@ -71,12 +70,15 @@ int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, if (ompi_comm_invalid(comm)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); - } else if (MPI_IN_PLACE == sendbuf || MPI_IN_PLACE == recvbuf) { + } else if ((MPI_IN_PLACE == sendbuf && OMPI_COMM_IS_INTER(comm)) || + MPI_IN_PLACE == recvbuf) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } else { - OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcount); - OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); + if (MPI_IN_PLACE != sendbuf) { + OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcount); + OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); + } OMPI_CHECK_DATATYPE_FOR_RECV(err, recvtype, recvcount); OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); } @@ -90,11 +92,8 @@ int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_ialltoall((void *) sendbuf, sendcount, sendtype, + err = comm->c_coll.coll_ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, comm->c_coll.coll_ialltoall_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/ialltoallv.c b/ompi/mpi/c/ialltoallv.c index 19cb022c1d6..3614d6cded3 100644 --- a/ompi/mpi/c/ialltoallv.c +++ b/ompi/mpi/c/ialltoallv.c @@ -13,7 +13,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -32,12 +32,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ialltoallv = PMPI_Ialltoallv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ialltoallv PMPI_Ialltoallv #endif static const char FUNC_NAME[] = "MPI_Ialltoallv"; @@ -49,7 +48,6 @@ int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispl MPI_Request *request) { int i, size, err; - size_t sendtype_size, recvtype_size; MEMCHECKER( ptrdiff_t recv_ext; @@ -91,9 +89,16 @@ int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispl FUNC_NAME); } + if (MPI_IN_PLACE == sendbuf) { + sendcounts = recvcounts; + sdispls = rdispls; + sendtype = recvtype; + } + if ((NULL == sendcounts) || (NULL == sdispls) || (NULL == recvcounts) || (NULL == rdispls) || - MPI_IN_PLACE == sendbuf || MPI_IN_PLACE == recvbuf) { + (MPI_IN_PLACE == sendbuf && OMPI_COMM_IS_INTER(comm)) || + MPI_IN_PLACE == recvbuf) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } @@ -107,6 +112,7 @@ int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispl if (MPI_IN_PLACE != sendbuf && !OMPI_COMM_IS_INTER(comm)) { int me = ompi_comm_rank(comm); + size_t sendtype_size, recvtype_size; ompi_datatype_type_size(sendtype, &sendtype_size); ompi_datatype_type_size(recvtype, &recvtype_size); if ((sendtype_size*sendcounts[me]) != (recvtype_size*recvcounts[me])) { @@ -115,12 +121,9 @@ int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispl } } - 
OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_ialltoallv((void *) sendbuf, (int *) sendcounts, (int *) sdispls, - sendtype, recvbuf, (int *) recvcounts, (int *) rdispls, + err = comm->c_coll.coll_ialltoallv(sendbuf, sendcounts, sdispls, + sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, request, comm->c_coll.coll_ialltoallv_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ialltoallw.c b/ompi/mpi/c/ialltoallw.c index ececf1271fe..18105650ec6 100644 --- a/ompi/mpi/c/ialltoallw.c +++ b/ompi/mpi/c/ialltoallw.c @@ -13,7 +13,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -32,12 +32,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ialltoallw = PMPI_Ialltoallw #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ialltoallw PMPI_Ialltoallw #endif static const char FUNC_NAME[] = "MPI_Ialltoallw"; @@ -49,7 +48,6 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl MPI_Request *request) { int i, size, err; - size_t sendtype_size, recvtype_size; MEMCHECKER( ptrdiff_t recv_ext; @@ -71,7 +69,7 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl memchecker_datatype(recvtypes[i]); ompi_datatype_type_extent(recvtypes[i], &recv_ext); memchecker_call(&opal_memchecker_base_isaddressable, - (char *)(recvbuf)+sdispls[i]*recv_ext, + (char *)(recvbuf)+rdispls[i]*recv_ext, recvcounts[i], recvtypes[i]); } ); @@ -87,9 +85,16 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl FUNC_NAME); } + if (MPI_IN_PLACE == sendbuf) { + sendcounts = recvcounts; + sdispls = rdispls; + sendtypes = recvtypes; + } + if ((NULL == sendcounts) || (NULL == sdispls) || (NULL == sendtypes) || (NULL == recvcounts) || (NULL == rdispls) || (NULL == recvtypes) || - MPI_IN_PLACE == sendbuf || MPI_IN_PLACE == recvbuf) { + (MPI_IN_PLACE == sendbuf && OMPI_COMM_IS_INTER(comm)) || + MPI_IN_PLACE == recvbuf) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } @@ -103,6 +108,7 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl if (MPI_IN_PLACE != sendbuf && !OMPI_COMM_IS_INTER(comm)) { int me = ompi_comm_rank(comm); + size_t sendtype_size, recvtype_size; ompi_datatype_type_size(sendtypes[me], &sendtype_size); ompi_datatype_type_size(recvtypes[me], &recvtype_size); if ((sendtype_size*sendcounts[me]) != (recvtype_size*recvcounts[me])) { @@ 
-111,13 +117,10 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_ialltoallw((void *) sendbuf, (int *) sendcounts, (int *) sdispls, - (ompi_datatype_t **) sendtypes, recvbuf, (int *) recvcounts, - (int *) rdispls, (ompi_datatype_t **) recvtypes, comm, request, + err = comm->c_coll.coll_ialltoallw(sendbuf, sendcounts, sdispls, + sendtypes, recvbuf, recvcounts, + rdispls, recvtypes, comm, request, comm->c_coll.coll_ialltoallw_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ibarrier.c b/ompi/mpi/c/ibarrier.c index ff30cdd2b03..a218c70dd32 100644 --- a/ompi/mpi/c/ibarrier.c +++ b/ompi/mpi/c/ibarrier.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,18 +28,17 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ibarrier = PMPI_Ibarrier #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ibarrier PMPI_Ibarrier #endif static const char FUNC_NAME[] = "MPI_Ibarrier"; -int MPI_Ibarrier(MPI_Comm comm, MPI_Request *request) +int MPI_Ibarrier(MPI_Comm comm, MPI_Request *request) { int err = MPI_SUCCESS; @@ -54,8 +55,6 @@ int MPI_Ibarrier(MPI_Comm comm, MPI_Request *request) } } - OPAL_CR_ENTER_LIBRARY(); - err = comm->c_coll.coll_ibarrier(comm, request, comm->c_coll.coll_ibarrier_module); /* All done */ diff --git a/ompi/mpi/c/ibcast.c b/ompi/mpi/c/ibcast.c index 4308d465063..3ba3ae1a041 100644 --- a/ompi/mpi/c/ibcast.c +++ b/ompi/mpi/c/ibcast.c @@ -1,9 +1,11 @@ /* * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -16,12 +18,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ibcast = PMPI_Ibcast #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ibcast PMPI_Ibcast #endif static const char FUNC_NAME[] = "MPI_Ibcast"; @@ -34,7 +35,7 @@ int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, MEMCHECKER( memchecker_datatype(datatype); - memchecker_call(&opal_memchecker_base_isdefined, buffer, count, datatype); + memchecker_call(&opal_memchecker_base_isdefined, buffer, count, datatype); memchecker_comm(comm); ); @@ -42,7 +43,7 @@ int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } @@ -60,7 +61,7 @@ int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, if ((root >= ompi_comm_size(comm)) || (root < 0)) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ROOT, FUNC_NAME); } - } + } /* Errors for intercommunicators */ @@ -69,11 +70,9 @@ int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, MPI_ROOT == root || MPI_PROC_NULL == root)) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ROOT, FUNC_NAME); } - } + } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ err = comm->c_coll.coll_ibcast(buffer, count, datatype, root, comm, diff --git a/ompi/mpi/c/ibsend.c b/ompi/mpi/c/ibsend.c index f24dc8256c5..2852ae36a9b 100644 --- a/ompi/mpi/c/ibsend.c +++ b/ompi/mpi/c/ibsend.c @@ -13,6 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +32,11 @@ #include "ompi/mca/pml/base/pml_base_bsend.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ibsend = PMPI_Ibsend #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ibsend PMPI_Ibsend #endif static const char FUNC_NAME[] = "MPI_Ibsend"; @@ -77,12 +78,10 @@ int MPI_Ibsend(const void *buf, int count, MPI_Datatype type, int dest, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); MEMCHECKER ( memchecker_call(&opal_memchecker_base_mem_noaccess, buf, count, type); ); - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(isend((void *) buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm, request)); + rc = MCA_PML_CALL(isend(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm, request)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/iexscan.c b/ompi/mpi/c/iexscan.c index be04b002b94..91b626fbda5 100644 --- a/ompi/mpi/c/iexscan.c +++ b/ompi/mpi/c/iexscan.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Iexscan = PMPI_Iexscan #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Iexscan PMPI_Iexscan #endif static const char FUNC_NAME[] = "MPI_Iexscan"; @@ -74,13 +75,11 @@ int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); /* Invoke the coll component to perform the back-end operation */ OBJ_RETAIN(op); - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_iexscan((void *) sendbuf, recvbuf, count, + err = comm->c_coll.coll_iexscan(sendbuf, recvbuf, count, datatype, op, comm, request, comm->c_coll.coll_iexscan_module); OBJ_RELEASE(op); diff --git a/ompi/mpi/c/igather.c b/ompi/mpi/c/igather.c index d2cba33e9f2..5f2a63c6173 100644 --- a/ompi/mpi/c/igather.c +++ b/ompi/mpi/c/igather.c @@ -15,6 +15,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,12 +33,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Igather = PMPI_Igather #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Igather PMPI_Igather #endif static const char FUNC_NAME[] = "MPI_Igather"; @@ -163,11 +164,8 @@ int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_igather((void *) sendbuf, sendcount, sendtype, recvbuf, + err = comm->c_coll.coll_igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request, comm->c_coll.coll_igather_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/igatherv.c b/ompi/mpi/c/igatherv.c index 7607e9aff47..b2238cffcc0 100644 --- a/ompi/mpi/c/igatherv.c +++ b/ompi/mpi/c/igatherv.c @@ -13,6 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Igatherv = PMPI_Igatherv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Igatherv PMPI_Igatherv #endif static const char FUNC_NAME[] = "MPI_Igatherv"; @@ -47,11 +48,9 @@ int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, int i, size, err; MEMCHECKER( - int rank; ptrdiff_t ext; size = ompi_comm_remote_size(comm); - rank = ompi_comm_rank(comm); ompi_datatype_type_extent(recvtype, &ext); memchecker_comm(comm); @@ -188,12 +187,9 @@ int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_igatherv((void *) sendbuf, sendcount, sendtype, recvbuf, - (int *) recvcounts, (int *) displs, recvtype, + err = comm->c_coll.coll_igatherv(sendbuf, sendcount, sendtype, recvbuf, + recvcounts, displs, recvtype, root, comm, request, comm->c_coll.coll_igatherv_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/improbe.c b/ompi/mpi/c/improbe.c index 269c965111e..d396e303128 100644 --- a/ompi/mpi/c/improbe.c +++ b/ompi/mpi/c/improbe.c @@ -2,10 +2,12 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -18,18 +20,17 @@ #include "ompi/request/request.h" #include "ompi/message/message.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Improbe = PMPI_Improbe #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Improbe PMPI_Improbe #endif static const char FUNC_NAME[] = "MPI_Improbe"; -int MPI_Improbe(int source, int tag, MPI_Comm comm, int *flag, +int MPI_Improbe(int source, int tag, MPI_Comm comm, int *flag, MPI_Message *message, MPI_Status *status) { int rc; @@ -45,7 +46,7 @@ int MPI_Improbe(int source, int tag, MPI_Comm comm, int *flag, rc = MPI_ERR_TAG; } else if (ompi_comm_invalid(comm)) { rc = MPI_ERR_COMM; - } else if ((source != MPI_ANY_SOURCE) && + } else if ((source != MPI_ANY_SOURCE) && (MPI_PROC_NULL != source) && ompi_comm_peer_invalid(comm, source)) { rc = MPI_ERR_RANK; @@ -69,8 +70,6 @@ int MPI_Improbe(int source, int tag, MPI_Comm comm, int *flag, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - rc = MCA_PML_CALL(improbe(source, tag, comm, flag, message, status)); /* Per MPI-1, the MPI_ERROR field is not defined for single-completion calls */ diff --git a/ompi/mpi/c/imrecv.c b/ompi/mpi/c/imrecv.c index a23e4315d94..45250ce3ae7 100644 --- a/ompi/mpi/c/imrecv.c +++ b/ompi/mpi/c/imrecv.c @@ -1,10 +1,12 @@ /* * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -17,18 +19,17 @@ #include "ompi/request/request.h" #include "ompi/message/message.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Imrecv = PMPI_Imrecv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Imrecv PMPI_Imrecv #endif static const char FUNC_NAME[] = "MPI_Imrecv"; -int MPI_Imrecv(void *buf, int count, MPI_Datatype type, +int MPI_Imrecv(void *buf, int count, MPI_Datatype type, MPI_Message *message, MPI_Request *request) { int rc = MPI_SUCCESS; @@ -45,7 +46,7 @@ int MPI_Imrecv(void *buf, int count, MPI_Datatype type, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); OMPI_CHECK_DATATYPE_FOR_RECV(rc, type, count); OMPI_CHECK_USER_BUFFER(rc, buf, type, count); - + if (NULL == message || MPI_MESSAGE_NULL == *message) { rc = MPI_ERR_REQUEST; comm = MPI_COMM_NULL; @@ -64,8 +65,6 @@ int MPI_Imrecv(void *buf, int count, MPI_Datatype type, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - rc = MCA_PML_CALL(imrecv(buf, count, type, message, request)); - OMPI_ERRHANDLER_RETURN(rc, (*message)->comm, rc, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_allgather.c b/ompi/mpi/c/ineighbor_allgather.c index 3391a75fe18..4b5e75c5e53 100644 --- a/ompi/mpi/c/ineighbor_allgather.c +++ b/ompi/mpi/c/ineighbor_allgather.c @@ -14,6 +14,9 @@ * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,13 +33,14 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ineighbor_allgather = PMPI_Ineighbor_allgather #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ineighbor_allgather PMPI_Ineighbor_allgather #endif static const char FUNC_NAME[] = "MPI_Ineighbor_allgather"; @@ -90,13 +94,32 @@ int MPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sen OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcount); } OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); - } - OPAL_CR_ENTER_LIBRARY(); + if( OMPI_COMM_IS_CART(comm) ) { + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + if( 0 > cart->ndims ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_GRAPH(comm) ) { + int degree; + mca_topo_base_graph_neighbors_count(comm, ompi_comm_rank(comm), &degree); + if( 0 > degree ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_DIST_GRAPH(comm) ) { + const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; + int indegree = dist_graph->indegree; + int outdegree = dist_graph->outdegree; + if( indegree < 0 || outdegree < 0 ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + } /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_ineighbor_allgather((void *) sendbuf, sendcount, sendtype, recvbuf, + err = comm->c_coll.coll_ineighbor_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request,
comm->c_coll.coll_ineighbor_allgather_module); diff --git a/ompi/mpi/c/ineighbor_allgatherv.c b/ompi/mpi/c/ineighbor_allgatherv.c index 813756eef48..6b4d9a17a7d 100644 --- a/ompi/mpi/c/ineighbor_allgatherv.c +++ b/ompi/mpi/c/ineighbor_allgatherv.c @@ -14,6 +14,9 @@ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,13 +33,14 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ineighbor_allgatherv = PMPI_Ineighbor_allgatherv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ineighbor_allgatherv PMPI_Ineighbor_allgatherv #endif static const char FUNC_NAME[] = "MPI_Ineighbor_allgatherv"; @@ -113,13 +117,32 @@ int MPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype se if (NULL == displs) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_BUFFER, FUNC_NAME); } - } - OPAL_CR_ENTER_LIBRARY(); + if( OMPI_COMM_IS_CART(comm) ) { + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + if( 0 > cart->ndims ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_GRAPH(comm) ) { + int degree; + mca_topo_base_graph_neighbors_count(comm, ompi_comm_rank(comm), &degree); + if( 0 > degree ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_DIST_GRAPH(comm) ) { + const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph =
comm->c_topo->mtc.dist_graph; + int indegree = dist_graph->indegree; + int outdegree = dist_graph->outdegree; + if( indegree < 0 || outdegree < 0 ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + } /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_ineighbor_allgatherv((void *) sendbuf, sendcount, sendtype, + err = comm->c_coll.coll_ineighbor_allgatherv(sendbuf, sendcount, sendtype, recvbuf, (int *) recvcounts, (int *) displs, recvtype, comm, request, comm->c_coll.coll_ineighbor_allgatherv_module); diff --git a/ompi/mpi/c/ineighbor_alltoall.c b/ompi/mpi/c/ineighbor_alltoall.c index 285965f8a55..6dec2d01879 100644 --- a/ompi/mpi/c/ineighbor_alltoall.c +++ b/ompi/mpi/c/ineighbor_alltoall.c @@ -14,8 +14,9 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,13 +33,14 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ineighbor_alltoall = PMPI_Ineighbor_alltoall #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ineighbor_alltoall PMPI_Ineighbor_alltoall #endif static const char FUNC_NAME[] = "MPI_Ineighbor_alltoall"; @@ -89,13 +91,32 @@ int MPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype send return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TRUNCATE, FUNC_NAME); } } - } - OPAL_CR_ENTER_LIBRARY(); + if( OMPI_COMM_IS_CART(comm) ) { + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + if( 0 > cart->ndims ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_GRAPH(comm) ) { + int degree; + mca_topo_base_graph_neighbors_count(comm, ompi_comm_rank(comm), &degree); + if( 0 > degree ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_DIST_GRAPH(comm) ) { + const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; + int indegree = dist_graph->indegree; + int outdegree = dist_graph->outdegree; + if( indegree < 0 || outdegree < 0 ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + } /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_ineighbor_alltoall((void *) sendbuf, sendcount, sendtype, + err = comm->c_coll.coll_ineighbor_alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, comm->c_coll.coll_ineighbor_alltoall_module); OMPI_ERRHANDLER_RETURN(err, comm, err,
FUNC_NAME); diff --git a/ompi/mpi/c/ineighbor_alltoallv.c b/ompi/mpi/c/ineighbor_alltoallv.c index f2d3a6d1ee9..2aff00c577a 100644 --- a/ompi/mpi/c/ineighbor_alltoallv.c +++ b/ompi/mpi/c/ineighbor_alltoallv.c @@ -13,8 +13,9 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,13 +33,14 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ineighbor_alltoallv = PMPI_Ineighbor_alltoallv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ineighbor_alltoallv PMPI_Ineighbor_alltoallv #endif static const char FUNC_NAME[] = "MPI_Ineighbor_alltoallv"; @@ -113,14 +115,33 @@ int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const i OMPI_CHECK_DATATYPE_FOR_RECV(err, recvtype, recvcounts[i]); OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); } - } - OPAL_CR_ENTER_LIBRARY(); + if( OMPI_COMM_IS_CART(comm) ) { + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + if( 0 > cart->ndims ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_GRAPH(comm) ) { + int degree; + mca_topo_base_graph_neighbors_count(comm, ompi_comm_rank(comm), &degree); + if( 0 > degree ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_DIST_GRAPH(comm) ) { + const
mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; + indegree = dist_graph->indegree; + outdegree = dist_graph->outdegree; + if( indegree < 0 || outdegree < 0 ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + } /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_ineighbor_alltoallv((void *) sendbuf, (int *) sendcounts, (int *) sdispls, - sendtype, recvbuf, (int *) recvcounts, (int *) rdispls, + err = comm->c_coll.coll_ineighbor_alltoallv(sendbuf, sendcounts, sdispls, + sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, request, comm->c_coll.coll_ineighbor_alltoallv_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_alltoallw.c b/ompi/mpi/c/ineighbor_alltoallw.c index 8f82131151f..d8299158028 100644 --- a/ompi/mpi/c/ineighbor_alltoallw.c +++ b/ompi/mpi/c/ineighbor_alltoallw.c @@ -13,8 +13,9 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,13 +33,14 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ineighbor_alltoallw = PMPI_Ineighbor_alltoallw #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ineighbor_alltoallw PMPI_Ineighbor_alltoallw #endif static const char FUNC_NAME[] = "MPI_Ineighbor_alltoallw"; @@ -111,14 +113,33 @@ int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const M OMPI_CHECK_DATATYPE_FOR_RECV(err, recvtypes[i], recvcounts[i]); OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); } - } - OPAL_CR_ENTER_LIBRARY(); + if( OMPI_COMM_IS_CART(comm) ) { + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + if( 0 > cart->ndims ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_GRAPH(comm) ) { + int degree; + mca_topo_base_graph_neighbors_count(comm, ompi_comm_rank(comm), &degree); + if( 0 > degree ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_DIST_GRAPH(comm) ) { + const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; + indegree = dist_graph->indegree; + outdegree = dist_graph->outdegree; + if( indegree < 0 || outdegree < 0 ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + } /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_ineighbor_alltoallw((void *) sendbuf, (int *) sendcounts, (MPI_Aint *) sdispls, (ompi_datatype_t **) sendtypes, - recvbuf, (int *) recvcounts, (MPI_Aint *) rdispls, (ompi_datatype_t **) recvtypes, comm, request, + err
= comm->c_coll.coll_ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, + recvbuf, recvcounts, rdispls, recvtypes, comm, request, comm->c_coll.coll_ineighbor_alltoallw_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_c2f.c b/ompi/mpi/c/info_c2f.c index 8a00cdc1f96..6583fcb7425 100644 --- a/ompi/mpi/c/info_c2f.c +++ b/ompi/mpi/c/info_c2f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,22 +27,19 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/info/info.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Info_c2f = PMPI_Info_c2f #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Info_c2f PMPI_Info_c2f #endif static const char FUNC_NAME[] = "MPI_Info_c2f"; -MPI_Fint MPI_Info_c2f(MPI_Info info) +MPI_Fint MPI_Info_c2f(MPI_Info info) { - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/info_create.c b/ompi/mpi/c/info_create.c index 933a5655324..b57e5c9c93b 100644 --- a/ompi/mpi/c/info_create.c +++ b/ompi/mpi/c/info_create.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,18 +26,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Info_create = PMPI_Info_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Info_create PMPI_Info_create #endif static const char FUNC_NAME[] = "MPI_Info_create"; /** - * Create a new info object + * Create a new info object * * @param info Pointer to the MPI_Info handle * @@ -46,11 +47,9 @@ static const char FUNC_NAME[] = "MPI_Info_create"; * When an MPI_Info object is not being used, it should be freed using * MPI_Info_free */ -int MPI_Info_create(MPI_Info *info) +int MPI_Info_create(MPI_Info *info) { - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info) { @@ -62,7 +61,7 @@ int MPI_Info_create(MPI_Info *info) /* * Call the object create function. This function not only * allocates the space for MPI_Info, but also calls all the - * relevant init functions. Should I check if the fortran + * relevant init functions. Should I check if the fortran * handle is valid */ (*info) = OBJ_NEW(ompi_info_t); diff --git a/ompi/mpi/c/info_delete.c b/ompi/mpi/c/info_delete.c index 6f962b1b3a0..127cd61d972 100644 --- a/ompi/mpi/c/info_delete.c +++ b/ompi/mpi/c/info_delete.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include #include -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Info_delete = PMPI_Info_delete #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Info_delete PMPI_Info_delete #endif static const char FUNC_NAME[] = "MPI_Info_delete"; @@ -44,7 +45,7 @@ static const char FUNC_NAME[] = "MPI_Info_delete"; * Delete a (key,value) pair from "info" * * @param info MPI_Info handle on which we need to operate - * @param key The key portion of the (key,value) pair that + * @param key The key portion of the (key,value) pair that * needs to be deleted * * @retval MPI_SUCCESS If the (key,val) pair was deleted @@ -74,8 +75,6 @@ int MPI_Info_delete(MPI_Info info, const char *key) { } } - OPAL_CR_ENTER_LIBRARY(); - err = ompi_info_delete (info, key); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_dup.c b/ompi/mpi/c/info_dup.c index f65e79b5a4d..df3180746c0 100644 --- a/ompi/mpi/c/info_dup.c +++ b/ompi/mpi/c/info_dup.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Info_dup = PMPI_Info_dup #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Info_dup PMPI_Info_dup #endif static const char FUNC_NAME[] = "MPI_Info_dup"; @@ -56,10 +57,10 @@ int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo) { /** * Here we need to do 2 things * 1. Create a newinfo object using MPI_Info_create - * 2. Fetch all the values from info and copy them to + * 2. Fetch all the values from info and copy them to * newinfo using MPI_Info_set * The new implementation facilitates traversal in many ways. - * I have chosen to get the number of elements on the list + * I have chosen to get the number of elements on the list * and copy them to newinfo one by one */ @@ -78,8 +79,6 @@ int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo) { FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* * Now to actually duplicate all the values */ diff --git a/ompi/mpi/c/info_f2c.c b/ompi/mpi/c/info_f2c.c index d337e222038..8c6383a20e6 100644 --- a/ompi/mpi/c/info_f2c.c +++ b/ompi/mpi/c/info_f2c.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/info/info.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Info_f2c = PMPI_Info_f2c #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Info_f2c PMPI_Info_f2c #endif static const char FUNC_NAME[] = "MPI_Info_f2c"; @@ -43,12 +44,10 @@ static const char FUNC_NAME[] = "MPI_Info_f2c"; * @param info Integer handle to an MPI_INFO object * @retval C handle corresponding to MPI_INFO object */ -MPI_Info MPI_Info_f2c(MPI_Fint info) +MPI_Info MPI_Info_f2c(MPI_Fint info) { int info_index = OMPI_FINT_2_INT(info); - OPAL_CR_NOOP_PROGRESS(); - /* check the arguments */ if (MPI_PARAM_CHECK) { @@ -58,9 +57,9 @@ MPI_Info MPI_Info_f2c(MPI_Fint info) /* Per MPI-2:4.12.4, do not invoke an error handler if we get an invalid fortran handle. If we get an invalid fortran handle, return an invalid C handle. */ - - if (info_index < 0 || - info_index >= + + if (info_index < 0 || + info_index >= opal_pointer_array_get_size(&ompi_info_f_to_c_table)) { return NULL; } diff --git a/ompi/mpi/c/info_free.c b/ompi/mpi/c/info_free.c index 60284378dec..651c5acd05e 100644 --- a/ompi/mpi/c/info_free.c +++ b/ompi/mpi/c/info_free.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Info_free = PMPI_Info_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Info_free PMPI_Info_free #endif static const char FUNC_NAME[] = "MPI_Info_free"; @@ -45,7 +46,7 @@ static const char FUNC_NAME[] = "MPI_Info_free"; * * Upon successful completion, 'info' will be set to 'MPI_INFO_NULL'. */ -int MPI_Info_free(MPI_Info *info) +int MPI_Info_free(MPI_Info *info) { int err; @@ -63,7 +64,6 @@ int MPI_Info_free(MPI_Info *info) } } - OPAL_CR_ENTER_LIBRARY(); err = ompi_info_free(info); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); diff --git a/ompi/mpi/c/info_get.c b/ompi/mpi/c/info_get.c index b380d0fda24..6dcf60c9d03 100644 --- a/ompi/mpi/c/info_get.c +++ b/ompi/mpi/c/info_get.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include #include -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Info_get = PMPI_Info_get #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Info_get PMPI_Info_get #endif static const char FUNC_NAME[] = "MPI_Info_get"; @@ -59,7 +60,7 @@ static const char FUNC_NAME[] = "MPI_Info_get"; * to allow for for the null terminator. */ int MPI_Info_get(MPI_Info info, const char *key, int valuelen, - char *value, int *flag) + char *value, int *flag) { int err; int key_length; @@ -97,8 +98,6 @@ int MPI_Info_get(MPI_Info info, const char *key, int valuelen, } } - OPAL_CR_ENTER_LIBRARY(); - err = ompi_info_get (info, key, valuelen, value, flag); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_get_nkeys.c b/ompi/mpi/c/info_get_nkeys.c index e9bc8384f6a..1b687c2ae37 100644 --- a/ompi/mpi/c/info_get_nkeys.c +++ b/ompi/mpi/c/info_get_nkeys.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Info_get_nkeys = PMPI_Info_get_nkeys #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Info_get_nkeys PMPI_Info_get_nkeys #endif static const char FUNC_NAME[] = "MPI_Info_get_nkeys"; @@ -46,10 +47,10 @@ static const char FUNC_NAME[] = "MPI_Info_get_nkeys"; * @retval MPI_ERR_ARG * @retval MPI_ERR_INFO * - * This function returns the number of elements in the list + * This function returns the number of elements in the list * containing the key-value pairs */ -int MPI_Info_get_nkeys(MPI_Info info, int *nkeys) +int MPI_Info_get_nkeys(MPI_Info info, int *nkeys) { int err; @@ -66,8 +67,6 @@ int MPI_Info_get_nkeys(MPI_Info info, int *nkeys) } } - OPAL_CR_ENTER_LIBRARY(); - err = ompi_info_get_nkeys(info, nkeys); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_get_nthkey.c b/ompi/mpi/c/info_get_nthkey.c index 1b0cebad3a0..99a17cb7a6c 100644 --- a/ompi/mpi/c/info_get_nthkey.c +++ b/ompi/mpi/c/info_get_nthkey.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/info/info.h" #include -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Info_get_nthkey = PMPI_Info_get_nthkey #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Info_get_nthkey PMPI_Info_get_nthkey #endif static const char FUNC_NAME[] = "MPI_Info_get_nthkey"; @@ -42,13 +43,13 @@ static const char FUNC_NAME[] = "MPI_Info_get_nthkey"; * @param info info object (handle) * @param n index of key to retrieve (integer) * @param key character string of at least 'MPI_MAX_INFO_KEY' characters - * + * * @retval MPI_SUCCESS * @retval MPI_ERR_ARG * @retval MPI_ERR_INFO * @retval MPI_ERR_INFO_KEY */ -int MPI_Info_get_nthkey(MPI_Info info, int n, char *key) +int MPI_Info_get_nthkey(MPI_Info info, int n, char *key) { int nkeys; int err; @@ -75,7 +76,6 @@ int MPI_Info_get_nthkey(MPI_Info info, int n, char *key) } } - OPAL_CR_ENTER_LIBRARY(); /* Keys are indexed on 0, which makes the "n" parameter offset by 1 from the value returned by get_nkeys(). So be sure to @@ -84,11 +84,10 @@ int MPI_Info_get_nthkey(MPI_Info info, int n, char *key) err = ompi_info_get_nkeys(info, &nkeys); OMPI_ERRHANDLER_CHECK(err, MPI_COMM_WORLD, err, FUNC_NAME); if (n > (nkeys - 1)) { - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_INFO_KEY, FUNC_NAME); } - + /* Everything seems alright. Call the back end key copy */ err = ompi_info_get_nthkey (info, n, key); diff --git a/ompi/mpi/c/info_get_valuelen.c b/ompi/mpi/c/info_get_valuelen.c index 65cbf1e5bd4..cae2b9e2229 100644 --- a/ompi/mpi/c/info_get_valuelen.c +++ b/ompi/mpi/c/info_get_valuelen.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include #include -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Info_get_valuelen = PMPI_Info_get_valuelen #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Info_get_valuelen PMPI_Info_get_valuelen #endif static const char FUNC_NAME[] = "MPI_Info_get_valuelen"; @@ -55,11 +56,11 @@ static const char FUNC_NAME[] = "MPI_Info_get_valuelen"; * @retval MPI_ERR_INFO_KEY * * The length returned in C and C++ does not include the end-of-string - * character. If the 'key' is not found on 'info', 'valuelen' is left + * character. If the 'key' is not found on 'info', 'valuelen' is left * alone. */ int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, - int *flag) + int *flag) { int key_length; int err; @@ -76,7 +77,7 @@ int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, FUNC_NAME); } key_length = (key) ? 
(int)strlen (key) : 0; - if ((NULL == key) || (0 == key_length) || + if ((NULL == key) || (0 == key_length) || (MPI_MAX_INFO_KEY <= key_length)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INFO_KEY, FUNC_NAME); @@ -87,8 +88,6 @@ int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, } } - OPAL_CR_ENTER_LIBRARY(); - err = ompi_info_get_valuelen (info, key, valuelen, flag); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_set.c b/ompi/mpi/c/info_set.c index bfbd55bff03..46262d17a3d 100644 --- a/ompi/mpi/c/info_set.c +++ b/ompi/mpi/c/info_set.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,12 +29,11 @@ #include #include -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Info_set = PMPI_Info_set #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Info_set PMPI_Info_set #endif static const char FUNC_NAME[] = "MPI_Info_set"; @@ -54,12 +55,12 @@ static const char FUNC_NAME[] = "MPI_Info_set"; * * MPI_Info_set adds the (key,value) pair to info, and overrides * the value if for the same key a previsou value was set. key and - * value must be NULL terminated strings in C. In Fortan, leading - * and trailing spaces in key and value are stripped. 
If either + * value must be NULL terminated strings in C. In Fortan, leading + * and trailing spaces in key and value are stripped. If either * key or value is greater than the allowed maxima, MPI_ERR_INFO_KEY * and MPI_ERR_INFO_VALUE are raised */ -int MPI_Info_set(MPI_Info info, const char *key, const char *value) +int MPI_Info_set(MPI_Info info, const char *key, const char *value) { int err; int key_length; @@ -89,20 +90,18 @@ int MPI_Info_set(MPI_Info info, const char *key, const char *value) } value_length = (value) ? (int)strlen (value) : 0; - if ((NULL == value) || (0 == value_length) || + if ((NULL == value) || (0 == value_length) || (MPI_MAX_INFO_VAL <= value_length)) { return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_INFO_VALUE, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - /* * If all is right with the arguments, then call the back-end * allocator. */ - + err = ompi_info_set (info, key, value); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/init.c b/ompi/mpi/c/init.c index b9f5e496c2e..668b2da5324 100644 --- a/ompi/mpi/c/init.c +++ b/ompi/mpi/c/init.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,12 +30,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Init = PMPI_Init #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Init PMPI_Init #endif static const char FUNC_NAME[] = "MPI_Init"; @@ -46,25 +47,6 @@ int MPI_Init(int *argc, char ***argv) char *env; int required = MPI_THREAD_SINGLE; - /* Ensure that we were not already initialized or finalized */ - - if (ompi_mpi_finalized) { - if (0 == ompi_comm_rank(MPI_COMM_WORLD)) { - opal_show_help("help-mpi-api.txt", - "mpi-function-after-finalize", true, FUNC_NAME); - } - return ompi_errhandler_invoke(NULL, NULL, - OMPI_ERRHANDLER_TYPE_COMM, - MPI_ERR_OTHER, FUNC_NAME); - } else if (ompi_mpi_initialized) { - if (0 == ompi_comm_rank(MPI_COMM_WORLD)) { - opal_show_help("help-mpi-api.txt", "mpi-initialize-twice", - true, FUNC_NAME); - } - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OTHER, - FUNC_NAME); - } - /* check for environment overrides for required thread level. If there is, check to see that it is a valid/supported thread level. If not, default to MPI_THREAD_MULTIPLE. */ @@ -99,7 +81,5 @@ int MPI_Init(int *argc, char ***argv) err, FUNC_NAME); } - OPAL_CR_INIT_LIBRARY(); - return MPI_SUCCESS; } diff --git a/ompi/mpi/c/init_thread.c b/ompi/mpi/c/init_thread.c index 7dd0b0fad1e..1b30e2ca0a9 100644 --- a/ompi/mpi/c/init_thread.c +++ b/ompi/mpi/c/init_thread.c @@ -5,15 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,19 +29,18 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Init_thread = PMPI_Init_thread #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Init_thread PMPI_Init_thread #endif static const char FUNC_NAME[] = "MPI_Init_thread"; int MPI_Init_thread(int *argc, char ***argv, int required, - int *provided) + int *provided) { int err; @@ -59,23 +61,6 @@ int MPI_Init_thread(int *argc, char ***argv, int required, *provided = MPI_THREAD_SINGLE; #endif - /* Ensure that we were not already initialized or finalized */ - - if (ompi_mpi_finalized) { - if (0 == ompi_comm_rank(MPI_COMM_WORLD)) { - opal_show_help("help-mpi-api.txt", "mpi-function-after-finalize", - true, FUNC_NAME); - } - return ompi_errhandler_invoke(NULL, NULL, OMPI_ERRHANDLER_TYPE_COMM, - MPI_ERR_OTHER, FUNC_NAME); - } else if (ompi_mpi_initialized) { - if (0 == ompi_comm_rank(MPI_COMM_WORLD)) { - opal_show_help("help-mpi-api.txt", "mpi-initialize-twice", - true, FUNC_NAME); - } - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OTHER, FUNC_NAME); - } - /* Call the back-end initialization function (we need to put as little in this function as possible so that if it's profiled, we don't lose anything) */ @@ -97,7 +82,5 @@ int MPI_Init_thread(int *argc, char ***argv, int required, err, FUNC_NAME); } - OPAL_CR_INIT_LIBRARY(); - return MPI_SUCCESS; } diff --git a/ompi/mpi/c/initialized.c 
b/ompi/mpi/c/initialized.c index 296f89c41e1..014ec9cdaa8 100644 --- a/ompi/mpi/c/initialized.c +++ b/ompi/mpi/c/initialized.c @@ -5,14 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,22 +26,27 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Initialized = PMPI_Initialized #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Initialized PMPI_Initialized #endif static const char FUNC_NAME[] = "MPI_Initialized"; -int MPI_Initialized(int *flag) +int MPI_Initialized(int *flag) { MPI_Comm null = NULL; - OPAL_CR_NOOP_PROGRESS(); + /* We must obtain the lock to guarantee consistent values of + ompi_mpi_initialized and ompi_mpi_finalized. Note, too, that + this lock is held for the bulk of the duration of + ompi_mpi_init() and ompi_mpi_finalize(), so when we get the + lock, we are guaranteed that some other thread is not part way + through initialization or finalization.
*/ + opal_mutex_lock(&ompi_mpi_bootstrap_mutex); if (MPI_PARAM_CHECK) { if (NULL == flag) { @@ -48,17 +56,19 @@ int MPI_Initialized(int *flag) MPI_Finalize) or not */ if (ompi_mpi_initialized && !ompi_mpi_finalized) { + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } else { + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); return OMPI_ERRHANDLER_INVOKE(null, MPI_ERR_ARG, FUNC_NAME); } } } - - /* Pretty simple */ *flag = ompi_mpi_initialized; + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); + return MPI_SUCCESS; } diff --git a/ompi/mpi/c/intercomm_create.c b/ompi/mpi/c/intercomm_create.c index f486c4a4087..10f0e8ad2ca 100644 --- a/ompi/mpi/c/intercomm_create.c +++ b/ompi/mpi/c/intercomm_create.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,19 +6,21 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 University of Houston. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,12 +34,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Intercomm_create = PMPI_Intercomm_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Intercomm_create PMPI_Intercomm_create #endif static const char FUNC_NAME[] = "MPI_Intercomm_create"; @@ -78,7 +80,6 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, */ } - OPAL_CR_ENTER_LIBRARY(); local_size = ompi_comm_size ( local_comm ); local_rank = ompi_comm_rank ( local_comm ); @@ -95,12 +96,10 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, if ( local_rank == local_leader ) { if ( ompi_comm_invalid ( bridge_comm ) || (bridge_comm->c_flags & OMPI_COMM_INTER) ) { - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE ( local_comm, MPI_ERR_COMM, FUNC_NAME); } if ( (remote_leader < 0) || (remote_leader >= ompi_comm_size(bridge_comm))) { - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE ( local_comm, MPI_ERR_ARG, FUNC_NAME); } @@ -172,10 +171,9 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, /* put group elements in the list */ for (j = 0; j < rsize; j++) { new_group_pointer->grp_proc_pointers[j] = rprocs[j]; + OBJ_RETAIN(rprocs[j]); } - ompi_group_increment_proc_count(new_group_pointer); - rc = ompi_comm_set ( &newcomp, /* new comm */ local_comm, /* old comm */ local_comm->c_local_group->grp_proc_count, /* local_size */ @@ -197,37 +195,24 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, goto err_exit; } - ompi_group_decrement_proc_count (new_group_pointer); OBJ_RELEASE(new_group_pointer); new_group_pointer = MPI_GROUP_NULL; /* Determine context id. 
It is identical to f_2_c_handle */ - rc = ompi_comm_nextcid ( newcomp, /* new comm */ - local_comm, /* old comm */ - bridge_comm, /* bridge comm */ - &lleader, /* local leader */ - &rleader, /* remote_leader */ - OMPI_COMM_CID_INTRA_BRIDGE, /* mode */ - -1 ); /* send_first */ - + rc = ompi_comm_nextcid (newcomp, local_comm, bridge_comm, &lleader, + &rleader, false, OMPI_COMM_CID_INTRA_BRIDGE); if ( MPI_SUCCESS != rc ) { goto err_exit; } /* activate comm and init coll-module */ - rc = ompi_comm_activate ( &newcomp, - local_comm, /* old comm */ - bridge_comm, /* bridge comm */ - &lleader, /* local leader */ - &rleader, /* remote_leader */ - OMPI_COMM_CID_INTRA_BRIDGE, /* mode */ - -1 ); /* send_first */ + rc = ompi_comm_activate (&newcomp, local_comm, bridge_comm, &lleader, &rleader, + false, OMPI_COMM_CID_INTRA_BRIDGE); if ( MPI_SUCCESS != rc ) { goto err_exit; } err_exit: - OPAL_CR_EXIT_LIBRARY(); if ( NULL != rprocs ) { free ( rprocs ); diff --git a/ompi/mpi/c/intercomm_merge.c b/ompi/mpi/c/intercomm_merge.c index 1b837448418..55258b637ef 100644 --- a/ompi/mpi/c/intercomm_merge.c +++ b/ompi/mpi/c/intercomm_merge.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,17 +6,21 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 University of Houston. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +34,11 @@ #include "ompi/proc/proc.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Intercomm_merge = PMPI_Intercomm_merge #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Intercomm_merge PMPI_Intercomm_merge #endif static const char FUNC_NAME[] = "MPI_Intercomm_merge"; @@ -57,20 +61,18 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, ); if ( MPI_PARAM_CHECK ) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid ( intercomm ) || - !( intercomm->c_flags & OMPI_COMM_INTER ) ) + !( intercomm->c_flags & OMPI_COMM_INTER ) ) return OMPI_ERRHANDLER_INVOKE ( MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); if ( NULL == newcomm ) - return OMPI_ERRHANDLER_INVOKE ( intercomm, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE ( intercomm, MPI_ERR_ARG, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - local_size = ompi_comm_size ( intercomm ); remote_size = ompi_comm_remote_size ( intercomm ); total_size = local_size + remote_size; @@ -113,36 +115,24 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, goto exit; } - ompi_group_decrement_proc_count(new_group_pointer); OBJ_RELEASE(new_group_pointer); new_group_pointer = MPI_GROUP_NULL; - /* Determine context id. 
It is identical to f_2_c_handle */ - rc = ompi_comm_nextcid ( newcomp, /* new comm */ - intercomm, /* old comm */ - NULL, /* bridge comm */ - NULL, /* local leader */ - NULL, /* remote_leader */ - OMPI_COMM_CID_INTER, /* mode */ - -1 ); /* send_first */ + /* Determine context id */ + rc = ompi_comm_nextcid (newcomp, intercomm, NULL, NULL, NULL, false, + OMPI_COMM_CID_INTER); if ( OMPI_SUCCESS != rc ) { goto exit; } /* activate communicator and init coll-module */ - rc = ompi_comm_activate( &newcomp, /* new comm */ - intercomm, /* old comm */ - NULL, /* bridge comm */ - NULL, /* local leader */ - NULL, /* remote_leader */ - OMPI_COMM_CID_INTER, /* mode */ - -1 ); /* send_first */ + rc = ompi_comm_activate (&newcomp, intercomm, NULL, NULL, NULL, false, + OMPI_COMM_CID_INTER); if ( OMPI_SUCCESS != rc ) { goto exit; } exit: - OPAL_CR_EXIT_LIBRARY(); if ( NULL != procs ) { free ( procs ); diff --git a/ompi/mpi/c/iprobe.c b/ompi/mpi/c/iprobe.c index c6f9385f25e..3725fafab15 100644 --- a/ompi/mpi/c/iprobe.c +++ b/ompi/mpi/c/iprobe.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/memchecker.h" #include "ompi/request/request.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Iprobe = PMPI_Iprobe #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Iprobe PMPI_Iprobe #endif static const char FUNC_NAME[] = "MPI_Iprobe"; @@ -52,7 +53,7 @@ int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status rc = MPI_ERR_TAG; } else if (ompi_comm_invalid(comm)) { rc = MPI_ERR_COMM; - } else if ((source != MPI_ANY_SOURCE) && + } else if ((source != MPI_ANY_SOURCE) && (MPI_PROC_NULL != source) && ompi_comm_peer_invalid(comm, source)) { rc = MPI_ERR_RANK; @@ -74,8 +75,6 @@ int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - rc = MCA_PML_CALL(iprobe(source, tag, comm, flag, status)); /* diff --git a/ompi/mpi/c/irecv.c b/ompi/mpi/c/irecv.c index a290b60d4b6..1c15051ed86 100644 --- a/ompi/mpi/c/irecv.c +++ b/ompi/mpi/c/irecv.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,12 +28,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Irecv = PMPI_Irecv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Irecv PMPI_Irecv #endif static const char FUNC_NAME[] = "MPI_Irecv"; @@ -51,12 +52,12 @@ int MPI_Irecv(void *buf, int count, MPI_Datatype type, int source, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); OMPI_CHECK_DATATYPE_FOR_RECV(rc, type, count); OMPI_CHECK_USER_BUFFER(rc, buf, type, count); - + if (ompi_comm_invalid(comm)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (((tag < 0) && (tag != MPI_ANY_TAG)) || (tag > mca_pml.pml_max_tag)) { rc = MPI_ERR_TAG; - } else if ((MPI_ANY_SOURCE != source) && + } else if ((MPI_ANY_SOURCE != source) && (MPI_PROC_NULL != source) && ompi_comm_peer_invalid(comm, source)) { rc = MPI_ERR_RANK; @@ -71,8 +72,6 @@ int MPI_Irecv(void *buf, int count, MPI_Datatype type, int source, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - MEMCHECKER ( memchecker_call(&opal_memchecker_base_mem_noaccess, buf, count, type); ); diff --git a/ompi/mpi/c/ireduce.c b/ompi/mpi/c/ireduce.c index 97b2a443cee..3033cd29e92 100644 --- a/ompi/mpi/c/ireduce.c +++ b/ompi/mpi/c/ireduce.c @@ -13,6 +13,9 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +33,11 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ireduce = PMPI_Ireduce #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ireduce PMPI_Ireduce #endif static const char FUNC_NAME[] = "MPI_Ireduce"; @@ -119,12 +121,18 @@ int MPI_Ireduce(const void *sendbuf, void *recvbuf, int count, } } - OPAL_CR_ENTER_LIBRARY(); + /* MPI standard says that reductions have to have a count of at least 1, + * but some benchmarks (e.g., IMB) calls this function with a count of 0. + * So handle that case. + */ + if (0 == count) { + *request = &ompi_request_empty; + return MPI_SUCCESS; + } /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ OBJ_RETAIN(op); - err = comm->c_coll.coll_ireduce((void *) sendbuf, recvbuf, count, + err = comm->c_coll.coll_ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, request, comm->c_coll.coll_ireduce_module); OBJ_RELEASE(op); diff --git a/ompi/mpi/c/ireduce_scatter.c b/ompi/mpi/c/ireduce_scatter.c index 825e20684fc..28428e83261 100644 --- a/ompi/mpi/c/ireduce_scatter.c +++ b/ompi/mpi/c/ireduce_scatter.c @@ -13,6 +13,9 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +33,11 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ireduce_scatter = PMPI_Ireduce_scatter #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ireduce_scatter PMPI_Ireduce_scatter #endif static const char FUNC_NAME[] = "MPI_Ireduce_scatter"; @@ -44,7 +46,7 @@ static const char FUNC_NAME[] = "MPI_Ireduce_scatter"; int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request) { - int i, err, size; + int i, err, size, count; MEMCHECKER( int rank; @@ -109,13 +111,25 @@ int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts } } - OPAL_CR_ENTER_LIBRARY(); + /* MPI standard says that reductions have to have a count of at least 1, + * but some benchmarks (e.g., IMB) calls this function with a count of 0. + * So handle that case. 
+ */ + size = ompi_comm_size(comm); + for (count = i = 0; i < size; ++i) { + if (0 == recvcounts[i]) { + ++count; + } + } + if (size == count) { + *request = &ompi_request_empty; + return MPI_SUCCESS; + } /* Invoke the coll component to perform the back-end operation */ OBJ_RETAIN(op); - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_ireduce_scatter((void *) sendbuf, recvbuf, (int *) recvcounts, + err = comm->c_coll.coll_ireduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, comm, request, comm->c_coll.coll_ireduce_scatter_module); OBJ_RELEASE(op); diff --git a/ompi/mpi/c/ireduce_scatter_block.c b/ompi/mpi/c/ireduce_scatter_block.c index 5cb1d8164bb..d4ad0607ad2 100644 --- a/ompi/mpi/c/ireduce_scatter_block.c +++ b/ompi/mpi/c/ireduce_scatter_block.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,12 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -31,19 +33,18 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ireduce_scatter_block = PMPI_Ireduce_scatter_block #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ireduce_scatter_block PMPI_Ireduce_scatter_block #endif static const char FUNC_NAME[] = "MPI_Ireduce_scatter_block"; int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, - MPI_Datatype datatype, MPI_Op op, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request) { int err; @@ -51,17 +52,17 @@ int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MEMCHECKER( memchecker_comm(comm); memchecker_datatype(datatype); - + /* check receive buffer of current proccess, whether it's addressable. */ memchecker_call(&opal_memchecker_base_isaddressable, recvbuf, recvcount, datatype); - + /* check whether the actual send buffer is defined. 
*/ if(MPI_IN_PLACE == sendbuf) { memchecker_call(&opal_memchecker_base_isdefined, recvbuf, recvcount, datatype); } else { memchecker_call(&opal_memchecker_base_isdefined, sendbuf, recvcount, datatype); - + } ); @@ -70,7 +71,7 @@ int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } @@ -92,13 +93,10 @@ int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ OBJ_RETAIN(op); - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_ireduce_scatter_block((void *) sendbuf, recvbuf, recvcount, + err = comm->c_coll.coll_ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm, request, comm->c_coll.coll_ireduce_scatter_block_module); OBJ_RELEASE(op); diff --git a/ompi/mpi/c/irsend.c b/ompi/mpi/c/irsend.c index 06ab28792d7..accec2c2c4f 100644 --- a/ompi/mpi/c/irsend.c +++ b/ompi/mpi/c/irsend.c @@ -13,6 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +32,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Irsend = PMPI_Irsend #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Irsend PMPI_Irsend #endif static const char FUNC_NAME[] = "MPI_Irsend"; @@ -78,13 +79,10 @@ int MPI_Irsend(const void *buf, int count, MPI_Datatype type, int dest, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - MEMCHECKER ( memchecker_call(&opal_memchecker_base_mem_noaccess, buf, count, type); ); - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(isend((void *) buf,count,type,dest,tag, + rc = MCA_PML_CALL(isend(buf,count,type,dest,tag, MCA_PML_BASE_SEND_READY,comm,request)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/is_thread_main.c b/ompi/mpi/c/is_thread_main.c index 01cba8b4bf7..29810d7d9ee 100644 --- a/ompi/mpi/c/is_thread_main.c +++ b/ompi/mpi/c/is_thread_main.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,21 +27,18 @@ #include "ompi/runtime/mpiruntime.h" #include "opal/threads/threads.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Is_thread_main = PMPI_Is_thread_main #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Is_thread_main PMPI_Is_thread_main #endif static const char FUNC_NAME[] = "MPI_Is_thread_main"; -int MPI_Is_thread_main(int *flag) +int MPI_Is_thread_main(int *flag) { - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == flag) { diff --git a/ompi/mpi/c/iscan.c b/ompi/mpi/c/iscan.c index ab9e546ca9e..a15586b3231 100644 --- a/ompi/mpi/c/iscan.c +++ b/ompi/mpi/c/iscan.c @@ -13,6 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +32,11 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Iscan = PMPI_Iscan #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Iscan PMPI_Iscan #endif static const char FUNC_NAME[] = "MPI_Iscan"; @@ -88,13 +89,10 @@ int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the coll component to actually perform the allgather */ OBJ_RETAIN(op); - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_iscan((void *) sendbuf, recvbuf, count, + err = comm->c_coll.coll_iscan(sendbuf, recvbuf, count, datatype, op, comm, request, comm->c_coll.coll_iscan_module); diff --git a/ompi/mpi/c/iscatter.c b/ompi/mpi/c/iscatter.c index 389b278c095..e57da5d4fc5 100644 --- a/ompi/mpi/c/iscatter.c +++ b/ompi/mpi/c/iscatter.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,6 +15,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,12 +33,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Iscatter = PMPI_Iscatter #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Iscatter PMPI_Iscatter #endif static const char FUNC_NAME[] = "MPI_Iscatter"; @@ -146,11 +147,8 @@ int MPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_iscatter((void *) sendbuf, sendcount, sendtype, recvbuf, + err = comm->c_coll.coll_iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request, comm->c_coll.coll_iscatter_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/iscatterv.c b/ompi/mpi/c/iscatterv.c index 5ed3465d142..360b448d0b5 100644 --- a/ompi/mpi/c/iscatterv.c +++ b/ompi/mpi/c/iscatterv.c @@ -6,13 +6,15 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Iscatterv = PMPI_Iscatterv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Iscatterv PMPI_Iscatterv #endif static const char FUNC_NAME[] = "MPI_Iscatterv"; @@ -186,11 +187,8 @@ int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[ } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_iscatterv((void *) sendbuf, (int *) sendcounts, (int *) displs, + err = comm->c_coll.coll_iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request, comm->c_coll.coll_iscatterv_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/isend.c b/ompi/mpi/c/isend.c index 64a4c41ee87..88596f1f5f6 100644 --- a/ompi/mpi/c/isend.c +++ b/ompi/mpi/c/isend.c @@ -13,6 +13,8 @@ * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +32,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Isend = PMPI_Isend #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Isend PMPI_Isend #endif static const char FUNC_NAME[] = "MPI_Isend"; @@ -79,13 +80,10 @@ int MPI_Isend(const void *buf, int count, MPI_Datatype type, int dest, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - MEMCHECKER ( memchecker_call(&opal_memchecker_base_mem_noaccess, buf, count, type); ); - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(isend((void *) buf, count, type, dest, tag, + rc = MCA_PML_CALL(isend(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm, request)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/issend.c b/ompi/mpi/c/issend.c index 002cc0156fa..ff7faf8fa6e 100644 --- a/ompi/mpi/c/issend.c +++ b/ompi/mpi/c/issend.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Issend = PMPI_Issend #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Issend PMPI_Issend #endif static const char FUNC_NAME[] = "MPI_Issend"; @@ -76,13 +77,10 @@ int MPI_Issend(const void *buf, int count, MPI_Datatype type, int dest, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - MEMCHECKER ( memchecker_call(&opal_memchecker_base_mem_noaccess, buf, count, type); ); - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(isend((void *) buf, count, type, dest, tag, + rc = MCA_PML_CALL(isend(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm, request)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/keyval_create.c b/ompi/mpi/c/keyval_create.c index c43679cf8c2..9096413a632 100644 --- a/ompi/mpi/c/keyval_create.c +++ b/ompi/mpi/c/keyval_create.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Keyval_create = PMPI_Keyval_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Keyval_create PMPI_Keyval_create #endif static const char FUNC_NAME[] = "MPI_Keyval_create"; @@ -38,7 +39,7 @@ static const char FUNC_NAME[] = "MPI_Keyval_create"; int MPI_Keyval_create(MPI_Copy_function *copy_attr_fn, MPI_Delete_function *delete_attr_fn, - int *keyval, void *extra_state) + int *keyval, void *extra_state) { int ret; ompi_attribute_fn_ptr_union_t copy_fn; @@ -47,20 +48,18 @@ int MPI_Keyval_create(MPI_Copy_function *copy_attr_fn, if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == keyval) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_KEYVAL, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_KEYVAL, FUNC_NAME); } else if ((NULL == copy_attr_fn) || (NULL == delete_attr_fn)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*)copy_attr_fn; del_fn.attr_communicator_delete_fn = delete_attr_fn; - ret = ompi_attr_create_keyval(COMM_ATTR, copy_fn, + ret = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, keyval, extra_state, 0, NULL); OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/keyval_free.c b/ompi/mpi/c/keyval_free.c index bd5525fd554..161176e6901 100644 --- a/ompi/mpi/c/keyval_free.c +++ b/ompi/mpi/c/keyval_free.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,18 +26,17 @@ #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Keyval_free = PMPI_Keyval_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Keyval_free PMPI_Keyval_free #endif static const char FUNC_NAME[] = "MPI_Keyval_free"; -int MPI_Keyval_free(int *keyval) +int MPI_Keyval_free(int *keyval) { int ret; @@ -47,8 +48,6 @@ int MPI_Keyval_free(int *keyval) } } - OPAL_CR_ENTER_LIBRARY(); - ret = ompi_attr_free_keyval(COMM_ATTR, keyval, 0); OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/lookup_name.c b/ompi/mpi/c/lookup_name.c index b8e3d864def..b7cbd806fb4 100644 --- a/ompi/mpi/c/lookup_name.c +++ b/ompi/mpi/c/lookup_name.c @@ -6,34 +6,38 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" +#include "opal/class/opal_list.h" +#include "opal/mca/pmix/pmix.h" + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" -#include "ompi/mca/pubsub/pubsub.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Lookup_name = PMPI_Lookup_name #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Lookup_name PMPI_Lookup_name #endif static const char FUNC_NAME[] = "MPI_Lookup_name"; @@ -41,17 +45,21 @@ static const char FUNC_NAME[] = "MPI_Lookup_name"; int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name) { - char *tmp; + char range[OPAL_MAX_INFO_VAL]; + int flag=0, ret; + opal_value_t *rng; + opal_list_t results, pinfo; + opal_pmix_pdata_t *pdat; if ( MPI_PARAM_CHECK ) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( NULL == port_name ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } if ( NULL == service_name ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } if (NULL == info || ompi_info_is_freed(info)) { @@ -60,25 +68,51 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name) } } - OPAL_CR_ENTER_LIBRARY(); + OBJ_CONSTRUCT(&pinfo, opal_list_t); + + /* OMPI supports info keys to pass the range to + * be searched for the given key */ + if (MPI_INFO_NULL != info) { + ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); + if (flag) { + if (0 == 
strcmp(range, "nspace")) { + rng = OBJ_NEW(opal_value_t); + rng->key = strdup(OPAL_PMIX_RANGE); + rng->type = OPAL_INT; + rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace + opal_list_append(&pinfo, &rng->super); + } else if (0 == strcmp(range, "session")) { + rng = OBJ_NEW(opal_value_t); + rng->key = strdup(OPAL_PMIX_RANGE); + rng->type = OPAL_INT; + rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session + opal_list_append(&pinfo, &rng->super); + } else { + /* unrecognized scope */ + OPAL_LIST_DESTRUCT(&pinfo); + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + FUNC_NAME); + } + } + } - /* - * No predefined info-objects for this function in MPI-2, - * therefore, we do not parse the info-object at the moment. - */ + /* collect the findings */ + OBJ_CONSTRUCT(&results, opal_list_t); + pdat = OBJ_NEW(opal_pmix_pdata_t); + pdat->value.key = strdup(service_name); + opal_list_append(&results, &pdat->super); - /* - * if multiple entries found, this implementation uses - * at the moment the first entry. - */ - tmp = (char *) ompi_pubsub.lookup(service_name, info); - if ( NULL == tmp ) { + ret = opal_pmix.lookup(&results, &pinfo); + OPAL_LIST_DESTRUCT(&pinfo); + if (OPAL_SUCCESS != ret || + OPAL_STRING != pdat->value.type || + NULL == pdat->value.data.string) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NAME, FUNC_NAME); } - strncpy ( port_name, tmp, MPI_MAX_PORT_NAME ); + strncpy ( port_name, pdat->value.data.string, MPI_MAX_PORT_NAME ); + OPAL_LIST_DESTRUCT(&results); - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/message_c2f.c b/ompi/mpi/c/message_c2f.c index f6d2fcc2aa4..c6406da434b 100644 --- a/ompi/mpi/c/message_c2f.c +++ b/ompi/mpi/c/message_c2f.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,25 +29,22 @@ #include "ompi/message/message.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Message_c2f = PMPI_Message_c2f #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Message_c2f PMPI_Message_c2f #endif static const char FUNC_NAME[] = "MPI_Message_c2f"; -MPI_Fint MPI_Message_c2f(MPI_Message message) +MPI_Fint MPI_Message_c2f(MPI_Message message) { MEMCHECKER( memchecker_message(&message); ); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -68,7 +67,7 @@ MPI_Fint MPI_Message_c2f(MPI_Message message) */ if (MPI_UNDEFINED == message->m_f_to_c_index) { - message->m_f_to_c_index = + message->m_f_to_c_index = opal_pointer_array_add(&ompi_message_f_to_c_table, message); } diff --git a/ompi/mpi/c/message_f2c.c b/ompi/mpi/c/message_f2c.c index ee2e78499d6..b667b573b19 100644 --- a/ompi/mpi/c/message_f2c.c +++ b/ompi/mpi/c/message_f2c.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,23 +27,20 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/message/message.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Message_f2c = PMPI_Message_f2c #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Message_f2c PMPI_Message_f2c #endif static const char FUNC_NAME[] = "MPI_Message_f2c"; -MPI_Message MPI_Message_f2c(MPI_Fint message) +MPI_Message MPI_Message_f2c(MPI_Fint message) { int message_index = OMPI_FINT_2_INT(message); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } @@ -49,9 +48,9 @@ MPI_Message MPI_Message_f2c(MPI_Fint message) /* Per MPI-2:4.12.4, do not invoke an error handler if we get an invalid fortran handle. If we get an invalid fortran handle, return an invalid C handle. */ - - if (message_index < 0 || - message_index >= + + if (message_index < 0 || + message_index >= opal_pointer_array_get_size(&ompi_message_f_to_c_table)) { return NULL; } diff --git a/ompi/mpi/c/mprobe.c b/ompi/mpi/c/mprobe.c index 23e99964cd6..161fd39f569 100644 --- a/ompi/mpi/c/mprobe.c +++ b/ompi/mpi/c/mprobe.c @@ -2,10 +2,12 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -19,18 +21,17 @@ #include "ompi/request/request.h" #include "ompi/message/message.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Mprobe = PMPI_Mprobe #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Mprobe PMPI_Mprobe #endif static const char FUNC_NAME[] = "MPI_Mprobe"; -int MPI_Mprobe(int source, int tag, MPI_Comm comm, - MPI_Message *message, MPI_Status *status) +int MPI_Mprobe(int source, int tag, MPI_Comm comm, + MPI_Message *message, MPI_Status *status) { int rc; @@ -45,7 +46,7 @@ int MPI_Mprobe(int source, int tag, MPI_Comm comm, rc = MPI_ERR_TAG; } else if (ompi_comm_invalid(comm)) { rc = MPI_ERR_COMM; - } else if ((source != MPI_ANY_SOURCE) && + } else if ((source != MPI_ANY_SOURCE) && (MPI_PROC_NULL != source) && ompi_comm_peer_invalid(comm, source)) { rc = MPI_ERR_RANK; @@ -68,8 +69,6 @@ int MPI_Mprobe(int source, int tag, MPI_Comm comm, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - rc = MCA_PML_CALL(mprobe(source, tag, comm, message, status)); /* Per MPI-1, the MPI_ERROR field is not defined for single-completion calls */ diff --git a/ompi/mpi/c/mrecv.c b/ompi/mpi/c/mrecv.c index ad4f6535b76..b7ba7dfed19 100644 --- a/ompi/mpi/c/mrecv.c +++ b/ompi/mpi/c/mrecv.c @@ -1,10 +1,12 @@ /* * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -17,19 +19,18 @@ #include "ompi/request/request.h" #include "ompi/message/message.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Mrecv = PMPI_Mrecv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Mrecv PMPI_Mrecv #endif static const char FUNC_NAME[] = "MPI_Mrecv"; -int MPI_Mrecv(void *buf, int count, MPI_Datatype type, - MPI_Message *message, MPI_Status *status) +int MPI_Mrecv(void *buf, int count, MPI_Datatype type, + MPI_Message *message, MPI_Status *status) { int rc = MPI_SUCCESS; ompi_communicator_t *comm; @@ -45,7 +46,7 @@ int MPI_Mrecv(void *buf, int count, MPI_Datatype type, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); OMPI_CHECK_DATATYPE_FOR_RECV(rc, type, count); OMPI_CHECK_USER_BUFFER(rc, buf, type, count); - + if (NULL == message || MPI_MESSAGE_NULL == *message) { rc = MPI_ERR_REQUEST; comm = MPI_COMM_NULL; @@ -66,8 +67,6 @@ int MPI_Mrecv(void *buf, int count, MPI_Datatype type, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - rc = MCA_PML_CALL(mrecv(buf, count, type, message, status)); /* Per MPI-1, the MPI_ERROR field is not defined for single-completion calls */ @@ -75,5 +74,5 @@ int MPI_Mrecv(void *buf, int count, MPI_Datatype type, opal_memchecker_base_mem_undefined(&status->MPI_ERROR, sizeof(int)); ); - OMPI_ERRHANDLER_RETURN(rc, (*message)->comm, rc, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/neighbor_allgather.c b/ompi/mpi/c/neighbor_allgather.c index 2a93ac00722..3f08ad1b67c 100644 --- a/ompi/mpi/c/neighbor_allgather.c +++ b/ompi/mpi/c/neighbor_allgather.c @@ -14,6 +14,9 @@ * Copyright (c) 2010 University of Houston. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,13 +33,14 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Neighbor_allgather = PMPI_Neighbor_allgather #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Neighbor_allgather PMPI_Neighbor_allgather #endif static const char FUNC_NAME[] = "MPI_Neighbor_allgather"; @@ -89,6 +93,28 @@ int MPI_Neighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype send OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcount); } OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); + + if( OMPI_COMM_IS_CART(comm) ) { + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + if( 0 > cart->ndims ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_GRAPH(comm) ) { + int degree; + mca_topo_base_graph_neighbors_count(comm, ompi_comm_rank(comm), &degree); + if( 0 > degree ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_DIST_GRAPH(comm) ) { + const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; + int indegree = dist_graph->indegree; + int outdegree = dist_graph->outdegree; + if( indegree < 0 || outdegree < 0 ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } } /* Do we need to do anything?
Everyone had to give the same send @@ -113,11 +139,8 @@ int MPI_Neighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype send } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_neighbor_allgather((void *) sendbuf, sendcount, sendtype, + err = comm->c_coll.coll_neighbor_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, comm->c_coll.coll_neighbor_allgather_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/neighbor_allgatherv.c b/ompi/mpi/c/neighbor_allgatherv.c index 4f8a1728ea1..adcfe797fd7 100644 --- a/ompi/mpi/c/neighbor_allgatherv.c +++ b/ompi/mpi/c/neighbor_allgatherv.c @@ -14,6 +14,9 @@ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,13 +33,14 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Neighbor_allgatherv = PMPI_Neighbor_allgatherv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Neighbor_allgatherv PMPI_Neighbor_allgatherv #endif static const char FUNC_NAME[] = "MPI_Neighbor_allgatherv"; @@ -113,6 +117,28 @@ int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sen if (NULL == displs) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_BUFFER, FUNC_NAME); } + + if( OMPI_COMM_IS_CART(comm) ) { + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + if( 0 > cart->ndims ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_GRAPH(comm) ) { + int degree; + mca_topo_base_graph_neighbors_count(comm, ompi_comm_rank(comm), &degree); + if( 0 > degree ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_DIST_GRAPH(comm) ) { + const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; + int indegree = dist_graph->indegree; + int outdegree = dist_graph->outdegree; + if( indegree < 0 || outdegree < 0 ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } } /* Do we need to do anything?
Everyone had to give the same @@ -136,12 +162,9 @@ int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sen something */ - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_neighbor_allgatherv((void *) sendbuf, sendcount, sendtype, - recvbuf, (int *) recvcounts, (int *) displs, + err = comm->c_coll.coll_neighbor_allgatherv(sendbuf, sendcount, sendtype, + recvbuf, recvcounts, displs, recvtype, comm, comm->c_coll.coll_neighbor_allgatherv_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/neighbor_alltoall.c b/ompi/mpi/c/neighbor_alltoall.c index be1c913be82..c0a829b24cd 100644 --- a/ompi/mpi/c/neighbor_alltoall.c +++ b/ompi/mpi/c/neighbor_alltoall.c @@ -13,8 +13,9 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,13 +32,14 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Neighbor_alltoall = PMPI_Neighbor_alltoall #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Neighbor_alltoall PMPI_Neighbor_alltoall #endif static const char FUNC_NAME[] = "MPI_Neighbor_alltoall"; @@ -93,6 +95,28 @@ int MPI_Neighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendt return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TRUNCATE, FUNC_NAME); } } + + if( OMPI_COMM_IS_CART(comm) ) { + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + if( 0 > cart->ndims ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_GRAPH(comm) ) { + int degree; + mca_topo_base_graph_neighbors_count(comm, ompi_comm_rank(comm), &degree); + if( 0 > degree ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_DIST_GRAPH(comm) ) { + const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; + int indegree = dist_graph->indegree; + int outdegree = dist_graph->outdegree; + if( indegree < 0 || outdegree < 0 ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } } /* Do we need to do anything?
*/ @@ -105,11 +129,8 @@ int MPI_Neighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendt return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_neighbor_alltoall((void *) sendbuf, sendcount, sendtype, recvbuf, + err = comm->c_coll.coll_neighbor_alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, comm->c_coll.coll_neighbor_alltoall_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/neighbor_alltoallv.c b/ompi/mpi/c/neighbor_alltoallv.c index 9c599da49e2..500781eb62c 100644 --- a/ompi/mpi/c/neighbor_alltoallv.c +++ b/ompi/mpi/c/neighbor_alltoallv.c @@ -13,8 +13,9 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,13 +33,14 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" #include "ompi/communicator/comm_helpers.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Neighbor_alltoallv = PMPI_Neighbor_alltoallv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Neighbor_alltoallv PMPI_Neighbor_alltoallv #endif static const char FUNC_NAME[] = "MPI_Neighbor_alltoallv"; @@ -119,14 +121,33 @@ int MPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const in OMPI_CHECK_DATATYPE_FOR_RECV(err, recvtype, recvcounts[i]); OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); } - } - OPAL_CR_ENTER_LIBRARY(); + if( OMPI_COMM_IS_CART(comm) ) { + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + if( 0 > cart->ndims ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_GRAPH(comm) ) { + int degree; + mca_topo_base_graph_neighbors_count(comm, ompi_comm_rank(comm), &degree); + if( 0 > degree ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_DIST_GRAPH(comm) ) { + const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; + indegree = dist_graph->indegree; + outdegree = dist_graph->outdegree; + if( indegree < 0 || outdegree < 0 ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + } /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_neighbor_alltoallv((void *) sendbuf, (int *) sendcounts, (int *) sdispls, sendtype, - recvbuf, (int *) recvcounts, (int *) rdispls, recvtype, + err = comm->c_coll.coll_neighbor_alltoallv(sendbuf, sendcounts, sdispls,
sendtype, + recvbuf, recvcounts, rdispls, recvtype, comm, comm->c_coll.coll_neighbor_alltoallv_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/neighbor_alltoallw.c b/ompi/mpi/c/neighbor_alltoallw.c index 6808f0d3fc3..dbd82b9e0f2 100644 --- a/ompi/mpi/c/neighbor_alltoallw.c +++ b/ompi/mpi/c/neighbor_alltoallw.c @@ -13,8 +13,9 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,13 +33,14 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" #include "ompi/communicator/comm_helpers.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Neighbor_alltoallw = PMPI_Neighbor_alltoallw #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Neighbor_alltoallw PMPI_Neighbor_alltoallw #endif static const char FUNC_NAME[] = "MPI_Neighbor_alltoallw"; @@ -115,14 +117,33 @@ int MPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MP OMPI_CHECK_DATATYPE_FOR_RECV(err, recvtypes[i], recvcounts[i]); OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); } - } - OPAL_CR_ENTER_LIBRARY(); + if( OMPI_COMM_IS_CART(comm) ) { + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + if( 0 > cart->ndims ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_GRAPH(comm) ) { + int degree; + mca_topo_base_graph_neighbors_count(comm, ompi_comm_rank(comm), &degree); + if( 0 > degree ) { +
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + else if( OMPI_COMM_IS_DIST_GRAPH(comm) ) { + const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph; + indegree = dist_graph->indegree; + outdegree = dist_graph->outdegree; + if( indegree < 0 || outdegree < 0 ) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); + } + } + } /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_neighbor_alltoallw((void *) sendbuf, (int *) sendcounts, (MPI_Aint *) sdispls, (ompi_datatype_t **) sendtypes, - recvbuf, (int *) recvcounts, (MPI_Aint *) rdispls, (ompi_datatype_t **) recvtypes, + err = comm->c_coll.coll_neighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, + recvbuf, recvcounts, rdispls, recvtypes, comm, comm->c_coll.coll_neighbor_alltoallw_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/op_c2f.c b/ompi/mpi/c/op_c2f.c index 389b4417e29..bf09306c21e 100644 --- a/ompi/mpi/c/op_c2f.c +++ b/ompi/mpi/c/op_c2f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,21 +27,18 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/op/op.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Op_c2f = PMPI_Op_c2f #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Op_c2f PMPI_Op_c2f #endif static const char FUNC_NAME[] = "MPI_Op_c2f"; -MPI_Fint MPI_Op_c2f(MPI_Op op) +MPI_Fint MPI_Op_c2f(MPI_Op op) { - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/op_commutative.c b/ompi/mpi/c/op_commutative.c index aed90812612..d2c101ff5dc 100644 --- a/ompi/mpi/c/op_commutative.c +++ b/ompi/mpi/c/op_commutative.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,12 +28,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/op/op.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Op_commutative = PMPI_Op_commutative #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Op_commutative PMPI_Op_commutative #endif static const char FUNC_NAME[] = "MPI_Op_commutative"; @@ -39,8 +40,6 @@ static const char FUNC_NAME[] = "MPI_Op_commutative"; int MPI_Op_commutative(MPI_Op op, int *commute) { - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { diff --git a/ompi/mpi/c/op_create.c b/ompi/mpi/c/op_create.c index 377a941c18c..0b13c17ae70 100644 --- a/ompi/mpi/c/op_create.c +++ b/ompi/mpi/c/op_create.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/op/op.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Op_create = PMPI_Op_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Op_create PMPI_Op_create #endif static const char FUNC_NAME[] = "MPI_Op_create"; @@ -53,8 +54,6 @@ int MPI_Op_create(MPI_User_function * function, int commute, MPI_Op * op) } } - OPAL_CR_ENTER_LIBRARY(); - /* Create and cache the op. Sets a refcount of 1. */ *op = ompi_op_create_user(OPAL_INT_TO_BOOL(commute), diff --git a/ompi/mpi/c/op_f2c.c b/ompi/mpi/c/op_f2c.c index 21660d902db..d9be1f240b0 100644 --- a/ompi/mpi/c/op_f2c.c +++ b/ompi/mpi/c/op_f2c.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/op/op.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Op_f2c = PMPI_Op_f2c #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Op_f2c PMPI_Op_f2c #endif static const char FUNC_NAME[] = "MPI_Op_f2c"; @@ -40,20 +41,18 @@ MPI_Op MPI_Op_f2c(MPI_Fint op_f) { int op_index = OMPI_FINT_2_INT(op_f); - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } - + /* Per MPI-2:4.12.4, do not invoke an error handler if we get an invalid fortran handle. If we get an invalid fortran handle, return an invalid C handle. */ - - if (op_index < 0 || - op_index >= + + if (op_index < 0 || + op_index >= opal_pointer_array_get_size(ompi_op_f_to_c_table)) { return NULL; } diff --git a/ompi/mpi/c/op_free.c b/ompi/mpi/c/op_free.c index 7712497c3d2..e78987875b1 100644 --- a/ompi/mpi/c/op_free.c +++ b/ompi/mpi/c/op_free.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,22 +26,19 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/op/op.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Op_free = PMPI_Op_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Op_free PMPI_Op_free #endif static const char FUNC_NAME[] = "MPI_Op_free"; -int MPI_Op_free(MPI_Op *op) +int MPI_Op_free(MPI_Op *op) { - OPAL_CR_NOOP_PROGRESS(); - /* Error checking */ if (MPI_PARAM_CHECK) { diff --git a/ompi/mpi/c/open_port.c b/ompi/mpi/c/open_port.c index 62dca423f66..cb5e97944dd 100644 --- a/ompi/mpi/c/open_port.c +++ b/ompi/mpi/c/open_port.c @@ -5,14 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -23,28 +26,27 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Open_port = PMPI_Open_port #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Open_port PMPI_Open_port #endif static const char FUNC_NAME[] = "MPI_Open_port"; -int MPI_Open_port(MPI_Info info, char *port_name) +int MPI_Open_port(MPI_Info info, char *port_name) { int rc; if ( MPI_PARAM_CHECK ) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( NULL == port_name ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } if (NULL == info || ompi_info_is_freed(info)) { @@ -64,9 +66,7 @@ int MPI_Open_port(MPI_Info info, char *port_name) */ } - OPAL_CR_ENTER_LIBRARY(); - - rc = ompi_dpm.open_port(port_name, OMPI_RML_TAG_INVALID); + rc = ompi_dpm_open_port(port_name); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/pack.c b/ompi/mpi/c/pack.c index 1f64319adf4..46d9b1a0b8e 100644 --- a/ompi/mpi/c/pack.c +++ b/ompi/mpi/c/pack.c @@ -3,20 +3,22 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "opal/datatype/opal_convertor.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Pack = PMPI_Pack #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Pack PMPI_Pack #endif static const char FUNC_NAME[] = "MPI_Pack"; @@ -44,7 +45,7 @@ static const char FUNC_NAME[] = "MPI_Pack"; int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype, void *outbuf, int outsize, int *position, MPI_Comm comm) { - int rc; + int rc = MPI_SUCCESS, ret; opal_convertor_t local_convertor; struct iovec invec; unsigned int iov_count; @@ -66,13 +67,13 @@ int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype, return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COUNT, FUNC_NAME); } else if (outsize < 0) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); - } else if (MPI_DATATYPE_NULL == datatype || NULL == datatype) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TYPE, FUNC_NAME); } + OMPI_CHECK_DATATYPE_FOR_SEND(rc, datatype, incount); + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + OMPI_CHECK_USER_BUFFER(rc, inbuf, datatype, incount); + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - OBJ_CONSTRUCT( &local_convertor, opal_convertor_t ); /* the resulting convertor will be set to the position ZERO */ opal_convertor_copy_and_prepare_for_send( ompi_mpi_local_convertor, &(datatype->super), 
@@ -82,7 +83,6 @@ int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype, opal_convertor_get_packed_size( &local_convertor, &size ); if( (*position + size) > (unsigned int)outsize ) { /* we can cast as we already checked for < 0 */ OBJ_DESTRUCT( &local_convertor ); - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TRUNCATE, FUNC_NAME); } @@ -92,12 +92,14 @@ int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype, /* Do the actual packing */ iov_count = 1; - rc = opal_convertor_pack( &local_convertor, &invec, &iov_count, &size ); + ret = opal_convertor_pack( &local_convertor, &invec, &iov_count, &size ); *position += size; OBJ_DESTRUCT( &local_convertor ); /* All done. Note that the convertor returns 1 upon success, not - OMPI_SUCCESS. */ - OMPI_ERRHANDLER_RETURN((rc == 1) ? OMPI_SUCCESS : OMPI_ERROR, - comm, MPI_ERR_UNKNOWN, FUNC_NAME); + OPAL_SUCCESS. */ + if (1 != ret) { + rc = OMPI_ERROR; + } + OMPI_ERRHANDLER_RETURN(rc, comm, MPI_ERR_UNKNOWN, FUNC_NAME); } diff --git a/ompi/mpi/c/pack_external.c b/ompi/mpi/c/pack_external.c index 6770fd1b9a7..cc5dddbca1a 100644 --- a/ompi/mpi/c/pack_external.c +++ b/ompi/mpi/c/pack_external.c @@ -3,20 +3,22 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "opal/datatype/opal_convertor.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Pack_external = PMPI_Pack_external #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Pack_external PMPI_Pack_external #endif static const char FUNC_NAME[] = "MPI_Pack_external"; @@ -43,13 +44,9 @@ static const char FUNC_NAME[] = "MPI_Pack_external"; int MPI_Pack_external(const char datarep[], const void *inbuf, int incount, MPI_Datatype datatype, void *outbuf, - MPI_Aint outsize, MPI_Aint *position) + MPI_Aint outsize, MPI_Aint *position) { - int rc; - opal_convertor_t local_convertor; - struct iovec invec; - unsigned int iov_count; - size_t size; + int rc = MPI_SUCCESS; MEMCHECKER( memchecker_datatype(datatype); @@ -64,44 +61,16 @@ int MPI_Pack_external(const char datarep[], const void *inbuf, int incount, return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COUNT, FUNC_NAME); } else if (outsize < 0) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); - } else if (MPI_DATATYPE_NULL == datatype || NULL == datatype) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME); } + OMPI_CHECK_DATATYPE_FOR_SEND(rc, datatype, incount); + OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME); + OMPI_CHECK_USER_BUFFER(rc, inbuf, datatype, incount); + OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - - OBJ_CONSTRUCT(&local_convertor, opal_convertor_t); - - /* The resulting convertor will be set to the position zero. 
We have to use - * CONVERTOR_SEND_CONVERSION in order to force the convertor to do anything - * more than just packing the data. - */ - opal_convertor_copy_and_prepare_for_send( ompi_mpi_external32_convertor, - &(datatype->super), incount, (void *) inbuf, - CONVERTOR_SEND_CONVERSION, - &local_convertor ); - - /* Check for truncation */ - opal_convertor_get_packed_size( &local_convertor, &size ); - if( (*position + size) > (size_t)outsize ) { /* we can cast as we already checked for < 0 */ - OBJ_DESTRUCT( &local_convertor ); - OPAL_CR_EXIT_LIBRARY(); - return OMPI_ERRHANDLER_INVOKE( MPI_COMM_WORLD, MPI_ERR_TRUNCATE, FUNC_NAME ); - } - - /* Prepare the iovec with all informations */ - invec.iov_base = (char*) outbuf + (*position); - invec.iov_len = size; - - /* Do the actual packing */ - iov_count = 1; - rc = opal_convertor_pack( &local_convertor, &invec, &iov_count, &size ); - *position += size; - OBJ_DESTRUCT( &local_convertor ); + rc = ompi_datatype_pack_external(datarep, inbuf, incount, + datatype, outbuf, + outsize, position); - /* All done. Note that the convertor returns 1 upon success, not - OMPI_SUCCESS. */ - OMPI_ERRHANDLER_RETURN((rc == 1) ? OMPI_SUCCESS : OMPI_ERROR, - MPI_COMM_WORLD, MPI_ERR_UNKNOWN, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/pack_external_size.c b/ompi/mpi/c/pack_external_size.c index 307338f65c2..b3b3ceef337 100644 --- a/ompi/mpi/c/pack_external_size.c +++ b/ompi/mpi/c/pack_external_size.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,27 +32,25 @@ #include "opal/datatype/opal_convertor.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Pack_external_size = PMPI_Pack_external_size #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Pack_external_size PMPI_Pack_external_size #endif static const char FUNC_NAME[] = "MPI_Pack_external_size"; int MPI_Pack_external_size(const char datarep[], int incount, - MPI_Datatype datatype, MPI_Aint *size) + MPI_Datatype datatype, MPI_Aint *size) { - opal_convertor_t local_convertor; - size_t length; + int rc = MPI_SUCCESS; MEMCHECKER( memchecker_datatype(datatype); ); - + if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == size) { @@ -60,20 +60,8 @@ int MPI_Pack_external_size(const char datarep[], int incount, } } - OPAL_CR_ENTER_LIBRARY(); - - OBJ_CONSTRUCT(&local_convertor, opal_convertor_t); - - /* the resulting convertor will be set to the position ZERO */ - opal_convertor_copy_and_prepare_for_recv( ompi_mpi_external32_convertor, - &(datatype->super), incount, NULL, - CONVERTOR_SEND_CONVERSION, - &local_convertor ); - - opal_convertor_get_unpacked_size( &local_convertor, &length ); - *size = (MPI_Aint)length; - OBJ_DESTRUCT( &local_convertor ); - OPAL_CR_EXIT_LIBRARY(); - return OMPI_SUCCESS; + rc = ompi_datatype_pack_external_size(datarep, incount, + datatype, size); + OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/pack_size.c b/ompi/mpi/c/pack_size.c index ed533567651..bae928e4f88 100644 --- a/ompi/mpi/c/pack_size.c +++ b/ompi/mpi/c/pack_size.c @@ 
-5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,18 +29,17 @@ #include "opal/datatype/opal_convertor.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Pack_size = PMPI_Pack_size #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Pack_size PMPI_Pack_size #endif static const char FUNC_NAME[] = "MPI_Pack_size"; int MPI_Pack_size(int incount, MPI_Datatype datatype, MPI_Comm comm, - int *size) + int *size) { opal_convertor_t local_convertor; size_t length; @@ -60,7 +61,6 @@ int MPI_Pack_size(int incount, MPI_Datatype datatype, MPI_Comm comm, } } - OPAL_CR_ENTER_LIBRARY(); OBJ_CONSTRUCT( &local_convertor, opal_convertor_t ); /* the resulting convertor will be set to the position ZERO */ @@ -71,7 +71,6 @@ int MPI_Pack_size(int incount, MPI_Datatype datatype, MPI_Comm comm, *size = (int)length; OBJ_DESTRUCT( &local_convertor ); - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/pcontrol.c b/ompi/mpi/c/pcontrol.c index 05ef586009e..7aba846cce0 100644 --- a/ompi/mpi/c/pcontrol.c +++ b/ompi/mpi/c/pcontrol.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -22,23 +24,20 @@ #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Pcontrol = PMPI_Pcontrol #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Pcontrol PMPI_Pcontrol #endif static const char FUNC_NAME[] = "MPI_Pcontrol"; -int MPI_Pcontrol(const int level, ...) +int MPI_Pcontrol(const int level, ...) { va_list arglist; - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } diff --git a/ompi/mpi/c/probe.c b/ompi/mpi/c/probe.c index 7cfa5235469..ed1172db5cc 100644 --- a/ompi/mpi/c/probe.c +++ b/ompi/mpi/c/probe.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,18 +28,17 @@ #include "ompi/memchecker.h" #include "ompi/request/request.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Probe = PMPI_Probe #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Probe PMPI_Probe #endif static const char FUNC_NAME[] = "MPI_Probe"; -int MPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status) +int MPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status) { int rc; @@ -52,7 +53,7 @@ int MPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status) rc = MPI_ERR_TAG; } else if (ompi_comm_invalid(comm)) { rc = MPI_ERR_COMM; - } else if ((source != MPI_ANY_SOURCE) && + } else if ((source != MPI_ANY_SOURCE) && (MPI_PROC_NULL != source) && ompi_comm_peer_invalid(comm, source)) { rc = MPI_ERR_RANK; @@ -73,8 +74,6 @@ int MPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status) return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - rc = MCA_PML_CALL(probe(source, tag, comm, status)); /* * Per MPI-1, the MPI_ERROR field is not defined for single-completion calls diff --git a/ompi/mpi/c/profile/Makefile.am b/ompi/mpi/c/profile/Makefile.am index aeb7fa88b05..ed8b77c8270 100644 --- a/ompi/mpi/c/profile/Makefile.am +++ b/ompi/mpi/c/profile/Makefile.am @@ -6,7 +6,7 @@ # Copyright (c) 2004-2013 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -16,35 +16,29 @@ # Copyright (c) 2012-2013 Inria. All rights reserved. 
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # include $(top_srcdir)/Makefile.ompi-rules -# -# OMPI_PROFILING_DEFINES flag s enabled when we want our MPI_* symbols -# to be replaced by PMPI_*. In other words, this flag decides -# whether "profile/defines.h" is included or not. "profile/defines.h" -# replaces all MPI_* symbols with PMPI_* symbols. In this directory -# we definately need it to be 1. -# -AM_CPPFLAGS = -DOMPI_PROFILING_DEFINES=1 +# OMPI_BUILD_MPI_PROFILING is enabled when we want our generated MPI_* symbols +# to be replaced by PMPI_*. +# In this directory, we definitely need it to be 1. + +AM_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=1 # # This build needs to go through only if profiling is required. # Further, this build HAS to go through if profiling is required.
# -noinst_LTLIBRARIES = -if BUILD_PMPI_BINDINGS_LAYER -noinst_LTLIBRARIES += libmpi_c_pmpi.la -endif - -headers = defines.h +noinst_LTLIBRARIES = libmpi_c_pmpi.la nodist_libmpi_c_pmpi_la_SOURCES = \ pabort.c \ @@ -391,9 +385,13 @@ nodist_libmpi_c_pmpi_la_SOURCES += \ pfile_get_view.c \ pfile_iread_at.c \ pfile_iread.c \ + pfile_iread_at_all.c \ + pfile_iread_all.c \ pfile_iread_shared.c \ pfile_iwrite_at.c \ pfile_iwrite.c \ + pfile_iwrite_at_all.c \ + pfile_iwrite_all.c \ pfile_iwrite_shared.c \ pfile_open.c \ pfile_preallocate.c \ @@ -445,7 +443,6 @@ $(nodist_libmpi_c_pmpi_la_SOURCES): if WANT_INSTALL_HEADERS ompidir = $(ompiincludedir)/$(subdir) -ompi_HEADERS = $(headers) endif # These files were created by targets above diff --git a/ompi/mpi/c/profile/defines.h b/ompi/mpi/c/profile/defines.h deleted file mode 100644 index 574482c64cd..00000000000 --- a/ompi/mpi/c/profile/defines.h +++ /dev/null @@ -1,415 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. - * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OMPI_C_PROFILE_DEFINES_H -#define OMPI_C_PROFILE_DEFINES_H -/* - * This file is included in the top directory only if - * profiling is required. Once profiling is required, - * this file will replace all MPI_* symbols with - * PMPI_* symbols - */ -#define MPI_Abort PMPI_Abort -#define MPI_Accumulate PMPI_Accumulate -#define MPI_Add_error_class PMPI_Add_error_class -#define MPI_Add_error_code PMPI_Add_error_code -#define MPI_Add_error_string PMPI_Add_error_string -#define MPI_Address PMPI_Address -#define MPI_Allgather PMPI_Allgather -#define MPI_Iallgather PMPI_Iallgather -#define MPI_Allgatherv PMPI_Allgatherv -#define MPI_Iallgatherv PMPI_Iallgatherv -#define MPI_Alloc_mem PMPI_Alloc_mem -#define MPI_Allreduce PMPI_Allreduce -#define MPI_Iallreduce PMPI_Iallreduce -#define MPI_Alltoall PMPI_Alltoall -#define MPI_Ialltoall PMPI_Ialltoall -#define MPI_Alltoallv PMPI_Alltoallv -#define MPI_Ialltoallv PMPI_Ialltoallv -#define MPI_Alltoallw PMPI_Alltoallw -#define MPI_Ialltoallw PMPI_Ialltoallw -#define MPI_Attr_delete PMPI_Attr_delete -#define MPI_Attr_get PMPI_Attr_get -#define MPI_Attr_put PMPI_Attr_put -#define MPI_Barrier PMPI_Barrier -#define MPI_Ibarrier PMPI_Ibarrier -#define MPI_Bcast PMPI_Bcast -#define MPI_Ibcast PMPI_Ibcast -#define MPI_Bsend_init PMPI_Bsend_init -#define MPI_Bsend PMPI_Bsend -#define MPI_Buffer_attach PMPI_Buffer_attach -#define MPI_Buffer_detach PMPI_Buffer_detach -#define MPI_Cancel PMPI_Cancel -#define MPI_Cart_coords PMPI_Cart_coords -#define MPI_Cart_create PMPI_Cart_create -#define MPI_Cart_get PMPI_Cart_get -#define MPI_Cart_map PMPI_Cart_map -#define MPI_Cart_rank PMPI_Cart_rank -#define MPI_Cart_shift PMPI_Cart_shift -#define MPI_Cart_sub PMPI_Cart_sub -#define MPI_Cartdim_get PMPI_Cartdim_get -#define MPI_Close_port PMPI_Close_port -#define MPI_Comm_accept PMPI_Comm_accept -#define MPI_Comm_c2f PMPI_Comm_c2f -#define 
MPI_Comm_call_errhandler PMPI_Comm_call_errhandler -#define MPI_Comm_compare PMPI_Comm_compare -#define MPI_Comm_connect PMPI_Comm_connect -#define MPI_Comm_create_errhandler PMPI_Comm_create_errhandler -#define MPI_Comm_create_keyval PMPI_Comm_create_keyval -#define MPI_Comm_create_group PMPI_Comm_create_group -#define MPI_Comm_create PMPI_Comm_create -#define MPI_Comm_delete_attr PMPI_Comm_delete_attr -#define MPI_Comm_disconnect PMPI_Comm_disconnect -#define MPI_Comm_dup PMPI_Comm_dup -#define MPI_Comm_dup_with_info PMPI_Comm_dup_with_info -#define MPI_Comm_idup PMPI_Comm_idup -#define MPI_Comm_f2c PMPI_Comm_f2c -#define MPI_Comm_free_keyval PMPI_Comm_free_keyval -#define MPI_Comm_free PMPI_Comm_free -#define MPI_Comm_get_attr PMPI_Comm_get_attr -#define MPI_Comm_get_errhandler PMPI_Comm_get_errhandler -#define MPI_Comm_get_info PMPI_Comm_get_info -#define MPI_Comm_get_name PMPI_Comm_get_name -#define MPI_Comm_get_parent PMPI_Comm_get_parent -#define MPI_Comm_group PMPI_Comm_group -#define MPI_Comm_join PMPI_Comm_join -#define MPI_Comm_rank PMPI_Comm_rank -#define MPI_Comm_remote_group PMPI_Comm_remote_group -#define MPI_Comm_remote_size PMPI_Comm_remote_size -#define MPI_Comm_set_attr PMPI_Comm_set_attr -#define MPI_Dist_graph_create PMPI_Dist_graph_create -#define MPI_Dist_graph_create_adjacent PMPI_Dist_graph_create_adjacent -#define MPI_Dist_graph_neighbors PMPI_Dist_graph_neighbors -#define MPI_Dist_graph_neighbors_count PMPI_Dist_graph_neighbors_count -#define MPI_Comm_set_errhandler PMPI_Comm_set_errhandler -#define MPI_Comm_set_info PMPI_Comm_set_info -#define MPI_Comm_set_name PMPI_Comm_set_name -#define MPI_Comm_size PMPI_Comm_size -#define MPI_Comm_spawn PMPI_Comm_spawn -#define MPI_Comm_spawn_multiple PMPI_Comm_spawn_multiple -#define MPI_Comm_split PMPI_Comm_split -#define MPI_Comm_split_type PMPI_Comm_split_type -#define MPI_Comm_test_inter PMPI_Comm_test_inter -#define MPI_Compare_and_swap PMPI_Compare_and_swap -#define MPI_Dims_create 
PMPI_Dims_create -#define MPI_Errhandler_c2f PMPI_Errhandler_c2f -#define MPI_Errhandler_f2c PMPI_Errhandler_f2c -#define MPI_Errhandler_create PMPI_Errhandler_create -#define MPI_Errhandler_free PMPI_Errhandler_free -#define MPI_Errhandler_get PMPI_Errhandler_get -#define MPI_Errhandler_set PMPI_Errhandler_set -#define MPI_Error_class PMPI_Error_class -#define MPI_Error_string PMPI_Error_string -#define MPI_Exscan PMPI_Exscan -#define MPI_Fetch_and_op PMPI_Fetch_and_op -#define MPI_Iexscan PMPI_Iexscan -#define MPI_File_c2f PMPI_File_c2f -#define MPI_File_call_errhandler PMPI_File_call_errhandler -#define MPI_File_close PMPI_File_close -#define MPI_File_create_errhandler PMPI_File_create_errhandler -#define MPI_File_delete PMPI_File_delete -#define MPI_File_f2c PMPI_File_f2c -#define MPI_File_get_amode PMPI_File_get_amode -#define MPI_File_get_atomicity PMPI_File_get_atomicity -#define MPI_File_get_byte_offset PMPI_File_get_byte_offset -#define MPI_File_get_errhandler PMPI_File_get_errhandler -#define MPI_File_get_group PMPI_File_get_group -#define MPI_File_get_info PMPI_File_get_info -#define MPI_File_get_position PMPI_File_get_position -#define MPI_File_get_position_shared PMPI_File_get_position_shared -#define MPI_File_get_size PMPI_File_get_size -#define MPI_File_get_type_extent PMPI_File_get_type_extent -#define MPI_File_get_view PMPI_File_get_view -#define MPI_File_iread_at PMPI_File_iread_at -#define MPI_File_iread PMPI_File_iread -#define MPI_File_iread_shared PMPI_File_iread_shared -#define MPI_File_iwrite_at PMPI_File_iwrite_at -#define MPI_File_iwrite PMPI_File_iwrite -#define MPI_File_iwrite_shared PMPI_File_iwrite_shared -#define MPI_File_open PMPI_File_open -#define MPI_File_preallocate PMPI_File_preallocate -#define MPI_File_read_all_begin PMPI_File_read_all_begin -#define MPI_File_read_all_end PMPI_File_read_all_end -#define MPI_File_read_all PMPI_File_read_all -#define MPI_File_read_at_all_begin PMPI_File_read_at_all_begin -#define 
MPI_File_read_at_all_end PMPI_File_read_at_all_end -#define MPI_File_read_at_all PMPI_File_read_at_all -#define MPI_File_read_at PMPI_File_read_at -#define MPI_File_read PMPI_File_read -#define MPI_File_read_ordered_begin PMPI_File_read_ordered_begin -#define MPI_File_read_ordered_end PMPI_File_read_ordered_end -#define MPI_File_read_ordered PMPI_File_read_ordered -#define MPI_File_read_shared PMPI_File_read_shared -#define MPI_File_seek PMPI_File_seek -#define MPI_File_seek_shared PMPI_File_seek_shared -#define MPI_File_set_atomicity PMPI_File_set_atomicity -#define MPI_File_set_errhandler PMPI_File_set_errhandler -#define MPI_File_set_info PMPI_File_set_info -#define MPI_File_set_size PMPI_File_set_size -#define MPI_File_set_view PMPI_File_set_view -#define MPI_File_sync PMPI_File_sync -#define MPI_File_write_all_begin PMPI_File_write_all_begin -#define MPI_File_write_all_end PMPI_File_write_all_end -#define MPI_File_write_all PMPI_File_write_all -#define MPI_File_write_at_all_begin PMPI_File_write_at_all_begin -#define MPI_File_write_at_all_end PMPI_File_write_at_all_end -#define MPI_File_write_at_all PMPI_File_write_at_all -#define MPI_File_write_at PMPI_File_write_at -#define MPI_File_write PMPI_File_write -#define MPI_File_write_ordered_begin PMPI_File_write_ordered_begin -#define MPI_File_write_ordered_end PMPI_File_write_ordered_end -#define MPI_File_write_ordered PMPI_File_write_ordered -#define MPI_File_write_shared PMPI_File_write_shared -#define MPI_Finalize PMPI_Finalize -#define MPI_Finalized PMPI_Finalized - -#define MPI_Gather PMPI_Gather -#define MPI_Igather PMPI_Igather -#define MPI_Gatherv PMPI_Gatherv -#define MPI_Igatherv PMPI_Igatherv -#define MPI_Get_address PMPI_Get_address -#define MPI_Get_count PMPI_Get_count -#define MPI_Get_elements PMPI_Get_elements -#define MPI_Get_elements_x PMPI_Get_elements_x -#define MPI_Get PMPI_Get -#define MPI_Get_accumulate PMPI_Get_accumulate -#define MPI_Get_library_version PMPI_Get_library_version -#define 
MPI_Get_processor_name PMPI_Get_processor_name -#define MPI_Get_version PMPI_Get_version -#define MPI_Graph_create PMPI_Graph_create -#define MPI_Graph_get PMPI_Graph_get -#define MPI_Graph_map PMPI_Graph_map -#define MPI_Graph_neighbors_count PMPI_Graph_neighbors_count -#define MPI_Graph_neighbors PMPI_Graph_neighbors -#define MPI_Graphdims_get PMPI_Graphdims_get -#define MPI_Grequest_complete PMPI_Grequest_complete -#define MPI_Grequest_start PMPI_Grequest_start -#define MPI_Group_c2f PMPI_Group_c2f -#define MPI_Group_compare PMPI_Group_compare -#define MPI_Group_difference PMPI_Group_difference -#define MPI_Group_excl PMPI_Group_excl -#define MPI_Group_f2c PMPI_Group_f2c -#define MPI_Group_free PMPI_Group_free -#define MPI_Group_incl PMPI_Group_incl -#define MPI_Group_intersection PMPI_Group_intersection -#define MPI_Group_range_excl PMPI_Group_range_excl -#define MPI_Group_range_incl PMPI_Group_range_incl -#define MPI_Group_rank PMPI_Group_rank -#define MPI_Group_size PMPI_Group_size -#define MPI_Group_translate_ranks PMPI_Group_translate_ranks -#define MPI_Group_union PMPI_Group_union -#define MPI_Free_mem PMPI_Free_mem -#define MPI_Ibsend PMPI_Ibsend -#define MPI_Message_c2f PMPI_Message_c2f -#define MPI_Message_f2c PMPI_Message_f2c -#define MPI_Improbe PMPI_Improbe -#define MPI_Imrecv PMPI_Imrecv -#define MPI_Info_c2f PMPI_Info_c2f -#define MPI_Info_create PMPI_Info_create -#define MPI_Info_delete PMPI_Info_delete -#define MPI_Info_dup PMPI_Info_dup -#define MPI_Info_f2c PMPI_Info_f2c -#define MPI_Info_free PMPI_Info_free -#define MPI_Info_get PMPI_Info_get -#define MPI_Info_get_nkeys PMPI_Info_get_nkeys -#define MPI_Info_get_nthkey PMPI_Info_get_nthkey -#define MPI_Info_get_valuelen PMPI_Info_get_valuelen -#define MPI_Info_set PMPI_Info_set -#define MPI_Init PMPI_Init -#define MPI_Init_thread PMPI_Init_thread -#define MPI_Initialized PMPI_Initialized -#define MPI_Intercomm_create PMPI_Intercomm_create -#define MPI_Intercomm_merge PMPI_Intercomm_merge 
-#define MPI_Iprobe PMPI_Iprobe -#define MPI_Irecv PMPI_Irecv -#define MPI_Irsend PMPI_Irsend -#define MPI_Is_thread_main PMPI_Is_thread_main -#define MPI_Isend PMPI_Isend -#define MPI_Issend PMPI_Issend -#define MPI_Keyval_create PMPI_Keyval_create -#define MPI_Keyval_free PMPI_Keyval_free -#define MPI_Lookup_name PMPI_Lookup_name -#define MPI_Mprobe PMPI_Mprobe -#define MPI_Mrecv PMPI_Mrecv -#define MPI_Message_cancel PMPI_Message_cancel -#define MPI_Neighbor_allgather PMPI_Neighbor_allgather -#define MPI_Ineighbor_allgather PMPI_Ineighbor_allgather -#define MPI_Neighbor_allgatherv PMPI_Neighbor_allgatherv -#define MPI_Ineighbor_allgatherv PMPI_Ineighbor_allgatherv -#define MPI_Neighbor_alltoall PMPI_Neighbor_alltoall -#define MPI_Ineighbor_alltoall PMPI_Ineighbor_alltoall -#define MPI_Neighbor_alltoallv PMPI_Neighbor_alltoallv -#define MPI_Ineighbor_alltoallv PMPI_Ineighbor_alltoallv -#define MPI_Neighbor_alltoallw PMPI_Neighbor_alltoallw -#define MPI_Ineighbor_alltoallw PMPI_Ineighbor_alltoallw -#define MPI_Op_c2f PMPI_Op_c2f -#define MPI_Op_commutative PMPI_Op_commutative -#define MPI_Op_create PMPI_Op_create -#define MPI_Op_f2c PMPI_Op_f2c -#define MPI_Op_free PMPI_Op_free -#define MPI_Open_port PMPI_Open_port -#define MPI_Pack_external PMPI_Pack_external -#define MPI_Pack_external_size PMPI_Pack_external_size -#define MPI_Pack PMPI_Pack -#define MPI_Pack_size PMPI_Pack_size -#define MPI_Pcontrol PMPI_Pcontrol -#define MPI_Probe PMPI_Probe -#define MPI_Publish_name PMPI_Publish_name -#define MPI_Put PMPI_Put -#define MPI_Query_thread PMPI_Query_thread -#define MPI_Raccumulate PMPI_Raccumulate -#define MPI_Recv_init PMPI_Recv_init -#define MPI_Recv PMPI_Recv -#define MPI_Reduce PMPI_Reduce -#define MPI_Ireduce PMPI_Ireduce -#define MPI_Reduce_local PMPI_Reduce_local -#define MPI_Reduce_scatter PMPI_Reduce_scatter -#define MPI_Ireduce_scatter PMPI_Ireduce_scatter -#define MPI_Reduce_scatter_block PMPI_Reduce_scatter_block -#define MPI_Ireduce_scatter_block 
PMPI_Ireduce_scatter_block -#define MPI_Register_datarep PMPI_Register_datarep -#define MPI_Request_c2f PMPI_Request_c2f -#define MPI_Request_f2c PMPI_Request_f2c -#define MPI_Request_free PMPI_Request_free -#define MPI_Request_get_status PMPI_Request_get_status -#define MPI_Rget PMPI_Rget -#define MPI_Rget_accumulate PMPI_Rget_accumulate -#define MPI_Rput PMPI_Rput -#define MPI_Rsend_init PMPI_Rsend_init -#define MPI_Rsend PMPI_Rsend -#define MPI_Scan PMPI_Scan -#define MPI_Iscan PMPI_Iscan -#define MPI_Scatter PMPI_Scatter -#define MPI_Iscatter PMPI_Iscatter -#define MPI_Scatterv PMPI_Scatterv -#define MPI_Iscatterv PMPI_Iscatterv -#define MPI_Send_init PMPI_Send_init -#define MPI_Send PMPI_Send -#define MPI_Sendrecv PMPI_Sendrecv -#define MPI_Sendrecv_replace PMPI_Sendrecv_replace -#define MPI_Ssend_init PMPI_Ssend_init -#define MPI_Ssend PMPI_Ssend -#define MPI_Start PMPI_Start -#define MPI_Startall PMPI_Startall -#define MPI_Status_c2f PMPI_Status_c2f -#define MPI_Status_f2c PMPI_Status_f2c -#define MPI_Status_set_cancelled PMPI_Status_set_cancelled -#define MPI_Status_set_elements PMPI_Status_set_elements -#define MPI_Status_set_elements_x PMPI_Status_set_elements_x -#define MPI_Test_cancelled PMPI_Test_cancelled -#define MPI_Test PMPI_Test -#define MPI_Testall PMPI_Testall -#define MPI_Testany PMPI_Testany -#define MPI_Testsome PMPI_Testsome -#define MPI_Topo_test PMPI_Topo_test -#define MPI_Type_c2f PMPI_Type_c2f -#define MPI_Type_commit PMPI_Type_commit -#define MPI_Type_contiguous PMPI_Type_contiguous -#define MPI_Type_create_darray PMPI_Type_create_darray -#define MPI_Type_create_f90_complex PMPI_Type_create_f90_complex -#define MPI_Type_create_f90_integer PMPI_Type_create_f90_integer -#define MPI_Type_create_f90_real PMPI_Type_create_f90_real -#define MPI_Type_create_hindexed PMPI_Type_create_hindexed -#define MPI_Type_create_hvector PMPI_Type_create_hvector -#define MPI_Type_create_indexed_block PMPI_Type_create_indexed_block -#define 
MPI_Type_create_hindexed_block PMPI_Type_create_hindexed_block -#define MPI_Type_create_keyval PMPI_Type_create_keyval -#define MPI_Type_create_resized PMPI_Type_create_resized -#define MPI_Type_create_struct PMPI_Type_create_struct -#define MPI_Type_create_subarray PMPI_Type_create_subarray -#define MPI_Type_delete_attr PMPI_Type_delete_attr -#define MPI_Type_dup PMPI_Type_dup -#define MPI_Type_extent PMPI_Type_extent -#define MPI_Type_f2c PMPI_Type_f2c -#define MPI_Type_free_keyval PMPI_Type_free_keyval -#define MPI_Type_free PMPI_Type_free -#define MPI_Type_get_attr PMPI_Type_get_attr -#define MPI_Type_get_contents PMPI_Type_get_contents -#define MPI_Type_get_envelope PMPI_Type_get_envelope -#define MPI_Type_get_extent PMPI_Type_get_extent -#define MPI_Type_get_extent_x PMPI_Type_get_extent_x -#define MPI_Type_get_name PMPI_Type_get_name -#define MPI_Type_get_true_extent PMPI_Type_get_true_extent -#define MPI_Type_get_true_extent_x PMPI_Type_get_true_extent_x -#define MPI_Type_hindexed PMPI_Type_hindexed -#define MPI_Type_hvector PMPI_Type_hvector -#define MPI_Type_indexed PMPI_Type_indexed -#define MPI_Type_lb PMPI_Type_lb -#define MPI_Type_match_size PMPI_Type_match_size -#define MPI_Type_set_attr PMPI_Type_set_attr -#define MPI_Type_set_name PMPI_Type_set_name -#define MPI_Type_size PMPI_Type_size -#define MPI_Type_size_x PMPI_Type_size_x -#define MPI_Type_struct PMPI_Type_struct -#define MPI_Type_ub PMPI_Type_ub -#define MPI_Type_vector PMPI_Type_vector -#define MPI_Unpack_external PMPI_Unpack_external -#define MPI_Unpack PMPI_Unpack -#define MPI_Unpublish_name PMPI_Unpublish_name -#define MPI_Wait PMPI_Wait -#define MPI_Waitall PMPI_Waitall -#define MPI_Waitany PMPI_Waitany -#define MPI_Waitsome PMPI_Waitsome -#define MPI_Win_allocate PMPI_Win_allocate -#define MPI_Win_allocate_shared PMPI_Win_allocate_shared -#define MPI_Win_attach PMPI_Win_attach -#define MPI_Win_c2f PMPI_Win_c2f -#define MPI_Win_call_errhandler PMPI_Win_call_errhandler -#define 
MPI_Win_complete PMPI_Win_complete -#define MPI_Win_create_errhandler PMPI_Win_create_errhandler -#define MPI_Win_create_keyval PMPI_Win_create_keyval -#define MPI_Win_create PMPI_Win_create -#define MPI_Win_create_dynamic PMPI_Win_create_dynamic -#define MPI_Win_delete_attr PMPI_Win_delete_attr -#define MPI_Win_detach PMPI_Win_detach -#define MPI_Win_f2c PMPI_Win_f2c -#define MPI_Win_fence PMPI_Win_fence -#define MPI_Win_flush PMPI_Win_flush -#define MPI_Win_flush_all PMPI_Win_flush_all -#define MPI_Win_flush_local PMPI_Win_flush_local -#define MPI_Win_flush_local_all PMPI_Win_flush_local_all -#define MPI_Win_free_keyval PMPI_Win_free_keyval -#define MPI_Win_free PMPI_Win_free -#define MPI_Win_get_attr PMPI_Win_get_attr -#define MPI_Win_get_errhandler PMPI_Win_get_errhandler -#define MPI_Win_get_group PMPI_Win_get_group -#define MPI_Win_get_info PMPI_Win_get_info -#define MPI_Win_get_name PMPI_Win_get_name -#define MPI_Win_lock PMPI_Win_lock -#define MPI_Win_lock_all PMPI_Win_lock_all -#define MPI_Win_post PMPI_Win_post -#define MPI_Win_set_attr PMPI_Win_set_attr -#define MPI_Win_set_errhandler PMPI_Win_set_errhandler -#define MPI_Win_set_info PMPI_Win_set_info -#define MPI_Win_set_name PMPI_Win_set_name -#define MPI_Win_shared_query PMPI_Win_shared_query -#define MPI_Win_start PMPI_Win_start -#define MPI_Win_sync PMPI_Win_sync -#define MPI_Win_test PMPI_Win_test -#define MPI_Win_unlock PMPI_Win_unlock -#define MPI_Win_unlock_all PMPI_Win_unlock_all -#define MPI_Win_wait PMPI_Win_wait -#define MPI_Wtick PMPI_Wtick -#define MPI_Wtime PMPI_Wtime -#endif /* OMPI_C_PROFILE_DEFINES_H */ diff --git a/ompi/mpi/c/publish_name.c b/ompi/mpi/c/publish_name.c index 2ba69408d6c..5f5d6591345 100644 --- a/ompi/mpi/c/publish_name.c +++ b/ompi/mpi/c/publish_name.c @@ -6,34 +6,39 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" #include +#include "opal/class/opal_list.h" +#include "opal/mca/pmix/pmix.h" + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" -#include "ompi/mca/pubsub/pubsub.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Publish_name = PMPI_Publish_name #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Publish_name PMPI_Publish_name #endif static const char FUNC_NAME[] = "MPI_Publish_name"; @@ -43,16 +48,20 @@ int MPI_Publish_name(const char *service_name, MPI_Info info, const char *port_name) { int rc; + char range[OPAL_MAX_INFO_VAL]; + int flag=0; + opal_value_t *rng; + opal_list_t values; if ( MPI_PARAM_CHECK ) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( NULL == port_name || 0 == strlen(port_name) ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } if ( NULL == service_name || 0 == strlen(service_name) ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } if (NULL == info || 
ompi_info_is_freed(info)) { @@ -61,17 +70,79 @@ int MPI_Publish_name(const char *service_name, MPI_Info info, } } - OPAL_CR_ENTER_LIBRARY(); + OBJ_CONSTRUCT(&values, opal_list_t); + + /* OMPI supports info keys to pass the range and persistence to + * be used for the given key */ + if (MPI_INFO_NULL != info) { + ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); + if (flag) { + if (0 == strcmp(range, "nspace")) { + rng = OBJ_NEW(opal_value_t); + rng->key = strdup(OPAL_PMIX_RANGE); + rng->type = OPAL_INT; + rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace + opal_list_append(&values, &rng->super); + } else if (0 == strcmp(range, "session")) { + rng = OBJ_NEW(opal_value_t); + rng->key = strdup(OPAL_PMIX_RANGE); + rng->type = OPAL_INT; + rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session + opal_list_append(&values, &rng->super); + } else { + /* unrecognized scope */ + OPAL_LIST_DESTRUCT(&values); + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + FUNC_NAME); + } + } + ompi_info_get (info, "persistence", sizeof(range) - 1, range, &flag); + if (flag) { + if (0 == strcmp(range, "indef")) { + rng = OBJ_NEW(opal_value_t); + rng->key = strdup(OPAL_PMIX_PERSISTENCE); + rng->type = OPAL_INT; + rng->data.integer = OPAL_PMIX_PERSIST_INDEF; // retain until specifically deleted + opal_list_append(&values, &rng->super); + } else if (0 == strcmp(range, "proc")) { + rng = OBJ_NEW(opal_value_t); + rng->key = strdup(OPAL_PMIX_PERSISTENCE); + rng->type = OPAL_INT; + rng->data.integer = OPAL_PMIX_PERSIST_PROC; // retain until publishing process terminates + opal_list_append(&values, &rng->super); + } else if (0 == strcmp(range, "app")) { + rng = OBJ_NEW(opal_value_t); + rng->key = strdup(OPAL_PMIX_PERSISTENCE); + rng->type = OPAL_INT; + rng->data.integer = OPAL_PMIX_PERSIST_APP; // retain until application terminates + opal_list_append(&values, &rng->super); + } else if (0 == strcmp(range, "session")) { 
+ rng = OBJ_NEW(opal_value_t); + rng->key = strdup(OPAL_PMIX_PERSISTENCE); + rng->type = OPAL_INT; + rng->data.integer = OPAL_PMIX_PERSIST_SESSION; // retain until session/allocation terminates + opal_list_append(&values, &rng->super); + } else { + /* unrecognized persistence */ + OPAL_LIST_DESTRUCT(&values); + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + FUNC_NAME); + } + } + } + + /* publish the service name */ + rng = OBJ_NEW(opal_value_t); + rng->key = strdup(service_name); + rng->type = OPAL_STRING; + rng->data.string = strdup(port_name); + opal_list_append(&values, &rng->super); - /* - * No predefined info-objects for this function in MPI-2, - * therefore, we do not parse the info-object at the moment. - */ + rc = opal_pmix.publish(&values); + OPAL_LIST_DESTRUCT(&values); - rc = ompi_pubsub.publish (service_name, info, port_name); - OPAL_CR_EXIT_LIBRARY(); - if ( OMPI_SUCCESS != rc ) { - if (OMPI_EXISTS == rc) { + if ( OPAL_SUCCESS != rc ) { + if (OPAL_EXISTS == rc) { /* already exists - can't publish it */ return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_FILE_EXISTS, FUNC_NAME); diff --git a/ompi/mpi/c/put.c b/ompi/mpi/c/put.c index dc7dbb8f2bd..a25db956313 100644 --- a/ompi/mpi/c/put.c +++ b/ompi/mpi/c/put.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "ompi/mca/osc/osc.h" #include "ompi/datatype/ompi_datatype.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Put = PMPI_Put #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Put PMPI_Put #endif static const char FUNC_NAME[] = "MPI_Put"; @@ -43,7 +44,7 @@ static const char FUNC_NAME[] = "MPI_Put"; int MPI_Put(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, - MPI_Datatype target_datatype, MPI_Win win) + MPI_Datatype target_datatype, MPI_Win win) { int rc; @@ -59,10 +60,10 @@ int MPI_Put(const void *origin_addr, int origin_count, MPI_Datatype origin_datat } else if (ompi_win_peer_invalid(win, target_rank) && (MPI_PROC_NULL != target_rank)) { rc = MPI_ERR_RANK; - } else if (NULL == target_datatype || + } else if (NULL == target_datatype || MPI_DATATYPE_NULL == target_datatype) { rc = MPI_ERR_TYPE; - } else if ( target_disp < 0 ) { + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { rc = MPI_ERR_DISP; } else { OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, origin_datatype, origin_count); @@ -75,10 +76,7 @@ int MPI_Put(const void *origin_addr, int origin_count, MPI_Datatype origin_datat if (MPI_PROC_NULL == target_rank) return MPI_SUCCESS; - OPAL_CR_ENTER_LIBRARY(); - - /* XXX -- CONST -- do not cast away const -- update mca/osc */ - rc = win->w_osc_module->osc_put((void *) origin_addr, origin_count, origin_datatype, + rc = win->w_osc_module->osc_put(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); diff 
--git a/ompi/mpi/c/query_thread.c b/ompi/mpi/c/query_thread.c index 3ecc85509d2..e7093a4cc88 100644 --- a/ompi/mpi/c/query_thread.c +++ b/ompi/mpi/c/query_thread.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -23,22 +25,19 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Query_thread = PMPI_Query_thread #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Query_thread PMPI_Query_thread #endif static const char FUNC_NAME[] = "MPI_Query_thread"; -int MPI_Query_thread(int *provided) +int MPI_Query_thread(int *provided) { - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == provided) { diff --git a/ompi/mpi/c/raccumulate.c b/ompi/mpi/c/raccumulate.c index 8e2bfbeabaf..960671799a7 100644 --- a/ompi/mpi/c/raccumulate.c +++ b/ompi/mpi/c/raccumulate.c @@ -6,18 +6,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystmes, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -33,19 +35,18 @@ #include "ompi/datatype/ompi_datatype_internal.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Raccumulate = PMPI_Raccumulate #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Raccumulate PMPI_Raccumulate #endif static const char FUNC_NAME[] = "MPI_Raccumulate"; -int MPI_Raccumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, +int MPI_Raccumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, - MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request) + MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request) { int rc; ompi_win_t *ompi_win = (ompi_win_t*) win; @@ -72,7 +73,7 @@ int MPI_Raccumulate(void *origin_addr, int origin_count, MPI_Datatype origin_dat rc = MPI_ERR_OP; } else if (!ompi_op_is_intrinsic(op)) { rc = MPI_ERR_OP; - } else if ( target_disp < 0 ) { + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { rc = MPI_ERR_DISP; } else { OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, origin_datatype, origin_count); @@ -126,16 +127,13 @@ int MPI_Raccumulate(void *origin_addr, int origin_count, MPI_Datatype origin_dat return MPI_SUCCESS; } - 
OPAL_CR_ENTER_LIBRARY(); - - /* TODO: don't cast away the const */ - rc = ompi_win->w_osc_module->osc_raccumulate((void*) origin_addr, + rc = ompi_win->w_osc_module->osc_raccumulate(origin_addr, origin_count, origin_datatype, - target_rank, - target_disp, + target_rank, + target_disp, target_count, - target_datatype, + target_datatype, op, win, request); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/recv.c b/ompi/mpi/c/recv.c index 6215bb49002..060d09a31f1 100644 --- a/ompi/mpi/c/recv.c +++ b/ompi/mpi/c/recv.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,19 +28,18 @@ #include "ompi/memchecker.h" #include "ompi/request/request.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Recv = PMPI_Recv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Recv PMPI_Recv #endif static const char FUNC_NAME[] = "MPI_Recv"; int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, - int tag, MPI_Comm comm, MPI_Status *status) + int tag, MPI_Comm comm, MPI_Status *status) { int rc = MPI_SUCCESS; @@ -52,17 +53,17 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); OMPI_CHECK_DATATYPE_FOR_RECV(rc, type, count); OMPI_CHECK_USER_BUFFER(rc, buf, type, count); - + if (ompi_comm_invalid(comm)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (((tag < 0) && (tag != MPI_ANY_TAG)) || (tag > mca_pml.pml_max_tag)) { rc = MPI_ERR_TAG; - } else if ((source != MPI_ANY_SOURCE) && + } else if ((source != MPI_ANY_SOURCE) && (MPI_PROC_NULL != source) && ompi_comm_peer_invalid(comm, source)) { rc = MPI_ERR_RANK; } - + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); } @@ -73,8 +74,6 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - rc = MCA_PML_CALL(recv(buf, count, type, source, tag, comm, status)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/recv_init.c b/ompi/mpi/c/recv_init.c index 43c7e0040f5..22d927b3cbd 100644 --- a/ompi/mpi/c/recv_init.c +++ b/ompi/mpi/c/recv_init.c @@ -2,18 +2,20 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,12 +28,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Recv_init = PMPI_Recv_init #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Recv_init PMPI_Recv_init #endif static const char FUNC_NAME[] = "MPI_Recv_init"; @@ -52,37 +53,35 @@ int MPI_Recv_init(void *buf, int count, MPI_Datatype type, int source, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); OMPI_CHECK_DATATYPE_FOR_RECV(rc, type, count); OMPI_CHECK_USER_BUFFER(rc, buf, type, count); - + if (ompi_comm_invalid(comm)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (((tag < 0) && (tag != MPI_ANY_TAG)) || (tag > mca_pml.pml_max_tag)) { rc = MPI_ERR_TAG; - } else if ((source != MPI_ANY_SOURCE) && + } else if ((source != MPI_ANY_SOURCE) && (MPI_PROC_NULL != source) && ompi_comm_peer_invalid(comm, source)) { rc = MPI_ERR_RANK; } else if (NULL == request) { rc = MPI_ERR_REQUEST; } - + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); } - if (MPI_PROC_NULL == source) { + if (MPI_PROC_NULL == source) { *request = OBJ_NEW(ompi_request_t); 
/* Other fields were initialized by the constructor for ompi_request_t */ (*request)->req_type = OMPI_REQUEST_NOOP; (*request)->req_status = ompi_request_empty.req_status; - (*request)->req_complete = true; + (*request)->req_complete = REQUEST_COMPLETED; (*request)->req_state = OMPI_REQUEST_INACTIVE; (*request)->req_persistent = true; (*request)->req_free = ompi_request_persistent_proc_null_free; return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* * Here, we just initialize the request -- memchecker should set the buffer in MPI_Start. */ diff --git a/ompi/mpi/c/reduce.c b/ompi/mpi/c/reduce.c index dbd29641f19..cf65eb5bd77 100644 --- a/ompi/mpi/c/reduce.c +++ b/ompi/mpi/c/reduce.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,19 +32,18 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Reduce = PMPI_Reduce #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Reduce PMPI_Reduce #endif static const char FUNC_NAME[] = "MPI_Reduce"; int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) + MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) { int err; @@ -58,7 +59,7 @@ int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, } else { memchecker_call(&opal_memchecker_base_isdefined, sendbuf, count, datatype); } - + /* check whether root's receive buffer is addressable. */ memchecker_call(&opal_memchecker_base_isaddressable, recvbuf, count, datatype); } else { @@ -69,7 +70,7 @@ int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, if (MPI_ROOT == root) { /* check whether root's receive buffer is addressable. */ memchecker_call(&opal_memchecker_base_isaddressable, recvbuf, count, datatype); - } else if (MPI_PROC_NULL != root) { + } else if (MPI_PROC_NULL != root) { /* check whether send buffer is defined. 
*/ memchecker_call(&opal_memchecker_base_isdefined, sendbuf, count, datatype); } @@ -81,12 +82,12 @@ int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } /* Checks for all ranks */ - + else if (MPI_OP_NULL == op || NULL == op) { err = MPI_ERR_OP; } else if (!ompi_op_is_valid(op, datatype, &msg, FUNC_NAME)) { @@ -127,13 +128,10 @@ int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ OBJ_RETAIN(op); - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_reduce((void *) sendbuf, recvbuf, count, + err = comm->c_coll.coll_reduce(sendbuf, recvbuf, count, datatype, op, root, comm, comm->c_coll.coll_reduce_module); OBJ_RELEASE(op); diff --git a/ompi/mpi/c/reduce_local.c b/ompi/mpi/c/reduce_local.c index f5e95630822..a6096fab587 100644 --- a/ompi/mpi/c/reduce_local.c +++ b/ompi/mpi/c/reduce_local.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Reduce_local = PMPI_Reduce_local #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Reduce_local PMPI_Reduce_local #endif static const char FUNC_NAME[] = "MPI_Reduce_local"; @@ -70,8 +71,6 @@ int MPI_Reduce_local(const void *inbuf, void *inoutbuf, int count, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the op component to perform the back-end operation */ OBJ_RETAIN(op); /* XXX -- CONST -- do not cast away const -- update mca/coll */ diff --git a/ompi/mpi/c/reduce_scatter.c b/ompi/mpi/c/reduce_scatter.c index 387773799fa..8a5d13321a6 100644 --- a/ompi/mpi/c/reduce_scatter.c +++ b/ompi/mpi/c/reduce_scatter.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,25 +32,24 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Reduce_scatter = PMPI_Reduce_scatter #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Reduce_scatter PMPI_Reduce_scatter #endif static const char FUNC_NAME[] = "MPI_Reduce_scatter"; int MPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int i, err, size, count; MEMCHECKER( int rank; - + size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); for (count = i = 0; i < size; ++i) { @@ -59,17 +60,17 @@ int MPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[ memchecker_comm(comm); memchecker_datatype(datatype); - + /* check receive buffer of current proccess, whether it's addressable. */ memchecker_call(&opal_memchecker_base_isaddressable, recvbuf, recvcounts[rank], datatype); - + /* check whether the actual send buffer is defined. 
*/ if(MPI_IN_PLACE == sendbuf) { memchecker_call(&opal_memchecker_base_isdefined, recvbuf, count, datatype); } else { memchecker_call(&opal_memchecker_base_isdefined, sendbuf, count, datatype); - + } ); @@ -78,7 +79,7 @@ int MPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[ err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } @@ -122,13 +123,10 @@ int MPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[ return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ OBJ_RETAIN(op); - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_reduce_scatter((void *) sendbuf, recvbuf, (int *) recvcounts, + err = comm->c_coll.coll_reduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, comm, comm->c_coll.coll_reduce_scatter_module); OBJ_RELEASE(op); diff --git a/ompi/mpi/c/reduce_scatter_block.c b/ompi/mpi/c/reduce_scatter_block.c index 6ff40cf4b90..9b38ef17ea0 100644 --- a/ompi/mpi/c/reduce_scatter_block.c +++ b/ompi/mpi/c/reduce_scatter_block.c @@ -14,6 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,12 +33,11 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Reduce_scatter_block = PMPI_Reduce_scatter_block #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Reduce_scatter_block PMPI_Reduce_scatter_block #endif static const char FUNC_NAME[] = "MPI_Reduce_scatter_block"; @@ -91,13 +92,10 @@ int MPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ OBJ_RETAIN(op); - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_reduce_scatter_block((void *) sendbuf, recvbuf, recvcount, + err = comm->c_coll.coll_reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm, comm->c_coll.coll_reduce_scatter_block_module); OBJ_RELEASE(op); diff --git a/ompi/mpi/c/register_datarep.c b/ompi/mpi/c/register_datarep.c index 09a87d70299..2ce49dbe9f3 100644 --- a/ompi/mpi/c/register_datarep.c +++ b/ompi/mpi/c/register_datarep.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,12 +30,11 @@ #include "ompi/mca/io/base/base.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Register_datarep = PMPI_Register_datarep #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Register_datarep PMPI_Register_datarep #endif static const char FUNC_NAME[] = "MPI_Register_datarep"; @@ -65,17 +66,14 @@ int MPI_Register_datarep(const char *datarep, return OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ - /* XXX -- CONST -- do not cast away const -- update mca/io */ - rc = mca_io_base_register_datarep((char *) datarep, read_conversion_fn, + rc = mca_io_base_register_datarep(datarep, read_conversion_fn, write_conversion_fn, dtype_file_extent_fn, extra_state); /* All done */ - + OMPI_ERRHANDLER_RETURN(rc, MPI_FILE_NULL, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/request_c2f.c b/ompi/mpi/c/request_c2f.c index 01f3f6c583b..9d98b2928b5 100644 --- a/ompi/mpi/c/request_c2f.c +++ b/ompi/mpi/c/request_c2f.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,25 +29,22 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Request_c2f = PMPI_Request_c2f #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Request_c2f PMPI_Request_c2f #endif static const char FUNC_NAME[] = "MPI_Request_c2f"; -MPI_Fint MPI_Request_c2f(MPI_Request request) +MPI_Fint MPI_Request_c2f(MPI_Request request) { MEMCHECKER( memchecker_request(&request); ); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -68,7 +67,7 @@ MPI_Fint MPI_Request_c2f(MPI_Request request) */ if (MPI_UNDEFINED == request->req_f_to_c_index) { - request->req_f_to_c_index = + request->req_f_to_c_index = opal_pointer_array_add(&ompi_request_f_to_c_table, request); } diff --git a/ompi/mpi/c/request_f2c.c b/ompi/mpi/c/request_f2c.c index 4e43e4af3cb..993aa4383fe 100644 --- a/ompi/mpi/c/request_f2c.c +++ b/ompi/mpi/c/request_f2c.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,23 +27,20 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/request/request.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Request_f2c = PMPI_Request_f2c #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Request_f2c PMPI_Request_f2c #endif static const char FUNC_NAME[] = "MPI_Request_f2c"; -MPI_Request MPI_Request_f2c(MPI_Fint request) +MPI_Request MPI_Request_f2c(MPI_Fint request) { int request_index = OMPI_FINT_2_INT(request); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } @@ -49,9 +48,9 @@ MPI_Request MPI_Request_f2c(MPI_Fint request) /* Per MPI-2:4.12.4, do not invoke an error handler if we get an invalid fortran handle. If we get an invalid fortran handle, return an invalid C handle. */ - - if (request_index < 0 || - request_index >= + + if (request_index < 0 || + request_index >= opal_pointer_array_get_size(&ompi_request_f_to_c_table)) { return NULL; } diff --git a/ompi/mpi/c/request_free.c b/ompi/mpi/c/request_free.c index aa2a4a1fc43..011fdcc7ffc 100644 --- a/ompi/mpi/c/request_free.c +++ b/ompi/mpi/c/request_free.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,18 +28,17 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Request_free = PMPI_Request_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Request_free PMPI_Request_free #endif static const char FUNC_NAME[] = "MPI_Request_free"; -int MPI_Request_free(MPI_Request *request) +int MPI_Request_free(MPI_Request *request) { int rc; @@ -48,15 +49,13 @@ int MPI_Request_free(MPI_Request *request) if (MPI_PARAM_CHECK) { rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == request || NULL == *request || + if (NULL == request || NULL == *request || MPI_REQUEST_NULL == *request) { rc = MPI_ERR_REQUEST; } OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_request_free(request); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/request_get_status.c b/ompi/mpi/c/request_get_status.c index 14d975e49c5..9a7d3cc48de 100644 --- a/ompi/mpi/c/request_get_status.c +++ b/ompi/mpi/c/request_get_status.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,12 +29,11 @@ #include "ompi/request/grequest.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Request_get_status = PMPI_Request_get_status #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Request_get_status PMPI_Request_get_status #endif static const char FUNC_NAME[] = "MPI_Request_get_status"; @@ -42,7 +43,7 @@ static const char FUNC_NAME[] = "MPI_Request_get_status"; * or free should be executed on the request. */ int MPI_Request_get_status(MPI_Request request, int *flag, - MPI_Status *status) + MPI_Status *status) { #if OPAL_ENABLE_PROGRESS_THREADS == 0 int do_it_once = 0; @@ -52,8 +53,6 @@ int MPI_Request_get_status(MPI_Request request, int *flag, memchecker_request(&request); ); - OPAL_CR_NOOP_PROGRESS(); - if( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if( (NULL == flag) ) { @@ -75,8 +74,8 @@ int MPI_Request_get_status(MPI_Request request, int *flag, } return MPI_SUCCESS; } - if( request->req_complete ) { - *flag = true; + if( request->req_complete ) { + *flag = true; /* If this is a generalized request, we *always* have to call the query function to get the status (MPI-2:8.2), even if the user passed STATUS_IGNORE. */ diff --git a/ompi/mpi/c/rget.c b/ompi/mpi/c/rget.c index 85c396abe8f..fc037343f76 100644 --- a/ompi/mpi/c/rget.c +++ b/ompi/mpi/c/rget.c @@ -6,12 +6,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
- * Copyright (c) 2014 Los Alamos National Security, LLC. ALl rights + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. ALl rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/mca/osc/osc.h" #include "ompi/datatype/ompi_datatype.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Rget = PMPI_Rget #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Rget PMPI_Rget #endif static const char FUNC_NAME[] = "MPI_Rget"; @@ -59,7 +60,7 @@ int MPI_Rget(void *origin_addr, int origin_count, } else if (ompi_win_peer_invalid(win, target_rank) && (MPI_PROC_NULL != target_rank)) { rc = MPI_ERR_RANK; - } else if ( target_disp < 0 ) { + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { rc = MPI_ERR_DISP; } else { OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, origin_datatype, origin_count); @@ -75,8 +76,6 @@ int MPI_Rget(void *origin_addr, int origin_count, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_rget(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win, request); diff --git a/ompi/mpi/c/rget_accumulate.c b/ompi/mpi/c/rget_accumulate.c index 461d8599375..c3325fcdb18 100644 --- a/ompi/mpi/c/rget_accumulate.c +++ b/ompi/mpi/c/rget_accumulate.c @@ -6,18 +6,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2009 Sun Microsystmes, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All right + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All right * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -34,12 +36,11 @@ #include "ompi/datatype/ompi_datatype_internal.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Rget_accumulate = PMPI_Rget_accumulate #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Rget_accumulate PMPI_Rget_accumulate #endif static const char FUNC_NAME[] = "MPI_Rget_accumulate"; @@ -47,7 +48,7 @@ static const char FUNC_NAME[] = "MPI_Rget_accumulate"; int MPI_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count, - MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request) + MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request) { int rc; ompi_win_t *ompi_win = (ompi_win_t*) win; @@ -74,7 +75,7 @@ int MPI_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype rc = MPI_ERR_OP; } else if (!ompi_op_is_intrinsic(op)) { rc = MPI_ERR_OP; - } else if ( target_disp < 0 ) { + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { rc = MPI_ERR_DISP; } else { /* the origin datatype is meaningless when using MPI_OP_NO_OP */ @@ -133,19 +134,16 @@ int MPI_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - 
- /* TODO: do not cast away the const */ - rc = ompi_win->w_osc_module->osc_rget_accumulate((void *) origin_addr, + rc = ompi_win->w_osc_module->osc_rget_accumulate(origin_addr, origin_count, origin_datatype, result_addr, result_count, result_datatype, - target_rank, - target_disp, + target_rank, + target_disp, target_count, - target_datatype, + target_datatype, op, win, request); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/rput.c b/ompi/mpi/c/rput.c index 16aae363f6f..859498e02dd 100644 --- a/ompi/mpi/c/rput.c +++ b/ompi/mpi/c/rput.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "ompi/mca/osc/osc.h" #include "ompi/datatype/ompi_datatype.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Rput = PMPI_Rput #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Rput PMPI_Rput #endif static const char FUNC_NAME[] = "MPI_Rput"; @@ -43,7 +44,7 @@ static const char FUNC_NAME[] = "MPI_Rput"; int MPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, - MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request) + MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request) { int rc; @@ -59,10 +60,10 @@ int MPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype origin_data } else if (ompi_win_peer_invalid(win, target_rank) && (MPI_PROC_NULL != target_rank)) { rc = MPI_ERR_RANK; - } else if (NULL == target_datatype || + } else if (NULL == target_datatype || MPI_DATATYPE_NULL == target_datatype) { rc = MPI_ERR_TYPE; - } else if ( target_disp < 0 ) { + } else if ( MPI_WIN_FLAVOR_DYNAMIC != win->w_flavor && target_disp < 0 ) { rc = MPI_ERR_DISP; } else { OMPI_CHECK_DATATYPE_FOR_ONE_SIDED(rc, origin_datatype, origin_count); @@ -78,11 +79,8 @@ int MPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype origin_data return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - - /* TODO: do not cast away the const */ - rc = win->w_osc_module->osc_rput((void *) origin_addr, origin_count, origin_datatype, - target_rank, target_disp, target_count, + rc = win->w_osc_module->osc_rput(origin_addr, origin_count, origin_datatype, + target_rank, target_disp, target_count, target_datatype, win, request); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/rsend.c b/ompi/mpi/c/rsend.c index 
dae3ad9e08f..0e4def37a9d 100644 --- a/ompi/mpi/c/rsend.c +++ b/ompi/mpi/c/rsend.c @@ -13,6 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +32,11 @@ #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Rsend = PMPI_Rsend #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Rsend PMPI_Rsend #endif static const char FUNC_NAME[] = "MPI_Rsend"; @@ -75,9 +76,7 @@ int MPI_Rsend(const void *buf, int count, MPI_Datatype type, int dest, int tag, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(send((void *) buf, count, type, dest, tag, + rc = MCA_PML_CALL(send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_READY, comm)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/rsend_init.c b/ompi/mpi/c/rsend_init.c index 9a87bd3a12e..7aef5f65229 100644 --- a/ompi/mpi/c/rsend_init.c +++ b/ompi/mpi/c/rsend_init.c @@ -3,20 +3,22 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +32,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Rsend_init = PMPI_Rsend_init #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Rsend_init PMPI_Rsend_init #endif static const char FUNC_NAME[] = "MPI_Rsend_init"; @@ -43,7 +44,7 @@ static const char FUNC_NAME[] = "MPI_Rsend_init"; int MPI_Rsend_init(const void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm, - MPI_Request *request) + MPI_Request *request) { int rc; @@ -79,20 +80,17 @@ int MPI_Rsend_init(const void *buf, int count, MPI_Datatype type, ompi_request_t */ (*request)->req_type = OMPI_REQUEST_NOOP; (*request)->req_status = ompi_request_empty.req_status; - (*request)->req_complete = true; + (*request)->req_complete = REQUEST_COMPLETED; (*request)->req_state = OMPI_REQUEST_INACTIVE; (*request)->req_persistent = true; (*request)->req_free = ompi_request_persistent_proc_null_free; return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* * Here, we just initialize the request -- memchecker should set the buffer in MPI_Start. 
*/ - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(isend_init((void *) buf,count,type,dest,tag, + rc = MCA_PML_CALL(isend_init(buf,count,type,dest,tag, MCA_PML_BASE_SEND_READY,comm,request)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/scan.c b/ompi/mpi/c/scan.c index 5d44ea78420..57031b50787 100644 --- a/ompi/mpi/c/scan.c +++ b/ompi/mpi/c/scan.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,19 +32,18 @@ #include "ompi/op/op.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Scan = PMPI_Scan #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Scan PMPI_Scan #endif static const char FUNC_NAME[] = "MPI_Scan"; int MPI_Scan(const void *sendbuf, void *recvbuf, int count, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int err; @@ -61,7 +62,7 @@ int MPI_Scan(const void *sendbuf, void *recvbuf, int count, err = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid(comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } @@ -96,13 +97,10 @@ int MPI_Scan(const void *sendbuf, void *recvbuf, int count, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* Call the coll component to actually perform the allgather */ OBJ_RETAIN(op); - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_scan((void *) sendbuf, recvbuf, count, + err = comm->c_coll.coll_scan(sendbuf, recvbuf, count, datatype, op, comm, comm->c_coll.coll_scan_module); OBJ_RELEASE(op); diff --git a/ompi/mpi/c/scatter.c b/ompi/mpi/c/scatter.c index 56acc7d94f0..652525a45bd 100644 --- a/ompi/mpi/c/scatter.c +++ b/ompi/mpi/c/scatter.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. @@ -15,6 +15,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,12 +33,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Scatter = PMPI_Scatter #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Scatter PMPI_Scatter #endif static const char FUNC_NAME[] = "MPI_Scatter"; @@ -157,11 +158,8 @@ int MPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_scatter((void *) sendbuf, sendcount, sendtype, recvbuf, + err = comm->c_coll.coll_scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, comm->c_coll.coll_scatter_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/scatterv.c b/ompi/mpi/c/scatterv.c index 8410359a8f3..44e70002689 100644 --- a/ompi/mpi/c/scatterv.c +++ b/ompi/mpi/c/scatterv.c @@ -13,6 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Scatterv = PMPI_Scatterv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Scatterv PMPI_Scatterv #endif static const char FUNC_NAME[] = "MPI_Scatterv"; @@ -186,11 +187,8 @@ int MPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[] } } - OPAL_CR_ENTER_LIBRARY(); - /* Invoke the coll component to perform the back-end operation */ - /* XXX -- CONST -- do not cast away const -- update mca/coll */ - err = comm->c_coll.coll_scatterv((void *) sendbuf, (int *) sendcounts, (int *) displs, + err = comm->c_coll.coll_scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, comm->c_coll.coll_scatterv_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/send.c b/ompi/mpi/c/send.c index 73df108e90c..28963cbd30b 100644 --- a/ompi/mpi/c/send.c +++ b/ompi/mpi/c/send.c @@ -6,12 +6,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Send = PMPI_Send #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Send PMPI_Send #endif static const char FUNC_NAME[] = "MPI_Send"; @@ -73,8 +74,6 @@ int MPI_Send(const void *buf, int count, MPI_Datatype type, int dest, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(send((void *) buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm)); + rc = MCA_PML_CALL(send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/send_init.c b/ompi/mpi/c/send_init.c index e732d500ef2..4eb4221d5da 100644 --- a/ompi/mpi/c/send_init.c +++ b/ompi/mpi/c/send_init.c @@ -3,16 +3,18 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +32,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Send_init = PMPI_Send_init #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Send_init PMPI_Send_init #endif static const char FUNC_NAME[] = "MPI_Send_init"; @@ -79,20 +80,17 @@ int MPI_Send_init(const void *buf, int count, MPI_Datatype type, ompi_request_t */ (*request)->req_type = OMPI_REQUEST_NOOP; (*request)->req_status = ompi_request_empty.req_status; - (*request)->req_complete = true; + (*request)->req_complete = REQUEST_COMPLETED; (*request)->req_state = OMPI_REQUEST_INACTIVE; (*request)->req_persistent = true; (*request)->req_free = ompi_request_persistent_proc_null_free; return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* * Here, we just initialize the request -- memchecker should set the buffer in MPI_Start. */ - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(isend_init((void *) buf,count,type,dest,tag,MCA_PML_BASE_SEND_STANDARD,comm,request)); + rc = MCA_PML_CALL(isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_STANDARD,comm,request)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/sendrecv.c b/ompi/mpi/c/sendrecv.c index 3fa5700abdf..f24e6700cff 100644 --- a/ompi/mpi/c/sendrecv.c +++ b/ompi/mpi/c/sendrecv.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,12 +30,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Sendrecv = PMPI_Sendrecv #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Sendrecv PMPI_Sendrecv #endif static const char FUNC_NAME[] = "MPI_Sendrecv"; @@ -75,8 +76,6 @@ int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - if (source != MPI_PROC_NULL) { /* post recv */ rc = MCA_PML_CALL(irecv(recvbuf, recvcount, recvtype, source, recvtag, comm, &req)); @@ -84,8 +83,7 @@ int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, } if (dest != MPI_PROC_NULL) { /* send */ - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(send((void *) sendbuf, sendcount, sendtype, dest, + rc = MCA_PML_CALL(send(sendbuf, sendcount, sendtype, dest, sendtag, MCA_PML_BASE_SEND_STANDARD, comm)); OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/sendrecv_replace.c b/ompi/mpi/c/sendrecv_replace.c index e1ae56484c7..297a3b9bfb7 100644 --- a/ompi/mpi/c/sendrecv_replace.c +++ b/ompi/mpi/c/sendrecv_replace.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -30,12 +30,11 @@ #include "ompi/proc/proc.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Sendrecv_replace = PMPI_Sendrecv_replace #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Sendrecv_replace PMPI_Sendrecv_replace #endif static const char FUNC_NAME[] = "MPI_Sendrecv_replace"; @@ -73,13 +72,10 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* simple case */ if ( source == MPI_PROC_NULL || dest == MPI_PROC_NULL || count == 0 ) { - rc = MPI_Sendrecv(buf,count,datatype,dest,sendtag,buf,count,datatype,source,recvtag,comm,status); + rc = PMPI_Sendrecv(buf,count,datatype,dest,sendtag,buf,count,datatype,source,recvtag,comm,status); - OPAL_CR_EXIT_LIBRARY(); return rc; } else { @@ -103,7 +99,7 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, /* setup a buffer for recv */ opal_convertor_get_packed_size( &convertor, &packed_size ); if( packed_size > sizeof(recv_data) ) { - rc = MPI_Alloc_mem(packed_size, MPI_INFO_NULL, &iov.iov_base); + rc = PMPI_Alloc_mem(packed_size, MPI_INFO_NULL, &iov.iov_base); if(OMPI_SUCCESS != rc) { OMPI_ERRHANDLER_RETURN(OMPI_ERR_OUT_OF_RESOURCE, comm, MPI_ERR_BUFFER, FUNC_NAME); } @@ -112,11 +108,11 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, } /* recv into temporary buffer */ - rc = MPI_Sendrecv( buf, count, datatype, dest, sendtag, iov.iov_base, packed_size, + rc = PMPI_Sendrecv( buf, count, datatype, dest, sendtag, iov.iov_base, packed_size, MPI_BYTE, source, recvtag, comm, &recv_status ); if (rc != MPI_SUCCESS) { if(packed_size > sizeof(recv_data)) - MPI_Free_mem(iov.iov_base); + PMPI_Free_mem(iov.iov_base); 
OBJ_DESTRUCT(&convertor); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } @@ -134,11 +130,10 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, /* release resources */ if(packed_size > sizeof(recv_data)) { - MPI_Free_mem(iov.iov_base); + PMPI_Free_mem(iov.iov_base); } OBJ_DESTRUCT(&convertor); - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } } diff --git a/ompi/mpi/c/ssend.c b/ompi/mpi/c/ssend.c index 700c3ea5ea6..0d0c2922b66 100644 --- a/ompi/mpi/c/ssend.c +++ b/ompi/mpi/c/ssend.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,12 +31,11 @@ #include "ompi/mca/pml/pml.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ssend = PMPI_Ssend #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ssend PMPI_Ssend #endif static const char FUNC_NAME[] = "MPI_Ssend"; @@ -74,9 +75,7 @@ int MPI_Ssend(const void *buf, int count, MPI_Datatype type, int dest, int tag, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(send((void *) buf, count, type, dest, tag, + rc = MCA_PML_CALL(send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/ssend_init.c b/ompi/mpi/c/ssend_init.c index e3548e0af46..d2cdc05b9be 100644 --- a/ompi/mpi/c/ssend_init.c +++ b/ompi/mpi/c/ssend_init.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -13,6 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +32,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Ssend_init = PMPI_Ssend_init #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Ssend_init PMPI_Ssend_init #endif static const char FUNC_NAME[] = "MPI_Ssend_init"; @@ -79,20 +80,17 @@ int MPI_Ssend_init(const void *buf, int count, MPI_Datatype type, ompi_request_t */ (*request)->req_type = OMPI_REQUEST_NOOP; (*request)->req_status = ompi_request_empty.req_status; - (*request)->req_complete = true; + (*request)->req_complete = REQUEST_COMPLETED; (*request)->req_state = OMPI_REQUEST_INACTIVE; (*request)->req_persistent = true; (*request)->req_free = ompi_request_persistent_proc_null_free; return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - /* * Here, we just initialize the request -- memchecker should set the buffer in MPI_Start. */ - /* XXX -- CONST -- do not cast away const -- update mca/pml */ - rc = MCA_PML_CALL(isend_init((void *) buf, count, type, dest, tag, + rc = MCA_PML_CALL(isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm, request)); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/start.c b/ompi/mpi/c/start.c index 34040d12ae0..8df3d1044b0 100644 --- a/ompi/mpi/c/start.c +++ b/ompi/mpi/c/start.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,18 +29,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Start = PMPI_Start #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Start PMPI_Start #endif static const char FUNC_NAME[] = "MPI_Start"; -int MPI_Start(MPI_Request *request) +int MPI_Start(MPI_Request *request) { int ret = OMPI_SUCCESS; @@ -59,17 +60,15 @@ int MPI_Start(MPI_Request *request) * MPI standard 3.1 page 78 line 19: we must have the following * sequence CREATE (START COMPLETE)* FREE. The upper level is * responsible for handling any concurency. The PML must handle - * this case, as it is the only one knowing if the request can + * this case, as it is the only one knowing if the request can * be reused or not (it is PML completed or not?). */ switch((*request)->req_type) { case OMPI_REQUEST_PML: - OPAL_CR_ENTER_LIBRARY(); ret = MCA_PML_CALL(start(1, request)); - OPAL_CR_EXIT_LIBRARY(); return ret; case OMPI_REQUEST_NOOP: diff --git a/ompi/mpi/c/startall.c b/ompi/mpi/c/startall.c index 7193ae07854..3c714a11e66 100644 --- a/ompi/mpi/c/startall.c +++ b/ompi/mpi/c/startall.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,19 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -31,18 +32,17 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Startall = PMPI_Startall #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Startall PMPI_Startall #endif static const char FUNC_NAME[] = "MPI_Startall"; -int MPI_Startall(int count, MPI_Request requests[]) +int MPI_Startall(int count, MPI_Request requests[]) { int i; int ret = OMPI_SUCCESS; @@ -74,9 +74,13 @@ int MPI_Startall(int count, MPI_Request requests[]) OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); for (i = 0; i < count; ++i) { + /* Per MPI it is invalid to start an active request */ + if (OMPI_REQUEST_INACTIVE != requests[i]->req_state) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_REQUEST, FUNC_NAME); + } + if (OMPI_REQUEST_NOOP == requests[i]->req_type) { /** * We deal with a MPI_PROC_NULL request. If the request is @@ -84,14 +88,12 @@ int MPI_Startall(int count, MPI_Request requests[]) * Otherwise, mark it active so we can correctly handle it in * the wait*. 
*/ - if( OMPI_REQUEST_INACTIVE == requests[i]->req_state ) { - requests[i]->req_state = OMPI_REQUEST_ACTIVE; - } + requests[i]->req_state = OMPI_REQUEST_ACTIVE; } } + ret = MCA_PML_CALL(start(count, requests)); - OPAL_CR_EXIT_LIBRARY(); return ret; } diff --git a/ompi/mpi/c/status_c2f.c b/ompi/mpi/c/status_c2f.c index ad4799e8965..ba2afbf8342 100644 --- a/ompi/mpi/c/status_c2f.c +++ b/ompi/mpi/c/status_c2f.c @@ -6,13 +6,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +32,11 @@ #include "ompi/mpi/fortran/base/constants.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Status_c2f = PMPI_Status_c2f #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Status_c2f PMPI_Status_c2f #endif static const char FUNC_NAME[] = "MPI_Status_c2f"; @@ -56,8 +57,6 @@ int MPI_Status_c2f(const MPI_Status *c_status, MPI_Fint *f_status) } ); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/status_f2c.c b/ompi/mpi/c/status_f2c.c index 8e2a3503f76..05140d96800 100644 --- a/ompi/mpi/c/status_f2c.c +++ b/ompi/mpi/c/status_f2c.c @@ -6,13 +6,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,12 +30,11 @@ #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Status_f2c = PMPI_Status_f2c #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Status_f2c PMPI_Status_f2c #endif static const char FUNC_NAME[] = "MPI_Status_f2c"; @@ -43,8 +44,6 @@ int MPI_Status_f2c(const MPI_Fint *f_status, MPI_Status *c_status) { int i, *c_ints; - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/status_set_cancelled.c b/ompi/mpi/c/status_set_cancelled.c index 0f32342c269..5cebb091771 100644 --- a/ompi/mpi/c/status_set_cancelled.c +++ b/ompi/mpi/c/status_set_cancelled.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,18 +26,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Status_set_cancelled = PMPI_Status_set_cancelled #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Status_set_cancelled PMPI_Status_set_cancelled #endif static const char FUNC_NAME[] = "MPI_Status_set_cancelled"; -int MPI_Status_set_cancelled(MPI_Status *status, int flag) +int MPI_Status_set_cancelled(MPI_Status *status, int flag) { MEMCHECKER( if(status != MPI_STATUSES_IGNORE) { @@ -48,13 +49,11 @@ int MPI_Status_set_cancelled(MPI_Status *status, int flag) } ); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { int rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == status || - MPI_STATUS_IGNORE == status || + MPI_STATUS_IGNORE == status || MPI_STATUSES_IGNORE == status) { rc = MPI_ERR_ARG; } diff --git a/ompi/mpi/c/status_set_elements.c b/ompi/mpi/c/status_set_elements.c index 5bf5399bbe6..97f9fea6bc6 100644 --- a/ompi/mpi/c/status_set_elements.c +++ b/ompi/mpi/c/status_set_elements.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Status_set_elements = PMPI_Status_set_elements #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Status_set_elements PMPI_Status_set_elements #endif static const char FUNC_NAME[] = "MPI_Status_set_elements"; @@ -56,8 +57,6 @@ int MPI_Status_set_elements(MPI_Status *status, MPI_Datatype datatype, int count } ); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == datatype || MPI_DATATYPE_NULL == datatype) { diff --git a/ompi/mpi/c/status_set_elements_x.c b/ompi/mpi/c/status_set_elements_x.c index 561f5fef8f4..19477b5b182 100644 --- a/ompi/mpi/c/status_set_elements_x.c +++ b/ompi/mpi/c/status_set_elements_x.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Status_set_elements_x = PMPI_Status_set_elements_x #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Status_set_elements_x PMPI_Status_set_elements_x #endif static const char FUNC_NAME[] = "MPI_Status_set_elements_x"; @@ -56,8 +57,6 @@ int MPI_Status_set_elements_x(MPI_Status *status, MPI_Datatype datatype, MPI_Cou } ); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == datatype || MPI_DATATYPE_NULL == datatype) { diff --git a/ompi/mpi/c/test.c b/ompi/mpi/c/test.c index 4a0c4fd4f81..31dd640a8e1 100644 --- a/ompi/mpi/c/test.c +++ b/ompi/mpi/c/test.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,18 +28,17 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Test = PMPI_Test #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Test PMPI_Test #endif static const char FUNC_NAME[] = "MPI_Test"; -int MPI_Test(MPI_Request *request, int *completed, MPI_Status *status) +int MPI_Test(MPI_Request *request, int *completed, MPI_Status *status) { int rc; @@ -56,7 +57,6 @@ int MPI_Test(MPI_Request *request, int *completed, MPI_Status *status) OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_request_test(request, completed, status); if (*completed < 0) { @@ -67,7 +67,6 @@ int MPI_Test(MPI_Request *request, int *completed, MPI_Status *status) opal_memchecker_base_mem_undefined(&status->MPI_ERROR, sizeof(int)); ); - OPAL_CR_EXIT_LIBRARY(); if (OMPI_SUCCESS == rc) { return MPI_SUCCESS; diff --git a/ompi/mpi/c/test_cancelled.c b/ompi/mpi/c/test_cancelled.c index b1f3ea08476..e1d8d9b2675 100644 --- a/ompi/mpi/c/test_cancelled.c +++ b/ompi/mpi/c/test_cancelled.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -27,12 +29,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Test_cancelled = PMPI_Test_cancelled #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Test_cancelled PMPI_Test_cancelled #endif static const char FUNC_NAME[] = "MPI_Test_cancelled"; @@ -47,8 +48,6 @@ int MPI_Test_cancelled(const MPI_Status *status, int *flag) memchecker_status(status); ); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == flag || NULL == status) { diff --git a/ompi/mpi/c/testall.c b/ompi/mpi/c/testall.c index a57fe12fc44..9e136f227ba 100644 --- a/ompi/mpi/c/testall.c +++ b/ompi/mpi/c/testall.c @@ -5,18 +5,20 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,19 +31,18 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Testall = PMPI_Testall #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Testall PMPI_Testall #endif static const char FUNC_NAME[] = "MPI_Testall"; int MPI_Testall(int count, MPI_Request requests[], int *flag, - MPI_Status statuses[]) + MPI_Status statuses[]) { MEMCHECKER( int j; @@ -74,20 +75,16 @@ int MPI_Testall(int count, MPI_Request requests[], int *flag, return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - if (OMPI_SUCCESS == ompi_request_test_all(count, requests, flag, + if (OMPI_SUCCESS == ompi_request_test_all(count, requests, flag, statuses)) { - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } if (MPI_SUCCESS != ompi_errhandler_request_invoke(count, requests, FUNC_NAME)) { - OPAL_CR_EXIT_LIBRARY(); return MPI_ERR_IN_STATUS; } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/testany.c b/ompi/mpi/c/testany.c index 3c4f6f7e629..e88f3af1787 100644 --- a/ompi/mpi/c/testany.c +++ b/ompi/mpi/c/testany.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. 
- * Copyright (c) 2014 Research Organization for Information Science + * reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -31,18 +31,17 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Testany = PMPI_Testany #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Testany PMPI_Testany #endif static const char FUNC_NAME[] = "MPI_Testany"; -int MPI_Testany(int count, MPI_Request requests[], int *indx, int *completed, MPI_Status *status) +int MPI_Testany(int count, MPI_Request requests[], int *indx, int *completed, MPI_Status *status) { MEMCHECKER( int j; @@ -80,14 +79,11 @@ int MPI_Testany(int count, MPI_Request requests[], int *indx, int *completed, MP return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - if (OMPI_SUCCESS == ompi_request_test_any(count, requests, + if (OMPI_SUCCESS == ompi_request_test_any(count, requests, indx, completed, status)) { - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } - OPAL_CR_EXIT_LIBRARY(); return ompi_errhandler_request_invoke(count, requests, FUNC_NAME); } diff --git a/ompi/mpi/c/testsome.c b/ompi/mpi/c/testsome.c index 807d13ebb3f..0cebd972c92 100644 --- a/ompi/mpi/c/testsome.c +++ b/ompi/mpi/c/testsome.c @@ -5,18 +5,20 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,12 +31,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Testsome = PMPI_Testsome #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Testsome PMPI_Testsome #endif static const char FUNC_NAME[] = "MPI_Testsome"; @@ -76,20 +77,16 @@ int MPI_Testsome(int incount, MPI_Request requests[], return OMPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - if (OMPI_SUCCESS == ompi_request_test_some(incount, requests, outcount, + if (OMPI_SUCCESS == ompi_request_test_some(incount, requests, outcount, indices, statuses)) { - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } if (MPI_SUCCESS != ompi_errhandler_request_invoke(incount, requests, FUNC_NAME)) { - OPAL_CR_EXIT_LIBRARY(); return MPI_ERR_IN_STATUS; } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/topo_test.c b/ompi/mpi/c/topo_test.c index 48ef3e22810..00d3d1cfcd1 100644 --- a/ompi/mpi/c/topo_test.c +++ b/ompi/mpi/c/topo_test.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,29 +27,26 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Topo_test = PMPI_Topo_test #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Topo_test PMPI_Topo_test #endif static const char FUNC_NAME[] = "MPI_Topo_test"; -int MPI_Topo_test(MPI_Comm comm, int *status) +int MPI_Topo_test(MPI_Comm comm, int *status) { MEMCHECKER( memchecker_comm(comm); ); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if ( NULL == status ) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); diff --git a/ompi/mpi/c/type_c2f.c b/ompi/mpi/c/type_c2f.c index 2c43b372676..deb6d3323d5 100644 --- a/ompi/mpi/c/type_c2f.c +++ b/ompi/mpi/c/type_c2f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_c2f = PMPI_Type_c2f #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_c2f PMPI_Type_c2f #endif static const char FUNC_NAME[] = "MPI_Type_c2f"; @@ -40,8 +41,6 @@ static const char FUNC_NAME[] = "MPI_Type_c2f"; MPI_Fint MPI_Type_c2f(MPI_Datatype datatype) { - OPAL_CR_NOOP_PROGRESS(); - MEMCHECKER( memchecker_datatype(datatype); ); diff --git a/ompi/mpi/c/type_commit.c b/ompi/mpi/c/type_commit.c index 7c1d9402406..c012d8b9c47 100644 --- a/ompi/mpi/c/type_commit.c +++ b/ompi/mpi/c/type_commit.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_commit = PMPI_Type_commit #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_commit PMPI_Type_commit #endif static const char FUNC_NAME[] = "MPI_Type_commit"; @@ -44,7 +45,7 @@ int MPI_Type_commit(MPI_Datatype *type) MEMCHECKER( memchecker_datatype(*type); ); - + if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == type || NULL == *type || MPI_DATATYPE_NULL == *type) { @@ -52,8 +53,6 @@ int MPI_Type_commit(MPI_Datatype *type) } } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_datatype_commit( type ); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME ); } diff --git a/ompi/mpi/c/type_contiguous.c b/ompi/mpi/c/type_contiguous.c index 220dcc2e022..29193241106 100644 --- a/ompi/mpi/c/type_contiguous.c +++ b/ompi/mpi/c/type_contiguous.c @@ -13,6 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_contiguous = PMPI_Type_contiguous #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_contiguous PMPI_Type_contiguous #endif static const char FUNC_NAME[] = "MPI_Type_contiguous"; @@ -60,8 +61,6 @@ int MPI_Type_contiguous(int count, } } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_datatype_create_contiguous( count, oldtype, newtype ); OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME ); diff --git a/ompi/mpi/c/type_create_darray.c b/ompi/mpi/c/type_create_darray.c index d8303f0a9d6..6097912f462 100644 --- a/ompi/mpi/c/type_create_darray.c +++ b/ompi/mpi/c/type_create_darray.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -13,6 +13,8 @@ * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_darray = PMPI_Type_create_darray #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_darray PMPI_Type_create_darray #endif static const char FUNC_NAME[] = "MPI_Type_create_darray"; @@ -64,7 +65,8 @@ int MPI_Type_create_darray(int size, return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } else if( ndims < 0 ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COUNT, FUNC_NAME); - } else if( (NULL == gsize_array) || (NULL == distrib_array) || (NULL == darg_array) || (NULL == psize_array)) { + } else if( (ndims > 0) && ((NULL == gsize_array) || (NULL == distrib_array) || + (NULL == darg_array) || (NULL == psize_array))) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } else if (NULL == newtype) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME); @@ -93,8 +95,6 @@ int MPI_Type_create_darray(int size, return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_datatype_create_darray( size, rank, ndims, gsize_array, distrib_array, darg_array, psize_array, order, oldtype, newtype ); @@ -106,7 +106,5 @@ int MPI_Type_create_darray(int size, MPI_COMBINER_DARRAY ); } - OPAL_CR_EXIT_LIBRARY(); - OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/type_create_f90_complex.c b/ompi/mpi/c/type_create_f90_complex.c index bd97f7699a3..cf4b7455d85 100644 --- a/ompi/mpi/c/type_create_f90_complex.c +++ b/ompi/mpi/c/type_create_f90_complex.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,12 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,12 +32,11 @@ #include -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_f90_complex = PMPI_Type_create_f90_complex #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_f90_complex PMPI_Type_create_f90_complex #endif static const char FUNC_NAME[] = "MPI_Type_create_f90_complex"; @@ -45,8 +46,6 @@ int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype) { uint64_t key; - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/type_create_f90_integer.c b/ompi/mpi/c/type_create_f90_integer.c index 23985fbb511..b72ff287bf1 100644 --- a/ompi/mpi/c/type_create_f90_integer.c +++ b/ompi/mpi/c/type_create_f90_integer.c @@ -14,6 +14,8 @@ * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,12 +30,11 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_f90_integer = PMPI_Type_create_f90_integer #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_f90_integer PMPI_Type_create_f90_integer #endif static const char FUNC_NAME[] = "MPI_Type_create_f90_integer"; @@ -42,8 +43,6 @@ static const char FUNC_NAME[] = "MPI_Type_create_f90_integer"; int MPI_Type_create_f90_integer(int r, MPI_Datatype *newtype) { - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/type_create_f90_real.c b/ompi/mpi/c/type_create_f90_real.c index 03744dc05e8..8da1f15181d 100644 --- a/ompi/mpi/c/type_create_f90_real.c +++ b/ompi/mpi/c/type_create_f90_real.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,12 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,12 +32,11 @@ #include -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_f90_real = PMPI_Type_create_f90_real #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_f90_real PMPI_Type_create_f90_real #endif static const char FUNC_NAME[] = "MPI_Type_create_f90_real"; @@ -45,8 +46,6 @@ int MPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype) { uint64_t key; - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/type_create_hindexed.c b/ompi/mpi/c/type_create_hindexed.c index a9c6e0224da..6008f6be9c2 100644 --- a/ompi/mpi/c/type_create_hindexed.c +++ b/ompi/mpi/c/type_create_hindexed.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,12 +30,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_hindexed = PMPI_Type_create_hindexed #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_hindexed PMPI_Type_create_hindexed #endif static const char FUNC_NAME[] = "MPI_Type_create_hindexed"; @@ -56,8 +57,8 @@ int MPI_Type_create_hindexed(int count, if( count < 0 ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COUNT, FUNC_NAME); - } else if (NULL == array_of_blocklengths || - NULL == array_of_displacements) { + } else if ((count > 0) && (NULL == array_of_blocklengths || + NULL == array_of_displacements)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } else if (MPI_DATATYPE_NULL == oldtype || NULL == oldtype || @@ -73,7 +74,6 @@ int MPI_Type_create_hindexed(int count, } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_datatype_create_hindexed( count, array_of_blocklengths, array_of_displacements, oldtype, newtype ); @@ -89,6 +89,5 @@ int MPI_Type_create_hindexed(int count, 1, &oldtype, MPI_COMBINER_HINDEXED ); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_create_hindexed_block.c b/ompi/mpi/c/type_create_hindexed_block.c index 8a81d8f84e7..5c26c6c1fa8 100644 --- a/ompi/mpi/c/type_create_hindexed_block.c +++ b/ompi/mpi/c/type_create_hindexed_block.c @@ -5,6 +5,8 @@ * reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,12 +23,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_hindexed_block = PMPI_Type_create_hindexed_block #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_hindexed_block PMPI_Type_create_hindexed_block #endif static const char FUNC_NAME[] = "MPI_Type_create_hindexed_block"; @@ -59,7 +60,6 @@ int MPI_Type_create_hindexed_block(int count, } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_datatype_create_hindexed_block( count, blocklength, array_of_displacements, oldtype, newtype ); @@ -73,6 +73,5 @@ int MPI_Type_create_hindexed_block(int count, MPI_COMBINER_HINDEXED_BLOCK ); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_create_hvector.c b/ompi/mpi/c/type_create_hvector.c index 0fdb7703be3..5425cbc27fc 100644 --- a/ompi/mpi/c/type_create_hvector.c +++ b/ompi/mpi/c/type_create_hvector.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,12 +30,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_hvector = PMPI_Type_create_hvector #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_hvector PMPI_Type_create_hvector #endif static const char FUNC_NAME[] = "MPI_Type_create_hvector"; @@ -66,7 +67,6 @@ int MPI_Type_create_hvector(int count, } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_datatype_create_hvector ( count, blocklength, stride, oldtype, newtype ); @@ -79,6 +79,5 @@ int MPI_Type_create_hvector(int count, ompi_datatype_set_args( *newtype, 2, a_i, 1, a_a, 1, &oldtype, MPI_COMBINER_HVECTOR ); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_create_indexed_block.c b/ompi/mpi/c/type_create_indexed_block.c index a7636795fe6..810556fee81 100644 --- a/ompi/mpi/c/type_create_indexed_block.c +++ b/ompi/mpi/c/type_create_indexed_block.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,12 +30,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_indexed_block = PMPI_Type_create_indexed_block #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_indexed_block PMPI_Type_create_indexed_block #endif static const char FUNC_NAME[] = "MPI_Type_create_indexed_block"; @@ -66,7 +67,6 @@ int MPI_Type_create_indexed_block(int count, } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_datatype_create_indexed_block( count, blocklength, array_of_displacements, oldtype, newtype ); @@ -81,6 +81,5 @@ int MPI_Type_create_indexed_block(int count, MPI_COMBINER_INDEXED_BLOCK ); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_create_keyval.c b/ompi/mpi/c/type_create_keyval.c index 3d37042885c..eed6392e093 100644 --- a/ompi/mpi/c/type_create_keyval.c +++ b/ompi/mpi/c/type_create_keyval.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/attribute/attribute.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_keyval = PMPI_Type_create_keyval #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_keyval PMPI_Type_create_keyval #endif static const char FUNC_NAME[] = "MPI_Type_create_keyval"; @@ -50,13 +51,11 @@ int MPI_Type_create_keyval(MPI_Type_copy_attr_function *type_copy_attr_fn, if ((NULL == type_copy_attr_fn) || (NULL == type_delete_attr_fn) || (NULL == type_keyval)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, - MPI_ERR_ARG, + MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - copy_fn.attr_datatype_copy_fn = (MPI_Type_internal_copy_attr_function*)type_copy_attr_fn; del_fn.attr_datatype_delete_fn = type_delete_attr_fn; diff --git a/ompi/mpi/c/type_create_resized.c b/ompi/mpi/c/type_create_resized.c index 50eb8d72d04..346bee7f1e9 100644 --- a/ompi/mpi/c/type_create_resized.c +++ b/ompi/mpi/c/type_create_resized.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_resized = PMPI_Type_create_resized #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_resized PMPI_Type_create_resized #endif static const char FUNC_NAME[] = "MPI_Type_create_resized"; @@ -56,7 +57,6 @@ int MPI_Type_create_resized(MPI_Datatype oldtype, } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_datatype_create_resized( oldtype, lb, extent, newtype ); if( rc != MPI_SUCCESS ) { @@ -71,7 +71,6 @@ int MPI_Type_create_resized(MPI_Datatype oldtype, ompi_datatype_set_args( *newtype, 0, NULL, 2, a_a, 1, &oldtype, MPI_COMBINER_RESIZED ); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_create_struct.c b/ompi/mpi/c/type_create_struct.c index f9990f9470b..7631061aae7 100644 --- a/ompi/mpi/c/type_create_struct.c +++ b/ompi/mpi/c/type_create_struct.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,12 +30,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_struct = PMPI_Type_create_struct #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_struct PMPI_Type_create_struct #endif static const char FUNC_NAME[] = "MPI_Type_create_struct"; @@ -79,7 +80,6 @@ int MPI_Type_create_struct(int count, } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_datatype_create_struct( count, array_of_blocklengths, array_of_displacements, array_of_types, newtype ); @@ -96,6 +96,5 @@ int MPI_Type_create_struct(int count, count, array_of_types, MPI_COMBINER_STRUCT ); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_create_subarray.c b/ompi/mpi/c/type_create_subarray.c index 307245c05c7..0c4d631c09f 100644 --- a/ompi/mpi/c/type_create_subarray.c +++ b/ompi/mpi/c/type_create_subarray.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -13,6 +13,8 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_create_subarray = PMPI_Type_create_subarray #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_create_subarray PMPI_Type_create_subarray #endif static const char FUNC_NAME[] = "MPI_Type_create_subarray"; @@ -58,7 +59,7 @@ int MPI_Type_create_subarray(int ndims, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if( ndims < 0 ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COUNT, FUNC_NAME); - } else if( (NULL == size_array) || (NULL == subsize_array) || (NULL == start_array) ) { + } else if( (ndims > 0) && ((NULL == size_array) || (NULL == subsize_array) || (NULL == start_array)) ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } else if( (NULL == oldtype) || (MPI_DATATYPE_NULL == oldtype) || (NULL == newtype) ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME); @@ -74,7 +75,6 @@ int MPI_Type_create_subarray(int ndims, } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_datatype_create_subarray( ndims, size_array, subsize_array, start_array, order, oldtype, newtype); @@ -85,7 +85,6 @@ int MPI_Type_create_subarray(int ndims, MPI_COMBINER_SUBARRAY ); } - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/type_delete_attr.c b/ompi/mpi/c/type_delete_attr.c index 6013e60c15c..5eb2c0b1eb9 100644 --- a/ompi/mpi/c/type_delete_attr.c +++ b/ompi/mpi/c/type_delete_attr.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_delete_attr = PMPI_Type_delete_attr #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_delete_attr PMPI_Type_delete_attr #endif static const char FUNC_NAME[] = "MPI_Type_delete_attr"; @@ -48,16 +49,14 @@ int MPI_Type_delete_attr (MPI_Datatype type, int type_keyval) if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == type || MPI_DATATYPE_NULL == type) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, - MPI_ERR_TYPE, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, + MPI_ERR_TYPE, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - - ret = ompi_attr_delete(TYPE_ATTR, type, type->d_keyhash, type_keyval, + ret = ompi_attr_delete(TYPE_ATTR, type, type->d_keyhash, type_keyval, false); OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, - MPI_ERR_OTHER, FUNC_NAME); + MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/type_dup.c b/ompi/mpi/c/type_dup.c index 70b8a8babee..f84a1bf3dd1 100644 --- a/ompi/mpi/c/type_dup.c +++ b/ompi/mpi/c/type_dup.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/attribute/attribute.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_dup = PMPI_Type_dup #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_dup PMPI_Type_dup #endif static const char FUNC_NAME[] = "MPI_Type_dup"; @@ -53,7 +54,6 @@ int MPI_Type_dup (MPI_Datatype type, } } - OPAL_CR_ENTER_LIBRARY(); if (OMPI_SUCCESS != ompi_datatype_duplicate( type, newtype)) { ompi_datatype_destroy( newtype ); @@ -77,10 +77,9 @@ int MPI_Type_dup (MPI_Datatype type, ompi_datatype_destroy(newtype); OMPI_ERRHANDLER_RETURN( MPI_ERR_INTERN, MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME ); - } + } } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_extent.c b/ompi/mpi/c/type_extent.c index ea90b6104d5..fdb395256f6 100644 --- a/ompi/mpi/c/type_extent.c +++ b/ompi/mpi/c/type_extent.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_extent = PMPI_Type_extent #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_extent PMPI_Type_extent #endif static const char FUNC_NAME[] = "MPI_Type_extent"; @@ -54,8 +55,6 @@ int MPI_Type_extent(MPI_Datatype type, MPI_Aint *extent) } } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_datatype_get_extent( type, &lb, extent ); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME ); } diff --git a/ompi/mpi/c/type_f2c.c b/ompi/mpi/c/type_f2c.c index e4c6723d2b8..2afa2909ddd 100644 --- a/ompi/mpi/c/type_f2c.c +++ b/ompi/mpi/c/type_f2c.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,12 +29,11 @@ #include "ompi/datatype/ompi_datatype_internal.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_f2c = PMPI_Type_f2c #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_f2c PMPI_Type_f2c #endif static const char FUNC_NAME[] = "MPI_Type_f2c"; @@ -46,8 +47,6 @@ MPI_Datatype MPI_Type_f2c(MPI_Fint datatype) memchecker_datatype(datatype); ); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } @@ -55,8 +54,8 @@ MPI_Datatype MPI_Type_f2c(MPI_Fint datatype) /* Per MPI-2:4.12.4, do not invoke an error handler if we get an invalid fortran handle. If we get an invalid fortran handle, return an invalid C handle. */ - if (datatype_index < 0 || - datatype_index >= + if (datatype_index < 0 || + datatype_index >= opal_pointer_array_get_size(&ompi_datatype_f_to_c_table)) { return NULL; } diff --git a/ompi/mpi/c/type_free.c b/ompi/mpi/c/type_free.c index 2ca780a4fc9..84a6db8639e 100644 --- a/ompi/mpi/c/type_free.c +++ b/ompi/mpi/c/type_free.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_free = PMPI_Type_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_free PMPI_Type_free #endif static const char FUNC_NAME[] = "MPI_Type_free"; @@ -44,7 +45,7 @@ int MPI_Type_free(MPI_Datatype *type) MEMCHECKER( memchecker_datatype(*type); ); - + if( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == type || NULL == *type || MPI_DATATYPE_NULL == *type || @@ -54,7 +55,6 @@ int MPI_Type_free(MPI_Datatype *type) } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_datatype_destroy( type ); if( rc != MPI_SUCCESS ) { @@ -63,6 +63,5 @@ int MPI_Type_free(MPI_Datatype *type) } *type = MPI_DATATYPE_NULL; - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_free_keyval.c b/ompi/mpi/c/type_free_keyval.c index b4714efe375..f5b239e9813 100644 --- a/ompi/mpi/c/type_free_keyval.c +++ b/ompi/mpi/c/type_free_keyval.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/attribute/attribute.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_free_keyval = PMPI_Type_free_keyval #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_free_keyval PMPI_Type_free_keyval #endif static const char FUNC_NAME[] = "MPI_Type_free_keyval"; @@ -45,13 +46,11 @@ int MPI_Type_free_keyval(int *type_keyval) OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == type_keyval) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, - MPI_ERR_ARG, + MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - ret = ompi_attr_free_keyval(TYPE_ATTR, type_keyval, 0); OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, diff --git a/ompi/mpi/c/type_get_attr.c b/ompi/mpi/c/type_get_attr.c index 0705ae8c632..3a94fb281fd 100644 --- a/ompi/mpi/c/type_get_attr.c +++ b/ompi/mpi/c/type_get_attr.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_get_attr = PMPI_Type_get_attr #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_get_attr PMPI_Type_get_attr #endif static const char FUNC_NAME[] = "MPI_Type_get_attr"; @@ -54,23 +55,21 @@ int MPI_Type_get_attr (MPI_Datatype type, return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME ); } else if ((NULL == attribute_val) || (NULL == flag)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, - MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, + MPI_ERR_ARG, FUNC_NAME); } else if (MPI_KEYVAL_INVALID == type_keyval) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_KEYVAL, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_KEYVAL, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - /* This stuff is very confusing. Be sure to see src/attribute/attribute.c for a lengthy comment explaining Open MPI attribute behavior. */ - ret = ompi_attr_get_c(type->d_keyhash, type_keyval, + ret = ompi_attr_get_c(type->d_keyhash, type_keyval, (void**)attribute_val, flag); OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, - MPI_ERR_OTHER, FUNC_NAME); + MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/type_get_contents.c b/ompi/mpi/c/type_get_contents.c index 966e0ab1ae2..b17753e476e 100644 --- a/ompi/mpi/c/type_get_contents.c +++ b/ompi/mpi/c/type_get_contents.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_get_contents = PMPI_Type_get_contents #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_get_contents PMPI_Type_get_contents #endif static const char FUNC_NAME[] = "MPI_Type_get_contents"; @@ -64,7 +65,6 @@ int MPI_Type_get_contents(MPI_Datatype mtype, } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_datatype_get_args( mtype, 1, &max_integers, array_of_integers, &max_addresses, array_of_addresses, @@ -89,6 +89,5 @@ int MPI_Type_get_contents(MPI_Datatype mtype, } } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_get_envelope.c b/ompi/mpi/c/type_get_envelope.c index d32a08adc7b..b25ba0ce51a 100644 --- a/ompi/mpi/c/type_get_envelope.c +++ b/ompi/mpi/c/type_get_envelope.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_get_envelope = PMPI_Type_get_envelope #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_get_envelope PMPI_Type_get_envelope #endif static const char FUNC_NAME[] = "MPI_Type_get_envelope"; @@ -60,9 +61,7 @@ int MPI_Type_get_envelope(MPI_Datatype type, } } - OPAL_CR_ENTER_LIBRARY(); - - rc = ompi_datatype_get_args( type, 0, num_integers, NULL, num_addresses, NULL, + rc = ompi_datatype_get_args( type, 0, num_integers, NULL, num_addresses, NULL, num_datatypes, NULL, combiner ); OMPI_ERRHANDLER_RETURN( rc, MPI_COMM_WORLD, rc, FUNC_NAME ); } diff --git a/ompi/mpi/c/type_get_extent.c b/ompi/mpi/c/type_get_extent.c index c1dd096bfb6..bfbdec38b32 100644 --- a/ompi/mpi/c/type_get_extent.c +++ b/ompi/mpi/c/type_get_extent.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,12 +29,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_get_extent = PMPI_Type_get_extent #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_get_extent PMPI_Type_get_extent #endif static const char FUNC_NAME[] = "MPI_Type_get_extent"; @@ -44,7 +45,7 @@ int MPI_Type_get_extent(MPI_Datatype type, MPI_Aint *lb, MPI_Aint *extent) MEMCHECKER( memchecker_datatype(type); ); - + if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == type || MPI_DATATYPE_NULL == type) { @@ -54,8 +55,6 @@ int MPI_Type_get_extent(MPI_Datatype type, MPI_Aint *lb, MPI_Aint *extent) } } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_datatype_get_extent( type, lb, extent ); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME ); } diff --git a/ompi/mpi/c/type_get_extent_x.c b/ompi/mpi/c/type_get_extent_x.c index 233eaad0d2b..caeddf526c0 100644 --- a/ompi/mpi/c/type_get_extent_x.c +++ b/ompi/mpi/c/type_get_extent_x.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,12 +29,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_get_extent_x = PMPI_Type_get_extent_x #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_get_extent_x PMPI_Type_get_extent_x #endif static const char FUNC_NAME[] = "MPI_Type_get_extent_x"; @@ -45,7 +46,7 @@ int MPI_Type_get_extent_x(MPI_Datatype type, MPI_Count *lb, MPI_Count *extent) MEMCHECKER( memchecker_datatype(type); ); - + if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == type || MPI_DATATYPE_NULL == type) { @@ -55,8 +56,6 @@ int MPI_Type_get_extent_x(MPI_Datatype type, MPI_Count *lb, MPI_Count *extent) } } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_datatype_get_extent( type, &alb, &aextent ); if (OMPI_SUCCESS == rc) { *lb = (MPI_Count) alb; diff --git a/ompi/mpi/c/type_get_name.c b/ompi/mpi/c/type_get_name.c index 037931c2586..b1cdbacd449 100644 --- a/ompi/mpi/c/type_get_name.c +++ b/ompi/mpi/c/type_get_name.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,12 +30,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_get_name = PMPI_Type_get_name #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_get_name PMPI_Type_get_name #endif static const char FUNC_NAME[] = "MPI_Type_get_name"; @@ -46,8 +47,6 @@ int MPI_Type_get_name(MPI_Datatype type, char *type_name, int *resultlen) memchecker_datatype(type); ); - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == type || MPI_DATATYPE_NULL == type) { diff --git a/ompi/mpi/c/type_get_true_extent.c b/ompi/mpi/c/type_get_true_extent.c index f0877ba451a..287d4b3a1ff 100644 --- a/ompi/mpi/c/type_get_true_extent.c +++ b/ompi/mpi/c/type_get_true_extent.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,12 +29,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_get_true_extent = PMPI_Type_get_true_extent #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_get_true_extent PMPI_Type_get_true_extent #endif static const char FUNC_NAME[] = "MPI_Type_get_true_extent"; @@ -46,7 +47,7 @@ int MPI_Type_get_true_extent(MPI_Datatype datatype, MEMCHECKER( memchecker_datatype(datatype); ); - + if( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == datatype || MPI_DATATYPE_NULL == datatype) { @@ -58,8 +59,6 @@ int MPI_Type_get_true_extent(MPI_Datatype datatype, } } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_datatype_get_true_extent( datatype, true_lb, true_extent ); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME ); } diff --git a/ompi/mpi/c/type_get_true_extent_x.c b/ompi/mpi/c/type_get_true_extent_x.c index 58e81c62047..dcb8fabdc70 100644 --- a/ompi/mpi/c/type_get_true_extent_x.c +++ b/ompi/mpi/c/type_get_true_extent_x.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,12 +29,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_get_true_extent_x = PMPI_Type_get_true_extent_x #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_get_true_extent_x PMPI_Type_get_true_extent_x #endif static const char FUNC_NAME[] = "MPI_Type_get_true_extent_x"; @@ -47,7 +48,7 @@ int MPI_Type_get_true_extent_x(MPI_Datatype datatype, MEMCHECKER( memchecker_datatype(datatype); ); - + if( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == datatype || MPI_DATATYPE_NULL == datatype) { @@ -59,8 +60,6 @@ int MPI_Type_get_true_extent_x(MPI_Datatype datatype, } } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_datatype_get_true_extent( datatype, &atrue_lb, &atrue_extent ); if (OMPI_SUCCESS == rc) { *true_lb = (MPI_Count) atrue_lb; diff --git a/ompi/mpi/c/type_hindexed.c b/ompi/mpi/c/type_hindexed.c index 22402bf4d9c..89d3b46bdd3 100644 --- a/ompi/mpi/c/type_hindexed.c +++ b/ompi/mpi/c/type_hindexed.c @@ -2,17 +2,19 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_hindexed = PMPI_Type_hindexed #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_hindexed PMPI_Type_hindexed #endif static const char FUNC_NAME[] = "MPI_Type_hindexed"; @@ -56,8 +57,8 @@ int MPI_Type_hindexed(int count, } else if (count < 0) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COUNT, FUNC_NAME ); - } else if (NULL == array_of_blocklengths || - NULL == array_of_displacements) { + } else if ((count > 0) && (NULL == array_of_blocklengths || + NULL == array_of_displacements) ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME ); } @@ -69,7 +70,7 @@ int MPI_Type_hindexed(int count, } } - return MPI_Type_create_hindexed(count, + return PMPI_Type_create_hindexed(count, array_of_blocklengths, array_of_displacements, oldtype, diff --git a/ompi/mpi/c/type_hvector.c b/ompi/mpi/c/type_hvector.c index 7447dbe6c7f..2c1517b565a 100644 --- a/ompi/mpi/c/type_hvector.c +++ b/ompi/mpi/c/type_hvector.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +26,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_hvector = PMPI_Type_hvector #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_hvector PMPI_Type_hvector #endif static const char FUNC_NAME[] = "MPI_Type_hvector"; @@ -60,7 +61,7 @@ int MPI_Type_hvector(int count, } } - return MPI_Type_create_hvector(count, + return PMPI_Type_create_hvector(count, blocklength, stride, oldtype, diff --git a/ompi/mpi/c/type_indexed.c b/ompi/mpi/c/type_indexed.c index 935b00f7125..1ac21623698 100644 --- a/ompi/mpi/c/type_indexed.c +++ b/ompi/mpi/c/type_indexed.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,12 +30,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_indexed = PMPI_Type_indexed #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_indexed PMPI_Type_indexed #endif static const char FUNC_NAME[] = "MPI_Type_indexed"; @@ -59,8 +60,8 @@ int MPI_Type_indexed(int count, } else if( count < 0 ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COUNT, FUNC_NAME); - } else if (NULL == array_of_blocklengths || - NULL == array_of_displacements) { + } else if ((count > 0) && (NULL == array_of_blocklengths || + NULL == array_of_displacements)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } @@ -72,7 +73,6 @@ int MPI_Type_indexed(int count, } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_datatype_create_indexed ( count, array_of_blocklengths, array_of_displacements, @@ -90,7 +90,6 @@ int MPI_Type_indexed(int count, MPI_COMBINER_INDEXED ); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_lb.c b/ompi/mpi/c/type_lb.c index 90e55ddf6c4..ab57ad8c646 100644 --- a/ompi/mpi/c/type_lb.c +++ b/ompi/mpi/c/type_lb.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_lb = PMPI_Type_lb #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_lb PMPI_Type_lb #endif static const char FUNC_NAME[] = "MPI_Type_lb"; @@ -54,8 +55,6 @@ int MPI_Type_lb(MPI_Datatype type, MPI_Aint *lb) } } - OPAL_CR_ENTER_LIBRARY(); - rc = ompi_datatype_get_extent( type, lb, &extent ); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME ); } diff --git a/ompi/mpi/c/type_match_size.c b/ompi/mpi/c/type_match_size.c index e9bcbee6279..32a2a30c73e 100644 --- a/ompi/mpi/c/type_match_size.c +++ b/ompi/mpi/c/type_match_size.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/datatype/ompi_datatype_internal.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_match_size = PMPI_Type_match_size #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_match_size PMPI_Type_match_size #endif static const char FUNC_NAME[] = "MPI_Type_match_size"; @@ -43,7 +44,6 @@ int MPI_Type_match_size(int typeclass, int size, MPI_Datatype *type) OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); switch( typeclass ) { case MPI_TYPECLASS_REAL: @@ -59,7 +59,6 @@ int MPI_Type_match_size(int typeclass, int size, MPI_Datatype *type) *type = &ompi_mpi_datatype_null.dt; } - OPAL_CR_EXIT_LIBRARY(); if( *type != &ompi_mpi_datatype_null.dt ) { return MPI_SUCCESS; } diff --git a/ompi/mpi/c/type_set_attr.c b/ompi/mpi/c/type_set_attr.c index b24893fcb92..970397b0113 100644 --- a/ompi/mpi/c/type_set_attr.c +++ b/ompi/mpi/c/type_set_attr.c @@ -2,17 +2,19 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,12 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_set_attr = PMPI_Type_set_attr #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_set_attr PMPI_Type_set_attr #endif static const char FUNC_NAME[] = "MPI_Type_set_attr"; @@ -46,22 +47,18 @@ int MPI_Type_set_attr (MPI_Datatype type, MEMCHECKER( memchecker_datatype(type); ); - - if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == type || MPI_DATATYPE_NULL == type) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME); - } else if (NULL == attribute_val) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); - } - } - OPAL_CR_ENTER_LIBRARY(); + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == type || MPI_DATATYPE_NULL == type) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME); + } + } - ret = ompi_attr_set_c(TYPE_ATTR, type, &type->d_keyhash, + ret = ompi_attr_set_c(TYPE_ATTR, type, &type->d_keyhash, type_keyval, attribute_val, false); OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, - MPI_ERR_OTHER, FUNC_NAME); + MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/type_set_name.c b/ompi/mpi/c/type_set_name.c index b6d8c969f4b..fd8338b832a 100644 --- a/ompi/mpi/c/type_set_name.c +++ b/ompi/mpi/c/type_set_name.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +32,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_set_name = PMPI_Type_set_name #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_set_name PMPI_Type_set_name #endif static const char FUNC_NAME[] = "MPI_Type_set_name"; @@ -49,8 +50,6 @@ int MPI_Type_set_name (MPI_Datatype type, const char *type_name) memchecker_datatype(type); ); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == type || MPI_DATATYPE_NULL == type) { diff --git a/ompi/mpi/c/type_size.c b/ompi/mpi/c/type_size.c index 8a9b19eee13..a14db5a56fc 100644 --- a/ompi/mpi/c/type_size.c +++ b/ompi/mpi/c/type_size.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_size = PMPI_Type_size #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_size PMPI_Type_size #endif static const char FUNC_NAME[] = "MPI_Type_size"; @@ -46,8 +47,6 @@ int MPI_Type_size(MPI_Datatype type, int *size) memchecker_datatype(type); ); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == type || MPI_DATATYPE_NULL == type) { diff --git a/ompi/mpi/c/type_size_x.c b/ompi/mpi/c/type_size_x.c index 5275d53a038..ad0935c574b 100644 --- a/ompi/mpi/c/type_size_x.c +++ b/ompi/mpi/c/type_size_x.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,12 +31,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_size_x = PMPI_Type_size_x #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_size_x PMPI_Type_size_x #endif static const char FUNC_NAME[] = "MPI_Type_size_x"; @@ -46,8 +47,6 @@ int MPI_Type_size_x(MPI_Datatype type, MPI_Count *size) memchecker_datatype(type); ); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == type || MPI_DATATYPE_NULL == type) { diff --git a/ompi/mpi/c/type_struct.c b/ompi/mpi/c/type_struct.c index d9e196f36e1..575e26453f7 100644 --- a/ompi/mpi/c/type_struct.c +++ b/ompi/mpi/c/type_struct.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -20,15 +22,13 @@ #include "ompi/mpi/c/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_struct = PMPI_Type_struct #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_struct PMPI_Type_struct #endif - int MPI_Type_struct(int count, int array_of_blocklengths[], MPI_Aint array_of_displacements[], @@ -36,7 +36,7 @@ int MPI_Type_struct(int count, MPI_Datatype *newtype) { /* the param check will be done if necessary on the MPI_Type_create_struct */ - return MPI_Type_create_struct(count, + return PMPI_Type_create_struct(count, array_of_blocklengths, array_of_displacements, array_of_types, diff --git a/ompi/mpi/c/type_ub.c b/ompi/mpi/c/type_ub.c index a9d2cc823c0..c5015037975 100644 --- a/ompi/mpi/c/type_ub.c +++ b/ompi/mpi/c/type_ub.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +27,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_ub = PMPI_Type_ub #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_ub PMPI_Type_ub #endif static const char FUNC_NAME[] = "MPI_Type_ub"; @@ -55,8 +56,6 @@ int MPI_Type_ub(MPI_Datatype mtype, MPI_Aint *ub) } } - OPAL_CR_ENTER_LIBRARY(); - status = ompi_datatype_get_extent( mtype, &lb, &extent ); if (MPI_SUCCESS == status) { *ub = lb + extent; diff --git a/ompi/mpi/c/type_vector.c b/ompi/mpi/c/type_vector.c index 0b6a6458598..bbd68b2c835 100644 --- a/ompi/mpi/c/type_vector.c +++ b/ompi/mpi/c/type_vector.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,12 +30,11 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_vector = PMPI_Type_vector #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Type_vector PMPI_Type_vector #endif static const char FUNC_NAME[] = "MPI_Type_vector"; @@ -65,7 +66,6 @@ int MPI_Type_vector(int count, } } - OPAL_CR_ENTER_LIBRARY(); rc = ompi_datatype_create_vector ( count, blocklength, stride, oldtype, newtype ); OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME ); @@ -76,6 +76,5 @@ int MPI_Type_vector(int count, ompi_datatype_set_args( *newtype, 3, a_i, 0, NULL, 1, &oldtype, MPI_COMBINER_VECTOR ); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/unpack.c b/ompi/mpi/c/unpack.c index 9798236edeb..4d33a38e268 100644 --- a/ompi/mpi/c/unpack.c +++ b/ompi/mpi/c/unpack.c @@ -2,18 +2,20 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -27,12 +29,11 @@ #include "opal/datatype/opal_convertor.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Unpack = PMPI_Unpack #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Unpack PMPI_Unpack #endif static const char FUNC_NAME[] = "MPI_Unpack"; @@ -40,9 +41,9 @@ static const char FUNC_NAME[] = "MPI_Unpack"; int MPI_Unpack(const void *inbuf, int insize, int *position, void *outbuf, int outcount, MPI_Datatype datatype, - MPI_Comm comm) + MPI_Comm comm) { - int rc = 1; + int rc = MPI_SUCCESS; opal_convertor_t local_convertor; struct iovec outvec; unsigned int iov_count; @@ -60,52 +61,51 @@ int MPI_Unpack(const void *inbuf, int insize, int *position, return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } - + if ((NULL == inbuf) || (NULL == position)) { /* outbuf can be MPI_BOTTOM */ return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - + if (outcount < 0) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COUNT, FUNC_NAME); } - if (MPI_DATATYPE_NULL == datatype || NULL == datatype) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TYPE, FUNC_NAME); - } + OMPI_CHECK_DATATYPE_FOR_RECV(rc, datatype, outcount); + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + OMPI_CHECK_USER_BUFFER(rc, outbuf, datatype, outcount); + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - if( insize > 0 ) { + if( insize > 0 ) { + int ret; OBJ_CONSTRUCT( &local_convertor, opal_convertor_t ); /* the resulting convertor will be set the the position ZERO */ opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor, &(datatype->super), outcount, outbuf, 0, &local_convertor ); - + /* Check for truncation */ opal_convertor_get_packed_size( &local_convertor, &size ); if( (*position + 
size) > (unsigned int)insize ) { OBJ_DESTRUCT( &local_convertor ); - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TRUNCATE, FUNC_NAME); } - + /* Prepare the iovec with all informations */ outvec.iov_base = (char*) inbuf + (*position); outvec.iov_len = size; - + /* Do the actual unpacking */ iov_count = 1; - rc = opal_convertor_unpack( &local_convertor, &outvec, &iov_count, &size ); + ret = opal_convertor_unpack( &local_convertor, &outvec, &iov_count, &size ); *position += size; OBJ_DESTRUCT( &local_convertor ); - /* All done. Note that the convertor returns 1 upon success, not - OMPI_SUCCESS. */ - + OPAL_SUCCESS. */ + if (1 != ret) { + rc = OMPI_ERROR; + } } - OMPI_ERRHANDLER_RETURN((rc == 1) ? OMPI_SUCCESS : OMPI_ERROR, - comm, MPI_ERR_UNKNOWN, FUNC_NAME); - + OMPI_ERRHANDLER_RETURN(rc, comm, MPI_ERR_UNKNOWN, FUNC_NAME); } diff --git a/ompi/mpi/c/unpack_external.c b/ompi/mpi/c/unpack_external.c index 7220d826f24..55ada651ef0 100644 --- a/ompi/mpi/c/unpack_external.c +++ b/ompi/mpi/c/unpack_external.c @@ -3,20 +3,22 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,12 +31,11 @@ #include "opal/datatype/opal_convertor.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Unpack_external = PMPI_Unpack_external #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Unpack_external PMPI_Unpack_external #endif static const char FUNC_NAME[] = "MPI_Unpack_external"; @@ -42,13 +43,9 @@ static const char FUNC_NAME[] = "MPI_Unpack_external"; int MPI_Unpack_external (const char datarep[], const void *inbuf, MPI_Aint insize, MPI_Aint *position, void *outbuf, int outcount, - MPI_Datatype datatype) + MPI_Datatype datatype) { - int rc; - opal_convertor_t local_convertor; - struct iovec outvec; - unsigned int iov_count; - size_t size; + int rc = MPI_SUCCESS; MEMCHECKER( memchecker_datatype(datatype); @@ -61,39 +58,16 @@ int MPI_Unpack_external (const char datarep[], const void *inbuf, MPI_Aint insiz return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } else if (outcount < 0) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COUNT, FUNC_NAME); - } else if (MPI_DATATYPE_NULL == datatype || NULL == datatype) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TYPE, FUNC_NAME); } + OMPI_CHECK_DATATYPE_FOR_RECV(rc, datatype, outcount); + OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME); + OMPI_CHECK_USER_BUFFER(rc, outbuf, datatype, outcount); + OMPI_ERRHANDLER_CHECK(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - - OBJ_CONSTRUCT(&local_convertor, opal_convertor_t); - - /* the resulting convertor will be set to the position ZERO */ - opal_convertor_copy_and_prepare_for_recv( ompi_mpi_external32_convertor, - &(datatype->super), outcount, outbuf, 0, &local_convertor ); - - /* Check for truncation */ - opal_convertor_get_packed_size( 
&local_convertor, &size ); - if( (*position + size) > (unsigned int)insize ) { - OBJ_DESTRUCT( &local_convertor ); - OPAL_CR_EXIT_LIBRARY(); - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_TRUNCATE, FUNC_NAME); - } - - /* Prepare the iovec with all informations */ - outvec.iov_base = (char*) inbuf + (*position); - outvec.iov_len = size; - - /* Do the actual unpacking */ - iov_count = 1; - rc = opal_convertor_unpack( &local_convertor, &outvec, &iov_count, &size ); - *position += size; - OBJ_DESTRUCT( &local_convertor ); - /* All done. Note that the convertor returns 1 upon success, not - OMPI_SUCCESS. */ - OMPI_ERRHANDLER_RETURN((rc == 1) ? OMPI_SUCCESS : OMPI_ERROR, - MPI_COMM_WORLD, MPI_ERR_UNKNOWN, FUNC_NAME); + rc = ompi_datatype_unpack_external(datarep, inbuf, insize, + position, outbuf, outcount, + datatype); + OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/unpublish_name.c b/ompi/mpi/c/unpublish_name.c index 69888807a5a..a61c487a67f 100644 --- a/ompi/mpi/c/unpublish_name.c +++ b/ompi/mpi/c/unpublish_name.c @@ -6,34 +6,39 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" #include +#include "opal/class/opal_list.h" +#include "opal/mca/pmix/pmix.h" +#include "opal/util/argv.h" + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#include "ompi/mca/pubsub/pubsub.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Unpublish_name = PMPI_Unpublish_name #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Unpublish_name PMPI_Unpublish_name #endif static const char FUNC_NAME[] = "MPI_Unpublish_name"; @@ -43,16 +48,21 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info, const char *port_name) { int rc; + char range[OPAL_MAX_INFO_VAL]; + int flag=0; + opal_list_t pinfo; + opal_value_t *rng; + char **keys = NULL; if ( MPI_PARAM_CHECK ) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ( NULL == port_name ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } if ( NULL == service_name ) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } if (NULL == info || ompi_info_is_freed(info)) { @@ -61,35 +71,59 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info, } } - OPAL_CR_ENTER_LIBRARY(); + OBJ_CONSTRUCT(&pinfo, opal_list_t); - /* - * No predefined info-objects for this function in MPI-2, - * therefore, we do not parse the info-object at the moment. 
- */ - rc = ompi_pubsub.unpublish(service_name, info); - if ( OMPI_SUCCESS != rc ) { - if (OMPI_ERR_NOT_FOUND == rc) { + /* OMPI supports info keys to pass the range to + * be searched for the given key */ + if (MPI_INFO_NULL != info) { + ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); + if (flag) { + if (0 == strcmp(range, "nspace")) { + rng = OBJ_NEW(opal_value_t); + rng->key = strdup(OPAL_PMIX_RANGE); + rng->type = OPAL_INT; + rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace + opal_list_append(&pinfo, &rng->super); + } else if (0 == strcmp(range, "session")) { + rng = OBJ_NEW(opal_value_t); + rng->key = strdup(OPAL_PMIX_RANGE); + rng->type = OPAL_INT; + rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session + opal_list_append(&pinfo, &rng->super); + } else { + /* unrecognized scope */ + OPAL_LIST_DESTRUCT(&pinfo); + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + FUNC_NAME); + } + } + } + + /* unpublish the service_name */ + opal_argv_append_nosize(&keys, service_name); + + rc = opal_pmix.unpublish(keys, &pinfo); + opal_argv_free(keys); + OPAL_LIST_DESTRUCT(&pinfo); + + if ( OPAL_SUCCESS != rc ) { + if (OPAL_ERR_NOT_FOUND == rc) { /* service couldn't be found */ - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_SERVICE, FUNC_NAME); } - if (OMPI_ERR_PERM == rc) { + if (OPAL_ERR_PERM == rc) { /* this process didn't own the specified service */ - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ACCESS, FUNC_NAME); } - + /* none of the MPI-specific errors occurred - must be some * kind of internal error */ - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/wait.c b/ompi/mpi/c/wait.c index cdf6fd74795..867b61563c5 100644 --- a/ompi/mpi/c/wait.c +++ b/ompi/mpi/c/wait.c @@ -5,15 +5,17 @@ * Copyright (c) 
2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,12 +28,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Wait = PMPI_Wait #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Wait PMPI_Wait #endif static const char FUNC_NAME[] = "MPI_Wait"; @@ -65,7 +66,6 @@ int MPI_Wait(MPI_Request *request, MPI_Status *status) return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); if (OMPI_SUCCESS == ompi_request_wait(request, status)) { /* @@ -74,13 +74,11 @@ int MPI_Wait(MPI_Request *request, MPI_Status *status) MEMCHECKER( opal_memchecker_base_mem_undefined(&status->MPI_ERROR, sizeof(int)); ); - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } MEMCHECKER( - opal_memchecker_base_mem_undefined(&status->MPI_ERROR, sizeof(int)); + opal_memchecker_base_mem_undefined(&status->MPI_ERROR, sizeof(int)); ); - OPAL_CR_EXIT_LIBRARY(); return ompi_errhandler_request_invoke(1, request, FUNC_NAME); } diff --git a/ompi/mpi/c/waitall.c b/ompi/mpi/c/waitall.c index a0fbba80bdc..ae663fc6aa5 100644 --- a/ompi/mpi/c/waitall.c +++ b/ompi/mpi/c/waitall.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,18 +30,17 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Waitall = PMPI_Waitall #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Waitall PMPI_Waitall #endif static const char FUNC_NAME[] = "MPI_Waitall"; -int MPI_Waitall(int count, MPI_Request requests[], MPI_Status statuses[]) +int MPI_Waitall(int count, MPI_Request requests[], MPI_Status statuses[]) { MEMCHECKER( int j; @@ -47,7 +48,7 @@ int MPI_Waitall(int count, MPI_Request requests[], MPI_Status statuses[]) memchecker_request(&requests[j]); } ); - + if ( MPI_PARAM_CHECK ) { int i, rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -71,19 +72,15 @@ int MPI_Waitall(int count, MPI_Request requests[], MPI_Status statuses[]) return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); if (OMPI_SUCCESS == ompi_request_wait_all(count, requests, statuses)) { - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } - if (MPI_SUCCESS != + if (MPI_SUCCESS != ompi_errhandler_request_invoke(count, requests, FUNC_NAME)) { - OPAL_CR_EXIT_LIBRARY(); return MPI_ERR_IN_STATUS; } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/waitany.c 
b/ompi/mpi/c/waitany.c index 58b94a0e41a..46ab299b338 100644 --- a/ompi/mpi/c/waitany.c +++ b/ompi/mpi/c/waitany.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -31,18 +31,17 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Waitany = PMPI_Waitany #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Waitany PMPI_Waitany #endif static const char FUNC_NAME[] = "MPI_Waitany"; -int MPI_Waitany(int count, MPI_Request requests[], int *indx, MPI_Status *status) +int MPI_Waitany(int count, MPI_Request requests[], int *indx, MPI_Status *status) { MEMCHECKER( int j; @@ -50,7 +49,7 @@ int MPI_Waitany(int count, MPI_Request requests[], int *indx, MPI_Status *status memchecker_request(&requests[j]); } ); - + if ( MPI_PARAM_CHECK ) { int i, rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -79,13 +78,10 @@ int MPI_Waitany(int count, MPI_Request requests[], int *indx, MPI_Status *status return 
MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); if (OMPI_SUCCESS == ompi_request_wait_any(count, requests, indx, status)) { - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } - OPAL_CR_EXIT_LIBRARY(); return ompi_errhandler_request_invoke(count, requests, FUNC_NAME); } diff --git a/ompi/mpi/c/waitsome.c b/ompi/mpi/c/waitsome.c index 47c9b3ac853..71626ab69e4 100644 --- a/ompi/mpi/c/waitsome.c +++ b/ompi/mpi/c/waitsome.c @@ -5,18 +5,20 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,12 +31,11 @@ #include "ompi/request/request.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Waitsome = PMPI_Waitsome #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Waitsome PMPI_Waitsome #endif static const char FUNC_NAME[] = "MPI_Waitsome"; @@ -42,7 +43,7 @@ static const char FUNC_NAME[] = "MPI_Waitsome"; int MPI_Waitsome(int incount, MPI_Request requests[], int *outcount, int indices[], - MPI_Status statuses[]) + MPI_Status statuses[]) { MEMCHECKER( int j; @@ -76,20 +77,16 @@ int MPI_Waitsome(int incount, MPI_Request requests[], return MPI_SUCCESS; } - OPAL_CR_ENTER_LIBRARY(); - if (OMPI_SUCCESS == ompi_request_wait_some( incount, requests, + if (OMPI_SUCCESS == ompi_request_wait_some( incount, requests, outcount, indices, statuses )) { - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } if (MPI_SUCCESS != ompi_errhandler_request_invoke(incount, requests, FUNC_NAME)) { - OPAL_CR_EXIT_LIBRARY(); return MPI_ERR_IN_STATUS; } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/win_allocate.c b/ompi/mpi/c/win_allocate.c index 9d3c81ed581..c5f587a00de 100644 --- a/ompi/mpi/c/win_allocate.c +++ b/ompi/mpi/c/win_allocate.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,12 +30,11 @@ #include "ompi/win/win.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_allocate = PMPI_Win_allocate #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_allocate PMPI_Win_allocate #endif static const char FUNC_NAME[] = "MPI_Win_allocate"; @@ -43,7 +44,7 @@ int MPI_Win_allocate(MPI_Aint size, int disp_unit, MPI_Info info, MPI_Comm comm, void *baseptr, MPI_Win *win) { int ret = MPI_SUCCESS; - + MEMCHECKER( memchecker_comm(comm); ); @@ -52,7 +53,7 @@ int MPI_Win_allocate(MPI_Aint size, int disp_unit, MPI_Info info, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (NULL == info || ompi_info_is_freed(info)) { @@ -62,7 +63,7 @@ int MPI_Win_allocate(MPI_Aint size, int disp_unit, MPI_Info info, } else if (NULL == win) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_WIN, FUNC_NAME); } else if ( size < 0 ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_SIZE, FUNC_NAME); + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_SIZE, FUNC_NAME); } else if ( disp_unit <= 0 ) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_DISP, FUNC_NAME); } @@ -73,17 +74,14 @@ int MPI_Win_allocate(MPI_Aint size, int disp_unit, MPI_Info info, return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COMM, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); /* create window and return */ ret = ompi_win_allocate((size_t)size, disp_unit, info, comm, baseptr, win); if (OMPI_SUCCESS != ret) { *win = MPI_WIN_NULL; - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE(comm, 
MPI_ERR_WIN, FUNC_NAME); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/win_allocate_shared.c b/ompi/mpi/c/win_allocate_shared.c index 2efe2fe3005..6c602df0d90 100644 --- a/ompi/mpi/c/win_allocate_shared.c +++ b/ompi/mpi/c/win_allocate_shared.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -31,12 +33,11 @@ #include "ompi/win/win.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_allocate_shared = PMPI_Win_allocate_shared #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_allocate_shared PMPI_Win_allocate_shared #endif static const char FUNC_NAME[] = "MPI_Win_allocate_shared"; @@ -46,7 +47,7 @@ int MPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info, MPI_Comm comm, void *baseptr, MPI_Win *win) { int ret = MPI_SUCCESS; - + MEMCHECKER( memchecker_comm(comm); ); @@ -55,7 +56,7 @@ int MPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, 
MPI_ERR_COMM, FUNC_NAME); } else if (NULL == info || ompi_info_is_freed(info)) { @@ -65,7 +66,7 @@ int MPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info, } else if (NULL == win) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_WIN, FUNC_NAME); } else if ( size < 0 ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_SIZE, FUNC_NAME); + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_SIZE, FUNC_NAME); } } @@ -74,17 +75,14 @@ int MPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info, return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COMM, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); /* create window and return */ ret = ompi_win_allocate_shared((size_t)size, disp_unit, info, comm, baseptr, win); if (OMPI_SUCCESS != ret) { *win = MPI_WIN_NULL; - OPAL_CR_EXIT_LIBRARY(); OMPI_ERRHANDLER_RETURN (ret, comm, ret, FUNC_NAME); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/win_attach.c b/ompi/mpi/c/win_attach.c index 9bd52c016d9..b6a27cad47c 100644 --- a/ompi/mpi/c/win_attach.c +++ b/ompi/mpi/c/win_attach.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,12 +30,11 @@ #include "ompi/win/win.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_attach = PMPI_Win_attach #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_attach PMPI_Win_attach #endif static const char FUNC_NAME[] = "MPI_Win_attach"; @@ -41,7 +42,7 @@ static const char FUNC_NAME[] = "MPI_Win_attach"; int MPI_Win_attach(MPI_Win win, void *base, MPI_Aint size) { int ret = MPI_SUCCESS; - + /* argument checking */ if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -54,8 +55,6 @@ int MPI_Win_attach(MPI_Win win, void *base, MPI_Aint size) OMPI_ERRHANDLER_CHECK(ret, win, ret, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* create window and return */ ret = win->w_osc_module->osc_win_attach(win, base, size); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); diff --git a/ompi/mpi/c/win_c2f.c b/ompi/mpi/c/win_c2f.c index 6bd80fac329..806eba0957c 100644 --- a/ompi/mpi/c/win_c2f.c +++ b/ompi/mpi/c/win_c2f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -26,22 +28,19 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/mpi/fortran/base/fint_2_int.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_c2f = PMPI_Win_c2f #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_c2f PMPI_Win_c2f #endif static const char FUNC_NAME[] = "MPI_Win_c2f"; -MPI_Fint MPI_Win_c2f(MPI_Win win) +MPI_Fint MPI_Win_c2f(MPI_Win win) { - OPAL_CR_NOOP_PROGRESS(); - if ( MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/win_call_errhandler.c b/ompi/mpi/c/win_call_errhandler.c index f1cb02a0e14..70802557707 100644 --- a/ompi/mpi/c/win_call_errhandler.c +++ b/ompi/mpi/c/win_call_errhandler.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,22 +26,19 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_call_errhandler = PMPI_Win_call_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_call_errhandler PMPI_Win_call_errhandler #endif static const char FUNC_NAME[] = "MPI_Win_call_errhandler"; -int MPI_Win_call_errhandler(MPI_Win win, int errorcode) +int MPI_Win_call_errhandler(MPI_Win win, int errorcode) { - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); diff --git a/ompi/mpi/c/win_complete.c b/ompi/mpi/c/win_complete.c index fa45c387971..625a1ebbf87 100644 --- a/ompi/mpi/c/win_complete.c +++ b/ompi/mpi/c/win_complete.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,18 +27,17 @@ #include "ompi/win/win.h" #include "ompi/mca/osc/osc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_complete = PMPI_Win_complete #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_complete PMPI_Win_complete #endif static const char FUNC_NAME[] = "MPI_Win_complete"; -int MPI_Win_complete(MPI_Win win) +int MPI_Win_complete(MPI_Win win) { int rc; @@ -48,8 +49,6 @@ int MPI_Win_complete(MPI_Win win) } } - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_complete(win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/win_create.c b/ompi/mpi/c/win_create.c index d87116ad69d..fb76d03df4d 100644 --- a/ompi/mpi/c/win_create.c +++ b/ompi/mpi/c/win_create.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,22 +30,21 @@ #include "ompi/win/win.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_create = PMPI_Win_create #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_create PMPI_Win_create #endif static const char FUNC_NAME[] = "MPI_Win_create"; int MPI_Win_create(void *base, MPI_Aint size, int disp_unit, - MPI_Info info, MPI_Comm comm, MPI_Win *win) + MPI_Info info, MPI_Comm comm, MPI_Win *win) { int ret = MPI_SUCCESS; - + MEMCHECKER( memchecker_comm(comm); ); @@ -52,7 +53,7 @@ int MPI_Win_create(void *base, MPI_Aint size, int disp_unit, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (NULL == info || ompi_info_is_freed(info)) { @@ -62,7 +63,7 @@ int MPI_Win_create(void *base, MPI_Aint size, int disp_unit, } else if (NULL == win) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_WIN, FUNC_NAME); } else if ( size < 0 ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_SIZE, FUNC_NAME); + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_SIZE, FUNC_NAME); } else if ( disp_unit <= 0 ) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_DISP, FUNC_NAME); } @@ -73,17 +74,14 @@ int MPI_Win_create(void *base, MPI_Aint size, int disp_unit, return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COMM, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); /* create window and return */ ret = ompi_win_create(base, (size_t)size, disp_unit, comm, info, win); if (OMPI_SUCCESS != ret) { *win = MPI_WIN_NULL; - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_WIN, FUNC_NAME); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/win_create_dynamic.c 
b/ompi/mpi/c/win_create_dynamic.c index 20772b3caa6..6207c8bf755 100644 --- a/ompi/mpi/c/win_create_dynamic.c +++ b/ompi/mpi/c/win_create_dynamic.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,21 +31,20 @@ #include "ompi/win/win.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_create_dynamic = PMPI_Win_create_dynamic #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_create_dynamic PMPI_Win_create_dynamic #endif static const char FUNC_NAME[] = "MPI_Win_create_dynamic"; -int MPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win) +int MPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win) { int ret = MPI_SUCCESS; - + MEMCHECKER( memchecker_comm(comm); ); @@ -52,7 +53,7 @@ int MPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win) OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } else if (NULL == info || ompi_info_is_freed(info)) { @@ -69,16 +70,13 @@ int MPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win) return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COMM, 
FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); /* create_dynamic window and return */ ret = ompi_win_create_dynamic(info, comm, win); if (OMPI_SUCCESS != ret) { *win = MPI_WIN_NULL; - OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_WIN, FUNC_NAME); } - OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/win_create_errhandler.c b/ompi/mpi/c/win_create_errhandler.c index ebd62757a5b..33ada365843 100644 --- a/ompi/mpi/c/win_create_errhandler.c +++ b/ompi/mpi/c/win_create_errhandler.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,35 +27,32 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_create_errhandler = PMPI_Win_create_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_create_errhandler PMPI_Win_create_errhandler #endif static const char FUNC_NAME[] = "MPI_Win_create_errhandler"; int MPI_Win_create_errhandler(MPI_Win_errhandler_function *function, - MPI_Errhandler *errhandler) + MPI_Errhandler *errhandler) { int err = MPI_SUCCESS; if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == function || + if (NULL == function || NULL == errhandler) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - /* Create and cache the errhandler. Sets a refcount of 1. */ - *errhandler = + *errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_WIN, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_C); diff --git a/ompi/mpi/c/win_create_keyval.c b/ompi/mpi/c/win_create_keyval.c index 989fc5419bd..f93313f7c3c 100644 --- a/ompi/mpi/c/win_create_keyval.c +++ b/ompi/mpi/c/win_create_keyval.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,12 +27,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/attribute/attribute.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_create_keyval = PMPI_Win_create_keyval #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_create_keyval PMPI_Win_create_keyval #endif static const char FUNC_NAME[] = "MPI_Win_create_keyval"; @@ -38,7 +39,7 @@ static const char FUNC_NAME[] = "MPI_Win_create_keyval"; int MPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_attr_fn, MPI_Win_delete_attr_function *win_delete_attr_fn, - int *win_keyval, void *extra_state) + int *win_keyval, void *extra_state) { int ret; ompi_attribute_fn_ptr_union_t copy_fn; @@ -48,13 +49,11 @@ int MPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_attr_fn, OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ((NULL == win_copy_attr_fn) || (NULL == win_delete_attr_fn) || (NULL == win_keyval)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - copy_fn.attr_win_copy_fn = (MPI_Win_internal_copy_attr_function*)win_copy_attr_fn; del_fn.attr_win_delete_fn = win_delete_attr_fn; diff --git a/ompi/mpi/c/win_delete_attr.c b/ompi/mpi/c/win_delete_attr.c index f3c8179b752..0d355ac7449 100644 --- a/ompi/mpi/c/win_delete_attr.c +++ b/ompi/mpi/c/win_delete_attr.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,20 +27,19 @@ #include "ompi/attribute/attribute.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_delete_attr = PMPI_Win_delete_attr #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_delete_attr PMPI_Win_delete_attr #endif static const char FUNC_NAME[] = "MPI_Win_delete_attr"; -int MPI_Win_delete_attr(MPI_Win win, int win_keyval) +int MPI_Win_delete_attr(MPI_Win win, int win_keyval) { - int ret; + int ret; if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -48,9 +49,7 @@ int MPI_Win_delete_attr(MPI_Win win, int win_keyval) } } - OPAL_CR_ENTER_LIBRARY(); - - ret = ompi_attr_delete(WIN_ATTR, win, win->w_keyhash, win_keyval, + ret = ompi_attr_delete(WIN_ATTR, win, win->w_keyhash, win_keyval, false); - OMPI_ERRHANDLER_RETURN(ret, win, MPI_ERR_OTHER, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(ret, win, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/win_detach.c b/ompi/mpi/c/win_detach.c index 8d16b5750c8..0e57192e111 100644 --- a/ompi/mpi/c/win_detach.c +++ b/ompi/mpi/c/win_detach.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,20 +30,19 @@ #include "ompi/win/win.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_detach = PMPI_Win_detach #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_detach PMPI_Win_detach #endif static const char FUNC_NAME[] = "MPI_Win_detach"; -int MPI_Win_detach(MPI_Win win, void *base) +int MPI_Win_detach(MPI_Win win, const void *base) { int ret = MPI_SUCCESS; - + /* argument checking */ if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -54,8 +55,6 @@ int MPI_Win_detach(MPI_Win win, void *base) OMPI_ERRHANDLER_CHECK(ret, win, ret, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* create window and return */ ret = win->w_osc_module->osc_win_detach(win, base); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); diff --git a/ompi/mpi/c/win_f2c.c b/ompi/mpi/c/win_f2c.c index 74378bae1c0..bc8bdb7dcab 100644 --- a/ompi/mpi/c/win_f2c.c +++ b/ompi/mpi/c/win_f2c.c @@ -6,15 +6,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,23 +27,20 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/mpi/fortran/base/fint_2_int.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_f2c = PMPI_Win_f2c #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_f2c PMPI_Win_f2c #endif static const char FUNC_NAME[] = "MPI_Win_f2c"; -MPI_Win MPI_Win_f2c(MPI_Fint win) +MPI_Win MPI_Win_f2c(MPI_Fint win) { int o_index= OMPI_FINT_2_INT(win); - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } @@ -54,6 +53,6 @@ MPI_Win MPI_Win_f2c(MPI_Fint win) o_index >= opal_pointer_array_get_size(&ompi_mpi_windows)) { return NULL; } - + return (MPI_Win)opal_pointer_array_get_item(&ompi_mpi_windows, o_index); } diff --git a/ompi/mpi/c/win_fence.c b/ompi/mpi/c/win_fence.c index 03adb6874f2..4d64c79d9b8 100644 --- a/ompi/mpi/c/win_fence.c +++ b/ompi/mpi/c/win_fence.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,18 +30,17 @@ #include "ompi/win/win.h" #include "ompi/mca/osc/osc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_fence = PMPI_Win_fence #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_fence PMPI_Win_fence #endif static const char FUNC_NAME[] = "MPI_Win_fence"; -int MPI_Win_fence(int assert, MPI_Win win) +int MPI_Win_fence(int assert, MPI_Win win) { int rc; @@ -48,14 +49,12 @@ int MPI_Win_fence(int assert, MPI_Win win) if (ompi_win_invalid(win)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_WIN, FUNC_NAME); - } else if (0 != (assert & ~(MPI_MODE_NOSTORE | MPI_MODE_NOPUT | + } else if (0 != (assert & ~(MPI_MODE_NOSTORE | MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE | MPI_MODE_NOSUCCEED))) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_ASSERT, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_fence(assert, win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/win_flush.c b/ompi/mpi/c/win_flush.c index 083baa148b2..32cabc2a4c2 100644 --- a/ompi/mpi/c/win_flush.c +++ b/ompi/mpi/c/win_flush.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,12 +30,11 @@ #include "ompi/win/win.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_flush = PMPI_Win_flush #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_flush PMPI_Win_flush #endif static const char FUNC_NAME[] = "MPI_Win_flush"; @@ -41,7 +42,7 @@ static const char FUNC_NAME[] = "MPI_Win_flush"; int MPI_Win_flush(int rank, MPI_Win win) { int ret = MPI_SUCCESS; - + /* argument checking */ if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -52,8 +53,6 @@ int MPI_Win_flush(int rank, MPI_Win win) OMPI_ERRHANDLER_CHECK(ret, win, ret, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* create window and return */ ret = win->w_osc_module->osc_flush(rank, win); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); diff --git a/ompi/mpi/c/win_flush_all.c b/ompi/mpi/c/win_flush_all.c index a0a4039de58..914f40f1b32 100644 --- a/ompi/mpi/c/win_flush_all.c +++ b/ompi/mpi/c/win_flush_all.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,12 +30,11 @@ #include "ompi/win/win.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_flush_all = PMPI_Win_flush_all #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_flush_all PMPI_Win_flush_all #endif static const char FUNC_NAME[] = "MPI_Win_flush_all"; @@ -41,7 +42,7 @@ static const char FUNC_NAME[] = "MPI_Win_flush_all"; int MPI_Win_flush_all(MPI_Win win) { int ret = MPI_SUCCESS; - + /* argument checking */ if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -52,8 +53,6 @@ int MPI_Win_flush_all(MPI_Win win) OMPI_ERRHANDLER_CHECK(ret, win, ret, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* create window and return */ ret = win->w_osc_module->osc_flush_all(win); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); diff --git a/ompi/mpi/c/win_flush_local.c b/ompi/mpi/c/win_flush_local.c index 215e9be8ae0..322acd77c21 100644 --- a/ompi/mpi/c/win_flush_local.c +++ b/ompi/mpi/c/win_flush_local.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,12 +30,11 @@ #include "ompi/win/win.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_flush_local = PMPI_Win_flush_local #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_flush_local PMPI_Win_flush_local #endif static const char FUNC_NAME[] = "MPI_Win_flush_local"; @@ -41,7 +42,7 @@ static const char FUNC_NAME[] = "MPI_Win_flush_local"; int MPI_Win_flush_local(int rank, MPI_Win win) { int ret = MPI_SUCCESS; - + /* argument checking */ if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -52,8 +53,6 @@ int MPI_Win_flush_local(int rank, MPI_Win win) OMPI_ERRHANDLER_CHECK(ret, win, ret, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* create window and return */ ret = win->w_osc_module->osc_flush_local(rank, win); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); diff --git a/ompi/mpi/c/win_flush_local_all.c b/ompi/mpi/c/win_flush_local_all.c index 90e61dea7b6..c1260c1e298 100644 --- a/ompi/mpi/c/win_flush_local_all.c +++ b/ompi/mpi/c/win_flush_local_all.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,12 +30,11 @@ #include "ompi/win/win.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_flush_local_all = PMPI_Win_flush_local_all #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_flush_local_all PMPI_Win_flush_local_all #endif static const char FUNC_NAME[] = "MPI_Win_flush_local_all"; @@ -41,7 +42,7 @@ static const char FUNC_NAME[] = "MPI_Win_flush_local_all"; int MPI_Win_flush_local_all(MPI_Win win) { int ret = MPI_SUCCESS; - + /* argument checking */ if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -52,8 +53,6 @@ int MPI_Win_flush_local_all(MPI_Win win) OMPI_ERRHANDLER_CHECK(ret, win, ret, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - /* create window and return */ ret = win->w_osc_module->osc_flush_local_all(win); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); diff --git a/ompi/mpi/c/win_free.c b/ompi/mpi/c/win_free.c index 527d6c0a0fe..1485517ebfd 100644 --- a/ompi/mpi/c/win_free.c +++ b/ompi/mpi/c/win_free.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,18 +26,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_free = PMPI_Win_free #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_free PMPI_Win_free #endif static const char FUNC_NAME[] = "MPI_Win_free"; -int MPI_Win_free(MPI_Win *win) +int MPI_Win_free(MPI_Win *win) { int ret; @@ -47,8 +48,6 @@ int MPI_Win_free(MPI_Win *win) } } - OPAL_CR_ENTER_LIBRARY(); - ret = ompi_win_free(*win); if (OMPI_SUCCESS == ret) { *win = MPI_WIN_NULL; diff --git a/ompi/mpi/c/win_free_keyval.c b/ompi/mpi/c/win_free_keyval.c index 56202156da5..108540f8202 100644 --- a/ompi/mpi/c/win_free_keyval.c +++ b/ompi/mpi/c/win_free_keyval.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,18 +26,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/attribute/attribute.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_free_keyval = PMPI_Win_free_keyval #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_free_keyval PMPI_Win_free_keyval #endif static const char FUNC_NAME[] = "MPI_Win_free_keyval"; -int MPI_Win_free_keyval(int *win_keyval) +int MPI_Win_free_keyval(int *win_keyval) { int ret; @@ -44,13 +45,11 @@ int MPI_Win_free_keyval(int *win_keyval) if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == win_keyval) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - ret = ompi_attr_free_keyval(WIN_ATTR, win_keyval, 0); OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/win_get_attr.c b/ompi/mpi/c/win_get_attr.c index 1651ab7b56f..2a096a0f367 100644 --- a/ompi/mpi/c/win_get_attr.c +++ b/ompi/mpi/c/win_get_attr.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,19 +30,18 @@ #include "ompi/attribute/attribute.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_get_attr = PMPI_Win_get_attr #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_get_attr PMPI_Win_get_attr #endif static const char FUNC_NAME[] = "MPI_Win_get_attr"; int MPI_Win_get_attr(MPI_Win win, int win_keyval, - void *attribute_val, int *flag) + void *attribute_val, int *flag) { int ret; @@ -56,14 +57,12 @@ int MPI_Win_get_attr(MPI_Win win, int win_keyval, } } - OPAL_CR_ENTER_LIBRARY(); - /* This stuff is very confusing. Be sure to see src/attribute/attribute.c for a lengthy comment explaining Open MPI attribute behavior. */ - ret = ompi_attr_get_c(win->w_keyhash, win_keyval, + ret = ompi_attr_get_c(win->w_keyhash, win_keyval, (void**)attribute_val, flag); - OMPI_ERRHANDLER_RETURN(ret, win, MPI_ERR_OTHER, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(ret, win, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/win_get_errhandler.c b/ompi/mpi/c/win_get_errhandler.c index 5ce1aa862d1..49b2b02dd67 100644 --- a/ompi/mpi/c/win_get_errhandler.c +++ b/ompi/mpi/c/win_get_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,23 +30,20 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_get_errhandler = PMPI_Win_get_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_get_errhandler PMPI_Win_get_errhandler #endif static const char FUNC_NAME[] = "MPI_Win_get_errhandler"; -int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler) +int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler) { MPI_Errhandler tmp; - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_win_invalid(win)) { @@ -57,7 +59,7 @@ int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler) error_handler became atomic. */ do { tmp = win->error_handler; - } while (!OPAL_ATOMIC_CMPSET(&(win->error_handler), tmp, tmp)); + } while (!OPAL_ATOMIC_CMPSET_PTR(&(win->error_handler), tmp, tmp)); /* Retain the errhandler, corresponding to object refcount decrease in errhandler_free.c. */ diff --git a/ompi/mpi/c/win_get_group.c b/ompi/mpi/c/win_get_group.c index 9edefb30327..523e4d1da3b 100644 --- a/ompi/mpi/c/win_get_group.c +++ b/ompi/mpi/c/win_get_group.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -24,18 +26,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_get_group = PMPI_Win_get_group #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_get_group PMPI_Win_get_group #endif static const char FUNC_NAME[] = "MPI_Win_get_group"; -int MPI_Win_get_group(MPI_Win win, MPI_Group *group) +int MPI_Win_get_group(MPI_Win win, MPI_Group *group) { int ret; @@ -49,8 +50,6 @@ int MPI_Win_get_group(MPI_Win win, MPI_Group *group) } } - OPAL_CR_ENTER_LIBRARY(); - ret = ompi_win_group(win, (ompi_group_t**) group); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/win_get_info.c b/ompi/mpi/c/win_get_info.c index ff9f143d90c..f78fbc48871 100644 --- a/ompi/mpi/c/win_get_info.c +++ b/ompi/mpi/c/win_get_info.c @@ -3,10 +3,12 @@ * Copyright (c) 2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -17,16 +19,19 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_get_info = PMPI_Win_get_info #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_get_info PMPI_Win_get_info #endif static const char FUNC_NAME[] = "MPI_Win_get_info"; +static void _win_info_set (ompi_info_t *info, const char *key, int set) +{ + ompi_info_set (info, key, set ? "true" : "false"); +} int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used) { @@ -44,17 +49,15 @@ int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used) } } - OPAL_CR_ENTER_LIBRARY(); - ret = win->w_osc_module->osc_get_info(win, info_used); if (OMPI_SUCCESS == ret && *info_used) { /* set standard info keys based on what the OSC module is using */ - if (win->w_flags & OMPI_WIN_NO_LOCKS) { - ompi_info_set (*info_used, "no_locks", "true"); - } else { - ompi_info_set (*info_used, "no_locks", "false"); - } + + _win_info_set (*info_used, "no_locks", win->w_flags & OMPI_WIN_NO_LOCKS); + _win_info_set (*info_used, "same_size", win->w_flags & OMPI_WIN_SAME_SIZE); + _win_info_set (*info_used, "same_disp_unit", win->w_flags & OMPI_WIN_SAME_DISP); + ompi_info_set_value_enum (*info_used, "accumulate_ops", win->w_acc_ops, ompi_win_accumulate_ops); } OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); diff --git a/ompi/mpi/c/win_get_name.c b/ompi/mpi/c/win_get_name.c index b25fcc5b358..085ac2bbfaa 100644 --- a/ompi/mpi/c/win_get_name.c +++ b/ompi/mpi/c/win_get_name.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,18 +27,17 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_get_name = PMPI_Win_get_name #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_get_name PMPI_Win_get_name #endif static const char FUNC_NAME[] = "MPI_Win_get_name"; -int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen) +int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen) { int ret; @@ -50,8 +51,6 @@ int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen) } } - OPAL_CR_ENTER_LIBRARY(); - /* Note that MPI-2.1 requires: - terminating the string with a \0 - name[*resultlen] == '\0' diff --git a/ompi/mpi/c/win_lock.c b/ompi/mpi/c/win_lock.c index ae10000e317..f27e017cfcb 100644 --- a/ompi/mpi/c/win_lock.c +++ b/ompi/mpi/c/win_lock.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,18 +30,17 @@ #include "ompi/win/win.h" #include "ompi/mca/osc/osc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_lock = PMPI_Win_lock #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_lock PMPI_Win_lock #endif static const char FUNC_NAME[] = "MPI_Win_lock"; -int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win) +int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win) { int rc; @@ -48,7 +49,7 @@ int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win) if (ompi_win_invalid(win)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_WIN, FUNC_NAME); - } else if (lock_type != MPI_LOCK_EXCLUSIVE && + } else if (lock_type != MPI_LOCK_EXCLUSIVE && lock_type != MPI_LOCK_SHARED) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_LOCKTYPE, FUNC_NAME); } else if (ompi_win_peer_invalid(win, rank)) { @@ -63,8 +64,6 @@ int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win) /* NTH: do not bother keeping track of locking MPI_PROC_NULL. */ if (MPI_PROC_NULL == rank) return MPI_SUCCESS; - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_lock(lock_type, rank, assert, win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/win_lock_all.c b/ompi/mpi/c/win_lock_all.c index 828316d5d13..62443d30008 100644 --- a/ompi/mpi/c/win_lock_all.c +++ b/ompi/mpi/c/win_lock_all.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,18 +27,17 @@ #include "ompi/win/win.h" #include "ompi/mca/osc/osc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_lock_all = PMPI_Win_lock_all #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_lock_all PMPI_Win_lock_all #endif static const char FUNC_NAME[] = "MPI_Win_lock_all"; -int MPI_Win_lock_all(int assert, MPI_Win win) +int MPI_Win_lock_all(int assert, MPI_Win win) { int rc; @@ -52,8 +53,6 @@ int MPI_Win_lock_all(int assert, MPI_Win win) } } - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_lock_all(assert, win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/win_post.c b/ompi/mpi/c/win_post.c index a1de2ec18c9..8c7ffe479d8 100644 --- a/ompi/mpi/c/win_post.c +++ b/ompi/mpi/c/win_post.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,18 +27,17 @@ #include "ompi/win/win.h" #include "ompi/mca/osc/osc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_post = PMPI_Win_post #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_post PMPI_Win_post #endif static const char FUNC_NAME[] = "MPI_Win_post"; -int MPI_Win_post(MPI_Group group, int assert, MPI_Win win) +int MPI_Win_post(MPI_Group group, int assert, MPI_Win win) { int rc; @@ -45,14 +46,12 @@ int MPI_Win_post(MPI_Group group, int assert, MPI_Win win) if (ompi_win_invalid(win)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_WIN, FUNC_NAME); - } else if (0 != (assert & ~(MPI_MODE_NOCHECK | MPI_MODE_NOSTORE | + } else if (0 != (assert & ~(MPI_MODE_NOCHECK | MPI_MODE_NOSTORE | MPI_MODE_NOPUT))) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_ASSERT, FUNC_NAME); } } - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_post(group, assert, win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/win_set_attr.c b/ompi/mpi/c/win_set_attr.c index f206ae7e105..9f5f434ea96 100644 --- a/ompi/mpi/c/win_set_attr.c +++ b/ompi/mpi/c/win_set_attr.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,18 +27,17 @@ #include "ompi/attribute/attribute.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_set_attr = PMPI_Win_set_attr #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_set_attr PMPI_Win_set_attr #endif static const char FUNC_NAME[] = "MPI_Win_set_attr"; -int MPI_Win_set_attr(MPI_Win win, int win_keyval, void *attribute_val) +int MPI_Win_set_attr(MPI_Win win, int win_keyval, void *attribute_val) { int ret; @@ -48,9 +49,7 @@ int MPI_Win_set_attr(MPI_Win win, int win_keyval, void *attribute_val) } } - OPAL_CR_ENTER_LIBRARY(); - - ret = ompi_attr_set_c(WIN_ATTR, win, &win->w_keyhash, + ret = ompi_attr_set_c(WIN_ATTR, win, &win->w_keyhash, win_keyval, attribute_val, false); - OMPI_ERRHANDLER_RETURN(ret, win, MPI_ERR_OTHER, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(ret, win, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpi/c/win_set_errhandler.c b/ompi/mpi/c/win_set_errhandler.c index 8adbb8c433f..c30d4e86108 100644 --- a/ompi/mpi/c/win_set_errhandler.c +++ b/ompi/mpi/c/win_set_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,23 +30,20 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_set_errhandler = PMPI_Win_set_errhandler #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_set_errhandler PMPI_Win_set_errhandler #endif static const char FUNC_NAME[] = "MPI_Win_set_errhandler"; -int MPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler) +int MPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler) { MPI_Errhandler tmp; - OPAL_CR_NOOP_PROGRESS(); - if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -50,7 +52,7 @@ int MPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler) FUNC_NAME); } else if (NULL == errhandler || MPI_ERRHANDLER_NULL == errhandler || - (OMPI_ERRHANDLER_TYPE_WIN != errhandler->eh_mpi_object_type && + (OMPI_ERRHANDLER_TYPE_WIN != errhandler->eh_mpi_object_type && OMPI_ERRHANDLER_TYPE_PREDEFINED != errhandler->eh_mpi_object_type) ) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_ARG, FUNC_NAME); } @@ -62,9 +64,7 @@ int MPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler) /* Ditch the old errhandler, and decrement its refcount. On 64 bits environments we have to make sure the reading of the error_handler became atomic. 
*/ - do { - tmp = win->error_handler; - } while (!OPAL_ATOMIC_CMPSET(&(win->error_handler), tmp, errhandler)); + tmp = OPAL_ATOMIC_SWAP_PTR(&win->error_handler, errhandler); OBJ_RELEASE(tmp); /* All done */ diff --git a/ompi/mpi/c/win_set_info.c b/ompi/mpi/c/win_set_info.c index f96b370c5c2..522fca477dc 100644 --- a/ompi/mpi/c/win_set_info.c +++ b/ompi/mpi/c/win_set_info.c @@ -1,9 +1,11 @@ /* * Copyright (c) 2013 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -14,12 +16,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_set_info = PMPI_Win_set_info #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_set_info PMPI_Win_set_info #endif static const char FUNC_NAME[] = "MPI_Win_set_info"; @@ -42,8 +43,6 @@ int MPI_Win_set_info(MPI_Win win, MPI_Info info) } } - OPAL_CR_ENTER_LIBRARY(); - ret = win->w_osc_module->osc_set_info(win, info); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/win_set_name.c b/ompi/mpi/c/win_set_name.c index 35b61de2246..8671480fb9b 100644 --- a/ompi/mpi/c/win_set_name.c +++ b/ompi/mpi/c/win_set_name.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,12 +30,11 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_set_name = PMPI_Win_set_name #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_set_name PMPI_Win_set_name #endif static const char FUNC_NAME[] = "MPI_Win_set_name"; @@ -53,8 +54,6 @@ int MPI_Win_set_name(MPI_Win win, const char *win_name) } } - OPAL_CR_ENTER_LIBRARY(); - ret = ompi_win_set_name(win, win_name); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/win_shared_query.c b/ompi/mpi/c/win_shared_query.c index 565e09bc969..9afdb91b818 100644 --- a/ompi/mpi/c/win_shared_query.c +++ b/ompi/mpi/c/win_shared_query.c @@ -1,9 +1,11 @@ /* * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -17,12 +19,11 @@ #include "ompi/win/win.h" #include "ompi/mca/osc/osc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_shared_query = PMPI_Win_shared_query #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_shared_query PMPI_Win_shared_query #endif static const char FUNC_NAME[] = "MPI_Win_shared_query"; @@ -43,8 +44,6 @@ int MPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit, } } - OPAL_CR_ENTER_LIBRARY(); - if (NULL != win->w_osc_module->osc_win_shared_query) { rc = win->w_osc_module->osc_win_shared_query(win, rank, &tsize, disp_unit, baseptr); *size = tsize; diff --git a/ompi/mpi/c/win_start.c b/ompi/mpi/c/win_start.c index f0e7a9f44b5..a2ddaf8b167 100644 --- a/ompi/mpi/c/win_start.c +++ b/ompi/mpi/c/win_start.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,18 +27,17 @@ #include "ompi/win/win.h" #include "ompi/mca/osc/osc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_start = PMPI_Win_start #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_start PMPI_Win_start #endif static const char FUNC_NAME[] = "MPI_Win_start"; -int MPI_Win_start(MPI_Group group, int assert, MPI_Win win) +int MPI_Win_start(MPI_Group group, int assert, MPI_Win win) { int rc; @@ -50,8 +51,6 @@ int MPI_Win_start(MPI_Group group, int assert, MPI_Win win) } } - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_start(group, assert, win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/win_sync.c b/ompi/mpi/c/win_sync.c index 90e3fba93d5..954d9c71fca 100644 --- a/ompi/mpi/c/win_sync.c +++ b/ompi/mpi/c/win_sync.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,12 +30,11 @@ #include "ompi/win/win.h" #include "ompi/memchecker.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_sync = PMPI_Win_sync #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_sync PMPI_Win_sync #endif static const char FUNC_NAME[] = "MPI_Win_sync"; @@ -41,7 +42,7 @@ static const char FUNC_NAME[] = "MPI_Win_sync"; int MPI_Win_sync(MPI_Win win) { int ret = MPI_SUCCESS; - + /* argument checking */ if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -51,8 +52,6 @@ int MPI_Win_sync(MPI_Win win) } } - OPAL_CR_ENTER_LIBRARY(); - ret = win->w_osc_module->osc_sync(win); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/win_test.c b/ompi/mpi/c/win_test.c index 1a168ad4996..f295cfeb6a3 100644 --- a/ompi/mpi/c/win_test.c +++ b/ompi/mpi/c/win_test.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,18 +27,17 @@ #include "ompi/win/win.h" #include "ompi/mca/osc/osc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_test = PMPI_Win_test #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_test PMPI_Win_test #endif static const char FUNC_NAME[] = "MPI_Win_test"; -int MPI_Win_test(MPI_Win win, int *flag) +int MPI_Win_test(MPI_Win win, int *flag) { int rc; @@ -48,8 +49,6 @@ int MPI_Win_test(MPI_Win win, int *flag) } } - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_test(win, flag); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/win_unlock.c b/ompi/mpi/c/win_unlock.c index ea1bd2bac5a..a9d20f8b4ea 100644 --- a/ompi/mpi/c/win_unlock.c +++ b/ompi/mpi/c/win_unlock.c @@ -6,16 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -28,18 +30,17 @@ #include "ompi/win/win.h" #include "ompi/mca/osc/osc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_unlock = PMPI_Win_unlock #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_unlock PMPI_Win_unlock #endif static const char FUNC_NAME[] = "MPI_Win_unlock"; -int MPI_Win_unlock(int rank, MPI_Win win) +int MPI_Win_unlock(int rank, MPI_Win win) { int rc; @@ -56,8 +57,6 @@ int MPI_Win_unlock(int rank, MPI_Win win) /* NTH: do not bother keeping track of unlocking MPI_PROC_NULL. */ if (MPI_PROC_NULL == rank) return MPI_SUCCESS; - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_unlock(rank, win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/win_unlock_all.c b/ompi/mpi/c/win_unlock_all.c index a5c85a64bd6..5c8f412139c 100644 --- a/ompi/mpi/c/win_unlock_all.c +++ b/ompi/mpi/c/win_unlock_all.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,18 +27,17 @@ #include "ompi/win/win.h" #include "ompi/mca/osc/osc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_unlock_all = PMPI_Win_unlock_all #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_unlock_all PMPI_Win_unlock_all #endif static const char FUNC_NAME[] = "MPI_Win_unlock_all"; -int MPI_Win_unlock_all(MPI_Win win) +int MPI_Win_unlock_all(MPI_Win win) { int rc; @@ -48,8 +49,6 @@ int MPI_Win_unlock_all(MPI_Win win) } } - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_unlock_all(win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/win_wait.c b/ompi/mpi/c/win_wait.c index 3ee9f5bb368..88ccbaff517 100644 --- a/ompi/mpi/c/win_wait.c +++ b/ompi/mpi/c/win_wait.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -25,18 +27,17 @@ #include "ompi/win/win.h" #include "ompi/mca/osc/osc.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Win_wait = PMPI_Win_wait #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Win_wait PMPI_Win_wait #endif static const char FUNC_NAME[] = "MPI_Win_wait"; -int MPI_Win_wait(MPI_Win win) +int MPI_Win_wait(MPI_Win win) { int rc; @@ -48,8 +49,6 @@ int MPI_Win_wait(MPI_Win win) } } - OPAL_CR_ENTER_LIBRARY(); - rc = win->w_osc_module->osc_wait(win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/wtick.c b/ompi/mpi/c/wtick.c index 8088888e5ff..f6504dccee2 100644 --- a/ompi/mpi/c/wtick.c +++ b/ompi/mpi/c/wtick.c @@ -5,15 +5,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -22,30 +27,55 @@ #include #endif #include +#ifdef HAVE_TIME_H +#include +#endif #include MCA_timer_IMPLEMENTATION_HEADER #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/mpiruntime.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Wtick = PMPI_Wtick #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Wtick PMPI_Wtick #endif - double MPI_Wtick(void) { - OPAL_CR_NOOP_PROGRESS(); - + /* + * See https://github.com/open-mpi/ompi/issues/3003 + * to get an idea what's going on here. + */ +#if 0 #if OPAL_TIMER_CYCLE_NATIVE - return opal_timer_base_get_freq(); + { + opal_timer_t freq = opal_timer_base_get_freq(); + if (0 == freq) { + /* That should never happen, but if it does, return a bogus value + * rather than crashing with a division by zero */ + return (double)0.0; + } + return (double)1.0 / (double)freq; + } #elif OPAL_TIMER_USEC_NATIVE return 0.000001; +#endif +#else +#if defined(__linux__) && OPAL_HAVE_CLOCK_GETTIME + struct timespec spec; + double wtick = 0.0; + if (0 == clock_getres(CLOCK_MONOTONIC, &spec)){ + wtick = spec.tv_sec + spec.tv_nsec * 1.0e-09; + } else { + /* guess */ + wtick = 1.0e-09; + } + return wtick; #else /* Otherwise, we already return usec precision. */ return 0.000001; #endif +#endif } diff --git a/ompi/mpi/c/wtime.c b/ompi/mpi/c/wtime.c index c7309ddb0ab..73b803c2322 100644 --- a/ompi/mpi/c/wtime.c +++ b/ompi/mpi/c/wtime.c @@ -5,15 +5,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -22,28 +27,41 @@ #include #endif #include +#ifdef HAVE_TIME_H +#include +#endif /* HAVE_TIME_H */ #include MCA_timer_IMPLEMENTATION_HEADER #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/mpiruntime.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Wtime = PMPI_Wtime #endif - -#if OMPI_PROFILING_DEFINES -#include "ompi/mpi/c/profile/defines.h" +#define MPI_Wtime PMPI_Wtime #endif - double MPI_Wtime(void) { double wtime; + /* + * See https://github.com/open-mpi/ompi/issues/3003 to find out + * what's happening here. 
+ */ +#if 0 #if OPAL_TIMER_CYCLE_NATIVE wtime = ((double) opal_timer_base_get_cycles()) / opal_timer_base_get_freq(); #elif OPAL_TIMER_USEC_NATIVE wtime = ((double) opal_timer_base_get_usec()) / 1000000.0; +#endif +#else +#if defined(__linux__) && OPAL_HAVE_CLOCK_GETTIME + struct timespec tp = {.tv_sec = 0, .tv_nsec = 0}; + (void) clock_gettime(CLOCK_MONOTONIC, &tp); + wtime = tp.tv_sec; + wtime += tp.tv_nsec/1.0e+9; #else /* Fall back to gettimeofday() if we have nothing else */ struct timeval tv; @@ -51,8 +69,7 @@ double MPI_Wtime(void) wtime = tv.tv_sec; wtime += (double)tv.tv_usec / 1000000.0; #endif - - OPAL_CR_NOOP_PROGRESS(); +#endif return wtime; } diff --git a/ompi/mpi/cxx/Makefile.am b/ompi/mpi/cxx/Makefile.am index fec0d2aafb6..25e65d36c1f 100644 --- a/ompi/mpi/cxx/Makefile.am +++ b/ompi/mpi/cxx/Makefile.am @@ -6,15 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -27,23 +28,24 @@ AM_CPPFLAGS = -DOMPI_BUILDING_CXX_BINDINGS_LIBRARY=1 -DOMPI_SKIP_MPICXX=1 if BUILD_MPI_CXX_BINDINGS -mpi_lib = libmpi_cxx.la +mpi_lib = lib@OMPI_LIBMPI_NAME@_cxx.la -lib_LTLIBRARIES = libmpi_cxx.la -libmpi_cxx_la_SOURCES = \ +lib_LTLIBRARIES = lib@OMPI_LIBMPI_NAME@_cxx.la +lib@OMPI_LIBMPI_NAME@_cxx_la_SOURCES = \ mpicxx.cc \ intercepts.cc \ comm.cc \ datatype.cc \ - win.cc + win.cc \ + cxx_glue.c if OMPI_PROVIDE_MPI_FILE_INTERFACE -libmpi_cxx_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_cxx_la_SOURCES += \ file.cc endif -libmpi_cxx_la_LIBADD = $(top_builddir)/ompi/libmpi.la -libmpi_cxx_la_LDFLAGS = -version-info $(libmpi_cxx_so_version) +lib@OMPI_LIBMPI_NAME@_cxx_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la +lib@OMPI_LIBMPI_NAME@_cxx_la_LDFLAGS = -version-info $(libmpi_cxx_so_version) headers = \ mpicxx.h \ @@ -76,7 +78,8 @@ headers = \ group_inln.h \ op_inln.h \ errhandler_inln.h \ - status_inln.h + status_inln.h \ + cxx_glue.h ompidir = $(ompiincludedir)/ompi/mpi/cxx ompi_HEADERS = \ diff --git a/ompi/mpi/cxx/comm.cc b/ompi/mpi/cxx/comm.cc index b8f52218eb9..d5c58adf07e 100644 --- a/ompi/mpi/cxx/comm.cc +++ b/ompi/mpi/cxx/comm.cc @@ -1,35 +1,30 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. +// Copyright (c) 2016 Los Alamos National Security, LLC. 
All rights +// reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // // do not include ompi_config.h because it kills the free/malloc defines #include "mpi.h" +#include "ompi/constants.h" #include "ompi/mpi/cxx/mpicxx.h" - -#ifdef HAVE_SCHED_H -#include -#endif - - -#include "ompi/communicator/communicator.h" -#include "ompi/attribute/attribute.h" -#include "ompi/errhandler/errhandler.h" +#include "cxx_glue.h" // @@ -48,7 +43,7 @@ MPI::Comm::Comm() } // copy -MPI::Comm::Comm(const Comm_Null& data) : Comm_Null(data) +MPI::Comm::Comm(const Comm_Null& data) : Comm_Null(data) { } @@ -56,14 +51,7 @@ MPI::Comm::Comm(const Comm_Null& data) : Comm_Null(data) MPI::Errhandler MPI::Comm::Create_errhandler(MPI::Comm::_MPI2CPP_ERRHANDLERFN_* function) { - MPI_Errhandler c_errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, - (ompi_errhandler_generic_handler_fn_t*) function, - OMPI_ERRHANDLER_LANG_CXX); - c_errhandler->eh_cxx_dispatch_fn = - (ompi_errhandler_cxx_dispatch_fn_t*) - ompi_mpi_cxx_comm_errhandler_invoke; - return c_errhandler; + return ompi_cxx_errhandler_create_comm ((ompi_cxx_dummy_fn_t *) function); } @@ -77,20 +65,15 @@ MPI::Comm::do_create_keyval(MPI_Comm_copy_attr_function* c_copy_fn, void* extra_state, int &keyval) { int ret, count = 0; - ompi_attribute_fn_ptr_union_t copy_fn; - ompi_attribute_fn_ptr_union_t delete_fn; keyval_intercept_data_t *cxx_extra_state; // If both the callbacks are C, then do the simple thing -- no // need for all the C++ machinery. 
if (NULL != c_copy_fn && NULL != c_delete_fn) { - copy_fn.attr_communicator_copy_fn = - (MPI_Comm_internal_copy_attr_function*) c_copy_fn; - delete_fn.attr_communicator_delete_fn = c_delete_fn; - ret = ompi_attr_create_keyval(COMM_ATTR, copy_fn, delete_fn, - &keyval, extra_state, 0, NULL); + ret = ompi_cxx_attr_create_keyval_comm (c_copy_fn, c_delete_fn, &keyval, + extra_state, 0, NULL); if (MPI_SUCCESS != ret) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, + return ompi_cxx_errhandler_invoke_comm(MPI_COMM_WORLD, ret, "MPI::Comm::Create_keyval"); } } @@ -104,11 +87,11 @@ MPI::Comm::do_create_keyval(MPI_Comm_copy_attr_function* c_copy_fn, // extra_state into the user's original extra_state). Ensure to // malloc() the struct here (vs new) so that it can be free()'ed // by the C attribute base. - cxx_extra_state = (keyval_intercept_data_t*) + cxx_extra_state = (keyval_intercept_data_t*) malloc(sizeof(keyval_intercept_data_t)); if (NULL == cxx_extra_state) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, - "MPI::Comm::Create_keyval"); + return ompi_cxx_errhandler_invoke_comm (MPI_COMM_WORLD, MPI_ERR_NO_MEM, + "MPI::Comm::Create_keyval"); } cxx_extra_state->c_copy_fn = c_copy_fn; cxx_extra_state->cxx_copy_fn = cxx_copy_fn; @@ -131,26 +114,20 @@ MPI::Comm::do_create_keyval(MPI_Comm_copy_attr_function* c_copy_fn, } if (2 != count) { free(cxx_extra_state); - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, - "MPI::Comm::Create_keyval"); + return ompi_cxx_errhandler_invoke_comm (MPI_COMM_WORLD, MPI_ERR_ARG, + "MPI::Comm::Create_keyval"); } // We do not call MPI_Comm_create_keyval() here because we need to // pass in the cxx_extra_state to the backend keyval creation so // that when the keyval is destroyed (i.e., when its refcount goes // to 0), the cxx_extra_state is free()'ed. 
- - copy_fn.attr_communicator_copy_fn = - (MPI_Comm_internal_copy_attr_function*) - ompi_mpi_cxx_comm_copy_attr_intercept; - delete_fn.attr_communicator_delete_fn = - ompi_mpi_cxx_comm_delete_attr_intercept; - ret = ompi_attr_create_keyval(COMM_ATTR, copy_fn, delete_fn, - &keyval, cxx_extra_state, 0, - cxx_extra_state); + ret = ompi_cxx_attr_create_keyval_comm ((MPI_Comm_copy_attr_function *) ompi_mpi_cxx_comm_copy_attr_intercept, + ompi_mpi_cxx_comm_delete_attr_intercept, + &keyval, cxx_extra_state, 0, cxx_extra_state); if (OMPI_SUCCESS != ret) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, - "MPI::Comm::Create_keyval"); + return ompi_cxx_errhandler_invoke_comm (MPI_COMM_WORLD, ret, + "MPI::Comm::Create_keyval"); } return MPI_SUCCESS; diff --git a/ompi/mpi/cxx/comm.h b/ompi/mpi/cxx/comm.h index 9e9305a5f4d..bd50a6b1e03 100644 --- a/ompi/mpi/cxx/comm.h +++ b/ompi/mpi/cxx/comm.h @@ -1,21 +1,21 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -31,7 +31,7 @@ class Comm_Null { inline Comm_Null() { } // copy inline Comm_Null(const Comm_Null& data) : pmpi_comm(data.pmpi_comm) { } - // inter-language operability + // inter-language operability inline Comm_Null(MPI_Comm data) : pmpi_comm(data) { } inline Comm_Null(const PMPI::Comm_Null& data) : pmpi_comm(data) { } @@ -40,7 +40,7 @@ class Comm_Null { virtual inline ~Comm_Null() { } inline Comm_Null& operator=(const Comm_Null& data) { - pmpi_comm = data.pmpi_comm; + pmpi_comm = data.pmpi_comm; return *this; } @@ -62,7 +62,7 @@ class Comm_Null { inline Comm_Null() : mpi_comm(MPI_COMM_NULL) { } // copy inline Comm_Null(const Comm_Null& data) : mpi_comm(data.mpi_comm) { } - // inter-language operability + // inter-language operability inline Comm_Null(MPI_Comm data) : mpi_comm(data) { } // destruction @@ -81,7 +81,7 @@ class Comm_Null { #endif - + protected: #if 0 /* OMPI_ENABLE_MPI_PROFILING */ @@ -89,7 +89,7 @@ class Comm_Null { #else MPI_Comm mpi_comm; #endif - + }; @@ -102,9 +102,9 @@ class Comm : public Comm_Null { __mpi_interface_deprecated__("MPI::Comm::Errhandler_fn was deprecated in MPI-2.2; use MPI::Comm::Errhandler_function instead"); typedef int Copy_attr_function(const Comm& oldcomm, int comm_keyval, void* extra_state, void* attribute_val_in, - void* attribute_val_out, + void* attribute_val_out, bool& flag); - typedef int Delete_attr_function(Comm& comm, int comm_keyval, + typedef int Delete_attr_function(Comm& comm, int comm_keyval, void* attribute_val, void* extra_state); #if !0 /* OMPI_ENABLE_MPI_PROFILING */ @@ -120,7 +120,7 @@ class Comm : public Comm_Null { Comm(const Comm_Null& data); #if 0 /* OMPI_ENABLE_MPI_PROFILING */ - Comm(const Comm& data) : + Comm(const Comm& data) : Comm_Null(data), pmpi_comm((const PMPI::Comm&) data) { } @@ -136,13 +136,13 @@ class Comm : public Comm_Null { // assignment Comm& operator=(const Comm& data) { 
this->Comm_Null::operator=(data); - pmpi_comm = data.pmpi_comm; + pmpi_comm = data.pmpi_comm; return *this; } Comm& operator=(const Comm_Null& data) { this->Comm_Null::operator=(data); MPI_Comm tmp = data; - pmpi_comm = tmp; + pmpi_comm = tmp; return *this; } // inter-language operability @@ -163,7 +163,7 @@ class Comm : public Comm_Null { // Point-to-Point // - virtual void Send(const void *buf, int count, + virtual void Send(const void *buf, int count, const Datatype & datatype, int dest, int tag) const; virtual void Recv(void *buf, int count, const Datatype & datatype, @@ -172,25 +172,25 @@ class Comm : public Comm_Null { virtual void Recv(void *buf, int count, const Datatype & datatype, int source, int tag) const; - + virtual void Bsend(const void *buf, int count, const Datatype & datatype, int dest, int tag) const; - - virtual void Ssend(const void *buf, int count, + + virtual void Ssend(const void *buf, int count, const Datatype & datatype, int dest, int tag) const ; virtual void Rsend(const void *buf, int count, const Datatype & datatype, int dest, int tag) const; - + virtual Request Isend(const void *buf, int count, const Datatype & datatype, int dest, int tag) const; - + virtual Request Ibsend(const void *buf, int count, const Datatype & datatype, int dest, int tag) const; - + virtual Request Issend(const void *buf, int count, const Datatype & datatype, int dest, int tag) const; - + virtual Request Irsend(const void *buf, int count, const Datatype & datatype, int dest, int tag) const; @@ -202,67 +202,67 @@ class Comm : public Comm_Null { virtual bool Iprobe(int source, int tag) const; virtual void Probe(int source, int tag, Status & status) const; - + virtual void Probe(int source, int tag) const; - + virtual Prequest Send_init(const void *buf, int count, - const Datatype & datatype, int dest, + const Datatype & datatype, int dest, int tag) const; - + virtual Prequest Bsend_init(const void *buf, int count, - const Datatype & datatype, int dest, + const 
Datatype & datatype, int dest, int tag) const; - + virtual Prequest Ssend_init(const void *buf, int count, - const Datatype & datatype, int dest, + const Datatype & datatype, int dest, int tag) const; - + virtual Prequest Rsend_init(const void *buf, int count, - const Datatype & datatype, int dest, + const Datatype & datatype, int dest, int tag) const; - + virtual Prequest Recv_init(void *buf, int count, - const Datatype & datatype, int source, + const Datatype & datatype, int source, int tag) const; - + virtual void Sendrecv(const void *sendbuf, int sendcount, - const Datatype & sendtype, int dest, int sendtag, - void *recvbuf, int recvcount, + const Datatype & sendtype, int dest, int sendtag, + void *recvbuf, int recvcount, const Datatype & recvtype, int source, int recvtag, Status & status) const; - + virtual void Sendrecv(const void *sendbuf, int sendcount, - const Datatype & sendtype, int dest, int sendtag, - void *recvbuf, int recvcount, + const Datatype & sendtype, int dest, int sendtag, + void *recvbuf, int recvcount, const Datatype & recvtype, int source, int recvtag) const; virtual void Sendrecv_replace(void *buf, int count, - const Datatype & datatype, int dest, + const Datatype & datatype, int dest, int sendtag, int source, int recvtag, Status & status) const; virtual void Sendrecv_replace(void *buf, int count, - const Datatype & datatype, int dest, + const Datatype & datatype, int dest, int sendtag, int source, int recvtag) const; - + // // Groups, Contexts, and Communicators // virtual Group Get_group() const; - + virtual int Get_size() const; virtual int Get_rank() const; - + static int Compare(const Comm & comm1, const Comm & comm2); - + virtual Comm& Clone() const = 0; virtual void Free(void); - + virtual bool Is_inter() const; @@ -278,105 +278,105 @@ class Comm : public Comm_Null { Barrier() const; virtual void - Bcast(void *buffer, int count, + Bcast(void *buffer, int count, const Datatype& datatype, int root) const; - + virtual void - 
Gather(const void *sendbuf, int sendcount, - const Datatype & sendtype, - void *recvbuf, int recvcount, + Gather(const void *sendbuf, int sendcount, + const Datatype & sendtype, + void *recvbuf, int recvcount, const Datatype & recvtype, int root) const; - + virtual void - Gatherv(const void *sendbuf, int sendcount, - const Datatype & sendtype, void *recvbuf, - const int recvcounts[], const int displs[], + Gatherv(const void *sendbuf, int sendcount, + const Datatype & sendtype, void *recvbuf, + const int recvcounts[], const int displs[], const Datatype & recvtype, int root) const; - + virtual void - Scatter(const void *sendbuf, int sendcount, - const Datatype & sendtype, - void *recvbuf, int recvcount, + Scatter(const void *sendbuf, int sendcount, + const Datatype & sendtype, + void *recvbuf, int recvcount, const Datatype & recvtype, int root) const; - + virtual void - Scatterv(const void *sendbuf, const int sendcounts[], + Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], const Datatype & sendtype, - void *recvbuf, int recvcount, + void *recvbuf, int recvcount, const Datatype & recvtype, int root) const; - + virtual void - Allgather(const void *sendbuf, int sendcount, - const Datatype & sendtype, void *recvbuf, + Allgather(const void *sendbuf, int sendcount, + const Datatype & sendtype, void *recvbuf, int recvcount, const Datatype & recvtype) const; - + virtual void - Allgatherv(const void *sendbuf, int sendcount, - const Datatype & sendtype, void *recvbuf, + Allgatherv(const void *sendbuf, int sendcount, + const Datatype & sendtype, void *recvbuf, const int recvcounts[], const int displs[], const Datatype & recvtype) const; - + virtual void - Alltoall(const void *sendbuf, int sendcount, - const Datatype & sendtype, void *recvbuf, + Alltoall(const void *sendbuf, int sendcount, + const Datatype & sendtype, void *recvbuf, int recvcount, const Datatype & recvtype) const; - + virtual void - Alltoallv(const void *sendbuf, const int sendcounts[], 
- const int sdispls[], const Datatype & sendtype, - void *recvbuf, const int recvcounts[], + Alltoallv(const void *sendbuf, const int sendcounts[], + const int sdispls[], const Datatype & sendtype, + void *recvbuf, const int recvcounts[], const int rdispls[], const Datatype & recvtype) const; - + virtual void Alltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const Datatype sendtypes[], void *recvbuf, const int recvcounts[], const int rdispls[], const Datatype recvtypes[]) const; - + virtual void - Reduce(const void *sendbuf, void *recvbuf, int count, - const Datatype & datatype, const Op & op, + Reduce(const void *sendbuf, void *recvbuf, int count, + const Datatype & datatype, const Op & op, int root) const; - - + + virtual void Allreduce(const void *sendbuf, void *recvbuf, int count, const Datatype & datatype, const Op & op) const; - + virtual void - Reduce_scatter(const void *sendbuf, void *recvbuf, - int recvcounts[], - const Datatype & datatype, + Reduce_scatter(const void *sendbuf, void *recvbuf, + int recvcounts[], + const Datatype & datatype, const Op & op) const; - // + // // Process Creation // virtual void Disconnect(); static Intercomm Get_parent(); - + static Intercomm Join(const int fd); // // External Interfaces // - + virtual void Get_name(char * comm_name, int& resultlen) const; virtual void Set_name(const char* comm_name); - + // // Process Topologies // - + virtual int Get_topology() const; - + // // Environmental Inquiry // - + virtual void Abort(int errorcode); // @@ -411,7 +411,7 @@ class Comm : public Comm_Null { Delete_attr_function* comm_delete_attr_fn, void* extra_state); -protected: +protected: static int do_create_keyval(MPI_Comm_copy_attr_function* c_copy_fn, MPI_Comm_delete_attr_function* c_delete_fn, Copy_attr_function* cxx_copy_fn, @@ -425,17 +425,17 @@ class Comm : public Comm_Null { virtual void Set_attr(int comm_keyval, const void* attribute_val) const; virtual bool Get_attr(int comm_keyval, void* 
attribute_val) const; - + virtual void Delete_attr(int comm_keyval); static int NULL_COPY_FN(const Comm& oldcomm, int comm_keyval, void* extra_state, void* attribute_val_in, void* attribute_val_out, bool& flag); - + static int DUP_FN(const Comm& oldcomm, int comm_keyval, void* extra_state, void* attribute_val_in, void* attribute_val_out, bool& flag); - + static int NULL_DELETE_FN(Comm& comm, int comm_keyval, void* attribute_val, void* extra_state); diff --git a/ompi/mpi/cxx/comm_inln.h b/ompi/mpi/cxx/comm_inln.h index 95116a0170f..f5ae1b8eb20 100644 --- a/ompi/mpi/cxx/comm_inln.h +++ b/ompi/mpi/cxx/comm_inln.h @@ -1,30 +1,30 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. -// Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. +// Copyright (c) 2007-2016 Cisco Systems, Inc. All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // - + // // Point-to-Point // inline void -MPI::Comm::Send(const void *buf, int count, +MPI::Comm::Send(const void *buf, int count, const MPI::Datatype & datatype, int dest, int tag) const { (void)MPI_Send(const_cast(buf), count, datatype, dest, tag, mpi_comm); @@ -41,7 +41,7 @@ inline void MPI::Comm::Recv(void *buf, int count, const MPI::Datatype & datatype, int source, int tag) const { - (void)MPI_Recv(buf, count, datatype, source, + (void)MPI_Recv(buf, count, datatype, source, tag, mpi_comm, MPI_STATUS_IGNORE); } @@ -49,15 +49,15 @@ inline void MPI::Comm::Bsend(const void *buf, int count, const MPI::Datatype & datatype, int dest, int tag) const { - (void)MPI_Bsend(const_cast(buf), count, datatype, + (void)MPI_Bsend(const_cast(buf), count, datatype, dest, tag, mpi_comm); } inline void -MPI::Comm::Ssend(const void *buf, int count, - const MPI::Datatype & datatype, int dest, int tag) const +MPI::Comm::Ssend(const void *buf, int count, + const MPI::Datatype & datatype, int dest, int tag) const { - (void)MPI_Ssend(const_cast(buf), count, datatype, dest, + (void)MPI_Ssend(const_cast(buf), count, datatype, dest, tag, mpi_comm); } @@ -65,7 +65,7 @@ inline void MPI::Comm::Rsend(const void *buf, int count, const MPI::Datatype & datatype, int dest, int tag) const { - (void)MPI_Rsend(const_cast(buf), count, datatype, + (void)MPI_Rsend(const_cast(buf), count, datatype, dest, tag, mpi_comm); } @@ -74,7 +74,7 @@ MPI::Comm::Isend(const void *buf, int count, const MPI::Datatype & datatype, int dest, int tag) const { MPI_Request request; - (void)MPI_Isend(const_cast(buf), count, datatype, + (void)MPI_Isend(const_cast(buf), count, datatype, dest, tag, mpi_comm, &request); return request; } @@ -83,8 +83,8 @@ inline MPI::Request MPI::Comm::Ibsend(const void *buf, int count, const MPI::Datatype & datatype, int dest, int tag) const { - MPI_Request request; - (void)MPI_Ibsend(const_cast(buf), count, 
datatype, + MPI_Request request; + (void)MPI_Ibsend(const_cast(buf), count, datatype, dest, tag, mpi_comm, &request); return request; } @@ -93,7 +93,7 @@ inline MPI::Request MPI::Comm::Issend(const void *buf, int count, const MPI::Datatype & datatype, int dest, int tag) const { - MPI_Request request; + MPI_Request request; (void)MPI_Issend(const_cast(buf), count, datatype, dest, tag, mpi_comm, &request); return request; @@ -104,7 +104,7 @@ MPI::Comm::Irsend(const void *buf, int count, const MPI::Datatype & datatype, int dest, int tag) const { MPI_Request request; - (void)MPI_Irsend(const_cast(buf), count, datatype, + (void)MPI_Irsend(const_cast(buf), count, datatype, dest, tag, mpi_comm, &request); return request; } @@ -114,7 +114,7 @@ MPI::Comm::Irecv(void *buf, int count, const MPI::Datatype & datatype, int source, int tag) const { MPI_Request request; - (void)MPI_Irecv(buf, count, datatype, source, + (void)MPI_Irecv(buf, count, datatype, source, tag, mpi_comm, &request); return request; } @@ -127,7 +127,7 @@ MPI::Comm::Iprobe(int source, int tag, MPI::Status & status) const (void)MPI_Iprobe(source, tag, mpi_comm, &t, &status.mpi_status); return OPAL_INT_TO_BOOL(t); } - + inline bool MPI::Comm::Iprobe(int source, int tag) const { @@ -151,9 +151,9 @@ MPI::Comm::Probe(int source, int tag) const inline MPI::Prequest MPI::Comm::Send_init(const void *buf, int count, const MPI::Datatype & datatype, int dest, int tag) const -{ +{ MPI_Request request; - (void)MPI_Send_init(const_cast(buf), count, datatype, + (void)MPI_Send_init(const_cast(buf), count, datatype, dest, tag, mpi_comm, &request); return request; } @@ -162,8 +162,8 @@ inline MPI::Prequest MPI::Comm::Bsend_init(const void *buf, int count, const MPI::Datatype & datatype, int dest, int tag) const { - MPI_Request request; - (void)MPI_Bsend_init(const_cast(buf), count, datatype, + MPI_Request request; + (void)MPI_Bsend_init(const_cast(buf), count, datatype, dest, tag, mpi_comm, &request); return request; } @@ 
-172,7 +172,7 @@ inline MPI::Prequest MPI::Comm::Ssend_init(const void *buf, int count, const MPI::Datatype & datatype, int dest, int tag) const { - MPI_Request request; + MPI_Request request; (void)MPI_Ssend_init(const_cast(buf), count, datatype, dest, tag, mpi_comm, &request); return request; @@ -182,7 +182,7 @@ inline MPI::Prequest MPI::Comm::Rsend_init(const void *buf, int count, const MPI::Datatype & datatype, int dest, int tag) const { - MPI_Request request; + MPI_Request request; (void)MPI_Rsend_init(const_cast(buf), count, datatype, dest, tag, mpi_comm, &request); return request; @@ -192,45 +192,45 @@ inline MPI::Prequest MPI::Comm::Recv_init(void *buf, int count, const MPI::Datatype & datatype, int source, int tag) const { - MPI_Request request; - (void)MPI_Recv_init(buf, count, datatype, source, + MPI_Request request; + (void)MPI_Recv_init(buf, count, datatype, source, tag, mpi_comm, &request); return request; } inline void MPI::Comm::Sendrecv(const void *sendbuf, int sendcount, - const MPI::Datatype & sendtype, int dest, int sendtag, - void *recvbuf, int recvcount, + const MPI::Datatype & sendtype, int dest, int sendtag, + void *recvbuf, int recvcount, const MPI::Datatype & recvtype, int source, int recvtag, MPI::Status & status) const { - (void)MPI_Sendrecv(const_cast(sendbuf), sendcount, + (void)MPI_Sendrecv(const_cast(sendbuf), sendcount, sendtype, - dest, sendtag, recvbuf, recvcount, - recvtype, + dest, sendtag, recvbuf, recvcount, + recvtype, source, recvtag, mpi_comm, &status.mpi_status); } inline void MPI::Comm::Sendrecv(const void *sendbuf, int sendcount, - const MPI::Datatype & sendtype, int dest, int sendtag, - void *recvbuf, int recvcount, + const MPI::Datatype & sendtype, int dest, int sendtag, + void *recvbuf, int recvcount, const MPI::Datatype & recvtype, int source, int recvtag) const { - (void)MPI_Sendrecv(const_cast(sendbuf), sendcount, + (void)MPI_Sendrecv(const_cast(sendbuf), sendcount, sendtype, - dest, sendtag, recvbuf, recvcount, - 
recvtype, + dest, sendtag, recvbuf, recvcount, + recvtype, source, recvtag, mpi_comm, MPI_STATUS_IGNORE); } inline void MPI::Comm::Sendrecv_replace(void *buf, int count, - const MPI::Datatype & datatype, int dest, + const MPI::Datatype & datatype, int dest, int sendtag, int source, - int recvtag, MPI::Status & status) const + int recvtag, MPI::Status & status) const { (void)MPI_Sendrecv_replace(buf, count, datatype, dest, sendtag, source, recvtag, mpi_comm, @@ -239,9 +239,9 @@ MPI::Comm::Sendrecv_replace(void *buf, int count, inline void MPI::Comm::Sendrecv_replace(void *buf, int count, - const MPI::Datatype & datatype, int dest, + const MPI::Datatype & datatype, int dest, int sendtag, int source, - int recvtag) const + int recvtag) const { (void)MPI_Sendrecv_replace(buf, count, datatype, dest, sendtag, source, recvtag, mpi_comm, @@ -253,29 +253,29 @@ MPI::Comm::Sendrecv_replace(void *buf, int count, // inline MPI::Group -MPI::Comm::Get_group() const +MPI::Comm::Get_group() const { MPI_Group group; (void)MPI_Comm_group(mpi_comm, &group); return group; } - + inline int -MPI::Comm::Get_size() const +MPI::Comm::Get_size() const { int size; (void)MPI_Comm_size (mpi_comm, &size); return size; } - + inline int -MPI::Comm::Get_rank() const +MPI::Comm::Get_rank() const { - int rank; - (void)MPI_Comm_rank (mpi_comm, &rank); - return rank; + int myrank; + (void)MPI_Comm_rank (mpi_comm, &myrank); + return myrank; } - + inline int MPI::Comm::Compare(const MPI::Comm & comm1, const MPI::Comm & comm2) @@ -286,7 +286,7 @@ MPI::Comm::Compare(const MPI::Comm & comm1, } inline void -MPI::Comm::Free(void) +MPI::Comm::Free(void) { (void)MPI_Comm_free(&mpi_comm); } @@ -311,16 +311,16 @@ MPI::Comm::Barrier() const } inline void -MPI::Comm::Bcast(void *buffer, int count, +MPI::Comm::Bcast(void *buffer, int count, const MPI::Datatype& datatype, int root) const -{ +{ (void)MPI_Bcast(buffer, count, datatype, root, mpi_comm); } inline void -MPI::Comm::Gather(const void *sendbuf, int 
sendcount, - const MPI::Datatype & sendtype, - void *recvbuf, int recvcount, +MPI::Comm::Gather(const void *sendbuf, int sendcount, + const MPI::Datatype & sendtype, + void *recvbuf, int recvcount, const MPI::Datatype & recvtype, int root) const { (void)MPI_Gather(const_cast(sendbuf), sendcount, sendtype, @@ -328,66 +328,66 @@ MPI::Comm::Gather(const void *sendbuf, int sendcount, } inline void -MPI::Comm::Gatherv(const void *sendbuf, int sendcount, - const MPI::Datatype & sendtype, void *recvbuf, - const int recvcounts[], const int displs[], +MPI::Comm::Gatherv(const void *sendbuf, int sendcount, + const MPI::Datatype & sendtype, void *recvbuf, + const int recvcounts[], const int displs[], const MPI::Datatype & recvtype, int root) const { (void)MPI_Gatherv(const_cast(sendbuf), sendcount, sendtype, recvbuf, const_cast(recvcounts), - const_cast(displs), + const_cast(displs), recvtype, root, mpi_comm); } inline void -MPI::Comm::Scatter(const void *sendbuf, int sendcount, - const MPI::Datatype & sendtype, - void *recvbuf, int recvcount, +MPI::Comm::Scatter(const void *sendbuf, int sendcount, + const MPI::Datatype & sendtype, + void *recvbuf, int recvcount, const MPI::Datatype & recvtype, int root) const -{ +{ (void)MPI_Scatter(const_cast(sendbuf), sendcount, sendtype, recvbuf, recvcount, recvtype, root, mpi_comm); } inline void -MPI::Comm::Scatterv(const void *sendbuf, const int sendcounts[], +MPI::Comm::Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], const MPI::Datatype & sendtype, - void *recvbuf, int recvcount, + void *recvbuf, int recvcount, const MPI::Datatype & recvtype, int root) const { (void)MPI_Scatterv(const_cast(sendbuf), - const_cast(sendcounts), - const_cast(displs), sendtype, - recvbuf, recvcount, recvtype, + const_cast(sendcounts), + const_cast(displs), sendtype, + recvbuf, recvcount, recvtype, root, mpi_comm); } inline void -MPI::Comm::Allgather(const void *sendbuf, int sendcount, - const MPI::Datatype & sendtype, void 
*recvbuf, - int recvcount, const MPI::Datatype & recvtype) const +MPI::Comm::Allgather(const void *sendbuf, int sendcount, + const MPI::Datatype & sendtype, void *recvbuf, + int recvcount, const MPI::Datatype & recvtype) const { - (void)MPI_Allgather(const_cast(sendbuf), sendcount, + (void)MPI_Allgather(const_cast(sendbuf), sendcount, sendtype, recvbuf, recvcount, recvtype, mpi_comm); } inline void -MPI::Comm::Allgatherv(const void *sendbuf, int sendcount, - const MPI::Datatype & sendtype, void *recvbuf, +MPI::Comm::Allgatherv(const void *sendbuf, int sendcount, + const MPI::Datatype & sendtype, void *recvbuf, const int recvcounts[], const int displs[], const MPI::Datatype & recvtype) const { - (void)MPI_Allgatherv(const_cast(sendbuf), sendcount, - sendtype, recvbuf, + (void)MPI_Allgatherv(const_cast(sendbuf), sendcount, + sendtype, recvbuf, const_cast(recvcounts), - const_cast(displs), + const_cast(displs), recvtype, mpi_comm); } inline void -MPI::Comm::Alltoall(const void *sendbuf, int sendcount, - const MPI::Datatype & sendtype, void *recvbuf, +MPI::Comm::Alltoall(const void *sendbuf, int sendcount, + const MPI::Datatype & sendtype, void *recvbuf, int recvcount, const MPI::Datatype & recvtype) const { (void)MPI_Alltoall(const_cast(sendbuf), sendcount, @@ -396,17 +396,17 @@ MPI::Comm::Alltoall(const void *sendbuf, int sendcount, } inline void -MPI::Comm::Alltoallv(const void *sendbuf, const int sendcounts[], - const int sdispls[], const MPI::Datatype & sendtype, - void *recvbuf, const int recvcounts[], - const int rdispls[], - const MPI::Datatype & recvtype) const +MPI::Comm::Alltoallv(const void *sendbuf, const int sendcounts[], + const int sdispls[], const MPI::Datatype & sendtype, + void *recvbuf, const int recvcounts[], + const int rdispls[], + const MPI::Datatype & recvtype) const { - (void)MPI_Alltoallv(const_cast(sendbuf), - const_cast(sendcounts), - const_cast(sdispls), sendtype, recvbuf, - const_cast(recvcounts), - const_cast(rdispls), + 
(void)MPI_Alltoallv(const_cast(sendbuf), + const_cast(sendcounts), + const_cast(sdispls), sendtype, recvbuf, + const_cast(recvcounts), + const_cast(rdispls), recvtype,mpi_comm); } @@ -414,33 +414,33 @@ inline void MPI::Comm::Alltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI::Datatype sendtypes[], void *recvbuf, const int recvcounts[], - const int rdispls[], + const int rdispls[], const MPI::Datatype recvtypes[]) const { const int comm_size = Get_size(); MPI_Datatype *const data_type_tbl = new MPI_Datatype [2*comm_size]; - + // This must be done because MPI::Datatype arrays cannot be - // converted directly into MPI_Datatype arrays. + // converted directly into MPI_Datatype arrays. for (int i_rank=0; i_rank < comm_size; i_rank++) { data_type_tbl[i_rank] = sendtypes[i_rank]; data_type_tbl[i_rank + comm_size] = recvtypes[i_rank]; } - - (void)MPI_Alltoallw(const_cast(sendbuf), + + (void)MPI_Alltoallw(const_cast(sendbuf), const_cast(sendcounts), const_cast(sdispls), data_type_tbl, recvbuf, - const_cast(recvcounts), + const_cast(recvcounts), const_cast(rdispls), &data_type_tbl[comm_size], mpi_comm); - + delete[] data_type_tbl; } inline void -MPI::Comm::Reduce(const void *sendbuf, void *recvbuf, int count, - const MPI::Datatype & datatype, const MPI::Op& op, +MPI::Comm::Reduce(const void *sendbuf, void *recvbuf, int count, + const MPI::Datatype & datatype, const MPI::Op& op, int root) const { (void)MPI_Reduce(const_cast(sendbuf), recvbuf, count, datatype, op, root, mpi_comm); @@ -454,9 +454,9 @@ MPI::Comm::Allreduce(const void *sendbuf, void *recvbuf, int count, } inline void -MPI::Comm::Reduce_scatter(const void *sendbuf, void *recvbuf, - int recvcounts[], - const MPI::Datatype & datatype, +MPI::Comm::Reduce_scatter(const void *sendbuf, void *recvbuf, + int recvcounts[], + const MPI::Datatype & datatype, const MPI::Op& op) const { (void)MPI_Reduce_scatter(const_cast(sendbuf), recvbuf, recvcounts, @@ -484,7 +484,7 @@ 
MPI::Comm::Get_parent() inline MPI::Intercomm -MPI::Comm::Join(const int fd) +MPI::Comm::Join(const int fd) { MPI_Comm newcomm; (void) MPI_Comm_join((int) fd, &newcomm); @@ -502,29 +502,29 @@ MPI::Comm::Get_name(char* comm_name, int& resultlen) const } inline void -MPI::Comm::Set_name(const char* comm_name) +MPI::Comm::Set_name(const char* comm_name) { (void) MPI_Comm_set_name(mpi_comm, const_cast(comm_name)); } - + // //Process Topologies // inline int -MPI::Comm::Get_topology() const +MPI::Comm::Get_topology() const { int status; (void)MPI_Topo_test(mpi_comm, &status); return status; } - + // // Environmental Inquiry // inline void -MPI::Comm::Abort(int errorcode) +MPI::Comm::Abort(int errorcode) { (void)MPI_Abort(mpi_comm, errorcode); } @@ -549,7 +549,7 @@ MPI::Comm::Set_errhandler(const MPI::Errhandler& errhandler) (void)MPI_Comm_set_errhandler(mpi_comm, errhandler); } -inline void +inline void MPI::Comm::Call_errhandler(int errorcode) const { (void) MPI_Comm_call_errhandler(mpi_comm, errorcode); @@ -559,12 +559,12 @@ MPI::Comm::Call_errhandler(int errorcode) const // functions inline int MPI::Comm::Create_keyval(MPI::Comm::Copy_attr_function* comm_copy_attr_fn, - MPI::Comm::Delete_attr_function* comm_delete_attr_fn, + MPI::Comm::Delete_attr_function* comm_delete_attr_fn, void* extra_state) { // Back-end function does the heavy lifting int ret, keyval; - ret = do_create_keyval(NULL, NULL, + ret = do_create_keyval(NULL, NULL, comm_copy_attr_fn, comm_delete_attr_fn, extra_state, keyval); return (MPI_SUCCESS == ret) ? 
keyval : ret; @@ -574,7 +574,7 @@ MPI::Comm::Create_keyval(MPI::Comm::Copy_attr_function* comm_copy_attr_fn, // functions inline int MPI::Comm::Create_keyval(MPI_Comm_copy_attr_function* comm_copy_attr_fn, - MPI_Comm_delete_attr_function* comm_delete_attr_fn, + MPI_Comm_delete_attr_function* comm_delete_attr_fn, void* extra_state) { // Back-end function does the heavy lifting @@ -645,11 +645,11 @@ MPI::Comm::Delete_attr(int comm_keyval) // about them. Use comments instead of just deleting the param names // outright so that we know/remember what they are. inline int -MPI::Comm::NULL_COPY_FN(const MPI::Comm& /* oldcomm */, +MPI::Comm::NULL_COPY_FN(const MPI::Comm& /* oldcomm */, int /* comm_keyval */, - void* /* extra_state */, + void* /* extra_state */, void* /* attribute_val_in */, - void* /* attribute_val_out */, + void* /* attribute_val_out */, bool& flag) { flag = false; @@ -664,12 +664,12 @@ MPI::Comm::DUP_FN(const MPI::Comm& oldcomm, int comm_keyval, if (sizeof(bool) != sizeof(int)) { int f = (int)flag; int ret; - ret = MPI_COMM_DUP_FN(oldcomm, comm_keyval, extra_state, + ret = MPI_COMM_DUP_FN(oldcomm, comm_keyval, extra_state, attribute_val_in, attribute_val_out, &f); flag = OPAL_INT_TO_BOOL(f); return ret; } else { - return MPI_COMM_DUP_FN(oldcomm, comm_keyval, extra_state, + return MPI_COMM_DUP_FN(oldcomm, comm_keyval, extra_state, attribute_val_in, attribute_val_out, (int*)&flag); } @@ -679,8 +679,8 @@ MPI::Comm::DUP_FN(const MPI::Comm& oldcomm, int comm_keyval, // about them. Use comments instead of just deleting the param names // outright so that we know/remember what they are. 
inline int -MPI::Comm::NULL_DELETE_FN(MPI::Comm& /* comm */, - int /* comm_keyval */, +MPI::Comm::NULL_DELETE_FN(MPI::Comm& /* comm */, + int /* comm_keyval */, void* /* attribute_val */, void* /* extra_state */) { diff --git a/ompi/mpi/cxx/constants.h b/ompi/mpi/cxx/constants.h index 173b7fe3abe..eb4a991626b 100644 --- a/ompi/mpi/cxx/constants.h +++ b/ompi/mpi/cxx/constants.h @@ -1,21 +1,21 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -113,14 +113,14 @@ static const int MAX_OBJECT_NAME = MPI_MAX_OBJECT_NAME; // elementary datatypes (C / C++) OMPI_DECLSPEC extern const Datatype CHAR; -OMPI_DECLSPEC extern const Datatype SHORT; -OMPI_DECLSPEC extern const Datatype INT; +OMPI_DECLSPEC extern const Datatype SHORT; +OMPI_DECLSPEC extern const Datatype INT; OMPI_DECLSPEC extern const Datatype LONG; OMPI_DECLSPEC extern const Datatype SIGNED_CHAR; OMPI_DECLSPEC extern const Datatype UNSIGNED_CHAR; -OMPI_DECLSPEC extern const Datatype UNSIGNED_SHORT; -OMPI_DECLSPEC extern const Datatype UNSIGNED; -OMPI_DECLSPEC extern const Datatype UNSIGNED_LONG; +OMPI_DECLSPEC extern const Datatype UNSIGNED_SHORT; +OMPI_DECLSPEC extern const Datatype UNSIGNED; +OMPI_DECLSPEC extern const Datatype UNSIGNED_LONG; OMPI_DECLSPEC extern const Datatype FLOAT; OMPI_DECLSPEC extern const Datatype DOUBLE; OMPI_DECLSPEC extern const Datatype LONG_DOUBLE; @@ -245,7 +245,7 @@ OMPI_DECLSPEC extern Comm_Null COMM_NULL; OMPI_DECLSPEC extern const Datatype DATATYPE_NULL; OMPI_DECLSPEC extern Request REQUEST_NULL; OMPI_DECLSPEC extern const Op OP_NULL; -OMPI_DECLSPEC extern const Errhandler ERRHANDLER_NULL; +OMPI_DECLSPEC extern const Errhandler ERRHANDLER_NULL; #if OMPI_PROVIDE_MPI_FILE_INTERFACE OMPI_DECLSPEC extern const File FILE_NULL; #endif diff --git a/ompi/mpi/cxx/cxx_glue.c b/ompi/mpi/cxx/cxx_glue.c new file mode 100644 index 00000000000..76aa41be6c9 --- /dev/null +++ b/ompi/mpi/cxx/cxx_glue.c @@ -0,0 +1,158 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/communicator/communicator.h" +#include "ompi/attribute/attribute.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/file/file.h" +#include "opal/class/opal_list.h" +#include "cxx_glue.h" + +typedef struct ompi_cxx_intercept_file_extra_state_item_t { + opal_list_item_t super; + ompi_cxx_intercept_file_extra_state_t state; +} ompi_cxx_intercept_file_extra_state_item_t; + +OBJ_CLASS_DECLARATION(ompi_cxx_intercept_file_extra_state_item_t); +OBJ_CLASS_INSTANCE(ompi_cxx_intercept_file_extra_state_item_t, opal_list_item_t, + NULL, NULL); + +ompi_cxx_communicator_type_t ompi_cxx_comm_get_type (MPI_Comm comm) +{ + if (OMPI_COMM_IS_GRAPH(comm)) { + return OMPI_CXX_COMM_TYPE_GRAPH; + } else if (OMPI_COMM_IS_CART(comm)) { + return OMPI_CXX_COMM_TYPE_CART; + } else if (OMPI_COMM_IS_INTRA(comm)) { + return OMPI_CXX_COMM_TYPE_INTRACOMM; + } else if (OMPI_COMM_IS_INTER(comm)) { + return OMPI_CXX_COMM_TYPE_INTERCOMM; + } + + return OMPI_CXX_COMM_TYPE_UNKNOWN; +} + +int ompi_cxx_errhandler_invoke_comm (MPI_Comm comm, int ret, const char *message) +{ + return OMPI_ERRHANDLER_INVOKE (comm, ret, message); +} + +#if OMPI_PROVIDE_MPI_FILE_INTERFACE +int ompi_cxx_errhandler_invoke_file (MPI_File file, int ret, const char *message) +{ + return OMPI_ERRHANDLER_INVOKE (file, ret, message); +} +#endif + +int ompi_cxx_attr_create_keyval_comm (MPI_Comm_copy_attr_function *copy_fn, + MPI_Comm_delete_attr_function* delete_fn, int *keyval, void *extra_state, + int flags, void *bindings_extra_state) +{ + ompi_attribute_fn_ptr_union_t copy_fn_u = {.attr_communicator_copy_fn = + (MPI_Comm_internal_copy_attr_function *) copy_fn}; + ompi_attribute_fn_ptr_union_t delete_fn_u = {.attr_communicator_delete_fn = + (MPI_Comm_delete_attr_function *) delete_fn}; + + return ompi_attr_create_keyval (COMM_ATTR, copy_fn_u, delete_fn_u, keyval, extra_state, 0, bindings_extra_state); 
+} + +int ompi_cxx_attr_create_keyval_win (MPI_Win_copy_attr_function *copy_fn, + MPI_Win_delete_attr_function* delete_fn, int *keyval, void *extra_state, + int flags, void *bindings_extra_state) +{ + ompi_attribute_fn_ptr_union_t copy_fn_u = {.attr_win_copy_fn = + (MPI_Win_internal_copy_attr_function *) copy_fn}; + ompi_attribute_fn_ptr_union_t delete_fn_u = {.attr_win_delete_fn = + (MPI_Win_delete_attr_function *) delete_fn}; + + return ompi_attr_create_keyval (WIN_ATTR, copy_fn_u, delete_fn_u, keyval, extra_state, 0, NULL); +} + +int ompi_cxx_attr_create_keyval_type (MPI_Type_copy_attr_function *copy_fn, + MPI_Type_delete_attr_function* delete_fn, int *keyval, void *extra_state, + int flags, void *bindings_extra_state) +{ + ompi_attribute_fn_ptr_union_t copy_fn_u = {.attr_datatype_copy_fn = + (MPI_Type_internal_copy_attr_function *) copy_fn}; + ompi_attribute_fn_ptr_union_t delete_fn_u = {.attr_datatype_delete_fn = + (MPI_Type_delete_attr_function *) delete_fn}; + + return ompi_attr_create_keyval (TYPE_ATTR, copy_fn_u, delete_fn_u, keyval, extra_state, 0, NULL); +} + +MPI_Errhandler ompi_cxx_errhandler_create_comm (ompi_cxx_dummy_fn_t *fn) +{ + ompi_errhandler_t *errhandler; + errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, + (ompi_errhandler_generic_handler_fn_t *) fn, + OMPI_ERRHANDLER_LANG_CXX); + errhandler->eh_cxx_dispatch_fn = + (ompi_errhandler_cxx_dispatch_fn_t *) ompi_mpi_cxx_comm_errhandler_invoke; + return errhandler; +} + +MPI_Errhandler ompi_cxx_errhandler_create_win (ompi_cxx_dummy_fn_t *fn) +{ + ompi_errhandler_t *errhandler; + errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_WIN, + (ompi_errhandler_generic_handler_fn_t *) fn, + OMPI_ERRHANDLER_LANG_CXX); + errhandler->eh_cxx_dispatch_fn = + (ompi_errhandler_cxx_dispatch_fn_t *) ompi_mpi_cxx_win_errhandler_invoke; + return errhandler; +} + +#if OMPI_PROVIDE_MPI_FILE_INTERFACE +MPI_Errhandler ompi_cxx_errhandler_create_file (ompi_cxx_dummy_fn_t *fn) +{ + ompi_errhandler_t 
*errhandler; + errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, + (ompi_errhandler_generic_handler_fn_t *) fn, + OMPI_ERRHANDLER_LANG_CXX); + errhandler->eh_cxx_dispatch_fn = + (ompi_errhandler_cxx_dispatch_fn_t *) ompi_mpi_cxx_file_errhandler_invoke; + return errhandler; +} +#endif + +ompi_cxx_intercept_file_extra_state_t +*ompi_cxx_new_intercept_state (void *read_fn_cxx, void *write_fn_cxx, void *extent_fn_cxx, + void *extra_state_cxx) +{ + ompi_cxx_intercept_file_extra_state_item_t *intercept; + + intercept = OBJ_NEW(ompi_cxx_intercept_file_extra_state_item_t); + if (NULL == intercept) { + return NULL; + } + + opal_list_append(&ompi_registered_datareps, &intercept->super); + intercept->state.read_fn_cxx = read_fn_cxx; + intercept->state.write_fn_cxx = write_fn_cxx; + intercept->state.extent_fn_cxx = extent_fn_cxx; + intercept->state.extra_state_cxx = extra_state_cxx; + + return &intercept->state; +} + +void ompi_cxx_errhandler_set_callbacks (struct ompi_errhandler_t *errhandler, MPI_Comm_errhandler_function *eh_comm_fn, + ompi_file_errhandler_fn *eh_file_fn, MPI_Win_errhandler_function *eh_win_fn) +{ + errhandler->eh_comm_fn = eh_comm_fn; +#if OMPI_PROVIDE_MPI_FILE_INTERFACE + errhandler->eh_file_fn = eh_file_fn; +#endif + errhandler->eh_win_fn = eh_win_fn; +} diff --git a/ompi/mpi/cxx/cxx_glue.h b/ompi/mpi/cxx/cxx_glue.h new file mode 100644 index 00000000000..8cb906f9f79 --- /dev/null +++ b/ompi/mpi/cxx/cxx_glue.h @@ -0,0 +1,94 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OMPI_CXX_COMM_GLUE_H) +#define OMPI_CXX_COMM_GLUE_H + +#include "ompi_config.h" +#include "ompi/errhandler/errhandler.h" +#include + +#include "mpi.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +typedef struct ompi_cxx_intercept_file_extra_state_t { + void *read_fn_cxx; + void *write_fn_cxx; + void *extent_fn_cxx; + void *extra_state_cxx; +} ompi_cxx_intercept_file_extra_state_t; + +enum ompi_cxx_communicator_type_t { + OMPI_CXX_COMM_TYPE_UNKNOWN, + OMPI_CXX_COMM_TYPE_INTRACOMM, + OMPI_CXX_COMM_TYPE_INTERCOMM, + OMPI_CXX_COMM_TYPE_CART, + OMPI_CXX_COMM_TYPE_GRAPH, +}; +typedef enum ompi_cxx_communicator_type_t ompi_cxx_communicator_type_t; + +/* need to declare this error handler here */ +struct ompi_predefined_errhandler_t; +extern struct ompi_predefined_errhandler_t ompi_mpi_errors_throw_exceptions; + +/** + * C++ invocation function signature + */ +typedef void (ompi_cxx_dummy_fn_t) (void); + +ompi_cxx_communicator_type_t ompi_cxx_comm_get_type (MPI_Comm comm); + +int ompi_cxx_errhandler_invoke_comm (MPI_Comm comm, int ret, const char *message); + +int ompi_cxx_attr_create_keyval_comm (MPI_Comm_copy_attr_function *copy_fn, + MPI_Comm_delete_attr_function* delete_fn, int *keyval, void *extra_state, + int flags, void *bindings_extra_state); +int ompi_cxx_attr_create_keyval_win (MPI_Win_copy_attr_function *copy_fn, + MPI_Win_delete_attr_function* delete_fn, int *keyval, void *extra_state, + int flags, void *bindings_extra_state); +int ompi_cxx_attr_create_keyval_type (MPI_Type_copy_attr_function *copy_fn, + MPI_Type_delete_attr_function* delete_fn, int *keyval, void *extra_state, + int flags, void *bindings_extra_state); + +void ompi_mpi_cxx_comm_errhandler_invoke (MPI_Comm *mpi_comm, int *err, + const char *message, void *comm_fn); +void ompi_mpi_cxx_win_errhandler_invoke (MPI_Win *mpi_comm, int *err, + const char *message, void *win_fn); 
+#if OMPI_PROVIDE_MPI_FILE_INTERFACE +int ompi_cxx_errhandler_invoke_file (MPI_File file, int ret, const char *message); +void ompi_mpi_cxx_file_errhandler_invoke (MPI_File *mpi_comm, int *err, + const char *message, void *file_fn); +#endif + +MPI_Errhandler ompi_cxx_errhandler_create_comm (ompi_cxx_dummy_fn_t *fn); +MPI_Errhandler ompi_cxx_errhandler_create_win (ompi_cxx_dummy_fn_t *fn); +MPI_Errhandler ompi_cxx_errhandler_create_file (ompi_cxx_dummy_fn_t *fn); + +ompi_cxx_intercept_file_extra_state_t +*ompi_cxx_new_intercept_state (void *read_fn_cxx, void *write_fn_cxx, void *extent_fn_cxx, + void *extra_state_cxx); + +void ompi_cxx_errhandler_set_cxx_dispatch_fn (struct ompi_errhandler_t *errhandler, + ompi_errhandler_cxx_dispatch_fn_t *dispatch_fn); + +void ompi_cxx_errhandler_set_callbacks (struct ompi_errhandler_t *errhandler, MPI_Comm_errhandler_function *eh_comm_fn, + ompi_file_errhandler_fn *eh_file_fn, MPI_Win_errhandler_function *eh_win_fn); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif /* OMPI_CXX_COMM_GLUE_H */ diff --git a/ompi/mpi/cxx/datatype.cc b/ompi/mpi/cxx/datatype.cc index 269e94af6c7..343f0ea731f 100644 --- a/ompi/mpi/cxx/datatype.cc +++ b/ompi/mpi/cxx/datatype.cc @@ -1,25 +1,21 @@ // -*- c++ -*- -// -// Copyright (c) 2006 Los Alamos National Security, LLC. All rights -// reserved. +// +// Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights +// reserved. // Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. // Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // // do not include ompi_config.h because it kills the free/malloc defines #include "mpi.h" #include "ompi/mpi/cxx/mpicxx.h" - - -#include "ompi/communicator/communicator.h" -#include "ompi/attribute/attribute.h" -#include "ompi/errhandler/errhandler.h" - +#include "ompi/constants.h" +#include "cxx_glue.h" void MPI::Datatype::Free() @@ -35,21 +31,16 @@ MPI::Datatype::do_create_keyval(MPI_Type_copy_attr_function* c_copy_fn, void* extra_state, int &keyval) { int ret, count = 0; - ompi_attribute_fn_ptr_union_t copy_fn; - ompi_attribute_fn_ptr_union_t delete_fn; keyval_intercept_data_t *cxx_extra_state; // If both the callbacks are C, then do the simple thing -- no // need for all the C++ machinery. if (NULL != c_copy_fn && NULL != c_delete_fn) { - copy_fn.attr_datatype_copy_fn = - (MPI_Type_internal_copy_attr_function*) c_copy_fn; - delete_fn.attr_datatype_delete_fn = c_delete_fn; - ret = ompi_attr_create_keyval(COMM_ATTR, copy_fn, delete_fn, - &keyval, extra_state, 0, NULL); + ret = ompi_cxx_attr_create_keyval_type (c_copy_fn, c_delete_fn, &keyval, + extra_state, 0, NULL); if (MPI_SUCCESS != ret) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, - "MPI::Datatype::Create_keyval"); + return ompi_cxx_errhandler_invoke_comm (MPI_COMM_WORLD, ret, + "MPI::Datatype::Create_keyval"); } } @@ -60,12 +51,12 @@ MPI::Datatype::do_create_keyval(MPI_Type_copy_attr_function* c_copy_fn, // extra_state for the delete callback), we have to use the C++ // callbacks for both (and therefore translate the C++-special // extra_state into the user's original extra_state). 
- cxx_extra_state = (keyval_intercept_data_t*) - malloc(sizeof(keyval_intercept_data_t)); + cxx_extra_state = (keyval_intercept_data_t *) malloc(sizeof(*cxx_extra_state)); if (NULL == cxx_extra_state) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, - "MPI::Datatype::Create_keyval"); + return ompi_cxx_errhandler_invoke_comm (MPI_COMM_WORLD, MPI_ERR_NO_MEM, + "MPI::Datatype::Create_keyval"); } + cxx_extra_state->c_copy_fn = c_copy_fn; cxx_extra_state->cxx_copy_fn = cxx_copy_fn; cxx_extra_state->c_delete_fn = c_delete_fn; @@ -87,8 +78,8 @@ MPI::Datatype::do_create_keyval(MPI_Type_copy_attr_function* c_copy_fn, } if (2 != count) { free(cxx_extra_state); - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, - "MPI::Datatype::Create_keyval"); + return ompi_cxx_errhandler_invoke_comm (MPI_COMM_WORLD, MPI_ERR_ARG, + "MPI::Datatype::Create_keyval"); } // We do not call MPI_Datatype_create_keyval() here because we need to @@ -100,18 +91,12 @@ MPI::Datatype::do_create_keyval(MPI_Type_copy_attr_function* c_copy_fn, // MPI_Comm_create_keyval(). Hence, we do all the work here (and // ensure to set the destructor atomicly when the keyval is // created). 
- - copy_fn.attr_datatype_copy_fn = - (MPI_Type_internal_copy_attr_function*) - ompi_mpi_cxx_type_copy_attr_intercept; - delete_fn.attr_datatype_delete_fn = - ompi_mpi_cxx_type_delete_attr_intercept; - ret = ompi_attr_create_keyval(TYPE_ATTR, copy_fn, delete_fn, - &keyval, cxx_extra_state, 0, - cxx_extra_state); + ret = ompi_cxx_attr_create_keyval_type ((MPI_Type_copy_attr_function *) ompi_mpi_cxx_type_copy_attr_intercept, + ompi_mpi_cxx_type_delete_attr_intercept, &keyval, + cxx_extra_state, 0, NULL); if (OMPI_SUCCESS != ret) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, - "MPI::Datatype::Create_keyval"); + return ompi_cxx_errhandler_invoke_comm (MPI_COMM_WORLD, ret, + "MPI::Datatype::Create_keyval"); } return MPI_SUCCESS; diff --git a/ompi/mpi/cxx/datatype.h b/ompi/mpi/cxx/datatype.h index 061173bca80..b013a1d110e 100644 --- a/ompi/mpi/cxx/datatype.h +++ b/ompi/mpi/cxx/datatype.h @@ -6,7 +6,7 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. @@ -14,9 +14,9 @@ // Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -39,7 +39,7 @@ class Datatype { inline Datatype(const Datatype& dt) : pmpi_datatype(dt.pmpi_datatype) { } inline Datatype(const PMPI::Datatype& dt) : pmpi_datatype(dt) { } - + inline virtual ~Datatype() {} inline Datatype& operator=(const Datatype& dt) { @@ -53,7 +53,7 @@ class Datatype { { return (bool) !(*this == a); } // inter-language operability - inline Datatype& operator= (const MPI_Datatype &i) + inline Datatype& operator= (const MPI_Datatype &i) { pmpi_datatype = i; return *this; } inline operator MPI_Datatype() const { return (MPI_Datatype)pmpi_datatype; } @@ -84,7 +84,7 @@ class Datatype { { return (bool) !(*this == a); } // inter-language operability - inline Datatype& operator= (const MPI_Datatype &i) + inline Datatype& operator= (const MPI_Datatype &i) { mpi_datatype = i; return *this; } inline operator MPI_Datatype () const { return mpi_datatype; } @@ -92,7 +92,7 @@ class Datatype { #endif - // + // // User Defined Functions // typedef int Copy_attr_function(const Datatype& oldtype, @@ -101,27 +101,27 @@ class Datatype { const void* attribute_val_in, void* attribute_val_out, bool& flag); - - typedef int Delete_attr_function(Datatype& type, int type_keyval, + + typedef int Delete_attr_function(Datatype& type, int type_keyval, void* attribute_val, void* extra_state); - + // // Point-to-Point Communication // - + virtual Datatype Create_contiguous(int count) const; - + virtual Datatype Create_vector(int count, int blocklength, int stride) const; - + virtual Datatype Create_indexed(int count, - const int array_of_blocklengths[], + const int array_of_blocklengths[], const int array_of_displacements[]) const; static Datatype Create_struct(int count, const int array_of_blocklengths[], const Aint array_of_displacements[], const Datatype array_if_types[]); - + virtual Datatype Create_hindexed(int count, const int array_of_blocklengths[], const Aint array_of_displacements[]) 
const; @@ -138,10 +138,10 @@ class Datatype { virtual void Get_true_extent(Aint&, Aint&) const; virtual void Commit(); - + virtual void Free(); - virtual void Pack(const void* inbuf, int incount, void *outbuf, + virtual void Pack(const void* inbuf, int incount, void *outbuf, int outsize, int& position, const Comm &comm) const; virtual void Unpack(const void* inbuf, int insize, void *outbuf, int outcount, @@ -161,9 +161,9 @@ class Datatype { // Miscellany // virtual Datatype Create_subarray(int ndims, const int array_of_sizes[], - const int array_of_subsizes[], - const int array_of_starts[], int order) - const; + const int array_of_subsizes[], + const int array_of_starts[], int order) + const; virtual Datatype Create_darray(int size, int rank, int ndims, const int array_of_gsizes[], const int array_of_distribs[], diff --git a/ompi/mpi/cxx/datatype_inln.h b/ompi/mpi/cxx/datatype_inln.h index 610838daaf3..af02e56d1b5 100644 --- a/ompi/mpi/cxx/datatype_inln.h +++ b/ompi/mpi/cxx/datatype_inln.h @@ -1,12 +1,12 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. @@ -14,9 +14,9 @@ // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -44,11 +44,11 @@ MPI::Datatype::Create_vector(int count, int blocklength, inline MPI::Datatype MPI::Datatype::Create_indexed(int count, - const int array_of_blocklengths[], + const int array_of_blocklengths[], const int array_of_displacements[]) const { MPI_Datatype newtype; - (void)MPI_Type_indexed(count, const_cast(array_of_blocklengths), + (void)MPI_Type_indexed(count, const_cast(array_of_blocklengths), const_cast(array_of_displacements), mpi_datatype, &newtype); return newtype; } @@ -65,7 +65,7 @@ MPI::Datatype::Create_struct(int count, const int array_of_blocklengths[], type_array[i] = array_of_types[i]; (void)MPI_Type_create_struct(count, const_cast(array_of_blocklengths), - const_cast(array_of_displacements), + const_cast(array_of_displacements), type_array, &newtype); delete[] type_array; return newtype; @@ -98,7 +98,7 @@ MPI::Datatype::Create_indexed_block(int count, int blocklength, const int array_of_displacements[]) const { MPI_Datatype newtype; - (void)MPI_Type_create_indexed_block(count, blocklength, const_cast(array_of_displacements), + (void)MPI_Type_create_indexed_block(count, blocklength, const_cast(array_of_displacements), mpi_datatype, &newtype); return newtype; } @@ -113,7 +113,7 @@ MPI::Datatype::Create_resized(const MPI::Aint lb, const MPI::Aint extent) const } inline int -MPI::Datatype::Get_size() const +MPI::Datatype::Get_size() const { int size; (void)MPI_Type_size(mpi_datatype, &size); @@ -123,7 +123,7 @@ MPI::Datatype::Get_size() const inline void MPI::Datatype::Get_extent(MPI::Aint& lb, MPI::Aint& extent) const { - (void)MPI_Type_get_extent(mpi_datatype, &lb, &extent); + (void)MPI_Type_get_extent(mpi_datatype, &lb, &extent); } inline void @@ -133,7 +133,7 @@ MPI::Datatype::Get_true_extent(MPI::Aint& lb, MPI::Aint& extent) const } inline void -MPI::Datatype::Commit() +MPI::Datatype::Commit() { (void)MPI_Type_commit(&mpi_datatype); } @@ -150,14 +150,14 @@ 
MPI::Datatype::Pack(const void* inbuf, int incount, inline void MPI::Datatype::Unpack(const void* inbuf, int insize, void *outbuf, int outcount, int& position, - const MPI::Comm& comm) const + const MPI::Comm& comm) const { (void)MPI_Unpack(const_cast(inbuf), insize, &position, outbuf, outcount, mpi_datatype, comm); } inline int -MPI::Datatype::Pack_size(int incount, const MPI::Comm& comm) const +MPI::Datatype::Pack_size(int incount, const MPI::Comm& comm) const { int size; (void)MPI_Pack_size(incount, mpi_datatype, comm, &size); @@ -199,7 +199,7 @@ MPI::Datatype::Create_subarray(int ndims, const int array_of_sizes[], const { MPI_Datatype type; - (void) MPI_Type_create_subarray(ndims, const_cast(array_of_sizes), + (void) MPI_Type_create_subarray(ndims, const_cast(array_of_sizes), const_cast(array_of_subsizes), const_cast(array_of_starts), order, mpi_datatype, &type); @@ -272,12 +272,12 @@ MPI::Datatype::Dup() const // functions inline int MPI::Datatype::Create_keyval(MPI::Datatype::Copy_attr_function* type_copy_attr_fn, - MPI::Datatype::Delete_attr_function* type_delete_attr_fn, + MPI::Datatype::Delete_attr_function* type_delete_attr_fn, void* extra_state) { // Back-end function does the heavy lifting int ret, keyval; - ret = do_create_keyval(NULL, NULL, + ret = do_create_keyval(NULL, NULL, type_copy_attr_fn, type_delete_attr_fn, extra_state, keyval); return (MPI_SUCCESS == ret) ? keyval : ret; @@ -287,13 +287,13 @@ MPI::Datatype::Create_keyval(MPI::Datatype::Copy_attr_function* type_copy_attr_f // functions inline int MPI::Datatype::Create_keyval(MPI_Type_copy_attr_function* type_copy_attr_fn, - MPI_Type_delete_attr_function* type_delete_attr_fn, + MPI_Type_delete_attr_function* type_delete_attr_fn, void* extra_state) { // Back-end function does the heavy lifting int ret, keyval; ret = do_create_keyval(type_copy_attr_fn, type_delete_attr_fn, - NULL, NULL, + NULL, NULL, extra_state, keyval); return (MPI_SUCCESS == ret) ? 
keyval : ret; } @@ -308,7 +308,7 @@ MPI::Datatype::Create_keyval(MPI::Datatype::Copy_attr_function* type_copy_attr_f // Back-end function does the heavy lifting int ret, keyval; ret = do_create_keyval(NULL, type_delete_attr_fn, - type_copy_attr_fn, NULL, + type_copy_attr_fn, NULL, extra_state, keyval); return (MPI_SUCCESS == ret) ? keyval : ret; } @@ -358,11 +358,11 @@ MPI::Datatype::Get_contents(int max_integers, int max_addresses, { int i; MPI_Datatype *c_datatypes = new MPI_Datatype[max_datatypes]; - + (void) MPI_Type_get_contents(mpi_datatype, max_integers, max_addresses, - max_datatypes, - const_cast(array_of_integers), - const_cast(array_of_addresses), + max_datatypes, + const_cast(array_of_integers), + const_cast(array_of_addresses), c_datatypes); // Convert the C MPI_Datatypes to the user's OUT MPI::Datatype // array parameter diff --git a/ompi/mpi/cxx/errhandler.h b/ompi/mpi/cxx/errhandler.h index 7d9d8738e23..2253087c0e3 100644 --- a/ompi/mpi/cxx/errhandler.h +++ b/ompi/mpi/cxx/errhandler.h @@ -6,15 +6,15 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -40,22 +40,22 @@ class Errhandler { // comparison inline bool operator==(const Errhandler &a) { return (bool)(mpi_errhandler == a.mpi_errhandler); } - + inline bool operator!=(const Errhandler &a) { return (bool)!(*this == a); } // inter-language operability inline Errhandler& operator= (const MPI_Errhandler &i) { mpi_errhandler = i; return *this; } - + inline operator MPI_Errhandler() const { return mpi_errhandler; } - + // inline operator MPI_Errhandler*() { return &mpi_errhandler; } // // Errhandler access functions // - + virtual void Free(); private: diff --git a/ompi/mpi/cxx/errhandler_inln.h b/ompi/mpi/cxx/errhandler_inln.h index ebbe0074323..46d2430fb36 100644 --- a/ompi/mpi/cxx/errhandler_inln.h +++ b/ompi/mpi/cxx/errhandler_inln.h @@ -1,19 +1,19 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // diff --git a/ompi/mpi/cxx/exception.h b/ompi/mpi/cxx/exception.h index 835c298e00e..df15a52c97d 100644 --- a/ompi/mpi/cxx/exception.h +++ b/ompi/mpi/cxx/exception.h @@ -6,14 +6,14 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. 
-// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -25,11 +25,11 @@ class Exception { inline Exception(int ec) : pmpi_exception(ec) { } int Get_error_code() const; - + int Get_error_class() const; - + const char* Get_error_string() const; - + #else inline Exception(int ec) : error_code(ec), error_string(0), error_class(-1) { @@ -58,11 +58,11 @@ class Exception { inline int Get_error_code() const { return error_code; } inline int Get_error_class() const { return error_class; } - + inline const char* Get_error_string() const { return error_string; } #endif - + protected: #if 0 /* OMPI_ENABLE_MPI_PROFILING */ PMPI::Exception pmpi_exception; diff --git a/ompi/mpi/cxx/file.cc b/ompi/mpi/cxx/file.cc index e04aa45ebc5..fd4ebb05129 100644 --- a/ompi/mpi/cxx/file.cc +++ b/ompi/mpi/cxx/file.cc @@ -1,12 +1,12 @@ // -*- c++ -*- -// -// Copyright (c) 2006 Los Alamos National Security, LLC. All rights -// reserved. +// +// Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights +// reserved. // Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -16,29 +16,19 @@ #include "ompi/constants.h" #include "ompi/mpi/cxx/mpicxx.h" -#include "opal/class/opal_list.h" -#include "ompi/file/file.h" -#include "ompi/errhandler/errhandler.h" -#include "ompi/runtime/mpiruntime.h" +#include "cxx_glue.h" -void -MPI::File::Close() +void +MPI::File::Close() { (void) MPI_File_close(&mpi_file); } - -MPI::Errhandler + +MPI::Errhandler MPI::File::Create_errhandler(MPI::File::Errhandler_function* function) { - MPI_Errhandler c_errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, - (ompi_errhandler_generic_handler_fn_t*) function, - OMPI_ERRHANDLER_LANG_CXX); - c_errhandler->eh_cxx_dispatch_fn = - (ompi_errhandler_cxx_dispatch_fn_t*) - ompi_mpi_cxx_file_errhandler_invoke; - return c_errhandler; + return ompi_cxx_errhandler_create_file ((ompi_cxx_dummy_fn_t *) function); } @@ -54,174 +44,142 @@ MPI::File::Create_errhandler(MPI::File::Errhandler_function* function) // Data structure passed to the intercepts (see below). It is an OPAL // list_item_t so that we can clean this memory up during // MPI_FINALIZE. 
-typedef struct intercept_extra_state { - opal_list_item_t base; - MPI::Datarep_conversion_function *read_fn_cxx; - MPI::Datarep_conversion_function *write_fn_cxx; - MPI::Datarep_extent_function *extent_fn_cxx; - void *extra_state_cxx; -} intercept_extra_state_t; - -static void intercept_extra_state_constructor(intercept_extra_state_t *obj) -{ - obj->read_fn_cxx = NULL; - obj->write_fn_cxx = NULL; - obj->extent_fn_cxx = NULL; - obj->extra_state_cxx = NULL; -} - -OBJ_CLASS_DECLARATION(intercept_extra_state_t); -OBJ_CLASS_INSTANCE(intercept_extra_state_t, - opal_list_item_t, - intercept_extra_state_constructor, NULL); // Intercept function for read conversions static int read_intercept_fn(void *userbuf, MPI_Datatype type_c, int count_c, - void *filebuf, MPI_Offset position_c, + void *filebuf, MPI_Offset position_c, void *extra_state) { MPI::Datatype type_cxx(type_c); MPI::Offset position_cxx(position_c); - intercept_extra_state_t *intercept_data = - (intercept_extra_state_t*) extra_state; + ompi_cxx_intercept_file_extra_state_t *intercept_data = + (ompi_cxx_intercept_file_extra_state_t*) extra_state; + MPI::Datarep_conversion_function *read_fn_cxx = + (MPI::Datarep_conversion_function *) intercept_data->read_fn_cxx; - intercept_data->read_fn_cxx(userbuf, type_cxx, count_c, filebuf, - position_cxx, intercept_data->extra_state_cxx); + read_fn_cxx (userbuf, type_cxx, count_c, filebuf, position_cxx, + intercept_data->extra_state_cxx); return MPI_SUCCESS; } // Intercept function for write conversions static int write_intercept_fn(void *userbuf, MPI_Datatype type_c, int count_c, - void *filebuf, MPI_Offset position_c, + void *filebuf, MPI_Offset position_c, void *extra_state) { MPI::Datatype type_cxx(type_c); MPI::Offset position_cxx(position_c); - intercept_extra_state_t *intercept_data = - (intercept_extra_state_t*) extra_state; + ompi_cxx_intercept_file_extra_state_t *intercept_data = + (ompi_cxx_intercept_file_extra_state_t*) extra_state; + 
MPI::Datarep_conversion_function *write_fn_cxx = + (MPI::Datarep_conversion_function *) intercept_data->write_fn_cxx; - intercept_data->write_fn_cxx(userbuf, type_cxx, count_c, filebuf, - position_cxx, intercept_data->extra_state_cxx); + write_fn_cxx (userbuf, type_cxx, count_c, filebuf, position_cxx, + intercept_data->extra_state_cxx); return MPI_SUCCESS; } // Intercept function for extent calculations -static int extent_intercept_fn(MPI_Datatype type_c, MPI_Aint *file_extent_c, +static int extent_intercept_fn(MPI_Datatype type_c, MPI_Aint *file_extent_c, void *extra_state) { MPI::Datatype type_cxx(type_c); MPI::Aint file_extent_cxx(*file_extent_c); - intercept_extra_state_t *intercept_data = - (intercept_extra_state_t*) extra_state; + ompi_cxx_intercept_file_extra_state_t *intercept_data = + (ompi_cxx_intercept_file_extra_state_t*) extra_state; + MPI::Datarep_extent_function *extent_fn_cxx = + (MPI::Datarep_extent_function *) intercept_data->extent_fn_cxx; - intercept_data->extent_fn_cxx(type_cxx, file_extent_cxx, - intercept_data->extra_state_cxx); + extent_fn_cxx (type_cxx, file_extent_cxx, intercept_data->extra_state_cxx); *file_extent_c = file_extent_cxx; return MPI_SUCCESS; } // C++ bindings for MPI::Register_datarep -void -MPI::Register_datarep(const char* datarep, - Datarep_conversion_function* read_fn_cxx, - Datarep_conversion_function* write_fn_cxx, - Datarep_extent_function* extent_fn_cxx, +void +MPI::Register_datarep(const char* datarep, + Datarep_conversion_function* read_fn_cxx, + Datarep_conversion_function* write_fn_cxx, + Datarep_extent_function* extent_fn_cxx, void* extra_state_cxx) { - intercept_extra_state_t *intercept; + ompi_cxx_intercept_file_extra_state_t *intercept; - intercept = OBJ_NEW(intercept_extra_state_t); + intercept = ompi_cxx_new_intercept_state ((void *) read_fn_cxx, (void *) write_fn_cxx, + (void *) extent_fn_cxx, extra_state_cxx); if (NULL == intercept) { - OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, OMPI_ERR_OUT_OF_RESOURCE, - 
"MPI::Register_datarep"); + ompi_cxx_errhandler_invoke_file (MPI_FILE_NULL, OMPI_ERR_OUT_OF_RESOURCE, + "MPI::Register_datarep"); return; } - opal_list_append(&ompi_registered_datareps, &(intercept->base)); - intercept->read_fn_cxx = read_fn_cxx; - intercept->write_fn_cxx = write_fn_cxx; - intercept->extent_fn_cxx = extent_fn_cxx; - intercept->extra_state_cxx = extra_state_cxx; - - (void)MPI_Register_datarep(const_cast(datarep), read_intercept_fn, - write_intercept_fn, - extent_intercept_fn, intercept); + + (void)MPI_Register_datarep (const_cast(datarep), read_intercept_fn, + write_intercept_fn, extent_intercept_fn, intercept); } -void -MPI::Register_datarep(const char* datarep, +void +MPI::Register_datarep(const char* datarep, MPI_Datarep_conversion_function* read_fn_c, - Datarep_conversion_function* write_fn_cxx, - Datarep_extent_function* extent_fn_cxx, + Datarep_conversion_function* write_fn_cxx, + Datarep_extent_function* extent_fn_cxx, void* extra_state_cxx) { - intercept_extra_state_t *intercept; + ompi_cxx_intercept_file_extra_state_t *intercept; - intercept = OBJ_NEW(intercept_extra_state_t); + intercept = ompi_cxx_new_intercept_state (NULL, (void *) write_fn_cxx, (void *) extent_fn_cxx, + extra_state_cxx); if (NULL == intercept) { - OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, OMPI_ERR_OUT_OF_RESOURCE, - "MPI::Register_datarep"); + ompi_cxx_errhandler_invoke_file (MPI_FILE_NULL, OMPI_ERR_OUT_OF_RESOURCE, + "MPI::Register_datarep"); return; } - opal_list_append(&ompi_registered_datareps, &(intercept->base)); - intercept->write_fn_cxx = write_fn_cxx; - intercept->extent_fn_cxx = extent_fn_cxx; - intercept->extra_state_cxx = extra_state_cxx; - - (void)MPI_Register_datarep(const_cast(datarep), read_fn_c, - write_intercept_fn, - extent_intercept_fn, intercept); + + (void)MPI_Register_datarep (const_cast(datarep), read_fn_c, write_intercept_fn, + extent_intercept_fn, intercept); } -void -MPI::Register_datarep(const char* datarep, - Datarep_conversion_function* 
read_fn_cxx, - MPI_Datarep_conversion_function* write_fn_c, - Datarep_extent_function* extent_fn_cxx, +void +MPI::Register_datarep(const char* datarep, + Datarep_conversion_function* read_fn_cxx, + MPI_Datarep_conversion_function* write_fn_c, + Datarep_extent_function* extent_fn_cxx, void* extra_state_cxx) { - intercept_extra_state_t *intercept; + ompi_cxx_intercept_file_extra_state_t *intercept; - intercept = OBJ_NEW(intercept_extra_state_t); + intercept = ompi_cxx_new_intercept_state ((void *) read_fn_cxx, NULL, (void *) extent_fn_cxx, + extra_state_cxx); if (NULL == intercept) { - OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, OMPI_ERR_OUT_OF_RESOURCE, - "MPI::Register_datarep"); + ompi_cxx_errhandler_invoke_file (MPI_FILE_NULL, OMPI_ERR_OUT_OF_RESOURCE, + "MPI::Register_datarep"); return; } - opal_list_append(&ompi_registered_datareps, &(intercept->base)); - intercept->read_fn_cxx = read_fn_cxx; - intercept->extent_fn_cxx = extent_fn_cxx; - intercept->extra_state_cxx = extra_state_cxx; - - (void)MPI_Register_datarep(const_cast(datarep), read_intercept_fn, - write_fn_c, - extent_intercept_fn, intercept); + + (void)MPI_Register_datarep (const_cast(datarep), read_intercept_fn, write_fn_c, + extent_intercept_fn, intercept); } -void -MPI::Register_datarep(const char* datarep, +void +MPI::Register_datarep(const char* datarep, MPI_Datarep_conversion_function* read_fn_c, - MPI_Datarep_conversion_function* write_fn_c, - Datarep_extent_function* extent_fn_cxx, + MPI_Datarep_conversion_function* write_fn_c, + Datarep_extent_function* extent_fn_cxx, void* extra_state_cxx) { - intercept_extra_state_t *intercept; + ompi_cxx_intercept_file_extra_state_t *intercept; - intercept = OBJ_NEW(intercept_extra_state_t); + intercept = ompi_cxx_new_intercept_state (NULL, NULL, (void *) extent_fn_cxx, extra_state_cxx); if (NULL == intercept) { - OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, OMPI_ERR_OUT_OF_RESOURCE, - "MPI::Register_datarep"); + ompi_cxx_errhandler_invoke_file (MPI_FILE_NULL, 
OMPI_ERR_OUT_OF_RESOURCE, + "MPI::Register_datarep"); return; } - opal_list_append(&ompi_registered_datareps, &(intercept->base)); - intercept->extent_fn_cxx = extent_fn_cxx; - intercept->extra_state_cxx = extra_state_cxx; - (void)MPI_Register_datarep(const_cast(datarep), read_fn_c, - write_fn_c, - extent_intercept_fn, intercept); + (void)MPI_Register_datarep (const_cast(datarep), read_fn_c, write_fn_c, + extent_intercept_fn, intercept); } diff --git a/ompi/mpi/cxx/file.h b/ompi/mpi/cxx/file.h index c135cf81235..1cef5aff8f2 100644 --- a/ompi/mpi/cxx/file.h +++ b/ompi/mpi/cxx/file.h @@ -6,51 +6,51 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // // Typedefs for C++ callbacks registered via MPI::Register_datarep -typedef void Datarep_extent_function(const Datatype& datatype, +typedef void Datarep_extent_function(const Datatype& datatype, Aint& file_extent, void* extra_state); -typedef void Datarep_conversion_function(void* userbuf, Datatype& datatype, - int count, void* filebuf, +typedef void Datarep_conversion_function(void* userbuf, Datatype& datatype, + int count, void* filebuf, Offset position, void* extra_state); // Both callback functions in C++ -void Register_datarep(const char* datarep, - Datarep_conversion_function* read_conversion_fn, - Datarep_conversion_function* write_conversion_fn, - Datarep_extent_function* dtype_file_extent_fn, +void Register_datarep(const char* datarep, + Datarep_conversion_function* read_conversion_fn, + Datarep_conversion_function* write_conversion_fn, + Datarep_extent_function* dtype_file_extent_fn, void* extra_state); // Overload for C read callback function (MPI_CONVERSION_FN_NULL) -void Register_datarep(const char* datarep, - MPI_Datarep_conversion_function* read_conversion_fn, - Datarep_conversion_function* write_conversion_fn, - Datarep_extent_function* dtype_file_extent_fn, +void Register_datarep(const char* datarep, + MPI_Datarep_conversion_function* read_conversion_fn, + Datarep_conversion_function* write_conversion_fn, + Datarep_extent_function* dtype_file_extent_fn, void* extra_state); // Overload for C write callback function (MPI_CONVERSION_FN_NULL) -void Register_datarep(const char* datarep, - Datarep_conversion_function* read_conversion_fn, - MPI_Datarep_conversion_function* write_conversion_fn, - Datarep_extent_function* dtype_file_extent_fn, +void Register_datarep(const char* datarep, + Datarep_conversion_function* read_conversion_fn, + MPI_Datarep_conversion_function* write_conversion_fn, + Datarep_extent_function* dtype_file_extent_fn, void* extra_state); // Overload for C 
read and write callback functions (MPI_CONVERSION_FN_NULL) -void Register_datarep(const char* datarep, - MPI_Datarep_conversion_function* read_conversion_fn, - MPI_Datarep_conversion_function* write_conversion_fn, - Datarep_extent_function* dtype_file_extent_fn, +void Register_datarep(const char* datarep, + MPI_Datarep_conversion_function* read_conversion_fn, + MPI_Datarep_conversion_function* write_conversion_fn, + Datarep_extent_function* dtype_file_extent_fn, void* extra_state); class File { @@ -93,7 +93,7 @@ class File { File(const File& data) : mpi_file(data.mpi_file) { } File(MPI_File i) : mpi_file(i) { } - + virtual ~File() { } File& operator=(const File& data) { @@ -109,7 +109,7 @@ class File { #endif - // from the I/o chapter of MPI - 2 + // from the I/o chapter of MPI - 2 void Close(); @@ -133,22 +133,22 @@ class File { MPI::Aint Get_type_extent(const MPI::Datatype& datatype) const; - void Get_view(MPI::Offset& disp, MPI::Datatype& etype, + void Get_view(MPI::Offset& disp, MPI::Datatype& etype, MPI::Datatype& filetype, char* datarep) const; - MPI::Request Iread(void* buf, int count, + MPI::Request Iread(void* buf, int count, const MPI::Datatype& datatype); - - MPI::Request Iread_at(MPI::Offset offset, void* buf, int count, + + MPI::Request Iread_at(MPI::Offset offset, void* buf, int count, const MPI::Datatype& datatype); - + MPI::Request Iread_shared(void* buf, int count, const MPI::Datatype& datatype); MPI::Request Iwrite(const void* buf, int count, const MPI::Datatype& datatype); - MPI::Request Iwrite_at(MPI::Offset offset, const void* buf, + MPI::Request Iwrite_at(MPI::Offset offset, const void* buf, int count, const MPI::Datatype& datatype); MPI::Request Iwrite_shared(const void* buf, int count, @@ -177,21 +177,21 @@ class File { void Read_all_end(void* buf, MPI::Status& status); - void Read_at(MPI::Offset offset, + void Read_at(MPI::Offset offset, void* buf, int count, const MPI::Datatype& datatype); void Read_at(MPI::Offset offset, void* buf, int 
count, const MPI::Datatype& datatype, MPI::Status& status); - void Read_at_all(MPI::Offset offset, void* buf, int count, + void Read_at_all(MPI::Offset offset, void* buf, int count, const MPI::Datatype& datatype); - + void Read_at_all(MPI::Offset offset, void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status); void Read_at_all_begin(MPI::Offset offset, void* buf, int count, const MPI::Datatype& datatype); - + void Read_at_all_end(void* buf); void Read_at_all_end(void* buf, MPI::Status& status); @@ -277,7 +277,7 @@ class File { void Write_ordered(const void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status); - void Write_ordered_begin(const void* buf, int count, + void Write_ordered_begin(const void* buf, int count, const MPI::Datatype& datatype); void Write_ordered_end(const void* buf); @@ -297,7 +297,7 @@ class File { typedef Errhandler_function Errhandler_fn __mpi_interface_deprecated__("MPI::File::Errhandler_fn was deprecated in MPI-2.2; use MPI::File::Errhandler_function instead"); - static MPI::Errhandler Create_errhandler(Errhandler_function* function); + static MPI::Errhandler Create_errhandler(Errhandler_function* function); MPI::Errhandler Get_errhandler() const; diff --git a/ompi/mpi/cxx/file_inln.h b/ompi/mpi/cxx/file_inln.h index c9b7db8b510..9930f69056e 100644 --- a/ompi/mpi/cxx/file_inln.h +++ b/ompi/mpi/cxx/file_inln.h @@ -6,28 +6,28 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // -inline void -MPI::File::Delete(const char* filename, const MPI::Info& info) +inline void +MPI::File::Delete(const char* filename, const MPI::Info& info) { (void) MPI_File_delete(const_cast(filename), info); } -inline int -MPI::File::Get_amode() const +inline int +MPI::File::Get_amode() const { int amode; (void) MPI_File_get_amode(mpi_file, &amode); @@ -35,8 +35,8 @@ MPI::File::Get_amode() const } -inline bool -MPI::File::Get_atomicity() const +inline bool +MPI::File::Get_atomicity() const { int flag; (void) MPI_File_get_atomicity(mpi_file, &flag); @@ -44,7 +44,7 @@ MPI::File::Get_atomicity() const } inline MPI::Offset -MPI::File::Get_byte_offset(const MPI::Offset disp) const +MPI::File::Get_byte_offset(const MPI::Offset disp) const { MPI_Offset offset, ldisp; ldisp = disp; @@ -52,7 +52,7 @@ MPI::File::Get_byte_offset(const MPI::Offset disp) const return offset; } -inline MPI::Group +inline MPI::Group MPI::File::Get_group() const { MPI_Group group; @@ -61,7 +61,7 @@ MPI::File::Get_group() const } -inline MPI::Info +inline MPI::Info MPI::File::Get_info() const { MPI_Info info_used; @@ -70,7 +70,7 @@ MPI::File::Get_info() const } -inline MPI::Offset +inline MPI::Offset MPI::File::Get_position() const { MPI_Offset offset; @@ -79,7 +79,7 @@ MPI::File::Get_position() const } -inline MPI::Offset +inline MPI::Offset MPI::File::Get_position_shared() const { MPI_Offset offset; @@ -88,7 +88,7 @@ MPI::File::Get_position_shared() const } -inline MPI::Offset +inline MPI::Offset MPI::File::Get_size() const { MPI_Offset offset; @@ -98,7 +98,7 @@ MPI::File::Get_size() const } -inline MPI::Aint +inline MPI::Aint MPI::File::Get_type_extent(const MPI::Datatype& datatype) const { MPI_Aint extent; @@ -107,9 +107,9 @@ MPI::File::Get_type_extent(const MPI::Datatype& datatype) const } -inline void +inline void MPI::File::Get_view(MPI::Offset& disp, - MPI::Datatype& etype, + MPI::Datatype& etype, 
MPI::Datatype& filetype, char* datarep) const { @@ -123,8 +123,8 @@ MPI::File::Get_view(MPI::Offset& disp, } -inline MPI::Request -MPI::File::Iread(void* buf, int count, +inline MPI::Request +MPI::File::Iread(void* buf, int count, const MPI::Datatype& datatype) { MPI_Request req; @@ -133,8 +133,8 @@ MPI::File::Iread(void* buf, int count, } -inline MPI::Request -MPI::File::Iread_at(MPI::Offset offset, void* buf, int count, +inline MPI::Request +MPI::File::Iread_at(MPI::Offset offset, void* buf, int count, const MPI::Datatype& datatype) { MPI_Request req; @@ -143,7 +143,7 @@ MPI::File::Iread_at(MPI::Offset offset, void* buf, int count, } -inline MPI::Request +inline MPI::Request MPI::File::Iread_shared(void* buf, int count, const MPI::Datatype& datatype) { @@ -153,7 +153,7 @@ MPI::File::Iread_shared(void* buf, int count, } -inline MPI::Request +inline MPI::Request MPI::File::Iwrite(const void* buf, int count, const MPI::Datatype& datatype) { @@ -163,8 +163,8 @@ MPI::File::Iwrite(const void* buf, int count, } -inline MPI::Request -MPI::File::Iwrite_at(MPI::Offset offset, const void* buf, +inline MPI::Request +MPI::File::Iwrite_at(MPI::Offset offset, const void* buf, int count, const MPI::Datatype& datatype) { MPI_Request req; @@ -174,7 +174,7 @@ MPI::File::Iwrite_at(MPI::Offset offset, const void* buf, } -inline MPI::Request +inline MPI::Request MPI::File::Iwrite_shared(const void* buf, int count, const MPI::Datatype& datatype) { @@ -184,7 +184,7 @@ MPI::File::Iwrite_shared(const void* buf, int count, } -inline MPI::File +inline MPI::File MPI::File::Open(const MPI::Intracomm& comm, const char* filename, int amode, const MPI::Info& info) @@ -195,14 +195,14 @@ MPI::File::Open(const MPI::Intracomm& comm, } -inline void +inline void MPI::File::Preallocate(MPI::Offset size) { (void) MPI_File_preallocate(mpi_file, size); } -inline void +inline void MPI::File::Read(void* buf, int count, const MPI::Datatype& datatype) { @@ -211,7 +211,7 @@ MPI::File::Read(void* buf, int 
count, } -inline void +inline void MPI::File::Read(void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status) @@ -220,7 +220,7 @@ MPI::File::Read(void* buf, int count, } -inline void +inline void MPI::File::Read_all(void* buf, int count, const MPI::Datatype& datatype) { @@ -229,7 +229,7 @@ MPI::File::Read_all(void* buf, int count, } -inline void +inline void MPI::File::Read_all(void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status) @@ -238,7 +238,7 @@ MPI::File::Read_all(void* buf, int count, } -inline void +inline void MPI::File::Read_all_begin(void* buf, int count, const MPI::Datatype& datatype) { @@ -246,7 +246,7 @@ MPI::File::Read_all_begin(void* buf, int count, } -inline void +inline void MPI::File::Read_all_end(void* buf) { MPI_Status status; @@ -254,15 +254,15 @@ MPI::File::Read_all_end(void* buf) } -inline void +inline void MPI::File::Read_all_end(void* buf, MPI::Status& status) { (void) MPI_File_read_all_end(mpi_file, buf, &status.mpi_status); } -inline void -MPI::File::Read_at(MPI::Offset offset, +inline void +MPI::File::Read_at(MPI::Offset offset, void* buf, int count, const MPI::Datatype& datatype) { @@ -272,18 +272,18 @@ MPI::File::Read_at(MPI::Offset offset, } -inline void +inline void MPI::File::Read_at(MPI::Offset offset, void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status) { - (void) MPI_File_read_at(mpi_file, offset, buf, count, datatype, + (void) MPI_File_read_at(mpi_file, offset, buf, count, datatype, &status.mpi_status); } -inline void -MPI::File::Read_at_all(MPI::Offset offset, void* buf, int count, +inline void +MPI::File::Read_at_all(MPI::Offset offset, void* buf, int count, const MPI::Datatype& datatype) { MPI_Status status; @@ -291,17 +291,17 @@ MPI::File::Read_at_all(MPI::Offset offset, void* buf, int count, } -inline void +inline void MPI::File::Read_at_all(MPI::Offset offset, void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status) { - (void) 
MPI_File_read_at_all(mpi_file, offset, buf, count, datatype, + (void) MPI_File_read_at_all(mpi_file, offset, buf, count, datatype, &status.mpi_status); } -inline void +inline void MPI::File::Read_at_all_begin(MPI::Offset offset, void* buf, int count, const MPI::Datatype& datatype) @@ -310,7 +310,7 @@ MPI::File::Read_at_all_begin(MPI::Offset offset, } -inline void +inline void MPI::File::Read_at_all_end(void* buf) { MPI_Status status; @@ -318,14 +318,14 @@ MPI::File::Read_at_all_end(void* buf) } -inline void +inline void MPI::File::Read_at_all_end(void* buf, MPI::Status& status) { (void) MPI_File_read_at_all_end(mpi_file, buf, &status.mpi_status); } -inline void +inline void MPI::File::Read_ordered(void* buf, int count, const MPI::Datatype& datatype) { @@ -334,17 +334,17 @@ MPI::File::Read_ordered(void* buf, int count, } -inline void +inline void MPI::File::Read_ordered(void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status) { - (void) MPI_File_read_ordered(mpi_file, buf, count, datatype, + (void) MPI_File_read_ordered(mpi_file, buf, count, datatype, &status.mpi_status); } -inline void +inline void MPI::File::Read_ordered_begin(void* buf, int count, const MPI::Datatype& datatype) { @@ -352,7 +352,7 @@ MPI::File::Read_ordered_begin(void* buf, int count, } -inline void +inline void MPI::File::Read_ordered_end(void* buf) { MPI_Status status; @@ -360,14 +360,14 @@ MPI::File::Read_ordered_end(void* buf) } -inline void +inline void MPI::File::Read_ordered_end(void* buf, MPI::Status& status) { (void) MPI_File_read_ordered_end(mpi_file, buf, &status.mpi_status); } -inline void +inline void MPI::File::Read_shared(void* buf, int count, const MPI::Datatype& datatype) { @@ -376,51 +376,51 @@ MPI::File::Read_shared(void* buf, int count, } -inline void +inline void MPI::File::Read_shared(void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status) { - (void) MPI_File_read_shared(mpi_file, buf, count, datatype, + (void) MPI_File_read_shared(mpi_file, 
buf, count, datatype, &status.mpi_status); } -inline void +inline void MPI::File::Seek(MPI::Offset offset, int whence) { (void) MPI_File_seek(mpi_file, offset, whence); } -inline void +inline void MPI::File::Seek_shared(MPI::Offset offset, int whence) { (void) MPI_File_seek_shared(mpi_file, offset, whence); } -inline void +inline void MPI::File::Set_atomicity(bool flag) { (void) MPI_File_set_atomicity(mpi_file, flag); } -inline void +inline void MPI::File::Set_info(const MPI::Info& info) { (void) MPI_File_set_info(mpi_file, info); } -inline void +inline void MPI::File::Set_size(MPI::Offset size) { (void) MPI_File_set_size(mpi_file, size); } -inline void +inline void MPI::File::Set_view(MPI::Offset disp, const MPI::Datatype& etype, const MPI::Datatype& filetype, @@ -432,14 +432,14 @@ MPI::File::Set_view(MPI::Offset disp, } -inline void +inline void MPI::File::Sync() { (void) MPI_File_sync(mpi_file); } -inline void +inline void MPI::File::Write(const void* buf, int count, const MPI::Datatype& datatype) { @@ -448,17 +448,17 @@ MPI::File::Write(const void* buf, int count, } -inline void +inline void MPI::File::Write(const void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status) { - (void) MPI_File_write(mpi_file, const_cast(buf), count, datatype, + (void) MPI_File_write(mpi_file, const_cast(buf), count, datatype, &status.mpi_status); } -inline void +inline void MPI::File::Write_all(const void* buf, int count, const MPI::Datatype& datatype) { @@ -468,17 +468,17 @@ MPI::File::Write_all(const void* buf, int count, -inline void +inline void MPI::File::Write_all(const void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status) { - (void) MPI_File_write_all(mpi_file, const_cast(buf), count, datatype, + (void) MPI_File_write_all(mpi_file, const_cast(buf), count, datatype, &status.mpi_status); } -inline void +inline void MPI::File::Write_all_begin(const void* buf, int count, const MPI::Datatype& datatype) { @@ -486,7 +486,7 @@ 
MPI::File::Write_all_begin(const void* buf, int count, } -inline void +inline void MPI::File::Write_all_end(const void* buf) { MPI_Status status; @@ -494,14 +494,14 @@ MPI::File::Write_all_end(const void* buf) } -inline void +inline void MPI::File::Write_all_end(const void* buf, MPI::Status& status) { (void) MPI_File_write_all_end(mpi_file, const_cast(buf), &status.mpi_status); } -inline void +inline void MPI::File::Write_at(MPI::Offset offset, const void* buf, int count, const MPI::Datatype& datatype) @@ -512,7 +512,7 @@ MPI::File::Write_at(MPI::Offset offset, } -inline void +inline void MPI::File::Write_at(MPI::Offset offset, const void* buf, int count, const MPI::Datatype& datatype, @@ -523,7 +523,7 @@ MPI::File::Write_at(MPI::Offset offset, } -inline void +inline void MPI::File::Write_at_all(MPI::Offset offset, const void* buf, int count, const MPI::Datatype& datatype) @@ -534,18 +534,18 @@ MPI::File::Write_at_all(MPI::Offset offset, } -inline void +inline void MPI::File::Write_at_all(MPI::Offset offset, const void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status) { - (void) MPI_File_write_at_all(mpi_file, offset, const_cast(buf), count, + (void) MPI_File_write_at_all(mpi_file, offset, const_cast(buf), count, datatype, &status.mpi_status); } -inline void +inline void MPI::File::Write_at_all_begin(MPI::Offset offset, const void* buf, int count, const MPI::Datatype& datatype) @@ -555,7 +555,7 @@ MPI::File::Write_at_all_begin(MPI::Offset offset, } -inline void +inline void MPI::File::Write_at_all_end(const void* buf) { MPI_Status status; @@ -563,14 +563,14 @@ MPI::File::Write_at_all_end(const void* buf) } -inline void +inline void MPI::File::Write_at_all_end(const void* buf, MPI::Status& status) { (void) MPI_File_write_at_all_end(mpi_file, const_cast(buf), &status.mpi_status); } -inline void +inline void MPI::File::Write_ordered(const void* buf, int count, const MPI::Datatype& datatype) { @@ -580,7 +580,7 @@ MPI::File::Write_ordered(const void* 
buf, int count, } -inline void +inline void MPI::File::Write_ordered(const void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status) @@ -590,15 +590,15 @@ MPI::File::Write_ordered(const void* buf, int count, } -inline void -MPI::File::Write_ordered_begin(const void* buf, int count, +inline void +MPI::File::Write_ordered_begin(const void* buf, int count, const MPI::Datatype& datatype) { (void) MPI_File_write_ordered_begin(mpi_file, const_cast(buf), count, datatype); } -inline void +inline void MPI::File::Write_ordered_end(const void* buf) { MPI_Status status; @@ -606,7 +606,7 @@ MPI::File::Write_ordered_end(const void* buf) } -inline void +inline void MPI::File::Write_ordered_end(const void* buf, MPI::Status& status) { @@ -614,7 +614,7 @@ MPI::File::Write_ordered_end(const void* buf, } -inline void +inline void MPI::File::Write_shared(const void* buf, int count, const MPI::Datatype& datatype) { @@ -624,7 +624,7 @@ MPI::File::Write_shared(const void* buf, int count, } -inline void +inline void MPI::File::Write_shared(const void* buf, int count, const MPI::Datatype& datatype, MPI::Status& status) { @@ -648,7 +648,7 @@ MPI::File::Get_errhandler() const return errhandler; } -inline void +inline void MPI::File::Call_errhandler(int errorcode) const { (void) MPI_File_call_errhandler(mpi_file, errorcode); diff --git a/ompi/mpi/cxx/functions.h b/ompi/mpi/cxx/functions.h index a1324960836..b86ccef2d58 100644 --- a/ompi/mpi/cxx/functions.h +++ b/ompi/mpi/cxx/functions.h @@ -6,16 +6,16 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2008 Cisco Systems, Inc. 
All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -23,10 +23,10 @@ // Point-to-Point Communication // -void +void Attach_buffer(void* buffer, int size); -int +int Detach_buffer(void*& buffer); // @@ -49,19 +49,19 @@ Add_error_code(int errorclass); void Add_error_string(int errorcode, const char* string); -void +void Get_processor_name(char* name, int& resultlen); void Get_error_string(int errorcode, char* string, int& resultlen); -int +int Get_error_class(int errorcode); -double +double Wtime(); -double +double Wtick(); void @@ -131,11 +131,11 @@ Open_port(const Info& info, char* port_name); void -Publish_name(const char* service_name, const Info& info, +Publish_name(const char* service_name, const Info& info, const char* port_name); -void -Unpublish_name(const char* service_name, const Info& info, +void +Unpublish_name(const char* service_name, const Info& info, const char* port_name); // diff --git a/ompi/mpi/cxx/functions_inln.h b/ompi/mpi/cxx/functions_inln.h index 0fd30c5fe49..8ea793677d1 100644 --- a/ompi/mpi/cxx/functions_inln.h +++ b/ompi/mpi/cxx/functions_inln.h @@ -1,21 +1,21 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -25,13 +25,13 @@ // Point-to-Point Communication // -inline void +inline void MPI::Attach_buffer(void* buffer, int size) { (void)MPI_Buffer_attach(buffer, size); } -inline int +inline int MPI::Detach_buffer(void*& buffer) { int size; @@ -76,7 +76,7 @@ MPI::Add_error_string(int errorcode, const char* string) (void)MPI_Add_error_string(errorcode, const_cast(string)); } -inline void +inline void MPI::Get_processor_name(char* name, int& resultlen) { (void)MPI_Get_processor_name(name, &resultlen); @@ -88,21 +88,21 @@ MPI::Get_error_string(int errorcode, char* string, int& resultlen) (void)MPI_Error_string(errorcode, string, &resultlen); } -inline int -MPI::Get_error_class(int errorcode) +inline int +MPI::Get_error_class(int errorcode) { int errorclass; (void)MPI_Error_class(errorcode, &errorclass); return errorclass; } -inline double +inline double MPI::Wtime() { return (MPI_Wtime()); } -inline double +inline double MPI::Wtick() { return (MPI_Wtick()); @@ -200,7 +200,7 @@ MPI::Query_thread() inline void* -MPI::Alloc_mem(MPI::Aint size, const MPI::Info& info) +MPI::Alloc_mem(MPI::Aint size, const MPI::Info& info) { void* baseptr; (void) MPI_Alloc_mem(size, info, &baseptr); @@ -221,14 +221,14 @@ MPI::Free_mem(void* base) inline void -MPI::Close_port(const char* port_name) +MPI::Close_port(const char* port_name) { (void) MPI_Close_port(const_cast(port_name)); } inline void -MPI::Lookup_name(const char * service_name, +MPI::Lookup_name(const char * service_name, const MPI::Info& info, char* port_name) { @@ -244,7 +244,7 @@ MPI::Open_port(const MPI::Info& info, char* port_name) inline void -MPI::Publish_name(const char* service_name, +MPI::Publish_name(const char* service_name, const MPI::Info& info, const char* port_name) { @@ -254,7 +254,7 @@ MPI::Publish_name(const char* service_name, inline void -MPI::Unpublish_name(const char* service_name, +MPI::Unpublish_name(const char* service_name, 
const MPI::Info& info, const char* port_name) { @@ -273,7 +273,7 @@ MPI::Pcontrol(const int level, ...) { va_list ap; va_start(ap, level); - + (void)MPI_Pcontrol(level, ap); va_end(ap); } diff --git a/ompi/mpi/cxx/group.h b/ompi/mpi/cxx/group.h index 68b8a3f23eb..e8423ea88a2 100644 --- a/ompi/mpi/cxx/group.h +++ b/ompi/mpi/cxx/group.h @@ -6,15 +6,15 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -44,10 +44,10 @@ class Group { inline bool operator== (const Group &a) { return (bool)(pmpi_group == a.pmpi_group); } - inline bool operator!= (const Group &a) { + inline bool operator!= (const Group &a) { return (bool)!(*this == a); } - + // inter-language operability Group& operator= (const MPI_Group &i) { pmpi_group = i; return *this; } inline operator MPI_Group () const { return pmpi_group.mpi(); } @@ -71,7 +71,7 @@ class Group { // comparison inline bool operator== (const Group &a) { return (bool)(mpi_group == a.mpi_group); } inline bool operator!= (const Group &a) { return (bool)!(*this == a); } - + // inter-language operability inline Group& operator= (const MPI_Group &i) { mpi_group = i; return *this; } inline operator MPI_Group () const { return mpi_group; } @@ -86,28 +86,28 @@ class Group { // virtual int Get_size() const; - + virtual int Get_rank() const; - - static void Translate_ranks (const Group& group1, int n, const int ranks1[], + + static void Translate_ranks (const Group& group1, int n, const int ranks1[], const Group& group2, int 
ranks2[]); - + static int Compare(const Group& group1, const Group& group2); - + static Group Union(const Group &group1, const Group &group2); - + static Group Intersect(const Group &group1, const Group &group2); - + static Group Difference(const Group &group1, const Group &group2); - + virtual Group Incl(int n, const int ranks[]) const; - + virtual Group Excl(int n, const int ranks[]) const; - + virtual Group Range_incl(int n, const int ranges[][3]) const; - + virtual Group Range_excl(int n, const int ranges[][3]) const; - + virtual void Free(); protected: diff --git a/ompi/mpi/cxx/group_inln.h b/ompi/mpi/cxx/group_inln.h index c363fb10672..5b2dab11b7b 100644 --- a/ompi/mpi/cxx/group_inln.h +++ b/ompi/mpi/cxx/group_inln.h @@ -1,19 +1,20 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. +// Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -30,16 +31,16 @@ MPI::Group::Get_size() const } inline int -MPI::Group::Get_rank() const +MPI::Group::Get_rank() const { - int rank; - (void)MPI_Group_rank(mpi_group, &rank); - return rank; + int myrank; + (void)MPI_Group_rank(mpi_group, &myrank); + return myrank; } inline void MPI::Group::Translate_ranks (const MPI::Group& group1, int n, - const int ranks1[], + const int ranks1[], const MPI::Group& group2, int ranks2[]) { (void)MPI_Group_translate_ranks(group1, n, const_cast(ranks1), group2, const_cast(ranks2)); @@ -72,7 +73,7 @@ MPI::Group::Intersect(const MPI::Group &group1, const MPI::Group &group2) inline MPI::Group MPI::Group::Difference(const MPI::Group &group1, const MPI::Group &group2) { - MPI_Group newgroup; + MPI_Group newgroup; (void)MPI_Group_difference(group1, group2, &newgroup); return newgroup; } @@ -99,7 +100,7 @@ MPI::Group::Range_incl(int n, const int ranges[][3]) const MPI_Group newgroup; (void)MPI_Group_range_incl(mpi_group, n, #if OMPI_CXX_SUPPORTS_2D_CONST_CAST - const_cast(ranges), + const_cast(ranges), #else (int(*)[3]) ranges, #endif diff --git a/ompi/mpi/cxx/info.h b/ompi/mpi/cxx/info.h index f2d0c77d4f5..d3bc73489ca 100644 --- a/ompi/mpi/cxx/info.h +++ b/ompi/mpi/cxx/info.h @@ -6,15 +6,15 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -79,7 +79,7 @@ class Info { virtual void Delete(const char* key); - virtual Info Dup() const; + virtual Info Dup() const; virtual void Free(); diff --git a/ompi/mpi/cxx/info_inln.h b/ompi/mpi/cxx/info_inln.h index d0af94d94e1..b68fa8039df 100644 --- a/ompi/mpi/cxx/info_inln.h +++ b/ompi/mpi/cxx/info_inln.h @@ -1,24 +1,24 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // -inline MPI::Info +inline MPI::Info MPI::Info::Create() { MPI_Info newinfo; @@ -32,7 +32,7 @@ MPI::Info::Delete(const char* key) (void)MPI_Info_delete(mpi_info, const_cast(key)); } -inline MPI::Info +inline MPI::Info MPI::Info::Dup() const { MPI_Info newinfo; @@ -68,7 +68,7 @@ MPI::Info::Get_nthkey(int n, char* key) const (void) MPI_Info_get_nthkey(mpi_info, n, key); } -inline bool +inline bool MPI::Info::Get_valuelen(const char* key, int& valuelen) const { int flag; diff --git a/ompi/mpi/cxx/intercepts.cc b/ompi/mpi/cxx/intercepts.cc index aa14460b501..3695ae7579b 100644 --- a/ompi/mpi/cxx/intercepts.cc +++ b/ompi/mpi/cxx/intercepts.cc @@ -1,32 +1,32 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. 
// Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. // Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. +// Copyright (c) 2016 Los Alamos National Security, LLC. All rights +// reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // #include "mpicxx.h" -#include +#include #include "ompi_config.h" -#include "ompi/errhandler/errhandler.h" -#include "ompi/communicator/communicator.h" -#include "ompi/datatype/ompi_datatype.h" +#include "cxx_glue.h" extern "C" void ompi_mpi_cxx_throw_exception(int *errcode) @@ -38,12 +38,12 @@ void ompi_mpi_cxx_throw_exception(int *errcode) // and -lmpi++ (which can legally happen in the LAM MPI implementation, // and probably in MPICH and others who include -lmpi++ by default in their // wrapper compilers) - fprintf(stderr, "MPI 2 C++ exception throwing is disabled, MPI::mpi_errno has the error code\n"); + fprintf(stderr, "MPI 2 C++ exception throwing is disabled, MPI::mpi_errno has the error code\n"); MPI::mpi_errno = *errcode; -#endif +#endif } -extern "C" +extern "C" void ompi_mpi_cxx_comm_throw_excptn_fctn(MPI_Comm *, int *errcode, ...) { /* Portland compiler raises a warning if va_start is not used in a @@ -55,7 +55,7 @@ void ompi_mpi_cxx_comm_throw_excptn_fctn(MPI_Comm *, int *errcode, ...) } #if OMPI_PROVIDE_MPI_FILE_INTERFACE -extern "C" +extern "C" void ompi_mpi_cxx_file_throw_excptn_fctn(MPI_File *, int *errcode, ...) { va_list ap; @@ -65,7 +65,7 @@ void ompi_mpi_cxx_file_throw_excptn_fctn(MPI_File *, int *errcode, ...) 
} #endif -extern "C" +extern "C" void ompi_mpi_cxx_win_throw_excptn_fctn(MPI_Win *, int *errcode, ...) { va_list ap; @@ -78,14 +78,14 @@ void ompi_mpi_cxx_win_throw_excptn_fctn(MPI_Win *, int *errcode, ...) void MPI::InitializeIntercepts() { - ompi_mpi_errors_throw_exceptions.eh.eh_comm_fn = - ompi_mpi_cxx_comm_throw_excptn_fctn; + ompi_cxx_errhandler_set_callbacks ((struct ompi_errhandler_t *) &ompi_mpi_errors_throw_exceptions, + ompi_mpi_cxx_comm_throw_excptn_fctn, #if OMPI_PROVIDE_MPI_FILE_INTERFACE - ompi_mpi_errors_throw_exceptions.eh.eh_file_fn = - ompi_mpi_cxx_file_throw_excptn_fctn; + ompi_mpi_cxx_file_throw_excptn_fctn, +#else + NULL, #endif - ompi_mpi_errors_throw_exceptions.eh.eh_win_fn = - ompi_mpi_cxx_win_throw_excptn_fctn; + ompi_mpi_cxx_win_throw_excptn_fctn); } @@ -93,16 +93,15 @@ MPI::InitializeIntercepts() // the express purpose of having a C++ entity call back the C++ // function (so that types can be converted, etc.). extern "C" -void ompi_mpi_cxx_comm_errhandler_invoke(ompi_errhandler_t *c_errhandler, - MPI_Comm *c_comm, int *err, - const char *message) +void ompi_mpi_cxx_comm_errhandler_invoke(MPI_Comm *c_comm, int *err, + const char *message, void *comm_fn) { // MPI::Comm is an abstract base class; can't instantiate one of // those. So fake it by instantiating an MPI::Intracomm and then // casting it down to an (MPI::Comm&) when invoking the callback. MPI::Intracomm cxx_comm(*c_comm); - MPI::Comm::Errhandler_function *cxx_fn = - (MPI::Comm::Errhandler_function*) c_errhandler->eh_comm_fn; + MPI::Comm::Errhandler_function *cxx_fn = + (MPI::Comm::Errhandler_function*) comm_fn; cxx_fn((MPI::Comm&) cxx_comm, err, message); } @@ -112,13 +111,12 @@ void ompi_mpi_cxx_comm_errhandler_invoke(ompi_errhandler_t *c_errhandler, // the express purpose of having a C++ entity call back the C++ // function (so that types can be converted, etc.). 
extern "C" -void ompi_mpi_cxx_file_errhandler_invoke(ompi_errhandler_t *c_errhandler, - MPI_File *c_file, int *err, - const char *message) +void ompi_mpi_cxx_file_errhandler_invoke(MPI_File *c_file, int *err, + const char *message, void *file_fn) { MPI::File cxx_file(*c_file); - MPI::File::Errhandler_function *cxx_fn = - (MPI::File::Errhandler_function*) c_errhandler->eh_file_fn; + MPI::File::Errhandler_function *cxx_fn = + (MPI::File::Errhandler_function*) file_fn; cxx_fn(cxx_file, err, message); } @@ -128,13 +126,12 @@ void ompi_mpi_cxx_file_errhandler_invoke(ompi_errhandler_t *c_errhandler, // the express purpose of having a C++ entity call back the C++ // function (so that types can be converted, etc.). extern "C" -void ompi_mpi_cxx_win_errhandler_invoke(ompi_errhandler_t *c_errhandler, - MPI_Win *c_win, int *err, - const char *message) +void ompi_mpi_cxx_win_errhandler_invoke(MPI_Win *c_win, int *err, + const char *message, void *win_fn) { MPI::Win cxx_win(*c_win); - MPI::Win::Errhandler_function *cxx_fn = - (MPI::Win::Errhandler_function*) c_errhandler->eh_win_fn; + MPI::Win::Errhandler_function *cxx_fn = + (MPI::Win::Errhandler_function*) win_fn; cxx_fn(cxx_win, err, message); } @@ -209,10 +206,10 @@ void ompi_mpi_cxx_win_errhandler_invoke(ompi_errhandler_t *c_errhandler, // bindings, in fact), and pass it the ompi_mpi_cxx_op_intercept() // function (casting it to (MPI_User_function*) -- it's a function // pointer, so its size is guaranteed to be the same, even if the -// signature of the real function is different). +// signature of the real function is different). // // 3. The function pointer to ompi_mpi_cxx_op_intercept() will be -// cached in the MPI_Op in op->o_func[0].cxx_intercept_fn. +// cached in the MPI_Op in op->o_func[0].cxx_intercept_fn. 
// // Recall that MPI_Op is implemented to have an array of function // pointers so that optimized versions of reduction operations can be @@ -228,7 +225,7 @@ void ompi_mpi_cxx_win_errhandler_invoke(ompi_errhandler_t *c_errhandler, // multiple different function pointer types -- it doesn't matter // which type the user's callback function pointer is stored in; since // all the types in the union are function pointers, it's guaranteed -// to be large enough to hold what we need. +// to be large enough to hold what we need. // // Note that we don't have a member of the union for the C++ callback // function because its signature includes a (MPI::Datatype&), which @@ -250,7 +247,7 @@ void ompi_mpi_cxx_win_errhandler_invoke(ompi_errhandler_t *c_errhandler, // Wasn't that simple? // extern "C" void -ompi_mpi_cxx_op_intercept(void *invec, void *outvec, int *len, +ompi_mpi_cxx_op_intercept(void *invec, void *outvec, int *len, MPI_Datatype *datatype, MPI_User_function *c_fn) { MPI::Datatype cxx_datatype = *datatype; @@ -262,14 +259,14 @@ ompi_mpi_cxx_op_intercept(void *invec, void *outvec, int *len, // Attribute copy functions -- comm, type, and win // extern "C" int -ompi_mpi_cxx_comm_copy_attr_intercept(MPI_Comm comm, int keyval, - void *extra_state, - void *attribute_val_in, +ompi_mpi_cxx_comm_copy_attr_intercept(MPI_Comm comm, int keyval, + void *extra_state, + void *attribute_val_in, void *attribute_val_out, int *flag, MPI_Comm newcomm) { int ret = 0; - MPI::Comm::keyval_intercept_data_t *kid = + MPI::Comm::keyval_intercept_data_t *kid = (MPI::Comm::keyval_intercept_data_t*) extra_state; // The callback may be in C or C++. 
If it's in C, it's easy - just @@ -286,31 +283,38 @@ ompi_mpi_cxx_comm_copy_attr_intercept(MPI_Comm comm, int keyval, MPI::Intercomm intercomm; MPI::Graphcomm graphcomm; MPI::Cartcomm cartcomm; - - bool bflag = OPAL_INT_TO_BOOL(*flag); + + bool bflag = OPAL_INT_TO_BOOL(*flag); if (NULL != kid->cxx_copy_fn) { - if (OMPI_COMM_IS_GRAPH(comm)) { + ompi_cxx_communicator_type_t comm_type = + ompi_cxx_comm_get_type (comm); + switch (comm_type) { + case OMPI_CXX_COMM_TYPE_GRAPH: graphcomm = MPI::Graphcomm(comm); ret = kid->cxx_copy_fn(graphcomm, keyval, kid->extra_state, - attribute_val_in, attribute_val_out, + attribute_val_in, attribute_val_out, bflag); - } else if (OMPI_COMM_IS_CART(comm)) { + break; + case OMPI_CXX_COMM_TYPE_CART: cartcomm = MPI::Cartcomm(comm); ret = kid->cxx_copy_fn(cartcomm, keyval, kid->extra_state, - attribute_val_in, attribute_val_out, + attribute_val_in, attribute_val_out, bflag); - } else if (OMPI_COMM_IS_INTRA(comm)) { + break; + case OMPI_CXX_COMM_TYPE_INTRACOMM: intracomm = MPI::Intracomm(comm); ret = kid->cxx_copy_fn(intracomm, keyval, kid->extra_state, - attribute_val_in, attribute_val_out, + attribute_val_in, attribute_val_out, bflag); - } else if (OMPI_COMM_IS_INTER(comm)) { + break; + case OMPI_CXX_COMM_TYPE_INTERCOMM: intercomm = MPI::Intercomm(comm); ret = kid->cxx_copy_fn(intercomm, keyval, kid->extra_state, - attribute_val_in, attribute_val_out, + attribute_val_in, attribute_val_out, bflag); - } else { + break; + default: ret = MPI::ERR_COMM; } } else { @@ -322,11 +326,11 @@ ompi_mpi_cxx_comm_copy_attr_intercept(MPI_Comm comm, int keyval, } extern "C" int -ompi_mpi_cxx_comm_delete_attr_intercept(MPI_Comm comm, int keyval, +ompi_mpi_cxx_comm_delete_attr_intercept(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state) { int ret = 0; - MPI::Comm::keyval_intercept_data_t *kid = + MPI::Comm::keyval_intercept_data_t *kid = (MPI::Comm::keyval_intercept_data_t*) extra_state; // The callback may be in C or C++. 
If it's in C, it's easy - just @@ -342,54 +346,61 @@ ompi_mpi_cxx_comm_delete_attr_intercept(MPI_Comm comm, int keyval, MPI::Intercomm intercomm; MPI::Graphcomm graphcomm; MPI::Cartcomm cartcomm; - + if (NULL != kid->cxx_delete_fn) { - if (OMPI_COMM_IS_GRAPH(comm)) { + ompi_cxx_communicator_type_t comm_type = + ompi_cxx_comm_get_type (comm); + switch (comm_type) { + case OMPI_CXX_COMM_TYPE_GRAPH: graphcomm = MPI::Graphcomm(comm); - ret = kid->cxx_delete_fn(graphcomm, keyval, attribute_val, + ret = kid->cxx_delete_fn(graphcomm, keyval, attribute_val, kid->extra_state); - } else if (OMPI_COMM_IS_CART(comm)) { + break; + case OMPI_CXX_COMM_TYPE_CART: cartcomm = MPI::Cartcomm(comm); - ret = kid->cxx_delete_fn(cartcomm, keyval, attribute_val, + ret = kid->cxx_delete_fn(cartcomm, keyval, attribute_val, kid->extra_state); - } else if (OMPI_COMM_IS_INTRA(comm)) { + break; + case OMPI_CXX_COMM_TYPE_INTRACOMM: intracomm = MPI::Intracomm(comm); - ret = kid->cxx_delete_fn(intracomm, keyval, attribute_val, + ret = kid->cxx_delete_fn(intracomm, keyval, attribute_val, kid->extra_state); - } else if (OMPI_COMM_IS_INTER(comm)) { + break; + case OMPI_CXX_COMM_TYPE_INTERCOMM: intercomm = MPI::Intercomm(comm); - ret = kid->cxx_delete_fn(intercomm, keyval, attribute_val, + ret = kid->cxx_delete_fn(intercomm, keyval, attribute_val, kid->extra_state); - } else { + break; + default: ret = MPI::ERR_COMM; } } else { ret = MPI::ERR_OTHER; } - return ret; + return ret; } extern "C" int -ompi_mpi_cxx_type_copy_attr_intercept(MPI_Datatype oldtype, int keyval, - void *extra_state, void *attribute_val_in, +ompi_mpi_cxx_type_copy_attr_intercept(MPI_Datatype oldtype, int keyval, + void *extra_state, void *attribute_val_in, void *attribute_val_out, int *flag) { int ret = 0; - MPI::Datatype::keyval_intercept_data_t *kid = + MPI::Datatype::keyval_intercept_data_t *kid = (MPI::Datatype::keyval_intercept_data_t*) extra_state; if (NULL != kid->c_copy_fn) { // The callback may be in C or C++. 
If it's in C, it's easy - just // call it with no extra C++ machinery. - ret = kid->c_copy_fn(oldtype, keyval, kid->extra_state, attribute_val_in, + ret = kid->c_copy_fn(oldtype, keyval, kid->extra_state, attribute_val_in, attribute_val_out, flag); } else if (NULL != kid->cxx_copy_fn) { // If the callback was C++, we have to do a little more work - bool bflag = OPAL_INT_TO_BOOL(*flag); + bool bflag = OPAL_INT_TO_BOOL(*flag); MPI::Datatype cxx_datatype(oldtype); - ret = kid->cxx_copy_fn(cxx_datatype, keyval, kid->extra_state, + ret = kid->cxx_copy_fn(cxx_datatype, keyval, kid->extra_state, attribute_val_in, attribute_val_out, bflag); *flag = (int)bflag; } else { @@ -400,18 +411,18 @@ ompi_mpi_cxx_type_copy_attr_intercept(MPI_Datatype oldtype, int keyval, } extern "C" int -ompi_mpi_cxx_type_delete_attr_intercept(MPI_Datatype type, int keyval, +ompi_mpi_cxx_type_delete_attr_intercept(MPI_Datatype type, int keyval, void *attribute_val, void *extra_state) { int ret = 0; - MPI::Datatype::keyval_intercept_data_t *kid = + MPI::Datatype::keyval_intercept_data_t *kid = (MPI::Datatype::keyval_intercept_data_t*) extra_state; if (NULL != kid->c_delete_fn) { return kid->c_delete_fn(type, keyval, attribute_val, kid->extra_state); } else if (NULL != kid->cxx_delete_fn) { MPI::Datatype cxx_datatype(type); - return kid->cxx_delete_fn(cxx_datatype, keyval, attribute_val, + return kid->cxx_delete_fn(cxx_datatype, keyval, attribute_val, kid->extra_state); } else { ret = MPI::ERR_TYPE; @@ -421,7 +432,7 @@ ompi_mpi_cxx_type_delete_attr_intercept(MPI_Datatype type, int keyval, } extern "C" int -ompi_mpi_cxx_win_copy_attr_intercept(MPI_Win oldwin, int keyval, +ompi_mpi_cxx_win_copy_attr_intercept(MPI_Win oldwin, int keyval, void *extra_state, void *attribute_val_in, void *attribute_val_out, int *flag) { @@ -432,13 +443,13 @@ ompi_mpi_cxx_win_copy_attr_intercept(MPI_Win oldwin, int keyval, if (NULL != kid->c_copy_fn) { // The callback may be in C or C++. 
If it's in C, it's easy - just // call it with no extra C++ machinery. - ret = kid->c_copy_fn(oldwin, keyval, kid->extra_state, attribute_val_in, + ret = kid->c_copy_fn(oldwin, keyval, kid->extra_state, attribute_val_in, attribute_val_out, flag); } else if (NULL != kid->cxx_copy_fn) { // If the callback was C++, we have to do a little more work - bool bflag = OPAL_INT_TO_BOOL(*flag); + bool bflag = OPAL_INT_TO_BOOL(*flag); MPI::Win cxx_win(oldwin); - ret = kid->cxx_copy_fn(cxx_win, keyval, kid->extra_state, + ret = kid->cxx_copy_fn(cxx_win, keyval, kid->extra_state, attribute_val_in, attribute_val_out, bflag); *flag = (int)bflag; } else { @@ -449,18 +460,18 @@ ompi_mpi_cxx_win_copy_attr_intercept(MPI_Win oldwin, int keyval, } extern "C" int -ompi_mpi_cxx_win_delete_attr_intercept(MPI_Win win, int keyval, +ompi_mpi_cxx_win_delete_attr_intercept(MPI_Win win, int keyval, void *attribute_val, void *extra_state) { int ret = 0; - MPI::Win::keyval_intercept_data_t *kid = + MPI::Win::keyval_intercept_data_t *kid = (MPI::Win::keyval_intercept_data_t*) extra_state; if (NULL != kid->c_delete_fn) { return kid->c_delete_fn(win, keyval, attribute_val, kid->extra_state); } else if (NULL != kid->cxx_delete_fn) { MPI::Win cxx_win(win); - return kid->cxx_delete_fn(cxx_win, keyval, attribute_val, + return kid->cxx_delete_fn(cxx_win, keyval, attribute_val, kid->extra_state); } else { ret = MPI::ERR_WIN; @@ -476,7 +487,7 @@ ompi_mpi_cxx_win_delete_attr_intercept(MPI_Win win, int keyval, extern "C" int ompi_mpi_cxx_grequest_query_fn_intercept(void *state, MPI_Status *status) { - MPI::Grequest::Intercept_data_t *data = + MPI::Grequest::Intercept_data_t *data = (MPI::Grequest::Intercept_data_t *) state; MPI::Status s(*status); @@ -488,7 +499,7 @@ ompi_mpi_cxx_grequest_query_fn_intercept(void *state, MPI_Status *status) extern "C" int ompi_mpi_cxx_grequest_free_fn_intercept(void *state) { - MPI::Grequest::Intercept_data_t *data = + MPI::Grequest::Intercept_data_t *data = 
(MPI::Grequest::Intercept_data_t *) state; int ret = data->id_cxx_free_fn(data->id_extra); // Delete the struct that was "new"ed in MPI::Grequest::Start() @@ -499,8 +510,8 @@ ompi_mpi_cxx_grequest_free_fn_intercept(void *state) extern "C" int ompi_mpi_cxx_grequest_cancel_fn_intercept(void *state, int cancelled) { - MPI::Grequest::Intercept_data_t *data = + MPI::Grequest::Intercept_data_t *data = (MPI::Grequest::Intercept_data_t *) state; - return data->id_cxx_cancel_fn(data->id_extra, + return data->id_cxx_cancel_fn(data->id_extra, (0 != cancelled ? true : false)); } diff --git a/ompi/mpi/cxx/intercomm.h b/ompi/mpi/cxx/intercomm.h index 7dbc855c726..12d63de9955 100644 --- a/ompi/mpi/cxx/intercomm.h +++ b/ompi/mpi/cxx/intercomm.h @@ -1,21 +1,21 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -36,7 +36,7 @@ class Intercomm : public Comm { #if 0 /* OMPI_ENABLE_MPI_PROFILING */ // copy Intercomm(const Intercomm& data) : Comm(data), pmpi_comm(data.pmpi_comm) { } - Intercomm(const PMPI::Intercomm& d) : + Intercomm(const PMPI::Intercomm& d) : Comm((const PMPI::Comm&)d), pmpi_comm(d) { } // assignment @@ -61,10 +61,10 @@ class Intercomm : public Comm { mpi_comm = data; return *this; } // inter-language operability Intercomm& operator=(const MPI_Comm& data) { - mpi_comm = data; return *this; } + mpi_comm = data; return *this; } #endif - + // // Groups, Contexts, and Communicators diff --git a/ompi/mpi/cxx/intercomm_inln.h b/ompi/mpi/cxx/intercomm_inln.h index 658f75f53a7..964abe89b37 100644 --- a/ompi/mpi/cxx/intercomm_inln.h +++ b/ompi/mpi/cxx/intercomm_inln.h @@ -6,15 +6,15 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -36,7 +36,7 @@ MPI::Intercomm::Clone() const } inline int -MPI::Intercomm::Get_remote_size() const +MPI::Intercomm::Get_remote_size() const { int size; (void)MPI_Comm_remote_size(mpi_comm, &size); @@ -44,7 +44,7 @@ MPI::Intercomm::Get_remote_size() const } inline MPI::Group -MPI::Intercomm::Get_remote_group() const +MPI::Intercomm::Get_remote_group() const { MPI_Group group; (void)MPI_Comm_remote_group(mpi_comm, &group); diff --git a/ompi/mpi/cxx/intracomm.h b/ompi/mpi/cxx/intracomm.h index 9f60d662470..984cb3a4b46 100644 --- a/ompi/mpi/cxx/intracomm.h +++ b/ompi/mpi/cxx/intracomm.h @@ -6,15 +6,15 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -37,14 +37,14 @@ class Intracomm : public Comm { Intracomm(const Intracomm& data) : Comm(data), pmpi_comm(data) { } Intracomm(MPI_Comm data) : Comm(data), pmpi_comm(data) { } - - Intracomm(const PMPI::Intracomm& data) + + Intracomm(const PMPI::Intracomm& data) : Comm((const PMPI::Comm&)data), pmpi_comm(data) { } // assignment Intracomm& operator=(const Intracomm& data) { Comm::operator=(data); - pmpi_comm = data.pmpi_comm; + pmpi_comm = data.pmpi_comm; return *this; } Intracomm& operator=(const Comm_Null& data) { @@ -74,10 +74,10 @@ class Intracomm : public Comm { // inter-language operability Intracomm& operator=(const MPI_Comm& data) { - mpi_comm = data; return *this; } + mpi_comm = data; return *this; } #endif - + // // Collective Communication // @@ -85,7 +85,7 @@ class Intracomm : public Comm { // in intercomm's, so they're down here in Intracomm. // virtual void - Scan(const void *sendbuf, void *recvbuf, int count, + Scan(const void *sendbuf, void *recvbuf, int count, const Datatype & datatype, const Op & op) const; virtual void @@ -97,23 +97,23 @@ class Intracomm : public Comm { // Intracomm Dup() const; - + virtual Intracomm& Clone() const; virtual Intracomm Create(const Group& group) const; - + virtual Intracomm Split(int color, int key) const; virtual Intercomm Create_intercomm(int local_leader, const Comm& peer_comm, int remote_leader, int tag) const; - + virtual Cartcomm Create_cart(int ndims, const int dims[], const bool periods[], bool reorder) const; - + virtual Graphcomm Create_graph(int nnodes, const int index[], const int edges[], bool reorder) const; @@ -122,7 +122,7 @@ class Intracomm : public Comm { // // Process Creation and Management // - + virtual Intercomm Accept(const char* port_name, const Info& info, int root) const; @@ -161,6 +161,6 @@ class Intracomm : public Comm { // Convert an array of p_nbr Info object into an array of MPI_Info. 
// A pointer to the allocated array is returned and must be // eventually deleted. - static inline MPI_Info *convert_info_to_mpi_info(int p_nbr, + static inline MPI_Info *convert_info_to_mpi_info(int p_nbr, const Info p_info_tbl[]); }; diff --git a/ompi/mpi/cxx/intracomm_inln.h b/ompi/mpi/cxx/intracomm_inln.h index e6079391cce..a4580281bcf 100644 --- a/ompi/mpi/cxx/intracomm_inln.h +++ b/ompi/mpi/cxx/intracomm_inln.h @@ -1,21 +1,21 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. // Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -36,7 +36,7 @@ MPI::Intracomm::Intracomm(MPI_Comm data) { } inline void -MPI::Intracomm::Scan(const void *sendbuf, void *recvbuf, int count, +MPI::Intracomm::Scan(const void *sendbuf, void *recvbuf, int count, const MPI::Datatype & datatype, const MPI::Op& op) const { (void)MPI_Scan(const_cast(sendbuf), recvbuf, count, datatype, op, mpi_comm); @@ -44,7 +44,7 @@ MPI::Intracomm::Scan(const void *sendbuf, void *recvbuf, int count, inline void MPI::Intracomm::Exscan(const void *sendbuf, void *recvbuf, int count, - const MPI::Datatype & datatype, + const MPI::Datatype & datatype, const MPI::Op& op) const { (void)MPI_Exscan(const_cast(sendbuf), recvbuf, count, datatype, op, mpi_comm); @@ -92,7 +92,7 @@ MPI::Intracomm::Create_intercomm(int local_leader, { MPI_Comm newintercomm; (void)MPI_Intercomm_create(mpi_comm, local_leader, peer_comm, - remote_leader, tag, &newintercomm); + remote_leader, tag, &newintercomm); return newintercomm; } @@ -103,9 +103,9 @@ MPI::Intracomm::Create_cart(int ndims, const int dims[], int *int_periods = new int [ndims]; for (int i=0; i(dims), + (void)MPI_Cart_create(mpi_comm, ndims, const_cast(dims), int_periods, (int)reorder, &newcomm); delete [] int_periods; return newcomm; @@ -116,7 +116,7 @@ MPI::Intracomm::Create_graph(int nnodes, const int index[], const int edges[], bool reorder) const { MPI_Comm newcomm; - (void)MPI_Graph_create(mpi_comm, nnodes, const_cast(index), + (void)MPI_Graph_create(mpi_comm, nnodes, const_cast(index), const_cast(edges), (int)reorder, &newcomm); return newcomm; } @@ -127,7 +127,7 @@ MPI::Intracomm::Create_graph(int nnodes, const int index[], // inline MPI::Intercomm -MPI::Intracomm::Accept(const char* port_name, +MPI::Intracomm::Accept(const char* port_name, const MPI::Info& info, int root) const { @@ -152,12 +152,12 @@ MPI::Intracomm::Connect(const char* port_name, inline MPI::Intercomm MPI::Intracomm::Spawn(const char* command, 
const char* argv[], - int maxprocs, const MPI::Info& info, + int maxprocs, const MPI::Info& info, int root) const { MPI_Comm newcomm; (void) MPI_Comm_spawn(const_cast(command), const_cast(argv), maxprocs, - info, root, mpi_comm, &newcomm, + info, root, mpi_comm, &newcomm, (int *)MPI_ERRCODES_IGNORE); return newcomm; } @@ -170,25 +170,25 @@ MPI::Intracomm::Spawn(const char* command, const char* argv[], { MPI_Comm newcomm; (void) MPI_Comm_spawn(const_cast(command), const_cast(argv), maxprocs, - info, root, mpi_comm, &newcomm, + info, root, mpi_comm, &newcomm, array_of_errcodes); return newcomm; } inline MPI::Intercomm -MPI::Intracomm::Spawn_multiple(int count, +MPI::Intracomm::Spawn_multiple(int count, const char* array_of_commands[], const char** array_of_argv[], const int array_of_maxprocs[], const Info array_of_info[], int root) { MPI_Comm newcomm; - MPI_Info *const array_of_mpi_info = + MPI_Info *const array_of_mpi_info = convert_info_to_mpi_info(count, array_of_info); - MPI_Comm_spawn_multiple(count, const_cast(array_of_commands), - const_cast(array_of_argv), + MPI_Comm_spawn_multiple(count, const_cast(array_of_commands), + const_cast(array_of_argv), const_cast(array_of_maxprocs), array_of_mpi_info, root, mpi_comm, &newcomm, (int *)MPI_ERRCODES_IGNORE); @@ -217,11 +217,11 @@ MPI::Intracomm::Spawn_multiple(int count, int array_of_errcodes[]) { MPI_Comm newcomm; - MPI_Info *const array_of_mpi_info = + MPI_Info *const array_of_mpi_info = convert_info_to_mpi_info(count, array_of_info); - MPI_Comm_spawn_multiple(count, const_cast(array_of_commands), - const_cast(array_of_argv), + MPI_Comm_spawn_multiple(count, const_cast(array_of_commands), + const_cast(array_of_argv), const_cast(array_of_maxprocs), array_of_mpi_info, root, mpi_comm, &newcomm, array_of_errcodes); diff --git a/ompi/mpi/cxx/mpicxx.cc b/ompi/mpi/cxx/mpicxx.cc index 44b9fd49064..bd5fb5d2158 100644 --- a/ompi/mpi/cxx/mpicxx.cc +++ b/ompi/mpi/cxx/mpicxx.cc @@ -1,12 +1,12 @@ // -*- c++ -*- -// +// // 
Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. @@ -14,9 +14,9 @@ // Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ #include "mpicxx.h" @@ -25,6 +25,8 @@ SEEK_SET and friends right */ #include "ompi_config.h" +#include "cxx_glue.h" + #if OPAL_CXX_USE_PRAGMA_IDENT #pragma ident OMPI_IDENT_STRING #elif OPAL_CXX_USE_IDENT @@ -34,8 +36,6 @@ namespace MPI { const char ompi_libcxx_version_string[] = OMPI_IDENT_STRING; } -#include "ompi/errhandler/errhandler.h" - namespace MPI { #if ! 
OMPI_HAVE_CXX_EXCEPTION_SUPPORT @@ -74,7 +74,7 @@ const Datatype DOUBLE_INT(MPI_DOUBLE_INT); const Datatype LONG_INT(MPI_LONG_INT); const Datatype TWOINT(MPI_2INT); const Datatype SHORT_INT(MPI_SHORT_INT); -const Datatype LONG_DOUBLE_INT(MPI_LONG_DOUBLE); +const Datatype LONG_DOUBLE_INT(MPI_LONG_DOUBLE_INT); #if OMPI_BUILD_FORTRAN_BINDINGS // elementary datatype (Fortran) @@ -151,7 +151,7 @@ Comm_Null COMM_NULL; const Datatype DATATYPE_NULL = MPI_DATATYPE_NULL; Request REQUEST_NULL = MPI_REQUEST_NULL; const Op OP_NULL = MPI_OP_NULL; -const Errhandler ERRHANDLER_NULL; +const Errhandler ERRHANDLER_NULL; #if OMPI_PROVIDE_MPI_FILE_INTERFACE const File FILE_NULL = MPI_FILE_NULL; #endif diff --git a/ompi/mpi/cxx/mpicxx.h b/ompi/mpi/cxx/mpicxx.h index 5aa22e97122..f182e15058f 100644 --- a/ompi/mpi/cxx/mpicxx.h +++ b/ompi/mpi/cxx/mpicxx.h @@ -1,35 +1,37 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. // Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. +// Copyright (c) 2016 Los Alamos National Security, LLC. All rights +// reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // #ifndef MPIPP_H #define MPIPP_H -// +// // Let's ensure that we're really in C++, and some errant programmer // hasn't included just "for completeness" // // We do not include the opal_config.h and may not replace extern "C" { -#if defined(c_plusplus) || defined(__cplusplus) +#if defined(c_plusplus) || defined(__cplusplus) // do not include ompi_config.h. it will smash free() as a symbol #include "mpi.h" @@ -70,8 +72,8 @@ static const int SEEK_END = ompi_stdio_seek_end; #endif #ifdef OPAL_HAVE_SYS_SYNCH_H -// Solaris threads.h pulls in sys/synch.h which in certain versions -// defines LOCK_SHARED. +// Solaris threads.h pulls in sys/synch.h which in certain versions +// defines LOCK_SHARED. // include so that we can smash LOCK_SHARED #include @@ -94,50 +96,35 @@ struct opal_mutex_t; // See lengthy explanation in intercepts.cc about this function. extern "C" void -ompi_mpi_cxx_op_intercept(void *invec, void *outvec, int *len, +ompi_mpi_cxx_op_intercept(void *invec, void *outvec, int *len, MPI_Datatype *datatype, MPI_User_function *fn); -extern "C" void -ompi_mpi_cxx_comm_errhandler_invoke(ompi_errhandler_t *c_errhandler, - MPI_Comm *mpi_comm, int *err, - const char *message); -extern "C" void -ompi_mpi_cxx_win_errhandler_invoke(ompi_errhandler_t *c_errhandler, - MPI_Win *mpi_comm, int *err, - const char *message); -#if OMPI_PROVIDE_MPI_FILE_INTERFACE -extern "C" void -ompi_mpi_cxx_file_errhandler_invoke(ompi_errhandler_t *c_errhandler, - MPI_File *mpi_comm, int *err, - const char *message); -#endif - //used for attr intercept functions enum CommType { eIntracomm, eIntercomm, eCartcomm, eGraphcomm}; extern "C" int -ompi_mpi_cxx_comm_copy_attr_intercept(MPI_Comm oldcomm, int keyval, - void *extra_state, void *attribute_val_in, +ompi_mpi_cxx_comm_copy_attr_intercept(MPI_Comm oldcomm, int keyval, + void *extra_state, void *attribute_val_in, void *attribute_val_out, int *flag, MPI_Comm 
newcomm); extern "C" int -ompi_mpi_cxx_comm_delete_attr_intercept(MPI_Comm comm, int keyval, +ompi_mpi_cxx_comm_delete_attr_intercept(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state); extern "C" int -ompi_mpi_cxx_type_copy_attr_intercept(MPI_Datatype oldtype, int keyval, - void *extra_state, void *attribute_val_in, +ompi_mpi_cxx_type_copy_attr_intercept(MPI_Datatype oldtype, int keyval, + void *extra_state, void *attribute_val_in, void *attribute_val_out, int *flag); extern "C" int -ompi_mpi_cxx_type_delete_attr_intercept(MPI_Datatype type, int keyval, +ompi_mpi_cxx_type_delete_attr_intercept(MPI_Datatype type, int keyval, void *attribute_val, void *extra_state); extern "C" int -ompi_mpi_cxx_win_copy_attr_intercept(MPI_Win oldwin, int keyval, - void *extra_state, void *attribute_val_in, +ompi_mpi_cxx_win_copy_attr_intercept(MPI_Win oldwin, int keyval, + void *extra_state, void *attribute_val_in, void *attribute_val_out, int *flag); extern "C" int -ompi_mpi_cxx_win_delete_attr_intercept(MPI_Win win, int keyval, +ompi_mpi_cxx_win_delete_attr_intercept(MPI_Win win, int keyval, void *attribute_val, void *extra_state); @@ -146,11 +133,11 @@ ompi_mpi_cxx_win_delete_attr_intercept(MPI_Win win, int keyval, // MPI generalized request intercepts // -extern "C" int +extern "C" int ompi_mpi_cxx_grequest_query_fn_intercept(void *state, MPI_Status *status); -extern "C" int +extern "C" int ompi_mpi_cxx_grequest_free_fn_intercept(void *state); -extern "C" int +extern "C" int ompi_mpi_cxx_grequest_cancel_fn_intercept(void *state, int canceled); /** @@ -217,7 +204,7 @@ namespace MPI { #include "ompi/mpi/cxx/op.h" #include "ompi/mpi/cxx/status.h" #include "ompi/mpi/cxx/request.h" //includes class Prequest -#include "ompi/mpi/cxx/group.h" +#include "ompi/mpi/cxx/group.h" #include "ompi/mpi/cxx/comm.h" #include "ompi/mpi/cxx/win.h" #if OMPI_PROVIDE_MPI_FILE_INTERFACE @@ -233,7 +220,7 @@ namespace MPI { #include "openmpi/ompi/mpi/cxx/op.h" #include 
"openmpi/ompi/mpi/cxx/status.h" #include "openmpi/ompi/mpi/cxx/request.h" //includes class Prequest -#include "openmpi/ompi/mpi/cxx/group.h" +#include "openmpi/ompi/mpi/cxx/group.h" #include "openmpi/ompi/mpi/cxx/comm.h" #include "openmpi/ompi/mpi/cxx/win.h" #if OMPI_PROVIDE_MPI_FILE_INTERFACE @@ -303,5 +290,5 @@ namespace MPI { #endif #endif -#endif // #if defined(c_plusplus) || defined(__cplusplus) +#endif // #if defined(c_plusplus) || defined(__cplusplus) #endif // #ifndef MPIPP_H_ diff --git a/ompi/mpi/cxx/op.h b/ompi/mpi/cxx/op.h index a0490d9e9c5..00a9ec64d48 100644 --- a/ompi/mpi/cxx/op.h +++ b/ompi/mpi/cxx/op.h @@ -6,15 +6,15 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -46,8 +46,8 @@ class Op { //JGS took const out virtual void Init(User_function *func, bool commute); virtual void Free(); - - virtual void Reduce_local(const void *inbuf, void *inoutbuf, int count, + + virtual void Reduce_local(const void *inbuf, void *inoutbuf, int count, const MPI::Datatype& datatype) const; virtual bool Is_commutative(void) const; diff --git a/ompi/mpi/cxx/op_inln.h b/ompi/mpi/cxx/op_inln.h index bdd5a5fcfc8..1453ac86f61 100644 --- a/ompi/mpi/cxx/op_inln.h +++ b/ompi/mpi/cxx/op_inln.h @@ -1,20 +1,20 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. 
// Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -22,10 +22,10 @@ inline MPI::Op::Op() { } - + inline MPI::Op::Op(const MPI::Op& o) : pmpi_op(o.pmpi_op) { } - + inline MPI::Op::Op(MPI_Op o) : pmpi_op(o) { } @@ -72,14 +72,14 @@ inline MPI::Op::Op(const MPI::Op& op) : mpi_op(op.mpi_op) { } -inline -MPI::Op::~Op() -{ +inline +MPI::Op::~Op() +{ #if 0 mpi_op = MPI_OP_NULL; op_user_function = 0; #endif -} +} inline MPI::Op& MPI::Op::operator=(const MPI::Op& op) { @@ -118,7 +118,7 @@ extern "C" void ompi_op_set_cxx_callback(MPI_Op op, MPI_User_function*); inline void MPI::Op::Init(MPI::User_function *func, bool commute) { - (void)MPI_Op_create((MPI_User_function*) ompi_mpi_cxx_op_intercept, + (void)MPI_Op_create((MPI_User_function*) ompi_mpi_cxx_op_intercept, (int) commute, &mpi_op); ompi_op_set_cxx_callback(mpi_op, (MPI_User_function*) func); } @@ -131,16 +131,16 @@ MPI::Op::Free() } -inline void -MPI::Op::Reduce_local(const void *inbuf, void *inoutbuf, int count, +inline void +MPI::Op::Reduce_local(const void *inbuf, void *inoutbuf, int count, const MPI::Datatype& datatype) const { - (void)MPI_Reduce_local(const_cast(inbuf), inoutbuf, count, + (void)MPI_Reduce_local(const_cast(inbuf), inoutbuf, count, datatype, mpi_op); } -inline bool +inline bool MPI::Op::Is_commutative(void) const { int commute; diff --git a/ompi/mpi/cxx/request.h b/ompi/mpi/cxx/request.h index f90e8e5836b..f8e3153d602 100644 --- a/ompi/mpi/cxx/request.h +++ 
b/ompi/mpi/cxx/request.h @@ -6,15 +6,15 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -41,9 +41,9 @@ class Request { pmpi_request = r.pmpi_request; return *this; } // comparison - bool operator== (const Request &a) + bool operator== (const Request &a) { return (bool)(pmpi_request == a.pmpi_request); } - bool operator!= (const Request &a) + bool operator!= (const Request &a) { return (bool)!(*this == a); } // inter-language operability @@ -68,9 +68,9 @@ class Request { mpi_request = r.mpi_request; return *this; } // comparison - bool operator== (const Request &a) + bool operator== (const Request &a) { return (bool)(mpi_request == a.mpi_request); } - bool operator!= (const Request &a) + bool operator!= (const Request &a) { return (bool)!(*this == a); } // inter-language operability @@ -104,11 +104,11 @@ class Request { static bool Testany(int count, Request array[], int& index); static void Waitall(int count, Request req_array[], Status stat_array[]); - + static void Waitall(int count, Request req_array[]); static bool Testall(int count, Request req_array[], Status stat_array[]); - + static bool Testall(int count, Request req_array[]); static int Waitsome(int incount, Request req_array[], @@ -138,7 +138,7 @@ class Request { #if 0 /* OMPI_ENABLE_MPI_PROFILING */ PMPI::Request pmpi_request; -#endif +#endif }; @@ -154,12 +154,12 @@ class Prequest : public Request { #if 0 /* OMPI_ENABLE_MPI_PROFILING */ Prequest(const Request& p) : Request(p), 
pmpi_request(p) { } - Prequest(const PMPI::Prequest& r) : + Prequest(const PMPI::Prequest& r) : Request((const PMPI::Request&)r), pmpi_request(r) { } Prequest(const MPI_Request &i) : Request(i), pmpi_request(i) { } - + virtual ~Prequest() { } Prequest& operator=(const Request& r) { @@ -190,7 +190,7 @@ class Prequest : public Request { #if 0 /* OMPI_ENABLE_MPI_PROFILING */ private: PMPI::Prequest pmpi_request; -#endif +#endif }; diff --git a/ompi/mpi/cxx/request_inln.h b/ompi/mpi/cxx/request_inln.h index 68417b26a19..d042b4d9a9f 100644 --- a/ompi/mpi/cxx/request_inln.h +++ b/ompi/mpi/cxx/request_inln.h @@ -1,21 +1,21 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. // Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -24,25 +24,25 @@ // inline void -MPI::Request::Wait(MPI::Status &status) +MPI::Request::Wait(MPI::Status &status) { (void)MPI_Wait(&mpi_request, &status.mpi_status); } inline void -MPI::Request::Wait() +MPI::Request::Wait() { (void)MPI_Wait(&mpi_request, MPI_STATUS_IGNORE); } inline void -MPI::Request::Free() +MPI::Request::Free() { (void)MPI_Request_free(&mpi_request); } inline bool -MPI::Request::Test(MPI::Status &status) +MPI::Request::Test(MPI::Status &status) { int t; (void)MPI_Test(&mpi_request, &t, &status.mpi_status); @@ -50,7 +50,7 @@ MPI::Request::Test(MPI::Status &status) } inline bool -MPI::Request::Test() +MPI::Request::Test() { int t; (void)MPI_Test(&mpi_request, &t, MPI_STATUS_IGNORE); @@ -115,7 +115,7 @@ MPI::Request::Testany(int count, MPI::Request array[], int& index) for (i=0; i < count; i++) { array_of_requests[i] = array[i]; } - (void)MPI_Testany(count, array_of_requests, &index, &flag, + (void)MPI_Testany(count, array_of_requests, &index, &flag, MPI_STATUS_IGNORE); for (i=0; i < count; i++) { array[i] = array_of_requests[i]; @@ -159,7 +159,7 @@ MPI::Request::Waitall(int count, MPI::Request req_array[]) } delete [] array_of_requests; -} +} inline bool MPI::Request::Testall(int count, MPI::Request req_array[], @@ -198,11 +198,11 @@ MPI::Request::Testall(int count, MPI::Request req_array[]) delete [] array_of_requests; return OPAL_INT_TO_BOOL(flag); -} +} inline int MPI::Request::Waitsome(int incount, MPI::Request req_array[], - int array_of_indices[], MPI::Status stat_array[]) + int array_of_indices[], MPI::Status stat_array[]) { int i, outcount; MPI_Request* array_of_requests = new MPI_Request[incount]; @@ -223,7 +223,7 @@ MPI::Request::Waitsome(int incount, MPI::Request req_array[], inline int MPI::Request::Waitsome(int incount, MPI::Request req_array[], - int array_of_indices[]) + int array_of_indices[]) { int i, outcount; MPI_Request* array_of_requests = new 
MPI_Request[incount]; @@ -244,7 +244,7 @@ MPI::Request::Waitsome(int incount, MPI::Request req_array[], inline int MPI::Request::Testsome(int incount, MPI::Request req_array[], - int array_of_indices[], MPI::Status stat_array[]) + int array_of_indices[], MPI::Status stat_array[]) { int i, outcount; MPI_Request* array_of_requests = new MPI_Request[incount]; @@ -265,7 +265,7 @@ MPI::Request::Testsome(int incount, MPI::Request req_array[], inline int MPI::Request::Testsome(int incount, MPI::Request req_array[], - int array_of_indices[]) + int array_of_indices[]) { int i, outcount; MPI_Request* array_of_requests = new MPI_Request[incount]; @@ -305,12 +305,12 @@ MPI::Prequest::Startall(int count, MPI:: Prequest array_of_requests[]) for (i=0; i < count; i++) { mpi_requests[i] = array_of_requests[i]; } - (void)MPI_Startall(count, mpi_requests); + (void)MPI_Startall(count, mpi_requests); for (i=0; i < count; i++) { array_of_requests[i].mpi_request = mpi_requests[i] ; } delete [] mpi_requests; -} +} inline bool MPI::Request::Get_status(MPI::Status& status) const { @@ -343,7 +343,7 @@ MPI::Grequest::Start(Query_function *query_fn, Free_function *free_fn, Cancel_function *cancel_fn, void *extra) { MPI_Request grequest = 0; - Intercept_data_t *new_extra = + Intercept_data_t *new_extra = new MPI::Grequest::Intercept_data_t; new_extra->id_extra = extra; diff --git a/ompi/mpi/cxx/status.h b/ompi/mpi/cxx/status.h index 9a716ab47c7..872707890ff 100644 --- a/ompi/mpi/cxx/status.h +++ b/ompi/mpi/cxx/status.h @@ -1,20 +1,20 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. 
-// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -94,9 +94,9 @@ class Status { virtual void Set_source(int source); virtual int Get_tag() const; - + virtual void Set_tag(int tag); - + virtual int Get_error() const; virtual void Set_error(int error); diff --git a/ompi/mpi/cxx/status_inln.h b/ompi/mpi/cxx/status_inln.h index f85caac7dd0..2110c671f8a 100644 --- a/ompi/mpi/cxx/status_inln.h +++ b/ompi/mpi/cxx/status_inln.h @@ -1,20 +1,20 @@ // -*- c++ -*- -// +// // Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana // University Research and Technology // Corporation. All rights reserved. // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // diff --git a/ompi/mpi/cxx/topology.h b/ompi/mpi/cxx/topology.h index dee26322963..167c8b65393 100644 --- a/ompi/mpi/cxx/topology.h +++ b/ompi/mpi/cxx/topology.h @@ -6,15 +6,15 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. 
-// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -30,10 +30,10 @@ class Cartcomm : public Intracomm { inline Cartcomm(const MPI_Comm& data); #if 0 /* OMPI_ENABLE_MPI_PROFILING */ Cartcomm(const Cartcomm& data) : Intracomm(data), pmpi_comm(data) { } - Cartcomm(const PMPI::Cartcomm& d) : + Cartcomm(const PMPI::Cartcomm& d) : Intracomm((const PMPI::Intracomm&)d), pmpi_comm(d) { } - + // assignment Cartcomm& operator=(const Cartcomm& data) { Intracomm::operator=(data); @@ -54,7 +54,7 @@ class Cartcomm : public Intracomm { mpi_comm = data; return *this; } // inter-language operability Cartcomm& operator=(const MPI_Comm& data) { - mpi_comm = data; return *this; } + mpi_comm = data; return *this; } #endif // // Groups, Contexts, and Communicators @@ -80,7 +80,7 @@ class Cartcomm : public Intracomm { virtual void Shift(int direction, int disp, int &rank_source, int &rank_dest) const; - + virtual Cartcomm Sub(const bool remain_dims[]) const; virtual int Map(int ndims, const int dims[], const bool periods[]) const; @@ -107,9 +107,9 @@ class Graphcomm : public Intracomm { inline Graphcomm(const MPI_Comm& data); #if 0 /* OMPI_ENABLE_MPI_PROFILING */ Graphcomm(const Graphcomm& data) : Intracomm(data), pmpi_comm(data) { } - Graphcomm(const PMPI::Graphcomm& d) : + Graphcomm(const PMPI::Graphcomm& d) : Intracomm((const PMPI::Intracomm&)d), pmpi_comm(d) { } - + // assignment Graphcomm& operator=(const Graphcomm& data) { Intracomm::operator=(data); @@ -131,13 +131,13 @@ class Graphcomm : public Intracomm { mpi_comm = data; return *this; } // inter-language operability Graphcomm& operator=(const 
MPI_Comm& data) { - mpi_comm = data; return *this; } + mpi_comm = data; return *this; } #endif // // Groups, Contexts, and Communicators // - + Graphcomm Dup() const; virtual Graphcomm& Clone() const; @@ -148,15 +148,15 @@ class Graphcomm : public Intracomm { virtual void Get_dims(int nnodes[], int nedges[]) const; - virtual void Get_topo(int maxindex, int maxedges, int index[], + virtual void Get_topo(int maxindex, int maxedges, int index[], int edges[]) const; virtual int Get_neighbors_count(int rank) const; - virtual void Get_neighbors(int rank, int maxneighbors, + virtual void Get_neighbors(int rank, int maxneighbors, int neighbors[]) const; - virtual int Map(int nnodes, const int index[], + virtual int Map(int nnodes, const int index[], const int edges[]) const; #if 0 /* OMPI_ENABLE_MPI_PROFILING */ diff --git a/ompi/mpi/cxx/topology_inln.h b/ompi/mpi/cxx/topology_inln.h index 3bdbab2ddc0..13a1542bd90 100644 --- a/ompi/mpi/cxx/topology_inln.h +++ b/ompi/mpi/cxx/topology_inln.h @@ -6,16 +6,17 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. // Copyright (c) 2011 FUJITSU LIMITED. All rights reserved. +// Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -55,7 +56,7 @@ MPI::Cartcomm::Dup() const // inline int -MPI::Cartcomm::Get_dim() const +MPI::Cartcomm::Get_dim() const { int ndims; (void)MPI_Cartdim_get(mpi_comm, &ndims); @@ -79,22 +80,22 @@ MPI::Cartcomm::Get_topo(int maxdims, int dims[], bool periods[], } inline int -MPI::Cartcomm::Get_cart_rank(const int coords[]) const +MPI::Cartcomm::Get_cart_rank(const int coords[]) const { - int rank; - (void)MPI_Cart_rank(mpi_comm, const_cast(coords), &rank); - return rank; + int myrank; + (void)MPI_Cart_rank(mpi_comm, const_cast(coords), &myrank); + return myrank; } - + inline void -MPI::Cartcomm::Get_coords(int rank, int maxdims, int coords[]) const +MPI::Cartcomm::Get_coords(int rank, int maxdims, int coords[]) const { (void)MPI_Cart_coords(mpi_comm, rank, maxdims, coords); -} +} inline void MPI::Cartcomm::Shift(int direction, int disp, - int &rank_source, int &rank_dest) const + int &rank_source, int &rank_dest) const { (void)MPI_Cart_shift(mpi_comm, direction, disp, &rank_source, &rank_dest); } @@ -115,7 +116,7 @@ MPI::Cartcomm::Sub(const bool remain_dims[]) const } inline int -MPI::Cartcomm::Map(int ndims, const int dims[], const bool periods[]) const +MPI::Cartcomm::Map(int ndims, const int dims[], const bool periods[]) const { int *int_periods = new int [ndims]; for (int i=0; i(index), const_cast(edges), &newrank); diff --git a/ompi/mpi/cxx/win.cc b/ompi/mpi/cxx/win.cc index 1f106073290..7e1a18e6951 100644 --- a/ompi/mpi/cxx/win.cc +++ b/ompi/mpi/cxx/win.cc @@ -1,44 +1,34 @@ // -*- c++ -*- -// -// Copyright (c) 2006 Los Alamos National Security, LLC. All rights -// reserved. +// +// Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights +// reserved. // Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. // Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // // do not include ompi_config.h because it kills the free/malloc defines #include "mpi.h" +#include "ompi/constants.h" #include "ompi/mpi/cxx/mpicxx.h" +#include "cxx_glue.h" - -#include "ompi/communicator/communicator.h" -#include "ompi/attribute/attribute.h" -#include "ompi/errhandler/errhandler.h" - -void +void MPI::Win::Free() { (void) MPI_Win_free(&mpi_win); } - + // This function needs some internal OMPI types, so it's not inlined MPI::Errhandler MPI::Win::Create_errhandler(MPI::Win::Errhandler_function* function) { - MPI_Errhandler c_errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_WIN, - (ompi_errhandler_generic_handler_fn_t*) function, - OMPI_ERRHANDLER_LANG_CXX); - c_errhandler->eh_cxx_dispatch_fn = - (ompi_errhandler_cxx_dispatch_fn_t*) - ompi_mpi_cxx_win_errhandler_invoke; - return c_errhandler; + return ompi_cxx_errhandler_create_win ((ompi_cxx_dummy_fn_t *) function); } @@ -50,21 +40,16 @@ MPI::Win::do_create_keyval(MPI_Win_copy_attr_function* c_copy_fn, void* extra_state, int &keyval) { int ret, count = 0; - ompi_attribute_fn_ptr_union_t copy_fn; - ompi_attribute_fn_ptr_union_t delete_fn; keyval_intercept_data_t *cxx_extra_state; // If both the callbacks are C, then do the simple thing -- no // need for all the C++ machinery. 
if (NULL != c_copy_fn && NULL != c_delete_fn) { - copy_fn.attr_win_copy_fn = - (MPI_Win_internal_copy_attr_function*) c_copy_fn; - delete_fn.attr_win_delete_fn = c_delete_fn; - ret = ompi_attr_create_keyval(COMM_ATTR, copy_fn, delete_fn, - &keyval, extra_state, 0, NULL); + ret = ompi_cxx_attr_create_keyval_win (c_copy_fn, c_delete_fn, &keyval, + extra_state, 0, NULL); if (MPI_SUCCESS != ret) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, - "MPI::Win::Create_keyval"); + return ompi_cxx_errhandler_invoke_comm (MPI_COMM_WORLD, ret, + "MPI::Win::Create_keyval"); } } @@ -75,11 +60,11 @@ MPI::Win::do_create_keyval(MPI_Win_copy_attr_function* c_copy_fn, // extra_state for the delete callback), we have to use the C++ // callbacks for both (and therefore translate the C++-special // extra_state into the user's original extra_state). - cxx_extra_state = (keyval_intercept_data_t*) + cxx_extra_state = (keyval_intercept_data_t*) malloc(sizeof(keyval_intercept_data_t)); if (NULL == cxx_extra_state) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, - "MPI::Win::Create_keyval"); + return ompi_cxx_errhandler_invoke_comm (MPI_COMM_WORLD, MPI_ERR_NO_MEM, + "MPI::Win::Create_keyval"); } cxx_extra_state->c_copy_fn = c_copy_fn; cxx_extra_state->cxx_copy_fn = cxx_copy_fn; @@ -102,8 +87,8 @@ MPI::Win::do_create_keyval(MPI_Win_copy_attr_function* c_copy_fn, } if (2 != count) { free(cxx_extra_state); - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, - "MPI::Win::Create_keyval"); + return ompi_cxx_errhandler_invoke_comm (MPI_COMM_WORLD, MPI_ERR_ARG, + "MPI::Win::Create_keyval"); } // We do not call MPI_Win_create_keyval() here because we need to @@ -116,17 +101,12 @@ MPI::Win::do_create_keyval(MPI_Win_copy_attr_function* c_copy_fn, // ensure to set the destructor atomicly when the keyval is // created). 
- copy_fn.attr_win_copy_fn = - (MPI_Win_internal_copy_attr_function*) - ompi_mpi_cxx_win_copy_attr_intercept; - delete_fn.attr_win_delete_fn = - ompi_mpi_cxx_win_delete_attr_intercept; - ret = ompi_attr_create_keyval(WIN_ATTR, copy_fn, delete_fn, - &keyval, cxx_extra_state, 0, - cxx_extra_state); + ret = ompi_cxx_attr_create_keyval_win ((MPI_Win_copy_attr_function *) ompi_mpi_cxx_win_copy_attr_intercept, + ompi_mpi_cxx_win_delete_attr_intercept, &keyval, + cxx_extra_state, 0, NULL); if (OMPI_SUCCESS != ret) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, - "MPI::Win::Create_keyval"); + return ompi_cxx_errhandler_invoke_comm (MPI_COMM_WORLD, ret, + "MPI::Win::Create_keyval"); } return MPI_SUCCESS; diff --git a/ompi/mpi/cxx/win.h b/ompi/mpi/cxx/win.h index a1dee15c16a..8c3b0bb1be1 100644 --- a/ompi/mpi/cxx/win.h +++ b/ompi/mpi/cxx/win.h @@ -6,16 +6,16 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. // Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. 
// $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -59,7 +59,7 @@ class Win { Win(const Win& data) : mpi_win(data.mpi_win) { } Win(MPI_Win i) : mpi_win(i) { } - + virtual ~Win() { } Win& operator=(const Win& data) { @@ -78,17 +78,17 @@ class Win { // // User defined functions // - typedef int Copy_attr_function(const Win& oldwin, int win_keyval, - void* extra_state, void* attribute_val_in, - void* attribute_val_out, bool& flag); - - typedef int Delete_attr_function(Win& win, int win_keyval, - void* attribute_val, void* extra_state); - + typedef int Copy_attr_function(const Win& oldwin, int win_keyval, + void* extra_state, void* attribute_val_in, + void* attribute_val_out, bool& flag); + + typedef int Delete_attr_function(Win& win, int win_keyval, + void* attribute_val, void* extra_state); + typedef void Errhandler_function(Win &, int *, ... ); typedef Errhandler_function Errhandler_fn __mpi_interface_deprecated__("MPI::Win::Errhandler_fn was deprecated in MPI-2.2; use MPI::Win::Errhandler_function instead"); - + // // Errhandler // @@ -96,73 +96,73 @@ class Win { virtual void Set_errhandler(const MPI::Errhandler& errhandler) const; - virtual MPI::Errhandler Get_errhandler() const; + virtual MPI::Errhandler Get_errhandler() const; // // One sided communication // - virtual void Accumulate(const void* origin_addr, int origin_count, - const MPI::Datatype& origin_datatype, - int target_rank, MPI::Aint target_disp, - int target_count, - const MPI::Datatype& target_datatype, + virtual void Accumulate(const void* origin_addr, int origin_count, + const MPI::Datatype& origin_datatype, + int target_rank, MPI::Aint target_disp, + int target_count, + const MPI::Datatype& target_datatype, const MPI::Op& op) const; - + virtual void Complete() const; - - static Win Create(const void* base, MPI::Aint size, int disp_unit, + + static Win Create(const void* base, MPI::Aint size, int disp_unit, const MPI::Info& info, const MPI::Intracomm& comm); - + 
virtual void Fence(int assert) const; - - virtual void Free(); - - virtual void Get(const void *origin_addr, int origin_count, - const MPI::Datatype& origin_datatype, int target_rank, - MPI::Aint target_disp, int target_count, + + virtual void Free(); + + virtual void Get(const void *origin_addr, int origin_count, + const MPI::Datatype& origin_datatype, int target_rank, + MPI::Aint target_disp, int target_count, const MPI::Datatype& target_datatype) const; - - virtual MPI::Group Get_group() const; + + virtual MPI::Group Get_group() const; virtual void Lock(int lock_type, int rank, int assert) const; - + virtual void Post(const MPI::Group& group, int assert) const; - - virtual void Put(const void* origin_addr, int origin_count, - const MPI::Datatype& origin_datatype, int target_rank, - MPI::Aint target_disp, int target_count, + + virtual void Put(const void* origin_addr, int origin_count, + const MPI::Datatype& origin_datatype, int target_rank, + MPI::Aint target_disp, int target_count, const MPI::Datatype& target_datatype) const; - - virtual void Start(const MPI::Group& group, int assert) const; - + + virtual void Start(const MPI::Group& group, int assert) const; + virtual bool Test() const; virtual void Unlock(int rank) const; - + virtual void Wait() const; - + // // External Interfaces // virtual void Call_errhandler(int errorcode) const; - + // Need 4 overloaded versions of this function because per the // MPI-2 spec, you can mix-n-match the C predefined functions with // C++ functions. 
- static int Create_keyval(Copy_attr_function* win_copy_attr_fn, - Delete_attr_function* win_delete_attr_fn, + static int Create_keyval(Copy_attr_function* win_copy_attr_fn, + Delete_attr_function* win_delete_attr_fn, void* extra_state); - static int Create_keyval(MPI_Win_copy_attr_function* win_copy_attr_fn, - MPI_Win_delete_attr_function* win_delete_attr_fn, + static int Create_keyval(MPI_Win_copy_attr_function* win_copy_attr_fn, + MPI_Win_delete_attr_function* win_delete_attr_fn, void* extra_state); - static int Create_keyval(Copy_attr_function* win_copy_attr_fn, - MPI_Win_delete_attr_function* win_delete_attr_fn, + static int Create_keyval(Copy_attr_function* win_copy_attr_fn, + MPI_Win_delete_attr_function* win_delete_attr_fn, void* extra_state); - static int Create_keyval(MPI_Win_copy_attr_function* win_copy_attr_fn, - Delete_attr_function* win_delete_attr_fn, + static int Create_keyval(MPI_Win_copy_attr_function* win_copy_attr_fn, + Delete_attr_function* win_delete_attr_fn, void* extra_state); - + protected: // Back-end function to do the heavy lifting for creating the // keyval @@ -174,20 +174,20 @@ class Win { public: virtual void Delete_attr(int win_keyval); - - static void Free_keyval(int& win_keyval); - + + static void Free_keyval(int& win_keyval); + // version 1: pre-errata Get_attr (not correct, but probably nice to support bool Get_attr(const Win& win, int win_keyval, void* attribute_val) const; // version 2: post-errata Get_attr (correct, but no one seems to know about it) bool Get_attr(int win_keyval, void* attribute_val) const; - + virtual void Get_name(char* win_name, int& resultlen) const; - + virtual void Set_attr(int win_keyval, const void* attribute_val); - + virtual void Set_name(const char* win_name); // Data that is passed through keyval create when C++ callback @@ -202,7 +202,7 @@ class Win { // Protect the global list from multiple thread access static opal_mutex_t cxx_extra_states_lock; - + protected: #if 0 /* OMPI_ENABLE_MPI_PROFILING 
*/ PMPI::Win pmpi_win; diff --git a/ompi/mpi/cxx/win_inln.h b/ompi/mpi/cxx/win_inln.h index 1a6d94b5d0b..d8901967233 100644 --- a/ompi/mpi/cxx/win_inln.h +++ b/ompi/mpi/cxx/win_inln.h @@ -6,16 +6,16 @@ // Copyright (c) 2004-2005 The University of Tennessee and The University // of Tennessee Research Foundation. All rights // reserved. -// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +// Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, // University of Stuttgart. All rights reserved. // Copyright (c) 2004-2005 The Regents of the University of California. // All rights reserved. // Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. // Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. // $COPYRIGHT$ -// +// // Additional copyrights may follow -// +// // $HEADER$ // @@ -26,8 +26,8 @@ // -inline MPI::Errhandler -MPI::Win:: Get_errhandler() const +inline MPI::Errhandler +MPI::Win:: Get_errhandler() const { MPI_Errhandler errhandler; MPI_Win_get_errhandler(mpi_win, &errhandler); @@ -35,7 +35,7 @@ MPI::Win:: Get_errhandler() const } -inline void +inline void MPI::Win::Set_errhandler(const MPI::Errhandler& errhandler) const { (void)MPI_Win_set_errhandler(mpi_win, errhandler); @@ -47,31 +47,31 @@ MPI::Win::Set_errhandler(const MPI::Errhandler& errhandler) const // -inline void +inline void MPI::Win::Accumulate(const void* origin_addr, int origin_count, const MPI::Datatype& origin_datatype, int target_rank, MPI::Aint target_disp, int target_count, const MPI::Datatype& target_datatype, - const MPI::Op& op) const + const MPI::Op& op) const { (void) MPI_Accumulate(const_cast(origin_addr), origin_count, origin_datatype, - target_rank, target_disp, target_count, + target_rank, target_disp, target_count, target_datatype, op, mpi_win); - + } inline void -MPI::Win::Complete() const +MPI::Win::Complete() const { (void) MPI_Win_complete(mpi_win); } -inline MPI::Win -MPI::Win::Create(const void* base, MPI::Aint size, 
- int disp_unit, const MPI::Info& info, - const MPI::Intracomm& comm) +inline MPI::Win +MPI::Win::Create(const void* base, MPI::Aint size, + int disp_unit, const MPI::Info& info, + const MPI::Intracomm& comm) { MPI_Win newwin; (void) MPI_Win_create(const_cast(base), size, disp_unit, info, comm, &newwin); @@ -79,28 +79,28 @@ MPI::Win::Create(const void* base, MPI::Aint size, } -inline void -MPI::Win::Fence(int assert) const +inline void +MPI::Win::Fence(int assert) const { (void) MPI_Win_fence(assert, mpi_win); } -inline void -MPI::Win::Get(const void *origin_addr, int origin_count, - const MPI::Datatype& origin_datatype, - int target_rank, MPI::Aint target_disp, - int target_count, - const MPI::Datatype& target_datatype) const +inline void +MPI::Win::Get(const void *origin_addr, int origin_count, + const MPI::Datatype& origin_datatype, + int target_rank, MPI::Aint target_disp, + int target_count, + const MPI::Datatype& target_datatype) const { (void) MPI_Get(const_cast(origin_addr), origin_count, origin_datatype, - target_rank, target_disp, + target_rank, target_disp, target_count, target_datatype, mpi_win); } -inline MPI::Group +inline MPI::Group MPI::Win::Get_group() const { MPI_Group mpi_group; @@ -109,59 +109,59 @@ MPI::Win::Get_group() const } -inline void -MPI::Win::Lock(int lock_type, int rank, int assert) const +inline void +MPI::Win::Lock(int lock_type, int rank, int assert) const { (void) MPI_Win_lock(lock_type, rank, assert, mpi_win); } -inline void -MPI::Win::Post(const MPI::Group& group, int assert) const +inline void +MPI::Win::Post(const MPI::Group& group, int assert) const { (void) MPI_Win_post(group, assert, mpi_win); } -inline void -MPI::Win::Put(const void* origin_addr, int origin_count, - const MPI::Datatype& origin_datatype, - int target_rank, MPI::Aint target_disp, - int target_count, - const MPI::Datatype& target_datatype) const +inline void +MPI::Win::Put(const void* origin_addr, int origin_count, + const MPI::Datatype& origin_datatype, + 
int target_rank, MPI::Aint target_disp, + int target_count, + const MPI::Datatype& target_datatype) const { - (void) MPI_Put(const_cast(origin_addr), origin_count, origin_datatype, + (void) MPI_Put(const_cast(origin_addr), origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, mpi_win); - + } -inline void +inline void MPI::Win::Start(const MPI::Group& group, int assert) const { (void) MPI_Win_start(group, assert, mpi_win); } -inline bool +inline bool MPI::Win::Test() const { int flag; MPI_Win_test(mpi_win, &flag); return OPAL_INT_TO_BOOL(flag); - + } -inline void +inline void MPI::Win::Unlock(int rank) const { (void) MPI_Win_unlock(rank, mpi_win); } -inline void +inline void MPI::Win::Wait() const { (void) MPI_Win_wait(mpi_win); @@ -172,7 +172,7 @@ MPI::Win::Wait() const // External Interfaces // -inline void +inline void MPI::Win::Call_errhandler(int errorcode) const { (void) MPI_Win_call_errhandler(mpi_win, errorcode); @@ -182,12 +182,12 @@ MPI::Win::Call_errhandler(int errorcode) const // functions inline int MPI::Win::Create_keyval(MPI::Win::Copy_attr_function* win_copy_attr_fn, - MPI::Win::Delete_attr_function* win_delete_attr_fn, + MPI::Win::Delete_attr_function* win_delete_attr_fn, void* extra_state) { // Back-end function does the heavy lifting int ret, keyval; - ret = do_create_keyval(NULL, NULL, + ret = do_create_keyval(NULL, NULL, win_copy_attr_fn, win_delete_attr_fn, extra_state, keyval); return (MPI_SUCCESS == ret) ? 
keyval : ret; @@ -197,13 +197,13 @@ MPI::Win::Create_keyval(MPI::Win::Copy_attr_function* win_copy_attr_fn, // functions inline int MPI::Win::Create_keyval(MPI_Win_copy_attr_function* win_copy_attr_fn, - MPI_Win_delete_attr_function* win_delete_attr_fn, + MPI_Win_delete_attr_function* win_delete_attr_fn, void* extra_state) { // Back-end function does the heavy lifting int ret, keyval; ret = do_create_keyval(win_copy_attr_fn, win_delete_attr_fn, - NULL, NULL, + NULL, NULL, extra_state, keyval); return (MPI_SUCCESS == ret) ? keyval : ret; } @@ -218,7 +218,7 @@ MPI::Win::Create_keyval(MPI::Win::Copy_attr_function* win_copy_attr_fn, // Back-end function does the heavy lifting int ret, keyval; ret = do_create_keyval(NULL, win_delete_attr_fn, - win_copy_attr_fn, NULL, + win_copy_attr_fn, NULL, extra_state, keyval); return (MPI_SUCCESS == ret) ? keyval : ret; } @@ -238,14 +238,14 @@ MPI::Win::Create_keyval(MPI_Win_copy_attr_function* win_copy_attr_fn, return (MPI_SUCCESS == ret) ? keyval : ret; } -inline void -MPI::Win::Delete_attr(int win_keyval) +inline void +MPI::Win::Delete_attr(int win_keyval) { (void) MPI_Win_delete_attr(mpi_win, win_keyval); } -inline void +inline void MPI::Win::Free_keyval(int& win_keyval) { (void) MPI_Win_free_keyval(&win_keyval); @@ -264,7 +264,7 @@ MPI::Win::Get_attr(const Win& win, int win_keyval, // version 2: post-errata Get_attr (correct, but no one seems to know about it) -inline bool +inline bool MPI::Win::Get_attr(int win_keyval, void* attribute_val) const { int ret; @@ -273,22 +273,22 @@ MPI::Win::Get_attr(int win_keyval, void* attribute_val) const } -inline void +inline void MPI::Win::Get_name(char* win_name, int& resultlen) const { (void) MPI_Win_get_name(mpi_win, win_name, &resultlen); } -inline void -MPI::Win::Set_attr(int win_keyval, const void* attribute_val) +inline void +MPI::Win::Set_attr(int win_keyval, const void* attribute_val) { (void) MPI_Win_set_attr(mpi_win, win_keyval, const_cast(attribute_val)); } -inline void 
-MPI::Win::Set_name(const char* win_name) +inline void +MPI::Win::Set_name(const char* win_name) { (void) MPI_Win_set_name(mpi_win, const_cast(win_name)); } diff --git a/ompi/mpi/fortran/base/Makefile.am b/ompi/mpi/fortran/base/Makefile.am index bb7a9c233c9..35738b27a40 100644 --- a/ompi/mpi/fortran/base/Makefile.am +++ b/ompi/mpi/fortran/base/Makefile.am @@ -5,15 +5,17 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -22,7 +24,8 @@ noinst_LTLIBRARIES = EXTRA_DIST = \ attr-fn-int-callback-interfaces.h \ conversion-fn-null-int-interface.h \ - gen-mpi-sizeof.pl + gen-mpi-sizeof.pl \ + gen-mpi-mangling.pl #----------------------------------------------------------------------------- diff --git a/ompi/mpi/fortran/base/attr-fn-int-callback-interfaces.h b/ompi/mpi/fortran/base/attr-fn-int-callback-interfaces.h index cd997dbc92d..27c64cc6251 100644 --- a/ompi/mpi/fortran/base/attr-fn-int-callback-interfaces.h +++ b/ompi/mpi/fortran/base/attr-fn-int-callback-interfaces.h @@ -4,6 +4,8 @@ ! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2013 Los Alamos National Security, LLC. All rights ! reserved. +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ ! ! 
Additional copyrights may follow @@ -19,6 +21,15 @@ interface !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + subroutine MPI_DUP_FN( comm, comm_keyval, extra_state, & + attribute_val_in, attribute_val_out, & + flag, ierr ) + implicit none + integer :: comm, comm_keyval, extra_state + integer :: attribute_val_in, attribute_val_out, ierr + logical :: flag + end subroutine MPI_DUP_FN + subroutine MPI_NULL_COPY_FN( comm, comm_keyval, extra_state, & attribute_val_in, attribute_val_out, & flag, ierr ) @@ -38,6 +49,17 @@ interface !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + subroutine MPI_COMM_DUP_FN(oldcomm, comm_keyval, extra_state, attribute_val_in, & + attribute_val_out, flag, ierr ) + implicit none + include 'mpif-config.h' + integer :: oldcomm + integer :: comm_keyval + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + integer :: ierr + end subroutine MPI_COMM_DUP_FN + subroutine MPI_COMM_NULL_COPY_FN( comm, comm_keyval, extra_state, & attribute_val_in, attribute_val_out, & flag, ierr ) @@ -62,6 +84,18 @@ interface !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + subroutine MPI_TYPE_DUP_FN( oldtype, type_keyval, extra_state, & + attribute_val_in, attribute_val_out, & + flag, ierr ) + implicit none + include 'mpif-config.h' + integer :: oldtype + integer :: type_keyval + integer(KIND=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + integer :: ierr + end subroutine MPI_TYPE_DUP_FN + subroutine MPI_TYPE_NULL_COPY_FN( type, type_keyval, extra_state, & attribute_val_in, attribute_val_out, & flag, ierr ) @@ -86,6 +120,18 @@ interface !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ subroutine MPI_WIN_DUP_FN( oldwin, win_keyval, extra_state, & + attribute_val_in, attribute_val_out, & + flag, ierr) + implicit none + include 'mpif-config.h' + integer :: oldwin + integer :: win_keyval + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + integer :: ierr + end subroutine MPI_WIN_DUP_FN + subroutine MPI_WIN_NULL_COPY_FN( window, win_keyval, extra_state, & attribute_val_in, attribute_val_out, & flag, ierr ) diff --git a/ompi/mpi/fortran/base/attr_fn_f.c b/ompi/mpi/fortran/base/attr_fn_f.c index 2b4aa3d964c..efdb8238aca 100644 --- a/ompi/mpi/fortran/base/attr_fn_f.c +++ b/ompi/mpi/fortran/base/attr_fn_f.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -175,7 +175,7 @@ OMPI_GENERATE_F77_BINDINGS( MPI_WIN_DUP_FN, * Note that in this file, we invoke OMPI_C_ rather than * , where is MPI_DUP_FN (and all the rest). * Specifically: - * + * * MPI_NULL_DELETE_FN -> OMPI_C_MPI_NULL_DELETE_FN * MPI_NULL_COPY_FN -> OMPI_C_MPI_NULL_COPY_FN * MPI_DUP_FN -> OMPI_C_MPI_DUP_FN @@ -195,15 +195,15 @@ OMPI_GENERATE_F77_BINDINGS( MPI_WIN_DUP_FN, * The reason why is discussed in a lengthy comment in mpi.h. 
*/ void ompi_type_null_delete_fn_f(MPI_Fint* type, MPI_Fint* type_keyval, - MPI_Aint* attribute_val_out, + MPI_Aint* attribute_val_out, MPI_Aint* extra_state, MPI_Fint* ierr) { *ierr = OMPI_INT_2_FINT(MPI_SUCCESS); } -void ompi_type_null_copy_fn_f(MPI_Fint* type, MPI_Fint* type_keyval, +void ompi_type_null_copy_fn_f(MPI_Fint* type, MPI_Fint* type_keyval, MPI_Aint* extra_state, - MPI_Aint* attribute_val_in, + MPI_Aint* attribute_val_in, MPI_Aint* attribute_val_out, ompi_fortran_logical_t* flag, MPI_Fint* ierr) { @@ -211,10 +211,10 @@ void ompi_type_null_copy_fn_f(MPI_Fint* type, MPI_Fint* type_keyval, *ierr = OMPI_INT_2_FINT(MPI_SUCCESS); } -void ompi_type_dup_fn_f(MPI_Fint* type, MPI_Fint* type_keyval, +void ompi_type_dup_fn_f(MPI_Fint* type, MPI_Fint* type_keyval, MPI_Aint* extra_state, - MPI_Aint* attribute_val_in, - MPI_Aint* attribute_val_out, + MPI_Aint* attribute_val_in, + MPI_Aint* attribute_val_out, ompi_fortran_logical_t* flag, MPI_Fint* ierr ) { *flag = (ompi_fortran_logical_t) 1; @@ -223,26 +223,26 @@ void ompi_type_dup_fn_f(MPI_Fint* type, MPI_Fint* type_keyval, } void ompi_comm_null_delete_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, - MPI_Aint* attribute_val_out, + MPI_Aint* attribute_val_out, MPI_Aint* extra_state, MPI_Fint* ierr) { *ierr = OMPI_INT_2_FINT(MPI_SUCCESS); } -void ompi_comm_null_copy_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, +void ompi_comm_null_copy_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, MPI_Aint* extra_state, - MPI_Aint* attribute_val_in, - MPI_Aint* attribute_val_out, + MPI_Aint* attribute_val_in, + MPI_Aint* attribute_val_out, ompi_fortran_logical_t* flag, MPI_Fint* ierr) { *flag = (ompi_fortran_logical_t) 0; *ierr = OMPI_INT_2_FINT(MPI_SUCCESS); } -void ompi_comm_dup_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, +void ompi_comm_dup_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, MPI_Aint* extra_state, - MPI_Aint* attribute_val_in, - MPI_Aint* attribute_val_out, + MPI_Aint* attribute_val_in, + MPI_Aint* attribute_val_out, 
ompi_fortran_logical_t* flag, MPI_Fint* ierr) { *flag = (ompi_fortran_logical_t) 1; @@ -251,16 +251,16 @@ void ompi_comm_dup_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, } void ompi_null_delete_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, - MPI_Fint* attribute_val_out, + MPI_Fint* attribute_val_out, MPI_Fint* extra_state, MPI_Fint* ierr) { *ierr = OMPI_INT_2_FINT(MPI_SUCCESS); } -void ompi_null_copy_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, +void ompi_null_copy_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, MPI_Fint* extra_state, - MPI_Fint* attribute_val_in, - MPI_Fint* attribute_val_out, + MPI_Fint* attribute_val_in, + MPI_Fint* attribute_val_out, ompi_fortran_logical_t* flag, MPI_Fint* ierr) { *flag = (ompi_fortran_logical_t) 0; @@ -268,10 +268,10 @@ void ompi_null_copy_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, } -void ompi_dup_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, +void ompi_dup_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, MPI_Fint* extra_state, - MPI_Fint* attribute_val_in, - MPI_Fint* attribute_val_out, + MPI_Fint* attribute_val_in, + MPI_Fint* attribute_val_out, ompi_fortran_logical_t* flag, MPI_Fint* ierr) { *flag = (ompi_fortran_logical_t) 1; @@ -280,26 +280,26 @@ void ompi_dup_fn_f(MPI_Fint* comm, MPI_Fint* comm_keyval, } void ompi_win_null_delete_fn_f(MPI_Fint* window, MPI_Fint* win_keyval, - MPI_Aint* attribute_val_out, + MPI_Aint* attribute_val_out, MPI_Aint* extra_state, MPI_Fint* ierr) { *ierr = OMPI_INT_2_FINT(MPI_SUCCESS); } -void ompi_win_null_copy_fn_f(MPI_Fint* window, MPI_Fint* win_keyval, +void ompi_win_null_copy_fn_f(MPI_Fint* window, MPI_Fint* win_keyval, MPI_Aint* extra_state, - MPI_Aint* attribute_val_in, - MPI_Aint* attribute_val_out, + MPI_Aint* attribute_val_in, + MPI_Aint* attribute_val_out, ompi_fortran_logical_t* flag, MPI_Fint* ierr) { *flag = (ompi_fortran_logical_t) 0; *ierr = OMPI_INT_2_FINT(MPI_SUCCESS); } -void ompi_win_dup_fn_f(MPI_Fint* window, MPI_Fint* win_keyval, +void ompi_win_dup_fn_f(MPI_Fint* window, MPI_Fint* 
win_keyval, MPI_Aint* extra_state, - MPI_Aint* attribute_val_in, - MPI_Aint* attribute_val_out, + MPI_Aint* attribute_val_in, + MPI_Aint* attribute_val_out, ompi_fortran_logical_t* flag, MPI_Fint* ierr) { *flag = (ompi_fortran_logical_t) 1; diff --git a/ompi/mpi/fortran/base/constants.h b/ompi/mpi/fortran/base/constants.h index 914c63d8b3e..6e5f54918fd 100644 --- a/ompi/mpi/fortran/base/constants.h +++ b/ompi/mpi/fortran/base/constants.h @@ -5,17 +5,19 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2012 Universite Bordeaux 1 + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,10 +26,11 @@ #include "ompi_config.h" +#if OMPI_BUILD_FORTRAN_BINDINGS /* * Several variables are used to link against MPI F77 constants which * correspond to addresses, e.g. MPI_BOTTOM, and are implemented via - * common blocks. + * common blocks. * * We use common blocks so that in the C wrapper functions, we can * compare the address that comes in against known addresses (e.g., if @@ -85,168 +88,7 @@ * file. 
*/ -#define DECL(type, upper_case, lower_case, single_u, double_u) \ -OMPI_DECLSPEC extern type upper_case; \ -OMPI_DECLSPEC extern type lower_case; \ -OMPI_DECLSPEC extern type single_u; \ -OMPI_DECLSPEC extern type double_u - -/* Note that the rationale for the types of each of these variables is - discussed in ompi/include/mpif-common.h. Do not change the types - without also changing ompi/runtime/ompi_mpi_init.c and - ompi/include/mpif-common.h. */ - -DECL(int, MPI_FORTRAN_BOTTOM, mpi_fortran_bottom, - mpi_fortran_bottom_, mpi_fortran_bottom__); -DECL(int, MPI_FORTRAN_IN_PLACE, mpi_fortran_in_place, - mpi_fortran_in_place_, mpi_fortran_in_place__); -DECL(int, MPI_FORTRAN_UNWEIGHTED, mpi_fortran_unweighted, - mpi_fortran_unweighted_, mpi_fortran_unweighted__); -DECL(int, MPI_FORTRAN_WEIGHTS_EMPTY, mpi_fortran_weights_empty, - mpi_fortran_weights_empty_, mpi_fortran_weights_empty__); -DECL(char *, MPI_FORTRAN_ARGV_NULL, mpi_fortran_argv_null, - mpi_fortran_argv_null_, mpi_fortran_argv_null__); -DECL(char *, MPI_FORTRAN_ARGVS_NULL, mpi_fortran_argvs_null, - mpi_fortran_argvs_null_, mpi_fortran_argvs_null__); -DECL(int *, MPI_FORTRAN_ERRCODES_IGNORE, mpi_fortran_errcodes_ignore, - mpi_fortran_errcodes_ignore_, mpi_fortran_errcodes_ignore__); -DECL(int *, MPI_FORTRAN_STATUS_IGNORE, mpi_fortran_status_ignore, - mpi_fortran_status_ignore_, mpi_fortran_status_ignore__); -DECL(int *, MPI_FORTRAN_STATUSES_IGNORE, mpi_fortran_statuses_ignore, - mpi_fortran_statuses_ignore_, mpi_fortran_statuses_ignore__); - -/* - * Create macros to do the checking. Only check for all 4 if we have - * weak symbols. Otherwise, just check for the one relevant symbol. 
- */ -#if OPAL_HAVE_WEAK_SYMBOLS -#define OMPI_IS_FORTRAN_BOTTOM(addr) \ - (addr == (void*) &MPI_FORTRAN_BOTTOM || \ - addr == (void*) &mpi_fortran_bottom || \ - addr == (void*) &mpi_fortran_bottom_ || \ - addr == (void*) &mpi_fortran_bottom__) -#define OMPI_IS_FORTRAN_IN_PLACE(addr) \ - (addr == (void*) &MPI_FORTRAN_IN_PLACE || \ - addr == (void*) &mpi_fortran_in_place || \ - addr == (void*) &mpi_fortran_in_place_ || \ - addr == (void*) &mpi_fortran_in_place__) -#define OMPI_IS_FORTRAN_UNWEIGHTED(addr) \ - (addr == (void*) &MPI_FORTRAN_UNWEIGHTED || \ - addr == (void*) &mpi_fortran_unweighted || \ - addr == (void*) &mpi_fortran_unweighted_ || \ - addr == (void*) &mpi_fortran_unweighted__) -#define OMPI_IS_FORTRAN_WEIGHTS_EMPTY(addr) \ - (addr == (void*) &MPI_FORTRAN_WEIGHTS_EMPTY || \ - addr == (void*) &mpi_fortran_weights_empty || \ - addr == (void*) &mpi_fortran_weights_empty_ || \ - addr == (void*) &mpi_fortran_weights_empty__) -#define OMPI_IS_FORTRAN_ARGV_NULL(addr) \ - (addr == (void*) &MPI_FORTRAN_ARGV_NULL || \ - addr == (void*) &mpi_fortran_argv_null || \ - addr == (void*) &mpi_fortran_argv_null_ || \ - addr == (void*) &mpi_fortran_argv_null__) -#define OMPI_IS_FORTRAN_ARGVS_NULL(addr) \ - (addr == (void*) &MPI_FORTRAN_ARGVS_NULL || \ - addr == (void*) &mpi_fortran_argvs_null || \ - addr == (void*) &mpi_fortran_argvs_null_ || \ - addr == (void*) &mpi_fortran_argvs_null__) -#define OMPI_IS_FORTRAN_ERRCODES_IGNORE(addr) \ - (addr == (void*) &MPI_FORTRAN_ERRCODES_IGNORE || \ - addr == (void*) &mpi_fortran_errcodes_ignore || \ - addr == (void*) &mpi_fortran_errcodes_ignore_ || \ - addr == (void*) &mpi_fortran_errcodes_ignore__) -#define OMPI_IS_FORTRAN_STATUS_IGNORE(addr) \ - (addr == (void*) &MPI_FORTRAN_STATUS_IGNORE || \ - addr == (void*) &mpi_fortran_status_ignore || \ - addr == (void*) &mpi_fortran_status_ignore_ || \ - addr == (void*) &mpi_fortran_status_ignore__) -#define OMPI_IS_FORTRAN_STATUSES_IGNORE(addr) \ - (addr == (void*) 
&MPI_FORTRAN_STATUSES_IGNORE || \ - addr == (void*) &mpi_fortran_statuses_ignore || \ - addr == (void*) &mpi_fortran_statuses_ignore_ || \ - addr == (void*) &mpi_fortran_statuses_ignore__) - -#elif OMPI_FORTRAN_CAPS -#define OMPI_IS_FORTRAN_BOTTOM(addr) \ - (addr == (void*) &MPI_FORTRAN_BOTTOM) -#define OMPI_IS_FORTRAN_IN_PLACE(addr) \ - (addr == (void*) &MPI_FORTRAN_IN_PLACE) -#define OMPI_IS_FORTRAN_UNWEIGHTED(addr) \ - (addr == (void*) &MPI_FORTRAN_UNWEIGHTED) -#define OMPI_IS_FORTRAN_WEIGHTS_EMPTY(addr) \ - (addr == (void*) &MPI_FORTRAN_WEIGHTS_EMPTY) -#define OMPI_IS_FORTRAN_ARGV_NULL(addr) \ - (addr == (void*) &MPI_FORTRAN_ARGV_NULL) -#define OMPI_IS_FORTRAN_ARGVS_NULL(addr) \ - (addr == (void*) &MPI_FORTRAN_ARGVS_NULL) -#define OMPI_IS_FORTRAN_ERRCODES_IGNORE(addr) \ - (addr == (void*) &MPI_FORTRAN_ERRCODES_IGNORE) -#define OMPI_IS_FORTRAN_STATUS_IGNORE(addr) \ - (addr == (void*) &MPI_FORTRAN_STATUS_IGNORE) -#define OMPI_IS_FORTRAN_STATUSES_IGNORE(addr) \ - (addr == (void*) &MPI_FORTRAN_STATUSES_IGNORE) - -#elif OMPI_FORTRAN_PLAIN -#define OMPI_IS_FORTRAN_BOTTOM(addr) \ - (addr == (void*) &mpi_fortran_bottom) -#define OMPI_IS_FORTRAN_IN_PLACE(addr) \ - (addr == (void*) &mpi_fortran_in_place) -#define OMPI_IS_FORTRAN_UNWEIGHTED(addr) \ - (addr == (void*) &mpi_fortran_unweighted) -#define OMPI_IS_FORTRAN_WEIGHTS_EMPTY(addr) \ - (addr == (void*) &mpi_fortran_weights_empty) -#define OMPI_IS_FORTRAN_ARGV_NULL(addr) \ - (addr == (void*) &mpi_fortran_argv_null) -#define OMPI_IS_FORTRAN_ARGVS_NULL(addr) \ - (addr == (void*) &mpi_fortran_argvs_null) -#define OMPI_IS_FORTRAN_ERRCODES_IGNORE(addr) \ - (addr == (void*) &mpi_fortran_errcodes_ignore) -#define OMPI_IS_FORTRAN_STATUS_IGNORE(addr) \ - (addr == (void*) &mpi_fortran_status_ignore) -#define OMPI_IS_FORTRAN_STATUSES_IGNORE(addr) \ - (addr == (void*) &mpi_fortran_statuses_ignore) - -#elif OMPI_FORTRAN_SINGLE_UNDERSCORE -#define OMPI_IS_FORTRAN_BOTTOM(addr) \ - (addr == (void*) &mpi_fortran_bottom_) -#define 
OMPI_IS_FORTRAN_IN_PLACE(addr) \ - (addr == (void*) &mpi_fortran_in_place_) -#define OMPI_IS_FORTRAN_UNWEIGHTED(addr) \ - (addr == (void*) &mpi_fortran_unweighted_) -#define OMPI_IS_FORTRAN_WEIGHTS_EMPTY(addr) \ - (addr == (void*) &mpi_fortran_weights_empty_) -#define OMPI_IS_FORTRAN_ARGV_NULL(addr) \ - (addr == (void*) &mpi_fortran_argv_null_) -#define OMPI_IS_FORTRAN_ARGVS_NULL(addr) \ - (addr == (void*) &mpi_fortran_argvs_null_) -#define OMPI_IS_FORTRAN_ERRCODES_IGNORE(addr) \ - (addr == (void*) &mpi_fortran_errcodes_ignore_) -#define OMPI_IS_FORTRAN_STATUS_IGNORE(addr) \ - (addr == (void*) &mpi_fortran_status_ignore_) -#define OMPI_IS_FORTRAN_STATUSES_IGNORE(addr) \ - (addr == (void*) &mpi_fortran_statuses_ignore_) - -#else -#define OMPI_IS_FORTRAN_BOTTOM(addr) \ - (addr == (void*) &mpi_fortran_bottom__) -#define OMPI_IS_FORTRAN_IN_PLACE(addr) \ - (addr == (void*) &mpi_fortran_in_place__) -#define OMPI_IS_FORTRAN_UNWEIGHTED(addr) \ - (addr == (void*) &mpi_fortran_unweighted__) -#define OMPI_IS_FORTRAN_WEIGHTS_EMPTY(addr) \ - (addr == (void*) &mpi_fortran_weights_empty__) -#define OMPI_IS_FORTRAN_ARGV_NULL(addr) \ - (addr == (void*) &mpi_fortran_argv_null__) -#define OMPI_IS_FORTRAN_ARGVS_NULL(addr) \ - (addr == (void*) &mpi_fortran_argvs_null__) -#define OMPI_IS_FORTRAN_ERRCODES_IGNORE(addr) \ - (addr == (void*) &mpi_fortran_errcodes_ignore__) -#define OMPI_IS_FORTRAN_STATUS_IGNORE(addr) \ - (addr == (void*) &mpi_fortran_status_ignore__) -#define OMPI_IS_FORTRAN_STATUSES_IGNORE(addr) \ - (addr == (void*) &mpi_fortran_statuses_ignore__) - -#endif /* weak / specific symbol type */ +#include "mpif-c-constants-decl.h" /* Convert between Fortran and C MPI_BOTTOM */ #define OMPI_F2C_BOTTOM(addr) (OMPI_IS_FORTRAN_BOTTOM(addr) ? MPI_BOTTOM : (addr)) @@ -254,4 +96,6 @@ DECL(int *, MPI_FORTRAN_STATUSES_IGNORE, mpi_fortran_statuses_ignore, #define OMPI_F2C_UNWEIGHTED(addr) (OMPI_IS_FORTRAN_UNWEIGHTED(addr) ? 
MPI_UNWEIGHTED : (addr)) #define OMPI_F2C_WEIGHTS_EMPTY(addr) (OMPI_IS_FORTRAN_WEIGHTS_EMPTY(addr) ? MPI_WEIGHTS_EMPTY : (addr)) +#endif /* OMPI_BUILD_FORTRAN_BINDINGS */ + #endif /* OMPI_FORTRAN_BASE_CONSTANTS_H */ diff --git a/ompi/mpi/fortran/base/conversion_fn_null_f.c b/ompi/mpi/fortran/base/conversion_fn_null_f.c index 67e718bcab6..9e83f03e699 100644 --- a/ompi/mpi/fortran/base/conversion_fn_null_f.c +++ b/ompi/mpi/fortran/base/conversion_fn_null_f.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -58,10 +58,10 @@ OMPI_GENERATE_F77_BINDINGS(MPI_CONVERSION_FN_NULL, #endif -void mpi_conversion_fn_null_f(char *userbuf, MPI_Fint *datatype, - MPI_Fint *count, char *filebuf, - MPI_Offset *position, - MPI_Aint *extra_state, +void mpi_conversion_fn_null_f(char *userbuf, MPI_Fint *datatype, + MPI_Fint *count, char *filebuf, + MPI_Offset *position, + MPI_Aint *extra_state, MPI_Fint *ierr) { /* Per MPI-2:9.5.3, this function will never be called; it's only diff --git a/ompi/mpi/fortran/base/datarep.h b/ompi/mpi/fortran/base/datarep.h index 0a0efaa1c0e..2f94c10dee2 100644 --- a/ompi/mpi/fortran/base/datarep.h +++ b/ompi/mpi/fortran/base/datarep.h @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -41,21 +43,21 @@ BEGIN_C_DECLS -/** +/** * Function typedef for the conversion function pointer in * MPI_REGISTER_DATAREP */ typedef void (ompi_mpi2_fortran_datarep_conversion_fn_t) - (char *userbuf, MPI_Fint *datatype, MPI_Fint *count, char *filebuf, + (char *userbuf, MPI_Fint *datatype, MPI_Fint *count, char *filebuf, MPI_Offset *position, MPI_Aint *extra_state, MPI_Fint *ierr); -/** +/** * Function typedef for the extent function pointer in * MPI_REGISTER_DATAREP */ typedef void (ompi_mpi2_fortran_datarep_extent_fn_t) - (MPI_Fint *datatype, MPI_Aint *extent, MPI_Aint *extra_state, + (MPI_Fint *datatype, MPI_Aint *extent, MPI_Aint *extra_state, MPI_Fint *ierr); -/** +/** * Macro for declaring each of the 5 back-end Fortran functions for * MPI_CONVERSION_FN_NULL. We need the 4 fortran compiler convetions * and 1 for the "real" back-end function (even though these functions @@ -79,18 +81,12 @@ OMPI_DATAREP_FORTRAN_DECLARE(mpi_conversion_fn_null, MPI_CONVERSION_FN_NULL, (ch space */ #undef OMPI_DATAREP_FORTRAN_DECLARE -/** +/** * Declare the test macro in all of its forms. This macro provides a * convenient way to check whether an argument is the sentinel value * MPI_CONVERSION_FN_NULL. 
*/ -#if OPAL_HAVE_WEAK_SYMBOLS -#define OMPI_IS_FORTRAN_CONVERSION_FN_NULL(addr) \ - (MPI_CONVERSION_FN_NULL == addr || \ - mpi_conversion_fn_null == addr || \ - mpi_conversion_fn_null_ == addr || \ - mpi_conversion_fn_null__ == addr) -#elif OMPI_FORTRAN_CAPS +#if OMPI_FORTRAN_CAPS #define OMPI_IS_FORTRAN_CONVERSION_FN_NULL(addr) \ (MPI_CONVERSION_FN_NULL == addr) #elif OMPI_FORTRAN_PLAIN @@ -99,9 +95,11 @@ OMPI_DATAREP_FORTRAN_DECLARE(mpi_conversion_fn_null, MPI_CONVERSION_FN_NULL, (ch #elif OMPI_FORTRAN_SINGLE_UNDERSCORE #define OMPI_IS_FORTRAN_CONVERSION_FN_NULL(addr) \ (mpi_conversion_fn_null_ == addr) -#else +#elif OMPI_FORTRAN_DOUBLE_UNDERSCORE #define OMPI_IS_FORTRAN_CONVERSION_FN_NULL(addr) \ (mpi_conversion_fn_null__ == addr) +#else +#error Unrecognized Fortran name mangling scheme #endif END_C_DECLS diff --git a/ompi/mpi/fortran/base/f90_accessors.c b/ompi/mpi/fortran/base/f90_accessors.c index 3e82b46afa3..e163afb7f6d 100644 --- a/ompi/mpi/fortran/base/f90_accessors.c +++ b/ompi/mpi/fortran/base/f90_accessors.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -44,6 +47,16 @@ OMPI_DECLSPEC void mpi_wtick_f90(double *w); OMPI_DECLSPEC void mpi_wtick_f90_(double *w); OMPI_DECLSPEC void mpi_wtick_f90__(double *w); +OMPI_DECLSPEC void MPI_AINT_ADD_F90(MPI_Aint *base, MPI_Aint *diff, MPI_Aint *w); +OMPI_DECLSPEC void mpi_aint_add_f90(MPI_Aint *base, MPI_Aint *diff, MPI_Aint *w); +OMPI_DECLSPEC void mpi_aint_add_f90_(MPI_Aint *base, MPI_Aint *diff, MPI_Aint *w); +OMPI_DECLSPEC void mpi_aint_add_f90__(MPI_Aint *base, MPI_Aint *diff, MPI_Aint *w); + +OMPI_DECLSPEC void MPI_AINT_DIFF_F90(MPI_Aint *addr1, MPI_Aint *addr2, MPI_Aint *w); +OMPI_DECLSPEC void mpi_aint_diff_f90(MPI_Aint *addr1, MPI_Aint *addr2, MPI_Aint *w); +OMPI_DECLSPEC void mpi_aint_diff_f90_(MPI_Aint *addr1, MPI_Aint *addr2, MPI_Aint *w); +OMPI_DECLSPEC void mpi_aint_diff_f90__(MPI_Aint *addr1, MPI_Aint *addr2, MPI_Aint *w); + /**********************************************************************/ void MPI_WTIME_F90(double *w) @@ -88,3 +101,47 @@ void mpi_wtick_f90__(double *w) *w = MPI_Wtick(); } +/**********************************************************************/ + +void MPI_AINT_ADD_F90(MPI_Aint *base, MPI_Aint *diff, MPI_Aint *w) +{ + *w = MPI_Aint_add (*base, *diff); +} + +void mpi_aint_add_f90(MPI_Aint *base, MPI_Aint *diff, MPI_Aint *w) +{ + *w = MPI_Aint_add (*base, *diff); +} + +void mpi_aint_add_f90_(MPI_Aint *base, MPI_Aint *diff, MPI_Aint *w) +{ + *w = MPI_Aint_add (*base, *diff); +} + +void mpi_aint_add_f90__(MPI_Aint *base, MPI_Aint *diff, MPI_Aint *w) +{ + *w = MPI_Aint_add (*base, *diff); +} + + +/**********************************************************************/ + +void MPI_AINT_DIFF_F90(MPI_Aint *addr1, MPI_Aint *addr2, MPI_Aint *w) +{ + *w = MPI_Aint_diff (*addr1, *addr2); +} + +void mpi_aint_diff_f90(MPI_Aint *addr1, MPI_Aint *addr2, MPI_Aint *w) +{ + *w = MPI_Aint_diff (*addr1, *addr2); +} + +void mpi_aint_diff_f90_(MPI_Aint *addr1, MPI_Aint *addr2, 
MPI_Aint *w) +{ + *w = MPI_Aint_diff (*addr1, *addr2); +} + +void mpi_aint_diff_f90__(MPI_Aint *addr1, MPI_Aint *addr2, MPI_Aint *w) +{ + *w = MPI_Aint_diff (*addr1, *addr2); +} diff --git a/ompi/mpi/fortran/base/fint_2_int.h b/ompi/mpi/fortran/base/fint_2_int.h index d44cf2cdc3d..5971694eb9b 100644 --- a/ompi/mpi/fortran/base/fint_2_int.h +++ b/ompi/mpi/fortran/base/fint_2_int.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,7 +27,7 @@ #include -/* +/* * Define MACROS to take account of different size of MPI_Fint from int */ @@ -40,9 +40,9 @@ #define OMPI_INT_2_FINT(a) a #define OMPI_FINT_2_INT(a) a #define OMPI_PFINT_2_PINT(a) a - #define OMPI_ARRAY_FINT_2_INT_ALLOC(in, n) + #define OMPI_ARRAY_FINT_2_INT_ALLOC(in, n) #define OMPI_ARRAY_FINT_2_INT(in, n) - #define OMPI_2_DIM_ARRAY_FINT_2_INT(in, n, dim2) + #define OMPI_2_DIM_ARRAY_FINT_2_INT(in, n, dim2) #define OMPI_ARRAY_FINT_2_INT_CLEANUP(in) #define OMPI_SINGLE_FINT_2_INT(in) #define OMPI_SINGLE_INT_2_FINT(in) diff --git a/ompi/mpi/fortran/base/gen-mpi-mangling.pl b/ompi/mpi/fortran/base/gen-mpi-mangling.pl new file mode 100755 index 00000000000..96294f9fa9e --- /dev/null +++ b/ompi/mpi/fortran/base/gen-mpi-mangling.pl @@ -0,0 +1,206 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. 
All rights reserved. +# $COPYRIGHT$ +# +# Subroutine to generate a bunch of Fortran declarations and symbols +# + +use strict; + +use Getopt::Long; + +my $caps_arg; +my $plain_arg; +my $single_underscore_arg; +my $double_underscore_arg; +my $help_arg = 0; + +&Getopt::Long::Configure("bundling"); +my $ok = Getopt::Long::GetOptions("caps=i" => \$caps_arg, + "plain=i" => \$plain_arg, + "single=i" => \$single_underscore_arg, + "double=i" => \$double_underscore_arg, + "help|h" => \$help_arg); + +if ($help_arg || !$ok) { + print "Usage: $0 [--caps|--plain|--single|--double] [--help]\n"; + exit(1 - $ok); +} + +my $file_c_constants_decl = "mpif-c-constants-decl.h"; +my $file_c_constants = "mpif-c-constants.h"; +my $file_f08_types = "mpif-f08-types.h"; + +# If we are not building fortran, then just make empty files +if ($caps_arg + $plain_arg + $single_underscore_arg + + $double_underscore_arg == 0) { + system("touch $file_c_constants_decl"); + system("touch $file_c_constants"); + system("touch $file_f08_types"); + exit(0); +} + +############################################################### + +# Declare a hash of all the Fortran sentinel values + +my $fortran; + +$fortran->{bottom} = { + c_type => "int", + c_name => "mpi_fortran_bottom", + f_type => "integer", + f_name => "MPI_BOTTOM", +}; +$fortran->{in_place} = { + c_type => "int", + c_name => "mpi_fortran_in_place", + f_type => "integer", + f_name => "MPI_IN_PLACE", +}; +$fortran->{unweighted} = { + c_type => "int *", + c_name => "mpi_fortran_unweighted", + f_type => "integer", + f_name => "MPI_UNWEIGHTED", +}; +$fortran->{weights_empty} = { + c_type => "int *", + c_name => "mpi_fortran_weights_empty", + f_type => "integer", + f_name => "MPI_WEIGHTS_EMPTY", +}; + +$fortran->{argv_null} = { + c_type => "char *", + c_name => "mpi_fortran_argv_null", + f_type => "integer", + f_name => "MPI_ARGV_NULL", +}; +$fortran->{argvs_null} = { + c_type => "char *", + c_name => "mpi_fortran_argvs_null", + f_type => "integer", + 
f_name => "MPI_ARGVS_NULL", +}; + +$fortran->{errcodes_ignore} = { + c_type => "int *", + c_name => "mpi_fortran_errcodes_ignore", + f_type => "integer", + f_name => "MPI_ERRCODES_IGNORE", +}; +$fortran->{status_ignore} = { + c_type => "int *", + c_name => "mpi_fortran_status_ignore", + f_type => "type(MPI_STATUS)", + f_name => "MPI_STATUS_IGNORE", +}; +$fortran->{statuses_ignore} = { + c_type => "int *", + c_name => "mpi_fortran_statuses_ignore", + f_type => "type(MPI_STATUS)", + f_name => "MPI_STATUSES_IGNORE(1)", +}; + +############################################################### + +sub mangle { + my $name = shift; + + if ($plain_arg) { + return $name; + } elsif ($caps_arg) { + return uc($name); + } elsif ($single_underscore_arg) { + return $name . "_"; + } elsif ($double_underscore_arg) { + return $name . "__"; + } else { + die "Unknown name mangling type"; + } +} + +sub gen_c_constants_decl { + open(OUT, ">$file_c_constants_decl") || + die "Can't write to $file_c_constants_decl"; + + print OUT "/* WARNING: This is a generated file! Edits will be lost! */ +/* + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * \$COPYRIGHT\$ + * + * This file was generated by gen-mpi-mangling.pl + */ + +/* Note that the rationale for the types of each of these variables is + discussed in ompi/include/mpif-common.h. Do not change the types + without also changing ompi/runtime/ompi_mpi_init.c and + ompi/include/mpif-common.h. */\n\n"; + + foreach my $key (sort(keys(%{$fortran}))) { + my $f = $fortran->{$key}; + my $m = mangle($f->{c_name}); + print OUT "extern $f->{c_type} $m; +#define OMPI_IS_FORTRAN_" . uc($key) . "(addr) \\ + (addr == (void*) &$m)\n\n"; + } + + close(OUT); +} + +sub gen_c_constants { + open(OUT, ">$file_c_constants") || + die "Can't write to $file_c_constants"; + + print OUT "/* WARNING: This is a generated file! 
Edits will be lost! */ +/* + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * \$COPYRIGHT\$ + * + * This file was generated by gen-mpi-mangling.pl + */\n\n"; + + foreach my $key (sort(keys(%{$fortran}))) { + my $f = $fortran->{$key}; + my $m = mangle($f->{c_name}); + print OUT "$f->{c_type} $m;\n"; + } + + close (OUT); +} + +sub gen_f08_types { + open(OUT, ">$file_f08_types") || + die "Can't write to $file_f08_types"; + + print OUT "! WARNING: This is a generated file! Edits will be lost! */ +! +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +! \$COPYRIGHT\$ +! +! This file was generated by gen-mpi-mangling.pl +!\n\n"; + + foreach my $key (sort(keys(%{$fortran}))) { + my $f = $fortran->{$key}; + print OUT "$f->{f_type}, bind(C, name=\"".mangle($f->{c_name})."\") :: $f->{f_name}\n"; + } + + close (OUT); +} + +gen_c_constants_decl(); +gen_c_constants(); +gen_f08_types(); + +exit(0); diff --git a/ompi/mpi/fortran/base/gen-mpi-sizeof.pl b/ompi/mpi/fortran/base/gen-mpi-sizeof.pl index a422c9a4acd..5ea3dca3a47 100755 --- a/ompi/mpi/fortran/base/gen-mpi-sizeof.pl +++ b/ompi/mpi/fortran/base/gen-mpi-sizeof.pl @@ -1,6 +1,6 @@ #!/usr/bin/env perl # -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ @@ -218,12 +218,32 @@ sub output_file { ! Specifically: we need support for the INTERFACE keyword, ! ISO_FORTRAN_ENV, and the STORAGE_SIZE() intrinsic on all types. ! Apparently, this compiler does not support both of those things, so -! this file will be blank (i.e., we didn't bother generating the -! 
necessary stuff for MPI_SIZEOF because the compiler doesn't support +! this file will be (effecitvely) blank (i.e., we didn't bother +! generating the necessary stuff for MPI_SIZEOF because the compiler +! doesn't support ! it). ! ! If you want support for MPI_SIZEOF, please use a different Fortran ! compiler to build Open MPI.\n\n"; + + if ($want_bodies) { + my $name = $pmpi_arg ? "pompi_sad_panda" : "ompi_sad_panda"; + print OUT "! +! Dummy subroutine, just so that there is *some* Fortran in this file +! (this is defensive programming: since the Fortran compiler doesn't +! support enough mojo, configure should set some AM_CONDITIONALs such +! that this file should not end up being compiled, but just in case +! that logic changes someday and this file *does* end up getting +! compiled, make sure that it's not entirely empty because some +! compilers are unhappy if there are no Fortran statements in this +! file). +subroutine $name() + implicit none + + print *, 'Open MPI is a sad panda because your Fortran compiler' + print *, 'does not support enough Fortran mojo for MPI_SIZEOF' +end subroutine $name\n\n"; + } } close(OUT); diff --git a/ompi/mpi/fortran/base/strings.c b/ompi/mpi/fortran/base/strings.c index a63fa0dc390..1db122711b5 100644 --- a/ompi/mpi/fortran/base/strings.c +++ b/ompi/mpi/fortran/base/strings.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -102,7 +102,7 @@ int ompi_fortran_string_c2f(char *cstr, char *fstr, int len) * creates a C argument vector from an F77 array of strings * (terminated by a blank string) */ -int ompi_fortran_argv_f2c(char *array, int string_len, int advance, +int ompi_fortran_argv_f2c(char *array, int string_len, int advance, char ***argv) { int err, argc = 0; @@ -113,7 +113,7 @@ int ompi_fortran_argv_f2c(char *array, int string_len, int advance, *argv = NULL; while (1) { - if (OMPI_SUCCESS != (err = ompi_fortran_string_f2c(array, string_len, + if (OMPI_SUCCESS != (err = ompi_fortran_string_f2c(array, string_len, &cstr))) { opal_argv_free(*argv); return err; @@ -142,7 +142,7 @@ int ompi_fortran_argv_f2c(char *array, int string_len, int advance, * Creates a set of C argv arrays from an F77 array of argv's. The * returned arrays need to be freed by the caller. */ -int ompi_fortran_multiple_argvs_f2c(int num_argv_arrays, char *array, +int ompi_fortran_multiple_argvs_f2c(int num_argv_arrays, char *array, int string_len, char ****argv) { char ***argv_array; @@ -153,7 +153,7 @@ int ompi_fortran_multiple_argvs_f2c(int num_argv_arrays, char *array, argv_array = (char ***) malloc (num_argv_arrays * sizeof(char **)); for (i = 0; i < num_argv_arrays; ++i) { - ret = ompi_fortran_argv_f2c(current_array, string_len, + ret = ompi_fortran_argv_f2c(current_array, string_len, string_len * num_argv_arrays, &argv_array[i]); if (OMPI_SUCCESS != ret) { diff --git a/ompi/mpi/fortran/base/strings.h b/ompi/mpi/fortran/base/strings.h index d15a11bea57..98c3c868847 100644 --- a/ompi/mpi/fortran/base/strings.h +++ b/ompi/mpi/fortran/base/strings.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,7 +28,7 @@ BEGIN_C_DECLS * * @param fstr Fortran string * @param len Fortran string length - * @param cstr Pointer to C string that will be created and returned + * @param cstr Pointer to C string that will be created and returned * * @retval OMPI_SUCCESS upon success * @retval OMPI_ERROR upon error @@ -86,7 +86,7 @@ BEGIN_C_DECLS * number_of_argv_arrays). Hence, the advance parameter is used * to specify this displacement. */ - OMPI_DECLSPEC int ompi_fortran_argv_f2c(char *farray, int string_len, + OMPI_DECLSPEC int ompi_fortran_argv_f2c(char *farray, int string_len, int advancex, char ***cargv); /** diff --git a/ompi/mpi/fortran/base/test_constants_f.c b/ompi/mpi/fortran/base/test_constants_f.c index 8c2ba2491fd..59a5630fabb 100644 --- a/ompi/mpi/fortran/base/test_constants_f.c +++ b/ompi/mpi/fortran/base/test_constants_f.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mpi/fortran/configure-fortran-output-bottom.h b/ompi/mpi/fortran/configure-fortran-output-bottom.h index d79151b5b5c..895f0496d7f 100644 --- a/ompi/mpi/fortran/configure-fortran-output-bottom.h +++ b/ompi/mpi/fortran/configure-fortran-output-bottom.h @@ -7,7 +7,7 @@ ! $COPYRIGHT$ ! ! Additional copyrights may follow -! +! ! $HEADER$ ! diff --git a/ompi/mpi/fortran/configure-fortran-output.h.in b/ompi/mpi/fortran/configure-fortran-output.h.in index 5ef9848cbc9..9f40f5344f3 100644 --- a/ompi/mpi/fortran/configure-fortran-output.h.in +++ b/ompi/mpi/fortran/configure-fortran-output.h.in @@ -7,7 +7,7 @@ ! $COPYRIGHT$ ! ! Additional copyrights may follow -! +! ! $HEADER$ ! diff --git a/ompi/mpi/fortran/mpiext/Makefile.am b/ompi/mpi/fortran/mpiext/Makefile.am index 879e7f018ab..542e7d47e19 100644 --- a/ompi/mpi/fortran/mpiext/Makefile.am +++ b/ompi/mpi/fortran/mpiext/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpi/fortran/mpif-h/Makefile.am b/ompi/mpi/fortran/mpif-h/Makefile.am index c9fc687e2fc..437adcb1228 100644 --- a/ompi/mpi/fortran/mpif-h/Makefile.am +++ b/ompi/mpi/fortran/mpif-h/Makefile.am @@ -5,21 +5,22 @@ # Copyright (c) 2004-2013 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2013 Inria. All rights reserved. 
# Copyright (c) 2011-2013 Universite Bordeaux 1 # Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -27,15 +28,7 @@ include $(top_srcdir)/Makefile.ompi-rules SUBDIRS = profile -# -# OMPI_PRPOFILING_DEFINES flag is enabled when we want our MPI_* symbols -# to be replaced by PMPI_*. In other words, this flag decides -# whether "profile/defines.h" is included or not. "profile/defines.h" -# replaces all MPI_* symbols with PMPI_* symbols. In this directory, -# we need it to be 0 -# - -AM_CPPFLAGS = -DOMPI_PROFILE_LAYER=0 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1 +AM_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=0 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1 # The top directory only builds MPI_* bindings and some support # glue. The bottom directory only builds PMPI_* bindings. Each @@ -55,18 +48,25 @@ AM_CPPFLAGS = -DOMPI_PROFILE_LAYER=0 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1 lib_LTLIBRARIES = CLEANFILES = -libmpi_mpifh_la_LIBADD = $(top_builddir)/ompi/libmpi.la $(OMPI_MPIEXT_MPIFH_LIBS) -libmpi_mpifh_la_LDFLAGS = -version-info $(libmpi_mpifh_so_version) +# Note that we invoke some OPAL functions directly in libmpi_mpifh.la, +# so we need to link in the OPAL library directly (pulling it in +# indirectly via libmpi.la does not work on all platforms). +lib@OMPI_LIBMPI_NAME@_mpifh_la_LIBADD = \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ + $(OMPI_MPIEXT_MPIFH_LIBS) \ + $(OMPI_TOP_BUILDDIR)/opal/lib@OPAL_LIB_PREFIX@open-pal.la + +lib@OMPI_LIBMPI_NAME@_mpifh_la_LDFLAGS = -version-info $(libmpi_mpifh_so_version) # Are we building the mpif.h bindings at all? 
if OMPI_BUILD_FORTRAN_MPIFH_BINDINGS # If yes, then we need to build the installable library and the glue # convenience library that will be sucked up into the main libmpi. -lib_LTLIBRARIES += libmpi_mpifh.la +lib_LTLIBRARIES += lib@OMPI_LIBMPI_NAME@_mpifh.la # Do we need to suck in the convenience library from the lower # directory? if BUILD_PMPI_FORTRAN_MPIFH_BINDINGS_LAYER -libmpi_mpifh_la_LIBADD += profile/libmpi_mpifh_pmpi.la +lib@OMPI_LIBMPI_NAME@_mpifh_la_LIBADD += profile/libmpi_mpifh_pmpi.la endif endif @@ -78,7 +78,7 @@ headers = \ # # These files are only built and added to libmpi_mpifh.la in certain cases. # -libmpi_mpifh_la_SOURCES = +lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES = # sizeof_f.f90 is generated based on some results from configure tests. CLEANFILES += sizeof_f.f90 @@ -108,7 +108,7 @@ if BUILD_FORTRAN_SIZEOF noinst_LTLIBRARIES += libmpi_mpifh_sizeof.la # Do not dist this file; it is generated nodist_libmpi_mpifh_sizeof_la_SOURCES = sizeof_f.f90 -libmpi_mpifh_la_LIBADD += libmpi_mpifh_sizeof.la +lib@OMPI_LIBMPI_NAME@_mpifh_la_LIBADD += libmpi_mpifh_sizeof.la endif sizeof_pl = $(top_srcdir)/ompi/mpi/fortran/base/gen-mpi-sizeof.pl @@ -124,12 +124,14 @@ sizeof_f.f90: --complex32=$(OMPI_HAVE_FORTRAN_COMPLEX32) if BUILD_MPI_FORTRAN_MPIFH_BINDINGS_LAYER -libmpi_mpifh_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ abort_f.c \ add_error_class_f.c \ add_error_code_f.c \ add_error_string_f.c \ address_f.c \ + aint_add_f.c \ + aint_diff_f.c \ allgather_f.c \ allgatherv_f.c \ alloc_mem_f.c \ @@ -430,7 +432,7 @@ libmpi_mpifh_la_SOURCES += \ win_flush_local_all_f.c if OMPI_PROVIDE_MPI_FILE_INTERFACE -libmpi_mpifh_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ file_call_errhandler_f.c \ file_close_f.c \ file_create_errhandler_f.c \ @@ -448,9 +450,13 @@ libmpi_mpifh_la_SOURCES += \ file_get_view_f.c \ file_iread_at_f.c \ file_iread_f.c \ + file_iread_at_all_f.c \ + file_iread_all_f.c \ file_iread_shared_f.c \ file_iwrite_at_f.c \ 
file_iwrite_f.c \ + file_iwrite_at_all_f.c \ + file_iwrite_all_f.c \ file_iwrite_shared_f.c \ file_open_f.c \ file_preallocate_f.c \ diff --git a/ompi/mpi/fortran/mpif-h/abort_f.c b/ompi/mpi/fortran/mpif-h/abort_f.c index c28ff5c284b..917ad13f13d 100644 --- a/ompi/mpi/fortran/mpif-h/abort_f.c +++ b/ompi/mpi/fortran/mpif-h/abort_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ABORT = ompi_abort_f #pragma weak pmpi_abort = ompi_abort_f #pragma weak pmpi_abort_ = ompi_abort_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Abort_f = ompi_abort_f #pragma weak PMPI_Abort_f08 = ompi_abort_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS(PMPI_ABORT, pmpi_abort, pmpi_abort_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS(PMPI_ABORT, (MPI_Fint *comm, MPI_Fint *errorcode, MPI_Fint *ierr), (comm, errorcode, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ABORT = ompi_abort_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS(PMPI_ABORT, #pragma weak MPI_Abort_f = ompi_abort_f #pragma weak MPI_Abort_f08 = ompi_abort_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS(MPI_ABORT, mpi_abort, mpi_abort_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS(MPI_ABORT, ompi_abort_f, (MPI_Fint *comm, MPI_Fint *errorcode, MPI_Fint *ierr), (comm, errorcode, ierr) ) +#else +#define ompi_abort_f pompi_abort_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_abort_f(MPI_Fint *comm, MPI_Fint *errorcode, MPI_Fint *ierr) { int ierr_c; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); - - ierr_c = MPI_Abort(c_comm, OMPI_FINT_2_INT(*errorcode)); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); + + ierr_c = PMPI_Abort(c_comm, OMPI_FINT_2_INT(*errorcode)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); } diff --git a/ompi/mpi/fortran/mpif-h/accumulate_f.c b/ompi/mpi/fortran/mpif-h/accumulate_f.c index 643167f1dce..c9b6460a683 100644 --- a/ompi/mpi/fortran/mpif-h/accumulate_f.c +++ b/ompi/mpi/fortran/mpif-h/accumulate_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ACCUMULATE = ompi_accumulate_f #pragma weak pmpi_accumulate = ompi_accumulate_f #pragma weak pmpi_accumulate_ = ompi_accumulate_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Accumulate_f = ompi_accumulate_f #pragma weak PMPI_Accumulate_f08 = ompi_accumulate_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ACCUMULATE, pmpi_accumulate, pmpi_accumulate_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ACCUMULATE, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *op, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, op, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ACCUMULATE = ompi_accumulate_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ACCUMULATE, #pragma weak MPI_Accumulate_f = ompi_accumulate_f #pragma weak MPI_Accumulate_f08 = ompi_accumulate_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ACCUMULATE, mpi_accumulate, mpi_accumulate_, @@ -59,13 +62,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ACCUMULATE, ompi_accumulate_f, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *op, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, op, win, ierr) ) +#else +#define ompi_accumulate_f pompi_accumulate_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_accumulate_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, @@ -74,18 +76,18 @@ void ompi_accumulate_f(char *origin_addr, MPI_Fint *origin_count, { int ierr_c; - MPI_Datatype c_origin_datatype = MPI_Type_f2c(*origin_datatype); - MPI_Datatype c_target_datatype = MPI_Type_f2c(*target_datatype); - MPI_Win c_win = MPI_Win_f2c(*win); - MPI_Op c_op = MPI_Op_f2c(*op); + MPI_Datatype c_origin_datatype = PMPI_Type_f2c(*origin_datatype); + MPI_Datatype c_target_datatype = PMPI_Type_f2c(*target_datatype); + MPI_Win c_win = PMPI_Win_f2c(*win); + MPI_Op c_op = PMPI_Op_f2c(*op); - ierr_c = MPI_Accumulate(OMPI_F2C_BOTTOM(origin_addr), - OMPI_FINT_2_INT(*origin_count), - c_origin_datatype, - OMPI_FINT_2_INT(*target_rank), - *target_disp, - OMPI_FINT_2_INT(*target_count), - c_target_datatype, c_op, c_win); + ierr_c = PMPI_Accumulate(OMPI_F2C_BOTTOM(origin_addr), + OMPI_FINT_2_INT(*origin_count), + c_origin_datatype, + OMPI_FINT_2_INT(*target_rank), + *target_disp, + OMPI_FINT_2_INT(*target_count), + c_target_datatype, c_op, c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); } diff --git a/ompi/mpi/fortran/mpif-h/add_error_class_f.c b/ompi/mpi/fortran/mpif-h/add_error_class_f.c index 6c791d270ce..6ce7e0f12d9 100644 --- a/ompi/mpi/fortran/mpif-h/add_error_class_f.c +++ b/ompi/mpi/fortran/mpif-h/add_error_class_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. 
All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ADD_ERROR_CLASS = ompi_add_error_class_f #pragma weak pmpi_add_error_class = ompi_add_error_class_f #pragma weak pmpi_add_error_class_ = ompi_add_error_class_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Add_error_class_f = ompi_add_error_class_f #pragma weak PMPI_Add_error_class_f08 = ompi_add_error_class_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ADD_ERROR_CLASS, pmpi_add_error_class, pmpi_add_error_class_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ADD_ERROR_CLASS, (MPI_Fint *errorclass, MPI_Fint *ierr), (errorclass, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ADD_ERROR_CLASS = ompi_add_error_class_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ADD_ERROR_CLASS, #pragma weak MPI_Add_error_class_f = ompi_add_error_class_f #pragma weak MPI_Add_error_class_f08 = ompi_add_error_class_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ADD_ERROR_CLASS, mpi_add_error_class, mpi_add_error_class_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ADD_ERROR_CLASS, ompi_add_error_class_f, (MPI_Fint *errorclass, MPI_Fint *ierr), (errorclass, ierr) ) +#else +#define ompi_add_error_class_f pompi_add_error_class_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_add_error_class_f(MPI_Fint *errorclass, MPI_Fint *ierr) { int ierr_c; OMPI_SINGLE_NAME_DECL(errorclass); - ierr_c = MPI_Add_error_class(OMPI_SINGLE_NAME_CONVERT(errorclass)); + ierr_c = PMPI_Add_error_class(OMPI_SINGLE_NAME_CONVERT(errorclass)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); if (MPI_SUCCESS == ierr_c) { diff --git a/ompi/mpi/fortran/mpif-h/add_error_code_f.c b/ompi/mpi/fortran/mpif-h/add_error_code_f.c index e0d66e14432..818b7c1ced7 100644 --- a/ompi/mpi/fortran/mpif-h/add_error_code_f.c +++ b/ompi/mpi/fortran/mpif-h/add_error_code_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ADD_ERROR_CODE = ompi_add_error_code_f #pragma weak pmpi_add_error_code = ompi_add_error_code_f #pragma weak pmpi_add_error_code_ = ompi_add_error_code_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Add_error_code_f = ompi_add_error_code_f #pragma weak PMPI_Add_error_code_f08 = ompi_add_error_code_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ADD_ERROR_CODE, pmpi_add_error_code, pmpi_add_error_code_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ADD_ERROR_CODE, (MPI_Fint *errorclass, MPI_Fint *errorcode, MPI_Fint *ierr), (errorclass, errorcode, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ADD_ERROR_CODE = ompi_add_error_code_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ADD_ERROR_CODE, #pragma weak MPI_Add_error_code_f = ompi_add_error_code_f #pragma weak MPI_Add_error_code_f08 = ompi_add_error_code_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ADD_ERROR_CODE, mpi_add_error_code, mpi_add_error_code_, @@ -57,21 +60,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ADD_ERROR_CODE, ompi_add_error_code_f, (MPI_Fint *errorclass, MPI_Fint *errorcode, MPI_Fint *ierr), (errorclass, errorcode, ierr) ) +#else +#define ompi_add_error_code_f pompi_add_error_code_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_add_error_code_f(MPI_Fint *errorclass, MPI_Fint *errorcode, MPI_Fint *ierr) { int ierr_c; OMPI_SINGLE_NAME_DECL(errorcode); - ierr_c = MPI_Add_error_code(OMPI_FINT_2_INT(*errorclass), - OMPI_SINGLE_NAME_CONVERT(errorcode) - ); + ierr_c = PMPI_Add_error_code(OMPI_FINT_2_INT(*errorclass), + OMPI_SINGLE_NAME_CONVERT(errorcode) + ); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); if (MPI_SUCCESS == ierr_c) { diff --git a/ompi/mpi/fortran/mpif-h/add_error_string_f.c b/ompi/mpi/fortran/mpif-h/add_error_string_f.c index 3c3f112ceb3..24a854dd338 100644 --- a/ompi/mpi/fortran/mpif-h/add_error_string_f.c +++ b/ompi/mpi/fortran/mpif-h/add_error_string_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/base/strings.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ADD_ERROR_STRING = ompi_add_error_string_f #pragma weak pmpi_add_error_string = ompi_add_error_string_f #pragma weak pmpi_add_error_string_ = ompi_add_error_string_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Add_error_string_f = ompi_add_error_string_f #pragma weak PMPI_Add_error_string_f08 = ompi_add_error_string_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ADD_ERROR_STRING, pmpi_add_error_string, pmpi_add_error_string_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ADD_ERROR_STRING, (MPI_Fint *errorcode, char *string, MPI_Fint *ierr,int l), (errorcode, string, ierr, l) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ADD_ERROR_STRING = ompi_add_error_string_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ADD_ERROR_STRING, #pragma weak MPI_Add_error_string_f = ompi_add_error_string_f #pragma weak MPI_Add_error_string_f08 = ompi_add_error_string_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ADD_ERROR_STRING, mpi_add_error_string, mpi_add_error_string_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ADD_ERROR_STRING, ompi_add_error_string_f, (MPI_Fint *errorcode, char *string, MPI_Fint *ierr, int l), (errorcode, string, ierr, l) ) +#else +#define ompi_add_error_string_f pompi_add_error_string_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_add_error_string_f(MPI_Fint *errorcode, char *string, MPI_Fint *ierr, int len) { @@ -81,7 +83,7 @@ void ompi_add_error_string_f(MPI_Fint *errorcode, char *string, } ompi_fortran_string_f2c(string, len, &c_string); - ierr_c = MPI_Add_error_string(OMPI_FINT_2_INT(*errorcode), c_string); + ierr_c = PMPI_Add_error_string(OMPI_FINT_2_INT(*errorcode), c_string); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); free(c_string); } diff --git a/ompi/mpi/fortran/mpif-h/address_f.c b/ompi/mpi/fortran/mpif-h/address_f.c index e0128f265ab..85d1369ae2d 100644 --- a/ompi/mpi/fortran/mpif-h/address_f.c +++ b/ompi/mpi/fortran/mpif-h/address_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ADDRESS = ompi_address_f #pragma weak pmpi_address = ompi_address_f #pragma weak pmpi_address_ = ompi_address_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Address_f = ompi_address_f #pragma weak PMPI_Address_f08 = ompi_address_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ADDRESS, pmpi_address, pmpi_address_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ADDRESS, (char *location, MPI_Fint *address, MPI_Fint *ierr), (location, address, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ADDRESS = ompi_address_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ADDRESS, #pragma weak MPI_Address_f = ompi_address_f #pragma weak MPI_Address_f08 = ompi_address_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ADDRESS, mpi_address, mpi_address_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ADDRESS, ompi_address_f, (char *location, MPI_Fint *address, MPI_Fint *ierr), (location, address, ierr) ) +#else +#define ompi_address_f pompi_address_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_address_f(char *location, MPI_Fint *address, MPI_Fint *ierr) { int ierr_c; MPI_Aint addr; - ierr_c = MPI_Address(location, &addr); + ierr_c = PMPI_Address(location, &addr); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); if (MPI_SUCCESS == ierr_c) { diff --git a/ompi/mpi/fortran/mpif-h/aint_add_f.c b/ompi/mpi/fortran/mpif-h/aint_add_f.c new file mode 100644 index 00000000000..3f5af406070 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/aint_add_f.c @@ -0,0 +1,73 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" + +/* The OMPI_GENERATE_F77_BINDINGS macro works only for the most common F77 bindings, the + * ones that do not return any value. There are 4 exceptions: MPI_Wtick, MPI_Wtime, + * MPI_Aint_add, and MPI_Aint_diff. For these 4 we insert the bindings + * manually. 
+ */ +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_AINT_ADD = ompi_aint_add_f +#pragma weak pmpi_aint_add = ompi_aint_add_f +#pragma weak pmpi_aint_add_ = ompi_aint_add_f +#pragma weak pmpi_aint_add__ = ompi_aint_add_f + +#pragma weak PMPI_Aint_add_f = ompi_aint_add_f +#pragma weak PMPI_Aint_add_f08 = ompi_aint_add_f +#else +MPI_Aint PMPI_AINT_ADD(MPI_Aint *base, MPI_Aint *diff) { return pompi_aint_add_f(base, diff); } +MPI_Aint pmpi_aint_add(MPI_Aint *base, MPI_Aint *diff) { return pompi_aint_add_f(base, diff); } +MPI_Aint pmpi_aint_add_(MPI_Aint *base, MPI_Aint *diff) { return pompi_aint_add_f(base, diff); } +MPI_Aint pmpi_aint_add__(MPI_Aint *base, MPI_Aint *diff) { return pompi_aint_add_f(base, diff); } +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_AINT_ADD = ompi_aint_add_f +#pragma weak mpi_aint_add = ompi_aint_add_f +#pragma weak mpi_aint_add_ = ompi_aint_add_f +#pragma weak mpi_aint_add__ = ompi_aint_add_f + +#pragma weak MPI_Aint_add_f = ompi_aint_add_f +#pragma weak MPI_Aint_add_f08 = ompi_aint_add_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING +MPI_Aint MPI_AINT_ADD(MPI_Aint *base, MPI_Aint *diff) { return ompi_aint_add_f(base, diff); } +MPI_Aint mpi_aint_add(MPI_Aint *base, MPI_Aint *diff) { return ompi_aint_add_f(base, diff); } +MPI_Aint mpi_aint_add_(MPI_Aint *base, MPI_Aint *diff) { return ompi_aint_add_f(base, diff); } +MPI_Aint mpi_aint_add__(MPI_Aint *base, MPI_Aint *diff) { return ompi_aint_add_f(base, diff); } +#else +#define ompi_aint_add_f pompi_aint_add_f +#endif +#endif + +MPI_Aint ompi_aint_add_f(MPI_Aint *base, MPI_Aint *diff) +{ + return MPI_Aint_add (*base, *diff); +} diff --git a/ompi/mpi/fortran/mpif-h/aint_diff_f.c b/ompi/mpi/fortran/mpif-h/aint_diff_f.c new file mode 100644 index 00000000000..c9c8ca43a07 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/aint_diff_f.c @@ -0,0 +1,73 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" + +/* The OMPI_GENERATE_F77_BINDINGS macro works only for the most common F77 bindings, the + * ones that do not return any value. 
There are 4 exceptions MPI_Wtick, MPI_Wtime, + * MPI_Aint_add, and MPI_Aint_diff. For these 4 we can insert the bindings + * manually. + */ +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_AINT_DIFF = ompi_aint_diff_f +#pragma weak pmpi_aint_diff = ompi_aint_diff_f +#pragma weak pmpi_aint_diff_ = ompi_aint_diff_f +#pragma weak pmpi_aint_diff__ = ompi_aint_diff_f + +#pragma weak PMPI_Aint_diff_f = ompi_aint_diff_f +#pragma weak PMPI_Aint_diff_f08 = ompi_aint_diff_f +#else +MPI_Aint PMPI_AINT_DIFF(MPI_Aint *addr1, MPI_Aint *addr2) { return pompi_aint_diff_f(addr1, addr2); } +MPI_Aint pmpi_aint_diff(MPI_Aint *addr1, MPI_Aint *addr2) { return pompi_aint_diff_f(addr1, addr2); } +MPI_Aint pmpi_aint_diff_(MPI_Aint *addr1, MPI_Aint *addr2) { return pompi_aint_diff_f(addr1, addr2); } +MPI_Aint pmpi_aint_diff__(MPI_Aint *addr1, MPI_Aint *addr2) { return pompi_aint_diff_f(addr1, addr2); } +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_AINT_DIFF = ompi_aint_diff_f +#pragma weak mpi_aint_diff = ompi_aint_diff_f +#pragma weak mpi_aint_diff_ = ompi_aint_diff_f +#pragma weak mpi_aint_diff__ = ompi_aint_diff_f + +#pragma weak MPI_Aint_diff_f = ompi_aint_diff_f +#pragma weak MPI_Aint_diff_f08 = ompi_aint_diff_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING +MPI_Aint MPI_AINT_DIFF(MPI_Aint *addr1, MPI_Aint *addr2) { return ompi_aint_diff_f(addr1, addr2); } +MPI_Aint mpi_aint_diff(MPI_Aint *addr1, MPI_Aint *addr2) { return ompi_aint_diff_f(addr1, addr2); } +MPI_Aint mpi_aint_diff_(MPI_Aint *addr1, MPI_Aint *addr2) { return ompi_aint_diff_f(addr1, addr2); } +MPI_Aint mpi_aint_diff__(MPI_Aint *addr1, MPI_Aint *addr2) { return ompi_aint_diff_f(addr1, addr2); } +#else +#define ompi_aint_diff_f pompi_aint_diff_f +#endif +#endif + +MPI_Aint ompi_aint_diff_f(MPI_Aint *addr1, MPI_Aint *addr2) +{ + return MPI_Aint_diff (*addr1, *addr2); +} diff --git a/ompi/mpi/fortran/mpif-h/allgather_f.c b/ompi/mpi/fortran/mpif-h/allgather_f.c index c426d422cd8..1e9c56caaf9 100644 --- a/ompi/mpi/fortran/mpif-h/allgather_f.c +++ b/ompi/mpi/fortran/mpif-h/allgather_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ALLGATHER = ompi_allgather_f #pragma weak pmpi_allgather = ompi_allgather_f #pragma weak pmpi_allgather_ = ompi_allgather_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Allgather_f = ompi_allgather_f #pragma weak PMPI_Allgather_f08 = ompi_allgather_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ALLGATHER, pmpi_allgather, pmpi_allgather_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLGATHER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ALLGATHER = ompi_allgather_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLGATHER, #pragma weak MPI_Allgather_f = ompi_allgather_f #pragma weak MPI_Allgather_f08 = ompi_allgather_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ALLGATHER, mpi_allgather, mpi_allgather_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ALLGATHER, ompi_allgather_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, ierr) ) +#else +#define ompi_allgather_f pompi_allgather_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_allgather_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr) @@ -73,20 +75,20 @@ void ompi_allgather_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = MPI_Allgather(sendbuf, - OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, - OMPI_FINT_2_INT(*recvcount), - c_recvtype, c_comm); + ierr_c = PMPI_Allgather(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + OMPI_FINT_2_INT(*recvcount), + c_recvtype, c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); } diff --git a/ompi/mpi/fortran/mpif-h/allgatherv_f.c b/ompi/mpi/fortran/mpif-h/allgatherv_f.c index d31ef314036..7917136c0f1 100644 --- a/ompi/mpi/fortran/mpif-h/allgatherv_f.c +++ b/ompi/mpi/fortran/mpif-h/allgatherv_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ALLGATHERV = ompi_allgatherv_f #pragma weak pmpi_allgatherv = ompi_allgatherv_f #pragma weak pmpi_allgatherv_ = ompi_allgatherv_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Allgatherv_f = ompi_allgatherv_f #pragma weak PMPI_Allgatherv_f08 = ompi_allgatherv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ALLGATHERV, pmpi_allgatherv, pmpi_allgatherv_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLGATHERV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ALLGATHERV = ompi_allgatherv_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLGATHERV, #pragma weak MPI_Allgatherv_f = ompi_allgatherv_f #pragma weak MPI_Allgatherv_f08 = ompi_allgatherv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ALLGATHERV, mpi_allgatherv, mpi_allgatherv_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ALLGATHERV, ompi_allgatherv_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, ierr) ) +#else +#define ompi_allgatherv_f pompi_allgatherv_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_allgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr) @@ -75,11 +77,11 @@ void ompi_allgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(displs); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); OMPI_ARRAY_FINT_2_INT(displs, size); @@ -87,13 +89,13 @@ void ompi_allgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = MPI_Allgatherv(sendbuf, - OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - OMPI_ARRAY_NAME_CONVERT(displs), - c_recvtype, c_comm); + ierr_c = PMPI_Allgatherv(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + OMPI_ARRAY_NAME_CONVERT(recvcounts), + OMPI_ARRAY_NAME_CONVERT(displs), + c_recvtype, c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); diff --git a/ompi/mpi/fortran/mpif-h/alloc_mem_f.c b/ompi/mpi/fortran/mpif-h/alloc_mem_f.c index c3fc140bc21..75704508137 100644 --- a/ompi/mpi/fortran/mpif-h/alloc_mem_f.c +++ b/ompi/mpi/fortran/mpif-h/alloc_mem_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). 
All rights reserved. * $COPYRIGHT$ * @@ -23,21 +23,24 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ALLOC_MEM = ompi_alloc_mem_f #pragma weak pmpi_alloc_mem = ompi_alloc_mem_f #pragma weak pmpi_alloc_mem_ = ompi_alloc_mem_f #pragma weak pmpi_alloc_mem__ = ompi_alloc_mem_f -/* Extra pragmas for the _cptr variant from MPI-3.1 */ +#pragma weak PMPI_Alloc_mem_f = ompi_alloc_mem_f +#pragma weak PMPI_Alloc_mem_f08 = ompi_alloc_mem_f + #pragma weak PMPI_ALLOC_MEM_CPTR = ompi_alloc_mem_f #pragma weak pmpi_alloc_mem_cptr = ompi_alloc_mem_f #pragma weak pmpi_alloc_mem_cptr_ = ompi_alloc_mem_f #pragma weak pmpi_alloc_mem_cptr__ = ompi_alloc_mem_f -#pragma weak PMPI_Alloc_mem_f = ompi_alloc_mem_f -#pragma weak PMPI_Alloc_mem_f08 = ompi_alloc_mem_f -#elif OMPI_PROFILE_LAYER +#pragma weak PMPI_Alloc_mem_cptr_f = ompi_alloc_mem_f +#pragma weak PMPI_Alloc_mem_cptr_f08 = ompi_alloc_mem_f +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ALLOC_MEM, pmpi_alloc_mem, pmpi_alloc_mem_, @@ -50,10 +53,11 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLOC_MEM_CPTR, pmpi_alloc_mem_cptr, pmpi_alloc_mem_cptr_, pmpi_alloc_mem_cptr__, - pompi_alloc_mem_f, + pompi_alloc_mem_cptr_f, (MPI_Aint *size, MPI_Fint *info, char *baseptr, MPI_Fint *ierr), (size, info, baseptr, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ALLOC_MEM = ompi_alloc_mem_f @@ -61,17 +65,18 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLOC_MEM_CPTR, #pragma weak mpi_alloc_mem_ = ompi_alloc_mem_f #pragma weak mpi_alloc_mem__ = ompi_alloc_mem_f -/* Extra pragmas for the _cptr variant from MPI-3.1 */ +#pragma weak MPI_Alloc_mem_f = ompi_alloc_mem_f +#pragma weak MPI_Alloc_mem_f08 = ompi_alloc_mem_f + #pragma weak MPI_ALLOC_MEM_CPTR = ompi_alloc_mem_f #pragma weak mpi_alloc_mem_cptr = ompi_alloc_mem_f #pragma weak mpi_alloc_mem_cptr_ = ompi_alloc_mem_f #pragma weak mpi_alloc_mem_cptr__ = ompi_alloc_mem_f -#pragma 
weak MPI_Alloc_mem_f = ompi_alloc_mem_f -#pragma weak MPI_Alloc_mem_f08 = ompi_alloc_mem_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#pragma weak MPI_Alloc_mem_cptr_f = ompi_alloc_mem_f +#pragma weak MPI_Alloc_mem_cptr_f08 = ompi_alloc_mem_f +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ALLOC_MEM, mpi_alloc_mem, mpi_alloc_mem_, @@ -84,21 +89,30 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ALLOC_MEM_CPTR, mpi_alloc_mem_cptr, mpi_alloc_mem_cptr_, mpi_alloc_mem_cptr__, - ompi_alloc_mem_f, + ompi_alloc_mem_cptr_f, (MPI_Aint *size, MPI_Fint *info, char *baseptr, MPI_Fint *ierr), (size, info, baseptr, ierr) ) +#else +#define ompi_alloc_mem_f pompi_alloc_mem_f +#define ompi_alloc_mem_cptr_f pompi_alloc_mem_cptr_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_alloc_mem_f(MPI_Aint *size, MPI_Fint *info, char *baseptr, MPI_Fint *ierr) { int ierr_c; - MPI_Info c_info = MPI_Info_f2c(*info); + MPI_Info c_info = PMPI_Info_f2c(*info); - ierr_c = MPI_Alloc_mem(*size, c_info, baseptr); + ierr_c = PMPI_Alloc_mem(*size, c_info, baseptr); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); } + +/* + * Note that MPI-3 mandates a second form of the + * MPI_Alloc_mem interface -- one that has a "_cptr" suffix. + */ +void ompi_alloc_mem_cptr_f(MPI_Aint *size, MPI_Fint *info, char *baseptr, MPI_Fint *ierr) +{ + ompi_alloc_mem_f(size, info, baseptr, ierr); +} diff --git a/ompi/mpi/fortran/mpif-h/allreduce_f.c b/ompi/mpi/fortran/mpif-h/allreduce_f.c index 81cae296d65..b097e74c0b0 100644 --- a/ompi/mpi/fortran/mpif-h/allreduce_f.c +++ b/ompi/mpi/fortran/mpif-h/allreduce_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ALLREDUCE = ompi_allreduce_f #pragma weak pmpi_allreduce = ompi_allreduce_f #pragma weak pmpi_allreduce_ = ompi_allreduce_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Allreduce_f = ompi_allreduce_f #pragma weak PMPI_Allreduce_f08 = ompi_allreduce_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ALLREDUCE, pmpi_allreduce, pmpi_allreduce_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLREDUCE, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ALLREDUCE = ompi_allreduce_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLREDUCE, #pragma weak MPI_Allreduce_f = ompi_allreduce_f #pragma weak MPI_Allreduce_f08 = ompi_allreduce_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ALLREDUCE, mpi_allreduce, mpi_allreduce_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ALLREDUCE, ompi_allreduce_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, ierr) ) +#else +#define ompi_allreduce_f pompi_allreduce_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_allreduce_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr) @@ -74,16 +76,16 @@ void ompi_allreduce_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Datatype c_type; MPI_Op c_op; - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = MPI_Allreduce(sendbuf, recvbuf, - OMPI_FINT_2_INT(*count), - c_type, c_op, c_comm); + ierr_c = PMPI_Allreduce(sendbuf, recvbuf, + OMPI_FINT_2_INT(*count), + c_type, c_op, c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); } diff --git a/ompi/mpi/fortran/mpif-h/alltoall_f.c b/ompi/mpi/fortran/mpif-h/alltoall_f.c index 2a37ab4c29c..2934fe97e49 100644 --- a/ompi/mpi/fortran/mpif-h/alltoall_f.c +++ b/ompi/mpi/fortran/mpif-h/alltoall_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ALLTOALL = ompi_alltoall_f #pragma weak pmpi_alltoall = ompi_alltoall_f #pragma weak pmpi_alltoall_ = ompi_alltoall_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Alltoall_f = ompi_alltoall_f #pragma weak PMPI_Alltoall_f08 = ompi_alltoall_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALL, pmpi_alltoall, pmpi_alltoall_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALL, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ALLTOALL = ompi_alltoall_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALL, #pragma weak MPI_Alltoall_f = ompi_alltoall_f #pragma weak MPI_Alltoall_f08 = ompi_alltoall_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ALLTOALL, mpi_alltoall, mpi_alltoall_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ALLTOALL, ompi_alltoall_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, ierr) ) +#else +#define ompi_alltoall_f pompi_alltoall_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_alltoall_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr) @@ -73,18 +75,18 @@ void ompi_alltoall_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Alltoall(sendbuf, + c_ierr = PMPI_Alltoall(sendbuf, OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, + c_sendtype, + recvbuf, OMPI_FINT_2_INT(*recvcount), c_recvtype, c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/alltoallv_f.c b/ompi/mpi/fortran/mpif-h/alltoallv_f.c index 23900c7ae3d..3b7b588c5e7 100644 --- a/ompi/mpi/fortran/mpif-h/alltoallv_f.c +++ b/ompi/mpi/fortran/mpif-h/alltoallv_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ALLTOALLV = ompi_alltoallv_f #pragma weak pmpi_alltoallv = ompi_alltoallv_f #pragma weak pmpi_alltoallv_ = ompi_alltoallv_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Alltoallv_f = ompi_alltoallv_f #pragma weak PMPI_Alltoallv_f08 = ompi_alltoallv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALLV, pmpi_alltoallv, pmpi_alltoallv_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALLV, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ALLTOALLV = ompi_alltoallv_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALLV, #pragma weak MPI_Alltoallv_f = ompi_alltoallv_f #pragma weak MPI_Alltoallv_f08 = ompi_alltoallv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ALLTOALLV, mpi_alltoallv, mpi_alltoallv_, @@ -58,16 +61,15 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ALLTOALLV, ompi_alltoallv_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, ierr) ) +#else +#define ompi_alltoallv_f pompi_alltoallv_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_alltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, - MPI_Fint *rdispls, MPI_Fint *recvtype, + MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr) { MPI_Comm c_comm; @@ -78,11 +80,11 @@ void ompi_alltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(rdispls); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(sendcounts, size); OMPI_ARRAY_FINT_2_INT(sdispls, size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); @@ -92,11 +94,11 @@ void ompi_alltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Alltoallv(sendbuf, + c_ierr = PMPI_Alltoallv(sendbuf, OMPI_ARRAY_NAME_CONVERT(sendcounts), - OMPI_ARRAY_NAME_CONVERT(sdispls), - c_sendtype, - recvbuf, + OMPI_ARRAY_NAME_CONVERT(sdispls), + c_sendtype, + recvbuf, OMPI_ARRAY_NAME_CONVERT(recvcounts), OMPI_ARRAY_NAME_CONVERT(rdispls), c_recvtype, c_comm); diff --git a/ompi/mpi/fortran/mpif-h/alltoallw_f.c b/ompi/mpi/fortran/mpif-h/alltoallw_f.c index fe48f295fbe..cb2328cf972 100644 --- a/ompi/mpi/fortran/mpif-h/alltoallw_f.c +++ b/ompi/mpi/fortran/mpif-h/alltoallw_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ALLTOALLW = ompi_alltoallw_f #pragma weak pmpi_alltoallw = ompi_alltoallw_f #pragma weak pmpi_alltoallw_ = ompi_alltoallw_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Alltoallw_f = ompi_alltoallw_f #pragma weak PMPI_Alltoallw_f08 = ompi_alltoallw_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALLW, pmpi_alltoallw, pmpi_alltoallw_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALLW, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ALLTOALLW = ompi_alltoallw_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ALLTOALLW, #pragma weak MPI_Alltoallw_f = ompi_alltoallw_f #pragma weak MPI_Alltoallw_f08 = ompi_alltoallw_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ALLTOALLW, mpi_alltoallw, mpi_alltoallw_, @@ -58,15 +61,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ALLTOALLW, ompi_alltoallw_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, ierr) ) +#else +#define ompi_alltoallw_f pompi_alltoallw_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, - MPI_Fint *sdispls, MPI_Fint *sendtypes, + MPI_Fint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *ierr) @@ -79,8 +81,8 @@ void ompi_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(rdispls); - c_comm = MPI_Comm_f2c(*comm); - MPI_Comm_size(c_comm, &size); + c_comm = PMPI_Comm_f2c(*comm); + PMPI_Comm_size(c_comm, &size); c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); c_recvtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); @@ -91,20 +93,20 @@ void ompi_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_FINT_2_INT(rdispls, size); while (size > 0) { - c_sendtypes[size - 1] = MPI_Type_f2c(sendtypes[size - 1]); - c_recvtypes[size - 1] = MPI_Type_f2c(recvtypes[size - 1]); + c_sendtypes[size - 1] = PMPI_Type_f2c(sendtypes[size - 1]); + c_recvtypes[size - 1] = PMPI_Type_f2c(recvtypes[size - 1]); --size; } - /* Alltoallw does not support MPI_IN_PLACE */ + sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Alltoallw(sendbuf, + c_ierr = PMPI_Alltoallw(sendbuf, OMPI_ARRAY_NAME_CONVERT(sendcounts), 
OMPI_ARRAY_NAME_CONVERT(sdispls), - c_sendtypes, - recvbuf, + c_sendtypes, + recvbuf, OMPI_ARRAY_NAME_CONVERT(recvcounts), OMPI_ARRAY_NAME_CONVERT(rdispls), c_recvtypes, c_comm); diff --git a/ompi/mpi/fortran/mpif-h/attr_delete_f.c b/ompi/mpi/fortran/mpif-h/attr_delete_f.c index 6c0aefb5a4f..fc3a1398b1d 100644 --- a/ompi/mpi/fortran/mpif-h/attr_delete_f.c +++ b/ompi/mpi/fortran/mpif-h/attr_delete_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ATTR_DELETE = ompi_attr_delete_f #pragma weak pmpi_attr_delete = ompi_attr_delete_f #pragma weak pmpi_attr_delete_ = ompi_attr_delete_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Attr_delete_f = ompi_attr_delete_f #pragma weak PMPI_Attr_delete_f08 = ompi_attr_delete_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ATTR_DELETE, pmpi_attr_delete, pmpi_attr_delete_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ATTR_DELETE, (MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *ierr), (comm, keyval, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ATTR_DELETE = ompi_attr_delete_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ATTR_DELETE, #pragma weak MPI_Attr_delete_f = ompi_attr_delete_f #pragma weak MPI_Attr_delete_f08 = ompi_attr_delete_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ATTR_DELETE, mpi_attr_delete, mpi_attr_delete_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ATTR_DELETE, ompi_attr_delete_f, (MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *ierr), (comm, keyval, ierr) ) +#else +#define ompi_attr_delete_f pompi_attr_delete_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_attr_delete_f(MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); - c_ierr = MPI_Attr_delete(c_comm, OMPI_FINT_2_INT(*keyval)); + c_ierr = PMPI_Attr_delete(c_comm, OMPI_FINT_2_INT(*keyval)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/attr_get_f.c b/ompi/mpi/fortran/mpif-h/attr_get_f.c index 4001fa9d764..5e4ca187691 100644 --- a/ompi/mpi/fortran/mpif-h/attr_get_f.c +++ b/ompi/mpi/fortran/mpif-h/attr_get_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ATTR_GET = ompi_attr_get_f #pragma weak pmpi_attr_get = ompi_attr_get_f #pragma weak pmpi_attr_get_ = ompi_attr_get_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Attr_get_f = ompi_attr_get_f #pragma weak PMPI_Attr_get_f08 = ompi_attr_get_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ATTR_GET, pmpi_attr_get, pmpi_attr_get_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ATTR_GET, (MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (comm, keyval, attribute_val, flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ATTR_GET = ompi_attr_get_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ATTR_GET, #pragma weak MPI_Attr_get_f = ompi_attr_get_f #pragma weak MPI_Attr_get_f08 = ompi_attr_get_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ATTR_GET, mpi_attr_get, mpi_attr_get_, @@ -59,24 +62,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ATTR_GET, ompi_attr_get_f, (MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (comm, keyval, attribute_val, flag, ierr) ) +#else +#define ompi_attr_get_f pompi_attr_get_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_attr_get_f(MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); OMPI_LOGICAL_NAME_DECL(flag); /* This stuff is very confusing. 
Be sure to see the comment at the top of src/attributes/attributes.c. */ - c_ierr = ompi_attr_get_fortran_mpi1(c_comm->c_keyhash, + c_ierr = ompi_attr_get_fortran_mpi1(c_comm->c_keyhash, OMPI_FINT_2_INT(*keyval), attribute_val, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); diff --git a/ompi/mpi/fortran/mpif-h/attr_put_f.c b/ompi/mpi/fortran/mpif-h/attr_put_f.c index 31838c6b9d7..f4908704aa6 100644 --- a/ompi/mpi/fortran/mpif-h/attr_put_f.c +++ b/ompi/mpi/fortran/mpif-h/attr_put_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ATTR_PUT = ompi_attr_put_f #pragma weak pmpi_attr_put = ompi_attr_put_f #pragma weak pmpi_attr_put_ = ompi_attr_put_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Attr_put_f = ompi_attr_put_f #pragma weak PMPI_Attr_put_f08 = ompi_attr_put_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ATTR_PUT, pmpi_attr_put, pmpi_attr_put_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ATTR_PUT, (MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *attribute_val, MPI_Fint *ierr), (comm, keyval, attribute_val, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ATTR_PUT = ompi_attr_put_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ATTR_PUT, #pragma weak MPI_Attr_put_f = ompi_attr_put_f #pragma weak MPI_Attr_put_f08 = ompi_attr_put_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ATTR_PUT, mpi_attr_put, mpi_attr_put_, @@ -59,18 +62,16 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ATTR_PUT, ompi_attr_put_f, (MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *attribute_val, MPI_Fint *ierr), (comm, keyval, attribute_val, ierr) ) +#else +#define ompi_attr_put_f pompi_attr_put_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_attr_put_f(MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *attribute_val, +void ompi_attr_put_f(MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *attribute_val, MPI_Fint *ierr) { int c_err; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); /* This stuff is very confusing. Be sure to see the comment at the top of src/attributes/attributes.c. 
*/ @@ -78,7 +79,7 @@ void ompi_attr_put_f(MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *attribute_val, c_err = ompi_attr_set_fortran_mpi1(COMM_ATTR, c_comm, &c_comm->c_keyhash, - OMPI_FINT_2_INT(*keyval), + OMPI_FINT_2_INT(*keyval), *attribute_val, false); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_err); diff --git a/ompi/mpi/fortran/mpif-h/barrier_f.c b/ompi/mpi/fortran/mpif-h/barrier_f.c index 870e4a1af18..2304b400897 100644 --- a/ompi/mpi/fortran/mpif-h/barrier_f.c +++ b/ompi/mpi/fortran/mpif-h/barrier_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_BARRIER = ompi_barrier_f #pragma weak pmpi_barrier = ompi_barrier_f #pragma weak pmpi_barrier_ = ompi_barrier_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Barrier_f = ompi_barrier_f #pragma weak PMPI_Barrier_f08 = ompi_barrier_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_BARRIER, pmpi_barrier, pmpi_barrier_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BARRIER, (MPI_Fint *comm, MPI_Fint *ierr), (comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_BARRIER = ompi_barrier_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BARRIER, #pragma weak MPI_Barrier_f = ompi_barrier_f #pragma weak MPI_Barrier_f08 = ompi_barrier_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_BARRIER, mpi_barrier, mpi_barrier_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_BARRIER, ompi_barrier_f, (MPI_Fint *comm, MPI_Fint *ierr), (comm, ierr) ) +#else +#define ompi_barrier_f pompi_barrier_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_barrier_f(MPI_Fint *comm, MPI_Fint *ierr) { int ierr_c; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); - ierr_c = MPI_Barrier(c_comm); + ierr_c = PMPI_Barrier(c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); } diff --git a/ompi/mpi/fortran/mpif-h/bcast_f.c b/ompi/mpi/fortran/mpif-h/bcast_f.c index c6d930aa5d2..72e8f37faf9 100644 --- a/ompi/mpi/fortran/mpif-h/bcast_f.c +++ b/ompi/mpi/fortran/mpif-h/bcast_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_BCAST = ompi_bcast_f #pragma weak pmpi_bcast = ompi_bcast_f #pragma weak pmpi_bcast_ = ompi_bcast_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Bcast_f = ompi_bcast_f #pragma weak PMPI_Bcast_f08 = ompi_bcast_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_BCAST, pmpi_bcast, pmpi_bcast_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BCAST, (char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (buffer, count, datatype, root, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_BCAST = ompi_bcast_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BCAST, #pragma weak MPI_Bcast_f = ompi_bcast_f #pragma weak MPI_Bcast_f08 = ompi_bcast_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_BCAST, mpi_bcast, mpi_bcast_, @@ -58,25 +61,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_BCAST, ompi_bcast_f, (char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (buffer, count, datatype, root, comm, ierr) ) +#else +#define ompi_bcast_f pompi_bcast_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_bcast_f(char *buffer, MPI_Fint *count, MPI_Fint *datatype, + +void ompi_bcast_f(char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; MPI_Datatype c_type; - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_Bcast(OMPI_F2C_BOTTOM(buffer), - OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Bcast(OMPI_F2C_BOTTOM(buffer), + OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*root), c_comm); diff --git a/ompi/mpi/fortran/mpif-h/bindings.h b/ompi/mpi/fortran/mpif-h/bindings.h index 1c7d871793c..fd3834cfde1 100644 --- a/ompi/mpi/fortran/mpif-h/bindings.h +++ b/ompi/mpi/fortran/mpif-h/bindings.h @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,13 +27,34 @@ #include "mpi.h" #include "ompi/request/grequest.h" -/* - * We now build all four fortran bindings and dont care too much about - * which convention (lowercase, underscore, double underscore or - * all uppercase) is supported by the compiler. The policy now is to - * have the ompi_*_f functions be the default symbols and then wrap - * the four signature types around it. The macro below achieves this. 
- */ +#if OMPI_FORTRAN_CAPS +#define OMPI_GENERATE_F77_BINDINGS(upper_case, \ + lower_case, \ + single_underscore, \ + double_underscore, \ + wrapper_function, \ + signature, \ + params) \ + void upper_case signature { wrapper_function params; } +#elif OMPI_FORTRAN_PLAIN +#define OMPI_GENERATE_F77_BINDINGS(upper_case, \ + lower_case, \ + single_underscore, \ + double_underscore, \ + wrapper_function, \ + signature, \ + params) \ + void lower_case signature { wrapper_function params; } +#elif OMPI_FORTRAN_DOUBLE_UNDERSCORE +#define OMPI_GENERATE_F77_BINDINGS(upper_case, \ + lower_case, \ + single_underscore, \ + double_underscore, \ + wrapper_function, \ + signature, \ + params) \ + void double_underscore signature { wrapper_function params; } +#elif OMPI_FORTRAN_SINGLE_UNDERSCORE #define OMPI_GENERATE_F77_BINDINGS(upper_case, \ lower_case, \ single_underscore, \ @@ -39,10 +62,10 @@ wrapper_function, \ signature, \ params) \ - void upper_case signature { wrapper_function params; } \ - void lower_case signature { wrapper_function params; } \ - void single_underscore signature { wrapper_function params; } \ - void double_underscore signature { wrapper_function params; } + void single_underscore signature { wrapper_function params; } +#else +#error Unrecognized Fortran name mangling scheme +#endif /* * We maintain 2 separate sets of defines and prototypes. This ensures * that we can build MPI_* bindings or PMPI_* bindings as needed. The diff --git a/ompi/mpi/fortran/mpif-h/bsend_f.c b/ompi/mpi/fortran/mpif-h/bsend_f.c index 692f3ad412d..5474a33a8e2 100644 --- a/ompi/mpi/fortran/mpif-h/bsend_f.c +++ b/ompi/mpi/fortran/mpif-h/bsend_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_BSEND = ompi_bsend_f #pragma weak pmpi_bsend = ompi_bsend_f #pragma weak pmpi_bsend_ = ompi_bsend_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Bsend_f = ompi_bsend_f #pragma weak PMPI_Bsend_f08 = ompi_bsend_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_BSEND, pmpi_bsend, pmpi_bsend_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BSEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_BSEND = ompi_bsend_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BSEND, #pragma weak MPI_Bsend_f = ompi_bsend_f #pragma weak MPI_Bsend_f08 = ompi_bsend_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_BSEND, mpi_bsend, mpi_bsend_, @@ -58,22 +61,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_BSEND, ompi_bsend_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, ierr) ) +#else +#define ompi_bsend_f pompi_bsend_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_bsend_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); + + c_comm = PMPI_Comm_f2c (*comm); - c_comm = MPI_Comm_f2c (*comm); - - c_ierr = MPI_Bsend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Bsend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/bsend_init_f.c b/ompi/mpi/fortran/mpif-h/bsend_init_f.c index c0a2cc42213..8a7e9eda6e8 100644 --- a/ompi/mpi/fortran/mpif-h/bsend_init_f.c +++ b/ompi/mpi/fortran/mpif-h/bsend_init_f.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_BSEND_INIT = ompi_bsend_init_f #pragma weak pmpi_bsend_init = ompi_bsend_init_f #pragma weak pmpi_bsend_init_ = ompi_bsend_init_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Bsend_init_f = ompi_bsend_init_f #pragma weak PMPI_Bsend_init_f08 = ompi_bsend_init_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_BSEND_INIT, pmpi_bsend_init, pmpi_bsend_init_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BSEND_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_BSEND_INIT = ompi_bsend_init_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BSEND_INIT, #pragma weak MPI_Bsend_init_f = ompi_bsend_init_f #pragma weak MPI_Bsend_init_f08 = ompi_bsend_init_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_BSEND_INIT, mpi_bsend_init, mpi_bsend_init_, @@ -58,30 +61,29 @@ OMPI_GENERATE_F77_BINDINGS (MPI_BSEND_INIT, ompi_bsend_init_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) +#else +#define ompi_bsend_init_f pompi_bsend_init_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_bsend_init_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Bsend_init(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Bsend_init(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), - OMPI_FINT_2_INT(*tag), + OMPI_FINT_2_INT(*tag), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/buffer_attach_f.c b/ompi/mpi/fortran/mpif-h/buffer_attach_f.c index a946f55e1cd..d1ae64f3384 100644 --- a/ompi/mpi/fortran/mpif-h/buffer_attach_f.c +++ b/ompi/mpi/fortran/mpif-h/buffer_attach_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_BUFFER_ATTACH = ompi_buffer_attach_f #pragma weak pmpi_buffer_attach = ompi_buffer_attach_f #pragma weak pmpi_buffer_attach_ = ompi_buffer_attach_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Buffer_attach_f = ompi_buffer_attach_f #pragma weak PMPI_Buffer_attach_f08 = ompi_buffer_attach_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_BUFFER_ATTACH, pmpi_buffer_attach, pmpi_buffer_attach_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BUFFER_ATTACH, (char *buffer, MPI_Fint *size, MPI_Fint *ierr), (buffer, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_BUFFER_ATTACH = ompi_buffer_attach_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BUFFER_ATTACH, #pragma weak MPI_Buffer_attach_f = ompi_buffer_attach_f #pragma weak MPI_Buffer_attach_f08 = ompi_buffer_attach_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_BUFFER_ATTACH, mpi_buffer_attach, mpi_buffer_attach_, @@ -57,15 +60,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_BUFFER_ATTACH, ompi_buffer_attach_f, (char *buffer, MPI_Fint *size, MPI_Fint *ierr), (buffer, size, ierr) ) +#else +#define ompi_buffer_attach_f pompi_buffer_attach_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_buffer_attach_f(char *buffer, MPI_Fint *size, MPI_Fint *ierr) { - int c_ierr = MPI_Buffer_attach(buffer, OMPI_FINT_2_INT(*size)); + int c_ierr = PMPI_Buffer_attach(buffer, OMPI_FINT_2_INT(*size)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/buffer_detach_f.c b/ompi/mpi/fortran/mpif-h/buffer_detach_f.c index a5cfad15eb1..918071fd50d 100644 --- a/ompi/mpi/fortran/mpif-h/buffer_detach_f.c +++ b/ompi/mpi/fortran/mpif-h/buffer_detach_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,15 +24,15 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_BUFFER_DETACH = ompi_buffer_detach_f #pragma weak pmpi_buffer_detach = ompi_buffer_detach_f #pragma weak pmpi_buffer_detach_ = ompi_buffer_detach_f #pragma weak pmpi_buffer_detach__ = ompi_buffer_detach_f #pragma weak PMPI_Buffer_detach_f = ompi_buffer_detach_f -#pragma weak PMPI_Buffer_detach_f08 = ompi_buffer_detach_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_BUFFER_DETACH, pmpi_buffer_detach, pmpi_buffer_detach_, @@ -39,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BUFFER_DETACH, (char *buffer, MPI_Fint *size, MPI_Fint *ierr), (buffer, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_BUFFER_DETACH = ompi_buffer_detach_f @@ -47,10 +50,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_BUFFER_DETACH, #pragma weak mpi_buffer_detach__ = ompi_buffer_detach_f #pragma weak MPI_Buffer_detach_f = ompi_buffer_detach_f -#pragma weak MPI_Buffer_detach_f08 = ompi_buffer_detach_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_BUFFER_DETACH, mpi_buffer_detach, mpi_buffer_detach_, @@ -58,25 +59,32 @@ OMPI_GENERATE_F77_BINDINGS (MPI_BUFFER_DETACH, ompi_buffer_detach_f, (char *buffer, MPI_Fint *size, MPI_Fint *ierr), (buffer, size, ierr) ) +#else +#define ompi_buffer_detach_f pompi_buffer_detach_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + +/* (this comment is repeated in ompi/mpi/fortran/use-mpi-f08/buffer_detach.c) + * + * MPI-3.1 section 3.6, page 45, states that the mpif.h and mpi module + * interfaces for MPI_BUFFER_DETACH ignore the buffer argument. 
+ * Therefore, for the mpif.h and mpi module interfaces, we use a dummy + * variable and leave the value handed in alone. + * + * The mpi_f08 implementation for MPI_BUFFER_DETACH therefore is a + * separate routine in the use-mpi-f08 directory (it's not built in + * the mpif-h directory because of all the different combinations of + * supporting weak symbols (or not), building the profiling layer (or + * not), etc.). + */ void ompi_buffer_detach_f(char *buffer, MPI_Fint *size, MPI_Fint *ierr) { - /* - * It does not make sense in fortran to return a pointer - * here as the user may get a behavior that is unexpected. - * Therefore, we use a dummy variable and leave the value - * handed in alone. - */ int c_ierr; void *dummy; OMPI_SINGLE_NAME_DECL(size); - c_ierr = MPI_Buffer_detach(&dummy, OMPI_SINGLE_NAME_CONVERT(size)); + + c_ierr = PMPI_Buffer_detach(&dummy, OMPI_SINGLE_NAME_CONVERT(size)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/cancel_f.c b/ompi/mpi/fortran/mpif-h/cancel_f.c index 33d3baaf2cf..ded928b8a3d 100644 --- a/ompi/mpi/fortran/mpif-h/cancel_f.c +++ b/ompi/mpi/fortran/mpif-h/cancel_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_CANCEL = ompi_cancel_f #pragma weak pmpi_cancel = ompi_cancel_f #pragma weak pmpi_cancel_ = ompi_cancel_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Cancel_f = ompi_cancel_f #pragma weak PMPI_Cancel_f08 = ompi_cancel_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_CANCEL, pmpi_cancel, pmpi_cancel_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CANCEL, (MPI_Fint *request, MPI_Fint *ierr), (request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_CANCEL = ompi_cancel_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CANCEL, #pragma weak MPI_Cancel_f = ompi_cancel_f #pragma weak MPI_Cancel_f08 = ompi_cancel_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_CANCEL, mpi_cancel, mpi_cancel_, @@ -59,15 +62,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_CANCEL, (request, ierr) ) #endif -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" +#if OMPI_BUILD_MPI_PROFILING && ! 
OPAL_HAVE_WEAK_SYMBOLS +#define ompi_cancel_f pompi_cancel_f +#endif #endif + void ompi_cancel_f(MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Request c_req = MPI_Request_f2c(*request); + MPI_Request c_req = PMPI_Request_f2c(*request); - c_ierr = MPI_Cancel(&c_req); + c_ierr = PMPI_Cancel(&c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/cart_coords_f.c b/ompi/mpi/fortran/mpif-h/cart_coords_f.c index eeb2a6bd171..311ff6a24f5 100644 --- a/ompi/mpi/fortran/mpif-h/cart_coords_f.c +++ b/ompi/mpi/fortran/mpif-h/cart_coords_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_CART_COORDS = ompi_cart_coords_f #pragma weak pmpi_cart_coords = ompi_cart_coords_f #pragma weak pmpi_cart_coords_ = ompi_cart_coords_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Cart_coords_f = ompi_cart_coords_f #pragma weak PMPI_Cart_coords_f08 = ompi_cart_coords_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_CART_COORDS, pmpi_cart_coords, pmpi_cart_coords_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_COORDS, (MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *maxdims, MPI_Fint *coords, MPI_Fint *ierr), (comm, rank, maxdims, coords, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_CART_COORDS = ompi_cart_coords_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_COORDS, #pragma weak MPI_Cart_coords_f = ompi_cart_coords_f #pragma weak MPI_Cart_coords_f08 = ompi_cart_coords_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_CART_COORDS, mpi_cart_coords, mpi_cart_coords_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_CART_COORDS, ompi_cart_coords_f, (MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *maxdims, MPI_Fint *coords, MPI_Fint *ierr), (comm, rank, maxdims, coords, ierr) ) +#else +#define ompi_cart_coords_f pompi_cart_coords_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_cart_coords_f(MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *maxdims, MPI_Fint *coords, MPI_Fint *ierr) { @@ -71,10 +73,10 @@ void ompi_cart_coords_f(MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *maxdims, MPI_Comm c_comm; OMPI_ARRAY_NAME_DECL(coords); - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); OMPI_ARRAY_FINT_2_INT_ALLOC(coords, OMPI_FINT_2_INT(*maxdims)); - c_ierr = MPI_Cart_coords(c_comm, + c_ierr = PMPI_Cart_coords(c_comm, OMPI_FINT_2_INT(*rank), OMPI_FINT_2_INT(*maxdims), OMPI_ARRAY_NAME_CONVERT(coords)); diff --git a/ompi/mpi/fortran/mpif-h/cart_create_f.c b/ompi/mpi/fortran/mpif-h/cart_create_f.c index 6a85f86e3ca..d1bafcc0b62 100644 --- a/ompi/mpi/fortran/mpif-h/cart_create_f.c +++ b/ompi/mpi/fortran/mpif-h/cart_create_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_CART_CREATE = ompi_cart_create_f #pragma weak pmpi_cart_create = ompi_cart_create_f #pragma weak pmpi_cart_create_ = ompi_cart_create_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Cart_create_f = ompi_cart_create_f #pragma weak PMPI_Cart_create_f08 = ompi_cart_create_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_CART_CREATE, pmpi_cart_create, pmpi_cart_create_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_CREATE, (MPI_Fint *old_comm, MPI_Fint *ndims, MPI_Fint *dims, ompi_fortran_logical_t *periods, ompi_fortran_logical_t *reorder, MPI_Fint *comm_cart, MPI_Fint *ierr), (old_comm, ndims, dims, periods, reorder, comm_cart, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_CART_CREATE = ompi_cart_create_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_CREATE, #pragma weak MPI_Cart_create_f = ompi_cart_create_f #pragma weak MPI_Cart_create_f08 = ompi_cart_create_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_CART_CREATE, mpi_cart_create, mpi_cart_create_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_CART_CREATE, ompi_cart_create_f, (MPI_Fint *old_comm, MPI_Fint *ndims, MPI_Fint *dims, ompi_fortran_logical_t *periods, ompi_fortran_logical_t *reorder, MPI_Fint *comm_cart, MPI_Fint *ierr), (old_comm, ndims, dims, periods, reorder, comm_cart, ierr) ) +#else +#define ompi_cart_create_f pompi_cart_create_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_cart_create_f(MPI_Fint *old_comm, MPI_Fint *ndims, MPI_Fint *dims, ompi_fortran_logical_t *periods, ompi_fortran_logical_t *reorder, MPI_Fint *comm_cart, MPI_Fint *ierr) @@ -73,13 +75,13 @@ void ompi_cart_create_f(MPI_Fint *old_comm, MPI_Fint *ndims, MPI_Fint *dims, OMPI_ARRAY_NAME_DECL(dims); OMPI_LOGICAL_ARRAY_NAME_DECL(periods); - c_comm1 = MPI_Comm_f2c(*old_comm); + c_comm1 = PMPI_Comm_f2c(*old_comm); size = OMPI_FINT_2_INT(*ndims); OMPI_ARRAY_FINT_2_INT(dims, size); OMPI_ARRAY_LOGICAL_2_INT(periods, size); - c_ierr = MPI_Cart_create(c_comm1, size, + c_ierr = PMPI_Cart_create(c_comm1, size, OMPI_ARRAY_NAME_CONVERT(dims), OMPI_LOGICAL_ARRAY_NAME_CONVERT(periods), OMPI_LOGICAL_2_INT(*reorder), @@ -87,7 +89,7 @@ void ompi_cart_create_f(MPI_Fint *old_comm, MPI_Fint *ndims, MPI_Fint *dims, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *comm_cart = MPI_Comm_c2f(c_comm2); + *comm_cart = PMPI_Comm_c2f(c_comm2); } /* diff --git a/ompi/mpi/fortran/mpif-h/cart_get_f.c b/ompi/mpi/fortran/mpif-h/cart_get_f.c index 3759b0361ca..4517c12e038 100644 --- a/ompi/mpi/fortran/mpif-h/cart_get_f.c +++ b/ompi/mpi/fortran/mpif-h/cart_get_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_CART_GET = ompi_cart_get_f #pragma weak pmpi_cart_get = ompi_cart_get_f #pragma weak pmpi_cart_get_ = ompi_cart_get_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Cart_get_f = ompi_cart_get_f #pragma weak PMPI_Cart_get_f08 = ompi_cart_get_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_CART_GET, pmpi_cart_get, pmpi_cart_get_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_GET, (MPI_Fint *comm, MPI_Fint *maxdims, MPI_Fint *dims, ompi_fortran_logical_t *periods, MPI_Fint *coords, MPI_Fint *ierr), (comm, maxdims, dims, periods, coords, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_CART_GET = ompi_cart_get_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_GET, #pragma weak MPI_Cart_get_f = ompi_cart_get_f #pragma weak MPI_Cart_get_f08 = ompi_cart_get_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_CART_GET, mpi_cart_get, mpi_cart_get_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_CART_GET, ompi_cart_get_f, (MPI_Fint *comm, MPI_Fint *maxdims, MPI_Fint *dims, ompi_fortran_logical_t *periods, MPI_Fint *coords, MPI_Fint *ierr), (comm, maxdims, dims, periods, coords, ierr) ) +#else +#define ompi_cart_get_f pompi_cart_get_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_cart_get_f(MPI_Fint *comm, MPI_Fint *maxdims, MPI_Fint *dims, ompi_fortran_logical_t *periods, MPI_Fint *coords, MPI_Fint *ierr) { @@ -73,15 +75,15 @@ void ompi_cart_get_f(MPI_Fint *comm, MPI_Fint *maxdims, MPI_Fint *dims, OMPI_ARRAY_NAME_DECL(coords); OMPI_LOGICAL_ARRAY_NAME_DECL(periods); - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); size = OMPI_FINT_2_INT(*maxdims); OMPI_ARRAY_FINT_2_INT_ALLOC(dims, size); OMPI_ARRAY_FINT_2_INT_ALLOC(coords, size); OMPI_ARRAY_LOGICAL_2_INT_ALLOC(periods, size); - c_ierr = MPI_Cart_get(c_comm, - size, + c_ierr = PMPI_Cart_get(c_comm, + size, OMPI_ARRAY_NAME_CONVERT(dims), OMPI_LOGICAL_ARRAY_NAME_CONVERT(periods), OMPI_ARRAY_NAME_CONVERT(coords)); diff --git a/ompi/mpi/fortran/mpif-h/cart_map_f.c b/ompi/mpi/fortran/mpif-h/cart_map_f.c index 1777c8c6213..3ad10c341e8 100644 --- a/ompi/mpi/fortran/mpif-h/cart_map_f.c +++ b/ompi/mpi/fortran/mpif-h/cart_map_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_CART_MAP = ompi_cart_map_f #pragma weak pmpi_cart_map = ompi_cart_map_f #pragma weak pmpi_cart_map_ = ompi_cart_map_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Cart_map_f = ompi_cart_map_f #pragma weak PMPI_Cart_map_f08 = ompi_cart_map_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_CART_MAP, pmpi_cart_map, pmpi_cart_map_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_MAP, (MPI_Fint *comm, MPI_Fint *ndims, MPI_Fint *dims, ompi_fortran_logical_t *periods, MPI_Fint *newrank, MPI_Fint *ierr), (comm, ndims, dims, periods, newrank, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_CART_MAP = ompi_cart_map_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_MAP, #pragma weak MPI_Cart_map_f = ompi_cart_map_f #pragma weak MPI_Cart_map_f08 = ompi_cart_map_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_CART_MAP, mpi_cart_map, mpi_cart_map_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_CART_MAP, ompi_cart_map_f, (MPI_Fint *comm, MPI_Fint *ndims, MPI_Fint *dims, ompi_fortran_logical_t *periods, MPI_Fint *newrank, MPI_Fint *ierr), (comm, ndims, dims, periods, newrank, ierr) ) +#else +#define ompi_cart_map_f pompi_cart_map_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_cart_map_f(MPI_Fint *comm, MPI_Fint *ndims, MPI_Fint *dims, ompi_fortran_logical_t *periods, MPI_Fint *newrank, MPI_Fint *ierr) { @@ -73,13 +75,13 @@ void ompi_cart_map_f(MPI_Fint *comm, MPI_Fint *ndims, MPI_Fint *dims, OMPI_LOGICAL_ARRAY_NAME_DECL(periods); OMPI_SINGLE_NAME_DECL(newrank); - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); size = OMPI_FINT_2_INT(*ndims); OMPI_ARRAY_FINT_2_INT(dims, size); OMPI_ARRAY_LOGICAL_2_INT(periods, size); - c_ierr = MPI_Cart_map(c_comm, + c_ierr = PMPI_Cart_map(c_comm, size, OMPI_ARRAY_NAME_CONVERT(dims), OMPI_LOGICAL_ARRAY_NAME_CONVERT(periods), diff --git a/ompi/mpi/fortran/mpif-h/cart_rank_f.c b/ompi/mpi/fortran/mpif-h/cart_rank_f.c index c66865a5c13..e6375db0029 100644 --- a/ompi/mpi/fortran/mpif-h/cart_rank_f.c +++ b/ompi/mpi/fortran/mpif-h/cart_rank_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_CART_RANK = ompi_cart_rank_f #pragma weak pmpi_cart_rank = ompi_cart_rank_f #pragma weak pmpi_cart_rank_ = ompi_cart_rank_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Cart_rank_f = ompi_cart_rank_f #pragma weak PMPI_Cart_rank_f08 = ompi_cart_rank_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_CART_RANK, pmpi_cart_rank, pmpi_cart_rank_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_RANK, (MPI_Fint *comm, MPI_Fint *coords, MPI_Fint *rank, MPI_Fint *ierr), (comm, coords, rank, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_CART_RANK = ompi_cart_rank_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_RANK, #pragma weak MPI_Cart_rank_f = ompi_cart_rank_f #pragma weak MPI_Cart_rank_f08 = ompi_cart_rank_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_CART_RANK, mpi_cart_rank, mpi_cart_rank_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_CART_RANK, ompi_cart_rank_f, (MPI_Fint *comm, MPI_Fint *coords, MPI_Fint *rank, MPI_Fint *ierr), (comm, coords, rank, ierr) ) +#else +#define ompi_cart_rank_f pompi_cart_rank_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_cart_rank_f(MPI_Fint *comm, MPI_Fint *coords, MPI_Fint *rank, MPI_Fint *ierr) { @@ -72,16 +74,16 @@ void ompi_cart_rank_f(MPI_Fint *comm, MPI_Fint *coords, MPI_Fint *rank, OMPI_ARRAY_NAME_DECL(coords); OMPI_SINGLE_NAME_DECL(rank); - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); - c_ierr = MPI_Cartdim_get(c_comm, &ndims); + c_ierr = PMPI_Cartdim_get(c_comm, &ndims); if (MPI_SUCCESS != c_ierr) { if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); return; } OMPI_ARRAY_FINT_2_INT(coords, ndims); - c_ierr = MPI_Cart_rank(c_comm, + c_ierr = PMPI_Cart_rank(c_comm, OMPI_ARRAY_NAME_CONVERT(coords), OMPI_SINGLE_NAME_CONVERT(rank)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/cart_shift_f.c b/ompi/mpi/fortran/mpif-h/cart_shift_f.c index 31252e1aa02..2b29078b76c 100644 --- a/ompi/mpi/fortran/mpif-h/cart_shift_f.c +++ b/ompi/mpi/fortran/mpif-h/cart_shift_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_CART_SHIFT = ompi_cart_shift_f #pragma weak pmpi_cart_shift = ompi_cart_shift_f #pragma weak pmpi_cart_shift_ = ompi_cart_shift_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Cart_shift_f = ompi_cart_shift_f #pragma weak PMPI_Cart_shift_f08 = ompi_cart_shift_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_CART_SHIFT, pmpi_cart_shift, pmpi_cart_shift_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_SHIFT, (MPI_Fint *comm, MPI_Fint *direction, MPI_Fint *disp, MPI_Fint *rank_source, MPI_Fint *rank_dest, MPI_Fint *ierr), (comm, direction, disp, rank_source, rank_dest, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_CART_SHIFT = ompi_cart_shift_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_SHIFT, #pragma weak MPI_Cart_shift_f = ompi_cart_shift_f #pragma weak MPI_Cart_shift_f08 = ompi_cart_shift_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_CART_SHIFT, mpi_cart_shift, mpi_cart_shift_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_CART_SHIFT, ompi_cart_shift_f, (MPI_Fint *comm, MPI_Fint *direction, MPI_Fint *disp, MPI_Fint *rank_source, MPI_Fint *rank_dest, MPI_Fint *ierr), (comm, direction, disp, rank_source, rank_dest, ierr) ) +#else +#define ompi_cart_shift_f pompi_cart_shift_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_cart_shift_f(MPI_Fint *comm, MPI_Fint *direction, MPI_Fint *disp, MPI_Fint *rank_source, MPI_Fint *rank_dest, MPI_Fint *ierr) @@ -73,9 +75,9 @@ void ompi_cart_shift_f(MPI_Fint *comm, MPI_Fint *direction, MPI_Fint *disp, OMPI_SINGLE_NAME_DECL(rank_source); OMPI_SINGLE_NAME_DECL(rank_dest); - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); - c_ierr = MPI_Cart_shift(c_comm, + c_ierr = PMPI_Cart_shift(c_comm, OMPI_FINT_2_INT(*direction), OMPI_FINT_2_INT(*disp), OMPI_SINGLE_NAME_CONVERT(rank_source), diff --git a/ompi/mpi/fortran/mpif-h/cart_sub_f.c b/ompi/mpi/fortran/mpif-h/cart_sub_f.c index 1cd3e7b175b..be51488e30e 100644 --- a/ompi/mpi/fortran/mpif-h/cart_sub_f.c +++ b/ompi/mpi/fortran/mpif-h/cart_sub_f.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_CART_SUB = ompi_cart_sub_f #pragma weak pmpi_cart_sub = ompi_cart_sub_f #pragma weak pmpi_cart_sub_ = ompi_cart_sub_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Cart_sub_f = ompi_cart_sub_f #pragma weak PMPI_Cart_sub_f08 = ompi_cart_sub_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_CART_SUB, pmpi_cart_sub, pmpi_cart_sub_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_SUB, (MPI_Fint *comm, ompi_fortran_logical_t *remain_dims, MPI_Fint *new_comm, MPI_Fint *ierr), (comm, remain_dims, new_comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_CART_SUB = ompi_cart_sub_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CART_SUB, #pragma weak MPI_Cart_sub_f = ompi_cart_sub_f #pragma weak MPI_Cart_sub_f08 = ompi_cart_sub_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_CART_SUB, mpi_cart_sub, mpi_cart_sub_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_CART_SUB, ompi_cart_sub_f, (MPI_Fint *comm, ompi_fortran_logical_t *remain_dims, MPI_Fint *new_comm, MPI_Fint *ierr), (comm, remain_dims, new_comm, ierr) ) +#else +#define ompi_cart_sub_f pompi_cart_sub_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_cart_sub_f(MPI_Fint *comm, ompi_fortran_logical_t *remain_dims, MPI_Fint *new_comm, MPI_Fint *ierr) { @@ -79,8 +81,8 @@ void ompi_cart_sub_f(MPI_Fint *comm, ompi_fortran_logical_t *remain_dims, #endif OMPI_LOGICAL_ARRAY_NAME_DECL(remain_dims); - c_comm = MPI_Comm_f2c(*comm); - c_new_comm = MPI_Comm_f2c(*new_comm); + c_comm = PMPI_Comm_f2c(*comm); + c_new_comm = PMPI_Comm_f2c(*new_comm); #if OMPI_FORTRAN_MUST_CONVERT_LOGICAL_2_INT == 1 *ierr = OMPI_INT_2_FINT(MPI_Cartdim_get(c_comm, &ndims)); @@ -90,13 +92,13 @@ void ompi_cart_sub_f(MPI_Fint *comm, ompi_fortran_logical_t *remain_dims, #endif OMPI_ARRAY_LOGICAL_2_INT(remain_dims, ndims); - c_ierr = MPI_Cart_sub(c_comm, + c_ierr = PMPI_Cart_sub(c_comm, OMPI_LOGICAL_ARRAY_NAME_CONVERT(remain_dims), &c_new_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *new_comm = MPI_Comm_c2f(c_new_comm); + *new_comm = PMPI_Comm_c2f(c_new_comm); } OMPI_ARRAY_INT_2_LOGICAL(remain_dims, ndims); diff --git a/ompi/mpi/fortran/mpif-h/cartdim_get_f.c b/ompi/mpi/fortran/mpif-h/cartdim_get_f.c index 28c706ddde1..d4766d0d3b7 100644 --- a/ompi/mpi/fortran/mpif-h/cartdim_get_f.c +++ b/ompi/mpi/fortran/mpif-h/cartdim_get_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_CARTDIM_GET = ompi_cartdim_get_f #pragma weak pmpi_cartdim_get = ompi_cartdim_get_f #pragma weak pmpi_cartdim_get_ = ompi_cartdim_get_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Cartdim_get_f = ompi_cartdim_get_f #pragma weak PMPI_Cartdim_get_f08 = ompi_cartdim_get_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_CARTDIM_GET, pmpi_cartdim_get, pmpi_cartdim_get_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CARTDIM_GET, (MPI_Fint *comm, MPI_Fint *ndims, MPI_Fint *ierr), (comm, ndims, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_CARTDIM_GET = ompi_cartdim_get_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CARTDIM_GET, #pragma weak MPI_Cartdim_get_f = ompi_cartdim_get_f #pragma weak MPI_Cartdim_get_f08 = ompi_cartdim_get_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_CARTDIM_GET, mpi_cartdim_get, mpi_cartdim_get_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_CARTDIM_GET, ompi_cartdim_get_f, (MPI_Fint *comm, MPI_Fint *ndims, MPI_Fint *ierr), (comm, ndims, ierr) ) +#else +#define ompi_cartdim_get_f pompi_cartdim_get_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_cartdim_get_f(MPI_Fint *comm, MPI_Fint *ndims, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; OMPI_SINGLE_NAME_DECL(ndims); - - c_comm = MPI_Comm_f2c(*comm); - c_ierr = MPI_Cartdim_get(c_comm, OMPI_SINGLE_NAME_CONVERT(ndims)); + c_comm = PMPI_Comm_f2c(*comm); + + c_ierr = PMPI_Cartdim_get(c_comm, OMPI_SINGLE_NAME_CONVERT(ndims)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/close_port_f.c b/ompi/mpi/fortran/mpif-h/close_port_f.c index e2098f56f97..eaf95750e55 100644 --- a/ompi/mpi/fortran/mpif-h/close_port_f.c +++ b/ompi/mpi/fortran/mpif-h/close_port_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_CLOSE_PORT = ompi_close_port_f #pragma weak pmpi_close_port = ompi_close_port_f #pragma weak pmpi_close_port_ = ompi_close_port_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Close_port_f = ompi_close_port_f #pragma weak PMPI_Close_port_f08 = ompi_close_port_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_CLOSE_PORT, pmpi_close_port, pmpi_close_port_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CLOSE_PORT, (char *port_name, MPI_Fint *ierr, int port_name_len), (port_name, ierr, port_name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_CLOSE_PORT = ompi_close_port_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_CLOSE_PORT, #pragma weak MPI_Close_port_f = ompi_close_port_f #pragma weak MPI_Close_port_f08 = ompi_close_port_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_CLOSE_PORT, mpi_close_port, mpi_close_port_, @@ -58,20 +61,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_CLOSE_PORT, ompi_close_port_f, (char *port_name, MPI_Fint *ierr, int port_name_len), (port_name, ierr, port_name_len) ) +#else +#define ompi_close_port_f pompi_close_port_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_close_port_f(char *port_name, MPI_Fint *ierr, int port_name_len) { int c_ierr; char *c_port_name; ompi_fortran_string_f2c(port_name, port_name_len, &c_port_name); - c_ierr = MPI_Close_port(c_port_name); + c_ierr = PMPI_Close_port(c_port_name); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); free ( c_port_name); diff --git a/ompi/mpi/fortran/mpif-h/comm_accept_f.c b/ompi/mpi/fortran/mpif-h/comm_accept_f.c index f9161f4f27a..257e2c3062b 100644 --- a/ompi/mpi/fortran/mpif-h/comm_accept_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_accept_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_ACCEPT = ompi_comm_accept_f #pragma weak pmpi_comm_accept = ompi_comm_accept_f #pragma weak pmpi_comm_accept_ = ompi_comm_accept_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Comm_accept_f = ompi_comm_accept_f #pragma weak PMPI_Comm_accept_f08 = ompi_comm_accept_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_ACCEPT, pmpi_comm_accept, pmpi_comm_accept_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_ACCEPT, (char *port_name, MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr, int port_name_len), (port_name, info, root, comm, newcomm, ierr, port_name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_ACCEPT = ompi_comm_accept_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_ACCEPT, #pragma weak MPI_Comm_accept_f = ompi_comm_accept_f #pragma weak MPI_Comm_accept_f08 = ompi_comm_accept_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_ACCEPT, mpi_comm_accept, mpi_comm_accept_, @@ -58,15 +61,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_ACCEPT, ompi_comm_accept_f, (char *port_name, MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr, int port_name_len), (port_name, info, root, comm, newcomm, ierr, port_name_len) ) +#else +#define ompi_comm_accept_f pompi_comm_accept_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_comm_accept_f(char *port_name, MPI_Fint *info, MPI_Fint *root, - MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr, + +void ompi_comm_accept_f(char *port_name, MPI_Fint *info, MPI_Fint *root, + MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr, int port_name_len) { int c_ierr; @@ -74,18 +76,18 @@ void ompi_comm_accept_f(char *port_name, MPI_Fint *info, MPI_Fint *root, MPI_Info c_info; char *c_port_name; - c_comm = MPI_Comm_f2c(*comm); - c_info = MPI_Info_f2c(*info); + c_comm = PMPI_Comm_f2c(*comm); + c_info = PMPI_Info_f2c(*info); ompi_fortran_string_f2c(port_name, port_name_len, &c_port_name); - c_ierr = MPI_Comm_accept(c_port_name, c_info, - OMPI_FINT_2_INT(*root), + c_ierr = PMPI_Comm_accept(c_port_name, c_info, + OMPI_FINT_2_INT(*root), c_comm, &c_new_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newcomm = MPI_Comm_c2f(c_new_comm); + *newcomm = PMPI_Comm_c2f(c_new_comm); } free ( c_port_name ); } diff --git a/ompi/mpi/fortran/mpif-h/comm_call_errhandler_f.c b/ompi/mpi/fortran/mpif-h/comm_call_errhandler_f.c index 32afd5e760b..a5d35ae4d34 100644 --- a/ompi/mpi/fortran/mpif-h/comm_call_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_call_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_CALL_ERRHANDLER = ompi_comm_call_errhandler_f #pragma weak pmpi_comm_call_errhandler = ompi_comm_call_errhandler_f #pragma weak pmpi_comm_call_errhandler_ = ompi_comm_call_errhandler_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_call_errhandler_f = ompi_comm_call_errhandler_f #pragma weak PMPI_Comm_call_errhandler_f08 = ompi_comm_call_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CALL_ERRHANDLER, pmpi_comm_call_errhandler, pmpi_comm_call_errhandler_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CALL_ERRHANDLER, (MPI_Fint *comm, MPI_Fint *errorcode, MPI_Fint *ierr), (comm, errorcode, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_CALL_ERRHANDLER = ompi_comm_call_errhandler_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CALL_ERRHANDLER, #pragma weak MPI_Comm_call_errhandler_f = ompi_comm_call_errhandler_f #pragma weak MPI_Comm_call_errhandler_f08 = ompi_comm_call_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CALL_ERRHANDLER, mpi_comm_call_errhandler, mpi_comm_call_errhandler_, @@ -57,21 +60,20 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CALL_ERRHANDLER, ompi_comm_call_errhandler_f, (MPI_Fint *comm, MPI_Fint *errorcode, MPI_Fint *ierr), (comm, errorcode, ierr) ) +#else +#define ompi_comm_call_errhandler_f pompi_comm_call_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_call_errhandler_f(MPI_Fint *comm, MPI_Fint *errorcode, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); - c_ierr = MPI_Comm_call_errhandler(c_comm, OMPI_FINT_2_INT(*errorcode)); + c_ierr = PMPI_Comm_call_errhandler(c_comm, OMPI_FINT_2_INT(*errorcode)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/comm_compare_f.c b/ompi/mpi/fortran/mpif-h/comm_compare_f.c index 7efa46b8892..332e74ba8ac 100644 --- a/ompi/mpi/fortran/mpif-h/comm_compare_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_compare_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_COMPARE = ompi_comm_compare_f #pragma weak pmpi_comm_compare = ompi_comm_compare_f #pragma weak pmpi_comm_compare_ = ompi_comm_compare_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_compare_f = ompi_comm_compare_f #pragma weak PMPI_Comm_compare_f08 = ompi_comm_compare_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_COMPARE, pmpi_comm_compare, pmpi_comm_compare_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_COMPARE, (MPI_Fint *comm1, MPI_Fint *comm2, MPI_Fint *result, MPI_Fint *ierr), (comm1, comm2, result, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_COMPARE = ompi_comm_compare_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_COMPARE, #pragma weak MPI_Comm_compare_f = ompi_comm_compare_f #pragma weak MPI_Comm_compare_f08 = ompi_comm_compare_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_COMPARE, mpi_comm_compare, mpi_comm_compare_, @@ -57,21 +60,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_COMPARE, ompi_comm_compare_f, (MPI_Fint *comm1, MPI_Fint *comm2, MPI_Fint *result, MPI_Fint *ierr), (comm1, comm2, result, ierr) ) +#else +#define ompi_comm_compare_f pompi_comm_compare_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_compare_f(MPI_Fint *comm1, MPI_Fint *comm2, MPI_Fint *result, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm1 = MPI_Comm_f2c(*comm1); - MPI_Comm c_comm2 = MPI_Comm_f2c(*comm2); + MPI_Comm c_comm1 = PMPI_Comm_f2c(*comm1); + MPI_Comm c_comm2 = PMPI_Comm_f2c(*comm2); OMPI_SINGLE_NAME_DECL(result); - c_ierr = MPI_Comm_compare(c_comm1, c_comm2, + c_ierr = PMPI_Comm_compare(c_comm1, c_comm2, OMPI_SINGLE_NAME_CONVERT(result)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/comm_connect_f.c b/ompi/mpi/fortran/mpif-h/comm_connect_f.c index c532bcb82e9..3acaaa62751 100644 --- a/ompi/mpi/fortran/mpif-h/comm_connect_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_connect_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_CONNECT = ompi_comm_connect_f #pragma weak pmpi_comm_connect = ompi_comm_connect_f #pragma weak pmpi_comm_connect_ = ompi_comm_connect_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Comm_connect_f = ompi_comm_connect_f #pragma weak PMPI_Comm_connect_f08 = ompi_comm_connect_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CONNECT, pmpi_comm_connect, pmpi_comm_connect_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CONNECT, (char *port_name, MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr, int port_name_len), (port_name, info, root, comm, newcomm, ierr, port_name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_CONNECT = ompi_comm_connect_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CONNECT, #pragma weak MPI_Comm_connect_f = ompi_comm_connect_f #pragma weak MPI_Comm_connect_f08 = ompi_comm_connect_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CONNECT, mpi_comm_connect, mpi_comm_connect_, @@ -58,16 +61,15 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CONNECT, ompi_comm_connect_f, (char *port_name, MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr, int port_name_len), (port_name, info, root, comm, newcomm, ierr, port_name_len) ) +#else +#define ompi_comm_connect_f pompi_comm_connect_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_connect_f(char *port_name, MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm, - MPI_Fint *newcomm, MPI_Fint *ierr, + MPI_Fint *newcomm, MPI_Fint *ierr, int port_name_len) { int c_ierr; @@ -75,17 +77,17 @@ void ompi_comm_connect_f(char *port_name, MPI_Fint *info, MPI_Info c_info; char *c_port_name; - c_comm = MPI_Comm_f2c(*comm); - c_info = MPI_Info_f2c(*info); + c_comm = PMPI_Comm_f2c(*comm); + c_info = PMPI_Info_f2c(*info); ompi_fortran_string_f2c(port_name, port_name_len, &c_port_name); - c_ierr = MPI_Comm_connect(c_port_name, c_info, + c_ierr = PMPI_Comm_connect(c_port_name, c_info, OMPI_FINT_2_INT(*root), c_comm, &c_new_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newcomm = MPI_Comm_c2f(c_new_comm); + *newcomm = PMPI_Comm_c2f(c_new_comm); } free ( c_port_name ); } diff --git a/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c b/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c index a327070172f..d7a32ff651c 100644 --- a/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_CREATE_ERRHANDLER = ompi_comm_create_errhandler_f #pragma weak pmpi_comm_create_errhandler = ompi_comm_create_errhandler_f #pragma weak pmpi_comm_create_errhandler_ = ompi_comm_create_errhandler_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Comm_create_errhandler_f = ompi_comm_create_errhandler_f #pragma weak PMPI_Comm_create_errhandler_f08 = ompi_comm_create_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_ERRHANDLER, pmpi_comm_create_errhandler, pmpi_comm_create_errhandler_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_ERRHANDLER, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr), (function, errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_CREATE_ERRHANDLER = ompi_comm_create_errhandler_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_ERRHANDLER, #pragma weak MPI_Comm_create_errhandler_f = ompi_comm_create_errhandler_f #pragma weak MPI_Comm_create_errhandler_f08 = ompi_comm_create_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_ERRHANDLER, mpi_comm_create_errhandler, mpi_comm_create_errhandler_, @@ -59,11 +62,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_ERRHANDLER, ompi_comm_create_errhandler_f, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr), (function, errhandler, ierr) ) +#else +#define ompi_comm_create_errhandler_f pompi_comm_create_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif static const char FUNC_NAME[] = "MPI_COMM_CREATE_ERRHANDLER"; @@ -73,12 +74,12 @@ void ompi_comm_create_errhandler_f(ompi_errhandler_fortran_handler_fn_t *functio MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; - MPI_Errhandler c_errhandler = + MPI_Errhandler c_errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_FORTRAN); if (MPI_ERRHANDLER_NULL != c_errhandler) { - *errhandler = MPI_Errhandler_c2f(c_errhandler); + *errhandler = PMPI_Errhandler_c2f(c_errhandler); c_ierr = MPI_SUCCESS; } else { c_ierr = MPI_ERR_INTERN; diff --git a/ompi/mpi/fortran/mpif-h/comm_create_f.c b/ompi/mpi/fortran/mpif-h/comm_create_f.c index b1c2be8554c..b40f1237863 100644 --- a/ompi/mpi/fortran/mpif-h/comm_create_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_create_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_CREATE = ompi_comm_create_f #pragma weak pmpi_comm_create = ompi_comm_create_f #pragma weak pmpi_comm_create_ = ompi_comm_create_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_create_f = ompi_comm_create_f #pragma weak PMPI_Comm_create_f08 = ompi_comm_create_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE, pmpi_comm_create, pmpi_comm_create_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, group, newcomm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_CREATE = ompi_comm_create_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE, #pragma weak MPI_Comm_create_f = ompi_comm_create_f #pragma weak MPI_Comm_create_f08 = ompi_comm_create_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE, mpi_comm_create, mpi_comm_create_, @@ -57,24 +60,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE, ompi_comm_create_f, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, group, newcomm, ierr) ) +#else +#define ompi_comm_create_f pompi_comm_create_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_comm_create_f(MPI_Fint *comm, MPI_Fint *group, MPI_Fint *newcomm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_newcomm; - MPI_Comm c_comm = MPI_Comm_f2c (*comm); - MPI_Group c_group = MPI_Group_f2c(*group); + MPI_Comm c_comm = PMPI_Comm_f2c (*comm); + MPI_Group c_group = PMPI_Group_f2c(*group); - c_ierr = MPI_Comm_create (c_comm, c_group, &c_newcomm); + c_ierr = PMPI_Comm_create(c_comm, c_group, &c_newcomm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newcomm = MPI_Comm_c2f (c_newcomm); + *newcomm = PMPI_Comm_c2f (c_newcomm); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_create_group_f.c b/ompi/mpi/fortran/mpif-h/comm_create_group_f.c index e7fb5263de5..4f8f26eaec3 100644 --- a/ompi/mpi/fortran/mpif-h/comm_create_group_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_create_group_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_CREATE_GROUP = ompi_comm_create_group_f #pragma weak pmpi_comm_create_group = ompi_comm_create_group_f #pragma weak pmpi_comm_create_group_ = ompi_comm_create_group_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Comm_create_group_f = ompi_comm_create_group_f #pragma weak PMPI_Comm_create_group_f08 = ompi_comm_create_group_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_GROUP, pmpi_comm_create_group, pmpi_comm_create_group_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_GROUP, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *tag, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, group, tag, newcomm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_CREATE_GROUP = ompi_comm_create_group_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_GROUP, #pragma weak MPI_Comm_create_group_f = ompi_comm_create_group_f #pragma weak MPI_Comm_create_group_f08 = ompi_comm_create_group_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_GROUP, mpi_comm_create_group, mpi_comm_create_group_, @@ -60,24 +63,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_GROUP, ompi_comm_create_group_f, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *tag, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, group, tag, newcomm, ierr) ) +#else +#define ompi_comm_create_group_f pompi_comm_create_group_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_comm_create_group_f(MPI_Fint *comm, MPI_Fint *group, MPI_Fint *tag, MPI_Fint *newcomm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_newcomm; - MPI_Comm c_comm = MPI_Comm_f2c (*comm); - MPI_Group c_group = MPI_Group_f2c(*group); + MPI_Comm c_comm = PMPI_Comm_f2c (*comm); + MPI_Group c_group = PMPI_Group_f2c(*group); - c_ierr = MPI_Comm_create_group (c_comm, c_group, OMPI_FINT_2_INT(*tag), &c_newcomm); + c_ierr = PMPI_Comm_create_group (c_comm, c_group, OMPI_FINT_2_INT(*tag), &c_newcomm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newcomm = MPI_Comm_c2f (c_newcomm); + *newcomm = PMPI_Comm_c2f (c_newcomm); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c b/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c index d0923ee5c0a..5f3f3b2691c 100644 --- a/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_CREATE_KEYVAL = ompi_comm_create_keyval_f #pragma weak pmpi_comm_create_keyval = ompi_comm_create_keyval_f #pragma weak pmpi_comm_create_keyval_ = ompi_comm_create_keyval_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Comm_create_keyval_f = ompi_comm_create_keyval_f #pragma weak PMPI_Comm_create_keyval_f08 = ompi_comm_create_keyval_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_KEYVAL, pmpi_comm_create_keyval, pmpi_comm_create_keyval_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_KEYVAL, (ompi_mpi2_fortran_copy_attr_function* comm_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (comm_copy_attr_fn, comm_delete_attr_fn, comm_keyval, extra_state, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_CREATE_KEYVAL = ompi_comm_create_keyval_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_KEYVAL, #pragma weak MPI_Comm_create_keyval_f = ompi_comm_create_keyval_f #pragma weak MPI_Comm_create_keyval_f08 = ompi_comm_create_keyval_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_KEYVAL, mpi_comm_create_keyval, mpi_comm_create_keyval_, @@ -58,14 +61,13 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_KEYVAL, ompi_comm_create_keyval_f, (ompi_mpi2_fortran_copy_attr_function* comm_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (comm_copy_attr_fn, comm_delete_attr_fn, comm_keyval, extra_state, ierr) ) +#else +#define ompi_comm_create_keyval_f pompi_comm_create_keyval_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif - static const char FUNC_NAME[] = "MPI_Comm_create_keyval_f"; +static const char FUNC_NAME[] = "MPI_Comm_create_keyval_f"; + void ompi_comm_create_keyval_f(ompi_mpi2_fortran_copy_attr_function* comm_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* comm_delete_attr_fn, diff --git a/ompi/mpi/fortran/mpif-h/comm_delete_attr_f.c b/ompi/mpi/fortran/mpif-h/comm_delete_attr_f.c index 77ef2139335..86ad48d94c4 100644 --- a/ompi/mpi/fortran/mpif-h/comm_delete_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_delete_attr_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_DELETE_ATTR = ompi_comm_delete_attr_f #pragma weak pmpi_comm_delete_attr = ompi_comm_delete_attr_f #pragma weak pmpi_comm_delete_attr_ = ompi_comm_delete_attr_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_delete_attr_f = ompi_comm_delete_attr_f #pragma weak PMPI_Comm_delete_attr_f08 = ompi_comm_delete_attr_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DELETE_ATTR, pmpi_comm_delete_attr, pmpi_comm_delete_attr_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DELETE_ATTR, (MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Fint *ierr), (comm, comm_keyval, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_DELETE_ATTR = ompi_comm_delete_attr_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DELETE_ATTR, #pragma weak MPI_Comm_delete_attr_f = ompi_comm_delete_attr_f #pragma weak MPI_Comm_delete_attr_f08 = ompi_comm_delete_attr_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_DELETE_ATTR, mpi_comm_delete_attr, mpi_comm_delete_attr_, @@ -57,21 +60,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_DELETE_ATTR, ompi_comm_delete_attr_f, (MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Fint *ierr), (comm, comm_keyval, ierr) ) +#else +#define ompi_comm_delete_attr_f pompi_comm_delete_attr_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_delete_attr_f(MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; - - c_comm = MPI_Comm_f2c(*comm); - c_ierr = MPI_Comm_delete_attr(c_comm, OMPI_FINT_2_INT(*comm_keyval)); + c_comm = PMPI_Comm_f2c(*comm); + + c_ierr = PMPI_Comm_delete_attr(c_comm, OMPI_FINT_2_INT(*comm_keyval)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/comm_disconnect_f.c b/ompi/mpi/fortran/mpif-h/comm_disconnect_f.c index 0b6a238dff8..9aefb1ca417 100644 --- a/ompi/mpi/fortran/mpif-h/comm_disconnect_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_disconnect_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_DISCONNECT = ompi_comm_disconnect_f #pragma weak pmpi_comm_disconnect = ompi_comm_disconnect_f #pragma weak pmpi_comm_disconnect_ = ompi_comm_disconnect_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_disconnect_f = ompi_comm_disconnect_f #pragma weak PMPI_Comm_disconnect_f08 = ompi_comm_disconnect_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DISCONNECT, pmpi_comm_disconnect, pmpi_comm_disconnect_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DISCONNECT, (MPI_Fint *comm, MPI_Fint *ierr), (comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_DISCONNECT = ompi_comm_disconnect_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DISCONNECT, #pragma weak MPI_Comm_disconnect_f = ompi_comm_disconnect_f #pragma weak MPI_Comm_disconnect_f08 = ompi_comm_disconnect_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_DISCONNECT, mpi_comm_disconnect, mpi_comm_disconnect_, @@ -57,24 +60,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_DISCONNECT, ompi_comm_disconnect_f, (MPI_Fint *comm, MPI_Fint *ierr), (comm, ierr) ) +#else +#define ompi_comm_disconnect_f pompi_comm_disconnect_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_disconnect_f(MPI_Fint *comm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c(*comm); - - c_ierr = MPI_Comm_disconnect(&c_comm); + c_comm = PMPI_Comm_f2c(*comm); + + c_ierr = PMPI_Comm_disconnect(&c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *comm = MPI_Comm_c2f(c_comm); + *comm = PMPI_Comm_c2f(c_comm); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_dup_f.c b/ompi/mpi/fortran/mpif-h/comm_dup_f.c index 0bf1ff8b917..8e9da075a98 100644 --- a/ompi/mpi/fortran/mpif-h/comm_dup_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_dup_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_DUP = ompi_comm_dup_f #pragma weak pmpi_comm_dup = ompi_comm_dup_f #pragma weak pmpi_comm_dup_ = ompi_comm_dup_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_dup_f = ompi_comm_dup_f #pragma weak PMPI_Comm_dup_f08 = ompi_comm_dup_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DUP, pmpi_comm_dup, pmpi_comm_dup_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DUP, (MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, newcomm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_DUP = ompi_comm_dup_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DUP, #pragma weak MPI_Comm_dup_f = ompi_comm_dup_f #pragma weak MPI_Comm_dup_f08 = ompi_comm_dup_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_DUP, mpi_comm_dup, mpi_comm_dup_, @@ -57,23 +60,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_DUP, ompi_comm_dup_f, (MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, newcomm, ierr) ) +#else +#define ompi_comm_dup_f pompi_comm_dup_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_dup_f(MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_newcomm; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); - - c_ierr = MPI_Comm_dup(c_comm, &c_newcomm); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); + + c_ierr = PMPI_Comm_dup(c_comm, &c_newcomm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newcomm = MPI_Comm_c2f(c_newcomm); + *newcomm = PMPI_Comm_c2f(c_newcomm); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_dup_with_info_f.c b/ompi/mpi/fortran/mpif-h/comm_dup_with_info_f.c index f17599eb801..40d989ffda2 100644 --- a/ompi/mpi/fortran/mpif-h/comm_dup_with_info_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_dup_with_info_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_DUP_WITH_INFO = ompi_comm_dup_with_info_f #pragma weak pmpi_comm_dup_with_info = ompi_comm_dup_with_info_f #pragma weak pmpi_comm_dup_with_info_ = ompi_comm_dup_with_info_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Comm_dup_with_info_f = ompi_comm_dup_with_info_f #pragma weak PMPI_Comm_dup_with_info_f08 = ompi_comm_dup_with_info_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DUP_WITH_INFO, pmpi_comm_dup_with_info, pmpi_comm_dup_with_info_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DUP_WITH_INFO, (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, info, newcomm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_DUP_WITH_INFO = ompi_comm_dup_with_info_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_DUP_WITH_INFO, #pragma weak MPI_Comm_dup_with_info_f = ompi_comm_dup_with_info_f #pragma weak MPI_Comm_dup_with_info_f08 = ompi_comm_dup_with_info_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_DUP_WITH_INFO, mpi_comm_dup_with_info, mpi_comm_dup_with_info_, @@ -60,26 +63,25 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_DUP_WITH_INFO, ompi_comm_dup_with_info_f, (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, info, newcomm, ierr) ) +#else +#define ompi_comm_dup_with_info_f pompi_comm_dup_with_info_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_dup_with_info_f(MPI_Fint *comm, MPI_Fint *info, MPI_Fint *newcomm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_newcomm; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); MPI_Info c_info; - c_info = MPI_Info_f2c(*info); + c_info = PMPI_Info_f2c(*info); - c_ierr = MPI_Comm_dup_with_info(c_comm, c_info, &c_newcomm); + c_ierr = PMPI_Comm_dup_with_info(c_comm, c_info, &c_newcomm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newcomm = MPI_Comm_c2f(c_newcomm); + *newcomm = PMPI_Comm_c2f(c_newcomm); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_free_f.c b/ompi/mpi/fortran/mpif-h/comm_free_f.c index 54b9d069e59..f2e2f4aa20f 100644 --- a/ompi/mpi/fortran/mpif-h/comm_free_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_free_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_FREE = ompi_comm_free_f #pragma weak pmpi_comm_free = ompi_comm_free_f #pragma weak pmpi_comm_free_ = ompi_comm_free_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_free_f = ompi_comm_free_f #pragma weak PMPI_Comm_free_f08 = ompi_comm_free_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_FREE, pmpi_comm_free, pmpi_comm_free_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_FREE, (MPI_Fint *comm, MPI_Fint *ierr), (comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_FREE = ompi_comm_free_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_FREE, #pragma weak MPI_Comm_free_f = ompi_comm_free_f #pragma weak MPI_Comm_free_f08 = ompi_comm_free_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_FREE, mpi_comm_free, mpi_comm_free_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_FREE, ompi_comm_free_f, (MPI_Fint *comm, MPI_Fint *ierr), (comm, ierr) ) +#else +#define ompi_comm_free_f pompi_comm_free_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_free_f(MPI_Fint *comm, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); - c_ierr = MPI_Comm_free(&c_comm); + c_ierr = PMPI_Comm_free(&c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *comm = MPI_Comm_c2f(c_comm); + *comm = PMPI_Comm_c2f(c_comm); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_free_keyval_f.c b/ompi/mpi/fortran/mpif-h/comm_free_keyval_f.c index 1d9634925b6..a66cea6caf4 100644 --- a/ompi/mpi/fortran/mpif-h/comm_free_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_free_keyval_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_FREE_KEYVAL = ompi_comm_free_keyval_f #pragma weak pmpi_comm_free_keyval = ompi_comm_free_keyval_f #pragma weak pmpi_comm_free_keyval_ = ompi_comm_free_keyval_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_free_keyval_f = ompi_comm_free_keyval_f #pragma weak PMPI_Comm_free_keyval_f08 = ompi_comm_free_keyval_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_FREE_KEYVAL, pmpi_comm_free_keyval, pmpi_comm_free_keyval_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_FREE_KEYVAL, (MPI_Fint *comm_keyval, MPI_Fint *ierr), (comm_keyval, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_FREE_KEYVAL = ompi_comm_free_keyval_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_FREE_KEYVAL, #pragma weak MPI_Comm_free_keyval_f = ompi_comm_free_keyval_f #pragma weak MPI_Comm_free_keyval_f08 = ompi_comm_free_keyval_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_FREE_KEYVAL, mpi_comm_free_keyval, mpi_comm_free_keyval_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_FREE_KEYVAL, ompi_comm_free_keyval_f, (MPI_Fint *comm_keyval, MPI_Fint *ierr), (comm_keyval, ierr) ) +#else +#define ompi_comm_free_keyval_f pompi_comm_free_keyval_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_free_keyval_f(MPI_Fint *comm_keyval, MPI_Fint *ierr) { int c_ierr; @@ -71,7 +73,7 @@ void ompi_comm_free_keyval_f(MPI_Fint *comm_keyval, MPI_Fint *ierr) OMPI_SINGLE_FINT_2_INT(comm_keyval); - c_ierr = MPI_Comm_free_keyval(OMPI_SINGLE_NAME_CONVERT(comm_keyval)); + c_ierr = PMPI_Comm_free_keyval(OMPI_SINGLE_NAME_CONVERT(comm_keyval)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/comm_get_attr_f.c b/ompi/mpi/fortran/mpif-h/comm_get_attr_f.c index 3465c589012..d5570d8bf11 100644 --- a/ompi/mpi/fortran/mpif-h/comm_get_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_get_attr_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_GET_ATTR = ompi_comm_get_attr_f #pragma weak pmpi_comm_get_attr = ompi_comm_get_attr_f #pragma weak pmpi_comm_get_attr_ = ompi_comm_get_attr_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Comm_get_attr_f = ompi_comm_get_attr_f #pragma weak PMPI_Comm_get_attr_f08 = ompi_comm_get_attr_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_ATTR, pmpi_comm_get_attr, pmpi_comm_get_attr_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_ATTR, (MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Aint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (comm, comm_keyval, attribute_val, flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_GET_ATTR = ompi_comm_get_attr_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_ATTR, #pragma weak MPI_Comm_get_attr_f = ompi_comm_get_attr_f #pragma weak MPI_Comm_get_attr_f08 = ompi_comm_get_attr_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GET_ATTR, mpi_comm_get_attr, mpi_comm_get_attr_, @@ -59,11 +62,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GET_ATTR, ompi_comm_get_attr_f, (MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Aint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (comm, comm_keyval, attribute_val, flag, ierr) ) +#else +#define ompi_comm_get_attr_f pompi_comm_get_attr_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_comm_get_attr_f(MPI_Fint *comm, MPI_Fint *comm_keyval, @@ -71,7 +72,7 @@ void ompi_comm_get_attr_f(MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); OMPI_LOGICAL_NAME_DECL(flag); /* This stuff is very confusing. Be sure to see the comment at diff --git a/ompi/mpi/fortran/mpif-h/comm_get_errhandler_f.c b/ompi/mpi/fortran/mpif-h/comm_get_errhandler_f.c index 0cc8a17bad1..c3c37d3ff4d 100644 --- a/ompi/mpi/fortran/mpif-h/comm_get_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_get_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_GET_ERRHANDLER = ompi_comm_get_errhandler_f #pragma weak pmpi_comm_get_errhandler = ompi_comm_get_errhandler_f #pragma weak pmpi_comm_get_errhandler_ = ompi_comm_get_errhandler_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_get_errhandler_f = ompi_comm_get_errhandler_f #pragma weak PMPI_Comm_get_errhandler_f08 = ompi_comm_get_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_ERRHANDLER, pmpi_comm_get_errhandler, pmpi_comm_get_errhandler_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_ERRHANDLER, (MPI_Fint *comm, MPI_Fint *erhandler, MPI_Fint *ierr), (comm, erhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_GET_ERRHANDLER = ompi_comm_get_errhandler_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_ERRHANDLER, #pragma weak MPI_Comm_get_errhandler_f = ompi_comm_get_errhandler_f #pragma weak MPI_Comm_get_errhandler_f08 = ompi_comm_get_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GET_ERRHANDLER, mpi_comm_get_errhandler, mpi_comm_get_errhandler_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GET_ERRHANDLER, ompi_comm_get_errhandler_f, (MPI_Fint *comm, MPI_Fint *erhandler, MPI_Fint *ierr), (comm, erhandler, ierr) ) +#else +#define ompi_comm_get_errhandler_f pompi_comm_get_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_get_errhandler_f(MPI_Fint *comm, MPI_Fint *errhandler, MPI_Fint *ierr) { @@ -71,12 +73,12 @@ void ompi_comm_get_errhandler_f(MPI_Fint *comm, MPI_Fint *errhandler, MPI_Comm c_comm; MPI_Errhandler c_errhandler; - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); - c_ierr = MPI_Comm_get_errhandler(c_comm, &c_errhandler); + c_ierr = PMPI_Comm_get_errhandler(c_comm, &c_errhandler); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *errhandler = MPI_Errhandler_c2f(c_errhandler); + *errhandler = PMPI_Errhandler_c2f(c_errhandler); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_get_info_f.c b/ompi/mpi/fortran/mpif-h/comm_get_info_f.c index 1cfd8bde3ae..109fb590de4 100644 --- a/ompi/mpi/fortran/mpif-h/comm_get_info_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_get_info_f.c @@ -1,9 +1,11 @@ /* * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -13,7 +15,8 @@ #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_GET_INFO = ompi_comm_get_info_f #pragma weak pmpi_comm_get_info = ompi_comm_get_info_f #pragma weak pmpi_comm_get_info_ = ompi_comm_get_info_f @@ -21,7 +24,7 @@ #pragma weak PMPI_Comm_get_info_f = ompi_comm_get_info_f #pragma weak PMPI_Comm_get_info_f08 = ompi_comm_get_info_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_INFO, pmpi_comm_get_info, pmpi_comm_get_info_, @@ -30,6 +33,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_INFO, (MPI_Fint *comm, MPI_Fint *info_used, MPI_Fint *ierr), (comm, info_used, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_GET_INFO = ompi_comm_get_info_f @@ -39,9 +43,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_INFO, #pragma weak MPI_Comm_get_info_f = ompi_comm_get_info_f #pragma weak MPI_Comm_get_info_f08 = ompi_comm_get_info_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GET_INFO, mpi_comm_get_info, mpi_comm_get_info_, @@ -49,23 +52,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GET_INFO, ompi_comm_get_info_f, (MPI_Fint *comm, MPI_Fint *info_used, MPI_Fint *ierr), (comm, info_used, ierr) ) +#else +#define ompi_comm_get_info_f pompi_comm_get_info_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_get_info_f(MPI_Fint *comm, MPI_Fint *info_used, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); MPI_Info c_info; - c_ierr = MPI_Comm_get_info(c_comm, &c_info); + c_ierr = PMPI_Comm_get_info(c_comm, &c_info); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *info_used = MPI_Info_c2f(c_info); + *info_used = PMPI_Info_c2f(c_info); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_get_name_f.c b/ompi/mpi/fortran/mpif-h/comm_get_name_f.c index 836a110cb87..af600628211 100644 --- a/ompi/mpi/fortran/mpif-h/comm_get_name_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_get_name_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/constants.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_GET_NAME = ompi_comm_get_name_f #pragma weak pmpi_comm_get_name = ompi_comm_get_name_f #pragma weak pmpi_comm_get_name_ = ompi_comm_get_name_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Comm_get_name_f = ompi_comm_get_name_f #pragma weak PMPI_Comm_get_name_f08 = ompi_comm_get_name_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_NAME, pmpi_comm_get_name, pmpi_comm_get_name_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_NAME, (MPI_Fint *comm, char *comm_name, MPI_Fint *resultlen, MPI_Fint *ierr, int name_len), (comm, comm_name, resultlen, ierr, name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_GET_NAME = ompi_comm_get_name_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_NAME, #pragma weak MPI_Comm_get_name_f = ompi_comm_get_name_f #pragma weak MPI_Comm_get_name_f08 = ompi_comm_get_name_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GET_NAME, mpi_comm_get_name, mpi_comm_get_name_, @@ -60,22 +63,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GET_NAME, ompi_comm_get_name_f, (MPI_Fint *comm, char *comm_name, MPI_Fint *resultlen, MPI_Fint *ierr, int name_len), (comm, comm_name, resultlen, ierr, name_len) ) +#else +#define ompi_comm_get_name_f pompi_comm_get_name_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_get_name_f(MPI_Fint *comm, char *comm_name, MPI_Fint *resultlen, MPI_Fint *ierr, int name_len) { int c_ierr, c_len; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); char c_name[MPI_MAX_OBJECT_NAME]; - c_ierr = MPI_Comm_get_name(c_comm, c_name, &c_len); + c_ierr = PMPI_Comm_get_name(c_comm, c_name, &c_len); if (MPI_SUCCESS == c_ierr) { ompi_fortran_string_c2f(c_name, comm_name, name_len); *resultlen = OMPI_INT_2_FINT(c_len); diff --git a/ompi/mpi/fortran/mpif-h/comm_get_parent_f.c b/ompi/mpi/fortran/mpif-h/comm_get_parent_f.c index 925865b6f1a..951e392507c 100644 --- a/ompi/mpi/fortran/mpif-h/comm_get_parent_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_get_parent_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_GET_PARENT = ompi_comm_get_parent_f #pragma weak pmpi_comm_get_parent = ompi_comm_get_parent_f #pragma weak pmpi_comm_get_parent_ = ompi_comm_get_parent_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_get_parent_f = ompi_comm_get_parent_f #pragma weak PMPI_Comm_get_parent_f08 = ompi_comm_get_parent_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_PARENT, pmpi_comm_get_parent, pmpi_comm_get_parent_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_PARENT, (MPI_Fint *parent, MPI_Fint *ierr), (parent, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_GET_PARENT = ompi_comm_get_parent_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GET_PARENT, #pragma weak MPI_Comm_get_parent_f = ompi_comm_get_parent_f #pragma weak MPI_Comm_get_parent_f08 = ompi_comm_get_parent_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GET_PARENT, mpi_comm_get_parent, mpi_comm_get_parent_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GET_PARENT, ompi_comm_get_parent_f, (MPI_Fint *parent, MPI_Fint *ierr), (parent, ierr) ) +#else +#define ompi_comm_get_parent_f pompi_comm_get_parent_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_get_parent_f(MPI_Fint *parent, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_parent; - c_ierr = MPI_Comm_get_parent(&c_parent); + c_ierr = PMPI_Comm_get_parent(&c_parent); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *parent = MPI_Comm_c2f(c_parent); + *parent = PMPI_Comm_c2f(c_parent); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_group_f.c b/ompi/mpi/fortran/mpif-h/comm_group_f.c index 84527eec154..50c9342b71b 100644 --- a/ompi/mpi/fortran/mpif-h/comm_group_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_group_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_GROUP = ompi_comm_group_f #pragma weak pmpi_comm_group = ompi_comm_group_f #pragma weak pmpi_comm_group_ = ompi_comm_group_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_group_f = ompi_comm_group_f #pragma weak PMPI_Comm_group_f08 = ompi_comm_group_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GROUP, pmpi_comm_group, pmpi_comm_group_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GROUP, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *ierr), (comm, group, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_GROUP = ompi_comm_group_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_GROUP, #pragma weak MPI_Comm_group_f = ompi_comm_group_f #pragma weak MPI_Comm_group_f08 = ompi_comm_group_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GROUP, mpi_comm_group, mpi_comm_group_, @@ -57,23 +60,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_GROUP, ompi_comm_group_f, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *ierr), (comm, group, ierr) ) +#else +#define ompi_comm_group_f pompi_comm_group_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_group_f(MPI_Fint *comm, MPI_Fint *group, MPI_Fint *ierr) { int c_ierr; MPI_Group c_group; - MPI_Comm c_comm = MPI_Comm_f2c( *comm ); - - c_ierr = MPI_Comm_group( c_comm, &c_group); + MPI_Comm c_comm = PMPI_Comm_f2c( *comm ); + + c_ierr = PMPI_Comm_group( c_comm, &c_group); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *group = MPI_Group_c2f (c_group); + *group = PMPI_Group_c2f (c_group); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_idup_f.c b/ompi/mpi/fortran/mpif-h/comm_idup_f.c index 692a1d4247d..11330c82f44 100644 --- a/ompi/mpi/fortran/mpif-h/comm_idup_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_idup_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_IDUP = ompi_comm_idup_f #pragma weak pmpi_comm_idup = ompi_comm_idup_f #pragma weak pmpi_comm_idup_ = ompi_comm_idup_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Comm_idup_f = ompi_comm_idup_f #pragma weak PMPI_Comm_idup_f08 = ompi_comm_idup_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_IDUP, pmpi_comm_idup, pmpi_comm_idup_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_IDUP, (MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *request, MPI_Fint *ierr), (comm, newcomm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_IDUP = ompi_comm_idup_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_IDUP, #pragma weak MPI_Comm_idup_f = ompi_comm_idup_f #pragma weak MPI_Comm_idup_f08 = ompi_comm_idup_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_IDUP, mpi_comm_idup, mpi_comm_idup_, @@ -60,25 +63,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_IDUP, ompi_comm_idup_f, (MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *request, MPI_Fint *ierr), (comm, newcomm, request, ierr) ) +#else +#define ompi_comm_idup_f pompi_comm_idup_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_idup_f(MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_newcomm; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); MPI_Request c_req; - c_ierr = MPI_Comm_idup(c_comm, &c_newcomm, &c_req); + c_ierr = PMPI_Comm_idup(c_comm, &c_newcomm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newcomm = MPI_Comm_c2f(c_newcomm); - *request = MPI_Request_c2f(c_req); + *newcomm = PMPI_Comm_c2f(c_newcomm); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_join_f.c b/ompi/mpi/fortran/mpif-h/comm_join_f.c index d913ac519c7..fa4fa1bdaa4 100644 --- a/ompi/mpi/fortran/mpif-h/comm_join_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_join_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_JOIN = ompi_comm_join_f #pragma weak pmpi_comm_join = ompi_comm_join_f #pragma weak pmpi_comm_join_ = ompi_comm_join_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_join_f = ompi_comm_join_f #pragma weak PMPI_Comm_join_f08 = ompi_comm_join_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_JOIN, pmpi_comm_join, pmpi_comm_join_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_JOIN, (MPI_Fint *fd, MPI_Fint *intercomm, MPI_Fint *ierr), (fd, intercomm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_JOIN = ompi_comm_join_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_JOIN, #pragma weak MPI_Comm_join_f = ompi_comm_join_f #pragma weak MPI_Comm_join_f08 = ompi_comm_join_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_JOIN, mpi_comm_join, mpi_comm_join_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_JOIN, ompi_comm_join_f, (MPI_Fint *fd, MPI_Fint *intercomm, MPI_Fint *ierr), (fd, intercomm, ierr) ) +#else +#define ompi_comm_join_f pompi_comm_join_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_join_f(MPI_Fint *fd, MPI_Fint *intercomm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_intercomm; - c_ierr = MPI_Comm_join(OMPI_FINT_2_INT(*fd), &c_intercomm); + c_ierr = PMPI_Comm_join(OMPI_FINT_2_INT(*fd), &c_intercomm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *intercomm = MPI_Comm_c2f(c_intercomm); + *intercomm = PMPI_Comm_c2f(c_intercomm); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_rank_f.c b/ompi/mpi/fortran/mpif-h/comm_rank_f.c index 576d5083639..4735707bf91 100644 --- a/ompi/mpi/fortran/mpif-h/comm_rank_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_rank_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_RANK = ompi_comm_rank_f #pragma weak pmpi_comm_rank = ompi_comm_rank_f #pragma weak pmpi_comm_rank_ = ompi_comm_rank_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_rank_f = ompi_comm_rank_f #pragma weak PMPI_Comm_rank_f08 = ompi_comm_rank_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_RANK, pmpi_comm_rank, pmpi_comm_rank_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_RANK, (MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *ierr), (comm, rank, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_RANK = ompi_comm_rank_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_RANK, #pragma weak MPI_Comm_rank_f = ompi_comm_rank_f #pragma weak MPI_Comm_rank_f08 = ompi_comm_rank_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_RANK, mpi_comm_rank, mpi_comm_rank_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_RANK, ompi_comm_rank_f, (MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *ierr), (comm, rank, ierr) ) +#else +#define ompi_comm_rank_f pompi_comm_rank_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_rank_f(MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm = MPI_Comm_f2c( *comm ); + MPI_Comm c_comm = PMPI_Comm_f2c( *comm ); OMPI_SINGLE_NAME_DECL(rank); - c_ierr = MPI_Comm_rank( c_comm, OMPI_SINGLE_NAME_CONVERT(rank)); + c_ierr = PMPI_Comm_rank( c_comm, OMPI_SINGLE_NAME_CONVERT(rank)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/comm_remote_group_f.c b/ompi/mpi/fortran/mpif-h/comm_remote_group_f.c index 9f87441ef13..b891c18960b 100644 --- a/ompi/mpi/fortran/mpif-h/comm_remote_group_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_remote_group_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_REMOTE_GROUP = ompi_comm_remote_group_f #pragma weak pmpi_comm_remote_group = ompi_comm_remote_group_f #pragma weak pmpi_comm_remote_group_ = ompi_comm_remote_group_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_remote_group_f = ompi_comm_remote_group_f #pragma weak PMPI_Comm_remote_group_f08 = ompi_comm_remote_group_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_REMOTE_GROUP, pmpi_comm_remote_group, pmpi_comm_remote_group_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_REMOTE_GROUP, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *ierr), (comm, group, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_REMOTE_GROUP = ompi_comm_remote_group_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_REMOTE_GROUP, #pragma weak MPI_Comm_remote_group_f = ompi_comm_remote_group_f #pragma weak MPI_Comm_remote_group_f08 = ompi_comm_remote_group_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_REMOTE_GROUP, mpi_comm_remote_group, mpi_comm_remote_group_, @@ -57,23 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_REMOTE_GROUP, ompi_comm_remote_group_f, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *ierr), (comm, group, ierr) ) +#else +#define ompi_comm_remote_group_f pompi_comm_remote_group_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_comm_remote_group_f(MPI_Fint *comm, MPI_Fint *group, MPI_Fint *ierr) { int c_ierr; MPI_Group c_group; - MPI_Comm c_comm = MPI_Comm_f2c ( *comm ); + MPI_Comm c_comm = PMPI_Comm_f2c ( *comm ); - c_ierr = MPI_Comm_remote_group ( c_comm, &c_group ); + c_ierr = PMPI_Comm_remote_group(c_comm, &c_group); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *group = MPI_Group_c2f (c_group); + *group = PMPI_Group_c2f (c_group); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_remote_size_f.c b/ompi/mpi/fortran/mpif-h/comm_remote_size_f.c index e64095a34ef..3a27c0e6ac0 100644 --- a/ompi/mpi/fortran/mpif-h/comm_remote_size_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_remote_size_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_REMOTE_SIZE = ompi_comm_remote_size_f #pragma weak pmpi_comm_remote_size = ompi_comm_remote_size_f #pragma weak pmpi_comm_remote_size_ = ompi_comm_remote_size_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_remote_size_f = ompi_comm_remote_size_f #pragma weak PMPI_Comm_remote_size_f08 = ompi_comm_remote_size_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_REMOTE_SIZE, pmpi_comm_remote_size, pmpi_comm_remote_size_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_REMOTE_SIZE, (MPI_Fint *comm, MPI_Fint *size, MPI_Fint *ierr), (comm, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_REMOTE_SIZE = ompi_comm_remote_size_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_REMOTE_SIZE, #pragma weak MPI_Comm_remote_size_f = ompi_comm_remote_size_f #pragma weak MPI_Comm_remote_size_f08 = ompi_comm_remote_size_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_REMOTE_SIZE, mpi_comm_remote_size, mpi_comm_remote_size_, @@ -57,20 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_REMOTE_SIZE, ompi_comm_remote_size_f, (MPI_Fint *comm, MPI_Fint *size, MPI_Fint *ierr), (comm, size, ierr) ) +#else +#define ompi_comm_remote_size_f pompi_comm_remote_size_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_comm_remote_size_f(MPI_Fint *comm, MPI_Fint *size, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm = MPI_Comm_f2c ( *comm ); + MPI_Comm c_comm = PMPI_Comm_f2c ( *comm ); OMPI_SINGLE_NAME_DECL(size); - c_ierr = MPI_Comm_remote_size ( c_comm, OMPI_SINGLE_NAME_CONVERT(size )); + c_ierr = PMPI_Comm_remote_size(c_comm, OMPI_SINGLE_NAME_CONVERT(size )); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/comm_set_attr_f.c b/ompi/mpi/fortran/mpif-h/comm_set_attr_f.c index b1887cbe30a..79d14c7126e 100644 --- a/ompi/mpi/fortran/mpif-h/comm_set_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_set_attr_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_SET_ATTR = ompi_comm_set_attr_f #pragma weak pmpi_comm_set_attr = ompi_comm_set_attr_f #pragma weak pmpi_comm_set_attr_ = ompi_comm_set_attr_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Comm_set_attr_f = ompi_comm_set_attr_f #pragma weak PMPI_Comm_set_attr_f08 = ompi_comm_set_attr_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_ATTR, pmpi_comm_set_attr, pmpi_comm_set_attr_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_ATTR, (MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Aint *attribute_val, MPI_Fint *ierr), (comm, comm_keyval, attribute_val, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_SET_ATTR = ompi_comm_set_attr_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_ATTR, #pragma weak MPI_Comm_set_attr_f = ompi_comm_set_attr_f #pragma weak MPI_Comm_set_attr_f08 = ompi_comm_set_attr_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SET_ATTR, mpi_comm_set_attr, mpi_comm_set_attr_, @@ -59,26 +62,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SET_ATTR, ompi_comm_set_attr_f, (MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Aint *attribute_val, MPI_Fint *ierr), (comm, comm_keyval, attribute_val, ierr) ) +#else +#define ompi_comm_set_attr_f pompi_comm_set_attr_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_comm_set_attr_f(MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Aint *attribute_val, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); - + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); + /* This stuff is very confusing. Be sure to see the comment at the top of src/attributes/attributes.c. */ c_ierr = ompi_attr_set_fortran_mpi2(COMM_ATTR, c_comm, &c_comm->c_keyhash, - OMPI_FINT_2_INT(*comm_keyval), + OMPI_FINT_2_INT(*comm_keyval), *attribute_val, false); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/comm_set_errhandler_f.c b/ompi/mpi/fortran/mpif-h/comm_set_errhandler_f.c index b69313adaee..857e92aea35 100644 --- a/ompi/mpi/fortran/mpif-h/comm_set_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_set_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_SET_ERRHANDLER = ompi_comm_set_errhandler_f #pragma weak pmpi_comm_set_errhandler = ompi_comm_set_errhandler_f #pragma weak pmpi_comm_set_errhandler_ = ompi_comm_set_errhandler_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Comm_set_errhandler_f = ompi_comm_set_errhandler_f #pragma weak PMPI_Comm_set_errhandler_f08 = ompi_comm_set_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_ERRHANDLER, pmpi_comm_set_errhandler, pmpi_comm_set_errhandler_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_ERRHANDLER, (MPI_Fint *comm, MPI_Fint *errhandler, MPI_Fint *ierr), (comm, errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_SET_ERRHANDLER = ompi_comm_set_errhandler_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_ERRHANDLER, #pragma weak MPI_Comm_set_errhandler_f = ompi_comm_set_errhandler_f #pragma weak MPI_Comm_set_errhandler_f08 = ompi_comm_set_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SET_ERRHANDLER, mpi_comm_set_errhandler, mpi_comm_set_errhandler_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SET_ERRHANDLER, ompi_comm_set_errhandler_f, (MPI_Fint *comm, MPI_Fint *errhandler, MPI_Fint *ierr), (comm, errhandler, ierr) ) +#else +#define ompi_comm_set_errhandler_f pompi_comm_set_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_set_errhandler_f(MPI_Fint *comm, MPI_Fint *errhandler, MPI_Fint *ierr) { @@ -72,9 +74,9 @@ void ompi_comm_set_errhandler_f(MPI_Fint *comm, MPI_Fint *errhandler, MPI_Comm c_comm; MPI_Errhandler c_errhandler; - c_comm = MPI_Comm_f2c(*comm); - c_errhandler = MPI_Errhandler_f2c(*errhandler); + c_comm = PMPI_Comm_f2c(*comm); + c_errhandler = PMPI_Errhandler_f2c(*errhandler); - c_ierr = MPI_Comm_set_errhandler(c_comm, c_errhandler); + c_ierr = PMPI_Comm_set_errhandler(c_comm, c_errhandler); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/comm_set_info_f.c b/ompi/mpi/fortran/mpif-h/comm_set_info_f.c index 550f17c5649..75b39ed5d1b 100644 --- a/ompi/mpi/fortran/mpif-h/comm_set_info_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_set_info_f.c @@ -1,9 +1,11 @@ /* * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -13,7 +15,8 @@ #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_SET_INFO = ompi_comm_set_info_f #pragma weak pmpi_comm_set_info = ompi_comm_set_info_f #pragma weak pmpi_comm_set_info_ = ompi_comm_set_info_f @@ -21,7 +24,7 @@ #pragma weak PMPI_Comm_set_info_f = ompi_comm_set_info_f #pragma weak PMPI_Comm_set_info_f08 = ompi_comm_set_info_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_INFO, pmpi_comm_set_info, pmpi_comm_set_info_, @@ -30,6 +33,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_INFO, (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *ierr), (comm, info, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_SET_INFO = ompi_comm_set_info_f @@ -39,9 +43,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_INFO, #pragma weak MPI_Comm_set_info_f = ompi_comm_set_info_f #pragma weak MPI_Comm_set_info_f08 = ompi_comm_set_info_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SET_INFO, mpi_comm_set_info, mpi_comm_set_info_, @@ -49,19 +52,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SET_INFO, ompi_comm_set_info_f, (MPI_Fint *comm, MPI_Fint *info, MPI_Fint *ierr), (comm, info, ierr) ) +#else +#define ompi_comm_set_info_f pompi_comm_set_info_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_set_info_f(MPI_Fint *comm, MPI_Fint *info, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); - MPI_Info c_info = MPI_Info_f2c(*info); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); + MPI_Info c_info = PMPI_Info_f2c(*info); - c_ierr = MPI_Comm_set_info(c_comm, c_info); + c_ierr = PMPI_Comm_set_info(c_comm, c_info); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/comm_set_name_f.c b/ompi/mpi/fortran/mpif-h/comm_set_name_f.c index 947ee124a6f..1bbfed6a779 100644 --- a/ompi/mpi/fortran/mpif-h/comm_set_name_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_set_name_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/communicator/communicator.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_SET_NAME = ompi_comm_set_name_f #pragma weak pmpi_comm_set_name = ompi_comm_set_name_f #pragma weak pmpi_comm_set_name_ = ompi_comm_set_name_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Comm_set_name_f = ompi_comm_set_name_f #pragma weak PMPI_Comm_set_name_f08 = ompi_comm_set_name_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_NAME, pmpi_comm_set_name, pmpi_comm_set_name_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_NAME, (MPI_Fint *comm, char *comm_name, MPI_Fint *ierr, int name_len), (comm, comm_name, ierr, name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_SET_NAME = ompi_comm_set_name_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SET_NAME, #pragma weak MPI_Comm_set_name_f = ompi_comm_set_name_f #pragma weak MPI_Comm_set_name_f08 = ompi_comm_set_name_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SET_NAME, mpi_comm_set_name, mpi_comm_set_name_, @@ -60,19 +63,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SET_NAME, ompi_comm_set_name_f, (MPI_Fint *comm, char *comm_name, MPI_Fint *ierr, int name_len), (comm, comm_name, ierr, name_len) ) +#else +#define ompi_comm_set_name_f pompi_comm_set_name_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_set_name_f(MPI_Fint *comm, char *comm_name, MPI_Fint *ierr, int name_len) { int ret, c_ierr; char *c_name; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); /* Convert the fortran string */ @@ -86,7 +88,7 @@ void ompi_comm_set_name_f(MPI_Fint *comm, char *comm_name, MPI_Fint *ierr, /* Call the C function */ - c_ierr = MPI_Comm_set_name(c_comm, c_name); + c_ierr = PMPI_Comm_set_name(c_comm, c_name); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); /* Free the C name */ diff --git a/ompi/mpi/fortran/mpif-h/comm_size_f.c b/ompi/mpi/fortran/mpif-h/comm_size_f.c index 1b165f0f6b2..429199ba8a9 100644 --- a/ompi/mpi/fortran/mpif-h/comm_size_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_size_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_SIZE = ompi_comm_size_f #pragma weak pmpi_comm_size = ompi_comm_size_f #pragma weak pmpi_comm_size_ = ompi_comm_size_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_size_f = ompi_comm_size_f #pragma weak PMPI_Comm_size_f08 = ompi_comm_size_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SIZE, pmpi_comm_size, pmpi_comm_size_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SIZE, (MPI_Fint *comm, MPI_Fint *size, MPI_Fint *ierr), (comm, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_SIZE = ompi_comm_size_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SIZE, #pragma weak MPI_Comm_size_f = ompi_comm_size_f #pragma weak MPI_Comm_size_f08 = ompi_comm_size_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SIZE, mpi_comm_size, mpi_comm_size_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SIZE, ompi_comm_size_f, (MPI_Fint *comm, MPI_Fint *size, MPI_Fint *ierr), (comm, size, ierr) ) +#else +#define ompi_comm_size_f pompi_comm_size_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + #include void ompi_comm_size_f(MPI_Fint *comm, MPI_Fint *size, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm = MPI_Comm_f2c( *comm ); + MPI_Comm c_comm = PMPI_Comm_f2c( *comm ); OMPI_SINGLE_NAME_DECL(size); - c_ierr = MPI_Comm_size( c_comm, OMPI_SINGLE_NAME_CONVERT(size) ); + c_ierr = PMPI_Comm_size( c_comm, OMPI_SINGLE_NAME_CONVERT(size) ); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/comm_spawn_f.c b/ompi/mpi/fortran/mpif-h/comm_spawn_f.c index 2b3a50c9f25..2ad50ec7215 100644 --- a/ompi/mpi/fortran/mpif-h/comm_spawn_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_spawn_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/base/strings.h" #include "opal/util/argv.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_SPAWN = ompi_comm_spawn_f #pragma weak pmpi_comm_spawn = ompi_comm_spawn_f #pragma weak pmpi_comm_spawn_ = ompi_comm_spawn_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Comm_spawn_f = ompi_comm_spawn_f #pragma weak PMPI_Comm_spawn_f08 = ompi_comm_spawn_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPAWN, pmpi_comm_spawn, pmpi_comm_spawn_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPAWN, (char *command, char *argv, MPI_Fint *maxprocs, MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *intercomm, MPI_Fint *array_of_errcodes, MPI_Fint *ierr, int cmd_len, int string_len), (command, argv, maxprocs, info, root, comm, intercomm, array_of_errcodes, ierr, cmd_len, string_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_SPAWN = ompi_comm_spawn_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPAWN, #pragma weak MPI_Comm_spawn_f = ompi_comm_spawn_f #pragma weak MPI_Comm_spawn_f08 = ompi_comm_spawn_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SPAWN, mpi_comm_spawn, mpi_comm_spawn_, @@ -60,15 +63,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SPAWN, ompi_comm_spawn_f, (char *command, char *argv, MPI_Fint *maxprocs, MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *intercomm, MPI_Fint *array_of_errcodes, MPI_Fint *ierr, int cmd_len, int string_len), (command, argv, maxprocs, info, root, comm, intercomm, array_of_errcodes, ierr, cmd_len, string_len) ) +#else +#define ompi_comm_spawn_f pompi_comm_spawn_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_spawn_f(char *command, char *argv, MPI_Fint *maxprocs, - MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm, + MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *intercomm, MPI_Fint *array_of_errcodes, MPI_Fint *ierr, int cmd_len, int string_len) { @@ -79,10 +81,10 @@ void ompi_comm_spawn_f(char *command, char *argv, MPI_Fint *maxprocs, char **c_argv; char *c_command; OMPI_ARRAY_NAME_DECL(array_of_errcodes); - - c_comm = MPI_Comm_f2c(*comm); - c_info = MPI_Info_f2c(*info); - MPI_Comm_size(c_comm, &size); + + c_comm = PMPI_Comm_f2c(*comm); + c_info = PMPI_Info_f2c(*info); + PMPI_Comm_size(c_comm, &size); ompi_fortran_string_f2c(command, cmd_len, &c_command); /* It's allowed to ignore the errcodes */ @@ -102,7 +104,7 @@ void ompi_comm_spawn_f(char *command, char *argv, MPI_Fint *maxprocs, ompi_fortran_argv_f2c(argv, string_len, string_len, &c_argv); } - c_ierr = MPI_Comm_spawn(c_command, c_argv, + c_ierr = PMPI_Comm_spawn(c_command, c_argv, OMPI_FINT_2_INT(*maxprocs), c_info, OMPI_FINT_2_INT(*root), @@ -110,7 +112,7 @@ void ompi_comm_spawn_f(char *command, char *argv, MPI_Fint *maxprocs, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *intercomm = MPI_Comm_c2f(c_new_comm); + *intercomm = PMPI_Comm_c2f(c_new_comm); } free(c_command); if (MPI_ARGV_NULL != c_argv && NULL != c_argv) { diff --git a/ompi/mpi/fortran/mpif-h/comm_spawn_multiple_f.c b/ompi/mpi/fortran/mpif-h/comm_spawn_multiple_f.c index 30306ba0546..867934e138a 100644 --- a/ompi/mpi/fortran/mpif-h/comm_spawn_multiple_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_spawn_multiple_f.c @@ -5,15 +5,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,7 +29,8 @@ #include "opal/util/argv.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_SPAWN_MULTIPLE = ompi_comm_spawn_multiple_f #pragma weak pmpi_comm_spawn_multiple = ompi_comm_spawn_multiple_f #pragma weak pmpi_comm_spawn_multiple_ = ompi_comm_spawn_multiple_f @@ -33,7 +38,7 @@ #pragma weak PMPI_Comm_spawn_multiple_f = ompi_comm_spawn_multiple_f #pragma weak PMPI_Comm_spawn_multiple_f08 = ompi_comm_spawn_multiple_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPAWN_MULTIPLE, pmpi_comm_spawn_multiple, pmpi_comm_spawn_multiple_, @@ -42,6 +47,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPAWN_MULTIPLE, (MPI_Fint *count, char *array_of_commands, char *array_of_argv, MPI_Fint *array_of_maxprocs, MPI_Fint *array_of_info, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *intercomm, MPI_Fint *array_of_errcodes, MPI_Fint *ierr, int cmd_string_len, int argv_string_len), (count, array_of_commands, array_of_argv, array_of_maxprocs, array_of_info, root, comm, intercomm, array_of_errcodes, ierr, cmd_string_len, argv_string_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_SPAWN_MULTIPLE = ompi_comm_spawn_multiple_f @@ -51,9 +57,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPAWN_MULTIPLE, #pragma weak MPI_Comm_spawn_multiple_f = 
ompi_comm_spawn_multiple_f #pragma weak MPI_Comm_spawn_multiple_f08 = ompi_comm_spawn_multiple_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SPAWN_MULTIPLE, mpi_comm_spawn_multiple, mpi_comm_spawn_multiple_, @@ -61,13 +66,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SPAWN_MULTIPLE, ompi_comm_spawn_multiple_f, (MPI_Fint *count, char *array_of_commands, char *array_of_argv, MPI_Fint *array_of_maxprocs, MPI_Fint *array_of_info, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *intercomm, MPI_Fint *array_of_errcodes, MPI_Fint *ierr, int cmd_string_len, int argv_string_len), (count, array_of_commands, array_of_argv, array_of_maxprocs, array_of_info, root, comm, intercomm, array_of_errcodes, ierr, cmd_string_len, argv_string_len) ) +#else +#define ompi_comm_spawn_multiple_f pompi_comm_spawn_multiple_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_spawn_multiple_f(MPI_Fint *count, char *array_commands, char *array_argv, MPI_Fint *array_maxprocs, @@ -84,10 +88,10 @@ void ompi_comm_spawn_multiple_f(MPI_Fint *count, char *array_commands, char ***c_array_argv; OMPI_ARRAY_NAME_DECL(array_maxprocs); OMPI_ARRAY_NAME_DECL(array_errcds); - - c_comm = MPI_Comm_f2c(*comm); - - MPI_Comm_size(c_comm, &size); + + c_comm = PMPI_Comm_f2c(*comm); + + PMPI_Comm_size(c_comm, &size); array_size = OMPI_FINT_2_INT(*count); @@ -105,23 +109,23 @@ void ompi_comm_spawn_multiple_f(MPI_Fint *count, char *array_commands, if (OMPI_IS_FORTRAN_ARGVS_NULL(array_argv)) { c_array_argv = MPI_ARGVS_NULL; } else { - ompi_fortran_multiple_argvs_f2c(OMPI_FINT_2_INT(*count), array_argv, + ompi_fortran_multiple_argvs_f2c(OMPI_FINT_2_INT(*count), array_argv, argv_string_len, &c_array_argv); } OMPI_ARRAY_FINT_2_INT(array_maxprocs, array_size); - - ompi_fortran_argv_f2c(array_commands, cmd_string_len, + + 
ompi_fortran_argv_f2c(array_commands, cmd_string_len, cmd_string_len, &c_array_commands); - + c_info = (MPI_Info *) malloc (array_size * sizeof(MPI_Info)); for (i = 0; i < array_size; ++i) { - c_info[i] = MPI_Info_f2c(array_info[i]); + c_info[i] = PMPI_Info_f2c(array_info[i]); } - c_ierr = MPI_Comm_spawn_multiple(OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Comm_spawn_multiple(OMPI_FINT_2_INT(*count), c_array_commands, - c_array_argv, + c_array_argv, OMPI_ARRAY_NAME_CONVERT(array_maxprocs), c_info, OMPI_FINT_2_INT(*root), @@ -130,7 +134,7 @@ void ompi_comm_spawn_multiple_f(MPI_Fint *count, char *array_commands, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *intercomm = MPI_Comm_c2f(c_new_comm); + *intercomm = PMPI_Comm_c2f(c_new_comm); } if (!OMPI_IS_FORTRAN_ERRCODES_IGNORE(array_errcds)) { @@ -143,9 +147,10 @@ void ompi_comm_spawn_multiple_f(MPI_Fint *count, char *array_commands, opal_argv_free(c_array_commands); if (MPI_ARGVS_NULL != c_array_argv && NULL != c_array_argv) { - for (i = 0; i < OMPI_FINT_2_INT(*count); ++i) { + for (i = 0; i < OMPI_FINT_2_INT(*count); ++i) { opal_argv_free(c_array_argv[i]); } } free(c_array_argv); + free(c_info); } diff --git a/ompi/mpi/fortran/mpif-h/comm_split_f.c b/ompi/mpi/fortran/mpif-h/comm_split_f.c index b7f9a262aca..614fb96f4d8 100644 --- a/ompi/mpi/fortran/mpif-h/comm_split_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_split_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_SPLIT = ompi_comm_split_f #pragma weak pmpi_comm_split = ompi_comm_split_f #pragma weak pmpi_comm_split_ = ompi_comm_split_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_split_f = ompi_comm_split_f #pragma weak PMPI_Comm_split_f08 = ompi_comm_split_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPLIT, pmpi_comm_split, pmpi_comm_split_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPLIT, (MPI_Fint *comm, MPI_Fint *color, MPI_Fint *key, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, color, key, newcomm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_SPLIT = ompi_comm_split_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPLIT, #pragma weak MPI_Comm_split_f = ompi_comm_split_f #pragma weak MPI_Comm_split_f08 = ompi_comm_split_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SPLIT, mpi_comm_split, mpi_comm_split_, @@ -57,27 +60,26 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SPLIT, ompi_comm_split_f, (MPI_Fint *comm, MPI_Fint *color, MPI_Fint *key, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, color, key, newcomm, ierr) ) +#else +#define ompi_comm_split_f pompi_comm_split_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_split_f(MPI_Fint *comm, MPI_Fint *color, MPI_Fint *key, MPI_Fint *newcomm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_newcomm; - MPI_Comm c_comm = MPI_Comm_f2c ( *comm ); + MPI_Comm c_comm = PMPI_Comm_f2c ( *comm ); - c_ierr = MPI_Comm_split(c_comm, + c_ierr = PMPI_Comm_split(c_comm, OMPI_FINT_2_INT(*color), OMPI_FINT_2_INT(*key), &c_newcomm ); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newcomm = MPI_Comm_c2f (c_newcomm); + *newcomm = PMPI_Comm_c2f (c_newcomm); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_split_type_f.c b/ompi/mpi/fortran/mpif-h/comm_split_type_f.c index 79e53e2f998..c6eb7306a37 100644 --- a/ompi/mpi/fortran/mpif-h/comm_split_type_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_split_type_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_SPLIT_TYPE = ompi_comm_split_type_f #pragma weak pmpi_comm_split_type = ompi_comm_split_type_f #pragma weak pmpi_comm_split_type_ = ompi_comm_split_type_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Comm_split_type_f = ompi_comm_split_type_f #pragma weak PMPI_Comm_split_type_f08 = ompi_comm_split_type_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPLIT_TYPE, pmpi_comm_split_type, pmpi_comm_split_type_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPLIT_TYPE, (MPI_Fint *comm, MPI_Fint *split_type, MPI_Fint *key, MPI_Fint *info, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, split_type, key, info, newcomm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_SPLIT_TYPE = ompi_comm_split_type_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_SPLIT_TYPE, #pragma weak MPI_Comm_split_type_f = ompi_comm_split_type_f #pragma weak MPI_Comm_split_type_f08 = ompi_comm_split_type_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SPLIT_TYPE, mpi_comm_split_type, mpi_comm_split_type_, @@ -58,31 +61,30 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_SPLIT_TYPE, ompi_comm_split_type_f, (MPI_Fint *comm, MPI_Fint *split_type, MPI_Fint *key, MPI_Fint *info, MPI_Fint *newcomm, MPI_Fint *ierr), (comm, split_type, key, info, newcomm, ierr) ) +#else +#define ompi_comm_split_type_f pompi_comm_split_type_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_split_type_f(MPI_Fint *comm, MPI_Fint *split_type, MPI_Fint *key, MPI_Fint *info, MPI_Fint *newcomm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_newcomm; - MPI_Comm c_comm = MPI_Comm_f2c ( *comm ); + MPI_Comm c_comm = PMPI_Comm_f2c ( *comm ); MPI_Info c_info; - c_info = MPI_Info_f2c(*info); + c_info = PMPI_Info_f2c(*info); - c_ierr = OMPI_INT_2_FINT(MPI_Comm_split_type(c_comm, - OMPI_FINT_2_INT(*split_type), - OMPI_FINT_2_INT(*key), - c_info, - &c_newcomm )); + c_ierr = OMPI_INT_2_FINT(PMPI_Comm_split_type(c_comm, + OMPI_FINT_2_INT(*split_type), + OMPI_FINT_2_INT(*key), + c_info, + &c_newcomm )); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newcomm = MPI_Comm_c2f (c_newcomm); + *newcomm = PMPI_Comm_c2f (c_newcomm); } } diff --git a/ompi/mpi/fortran/mpif-h/comm_test_inter_f.c b/ompi/mpi/fortran/mpif-h/comm_test_inter_f.c index cdf76df3994..78e2a5cdb5e 100644 --- a/ompi/mpi/fortran/mpif-h/comm_test_inter_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_test_inter_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMM_TEST_INTER = ompi_comm_test_inter_f #pragma weak pmpi_comm_test_inter = ompi_comm_test_inter_f #pragma weak pmpi_comm_test_inter_ = ompi_comm_test_inter_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Comm_test_inter_f = ompi_comm_test_inter_f #pragma weak PMPI_Comm_test_inter_f08 = ompi_comm_test_inter_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_TEST_INTER, pmpi_comm_test_inter, pmpi_comm_test_inter_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_TEST_INTER, (MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (comm, flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMM_TEST_INTER = ompi_comm_test_inter_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_TEST_INTER, #pragma weak MPI_Comm_test_inter_f = ompi_comm_test_inter_f #pragma weak MPI_Comm_test_inter_f08 = ompi_comm_test_inter_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMM_TEST_INTER, mpi_comm_test_inter, mpi_comm_test_inter_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_TEST_INTER, ompi_comm_test_inter_f, (MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (comm, flag, ierr) ) +#else +#define ompi_comm_test_inter_f pompi_comm_test_inter_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_comm_test_inter_f(MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm = MPI_Comm_f2c (*comm); + MPI_Comm c_comm = PMPI_Comm_f2c (*comm); OMPI_LOGICAL_NAME_DECL(flag); - c_ierr = MPI_Comm_test_inter(c_comm, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); + c_ierr = PMPI_Comm_test_inter(c_comm, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/compare_and_swap_f.c b/ompi/mpi/fortran/mpif-h/compare_and_swap_f.c index db899d8fc70..cf6f6a81319 100644 --- a/ompi/mpi/fortran/mpif-h/compare_and_swap_f.c +++ b/ompi/mpi/fortran/mpif-h/compare_and_swap_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -26,7 +28,8 @@ #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_COMPARE_AND_SWAP = ompi_compare_and_swap_f #pragma weak pmpi_compare_and_swap = ompi_compare_and_swap_f #pragma weak pmpi_compare_and_swap_ = ompi_compare_and_swap_f @@ -34,7 +37,7 @@ #pragma weak PMPI_Compare_and_swap_f = ompi_compare_and_swap_f #pragma weak PMPI_Compare_and_swap_f08 = ompi_compare_and_swap_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_COMPARE_AND_SWAP, pmpi_compare_and_swap, pmpi_compare_and_swap_, @@ -43,6 +46,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMPARE_AND_SWAP, (char *origin_addr, char *compare_addr, char *result_addr, MPI_Fint *datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, compare_addr, result_addr, datatype, target_rank, target_disp, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_COMPARE_AND_SWAP = ompi_compare_and_swap_f @@ -52,9 +56,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMPARE_AND_SWAP, #pragma weak MPI_Compare_and_swap_f = ompi_compare_and_swap_f #pragma weak MPI_Compare_and_swap_f08 = ompi_compare_and_swap_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_COMPARE_AND_SWAP, mpi_compare_and_swap, mpi_compare_and_swap_, @@ -62,22 +65,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMPARE_AND_SWAP, ompi_compare_and_swap_f, (char *origin_addr, char *compare_addr, char *result_addr, MPI_Fint *datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, compare_addr, result_addr, datatype, target_rank, target_disp, win, ierr) ) +#else +#define ompi_compare_and_swap_f pompi_compare_and_swap_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_compare_and_swap_f(char *origin_addr, char *compare_addr, char *result_addr, MPI_Fint *datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_datatype = MPI_Type_f2c(*datatype); - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Datatype c_datatype = PMPI_Type_f2c(*datatype); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Compare_and_swap(OMPI_F2C_BOTTOM(origin_addr), + c_ierr = PMPI_Compare_and_swap(OMPI_F2C_BOTTOM(origin_addr), OMPI_F2C_BOTTOM(compare_addr), OMPI_F2C_BOTTOM(result_addr), c_datatype, diff --git a/ompi/mpi/fortran/mpif-h/dims_create_f.c b/ompi/mpi/fortran/mpif-h/dims_create_f.c index 563cd07394e..7a544af06b1 100644 --- a/ompi/mpi/fortran/mpif-h/dims_create_f.c +++ b/ompi/mpi/fortran/mpif-h/dims_create_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_DIMS_CREATE = ompi_dims_create_f #pragma weak pmpi_dims_create = ompi_dims_create_f #pragma weak pmpi_dims_create_ = ompi_dims_create_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Dims_create_f = ompi_dims_create_f #pragma weak PMPI_Dims_create_f08 = ompi_dims_create_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_DIMS_CREATE, pmpi_dims_create, pmpi_dims_create_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_DIMS_CREATE, (MPI_Fint *nnodes, MPI_Fint *ndims, MPI_Fint *dims, MPI_Fint *ierr), (nnodes, ndims, dims, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_DIMS_CREATE = ompi_dims_create_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_DIMS_CREATE, #pragma weak MPI_Dims_create_f = ompi_dims_create_f #pragma weak MPI_Dims_create_f08 = ompi_dims_create_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_DIMS_CREATE, mpi_dims_create, mpi_dims_create_, @@ -57,14 +60,13 @@ OMPI_GENERATE_F77_BINDINGS (MPI_DIMS_CREATE, ompi_dims_create_f, (MPI_Fint *nnodes, MPI_Fint *ndims, MPI_Fint *dims, MPI_Fint *ierr), (nnodes, ndims, dims, ierr) ) +#else +#define ompi_dims_create_f pompi_dims_create_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_dims_create_f(MPI_Fint *nnodes, MPI_Fint *ndims, + +void ompi_dims_create_f(MPI_Fint *nnodes, MPI_Fint *ndims, MPI_Fint *dims, MPI_Fint *ierr) { int c_ierr; @@ -72,7 +74,7 @@ void ompi_dims_create_f(MPI_Fint *nnodes, MPI_Fint *ndims, OMPI_ARRAY_FINT_2_INT(dims, *ndims); - c_ierr = MPI_Dims_create(OMPI_FINT_2_INT(*nnodes), + c_ierr = PMPI_Dims_create(OMPI_FINT_2_INT(*nnodes), OMPI_FINT_2_INT(*ndims), OMPI_ARRAY_NAME_CONVERT(dims)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/dist_graph_create_adjacent_f.c b/ompi/mpi/fortran/mpif-h/dist_graph_create_adjacent_f.c index f0350320e8a..f9668b379a8 100644 --- a/ompi/mpi/fortran/mpif-h/dist_graph_create_adjacent_f.c +++ b/ompi/mpi/fortran/mpif-h/dist_graph_create_adjacent_f.c @@ -8,10 +8,12 @@ * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -20,7 +22,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_DIST_GRAPH_CREATE_ADJACENT = ompi_dist_graph_create_adjacent_f #pragma weak pmpi_dist_graph_create_adjacent = ompi_dist_graph_create_adjacent_f #pragma weak pmpi_dist_graph_create_adjacent_ = ompi_dist_graph_create_adjacent_f @@ -28,7 +31,7 @@ #pragma weak PMPI_Dist_graph_create_adjacent_f = ompi_dist_graph_create_adjacent_f #pragma weak PMPI_Dist_graph_create_adjacent_f08 = ompi_dist_graph_create_adjacent_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_DIST_GRAPH_CREATE_ADJACENT, pmpi_dist_graph_create_adjacent, pmpi_dist_graph_create_adjacent_, @@ -37,6 +40,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_DIST_GRAPH_CREATE_ADJACENT, (MPI_Fint *comm_old, MPI_Fint *indegree, MPI_Fint *sources, MPI_Fint *sourceweights, MPI_Fint *outdegree, MPI_Fint *destinations, MPI_Fint *destweights, MPI_Fint *info, ompi_fortran_logical_t *reorder, MPI_Fint *comm_graph, MPI_Fint *ierr), (comm_old, indegree, sources, sourceweights, outdegree, destinations, destweights, info, reorder, comm_graph, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_DIST_GRAPH_CREATE_ADJACENT = ompi_dist_graph_create_adjacent_f @@ -46,9 +50,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_DIST_GRAPH_CREATE_ADJACENT, #pragma weak MPI_Dist_graph_create_adjacent_f = ompi_dist_graph_create_adjacent_f #pragma weak MPI_Dist_graph_create_adjacent_f08 = ompi_dist_graph_create_adjacent_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_DIST_GRAPH_CREATE_ADJACENT, mpi_dist_graph_create_adjacent, mpi_dist_graph_create_adjacent_, @@ -58,11 +61,13 @@ OMPI_GENERATE_F77_BINDINGS (MPI_DIST_GRAPH_CREATE_ADJACENT, (comm_old, indegree, sources, sourceweights, outdegree, destinations, destweights, info, reorder, comm_graph, ierr) ) #endif -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" +#if OMPI_BUILD_MPI_PROFILING && ! OPAL_HAVE_WEAK_SYMBOLS +#define ompi_dist_graph_create_adjacent_f pompi_dist_graph_create_adjacent_f +#endif #endif + void ompi_dist_graph_create_adjacent_f(MPI_Fint *comm_old, MPI_Fint *indegree, MPI_Fint *sources, MPI_Fint *sourceweights, MPI_Fint *outdegree, @@ -77,8 +82,8 @@ void ompi_dist_graph_create_adjacent_f(MPI_Fint *comm_old, MPI_Fint *indegree, OMPI_ARRAY_NAME_DECL(sources); OMPI_ARRAY_NAME_DECL(destinations); - c_comm_old = MPI_Comm_f2c(*comm_old); - c_info = MPI_Info_f2c(*info); + c_comm_old = PMPI_Comm_f2c(*comm_old); + c_info = PMPI_Info_f2c(*info); OMPI_ARRAY_FINT_2_INT(sources, *indegree); if (OMPI_IS_FORTRAN_UNWEIGHTED(sourceweights)) { @@ -100,17 +105,17 @@ void ompi_dist_graph_create_adjacent_f(MPI_Fint *comm_old, MPI_Fint *indegree, c_destweights = OMPI_ARRAY_NAME_CONVERT(destweights); } - *ierr = OMPI_INT_2_FINT(MPI_Dist_graph_create_adjacent(c_comm_old, OMPI_FINT_2_INT(*indegree), - OMPI_ARRAY_NAME_CONVERT(sources), - c_sourceweights, - OMPI_FINT_2_INT(*outdegree), - OMPI_ARRAY_NAME_CONVERT(destinations), - c_destweights, - c_info, - OMPI_LOGICAL_2_INT(*reorder), - &c_comm_graph)); + *ierr = OMPI_INT_2_FINT(PMPI_Dist_graph_create_adjacent(c_comm_old, OMPI_FINT_2_INT(*indegree), + OMPI_ARRAY_NAME_CONVERT(sources), + c_sourceweights, + OMPI_FINT_2_INT(*outdegree), + OMPI_ARRAY_NAME_CONVERT(destinations), + c_destweights, + c_info, + OMPI_LOGICAL_2_INT(*reorder), + &c_comm_graph)); if (OMPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) { - *comm_graph = 
MPI_Comm_c2f(c_comm_graph); + *comm_graph = PMPI_Comm_c2f(c_comm_graph); } OMPI_ARRAY_FINT_2_INT_CLEANUP(sources); diff --git a/ompi/mpi/fortran/mpif-h/dist_graph_create_f.c b/ompi/mpi/fortran/mpif-h/dist_graph_create_f.c index aade58e7f41..2692f9b7d06 100644 --- a/ompi/mpi/fortran/mpif-h/dist_graph_create_f.c +++ b/ompi/mpi/fortran/mpif-h/dist_graph_create_f.c @@ -7,10 +7,12 @@ * Copyright (c) 2011-2013 Université Bordeaux 1 * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -19,7 +21,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_DIST_GRAPH_CREATE = ompi_dist_graph_create_f #pragma weak pmpi_dist_graph_create = ompi_dist_graph_create_f #pragma weak pmpi_dist_graph_create_ = ompi_dist_graph_create_f @@ -27,7 +30,7 @@ #pragma weak PMPI_Dist_graph_create_f = ompi_dist_graph_create_f #pragma weak PMPI_Dist_graph_create_f08 = ompi_dist_graph_create_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_DIST_GRAPH_CREATE, pmpi_dist_graph_create, pmpi_dist_graph_create_, @@ -36,6 +39,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_DIST_GRAPH_CREATE, (MPI_Fint *comm_old, MPI_Fint *n, MPI_Fint *sources, MPI_Fint *degrees, MPI_Fint *destinations, MPI_Fint *weights, MPI_Fint *info, ompi_fortran_logical_t *reorder, MPI_Fint *comm_graph, MPI_Fint *ierr), (comm_old, n, sources, degrees, destinations, weights, info, reorder, comm_graph, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_DIST_GRAPH_CREATE = ompi_dist_graph_create_f @@ -45,9 +49,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_DIST_GRAPH_CREATE, #pragma weak MPI_Dist_graph_create_f = ompi_dist_graph_create_f 
#pragma weak MPI_Dist_graph_create_f08 = ompi_dist_graph_create_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_DIST_GRAPH_CREATE, mpi_dist_graph_create, mpi_dist_graph_create_, @@ -57,9 +60,11 @@ OMPI_GENERATE_F77_BINDINGS (MPI_DIST_GRAPH_CREATE, (comm_old, n, sources, degrees, destinations, weights, info, reorder, comm_graph, ierr) ) #endif -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" +#if OMPI_BUILD_MPI_PROFILING && ! OPAL_HAVE_WEAK_SYMBOLS +#define ompi_dist_graph_create_f pompi_dist_graph_create_f #endif +#endif + void ompi_dist_graph_create_f(MPI_Fint *comm_old, MPI_Fint *n, MPI_Fint *sources, MPI_Fint *degrees, MPI_Fint *destinations, MPI_Fint *weights, @@ -75,8 +80,8 @@ void ompi_dist_graph_create_f(MPI_Fint *comm_old, MPI_Fint *n, MPI_Fint *sources OMPI_ARRAY_NAME_DECL(degrees); OMPI_ARRAY_NAME_DECL(destinations); - c_comm_old = MPI_Comm_f2c(*comm_old); - c_info = MPI_Info_f2c(*info); + c_comm_old = PMPI_Comm_f2c(*comm_old); + c_info = PMPI_Info_f2c(*info); OMPI_ARRAY_FINT_2_INT(sources, *n); OMPI_ARRAY_FINT_2_INT(degrees, *n); for( i = 0; i < OMPI_FINT_2_INT(*n); i++ ) @@ -91,13 +96,13 @@ void ompi_dist_graph_create_f(MPI_Fint *comm_old, MPI_Fint *n, MPI_Fint *sources OMPI_ARRAY_FINT_2_INT(weights, count); c_weights = OMPI_ARRAY_NAME_CONVERT(weights); } - - *ierr = OMPI_INT_2_FINT(MPI_Dist_graph_create(c_comm_old, OMPI_FINT_2_INT(*n), OMPI_ARRAY_NAME_CONVERT(sources), - OMPI_ARRAY_NAME_CONVERT(degrees), OMPI_ARRAY_NAME_CONVERT(destinations), - c_weights, c_info, OMPI_LOGICAL_2_INT(*reorder), &c_comm_graph)); + + *ierr = OMPI_INT_2_FINT(PMPI_Dist_graph_create(c_comm_old, OMPI_FINT_2_INT(*n), OMPI_ARRAY_NAME_CONVERT(sources), + OMPI_ARRAY_NAME_CONVERT(degrees), OMPI_ARRAY_NAME_CONVERT(destinations), + c_weights, c_info, OMPI_LOGICAL_2_INT(*reorder), &c_comm_graph)); if (OMPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) 
{ - *comm_graph = MPI_Comm_c2f(c_comm_graph); + *comm_graph = PMPI_Comm_c2f(c_comm_graph); } OMPI_ARRAY_FINT_2_INT_CLEANUP(sources); diff --git a/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_count_f.c b/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_count_f.c index d4624f63c2b..4f8611e783a 100644 --- a/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_count_f.c +++ b/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_count_f.c @@ -4,10 +4,12 @@ * reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Université Bordeaux 1 + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -16,7 +18,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_DIST_GRAPH_NEIGHBORS_COUNT = ompi_dist_graph_neighbors_count_f #pragma weak pmpi_dist_graph_neighbors_count = ompi_dist_graph_neighbors_count_f #pragma weak pmpi_dist_graph_neighbors_count_ = ompi_dist_graph_neighbors_count_f @@ -24,7 +27,7 @@ #pragma weak PMPI_Dist_graph_neighbors_count_f = ompi_dist_graph_neighbors_count_f #pragma weak PMPI_Dist_graph_neighbors_count_f08 = ompi_dist_graph_neighbors_count_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_DIST_GRAPH_NEIGHBORS_COUNT, pmpi_dist_graph_neighbors_count, pmpi_dist_graph_neighbors_count_, @@ -33,6 +36,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_DIST_GRAPH_NEIGHBORS_COUNT, (MPI_Fint *comm, MPI_Fint *inneighbors, MPI_Fint *outneighbors, ompi_fortran_logical_t *weighted, MPI_Fint *ierr), (comm, inneighbors, outneighbors, weighted, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_DIST_GRAPH_NEIGHBORS_COUNT = ompi_dist_graph_neighbors_count_f @@ -42,9 +46,8 @@ OMPI_GENERATE_F77_BINDINGS 
(PMPI_DIST_GRAPH_NEIGHBORS_COUNT, #pragma weak MPI_Dist_graph_neighbors_count_f = ompi_dist_graph_neighbors_count_f #pragma weak MPI_Dist_graph_neighbors_count_f08 = ompi_dist_graph_neighbors_count_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_DIST_GRAPH_NEIGHBORS_COUNT, mpi_dist_graph_neighbors_count, mpi_dist_graph_neighbors_count_, @@ -52,13 +55,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_DIST_GRAPH_NEIGHBORS_COUNT, ompi_dist_graph_neighbors_count_f, (MPI_Fint *comm, MPI_Fint *inneighbors, MPI_Fint *outneighbors, ompi_fortran_logical_t *weighted, MPI_Fint *ierr), (comm, inneighbors, outneighbors, weighted, ierr) ) +#else +#define ompi_dist_graph_neighbors_count_f pompi_dist_graph_neighbors_count_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_dist_graph_neighbors_count_f(MPI_Fint *comm, MPI_Fint *inneighbors, MPI_Fint *outneighbors, ompi_fortran_logical_t *weighted, MPI_Fint *ierr) @@ -68,12 +70,12 @@ void ompi_dist_graph_neighbors_count_f(MPI_Fint *comm, MPI_Fint *inneighbors, OMPI_SINGLE_NAME_DECL(outneighbors); OMPI_LOGICAL_NAME_DECL(weighted); - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); - *ierr = OMPI_INT_2_FINT(MPI_Dist_graph_neighbors_count(c_comm, - OMPI_SINGLE_NAME_CONVERT(inneighbors), - OMPI_SINGLE_NAME_CONVERT(outneighbors), - OMPI_LOGICAL_SINGLE_NAME_CONVERT(weighted))); + *ierr = OMPI_INT_2_FINT(PMPI_Dist_graph_neighbors_count(c_comm, + OMPI_SINGLE_NAME_CONVERT(inneighbors), + OMPI_SINGLE_NAME_CONVERT(outneighbors), + OMPI_LOGICAL_SINGLE_NAME_CONVERT(weighted))); OMPI_SINGLE_INT_2_LOGICAL(weighted); if (OMPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) { OMPI_SINGLE_INT_2_FINT(inneighbors); diff --git a/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_f.c b/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_f.c index a54a641652e..5309b322c35 100644 --- 
a/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_f.c +++ b/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_f.c @@ -4,10 +4,12 @@ * reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Université Bordeaux 1 + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -16,7 +18,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_DIST_GRAPH_NEIGHBORS = ompi_dist_graph_neighbors_f #pragma weak pmpi_dist_graph_neighbors = ompi_dist_graph_neighbors_f #pragma weak pmpi_dist_graph_neighbors_ = ompi_dist_graph_neighbors_f @@ -24,7 +27,7 @@ #pragma weak PMPI_Dist_graph_neighbors_f = ompi_dist_graph_neighbors_f #pragma weak PMPI_Dist_graph_neighbors_f08 = ompi_dist_graph_neighbors_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_DIST_GRAPH_NEIGHBORS, pmpi_dist_graph_neighbors, pmpi_dist_graph_neighbors_, @@ -33,6 +36,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_DIST_GRAPH_NEIGHBORS, (MPI_Fint* comm, MPI_Fint* maxindegree, MPI_Fint* sources, MPI_Fint* sourceweights, MPI_Fint* maxoutdegree, MPI_Fint* destinations, MPI_Fint* destweights, MPI_Fint *ierr), (comm, maxindegree, sources, sourceweights, maxoutdegree, destinations, destweights, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_DIST_GRAPH_NEIGHBORS = ompi_dist_graph_neighbors_f @@ -42,9 +46,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_DIST_GRAPH_NEIGHBORS, #pragma weak MPI_Dist_graph_neighbors_f = ompi_dist_graph_neighbors_f #pragma weak MPI_Dist_graph_neighbors_f08 = ompi_dist_graph_neighbors_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_DIST_GRAPH_NEIGHBORS, mpi_dist_graph_neighbors, mpi_dist_graph_neighbors_, @@ -54,10 +57,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_DIST_GRAPH_NEIGHBORS, (comm, maxindegree, sources, sourceweights, maxoutdegree, destinations, destweights, ierr) ) #endif -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" +#if OMPI_BUILD_MPI_PROFILING && ! OPAL_HAVE_WEAK_SYMBOLS +#define ompi_dist_graph_neighbors_f pompi_dist_graph_neighbors_f +#endif #endif + void ompi_dist_graph_neighbors_f(MPI_Fint* comm, MPI_Fint* maxindegree, MPI_Fint* sources, MPI_Fint* sourceweights, MPI_Fint* maxoutdegree, MPI_Fint* destinations, @@ -70,7 +75,7 @@ void ompi_dist_graph_neighbors_f(MPI_Fint* comm, MPI_Fint* maxindegree, OMPI_ARRAY_NAME_DECL(destinations); OMPI_ARRAY_NAME_DECL(destweights); - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); OMPI_ARRAY_FINT_2_INT_ALLOC(sources, *maxindegree); if( !OMPI_IS_FORTRAN_UNWEIGHTED(sourceweights) ) { @@ -81,11 +86,11 @@ void ompi_dist_graph_neighbors_f(MPI_Fint* comm, MPI_Fint* maxindegree, OMPI_ARRAY_FINT_2_INT_ALLOC(destweights, *maxoutdegree); } - *ierr = OMPI_INT_2_FINT(MPI_Dist_graph_neighbors(c_comm, OMPI_FINT_2_INT(*maxindegree), - OMPI_ARRAY_NAME_CONVERT(sources), - OMPI_IS_FORTRAN_UNWEIGHTED(sourceweights) ? MPI_UNWEIGHTED : OMPI_ARRAY_NAME_CONVERT(sourceweights), - OMPI_FINT_2_INT(*maxoutdegree), OMPI_ARRAY_NAME_CONVERT(destinations), - OMPI_IS_FORTRAN_UNWEIGHTED(destweights) ? MPI_UNWEIGHTED : OMPI_ARRAY_NAME_CONVERT(destweights))); + *ierr = OMPI_INT_2_FINT(PMPI_Dist_graph_neighbors(c_comm, OMPI_FINT_2_INT(*maxindegree), + OMPI_ARRAY_NAME_CONVERT(sources), + OMPI_IS_FORTRAN_UNWEIGHTED(sourceweights) ? MPI_UNWEIGHTED : OMPI_ARRAY_NAME_CONVERT(sourceweights), + OMPI_FINT_2_INT(*maxoutdegree), OMPI_ARRAY_NAME_CONVERT(destinations), + OMPI_IS_FORTRAN_UNWEIGHTED(destweights) ? 
MPI_UNWEIGHTED : OMPI_ARRAY_NAME_CONVERT(destweights))); if (OMPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) { OMPI_ARRAY_INT_2_FINT(sources, *maxindegree); if( !OMPI_IS_FORTRAN_UNWEIGHTED(sourceweights) ) { diff --git a/ompi/mpi/fortran/mpif-h/errhandler_create_f.c b/ompi/mpi/fortran/mpif-h/errhandler_create_f.c index 989b5621ce9..9805f652806 100644 --- a/ompi/mpi/fortran/mpif-h/errhandler_create_f.c +++ b/ompi/mpi/fortran/mpif-h/errhandler_create_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ERRHANDLER_CREATE = ompi_errhandler_create_f #pragma weak pmpi_errhandler_create = ompi_errhandler_create_f #pragma weak pmpi_errhandler_create_ = ompi_errhandler_create_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Errhandler_create_f = ompi_errhandler_create_f #pragma weak PMPI_Errhandler_create_f08 = ompi_errhandler_create_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_CREATE, pmpi_errhandler_create, pmpi_errhandler_create_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_CREATE, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr), (function, errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ERRHANDLER_CREATE = ompi_errhandler_create_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_CREATE, #pragma weak MPI_Errhandler_create_f = ompi_errhandler_create_f #pragma weak MPI_Errhandler_create_f08 = ompi_errhandler_create_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ERRHANDLER_CREATE, mpi_errhandler_create, mpi_errhandler_create_, @@ -58,14 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ERRHANDLER_CREATE, ompi_errhandler_create_f, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr), (function, errhandler, ierr) ) +#else +#define ompi_errhandler_create_f pompi_errhandler_create_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_errhandler_create_f(ompi_errhandler_fortran_handler_fn_t* function, +void ompi_errhandler_create_f(ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr) { ompi_comm_create_errhandler_f(function, errhandler, ierr); diff --git a/ompi/mpi/fortran/mpif-h/errhandler_free_f.c b/ompi/mpi/fortran/mpif-h/errhandler_free_f.c index d0a2661b317..cf377540a9d 100644 --- a/ompi/mpi/fortran/mpif-h/errhandler_free_f.c +++ b/ompi/mpi/fortran/mpif-h/errhandler_free_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ERRHANDLER_FREE = ompi_errhandler_free_f #pragma weak pmpi_errhandler_free = ompi_errhandler_free_f #pragma weak pmpi_errhandler_free_ = ompi_errhandler_free_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Errhandler_free_f = ompi_errhandler_free_f #pragma weak PMPI_Errhandler_free_f08 = ompi_errhandler_free_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_FREE, pmpi_errhandler_free, pmpi_errhandler_free_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_FREE, (MPI_Fint *errhandler, MPI_Fint *ierr), (errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ERRHANDLER_FREE = ompi_errhandler_free_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_FREE, #pragma weak MPI_Errhandler_free_f = ompi_errhandler_free_f #pragma weak MPI_Errhandler_free_f08 = ompi_errhandler_free_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ERRHANDLER_FREE, mpi_errhandler_free, mpi_errhandler_free_, @@ -57,24 +60,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ERRHANDLER_FREE, ompi_errhandler_free_f, (MPI_Fint *errhandler, MPI_Fint *ierr), (errhandler, ierr) ) +#else +#define ompi_errhandler_free_f pompi_errhandler_free_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_errhandler_free_f(MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; MPI_Errhandler c_errhandler; - c_errhandler = MPI_Errhandler_f2c(*errhandler); + c_errhandler = PMPI_Errhandler_f2c(*errhandler); - c_ierr = MPI_Errhandler_free(&c_errhandler); + c_ierr = PMPI_Errhandler_free(&c_errhandler); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *errhandler = MPI_Errhandler_c2f(c_errhandler); + *errhandler = PMPI_Errhandler_c2f(c_errhandler); } } diff --git a/ompi/mpi/fortran/mpif-h/errhandler_get_f.c b/ompi/mpi/fortran/mpif-h/errhandler_get_f.c index 999026e7d56..5ca661aa10d 100644 --- a/ompi/mpi/fortran/mpif-h/errhandler_get_f.c +++ b/ompi/mpi/fortran/mpif-h/errhandler_get_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ERRHANDLER_GET = ompi_errhandler_get_f #pragma weak pmpi_errhandler_get = ompi_errhandler_get_f #pragma weak pmpi_errhandler_get_ = ompi_errhandler_get_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Errhandler_get_f = ompi_errhandler_get_f #pragma weak PMPI_Errhandler_get_f08 = ompi_errhandler_get_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_GET, pmpi_errhandler_get, pmpi_errhandler_get_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_GET, (MPI_Fint *comm, MPI_Fint *errhandler, MPI_Fint *ierr), (comm, errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ERRHANDLER_GET = ompi_errhandler_get_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_GET, #pragma weak MPI_Errhandler_get_f = ompi_errhandler_get_f #pragma weak MPI_Errhandler_get_f08 = ompi_errhandler_get_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ERRHANDLER_GET, mpi_errhandler_get, mpi_errhandler_get_, @@ -57,26 +60,25 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ERRHANDLER_GET, ompi_errhandler_get_f, (MPI_Fint *comm, MPI_Fint *errhandler, MPI_Fint *ierr), (comm, errhandler, ierr) ) +#else +#define ompi_errhandler_get_f pompi_errhandler_get_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_errhandler_get_f(MPI_Fint *comm, MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; MPI_Errhandler c_errhandler; - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); - c_ierr = MPI_Errhandler_get(c_comm, &c_errhandler); + c_ierr = PMPI_Errhandler_get(c_comm, &c_errhandler); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *errhandler = MPI_Errhandler_c2f(c_errhandler); + *errhandler = PMPI_Errhandler_c2f(c_errhandler); } } diff --git a/ompi/mpi/fortran/mpif-h/errhandler_set_f.c b/ompi/mpi/fortran/mpif-h/errhandler_set_f.c index 2bb529133de..d6ea8a94b32 100644 --- a/ompi/mpi/fortran/mpif-h/errhandler_set_f.c +++ b/ompi/mpi/fortran/mpif-h/errhandler_set_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ERRHANDLER_SET = ompi_errhandler_set_f #pragma weak pmpi_errhandler_set = ompi_errhandler_set_f #pragma weak pmpi_errhandler_set_ = ompi_errhandler_set_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Errhandler_set_f = ompi_errhandler_set_f #pragma weak PMPI_Errhandler_set_f08 = ompi_errhandler_set_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_SET, pmpi_errhandler_set, pmpi_errhandler_set_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_SET, (MPI_Fint *comm, MPI_Fint *errhandler, MPI_Fint *ierr), (comm, errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ERRHANDLER_SET = ompi_errhandler_set_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERRHANDLER_SET, #pragma weak MPI_Errhandler_set_f = ompi_errhandler_set_f #pragma weak MPI_Errhandler_set_f08 = ompi_errhandler_set_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ERRHANDLER_SET, mpi_errhandler_set, mpi_errhandler_set_, @@ -58,22 +61,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ERRHANDLER_SET, ompi_errhandler_set_f, (MPI_Fint *comm, MPI_Fint *errhandler, MPI_Fint *ierr), (comm, errhandler, ierr) ) +#else +#define ompi_errhandler_set_f pompi_errhandler_set_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_errhandler_set_f(MPI_Fint *comm, MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; MPI_Errhandler c_errhandler; - c_comm = MPI_Comm_f2c(*comm); - c_errhandler = MPI_Errhandler_f2c(*errhandler); + c_comm = PMPI_Comm_f2c(*comm); + c_errhandler = PMPI_Errhandler_f2c(*errhandler); - c_ierr = MPI_Errhandler_set(c_comm, c_errhandler); + c_ierr = PMPI_Errhandler_set(c_comm, c_errhandler); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/error_class_f.c b/ompi/mpi/fortran/mpif-h/error_class_f.c index f761a2d563d..77d2c848d14 100644 --- a/ompi/mpi/fortran/mpif-h/error_class_f.c +++ b/ompi/mpi/fortran/mpif-h/error_class_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ERROR_CLASS = ompi_error_class_f #pragma weak pmpi_error_class = ompi_error_class_f #pragma weak pmpi_error_class_ = ompi_error_class_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Error_class_f = ompi_error_class_f #pragma weak PMPI_Error_class_f08 = ompi_error_class_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ERROR_CLASS, pmpi_error_class, pmpi_error_class_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERROR_CLASS, (MPI_Fint *errorcode, MPI_Fint *errorclass, MPI_Fint *ierr), (errorcode, errorclass, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ERROR_CLASS = ompi_error_class_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERROR_CLASS, #pragma weak MPI_Error_class_f = ompi_error_class_f #pragma weak MPI_Error_class_f08 = ompi_error_class_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ERROR_CLASS, mpi_error_class, mpi_error_class_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ERROR_CLASS, ompi_error_class_f, (MPI_Fint *errorcode, MPI_Fint *errorclass, MPI_Fint *ierr), (errorcode, errorclass, ierr) ) +#else +#define ompi_error_class_f pompi_error_class_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_error_class_f(MPI_Fint *errorcode, MPI_Fint *errorclass, + +void ompi_error_class_f(MPI_Fint *errorcode, MPI_Fint *errorclass, MPI_Fint *ierr) { int c_ierr; OMPI_SINGLE_NAME_DECL(errorclass); - c_ierr = MPI_Error_class(OMPI_FINT_2_INT(*errorcode), + c_ierr = PMPI_Error_class(OMPI_FINT_2_INT(*errorcode), OMPI_SINGLE_NAME_CONVERT(errorclass)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/error_string_f.c b/ompi/mpi/fortran/mpif-h/error_string_f.c index 5da692636e6..2462a051f30 100644 --- a/ompi/mpi/fortran/mpif-h/error_string_f.c +++ b/ompi/mpi/fortran/mpif-h/error_string_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/constants.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ERROR_STRING = ompi_error_string_f #pragma weak pmpi_error_string = ompi_error_string_f #pragma weak pmpi_error_string_ = ompi_error_string_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Error_string_f = ompi_error_string_f #pragma weak PMPI_Error_string_f08 = ompi_error_string_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ERROR_STRING, pmpi_error_string, pmpi_error_string_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERROR_STRING, (MPI_Fint *errorcode, char *string, MPI_Fint *resultlen, MPI_Fint *ierr, int string_len), (errorcode, string, resultlen, ierr, string_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ERROR_STRING = ompi_error_string_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ERROR_STRING, #pragma weak MPI_Error_string_f = ompi_error_string_f #pragma weak MPI_Error_string_f08 = ompi_error_string_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ERROR_STRING, mpi_error_string, mpi_error_string_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ERROR_STRING, ompi_error_string_f, (MPI_Fint *errorcode, char *string, MPI_Fint *resultlen, MPI_Fint *ierr, int string_len), (errorcode, string, resultlen, ierr, string_len) ) +#else +#define ompi_error_string_f pompi_error_string_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_ERROR_STRING"; /* Note that the string_len parameter is silently added by the Fortran @@ -81,7 +83,7 @@ void ompi_error_string_f(MPI_Fint *errorcode, char *string, char c_string[MPI_MAX_ERROR_STRING + 1]; OMPI_SINGLE_NAME_DECL(resultlen); - c_ierr = MPI_Error_string(OMPI_FINT_2_INT(*errorcode), + c_ierr = PMPI_Error_string(OMPI_FINT_2_INT(*errorcode), c_string, OMPI_SINGLE_NAME_CONVERT(resultlen) ); diff --git a/ompi/mpi/fortran/mpif-h/exscan_f.c b/ompi/mpi/fortran/mpif-h/exscan_f.c index f9427ac481c..9880301e2b3 100644 --- a/ompi/mpi/fortran/mpif-h/exscan_f.c +++ b/ompi/mpi/fortran/mpif-h/exscan_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_EXSCAN = ompi_exscan_f #pragma weak pmpi_exscan = ompi_exscan_f #pragma weak pmpi_exscan_ = ompi_exscan_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Exscan_f = ompi_exscan_f #pragma weak PMPI_Exscan_f08 = ompi_exscan_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_EXSCAN, pmpi_exscan, pmpi_exscan_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_EXSCAN, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_EXSCAN = ompi_exscan_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_EXSCAN, #pragma weak MPI_Exscan_f = ompi_exscan_f #pragma weak MPI_Exscan_f08 = ompi_exscan_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_EXSCAN, mpi_exscan, mpi_exscan_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_EXSCAN, ompi_exscan_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, ierr) ) +#else +#define ompi_exscan_f pompi_exscan_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_exscan_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr) @@ -73,16 +75,16 @@ void ompi_exscan_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Comm c_comm; MPI_Datatype c_type; MPI_Op c_op; - - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); - /* MPI_IN_PLACE is not supported */ + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); + + sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM (sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM (recvbuf); - c_ierr = MPI_Exscan(sendbuf, recvbuf, + c_ierr = PMPI_Exscan(sendbuf, recvbuf, OMPI_FINT_2_INT(*count), c_type, c_op, c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/f_sync_reg_f.c b/ompi/mpi/fortran/mpif-h/f_sync_reg_f.c index 3954962ccb3..4b58f023776 100644 --- a/ompi/mpi/fortran/mpif-h/f_sync_reg_f.c +++ b/ompi/mpi/fortran/mpif-h/f_sync_reg_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 University of Oregon. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_F_SYNC_REG = ompi_f_sync_reg_f #pragma weak pmpi_f_sync_reg = ompi_f_sync_reg_f #pragma weak pmpi_f_sync_reg_ = ompi_f_sync_reg_f @@ -31,7 +34,7 @@ #pragma weak PMPI_F_sync_reg_f = ompi_f_sync_reg_f #pragma weak PMPI_F_sync_reg_f08 = ompi_f_sync_reg_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_F_SYNC_REG, pmpi_f_sync_reg, pmpi_f_sync_reg_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_F_SYNC_REG, (char *buf), (buf) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_F_SYNC_REG = ompi_f_sync_reg_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_F_SYNC_REG, #pragma weak MPI_F_sync_reg_f = ompi_f_sync_reg_f #pragma weak MPI_F_sync_reg_f08 = ompi_f_sync_reg_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_F_SYNC_REG, mpi_f_sync_reg, mpi_f_sync_reg_, @@ -59,11 +62,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_F_SYNC_REG, ompi_f_sync_reg_f, (char *buf), (buf) ) +#else +#define ompi_f_sync_reg_f pompi_f_sync_reg_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_f_sync_reg_f(char *buf) diff --git a/ompi/mpi/fortran/mpif-h/fetch_and_op_f.c b/ompi/mpi/fortran/mpif-h/fetch_and_op_f.c index e1f0657ddee..9deac958f7a 100644 --- a/ompi/mpi/fortran/mpif-h/fetch_and_op_f.c +++ b/ompi/mpi/fortran/mpif-h/fetch_and_op_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,7 +28,8 @@ #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FETCH_AND_OP = ompi_fetch_and_op_f #pragma weak pmpi_fetch_and_op = ompi_fetch_and_op_f #pragma weak pmpi_fetch_and_op_ = ompi_fetch_and_op_f @@ -34,7 +37,7 @@ #pragma weak PMPI_Fetch_and_op_f = ompi_fetch_and_op_f #pragma weak PMPI_Fetch_and_op_f08 = ompi_fetch_and_op_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FETCH_AND_OP, pmpi_fetch_and_op, pmpi_fetch_and_op_, @@ -43,6 +46,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FETCH_AND_OP, (char *origin_addr, char *result_addr, MPI_Fint *datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *op, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, result_addr, datatype, target_rank, target_disp, op, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FETCH_AND_OP = ompi_fetch_and_op_f @@ -52,9 +56,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FETCH_AND_OP, #pragma weak MPI_Fetch_and_op_f = ompi_fetch_and_op_f #pragma weak MPI_Fetch_and_op_f08 = ompi_fetch_and_op_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FETCH_AND_OP, mpi_fetch_and_op, mpi_fetch_and_op_, @@ -62,23 +65,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FETCH_AND_OP, ompi_fetch_and_op_f, (char *origin_addr, char *result_addr, MPI_Fint *datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *op, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, result_addr, datatype, target_rank, target_disp, op, win, ierr) ) +#else +#define ompi_fetch_and_op_f pompi_fetch_and_op_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_fetch_and_op_f(char *origin_addr, char *result_addr, MPI_Fint *datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *op, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_datatype = MPI_Type_f2c(*datatype); - MPI_Win c_win = MPI_Win_f2c(*win); - MPI_Op c_op = MPI_Op_f2c(*op); + MPI_Datatype c_datatype = PMPI_Type_f2c(*datatype); + MPI_Win c_win = PMPI_Win_f2c(*win); + MPI_Op c_op = PMPI_Op_f2c(*op); - c_ierr = MPI_Fetch_and_op(OMPI_F2C_BOTTOM(origin_addr), + c_ierr = PMPI_Fetch_and_op(OMPI_F2C_BOTTOM(origin_addr), OMPI_F2C_BOTTOM(result_addr), c_datatype, OMPI_FINT_2_INT(*target_rank), diff --git a/ompi/mpi/fortran/mpif-h/file_call_errhandler_f.c b/ompi/mpi/fortran/mpif-h/file_call_errhandler_f.c index dd22f004057..d5de8aa2476 100644 --- a/ompi/mpi/fortran/mpif-h/file_call_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/file_call_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_CALL_ERRHANDLER = ompi_file_call_errhandler_f #pragma weak pmpi_file_call_errhandler = ompi_file_call_errhandler_f #pragma weak pmpi_file_call_errhandler_ = ompi_file_call_errhandler_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_call_errhandler_f = ompi_file_call_errhandler_f #pragma weak PMPI_File_call_errhandler_f08 = ompi_file_call_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_CALL_ERRHANDLER, pmpi_file_call_errhandler, pmpi_file_call_errhandler_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_CALL_ERRHANDLER, (MPI_Fint *fh, MPI_Fint *errorcode, MPI_Fint *ierr), (fh, errorcode, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_CALL_ERRHANDLER = ompi_file_call_errhandler_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_CALL_ERRHANDLER, #pragma weak MPI_File_call_errhandler_f = ompi_file_call_errhandler_f #pragma weak MPI_File_call_errhandler_f08 = ompi_file_call_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_CALL_ERRHANDLER, mpi_file_call_errhandler, mpi_file_call_errhandler_, @@ -57,21 +60,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_CALL_ERRHANDLER, ompi_file_call_errhandler_f, (MPI_Fint *fh, MPI_Fint *errorcode, MPI_Fint *ierr), (fh, errorcode, ierr) ) +#else +#define ompi_file_call_errhandler_f pompi_file_call_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_call_errhandler_f(MPI_Fint *fh, MPI_Fint *errorcode, MPI_Fint *ierr) { int c_ierr; MPI_File c_fh; - c_fh = MPI_File_f2c(*fh); + c_fh = PMPI_File_f2c(*fh); - c_ierr = MPI_File_call_errhandler(c_fh, OMPI_FINT_2_INT(*errorcode)); + c_ierr = PMPI_File_call_errhandler(c_fh, OMPI_FINT_2_INT(*errorcode)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/file_close_f.c b/ompi/mpi/fortran/mpif-h/file_close_f.c index b4ed37a892f..71b343b22a0 100644 --- a/ompi/mpi/fortran/mpif-h/file_close_f.c +++ b/ompi/mpi/fortran/mpif-h/file_close_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_CLOSE = ompi_file_close_f #pragma weak pmpi_file_close = ompi_file_close_f #pragma weak pmpi_file_close_ = ompi_file_close_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_close_f = ompi_file_close_f #pragma weak PMPI_File_close_f08 = ompi_file_close_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_CLOSE, pmpi_file_close, pmpi_file_close_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_CLOSE, (MPI_Fint *fh, MPI_Fint *ierr), (fh, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_CLOSE = ompi_file_close_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_CLOSE, #pragma weak MPI_File_close_f = ompi_file_close_f #pragma weak MPI_File_close_f08 = ompi_file_close_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_CLOSE, mpi_file_close, mpi_file_close_, @@ -57,24 +60,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_CLOSE, ompi_file_close_f, (MPI_Fint *fh, MPI_Fint *ierr), (fh, ierr) ) +#else +#define ompi_file_close_f pompi_file_close_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_close_f(MPI_Fint *fh, MPI_Fint *ierr) { int c_ierr; MPI_File c_fh; - c_fh = MPI_File_f2c(*fh); + c_fh = PMPI_File_f2c(*fh); - c_ierr = MPI_File_close(&c_fh); + c_ierr = PMPI_File_close(&c_fh); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *fh = MPI_File_c2f(c_fh); + *fh = PMPI_File_c2f(c_fh); } } diff --git a/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c b/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c index 94342b5ce52..29ac3ffe365 100644 --- a/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_CREATE_ERRHANDLER = ompi_file_create_errhandler_f #pragma weak pmpi_file_create_errhandler = ompi_file_create_errhandler_f #pragma weak pmpi_file_create_errhandler_ = ompi_file_create_errhandler_f @@ -31,7 +34,7 @@ #pragma weak PMPI_File_create_errhandler_f = ompi_file_create_errhandler_f #pragma weak PMPI_File_create_errhandler_f08 = ompi_file_create_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_CREATE_ERRHANDLER, pmpi_file_create_errhandler, pmpi_file_create_errhandler_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_CREATE_ERRHANDLER, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr), (function, errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_CREATE_ERRHANDLER = ompi_file_create_errhandler_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_CREATE_ERRHANDLER, #pragma weak MPI_File_create_errhandler_f = ompi_file_create_errhandler_f #pragma weak MPI_File_create_errhandler_f08 = ompi_file_create_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_CREATE_ERRHANDLER, mpi_file_create_errhandler, mpi_file_create_errhandler_, @@ -59,26 +62,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_CREATE_ERRHANDLER, ompi_file_create_errhandler_f, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr), (function, errhandler, ierr) ) +#else +#define ompi_file_create_errhandler_f pompi_file_create_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif static const char FUNC_NAME[] = "MPI_FILE_CREATE_ERRHANDLER"; -void ompi_file_create_errhandler_f(ompi_errhandler_fortran_handler_fn_t* function, +void ompi_file_create_errhandler_f(ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; - MPI_Errhandler c_errhandler = + MPI_Errhandler c_errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_FORTRAN); if (MPI_ERRHANDLER_NULL != c_errhandler) { - *errhandler = MPI_Errhandler_c2f(c_errhandler); + *errhandler = PMPI_Errhandler_c2f(c_errhandler); c_ierr = MPI_SUCCESS; } else { c_ierr = MPI_ERR_INTERN; diff --git a/ompi/mpi/fortran/mpif-h/file_delete_f.c b/ompi/mpi/fortran/mpif-h/file_delete_f.c index 6bd1827a51d..8c566470802 100644 --- a/ompi/mpi/fortran/mpif-h/file_delete_f.c +++ b/ompi/mpi/fortran/mpif-h/file_delete_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/base/strings.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_DELETE = ompi_file_delete_f #pragma weak pmpi_file_delete = ompi_file_delete_f #pragma weak pmpi_file_delete_ = ompi_file_delete_f @@ -31,7 +34,7 @@ #pragma weak PMPI_File_delete_f = ompi_file_delete_f #pragma weak PMPI_File_delete_f08 = ompi_file_delete_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_DELETE, pmpi_file_delete, pmpi_file_delete_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_DELETE, (char *filename, MPI_Fint *info, MPI_Fint *ierr, int filename_len), (filename, info, ierr, filename_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_DELETE = ompi_file_delete_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_DELETE, #pragma weak MPI_File_delete_f = ompi_file_delete_f #pragma weak MPI_File_delete_f08 = ompi_file_delete_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_DELETE, mpi_file_delete, mpi_file_delete_, @@ -59,20 +62,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_DELETE, ompi_file_delete_f, (char *filename, MPI_Fint *info, MPI_Fint *ierr, int filename_len), (filename, info, ierr, filename_len) ) +#else +#define ompi_file_delete_f pompi_file_delete_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_delete_f(char *filename, MPI_Fint *info, MPI_Fint *ierr, int filename_len) { MPI_Info c_info; char *c_filename; int c_ierr, ret; - c_info = MPI_Info_f2c(*info); + c_info = PMPI_Info_f2c(*info); /* Convert the fortran string */ if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(filename, filename_len, @@ -81,8 +83,8 @@ void ompi_file_delete_f(char *filename, MPI_Fint *info, MPI_Fint *ierr, int file if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); return; } - - c_ierr = MPI_File_delete(c_filename, c_info); + + c_ierr = PMPI_File_delete(c_filename, c_info); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); free(c_filename); diff --git a/ompi/mpi/fortran/mpif-h/file_get_amode_f.c b/ompi/mpi/fortran/mpif-h/file_get_amode_f.c index af6fe147e1e..3c99d14a165 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_amode_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_amode_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_GET_AMODE = ompi_file_get_amode_f #pragma weak pmpi_file_get_amode = ompi_file_get_amode_f #pragma weak pmpi_file_get_amode_ = ompi_file_get_amode_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_get_amode_f = ompi_file_get_amode_f #pragma weak PMPI_File_get_amode_f08 = ompi_file_get_amode_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_AMODE, pmpi_file_get_amode, pmpi_file_get_amode_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_AMODE, (MPI_Fint *fh, MPI_Fint *amode, MPI_Fint *ierr), (fh, amode, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_GET_AMODE = ompi_file_get_amode_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_AMODE, #pragma weak MPI_File_get_amode_f = ompi_file_get_amode_f #pragma weak MPI_File_get_amode_f08 = ompi_file_get_amode_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_AMODE, mpi_file_get_amode, mpi_file_get_amode_, @@ -57,21 +60,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_AMODE, ompi_file_get_amode_f, (MPI_Fint *fh, MPI_Fint *amode, MPI_Fint *ierr), (fh, amode, ierr) ) +#else +#define ompi_file_get_amode_f pompi_file_get_amode_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_get_amode_f(MPI_Fint *fh, MPI_Fint *amode, MPI_Fint *ierr) { int c_ierr; MPI_File c_fh; OMPI_SINGLE_NAME_DECL(amode); - - c_fh = MPI_File_f2c(*fh); - c_ierr = MPI_File_get_amode(c_fh, OMPI_SINGLE_NAME_CONVERT(amode)); + + c_fh = PMPI_File_f2c(*fh); + c_ierr = PMPI_File_get_amode(c_fh, OMPI_SINGLE_NAME_CONVERT(amode)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/file_get_atomicity_f.c b/ompi/mpi/fortran/mpif-h/file_get_atomicity_f.c index 23410636159..76f672c0751 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_atomicity_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_atomicity_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_GET_ATOMICITY = ompi_file_get_atomicity_f #pragma weak pmpi_file_get_atomicity = ompi_file_get_atomicity_f #pragma weak pmpi_file_get_atomicity_ = ompi_file_get_atomicity_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_get_atomicity_f = ompi_file_get_atomicity_f #pragma weak PMPI_File_get_atomicity_f08 = ompi_file_get_atomicity_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_ATOMICITY, pmpi_file_get_atomicity, pmpi_file_get_atomicity_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_ATOMICITY, (MPI_Fint *fh, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (fh, flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_GET_ATOMICITY = ompi_file_get_atomicity_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_ATOMICITY, #pragma weak MPI_File_get_atomicity_f = ompi_file_get_atomicity_f #pragma weak MPI_File_get_atomicity_f08 = ompi_file_get_atomicity_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_ATOMICITY, mpi_file_get_atomicity, mpi_file_get_atomicity_, @@ -57,21 +60,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_ATOMICITY, ompi_file_get_atomicity_f, (MPI_Fint *fh, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (fh, flag, ierr) ) +#else +#define ompi_file_get_atomicity_f pompi_file_get_atomicity_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_get_atomicity_f(MPI_Fint *fh, ompi_fortran_logical_t *flag, MPI_Fint *ierr) { int c_ierr; MPI_File c_fh; OMPI_LOGICAL_NAME_DECL(flag); - c_fh = MPI_File_f2c(*fh); - c_ierr = MPI_File_get_atomicity(c_fh, + c_fh = PMPI_File_f2c(*fh); + c_ierr = PMPI_File_get_atomicity(c_fh, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/file_get_byte_offset_f.c b/ompi/mpi/fortran/mpif-h/file_get_byte_offset_f.c index c42d676c8db..88b6901d82e 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_byte_offset_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_byte_offset_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_GET_BYTE_OFFSET = ompi_file_get_byte_offset_f #pragma weak pmpi_file_get_byte_offset = ompi_file_get_byte_offset_f #pragma weak pmpi_file_get_byte_offset_ = ompi_file_get_byte_offset_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_get_byte_offset_f = ompi_file_get_byte_offset_f #pragma weak PMPI_File_get_byte_offset_f08 = ompi_file_get_byte_offset_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_BYTE_OFFSET, pmpi_file_get_byte_offset, pmpi_file_get_byte_offset_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_BYTE_OFFSET, (MPI_Fint *fh, MPI_Offset *offset, MPI_Offset *disp, MPI_Fint *ierr), (fh, offset, disp, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_GET_BYTE_OFFSET = ompi_file_get_byte_offset_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_BYTE_OFFSET, #pragma weak MPI_File_get_byte_offset_f = ompi_file_get_byte_offset_f #pragma weak MPI_File_get_byte_offset_f08 = ompi_file_get_byte_offset_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_BYTE_OFFSET, mpi_file_get_byte_offset, mpi_file_get_byte_offset_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_BYTE_OFFSET, ompi_file_get_byte_offset_f, (MPI_Fint *fh, MPI_Offset *offset, MPI_Offset *disp, MPI_Fint *ierr), (fh, offset, disp, ierr) ) +#else +#define ompi_file_get_byte_offset_f pompi_file_get_byte_offset_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_get_byte_offset_f(MPI_Fint *fh, MPI_Offset *offset, MPI_Offset *disp, MPI_Fint *ierr) { int c_ierr; MPI_File c_fh; - c_fh = MPI_File_f2c(*fh); + c_fh = PMPI_File_f2c(*fh); - c_ierr = MPI_File_get_byte_offset(c_fh, + c_ierr = PMPI_File_get_byte_offset(c_fh, (MPI_Offset) *offset, disp); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/file_get_errhandler_f.c b/ompi/mpi/fortran/mpif-h/file_get_errhandler_f.c index 1966aa5b878..3fe15871b22 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_GET_ERRHANDLER = ompi_file_get_errhandler_f #pragma weak pmpi_file_get_errhandler = ompi_file_get_errhandler_f #pragma weak pmpi_file_get_errhandler_ = ompi_file_get_errhandler_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_get_errhandler_f = ompi_file_get_errhandler_f #pragma weak PMPI_File_get_errhandler_f08 = ompi_file_get_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_ERRHANDLER, pmpi_file_get_errhandler, pmpi_file_get_errhandler_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_ERRHANDLER, (MPI_Fint *file, MPI_Fint *errhandler, MPI_Fint *ierr), (file, errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_GET_ERRHANDLER = ompi_file_get_errhandler_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_ERRHANDLER, #pragma weak MPI_File_get_errhandler_f = ompi_file_get_errhandler_f #pragma weak MPI_File_get_errhandler_f08 = ompi_file_get_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_ERRHANDLER, mpi_file_get_errhandler, mpi_file_get_errhandler_, @@ -57,25 +60,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_ERRHANDLER, ompi_file_get_errhandler_f, (MPI_Fint *file, MPI_Fint *errhandler, MPI_Fint *ierr), (file, errhandler, ierr) ) +#else +#define ompi_file_get_errhandler_f pompi_file_get_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_get_errhandler_f(MPI_Fint *fh, MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; MPI_File c_fh; MPI_Errhandler c_errhandler; - c_fh = MPI_File_f2c(*fh); - - c_ierr = MPI_File_get_errhandler(c_fh, &c_errhandler); + c_fh = PMPI_File_f2c(*fh); + + c_ierr = PMPI_File_get_errhandler(c_fh, &c_errhandler); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *errhandler = MPI_Errhandler_c2f(c_errhandler); + *errhandler = PMPI_Errhandler_c2f(c_errhandler); } } diff --git a/ompi/mpi/fortran/mpif-h/file_get_group_f.c b/ompi/mpi/fortran/mpif-h/file_get_group_f.c index 3bde5c84a97..d844e03ed00 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_group_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_group_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_GET_GROUP = ompi_file_get_group_f #pragma weak pmpi_file_get_group = ompi_file_get_group_f #pragma weak pmpi_file_get_group_ = ompi_file_get_group_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_get_group_f = ompi_file_get_group_f #pragma weak PMPI_File_get_group_f08 = ompi_file_get_group_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_GROUP, pmpi_file_get_group, pmpi_file_get_group_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_GROUP, (MPI_Fint *fh, MPI_Fint *group, MPI_Fint *ierr), (fh, group, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_GET_GROUP = ompi_file_get_group_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_GROUP, #pragma weak MPI_File_get_group_f = ompi_file_get_group_f #pragma weak MPI_File_get_group_f08 = ompi_file_get_group_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_GROUP, mpi_file_get_group, mpi_file_get_group_, @@ -57,23 +60,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_GROUP, ompi_file_get_group_f, (MPI_Fint *fh, MPI_Fint *group, MPI_Fint *ierr), (fh, group, ierr) ) +#else +#define ompi_file_get_group_f pompi_file_get_group_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_get_group_f(MPI_Fint *fh, MPI_Fint *group, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); MPI_Group c_grp; - c_ierr = MPI_File_get_group(c_fh, &c_grp); + c_ierr = PMPI_File_get_group(c_fh, &c_grp); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *group = MPI_Group_c2f(c_grp); + *group = PMPI_Group_c2f(c_grp); } } diff --git a/ompi/mpi/fortran/mpif-h/file_get_info_f.c b/ompi/mpi/fortran/mpif-h/file_get_info_f.c index 82e5f834a8f..5daceede9e1 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_info_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_info_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_GET_INFO = ompi_file_get_info_f #pragma weak pmpi_file_get_info = ompi_file_get_info_f #pragma weak pmpi_file_get_info_ = ompi_file_get_info_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_get_info_f = ompi_file_get_info_f #pragma weak PMPI_File_get_info_f08 = ompi_file_get_info_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_INFO, pmpi_file_get_info, pmpi_file_get_info_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_INFO, (MPI_Fint *fh, MPI_Fint *info_used, MPI_Fint *ierr), (fh, info_used, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_GET_INFO = ompi_file_get_info_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_INFO, #pragma weak MPI_File_get_info_f = ompi_file_get_info_f #pragma weak MPI_File_get_info_f08 = ompi_file_get_info_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_INFO, mpi_file_get_info, mpi_file_get_info_, @@ -57,23 +60,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_INFO, ompi_file_get_info_f, (MPI_Fint *fh, MPI_Fint *info_used, MPI_Fint *ierr), (fh, info_used, ierr) ) +#else +#define ompi_file_get_info_f pompi_file_get_info_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_get_info_f(MPI_Fint *fh, MPI_Fint *info_used, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); MPI_Info c_info; - c_ierr = MPI_File_get_info(c_fh, &c_info); + c_ierr = PMPI_File_get_info(c_fh, &c_info); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *info_used = MPI_Info_c2f(c_info); + *info_used = PMPI_Info_c2f(c_info); } } diff --git a/ompi/mpi/fortran/mpif-h/file_get_position_f.c b/ompi/mpi/fortran/mpif-h/file_get_position_f.c index 26e2a755b8d..8a5e6178ab3 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_position_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_position_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_GET_POSITION = ompi_file_get_position_f #pragma weak pmpi_file_get_position = ompi_file_get_position_f #pragma weak pmpi_file_get_position_ = ompi_file_get_position_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_get_position_f = ompi_file_get_position_f #pragma weak PMPI_File_get_position_f08 = ompi_file_get_position_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_POSITION, pmpi_file_get_position, pmpi_file_get_position_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_POSITION, (MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *ierr), (fh, offset, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_GET_POSITION = ompi_file_get_position_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_POSITION, #pragma weak MPI_File_get_position_f = ompi_file_get_position_f #pragma weak MPI_File_get_position_f08 = ompi_file_get_position_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_POSITION, mpi_file_get_position, mpi_file_get_position_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_POSITION, ompi_file_get_position_f, (MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *ierr), (fh, offset, ierr) ) +#else +#define ompi_file_get_position_f pompi_file_get_position_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_get_position_f(MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); MPI_Offset c_offset; - c_ierr = MPI_File_get_position(c_fh, &c_offset); + c_ierr = PMPI_File_get_position(c_fh, &c_offset); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/file_get_position_shared_f.c b/ompi/mpi/fortran/mpif-h/file_get_position_shared_f.c index 743fa3428b6..67a9af73f82 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_position_shared_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_position_shared_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_GET_POSITION_SHARED = ompi_file_get_position_shared_f #pragma weak pmpi_file_get_position_shared = ompi_file_get_position_shared_f #pragma weak pmpi_file_get_position_shared_ = ompi_file_get_position_shared_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_get_position_shared_f = ompi_file_get_position_shared_f #pragma weak PMPI_File_get_position_shared_f08 = ompi_file_get_position_shared_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_POSITION_SHARED, pmpi_file_get_position_shared, pmpi_file_get_position_shared_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_POSITION_SHARED, (MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *ierr), (fh, offset, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_GET_POSITION_SHARED = ompi_file_get_position_shared_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_POSITION_SHARED, #pragma weak MPI_File_get_position_shared_f = ompi_file_get_position_shared_f #pragma weak MPI_File_get_position_shared_f08 = ompi_file_get_position_shared_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_POSITION_SHARED, mpi_file_get_position_shared, mpi_file_get_position_shared_, @@ -57,21 +60,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_POSITION_SHARED, ompi_file_get_position_shared_f, (MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *ierr), (fh, offset, ierr) ) +#else +#define ompi_file_get_position_shared_f pompi_file_get_position_shared_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_get_position_shared_f(MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); MPI_Offset c_offset; - c_ierr = MPI_File_get_position_shared(c_fh, &c_offset); + c_ierr = PMPI_File_get_position_shared(c_fh, &c_offset); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/file_get_size_f.c b/ompi/mpi/fortran/mpif-h/file_get_size_f.c index 4d8f96ef079..90261226887 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_size_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_size_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_GET_SIZE = ompi_file_get_size_f #pragma weak pmpi_file_get_size = ompi_file_get_size_f #pragma weak pmpi_file_get_size_ = ompi_file_get_size_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_get_size_f = ompi_file_get_size_f #pragma weak PMPI_File_get_size_f08 = ompi_file_get_size_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_SIZE, pmpi_file_get_size, pmpi_file_get_size_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_SIZE, (MPI_Fint *fh, MPI_Offset *size, MPI_Fint *ierr), (fh, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_GET_SIZE = ompi_file_get_size_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_SIZE, #pragma weak MPI_File_get_size_f = ompi_file_get_size_f #pragma weak MPI_File_get_size_f08 = ompi_file_get_size_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_SIZE, mpi_file_get_size, mpi_file_get_size_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_SIZE, ompi_file_get_size_f, (MPI_Fint *fh, MPI_Offset *size, MPI_Fint *ierr), (fh, size, ierr) ) +#else +#define ompi_file_get_size_f pompi_file_get_size_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_get_size_f(MPI_Fint *fh, MPI_Offset *size, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); MPI_Offset c_size; - c_ierr = MPI_File_get_size(c_fh, &c_size); + c_ierr = PMPI_File_get_size(c_fh, &c_size); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/file_get_type_extent_f.c b/ompi/mpi/fortran/mpif-h/file_get_type_extent_f.c index cde28424684..fef25adb706 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_type_extent_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_type_extent_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_GET_TYPE_EXTENT = ompi_file_get_type_extent_f #pragma weak pmpi_file_get_type_extent = ompi_file_get_type_extent_f #pragma weak pmpi_file_get_type_extent_ = ompi_file_get_type_extent_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_get_type_extent_f = ompi_file_get_type_extent_f #pragma weak PMPI_File_get_type_extent_f08 = ompi_file_get_type_extent_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_TYPE_EXTENT, pmpi_file_get_type_extent, pmpi_file_get_type_extent_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_TYPE_EXTENT, (MPI_Fint *fh, MPI_Fint *datatype, MPI_Aint *extent, MPI_Fint *ierr), (fh, datatype, extent, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_GET_TYPE_EXTENT = ompi_file_get_type_extent_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_TYPE_EXTENT, #pragma weak MPI_File_get_type_extent_f = ompi_file_get_type_extent_f #pragma weak MPI_File_get_type_extent_f08 = ompi_file_get_type_extent_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_TYPE_EXTENT, mpi_file_get_type_extent, mpi_file_get_type_extent_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_TYPE_EXTENT, ompi_file_get_type_extent_f, (MPI_Fint *fh, MPI_Fint *datatype, MPI_Aint *extent, MPI_Fint *ierr), (fh, datatype, extent, ierr) ) +#else +#define ompi_file_get_type_extent_f pompi_file_get_type_extent_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_get_type_extent_f(MPI_Fint *fh, MPI_Fint *datatype, MPI_Aint *extent, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); MPI_Datatype c_type; - c_type = MPI_Type_f2c(*datatype); + c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_File_get_type_extent(c_fh, c_type, extent); + c_ierr = PMPI_File_get_type_extent(c_fh, c_type, extent); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/file_get_view_f.c b/ompi/mpi/fortran/mpif-h/file_get_view_f.c index 9396298d500..b5acefea4e3 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_view_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_view_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/base/strings.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_GET_VIEW = ompi_file_get_view_f #pragma weak pmpi_file_get_view = ompi_file_get_view_f #pragma weak pmpi_file_get_view_ = ompi_file_get_view_f @@ -31,7 +34,7 @@ #pragma weak PMPI_File_get_view_f = ompi_file_get_view_f #pragma weak PMPI_File_get_view_f08 = ompi_file_get_view_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_VIEW, pmpi_file_get_view, pmpi_file_get_view_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_VIEW, (MPI_Fint *fh, MPI_Offset *disp, MPI_Fint *etype, MPI_Fint *filetype, char *datarep, MPI_Fint *ierr, int datarep_len), (fh, disp, etype, filetype, datarep, ierr, datarep_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_GET_VIEW = ompi_file_get_view_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_GET_VIEW, #pragma weak MPI_File_get_view_f = ompi_file_get_view_f #pragma weak MPI_File_get_view_f08 = ompi_file_get_view_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_VIEW, mpi_file_get_view, mpi_file_get_view_, @@ -59,31 +62,30 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_GET_VIEW, ompi_file_get_view_f, (MPI_Fint *fh, MPI_Offset *disp, MPI_Fint *etype, MPI_Fint *filetype, char *datarep, MPI_Fint *ierr, int datarep_len), (fh, disp, etype, filetype, datarep, ierr, datarep_len) ) +#else +#define ompi_file_get_view_f pompi_file_get_view_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_file_get_view_f(MPI_Fint *fh, MPI_Offset *disp, - MPI_Fint *etype, MPI_Fint *filetype, + +void ompi_file_get_view_f(MPI_Fint *fh, MPI_Offset *disp, + MPI_Fint *etype, MPI_Fint *filetype, char *datarep, MPI_Fint *ierr, int datarep_len) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); MPI_Datatype c_etype, c_filetype; MPI_Offset c_disp; char c_datarep[MPI_MAX_DATAREP_STRING]; - c_ierr = MPI_File_get_view(c_fh, &c_disp, &c_etype, + c_ierr = PMPI_File_get_view(c_fh, &c_disp, &c_etype, &c_filetype, c_datarep); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { *disp = (MPI_Offset) c_disp; - *etype = MPI_Type_c2f(c_etype); - *filetype = MPI_Type_c2f(c_filetype); + *etype = PMPI_Type_c2f(c_etype); + *filetype = PMPI_Type_c2f(c_filetype); ompi_fortran_string_c2f(c_datarep, datarep, datarep_len); } } diff --git a/ompi/mpi/fortran/mpif-h/file_iread_all_f.c b/ompi/mpi/fortran/mpif-h/file_iread_all_f.c new file mode 100644 index 00000000000..f3ec32c9a85 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/file_iread_all_f.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/constants.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_FILE_IREAD_ALL = ompi_file_iread_all_f +#pragma weak pmpi_file_iread_all = ompi_file_iread_all_f +#pragma weak pmpi_file_iread_all_ = ompi_file_iread_all_f +#pragma weak pmpi_file_iread_all__ = ompi_file_iread_all_f + +#pragma weak PMPI_File_iread_all_f = ompi_file_iread_all_f +#pragma weak PMPI_File_iread_all_f08 = ompi_file_iread_all_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD_ALL, + pmpi_file_iread_all, + pmpi_file_iread_all_, + pmpi_file_iread_all__, + pompi_file_iread_all_f, + (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), + (fh, buf, count, datatype, request, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_FILE_IREAD_ALL = ompi_file_iread_all_f +#pragma weak mpi_file_iread_all = ompi_file_iread_all_f +#pragma weak mpi_file_iread_all_ = ompi_file_iread_all_f +#pragma weak mpi_file_iread_all__ = ompi_file_iread_all_f + +#pragma weak MPI_File_iread_all_f = ompi_file_iread_all_f +#pragma weak MPI_File_iread_all_f08 = ompi_file_iread_all_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IREAD_ALL, + mpi_file_iread_all, + mpi_file_iread_all_, + mpi_file_iread_all__, + ompi_file_iread_all_f, + (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), + (fh, buf, count, datatype, request, ierr) ) +#else +#define ompi_file_iread_all_f pompi_file_iread_all_f +#endif +#endif + + +void ompi_file_iread_all_f(MPI_Fint *fh, char *buf, MPI_Fint *count, + MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr) +{ + int c_ierr; + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); + MPI_Request c_request; + + c_ierr = PMPI_File_iread_all(c_fh, OMPI_F2C_BOTTOM(buf), + OMPI_FINT_2_INT(*count), + c_type, &c_request); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *request = PMPI_Request_c2f(c_request); + } +} diff --git a/ompi/mpi/fortran/mpif-h/file_iread_at_all_f.c b/ompi/mpi/fortran/mpif-h/file_iread_at_all_f.c new file mode 100644 index 00000000000..2b5730e9b54 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/file_iread_at_all_f.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/constants.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_FILE_IREAD_AT_ALL = ompi_file_iread_at_all_f +#pragma weak pmpi_file_iread_at_all = ompi_file_iread_at_all_f +#pragma weak pmpi_file_iread_at_all_ = ompi_file_iread_at_all_f +#pragma weak pmpi_file_iread_at_all__ = ompi_file_iread_at_all_f + +#pragma weak PMPI_File_iread_at_all_f = ompi_file_iread_at_all_f +#pragma weak PMPI_File_iread_at_all_f08 = ompi_file_iread_at_all_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD_AT_ALL, + pmpi_file_iread_at_all, + pmpi_file_iread_at_all_, + pmpi_file_iread_at_all__, + pompi_file_iread_at_all_f, + (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), + (fh, offset, buf, count, datatype, request, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_FILE_IREAD_AT_ALL = ompi_file_iread_at_all_f +#pragma weak mpi_file_iread_at_all = ompi_file_iread_at_all_f +#pragma weak mpi_file_iread_at_all_ = ompi_file_iread_at_all_f +#pragma weak mpi_file_iread_at_all__ = ompi_file_iread_at_all_f + +#pragma weak MPI_File_iread_at_all_f = ompi_file_iread_at_all_f +#pragma weak MPI_File_iread_at_all_f08 = ompi_file_iread_at_all_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING + OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IREAD_AT_ALL, + mpi_file_iread_at_all, + mpi_file_iread_at_all_, + mpi_file_iread_at_all__, + ompi_file_iread_at_all_f, + (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), + (fh, offset, buf, count, datatype, request, ierr) ) +#else +#define ompi_file_iread_at_all_f pompi_file_iread_at_all_f +#endif +#endif + + +void ompi_file_iread_at_all_f(MPI_Fint *fh, MPI_Offset *offset, + char *buf, MPI_Fint *count, + MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr) +{ + int c_ierr; + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); + MPI_Request c_request; + + c_ierr = PMPI_File_iread_at_all(c_fh, (MPI_Offset) *offset, + OMPI_F2C_BOTTOM(buf), + OMPI_FINT_2_INT(*count), + c_type, + &c_request); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *request = PMPI_Request_c2f(c_request); + } +} diff --git a/ompi/mpi/fortran/mpif-h/file_iread_at_f.c b/ompi/mpi/fortran/mpif-h/file_iread_at_f.c index ca18963ef33..239f7a3bacb 100644 --- a/ompi/mpi/fortran/mpif-h/file_iread_at_f.c +++ b/ompi/mpi/fortran/mpif-h/file_iread_at_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_IREAD_AT = ompi_file_iread_at_f #pragma weak pmpi_file_iread_at = ompi_file_iread_at_f #pragma weak pmpi_file_iread_at_ = ompi_file_iread_at_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_iread_at_f = ompi_file_iread_at_f #pragma weak PMPI_File_iread_at_f08 = ompi_file_iread_at_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD_AT, pmpi_file_iread_at, pmpi_file_iread_at_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD_AT, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, offset, buf, count, datatype, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_IREAD_AT = ompi_file_iread_at_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD_AT, #pragma weak MPI_File_iread_at_f = ompi_file_iread_at_f #pragma weak MPI_File_iread_at_f08 = ompi_file_iread_at_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IREAD_AT, mpi_file_iread_at, mpi_file_iread_at_, @@ -58,30 +61,29 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD_AT, ompi_file_iread_at_f, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, offset, buf, count, datatype, request, ierr) ) +#else +#define ompi_file_iread_at_f pompi_file_iread_at_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_iread_at_f(MPI_Fint *fh, MPI_Offset *offset, - char *buf, MPI_Fint *count, + char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_request; - c_ierr = MPI_File_iread_at(c_fh, (MPI_Offset) *offset, + c_ierr = PMPI_File_iread_at(c_fh, (MPI_Offset) *offset, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), - c_type, + c_type, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_request); + *request = PMPI_Request_c2f(c_request); } } diff --git a/ompi/mpi/fortran/mpif-h/file_iread_f.c b/ompi/mpi/fortran/mpif-h/file_iread_f.c index ac632565116..620eb6890da 100644 --- a/ompi/mpi/fortran/mpif-h/file_iread_f.c +++ b/ompi/mpi/fortran/mpif-h/file_iread_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_IREAD = ompi_file_iread_f #pragma weak pmpi_file_iread = ompi_file_iread_f #pragma weak pmpi_file_iread_ = ompi_file_iread_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_iread_f = ompi_file_iread_f #pragma weak PMPI_File_iread_f08 = ompi_file_iread_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD, pmpi_file_iread, pmpi_file_iread_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, buf, count, datatype, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_IREAD = ompi_file_iread_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD, #pragma weak MPI_File_iread_f = ompi_file_iread_f #pragma weak MPI_File_iread_f08 = ompi_file_iread_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IREAD, mpi_file_iread, mpi_file_iread_, @@ -58,27 +61,26 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IREAD, ompi_file_iread_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, buf, count, datatype, request, ierr) ) +#else +#define ompi_file_iread_f pompi_file_iread_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_iread_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_request; - c_ierr = MPI_File_iread(c_fh, OMPI_F2C_BOTTOM(buf), + c_ierr = PMPI_File_iread(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_request); + *request = PMPI_Request_c2f(c_request); } } diff --git a/ompi/mpi/fortran/mpif-h/file_iread_shared_f.c b/ompi/mpi/fortran/mpif-h/file_iread_shared_f.c index 893b7c85c36..bfc105f625f 100644 --- a/ompi/mpi/fortran/mpif-h/file_iread_shared_f.c +++ b/ompi/mpi/fortran/mpif-h/file_iread_shared_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_IREAD_SHARED = ompi_file_iread_shared_f #pragma weak pmpi_file_iread_shared = ompi_file_iread_shared_f #pragma weak pmpi_file_iread_shared_ = ompi_file_iread_shared_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_iread_shared_f = ompi_file_iread_shared_f #pragma weak PMPI_File_iread_shared_f08 = ompi_file_iread_shared_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD_SHARED, pmpi_file_iread_shared, pmpi_file_iread_shared_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD_SHARED, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, buf, count, datatype, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_IREAD_SHARED = ompi_file_iread_shared_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IREAD_SHARED, #pragma weak MPI_File_iread_shared_f = ompi_file_iread_shared_f #pragma weak MPI_File_iread_shared_f08 = ompi_file_iread_shared_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IREAD_SHARED, mpi_file_iread_shared, mpi_file_iread_shared_, @@ -58,23 +61,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IREAD_SHARED, ompi_file_iread_shared_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, buf, count, datatype, request, ierr) ) +#else +#define ompi_file_iread_shared_f pompi_file_iread_shared_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_iread_shared_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_request; - - c_ierr = MPI_File_iread_shared(c_fh, + + c_ierr = PMPI_File_iread_shared(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, @@ -82,6 +84,6 @@ void ompi_file_iread_shared_f(MPI_Fint *fh, char *buf, MPI_Fint *count, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_request); + *request = PMPI_Request_c2f(c_request); } } diff --git a/ompi/mpi/fortran/mpif-h/file_iwrite_all_f.c b/ompi/mpi/fortran/mpif-h/file_iwrite_all_f.c new file mode 100644 index 00000000000..43368f54167 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/file_iwrite_all_f.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/constants.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_FILE_IWRITE_ALL = ompi_file_iwrite_all_f +#pragma weak pmpi_file_iwrite_all = ompi_file_iwrite_all_f +#pragma weak pmpi_file_iwrite_all_ = ompi_file_iwrite_all_f +#pragma weak pmpi_file_iwrite_all__ = ompi_file_iwrite_all_f + +#pragma weak PMPI_File_iwrite_all_f = ompi_file_iwrite_all_f +#pragma weak PMPI_File_iwrite_all_f08 = ompi_file_iwrite_all_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IWRITE_ALL, + pmpi_file_iwrite_all, + pmpi_file_iwrite_all_, + pmpi_file_iwrite_all__, + pompi_file_iwrite_all_f, + (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), + (fh, buf, count, datatype, request, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_FILE_IWRITE_ALL = ompi_file_iwrite_all_f +#pragma weak mpi_file_iwrite_all = ompi_file_iwrite_all_f +#pragma weak mpi_file_iwrite_all_ = ompi_file_iwrite_all_f +#pragma weak mpi_file_iwrite_all__ = ompi_file_iwrite_all_f + +#pragma weak MPI_File_iwrite_all_f = ompi_file_iwrite_all_f +#pragma weak MPI_File_iwrite_all_f08 = ompi_file_iwrite_all_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IWRITE_ALL, + mpi_file_iwrite_all, + mpi_file_iwrite_all_, + mpi_file_iwrite_all__, + ompi_file_iwrite_all_f, + (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), + (fh, buf, count, datatype, request, ierr) ) +#else +#define ompi_file_iwrite_all_f pompi_file_iwrite_all_f +#endif +#endif + + +void ompi_file_iwrite_all_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr) +{ + int c_ierr; + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); + MPI_Request c_request; + + c_ierr = PMPI_File_iwrite_all(c_fh, OMPI_F2C_BOTTOM(buf), + OMPI_FINT_2_INT(*count), + c_type, &c_request); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *request = PMPI_Request_c2f(c_request); + } +} diff --git a/ompi/mpi/fortran/mpif-h/file_iwrite_at_all_f.c b/ompi/mpi/fortran/mpif-h/file_iwrite_at_all_f.c new file mode 100644 index 00000000000..743a301af93 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/file_iwrite_at_all_f.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/constants.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_FILE_IWRITE_AT_ALL = ompi_file_iwrite_at_all_f +#pragma weak pmpi_file_iwrite_at_all = ompi_file_iwrite_at_all_f +#pragma weak pmpi_file_iwrite_at_all_ = ompi_file_iwrite_at_all_f +#pragma weak pmpi_file_iwrite_at_all__ = ompi_file_iwrite_at_all_f + +#pragma weak PMPI_File_iwrite_at_all_f = ompi_file_iwrite_at_all_f +#pragma weak PMPI_File_iwrite_at_all_f08 = ompi_file_iwrite_at_all_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IWRITE_AT_ALL, + pmpi_file_iwrite_at_all, + pmpi_file_iwrite_at_all_, + pmpi_file_iwrite_at_all__, + pompi_file_iwrite_at_all_f, + (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), + (fh, offset, buf, count, datatype, request, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_FILE_IWRITE_AT_ALL = ompi_file_iwrite_at_all_f +#pragma weak mpi_file_iwrite_at_all = ompi_file_iwrite_at_all_f +#pragma weak mpi_file_iwrite_at_all_ = ompi_file_iwrite_at_all_f +#pragma weak mpi_file_iwrite_at_all__ = ompi_file_iwrite_at_all_f + +#pragma weak MPI_File_iwrite_at_all_f = ompi_file_iwrite_at_all_f +#pragma weak MPI_File_iwrite_at_all_f08 = ompi_file_iwrite_at_all_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IWRITE_AT_ALL, + mpi_file_iwrite_at_all, + mpi_file_iwrite_at_all_, + mpi_file_iwrite_at_all__, + ompi_file_iwrite_at_all_f, + (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), + (fh, offset, buf, count, datatype, request, ierr) ) +#else +#define ompi_file_iwrite_at_all_f pompi_file_iwrite_at_all_f +#endif +#endif + + +void ompi_file_iwrite_at_all_f(MPI_Fint *fh, MPI_Offset *offset, char *buf, + MPI_Fint *count, MPI_Fint *datatype, + MPI_Fint *request, MPI_Fint *ierr) +{ + int c_ierr; + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); + MPI_Request c_request; + + c_ierr = PMPI_File_iwrite_at_all(c_fh, (MPI_Offset) *offset, + OMPI_F2C_BOTTOM(buf), + OMPI_FINT_2_INT(*count), + c_type, &c_request); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *request = PMPI_Request_c2f(c_request); + } +} diff --git a/ompi/mpi/fortran/mpif-h/file_iwrite_at_f.c b/ompi/mpi/fortran/mpif-h/file_iwrite_at_f.c index 8f879287b5f..faab1b45059 100644 --- a/ompi/mpi/fortran/mpif-h/file_iwrite_at_f.c +++ b/ompi/mpi/fortran/mpif-h/file_iwrite_at_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_IWRITE_AT = ompi_file_iwrite_at_f #pragma weak pmpi_file_iwrite_at = ompi_file_iwrite_at_f #pragma weak pmpi_file_iwrite_at_ = ompi_file_iwrite_at_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_iwrite_at_f = ompi_file_iwrite_at_f #pragma weak PMPI_File_iwrite_at_f08 = ompi_file_iwrite_at_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IWRITE_AT, pmpi_file_iwrite_at, pmpi_file_iwrite_at_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IWRITE_AT, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, offset, buf, count, datatype, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_IWRITE_AT = ompi_file_iwrite_at_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IWRITE_AT, #pragma weak MPI_File_iwrite_at_f = ompi_file_iwrite_at_f #pragma weak MPI_File_iwrite_at_f08 = ompi_file_iwrite_at_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IWRITE_AT, mpi_file_iwrite_at, mpi_file_iwrite_at_, @@ -58,29 +61,28 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IWRITE_AT, ompi_file_iwrite_at_f, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, offset, buf, count, datatype, request, ierr) ) +#else +#define ompi_file_iwrite_at_f pompi_file_iwrite_at_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_iwrite_at_f(MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_request; - - c_ierr = MPI_File_iwrite_at(c_fh, (MPI_Offset) *offset, + + c_ierr = PMPI_File_iwrite_at(c_fh, (MPI_Offset) *offset, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_request); + *request = PMPI_Request_c2f(c_request); } } diff --git a/ompi/mpi/fortran/mpif-h/file_iwrite_f.c b/ompi/mpi/fortran/mpif-h/file_iwrite_f.c index e8ff2adf6fe..b4b87bc4f14 100644 --- a/ompi/mpi/fortran/mpif-h/file_iwrite_f.c +++ b/ompi/mpi/fortran/mpif-h/file_iwrite_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_IWRITE = ompi_file_iwrite_f #pragma weak pmpi_file_iwrite = ompi_file_iwrite_f #pragma weak pmpi_file_iwrite_ = ompi_file_iwrite_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_iwrite_f = ompi_file_iwrite_f #pragma weak PMPI_File_iwrite_f08 = ompi_file_iwrite_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IWRITE, pmpi_file_iwrite, pmpi_file_iwrite_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IWRITE, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, buf, count, datatype, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_IWRITE = ompi_file_iwrite_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IWRITE, #pragma weak MPI_File_iwrite_f = ompi_file_iwrite_f #pragma weak MPI_File_iwrite_f08 = ompi_file_iwrite_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IWRITE, mpi_file_iwrite, mpi_file_iwrite_, @@ -58,26 +61,25 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IWRITE, ompi_file_iwrite_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, buf, count, datatype, request, ierr) ) +#else +#define ompi_file_iwrite_f pompi_file_iwrite_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_iwrite_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_request; - - c_ierr = MPI_File_iwrite(c_fh, OMPI_F2C_BOTTOM(buf), + + c_ierr = PMPI_File_iwrite(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_request); + *request = PMPI_Request_c2f(c_request); } } diff --git a/ompi/mpi/fortran/mpif-h/file_iwrite_shared_f.c b/ompi/mpi/fortran/mpif-h/file_iwrite_shared_f.c index 064c37900fb..c0ffc386c72 100644 --- a/ompi/mpi/fortran/mpif-h/file_iwrite_shared_f.c +++ b/ompi/mpi/fortran/mpif-h/file_iwrite_shared_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_IWRITE_SHARED = ompi_file_iwrite_shared_f #pragma weak pmpi_file_iwrite_shared = ompi_file_iwrite_shared_f #pragma weak pmpi_file_iwrite_shared_ = ompi_file_iwrite_shared_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_iwrite_shared_f = ompi_file_iwrite_shared_f #pragma weak PMPI_File_iwrite_shared_f08 = ompi_file_iwrite_shared_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IWRITE_SHARED, pmpi_file_iwrite_shared, pmpi_file_iwrite_shared_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IWRITE_SHARED, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, buf, count, datatype, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_IWRITE_SHARED = ompi_file_iwrite_shared_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_IWRITE_SHARED, #pragma weak MPI_File_iwrite_shared_f = ompi_file_iwrite_shared_f #pragma weak MPI_File_iwrite_shared_f08 = ompi_file_iwrite_shared_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IWRITE_SHARED, mpi_file_iwrite_shared, mpi_file_iwrite_shared_, @@ -58,23 +61,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_IWRITE_SHARED, ompi_file_iwrite_shared_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr), (fh, buf, count, datatype, request, ierr) ) +#else +#define ompi_file_iwrite_shared_f pompi_file_iwrite_shared_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_iwrite_shared_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_request; - - c_ierr = MPI_File_iwrite_shared(c_fh, + + c_ierr = PMPI_File_iwrite_shared(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, @@ -82,6 +84,6 @@ void ompi_file_iwrite_shared_f(MPI_Fint *fh, char *buf, MPI_Fint *count, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_request); + *request = PMPI_Request_c2f(c_request); } } diff --git a/ompi/mpi/fortran/mpif-h/file_open_f.c b/ompi/mpi/fortran/mpif-h/file_open_f.c index 1336014d5ae..eb144c6238d 100644 --- a/ompi/mpi/fortran/mpif-h/file_open_f.c +++ b/ompi/mpi/fortran/mpif-h/file_open_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/base/strings.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_OPEN = ompi_file_open_f #pragma weak pmpi_file_open = ompi_file_open_f #pragma weak pmpi_file_open_ = ompi_file_open_f @@ -31,7 +34,7 @@ #pragma weak PMPI_File_open_f = ompi_file_open_f #pragma weak PMPI_File_open_f08 = ompi_file_open_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_OPEN, pmpi_file_open, pmpi_file_open_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_OPEN, (MPI_Fint *comm, char *filename, MPI_Fint *amode, MPI_Fint *info, MPI_Fint *fh, MPI_Fint *ierr, int name_len), (comm, filename, amode, info, fh, ierr, name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_OPEN = ompi_file_open_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_OPEN, #pragma weak MPI_File_open_f = ompi_file_open_f #pragma weak MPI_File_open_f08 = ompi_file_open_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_OPEN, mpi_file_open, mpi_file_open_, @@ -59,18 +62,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_OPEN, ompi_file_open_f, (MPI_Fint *comm, char *filename, MPI_Fint *amode, MPI_Fint *info, MPI_Fint *fh, MPI_Fint *ierr, int name_len), (comm, filename, amode, info, fh, ierr, name_len) ) +#else +#define ompi_file_open_f pompi_file_open_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_open_f(MPI_Fint *comm, char *filename, MPI_Fint *amode, MPI_Fint *info, MPI_Fint *fh, MPI_Fint *ierr, int name_len) { - MPI_Comm c_comm = MPI_Comm_f2c(*comm); - MPI_Info c_info = MPI_Info_f2c(*info); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); + MPI_Info c_info = PMPI_Info_f2c(*info); MPI_File c_fh; char *c_filename; int c_ierr, ret; @@ -83,13 +85,13 @@ void ompi_file_open_f(MPI_Fint *comm, char *filename, MPI_Fint *amode, return; } - c_ierr = MPI_File_open(c_comm, c_filename, + c_ierr = PMPI_File_open(c_comm, c_filename, OMPI_FINT_2_INT(*amode), c_info, &c_fh); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *fh = MPI_File_c2f(c_fh); + *fh = PMPI_File_c2f(c_fh); } free(c_filename); diff --git a/ompi/mpi/fortran/mpif-h/file_preallocate_f.c b/ompi/mpi/fortran/mpif-h/file_preallocate_f.c index 736f55dd4d7..1f0a596872e 100644 --- a/ompi/mpi/fortran/mpif-h/file_preallocate_f.c +++ b/ompi/mpi/fortran/mpif-h/file_preallocate_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_PREALLOCATE = ompi_file_preallocate_f #pragma weak pmpi_file_preallocate = ompi_file_preallocate_f #pragma weak pmpi_file_preallocate_ = ompi_file_preallocate_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_preallocate_f = ompi_file_preallocate_f #pragma weak PMPI_File_preallocate_f08 = ompi_file_preallocate_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_PREALLOCATE, pmpi_file_preallocate, pmpi_file_preallocate_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_PREALLOCATE, (MPI_Fint *fh, MPI_Offset *size, MPI_Fint *ierr), (fh, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_PREALLOCATE = ompi_file_preallocate_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_PREALLOCATE, #pragma weak MPI_File_preallocate_f = ompi_file_preallocate_f #pragma weak MPI_File_preallocate_f08 = ompi_file_preallocate_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_PREALLOCATE, mpi_file_preallocate, mpi_file_preallocate_, @@ -57,18 +60,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_PREALLOCATE, ompi_file_preallocate_f, (MPI_Fint *fh, MPI_Offset *size, MPI_Fint *ierr), (fh, size, ierr) ) +#else +#define ompi_file_preallocate_f pompi_file_preallocate_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_preallocate_f(MPI_Fint *fh, MPI_Offset *size, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - - c_ierr = MPI_File_preallocate(c_fh, (MPI_Offset) *size); + MPI_File c_fh = PMPI_File_f2c(*fh); + + c_ierr = PMPI_File_preallocate(c_fh, (MPI_Offset) *size); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/file_read_all_begin_f.c b/ompi/mpi/fortran/mpif-h/file_read_all_begin_f.c index 762222f5cab..1b55844c3b8 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_all_begin_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_all_begin_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ_ALL_BEGIN = ompi_file_read_all_begin_f #pragma weak pmpi_file_read_all_begin = ompi_file_read_all_begin_f #pragma weak pmpi_file_read_all_begin_ = ompi_file_read_all_begin_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_read_all_begin_f = ompi_file_read_all_begin_f #pragma weak PMPI_File_read_all_begin_f08 = ompi_file_read_all_begin_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ALL_BEGIN, pmpi_file_read_all_begin, pmpi_file_read_all_begin_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ALL_BEGIN, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, buf, count, datatype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ_ALL_BEGIN = ompi_file_read_all_begin_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ALL_BEGIN, #pragma weak MPI_File_read_all_begin_f = ompi_file_read_all_begin_f #pragma weak MPI_File_read_all_begin_f08 = ompi_file_read_all_begin_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ALL_BEGIN, mpi_file_read_all_begin, mpi_file_read_all_begin_, @@ -58,22 +61,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ALL_BEGIN, ompi_file_read_all_begin_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, buf, count, datatype, ierr) ) +#else +#define ompi_file_read_all_begin_f pompi_file_read_all_begin_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_read_all_begin_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_File_read_all_begin(c_fh, OMPI_F2C_BOTTOM(buf), + c_ierr = PMPI_File_read_all_begin(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/file_read_all_end_f.c b/ompi/mpi/fortran/mpif-h/file_read_all_end_f.c index 75b784d851b..1e9881c8858 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_all_end_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_all_end_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ_ALL_END = ompi_file_read_all_end_f #pragma weak pmpi_file_read_all_end = ompi_file_read_all_end_f #pragma weak pmpi_file_read_all_end_ = ompi_file_read_all_end_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_read_all_end_f = ompi_file_read_all_end_f #pragma weak PMPI_File_read_all_end_f08 = ompi_file_read_all_end_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ALL_END, pmpi_file_read_all_end, pmpi_file_read_all_end_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ALL_END, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ_ALL_END = ompi_file_read_all_end_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ALL_END, #pragma weak MPI_File_read_all_end_f = ompi_file_read_all_end_f #pragma weak MPI_File_read_all_end_f08 = ompi_file_read_all_end_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ALL_END, mpi_file_read_all_end, mpi_file_read_all_end_, @@ -60,23 +63,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ALL_END, ompi_file_read_all_end_f, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) +#else +#define ompi_file_read_all_end_f pompi_file_read_all_end_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_read_all_end_f(MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - - c_ierr = MPI_File_read_all_end(c_fh, buf, c_status); + + c_ierr = PMPI_File_read_all_end(c_fh, buf, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) diff --git a/ompi/mpi/fortran/mpif-h/file_read_all_f.c b/ompi/mpi/fortran/mpif-h/file_read_all_f.c index c919b001d2b..b49a7e26bde 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_all_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_all_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ_ALL = ompi_file_read_all_f #pragma weak pmpi_file_read_all = ompi_file_read_all_f #pragma weak pmpi_file_read_all_ = ompi_file_read_all_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_read_all_f = ompi_file_read_all_f #pragma weak PMPI_File_read_all_f08 = ompi_file_read_all_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ALL, pmpi_file_read_all, pmpi_file_read_all_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ALL, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ_ALL = ompi_file_read_all_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ALL, #pragma weak MPI_File_read_all_f = ompi_file_read_all_f #pragma weak MPI_File_read_all_f08 = ompi_file_read_all_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ALL, mpi_file_read_all, mpi_file_read_all_, @@ -60,27 +63,26 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ALL, ompi_file_read_all_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) +#else +#define ompi_file_read_all_f pompi_file_read_all_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_read_all_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - - c_ierr = MPI_File_read_all(c_fh, OMPI_F2C_BOTTOM(buf), + + c_ierr = PMPI_File_read_all(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) } diff --git a/ompi/mpi/fortran/mpif-h/file_read_at_all_begin_f.c b/ompi/mpi/fortran/mpif-h/file_read_at_all_begin_f.c index 0e209f94040..1078d57de35 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_at_all_begin_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_at_all_begin_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ_AT_ALL_BEGIN = ompi_file_read_at_all_begin_f #pragma weak pmpi_file_read_at_all_begin = ompi_file_read_at_all_begin_f #pragma weak pmpi_file_read_at_all_begin_ = ompi_file_read_at_all_begin_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_read_at_all_begin_f = ompi_file_read_at_all_begin_f #pragma weak PMPI_File_read_at_all_begin_f08 = ompi_file_read_at_all_begin_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT_ALL_BEGIN, pmpi_file_read_at_all_begin, pmpi_file_read_at_all_begin_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT_ALL_BEGIN, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, offset, buf, count, datatype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ_AT_ALL_BEGIN = ompi_file_read_at_all_begin_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT_ALL_BEGIN, #pragma weak MPI_File_read_at_all_begin_f = ompi_file_read_at_all_begin_f #pragma weak MPI_File_read_at_all_begin_f08 = ompi_file_read_at_all_begin_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_AT_ALL_BEGIN, mpi_file_read_at_all_begin, mpi_file_read_at_all_begin_, @@ -58,22 +61,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_AT_ALL_BEGIN, ompi_file_read_at_all_begin_f, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, offset, buf, count, datatype, ierr) ) +#else +#define ompi_file_read_at_all_begin_f pompi_file_read_at_all_begin_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_read_at_all_begin_f(MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_File_read_at_all_begin(c_fh, + c_ierr = PMPI_File_read_at_all_begin(c_fh, (MPI_Offset) *offset, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), diff --git a/ompi/mpi/fortran/mpif-h/file_read_at_all_end_f.c b/ompi/mpi/fortran/mpif-h/file_read_at_all_end_f.c index 862cc56c9fa..3d30c635fc3 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_at_all_end_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_at_all_end_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ_AT_ALL_END = ompi_file_read_at_all_end_f #pragma weak pmpi_file_read_at_all_end = ompi_file_read_at_all_end_f #pragma weak pmpi_file_read_at_all_end_ = ompi_file_read_at_all_end_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_read_at_all_end_f = ompi_file_read_at_all_end_f #pragma weak PMPI_File_read_at_all_end_f08 = ompi_file_read_at_all_end_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT_ALL_END, pmpi_file_read_at_all_end, pmpi_file_read_at_all_end_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT_ALL_END, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ_AT_ALL_END = ompi_file_read_at_all_end_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT_ALL_END, #pragma weak MPI_File_read_at_all_end_f = ompi_file_read_at_all_end_f #pragma weak MPI_File_read_at_all_end_f08 = ompi_file_read_at_all_end_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_AT_ALL_END, mpi_file_read_at_all_end, mpi_file_read_at_all_end_, @@ -60,23 +63,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_AT_ALL_END, ompi_file_read_at_all_end_f, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) +#else +#define ompi_file_read_at_all_end_f pompi_file_read_at_all_end_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_file_read_at_all_end_f(MPI_Fint *fh, char *buf, + +void ompi_file_read_at_all_end_f(MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_read_at_all_end(c_fh, buf, c_status); + c_ierr = PMPI_File_read_at_all_end(c_fh, buf, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) diff --git a/ompi/mpi/fortran/mpif-h/file_read_at_all_f.c b/ompi/mpi/fortran/mpif-h/file_read_at_all_f.c index cc062ee958a..2ac4c021a66 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_at_all_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_at_all_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ_AT_ALL = ompi_file_read_at_all_f #pragma weak pmpi_file_read_at_all = ompi_file_read_at_all_f #pragma weak pmpi_file_read_at_all_ = ompi_file_read_at_all_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_read_at_all_f = ompi_file_read_at_all_f #pragma weak PMPI_File_read_at_all_f08 = ompi_file_read_at_all_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT_ALL, pmpi_file_read_at_all, pmpi_file_read_at_all_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT_ALL, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, offset, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ_AT_ALL = ompi_file_read_at_all_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT_ALL, #pragma weak MPI_File_read_at_all_f = ompi_file_read_at_all_f #pragma weak MPI_File_read_at_all_f08 = ompi_file_read_at_all_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_AT_ALL, mpi_file_read_at_all, mpi_file_read_at_all_, @@ -60,26 +63,25 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_AT_ALL, ompi_file_read_at_all_f, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, offset, buf, count, datatype, status, ierr) ) +#else +#define ompi_file_read_at_all_f pompi_file_read_at_all_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_read_at_all_f(MPI_Fint *fh, MPI_Offset *offset, - char *buf, MPI_Fint *count, + char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_read_at_all(c_fh, + c_ierr = PMPI_File_read_at_all(c_fh, (MPI_Offset) *offset, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), diff --git a/ompi/mpi/fortran/mpif-h/file_read_at_f.c b/ompi/mpi/fortran/mpif-h/file_read_at_f.c index c29251f83eb..b48594503be 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_at_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_at_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ_AT = ompi_file_read_at_f #pragma weak pmpi_file_read_at = ompi_file_read_at_f #pragma weak pmpi_file_read_at_ = ompi_file_read_at_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_read_at_f = ompi_file_read_at_f #pragma weak PMPI_File_read_at_f08 = ompi_file_read_at_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT, pmpi_file_read_at, pmpi_file_read_at_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, offset, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ_AT = ompi_file_read_at_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_AT, #pragma weak MPI_File_read_at_f = ompi_file_read_at_f #pragma weak MPI_File_read_at_f08 = ompi_file_read_at_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_AT, mpi_file_read_at, mpi_file_read_at_, @@ -60,27 +63,26 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_AT, ompi_file_read_at_f, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, offset, buf, count, datatype, status, ierr) ) +#else +#define ompi_file_read_at_f pompi_file_read_at_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_file_read_at_f(MPI_Fint *fh, MPI_Offset *offset, char *buf, + +void ompi_file_read_at_f(MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_read_at(c_fh, + c_ierr = PMPI_File_read_at(c_fh, (MPI_Offset) *offset, - buf, + buf, OMPI_FINT_2_INT(*count), c_type, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/file_read_f.c b/ompi/mpi/fortran/mpif-h/file_read_f.c index ed518693d79..97895289be6 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ = ompi_file_read_f #pragma weak pmpi_file_read = ompi_file_read_f #pragma weak pmpi_file_read_ = ompi_file_read_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_read_f = ompi_file_read_f #pragma weak PMPI_File_read_f08 = ompi_file_read_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ, pmpi_file_read, pmpi_file_read_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ = ompi_file_read_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ, #pragma weak MPI_File_read_f = ompi_file_read_f #pragma weak MPI_File_read_f08 = ompi_file_read_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ, mpi_file_read, mpi_file_read_, @@ -60,24 +63,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ, ompi_file_read_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) +#else +#define ompi_file_read_f pompi_file_read_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_read_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - - c_ierr = MPI_File_read(c_fh, OMPI_F2C_BOTTOM(buf), + + c_ierr = PMPI_File_read(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/file_read_ordered_begin_f.c b/ompi/mpi/fortran/mpif-h/file_read_ordered_begin_f.c index 5b73e9cd339..4c294f93980 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_ordered_begin_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_ordered_begin_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ_ORDERED_BEGIN = ompi_file_read_ordered_begin_f #pragma weak pmpi_file_read_ordered_begin = ompi_file_read_ordered_begin_f #pragma weak pmpi_file_read_ordered_begin_ = ompi_file_read_ordered_begin_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_read_ordered_begin_f = ompi_file_read_ordered_begin_f #pragma weak PMPI_File_read_ordered_begin_f08 = ompi_file_read_ordered_begin_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ORDERED_BEGIN, pmpi_file_read_ordered_begin, pmpi_file_read_ordered_begin_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ORDERED_BEGIN, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, buf, count, datatype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ_ORDERED_BEGIN = ompi_file_read_ordered_begin_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ORDERED_BEGIN, #pragma weak MPI_File_read_ordered_begin_f = ompi_file_read_ordered_begin_f #pragma weak MPI_File_read_ordered_begin_f08 = ompi_file_read_ordered_begin_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ORDERED_BEGIN, mpi_file_read_ordered_begin, mpi_file_read_ordered_begin_, @@ -58,21 +61,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ORDERED_BEGIN, ompi_file_read_ordered_begin_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, buf, count, datatype, ierr) ) +#else +#define ompi_file_read_ordered_begin_f pompi_file_read_ordered_begin_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_file_read_ordered_begin_f(MPI_Fint *fh, char *buf, MPI_Fint *count, + +void ompi_file_read_ordered_begin_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_File_read_ordered_begin(c_fh, OMPI_F2C_BOTTOM(buf), + c_ierr = PMPI_File_read_ordered_begin(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/file_read_ordered_end_f.c b/ompi/mpi/fortran/mpif-h/file_read_ordered_end_f.c index 5f545afd46c..811d6df8b44 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_ordered_end_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_ordered_end_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ_ORDERED_END = ompi_file_read_ordered_end_f #pragma weak pmpi_file_read_ordered_end = ompi_file_read_ordered_end_f #pragma weak pmpi_file_read_ordered_end_ = ompi_file_read_ordered_end_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_read_ordered_end_f = ompi_file_read_ordered_end_f #pragma weak PMPI_File_read_ordered_end_f08 = ompi_file_read_ordered_end_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ORDERED_END, pmpi_file_read_ordered_end, pmpi_file_read_ordered_end_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ORDERED_END, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ_ORDERED_END = ompi_file_read_ordered_end_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ORDERED_END, #pragma weak MPI_File_read_ordered_end_f = ompi_file_read_ordered_end_f #pragma weak MPI_File_read_ordered_end_f08 = ompi_file_read_ordered_end_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ORDERED_END, mpi_file_read_ordered_end, mpi_file_read_ordered_end_, @@ -60,23 +63,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ORDERED_END, ompi_file_read_ordered_end_f, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) +#else +#define ompi_file_read_ordered_end_f pompi_file_read_ordered_end_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_read_ordered_end_f(MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_read_ordered_end(c_fh, buf, c_status); + c_ierr = PMPI_File_read_ordered_end(c_fh, buf, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) diff --git a/ompi/mpi/fortran/mpif-h/file_read_ordered_f.c b/ompi/mpi/fortran/mpif-h/file_read_ordered_f.c index 24e5e151036..452380fe293 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_ordered_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_ordered_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ_ORDERED = ompi_file_read_ordered_f #pragma weak pmpi_file_read_ordered = ompi_file_read_ordered_f #pragma weak pmpi_file_read_ordered_ = ompi_file_read_ordered_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_read_ordered_f = ompi_file_read_ordered_f #pragma weak PMPI_File_read_ordered_f08 = ompi_file_read_ordered_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ORDERED, pmpi_file_read_ordered, pmpi_file_read_ordered_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ORDERED, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ_ORDERED = ompi_file_read_ordered_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_ORDERED, #pragma weak MPI_File_read_ordered_f = ompi_file_read_ordered_f #pragma weak MPI_File_read_ordered_f08 = ompi_file_read_ordered_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ORDERED, mpi_file_read_ordered, mpi_file_read_ordered_, @@ -60,25 +63,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_ORDERED, ompi_file_read_ordered_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) +#else +#define ompi_file_read_ordered_f pompi_file_read_ordered_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_read_ordered_f(MPI_Fint *fh, char *buf, MPI_Fint *count, - MPI_Fint *datatype, MPI_Fint *status, + MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_read_ordered(c_fh, + c_ierr = PMPI_File_read_ordered(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, diff --git a/ompi/mpi/fortran/mpif-h/file_read_shared_f.c b/ompi/mpi/fortran/mpif-h/file_read_shared_f.c index cd4ce14752a..6d945ca60fd 100644 --- a/ompi/mpi/fortran/mpif-h/file_read_shared_f.c +++ b/ompi/mpi/fortran/mpif-h/file_read_shared_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_READ_SHARED = ompi_file_read_shared_f #pragma weak pmpi_file_read_shared = ompi_file_read_shared_f #pragma weak pmpi_file_read_shared_ = ompi_file_read_shared_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_read_shared_f = ompi_file_read_shared_f #pragma weak PMPI_File_read_shared_f08 = ompi_file_read_shared_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_SHARED, pmpi_file_read_shared, pmpi_file_read_shared_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_SHARED, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_READ_SHARED = ompi_file_read_shared_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_READ_SHARED, #pragma weak MPI_File_read_shared_f = ompi_file_read_shared_f #pragma weak MPI_File_read_shared_f08 = ompi_file_read_shared_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_SHARED, mpi_file_read_shared, mpi_file_read_shared_, @@ -60,26 +63,25 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_READ_SHARED, ompi_file_read_shared_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) +#else +#define ompi_file_read_shared_f pompi_file_read_shared_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_read_shared_f(MPI_Fint *fh, char *buf, MPI_Fint *count, - MPI_Fint *datatype, MPI_Fint *status, + MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_read_shared(c_fh, - OMPI_F2C_BOTTOM(buf), + c_ierr = PMPI_File_read_shared(c_fh, + OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, c_status); diff --git a/ompi/mpi/fortran/mpif-h/file_seek_f.c b/ompi/mpi/fortran/mpif-h/file_seek_f.c index 4948d83a721..02bfe39c90a 100644 --- a/ompi/mpi/fortran/mpif-h/file_seek_f.c +++ b/ompi/mpi/fortran/mpif-h/file_seek_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_SEEK = ompi_file_seek_f #pragma weak pmpi_file_seek = ompi_file_seek_f #pragma weak pmpi_file_seek_ = ompi_file_seek_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_seek_f = ompi_file_seek_f #pragma weak PMPI_File_seek_f08 = ompi_file_seek_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SEEK, pmpi_file_seek, pmpi_file_seek_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SEEK, (MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *whence, MPI_Fint *ierr), (fh, offset, whence, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_SEEK = ompi_file_seek_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SEEK, #pragma weak MPI_File_seek_f = ompi_file_seek_f #pragma weak MPI_File_seek_f08 = ompi_file_seek_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SEEK, mpi_file_seek, mpi_file_seek_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SEEK, ompi_file_seek_f, (MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *whence, MPI_Fint *ierr), (fh, offset, whence, ierr) ) +#else +#define ompi_file_seek_f pompi_file_seek_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_file_seek_f(MPI_Fint *fh, MPI_Offset *offset, + +void ompi_file_seek_f(MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *whence, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - - c_ierr = MPI_File_seek(c_fh, (MPI_Offset) *offset, + MPI_File c_fh = PMPI_File_f2c(*fh); + + c_ierr = PMPI_File_seek(c_fh, (MPI_Offset) *offset, OMPI_FINT_2_INT(*whence)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/file_seek_shared_f.c b/ompi/mpi/fortran/mpif-h/file_seek_shared_f.c index b0596429296..9189ec6c371 100644 --- a/ompi/mpi/fortran/mpif-h/file_seek_shared_f.c +++ b/ompi/mpi/fortran/mpif-h/file_seek_shared_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_SEEK_SHARED = ompi_file_seek_shared_f #pragma weak pmpi_file_seek_shared = ompi_file_seek_shared_f #pragma weak pmpi_file_seek_shared_ = ompi_file_seek_shared_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_seek_shared_f = ompi_file_seek_shared_f #pragma weak PMPI_File_seek_shared_f08 = ompi_file_seek_shared_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SEEK_SHARED, pmpi_file_seek_shared, pmpi_file_seek_shared_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SEEK_SHARED, (MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *whence, MPI_Fint *ierr), (fh, offset, whence, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_SEEK_SHARED = ompi_file_seek_shared_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SEEK_SHARED, #pragma weak MPI_File_seek_shared_f = ompi_file_seek_shared_f #pragma weak MPI_File_seek_shared_f08 = ompi_file_seek_shared_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SEEK_SHARED, mpi_file_seek_shared, mpi_file_seek_shared_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SEEK_SHARED, ompi_file_seek_shared_f, (MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *whence, MPI_Fint *ierr), (fh, offset, whence, ierr) ) +#else +#define ompi_file_seek_shared_f pompi_file_seek_shared_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_file_seek_shared_f(MPI_Fint *fh, MPI_Offset *offset, + +void ompi_file_seek_shared_f(MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *whence, MPI_Fint *ierr) -{ +{ int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); - c_ierr = MPI_File_seek_shared(c_fh, (MPI_Offset) *offset, + c_ierr = PMPI_File_seek_shared(c_fh, (MPI_Offset) *offset, OMPI_FINT_2_INT(*whence)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/file_set_atomicity_f.c b/ompi/mpi/fortran/mpif-h/file_set_atomicity_f.c index a9da00a46fb..0f4549ee112 100644 --- a/ompi/mpi/fortran/mpif-h/file_set_atomicity_f.c +++ b/ompi/mpi/fortran/mpif-h/file_set_atomicity_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_SET_ATOMICITY = ompi_file_set_atomicity_f #pragma weak pmpi_file_set_atomicity = ompi_file_set_atomicity_f #pragma weak pmpi_file_set_atomicity_ = ompi_file_set_atomicity_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_set_atomicity_f = ompi_file_set_atomicity_f #pragma weak PMPI_File_set_atomicity_f08 = ompi_file_set_atomicity_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_ATOMICITY, pmpi_file_set_atomicity, pmpi_file_set_atomicity_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_ATOMICITY, (MPI_Fint *fh, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (fh, flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_SET_ATOMICITY = ompi_file_set_atomicity_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_ATOMICITY, #pragma weak MPI_File_set_atomicity_f = ompi_file_set_atomicity_f #pragma weak MPI_File_set_atomicity_f08 = ompi_file_set_atomicity_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SET_ATOMICITY, mpi_file_set_atomicity, mpi_file_set_atomicity_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SET_ATOMICITY, ompi_file_set_atomicity_f, (MPI_Fint *fh, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (fh, flag, ierr) ) +#else +#define ompi_file_set_atomicity_f pompi_file_set_atomicity_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_set_atomicity_f(MPI_Fint *fh, ompi_fortran_logical_t *flag, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); - c_ierr = MPI_File_set_atomicity(c_fh, + c_ierr = PMPI_File_set_atomicity(c_fh, OMPI_LOGICAL_2_INT(*flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/file_set_errhandler_f.c b/ompi/mpi/fortran/mpif-h/file_set_errhandler_f.c index 0e730189887..769378cd1f1 100644 --- a/ompi/mpi/fortran/mpif-h/file_set_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/file_set_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_SET_ERRHANDLER = ompi_file_set_errhandler_f #pragma weak pmpi_file_set_errhandler = ompi_file_set_errhandler_f #pragma weak pmpi_file_set_errhandler_ = ompi_file_set_errhandler_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_set_errhandler_f = ompi_file_set_errhandler_f #pragma weak PMPI_File_set_errhandler_f08 = ompi_file_set_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_ERRHANDLER, pmpi_file_set_errhandler, pmpi_file_set_errhandler_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_ERRHANDLER, (MPI_Fint *file, MPI_Fint *errhandler, MPI_Fint *ierr), (file, errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_SET_ERRHANDLER = ompi_file_set_errhandler_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_ERRHANDLER, #pragma weak MPI_File_set_errhandler_f = ompi_file_set_errhandler_f #pragma weak MPI_File_set_errhandler_f08 = ompi_file_set_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SET_ERRHANDLER, mpi_file_set_errhandler, mpi_file_set_errhandler_, @@ -58,20 +61,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SET_ERRHANDLER, ompi_file_set_errhandler_f, (MPI_Fint *file, MPI_Fint *errhandler, MPI_Fint *ierr), (file, errhandler, ierr) ) +#else +#define ompi_file_set_errhandler_f pompi_file_set_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_set_errhandler_f(MPI_Fint *fh, MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Errhandler c_err = MPI_Errhandler_f2c(*errhandler); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Errhandler c_err = PMPI_Errhandler_f2c(*errhandler); - c_ierr = MPI_File_set_errhandler(c_fh, c_err); + c_ierr = PMPI_File_set_errhandler(c_fh, c_err); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/file_set_info_f.c b/ompi/mpi/fortran/mpif-h/file_set_info_f.c index 611fa7ab92f..5fb03686e89 100644 --- a/ompi/mpi/fortran/mpif-h/file_set_info_f.c +++ b/ompi/mpi/fortran/mpif-h/file_set_info_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_SET_INFO = ompi_file_set_info_f #pragma weak pmpi_file_set_info = ompi_file_set_info_f #pragma weak pmpi_file_set_info_ = ompi_file_set_info_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_set_info_f = ompi_file_set_info_f #pragma weak PMPI_File_set_info_f08 = ompi_file_set_info_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_INFO, pmpi_file_set_info, pmpi_file_set_info_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_INFO, (MPI_Fint *fh, MPI_Fint *info, MPI_Fint *ierr), (fh, info, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_SET_INFO = ompi_file_set_info_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_INFO, #pragma weak MPI_File_set_info_f = ompi_file_set_info_f #pragma weak MPI_File_set_info_f08 = ompi_file_set_info_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SET_INFO, mpi_file_set_info, mpi_file_set_info_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SET_INFO, ompi_file_set_info_f, (MPI_Fint *fh, MPI_Fint *info, MPI_Fint *ierr), (fh, info, ierr) ) +#else +#define ompi_file_set_info_f pompi_file_set_info_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_set_info_f(MPI_Fint *fh, MPI_Fint *info, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Info c_info = MPI_Info_f2c(*info); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Info c_info = PMPI_Info_f2c(*info); - c_ierr = MPI_File_set_info(c_fh, c_info); + c_ierr = PMPI_File_set_info(c_fh, c_info); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/file_set_size_f.c b/ompi/mpi/fortran/mpif-h/file_set_size_f.c index a4e8b97881b..cdadd67d724 100644 --- a/ompi/mpi/fortran/mpif-h/file_set_size_f.c +++ b/ompi/mpi/fortran/mpif-h/file_set_size_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_SET_SIZE = ompi_file_set_size_f #pragma weak pmpi_file_set_size = ompi_file_set_size_f #pragma weak pmpi_file_set_size_ = ompi_file_set_size_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_set_size_f = ompi_file_set_size_f #pragma weak PMPI_File_set_size_f08 = ompi_file_set_size_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_SIZE, pmpi_file_set_size, pmpi_file_set_size_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_SIZE, (MPI_Fint *fh, MPI_Offset *size, MPI_Fint *ierr), (fh, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_SET_SIZE = ompi_file_set_size_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_SIZE, #pragma weak MPI_File_set_size_f = ompi_file_set_size_f #pragma weak MPI_File_set_size_f08 = ompi_file_set_size_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SET_SIZE, mpi_file_set_size, mpi_file_set_size_, @@ -57,18 +60,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SET_SIZE, ompi_file_set_size_f, (MPI_Fint *fh, MPI_Offset *size, MPI_Fint *ierr), (fh, size, ierr) ) +#else +#define ompi_file_set_size_f pompi_file_set_size_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_set_size_f(MPI_Fint *fh, MPI_Offset *size, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - - c_ierr = MPI_File_set_size(c_fh, (MPI_Offset) *size); + MPI_File c_fh = PMPI_File_f2c(*fh); + + c_ierr = PMPI_File_set_size(c_fh, (MPI_Offset) *size); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/file_set_view_f.c b/ompi/mpi/fortran/mpif-h/file_set_view_f.c index 6f7e3a12ead..69ced3e734f 100644 --- a/ompi/mpi/fortran/mpif-h/file_set_view_f.c +++ b/ompi/mpi/fortran/mpif-h/file_set_view_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/base/strings.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_SET_VIEW = ompi_file_set_view_f #pragma weak pmpi_file_set_view = ompi_file_set_view_f #pragma weak pmpi_file_set_view_ = ompi_file_set_view_f @@ -31,7 +34,7 @@ #pragma weak PMPI_File_set_view_f = ompi_file_set_view_f #pragma weak PMPI_File_set_view_f08 = ompi_file_set_view_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_VIEW, pmpi_file_set_view, pmpi_file_set_view_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_VIEW, (MPI_Fint *fh, MPI_Offset *disp, MPI_Fint *etype, MPI_Fint *filetype, char *datarep, MPI_Fint *info, MPI_Fint *ierr, int datarep_len), (fh, disp, etype, filetype, datarep, info, ierr, datarep_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_SET_VIEW = ompi_file_set_view_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SET_VIEW, #pragma weak MPI_File_set_view_f = ompi_file_set_view_f #pragma weak MPI_File_set_view_f08 = ompi_file_set_view_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SET_VIEW, mpi_file_set_view, mpi_file_set_view_, @@ -59,22 +62,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SET_VIEW, ompi_file_set_view_f, (MPI_Fint *fh, MPI_Offset *disp, MPI_Fint *etype, MPI_Fint *filetype, char *datarep, MPI_Fint *info, MPI_Fint *ierr, int datarep_len), (fh, disp, etype, filetype, datarep, info, ierr, datarep_len) ) +#else +#define ompi_file_set_view_f pompi_file_set_view_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_set_view_f(MPI_Fint *fh, MPI_Offset *disp, MPI_Fint *etype, MPI_Fint *filetype, - char *datarep, MPI_Fint *info, MPI_Fint *ierr, + char *datarep, MPI_Fint *info, MPI_Fint *ierr, int datarep_len) { - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_etype = MPI_Type_f2c(*etype); - MPI_Datatype c_filetype = MPI_Type_f2c(*filetype); - MPI_Info c_info = MPI_Info_f2c(*info); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_etype = PMPI_Type_f2c(*etype); + MPI_Datatype c_filetype = PMPI_Type_f2c(*filetype); + MPI_Info c_info = PMPI_Info_f2c(*info); char *c_datarep; int c_ierr, ret; @@ -86,7 +88,7 @@ void ompi_file_set_view_f(MPI_Fint *fh, MPI_Offset *disp, return; } - c_ierr = MPI_File_set_view(c_fh, (MPI_Offset) *disp, + c_ierr = PMPI_File_set_view(c_fh, (MPI_Offset) *disp, c_etype, c_filetype, c_datarep, c_info); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/file_sync_f.c b/ompi/mpi/fortran/mpif-h/file_sync_f.c index efba22a80e2..2e1d425b48b 100644 --- a/ompi/mpi/fortran/mpif-h/file_sync_f.c +++ b/ompi/mpi/fortran/mpif-h/file_sync_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_SYNC = ompi_file_sync_f #pragma weak pmpi_file_sync = ompi_file_sync_f #pragma weak pmpi_file_sync_ = ompi_file_sync_f @@ -29,7 +32,7 @@ #pragma weak PMPI_File_sync_f = ompi_file_sync_f #pragma weak PMPI_File_sync_f08 = ompi_file_sync_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SYNC, pmpi_file_sync, pmpi_file_sync_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SYNC, (MPI_Fint *fh, MPI_Fint *ierr), (fh, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_SYNC = ompi_file_sync_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_SYNC, #pragma weak MPI_File_sync_f = ompi_file_sync_f #pragma weak MPI_File_sync_f08 = ompi_file_sync_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SYNC, mpi_file_sync, mpi_file_sync_, @@ -57,18 +60,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_SYNC, ompi_file_sync_f, (MPI_Fint *fh, MPI_Fint *ierr), (fh, ierr) ) +#else +#define ompi_file_sync_f pompi_file_sync_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_sync_f(MPI_Fint *fh, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); - c_ierr = MPI_File_sync(c_fh); + c_ierr = PMPI_File_sync(c_fh); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/file_write_all_begin_f.c b/ompi/mpi/fortran/mpif-h/file_write_all_begin_f.c index 4cc4d3bee97..e91f4624c22 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_all_begin_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_all_begin_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE_ALL_BEGIN = ompi_file_write_all_begin_f #pragma weak pmpi_file_write_all_begin = ompi_file_write_all_begin_f #pragma weak pmpi_file_write_all_begin_ = ompi_file_write_all_begin_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_write_all_begin_f = ompi_file_write_all_begin_f #pragma weak PMPI_File_write_all_begin_f08 = ompi_file_write_all_begin_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ALL_BEGIN, pmpi_file_write_all_begin, pmpi_file_write_all_begin_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ALL_BEGIN, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, buf, count, datatype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE_ALL_BEGIN = ompi_file_write_all_begin_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ALL_BEGIN, #pragma weak MPI_File_write_all_begin_f = ompi_file_write_all_begin_f #pragma weak MPI_File_write_all_begin_f08 = ompi_file_write_all_begin_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ALL_BEGIN, mpi_file_write_all_begin, mpi_file_write_all_begin_, @@ -58,22 +61,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ALL_BEGIN, ompi_file_write_all_begin_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, buf, count, datatype, ierr) ) +#else +#define ompi_file_write_all_begin_f pompi_file_write_all_begin_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_file_write_all_begin_f(MPI_Fint *fh, char *buf, + +void ompi_file_write_all_begin_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_File_write_all_begin(c_fh, OMPI_F2C_BOTTOM(buf), + c_ierr = PMPI_File_write_all_begin(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/file_write_all_end_f.c b/ompi/mpi/fortran/mpif-h/file_write_all_end_f.c index ffa3975dcac..af6be08878d 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_all_end_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_all_end_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE_ALL_END = ompi_file_write_all_end_f #pragma weak pmpi_file_write_all_end = ompi_file_write_all_end_f #pragma weak pmpi_file_write_all_end_ = ompi_file_write_all_end_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_write_all_end_f = ompi_file_write_all_end_f #pragma weak PMPI_File_write_all_end_f08 = ompi_file_write_all_end_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ALL_END, pmpi_file_write_all_end, pmpi_file_write_all_end_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ALL_END, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE_ALL_END = ompi_file_write_all_end_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ALL_END, #pragma weak MPI_File_write_all_end_f = ompi_file_write_all_end_f #pragma weak MPI_File_write_all_end_f08 = ompi_file_write_all_end_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ALL_END, mpi_file_write_all_end, mpi_file_write_all_end_, @@ -60,23 +63,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ALL_END, ompi_file_write_all_end_f, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) +#else +#define ompi_file_write_all_end_f pompi_file_write_all_end_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_write_all_end_f(MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - - c_ierr = MPI_File_write_all_end(c_fh, buf, c_status); + + c_ierr = PMPI_File_write_all_end(c_fh, buf, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) diff --git a/ompi/mpi/fortran/mpif-h/file_write_all_f.c b/ompi/mpi/fortran/mpif-h/file_write_all_f.c index a0c97d63309..8245c5e3aa9 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_all_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_all_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE_ALL = ompi_file_write_all_f #pragma weak pmpi_file_write_all = ompi_file_write_all_f #pragma weak pmpi_file_write_all_ = ompi_file_write_all_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_write_all_f = ompi_file_write_all_f #pragma weak PMPI_File_write_all_f08 = ompi_file_write_all_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ALL, pmpi_file_write_all, pmpi_file_write_all_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ALL, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE_ALL = ompi_file_write_all_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ALL, #pragma weak MPI_File_write_all_f = ompi_file_write_all_f #pragma weak MPI_File_write_all_f08 = ompi_file_write_all_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ALL, mpi_file_write_all, mpi_file_write_all_, @@ -60,24 +63,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ALL, ompi_file_write_all_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) +#else +#define ompi_file_write_all_f pompi_file_write_all_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_write_all_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - - c_ierr = MPI_File_write_all(c_fh, OMPI_F2C_BOTTOM(buf), + + c_ierr = PMPI_File_write_all(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/file_write_at_all_begin_f.c b/ompi/mpi/fortran/mpif-h/file_write_at_all_begin_f.c index fb54a58962e..01e825e6967 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_at_all_begin_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_at_all_begin_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE_AT_ALL_BEGIN = ompi_file_write_at_all_begin_f #pragma weak pmpi_file_write_at_all_begin = ompi_file_write_at_all_begin_f #pragma weak pmpi_file_write_at_all_begin_ = ompi_file_write_at_all_begin_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_write_at_all_begin_f = ompi_file_write_at_all_begin_f #pragma weak PMPI_File_write_at_all_begin_f08 = ompi_file_write_at_all_begin_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT_ALL_BEGIN, pmpi_file_write_at_all_begin, pmpi_file_write_at_all_begin_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT_ALL_BEGIN, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, offset, buf, count, datatype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE_AT_ALL_BEGIN = ompi_file_write_at_all_begin_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT_ALL_BEGIN, #pragma weak MPI_File_write_at_all_begin_f = ompi_file_write_at_all_begin_f #pragma weak MPI_File_write_at_all_begin_f08 = ompi_file_write_at_all_begin_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_AT_ALL_BEGIN, mpi_file_write_at_all_begin, mpi_file_write_at_all_begin_, @@ -58,24 +61,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_AT_ALL_BEGIN, ompi_file_write_at_all_begin_f, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, offset, buf, count, datatype, ierr) ) +#else +#define ompi_file_write_at_all_begin_f pompi_file_write_at_all_begin_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_file_write_at_all_begin_f(MPI_Fint *fh, MPI_Offset *offset, + +void ompi_file_write_at_all_begin_f(MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_File_write_at_all_begin(c_fh, + c_ierr = PMPI_File_write_at_all_begin(c_fh, (MPI_Offset) *offset, - OMPI_F2C_BOTTOM(buf), + OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/file_write_at_all_end_f.c b/ompi/mpi/fortran/mpif-h/file_write_at_all_end_f.c index dc8f9f0ea33..f27daf9d438 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_at_all_end_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_at_all_end_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE_AT_ALL_END = ompi_file_write_at_all_end_f #pragma weak pmpi_file_write_at_all_end = ompi_file_write_at_all_end_f #pragma weak pmpi_file_write_at_all_end_ = ompi_file_write_at_all_end_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_write_at_all_end_f = ompi_file_write_at_all_end_f #pragma weak PMPI_File_write_at_all_end_f08 = ompi_file_write_at_all_end_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT_ALL_END, pmpi_file_write_at_all_end, pmpi_file_write_at_all_end_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT_ALL_END, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE_AT_ALL_END = ompi_file_write_at_all_end_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT_ALL_END, #pragma weak MPI_File_write_at_all_end_f = ompi_file_write_at_all_end_f #pragma weak MPI_File_write_at_all_end_f08 = ompi_file_write_at_all_end_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_AT_ALL_END, mpi_file_write_at_all_end, mpi_file_write_at_all_end_, @@ -60,23 +63,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_AT_ALL_END, ompi_file_write_at_all_end_f, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) +#else +#define ompi_file_write_at_all_end_f pompi_file_write_at_all_end_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_write_at_all_end_f(MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_write_at_all_end(c_fh, buf, c_status); + c_ierr = PMPI_File_write_at_all_end(c_fh, buf, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) diff --git a/ompi/mpi/fortran/mpif-h/file_write_at_all_f.c b/ompi/mpi/fortran/mpif-h/file_write_at_all_f.c index 42903e38938..575db9e4bc5 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_at_all_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_at_all_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE_AT_ALL = ompi_file_write_at_all_f #pragma weak pmpi_file_write_at_all = ompi_file_write_at_all_f #pragma weak pmpi_file_write_at_all_ = ompi_file_write_at_all_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_write_at_all_f = ompi_file_write_at_all_f #pragma weak PMPI_File_write_at_all_f08 = ompi_file_write_at_all_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT_ALL, pmpi_file_write_at_all, pmpi_file_write_at_all_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT_ALL, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, offset, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE_AT_ALL = ompi_file_write_at_all_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT_ALL, #pragma weak MPI_File_write_at_all_f = ompi_file_write_at_all_f #pragma weak MPI_File_write_at_all_f08 = ompi_file_write_at_all_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_AT_ALL, mpi_file_write_at_all, mpi_file_write_at_all_, @@ -60,32 +63,31 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_AT_ALL, ompi_file_write_at_all_f, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, offset, buf, count, datatype, status, ierr) ) +#else +#define ompi_file_write_at_all_f pompi_file_write_at_all_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_write_at_all_f(MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_write_at_all(c_fh, + c_ierr = PMPI_File_write_at_all(c_fh, (MPI_Offset) *offset, - OMPI_F2C_BOTTOM(buf), + OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) } diff --git a/ompi/mpi/fortran/mpif-h/file_write_at_f.c b/ompi/mpi/fortran/mpif-h/file_write_at_f.c index 820b1fddb6b..b542139d1ff 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_at_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_at_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE_AT = ompi_file_write_at_f #pragma weak pmpi_file_write_at = ompi_file_write_at_f #pragma weak pmpi_file_write_at_ = ompi_file_write_at_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_write_at_f = ompi_file_write_at_f #pragma weak PMPI_File_write_at_f08 = ompi_file_write_at_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT, pmpi_file_write_at, pmpi_file_write_at_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, offset, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE_AT = ompi_file_write_at_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_AT, #pragma weak MPI_File_write_at_f = ompi_file_write_at_f #pragma weak MPI_File_write_at_f08 = ompi_file_write_at_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_AT, mpi_file_write_at, mpi_file_write_at_, @@ -62,27 +65,29 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_AT, (fh, offset, buf, count, datatype, status, ierr) ) #endif -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" +#if OMPI_BUILD_MPI_PROFILING && ! 
OPAL_HAVE_WEAK_SYMBOLS +#define ompi_file_write_at_f pompi_file_write_at_f #endif +#endif + void ompi_file_write_at_f(MPI_Fint *fh, MPI_Offset *offset, - char *buf, MPI_Fint *count, + char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_write_at(c_fh, + c_ierr = PMPI_File_write_at(c_fh, (MPI_Offset) *offset, - OMPI_F2C_BOTTOM(buf), + OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) } diff --git a/ompi/mpi/fortran/mpif-h/file_write_f.c b/ompi/mpi/fortran/mpif-h/file_write_f.c index 5041e344d5c..d49c5f50a49 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE = ompi_file_write_f #pragma weak pmpi_file_write = ompi_file_write_f #pragma weak pmpi_file_write_ = ompi_file_write_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_write_f = ompi_file_write_f #pragma weak PMPI_File_write_f08 = ompi_file_write_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE, pmpi_file_write, pmpi_file_write_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE = ompi_file_write_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE, #pragma weak MPI_File_write_f = ompi_file_write_f #pragma weak MPI_File_write_f08 = ompi_file_write_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE, mpi_file_write, mpi_file_write_, @@ -60,27 +63,26 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE, ompi_file_write_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) +#else +#define ompi_file_write_f pompi_file_write_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_write_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - - c_ierr = MPI_File_write(c_fh, OMPI_F2C_BOTTOM(buf), + + c_ierr = PMPI_File_write(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) } diff --git a/ompi/mpi/fortran/mpif-h/file_write_ordered_begin_f.c b/ompi/mpi/fortran/mpif-h/file_write_ordered_begin_f.c index f7fd293d2be..582461a1400 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_ordered_begin_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_ordered_begin_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE_ORDERED_BEGIN = ompi_file_write_ordered_begin_f #pragma weak pmpi_file_write_ordered_begin = ompi_file_write_ordered_begin_f #pragma weak pmpi_file_write_ordered_begin_ = ompi_file_write_ordered_begin_f @@ -30,7 +33,7 @@ #pragma weak PMPI_File_write_ordered_begin_f = ompi_file_write_ordered_begin_f #pragma weak PMPI_File_write_ordered_begin_f08 = ompi_file_write_ordered_begin_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ORDERED_BEGIN, pmpi_file_write_ordered_begin, pmpi_file_write_ordered_begin_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ORDERED_BEGIN, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, buf, count, datatype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE_ORDERED_BEGIN = ompi_file_write_ordered_begin_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ORDERED_BEGIN, #pragma weak MPI_File_write_ordered_begin_f = ompi_file_write_ordered_begin_f #pragma weak MPI_File_write_ordered_begin_f08 = ompi_file_write_ordered_begin_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ORDERED_BEGIN, mpi_file_write_ordered_begin, mpi_file_write_ordered_begin_, @@ -58,22 +61,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ORDERED_BEGIN, ompi_file_write_ordered_begin_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr), (fh, buf, count, datatype, ierr) ) +#else +#define ompi_file_write_ordered_begin_f pompi_file_write_ordered_begin_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_file_write_ordered_begin_f(MPI_Fint *fh, char *buf, - MPI_Fint *count, MPI_Fint *datatype, + +void ompi_file_write_ordered_begin_f(MPI_Fint *fh, char *buf, + MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_File_write_ordered_begin(c_fh, OMPI_F2C_BOTTOM(buf), + c_ierr = PMPI_File_write_ordered_begin(c_fh, OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/file_write_ordered_end_f.c b/ompi/mpi/fortran/mpif-h/file_write_ordered_end_f.c index d70c272ae28..6126ef037ab 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_ordered_end_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_ordered_end_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE_ORDERED_END = ompi_file_write_ordered_end_f #pragma weak pmpi_file_write_ordered_end = ompi_file_write_ordered_end_f #pragma weak pmpi_file_write_ordered_end_ = ompi_file_write_ordered_end_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_write_ordered_end_f = ompi_file_write_ordered_end_f #pragma weak PMPI_File_write_ordered_end_f08 = ompi_file_write_ordered_end_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ORDERED_END, pmpi_file_write_ordered_end, pmpi_file_write_ordered_end_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ORDERED_END, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE_ORDERED_END = ompi_file_write_ordered_end_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ORDERED_END, #pragma weak MPI_File_write_ordered_end_f = ompi_file_write_ordered_end_f #pragma weak MPI_File_write_ordered_end_f08 = ompi_file_write_ordered_end_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ORDERED_END, mpi_file_write_ordered_end, mpi_file_write_ordered_end_, @@ -60,23 +63,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ORDERED_END, ompi_file_write_ordered_end_f, (MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, status, ierr) ) +#else +#define ompi_file_write_ordered_end_f pompi_file_write_ordered_end_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_write_ordered_end_f(MPI_Fint *fh, char *buf, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); + MPI_File c_fh = PMPI_File_f2c(*fh); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_write_ordered_end(c_fh, buf, c_status); + c_ierr = PMPI_File_write_ordered_end(c_fh, buf, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) diff --git a/ompi/mpi/fortran/mpif-h/file_write_ordered_f.c b/ompi/mpi/fortran/mpif-h/file_write_ordered_f.c index 1c37dad3d39..293e140d372 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_ordered_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_ordered_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE_ORDERED = ompi_file_write_ordered_f #pragma weak pmpi_file_write_ordered = ompi_file_write_ordered_f #pragma weak pmpi_file_write_ordered_ = ompi_file_write_ordered_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_write_ordered_f = ompi_file_write_ordered_f #pragma weak PMPI_File_write_ordered_f08 = ompi_file_write_ordered_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ORDERED, pmpi_file_write_ordered, pmpi_file_write_ordered_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ORDERED, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE_ORDERED = ompi_file_write_ordered_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_ORDERED, #pragma weak MPI_File_write_ordered_f = ompi_file_write_ordered_f #pragma weak MPI_File_write_ordered_f08 = ompi_file_write_ordered_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ORDERED, mpi_file_write_ordered, mpi_file_write_ordered_, @@ -60,30 +63,29 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_ORDERED, ompi_file_write_ordered_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) +#else +#define ompi_file_write_ordered_f pompi_file_write_ordered_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_write_ordered_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_write_ordered(c_fh, - OMPI_F2C_BOTTOM(buf), + c_ierr = PMPI_File_write_ordered(c_fh, + OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) } diff --git a/ompi/mpi/fortran/mpif-h/file_write_shared_f.c b/ompi/mpi/fortran/mpif-h/file_write_shared_f.c index 858f9a51d00..196688a533a 100644 --- a/ompi/mpi/fortran/mpif-h/file_write_shared_f.c +++ b/ompi/mpi/fortran/mpif-h/file_write_shared_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/status-conversion.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FILE_WRITE_SHARED = ompi_file_write_shared_f #pragma weak pmpi_file_write_shared = ompi_file_write_shared_f #pragma weak pmpi_file_write_shared_ = ompi_file_write_shared_f @@ -32,7 +35,7 @@ #pragma weak PMPI_File_write_shared_f = ompi_file_write_shared_f #pragma weak PMPI_File_write_shared_f08 = ompi_file_write_shared_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_SHARED, pmpi_file_write_shared, pmpi_file_write_shared_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_SHARED, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FILE_WRITE_SHARED = ompi_file_write_shared_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FILE_WRITE_SHARED, #pragma weak MPI_File_write_shared_f = ompi_file_write_shared_f #pragma weak MPI_File_write_shared_f08 = ompi_file_write_shared_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_SHARED, mpi_file_write_shared, mpi_file_write_shared_, @@ -60,30 +63,29 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FILE_WRITE_SHARED, ompi_file_write_shared_f, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr), (fh, buf, count, datatype, status, ierr) ) +#else +#define ompi_file_write_shared_f pompi_file_write_shared_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_file_write_shared_f(MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_File c_fh = MPI_File_f2c(*fh); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_File c_fh = PMPI_File_f2c(*fh); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_File_write_shared(c_fh, - OMPI_F2C_BOTTOM(buf), + c_ierr = PMPI_File_write_shared(c_fh, + OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) } diff --git a/ompi/mpi/fortran/mpif-h/finalize_f.c b/ompi/mpi/fortran/mpif-h/finalize_f.c index d4002c15aa2..c0dc45fbd8a 100644 --- a/ompi/mpi/fortran/mpif-h/finalize_f.c +++ b/ompi/mpi/fortran/mpif-h/finalize_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FINALIZE = ompi_finalize_f #pragma weak pmpi_finalize = ompi_finalize_f #pragma weak pmpi_finalize_ = ompi_finalize_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Finalize_f = ompi_finalize_f #pragma weak PMPI_Finalize_f08 = ompi_finalize_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FINALIZE, pmpi_finalize, pmpi_finalize_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FINALIZE, (MPI_Fint *ierr), (ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FINALIZE = ompi_finalize_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FINALIZE, #pragma weak MPI_Finalize_f = ompi_finalize_f #pragma weak MPI_Finalize_f08 = ompi_finalize_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FINALIZE, mpi_finalize, mpi_finalize_, @@ -57,15 +60,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FINALIZE, ompi_finalize_f, (MPI_Fint *ierr), (ierr) ) +#else +#define ompi_finalize_f pompi_finalize_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_finalize_f(MPI_Fint *ierr) { - int c_ierr = MPI_Finalize(); + int c_ierr = PMPI_Finalize(); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/finalized_f.c b/ompi/mpi/fortran/mpif-h/finalized_f.c index 3c3cd9ea6a8..8095e719a53 100644 --- a/ompi/mpi/fortran/mpif-h/finalized_f.c +++ b/ompi/mpi/fortran/mpif-h/finalized_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FINALIZED = ompi_finalized_f #pragma weak pmpi_finalized = ompi_finalized_f #pragma weak pmpi_finalized_ = ompi_finalized_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Finalized_f = ompi_finalized_f #pragma weak PMPI_Finalized_f08 = ompi_finalized_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FINALIZED, pmpi_finalized, pmpi_finalized_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FINALIZED, (ompi_fortran_logical_t *flag, MPI_Fint *ierr), (flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FINALIZED = ompi_finalized_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FINALIZED, #pragma weak MPI_Finalized_f = ompi_finalized_f #pragma weak MPI_Finalized_f08 = ompi_finalized_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FINALIZED, mpi_finalized, mpi_finalized_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FINALIZED, ompi_finalized_f, (ompi_fortran_logical_t *flag, MPI_Fint *ierr), (flag, ierr) ) +#else +#define ompi_finalized_f pompi_finalized_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_finalized_f(ompi_fortran_logical_t *flag, MPI_Fint *ierr) { int c_ierr; OMPI_LOGICAL_NAME_DECL(flag); - c_ierr = MPI_Finalized(OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); + c_ierr = PMPI_Finalized(OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/free_mem_f.c b/ompi/mpi/fortran/mpif-h/free_mem_f.c index 56026516380..5a9e8507b2f 100644 --- a/ompi/mpi/fortran/mpif-h/free_mem_f.c +++ b/ompi/mpi/fortran/mpif-h/free_mem_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_FREE_MEM = ompi_free_mem_f #pragma weak pmpi_free_mem = ompi_free_mem_f #pragma weak pmpi_free_mem_ = ompi_free_mem_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Free_mem_f = ompi_free_mem_f #pragma weak PMPI_Free_mem_f08 = ompi_free_mem_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_FREE_MEM, pmpi_free_mem, pmpi_free_mem_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FREE_MEM, (char *base, MPI_Fint *ierr), (base, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_FREE_MEM = ompi_free_mem_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_FREE_MEM, #pragma weak MPI_Free_mem_f = ompi_free_mem_f #pragma weak MPI_Free_mem_f08 = ompi_free_mem_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_FREE_MEM, mpi_free_mem, mpi_free_mem_, @@ -57,15 +60,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_FREE_MEM, ompi_free_mem_f, (char *base, MPI_Fint *ierr), (base, ierr) ) +#else +#define ompi_free_mem_f pompi_free_mem_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_free_mem_f(char *base, MPI_Fint *ierr) { - int c_ierr = MPI_Free_mem(base); + int c_ierr = PMPI_Free_mem(base); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/gather_f.c b/ompi/mpi/fortran/mpif-h/gather_f.c index ec3ddafe464..0019d17d1c9 100644 --- a/ompi/mpi/fortran/mpif-h/gather_f.c +++ b/ompi/mpi/fortran/mpif-h/gather_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GATHER = ompi_gather_f #pragma weak pmpi_gather = ompi_gather_f #pragma weak pmpi_gather_ = ompi_gather_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Gather_f = ompi_gather_f #pragma weak PMPI_Gather_f08 = ompi_gather_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GATHER, pmpi_gather, pmpi_gather_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GATHER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GATHER = ompi_gather_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GATHER, #pragma weak MPI_Gather_f = ompi_gather_f #pragma weak MPI_Gather_f08 = ompi_gather_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GATHER, mpi_gather, mpi_gather_, @@ -58,31 +61,30 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GATHER, ompi_gather_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, ierr) ) +#else +#define ompi_gather_f pompi_gather_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_gather_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, - char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, + char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Gather(sendbuf, OMPI_FINT_2_INT(*sendcount), - c_sendtype, recvbuf, + c_ierr = PMPI_Gather(sendbuf, OMPI_FINT_2_INT(*sendcount), + c_sendtype, recvbuf, OMPI_FINT_2_INT(*recvcount), c_recvtype, OMPI_FINT_2_INT(*root), diff --git a/ompi/mpi/fortran/mpif-h/gatherv_f.c b/ompi/mpi/fortran/mpif-h/gatherv_f.c index b8fa43fef7d..ef235ebe686 100644 --- a/ompi/mpi/fortran/mpif-h/gatherv_f.c +++ b/ompi/mpi/fortran/mpif-h/gatherv_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GATHERV = ompi_gatherv_f #pragma weak pmpi_gatherv = ompi_gatherv_f #pragma weak pmpi_gatherv_ = ompi_gatherv_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Gatherv_f = ompi_gatherv_f #pragma weak PMPI_Gatherv_f08 = ompi_gatherv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GATHERV, pmpi_gatherv, pmpi_gatherv_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GATHERV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GATHERV = ompi_gatherv_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GATHERV, #pragma weak MPI_Gatherv_f = ompi_gatherv_f #pragma weak MPI_Gatherv_f08 = ompi_gatherv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GATHERV, mpi_gatherv, mpi_gatherv_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GATHERV, ompi_gatherv_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, ierr) ) +#else +#define ompi_gatherv_f pompi_gatherv_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_gatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, @@ -76,11 +78,11 @@ void ompi_gatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(displs); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); - - MPI_Comm_size(c_comm, &size); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); + + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); OMPI_ARRAY_FINT_2_INT(displs, size); @@ -88,11 +90,11 @@ void ompi_gatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Gatherv(sendbuf, OMPI_FINT_2_INT(*sendcount), + c_ierr = PMPI_Gatherv(sendbuf, OMPI_FINT_2_INT(*sendcount), c_sendtype, recvbuf, OMPI_ARRAY_NAME_CONVERT(recvcounts), OMPI_ARRAY_NAME_CONVERT(displs), - c_recvtype, + c_recvtype, OMPI_FINT_2_INT(*root), c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/get_accumulate_f.c b/ompi/mpi/fortran/mpif-h/get_accumulate_f.c index c46c48db9bf..ff15d4aa11f 100644 --- 
a/ompi/mpi/fortran/mpif-h/get_accumulate_f.c +++ b/ompi/mpi/fortran/mpif-h/get_accumulate_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,7 +28,8 @@ #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GET_ACCUMULATE = ompi_get_accumulate_f #pragma weak pmpi_get_accumulate = ompi_get_accumulate_f #pragma weak pmpi_get_accumulate_ = ompi_get_accumulate_f @@ -34,7 +37,7 @@ #pragma weak PMPI_Get_accumulate_f = ompi_get_accumulate_f #pragma weak PMPI_Get_accumulate_f08 = ompi_get_accumulate_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ACCUMULATE, pmpi_get_accumulate, pmpi_get_accumulate_, @@ -43,6 +46,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ACCUMULATE, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, char *result_addr, MPI_Fint *result_count, MPI_Fint *result_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *op, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, result_addr, result_count, result_datatype, target_rank, target_disp, target_count, target_datatype, op, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GET_ACCUMULATE = ompi_get_accumulate_f @@ -52,9 +56,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ACCUMULATE, #pragma weak MPI_Get_accumulate_f = ompi_get_accumulate_f #pragma weak MPI_Get_accumulate_f08 = ompi_get_accumulate_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GET_ACCUMULATE, mpi_get_accumulate, mpi_get_accumulate_, @@ -62,13 +65,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GET_ACCUMULATE, ompi_get_accumulate_f, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, char *result_addr, MPI_Fint *result_count, MPI_Fint *result_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *op, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, result_addr, result_count, result_datatype, target_rank, target_disp, target_count, target_datatype, op, win, ierr) ) +#else +#define ompi_get_accumulate_f pompi_get_accumulate_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_get_accumulate_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, char *result_addr, MPI_Fint *result_count, MPI_Fint *result_datatype, @@ -77,13 +79,13 @@ void ompi_get_accumulate_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *op, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_origin_datatype = MPI_Type_f2c(*origin_datatype); - MPI_Datatype c_result_datatype = MPI_Type_f2c(*result_datatype); - MPI_Datatype c_target_datatype = MPI_Type_f2c(*target_datatype); - MPI_Win c_win = MPI_Win_f2c(*win); - MPI_Op c_op = MPI_Op_f2c(*op); + MPI_Datatype c_origin_datatype = PMPI_Type_f2c(*origin_datatype); + MPI_Datatype c_result_datatype = PMPI_Type_f2c(*result_datatype); + MPI_Datatype c_target_datatype = PMPI_Type_f2c(*target_datatype); + MPI_Win c_win = PMPI_Win_f2c(*win); + MPI_Op c_op = PMPI_Op_f2c(*op); - c_ierr = MPI_Get_accumulate(OMPI_F2C_BOTTOM(origin_addr), + c_ierr = PMPI_Get_accumulate(OMPI_F2C_BOTTOM(origin_addr), OMPI_FINT_2_INT(*origin_count), c_origin_datatype, OMPI_F2C_BOTTOM(result_addr), diff --git a/ompi/mpi/fortran/mpif-h/get_address_f.c b/ompi/mpi/fortran/mpif-h/get_address_f.c 
index 4b9fce768b5..4c19d61dfdc 100644 --- a/ompi/mpi/fortran/mpif-h/get_address_f.c +++ b/ompi/mpi/fortran/mpif-h/get_address_f.c @@ -5,23 +5,27 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GET_ADDRESS = ompi_get_address_f #pragma weak pmpi_get_address = ompi_get_address_f #pragma weak pmpi_get_address_ = ompi_get_address_f @@ -29,7 +33,7 @@ #pragma weak PMPI_Get_address_f = ompi_get_address_f #pragma weak PMPI_Get_address_f08 = ompi_get_address_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ADDRESS, pmpi_get_address, pmpi_get_address_, @@ -38,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ADDRESS, (char *location, MPI_Aint *address, MPI_Fint *ierr), (location, address, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GET_ADDRESS = ompi_get_address_f @@ -47,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ADDRESS, #pragma weak MPI_Get_address_f = ompi_get_address_f #pragma weak MPI_Get_address_f08 = ompi_get_address_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GET_ADDRESS, mpi_get_address, mpi_get_address_, @@ -57,19 +61,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GET_ADDRESS, ompi_get_address_f, (char *location, MPI_Aint *address, MPI_Fint *ierr), (location, address, ierr) ) +#else +#define ompi_get_address_f pompi_get_address_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_get_address_f(char *location, MPI_Aint *address, MPI_Fint *ierr) { int c_ierr; MPI_Aint c_address; - c_ierr = MPI_Get_address(location, &c_address); + c_ierr = PMPI_Get_address(OMPI_F2C_BOTTOM(location), &c_address); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/get_count_f.c b/ompi/mpi/fortran/mpif-h/get_count_f.c index 07a38f942b5..1c9a7de47e4 100644 --- a/ompi/mpi/fortran/mpif-h/get_count_f.c +++ b/ompi/mpi/fortran/mpif-h/get_count_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GET_COUNT = ompi_get_count_f #pragma weak pmpi_get_count = ompi_get_count_f #pragma weak pmpi_get_count_ = ompi_get_count_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Get_count_f = ompi_get_count_f #pragma weak PMPI_Get_count_f08 = ompi_get_count_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GET_COUNT, pmpi_get_count, pmpi_get_count_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_COUNT, (MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, MPI_Fint *ierr), (status, datatype, count, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GET_COUNT = ompi_get_count_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_COUNT, #pragma weak MPI_Get_count_f = ompi_get_count_f #pragma weak MPI_Get_count_f08 = ompi_get_count_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GET_COUNT, mpi_get_count, mpi_get_count_, @@ -58,17 +61,16 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GET_COUNT, ompi_get_count_f, (MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, MPI_Fint *ierr), (status, datatype, count, ierr) ) +#else +#define ompi_get_count_f pompi_get_count_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_get_count_f(MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Status c_status; OMPI_SINGLE_NAME_DECL(count); @@ -76,10 +78,10 @@ void ompi_get_count_f(MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, MPI *count = OMPI_INT_2_FINT(0); c_ierr = MPI_SUCCESS; } else { - c_ierr = MPI_Status_f2c(status, &c_status); + c_ierr = PMPI_Status_f2c(status, &c_status); if (MPI_SUCCESS == c_ierr) { - c_ierr = MPI_Get_count(&c_status, c_type, + c_ierr = PMPI_Get_count(&c_status, c_type, OMPI_SINGLE_NAME_CONVERT(count)); OMPI_SINGLE_INT_2_FINT(count); } diff --git a/ompi/mpi/fortran/mpif-h/get_elements_f.c b/ompi/mpi/fortran/mpif-h/get_elements_f.c index de05ea81e61..7e3b1e0676a 100644 --- a/ompi/mpi/fortran/mpif-h/get_elements_f.c +++ b/ompi/mpi/fortran/mpif-h/get_elements_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GET_ELEMENTS = ompi_get_elements_f #pragma weak pmpi_get_elements = ompi_get_elements_f #pragma weak pmpi_get_elements_ = ompi_get_elements_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Get_elements_f = ompi_get_elements_f #pragma weak PMPI_Get_elements_f08 = ompi_get_elements_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ELEMENTS, pmpi_get_elements, pmpi_get_elements_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ELEMENTS, (MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, MPI_Fint *ierr), (status, datatype, count, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GET_ELEMENTS = ompi_get_elements_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ELEMENTS, #pragma weak MPI_Get_elements_f = ompi_get_elements_f #pragma weak MPI_Get_elements_f08 = ompi_get_elements_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GET_ELEMENTS, mpi_get_elements, mpi_get_elements_, @@ -58,17 +61,16 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GET_ELEMENTS, ompi_get_elements_f, (MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, MPI_Fint *ierr), (status, datatype, count, ierr) ) +#else +#define ompi_get_elements_f pompi_get_elements_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_get_elements_f(MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Status c_status; OMPI_SINGLE_NAME_DECL(count); @@ -76,10 +78,10 @@ void ompi_get_elements_f(MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, *count = OMPI_INT_2_FINT(0); c_ierr = MPI_SUCCESS; } else { - c_ierr = MPI_Status_f2c(status, &c_status); + c_ierr = PMPI_Status_f2c(status, &c_status); if (MPI_SUCCESS == c_ierr) { - c_ierr = MPI_Get_elements(&c_status, c_type, + c_ierr = PMPI_Get_elements(&c_status, c_type, OMPI_SINGLE_NAME_CONVERT(count)); OMPI_SINGLE_INT_2_FINT(count); } diff --git a/ompi/mpi/fortran/mpif-h/get_elements_x_f.c b/ompi/mpi/fortran/mpif-h/get_elements_x_f.c index 9e25b4fb5d6..10480b6c7a4 100644 --- a/ompi/mpi/fortran/mpif-h/get_elements_x_f.c +++ b/ompi/mpi/fortran/mpif-h/get_elements_x_f.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GET_ELEMENTS_X = ompi_get_elements_x_f #pragma weak pmpi_get_elements_x = ompi_get_elements_x_f #pragma weak pmpi_get_elements_x_ = ompi_get_elements_x_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Get_elements_x_f = ompi_get_elements_x_f #pragma weak PMPI_Get_elements_x_f08 = ompi_get_elements_x_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ELEMENTS_X, pmpi_get_elements_x, pmpi_get_elements_x_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ELEMENTS_X, (MPI_Fint *status, MPI_Fint *datatype, MPI_Count *count, MPI_Fint *ierr), (status, datatype, count, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GET_ELEMENTS_X = ompi_get_elements_x_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_ELEMENTS_X, #pragma weak MPI_Get_elements_x_f = ompi_get_elements_x_f #pragma weak MPI_Get_elements_x_f08 = ompi_get_elements_x_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GET_ELEMENTS_X, mpi_get_elements_x, mpi_get_elements_x_, @@ -60,17 +63,16 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GET_ELEMENTS_X, ompi_get_elements_x_f, (MPI_Fint *status, MPI_Fint *datatype, MPI_Count *count, MPI_Fint *ierr), (status, datatype, count, ierr) ) +#else +#define ompi_get_elements_x_f pompi_get_elements_x_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_get_elements_x_f(MPI_Fint *status, MPI_Fint *datatype, MPI_Count *count, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Status c_status; OMPI_SINGLE_NAME_DECL(count); @@ -78,10 +80,10 @@ void ompi_get_elements_x_f(MPI_Fint *status, MPI_Fint *datatype, MPI_Count *coun *count = OMPI_INT_2_FINT(0); c_ierr = MPI_SUCCESS; } else { - c_ierr = MPI_Status_f2c(status, &c_status); + c_ierr = PMPI_Status_f2c(status, &c_status); if (MPI_SUCCESS == c_ierr) { - c_ierr = MPI_Get_elements_x(&c_status, c_type, count); + c_ierr = PMPI_Get_elements_x(&c_status, c_type, count); } } if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/get_f.c b/ompi/mpi/fortran/mpif-h/get_f.c index afdf0e1bf35..0442adb346a 100644 --- a/ompi/mpi/fortran/mpif-h/get_f.c +++ b/ompi/mpi/fortran/mpif-h/get_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GET = ompi_get_f #pragma weak pmpi_get = ompi_get_f #pragma weak pmpi_get_ = ompi_get_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Get_f = ompi_get_f #pragma weak PMPI_Get_f08 = ompi_get_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GET, pmpi_get, pmpi_get_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GET = ompi_get_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET, #pragma weak MPI_Get_f = ompi_get_f #pragma weak MPI_Get_f08 = ompi_get_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GET, mpi_get, mpi_get_, @@ -59,28 +62,27 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GET, ompi_get_f, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win, ierr) ) +#else +#define ompi_get_f pompi_get_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_get_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_origin_datatype = MPI_Type_f2c(*origin_datatype); - MPI_Datatype c_target_datatype = MPI_Type_f2c(*target_datatype); - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Datatype c_origin_datatype = PMPI_Type_f2c(*origin_datatype); + MPI_Datatype c_target_datatype = PMPI_Type_f2c(*target_datatype); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Get(OMPI_F2C_BOTTOM(origin_addr), + c_ierr = PMPI_Get(OMPI_F2C_BOTTOM(origin_addr), OMPI_FINT_2_INT(*origin_count), c_origin_datatype, OMPI_FINT_2_INT(*target_rank), - *target_disp, + *target_disp, OMPI_FINT_2_INT(*target_count), c_target_datatype, c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/get_library_version_f.c b/ompi/mpi/fortran/mpif-h/get_library_version_f.c index dae22b97be2..a10966a0d25 100644 --- a/ompi/mpi/fortran/mpif-h/get_library_version_f.c +++ b/ompi/mpi/fortran/mpif-h/get_library_version_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GET_LIBRARY_VERSION = ompi_get_library_version_f #pragma weak pmpi_get_library_version = ompi_get_library_version_f #pragma weak pmpi_get_library_version_ = ompi_get_library_version_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Get_library_version_f = ompi_get_library_version_f #pragma weak PMPI_Get_library_version_f08 = ompi_get_library_version_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GET_LIBRARY_VERSION, pmpi_get_library_version, pmpi_get_library_version_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_LIBRARY_VERSION, (char *version, MPI_Fint *resultlen, MPI_Fint *ierr, MPI_Fint version_len), (version, resultlen, ierr, version_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GET_LIBRARY_VERSION = ompi_get_library_version_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_LIBRARY_VERSION, #pragma weak MPI_Get_library_version_f = ompi_get_library_version_f #pragma weak MPI_Get_library_version_f08 = ompi_get_library_version_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GET_LIBRARY_VERSION, mpi_get_library_version, mpi_get_library_version_, @@ -58,24 +61,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GET_LIBRARY_VERSION, ompi_get_library_version_f, (char *version, MPI_Fint *resultlen, MPI_Fint *ierr, MPI_Fint version_len), (version, resultlen, ierr, version_len) ) +#else +#define ompi_get_library_version_f pompi_get_library_version_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_get_library_version_f(char *version, MPI_Fint *resultlen, MPI_Fint *ierr, MPI_Fint version_len) { int c_ierr, c_resultlen; char c_version[MPI_MAX_LIBRARY_VERSION_STRING]; - c_ierr = MPI_Get_library_version(c_version, &c_resultlen); + c_ierr = PMPI_Get_library_version(c_version, &c_resultlen); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - ompi_fortran_string_c2f(c_version, version, + ompi_fortran_string_c2f(c_version, version, OMPI_FINT_2_INT(version_len)); *resultlen = OMPI_INT_2_FINT(c_resultlen); } diff --git a/ompi/mpi/fortran/mpif-h/get_processor_name_f.c b/ompi/mpi/fortran/mpif-h/get_processor_name_f.c index 2feb797c7c0..1f36f671eec 100644 --- a/ompi/mpi/fortran/mpif-h/get_processor_name_f.c +++ b/ompi/mpi/fortran/mpif-h/get_processor_name_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/communicator/communicator.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GET_PROCESSOR_NAME = ompi_get_processor_name_f #pragma weak pmpi_get_processor_name = ompi_get_processor_name_f #pragma weak pmpi_get_processor_name_ = ompi_get_processor_name_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Get_processor_name_f = ompi_get_processor_name_f #pragma weak PMPI_Get_processor_name_f08 = ompi_get_processor_name_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GET_PROCESSOR_NAME, pmpi_get_processor_name, pmpi_get_processor_name_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_PROCESSOR_NAME, (char *name, MPI_Fint *resultlen, MPI_Fint *ierr, int name_len), (name, resultlen, ierr, name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GET_PROCESSOR_NAME = ompi_get_processor_name_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_PROCESSOR_NAME, #pragma weak MPI_Get_processor_name_f = ompi_get_processor_name_f #pragma weak MPI_Get_processor_name_f08 = ompi_get_processor_name_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GET_PROCESSOR_NAME, mpi_get_processor_name, mpi_get_processor_name_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GET_PROCESSOR_NAME, ompi_get_processor_name_f, (char *name, MPI_Fint *resultlen, MPI_Fint *ierr, int name_len), (name, resultlen, ierr, name_len) ) +#else +#define ompi_get_processor_name_f pompi_get_processor_name_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_GET_PROCESSOR_NAME"; /* Note that the name_len parameter is silently added by the Fortran @@ -81,8 +83,8 @@ void ompi_get_processor_name_f(char *name, MPI_Fint *resultlen, MPI_Fint *ierr, char c_name[MPI_MAX_PROCESSOR_NAME]; OMPI_SINGLE_NAME_DECL(resultlen); - ierr_c = MPI_Get_processor_name(c_name, - OMPI_SINGLE_NAME_CONVERT(resultlen)); + ierr_c = PMPI_Get_processor_name(c_name, + OMPI_SINGLE_NAME_CONVERT(resultlen)); if (MPI_SUCCESS == ierr_c) { OMPI_SINGLE_INT_2_FINT(resultlen); diff --git a/ompi/mpi/fortran/mpif-h/get_version_f.c b/ompi/mpi/fortran/mpif-h/get_version_f.c index d15c29e46ac..07b4f76f442 100644 --- a/ompi/mpi/fortran/mpif-h/get_version_f.c +++ b/ompi/mpi/fortran/mpif-h/get_version_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GET_VERSION = ompi_get_version_f #pragma weak pmpi_get_version = ompi_get_version_f #pragma weak pmpi_get_version_ = ompi_get_version_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Get_version_f = ompi_get_version_f #pragma weak PMPI_Get_version_f08 = ompi_get_version_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GET_VERSION, pmpi_get_version, pmpi_get_version_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_VERSION, (MPI_Fint *version, MPI_Fint *subversion, MPI_Fint *ierr), (version, subversion, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GET_VERSION = ompi_get_version_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GET_VERSION, #pragma weak MPI_Get_version_f = ompi_get_version_f #pragma weak MPI_Get_version_f08 = ompi_get_version_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GET_VERSION, mpi_get_version, mpi_get_version_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GET_VERSION, ompi_get_version_f, (MPI_Fint *version, MPI_Fint *subversion, MPI_Fint *ierr), (version, subversion, ierr) ) +#else +#define ompi_get_version_f pompi_get_version_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_get_version_f(MPI_Fint *version, MPI_Fint *subversion, MPI_Fint *ierr) { int c_ierr; OMPI_SINGLE_NAME_DECL(version); OMPI_SINGLE_NAME_DECL(subversion); - c_ierr = MPI_Get_version(OMPI_SINGLE_NAME_CONVERT(version), + c_ierr = PMPI_Get_version(OMPI_SINGLE_NAME_CONVERT(version), OMPI_SINGLE_NAME_CONVERT(subversion)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/graph_create_f.c b/ompi/mpi/fortran/mpif-h/graph_create_f.c index 35631475fc4..70f487b0e6b 100644 --- a/ompi/mpi/fortran/mpif-h/graph_create_f.c +++ b/ompi/mpi/fortran/mpif-h/graph_create_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GRAPH_CREATE = ompi_graph_create_f #pragma weak pmpi_graph_create = ompi_graph_create_f #pragma weak pmpi_graph_create_ = ompi_graph_create_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Graph_create_f = ompi_graph_create_f #pragma weak PMPI_Graph_create_f08 = ompi_graph_create_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_CREATE, pmpi_graph_create, pmpi_graph_create_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_CREATE, (MPI_Fint *comm_old, MPI_Fint *nnodes, MPI_Fint *indx, MPI_Fint *edges, ompi_fortran_logical_t *reorder, MPI_Fint *comm_graph, MPI_Fint *ierr), (comm_old, nnodes, indx, edges, reorder, comm_graph, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GRAPH_CREATE = ompi_graph_create_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_CREATE, #pragma weak MPI_Graph_create_f = ompi_graph_create_f #pragma weak MPI_Graph_create_f08 = ompi_graph_create_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GRAPH_CREATE, mpi_graph_create, mpi_graph_create_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GRAPH_CREATE, ompi_graph_create_f, (MPI_Fint *comm_old, MPI_Fint *nnodes, MPI_Fint *indx, MPI_Fint *edges, ompi_fortran_logical_t *reorder, MPI_Fint *comm_graph, MPI_Fint *ierr), (comm_old, nnodes, indx, edges, reorder, comm_graph, ierr) ) +#else +#define ompi_graph_create_f pompi_graph_create_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_graph_create_f(MPI_Fint *comm_old, MPI_Fint *nnodes, MPI_Fint *indx, MPI_Fint *edges, ompi_fortran_logical_t *reorder, MPI_Fint *comm_graph, @@ -74,14 +76,14 @@ void ompi_graph_create_f(MPI_Fint *comm_old, MPI_Fint *nnodes, OMPI_ARRAY_NAME_DECL(indx); OMPI_ARRAY_NAME_DECL(edges); - c_comm_old = MPI_Comm_f2c(*comm_old); + c_comm_old = PMPI_Comm_f2c(*comm_old); OMPI_ARRAY_FINT_2_INT(indx, *nnodes); /* Number of edges is equal to the last entry in the index array */ OMPI_ARRAY_FINT_2_INT(edges, indx[*nnodes - 1]); - c_ierr = MPI_Graph_create(c_comm_old, + c_ierr = PMPI_Graph_create(c_comm_old, OMPI_FINT_2_INT(*nnodes), OMPI_ARRAY_NAME_CONVERT(indx), OMPI_ARRAY_NAME_CONVERT(edges), @@ -90,7 +92,7 @@ void ompi_graph_create_f(MPI_Fint *comm_old, MPI_Fint *nnodes, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (OMPI_SUCCESS == c_ierr) { - *comm_graph = MPI_Comm_c2f(c_comm_graph); + *comm_graph = PMPI_Comm_c2f(c_comm_graph); } OMPI_ARRAY_FINT_2_INT_CLEANUP(indx); diff --git a/ompi/mpi/fortran/mpif-h/graph_get_f.c b/ompi/mpi/fortran/mpif-h/graph_get_f.c index 247d82b3854..afea6ac5b15 100644 --- a/ompi/mpi/fortran/mpif-h/graph_get_f.c +++ b/ompi/mpi/fortran/mpif-h/graph_get_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GRAPH_GET = ompi_graph_get_f #pragma weak pmpi_graph_get = ompi_graph_get_f #pragma weak pmpi_graph_get_ = ompi_graph_get_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Graph_get_f = ompi_graph_get_f #pragma weak PMPI_Graph_get_f08 = ompi_graph_get_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_GET, pmpi_graph_get, pmpi_graph_get_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_GET, (MPI_Fint *comm, MPI_Fint *maxindex, MPI_Fint *maxedges, MPI_Fint *indx, MPI_Fint *edges, MPI_Fint *ierr), (comm, maxindex, maxedges, indx, edges, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GRAPH_GET = ompi_graph_get_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_GET, #pragma weak MPI_Graph_get_f = ompi_graph_get_f #pragma weak MPI_Graph_get_f08 = ompi_graph_get_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GRAPH_GET, mpi_graph_get, mpi_graph_get_, @@ -57,15 +60,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GRAPH_GET, ompi_graph_get_f, (MPI_Fint *comm, MPI_Fint *maxindex, MPI_Fint *maxedges, MPI_Fint *indx, MPI_Fint *edges, MPI_Fint *ierr), (comm, maxindex, maxedges, indx, edges, ierr) ) +#else +#define ompi_graph_get_f pompi_graph_get_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_graph_get_f(MPI_Fint *comm, MPI_Fint *maxindex, - MPI_Fint *maxedges, MPI_Fint *indx, + +void ompi_graph_get_f(MPI_Fint *comm, MPI_Fint *maxindex, + MPI_Fint *maxedges, MPI_Fint *indx, MPI_Fint *edges, MPI_Fint *ierr) { int c_ierr; @@ -73,11 +75,11 @@ void ompi_graph_get_f(MPI_Fint *comm, MPI_Fint *maxindex, OMPI_ARRAY_NAME_DECL(indx); OMPI_ARRAY_NAME_DECL(edges); - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); OMPI_ARRAY_FINT_2_INT_ALLOC(indx, *maxindex); OMPI_ARRAY_FINT_2_INT_ALLOC(edges, *maxedges); - c_ierr = MPI_Graph_get(c_comm, + c_ierr = PMPI_Graph_get(c_comm, OMPI_FINT_2_INT(*maxindex), OMPI_FINT_2_INT(*maxedges), OMPI_ARRAY_NAME_CONVERT(indx), diff --git a/ompi/mpi/fortran/mpif-h/graph_map_f.c b/ompi/mpi/fortran/mpif-h/graph_map_f.c index 4ab56eac402..8905820bdd0 100644 --- a/ompi/mpi/fortran/mpif-h/graph_map_f.c +++ b/ompi/mpi/fortran/mpif-h/graph_map_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GRAPH_MAP = ompi_graph_map_f #pragma weak pmpi_graph_map = ompi_graph_map_f #pragma weak pmpi_graph_map_ = ompi_graph_map_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Graph_map_f = ompi_graph_map_f #pragma weak PMPI_Graph_map_f08 = ompi_graph_map_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_MAP, pmpi_graph_map, pmpi_graph_map_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_MAP, (MPI_Fint *comm, MPI_Fint *nnodes, MPI_Fint *indx, MPI_Fint *edges, MPI_Fint *newrank, MPI_Fint *ierr), (comm, nnodes, indx, edges, newrank, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GRAPH_MAP = ompi_graph_map_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_MAP, #pragma weak MPI_Graph_map_f = ompi_graph_map_f #pragma weak MPI_Graph_map_f08 = ompi_graph_map_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GRAPH_MAP, mpi_graph_map, mpi_graph_map_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GRAPH_MAP, ompi_graph_map_f, (MPI_Fint *comm, MPI_Fint *nnodes, MPI_Fint *indx, MPI_Fint *edges, MPI_Fint *newrank, MPI_Fint *ierr), (comm, nnodes, indx, edges, newrank, ierr) ) +#else +#define ompi_graph_map_f pompi_graph_map_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_graph_map_f(MPI_Fint *comm, MPI_Fint *nnodes, MPI_Fint *indx, MPI_Fint *edges, MPI_Fint *nrank, MPI_Fint *ierr) { @@ -73,13 +75,13 @@ void ompi_graph_map_f(MPI_Fint *comm, MPI_Fint *nnodes, MPI_Fint *indx, OMPI_ARRAY_NAME_DECL(edges); OMPI_SINGLE_NAME_DECL(nrank); - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); /* Number of edges is equal to the last entry in the index array */ OMPI_ARRAY_FINT_2_INT(edges, indx[*nnodes - 1]); OMPI_ARRAY_FINT_2_INT(indx, *nnodes); - c_ierr = MPI_Graph_map(c_comm, OMPI_FINT_2_INT(*nnodes), + c_ierr = PMPI_Graph_map(c_comm, OMPI_FINT_2_INT(*nnodes), OMPI_ARRAY_NAME_CONVERT(indx), OMPI_ARRAY_NAME_CONVERT(edges), OMPI_SINGLE_NAME_CONVERT(nrank)); diff --git a/ompi/mpi/fortran/mpif-h/graph_neighbors_count_f.c b/ompi/mpi/fortran/mpif-h/graph_neighbors_count_f.c index 5bf7fa0c654..d23292a8128 100644 --- a/ompi/mpi/fortran/mpif-h/graph_neighbors_count_f.c +++ b/ompi/mpi/fortran/mpif-h/graph_neighbors_count_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GRAPH_NEIGHBORS_COUNT = ompi_graph_neighbors_count_f #pragma weak pmpi_graph_neighbors_count = ompi_graph_neighbors_count_f #pragma weak pmpi_graph_neighbors_count_ = ompi_graph_neighbors_count_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Graph_neighbors_count_f = ompi_graph_neighbors_count_f #pragma weak PMPI_Graph_neighbors_count_f08 = ompi_graph_neighbors_count_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_NEIGHBORS_COUNT, pmpi_graph_neighbors_count, pmpi_graph_neighbors_count_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_NEIGHBORS_COUNT, (MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *nneighbors, MPI_Fint *ierr), (comm, rank, nneighbors, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GRAPH_NEIGHBORS_COUNT = ompi_graph_neighbors_count_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_NEIGHBORS_COUNT, #pragma weak MPI_Graph_neighbors_count_f = ompi_graph_neighbors_count_f #pragma weak MPI_Graph_neighbors_count_f08 = ompi_graph_neighbors_count_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GRAPH_NEIGHBORS_COUNT, mpi_graph_neighbors_count, mpi_graph_neighbors_count_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GRAPH_NEIGHBORS_COUNT, ompi_graph_neighbors_count_f, (MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *nneighbors, MPI_Fint *ierr), (comm, rank, nneighbors, ierr) ) +#else +#define ompi_graph_neighbors_count_f pompi_graph_neighbors_count_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_graph_neighbors_count_f(MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *nneighbors, MPI_Fint *ierr) { @@ -71,9 +73,9 @@ void ompi_graph_neighbors_count_f(MPI_Fint *comm, MPI_Fint *rank, MPI_Comm c_comm; OMPI_SINGLE_NAME_DECL(nneighbors); - c_comm = MPI_Comm_f2c(*comm); - - c_ierr = MPI_Graph_neighbors_count(c_comm, + c_comm = PMPI_Comm_f2c(*comm); + + c_ierr = PMPI_Graph_neighbors_count(c_comm, OMPI_FINT_2_INT(*rank), OMPI_SINGLE_NAME_CONVERT(nneighbors)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/graph_neighbors_f.c b/ompi/mpi/fortran/mpif-h/graph_neighbors_f.c index be78a3b0bb6..e89b9733a85 100644 --- a/ompi/mpi/fortran/mpif-h/graph_neighbors_f.c +++ b/ompi/mpi/fortran/mpif-h/graph_neighbors_f.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Universite Bordeaux 1 + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GRAPH_NEIGHBORS = ompi_graph_neighbors_f #pragma weak pmpi_graph_neighbors = ompi_graph_neighbors_f #pragma weak pmpi_graph_neighbors_ = ompi_graph_neighbors_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Graph_neighbors_f = ompi_graph_neighbors_f #pragma weak PMPI_Graph_neighbors_f08 = ompi_graph_neighbors_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_NEIGHBORS, pmpi_graph_neighbors, pmpi_graph_neighbors_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_NEIGHBORS, (MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *maxneighbors, MPI_Fint *neighbors, MPI_Fint *ierr), (comm, rank, maxneighbors, neighbors, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GRAPH_NEIGHBORS = ompi_graph_neighbors_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPH_NEIGHBORS, #pragma weak MPI_Graph_neighbors_f = ompi_graph_neighbors_f #pragma weak MPI_Graph_neighbors_f08 = ompi_graph_neighbors_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GRAPH_NEIGHBORS, mpi_graph_neighbors, mpi_graph_neighbors_, @@ -59,26 +62,25 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GRAPH_NEIGHBORS, ompi_graph_neighbors_f, (MPI_Fint *comm, MPI_Fint *rank, MPI_Fint *maxneighbors, MPI_Fint *neighbors, MPI_Fint *ierr), (comm, rank, maxneighbors, neighbors, ierr) ) +#else +#define ompi_graph_neighbors_f pompi_graph_neighbors_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_graph_neighbors_f(MPI_Fint *comm, MPI_Fint *rank, - MPI_Fint *maxneighbors, MPI_Fint *neighbors, + MPI_Fint *maxneighbors, MPI_Fint *neighbors, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; OMPI_ARRAY_NAME_DECL(neighbors); - c_comm = MPI_Comm_f2c(*comm); - + c_comm = PMPI_Comm_f2c(*comm); + OMPI_ARRAY_FINT_2_INT_ALLOC(neighbors, *maxneighbors); - - c_ierr = MPI_Graph_neighbors(c_comm, + + c_ierr = PMPI_Graph_neighbors(c_comm, OMPI_FINT_2_INT(*rank), OMPI_FINT_2_INT(*maxneighbors), OMPI_ARRAY_NAME_CONVERT(neighbors) diff --git a/ompi/mpi/fortran/mpif-h/graphdims_get_f.c b/ompi/mpi/fortran/mpif-h/graphdims_get_f.c index 7517aa6752b..f0231612102 100644 --- a/ompi/mpi/fortran/mpif-h/graphdims_get_f.c +++ b/ompi/mpi/fortran/mpif-h/graphdims_get_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GRAPHDIMS_GET = ompi_graphdims_get_f #pragma weak pmpi_graphdims_get = ompi_graphdims_get_f #pragma weak pmpi_graphdims_get_ = ompi_graphdims_get_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Graphdims_get_f = ompi_graphdims_get_f #pragma weak PMPI_Graphdims_get_f08 = ompi_graphdims_get_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPHDIMS_GET, pmpi_graphdims_get, pmpi_graphdims_get_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPHDIMS_GET, (MPI_Fint *comm, MPI_Fint *nnodes, MPI_Fint *nedges, MPI_Fint *ierr), (comm, nnodes, nedges, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GRAPHDIMS_GET = ompi_graphdims_get_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GRAPHDIMS_GET, #pragma weak MPI_Graphdims_get_f = ompi_graphdims_get_f #pragma weak MPI_Graphdims_get_f08 = ompi_graphdims_get_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GRAPHDIMS_GET, mpi_graphdims_get, mpi_graphdims_get_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GRAPHDIMS_GET, ompi_graphdims_get_f, (MPI_Fint *comm, MPI_Fint *nnodes, MPI_Fint *nedges, MPI_Fint *ierr), (comm, nnodes, nedges, ierr) ) +#else +#define ompi_graphdims_get_f pompi_graphdims_get_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_graphdims_get_f(MPI_Fint *comm, MPI_Fint *nnodes, MPI_Fint *nedges, MPI_Fint *ierr) { @@ -72,9 +74,9 @@ void ompi_graphdims_get_f(MPI_Fint *comm, MPI_Fint *nnodes, OMPI_SINGLE_NAME_DECL(nnodes); OMPI_SINGLE_NAME_DECL(nedges); - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); - c_ierr = MPI_Graphdims_get(c_comm, + c_ierr = PMPI_Graphdims_get(c_comm, OMPI_SINGLE_NAME_CONVERT(nnodes), OMPI_SINGLE_NAME_CONVERT(nedges) ); diff --git a/ompi/mpi/fortran/mpif-h/grequest_complete_f.c b/ompi/mpi/fortran/mpif-h/grequest_complete_f.c index acd6bf1b9ab..075c2e459f6 100644 --- a/ompi/mpi/fortran/mpif-h/grequest_complete_f.c +++ b/ompi/mpi/fortran/mpif-h/grequest_complete_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GREQUEST_COMPLETE = ompi_grequest_complete_f #pragma weak pmpi_grequest_complete = ompi_grequest_complete_f #pragma weak pmpi_grequest_complete_ = ompi_grequest_complete_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Grequest_complete_f = ompi_grequest_complete_f #pragma weak PMPI_Grequest_complete_f08 = ompi_grequest_complete_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GREQUEST_COMPLETE, pmpi_grequest_complete, pmpi_grequest_complete_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GREQUEST_COMPLETE, (MPI_Fint *request, MPI_Fint *ierr), (request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GREQUEST_COMPLETE = ompi_grequest_complete_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GREQUEST_COMPLETE, #pragma weak MPI_Grequest_complete_f = ompi_grequest_complete_f #pragma weak MPI_Grequest_complete_f08 = ompi_grequest_complete_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GREQUEST_COMPLETE, mpi_grequest_complete, mpi_grequest_complete_, @@ -57,18 +60,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GREQUEST_COMPLETE, ompi_grequest_complete_f, (MPI_Fint *request, MPI_Fint *ierr), (request, ierr) ) +#else +#define ompi_grequest_complete_f pompi_grequest_complete_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_grequest_complete_f(MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Request c_req = MPI_Request_f2c(*request); + MPI_Request c_req = PMPI_Request_f2c(*request); - c_ierr = MPI_Grequest_complete(c_req); + c_ierr = PMPI_Grequest_complete(c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/grequest_start_f.c b/ompi/mpi/fortran/mpif-h/grequest_start_f.c index 051c6432c27..9e12b1c40a1 100644 --- a/ompi/mpi/fortran/mpif-h/grequest_start_f.c +++ b/ompi/mpi/fortran/mpif-h/grequest_start_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GREQUEST_START = ompi_grequest_start_f #pragma weak pmpi_grequest_start = ompi_grequest_start_f #pragma weak pmpi_grequest_start_ = ompi_grequest_start_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Grequest_start_f = ompi_grequest_start_f #pragma weak PMPI_Grequest_start_f08 = ompi_grequest_start_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GREQUEST_START, pmpi_grequest_start, pmpi_grequest_start_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GREQUEST_START, (MPI_F_Grequest_query_function* query_fn, MPI_F_Grequest_free_function* free_fn, MPI_F_Grequest_cancel_function* cancel_fn, MPI_Aint *extra_state, MPI_Fint *request, MPI_Fint *ierr), (query_fn, free_fn, cancel_fn, extra_state, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GREQUEST_START = ompi_grequest_start_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GREQUEST_START, #pragma weak MPI_Grequest_start_f = ompi_grequest_start_f #pragma weak MPI_Grequest_start_f08 = ompi_grequest_start_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GREQUEST_START, mpi_grequest_start, mpi_grequest_start_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GREQUEST_START, ompi_grequest_start_f, (MPI_F_Grequest_query_function* query_fn, MPI_F_Grequest_free_function* free_fn, MPI_F_Grequest_cancel_function* cancel_fn, MPI_Aint *extra_state, MPI_Fint *request, MPI_Fint *ierr), (query_fn, free_fn, cancel_fn, extra_state, request, ierr) ) +#else +#define ompi_grequest_start_f pompi_grequest_start_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_grequest_start_f(MPI_F_Grequest_query_function* query_fn, MPI_F_Grequest_free_function* free_fn, MPI_F_Grequest_cancel_function* cancel_fn, @@ -72,10 +74,10 @@ void ompi_grequest_start_f(MPI_F_Grequest_query_function* query_fn, { int c_ierr; MPI_Request c_req; - c_ierr = MPI_Grequest_start( + c_ierr = PMPI_Grequest_start( (MPI_Grequest_query_function *) query_fn, (MPI_Grequest_free_function *) free_fn, - (MPI_Grequest_cancel_function *) cancel_fn, + (MPI_Grequest_cancel_function *) cancel_fn, extra_state, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -85,6 +87,6 @@ void ompi_grequest_start_f(MPI_F_Grequest_query_function* query_fn, ompi_grequest_t *g = (ompi_grequest_t*) c_req; g->greq_funcs_are_c = false; - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/group_compare_f.c b/ompi/mpi/fortran/mpif-h/group_compare_f.c index d04abb3c59e..bc80330bcc6 100644 --- a/ompi/mpi/fortran/mpif-h/group_compare_f.c +++ b/ompi/mpi/fortran/mpif-h/group_compare_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_COMPARE = ompi_group_compare_f #pragma weak pmpi_group_compare = ompi_group_compare_f #pragma weak pmpi_group_compare_ = ompi_group_compare_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Group_compare_f = ompi_group_compare_f #pragma weak PMPI_Group_compare_f08 = ompi_group_compare_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_COMPARE, pmpi_group_compare, pmpi_group_compare_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_COMPARE, MPI_Fint *result, MPI_Fint *ierror), (group1,group2,result,ierror)) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_COMPARE = ompi_group_compare_f @@ -52,7 +56,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_COMPARE, #pragma weak MPI_Group_compare_f08 = ompi_group_compare_f #endif -#if ! OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS +#if ! OMPI_BUILD_MPI_PROFILING && ! OPAL_HAVE_WEAK_SYMBOLS OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_COMPARE, mpi_group_compare, mpi_group_compare_, @@ -65,10 +69,11 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_COMPARE, -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" +#if OMPI_BUILD_MPI_PROFILING && ! 
OPAL_HAVE_WEAK_SYMBOLS +#define ompi_group_compare_f pompi_group_compare_f #endif + void ompi_group_compare_f(MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *result, MPI_Fint *ierr) { @@ -77,10 +82,10 @@ void ompi_group_compare_f(MPI_Fint *group1, MPI_Fint *group2, OMPI_SINGLE_NAME_DECL(result); /* make the fortran to c representation conversion */ - c_group1 = MPI_Group_f2c(*group1); - c_group2 = MPI_Group_f2c(*group2); + c_group1 = PMPI_Group_f2c(*group1); + c_group2 = PMPI_Group_f2c(*group2); - c_ierr = MPI_Group_compare(c_group1, c_group2, + c_ierr = PMPI_Group_compare(c_group1, c_group2, OMPI_SINGLE_NAME_CONVERT(result) ); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/group_difference_f.c b/ompi/mpi/fortran/mpif-h/group_difference_f.c index 293fe2f7cd1..ea8356cdb4d 100644 --- a/ompi/mpi/fortran/mpif-h/group_difference_f.c +++ b/ompi/mpi/fortran/mpif-h/group_difference_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_DIFFERENCE = ompi_group_difference_f #pragma weak pmpi_group_difference = ompi_group_difference_f #pragma weak pmpi_group_difference_ = ompi_group_difference_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Group_difference_f = ompi_group_difference_f #pragma weak PMPI_Group_difference_f08 = ompi_group_difference_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_DIFFERENCE, pmpi_group_difference, pmpi_group_difference_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_DIFFERENCE, (MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr), (group1, group2, newgroup, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_DIFFERENCE = ompi_group_difference_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_DIFFERENCE, #pragma weak MPI_Group_difference_f = ompi_group_difference_f #pragma weak MPI_Group_difference_f08 = ompi_group_difference_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_DIFFERENCE, mpi_group_difference, mpi_group_difference_, @@ -58,27 +61,26 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_DIFFERENCE, ompi_group_difference_f, (MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr), (group1, group2, newgroup, ierr) ) +#else +#define ompi_group_difference_f pompi_group_difference_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_group_difference_f(MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr) { int c_ierr; ompi_group_t *c_group1, *c_group2, *c_newgroup; /* Make the fortran to c representation conversion */ - c_group1 = MPI_Group_f2c(*group1); - c_group2 = MPI_Group_f2c(*group2); - - c_ierr = MPI_Group_difference(c_group1, c_group2, &c_newgroup); + c_group1 = PMPI_Group_f2c(*group1); + c_group2 = PMPI_Group_f2c(*group2); + + c_ierr = PMPI_Group_difference(c_group1, c_group2, &c_newgroup); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); /* translate the results from c to fortran */ if (MPI_SUCCESS == c_ierr) { - *newgroup = c_newgroup->grp_f_to_c_index; + *newgroup = c_newgroup->grp_f_to_c_index; } } diff --git a/ompi/mpi/fortran/mpif-h/group_excl_f.c b/ompi/mpi/fortran/mpif-h/group_excl_f.c index 05018b5ed46..e739d5e746f 100644 --- a/ompi/mpi/fortran/mpif-h/group_excl_f.c +++ b/ompi/mpi/fortran/mpif-h/group_excl_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_EXCL = ompi_group_excl_f #pragma weak pmpi_group_excl = ompi_group_excl_f #pragma weak pmpi_group_excl_ = ompi_group_excl_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Group_excl_f = ompi_group_excl_f #pragma weak PMPI_Group_excl_f08 = ompi_group_excl_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_EXCL, pmpi_group_excl, pmpi_group_excl_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_EXCL, (MPI_Fint *group, MPI_Fint *n, MPI_Fint *ranks, MPI_Fint *newgroup, MPI_Fint *ierr), (group, n, ranks, newgroup, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_EXCL = ompi_group_excl_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_EXCL, #pragma weak MPI_Group_excl_f = ompi_group_excl_f #pragma weak MPI_Group_excl_f08 = ompi_group_excl_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_EXCL, mpi_group_excl, mpi_group_excl_, @@ -58,15 +61,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_EXCL, ompi_group_excl_f, (MPI_Fint *group, MPI_Fint *n, MPI_Fint *ranks, MPI_Fint *newgroup, MPI_Fint *ierr), (group, n, ranks, newgroup, ierr) ) +#else +#define ompi_group_excl_f pompi_group_excl_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_group_excl_f(MPI_Fint *group, MPI_Fint *n, - MPI_Fint *ranks, MPI_Fint *newgroup, + MPI_Fint *ranks, MPI_Fint *newgroup, MPI_Fint *ierr) { int c_ierr; @@ -74,10 +76,10 @@ void ompi_group_excl_f(MPI_Fint *group, MPI_Fint *n, OMPI_ARRAY_NAME_DECL(ranks); /* Make the fortran to c representation conversion */ - c_group = MPI_Group_f2c(*group); + c_group = PMPI_Group_f2c(*group); OMPI_ARRAY_FINT_2_INT(ranks, *n); - c_ierr = MPI_Group_excl(c_group, + c_ierr = PMPI_Group_excl(c_group, OMPI_FINT_2_INT(*n), OMPI_ARRAY_NAME_CONVERT(ranks), &c_newgroup); diff --git a/ompi/mpi/fortran/mpif-h/group_free_f.c b/ompi/mpi/fortran/mpif-h/group_free_f.c index f7bd065dd64..3875b8cf0cd 100644 --- a/ompi/mpi/fortran/mpif-h/group_free_f.c +++ b/ompi/mpi/fortran/mpif-h/group_free_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_FREE = ompi_group_free_f #pragma weak pmpi_group_free = ompi_group_free_f #pragma weak pmpi_group_free_ = ompi_group_free_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Group_free_f = ompi_group_free_f #pragma weak PMPI_Group_free_f08 = ompi_group_free_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_FREE, pmpi_group_free, pmpi_group_free_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_FREE, (MPI_Fint *group, MPI_Fint *ierr), (group, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_FREE = ompi_group_free_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_FREE, #pragma weak MPI_Group_free_f = ompi_group_free_f #pragma weak MPI_Group_free_f08 = ompi_group_free_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_FREE, mpi_group_free, mpi_group_free_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_FREE, ompi_group_free_f, (MPI_Fint *group, MPI_Fint *ierr), (group, ierr) ) +#else +#define ompi_group_free_f pompi_group_free_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_group_free_f(MPI_Fint *group, MPI_Fint *ierr) { int c_ierr; @@ -72,8 +74,8 @@ void ompi_group_free_f(MPI_Fint *group, MPI_Fint *ierr) /* Make the fortran to c representation conversion */ - c_group = MPI_Group_f2c(*group); - c_ierr = MPI_Group_free( &c_group ); + c_group = PMPI_Group_f2c(*group); + c_ierr = PMPI_Group_free( &c_group ); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); /* This value comes from the MPI_GROUP_NULL value in mpif.h. 
Do not diff --git a/ompi/mpi/fortran/mpif-h/group_incl_f.c b/ompi/mpi/fortran/mpif-h/group_incl_f.c index 8da30db390b..0bc4b3edcd7 100644 --- a/ompi/mpi/fortran/mpif-h/group_incl_f.c +++ b/ompi/mpi/fortran/mpif-h/group_incl_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_INCL = ompi_group_incl_f #pragma weak pmpi_group_incl = ompi_group_incl_f #pragma weak pmpi_group_incl_ = ompi_group_incl_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Group_incl_f = ompi_group_incl_f #pragma weak PMPI_Group_incl_f08 = ompi_group_incl_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_INCL, pmpi_group_incl, pmpi_group_incl_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_INCL, (MPI_Fint *group, MPI_Fint *n, MPI_Fint *ranks, MPI_Fint *newgroup, MPI_Fint *ierr), (group, n, ranks, newgroup, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_INCL = ompi_group_incl_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_INCL, #pragma weak MPI_Group_incl_f = ompi_group_incl_f #pragma weak MPI_Group_incl_f08 = ompi_group_incl_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! 
OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_INCL, mpi_group_incl, mpi_group_incl_, @@ -58,25 +61,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_INCL, ompi_group_incl_f, (MPI_Fint *group, MPI_Fint *n, MPI_Fint *ranks, MPI_Fint *newgroup, MPI_Fint *ierr), (group, n, ranks, newgroup, ierr) ) +#else +#define ompi_group_incl_f pompi_group_incl_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_group_incl_f(MPI_Fint *group, MPI_Fint *n, MPI_Fint *ranks, MPI_Fint *newgroup, MPI_Fint *ierr) { /* local variables */ int c_ierr; ompi_group_t *c_group, *c_newgroup; OMPI_ARRAY_NAME_DECL(ranks); - + /* make the fortran to c representation conversion */ - c_group = MPI_Group_f2c(*group); + c_group = PMPI_Group_f2c(*group); OMPI_ARRAY_FINT_2_INT(ranks, *n); - c_ierr = MPI_Group_incl(c_group, + c_ierr = PMPI_Group_incl(c_group, OMPI_FINT_2_INT(*n), OMPI_ARRAY_NAME_CONVERT(ranks), &c_newgroup); diff --git a/ompi/mpi/fortran/mpif-h/group_intersection_f.c b/ompi/mpi/fortran/mpif-h/group_intersection_f.c index 42865325a6d..7cbca9de649 100644 --- a/ompi/mpi/fortran/mpif-h/group_intersection_f.c +++ b/ompi/mpi/fortran/mpif-h/group_intersection_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_INTERSECTION = ompi_group_intersection_f #pragma weak pmpi_group_intersection = ompi_group_intersection_f #pragma weak pmpi_group_intersection_ = ompi_group_intersection_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Group_intersection_f = ompi_group_intersection_f #pragma weak PMPI_Group_intersection_f08 = ompi_group_intersection_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_INTERSECTION, pmpi_group_intersection, pmpi_group_intersection_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_INTERSECTION, (MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr), (group1, group2, newgroup, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_INTERSECTION = ompi_group_intersection_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_INTERSECTION, #pragma weak MPI_Group_intersection_f = ompi_group_intersection_f #pragma weak MPI_Group_intersection_f08 = ompi_group_intersection_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_INTERSECTION, mpi_group_intersection, mpi_group_intersection_, @@ -58,23 +61,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_INTERSECTION, ompi_group_intersection_f, (MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr), (group1, group2, newgroup, ierr) ) +#else +#define ompi_group_intersection_f pompi_group_intersection_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_group_intersection_f(MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr) { int c_ierr; ompi_group_t *c_group1, *c_group2, *c_newgroup; /* Make the fortran to c representation conversion */ - c_group1 = MPI_Group_f2c(*group1); - c_group2 = MPI_Group_f2c(*group2); - - c_ierr = MPI_Group_intersection(c_group1, c_group2, &c_newgroup); + c_group1 = PMPI_Group_f2c(*group1); + c_group2 = PMPI_Group_f2c(*group2); + + c_ierr = PMPI_Group_intersection(c_group1, c_group2, &c_newgroup); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); /* translate the results from c to fortran */ diff --git a/ompi/mpi/fortran/mpif-h/group_range_excl_f.c b/ompi/mpi/fortran/mpif-h/group_range_excl_f.c index 2eebe63f905..8e138d5d29a 100644 --- a/ompi/mpi/fortran/mpif-h/group_range_excl_f.c +++ b/ompi/mpi/fortran/mpif-h/group_range_excl_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_RANGE_EXCL = ompi_group_range_excl_f #pragma weak pmpi_group_range_excl = ompi_group_range_excl_f #pragma weak pmpi_group_range_excl_ = ompi_group_range_excl_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Group_range_excl_f = ompi_group_range_excl_f #pragma weak PMPI_Group_range_excl_f08 = ompi_group_range_excl_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_RANGE_EXCL, pmpi_group_range_excl, pmpi_group_range_excl_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_RANGE_EXCL, (MPI_Fint *group, MPI_Fint *n, MPI_Fint ranges[][3], MPI_Fint *newgroup, MPI_Fint *ierr), (group, n, ranges, newgroup, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_RANGE_EXCL = ompi_group_range_excl_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_RANGE_EXCL, #pragma weak MPI_Group_range_excl_f = ompi_group_range_excl_f #pragma weak MPI_Group_range_excl_f08 = ompi_group_range_excl_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_RANGE_EXCL, mpi_group_range_excl, mpi_group_range_excl_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_RANGE_EXCL, ompi_group_range_excl_f, (MPI_Fint *group, MPI_Fint *n, MPI_Fint ranges[][3], MPI_Fint *newgroup, MPI_Fint *ierr), (group, n, ranges, newgroup, ierr) ) +#else +#define ompi_group_range_excl_f pompi_group_range_excl_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_group_range_excl_f(MPI_Fint *group, MPI_Fint *n, MPI_Fint ranges[][3], MPI_Fint *newgroup, MPI_Fint *ierr) { int c_ierr; @@ -72,11 +74,11 @@ void ompi_group_range_excl_f(MPI_Fint *group, MPI_Fint *n, MPI_Fint ranges[][3], OMPI_2_DIM_ARRAY_NAME_DECL(ranges, 3); /* Make the fortran to c representation conversion */ - c_group = MPI_Group_f2c(*group); + c_group = PMPI_Group_f2c(*group); OMPI_2_DIM_ARRAY_FINT_2_INT(ranges, *n, 3); - c_ierr = MPI_Group_range_excl(c_group, - OMPI_FINT_2_INT(*n), + c_ierr = PMPI_Group_range_excl(c_group, + OMPI_FINT_2_INT(*n), OMPI_ARRAY_NAME_CONVERT(ranges), &c_newgroup); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/group_range_incl_f.c b/ompi/mpi/fortran/mpif-h/group_range_incl_f.c index 73aeac007da..d3059aff69f 100644 --- a/ompi/mpi/fortran/mpif-h/group_range_incl_f.c +++ b/ompi/mpi/fortran/mpif-h/group_range_incl_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_RANGE_INCL = ompi_group_range_incl_f #pragma weak pmpi_group_range_incl = ompi_group_range_incl_f #pragma weak pmpi_group_range_incl_ = ompi_group_range_incl_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Group_range_incl_f = ompi_group_range_incl_f #pragma weak PMPI_Group_range_incl_f08 = ompi_group_range_incl_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_RANGE_INCL, pmpi_group_range_incl, pmpi_group_range_incl_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_RANGE_INCL, (MPI_Fint *group, MPI_Fint *n, MPI_Fint ranges[][3], MPI_Fint *newgroup, MPI_Fint *ierr), (group, n, ranges, newgroup, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_RANGE_INCL = ompi_group_range_incl_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_RANGE_INCL, #pragma weak MPI_Group_range_incl_f = ompi_group_range_incl_f #pragma weak MPI_Group_range_incl_f08 = ompi_group_range_incl_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_RANGE_INCL, mpi_group_range_incl, mpi_group_range_incl_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_RANGE_INCL, ompi_group_range_incl_f, (MPI_Fint *group, MPI_Fint *n, MPI_Fint ranges[][3], MPI_Fint *newgroup, MPI_Fint *ierr), (group, n, ranges, newgroup, ierr) ) +#else +#define ompi_group_range_incl_f pompi_group_range_incl_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_group_range_incl_f(MPI_Fint *group, MPI_Fint *n, MPI_Fint ranges[][3], MPI_Fint *newgroup, MPI_Fint *ierr) { int c_ierr; @@ -72,11 +74,11 @@ void ompi_group_range_incl_f(MPI_Fint *group, MPI_Fint *n, MPI_Fint ranges[][3], OMPI_2_DIM_ARRAY_NAME_DECL(ranges, 3); /* Make the fortran to c representation conversion */ - c_group = MPI_Group_f2c(*group); + c_group = PMPI_Group_f2c(*group); - OMPI_2_DIM_ARRAY_FINT_2_INT(ranges, *n, 3); - c_ierr = MPI_Group_range_incl(c_group, - OMPI_FINT_2_INT(*n), + OMPI_2_DIM_ARRAY_FINT_2_INT(ranges, *n, 3); + c_ierr = PMPI_Group_range_incl(c_group, + OMPI_FINT_2_INT(*n), OMPI_ARRAY_NAME_CONVERT(ranges), &c_newgroup); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/group_rank_f.c b/ompi/mpi/fortran/mpif-h/group_rank_f.c index 674d4eadb84..fac60432b60 100644 --- a/ompi/mpi/fortran/mpif-h/group_rank_f.c +++ b/ompi/mpi/fortran/mpif-h/group_rank_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_RANK = ompi_group_rank_f #pragma weak pmpi_group_rank = ompi_group_rank_f #pragma weak pmpi_group_rank_ = ompi_group_rank_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Group_rank_f = ompi_group_rank_f #pragma weak PMPI_Group_rank_f08 = ompi_group_rank_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_RANK, pmpi_group_rank, pmpi_group_rank_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_RANK, (MPI_Fint *group, MPI_Fint *rank, MPI_Fint *ierr), (group, rank, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_RANK = ompi_group_rank_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_RANK, #pragma weak MPI_Group_rank_f = ompi_group_rank_f #pragma weak MPI_Group_rank_f08 = ompi_group_rank_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_RANK, mpi_group_rank, mpi_group_rank_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_RANK, ompi_group_rank_f, (MPI_Fint *group, MPI_Fint *rank, MPI_Fint *ierr), (group, rank, ierr) ) +#else +#define ompi_group_rank_f pompi_group_rank_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_group_rank_f(MPI_Fint *group, MPI_Fint *rank, MPI_Fint *ierr) { int c_ierr; @@ -72,9 +74,9 @@ void ompi_group_rank_f(MPI_Fint *group, MPI_Fint *rank, MPI_Fint *ierr) OMPI_SINGLE_NAME_DECL(rank); /* Make the fortran to c representation conversion */ - c_group = MPI_Group_f2c(*group); - - c_ierr = MPI_Group_rank(c_group, OMPI_SINGLE_NAME_CONVERT(rank)); + c_group = PMPI_Group_f2c(*group); + + c_ierr = PMPI_Group_rank(c_group, OMPI_SINGLE_NAME_CONVERT(rank)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/group_size_f.c b/ompi/mpi/fortran/mpif-h/group_size_f.c index 6286bf3be15..6e25c35d1f0 100644 --- a/ompi/mpi/fortran/mpif-h/group_size_f.c +++ b/ompi/mpi/fortran/mpif-h/group_size_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_SIZE = ompi_group_size_f #pragma weak pmpi_group_size = ompi_group_size_f #pragma weak pmpi_group_size_ = ompi_group_size_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Group_size_f = ompi_group_size_f #pragma weak PMPI_Group_size_f08 = ompi_group_size_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_SIZE, pmpi_group_size, pmpi_group_size_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_SIZE, (MPI_Fint *group, MPI_Fint *size, MPI_Fint *ierr), (group, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_SIZE = ompi_group_size_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_SIZE, #pragma weak MPI_Group_size_f = ompi_group_size_f #pragma weak MPI_Group_size_f08 = ompi_group_size_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_SIZE, mpi_group_size, mpi_group_size_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_SIZE, ompi_group_size_f, (MPI_Fint *group, MPI_Fint *size, MPI_Fint *ierr), (group, size, ierr) ) +#else +#define ompi_group_size_f pompi_group_size_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_group_size_f(MPI_Fint *group, MPI_Fint *size, MPI_Fint *ierr) { int c_ierr; @@ -72,9 +74,9 @@ void ompi_group_size_f(MPI_Fint *group, MPI_Fint *size, MPI_Fint *ierr) OMPI_SINGLE_NAME_DECL(size); /* Make the fortran to c representation conversion */ - c_group = MPI_Group_f2c(*group); - - c_ierr = MPI_Group_size(c_group, OMPI_SINGLE_NAME_CONVERT(size)); + c_group = PMPI_Group_f2c(*group); + + c_ierr = PMPI_Group_size(c_group, OMPI_SINGLE_NAME_CONVERT(size)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/group_translate_ranks_f.c b/ompi/mpi/fortran/mpif-h/group_translate_ranks_f.c index 0059997ca5e..37b13cb9af3 100644 --- a/ompi/mpi/fortran/mpif-h/group_translate_ranks_f.c +++ b/ompi/mpi/fortran/mpif-h/group_translate_ranks_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_TRANSLATE_RANKS = ompi_group_translate_ranks_f #pragma weak pmpi_group_translate_ranks = ompi_group_translate_ranks_f #pragma weak pmpi_group_translate_ranks_ = ompi_group_translate_ranks_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Group_translate_ranks_f = ompi_group_translate_ranks_f #pragma weak PMPI_Group_translate_ranks_f08 = ompi_group_translate_ranks_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_TRANSLATE_RANKS, pmpi_group_translate_ranks, pmpi_group_translate_ranks_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_TRANSLATE_RANKS, (MPI_Fint *group1, MPI_Fint *n, MPI_Fint *ranks1, MPI_Fint *group2, MPI_Fint *ranks2, MPI_Fint *ierr), (group1, n, ranks1, group2, ranks2, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_TRANSLATE_RANKS = ompi_group_translate_ranks_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_TRANSLATE_RANKS, #pragma weak MPI_Group_translate_ranks_f = ompi_group_translate_ranks_f #pragma weak MPI_Group_translate_ranks_f08 = ompi_group_translate_ranks_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_TRANSLATE_RANKS, mpi_group_translate_ranks, mpi_group_translate_ranks_, @@ -58,14 +61,13 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_TRANSLATE_RANKS, ompi_group_translate_ranks_f, (MPI_Fint *group1, MPI_Fint *n, MPI_Fint *ranks1, MPI_Fint *group2, MPI_Fint *ranks2, MPI_Fint *ierr), (group1, n, ranks1, group2, ranks2, ierr) ) +#else +#define ompi_group_translate_ranks_f pompi_group_translate_ranks_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_group_translate_ranks_f(MPI_Fint *group1, MPI_Fint *n, + +void ompi_group_translate_ranks_f(MPI_Fint *group1, MPI_Fint *n, MPI_Fint *ranks1, MPI_Fint *group2, MPI_Fint *ranks2, MPI_Fint *ierr) { @@ -75,16 +77,16 @@ void ompi_group_translate_ranks_f(MPI_Fint *group1, MPI_Fint *n, OMPI_ARRAY_NAME_DECL(ranks2); /* Make the fortran to c representation conversion */ - c_group1 = MPI_Group_f2c(*group1); - c_group2 = MPI_Group_f2c(*group2); + c_group1 = PMPI_Group_f2c(*group1); + c_group2 = PMPI_Group_f2c(*group2); OMPI_ARRAY_FINT_2_INT(ranks1, *n); OMPI_ARRAY_FINT_2_INT_ALLOC(ranks2, *n); - c_ierr = MPI_Group_translate_ranks(c_group1, + c_ierr = PMPI_Group_translate_ranks(c_group1, OMPI_FINT_2_INT(*n), - OMPI_ARRAY_NAME_CONVERT(ranks1), - c_group2, + OMPI_ARRAY_NAME_CONVERT(ranks1), + c_group2, OMPI_ARRAY_NAME_CONVERT(ranks2) ); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/group_union_f.c b/ompi/mpi/fortran/mpif-h/group_union_f.c index 4e993cb5fab..df6a326cd33 100644 --- a/ompi/mpi/fortran/mpif-h/group_union_f.c +++ b/ompi/mpi/fortran/mpif-h/group_union_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/group/group.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_GROUP_UNION = ompi_group_union_f #pragma weak pmpi_group_union = ompi_group_union_f #pragma weak pmpi_group_union_ = ompi_group_union_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Group_union_f = ompi_group_union_f #pragma weak PMPI_Group_union_f08 = ompi_group_union_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_UNION, pmpi_group_union, pmpi_group_union_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_UNION, (MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr), (group1, group2, newgroup, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_GROUP_UNION = ompi_group_union_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_UNION, #pragma weak MPI_Group_union_f = ompi_group_union_f #pragma weak MPI_Group_union_f08 = ompi_group_union_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_UNION, mpi_group_union, mpi_group_union_, @@ -58,23 +61,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_UNION, ompi_group_union_f, (MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr), (group1, group2, newgroup, ierr) ) +#else +#define ompi_group_union_f pompi_group_union_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_group_union_f(MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr) { int c_ierr; ompi_group_t *c_group1, *c_group2, *c_newgroup; /* Make the fortran to c representation conversion */ - c_group1 = MPI_Group_f2c(*group1); - c_group2 = MPI_Group_f2c(*group2); - - c_ierr = MPI_Group_union(c_group1, c_group2, &c_newgroup); + c_group1 = PMPI_Group_f2c(*group1); + c_group2 = PMPI_Group_f2c(*group2); + + c_ierr = PMPI_Group_union(c_group1, c_group2, &c_newgroup); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); /* translate the results from c to fortran */ diff --git a/ompi/mpi/fortran/mpif-h/iallgather_f.c b/ompi/mpi/fortran/mpif-h/iallgather_f.c index 7be043cffe3..6b61cc55522 100644 --- a/ompi/mpi/fortran/mpif-h/iallgather_f.c +++ b/ompi/mpi/fortran/mpif-h/iallgather_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IALLGATHER = ompi_iallgather_f #pragma weak pmpi_iallgather = ompi_iallgather_f #pragma weak pmpi_iallgather_ = ompi_iallgather_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Iallgather_f = ompi_iallgather_f #pragma weak PMPI_Iallgather_f08 = ompi_iallgather_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IALLGATHER, pmpi_iallgather, pmpi_iallgather_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLGATHER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IALLGATHER = ompi_iallgather_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLGATHER, #pragma weak MPI_Iallgather_f = ompi_iallgather_f #pragma weak MPI_Iallgather_f08 = ompi_iallgather_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IALLGATHER, mpi_iallgather, mpi_iallgather_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IALLGATHER, ompi_iallgather_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, ierr) ) +#else +#define ompi_iallgather_f pompi_iallgather_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_iallgather_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) @@ -74,22 +76,22 @@ void ompi_iallgather_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Request c_req; MPI_Datatype c_sendtype, c_recvtype; - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = MPI_Iallgather(sendbuf, - OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, - OMPI_FINT_2_INT(*recvcount), - c_recvtype, c_comm, &c_req); + ierr_c = PMPI_Iallgather(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + OMPI_FINT_2_INT(*recvcount), + c_recvtype, c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); - if (MPI_SUCCESS == ierr_c) *request = MPI_Request_c2f(c_req); + if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_req); } diff --git a/ompi/mpi/fortran/mpif-h/iallgatherv_f.c b/ompi/mpi/fortran/mpif-h/iallgatherv_f.c index 9f478441f17..19671fea103 100644 --- a/ompi/mpi/fortran/mpif-h/iallgatherv_f.c +++ b/ompi/mpi/fortran/mpif-h/iallgatherv_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. 
All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IALLGATHERV = ompi_iallgatherv_f #pragma weak pmpi_iallgatherv = ompi_iallgatherv_f #pragma weak pmpi_iallgatherv_ = ompi_iallgatherv_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Iallgatherv_f = ompi_iallgatherv_f #pragma weak PMPI_Iallgatherv_f08 = ompi_iallgatherv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IALLGATHERV, pmpi_iallgatherv, pmpi_iallgatherv_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLGATHERV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IALLGATHERV = ompi_iallgatherv_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLGATHERV, #pragma weak MPI_Iallgatherv_f = ompi_iallgatherv_f #pragma weak MPI_Iallgatherv_f08 = ompi_iallgatherv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IALLGATHERV, mpi_iallgatherv, mpi_iallgatherv_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IALLGATHERV, ompi_iallgatherv_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, request, ierr) ) +#else +#define ompi_iallgatherv_f pompi_iallgatherv_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_iallgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, @@ -77,11 +79,11 @@ void ompi_iallgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(displs); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); OMPI_ARRAY_FINT_2_INT(displs, size); @@ -89,16 +91,16 @@ void ompi_iallgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = MPI_Iallgatherv(sendbuf, - OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - OMPI_ARRAY_NAME_CONVERT(displs), - c_recvtype, c_comm, &c_request); + ierr_c = PMPI_Iallgatherv(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + OMPI_ARRAY_NAME_CONVERT(recvcounts), + OMPI_ARRAY_NAME_CONVERT(displs), + c_recvtype, c_comm, &c_request); if (NULL != ierr) *ierr = 
OMPI_INT_2_FINT(ierr_c); - if (MPI_SUCCESS == ierr_c) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_request); OMPI_ARRAY_FINT_2_INT_CLEANUP(recvcounts); OMPI_ARRAY_FINT_2_INT_CLEANUP(displs); diff --git a/ompi/mpi/fortran/mpif-h/iallreduce_f.c b/ompi/mpi/fortran/mpif-h/iallreduce_f.c index ea657ce8ace..3bcb92967d5 100644 --- a/ompi/mpi/fortran/mpif-h/iallreduce_f.c +++ b/ompi/mpi/fortran/mpif-h/iallreduce_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IALLREDUCE = ompi_iallreduce_f #pragma weak pmpi_iallreduce = ompi_iallreduce_f #pragma weak pmpi_iallreduce_ = ompi_iallreduce_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Iallreduce_f = ompi_iallreduce_f #pragma weak PMPI_Iallreduce_f08 = ompi_iallreduce_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IALLREDUCE, pmpi_iallreduce, pmpi_iallreduce_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLREDUCE, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IALLREDUCE = ompi_iallreduce_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLREDUCE, #pragma weak MPI_Iallreduce_f = ompi_iallreduce_f #pragma weak MPI_Iallreduce_f08 = ompi_iallreduce_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IALLREDUCE, mpi_iallreduce, mpi_iallreduce_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IALLREDUCE, ompi_iallreduce_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, request, ierr) ) +#else +#define ompi_iallreduce_f pompi_iallreduce_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_iallreduce_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) @@ -75,17 +77,17 @@ void ompi_iallreduce_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Request c_request; MPI_Op c_op; - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = MPI_Iallreduce(sendbuf, recvbuf, - OMPI_FINT_2_INT(*count), - c_type, c_op, c_comm, &c_request); + ierr_c = PMPI_Iallreduce(sendbuf, recvbuf, + OMPI_FINT_2_INT(*count), + c_type, c_op, c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); - if (MPI_SUCCESS == ierr_c) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/ialltoall_f.c b/ompi/mpi/fortran/mpif-h/ialltoall_f.c index 89c90782841..27b8ca4bd64 100644 --- a/ompi/mpi/fortran/mpif-h/ialltoall_f.c +++ b/ompi/mpi/fortran/mpif-h/ialltoall_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IALLTOALL = ompi_ialltoall_f #pragma weak pmpi_ialltoall = ompi_ialltoall_f #pragma weak pmpi_ialltoall_ = ompi_ialltoall_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Ialltoall_f = ompi_ialltoall_f #pragma weak PMPI_Ialltoall_f08 = ompi_ialltoall_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IALLTOALL, pmpi_ialltoall, pmpi_ialltoall_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLTOALL, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IALLTOALL = ompi_ialltoall_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLTOALL, #pragma weak MPI_Ialltoall_f = ompi_ialltoall_f #pragma weak MPI_Ialltoall_f08 = ompi_ialltoall_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IALLTOALL, mpi_ialltoall, mpi_ialltoall_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IALLTOALL, ompi_ialltoall_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, ierr) ) +#else +#define ompi_ialltoall_f pompi_ialltoall_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ialltoall_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) @@ -74,21 +76,21 @@ void ompi_ialltoall_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Request c_req; MPI_Datatype c_sendtype, c_recvtype; - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Ialltoall(sendbuf, + c_ierr = PMPI_Ialltoall(sendbuf, OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, + c_sendtype, + recvbuf, OMPI_FINT_2_INT(*recvcount), c_recvtype, c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_req); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_req); } diff --git a/ompi/mpi/fortran/mpif-h/ialltoallv_f.c b/ompi/mpi/fortran/mpif-h/ialltoallv_f.c index e63d0fa255e..0a447f5d5b6 100644 --- a/ompi/mpi/fortran/mpif-h/ialltoallv_f.c +++ b/ompi/mpi/fortran/mpif-h/ialltoallv_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IALLTOALLV = ompi_ialltoallv_f #pragma weak pmpi_ialltoallv = ompi_ialltoallv_f #pragma weak pmpi_ialltoallv_ = ompi_ialltoallv_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Ialltoallv_f = ompi_ialltoallv_f #pragma weak PMPI_Ialltoallv_f08 = ompi_ialltoallv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IALLTOALLV, pmpi_ialltoallv, pmpi_ialltoallv_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLTOALLV, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IALLTOALLV = ompi_ialltoallv_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLTOALLV, #pragma weak MPI_Ialltoallv_f = ompi_ialltoallv_f #pragma weak MPI_Ialltoallv_f08 = ompi_ialltoallv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IALLTOALLV, mpi_ialltoallv, mpi_ialltoallv_, @@ -58,16 +61,15 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IALLTOALLV, ompi_ialltoallv_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, request, ierr) ) +#else +#define ompi_ialltoallv_f pompi_ialltoallv_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ialltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, - MPI_Fint *rdispls, MPI_Fint *recvtype, + MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { MPI_Comm c_comm; @@ -79,11 +81,11 @@ void ompi_ialltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(rdispls); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(sendcounts, size); OMPI_ARRAY_FINT_2_INT(sdispls, size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); @@ -93,16 +95,16 @@ void ompi_ialltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Ialltoallv(sendbuf, + c_ierr = PMPI_Ialltoallv(sendbuf, OMPI_ARRAY_NAME_CONVERT(sendcounts), - OMPI_ARRAY_NAME_CONVERT(sdispls), - c_sendtype, - recvbuf, + OMPI_ARRAY_NAME_CONVERT(sdispls), + c_sendtype, + recvbuf, OMPI_ARRAY_NAME_CONVERT(recvcounts), OMPI_ARRAY_NAME_CONVERT(rdispls), c_recvtype, c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); OMPI_ARRAY_FINT_2_INT_CLEANUP(sendcounts); OMPI_ARRAY_FINT_2_INT_CLEANUP(sdispls); diff --git a/ompi/mpi/fortran/mpif-h/ialltoallw_f.c b/ompi/mpi/fortran/mpif-h/ialltoallw_f.c index 4995f8b198c..85cedd932f2 100644 --- a/ompi/mpi/fortran/mpif-h/ialltoallw_f.c +++ b/ompi/mpi/fortran/mpif-h/ialltoallw_f.c @@ -5,15 +5,17 @@ * Copyright (c) 
2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IALLTOALLW = ompi_ialltoallw_f #pragma weak pmpi_ialltoallw = ompi_ialltoallw_f #pragma weak pmpi_ialltoallw_ = ompi_ialltoallw_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Ialltoallw_f = ompi_ialltoallw_f #pragma weak PMPI_Ialltoallw_f08 = ompi_ialltoallw_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IALLTOALLW, pmpi_ialltoallw, pmpi_ialltoallw_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLTOALLW, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IALLTOALLW = ompi_ialltoallw_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IALLTOALLW, #pragma weak MPI_Ialltoallw_f = ompi_ialltoallw_f #pragma weak MPI_Ialltoallw_f08 = ompi_ialltoallw_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IALLTOALLW, mpi_ialltoallw, mpi_ialltoallw_, @@ -58,15 +61,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IALLTOALLW, ompi_ialltoallw_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, request, ierr) ) +#else +#define ompi_ialltoallw_f pompi_ialltoallw_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ialltoallw_f(char *sendbuf, MPI_Fint *sendcounts, - MPI_Fint *sdispls, MPI_Fint *sendtypes, + MPI_Fint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) @@ -80,8 +82,8 @@ void ompi_ialltoallw_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(rdispls); - c_comm = MPI_Comm_f2c(*comm); - MPI_Comm_size(c_comm, &size); + c_comm = PMPI_Comm_f2c(*comm); + PMPI_Comm_size(c_comm, &size); c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); c_recvtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); @@ -92,25 +94,25 @@ void ompi_ialltoallw_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_FINT_2_INT(rdispls, size); while (size > 0) { - c_sendtypes[size - 1] = MPI_Type_f2c(sendtypes[size - 1]); - c_recvtypes[size - 1] = MPI_Type_f2c(recvtypes[size - 1]); + c_sendtypes[size - 1] = PMPI_Type_f2c(sendtypes[size - 1]); + c_recvtypes[size - 1] = PMPI_Type_f2c(recvtypes[size - 1]); --size; } - /* Ialltoallw does not support MPI_IN_PLACE */ + sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Ialltoallw(sendbuf, + c_ierr = 
PMPI_Ialltoallw(sendbuf, OMPI_ARRAY_NAME_CONVERT(sendcounts), OMPI_ARRAY_NAME_CONVERT(sdispls), - c_sendtypes, - recvbuf, + c_sendtypes, + recvbuf, OMPI_ARRAY_NAME_CONVERT(recvcounts), OMPI_ARRAY_NAME_CONVERT(rdispls), c_recvtypes, c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); OMPI_ARRAY_FINT_2_INT_CLEANUP(sendcounts); OMPI_ARRAY_FINT_2_INT_CLEANUP(sdispls); diff --git a/ompi/mpi/fortran/mpif-h/ibarrier_f.c b/ompi/mpi/fortran/mpif-h/ibarrier_f.c index f8d0e124db1..a649ddeffe5 100644 --- a/ompi/mpi/fortran/mpif-h/ibarrier_f.c +++ b/ompi/mpi/fortran/mpif-h/ibarrier_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IBARRIER = ompi_ibarrier_f #pragma weak pmpi_ibarrier = ompi_ibarrier_f #pragma weak pmpi_ibarrier_ = ompi_ibarrier_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Ibarrier_f = ompi_ibarrier_f #pragma weak PMPI_Ibarrier_f08 = ompi_ibarrier_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IBARRIER, pmpi_ibarrier, pmpi_ibarrier_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IBARRIER, (MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IBARRIER = ompi_ibarrier_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IBARRIER, #pragma weak MPI_Ibarrier_f = ompi_ibarrier_f #pragma weak MPI_Ibarrier_f08 = ompi_ibarrier_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IBARRIER, mpi_ibarrier, mpi_ibarrier_, @@ -57,23 +60,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IBARRIER, ompi_ibarrier_f, (MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (comm, request, ierr) ) +#else +#define ompi_ibarrier_f pompi_ibarrier_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ibarrier_f(MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int ierr_c; MPI_Comm c_comm; MPI_Request c_req; - c_comm = MPI_Comm_f2c(*comm); + c_comm = PMPI_Comm_f2c(*comm); - ierr_c = MPI_Ibarrier(c_comm, &c_req); + ierr_c = PMPI_Ibarrier(c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); - if (MPI_SUCCESS == ierr_c) *request = MPI_Request_c2f(c_req); + if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_req); } diff --git a/ompi/mpi/fortran/mpif-h/ibcast_f.c b/ompi/mpi/fortran/mpif-h/ibcast_f.c index 44a48c16d40..497b54c339c 100644 --- a/ompi/mpi/fortran/mpif-h/ibcast_f.c +++ b/ompi/mpi/fortran/mpif-h/ibcast_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IBCAST = ompi_ibcast_f #pragma weak pmpi_ibcast = ompi_ibcast_f #pragma weak pmpi_ibcast_ = ompi_ibcast_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Ibcast_f = ompi_ibcast_f #pragma weak PMPI_Ibcast_f08 = ompi_ibcast_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IBCAST, pmpi_ibcast, pmpi_ibcast_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IBCAST, (char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buffer, count, datatype, root, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IBCAST = ompi_ibcast_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IBCAST, #pragma weak MPI_Ibcast_f = ompi_ibcast_f #pragma weak MPI_Ibcast_f08 = ompi_ibcast_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IBCAST, mpi_ibcast, mpi_ibcast_, @@ -58,14 +61,13 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IBCAST, ompi_ibcast_f, (char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buffer, count, datatype, root, comm, request, ierr) ) +#else +#define ompi_ibcast_f pompi_ibcast_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_ibcast_f(char *buffer, MPI_Fint *count, MPI_Fint *datatype, + +void ompi_ibcast_f(char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { @@ -74,15 +76,15 @@ void ompi_ibcast_f(char *buffer, MPI_Fint *count, MPI_Fint *datatype, MPI_Request c_req; MPI_Datatype c_type; - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_Ibcast(OMPI_F2C_BOTTOM(buffer), - OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Ibcast(OMPI_F2C_BOTTOM(buffer), + OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*root), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_req); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_req); } diff --git a/ompi/mpi/fortran/mpif-h/ibsend_f.c b/ompi/mpi/fortran/mpif-h/ibsend_f.c index dd3e4676c7c..088febdbcbb 100644 --- a/ompi/mpi/fortran/mpif-h/ibsend_f.c +++ b/ompi/mpi/fortran/mpif-h/ibsend_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IBSEND = ompi_ibsend_f #pragma weak pmpi_ibsend = ompi_ibsend_f #pragma weak pmpi_ibsend_ = ompi_ibsend_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Ibsend_f = ompi_ibsend_f #pragma weak PMPI_Ibsend_f08 = ompi_ibsend_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IBSEND, pmpi_ibsend, pmpi_ibsend_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IBSEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IBSEND = ompi_ibsend_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IBSEND, #pragma weak MPI_Ibsend_f = ompi_ibsend_f #pragma weak MPI_Ibsend_f08 = ompi_ibsend_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IBSEND, mpi_ibsend, mpi_ibsend_, @@ -58,31 +61,30 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IBSEND, ompi_ibsend_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) +#else +#define ompi_ibsend_f pompi_ibsend_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ibsend_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, - MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, + MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Ibsend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Ibsend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/iexscan_f.c b/ompi/mpi/fortran/mpif-h/iexscan_f.c index aab1dd3a92d..cd41416244f 100644 --- a/ompi/mpi/fortran/mpif-h/iexscan_f.c +++ b/ompi/mpi/fortran/mpif-h/iexscan_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IEXSCAN = ompi_iexscan_f #pragma weak pmpi_iexscan = ompi_iexscan_f #pragma weak pmpi_iexscan_ = ompi_iexscan_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Iexscan_f = ompi_iexscan_f #pragma weak PMPI_Iexscan_f08 = ompi_iexscan_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IEXSCAN, pmpi_iexscan, pmpi_iexscan_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IEXSCAN, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IEXSCAN = ompi_iexscan_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IEXSCAN, #pragma weak MPI_Iexscan_f = ompi_iexscan_f #pragma weak MPI_Iexscan_f08 = ompi_iexscan_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IEXSCAN, mpi_iexscan, mpi_iexscan_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IEXSCAN, ompi_iexscan_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, request, ierr) ) +#else +#define ompi_iexscan_f pompi_iexscan_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_iexscan_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) @@ -74,18 +76,18 @@ void ompi_iexscan_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Datatype c_type; MPI_Request c_request; MPI_Op c_op; - - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); - /* MPI_IN_PLACE is not supported */ + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); + + sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM (sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM (recvbuf); - c_ierr = MPI_Iexscan(sendbuf, recvbuf, + c_ierr = PMPI_Iexscan(sendbuf, recvbuf, OMPI_FINT_2_INT(*count), c_type, c_op, c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/igather_f.c b/ompi/mpi/fortran/mpif-h/igather_f.c index cf6fef99e25..f234581d0c3 100644 --- a/ompi/mpi/fortran/mpif-h/igather_f.c +++ b/ompi/mpi/fortran/mpif-h/igather_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IGATHER = ompi_igather_f #pragma weak pmpi_igather = ompi_igather_f #pragma weak pmpi_igather_ = ompi_igather_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Igather_f = ompi_igather_f #pragma weak PMPI_Igather_f08 = ompi_igather_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IGATHER, pmpi_igather, pmpi_igather_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IGATHER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IGATHER = ompi_igather_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IGATHER, #pragma weak MPI_Igather_f = ompi_igather_f #pragma weak MPI_Igather_f08 = ompi_igather_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IGATHER, mpi_igather, mpi_igather_, @@ -58,16 +61,15 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IGATHER, ompi_igather_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request, ierr) ) +#else +#define ompi_igather_f pompi_igather_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_igather_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, - char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, - MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, + char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, + MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; @@ -75,20 +77,20 @@ void ompi_igather_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Datatype c_sendtype, c_recvtype; MPI_Request c_request; - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Igather(sendbuf, OMPI_FINT_2_INT(*sendcount), - c_sendtype, recvbuf, + c_ierr = PMPI_Igather(sendbuf, OMPI_FINT_2_INT(*sendcount), + c_sendtype, recvbuf, OMPI_FINT_2_INT(*recvcount), c_recvtype, OMPI_FINT_2_INT(*root), c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/igatherv_f.c b/ompi/mpi/fortran/mpif-h/igatherv_f.c index 1c07ed5804b..c367c2f4abf 100644 --- a/ompi/mpi/fortran/mpif-h/igatherv_f.c +++ b/ompi/mpi/fortran/mpif-h/igatherv_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IGATHERV = ompi_igatherv_f #pragma weak pmpi_igatherv = ompi_igatherv_f #pragma weak pmpi_igatherv_ = ompi_igatherv_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Igatherv_f = ompi_igatherv_f #pragma weak PMPI_Igatherv_f08 = ompi_igatherv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IGATHERV, pmpi_igatherv, pmpi_igatherv_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IGATHERV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IGATHERV = ompi_igatherv_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IGATHERV, #pragma weak MPI_Igatherv_f = ompi_igatherv_f #pragma weak MPI_Igatherv_f08 = ompi_igatherv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IGATHERV, mpi_igatherv, mpi_igatherv_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IGATHERV, ompi_igatherv_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request,MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, request, ierr) ) +#else +#define ompi_igatherv_f pompi_igatherv_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_igatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, @@ -77,11 +79,11 @@ void ompi_igatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(displs); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); - - MPI_Comm_size(c_comm, &size); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); + + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); OMPI_ARRAY_FINT_2_INT(displs, size); @@ -89,13 +91,13 @@ void ompi_igatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Igatherv(sendbuf, OMPI_FINT_2_INT(*sendcount), + c_ierr = PMPI_Igatherv(sendbuf, OMPI_FINT_2_INT(*sendcount), c_sendtype, recvbuf, OMPI_ARRAY_NAME_CONVERT(recvcounts), OMPI_ARRAY_NAME_CONVERT(displs), - c_recvtype, + c_recvtype, OMPI_FINT_2_INT(*root), c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request 
= PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/improbe_f.c b/ompi/mpi/fortran/mpif-h/improbe_f.c index 44901e34039..8d7764fffd1 100644 --- a/ompi/mpi/fortran/mpif-h/improbe_f.c +++ b/ompi/mpi/fortran/mpif-h/improbe_f.c @@ -5,16 +5,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,45 +28,51 @@ #include "ompi/mpi/fortran/base/constants.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IMPROBE = ompi_improbe_f #pragma weak pmpi_improbe = ompi_improbe_f #pragma weak pmpi_improbe_ = ompi_improbe_f #pragma weak pmpi_improbe__ = ompi_improbe_f -#elif OMPI_PROFILE_LAYER + +#pragma weak PMPI_Improbe_f = ompi_improbe_f +#pragma weak PMPI_Improbe_f08 = ompi_improbe_f +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IMPROBE, pmpi_improbe, pmpi_improbe_, pmpi_improbe__, pompi_improbe_f, - (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, ompi_fortran_logical_t *flag, + (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *message, MPI_Fint *status, MPI_Fint *ierr), (source, tag, comm, flag, message, status, ierr) ) #endif +#endif #if 
OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IMPROBE = ompi_improbe_f #pragma weak mpi_improbe = ompi_improbe_f #pragma weak mpi_improbe_ = ompi_improbe_f #pragma weak mpi_improbe__ = ompi_improbe_f -#endif -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#pragma weak MPI_Improbe_f = ompi_improbe_f +#pragma weak MPI_Improbe_f08 = ompi_improbe_f +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IMPROBE, mpi_improbe, mpi_improbe_, mpi_improbe__, ompi_improbe_f, - (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, ompi_fortran_logical_t *flag, + (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *message, MPI_Fint *status, MPI_Fint *ierr), (source, tag, comm, flag, message, status, ierr) ) +#else +#define ompi_improbe_f pompi_improbe_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_improbe_f(MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *message, MPI_Fint *status, MPI_Fint *ierr) @@ -74,21 +83,21 @@ void ompi_improbe_f(MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, int c_ierr; OMPI_LOGICAL_NAME_DECL(flag); - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = OMPI_INT_2_FINT(MPI_Improbe(OMPI_FINT_2_INT(*source), - OMPI_FINT_2_INT(*tag), - c_comm, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag), - &c_message, c_status)); + c_ierr = OMPI_INT_2_FINT(PMPI_Improbe(OMPI_FINT_2_INT(*source), + OMPI_FINT_2_INT(*tag), + c_comm, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag), + &c_message, c_status)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { OMPI_SINGLE_INT_2_LOGICAL(flag); if (OMPI_FORTRAN_VALUE_TRUE == *flag) { OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) - *message = MPI_Message_c2f(c_message); - } + *message = PMPI_Message_c2f(c_message); + } } } diff --git 
a/ompi/mpi/fortran/mpif-h/imrecv_f.c b/ompi/mpi/fortran/mpif-h/imrecv_f.c index 92f9657d395..4ba7a13a2ad 100644 --- a/ompi/mpi/fortran/mpif-h/imrecv_f.c +++ b/ompi/mpi/fortran/mpif-h/imrecv_f.c @@ -5,15 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,12 +25,16 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IMRECV = ompi_imrecv_f #pragma weak pmpi_imrecv = ompi_imrecv_f #pragma weak pmpi_imrecv_ = ompi_imrecv_f #pragma weak pmpi_imrecv__ = ompi_imrecv_f -#elif OMPI_PROFILE_LAYER + +#pragma weak PMPI_Imrecv_f = ompi_imrecv_f +#pragma weak PMPI_Imrecv_f08 = ompi_imrecv_f +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IMRECV, pmpi_imrecv, pmpi_imrecv_, @@ -37,15 +44,18 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IMRECV, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, message, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IMRECV = ompi_imrecv_f #pragma weak mpi_imrecv = ompi_imrecv_f #pragma weak mpi_imrecv_ = ompi_imrecv_f #pragma weak mpi_imrecv__ = ompi_imrecv_f -#endif -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! 
OMPI_PROFILE_LAYER +#pragma weak MPI_Imrecv_f = ompi_imrecv_f +#pragma weak MPI_Imrecv_f08 = ompi_imrecv_f +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IMRECV, mpi_imrecv, mpi_imrecv_, @@ -54,30 +64,29 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IMRECV, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *message, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, message, request, ierr) ) +#else +#define ompi_imrecv_f pompi_imrecv_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_imrecv_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *message, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Message c_message; - c_message = MPI_Message_f2c(*message); + c_message = PMPI_Message_f2c(*message); - c_ierr = OMPI_INT_2_FINT(MPI_Imrecv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), - c_type, &c_message, &c_req)); + c_ierr = OMPI_INT_2_FINT(PMPI_Imrecv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_type, &c_message, &c_req)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); /* message is an INOUT, and may be updated by the recv */ - *message = MPI_Message_c2f(c_message); + *message = PMPI_Message_c2f(c_message); } } diff --git a/ompi/mpi/fortran/mpif-h/ineighbor_allgather_f.c b/ompi/mpi/fortran/mpif-h/ineighbor_allgather_f.c index a2706638ec1..ecd0221187a 100644 --- a/ompi/mpi/fortran/mpif-h/ineighbor_allgather_f.c +++ b/ompi/mpi/fortran/mpif-h/ineighbor_allgather_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INEIGHBOR_ALLGATHER = ompi_ineighbor_allgather_f #pragma weak pmpi_ineighbor_allgather = ompi_ineighbor_allgather_f #pragma weak pmpi_ineighbor_allgather_ = ompi_ineighbor_allgather_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Ineighbor_allgather_f = ompi_ineighbor_allgather_f #pragma weak PMPI_Ineighbor_allgather_f08 = ompi_ineighbor_allgather_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLGATHER, pmpi_ineighbor_allgather, pmpi_ineighbor_allgather_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLGATHER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INEIGHBOR_ALLGATHER = ompi_ineighbor_allgather_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLGATHER, #pragma weak MPI_Ineighbor_allgather_f = ompi_ineighbor_allgather_f #pragma weak MPI_Ineighbor_allgather_f08 = ompi_ineighbor_allgather_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INEIGHBOR_ALLGATHER, mpi_ineighbor_allgather, mpi_ineighbor_allgather_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INEIGHBOR_ALLGATHER, ompi_ineighbor_allgather_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, ierr) ) +#else +#define ompi_ineighbor_allgather_f pompi_ineighbor_allgather_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ineighbor_allgather_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) @@ -77,22 +79,22 @@ void ompi_ineighbor_allgather_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *se MPI_Request c_req; MPI_Datatype c_sendtype, c_recvtype; - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = MPI_Ineighbor_allgather(sendbuf, - OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, - OMPI_FINT_2_INT(*recvcount), - c_recvtype, c_comm, &c_req); + ierr_c = PMPI_Ineighbor_allgather(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + OMPI_FINT_2_INT(*recvcount), + c_recvtype, c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); - if (MPI_SUCCESS == ierr_c) *request = MPI_Request_c2f(c_req); + if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_req); } diff --git a/ompi/mpi/fortran/mpif-h/ineighbor_allgatherv_f.c 
b/ompi/mpi/fortran/mpif-h/ineighbor_allgatherv_f.c index 3ee2b26c0d3..db30c7451a6 100644 --- a/ompi/mpi/fortran/mpif-h/ineighbor_allgatherv_f.c +++ b/ompi/mpi/fortran/mpif-h/ineighbor_allgatherv_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INEIGHBOR_ALLGATHERV = ompi_ineighbor_allgatherv_f #pragma weak pmpi_ineighbor_allgatherv = ompi_ineighbor_allgatherv_f #pragma weak pmpi_ineighbor_allgatherv_ = ompi_ineighbor_allgatherv_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Ineighbor_allgatherv_f = ompi_ineighbor_allgatherv_f #pragma weak PMPI_Ineighbor_allgatherv_f08 = ompi_ineighbor_allgatherv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLGATHERV, pmpi_ineighbor_allgatherv, pmpi_ineighbor_allgatherv_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLGATHERV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INEIGHBOR_ALLGATHERV = ompi_ineighbor_allgatherv_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLGATHERV, #pragma weak MPI_Ineighbor_allgatherv_f = ompi_ineighbor_allgatherv_f #pragma weak MPI_Ineighbor_allgatherv_f08 = ompi_ineighbor_allgatherv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! 
OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INEIGHBOR_ALLGATHERV, mpi_ineighbor_allgatherv, mpi_ineighbor_allgatherv_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INEIGHBOR_ALLGATHERV, ompi_ineighbor_allgatherv_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, request, ierr) ) +#else +#define ompi_ineighbor_allgatherv_f pompi_ineighbor_allgatherv_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ineighbor_allgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, @@ -80,11 +82,11 @@ void ompi_ineighbor_allgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *s OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(displs); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); OMPI_ARRAY_FINT_2_INT(displs, size); @@ -92,16 +94,16 @@ void ompi_ineighbor_allgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *s sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = MPI_Ineighbor_allgatherv(sendbuf, - OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - OMPI_ARRAY_NAME_CONVERT(displs), - c_recvtype, c_comm, &c_request); + ierr_c = PMPI_Ineighbor_allgatherv(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + 
OMPI_ARRAY_NAME_CONVERT(recvcounts), + OMPI_ARRAY_NAME_CONVERT(displs), + c_recvtype, c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); - if (MPI_SUCCESS == ierr_c) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == ierr_c) *request = PMPI_Request_c2f(c_request); OMPI_ARRAY_FINT_2_INT_CLEANUP(recvcounts); OMPI_ARRAY_FINT_2_INT_CLEANUP(displs); diff --git a/ompi/mpi/fortran/mpif-h/ineighbor_alltoall_f.c b/ompi/mpi/fortran/mpif-h/ineighbor_alltoall_f.c index fdcbd370f61..b565f1b74ed 100644 --- a/ompi/mpi/fortran/mpif-h/ineighbor_alltoall_f.c +++ b/ompi/mpi/fortran/mpif-h/ineighbor_alltoall_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INEIGHBOR_ALLTOALL = ompi_ineighbor_alltoall_f #pragma weak pmpi_ineighbor_alltoall = ompi_ineighbor_alltoall_f #pragma weak pmpi_ineighbor_alltoall_ = ompi_ineighbor_alltoall_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Ineighbor_alltoall_f = ompi_ineighbor_alltoall_f #pragma weak PMPI_Ineighbor_alltoall_f08 = ompi_ineighbor_alltoall_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLTOALL, pmpi_ineighbor_alltoall, pmpi_ineighbor_alltoall_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLTOALL, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, ierr) ) #endif 
+#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INEIGHBOR_ALLTOALL = ompi_ineighbor_alltoall_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLTOALL, #pragma weak MPI_Ineighbor_alltoall_f = ompi_ineighbor_alltoall_f #pragma weak MPI_Ineighbor_alltoall_f08 = ompi_ineighbor_alltoall_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INEIGHBOR_ALLTOALL, mpi_ineighbor_alltoall, mpi_ineighbor_alltoall_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INEIGHBOR_ALLTOALL, ompi_ineighbor_alltoall_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, ierr) ) +#else +#define ompi_ineighbor_alltoall_f pompi_ineighbor_alltoall_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ineighbor_alltoall_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) @@ -77,15 +79,15 @@ void ompi_ineighbor_alltoall_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sen MPI_Request c_req; MPI_Datatype c_sendtype, c_recvtype; - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Ineighbor_alltoall(sendbuf, + c_ierr = PMPI_Ineighbor_alltoall(sendbuf, OMPI_FINT_2_INT(*sendcount), c_sendtype, recvbuf, @@ -93,5 +95,5 @@ void ompi_ineighbor_alltoall_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint 
*sen c_recvtype, c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_req); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_req); } diff --git a/ompi/mpi/fortran/mpif-h/ineighbor_alltoallv_f.c b/ompi/mpi/fortran/mpif-h/ineighbor_alltoallv_f.c index d689113cdc0..89761ace66d 100644 --- a/ompi/mpi/fortran/mpif-h/ineighbor_alltoallv_f.c +++ b/ompi/mpi/fortran/mpif-h/ineighbor_alltoallv_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INEIGHBOR_ALLTOALLV = ompi_ineighbor_alltoallv_f #pragma weak pmpi_ineighbor_alltoallv = ompi_ineighbor_alltoallv_f #pragma weak pmpi_ineighbor_alltoallv_ = ompi_ineighbor_alltoallv_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Ineighbor_alltoallv_f = ompi_ineighbor_alltoallv_f #pragma weak PMPI_Ineighbor_alltoallv_f08 = ompi_ineighbor_alltoallv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLTOALLV, pmpi_ineighbor_alltoallv, pmpi_ineighbor_alltoallv_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLTOALLV, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INEIGHBOR_ALLTOALLV = 
ompi_ineighbor_alltoallv_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLTOALLV, #pragma weak MPI_Ineighbor_alltoallv_f = ompi_ineighbor_alltoallv_f #pragma weak MPI_Ineighbor_alltoallv_f08 = ompi_ineighbor_alltoallv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INEIGHBOR_ALLTOALLV, mpi_ineighbor_alltoallv, mpi_ineighbor_alltoallv_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INEIGHBOR_ALLTOALLV, ompi_ineighbor_alltoallv_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, request, ierr) ) +#else +#define ompi_ineighbor_alltoallv_f pompi_ineighbor_alltoallv_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ineighbor_alltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, @@ -82,11 +84,11 @@ void ompi_ineighbor_alltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *s OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(rdispls); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(sendcounts, size); OMPI_ARRAY_FINT_2_INT(sdispls, size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); @@ -96,7 +98,7 @@ void ompi_ineighbor_alltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *s sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) 
OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Ineighbor_alltoallv(sendbuf, + c_ierr = PMPI_Ineighbor_alltoallv(sendbuf, OMPI_ARRAY_NAME_CONVERT(sendcounts), OMPI_ARRAY_NAME_CONVERT(sdispls), c_sendtype, @@ -105,7 +107,7 @@ void ompi_ineighbor_alltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *s OMPI_ARRAY_NAME_CONVERT(rdispls), c_recvtype, c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); OMPI_ARRAY_FINT_2_INT_CLEANUP(sendcounts); OMPI_ARRAY_FINT_2_INT_CLEANUP(sdispls); diff --git a/ompi/mpi/fortran/mpif-h/ineighbor_alltoallw_f.c b/ompi/mpi/fortran/mpif-h/ineighbor_alltoallw_f.c index 36b8347530b..42116da7a81 100644 --- a/ompi/mpi/fortran/mpif-h/ineighbor_alltoallw_f.c +++ b/ompi/mpi/fortran/mpif-h/ineighbor_alltoallw_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INEIGHBOR_ALLTOALLW = ompi_ineighbor_alltoallw_f #pragma weak pmpi_ineighbor_alltoallw = ompi_ineighbor_alltoallw_f #pragma weak pmpi_ineighbor_alltoallw_ = ompi_ineighbor_alltoallw_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Ineighbor_alltoallw_f = ompi_ineighbor_alltoallw_f #pragma weak PMPI_Ineighbor_alltoallw_f08 = ompi_ineighbor_alltoallw_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLTOALLW, pmpi_ineighbor_alltoallw, pmpi_ineighbor_alltoallw_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLTOALLW, (char *sendbuf, MPI_Fint *sendcounts, MPI_Aint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Aint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INEIGHBOR_ALLTOALLW = ompi_ineighbor_alltoallw_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INEIGHBOR_ALLTOALLW, #pragma weak MPI_Ineighbor_alltoallw_f = ompi_ineighbor_alltoallw_f #pragma weak MPI_Ineighbor_alltoallw_f08 = ompi_ineighbor_alltoallw_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INEIGHBOR_ALLTOALLW, mpi_ineighbor_alltoallw, mpi_ineighbor_alltoallw_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INEIGHBOR_ALLTOALLW, ompi_ineighbor_alltoallw_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Aint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Aint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, request, ierr) ) +#else +#define ompi_ineighbor_alltoallw_f pompi_ineighbor_alltoallw_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ineighbor_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Aint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, @@ -81,8 +83,8 @@ void ompi_ineighbor_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_NAME_DECL(sendcounts); OMPI_ARRAY_NAME_DECL(recvcounts); - c_comm = MPI_Comm_f2c(*comm); - MPI_Comm_size(c_comm, &size); + c_comm = PMPI_Comm_f2c(*comm); + PMPI_Comm_size(c_comm, &size); c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); c_recvtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); @@ -91,8 +93,8 @@ void ompi_ineighbor_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_FINT_2_INT(recvcounts, size); while (size > 0) { - c_sendtypes[size - 1] = MPI_Type_f2c(sendtypes[size - 1]); - c_recvtypes[size - 1] = MPI_Type_f2c(recvtypes[size - 1]); + c_sendtypes[size - 1] = PMPI_Type_f2c(sendtypes[size - 1]); + c_recvtypes[size - 1] = PMPI_Type_f2c(recvtypes[size - 1]); --size; } @@ -100,7 +102,7 @@ void ompi_ineighbor_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Ineighbor_alltoallw(sendbuf, + c_ierr = PMPI_Ineighbor_alltoallw(sendbuf, 
OMPI_ARRAY_NAME_CONVERT(sendcounts), sdispls, c_sendtypes, @@ -109,7 +111,7 @@ void ompi_ineighbor_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, rdispls, c_recvtypes, c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); OMPI_ARRAY_FINT_2_INT_CLEANUP(sendcounts); OMPI_ARRAY_FINT_2_INT_CLEANUP(recvcounts); diff --git a/ompi/mpi/fortran/mpif-h/info_create_f.c b/ompi/mpi/fortran/mpif-h/info_create_f.c index 1ebeb795475..3c9dc287bd8 100644 --- a/ompi/mpi/fortran/mpif-h/info_create_f.c +++ b/ompi/mpi/fortran/mpif-h/info_create_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INFO_CREATE = ompi_info_create_f #pragma weak pmpi_info_create = ompi_info_create_f #pragma weak pmpi_info_create_ = ompi_info_create_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Info_create_f = ompi_info_create_f #pragma weak PMPI_Info_create_f08 = ompi_info_create_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_CREATE, pmpi_info_create, pmpi_info_create_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_CREATE, (MPI_Fint *info, MPI_Fint *ierr), (info, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INFO_CREATE = ompi_info_create_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_CREATE, #pragma weak MPI_Info_create_f = ompi_info_create_f #pragma weak MPI_Info_create_f08 = ompi_info_create_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INFO_CREATE, mpi_info_create, mpi_info_create_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INFO_CREATE, ompi_info_create_f, (MPI_Fint *info, MPI_Fint *ierr), (info, ierr) ) +#else +#define ompi_info_create_f pompi_info_create_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_info_create_f(MPI_Fint *info, MPI_Fint *ierr) { int c_ierr; MPI_Info c_info; - c_ierr = MPI_Info_create(&c_info); + c_ierr = PMPI_Info_create(&c_info); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *info = MPI_Info_c2f(c_info); + *info = PMPI_Info_c2f(c_info); } } diff --git a/ompi/mpi/fortran/mpif-h/info_delete_f.c b/ompi/mpi/fortran/mpif-h/info_delete_f.c index e4d3be849b6..4197a53f0d0 100644 --- a/ompi/mpi/fortran/mpif-h/info_delete_f.c +++ b/ompi/mpi/fortran/mpif-h/info_delete_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/communicator/communicator.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INFO_DELETE = ompi_info_delete_f #pragma weak pmpi_info_delete = ompi_info_delete_f #pragma weak pmpi_info_delete_ = ompi_info_delete_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Info_delete_f = ompi_info_delete_f #pragma weak PMPI_Info_delete_f08 = ompi_info_delete_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_DELETE, pmpi_info_delete, pmpi_info_delete_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_DELETE, (MPI_Fint *info, char *key, MPI_Fint *ierr, int key_len), (info, key, ierr, key_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INFO_DELETE = ompi_info_delete_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_DELETE, #pragma weak MPI_Info_delete_f = ompi_info_delete_f #pragma weak MPI_Info_delete_f08 = ompi_info_delete_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INFO_DELETE, mpi_info_delete, mpi_info_delete_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INFO_DELETE, ompi_info_delete_f, (MPI_Fint *info, char *key, MPI_Fint *ierr, int key_len), (info, key, ierr, key_len) ) +#else +#define ompi_info_delete_f pompi_info_delete_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_INFO_DELETE"; /* Note that the key_len parameter is silently added by the Fortran @@ -85,9 +87,9 @@ void ompi_info_delete_f(MPI_Fint *info, char *key, MPI_Fint *ierr, int key_len) if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); return; } - c_info = MPI_Info_f2c(*info); - - c_ierr = MPI_Info_delete(c_info, c_key); + c_info = PMPI_Info_f2c(*info); + + c_ierr = PMPI_Info_delete(c_info, c_key); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); free(c_key); diff --git a/ompi/mpi/fortran/mpif-h/info_dup_f.c b/ompi/mpi/fortran/mpif-h/info_dup_f.c index de43fbc86b5..304746c88ac 100644 --- a/ompi/mpi/fortran/mpif-h/info_dup_f.c +++ b/ompi/mpi/fortran/mpif-h/info_dup_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INFO_DUP = ompi_info_dup_f #pragma weak pmpi_info_dup = ompi_info_dup_f #pragma weak pmpi_info_dup_ = ompi_info_dup_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Info_dup_f = ompi_info_dup_f #pragma weak PMPI_Info_dup_f08 = ompi_info_dup_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_DUP, pmpi_info_dup, pmpi_info_dup_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_DUP, (MPI_Fint *info, MPI_Fint *newinfo, MPI_Fint *ierr), (info, newinfo, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INFO_DUP = ompi_info_dup_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_DUP, #pragma weak MPI_Info_dup_f = ompi_info_dup_f #pragma weak MPI_Info_dup_f08 = ompi_info_dup_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INFO_DUP, mpi_info_dup, mpi_info_dup_, @@ -57,24 +60,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INFO_DUP, ompi_info_dup_f, (MPI_Fint *info, MPI_Fint *newinfo, MPI_Fint *ierr), (info, newinfo, ierr) ) +#else +#define ompi_info_dup_f pompi_info_dup_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_info_dup_f(MPI_Fint *info, MPI_Fint *newinfo, MPI_Fint *ierr) { int c_ierr; MPI_Info c_info, c_new_info; - c_info = MPI_Info_f2c(*info); + c_info = PMPI_Info_f2c(*info); - c_ierr = MPI_Info_dup(c_info, &c_new_info); + c_ierr = PMPI_Info_dup(c_info, &c_new_info); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newinfo = MPI_Info_c2f(c_new_info); + *newinfo = PMPI_Info_c2f(c_new_info); } } diff --git a/ompi/mpi/fortran/mpif-h/info_free_f.c b/ompi/mpi/fortran/mpif-h/info_free_f.c index 732cf97e9fa..ad769602f6e 100644 --- a/ompi/mpi/fortran/mpif-h/info_free_f.c +++ b/ompi/mpi/fortran/mpif-h/info_free_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INFO_FREE = ompi_info_free_f #pragma weak pmpi_info_free = ompi_info_free_f #pragma weak pmpi_info_free_ = ompi_info_free_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Info_free_f = ompi_info_free_f #pragma weak PMPI_Info_free_f08 = ompi_info_free_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_FREE, pmpi_info_free, pmpi_info_free_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_FREE, (MPI_Fint *info, MPI_Fint *ierr), (info, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INFO_FREE = ompi_info_free_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_FREE, #pragma weak MPI_Info_free_f = ompi_info_free_f #pragma weak MPI_Info_free_f08 = ompi_info_free_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INFO_FREE, mpi_info_free, mpi_info_free_, @@ -57,24 +60,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INFO_FREE, ompi_info_free_f, (MPI_Fint *info, MPI_Fint *ierr), (info, ierr) ) +#else +#define ompi_info_free_f pompi_info_free_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_info_free_f(MPI_Fint *info, MPI_Fint *ierr) { int c_ierr; MPI_Info c_info; - c_info = MPI_Info_f2c(*info); + c_info = PMPI_Info_f2c(*info); - c_ierr = MPI_Info_free(&c_info); + c_ierr = PMPI_Info_free(&c_info); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *info = MPI_Info_c2f(c_info); + *info = PMPI_Info_c2f(c_info); } } diff --git a/ompi/mpi/fortran/mpif-h/info_get_f.c b/ompi/mpi/fortran/mpif-h/info_get_f.c index bcfa9bbb50d..48082786fb4 100644 --- a/ompi/mpi/fortran/mpif-h/info_get_f.c +++ b/ompi/mpi/fortran/mpif-h/info_get_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/communicator/communicator.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INFO_GET = ompi_info_get_f #pragma weak pmpi_info_get = ompi_info_get_f #pragma weak pmpi_info_get_ = ompi_info_get_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Info_get_f = ompi_info_get_f #pragma weak PMPI_Info_get_f08 = ompi_info_get_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET, pmpi_info_get, pmpi_info_get_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET, (MPI_Fint *info, char *key, MPI_Fint *valuelen, char *value, ompi_fortran_logical_t *flag, MPI_Fint *ierr, int key_len, int value_len), (info, key, valuelen, value, flag, ierr, key_len, value_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INFO_GET = ompi_info_get_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET, #pragma weak MPI_Info_get_f = ompi_info_get_f #pragma weak MPI_Info_get_f08 = ompi_info_get_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INFO_GET, mpi_info_get, mpi_info_get_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INFO_GET, ompi_info_get_f, (MPI_Fint *info, char *key, MPI_Fint *valuelen, char *value, ompi_fortran_logical_t *flag, MPI_Fint *ierr, int key_len, int value_len), (info, key, valuelen, value, flag, ierr, key_len, value_len) ) +#else +#define ompi_info_get_f pompi_info_get_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_INFO_GET"; /* Note that the key_len and value_len parameters are silently added @@ -88,9 +90,9 @@ void ompi_info_get_f(MPI_Fint *info, char *key, MPI_Fint *valuelen, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); return; } - c_info = MPI_Info_f2c(*info); + c_info = PMPI_Info_f2c(*info); - c_ierr = MPI_Info_get(c_info, c_key, + c_ierr = PMPI_Info_get(c_info, c_key, OMPI_FINT_2_INT(*valuelen), c_value, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); @@ -105,7 +107,7 @@ void ompi_info_get_f(MPI_Fint *info, char *key, MPI_Fint *valuelen, Fortran compilers have TRUE == 1). Note: use the full length of the Fortran string, not *valuelen. See comment in ompi/mpi/fortran/base/strings.c. */ - if (*flag && OMPI_SUCCESS != + if (*flag && OMPI_SUCCESS != (ret = ompi_fortran_string_c2f(c_value, value, value_len))) { c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/info_get_nkeys_f.c b/ompi/mpi/fortran/mpif-h/info_get_nkeys_f.c index 1efc9882617..7cbcffea2c4 100644 --- a/ompi/mpi/fortran/mpif-h/info_get_nkeys_f.c +++ b/ompi/mpi/fortran/mpif-h/info_get_nkeys_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INFO_GET_NKEYS = ompi_info_get_nkeys_f #pragma weak pmpi_info_get_nkeys = ompi_info_get_nkeys_f #pragma weak pmpi_info_get_nkeys_ = ompi_info_get_nkeys_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Info_get_nkeys_f = ompi_info_get_nkeys_f #pragma weak PMPI_Info_get_nkeys_f08 = ompi_info_get_nkeys_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET_NKEYS, pmpi_info_get_nkeys, pmpi_info_get_nkeys_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET_NKEYS, (MPI_Fint *info, MPI_Fint *nkeys, MPI_Fint *ierr), (info, nkeys, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INFO_GET_NKEYS = ompi_info_get_nkeys_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET_NKEYS, #pragma weak MPI_Info_get_nkeys_f = ompi_info_get_nkeys_f #pragma weak MPI_Info_get_nkeys_f08 = ompi_info_get_nkeys_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INFO_GET_NKEYS, mpi_info_get_nkeys, mpi_info_get_nkeys_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INFO_GET_NKEYS, ompi_info_get_nkeys_f, (MPI_Fint *info, MPI_Fint *nkeys, MPI_Fint *ierr), (info, nkeys, ierr) ) +#else +#define ompi_info_get_nkeys_f pompi_info_get_nkeys_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_info_get_nkeys_f(MPI_Fint *info, MPI_Fint *nkeys, MPI_Fint *ierr) { int c_ierr; MPI_Info c_info; OMPI_SINGLE_NAME_DECL(nkeys); - c_info = MPI_Info_f2c(*info); - - c_ierr = MPI_Info_get_nkeys(c_info, OMPI_SINGLE_NAME_CONVERT(nkeys)); + c_info = PMPI_Info_f2c(*info); + + c_ierr = PMPI_Info_get_nkeys(c_info, OMPI_SINGLE_NAME_CONVERT(nkeys)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/info_get_nthkey_f.c b/ompi/mpi/fortran/mpif-h/info_get_nthkey_f.c index e764aa1d11f..31fdcdc24b5 100644 --- a/ompi/mpi/fortran/mpif-h/info_get_nthkey_f.c +++ b/ompi/mpi/fortran/mpif-h/info_get_nthkey_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/communicator/communicator.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INFO_GET_NTHKEY = ompi_info_get_nthkey_f #pragma weak pmpi_info_get_nthkey = ompi_info_get_nthkey_f #pragma weak pmpi_info_get_nthkey_ = ompi_info_get_nthkey_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Info_get_nthkey_f = ompi_info_get_nthkey_f #pragma weak PMPI_Info_get_nthkey_f08 = ompi_info_get_nthkey_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET_NTHKEY, pmpi_info_get_nthkey, pmpi_info_get_nthkey_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET_NTHKEY, (MPI_Fint *info, MPI_Fint *n, char *key, MPI_Fint *ierr, int key_len), (info, n, key, ierr, key_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INFO_GET_NTHKEY = ompi_info_get_nthkey_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET_NTHKEY, #pragma weak MPI_Info_get_nthkey_f = ompi_info_get_nthkey_f #pragma weak MPI_Info_get_nthkey_f08 = ompi_info_get_nthkey_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INFO_GET_NTHKEY, mpi_info_get_nthkey, mpi_info_get_nthkey_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INFO_GET_NTHKEY, ompi_info_get_nthkey_f, (MPI_Fint *info, MPI_Fint *n, char *key, MPI_Fint *ierr, int key_len), (info, n, key, ierr, key_len) ) +#else +#define ompi_info_get_nthkey_f pompi_info_get_nthkey_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_INFO_GET_NTHKEY"; /* Note that the key_len parameter is silently added by the Fortran @@ -81,13 +83,13 @@ void ompi_info_get_nthkey_f(MPI_Fint *info, MPI_Fint *n, char *key, MPI_Info c_info; char c_key[MPI_MAX_INFO_KEY + 1]; - c_info = MPI_Info_f2c(*info); - - c_ierr = MPI_Info_get_nthkey(c_info, + c_info = PMPI_Info_f2c(*info); + + c_ierr = PMPI_Info_get_nthkey(c_info, OMPI_FINT_2_INT(*n), c_key); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + if (OMPI_SUCCESS != (ret = ompi_fortran_string_c2f(c_key, key, key_len))) { c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/info_get_valuelen_f.c b/ompi/mpi/fortran/mpif-h/info_get_valuelen_f.c index 16db83e8724..2b2b68567a7 100644 --- a/ompi/mpi/fortran/mpif-h/info_get_valuelen_f.c +++ b/ompi/mpi/fortran/mpif-h/info_get_valuelen_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/communicator/communicator.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INFO_GET_VALUELEN = ompi_info_get_valuelen_f #pragma weak pmpi_info_get_valuelen = ompi_info_get_valuelen_f #pragma weak pmpi_info_get_valuelen_ = ompi_info_get_valuelen_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Info_get_valuelen_f = ompi_info_get_valuelen_f #pragma weak PMPI_Info_get_valuelen_f08 = ompi_info_get_valuelen_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET_VALUELEN, pmpi_info_get_valuelen, pmpi_info_get_valuelen_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET_VALUELEN, (MPI_Fint *info, char *key, MPI_Fint *valuelen, ompi_fortran_logical_t *flag, MPI_Fint *ierr, int key_len), (info, key, valuelen, flag, ierr, key_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INFO_GET_VALUELEN = ompi_info_get_valuelen_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_GET_VALUELEN, #pragma weak MPI_Info_get_valuelen_f = ompi_info_get_valuelen_f #pragma weak MPI_Info_get_valuelen_f08 = ompi_info_get_valuelen_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INFO_GET_VALUELEN, mpi_info_get_valuelen, mpi_info_get_valuelen_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INFO_GET_VALUELEN, ompi_info_get_valuelen_f, (MPI_Fint *info, char *key, MPI_Fint *valuelen, ompi_fortran_logical_t *flag, MPI_Fint *ierr, int key_len), (info, key, valuelen, flag, ierr, key_len) ) +#else +#define ompi_info_get_valuelen_f pompi_info_get_valuelen_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_INFO_GET_VALUELEN"; /* Note that the key_len parameter is silently added by the Fortran @@ -89,8 +91,8 @@ void ompi_info_get_valuelen_f(MPI_Fint *info, char *key, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); return; } - c_info = MPI_Info_f2c(*info); - c_ierr = MPI_Info_get_valuelen(c_info, c_key, + c_info = PMPI_Info_f2c(*info); + c_ierr = PMPI_Info_get_valuelen(c_info, c_key, OMPI_SINGLE_NAME_CONVERT(valuelen), OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/info_set_f.c b/ompi/mpi/fortran/mpif-h/info_set_f.c index 54f822abfe2..a6eca5722e5 100644 --- a/ompi/mpi/fortran/mpif-h/info_set_f.c +++ b/ompi/mpi/fortran/mpif-h/info_set_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/communicator/communicator.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INFO_SET = ompi_info_set_f #pragma weak pmpi_info_set = ompi_info_set_f #pragma weak pmpi_info_set_ = ompi_info_set_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Info_set_f = ompi_info_set_f #pragma weak PMPI_Info_set_f08 = ompi_info_set_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_SET, pmpi_info_set, pmpi_info_set_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_SET, (MPI_Fint *info, char *key, char *value, MPI_Fint *ierr, int key_len, int value_len), (info, key, value, ierr, key_len, value_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INFO_SET = ompi_info_set_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INFO_SET, #pragma weak MPI_Info_set_f = ompi_info_set_f #pragma weak MPI_Info_set_f08 = ompi_info_set_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INFO_SET, mpi_info_set, mpi_info_set_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INFO_SET, ompi_info_set_f, (MPI_Fint *info, char *key, char *value, MPI_Fint *ierr, int key_len, int value_len), (info, key, value, ierr, key_len, value_len) ) +#else +#define ompi_info_set_f pompi_info_set_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_INFO_SET"; /* Note that the key_len and value_len parameters are silently added @@ -82,7 +84,7 @@ void ompi_info_set_f(MPI_Fint *info, char *key, char *value, MPI_Fint *ierr, char *c_key = NULL, *c_value = NULL; if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(key, key_len, &c_key)) || - OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(value, value_len, + OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(value, value_len, &c_value))) { c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -91,9 +93,9 @@ void ompi_info_set_f(MPI_Fint *info, char *key, char *value, MPI_Fint *ierr, } return; } - c_info = MPI_Info_f2c(*info); + c_info = PMPI_Info_f2c(*info); - c_ierr = MPI_Info_set(c_info, c_key, c_value); + c_ierr = PMPI_Info_set(c_info, c_key, c_value); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); free(c_key); diff --git a/ompi/mpi/fortran/mpif-h/init_f.c b/ompi/mpi/fortran/mpif-h/init_f.c index c5b10c69388..0d3b6d810c9 100644 --- a/ompi/mpi/fortran/mpif-h/init_f.c +++ b/ompi/mpi/fortran/mpif-h/init_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,7 +32,8 @@ static const char ident[] = OMPI_IDENT_STRING; #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INIT = ompi_init_f #pragma weak pmpi_init = ompi_init_f #pragma weak pmpi_init_ = ompi_init_f @@ -38,7 +41,7 @@ static const char ident[] = OMPI_IDENT_STRING; #pragma weak PMPI_Init_f = ompi_init_f #pragma weak PMPI_Init_f08 = ompi_init_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INIT, pmpi_init, pmpi_init_, @@ -47,6 +50,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INIT, (MPI_Fint *ierr), (ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INIT = ompi_init_f @@ -56,9 +60,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INIT, #pragma weak MPI_Init_f = ompi_init_f #pragma weak MPI_Init_f08 = ompi_init_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INIT, mpi_init, mpi_init_, @@ -66,19 +69,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INIT, ompi_init_f, (MPI_Fint *ierr), (ierr) ) +#else +#define ompi_init_f pompi_init_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_init_f( MPI_Fint *ierr ) { int c_ierr; int argc = 0; char **argv = NULL; - c_ierr = MPI_Init( &argc, &argv ); + c_ierr = PMPI_Init( &argc, &argv ); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/init_thread_f.c b/ompi/mpi/fortran/mpif-h/init_thread_f.c index be6f625b90d..2f6acbb44de 100644 --- a/ompi/mpi/fortran/mpif-h/init_thread_f.c +++ b/ompi/mpi/fortran/mpif-h/init_thread_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INIT_THREAD = ompi_init_thread_f #pragma weak pmpi_init_thread = ompi_init_thread_f #pragma weak pmpi_init_thread_ = ompi_init_thread_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Init_thread_f = ompi_init_thread_f #pragma weak PMPI_Init_thread_f08 = ompi_init_thread_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INIT_THREAD, pmpi_init_thread, pmpi_init_thread_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INIT_THREAD, (MPI_Fint *required, MPI_Fint *provided, MPI_Fint *ierr), (required, provided, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INIT_THREAD = ompi_init_thread_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INIT_THREAD, #pragma weak MPI_Init_thread_f = ompi_init_thread_f #pragma weak MPI_Init_thread_f08 = ompi_init_thread_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INIT_THREAD, mpi_init_thread, mpi_init_thread_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INIT_THREAD, ompi_init_thread_f, (MPI_Fint *required, MPI_Fint *provided, MPI_Fint *ierr), (required, provided, ierr) ) +#else +#define ompi_init_thread_f pompi_init_thread_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_init_thread_f( MPI_Fint *required, MPI_Fint *provided, MPI_Fint *ierr ) { int c_ierr; @@ -71,7 +73,7 @@ void ompi_init_thread_f( MPI_Fint *required, MPI_Fint *provided, MPI_Fint *ierr char** argv = NULL; OMPI_SINGLE_NAME_DECL(provided); - c_ierr = MPI_Init_thread(&argc, &argv, + c_ierr = PMPI_Init_thread(&argc, &argv, OMPI_FINT_2_INT(*required), OMPI_SINGLE_NAME_CONVERT(provided)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/initialized_f.c b/ompi/mpi/fortran/mpif-h/initialized_f.c index 212331fa6c7..4489e91b6c2 100644 --- a/ompi/mpi/fortran/mpif-h/initialized_f.c +++ b/ompi/mpi/fortran/mpif-h/initialized_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INITIALIZED = ompi_initialized_f #pragma weak pmpi_initialized = ompi_initialized_f #pragma weak pmpi_initialized_ = ompi_initialized_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Initialized_f = ompi_initialized_f #pragma weak PMPI_Initialized_f08 = ompi_initialized_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INITIALIZED, pmpi_initialized, pmpi_initialized_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INITIALIZED, (ompi_fortran_logical_t *flag, MPI_Fint *ierr), (flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INITIALIZED = ompi_initialized_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INITIALIZED, #pragma weak MPI_Initialized_f = ompi_initialized_f #pragma weak MPI_Initialized_f08 = ompi_initialized_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INITIALIZED, mpi_initialized, mpi_initialized_, @@ -57,18 +60,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INITIALIZED, ompi_initialized_f, (ompi_fortran_logical_t *flag, MPI_Fint *ierr), (flag, ierr) ) +#else +#define ompi_initialized_f pompi_initialized_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_initialized_f(ompi_fortran_logical_t *flag, MPI_Fint *ierr) { int c_ierr; OMPI_LOGICAL_NAME_DECL(flag); - c_ierr = MPI_Initialized(OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); + c_ierr = PMPI_Initialized(OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/intercomm_create_f.c b/ompi/mpi/fortran/mpif-h/intercomm_create_f.c index 63861e1386c..79cddd8ba50 100644 --- a/ompi/mpi/fortran/mpif-h/intercomm_create_f.c +++ b/ompi/mpi/fortran/mpif-h/intercomm_create_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INTERCOMM_CREATE = ompi_intercomm_create_f #pragma weak pmpi_intercomm_create = ompi_intercomm_create_f #pragma weak pmpi_intercomm_create_ = ompi_intercomm_create_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Intercomm_create_f = ompi_intercomm_create_f #pragma weak PMPI_Intercomm_create_f08 = ompi_intercomm_create_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INTERCOMM_CREATE, pmpi_intercomm_create, pmpi_intercomm_create_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INTERCOMM_CREATE, (MPI_Fint *local_comm, MPI_Fint *local_leader, MPI_Fint *bridge_comm, MPI_Fint *remote_leader, MPI_Fint *tag, MPI_Fint *newintercomm, MPI_Fint *ierr), (local_comm, local_leader, bridge_comm, remote_leader, tag, newintercomm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INTERCOMM_CREATE = ompi_intercomm_create_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INTERCOMM_CREATE, #pragma weak MPI_Intercomm_create_f = ompi_intercomm_create_f #pragma weak MPI_Intercomm_create_f08 = ompi_intercomm_create_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INTERCOMM_CREATE, mpi_intercomm_create, mpi_intercomm_create_, @@ -57,26 +60,25 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INTERCOMM_CREATE, ompi_intercomm_create_f, (MPI_Fint *local_comm, MPI_Fint *local_leader, MPI_Fint *bridge_comm, MPI_Fint *remote_leader, MPI_Fint *tag, MPI_Fint *newintercomm, MPI_Fint *ierr), (local_comm, local_leader, bridge_comm, remote_leader, tag, newintercomm, ierr) ) +#else +#define ompi_intercomm_create_f pompi_intercomm_create_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_intercomm_create_f(MPI_Fint *local_comm, MPI_Fint *local_leader, - MPI_Fint *bridge_comm, + MPI_Fint *bridge_comm, MPI_Fint *remote_leader, MPI_Fint *tag, - MPI_Fint *newintercomm, + MPI_Fint *newintercomm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_newcomm; - MPI_Comm c_local_comm = MPI_Comm_f2c (*local_comm ); - MPI_Comm c_bridge_comm = MPI_Comm_f2c (*bridge_comm); + MPI_Comm c_local_comm = PMPI_Comm_f2c (*local_comm ); + MPI_Comm c_bridge_comm = PMPI_Comm_f2c (*bridge_comm); - c_ierr = MPI_Intercomm_create(c_local_comm, - OMPI_FINT_2_INT(*local_leader), + c_ierr = PMPI_Intercomm_create(c_local_comm, + OMPI_FINT_2_INT(*local_leader), c_bridge_comm, OMPI_FINT_2_INT(*remote_leader), OMPI_FINT_2_INT(*tag), @@ -84,6 +86,6 @@ void ompi_intercomm_create_f(MPI_Fint *local_comm, MPI_Fint *local_leader, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newintercomm = MPI_Comm_c2f (c_newcomm); + *newintercomm = PMPI_Comm_c2f (c_newcomm); } } diff --git a/ompi/mpi/fortran/mpif-h/intercomm_merge_f.c b/ompi/mpi/fortran/mpif-h/intercomm_merge_f.c index 6800d40879d..6c07c66580b 100644 --- a/ompi/mpi/fortran/mpif-h/intercomm_merge_f.c +++ b/ompi/mpi/fortran/mpif-h/intercomm_merge_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_INTERCOMM_MERGE = ompi_intercomm_merge_f #pragma weak pmpi_intercomm_merge = ompi_intercomm_merge_f #pragma weak pmpi_intercomm_merge_ = ompi_intercomm_merge_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Intercomm_merge_f = ompi_intercomm_merge_f #pragma weak PMPI_Intercomm_merge_f08 = ompi_intercomm_merge_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_INTERCOMM_MERGE, pmpi_intercomm_merge, pmpi_intercomm_merge_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INTERCOMM_MERGE, (MPI_Fint *intercomm, ompi_fortran_logical_t *high, MPI_Fint *newintercomm, MPI_Fint *ierr), (intercomm, high, newintercomm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_INTERCOMM_MERGE = ompi_intercomm_merge_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_INTERCOMM_MERGE, #pragma weak MPI_Intercomm_merge_f = ompi_intercomm_merge_f #pragma weak MPI_Intercomm_merge_f08 = ompi_intercomm_merge_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_INTERCOMM_MERGE, mpi_intercomm_merge, mpi_intercomm_merge_, @@ -57,26 +60,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_INTERCOMM_MERGE, ompi_intercomm_merge_f, (MPI_Fint *intercomm, ompi_fortran_logical_t *high, MPI_Fint *newintercomm, MPI_Fint *ierr), (intercomm, high, newintercomm, ierr) ) +#else +#define ompi_intercomm_merge_f pompi_intercomm_merge_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_intercomm_merge_f(MPI_Fint *intercomm, ompi_fortran_logical_t *high, - MPI_Fint *newintracomm, + MPI_Fint *newintracomm, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_newcomm; - MPI_Comm c_intercomm = MPI_Comm_f2c(*intercomm); + MPI_Comm c_intercomm = PMPI_Comm_f2c(*intercomm); - c_ierr = MPI_Intercomm_merge (c_intercomm, OMPI_LOGICAL_2_INT(*high), + c_ierr = PMPI_Intercomm_merge(c_intercomm, OMPI_LOGICAL_2_INT(*high), &c_newcomm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newintracomm = MPI_Comm_c2f (c_newcomm); + *newintracomm = PMPI_Comm_c2f (c_newcomm); } } diff --git a/ompi/mpi/fortran/mpif-h/iprobe_f.c b/ompi/mpi/fortran/mpif-h/iprobe_f.c index c5d03797e26..4b9f76a3dc4 100644 --- a/ompi/mpi/fortran/mpif-h/iprobe_f.c +++ b/ompi/mpi/fortran/mpif-h/iprobe_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/base/constants.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IPROBE = ompi_iprobe_f #pragma weak pmpi_iprobe = ompi_iprobe_f #pragma weak pmpi_iprobe_ = ompi_iprobe_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Iprobe_f = ompi_iprobe_f #pragma weak PMPI_Iprobe_f08 = ompi_iprobe_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IPROBE, pmpi_iprobe, pmpi_iprobe_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IPROBE, (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr), (source, tag, comm, flag, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IPROBE = ompi_iprobe_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IPROBE, #pragma weak MPI_Iprobe_f = ompi_iprobe_f #pragma weak MPI_Iprobe_f08 = ompi_iprobe_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IPROBE, mpi_iprobe, mpi_iprobe_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IPROBE, ompi_iprobe_f, (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr), (source, tag, comm, flag, status, ierr) ) +#else +#define ompi_iprobe_f pompi_iprobe_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_iprobe_f(MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr) { @@ -76,11 +78,11 @@ void ompi_iprobe_f(MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) OMPI_LOGICAL_NAME_DECL(flag); - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_Iprobe(OMPI_FINT_2_INT(*source), + c_ierr = PMPI_Iprobe(OMPI_FINT_2_INT(*source), OMPI_FINT_2_INT(*tag), c_comm, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag), c_status); diff --git a/ompi/mpi/fortran/mpif-h/irecv_f.c b/ompi/mpi/fortran/mpif-h/irecv_f.c index 4fde92a12d1..5703f226d95 100644 --- a/ompi/mpi/fortran/mpif-h/irecv_f.c +++ b/ompi/mpi/fortran/mpif-h/irecv_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IRECV = ompi_irecv_f #pragma weak pmpi_irecv = ompi_irecv_f #pragma weak pmpi_irecv_ = ompi_irecv_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Irecv_f = ompi_irecv_f #pragma weak PMPI_Irecv_f08 = ompi_irecv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IRECV, pmpi_irecv, pmpi_irecv_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IRECV, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, source, tag, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IRECV = ompi_irecv_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IRECV, #pragma weak MPI_Irecv_f = ompi_irecv_f #pragma weak MPI_Irecv_f08 = ompi_irecv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IRECV, mpi_irecv, mpi_irecv_, @@ -58,30 +61,29 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IRECV, ompi_irecv_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, source, tag, comm, request, ierr) ) +#else +#define ompi_irecv_f pompi_irecv_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_irecv_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Irecv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Irecv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*source), OMPI_FINT_2_INT(*tag), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/ireduce_f.c b/ompi/mpi/fortran/mpif-h/ireduce_f.c index e5428ff3f62..17ee5a3aebd 100644 --- a/ompi/mpi/fortran/mpif-h/ireduce_f.c +++ b/ompi/mpi/fortran/mpif-h/ireduce_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IREDUCE = ompi_ireduce_f #pragma weak pmpi_ireduce = ompi_ireduce_f #pragma weak pmpi_ireduce_ = ompi_ireduce_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Ireduce_f = ompi_ireduce_f #pragma weak PMPI_Ireduce_f08 = ompi_ireduce_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IREDUCE, pmpi_ireduce, pmpi_ireduce_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IREDUCE, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, root, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IREDUCE = ompi_ireduce_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IREDUCE, #pragma weak MPI_Ireduce_f = ompi_ireduce_f #pragma weak MPI_Ireduce_f08 = ompi_ireduce_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IREDUCE, mpi_ireduce, mpi_ireduce_, @@ -58,16 +61,15 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IREDUCE, ompi_ireduce_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, root, comm, request, ierr) ) +#else +#define ompi_ireduce_f pompi_ireduce_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ireduce_f(char *sendbuf, char *recvbuf, MPI_Fint *count, - MPI_Fint *datatype, MPI_Fint *op, - MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, + MPI_Fint *datatype, MPI_Fint *op, + MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; @@ -76,19 +78,19 @@ void ompi_ireduce_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Op c_op; MPI_Comm c_comm; - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); - c_comm = MPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); + c_comm = PMPI_Comm_f2c(*comm); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Ireduce(sendbuf, recvbuf, + c_ierr = PMPI_Ireduce(sendbuf, recvbuf, OMPI_FINT_2_INT(*count), - c_type, c_op, + c_type, c_op, OMPI_FINT_2_INT(*root), c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/ireduce_scatter_block_f.c b/ompi/mpi/fortran/mpif-h/ireduce_scatter_block_f.c index ce6958faf91..89e51fd1102 100644 --- a/ompi/mpi/fortran/mpif-h/ireduce_scatter_block_f.c +++ b/ompi/mpi/fortran/mpif-h/ireduce_scatter_block_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IREDUCE_SCATTER_BLOCK = ompi_ireduce_scatter_block_f #pragma weak pmpi_ireduce_scatter_block = ompi_ireduce_scatter_block_f #pragma weak pmpi_ireduce_scatter_block_ = ompi_ireduce_scatter_block_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Ireduce_scatter_block_f = ompi_ireduce_scatter_block_f #pragma weak PMPI_Ireduce_scatter_block_f08 = ompi_ireduce_scatter_block_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IREDUCE_SCATTER_BLOCK, pmpi_ireduce_scatter_block, pmpi_ireduce_scatter_block_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IREDUCE_SCATTER_BLOCK, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IREDUCE_SCATTER_BLOCK = ompi_ireduce_scatter_block_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IREDUCE_SCATTER_BLOCK, #pragma weak MPI_Ireduce_scatter_block_f = ompi_ireduce_scatter_block_f #pragma weak MPI_Ireduce_scatter_block_f08 = ompi_ireduce_scatter_block_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IREDUCE_SCATTER_BLOCK, mpi_ireduce_scatter_block, mpi_ireduce_scatter_block_, @@ -58,16 +61,15 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IREDUCE_SCATTER_BLOCK, ompi_ireduce_scatter_block_f, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, request, ierr) ) +#else +#define ompi_ireduce_scatter_block_f pompi_ireduce_scatter_block_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_ireduce_scatter_block_f(char *sendbuf, char *recvbuf, + +void ompi_ireduce_scatter_block_f(char *sendbuf, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *datatype, - MPI_Fint *op, MPI_Fint *comm, + MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; @@ -77,19 +79,19 @@ void ompi_ireduce_scatter_block_f(char *sendbuf, char *recvbuf, MPI_Op c_op; int size; - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - - c_ierr = MPI_Ireduce_scatter_block(sendbuf, recvbuf, + + c_ierr = PMPI_Ireduce_scatter_block(sendbuf, recvbuf, OMPI_FINT_2_INT(*recvcount), c_type, c_op, c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/ireduce_scatter_f.c b/ompi/mpi/fortran/mpif-h/ireduce_scatter_f.c index ab87b09f8e6..a0910394dfa 100644 --- a/ompi/mpi/fortran/mpif-h/ireduce_scatter_f.c +++ 
b/ompi/mpi/fortran/mpif-h/ireduce_scatter_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IREDUCE_SCATTER = ompi_ireduce_scatter_f #pragma weak pmpi_ireduce_scatter = ompi_ireduce_scatter_f #pragma weak pmpi_ireduce_scatter_ = ompi_ireduce_scatter_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Ireduce_scatter_f = ompi_ireduce_scatter_f #pragma weak PMPI_Ireduce_scatter_f08 = ompi_ireduce_scatter_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IREDUCE_SCATTER, pmpi_ireduce_scatter, pmpi_ireduce_scatter_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IREDUCE_SCATTER, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IREDUCE_SCATTER = ompi_ireduce_scatter_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IREDUCE_SCATTER, #pragma weak MPI_Ireduce_scatter_f = ompi_ireduce_scatter_f #pragma weak MPI_Ireduce_scatter_f08 = ompi_ireduce_scatter_f -#endif - -#if ! 
OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IREDUCE_SCATTER, mpi_ireduce_scatter, mpi_ireduce_scatter_, @@ -58,16 +61,15 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IREDUCE_SCATTER, ompi_ireduce_scatter_f, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, request, ierr) ) +#else +#define ompi_ireduce_scatter_f pompi_ireduce_scatter_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_ireduce_scatter_f(char *sendbuf, char *recvbuf, + +void ompi_ireduce_scatter_f(char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, - MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, + MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; @@ -78,20 +80,20 @@ void ompi_ireduce_scatter_f(char *sendbuf, char *recvbuf, int size; OMPI_ARRAY_NAME_DECL(recvcounts); - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - - c_ierr = MPI_Ireduce_scatter(sendbuf, recvbuf, + + c_ierr = PMPI_Ireduce_scatter(sendbuf, recvbuf, OMPI_ARRAY_NAME_CONVERT(recvcounts), c_type, c_op, c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/irsend_f.c b/ompi/mpi/fortran/mpif-h/irsend_f.c index e4316491a6b..d4fe6a12bf6 
100644 --- a/ompi/mpi/fortran/mpif-h/irsend_f.c +++ b/ompi/mpi/fortran/mpif-h/irsend_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IRSEND = ompi_irsend_f #pragma weak pmpi_irsend = ompi_irsend_f #pragma weak pmpi_irsend_ = ompi_irsend_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Irsend_f = ompi_irsend_f #pragma weak PMPI_Irsend_f08 = ompi_irsend_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IRSEND, pmpi_irsend, pmpi_irsend_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IRSEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IRSEND = ompi_irsend_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IRSEND, #pragma weak MPI_Irsend_f = ompi_irsend_f #pragma weak MPI_Irsend_f08 = ompi_irsend_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IRSEND, mpi_irsend, mpi_irsend_, @@ -58,29 +61,28 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IRSEND, ompi_irsend_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) +#else +#define ompi_irsend_f pompi_irsend_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_irsend_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Irsend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Irsend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/is_thread_main_f.c b/ompi/mpi/fortran/mpif-h/is_thread_main_f.c index 39cf69e8f50..b78c4bfd0ba 100644 --- a/ompi/mpi/fortran/mpif-h/is_thread_main_f.c +++ b/ompi/mpi/fortran/mpif-h/is_thread_main_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. 
All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_IS_THREAD_MAIN = ompi_is_thread_main_f #pragma weak pmpi_is_thread_main = ompi_is_thread_main_f #pragma weak pmpi_is_thread_main_ = ompi_is_thread_main_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Is_thread_main_f = ompi_is_thread_main_f #pragma weak PMPI_Is_thread_main_f08 = ompi_is_thread_main_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_IS_THREAD_MAIN, pmpi_is_thread_main, pmpi_is_thread_main_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IS_THREAD_MAIN, (ompi_fortran_logical_t *flag, MPI_Fint *ierr), (flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_IS_THREAD_MAIN = ompi_is_thread_main_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_IS_THREAD_MAIN, #pragma weak MPI_Is_thread_main_f = ompi_is_thread_main_f #pragma weak MPI_Is_thread_main_f08 = ompi_is_thread_main_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_IS_THREAD_MAIN, mpi_is_thread_main, mpi_is_thread_main_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_IS_THREAD_MAIN, ompi_is_thread_main_f, (ompi_fortran_logical_t *flag, MPI_Fint *ierr), (flag, ierr) ) +#else +#define ompi_is_thread_main_f pompi_is_thread_main_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_is_thread_main_f(ompi_fortran_logical_t *flag, MPI_Fint *ierr) { int c_ierr; OMPI_LOGICAL_NAME_DECL(flag); - c_ierr = MPI_Is_thread_main(OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); + c_ierr = PMPI_Is_thread_main(OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/iscan_f.c b/ompi/mpi/fortran/mpif-h/iscan_f.c index 442494764d0..0b1271d1a2e 100644 --- a/ompi/mpi/fortran/mpif-h/iscan_f.c +++ b/ompi/mpi/fortran/mpif-h/iscan_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ISCAN = ompi_iscan_f #pragma weak pmpi_iscan = ompi_iscan_f #pragma weak pmpi_iscan_ = ompi_iscan_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Iscan_f = ompi_iscan_f #pragma weak PMPI_Iscan_f08 = ompi_iscan_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ISCAN, pmpi_iscan, pmpi_iscan_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ISCAN, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ISCAN = ompi_iscan_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ISCAN, #pragma weak MPI_Iscan_f = ompi_iscan_f #pragma weak MPI_Iscan_f08 = ompi_iscan_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ISCAN, mpi_iscan, mpi_iscan_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ISCAN, ompi_iscan_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, request, ierr) ) +#else +#define ompi_iscan_f pompi_iscan_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_iscan_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) @@ -75,18 +77,18 @@ void ompi_iscan_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Request c_request; MPI_Op c_op; - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); - c_comm = MPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); + c_comm = PMPI_Comm_f2c(*comm); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - - c_ierr = MPI_Iscan(sendbuf, recvbuf, + + c_ierr = PMPI_Iscan(sendbuf, recvbuf, OMPI_FINT_2_INT(*count), - c_type, c_op, + c_type, c_op, c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/iscatter_f.c b/ompi/mpi/fortran/mpif-h/iscatter_f.c index 6cd7323b479..7ae74152a96 100644 --- a/ompi/mpi/fortran/mpif-h/iscatter_f.c +++ b/ompi/mpi/fortran/mpif-h/iscatter_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ISCATTER = ompi_iscatter_f #pragma weak pmpi_iscatter = ompi_iscatter_f #pragma weak pmpi_iscatter_ = ompi_iscatter_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Iscatter_f = ompi_iscatter_f #pragma weak PMPI_Iscatter_f08 = ompi_iscatter_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ISCATTER, pmpi_iscatter, pmpi_iscatter_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ISCATTER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ISCATTER = ompi_iscatter_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ISCATTER, #pragma weak MPI_Iscatter_f = ompi_iscatter_f #pragma weak MPI_Iscatter_f08 = ompi_iscatter_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ISCATTER, mpi_iscatter, mpi_iscatter_, @@ -58,36 +61,35 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ISCATTER, ompi_iscatter_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request, ierr) ) +#else +#define ompi_iscatter_f pompi_iscatter_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_iscatter_f(char *sendbuf, MPI_Fint *sendcount, + +void ompi_iscatter_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, - MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, + MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; MPI_Datatype c_sendtype, c_recvtype; MPI_Request c_request; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); - - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); + + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - - c_ierr = MPI_Iscatter(sendbuf,OMPI_FINT_2_INT(*sendcount), - c_sendtype, recvbuf, + + c_ierr = PMPI_Iscatter(sendbuf,OMPI_FINT_2_INT(*sendcount), + c_sendtype, recvbuf, OMPI_FINT_2_INT(*recvcount), - c_recvtype, + c_recvtype, OMPI_FINT_2_INT(*root), c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = PMPI_Request_c2f(c_request); } diff --git a/ompi/mpi/fortran/mpif-h/iscatterv_f.c b/ompi/mpi/fortran/mpif-h/iscatterv_f.c index a4c84ca7df8..201be7b367e 100644 --- a/ompi/mpi/fortran/mpif-h/iscatterv_f.c +++ b/ompi/mpi/fortran/mpif-h/iscatterv_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ISCATTERV = ompi_iscatterv_f #pragma weak pmpi_iscatterv = ompi_iscatterv_f #pragma weak pmpi_iscatterv_ = ompi_iscatterv_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Iscatterv_f = ompi_iscatterv_f #pragma weak PMPI_Iscatterv_f08 = ompi_iscatterv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ISCATTERV, pmpi_iscatterv, pmpi_iscatterv_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ISCATTERV, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *displs, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ISCATTERV = ompi_iscatterv_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ISCATTERV, #pragma weak MPI_Iscatterv_f = ompi_iscatterv_f #pragma weak MPI_Iscatterv_f08 = ompi_iscatterv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ISCATTERV, mpi_iscatterv, mpi_iscatterv_, @@ -58,16 +61,15 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ISCATTERV, ompi_iscatterv_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *displs, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request, ierr) ) +#else +#define ompi_iscatterv_f pompi_iscatterv_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_iscatterv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *displs, MPI_Fint *sendtype, - char *recvbuf, MPI_Fint *recvcount, + char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { @@ -78,27 +80,27 @@ void ompi_iscatterv_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_NAME_DECL(sendcounts); OMPI_ARRAY_NAME_DECL(displs); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(sendcounts, size); OMPI_ARRAY_FINT_2_INT(displs, size); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - - c_ierr = MPI_Iscatterv(sendbuf, + + c_ierr = PMPI_Iscatterv(sendbuf, OMPI_ARRAY_NAME_CONVERT(sendcounts), OMPI_ARRAY_NAME_CONVERT(displs), c_sendtype, recvbuf, OMPI_FINT_2_INT(*recvcount), - c_recvtype, + c_recvtype, OMPI_FINT_2_INT(*root), c_comm, &c_request); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS == c_ierr) *request = MPI_Request_c2f(c_request); + if (MPI_SUCCESS == c_ierr) *request = 
PMPI_Request_c2f(c_request); OMPI_ARRAY_FINT_2_INT_CLEANUP(sendcounts); OMPI_ARRAY_FINT_2_INT_CLEANUP(displs); diff --git a/ompi/mpi/fortran/mpif-h/isend_f.c b/ompi/mpi/fortran/mpif-h/isend_f.c index 30fe99e5c59..d29a9933a06 100644 --- a/ompi/mpi/fortran/mpif-h/isend_f.c +++ b/ompi/mpi/fortran/mpif-h/isend_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ISEND = ompi_isend_f #pragma weak pmpi_isend = ompi_isend_f #pragma weak pmpi_isend_ = ompi_isend_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Isend_f = ompi_isend_f #pragma weak PMPI_Isend_f08 = ompi_isend_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ISEND, pmpi_isend, pmpi_isend_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ISEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ISEND = ompi_isend_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ISEND, #pragma weak MPI_Isend_f = ompi_isend_f #pragma weak MPI_Isend_f08 
= ompi_isend_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ISEND, mpi_isend, mpi_isend_, @@ -58,29 +61,28 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ISEND, ompi_isend_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) +#else +#define ompi_isend_f pompi_isend_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_isend_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Isend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Isend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/issend_f.c b/ompi/mpi/fortran/mpif-h/issend_f.c index c2ca666128b..ae299e98888 100644 --- a/ompi/mpi/fortran/mpif-h/issend_f.c +++ b/ompi/mpi/fortran/mpif-h/issend_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_ISSEND = ompi_issend_f #pragma weak pmpi_issend = ompi_issend_f #pragma weak pmpi_issend_ = ompi_issend_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Issend_f = ompi_issend_f #pragma weak PMPI_Issend_f08 = ompi_issend_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_ISSEND, pmpi_issend, pmpi_issend_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ISSEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_ISSEND = ompi_issend_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_ISSEND, #pragma weak MPI_Issend_f = ompi_issend_f #pragma weak MPI_Issend_f08 = ompi_issend_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_ISSEND, mpi_issend, mpi_issend_, @@ -58,29 +61,28 @@ OMPI_GENERATE_F77_BINDINGS (MPI_ISSEND, ompi_issend_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) +#else +#define ompi_issend_f pompi_issend_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_issend_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Issend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Issend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/keyval_create_f.c b/ompi/mpi/fortran/mpif-h/keyval_create_f.c index 086537668af..af3465ad6fd 100644 --- a/ompi/mpi/fortran/mpif-h/keyval_create_f.c +++ b/ompi/mpi/fortran/mpif-h/keyval_create_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_KEYVAL_CREATE = ompi_keyval_create_f #pragma weak pmpi_keyval_create = ompi_keyval_create_f #pragma weak pmpi_keyval_create_ = ompi_keyval_create_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Keyval_create_f = ompi_keyval_create_f #pragma weak PMPI_Keyval_create_f08 = ompi_keyval_create_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_KEYVAL_CREATE, pmpi_keyval_create, pmpi_keyval_create_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_KEYVAL_CREATE, (ompi_mpi1_fortran_copy_attr_function* copy_fn, ompi_mpi1_fortran_delete_attr_function* delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr), (copy_fn, delete_fn, keyval, extra_state, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_KEYVAL_CREATE = ompi_keyval_create_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_KEYVAL_CREATE, #pragma weak MPI_Keyval_create_f = ompi_keyval_create_f #pragma weak MPI_Keyval_create_f08 = ompi_keyval_create_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_KEYVAL_CREATE, mpi_keyval_create, mpi_keyval_create_, @@ -58,18 +61,16 @@ OMPI_GENERATE_F77_BINDINGS (MPI_KEYVAL_CREATE, ompi_keyval_create_f, (ompi_mpi1_fortran_copy_attr_function* copy_fn, ompi_mpi1_fortran_delete_attr_function* delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr), (copy_fn, delete_fn, keyval, extra_state, ierr) ) +#else +#define ompi_keyval_create_f pompi_keyval_create_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif static const char FUNC_NAME[] = "MPI_keyval_create_f"; void ompi_keyval_create_f(ompi_mpi1_fortran_copy_attr_function* copy_attr_fn, ompi_mpi1_fortran_delete_attr_function* delete_attr_fn, - MPI_Fint *keyval, MPI_Fint *extra_state, + MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr) { int ret, c_ierr; diff --git a/ompi/mpi/fortran/mpif-h/keyval_free_f.c b/ompi/mpi/fortran/mpif-h/keyval_free_f.c index cdfb3153e3d..e836b689ebb 100644 --- a/ompi/mpi/fortran/mpif-h/keyval_free_f.c +++ b/ompi/mpi/fortran/mpif-h/keyval_free_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_KEYVAL_FREE = ompi_keyval_free_f #pragma weak pmpi_keyval_free = ompi_keyval_free_f #pragma weak pmpi_keyval_free_ = ompi_keyval_free_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Keyval_free_f = ompi_keyval_free_f #pragma weak PMPI_Keyval_free_f08 = ompi_keyval_free_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_KEYVAL_FREE, pmpi_keyval_free, pmpi_keyval_free_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_KEYVAL_FREE, (MPI_Fint *keyval, MPI_Fint *ierr), (keyval, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_KEYVAL_FREE = ompi_keyval_free_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_KEYVAL_FREE, #pragma weak MPI_Keyval_free_f = ompi_keyval_free_f #pragma weak MPI_Keyval_free_f08 = ompi_keyval_free_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_KEYVAL_FREE, mpi_keyval_free, mpi_keyval_free_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_KEYVAL_FREE, ompi_keyval_free_f, (MPI_Fint *keyval, MPI_Fint *ierr), (keyval, ierr) ) +#else +#define ompi_keyval_free_f pompi_keyval_free_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_keyval_free_f(MPI_Fint *keyval, MPI_Fint *ierr) { int c_ierr; @@ -71,7 +73,7 @@ void ompi_keyval_free_f(MPI_Fint *keyval, MPI_Fint *ierr) OMPI_SINGLE_FINT_2_INT(keyval); - c_ierr = MPI_Keyval_free(OMPI_SINGLE_NAME_CONVERT(keyval)); + c_ierr = PMPI_Keyval_free(OMPI_SINGLE_NAME_CONVERT(keyval)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/lookup_name_f.c b/ompi/mpi/fortran/mpif-h/lookup_name_f.c index 345d3eeb5fb..766361e809f 100644 --- a/ompi/mpi/fortran/mpif-h/lookup_name_f.c +++ b/ompi/mpi/fortran/mpif-h/lookup_name_f.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_LOOKUP_NAME = ompi_lookup_name_f #pragma weak pmpi_lookup_name = ompi_lookup_name_f #pragma weak pmpi_lookup_name_ = ompi_lookup_name_f @@ -32,7 +33,7 @@ #pragma weak PMPI_Lookup_name_f = ompi_lookup_name_f #pragma weak PMPI_Lookup_name_f08 = ompi_lookup_name_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_LOOKUP_NAME, pmpi_lookup_name, pmpi_lookup_name_, @@ -41,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_LOOKUP_NAME, (char *service_name, MPI_Fint *info, char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len), (service_name, info, port_name, ierr, service_name_len, port_name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_LOOKUP_NAME = ompi_lookup_name_f @@ -50,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_LOOKUP_NAME, #pragma weak MPI_Lookup_name_f = ompi_lookup_name_f #pragma weak MPI_Lookup_name_f08 = ompi_lookup_name_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_LOOKUP_NAME, mpi_lookup_name, mpi_lookup_name_, @@ -60,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_LOOKUP_NAME, ompi_lookup_name_f, (char *service_name, MPI_Fint *info, char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len), (service_name, info, port_name, ierr, service_name_len, port_name_len) ) +#else +#define ompi_lookup_name_f pompi_lookup_name_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_lookup_name_f(char *service_name, MPI_Fint *info, char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len) { @@ -75,7 +75,7 @@ void ompi_lookup_name_f(char *service_name, MPI_Fint *info, char *c_service_name; char *c_port_name; - c_info = MPI_Info_f2c(*info); + c_info = PMPI_Info_f2c(*info); ompi_fortran_string_f2c(service_name, service_name_len, &c_service_name); c_port_name = (char *) malloc (port_name_len+1); @@ -85,12 +85,12 @@ void ompi_lookup_name_f(char *service_name, MPI_Fint *info, return; } - c_ierr = MPI_Lookup_name(c_service_name, c_info, c_port_name); + c_ierr = PMPI_Lookup_name(c_service_name, c_info, c_port_name); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if ( MPI_SUCCESS == c_ierr) { ompi_fortran_string_c2f(c_port_name, port_name, port_name_len); - } + } free (c_service_name); free (c_port_name); } diff --git a/ompi/mpi/fortran/mpif-h/mprobe_f.c b/ompi/mpi/fortran/mpif-h/mprobe_f.c index 660869453b6..db39bce941c 100644 --- a/ompi/mpi/fortran/mpif-h/mprobe_f.c +++ b/ompi/mpi/fortran/mpif-h/mprobe_f.c @@ -5,17 +5,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,7 +29,8 @@ #include "ompi/mpi/fortran/base/constants.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_MPROBE = ompi_mprobe_f #pragma weak pmpi_mprobe = ompi_mprobe_f #pragma weak pmpi_mprobe_ = ompi_mprobe_f @@ -34,60 +38,62 @@ #pragma weak PMPI_Mprobe_f = ompi_mprobe_f #pragma weak PMPI_Mprobe_f08 = ompi_mprobe_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_MPROBE, pmpi_mprobe, pmpi_mprobe_, pmpi_mprobe__, pompi_mprobe_f, - (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *message, + (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *message, MPI_Fint *status, MPI_Fint *ierr), (source, tag, comm, message, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_MPROBE = ompi_mprobe_f #pragma weak mpi_mprobe = ompi_mprobe_f #pragma weak mpi_mprobe_ = ompi_mprobe_f #pragma weak mpi_mprobe__ = ompi_mprobe_f -#endif -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#pragma weak MPI_Mprobe_f = ompi_mprobe_f +#pragma weak MPI_Mprobe_f08 = ompi_mprobe_f +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_MPROBE, mpi_mprobe, mpi_mprobe_, mpi_mprobe__, ompi_mprobe_f, - (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *message, + (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *message, MPI_Fint *status, MPI_Fint *ierr), (source, tag, comm, message, status, ierr) ) +#else +#define ompi_mprobe_f pompi_mprobe_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_mprobe_f(MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, + +void ompi_mprobe_f(MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *message, MPI_Fint *status, MPI_Fint *ierr) -{ +{ MPI_Comm c_comm; MPI_Message c_message; OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) int c_ierr; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = OMPI_INT_2_FINT(MPI_Mprobe(OMPI_FINT_2_INT(*source), - OMPI_FINT_2_INT(*tag), - c_comm, &c_message, - c_status)); + c_ierr = OMPI_INT_2_FINT(PMPI_Mprobe(OMPI_FINT_2_INT(*source), + OMPI_FINT_2_INT(*tag), + c_comm, &c_message, + c_status)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) - *message = MPI_Message_c2f(c_message); + *message = PMPI_Message_c2f(c_message); } } diff --git a/ompi/mpi/fortran/mpif-h/mrecv_f.c b/ompi/mpi/fortran/mpif-h/mrecv_f.c index aec45283f22..33a122510cd 100644 --- a/ompi/mpi/fortran/mpif-h/mrecv_f.c +++ b/ompi/mpi/fortran/mpif-h/mrecv_f.c @@ -5,16 +5,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,12 +28,16 @@ #include "ompi/mpi/fortran/base/constants.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_MRECV = ompi_mrecv_f #pragma weak pmpi_mrecv = ompi_mrecv_f #pragma weak pmpi_mrecv_ = ompi_mrecv_f #pragma weak pmpi_mrecv__ = ompi_mrecv_f -#elif OMPI_PROFILE_LAYER + +#pragma weak PMPI_Mrecv_f = ompi_mrecv_f +#pragma weak PMPI_Mrecv_f08 = ompi_mrecv_f +#else OMPI_GENERATE_F77_BINDINGS (PMPI_MRECV, pmpi_mrecv, pmpi_mrecv_, @@ -40,15 +47,18 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_MRECV, MPI_Fint *status, MPI_Fint *ierr), (buf, count, datatype, message, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_MRECV = ompi_mrecv_f #pragma weak mpi_mrecv = ompi_mrecv_f #pragma weak mpi_mrecv_ = ompi_mrecv_f #pragma weak mpi_mrecv__ = ompi_mrecv_f -#endif -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#pragma weak MPI_Mrecv_f = ompi_mrecv_f +#pragma weak MPI_Mrecv_f08 = ompi_mrecv_f +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_MRECV, mpi_mrecv, mpi_mrecv_, @@ -57,32 +67,31 @@ OMPI_GENERATE_F77_BINDINGS (MPI_MRECV, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *message, MPI_Fint *status, MPI_Fint *ierr), (buf, count, datatype, message, status, ierr) ) +#else +#define ompi_mrecv_f pompi_mrecv_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_mrecv_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, + +void ompi_mrecv_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *message, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) - MPI_Message c_message = MPI_Message_f2c(*message); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Message c_message = PMPI_Message_f2c(*message); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) /* Call the C function */ - c_ierr = OMPI_INT_2_FINT(MPI_Mrecv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), - c_type, &c_message, - c_status)); + c_ierr = OMPI_INT_2_FINT(PMPI_Mrecv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_type, &c_message, + c_status)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr) /* message is an INOUT, and may be updated by the recv */ - *message = MPI_Message_c2f(c_message); + *message = PMPI_Message_c2f(c_message); } } diff --git a/ompi/mpi/fortran/mpif-h/neighbor_allgather_f.c b/ompi/mpi/fortran/mpif-h/neighbor_allgather_f.c index 2369eb07845..e0ffdf083a1 100644 --- a/ompi/mpi/fortran/mpif-h/neighbor_allgather_f.c +++ b/ompi/mpi/fortran/mpif-h/neighbor_allgather_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_NEIGHBOR_ALLGATHER = ompi_neighbor_allgather_f #pragma weak pmpi_neighbor_allgather = ompi_neighbor_allgather_f #pragma weak pmpi_neighbor_allgather_ = ompi_neighbor_allgather_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Neighbor_allgather_f = ompi_neighbor_allgather_f #pragma weak PMPI_Neighbor_allgather_f08 = ompi_neighbor_allgather_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLGATHER, pmpi_neighbor_allgather, pmpi_neighbor_allgather_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLGATHER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_NEIGHBOR_ALLGATHER = ompi_neighbor_allgather_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLGATHER, #pragma weak MPI_Neighbor_allgather_f = ompi_neighbor_allgather_f #pragma weak MPI_Neighbor_allgather_f08 = ompi_neighbor_allgather_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLGATHER, mpi_neighbor_allgather, mpi_neighbor_allgather_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLGATHER, ompi_neighbor_allgather_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, ierr) ) +#else +#define ompi_neighbor_allgather_f pompi_neighbor_allgather_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_neighbor_allgather_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr) @@ -76,20 +78,20 @@ void ompi_neighbor_allgather_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sen MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = MPI_Neighbor_allgather(sendbuf, - OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, - OMPI_FINT_2_INT(*recvcount), - c_recvtype, c_comm); + ierr_c = PMPI_Neighbor_allgather(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + OMPI_FINT_2_INT(*recvcount), + c_recvtype, c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); } diff --git a/ompi/mpi/fortran/mpif-h/neighbor_allgatherv_f.c b/ompi/mpi/fortran/mpif-h/neighbor_allgatherv_f.c index c6587ac734a..0008ca3185c 100644 --- a/ompi/mpi/fortran/mpif-h/neighbor_allgatherv_f.c +++ b/ompi/mpi/fortran/mpif-h/neighbor_allgatherv_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_NEIGHBOR_ALLGATHERV = ompi_neighbor_allgatherv_f #pragma weak pmpi_neighbor_allgatherv = ompi_neighbor_allgatherv_f #pragma weak pmpi_neighbor_allgatherv_ = ompi_neighbor_allgatherv_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Neighbor_allgatherv_f = ompi_neighbor_allgatherv_f #pragma weak PMPI_Neighbor_allgatherv_f08 = ompi_neighbor_allgatherv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLGATHERV, pmpi_neighbor_allgatherv, pmpi_neighbor_allgatherv_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLGATHERV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_NEIGHBOR_ALLGATHERV = ompi_neighbor_allgatherv_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLGATHERV, #pragma weak MPI_Neighbor_allgatherv_f = ompi_neighbor_allgatherv_f #pragma weak MPI_Neighbor_allgatherv_f08 = ompi_neighbor_allgatherv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLGATHERV, mpi_neighbor_allgatherv, mpi_neighbor_allgatherv_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLGATHERV, ompi_neighbor_allgatherv_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, ierr) ) +#else +#define ompi_neighbor_allgatherv_f pompi_neighbor_allgatherv_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_neighbor_allgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr) @@ -78,11 +80,11 @@ void ompi_neighbor_allgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *se OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(displs); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); OMPI_ARRAY_FINT_2_INT(displs, size); @@ -90,13 +92,13 @@ void ompi_neighbor_allgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *se sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - ierr_c = MPI_Neighbor_allgatherv(sendbuf, - OMPI_FINT_2_INT(*sendcount), - c_sendtype, - recvbuf, - OMPI_ARRAY_NAME_CONVERT(recvcounts), - OMPI_ARRAY_NAME_CONVERT(displs), - c_recvtype, c_comm); + ierr_c = PMPI_Neighbor_allgatherv(sendbuf, + OMPI_FINT_2_INT(*sendcount), + c_sendtype, + recvbuf, + OMPI_ARRAY_NAME_CONVERT(recvcounts), + OMPI_ARRAY_NAME_CONVERT(displs), + c_recvtype, c_comm); if 
(NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); diff --git a/ompi/mpi/fortran/mpif-h/neighbor_alltoall_f.c b/ompi/mpi/fortran/mpif-h/neighbor_alltoall_f.c index 47aaa1aae75..57e28e2a119 100644 --- a/ompi/mpi/fortran/mpif-h/neighbor_alltoall_f.c +++ b/ompi/mpi/fortran/mpif-h/neighbor_alltoall_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_NEIGHBOR_ALLTOALL = ompi_neighbor_alltoall_f #pragma weak pmpi_neighbor_alltoall = ompi_neighbor_alltoall_f #pragma weak pmpi_neighbor_alltoall_ = ompi_neighbor_alltoall_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Neighbor_alltoall_f = ompi_neighbor_alltoall_f #pragma weak PMPI_Neighbor_alltoall_f08 = ompi_neighbor_alltoall_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALL, pmpi_neighbor_alltoall, pmpi_neighbor_alltoall_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALL, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_NEIGHBOR_ALLTOALL = ompi_neighbor_alltoall_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALL, #pragma weak MPI_Neighbor_alltoall_f = ompi_neighbor_alltoall_f #pragma weak MPI_Neighbor_alltoall_f08 = ompi_neighbor_alltoall_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLTOALL, mpi_neighbor_alltoall, mpi_neighbor_alltoall_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLTOALL, ompi_neighbor_alltoall_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, ierr) ) +#else +#define ompi_neighbor_alltoall_f pompi_neighbor_alltoall_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_neighbor_alltoall_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr) @@ -76,15 +78,15 @@ void ompi_neighbor_alltoall_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *send MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Neighbor_alltoall(sendbuf, + c_ierr = PMPI_Neighbor_alltoall(sendbuf, OMPI_FINT_2_INT(*sendcount), c_sendtype, recvbuf, diff --git a/ompi/mpi/fortran/mpif-h/neighbor_alltoallv_f.c b/ompi/mpi/fortran/mpif-h/neighbor_alltoallv_f.c index 99ba170e6a3..e9b96425e94 100644 --- a/ompi/mpi/fortran/mpif-h/neighbor_alltoallv_f.c +++ b/ompi/mpi/fortran/mpif-h/neighbor_alltoallv_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_NEIGHBOR_ALLTOALLV = ompi_neighbor_alltoallv_f #pragma weak pmpi_neighbor_alltoallv = ompi_neighbor_alltoallv_f #pragma weak pmpi_neighbor_alltoallv_ = ompi_neighbor_alltoallv_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Neighbor_alltoallv_f = ompi_neighbor_alltoallv_f #pragma weak PMPI_Neighbor_alltoallv_f08 = ompi_neighbor_alltoallv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALLV, pmpi_neighbor_alltoallv, pmpi_neighbor_alltoallv_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALLV, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_NEIGHBOR_ALLTOALLV = ompi_neighbor_alltoallv_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALLV, #pragma weak MPI_Neighbor_alltoallv_f = ompi_neighbor_alltoallv_f #pragma weak MPI_Neighbor_alltoallv_f08 = ompi_neighbor_alltoallv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLTOALLV, mpi_neighbor_alltoallv, mpi_neighbor_alltoallv_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLTOALLV, ompi_neighbor_alltoallv_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, ierr) ) +#else +#define ompi_neighbor_alltoallv_f pompi_neighbor_alltoallv_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_neighbor_alltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sdispls, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *rdispls, MPI_Fint *recvtype, @@ -81,11 +83,11 @@ void ompi_neighbor_alltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sd OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(rdispls); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(sendcounts, size); OMPI_ARRAY_FINT_2_INT(sdispls, size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); @@ -95,7 +97,7 @@ void ompi_neighbor_alltoallv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *sd sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Neighbor_alltoallv(sendbuf, + c_ierr = PMPI_Neighbor_alltoallv(sendbuf, OMPI_ARRAY_NAME_CONVERT(sendcounts), OMPI_ARRAY_NAME_CONVERT(sdispls), c_sendtype, diff --git a/ompi/mpi/fortran/mpif-h/neighbor_alltoallw_f.c b/ompi/mpi/fortran/mpif-h/neighbor_alltoallw_f.c index 0aeb91a0ac2..f5a34a36e1c 100644 --- 
a/ompi/mpi/fortran/mpif-h/neighbor_alltoallw_f.c +++ b/ompi/mpi/fortran/mpif-h/neighbor_alltoallw_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_NEIGHBOR_ALLTOALLW = ompi_neighbor_alltoallw_f #pragma weak pmpi_neighbor_alltoallw = ompi_neighbor_alltoallw_f #pragma weak pmpi_neighbor_alltoallw_ = ompi_neighbor_alltoallw_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Neighbor_alltoallw_f = ompi_neighbor_alltoallw_f #pragma weak PMPI_Neighbor_alltoallw_f08 = ompi_neighbor_alltoallw_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALLW, pmpi_neighbor_alltoallw, pmpi_neighbor_alltoallw_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALLW, (char *sendbuf, MPI_Fint *sendcounts, MPI_Aint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Aint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_NEIGHBOR_ALLTOALLW = ompi_neighbor_alltoallw_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_NEIGHBOR_ALLTOALLW, #pragma weak MPI_Neighbor_alltoallw_f = ompi_neighbor_alltoallw_f #pragma weak MPI_Neighbor_alltoallw_f08 = ompi_neighbor_alltoallw_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLTOALLW, mpi_neighbor_alltoallw, mpi_neighbor_alltoallw_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_NEIGHBOR_ALLTOALLW, ompi_neighbor_alltoallw_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Aint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, MPI_Aint *rdispls, MPI_Fint *recvtypes, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, ierr) ) +#else +#define ompi_neighbor_alltoallw_f pompi_neighbor_alltoallw_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_neighbor_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Aint *sdispls, MPI_Fint *sendtypes, char *recvbuf, MPI_Fint *recvcounts, @@ -80,8 +82,8 @@ void ompi_neighbor_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_NAME_DECL(sendcounts); OMPI_ARRAY_NAME_DECL(recvcounts); - c_comm = MPI_Comm_f2c(*comm); - MPI_Comm_size(c_comm, &size); + c_comm = PMPI_Comm_f2c(*comm); + PMPI_Comm_size(c_comm, &size); c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); c_recvtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); @@ -90,8 +92,8 @@ void ompi_neighbor_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_FINT_2_INT(recvcounts, size); while (size > 0) { - c_sendtypes[size - 1] = MPI_Type_f2c(sendtypes[size - 1]); - c_recvtypes[size - 1] = MPI_Type_f2c(recvtypes[size - 1]); + c_sendtypes[size - 1] = PMPI_Type_f2c(sendtypes[size - 1]); + c_recvtypes[size - 1] = PMPI_Type_f2c(recvtypes[size - 1]); --size; } @@ -99,7 +101,7 @@ void ompi_neighbor_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Neighbor_alltoallw(sendbuf, + c_ierr = PMPI_Neighbor_alltoallw(sendbuf, OMPI_ARRAY_NAME_CONVERT(sendcounts), sdispls, c_sendtypes, diff 
--git a/ompi/mpi/fortran/mpif-h/op_commutative_f.c b/ompi/mpi/fortran/mpif-h/op_commutative_f.c index 63644c0237c..f4c5b72ee1a 100644 --- a/ompi/mpi/fortran/mpif-h/op_commutative_f.c +++ b/ompi/mpi/fortran/mpif-h/op_commutative_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_OP_COMMUTATIVE = ompi_op_commutative_f #pragma weak pmpi_op_commutative = ompi_op_commutative_f #pragma weak pmpi_op_commutative_ = ompi_op_commutative_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Op_commutative_f = ompi_op_commutative_f #pragma weak PMPI_Op_commutative_f08 = ompi_op_commutative_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_OP_COMMUTATIVE, pmpi_op_commutative, pmpi_op_commutative_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_OP_COMMUTATIVE, (MPI_Fint *op, MPI_Fint *commute, MPI_Fint *ierr), (op, commute, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_OP_COMMUTATIVE = ompi_op_commutative_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_OP_COMMUTATIVE, #pragma weak MPI_Op_commutative_f = ompi_op_commutative_f #pragma weak MPI_Op_commutative_f08 = ompi_op_commutative_f -#endif - -#if ! 
OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_OP_COMMUTATIVE, mpi_op_commutative, mpi_op_commutative_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_OP_COMMUTATIVE, ompi_op_commutative_f, (MPI_Fint *op, MPI_Fint *commute, MPI_Fint *ierr), (op, commute, ierr) ) +#else +#define ompi_op_commutative_f pompi_op_commutative_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_op_commutative_f(MPI_Fint *op, MPI_Fint *commute, MPI_Fint *ierr) { int c_ierr; MPI_Op c_op; OMPI_SINGLE_NAME_DECL(commute); - c_op = MPI_Op_f2c(*op); + c_op = PMPI_Op_f2c(*op); - c_ierr = MPI_Op_commutative(c_op, OMPI_SINGLE_NAME_CONVERT(commute)); + c_ierr = PMPI_Op_commutative(c_op, OMPI_SINGLE_NAME_CONVERT(commute)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/op_create_f.c b/ompi/mpi/fortran/mpif-h/op_create_f.c index b192426028f..05334a07015 100644 --- a/ompi/mpi/fortran/mpif-h/op_create_f.c +++ b/ompi/mpi/fortran/mpif-h/op_create_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_OP_CREATE = ompi_op_create_f #pragma weak pmpi_op_create = ompi_op_create_f #pragma weak pmpi_op_create_ = ompi_op_create_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Op_create_f = ompi_op_create_f #pragma weak PMPI_Op_create_f08 = ompi_op_create_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_OP_CREATE, pmpi_op_create, pmpi_op_create_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_OP_CREATE, (ompi_op_fortran_handler_fn_t* function, ompi_fortran_logical_t *commute, MPI_Fint *op, MPI_Fint *ierr), (function, commute, op, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_OP_CREATE = ompi_op_create_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_OP_CREATE, #pragma weak MPI_Op_create_f = ompi_op_create_f #pragma weak MPI_Op_create_f08 = ompi_op_create_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_OP_CREATE, mpi_op_create, mpi_op_create_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_OP_CREATE, ompi_op_create_f, (ompi_op_fortran_handler_fn_t* function, ompi_fortran_logical_t *commute, MPI_Fint *op, MPI_Fint *ierr), (function, commute, op, ierr) ) +#else +#define ompi_op_create_f pompi_op_create_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_op_create_f(ompi_op_fortran_handler_fn_t* function, ompi_fortran_logical_t *commute, MPI_Fint *op, MPI_Fint *ierr) { @@ -73,13 +75,13 @@ void ompi_op_create_f(ompi_op_fortran_handler_fn_t* function, ompi_fortran_logic /* See the note in src/mpi/fortran/mpif-h/prototypes_mpi.h about the use of (void*) for function pointers in this function */ - c_ierr = MPI_Op_create((MPI_User_function *) function, + c_ierr = PMPI_Op_create((MPI_User_function *) function, OMPI_LOGICAL_2_INT(*commute), &c_op); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { c_op->o_flags |= OMPI_OP_FLAGS_FORTRAN_FUNC; - *op = MPI_Op_c2f(c_op); + *op = PMPI_Op_c2f(c_op); } } diff --git a/ompi/mpi/fortran/mpif-h/op_free_f.c b/ompi/mpi/fortran/mpif-h/op_free_f.c index 27c86a4c2e6..c3efbd3ba06 100644 --- a/ompi/mpi/fortran/mpif-h/op_free_f.c +++ b/ompi/mpi/fortran/mpif-h/op_free_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_OP_FREE = ompi_op_free_f #pragma weak pmpi_op_free = ompi_op_free_f #pragma weak pmpi_op_free_ = ompi_op_free_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Op_free_f = ompi_op_free_f #pragma weak PMPI_Op_free_f08 = ompi_op_free_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_OP_FREE, pmpi_op_free, pmpi_op_free_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_OP_FREE, (MPI_Fint *op, MPI_Fint *ierr), (op, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_OP_FREE = ompi_op_free_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_OP_FREE, #pragma weak MPI_Op_free_f = ompi_op_free_f #pragma weak MPI_Op_free_f08 = ompi_op_free_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_OP_FREE, mpi_op_free, mpi_op_free_, @@ -57,24 +60,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_OP_FREE, ompi_op_free_f, (MPI_Fint *op, MPI_Fint *ierr), (op, ierr) ) +#else +#define ompi_op_free_f pompi_op_free_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_op_free_f(MPI_Fint *op, MPI_Fint *ierr) { int c_ierr; MPI_Op c_op; - c_op = MPI_Op_f2c(*op); + c_op = PMPI_Op_f2c(*op); - c_ierr = MPI_Op_free(&c_op); + c_ierr = PMPI_Op_free(&c_op); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *op = MPI_Op_c2f(c_op); + *op = PMPI_Op_c2f(c_op); } } diff --git a/ompi/mpi/fortran/mpif-h/open_port_f.c b/ompi/mpi/fortran/mpif-h/open_port_f.c index bf04d7879c2..167bf055506 100644 --- a/ompi/mpi/fortran/mpif-h/open_port_f.c +++ b/ompi/mpi/fortran/mpif-h/open_port_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_OPEN_PORT = ompi_open_port_f #pragma weak pmpi_open_port = ompi_open_port_f #pragma weak pmpi_open_port_ = ompi_open_port_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Open_port_f = ompi_open_port_f #pragma weak PMPI_Open_port_f08 = ompi_open_port_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_OPEN_PORT, pmpi_open_port, pmpi_open_port_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_OPEN_PORT, (MPI_Fint *info, char *port_name, MPI_Fint *ierr, int port_name_len), (info, port_name, ierr, port_name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_OPEN_PORT = ompi_open_port_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_OPEN_PORT, #pragma weak MPI_Open_port_f = ompi_open_port_f #pragma weak MPI_Open_port_f08 = ompi_open_port_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_OPEN_PORT, mpi_open_port, mpi_open_port_, @@ -58,22 +61,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_OPEN_PORT, ompi_open_port_f, (MPI_Fint *info, char *port_name, MPI_Fint *ierr, int port_name_len), (info, port_name, ierr, port_name_len) ) +#else +#define ompi_open_port_f pompi_open_port_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_open_port_f(MPI_Fint *info, char *port_name, MPI_Fint *ierr, int port_name_len) { int c_ierr; MPI_Info c_info; char c_port_name[MPI_MAX_PORT_NAME]; - c_info = MPI_Info_f2c(*info); + c_info = PMPI_Info_f2c(*info); - c_ierr = MPI_Open_port(c_info, c_port_name); + c_ierr = PMPI_Open_port(c_info, c_port_name); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if ( MPI_SUCCESS == c_ierr ) { diff --git a/ompi/mpi/fortran/mpif-h/pack_external_f.c b/ompi/mpi/fortran/mpif-h/pack_external_f.c index c6ff45fd59c..461211064ef 100644 --- a/ompi/mpi/fortran/mpif-h/pack_external_f.c +++ b/ompi/mpi/fortran/mpif-h/pack_external_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/base/constants.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_PACK_EXTERNAL = ompi_pack_external_f #pragma weak pmpi_pack_external = ompi_pack_external_f #pragma weak pmpi_pack_external_ = ompi_pack_external_f @@ -33,14 +36,15 @@ #pragma weak PMPI_Pack_external_f = ompi_pack_external_f #pragma weak PMPI_Pack_external_f08 = ompi_pack_external_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_PACK_EXTERNAL, pmpi_pack_external, pmpi_pack_external_, pmpi_pack_external__, pompi_pack_external_f, (char *datarep, char *inbuf, MPI_Fint *incount, MPI_Fint *datatype, char *outbuf, MPI_Aint *outsize, MPI_Aint *position, MPI_Fint *ierr, int datarep_len), - (datarep, inbuf, incount, datatype, outbuf, outsize, position, ierr, datarep_len) ) + (datarep, inbuf, incount, datatype, outbuf, outsize, position, ierr, datarep_len) ) +#endif #endif #if OPAL_HAVE_WEAK_SYMBOLS @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PACK_EXTERNAL, #pragma weak MPI_Pack_external_f = ompi_pack_external_f #pragma weak MPI_Pack_external_f08 = ompi_pack_external_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_PACK_EXTERNAL, mpi_pack_external, mpi_pack_external_, @@ -61,24 +64,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_PACK_EXTERNAL, ompi_pack_external_f, (char *datarep, char *inbuf, MPI_Fint *incount, MPI_Fint *datatype, char *outbuf, MPI_Aint *outsize, MPI_Aint *position, MPI_Fint *ierr, int datarep_len), (datarep, inbuf, incount, datatype, outbuf, outsize, position, ierr, datarep_len) ) +#else +#define ompi_pack_external_f pompi_pack_external_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_pack_external_f(char *datarep, char *inbuf, MPI_Fint *incount, - MPI_Fint *datatype, char *outbuf, + MPI_Fint *datatype, char *outbuf, MPI_Aint *outsize, MPI_Aint *position, MPI_Fint *ierr, int datarep_len) { int ret, c_ierr; char *c_datarep; - MPI_Datatype type = MPI_Type_f2c(*datatype); - + MPI_Datatype type = PMPI_Type_f2c(*datatype); + /* Convert the fortran string */ - + if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(datarep, datarep_len, &c_datarep))) { c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, @@ -86,8 +88,8 @@ void ompi_pack_external_f(char *datarep, char *inbuf, MPI_Fint *incount, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); return; } - - c_ierr = MPI_Pack_external(c_datarep, OMPI_F2C_BOTTOM(inbuf), + + c_ierr = PMPI_Pack_external(c_datarep, OMPI_F2C_BOTTOM(inbuf), OMPI_FINT_2_INT(*incount), type, outbuf, *outsize, diff --git a/ompi/mpi/fortran/mpif-h/pack_external_size_f.c b/ompi/mpi/fortran/mpif-h/pack_external_size_f.c index f9428c73b27..8e9913acdaf 100644 --- a/ompi/mpi/fortran/mpif-h/pack_external_size_f.c +++ b/ompi/mpi/fortran/mpif-h/pack_external_size_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/base/constants.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_PACK_EXTERNAL_SIZE = ompi_pack_external_size_f #pragma weak pmpi_pack_external_size = ompi_pack_external_size_f #pragma weak pmpi_pack_external_size_ = ompi_pack_external_size_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Pack_external_size_f = ompi_pack_external_size_f #pragma weak PMPI_Pack_external_size_f08 = ompi_pack_external_size_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_PACK_EXTERNAL_SIZE, pmpi_pack_external_size, pmpi_pack_external_size_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PACK_EXTERNAL_SIZE, (char *datarep, MPI_Fint *incount, MPI_Fint *datatype, MPI_Aint *size, MPI_Fint *ierr, int datarep_len), (datarep, incount, datatype, size, ierr, datarep_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_PACK_EXTERNAL_SIZE = ompi_pack_external_size_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PACK_EXTERNAL_SIZE, #pragma weak MPI_Pack_external_size_f = ompi_pack_external_size_f #pragma weak MPI_Pack_external_size_f08 = ompi_pack_external_size_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_PACK_EXTERNAL_SIZE, mpi_pack_external_size, mpi_pack_external_size_, @@ -61,23 +64,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_PACK_EXTERNAL_SIZE, ompi_pack_external_size_f, (char *datarep, MPI_Fint *incount, MPI_Fint *datatype, MPI_Aint *size, MPI_Fint *ierr, int datarep_len), (datarep, incount, datatype, size, ierr, datarep_len) ) +#else +#define ompi_pack_external_size_f pompi_pack_external_size_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_pack_external_size_f(char *datarep, MPI_Fint *incount, MPI_Fint *datatype, MPI_Aint *size, MPI_Fint *ierr, int datarep_len) { int ret, c_ierr; char *c_datarep; - MPI_Datatype type = MPI_Type_f2c(*datatype); + MPI_Datatype type = PMPI_Type_f2c(*datatype); /* Convert the fortran string */ - + if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(datarep, datarep_len, &c_datarep))) { c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, @@ -86,7 +88,7 @@ void ompi_pack_external_size_f(char *datarep, MPI_Fint *incount, return; } - c_ierr = MPI_Pack_external_size(c_datarep, + c_ierr = PMPI_Pack_external_size(c_datarep, OMPI_FINT_2_INT(*incount), type, size); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/pack_f.c b/ompi/mpi/fortran/mpif-h/pack_f.c index 1c1dbe76fa4..9006c59ad28 100644 --- a/ompi/mpi/fortran/mpif-h/pack_f.c +++ b/ompi/mpi/fortran/mpif-h/pack_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_PACK = ompi_pack_f #pragma weak pmpi_pack = ompi_pack_f #pragma weak pmpi_pack_ = ompi_pack_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Pack_f = ompi_pack_f #pragma weak PMPI_Pack_f08 = ompi_pack_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_PACK, pmpi_pack, pmpi_pack_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PACK, (char *inbuf, MPI_Fint *incount, MPI_Fint *datatype, char *outbuf, MPI_Fint *outsize, MPI_Fint *position, MPI_Fint *comm, MPI_Fint *ierr), (inbuf, incount, datatype, outbuf, outsize, position, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_PACK = ompi_pack_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PACK, #pragma weak MPI_Pack_f = ompi_pack_f #pragma weak MPI_Pack_f08 = ompi_pack_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_PACK, mpi_pack, mpi_pack_, @@ -58,15 +61,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_PACK, ompi_pack_f, (char *inbuf, MPI_Fint *incount, MPI_Fint *datatype, char *outbuf, MPI_Fint *outsize, MPI_Fint *position, MPI_Fint *comm, MPI_Fint *ierr), (inbuf, incount, datatype, outbuf, outsize, position, comm, ierr) ) +#else +#define ompi_pack_f pompi_pack_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_pack_f(char *inbuf, MPI_Fint *incount, MPI_Fint *datatype, - char *outbuf, MPI_Fint *outsize, MPI_Fint *position, + char *outbuf, MPI_Fint *outsize, MPI_Fint *position, MPI_Fint *comm, MPI_Fint *ierr) { int c_ierr; @@ -74,18 +76,18 @@ void ompi_pack_f(char *inbuf, MPI_Fint *incount, MPI_Fint *datatype, MPI_Datatype c_type; OMPI_SINGLE_NAME_DECL(position); - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); OMPI_SINGLE_FINT_2_INT(position); - - c_ierr = MPI_Pack(OMPI_F2C_BOTTOM(inbuf), OMPI_FINT_2_INT(*incount), + + c_ierr = PMPI_Pack(OMPI_F2C_BOTTOM(inbuf), OMPI_FINT_2_INT(*incount), c_type, outbuf, OMPI_FINT_2_INT(*outsize), OMPI_SINGLE_NAME_CONVERT(position), c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + if (MPI_SUCCESS == c_ierr) { - OMPI_SINGLE_INT_2_FINT(position); + OMPI_SINGLE_INT_2_FINT(position); } } diff --git a/ompi/mpi/fortran/mpif-h/pack_size_f.c b/ompi/mpi/fortran/mpif-h/pack_size_f.c index 578d40a3363..b6376e1fe79 100644 --- a/ompi/mpi/fortran/mpif-h/pack_size_f.c +++ b/ompi/mpi/fortran/mpif-h/pack_size_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_PACK_SIZE = ompi_pack_size_f #pragma weak pmpi_pack_size = ompi_pack_size_f #pragma weak pmpi_pack_size_ = ompi_pack_size_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Pack_size_f = ompi_pack_size_f #pragma weak PMPI_Pack_size_f08 = ompi_pack_size_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_PACK_SIZE, pmpi_pack_size, pmpi_pack_size_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PACK_SIZE, (MPI_Fint *incount, MPI_Fint *datatype, MPI_Fint *comm, MPI_Fint *size, MPI_Fint *ierr), (incount, datatype, comm, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_PACK_SIZE = ompi_pack_size_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PACK_SIZE, #pragma weak MPI_Pack_size_f = ompi_pack_size_f #pragma weak MPI_Pack_size_f08 = ompi_pack_size_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_PACK_SIZE, mpi_pack_size, mpi_pack_size_, @@ -57,26 +60,25 @@ OMPI_GENERATE_F77_BINDINGS (MPI_PACK_SIZE, ompi_pack_size_f, (MPI_Fint *incount, MPI_Fint *datatype, MPI_Fint *comm, MPI_Fint *size, MPI_Fint *ierr), (incount, datatype, comm, size, ierr) ) +#else +#define ompi_pack_size_f pompi_pack_size_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_pack_size_f(MPI_Fint *incount, MPI_Fint *datatype, + +void ompi_pack_size_f(MPI_Fint *incount, MPI_Fint *datatype, MPI_Fint *comm, MPI_Fint *size, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; MPI_Datatype c_type; OMPI_SINGLE_NAME_DECL(size); - - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); - c_ierr = MPI_Pack_size(OMPI_FINT_2_INT(*incount), - c_type, c_comm, + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + + c_ierr = PMPI_Pack_size(OMPI_FINT_2_INT(*incount), + c_type, c_comm, OMPI_SINGLE_NAME_CONVERT(size)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/pcontrol_f.c b/ompi/mpi/fortran/mpif-h/pcontrol_f.c index a0565ebfec3..6150496a2d0 100644 --- a/ompi/mpi/fortran/mpif-h/pcontrol_f.c +++ b/ompi/mpi/fortran/mpif-h/pcontrol_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_PCONTROL = ompi_pcontrol_f #pragma weak pmpi_pcontrol = ompi_pcontrol_f #pragma weak pmpi_pcontrol_ = ompi_pcontrol_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Pcontrol_f = ompi_pcontrol_f #pragma weak PMPI_Pcontrol_f08 = ompi_pcontrol_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_PCONTROL, pmpi_pcontrol, pmpi_pcontrol_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PCONTROL, (MPI_Fint *level), (level) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_PCONTROL = ompi_pcontrol_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PCONTROL, #pragma weak MPI_Pcontrol_f = ompi_pcontrol_f #pragma weak MPI_Pcontrol_f08 = ompi_pcontrol_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_PCONTROL, mpi_pcontrol, mpi_pcontrol_, @@ -57,14 +60,13 @@ OMPI_GENERATE_F77_BINDINGS (MPI_PCONTROL, ompi_pcontrol_f, (MPI_Fint *level), (level) ) +#else +#define ompi_pcontrol_f pompi_pcontrol_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_pcontrol_f(MPI_Fint *level) { - MPI_Pcontrol(OMPI_FINT_2_INT(*level)); + PMPI_Pcontrol(OMPI_FINT_2_INT(*level)); } diff --git a/ompi/mpi/fortran/mpif-h/probe_f.c b/ompi/mpi/fortran/mpif-h/probe_f.c index 783b397bf84..76774cb1a68 100644 --- a/ompi/mpi/fortran/mpif-h/probe_f.c +++ b/ompi/mpi/fortran/mpif-h/probe_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/base/constants.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_PROBE = ompi_probe_f #pragma weak pmpi_probe = ompi_probe_f #pragma weak pmpi_probe_ = ompi_probe_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Probe_f = ompi_probe_f #pragma weak PMPI_Probe_f08 = ompi_probe_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_PROBE, pmpi_probe, pmpi_probe_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PROBE, (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr), (source, tag, comm, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_PROBE = ompi_probe_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PROBE, #pragma weak MPI_Probe_f = ompi_probe_f #pragma weak MPI_Probe_f08 = ompi_probe_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_PROBE, mpi_probe, mpi_probe_, @@ -61,24 +64,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_PROBE, ompi_probe_f, (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr), (source, tag, comm, status, ierr) ) +#else +#define ompi_probe_f pompi_probe_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_probe_f(MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr) -{ +{ int c_ierr; MPI_Comm c_comm; OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = MPI_Probe(OMPI_FINT_2_INT(*source), + c_ierr = PMPI_Probe(OMPI_FINT_2_INT(*source), OMPI_FINT_2_INT(*tag), c_comm, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/profile/Makefile.am b/ompi/mpi/fortran/mpif-h/profile/Makefile.am index 39b5a332328..bfc22b2286f 100644 --- a/ompi/mpi/fortran/mpif-h/profile/Makefile.am +++ b/ompi/mpi/fortran/mpif-h/profile/Makefile.am @@ -6,11 +6,11 @@ # Copyright (c) 2004-2013 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2013 Inria. All rights reserved. # Copyright (c) 2011-2013 Universite Bordeaux 1 # Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights @@ -18,22 +18,18 @@ # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # include $(top_srcdir)/Makefile.ompi-rules -# -# OMPI_PROFILING_DEFINES flag is enabled when we want our MPI_* symbols -# to be replaced by PMPI_*. 
In other words, this flag decides -# whether "profile/defines.h" is included or not. "profile/defines.h" -# replaces all MPI_* symbols with PMPI_* symbols. In this directory -# we definately need it to be 1. -# -AM_CPPFLAGS = -DOMPI_PROFILE_LAYER=1 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1 +CLEANFILES= +libmpi_mpifh_pmpi_la_LIBADD = + +AM_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=1 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1 # # This build needs to go through only if profiling is required. # Further, this build HAS to go through if profiling is required. @@ -42,19 +38,16 @@ AM_CPPFLAGS = -DOMPI_PROFILE_LAYER=1 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1 noinst_LTLIBRARIES = if BUILD_PMPI_FORTRAN_MPIFH_BINDINGS_LAYER noinst_LTLIBRARIES += libmpi_mpifh_pmpi.la -else -noinst_LTLIBRARIES += endif -headers = \ - defines.h - linked_files = \ pabort_f.c \ padd_error_class_f.c \ padd_error_code_f.c \ padd_error_string_f.c \ paddress_f.c \ + paint_add_f.c \ + paint_diff_f.c \ pallgather_f.c \ pallgatherv_f.c \ palloc_mem_f.c \ @@ -374,9 +367,13 @@ linked_files += \ pfile_get_view_f.c \ pfile_iread_at_f.c \ pfile_iread_f.c \ + pfile_iread_at_all_f.c \ + pfile_iread_all_f.c \ pfile_iread_shared_f.c \ pfile_iwrite_at_f.c \ pfile_iwrite_f.c \ + pfile_iwrite_at_all_f.c \ + pfile_iwrite_all_f.c \ pfile_iwrite_shared_f.c \ pfile_open_f.c \ pfile_preallocate_f.c \ @@ -426,7 +423,19 @@ $(linked_files): # psizeof_f.f90 is generated based on some results from configure tests. CLEANFILES += psizeof_f.f90 + +# Build the MPI_SIZEOF code in a separate convenience library (see +# lengthy comment in ompi/mpi/fortran/mpif-h/Makefile.am for an +# explanation why). 
+if BUILD_FORTRAN_SIZEOF +noinst_LTLIBRARIES += libmpi_mpifh_psizeof.la +# Do not dist this file; it is generated +nodist_libmpi_mpifh_psizeof_la_SOURCES = psizeof_f.f90 +libmpi_mpifh_pmpi_la_LIBADD += libmpi_mpifh_psizeof.la +endif + sizeof_pl=$(top_srcdir)/ompi/mpi/fortran/base/gen-mpi-sizeof.pl + psizeof_f.f90: $(top_builddir)/config.status psizeof_f.f90: $(sizeof_pl) psizeof_f.f90: @@ -441,14 +450,12 @@ psizeof_f.f90: # The library itself # nodist_libmpi_mpifh_pmpi_la_SOURCES = \ - psizeof_f.f90 \ $(linked_files) # Conditionally install the header files if WANT_INSTALL_HEADERS ompidir = $(ompiincludedir)/$(subdir) -ompi_HEADERS = $(headers) endif # These files were created by targets above diff --git a/ompi/mpi/fortran/mpif-h/profile/defines.h b/ompi/mpi/fortran/mpif-h/profile/defines.h deleted file mode 100644 index 579ae0d5696..00000000000 --- a/ompi/mpi/fortran/mpif-h/profile/defines.h +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Inria. All rights reserved. - * Copyright (c) 2011-2013 Universite Bordeaux 1 - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef OMPI_F77_PROFILE_DEFINES_H -#define OMPI_F77_PROFILE_DEFINES_H - -#define ompi_abort_f pompi_abort_f -#define ompi_accumulate_f pompi_accumulate_f -#define ompi_add_error_class_f pompi_add_error_class_f -#define ompi_add_error_code_f pompi_add_error_code_f -#define ompi_add_error_string_f pompi_add_error_string_f -#define ompi_address_f pompi_address_f -#define ompi_allgather_f pompi_allgather_f -#define ompi_allgatherv_f pompi_allgatherv_f -#define ompi_alloc_mem_f pompi_alloc_mem_f -#define ompi_allreduce_f pompi_allreduce_f -#define ompi_alltoall_f pompi_alltoall_f -#define ompi_alltoallv_f pompi_alltoallv_f -#define ompi_alltoallw_f pompi_alltoallw_f -#define ompi_attr_delete_f pompi_attr_delete_f -#define ompi_attr_get_f pompi_attr_get_f -#define ompi_attr_put_f pompi_attr_put_f -#define ompi_barrier_f pompi_barrier_f -#define ompi_bcast_f pompi_bcast_f -#define ompi_bsend_f pompi_bsend_f -#define ompi_bsend_init_f pompi_bsend_init_f -#define ompi_buffer_attach_f pompi_buffer_attach_f -#define ompi_buffer_detach_f pompi_buffer_detach_f -#define ompi_cancel_f pompi_cancel_f -#define ompi_cart_coords_f pompi_cart_coords_f -#define ompi_cart_create_f pompi_cart_create_f -#define ompi_cart_get_f pompi_cart_get_f -#define ompi_cart_map_f pompi_cart_map_f -#define ompi_cart_rank_f pompi_cart_rank_f -#define ompi_cart_shift_f pompi_cart_shift_f -#define ompi_cart_sub_f pompi_cart_sub_f -#define ompi_cartdim_get_f pompi_cartdim_get_f -#define ompi_close_port_f pompi_close_port_f -#define ompi_comm_accept_f pompi_comm_accept_f -#define ompi_comm_call_errhandler_f pompi_comm_call_errhandler_f -#define ompi_comm_compare_f pompi_comm_compare_f -#define ompi_comm_connect_f pompi_comm_connect_f -#define ompi_comm_create_errhandler_f pompi_comm_create_errhandler_f -#define ompi_comm_create_keyval_f pompi_comm_create_keyval_f -#define ompi_comm_create_group_f pompi_comm_create_group_f 
-#define ompi_comm_create_f pompi_comm_create_f -#define ompi_comm_delete_attr_f pompi_comm_delete_attr_f -#define ompi_comm_disconnect_f pompi_comm_disconnect_f -#define ompi_comm_dup_with_info_f pompi_comm_dup_with_info_f -#define ompi_comm_dup_f pompi_comm_dup_f -#define ompi_comm_idup_f pompi_comm_idup_f -#define ompi_comm_free_keyval_f pompi_comm_free_keyval_f -#define ompi_comm_free_f pompi_comm_free_f -#define ompi_comm_get_attr_f pompi_comm_get_attr_f -#define ompi_comm_get_info_f pompi_comm_get_info_f -#define ompi_comm_get_errhandler_f pompi_comm_get_errhandler_f -#define ompi_comm_get_name_f pompi_comm_get_name_f -#define ompi_comm_get_parent_f pompi_comm_get_parent_f -#define ompi_comm_group_f pompi_comm_group_f -#define ompi_comm_join_f pompi_comm_join_f -#define ompi_comm_rank_f pompi_comm_rank_f -#define ompi_comm_remote_group_f pompi_comm_remote_group_f -#define ompi_comm_remote_size_f pompi_comm_remote_size_f -#define ompi_comm_set_attr_f pompi_comm_set_attr_f -#define ompi_comm_set_info_f pompi_comm_set_info_f -#define ompi_comm_set_errhandler_f pompi_comm_set_errhandler_f -#define ompi_comm_set_name_f pompi_comm_set_name_f -#define ompi_comm_size_f pompi_comm_size_f -#define ompi_comm_spawn_f pompi_comm_spawn_f -#define ompi_comm_spawn_multiple_f pompi_comm_spawn_multiple_f -#define ompi_comm_split_f pompi_comm_split_f -#define ompi_comm_split_type_f pompi_comm_split_type_f -#define ompi_comm_test_inter_f pompi_comm_test_inter_f -#define ompi_compare_and_swap_f pompi_compare_and_swap_f -#define ompi_dims_create_f pompi_dims_create_f -#define ompi_dist_graph_create_f pompi_dist_graph_create_f -#define ompi_dist_graph_create_adjacent_f pompi_dist_graph_create_adjacent_f -#define ompi_dist_graph_neighbors_f pompi_dist_graph_neighbors_f -#define ompi_dist_graph_neighbors_count_f pompi_dist_graph_neighbors_count_f -#define ompi_errhandler_create_f pompi_errhandler_create_f -#define ompi_errhandler_free_f pompi_errhandler_free_f -#define 
ompi_errhandler_get_f pompi_errhandler_get_f -#define ompi_errhandler_set_f pompi_errhandler_set_f -#define ompi_error_class_f pompi_error_class_f -#define ompi_error_string_f pompi_error_string_f -#define ompi_exscan_f pompi_exscan_f -#define ompi_f_sync_reg_f pompi_f_sync_reg_f -#define ompi_fetch_and_op_f pompi_fetch_and_op_f -#define ompi_file_call_errhandler_f pompi_file_call_errhandler_f -#define ompi_file_create_errhandler_f pompi_file_create_errhandler_f -#define ompi_file_set_errhandler_f pompi_file_set_errhandler_f -#define ompi_file_get_errhandler_f pompi_file_get_errhandler_f -#define ompi_file_open_f pompi_file_open_f -#define ompi_file_close_f pompi_file_close_f -#define ompi_file_delete_f pompi_file_delete_f -#define ompi_file_set_size_f pompi_file_set_size_f -#define ompi_file_preallocate_f pompi_file_preallocate_f -#define ompi_file_get_size_f pompi_file_get_size_f -#define ompi_file_get_group_f pompi_file_get_group_f -#define ompi_file_get_amode_f pompi_file_get_amode_f -#define ompi_file_set_info_f pompi_file_set_info_f -#define ompi_file_get_info_f pompi_file_get_info_f -#define ompi_file_set_view_f pompi_file_set_view_f -#define ompi_file_get_view_f pompi_file_get_view_f -#define ompi_file_read_at_f pompi_file_read_at_f -#define ompi_file_read_at_all_f pompi_file_read_at_all_f -#define ompi_file_write_at_f pompi_file_write_at_f -#define ompi_file_write_at_all_f pompi_file_write_at_all_f -#define ompi_file_iread_at_f pompi_file_iread_at_f -#define ompi_file_iwrite_at_f pompi_file_iwrite_at_f -#define ompi_file_read_f pompi_file_read_f -#define ompi_file_read_all_f pompi_file_read_all_f -#define ompi_file_write_f pompi_file_write_f -#define ompi_file_write_all_f pompi_file_write_all_f -#define ompi_file_iread_f pompi_file_iread_f -#define ompi_file_iwrite_f pompi_file_iwrite_f -#define ompi_file_seek_f pompi_file_seek_f -#define ompi_file_get_position_f pompi_file_get_position_f -#define ompi_file_get_byte_offset_f pompi_file_get_byte_offset_f 
-#define ompi_file_read_shared_f pompi_file_read_shared_f -#define ompi_file_write_shared_f pompi_file_write_shared_f -#define ompi_file_iread_shared_f pompi_file_iread_shared_f -#define ompi_file_iwrite_shared_f pompi_file_iwrite_shared_f -#define ompi_file_read_ordered_f pompi_file_read_ordered_f -#define ompi_file_write_ordered_f pompi_file_write_ordered_f -#define ompi_file_seek_shared_f pompi_file_seek_shared_f -#define ompi_file_get_position_shared_f pompi_file_get_position_shared_f -#define ompi_file_read_at_all_begin_f pompi_file_read_at_all_begin_f -#define ompi_file_read_at_all_end_f pompi_file_read_at_all_end_f -#define ompi_file_write_at_all_begin_f pompi_file_write_at_all_begin_f -#define ompi_file_write_at_all_end_f pompi_file_write_at_all_end_f -#define ompi_file_read_all_begin_f pompi_file_read_all_begin_f -#define ompi_file_read_all_end_f pompi_file_read_all_end_f -#define ompi_file_write_all_begin_f pompi_file_write_all_begin_f -#define ompi_file_write_all_end_f pompi_file_write_all_end_f -#define ompi_file_read_ordered_begin_f pompi_file_read_ordered_begin_f -#define ompi_file_read_ordered_end_f pompi_file_read_ordered_end_f -#define ompi_file_write_ordered_begin_f pompi_file_write_ordered_begin_f -#define ompi_file_write_ordered_end_f pompi_file_write_ordered_end_f -#define ompi_file_get_type_extent_f pompi_file_get_type_extent_f -#define ompi_file_set_atomicity_f pompi_file_set_atomicity_f -#define ompi_file_get_atomicity_f pompi_file_get_atomicity_f -#define ompi_file_sync_f pompi_file_sync_f -#define ompi_finalize_f pompi_finalize_f -#define ompi_finalized_f pompi_finalized_f -#define ompi_free_mem_f pompi_free_mem_f -#define ompi_gather_f pompi_gather_f -#define ompi_gatherv_f pompi_gatherv_f -#define ompi_get_address_f pompi_get_address_f -#define ompi_get_count_f pompi_get_count_f -#define ompi_get_elements_f pompi_get_elements_f -#define ompi_get_elements_x_f pompi_get_elements_x_f -#define ompi_get_f pompi_get_f -#define 
ompi_get_accumulate_f pompi_get_accumulate_f -#define ompi_get_library_version_f pompi_get_library_version_f -#define ompi_get_processor_name_f pompi_get_processor_name_f -#define ompi_get_version_f pompi_get_version_f -#define ompi_graph_create_f pompi_graph_create_f -#define ompi_graph_get_f pompi_graph_get_f -#define ompi_graph_map_f pompi_graph_map_f -#define ompi_graph_neighbors_count_f pompi_graph_neighbors_count_f -#define ompi_graph_neighbors_f pompi_graph_neighbors_f -#define ompi_graphdims_get_f pompi_graphdims_get_f -#define ompi_grequest_complete_f pompi_grequest_complete_f -#define ompi_grequest_start_f pompi_grequest_start_f -#define ompi_group_compare_f pompi_group_compare_f -#define ompi_group_difference_f pompi_group_difference_f -#define ompi_group_excl_f pompi_group_excl_f -#define ompi_group_free_f pompi_group_free_f -#define ompi_group_incl_f pompi_group_incl_f -#define ompi_group_intersection_f pompi_group_intersection_f -#define ompi_group_range_excl_f pompi_group_range_excl_f -#define ompi_group_range_incl_f pompi_group_range_incl_f -#define ompi_group_rank_f pompi_group_rank_f -#define ompi_group_size_f pompi_group_size_f -#define ompi_group_translate_ranks_f pompi_group_translate_ranks_f -#define ompi_group_union_f pompi_group_union_f -#define ompi_iallgather_f pompi_iallgather_f -#define ompi_iallgatherv_f pompi_iallgatherv_f -#define ompi_iallgather_f pompi_iallgather_f -#define ompi_iallreduce_f pompi_iallreduce_f -#define ompi_ialltoall_f pompi_ialltoall_f -#define ompi_ialltoallv_f pompi_ialltoallv_f -#define ompi_ialltoallw_f pompi_ialltoallw_f -#define ompi_ibarrier_f pompi_ibarrier_f -#define ompi_ibcast_f pompi_ibcast_f -#define ompi_ibsend_f pompi_ibsend_f -#define ompi_iexscan_f pompi_iexscan_f -#define ompi_igather_f pompi_igather_f -#define ompi_igatherv_f pompi_igatherv_f -#define ompi_improbe_f pompi_improbe_f -#define ompi_imrecv_f pompi_imrecv_f -#define ompi_ineighbor_allgather_f pompi_ineighbor_allgather_f -#define 
ompi_ineighbor_allgatherv_f pompi_ineighbor_allgatherv_f -#define ompi_ineighbor_alltoall_f pompi_ineighbor_alltoall_f -#define ompi_ineighbor_alltoallv_f pompi_ineighbor_alltoallv_f -#define ompi_ineighbor_alltoallw_f pompi_ineighbor_alltoallw_f -#define ompi_ireduce_f pompi_ireduce_f -#define ompi_ireduce_scatter_f pompi_ireduce_scatter_f -#define ompi_ireduce_scatter_block_f pompi_ireduce_scatter_block_f -#define ompi_iscan_f pompi_iscan_f -#define ompi_iscatter_f pompi_iscatter_f -#define ompi_iscatterv_f pompi_iscatterv_f -#define ompi_info_create_f pompi_info_create_f -#define ompi_info_delete_f pompi_info_delete_f -#define ompi_info_dup_f pompi_info_dup_f -#define ompi_info_free_f pompi_info_free_f -#define ompi_info_get_f pompi_info_get_f -#define ompi_info_get_nkeys_f pompi_info_get_nkeys_f -#define ompi_info_get_nthkey_f pompi_info_get_nthkey_f -#define ompi_info_get_valuelen_f pompi_info_get_valuelen_f -#define ompi_info_set_f pompi_info_set_f -#define ompi_init_f pompi_init_f -#define ompi_initialized_f pompi_initialized_f -#define ompi_init_thread_f pompi_init_thread_f -#define ompi_intercomm_create_f pompi_intercomm_create_f -#define ompi_intercomm_merge_f pompi_intercomm_merge_f -#define ompi_iprobe_f pompi_iprobe_f -#define ompi_irecv_f pompi_irecv_f -#define ompi_irsend_f pompi_irsend_f -#define ompi_isend_f pompi_isend_f -#define ompi_issend_f pompi_issend_f -#define ompi_is_thread_main_f pompi_is_thread_main_f -#define ompi_keyval_create_f pompi_keyval_create_f -#define ompi_keyval_free_f pompi_keyval_free_f -#define ompi_lookup_name_f pompi_lookup_name_f -#define ompi_mprobe_f pompi_mprobe_f -#define ompi_mrecv_f pompi_mrecv_f -#define ompi_neighbor_allgather_f pompi_neighbor_allgather_f -#define ompi_neighbor_allgatherv_f pompi_neighbor_allgatherv_f -#define ompi_neighbor_alltoall_f pompi_neighbor_alltoall_f -#define ompi_neighbor_alltoallv_f pompi_neighbor_alltoallv_f -#define ompi_neighbor_alltoallw_f pompi_neighbor_alltoallw_f -#define 
ompi_op_commutative_f pompi_op_commutative_f -#define ompi_op_create_f pompi_op_create_f -#define ompi_open_port_f pompi_open_port_f -#define ompi_op_free_f pompi_op_free_f -#define ompi_pack_external_f pompi_pack_external_f -#define ompi_pack_external_size_f pompi_pack_external_size_f -#define ompi_pack_f pompi_pack_f -#define ompi_pack_size_f pompi_pack_size_f -#define ompi_pcontrol_f pompi_pcontrol_f -#define ompi_probe_f pompi_probe_f -#define ompi_publish_name_f pompi_publish_name_f -#define ompi_put_f pompi_put_f -#define ompi_query_thread_f pompi_query_thread_f -#define ompi_raccumulate_f pompi_raccumulate_f -#define ompi_recv_init_f pompi_recv_init_f -#define ompi_recv_f pompi_recv_f -#define ompi_reduce_f pompi_reduce_f -#define ompi_reduce_local_f pompi_reduce_local_f -#define ompi_reduce_scatter_f pompi_reduce_scatter_f -#define ompi_reduce_scatter_block_f pompi_reduce_scatter_block_f -#define ompi_register_datarep_f pompi_register_datarep_f -#define ompi_request_free_f pompi_request_free_f -#define ompi_request_get_status_f pompi_request_get_status_f -#define ompi_rget_f pompi_rget_f -#define ompi_rget_accumulate_f pompi_rget_accumulate_f -#define ompi_rput_f pompi_rput_f -#define ompi_rsend_f pompi_rsend_f -#define ompi_rsend_init_f pompi_rsend_init_f -#define ompi_scan_f pompi_scan_f -#define ompi_scatter_f pompi_scatter_f -#define ompi_scatterv_f pompi_scatterv_f -#define ompi_send_init_f pompi_send_init_f -#define ompi_send_f pompi_send_f -#define ompi_sendrecv_f pompi_sendrecv_f -#define ompi_sendrecv_replace_f pompi_sendrecv_replace_f -#define ompi_ssend_init_f pompi_ssend_init_f -#define ompi_ssend_f pompi_ssend_f -#define ompi_start_f pompi_start_f -#define ompi_startall_f pompi_startall_f -#define ompi_status_set_cancelled_f pompi_status_set_cancelled_f -#define ompi_status_set_elements_f pompi_status_set_elements_f -#define ompi_status_set_elements_x_f pompi_status_set_elements_x_f -#define ompi_testall_f pompi_testall_f -#define 
ompi_testany_f pompi_testany_f -#define ompi_test_f pompi_test_f -#define ompi_test_cancelled_f pompi_test_cancelled_f -#define ompi_testsome_f pompi_testsome_f -#define ompi_topo_test_f pompi_topo_test_f -#define ompi_type_commit_f pompi_type_commit_f -#define ompi_type_contiguous_f pompi_type_contiguous_f -#define ompi_type_create_darray_f pompi_type_create_darray_f -#define ompi_type_create_f90_complex_f pompi_type_create_f90_complex_f -#define ompi_type_create_f90_integer_f pompi_type_create_f90_integer_f -#define ompi_type_create_f90_real_f pompi_type_create_f90_real_f -#define ompi_type_create_hindexed_f pompi_type_create_hindexed_f -#define ompi_type_create_hvector_f pompi_type_create_hvector_f -#define ompi_type_create_keyval_f pompi_type_create_keyval_f -#define ompi_type_create_indexed_block_f pompi_type_create_indexed_block_f -#define ompi_type_create_hindexed_block_f pompi_type_create_hindexed_block_f -#define ompi_type_create_struct_f pompi_type_create_struct_f -#define ompi_type_create_subarray_f pompi_type_create_subarray_f -#define ompi_type_create_resized_f pompi_type_create_resized_f -#define ompi_type_delete_attr_f pompi_type_delete_attr_f -#define ompi_type_dup_f pompi_type_dup_f -#define ompi_type_extent_f pompi_type_extent_f -#define ompi_type_free_f pompi_type_free_f -#define ompi_type_free_keyval_f pompi_type_free_keyval_f -#define ompi_type_get_attr_f pompi_type_get_attr_f -#define ompi_type_get_contents_f pompi_type_get_contents_f -#define ompi_type_get_envelope_f pompi_type_get_envelope_f -#define ompi_type_get_extent_f pompi_type_get_extent_f -#define ompi_type_get_extent_x_f pompi_type_get_extent_x_f -#define ompi_type_get_name_f pompi_type_get_name_f -#define ompi_type_get_true_extent_f pompi_type_get_true_extent_f -#define ompi_type_get_true_extent_x_f pompi_type_get_true_extent_x_f -#define ompi_type_hindexed_f pompi_type_hindexed_f -#define ompi_type_hvector_f pompi_type_hvector_f -#define ompi_type_indexed_f pompi_type_indexed_f 
-#define ompi_type_lb_f pompi_type_lb_f -#define ompi_type_match_size_f pompi_type_match_size_f -#define ompi_type_set_attr_f pompi_type_set_attr_f -#define ompi_type_set_name_f pompi_type_set_name_f -#define ompi_type_size_f pompi_type_size_f -#define ompi_type_size_x_f pompi_type_size_x_f -#define ompi_type_struct_f pompi_type_struct_f -#define ompi_type_ub_f pompi_type_ub_f -#define ompi_type_vector_f pompi_type_vector_f -#define ompi_unpack_f pompi_unpack_f -#define ompi_unpublish_name_f pompi_unpublish_name_f -#define ompi_unpack_external_f pompi_unpack_external_f -#define ompi_waitall_f pompi_waitall_f -#define ompi_waitany_f pompi_waitany_f -#define ompi_wait_f pompi_wait_f -#define ompi_waitsome_f pompi_waitsome_f -#define ompi_win_allocate_f pompi_win_allocate_f -#define ompi_win_allocate_cptr_f pompi_win_allocate_cptr_f -#define ompi_win_allocate_shared_f pompi_win_allocate_shared_f -#define ompi_win_allocate_shared_cptr_f pompi_win_allocate_shared_cptr_f -#define ompi_win_attach_f pompi_win_attach_f -#define ompi_win_call_errhandler_f pompi_win_call_errhandler_f -#define ompi_win_complete_f pompi_win_complete_f -#define ompi_win_create_f pompi_win_create_f -#define ompi_win_create_dynamic_f pompi_win_create_dynamic_f -#define ompi_win_create_errhandler_f pompi_win_create_errhandler_f -#define ompi_win_create_keyval_f pompi_win_create_keyval_f -#define ompi_win_delete_attr_f pompi_win_delete_attr_f -#define ompi_win_detach_f pompi_win_detach_f -#define ompi_win_fence_f pompi_win_fence_f -#define ompi_win_flush_f pompi_win_flush_f -#define ompi_win_flush_all_f pompi_win_flush_all_f -#define ompi_win_flush_local_f pompi_win_flush_local_f -#define ompi_win_flush_local_all_f pompi_win_flush_local_all_f -#define ompi_win_free_f pompi_win_free_f -#define ompi_win_free_keyval_f pompi_win_free_keyval_f -#define ompi_win_get_attr_f pompi_win_get_attr_f -#define ompi_win_get_errhandler_f pompi_win_get_errhandler_f -#define ompi_win_get_group_f pompi_win_get_group_f 
-#define ompi_win_get_info_f pompi_win_get_info_f -#define ompi_win_get_name_f pompi_win_get_name_f -#define ompi_win_lock_f pompi_win_lock_f -#define ompi_win_lock_all_f pompi_win_lock_all_f -#define ompi_win_post_f pompi_win_post_f -#define ompi_win_set_attr_f pompi_win_set_attr_f -#define ompi_win_set_errhandler_f pompi_win_set_errhandler_f -#define ompi_win_set_info_f pompi_win_set_info_f -#define ompi_win_set_name_f pompi_win_set_name_f -#define ompi_win_shared_query_f pompi_win_shared_query_f -#define ompi_win_shared_query_cptr_f pompi_win_shared_query_cptr_f -#define ompi_win_start_f pompi_win_start_f -#define ompi_win_sync_f pompi_win_sync_f -#define ompi_win_test_f pompi_win_test_f -#define ompi_win_unlock_f pompi_win_unlock_f -#define ompi_win_unlock_all_f pompi_win_unlock_all_f -#define ompi_win_wait_f pompi_win_wait_f -#define ompi_wtick_f pompi_wtick_f -#define ompi_wtime_f pompi_wtime_f -#endif - diff --git a/ompi/mpi/fortran/mpif-h/prototypes_mpi.h b/ompi/mpi/fortran/mpif-h/prototypes_mpi.h index 2c68dac5509..1241e422e16 100644 --- a/ompi/mpi/fortran/mpif-h/prototypes_mpi.h +++ b/ompi/mpi/fortran/mpif-h/prototypes_mpi.h @@ -5,19 +5,19 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Universite Bordeaux 1 - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * This file prototypes all MPI fortran functions in all four fortran @@ -85,6 +85,8 @@ PN2(void, MPI_Add_error_class, mpi_add_error_class, MPI_ADD_ERROR_CLASS, (MPI_Fi PN2(void, MPI_Add_error_code, mpi_add_error_code, MPI_ADD_ERROR_CODE, (MPI_Fint *errorclass, MPI_Fint *errorcode, MPI_Fint *ierr)); PN2(void, MPI_Add_error_string, mpi_add_error_string, MPI_ADD_ERROR_STRING, (MPI_Fint *errorcode, char *string, MPI_Fint *ierr, int l)); PN2(void, MPI_Address, mpi_address, MPI_ADDRESS, (char *location, MPI_Fint *address, MPI_Fint *ierr)); +PN2(MPI_Aint, MPI_Aint_add, mpi_aint_add, MPI_AINT_ADD, (MPI_Aint *base, MPI_Aint *diff)); +PN2(MPI_Aint, MPI_Aint_diff, mpi_aint_diff, MPI_AINT_DIFF, (MPI_Aint *addr1, MPI_Aint *addr2)); PN2(void, MPI_Allgather, mpi_allgather, MPI_ALLGATHER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)); PN2(void, MPI_Allgatherv, mpi_allgatherv, MPI_ALLGATHERV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)); PN2(void, MPI_Alloc_mem, mpi_alloc_mem, MPI_ALLOC_MEM, (MPI_Aint *size, MPI_Fint *info, char *baseptr, MPI_Fint *ierr)); @@ -185,12 +187,16 @@ PN2(void, MPI_File_write_at, mpi_file_write_at, MPI_FILE_WRITE_AT, (MPI_Fint *fh PN2(void, MPI_File_write_at_all, mpi_file_write_at_all, MPI_FILE_WRITE_AT_ALL, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_File_iread_at, mpi_file_iread_at, MPI_FILE_IREAD_AT, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_File_iwrite_at, mpi_file_iwrite_at, MPI_FILE_IWRITE_AT, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, 
MPI_Fint *request, MPI_Fint *ierr)); +PN2(void, MPI_File_iread_at_all, mpi_file_iread_at_all, MPI_FILE_IREAD_AT_ALL, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr)); +PN2(void, MPI_File_iwrite_at_all, mpi_file_iwrite_at_all, MPI_FILE_IWRITE_AT_ALL, (MPI_Fint *fh, MPI_Offset *offset, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_File_read, mpi_file_read, MPI_FILE_READ, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_File_read_all, mpi_file_read_all, MPI_FILE_READ_ALL, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_File_write, mpi_file_write, MPI_FILE_WRITE, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_File_write_all, mpi_file_write_all, MPI_FILE_WRITE_ALL, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_File_iread, mpi_file_iread, MPI_FILE_IREAD, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_File_iwrite, mpi_file_iwrite, MPI_FILE_IWRITE, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr)); +PN2(void, MPI_File_iread_all, mpi_file_iread_all, MPI_FILE_IREAD_ALL, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr)); +PN2(void, MPI_File_iwrite_all, mpi_file_iwrite_all, MPI_FILE_IWRITE_ALL, (MPI_Fint *fh, char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_File_seek, mpi_file_seek, MPI_FILE_SEEK, (MPI_Fint *fh, MPI_Offset *offset, MPI_Fint *whence, MPI_Fint *ierr)); PN2(void, MPI_File_get_position, mpi_file_get_position, MPI_FILE_GET_POSITION, (MPI_Fint *fh, MPI_Offset *offset, MPI_Fint 
*ierr)); PN2(void, MPI_File_get_byte_offset, mpi_file_get_byte_offset, MPI_FILE_GET_BYTE_OFFSET, (MPI_Fint *fh, MPI_Offset *offset, MPI_Offset *disp, MPI_Fint *ierr)); diff --git a/ompi/mpi/fortran/mpif-h/publish_name_f.c b/ompi/mpi/fortran/mpif-h/publish_name_f.c index 0dba2c12f70..21dc6191ccb 100644 --- a/ompi/mpi/fortran/mpif-h/publish_name_f.c +++ b/ompi/mpi/fortran/mpif-h/publish_name_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_PUBLISH_NAME = ompi_publish_name_f #pragma weak pmpi_publish_name = ompi_publish_name_f #pragma weak pmpi_publish_name_ = ompi_publish_name_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Publish_name_f = ompi_publish_name_f #pragma weak PMPI_Publish_name_f08 = ompi_publish_name_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_PUBLISH_NAME, pmpi_publish_name, pmpi_publish_name_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PUBLISH_NAME, (char *service_name, MPI_Fint *info, char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len), (service_name, info, port_name, ierr, service_name_len, port_name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_PUBLISH_NAME = ompi_publish_name_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PUBLISH_NAME, #pragma weak MPI_Publish_name_f = ompi_publish_name_f #pragma weak MPI_Publish_name_f08 = ompi_publish_name_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_PUBLISH_NAME, mpi_publish_name, mpi_publish_name_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_PUBLISH_NAME, ompi_publish_name_f, (char *service_name, MPI_Fint *info, char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len), (service_name, info, port_name, ierr, service_name_len, port_name_len) ) +#else +#define ompi_publish_name_f pompi_publish_name_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_publish_name_f(char *service_name, MPI_Fint *info, char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len) { @@ -73,11 +75,11 @@ void ompi_publish_name_f(char *service_name, MPI_Fint *info, char *c_service_name; char *c_port_name; - c_info = MPI_Info_f2c(*info); + c_info = PMPI_Info_f2c(*info); ompi_fortran_string_f2c(service_name, service_name_len, &c_service_name); ompi_fortran_string_f2c(port_name, port_name_len, &c_port_name); - c_ierr = MPI_Publish_name(c_service_name, c_info, c_port_name); + c_ierr = PMPI_Publish_name(c_service_name, c_info, c_port_name); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); free ( c_service_name); diff --git a/ompi/mpi/fortran/mpif-h/put_f.c b/ompi/mpi/fortran/mpif-h/put_f.c index 7a18dfbb67c..686166e1bc8 100644 --- a/ompi/mpi/fortran/mpif-h/put_f.c +++ b/ompi/mpi/fortran/mpif-h/put_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_PUT = ompi_put_f #pragma weak pmpi_put = ompi_put_f #pragma weak pmpi_put_ = ompi_put_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Put_f = ompi_put_f #pragma weak PMPI_Put_f08 = ompi_put_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_PUT, pmpi_put, pmpi_put_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PUT, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_PUT = ompi_put_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_PUT, #pragma weak MPI_Put_f = ompi_put_f #pragma weak MPI_Put_f08 = ompi_put_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_PUT, mpi_put, mpi_put_, @@ -59,28 +62,27 @@ OMPI_GENERATE_F77_BINDINGS (MPI_PUT, ompi_put_f, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *win, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win, ierr) ) +#else +#define ompi_put_f pompi_put_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_put_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_origin_datatype = MPI_Type_f2c(*origin_datatype); - MPI_Datatype c_target_datatype = MPI_Type_f2c(*target_datatype); - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Datatype c_origin_datatype = PMPI_Type_f2c(*origin_datatype); + MPI_Datatype c_target_datatype = PMPI_Type_f2c(*target_datatype); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Put(OMPI_F2C_BOTTOM(origin_addr), + c_ierr = PMPI_Put(OMPI_F2C_BOTTOM(origin_addr), OMPI_FINT_2_INT(*origin_count), c_origin_datatype, OMPI_FINT_2_INT(*target_rank), - *target_disp, + *target_disp, OMPI_FINT_2_INT(*target_count), c_target_datatype, c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/query_thread_f.c b/ompi/mpi/fortran/mpif-h/query_thread_f.c index f0aa8b6a93a..e3bf2b39a6d 100644 --- a/ompi/mpi/fortran/mpif-h/query_thread_f.c +++ b/ompi/mpi/fortran/mpif-h/query_thread_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_QUERY_THREAD = ompi_query_thread_f #pragma weak pmpi_query_thread = ompi_query_thread_f #pragma weak pmpi_query_thread_ = ompi_query_thread_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Query_thread_f = ompi_query_thread_f #pragma weak PMPI_Query_thread_f08 = ompi_query_thread_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_QUERY_THREAD, pmpi_query_thread, pmpi_query_thread_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_QUERY_THREAD, (MPI_Fint *provided, MPI_Fint *ierr), (provided, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_QUERY_THREAD = ompi_query_thread_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_QUERY_THREAD, #pragma weak MPI_Query_thread_f = ompi_query_thread_f #pragma weak MPI_Query_thread_f08 = ompi_query_thread_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_QUERY_THREAD, mpi_query_thread, mpi_query_thread_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_QUERY_THREAD, ompi_query_thread_f, (MPI_Fint *provided, MPI_Fint *ierr), (provided, ierr) ) +#else +#define ompi_query_thread_f pompi_query_thread_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_query_thread_f(MPI_Fint *provided, MPI_Fint *ierr) { int c_ierr; OMPI_SINGLE_NAME_DECL(provided); - c_ierr = MPI_Query_thread(OMPI_SINGLE_NAME_CONVERT(provided)); + c_ierr = PMPI_Query_thread(OMPI_SINGLE_NAME_CONVERT(provided)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/raccumulate_f.c b/ompi/mpi/fortran/mpif-h/raccumulate_f.c index 0595d06bf5b..2c631871312 100644 --- a/ompi/mpi/fortran/mpif-h/raccumulate_f.c +++ b/ompi/mpi/fortran/mpif-h/raccumulate_f.c @@ -11,8 +11,10 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -26,7 +28,8 @@ #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_RACCUMULATE = ompi_raccumulate_f #pragma weak pmpi_raccumulate = ompi_raccumulate_f #pragma weak pmpi_raccumulate_ = ompi_raccumulate_f @@ -34,7 +37,7 @@ #pragma weak PMPI_Raccumulate_f = ompi_raccumulate_f #pragma weak PMPI_Raccumulate_f08 = ompi_raccumulate_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_RACCUMULATE, pmpi_raccumulate, pmpi_raccumulate_, @@ -43,6 +46,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RACCUMULATE, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *op, MPI_Fint *win, MPI_Fint *request, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, op, win, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_RACCUMULATE = ompi_raccumulate_f @@ -52,9 +56,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RACCUMULATE, #pragma weak MPI_Raccumulate_f = ompi_raccumulate_f #pragma weak MPI_Raccumulate_f08 = ompi_raccumulate_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_RACCUMULATE, mpi_raccumulate, mpi_raccumulate_, @@ -62,13 +65,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_RACCUMULATE, ompi_raccumulate_f, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *op, MPI_Fint *win, MPI_Fint *request, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, op, win, request, ierr) ) +#else +#define ompi_raccumulate_f pompi_raccumulate_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_raccumulate_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, @@ -77,24 +79,24 @@ void ompi_raccumulate_f(char *origin_addr, MPI_Fint *origin_count, { int ierr_c; - MPI_Datatype c_origin_datatype = MPI_Type_f2c(*origin_datatype); - MPI_Datatype c_target_datatype = MPI_Type_f2c(*target_datatype); - MPI_Win c_win = MPI_Win_f2c(*win); - MPI_Op c_op = MPI_Op_f2c(*op); + MPI_Datatype c_origin_datatype = PMPI_Type_f2c(*origin_datatype); + MPI_Datatype c_target_datatype = PMPI_Type_f2c(*target_datatype); + MPI_Win c_win = PMPI_Win_f2c(*win); + MPI_Op c_op = PMPI_Op_f2c(*op); MPI_Request c_req; - ierr_c = MPI_Raccumulate(OMPI_F2C_BOTTOM(origin_addr), - OMPI_FINT_2_INT(*origin_count), - c_origin_datatype, - OMPI_FINT_2_INT(*target_rank), - *target_disp, - OMPI_FINT_2_INT(*target_count), - c_target_datatype, c_op, c_win, - &c_req); + ierr_c = PMPI_Raccumulate(OMPI_F2C_BOTTOM(origin_addr), + OMPI_FINT_2_INT(*origin_count), + c_origin_datatype, + OMPI_FINT_2_INT(*target_rank), + *target_disp, + OMPI_FINT_2_INT(*target_count), + c_target_datatype, c_op, c_win, + &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c); - if (MPI_SUCCESS != ierr_c) { - 
*request = MPI_Request_c2f(c_req); + if (MPI_SUCCESS == ierr_c) { + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/recv_f.c b/ompi/mpi/fortran/mpif-h/recv_f.c index 16f1d8c7fe2..2d933075284 100644 --- a/ompi/mpi/fortran/mpif-h/recv_f.c +++ b/ompi/mpi/fortran/mpif-h/recv_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/base/constants.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_RECV = ompi_recv_f #pragma weak pmpi_recv = ompi_recv_f #pragma weak pmpi_recv_ = ompi_recv_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Recv_f = ompi_recv_f #pragma weak PMPI_Recv_f08 = ompi_recv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_RECV, pmpi_recv, pmpi_recv_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RECV, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr), (buf, count, datatype, source, tag, comm, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_RECV = ompi_recv_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RECV, #pragma weak 
MPI_Recv_f = ompi_recv_f #pragma weak MPI_Recv_f08 = ompi_recv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_RECV, mpi_recv, mpi_recv_, @@ -61,27 +64,26 @@ OMPI_GENERATE_F77_BINDINGS (MPI_RECV, ompi_recv_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr), (buf, count, datatype, source, tag, comm, status, ierr) ) +#else +#define ompi_recv_f pompi_recv_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_recv_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, - MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, + +void ompi_recv_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, + MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr) { OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) - MPI_Comm c_comm = MPI_Comm_f2c(*comm); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); int c_ierr; OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) /* Call the C function */ - c_ierr = MPI_Recv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), - c_type, OMPI_FINT_2_INT(*source), + c_ierr = PMPI_Recv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_type, OMPI_FINT_2_INT(*source), OMPI_FINT_2_INT(*tag), c_comm, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/recv_init_f.c b/ompi/mpi/fortran/mpif-h/recv_init_f.c index 433c2bf1b71..8fea063296e 100644 --- a/ompi/mpi/fortran/mpif-h/recv_init_f.c +++ b/ompi/mpi/fortran/mpif-h/recv_init_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_RECV_INIT = ompi_recv_init_f #pragma weak pmpi_recv_init = ompi_recv_init_f #pragma weak pmpi_recv_init_ = ompi_recv_init_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Recv_init_f = ompi_recv_init_f #pragma weak PMPI_Recv_init_f08 = ompi_recv_init_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_RECV_INIT, pmpi_recv_init, pmpi_recv_init_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RECV_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, source, tag, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_RECV_INIT = ompi_recv_init_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RECV_INIT, #pragma weak MPI_Recv_init_f = ompi_recv_init_f #pragma weak MPI_Recv_init_f08 = ompi_recv_init_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_RECV_INIT, mpi_recv_init, mpi_recv_init_, @@ -58,31 +61,30 @@ OMPI_GENERATE_F77_BINDINGS (MPI_RECV_INIT, ompi_recv_init_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, source, tag, comm, request, ierr) ) +#else +#define ompi_recv_init_f pompi_recv_init_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_recv_init_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, + +void ompi_recv_init_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Recv_init(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Recv_init(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*source), OMPI_INT_2_FINT(*tag), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/reduce_f.c b/ompi/mpi/fortran/mpif-h/reduce_f.c index 7ffc07f26c5..bf26152f344 100644 --- a/ompi/mpi/fortran/mpif-h/reduce_f.c +++ b/ompi/mpi/fortran/mpif-h/reduce_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_REDUCE = ompi_reduce_f #pragma weak pmpi_reduce = ompi_reduce_f #pragma weak pmpi_reduce_ = ompi_reduce_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Reduce_f = ompi_reduce_f #pragma weak PMPI_Reduce_f08 = ompi_reduce_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE, pmpi_reduce, pmpi_reduce_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, root, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_REDUCE = ompi_reduce_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE, #pragma weak MPI_Reduce_f = ompi_reduce_f #pragma weak MPI_Reduce_f08 = ompi_reduce_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE, mpi_reduce, mpi_reduce_, @@ -58,15 +61,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE, ompi_reduce_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, root, comm, ierr) ) +#else +#define ompi_reduce_f pompi_reduce_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_reduce_f(char *sendbuf, char *recvbuf, MPI_Fint *count, - MPI_Fint *datatype, MPI_Fint *op, + MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr) { int c_ierr; @@ -74,17 +76,17 @@ void ompi_reduce_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Op c_op; MPI_Comm c_comm; - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); - c_comm = MPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); + c_comm = PMPI_Comm_f2c(*comm); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Reduce(sendbuf, recvbuf, + c_ierr = PMPI_Reduce(sendbuf, recvbuf, OMPI_FINT_2_INT(*count), - c_type, c_op, + c_type, c_op, OMPI_FINT_2_INT(*root), c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/reduce_local_f.c b/ompi/mpi/fortran/mpif-h/reduce_local_f.c index eded1349f4a..a7134e4baac 100644 --- a/ompi/mpi/fortran/mpif-h/reduce_local_f.c +++ b/ompi/mpi/fortran/mpif-h/reduce_local_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_REDUCE_LOCAL = ompi_reduce_local_f #pragma weak pmpi_reduce_local = ompi_reduce_local_f #pragma weak pmpi_reduce_local_ = ompi_reduce_local_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Reduce_local_f = ompi_reduce_local_f #pragma weak PMPI_Reduce_local_f08 = ompi_reduce_local_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_LOCAL, pmpi_reduce_local, pmpi_reduce_local_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_LOCAL, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_REDUCE_LOCAL = ompi_reduce_local_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_LOCAL, #pragma weak MPI_Reduce_local_f = ompi_reduce_local_f #pragma weak MPI_Reduce_local_f08 = ompi_reduce_local_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE_LOCAL, mpi_reduce_local, mpi_reduce_local_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE_LOCAL, ompi_reduce_local_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, ierr) ) +#else +#define ompi_reduce_local_f pompi_reduce_local_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_reduce_local_f(char *inbuf, char *inoutbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *ierr) { @@ -72,13 +74,13 @@ void ompi_reduce_local_f(char *inbuf, char *inoutbuf, MPI_Fint *count, MPI_Datatype c_type; MPI_Op c_op; - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); inbuf = (char *) OMPI_F2C_BOTTOM(inbuf); inoutbuf = (char *) OMPI_F2C_BOTTOM(inoutbuf); - c_ierr = MPI_Reduce_local(inbuf, inoutbuf, + c_ierr = PMPI_Reduce_local(inbuf, inoutbuf, OMPI_FINT_2_INT(*count), c_type, c_op); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/reduce_scatter_block_f.c b/ompi/mpi/fortran/mpif-h/reduce_scatter_block_f.c index 5a426eae513..c70cef7baf1 100644 --- a/ompi/mpi/fortran/mpif-h/reduce_scatter_block_f.c +++ b/ompi/mpi/fortran/mpif-h/reduce_scatter_block_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_REDUCE_SCATTER_BLOCK = ompi_reduce_scatter_block_f #pragma weak pmpi_reduce_scatter_block = ompi_reduce_scatter_block_f #pragma weak pmpi_reduce_scatter_block_ = ompi_reduce_scatter_block_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Reduce_scatter_block_f = ompi_reduce_scatter_block_f #pragma weak PMPI_Reduce_scatter_block_f08 = ompi_reduce_scatter_block_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_SCATTER_BLOCK, pmpi_reduce_scatter_block, pmpi_reduce_scatter_block_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_SCATTER_BLOCK, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_REDUCE_SCATTER_BLOCK = ompi_reduce_scatter_block_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_SCATTER_BLOCK, #pragma weak MPI_Reduce_scatter_block_f = ompi_reduce_scatter_block_f #pragma weak MPI_Reduce_scatter_block_f08 = ompi_reduce_scatter_block_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE_SCATTER_BLOCK, mpi_reduce_scatter_block, mpi_reduce_scatter_block_, @@ -58,14 +61,13 @@ OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE_SCATTER_BLOCK, ompi_reduce_scatter_block_f, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, ierr) ) +#else +#define ompi_reduce_scatter_block_f pompi_reduce_scatter_block_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_reduce_scatter_block_f(char *sendbuf, char *recvbuf, + +void ompi_reduce_scatter_block_f(char *sendbuf, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr) { @@ -75,17 +77,17 @@ void ompi_reduce_scatter_block_f(char *sendbuf, char *recvbuf, MPI_Op c_op; int size; - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - - c_ierr = MPI_Reduce_scatter_block(sendbuf, recvbuf, + + c_ierr = PMPI_Reduce_scatter_block(sendbuf, recvbuf, OMPI_FINT_2_INT(*recvcount), c_type, c_op, c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/reduce_scatter_f.c b/ompi/mpi/fortran/mpif-h/reduce_scatter_f.c index 0afb3aff56e..c5cd1881643 100644 --- a/ompi/mpi/fortran/mpif-h/reduce_scatter_f.c +++ b/ompi/mpi/fortran/mpif-h/reduce_scatter_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_REDUCE_SCATTER = ompi_reduce_scatter_f #pragma weak pmpi_reduce_scatter = ompi_reduce_scatter_f #pragma weak pmpi_reduce_scatter_ = ompi_reduce_scatter_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Reduce_scatter_f = ompi_reduce_scatter_f #pragma weak PMPI_Reduce_scatter_f08 = ompi_reduce_scatter_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_SCATTER, pmpi_reduce_scatter, pmpi_reduce_scatter_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_SCATTER, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_REDUCE_SCATTER = ompi_reduce_scatter_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_SCATTER, #pragma weak MPI_Reduce_scatter_f = ompi_reduce_scatter_f #pragma weak MPI_Reduce_scatter_f08 = ompi_reduce_scatter_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE_SCATTER, mpi_reduce_scatter, mpi_reduce_scatter_, @@ -58,14 +61,13 @@ OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE_SCATTER, ompi_reduce_scatter_f, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, recvcounts, datatype, op, comm, ierr) ) +#else +#define ompi_reduce_scatter_f pompi_reduce_scatter_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_reduce_scatter_f(char *sendbuf, char *recvbuf, + +void ompi_reduce_scatter_f(char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr) { @@ -76,18 +78,18 @@ void ompi_reduce_scatter_f(char *sendbuf, char *recvbuf, int size; OMPI_ARRAY_NAME_DECL(recvcounts); - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - - c_ierr = MPI_Reduce_scatter(sendbuf, recvbuf, + + c_ierr = PMPI_Reduce_scatter(sendbuf, recvbuf, OMPI_ARRAY_NAME_CONVERT(recvcounts), c_type, c_op, c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/register_datarep_f.c b/ompi/mpi/fortran/mpif-h/register_datarep_f.c index 6ebd454641e..7b9e628f60b 100644 --- a/ompi/mpi/fortran/mpif-h/register_datarep_f.c +++ b/ompi/mpi/fortran/mpif-h/register_datarep_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,7 +31,8 @@ #include "ompi/runtime/mpiruntime.h" #include "ompi/file/file.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_REGISTER_DATAREP = ompi_register_datarep_f #pragma weak pmpi_register_datarep = ompi_register_datarep_f #pragma weak pmpi_register_datarep_ = ompi_register_datarep_f @@ -37,7 +40,7 @@ #pragma weak PMPI_Register_datarep_f = ompi_register_datarep_f #pragma weak PMPI_Register_datarep_f08 = ompi_register_datarep_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_REGISTER_DATAREP, pmpi_register_datarep, pmpi_register_datarep_, @@ -46,6 +49,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REGISTER_DATAREP, (char *datarep, ompi_mpi2_fortran_datarep_conversion_fn_t *read_conversion_fn, ompi_mpi2_fortran_datarep_conversion_fn_t *write_conversion_fn, ompi_mpi2_fortran_datarep_extent_fn_t *dtype_file_extent_fn, MPI_Aint *extra_state, MPI_Fint *ierr, int datarep_len), (datarep, read_conversion_fn, write_conversion_fn, dtype_file_extent_fn, extra_state, ierr, datarep_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_REGISTER_DATAREP = ompi_register_datarep_f @@ -55,9 +59,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REGISTER_DATAREP, #pragma weak MPI_Register_datarep_f = ompi_register_datarep_f #pragma weak MPI_Register_datarep_f08 = ompi_register_datarep_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_REGISTER_DATAREP, mpi_register_datarep, mpi_register_datarep_, @@ -65,11 +68,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_REGISTER_DATAREP, ompi_register_datarep_f, (char *datarep, ompi_mpi2_fortran_datarep_conversion_fn_t *read_conversion_fn, ompi_mpi2_fortran_datarep_conversion_fn_t *write_conversion_fn, ompi_mpi2_fortran_datarep_extent_fn_t *dtype_file_extent_fn, MPI_Aint *extra_state, MPI_Fint *ierr, int datarep_len), (datarep, read_conversion_fn, write_conversion_fn, dtype_file_extent_fn, extra_state, ierr, datarep_len) ) +#else +#define ompi_register_datarep_f pompi_register_datarep_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif static const char FUNC_NAME[] = "MPI_REGISTER_DATAREP"; @@ -77,12 +78,12 @@ static const char FUNC_NAME[] = "MPI_REGISTER_DATAREP"; /* Intercept functions used below (see below for explanations in comments) */ static int read_intercept_fn(void *userbuf, MPI_Datatype type_c, int count_c, - void *filebuf, MPI_Offset position, + void *filebuf, MPI_Offset position, void *extra_state); static int write_intercept_fn(void *userbuf, MPI_Datatype type_c, int count_c, - void *filebuf, MPI_Offset position, + void *filebuf, MPI_Offset position, void *extra_state); -static int extent_intercept_fn(MPI_Datatype type_c, MPI_Aint *file_extent, +static int extent_intercept_fn(MPI_Datatype type_c, MPI_Aint *file_extent, void *extra_state); /* Data structure passed to the intercepts (see below). 
It is an OPAL @@ -98,7 +99,7 @@ typedef struct intercept_extra_state { OBJ_CLASS_DECLARATION(intercept_extra_state_t); -#if !OMPI_PROFILE_LAYER || OPAL_HAVE_WEAK_SYMBOLS +#if !OMPI_BUILD_MPI_PROFILING || OPAL_HAVE_WEAK_SYMBOLS static void intercept_extra_state_constructor(intercept_extra_state_t *obj) { obj->read_fn_f77 = NULL; @@ -110,7 +111,7 @@ static void intercept_extra_state_constructor(intercept_extra_state_t *obj) OBJ_CLASS_INSTANCE(intercept_extra_state_t, opal_list_item_t, intercept_extra_state_constructor, NULL); -#endif /* !OMPI_PROFILE_LAYER */ +#endif /* !OMPI_BUILD_MPI_PROFILING */ /* * This function works by calling the C version of @@ -126,10 +127,10 @@ OBJ_CLASS_INSTANCE(intercept_extra_state_t, * arguments to Fortran and then invoke the registered callback * function. */ -void ompi_register_datarep_f(char *datarep, +void ompi_register_datarep_f(char *datarep, ompi_mpi2_fortran_datarep_conversion_fn_t *read_fn_f77, ompi_mpi2_fortran_datarep_conversion_fn_t *write_fn_f77, - ompi_mpi2_fortran_datarep_extent_fn_t *extent_fn_f77, + ompi_mpi2_fortran_datarep_extent_fn_t *extent_fn_f77, MPI_Aint *extra_state_f77, MPI_Fint *ierr, int datarep_len) { @@ -137,11 +138,11 @@ void ompi_register_datarep_f(char *datarep, int c_ierr, ret; MPI_Datarep_conversion_function *read_fn_c, *write_fn_c; intercept_extra_state_t *intercept; - + /* Malloc space for the intercept callback data */ intercept = OBJ_NEW(intercept_extra_state_t); if (NULL == intercept) { - c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, + c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, OMPI_ERR_OUT_OF_RESOURCE, FUNC_NAME); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); return; @@ -161,12 +162,12 @@ void ompi_register_datarep_f(char *datarep, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); return; } - + /* Convert the Fortran function callbacks to C equivalents. Use local intercepts if they're not MPI_CONVERSION_FN_NULL so that - we can just call the C MPI API MPI_Register_datarep(). 
If they + we can just call the C MPI API PMPI_Register_datarep(). If they *are* MPI_CONVERSION_FN_NULL, then just pass that to - MPI_Register_datarep so that it becomes a no-op (i.e., no + PMPI_Register_datarep so that it becomes a no-op (i.e., no callback is ever triggered). */ if (OMPI_IS_FORTRAN_CONVERSION_FN_NULL(read_fn_f77)) { /* Can't use the MPI_CONVERSION_FN_NULL macro here because it @@ -192,8 +193,8 @@ void ompi_register_datarep_f(char *datarep, /* Now that the intercept data has been setup, call the C function with the setup intercept routines and the intercept-specific data/extra state. */ - c_ierr = MPI_Register_datarep(c_datarep, - read_fn_c, write_fn_c, + c_ierr = PMPI_Register_datarep(c_datarep, + read_fn_c, write_fn_c, extent_intercept_fn, intercept); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -204,16 +205,16 @@ void ompi_register_datarep_f(char *datarep, * C->Fortran intercept for the read conversion. */ static int read_intercept_fn(void *userbuf, MPI_Datatype type_c, int count_c, - void *filebuf, MPI_Offset position, + void *filebuf, MPI_Offset position, void *extra_state) { MPI_Fint ierr, count_f77 = OMPI_FINT_2_INT(count_c); - MPI_Fint type_f77 = MPI_Type_c2f(type_c); - intercept_extra_state_t *intercept_data = + MPI_Fint type_f77 = PMPI_Type_c2f(type_c); + intercept_extra_state_t *intercept_data = (intercept_extra_state_t*) extra_state; - intercept_data->read_fn_f77((char *) userbuf, &type_f77, &count_f77, (char *) filebuf, - &position, intercept_data->extra_state_f77, + intercept_data->read_fn_f77((char *) userbuf, &type_f77, &count_f77, (char *) filebuf, + &position, intercept_data->extra_state_f77, &ierr); return OMPI_FINT_2_INT(ierr); } @@ -222,16 +223,16 @@ static int read_intercept_fn(void *userbuf, MPI_Datatype type_c, int count_c, * C->Fortran intercept for the write conversion. 
*/ static int write_intercept_fn(void *userbuf, MPI_Datatype type_c, int count_c, - void *filebuf, MPI_Offset position, + void *filebuf, MPI_Offset position, void *extra_state) { MPI_Fint ierr, count_f77 = OMPI_FINT_2_INT(count_c); - MPI_Fint type_f77 = MPI_Type_c2f(type_c); - intercept_extra_state_t *intercept_data = + MPI_Fint type_f77 = PMPI_Type_c2f(type_c); + intercept_extra_state_t *intercept_data = (intercept_extra_state_t*) extra_state; - intercept_data->write_fn_f77((char *) userbuf, &type_f77, &count_f77, (char *) filebuf, - &position, intercept_data->extra_state_f77, + intercept_data->write_fn_f77((char *) userbuf, &type_f77, &count_f77, (char *) filebuf, + &position, intercept_data->extra_state_f77, &ierr); return OMPI_FINT_2_INT(ierr); } @@ -239,14 +240,14 @@ static int write_intercept_fn(void *userbuf, MPI_Datatype type_c, int count_c, /* * C->Fortran intercept for the extent calculation. */ -static int extent_intercept_fn(MPI_Datatype type_c, MPI_Aint *file_extent_f77, +static int extent_intercept_fn(MPI_Datatype type_c, MPI_Aint *file_extent_f77, void *extra_state) { - MPI_Fint ierr, type_f77 = MPI_Type_c2f(type_c); - intercept_extra_state_t *intercept_data = + MPI_Fint ierr, type_f77 = PMPI_Type_c2f(type_c); + intercept_extra_state_t *intercept_data = (intercept_extra_state_t*) extra_state; - intercept_data->extent_fn_f77(&type_f77, file_extent_f77, + intercept_data->extent_fn_f77(&type_f77, file_extent_f77, intercept_data->extra_state_f77, &ierr); return OMPI_FINT_2_INT(ierr); } diff --git a/ompi/mpi/fortran/mpif-h/request_free_f.c b/ompi/mpi/fortran/mpif-h/request_free_f.c index 696afefedd7..6ab32d4b56c 100644 --- a/ompi/mpi/fortran/mpif-h/request_free_f.c +++ b/ompi/mpi/fortran/mpif-h/request_free_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_REQUEST_FREE = ompi_request_free_f #pragma weak pmpi_request_free = ompi_request_free_f #pragma weak pmpi_request_free_ = ompi_request_free_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Request_free_f = ompi_request_free_f #pragma weak PMPI_Request_free_f08 = ompi_request_free_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_REQUEST_FREE, pmpi_request_free, pmpi_request_free_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REQUEST_FREE, (MPI_Fint *request, MPI_Fint *ierr), (request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_REQUEST_FREE = ompi_request_free_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REQUEST_FREE, #pragma weak MPI_Request_free_f = ompi_request_free_f #pragma weak MPI_Request_free_f08 = ompi_request_free_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_REQUEST_FREE, mpi_request_free, mpi_request_free_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_REQUEST_FREE, ompi_request_free_f, (MPI_Fint *request, MPI_Fint *ierr), (request, ierr) ) +#else +#define ompi_request_free_f pompi_request_free_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_request_free_f(MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Request c_req = MPI_Request_f2c( *request ); - c_ierr = MPI_Request_free(&c_req); + MPI_Request c_req = PMPI_Request_f2c( *request ); + c_ierr = PMPI_Request_free(&c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/request_get_status_f.c b/ompi/mpi/fortran/mpif-h/request_get_status_f.c index fb450495cea..7a5c9d57716 100644 --- a/ompi/mpi/fortran/mpif-h/request_get_status_f.c +++ b/ompi/mpi/fortran/mpif-h/request_get_status_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_REQUEST_GET_STATUS = ompi_request_get_status_f #pragma weak pmpi_request_get_status = ompi_request_get_status_f #pragma weak pmpi_request_get_status_ = ompi_request_get_status_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Request_get_status_f = ompi_request_get_status_f #pragma weak PMPI_Request_get_status_f08 = ompi_request_get_status_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_REQUEST_GET_STATUS, pmpi_request_get_status, pmpi_request_get_status_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REQUEST_GET_STATUS, (MPI_Fint *request, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr), (request, flag, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_REQUEST_GET_STATUS = ompi_request_get_status_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_REQUEST_GET_STATUS, #pragma weak MPI_Request_get_status_f = ompi_request_get_status_f #pragma weak MPI_Request_get_status_f08 = ompi_request_get_status_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_REQUEST_GET_STATUS, mpi_request_get_status, mpi_request_get_status_, @@ -58,19 +61,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_REQUEST_GET_STATUS, ompi_request_get_status_f, (MPI_Fint *request, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr), (request, flag, status, ierr) ) +#else +#define ompi_request_get_status_f pompi_request_get_status_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_request_get_status_f(MPI_Fint *request, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; MPI_Status c_status; - MPI_Request c_req = MPI_Request_f2c( *request ); + MPI_Request c_req = PMPI_Request_f2c( *request ); OMPI_LOGICAL_NAME_DECL(flag); /* This seems silly, but someone will do it */ @@ -79,11 +81,11 @@ void ompi_request_get_status_f(MPI_Fint *request, ompi_fortran_logical_t *flag, *flag = OMPI_INT_2_LOGICAL(0); c_ierr = MPI_SUCCESS; } else { - c_ierr = MPI_Request_get_status(c_req, + c_ierr = PMPI_Request_get_status(c_req, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag), &c_status); OMPI_SINGLE_INT_2_LOGICAL(flag); - MPI_Status_c2f( &c_status, status ); + PMPI_Status_c2f( &c_status, status ); } if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/rget_accumulate_f.c b/ompi/mpi/fortran/mpif-h/rget_accumulate_f.c index 53e0cb7ebf6..57fa6470227 100644 --- a/ompi/mpi/fortran/mpif-h/rget_accumulate_f.c +++ b/ompi/mpi/fortran/mpif-h/rget_accumulate_f.c @@ -11,8 +11,11 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -26,15 +29,16 @@ #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_RGET_ACCUMULATE = ompi_rget_accumulate_f #pragma weak pmpi_rget_accumulate = ompi_rget_accumulate_f #pragma weak pmpi_rget_accumulate_ = ompi_rget_accumulate_f #pragma weak pmpi_rget_accumulate__ = ompi_rget_accumulate_f -#pragma weak PMPI_Get_accumulate_f = ompi_rget_accumulate_f -#pragma weak PMPI_Get_accumulate_f08 = ompi_rget_accumulate_f -#elif OMPI_PROFILE_LAYER +#pragma weak PMPI_Rget_accumulate_f = ompi_rget_accumulate_f +#pragma weak PMPI_Rget_accumulate_f08 = ompi_rget_accumulate_f +#else OMPI_GENERATE_F77_BINDINGS (PMPI_RGET_ACCUMULATE, pmpi_rget_accumulate, pmpi_rget_accumulate_, @@ -43,6 +47,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RGET_ACCUMULATE, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, char *result_addr, MPI_Fint *result_count, MPI_Fint *result_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *op, MPI_Fint *win, MPI_Fint *request, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, result_addr, result_count, result_datatype, target_rank, target_disp, target_count, target_datatype, op, win, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_RGET_ACCUMULATE = ompi_rget_accumulate_f @@ -50,11 +55,10 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RGET_ACCUMULATE, #pragma weak mpi_rget_accumulate_ = ompi_rget_accumulate_f #pragma weak mpi_rget_accumulate__ = ompi_rget_accumulate_f -#pragma weak MPI_Get_accumulate_f = ompi_rget_accumulate_f -#pragma weak MPI_Get_accumulate_f08 = ompi_rget_accumulate_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#pragma weak MPI_Rget_accumulate_f = ompi_rget_accumulate_f +#pragma weak MPI_Rget_accumulate_f08 = ompi_rget_accumulate_f +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_RGET_ACCUMULATE, mpi_rget_accumulate, mpi_rget_accumulate_, @@ -62,13 +66,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_RGET_ACCUMULATE, ompi_rget_accumulate_f, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, char *result_addr, MPI_Fint *result_count, MPI_Fint *result_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *op, MPI_Fint *win, MPI_Fint *request, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, result_addr, result_count, result_datatype, target_rank, target_disp, target_count, target_datatype, op, win, request, ierr) ) +#else +#define ompi_rget_accumulate_f pompi_rget_accumulate_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_rget_accumulate_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, char *result_addr, MPI_Fint *result_count, MPI_Fint *result_datatype, @@ -78,14 +81,14 @@ void ompi_rget_accumulate_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_origin_datatype = MPI_Type_f2c(*origin_datatype); - MPI_Datatype c_result_datatype = MPI_Type_f2c(*result_datatype); - MPI_Datatype c_target_datatype = MPI_Type_f2c(*target_datatype); - MPI_Win c_win = MPI_Win_f2c(*win); - MPI_Op c_op = MPI_Op_f2c(*op); + MPI_Datatype c_origin_datatype = PMPI_Type_f2c(*origin_datatype); + MPI_Datatype c_result_datatype = PMPI_Type_f2c(*result_datatype); + MPI_Datatype c_target_datatype = PMPI_Type_f2c(*target_datatype); + MPI_Win c_win = PMPI_Win_f2c(*win); + MPI_Op c_op = PMPI_Op_f2c(*op); MPI_Request c_req; - c_ierr = MPI_Rget_accumulate(OMPI_F2C_BOTTOM(origin_addr), + c_ierr = PMPI_Rget_accumulate(OMPI_F2C_BOTTOM(origin_addr), OMPI_FINT_2_INT(*origin_count), c_origin_datatype, OMPI_F2C_BOTTOM(result_addr), @@ -97,7 +100,7 @@ void ompi_rget_accumulate_f(char *origin_addr, 
MPI_Fint *origin_count, c_target_datatype, c_op, c_win, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS != c_ierr) { - *request = MPI_Request_c2f(c_req); + if (MPI_SUCCESS == c_ierr) { + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/rget_f.c b/ompi/mpi/fortran/mpif-h/rget_f.c index 833986117ad..a6c77d36f2d 100644 --- a/ompi/mpi/fortran/mpif-h/rget_f.c +++ b/ompi/mpi/fortran/mpif-h/rget_f.c @@ -11,8 +11,10 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,7 +28,8 @@ #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_RGET = ompi_rget_f #pragma weak pmpi_rget = ompi_rget_f #pragma weak pmpi_rget_ = ompi_rget_f @@ -34,7 +37,7 @@ #pragma weak PMPI_Rget_f = ompi_rget_f #pragma weak PMPI_Rget_f08 = ompi_rget_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_RGET, pmpi_rget, pmpi_rget_, @@ -43,6 +46,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RGET, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *win, MPI_Fint *request, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_RGET = ompi_rget_f @@ -52,9 +56,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RGET, #pragma weak MPI_Rget_f = 
ompi_rget_f #pragma weak MPI_Rget_f08 = ompi_rget_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_RGET, mpi_rget, mpi_rget_, @@ -62,13 +65,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_RGET, ompi_rget_f, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *win, MPI_Fint *request, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win, request, ierr) ) +#else +#define ompi_rget_f pompi_rget_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_rget_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, @@ -76,12 +78,12 @@ void ompi_rget_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_origin_datatype = MPI_Type_f2c(*origin_datatype); - MPI_Datatype c_target_datatype = MPI_Type_f2c(*target_datatype); - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Datatype c_origin_datatype = PMPI_Type_f2c(*origin_datatype); + MPI_Datatype c_target_datatype = PMPI_Type_f2c(*target_datatype); + MPI_Win c_win = PMPI_Win_f2c(*win); MPI_Request c_req; - c_ierr = MPI_Rget(OMPI_F2C_BOTTOM(origin_addr), + c_ierr = PMPI_Rget(OMPI_F2C_BOTTOM(origin_addr), OMPI_FINT_2_INT(*origin_count), c_origin_datatype, OMPI_FINT_2_INT(*target_rank), @@ -90,7 +92,7 @@ void ompi_rget_f(char *origin_addr, MPI_Fint *origin_count, c_target_datatype, c_win, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS != c_ierr) { - *request = MPI_Request_c2f(c_req); + if (MPI_SUCCESS == c_ierr) { + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/rput_f.c b/ompi/mpi/fortran/mpif-h/rput_f.c 
index 395486c15ba..03125bcc894 100644 --- a/ompi/mpi/fortran/mpif-h/rput_f.c +++ b/ompi/mpi/fortran/mpif-h/rput_f.c @@ -11,8 +11,10 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,7 +28,8 @@ #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_RPUT = ompi_rput_f #pragma weak pmpi_rput = ompi_rput_f #pragma weak pmpi_rput_ = ompi_rput_f @@ -34,7 +37,7 @@ #pragma weak PMPI_Rput_f = ompi_rput_f #pragma weak PMPI_Rput_f08 = ompi_rput_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_RPUT, pmpi_rput, pmpi_rput_, @@ -43,6 +46,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RPUT, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *win, MPI_Fint *request, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_RPUT = ompi_rput_f @@ -52,9 +56,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RPUT, #pragma weak MPI_Rput_f = ompi_rput_f #pragma weak MPI_Rput_f08 = ompi_rput_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_RPUT, mpi_rput, mpi_rput_, @@ -62,13 +65,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_RPUT, ompi_rput_f, (char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, MPI_Fint *target_datatype, MPI_Fint *win, MPI_Fint *request, MPI_Fint *ierr), (origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win, request, ierr) ) +#else +#define ompi_rput_f pompi_rput_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_rput_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *origin_datatype, MPI_Fint *target_rank, MPI_Aint *target_disp, MPI_Fint *target_count, @@ -76,12 +78,12 @@ void ompi_rput_f(char *origin_addr, MPI_Fint *origin_count, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_origin_datatype = MPI_Type_f2c(*origin_datatype); - MPI_Datatype c_target_datatype = MPI_Type_f2c(*target_datatype); - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Datatype c_origin_datatype = PMPI_Type_f2c(*origin_datatype); + MPI_Datatype c_target_datatype = PMPI_Type_f2c(*target_datatype); + MPI_Win c_win = PMPI_Win_f2c(*win); MPI_Request c_req; - c_ierr = MPI_Rput(OMPI_F2C_BOTTOM(origin_addr), + c_ierr = PMPI_Rput(OMPI_F2C_BOTTOM(origin_addr), OMPI_FINT_2_INT(*origin_count), c_origin_datatype, OMPI_FINT_2_INT(*target_rank), @@ -90,7 +92,7 @@ void ompi_rput_f(char *origin_addr, MPI_Fint *origin_count, c_target_datatype, c_win, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - if (MPI_SUCCESS != c_ierr) { - *request = MPI_Request_c2f(c_req); + if (MPI_SUCCESS == c_ierr) { + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/rsend_f.c b/ompi/mpi/fortran/mpif-h/rsend_f.c index 18b8e4272a5..eed76ee3658 100644 --- a/ompi/mpi/fortran/mpif-h/rsend_f.c +++ b/ompi/mpi/fortran/mpif-h/rsend_f.c @@ -5,15 +5,17 
@@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_RSEND = ompi_rsend_f #pragma weak pmpi_rsend = ompi_rsend_f #pragma weak pmpi_rsend_ = ompi_rsend_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Rsend_f = ompi_rsend_f #pragma weak PMPI_Rsend_f08 = ompi_rsend_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_RSEND, pmpi_rsend, pmpi_rsend_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RSEND, (char *ibuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr), (ibuf, count, datatype, dest, tag, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_RSEND = ompi_rsend_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RSEND, #pragma weak MPI_Rsend_f = ompi_rsend_f #pragma weak MPI_Rsend_f08 = ompi_rsend_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_RSEND, mpi_rsend, mpi_rsend_, @@ -58,23 +61,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_RSEND, ompi_rsend_f, (char *ibuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr), (ibuf, count, datatype, dest, tag, comm, ierr) ) +#else +#define ompi_rsend_f pompi_rsend_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_rsend_f(char *ibuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Rsend(OMPI_F2C_BOTTOM(ibuf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Rsend(OMPI_F2C_BOTTOM(ibuf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/rsend_init_f.c b/ompi/mpi/fortran/mpif-h/rsend_init_f.c index 00a44d393c8..d0e9e2bd0fe 100644 --- a/ompi/mpi/fortran/mpif-h/rsend_init_f.c +++ b/ompi/mpi/fortran/mpif-h/rsend_init_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_RSEND_INIT = ompi_rsend_init_f #pragma weak pmpi_rsend_init = ompi_rsend_init_f #pragma weak pmpi_rsend_init_ = ompi_rsend_init_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Rsend_init_f = ompi_rsend_init_f #pragma weak PMPI_Rsend_init_f08 = ompi_rsend_init_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_RSEND_INIT, pmpi_rsend_init, pmpi_rsend_init_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RSEND_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_RSEND_INIT = ompi_rsend_init_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_RSEND_INIT, #pragma weak MPI_Rsend_init_f = ompi_rsend_init_f #pragma weak MPI_Rsend_init_f08 = ompi_rsend_init_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_RSEND_INIT, mpi_rsend_init, mpi_rsend_init_, @@ -58,32 +61,31 @@ OMPI_GENERATE_F77_BINDINGS (MPI_RSEND_INIT, ompi_rsend_init_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) +#else +#define ompi_rsend_init_f pompi_rsend_init_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_rsend_init_f(char *buf, MPI_Fint *count, + +void ompi_rsend_init_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Rsend_init(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Rsend_init(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/scan_f.c b/ompi/mpi/fortran/mpif-h/scan_f.c index d87d6d74427..cff6012ca4a 100644 --- a/ompi/mpi/fortran/mpif-h/scan_f.c +++ b/ompi/mpi/fortran/mpif-h/scan_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_SCAN = ompi_scan_f #pragma weak pmpi_scan = ompi_scan_f #pragma weak pmpi_scan_ = ompi_scan_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Scan_f = ompi_scan_f #pragma weak PMPI_Scan_f08 = ompi_scan_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_SCAN, pmpi_scan, pmpi_scan_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SCAN, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_SCAN = ompi_scan_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SCAN, #pragma weak MPI_Scan_f = ompi_scan_f #pragma weak MPI_Scan_f08 = ompi_scan_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_SCAN, mpi_scan, mpi_scan_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_SCAN, ompi_scan_f, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, recvbuf, count, datatype, op, comm, ierr) ) +#else +#define ompi_scan_f pompi_scan_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_scan_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr) @@ -74,17 +76,17 @@ void ompi_scan_f(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Datatype c_type; MPI_Op c_op; - c_type = MPI_Type_f2c(*datatype); - c_op = MPI_Op_f2c(*op); - c_comm = MPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); + c_op = PMPI_Op_f2c(*op); + c_comm = PMPI_Comm_f2c(*comm); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Scan(sendbuf, recvbuf, + c_ierr = PMPI_Scan(sendbuf, recvbuf, OMPI_FINT_2_INT(*count), - c_type, c_op, + c_type, c_op, c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/scatter_f.c b/ompi/mpi/fortran/mpif-h/scatter_f.c index a256b754bf2..667a7064dd5 100644 --- a/ompi/mpi/fortran/mpif-h/scatter_f.c +++ b/ompi/mpi/fortran/mpif-h/scatter_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_SCATTER = ompi_scatter_f #pragma weak pmpi_scatter = ompi_scatter_f #pragma weak pmpi_scatter_ = ompi_scatter_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Scatter_f = ompi_scatter_f #pragma weak PMPI_Scatter_f08 = ompi_scatter_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_SCATTER, pmpi_scatter, pmpi_scatter_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SCATTER, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_SCATTER = ompi_scatter_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SCATTER, #pragma weak MPI_Scatter_f = ompi_scatter_f #pragma weak MPI_Scatter_f08 = ompi_scatter_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_SCATTER, mpi_scatter, mpi_scatter_, @@ -58,33 +61,32 @@ OMPI_GENERATE_F77_BINDINGS (MPI_SCATTER, ompi_scatter_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, ierr) ) +#else +#define ompi_scatter_f pompi_scatter_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_scatter_f(char *sendbuf, MPI_Fint *sendcount, + +void ompi_scatter_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr) { int c_ierr; MPI_Datatype c_sendtype, c_recvtype; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); - - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); + + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_IN_PLACE(recvbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Scatter(sendbuf,OMPI_FINT_2_INT(*sendcount), - c_sendtype, recvbuf, + c_ierr = PMPI_Scatter(sendbuf,OMPI_FINT_2_INT(*sendcount), + c_sendtype, recvbuf, OMPI_FINT_2_INT(*recvcount), - c_recvtype, + c_recvtype, OMPI_FINT_2_INT(*root), c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/scatterv_f.c b/ompi/mpi/fortran/mpif-h/scatterv_f.c index 28b0532f3fc..744a05fe02c 100644 --- a/ompi/mpi/fortran/mpif-h/scatterv_f.c +++ b/ompi/mpi/fortran/mpif-h/scatterv_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_SCATTERV = ompi_scatterv_f #pragma weak pmpi_scatterv = ompi_scatterv_f #pragma weak pmpi_scatterv_ = ompi_scatterv_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Scatterv_f = ompi_scatterv_f #pragma weak PMPI_Scatterv_f08 = ompi_scatterv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_SCATTERV, pmpi_scatterv, pmpi_scatterv_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SCATTERV, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *displs, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_SCATTERV = ompi_scatterv_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SCATTERV, #pragma weak MPI_Scatterv_f = ompi_scatterv_f #pragma weak MPI_Scatterv_f08 = ompi_scatterv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_SCATTERV, mpi_scatterv, mpi_scatterv_, @@ -58,16 +61,15 @@ OMPI_GENERATE_F77_BINDINGS (MPI_SCATTERV, ompi_scatterv_f, (char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *displs, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr), (sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, ierr) ) +#else +#define ompi_scatterv_f pompi_scatterv_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_scatterv_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *displs, MPI_Fint *sendtype, - char *recvbuf, MPI_Fint *recvcount, + char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr) { @@ -77,11 +79,11 @@ void ompi_scatterv_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_NAME_DECL(sendcounts); OMPI_ARRAY_NAME_DECL(displs); - c_comm = MPI_Comm_f2c(*comm); - c_sendtype = MPI_Type_f2c(*sendtype); - c_recvtype = MPI_Type_f2c(*recvtype); + c_comm = PMPI_Comm_f2c(*comm); + c_sendtype = PMPI_Type_f2c(*sendtype); + c_recvtype = PMPI_Type_f2c(*recvtype); - MPI_Comm_size(c_comm, &size); + PMPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(sendcounts, size); OMPI_ARRAY_FINT_2_INT(displs, size); @@ -89,12 +91,12 @@ void ompi_scatterv_f(char *sendbuf, MPI_Fint *sendcounts, recvbuf = (char *) OMPI_F2C_IN_PLACE(recvbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); - c_ierr = MPI_Scatterv(sendbuf, + c_ierr = PMPI_Scatterv(sendbuf, OMPI_ARRAY_NAME_CONVERT(sendcounts), OMPI_ARRAY_NAME_CONVERT(displs), c_sendtype, recvbuf, OMPI_FINT_2_INT(*recvcount), - c_recvtype, + c_recvtype, OMPI_FINT_2_INT(*root), c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/send_f.c b/ompi/mpi/fortran/mpif-h/send_f.c index 5a4b0ee63c2..60a244d6855 100644 --- a/ompi/mpi/fortran/mpif-h/send_f.c +++ b/ompi/mpi/fortran/mpif-h/send_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. 
All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_SEND = ompi_send_f #pragma weak pmpi_send = ompi_send_f #pragma weak pmpi_send_ = ompi_send_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Send_f = ompi_send_f #pragma weak PMPI_Send_f08 = ompi_send_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_SEND, pmpi_send, pmpi_send_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_SEND = ompi_send_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SEND, #pragma weak MPI_Send_f = ompi_send_f #pragma weak MPI_Send_f08 = ompi_send_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_SEND, mpi_send, mpi_send_, @@ -58,23 +61,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_SEND, ompi_send_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, ierr) ) +#else +#define ompi_send_f pompi_send_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_send_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, + +void ompi_send_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr) { int c_ierr; - MPI_Comm c_comm = MPI_Comm_f2c(*comm); - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Comm c_comm = PMPI_Comm_f2c(*comm); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_Send(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), - c_type, OMPI_FINT_2_INT(*dest), + c_ierr = PMPI_Send(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/send_init_f.c b/ompi/mpi/fortran/mpif-h/send_init_f.c index 26a47bb72b8..c406d315c9e 100644 --- a/ompi/mpi/fortran/mpif-h/send_init_f.c +++ b/ompi/mpi/fortran/mpif-h/send_init_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_SEND_INIT = ompi_send_init_f #pragma weak pmpi_send_init = ompi_send_init_f #pragma weak pmpi_send_init_ = ompi_send_init_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Send_init_f = ompi_send_init_f #pragma weak PMPI_Send_init_f08 = ompi_send_init_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_SEND_INIT, pmpi_send_init, pmpi_send_init_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SEND_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_SEND_INIT = ompi_send_init_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SEND_INIT, #pragma weak MPI_Send_init_f = ompi_send_init_f #pragma weak MPI_Send_init_f08 = ompi_send_init_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_SEND_INIT, mpi_send_init, mpi_send_init_, @@ -58,31 +61,30 @@ OMPI_GENERATE_F77_BINDINGS (MPI_SEND_INIT, ompi_send_init_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) +#else +#define ompi_send_init_f pompi_send_init_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_send_init_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Send_init(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Send_init(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), - OMPI_FINT_2_INT(*tag), + OMPI_FINT_2_INT(*tag), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/sendrecv_f.c b/ompi/mpi/fortran/mpif-h/sendrecv_f.c index c1031fae35b..4772fce4f32 100644 --- a/ompi/mpi/fortran/mpif-h/sendrecv_f.c +++ b/ompi/mpi/fortran/mpif-h/sendrecv_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_SENDRECV = ompi_sendrecv_f #pragma weak pmpi_sendrecv = ompi_sendrecv_f #pragma weak pmpi_sendrecv_ = ompi_sendrecv_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Sendrecv_f = ompi_sendrecv_f #pragma weak PMPI_Sendrecv_f08 = ompi_sendrecv_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_SENDRECV, pmpi_sendrecv, pmpi_sendrecv_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SENDRECV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Fint *dest, MPI_Fint *sendtag, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_SENDRECV = ompi_sendrecv_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SENDRECV, #pragma weak MPI_Sendrecv_f = ompi_sendrecv_f #pragma weak MPI_Sendrecv_f08 = ompi_sendrecv_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_SENDRECV, mpi_sendrecv, mpi_sendrecv_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_SENDRECV, ompi_sendrecv_f, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Fint *dest, MPI_Fint *sendtag, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr), (sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status, ierr) ) +#else +#define ompi_sendrecv_f pompi_sendrecv_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_sendrecv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Fint *dest, MPI_Fint *sendtag, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, @@ -73,13 +75,13 @@ void ompi_sendrecv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, { int c_ierr; MPI_Comm c_comm; - MPI_Datatype c_sendtype = MPI_Type_f2c(*sendtype); - MPI_Datatype c_recvtype = MPI_Type_f2c(*recvtype); + MPI_Datatype c_sendtype = PMPI_Type_f2c(*sendtype); + MPI_Datatype c_recvtype = PMPI_Type_f2c(*recvtype); MPI_Status c_status; - - c_comm = MPI_Comm_f2c (*comm); - - c_ierr = MPI_Sendrecv(OMPI_F2C_BOTTOM(sendbuf), OMPI_FINT_2_INT(*sendcount), + + c_comm = PMPI_Comm_f2c (*comm); + + c_ierr = PMPI_Sendrecv(OMPI_F2C_BOTTOM(sendbuf), OMPI_FINT_2_INT(*sendcount), c_sendtype, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*sendtag), @@ -91,6 +93,6 @@ void ompi_sendrecv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, if (MPI_SUCCESS == c_ierr && !OMPI_IS_FORTRAN_STATUS_IGNORE(status)) { - MPI_Status_c2f(&c_status, status); + PMPI_Status_c2f(&c_status, status); } } diff --git a/ompi/mpi/fortran/mpif-h/sendrecv_replace_f.c b/ompi/mpi/fortran/mpif-h/sendrecv_replace_f.c index 647c560b4e3..5a6a0ed79e7 100644 --- a/ompi/mpi/fortran/mpif-h/sendrecv_replace_f.c +++ b/ompi/mpi/fortran/mpif-h/sendrecv_replace_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_SENDRECV_REPLACE = ompi_sendrecv_replace_f #pragma weak pmpi_sendrecv_replace = ompi_sendrecv_replace_f #pragma weak pmpi_sendrecv_replace_ = ompi_sendrecv_replace_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Sendrecv_replace_f = ompi_sendrecv_replace_f #pragma weak PMPI_Sendrecv_replace_f08 = ompi_sendrecv_replace_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_SENDRECV_REPLACE, pmpi_sendrecv_replace, pmpi_sendrecv_replace_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SENDRECV_REPLACE, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *sendtag, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr), (buf, count, datatype, dest, sendtag, source, recvtag, comm, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_SENDRECV_REPLACE = ompi_sendrecv_replace_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SENDRECV_REPLACE, #pragma weak MPI_Sendrecv_replace_f = ompi_sendrecv_replace_f #pragma weak MPI_Sendrecv_replace_f08 = ompi_sendrecv_replace_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_SENDRECV_REPLACE, mpi_sendrecv_replace, mpi_sendrecv_replace_, @@ -58,37 +61,36 @@ OMPI_GENERATE_F77_BINDINGS (MPI_SENDRECV_REPLACE, ompi_sendrecv_replace_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *sendtag, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr), (buf, count, datatype, dest, sendtag, source, recvtag, comm, status, ierr) ) +#else +#define ompi_sendrecv_replace_f pompi_sendrecv_replace_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_sendrecv_replace_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *sendtag, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Comm c_comm; MPI_Status c_status; - c_comm = MPI_Comm_f2c (*comm); - - c_ierr = MPI_Sendrecv_replace(OMPI_F2C_BOTTOM(buf), + c_comm = PMPI_Comm_f2c (*comm); + + c_ierr = PMPI_Sendrecv_replace(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), - c_type, - OMPI_FINT_2_INT(*dest), - OMPI_FINT_2_INT(*sendtag), - OMPI_FINT_2_INT(*source), + c_type, + OMPI_FINT_2_INT(*dest), + OMPI_FINT_2_INT(*sendtag), + OMPI_FINT_2_INT(*source), OMPI_FINT_2_INT(*recvtag), c_comm, &c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr && !OMPI_IS_FORTRAN_STATUS_IGNORE(status)) { - MPI_Status_c2f(&c_status, status); + PMPI_Status_c2f(&c_status, status); } } diff --git a/ompi/mpi/fortran/mpif-h/ssend_f.c b/ompi/mpi/fortran/mpif-h/ssend_f.c index e73fbc2f232..4ba3e8d7744 100644 --- a/ompi/mpi/fortran/mpif-h/ssend_f.c +++ b/ompi/mpi/fortran/mpif-h/ssend_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_SSEND = ompi_ssend_f #pragma weak pmpi_ssend = ompi_ssend_f #pragma weak pmpi_ssend_ = ompi_ssend_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Ssend_f = ompi_ssend_f #pragma weak PMPI_Ssend_f08 = ompi_ssend_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_SSEND, pmpi_ssend, pmpi_ssend_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SSEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_SSEND = ompi_ssend_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SSEND, #pragma weak MPI_Ssend_f = ompi_ssend_f #pragma weak MPI_Ssend_f08 = ompi_ssend_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_SSEND, mpi_ssend, mpi_ssend_, @@ -58,24 +61,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_SSEND, ompi_ssend_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, ierr) ) +#else +#define ompi_ssend_f pompi_ssend_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_ssend_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, + +void ompi_ssend_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Ssend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Ssend(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), c_comm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/ssend_init_f.c b/ompi/mpi/fortran/mpif-h/ssend_init_f.c index d007a1039b0..62e2d8bb027 100644 --- a/ompi/mpi/fortran/mpif-h/ssend_init_f.c +++ b/ompi/mpi/fortran/mpif-h/ssend_init_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_SSEND_INIT = ompi_ssend_init_f #pragma weak pmpi_ssend_init = ompi_ssend_init_f #pragma weak pmpi_ssend_init_ = ompi_ssend_init_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Ssend_init_f = ompi_ssend_init_f #pragma weak PMPI_Ssend_init_f08 = ompi_ssend_init_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_SSEND_INIT, pmpi_ssend_init, pmpi_ssend_init_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SSEND_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_SSEND_INIT = ompi_ssend_init_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_SSEND_INIT, #pragma weak MPI_Ssend_init_f = ompi_ssend_init_f #pragma weak MPI_Ssend_init_f08 = ompi_ssend_init_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_SSEND_INIT, mpi_ssend_init, mpi_ssend_init_, @@ -58,31 +61,30 @@ OMPI_GENERATE_F77_BINDINGS (MPI_SSEND_INIT, ompi_ssend_init_f, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr), (buf, count, datatype, dest, tag, comm, request, ierr) ) +#else +#define ompi_ssend_init_f pompi_ssend_init_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_ssend_init_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, - MPI_Fint *dest, MPI_Fint *tag, + MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Request c_req; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c (*comm); + c_comm = PMPI_Comm_f2c (*comm); - c_ierr = MPI_Ssend_init(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Ssend_init(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), c_type, OMPI_FINT_2_INT(*dest), OMPI_FINT_2_INT(*tag), c_comm, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } diff --git a/ompi/mpi/fortran/mpif-h/start_f.c b/ompi/mpi/fortran/mpif-h/start_f.c index a8d31564430..fb1c1a47988 100644 --- a/ompi/mpi/fortran/mpif-h/start_f.c +++ b/ompi/mpi/fortran/mpif-h/start_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_START = ompi_start_f #pragma weak pmpi_start = ompi_start_f #pragma weak pmpi_start_ = ompi_start_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Start_f = ompi_start_f #pragma weak PMPI_Start_f08 = ompi_start_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_START, pmpi_start, pmpi_start_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_START, (MPI_Fint *request, MPI_Fint *ierr), (request, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_START = ompi_start_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_START, #pragma weak MPI_Start_f = ompi_start_f #pragma weak MPI_Start_f08 = ompi_start_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_START, mpi_start, mpi_start_, @@ -57,30 +60,29 @@ OMPI_GENERATE_F77_BINDINGS (MPI_START, ompi_start_f, (MPI_Fint *request, MPI_Fint *ierr), (request, ierr) ) +#else +#define ompi_start_f pompi_start_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_start_f(MPI_Fint *request, MPI_Fint *ierr) { int c_ierr; - MPI_Request c_req = MPI_Request_f2c(*request); + MPI_Request c_req = PMPI_Request_f2c(*request); MPI_Request tmp_req = c_req; - c_ierr = MPI_Start(&c_req); + c_ierr = PMPI_Start(&c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { /* For a persistent request, the underlying request descriptor could - change (i.e. the old descriptor has not completed and cannot be + change (i.e. the old descriptor has not completed and cannot be reused). So commit new descriptor. 
*/ if ( tmp_req != c_req ) { - *request = MPI_Request_c2f(c_req); + *request = PMPI_Request_c2f(c_req); } } } diff --git a/ompi/mpi/fortran/mpif-h/startall_f.c b/ompi/mpi/fortran/mpif-h/startall_f.c index 363dbf4678e..b4107d91b9c 100644 --- a/ompi/mpi/fortran/mpif-h/startall_f.c +++ b/ompi/mpi/fortran/mpif-h/startall_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_STARTALL = ompi_startall_f #pragma weak pmpi_startall = ompi_startall_f #pragma weak pmpi_startall_ = ompi_startall_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Startall_f = ompi_startall_f #pragma weak PMPI_Startall_f08 = ompi_startall_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_STARTALL, pmpi_startall, pmpi_startall_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_STARTALL, (MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *ierr), (count, array_of_requests, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_STARTALL = ompi_startall_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_STARTALL, #pragma weak MPI_Startall_f = ompi_startall_f #pragma weak MPI_Startall_f08 = ompi_startall_f -#endif 
- -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_STARTALL, mpi_startall, mpi_startall_, @@ -59,17 +62,16 @@ OMPI_GENERATE_F77_BINDINGS (MPI_STARTALL, ompi_startall_f, (MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *ierr), (count, array_of_requests, ierr) ) +#else +#define ompi_startall_f pompi_startall_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_STARTALL"; -void ompi_startall_f(MPI_Fint *count, MPI_Fint *array_of_requests, +void ompi_startall_f(MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *ierr) { int c_ierr; @@ -85,14 +87,14 @@ void ompi_startall_f(MPI_Fint *count, MPI_Fint *array_of_requests, } for(i = 0; i < *count; i++ ) { - c_req[i] = MPI_Request_f2c(array_of_requests[i]); + c_req[i] = PMPI_Request_f2c(array_of_requests[i]); } - c_ierr = MPI_Startall(OMPI_FINT_2_INT(*count), c_req); + c_ierr = PMPI_Startall(OMPI_FINT_2_INT(*count), c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); for( i = 0; i < *count; i++ ) { - array_of_requests[i] = MPI_Request_c2f(c_req[i]); + array_of_requests[i] = PMPI_Request_c2f(c_req[i]); } free(c_req); } diff --git a/ompi/mpi/fortran/mpif-h/status_set_cancelled_f.c b/ompi/mpi/fortran/mpif-h/status_set_cancelled_f.c index f7b0ff78c7b..1c9fc45d4bc 100644 --- a/ompi/mpi/fortran/mpif-h/status_set_cancelled_f.c +++ b/ompi/mpi/fortran/mpif-h/status_set_cancelled_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_STATUS_SET_CANCELLED = ompi_status_set_cancelled_f #pragma weak pmpi_status_set_cancelled = ompi_status_set_cancelled_f #pragma weak pmpi_status_set_cancelled_ = ompi_status_set_cancelled_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Status_set_cancelled_f = ompi_status_set_cancelled_f #pragma weak PMPI_Status_set_cancelled_f08 = ompi_status_set_cancelled_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_STATUS_SET_CANCELLED, pmpi_status_set_cancelled, pmpi_status_set_cancelled_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_STATUS_SET_CANCELLED, (MPI_Fint *status, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (status, flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_STATUS_SET_CANCELLED = ompi_status_set_cancelled_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_STATUS_SET_CANCELLED, #pragma weak MPI_Status_set_cancelled_f = ompi_status_set_cancelled_f #pragma weak MPI_Status_set_cancelled_f08 = ompi_status_set_cancelled_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_STATUS_SET_CANCELLED, mpi_status_set_cancelled, mpi_status_set_cancelled_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_STATUS_SET_CANCELLED, ompi_status_set_cancelled_f, (MPI_Fint *status, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (status, flag, ierr) ) +#else +#define ompi_status_set_cancelled_f pompi_status_set_cancelled_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_status_set_cancelled_f(MPI_Fint *status, ompi_fortran_logical_t *flag, MPI_Fint *ierr) { int c_ierr; @@ -75,12 +77,12 @@ void ompi_status_set_cancelled_f(MPI_Fint *status, ompi_fortran_logical_t *flag, if (OMPI_IS_FORTRAN_STATUS_IGNORE(status)) { c_ierr = MPI_SUCCESS; } else { - MPI_Status_f2c( status, &c_status ); + PMPI_Status_f2c( status, &c_status ); - c_ierr = MPI_Status_set_cancelled(&c_status, + c_ierr = PMPI_Status_set_cancelled(&c_status, OMPI_LOGICAL_2_INT(*flag)); if (MPI_SUCCESS == c_ierr) { - MPI_Status_c2f(&c_status, status); + PMPI_Status_c2f(&c_status, status); } } if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/status_set_elements_f.c b/ompi/mpi/fortran/mpif-h/status_set_elements_f.c index 3119c978bb9..2567bf98113 100644 --- a/ompi/mpi/fortran/mpif-h/status_set_elements_f.c +++ b/ompi/mpi/fortran/mpif-h/status_set_elements_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_STATUS_SET_ELEMENTS = ompi_status_set_elements_f #pragma weak pmpi_status_set_elements = ompi_status_set_elements_f #pragma weak pmpi_status_set_elements_ = ompi_status_set_elements_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Status_set_elements_f = ompi_status_set_elements_f #pragma weak PMPI_Status_set_elements_f08 = ompi_status_set_elements_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_STATUS_SET_ELEMENTS, pmpi_status_set_elements, pmpi_status_set_elements_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_STATUS_SET_ELEMENTS, (MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, MPI_Fint *ierr), (status, datatype, count, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_STATUS_SET_ELEMENTS = ompi_status_set_elements_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_STATUS_SET_ELEMENTS, #pragma weak MPI_Status_set_elements_f = ompi_status_set_elements_f #pragma weak MPI_Status_set_elements_f08 = ompi_status_set_elements_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_STATUS_SET_ELEMENTS, mpi_status_set_elements, mpi_status_set_elements_, @@ -58,18 +61,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_STATUS_SET_ELEMENTS, ompi_status_set_elements_f, (MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, MPI_Fint *ierr), (status, datatype, count, ierr) ) +#else +#define ompi_status_set_elements_f pompi_status_set_elements_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_status_set_elements_f(MPI_Fint *status, MPI_Fint *datatype, + +void ompi_status_set_elements_f(MPI_Fint *status, MPI_Fint *datatype, MPI_Fint *count, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Status c_status; /* This seems silly, but someone will do it */ @@ -77,15 +79,15 @@ void ompi_status_set_elements_f(MPI_Fint *status, MPI_Fint *datatype, if (OMPI_IS_FORTRAN_STATUS_IGNORE(status)) { c_ierr = MPI_SUCCESS; } else { - MPI_Status_f2c( status, &c_status ); + PMPI_Status_f2c( status, &c_status ); - c_ierr = MPI_Status_set_elements(&c_status, c_type, + c_ierr = PMPI_Status_set_elements(&c_status, c_type, OMPI_FINT_2_INT(*count)); /* If datatype is really being set, then that needs to be converted.... */ if (MPI_SUCCESS == c_ierr) { - MPI_Status_c2f(&c_status, status); + PMPI_Status_c2f(&c_status, status); } } if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/status_set_elements_x_f.c b/ompi/mpi/fortran/mpif-h/status_set_elements_x_f.c index 54dafa9a1c6..255e68775e9 100644 --- a/ompi/mpi/fortran/mpif-h/status_set_elements_x_f.c +++ b/ompi/mpi/fortran/mpif-h/status_set_elements_x_f.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_STATUS_SET_ELEMENTS_X = ompi_status_set_elements_x_f #pragma weak pmpi_status_set_elements_x = ompi_status_set_elements_x_f #pragma weak pmpi_status_set_elements_x_ = ompi_status_set_elements_x_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Status_set_elements_x_f = ompi_status_set_elements_x_f #pragma weak PMPI_Status_set_elements_x_f08 = ompi_status_set_elements_x_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_STATUS_SET_ELEMENTS_X, pmpi_status_set_elements_x, pmpi_status_set_elements_x_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_STATUS_SET_ELEMENTS_X, (MPI_Fint *status, MPI_Fint *datatype, MPI_Count *count, MPI_Fint *ierr), (status, datatype, count, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_STATUS_SET_ELEMENTS_X = ompi_status_set_elements_x_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_STATUS_SET_ELEMENTS_X, #pragma weak MPI_Status_set_elements_x_f = ompi_status_set_elements_x_f #pragma weak MPI_Status_set_elements_x_f08 = ompi_status_set_elements_x_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_STATUS_SET_ELEMENTS_X, mpi_status_set_elements_x, mpi_status_set_elements_x_, @@ -60,18 +63,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_STATUS_SET_ELEMENTS_X, ompi_status_set_elements_x_f, (MPI_Fint *status, MPI_Fint *datatype, MPI_Count *count, MPI_Fint *ierr), (status, datatype, count, ierr) ) +#else +#define ompi_status_set_elements_x_f pompi_status_set_elements_x_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_status_set_elements_x_f(MPI_Fint *status, MPI_Fint *datatype, + +void ompi_status_set_elements_x_f(MPI_Fint *status, MPI_Fint *datatype, MPI_Count *count, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); MPI_Status c_status; /* This seems silly, but someone will do it */ @@ -79,14 +81,14 @@ void ompi_status_set_elements_x_f(MPI_Fint *status, MPI_Fint *datatype, if (OMPI_IS_FORTRAN_STATUS_IGNORE(status)) { c_ierr = MPI_SUCCESS; } else { - MPI_Status_f2c( status, &c_status ); + PMPI_Status_f2c( status, &c_status ); - c_ierr = MPI_Status_set_elements_x(&c_status, c_type, *count); + c_ierr = PMPI_Status_set_elements_x(&c_status, c_type, *count); /* If datatype is really being set, then that needs to be converted.... */ if (MPI_SUCCESS == c_ierr) { - MPI_Status_c2f(&c_status, status); + PMPI_Status_c2f(&c_status, status); } } if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/test_cancelled_f.c b/ompi/mpi/fortran/mpif-h/test_cancelled_f.c index f266ceb79f8..caf371ecd97 100644 --- a/ompi/mpi/fortran/mpif-h/test_cancelled_f.c +++ b/ompi/mpi/fortran/mpif-h/test_cancelled_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TEST_CANCELLED = ompi_test_cancelled_f #pragma weak pmpi_test_cancelled = ompi_test_cancelled_f #pragma weak pmpi_test_cancelled_ = ompi_test_cancelled_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Test_cancelled_f = ompi_test_cancelled_f #pragma weak PMPI_Test_cancelled_f08 = ompi_test_cancelled_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TEST_CANCELLED, pmpi_test_cancelled, pmpi_test_cancelled_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TEST_CANCELLED, (MPI_Fint *status, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (status, flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TEST_CANCELLED = ompi_test_cancelled_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TEST_CANCELLED, #pragma weak MPI_Test_cancelled_f = ompi_test_cancelled_f #pragma weak MPI_Test_cancelled_f08 = ompi_test_cancelled_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TEST_CANCELLED, mpi_test_cancelled, mpi_test_cancelled_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TEST_CANCELLED, ompi_test_cancelled_f, (MPI_Fint *status, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (status, flag, ierr) ) +#else +#define ompi_test_cancelled_f pompi_test_cancelled_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_test_cancelled_f(MPI_Fint *status, ompi_fortran_logical_t *flag, MPI_Fint *ierr) { int c_ierr; @@ -77,10 +79,10 @@ void ompi_test_cancelled_f(MPI_Fint *status, ompi_fortran_logical_t *flag, MPI_F *flag = OMPI_INT_2_LOGICAL(0); c_ierr = MPI_SUCCESS; } else { - c_ierr = MPI_Status_f2c( status, &c_status ); + c_ierr = PMPI_Status_f2c( status, &c_status ); if (MPI_SUCCESS == c_ierr) { - c_ierr = MPI_Test_cancelled(&c_status, + c_ierr = PMPI_Test_cancelled(&c_status, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); OMPI_SINGLE_INT_2_LOGICAL(flag); diff --git a/ompi/mpi/fortran/mpif-h/test_f.c b/ompi/mpi/fortran/mpif-h/test_f.c index a879d0de313..f92e1d219fc 100644 --- a/ompi/mpi/fortran/mpif-h/test_f.c +++ b/ompi/mpi/fortran/mpif-h/test_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TEST = ompi_test_f #pragma weak pmpi_test = ompi_test_f #pragma weak pmpi_test_ = ompi_test_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Test_f = ompi_test_f #pragma weak PMPI_Test_f08 = ompi_test_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TEST, pmpi_test, pmpi_test_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TEST, (MPI_Fint *request, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr), (request, flag, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TEST = ompi_test_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TEST, #pragma weak MPI_Test_f = ompi_test_f #pragma weak MPI_Test_f08 = ompi_test_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TEST, mpi_test, mpi_test_, @@ -58,22 +61,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TEST, ompi_test_f, (MPI_Fint *request, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr), (request, flag, status, ierr) ) +#else +#define ompi_test_f pompi_test_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_test_f(MPI_Fint *request, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_Request c_req = MPI_Request_f2c(*request); + MPI_Request c_req = PMPI_Request_f2c(*request); MPI_Status c_status; OMPI_LOGICAL_NAME_DECL(flag); - c_ierr = MPI_Test(&c_req, + c_ierr = PMPI_Test(&c_req, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag), &c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -86,7 +88,7 @@ void ompi_test_f(MPI_Fint *request, ompi_fortran_logical_t *flag, if (MPI_SUCCESS == c_ierr && *flag) { *request = OMPI_INT_2_FINT(c_req->req_f_to_c_index); if (!OMPI_IS_FORTRAN_STATUS_IGNORE(status)) { - MPI_Status_c2f(&c_status, status); + PMPI_Status_c2f(&c_status, status); } } } diff --git a/ompi/mpi/fortran/mpif-h/testall_f.c b/ompi/mpi/fortran/mpif-h/testall_f.c index bfb698ec2a2..5bc3d16950f 100644 --- a/ompi/mpi/fortran/mpif-h/testall_f.c +++ b/ompi/mpi/fortran/mpif-h/testall_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TESTALL = ompi_testall_f #pragma weak pmpi_testall = ompi_testall_f #pragma weak pmpi_testall_ = ompi_testall_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Testall_f = ompi_testall_f #pragma weak PMPI_Testall_f08 = ompi_testall_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TESTALL, pmpi_testall, pmpi_testall_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TESTALL, (MPI_Fint *count, MPI_Fint *array_of_requests, ompi_fortran_logical_t *flag, MPI_Fint *array_of_statuses, MPI_Fint *ierr), (count, array_of_requests, flag, array_of_statuses, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TESTALL = ompi_testall_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TESTALL, #pragma weak MPI_Testall_f = ompi_testall_f #pragma weak MPI_Testall_f08 = ompi_testall_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TESTALL, mpi_testall, mpi_testall_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TESTALL, ompi_testall_f, (MPI_Fint *count, MPI_Fint *array_of_requests, ompi_fortran_logical_t *flag, MPI_Fint *array_of_statuses, MPI_Fint *ierr), (count, array_of_requests, flag, array_of_statuses, ierr) ) +#else +#define ompi_testall_f pompi_testall_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_TESTALL"; void ompi_testall_f(MPI_Fint *count, MPI_Fint *array_of_requests, ompi_fortran_logical_t *flag, MPI_Fint *array_of_statuses, MPI_Fint *ierr) @@ -95,10 +97,10 @@ void ompi_testall_f(MPI_Fint *count, MPI_Fint *array_of_requests, ompi_fortran_l } c_status = (MPI_Status*) (c_req + OMPI_FINT_2_INT(*count)); for (i = 0; i < OMPI_FINT_2_INT(*count); ++i) { - c_req[i] = MPI_Request_f2c(array_of_requests[i]); + c_req[i] = PMPI_Request_f2c(array_of_requests[i]); } - c_ierr = MPI_Testall(OMPI_FINT_2_INT(*count), c_req, + c_ierr = PMPI_Testall(OMPI_FINT_2_INT(*count), c_req, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag), c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -112,7 +114,7 @@ void ompi_testall_f(MPI_Fint *count, MPI_Fint *array_of_requests, ompi_fortran_l array_of_requests[i] = c_req[i]->req_f_to_c_index; if (!OMPI_IS_FORTRAN_STATUSES_IGNORE(array_of_statuses) && !OMPI_IS_FORTRAN_STATUS_IGNORE(&array_of_statuses[i])) { - MPI_Status_c2f(&c_status[i], &array_of_statuses[i * (sizeof(MPI_Status) / sizeof(int))]); + PMPI_Status_c2f(&c_status[i], &array_of_statuses[i * (sizeof(MPI_Status) / sizeof(int))]); } } } diff --git a/ompi/mpi/fortran/mpif-h/testany_f.c b/ompi/mpi/fortran/mpif-h/testany_f.c index f61b33d0a88..d566e5e9726 100644 --- a/ompi/mpi/fortran/mpif-h/testany_f.c +++ b/ompi/mpi/fortran/mpif-h/testany_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TESTANY = ompi_testany_f #pragma weak pmpi_testany = ompi_testany_f #pragma weak pmpi_testany_ = ompi_testany_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Testany_f = ompi_testany_f #pragma weak PMPI_Testany_f08 = ompi_testany_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TESTANY, pmpi_testany, pmpi_testany_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TESTANY, (MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *indx, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr), (count, array_of_requests, indx, flag, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TESTANY = ompi_testany_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TESTANY, #pragma weak MPI_Testany_f = ompi_testany_f #pragma weak MPI_Testany_f08 = ompi_testany_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TESTANY, mpi_testany, mpi_testany_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TESTANY, ompi_testany_f, (MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *indx, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr), (count, array_of_requests, indx, flag, status, ierr) ) +#else +#define ompi_testany_f pompi_testany_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_TESTANY"; @@ -83,14 +85,14 @@ void ompi_testany_f(MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *indx if (OPAL_UNLIKELY(0 == OMPI_FINT_2_INT(*count))) { *flag = OMPI_FORTRAN_VALUE_TRUE; *indx = OMPI_INT_2_FINT(MPI_UNDEFINED); - MPI_Status_c2f(&ompi_status_empty, status); + PMPI_Status_c2f(&ompi_status_empty, status); *ierr = OMPI_INT_2_FINT(MPI_SUCCESS); return; } c_req = (MPI_Request *) malloc(OMPI_FINT_2_INT(*count) * sizeof(MPI_Request)); if (c_req == NULL) { - c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, + c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, FUNC_NAME); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -98,10 +100,10 @@ void ompi_testany_f(MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *indx } for (i = 0; i < OMPI_FINT_2_INT(*count); ++i) { - c_req[i] = MPI_Request_f2c(array_of_requests[i]); + c_req[i] = PMPI_Request_f2c(array_of_requests[i]); } - c_ierr = MPI_Testany(OMPI_FINT_2_INT(*count), c_req, + c_ierr = PMPI_Testany(OMPI_FINT_2_INT(*count), c_req, OMPI_SINGLE_NAME_CONVERT(indx), OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag), &c_status); @@ -122,7 +124,7 @@ void ompi_testany_f(MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *indx ++(*indx); } if (!OMPI_IS_FORTRAN_STATUS_IGNORE(status)) { - MPI_Status_c2f(&c_status, status); + PMPI_Status_c2f(&c_status, status); } } free(c_req); diff --git a/ompi/mpi/fortran/mpif-h/testsome_f.c b/ompi/mpi/fortran/mpif-h/testsome_f.c index 6a05917efa6..3e299001bc2 100644 --- a/ompi/mpi/fortran/mpif-h/testsome_f.c +++ b/ompi/mpi/fortran/mpif-h/testsome_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TESTSOME = ompi_testsome_f #pragma weak pmpi_testsome = ompi_testsome_f #pragma weak pmpi_testsome_ = ompi_testsome_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Testsome_f = ompi_testsome_f #pragma weak PMPI_Testsome_f08 = ompi_testsome_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TESTSOME, pmpi_testsome, pmpi_testsome_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TESTSOME, (MPI_Fint *incount, MPI_Fint *array_of_requests, MPI_Fint *outcount, MPI_Fint *array_of_indices, MPI_Fint *array_of_statuses, MPI_Fint *ierr), (incount, array_of_requests, outcount, array_of_indices, array_of_statuses, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TESTSOME = ompi_testsome_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TESTSOME, #pragma weak MPI_Testsome_f = ompi_testsome_f #pragma weak MPI_Testsome_f08 = ompi_testsome_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TESTSOME, mpi_testsome, mpi_testsome_, @@ -60,17 +63,16 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TESTSOME, ompi_testsome_f, (MPI_Fint *incount, MPI_Fint *array_of_requests, MPI_Fint *outcount, MPI_Fint *array_of_indices, MPI_Fint *array_of_statuses, MPI_Fint *ierr), (incount, array_of_requests, outcount, array_of_indices, array_of_statuses, ierr) ) +#else +#define ompi_testsome_f pompi_testsome_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_TESTSOME"; -void ompi_testsome_f(MPI_Fint *incount, MPI_Fint *array_of_requests, +void ompi_testsome_f(MPI_Fint *incount, MPI_Fint *array_of_requests, MPI_Fint *outcount, MPI_Fint *array_of_indices, MPI_Fint *array_of_statuses, MPI_Fint *ierr) { @@ -100,13 +102,13 @@ void ompi_testsome_f(MPI_Fint *incount, MPI_Fint *array_of_requests, c_status = (MPI_Status*) (c_req + OMPI_FINT_2_INT(*incount)); for (i = 0; i < OMPI_FINT_2_INT(*incount); ++i) { - c_req[i] = MPI_Request_f2c(array_of_requests[i]); + c_req[i] = PMPI_Request_f2c(array_of_requests[i]); } OMPI_ARRAY_FINT_2_INT_ALLOC(array_of_indices, OMPI_FINT_2_INT(*incount)); - c_ierr = MPI_Testsome(OMPI_FINT_2_INT(*incount), c_req, - OMPI_SINGLE_NAME_CONVERT(outcount), - OMPI_ARRAY_NAME_CONVERT(array_of_indices), + c_ierr = PMPI_Testsome(OMPI_FINT_2_INT(*incount), c_req, + OMPI_SINGLE_NAME_CONVERT(outcount), + OMPI_ARRAY_NAME_CONVERT(array_of_indices), c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -124,7 +126,7 @@ void ompi_testsome_f(MPI_Fint *incount, MPI_Fint *array_of_requests, if (!OMPI_IS_FORTRAN_STATUSES_IGNORE(array_of_statuses)) { for (i = 0; i < OMPI_FINT_2_INT(*outcount); ++i) { if (!OMPI_IS_FORTRAN_STATUS_IGNORE(&array_of_statuses[i])) { - MPI_Status_c2f(&c_status[i], &array_of_statuses[i * (sizeof(MPI_Status) / sizeof(int))]); + PMPI_Status_c2f(&c_status[i], &array_of_statuses[i * 
(sizeof(MPI_Status) / sizeof(int))]); } } } diff --git a/ompi/mpi/fortran/mpif-h/topo_test_f.c b/ompi/mpi/fortran/mpif-h/topo_test_f.c index 7af8ffec92c..82789a60abf 100644 --- a/ompi/mpi/fortran/mpif-h/topo_test_f.c +++ b/ompi/mpi/fortran/mpif-h/topo_test_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TOPO_TEST = ompi_topo_test_f #pragma weak pmpi_topo_test = ompi_topo_test_f #pragma weak pmpi_topo_test_ = ompi_topo_test_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Topo_test_f = ompi_topo_test_f #pragma weak PMPI_Topo_test_f08 = ompi_topo_test_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TOPO_TEST, pmpi_topo_test, pmpi_topo_test_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TOPO_TEST, (MPI_Fint *comm, MPI_Fint *topo_type, MPI_Fint *ierr), (comm, topo_type, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TOPO_TEST = ompi_topo_test_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TOPO_TEST, #pragma weak MPI_Topo_test_f = ompi_topo_test_f #pragma weak MPI_Topo_test_f08 = ompi_topo_test_f -#endif - -#if ! 
OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TOPO_TEST, mpi_topo_test, mpi_topo_test_, @@ -58,22 +61,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TOPO_TEST, ompi_topo_test_f, (MPI_Fint *comm, MPI_Fint *topo_type, MPI_Fint *ierr), (comm, topo_type, ierr) ) +#else +#define ompi_topo_test_f pompi_topo_test_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_topo_test_f(MPI_Fint *comm, MPI_Fint *topo_type, MPI_Fint *ierr) { int c_ierr; MPI_Comm c_comm; OMPI_SINGLE_NAME_DECL(topo_type); - c_comm = MPI_Comm_f2c(*comm); - - c_ierr = MPI_Topo_test(c_comm, OMPI_SINGLE_NAME_CONVERT(topo_type)); + c_comm = PMPI_Comm_f2c(*comm); + + c_ierr = PMPI_Topo_test(c_comm, OMPI_SINGLE_NAME_CONVERT(topo_type)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/type_commit_f.c b/ompi/mpi/fortran/mpif-h/type_commit_f.c index 41700f62346..129c80a7b6e 100644 --- a/ompi/mpi/fortran/mpif-h/type_commit_f.c +++ b/ompi/mpi/fortran/mpif-h/type_commit_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_COMMIT = ompi_type_commit_f #pragma weak pmpi_type_commit = ompi_type_commit_f #pragma weak pmpi_type_commit_ = ompi_type_commit_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_commit_f = ompi_type_commit_f #pragma weak PMPI_Type_commit_f08 = ompi_type_commit_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_COMMIT, pmpi_type_commit, pmpi_type_commit_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_COMMIT, (MPI_Fint *type, MPI_Fint *ierr), (type, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_COMMIT = ompi_type_commit_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_COMMIT, #pragma weak MPI_Type_commit_f = ompi_type_commit_f #pragma weak MPI_Type_commit_f08 = ompi_type_commit_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_COMMIT, mpi_type_commit, mpi_type_commit_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_COMMIT, ompi_type_commit_f, (MPI_Fint *type, MPI_Fint *ierr), (type, ierr) ) +#else +#define ompi_type_commit_f pompi_type_commit_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_commit_f(MPI_Fint *type, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); - c_ierr = MPI_Type_commit(&c_type); + c_ierr = PMPI_Type_commit(&c_type); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *type = MPI_Type_c2f(c_type); + *type = PMPI_Type_c2f(c_type); } } diff --git a/ompi/mpi/fortran/mpif-h/type_contiguous_f.c b/ompi/mpi/fortran/mpif-h/type_contiguous_f.c index 19b006bf639..42ffd8fa900 100644 --- a/ompi/mpi/fortran/mpif-h/type_contiguous_f.c +++ b/ompi/mpi/fortran/mpif-h/type_contiguous_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CONTIGUOUS = ompi_type_contiguous_f #pragma weak pmpi_type_contiguous = ompi_type_contiguous_f #pragma weak pmpi_type_contiguous_ = ompi_type_contiguous_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_contiguous_f = ompi_type_contiguous_f #pragma weak PMPI_Type_contiguous_f08 = ompi_type_contiguous_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CONTIGUOUS, pmpi_type_contiguous, pmpi_type_contiguous_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CONTIGUOUS, (MPI_Fint *count, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, oldtype, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CONTIGUOUS = ompi_type_contiguous_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CONTIGUOUS, #pragma weak MPI_Type_contiguous_f = ompi_type_contiguous_f #pragma weak MPI_Type_contiguous_f08 = ompi_type_contiguous_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CONTIGUOUS, mpi_type_contiguous, mpi_type_contiguous_, @@ -57,24 +60,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CONTIGUOUS, ompi_type_contiguous_f, (MPI_Fint *count, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, oldtype, newtype, ierr) ) +#else +#define ompi_type_contiguous_f pompi_type_contiguous_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_contiguous_f(MPI_Fint *count, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_old = MPI_Type_f2c(*oldtype); + MPI_Datatype c_old = PMPI_Type_f2c(*oldtype); MPI_Datatype c_new; - c_ierr = MPI_Type_contiguous(OMPI_FINT_2_INT(*count), c_old, &c_new); + c_ierr = PMPI_Type_contiguous(OMPI_FINT_2_INT(*count), c_old, &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/type_create_darray_f.c b/ompi/mpi/fortran/mpif-h/type_create_darray_f.c index e0448a15598..c6711c355ee 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_darray_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_darray_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_DARRAY = ompi_type_create_darray_f #pragma weak pmpi_type_create_darray = ompi_type_create_darray_f #pragma weak pmpi_type_create_darray_ = ompi_type_create_darray_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_create_darray_f = ompi_type_create_darray_f #pragma weak PMPI_Type_create_darray_f08 = ompi_type_create_darray_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_DARRAY, pmpi_type_create_darray, pmpi_type_create_darray_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_DARRAY, (MPI_Fint *size, MPI_Fint *rank, MPI_Fint *ndims, MPI_Fint *gsize_array, MPI_Fint *distrib_array, MPI_Fint *darg_array, MPI_Fint *psize_array, MPI_Fint *order, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (size, rank, ndims, gsize_array, distrib_array, darg_array, psize_array, order, oldtype, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_DARRAY = ompi_type_create_darray_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_DARRAY, #pragma weak MPI_Type_create_darray_f = ompi_type_create_darray_f #pragma weak MPI_Type_create_darray_f08 = ompi_type_create_darray_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_DARRAY, mpi_type_create_darray, mpi_type_create_darray_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_DARRAY, ompi_type_create_darray_f, (MPI_Fint *size, MPI_Fint *rank, MPI_Fint *ndims, MPI_Fint *gsize_array, MPI_Fint *distrib_array, MPI_Fint *darg_array, MPI_Fint *psize_array, MPI_Fint *order, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (size, rank, ndims, gsize_array, distrib_array, darg_array, psize_array, order, oldtype, newtype, ierr) ) +#else +#define ompi_type_create_darray_f pompi_type_create_darray_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_create_darray_f(MPI_Fint *size, MPI_Fint *rank, - MPI_Fint *ndims, MPI_Fint *gsize_array, + MPI_Fint *ndims, MPI_Fint *gsize_array, MPI_Fint *distrib_array, MPI_Fint *darg_array, - MPI_Fint *psize_array, MPI_Fint *order, + MPI_Fint *psize_array, MPI_Fint *order, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_old = MPI_Type_f2c(*oldtype); + MPI_Datatype c_old = PMPI_Type_f2c(*oldtype); MPI_Datatype c_new; OMPI_ARRAY_NAME_DECL(gsize_array); OMPI_ARRAY_NAME_DECL(distrib_array); @@ -84,10 +86,10 @@ void ompi_type_create_darray_f(MPI_Fint *size, MPI_Fint *rank, OMPI_ARRAY_FINT_2_INT(darg_array, *ndims); OMPI_ARRAY_FINT_2_INT(psize_array, *ndims); - c_ierr = MPI_Type_create_darray(OMPI_FINT_2_INT(*size), + c_ierr = PMPI_Type_create_darray(OMPI_FINT_2_INT(*size), OMPI_FINT_2_INT(*rank), OMPI_FINT_2_INT(*ndims), - OMPI_ARRAY_NAME_CONVERT(gsize_array), + OMPI_ARRAY_NAME_CONVERT(gsize_array), OMPI_ARRAY_NAME_CONVERT(distrib_array), OMPI_ARRAY_NAME_CONVERT(darg_array), OMPI_ARRAY_NAME_CONVERT(psize_array), @@ -100,6 +102,6 @@ void ompi_type_create_darray_f(MPI_Fint *size, MPI_Fint *rank, OMPI_ARRAY_FINT_2_INT_CLEANUP(psize_array); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + 
*newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/type_create_f90_complex_f.c b/ompi/mpi/fortran/mpif-h/type_create_f90_complex_f.c index 5b1f2b8b7b3..66f0f26a64f 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_f90_complex_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_f90_complex_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_F90_COMPLEX = ompi_type_create_f90_complex_f #pragma weak pmpi_type_create_f90_complex = ompi_type_create_f90_complex_f #pragma weak pmpi_type_create_f90_complex_ = ompi_type_create_f90_complex_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_create_f90_complex_f = ompi_type_create_f90_complex_f #pragma weak PMPI_Type_create_f90_complex_f08 = ompi_type_create_f90_complex_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_F90_COMPLEX, pmpi_type_create_f90_complex, pmpi_type_create_f90_complex_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_F90_COMPLEX, (MPI_Fint *p, MPI_Fint *r, MPI_Fint *newtype, MPI_Fint *ierr), (p, r, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_F90_COMPLEX = 
ompi_type_create_f90_complex_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_F90_COMPLEX, #pragma weak MPI_Type_create_f90_complex_f = ompi_type_create_f90_complex_f #pragma weak MPI_Type_create_f90_complex_f08 = ompi_type_create_f90_complex_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_F90_COMPLEX, mpi_type_create_f90_complex, mpi_type_create_f90_complex_, @@ -57,25 +60,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_F90_COMPLEX, ompi_type_create_f90_complex_f, (MPI_Fint *p, MPI_Fint *r, MPI_Fint *newtype, MPI_Fint *ierr), (p, r, newtype, ierr) ) +#else +#define ompi_type_create_f90_complex_f pompi_type_create_f90_complex_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_create_f90_complex_f(MPI_Fint *p, MPI_Fint *r, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_newtype = MPI_Type_f2c(*newtype); + MPI_Datatype c_newtype = PMPI_Type_f2c(*newtype); - c_ierr = MPI_Type_create_f90_complex(OMPI_FINT_2_INT(*p), + c_ierr = PMPI_Type_create_f90_complex(OMPI_FINT_2_INT(*p), OMPI_FINT_2_INT(*r), &c_newtype); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f (c_newtype); + *newtype = PMPI_Type_c2f (c_newtype); } } diff --git a/ompi/mpi/fortran/mpif-h/type_create_f90_integer_f.c b/ompi/mpi/fortran/mpif-h/type_create_f90_integer_f.c index 79de1e1adfc..2356218b59a 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_f90_integer_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_f90_integer_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_F90_INTEGER = ompi_type_create_f90_integer_f #pragma weak pmpi_type_create_f90_integer = ompi_type_create_f90_integer_f #pragma weak pmpi_type_create_f90_integer_ = ompi_type_create_f90_integer_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_create_f90_integer_f = ompi_type_create_f90_integer_f #pragma weak PMPI_Type_create_f90_integer_f08 = ompi_type_create_f90_integer_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_F90_INTEGER, pmpi_type_create_f90_integer, pmpi_type_create_f90_integer_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_F90_INTEGER, (MPI_Fint *r, MPI_Fint *newtype, MPI_Fint *ierr), (r, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_F90_INTEGER = ompi_type_create_f90_integer_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_F90_INTEGER, #pragma weak MPI_Type_create_f90_integer_f = ompi_type_create_f90_integer_f #pragma weak MPI_Type_create_f90_integer_f08 = ompi_type_create_f90_integer_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_F90_INTEGER, mpi_type_create_f90_integer, mpi_type_create_f90_integer_, @@ -57,23 +60,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_F90_INTEGER, ompi_type_create_f90_integer_f, (MPI_Fint *r, MPI_Fint *newtype, MPI_Fint *ierr), (r, newtype, ierr) ) +#else +#define ompi_type_create_f90_integer_f pompi_type_create_f90_integer_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_create_f90_integer_f(MPI_Fint *r, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_new = MPI_Type_f2c(*newtype); + MPI_Datatype c_new = PMPI_Type_f2c(*newtype); - c_ierr = MPI_Type_create_f90_integer(OMPI_FINT_2_INT(*r), &c_new); + c_ierr = PMPI_Type_create_f90_integer(OMPI_FINT_2_INT(*r), &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/type_create_f90_real_f.c b/ompi/mpi/fortran/mpif-h/type_create_f90_real_f.c index 37cc68d5136..70d6de28992 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_f90_real_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_f90_real_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_F90_REAL = ompi_type_create_f90_real_f #pragma weak pmpi_type_create_f90_real = ompi_type_create_f90_real_f #pragma weak pmpi_type_create_f90_real_ = ompi_type_create_f90_real_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_create_f90_real_f = ompi_type_create_f90_real_f #pragma weak PMPI_Type_create_f90_real_f08 = ompi_type_create_f90_real_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_F90_REAL, pmpi_type_create_f90_real, pmpi_type_create_f90_real_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_F90_REAL, (MPI_Fint *p, MPI_Fint *r, MPI_Fint *newtype, MPI_Fint *ierr), (p, r, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_F90_REAL = ompi_type_create_f90_real_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_F90_REAL, #pragma weak MPI_Type_create_f90_real_f = ompi_type_create_f90_real_f #pragma weak MPI_Type_create_f90_real_f08 = ompi_type_create_f90_real_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_F90_REAL, mpi_type_create_f90_real, mpi_type_create_f90_real_, @@ -57,25 +60,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_F90_REAL, ompi_type_create_f90_real_f, (MPI_Fint *p, MPI_Fint *r, MPI_Fint *newtype, MPI_Fint *ierr), (p, r, newtype, ierr) ) +#else +#define ompi_type_create_f90_real_f pompi_type_create_f90_real_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_create_f90_real_f(MPI_Fint *p, MPI_Fint *r, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_new = MPI_Type_f2c(*newtype); + MPI_Datatype c_new = PMPI_Type_f2c(*newtype); - c_ierr = MPI_Type_create_f90_real(OMPI_FINT_2_INT(*p), + c_ierr = PMPI_Type_create_f90_real(OMPI_FINT_2_INT(*p), OMPI_FINT_2_INT(*r), &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/type_create_hindexed_block_f.c b/ompi/mpi/fortran/mpif-h/type_create_hindexed_block_f.c index 8bacef24b3e..111344c9247 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_hindexed_block_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_hindexed_block_f.c @@ -3,10 +3,12 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2012 Inria. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -14,7 +16,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_HINDEXED_BLOCK = ompi_type_create_hindexed_block_f #pragma weak pmpi_type_create_hindexed_block = ompi_type_create_hindexed_block_f #pragma weak pmpi_type_create_hindexed_block_ = ompi_type_create_hindexed_block_f @@ -22,7 +25,7 @@ #pragma weak PMPI_Type_create_hindexed_block_f = ompi_type_create_hindexed_block_f #pragma weak PMPI_Type_create_hindexed_block_f08 = ompi_type_create_hindexed_block_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_HINDEXED_BLOCK, pmpi_type_create_hindexed_block, pmpi_type_create_hindexed_block_, @@ -31,6 +34,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_HINDEXED_BLOCK, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Aint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, blocklength, array_of_displacements, oldtype, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_HINDEXED_BLOCK = ompi_type_create_hindexed_block_f @@ -40,9 +44,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_HINDEXED_BLOCK, #pragma weak MPI_Type_create_hindexed_block_f = ompi_type_create_hindexed_block_f #pragma weak MPI_Type_create_hindexed_block_f08 = ompi_type_create_hindexed_block_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_HINDEXED_BLOCK, mpi_type_create_hindexed_block, mpi_type_create_hindexed_block_, @@ -50,30 +53,29 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_HINDEXED_BLOCK, ompi_type_create_hindexed_block_f, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Aint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, blocklength, array_of_displacements, oldtype, newtype, ierr) ) +#else +#define ompi_type_create_hindexed_block_f pompi_type_create_hindexed_block_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_create_hindexed_block_f(MPI_Fint *count, MPI_Fint *blocklength, - MPI_Aint *array_of_displacements, + MPI_Aint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_old = MPI_Type_f2c(*oldtype); + MPI_Datatype c_old = PMPI_Type_f2c(*oldtype); MPI_Datatype c_new; - c_ierr = MPI_Type_create_hindexed_block(OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Type_create_hindexed_block(OMPI_FINT_2_INT(*count), OMPI_FINT_2_INT(*blocklength), array_of_displacements, c_old, &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/type_create_hindexed_f.c b/ompi/mpi/fortran/mpif-h/type_create_hindexed_f.c index 4649ed674c7..b68745b6ede 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_hindexed_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_hindexed_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_HINDEXED = ompi_type_create_hindexed_f #pragma weak pmpi_type_create_hindexed = ompi_type_create_hindexed_f #pragma weak pmpi_type_create_hindexed_ = ompi_type_create_hindexed_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_create_hindexed_f = ompi_type_create_hindexed_f #pragma weak PMPI_Type_create_hindexed_f08 = ompi_type_create_hindexed_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_HINDEXED, pmpi_type_create_hindexed, pmpi_type_create_hindexed_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_HINDEXED, (MPI_Fint *count, MPI_Fint *array_of_blocklengths, MPI_Aint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, array_of_blocklengths, array_of_displacements, oldtype, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_HINDEXED = ompi_type_create_hindexed_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_HINDEXED, #pragma weak MPI_Type_create_hindexed_f = ompi_type_create_hindexed_f #pragma weak MPI_Type_create_hindexed_f08 = ompi_type_create_hindexed_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_HINDEXED, mpi_type_create_hindexed, mpi_type_create_hindexed_, @@ -57,36 +60,35 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_HINDEXED, ompi_type_create_hindexed_f, (MPI_Fint *count, MPI_Fint *array_of_blocklengths, MPI_Aint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, array_of_blocklengths, array_of_displacements, oldtype, newtype, ierr) ) +#else +#define ompi_type_create_hindexed_f pompi_type_create_hindexed_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_create_hindexed_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, - MPI_Aint *array_of_displacements, + MPI_Aint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_old = MPI_Type_f2c(*oldtype); - MPI_Datatype c_new = MPI_Type_f2c(*newtype); + MPI_Datatype c_old = PMPI_Type_f2c(*oldtype); + MPI_Datatype c_new = PMPI_Type_f2c(*newtype); OMPI_ARRAY_NAME_DECL(array_of_blocklengths); - OMPI_ARRAY_FINT_2_INT(array_of_blocklengths, *count); + OMPI_ARRAY_FINT_2_INT(array_of_blocklengths, *count); - c_ierr = MPI_Type_create_hindexed(OMPI_FINT_2_INT(*count), - OMPI_ARRAY_NAME_CONVERT(array_of_blocklengths), + c_ierr = PMPI_Type_create_hindexed(OMPI_FINT_2_INT(*count), + OMPI_ARRAY_NAME_CONVERT(array_of_blocklengths), array_of_displacements, c_old, &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } - + OMPI_ARRAY_FINT_2_INT_CLEANUP(array_of_blocklengths); } diff --git a/ompi/mpi/fortran/mpif-h/type_create_hvector_f.c b/ompi/mpi/fortran/mpif-h/type_create_hvector_f.c index a829c9a4ff3..1393d666065 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_hvector_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_hvector_f.c @@ -5,15 +5,17 @@ * Copyright (c) 
2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_HVECTOR = ompi_type_create_hvector_f #pragma weak pmpi_type_create_hvector = ompi_type_create_hvector_f #pragma weak pmpi_type_create_hvector_ = ompi_type_create_hvector_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_create_hvector_f = ompi_type_create_hvector_f #pragma weak PMPI_Type_create_hvector_f08 = ompi_type_create_hvector_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_HVECTOR, pmpi_type_create_hvector, pmpi_type_create_hvector_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_HVECTOR, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Aint *stride, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, blocklength, stride, oldtype, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_HVECTOR = ompi_type_create_hvector_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_HVECTOR, #pragma weak MPI_Type_create_hvector_f = ompi_type_create_hvector_f #pragma weak MPI_Type_create_hvector_f08 = ompi_type_create_hvector_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_HVECTOR, mpi_type_create_hvector, mpi_type_create_hvector_, @@ -57,11 +60,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_HVECTOR, ompi_type_create_hvector_f, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Aint *stride, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, blocklength, stride, oldtype, newtype, ierr) ) +#else +#define ompi_type_create_hvector_f pompi_type_create_hvector_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_type_create_hvector_f(MPI_Fint *count, MPI_Fint *blocklength, @@ -69,16 +70,16 @@ void ompi_type_create_hvector_f(MPI_Fint *count, MPI_Fint *blocklength, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_old = MPI_Type_f2c(*oldtype); + MPI_Datatype c_old = PMPI_Type_f2c(*oldtype); MPI_Datatype c_new; - c_ierr = MPI_Type_hvector(OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Type_hvector(OMPI_FINT_2_INT(*count), OMPI_FINT_2_INT(*blocklength), *stride, c_old, &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/type_create_indexed_block_f.c b/ompi/mpi/fortran/mpif-h/type_create_indexed_block_f.c index ebdb633c74b..5df675820d7 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_indexed_block_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_indexed_block_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_INDEXED_BLOCK = ompi_type_create_indexed_block_f #pragma weak pmpi_type_create_indexed_block = ompi_type_create_indexed_block_f #pragma weak pmpi_type_create_indexed_block_ = ompi_type_create_indexed_block_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_create_indexed_block_f = ompi_type_create_indexed_block_f #pragma weak PMPI_Type_create_indexed_block_f08 = ompi_type_create_indexed_block_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_INDEXED_BLOCK, pmpi_type_create_indexed_block, pmpi_type_create_indexed_block_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_INDEXED_BLOCK, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Fint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, blocklength, array_of_displacements, oldtype, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_INDEXED_BLOCK = ompi_type_create_indexed_block_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_INDEXED_BLOCK, #pragma weak MPI_Type_create_indexed_block_f = ompi_type_create_indexed_block_f #pragma weak MPI_Type_create_indexed_block_f08 = ompi_type_create_indexed_block_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_INDEXED_BLOCK, mpi_type_create_indexed_block, mpi_type_create_indexed_block_, @@ -57,33 +60,32 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_INDEXED_BLOCK, ompi_type_create_indexed_block_f, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Fint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, blocklength, array_of_displacements, oldtype, newtype, ierr) ) +#else +#define ompi_type_create_indexed_block_f pompi_type_create_indexed_block_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_create_indexed_block_f(MPI_Fint *count, MPI_Fint *blocklength, - MPI_Fint *array_of_displacements, + MPI_Fint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_old = MPI_Type_f2c(*oldtype); + MPI_Datatype c_old = PMPI_Type_f2c(*oldtype); MPI_Datatype c_new; OMPI_ARRAY_NAME_DECL(array_of_displacements); OMPI_ARRAY_FINT_2_INT(array_of_displacements, *count); - c_ierr = MPI_Type_create_indexed_block(OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Type_create_indexed_block(OMPI_FINT_2_INT(*count), OMPI_FINT_2_INT(*blocklength), OMPI_ARRAY_NAME_CONVERT(array_of_displacements), c_old, &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - + if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } OMPI_ARRAY_FINT_2_INT_CLEANUP(array_of_displacements); diff --git a/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c b/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c index f9dc90706b1..2505456b5b1 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_KEYVAL = ompi_type_create_keyval_f #pragma weak pmpi_type_create_keyval = ompi_type_create_keyval_f #pragma weak pmpi_type_create_keyval_ = ompi_type_create_keyval_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Type_create_keyval_f = ompi_type_create_keyval_f #pragma weak PMPI_Type_create_keyval_f08 = ompi_type_create_keyval_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_KEYVAL, pmpi_type_create_keyval, pmpi_type_create_keyval_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_KEYVAL, (ompi_mpi2_fortran_copy_attr_function* type_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (type_copy_attr_fn, type_delete_attr_fn, type_keyval, extra_state, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_KEYVAL = ompi_type_create_keyval_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_KEYVAL, #pragma weak MPI_Type_create_keyval_f = ompi_type_create_keyval_f #pragma weak MPI_Type_create_keyval_f08 = ompi_type_create_keyval_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_KEYVAL, mpi_type_create_keyval, mpi_type_create_keyval_, @@ -58,11 +61,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_KEYVAL, ompi_type_create_keyval_f, (ompi_mpi2_fortran_copy_attr_function* type_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (type_copy_attr_fn, type_delete_attr_fn, type_keyval, extra_state, ierr) ) +#else +#define ompi_type_create_keyval_f pompi_type_create_keyval_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif static char FUNC_NAME[] = "MPI_Type_create_keyval_f"; diff --git a/ompi/mpi/fortran/mpif-h/type_create_resized_f.c b/ompi/mpi/fortran/mpif-h/type_create_resized_f.c index d9e0a28e968..7bb66b1cc6e 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_resized_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_resized_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_RESIZED = ompi_type_create_resized_f #pragma weak pmpi_type_create_resized = ompi_type_create_resized_f #pragma weak pmpi_type_create_resized_ = ompi_type_create_resized_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_create_resized_f = ompi_type_create_resized_f #pragma weak PMPI_Type_create_resized_f08 = ompi_type_create_resized_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_RESIZED, pmpi_type_create_resized, pmpi_type_create_resized_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_RESIZED, (MPI_Fint *oldtype, MPI_Aint *lb, MPI_Aint *extent, MPI_Fint *newtype, MPI_Fint *ierr), (oldtype, lb, extent, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_RESIZED = ompi_type_create_resized_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_RESIZED, #pragma weak MPI_Type_create_resized_f = ompi_type_create_resized_f #pragma weak MPI_Type_create_resized_f08 = ompi_type_create_resized_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_RESIZED, mpi_type_create_resized, mpi_type_create_resized_, @@ -57,25 +60,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_RESIZED, ompi_type_create_resized_f, (MPI_Fint *oldtype, MPI_Aint *lb, MPI_Aint *extent, MPI_Fint *newtype, MPI_Fint *ierr), (oldtype, lb, extent, newtype, ierr) ) +#else +#define ompi_type_create_resized_f pompi_type_create_resized_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_create_resized_f(MPI_Fint *oldtype, MPI_Aint *lb, - MPI_Aint *extent, MPI_Fint *newtype, + MPI_Aint *extent, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_old = MPI_Type_f2c(*oldtype); + MPI_Datatype c_old = PMPI_Type_f2c(*oldtype); MPI_Datatype c_new; - c_ierr = MPI_Type_create_resized(c_old, *lb, *extent, &c_new); + c_ierr = PMPI_Type_create_resized(c_old, *lb, *extent, &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/type_create_struct_f.c b/ompi/mpi/fortran/mpif-h/type_create_struct_f.c index 035276502eb..8988b481acb 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_struct_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_struct_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_STRUCT = ompi_type_create_struct_f #pragma weak pmpi_type_create_struct = ompi_type_create_struct_f #pragma weak pmpi_type_create_struct_ = ompi_type_create_struct_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Type_create_struct_f = ompi_type_create_struct_f #pragma weak PMPI_Type_create_struct_f08 = ompi_type_create_struct_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_STRUCT, pmpi_type_create_struct, pmpi_type_create_struct_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_STRUCT, (MPI_Fint *count, MPI_Fint *array_of_block_lengths, MPI_Aint *array_of_displacements, MPI_Fint *array_of_types, MPI_Fint *newtype, MPI_Fint *ierr), (count, array_of_block_lengths, array_of_displacements, array_of_types, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_STRUCT = ompi_type_create_struct_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_STRUCT, #pragma weak MPI_Type_create_struct_f = ompi_type_create_struct_f #pragma weak MPI_Type_create_struct_f08 = ompi_type_create_struct_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_STRUCT, mpi_type_create_struct, mpi_type_create_struct_, @@ -59,19 +62,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_STRUCT, ompi_type_create_struct_f, (MPI_Fint *count, MPI_Fint *array_of_block_lengths, MPI_Aint *array_of_displacements, MPI_Fint *array_of_types, MPI_Fint *newtype, MPI_Fint *ierr), (count, array_of_block_lengths, array_of_displacements, array_of_types, newtype, ierr) ) +#else +#define ompi_type_create_struct_f pompi_type_create_struct_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_TYPE_CREATE_STRUCT"; -void ompi_type_create_struct_f(MPI_Fint *count, +void ompi_type_create_struct_f(MPI_Fint *count, MPI_Fint *array_of_block_lengths, - MPI_Aint *array_of_displacements, + MPI_Aint *array_of_displacements, MPI_Fint *array_of_types, MPI_Fint *newtype, MPI_Fint *ierr) { @@ -89,12 +91,12 @@ void ompi_type_create_struct_f(MPI_Fint *count, } for (i = 0; i < *count; i++) { - c_type_old_array[i] = MPI_Type_f2c(array_of_types[i]); + c_type_old_array[i] = PMPI_Type_f2c(array_of_types[i]); } OMPI_ARRAY_FINT_2_INT(array_of_block_lengths, *count); - c_ierr = MPI_Type_create_struct(OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Type_create_struct(OMPI_FINT_2_INT(*count), OMPI_ARRAY_NAME_CONVERT(array_of_block_lengths), array_of_displacements, c_type_old_array, &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -102,7 +104,7 @@ void ompi_type_create_struct_f(MPI_Fint *count, OMPI_ARRAY_FINT_2_INT_CLEANUP(array_of_block_lengths); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } free(c_type_old_array); diff --git a/ompi/mpi/fortran/mpif-h/type_create_subarray_f.c b/ompi/mpi/fortran/mpif-h/type_create_subarray_f.c index 081ead7b52e..a4143ca24d0 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_subarray_f.c +++ 
b/ompi/mpi/fortran/mpif-h/type_create_subarray_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_CREATE_SUBARRAY = ompi_type_create_subarray_f #pragma weak pmpi_type_create_subarray = ompi_type_create_subarray_f #pragma weak pmpi_type_create_subarray_ = ompi_type_create_subarray_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_create_subarray_f = ompi_type_create_subarray_f #pragma weak PMPI_Type_create_subarray_f08 = ompi_type_create_subarray_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_SUBARRAY, pmpi_type_create_subarray, pmpi_type_create_subarray_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_SUBARRAY, (MPI_Fint *ndims, MPI_Fint *size_array, MPI_Fint *subsize_array, MPI_Fint *start_array, MPI_Fint *order, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (ndims, size_array, subsize_array, start_array, order, oldtype, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_CREATE_SUBARRAY = ompi_type_create_subarray_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_SUBARRAY, #pragma weak MPI_Type_create_subarray_f = 
ompi_type_create_subarray_f #pragma weak MPI_Type_create_subarray_f08 = ompi_type_create_subarray_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_SUBARRAY, mpi_type_create_subarray, mpi_type_create_subarray_, @@ -57,17 +60,16 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_SUBARRAY, ompi_type_create_subarray_f, (MPI_Fint *ndims, MPI_Fint *size_array, MPI_Fint *subsize_array, MPI_Fint *start_array, MPI_Fint *order, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (ndims, size_array, subsize_array, start_array, order, oldtype, newtype, ierr) ) +#else +#define ompi_type_create_subarray_f pompi_type_create_subarray_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_create_subarray_f(MPI_Fint *ndims, MPI_Fint *size_array, - MPI_Fint *subsize_array, + MPI_Fint *subsize_array, MPI_Fint *start_array, MPI_Fint *order, - MPI_Fint *oldtype, MPI_Fint *newtype, + MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; @@ -77,13 +79,13 @@ void ompi_type_create_subarray_f(MPI_Fint *ndims, MPI_Fint *size_array, OMPI_ARRAY_NAME_DECL(subsize_array); OMPI_ARRAY_NAME_DECL(start_array); - c_old = MPI_Type_f2c(*oldtype); + c_old = PMPI_Type_f2c(*oldtype); OMPI_ARRAY_FINT_2_INT(size_array, *ndims); OMPI_ARRAY_FINT_2_INT(subsize_array, *ndims); OMPI_ARRAY_FINT_2_INT(start_array, *ndims); - c_ierr = MPI_Type_create_subarray(OMPI_FINT_2_INT(*ndims), + c_ierr = PMPI_Type_create_subarray(OMPI_FINT_2_INT(*ndims), OMPI_ARRAY_NAME_CONVERT(size_array), OMPI_ARRAY_NAME_CONVERT(subsize_array), OMPI_ARRAY_NAME_CONVERT(start_array), @@ -91,7 +93,7 @@ void ompi_type_create_subarray_f(MPI_Fint *ndims, MPI_Fint *size_array, if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } 
OMPI_ARRAY_FINT_2_INT_CLEANUP(size_array); diff --git a/ompi/mpi/fortran/mpif-h/type_delete_attr_f.c b/ompi/mpi/fortran/mpif-h/type_delete_attr_f.c index df57e6b3795..96756cdd568 100644 --- a/ompi/mpi/fortran/mpif-h/type_delete_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/type_delete_attr_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_DELETE_ATTR = ompi_type_delete_attr_f #pragma weak pmpi_type_delete_attr = ompi_type_delete_attr_f #pragma weak pmpi_type_delete_attr_ = ompi_type_delete_attr_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_delete_attr_f = ompi_type_delete_attr_f #pragma weak PMPI_Type_delete_attr_f08 = ompi_type_delete_attr_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_DELETE_ATTR, pmpi_type_delete_attr, pmpi_type_delete_attr_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_DELETE_ATTR, (MPI_Fint *type, MPI_Fint *type_keyval, MPI_Fint *ierr), (type, type_keyval, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_DELETE_ATTR = ompi_type_delete_attr_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_DELETE_ATTR, #pragma weak MPI_Type_delete_attr_f = 
ompi_type_delete_attr_f #pragma weak MPI_Type_delete_attr_f08 = ompi_type_delete_attr_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_DELETE_ATTR, mpi_type_delete_attr, mpi_type_delete_attr_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_DELETE_ATTR, ompi_type_delete_attr_f, (MPI_Fint *type, MPI_Fint *type_keyval, MPI_Fint *ierr), (type, type_keyval, ierr) ) +#else +#define ompi_type_delete_attr_f pompi_type_delete_attr_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_delete_attr_f(MPI_Fint *type, MPI_Fint *type_keyval, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); - c_ierr = MPI_Type_delete_attr(c_type, OMPI_FINT_2_INT(*type_keyval)); + c_ierr = PMPI_Type_delete_attr(c_type, OMPI_FINT_2_INT(*type_keyval)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/type_dup_f.c b/ompi/mpi/fortran/mpif-h/type_dup_f.c index 8bd71702983..42351e9f2a9 100644 --- a/ompi/mpi/fortran/mpif-h/type_dup_f.c +++ b/ompi/mpi/fortran/mpif-h/type_dup_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_DUP = ompi_type_dup_f #pragma weak pmpi_type_dup = ompi_type_dup_f #pragma weak pmpi_type_dup_ = ompi_type_dup_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_dup_f = ompi_type_dup_f #pragma weak PMPI_Type_dup_f08 = ompi_type_dup_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_DUP, pmpi_type_dup, pmpi_type_dup_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_DUP, (MPI_Fint *type, MPI_Fint *newtype, MPI_Fint *ierr), (type, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_DUP = ompi_type_dup_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_DUP, #pragma weak MPI_Type_dup_f = ompi_type_dup_f #pragma weak MPI_Type_dup_f08 = ompi_type_dup_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_DUP, mpi_type_dup, mpi_type_dup_, @@ -57,23 +60,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_DUP, ompi_type_dup_f, (MPI_Fint *type, MPI_Fint *newtype, MPI_Fint *ierr), (type, newtype, ierr) ) +#else +#define ompi_type_dup_f pompi_type_dup_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_dup_f(MPI_Fint *type, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); MPI_Datatype c_new; - c_ierr = MPI_Type_dup(c_type, &c_new); + c_ierr = PMPI_Type_dup(c_type, &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/type_extent_f.c b/ompi/mpi/fortran/mpif-h/type_extent_f.c index 85102b8601e..a2baf215a6b 100644 --- a/ompi/mpi/fortran/mpif-h/type_extent_f.c +++ b/ompi/mpi/fortran/mpif-h/type_extent_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_EXTENT = ompi_type_extent_f #pragma weak pmpi_type_extent = ompi_type_extent_f #pragma weak pmpi_type_extent_ = ompi_type_extent_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_extent_f = ompi_type_extent_f #pragma weak PMPI_Type_extent_f08 = ompi_type_extent_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_EXTENT, pmpi_type_extent, pmpi_type_extent_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_EXTENT, (MPI_Fint *type, MPI_Fint *extent, MPI_Fint *ierr), (type, extent, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_EXTENT = ompi_type_extent_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_EXTENT, #pragma weak MPI_Type_extent_f = ompi_type_extent_f #pragma weak MPI_Type_extent_f08 = ompi_type_extent_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_EXTENT, mpi_type_extent, mpi_type_extent_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_EXTENT, ompi_type_extent_f, (MPI_Fint *type, MPI_Fint *extent, MPI_Fint *ierr), (type, extent, ierr) ) +#else +#define ompi_type_extent_f pompi_type_extent_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_extent_f(MPI_Fint *type, MPI_Fint *extent, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); MPI_Aint c_extent; - c_ierr = MPI_Type_extent(c_type, &c_extent); + c_ierr = PMPI_Type_extent(c_type, &c_extent); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/type_free_f.c b/ompi/mpi/fortran/mpif-h/type_free_f.c index 314a28fe274..bdee31aa0cd 100644 --- a/ompi/mpi/fortran/mpif-h/type_free_f.c +++ b/ompi/mpi/fortran/mpif-h/type_free_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_FREE = ompi_type_free_f #pragma weak pmpi_type_free = ompi_type_free_f #pragma weak pmpi_type_free_ = ompi_type_free_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_free_f = ompi_type_free_f #pragma weak PMPI_Type_free_f08 = ompi_type_free_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_FREE, pmpi_type_free, pmpi_type_free_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_FREE, (MPI_Fint *type, MPI_Fint *ierr), (type, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_FREE = ompi_type_free_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_FREE, #pragma weak MPI_Type_free_f = ompi_type_free_f #pragma weak MPI_Type_free_f08 = ompi_type_free_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_FREE, mpi_type_free, mpi_type_free_, @@ -57,24 +60,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_FREE, ompi_type_free_f, (MPI_Fint *type, MPI_Fint *ierr), (type, ierr) ) +#else +#define ompi_type_free_f pompi_type_free_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_free_f(MPI_Fint *type, MPI_Fint *ierr) { int c_ierr; MPI_Datatype c_type; - - c_type = MPI_Type_f2c(*type); - - c_ierr = MPI_Type_free(&c_type); + + c_type = PMPI_Type_f2c(*type); + + c_ierr = PMPI_Type_free(&c_type); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *type = MPI_Type_c2f(c_type); + *type = PMPI_Type_c2f(c_type); } } diff --git a/ompi/mpi/fortran/mpif-h/type_free_keyval_f.c b/ompi/mpi/fortran/mpif-h/type_free_keyval_f.c index 903c22531a0..257b59abc01 100644 --- a/ompi/mpi/fortran/mpif-h/type_free_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/type_free_keyval_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_FREE_KEYVAL = ompi_type_free_keyval_f #pragma weak pmpi_type_free_keyval = ompi_type_free_keyval_f #pragma weak pmpi_type_free_keyval_ = ompi_type_free_keyval_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_free_keyval_f = ompi_type_free_keyval_f #pragma weak PMPI_Type_free_keyval_f08 = ompi_type_free_keyval_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_FREE_KEYVAL, pmpi_type_free_keyval, pmpi_type_free_keyval_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_FREE_KEYVAL, (MPI_Fint *type_keyval, MPI_Fint *ierr), (type_keyval, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_FREE_KEYVAL = ompi_type_free_keyval_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_FREE_KEYVAL, #pragma weak MPI_Type_free_keyval_f = ompi_type_free_keyval_f #pragma weak MPI_Type_free_keyval_f08 = ompi_type_free_keyval_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_FREE_KEYVAL, mpi_type_free_keyval, mpi_type_free_keyval_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_FREE_KEYVAL, ompi_type_free_keyval_f, (MPI_Fint *type_keyval, MPI_Fint *ierr), (type_keyval, ierr) ) +#else +#define ompi_type_free_keyval_f pompi_type_free_keyval_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_free_keyval_f(MPI_Fint *type_keyval, MPI_Fint *ierr) { int c_ierr; @@ -71,7 +73,7 @@ void ompi_type_free_keyval_f(MPI_Fint *type_keyval, MPI_Fint *ierr) OMPI_SINGLE_FINT_2_INT(type_keyval); - c_ierr = MPI_Type_free_keyval(OMPI_SINGLE_NAME_CONVERT(type_keyval)); + c_ierr = PMPI_Type_free_keyval(OMPI_SINGLE_NAME_CONVERT(type_keyval)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/type_get_attr_f.c b/ompi/mpi/fortran/mpif-h/type_get_attr_f.c index 7f54a83488d..84e51e25e66 100644 --- a/ompi/mpi/fortran/mpif-h/type_get_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/type_get_attr_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/attribute/attribute.h" #include "ompi/datatype/ompi_datatype.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_GET_ATTR = ompi_type_get_attr_f #pragma weak pmpi_type_get_attr = ompi_type_get_attr_f #pragma weak pmpi_type_get_attr_ = ompi_type_get_attr_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Type_get_attr_f = ompi_type_get_attr_f #pragma weak PMPI_Type_get_attr_f08 = ompi_type_get_attr_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_ATTR, pmpi_type_get_attr, pmpi_type_get_attr_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_ATTR, (MPI_Fint *type, MPI_Fint *type_keyval, MPI_Aint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (type, type_keyval, attribute_val, flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_GET_ATTR = ompi_type_get_attr_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_ATTR, #pragma weak MPI_Type_get_attr_f = ompi_type_get_attr_f #pragma weak MPI_Type_get_attr_f08 = ompi_type_get_attr_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_ATTR, mpi_type_get_attr, mpi_type_get_attr_, @@ -59,11 +62,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_ATTR, ompi_type_get_attr_f, (MPI_Fint *type, MPI_Fint *type_keyval, MPI_Aint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (type, type_keyval, attribute_val, flag, ierr) ) +#else +#define ompi_type_get_attr_f pompi_type_get_attr_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_type_get_attr_f(MPI_Fint *type, MPI_Fint *type_keyval, @@ -71,7 +72,7 @@ void ompi_type_get_attr_f(MPI_Fint *type, MPI_Fint *type_keyval, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); OMPI_LOGICAL_NAME_DECL(flag); /* This stuff is very confusing. Be sure to see the comment at diff --git a/ompi/mpi/fortran/mpif-h/type_get_contents_f.c b/ompi/mpi/fortran/mpif-h/type_get_contents_f.c index a109fe22068..2f98ef2662b 100644 --- a/ompi/mpi/fortran/mpif-h/type_get_contents_f.c +++ b/ompi/mpi/fortran/mpif-h/type_get_contents_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_GET_CONTENTS = ompi_type_get_contents_f #pragma weak pmpi_type_get_contents = ompi_type_get_contents_f #pragma weak pmpi_type_get_contents_ = ompi_type_get_contents_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Type_get_contents_f = ompi_type_get_contents_f #pragma weak PMPI_Type_get_contents_f08 = ompi_type_get_contents_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_CONTENTS, pmpi_type_get_contents, pmpi_type_get_contents_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_CONTENTS, (MPI_Fint *mtype, MPI_Fint *max_integers, MPI_Fint *max_addresses, MPI_Fint *max_datatypes, MPI_Fint *array_of_integers, MPI_Aint *array_of_addresses, MPI_Fint *array_of_datatypes, MPI_Fint *ierr), (mtype, max_integers, max_addresses, max_datatypes, array_of_integers, array_of_addresses, array_of_datatypes, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_GET_CONTENTS = ompi_type_get_contents_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_CONTENTS, #pragma weak MPI_Type_get_contents_f = ompi_type_get_contents_f #pragma weak MPI_Type_get_contents_f08 = ompi_type_get_contents_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_CONTENTS, mpi_type_get_contents, mpi_type_get_contents_, @@ -59,25 +62,24 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_CONTENTS, ompi_type_get_contents_f, (MPI_Fint *mtype, MPI_Fint *max_integers, MPI_Fint *max_addresses, MPI_Fint *max_datatypes, MPI_Fint *array_of_integers, MPI_Aint *array_of_addresses, MPI_Fint *array_of_datatypes, MPI_Fint *ierr), (mtype, max_integers, max_addresses, max_datatypes, array_of_integers, array_of_addresses, array_of_datatypes, ierr) ) +#else +#define ompi_type_get_contents_f pompi_type_get_contents_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_TYPE_GET_CONTENTS"; void ompi_type_get_contents_f(MPI_Fint *mtype, MPI_Fint *max_integers, MPI_Fint *max_addresses, MPI_Fint *max_datatypes, - MPI_Fint *array_of_integers, - MPI_Aint *array_of_addresses, + MPI_Fint *array_of_integers, + MPI_Aint *array_of_addresses, MPI_Fint *array_of_datatypes, MPI_Fint *ierr) { MPI_Aint *c_address_array = NULL; MPI_Datatype *c_datatype_array = NULL; - MPI_Datatype c_mtype = MPI_Type_f2c(*mtype); + MPI_Datatype c_mtype = PMPI_Type_f2c(*mtype); int i, c_ierr; OMPI_ARRAY_NAME_DECL(array_of_integers); @@ -107,11 +109,11 @@ void ompi_type_get_contents_f(MPI_Fint *mtype, MPI_Fint *max_integers, OMPI_ARRAY_FINT_2_INT(array_of_integers, *max_integers); - c_ierr = MPI_Type_get_contents(c_mtype, - OMPI_FINT_2_INT(*max_integers), + c_ierr = PMPI_Type_get_contents(c_mtype, + OMPI_FINT_2_INT(*max_integers), OMPI_FINT_2_INT(*max_addresses), OMPI_FINT_2_INT(*max_datatypes), - OMPI_ARRAY_NAME_CONVERT(array_of_integers), + OMPI_ARRAY_NAME_CONVERT(array_of_integers), c_address_array, c_datatype_array); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -120,7 +122,7 @@ void ompi_type_get_contents_f(MPI_Fint *mtype, MPI_Fint *max_integers, array_of_addresses[i] = c_address_array[i]; } for (i = 
0; i < *max_datatypes; i++) { - array_of_datatypes[i] = MPI_Type_c2f(c_datatype_array[i]); + array_of_datatypes[i] = PMPI_Type_c2f(c_datatype_array[i]); } } free(c_address_array); diff --git a/ompi/mpi/fortran/mpif-h/type_get_envelope_f.c b/ompi/mpi/fortran/mpif-h/type_get_envelope_f.c index b9629f49b60..23a2246dab4 100644 --- a/ompi/mpi/fortran/mpif-h/type_get_envelope_f.c +++ b/ompi/mpi/fortran/mpif-h/type_get_envelope_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_GET_ENVELOPE = ompi_type_get_envelope_f #pragma weak pmpi_type_get_envelope = ompi_type_get_envelope_f #pragma weak pmpi_type_get_envelope_ = ompi_type_get_envelope_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_get_envelope_f = ompi_type_get_envelope_f #pragma weak PMPI_Type_get_envelope_f08 = ompi_type_get_envelope_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_ENVELOPE, pmpi_type_get_envelope, pmpi_type_get_envelope_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_ENVELOPE, (MPI_Fint *type, MPI_Fint *num_integers, MPI_Fint *num_addresses, MPI_Fint *num_datatypes, MPI_Fint *combiner, MPI_Fint *ierr), (type, num_integers, num_addresses, num_datatypes, combiner, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_GET_ENVELOPE = ompi_type_get_envelope_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_ENVELOPE, #pragma weak MPI_Type_get_envelope_f = ompi_type_get_envelope_f #pragma weak MPI_Type_get_envelope_f08 = ompi_type_get_envelope_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_ENVELOPE, mpi_type_get_envelope, mpi_type_get_envelope_, @@ -57,29 +60,28 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_ENVELOPE, ompi_type_get_envelope_f, (MPI_Fint *type, MPI_Fint *num_integers, MPI_Fint *num_addresses, MPI_Fint *num_datatypes, MPI_Fint *combiner, MPI_Fint *ierr), (type, num_integers, num_addresses, num_datatypes, combiner, ierr) ) +#else +#define ompi_type_get_envelope_f pompi_type_get_envelope_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_get_envelope_f(MPI_Fint *type, MPI_Fint *num_integers, - MPI_Fint *num_addresses, + MPI_Fint *num_addresses, MPI_Fint *num_datatypes, MPI_Fint *combiner, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); OMPI_SINGLE_NAME_DECL(num_integers); OMPI_SINGLE_NAME_DECL(num_addresses); OMPI_SINGLE_NAME_DECL(num_datatypes); OMPI_SINGLE_NAME_DECL(combiner); - c_ierr = MPI_Type_get_envelope(c_type, - OMPI_SINGLE_NAME_CONVERT(num_integers), - OMPI_SINGLE_NAME_CONVERT(num_addresses), - OMPI_SINGLE_NAME_CONVERT(num_datatypes), + c_ierr = PMPI_Type_get_envelope(c_type, + OMPI_SINGLE_NAME_CONVERT(num_integers), + OMPI_SINGLE_NAME_CONVERT(num_addresses), + OMPI_SINGLE_NAME_CONVERT(num_datatypes), OMPI_SINGLE_NAME_CONVERT(combiner)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/type_get_extent_f.c b/ompi/mpi/fortran/mpif-h/type_get_extent_f.c index 2a3ab6e4e1c..24cd75f5da1 100644 --- a/ompi/mpi/fortran/mpif-h/type_get_extent_f.c +++ b/ompi/mpi/fortran/mpif-h/type_get_extent_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_GET_EXTENT = ompi_type_get_extent_f #pragma weak pmpi_type_get_extent = ompi_type_get_extent_f #pragma weak pmpi_type_get_extent_ = ompi_type_get_extent_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_get_extent_f = ompi_type_get_extent_f #pragma weak PMPI_Type_get_extent_f08 = ompi_type_get_extent_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_EXTENT, pmpi_type_get_extent, pmpi_type_get_extent_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_EXTENT, (MPI_Fint *type, MPI_Aint *lb, MPI_Aint *extent, MPI_Fint *ierr), (type, lb, extent, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_GET_EXTENT = ompi_type_get_extent_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_EXTENT, #pragma weak MPI_Type_get_extent_f = ompi_type_get_extent_f #pragma weak MPI_Type_get_extent_f08 = ompi_type_get_extent_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_EXTENT, mpi_type_get_extent, mpi_type_get_extent_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_EXTENT, ompi_type_get_extent_f, (MPI_Fint *type, MPI_Aint *lb, MPI_Aint *extent, MPI_Fint *ierr), (type, lb, extent, ierr) ) +#else +#define ompi_type_get_extent_f pompi_type_get_extent_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_get_extent_f(MPI_Fint *type, MPI_Aint *lb, MPI_Aint *extent, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); - c_ierr = MPI_Type_get_extent(c_type, lb, extent); + c_ierr = PMPI_Type_get_extent(c_type, lb, extent); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/type_get_extent_x_f.c b/ompi/mpi/fortran/mpif-h/type_get_extent_x_f.c index 5634d2aa5dc..dfd4d6755fe 100644 --- a/ompi/mpi/fortran/mpif-h/type_get_extent_x_f.c +++ b/ompi/mpi/fortran/mpif-h/type_get_extent_x_f.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_GET_EXTENT_X = ompi_type_get_extent_x_f #pragma weak pmpi_type_get_extent_x = ompi_type_get_extent_x_f #pragma weak pmpi_type_get_extent_x_ = ompi_type_get_extent_x_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Type_get_extent_x_f = ompi_type_get_extent_x_f #pragma weak PMPI_Type_get_extent_x_f08 = ompi_type_get_extent_x_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_EXTENT_X, pmpi_type_get_extent_x, pmpi_type_get_extent_x_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_EXTENT_X, (MPI_Fint *type, MPI_Count *lb, MPI_Count *extent, MPI_Fint *ierr), (type, lb, extent, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_GET_EXTENT_X = ompi_type_get_extent_x_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_EXTENT_X, #pragma weak MPI_Type_get_extent_x_f = ompi_type_get_extent_x_f #pragma weak MPI_Type_get_extent_x_f08 = ompi_type_get_extent_x_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_EXTENT_X, mpi_type_get_extent_x, mpi_type_get_extent_x_, @@ -59,19 +62,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_EXTENT_X, ompi_type_get_extent_x_f, (MPI_Fint *type, MPI_Count *lb, MPI_Count *extent, MPI_Fint *ierr), (type, lb, extent, ierr) ) +#else +#define ompi_type_get_extent_x_f pompi_type_get_extent_x_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_get_extent_x_f(MPI_Fint *type, MPI_Count *lb, MPI_Count *extent, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); - c_ierr = MPI_Type_get_extent_x(c_type, lb, extent); + c_ierr = PMPI_Type_get_extent_x(c_type, lb, extent); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/type_get_name_f.c b/ompi/mpi/fortran/mpif-h/type_get_name_f.c index ac36354f6c2..5e646bec9b2 100644 --- a/ompi/mpi/fortran/mpif-h/type_get_name_f.c +++ b/ompi/mpi/fortran/mpif-h/type_get_name_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/constants.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_GET_NAME = ompi_type_get_name_f #pragma weak pmpi_type_get_name = ompi_type_get_name_f #pragma weak pmpi_type_get_name_ = ompi_type_get_name_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Type_get_name_f = ompi_type_get_name_f #pragma weak PMPI_Type_get_name_f08 = ompi_type_get_name_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_NAME, pmpi_type_get_name, pmpi_type_get_name_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_NAME, (MPI_Fint *type, char *type_name, MPI_Fint *resultlen, MPI_Fint *ierr, int name_len), (type, type_name, resultlen, ierr, name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_GET_NAME = ompi_type_get_name_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_NAME, #pragma weak MPI_Type_get_name_f = ompi_type_get_name_f #pragma weak MPI_Type_get_name_f08 = ompi_type_get_name_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_NAME, mpi_type_get_name, mpi_type_get_name_, @@ -59,20 +62,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_NAME, ompi_type_get_name_f, (MPI_Fint *type, char *type_name, MPI_Fint *resultlen, MPI_Fint *ierr, int name_len), (type, type_name, resultlen, ierr, name_len) ) +#else +#define ompi_type_get_name_f pompi_type_get_name_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_get_name_f(MPI_Fint *type, char *type_name, MPI_Fint *resultlen, MPI_Fint *ierr, int name_len) { int c_ierr, c_len; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); char c_name[MPI_MAX_OBJECT_NAME]; - c_ierr = MPI_Type_get_name(c_type, c_name, &c_len); + c_ierr = PMPI_Type_get_name(c_type, c_name, &c_len); if (MPI_SUCCESS == c_ierr) { ompi_fortran_string_c2f(c_name, type_name, name_len); *resultlen = OMPI_INT_2_FINT(c_len); diff --git a/ompi/mpi/fortran/mpif-h/type_get_true_extent_f.c b/ompi/mpi/fortran/mpif-h/type_get_true_extent_f.c index 2c640523071..8ea8b81cbf4 100644 --- a/ompi/mpi/fortran/mpif-h/type_get_true_extent_f.c +++ b/ompi/mpi/fortran/mpif-h/type_get_true_extent_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_GET_TRUE_EXTENT = ompi_type_get_true_extent_f #pragma weak pmpi_type_get_true_extent = ompi_type_get_true_extent_f #pragma weak pmpi_type_get_true_extent_ = ompi_type_get_true_extent_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_get_true_extent_f = ompi_type_get_true_extent_f #pragma weak PMPI_Type_get_true_extent_f08 = ompi_type_get_true_extent_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_TRUE_EXTENT, pmpi_type_get_true_extent, pmpi_type_get_true_extent_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_TRUE_EXTENT, (MPI_Fint *datatype, MPI_Aint *true_lb, MPI_Aint *true_extent, MPI_Fint *ierr), (datatype, true_lb, true_extent, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_GET_TRUE_EXTENT = ompi_type_get_true_extent_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_TRUE_EXTENT, #pragma weak MPI_Type_get_true_extent_f = ompi_type_get_true_extent_f #pragma weak MPI_Type_get_true_extent_f08 = ompi_type_get_true_extent_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_TRUE_EXTENT, mpi_type_get_true_extent, mpi_type_get_true_extent_, @@ -57,18 +60,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_TRUE_EXTENT, ompi_type_get_true_extent_f, (MPI_Fint *datatype, MPI_Aint *true_lb, MPI_Aint *true_extent, MPI_Fint *ierr), (datatype, true_lb, true_extent, ierr) ) +#else +#define ompi_type_get_true_extent_f pompi_type_get_true_extent_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_get_true_extent_f(MPI_Fint *datatype, MPI_Aint *true_lb, MPI_Aint *true_extent, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_Type_get_true_extent(c_type, true_lb, true_extent); + c_ierr = PMPI_Type_get_true_extent(c_type, true_lb, true_extent); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/type_get_true_extent_x_f.c b/ompi/mpi/fortran/mpif-h/type_get_true_extent_x_f.c index fa1dfc77641..cef713ff4a5 100644 --- a/ompi/mpi/fortran/mpif-h/type_get_true_extent_x_f.c +++ b/ompi/mpi/fortran/mpif-h/type_get_true_extent_x_f.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_GET_TRUE_EXTENT_X = ompi_type_get_true_extent_x_f #pragma weak pmpi_type_get_true_extent_x = ompi_type_get_true_extent_x_f #pragma weak pmpi_type_get_true_extent_x_ = ompi_type_get_true_extent_x_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Type_get_true_extent_x_f = ompi_type_get_true_extent_x_f #pragma weak PMPI_Type_get_true_extent_x_f08 = ompi_type_get_true_extent_x_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_TRUE_EXTENT_X, pmpi_type_get_true_extent_x, pmpi_type_get_true_extent_x_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_TRUE_EXTENT_X, (MPI_Fint *datatype, MPI_Count *true_lb, MPI_Count *true_extent, MPI_Fint *ierr), (datatype, true_lb, true_extent, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_GET_TRUE_EXTENT_X = ompi_type_get_true_extent_x_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_GET_TRUE_EXTENT_X, #pragma weak MPI_Type_get_true_extent_x_f = ompi_type_get_true_extent_x_f #pragma weak MPI_Type_get_true_extent_x_f08 = ompi_type_get_true_extent_x_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_TRUE_EXTENT_X, mpi_type_get_true_extent_x, mpi_type_get_true_extent_x_, @@ -59,18 +62,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_GET_TRUE_EXTENT_X, ompi_type_get_true_extent_x_f, (MPI_Fint *datatype, MPI_Count *true_lb, MPI_Count *true_extent, MPI_Fint *ierr), (datatype, true_lb, true_extent, ierr) ) +#else +#define ompi_type_get_true_extent_x_f pompi_type_get_true_extent_x_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_get_true_extent_x_f(MPI_Fint *datatype, MPI_Count *true_lb, MPI_Count *true_extent, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*datatype); + MPI_Datatype c_type = PMPI_Type_f2c(*datatype); - c_ierr = MPI_Type_get_true_extent_x(c_type, true_lb, true_extent); + c_ierr = PMPI_Type_get_true_extent_x(c_type, true_lb, true_extent); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/type_hindexed_f.c b/ompi/mpi/fortran/mpif-h/type_hindexed_f.c index 1a6a31af165..3b48ec31cef 100644 --- a/ompi/mpi/fortran/mpif-h/type_hindexed_f.c +++ b/ompi/mpi/fortran/mpif-h/type_hindexed_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_HINDEXED = ompi_type_hindexed_f #pragma weak pmpi_type_hindexed = ompi_type_hindexed_f #pragma weak pmpi_type_hindexed_ = ompi_type_hindexed_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Type_hindexed_f = ompi_type_hindexed_f #pragma weak PMPI_Type_hindexed_f08 = ompi_type_hindexed_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_HINDEXED, pmpi_type_hindexed, pmpi_type_hindexed_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_HINDEXED, (MPI_Fint *count, MPI_Fint *array_of_blocklengths, MPI_Fint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, array_of_blocklengths, array_of_displacements, oldtype, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_HINDEXED = ompi_type_hindexed_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_HINDEXED, #pragma weak MPI_Type_hindexed_f = ompi_type_hindexed_f #pragma weak MPI_Type_hindexed_f08 = ompi_type_hindexed_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_HINDEXED, mpi_type_hindexed, mpi_type_hindexed_, @@ -59,21 +62,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_HINDEXED, ompi_type_hindexed_f, (MPI_Fint *count, MPI_Fint *array_of_blocklengths, MPI_Fint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, array_of_blocklengths, array_of_displacements, oldtype, newtype, ierr) ) +#else +#define ompi_type_hindexed_f pompi_type_hindexed_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_TYPE_HINDEXED"; -void ompi_type_hindexed_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, - MPI_Fint *array_of_displacements, +void ompi_type_hindexed_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, + MPI_Fint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr) { - MPI_Datatype c_old = MPI_Type_f2c(*oldtype); + MPI_Datatype c_old = PMPI_Type_f2c(*oldtype); MPI_Datatype c_new; MPI_Aint *c_disp_array; int i, c_ierr; @@ -92,8 +94,8 @@ void ompi_type_hindexed_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, OMPI_ARRAY_FINT_2_INT(array_of_blocklengths, *count); - c_ierr = MPI_Type_hindexed(OMPI_FINT_2_INT(*count), - OMPI_ARRAY_NAME_CONVERT(array_of_blocklengths), + c_ierr = PMPI_Type_hindexed(OMPI_FINT_2_INT(*count), + OMPI_ARRAY_NAME_CONVERT(array_of_blocklengths), c_disp_array, c_old, &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -101,6 +103,6 @@ void ompi_type_hindexed_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, OMPI_ARRAY_FINT_2_INT_CLEANUP(array_of_blocklengths); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/type_hvector_f.c b/ompi/mpi/fortran/mpif-h/type_hvector_f.c index 2ae4a594236..1bc5720e9bb 100644 --- a/ompi/mpi/fortran/mpif-h/type_hvector_f.c +++ b/ompi/mpi/fortran/mpif-h/type_hvector_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_HVECTOR = ompi_type_hvector_f #pragma weak pmpi_type_hvector = ompi_type_hvector_f #pragma weak pmpi_type_hvector_ = ompi_type_hvector_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_hvector_f = ompi_type_hvector_f #pragma weak PMPI_Type_hvector_f08 = ompi_type_hvector_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_HVECTOR, pmpi_type_hvector, pmpi_type_hvector_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_HVECTOR, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Fint *stride, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, blocklength, stride, oldtype, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_HVECTOR = ompi_type_hvector_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_HVECTOR, #pragma weak MPI_Type_hvector_f = ompi_type_hvector_f #pragma weak MPI_Type_hvector_f08 = ompi_type_hvector_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_HVECTOR, mpi_type_hvector, mpi_type_hvector_, @@ -57,29 +60,28 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_HVECTOR, ompi_type_hvector_f, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Fint *stride, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, blocklength, stride, oldtype, newtype, ierr) ) +#else +#define ompi_type_hvector_f pompi_type_hvector_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_type_hvector_f(MPI_Fint *count, MPI_Fint *blocklength, - MPI_Fint *stride, MPI_Fint *oldtype, + +void ompi_type_hvector_f(MPI_Fint *count, MPI_Fint *blocklength, + MPI_Fint *stride, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; MPI_Datatype c_oldtype, c_newtype; - c_oldtype = MPI_Type_f2c(*oldtype); + c_oldtype = PMPI_Type_f2c(*oldtype); - c_ierr = MPI_Type_hvector(OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Type_hvector(OMPI_FINT_2_INT(*count), OMPI_FINT_2_INT(*blocklength), (MPI_Aint)*stride, c_oldtype, &c_newtype); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_newtype); + *newtype = PMPI_Type_c2f(c_newtype); } } diff --git a/ompi/mpi/fortran/mpif-h/type_indexed_f.c b/ompi/mpi/fortran/mpif-h/type_indexed_f.c index 3887f76ba4f..f7b2b5fa1b7 100644 --- a/ompi/mpi/fortran/mpif-h/type_indexed_f.c +++ b/ompi/mpi/fortran/mpif-h/type_indexed_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_INDEXED = ompi_type_indexed_f #pragma weak pmpi_type_indexed = ompi_type_indexed_f #pragma weak pmpi_type_indexed_ = ompi_type_indexed_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_indexed_f = ompi_type_indexed_f #pragma weak PMPI_Type_indexed_f08 = ompi_type_indexed_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_INDEXED, pmpi_type_indexed, pmpi_type_indexed_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_INDEXED, (MPI_Fint *count, MPI_Fint *array_of_blocklengths, MPI_Fint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, array_of_blocklengths, array_of_displacements, oldtype, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_INDEXED = ompi_type_indexed_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_INDEXED, #pragma weak MPI_Type_indexed_f = ompi_type_indexed_f #pragma weak MPI_Type_indexed_f08 = ompi_type_indexed_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_INDEXED, mpi_type_indexed, mpi_type_indexed_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_INDEXED, ompi_type_indexed_f, (MPI_Fint *count, MPI_Fint *array_of_blocklengths, MPI_Fint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, array_of_blocklengths, array_of_displacements, oldtype, newtype, ierr) ) +#else +#define ompi_type_indexed_f pompi_type_indexed_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_indexed_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, MPI_Fint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_old = MPI_Type_f2c(*oldtype); + MPI_Datatype c_old = PMPI_Type_f2c(*oldtype); MPI_Datatype c_new; OMPI_ARRAY_NAME_DECL(array_of_blocklengths); OMPI_ARRAY_NAME_DECL(array_of_displacements); @@ -77,8 +79,8 @@ void ompi_type_indexed_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, OMPI_ARRAY_FINT_2_INT(array_of_blocklengths, *count); OMPI_ARRAY_FINT_2_INT(array_of_displacements, *count); - c_ierr = MPI_Type_indexed(OMPI_FINT_2_INT(*count), - OMPI_ARRAY_NAME_CONVERT(array_of_blocklengths), + c_ierr = PMPI_Type_indexed(OMPI_FINT_2_INT(*count), + OMPI_ARRAY_NAME_CONVERT(array_of_blocklengths), OMPI_ARRAY_NAME_CONVERT(array_of_displacements), c_old, &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -87,6 +89,6 @@ void ompi_type_indexed_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, OMPI_ARRAY_FINT_2_INT_CLEANUP(array_of_displacements); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/type_lb_f.c b/ompi/mpi/fortran/mpif-h/type_lb_f.c index 40d03260ec6..3867c35ad29 100644 --- a/ompi/mpi/fortran/mpif-h/type_lb_f.c +++ b/ompi/mpi/fortran/mpif-h/type_lb_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_LB = ompi_type_lb_f #pragma weak pmpi_type_lb = ompi_type_lb_f #pragma weak pmpi_type_lb_ = ompi_type_lb_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_lb_f = ompi_type_lb_f #pragma weak PMPI_Type_lb_f08 = ompi_type_lb_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_LB, pmpi_type_lb, pmpi_type_lb_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_LB, (MPI_Fint *type, MPI_Fint *lb, MPI_Fint *ierr), (type, lb, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_LB = ompi_type_lb_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_LB, #pragma weak MPI_Type_lb_f = ompi_type_lb_f #pragma weak MPI_Type_lb_f08 = ompi_type_lb_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_LB, mpi_type_lb, mpi_type_lb_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_LB, ompi_type_lb_f, (MPI_Fint *type, MPI_Fint *lb, MPI_Fint *ierr), (type, lb, ierr) ) +#else +#define ompi_type_lb_f pompi_type_lb_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_lb_f(MPI_Fint *type, MPI_Fint *lb, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); MPI_Aint c_lb; - c_ierr = MPI_Type_lb(c_type, &c_lb); + c_ierr = PMPI_Type_lb(c_type, &c_lb); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/type_match_size_f.c b/ompi/mpi/fortran/mpif-h/type_match_size_f.c index 46f29ae60ce..90934dfdd7e 100644 --- a/ompi/mpi/fortran/mpif-h/type_match_size_f.c +++ b/ompi/mpi/fortran/mpif-h/type_match_size_f.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,7 +30,8 @@ #include "ompi/communicator/communicator.h" #include "ompi/runtime/params.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_MATCH_SIZE = ompi_type_match_size_f #pragma weak pmpi_type_match_size = ompi_type_match_size_f #pragma weak pmpi_type_match_size_ = ompi_type_match_size_f @@ -36,7 +39,7 @@ #pragma weak PMPI_Type_match_size_f = ompi_type_match_size_f #pragma weak PMPI_Type_match_size_f08 = ompi_type_match_size_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_MATCH_SIZE, pmpi_type_match_size, pmpi_type_match_size_, @@ -45,6 +48,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_MATCH_SIZE, (MPI_Fint *typeclass, MPI_Fint *size, MPI_Fint *type, MPI_Fint *ierr), (typeclass, size, type, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_MATCH_SIZE = ompi_type_match_size_f @@ -54,9 +58,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_MATCH_SIZE, #pragma weak MPI_Type_match_size_f = ompi_type_match_size_f #pragma weak MPI_Type_match_size_f08 = ompi_type_match_size_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_MATCH_SIZE, mpi_type_match_size, mpi_type_match_size_, @@ -64,11 +67,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_MATCH_SIZE, ompi_type_match_size_f, (MPI_Fint *typeclass, MPI_Fint *size, MPI_Fint *type, MPI_Fint *ierr), (typeclass, size, type, ierr) ) +#else +#define ompi_type_match_size_f pompi_type_match_size_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif static const char FUNC_NAME[] = "MPI_Type_match_size_f"; @@ -99,7 +100,7 @@ void ompi_type_match_size_f(MPI_Fint *typeclass, MPI_Fint *size, MPI_Fint *type, default: c_type = &ompi_mpi_datatype_null.dt; } - *type = MPI_Type_c2f( c_type ); + *type = PMPI_Type_c2f( c_type ); if ( c_type != &ompi_mpi_datatype_null.dt ) { c_ierr = MPI_SUCCESS; } else { diff --git a/ompi/mpi/fortran/mpif-h/type_set_attr_f.c b/ompi/mpi/fortran/mpif-h/type_set_attr_f.c index f49bc21ac8a..644d2b32ae9 100644 --- a/ompi/mpi/fortran/mpif-h/type_set_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/type_set_attr_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/attribute/attribute.h" #include "ompi/datatype/ompi_datatype.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_SET_ATTR = ompi_type_set_attr_f #pragma weak pmpi_type_set_attr = ompi_type_set_attr_f #pragma weak pmpi_type_set_attr_ = ompi_type_set_attr_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Type_set_attr_f = ompi_type_set_attr_f #pragma weak PMPI_Type_set_attr_f08 = ompi_type_set_attr_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SET_ATTR, pmpi_type_set_attr, pmpi_type_set_attr_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SET_ATTR, (MPI_Fint *type, MPI_Fint *type_keyval, MPI_Aint *attr_val, MPI_Fint *ierr), (type, type_keyval, attr_val, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_SET_ATTR = ompi_type_set_attr_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SET_ATTR, #pragma weak MPI_Type_set_attr_f = ompi_type_set_attr_f #pragma weak MPI_Type_set_attr_f08 = ompi_type_set_attr_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_SET_ATTR, mpi_type_set_attr, mpi_type_set_attr_, @@ -59,17 +62,15 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_SET_ATTR, ompi_type_set_attr_f, (MPI_Fint *type, MPI_Fint *type_keyval, MPI_Aint *attr_val, MPI_Fint *ierr), (type, type_keyval, attr_val, ierr) ) +#else +#define ompi_type_set_attr_f pompi_type_set_attr_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_type_set_attr_f(MPI_Fint *type, MPI_Fint *type_keyval, MPI_Aint *attribute_val, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); /* This stuff is very confusing. 
Be sure to see the comment at the top of src/attributes/attributes.c. */ @@ -77,7 +78,7 @@ void ompi_type_set_attr_f(MPI_Fint *type, MPI_Fint *type_keyval, MPI_Aint *attri c_ierr = ompi_attr_set_fortran_mpi2(TYPE_ATTR, c_type, &c_type->d_keyhash, - OMPI_FINT_2_INT(*type_keyval), + OMPI_FINT_2_INT(*type_keyval), *attribute_val, false); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/type_set_name_f.c b/ompi/mpi/fortran/mpif-h/type_set_name_f.c index e06ded9339b..a2333260dcd 100644 --- a/ompi/mpi/fortran/mpif-h/type_set_name_f.c +++ b/ompi/mpi/fortran/mpif-h/type_set_name_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,7 +27,8 @@ #include "ompi/communicator/communicator.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_SET_NAME = ompi_type_set_name_f #pragma weak pmpi_type_set_name = ompi_type_set_name_f #pragma weak pmpi_type_set_name_ = ompi_type_set_name_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Type_set_name_f = ompi_type_set_name_f #pragma weak PMPI_Type_set_name_f08 = ompi_type_set_name_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SET_NAME, pmpi_type_set_name, pmpi_type_set_name_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SET_NAME, (MPI_Fint *type, char *type_name, MPI_Fint *ierr, int name_len), (type, type_name, ierr, name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_SET_NAME = ompi_type_set_name_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SET_NAME, #pragma weak MPI_Type_set_name_f = ompi_type_set_name_f #pragma weak MPI_Type_set_name_f08 = ompi_type_set_name_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_SET_NAME, mpi_type_set_name, mpi_type_set_name_, @@ -61,13 +64,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_SET_NAME, ompi_type_set_name_f, (MPI_Fint *type, char *type_name, MPI_Fint *ierr, int name_len), (type, type_name, ierr, name_len) ) +#else +#define ompi_type_set_name_f pompi_type_set_name_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_set_name_f(MPI_Fint *type, char *type_name, MPI_Fint *ierr, int name_len) { @@ -75,7 +77,7 @@ void ompi_type_set_name_f(MPI_Fint *type, char *type_name, MPI_Fint *ierr, char *c_name; MPI_Datatype c_type; - c_type = MPI_Type_f2c(*type); + c_type = PMPI_Type_f2c(*type); /* Convert the fortran string */ @@ -89,7 +91,7 @@ void ompi_type_set_name_f(MPI_Fint *type, char *type_name, MPI_Fint *ierr, /* Call the C function */ - c_ierr = MPI_Type_set_name(c_type, c_name); + c_ierr = PMPI_Type_set_name(c_type, c_name); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); /* Free the C name */ diff --git a/ompi/mpi/fortran/mpif-h/type_size_f.c b/ompi/mpi/fortran/mpif-h/type_size_f.c index 8603d29a747..e4bd0f862a6 100644 --- a/ompi/mpi/fortran/mpif-h/type_size_f.c +++ b/ompi/mpi/fortran/mpif-h/type_size_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_SIZE = ompi_type_size_f #pragma weak pmpi_type_size = ompi_type_size_f #pragma weak pmpi_type_size_ = ompi_type_size_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_size_f = ompi_type_size_f #pragma weak PMPI_Type_size_f08 = ompi_type_size_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SIZE, pmpi_type_size, pmpi_type_size_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SIZE, (MPI_Fint *type, MPI_Fint *size, MPI_Fint *ierr), (type, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_SIZE = ompi_type_size_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SIZE, #pragma weak MPI_Type_size_f = ompi_type_size_f #pragma weak MPI_Type_size_f08 = ompi_type_size_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_SIZE, mpi_type_size, mpi_type_size_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_SIZE, ompi_type_size_f, (MPI_Fint *type, MPI_Fint *size, MPI_Fint *ierr), (type, size, ierr) ) +#else +#define ompi_type_size_f pompi_type_size_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_size_f(MPI_Fint *type, MPI_Fint *size, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); OMPI_SINGLE_NAME_DECL(size); - c_ierr = MPI_Type_size(c_type, OMPI_SINGLE_NAME_CONVERT(size)); + c_ierr = PMPI_Type_size(c_type, OMPI_SINGLE_NAME_CONVERT(size)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/type_size_x_f.c b/ompi/mpi/fortran/mpif-h/type_size_x_f.c index 70cad8de467..2527f49878f 100644 --- a/ompi/mpi/fortran/mpif-h/type_size_x_f.c +++ b/ompi/mpi/fortran/mpif-h/type_size_x_f.c @@ -5,17 +5,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_SIZE_X = ompi_type_size_x_f #pragma weak pmpi_type_size_x = ompi_type_size_x_f #pragma weak pmpi_type_size_x_ = ompi_type_size_x_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Type_size_x_f = ompi_type_size_x_f #pragma weak PMPI_Type_size_x_f08 = ompi_type_size_x_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SIZE_X, pmpi_type_size_x, pmpi_type_size_x_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SIZE_X, (MPI_Fint *type, MPI_Count *size, MPI_Fint *ierr), (type, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_SIZE_X = ompi_type_size_x_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_SIZE_X, #pragma weak MPI_Type_size_x_f = ompi_type_size_x_f #pragma weak MPI_Type_size_x_f08 = ompi_type_size_x_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_SIZE_X, mpi_type_size_x, mpi_type_size_x_, @@ -59,19 +62,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_SIZE_X, ompi_type_size_x_f, (MPI_Fint *type, MPI_Count *size, MPI_Fint *ierr), (type, size, ierr) ) +#else +#define ompi_type_size_x_f pompi_type_size_x_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_size_x_f(MPI_Fint *type, MPI_Count *size, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_type = MPI_Type_f2c(*type); + MPI_Datatype c_type = PMPI_Type_f2c(*type); OMPI_SINGLE_NAME_DECL(size); - c_ierr = MPI_Type_size_x(c_type, size); + c_ierr = PMPI_Type_size_x(c_type, size); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/type_struct_f.c b/ompi/mpi/fortran/mpif-h/type_struct_f.c index cd7d726af38..03a05d9a548 100644 --- a/ompi/mpi/fortran/mpif-h/type_struct_f.c +++ b/ompi/mpi/fortran/mpif-h/type_struct_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_STRUCT = ompi_type_struct_f #pragma weak pmpi_type_struct = ompi_type_struct_f #pragma weak pmpi_type_struct_ = ompi_type_struct_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Type_struct_f = ompi_type_struct_f #pragma weak PMPI_Type_struct_f08 = ompi_type_struct_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_STRUCT, pmpi_type_struct, pmpi_type_struct_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_STRUCT, (MPI_Fint *count, MPI_Fint *array_of_blocklengths, MPI_Fint *array_of_displacements, MPI_Fint *array_of_types, MPI_Fint *newtype, MPI_Fint *ierr), (count, array_of_blocklengths, array_of_displacements, array_of_types, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_STRUCT = ompi_type_struct_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_STRUCT, #pragma weak MPI_Type_struct_f = ompi_type_struct_f #pragma weak MPI_Type_struct_f08 = ompi_type_struct_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_STRUCT, mpi_type_struct, mpi_type_struct_, @@ -59,18 +62,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_STRUCT, ompi_type_struct_f, (MPI_Fint *count, MPI_Fint *array_of_blocklengths, MPI_Fint *array_of_displacements, MPI_Fint *array_of_types, MPI_Fint *newtype, MPI_Fint *ierr), (count, array_of_blocklengths, array_of_displacements, array_of_types, newtype, ierr) ) +#else +#define ompi_type_struct_f pompi_type_struct_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_TYPE_STRUCT"; void ompi_type_struct_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, - MPI_Fint *array_of_displacements, + MPI_Fint *array_of_displacements, MPI_Fint *array_of_types, MPI_Fint *newtype, MPI_Fint *ierr) { @@ -80,7 +82,7 @@ void ompi_type_struct_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, int i, c_ierr; OMPI_ARRAY_NAME_DECL(array_of_blocklengths); - c_type_old_array = (MPI_Datatype *) malloc(*count * (sizeof(MPI_Datatype) + + c_type_old_array = (MPI_Datatype *) malloc(*count * (sizeof(MPI_Datatype) + sizeof(MPI_Aint))); if (NULL == c_type_old_array) { c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, @@ -92,12 +94,12 @@ void ompi_type_struct_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, for (i = 0; i < *count; i++) { c_disp_array[i] = (MPI_Aint) array_of_displacements[i]; - c_type_old_array[i] = MPI_Type_f2c(array_of_types[i]); + c_type_old_array[i] = PMPI_Type_f2c(array_of_types[i]); } OMPI_ARRAY_FINT_2_INT(array_of_blocklengths, *count); - c_ierr = MPI_Type_struct(OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Type_struct(OMPI_FINT_2_INT(*count), OMPI_ARRAY_NAME_CONVERT(array_of_blocklengths), c_disp_array, c_type_old_array, &c_new); @@ -107,6 +109,6 @@ void ompi_type_struct_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths, free(c_type_old_array); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/type_ub_f.c b/ompi/mpi/fortran/mpif-h/type_ub_f.c index b31d348a3d1..17a468a6343 100644 --- a/ompi/mpi/fortran/mpif-h/type_ub_f.c +++ b/ompi/mpi/fortran/mpif-h/type_ub_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_UB = ompi_type_ub_f #pragma weak pmpi_type_ub = ompi_type_ub_f #pragma weak pmpi_type_ub_ = ompi_type_ub_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_ub_f = ompi_type_ub_f #pragma weak PMPI_Type_ub_f08 = ompi_type_ub_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_UB, pmpi_type_ub, pmpi_type_ub_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_UB, (MPI_Fint *mtype, MPI_Fint *ub, MPI_Fint *ierr), (mtype, ub, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_UB = ompi_type_ub_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_UB, #pragma weak MPI_Type_ub_f = ompi_type_ub_f #pragma weak MPI_Type_ub_f08 = ompi_type_ub_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_UB, mpi_type_ub, mpi_type_ub_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_UB, ompi_type_ub_f, (MPI_Fint *mtype, MPI_Fint *ub, MPI_Fint *ierr), (mtype, ub, ierr) ) +#else +#define ompi_type_ub_f pompi_type_ub_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_ub_f(MPI_Fint *mtype, MPI_Fint *ub, MPI_Fint *ierr) { int c_ierr; - MPI_Datatype c_mtype = MPI_Type_f2c(*mtype); + MPI_Datatype c_mtype = PMPI_Type_f2c(*mtype); MPI_Aint c_ub; - c_ierr = MPI_Type_ub(c_mtype, &c_ub); + c_ierr = PMPI_Type_ub(c_mtype, &c_ub); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/type_vector_f.c b/ompi/mpi/fortran/mpif-h/type_vector_f.c index 0e3a1f522f6..89650448eb9 100644 --- a/ompi/mpi/fortran/mpif-h/type_vector_f.c +++ b/ompi/mpi/fortran/mpif-h/type_vector_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_TYPE_VECTOR = ompi_type_vector_f #pragma weak pmpi_type_vector = ompi_type_vector_f #pragma weak pmpi_type_vector_ = ompi_type_vector_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Type_vector_f = ompi_type_vector_f #pragma weak PMPI_Type_vector_f08 = ompi_type_vector_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_VECTOR, pmpi_type_vector, pmpi_type_vector_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_VECTOR, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Fint *stride, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, blocklength, stride, oldtype, newtype, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_TYPE_VECTOR = ompi_type_vector_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_VECTOR, #pragma weak MPI_Type_vector_f = ompi_type_vector_f #pragma weak MPI_Type_vector_f08 = ompi_type_vector_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_VECTOR, mpi_type_vector, mpi_type_vector_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_VECTOR, ompi_type_vector_f, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Fint *stride, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr), (count, blocklength, stride, oldtype, newtype, ierr) ) +#else +#define ompi_type_vector_f pompi_type_vector_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_type_vector_f(MPI_Fint *count, MPI_Fint *blocklength, MPI_Fint *stride, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr) @@ -72,15 +74,15 @@ void ompi_type_vector_f(MPI_Fint *count, MPI_Fint *blocklength, MPI_Datatype c_old; MPI_Datatype c_new; - c_old = MPI_Type_f2c(*oldtype); + c_old = PMPI_Type_f2c(*oldtype); - c_ierr = MPI_Type_vector(OMPI_FINT_2_INT(*count), + c_ierr = PMPI_Type_vector(OMPI_FINT_2_INT(*count), OMPI_FINT_2_INT(*blocklength), OMPI_FINT_2_INT(*stride), c_old, &c_new); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *newtype = MPI_Type_c2f(c_new); + *newtype = PMPI_Type_c2f(c_new); } } diff --git a/ompi/mpi/fortran/mpif-h/unpack_external_f.c b/ompi/mpi/fortran/mpif-h/unpack_external_f.c index eeec08570e6..ad10f73ad5e 100644 --- a/ompi/mpi/fortran/mpif-h/unpack_external_f.c +++ b/ompi/mpi/fortran/mpif-h/unpack_external_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,7 +27,8 @@ #include "ompi/mpi/fortran/base/constants.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_UNPACK_EXTERNAL = ompi_unpack_external_f #pragma weak pmpi_unpack_external = ompi_unpack_external_f #pragma weak pmpi_unpack_external_ = ompi_unpack_external_f @@ -33,7 +36,7 @@ #pragma weak PMPI_Unpack_external_f = ompi_unpack_external_f #pragma weak PMPI_Unpack_external_f08 = ompi_unpack_external_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_UNPACK_EXTERNAL, pmpi_unpack_external, pmpi_unpack_external_, @@ -42,6 +45,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_UNPACK_EXTERNAL, (char *datarep, char *inbuf, MPI_Aint *insize, MPI_Aint *position, char *outbuf, MPI_Fint *outcount, MPI_Fint *datatype, MPI_Fint *ierr, int datarep_len), (datarep, inbuf, insize, position, outbuf, outcount, datatype, ierr, datarep_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_UNPACK_EXTERNAL = ompi_unpack_external_f @@ -51,9 +55,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_UNPACK_EXTERNAL, #pragma weak MPI_Unpack_external_f = ompi_unpack_external_f #pragma weak MPI_Unpack_external_f08 = ompi_unpack_external_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_UNPACK_EXTERNAL, mpi_unpack_external, mpi_unpack_external_, @@ -61,15 +64,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_UNPACK_EXTERNAL, ompi_unpack_external_f, (char *datarep, char *inbuf, MPI_Aint *insize, MPI_Aint *position, char *outbuf, MPI_Fint *outcount, MPI_Fint *datatype, MPI_Fint *ierr, int datarep_len), (datarep, inbuf, insize, position, outbuf, outcount, datatype, ierr, datarep_len) ) +#else +#define ompi_unpack_external_f pompi_unpack_external_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_unpack_external_f (char *datarep, char *inbuf, MPI_Aint *insize, - MPI_Aint *position, char *outbuf, + MPI_Aint *position, char *outbuf, MPI_Fint *outcount, MPI_Fint *datatype, MPI_Fint *ierr, int datarep_len) { @@ -77,10 +79,10 @@ void ompi_unpack_external_f (char *datarep, char *inbuf, MPI_Aint *insize, char *c_datarep; MPI_Datatype c_type; - c_type = MPI_Type_f2c(*datatype); + c_type = PMPI_Type_f2c(*datatype); /* Convert the fortran string */ - + if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(datarep, datarep_len, &c_datarep))) { c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, @@ -89,8 +91,8 @@ void ompi_unpack_external_f (char *datarep, char *inbuf, MPI_Aint *insize, return; } - c_ierr = MPI_Unpack_external(c_datarep, inbuf, - *insize, + c_ierr = PMPI_Unpack_external(c_datarep, inbuf, + *insize, position, OMPI_F2C_BOTTOM(outbuf), OMPI_FINT_2_INT(*outcount), diff --git a/ompi/mpi/fortran/mpif-h/unpack_f.c b/ompi/mpi/fortran/mpif-h/unpack_f.c index 9c653072e73..b97e7d09b9a 100644 --- a/ompi/mpi/fortran/mpif-h/unpack_f.c +++ b/ompi/mpi/fortran/mpif-h/unpack_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_UNPACK = ompi_unpack_f #pragma weak pmpi_unpack = ompi_unpack_f #pragma weak pmpi_unpack_ = ompi_unpack_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Unpack_f = ompi_unpack_f #pragma weak PMPI_Unpack_f08 = ompi_unpack_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_UNPACK, pmpi_unpack, pmpi_unpack_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_UNPACK, (char *inbuf, MPI_Fint *insize, MPI_Fint *position, char *outbuf, MPI_Fint *outcount, MPI_Fint *datatype, MPI_Fint *comm, MPI_Fint *ierr), (inbuf, insize, position, outbuf, outcount, datatype, comm, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_UNPACK = ompi_unpack_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_UNPACK, #pragma weak MPI_Unpack_f = ompi_unpack_f #pragma weak MPI_Unpack_f08 = ompi_unpack_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_UNPACK, mpi_unpack, mpi_unpack_, @@ -58,13 +61,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_UNPACK, ompi_unpack_f, (char *inbuf, MPI_Fint *insize, MPI_Fint *position, char *outbuf, MPI_Fint *outcount, MPI_Fint *datatype, MPI_Fint *comm, MPI_Fint *ierr), (inbuf, insize, position, outbuf, outcount, datatype, comm, ierr) ) +#else +#define ompi_unpack_f pompi_unpack_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_unpack_f(char *inbuf, MPI_Fint *insize, MPI_Fint *position, char *outbuf, MPI_Fint *outcount, MPI_Fint *datatype, MPI_Fint *comm, MPI_Fint *ierr) @@ -74,11 +76,11 @@ void ompi_unpack_f(char *inbuf, MPI_Fint *insize, MPI_Fint *position, MPI_Datatype c_type; OMPI_SINGLE_NAME_DECL(position); - c_comm = MPI_Comm_f2c(*comm); - c_type = MPI_Type_f2c(*datatype); + c_comm = PMPI_Comm_f2c(*comm); + c_type = PMPI_Type_f2c(*datatype); OMPI_SINGLE_FINT_2_INT(position); - c_ierr = MPI_Unpack(inbuf, OMPI_FINT_2_INT(*insize), + c_ierr = PMPI_Unpack(inbuf, OMPI_FINT_2_INT(*insize), OMPI_SINGLE_NAME_CONVERT(position), OMPI_F2C_BOTTOM(outbuf), OMPI_FINT_2_INT(*outcount), c_type, c_comm); diff --git a/ompi/mpi/fortran/mpif-h/unpublish_name_f.c b/ompi/mpi/fortran/mpif-h/unpublish_name_f.c index d27fd64d545..290b02dfb45 100644 --- a/ompi/mpi/fortran/mpif-h/unpublish_name_f.c +++ b/ompi/mpi/fortran/mpif-h/unpublish_name_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_UNPUBLISH_NAME = ompi_unpublish_name_f #pragma weak pmpi_unpublish_name = ompi_unpublish_name_f #pragma weak pmpi_unpublish_name_ = ompi_unpublish_name_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Unpublish_name_f = ompi_unpublish_name_f #pragma weak PMPI_Unpublish_name_f08 = ompi_unpublish_name_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_UNPUBLISH_NAME, pmpi_unpublish_name, pmpi_unpublish_name_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_UNPUBLISH_NAME, (char *service_name, MPI_Fint *info, char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len), (service_name, info, port_name, ierr, service_name_len, port_name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_UNPUBLISH_NAME = ompi_unpublish_name_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_UNPUBLISH_NAME, #pragma weak MPI_Unpublish_name_f = ompi_unpublish_name_f #pragma weak MPI_Unpublish_name_f08 = ompi_unpublish_name_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_UNPUBLISH_NAME, mpi_unpublish_name, mpi_unpublish_name_, @@ -58,15 +61,14 @@ OMPI_GENERATE_F77_BINDINGS (MPI_UNPUBLISH_NAME, ompi_unpublish_name_f, (char *service_name, MPI_Fint *info, char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len), (service_name, info, port_name, ierr, service_name_len, port_name_len) ) +#else +#define ompi_unpublish_name_f pompi_unpublish_name_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_unpublish_name_f(char *service_name, MPI_Fint *info, - char *port_name, MPI_Fint *ierr, + char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len) { int c_ierr; @@ -74,11 +76,11 @@ void ompi_unpublish_name_f(char *service_name, MPI_Fint *info, char *c_service_name; char *c_port_name; - c_info = MPI_Info_f2c(*info); + c_info = PMPI_Info_f2c(*info); ompi_fortran_string_f2c(service_name, service_name_len, &c_service_name); ompi_fortran_string_f2c(port_name, port_name_len, &c_port_name); - c_ierr = MPI_Unpublish_name(c_service_name, c_info, c_port_name); + c_ierr = PMPI_Unpublish_name(c_service_name, c_info, c_port_name); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); free ( c_service_name); diff --git a/ompi/mpi/fortran/mpif-h/wait_f.c b/ompi/mpi/fortran/mpif-h/wait_f.c index 0e91ab6c499..7b67686f02e 100644 --- a/ompi/mpi/fortran/mpif-h/wait_f.c +++ b/ompi/mpi/fortran/mpif-h/wait_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WAIT = ompi_wait_f #pragma weak pmpi_wait = ompi_wait_f #pragma weak pmpi_wait_ = ompi_wait_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Wait_f = ompi_wait_f #pragma weak PMPI_Wait_f08 = ompi_wait_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WAIT, pmpi_wait, pmpi_wait_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WAIT, (MPI_Fint *request, MPI_Fint *status, MPI_Fint *ierr), (request, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WAIT = ompi_wait_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WAIT, #pragma weak MPI_Wait_f = ompi_wait_f #pragma weak MPI_Wait_f08 = ompi_wait_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WAIT, mpi_wait, mpi_wait_, @@ -58,26 +61,25 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WAIT, ompi_wait_f, (MPI_Fint *request, MPI_Fint *status, MPI_Fint *ierr), (request, status, ierr) ) +#else +#define ompi_wait_f pompi_wait_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_wait_f(MPI_Fint *request, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - MPI_Request c_req = MPI_Request_f2c(*request); + MPI_Request c_req = PMPI_Request_f2c(*request); MPI_Status c_status; - c_ierr = MPI_Wait(&c_req, &c_status); + c_ierr = PMPI_Wait(&c_req, &c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { *request = OMPI_INT_2_FINT(c_req->req_f_to_c_index); if (!OMPI_IS_FORTRAN_STATUS_IGNORE(status)) { - MPI_Status_c2f(&c_status, status); + PMPI_Status_c2f(&c_status, status); } } } diff --git a/ompi/mpi/fortran/mpif-h/waitall_f.c b/ompi/mpi/fortran/mpif-h/waitall_f.c index 5cabd6274e4..e1da2c76f4c 100644 --- a/ompi/mpi/fortran/mpif-h/waitall_f.c +++ b/ompi/mpi/fortran/mpif-h/waitall_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WAITALL = ompi_waitall_f #pragma weak pmpi_waitall = ompi_waitall_f #pragma weak pmpi_waitall_ = ompi_waitall_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Waitall_f = ompi_waitall_f #pragma weak PMPI_Waitall_f08 = ompi_waitall_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WAITALL, pmpi_waitall, pmpi_waitall_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WAITALL, (MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *array_of_statuses, MPI_Fint *ierr), (count, array_of_requests, array_of_statuses, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WAITALL = ompi_waitall_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WAITALL, #pragma weak MPI_Waitall_f = ompi_waitall_f #pragma weak MPI_Waitall_f08 = ompi_waitall_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WAITALL, mpi_waitall, mpi_waitall_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WAITALL, ompi_waitall_f, (MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *array_of_statuses, MPI_Fint *ierr), (count, array_of_requests, array_of_statuses, ierr) ) +#else +#define ompi_waitall_f pompi_waitall_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_WAITALL"; @@ -96,10 +98,10 @@ void ompi_waitall_f(MPI_Fint *count, MPI_Fint *array_of_requests, c_status = (MPI_Status*) (c_req + OMPI_FINT_2_INT(*count)); for (i = 0; i < OMPI_FINT_2_INT(*count); ++i) { - c_req[i] = MPI_Request_f2c(array_of_requests[i]); + c_req[i] = PMPI_Request_f2c(array_of_requests[i]); } - c_ierr = MPI_Waitall(OMPI_FINT_2_INT(*count), c_req, c_status); + c_ierr = PMPI_Waitall(OMPI_FINT_2_INT(*count), c_req, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { @@ -107,7 +109,7 @@ void ompi_waitall_f(MPI_Fint *count, MPI_Fint *array_of_requests, array_of_requests[i] = c_req[i]->req_f_to_c_index; if (!OMPI_IS_FORTRAN_STATUSES_IGNORE(array_of_statuses) && !OMPI_IS_FORTRAN_STATUS_IGNORE(&array_of_statuses[i])) { - MPI_Status_c2f( &c_status[i], &array_of_statuses[i * (sizeof(MPI_Status) / sizeof(int))]); + PMPI_Status_c2f( &c_status[i], &array_of_statuses[i * (sizeof(MPI_Status) / sizeof(int))]); } } } diff --git a/ompi/mpi/fortran/mpif-h/waitany_f.c b/ompi/mpi/fortran/mpif-h/waitany_f.c index 4bac9907c30..033328febde 100644 --- a/ompi/mpi/fortran/mpif-h/waitany_f.c +++ b/ompi/mpi/fortran/mpif-h/waitany_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WAITANY = ompi_waitany_f #pragma weak pmpi_waitany = ompi_waitany_f #pragma weak pmpi_waitany_ = ompi_waitany_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Waitany_f = ompi_waitany_f #pragma weak PMPI_Waitany_f08 = ompi_waitany_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WAITANY, pmpi_waitany, pmpi_waitany_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WAITANY, (MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *indx, MPI_Fint *status, MPI_Fint *ierr), (count, array_of_requests, indx, status, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WAITANY = ompi_waitany_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WAITANY, #pragma weak MPI_Waitany_f = ompi_waitany_f #pragma weak MPI_Waitany_f08 = ompi_waitany_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WAITANY, mpi_waitany, mpi_waitany_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WAITANY, ompi_waitany_f, (MPI_Fint *count, MPI_Fint *array_of_requests, MPI_Fint *indx, MPI_Fint *status, MPI_Fint *ierr), (count, array_of_requests, indx, status, ierr) ) +#else +#define ompi_waitany_f pompi_waitany_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_WAITANY"; @@ -82,7 +84,7 @@ void ompi_waitany_f(MPI_Fint *count, MPI_Fint *array_of_requests, skipping other parameter error checks. 
*/ if (OPAL_UNLIKELY(0 == OMPI_FINT_2_INT(*count))) { *indx = OMPI_INT_2_FINT(MPI_UNDEFINED); - MPI_Status_c2f(&ompi_status_empty, status); + PMPI_Status_c2f(&ompi_status_empty, status); *ierr = OMPI_INT_2_FINT(MPI_SUCCESS); return; } @@ -96,10 +98,10 @@ void ompi_waitany_f(MPI_Fint *count, MPI_Fint *array_of_requests, } for (i = 0; i < OMPI_FINT_2_INT(*count); ++i) { - c_req[i] = MPI_Request_f2c(array_of_requests[i]); + c_req[i] = PMPI_Request_f2c(array_of_requests[i]); } - c_ierr = MPI_Waitany(OMPI_FINT_2_INT(*count), c_req, + c_ierr = PMPI_Waitany(OMPI_FINT_2_INT(*count), c_req, OMPI_SINGLE_NAME_CONVERT(indx), &c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -115,7 +117,7 @@ void ompi_waitany_f(MPI_Fint *count, MPI_Fint *array_of_requests, ++(*indx); } if (!OMPI_IS_FORTRAN_STATUS_IGNORE(status)) { - MPI_Status_c2f(&c_status, status); + PMPI_Status_c2f(&c_status, status); } } free(c_req); diff --git a/ompi/mpi/fortran/mpif-h/waitsome_f.c b/ompi/mpi/fortran/mpif-h/waitsome_f.c index 1f8eb5dbee3..64f9853249b 100644 --- a/ompi/mpi/fortran/mpif-h/waitsome_f.c +++ b/ompi/mpi/fortran/mpif-h/waitsome_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WAITSOME = ompi_waitsome_f #pragma weak pmpi_waitsome = ompi_waitsome_f #pragma weak pmpi_waitsome_ = ompi_waitsome_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Waitsome_f = ompi_waitsome_f #pragma weak PMPI_Waitsome_f08 = ompi_waitsome_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WAITSOME, pmpi_waitsome, pmpi_waitsome_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WAITSOME, (MPI_Fint *incount, MPI_Fint *array_of_requests, MPI_Fint *outcount, MPI_Fint *array_of_indices, MPI_Fint *array_of_statuses, MPI_Fint *ierr), (incount, array_of_requests, outcount, array_of_indices, array_of_statuses, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WAITSOME = ompi_waitsome_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WAITSOME, #pragma weak MPI_Waitsome_f = ompi_waitsome_f #pragma weak MPI_Waitsome_f08 = ompi_waitsome_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WAITSOME, mpi_waitsome, mpi_waitsome_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WAITSOME, ompi_waitsome_f, (MPI_Fint *incount, MPI_Fint *array_of_requests, MPI_Fint *outcount, MPI_Fint *array_of_indices, MPI_Fint *array_of_statuses, MPI_Fint *ierr), (incount, array_of_requests, outcount, array_of_indices, array_of_statuses, ierr) ) +#else +#define ompi_waitsome_f pompi_waitsome_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + static const char FUNC_NAME[] = "MPI_WAITSOME"; @@ -101,13 +103,13 @@ void ompi_waitsome_f(MPI_Fint *incount, MPI_Fint *array_of_requests, c_status = (MPI_Status*) (c_req + OMPI_FINT_2_INT(*incount)); for (i = 0; i < OMPI_FINT_2_INT(*incount); ++i) { - c_req[i] = MPI_Request_f2c(array_of_requests[i]); + c_req[i] = PMPI_Request_f2c(array_of_requests[i]); } OMPI_ARRAY_FINT_2_INT_ALLOC(array_of_indices, *incount); - c_ierr = MPI_Waitsome(OMPI_FINT_2_INT(*incount), c_req, + c_ierr = PMPI_Waitsome(OMPI_FINT_2_INT(*incount), c_req, OMPI_SINGLE_NAME_CONVERT(outcount), - OMPI_ARRAY_NAME_CONVERT(array_of_indices), + OMPI_ARRAY_NAME_CONVERT(array_of_indices), c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); @@ -127,7 +129,7 @@ void ompi_waitsome_f(MPI_Fint *incount, MPI_Fint *array_of_requests, if (!OMPI_IS_FORTRAN_STATUSES_IGNORE(array_of_statuses)) { for (i = 0; i < OMPI_FINT_2_INT(*incount); ++i) { if (!OMPI_IS_FORTRAN_STATUS_IGNORE(&array_of_statuses[i])) { - MPI_Status_c2f(&c_status[i], &array_of_statuses[i * (sizeof(MPI_Status) / sizeof(int))]); + PMPI_Status_c2f(&c_status[i], &array_of_statuses[i * (sizeof(MPI_Status) / sizeof(int))]); } } } diff --git a/ompi/mpi/fortran/mpif-h/win_allocate_f.c b/ompi/mpi/fortran/mpif-h/win_allocate_f.c index 9e956a90d16..1685892d1b6 100644 --- a/ompi/mpi/fortran/mpif-h/win_allocate_f.c +++ b/ompi/mpi/fortran/mpif-h/win_allocate_f.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_ALLOCATE = ompi_win_allocate_f #pragma weak pmpi_win_allocate = ompi_win_allocate_f #pragma weak pmpi_win_allocate_ = ompi_win_allocate_f @@ -37,7 +40,7 @@ #pragma weak PMPI_Win_allocate_cptr_f = ompi_win_allocate_f #pragma weak PMPI_Win_allocate_cptr_f08 = ompi_win_allocate_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_ALLOCATE, pmpi_win_allocate, pmpi_win_allocate_, @@ -58,6 +61,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_ALLOCATE_CPTR, MPI_Fint *win, MPI_Fint *ierr), (size, disp_unit, info, comm, baseptr, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_ALLOCATE = ompi_win_allocate_f @@ -75,9 +79,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_ALLOCATE_CPTR, #pragma weak MPI_Win_allocate_cptr_f = ompi_win_allocate_f #pragma weak MPI_Win_allocate_cptr_f08 = ompi_win_allocate_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_ALLOCATE, mpi_win_allocate, mpi_win_allocate_, @@ -97,13 +100,13 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_ALLOCATE_CPTR, MPI_Fint *info, MPI_Fint *comm, char *baseptr, MPI_Fint *win, MPI_Fint *ierr), (size, disp_unit, info, comm, baseptr, win, ierr) ) +#else +#define ompi_win_allocate_f pompi_win_allocate_f +#define ompi_win_allocate_cptr_f pompi_win_allocate_cptr_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_allocate_f(MPI_Aint *size, MPI_Fint *disp_unit, MPI_Fint *info, MPI_Fint *comm, char *baseptr, MPI_Fint *win, MPI_Fint *ierr) @@ -113,13 +116,13 @@ void ompi_win_allocate_f(MPI_Aint *size, MPI_Fint *disp_unit, MPI_Comm c_comm; MPI_Win c_win; - c_info = MPI_Info_f2c(*info); - c_comm = MPI_Comm_f2c(*comm); + c_info = PMPI_Info_f2c(*info); + c_comm = PMPI_Comm_f2c(*comm); - c_ierr = MPI_Win_allocate(*size, OMPI_FINT_2_INT(*disp_unit), + c_ierr = PMPI_Win_allocate(*size, OMPI_FINT_2_INT(*disp_unit), c_info, c_comm, baseptr, &c_win); - *win = MPI_Win_c2f(c_win); + *win = PMPI_Win_c2f(c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_allocate_shared_f.c b/ompi/mpi/fortran/mpif-h/win_allocate_shared_f.c index 0a5c604bc87..f5b275c8f5a 100644 --- a/ompi/mpi/fortran/mpif-h/win_allocate_shared_f.c +++ b/ompi/mpi/fortran/mpif-h/win_allocate_shared_f.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_ALLOCATE_SHARED = ompi_win_allocate_shared_f #pragma weak pmpi_win_allocate_shared = ompi_win_allocate_shared_f #pragma weak pmpi_win_allocate_shared_ = ompi_win_allocate_shared_f @@ -37,7 +40,7 @@ #pragma weak PMPI_Win_allocate_shared_cptr_f = ompi_win_allocate_shared_f #pragma weak PMPI_Win_allocate_shared_cptr_f08 = ompi_win_allocate_shared_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_ALLOCATE_SHARED, pmpi_win_allocate_shared, pmpi_win_allocate_shared_, @@ -58,6 +61,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_ALLOCATE_SHARED_CPTR, MPI_Fint *win, MPI_Fint *ierr), (size, disp_unit, info, comm, baseptr, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_ALLOCATE_SHARED = ompi_win_allocate_shared_f @@ -75,9 +79,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_ALLOCATE_SHARED_CPTR, #pragma weak MPI_Win_allocate_shared_cptr_f = ompi_win_allocate_shared_f #pragma weak MPI_Win_allocate_shared_cptr_f08 = ompi_win_allocate_shared_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_ALLOCATE_SHARED, mpi_win_allocate_shared, mpi_win_allocate_shared_, @@ -97,13 +100,13 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_ALLOCATE_SHARED_CPTR, MPI_Fint *info, MPI_Fint *comm, char *baseptr, MPI_Fint *win, MPI_Fint *ierr), (size, disp_unit, info, comm, baseptr, win, ierr) ) +#else +#define ompi_win_allocate_shared_f pompi_win_allocate_shared_f +#define ompi_win_allocate_shared_cptr_f pompi_win_allocate_shared_cptr_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_allocate_shared_f(MPI_Aint *size, MPI_Fint *disp_unit, MPI_Fint *info, MPI_Fint *comm, char *baseptr, MPI_Fint *win, MPI_Fint *ierr) @@ -113,13 +116,13 @@ void ompi_win_allocate_shared_f(MPI_Aint *size, MPI_Fint *disp_unit, MPI_Comm c_comm; MPI_Win c_win; - c_info = MPI_Info_f2c(*info); - c_comm = MPI_Comm_f2c(*comm); + c_info = PMPI_Info_f2c(*info); + c_comm = PMPI_Comm_f2c(*comm); - c_ierr = MPI_Win_allocate_shared(*size, OMPI_FINT_2_INT(*disp_unit), + c_ierr = PMPI_Win_allocate_shared(*size, OMPI_FINT_2_INT(*disp_unit), c_info, c_comm, baseptr, &c_win); - *win = MPI_Win_c2f(c_win); + *win = PMPI_Win_c2f(c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_attach_f.c b/ompi/mpi/fortran/mpif-h/win_attach_f.c index 2bd14f04520..29bd0447134 100644 --- a/ompi/mpi/fortran/mpif-h/win_attach_f.c +++ b/ompi/mpi/fortran/mpif-h/win_attach_f.c @@ -2,9 +2,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -12,15 +12,16 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_ATTACH = ompi_win_attach_f #pragma weak pmpi_win_attach = ompi_win_attach_f #pragma weak pmpi_win_attach_ = ompi_win_attach_f #pragma weak pmpi_win_attach__ = ompi_win_attach_f -#pragma weak PMPI_Win_create_f = ompi_win_attach_f -#pragma weak PMPI_Win_create_f08 = ompi_win_attach_f -#elif OMPI_PROFILE_LAYER +#pragma weak PMPI_Win_attach_f = ompi_win_attach_f +#pragma weak PMPI_Win_attach_f08 = ompi_win_attach_f +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_ATTACH, pmpi_win_attach, pmpi_win_attach_, @@ -29,6 +30,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_ATTACH, (MPI_Fint *win, char *base, MPI_Aint *size, MPI_Fint *ierr), (win, base, size, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_ATTACH = ompi_win_attach_f @@ -36,11 +38,10 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_ATTACH, #pragma weak mpi_win_attach_ = ompi_win_attach_f #pragma weak mpi_win_attach__ = ompi_win_attach_f -#pragma weak MPI_Win_create_f = ompi_win_attach_f -#pragma weak MPI_Win_create_f08 = ompi_win_attach_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#pragma weak MPI_Win_attach_f = ompi_win_attach_f +#pragma weak MPI_Win_attach_f08 = ompi_win_attach_f +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_ATTACH, mpi_win_attach, mpi_win_attach_, @@ -48,20 +49,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_ATTACH, ompi_win_attach_f, (MPI_Fint *win, char *base, MPI_Aint *size, MPI_Fint *ierr), (win, base, size, ierr) ) +#else +#define ompi_win_attach_f pompi_win_attach_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_attach_f(MPI_Fint *win, char *base, MPI_Aint *size, MPI_Fint *ierr) { int c_ierr; MPI_Win c_win; - c_win = MPI_Win_f2c(*win); - c_ierr = MPI_Win_attach(c_win, base, *size); + c_win = PMPI_Win_f2c(*win); + c_ierr = PMPI_Win_attach(c_win, base, *size); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_call_errhandler_f.c b/ompi/mpi/fortran/mpif-h/win_call_errhandler_f.c index 04de93d9c92..3e3b0cc9c30 100644 --- a/ompi/mpi/fortran/mpif-h/win_call_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/win_call_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_CALL_ERRHANDLER = ompi_win_call_errhandler_f #pragma weak pmpi_win_call_errhandler = ompi_win_call_errhandler_f #pragma weak pmpi_win_call_errhandler_ = ompi_win_call_errhandler_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_call_errhandler_f = ompi_win_call_errhandler_f #pragma weak PMPI_Win_call_errhandler_f08 = ompi_win_call_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CALL_ERRHANDLER, pmpi_win_call_errhandler, pmpi_win_call_errhandler_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CALL_ERRHANDLER, (MPI_Fint *win, MPI_Fint *errorcode, MPI_Fint *ierr), (win, errorcode, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_CALL_ERRHANDLER = ompi_win_call_errhandler_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CALL_ERRHANDLER, #pragma weak MPI_Win_call_errhandler_f = ompi_win_call_errhandler_f #pragma weak MPI_Win_call_errhandler_f08 = ompi_win_call_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CALL_ERRHANDLER, mpi_win_call_errhandler, mpi_win_call_errhandler_, @@ -57,21 +60,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CALL_ERRHANDLER, ompi_win_call_errhandler_f, (MPI_Fint *win, MPI_Fint *errorcode, MPI_Fint *ierr), (win, errorcode, ierr) ) +#else +#define ompi_win_call_errhandler_f pompi_win_call_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_call_errhandler_f(MPI_Fint *win, MPI_Fint *errorcode, MPI_Fint *ierr) { int c_ierr; MPI_Win c_win; - c_win = MPI_Win_f2c(*win); + c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_call_errhandler(c_win, OMPI_FINT_2_INT(*errorcode)); + c_ierr = PMPI_Win_call_errhandler(c_win, OMPI_FINT_2_INT(*errorcode)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_complete_f.c b/ompi/mpi/fortran/mpif-h/win_complete_f.c index 0ecb1ce2437..4d40ffe4801 100644 --- a/ompi/mpi/fortran/mpif-h/win_complete_f.c +++ b/ompi/mpi/fortran/mpif-h/win_complete_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_COMPLETE = ompi_win_complete_f #pragma weak pmpi_win_complete = ompi_win_complete_f #pragma weak pmpi_win_complete_ = ompi_win_complete_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_complete_f = ompi_win_complete_f #pragma weak PMPI_Win_complete_f08 = ompi_win_complete_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_COMPLETE, pmpi_win_complete, pmpi_win_complete_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_COMPLETE, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_COMPLETE = ompi_win_complete_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_COMPLETE, #pragma weak MPI_Win_complete_f = ompi_win_complete_f #pragma weak MPI_Win_complete_f08 = ompi_win_complete_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_COMPLETE, mpi_win_complete, mpi_win_complete_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_COMPLETE, ompi_win_complete_f, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) +#else +#define ompi_win_complete_f pompi_win_complete_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_complete_f(MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; MPI_Win c_win; - c_win = MPI_Win_f2c(*win); + c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_complete(c_win); + c_ierr = PMPI_Win_complete(c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_create_dynamic_f.c b/ompi/mpi/fortran/mpif-h/win_create_dynamic_f.c index 35ac451dc16..afc7846228a 100644 --- a/ompi/mpi/fortran/mpif-h/win_create_dynamic_f.c +++ b/ompi/mpi/fortran/mpif-h/win_create_dynamic_f.c @@ -2,9 +2,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -12,7 +12,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_CREATE_DYNAMIC = ompi_win_create_dynamic_f #pragma weak pmpi_win_create_dynamic = ompi_win_create_dynamic_f #pragma weak pmpi_win_create_dynamic_ = ompi_win_create_dynamic_f @@ -20,7 +21,7 @@ #pragma weak PMPI_Win_create_dynamic_f = ompi_win_create_dynamic_f #pragma weak PMPI_Win_create_dynamic_f08 = ompi_win_create_dynamic_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_DYNAMIC, pmpi_win_create_dynamic, pmpi_win_create_dynamic_, @@ -29,6 +30,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_DYNAMIC, (MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win, MPI_Fint *ierr), (info, comm, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_CREATE_DYNAMIC = ompi_win_create_dynamic_f @@ -38,9 +40,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_DYNAMIC, #pragma weak MPI_Win_create_dynamic_f = ompi_win_create_dynamic_f #pragma weak MPI_Win_create_dynamic_f08 = ompi_win_create_dynamic_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! 
OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE_DYNAMIC, mpi_win_create_dynamic, mpi_win_create_dynamic_, @@ -48,13 +49,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE_DYNAMIC, ompi_win_create_dynamic_f, (MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win, MPI_Fint *ierr), (info, comm, win, ierr) ) +#else +#define ompi_win_create_dynamic_f pompi_win_create_dynamic_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_create_dynamic_f(MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win, MPI_Fint *ierr) { @@ -63,13 +63,13 @@ void ompi_win_create_dynamic_f(MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win, MPI_Info c_info; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c(*comm); - c_info = MPI_Info_f2c(*info); + c_comm = PMPI_Comm_f2c(*comm); + c_info = PMPI_Info_f2c(*info); - c_ierr = MPI_Win_create_dynamic(c_info, c_comm, &c_win); + c_ierr = PMPI_Win_create_dynamic(c_info, c_comm, &c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *win = MPI_Win_c2f(c_win); + *win = PMPI_Win_c2f(c_win); } } diff --git a/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c b/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c index 6b46dc35585..22be461a7be 100644 --- a/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_CREATE_ERRHANDLER = ompi_win_create_errhandler_f #pragma weak pmpi_win_create_errhandler = ompi_win_create_errhandler_f #pragma weak pmpi_win_create_errhandler_ = ompi_win_create_errhandler_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Win_create_errhandler_f = ompi_win_create_errhandler_f #pragma weak PMPI_Win_create_errhandler_f08 = ompi_win_create_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_ERRHANDLER, pmpi_win_create_errhandler, pmpi_win_create_errhandler_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_ERRHANDLER, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr), (function, errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_CREATE_ERRHANDLER = ompi_win_create_errhandler_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_ERRHANDLER, #pragma weak MPI_Win_create_errhandler_f = ompi_win_create_errhandler_f #pragma weak MPI_Win_create_errhandler_f08 = ompi_win_create_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE_ERRHANDLER, mpi_win_create_errhandler, mpi_win_create_errhandler_, @@ -59,25 +62,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE_ERRHANDLER, ompi_win_create_errhandler_f, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr), (function, errhandler, ierr) ) +#else +#define ompi_win_create_errhandler_f pompi_win_create_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif static const char FUNC_NAME[] = "MPI_WIN_CREATE_ERRHANDLER"; -void ompi_win_create_errhandler_f(ompi_errhandler_fortran_handler_fn_t* function, +void ompi_win_create_errhandler_f(ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr) { - MPI_Errhandler c_errhandler = + MPI_Errhandler c_errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_WIN, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_FORTRAN); if (MPI_ERRHANDLER_NULL != c_errhandler) { - *errhandler = MPI_Errhandler_c2f(c_errhandler); + *errhandler = PMPI_Errhandler_c2f(c_errhandler); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(MPI_SUCCESS); } else { if (NULL != ierr) *ierr = OMPI_INT_2_FINT(MPI_ERR_INTERN); diff --git a/ompi/mpi/fortran/mpif-h/win_create_f.c b/ompi/mpi/fortran/mpif-h/win_create_f.c index 95ed62571b8..e98ad24a7ae 100644 --- a/ompi/mpi/fortran/mpif-h/win_create_f.c +++ b/ompi/mpi/fortran/mpif-h/win_create_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. 
All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_CREATE = ompi_win_create_f #pragma weak pmpi_win_create = ompi_win_create_f #pragma weak pmpi_win_create_ = ompi_win_create_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_create_f = ompi_win_create_f #pragma weak PMPI_Win_create_f08 = ompi_win_create_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE, pmpi_win_create, pmpi_win_create_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE, (char *base, MPI_Aint *size, MPI_Fint *disp_unit, MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win, MPI_Fint *ierr), (base, size, disp_unit, info, comm, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_CREATE = ompi_win_create_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE, #pragma weak MPI_Win_create_f = ompi_win_create_f #pragma weak MPI_Win_create_f08 = ompi_win_create_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE, mpi_win_create, mpi_win_create_, @@ -57,13 +60,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE, ompi_win_create_f, (char *base, MPI_Aint *size, MPI_Fint *disp_unit, MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win, MPI_Fint *ierr), (base, size, disp_unit, info, comm, win, ierr) ) +#else +#define ompi_win_create_f pompi_win_create_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_create_f(char *base, MPI_Aint *size, MPI_Fint *disp_unit, MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win, MPI_Fint *ierr) @@ -73,15 +75,15 @@ void ompi_win_create_f(char *base, MPI_Aint *size, MPI_Fint *disp_unit, MPI_Info c_info; MPI_Comm c_comm; - c_comm = MPI_Comm_f2c(*comm); - c_info = MPI_Info_f2c(*info); + c_comm = PMPI_Comm_f2c(*comm); + c_info = PMPI_Info_f2c(*info); - c_ierr = MPI_Win_create(base, *size, + c_ierr = PMPI_Win_create(base, *size, OMPI_FINT_2_INT(*disp_unit), c_info, c_comm, &c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *win = MPI_Win_c2f(c_win); + *win = PMPI_Win_c2f(c_win); } } diff --git a/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c b/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c index 8d3e4eb2b0c..6c1ea88df10 100644 --- a/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_CREATE_KEYVAL = ompi_win_create_keyval_f #pragma weak pmpi_win_create_keyval = ompi_win_create_keyval_f #pragma weak pmpi_win_create_keyval_ = ompi_win_create_keyval_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Win_create_keyval_f = ompi_win_create_keyval_f #pragma weak PMPI_Win_create_keyval_f08 = ompi_win_create_keyval_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_KEYVAL, pmpi_win_create_keyval, pmpi_win_create_keyval_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_KEYVAL, (ompi_mpi2_fortran_copy_attr_function* win_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (win_copy_attr_fn, win_delete_attr_fn, win_keyval, extra_state, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_CREATE_KEYVAL = ompi_win_create_keyval_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_KEYVAL, #pragma weak MPI_Win_create_keyval_f = ompi_win_create_keyval_f #pragma weak MPI_Win_create_keyval_f08 = ompi_win_create_keyval_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE_KEYVAL, mpi_win_create_keyval, mpi_win_create_keyval_, @@ -58,11 +61,9 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE_KEYVAL, ompi_win_create_keyval_f, (ompi_mpi2_fortran_copy_attr_function* win_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (win_copy_attr_fn, win_delete_attr_fn, win_keyval, extra_state, ierr) ) +#else +#define ompi_win_create_keyval_f pompi_win_create_keyval_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif static char FUNC_NAME[] = "MPI_Win_create_keyval"; @@ -81,7 +82,7 @@ void ompi_win_create_keyval_f(ompi_mpi2_fortran_copy_attr_function* win_copy_att /* Note that we only set the "F77" bit and exclude the "F77_OLD" bit, indicating that the callbacks should use the new MPI-2 INTEGER(KIND=MPI_ADDRESS_KIND)-parameter functions (as opposed - to the old MPI-1 INTEGER-parameter functions). */ + to the old MPI-1 INTEGER-parameter functions). */ ret = ompi_attr_create_keyval_aint(WIN_ATTR, copy_fn, del_fn, win_keyval, *extra_state, OMPI_KEYVAL_F77, diff --git a/ompi/mpi/fortran/mpif-h/win_delete_attr_f.c b/ompi/mpi/fortran/mpif-h/win_delete_attr_f.c index 6f6804d5626..c28dd072c18 100644 --- a/ompi/mpi/fortran/mpif-h/win_delete_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/win_delete_attr_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_DELETE_ATTR = ompi_win_delete_attr_f #pragma weak pmpi_win_delete_attr = ompi_win_delete_attr_f #pragma weak pmpi_win_delete_attr_ = ompi_win_delete_attr_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_delete_attr_f = ompi_win_delete_attr_f #pragma weak PMPI_Win_delete_attr_f08 = ompi_win_delete_attr_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_DELETE_ATTR, pmpi_win_delete_attr, pmpi_win_delete_attr_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_DELETE_ATTR, (MPI_Fint *win, MPI_Fint *win_keyval, MPI_Fint *ierr), (win, win_keyval, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_DELETE_ATTR = ompi_win_delete_attr_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_DELETE_ATTR, #pragma weak MPI_Win_delete_attr_f = ompi_win_delete_attr_f #pragma weak MPI_Win_delete_attr_f08 = ompi_win_delete_attr_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_DELETE_ATTR, mpi_win_delete_attr, mpi_win_delete_attr_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_DELETE_ATTR, ompi_win_delete_attr_f, (MPI_Fint *win, MPI_Fint *win_keyval, MPI_Fint *ierr), (win, win_keyval, ierr) ) +#else +#define ompi_win_delete_attr_f pompi_win_delete_attr_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_delete_attr_f(MPI_Fint *win, MPI_Fint *win_keyval, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_delete_attr(c_win, OMPI_FINT_2_INT(*win_keyval)); + c_ierr = PMPI_Win_delete_attr(c_win, OMPI_FINT_2_INT(*win_keyval)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_detach_f.c b/ompi/mpi/fortran/mpif-h/win_detach_f.c index 9ada7b4dff8..f50aa308ebd 100644 --- a/ompi/mpi/fortran/mpif-h/win_detach_f.c +++ b/ompi/mpi/fortran/mpif-h/win_detach_f.c @@ -2,9 +2,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -12,15 +12,16 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_DETACH = ompi_win_detach_f #pragma weak pmpi_win_detach = ompi_win_detach_f #pragma weak pmpi_win_detach_ = ompi_win_detach_f #pragma weak pmpi_win_detach__ = ompi_win_detach_f -#pragma weak PMPI_Win_create_f = ompi_win_detach_f -#pragma weak PMPI_Win_create_f08 = ompi_win_detach_f -#elif OMPI_PROFILE_LAYER +#pragma weak PMPI_Win_detach_f = ompi_win_detach_f +#pragma weak PMPI_Win_detach_f08 = ompi_win_detach_f +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_DETACH, pmpi_win_detach, pmpi_win_detach_, @@ -29,6 +30,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_DETACH, (MPI_Fint *win, char *base, MPI_Fint *ierr), (win, base, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_DETACH = ompi_win_detach_f @@ -36,11 +38,10 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_DETACH, #pragma weak mpi_win_detach_ = ompi_win_detach_f #pragma weak mpi_win_detach__ = ompi_win_detach_f -#pragma weak MPI_Win_create_f = 
ompi_win_detach_f -#pragma weak MPI_Win_create_f08 = ompi_win_detach_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#pragma weak MPI_Win_detach_f = ompi_win_detach_f +#pragma weak MPI_Win_detach_f08 = ompi_win_detach_f +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_DETACH, mpi_win_detach, mpi_win_detach_, @@ -48,20 +49,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_DETACH, ompi_win_detach_f, (MPI_Fint *win, char *base, MPI_Fint *ierr), (win, base, ierr) ) +#else +#define ompi_win_detach_f pompi_win_detach_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_detach_f(MPI_Fint *win, char *base, MPI_Fint *ierr) { int c_ierr; MPI_Win c_win; - c_win = MPI_Win_f2c(*win); - c_ierr = MPI_Win_detach(c_win, base); + c_win = PMPI_Win_f2c(*win); + c_ierr = PMPI_Win_detach(c_win, base); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_fence_f.c b/ompi/mpi/fortran/mpif-h/win_fence_f.c index cdbbcf836c0..0fee45da658 100644 --- a/ompi/mpi/fortran/mpif-h/win_fence_f.c +++ b/ompi/mpi/fortran/mpif-h/win_fence_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_FENCE = ompi_win_fence_f #pragma weak pmpi_win_fence = ompi_win_fence_f #pragma weak pmpi_win_fence_ = ompi_win_fence_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_fence_f = ompi_win_fence_f #pragma weak PMPI_Win_fence_f08 = ompi_win_fence_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FENCE, pmpi_win_fence, pmpi_win_fence_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FENCE, (MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr), (assert, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_FENCE = ompi_win_fence_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FENCE, #pragma weak MPI_Win_fence_f = ompi_win_fence_f #pragma weak MPI_Win_fence_f08 = ompi_win_fence_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FENCE, mpi_win_fence, mpi_win_fence_, @@ -57,18 +60,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FENCE, ompi_win_fence_f, (MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr), (assert, win, ierr) ) +#else +#define ompi_win_fence_f pompi_win_fence_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_fence_f(MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); - - c_ierr = MPI_Win_fence(OMPI_FINT_2_INT(*assert), c_win); + MPI_Win c_win = PMPI_Win_f2c(*win); + + c_ierr = PMPI_Win_fence(OMPI_FINT_2_INT(*assert), c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_flush_all_f.c b/ompi/mpi/fortran/mpif-h/win_flush_all_f.c index 945d4abfa8b..fc18ab34eed 100644 --- a/ompi/mpi/fortran/mpif-h/win_flush_all_f.c +++ b/ompi/mpi/fortran/mpif-h/win_flush_all_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_FLUSH_ALL = ompi_win_flush_all_f #pragma weak pmpi_win_flush_all = ompi_win_flush_all_f #pragma weak pmpi_win_flush_all_ = ompi_win_flush_all_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Win_flush_all_f = ompi_win_flush_all_f #pragma weak PMPI_Win_flush_all_f08 = ompi_win_flush_all_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH_ALL, pmpi_win_flush_all, pmpi_win_flush_all_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH_ALL, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_FLUSH_ALL = ompi_win_flush_all_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH_ALL, #pragma weak MPI_Win_flush_all_f = ompi_win_flush_all_f #pragma weak MPI_Win_flush_all_f08 = ompi_win_flush_all_f -#endif - -#if ! 
OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FLUSH_ALL, mpi_win_flush_all, mpi_win_flush_all_, @@ -60,18 +63,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FLUSH_ALL, ompi_win_flush_all_f, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) +#else +#define ompi_win_flush_all_f pompi_win_flush_all_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_flush_all_f(MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_flush_all(c_win); + c_ierr = PMPI_Win_flush_all(c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_flush_f.c b/ompi/mpi/fortran/mpif-h/win_flush_f.c index a31ed2bc6bf..398703f5011 100644 --- a/ompi/mpi/fortran/mpif-h/win_flush_f.c +++ b/ompi/mpi/fortran/mpif-h/win_flush_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_FLUSH = ompi_win_flush_f #pragma weak pmpi_win_flush = ompi_win_flush_f #pragma weak pmpi_win_flush_ = ompi_win_flush_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Win_flush_f = ompi_win_flush_f #pragma weak PMPI_Win_flush_f08 = ompi_win_flush_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH, pmpi_win_flush, pmpi_win_flush_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH, (MPI_Fint *rank, MPI_Fint *win, MPI_Fint *ierr), (rank, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_FLUSH = ompi_win_flush_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH, #pragma weak MPI_Win_flush_f = ompi_win_flush_f #pragma weak MPI_Win_flush_f08 = ompi_win_flush_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FLUSH, mpi_win_flush, mpi_win_flush_, @@ -60,18 +63,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FLUSH, ompi_win_flush_f, (MPI_Fint *rank, MPI_Fint *win, MPI_Fint *ierr), (rank, win, ierr) ) +#else +#define ompi_win_flush_f pompi_win_flush_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_flush_f(MPI_Fint *rank, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_flush(OMPI_FINT_2_INT(*rank), c_win); + c_ierr = PMPI_Win_flush(OMPI_FINT_2_INT(*rank), c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_flush_local_all_f.c b/ompi/mpi/fortran/mpif-h/win_flush_local_all_f.c index 3a6ec66b67d..64e869df812 100644 --- a/ompi/mpi/fortran/mpif-h/win_flush_local_all_f.c +++ b/ompi/mpi/fortran/mpif-h/win_flush_local_all_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_FLUSH_LOCAL_ALL = ompi_win_flush_local_all_f #pragma weak pmpi_win_flush_local_all = ompi_win_flush_local_all_f #pragma weak pmpi_win_flush_local_all_ = ompi_win_flush_local_all_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Win_flush_local_all_f = ompi_win_flush_local_all_f #pragma weak PMPI_Win_flush_local_all_f08 = ompi_win_flush_local_all_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH_LOCAL_ALL, pmpi_win_flush_local_all, pmpi_win_flush_local_all_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH_LOCAL_ALL, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_FLUSH_LOCAL_ALL = ompi_win_flush_local_all_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH_LOCAL_ALL, #pragma weak 
MPI_Win_flush_local_all_f = ompi_win_flush_local_all_f #pragma weak MPI_Win_flush_local_all_f08 = ompi_win_flush_local_all_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FLUSH_LOCAL_ALL, mpi_win_flush_local_all, mpi_win_flush_local_all_, @@ -60,18 +63,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FLUSH_LOCAL_ALL, ompi_win_flush_local_all_f, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) +#else +#define ompi_win_flush_local_all_f pompi_win_flush_local_all_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_flush_local_all_f(MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_flush_local_all(c_win); + c_ierr = PMPI_Win_flush_local_all(c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_flush_local_f.c b/ompi/mpi/fortran/mpif-h/win_flush_local_f.c index 52dc116ed30..4c2cd92a17c 100644 --- a/ompi/mpi/fortran/mpif-h/win_flush_local_f.c +++ b/ompi/mpi/fortran/mpif-h/win_flush_local_f.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_FLUSH_LOCAL = ompi_win_flush_local_f #pragma weak pmpi_win_flush_local = ompi_win_flush_local_f #pragma weak pmpi_win_flush_local_ = ompi_win_flush_local_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Win_flush_local_f = ompi_win_flush_local_f #pragma weak PMPI_Win_flush_local_f08 = ompi_win_flush_local_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH_LOCAL, pmpi_win_flush_local, pmpi_win_flush_local_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH_LOCAL, (MPI_Fint *rank, MPI_Fint *win, MPI_Fint *ierr), (rank, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_FLUSH_LOCAL = ompi_win_flush_local_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FLUSH_LOCAL, #pragma weak MPI_Win_flush_local_f = ompi_win_flush_local_f #pragma weak MPI_Win_flush_local_f08 = ompi_win_flush_local_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FLUSH_LOCAL, mpi_win_flush_local, mpi_win_flush_local_, @@ -60,18 +63,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FLUSH_LOCAL, ompi_win_flush_local_f, (MPI_Fint *rank, MPI_Fint *win, MPI_Fint *ierr), (rank, win, ierr) ) +#else +#define ompi_win_flush_local_f pompi_win_flush_local_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_flush_local_f(MPI_Fint *rank, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_flush_local(OMPI_FINT_2_INT(*rank), c_win); + c_ierr = PMPI_Win_flush_local(OMPI_FINT_2_INT(*rank), c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_free_f.c b/ompi/mpi/fortran/mpif-h/win_free_f.c index d05138d8873..fb59b86f428 100644 --- a/ompi/mpi/fortran/mpif-h/win_free_f.c +++ b/ompi/mpi/fortran/mpif-h/win_free_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_FREE = ompi_win_free_f #pragma weak pmpi_win_free = ompi_win_free_f #pragma weak pmpi_win_free_ = ompi_win_free_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_free_f = ompi_win_free_f #pragma weak PMPI_Win_free_f08 = ompi_win_free_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FREE, pmpi_win_free, pmpi_win_free_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FREE, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_FREE = ompi_win_free_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FREE, #pragma weak MPI_Win_free_f = ompi_win_free_f #pragma weak MPI_Win_free_f08 = ompi_win_free_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FREE, mpi_win_free, mpi_win_free_, @@ -57,22 +60,21 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FREE, ompi_win_free_f, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) +#else +#define ompi_win_free_f pompi_win_free_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_free_f(MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_free(&c_win); + c_ierr = PMPI_Win_free(&c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *win = MPI_Win_c2f(c_win); + *win = PMPI_Win_c2f(c_win); } } diff --git a/ompi/mpi/fortran/mpif-h/win_free_keyval_f.c b/ompi/mpi/fortran/mpif-h/win_free_keyval_f.c index 5cd48c57ecc..7bd38feb843 100644 --- a/ompi/mpi/fortran/mpif-h/win_free_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/win_free_keyval_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_FREE_KEYVAL = ompi_win_free_keyval_f #pragma weak pmpi_win_free_keyval = ompi_win_free_keyval_f #pragma weak pmpi_win_free_keyval_ = ompi_win_free_keyval_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_free_keyval_f = ompi_win_free_keyval_f #pragma weak PMPI_Win_free_keyval_f08 = ompi_win_free_keyval_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FREE_KEYVAL, pmpi_win_free_keyval, pmpi_win_free_keyval_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FREE_KEYVAL, (MPI_Fint *win_keyval, MPI_Fint *ierr), (win_keyval, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_FREE_KEYVAL = ompi_win_free_keyval_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_FREE_KEYVAL, #pragma weak MPI_Win_free_keyval_f = ompi_win_free_keyval_f #pragma weak MPI_Win_free_keyval_f08 = ompi_win_free_keyval_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FREE_KEYVAL, mpi_win_free_keyval, mpi_win_free_keyval_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_FREE_KEYVAL, ompi_win_free_keyval_f, (MPI_Fint *win_keyval, MPI_Fint *ierr), (win_keyval, ierr) ) +#else +#define ompi_win_free_keyval_f pompi_win_free_keyval_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_free_keyval_f(MPI_Fint *win_keyval, MPI_Fint *ierr) { int c_ierr; OMPI_SINGLE_NAME_DECL(win_keyval); - - c_ierr = MPI_Win_free_keyval(OMPI_SINGLE_NAME_CONVERT(win_keyval)); + + c_ierr = PMPI_Win_free_keyval(OMPI_SINGLE_NAME_CONVERT(win_keyval)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/win_get_attr_f.c b/ompi/mpi/fortran/mpif-h/win_get_attr_f.c index 071a505f81a..af77810f380 100644 --- a/ompi/mpi/fortran/mpif-h/win_get_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/win_get_attr_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +25,8 @@ #include "ompi/attribute/attribute.h" #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_GET_ATTR = ompi_win_get_attr_f #pragma weak pmpi_win_get_attr = ompi_win_get_attr_f #pragma weak pmpi_win_get_attr_ = ompi_win_get_attr_f @@ -31,7 +34,7 @@ #pragma weak PMPI_Win_get_attr_f = ompi_win_get_attr_f #pragma weak PMPI_Win_get_attr_f08 = ompi_win_get_attr_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_ATTR, pmpi_win_get_attr, pmpi_win_get_attr_, @@ -40,6 +43,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_ATTR, (MPI_Fint *win, MPI_Fint *win_keyval, MPI_Aint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (win, win_keyval, attribute_val, flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_GET_ATTR = ompi_win_get_attr_f @@ -49,9 +53,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_ATTR, #pragma weak MPI_Win_get_attr_f = ompi_win_get_attr_f #pragma weak MPI_Win_get_attr_f08 = ompi_win_get_attr_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_GET_ATTR, mpi_win_get_attr, mpi_win_get_attr_, @@ -59,18 +62,16 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_GET_ATTR, ompi_win_get_attr_f, (MPI_Fint *win, MPI_Fint *win_keyval, MPI_Aint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (win, win_keyval, attribute_val, flag, ierr) ) +#else +#define ompi_win_get_attr_f pompi_win_get_attr_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_win_get_attr_f(MPI_Fint *win, MPI_Fint *win_keyval, MPI_Aint *attribute_val, ompi_fortran_logical_t *flag, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); OMPI_LOGICAL_NAME_DECL(flag); /* This stuff is very confusing. Be sure to see the comment at diff --git a/ompi/mpi/fortran/mpif-h/win_get_errhandler_f.c b/ompi/mpi/fortran/mpif-h/win_get_errhandler_f.c index aa1a4ae2d72..28ff7e20936 100644 --- a/ompi/mpi/fortran/mpif-h/win_get_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/win_get_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_GET_ERRHANDLER = ompi_win_get_errhandler_f #pragma weak pmpi_win_get_errhandler = ompi_win_get_errhandler_f #pragma weak pmpi_win_get_errhandler_ = ompi_win_get_errhandler_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_get_errhandler_f = ompi_win_get_errhandler_f #pragma weak PMPI_Win_get_errhandler_f08 = ompi_win_get_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_ERRHANDLER, pmpi_win_get_errhandler, pmpi_win_get_errhandler_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_ERRHANDLER, (MPI_Fint *win, MPI_Fint *errhandler, MPI_Fint *ierr), (win, errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_GET_ERRHANDLER = ompi_win_get_errhandler_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_ERRHANDLER, #pragma weak MPI_Win_get_errhandler_f = ompi_win_get_errhandler_f #pragma weak MPI_Win_get_errhandler_f08 = ompi_win_get_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_GET_ERRHANDLER, mpi_win_get_errhandler, mpi_win_get_errhandler_, @@ -57,24 +60,23 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_GET_ERRHANDLER, ompi_win_get_errhandler_f, (MPI_Fint *win, MPI_Fint *errhandler, MPI_Fint *ierr), (win, errhandler, ierr) ) +#else +#define ompi_win_get_errhandler_f pompi_win_get_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif -void ompi_win_get_errhandler_f(MPI_Fint *win, MPI_Fint *errhandler, + +void ompi_win_get_errhandler_f(MPI_Fint *win, MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; MPI_Errhandler c_err; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_get_errhandler(c_win, &c_err); + c_ierr = PMPI_Win_get_errhandler(c_win, &c_err); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *errhandler = MPI_Errhandler_c2f(c_err); + *errhandler = PMPI_Errhandler_c2f(c_err); } } diff --git a/ompi/mpi/fortran/mpif-h/win_get_group_f.c b/ompi/mpi/fortran/mpif-h/win_get_group_f.c index d58281c5bfa..5afd0c89870 100644 --- a/ompi/mpi/fortran/mpif-h/win_get_group_f.c +++ b/ompi/mpi/fortran/mpif-h/win_get_group_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_GET_GROUP = ompi_win_get_group_f #pragma weak pmpi_win_get_group = ompi_win_get_group_f #pragma weak pmpi_win_get_group_ = ompi_win_get_group_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_get_group_f = ompi_win_get_group_f #pragma weak PMPI_Win_get_group_f08 = ompi_win_get_group_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_GROUP, pmpi_win_get_group, pmpi_win_get_group_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_GROUP, (MPI_Fint *win, MPI_Fint *group, MPI_Fint *ierr), (win, group, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_GET_GROUP = ompi_win_get_group_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_GROUP, #pragma weak MPI_Win_get_group_f = ompi_win_get_group_f #pragma weak MPI_Win_get_group_f08 = ompi_win_get_group_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_GET_GROUP, mpi_win_get_group, mpi_win_get_group_, @@ -57,23 +60,22 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_GET_GROUP, ompi_win_get_group_f, (MPI_Fint *win, MPI_Fint *group, MPI_Fint *ierr), (win, group, ierr) ) +#else +#define ompi_win_get_group_f pompi_win_get_group_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_get_group_f(MPI_Fint *win, MPI_Fint *group, MPI_Fint *ierr) { int c_ierr; MPI_Group c_grp; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_get_group(c_win, &c_grp); + c_ierr = PMPI_Win_get_group(c_win, &c_grp); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { - *group = MPI_Group_c2f(c_grp); + *group = PMPI_Group_c2f(c_grp); } } diff --git a/ompi/mpi/fortran/mpif-h/win_get_info_f.c b/ompi/mpi/fortran/mpif-h/win_get_info_f.c index 0f0ec514104..dc74ea3f7ec 100644 --- a/ompi/mpi/fortran/mpif-h/win_get_info_f.c +++ b/ompi/mpi/fortran/mpif-h/win_get_info_f.c @@ -2,9 +2,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -12,15 +12,16 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_GET_INFO = ompi_win_get_info_f #pragma weak pmpi_win_get_info = ompi_win_get_info_f #pragma weak pmpi_win_get_info_ = ompi_win_get_info_f #pragma weak pmpi_win_get_info__ = ompi_win_get_info_f -#pragma weak PMPI_Win_create_f = ompi_win_get_info_f -#pragma weak PMPI_Win_create_f08 = ompi_win_get_info_f -#elif OMPI_PROFILE_LAYER +#pragma weak PMPI_Win_get_info_f = ompi_win_get_info_f +#pragma weak PMPI_Win_get_info_f08 = ompi_win_get_info_f +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_INFO, pmpi_win_get_info, pmpi_win_get_info_, @@ -29,6 +30,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_INFO, (MPI_Fint *win, MPI_Fint *info, MPI_Fint *ierr), (win, info, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_GET_INFO = ompi_win_get_info_f @@ -36,11 +38,10 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_INFO, #pragma weak 
mpi_win_get_info_ = ompi_win_get_info_f #pragma weak mpi_win_get_info__ = ompi_win_get_info_f -#pragma weak MPI_Win_create_f = ompi_win_get_info_f -#pragma weak MPI_Win_create_f08 = ompi_win_get_info_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#pragma weak MPI_Win_get_info_f = ompi_win_get_info_f +#pragma weak MPI_Win_get_info_f08 = ompi_win_get_info_f +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_GET_INFO, mpi_win_get_info, mpi_win_get_info_, @@ -48,21 +49,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_GET_INFO, ompi_win_get_info_f, (MPI_Fint *win, MPI_Fint *info, MPI_Fint *ierr), (win, info, ierr) ) +#else +#define ompi_win_get_info_f pompi_win_get_info_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_get_info_f(MPI_Fint *win, MPI_Fint *info, MPI_Fint *ierr) { int c_ierr; MPI_Win c_win; MPI_Info c_info; - c_win = MPI_Win_f2c(*win); - c_ierr = MPI_Win_get_info(c_win, &c_info); + c_win = PMPI_Win_f2c(*win); + c_ierr = PMPI_Win_get_info(c_win, &c_info); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); - *info = MPI_Info_c2f(c_info); + *info = PMPI_Info_c2f(c_info); } diff --git a/ompi/mpi/fortran/mpif-h/win_get_name_f.c b/ompi/mpi/fortran/mpif-h/win_get_name_f.c index df526439524..8d523ed1b45 100644 --- a/ompi/mpi/fortran/mpif-h/win_get_name_f.c +++ b/ompi/mpi/fortran/mpif-h/win_get_name_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/base/strings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_GET_NAME = ompi_win_get_name_f #pragma weak pmpi_win_get_name = ompi_win_get_name_f #pragma weak pmpi_win_get_name_ = ompi_win_get_name_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Win_get_name_f = ompi_win_get_name_f #pragma weak PMPI_Win_get_name_f08 = ompi_win_get_name_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_NAME, pmpi_win_get_name, pmpi_win_get_name_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_NAME, (MPI_Fint *win, char *win_name, MPI_Fint *resultlen, MPI_Fint *ierr, int name_len), (win, win_name, resultlen, ierr, name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_GET_NAME = ompi_win_get_name_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_GET_NAME, #pragma weak MPI_Win_get_name_f = ompi_win_get_name_f #pragma weak MPI_Win_get_name_f08 = ompi_win_get_name_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_GET_NAME, mpi_win_get_name, mpi_win_get_name_, @@ -60,21 +63,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_GET_NAME, ompi_win_get_name_f, (MPI_Fint *win, char *win_name, MPI_Fint *resultlen, MPI_Fint *ierr, int name_len), (win, win_name, resultlen, ierr, name_len) ) +#else +#define ompi_win_get_name_f pompi_win_get_name_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_get_name_f(MPI_Fint *win, char *win_name, MPI_Fint *resultlen, MPI_Fint *ierr, int name_len) { int c_ierr, c_len; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); char c_name[MPI_MAX_OBJECT_NAME]; - c_ierr = MPI_Win_get_name(c_win, c_name, &c_len); + c_ierr = PMPI_Win_get_name(c_win, c_name, &c_len); if (MPI_SUCCESS == c_ierr) { ompi_fortran_string_c2f(c_name, win_name, name_len); *resultlen = OMPI_INT_2_FINT(c_len); diff --git a/ompi/mpi/fortran/mpif-h/win_lock_all_f.c b/ompi/mpi/fortran/mpif-h/win_lock_all_f.c index fbe1a1b49e6..7c9cc559315 100644 --- a/ompi/mpi/fortran/mpif-h/win_lock_all_f.c +++ b/ompi/mpi/fortran/mpif-h/win_lock_all_f.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_LOCK_ALL = ompi_win_lock_all_f #pragma weak pmpi_win_lock_all = ompi_win_lock_all_f #pragma weak pmpi_win_lock_all_ = ompi_win_lock_all_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_lock_all_f = ompi_win_lock_all_f #pragma weak PMPI_Win_lock_all_f08 = ompi_win_lock_all_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_LOCK_ALL, pmpi_win_lock_all, pmpi_win_lock_all_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_LOCK_ALL, (MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr), (assert, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_LOCK_ALL = ompi_win_lock_all_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_LOCK_ALL, #pragma weak MPI_Win_lock_all_f = ompi_win_lock_all_f #pragma weak MPI_Win_lock_all_f08 = ompi_win_lock_all_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_LOCK_ALL, mpi_win_lock_all, mpi_win_lock_all_, @@ -57,19 +60,18 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_LOCK_ALL, ompi_win_lock_all_f, (MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr), (assert, win, ierr) ) +#else +#define ompi_win_lock_all_f pompi_win_lock_all_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_lock_all_f(MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_lock_all(OMPI_FINT_2_INT(*assert), + c_ierr = PMPI_Win_lock_all(OMPI_FINT_2_INT(*assert), c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_lock_f.c b/ompi/mpi/fortran/mpif-h/win_lock_f.c index dc1059550ee..9d23f1fab20 100644 --- a/ompi/mpi/fortran/mpif-h/win_lock_f.c +++ b/ompi/mpi/fortran/mpif-h/win_lock_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_LOCK = ompi_win_lock_f #pragma weak pmpi_win_lock = ompi_win_lock_f #pragma weak pmpi_win_lock_ = ompi_win_lock_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_lock_f = ompi_win_lock_f #pragma weak PMPI_Win_lock_f08 = ompi_win_lock_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_LOCK, pmpi_win_lock, pmpi_win_lock_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_LOCK, (MPI_Fint *lock_type, MPI_Fint *rank, MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr), (lock_type, rank, assert, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_LOCK = ompi_win_lock_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_LOCK, #pragma weak MPI_Win_lock_f = ompi_win_lock_f #pragma weak MPI_Win_lock_f08 = ompi_win_lock_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_LOCK, mpi_win_lock, mpi_win_lock_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_LOCK, ompi_win_lock_f, (MPI_Fint *lock_type, MPI_Fint *rank, MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr), (lock_type, rank, assert, win, ierr) ) +#else +#define ompi_win_lock_f pompi_win_lock_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_lock_f(MPI_Fint *lock_type, MPI_Fint *rank, MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_lock(OMPI_FINT_2_INT(*lock_type), + c_ierr = PMPI_Win_lock(OMPI_FINT_2_INT(*lock_type), OMPI_FINT_2_INT(*rank), OMPI_FINT_2_INT(*assert), c_win); diff --git a/ompi/mpi/fortran/mpif-h/win_post_f.c b/ompi/mpi/fortran/mpif-h/win_post_f.c index 1c825a24016..05edb0d7ae5 100644 --- a/ompi/mpi/fortran/mpif-h/win_post_f.c +++ b/ompi/mpi/fortran/mpif-h/win_post_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_POST = ompi_win_post_f #pragma weak pmpi_win_post = ompi_win_post_f #pragma weak pmpi_win_post_ = ompi_win_post_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_post_f = ompi_win_post_f #pragma weak PMPI_Win_post_f08 = ompi_win_post_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_POST, pmpi_win_post, pmpi_win_post_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_POST, (MPI_Fint *group, MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr), (group, assert, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_POST = ompi_win_post_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_POST, #pragma weak MPI_Win_post_f = ompi_win_post_f #pragma weak MPI_Win_post_f08 = ompi_win_post_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_POST, mpi_win_post, mpi_win_post_, @@ -57,21 +60,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_POST, ompi_win_post_f, (MPI_Fint *group, MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr), (group, assert, win, ierr) ) +#else +#define ompi_win_post_f pompi_win_post_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_post_f(MPI_Fint *group, MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); - MPI_Group c_grp = MPI_Group_f2c(*group); + MPI_Win c_win = PMPI_Win_f2c(*win); + MPI_Group c_grp = PMPI_Group_f2c(*group); - c_ierr = MPI_Win_post(c_grp, + c_ierr = PMPI_Win_post(c_grp, OMPI_FINT_2_INT(*assert), c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/win_set_attr_f.c b/ompi/mpi/fortran/mpif-h/win_set_attr_f.c index 968d0bc2020..7dd9d51f93e 100644 --- a/ompi/mpi/fortran/mpif-h/win_set_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/win_set_attr_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/win/win.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_SET_ATTR = ompi_win_set_attr_f #pragma weak pmpi_win_set_attr = ompi_win_set_attr_f #pragma weak pmpi_win_set_attr_ = ompi_win_set_attr_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Win_set_attr_f = ompi_win_set_attr_f #pragma weak PMPI_Win_set_attr_f08 = ompi_win_set_attr_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_ATTR, pmpi_win_set_attr, pmpi_win_set_attr_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_ATTR, (MPI_Fint *win, MPI_Fint *win_keyval, MPI_Aint *attribute_val, MPI_Fint *ierr), (win, win_keyval, attribute_val, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_SET_ATTR = ompi_win_set_attr_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_ATTR, #pragma weak MPI_Win_set_attr_f = ompi_win_set_attr_f #pragma weak MPI_Win_set_attr_f08 = ompi_win_set_attr_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SET_ATTR, mpi_win_set_attr, mpi_win_set_attr_, @@ -60,18 +63,16 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SET_ATTR, ompi_win_set_attr_f, (MPI_Fint *win, MPI_Fint *win_keyval, MPI_Aint *attribute_val, MPI_Fint *ierr), (win, win_keyval, attribute_val, ierr) ) +#else +#define ompi_win_set_attr_f pompi_win_set_attr_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif void ompi_win_set_attr_f(MPI_Fint *win, MPI_Fint *win_keyval, MPI_Aint *attribute_val, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); /* This stuff is very confusing. Be sure to see the comment at the top of src/attributes/attributes.c. 
*/ @@ -79,7 +80,7 @@ void ompi_win_set_attr_f(MPI_Fint *win, MPI_Fint *win_keyval, c_ierr = ompi_attr_set_fortran_mpi2(WIN_ATTR, c_win, &c_win->w_keyhash, - OMPI_FINT_2_INT(*win_keyval), + OMPI_FINT_2_INT(*win_keyval), *attribute_val, false); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/win_set_errhandler_f.c b/ompi/mpi/fortran/mpif-h/win_set_errhandler_f.c index 180d46aa810..d017f8293dc 100644 --- a/ompi/mpi/fortran/mpif-h/win_set_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/win_set_errhandler_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +24,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/errhandler/errhandler.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_SET_ERRHANDLER = ompi_win_set_errhandler_f #pragma weak pmpi_win_set_errhandler = ompi_win_set_errhandler_f #pragma weak pmpi_win_set_errhandler_ = ompi_win_set_errhandler_f @@ -30,7 +33,7 @@ #pragma weak PMPI_Win_set_errhandler_f = ompi_win_set_errhandler_f #pragma weak PMPI_Win_set_errhandler_f08 = ompi_win_set_errhandler_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_ERRHANDLER, pmpi_win_set_errhandler, pmpi_win_set_errhandler_, @@ -39,6 +42,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_ERRHANDLER, (MPI_Fint *win, MPI_Fint *errhandler, MPI_Fint *ierr), (win, errhandler, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_SET_ERRHANDLER = ompi_win_set_errhandler_f @@ -48,9 +52,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_ERRHANDLER, #pragma weak MPI_Win_set_errhandler_f = ompi_win_set_errhandler_f #pragma weak MPI_Win_set_errhandler_f08 = ompi_win_set_errhandler_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SET_ERRHANDLER, mpi_win_set_errhandler, mpi_win_set_errhandler_, @@ -58,20 +61,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SET_ERRHANDLER, ompi_win_set_errhandler_f, (MPI_Fint *win, MPI_Fint *errhandler, MPI_Fint *ierr), (win, errhandler, ierr) ) +#else +#define ompi_win_set_errhandler_f pompi_win_set_errhandler_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_set_errhandler_f(MPI_Fint *win, MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); - MPI_Errhandler c_err = MPI_Errhandler_f2c(*errhandler); + MPI_Win c_win = PMPI_Win_f2c(*win); + MPI_Errhandler c_err = PMPI_Errhandler_f2c(*errhandler); - c_ierr = MPI_Win_set_errhandler(c_win, c_err); + c_ierr = PMPI_Win_set_errhandler(c_win, c_err); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_set_info_f.c b/ompi/mpi/fortran/mpif-h/win_set_info_f.c index 95755c3e155..65773fb33eb 100644 --- a/ompi/mpi/fortran/mpif-h/win_set_info_f.c +++ b/ompi/mpi/fortran/mpif-h/win_set_info_f.c @@ -2,9 +2,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -12,15 +12,16 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_SET_INFO = ompi_win_set_info_f #pragma weak pmpi_win_set_info = ompi_win_set_info_f #pragma weak pmpi_win_set_info_ = ompi_win_set_info_f #pragma weak pmpi_win_set_info__ = ompi_win_set_info_f -#pragma weak PMPI_Win_create_f = ompi_win_set_info_f -#pragma weak PMPI_Win_create_f08 = ompi_win_set_info_f -#elif OMPI_PROFILE_LAYER +#pragma weak PMPI_Win_set_info_f = ompi_win_set_info_f +#pragma weak PMPI_Win_set_info_f08 = ompi_win_set_info_f +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_INFO, pmpi_win_set_info, pmpi_win_set_info_, @@ -29,6 +30,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_INFO, (MPI_Fint *win, MPI_Fint *info, MPI_Fint *ierr), (win, info, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_SET_INFO = ompi_win_set_info_f @@ -36,11 +38,10 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_INFO, #pragma 
weak mpi_win_set_info_ = ompi_win_set_info_f #pragma weak mpi_win_set_info__ = ompi_win_set_info_f -#pragma weak MPI_Win_create_f = ompi_win_set_info_f -#pragma weak MPI_Win_create_f08 = ompi_win_set_info_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#pragma weak MPI_Win_set_info_f = ompi_win_set_info_f +#pragma weak MPI_Win_set_info_f08 = ompi_win_set_info_f +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SET_INFO, mpi_win_set_info, mpi_win_set_info_, @@ -48,21 +49,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SET_INFO, ompi_win_set_info_f, (MPI_Fint *win, MPI_Fint *info, MPI_Fint *ierr), (win, info, ierr) ) +#else +#define ompi_win_set_info_f pompi_win_set_info_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_set_info_f(MPI_Fint *win, MPI_Fint *info, MPI_Fint *ierr) { int c_ierr; MPI_Win c_win; MPI_Info c_info; - c_win = MPI_Win_f2c(*win); - c_info = MPI_Info_f2c(*info); - c_ierr = MPI_Win_set_info(c_win, c_info); + c_win = PMPI_Win_f2c(*win); + c_info = PMPI_Info_f2c(*info); + c_ierr = PMPI_Win_set_info(c_win, c_info); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_set_name_f.c b/ompi/mpi/fortran/mpif-h/win_set_name_f.c index 4585f04e9c1..ccec5e41eb3 100644 --- a/ompi/mpi/fortran/mpif-h/win_set_name_f.c +++ b/ompi/mpi/fortran/mpif-h/win_set_name_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +26,8 @@ #include "ompi/mpi/fortran/base/strings.h" #include "ompi/communicator/communicator.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_SET_NAME = ompi_win_set_name_f #pragma weak pmpi_win_set_name = ompi_win_set_name_f #pragma weak pmpi_win_set_name_ = ompi_win_set_name_f @@ -32,7 +35,7 @@ #pragma weak PMPI_Win_set_name_f = ompi_win_set_name_f #pragma weak PMPI_Win_set_name_f08 = ompi_win_set_name_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_NAME, pmpi_win_set_name, pmpi_win_set_name_, @@ -41,6 +44,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_NAME, (MPI_Fint *win, char *win_name, MPI_Fint *ierr, int name_len), (win, win_name, ierr, name_len) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_SET_NAME = ompi_win_set_name_f @@ -50,9 +54,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SET_NAME, #pragma weak MPI_Win_set_name_f = ompi_win_set_name_f #pragma weak MPI_Win_set_name_f08 = ompi_win_set_name_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SET_NAME, mpi_win_set_name, mpi_win_set_name_, @@ -60,13 +63,12 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SET_NAME, ompi_win_set_name_f, (MPI_Fint *win, char *win_name, MPI_Fint *ierr, int name_len), (win, win_name, ierr, name_len) ) +#else +#define ompi_win_set_name_f pompi_win_set_name_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_set_name_f(MPI_Fint *win, char *win_name, MPI_Fint *ierr, int name_len) { @@ -74,7 +76,7 @@ void ompi_win_set_name_f(MPI_Fint *win, char *win_name, MPI_Fint *ierr, char *c_name; MPI_Win c_win; - c_win = MPI_Win_f2c(*win); + c_win = PMPI_Win_f2c(*win); /* Convert the fortran string */ @@ -88,7 +90,7 @@ void ompi_win_set_name_f(MPI_Fint *win, char *win_name, MPI_Fint *ierr, /* Call the C function */ - c_ierr = MPI_Win_set_name(c_win, c_name); + c_ierr = PMPI_Win_set_name(c_win, c_name); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); /* Free the C name */ diff --git a/ompi/mpi/fortran/mpif-h/win_shared_query_f.c b/ompi/mpi/fortran/mpif-h/win_shared_query_f.c index c9db9c82828..d398c4529b6 100644 --- a/ompi/mpi/fortran/mpif-h/win_shared_query_f.c +++ b/ompi/mpi/fortran/mpif-h/win_shared_query_f.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_SHARED_QUERY = ompi_win_shared_query_f #pragma weak pmpi_win_shared_query = ompi_win_shared_query_f #pragma weak pmpi_win_shared_query_ = ompi_win_shared_query_f @@ -37,7 +40,7 @@ #pragma weak PMPI_Win_shared_query_cptr_f = ompi_win_shared_query_f #pragma weak PMPI_Win_shared_query_cptr_f08 = ompi_win_shared_query_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SHARED_QUERY, pmpi_win_shared_query, pmpi_win_shared_query_, @@ -58,6 +61,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SHARED_QUERY_CPTR, MPI_Fint *ierr), (win, rank, size, disp_unit, baseptr, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_SHARED_QUERY = ompi_win_shared_query_f @@ -75,9 +79,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SHARED_QUERY_CPTR, #pragma weak MPI_Win_shared_query_cptr_f = ompi_win_shared_query_f #pragma weak MPI_Win_shared_query_cptr_f08 = ompi_win_shared_query_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SHARED_QUERY, mpi_win_shared_query, mpi_win_shared_query_, @@ -97,13 +100,13 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SHARED_QUERY_CPTR, MPI_Fint *disp_unit, char *baseptr, MPI_Fint *ierr), (win, rank, size, disp_unit, baseptr, ierr) ) +#else +#define ompi_win_shared_query_f pompi_win_shared_query_f +#define ompi_win_shared_query_cptr_f pompi_win_shared_query_cptr_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_shared_query_f(MPI_Fint *win, MPI_Fint *rank, MPI_Aint *size, MPI_Fint *disp_unit, char *baseptr, MPI_Fint *ierr) @@ -111,9 +114,9 @@ void ompi_win_shared_query_f(MPI_Fint *win, MPI_Fint *rank, MPI_Aint *size, int c_ierr; MPI_Win c_win; - c_win = MPI_Win_f2c(*win); + c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_shared_query(c_win, OMPI_FINT_2_INT(*rank), size, + c_ierr = PMPI_Win_shared_query(c_win, OMPI_FINT_2_INT(*rank), size, OMPI_FINT_2_INT(disp_unit), baseptr); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_start_f.c b/ompi/mpi/fortran/mpif-h/win_start_f.c index 920ec0e7fdd..efcf5ecb695 100644 --- a/ompi/mpi/fortran/mpif-h/win_start_f.c +++ b/ompi/mpi/fortran/mpif-h/win_start_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_START = ompi_win_start_f #pragma weak pmpi_win_start = ompi_win_start_f #pragma weak pmpi_win_start_ = ompi_win_start_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_start_f = ompi_win_start_f #pragma weak PMPI_Win_start_f08 = ompi_win_start_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_START, pmpi_win_start, pmpi_win_start_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_START, (MPI_Fint *group, MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr), (group, assert, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_START = ompi_win_start_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_START, #pragma weak MPI_Win_start_f = ompi_win_start_f #pragma weak MPI_Win_start_f08 = ompi_win_start_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_START, mpi_win_start, mpi_win_start_, @@ -57,21 +60,20 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_START, ompi_win_start_f, (MPI_Fint *group, MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr), (group, assert, win, ierr) ) +#else +#define ompi_win_start_f pompi_win_start_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_start_f(MPI_Fint *group, MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Group c_grp = MPI_Group_f2c(*group); - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Group c_grp = PMPI_Group_f2c(*group); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_start(c_grp, + c_ierr = PMPI_Win_start(c_grp, OMPI_FINT_2_INT(*assert), c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); diff --git a/ompi/mpi/fortran/mpif-h/win_sync_f.c b/ompi/mpi/fortran/mpif-h/win_sync_f.c index e86ee2ae3b2..eed1116d598 100644 --- a/ompi/mpi/fortran/mpif-h/win_sync_f.c +++ b/ompi/mpi/fortran/mpif-h/win_sync_f.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_SYNC = ompi_win_sync_f #pragma weak pmpi_win_sync = ompi_win_sync_f #pragma weak pmpi_win_sync_ = ompi_win_sync_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_sync_f = ompi_win_sync_f #pragma weak PMPI_Win_sync_f08 = ompi_win_sync_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SYNC, pmpi_win_sync, pmpi_win_sync_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SYNC, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_SYNC = ompi_win_sync_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_SYNC, #pragma weak MPI_Win_sync_f = ompi_win_sync_f #pragma weak MPI_Win_sync_f08 = ompi_win_sync_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! 
OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SYNC, mpi_win_sync, mpi_win_sync_, @@ -57,18 +60,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_SYNC, ompi_win_sync_f, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) +#else +#define ompi_win_sync_f pompi_win_sync_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_sync_f(MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_sync(c_win); + c_ierr = PMPI_Win_sync(c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_test_f.c b/ompi/mpi/fortran/mpif-h/win_test_f.c index 19b0f5f85b5..27ed978bc7c 100644 --- a/ompi/mpi/fortran/mpif-h/win_test_f.c +++ b/ompi/mpi/fortran/mpif-h/win_test_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_TEST = ompi_win_test_f #pragma weak pmpi_win_test = ompi_win_test_f #pragma weak pmpi_win_test_ = ompi_win_test_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_test_f = ompi_win_test_f #pragma weak PMPI_Win_test_f08 = ompi_win_test_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_TEST, pmpi_win_test, pmpi_win_test_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_TEST, (MPI_Fint *win, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (win, flag, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_TEST = ompi_win_test_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_TEST, #pragma weak MPI_Win_test_f = ompi_win_test_f #pragma weak MPI_Win_test_f08 = ompi_win_test_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_TEST, mpi_win_test, mpi_win_test_, @@ -57,20 +60,19 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_TEST, ompi_win_test_f, (MPI_Fint *win, ompi_fortran_logical_t *flag, MPI_Fint *ierr), (win, flag, ierr) ) +#else +#define ompi_win_test_f pompi_win_test_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_test_f(MPI_Fint *win, ompi_fortran_logical_t *flag, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); OMPI_LOGICAL_NAME_DECL(flag); - c_ierr = MPI_Win_test(c_win, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); + c_ierr = PMPI_Win_test(c_win, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/win_unlock_all_f.c b/ompi/mpi/fortran/mpif-h/win_unlock_all_f.c index 68adcf39787..efad4fbad3b 100644 --- a/ompi/mpi/fortran/mpif-h/win_unlock_all_f.c +++ b/ompi/mpi/fortran/mpif-h/win_unlock_all_f.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_UNLOCK_ALL = ompi_win_unlock_all_f #pragma weak pmpi_win_unlock_all = ompi_win_unlock_all_f #pragma weak pmpi_win_unlock_all_ = ompi_win_unlock_all_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_unlock_all_f = ompi_win_unlock_all_f #pragma weak PMPI_Win_unlock_all_f08 = ompi_win_unlock_all_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_UNLOCK_ALL, pmpi_win_unlock_all, pmpi_win_unlock_all_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_UNLOCK_ALL, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_UNLOCK_ALL = ompi_win_unlock_all_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_UNLOCK_ALL, #pragma weak MPI_Win_unlock_all_f = ompi_win_unlock_all_f #pragma weak MPI_Win_unlock_all_f08 = ompi_win_unlock_all_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_UNLOCK_ALL, mpi_win_unlock_all, mpi_win_unlock_all_, @@ -57,18 +60,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_UNLOCK_ALL, ompi_win_unlock_all_f, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) +#else +#define ompi_win_unlock_all_f pompi_win_unlock_all_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_unlock_all_f(MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_unlock_all(c_win); + c_ierr = PMPI_Win_unlock_all(c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_unlock_f.c b/ompi/mpi/fortran/mpif-h/win_unlock_f.c index 4c0c84a9509..7e2a537b908 100644 --- a/ompi/mpi/fortran/mpif-h/win_unlock_f.c +++ b/ompi/mpi/fortran/mpif-h/win_unlock_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_UNLOCK = ompi_win_unlock_f #pragma weak pmpi_win_unlock = ompi_win_unlock_f #pragma weak pmpi_win_unlock_ = ompi_win_unlock_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_unlock_f = ompi_win_unlock_f #pragma weak PMPI_Win_unlock_f08 = ompi_win_unlock_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_UNLOCK, pmpi_win_unlock, pmpi_win_unlock_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_UNLOCK, (MPI_Fint *rank, MPI_Fint *win, MPI_Fint *ierr), (rank, win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_UNLOCK = ompi_win_unlock_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_UNLOCK, #pragma weak MPI_Win_unlock_f = ompi_win_unlock_f #pragma weak MPI_Win_unlock_f08 = ompi_win_unlock_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_UNLOCK, mpi_win_unlock, mpi_win_unlock_, @@ -57,18 +60,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_UNLOCK, ompi_win_unlock_f, (MPI_Fint *rank, MPI_Fint *win, MPI_Fint *ierr), (rank, win, ierr) ) +#else +#define ompi_win_unlock_f pompi_win_unlock_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_unlock_f(MPI_Fint *rank, MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_unlock(OMPI_FINT_2_INT(*rank), c_win); + c_ierr = PMPI_Win_unlock(OMPI_FINT_2_INT(*rank), c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_wait_f.c b/ompi/mpi/fortran/mpif-h/win_wait_f.c index c0c1b605441..9fe7f5d5201 100644 --- a/ompi/mpi/fortran/mpif-h/win_wait_f.c +++ b/ompi/mpi/fortran/mpif-h/win_wait_f.c @@ -5,15 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +23,8 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WIN_WAIT = ompi_win_wait_f #pragma weak pmpi_win_wait = ompi_win_wait_f #pragma weak pmpi_win_wait_ = ompi_win_wait_f @@ -29,7 +32,7 @@ #pragma weak PMPI_Win_wait_f = ompi_win_wait_f #pragma weak PMPI_Win_wait_f08 = ompi_win_wait_f -#elif OMPI_PROFILE_LAYER +#else OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_WAIT, pmpi_win_wait, pmpi_win_wait_, @@ -38,6 +41,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_WAIT, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WIN_WAIT = ompi_win_wait_f @@ -47,9 +51,8 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_WAIT, #pragma weak MPI_Win_wait_f = ompi_win_wait_f #pragma weak MPI_Win_wait_f08 = ompi_win_wait_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING OMPI_GENERATE_F77_BINDINGS (MPI_WIN_WAIT, mpi_win_wait, mpi_win_wait_, @@ -57,18 +60,17 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_WAIT, ompi_win_wait_f, (MPI_Fint *win, MPI_Fint *ierr), (win, ierr) ) +#else +#define ompi_win_wait_f pompi_win_wait_f #endif - - -#if OMPI_PROFILE_LAYER && ! 
OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + void ompi_win_wait_f(MPI_Fint *win, MPI_Fint *ierr) { int c_ierr; - MPI_Win c_win = MPI_Win_f2c(*win); + MPI_Win c_win = PMPI_Win_f2c(*win); - c_ierr = MPI_Win_wait(c_win); + c_ierr = PMPI_Win_wait(c_win); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/wtick_f.c b/ompi/mpi/fortran/mpif-h/wtick_f.c index 77bf8efe001..24da17bfa07 100644 --- a/ompi/mpi/fortran/mpif-h/wtick_f.c +++ b/ompi/mpi/fortran/mpif-h/wtick_f.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,10 +27,12 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" /* The OMPI_GENERATE_F77_BINDINGS work only for the most common F77 bindings, the - * one that does not return any value. There are 2 exceptions MPI_Wtick and MPI_Wtime. - * For these 2 we can insert the bindings manually. + * one that does not return any value. There are 4 exceptions MPI_Wtick, MPI_Wtime, + * MPI_Aint_add, and MPI_Aint_diff. For these 4 we can insert the bindings + * manually. 
*/ -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WTICK = ompi_wtick_f #pragma weak pmpi_wtick = ompi_wtick_f #pragma weak pmpi_wtick_ = ompi_wtick_f @@ -33,12 +40,13 @@ #pragma weak PMPI_Wtick_f = ompi_wtick_f #pragma weak PMPI_Wtick_f08 = ompi_wtick_f -#elif OMPI_PROFILE_LAYER +#else double PMPI_WTICK(void) { return pompi_wtick_f(); } double pmpi_wtick(void) { return pompi_wtick_f(); } double pmpi_wtick_(void) { return pompi_wtick_f(); } double pmpi_wtick__(void) { return pompi_wtick_f(); } #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WTICK = ompi_wtick_f @@ -48,21 +56,19 @@ double pmpi_wtick__(void) { return pompi_wtick_f(); } #pragma weak MPI_Wtick_f = ompi_wtick_f #pragma weak MPI_Wtick_f08 = ompi_wtick_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! OMPI_BUILD_MPI_PROFILING double MPI_WTICK(void) { return ompi_wtick_f(); } double mpi_wtick(void) { return ompi_wtick_f(); } double mpi_wtick_(void) { return ompi_wtick_f(); } double mpi_wtick__(void) { return ompi_wtick_f(); } +#else +#define ompi_wtick_f pompi_wtick_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + double ompi_wtick_f(void) { - return MPI_Wtick(); + return PMPI_Wtick(); } diff --git a/ompi/mpi/fortran/mpif-h/wtime_f.c b/ompi/mpi/fortran/mpif-h/wtime_f.c index e04f4aded94..ce0f18b5ea7 100644 --- a/ompi/mpi/fortran/mpif-h/wtime_f.c +++ b/ompi/mpi/fortran/mpif-h/wtime_f.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,10 +27,12 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" /* The OMPI_GENERATE_F77_BINDINGS work only for the most common F77 bindings, the - * one that does not return any value. There are 2 exceptions MPI_Wtick and MPI_Wtime. - * For these 2 we can insert the bindings manually. + * one that does not return any value. There are 4 exceptions MPI_Wtick, MPI_Wtime, + * MPI_Aint_add, and MPI_Aint_diff. For these 4 we can insert the bindings + * manually. */ -#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS #pragma weak PMPI_WTIME = ompi_wtime_f #pragma weak pmpi_wtime = ompi_wtime_f #pragma weak pmpi_wtime_ = ompi_wtime_f @@ -33,12 +40,13 @@ #pragma weak PMPI_Wtime_f = ompi_wtime_f #pragma weak PMPI_Wtime_f08 = ompi_wtime_f -#elif OMPI_PROFILE_LAYER +#else double PMPI_WTIME(void) { return pompi_wtime_f(); } double pmpi_wtime(void) { return pompi_wtime_f(); } double pmpi_wtime_(void) { return pompi_wtime_f(); } double pmpi_wtime__(void) { return pompi_wtime_f(); } #endif +#endif #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_WTIME = ompi_wtime_f @@ -48,21 +56,19 @@ double pmpi_wtime__(void) { return pompi_wtime_f(); } #pragma weak MPI_Wtime_f = ompi_wtime_f #pragma weak MPI_Wtime_f08 = ompi_wtime_f -#endif - -#if ! OPAL_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER +#else +#if ! 
OMPI_BUILD_MPI_PROFILING double MPI_WTIME(void) { return ompi_wtime_f(); } double mpi_wtime(void) { return ompi_wtime_f(); } double mpi_wtime_(void) { return ompi_wtime_f(); } double mpi_wtime__(void) { return ompi_wtime_f(); } +#else +#define ompi_wtime_f pompi_wtime_f #endif - - -#if OMPI_PROFILE_LAYER && ! OPAL_HAVE_WEAK_SYMBOLS -#include "ompi/mpi/fortran/mpif-h/profile/defines.h" #endif + double ompi_wtime_f(void) { - return MPI_Wtime(); + return PMPI_Wtime(); } diff --git a/ompi/mpi/fortran/use-mpi-f08-desc/ISO_Fortran_binding.h b/ompi/mpi/fortran/use-mpi-f08-desc/ISO_Fortran_binding.h old mode 100755 new mode 100644 index e8b326ce062..d7879939cbe --- a/ompi/mpi/fortran/use-mpi-f08-desc/ISO_Fortran_binding.h +++ b/ompi/mpi/fortran/use-mpi-f08-desc/ISO_Fortran_binding.h @@ -5,8 +5,8 @@ /* Struct CFI_dim_t for triples of bound, extent and stride information */ typedef struct { - intptr_t lower_bound, - extent, + intptr_t lower_bound, + extent, sm; } CFI_dim_t; @@ -16,20 +16,20 @@ typedef struct { stride; } CFI_bounds_t; - + /* Maximum rank supported by the companion Fortran processor */ /* Changed from 15 to F2003 value of 7 (CER) */ #define CFI_MAX_RANK 7 -/* Struct CFI_cdesc_t for holding all the information about a +/* Struct CFI_cdesc_t for holding all the information about a descriptor-based Fortran object */ typedef struct { void * base_addr; /* base address of object */ size_t elem_len; /* length of one element, in bytes */ int rank; /* object rank, 0 .. 
CF_MAX_RANK */ - int type; /* identifier for type of object */ + int type; /* identifier for type of object */ int attribute; /* object attribute: 0..2, or -1 */ int state; /* allocation/association state: 0 or 1 */ //Removed (CER) diff --git a/ompi/mpi/fortran/use-mpi-f08-desc/Makefile.am b/ompi/mpi/fortran/use-mpi-f08-desc/Makefile.am index f9f48f4db5e..9e55e5bd36d 100644 --- a/ompi/mpi/fortran/use-mpi-f08-desc/Makefile.am +++ b/ompi/mpi/fortran/use-mpi-f08-desc/Makefile.am @@ -1,11 +1,14 @@ # -*- makefile -*- # # Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -16,7 +19,7 @@ if OMPI_BUILD_FORTRAN_USEMPIF08_BINDINGS AM_FCFLAGS = -I$(top_builddir)/ompi/include -I$(top_srcdir)/ompi/include \ -I$(top_srcdir) $(FCFLAGS) -lib_LTLIBRARIES = libmpi_usempif08.la +lib_LTLIBRARIES = lib@OMPI_LIBMPI_NAME@_usempif08.la # # This list is a subset of the full MPI API used for testing Fortran @@ -33,7 +36,7 @@ mpi_api_files = \ type_contiguous_f08.f90 \ type_vector_f08.f90 -libmpi_usempif08_la_SOURCES = \ +lib@OMPI_LIBMPI_NAME@_usempif08_la_SOURCES = \ $(mpi_api_files) \ mpi-f08-types.f90 \ mpi-f08-interfaces.F90 \ @@ -60,13 +63,21 @@ mpi-f08-types.lo: mpi-f08-types.f90 mpi-f08-interfaces.lo: mpi-f08-interfaces.F90 mpi-f08-types.lo OMPI_Fortran_binding.lo: OMPI_Fortran_binding.f90 mpi-f08-types.lo -$(mpi_api_files): mpi-f08.lo -mpi-f08.lo: mpi-f08-types.lo +# +# Automake doesn't do Fortran dependency analysis, so must list them +# manually here. Bummer! 
+# + +mpi_api_lo_files = $(mpi_api_files:.f90=.lo) + +$(mpi_api_lo_files): mpi-f08.lo + +mpi-f08.lo: mpi-f08-types.lo mpi-f08.lo: OMPI_Fortran_binding.lo mpi-f08.lo: mpi-f08-interfaces.lo mpi-f08.lo: mpi-f-interfaces-bind.h -mpi-f08.lo: mpi-f08.f90 +mpi-f08.lo: mpi-f08.f90 # Install the generated .mod files. Unfortunately, each F90 compiler # may generate different filenames, so we have to use a glob. :-( diff --git a/ompi/mpi/fortran/use-mpi-f08-desc/OMPI_Fortran_binding_c.c b/ompi/mpi/fortran/use-mpi-f08-desc/OMPI_Fortran_binding_c.c index d7afd5e4789..fbd4e3531d6 100644 --- a/ompi/mpi/fortran/use-mpi-f08-desc/OMPI_Fortran_binding_c.c +++ b/ompi/mpi/fortran/use-mpi-f08-desc/OMPI_Fortran_binding_c.c @@ -60,7 +60,7 @@ size_t numElements(CFI_cdesc_t * desc) { int r; size_t num = 1; - + /* TODO - can have 0 size arrays? */ for (r = 0; r < desc->rank; r++) { @@ -79,7 +79,7 @@ void * copyToContiguous(CFI_cdesc_t * desc, void * cont_buf, size_t offset, int { size_t b, e, num_copied; char * next_out; - + char * in = (char *) desc->base_addr + offset; char * out = (char *) cont_buf; @@ -112,7 +112,7 @@ void * copyFromContiguous(CFI_cdesc_t * desc, void * cont_buf, size_t offset, in { size_t b, e, num_copied; char * next_out; - + char * out = (char *) desc->base_addr + offset; char * in = (char *) cont_buf; @@ -137,12 +137,12 @@ void * copyFromContiguous(CFI_cdesc_t * desc, void * cont_buf, size_t offset, in /* From ../mpif-h/send_f.c */ -void ompi_recv_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, - MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, +void ompi_recv_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, + MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr); -void ompi_recv_f08_desc_f(CFI_cdesc_t *desc, MPI_Fint *count, MPI_Fint *datatype, - MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, +void ompi_recv_f08_desc_f(CFI_cdesc_t *desc, MPI_Fint *count, MPI_Fint *datatype, + MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint 
*status, MPI_Fint *ierr) { size_t num_bytes = 0; @@ -167,10 +167,10 @@ void ompi_recv_f08_desc_f(CFI_cdesc_t *desc, MPI_Fint *count, MPI_Fint *datatype /* From ../mpif-h/send_f.c */ -void ompi_send_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, +void ompi_send_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr); -void ompi_send_f08_desc_f(CFI_cdesc_t *desc, MPI_Fint *count, MPI_Fint *datatype, +void ompi_send_f08_desc_f(CFI_cdesc_t *desc, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr) { size_t num_bytes = 0; diff --git a/ompi/mpi/fortran/use-mpi-f08-desc/constants.h.fin b/ompi/mpi/fortran/use-mpi-f08-desc/constants.h.fin index 482ad70b523..f6f3a33ba23 100644 --- a/ompi/mpi/fortran/use-mpi-f08-desc/constants.h.fin +++ b/ompi/mpi/fortran/use-mpi-f08-desc/constants.h.fin @@ -12,8 +12,8 @@ * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2009-2012 Los Alamos National Security, LLC. - * All rights reserved. + * Copyright (c) 2009-2012 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mpi/fortran/use-mpi-f08-desc/mpi-f08-types.f90 b/ompi/mpi/fortran/use-mpi-f08-desc/mpi-f08-types.f90 index dbfcc16b9bb..ec6fbf2ca31 100644 --- a/ompi/mpi/fortran/use-mpi-f08-desc/mpi-f08-types.f90 +++ b/ompi/mpi/fortran/use-mpi-f08-desc/mpi-f08-types.f90 @@ -3,6 +3,8 @@ ! Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! ! This file creates mappings between MPI C types (e.g., MPI_Comm) and ! 
variables (e.g., MPI_COMM_WORLD) and corresponding Fortran names @@ -151,7 +153,6 @@ module mpi_f08_types ! ! STATUS/STATUSES_IGNORE ! - type(MPI_STATUS), bind(C, name="mpi_fortran_status_ignore") :: MPI_STATUS_IGNORE - type(MPI_STATUS), bind(C, name="mpi_fortran_statuses_ignore") :: MPI_STATUSES_IGNORE(1) +#include "mpif-f08-types.h" end module mpi_f08_types diff --git a/ompi/mpi/fortran/use-mpi-f08-desc/mpi-f08.f90 b/ompi/mpi/fortran/use-mpi-f08-desc/mpi-f08.f90 index cd4360d7158..ce3b197ed94 100644 --- a/ompi/mpi/fortran/use-mpi-f08-desc/mpi-f08.f90 +++ b/ompi/mpi/fortran/use-mpi-f08-desc/mpi-f08.f90 @@ -14,9 +14,9 @@ ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! $COPYRIGHT$ -! +! ! Additional copyrights may follow -! +! ! $HEADER$ ! diff --git a/ompi/mpi/fortran/use-mpi-f08-desc/recv_f08_desc.f90 b/ompi/mpi/fortran/use-mpi-f08-desc/recv_f08_desc.f90 index 042c725e06f..e7ac58d305a 100644 --- a/ompi/mpi/fortran/use-mpi-f08-desc/recv_f08_desc.f90 +++ b/ompi/mpi/fortran/use-mpi-f08-desc/recv_f08_desc.f90 @@ -24,7 +24,7 @@ subroutine MPI_Recv_f08_desc_int_2d(buf,count,datatype,source,tag,comm,status,ie integer :: c_ierror type(CFI_cdesc_t) :: buf_desc - call make_desc_f(buf, buf_desc) + call make_desc_f(buf, buf_desc) !call print_desc(buf_desc) call ompi_recv_f08_desc_f(buf_desc, count, datatype%MPI_VAL, source, tag, comm%MPI_VAL, status, c_ierror) diff --git a/ompi/mpi/fortran/use-mpi-f08-desc/send_f08_desc.f90 b/ompi/mpi/fortran/use-mpi-f08-desc/send_f08_desc.f90 index a9420fa6451..98f76e832cd 100644 --- a/ompi/mpi/fortran/use-mpi-f08-desc/send_f08_desc.f90 +++ b/ompi/mpi/fortran/use-mpi-f08-desc/send_f08_desc.f90 @@ -18,7 +18,7 @@ subroutine MPI_Send_f08_desc_int_2d(buf, count, datatype, dest, tag, comm, ierro type(MPI_Datatype), intent(in) :: datatype type(MPI_Comm), intent(in) :: comm integer, optional, intent(out) :: ierror - + integer :: err type(CFI_cdesc_t) :: buf_desc @@ -42,7 +42,7 @@ subroutine 
MPI_Send_f08_desc_dbl_1d(buf, count, datatype, dest, tag, comm, ierro type(MPI_Datatype), intent(in) :: datatype type(MPI_Comm), intent(in) :: comm integer, optional, intent(out) :: ierror - + integer :: err print *, "WARNING, testing of double precision arrays not yet supported with subarrays" @@ -62,7 +62,7 @@ subroutine MPI_Send_f08_desc_dbl_0d(buf, count, datatype, dest, tag, comm, ierro type(MPI_Datatype), intent(in) :: datatype type(MPI_Comm), intent(in) :: comm integer, optional, intent(out) :: ierror - + integer :: err print *, "WARNING, testing of double precision arrays not yet supported with subarrays" diff --git a/ompi/mpi/fortran/use-mpi-f08/Makefile.am b/ompi/mpi/fortran/use-mpi-f08/Makefile.am index f5ed4c12d14..75bbf7600d8 100644 --- a/ompi/mpi/fortran/use-mpi-f08/Makefile.am +++ b/ompi/mpi/fortran/use-mpi-f08/Makefile.am @@ -7,13 +7,14 @@ # Copyright (c) 2012-2013 Inria. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2015 Research Organization for Information Science +# Copyright (c) 2015-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -33,7 +34,7 @@ MOSTLYCLEANFILES = *.mod CLEANFILES += *.i90 -lib_LTLIBRARIES = libmpi_usempif08.la +lib_LTLIBRARIES = lib@OMPI_LIBMPI_NAME@_usempif08.la module_sentinel_file = \ libforce_usempif08_internal_modules_to_be_built.la @@ -43,8 +44,6 @@ noinst_LTLIBRARIES = $(module_sentinel_file) mpi-f08.lo: $(module_sentinel_file) mpi-f08.lo: mpi-f08.F90 mpi-f08.lo: mpi-f-interfaces-bind.h pmpi-f-interfaces-bind.h -mpi-f08.lo: attr-fn-f08-callback-interfaces.h -mpi-f08.lo: conversion-fn-null-f08-interface.h mpi-f08.lo: sizeof_f08.h # @@ -92,6 +91,8 @@ mpi_api_files = \ add_error_class_f08.F90 \ add_error_code_f08.F90 \ add_error_string_f08.F90 \ + aint_add_f08.F90 \ + aint_diff_f08.F90 \ allgather_f08.F90 \ allgatherv_f08.F90 \ alloc_mem_f08.F90 \ @@ -327,6 +328,7 @@ mpi_api_files = \ type_set_attr_f08.F90 \ type_set_name_f08.F90 \ type_size_f08.F90 \ + type_size_x_f08.F90 \ type_vector_f08.F90 \ unpack_external_f08.F90 \ unpack_f08.F90 \ @@ -392,9 +394,13 @@ mpi_api_files += \ file_get_view_f08.F90 \ file_iread_at_f08.F90 \ file_iread_f08.F90 \ + file_iread_at_all_f08.F90 \ + file_iread_all_f08.F90 \ file_iread_shared_f08.F90 \ file_iwrite_at_f08.F90 \ file_iwrite_f08.F90 \ + file_iwrite_at_all_f08.F90 \ + file_iwrite_all_f08.F90 \ file_iwrite_shared_f08.F90 \ file_open_f08.F90 \ file_preallocate_f08.F90 \ @@ -444,6 +450,8 @@ pmpi_api_files = \ profile/padd_error_class_f08.F90 \ profile/padd_error_code_f08.F90 \ profile/padd_error_string_f08.F90 \ + profile/paint_add_f08.F90 \ + profile/paint_diff_f08.F90 \ profile/pallgather_f08.F90 \ profile/pallgatherv_f08.F90 \ profile/palloc_mem_f08.F90 \ @@ -525,6 +533,7 @@ pmpi_api_files = \ profile/pget_elements_f08.F90 \ profile/pget_elements_x_f08.F90 \ profile/pget_f08.F90 \ + profile/pget_library_version_f08.F90 \ profile/pget_processor_name_f08.F90 \ profile/pget_version_f08.F90 \ profile/pgraph_create_f08.F90 \ @@ -689,12 +698,15 @@ 
pmpi_api_files = \ profile/pwaitsome_f08.F90 \ profile/pwin_allocate_f08.F90 \ profile/pwin_allocate_shared_f08.F90 \ + profile/pwin_attach_f08.F90 \ profile/pwin_call_errhandler_f08.F90 \ profile/pwin_complete_f08.F90 \ + profile/pwin_create_dynamic_f08.F90 \ profile/pwin_create_errhandler_f08.F90 \ profile/pwin_create_f08.F90 \ profile/pwin_create_keyval_f08.F90 \ profile/pwin_delete_attr_f08.F90 \ + profile/pwin_detach_f08.F90 \ profile/pwin_fence_f08.F90 \ profile/pwin_flush_f08.F90 \ profile/pwin_flush_all_f08.F90 \ @@ -705,12 +717,14 @@ pmpi_api_files = \ profile/pwin_get_attr_f08.F90 \ profile/pwin_get_errhandler_f08.F90 \ profile/pwin_get_group_f08.F90 \ + profile/pwin_get_info_f08.F90 \ profile/pwin_get_name_f08.F90 \ profile/pwin_lock_f08.F90 \ profile/pwin_lock_all_f08.F90 \ profile/pwin_post_f08.F90 \ profile/pwin_set_attr_f08.F90 \ profile/pwin_set_errhandler_f08.F90 \ + profile/pwin_set_info_f08.F90 \ profile/pwin_set_name_f08.F90 \ profile/pwin_shared_query_f08.F90 \ profile/pwin_start_f08.F90 \ @@ -780,26 +794,22 @@ pmpi_api_files += \ profile/pregister_datarep_f08.F90 endif -libmpi_usempif08_la_SOURCES = \ +lib@OMPI_LIBMPI_NAME@_usempif08_la_SOURCES = \ $(mpi_api_files) \ $(pmpi_api_files) \ mpi-f-interfaces-bind.h \ pmpi-f-interfaces-bind.h \ - attr-fn-f08-callback-interfaces.h \ - conversion-fn-null-f08-interface.h \ mpi-f08.F90 \ - mpi-f-interfaces-bind.h pmpi-f-interfaces-bind.h \ - attr-fn-f08-callback-interfaces.h \ - conversion-fn-null-f08-interface.h \ + buffer_detach.c \ constants.h \ constants.c # These are generated; do not ship them -nodist_libmpi_usempif08_la_SOURCES = +nodist_lib@OMPI_LIBMPI_NAME@_usempif08_la_SOURCES = if BUILD_FORTRAN_SIZEOF SIZEOF_H = sizeof_f08.h -nodist_libmpi_usempif08_la_SOURCES += \ +nodist_lib@OMPI_LIBMPI_NAME@_usempif08_la_SOURCES += \ sizeof_f08.h \ sizeof_f08.f90 \ profile/psizeof_f08.f90 @@ -809,13 +819,13 @@ endif # Include the mpi_f08-based MPI extensions in libmpi_usempif08, too. 
# -libmpi_usempif08_la_LIBADD = \ +lib@OMPI_LIBMPI_NAME@_usempif08_la_LIBADD = \ $(module_sentinel_file) \ $(OMPI_MPIEXT_USEMPIF08_LIBS) \ - $(top_builddir)/ompi/mpi/fortran/mpif-h/libmpi_mpifh.la \ - $(top_builddir)/ompi/libmpi.la -libmpi_usempif08_la_DEPENDENCIES = $(module_sentinel_file) -libmpi_usempif08_la_LDFLAGS = -version-info $(libmpi_usempif08_so_version) + $(top_builddir)/ompi/mpi/fortran/mpif-h/lib@OMPI_LIBMPI_NAME@_mpifh.la \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la +lib@OMPI_LIBMPI_NAME@_usempif08_la_DEPENDENCIES = $(module_sentinel_file) +lib@OMPI_LIBMPI_NAME@_usempif08_la_LDFLAGS = -version-info $(libmpi_usempif08_so_version) # # Automake doesn't do Fortran dependency analysis, so must list them @@ -830,8 +840,6 @@ $(pmpi_api_lo_files): mpi-f08.lo mpi-f08.lo: $(module_sentinel_file) $(SIZEOF_H) mpi-f08.lo: mpi-f-interfaces-bind.h pmpi-f-interfaces-bind.h -mpi-f08.lo: attr-fn-f08-callback-interfaces.h -mpi-f08.lo: conversion-fn-null-f08-interface.h ########################################################################### @@ -841,6 +849,7 @@ libforce_usempif08_internal_modules_to_be_built_la_SOURCES = \ mpi-f08-types.F90 \ mpi-f08-interfaces.F90 \ mpi-f08-interfaces-callbacks.F90 \ + mpi-f08-callbacks.F90 \ pmpi-f08-interfaces.F90 config_h = \ @@ -860,6 +869,9 @@ mpi-f08-interfaces.lo: mpi-f08-interfaces-callbacks.lo mpi-f08-interfaces-callbacks.lo: $(config_h) mpi-f08-interfaces-callbacks.lo: mpi-f08-interfaces-callbacks.F90 mpi-f08-interfaces-callbacks.lo: mpi-f08-types.lo +mpi-f08-callbacks.lo: $(config_h) +mpi-f08-callbacks.lo: mpi-f08-callbacks.F90 +mpi-f08-callbacks.lo: mpi-f08-types.lo pmpi-f08-interfaces.lo: $(config_h) pmpi-f08-interfaces.lo: pmpi-f08-interfaces.F90 pmpi-f08-interfaces.lo: mpi-f08-interfaces-callbacks.lo diff --git a/ompi/mpi/fortran/use-mpi-f08/aint_add_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/aint_add_f08.F90 new file mode 100644 index 00000000000..afe3874d036 --- /dev/null +++ 
b/ompi/mpi/fortran/use-mpi-f08/aint_add_f08.F90 @@ -0,0 +1,18 @@ +! -*- f90 -*- +! +! Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2015 Los Alamos National Security, LLC. +! All Rights reserved. +! $COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +function MPI_Aint_add_f08(addr1, addr2) + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + use :: mpi_f08, only : ompi_aint_add_f + implicit none + INTEGER(MPI_ADDRESS_KIND) :: MPI_Aint_add_f08 + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr1 + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr2 + MPI_Aint_add_f08 = ompi_aint_add_f(addr1, addr2) +end function MPI_Aint_add_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/aint_diff_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/aint_diff_f08.F90 new file mode 100644 index 00000000000..97919f1da97 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/aint_diff_f08.F90 @@ -0,0 +1,18 @@ +! -*- f90 -*- +! +! Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2015 Los Alamos National Security, LLC. +! All Rights reserved. +! $COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +function MPI_Aint_diff_f08(addr1, addr2) + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + use :: mpi_f08, only : ompi_aint_diff_f + implicit none + INTEGER(MPI_ADDRESS_KIND) :: MPI_Aint_diff_f08 + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr1 + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr2 + MPI_Aint_diff_f08 = ompi_aint_diff_f(addr1, addr2) +end function MPI_Aint_diff_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/attr-fn-f08-callback-interfaces.h b/ompi/mpi/fortran/use-mpi-f08/attr-fn-f08-callback-interfaces.h deleted file mode 100644 index 238d930210c..00000000000 --- a/ompi/mpi/fortran/use-mpi-f08/attr-fn-f08-callback-interfaces.h +++ /dev/null @@ -1,114 +0,0 @@ -! -*- f90 -*- -! Copyright (c) 2004-2005 The Regents of the University of California. -! All rights reserved. -! 
Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2013 Los Alamos National Security, LLC. All rights -! reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -! -! F08 handle (e.g., Type(MPI_Comm)) pre-defined attribute callback -! function interfaces -! - -interface - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - subroutine MPI_NULL_COPY_FN( comm, comm_keyval, extra_state, & - attribute_val_in, attribute_val_out, & - flag, ierr ) - use mpi_f08_types - implicit none - type(MPI_Comm) :: comm - integer :: comm_keyval, extra_state - integer :: attribute_val_in, attribute_val_out, ierr - logical :: flag - end subroutine MPI_NULL_COPY_FN - - subroutine MPI_NULL_DELETE_FN( comm, comm_keyval, attribute_val_out, & - extra_state, ierr ) - use mpi_f08_types - implicit none - type(MPI_Comm) :: comm - integer :: comm_keyval, attribute_val_out, extra_state, ierr - end subroutine MPI_NULL_DELETE_FN - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - subroutine MPI_COMM_NULL_COPY_FN( comm, comm_keyval, extra_state, & - attribute_val_in, attribute_val_out, & - flag, ierr ) - use mpi_f08_types - implicit none - type(MPI_Comm) :: comm - integer :: comm_keyval - integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out - integer :: ierr - logical :: flag - end subroutine MPI_COMM_NULL_COPY_FN - - subroutine MPI_COMM_NULL_DELETE_FN(comm, comm_keyval, attribute_val_out, & - extra_state, ierr ) - use mpi_f08_types - implicit none - type(MPI_Comm) :: comm - integer :: comm_keyval - integer(kind=MPI_ADDRESS_KIND) :: attribute_val_out, extra_state - integer :: ierr - end subroutine MPI_COMM_NULL_DELETE_FN - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- - subroutine MPI_TYPE_NULL_COPY_FN( type, type_keyval, extra_state, & - attribute_val_in, attribute_val_out, & - flag, ierr ) - use mpi_f08_types - implicit none - type(MPI_Datatype) :: type - integer :: type_keyval - integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out - integer :: ierr - logical :: flag - end subroutine MPI_TYPE_NULL_COPY_FN - - subroutine MPI_TYPE_NULL_DELETE_FN( type, type_keyval, attribute_val_out, & - extra_state, ierr ) - use mpi_f08_types - implicit none - type(MPI_Datatype) :: type - integer :: type_keyval - integer(kind=MPI_ADDRESS_KIND) :: attribute_val_out, extra_state - integer :: ierr - end subroutine MPI_TYPE_NULL_DELETE_FN - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - subroutine MPI_WIN_NULL_COPY_FN( window, win_keyval, extra_state, & - attribute_val_in, attribute_val_out, & - flag, ierr ) - use mpi_f08_types - implicit none - type(MPI_Win) :: window - integer :: win_keyval - integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out - integer :: ierr - logical :: flag - end subroutine MPI_WIN_NULL_COPY_FN - - subroutine MPI_WIN_NULL_DELETE_FN( window, win_keyval, attribute_val_out, & - extra_state, ierr ) - use mpi_f08_types - implicit none - type(MPI_Win) :: window - integer :: win_keyval - integer(kind=MPI_ADDRESS_KIND) :: attribute_val_out, extra_state - integer :: ierr - end subroutine MPI_WIN_NULL_DELETE_FN - -end interface diff --git a/ompi/mpi/fortran/use-mpi-f08/buffer_detach.c b/ompi/mpi/fortran/use-mpi-f08/buffer_detach.c new file mode 100644 index 00000000000..3d24b3f9656 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/buffer_detach.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. 
All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "mpi.h" +#include "ompi/mpi/fortran/base/fint_2_int.h" + +/* + * This function implemented in this file is only called from Fortran, + * so we never bothered to put a prototype for it in any C header + * file. To avoid compiler warnings about no prototype, we prototype + * it here. + */ +OMPI_DECLSPEC void ompi_buffer_detach_f08(char *buffer, MPI_Fint *size, + MPI_Fint *ierr); + +OMPI_DECLSPEC void pompi_buffer_detach_f08(char *buffer, MPI_Fint *size, + MPI_Fint *ierr); + +/* (this comment is repeated in ompi/mpi/fortran/mpif-h/buffer_detach_f.c) + * + * MPI-3.1 section 3.6, page 45, states that the mpif.h and mpi module + * interfaces for MPI_BUFFER_DETACH ignore the buffer argument. + * Therefore, for the mpif.h and mpi module interfaces, we use a dummy + * variable and leave the value handed in alone. + * + * The mpi_f08 implementation for MPI_BUFFER_DETACH therefore is a + * separate routine in the use-mpi-f08 directory (it's not built in + * the mpif-h directory because of all the different combinations of + * supporting weak symbols (or not), building the profiling layer (or + * not), etc.). + * + * Note that we only need to build this function once -- the F08 + * interfaces for MPI_BUFFER_DETACH and PMPI_BUFFER_DETACH both + * bind(C) to the name ompi_buffer_detach_f08.
+ */ +void ompi_buffer_detach_f08(char *buffer, MPI_Fint *size, MPI_Fint *ierr) +{ + int c_ierr; + void *dummy; + OMPI_SINGLE_NAME_DECL(size); + + c_ierr = PMPI_Buffer_detach(&dummy, OMPI_SINGLE_NAME_CONVERT(size)); + if (NULL != ierr) { + *ierr = OMPI_INT_2_FINT(c_ierr); + } + + if (MPI_SUCCESS == c_ierr) { + OMPI_SINGLE_INT_2_FINT(size); + *(void **)buffer = dummy; + } +} + +void pompi_buffer_detach_f08(char *buffer, MPI_Fint *size, MPI_Fint *ierr) +{ + ompi_buffer_detach_f08(buffer, size, ierr); +} diff --git a/ompi/mpi/fortran/use-mpi-f08/buffer_detach_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/buffer_detach_f08.F90 index 4249416e564..7c7815175d2 100644 --- a/ompi/mpi/fortran/use-mpi-f08/buffer_detach_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/buffer_detach_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! $COPYRIGHT$ @@ -9,8 +9,9 @@ subroutine MPI_Buffer_detach_f08(buffer_addr,size,ierror) use :: mpi_f08, only : ompi_buffer_detach_f + USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_PTR implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buffer_addr + TYPE(C_PTR), INTENT(OUT) :: buffer_addr INTEGER, INTENT(OUT) :: size INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/comm_spawn_multiple_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/comm_spawn_multiple_f08.F90 index 5c549bf5d30..b3842eac64c 100644 --- a/ompi/mpi/fortran/use-mpi-f08/comm_spawn_multiple_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/comm_spawn_multiple_f08.F90 @@ -3,6 +3,8 @@ ! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All Rights reserved. +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! 
$COPYRIGHT$ subroutine MPI_Comm_spawn_multiple_f08(count,array_of_commands,array_of_argv, & @@ -26,8 +28,9 @@ subroutine MPI_Comm_spawn_multiple_f08(count,array_of_commands,array_of_argv, & ! call ompi_comm_spawn_multiple_f(count,array_of_commands,array_of_argv, & - array_of_maxprocs,array_of_info(:)%MPI_VAL,root, & - comm%MPI_VAL,intercomm%MPI_VAL,array_of_errcodes,c_ierror) + array_of_maxprocs,array_of_info(:)%MPI_VAL,root, & + comm%MPI_VAL,intercomm%MPI_VAL,array_of_errcodes,c_ierror, & + len(array_of_commands), len(array_of_argv)) if (present(ierror)) ierror = c_ierror end subroutine MPI_Comm_spawn_multiple_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/constants.c b/ompi/mpi/fortran/use-mpi-f08/constants.c index 797c8ebf7fb..8b6c5353362 100644 --- a/ompi/mpi/fortran/use-mpi-f08/constants.c +++ b/ompi/mpi/fortran/use-mpi-f08/constants.c @@ -1,5 +1,7 @@ /* - * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* * $COPYRIGHT$ * @@ -20,76 +22,83 @@ */ typedef MPI_Fint ompi_fortran_08_handle_t[OMPI_FORTRAN_F08_HANDLE_SIZE / sizeof(MPI_Fint)]; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_comm_world = {OMPI_MPI_COMM_WORLD}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_comm_self = {OMPI_MPI_COMM_SELF}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_group_empty = {OMPI_MPI_GROUP_EMPTY}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_errors_are_fatal = {OMPI_MPI_ERRORS_ARE_FATAL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_errors_return = {OMPI_MPI_ERRORS_RETURN}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_message_no_proc = {OMPI_MPI_MESSAGE_NO_PROC}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_info_env = {OMPI_MPI_INFO_ENV}; +#if OMPI_FORTRAN_F08_HANDLE_ALIGNMENT > OMPI_FORTRAN_F08_HANDLE_SIZE +#define OMPI_F08_HANDLE_ALIGNED __opal_attribute_aligned__(OMPI_FORTRAN_F08_HANDLE_ALIGNMENT) +#else +#define OMPI_F08_HANDLE_ALIGNED +#endif + +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_comm_world = {OMPI_MPI_COMM_WORLD}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_comm_self = {OMPI_MPI_COMM_SELF}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_group_empty = {OMPI_MPI_GROUP_EMPTY}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_errors_are_fatal = {OMPI_MPI_ERRORS_ARE_FATAL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_errors_return = {OMPI_MPI_ERRORS_RETURN}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_message_no_proc = {OMPI_MPI_MESSAGE_NO_PROC}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_info_env = {OMPI_MPI_INFO_ENV}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_max = {OMPI_MPI_MAX}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_min = {OMPI_MPI_MIN}; -OMPI_DECLSPEC ompi_fortran_08_handle_t 
ompi_f08_mpi_sum = {OMPI_MPI_SUM}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_prod = {OMPI_MPI_PROD}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_land = {OMPI_MPI_LAND}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_band = {OMPI_MPI_BAND}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_lor = {OMPI_MPI_LOR}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_bor = {OMPI_MPI_BOR}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_lxor = {OMPI_MPI_LXOR}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_bxor = {OMPI_MPI_BXOR}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_maxloc = {OMPI_MPI_MAXLOC}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_minloc = {OMPI_MPI_MINLOC}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_replace = {OMPI_MPI_REPLACE}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_max = {OMPI_MPI_MAX}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_min = {OMPI_MPI_MIN}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_sum = {OMPI_MPI_SUM}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_prod = {OMPI_MPI_PROD}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_land = {OMPI_MPI_LAND}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_band = {OMPI_MPI_BAND}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_lor = {OMPI_MPI_LOR}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_bor = {OMPI_MPI_BOR}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_lxor = {OMPI_MPI_LXOR}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_bxor = {OMPI_MPI_BXOR}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_maxloc = {OMPI_MPI_MAXLOC}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED 
ompi_f08_mpi_minloc = {OMPI_MPI_MINLOC}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_replace = {OMPI_MPI_REPLACE}; /* * NULL "handles" (indices) */ -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_comm_null = {OMPI_MPI_COMM_NULL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_datatype_null = {OMPI_MPI_DATATYPE_NULL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_errhandler_null = {OMPI_MPI_ERRHANDLER_NULL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_group_null = {OMPI_MPI_GROUP_NULL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_info_null = {OMPI_MPI_INFO_NULL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_message_null = {OMPI_MPI_MESSAGE_NULL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_op_null = {OMPI_MPI_OP_NULL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_request_null = {OMPI_MPI_REQUEST_NULL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_win_null = {OMPI_MPI_WIN_NULL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_comm_null = {OMPI_MPI_COMM_NULL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_datatype_null = {OMPI_MPI_DATATYPE_NULL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_errhandler_null = {OMPI_MPI_ERRHANDLER_NULL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_group_null = {OMPI_MPI_GROUP_NULL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_info_null = {OMPI_MPI_INFO_NULL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_message_null = {OMPI_MPI_MESSAGE_NULL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_op_null = {OMPI_MPI_OP_NULL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_request_null = {OMPI_MPI_REQUEST_NULL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_win_null = 
{OMPI_MPI_WIN_NULL}; #if OMPI_PROVIDE_MPI_FILE_INTERFACE -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_file_null = {OMPI_MPI_FILE_NULL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_file_null = {OMPI_MPI_FILE_NULL}; #endif /* * common block items from ompi/include/mpif-common.h */ -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_byte = {OMPI_MPI_BYTE}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_packed = {OMPI_MPI_PACKED}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_ub = {OMPI_MPI_UB}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_lb = {OMPI_MPI_LB}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_character = {OMPI_MPI_CHARACTER}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_logical = {OMPI_MPI_LOGICAL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_integer = {OMPI_MPI_INTEGER}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_integer1 = {OMPI_MPI_INTEGER1}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_integer2 = {OMPI_MPI_INTEGER2}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_integer4 = {OMPI_MPI_INTEGER4}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_integer8 = {OMPI_MPI_INTEGER8}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_integer16 = {OMPI_MPI_INTEGER16}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_real = {OMPI_MPI_REAL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_real4 = {OMPI_MPI_REAL4}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_real8 = {OMPI_MPI_REAL8}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_real16 = {OMPI_MPI_REAL16}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_double_precision = {OMPI_MPI_DOUBLE_PRECISION}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_complex = {OMPI_MPI_COMPLEX}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_complex8 = {OMPI_MPI_COMPLEX8}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_complex16 = {OMPI_MPI_COMPLEX16}; -OMPI_DECLSPEC 
ompi_fortran_08_handle_t ompi_f08_mpi_complex32 = {OMPI_MPI_COMPLEX32}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_double_complex = {OMPI_MPI_DOUBLE_COMPLEX}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_2real = {OMPI_MPI_2REAL}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_2double_precision = {OMPI_MPI_2DOUBLE_PRECISION}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_2integer = {OMPI_MPI_2INTEGER}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_2complex = {OMPI_MPI_2COMPLEX}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_2double_complex = {OMPI_MPI_2DOUBLE_COMPLEX}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_real2 = {OMPI_MPI_REAL2}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_logical1 = {OMPI_MPI_LOGICAL1}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_logical2 = {OMPI_MPI_LOGICAL2}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_logical4 = {OMPI_MPI_LOGICAL4}; -OMPI_DECLSPEC ompi_fortran_08_handle_t ompi_f08_mpi_logical8 = {OMPI_MPI_LOGICAL8}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_aint = {OMPI_MPI_AINT}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_byte = {OMPI_MPI_BYTE}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_packed = {OMPI_MPI_PACKED}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_ub = {OMPI_MPI_UB}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_lb = {OMPI_MPI_LB}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_character = {OMPI_MPI_CHARACTER}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_logical = {OMPI_MPI_LOGICAL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_integer = {OMPI_MPI_INTEGER}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_integer1 = {OMPI_MPI_INTEGER1}; +OMPI_DECLSPEC 
ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_integer2 = {OMPI_MPI_INTEGER2}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_integer4 = {OMPI_MPI_INTEGER4}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_integer8 = {OMPI_MPI_INTEGER8}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_integer16 = {OMPI_MPI_INTEGER16}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_real = {OMPI_MPI_REAL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_real4 = {OMPI_MPI_REAL4}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_real8 = {OMPI_MPI_REAL8}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_real16 = {OMPI_MPI_REAL16}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_double_precision = {OMPI_MPI_DOUBLE_PRECISION}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_complex = {OMPI_MPI_COMPLEX}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_complex8 = {OMPI_MPI_COMPLEX8}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_complex16 = {OMPI_MPI_COMPLEX16}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_complex32 = {OMPI_MPI_COMPLEX32}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_double_complex = {OMPI_MPI_DOUBLE_COMPLEX}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_2real = {OMPI_MPI_2REAL}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_2double_precision = {OMPI_MPI_2DOUBLE_PRECISION}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_2integer = {OMPI_MPI_2INTEGER}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_2complex = {OMPI_MPI_2COMPLEX}; +OMPI_DECLSPEC 
ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_2double_complex = {OMPI_MPI_2DOUBLE_COMPLEX}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_real2 = {OMPI_MPI_REAL2}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_logical1 = {OMPI_MPI_LOGICAL1}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_logical2 = {OMPI_MPI_LOGICAL2}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_logical4 = {OMPI_MPI_LOGICAL4}; +OMPI_DECLSPEC ompi_fortran_08_handle_t OMPI_F08_HANDLE_ALIGNED ompi_f08_mpi_logical8 = {OMPI_MPI_LOGICAL8}; diff --git a/ompi/mpi/fortran/use-mpi-f08/conversion-fn-null-f08-interface.h b/ompi/mpi/fortran/use-mpi-f08/conversion-fn-null-f08-interface.h deleted file mode 100644 index c653de6f49b..00000000000 --- a/ompi/mpi/fortran/use-mpi-f08/conversion-fn-null-f08-interface.h +++ /dev/null @@ -1,35 +0,0 @@ -! -*- f90 -*- -! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. -! $COPYRIGHT$ -! -! Additional copyrights may follow -! -! $HEADER$ -! - -! Note about these declarations: these are "external" functions in -! mpif-common.h. However, if we don't declare them here, compilers will add -! them to the "mpi" module namespace, and result in linker errors if MPI -! F90 applications try to use them. because the implementations of -! these functions are not in the MPI module namespace -- they're the F77 -! functions. - -! -! F08 handle pre-defined conversion callback function interface -! 
- -interface - - subroutine MPI_CONVERSION_FN_NULL(userbuf, datatype, count, filebuf, & - position, extra_state, ierror) - use mpi_f08_types - implicit none - character(len=*), intent(in) :: filebuf - character(len=*), intent(out) :: userbuf - type(MPI_Datatype) :: datatype - integer, intent(in) :: count, ierror - integer(kind=MPI_OFFSET_KIND), intent(in) :: position - integer(kind=MPI_ADDRESS_KIND), intent(in) :: extra_state - end subroutine MPI_CONVERSION_FN_NULL - -end interface diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iread_all_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iread_all_f08.F90 new file mode 100644 index 00000000000..f280e911c30 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/file_iread_all_f08.F90 @@ -0,0 +1,25 @@ +! -*- f90 -*- +! +! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2012 Los Alamos National Security, LLC. +! All Rights reserved. +! $COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +subroutine MPI_File_iread_all_f08(fh,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request + use :: mpi_f08, only : ompi_file_iread_all_f + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf ! no INTENT(IN): the pending read writes buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_file_iread_all_f(fh%MPI_VAL,buf,count,datatype%MPI_VAL,request%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_File_iread_all_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iread_at_all_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iread_at_all_f08.F90 new file mode 100644 index 00000000000..9cc7f317df9 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/file_iread_at_all_f08.F90 @@ -0,0 +1,27 @@ +! -*- f90 -*- +! +! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved.
+! Copyright (c) 2009-2012 Los Alamos National Security, LLC. +! All Rights reserved. +! $COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +subroutine MPI_File_iread_at_all_f08(fh,offset,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request, MPI_OFFSET_KIND + use :: mpi_f08, only : ompi_file_iread_at_all_f + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset + OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf ! no INTENT(IN): the pending read writes buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_file_iread_at_all_f(fh%MPI_VAL,offset,buf,count,& + datatype%MPI_VAL,request%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_File_iread_at_all_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_all_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_all_f08.F90 new file mode 100644 index 00000000000..c36c68abf10 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_all_f08.F90 @@ -0,0 +1,26 @@ +! -*- f90 -*- +! +! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2012 Los Alamos National Security, LLC. +! All Rights reserved. +!
$COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +subroutine MPI_File_iwrite_all_f08(fh,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request + use :: mpi_f08, only : ompi_file_iwrite_all_f + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_file_iwrite_all_f(fh%MPI_VAL,buf,count,& + datatype%MPI_VAL,request%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_File_iwrite_all_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/file_iwrite_at_all_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_at_all_f08.F90 new file mode 100644 index 00000000000..315b466ffea --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/file_iwrite_at_all_f08.F90 @@ -0,0 +1,27 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2012 Los Alamos National Security, LLC. +! All Rights reserved. +! 
$COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +subroutine MPI_File_iwrite_at_all_f08(fh,offset,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request, MPI_OFFSET_KIND + use :: mpi_f08, only : ompi_file_iwrite_at_all_f + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_file_iwrite_at_all_f(fh%MPI_VAL,offset,buf,count,& + datatype%MPI_VAL,request%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_File_iwrite_at_all_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/free_mem_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/free_mem_f08.F90 index c84daf4f7a1..cf67c56a563 100644 --- a/ompi/mpi/fortran/use-mpi-f08/free_mem_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/free_mem_f08.F90 @@ -1,19 +1,16 @@ ! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All Rights reserved. +! All Rights reserved. +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ -! -! This file creates mappings between MPI C types (e.g., MPI_Comm) and -! variables (e.g., MPI_COMM_WORLD) and corresponding Fortran names -! (type(MPI_Comm_world) and MPI_COMM_WORLD, respectively). 
#include "ompi/mpi/fortran/configure-fortran-output.h" subroutine MPI_Free_mem_f08(base,ierror) - use :: mpi_f08_types, only : MPI_ADDRESS_KIND use :: mpi_f08, only : ompi_free_mem_f implicit none - INTEGER(MPI_ADDRESS_KIND), DIMENSION(*) OMPI_ASYNCHRONOUS :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/get_library_version_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/get_library_version_f08.F90 index 1f22f5d9f23..037c38dac2d 100644 --- a/ompi/mpi/fortran/use-mpi-f08/get_library_version_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/get_library_version_f08.F90 @@ -16,5 +16,5 @@ subroutine MPI_Get_library_version_f08(version,resultlen,ierror) call ompi_get_library_version_f(version,resultlen,c_ierror,len(version)) if (present(ierror)) ierror = c_ierror - + end subroutine MPI_Get_library_version_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/info_delete_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/info_delete_f08.F90 index 3135334c12d..1714443c76c 100644 --- a/ompi/mpi/fortran/use-mpi-f08/info_delete_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/info_delete_f08.F90 @@ -16,5 +16,5 @@ subroutine MPI_Info_delete_f08(info,key,ierror) call ompi_info_delete_f(info%MPI_VAL,key,c_ierror,len(key)) if (present(ierror)) ierror = c_ierror - + end subroutine MPI_Info_delete_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/mpi-f-interfaces-bind.h b/ompi/mpi/fortran/use-mpi-f08/mpi-f-interfaces-bind.h index a692e6055b0..9e2ad6060aa 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mpi-f-interfaces-bind.h +++ b/ompi/mpi/fortran/use-mpi-f08/mpi-f-interfaces-bind.h @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! Copyright (c) 2012 The University of Tennessee and The University @@ -124,6 +124,7 @@ ! 
Wasn't that simple? Here's the list of subroutines that are not ! prototyped in this file because they fall into case #1 or #2, above. ! +! Case #1: ! MPI_Cart_create ! MPI_Cart_get ! MPI_Cart_map @@ -137,15 +138,20 @@ ! MPI_File_set_atomicity ! MPI_Finalized ! MPI_Graph_create -! MPI_Improbe ! MPI_Info_get ! MPI_Info_get_valuelen ! MPI_Initialized ! MPI_Intercomm_merge -! MPI_Iprobe ! MPI_Is_thread_main ! MPI_Op_commutative ! MPI_Op_create +! MPI_Type_get_attr +! MPI_Win_get_attr +! MPI_Win_test +! +! Case #2: +! MPI_Iprobe +! MPI_Improbe ! MPI_Request_get_status ! MPI_Status_set_cancelled ! MPI_Test @@ -153,9 +159,6 @@ ! MPI_Testany ! MPI_Testsome ! MPI_Test_cancelled -! MPI_Type_get_attr -! MPI_Win_get_attr -! MPI_Win_test ! interface @@ -189,10 +192,14 @@ subroutine ompi_buffer_attach_f(buffer,size,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_buffer_attach_f +! Note that we have an F08-specific C implementation function for +! MPI_BUFFER_DETACH (i.e., it is different than the mpif.h / mpi +! module C implementation function). 
subroutine ompi_buffer_detach_f(buffer_addr,size,ierror) & - BIND(C, name="ompi_buffer_detach_f") + BIND(C, name="ompi_buffer_detach_f08") + USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_PTR implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buffer_addr + TYPE(C_PTR), INTENT(OUT) :: buffer_addr INTEGER, INTENT(OUT) :: size INTEGER, INTENT(OUT) :: ierror end subroutine ompi_buffer_detach_f @@ -1364,7 +1371,7 @@ subroutine ompi_comm_set_attr_f(comm,comm_keyval,attribute_val,ierror) & end subroutine ompi_comm_set_attr_f subroutine ompi_comm_set_info_f(comm,info,ierror) & - BIND(C, name="ompi_comm_get_info_f") + BIND(C, name="ompi_comm_set_info_f") implicit none INTEGER, INTENT(IN) :: comm INTEGER, INTENT(IN) :: info @@ -1780,6 +1787,24 @@ function ompi_wtime_f() & DOUBLE PRECISION :: ompi_wtime_f end function ompi_wtime_f +function ompi_aint_add_f(base,diff) & + BIND(C, name="ompi_aint_add_f") + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + implicit none + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: base + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: diff + INTEGER(MPI_ADDRESS_KIND) :: ompi_aint_add_f +end function ompi_aint_add_f + +function ompi_aint_diff_f(addr1,addr2) & + BIND(C, name="ompi_aint_diff_f") + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + implicit none + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr1 + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr2 + INTEGER(MPI_ADDRESS_KIND) :: ompi_aint_diff_f +end function ompi_aint_diff_f + subroutine ompi_abort_f(comm,errorcode,ierror) & BIND(C, name="ompi_abort_f") implicit none @@ -1929,9 +1954,8 @@ end subroutine ompi_finalize_f subroutine ompi_free_mem_f(base,ierror) & BIND(C, name="ompi_free_mem_f") - use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - INTEGER(MPI_ADDRESS_KIND), DIMENSION(*) OMPI_ASYNCHRONOUS :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base INTEGER, INTENT(OUT) :: ierror end subroutine ompi_free_mem_f @@ -2128,7 +2152,8 @@ end subroutine ompi_comm_spawn_f ! 
TODO - FIXME to use arrays of strings and pass strlen subroutine ompi_comm_spawn_multiple_f(count,array_of_commands, & array_of_argv, array_of_maxprocs,array_of_info,root, & - comm,intercomm,array_of_errcodes,ierror) & + comm,intercomm,array_of_errcodes,ierror, & + cmd_len, argv_len) & BIND(C, name="ompi_comm_spawn_multiple_f") use, intrinsic :: ISO_C_BINDING, only : C_CHAR implicit none @@ -2140,6 +2165,7 @@ subroutine ompi_comm_spawn_multiple_f(count,array_of_commands, & INTEGER, INTENT(OUT) :: intercomm INTEGER, INTENT(OUT) :: array_of_errcodes(*) INTEGER, INTENT(OUT) :: ierror + INTEGER, VALUE, INTENT(IN) :: cmd_len, argv_len end subroutine ompi_comm_spawn_multiple_f subroutine ompi_lookup_name_f(service_name,info,port_name,ierror, & @@ -2386,7 +2412,7 @@ subroutine ompi_win_attach_f(win,base,size,ierror) & implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: size - INTEGER, INTENT(OUT) :: win + INTEGER, INTENT(IN) :: win INTEGER, INTENT(OUT) :: ierror end subroutine ompi_win_attach_f @@ -2395,7 +2421,7 @@ subroutine ompi_win_detach_f(win,base,ierror) & use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base - INTEGER, INTENT(OUT) :: win + INTEGER, INTENT(IN) :: win INTEGER, INTENT(OUT) :: ierror end subroutine ompi_win_detach_f @@ -2727,6 +2753,30 @@ subroutine ompi_file_iread_at_f(fh,offset,buf,count,datatype,request,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_iread_at_f +subroutine ompi_file_iread_all_f(fh,buf,count,datatype,request,ierror) & + BIND(C, name="ompi_file_iread_all_f") + implicit none + INTEGER, INTENT(IN) :: fh + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_file_iread_all_f + +subroutine ompi_file_iread_at_all_f(fh,offset,buf,count,datatype,request,ierror) & + BIND(C, 
name="ompi_file_iread_at_all_f") + use :: mpi_f08_types, only : MPI_OFFSET_KIND + implicit none + INTEGER, INTENT(IN) :: fh + INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_file_iread_at_all_f + subroutine ompi_file_iread_shared_f(fh,buf,count,datatype,request,ierror) & BIND(C, name="ompi_file_iread_shared_f") implicit none @@ -2762,6 +2812,30 @@ subroutine ompi_file_iwrite_at_f(fh,offset,buf,count,datatype,request,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_file_iwrite_at_f +subroutine ompi_file_iwrite_all_f(fh,buf,count,datatype,request,ierror) & + BIND(C, name="ompi_file_iwrite_all_f") + implicit none + INTEGER, INTENT(IN) :: fh + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_file_iwrite_all_f + +subroutine ompi_file_iwrite_at_all_f(fh,offset,buf,count,datatype,request,ierror) & + BIND(C, name="ompi_file_iwrite_at_all_f") + use :: mpi_f08_types, only : MPI_OFFSET_KIND + implicit none + INTEGER, INTENT(IN) :: fh + INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine ompi_file_iwrite_at_all_f + subroutine ompi_file_iwrite_shared_f(fh,buf,count,datatype,request,ierror) & BIND(C, name="ompi_file_iwrite_shared_f") implicit none diff --git a/ompi/mpi/fortran/use-mpi-f08/mpi-f08-callbacks.F90 b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-callbacks.F90 new file mode 100644 index 00000000000..d992702ef2e --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-callbacks.F90 @@ -0,0 +1,142 @@ +! -*- f90 -*- +! 
Copyright (c) 2016 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! $COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +module mpi_f08_callbacks + +! MPI3.1, p270, 5-19 + +contains + +subroutine MPI_COMM_DUP_FN(oldcomm,comm_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Comm) :: oldcomm + integer :: comm_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .true. + attribute_val_out = attribute_val_in + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_COMM_NULL_COPY_FN(oldcomm,comm_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Comm) :: oldcomm + integer :: comm_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .false. + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_COMM_NULL_DELETE_FN(comm,comm_keyval, & + attribute_val, extra_state, ierror) + use mpi_f08_types + implicit none + type(MPI_Comm) :: comm + integer :: comm_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: attribute_val, extra_state + + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_TYPE_DUP_FN(oldtype,type_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Datatype) :: oldtype + integer :: type_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .true. 
+ attribute_val_out = attribute_val_in + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_TYPE_NULL_COPY_FN(oldtype,type_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Datatype) :: oldtype + integer :: type_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .false. + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_TYPE_NULL_DELETE_FN(datatype,type_keyval, & + attribute_val, extra_state, ierror) + use mpi_f08_types + implicit none + type(MPI_Datatype) :: datatype + integer :: type_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: attribute_val, extra_state + + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_WIN_DUP_FN(oldwin,win_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Win) :: oldwin + integer :: win_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .true. + attribute_val_out = attribute_val_in + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_WIN_NULL_COPY_FN(oldwin,win_keyval,extra_state, & + attribute_val_in,attribute_val_out,flag,ierror) + use mpi_f08_types + implicit none + type(MPI_Win) :: oldwin + integer :: win_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: extra_state, attribute_val_in, attribute_val_out + logical :: flag + + flag = .false. 
+ ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_WIN_NULL_DELETE_FN(win,win_keyval, & + attribute_val, extra_state, ierror) + use mpi_f08_types + implicit none + type(MPI_Win) :: win + integer :: win_keyval, ierror + integer(kind=MPI_ADDRESS_KIND) :: attribute_val, extra_state + + ierror = MPI_SUCCESS +end subroutine + +subroutine MPI_CONVERSION_FN_NULL(userbuf, datatype, count, & + filebuf, position, extra_state, ierror) + use, intrinsic :: iso_c_binding, only : c_ptr + use mpi_f08_types + implicit none + type(c_ptr), value :: userbuf, filebuf + type(MPI_Datatype) :: datatype + integer :: count, ierror + integer(kind=MPI_OFFSET_KIND) :: position + integer(kind=MPI_ADDRESS_KIND) :: extra_state + + ! Do nothing +end subroutine + +end module mpi_f08_callbacks diff --git a/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces-callbacks.F90 b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces-callbacks.F90 index 05a19509495..47801afefe3 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces-callbacks.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces-callbacks.F90 @@ -2,6 +2,8 @@ ! Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. +! Copyright (c) 2015-2016 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ #include "ompi/mpi/fortran/configure-fortran-output.h" diff --git a/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces.F90 b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces.F90 index 1cabdff5f29..0ef30a25139 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-interfaces.F90 @@ -1,7 +1,7 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. -! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +! 
Copyright (c) 2009-2015 Los Alamos National Security, LLC. ! All rights reserved. ! Copyright (c) 2012 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights @@ -71,13 +71,9 @@ end subroutine MPI_Buffer_attach_f08 interface MPI_Buffer_detach subroutine MPI_Buffer_detach_f08(buffer_addr,size,ierror) + USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_PTR implicit none - !DEC$ ATTRIBUTES NO_ARG_CHECK :: buffer_addr - !GCC$ ATTRIBUTES NO_ARG_CHECK :: buffer_addr - !$PRAGMA IGNORE_TKR buffer_addr - !DIR$ IGNORE_TKR buffer_addr - !IBM* IGNORE_TKR buffer_addr - OMPI_FORTRAN_IGNORE_TKR_TYPE :: buffer_addr + TYPE(C_PTR), INTENT(OUT) :: buffer_addr INTEGER, INTENT(OUT) :: size INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Buffer_detach_f08 @@ -2482,6 +2478,26 @@ function MPI_Wtime_f08( ) BIND(C,name="MPI_Wtime") end function MPI_Wtime_f08 end interface MPI_Wtime +interface MPI_Aint_add +function MPI_Aint_add_f08(base,diff) + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + implicit none + INTEGER(MPI_ADDRESS_KIND) :: base + INTEGER(MPI_ADDRESS_KIND) :: diff + INTEGER(MPI_ADDRESS_KIND) :: MPI_Aint_add_f08 +end function MPI_Aint_add_f08 +end interface MPI_Aint_add + +interface MPI_Aint_diff +function MPI_Aint_diff_f08(addr1,addr2) + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + implicit none + INTEGER(MPI_ADDRESS_KIND) :: addr1 + INTEGER(MPI_ADDRESS_KIND) :: addr2 + INTEGER(MPI_ADDRESS_KIND) :: MPI_Aint_diff_f08 +end function MPI_Aint_diff_f08 +end interface MPI_Aint_diff + interface MPI_Abort subroutine MPI_Abort_f08(comm,errorcode,ierror) use :: mpi_f08_types, only : MPI_Comm @@ -2666,15 +2682,13 @@ end subroutine MPI_Finalized_f08 ! be okay once the Interop TR is implemented. 
interface MPI_Free_mem subroutine MPI_Free_mem_f08(base,ierror) - use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: base !GCC$ ATTRIBUTES NO_ARG_CHECK :: base !$PRAGMA IGNORE_TKR base !DIR$ IGNORE_TKR base !IBM* IGNORE_TKR base -! INTEGER(MPI_ADDRESS_KIND), DIMENSION(*) OMPI_ASYNCHRONOUS :: base - INTEGER(MPI_ADDRESS_KIND), DIMENSION(*) :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Free_mem_f08 end interface MPI_Free_mem @@ -3247,7 +3261,7 @@ subroutine MPI_Win_attach_f08(win,base,size,ierror) !IBM* IGNORE_TKR base OMPI_FORTRAN_IGNORE_TKR_TYPE :: base INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: size - TYPE(MPI_Win), INTENT(OUT) :: win + TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Win_attach_f08 end interface MPI_Win_attach @@ -3262,7 +3276,7 @@ subroutine MPI_Win_detach_f08(win,base,ierror) !DIR$ IGNORE_TKR base !IBM* IGNORE_TKR base OMPI_FORTRAN_IGNORE_TKR_TYPE :: base - TYPE(MPI_Win), INTENT(OUT) :: win + TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine MPI_Win_detach_f08 end interface MPI_Win_detach @@ -3419,14 +3433,14 @@ subroutine MPI_Win_flush_local_f08(rank,win,ierror) end subroutine MPI_Win_flush_local_f08 end interface MPI_Win_flush_local -interface MPI_Win_flush_all_local -subroutine MPI_Win_flush_all_local_f08(win,ierror) +interface MPI_Win_flush_local_all +subroutine MPI_Win_flush_local_all_f08(win,ierror) use :: mpi_f08_types, only : MPI_Win implicit none TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror -end subroutine MPI_Win_flush_all_local_f08 -end interface MPI_Win_flush_all_local +end subroutine MPI_Win_flush_local_all_f08 +end interface MPI_Win_flush_local_all interface MPI_Win_flush_all subroutine MPI_Win_flush_all_f08(win,ierror) @@ -3683,6 +3697,43 @@ subroutine 
MPI_File_iread_at_f08(fh,offset,buf,count,datatype,request,ierror) end subroutine MPI_File_iread_at_f08 end interface MPI_File_iread_at +interface MPI_File_iread_all +subroutine MPI_File_iread_all_f08(fh,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + !DEC$ ATTRIBUTES NO_ARG_CHECK :: buf + !GCC$ ATTRIBUTES NO_ARG_CHECK :: buf + !$PRAGMA IGNORE_TKR buf + !DIR$ IGNORE_TKR buf + !IBM* IGNORE_TKR buf + OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_File_iread_all_f08 +end interface MPI_File_iread_all + +interface MPI_File_iread_at_all +subroutine MPI_File_iread_at_all_f08(fh,offset,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request, MPI_OFFSET_KIND + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset + !DEC$ ATTRIBUTES NO_ARG_CHECK :: buf + !GCC$ ATTRIBUTES NO_ARG_CHECK :: buf + !$PRAGMA IGNORE_TKR buf + !DIR$ IGNORE_TKR buf + !IBM* IGNORE_TKR buf + OMPI_FORTRAN_IGNORE_TKR_TYPE :: buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_File_iread_at_all_f08 +end interface MPI_File_iread_at_all + interface MPI_File_iread_shared subroutine MPI_File_iread_shared_f08(fh,buf,count,datatype,request,ierror) use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request @@ -3738,6 +3789,43 @@ subroutine MPI_File_iwrite_at_f08(fh,offset,buf,count,datatype,request,ierror) end subroutine MPI_File_iwrite_at_f08 end interface MPI_File_iwrite_at +interface MPI_File_iwrite_all +subroutine MPI_File_iwrite_all_f08(fh,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : 
MPI_File, MPI_Datatype, MPI_Request + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + !DEC$ ATTRIBUTES NO_ARG_CHECK :: buf + !GCC$ ATTRIBUTES NO_ARG_CHECK :: buf + !$PRAGMA IGNORE_TKR buf + !DIR$ IGNORE_TKR buf + !IBM* IGNORE_TKR buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_File_iwrite_all_f08 +end interface MPI_File_iwrite_all + +interface MPI_File_iwrite_at_all +subroutine MPI_File_iwrite_at_all_f08(fh,offset,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request, MPI_OFFSET_KIND + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset + !DEC$ ATTRIBUTES NO_ARG_CHECK :: buf + !GCC$ ATTRIBUTES NO_ARG_CHECK :: buf + !$PRAGMA IGNORE_TKR buf + !DIR$ IGNORE_TKR buf + !IBM* IGNORE_TKR buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_File_iwrite_at_all_f08 +end interface MPI_File_iwrite_at_all + interface MPI_File_iwrite_shared subroutine MPI_File_iwrite_shared_f08(fh,buf,count,datatype,request,ierror) use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request diff --git a/ompi/mpi/fortran/use-mpi-f08/mpi-f08-types.F90 b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-types.F90 index 43fcf5301e8..622d0ae574b 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mpi-f08-types.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mpi-f08-types.F90 @@ -1,8 +1,10 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. +! 
Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ ! ! This file creates mappings between MPI C types (e.g., MPI_Comm) and @@ -129,6 +131,7 @@ module mpi_f08_types ! They are defined in ompi/runtime/ompi_mpi_init.c ! + type(MPI_Datatype), bind(C, name="ompi_f08_mpi_aint") OMPI_PROTECTED :: MPI_AINT type(MPI_Datatype), bind(C, name="ompi_f08_mpi_byte") OMPI_PROTECTED :: MPI_BYTE type(MPI_Datatype), bind(C, name="ompi_f08_mpi_packed") OMPI_PROTECTED :: MPI_PACKED type(MPI_Datatype), bind(C, name="ompi_f08_mpi_ub") OMPI_PROTECTED :: MPI_UB @@ -164,15 +167,7 @@ module mpi_f08_types !... Special sentinel constants !------------------------------ - type(MPI_STATUS), bind(C, name="mpi_fortran_status_ignore") :: MPI_STATUS_IGNORE - type(MPI_STATUS), bind(C, name="mpi_fortran_statuses_ignore") :: MPI_STATUSES_IGNORE(1) - integer, bind(C, name="mpi_fortran_bottom") :: MPI_BOTTOM - integer, bind(C, name="mpi_fortran_in_place") :: MPI_IN_PLACE - integer, bind(C, name="mpi_fortran_argv_null") :: MPI_ARGV_NULL - integer, bind(C, name="mpi_fortran_argvs_null") :: MPI_ARGVS_NULL - integer, bind(C, name="mpi_fortran_errcodes_ignore") :: MPI_ERRCODES_IGNORE - integer, bind(C, name="mpi_fortran_unweighted") :: MPI_UNWEIGHTED - integer, bind(C, name="mpi_fortran_weights_empty") :: MPI_WEIGHTS_EMPTY +#include "mpif-f08-types.h" !... Interfaces for operators with handles !----------------------------------------- diff --git a/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 b/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 index 180f4fc84ae..43b6cb09109 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 @@ -13,10 +13,12 @@ ! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. +! Copyright (c) 2016 Research Organization for Information Science +! and Technology (RIST). 
All rights reserved. ! $COPYRIGHT$ -! +! ! Additional copyrights may follow -! +! ! $HEADER$ ! @@ -27,6 +29,7 @@ module mpi_f08 use mpi_f08_types use mpi_f08_interfaces ! this module contains the mpi_f08 interface declarations use pmpi_f08_interfaces ! this module contains the pmpi_f08 interface declarations + use mpi_f08_callbacks ! this module contains the mpi_f08 attribute callback subroutines ! ! Declaration of the interfaces to the ompi impl files @@ -35,14 +38,6 @@ module mpi_f08 #include "mpi-f-interfaces-bind.h" #include "pmpi-f-interfaces-bind.h" -! The MPI attribute callback functions - - include "attr-fn-f08-callback-interfaces.h" - -! The MPI_CONVERSION_FN_NULL function - - include "conversion-fn-null-f08-interface.h" - ! The sizeof interfaces include "sizeof_f08.h" diff --git a/ompi/mpi/fortran/use-mpi-f08/pmpi-f-interfaces-bind.h b/ompi/mpi/fortran/use-mpi-f08/pmpi-f-interfaces-bind.h index 7a8b5b1a64f..695ff644046 100644 --- a/ompi/mpi/fortran/use-mpi-f08/pmpi-f-interfaces-bind.h +++ b/ompi/mpi/fortran/use-mpi-f08/pmpi-f-interfaces-bind.h @@ -1,12 +1,14 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! Copyright (c) 2012 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. ! Copyright (c) 2012 Inria. All rights reserved. +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ ! ! This file provides the interface specifications for the MPI Fortran @@ -89,8 +91,15 @@ subroutine pompi_buffer_attach_f(buffer,size,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine pompi_buffer_attach_f +! Note that we have an F08-specific C implementation function for +! PMPI_BUFFER_DETACH (i.e., it is different than the mpif.h / mpi +! 
module C implementation function). +! +! Note, too, the "p" version of the C implementation +! function is a 1-line routine. It calls +! ompi_buffer_detach_f08 C function. subroutine pompi_buffer_detach_f(buffer_addr,size,ierror) & - BIND(C, name="pompi_buffer_detach_f") + BIND(C, name="pompi_buffer_detach_f08") implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buffer_addr INTEGER, INTENT(OUT) :: size @@ -1408,7 +1417,7 @@ subroutine pompi_type_set_name_f(type,type_name,ierror,type_name_len) & end subroutine pompi_type_set_name_f subroutine pompi_win_allocate_f(size, disp_unit, info, comm, & - baseptr, win, ierror) BIND(C, name="ompi_win_allocate_f") + baseptr, win, ierror) BIND(C, name="pompi_win_allocate_f") USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_PTR use :: mpi_f08_types, only : MPI_ADDRESS_KIND INTEGER(KIND=MPI_ADDRESS_KIND), INTENT(IN) :: size @@ -1421,7 +1430,7 @@ subroutine pompi_win_allocate_f(size, disp_unit, info, comm, & end subroutine pompi_win_allocate_f subroutine pompi_win_allocate_shared_f(size, disp_unit, info, comm, & - baseptr, win, ierror) BIND(C, name="ompi_win_allocate_shared_f") + baseptr, win, ierror) BIND(C, name="pompi_win_allocate_shared_f") USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_PTR use :: mpi_f08_types, only : MPI_ADDRESS_KIND INTEGER(KIND=MPI_ADDRESS_KIND), INTENT(IN) :: size @@ -1615,6 +1624,24 @@ end subroutine pompi_topo_test_f ! 
DOUBLE PRECISION :: MPI_Wtime_f !end function MPI_Wtime_f +function pompi_aint_add_f(base,diff) & + BIND(C, name="pompi_aint_add_f") + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + implicit none + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: base + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: diff + INTEGER(MPI_ADDRESS_KIND) :: pompi_aint_add_f +end function pompi_aint_add_f + +function pompi_aint_diff_f(addr1,addr2) & + BIND(C, name="pompi_aint_diff_f") + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + implicit none + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr1 + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr2 + INTEGER(MPI_ADDRESS_KIND) :: pompi_aint_diff_f +end function pompi_aint_diff_f + subroutine pompi_abort_f(comm,errorcode,ierror) & BIND(C, name="pompi_abort_f") implicit none @@ -1764,9 +1791,8 @@ end subroutine pompi_finalize_f subroutine pompi_free_mem_f(base,ierror) & BIND(C, name="pompi_free_mem_f") - use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none - INTEGER(MPI_ADDRESS_KIND), DIMENSION(*) OMPI_ASYNCHRONOUS :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base INTEGER, INTENT(OUT) :: ierror end subroutine pompi_free_mem_f @@ -1963,7 +1989,8 @@ end subroutine pompi_comm_spawn_f ! 
TODO - FIXME to use arrays of strings and pass strlen subroutine pompi_comm_spawn_multiple_f(count,array_of_commands, & array_of_argv, array_of_maxprocs,array_of_info,root, & - comm,intercomm,array_of_errcodes,ierror) & + comm,intercomm,array_of_errcodes,ierror, & + cmd_len, argv_len) & BIND(C, name="pompi_comm_spawn_multiple_f") use, intrinsic :: ISO_C_BINDING, only : C_CHAR implicit none @@ -1975,6 +2002,7 @@ subroutine pompi_comm_spawn_multiple_f(count,array_of_commands, & INTEGER, INTENT(OUT) :: intercomm INTEGER, INTENT(OUT) :: array_of_errcodes(*) INTEGER, INTENT(OUT) :: ierror + INTEGER, INTENT(IN) :: cmd_len, argv_len end subroutine pompi_comm_spawn_multiple_f subroutine pompi_lookup_name_f(service_name,info,port_name,ierror, & @@ -2508,6 +2536,30 @@ subroutine pompi_file_iread_at_f(fh,offset,buf,count,datatype,request,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine pompi_file_iread_at_f +subroutine pompi_file_iread_all_f(fh,buf,count,datatype,request,ierror) & + BIND(C, name="pompi_file_iread_all_f") + implicit none + INTEGER, INTENT(IN) :: fh + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine pompi_file_iread_all_f + +subroutine pompi_file_iread_at_all_f(fh,offset,buf,count,datatype,request,ierror) & + BIND(C, name="pompi_file_iread_at_all_f") + use :: mpi_f08_types, only : MPI_OFFSET_KIND + implicit none + INTEGER, INTENT(IN) :: fh + INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine pompi_file_iread_at_all_f + subroutine pompi_file_iread_shared_f(fh,buf,count,datatype,request,ierror) & BIND(C, name="pompi_file_iread_shared_f") implicit none @@ -2543,6 +2595,30 @@ subroutine 
pompi_file_iwrite_at_f(fh,offset,buf,count,datatype,request,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine pompi_file_iwrite_at_f +subroutine pompi_file_iwrite_all_f(fh,buf,count,datatype,request,ierror) & + BIND(C, name="pompi_file_iwrite_all_f") + implicit none + INTEGER, INTENT(IN) :: fh + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine pompi_file_iwrite_all_f + +subroutine pompi_file_iwrite_at_all_f(fh,offset,buf,count,datatype,request,ierror) & + BIND(C, name="pompi_file_iwrite_at_all_f") + use :: mpi_f08_types, only : MPI_OFFSET_KIND + implicit none + INTEGER, INTENT(IN) :: fh + INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buf + INTEGER, INTENT(IN) :: count + INTEGER, INTENT(IN) :: datatype + INTEGER, INTENT(OUT) :: request + INTEGER, INTENT(OUT) :: ierror +end subroutine pompi_file_iwrite_at_all_f + subroutine pompi_file_iwrite_shared_f(fh,buf,count,datatype,request,ierror) & BIND(C, name="pompi_file_iwrite_shared_f") implicit none diff --git a/ompi/mpi/fortran/use-mpi-f08/pmpi-f08-interfaces.F90 b/ompi/mpi/fortran/use-mpi-f08/pmpi-f08-interfaces.F90 index 4e9606a2b8a..99c6306d58d 100644 --- a/ompi/mpi/fortran/use-mpi-f08/pmpi-f08-interfaces.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/pmpi-f08-interfaces.F90 @@ -1,12 +1,14 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2013 Los Alamos National Security, LLC. ! All rights reserved. ! Copyright (c) 2012 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. ! Copyright (c) 2012 Inria. All rights reserved. +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ ! ! 
This file provides the interface specifications for the MPI Fortran @@ -2413,7 +2415,7 @@ end subroutine PMPI_Topo_test_f08 ! MPI_Wtick is not a wrapper function ! interface PMPI_Wtick -function PMPI_Wtick_f08( ) BIND(C,name="MPI_Wtick") +function PMPI_Wtick_f08( ) BIND(C,name="PMPI_Wtick") use, intrinsic :: ISO_C_BINDING implicit none DOUBLE PRECISION :: PMPI_Wtick_f08 @@ -2423,13 +2425,31 @@ end function PMPI_Wtick_f08 ! MPI_Wtime is not a wrapper function ! interface PMPI_Wtime -function PMPI_Wtime_f08( ) BIND(C,name="MPI_Wtime") +function PMPI_Wtime_f08( ) BIND(C,name="PMPI_Wtime") use, intrinsic :: ISO_C_BINDING implicit none DOUBLE PRECISION :: PMPI_Wtime_f08 end function PMPI_Wtime_f08 end interface PMPI_Wtime +interface PMPI_Aint_add +function PMPI_Aint_add_f08(base,diff) + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + implicit none + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: base, diff + INTEGER(MPI_ADDRESS_KIND) :: PMPI_Aint_add_f08 ! function result, as in paint_add_f08.F90 +end function PMPI_Aint_add_f08 +end interface PMPI_Aint_add + +interface PMPI_Aint_diff +function PMPI_Aint_diff_f08(addr1,addr2) + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + implicit none + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr1, addr2 + INTEGER(MPI_ADDRESS_KIND) :: PMPI_Aint_diff_f08 ! function result, as in paint_diff_f08.F90 +end function PMPI_Aint_diff_f08 +end interface PMPI_Aint_diff + interface PMPI_Abort subroutine PMPI_Abort_f08(comm,errorcode,ierror) use :: mpi_f08_types, only : MPI_Comm @@ -2615,15 +2635,13 @@ end subroutine PMPI_Finalized_f08 ! interface PMPI_Free_mem subroutine PMPI_Free_mem_f08(base,ierror) - use :: mpi_f08_types, only : MPI_ADDRESS_KIND implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: base !GCC$ ATTRIBUTES NO_ARG_CHECK :: base !$PRAGMA IGNORE_TKR base !DIR$ IGNORE_TKR base !IBM* IGNORE_TKR base -!
INTEGER(MPI_ADDRESS_KIND), DIMENSION(*) OMPI_ASYNCHRONOUS :: base - INTEGER(MPI_ADDRESS_KIND), DIMENSION(*) :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base INTEGER, OPTIONAL, INTENT(OUT) :: ierror end subroutine PMPI_Free_mem_f08 end interface PMPI_Free_mem @@ -3312,14 +3330,14 @@ subroutine PMPI_Win_flush_local_f08(rank,win,ierror) end subroutine PMPI_Win_flush_local_f08 end interface PMPI_Win_flush_local -interface PMPI_Win_flush_all_local -subroutine PMPI_Win_flush_all_local_f08(win,ierror) +interface PMPI_Win_flush_local_all +subroutine PMPI_Win_flush_local_all_f08(win,ierror) use :: mpi_f08_types, only : MPI_Win implicit none TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror -end subroutine PMPI_Win_flush_all_local_f08 -end interface PMPI_Win_flush_all_local +end subroutine PMPI_Win_flush_local_all_f08 +end interface PMPI_Win_flush_local_all interface PMPI_Win_flush_all subroutine PMPI_Win_flush_all_f08(win,ierror) @@ -3576,6 +3594,43 @@ subroutine PMPI_File_iread_at_f08(fh,offset,buf,count,datatype,request,ierror) end subroutine PMPI_File_iread_at_f08 end interface PMPI_File_iread_at +interface PMPI_File_iread_all +subroutine PMPI_File_iread_all_f08(fh,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + !DEC$ ATTRIBUTES NO_ARG_CHECK :: buf + !GCC$ ATTRIBUTES NO_ARG_CHECK :: buf + !$PRAGMA IGNORE_TKR buf + !DIR$ IGNORE_TKR buf + !IBM* IGNORE_TKR buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine PMPI_File_iread_all_f08 +end interface PMPI_File_iread_all + +interface PMPI_File_iread_at_all +subroutine PMPI_File_iread_at_all_f08(fh,offset,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request, 
MPI_OFFSET_KIND + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset + !DEC$ ATTRIBUTES NO_ARG_CHECK :: buf + !GCC$ ATTRIBUTES NO_ARG_CHECK :: buf + !$PRAGMA IGNORE_TKR buf + !DIR$ IGNORE_TKR buf + !IBM* IGNORE_TKR buf + OMPI_FORTRAN_IGNORE_TKR_TYPE OMPI_ASYNCHRONOUS :: buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine PMPI_File_iread_at_all_f08 +end interface PMPI_File_iread_at_all + interface PMPI_File_iread_shared subroutine PMPI_File_iread_shared_f08(fh,buf,count,datatype,request,ierror) use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request @@ -3631,6 +3686,43 @@ subroutine PMPI_File_iwrite_at_f08(fh,offset,buf,count,datatype,request,ierror) end subroutine PMPI_File_iwrite_at_f08 end interface PMPI_File_iwrite_at +interface PMPI_File_iwrite_all +subroutine PMPI_File_iwrite_all_f08(fh,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + !DEC$ ATTRIBUTES NO_ARG_CHECK :: buf + !GCC$ ATTRIBUTES NO_ARG_CHECK :: buf + !$PRAGMA IGNORE_TKR buf + !DIR$ IGNORE_TKR buf + !IBM* IGNORE_TKR buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine PMPI_File_iwrite_all_f08 +end interface PMPI_File_iwrite_all + +interface PMPI_File_iwrite_at_all +subroutine PMPI_File_iwrite_at_all_f08(fh,offset,buf,count,datatype,request,ierror) + use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request, MPI_OFFSET_KIND + implicit none + TYPE(MPI_File), INTENT(IN) :: fh + INTEGER(MPI_OFFSET_KIND), INTENT(IN) :: offset + !DEC$ ATTRIBUTES NO_ARG_CHECK :: buf + !GCC$ ATTRIBUTES NO_ARG_CHECK :: buf + !$PRAGMA 
IGNORE_TKR buf + !DIR$ IGNORE_TKR buf + !IBM* IGNORE_TKR buf + OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) OMPI_ASYNCHRONOUS :: buf + INTEGER, INTENT(IN) :: count + TYPE(MPI_Datatype), INTENT(IN) :: datatype + TYPE(MPI_Request), INTENT(OUT) :: request + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine PMPI_File_iwrite_at_all_f08 +end interface PMPI_File_iwrite_at_all + interface PMPI_File_iwrite_shared subroutine PMPI_File_iwrite_shared_f08(fh,buf,count,datatype,request,ierror) use :: mpi_f08_types, only : MPI_File, MPI_Datatype, MPI_Request diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/paint_add_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/paint_add_f08.F90 new file mode 100644 index 00000000000..d07e786568e --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/paint_add_f08.F90 @@ -0,0 +1,18 @@ +! -*- f90 -*- +! +! Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2015 Los Alamos National Security, LLC. +! All Rights reserved. +! $COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +function PMPI_Aint_add_f08(base, diff) + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + use :: mpi_f08, only : ompi_aint_add_f + implicit none + INTEGER(MPI_ADDRESS_KIND) :: PMPI_Aint_add_f08 + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: base + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: diff + PMPI_Aint_add_f08 = ompi_aint_add_f(base, diff) +end function PMPI_Aint_add_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/paint_diff_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/paint_diff_f08.F90 new file mode 100644 index 00000000000..0d46dee769b --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/paint_diff_f08.F90 @@ -0,0 +1,18 @@ +! -*- f90 -*- +! +! Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2015 Los Alamos National Security, LLC. +! All Rights reserved. +! 
$COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +function PMPI_Aint_diff_f08(addr1, addr2) + use :: mpi_f08_types, only : MPI_ADDRESS_KIND + use :: mpi_f08, only : ompi_aint_diff_f + implicit none + INTEGER(MPI_ADDRESS_KIND) :: PMPI_Aint_diff_f08 + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr1 + INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: addr2 + PMPI_Aint_diff_f08 = ompi_aint_diff_f(addr1, addr2) +end function PMPI_Aint_diff_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pbuffer_detach_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pbuffer_detach_f08.F90 index d39799b29a9..28125b90341 100644 --- a/ompi/mpi/fortran/use-mpi-f08/profile/pbuffer_detach_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pbuffer_detach_f08.F90 @@ -1,6 +1,6 @@ ! -*- f90 -*- ! -! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. ! $COPYRIGHT$ @@ -8,9 +8,10 @@ #include "ompi/mpi/fortran/configure-fortran-output.h" subroutine PMPI_Buffer_detach_f08(buffer_addr,size,ierror) + USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_PTR use :: mpi_f08, only : ompi_buffer_detach_f implicit none - OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: buffer_addr + TYPE(C_PTR), INTENT(OUT) :: buffer_addr INTEGER, INTENT(OUT) :: size INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pcomm_spawn_multiple_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pcomm_spawn_multiple_f08.F90 index e210e015847..6f9e2acd7fc 100644 --- a/ompi/mpi/fortran/use-mpi-f08/profile/pcomm_spawn_multiple_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pcomm_spawn_multiple_f08.F90 @@ -1,14 +1,11 @@ ! -*- f90 -*- ! ! Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. -! -! $COPYRIGHT$ -! -! -*- f90 -*- -! ! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. ! 
Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All Rights reserved. +! All Rights reserved. +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ subroutine PMPI_Comm_spawn_multiple_f08(count,array_of_commands,array_of_argv, & @@ -32,8 +29,9 @@ subroutine PMPI_Comm_spawn_multiple_f08(count,array_of_commands,array_of_argv, & ! call ompi_comm_spawn_multiple_f(count,array_of_commands,array_of_argv, & - array_of_maxprocs,array_of_info(:)%MPI_VAL,root, & - comm%MPI_VAL,intercomm%MPI_VAL,array_of_errcodes,c_ierror) + array_of_maxprocs,array_of_info(:)%MPI_VAL,root, & + comm%MPI_VAL,intercomm%MPI_VAL,array_of_errcodes,c_ierror, & + len(array_of_commands), len(array_of_argv)) if (present(ierror)) ierror = c_ierror end subroutine PMPI_Comm_spawn_multiple_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pfree_mem_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pfree_mem_f08.F90 index 39e0c595bbc..0a542b9321b 100644 --- a/ompi/mpi/fortran/use-mpi-f08/profile/pfree_mem_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pfree_mem_f08.F90 @@ -2,20 +2,17 @@ ! ! Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. -! All Rights reserved. +! All Rights reserved. +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ -! -! This file provides the interface specifications for the MPI Fortran -! API bindings. It effectively maps between public names ("MPI_Init") -! and the back-end implementation subroutine name (e.g., "ompi_init_f"). 
#include "ompi/mpi/fortran/configure-fortran-output.h" subroutine PMPI_Free_mem_f08(base,ierror) - use :: mpi_f08_types, only : MPI_ADDRESS_KIND use :: mpi_f08, only : ompi_free_mem_f implicit none - INTEGER(MPI_ADDRESS_KIND), DIMENSION(*) OMPI_ASYNCHRONOUS :: base + OMPI_FORTRAN_IGNORE_TKR_TYPE,INTENT(IN) :: base INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pget_library_version_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pget_library_version_f08.F90 index 106eacbb3e1..9ed2b1650a1 100644 --- a/ompi/mpi/fortran/use-mpi-f08/profile/pget_library_version_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pget_library_version_f08.F90 @@ -16,5 +16,5 @@ subroutine PMPI_Get_library_version_f08(version,resultlen,ierror) call ompi_get_library_version_f(version,resultlen,c_ierror,len(version)) if (present(ierror)) ierror = c_ierror - + end subroutine PMPI_Get_library_version_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pget_processor_name_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pget_processor_name_f08.F90 index c6c6e4a9a9c..13c951fb3ab 100644 --- a/ompi/mpi/fortran/use-mpi-f08/profile/pget_processor_name_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pget_processor_name_f08.F90 @@ -15,6 +15,6 @@ subroutine PMPI_Get_processor_name_f08(name,resultlen,ierror) call ompi_get_processor_name_f(name,resultlen,c_ierror,len(name)) if (present(ierror)) ierror = c_ierror - + end subroutine PMPI_Get_processor_name_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pinfo_delete_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pinfo_delete_f08.F90 index a5c03083e79..e6d783498d8 100644 --- a/ompi/mpi/fortran/use-mpi-f08/profile/pinfo_delete_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pinfo_delete_f08.F90 @@ -16,5 +16,5 @@ subroutine PMPI_Info_delete_f08(info,key,ierror) call ompi_info_delete_f(info%MPI_VAL,key,c_ierror,len(key)) if (present(ierror)) ierror = c_ierror - + end subroutine 
PMPI_Info_delete_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/ptype_size_x_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/ptype_size_x_f08.F90 index 24599dc5c07..e5344f50c2d 100644 --- a/ompi/mpi/fortran/use-mpi-f08/profile/ptype_size_x_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/profile/ptype_size_x_f08.F90 @@ -3,10 +3,12 @@ ! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2013 Los Alamos National Security, LLC. ! All rights reserved. +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ subroutine PMPI_Type_size_x_f08(datatype,size,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_ADDRESS_KIND, MPI_COUNT_KIND + use :: mpi_f08_types, only : MPI_Datatype, MPI_COUNT_KIND use :: mpi_f08, only : ompi_type_size_x_f implicit none TYPE(MPI_Datatype), INTENT(IN) :: datatype diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pwin_attach_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pwin_attach_f08.F90 index f7115856a9f..4114a822d03 100644 --- a/ompi/mpi/fortran/use-mpi-f08/profile/pwin_attach_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pwin_attach_f08.F90 @@ -12,7 +12,7 @@ subroutine PMPI_Win_attach_f08(win,base,size,ierror) implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: size - TYPE(MPI_Win), INTENT(OUT) :: win + TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pwin_detach_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pwin_detach_f08.F90 index 426d69890e6..67beec75966 100644 --- a/ompi/mpi/fortran/use-mpi-f08/profile/pwin_detach_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pwin_detach_f08.F90 @@ -11,7 +11,7 @@ subroutine PMPI_Win_detach_f08(win,base,ierror) use :: mpi_f08, only : ompi_win_detach_f implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base - TYPE(MPI_Win), 
INTENT(OUT) :: win + TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/tests/contig.f90 b/ompi/mpi/fortran/use-mpi-f08/tests/contig.f90 index 37ba13513ab..4bea9269935 100644 --- a/ompi/mpi/fortran/use-mpi-f08/tests/contig.f90 +++ b/ompi/mpi/fortran/use-mpi-f08/tests/contig.f90 @@ -17,6 +17,6 @@ subroutine print_array(A, count) BIND(C, name="print_array") print *, A(::2) call print_array(A, 10) - call print_array(A(::2), 5) + call print_array(A(::2), 5) end program diff --git a/ompi/mpi/fortran/use-mpi-f08/type_size_x_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/type_size_x_f08.F90 index 809df0c1dd7..d70553b918d 100644 --- a/ompi/mpi/fortran/use-mpi-f08/type_size_x_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/type_size_x_f08.F90 @@ -3,10 +3,12 @@ ! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2013 Los Alamos National Security, LLC. ! All rights reserved. +! Copyright (c) 2015 Research Organization for Information Science +! and Technology (RIST). All rights reserved. ! 
$COPYRIGHT$ subroutine MPI_Type_size_x_f08(datatype,size,ierror) - use :: mpi_f08_types, only : MPI_Datatype, MPI_ADDRESS_KIND + use :: mpi_f08_types, only : MPI_Datatype, MPI_COUNT_KIND use :: mpi_f08, only : ompi_type_size_x_f implicit none TYPE(MPI_Datatype), INTENT(IN) :: datatype diff --git a/ompi/mpi/fortran/use-mpi-f08/win_attach_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/win_attach_f08.F90 index 83c70881597..2c8160f3aee 100644 --- a/ompi/mpi/fortran/use-mpi-f08/win_attach_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/win_attach_f08.F90 @@ -12,7 +12,7 @@ subroutine MPI_Win_attach_f08(win,base,size,ierror) implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base INTEGER(MPI_ADDRESS_KIND), INTENT(IN) :: size - TYPE(MPI_Win), INTENT(OUT) :: win + TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-f08/win_detach_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/win_detach_f08.F90 index cd7aab7169d..88135ea874f 100644 --- a/ompi/mpi/fortran/use-mpi-f08/win_detach_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/win_detach_f08.F90 @@ -11,7 +11,7 @@ subroutine MPI_Win_detach_f08(win,base,ierror) use :: mpi_f08, only : ompi_win_detach_f implicit none OMPI_FORTRAN_IGNORE_TKR_TYPE, INTENT(IN) :: base - TYPE(MPI_Win), INTENT(OUT) :: win + TYPE(MPI_Win), INTENT(IN) :: win INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/Makefile.am b/ompi/mpi/fortran/use-mpi-ignore-tkr/Makefile.am index 7f656522734..cc8ee45bf13 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/Makefile.am +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/Makefile.am @@ -1,13 +1,14 @@ # -*- makefile -*- # -# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. 
All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -22,7 +23,7 @@ AM_CPPFLAGS = -DOMPI_PROFILE_LAYER=0 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1 AM_FCFLAGS = -I$(top_builddir)/ompi/include -I$(top_srcdir)/ompi/include \ -I$(top_builddir) -I$(top_srcdir) $(FCFLAGS_f90) -lib_LTLIBRARIES = libmpi_usempi_ignore_tkr.la +lib_LTLIBRARIES = lib@OMPI_LIBMPI_NAME@_usempi_ignore_tkr.la mpi-ignore-tkr-interfaces.h: mpi-ignore-tkr-interfaces.h.in mpi-ignore-tkr-file-interfaces.h: mpi-ignore-tkr-file-interfaces.h.in @@ -35,20 +36,27 @@ mpi-ignore-tkr.lo: mpi-ignore-tkr-sizeof.h mpi-ignore-tkr.lo: mpi-ignore-tkr-sizeof.f90 mpi-ignore-tkr.lo: mpi-ignore-tkr.F90 -libmpi_usempi_ignore_tkr_la_SOURCES = \ +lib@OMPI_LIBMPI_NAME@_usempi_ignore_tkr_la_SOURCES = \ mpi-ignore-tkr.F90 -nodist_libmpi_usempi_ignore_tkr_la_SOURCES = \ +nodist_lib@OMPI_LIBMPI_NAME@_usempi_ignore_tkr_la_SOURCES = \ mpi-ignore-tkr-interfaces.h \ mpi-ignore-tkr-file-interfaces.h if BUILD_FORTRAN_SIZEOF # These files are generated; do not distribute them -nodist_libmpi_usempi_ignore_tkr_la_SOURCES += \ +nodist_lib@OMPI_LIBMPI_NAME@_usempi_ignore_tkr_la_SOURCES += \ mpi-ignore-tkr-sizeof.h \ mpi-ignore-tkr-sizeof.f90 endif -libmpi_usempi_ignore_tkr_la_LDFLAGS = \ +# Note that we invoke some OPAL functions directly in +# libmpi_usempi_ignore_tkr.la, so we need to link in the OPAL library +# directly (pulling it in indirectly via libmpi.la does not work on +# all platforms). 
+lib@OMPI_LIBMPI_NAME@_usempi_ignore_tkr_la_LIBADD = \ + $(OMPI_MPIEXT_USEMPI_LIBS) \ + $(OMPI_TOP_BUILDDIR)/opal/lib@OPAL_LIB_PREFIX@open-pal.la +lib@OMPI_LIBMPI_NAME@_usempi_ignore_tkr_la_LDFLAGS = \ -version-info $(libmpi_usempi_ignore_tkr_so_version) \ $(OMPI_FORTRAN_EXTRA_SHARED_LIBRARY_FLAGS) diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-file-interfaces.h.in b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-file-interfaces.h.in index 09b15caa208..b48f9dc2070 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-file-interfaces.h.in +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-file-interfaces.h.in @@ -377,6 +377,35 @@ end subroutine PMPI_File_iread end interface +interface MPI_File_iread_all + +subroutine MPI_File_iread_all(fh, buf, count, datatype, request& + , ierror) + integer, intent(in) :: fh + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_File_iread_all + +end interface + +interface PMPI_File_iread_all + +subroutine PMPI_File_iread_all(fh, buf, count, datatype, request& + , ierror) + integer, intent(in) :: fh + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine PMPI_File_iread_all + +end interface interface MPI_File_iread_at @@ -412,6 +441,39 @@ end subroutine PMPI_File_iread_at end interface +interface MPI_File_iread_at_all + +subroutine MPI_File_iread_at_all(fh, offset, buf, count, datatype, & + request, ierror) + include 'mpif-config.h' + integer, intent(in) :: fh + integer(kind=MPI_OFFSET_KIND), intent(in) :: offset + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + integer, intent(in) :: count + integer, intent(in) :: 
datatype + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_File_iread_at_all + +end interface + +interface PMPI_File_iread_at_all + +subroutine PMPI_File_iread_at_all(fh, offset, buf, count, datatype, & + request, ierror) + include 'mpif-config.h' + integer, intent(in) :: fh + integer(kind=MPI_OFFSET_KIND), intent(in) :: offset + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine PMPI_File_iread_at_all + +end interface interface MPI_File_iread_shared @@ -474,6 +536,35 @@ end subroutine PMPI_File_iwrite end interface +interface MPI_File_iwrite_all + +subroutine MPI_File_iwrite_all(fh, buf, count, datatype, request& + , ierror) + integer, intent(in) :: fh + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: buf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_File_iwrite_all + +end interface + +interface PMPI_File_iwrite_all + +subroutine PMPI_File_iwrite_all(fh, buf, count, datatype, request& + , ierror) + integer, intent(in) :: fh + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: buf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine PMPI_File_iwrite_all + +end interface interface MPI_File_iwrite_at @@ -509,6 +600,39 @@ end subroutine PMPI_File_iwrite_at end interface +interface MPI_File_iwrite_at_all + +subroutine MPI_File_iwrite_at_all(fh, offset, buf, count, datatype, & + request, ierror) + include 'mpif-config.h' + integer, intent(in) :: fh + integer(kind=MPI_OFFSET_KIND), intent(in) :: offset + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: buf + 
integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_File_iwrite_at_all + +end interface + +interface PMPI_File_iwrite_at_all + +subroutine PMPI_File_iwrite_at_all(fh, offset, buf, count, datatype, & + request, ierror) + include 'mpif-config.h' + integer, intent(in) :: fh + integer(kind=MPI_OFFSET_KIND), intent(in) :: offset + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: buf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine PMPI_File_iwrite_at_all + +end interface interface MPI_File_iwrite_shared @@ -878,27 +1002,27 @@ end interface interface MPI_File_read_ordered_begin -subroutine MPI_File_read_ord_begin(fh, buf, count, datatype, ierror) +subroutine MPI_File_read_ordered_begin(fh, buf, count, datatype, ierror) integer, intent(in) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf integer, intent(in) :: count integer, intent(in) :: datatype integer, intent(out) :: ierror -end subroutine MPI_File_read_ord_begin +end subroutine MPI_File_read_ordered_begin end interface interface PMPI_File_read_ordered_begin -subroutine PMPI_File_read_ord_begin(fh, buf, count, datatype, ierror) +subroutine PMPI_File_read_ordered_begin(fh, buf, count, datatype, ierror) integer, intent(in) :: fh @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf integer, intent(in) :: count integer, intent(in) :: datatype integer, intent(out) :: ierror -end subroutine PMPI_File_read_ord_begin +end subroutine PMPI_File_read_ordered_begin end interface @@ -1343,7 +1467,7 @@ end interface interface MPI_File_write_at_all_begin -subroutine MPI_File_wr_at_all_begin(fh, offset, buf, count, datatype& +subroutine MPI_File_write_at_all_begin(fh, offset, buf, count, datatype& , ierror) include 'mpif-config.h' integer, 
intent(in) :: fh @@ -1353,13 +1477,13 @@ subroutine MPI_File_wr_at_all_begin(fh, offset, buf, count, datatype& integer, intent(in) :: count integer, intent(in) :: datatype integer, intent(out) :: ierror -end subroutine MPI_File_wr_at_all_begin +end subroutine MPI_File_write_at_all_begin end interface interface PMPI_File_write_at_all_begin -subroutine PMPI_File_wr_at_all_begin(fh, offset, buf, count, datatype& +subroutine PMPI_File_write_at_all_begin(fh, offset, buf, count, datatype& , ierror) include 'mpif-config.h' integer, intent(in) :: fh @@ -1369,7 +1493,7 @@ subroutine PMPI_File_wr_at_all_begin(fh, offset, buf, count, datatype& integer, intent(in) :: count integer, intent(in) :: datatype integer, intent(out) :: ierror -end subroutine PMPI_File_wr_at_all_begin +end subroutine PMPI_File_write_at_all_begin end interface diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in index 16433d793af..756ba19d062 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in @@ -7,7 +7,7 @@ ! of Tennessee Research Foundation. All rights ! reserved. ! Copyright (c) 2012 Inria. All rights reserved. -! Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights +! Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights ! reserved. ! Copyright (c) 2015 Research Organization for Information Science ! and Technology (RIST). All rights reserved. 
@@ -164,6 +164,52 @@ end subroutine PMPI_Address end interface +interface MPI_Aint_add + +function MPI_Aint_add(base, diff) + include 'mpif-config.h' + integer(kind=MPI_ADDRESS_KIND) :: base + integer(kind=MPI_ADDRESS_KIND) :: diff + integer(kind=MPI_ADDRESS_KIND) MPI_Aint_add +end function MPI_Aint_add + +end interface + +interface PMPI_Aint_add + +function PMPI_Aint_add(base, diff) + include 'mpif-config.h' + integer(kind=MPI_ADDRESS_KIND) :: base + integer(kind=MPI_ADDRESS_KIND) :: diff + integer(kind=MPI_ADDRESS_KIND) PMPI_Aint_add +end function PMPI_Aint_add + +end interface + + +interface MPI_Aint_diff + +function MPI_Aint_diff(addr1, addr2) + include 'mpif-config.h' + integer(kind=MPI_ADDRESS_KIND) :: addr1 + integer(kind=MPI_ADDRESS_KIND) :: addr2 + integer(kind=MPI_ADDRESS_KIND) MPI_Aint_diff +end function MPI_Aint_diff + +end interface + +interface PMPI_Aint_diff + +function PMPI_Aint_diff(addr1, addr2) + include 'mpif-config.h' + integer(kind=MPI_ADDRESS_KIND) :: addr1 + integer(kind=MPI_ADDRESS_KIND) :: addr2 + integer(kind=MPI_ADDRESS_KIND) PMPI_Aint_diff +end function PMPI_Aint_diff + +end interface + + interface MPI_Allgather subroutine MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, & @@ -1031,18 +1077,6 @@ end subroutine MPI_Comm_create end interface -interface MPI_Comm_create_group - -subroutine MPI_Comm_create_group(comm, group, tag, newcomm, ierror) - integer, intent(in) :: comm - integer, intent(in) :: group - integer, intent(in) :: tag - integer, intent(out) :: newcomm - integer, intent(out) :: ierror -end subroutine MPI_Comm_create_group - -end interface - interface PMPI_Comm_create subroutine PMPI_Comm_create(comm, group, newcomm, ierror) @@ -1076,6 +1110,31 @@ end subroutine PMPI_Comm_create_errhandler end interface +interface MPI_Comm_create_group + +subroutine MPI_Comm_create_group(comm, group, tag, newcomm, ierror) + integer, intent(in) :: comm + integer, intent(in) :: group + integer, intent(in) :: tag + integer, 
intent(out) :: newcomm + integer, intent(out) :: ierror +end subroutine MPI_Comm_create_group + +end interface + +interface PMPI_Comm_create_group + +subroutine PMPI_Comm_create_group(comm, group, tag, newcomm, ierror) + integer, intent(in) :: comm + integer, intent(in) :: group + integer, intent(in) :: tag + integer, intent(out) :: newcomm + integer, intent(out) :: ierror +end subroutine PMPI_Comm_create_group + +end interface + + interface MPI_Comm_create_keyval subroutine MPI_Comm_create_keyval(comm_copy_attr_fn, comm_delete_attr_fn, comm_keyval, extra_state, ierror) @@ -1153,35 +1212,36 @@ end subroutine MPI_Comm_dup end interface -interface MPI_Comm_dup +interface PMPI_Comm_dup -subroutine MPI_Comm_dup_with_info(comm, info, newcomm, ierror) +subroutine PMPI_Comm_dup(comm, newcomm, ierror) integer, intent(in) :: comm - integer, intent(in) :: info integer, intent(out) :: newcomm integer, intent(out) :: ierror -end subroutine MPI_Comm_dup_with_info +end subroutine PMPI_Comm_dup end interface -interface MPI_Comm_idup -subroutine MPI_Comm_idup(comm, newcomm, request, ierror) +interface MPI_Comm_dup_with_info + +subroutine MPI_Comm_dup_with_info(comm, info, newcomm, ierror) integer, intent(in) :: comm + integer, intent(in) :: info integer, intent(out) :: newcomm - integer, intent(out) :: request integer, intent(out) :: ierror -end subroutine MPI_Comm_idup +end subroutine MPI_Comm_dup_with_info end interface -interface PMPI_Comm_dup +interface PMPI_Comm_dup_with_info -subroutine PMPI_Comm_dup(comm, newcomm, ierror) +subroutine PMPI_Comm_dup_with_info(comm, info, newcomm, ierror) integer, intent(in) :: comm + integer, intent(in) :: info integer, intent(out) :: newcomm integer, intent(out) :: ierror -end subroutine PMPI_Comm_dup +end subroutine PMPI_Comm_dup_with_info end interface @@ -1272,6 +1332,27 @@ end subroutine PMPI_Comm_get_errhandler end interface +interface MPI_Comm_get_info + +subroutine MPI_Comm_get_info(comm, info_used, ierror) + integer, intent(in) :: 
comm + integer, intent(out) :: info_used + integer, intent(out) :: ierror +end subroutine MPI_Comm_get_info + +end interface + +interface PMPI_Comm_get_info + +subroutine PMPI_Comm_get_info(comm, info_used, ierror) + integer, intent(in) :: comm + integer, intent(out) :: info_used + integer, intent(out) :: ierror +end subroutine PMPI_Comm_get_info + +end interface + + interface MPI_Comm_get_name subroutine MPI_Comm_get_name(comm, comm_name, resultlen, ierror) @@ -1335,6 +1416,29 @@ end subroutine PMPI_Comm_group end interface +interface MPI_Comm_idup + +subroutine MPI_Comm_idup(comm, newcomm, request, ierror) + integer, intent(in) :: comm + integer, intent(out) :: newcomm + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Comm_idup + +end interface + +interface PMPI_Comm_idup + +subroutine PMPI_Comm_idup(comm, newcomm, request, ierror) + integer, intent(in) :: comm + integer, intent(out) :: newcomm + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine PMPI_Comm_idup + +end interface + + interface MPI_Comm_join subroutine MPI_Comm_join(fd, intercomm, ierror) @@ -1627,7 +1731,7 @@ end subroutine PMPI_Comm_split end interface -interface MPI_Comm_split +interface MPI_Comm_split_type subroutine MPI_Comm_split_type(comm, split_type, key, info, newcomm, ierror) integer, intent(in) :: comm @@ -1640,7 +1744,7 @@ end subroutine MPI_Comm_split_type end interface -interface PMPI_Comm_split +interface PMPI_Comm_split_type subroutine PMPI_Comm_split_type(comm, split_type, key, info, newcomm, ierror) integer, intent(in) :: comm @@ -1711,6 +1815,7 @@ end subroutine PMPI_Compare_and_swap end interface + interface MPI_Dims_create subroutine MPI_Dims_create(nnodes, ndims, dims, ierror) @@ -2014,6 +2119,26 @@ end subroutine PMPI_Exscan end interface + +interface MPI_F_sync_reg + +subroutine MPI_F_sync_reg(buf) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(inout) :: buf +end subroutine 
MPI_F_sync_reg + +end interface + +interface PMPI_F_sync_reg + +subroutine PMPI_F_sync_reg(buf) + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(inout) :: buf +end subroutine PMPI_F_sync_reg + +end interface + + interface MPI_Fetch_and_op subroutine MPI_Fetch_and_op(origin_addr, result_addr, datatype, target_rank,& @@ -2052,6 +2177,7 @@ end subroutine PMPI_Fetch_and_op end interface + interface MPI_Finalize subroutine MPI_Finalize(ierror) @@ -2227,6 +2353,7 @@ end subroutine PMPI_Get end interface + interface MPI_Get_accumulate subroutine MPI_Get_accumulate(origin_addr, origin_count, origin_datatype,& @@ -2279,6 +2406,7 @@ end subroutine PMPI_Get_accumulate end interface + interface MPI_Get_address subroutine MPI_Get_address(location, address, ierror) @@ -2354,6 +2482,29 @@ end subroutine PMPI_Get_elements end interface +interface MPI_Get_elements_x + +subroutine MPI_Get_elements_x(status, datatype, count, ierror) + include 'mpif-config.h' + integer, dimension(MPI_STATUS_SIZE), intent(in) :: status + integer, intent(in) :: datatype + integer(kind=MPI_COUNT_KIND), intent(out) :: count + integer, intent(out) :: ierror +end subroutine MPI_Get_elements_x + +end interface + +interface PMPI_Get_elements_x + +subroutine PMPI_Get_elements_x(status, datatype, count, ierror) + include 'mpif-config.h' + integer, dimension(MPI_STATUS_SIZE), intent(in) :: status + integer, intent(in) :: datatype + integer(kind=MPI_COUNT_KIND), intent(out) :: count + integer, intent(out) :: ierror +end subroutine PMPI_Get_elements_x + +end interface interface MPI_Get_library_version @@ -3207,74 +3358,74 @@ end subroutine PMPI_Ibcast end interface -interface MPI_Iexscan +interface MPI_Ibsend -subroutine MPI_Iexscan(sendbuf, recvbuf, count, datatype, op, & +subroutine MPI_Ibsend(buf, count, datatype, dest, tag, & comm, request, ierror) - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf - 
@OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: buf integer, intent(in) :: count integer, intent(in) :: datatype - integer, intent(in) :: op + integer, intent(in) :: dest + integer, intent(in) :: tag integer, intent(in) :: comm integer, intent(out) :: request integer, intent(out) :: ierror -end subroutine MPI_Iexscan +end subroutine MPI_Ibsend end interface -interface PMPI_Iexscan +interface PMPI_Ibsend -subroutine PMPI_Iexscan(sendbuf, recvbuf, count, datatype, op, & +subroutine PMPI_Ibsend(buf, count, datatype, dest, tag, & comm, request, ierror) - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: buf integer, intent(in) :: count integer, intent(in) :: datatype - integer, intent(in) :: op + integer, intent(in) :: dest + integer, intent(in) :: tag integer, intent(in) :: comm integer, intent(out) :: request integer, intent(out) :: ierror -end subroutine PMPI_Iexscan +end subroutine PMPI_Ibsend end interface -interface MPI_Ibsend +interface MPI_Iexscan -subroutine MPI_Ibsend(buf, count, datatype, dest, tag, & +subroutine MPI_Iexscan(sendbuf, recvbuf, count, datatype, op, & comm, request, ierror) - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: buf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf integer, intent(in) :: count integer, intent(in) :: datatype - integer, intent(in) :: dest - integer, intent(in) :: tag + integer, intent(in) :: op integer, intent(in) :: comm integer, intent(out) :: request integer, intent(out) :: ierror -end subroutine 
MPI_Ibsend +end subroutine MPI_Iexscan end interface -interface PMPI_Ibsend +interface PMPI_Iexscan -subroutine PMPI_Ibsend(buf, count, datatype, dest, tag, & +subroutine PMPI_Iexscan(sendbuf, recvbuf, count, datatype, op, & comm, request, ierror) - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: buf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf integer, intent(in) :: count integer, intent(in) :: datatype - integer, intent(in) :: dest - integer, intent(in) :: tag + integer, intent(in) :: op integer, intent(in) :: comm integer, intent(out) :: request integer, intent(out) :: ierror -end subroutine PMPI_Ibsend +end subroutine PMPI_Iexscan end interface @@ -3593,12 +3744,12 @@ subroutine MPI_Ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recv @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf integer, dimension(*), intent(in) :: sendcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: sdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: sdispls integer, dimension(*), intent(in) :: sendtypes @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf integer, dimension(*), intent(in) :: recvcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: rdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: rdispls integer, dimension(*), intent(in) :: recvtypes integer, intent(in) :: comm integer, intent(out) :: request @@ -3615,12 +3766,12 @@ subroutine PMPI_Ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, rec @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf integer, dimension(*), intent(in) :: sendcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: sdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: sdispls 
integer, dimension(*), intent(in) :: sendtypes @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf integer, dimension(*), intent(in) :: recvcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: rdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: rdispls integer, dimension(*), intent(in) :: recvtypes integer, intent(in) :: comm integer, intent(out) :: request @@ -4655,12 +4806,12 @@ subroutine MPI_Neighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvb @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf integer, dimension(*), intent(in) :: sendcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: sdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: sdispls integer, dimension(*), intent(in) :: sendtypes @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf integer, dimension(*), intent(in) :: recvcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: rdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: rdispls integer, dimension(*), intent(in) :: recvtypes integer, intent(in) :: comm integer, intent(out) :: ierror @@ -4676,12 +4827,12 @@ subroutine PMPI_Neighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recv @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ sendbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: sendbuf integer, dimension(*), intent(in) :: sendcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: sdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: sdispls integer, dimension(*), intent(in) :: sendtypes @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ recvbuf @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: recvbuf integer, dimension(*), intent(in) :: recvcounts - integer(kind=MPI_ADDRESS_KIND), intent(in) :: rdispls + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: rdispls integer, dimension(*), intent(in) :: recvtypes integer, intent(in) :: comm integer, intent(out) :: ierror @@ -4689,6 
+4840,7 @@ end subroutine PMPI_Neighbor_alltoallw end interface + interface MPI_Op_commutative subroutine MPI_Op_commutative(op, commute, ierror) @@ -5027,6 +5179,7 @@ end subroutine PMPI_Query_thread end interface + interface MPI_Raccumulate subroutine MPI_Raccumulate(origin_addr, origin_count, origin_datatype,& @@ -5071,175 +5224,38 @@ end subroutine PMPI_Raccumulate end interface -interface MPI_Get -subroutine MPI_Rget(origin_addr, origin_count, origin_datatype, & - target_rank, target_disp, target_count, & - target_datatype, win, request, ierror) +interface MPI_Recv + +subroutine MPI_Recv(buf, count, datatype, source, tag, & + comm, status, ierror) include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: origin_addr - integer, intent(in) :: origin_count - integer, intent(in) :: origin_datatype - integer, intent(in) :: target_rank - integer(kind=MPI_ADDRESS_KIND), intent(in) :: target_disp - integer, intent(in) :: target_count - integer, intent(in) :: target_datatype - integer, intent(in) :: win - integer, intent(out) :: request + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(in) :: source + integer, intent(in) :: tag + integer, intent(in) :: comm + integer, dimension(MPI_STATUS_SIZE), intent(out) :: status integer, intent(out) :: ierror -end subroutine MPI_Rget +end subroutine MPI_Recv end interface -interface PMPI_Get +interface PMPI_Recv -subroutine PMPI_Rget(origin_addr, origin_count, origin_datatype, & - target_rank, target_disp, target_count, & - target_datatype, win, request, ierror) +subroutine PMPI_Recv(buf, count, datatype, source, tag, & + comm, status, ierror) include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: origin_addr - integer, intent(in) :: origin_count - integer, intent(in) :: origin_datatype - integer, intent(in) :: 
target_rank - integer(kind=MPI_ADDRESS_KIND), intent(in) :: target_disp - integer, intent(in) :: target_count - integer, intent(in) :: target_datatype - integer, intent(in) :: win - integer, intent(out) :: request - integer, intent(out) :: ierror -end subroutine PMPI_Rget - -end interface - -interface MPI_Rget_accumulate - -subroutine MPI_Rget_accumulate(origin_addr, origin_count, origin_datatype,& - result_addr, result_count, result_datatype,& - target_rank, target_disp, target_count, & - target_datatype, op, win, request, ierror) - include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: origin_addr - integer, intent(in) :: origin_count - integer, intent(in) :: origin_datatype - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ result_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: result_addr - integer, intent(in) :: result_count - integer, intent(in) :: result_datatype - integer, intent(in) :: target_rank - integer(kind=MPI_ADDRESS_KIND), intent(in) :: target_disp - integer, intent(in) :: target_count - integer, intent(in) :: target_datatype - integer, intent(in) :: op - integer, intent(in) :: win - integer, intent(out) :: request - integer, intent(out) :: ierror -end subroutine MPI_Rget_accumulate - -end interface - -interface PMPI_Rget_accumulate - -subroutine PMPI_Rget_accumulate(origin_addr, origin_count, origin_datatype,& - result_addr, result_count, result_datatype,& - target_rank, target_disp, target_count, & - target_datatype, op, win, request, ierror) - include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: origin_addr - integer, intent(in) :: origin_count - integer, intent(in) :: origin_datatype - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ result_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: result_addr - integer, intent(in) :: result_count - integer, intent(in) :: result_datatype - integer, intent(in) :: target_rank - integer(kind=MPI_ADDRESS_KIND), intent(in) 
:: target_disp - integer, intent(in) :: target_count - integer, intent(in) :: target_datatype - integer, intent(in) :: op - integer, intent(in) :: win - integer, intent(out) :: request - integer, intent(out) :: ierror -end subroutine PMPI_Rget_accumulate - -end interface - -interface MPI_Rput - -subroutine MPI_Rput(origin_addr, origin_count, origin_datatype, & - target_rank, target_disp, target_count, & - target_datatype, win, request, ierror) - include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: origin_addr - integer, intent(in) :: origin_count - integer, intent(in) :: origin_datatype - integer, intent(in) :: target_rank - integer(kind=MPI_ADDRESS_KIND), intent(in) :: target_disp - integer, intent(in) :: target_count - integer, intent(in) :: target_datatype - integer, intent(in) :: win - integer, intent(out) :: request - integer, intent(out) :: ierror -end subroutine MPI_Rput - -end interface - -interface PMPI_Rput - -subroutine PMPI_Rput(origin_addr, origin_count, origin_datatype, & - target_rank, target_disp, target_count, & - target_datatype, win, request, ierror) - include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: origin_addr - integer, intent(in) :: origin_count - integer, intent(in) :: origin_datatype - integer, intent(in) :: target_rank - integer(kind=MPI_ADDRESS_KIND), intent(in) :: target_disp - integer, intent(in) :: target_count - integer, intent(in) :: target_datatype - integer, intent(in) :: win - integer, intent(out) :: request - integer, intent(out) :: ierror -end subroutine PMPI_Rput - -end interface - -interface MPI_Recv - -subroutine MPI_Recv(buf, count, datatype, source, tag, & - comm, status, ierror) - include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf - integer, intent(in) :: count - integer, intent(in) :: datatype - integer, intent(in) :: source - 
integer, intent(in) :: tag - integer, intent(in) :: comm - integer, dimension(MPI_STATUS_SIZE), intent(out) :: status - integer, intent(out) :: ierror -end subroutine MPI_Recv - -end interface - -interface PMPI_Recv - -subroutine PMPI_Recv(buf, count, datatype, source, tag, & - comm, status, ierror) - include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf - @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf - integer, intent(in) :: count - integer, intent(in) :: datatype - integer, intent(in) :: source - integer, intent(in) :: tag - integer, intent(in) :: comm - integer, dimension(MPI_STATUS_SIZE), intent(out) :: status + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ buf + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: buf + integer, intent(in) :: count + integer, intent(in) :: datatype + integer, intent(in) :: source + integer, intent(in) :: tag + integer, intent(in) :: comm + integer, dimension(MPI_STATUS_SIZE), intent(out) :: status integer, intent(out) :: ierror end subroutine PMPI_Recv @@ -5496,6 +5512,147 @@ end subroutine PMPI_Request_get_status end interface +interface MPI_Rget + +subroutine MPI_Rget(origin_addr, origin_count, origin_datatype, & + target_rank, target_disp, target_count, & + target_datatype, win, request, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: origin_addr + integer, intent(in) :: origin_count + integer, intent(in) :: origin_datatype + integer, intent(in) :: target_rank + integer(kind=MPI_ADDRESS_KIND), intent(in) :: target_disp + integer, intent(in) :: target_count + integer, intent(in) :: target_datatype + integer, intent(in) :: win + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Rget + +end interface + +interface PMPI_Rget + +subroutine PMPI_Rget(origin_addr, origin_count, origin_datatype, & + target_rank, target_disp, target_count, & + target_datatype, win, request, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr + 
@OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: origin_addr + integer, intent(in) :: origin_count + integer, intent(in) :: origin_datatype + integer, intent(in) :: target_rank + integer(kind=MPI_ADDRESS_KIND), intent(in) :: target_disp + integer, intent(in) :: target_count + integer, intent(in) :: target_datatype + integer, intent(in) :: win + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine PMPI_Rget + +end interface + + +interface MPI_Rget_accumulate + +subroutine MPI_Rget_accumulate(origin_addr, origin_count, origin_datatype,& + result_addr, result_count, result_datatype,& + target_rank, target_disp, target_count, & + target_datatype, op, win, request, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: origin_addr + integer, intent(in) :: origin_count + integer, intent(in) :: origin_datatype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ result_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: result_addr + integer, intent(in) :: result_count + integer, intent(in) :: result_datatype + integer, intent(in) :: target_rank + integer(kind=MPI_ADDRESS_KIND), intent(in) :: target_disp + integer, intent(in) :: target_count + integer, intent(in) :: target_datatype + integer, intent(in) :: op + integer, intent(in) :: win + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Rget_accumulate + +end interface + +interface PMPI_Rget_accumulate + +subroutine PMPI_Rget_accumulate(origin_addr, origin_count, origin_datatype,& + result_addr, result_count, result_datatype,& + target_rank, target_disp, target_count, & + target_datatype, op, win, request, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: origin_addr + integer, intent(in) :: origin_count + integer, intent(in) :: origin_datatype + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ result_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@ :: result_addr + 
integer, intent(in) :: result_count + integer, intent(in) :: result_datatype + integer, intent(in) :: target_rank + integer(kind=MPI_ADDRESS_KIND), intent(in) :: target_disp + integer, intent(in) :: target_count + integer, intent(in) :: target_datatype + integer, intent(in) :: op + integer, intent(in) :: win + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine PMPI_Rget_accumulate + +end interface + + +interface MPI_Rput + +subroutine MPI_Rput(origin_addr, origin_count, origin_datatype, & + target_rank, target_disp, target_count, & + target_datatype, win, request, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: origin_addr + integer, intent(in) :: origin_count + integer, intent(in) :: origin_datatype + integer, intent(in) :: target_rank + integer(kind=MPI_ADDRESS_KIND), intent(in) :: target_disp + integer, intent(in) :: target_count + integer, intent(in) :: target_datatype + integer, intent(in) :: win + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine MPI_Rput + +end interface + +interface PMPI_Rput + +subroutine PMPI_Rput(origin_addr, origin_count, origin_datatype, & + target_rank, target_disp, target_count, & + target_datatype, win, request, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ origin_addr + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: origin_addr + integer, intent(in) :: origin_count + integer, intent(in) :: origin_datatype + integer, intent(in) :: target_rank + integer(kind=MPI_ADDRESS_KIND), intent(in) :: target_disp + integer, intent(in) :: target_count + integer, intent(in) :: target_datatype + integer, intent(in) :: win + integer, intent(out) :: request + integer, intent(out) :: ierror +end subroutine PMPI_Rput + +end interface + + interface MPI_Rsend subroutine MPI_Rsend(ibuf, count, datatype, dest, tag, & @@ -5993,6 +6150,31 @@ end subroutine PMPI_Status_set_elements end interface 
+interface MPI_Status_set_elements_x + +subroutine MPI_Status_set_elements_x(status, datatype, count, ierror) + include 'mpif-config.h' + integer, dimension(MPI_STATUS_SIZE), intent(inout) :: status + integer, intent(in) :: datatype + integer(kind=MPI_COUNT_KIND), intent(in) :: count + integer, intent(out) :: ierror +end subroutine MPI_Status_set_elements_x + +end interface + +interface PMPI_Status_set_elements_x + +subroutine PMPI_Status_set_elements_x(status, datatype, count, ierror) + include 'mpif-config.h' + integer, dimension(MPI_STATUS_SIZE), intent(inout) :: status + integer, intent(in) :: datatype + integer(kind=MPI_COUNT_KIND), intent(in) :: count + integer, intent(out) :: ierror +end subroutine PMPI_Status_set_elements_x + +end interface + + interface MPI_Test subroutine MPI_Test(request, flag, status, ierror) @@ -6330,93 +6512,93 @@ end subroutine PMPI_Type_create_hindexed end interface -interface MPI_Type_create_hvector +interface MPI_Type_create_hindexed_block -subroutine MPI_Type_create_hvector(count, blocklength, stride, oldtype, newtype& +subroutine MPI_Type_create_hindexed_block(count, blocklength, array_of_displacements, oldtype, newtype& , ierror) include 'mpif-config.h' integer, intent(in) :: count integer, intent(in) :: blocklength - integer(kind=MPI_ADDRESS_KIND), intent(in) :: stride + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: array_of_displacements integer, intent(in) :: oldtype integer, intent(out) :: newtype integer, intent(out) :: ierror -end subroutine MPI_Type_create_hvector +end subroutine MPI_Type_create_hindexed_block end interface -interface PMPI_Type_create_hvector +interface PMPI_Type_create_hindexed_block -subroutine PMPI_Type_create_hvector(count, blocklength, stride, oldtype, newtype& +subroutine PMPI_Type_create_hindexed_block(count, blocklength, array_of_displacements, oldtype, newtype& , ierror) include 'mpif-config.h' integer, intent(in) :: count integer, intent(in) :: blocklength - 
integer(kind=MPI_ADDRESS_KIND), intent(in) :: stride + integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: array_of_displacements integer, intent(in) :: oldtype integer, intent(out) :: newtype integer, intent(out) :: ierror -end subroutine PMPI_Type_create_hvector +end subroutine PMPI_Type_create_hindexed_block end interface -interface MPI_Type_create_indexed_block +interface MPI_Type_create_hvector -subroutine MPI_Type_create_indexed_block(count, blocklength, array_of_displacements, oldtype, newtype& +subroutine MPI_Type_create_hvector(count, blocklength, stride, oldtype, newtype& , ierror) + include 'mpif-config.h' integer, intent(in) :: count integer, intent(in) :: blocklength - integer, dimension(*), intent(in) :: array_of_displacements + integer(kind=MPI_ADDRESS_KIND), intent(in) :: stride integer, intent(in) :: oldtype integer, intent(out) :: newtype integer, intent(out) :: ierror -end subroutine MPI_Type_create_indexed_block +end subroutine MPI_Type_create_hvector end interface -interface PMPI_Type_create_indexed_block +interface PMPI_Type_create_hvector -subroutine PMPI_Type_create_indexed_block(count, blocklength, array_of_displacements, oldtype, newtype& +subroutine PMPI_Type_create_hvector(count, blocklength, stride, oldtype, newtype& , ierror) + include 'mpif-config.h' integer, intent(in) :: count integer, intent(in) :: blocklength - integer, dimension(*), intent(in) :: array_of_displacements + integer(kind=MPI_ADDRESS_KIND), intent(in) :: stride integer, intent(in) :: oldtype integer, intent(out) :: newtype integer, intent(out) :: ierror -end subroutine PMPI_Type_create_indexed_block +end subroutine PMPI_Type_create_hvector end interface -interface MPI_Type_create_hindexed_block +interface MPI_Type_create_indexed_block -subroutine MPI_Type_create_hindexed_block(count, blocklength, array_of_displacements, oldtype, newtype& +subroutine MPI_Type_create_indexed_block(count, blocklength, array_of_displacements, oldtype, newtype& , ierror) - include 
'mpif-config.h' integer, intent(in) :: count integer, intent(in) :: blocklength - integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: array_of_displacements + integer, dimension(*), intent(in) :: array_of_displacements integer, intent(in) :: oldtype integer, intent(out) :: newtype integer, intent(out) :: ierror -end subroutine MPI_Type_create_hindexed_block +end subroutine MPI_Type_create_indexed_block end interface -interface PMPI_Type_create_hindexed_block +interface PMPI_Type_create_indexed_block -subroutine PMPI_Type_create_hindexed_block(count, blocklength, array_of_displacements, oldtype, newtype& +subroutine PMPI_Type_create_indexed_block(count, blocklength, array_of_displacements, oldtype, newtype& , ierror) - include 'mpif-config.h' integer, intent(in) :: count integer, intent(in) :: blocklength - integer(kind=MPI_ADDRESS_KIND), dimension(*), intent(in) :: array_of_displacements + integer, dimension(*), intent(in) :: array_of_displacements integer, intent(in) :: oldtype integer, intent(out) :: newtype integer, intent(out) :: ierror -end subroutine PMPI_Type_create_hindexed_block +end subroutine PMPI_Type_create_indexed_block end interface @@ -6756,6 +6938,31 @@ end subroutine PMPI_Type_get_extent end interface +interface MPI_Type_get_extent_x + +subroutine MPI_Type_get_extent_x(type, lb, extent, ierror) + include 'mpif-config.h' + integer, intent(in) :: type + integer(kind=MPI_COUNT_KIND), intent(out) :: lb + integer(kind=MPI_COUNT_KIND), intent(out) :: extent + integer, intent(out) :: ierror +end subroutine MPI_Type_get_extent_x + +end interface + +interface PMPI_Type_get_extent_x + +subroutine PMPI_Type_get_extent_x(type, lb, extent, ierror) + include 'mpif-config.h' + integer, intent(in) :: type + integer(kind=MPI_COUNT_KIND), intent(out) :: lb + integer(kind=MPI_COUNT_KIND), intent(out) :: extent + integer, intent(out) :: ierror +end subroutine PMPI_Type_get_extent_x + +end interface + + interface MPI_Type_get_name subroutine 
MPI_Type_get_name(type, type_name, resultlen, ierror) @@ -6784,22 +6991,47 @@ interface MPI_Type_get_true_extent subroutine MPI_Type_get_true_extent(datatype, true_lb, true_extent, ierror) include 'mpif-config.h' integer, intent(in) :: datatype - integer(kind=MPI_ADDRESS_KIND), intent(out) :: true_lb - integer(kind=MPI_ADDRESS_KIND), intent(out) :: true_extent + integer(kind=MPI_ADDRESS_KIND), intent(out) :: true_lb + integer(kind=MPI_ADDRESS_KIND), intent(out) :: true_extent + integer, intent(out) :: ierror +end subroutine MPI_Type_get_true_extent + +end interface + +interface PMPI_Type_get_true_extent + +subroutine PMPI_Type_get_true_extent(datatype, true_lb, true_extent, ierror) + include 'mpif-config.h' + integer, intent(in) :: datatype + integer(kind=MPI_ADDRESS_KIND), intent(out) :: true_lb + integer(kind=MPI_ADDRESS_KIND), intent(out) :: true_extent + integer, intent(out) :: ierror +end subroutine PMPI_Type_get_true_extent + +end interface + + +interface MPI_Type_get_true_extent_x + +subroutine MPI_Type_get_true_extent_x(datatype, true_lb, true_extent, ierror) + include 'mpif-config.h' + integer, intent(in) :: datatype + integer(kind=MPI_COUNT_KIND), intent(out) :: true_lb + integer(kind=MPI_COUNT_KIND), intent(out) :: true_extent integer, intent(out) :: ierror -end subroutine MPI_Type_get_true_extent +end subroutine MPI_Type_get_true_extent_x end interface -interface PMPI_Type_get_true_extent +interface PMPI_Type_get_true_extent_x -subroutine PMPI_Type_get_true_extent(datatype, true_lb, true_extent, ierror) +subroutine PMPI_Type_get_true_extent_x(datatype, true_lb, true_extent, ierror) include 'mpif-config.h' integer, intent(in) :: datatype - integer(kind=MPI_ADDRESS_KIND), intent(out) :: true_lb - integer(kind=MPI_ADDRESS_KIND), intent(out) :: true_extent + integer(kind=MPI_COUNT_KIND), intent(out) :: true_lb + integer(kind=MPI_COUNT_KIND), intent(out) :: true_extent integer, intent(out) :: ierror -end subroutine PMPI_Type_get_true_extent +end subroutine 
PMPI_Type_get_true_extent_x end interface @@ -7002,6 +7234,29 @@ end subroutine PMPI_Type_size end interface +interface MPI_Type_size_x + +subroutine MPI_Type_size_x(type, size, ierror) + include 'mpif-config.h' + integer, intent(in) :: type + integer(kind=MPI_COUNT_KIND), intent(out) :: size + integer, intent(out) :: ierror +end subroutine MPI_Type_size_x + +end interface + +interface PMPI_Type_size_x + +subroutine PMPI_Type_size_x(type, size, ierror) + include 'mpif-config.h' + integer, intent(in) :: type + integer(kind=MPI_COUNT_KIND), intent(out) :: size + integer, intent(out) :: ierror +end subroutine PMPI_Type_size_x + +end interface + + interface MPI_Type_struct subroutine MPI_Type_struct(count, array_of_blocklengths, array_of_displacements, array_of_types, newtype& @@ -7286,17 +7541,6 @@ end subroutine PMPI_Waitsome end interface -interface MPI_Win_call_errhandler - -subroutine MPI_Win_call_errhandler(win, errorcode, ierror) - integer, intent(in) :: win - integer, intent(in) :: errorcode - integer, intent(out) :: ierror -end subroutine MPI_Win_call_errhandler - -end interface - - interface MPI_Win_allocate subroutine MPI_Win_allocate(size, disp_unit, info, comm, & @@ -7322,6 +7566,31 @@ end subroutine MPI_Win_allocate_cptr end interface +interface PMPI_Win_allocate + +subroutine PMPI_Win_allocate(size, disp_unit, info, comm, & + baseptr, win, ierror) + include 'mpif-config.h' + integer(KIND=MPI_ADDRESS_KIND), intent(in) :: size + integer, intent(in) :: disp_unit + integer, intent(in) :: info + integer, intent(in) :: comm + integer(KIND=MPI_ADDRESS_KIND), intent(out) :: baseptr + integer, intent(out) :: win + integer, intent(out) :: ierror +end subroutine PMPI_Win_allocate + +subroutine PMPI_Win_allocate_cptr(size, disp_unit, info, comm, & + baseptr, win, ierror) + use, intrinsic :: iso_c_binding, only : c_ptr + include 'mpif-config.h' + integer :: disp_unit, info, comm, win, ierror + integer(KIND=MPI_ADDRESS_KIND) :: size + type(C_PTR) :: baseptr +end 
subroutine PMPI_Win_allocate_cptr + +end interface + interface MPI_Win_allocate_shared @@ -7348,6 +7617,68 @@ end subroutine MPI_Win_allocate_shared_cptr end interface +interface PMPI_Win_allocate_shared + +subroutine PMPI_Win_allocate_shared(size, disp_unit, info, comm, & + baseptr, win, ierror) + include 'mpif-config.h' + integer(KIND=MPI_ADDRESS_KIND), intent(in) :: size + integer, intent(in) :: disp_unit + integer, intent(in) :: info + integer, intent(in) :: comm + integer(KIND=MPI_ADDRESS_KIND), intent(out) :: baseptr + integer, intent(out) :: win + integer, intent(out) :: ierror +end subroutine PMPI_Win_allocate_shared + +subroutine PMPI_Win_allocate_shared_cptr(size, disp_unit, info, comm, & + baseptr, win, ierror) + use, intrinsic :: iso_c_binding, only : c_ptr + include 'mpif-config.h' + integer :: disp_unit, info, comm, win, ierror + integer(KIND=MPI_ADDRESS_KIND) :: size + type(C_PTR) :: baseptr +end subroutine PMPI_Win_allocate_shared_cptr + +end interface + + +interface MPI_Win_attach + +subroutine MPI_Win_attach(win, base, size, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: base + integer(kind=MPI_ADDRESS_KIND), intent(in) :: size + integer, intent(in) :: win + integer, intent(out) :: ierror +end subroutine MPI_Win_attach + +end interface + +interface PMPI_Win_attach + +subroutine PMPI_Win_attach(win, base, size, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: base + integer(kind=MPI_ADDRESS_KIND), intent(in) :: size + integer, intent(in) :: win + integer, intent(out) :: ierror +end subroutine PMPI_Win_attach + +end interface + + +interface MPI_Win_call_errhandler + +subroutine MPI_Win_call_errhandler(win, errorcode, ierror) + integer, intent(in) :: win + integer, intent(in) :: errorcode + integer, intent(out) :: ierror +end subroutine MPI_Win_call_errhandler + +end interface interface 
PMPI_Win_call_errhandler @@ -7441,58 +7772,6 @@ end subroutine PMPI_Win_create_dynamic end interface -interface MPI_Win_attach - -subroutine MPI_Win_attach(win, base, size, ierror) - include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: base - integer(kind=MPI_ADDRESS_KIND), intent(in) :: size - integer, intent(out) :: win - integer, intent(out) :: ierror -end subroutine MPI_Win_attach - -end interface - -interface PMPI_Win_attach - -subroutine PMPI_Win_attach(win, base, size, ierror) - include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: base - integer(kind=MPI_ADDRESS_KIND), intent(in) :: size - integer, intent(out) :: win - integer, intent(out) :: ierror -end subroutine PMPI_Win_attach - -end interface - - -interface MPI_Win_detach - -subroutine MPI_Win_detach(win, base, ierror) - include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: base - integer, intent(out) :: win - integer, intent(out) :: ierror -end subroutine MPI_Win_detach - -end interface - -interface PMPI_Win_detach - -subroutine PMPI_Win_detach(win, base, ierror) - include 'mpif-config.h' - @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base - @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: base - integer, intent(out) :: win - integer, intent(out) :: ierror -end subroutine PMPI_Win_detach - -end interface - - interface MPI_Win_create_errhandler subroutine MPI_Win_create_errhandler(function, errhandler, ierror) @@ -7562,6 +7841,31 @@ end subroutine PMPI_Win_delete_attr end interface +interface MPI_Win_detach + +subroutine MPI_Win_detach(win, base, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: base + integer, intent(in) :: win + integer, intent(out) :: ierror +end subroutine MPI_Win_detach + +end interface + +interface PMPI_Win_detach + +subroutine 
PMPI_Win_detach(win, base, ierror) + include 'mpif-config.h' + @OMPI_FORTRAN_IGNORE_TKR_PREDECL@ base + @OMPI_FORTRAN_IGNORE_TKR_TYPE@, intent(in) :: base + integer, intent(in) :: win + integer, intent(out) :: ierror +end subroutine PMPI_Win_detach + +end interface + + interface MPI_Win_fence subroutine MPI_Win_fence(assert, win, ierror) @@ -7582,6 +7886,7 @@ end subroutine PMPI_Win_fence end interface + interface MPI_Win_flush subroutine MPI_Win_flush(rank, win, ierror) @@ -7602,6 +7907,7 @@ end subroutine PMPI_Win_flush end interface + interface MPI_Win_flush_all subroutine MPI_Win_flush_all(win, ierror) @@ -7620,6 +7926,7 @@ end subroutine PMPI_Win_flush_all end interface + interface MPI_Win_flush_local subroutine MPI_Win_flush_local(rank, win, ierror) @@ -7640,6 +7947,7 @@ end subroutine PMPI_Win_flush_local end interface + interface MPI_Win_flush_local_all subroutine MPI_Win_flush_local_all(win, ierror) @@ -7658,6 +7966,7 @@ end subroutine PMPI_Win_flush_local_all end interface + interface MPI_Win_free subroutine MPI_Win_free(win, ierror) @@ -7836,6 +8145,27 @@ end subroutine PMPI_Win_lock end interface +interface MPI_Win_lock_all + +subroutine MPI_Win_lock_all(assert, win, ierror) + integer, intent(in) :: assert + integer, intent(in) :: win + integer, intent(out) :: ierror +end subroutine MPI_Win_lock_all + +end interface + +interface PMPI_Win_lock_all + +subroutine PMPI_Win_lock_all(assert, win, ierror) + integer, intent(in) :: assert + integer, intent(in) :: win + integer, intent(out) :: ierror +end subroutine PMPI_Win_lock_all + +end interface + + interface MPI_Win_post subroutine MPI_Win_post(group, assert, win, ierror) @@ -7976,6 +8306,33 @@ end subroutine MPI_Win_shared_query_cptr end interface +interface PMPI_Win_shared_query + +subroutine PMPI_Win_shared_query(win, rank, size, disp_unit, baseptr,& + ierror) + include 'mpif-config.h' + integer, intent(in) :: win + integer, intent(in) :: rank + integer(KIND=MPI_ADDRESS_KIND), intent(out) :: size + 
integer, intent(out) :: disp_unit + integer(KIND=MPI_ADDRESS_KIND), intent(out) :: baseptr + integer, intent(out) :: ierror +end subroutine PMPI_Win_shared_query + +subroutine PMPI_Win_shared_query_cptr(win, rank, size, disp_unit, baseptr,& + ierror) + use, intrinsic :: iso_c_binding, only : c_ptr + include 'mpif-config.h' + integer, intent(in) :: win + integer, intent(in) :: rank + integer(KIND=MPI_ADDRESS_KIND), intent(out) :: size + integer, intent(out) :: disp_unit + type(C_PTR), intent(out) :: baseptr + integer, intent(out) :: ierror +end subroutine PMPI_Win_shared_query_cptr + +end interface + interface MPI_Win_start @@ -8000,6 +8357,25 @@ end subroutine PMPI_Win_start end interface +interface MPI_Win_sync + +subroutine MPI_Win_sync(win, ierror) + integer, intent(in) :: win + integer, intent(out) :: ierror +end subroutine MPI_Win_sync + +end interface + +interface PMPI_Win_sync + +subroutine PMPI_Win_sync(win, ierror) + integer, intent(in) :: win + integer, intent(out) :: ierror +end subroutine PMPI_Win_sync + +end interface + + interface MPI_Win_test subroutine MPI_Win_test(win, flag, ierror) @@ -8042,6 +8418,25 @@ end subroutine PMPI_Win_unlock end interface +interface MPI_Win_unlock_all + +subroutine MPI_Win_unlock_all(win, ierror) + integer, intent(in) :: win + integer, intent(out) :: ierror +end subroutine MPI_Win_unlock_all + +end interface + +interface PMPI_Win_unlock_all + +subroutine PMPI_Win_unlock_all(win, ierror) + integer, intent(in) :: win + integer, intent(out) :: ierror +end subroutine PMPI_Win_unlock_all + +end interface + + interface MPI_Win_wait subroutine MPI_Win_wait(win, ierror) diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr.F90 b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr.F90 index 605eddbca32..4120d7d6b3b 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr.F90 +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr.F90 @@ -6,15 +6,15 @@ ! 
Copyright (c) 2004-2005 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, ! University of Stuttgart. All rights reserved. ! Copyright (c) 2004-2005 The Regents of the University of California. ! All rights reserved. ! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. ! $COPYRIGHT$ -! +! ! Additional copyrights may follow -! +! ! $HEADER$ ! diff --git a/ompi/mpi/fortran/use-mpi-tkr/Makefile.am b/ompi/mpi/fortran/use-mpi-tkr/Makefile.am index 208c73c19f0..653ad71d8ca 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/Makefile.am +++ b/ompi/mpi/fortran/use-mpi-tkr/Makefile.am @@ -6,19 +6,20 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2007 Los Alamos National Security, LLC. All rights -# reserved. +# reserved. # Copyright (c) 2014-2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -49,7 +50,7 @@ lib_LTLIBRARIES = # Add the f90 library to the list of libraries to build -lib_LTLIBRARIES += libmpi_usempi.la +lib_LTLIBRARIES += lib@OMPI_LIBMPI_NAME@_usempi.la # Automake doesn't know how to do F90 dependency analysis, so manually # list this here (i.e., "mpi-f90-interfaces.h" is included in @@ -65,8 +66,10 @@ mpi.lo: mpi-f90-cptr-interfaces.F90 # buffer dummy argument. We therefore really only need to build a # handful of subroutines. -libmpi_usempi_la_SOURCES = \ +lib@OMPI_LIBMPI_NAME@_usempi_la_SOURCES = \ mpi.F90 \ + mpi_aint_add_f90.f90 \ + mpi_aint_diff_f90.f90 \ mpi_comm_spawn_multiple_f90.f90 \ mpi_testall_f90.f90 \ mpi_testsome_f90.f90 \ @@ -74,20 +77,26 @@ libmpi_usempi_la_SOURCES = \ mpi_waitsome_f90.f90 \ mpi_wtick_f90.f90 \ mpi_wtime_f90.f90 -libmpi_usempi_la_LIBADD = \ - $(top_builddir)/ompi/mpi/fortran/mpif-h/libmpi_mpifh.la # Don't distribute mpi-tkr-sizeof-*; they're generated. -nodist_libmpi_usempi_la_SOURCES = +nodist_lib@OMPI_LIBMPI_NAME@_usempi_la_SOURCES = if BUILD_FORTRAN_SIZEOF -nodist_libmpi_usempi_la_SOURCES += \ +nodist_lib@OMPI_LIBMPI_NAME@_usempi_la_SOURCES += \ mpi-tkr-sizeof.h \ mpi-tkr-sizeof.f90 endif +# Note that we invoke some OPAL functions directly in +# libmpi_usempi.la, so we need to link in the OPAL library directly +# (pulling it in indirectly via libmpi.la does not work on all +# platforms). 
+lib@OMPI_LIBMPI_NAME@_usempi_la_LIBADD = \ + $(top_builddir)/ompi/mpi/fortran/mpif-h/lib@OMPI_LIBMPI_NAME@_mpifh.la \ + $(OMPI_MPIEXT_USEMPI_LIBS) \ + $(OMPI_TOP_BUILDDIR)/opal/lib@OPAL_LIB_PREFIX@open-pal.la # Set the library version -libmpi_usempi_la_LDFLAGS = \ +lib@OMPI_LIBMPI_NAME@_usempi_la_LDFLAGS = \ -version-info $(libmpi_usempi_tkr_so_version) \ $(OMPI_FORTRAN_EXTRA_SHARED_LIBRARY_FLAGS) @@ -124,7 +133,7 @@ mpi-tkr-sizeof.f90: CLEANFILES += mpi-tkr-sizeof.h mpi-tkr-sizeof.f90 MOSTLYCLEANFILES = *.mod -DISTCLEANFILES = $(nodist_libmpi_usempi_la_SOURCES) +DISTCLEANFILES = $(nodist_lib@OMPI_LIBMPI_NAME@_usempi_la_SOURCES) # # Install the generated .mod files. Unfortunately, each F90 compiler diff --git a/ompi/mpi/fortran/use-mpi-tkr/fortran_kinds.sh.in b/ompi/mpi/fortran/use-mpi-tkr/fortran_kinds.sh.in index fa4c9e4da19..bc81b890427 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/fortran_kinds.sh.in +++ b/ompi/mpi/fortran/use-mpi-tkr/fortran_kinds.sh.in @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,7 +26,7 @@ ckinds="@OMPI_FORTRAN_CKINDS@" # In prior versions of Open MPI, we used to allow the user to choose # which "size" of "use mpi" module to build: tiny, small, medium, and -# large. Large never really worked. +# large. Large never really worked. 
# # According to MPI-3 p610:34-41, if ignore TKR is not supported, we # should not provide interfaces for MPI subroutines with choice diff --git a/ompi/mpi/fortran/use-mpi-tkr/fortran_sizes.h.in b/ompi/mpi/fortran/use-mpi-tkr/fortran_sizes.h.in index b706996f190..fe6db1b536d 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/fortran_sizes.h.in +++ b/ompi/mpi/fortran/use-mpi-tkr/fortran_sizes.h.in @@ -5,15 +5,15 @@ ! Copyright (c) 2004-2005 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. -! Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, +! Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, ! University of Stuttgart. All rights reserved. ! Copyright (c) 2004-2005 The Regents of the University of California. ! All rights reserved. ! Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. ! $COPYRIGHT$ -! +! ! Additional copyrights may follow -! +! ! $HEADER$ ! diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h index 5dcbe1df49f..a57a3b47e0e 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h +++ b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h @@ -78,6 +78,27 @@ end subroutine MPI_Add_error_string end interface +interface MPI_Aint_add + +function MPI_Aint_add(base, diff) + include 'mpif-config.h' + integer(kind=MPI_ADDRESS_KIND), intent(in) :: base + integer(kind=MPI_ADDRESS_KIND), intent(in) :: diff + integer(kind=MPI_ADDRESS_KIND) MPI_Aint_add +end function MPI_Aint_add + +end interface + +interface MPI_Aint_diff + +function MPI_Aint_diff(addr1, addr2) + include 'mpif-config.h' + integer(kind=MPI_ADDRESS_KIND), intent(in) :: addr1 + integer(kind=MPI_ADDRESS_KIND), intent(in) :: addr2 + integer(kind=MPI_ADDRESS_KIND) MPI_Aint_diff +end function MPI_Aint_diff + +end interface interface MPI_Attr_delete @@ -3358,7 +3379,7 @@ subroutine MPI_Dist_graph_create(comm_old, n, sources, degrees, 
destinations, & integer, dimension(n), intent(in) :: degrees integer, dimension(n), intent(in) :: destinations integer, dimension(n), intent(in) :: weights - logical, intent(in) :: info + integer, intent(in) :: info logical, intent(in) :: reorder integer, intent(out) :: comm_dist_graph integer, intent(out) :: ierror @@ -3379,7 +3400,7 @@ subroutine MPI_Dist_graph_create_adjacent(comm_old, indegree, sources, sourcewei integer, intent(in) :: outdegree integer, dimension(outdegree), intent(in) :: destinations integer, dimension(outdegree), intent(in) :: destweights - logical, intent(in) :: info + integer, intent(in) :: info logical, intent(in) :: reorder integer, intent(out) :: comm_dist_graph integer, intent(out) :: ierror diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi.F90 b/ompi/mpi/fortran/use-mpi-tkr/mpi.F90 index 4d1feea1545..89c515f7e3b 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi.F90 +++ b/ompi/mpi/fortran/use-mpi-tkr/mpi.F90 @@ -6,15 +6,15 @@ ! Copyright (c) 2004-2005 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, ! University of Stuttgart. All rights reserved. ! Copyright (c) 2004-2005 The Regents of the University of California. ! All rights reserved. ! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. ! $COPYRIGHT$ -! +! ! Additional copyrights may follow -! +! ! $HEADER$ ! diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi_aint_add_f90.f90 b/ompi/mpi/fortran/use-mpi-tkr/mpi_aint_add_f90.f90 new file mode 100644 index 00000000000..e1e7140cd1e --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-tkr/mpi_aint_add_f90.f90 @@ -0,0 +1,31 @@ +! -*- fortran -*- +! +! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +! University Research and Technology +! Corporation. All rights reserved. +! 
Copyright (c) 2004-2005 The University of Tennessee and The University +! of Tennessee Research Foundation. All rights +! reserved. +! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +! University of Stuttgart. All rights reserved. +! Copyright (c) 2004-2005 The Regents of the University of California. +! All rights reserved. +! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2015 Los Alamos National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ +! +! Additional copyrights may follow +! +! $HEADER$ +! + +function MPI_Aint_add(base, diff) + include 'mpif-config.h' + integer(kind=MPI_ADDRESS_KIND), intent(in) :: base + integer(kind=MPI_ADDRESS_KIND), intent(in) :: diff + integer(kind=MPI_ADDRESS_KIND) :: MPI_Aint_add,foo + call MPI_Aint_add_f90(base,diff,foo) + MPI_Aint_add = foo +end function MPI_Aint_add + diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi_aint_diff_f90.f90 b/ompi/mpi/fortran/use-mpi-tkr/mpi_aint_diff_f90.f90 new file mode 100644 index 00000000000..2bc6e485c87 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-tkr/mpi_aint_diff_f90.f90 @@ -0,0 +1,31 @@ +! -*- fortran -*- +! +! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +! University Research and Technology +! Corporation. All rights reserved. +! Copyright (c) 2004-2005 The University of Tennessee and The University +! of Tennessee Research Foundation. All rights +! reserved. +! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +! University of Stuttgart. All rights reserved. +! Copyright (c) 2004-2005 The Regents of the University of California. +! All rights reserved. +! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2015 Los Alamos National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ +! +! Additional copyrights may follow +! +! $HEADER$ +! 
+ +function MPI_Aint_diff(addr1, addr2) + include 'mpif-config.h' + integer(kind=MPI_ADDRESS_KIND), intent(in) :: addr1 + integer(kind=MPI_ADDRESS_KIND), intent(in) :: addr2 + integer(kind=MPI_ADDRESS_KIND) :: MPI_Aint_diff,foo + call MPI_Aint_diff_f90(addr1,addr2,foo) + MPI_Aint_diff = foo +end function MPI_Aint_diff + diff --git a/ompi/mpi/fortran/use-mpi-tkr/test/print_align.f90 b/ompi/mpi/fortran/use-mpi-tkr/test/print_align.f90 index dc61ca05050..126ca6f3b6a 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/test/print_align.f90 +++ b/ompi/mpi/fortran/use-mpi-tkr/test/print_align.f90 @@ -5,15 +5,15 @@ ! Copyright (c) 2004-2005 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, ! University of Stuttgart. All rights reserved. ! Copyright (c) 2004-2005 The Regents of the University of California. ! All rights reserved. ! Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. ! $COPYRIGHT$ -! +! ! Additional copyrights may follow -! +! ! $HEADER$ ! ! print_align.f90 diff --git a/ompi/mpi/fortran/use-mpi-tkr/test/print_ikinds.f90 b/ompi/mpi/fortran/use-mpi-tkr/test/print_ikinds.f90 index 98f6b6ab52d..d2b5e6aea9b 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/test/print_ikinds.f90 +++ b/ompi/mpi/fortran/use-mpi-tkr/test/print_ikinds.f90 @@ -5,15 +5,15 @@ ! Copyright (c) 2004-2005 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, ! University of Stuttgart. All rights reserved. ! Copyright (c) 2004-2005 The Regents of the University of California. ! All rights reserved. ! Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. ! $COPYRIGHT$ -! +! ! 
Additional copyrights may follow -! +! ! $HEADER$ ! ! print_ikinds.f90 diff --git a/ompi/mpi/fortran/use-mpi-tkr/test/print_prec_range.f90 b/ompi/mpi/fortran/use-mpi-tkr/test/print_prec_range.f90 index cff453d571b..1abc5526875 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/test/print_prec_range.f90 +++ b/ompi/mpi/fortran/use-mpi-tkr/test/print_prec_range.f90 @@ -5,15 +5,15 @@ ! Copyright (c) 2004-2005 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, ! University of Stuttgart. All rights reserved. ! Copyright (c) 2004-2005 The Regents of the University of California. ! All rights reserved. ! Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. ! $COPYRIGHT$ -! +! ! Additional copyrights may follow -! +! ! $HEADER$ ! ! print_prec_range.f90 diff --git a/ompi/mpi/fortran/use-mpi-tkr/test/print_rkinds.f90 b/ompi/mpi/fortran/use-mpi-tkr/test/print_rkinds.f90 index 6510cf00bca..3eaa5466910 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/test/print_rkinds.f90 +++ b/ompi/mpi/fortran/use-mpi-tkr/test/print_rkinds.f90 @@ -5,15 +5,15 @@ ! Copyright (c) 2004-2005 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, ! University of Stuttgart. All rights reserved. ! Copyright (c) 2004-2005 The Regents of the University of California. ! All rights reserved. ! Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. ! $COPYRIGHT$ -! +! ! Additional copyrights may follow -! +! ! $HEADER$ ! ! 
print_rkinds.f90 diff --git a/ompi/mpi/fortran/use-mpi-tkr/test/send_t.c b/ompi/mpi/fortran/use-mpi-tkr/test/send_t.c index 4a532296c60..e39a02f25eb 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/test/send_t.c +++ b/ompi/mpi/fortran/use-mpi-tkr/test/send_t.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * send_t.c - tests mpi_send variants diff --git a/ompi/mpi/fortran/use-mpi-tkr/test/test_send.f90 b/ompi/mpi/fortran/use-mpi-tkr/test/test_send.f90 index e25d2921417..33c24ec845a 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/test/test_send.f90 +++ b/ompi/mpi/fortran/use-mpi-tkr/test/test_send.f90 @@ -5,15 +5,15 @@ ! Copyright (c) 2004-2005 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. -! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +! Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, ! University of Stuttgart. All rights reserved. ! Copyright (c) 2004-2005 The Regents of the University of California. ! All rights reserved. ! Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. ! $COPYRIGHT$ -! +! ! Additional copyrights may follow -! +! ! $HEADER$ ! ! 
test_send.f90 - tests mpi_send variants (calls functions in send_t.c) diff --git a/ompi/mpi/help-mpi-api.txt b/ompi/mpi/help-mpi-api.txt index fc7dffd8bb9..0045d305251 100644 --- a/ompi/mpi/help-mpi-api.txt +++ b/ompi/mpi/help-mpi-api.txt @@ -1,25 +1,18 @@ # -*- text -*- # -# Copyright (c) 2006 High Performance Computing Center Stuttgart, +# Copyright (c) 2006 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for Open MPI. # -[mpi-function-after-finalize] -Calling any MPI-function after calling MPI_Finalize is erroneous. -The only exceptions are MPI_Initialized, MPI_Finalized and MPI_Get_version. -# -[mpi-initialize-twice] -Calling MPI_Init or MPI_Init_thread twice is erroneous. -# [mpi-abort] -MPI_ABORT was invoked on rank %d in communicator %s +MPI_ABORT was invoked on rank %d in communicator %s with errorcode %d. NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. diff --git a/ompi/mpi/java/Makefile.am b/ompi/mpi/java/Makefile.am index e0525cc5ba2..9e516a704a1 100644 --- a/ompi/mpi/java/Makefile.am +++ b/ompi/mpi/java/Makefile.am @@ -3,9 +3,9 @@ # Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpi/java/README b/ompi/mpi/java/README index 2635166df65..75db2708418 100644 --- a/ompi/mpi/java/README +++ b/ompi/mpi/java/README @@ -39,7 +39,7 @@ original copyrights and license terms of mpiJava are listed below. (Bugfixes/Additions, CMake based configure/build) Blasius Czink HLRS, University of Stuttgart - + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at diff --git a/ompi/mpi/java/c/Makefile.am b/ompi/mpi/java/c/Makefile.am index 6b90e27e30f..2fee2dc0611 100644 --- a/ompi/mpi/java/c/Makefile.am +++ b/ompi/mpi/java/c/Makefile.am @@ -1,18 +1,23 @@ # -*- makefile -*- # # Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # if OMPI_WANT_JAVA_BINDINGS # Get the include files that were generated from the .java source files -AM_CPPFLAGS = -I$(top_builddir)/ompi/mpi/java/java $(OPAL_JDK_CPPFLAGS) -DOPAL_DYN_LIB_SUFFIX=\"$(OPAL_DYN_LIB_SUFFIX)\" +AM_CPPFLAGS = -I$(top_builddir)/ompi/mpi/java/java $(OPAL_JDK_CPPFLAGS) -DOMPI_LIBMPI_NAME=\"$(OMPI_LIBMPI_NAME)\" -DOPAL_DYN_LIB_SUFFIX=\"$(OPAL_DYN_LIB_SUFFIX)\" headers = \ mpiJava.h @@ -20,28 +25,29 @@ ompidir = $(ompiincludedir)/ompi/mpi/java ompi_HEADERS = \ $(headers) -lib_LTLIBRARIES = libmpi_java.la -libmpi_java_la_SOURCES = \ - mpi_CartComm.c \ - mpi_Comm.c \ +lib_LTLIBRARIES = lib@OMPI_LIBMPI_NAME@_java.la +lib@OMPI_LIBMPI_NAME@_java_la_SOURCES = \ + mpi_CartComm.c \ + mpi_Comm.c \ mpi_Constant.c \ - mpi_Datatype.c \ - mpi_Errhandler.c \ + mpi_Count.c \ + mpi_Datatype.c \ + mpi_Errhandler.c \ mpi_File.c \ - mpi_GraphComm.c \ - mpi_Group.c \ + mpi_GraphComm.c \ + mpi_Group.c \ mpi_Info.c \ - mpi_Intercomm.c \ - mpi_Intracomm.c \ + mpi_Intercomm.c \ + mpi_Intracomm.c \ mpi_Message.c \ - mpi_MPI.c \ - mpi_Op.c \ - mpi_Request.c \ - mpi_Prequest.c \ - mpi_Status.c \ + mpi_MPI.c \ + mpi_Op.c \ + mpi_Request.c \ + mpi_Prequest.c \ + mpi_Status.c \ mpi_Win.c 
-libmpi_java_la_LIBADD = $(top_builddir)/ompi/libmpi.la -libmpi_java_la_LDFLAGS = -version-info $(libmpi_java_so_version) +lib@OMPI_LIBMPI_NAME@_java_la_LIBADD = -ldl $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la +lib@OMPI_LIBMPI_NAME@_java_la_LDFLAGS = -version-info $(libmpi_java_so_version) endif diff --git a/ompi/mpi/java/c/mpiJava.h b/ompi/mpi/java/c/mpiJava.h index 3b9303ac86d..6f20cf943b8 100644 --- a/ompi/mpi/java/c/mpiJava.h +++ b/ompi/mpi/java/c/mpiJava.h @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,6 +31,10 @@ typedef struct { jmethodID CartParmsInit; jclass ShiftParmsClass; jmethodID ShiftParmsInit; + jclass VersionClass; + jmethodID VersionInit; + jclass CountClass; + jmethodID CountInit; jclass GraphParmsClass; jmethodID GraphParmsInit; jclass DistGraphNeighborsClass; @@ -133,6 +139,11 @@ void ompi_java_releaseIntArray( void ompi_java_forgetIntArray( JNIEnv *env, jintArray array, jint *jptr, int *cptr); +void ompi_java_getDatatypeArray( + JNIEnv *env, jlongArray array, jlong **jptr, MPI_Datatype **cptr); +void ompi_java_forgetDatatypeArray( + JNIEnv *env, jlongArray array, jlong *jptr, MPI_Datatype *cptr); + void ompi_java_getBooleanArray( JNIEnv *env, jbooleanArray array, jboolean **jptr, int **cptr); void ompi_java_releaseBooleanArray( diff --git a/ompi/mpi/java/c/mpi_CartComm.c b/ompi/mpi/java/c/mpi_CartComm.c index cd796578a98..9c6a8b3040f 100644 --- a/ompi/mpi/java/c/mpi_CartComm.c +++ 
b/ompi/mpi/java/c/mpi_CartComm.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -58,7 +58,7 @@ JNIEXPORT void JNICALL Java_mpi_CartComm_init(JNIEnv *env, jclass clazz) { ompi_java.CartParmsInit = (*env)->GetMethodID(env, ompi_java.CartParmsClass, "", "([I[Z[I)V"); - + ompi_java.ShiftParmsInit = (*env)->GetMethodID(env, ompi_java.ShiftParmsClass, "", "(II)V"); } diff --git a/ompi/mpi/java/c/mpi_Comm.c b/ompi/mpi/java/c/mpi_Comm.c index edabfed97be..654c2ad475a 100644 --- a/ompi/mpi/java/c/mpi_Comm.c +++ b/ompi/mpi/java/c/mpi_Comm.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -214,6 +214,15 @@ JNIEXPORT jlongArray JNICALL Java_mpi_Comm_iDup( return jcr; } +JNIEXPORT jlong JNICALL Java_mpi_Comm_dupWithInfo( + JNIEnv *env, jobject jthis, jlong comm, jlong info) +{ + MPI_Comm newcomm; + int rc = MPI_Comm_dup_with_info((MPI_Comm)comm, (MPI_Info)info, &newcomm); + ompi_java_exceptionCheck(env, rc); + return (jlong)newcomm; +} + JNIEXPORT jint JNICALL Java_mpi_Comm_getSize( JNIEnv *env, jobject jthis, jlong comm) { @@ -1581,6 +1590,82 @@ JNIEXPORT jlong JNICALL Java_mpi_Comm_iAllToAllv( return (jlong)request; } +JNIEXPORT void JNICALL Java_mpi_Comm_allToAllw( + JNIEnv *env, jobject jthis, jlong jComm, + jobject sendBuf, jintArray sCount, jintArray sDispls, jlongArray sTypes, + jobject recvBuf, jintArray rCount, jintArray rDispls, jlongArray rTypes) +{ + MPI_Comm comm = (MPI_Comm)jComm; + + jlong* jSTypes, *jRTypes; + MPI_Datatype *cSTypes, *cRTypes; + + ompi_java_getDatatypeArray(env, sTypes, &jSTypes, &cSTypes); + ompi_java_getDatatypeArray(env, rTypes, &jRTypes, &cRTypes); + + jint *jSCount, *jRCount, *jSDispls, *jRDispls; + int *cSCount, *cRCount, *cSDispls, *cRDispls; + ompi_java_getIntArray(env, sCount, &jSCount, &cSCount); + ompi_java_getIntArray(env, rCount, &jRCount, &cRCount); + ompi_java_getIntArray(env, sDispls, &jSDispls, &cSDispls); + ompi_java_getIntArray(env, rDispls, &jRDispls, &cRDispls); + + void *sPtr = ompi_java_getDirectBufferAddress(env, sendBuf), + *rPtr = ompi_java_getDirectBufferAddress(env, recvBuf); + + int rc = MPI_Alltoallw( + sPtr, cSCount, cSDispls, cSTypes, + rPtr, cRCount, cRDispls, cRTypes, comm); + + ompi_java_exceptionCheck(env, rc); + ompi_java_forgetIntArray(env, sCount, jSCount, cSCount); + ompi_java_forgetIntArray(env, rCount, jRCount, cRCount); + ompi_java_forgetIntArray(env, sDispls, jSDispls, cSDispls); + ompi_java_forgetIntArray(env, rDispls, jRDispls, cRDispls); + ompi_java_forgetDatatypeArray(env, sTypes, jSTypes, 
cSTypes); + ompi_java_forgetDatatypeArray(env, rTypes, jRTypes, cRTypes); +} + +JNIEXPORT jlong JNICALL Java_mpi_Comm_iAllToAllw( + JNIEnv *env, jobject jthis, jlong jComm, + jobject sendBuf, jintArray sCount, jintArray sDispls, jlongArray sTypes, + jobject recvBuf, jintArray rCount, jintArray rDispls, jlongArray rTypes) +{ + MPI_Comm comm = (MPI_Comm)jComm; + + jlong* jSTypes, *jRTypes; + MPI_Datatype *cSTypes, *cRTypes; + + ompi_java_getDatatypeArray(env, sTypes, &jSTypes, &cSTypes); + ompi_java_getDatatypeArray(env, rTypes, &jRTypes, &cRTypes); + + jint *jSCount, *jRCount, *jSDispls, *jRDispls; + int *cSCount, *cRCount, *cSDispls, *cRDispls; + ompi_java_getIntArray(env, sCount, &jSCount, &cSCount); + ompi_java_getIntArray(env, rCount, &jRCount, &cRCount); + ompi_java_getIntArray(env, sDispls, &jSDispls, &cSDispls); + ompi_java_getIntArray(env, rDispls, &jRDispls, &cRDispls); + + void *sPtr = ompi_java_getDirectBufferAddress(env, sendBuf), + *rPtr = ompi_java_getDirectBufferAddress(env, recvBuf); + + MPI_Request request; + + int rc = MPI_Ialltoallw( + sPtr, cSCount, cSDispls, cSTypes, + rPtr, cRCount, cRDispls, cRTypes, comm, &request); + + ompi_java_exceptionCheck(env, rc); + ompi_java_forgetIntArray(env, sCount, jSCount, cSCount); + ompi_java_forgetIntArray(env, rCount, jRCount, cRCount); + ompi_java_forgetIntArray(env, sDispls, jSDispls, cSDispls); + ompi_java_forgetIntArray(env, rDispls, jRDispls, cRDispls); + ompi_java_forgetDatatypeArray(env, sTypes, jSTypes, cSTypes); + ompi_java_forgetDatatypeArray(env, rTypes, jRTypes, cRTypes); + + return (jlong)request; +} + JNIEXPORT void JNICALL Java_mpi_Comm_neighborAllGather( JNIEnv *env, jobject jthis, jlong jComm, jobject sBuf, jboolean sdb, jint sOff, diff --git a/ompi/mpi/java/c/mpi_Constant.c b/ompi/mpi/java/c/mpi_Constant.c index 7e80e542efe..20d180b8e62 100644 --- a/ompi/mpi/java/c/mpi_Constant.c +++ b/ompi/mpi/java/c/mpi_Constant.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and 
The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mpi/java/c/mpi_Count.c b/ompi/mpi/java/c/mpi_Count.c new file mode 100644 index 00000000000..0ef8827c017 --- /dev/null +++ b/ompi/mpi/java/c/mpi_Count.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * + * This file is almost a complete re-write for Open MPI compared to the + * original mpiJava package. Its license and copyright are listed below. + * See for more information. + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * + * File : Version.java + * Author : Nathaniel Graham + * Created : Thu Jul 30 09:34 2015 + */ + +#include "ompi_config.h" + +#include +#ifdef HAVE_TARGETCONDITIONALS_H +#include +#endif + +#include "mpi.h" +#include "mpi_Count.h" +#include "mpiJava.h" + +JNIEXPORT void JNICALL Java_mpi_Count_initCount(JNIEnv *env, jclass jthis) +{ + jclass c = (*env)->FindClass(env, "mpi/Count"); + ompi_java.CountClass = (*env)->NewGlobalRef(env, c); + ompi_java.CountInit = (*env)->GetMethodID(env, ompi_java.CountClass, "", "(J)V"); + + (*env)->DeleteLocalRef(env, c); +} diff --git a/ompi/mpi/java/c/mpi_Datatype.c b/ompi/mpi/java/c/mpi_Datatype.c index 69344603be5..1979a21b65c 100644 --- a/ompi/mpi/java/c/mpi_Datatype.c +++ b/ompi/mpi/java/c/mpi_Datatype.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* diff --git a/ompi/mpi/java/c/mpi_Errhandler.c b/ompi/mpi/java/c/mpi_Errhandler.c index acfcf9d8fd0..793bcbbb516 100644 --- a/ompi/mpi/java/c/mpi_Errhandler.c +++ b/ompi/mpi/java/c/mpi_Errhandler.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -36,7 +36,7 @@ /* * File : mpi_Errhandler.c - * Headerfile : mpi_Errhandler.h + * Headerfile : mpi_Errhandler.h * Author : Bryan Carpenter * Created : 1999 * Revision : $Revision: 1.2 $ diff --git a/ompi/mpi/java/c/mpi_File.c b/ompi/mpi/java/c/mpi_File.c index e404bf0cbd7..c1c34d5acc3 100644 --- a/ompi/mpi/java/c/mpi_File.c +++ b/ompi/mpi/java/c/mpi_File.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mpi/java/c/mpi_GraphComm.c b/ompi/mpi/java/c/mpi_GraphComm.c index b7de5e482b0..8a77eb816d4 100644 --- a/ompi/mpi/java/c/mpi_GraphComm.c +++ b/ompi/mpi/java/c/mpi_GraphComm.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -35,7 +35,7 @@ */ /* * File : mpi_GraphComm.c - * Headerfile : mpi_GraphComm.h + * Headerfile : mpi_GraphComm.h * Author : Xinying Li * Created : Thu Apr 9 12:22:15 1998 * Revision : $Revision: 1.2 $ diff --git a/ompi/mpi/java/c/mpi_Group.c b/ompi/mpi/java/c/mpi_Group.c index ea25109a0a0..2ea29f4acdb 100644 --- a/ompi/mpi/java/c/mpi_Group.c +++ b/ompi/mpi/java/c/mpi_Group.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -35,7 +35,7 @@ */ /* * File : mpi_Group.c - * Headerfile : mpi_Group.h + * Headerfile : mpi_Group.h * Author : Xinying Li * Created : Thu Apr 9 12:22:15 1998 * Revision : $Revision: 1.3 $ diff --git a/ompi/mpi/java/c/mpi_Info.c b/ompi/mpi/java/c/mpi_Info.c index ef8d2ee29ad..93545c757e8 100644 --- a/ompi/mpi/java/c/mpi_Info.c +++ b/ompi/mpi/java/c/mpi_Info.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mpi/java/c/mpi_Intercomm.c b/ompi/mpi/java/c/mpi_Intercomm.c index 917d6f62831..8e8f1b68e98 100644 --- a/ompi/mpi/java/c/mpi_Intercomm.c +++ b/ompi/mpi/java/c/mpi_Intercomm.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -35,7 +35,7 @@ */ /* * File : mpi_Intercomm.c - * Headerfile : mpi_Intercomm.h + * Headerfile : mpi_Intercomm.h * Author : Xinying Li * Created : Thu Apr 9 12:22:15 1998 * Revision : $Revision: 1.3 $ diff --git a/ompi/mpi/java/c/mpi_Intracomm.c b/ompi/mpi/java/c/mpi_Intracomm.c index cb7bcbc0e7b..f73aa0089df 100644 --- a/ompi/mpi/java/c/mpi_Intracomm.c +++ b/ompi/mpi/java/c/mpi_Intracomm.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -47,9 +49,7 @@ #include "ompi_config.h" #include -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_TARGETCONDITIONALS_H #include #endif @@ -68,6 +68,15 @@ JNIEXPORT jlong JNICALL Java_mpi_Intracomm_split( return (jlong)newcomm; } +JNIEXPORT jlong JNICALL Java_mpi_Intracomm_splitType( + JNIEnv *env, jobject jthis, jlong comm, jint splitType, jint key, jlong info) +{ + MPI_Comm newcomm; + int rc = MPI_Comm_split_type((MPI_Comm)comm, splitType, key, (MPI_Info)info, &newcomm); + ompi_java_exceptionCheck(env, rc); + return (jlong)newcomm; +} + JNIEXPORT jlong JNICALL Java_mpi_Intracomm_create( JNIEnv *env, jobject jthis, jlong comm, jlong group) { @@ -77,6 +86,15 @@ JNIEXPORT jlong JNICALL Java_mpi_Intracomm_create( return (jlong)newcomm; } +JNIEXPORT jlong JNICALL Java_mpi_Intracomm_createGroup( + JNIEnv *env, jobject jthis, jlong comm, jlong group, int tag) +{ + MPI_Comm newcomm; + int rc = MPI_Comm_create_group((MPI_Comm)comm, (MPI_Group)group, tag, &newcomm); + ompi_java_exceptionCheck(env, rc); + return (jlong)newcomm; +} + JNIEXPORT jlong JNICALL Java_mpi_Intracomm_createCart( JNIEnv *env, jobject jthis, jlong comm, jintArray dims, jbooleanArray periods, jboolean reorder) @@ -129,7 +147,7 @@ JNIEXPORT jlong JNICALL Java_mpi_Intracomm_createDistGraph( { MPI_Comm graph; int nnodes = (*env)->GetArrayLength(env, sources); - + jint *jSources, *jDegrees, *jDestins, *jWeights = NULL; int *cSources, *cDegrees, *cDestins, *cWeights = MPI_UNWEIGHTED; ompi_java_getIntArray(env, sources, &jSources, &cSources); @@ -163,12 +181,12 @@ JNIEXPORT jlong JNICALL Java_mpi_Intracomm_createDistGraphAdjacent( int inDegree = (*env)->GetArrayLength(env, sources), outDegree = (*env)->GetArrayLength(env, destins); - + jint *jSources, *jDestins, *jSrcWeights, *jDesWeights; int *cSources, *cDestins, *cSrcWeights, *cDesWeights; ompi_java_getIntArray(env, sources, &jSources, &cSources); 
ompi_java_getIntArray(env, destins, &jDestins, &cDestins); - + if(weighted) { ompi_java_getIntArray(env, srcWeights, &jSrcWeights, &cSrcWeights); @@ -179,7 +197,7 @@ JNIEXPORT jlong JNICALL Java_mpi_Intracomm_createDistGraphAdjacent( jSrcWeights = jDesWeights = NULL; cSrcWeights = cDesWeights = MPI_UNWEIGHTED; } - + int rc = MPI_Dist_graph_create_adjacent((MPI_Comm)comm, inDegree, cSources, cSrcWeights, outDegree, cDestins, cDesWeights, (MPI_Info)info, reorder, &graph); @@ -219,14 +237,14 @@ JNIEXPORT void JNICALL Java_mpi_Intracomm_scan( ompi_java_getReadPtr(&sPtr,&sItem,env,sBuf,sdb,sOff,count,type,bType); ompi_java_getWritePtr(&rPtr, &rItem, env, rBuf, rdb, count, type); } - + MPI_Op op = ompi_java_op_getHandle(env, jOp, hOp, bType); int rc = MPI_Scan(sPtr, rPtr, count, type, op, comm); ompi_java_exceptionCheck(env, rc); if(sBuf != NULL) ompi_java_releaseReadPtr(sPtr, sItem, sBuf, sdb); - + ompi_java_releaseWritePtr(rPtr,rItem,env,rBuf,rdb,rOff,count,type,bType); } @@ -340,7 +358,7 @@ JNIEXPORT jlong JNICALL Java_mpi_Intracomm_accept( root, (MPI_Comm)comm, &newComm); ompi_java_exceptionCheck(env, rc); - + if(jport != NULL) (*env)->ReleaseStringUTFChars(env, jport, port); diff --git a/ompi/mpi/java/c/mpi_MPI.c b/ompi/mpi/java/c/mpi_MPI.c index e856bbfa3ba..0023a3ebcd7 100644 --- a/ompi/mpi/java/c/mpi_MPI.c +++ b/ompi/mpi/java/c/mpi_MPI.c @@ -6,18 +6,21 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -54,20 +57,21 @@ #ifdef HAVE_SYS_TYPES_H #include #endif -#ifdef HAVE_STDLIB_H #include -#endif -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_TARGETCONDITIONALS_H #include #endif #ifdef HAVE_SYS_STAT_H #include #endif +#ifdef HAVE_DLFCN_H #include +#endif #include +#ifdef HAVE_LIBGEN_H +#include +#endif #include "opal/util/output.h" #include "opal/datatype/opal_convertor.h" @@ -82,7 +86,7 @@ ompi_java_globals_t ompi_java = {0}; int ompi_mpi_java_eager = 65536; -opal_free_list_t ompi_java_buffers = {0}; +opal_free_list_t ompi_java_buffers = {{{0}}}; static void *libmpi = NULL; static void bufferConstructor(ompi_java_buffer_t *item) @@ -128,11 +132,35 @@ OBJ_CLASS_INSTANCE(ompi_java_buffer_t, */ jint JNI_OnLoad(JavaVM *vm, void *reserved) { - libmpi = dlopen("libmpi." OPAL_DYN_LIB_SUFFIX, RTLD_NOW | RTLD_GLOBAL); + // Ensure that PSM signal hijacking is disabled *before* loading + // the library (see comment in the function for more detail). + opal_init_psm(); + + libmpi = dlopen("lib" OMPI_LIBMPI_NAME "." OPAL_DYN_LIB_SUFFIX, RTLD_NOW | RTLD_GLOBAL); + +#if defined(HAVE_DL_INFO) && defined(HAVE_LIBGEN_H) + /* + * OS X El Capitan does not propagate DYLD_LIBRARY_PATH to children any more + * so if previous dlopen failed, try to open libmpi in the same directory + * than the current libmpi_java + */ + if(NULL == libmpi) { + Dl_info info; + if(0 != dladdr((void *)JNI_OnLoad, &info)) { + char libmpipath[OPAL_PATH_MAX]; + char *libmpijavapath = strdup(info.dli_fname); + if (NULL != libmpijavapath) { + snprintf(libmpipath, OPAL_PATH_MAX-1, "%s/lib" OMPI_LIBMPI_NAME "." 
OPAL_DYN_LIB_SUFFIX, dirname(libmpijavapath)); + free(libmpijavapath); + libmpi = dlopen(libmpipath, RTLD_NOW | RTLD_GLOBAL); + } + } + } +#endif - if(libmpi == NULL) + if(NULL == libmpi) { - fprintf(stderr, "Java bindings failed to load libmpi: %s\n",dlerror()); + fprintf(stderr, "Java bindings failed to load lib" OMPI_LIBMPI_NAME ": %s\n",dlerror()); exit(1); } @@ -208,6 +236,8 @@ static void deleteClasses(JNIEnv *env) { (*env)->DeleteGlobalRef(env, ompi_java.CartParmsClass); (*env)->DeleteGlobalRef(env, ompi_java.ShiftParmsClass); + (*env)->DeleteGlobalRef(env, ompi_java.VersionClass); + (*env)->DeleteGlobalRef(env, ompi_java.CountClass); (*env)->DeleteGlobalRef(env, ompi_java.GraphParmsClass); (*env)->DeleteGlobalRef(env, ompi_java.DistGraphNeighborsClass); (*env)->DeleteGlobalRef(env, ompi_java.StatusClass); @@ -261,6 +291,12 @@ JNIEXPORT jobject JNICALL Java_mpi_MPI_newDoubleInt(JNIEnv *env, jclass clazz) return (*env)->NewObject(env, c, m, iOff, sizeof(int)); } +JNIEXPORT void JNICALL Java_mpi_MPI_initVersion(JNIEnv *env, jclass jthis) +{ + ompi_java.VersionClass = findClass(env, "mpi/Version"); + ompi_java.VersionInit = (*env)->GetMethodID(env, ompi_java.VersionClass, "", "(II)V"); +} + JNIEXPORT jobjectArray JNICALL Java_mpi_MPI_Init_1jni( JNIEnv *env, jclass clazz, jobjectArray argv) { @@ -357,6 +393,26 @@ JNIEXPORT void JNICALL Java_mpi_MPI_Finalize_1jni(JNIEnv *env, jclass obj) deleteClasses(env); } +JNIEXPORT jobject JNICALL Java_mpi_MPI_getVersionJNI(JNIEnv *env, jclass jthis) +{ + int version, subversion; + int rc = MPI_Get_version(&version, &subversion); + ompi_java_exceptionCheck(env, rc); + + return (*env)->NewObject(env, ompi_java.VersionClass, + ompi_java.VersionInit, version, subversion); +} + +JNIEXPORT jstring JNICALL Java_mpi_MPI_getLibVersionJNI(JNIEnv *env, jclass jthis) +{ + int length; + char version[MPI_MAX_LIBRARY_VERSION_STRING]; + int rc = MPI_Get_library_version(version, &length); + ompi_java_exceptionCheck(env, rc); + + return 
(*env)->NewStringUTF(env, version); +} + JNIEXPORT jint JNICALL Java_mpi_MPI_getProcessorName( JNIEnv *env, jclass obj, jbyteArray buf) { @@ -979,6 +1035,30 @@ void ompi_java_forgetIntArray(JNIEnv *env, jintArray array, (*env)->ReleaseIntArrayElements(env, array, jptr, JNI_ABORT); } +void ompi_java_getDatatypeArray(JNIEnv *env, jlongArray array, + jlong **jptr, MPI_Datatype **cptr) +{ + jlong *jLongs = (*env)->GetLongArrayElements(env, array, NULL); + *jptr = jLongs; + + int i, length = (*env)->GetArrayLength(env, array); + MPI_Datatype *cDatatypes = calloc(length, sizeof(MPI_Datatype)); + + for(i = 0; i < length; i++){ + cDatatypes[i] = (MPI_Datatype)jLongs[i]; + } + *cptr = cDatatypes; +} + +void ompi_java_forgetDatatypeArray(JNIEnv *env, jlongArray array, + jlong *jptr, MPI_Datatype *cptr) +{ + if((long)jptr != (long)cptr) + free(cptr); + + (*env)->ReleaseLongArrayElements(env, array, jptr, JNI_ABORT); +} + void ompi_java_getBooleanArray(JNIEnv *env, jbooleanArray array, jboolean **jptr, int **cptr) { @@ -988,7 +1068,7 @@ void ompi_java_getBooleanArray(JNIEnv *env, jbooleanArray array, for(i = 0; i < length; i++) cb[i] = jb[i]; - + *jptr = jb; *cptr = cb; } @@ -1049,6 +1129,8 @@ void ompi_java_releasePtrArray(JNIEnv *env, jlongArray array, jboolean ompi_java_exceptionCheck(JNIEnv *env, int rc) { + jboolean jni_exception; + if (rc < 0) { /* handle ompi error code */ rc = ompi_errcode_get_mpi_code (rc); @@ -1056,16 +1138,13 @@ jboolean ompi_java_exceptionCheck(JNIEnv *env, int rc) * all Open MPI MPI error codes should be > 0. 
*/ assert (rc >= 0); } + jni_exception = (*env)->ExceptionCheck(env); - if(MPI_SUCCESS == rc) + if(MPI_SUCCESS == rc && JNI_FALSE == jni_exception) { return JNI_FALSE; } - else if((*env)->ExceptionCheck(env)) - { - return JNI_TRUE; - } - else + else if(MPI_SUCCESS != rc) { int errClass = ompi_mpi_errcode_get_class(rc); char *message = ompi_mpi_errnum_get_string(rc); @@ -1079,6 +1158,8 @@ jboolean ompi_java_exceptionCheck(JNIEnv *env, int rc) (*env)->DeleteLocalRef(env, jmessage); return JNI_TRUE; } + /* If we get here, a JNI error has occurred. */ + return JNI_TRUE; } void* ompi_java_attrSet(JNIEnv *env, jbyteArray jval) @@ -1089,7 +1170,7 @@ void* ompi_java_attrSet(JNIEnv *env, jbyteArray jval) (*env)->GetByteArrayRegion(env, jval, 0, length, (jbyte*)cval + sizeof(int)); - + return cval; } diff --git a/ompi/mpi/java/c/mpi_Message.c b/ompi/mpi/java/c/mpi_Message.c index 634ae823292..a51e112ff49 100644 --- a/ompi/mpi/java/c/mpi_Message.c +++ b/ompi/mpi/java/c/mpi_Message.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mpi/java/c/mpi_Op.c b/ompi/mpi/java/c/mpi_Op.c index 5016860d870..44508d1c9d0 100644 --- a/ompi/mpi/java/c/mpi_Op.c +++ b/ompi/mpi/java/c/mpi_Op.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -69,7 +71,7 @@ JNIEXPORT void JNICALL Java_mpi_Op_getOp(JNIEnv *env, jobject jthis, jint type) static MPI_Op Ops[] = { MPI_OP_NULL, MPI_MAX, MPI_MIN, MPI_SUM, MPI_PROD, MPI_LAND, MPI_BAND, MPI_LOR, MPI_BOR, MPI_LXOR, - MPI_BXOR, MPI_MINLOC, MPI_MAXLOC + MPI_BXOR, MPI_MINLOC, MPI_MAXLOC, MPI_REPLACE, MPI_NO_OP }; (*env)->SetLongField(env,jthis, ompi_java.OpHandle, (jlong)Ops[type]); } @@ -80,7 +82,7 @@ static jobject setBooleanArray(JNIEnv *env, void *vec, int len) if(obj != NULL) (*env)->SetBooleanArrayRegion(env, obj, 0, len, vec); - + return obj; } diff --git a/ompi/mpi/java/c/mpi_Prequest.c b/ompi/mpi/java/c/mpi_Prequest.c index f1f402c9471..a036d45906e 100644 --- a/ompi/mpi/java/c/mpi_Prequest.c +++ b/ompi/mpi/java/c/mpi_Prequest.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mpi/java/c/mpi_Request.c b/ompi/mpi/java/c/mpi_Request.c index d6e7cb894c0..4ad9e2c307a 100644 --- a/ompi/mpi/java/c/mpi_Request.c +++ b/ompi/mpi/java/c/mpi_Request.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -78,7 +78,7 @@ static void setIndices(JNIEnv *env, jintArray indices, int *cIdx, int count) } (*env)->SetIntArrayRegion(env, indices, 0, count, jIdx); - + if(jIdx != cIdx) free(jIdx); } @@ -171,6 +171,18 @@ JNIEXPORT jobject JNICALL Java_mpi_Request_testStatus( return flag ? ompi_java_status_new(env, &status) : NULL; } +JNIEXPORT jobject JNICALL Java_mpi_Request_getStatus( + JNIEnv *env, jobject jthis, jlong handle) +{ + MPI_Request req = (MPI_Request)handle; + int flag; + MPI_Status status; + int rc = MPI_Request_get_status(req, &flag, &status); + ompi_java_exceptionCheck(env, rc); + (*env)->SetLongField(env, jthis, ompi_java.ReqHandle, (jlong)req); + return flag ? ompi_java_status_new(env, &status) : NULL; +} + JNIEXPORT jboolean JNICALL Java_mpi_Request_test( JNIEnv *env, jobject jthis, jlong handle) { diff --git a/ompi/mpi/java/c/mpi_Status.c b/ompi/mpi/java/c/mpi_Status.c index dc6e9799e74..0863a872b7f 100644 --- a/ompi/mpi/java/c/mpi_Status.c +++ b/ompi/mpi/java/c/mpi_Status.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -109,6 +109,56 @@ JNIEXPORT jint JNICALL Java_mpi_Status_getElements( return count; } +JNIEXPORT jobject JNICALL Java_mpi_Status_getElementsX( + JNIEnv *env, jobject jthis, jint source, jint tag, + jint error, jint cancelled, jlong ucount, jlong jType) +{ + MPI_Count count; + MPI_Status stat; + getStatus(&stat, source, tag, error, cancelled, ucount); + MPI_Datatype datatype = (MPI_Datatype)jType; + int rc = MPI_Get_elements_x(&stat, datatype, &count); + ompi_java_exceptionCheck(env, rc); + + return (*env)->NewObject(env, ompi_java.CountClass, + ompi_java.CountInit, (jlong)count); +} + +JNIEXPORT jint JNICALL Java_mpi_Status_setElements( + JNIEnv *env, jobject jthis, jint source, jint tag, + jint error, jint cancelled, jlong ucount, jlong jType, int count) +{ + MPI_Status stat; + getStatus(&stat, source, tag, error, cancelled, ucount); + MPI_Datatype datatype = (MPI_Datatype)jType; + int rc = MPI_Status_set_elements(&stat, datatype, count); + ompi_java_exceptionCheck(env, rc); + return stat._ucount; +} + +JNIEXPORT jlong JNICALL Java_mpi_Status_setElementsX( + JNIEnv *env, jobject jthis, jint source, jint tag, + jint error, jint cancelled, jlong ucount, jlong jType, jlong jcount) +{ + MPI_Status stat; + MPI_Count count = (long)jcount; + getStatus(&stat, source, tag, error, cancelled, ucount); + MPI_Datatype datatype = (MPI_Datatype)jType; + int rc = MPI_Status_set_elements_x(&stat, datatype, count); + ompi_java_exceptionCheck(env, rc); + return (jlong)stat._ucount; +} + +JNIEXPORT void JNICALL Java_mpi_Status_setCancelled( + JNIEnv *env, jobject jthis, jint source, jint tag, + jint error, jint cancelled, jlong ucount, int flag) +{ + MPI_Status stat; + getStatus(&stat, source, tag, error, cancelled, ucount); + int rc = MPI_Status_set_cancelled(&stat, flag); + ompi_java_exceptionCheck(env, rc); +} + jobject ompi_java_status_new(JNIEnv *env, MPI_Status *status) { jlongArray jData = 
(*env)->NewLongArray(env, 6); diff --git a/ompi/mpi/java/c/mpi_Win.c b/ompi/mpi/java/c/mpi_Win.c index 3b06833414f..ce810da46a1 100644 --- a/ompi/mpi/java/c/mpi_Win.c +++ b/ompi/mpi/java/c/mpi_Win.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -41,6 +41,66 @@ JNIEXPORT jlong JNICALL Java_mpi_Win_createWin( return (jlong)win; } +JNIEXPORT jlong JNICALL Java_mpi_Win_allocateWin(JNIEnv *env, jobject jthis, + jint size, jint dispUnit, jlong info, jlong comm, jobject jBase) +{ + void *basePtr = (*env)->GetDirectBufferAddress(env, jBase); + MPI_Win win; + + int rc = MPI_Win_allocate((MPI_Aint)size, dispUnit, + (MPI_Info)info, (MPI_Comm)comm, basePtr, &win); + + ompi_java_exceptionCheck(env, rc); + return (jlong)win; +} + +JNIEXPORT jlong JNICALL Java_mpi_Win_allocateSharedWin(JNIEnv *env, jobject jthis, + jint size, jint dispUnit, jlong info, jlong comm, jobject jBase) +{ + void *basePtr = (*env)->GetDirectBufferAddress(env, jBase); + MPI_Win win; + + int rc = MPI_Win_allocate_shared((MPI_Aint)size, dispUnit, + (MPI_Info)info, (MPI_Comm)comm, basePtr, &win); + + ompi_java_exceptionCheck(env, rc); + return (jlong)win; +} + +JNIEXPORT jlong JNICALL Java_mpi_Win_createDynamicWin( + JNIEnv *env, jobject jthis, + jlong info, jlong comm) +{ + MPI_Win win; + + int rc = MPI_Win_create_dynamic( + (MPI_Info)info, (MPI_Comm)comm, &win); + + ompi_java_exceptionCheck(env, rc); + return (jlong)win; +} + +JNIEXPORT void JNICALL Java_mpi_Win_attach( + JNIEnv *env, jobject jthis, jlong win, jobject jBase, + jint size) +{ 
+ void *base = (*env)->GetDirectBufferAddress(env, jBase); + + int rc = MPI_Win_attach((MPI_Win)win, base, (MPI_Aint)size); + + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT void JNICALL Java_mpi_Win_detach( + JNIEnv *env, jobject jthis, jlong win, jobject jBase) +{ + void *base = (*env)->GetDirectBufferAddress(env, jBase); + + int rc = MPI_Win_detach((MPI_Win)win, base); + + ompi_java_exceptionCheck(env, rc); +} + JNIEXPORT jlong JNICALL Java_mpi_Win_getGroup( JNIEnv *env, jobject jthis, jlong win) { @@ -242,3 +302,193 @@ JNIEXPORT jlong JNICALL Java_mpi_Win_free( ompi_java_exceptionCheck(env, rc); return (jlong)win; } + +JNIEXPORT jlong JNICALL Java_mpi_Win_getInfo( + JNIEnv *env, jobject jthis, jlong handle) +{ + MPI_Win win = (MPI_Win)handle; + MPI_Info info; + int rc = MPI_Win_get_info((MPI_Win)win, &info); + ompi_java_exceptionCheck(env, rc); + return (jlong)info; +} + +JNIEXPORT void JNICALL Java_mpi_Win_setInfo( + JNIEnv *env, jobject jthis, jlong handle, jlong i) +{ + MPI_Win win = (MPI_Win)handle; + MPI_Info info = (MPI_Info)i; + int rc = MPI_Win_set_info(win, info); + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT jlong JNICALL Java_mpi_Win_rPut(JNIEnv *env, jobject jthis, + jlong win, jobject origin_addr, jint origin_count, jlong origin_type, + jint target_rank, jint target_disp, jint target_count, jlong target_datatype, + jint basetype) +{ + void *origPtr = ompi_java_getDirectBufferAddress(env, origin_addr); + MPI_Request request; + + int rc = MPI_Rput(origPtr, origin_count, (MPI_Datatype)origin_type, + target_rank, (MPI_Aint)target_disp, target_count, (MPI_Datatype)target_datatype, + (MPI_Win)win, &request); + + ompi_java_exceptionCheck(env, rc); + return (jlong)request; +} + +JNIEXPORT jlong JNICALL Java_mpi_Win_rGet(JNIEnv *env, jobject jthis, jlong win, + jobject origin, jint orgCount, jlong orgType, jint targetRank, jint targetDisp, + jint targetCount, jlong targetType, jint base) +{ + void *orgPtr = (*env)->GetDirectBufferAddress(env, 
origin); + MPI_Request request; + + int rc = MPI_Rget(orgPtr, orgCount, (MPI_Datatype)orgType, + targetRank, (MPI_Aint)targetDisp, targetCount, + (MPI_Datatype)targetType, (MPI_Win)win, &request); + + ompi_java_exceptionCheck(env, rc); + return (jlong)request; +} + +JNIEXPORT jlong JNICALL Java_mpi_Win_rAccumulate(JNIEnv *env, jobject jthis, jlong win, + jobject origin, jint orgCount, jlong orgType, jint targetRank, jint targetDisp, + jint targetCount, jlong targetType, jobject jOp, jlong hOp, jint baseType) +{ + void *orgPtr = (*env)->GetDirectBufferAddress(env, origin); + MPI_Op op = ompi_java_op_getHandle(env, jOp, hOp, baseType); + MPI_Request request; + + int rc = MPI_Raccumulate(orgPtr, orgCount, (MPI_Datatype)orgType, + targetRank, (MPI_Aint)targetDisp, targetCount, + (MPI_Datatype)targetType, op, (MPI_Win)win, &request); + + ompi_java_exceptionCheck(env, rc); + return (jlong)request; +} + +JNIEXPORT void JNICALL Java_mpi_Win_getAccumulate(JNIEnv *env, jobject jthis, jlong win, + jobject origin, jint orgCount, jlong orgType, jobject resultBuff, jint resultCount, + jlong resultType, jint targetRank, jint targetDisp, jint targetCount, jlong targetType, + jobject jOp, jlong hOp, jint baseType) +{ + void *orgPtr = (*env)->GetDirectBufferAddress(env, origin); + void *resultPtr = (*env)->GetDirectBufferAddress(env, resultBuff); + MPI_Op op = ompi_java_op_getHandle(env, jOp, hOp, baseType); + + int rc = MPI_Get_accumulate(orgPtr, orgCount, (MPI_Datatype)orgType, + resultPtr, resultCount, (MPI_Datatype)resultType, + targetRank, (MPI_Aint)targetDisp, targetCount, + (MPI_Datatype)targetType, op, (MPI_Win)win); + + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT jlong JNICALL Java_mpi_Win_rGetAccumulate(JNIEnv *env, jobject jthis, jlong win, + jobject origin, jint orgCount, jlong orgType, jobject resultBuff, jint resultCount, + jlong resultType, jint targetRank, jint targetDisp, jint targetCount, jlong targetType, + jobject jOp, jlong hOp, jint baseType) +{ + void 
*orgPtr = (*env)->GetDirectBufferAddress(env, origin); + void *resultPtr = (*env)->GetDirectBufferAddress(env, resultBuff); + MPI_Op op = ompi_java_op_getHandle(env, jOp, hOp, baseType); + MPI_Request request; + + int rc = MPI_Rget_accumulate(orgPtr, orgCount, (MPI_Datatype)orgType, + resultPtr, resultCount, (MPI_Datatype)resultType, + targetRank, (MPI_Aint)targetDisp, targetCount, + (MPI_Datatype)targetType, op, (MPI_Win)win, &request); + + ompi_java_exceptionCheck(env, rc); + return (jlong)request; +} + +JNIEXPORT void JNICALL Java_mpi_Win_lockAll(JNIEnv *env, jobject jthis, jlong win, jint assertion) +{ + int rc = MPI_Win_lock_all(assertion, (MPI_Win)win); + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT void JNICALL Java_mpi_Win_unlockAll(JNIEnv *env, jobject jthis, jlong win) +{ + int rc = MPI_Win_unlock_all((MPI_Win)win); + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT void JNICALL Java_mpi_Win_sync(JNIEnv *env, jobject jthis, jlong win) +{ + int rc = MPI_Win_sync((MPI_Win)win); + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT void JNICALL Java_mpi_Win_flush(JNIEnv *env, jobject jthis, jlong win, jint targetRank) +{ + int rc = MPI_Win_flush(targetRank, (MPI_Win)win); + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT void JNICALL Java_mpi_Win_flushAll(JNIEnv *env, jobject jthis, jlong win) +{ + int rc = MPI_Win_flush_all((MPI_Win)win); + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT void JNICALL Java_mpi_Win_compareAndSwap (JNIEnv *env, jobject jthis, jlong win, jobject origin, + jobject compareAddr, jobject resultAddr, jlong dataType, jint targetRank, jint targetDisp) +{ + void *orgPtr = (*env)->GetDirectBufferAddress(env, origin); + void *compPtr = (*env)->GetDirectBufferAddress(env, compareAddr); + void *resultPtr = (*env)->GetDirectBufferAddress(env, resultAddr); + + int rc = MPI_Compare_and_swap(orgPtr, compPtr, resultPtr, (MPI_Datatype)dataType, + targetRank, targetDisp, (MPI_Win)win); + ompi_java_exceptionCheck(env, rc); +} + 
+JNIEXPORT void JNICALL Java_mpi_Win_fetchAndOp(JNIEnv *env, jobject jthis, jlong win, jobject origin, + jobject resultAddr, jlong dataType, jint targetRank, jint targetDisp, jobject jOp, jlong hOp, jint baseType) +{ + void *orgPtr = (*env)->GetDirectBufferAddress(env, origin); + void *resultPtr = (*env)->GetDirectBufferAddress(env, resultAddr); + MPI_Op op = ompi_java_op_getHandle(env, jOp, hOp, baseType); + + int rc = MPI_Fetch_and_op(orgPtr, resultPtr, (MPI_Datatype)dataType, targetRank, + targetDisp, op, (MPI_Win)win); + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT void JNICALL Java_mpi_Win_flushLocal(JNIEnv *env, jobject jthis, jlong win, jint targetRank) +{ + int rc = MPI_Win_flush_local(targetRank, (MPI_Win)win); + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT void JNICALL Java_mpi_Win_flushLocalAll(JNIEnv *env, jobject jthis, jlong win) +{ + int rc = MPI_Win_flush_local_all((MPI_Win)win); + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT void JNICALL Java_mpi_Win_setName( + JNIEnv *env, jobject jthis, jlong handle, jstring jname) +{ + const char *name = (*env)->GetStringUTFChars(env, jname, NULL); + int rc = MPI_Win_set_name((MPI_Win)handle, (char*)name); + ompi_java_exceptionCheck(env, rc); + (*env)->ReleaseStringUTFChars(env, jname, name); +} + +JNIEXPORT jstring JNICALL Java_mpi_Win_getName( + JNIEnv *env, jobject jthis, jlong handle) +{ + char name[MPI_MAX_OBJECT_NAME]; + int len; + int rc = MPI_Win_get_name((MPI_Win)handle, name, &len); + + if(ompi_java_exceptionCheck(env, rc)) + return NULL; + + return (*env)->NewStringUTF(env, name); +} diff --git a/ompi/mpi/java/java/CartComm.java b/ompi/mpi/java/java/CartComm.java index 2ce61edba5c..90bf1c7ec87 100644 --- a/ompi/mpi/java/java/CartComm.java +++ b/ompi/mpi/java/java/CartComm.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * * File : Cartcomm.java * Author : Xinying Li * Created : Thu Apr 9 12:22:15 1998 @@ -50,181 +52,194 @@ */ public final class CartComm extends Intracomm { -static -{ - init(); -} - -private static native void init(); - -protected CartComm(long handle) throws MPIException -{ - super(handle); -} - -protected CartComm(long[] commRequest) -{ - super(commRequest); -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_DUP}. - *

It is recommended to use {@link #dup} instead of {@link #clone} - * because the last can't throw an {@link mpi.MPIException}. - * @return copy of this communicator - */ -@Override public CartComm clone() -{ - try - { - return dup(); - } - catch(MPIException e) - { - throw new RuntimeException(e.getMessage()); - } -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_DUP}. - * @return copy of this communicator - * @throws MPIException - */ -@Override public CartComm dup() throws MPIException -{ - MPI.check(); - return new CartComm(dup(handle)); -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_IDUP}. - *

The new communicator can't be used before the operation completes. - * The request object must be obtained calling {@link #getRequest}. - * @return copy of this communicator - * @throws MPIException - */ -@Override public CartComm iDup() throws MPIException -{ - MPI.check(); - return new CartComm(iDup(handle)); -} - -/** - * Returns cartesian topology information. - *

Java binding of the MPI operations {@code MPI_CARTDIM_GET} and - * {@code MPI_CART_GET}. - *

The number of dimensions can be obtained from the size of (eg) - * {@code dims} field of the returned object. - * @return object containing dimensions, periods and local coordinates - * @throws MPIException - */ -public CartParms getTopo() throws MPIException -{ - MPI.check(); - return getTopo(handle); -} - -private native CartParms getTopo(long comm) throws MPIException; - -/** - * Translate logical process coordinates to process rank. - *

Java binding of the MPI operation {@code MPI_CART_RANK}. - * @param coords Cartesian coordinates of a process - * @return rank of the specified process - * @throws MPIException - */ -public int getRank(int[] coords) throws MPIException -{ - MPI.check(); - return getRank(handle, coords); -} - -private native int getRank(long comm, int[] coords) throws MPIException; - -/** - * Translate process rank to logical process coordinates. - *

Java binding of the MPI operation {@code MPI_CART_COORDS}. - * @param rank rank of a process - * @return Cartesian coordinates of the specified process - * @throws MPIException - */ -public int[] getCoords(int rank) throws MPIException -{ - MPI.check(); - return getCoords(handle, rank); -} - -private native int[] getCoords(long comm, int rank) throws MPIException; - -/** - * Compute source and destination ranks for "shift" communication. - *

Java binding of the MPI operation {@code MPI_CART_SHIFT}. - * @param direction coordinate dimension of shift - * @param disp displacement - * @return object containing ranks of source and destination processes - * @throws MPIException - */ -public ShiftParms shift(int direction, int disp) throws MPIException -{ - MPI.check(); - return shift(handle, direction, disp); -} - -private native ShiftParms shift(long comm, int direction, int disp) - throws MPIException; - -/** - * Partition cartesian communicator into subgroups of lower dimension. - *

Java binding of the MPI operation {@code MPI_CART_SUB}. - * @param remainDims by dimension, {@code true} if dimension is to be kept, - * {@code false} otherwise - * @return communicator containing subgrid including this process - * @throws MPIException - */ -public CartComm sub(boolean[] remainDims) throws MPIException -{ - MPI.check(); - return new CartComm(sub(handle, remainDims)); -} - -private native long sub(long comm, boolean[] remainDims) throws MPIException; - -/** - * Compute an optimal placement. - *

Java binding of the MPI operation {@code MPI_CART_MAP}. - *

The number of dimensions is taken to be size of the {@code dims} argument. - * @param dims the number of processes in each dimension - * @param periods {@code true} if grid is periodic, - * {@code false} if not, in each dimension - * @return reordered rank of calling process - * @throws MPIException - */ -public int map(int[] dims, boolean[] periods) throws MPIException -{ - MPI.check(); - return map(handle, dims, periods); -} - -private native int map(long comm, int[] dims, boolean[] periods) - throws MPIException; - -/** - * Select a balanced distribution of processes per coordinate direction. - *

Java binding of the MPI operation {@code MPI_DIMS_CREATE}. - * @param nnodes number of nodes in a grid - * @param dims array specifying the number of nodes in each dimension - * @throws MPIException - */ -public static void createDims(int nnodes, int[] dims) throws MPIException -{ - MPI.check(); - createDims_jni(nnodes, dims); -} - -private static native void createDims_jni(int nnodes, int[] dims) - throws MPIException; + static + { + init(); + } + + private static native void init(); + + protected CartComm(long handle) throws MPIException + { + super(handle); + } + + protected CartComm(long[] commRequest) + { + super(commRequest); + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_DUP}. + *

It is recommended to use {@link #dup} instead of {@link #clone} + * because the latter can't throw an {@link mpi.MPIException}. + * @return copy of this communicator + */ + @Override public CartComm clone() + { + try + { + return dup(); + } + catch(MPIException e) + { + throw new RuntimeException(e.getMessage()); + } + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_DUP}. + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public CartComm dup() throws MPIException + { + MPI.check(); + return new CartComm(dup(handle)); + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_IDUP}. + *

The new communicator can't be used before the operation completes. + * The request object must be obtained calling {@link #getRequest}. + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public CartComm iDup() throws MPIException + { + MPI.check(); + return new CartComm(iDup(handle)); + } + + /** + * Duplicates this communicator with the info object used in the call. + *

Java binding of {@code MPI_COMM_DUP_WITH_INFO}. + * @param info info object to associate with the new communicator + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public CartComm dupWithInfo(Info info) throws MPIException + { + MPI.check(); + return new CartComm(dupWithInfo(handle, info.handle)); + } + + /** + * Returns cartesian topology information. + *

Java binding of the MPI operations {@code MPI_CARTDIM_GET} and + * {@code MPI_CART_GET}. + *

The number of dimensions can be obtained from the size of (e.g.) + * the {@code dims} field of the returned object. + * @return object containing dimensions, periods and local coordinates + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public CartParms getTopo() throws MPIException + { + MPI.check(); + return getTopo(handle); + } + + private native CartParms getTopo(long comm) throws MPIException; + + /** + * Translate logical process coordinates to process rank. + *

Java binding of the MPI operation {@code MPI_CART_RANK}. + * @param coords Cartesian coordinates of a process + * @return rank of the specified process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getRank(int[] coords) throws MPIException + { + MPI.check(); + return getRank(handle, coords); + } + + private native int getRank(long comm, int[] coords) throws MPIException; + + /** + * Translate process rank to logical process coordinates. + *

Java binding of the MPI operation {@code MPI_CART_COORDS}. + * @param rank rank of a process + * @return Cartesian coordinates of the specified process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int[] getCoords(int rank) throws MPIException + { + MPI.check(); + return getCoords(handle, rank); + } + + private native int[] getCoords(long comm, int rank) throws MPIException; + + /** + * Compute source and destination ranks for "shift" communication. + *

Java binding of the MPI operation {@code MPI_CART_SHIFT}. + * @param direction coordinate dimension of shift + * @param disp displacement + * @return object containing ranks of source and destination processes + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public ShiftParms shift(int direction, int disp) throws MPIException + { + MPI.check(); + return shift(handle, direction, disp); + } + + private native ShiftParms shift(long comm, int direction, int disp) + throws MPIException; + + /** + * Partition cartesian communicator into subgroups of lower dimension. + *

Java binding of the MPI operation {@code MPI_CART_SUB}. + * @param remainDims by dimension, {@code true} if dimension is to be kept, + * {@code false} otherwise + * @return communicator containing subgrid including this process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public CartComm sub(boolean[] remainDims) throws MPIException + { + MPI.check(); + return new CartComm(sub(handle, remainDims)); + } + + private native long sub(long comm, boolean[] remainDims) throws MPIException; + + /** + * Compute an optimal placement. + *

Java binding of the MPI operation {@code MPI_CART_MAP}. + *

The number of dimensions is taken to be the size of the {@code dims} argument. + * @param dims the number of processes in each dimension + * @param periods {@code true} if grid is periodic, + * {@code false} if not, in each dimension + * @return reordered rank of calling process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int map(int[] dims, boolean[] periods) throws MPIException + { + MPI.check(); + return map(handle, dims, periods); + } + + private native int map(long comm, int[] dims, boolean[] periods) + throws MPIException; + + /** + * Select a balanced distribution of processes per coordinate direction. + *

Java binding of the MPI operation {@code MPI_DIMS_CREATE}. + * @param nnodes number of nodes in a grid + * @param dims array specifying the number of nodes in each dimension + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void createDims(int nnodes, int[] dims) throws MPIException + { + MPI.check(); + createDims_jni(nnodes, dims); + } + + private static native void createDims_jni(int nnodes, int[] dims) + throws MPIException; } // Cartcomm diff --git a/ompi/mpi/java/java/CartParms.java b/ompi/mpi/java/java/CartParms.java index 1be8bee063e..2ea7eca7938 100644 --- a/ompi/mpi/java/java/CartParms.java +++ b/ompi/mpi/java/java/CartParms.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : CartParms.java * Author : Xinying Li * Created : Thu Apr 9 12:22:15 1998 @@ -50,65 +52,65 @@ */ public final class CartParms { -/** Number of processes for each cartesian dimension. */ -private final int[] dims; + /** Number of processes for each cartesian dimension. */ + private final int[] dims; -/** Periodicity (true/false) for each cartesian dimension. */ -private final boolean[] periods; + /** Periodicity (true/false) for each cartesian dimension. */ + private final boolean[] periods; -/** Coordinates of calling process in cartesian structure. */ -private final int[] coords; + /** Coordinates of calling process in cartesian structure. */ + private final int[] coords; -/** - * Constructs a cartesian topology information object. - * @param dims number of processes for each cartesian dimension. - * @param periods periodicity (true/false) for each cartesian dimension. - * @param coords coordinates of calling process in cartesian structure. - */ -protected CartParms(int[] dims, boolean[] periods, int[] coords) -{ - this.dims = dims; - this.periods = periods; - this.coords = coords; -} + /** + * Constructs a cartesian topology information object. + * @param dims number of processes for each cartesian dimension. 
+ * @param periods periodicity (true/false) for each cartesian dimension. + * @param coords coordinates of calling process in cartesian structure. + */ + protected CartParms(int[] dims, boolean[] periods, int[] coords) + { + this.dims = dims; + this.periods = periods; + this.coords = coords; + } -/** - * Returns the number of dimensions. - * @return number of dimensions. - */ -public int getDimCount() -{ - return dims.length; -} + /** + * Returns the number of dimensions. + * @return number of dimensions. + */ + public int getDimCount() + { + return dims.length; + } -/** - * Returns the number of processes for a cartesian dimension. - * @param i cartesian dimension. - * @return number of processes for a cartesian dimension. - */ -public int getDim(int i) -{ - return dims[i]; -} + /** + * Returns the number of processes for a cartesian dimension. + * @param i cartesian dimension. + * @return number of processes for a cartesian dimension. + */ + public int getDim(int i) + { + return dims[i]; + } -/** - * Returns the periodicity (true/false) for a cartesian dimension. - * @param i cartesian dimension. - * @return periodicity for a cartesian dimension. - */ -public boolean getPeriod(int i) -{ - return periods[i]; -} + /** + * Returns the periodicity (true/false) for a cartesian dimension. + * @param i cartesian dimension. + * @return periodicity for a cartesian dimension. + */ + public boolean getPeriod(int i) + { + return periods[i]; + } -/** - * Returns the coordinate of calling process for a cartesian dimension. - * @param i cartesian dimension. - * @return coordinate of calling process for a cartesian dimension. - */ -public int getCoord(int i) -{ - return coords[i]; -} + /** + * Returns the coordinate of calling process for a cartesian dimension. + * @param i cartesian dimension. + * @return coordinate of calling process for a cartesian dimension. 
+ */ + public int getCoord(int i) + { + return coords[i]; + } } // CartParms diff --git a/ompi/mpi/java/java/Comm.java b/ompi/mpi/java/java/Comm.java index 7417550fba1..fe7e7b35a26 100644 --- a/ompi/mpi/java/java/Comm.java +++ b/ompi/mpi/java/java/Comm.java @@ -5,35 +5,39 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : Comm.java * Author : Sang Lim, Sung-Hoon Ko, Xinying Li, Bryan Carpenter * Created : Thu Apr 9 12:22:15 1998 @@ -41,16 +45,16 @@ * Updated : $Date: 2001/08/07 16:36:25 $ * Copyright: Northeast Parallel Architectures Center * at Syracuse University 1998 - */ - -/* + * + * + * * IMPLEMENTATION DETAILS - * + * * All methods with buffers that can be direct or non direct have * a companion argument 'db' which is true if the buffer is direct. * For example, if the buffer argument is recvBuf, the companion * argument will be 'rdb', meaning if the receive buffer is direct. - * + * * Checking if a buffer is direct is faster in Java than C. */ package mpi; @@ -61,3170 +65,3346 @@ /** * The {@code Comm} class represents communicators. */ -public class Comm implements Freeable -{ -protected final static int SELF = 1; -protected final static int WORLD = 2; -protected long handle; -private Request request; - -private static long nullHandle; - -static -{ - init(); -} - -private static native void init(); - -protected Comm() -{ -} - -protected Comm(long handle) -{ - this.handle = handle; -} - -protected Comm(long[] commRequest) -{ - handle = commRequest[0]; - request = new Request(commRequest[1]); -} - -protected final void setType(int type) -{ - getComm(type); -} - -private native void getComm(int type); - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_DUP}. - *

It is recommended to use {@link #dup} instead of {@link #clone} - * because the last can't throw an {@link mpi.MPIException}. - * @return copy of this communicator - */ -@Override public Comm clone() -{ - try - { - return dup(); - } - catch(MPIException e) - { - throw new RuntimeException(e.getMessage()); - } -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_DUP}. - * @return copy of this communicator - * @throws MPIException - */ -public Comm dup() throws MPIException -{ - MPI.check(); - return new Comm(dup(handle)); -} - -protected final native long dup(long comm) throws MPIException; - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_IDUP}. - *

The new communicator can't be used before the operation completes. - * The request object must be obtained calling {@link #getRequest}. - * @return copy of this communicator - * @throws MPIException - */ -public Comm iDup() throws MPIException -{ - MPI.check(); - return new Comm(iDup(handle)); -} - -protected final native long[] iDup(long comm) throws MPIException; - -/** - * Returns the associated request to this communicator if it was - * created using {@link #iDup}. - * @return associated request if this communicator was created - * using {@link #iDup}, or null otherwise. - */ -public final Request getRequest() -{ - return request; -} - -/** - * Size of group of this communicator. - *

Java binding of the MPI operation {@code MPI_COMM_SIZE}. - * @return number of processors in the group of this communicator - * @throws MPIException - */ -public final int getSize() throws MPIException -{ - MPI.check(); - return getSize(handle); -} - -private native int getSize(long comm) throws MPIException; - -/** - * Rank of this process in group of this communicator. - *

Java binding of the MPI operation {@code MPI_COMM_RANK}. - * @return rank of the calling process in the group of this communicator - * @throws MPIException - */ -public final int getRank() throws MPIException -{ - MPI.check(); - return getRank(handle); -} - -private native int getRank(long comm) throws MPIException; - -/** - * Compare two communicators. - *

Java binding of the MPI operation {@code MPI_COMM_COMPARE}. - * @param comm1 first communicator - * @param comm2 second communicator - * @return - * {@code MPI.IDENT} results if the {@code comm1} and {@code comm2} - * are references to the same object (ie, if {@code comm1 == comm2}).
- * {@code MPI.CONGRUENT} results if the underlying groups are identical - * but the communicators differ by context.
- * {@code MPI.SIMILAR} results if the underlying groups are similar - * but the communicators differ by context.
- * {@code MPI.UNEQUAL} results otherwise. - * @throws MPIException - */ -public static int compare(Comm comm1, Comm comm2) throws MPIException -{ - MPI.check(); - return compare(comm1.handle, comm2.handle); -} - -private static native int compare(long comm1, long comm2) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_COMM_FREE}. - * @throws MPIException - */ -@Override final public void free() throws MPIException -{ - MPI.check(); - handle = free(handle); -} - -private native long free(long comm) throws MPIException; - -/** - * Test if communicator object is null (has been freed). - * @return true if the comm object is null, false otherwise - */ -public final boolean isNull() -{ - return handle == nullHandle; -} - -/** - * Java binding of {@code MPI_COMM_SET_INFO}. - * @param info info object - * @throws MPIException - */ -public final void setInfo(Info info) throws MPIException -{ - MPI.check(); - setInfo(handle, info.handle); -} - -private native void setInfo(long fh, long info) throws MPIException; - -/** - * Java binding of {@code MPI_COMM_GET_INFO}. - * @return new info object - * @throws MPIException - */ -public final Info getInfo() throws MPIException -{ - MPI.check(); - return new Info(getInfo(handle)); -} - -private native long getInfo(long fh) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_COMM_DISCONNECT}. - * @throws MPIException - */ -public final void disconnect() throws MPIException -{ - MPI.check(); - handle = disconnect(handle); -} - -private native long disconnect(long comm) throws MPIException; - -/** - * Return group associated with a communicator. - *

Java binding of the MPI operation {@code MPI_COMM_GROUP}. - * @return group corresponding to this communicator group - * @throws MPIException - */ -public final Group getGroup() throws MPIException -{ - MPI.check(); - return new Group(getGroup(handle)); -} - -private native long getGroup(long comm); - -// Inter-communication - -/** - * Test if this communicator is an inter-communicator. - *

Java binding of the MPI operation {@code MPI_COMM_TEST_INTER}. - * @return {@code true} if this is an inter-communicator, - * {@code false} otherwise - * @throws MPIException - */ -public final boolean isInter() throws MPIException -{ - MPI.check(); - return isInter(handle); -} - -private native boolean isInter(long comm) throws MPIException; - -/** - * Create an inter-communicator. - *

- * Java binding of the MPI operation {@code MPI_INTERCOMM_CREATE}. - *

- * This operation is defined as a method on the "peer communicator", - * making it analogous to a {@code send} or {@code recv} communication - * with the remote group leader. - * @param localComm local intra-communicator - * @param localLeader rank of local group leader in {@code localComm} - * @param remoteLeader rank of remote group leader in this communicator - * @param tag "safe" tag - * @return new inter-communicator - * @throws MPIException - */ -public final Intercomm createIntercomm(Comm localComm, int localLeader, - int remoteLeader, int tag) - throws MPIException -{ - MPI.check(); - - return new Intercomm(createIntercomm(handle, localComm.handle, - localLeader, remoteLeader, tag)); -} - -private native long createIntercomm( - long comm, long localComm, int localLeader, - int remoteLeader, int tag) throws MPIException; - -// Blocking Send and Recv - -/** - * Blocking send operation. - *

Java binding of the MPI operation {@code MPI_SEND}. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @throws MPIException - */ -public final void send(Object buf, int count, Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - send(handle, buf, db, off, count, type.handle, type.baseType, dest, tag); -} - -private native void send( - long comm, Object buf, boolean db, int offset, int count, - long type, int baseType, int dest, int tag) throws MPIException; - -/** - * Blocking receive operation. - *

Java binding of the MPI operation {@code MPI_RECV}. - * @param buf receive buffer - * @param count number of items in receive buffer - * @param type datatype of each item in receive buffer - * @param source rank of source - * @param tag message tag - * @return status object - * @throws MPIException - */ -public final Status recv(Object buf, int count, - Datatype type, int source, int tag) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - Status status = new Status(); - - recv(handle, buf, db, off, count, - type.handle, type.baseType, source, tag, status.data); - - return status; -} - -private native void recv( - long comm, Object buf, boolean db, int offset, int count, - long type, int basetype, int source, int tag, long[] stat) - throws MPIException; - -// Send-Recv - -/** - * Execute a blocking send and receive operation. - *

Java binding of the MPI operation {@code MPI_SENDRECV}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param dest rank of destination - * @param sendtag send tag - * @param recvbuf receive buffer - * @param recvcount number of items in receive buffer - * @param recvtype datatype of each item in receive buffer - * @param source rank of source - * @param recvtag receive tag - * @return status object - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - * @see mpi.Comm#recv(Object, int, Datatype, int, int) - */ -public final Status sendRecv( - Object sendbuf, int sendcount, Datatype sendtype, int dest, int sendtag, - Object recvbuf, int recvcount, Datatype recvtype, int source, int recvtag) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - Status status = new Status(); - - sendRecv(handle, sendbuf, sdb, sendoff, sendcount, - sendtype.handle, sendtype.baseType, dest, sendtag, - recvbuf, rdb, recvoff, recvcount, - recvtype.handle, recvtype.baseType, source, recvtag, status.data); - - return status; -} - -private native void sendRecv( - long comm, Object sbuf, boolean sdb, int soffset, int scount, - long sType, int sBaseType, int dest, int stag, - Object rbuf, boolean rdb, int roffset, int rcount, - long rType, int rBaseType, int source, int rtag, - long[] stat) throws MPIException; - -/** - * Execute a blocking send and receive operation, - * receiving message into send buffer. - *

Java binding of the MPI operation {@code MPI_SENDRECV_REPLACE}. - * @param buf buffer - * @param count number of items to send - * @param type datatype of each item in buffer - * @param dest rank of destination - * @param sendtag send tag - * @param source rank of source - * @param recvtag receive tag - * @return status object - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - * @see mpi.Comm#recv(Object, int, Datatype, int, int) - */ -public final Status sendRecvReplace( - Object buf, int count, Datatype type, - int dest, int sendtag, int source, int recvtag) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - Status status = new Status(); - - sendRecvReplace(handle, buf, db, off, count, type.handle, type.baseType, - dest, sendtag, source, recvtag, status.data); - - return status; -} - -private native void sendRecvReplace( - long comm, Object buf, boolean db, int offset, int count, - long type, int baseType, int dest, int stag, - int source, int rtag, long[] stat) throws MPIException; - -// Communication Modes - -/** - * Send in buffered mode. - *

Java binding of the MPI operation {@code MPI_BSEND}. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - */ -public final void bSend(Object buf, int count, Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - bSend(handle, buf, db, off, count, type.handle, type.baseType, dest, tag); -} - -private native void bSend( - long comm, Object buf, boolean db, int offset, int count, - long type, int baseType, int dest, int tag) throws MPIException; - -/** - * Send in synchronous mode. - *

Java binding of the MPI operation {@code MPI_SSEND}. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - */ -public final void sSend(Object buf, int count, Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - sSend(handle, buf, db, off, count, type.handle, type.baseType, dest, tag); -} - -private native void sSend( - long comm, Object buf, boolean db, int offset, int count, - long type, int baseType, int dest, int tag) throws MPIException; - -/** - * Send in ready mode. - *

Java binding of the MPI operation {@code MPI_RSEND}. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - */ -public final void rSend(Object buf, int count, Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - rSend(handle, buf, db, off, count, type.handle, type.baseType, dest, tag); -} - -private native void rSend( - long comm, Object buf, boolean db, int offset, int count, - long type, int baseType, int dest, int tag) throws MPIException; - -// Nonblocking communication - -/** - * Start a standard mode, nonblocking send. - *

Java binding of the MPI operation {@code MPI_ISEND}. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @return communication request - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - */ -public final Request iSend(Buffer buf, int count, - Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(iSend(handle, buf, count, type.handle, dest, tag)); -} - -private native long iSend( - long comm, Buffer buf, int count, long type, int dest, int tag) - throws MPIException; - -/** - * Start a buffered mode, nonblocking send. - *

Java binding of the MPI operation MPI_IBSEND. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @return communication request - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - */ -public final Request ibSend(Buffer buf, int count, - Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(ibSend(handle, buf, count, type.handle, dest, tag)); -} - -private native long ibSend( - long comm, Buffer buf, int count, long type, int dest, int tag) - throws MPIException; - -/** - * Start a synchronous mode, nonblocking send. - *

Java binding of the MPI operation {@code MPI_ISSEND}. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @return communication request - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - */ -public final Request isSend(Buffer buf, int count, - Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(isSend(handle, buf, count, type.handle, dest, tag)); -} - -private native long isSend( - long comm, Buffer buf, int count, long type, int dest, int tag) - throws MPIException; - -/** - * Start a ready mode, nonblocking send. - *

Java binding of the MPI operation {@code MPI_IRSEND}. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @return communication request - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - */ -public final Request irSend(Buffer buf, int count, - Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(irSend(handle, buf, count, type.handle, dest, tag)); -} - -private native long irSend( - long comm, Buffer buf, int count, long type, int dest, int tag) - throws MPIException; - -/** - * Start a nonblocking receive. - *

Java binding of the MPI operation {@code MPI_IRECV}. - * @param buf receive buffer - * @param count number of items in receive buffer - * @param type datatype of each item in receive buffer - * @param source rank of source - * @param tag message tag - * @return communication request - * @throws MPIException - * @see mpi.Comm#recv(Object, int, Datatype, int, int) - */ -public final Request iRecv(Buffer buf, int count, - Datatype type, int source, int tag) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(iRecv(handle, buf, count, type.handle, source, tag)); -} - -private native long iRecv( - long comm, Buffer buf, int count, long type, int source, int tag) - throws MPIException; - - -// Persistent communication requests - -/** - * Creates a persistent communication request for a standard mode send. - *

Java binding of the MPI operation {@code MPI_SEND_INIT}. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @return persistent communication request - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - */ -public final Prequest sendInit(Buffer buf, int count, - Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Prequest(sendInit(handle, buf, count, type.handle, dest, tag)); -} - -private native long sendInit( - long comm, Buffer buf, int count, long type, int dest, int tag) - throws MPIException; - -/** - * Creates a persistent communication request for a buffered mode send. - *

Java binding of the MPI operation {@code MPI_BSEND_INIT}. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @return persistent communication request - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - */ -public final Prequest bSendInit(Buffer buf, int count, - Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Prequest(bSendInit(handle, buf, count, type.handle, dest, tag)); -} - -private native long bSendInit( - long comm, Buffer buf, int count, long type, int dest, int tag) - throws MPIException; - -/** - * Creates a persistent communication request for a synchronous mode send. - *

Java binding of the MPI operation {@code MPI_SSEND_INIT}. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @return persistent communication request - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - */ -public final Prequest sSendInit(Buffer buf, int count, - Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Prequest(sSendInit(handle, buf, count, type.handle, dest, tag)); -} - -private native long sSendInit( - long comm, Buffer buf, int count, long type, int dest, int tag) - throws MPIException; - -/** - * Creates a persistent communication request for a ready mode send. - *

Java binding of the MPI operation {@code MPI_RSEND_INIT}. - * @param buf send buffer - * @param count number of items to send - * @param type datatype of each item in send buffer - * @param dest rank of destination - * @param tag message tag - * @return persistent communication request - * @throws MPIException - * @see mpi.Comm#send(Object, int, Datatype, int, int) - */ -public final Prequest rSendInit(Buffer buf, int count, - Datatype type, int dest, int tag) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Prequest(rSendInit(handle, buf, count, type.handle, dest, tag)); -} - -private native long rSendInit( - long comm, Buffer buf, int count, long type, int dest, int tag) - throws MPIException; - -/** - * Creates a persistent communication request for a receive operation. - *

Java binding of the MPI operation {@code MPI_RECV_INIT}. - * @param buf receive buffer - * @param count number of items in receive buffer - * @param type datatype of each item in receive buffer - * @param source rank of source - * @param tag message tag - * @return communication request - * @throws MPIException - * @see mpi.Comm#recv(Object, int, Datatype, int, int) - */ -public final Prequest recvInit(Buffer buf, int count, - Datatype type, int source, int tag) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Prequest(recvInit(handle, buf, count, type.handle, source, tag)); -} - -private native long recvInit( - long comm, Buffer buf, int count, long type, int source, int tag) - throws MPIException; - -// Pack and Unpack - -/** - * Packs message in send buffer {@code inbuf} into space specified in - * {@code outbuf}. - *

- * Java binding of the MPI operation {@code MPI_PACK}. - *

- * The return value is the output value of {@code position} - the - * inital value incremented by the number of bytes written. - * @param inbuf input buffer - * @param incount number of items in input buffer - * @param type datatype of each item in input buffer - * @param outbuf output buffer - * @param position initial position in output buffer - * @return final position in output buffer - * @throws MPIException - */ -public final int pack(Object inbuf, int incount, Datatype type, - byte[] outbuf, int position) - throws MPIException -{ - MPI.check(); - int offset = 0; - boolean indb = false; - - if(inbuf instanceof Buffer && !(indb = ((Buffer)inbuf).isDirect())) - { - offset = type.getOffset(inbuf); - inbuf = ((Buffer)inbuf).array(); - } - - return pack(handle, inbuf, indb, offset, incount, - type.handle, outbuf, position); -} - -private native int pack( - long comm, Object inbuf, boolean indb, int offset, int incount, - long type, byte[] outbuf, int position) throws MPIException; - -/** - * Unpacks message in receive buffer {@code outbuf} into space specified in - * {@code inbuf}. - *

- * Java binding of the MPI operation {@code MPI_UNPACK}. - *

- * The return value is the output value of {@code position} - the - * inital value incremented by the number of bytes read. - * @param inbuf input buffer - * @param position initial position in input buffer - * @param outbuf output buffer - * @param outcount number of items in output buffer - * @param type datatype of each item in output buffer - * @return final position in input buffer - * @throws MPIException - */ -public final int unpack(byte[] inbuf, int position, - Object outbuf, int outcount, Datatype type) - throws MPIException -{ - MPI.check(); - int offset = 0; - boolean outdb = false; - - if(outbuf instanceof Buffer && !(outdb = ((Buffer)outbuf).isDirect())) - { - offset = type.getOffset(outbuf); - outbuf = ((Buffer)outbuf).array(); - } - - return unpack(handle, inbuf, position, outbuf, outdb, - offset, outcount, type.handle); -} - -private native int unpack( - long comm, byte[] inbuf, int position, Object outbuf, boolean outdb, - int offset, int outcount, long type) throws MPIException; - -/** - * Returns an upper bound on the increment of {@code position} effected - * by {@code pack}. - *

Java binding of the MPI operation {@code MPI_PACK_SIZE}. - * @param incount number of items in input buffer - * @param type datatype of each item in input buffer - * @return upper bound on size of packed message - * @throws MPIException - */ -public final int packSize(int incount, Datatype type) throws MPIException -{ - MPI.check(); - return packSize(handle, incount, type.handle); -} - -private native int packSize(long comm, int incount, long type) - throws MPIException; - -// Probe and Cancel - -/** - * Check if there is an incoming message matching the pattern specified. - *

Java binding of the MPI operation {@code MPI_IPROBE}. - *

If such a message is currently available, a status object similar - * to the return value of a matching {@code recv} operation is returned. - * @param source rank of source - * @param tag message tag - * @return status object if such a message is currently available, - * {@code null} otherwise. - * @throws MPIException - */ -public final Status iProbe(int source, int tag) throws MPIException -{ - MPI.check(); - return iProbe(handle, source, tag); -} - -private native Status iProbe(long comm, int source, int tag) - throws MPIException; - -/** - * Wait until there is an incoming message matching the pattern specified. - *

Java binding of the MPI operation {@code MPI_PROBE}. - *

Returns a status object similar to the return value of a matching - * {@code recv} operation. - * @param source rank of source - * @param tag message tag - * @return status object - * @throws MPIException - */ -public final Status probe(int source, int tag) throws MPIException -{ - MPI.check(); - Status status = new Status(); - probe(handle, source, tag, status.data); - return status; -} - -private native void probe(long comm, int source, int tag, long[] stat) - throws MPIException; - -// Caching - -/** - * Create a new attribute key. - *

Java binding of the MPI operation {@code MPI_COMM_CREATE_KEYVAL}. - * @return attribute key for future access - * @throws MPIException - */ -public static int createKeyval() throws MPIException -{ - MPI.check(); - return createKeyval_jni(); -} - -private static native int createKeyval_jni() throws MPIException; - -/** - * Frees an attribute key for communicators. - *

Java binding of the MPI operation {@code MPI_COMM_FREE_KEYVAL}. - * @param keyval attribute key - * @throws MPIException - */ -public static void freeKeyval(int keyval) throws MPIException -{ - MPI.check(); - freeKeyval_jni(keyval); -} - -private static native void freeKeyval_jni(int keyval) throws MPIException; - -/** - * Stores attribute value associated with a key. - *

Java binding of the MPI operation {@code MPI_COMM_SET_ATTR}. - * @param keyval attribute key - * @param value attribute value - * @throws MPIException - */ -public final void setAttr(int keyval, Object value) throws MPIException -{ - MPI.check(); - setAttr(handle, keyval, MPI.attrSet(value)); -} - -private native void setAttr(long comm, int keyval, byte[] value) - throws MPIException; - -/** - * Retrieves attribute value by key. - *

Java binding of the MPI operation {@code MPI_COMM_GET_ATTR}. - * @param keyval attribute key - * @return attribute value or null if no attribute is associated with the key. - * @throws MPIException - */ -public final Object getAttr(int keyval) throws MPIException -{ - MPI.check(); - - if( keyval == MPI.TAG_UB || - keyval == MPI.HOST || - keyval == MPI.IO || - keyval == MPI.APPNUM || - keyval == MPI.LASTUSEDCODE || - keyval == MPI.UNIVERSE_SIZE) - { - return getAttr_predefined(handle, keyval); - } - else if(keyval == MPI.WTIME_IS_GLOBAL) - { - Integer value = (Integer)getAttr_predefined(handle, keyval); - return value==null ? null : value.intValue() != 0; - } - else - { - return MPI.attrGet(getAttr(handle, keyval)); - } -} - -private native Object getAttr_predefined(long comm, int keyval) - throws MPIException; - -private native byte[] getAttr(long comm, int keyval) throws MPIException; - -/** - * Deletes an attribute value associated with a key on a communicator. - *

Java binding of the MPI operation {@code MPI_COMM_DELETE_ATTR}. - * @param keyval attribute key - * @throws MPIException - */ -public final void deleteAttr(int keyval) throws MPIException -{ - MPI.check(); - deleteAttr(handle, keyval); -} - -private native void deleteAttr(long comm, int keyval) throws MPIException; - -// Process Topologies - -/** - * Returns the type of topology associated with the communicator. - *

Java binding of the MPI operation {@code MPI_TOPO_TEST}. - *

The return value will be one of {@code MPI.GRAPH}, {@code MPI.CART} - * or {@code MPI.UNDEFINED}. - * @return topology type of communicator - * @throws MPIException - */ -public final int getTopology() throws MPIException -{ - MPI.check(); - return getTopology(handle); -} - -private native int getTopology(long comm) throws MPIException; - -// Enviromental Management - -/** - * Abort MPI. - *

Java binding of the MPI operation {@code MPI_ABORT}. - * @param errorcode error code for Unix or POSIX environments - * @throws MPIException - */ -public final void abort(int errorcode) throws MPIException -{ - MPI.check(); - abort(handle, errorcode); -} - -private native void abort(long comm, int errorcode) throws MPIException; - -// Error handler - -/** - * Associates a new error handler with communicator at the calling process. - *

Java binding of the MPI operation {@code MPI_ERRHANDLER_SET}. - * @param errhandler new MPI error handler for communicator - * @throws MPIException - */ -public final void setErrhandler(Errhandler errhandler) throws MPIException -{ - MPI.check(); - setErrhandler(handle, errhandler.handle); -} - -private native void setErrhandler(long comm, long errhandler) - throws MPIException; - -/** - * Returns the error handler currently associated with the communicator. - *

Java binding of the MPI operation {@code MPI_ERRHANDLER_GET}. - * @return MPI error handler currently associated with communicator - * @throws MPIException - */ -public final Errhandler getErrhandler() throws MPIException -{ - MPI.check(); - return new Errhandler(getErrhandler(handle)); -} - -private native long getErrhandler(long comm); - -// Collective Communication - -/** - * A call to {@code barrier} blocks the caller until all process - * in the group have called it. - *

Java binding of the MPI operation {@code MPI_BARRIER}. - * @throws MPIException - */ -public final void barrier() throws MPIException -{ - MPI.check(); - barrier(handle); -} - -private native void barrier(long comm) throws MPIException; - -/** - * Nonblocking barrier sinchronization. - *

Java binding of the MPI operation {@code MPI_IBARRIER}. - * @return communication request - * @throws MPIException - */ -public final Request iBarrier() throws MPIException -{ - MPI.check(); - return new Request(iBarrier(handle)); -} - -private native long iBarrier(long comm) throws MPIException; - -/** - * Broadcast a message from the process with rank {@code root} - * to all processes of the group. - *

Java binding of the MPI operation {@code MPI_BCAST}. - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each item in buffer - * @param root rank of broadcast root - * @throws MPIException - */ -public final void bcast(Object buf, int count, Datatype type, int root) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - bcast(handle, buf, db, off, count, type.handle, type.baseType, root); -} - -private native void bcast( - long comm, Object buf, boolean db, int offset, int count, - long type, int basetype, int root) throws MPIException; - -/** - * Broadcast a message from the process with rank {@code root} - * to all processes of the group. - *

Java binding of the MPI operation {@code MPI_IBCAST}. - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each item in buffer - * @param root rank of broadcast root - * @return communication request - * @throws MPIException - */ -public final Request iBcast(Buffer buf, int count, Datatype type, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(iBcast(handle, buf, count, type.handle, root)); -} - -private native long iBcast( - long comm, Buffer buf, int count, long type, int root) - throws MPIException; - -/** - * Each process sends the contents of its send buffer to the root process. - *

Java binding of the MPI operation {@code MPI_GATHER}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @param root rank of receiving process - * @throws MPIException - */ -public final void gather( - Object sendbuf, int sendcount, Datatype sendtype, - Object recvbuf, int recvcount, Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - gather(handle, sendbuf, sdb, sendoff, sendcount, - sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, - recvtype.handle, recvtype.baseType, root); -} - -/** - * Each process sends the contents of its send buffer to the root process. - *

Java binding of the MPI operation {@code MPI_GATHER} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * The buffer is used by the root process to receive data, - * and it is used by the non-root processes to send data. - * @param buf buffer - * @param count number of items to send/receive - * @param type datatype of each item in buffer - * @param root rank of receiving process - * @throws MPIException - */ -public final void gather(Object buf, int count, Datatype type, int root) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - gather(handle, null, false, 0, 0, 0, 0, - buf, db, off, count, type.handle, type.baseType, root); -} - -private native void gather( - long comm, Object sendBuf, boolean sdb, int sendOff, int sendCount, - long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOff, int recvCount, - long recvType, int recvBaseType, int root) - throws MPIException; - -/** - * Each process sends the contents of its send buffer to the root process. - *

Java binding of the MPI operation {@code MPI_IGATHER}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @param root rank of receiving process - * @return communication request - * @throws MPIException - */ -public final Request iGather( - Buffer sendbuf, int sendcount, Datatype sendtype, - Buffer recvbuf, int recvcount, Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iGather(handle, sendbuf, sendcount, sendtype.handle, - recvbuf, recvcount, recvtype.handle, root)); -} - -/** - * Each process sends the contents of its send buffer to the root process. - *

Java binding of the MPI operation {@code MPI_IGATHER} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * The buffer is used by the root process to receive data, - * and it is used by the non-root processes to send data. - * @param buf buffer - * @param count number of items to send/receive - * @param type datatype of each item in buffer - * @param root rank of receiving process - * @return communication request - * @throws MPIException - */ -public final Request iGather(Buffer buf, int count, Datatype type, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - - return new Request(iGather(handle, null, 0, 0, - buf, count, type.handle, root)); -} - -private native long iGather( - long comm, Buffer sendbuf, int sendcount, long sendtype, - Buffer recvbuf, int recvcount, long recvtype, - int root) throws MPIException; - -/** - * Extends functionality of {@code gather} by allowing varying - * counts of data from each process. - *

Java binding of the MPI operation {@code MPI_GATHERV}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of elements received from each process - * @param displs displacements at which to place incoming data - * @param recvtype datatype of each item in receive buffer - * @param root rank of receiving process - * @throws MPIException - */ -public final void gatherv(Object sendbuf, int sendcount, Datatype sendtype, - Object recvbuf, int[] recvcount, int[] displs, - Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - gatherv(handle, sendbuf, sdb, sendoff, sendcount, - sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, displs, - recvtype.handle, recvtype.baseType, root); -} - -/** - * Extends functionality of {@code gather} by allowing varying - * counts of data from each process. - *

Java binding of the MPI operation {@code MPI_GATHERV} using - * {@code MPI_IN_PLACE} instead of the send buffer in the root process. - * This method must be used in the root process. - * @param recvbuf receive buffer - * @param recvcount number of elements received from each process - * @param displs displacements at which to place incoming data - * @param recvtype datatype of each item in receive buffer - * @param root rank of receiving process - * @throws MPIException - */ -public final void gatherv(Object recvbuf, int[] recvcount, int[] displs, - Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - int recvoff = 0; - boolean rdb = false; - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - gatherv(handle, null, false, 0, 0, 0, 0, recvbuf, rdb, recvoff, recvcount, - displs, recvtype.handle, recvtype.baseType, root); -} - -/** - * Extends functionality of {@code gather} by allowing varying - * counts of data from each process. - *

Java binding of the MPI operation {@code MPI_GATHERV} using - * {@code MPI_IN_PLACE} instead of the send buffer in the root process. - * This method must be used in the non-root processes. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param root rank of receiving process - * @throws MPIException - */ -public final void gatherv(Object sendbuf, int sendcount, - Datatype sendtype, int root) - throws MPIException -{ - MPI.check(); - int sendoff = 0; - boolean sdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - gatherv(handle, sendbuf, sdb, sendoff, sendcount, - sendtype.handle, sendtype.baseType, - null, false, 0, null, null, 0, 0, root); -} - -private native void gatherv( - long comm, Object sendBuf, boolean sdb, int sendOffset, - int sendCount, long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOffset, - int[] recvCount, int[] displs, long recvType, int recvBaseType, - int root) throws MPIException; - -/** - * Extends functionality of {@code gather} by allowing varying - * counts of data from each process. - *

Java binding of the MPI operation {@code MPI_IGATHERV}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of elements received from each process - * @param displs displacements at which to place incoming data - * @param recvtype datatype of each item in receive buffer - * @param root rank of receiving process - * @return communication request - * @throws MPIException - */ -public final Request iGatherv( - Buffer sendbuf, int sendcount, Datatype sendtype, Buffer recvbuf, - int[] recvcount, int[] displs, Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iGatherv( - handle, sendbuf, sendcount, sendtype.handle, - recvbuf, recvcount, displs, recvtype.handle, root)); -} - -/** - * Extends functionality of {@code gather} by allowing varying - * counts of data from each process. - *

Java binding of the MPI operation {@code MPI_IGATHERV} using - * {@code MPI_IN_PLACE} instead of the send buffer in the root process. - * This method must be used in the root process. - * @param recvbuf receive buffer - * @param recvcount number of elements received from each process - * @param displs displacements at which to place incoming data - * @param recvtype datatype of each item in receive buffer - * @param root rank of receiving process - * @return communication request - * @throws MPIException - */ -public final Request iGatherv(Buffer recvbuf, int[] recvcount, int[] displs, - Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(recvbuf); - - return new Request(iGatherv(handle, null, 0, 0, - recvbuf, recvcount, displs, recvtype.handle, root)); -} - -/** - * Extends functionality of {@code gather} by allowing varying - * counts of data from each process. - *

Java binding of the MPI operation {@code MPI_IGATHERV} using - * {@code MPI_IN_PLACE} instead of the send buffer in the root process. - * This method must be used in the non-root processes. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param root rank of receiving process - * @return communication request - * @throws MPIException - */ -public final Request iGatherv(Buffer sendbuf, int sendcount, - Datatype sendtype, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf); - - return new Request(iGatherv(handle, sendbuf, sendcount, sendtype.handle, - null, null, null, 0, root)); -} - -private native long iGatherv( - long handle, Buffer sendbuf, int sendcount, long sendtype, - Buffer recvbuf, int[] recvcount, int[] displs, - long recvtype, int root) - throws MPIException; - -/** - * Inverse of the operation {@code gather}. - *

Java binding of the MPI operation {@code MPI_SCATTER}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @param root rank of sending process - * @throws MPIException - */ -public final void scatter( - Object sendbuf, int sendcount, Datatype sendtype, - Object recvbuf, int recvcount, Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - scatter(handle, sendbuf, sdb, sendoff, sendcount, - sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, - recvtype.handle, recvtype.baseType, root); -} - -/** - * Inverse of the operation {@code gather}. - *

Java binding of the MPI operation {@code MPI_SCATTER} - * using {@code MPI_IN_PLACE} instead of the receive buffer. - * The buffer is used by the root process to send data, - * and it is used by the non-root processes to receive data. - * @param buf send/receive buffer - * @param count number of items to send/receive - * @param type datatype of each item in buffer - * @param root rank of sending process - * @throws MPIException - */ -public final void scatter(Object buf, int count, Datatype type, int root) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - scatter(handle, buf, db, off, count, type.handle, type.baseType, - null, false, 0, 0, 0, 0, root); -} - -private native void scatter( - long comm, Object sendBuf, boolean sdb, int sendOffset, int sendCount, - long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOffset, int recvCount, - long recvType, int recvBaseType, int root) throws MPIException; - -/** - * Inverse of the operation {@code gather}. - *

Java binding of the MPI operation {@code MPI_ISCATTER}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @param root rank of sending process - * @return communication request - * @throws MPIException - */ -public final Request iScatter( - Buffer sendbuf, int sendcount, Datatype sendtype, - Buffer recvbuf, int recvcount, Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iScatter(handle, sendbuf, sendcount, sendtype.handle, - recvbuf, recvcount, recvtype.handle, root)); -} - -/** - * Inverse of the operation {@code gather}. - *

Java binding of the MPI operation {@code MPI_ISCATTER} - * using {@code MPI_IN_PLACE} instead of the receive buffer. - * The buffer is used by the root process to send data, - * and it is used by the non-root processes to receive data. - * @param buf send/receive buffer - * @param count number of items to send/receive - * @param type datatype of each item in buffer - * @param root rank of sending process - * @return communication request - * @throws MPIException - */ -public final Request iScatter(Buffer buf, int count, Datatype type, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - - return new Request(iScatter(handle, buf, count, type.handle, - null, 0, 0, root)); -} - -private native long iScatter( - long comm, Buffer sendbuf, int sendcount, long sendtype, - Buffer recvbuf, int recvcount, long recvtype, int root) - throws MPIException; - -/** - * Inverse of the operation {@code gatherv}. - *

Java binding of the MPI operation {@code MPI_SCATTERV}. - * @param sendbuf send buffer - * @param sendcount number of items sent to each process - * @param displs displacements from which to take outgoing data - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @param root rank of sending process - * @throws MPIException - */ -public final void scatterv( - Object sendbuf, int[] sendcount, int[] displs, Datatype sendtype, - Object recvbuf, int recvcount, Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - scatterv(handle, sendbuf, sdb, sendoff, sendcount, displs, - sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, - recvtype.handle, recvtype.baseType, root); -} - -/** - * Inverse of the operation {@code gatherv}. - *

Java binding of the MPI operation {@code MPI_SCATTERV} using - * {@code MPI_IN_PLACE} instead of the receive buffer in the root process. - * This method must be used in the root process. - * @param sendbuf send buffer - * @param sendcount number of items sent to each process - * @param displs displacements from which to take outgoing data - * @param sendtype datatype of each item in send buffer - * @param root rank of sending process - * @throws MPIException - */ -public final void scatterv(Object sendbuf, int[] sendcount, int[] displs, - Datatype sendtype, int root) - throws MPIException -{ - MPI.check(); - int sendoff = 0; - boolean sdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - scatterv(handle, sendbuf, sdb, sendoff, sendcount, displs, - sendtype.handle, sendtype.baseType, - null, false, 0, 0, 0, 0, root); -} - -/** - * Inverse of the operation {@code gatherv}. - *

Java binding of the MPI operation {@code MPI_SCATTERV} using - * {@code MPI_IN_PLACE} instead of the receive buffer in the root process. - * This method must be used in the non-root processes. - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @param root rank of sending process - * @throws MPIException - */ -public final void scatterv(Object recvbuf, int recvcount, - Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - int recvoff = 0; - boolean rdb = false; - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - scatterv(handle, null, false, 0, null, null, 0, 0, - recvbuf, rdb, recvoff, recvcount, - recvtype.handle, recvtype.baseType, root); -} - -private native void scatterv( - long comm, Object sendBuf, boolean sdb, int sendOffset, - int[] sendCount, int[] displs, long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOffset, int recvCount, - long recvType, int recvBaseType, int root) - throws MPIException; - -/** - * Inverse of the operation {@code gatherv}. - *

Java binding of the MPI operation {@code MPI_ISCATTERV}. - * @param sendbuf send buffer - * @param sendcount number of items sent to each process - * @param displs displacements from which to take outgoing data - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @param root rank of sending process - * @return communication request - * @throws MPIException - */ -public final Request iScatterv( - Buffer sendbuf, int[] sendcount, int[] displs, Datatype sendtype, - Buffer recvbuf, int recvcount, Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iScatterv( - handle, sendbuf, sendcount, displs, sendtype.handle, - recvbuf, recvcount, recvtype.handle, root)); -} - -/** - * Inverse of the operation {@code gatherv}. - *

Java binding of the MPI operation {@code MPI_ISCATTERV} using - * {@code MPI_IN_PLACE} instead of the receive buffer in the root process. - * This method must be used in the root process. - * @param sendbuf send buffer - * @param sendcount number of items sent to each process - * @param displs displacements from which to take outgoing data - * @param sendtype datatype of each item in send buffer - * @param root rank of sending process - * @return communication request - * @throws MPIException - */ -public final Request iScatterv(Buffer sendbuf, int[] sendcount, int[] displs, - Datatype sendtype, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf); - - return new Request(iScatterv(handle, sendbuf, sendcount, displs, - sendtype.handle, null, 0, 0, root)); -} - -/** - * Inverse of the operation {@code gatherv}. - *

Java binding of the MPI operation {@code MPI_ISCATTERV} using - * {@code MPI_IN_PLACE} instead of the receive buffer in the root process. - * This method must be used in the non-root processes. - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @param root rank of sending process - * @return communication request - * @throws MPIException - */ -public final Request iScatterv(Buffer recvbuf, int recvcount, - Datatype recvtype, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(recvbuf); - - return new Request(iScatterv(handle, null, null, null, 0, - recvbuf, recvcount, recvtype.handle, root)); -} - -private native long iScatterv( - long comm, Buffer sendbuf, int[] sendcount, int[] displs, long sendtype, - Buffer recvbuf, int recvcount, long recvtype, int root) - throws MPIException; - -/** - * Similar to {@code gather}, but all processes receive the result. - *

Java binding of the MPI operation {@code MPI_ALLGATHER}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @throws MPIException - */ -public final void allGather(Object sendbuf, int sendcount, Datatype sendtype, - Object recvbuf, int recvcount, Datatype recvtype) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - allGather(handle, sendbuf, sdb, sendoff, sendcount, - sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, - recvtype.handle, recvtype.baseType); -} - -/** - * Similar to {@code gather}, but all processes receive the result. - *

Java binding of the MPI operation {@code MPI_ALLGATHER} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf receive buffer - * @param count number of items to receive - * @param type datatype of each item in receive buffer - * @throws MPIException - */ -public final void allGather(Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - allGather(handle, null, false, 0, 0, 0, 0, - buf, db, off, count, type.handle, type.baseType); -} - -private native void allGather( - long comm, Object sendBuf, boolean sdb, int sendOffset, int sendCount, - long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOffset, int recvCount, - long recvType, int recvBaseType) throws MPIException; - -/** - * Similar to {@code gather}, but all processes receive the result. - *

Java binding of the MPI operation {@code MPI_IALLGATHER}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @return communication request - * @throws MPIException - */ -public final Request iAllGather( - Buffer sendbuf, int sendcount, Datatype sendtype, - Buffer recvbuf, int recvcount, Datatype recvtype) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iAllGather(handle, sendbuf, sendcount, sendtype.handle, - recvbuf, recvcount, recvtype.handle)); -} - -/** - * Similar to {@code gather}, but all processes receive the result. - *

Java binding of the MPI operation {@code MPI_IALLGATHER} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf receive buffer - * @param count number of items to receive - * @param type datatype of each item in receive buffer - * @return communication request - * @throws MPIException - */ -public final Request iAllGather(Buffer buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(iAllGather(handle, null, 0, 0, buf, count, type.handle)); -} - -private native long iAllGather( - long comm, Buffer sendbuf, int sendcount, long sendtype, - Buffer recvbuf, int recvcount, long recvtype) throws MPIException; - -/** - * Similar to {@code gatherv}, but all processes receive the result. - *

Java binding of the MPI operation {@code MPI_ALLGATHERV}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of elements received from each process - * @param displs displacements at which to place incoming data - * @param recvtype datatype of each item in receive buffer - * @throws MPIException - */ -public final void allGatherv( - Object sendbuf, int sendcount, Datatype sendtype, - Object recvbuf, int[] recvcount, int[] displs, Datatype recvtype) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - allGatherv(handle, sendbuf, sdb, sendoff, sendcount, - sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, displs, - recvtype.handle, recvtype.baseType); -} - -/** - * Similar to {@code gatherv}, but all processes receive the result. - *

Java binding of the MPI operation {@code MPI_ALLGATHERV} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param recvbuf receive buffer - * @param recvcount number of elements received from each process - * @param displs displacements at which to place incoming data - * @param recvtype datatype of each item in receive buffer - * @throws MPIException - */ -public final void allGatherv(Object recvbuf, int[] recvcount, - int[] displs, Datatype recvtype) - throws MPIException -{ - MPI.check(); - int recvoff = 0; - boolean rdb = false; - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - allGatherv(handle, null, false, 0, 0, 0, 0, - recvbuf, rdb, recvoff, recvcount, - displs, recvtype.handle, recvtype.baseType); -} - -private native void allGatherv( - long comm, Object sendBuf, boolean sdb, int sendOffset, int sendCount, - long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOffset, int[] recvCount, - int[] displs, long recvType, int recvBasetype) throws MPIException; - -/** - * Similar to {@code gatherv}, but all processes receive the result. - *

Java binding of the MPI operation {@code MPI_IALLGATHERV}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of elements received from each process - * @param displs displacements at which to place incoming data - * @param recvtype datatype of each item in receive buffer - * @return communication request - * @throws MPIException - */ -public final Request iAllGatherv( - Buffer sendbuf, int sendcount, Datatype sendtype, - Buffer recvbuf, int[] recvcount, int[] displs, Datatype recvtype) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iAllGatherv( - handle, sendbuf, sendcount, sendtype.handle, - recvbuf, recvcount, displs, recvtype.handle)); -} - -/** - * Similar to {@code gatherv}, but all processes receive the result. - *

Java binding of the MPI operation {@code MPI_IALLGATHERV} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf receive buffer - * @param count number of elements received from each process - * @param displs displacements at which to place incoming data - * @param type datatype of each item in receive buffer - * @return communication request - * @throws MPIException - */ -public final Request iAllGatherv( - Buffer buf, int[] count, int[] displs, Datatype type) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - - return new Request(iAllGatherv( - handle, null, 0, 0, buf, count, displs, type.handle)); -} - -private native long iAllGatherv( - long handle, Buffer sendbuf, int sendcount, long sendtype, - Buffer recvbuf, int[] recvcount, int[] displs, long recvtype) - throws MPIException; - -/** - * Extension of {@code allGather} to the case where each process sends - * distinct data to each of the receivers. - *

Java binding of the MPI operation {@code MPI_ALLTOALL}. - * @param sendbuf send buffer - * @param sendcount number of items sent to each process - * @param sendtype datatype send buffer items - * @param recvbuf receive buffer - * @param recvcount number of items received from any process - * @param recvtype datatype of receive buffer items - * @throws MPIException - */ -public final void allToAll(Object sendbuf, int sendcount, Datatype sendtype, - Object recvbuf, int recvcount, Datatype recvtype) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - allToAll(handle, sendbuf, sdb, sendoff, sendcount, - sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, - recvtype.handle, recvtype.baseType); -} - -private native void allToAll( - long comm, Object sendBuf, boolean sdb, int sendOffset, int sendCount, - long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOffset, int recvCount, - long recvType, int recvBaseType) throws MPIException; - -/** - * Extension of {@code allGather} to the case where each process sends - * distinct data to each of the receivers. - *

Java binding of the MPI operation {@code MPI_IALLTOALL}. - * @param sendbuf send buffer - * @param sendcount number of items sent to each process - * @param sendtype datatype send buffer items - * @param recvbuf receive buffer - * @param recvcount number of items received from any process - * @param recvtype datatype of receive buffer items - * @return communication request - * @throws MPIException - */ -public final Request iAllToAll(Buffer sendbuf, int sendcount, Datatype sendtype, - Buffer recvbuf, int recvcount, Datatype recvtype) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iAllToAll(handle, sendbuf, sendcount, sendtype.handle, - recvbuf, recvcount, recvtype.handle)); -} - -private native long iAllToAll( - long comm, Buffer sendbuf, int sendcount, long sendtype, - Buffer recvbuf, int recvcount, long recvtype) throws MPIException; - -/** - * Adds flexibility to {@code allToAll}: location of data for send is - * specified by {@code sdispls} and location to place data on receive - * side is specified by {@code rdispls}. - *

Java binding of the MPI operation {@code MPI_ALLTOALLV}. - * @param sendbuf send buffer - * @param sendcount number of items sent to each buffer - * @param sdispls displacements from which to take outgoing data - * @param sendtype datatype send buffer items - * @param recvbuf receive buffer - * @param recvcount number of elements received from each process - * @param rdispls displacements at which to place incoming data - * @param recvtype datatype of each item in receive buffer - * @throws MPIException - */ -public final void allToAllv( - Object sendbuf, int[] sendcount, int[] sdispls, Datatype sendtype, - Object recvbuf, int[] recvcount, int[] rdispls, Datatype recvtype) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - allToAllv(handle, sendbuf, sdb, sendoff, sendcount, sdispls, - sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, rdispls, - recvtype.handle, recvtype.baseType); -} - -private native void allToAllv( - long comm, Object sendBuf, boolean sdb, int sendOffset, - int[] sendCount, int[] sdispls, long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOffset, - int[] recvCount, int[] rdispls, long recvType, int recvBaseType) - throws MPIException; - -/** - * Adds flexibility to {@code allToAll}: location of data for send is - * specified by {@code sdispls} and location to place data on receive - * side is specified by {@code rdispls}. - *

Java binding of the MPI operation {@code MPI_IALLTOALLV}. - * @param sendbuf send buffer - * @param sendcount number of items sent to each buffer - * @param sdispls displacements from which to take outgoing data - * @param sendtype datatype send buffer items - * @param recvbuf receive buffer - * @param recvcount number of elements received from each process - * @param rdispls displacements at which to place incoming data - * @param recvtype datatype of each item in receive buffer - * @return communication request - * @throws MPIException - */ -public final Request iAllToAllv( - Buffer sendbuf, int[] sendcount, int[] sdispls, Datatype sendtype, - Buffer recvbuf, int[] recvcount, int[] rdispls, Datatype recvtype) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iAllToAllv( - handle, sendbuf, sendcount, sdispls, sendtype.handle, - recvbuf, recvcount, rdispls, recvtype.handle)); -} - -private native long iAllToAllv(long comm, - Buffer sendbuf, int[] sendcount, int[] sdispls, long sendtype, - Buffer recvbuf, int[] recvcount, int[] rdispls, long recvtype) - throws MPIException; - -/** - * Java binding of {@code MPI_NEIGHBOR_ALLGATHER}. 
- * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @throws MPIException - */ -public final void neighborAllGather( - Object sendbuf, int sendcount, Datatype sendtype, - Object recvbuf, int recvcount, Datatype recvtype) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - neighborAllGather(handle, sendbuf, sdb, sendoff, sendcount, - sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, - recvtype.handle, recvtype.baseType); -} - -private native void neighborAllGather( - long comm, Object sendBuf, boolean sdb, int sendOffset, - int sendCount, long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOffset, - int recvCount, long recvType, int recvBaseType) - throws MPIException; - -/** - * Java binding of {@code MPI_INEIGHBOR_ALLGATHER}. 
- * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @return communication request - * @throws MPIException - */ -public final Request iNeighborAllGather( - Buffer sendbuf, int sendcount, Datatype sendtype, - Buffer recvbuf, int recvcount, Datatype recvtype) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iNeighborAllGather( - handle, sendbuf, sendcount, sendtype.handle, - recvbuf, recvcount, recvtype.handle)); -} - -private native long iNeighborAllGather( - long comm, Buffer sendBuf, int sendCount, long sendType, - Buffer recvBuf, int recvCount, long recvType) - throws MPIException; - -/** - * Java binding of {@code MPI_NEIGHBOR_ALLGATHERV}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of elements that are received from each neighbor - * @param displs displacements at which to place incoming data - * @param recvtype datatype of receive buffer elements - * @throws MPIException - */ -public final void neighborAllGatherv( - Object sendbuf, int sendcount, Datatype sendtype, - Object recvbuf, int[] recvcount, int[] displs, Datatype recvtype) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - neighborAllGatherv(handle, sendbuf, sdb, sendoff, sendcount, - 
sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, displs, - recvtype.handle, recvtype.baseType); -} - -private native void neighborAllGatherv( - long comm, Object sendBuf, boolean sdb, int sendOff, - int sendCount, long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOff, - int[] recvCount, int[] displs, long recvType, int recvBaseType); - -/** - * Java binding of {@code MPI_INEIGHBOR_ALLGATHERV}. - * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of elements that are received from each neighbor - * @param displs displacements at which to place incoming data - * @param recvtype datatype of receive buffer elements - * @return communication request - * @throws MPIException - */ -public final Request iNeighborAllGatherv( - Buffer sendbuf, int sendcount, Datatype sendtype, - Buffer recvbuf, int[] recvcount, int[] displs, Datatype recvtype) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iNeighborAllGatherv( - handle, sendbuf, sendcount, sendtype.handle, - recvbuf, recvcount, displs, recvtype.handle)); -} - -private native long iNeighborAllGatherv( - long comm, Buffer sendBuf, int sendCount, long sendType, - Buffer recvBuf, int[] recvCount, int[] displs, long recvType) - throws MPIException; - -/** - * Java binding of {@code MPI_NEIGHBOR_ALLTOALL}. 
- * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @throws MPIException - */ -public final void neighborAllToAll( - Object sendbuf, int sendcount, Datatype sendtype, - Object recvbuf, int recvcount, Datatype recvtype) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - neighborAllToAll(handle, sendbuf, sdb, sendoff, sendcount, - sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, - recvtype.handle, recvtype.baseType); -} - -private native void neighborAllToAll( - long comm, Object sendBuf, boolean sdb, int sendOff, - int sendCount, long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOff, - int recvCount, long recvType, int recvBaseType) - throws MPIException; - -/** - * Java binding of {@code MPI_INEIGHBOR_ALLTOALL}. 
- * @param sendbuf send buffer - * @param sendcount number of items to send - * @param sendtype datatype of each item in send buffer - * @param recvbuf receive buffer - * @param recvcount number of items to receive - * @param recvtype datatype of each item in receive buffer - * @return communication request - * @throws MPIException - */ -public final Request iNeighborAllToAll( - Buffer sendbuf, int sendcount, Datatype sendtype, - Buffer recvbuf, int recvcount, Datatype recvtype) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iNeighborAllToAll( - handle, sendbuf, sendcount, sendtype.handle, - recvbuf, recvcount, recvtype.handle)); -} - -private native long iNeighborAllToAll( - long comm, Buffer sendBuf, int sendCount, long sendType, - Buffer recvBuf, int recvCount, long recvType); - -/** - * Java binding of {@code MPI_NEIGHBOR_ALLTOALLV}. - * @param sendbuf send buffer - * @param sendcount number of items sent to each buffer - * @param sdispls displacements from which to take outgoing data - * @param sendtype datatype send buffer items - * @param recvbuf receive buffer - * @param recvcount number of elements received from each process - * @param rdispls displacements at which to place incoming data - * @param recvtype datatype of each item in receive buffer - * @throws MPIException - */ -public final void neighborAllToAllv( - Object sendbuf, int[] sendcount, int[] sdispls, Datatype sendtype, - Object recvbuf, int[] recvcount, int[] rdispls, Datatype recvtype) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = sendtype.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = recvtype.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - 
neighborAllToAllv(handle, - sendbuf, sdb, sendoff, sendcount, sdispls, - sendtype.handle, sendtype.baseType, - recvbuf, rdb, recvoff, recvcount, rdispls, - recvtype.handle, recvtype.baseType); -} - -private native void neighborAllToAllv( - long comm, Object sendBuf, boolean sdb, int sendOff, - int[] sendCount, int[] sdispls, long sendType, int sendBaseType, - Object recvBuf, boolean rdb, int recvOff, - int[] recvCount, int[] rdispls, long recvType, int recvBaseType) - throws MPIException; - -/** - * Java binding of {@code MPI_INEIGHBOR_ALLTOALLV}. - * @param sendbuf send buffer - * @param sendcount number of items sent to each buffer - * @param sdispls displacements from which to take outgoing data - * @param sendtype datatype send buffer items - * @param recvbuf receive buffer - * @param recvcount number of elements received from each process - * @param rdispls displacements at which to place incoming data - * @param recvtype datatype of each item in receive buffer - * @return communication request - * @throws MPIException - */ -public final Request iNeighborAllToAllv( - Buffer sendbuf, int[] sendcount, int[] sdispls, Datatype sendtype, - Buffer recvbuf, int[] recvcount, int[] rdispls, Datatype recvtype) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iNeighborAllToAllv( - handle, sendbuf, sendcount, sdispls, sendtype.handle, - recvbuf, recvcount, rdispls, recvtype.handle)); -} - -private native long iNeighborAllToAllv( - long comm, Buffer sendBuf, int[] sendCount, int[] sdispls, long sType, - Buffer recvBuf, int[] recvCount, int[] rdispls, long rType) - throws MPIException; - -/** - * Combine elements in input buffer of each process using the reduce - * operation, and return the combined value in the output buffer of the - * root process. - *

- * Java binding of the MPI operation {@code MPI_REDUCE}. - *

- * The predefined operations are available in Java as {@code MPI.MAX}, - * {@code MPI.MIN}, {@code MPI.SUM}, {@code MPI.PROD}, {@code MPI.LAND}, - * {@code MPI.BAND}, {@code MPI.LOR}, {@code MPI.BOR}, {@code MPI.LXOR}, - * {@code MPI.BXOR}, {@code MPI.MINLOC} and {@code MPI.MAXLOC}. - * @param sendbuf send buffer - * @param recvbuf receive buffer - * @param count number of items in send buffer - * @param type data type of each item in send buffer - * @param op reduce operation - * @param root rank of root process - * @throws MPIException - */ -public final void reduce(Object sendbuf, Object recvbuf, int count, - Datatype type, Op op, int root) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = type.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = type.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - reduce(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, - count, type.handle, type.baseType, op, op.handle, root); -} - -/** - * Combine elements in input buffer of each process using the reduce - * operation, and return the combined value in the output buffer of the - * root process. - *

Java binding of the MPI operation {@code MPI_REDUCE} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf send/receive buffer - * @param count number of items in buffer - * @param type data type of each item in buffer - * @param op reduce operation - * @param root rank of root process - * @throws MPIException - */ -public final void reduce(Object buf, int count, Datatype type, Op op, int root) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - reduce(handle, null, false, 0, buf, db, off, count, - type.handle, type.baseType, op, op.handle, root); -} - -private native void reduce( - long comm, Object sendbuf, boolean sdb, int sendoff, - Object recvbuf, boolean rdb, int recvoff, int count, - long type, int baseType, Op jOp, long hOp, int root) - throws MPIException; - -/** - * Combine elements in input buffer of each process using the reduce - * operation, and return the combined value in the output buffer of the - * root process. - *

Java binding of the MPI operation {@code MPI_IREDUCE}. - * @param sendbuf send buffer - * @param recvbuf receive buffer - * @param count number of items in send buffer - * @param type data type of each item in send buffer - * @param op reduce operation - * @param root rank of root process - * @return communication request - * @throws MPIException - */ -public final Request iReduce(Buffer sendbuf, Buffer recvbuf, - int count, Datatype type, Op op, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - op.setDatatype(type); - - return new Request(iReduce( - handle, sendbuf, recvbuf, count, - type.handle, type.baseType, op, op.handle, root)); -} - -/** - * Combine elements in input buffer of each process using the reduce - * operation, and return the combined value in the output buffer of the - * root process. - *

Java binding of the MPI operation {@code MPI_IREDUCE} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf send/receive buffer - * @param count number of items in buffer - * @param type data type of each item in buffer - * @param op reduce operation - * @param root rank of root process - * @return communication request - * @throws MPIException - */ -public final Request iReduce(Buffer buf, int count, - Datatype type, Op op, int root) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - op.setDatatype(type); - - return new Request(iReduce( - handle, null, buf, count, - type.handle, type.baseType, op, op.handle, root)); -} - -private native long iReduce( - long comm, Buffer sendbuf, Buffer recvbuf, int count, - long type, int baseType, Op jOp, long hOp, int root) - throws MPIException; - -/** - * Same as {@code reduce} except that the result appears in receive - * buffer of all process in the group. - *

Java binding of the MPI operation {@code MPI_ALLREDUCE}. - * @param sendbuf send buffer - * @param recvbuf receive buffer - * @param count number of items in send buffer - * @param type data type of each item in send buffer - * @param op reduce operation - * @throws MPIException - */ -public final void allReduce(Object sendbuf, Object recvbuf, - int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = type.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = type.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - allReduce(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, - count, type.handle, type.baseType, op, op.handle); -} - -/** - * Same as {@code reduce} except that the result appears in receive - * buffer of all process in the group. - *

Java binding of the MPI operation {@code MPI_ALLREDUCE} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf receive buffer - * @param count number of items in send buffer - * @param type data type of each item in send buffer - * @param op reduce operation - * @throws MPIException - */ -public final void allReduce(Object buf, int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - allReduce(handle, null, false, 0, buf, db, off, count, - type.handle, type.baseType, op, op.handle); -} - -private native void allReduce( - long comm, Object sendbuf, boolean sdb, int sendoff, - Object recvbuf, boolean rdb, int recvoff, int count, - long type, int baseType, Op jOp, long hOp) throws MPIException; - -/** - * Same as {@code reduce} except that the result appears in receive - * buffer of all process in the group. - *

Java binding of the MPI operation {@code MPI_IALLREDUCE}. - * @param sendbuf send buffer - * @param recvbuf receive buffer - * @param count number of items in send buffer - * @param type data type of each item in send buffer - * @param op reduce operation - * @return communication request - * @throws MPIException - */ -public final Request iAllReduce(Buffer sendbuf, Buffer recvbuf, - int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(sendbuf, recvbuf); - op.setDatatype(type); - - return new Request(iAllReduce(handle, sendbuf, recvbuf, count, - type.handle, type.baseType, op, op.handle)); -} - -/** - * Same as {@code reduce} except that the result appears in receive - * buffer of all process in the group. - *

Java binding of the MPI operation {@code MPI_IALLREDUCE} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf receive buffer - * @param count number of items in send buffer - * @param type data type of each item in send buffer - * @param op reduce operation - * @return communication request - * @throws MPIException - */ -public final Request iAllReduce(Buffer buf, int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - assertDirectBuffer(buf); - - return new Request(iAllReduce( - handle, null, buf, count, - type.handle, type.baseType, op, op.handle)); -} - -private native long iAllReduce( - long comm, Buffer sendbuf, Buffer recvbuf, int count, - long type, int baseType, Op jOp, long hOp) throws MPIException; - -/** - * Combine elements in input buffer of each process using the reduce - * operation, and scatter the combined values over the output buffers - * of the processes. - *

Java binding of the MPI operation {@code MPI_REDUCE_SCATTER}. - * @param sendbuf send buffer - * @param recvbuf receive buffer - * @param recvcounts numbers of result elements distributed to each process - * @param type data type of each item in send buffer - * @param op reduce operation - * @throws MPIException - */ -public final void reduceScatter(Object sendbuf, Object recvbuf, - int[] recvcounts, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = type.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = type.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - reduceScatter(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, - recvcounts, type.handle, type.baseType, op, op.handle); -} - -/** - * Combine elements in input buffer of each process using the reduce - * operation, and scatter the combined values over the output buffers - * of the processes. - *

Java binding of the MPI operation {@code MPI_REDUCE_SCATTER} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf receive buffer - * @param counts numbers of result elements distributed to each process - * @param type data type of each item in send buffer - * @param op reduce operation - * @throws MPIException - */ -public final void reduceScatter(Object buf, int[] counts, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - reduceScatter(handle, null, false, 0, buf, db, off, counts, - type.handle, type.baseType, op, op.handle); -} - -private native void reduceScatter( - long comm, Object sendbuf, boolean sdb, int sendoff, - Object recvbuf, boolean rdb, int recvoff, int[] recvcounts, - long type, int baseType, Op jOp, long hOp) throws MPIException; - -/** - * Combine elements in input buffer of each process using the reduce - * operation, and scatter the combined values over the output buffers - * of the processes. - *

Java binding of the MPI operation {@code MPI_IREDUCE_SCATTER}. - * @param sendbuf send buffer - * @param recvbuf receive buffer - * @param recvcounts numbers of result elements distributed to each process - * @param type data type of each item in send buffer - * @param op reduce operation - * @return communication request - * @throws MPIException - */ -public final Request iReduceScatter(Buffer sendbuf, Buffer recvbuf, - int[] recvcounts, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iReduceScatter( - handle, sendbuf, recvbuf, recvcounts, - type.handle, type.baseType, op, op.handle)); -} - -/** - * Combine elements in input buffer of each process using the reduce - * operation, and scatter the combined values over the output buffers - * of the processes. - *

Java binding of the MPI operation {@code MPI_IREDUCE_SCATTER} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf receive buffer - * @param counts numbers of result elements distributed to each process - * @param type data type of each item in send buffer - * @param op reduce operation - * @return communication request - * @throws MPIException - */ -public final Request iReduceScatter( - Buffer buf, int[] counts, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - assertDirectBuffer(buf); - - return new Request(iReduceScatter( - handle, null, buf, counts, - type.handle, type.baseType, op, op.handle)); -} - -private native long iReduceScatter( - long handle, Buffer sendbuf, Object recvbuf, int[] recvcounts, - long type, int baseType, Op jOp, long hOp) throws MPIException; - -/** - * Combine values and scatter the results. - *

Java binding of the MPI operation {@code MPI_REDUCE_SCATTER_BLOCK}. - * @param sendbuf send buffer - * @param recvbuf receive buffer - * @param recvcount element count per block - * @param type data type of each item in send buffer - * @param op reduce operation - * @throws MPIException - */ -public final void reduceScatterBlock(Object sendbuf, Object recvbuf, - int recvcount, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = type.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = type.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - reduceScatterBlock(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, - recvcount, type.handle, type.baseType, op, op.handle); -} - -/** - * Combine values and scatter the results. - *

Java binding of the MPI operation {@code MPI_REDUCE_SCATTER_BLOCK} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf receive buffer - * @param count element count per block - * @param type data type of each item in send buffer - * @param op reduce operation - * @throws MPIException - */ -public final void reduceScatterBlock( - Object buf, int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - reduceScatterBlock(handle, null, false, 0, buf, db, off, count, - type.handle, type.baseType, op, op.handle); -} - -private native void reduceScatterBlock( - long comm, Object sendBuf, boolean sdb, int sOffset, - Object recvBuf, boolean rdb, int rOffset, int rCount, - long type, int baseType, Op jOp, long hOp) throws MPIException; - -/** - * Combine values and scatter the results. - *

Java binding of the MPI operation {@code MPI_IREDUCE_SCATTER_BLOCK}. - * @param sendbuf send buffer - * @param recvbuf receive buffer - * @param recvcount element count per block - * @param type data type of each item in send buffer - * @param op reduce operation - * @return communication request - * @throws MPIException - */ -public final Request iReduceScatterBlock( - Buffer sendbuf, Buffer recvbuf, int recvcount, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iReduceScatterBlock( - handle, sendbuf, recvbuf, recvcount, - type.handle, type.baseType, op, op.handle)); -} - -/** - * Combine values and scatter the results. - *

Java binding of the MPI operation {@code MPI_IREDUCE_SCATTER_BLOCK} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf receive buffer - * @param count element count per block - * @param type data type of each item in send buffer - * @param op reduce operation - * @return communication request - * @throws MPIException - */ -public final Request iReduceScatterBlock( - Buffer buf, int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - assertDirectBuffer(buf); - - return new Request(iReduceScatterBlock( - handle, null, buf, count, type.handle, - type.baseType, op, op.handle)); -} - -private native long iReduceScatterBlock( - long handle, Buffer sendbuf, Buffer recvbuf, int recvcount, - long type, int baseType, Op jOp, long hOp) throws MPIException; - -/** - * Apply the operation given by {@code op} element-wise to the - * elements of {@code inBuf} and {@code inOutBuf} with the result - * stored element-wise in {@code inOutBuf}. - *

Java binding of the MPI operation {@code MPI_REDUCE_LOCAL}. - * @param inBuf input buffer - * @param inOutBuf input buffer, will contain combined output - * @param count number of elements - * @param type data type of each item - * @param op reduce operation - * @throws MPIException - */ -public static void reduceLocal( - Object inBuf, Object inOutBuf, int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - - int inOff = 0, - inOutOff = 0; - - boolean idb = false, - iodb = false; - - if(inBuf instanceof Buffer && !(idb = ((Buffer)inBuf).isDirect())) - { - inOff = type.getOffset(inBuf); - inBuf = ((Buffer)inBuf).array(); - } - - if(inOutBuf instanceof Buffer && !(iodb = ((Buffer)inOutBuf).isDirect())) - { - inOutOff = type.getOffset(inOutBuf); - inOutBuf = ((Buffer)inOutBuf).array(); - } - - if(op.uf == null) - { - reduceLocal(inBuf, idb, inOff, inOutBuf, iodb, inOutOff, - count, type.handle, op.handle); - } - else - { - reduceLocalUf(inBuf, idb, inOff, inOutBuf, iodb, inOutOff, - count, type.handle, type.baseType, op, op.handle); - } -} - -private static native void reduceLocal( - Object inBuf, boolean idb, int inOff, - Object inOutBuf, boolean iodb, int inOutOff, int count, - long type, long op) throws MPIException; - -private static native void reduceLocalUf( - Object inBuf, boolean idb, int inOff, - Object inOutBuf, boolean iodb, int inOutOff, int count, - long type, int baseType, Op jOp, long hOp) throws MPIException; - -/** - * Sets the print name for the communicator. - * @param name name for the communicator - * @throws MPIException - */ -public final void setName(String name) throws MPIException -{ - MPI.check(); - setName(handle, name); -} - -private native void setName(long handle, String name) throws MPIException; - -/** - * Return the print name from the communicator. 
- * @return name of the communicator - * @throws MPIException - */ -public final String getName() throws MPIException -{ - MPI.check(); - return getName(handle); -} - -private native String getName(long handle) throws MPIException; +public class Comm implements Freeable, Cloneable +{ + public final static int TYPE_SHARED = 0; + protected final static int SELF = 1; + protected final static int WORLD = 2; + protected long handle; + private Request request; + + private static long nullHandle; + + static + { + init(); + } + + private static native void init(); + + protected Comm() + { + } + + protected Comm(long handle) + { + this.handle = handle; + } + + protected Comm(long[] commRequest) + { + handle = commRequest[0]; + request = new Request(commRequest[1]); + } + + protected final void setType(int type) + { + getComm(type); + } + + private native void getComm(int type); + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_DUP}. + *

It is recommended to use {@link #dup} instead of {@link #clone} + * because the latter can't throw an {@link mpi.MPIException}. + * @return copy of this communicator + */ + @Override public Comm clone() + { + try + { + return dup(); + } + catch(MPIException e) + { + throw new RuntimeException(e.getMessage()); + } + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_DUP}. + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Comm dup() throws MPIException + { + MPI.check(); + return new Comm(dup(handle)); + } + + protected final native long dup(long comm) throws MPIException; + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_IDUP}. + *

The new communicator can't be used before the operation completes. + * The request object must be obtained by calling {@link #getRequest}. + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Comm iDup() throws MPIException + { + MPI.check(); + return new Comm(iDup(handle)); + } + + protected final native long[] iDup(long comm) throws MPIException; + + /** + * Duplicates this communicator with the info object used in the call. + *

Java binding of {@code MPI_COMM_DUP_WITH_INFO}. + * @param info info object to associate with the new communicator + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Comm dupWithInfo(Info info) throws MPIException + { + MPI.check(); + return new Comm(dupWithInfo(handle, info.handle)); + } + + protected final native long dupWithInfo(long comm, long info) throws MPIException; + + /** + * Returns the associated request to this communicator if it was + * created using {@link #iDup}. + * @return associated request if this communicator was created + * using {@link #iDup}, or null otherwise. + */ + public final Request getRequest() + { + return request; + } + + /** + * Size of group of this communicator. + *

Java binding of the MPI operation {@code MPI_COMM_SIZE}. + * @return number of processes in the group of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final int getSize() throws MPIException + { + MPI.check(); + return getSize(handle); + } + + private native int getSize(long comm) throws MPIException; + + /** + * Rank of this process in the group of this communicator. + *

Java binding of the MPI operation {@code MPI_COMM_RANK}. + * @return rank of the calling process in the group of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final int getRank() throws MPIException + { + MPI.check(); + return getRank(handle); + } + + private native int getRank(long comm) throws MPIException; + + /** + * Compare two communicators. + *

Java binding of the MPI operation {@code MPI_COMM_COMPARE}. + * @param comm1 first communicator + * @param comm2 second communicator + * @return + * {@code MPI.IDENT} results if the {@code comm1} and {@code comm2} + * are references to the same object (i.e., if {@code comm1 == comm2}).
+ * {@code MPI.CONGRUENT} results if the underlying groups are identical + * but the communicators differ by context.
+ * {@code MPI.SIMILAR} results if the underlying groups are similar + * but the communicators differ by context.
+ * {@code MPI.UNEQUAL} results otherwise. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int compare(Comm comm1, Comm comm2) throws MPIException + { + MPI.check(); + return compare(comm1.handle, comm2.handle); + } + + private static native int compare(long comm1, long comm2) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_COMM_FREE}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override final public void free() throws MPIException + { + MPI.check(); + handle = free(handle); + } + + private native long free(long comm) throws MPIException; + + /** + * Test if communicator object is null (has been freed). + * Java binding of {@code MPI_COMM_NULL}. + * @return true if the comm object is null, false otherwise + */ + public final boolean isNull() + { + return handle == nullHandle; + } + + /** + * Java binding of {@code MPI_COMM_SET_INFO}. + * @param info info object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void setInfo(Info info) throws MPIException + { + MPI.check(); + setInfo(handle, info.handle); + } + + private native void setInfo(long comm, long info) throws MPIException; + + /** + * Java binding of {@code MPI_COMM_GET_INFO}. + * @return new info object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Info getInfo() throws MPIException + { + MPI.check(); + return new Info(getInfo(handle)); + } + + private native long getInfo(long comm) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_COMM_DISCONNECT}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public final void disconnect() throws MPIException + { + MPI.check(); + handle = disconnect(handle); + } + + private native long disconnect(long comm) throws MPIException; + + /** + * Return group associated with a communicator. + *

Java binding of the MPI operation {@code MPI_COMM_GROUP}. + * @return group corresponding to this communicator group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Group getGroup() throws MPIException + { + MPI.check(); + return new Group(getGroup(handle)); + } + + private native long getGroup(long comm); + + // Inter-communication + + /** + * Test if this communicator is an inter-communicator. + *

Java binding of the MPI operation {@code MPI_COMM_TEST_INTER}. + * @return {@code true} if this is an inter-communicator, + * {@code false} otherwise + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final boolean isInter() throws MPIException + { + MPI.check(); + return isInter(handle); + } + + private native boolean isInter(long comm) throws MPIException; + + /** + * Create an inter-communicator. + *

+ * Java binding of the MPI operation {@code MPI_INTERCOMM_CREATE}. + *

+ * This operation is defined as a method on the "peer communicator", + * making it analogous to a {@code send} or {@code recv} communication + * with the remote group leader. + * @param localComm local intra-communicator + * @param localLeader rank of local group leader in {@code localComm} + * @param remoteLeader rank of remote group leader in this communicator + * @param tag "safe" tag + * @return new inter-communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Intercomm createIntercomm(Comm localComm, int localLeader, + int remoteLeader, int tag) + throws MPIException + { + MPI.check(); + + return new Intercomm(createIntercomm(handle, localComm.handle, + localLeader, remoteLeader, tag)); + } + + private native long createIntercomm( + long comm, long localComm, int localLeader, + int remoteLeader, int tag) throws MPIException; + + // Blocking Send and Recv + + /** + * Blocking send operation. + *

Java binding of the MPI operation {@code MPI_SEND}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void send(Object buf, int count, Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + send(handle, buf, db, off, count, type.handle, type.baseType, dest, tag); + } + + private native void send( + long comm, Object buf, boolean db, int offset, int count, + long type, int baseType, int dest, int tag) throws MPIException; + + /** + * Blocking receive operation. + *

Java binding of the MPI operation {@code MPI_RECV}. + * @param buf receive buffer + * @param count number of items in receive buffer + * @param type datatype of each item in receive buffer + * @param source rank of source + * @param tag message tag + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Status recv(Object buf, int count, + Datatype type, int source, int tag) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + Status status = new Status(); + + recv(handle, buf, db, off, count, + type.handle, type.baseType, source, tag, status.data); + + return status; + } + + private native void recv( + long comm, Object buf, boolean db, int offset, int count, + long type, int basetype, int source, int tag, long[] stat) + throws MPIException; + + // Send-Recv + + /** + * Execute a blocking send and receive operation. + *

Java binding of the MPI operation {@code MPI_SENDRECV}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param dest rank of destination + * @param sendtag send tag + * @param recvbuf receive buffer + * @param recvcount number of items in receive buffer + * @param recvtype datatype of each item in receive buffer + * @param source rank of source + * @param recvtag receive tag + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + * @see mpi.Comm#recv(Object, int, Datatype, int, int) + */ + public final Status sendRecv( + Object sendbuf, int sendcount, Datatype sendtype, int dest, int sendtag, + Object recvbuf, int recvcount, Datatype recvtype, int source, int recvtag) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + Status status = new Status(); + + sendRecv(handle, sendbuf, sdb, sendoff, sendcount, + sendtype.handle, sendtype.baseType, dest, sendtag, + recvbuf, rdb, recvoff, recvcount, + recvtype.handle, recvtype.baseType, source, recvtag, status.data); + + return status; + } + + private native void sendRecv( + long comm, Object sbuf, boolean sdb, int soffset, int scount, + long sType, int sBaseType, int dest, int stag, + Object rbuf, boolean rdb, int roffset, int rcount, + long rType, int rBaseType, int source, int rtag, + long[] stat) throws MPIException; + + /** + * Execute a blocking send and receive operation, + * receiving message into send buffer. + *

Java binding of the MPI operation {@code MPI_SENDRECV_REPLACE}. + * @param buf buffer + * @param count number of items to send + * @param type datatype of each item in buffer + * @param dest rank of destination + * @param sendtag send tag + * @param source rank of source + * @param recvtag receive tag + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + * @see mpi.Comm#recv(Object, int, Datatype, int, int) + */ + public final Status sendRecvReplace( + Object buf, int count, Datatype type, + int dest, int sendtag, int source, int recvtag) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + Status status = new Status(); + + sendRecvReplace(handle, buf, db, off, count, type.handle, type.baseType, + dest, sendtag, source, recvtag, status.data); + + return status; + } + + private native void sendRecvReplace( + long comm, Object buf, boolean db, int offset, int count, + long type, int baseType, int dest, int stag, + int source, int rtag, long[] stat) throws MPIException; + + // Communication Modes + + /** + * Send in buffered mode. + *

Java binding of the MPI operation {@code MPI_BSEND}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + */ + public final void bSend(Object buf, int count, Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + bSend(handle, buf, db, off, count, type.handle, type.baseType, dest, tag); + } + + private native void bSend( + long comm, Object buf, boolean db, int offset, int count, + long type, int baseType, int dest, int tag) throws MPIException; + + /** + * Send in synchronous mode. + *

Java binding of the MPI operation {@code MPI_SSEND}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + */ + public final void sSend(Object buf, int count, Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + sSend(handle, buf, db, off, count, type.handle, type.baseType, dest, tag); + } + + private native void sSend( + long comm, Object buf, boolean db, int offset, int count, + long type, int baseType, int dest, int tag) throws MPIException; + + /** + * Send in ready mode. + *

Java binding of the MPI operation {@code MPI_RSEND}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + */ + public final void rSend(Object buf, int count, Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + rSend(handle, buf, db, off, count, type.handle, type.baseType, dest, tag); + } + + private native void rSend( + long comm, Object buf, boolean db, int offset, int count, + long type, int baseType, int dest, int tag) throws MPIException; + + // Nonblocking communication + + /** + * Start a standard mode, nonblocking send. + *

Java binding of the MPI operation {@code MPI_ISEND}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + */ + public final Request iSend(Buffer buf, int count, + Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iSend(handle, buf, count, type.handle, dest, tag)); + req.addSendBufRef(buf); + return req; + } + + private native long iSend( + long comm, Buffer buf, int count, long type, int dest, int tag) + throws MPIException; + + /** + * Start a buffered mode, nonblocking send. + *

Java binding of the MPI operation {@code MPI_IBSEND}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + */ + public final Request ibSend(Buffer buf, int count, + Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(ibSend(handle, buf, count, type.handle, dest, tag)); + req.addSendBufRef(buf); + return req; + } + + private native long ibSend( + long comm, Buffer buf, int count, long type, int dest, int tag) + throws MPIException; + + /** + * Start a synchronous mode, nonblocking send. + *

Java binding of the MPI operation {@code MPI_ISSEND}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + */ + public final Request isSend(Buffer buf, int count, + Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(isSend(handle, buf, count, type.handle, dest, tag)); + req.addSendBufRef(buf); + return req; + } + + private native long isSend( + long comm, Buffer buf, int count, long type, int dest, int tag) + throws MPIException; + + /** + * Start a ready mode, nonblocking send. + *

Java binding of the MPI operation {@code MPI_IRSEND}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + */ + public final Request irSend(Buffer buf, int count, + Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(irSend(handle, buf, count, type.handle, dest, tag)); + req.addSendBufRef(buf); + return req; + } + + private native long irSend( + long comm, Buffer buf, int count, long type, int dest, int tag) + throws MPIException; + + /** + * Start a nonblocking receive. + *

Java binding of the MPI operation {@code MPI_IRECV}. + * @param buf receive buffer + * @param count number of items in receive buffer + * @param type datatype of each item in receive buffer + * @param source rank of source + * @param tag message tag + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#recv(Object, int, Datatype, int, int) + */ + public final Request iRecv(Buffer buf, int count, + Datatype type, int source, int tag) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iRecv(handle, buf, count, type.handle, source, tag)); + req.addRecvBufRef(buf); + return req; + } + + private native long iRecv( + long comm, Buffer buf, int count, long type, int source, int tag) + throws MPIException; + + + // Persistent communication requests + + /** + * Creates a persistent communication request for a standard mode send. + *

Java binding of the MPI operation {@code MPI_SEND_INIT}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @return persistent communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + */ + public final Prequest sendInit(Buffer buf, int count, + Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Prequest preq = new Prequest(sendInit(handle, buf, count, type.handle, dest, tag)); + preq.addSendBufRef(buf); + return preq; + } + + private native long sendInit( + long comm, Buffer buf, int count, long type, int dest, int tag) + throws MPIException; + + /** + * Creates a persistent communication request for a buffered mode send. + *

Java binding of the MPI operation {@code MPI_BSEND_INIT}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @return persistent communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + */ + public final Prequest bSendInit(Buffer buf, int count, + Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Prequest preq = new Prequest(bSendInit(handle, buf, count, type.handle, dest, tag)); + preq.addSendBufRef(buf); + return preq; + } + + private native long bSendInit( + long comm, Buffer buf, int count, long type, int dest, int tag) + throws MPIException; + + /** + * Creates a persistent communication request for a synchronous mode send. + *

Java binding of the MPI operation {@code MPI_SSEND_INIT}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @return persistent communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + */ + public final Prequest sSendInit(Buffer buf, int count, + Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Prequest preq = new Prequest(sSendInit(handle, buf, count, type.handle, dest, tag)); + preq.addSendBufRef(buf); + return preq; + } + + private native long sSendInit( + long comm, Buffer buf, int count, long type, int dest, int tag) + throws MPIException; + + /** + * Creates a persistent communication request for a ready mode send. + *

Java binding of the MPI operation {@code MPI_RSEND_INIT}. + * @param buf send buffer + * @param count number of items to send + * @param type datatype of each item in send buffer + * @param dest rank of destination + * @param tag message tag + * @return persistent communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#send(Object, int, Datatype, int, int) + */ + public final Prequest rSendInit(Buffer buf, int count, + Datatype type, int dest, int tag) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Prequest preq = new Prequest(rSendInit(handle, buf, count, type.handle, dest, tag)); + preq.addSendBufRef(buf); + return preq; + } + + private native long rSendInit( + long comm, Buffer buf, int count, long type, int dest, int tag) + throws MPIException; + + /** + * Creates a persistent communication request for a receive operation. + *

Java binding of the MPI operation {@code MPI_RECV_INIT}. + * @param buf receive buffer + * @param count number of items in receive buffer + * @param type datatype of each item in receive buffer + * @param source rank of source + * @param tag message tag + * @return persistent communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + * @see mpi.Comm#recv(Object, int, Datatype, int, int) + */ + public final Prequest recvInit(Buffer buf, int count, + Datatype type, int source, int tag) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Prequest preq = new Prequest(recvInit(handle, buf, count, type.handle, source, tag)); + preq.addRecvBufRef(buf); + return preq; + } + + private native long recvInit( + long comm, Buffer buf, int count, long type, int source, int tag) + throws MPIException; + + // Pack and Unpack + + /** + * Packs message in send buffer {@code inbuf} into space specified in + * {@code outbuf}. + *

+ * Java binding of the MPI operation {@code MPI_PACK}. + *

+ * The return value is the output value of {@code position} - the + * initial value incremented by the number of bytes written. + * @param inbuf input buffer + * @param incount number of items in input buffer + * @param type datatype of each item in input buffer + * @param outbuf output buffer + * @param position initial position in output buffer + * @return final position in output buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final int pack(Object inbuf, int incount, Datatype type, + byte[] outbuf, int position) + throws MPIException + { + MPI.check(); + int offset = 0; + boolean indb = false; + + if(inbuf instanceof Buffer && !(indb = ((Buffer)inbuf).isDirect())) + { + offset = type.getOffset(inbuf); + inbuf = ((Buffer)inbuf).array(); + } + + return pack(handle, inbuf, indb, offset, incount, + type.handle, outbuf, position); + } + + private native int pack( + long comm, Object inbuf, boolean indb, int offset, int incount, + long type, byte[] outbuf, int position) throws MPIException; + + /** + * Unpacks message in receive buffer {@code inbuf} into space specified in + * {@code outbuf}. + *

+ * Java binding of the MPI operation {@code MPI_UNPACK}. + *

+ * The return value is the output value of {@code position} - the + * initial value incremented by the number of bytes read. + * @param inbuf input buffer + * @param position initial position in input buffer + * @param outbuf output buffer + * @param outcount number of items in output buffer + * @param type datatype of each item in output buffer + * @return final position in input buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final int unpack(byte[] inbuf, int position, + Object outbuf, int outcount, Datatype type) + throws MPIException + { + MPI.check(); + int offset = 0; + boolean outdb = false; + + if(outbuf instanceof Buffer && !(outdb = ((Buffer)outbuf).isDirect())) + { + offset = type.getOffset(outbuf); + outbuf = ((Buffer)outbuf).array(); + } + + return unpack(handle, inbuf, position, outbuf, outdb, + offset, outcount, type.handle); + } + + private native int unpack( + long comm, byte[] inbuf, int position, Object outbuf, boolean outdb, + int offset, int outcount, long type) throws MPIException; + + /** + * Returns an upper bound on the increment of {@code position} effected + * by {@code pack}. + *

Java binding of the MPI operation {@code MPI_PACK_SIZE}. + * @param incount number of items in input buffer + * @param type datatype of each item in input buffer + * @return upper bound on size of packed message + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final int packSize(int incount, Datatype type) throws MPIException + { + MPI.check(); + return packSize(handle, incount, type.handle); + } + + private native int packSize(long comm, int incount, long type) + throws MPIException; + + // Probe and Cancel + + /** + * Check if there is an incoming message matching the pattern specified. + *

Java binding of the MPI operation {@code MPI_IPROBE}. + *

If such a message is currently available, a status object similar + * to the return value of a matching {@code recv} operation is returned. + * @param source rank of source + * @param tag message tag + * @return status object if such a message is currently available, + * {@code null} otherwise. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Status iProbe(int source, int tag) throws MPIException + { + MPI.check(); + return iProbe(handle, source, tag); + } + + private native Status iProbe(long comm, int source, int tag) + throws MPIException; + + /** + * Wait until there is an incoming message matching the pattern specified. + *

Java binding of the MPI operation {@code MPI_PROBE}. + *

Returns a status object similar to the return value of a matching + * {@code recv} operation. + * @param source rank of source + * @param tag message tag + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Status probe(int source, int tag) throws MPIException + { + MPI.check(); + Status status = new Status(); + probe(handle, source, tag, status.data); + return status; + } + + private native void probe(long comm, int source, int tag, long[] stat) + throws MPIException; + + // Caching + + /** + * Create a new attribute key. + *

Java binding of the MPI operation {@code MPI_COMM_CREATE_KEYVAL}. + * @return attribute key for future access + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int createKeyval() throws MPIException + { + MPI.check(); + return createKeyval_jni(); + } + + private static native int createKeyval_jni() throws MPIException; + + /** + * Frees an attribute key for communicators. + *

Java binding of the MPI operation {@code MPI_COMM_FREE_KEYVAL}. + * @param keyval attribute key + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void freeKeyval(int keyval) throws MPIException + { + MPI.check(); + freeKeyval_jni(keyval); + } + + private static native void freeKeyval_jni(int keyval) throws MPIException; + + /** + * Stores attribute value associated with a key. + *

Java binding of the MPI operation {@code MPI_COMM_SET_ATTR}. + * @param keyval attribute key + * @param value attribute value + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void setAttr(int keyval, Object value) throws MPIException + { + MPI.check(); + setAttr(handle, keyval, MPI.attrSet(value)); + } + + private native void setAttr(long comm, int keyval, byte[] value) + throws MPIException; + + /** + * Retrieves attribute value by key. + *

Java binding of the MPI operation {@code MPI_COMM_GET_ATTR}. + * @param keyval attribute key + * @return attribute value or null if no attribute is associated with the key. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Object getAttr(int keyval) throws MPIException + { + MPI.check(); + + if( keyval == MPI.TAG_UB || + keyval == MPI.HOST || + keyval == MPI.IO || + keyval == MPI.APPNUM || + keyval == MPI.LASTUSEDCODE || + keyval == MPI.UNIVERSE_SIZE) + { + return getAttr_predefined(handle, keyval); + } + else if(keyval == MPI.WTIME_IS_GLOBAL) + { + Integer value = (Integer)getAttr_predefined(handle, keyval); + return value==null ? null : value.intValue() != 0; + } + else + { + return MPI.attrGet(getAttr(handle, keyval)); + } + } + + private native Object getAttr_predefined(long comm, int keyval) + throws MPIException; + + private native byte[] getAttr(long comm, int keyval) throws MPIException; + + /** + * Deletes an attribute value associated with a key on a communicator. + *

Java binding of the MPI operation {@code MPI_COMM_DELETE_ATTR}. + * @param keyval attribute key + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void deleteAttr(int keyval) throws MPIException + { + MPI.check(); + deleteAttr(handle, keyval); + } + + private native void deleteAttr(long comm, int keyval) throws MPIException; + + // Process Topologies + + /** + * Returns the type of topology associated with the communicator. + *

Java binding of the MPI operation {@code MPI_TOPO_TEST}. + *

The return value will be one of {@code MPI.GRAPH}, {@code MPI.CART} + * or {@code MPI.UNDEFINED}. + * @return topology type of communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final int getTopology() throws MPIException + { + MPI.check(); + return getTopology(handle); + } + + private native int getTopology(long comm) throws MPIException; + + // Environmental Management + + /** + * Abort MPI. + *

Java binding of the MPI operation {@code MPI_ABORT}. + * @param errorcode error code for Unix or POSIX environments + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void abort(int errorcode) throws MPIException + { + MPI.check(); + abort(handle, errorcode); + } + + private native void abort(long comm, int errorcode) throws MPIException; + + // Error handler + + /** + * Associates a new error handler with communicator at the calling process. + *

Java binding of the MPI operation {@code MPI_ERRHANDLER_SET}. + * @param errhandler new MPI error handler for communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void setErrhandler(Errhandler errhandler) throws MPIException + { + MPI.check(); + setErrhandler(handle, errhandler.handle); + } + + private native void setErrhandler(long comm, long errhandler) + throws MPIException; + + /** + * Returns the error handler currently associated with the communicator. + *

Java binding of the MPI operation {@code MPI_ERRHANDLER_GET}. + * @return MPI error handler currently associated with communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Errhandler getErrhandler() throws MPIException + { + MPI.check(); + return new Errhandler(getErrhandler(handle)); + } + + private native long getErrhandler(long comm); + + // Collective Communication + + /** + * A call to {@code barrier} blocks the caller until all processes + * in the group have called it. + *

Java binding of the MPI operation {@code MPI_BARRIER}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void barrier() throws MPIException + { + MPI.check(); + barrier(handle); + } + + private native void barrier(long comm) throws MPIException; + + /** + * Nonblocking barrier synchronization. + *

Java binding of the MPI operation {@code MPI_IBARRIER}. + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iBarrier() throws MPIException + { + MPI.check(); + return new Request(iBarrier(handle)); + } + + private native long iBarrier(long comm) throws MPIException; + + /** + * Broadcast a message from the process with rank {@code root} + * to all processes of the group. + *

Java binding of the MPI operation {@code MPI_BCAST}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each item in buffer + * @param root rank of broadcast root + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void bcast(Object buf, int count, Datatype type, int root) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + bcast(handle, buf, db, off, count, type.handle, type.baseType, root); + } + + private native void bcast( + long comm, Object buf, boolean db, int offset, int count, + long type, int basetype, int root) throws MPIException; + + /** + * Broadcast a message from the process with rank {@code root} + * to all processes of the group. + *

Java binding of the MPI operation {@code MPI_IBCAST}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each item in buffer + * @param root rank of broadcast root + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iBcast(Buffer buf, int count, Datatype type, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iBcast(handle, buf, count, type.handle, root)); + req.addSendBufRef(buf); + return req; + } + + private native long iBcast( + long comm, Buffer buf, int count, long type, int root) + throws MPIException; + + /** + * Each process sends the contents of its send buffer to the root process. + *

Java binding of the MPI operation {@code MPI_GATHER}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @param root rank of receiving process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void gather( + Object sendbuf, int sendcount, Datatype sendtype, + Object recvbuf, int recvcount, Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + gather(handle, sendbuf, sdb, sendoff, sendcount, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, + recvtype.handle, recvtype.baseType, root); + } + + /** + * Each process sends the contents of its send buffer to the root process. + *

Java binding of the MPI operation {@code MPI_GATHER} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * The buffer is used by the root process to receive data, + * and it is used by the non-root processes to send data. + * @param buf buffer + * @param count number of items to send/receive + * @param type datatype of each item in buffer + * @param root rank of receiving process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void gather(Object buf, int count, Datatype type, int root) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + gather(handle, null, false, 0, 0, 0, 0, + buf, db, off, count, type.handle, type.baseType, root); + } + + private native void gather( + long comm, Object sendBuf, boolean sdb, int sendOff, int sendCount, + long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOff, int recvCount, + long recvType, int recvBaseType, int root) + throws MPIException; + + /** + * Each process sends the contents of its send buffer to the root process. + *

Java binding of the MPI operation {@code MPI_IGATHER}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @param root rank of receiving process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iGather( + Buffer sendbuf, int sendcount, Datatype sendtype, + Buffer recvbuf, int recvcount, Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iGather(handle, sendbuf, sendcount, sendtype.handle, + recvbuf, recvcount, recvtype.handle, root)); + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Each process sends the contents of its send buffer to the root process. + *

Java binding of the MPI operation {@code MPI_IGATHER} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * The buffer is used by the root process to receive data, + * and it is used by the non-root processes to send data. + * @param buf buffer + * @param count number of items to send/receive + * @param type datatype of each item in buffer + * @param root rank of receiving process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iGather(Buffer buf, int count, Datatype type, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iGather(handle, null, 0, 0, + buf, count, type.handle, root)); + req.addRecvBufRef(buf); + return req; + } + + private native long iGather( + long comm, Buffer sendbuf, int sendcount, long sendtype, + Buffer recvbuf, int recvcount, long recvtype, + int root) throws MPIException; + + /** + * Extends functionality of {@code gather} by allowing varying + * counts of data from each process. + *

Java binding of the MPI operation {@code MPI_GATHERV}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of elements received from each process + * @param displs displacements at which to place incoming data + * @param recvtype datatype of each item in receive buffer + * @param root rank of receiving process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void gatherv(Object sendbuf, int sendcount, Datatype sendtype, + Object recvbuf, int[] recvcount, int[] displs, + Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + gatherv(handle, sendbuf, sdb, sendoff, sendcount, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, displs, + recvtype.handle, recvtype.baseType, root); + } + + /** + * Extends functionality of {@code gather} by allowing varying + * counts of data from each process. + *

Java binding of the MPI operation {@code MPI_GATHERV} using + * {@code MPI_IN_PLACE} instead of the send buffer in the root process. + * This method must be used in the root process. + * @param recvbuf receive buffer + * @param recvcount number of elements received from each process + * @param displs displacements at which to place incoming data + * @param recvtype datatype of each item in receive buffer + * @param root rank of receiving process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void gatherv(Object recvbuf, int[] recvcount, int[] displs, + Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + int recvoff = 0; + boolean rdb = false; + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + gatherv(handle, null, false, 0, 0, 0, 0, recvbuf, rdb, recvoff, recvcount, + displs, recvtype.handle, recvtype.baseType, root); + } + + /** + * Extends functionality of {@code gather} by allowing varying + * counts of data from each process. + *

Java binding of the MPI operation {@code MPI_GATHERV} using + * {@code MPI_IN_PLACE} instead of the send buffer in the root process. + * This method must be used in the non-root processes. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param root rank of receiving process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void gatherv(Object sendbuf, int sendcount, + Datatype sendtype, int root) + throws MPIException + { + MPI.check(); + int sendoff = 0; + boolean sdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + gatherv(handle, sendbuf, sdb, sendoff, sendcount, + sendtype.handle, sendtype.baseType, + null, false, 0, null, null, 0, 0, root); + } + + private native void gatherv( + long comm, Object sendBuf, boolean sdb, int sendOffset, + int sendCount, long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOffset, + int[] recvCount, int[] displs, long recvType, int recvBaseType, + int root) throws MPIException; + + /** + * Extends functionality of {@code gather} by allowing varying + * counts of data from each process. + *

Java binding of the MPI operation {@code MPI_IGATHERV}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of elements received from each process + * @param displs displacements at which to place incoming data + * @param recvtype datatype of each item in receive buffer + * @param root rank of receiving process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iGatherv( + Buffer sendbuf, int sendcount, Datatype sendtype, Buffer recvbuf, + int[] recvcount, int[] displs, Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iGatherv( + handle, sendbuf, sendcount, sendtype.handle, + recvbuf, recvcount, displs, recvtype.handle, root)); + req.addSendBufRef(sendbuf); + /* Register recvbuf with the request too, as iGather, iScatterv and iAllGatherv do for their receive buffers. */ + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Extends functionality of {@code gather} by allowing varying + * counts of data from each process. + *

Java binding of the MPI operation {@code MPI_IGATHERV} using + * {@code MPI_IN_PLACE} instead of the send buffer in the root process. + * This method must be used in the root process. + * @param recvbuf receive buffer + * @param recvcount number of elements received from each process + * @param displs displacements at which to place incoming data + * @param recvtype datatype of each item in receive buffer + * @param root rank of receiving process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iGatherv(Buffer recvbuf, int[] recvcount, int[] displs, + Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(recvbuf); + Request req = new Request(iGatherv(handle, null, 0, 0, + recvbuf, recvcount, displs, recvtype.handle, root)); + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Extends functionality of {@code gather} by allowing varying + * counts of data from each process. + *

Java binding of the MPI operation {@code MPI_IGATHERV} using + * {@code MPI_IN_PLACE} instead of the send buffer in the root process. + * This method must be used in the non-root processes. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param root rank of receiving process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iGatherv(Buffer sendbuf, int sendcount, + Datatype sendtype, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf); + Request req = new Request(iGatherv(handle, sendbuf, sendcount, sendtype.handle, + null, null, null, 0, root)); + req.addSendBufRef(sendbuf); + return req; + } + + private native long iGatherv( + long handle, Buffer sendbuf, int sendcount, long sendtype, + Buffer recvbuf, int[] recvcount, int[] displs, + long recvtype, int root) + throws MPIException; + + /** + * Inverse of the operation {@code gather}. + *

Java binding of the MPI operation {@code MPI_SCATTER}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @param root rank of sending process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void scatter( + Object sendbuf, int sendcount, Datatype sendtype, + Object recvbuf, int recvcount, Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + scatter(handle, sendbuf, sdb, sendoff, sendcount, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, + recvtype.handle, recvtype.baseType, root); + } + + /** + * Inverse of the operation {@code gather}. + *

Java binding of the MPI operation {@code MPI_SCATTER} + * using {@code MPI_IN_PLACE} instead of the receive buffer. + * The buffer is used by the root process to send data, + * and it is used by the non-root processes to receive data. + * @param buf send/receive buffer + * @param count number of items to send/receive + * @param type datatype of each item in buffer + * @param root rank of sending process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void scatter(Object buf, int count, Datatype type, int root) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + scatter(handle, buf, db, off, count, type.handle, type.baseType, + null, false, 0, 0, 0, 0, root); + } + + private native void scatter( + long comm, Object sendBuf, boolean sdb, int sendOffset, int sendCount, + long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOffset, int recvCount, + long recvType, int recvBaseType, int root) throws MPIException; + + /** + * Inverse of the operation {@code gather}. + *

Java binding of the MPI operation {@code MPI_ISCATTER}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @param root rank of sending process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iScatter( + Buffer sendbuf, int sendcount, Datatype sendtype, + Buffer recvbuf, int recvcount, Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iScatter(handle, sendbuf, sendcount, sendtype.handle, + recvbuf, recvcount, recvtype.handle, root)); + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Inverse of the operation {@code gather}. + *

Java binding of the MPI operation {@code MPI_ISCATTER} + * using {@code MPI_IN_PLACE} instead of the receive buffer. + * The buffer is used by the root process to send data, + * and it is used by the non-root processes to receive data. + * @param buf send/receive buffer + * @param count number of items to send/receive + * @param type datatype of each item in buffer + * @param root rank of sending process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iScatter(Buffer buf, int count, Datatype type, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iScatter(handle, buf, count, type.handle, + null, 0, 0, root)); + req.addSendBufRef(buf); + return req; + } + + private native long iScatter( + long comm, Buffer sendbuf, int sendcount, long sendtype, + Buffer recvbuf, int recvcount, long recvtype, int root) + throws MPIException; + + /** + * Inverse of the operation {@code gatherv}. + *

Java binding of the MPI operation {@code MPI_SCATTERV}. + * @param sendbuf send buffer + * @param sendcount number of items sent to each process + * @param displs displacements from which to take outgoing data + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @param root rank of sending process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void scatterv( + Object sendbuf, int[] sendcount, int[] displs, Datatype sendtype, + Object recvbuf, int recvcount, Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + scatterv(handle, sendbuf, sdb, sendoff, sendcount, displs, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, + recvtype.handle, recvtype.baseType, root); + } + + /** + * Inverse of the operation {@code gatherv}. + *

Java binding of the MPI operation {@code MPI_SCATTERV} using + * {@code MPI_IN_PLACE} instead of the receive buffer in the root process. + * This method must be used in the root process. + * @param sendbuf send buffer + * @param sendcount number of items sent to each process + * @param displs displacements from which to take outgoing data + * @param sendtype datatype of each item in send buffer + * @param root rank of sending process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void scatterv(Object sendbuf, int[] sendcount, int[] displs, + Datatype sendtype, int root) + throws MPIException + { + MPI.check(); + int sendoff = 0; + boolean sdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + scatterv(handle, sendbuf, sdb, sendoff, sendcount, displs, + sendtype.handle, sendtype.baseType, + null, false, 0, 0, 0, 0, root); + } + + /** + * Inverse of the operation {@code gatherv}. + *

Java binding of the MPI operation {@code MPI_SCATTERV} using + * {@code MPI_IN_PLACE} instead of the receive buffer in the root process. + * This method must be used in the non-root processes. + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @param root rank of sending process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void scatterv(Object recvbuf, int recvcount, + Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + int recvoff = 0; + boolean rdb = false; + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + scatterv(handle, null, false, 0, null, null, 0, 0, + recvbuf, rdb, recvoff, recvcount, + recvtype.handle, recvtype.baseType, root); + } + + private native void scatterv( + long comm, Object sendBuf, boolean sdb, int sendOffset, + int[] sendCount, int[] displs, long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOffset, int recvCount, + long recvType, int recvBaseType, int root) + throws MPIException; + + /** + * Inverse of the operation {@code gatherv}. + *

Java binding of the MPI operation {@code MPI_ISCATTERV}. + * @param sendbuf send buffer + * @param sendcount number of items sent to each process + * @param displs displacements from which to take outgoing data + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @param root rank of sending process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iScatterv( + Buffer sendbuf, int[] sendcount, int[] displs, Datatype sendtype, + Buffer recvbuf, int recvcount, Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iScatterv( + handle, sendbuf, sendcount, displs, sendtype.handle, + recvbuf, recvcount, recvtype.handle, root)); + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Inverse of the operation {@code gatherv}. + *

Java binding of the MPI operation {@code MPI_ISCATTERV} using + * {@code MPI_IN_PLACE} instead of the receive buffer in the root process. + * This method must be used in the root process. + * @param sendbuf send buffer + * @param sendcount number of items sent to each process + * @param displs displacements from which to take outgoing data + * @param sendtype datatype of each item in send buffer + * @param root rank of sending process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iScatterv(Buffer sendbuf, int[] sendcount, int[] displs, + Datatype sendtype, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf); + Request req = new Request(iScatterv(handle, sendbuf, sendcount, displs, + sendtype.handle, null, 0, 0, root)); + req.addSendBufRef(sendbuf); + return req; + } + + /** + * Inverse of the operation {@code gatherv}. + *

Java binding of the MPI operation {@code MPI_ISCATTERV} using + * {@code MPI_IN_PLACE} instead of the receive buffer in the root process. + * This method must be used in the non-root processes. + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @param root rank of sending process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iScatterv(Buffer recvbuf, int recvcount, + Datatype recvtype, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(recvbuf); + Request req = new Request(iScatterv(handle, null, null, null, 0, + recvbuf, recvcount, recvtype.handle, root)); + req.addRecvBufRef(recvbuf); + return req; + } + + private native long iScatterv( + long comm, Buffer sendbuf, int[] sendcount, int[] displs, long sendtype, + Buffer recvbuf, int recvcount, long recvtype, int root) + throws MPIException; + + /** + * Similar to {@code gather}, but all processes receive the result. + *

Java binding of the MPI operation {@code MPI_ALLGATHER}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void allGather(Object sendbuf, int sendcount, Datatype sendtype, + Object recvbuf, int recvcount, Datatype recvtype) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + allGather(handle, sendbuf, sdb, sendoff, sendcount, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, + recvtype.handle, recvtype.baseType); + } + + /** + * Similar to {@code gather}, but all processes receive the result. + *

Java binding of the MPI operation {@code MPI_ALLGATHER} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf receive buffer + * @param count number of items to receive + * @param type datatype of each item in receive buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void allGather(Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + allGather(handle, null, false, 0, 0, 0, 0, + buf, db, off, count, type.handle, type.baseType); + } + + private native void allGather( + long comm, Object sendBuf, boolean sdb, int sendOffset, int sendCount, + long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOffset, int recvCount, + long recvType, int recvBaseType) throws MPIException; + + /** + * Similar to {@code gather}, but all processes receive the result. + *

Java binding of the MPI operation {@code MPI_IALLGATHER}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iAllGather( + Buffer sendbuf, int sendcount, Datatype sendtype, + Buffer recvbuf, int recvcount, Datatype recvtype) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iAllGather(handle, sendbuf, sendcount, sendtype.handle, + recvbuf, recvcount, recvtype.handle)); + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + + /** + * Similar to {@code gather}, but all processes receive the result. + *

Java binding of the MPI operation {@code MPI_IALLGATHER} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf receive buffer + * @param count number of items to receive + * @param type datatype of each item in receive buffer + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iAllGather(Buffer buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iAllGather(handle, null, 0, 0, buf, count, type.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iAllGather( + long comm, Buffer sendbuf, int sendcount, long sendtype, + Buffer recvbuf, int recvcount, long recvtype) throws MPIException; + + /** + * Similar to {@code gatherv}, but all processes receive the result. + *

Java binding of the MPI operation {@code MPI_ALLGATHERV}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of elements received from each process + * @param displs displacements at which to place incoming data + * @param recvtype datatype of each item in receive buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void allGatherv( + Object sendbuf, int sendcount, Datatype sendtype, + Object recvbuf, int[] recvcount, int[] displs, Datatype recvtype) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + allGatherv(handle, sendbuf, sdb, sendoff, sendcount, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, displs, + recvtype.handle, recvtype.baseType); + } + + /** + * Similar to {@code gatherv}, but all processes receive the result. + *

Java binding of the MPI operation {@code MPI_ALLGATHERV} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param recvbuf receive buffer + * @param recvcount number of elements received from each process + * @param displs displacements at which to place incoming data + * @param recvtype datatype of each item in receive buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void allGatherv(Object recvbuf, int[] recvcount, + int[] displs, Datatype recvtype) + throws MPIException + { + MPI.check(); + int recvoff = 0; + boolean rdb = false; + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + allGatherv(handle, null, false, 0, 0, 0, 0, + recvbuf, rdb, recvoff, recvcount, + displs, recvtype.handle, recvtype.baseType); + } + + private native void allGatherv( + long comm, Object sendBuf, boolean sdb, int sendOffset, int sendCount, + long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOffset, int[] recvCount, + int[] displs, long recvType, int recvBasetype) throws MPIException; + + /** + * Similar to {@code gatherv}, but all processes receive the result. + *

Java binding of the MPI operation {@code MPI_IALLGATHERV}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of elements received from each process + * @param displs displacements at which to place incoming data + * @param recvtype datatype of each item in receive buffer + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iAllGatherv( + Buffer sendbuf, int sendcount, Datatype sendtype, + Buffer recvbuf, int[] recvcount, int[] displs, Datatype recvtype) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iAllGatherv( + handle, sendbuf, sendcount, sendtype.handle, + recvbuf, recvcount, displs, recvtype.handle)); + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Similar to {@code gatherv}, but all processes receive the result. + *

Java binding of the MPI operation {@code MPI_IALLGATHERV} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf receive buffer + * @param count number of elements received from each process + * @param displs displacements at which to place incoming data + * @param type datatype of each item in receive buffer + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iAllGatherv( + Buffer buf, int[] count, int[] displs, Datatype type) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + /* No send buffer is passed (null, 0, 0) for the MPI_IN_PLACE variant. */ + Request req = new Request(iAllGatherv( + handle, null, 0, 0, buf, count, displs, type.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iAllGatherv( + long handle, Buffer sendbuf, int sendcount, long sendtype, + Buffer recvbuf, int[] recvcount, int[] displs, long recvtype) + throws MPIException; + + /** + * Extension of {@code allGather} to the case where each process sends + * distinct data to each of the receivers. + *

Java binding of the MPI operation {@code MPI_ALLTOALL}. + * @param sendbuf send buffer + * @param sendcount number of items sent to each process + * @param sendtype datatype of send buffer items + * @param recvbuf receive buffer + * @param recvcount number of items received from any process + * @param recvtype datatype of receive buffer items + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void allToAll(Object sendbuf, int sendcount, Datatype sendtype, + Object recvbuf, int recvcount, Datatype recvtype) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + /* A non-direct Buffer is replaced by its backing array plus the offset from getOffset; a direct Buffer is passed as is, with its flag set to true. */ + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + allToAll(handle, sendbuf, sdb, sendoff, sendcount, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, + recvtype.handle, recvtype.baseType); + } + + private native void allToAll( + long comm, Object sendBuf, boolean sdb, int sendOffset, int sendCount, + long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOffset, int recvCount, + long recvType, int recvBaseType) throws MPIException; + + /** + * Extension of {@code allGather} to the case where each process sends + * distinct data to each of the receivers. + *

Java binding of the MPI operation {@code MPI_IALLTOALL}. + * @param sendbuf send buffer + * @param sendcount number of items sent to each process + * @param sendtype datatype of send buffer items + * @param recvbuf receive buffer + * @param recvcount number of items received from any process + * @param recvtype datatype of receive buffer items + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iAllToAll(Buffer sendbuf, int sendcount, Datatype sendtype, + Buffer recvbuf, int recvcount, Datatype recvtype) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iAllToAll(handle, sendbuf, sendcount, sendtype.handle, + recvbuf, recvcount, recvtype.handle)); + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + private native long iAllToAll( + long comm, Buffer sendbuf, int sendcount, long sendtype, + Buffer recvbuf, int recvcount, long recvtype) throws MPIException; + + /** + * Adds flexibility to {@code allToAll}: location of data for send is + * specified by {@code sdispls} and location to place data on receive + * side is specified by {@code rdispls}. + *

Java binding of the MPI operation {@code MPI_ALLTOALLV}. + * @param sendbuf send buffer + * @param sendcount number of items sent to each process + * @param sdispls displacements from which to take outgoing data + * @param sendtype datatype of send buffer items + * @param recvbuf receive buffer + * @param recvcount number of elements received from each process + * @param rdispls displacements at which to place incoming data + * @param recvtype datatype of each item in receive buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void allToAllv( + Object sendbuf, int[] sendcount, int[] sdispls, Datatype sendtype, + Object recvbuf, int[] recvcount, int[] rdispls, Datatype recvtype) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + allToAllv(handle, sendbuf, sdb, sendoff, sendcount, sdispls, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, rdispls, + recvtype.handle, recvtype.baseType); + } + + private native void allToAllv( + long comm, Object sendBuf, boolean sdb, int sendOffset, + int[] sendCount, int[] sdispls, long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOffset, + int[] recvCount, int[] rdispls, long recvType, int recvBaseType) + throws MPIException; + + /** + * Adds flexibility to {@code allToAll}: location of data for send is + * specified by {@code sdispls} and location to place data on receive + * side is specified by {@code rdispls}. + *

Java binding of the MPI operation {@code MPI_IALLTOALLV}. + * @param sendbuf send buffer + * @param sendcount number of items sent to each process + * @param sdispls displacements from which to take outgoing data + * @param sendtype datatype of send buffer items + * @param recvbuf receive buffer + * @param recvcount number of elements received from each process + * @param rdispls displacements at which to place incoming data + * @param recvtype datatype of each item in receive buffer + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iAllToAllv( + Buffer sendbuf, int[] sendcount, int[] sdispls, Datatype sendtype, + Buffer recvbuf, int[] recvcount, int[] rdispls, Datatype recvtype) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iAllToAllv( + handle, sendbuf, sendcount, sdispls, sendtype.handle, + recvbuf, recvcount, rdispls, recvtype.handle)); + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + private native long iAllToAllv(long comm, + Buffer sendbuf, int[] sendcount, int[] sdispls, long sendtype, + Buffer recvbuf, int[] recvcount, int[] rdispls, long recvtype) + throws MPIException; + + /** + * Adds flexibility to {@code allToAll}: location of data for send is + * specified by {@code sDispls} and location to place data on receive + * side is specified by {@code rDispls}. + *

Java binding of the MPI operation {@code MPI_ALLTOALLW}. + * @param sendBuf send buffer + * @param sendCount number of items sent to each process + * @param sDispls displacements from which to take outgoing data + * @param sendTypes datatypes of send buffer items + * @param recvBuf receive buffer + * @param recvCount number of elements received from each process + * @param rDispls displacements at which to place incoming data + * @param recvTypes datatypes of receive buffer items + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void allToAllw( + Buffer sendBuf, int[] sendCount, int[] sDispls, Datatype[] sendTypes, + Buffer recvBuf, int[] recvCount, int[] rDispls, Datatype[] recvTypes) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendBuf, recvBuf); + + /* The native call takes long[] arrays, so both Datatype arrays go through convertTypeArray first. */ + long[] sendHandles = convertTypeArray(sendTypes); + long[] recvHandles = convertTypeArray(recvTypes); + + allToAllw(handle, sendBuf, sendCount, sDispls, + sendHandles, recvBuf, recvCount, rDispls, + recvHandles); + } + + private native void allToAllw(long comm, + Buffer sendBuf, int[] sendCount, int[] sDispls, long[] sendTypes, + Buffer recvBuf, int[] recvCount, int[] rDispls, long[] recvTypes) + throws MPIException; + + /** + * Adds flexibility to {@code iAllToAll}: location of data for send is + * specified by {@code sDispls} and location to place data on receive + * side is specified by {@code rDispls}. + *

Java binding of the MPI operation {@code MPI_IALLTOALLW}. + * @param sendBuf send buffer + * @param sendCount number of items sent to each process + * @param sDispls displacements from which to take outgoing data + * @param sendTypes datatypes of send buffer items + * @param recvBuf receive buffer + * @param recvCount number of elements received from each process + * @param rDispls displacements at which to place incoming data + * @param recvTypes datatypes of receive buffer items + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iAllToAllw( + Buffer sendBuf, int[] sendCount, int[] sDispls, Datatype[] sendTypes, + Buffer recvBuf, int[] recvCount, int[] rDispls, Datatype[] recvTypes) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendBuf, recvBuf); + + long[] sendHandles = convertTypeArray(sendTypes); + long[] recvHandles = convertTypeArray(recvTypes); + Request req = new Request(iAllToAllw( + handle, sendBuf, sendCount, sDispls, sendHandles, + recvBuf, recvCount, rDispls, recvHandles)); + req.addSendBufRef(sendBuf); + req.addRecvBufRef(recvBuf); + return req; + } + + private native long iAllToAllw(long comm, + Buffer sendBuf, int[] sendCount, int[] sDispls, long[] sendTypes, + Buffer recvBuf, int[] recvCount, int[] rDispls, long[] recvTypes) + throws MPIException; + + /** + * Java binding of {@code MPI_NEIGHBOR_ALLGATHER}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred.
+ */ + public final void neighborAllGather( + Object sendbuf, int sendcount, Datatype sendtype, + Object recvbuf, int recvcount, Datatype recvtype) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + /* A non-direct Buffer is replaced by its backing array plus the offset from getOffset; a direct Buffer is passed as is, with its flag set to true. */ + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + neighborAllGather(handle, sendbuf, sdb, sendoff, sendcount, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, + recvtype.handle, recvtype.baseType); + } + + private native void neighborAllGather( + long comm, Object sendBuf, boolean sdb, int sendOffset, + int sendCount, long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOffset, + int recvCount, long recvType, int recvBaseType) + throws MPIException; + + /** + * Java binding of {@code MPI_INEIGHBOR_ALLGATHER}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred.
+ */ + public final Request iNeighborAllGather( + Buffer sendbuf, int sendcount, Datatype sendtype, + Buffer recvbuf, int recvcount, Datatype recvtype) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iNeighborAllGather( + handle, sendbuf, sendcount, sendtype.handle, + recvbuf, recvcount, recvtype.handle)); + /* Record both buffers on the request; presumably this keeps them reachable while the operation is pending (confirm in Request). */ + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + private native long iNeighborAllGather( + long comm, Buffer sendBuf, int sendCount, long sendType, + Buffer recvBuf, int recvCount, long recvType) + throws MPIException; + + /** + * Java binding of {@code MPI_NEIGHBOR_ALLGATHERV}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of elements that are received from each neighbor + * @param displs displacements at which to place incoming data + * @param recvtype datatype of receive buffer elements + * @throws MPIException Signals that an MPI exception of some sort has occurred.
+ */ + public final void neighborAllGatherv( + Object sendbuf, int sendcount, Datatype sendtype, + Object recvbuf, int[] recvcount, int[] displs, Datatype recvtype) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + /* A non-direct Buffer is replaced by its backing array plus the offset from getOffset; a direct Buffer is passed as is, with its flag set to true. */ + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + neighborAllGatherv(handle, sendbuf, sdb, sendoff, sendcount, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, displs, + recvtype.handle, recvtype.baseType); + } + + /* Declares MPIException like the other native collectives in this class; the only caller already propagates it. */ + private native void neighborAllGatherv( + long comm, Object sendBuf, boolean sdb, int sendOff, + int sendCount, long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOff, + int[] recvCount, int[] displs, long recvType, int recvBaseType) + throws MPIException; + + /** + * Java binding of {@code MPI_INEIGHBOR_ALLGATHERV}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of elements that are received from each neighbor + * @param displs displacements at which to place incoming data + * @param recvtype datatype of receive buffer elements + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred.
+ */ + public final Request iNeighborAllGatherv( + Buffer sendbuf, int sendcount, Datatype sendtype, + Buffer recvbuf, int[] recvcount, int[] displs, Datatype recvtype) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iNeighborAllGatherv( + handle, sendbuf, sendcount, sendtype.handle, + recvbuf, recvcount, displs, recvtype.handle)); + /* Record both buffers on the request; presumably this keeps them reachable while the operation is pending (confirm in Request). */ + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + private native long iNeighborAllGatherv( + long comm, Buffer sendBuf, int sendCount, long sendType, + Buffer recvBuf, int[] recvCount, int[] displs, long recvType) + throws MPIException; + + /** + * Java binding of {@code MPI_NEIGHBOR_ALLTOALL}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred.
+ */ + public final void neighborAllToAll( + Object sendbuf, int sendcount, Datatype sendtype, + Object recvbuf, int recvcount, Datatype recvtype) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + /* A non-direct Buffer is replaced by its backing array plus the offset from getOffset; a direct Buffer is passed as is, with its flag set to true. */ + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + neighborAllToAll(handle, sendbuf, sdb, sendoff, sendcount, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, + recvtype.handle, recvtype.baseType); + } + + private native void neighborAllToAll( + long comm, Object sendBuf, boolean sdb, int sendOff, + int sendCount, long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOff, + int recvCount, long recvType, int recvBaseType) + throws MPIException; + + /** + * Java binding of {@code MPI_INEIGHBOR_ALLTOALL}. + * @param sendbuf send buffer + * @param sendcount number of items to send + * @param sendtype datatype of each item in send buffer + * @param recvbuf receive buffer + * @param recvcount number of items to receive + * @param recvtype datatype of each item in receive buffer + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred.
+ */ + public final Request iNeighborAllToAll( + Buffer sendbuf, int sendcount, Datatype sendtype, + Buffer recvbuf, int recvcount, Datatype recvtype) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iNeighborAllToAll( + handle, sendbuf, sendcount, sendtype.handle, + recvbuf, recvcount, recvtype.handle)); + /* Record both buffers on the request; presumably this keeps them reachable while the operation is pending (confirm in Request). */ + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + /* Declares MPIException like the other native collectives in this class; the only caller already propagates it. */ + private native long iNeighborAllToAll( + long comm, Buffer sendBuf, int sendCount, long sendType, + Buffer recvBuf, int recvCount, long recvType) + throws MPIException; + + /** + * Java binding of {@code MPI_NEIGHBOR_ALLTOALLV}. + * @param sendbuf send buffer + * @param sendcount number of items sent to each process + * @param sdispls displacements from which to take outgoing data + * @param sendtype datatype of send buffer items + * @param recvbuf receive buffer + * @param recvcount number of elements received from each process + * @param rdispls displacements at which to place incoming data + * @param recvtype datatype of each item in receive buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred.
+ */ + public final void neighborAllToAllv( + Object sendbuf, int[] sendcount, int[] sdispls, Datatype sendtype, + Object recvbuf, int[] recvcount, int[] rdispls, Datatype recvtype) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + /* A non-direct Buffer is replaced by its backing array plus the offset from getOffset; a direct Buffer is passed as is, with its flag set to true. */ + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = sendtype.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = recvtype.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + neighborAllToAllv(handle, + sendbuf, sdb, sendoff, sendcount, sdispls, + sendtype.handle, sendtype.baseType, + recvbuf, rdb, recvoff, recvcount, rdispls, + recvtype.handle, recvtype.baseType); + } + + private native void neighborAllToAllv( + long comm, Object sendBuf, boolean sdb, int sendOff, + int[] sendCount, int[] sdispls, long sendType, int sendBaseType, + Object recvBuf, boolean rdb, int recvOff, + int[] recvCount, int[] rdispls, long recvType, int recvBaseType) + throws MPIException; + + /** + * Java binding of {@code MPI_INEIGHBOR_ALLTOALLV}. + * @param sendbuf send buffer + * @param sendcount number of items sent to each process + * @param sdispls displacements from which to take outgoing data + * @param sendtype datatype of send buffer items + * @param recvbuf receive buffer + * @param recvcount number of elements received from each process + * @param rdispls displacements at which to place incoming data + * @param recvtype datatype of each item in receive buffer + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred.
+ */ + public final Request iNeighborAllToAllv( + Buffer sendbuf, int[] sendcount, int[] sdispls, Datatype sendtype, + Buffer recvbuf, int[] recvcount, int[] rdispls, Datatype recvtype) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iNeighborAllToAllv( + handle, sendbuf, sendcount, sdispls, sendtype.handle, + recvbuf, recvcount, rdispls, recvtype.handle)); + /* Record both buffers on the request; presumably this keeps them reachable while the operation is pending (confirm in Request). */ + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + private native long iNeighborAllToAllv( + long comm, Buffer sendBuf, int[] sendCount, int[] sdispls, long sType, + Buffer recvBuf, int[] recvCount, int[] rdispls, long rType) + throws MPIException; + + /** + * Combine elements in input buffer of each process using the reduce + * operation, and return the combined value in the output buffer of the + * root process. + *

+ * Java binding of the MPI operation {@code MPI_REDUCE}. + *

+ * The predefined operations are available in Java as {@code MPI.MAX}, + * {@code MPI.MIN}, {@code MPI.SUM}, {@code MPI.PROD}, {@code MPI.LAND}, + * {@code MPI.BAND}, {@code MPI.LOR}, {@code MPI.BOR}, {@code MPI.LXOR}, + * {@code MPI.BXOR}, {@code MPI.MINLOC} and {@code MPI.MAXLOC}. + * @param sendbuf send buffer + * @param recvbuf receive buffer + * @param count number of items in send buffer + * @param type data type of each item in send buffer + * @param op reduce operation + * @param root rank of root process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void reduce(Object sendbuf, Object recvbuf, int count, + Datatype type, Op op, int root) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + /* A non-direct Buffer is replaced by its backing array plus the offset from getOffset; a direct Buffer is passed as is, with its flag set to true. */ + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = type.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = type.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + reduce(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, + count, type.handle, type.baseType, op, op.handle, root); + } + + /** + * Combine elements in input buffer of each process using the reduce + * operation, and return the combined value in the output buffer of the + * root process. + *

Java binding of the MPI operation {@code MPI_REDUCE} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf send/receive buffer + * @param count number of items in buffer + * @param type data type of each item in buffer + * @param op reduce operation + * @param root rank of root process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void reduce(Object buf, int count, Datatype type, Op op, int root) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + int off = 0; + boolean db = false; + + /* A non-direct Buffer is replaced by its backing array plus the offset from getOffset; a direct Buffer is passed as is, with db set to true. */ + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + /* No send buffer is passed (null, false, 0) for the MPI_IN_PLACE variant. */ + reduce(handle, null, false, 0, buf, db, off, count, + type.handle, type.baseType, op, op.handle, root); + } + + private native void reduce( + long comm, Object sendbuf, boolean sdb, int sendoff, + Object recvbuf, boolean rdb, int recvoff, int count, + long type, int baseType, Op jOp, long hOp, int root) + throws MPIException; + + /** + * Combine elements in input buffer of each process using the reduce + * operation, and return the combined value in the output buffer of the + * root process. + *

Java binding of the MPI operation {@code MPI_IREDUCE}. + * @param sendbuf send buffer + * @param recvbuf receive buffer + * @param count number of items in send buffer + * @param type data type of each item in send buffer + * @param op reduce operation + * @param root rank of root process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iReduce(Buffer sendbuf, Buffer recvbuf, + int count, Datatype type, Op op, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + op.setDatatype(type); + Request req = new Request(iReduce( + handle, sendbuf, recvbuf, count, + type.handle, type.baseType, op, op.handle, root)); + /* Record both buffers on the request; presumably this keeps them reachable while the operation is pending (confirm in Request). */ + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Combine elements in input buffer of each process using the reduce + * operation, and return the combined value in the output buffer of the + * root process. + *

Java binding of the MPI operation {@code MPI_IREDUCE} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf send/receive buffer + * @param count number of items in buffer + * @param type data type of each item in buffer + * @param op reduce operation + * @param root rank of root process + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iReduce(Buffer buf, int count, + Datatype type, Op op, int root) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + op.setDatatype(type); + Request req = new Request(iReduce( + handle, null, buf, count, + type.handle, type.baseType, op, op.handle, root)); + /* NOTE(review): buf is passed as recvbuf above, and the MPI_IN_PLACE overloads of iAllReduce and iReduceScatter use addRecvBufRef; confirm that addSendBufRef is intended here. */ + req.addSendBufRef(buf); + return req; + } + + private native long iReduce( + long comm, Buffer sendbuf, Buffer recvbuf, int count, + long type, int baseType, Op jOp, long hOp, int root) + throws MPIException; + + /** + * Same as {@code reduce} except that the result appears in receive + * buffer of all processes in the group. + *

Java binding of the MPI operation {@code MPI_ALLREDUCE}. + * @param sendbuf send buffer + * @param recvbuf receive buffer + * @param count number of items in send buffer + * @param type data type of each item in send buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void allReduce(Object sendbuf, Object recvbuf, + int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + /* A non-direct Buffer is replaced by its backing array plus the offset from getOffset; a direct Buffer is passed as is, with its flag set to true. */ + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = type.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = type.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + allReduce(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, + count, type.handle, type.baseType, op, op.handle); + } + + /** + * Same as {@code reduce} except that the result appears in receive + * buffer of all processes in the group. + *

Java binding of the MPI operation {@code MPI_ALLREDUCE} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf send/receive buffer + * @param count number of items in buffer + * @param type data type of each item in buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void allReduce(Object buf, int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + /* No send buffer is passed (null, false, 0) for the MPI_IN_PLACE variant. */ + allReduce(handle, null, false, 0, buf, db, off, count, + type.handle, type.baseType, op, op.handle); + } + + private native void allReduce( + long comm, Object sendbuf, boolean sdb, int sendoff, + Object recvbuf, boolean rdb, int recvoff, int count, + long type, int baseType, Op jOp, long hOp) throws MPIException; + + /** + * Same as {@code reduce} except that the result appears in receive + * buffer of all processes in the group. + *

Java binding of the MPI operation {@code MPI_IALLREDUCE}. + * @param sendbuf send buffer + * @param recvbuf receive buffer + * @param count number of items in send buffer + * @param type data type of each item in send buffer + * @param op reduce operation + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iAllReduce(Buffer sendbuf, Buffer recvbuf, + int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + assertDirectBuffer(sendbuf, recvbuf); + op.setDatatype(type); + Request req = new Request(iAllReduce(handle, sendbuf, recvbuf, count, + type.handle, type.baseType, op, op.handle)); + /* Record both buffers on the request; presumably this keeps them reachable while the operation is pending (confirm in Request). */ + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Same as {@code reduce} except that the result appears in receive + * buffer of all processes in the group. + *

Java binding of the MPI operation {@code MPI_IALLREDUCE} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf send/receive buffer + * @param count number of items in buffer + * @param type data type of each item in buffer + * @param op reduce operation + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iAllReduce(Buffer buf, int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + assertDirectBuffer(buf); + /* No send buffer is passed (null) for the MPI_IN_PLACE variant. */ + Request req = new Request(iAllReduce( + handle, null, buf, count, + type.handle, type.baseType, op, op.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iAllReduce( + long comm, Buffer sendbuf, Buffer recvbuf, int count, + long type, int baseType, Op jOp, long hOp) throws MPIException; + + /** + * Combine elements in input buffer of each process using the reduce + * operation, and scatter the combined values over the output buffers + * of the processes. + *

Java binding of the MPI operation {@code MPI_REDUCE_SCATTER}. + * @param sendbuf send buffer + * @param recvbuf receive buffer + * @param recvcounts numbers of result elements distributed to each process + * @param type data type of each item in send buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void reduceScatter(Object sendbuf, Object recvbuf, + int[] recvcounts, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + /* A non-direct Buffer is replaced by its backing array plus the offset from getOffset; a direct Buffer is passed as is, with its flag set to true. */ + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = type.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = type.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + reduceScatter(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, + recvcounts, type.handle, type.baseType, op, op.handle); + } + + /** + * Combine elements in input buffer of each process using the reduce + * operation, and scatter the combined values over the output buffers + * of the processes. + *

Java binding of the MPI operation {@code MPI_REDUCE_SCATTER} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf send/receive buffer + * @param counts numbers of result elements distributed to each process + * @param type data type of each item in buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void reduceScatter(Object buf, int[] counts, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + /* No send buffer is passed (null, false, 0) for the MPI_IN_PLACE variant. */ + reduceScatter(handle, null, false, 0, buf, db, off, counts, + type.handle, type.baseType, op, op.handle); + } + + private native void reduceScatter( + long comm, Object sendbuf, boolean sdb, int sendoff, + Object recvbuf, boolean rdb, int recvoff, int[] recvcounts, + long type, int baseType, Op jOp, long hOp) throws MPIException; + + /** + * Combine elements in input buffer of each process using the reduce + * operation, and scatter the combined values over the output buffers + * of the processes. + *

Java binding of the MPI operation {@code MPI_IREDUCE_SCATTER}. + * @param sendbuf send buffer + * @param recvbuf receive buffer + * @param recvcounts numbers of result elements distributed to each process + * @param type data type of each item in send buffer + * @param op reduce operation + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iReduceScatter(Buffer sendbuf, Buffer recvbuf, + int[] recvcounts, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iReduceScatter( + handle, sendbuf, recvbuf, recvcounts, + type.handle, type.baseType, op, op.handle)); + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Combine elements in input buffer of each process using the reduce + * operation, and scatter the combined values over the output buffers + * of the processes. + *

Java binding of the MPI operation {@code MPI_IREDUCE_SCATTER} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf receive buffer + * @param counts numbers of result elements distributed to each process + * @param type data type of each item in send buffer + * @param op reduce operation + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iReduceScatter( + Buffer buf, int[] counts, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + assertDirectBuffer(buf); + Request req = new Request(iReduceScatter( + handle, null, buf, counts, + type.handle, type.baseType, op, op.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iReduceScatter( + long handle, Buffer sendbuf, Object recvbuf, int[] recvcounts, + long type, int baseType, Op jOp, long hOp) throws MPIException; + + /** + * Combine values and scatter the results. + *

Java binding of the MPI operation {@code MPI_REDUCE_SCATTER_BLOCK}. + * @param sendbuf send buffer + * @param recvbuf receive buffer + * @param recvcount element count per block + * @param type data type of each item in send buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void reduceScatterBlock(Object sendbuf, Object recvbuf, + int recvcount, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = type.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = type.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + reduceScatterBlock(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, + recvcount, type.handle, type.baseType, op, op.handle); + } + + /** + * Combine values and scatter the results. + *

Java binding of the MPI operation {@code MPI_REDUCE_SCATTER_BLOCK} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf receive buffer + * @param count element count per block + * @param type data type of each item in send buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void reduceScatterBlock( + Object buf, int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + reduceScatterBlock(handle, null, false, 0, buf, db, off, count, + type.handle, type.baseType, op, op.handle); + } + + private native void reduceScatterBlock( + long comm, Object sendBuf, boolean sdb, int sOffset, + Object recvBuf, boolean rdb, int rOffset, int rCount, + long type, int baseType, Op jOp, long hOp) throws MPIException; + + /** + * Combine values and scatter the results. + *

Java binding of the MPI operation {@code MPI_IREDUCE_SCATTER_BLOCK}. + * @param sendbuf send buffer + * @param recvbuf receive buffer + * @param recvcount element count per block + * @param type data type of each item in send buffer + * @param op reduce operation + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iReduceScatterBlock( + Buffer sendbuf, Buffer recvbuf, int recvcount, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iReduceScatterBlock( + handle, sendbuf, recvbuf, recvcount, + type.handle, type.baseType, op, op.handle)); + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Combine values and scatter the results. + *

Java binding of the MPI operation {@code MPI_IREDUCE_SCATTER_BLOCK} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf receive buffer + * @param count element count per block + * @param type data type of each item in send buffer + * @param op reduce operation + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iReduceScatterBlock( + Buffer buf, int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + assertDirectBuffer(buf); + Request req = new Request(iReduceScatterBlock( + handle, null, buf, count, type.handle, + type.baseType, op, op.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iReduceScatterBlock( + long handle, Buffer sendbuf, Buffer recvbuf, int recvcount, + long type, int baseType, Op jOp, long hOp) throws MPIException; + + /** + * Apply the operation given by {@code op} element-wise to the + * elements of {@code inBuf} and {@code inOutBuf} with the result + * stored element-wise in {@code inOutBuf}. + *

Java binding of the MPI operation {@code MPI_REDUCE_LOCAL}. + * @param inBuf input buffer + * @param inOutBuf input buffer, will contain combined output + * @param count number of elements + * @param type data type of each item + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void reduceLocal( + Object inBuf, Object inOutBuf, int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + + int inOff = 0, + inOutOff = 0; + + boolean idb = false, + iodb = false; + + if(inBuf instanceof Buffer && !(idb = ((Buffer)inBuf).isDirect())) + { + inOff = type.getOffset(inBuf); + inBuf = ((Buffer)inBuf).array(); + } + + if(inOutBuf instanceof Buffer && !(iodb = ((Buffer)inOutBuf).isDirect())) + { + inOutOff = type.getOffset(inOutBuf); + inOutBuf = ((Buffer)inOutBuf).array(); + } + + if(op.uf == null) + { + reduceLocal(inBuf, idb, inOff, inOutBuf, iodb, inOutOff, + count, type.handle, op.handle); + } + else + { + reduceLocalUf(inBuf, idb, inOff, inOutBuf, iodb, inOutOff, + count, type.handle, type.baseType, op, op.handle); + } + } + + private static native void reduceLocal( + Object inBuf, boolean idb, int inOff, + Object inOutBuf, boolean iodb, int inOutOff, int count, + long type, long op) throws MPIException; + + private static native void reduceLocalUf( + Object inBuf, boolean idb, int inOff, + Object inOutBuf, boolean iodb, int inOutOff, int count, + long type, int baseType, Op jOp, long hOp) throws MPIException; + + /** + * Sets the print name for the communicator. + * @param name name for the communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void setName(String name) throws MPIException + { + MPI.check(); + setName(handle, name); + } + + private native void setName(long handle, String name) throws MPIException; + + /** + * Return the print name from the communicator. 
+ * @return name of the communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final String getName() throws MPIException + { + MPI.check(); + return getName(handle); + } + + private native String getName(long handle) throws MPIException; + + /** + * A helper method to convert an array of Datatypes to + * an array of longs (handles). + * @param dArray Array of Datatypes + * @return converted Datatypes + */ + private long[] convertTypeArray(Datatype[] dArray) { + long[] lArray = new long[dArray.length]; + + for(int i = 0; i < lArray.length; i++) { + if(dArray[i] != null) { + lArray[i] = dArray[i].handle; + } + } + return lArray; + } } // Comm diff --git a/ompi/mpi/java/java/Constant.java b/ompi/mpi/java/java/Constant.java index e60ada57d86..de1031c1d84 100644 --- a/ompi/mpi/java/java/Constant.java +++ b/ompi/mpi/java/java/Constant.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -20,103 +22,100 @@ class Constant { - protected int THREAD_SINGLE, THREAD_FUNNELED, THREAD_SERIALIZED, - THREAD_MULTIPLE; + protected int THREAD_SINGLE, THREAD_FUNNELED, THREAD_SERIALIZED, THREAD_MULTIPLE; + + protected int GRAPH, DIST_GRAPH, CART; + protected int ANY_SOURCE, ANY_TAG; + protected int PROC_NULL; + protected int UNDEFINED; + protected int IDENT, CONGRUENT, SIMILAR, UNEQUAL; + protected int TAG_UB, HOST, IO, WTIME_IS_GLOBAL; - protected int GRAPH, DIST_GRAPH, CART; - protected int ANY_SOURCE, ANY_TAG; - protected int PROC_NULL; - protected int UNDEFINED; - protected int IDENT, CONGRUENT, SIMILAR, UNEQUAL; - protected int TAG_UB, HOST, IO, WTIME_IS_GLOBAL; + protected int APPNUM, LASTUSEDCODE, UNIVERSE_SIZE, WIN_BASE, WIN_SIZE, WIN_DISP_UNIT; - protected int APPNUM, LASTUSEDCODE, UNIVERSE_SIZE, WIN_BASE, WIN_SIZE, - WIN_DISP_UNIT; + protected int VERSION, SUBVERSION; + protected int ROOT, KEYVAL_INVALID, BSEND_OVERHEAD; + protected int MAX_OBJECT_NAME, MAX_PORT_NAME, MAX_DATAREP_STRING; + protected int MAX_INFO_KEY, MAX_INFO_VAL; + protected int ORDER_C, ORDER_FORTRAN; - protected int VERSION, SUBVERSION; - protected int ROOT, KEYVAL_INVALID, BSEND_OVERHEAD; - protected int MAX_OBJECT_NAME, MAX_PORT_NAME, MAX_DATAREP_STRING; - protected int MAX_INFO_KEY, MAX_INFO_VAL; - protected int ORDER_C, ORDER_FORTRAN; - protected int DISTRIBUTE_BLOCK, DISTRIBUTE_CYCLIC, DISTRIBUTE_NONE, - DISTRIBUTE_DFLT_DARG; + protected int DISTRIBUTE_BLOCK, DISTRIBUTE_CYCLIC, DISTRIBUTE_NONE, DISTRIBUTE_DFLT_DARG; - protected int MODE_CREATE, MODE_RDONLY, MODE_WRONLY, MODE_RDWR, - MODE_DELETE_ON_CLOSE, MODE_UNIQUE_OPEN, MODE_EXCL, - MODE_APPEND, MODE_SEQUENTIAL; + protected int MODE_CREATE, MODE_RDONLY, MODE_WRONLY, MODE_RDWR, + MODE_DELETE_ON_CLOSE, MODE_UNIQUE_OPEN, MODE_EXCL, + MODE_APPEND, MODE_SEQUENTIAL; - protected int DISPLACEMENT_CURRENT; - protected int SEEK_SET, SEEK_CUR, SEEK_END; + 
protected int DISPLACEMENT_CURRENT; + protected int SEEK_SET, SEEK_CUR, SEEK_END; - protected int MODE_NOCHECK, MODE_NOPRECEDE, MODE_NOPUT, MODE_NOSTORE, - MODE_NOSUCCEED; + protected int MODE_NOCHECK, MODE_NOPRECEDE, MODE_NOPUT, MODE_NOSTORE, MODE_NOSUCCEED; - protected int LOCK_EXCLUSIVE, LOCK_SHARED; + protected int LOCK_EXCLUSIVE, LOCK_SHARED; - // Error classes and codes - protected int SUCCESS; - protected int ERR_BUFFER; - protected int ERR_COUNT; - protected int ERR_TYPE; - protected int ERR_TAG; - protected int ERR_COMM; - protected int ERR_RANK; - protected int ERR_REQUEST; - protected int ERR_ROOT; - protected int ERR_GROUP; - protected int ERR_OP; - protected int ERR_TOPOLOGY; - protected int ERR_DIMS; - protected int ERR_ARG; - protected int ERR_UNKNOWN; - protected int ERR_TRUNCATE; - protected int ERR_OTHER; - protected int ERR_INTERN; - protected int ERR_IN_STATUS; - protected int ERR_PENDING; - protected int ERR_ACCESS; - protected int ERR_AMODE; - protected int ERR_ASSERT; - protected int ERR_BAD_FILE; - protected int ERR_BASE; - protected int ERR_CONVERSION; - protected int ERR_DISP; - protected int ERR_DUP_DATAREP; - protected int ERR_FILE_EXISTS; - protected int ERR_FILE_IN_USE; - protected int ERR_FILE; - protected int ERR_INFO_KEY; - protected int ERR_INFO_NOKEY; - protected int ERR_INFO_VALUE; - protected int ERR_INFO; - protected int ERR_IO; - protected int ERR_KEYVAL; - protected int ERR_LOCKTYPE; - protected int ERR_NAME; - protected int ERR_NO_MEM; - protected int ERR_NOT_SAME; - protected int ERR_NO_SPACE; - protected int ERR_NO_SUCH_FILE; - protected int ERR_PORT; - protected int ERR_QUOTA; - protected int ERR_READ_ONLY; - protected int ERR_RMA_CONFLICT; - protected int ERR_RMA_SYNC; - protected int ERR_SERVICE; - protected int ERR_SIZE; - protected int ERR_SPAWN; - protected int ERR_UNSUPPORTED_DATAREP; - protected int ERR_UNSUPPORTED_OPERATION; - protected int ERR_WIN; - protected int ERR_LASTCODE; - protected int ERR_SYSRESOURCE; + 
// Error classes and codes + protected int SUCCESS; + protected int ERR_BUFFER; + protected int ERR_COUNT; + protected int ERR_TYPE; + protected int ERR_TAG; + protected int ERR_COMM; + protected int ERR_RANK; + protected int ERR_REQUEST; + protected int ERR_ROOT; + protected int ERR_GROUP; + protected int ERR_OP; + protected int ERR_TOPOLOGY; + protected int ERR_DIMS; + protected int ERR_ARG; + protected int ERR_UNKNOWN; + protected int ERR_TRUNCATE; + protected int ERR_OTHER; + protected int ERR_INTERN; + protected int ERR_IN_STATUS; + protected int ERR_PENDING; + protected int ERR_ACCESS; + protected int ERR_AMODE; + protected int ERR_ASSERT; + protected int ERR_BAD_FILE; + protected int ERR_BASE; + protected int ERR_CONVERSION; + protected int ERR_DISP; + protected int ERR_DUP_DATAREP; + protected int ERR_FILE_EXISTS; + protected int ERR_FILE_IN_USE; + protected int ERR_FILE; + protected int ERR_INFO_KEY; + protected int ERR_INFO_NOKEY; + protected int ERR_INFO_VALUE; + protected int ERR_INFO; + protected int ERR_IO; + protected int ERR_KEYVAL; + protected int ERR_LOCKTYPE; + protected int ERR_NAME; + protected int ERR_NO_MEM; + protected int ERR_NOT_SAME; + protected int ERR_NO_SPACE; + protected int ERR_NO_SUCH_FILE; + protected int ERR_PORT; + protected int ERR_QUOTA; + protected int ERR_READ_ONLY; + protected int ERR_RMA_CONFLICT; + protected int ERR_RMA_SYNC; + protected int ERR_SERVICE; + protected int ERR_SIZE; + protected int ERR_SPAWN; + protected int ERR_UNSUPPORTED_DATAREP; + protected int ERR_UNSUPPORTED_OPERATION; + protected int ERR_WIN; + protected int ERR_LASTCODE; + protected int ERR_SYSRESOURCE; - protected Constant() - { - setConstant(); - } + protected Constant() + { + setConstant(); + } - private native void setConstant(); + private native void setConstant(); } // Constant diff --git a/ompi/mpi/java/java/Count.java b/ompi/mpi/java/java/Count.java new file mode 100644 index 00000000000..f0ccc9b552b --- /dev/null +++ 
b/ompi/mpi/java/java/Count.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * + * This file is almost a complete re-write for Open MPI compared to the + * original mpiJava package. Its license and copyright are listed below. + * See for more information. + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * + * File : Count.java + * Author : Nathaniel Graham + * Created : Thu Jul 29 17:13 2015 + */ + +package mpi; + +/** + * This class represents {@code MPI_Count}. + */ +public final class Count implements Comparable +{ + private long count; + + static + { + System.loadLibrary("mpi_java"); + initCount(); + } + + private static native void initCount(); + + public Count(long count) + { + this.count = count; + } + + /** + * Gets value associated with this Count object. + * @return Count value + */ + public long getCount() + { + return this.count; + } + + /** + * Sets the value associated with this Count object. 
+ * @param count the value to set for this count object + */ + public void setCount(long count) + { + this.count = count; + } + + @Override + public boolean equals(Object obj) + { + if(obj instanceof Count) { + if(this.count == ((Count)obj).getCount()) { + return true; + } + } + return false; + } + + public int compareTo(Object obj) + { + if(obj instanceof Count) { + if(this.count - ((Count)obj).getCount() > 0) { + return 1; + } else if(this.count - ((Count)obj).getCount() == 0) { + return 0; + } + } + return -1; + } +} // Count diff --git a/ompi/mpi/java/java/Datatype.java b/ompi/mpi/java/java/Datatype.java index e72b96a23ab..a8e113d1cdb 100644 --- a/ompi/mpi/java/java/Datatype.java +++ b/ompi/mpi/java/java/Datatype.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : Datatype.java * Author : Sang Lim, Sung-Hoon Ko, Xinying Li, Bryan Carpenter * Created : Thu Apr 9 12:22:15 1998 @@ -50,528 +52,528 @@ /** * The {@code Datatype} class represents {@code MPI_Datatype} handles. */ -public final class Datatype implements Freeable -{ -protected long handle; -protected int baseType; -protected int baseSize; - -// Cache to avoid unnecessary jni calls. -private int lb, extent, trueLb, trueExtent; - -protected static final int NULL = 0; -protected static final int BYTE = 1; -protected static final int CHAR = 2; -protected static final int SHORT = 3; -protected static final int BOOLEAN = 4; -protected static final int INT = 5; -protected static final int LONG = 6; -protected static final int FLOAT = 7; -protected static final int DOUBLE = 8; -protected static final int PACKED = 9; -protected static final int INT2 = 10; -protected static final int SHORT_INT = 11; -protected static final int LONG_INT = 12; -protected static final int FLOAT_INT = 13; -protected static final int DOUBLE_INT = 14; -protected static final int FLOAT_COMPLEX = 15; -protected static final int DOUBLE_COMPLEX = 16; - -static -{ - init(); -} - -private static native void init(); - -/* - * Constructor used in static initializer of 'MPI'. 
- * - * (Called before MPI.Init(), so cannot make any native MPI calls.) - * - * (Initialization done in separate 'setBasic', so can create - * datatype objects for 'BYTE', etc in static initializers invoked before - * MPI.Init(), then initialize objects after MPI initialized.) - */ -protected Datatype() -{ -} - -protected void setBasic(int type) -{ - baseType = type; - handle = getDatatype(type); - baseSize = type == NULL ? 0 : getSize(handle); -} - -protected void setBasic(int type, Datatype oldType) -{ - baseType = oldType.baseType; - handle = getDatatype(type); - baseSize = oldType.baseSize; -} - -private static native long getDatatype(int type); - -/* - * Constructor used in 'create*' methods. - */ -private Datatype(Datatype oldType, long handle) -{ - baseType = oldType.baseType; - baseSize = oldType.baseSize; - this.handle = handle; -} - -/* - * Constructor used in 'create*' methods. - */ -private Datatype(int baseType, int baseSize, long handle) -{ - this.baseType = baseType; - this.baseSize = baseSize; - this.handle = handle; -} - -/** - * Returns the lower bound of a datatype. - *

Java binding of the MPI operation {@code MPI_TYPE_GET_EXTENT}. - * @return lower bound of datatype - * @throws MPIException - */ -public int getLb() throws MPIException -{ - if(extent == 0) - getLbExtent(); - - return lb; -} - -/** - * Returns the extent of a datatype. - *

Java binding of the MPI operation {@code MPI_TYPE_GET_EXTENT}. - * @return datatype extent - * @throws MPIException - */ -public int getExtent() throws MPIException -{ - if(extent == 0) - getLbExtent(); - - return extent; -} - -private void getLbExtent() throws MPIException -{ - MPI.check(); - int lbExt[] = new int[2]; - getLbExtent(handle, lbExt); - lb = lbExt[0] / baseSize; - extent = lbExt[1] / baseSize; -} - -private native void getLbExtent(long handle, int[] lbExt); - -/** - * Returns the true lower bound of a datatype. - *

Java binding of the MPI operation {@code MPI_TYPE_GET_TRUE_EXTENT}. - * @return lower bound of datatype - * @throws MPIException - */ -public int getTrueLb() throws MPIException -{ - if(trueExtent == 0) - getTrueLbExtent(); - - return trueLb; -} - -/** - * Returns the true extent of a datatype. - *

Java binding of the MPI operation {@code MPI_TYPE_GET_TRUE_EXTENT}. - * @return datatype true extent - * @throws MPIException - */ -public int getTrueExtent() throws MPIException -{ - if(trueExtent == 0) - getTrueLbExtent(); - - return trueExtent; -} - -private void getTrueLbExtent() throws MPIException -{ - MPI.check(); - int lbExt[] = new int[2]; - getTrueLbExtent(handle, lbExt); - trueLb = lbExt[0] / baseSize; - trueExtent = lbExt[1] / baseSize; -} - -private native void getTrueLbExtent(long handle, int[] lbExt); - -/** - * Returns the total size of a datatype - the number of buffer - * elements it represents. - *

Java binding of the MPI operation {@code MPI_TYPE_SIZE}. - * @return datatype size - * @throws MPIException - */ -public int getSize() throws MPIException -{ - MPI.check(); - return getSize(handle) / baseSize; -} - -private native int getSize(long type); - -/** - * Commits a derived datatype. - * Java binding of the MPI operation {@code MPI_TYPE_COMMIT}. - * @throws MPIException - */ -public void commit() throws MPIException -{ - MPI.check(); - commit(handle); -} - -private native void commit(long type); - -/** - * Frees the datatype. - *

Java binding of the MPI operation {@code MPI_TYPE_FREE}. - * @throws MPIException - */ -@Override public void free() throws MPIException -{ - MPI.check(); - handle = free(handle); -} - -private native long free(long type) throws MPIException; - -/** - * Returns {@code true} if this datatype is MPI_DATATYPE_NULL. - * @return {@code true} if this datatype is MPI_DATATYPE_NULL - */ -public boolean isNull() -{ - return handle == MPI.DATATYPE_NULL.handle; -} - -/** - * Java binding of {@code MPI_TYPE_DUP}. - *

It is recommended to use {@link #dup} instead of {@link #clone} - * because the last can't throw an {@link mpi.MPIException}. - * @return new datatype - */ -@Override public Datatype clone() -{ - try - { - return dup(); - } - catch(MPIException e) - { - throw new RuntimeException(e.getMessage()); - } -} - -/** - * Java binding of {@code MPI_TYPE_DUP}. - * @return new datatype - * @throws MPIException - */ -public Datatype dup() throws MPIException -{ - MPI.check(); - return new Datatype(this, dup(handle)); -} - -private native long dup(long type) throws MPIException; - -/** - * Construct new datatype representing replication of old datatype into - * contiguous locations. - *

Java binding of the MPI operation {@code MPI_TYPE_CONTIGUOUS}. - *

The base type of the new datatype is the same as the base type of - * {@code oldType}. - * @param count replication count - * @param oldType old datatype - * @return new datatype - * @throws MPIException - */ -public static Datatype createContiguous(int count, Datatype oldType) - throws MPIException -{ - MPI.check(); - return new Datatype(oldType, getContiguous(count, oldType.handle)); -} - -private static native long getContiguous(int count, long oldType); - -/** - * Construct new datatype representing replication of old datatype into - * locations that consist of equally spaced blocks. - *

Java binding of the MPI operation {@code MPI_TYPE_VECTOR}. - *

The base type of the new datatype is the same as the base type of - * {@code oldType}. - * @param count number of blocks - * @param blockLength number of elements in each block - * @param stride number of elements between start of each block - * @param oldType old datatype - * @return new datatype - * @throws MPIException - */ -public static Datatype createVector(int count, int blockLength, - int stride, Datatype oldType) - throws MPIException -{ - MPI.check(); - long handle = getVector(count, blockLength, stride, oldType.handle); - return new Datatype(oldType, handle); -} - -private static native long getVector( - int count, int blockLength, int stride, long oldType) - throws MPIException; - -/** - * Identical to {@code createVector} except that the stride is expressed - * directly in terms of the buffer index, rather than the units of - * the old type. - *

Java binding of the MPI operation {@code MPI_TYPE_HVECTOR}. - * @param count number of blocks - * @param blockLength number of elements in each - * @param stride number of bytes between start of each block - * @param oldType old datatype - * @return new datatype - * @throws MPIException - */ -public static Datatype createHVector(int count, int blockLength, - int stride, Datatype oldType) - throws MPIException -{ - MPI.check(); - long handle = getHVector(count, blockLength, stride, oldType.handle); - return new Datatype(oldType, handle); -} - -private static native long getHVector( - int count, int blockLength, int stride, long oldType) - throws MPIException; - -/** - * Construct new datatype representing replication of old datatype into - * a sequence of blocks where each block can contain a different number - * of copies and have a different displacement. - *

Java binding of the MPI operation {@code MPI_TYPE_INDEXED}. - *

The number of blocks is taken to be size of the {@code blockLengths} - * argument. The second argument, {@code displacements}, should be the - * same size. The base type of the new datatype is the same as the base - * type of {@code oldType}. - * @param blockLengths number of elements per block - * @param displacements displacement of each block in units of old type - * @param oldType old datatype - * @return new datatype - * @throws MPIException - */ -public static Datatype createIndexed(int[] blockLengths, - int[] displacements, Datatype oldType) - throws MPIException -{ - MPI.check(); - long handle = getIndexed(blockLengths, displacements, oldType.handle); - return new Datatype(oldType, handle); -} - -private static native long getIndexed( - int[] blockLengths, int[] displacements, long oldType) - throws MPIException; - -/** - * Identical to {@code createIndexed} except that the displacements are - * expressed directly in terms of the buffer index, rather than the - * units of the old type. - *

Java binding of the MPI operation {@code MPI_TYPE_HINDEXED}. - * @param blockLengths number of elements per block - * @param displacements byte displacement in buffer for each block - * @param oldType old datatype - * @return new datatype - * @throws MPIException - */ -public static Datatype createHIndexed(int[] blockLengths, - int[] displacements, Datatype oldType) - throws MPIException -{ - MPI.check(); - long handle = getHIndexed(blockLengths, displacements, oldType.handle); - return new Datatype(oldType, handle); -} - -private static native long getHIndexed( - int[] blockLengths, int[] displacements, long oldType) - throws MPIException; - -/** - * The most general type constructor. - *

Java binding of the MPI operation {@code MPI_TYPE_STRUCT}. - *

The number of blocks is taken to be size of the {@code blockLengths} - * argument. The second and third arguments, {@code displacements}, - * and {@code types}, should be the same size. - * @param blockLengths number of elements in each block - * @param displacements byte displacement of each block - * @param types type of elements in each block - * @return new datatype - * @throws MPIException - */ -public static Datatype createStruct(int[] blockLengths, - int[] displacements, Datatype[] types) - throws MPIException -{ - MPI.check(); - long handle = getStruct(blockLengths, displacements, types); - return new Datatype(MPI.BYTE, handle); -} - -private static native long getStruct( - int[] blockLengths, int[] displacements, Datatype[] types) - throws MPIException; - -/* - * JMS add proper documentation here - * JMS int != Aint! This needs to be fixed throughout. - */ -/** - * Create a datatype with a new lower bound and extent from an existing - * datatype. - *

Java binding of the MPI operation {@code MPI_TYPE_CREATE_RESIZED}. - * @param oldType input datatype - * @param lb new lower bound of datatype (address integer) - * @param extent new extent of datatype (address integer) - * @return new datatype - * @throws MPIException - */ -public static Datatype createResized(Datatype oldType, int lb, int extent) - throws MPIException -{ - MPI.check(); - long handle = getResized(oldType.handle, lb, extent); - return new Datatype(oldType, handle); -} - -private static native long getResized(long oldType, int lb, int extent); - -/** - * Sets the print name for the datatype. - * @param name name for the datatype - * @throws MPIException - */ -public void setName(String name) throws MPIException -{ - MPI.check(); - setName(handle, name); -} - -private native void setName(long handle, String name) throws MPIException; - -/** - * Return the print name from the datatype. - * @return name of the datatype - * @throws MPIException - */ -public String getName() throws MPIException -{ - MPI.check(); - return getName(handle); -} - -private native String getName(long handle) throws MPIException; - -/** - * Create a new attribute key. - *

Java binding of the MPI operation {@code MPI_TYPE_CREATE_KEYVAL}. - * @return attribute key for future access - * @throws MPIException - */ -public static int createKeyval() throws MPIException -{ - MPI.check(); - return createKeyval_jni(); -} - -private static native int createKeyval_jni() throws MPIException; - -/** - * Frees an attribute key. - *

Java binding of the MPI operation {@code MPI_TYPE_FREE_KEYVAL}. - * @param keyval attribute key - * @throws MPIException - */ -public static void freeKeyval(int keyval) throws MPIException -{ - MPI.check(); - freeKeyval_jni(keyval); -} - -private static native void freeKeyval_jni(int keyval) throws MPIException; - -/** - * Stores attribute value associated with a key. - *

Java binding of the MPI operation {@code MPI_TYPE_SET_ATTR}. - * @param keyval attribute key - * @param value attribute value - * @throws MPIException - */ -public void setAttr(int keyval, Object value) throws MPIException -{ - MPI.check(); - setAttr(handle, keyval, MPI.attrSet(value)); -} - -private native void setAttr(long type, int keyval, byte[] value) - throws MPIException; - -/** - * Retrieves attribute value by key. - *

Java binding of the MPI operation {@code MPI_TYPE_GET_ATTR}. - * @param keyval attribute key - * @return attribute value or null if no attribute is associated with the key. - * @throws MPIException - */ -public Object getAttr(int keyval) throws MPIException -{ - MPI.check(); - Object obj = getAttr(handle, keyval); - return obj instanceof byte[] ? MPI.attrGet((byte[])obj) : obj; -} - -private native Object getAttr(long type, int keyval) throws MPIException; - -/** - * Deletes an attribute value associated with a key. - *

Java binding of the MPI operation {@code MPI_TYPE_DELETE_ATTR}. - * @param keyval attribute key - * @throws MPIException - */ -public void deleteAttr(int keyval) throws MPIException -{ - MPI.check(); - deleteAttr(handle, keyval); -} - -private native void deleteAttr(long type, int keyval) throws MPIException; - -/** - * Gets the offset of a buffer in bytes. - * @param buffer buffer - * @return offset in bytes - */ -protected int getOffset(Object buffer) -{ - return baseSize * ((Buffer)buffer).arrayOffset(); -} +public final class Datatype implements Freeable, Cloneable +{ + protected long handle; + protected int baseType; + protected int baseSize; + + // Cache to avoid unnecessary jni calls. + private int lb, extent, trueLb, trueExtent; + + protected static final int NULL = 0; + protected static final int BYTE = 1; + protected static final int CHAR = 2; + protected static final int SHORT = 3; + protected static final int BOOLEAN = 4; + protected static final int INT = 5; + protected static final int LONG = 6; + protected static final int FLOAT = 7; + protected static final int DOUBLE = 8; + protected static final int PACKED = 9; + protected static final int INT2 = 10; + protected static final int SHORT_INT = 11; + protected static final int LONG_INT = 12; + protected static final int FLOAT_INT = 13; + protected static final int DOUBLE_INT = 14; + protected static final int FLOAT_COMPLEX = 15; + protected static final int DOUBLE_COMPLEX = 16; + + static + { + init(); + } + + private static native void init(); + + /* + * Constructor used in static initializer of 'MPI'. + * + * (Called before MPI.Init(), so cannot make any native MPI calls.) + * + * (Initialization done in separate 'setBasic', so can create + * datatype objects for 'BYTE', etc in static initializers invoked before + * MPI.Init(), then initialize objects after MPI initialized.) 
+ */ + protected Datatype() + { + } + + protected void setBasic(int type) + { + baseType = type; + handle = getDatatype(type); + baseSize = type == NULL ? 0 : getSize(handle); + } + + protected void setBasic(int type, Datatype oldType) + { + baseType = oldType.baseType; + handle = getDatatype(type); + baseSize = oldType.baseSize; + } + + private static native long getDatatype(int type); + + /* + * Constructor used in 'create*' methods. + */ + private Datatype(Datatype oldType, long handle) + { + baseType = oldType.baseType; + baseSize = oldType.baseSize; + this.handle = handle; + } + + /* + * Constructor used in 'create*' methods. + */ + private Datatype(int baseType, int baseSize, long handle) + { + this.baseType = baseType; + this.baseSize = baseSize; + this.handle = handle; + } + + /** + * Returns the lower bound of a datatype. + *

Java binding of the MPI operation {@code MPI_TYPE_GET_EXTENT}. + * @return lower bound of datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getLb() throws MPIException + { + if(extent == 0) + getLbExtent(); + + return lb; + } + + /** + * Returns the extent of a datatype. + *

Java binding of the MPI operation {@code MPI_TYPE_GET_EXTENT}. + * @return datatype extent + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getExtent() throws MPIException + { + if(extent == 0) + getLbExtent(); + + return extent; + } + + private void getLbExtent() throws MPIException + { + MPI.check(); + int lbExt[] = new int[2]; + getLbExtent(handle, lbExt); + lb = lbExt[0] / baseSize; + extent = lbExt[1] / baseSize; + } + + private native void getLbExtent(long handle, int[] lbExt); + + /** + * Returns the true lower bound of a datatype. + *

Java binding of the MPI operation {@code MPI_TYPE_GET_TRUE_EXTENT}. + * @return lower bound of datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getTrueLb() throws MPIException + { + if(trueExtent == 0) + getTrueLbExtent(); + + return trueLb; + } + + /** + * Returns the true extent of a datatype. + *

Java binding of the MPI operation {@code MPI_TYPE_GET_TRUE_EXTENT}. + * @return datatype true extent + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getTrueExtent() throws MPIException + { + if(trueExtent == 0) + getTrueLbExtent(); + + return trueExtent; + } + + private void getTrueLbExtent() throws MPIException + { + MPI.check(); + int lbExt[] = new int[2]; + getTrueLbExtent(handle, lbExt); + trueLb = lbExt[0] / baseSize; + trueExtent = lbExt[1] / baseSize; + } + + private native void getTrueLbExtent(long handle, int[] lbExt); + + /** + * Returns the total size of a datatype - the number of buffer + * elements it represents. + *

Java binding of the MPI operation {@code MPI_TYPE_SIZE}. + * @return datatype size + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getSize() throws MPIException + { + MPI.check(); + return getSize(handle) / baseSize; + } + + private native int getSize(long type); + + /** + * Commits a derived datatype. + * Java binding of the MPI operation {@code MPI_TYPE_COMMIT}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void commit() throws MPIException + { + MPI.check(); + commit(handle); + } + + private native void commit(long type); + + /** + * Frees the datatype. + *

Java binding of the MPI operation {@code MPI_TYPE_FREE}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public void free() throws MPIException + { + MPI.check(); + handle = free(handle); + } + + private native long free(long type) throws MPIException; + + /** + * Returns {@code true} if this datatype is MPI_DATATYPE_NULL. + * @return {@code true} if this datatype is MPI_DATATYPE_NULL + */ + public boolean isNull() + { + return handle == MPI.DATATYPE_NULL.handle; + } + + /** + * Java binding of {@code MPI_TYPE_DUP}. + *

It is recommended to use {@link #dup} instead of {@link #clone} + * because the last can't throw an {@link mpi.MPIException}. + * @return new datatype + */ + @Override public Datatype clone() + { + try + { + return dup(); + } + catch(MPIException e) + { + throw new RuntimeException(e.getMessage()); + } + } + + /** + * Java binding of {@code MPI_TYPE_DUP}. + * @return new datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Datatype dup() throws MPIException + { + MPI.check(); + return new Datatype(this, dup(handle)); + } + + private native long dup(long type) throws MPIException; + + /** + * Construct new datatype representing replication of old datatype into + * contiguous locations. + *

Java binding of the MPI operation {@code MPI_TYPE_CONTIGUOUS}. + *

The base type of the new datatype is the same as the base type of + * {@code oldType}. + * @param count replication count + * @param oldType old datatype + * @return new datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Datatype createContiguous(int count, Datatype oldType) + throws MPIException + { + MPI.check(); + return new Datatype(oldType, getContiguous(count, oldType.handle)); + } + + private static native long getContiguous(int count, long oldType); + + /** + * Construct new datatype representing replication of old datatype into + * locations that consist of equally spaced blocks. + *

Java binding of the MPI operation {@code MPI_TYPE_VECTOR}. + *

The base type of the new datatype is the same as the base type of + * {@code oldType}. + * @param count number of blocks + * @param blockLength number of elements in each block + * @param stride number of elements between start of each block + * @param oldType old datatype + * @return new datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Datatype createVector(int count, int blockLength, + int stride, Datatype oldType) + throws MPIException + { + MPI.check(); + long handle = getVector(count, blockLength, stride, oldType.handle); + return new Datatype(oldType, handle); + } + + private static native long getVector( + int count, int blockLength, int stride, long oldType) + throws MPIException; + + /** + * Identical to {@code createVector} except that the stride is expressed + * directly in terms of the buffer index, rather than the units of + * the old type. + *

Java binding of the MPI operation {@code MPI_TYPE_CREATE_HVECTOR}. + * @param count number of blocks + * @param blockLength number of elements in each + * @param stride number of bytes between start of each block + * @param oldType old datatype + * @return new datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Datatype createHVector(int count, int blockLength, + int stride, Datatype oldType) + throws MPIException + { + MPI.check(); + long handle = getHVector(count, blockLength, stride, oldType.handle); + return new Datatype(oldType, handle); + } + + private static native long getHVector( + int count, int blockLength, int stride, long oldType) + throws MPIException; + + /** + * Construct new datatype representing replication of old datatype into + * a sequence of blocks where each block can contain a different number + * of copies and have a different displacement. + *

Java binding of the MPI operation {@code MPI_TYPE_INDEXED}. + *

The number of blocks is taken to be size of the {@code blockLengths} + * argument. The second argument, {@code displacements}, should be the + * same size. The base type of the new datatype is the same as the base + * type of {@code oldType}. + * @param blockLengths number of elements per block + * @param displacements displacement of each block in units of old type + * @param oldType old datatype + * @return new datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Datatype createIndexed(int[] blockLengths, + int[] displacements, Datatype oldType) + throws MPIException + { + MPI.check(); + long handle = getIndexed(blockLengths, displacements, oldType.handle); + return new Datatype(oldType, handle); + } + + private static native long getIndexed( + int[] blockLengths, int[] displacements, long oldType) + throws MPIException; + + /** + * Identical to {@code createIndexed} except that the displacements are + * expressed directly in terms of the buffer index, rather than the + * units of the old type. + *

Java binding of the MPI operation {@code MPI_TYPE_CREATE_HINDEXED}. + * @param blockLengths number of elements per block + * @param displacements byte displacement in buffer for each block + * @param oldType old datatype + * @return new datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Datatype createHIndexed(int[] blockLengths, + int[] displacements, Datatype oldType) + throws MPIException + { + MPI.check(); + long handle = getHIndexed(blockLengths, displacements, oldType.handle); + return new Datatype(oldType, handle); + } + + private static native long getHIndexed( + int[] blockLengths, int[] displacements, long oldType) + throws MPIException; + + /** + * The most general type constructor. + *

Java binding of the MPI operation {@code MPI_TYPE_STRUCT}. + *

The number of blocks is taken to be size of the {@code blockLengths} + * argument. The second and third arguments, {@code displacements}, + * and {@code types}, should be the same size. + * @param blockLengths number of elements in each block + * @param displacements byte displacement of each block + * @param types type of elements in each block + * @return new datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Datatype createStruct(int[] blockLengths, + int[] displacements, Datatype[] types) + throws MPIException + { + MPI.check(); + long handle = getStruct(blockLengths, displacements, types); + return new Datatype(MPI.BYTE, handle); + } + + private static native long getStruct( + int[] blockLengths, int[] displacements, Datatype[] types) + throws MPIException; + + /* + * JMS add proper documentation here + * JMS int != Aint! This needs to be fixed throughout. + */ + /** + * Create a datatype with a new lower bound and extent from an existing + * datatype. + *

Java binding of the MPI operation {@code MPI_TYPE_CREATE_RESIZED}. + * @param oldType input datatype + * @param lb new lower bound of datatype (address integer) + * @param extent new extent of datatype (address integer) + * @return new datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Datatype createResized(Datatype oldType, int lb, int extent) + throws MPIException + { + MPI.check(); + long handle = getResized(oldType.handle, lb, extent); + return new Datatype(oldType, handle); + } + + private static native long getResized(long oldType, int lb, int extent); + + /** + * Sets the print name for the datatype. + * @param name name for the datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setName(String name) throws MPIException + { + MPI.check(); + setName(handle, name); + } + + private native void setName(long handle, String name) throws MPIException; + + /** + * Return the print name from the datatype. + * @return name of the datatype + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public String getName() throws MPIException + { + MPI.check(); + return getName(handle); + } + + private native String getName(long handle) throws MPIException; + + /** + * Create a new attribute key. + *

Java binding of the MPI operation {@code MPI_TYPE_CREATE_KEYVAL}. + * @return attribute key for future access + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int createKeyval() throws MPIException + { + MPI.check(); + return createKeyval_jni(); + } + + private static native int createKeyval_jni() throws MPIException; + + /** + * Frees an attribute key. + *

Java binding of the MPI operation {@code MPI_TYPE_FREE_KEYVAL}. + * @param keyval attribute key + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void freeKeyval(int keyval) throws MPIException + { + MPI.check(); + freeKeyval_jni(keyval); + } + + private static native void freeKeyval_jni(int keyval) throws MPIException; + + /** + * Stores attribute value associated with a key. + *

Java binding of the MPI operation {@code MPI_TYPE_SET_ATTR}. + * @param keyval attribute key + * @param value attribute value + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setAttr(int keyval, Object value) throws MPIException + { + MPI.check(); + setAttr(handle, keyval, MPI.attrSet(value)); + } + + private native void setAttr(long type, int keyval, byte[] value) + throws MPIException; + + /** + * Retrieves attribute value by key. + *

Java binding of the MPI operation {@code MPI_TYPE_GET_ATTR}. + * @param keyval attribute key + * @return attribute value or null if no attribute is associated with the key. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Object getAttr(int keyval) throws MPIException + { + MPI.check(); + Object obj = getAttr(handle, keyval); + return obj instanceof byte[] ? MPI.attrGet((byte[])obj) : obj; + } + + private native Object getAttr(long type, int keyval) throws MPIException; + + /** + * Deletes an attribute value associated with a key. + *

Java binding of the MPI operation {@code MPI_TYPE_DELETE_ATTR}. + * @param keyval attribute key + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void deleteAttr(int keyval) throws MPIException + { + MPI.check(); + deleteAttr(handle, keyval); + } + + private native void deleteAttr(long type, int keyval) throws MPIException; + + /** + * Gets the offset of a buffer in bytes. + * @param buffer buffer + * @return offset in bytes + */ + protected int getOffset(Object buffer) + { + return baseSize * ((Buffer)buffer).arrayOffset(); + } } // Datatype diff --git a/ompi/mpi/java/java/DistGraphNeighbors.java b/ompi/mpi/java/java/DistGraphNeighbors.java index b8dc8e9391b..e545b6f3a1c 100644 --- a/ompi/mpi/java/java/DistGraphNeighbors.java +++ b/ompi/mpi/java/java/DistGraphNeighbors.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,85 +25,85 @@ */ public final class DistGraphNeighbors { -private final int[] sources, sourceWeights, destinations, destWeights; -private final boolean weighted; + private final int[] sources, sourceWeights, destinations, destWeights; + private final boolean weighted; -protected DistGraphNeighbors( - int[] sources, int[] sourceWeights, - int[] destinations, int[] destWeights, boolean weighted) -{ - this.sources = sources; - this.sourceWeights = sourceWeights; - this.destinations = destinations; - this.destWeights = destWeights; - this.weighted = weighted; -} + protected DistGraphNeighbors( + int[] sources, int[] sourceWeights, + int[] destinations, int[] destWeights, boolean weighted) + { + this.sources = sources; + this.sourceWeights = sourceWeights; + this.destinations = destinations; + this.destWeights = destWeights; + this.weighted = weighted; + } -/** - * Gets the number of edges into this process. - * @return number of edges into this process - */ -public int getInDegree() -{ - return sources.length; -} + /** + * Gets the number of edges into this process. + * @return number of edges into this process + */ + public int getInDegree() + { + return sources.length; + } -/** - * Gets the number of edges out of this process. - * @return number of edges out of this process - */ -public int getOutDegree() -{ - return destinations.length; -} + /** + * Gets the number of edges out of this process. + * @return number of edges out of this process + */ + public int getOutDegree() + { + return destinations.length; + } -/** - * Returns false if {@code MPI_UNWEIGHTED} was supplied during creation. - * @return false if {@code MPI_UNWEIGHTED} was supplied, true otherwise - */ -public boolean isWeighted() -{ - return weighted; -} + /** + * Returns false if {@code MPI_UNWEIGHTED} was supplied during creation. 
+ * @return false if {@code MPI_UNWEIGHTED} was supplied, true otherwise + */ + public boolean isWeighted() + { + return weighted; + } -/** - * Gets a process for which the calling processs is a destination. - * @param i source index - * @return process for which the calling processs is a destination - */ -public int getSource(int i) -{ - return sources[i]; -} + /** + * Gets a process for which the calling processs is a destination. + * @param i source index + * @return process for which the calling processs is a destination + */ + public int getSource(int i) + { + return sources[i]; + } -/** - * Gets the weight of an edge into the calling process. - * @param i source index - * @return weight of the edge into the calling process - */ -public int getSourceWeight(int i) -{ - return sourceWeights[i]; -} + /** + * Gets the weight of an edge into the calling process. + * @param i source index + * @return weight of the edge into the calling process + */ + public int getSourceWeight(int i) + { + return sourceWeights[i]; + } -/** - * Gets a process for which the calling process is a source - * @param i destination index - * @return process for which the calling process is a source - */ -public int getDestination(int i) -{ - return destinations[i]; -} + /** + * Gets a process for which the calling process is a source + * @param i destination index + * @return process for which the calling process is a source + */ + public int getDestination(int i) + { + return destinations[i]; + } -/** - * Gets the weight of an edge out of the calling process. - * @param i destination index - * @return weight of an edge out of the calling process - */ -public int getDestinationWeight(int i) -{ - return destWeights[i]; -} + /** + * Gets the weight of an edge out of the calling process. 
+ * @param i destination index + * @return weight of an edge out of the calling process + */ + public int getDestinationWeight(int i) + { + return destWeights[i]; + } } // DistGraphNeighbors diff --git a/ompi/mpi/java/java/DoubleComplex.java b/ompi/mpi/java/java/DoubleComplex.java index e9497e218ba..3a75e3e6866 100644 --- a/ompi/mpi/java/java/DoubleComplex.java +++ b/ompi/mpi/java/java/DoubleComplex.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,124 +27,124 @@ */ public final class DoubleComplex { -private final int offset; -private final DoubleBuffer buffer; - -private DoubleComplex(DoubleBuffer buffer, int index) -{ - this.buffer = buffer; - this.offset = index * 2; -} - -/** - * Wraps a complex number stored in a buffer - * @param buffer buffer - * @return complex number - */ -public static DoubleComplex get(DoubleBuffer buffer) -{ - return new DoubleComplex(buffer, 0); -} - -/** - * Wraps the complex number at the specified position - * of an array of complex numbers stored in a buffer. - * @param buffer buffer - * @param index index - * @return complex number - */ -public static DoubleComplex get(DoubleBuffer buffer, int index) -{ - return new DoubleComplex(buffer, index); -} - -/** - * Wraps a complex number stored in the first two values of an array. 
- * @param array array - * @return complex number - */ -public static DoubleComplex get(double[] array) -{ - return new DoubleComplex(DoubleBuffer.wrap(array), 0); -} - -/** - * Wraps the complex number at the specified position of - * an array of complex numbers stored in an array of doubles. - * @param array array - * @param index index - * @return complex number - */ -public static DoubleComplex get(double[] array, int index) -{ - return new DoubleComplex(DoubleBuffer.wrap(array), index); -} - -/** - * Wraps a complex number stored in a buffer - * @param buffer buffer - * @return complex number - */ -public static DoubleComplex get(ByteBuffer buffer) -{ - return new DoubleComplex(buffer.asDoubleBuffer(), 0); -} - -/** - * Wraps the complex number at the specified position - * of an array of complex numbers stored in a buffer. - * @param buffer buffer - * @param index index - * @return complex number - */ -public static DoubleComplex get(ByteBuffer buffer, int index) -{ - return new DoubleComplex(buffer.asDoubleBuffer(), index); -} - -/** - * Gets the real value. - * @return real value - */ -public double getReal() -{ - return buffer.get(offset); -} - -/** - * Gets the imaginary value. - * @return imaginary value. - */ -public double getImag() -{ - return buffer.get(offset + 1); -} - -/** - * Puts the real value. - * @param real real value - */ -public void putReal(double real) -{ - buffer.put(offset, real); -} - -/** - * Puts the imaginary value. - * @param imag imaginary value - */ -public void putImag(double imag) -{ - buffer.put(offset + 1, imag); -} - -/** - * Gets the buffer where the complex number is stored. - * @return buffer where the complex number is stored - */ -public DoubleBuffer getBuffer() -{ - return offset == 0 ? 
buffer : MPI.slice(buffer, offset); -} + private final int offset; + private final DoubleBuffer buffer; + + private DoubleComplex(DoubleBuffer buffer, int index) + { + this.buffer = buffer; + this.offset = index * 2; + } + + /** + * Wraps a complex number stored in a buffer + * @param buffer buffer + * @return complex number + */ + public static DoubleComplex get(DoubleBuffer buffer) + { + return new DoubleComplex(buffer, 0); + } + + /** + * Wraps the complex number at the specified position + * of an array of complex numbers stored in a buffer. + * @param buffer buffer + * @param index index + * @return complex number + */ + public static DoubleComplex get(DoubleBuffer buffer, int index) + { + return new DoubleComplex(buffer, index); + } + + /** + * Wraps a complex number stored in the first two values of an array. + * @param array array + * @return complex number + */ + public static DoubleComplex get(double[] array) + { + return new DoubleComplex(DoubleBuffer.wrap(array), 0); + } + + /** + * Wraps the complex number at the specified position of + * an array of complex numbers stored in an array of doubles. + * @param array array + * @param index index + * @return complex number + */ + public static DoubleComplex get(double[] array, int index) + { + return new DoubleComplex(DoubleBuffer.wrap(array), index); + } + + /** + * Wraps a complex number stored in a buffer + * @param buffer buffer + * @return complex number + */ + public static DoubleComplex get(ByteBuffer buffer) + { + return new DoubleComplex(buffer.asDoubleBuffer(), 0); + } + + /** + * Wraps the complex number at the specified position + * of an array of complex numbers stored in a buffer. + * @param buffer buffer + * @param index index + * @return complex number + */ + public static DoubleComplex get(ByteBuffer buffer, int index) + { + return new DoubleComplex(buffer.asDoubleBuffer(), index); + } + + /** + * Gets the real value. 
+ * @return real value + */ + public double getReal() + { + return buffer.get(offset); + } + + /** + * Gets the imaginary value. + * @return imaginary value. + */ + public double getImag() + { + return buffer.get(offset + 1); + } + + /** + * Puts the real value. + * @param real real value + */ + public void putReal(double real) + { + buffer.put(offset, real); + } + + /** + * Puts the imaginary value. + * @param imag imaginary value + */ + public void putImag(double imag) + { + buffer.put(offset + 1, imag); + } + + /** + * Gets the buffer where the complex number is stored. + * @return buffer where the complex number is stored + */ + public DoubleBuffer getBuffer() + { + return offset == 0 ? buffer : MPI.slice(buffer, offset); + } } // DoubleComplex diff --git a/ompi/mpi/java/java/DoubleInt.java b/ompi/mpi/java/java/DoubleInt.java index 18b5712ea67..ac75ffb5e7a 100644 --- a/ompi/mpi/java/java/DoubleInt.java +++ b/ompi/mpi/java/java/DoubleInt.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,89 +25,91 @@ */ public final class DoubleInt extends Struct { -private final int iOff, iSize; + private final int iOff, iSize; -/** - * The struct object will be created only in MPI class. - * @see MPI#doubleInt - */ -protected DoubleInt(int intOff, int intSize) -{ - int dOff = addDouble(); - assert dOff == 0; + /** + * The struct object will be created only in MPI class. 
+ * @param intOff int offset + * @param intSize int size + * @see MPI#doubleInt + */ + protected DoubleInt(int intOff, int intSize) + { + int dOff = addDouble(); + assert dOff == 0; - iSize = intSize; - setOffset(intOff); + iSize = intSize; + setOffset(intOff); - switch(iSize) - { - case 4: iOff = addInt(); break; - case 8: iOff = addLong(); break; - default: throw new AssertionError("Unsupported int size: "+ iSize); - } + switch(iSize) + { + case 4: iOff = addInt(); break; + case 8: iOff = addLong(); break; + default: throw new AssertionError("Unsupported int size: "+ iSize); + } - assert(intOff == iOff); -} + assert(intOff == iOff); + } -/** - * Creates a Data object. - * @return new Data object. - */ -@Override protected DoubleInt.Data newData() -{ - return new DoubleInt.Data(); -} + /** + * Creates a Data object. + * @return new Data object. + */ + @Override protected DoubleInt.Data newData() + { + return new DoubleInt.Data(); + } -/** - * Class for reading/writing data in a struct stored in a byte buffer. - */ -public final class Data extends Struct.Data -{ - /** - * Gets the double value. - * @return double value - */ - public double getValue() - { - return getDouble(0); - } + /** + * Class for reading/writing data in a struct stored in a byte buffer. + */ + public final class Data extends Struct.Data + { + /** + * Gets the double value. + * @return double value + */ + public double getValue() + { + return getDouble(0); + } - /** - * Gets the int value. - * @return int value - */ - public int getIndex() - { - switch(iSize) - { - case 4: return getInt(iOff); - case 8: return (int)getLong(iOff); - default: throw new AssertionError(); - } - } + /** + * Gets the int value. + * @return int value + */ + public int getIndex() + { + switch(iSize) + { + case 4: return getInt(iOff); + case 8: return (int)getLong(iOff); + default: throw new AssertionError(); + } + } - /** - * Puts the double value. 
- * @param v double value - */ - public void putValue(double v) - { - putDouble(0, v); - } + /** + * Puts the double value. + * @param v double value + */ + public void putValue(double v) + { + putDouble(0, v); + } - /** - * Puts the int value. - * @param v int value - */ - public void putIndex(int v) - { - switch(iSize) - { - case 4: putInt(iOff, v); break; - case 8: putLong(iOff, v); break; - default: throw new AssertionError(); - } - } -} // Data + /** + * Puts the int value. + * @param v int value + */ + public void putIndex(int v) + { + switch(iSize) + { + case 4: putInt(iOff, v); break; + case 8: putLong(iOff, v); break; + default: throw new AssertionError(); + } + } + } // Data } // DoubleInt diff --git a/ompi/mpi/java/java/Errhandler.java b/ompi/mpi/java/java/Errhandler.java index b78efa2fefb..75917b408e5 100644 --- a/ompi/mpi/java/java/Errhandler.java +++ b/ompi/mpi/java/java/Errhandler.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : Errhandler.java * Author : Xinying Li * Created : Thu Apr 9 12:22:15 1998 @@ -50,14 +52,14 @@ */ public final class Errhandler { -protected long handle; + protected long handle; -protected static native long getFatal(); -protected static native long getReturn(); + protected static native long getFatal(); + protected static native long getReturn(); -protected Errhandler(long handle) -{ - this.handle = handle; -} + protected Errhandler(long handle) + { + this.handle = handle; + } } // Errhandler diff --git a/ompi/mpi/java/java/File.java b/ompi/mpi/java/java/File.java index f02374931d0..3309c623770 100644 --- a/ompi/mpi/java/java/File.java +++ b/ompi/mpi/java/java/File.java @@ -5,22 +5,24 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * IMPLEMENTATION DETAILS - * + * * All methods with buffers that can be direct or non direct have * a companion argument 'db' which is true if the buffer is direct. - * + * * Checking if a buffer is direct is faster in Java than C. */ @@ -36,1200 +38,1212 @@ */ public final class File { -private long handle; -private FileView view = new FileView(0, MPI.BYTE, MPI.BYTE, "native"); -private Status beginStatus; - -/** - * Java binding of {@code MPI_FILE_OPEN} using {@code MPI_INFO_NULL}. - * @param comm communicator - * @param filename name of the file to open - * @param amode file access mode - * @throws MPIException - */ -public File(Comm comm, String filename, int amode) throws MPIException -{ - MPI.check(); - handle = open(comm.handle, filename, amode, Info.NULL); -} - -/** - * Java binding of {@code MPI_FILE_OPEN}. - * @param comm communicator - * @param filename name of the file to open - * @param amode file access mode - * @param info info object - * @throws MPIException - */ -public File(Comm comm, String filename, int amode, Info info) - throws MPIException -{ - MPI.check(); - handle = open(comm.handle, filename, amode, info.handle); -} - -private native long open(long comm, String filename, int amode, long info) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_CLOSE}. 
- * @throws MPIException - */ -public void close() throws MPIException -{ - MPI.check(); - handle = close(handle); -} - -private native long close(long fh) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_DELETE} using {@code MPI_INFO_NULL}. - * @param filename name of the file to delete - * @throws MPIException - */ -public static void delete(String filename) throws MPIException -{ - MPI.check(); - delete(filename, Info.NULL); -} - -/** - * Java binding of {@code MPI_FILE_DELETE}. - * @param filename name of the file to delete - * @param info info object - * @throws MPIException - */ -public static void delete(String filename, Info info) throws MPIException -{ - MPI.check(); - delete(filename, info.handle); -} - -private static native void delete(String filename, long info) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_SET_SIZE}. - * @param size size to truncate or expand file - * @throws MPIException - */ -public void setSize(long size) throws MPIException -{ - MPI.check(); - setSize(handle, size); -} - -private native void setSize(long fh, long size) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_PREALLOCATE}. - * @param size size to preallocate file - * @throws MPIException - */ -public void preallocate(long size) throws MPIException -{ - MPI.check(); - preallocate(handle, size); -} - -private native void preallocate(long fh, long size) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_GET_SIZE}. - * @return size of file in bytes - * @throws MPIException - */ -public long getSize() throws MPIException -{ - MPI.check(); - return getSize(handle); -} - -private native long getSize(long fh) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_GET_GROUP}. 
- * @return group wich opened the file - * @throws MPIException - */ -public Group getGroup() throws MPIException -{ - MPI.check(); - return new Group(getGroup(handle)); -} - -private native long getGroup(long fh) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_GET_AMODE}. - * @return file access mode to open the file - * @throws MPIException - */ -public int getAMode() throws MPIException -{ - MPI.check(); - return getAMode(handle); -} - -private native int getAMode(long fh) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_SET_INFO}. - * @param info info object - * @throws MPIException - */ -public void setInfo(Info info) throws MPIException -{ - MPI.check(); - setInfo(handle, info.handle); -} - -private native void setInfo(long fh, long info) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_GET_INFO}. - * @return new info object - * @throws MPIException - */ -public Info getInfo() throws MPIException -{ - MPI.check(); - return new Info(getInfo(handle)); -} - -private native long getInfo(long fh) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_SET_VIEW} using {@code MPI_INFO_NULL}. - * @param disp displacement - * @param etype elementary datatype - * @param filetype filetype - * @param datarep data representation - * @throws MPIException - */ -public void setView(long disp, Datatype etype, - Datatype filetype, String datarep) - throws MPIException -{ - MPI.check(); - setView(handle, disp, etype.handle, filetype.handle, datarep, Info.NULL); - view = new FileView(disp, etype, filetype, datarep); -} - -/** - * Java binding of {@code MPI_FILE_SET_VIEW}. 
- * @param disp displacement - * @param etype elementary datatype - * @param filetype filetype - * @param datarep data representation - * @param info info object - * @throws MPIException - */ -public void setView(long disp, Datatype etype, - Datatype filetype, String datarep, Info info) - throws MPIException -{ - MPI.check(); - setView(handle, disp, etype.handle, filetype.handle, datarep, info.handle); - view = new FileView(disp, etype, filetype, datarep); -} - -private native void setView( - long fh, long disp, long etype, - long filetype, String datarep, long info) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_GET_VIEW}. - * @return file view - */ -public FileView getView() -{ - return view; -} - -/** - * Java binding of {@code MPI_FILE_READ_AT}. - * @param offset file offset - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status readAt(long offset, Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - readAt(handle, offset, buf, db, off, count, - type.handle, type.baseType, status.data); - - return status; -} - -private native void readAt( - long fh, long fileOffset, Object buf, boolean db, int offset, - int count, long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_READ_AT_ALL}. 
- * @param offset file offset - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status readAtAll(long offset, Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - readAtAll(handle, offset, buf, db, off, count, - type.handle, type.baseType, status.data); - - return status; -} - -private native void readAtAll( - long fh, long fileOffset, Object buf, boolean db, int offset, - int count, long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE_AT}. - * @param offset file offset - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status writeAt(long offset, Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - writeAt(handle, offset, buf, db, off, count, - type.handle, type.baseType, status.data); - - return status; -} - -private native void writeAt( - long fh, long fileOffset, Object buf, boolean db, int offset, - int count, long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE_AT_ALL}. 
- * @param offset file offset - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status writeAtAll(long offset, Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - writeAtAll(handle, offset, buf, db, off, count, - type.handle, type.baseType, status.data); - - return status; -} - -private native void writeAtAll( - long fh, long fileOffset, Object buf, boolean db, int offset, - int count, long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_IREAD_AT}. - * @param offset file offset - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return request object - * @throws MPIException - */ -public Request iReadAt(long offset, Buffer buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(iReadAt(handle, offset, buf, count, type.handle)); -} - -private native long iReadAt( - long fh, long offset, Buffer buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_IWRITE_AT}. 
- * @param offset file offset - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return request object - * @throws MPIException - */ -public Request iWriteAt(long offset, Buffer buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(iWriteAt(handle, offset, buf, count, type.handle)); -} - -private native long iWriteAt( - long fh, long offset, Buffer buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_READ}. - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status read(Object buf, int count, Datatype type) throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - read(handle, buf, db, off, count, type.handle, type.baseType, status.data); - return status; -} - -private native void read( - long fh, Object buf, boolean db, int offset, - int count, long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_READ_ALL}. 
- * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status readAll(Object buf, int count, Datatype type) throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - readAll(handle, buf,db,off, count, type.handle, type.baseType, status.data); - return status; -} - -private native void readAll( - long fh, Object buf, boolean db, int offset, - int count, long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE}. - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status write(Object buf, int count, Datatype type) throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - write(handle, buf, db, off, count, type.handle, type.baseType, status.data); - return status; -} - -private native void write( - long fh, Object buf, boolean db, int offset, - int count, long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE_ALL}. 
- * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status writeAll(Object buf, int count, Datatype type) throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - writeAll(handle, buf,db,off, count, type.handle,type.baseType, status.data); - return status; -} - -private native void writeAll( - long fh, Object buf, boolean db, int offset, - int count, long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_IREAD}. - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return request object - * @throws MPIException - */ -public Request iRead(Buffer buf, int count, Datatype type) throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(iRead(handle, buf, count, type.handle)); -} - -private native long iRead(long fh, Buffer buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_IWRITE}. - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return request object - * @throws MPIException - */ -public Request iWrite(Buffer buf, int count, Datatype type) throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(iWrite(handle, buf, count, type.handle)); -} - -private native long iWrite(long fh, Buffer buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_SEEK}. 
- * @param offset file offset - * @param whence update mode - * @throws MPIException - */ -public void seek(long offset, int whence) throws MPIException -{ - MPI.check(); - seek(handle, offset, whence); -} - -private native void seek(long fh, long offset, int whence) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_GET_POSITION}. - * @return offset of individual pointer - * @throws MPIException - */ -public long getPosition() throws MPIException -{ - MPI.check(); - return getPosition(handle); -} - -private native long getPosition(long fh) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_GET_BYTE_OFFSET}. - * @param offset offset - * @return absolute byte position of offset - * @throws MPIException - */ -public long getByteOffset(long offset) throws MPIException -{ - MPI.check(); - return getByteOffset(handle, offset); -} - -private native long getByteOffset(long fh, long offset) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_READ_SHARED}. - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status readShared(Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - readShared(handle, buf, db, off, count, - type.handle, type.baseType, status.data); - - return status; -} - -private native void readShared( - long fh, Object buf, boolean db, int offset, int count, - long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE_SHARED}. 
- * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status writeShared(Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - writeShared(handle, buf, db, off, count, - type.handle, type.baseType, status.data); - - return status; -} - -private native void writeShared( - long fh, Object buf, boolean db, int offset, int count, - long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_IREAD_SHARED}. - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return request object - * @throws MPIException - */ -public Request iReadShared(Buffer buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(iReadShared(handle, buf, count, type.handle)); -} - -private native long iReadShared(long fh, Buffer buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_IWRITE_SHARED}. - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return request object - * @throws MPIException - */ -public Request iWriteShared(Buffer buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(iWriteShared(handle, buf, count, type.handle)); -} - -private native long iWriteShared(long fh, Buffer buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_READ_ORDERED}. 
- * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status readOrdered(Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - readOrdered(handle, buf, db, off, count, - type.handle, type.baseType, status.data); - - return status; -} - -private native void readOrdered( - long fh, Object buf, boolean db, int offset, int count, - long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE_ORDERED}. - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @return status object - * @throws MPIException - */ -public Status writeOrdered(Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - writeOrdered(handle, buf, db, off, count, - type.handle, type.baseType, status.data); - - return status; -} - -private native void writeOrdered( - long fh, Object buf, boolean db, int offset, int count, - long type, int baseType, long[] stat) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_SEEK_SHARED}. - * @param offset file offset - * @param whence update mode - * @throws MPIException - */ -public void seekShared(long offset, int whence) throws MPIException -{ - MPI.check(); - seekShared(handle, offset, whence); -} - -private native void seekShared(long fh, long offset, int whence) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_GET_POSITION_SHARED}. 
- * @return offset of individual pointer - * @throws MPIException - */ -public long getPositionShared() throws MPIException -{ - MPI.check(); - return getPositionShared(handle); -} - -private native long getPositionShared(long fh) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_READ_AT_ALL_BEGIN}. - * @param offset file offset - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @throws MPIException - */ -public void readAtAllBegin(long offset, Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - readAtAllBegin(handle, offset, buf, count, type.handle); - } - else - { - int off = 0; - Status status = new Status(); - - if(isHeapBuffer(buf)) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - readAtAll(handle, offset, buf, false, off, count, - type.handle, type.baseType, status.data); - - beginStatus = status; - } -} - -private native void readAtAllBegin( - long fh, long offset, Object buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_READ_AT_ALL_END}. - * @param buf buffer - * @return status object - * @throws MPIException - */ -public Status readAtAllEnd(Object buf) throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - Status status = new Status(); - readAtAllEnd(handle, buf, status.data); - return status; - } - else - { - return getBeginStatus(); - } -} - -private native void readAtAllEnd(long fh, Object buf, long[] stat) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE_AT_ALL_BEGIN}. 
- * @param offset file offset - * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @throws MPIException - */ -public void writeAtAllBegin(long offset, Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - writeAtAllBegin(handle, offset, buf, count, type.handle); - } - else - { - int off = 0; - Status status = new Status(); - - if(isHeapBuffer(buf)) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - writeAtAll(handle, offset, buf, false, off, count, - type.handle, type.baseType, status.data); - - beginStatus = status; - } -} - -private native void writeAtAllBegin( - long fh, long fileOffset, Object buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE_AT_ALL_END}. - * @param buf buffer - * @return status object - * @throws MPIException - */ -public Status writeAtAllEnd(Object buf) throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - Status status = new Status(); - writeAtAllEnd(handle, buf, status.data); - return status; - } - else - { - return getBeginStatus(); - } -} - -private native void writeAtAllEnd(long fh, Object buf, long[] stat) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_READ_ALL_BEGIN}. 
- * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @throws MPIException - */ -public void readAllBegin(Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - readAllBegin(handle, buf, count, type.handle); - } - else - { - int off = 0; - Status status = new Status(); - - if(isHeapBuffer(buf)) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - readAll(handle, buf, false, off, count, - type.handle, type.baseType, status.data); - - beginStatus = status; - } -} - -private native void readAllBegin(long fh, Object buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_READ_ALL_END}. - * @param buf buffer - * @return status object - * @throws MPIException - */ -public Status readAllEnd(Object buf) throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - Status status = new Status(); - readAllEnd(handle, buf, status.data); - return status; - } - else - { - return getBeginStatus(); - } -} - -private native void readAllEnd(long fh, Object buf, long[] stat) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE_ALL_BEGIN}. 
- * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @throws MPIException - */ -public void writeAllBegin(Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - writeAllBegin(handle, buf, count, type.handle); - } - else - { - int off = 0; - Status status = new Status(); - - if(isHeapBuffer(buf)) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - writeAll(handle, buf, false, off, count, - type.handle, type.baseType, status.data); - - beginStatus = status; - } -} - -private native void writeAllBegin(long fh, Object buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE_ALL_END}. - * @param buf buffer - * @return status object - * @throws MPIException - */ -public Status writeAllEnd(Object buf) throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - Status status = new Status(); - writeAllEnd(handle, buf, status.data); - return status; - } - else - { - return getBeginStatus(); - } -} - -private native void writeAllEnd(long fh, Object buf, long[] stat) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_READ_ORDERED_BEGIN}. 
- * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @throws MPIException - */ -public void readOrderedBegin(Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - readOrderedBegin(handle, buf, count, type.handle); - } - else - { - int off = 0; - Status status = new Status(); - - if(isHeapBuffer(buf)) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - readOrdered(handle, buf, false, off, count, - type.handle, type.baseType, status.data); - - beginStatus = status; - } -} - -private native void readOrderedBegin(long fh, Object buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_READ_ORDERED_END}. - * @param buf buffer - * @return status object - * @throws MPIException - */ -public Status readOrderedEnd(Object buf) throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - Status status = new Status(); - readOrderedEnd(handle, buf, status.data); - return status; - } - else - { - return getBeginStatus(); - } -} - -private native void readOrderedEnd(long fh, Object buf, long[] stat) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE_ORDERED_BEGIN}. 
- * @param buf buffer - * @param count number of items in buffer - * @param type datatype of each buffer element - * @throws MPIException - */ -public void writeOrderedBegin(Object buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - writeOrderedBegin(handle, buf, count, type.handle); - } - else - { - int off = 0; - Status status = new Status(); - - if(isHeapBuffer(buf)) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - writeOrdered(handle, buf, false, off, count, - type.handle, type.baseType, status.data); - - beginStatus = status; - } -} - -private native void writeOrderedBegin(long fh, Object buf, int count, long type) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_WRITE_ORDERED_END}. - * @param buf buffer - * @return status object - * @throws MPIException - */ -public Status writeOrderedEnd(Object buf) throws MPIException -{ - MPI.check(); - - if(isDirectBuffer(buf)) - { - Status status = new Status(); - writeOrderedEnd(handle, buf, status.data); - return status; - } - else - { - return getBeginStatus(); - } -} - -private native void writeOrderedEnd(long fh, Object buf, long[] stat) - throws MPIException; - -private Status getBeginStatus() -{ - Status s = beginStatus; - beginStatus = null; - return s; -} - -/** - * Java binding of {@code MPI_FILE_GET_TYPE_EXTENT}. - * @param type - * @return datatype extent - * @throws MPIException - */ -public int getTypeExtent(Datatype type) throws MPIException -{ - MPI.check(); - return getTypeExtent(handle, type.handle) / type.baseSize; -} - -private native int getTypeExtent(long fh, long type) throws MPIException; - -/** - * Java binding of {@code MPI_FILE_SET_ATOMICITY}. 
- * @param atomicity true to set atomic mode, false to set nonatomic mode - * @throws MPIException - */ -public void setAtomicity(boolean atomicity) throws MPIException -{ - MPI.check(); - setAtomicity(handle, atomicity); -} - -private native void setAtomicity(long fh, boolean atomicity) - throws MPIException; - -/** - * Java binding of {@code MPI_FILE_SYNC}. - * @throws MPIException - */ -public void sync() throws MPIException -{ - MPI.check(); - sync(handle); -} - -private native void sync(long handle) throws MPIException; + private long handle; + private FileView view = new FileView(0, MPI.BYTE, MPI.BYTE, "native"); + private Status beginStatus; + + /** + * Java binding of {@code MPI_FILE_OPEN} using {@code MPI_INFO_NULL}. + * @param comm communicator + * @param filename name of the file to open + * @param amode file access mode + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public File(Comm comm, String filename, int amode) throws MPIException + { + MPI.check(); + handle = open(comm.handle, filename, amode, Info.NULL); + } + + /** + * Java binding of {@code MPI_FILE_OPEN}. + * @param comm communicator + * @param filename name of the file to open + * @param amode file access mode + * @param info info object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public File(Comm comm, String filename, int amode, Info info) + throws MPIException + { + MPI.check(); + handle = open(comm.handle, filename, amode, info.handle); + } + + private native long open(long comm, String filename, int amode, long info) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_CLOSE}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public void close() throws MPIException + { + MPI.check(); + handle = close(handle); + } + + private native long close(long fh) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_DELETE} using {@code MPI_INFO_NULL}. + * @param filename name of the file to delete + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void delete(String filename) throws MPIException + { + MPI.check(); + delete(filename, Info.NULL); + } + + /** + * Java binding of {@code MPI_FILE_DELETE}. + * @param filename name of the file to delete + * @param info info object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void delete(String filename, Info info) throws MPIException + { + MPI.check(); + delete(filename, info.handle); + } + + private static native void delete(String filename, long info) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_SET_SIZE}. + * @param size size to truncate or expand file + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setSize(long size) throws MPIException + { + MPI.check(); + setSize(handle, size); + } + + private native void setSize(long fh, long size) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_PREALLOCATE}. + * @param size size to preallocate file + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void preallocate(long size) throws MPIException + { + MPI.check(); + preallocate(handle, size); + } + + private native void preallocate(long fh, long size) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_GET_SIZE}. + * @return size of file in bytes + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public long getSize() throws MPIException + { + MPI.check(); + return getSize(handle); + } + + private native long getSize(long fh) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_GET_GROUP}. + * @return group which opened the file + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Group getGroup() throws MPIException + { + MPI.check(); + return new Group(getGroup(handle)); + } + + private native long getGroup(long fh) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_GET_AMODE}. + * @return file access mode used to open the file + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getAMode() throws MPIException + { + MPI.check(); + return getAMode(handle); + } + + private native int getAMode(long fh) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_SET_INFO}. + * @param info info object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setInfo(Info info) throws MPIException + { + MPI.check(); + setInfo(handle, info.handle); + } + + private native void setInfo(long fh, long info) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_GET_INFO}. + * @return new info object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Info getInfo() throws MPIException + { + MPI.check(); + return new Info(getInfo(handle)); + } + + private native long getInfo(long fh) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_SET_VIEW} using {@code MPI_INFO_NULL}. + * @param disp displacement + * @param etype elementary datatype + * @param filetype filetype + * @param datarep data representation + * @throws MPIException Signals that an MPI exception of some sort has occurred.
+ */ + public void setView(long disp, Datatype etype, + Datatype filetype, String datarep) + throws MPIException + { + MPI.check(); + setView(handle, disp, etype.handle, filetype.handle, datarep, Info.NULL); + view = new FileView(disp, etype, filetype, datarep); + } + + /** + * Java binding of {@code MPI_FILE_SET_VIEW}. + * @param disp displacement + * @param etype elementary datatype + * @param filetype filetype + * @param datarep data representation + * @param info info object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setView(long disp, Datatype etype, + Datatype filetype, String datarep, Info info) + throws MPIException + { + MPI.check(); + setView(handle, disp, etype.handle, filetype.handle, datarep, info.handle); + view = new FileView(disp, etype, filetype, datarep); + } + + private native void setView( + long fh, long disp, long etype, + long filetype, String datarep, long info) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_GET_VIEW}. + * @return file view + */ + public FileView getView() + { + return view; + } + + /** + * Java binding of {@code MPI_FILE_READ_AT}. + * @param offset file offset + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public Status readAt(long offset, Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + readAt(handle, offset, buf, db, off, count, + type.handle, type.baseType, status.data); + + return status; + } + + private native void readAt( + long fh, long fileOffset, Object buf, boolean db, int offset, + int count, long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_READ_AT_ALL}. + * @param offset file offset + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status readAtAll(long offset, Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + readAtAll(handle, offset, buf, db, off, count, + type.handle, type.baseType, status.data); + + return status; + } + + private native void readAtAll( + long fh, long fileOffset, Object buf, boolean db, int offset, + int count, long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE_AT}. + * @param offset file offset + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public Status writeAt(long offset, Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + writeAt(handle, offset, buf, db, off, count, + type.handle, type.baseType, status.data); + + return status; + } + + private native void writeAt( + long fh, long fileOffset, Object buf, boolean db, int offset, + int count, long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE_AT_ALL}. + * @param offset file offset + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status writeAtAll(long offset, Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + writeAtAll(handle, offset, buf, db, off, count, + type.handle, type.baseType, status.data); + + return status; + } + + private native void writeAtAll( + long fh, long fileOffset, Object buf, boolean db, int offset, + int count, long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_IREAD_AT}. + * @param offset file offset + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return request object + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public Request iReadAt(long offset, Buffer buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iReadAt(handle, offset, buf, count, type.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iReadAt( + long fh, long offset, Buffer buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_IWRITE_AT}. + * @param offset file offset + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return request object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Request iWriteAt(long offset, Buffer buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iWriteAt(handle, offset, buf, count, type.handle)); + req.addSendBufRef(buf); + return req; + } + + private native long iWriteAt( + long fh, long offset, Buffer buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_READ}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status read(Object buf, int count, Datatype type) throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + read(handle, buf, db, off, count, type.handle, type.baseType, status.data); + return status; + } + + private native void read( + long fh, Object buf, boolean db, int offset, + int count, long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_READ_ALL}. 
+ * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status readAll(Object buf, int count, Datatype type) throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + readAll(handle, buf,db,off, count, type.handle, type.baseType, status.data); + return status; + } + + private native void readAll( + long fh, Object buf, boolean db, int offset, + int count, long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status write(Object buf, int count, Datatype type) throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + write(handle, buf, db, off, count, type.handle, type.baseType, status.data); + return status; + } + + private native void write( + long fh, Object buf, boolean db, int offset, + int count, long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE_ALL}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public Status writeAll(Object buf, int count, Datatype type) throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + writeAll(handle, buf,db,off, count, type.handle,type.baseType, status.data); + return status; + } + + private native void writeAll( + long fh, Object buf, boolean db, int offset, + int count, long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_IREAD}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return request object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Request iRead(Buffer buf, int count, Datatype type) throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iRead(handle, buf, count, type.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iRead(long fh, Buffer buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_IWRITE}. + * @param buf buffer holding the data to write; it is passed through {@code assertDirectBuffer} first + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return request object, with {@code buf} registered on it via {@code addSendBufRef} like the other nonblocking writes + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Request iWrite(Buffer buf, int count, Datatype type) throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iWrite(handle, buf, count, type.handle)); + req.addSendBufRef(buf); + return req; + } + + private native long iWrite(long fh, Buffer buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_SEEK}. + * @param offset file offset + * @param whence update mode + * @throws MPIException Signals that an MPI exception of some sort has occurred.
+ */ + public void seek(long offset, int whence) throws MPIException + { + MPI.check(); + seek(handle, offset, whence); + } + + private native void seek(long fh, long offset, int whence) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_GET_POSITION}. + * @return offset of individual pointer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public long getPosition() throws MPIException + { + MPI.check(); + return getPosition(handle); + } + + private native long getPosition(long fh) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_GET_BYTE_OFFSET}. + * @param offset offset + * @return absolute byte position of offset + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public long getByteOffset(long offset) throws MPIException + { + MPI.check(); + return getByteOffset(handle, offset); + } + + private native long getByteOffset(long fh, long offset) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_READ_SHARED}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status readShared(Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + readShared(handle, buf, db, off, count, + type.handle, type.baseType, status.data); + + return status; + } + + private native void readShared( + long fh, Object buf, boolean db, int offset, int count, + long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE_SHARED}. 
+ * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status writeShared(Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + writeShared(handle, buf, db, off, count, + type.handle, type.baseType, status.data); + + return status; + } + + private native void writeShared( + long fh, Object buf, boolean db, int offset, int count, + long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_IREAD_SHARED}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return request object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Request iReadShared(Buffer buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iReadShared(handle, buf, count, type.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iReadShared(long fh, Buffer buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_IWRITE_SHARED}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return request object + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public Request iWriteShared(Buffer buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iWriteShared(handle, buf, count, type.handle)); + req.addSendBufRef(buf); + return req; + } + + private native long iWriteShared(long fh, Buffer buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_READ_ORDERED}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status readOrdered(Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + readOrdered(handle, buf, db, off, count, + type.handle, type.baseType, status.data); + + return status; + } + + private native void readOrdered( + long fh, Object buf, boolean db, int offset, int count, + long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE_ORDERED}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public Status writeOrdered(Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + writeOrdered(handle, buf, db, off, count, + type.handle, type.baseType, status.data); + + return status; + } + + private native void writeOrdered( + long fh, Object buf, boolean db, int offset, int count, + long type, int baseType, long[] stat) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_SEEK_SHARED}. + * @param offset file offset + * @param whence update mode + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void seekShared(long offset, int whence) throws MPIException + { + MPI.check(); + seekShared(handle, offset, whence); + } + + private native void seekShared(long fh, long offset, int whence) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_GET_POSITION_SHARED}. + * @return offset of shared pointer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public long getPositionShared() throws MPIException + { + MPI.check(); + return getPositionShared(handle); + } + + private native long getPositionShared(long fh) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_READ_AT_ALL_BEGIN}. + * @param offset file offset + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @throws MPIException Signals that an MPI exception of some sort has occurred.
+ */ + public void readAtAllBegin(long offset, Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + readAtAllBegin(handle, offset, buf, count, type.handle); + } + else + { + int off = 0; + Status status = new Status(); + + if(isHeapBuffer(buf)) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + readAtAll(handle, offset, buf, false, off, count, + type.handle, type.baseType, status.data); + + beginStatus = status; + } + } + + private native void readAtAllBegin( + long fh, long offset, Object buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_READ_AT_ALL_END}. + * @param buf buffer + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status readAtAllEnd(Object buf) throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + Status status = new Status(); + readAtAllEnd(handle, buf, status.data); + return status; + } + else + { + return getBeginStatus(); + } + } + + private native void readAtAllEnd(long fh, Object buf, long[] stat) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE_AT_ALL_BEGIN}. + * @param offset file offset + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public void writeAtAllBegin(long offset, Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + writeAtAllBegin(handle, offset, buf, count, type.handle); + } + else + { + int off = 0; + Status status = new Status(); + + if(isHeapBuffer(buf)) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + writeAtAll(handle, offset, buf, false, off, count, + type.handle, type.baseType, status.data); + + beginStatus = status; + } + } + + private native void writeAtAllBegin( + long fh, long fileOffset, Object buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE_AT_ALL_END}. + * @param buf buffer + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status writeAtAllEnd(Object buf) throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + Status status = new Status(); + writeAtAllEnd(handle, buf, status.data); + return status; + } + else + { + return getBeginStatus(); + } + } + + private native void writeAtAllEnd(long fh, Object buf, long[] stat) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_READ_ALL_BEGIN}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public void readAllBegin(Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + readAllBegin(handle, buf, count, type.handle); + } + else + { + int off = 0; + Status status = new Status(); + + if(isHeapBuffer(buf)) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + readAll(handle, buf, false, off, count, + type.handle, type.baseType, status.data); + + beginStatus = status; + } + } + + private native void readAllBegin(long fh, Object buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_READ_ALL_END}. + * @param buf buffer + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status readAllEnd(Object buf) throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + Status status = new Status(); + readAllEnd(handle, buf, status.data); + return status; + } + else + { + return getBeginStatus(); + } + } + + private native void readAllEnd(long fh, Object buf, long[] stat) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE_ALL_BEGIN}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public void writeAllBegin(Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + writeAllBegin(handle, buf, count, type.handle); + } + else + { + int off = 0; + Status status = new Status(); + + if(isHeapBuffer(buf)) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + writeAll(handle, buf, false, off, count, + type.handle, type.baseType, status.data); + + beginStatus = status; + } + } + + private native void writeAllBegin(long fh, Object buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE_ALL_END}. + * @param buf buffer + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status writeAllEnd(Object buf) throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + Status status = new Status(); + writeAllEnd(handle, buf, status.data); + return status; + } + else + { + return getBeginStatus(); + } + } + + private native void writeAllEnd(long fh, Object buf, long[] stat) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_READ_ORDERED_BEGIN}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public void readOrderedBegin(Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + readOrderedBegin(handle, buf, count, type.handle); + } + else + { + int off = 0; + Status status = new Status(); + + if(isHeapBuffer(buf)) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + readOrdered(handle, buf, false, off, count, + type.handle, type.baseType, status.data); + + beginStatus = status; + } + } + + private native void readOrderedBegin(long fh, Object buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_READ_ORDERED_END}. + * @param buf buffer + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status readOrderedEnd(Object buf) throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + Status status = new Status(); + readOrderedEnd(handle, buf, status.data); + return status; + } + else + { + return getBeginStatus(); + } + } + + private native void readOrderedEnd(long fh, Object buf, long[] stat) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE_ORDERED_BEGIN}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public void writeOrderedBegin(Object buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + writeOrderedBegin(handle, buf, count, type.handle); + } + else + { + int off = 0; + Status status = new Status(); + + if(isHeapBuffer(buf)) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + writeOrdered(handle, buf, false, off, count, + type.handle, type.baseType, status.data); + + beginStatus = status; + } + } + + private native void writeOrderedBegin(long fh, Object buf, int count, long type) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_WRITE_ORDERED_END}. + * @param buf buffer + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status writeOrderedEnd(Object buf) throws MPIException + { + MPI.check(); + + if(isDirectBuffer(buf)) + { + Status status = new Status(); + writeOrderedEnd(handle, buf, status.data); + return status; + } + else + { + return getBeginStatus(); + } + } + + private native void writeOrderedEnd(long fh, Object buf, long[] stat) + throws MPIException; + + private Status getBeginStatus() + { + Status s = beginStatus; + beginStatus = null; + return s; + } + + /** + * Java binding of {@code MPI_FILE_GET_TYPE_EXTENT}. + * @param type type of data + * @return datatype extent + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getTypeExtent(Datatype type) throws MPIException + { + MPI.check(); + return getTypeExtent(handle, type.handle) / type.baseSize; + } + + private native int getTypeExtent(long fh, long type) throws MPIException; + + /** + * Java binding of {@code MPI_FILE_SET_ATOMICITY}. + * @param atomicity true to set atomic mode, false to set nonatomic mode + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public void setAtomicity(boolean atomicity) throws MPIException + { + MPI.check(); + setAtomicity(handle, atomicity); + } + + private native void setAtomicity(long fh, boolean atomicity) + throws MPIException; + + /** + * Java binding of {@code MPI_FILE_SYNC}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void sync() throws MPIException + { + MPI.check(); + sync(handle); + } + + private native void sync(long handle) throws MPIException; } // File diff --git a/ompi/mpi/java/java/FileView.java b/ompi/mpi/java/java/FileView.java index 1b854248a31..cb791056c42 100644 --- a/ompi/mpi/java/java/FileView.java +++ b/ompi/mpi/java/java/FileView.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,59 +25,59 @@ */ public final class FileView { -private final long disp; -private final Datatype etype, filetype; -private final String datarep; + private final long disp; + private final Datatype etype, filetype; + private final String datarep; -/** - * Constructs a file view. - * @param disp displacement - * @param etype elementary datatype - * @param filetype file type - * @param datarep data representation - */ -public FileView(long disp, Datatype etype, Datatype filetype, String datarep) -{ - this.disp = disp; - this.etype = etype; - this.filetype = filetype; - this.datarep = datarep; -} + /** + * Constructs a file view. 
+ * @param disp displacement + * @param etype elementary datatype + * @param filetype file type + * @param datarep data representation + */ + public FileView(long disp, Datatype etype, Datatype filetype, String datarep) + { + this.disp = disp; + this.etype = etype; + this.filetype = filetype; + this.datarep = datarep; + } -/** - * Gets the displacement. - * @return displacement - */ -public long getDisp() -{ - return disp; -} + /** + * Gets the displacement. + * @return displacement + */ + public long getDisp() + { + return disp; + } -/** - * Gets the elementary datatype. - * @return elementary datatype - */ -public Datatype getEType() -{ - return etype; -} + /** + * Gets the elementary datatype. + * @return elementary datatype + */ + public Datatype getEType() + { + return etype; + } -/** - * Gets the file type. - * @return file type - */ -public Datatype getFileType() -{ - return filetype; -} + /** + * Gets the file type. + * @return file type + */ + public Datatype getFileType() + { + return filetype; + } -/** - * Gets the data representation. - * @return data representation - */ -public String getDataRep() -{ - return datarep; -} + /** + * Gets the data representation. + * @return data representation + */ + public String getDataRep() + { + return datarep; + } } // FileView diff --git a/ompi/mpi/java/java/FloatComplex.java b/ompi/mpi/java/java/FloatComplex.java index 9e7d527f74f..572f1d6fb30 100644 --- a/ompi/mpi/java/java/FloatComplex.java +++ b/ompi/mpi/java/java/FloatComplex.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,124 +27,124 @@ */ public final class FloatComplex { -private final int offset; -private final FloatBuffer buffer; - -private FloatComplex(FloatBuffer buffer, int index) -{ - this.buffer = buffer; - this.offset = index * 2; -} - -/** - * Wraps a complex number stored in a buffer - * @param buffer buffer - * @return complex number - */ -public static FloatComplex get(FloatBuffer buffer) -{ - return new FloatComplex(buffer, 0); -} - -/** - * Wraps the complex number at the specified position - * of an array of complex numbers stored in a buffer. - * @param buffer buffer - * @param index index - * @return complex number - */ -public static FloatComplex get(FloatBuffer buffer, int index) -{ - return new FloatComplex(buffer, index); -} - -/** - * Wraps a complex number stored in the first two values of an array. - * @param array array - * @return complex number - */ -public static FloatComplex get(float[] array) -{ - return new FloatComplex(FloatBuffer.wrap(array), 0); -} - -/** - * Wraps the complex number at the specified position of - * an array of complex numbers stored in an array of floats. - * @param array array - * @param index index - * @return complex number - */ -public static FloatComplex get(float[] array, int index) -{ - return new FloatComplex(FloatBuffer.wrap(array), index); -} - -/** - * Wraps a complex number stored in a buffer - * @param buffer buffer - * @return complex number - */ -public static FloatComplex get(ByteBuffer buffer) -{ - return new FloatComplex(buffer.asFloatBuffer(), 0); -} - -/** - * Wraps the complex number at the specified position - * of an array of complex numbers stored in a buffer. 
- * @param buffer buffer - * @param index index - * @return complex number - */ -public static FloatComplex get(ByteBuffer buffer, int index) -{ - return new FloatComplex(buffer.asFloatBuffer(), index); -} - -/** - * Gets the real value. - * @return real value - */ -public float getReal() -{ - return buffer.get(offset); -} - -/** - * Gets the imaginary value. - * @return imaginary value. - */ -public float getImag() -{ - return buffer.get(offset + 1); -} - -/** - * Puts the real value. - * @param real real value - */ -public void putReal(float real) -{ - buffer.put(offset, real); -} - -/** - * Puts the imaginary value. - * @param imag imaginary value - */ -public void putImag(float imag) -{ - buffer.put(offset + 1, imag); -} - -/** - * Gets the buffer where the complex number is stored. - * @return buffer where the complex number is stored - */ -public FloatBuffer getBuffer() -{ - return offset == 0 ? buffer : MPI.slice(buffer, offset); -} + private final int offset; + private final FloatBuffer buffer; + + private FloatComplex(FloatBuffer buffer, int index) + { + this.buffer = buffer; + this.offset = index * 2; + } + + /** + * Wraps a complex number stored in a buffer + * @param buffer buffer + * @return complex number + */ + public static FloatComplex get(FloatBuffer buffer) + { + return new FloatComplex(buffer, 0); + } + + /** + * Wraps the complex number at the specified position + * of an array of complex numbers stored in a buffer. + * @param buffer buffer + * @param index index + * @return complex number + */ + public static FloatComplex get(FloatBuffer buffer, int index) + { + return new FloatComplex(buffer, index); + } + + /** + * Wraps a complex number stored in the first two values of an array. 
+ * @param array array + * @return complex number + */ + public static FloatComplex get(float[] array) + { + return new FloatComplex(FloatBuffer.wrap(array), 0); + } + + /** + * Wraps the complex number at the specified position of + * an array of complex numbers stored in an array of floats. + * @param array array + * @param index index + * @return complex number + */ + public static FloatComplex get(float[] array, int index) + { + return new FloatComplex(FloatBuffer.wrap(array), index); + } + + /** + * Wraps a complex number stored in a buffer + * @param buffer buffer + * @return complex number + */ + public static FloatComplex get(ByteBuffer buffer) + { + return new FloatComplex(buffer.asFloatBuffer(), 0); + } + + /** + * Wraps the complex number at the specified position + * of an array of complex numbers stored in a buffer. + * @param buffer buffer + * @param index index + * @return complex number + */ + public static FloatComplex get(ByteBuffer buffer, int index) + { + return new FloatComplex(buffer.asFloatBuffer(), index); + } + + /** + * Gets the real value. + * @return real value + */ + public float getReal() + { + return buffer.get(offset); + } + + /** + * Gets the imaginary value. + * @return imaginary value. + */ + public float getImag() + { + return buffer.get(offset + 1); + } + + /** + * Puts the real value. + * @param real real value + */ + public void putReal(float real) + { + buffer.put(offset, real); + } + + /** + * Puts the imaginary value. + * @param imag imaginary value + */ + public void putImag(float imag) + { + buffer.put(offset + 1, imag); + } + + /** + * Gets the buffer where the complex number is stored. + * @return buffer where the complex number is stored + */ + public FloatBuffer getBuffer() + { + return offset == 0 ? 
buffer : MPI.slice(buffer, offset); + } } // FloatComplex diff --git a/ompi/mpi/java/java/FloatInt.java b/ompi/mpi/java/java/FloatInt.java index 60991cfd50c..1eb55bde76c 100644 --- a/ompi/mpi/java/java/FloatInt.java +++ b/ompi/mpi/java/java/FloatInt.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,89 +25,91 @@ */ public final class FloatInt extends Struct { -private final int iOff, iSize; + private final int iOff, iSize; -/** - * The struct object will be created only in MPI class. - * @see MPI#floatInt - */ -protected FloatInt(int intOff, int intSize) -{ - int fOff = addFloat(); - assert fOff == 0; + /** + * The struct object will be created only in MPI class. + * @param intOff int offset + * @param intSize int size + * @see MPI#floatInt + */ + protected FloatInt(int intOff, int intSize) + { + int fOff = addFloat(); + assert fOff == 0; - iSize = intSize; - setOffset(intOff); + iSize = intSize; + setOffset(intOff); - switch(iSize) - { - case 4: iOff = addInt(); break; - case 8: iOff = addLong(); break; - default: throw new AssertionError("Unsupported int size: "+ iSize); - } + switch(iSize) + { + case 4: iOff = addInt(); break; + case 8: iOff = addLong(); break; + default: throw new AssertionError("Unsupported int size: "+ iSize); + } - assert(intOff == iOff); -} + assert(intOff == iOff); + } -/** - * Creates a Data object. - * @return new Data object. 
- */ -@Override protected Data newData() -{ - return new Data(); -} + /** + * Creates a Data object. + * @return new Data object. + */ + @Override protected Data newData() + { + return new Data(); + } -/** - * Class for reading/writing data in a struct stored in a byte buffer. - */ -public final class Data extends Struct.Data -{ - /** - * Gets the float value. - * @return float value - */ - public float getValue() - { - return getFloat(0); - } + /** + * Class for reading/writing data in a struct stored in a byte buffer. + */ + public final class Data extends Struct.Data + { + /** + * Gets the float value. + * @return float value + */ + public float getValue() + { + return getFloat(0); + } - /** - * Gets the int value. - * @return int value - */ - public int getIndex() - { - switch(iSize) - { - case 4: return getInt(iOff); - case 8: return (int)getLong(iOff); - default: throw new AssertionError(); - } - } + /** + * Gets the int value. + * @return int value + */ + public int getIndex() + { + switch(iSize) + { + case 4: return getInt(iOff); + case 8: return (int)getLong(iOff); + default: throw new AssertionError(); + } + } - /** - * Puts the float value. - * @param v float value - */ - public void putValue(float v) - { - putFloat(0, v); - } + /** + * Puts the float value. + * @param v float value + */ + public void putValue(float v) + { + putFloat(0, v); + } - /** - * Puts the int value. - * @param v int value - */ - public void putIndex(int v) - { - switch(iSize) - { - case 4: putInt(iOff, v); break; - case 8: putLong(iOff, v); break; - default: throw new AssertionError(); - } - } -} // Data + /** + * Puts the int value. 
+ * @param v int value + */ + public void putIndex(int v) + { + switch(iSize) + { + case 4: putInt(iOff, v); break; + case 8: putLong(iOff, v); break; + default: throw new AssertionError(); + } + } + } // Data } // FloatInt diff --git a/ompi/mpi/java/java/Freeable.java b/ompi/mpi/java/java/Freeable.java index 7ce1163ff7c..9ec8a4571d4 100644 --- a/ompi/mpi/java/java/Freeable.java +++ b/ompi/mpi/java/java/Freeable.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : Freeable.java * Author : Bryan Carpenter * Created : Wed Jan 15 23:14:43 EST 2003 @@ -48,10 +50,10 @@ */ public interface Freeable { - /** - * Frees a freeable object. - * @throws MPIException - */ - void free() throws MPIException; + /** + * Frees a freeable object. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + void free() throws MPIException; } diff --git a/ompi/mpi/java/java/GraphComm.java b/ompi/mpi/java/java/GraphComm.java index 9424f383cc5..384136a9537 100644 --- a/ompi/mpi/java/java/GraphComm.java +++ b/ompi/mpi/java/java/GraphComm.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : Graphcomm.java * Author : Xinying Li * Created : Thu Apr 9 12:22:15 1998 @@ -50,132 +52,145 @@ */ public final class GraphComm extends Intracomm { -static -{ - init(); -} - -private static native void init(); - -protected GraphComm(long handle) throws MPIException -{ - super(handle); -} - -protected GraphComm(long[] commRequest) -{ - super(commRequest); -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_DUP}. - *

It is recommended to use {@link #dup} instead of {@link #clone} - * because the last can't throw an {@link mpi.MPIException}. - * @return copy of this communicator - */ -@Override public GraphComm clone() -{ - try - { - return dup(); - } - catch(MPIException e) - { - throw new RuntimeException(e.getMessage()); - } -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_DUP}. - * @return copy of this communicator - * @throws MPIException - */ -@Override public GraphComm dup() throws MPIException -{ - MPI.check(); - return new GraphComm(dup(handle)); -} - -/** - * Duplicates this communicator. - *

The new communicator can't be used before the operation completes. - * The request object must be obtained calling {@link #getRequest}. - *

Java binding of {@code MPI_COMM_IDUP}. - * @return copy of this communicator - * @throws MPIException - */ -@Override public GraphComm iDup() throws MPIException -{ - MPI.check(); - return new GraphComm(iDup(handle)); -} - -/** - * Returns graph topology information. - *

Java binding of the MPI operations {@code MPI_GRAPHDIMS_GET} - * and {@code MPI_GRAPH_GET}. - *

The number of nodes and number of edges can be extracted - * from the sizes of the {@code index} and {@code edges} fields - * of the returned object. - * @return object defining node degress and edges of graph - * @throws MPIException - */ -public GraphParms getDims() throws MPIException -{ - MPI.check(); - return getDims(handle); -} - -private native GraphParms getDims(long comm) throws MPIException; - -/** - * Provides adjacency information for general graph topology. - *

Java binding of the MPI operations {@code MPI_GRAPH_NEIGHBORS_COUNT} - * and {@code MPI_GRAPH_NEIGHBORS}. - *

The number of neighbors can be extracted from the size of the result. - * @param rank rank of a process in the group of this communicator - * @return array of ranks of neighbouring processes to one specified - * @throws MPIException - */ -public int[] getNeighbors(int rank) throws MPIException -{ - MPI.check(); - return getNeighbors(handle, rank); -} - -private native int[] getNeighbors(long comm, int rank) throws MPIException; - -/** - * Gets the adjacency information for a distributed graph topology. - * @return adjacency information for a distributed graph topology - * @throws MPIException - */ -public DistGraphNeighbors getDistGraphNeighbors() throws MPIException -{ - MPI.check(); - return getDistGraphNeighbors(handle); -} - -private native DistGraphNeighbors getDistGraphNeighbors(long comm) - throws MPIException; - -/** - * Compute an optimal placement. - *

Java binding of the MPI operation {@code MPI_GRAPH_MAP}. - *

The number of nodes is taken to be size of the {@code index} argument. - * @param index node degrees - * @param edges graph edges - * @return reordered rank of calling process - * @throws MPIException - */ -public int map(int[] index, int[] edges) throws MPIException -{ - MPI.check(); - return map(handle, index, edges); -} - -private native int map(long comm, int[] index, int[] edges) throws MPIException; + static + { + init(); + } + + private static native void init(); + + protected GraphComm(long handle) throws MPIException + { + super(handle); + } + + protected GraphComm(long[] commRequest) + { + super(commRequest); + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_DUP}. + *

It is recommended to use {@link #dup} instead of {@link #clone} + * because the latter can't throw an {@link mpi.MPIException}. + * @return copy of this communicator + */ + @Override public GraphComm clone() + { + try + { + return dup(); + } + catch(MPIException e) + { + throw new RuntimeException(e.getMessage()); + } + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_DUP}. + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public GraphComm dup() throws MPIException + { + MPI.check(); + return new GraphComm(dup(handle)); + } + + /** + * Duplicates this communicator. + *

The new communicator can't be used before the operation completes. + * The request object must be obtained by calling {@link #getRequest}. + *

Java binding of {@code MPI_COMM_IDUP}. + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public GraphComm iDup() throws MPIException + { + MPI.check(); + return new GraphComm(iDup(handle)); + } + + /** + * Duplicates this communicator with the info object used in the call. + *

Java binding of {@code MPI_COMM_DUP_WITH_INFO}. + * @param info info object to associate with the new communicator + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public GraphComm dupWithInfo(Info info) throws MPIException + { + MPI.check(); + return new GraphComm(dupWithInfo(handle, info.handle)); + } + + /** + * Returns graph topology information. + *

Java binding of the MPI operations {@code MPI_GRAPHDIMS_GET} + * and {@code MPI_GRAPH_GET}. + *

The number of nodes and number of edges can be extracted + * from the sizes of the {@code index} and {@code edges} fields + * of the returned object. + * @return object defining node degrees and edges of graph + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public GraphParms getDims() throws MPIException + { + MPI.check(); + return getDims(handle); + } + + private native GraphParms getDims(long comm) throws MPIException; + + /** + * Provides adjacency information for general graph topology. + *

Java binding of the MPI operations {@code MPI_GRAPH_NEIGHBORS_COUNT} + * and {@code MPI_GRAPH_NEIGHBORS}. + *

The number of neighbors can be extracted from the size of the result. + * @param rank rank of a process in the group of this communicator + * @return array of ranks of neighbouring processes to one specified + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int[] getNeighbors(int rank) throws MPIException + { + MPI.check(); + return getNeighbors(handle, rank); + } + + private native int[] getNeighbors(long comm, int rank) throws MPIException; + + /** + * Gets the adjacency information for a distributed graph topology. + * @return adjacency information for a distributed graph topology + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public DistGraphNeighbors getDistGraphNeighbors() throws MPIException + { + MPI.check(); + return getDistGraphNeighbors(handle); + } + + private native DistGraphNeighbors getDistGraphNeighbors(long comm) + throws MPIException; + + /** + * Compute an optimal placement. + *

Java binding of the MPI operation {@code MPI_GRAPH_MAP}. + *

The number of nodes is taken to be size of the {@code index} argument. + * @param index node degrees + * @param edges graph edges + * @return reordered rank of calling process + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int map(int[] index, int[] edges) throws MPIException + { + MPI.check(); + return map(handle, index, edges); + } + + private native int map(long comm, int[] index, int[] edges) throws MPIException; } // Graphcomm diff --git a/ompi/mpi/java/java/GraphParms.java b/ompi/mpi/java/java/GraphParms.java index 3608a4d9aa8..357c94f2c0a 100644 --- a/ompi/mpi/java/java/GraphParms.java +++ b/ompi/mpi/java/java/GraphParms.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : GraphParms.java * Author : Xinying Li * Created : Thu Apr 9 12:22:15 1998 @@ -50,66 +52,66 @@ */ public final class GraphParms { -/** Node degrees. */ -private final int[] index; + /** Node degrees. */ + private final int[] index; -/** Graph edges. */ -private final int[] edges; + /** Graph edges. */ + private final int[] edges; -/** - * Constructs a graph topology information object. - * @param index node degrees. - * @param edges graph edges. - */ -protected GraphParms(int[] index, int[] edges) -{ - this.index = index; - this.edges = edges; -} + /** + * Constructs a graph topology information object. + * @param index node degrees. + * @param edges graph edges. + */ + protected GraphParms(int[] index, int[] edges) + { + this.index = index; + this.edges = edges; + } -/** - * Returns the number of nodes. - * @return number of nodes. - */ -public int getIndexCount() -{ - return index.length; -} + /** + * Returns the number of nodes. + * @return number of nodes. + */ + public int getIndexCount() + { + return index.length; + } -/** - * Returns the index of the node {@code i}. - *

{@code getIndex(0)} returns the degree of the node {@code 0}, and - * {@code getIndex(i)-getIndex(i-1)} is the degree of the node {@code i}. - * @param i position of the node. - * @return the index. - */ -public int getIndex(int i) -{ - return index[i]; -} + /** + * Returns the index of the node {@code i}. + *

{@code getIndex(0)} returns the degree of the node {@code 0}, and + * {@code getIndex(i)-getIndex(i-1)} is the degree of the node {@code i}. + * @param i position of the node. + * @return the index. + */ + public int getIndex(int i) + { + return index[i]; + } -/** - * Returns the number of edges. - * @return number of edges. - */ -public int getEdgeCount() -{ - return edges.length; -} + /** + * Returns the number of edges. + * @return number of edges. + */ + public int getEdgeCount() + { + return edges.length; + } -/** - * Returns the edge {@code i}. - *

The list of neighbors of node zero is stored in {@code getEdge(j)}, - * for {@code 0} ≤ {@code j} ≤ {@code getIndex(0)-1} and the list - * of neighbors of node {@code i}, {@code i} > {@code 0}, is stored - * in {@code getEdge(j)}, {@code getIndex(i-1)} ≤ {@code j} ≤ - * {@code getIndex(i)-1}. - * @param i index of the edge. - * @return the edge. - */ -public int getEdge(int i) -{ - return edges[i]; -} + /** + * Returns the edge {@code i}. + *

The list of neighbors of node zero is stored in {@code getEdge(j)}, + * for {@code 0} ≤ {@code j} ≤ {@code getIndex(0)-1} and the list + * of neighbors of node {@code i}, {@code i} > {@code 0}, is stored + * in {@code getEdge(j)}, {@code getIndex(i-1)} ≤ {@code j} ≤ + * {@code getIndex(i)-1}. + * @param i index of the edge. + * @return the edge. + */ + public int getEdge(int i) + { + return edges[i]; + } } // GraphParms diff --git a/ompi/mpi/java/java/Group.java b/ompi/mpi/java/java/Group.java index f7d93b1247c..7f5d505b5c3 100644 --- a/ompi/mpi/java/java/Group.java +++ b/ompi/mpi/java/java/Group.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : Group.java * Author : Xinying Li, Bryan Carpenter * Created : Thu Apr 9 12:22:15 1998 @@ -50,223 +52,223 @@ */ public final class Group implements Freeable { -protected long handle; -private static long nullHandle; - -static -{ - init(); -} - -private static native void init(); - -protected static native long getEmpty(); - -protected Group(long handle) -{ - this.handle = handle; -} - -/** - * Java binding of the MPI operation {@code MPI_GROUP_SIZE}. - * @return number of processes in the group - * @throws MPIException - */ -public int getSize() throws MPIException -{ - MPI.check(); - return getSize(handle); -} - -private native int getSize(long group) throws MPIException; - -/** - * Rank of this process in the group. - *

Java binding of the MPI operation {@code MPI_GROUP_RANK}. - * @return rank of this process in the group, or {@code MPI.UNDEFINED} - * if this process is not a member of the group. - * @throws MPIException - */ -public int getRank() throws MPIException -{ - MPI.check(); - return getRank(handle); -} - -private native int getRank(long group) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_GROUP_FREE}. - */ -@Override public void free() throws MPIException -{ - MPI.check(); - handle = free(handle); -} - -private native long free(long group); - -/** - * Test if group object is null. - * @return true if the group object is null. - */ -public boolean isNull() -{ - return handle == nullHandle; -} - -/** - * Translate ranks within one group to ranks within another. - *

Java binding of the MPI operation {@code MPI_GROUP_TRANSLATE_RANKS}. - *

Result elements are {@code MPI.UNDEFINED} where no correspondence exists. - * @param group1 a group - * @param ranks1 array of valid ranks in group1 - * @param group2 another group - * @return array of corresponding ranks in group2 - * @throws MPIException - */ -public static int[] translateRanks(Group group1, int[] ranks1, Group group2) - throws MPIException -{ - MPI.check(); - return translateRanks(group1.handle, ranks1, group2.handle); -} - -private static native int[] translateRanks( - long group1, int[] ranks1, long group2) throws MPIException; - -/** - * Compare two groups. - *

Java binding of the MPI operation {@code MPI_GROUP_COMPARE}. - * @param group1 first group - * @param group2 second group - * @return {@code MPI.IDENT} if the group members and group order are exactly - * the same in both groups, {@code MPI.SIMILAR} if the group members are - * the same but the order is different, {@code MPI.UNEQUAL} otherwise. - * @throws MPIException - */ -public static int compare(Group group1, Group group2) throws MPIException -{ - MPI.check(); - return compare(group1.handle, group2.handle); -} - -private static native int compare(long group1, long group2) throws MPIException; - -/** - * Set union of two groups. - *

Java binding of the MPI operation {@code MPI_GROUP_UNION}. - * @param group1 first group - * @param group2 second group - * @return union group - * @throws MPIException - */ -public static Group union(Group group1, Group group2) throws MPIException -{ - MPI.check(); - return new Group(union(group1.handle, group2.handle)); -} - -private static native long union(long group1, long group2); - -/** - * Set intersection of two groups. - * Java binding of the MPI operation {@code MPI_GROUP_INTERSECTION}. - * @param group1 first group - * @param group2 second group - * @return intersection group - * @throws MPIException - */ -public static Group intersection(Group group1, Group group2) throws MPIException -{ - MPI.check(); - return new Group(intersection(group1.handle, group2.handle)); -} - -private static native long intersection(long group1, long group2); - -/** - * Set difference of two groups. - * Java binding of the MPI operation {@code MPI_GROUP_DIFFERENCE}. - * @param group1 first group - * @param group2 second group - * @return difference group - * @throws MPIException - */ -public static Group difference(Group group1, Group group2) throws MPIException -{ - MPI.check(); - return new Group(difference(group1.handle, group2.handle)); -} - -private static native long difference(long group1, long group2); - -/** - * Create a subset group including specified processes. - *

Java binding of the MPI operation {@code MPI_GROUP_INCL}. - * @param ranks ranks from this group to appear in new group - * @return new group - * @throws MPIException - */ -public Group incl(int[] ranks) throws MPIException -{ - MPI.check(); - return new Group(incl(handle, ranks)); -} - -private native long incl(long group, int[] ranks); - -/** - * Create a subset group excluding specified processes. - *

Java binding of the MPI operation {@code MPI_GROUP_EXCL}. - * @param ranks ranks from this group not to appear in new group - * @return new group - * @throws MPIException - */ -public Group excl(int[] ranks) throws MPIException -{ - MPI.check(); - return new Group(excl(handle, ranks)); -} - -private native long excl(long group, int[] ranks); - -/** - * Create a subset group including processes specified - * by strided intervals of ranks. - *

Java binding of the MPI operation {@code MPI_GROUP_RANGE_INCL}. - *

The triplets are of the form (first rank, last rank, stride) - * indicating ranks in this group to be included in the new group. - * The size of the first dimension of {@code ranges} is the number - * of triplets. The size of the second dimension is 3. - * @param ranges array of integer triplets - * @return new group - * @throws MPIException - */ -public Group rangeIncl(int[][] ranges) throws MPIException -{ - MPI.check(); - return new Group(rangeIncl(handle, ranges)); -} - -private native long rangeIncl(long group, int[][] ranges); - -/** - * Create a subset group excluding processes specified - * by strided intervals of ranks. - *

Java binding of the MPI operation {@code MPI_GROUP_RANGE_EXCL}. - *

Triplet array is defined as for {@code rangeIncl}, the ranges - * indicating ranks in this group to be excluded from the new group. - * @param ranges array of integer triplets - * @return new group - * @throws MPIException - */ -public Group rangeExcl(int[][] ranges) throws MPIException -{ - MPI.check(); - return new Group(rangeExcl(handle, ranges)); -} - -private native long rangeExcl(long group, int[][] ranges); + protected long handle; + private static long nullHandle; + + static + { + init(); + } + + private static native void init(); + + protected static native long getEmpty(); + + protected Group(long handle) + { + this.handle = handle; + } + + /** + * Java binding of the MPI operation {@code MPI_GROUP_SIZE}. + * @return number of processes in the group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getSize() throws MPIException + { + MPI.check(); + return getSize(handle); + } + + private native int getSize(long group) throws MPIException; + + /** + * Rank of this process in the group. + *

Java binding of the MPI operation {@code MPI_GROUP_RANK}. + * @return rank of this process in the group, or {@code MPI.UNDEFINED} + * if this process is not a member of the group. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getRank() throws MPIException + { + MPI.check(); + return getRank(handle); + } + + private native int getRank(long group) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_GROUP_FREE}. + */ + @Override public void free() throws MPIException + { + MPI.check(); + handle = free(handle); + } + + private native long free(long group); + + /** + * Test if group object is null. + * @return true if the group object is null. + */ + public boolean isNull() + { + return handle == nullHandle; + } + + /** + * Translate ranks within one group to ranks within another. + *

Java binding of the MPI operation {@code MPI_GROUP_TRANSLATE_RANKS}. + *

Result elements are {@code MPI.UNDEFINED} where no correspondence exists. + * @param group1 a group + * @param ranks1 array of valid ranks in group1 + * @param group2 another group + * @return array of corresponding ranks in group2 + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int[] translateRanks(Group group1, int[] ranks1, Group group2) + throws MPIException + { + MPI.check(); + return translateRanks(group1.handle, ranks1, group2.handle); + } + + private static native int[] translateRanks( + long group1, int[] ranks1, long group2) throws MPIException; + + /** + * Compare two groups. + *

Java binding of the MPI operation {@code MPI_GROUP_COMPARE}. + * @param group1 first group + * @param group2 second group + * @return {@code MPI.IDENT} if the group members and group order are exactly + * the same in both groups, {@code MPI.SIMILAR} if the group members are + * the same but the order is different, {@code MPI.UNEQUAL} otherwise. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int compare(Group group1, Group group2) throws MPIException + { + MPI.check(); + return compare(group1.handle, group2.handle); + } + + private static native int compare(long group1, long group2) throws MPIException; + + /** + * Set union of two groups. + *

Java binding of the MPI operation {@code MPI_GROUP_UNION}. + * @param group1 first group + * @param group2 second group + * @return union group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Group union(Group group1, Group group2) throws MPIException + { + MPI.check(); + return new Group(union(group1.handle, group2.handle)); + } + + private static native long union(long group1, long group2); + + /** + * Set intersection of two groups. + * Java binding of the MPI operation {@code MPI_GROUP_INTERSECTION}. + * @param group1 first group + * @param group2 second group + * @return intersection group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Group intersection(Group group1, Group group2) throws MPIException + { + MPI.check(); + return new Group(intersection(group1.handle, group2.handle)); + } + + private static native long intersection(long group1, long group2); + + /** + * Set difference of two groups. + * Java binding of the MPI operation {@code MPI_GROUP_DIFFERENCE}. + * @param group1 first group + * @param group2 second group + * @return difference group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Group difference(Group group1, Group group2) throws MPIException + { + MPI.check(); + return new Group(difference(group1.handle, group2.handle)); + } + + private static native long difference(long group1, long group2); + + /** + * Create a subset group including specified processes. + *

Java binding of the MPI operation {@code MPI_GROUP_INCL}. + * @param ranks ranks from this group to appear in new group + * @return new group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Group incl(int[] ranks) throws MPIException + { + MPI.check(); + return new Group(incl(handle, ranks)); + } + + private native long incl(long group, int[] ranks); + + /** + * Create a subset group excluding specified processes. + *

Java binding of the MPI operation {@code MPI_GROUP_EXCL}. + * @param ranks ranks from this group not to appear in new group + * @return new group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Group excl(int[] ranks) throws MPIException + { + MPI.check(); + return new Group(excl(handle, ranks)); + } + + private native long excl(long group, int[] ranks); + + /** + * Create a subset group including processes specified + * by strided intervals of ranks. + *

Java binding of the MPI operation {@code MPI_GROUP_RANGE_INCL}. + *

The triplets are of the form (first rank, last rank, stride) + * indicating ranks in this group to be included in the new group. + * The size of the first dimension of {@code ranges} is the number + * of triplets. The size of the second dimension is 3. + * @param ranges array of integer triplets + * @return new group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Group rangeIncl(int[][] ranges) throws MPIException + { + MPI.check(); + return new Group(rangeIncl(handle, ranges)); + } + + private native long rangeIncl(long group, int[][] ranges); + + /** + * Create a subset group excluding processes specified + * by strided intervals of ranks. + *

Java binding of the MPI operation {@code MPI_GROUP_RANGE_EXCL}. + *

Triplet array is defined as for {@code rangeIncl}, the ranges + * indicating ranks in this group to be excluded from the new group. + * @param ranges array of integer triplets + * @return new group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Group rangeExcl(int[][] ranges) throws MPIException + { + MPI.check(); + return new Group(rangeExcl(handle, ranges)); + } + + private native long rangeExcl(long group, int[][] ranges); } // Group diff --git a/ompi/mpi/java/java/Info.java b/ompi/mpi/java/java/Info.java index c836b3aae02..82c3f668a5c 100644 --- a/ompi/mpi/java/java/Info.java +++ b/ompi/mpi/java/java/Info.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,156 +23,157 @@ /** * This class represents {@code MPI_Info}. */ -public final class Info implements Freeable -{ -protected long handle; -protected static final long NULL = getNull(); - -/** - * Java binding of the MPI operation {@code MPI_INFO_CREATE}. - */ -public Info() throws MPIException -{ - MPI.check(); - handle = create(); -} - -protected Info(long handle) -{ - this.handle = handle; -} - -private native long create(); - -protected static Info newEnv() -{ - return new Info(getEnv()); -} - -private native static long getEnv(); -private native static long getNull(); - -/** - * Java binding of the MPI operation {@code MPI_INFO_SET}. 
- * @param key key - * @param value value - * @throws MPIException - */ -public void set(String key, String value) throws MPIException -{ - MPI.check(); - set(handle, key, value); -} - -private native void set(long handle, String key, String value) - throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_INFO_SET}. - * @param key key - * @return value or {@code null} if key is not defined - * @throws MPIException - */ -public String get(String key) throws MPIException -{ - MPI.check(); - return get(handle, key); -} - -private native String get(long handle, String key) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_INFO_SET}. - * @param key key - * @throws MPIException - */ -public void delete(String key) throws MPIException +public final class Info implements Freeable, Cloneable { - MPI.check(); - delete(handle, key); -} - -private native void delete(long handle, String key) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_INFO_GET_NKEYS}. - * @return number of defined keys - * @throws MPIException - */ -public int size() throws MPIException -{ - MPI.check(); - return size(handle); -} - -private native int size(long handle) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_INFO_GET_NTHKEY}. - * @param i key number - * @return key - * @throws MPIException - */ -public String getKey(int i) throws MPIException -{ - MPI.check(); - return getKey(handle, i); -} - -private native String getKey(long handle, int i) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_INFO_DUP}. - *

It is recommended to use {@link #dup} instead of {@link #clone} - * because the last can't throw an {@link mpi.MPIException}. - * @return info object - */ -@Override public Info clone() -{ - try - { - return dup(); - } - catch(MPIException e) - { - throw new RuntimeException(e.getMessage()); - } -} - -/** - * Java binding of the MPI operation {@code MPI_INFO_DUP}. - * @return info object - * @throws MPIException - */ -public Info dup() throws MPIException -{ - MPI.check(); - return new Info(dup(handle)); -} - -private native long dup(long handle) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_INFO_FREE}. - * @throws MPIException - */ -@Override public void free() throws MPIException -{ - MPI.check(); - handle = free(handle); -} - -private native long free(long handle) throws MPIException; - -/** - * Tests if the info object is {@code MPI_INFO_NULL} (has been freed). - * @return true if the info object is {@code MPI_INFO_NULL}, false otherwise. - */ -public boolean isNull() -{ - return isNull(handle); -} - -private native boolean isNull(long handle); + protected long handle; + protected static final long NULL = getNull(); + + /** + * Java binding of the MPI operation {@code MPI_INFO_CREATE}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Info() throws MPIException + { + MPI.check(); + handle = create(); + } + + protected Info(long handle) + { + this.handle = handle; + } + + private native long create(); + + protected static Info newEnv() + { + return new Info(getEnv()); + } + + private native static long getEnv(); + private native static long getNull(); + + /** + * Java binding of the MPI operation {@code MPI_INFO_SET}. + * @param key key + * @param value value + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public void set(String key, String value) throws MPIException + { + MPI.check(); + set(handle, key, value); + } + + private native void set(long handle, String key, String value) + throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_INFO_SET}. + * @param key key + * @return value or {@code null} if key is not defined + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public String get(String key) throws MPIException + { + MPI.check(); + return get(handle, key); + } + + private native String get(long handle, String key) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_INFO_SET}. + * @param key key + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void delete(String key) throws MPIException + { + MPI.check(); + delete(handle, key); + } + + private native void delete(long handle, String key) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_INFO_GET_NKEYS}. + * @return number of defined keys + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int size() throws MPIException + { + MPI.check(); + return size(handle); + } + + private native int size(long handle) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_INFO_GET_NTHKEY}. + * @param i key number + * @return key + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public String getKey(int i) throws MPIException + { + MPI.check(); + return getKey(handle, i); + } + + private native String getKey(long handle, int i) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_INFO_DUP}. + *

It is recommended to use {@link #dup} instead of {@link #clone} + * because the last can't throw an {@link mpi.MPIException}. + * @return info object + */ + @Override public Info clone() + { + try + { + return dup(); + } + catch(MPIException e) + { + throw new RuntimeException(e.getMessage()); + } + } + + /** + * Java binding of the MPI operation {@code MPI_INFO_DUP}. + * @return info object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Info dup() throws MPIException + { + MPI.check(); + return new Info(dup(handle)); + } + + private native long dup(long handle) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_INFO_FREE}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public void free() throws MPIException + { + MPI.check(); + handle = free(handle); + } + + private native long free(long handle) throws MPIException; + + /** + * Tests if the info object is {@code MPI_INFO_NULL} (has been freed). + * @return true if the info object is {@code MPI_INFO_NULL}, false otherwise. + */ + public boolean isNull() + { + return isNull(handle); + } + + private native boolean isNull(long handle); } // Info diff --git a/ompi/mpi/java/java/Int2.java b/ompi/mpi/java/java/Int2.java index fd36d4f8f63..0ba594d65cf 100644 --- a/ompi/mpi/java/java/Int2.java +++ b/ompi/mpi/java/java/Int2.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,101 +25,103 @@ */ public final class Int2 extends Struct { -private final int iOff, iSize; + private final int iOff, iSize; -/** - * The struct object will be created only in MPI class. - * @see MPI#int2 - */ -protected Int2(int intOff, int intSize) -{ - iSize = intSize; - int off = addIntField(); - assert off == 0; - setOffset(intOff); - iOff = addIntField(); - assert intOff == iOff; -} + /** + * The struct object will be created only in MPI class. + * @param intOff int offset + * @param intSize int size + * @see MPI#int2 + */ + protected Int2(int intOff, int intSize) + { + iSize = intSize; + int off = addIntField(); + assert off == 0; + setOffset(intOff); + iOff = addIntField(); + assert intOff == iOff; + } -private int addIntField() -{ - switch(iSize) - { - case 4: return addInt(); - case 8: return addLong(); - default: throw new AssertionError("Unsupported int size: "+ iSize); - } -} + private int addIntField() + { + switch(iSize) + { + case 4: return addInt(); + case 8: return addLong(); + default: throw new AssertionError("Unsupported int size: "+ iSize); + } + } -/** - * Creates a Data object. - * @return new Data object. - */ -@Override protected Int2.Data newData() -{ - return new Int2.Data(); -} + /** + * Creates a Data object. + * @return new Data object. + */ + @Override protected Int2.Data newData() + { + return new Int2.Data(); + } -/** - * Class for reading/writing data in a struct stored in a byte buffer. - */ -public final class Data extends Struct.Data -{ - /** - * Gets the first int. - * @return first int - */ - public int getValue() - { - return get(0); - } + /** + * Class for reading/writing data in a struct stored in a byte buffer. + */ + public final class Data extends Struct.Data + { + /** + * Gets the first int. + * @return first int + */ + public int getValue() + { + return get(0); + } - /** - * Gets the second int. 
- * @return second int - */ - public int getIndex() - { - return get(iOff); - } + /** + * Gets the second int. + * @return second int + */ + public int getIndex() + { + return get(iOff); + } - /** - * Puts the first int. - * @param v first value - */ - public void putValue(int v) - { - put(0, v); - } + /** + * Puts the first int. + * @param v first value + */ + public void putValue(int v) + { + put(0, v); + } - /** - * Puts the second int. - * @param v second int - */ - public void putIndex(int v) - { - put(iOff, v); - } + /** + * Puts the second int. + * @param v second int + */ + public void putIndex(int v) + { + put(iOff, v); + } - private int get(int off) - { - switch(iSize) - { - case 4: return getInt(off); - case 8: return (int)getLong(off); - default: throw new AssertionError(); - } - } + private int get(int off) + { + switch(iSize) + { + case 4: return getInt(off); + case 8: return (int)getLong(off); + default: throw new AssertionError(); + } + } - private void put(int off, int v) - { - switch(iSize) - { - case 4: putInt(off, v); break; - case 8: putLong(off, v); break; - default: throw new AssertionError(); - } - } -} // Data + private void put(int off, int v) + { + switch(iSize) + { + case 4: putInt(off, v); break; + case 8: putLong(off, v); break; + default: throw new AssertionError(); + } + } + } // Data } // Int2 diff --git a/ompi/mpi/java/java/Intercomm.java b/ompi/mpi/java/java/Intercomm.java index 8609c1a6c4a..e5a52f00023 100644 --- a/ompi/mpi/java/java/Intercomm.java +++ b/ompi/mpi/java/java/Intercomm.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : Intercomm.java * Author : Xinying Li * Created : Thu Apr 9 12:22:15 1998 @@ -50,117 +52,130 @@ */ public final class Intercomm extends Comm { -protected Intercomm(long handle) -{ - super(handle); -} - -protected Intercomm(long[] commRequest) -{ - super(commRequest); -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_DUP}. - *

It is recommended to use {@link #dup} instead of {@link #clone} - * because the last can't throw an {@link mpi.MPIException}. - * @return copy of this communicator - */ -@Override public Intercomm clone() -{ - try - { - return dup(); - } - catch(MPIException e) - { - throw new RuntimeException(e.getMessage()); - } -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_DUP}. - * @return copy of this communicator - * @throws MPIException - */ -@Override public Intercomm dup() throws MPIException -{ - MPI.check(); - return new Intercomm(dup(handle)); -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_IDUP}. - *

The new communicator can't be used before the operation completes. - * The request object must be obtained calling {@link #getRequest}. - * @return copy of this communicator - * @throws MPIException - */ -@Override public Intercomm iDup() throws MPIException -{ - MPI.check(); - return new Intercomm(iDup(handle)); -} - -// Inter-Communication - -/** - * Size of remote group. - *

Java binding of the MPI operation {@code MPI_COMM_REMOTE_SIZE}. - * @return number of process in remote group of this communicator - * @throws MPIException - */ -public int getRemoteSize() throws MPIException -{ - MPI.check(); - return getRemoteSize_jni(); -} - -private native int getRemoteSize_jni() throws MPIException; - -/** - * Return the remote group. - *

Java binding of the MPI operation {@code MPI_COMM_REMOTE_GROUP}. - * @return remote group of this communicator - * @throws MPIException - */ -public Group getRemoteGroup() throws MPIException -{ - MPI.check(); - return new Group(getRemoteGroup_jni()); -} - -private native long getRemoteGroup_jni(); - -/** - * Creates an intracommuncator from an intercommunicator - *

Java binding of the MPI operation {@code MPI_INTERCOMM_MERGE}. - * @param high true if the local group has higher ranks in combined group - * @return new intra-communicator - * @throws MPIException - */ -public Intracomm merge(boolean high) throws MPIException -{ - MPI.check(); - return new Intracomm(merge_jni(high)); -} - -private native long merge_jni(boolean high); - -/** - * Java binding of {@code MPI_COMM_GET_PARENT}. - * @return the parent communicator - * @throws MPIException - */ -public static Intercomm getParent() throws MPIException -{ - MPI.check(); - return new Intercomm(getParent_jni()); -} - -private native static long getParent_jni() throws MPIException; + protected Intercomm(long handle) + { + super(handle); + } + + protected Intercomm(long[] commRequest) + { + super(commRequest); + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_DUP}. + *

It is recommended to use {@link #dup} instead of {@link #clone} + * because the last can't throw an {@link mpi.MPIException}. + * @return copy of this communicator + */ + @Override public Intercomm clone() + { + try + { + return dup(); + } + catch(MPIException e) + { + throw new RuntimeException(e.getMessage()); + } + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_DUP}. + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public Intercomm dup() throws MPIException + { + MPI.check(); + return new Intercomm(dup(handle)); + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_IDUP}. + *

The new communicator can't be used before the operation completes. + * The request object must be obtained calling {@link #getRequest}. + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public Intercomm iDup() throws MPIException + { + MPI.check(); + return new Intercomm(iDup(handle)); + } + + /** + * Duplicates this communicator with the info object used in the call. + *

Java binding of {@code MPI_COMM_DUP_WITH_INFO}. + * @param info info object to associate with the new communicator + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public Intercomm dupWithInfo(Info info) throws MPIException + { + MPI.check(); + return new Intercomm(dupWithInfo(handle, info.handle)); + } + + // Inter-Communication + + /** + * Size of remote group. + *

Java binding of the MPI operation {@code MPI_COMM_REMOTE_SIZE}. + * @return number of process in remote group of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getRemoteSize() throws MPIException + { + MPI.check(); + return getRemoteSize_jni(); + } + + private native int getRemoteSize_jni() throws MPIException; + + /** + * Return the remote group. + *

Java binding of the MPI operation {@code MPI_COMM_REMOTE_GROUP}. + * @return remote group of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Group getRemoteGroup() throws MPIException + { + MPI.check(); + return new Group(getRemoteGroup_jni()); + } + + private native long getRemoteGroup_jni(); + + /** + * Creates an intracommuncator from an intercommunicator + *

Java binding of the MPI operation {@code MPI_INTERCOMM_MERGE}. + * @param high true if the local group has higher ranks in combined group + * @return new intra-communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Intracomm merge(boolean high) throws MPIException + { + MPI.check(); + return new Intracomm(merge_jni(high)); + } + + private native long merge_jni(boolean high); + + /** + * Java binding of {@code MPI_COMM_GET_PARENT}. + * @return the parent communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Intercomm getParent() throws MPIException + { + MPI.check(); + return new Intercomm(getParent_jni()); + } + + private native static long getParent_jni() throws MPIException; } // Intercomm diff --git a/ompi/mpi/java/java/Intracomm.java b/ompi/mpi/java/java/Intracomm.java index fb0b042730b..bd6c9542d08 100644 --- a/ompi/mpi/java/java/Intracomm.java +++ b/ompi/mpi/java/java/Intracomm.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : Intracommm.java * Author : Sang Lim, Xinying Li, Bryan Carpenter * Created : Thu Apr 9 12:22:15 1998 @@ -41,16 +43,16 @@ * Updated : $Date: 2002/12/16 15:25:13 $ * Copyright: Northeast Parallel Architectures Center * at Syracuse University 1998 - */ - -/* + * + * + * * IMPLEMENTATION DETAILS - * + * * All methods with buffers that can be direct or non direct have * a companion argument 'db' which is true if the buffer is direct. * For example, if the buffer argument is recvBuf, the companion * argument will be 'rdb', meaning if the receive buffer is direct. - * + * * Checking if a buffer is direct is faster in Java than C. */ package mpi; @@ -63,767 +65,821 @@ */ public class Intracomm extends Comm { -protected Intracomm() -{ -} - -protected Intracomm(long handle) -{ - super(handle); -} - -protected Intracomm(long[] commRequest) -{ - super(commRequest); -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_DUP}. - *

It is recommended to use {@link #dup} instead of {@link #clone} - * because the last can't throw an {@link mpi.MPIException}. - * @return copy of this communicator - */ -@Override public Intracomm clone() -{ - try - { - return dup(); - } - catch(MPIException e) - { - throw new RuntimeException(e.getMessage()); - } -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_DUP}. - * @return copy of this communicator - * @throws MPIException - */ -@Override public Intracomm dup() throws MPIException -{ - MPI.check(); - return new Intracomm(dup(handle)); -} - -/** - * Duplicates this communicator. - *

Java binding of {@code MPI_COMM_IDUP}. - *

The new communicator can't be used before the operation completes. - * The request object must be obtained calling {@link #getRequest}. - * @return copy of this communicator - * @throws MPIException - */ -@Override public Intracomm iDup() throws MPIException -{ - MPI.check(); - return new Intracomm(iDup(handle)); -} - -/** - * Partition the group associated with this communicator and create - * a new communicator within each subgroup. - *

Java binding of the MPI operation {@code MPI_COMM_SPLIT}. - * @param colour control of subset assignment - * @param key control of rank assignment - * @return new communicator - * @throws MPIException - */ -public final Intracomm split(int colour, int key) throws MPIException -{ - MPI.check(); - return new Intracomm(split(handle, colour, key)); -} - -private native long split(long comm, int colour, int key) throws MPIException; - -/** - * Create a new communicator. - *

Java binding of the MPI operation {@code MPI_COMM_CREATE}. - * @param group group which is a subset of the group of this communicator - * @return new communicator - * @throws MPIException - */ -public final Intracomm create(Group group) throws MPIException -{ - MPI.check(); - return new Intracomm(create(handle, group.handle)); -} - -private native long create(long comm, long group); - -// Topology Constructors - -/** - * Creates a communicator to which the Cartesian topology - * information is attached. - * Create a cartesian topology communicator whose group is a subset - * of the group of this communicator. - *

Java binding of the MPI operation {@code MPI_CART_CREATE}. - *

The number of dimensions of the Cartesian grid is taken to be the - * size of the {@code dims} argument. The array {@code periods} must - * be the same size. - * @param dims the number of processes in each dimension - * @param periods {@code true} if grid is periodic, - * {@code false} if not, in each dimension - * @param reorder {@code true} if ranking may be reordered, - * {@code false} if not - * @return new cartesian topology communicator - * @throws MPIException - */ -public final CartComm createCart(int[] dims, boolean[] periods, boolean reorder) - throws MPIException -{ - MPI.check(); - return new CartComm(createCart(handle, dims, periods, reorder)); -} - -private native long createCart( - long comm, int[] dims, boolean[] periods, boolean reorder) - throws MPIException; - -/** - * Creates a communicator to which the graph topology information is attached. - *

Java binding of the MPI operation {@code MPI_GRAPH_CREATE}. - *

The number of nodes in the graph, nnodes, is taken - * to be size of the {@code index} argument. - * @param index node degrees - * @param edges graph edges - * @param reorder {@code true} if ranking may be reordered, - * {@code false} if not - * @return new graph topology communicator - * @throws MPIException - */ -public final GraphComm createGraph(int[] index, int[] edges, boolean reorder) - throws MPIException -{ - MPI.check(); - return new GraphComm(createGraph(handle, index, edges, reorder)); -} - -private native long createGraph( - long comm, int[] index, int[] edges, boolean reorder) - throws MPIException; - -/** - * Creates a communicator to which the distributed graph topology - * information is attached. - *

Java binding of the MPI operation {@code MPI_DIST_GRAPH_CREATE}. - *

The number of source nodes is the size of the {@code sources} argument. - * @param sources source nodes for which this process specifies edges - * @param degrees number of destinations for each source node - * @param destinations destination nodes for the source nodes - * @param weights weights for source to destination edges - * @param info hints on optimization and interpretation of weights - * @param reorder the process may be reordered (true) or not (false) - * @return communicator with distributed graph topology - * @throws MPIException - */ -public final GraphComm createDistGraph( - int[] sources, int[] degrees, int[] destinations, - int[] weights, Info info, boolean reorder) - throws MPIException -{ - MPI.check(); - - return new GraphComm(createDistGraph( - handle, sources, degrees, destinations, - weights, info.handle, reorder, true)); -} - -/** - * Creates a communicator to which the distributed graph topology - * information is attached. - *

Java binding of the MPI operation {@code MPI_DIST_GRAPH_CREATE} - * using {@code MPI_UNWEIGHTED}. - *

The number of source nodes is the size of the {@code sources} argument. - * @param sources source nodes for which this process specifies edges - * @param degrees number of destinations for each source node - * @param destinations destination nodes for the source nodes - * @param info hints on optimization and interpretation of weights - * @param reorder the process may be reordered (true) or not (false) - * @return communicator with distributed graph topology - * @throws MPIException - */ -public final GraphComm createDistGraph( - int[] sources, int[] degrees, int[] destinations, - Info info, boolean reorder) - throws MPIException -{ - MPI.check(); - - return new GraphComm(createDistGraph( - handle, sources, degrees, destinations, - null, info.handle, reorder, false)); -} - -private native long createDistGraph( - long comm, int[] sources, int[] degrees, int[] destinations, - int[] weights, long info, boolean reorder, boolean weighted) - throws MPIException; - - -/** - * Creates a communicator to which the distributed graph topology - * information is attached. - *

Java binding of the MPI operation {@code MPI_DIST_GRAPH_CREATE_ADJACENT}. - *

The number of source/destination nodes is the size of the - * {@code sources}/{@code destinations} argument. - * @param sources ranks of processes for which the calling process - * is a destination - * @param sourceWeights weights of the edges into the calling process - * @param destinations ranks of processes for which the calling process - * is a source - * @param destWeights weights of the edges out of the calling process - * @param info hints on optimization and interpretation of weights - * @param reorder the process may be reordered (true) or not (false) - * @return communicator with distributed graph topology - * @throws MPIException - */ -public final GraphComm createDistGraphAdjacent( - int[] sources, int[] sourceWeights, - int[] destinations, int[] destWeights, Info info, boolean reorder) - throws MPIException -{ - MPI.check(); - - return new GraphComm(createDistGraphAdjacent( - handle, sources, sourceWeights, destinations, - destWeights, info.handle, reorder, true)); -} - -/** - * Creates a communicator to which the distributed graph topology - * information is attached. - *

Java binding of the MPI operation {@code MPI_DIST_GRAPH_CREATE_ADJACENT} - * using {@code MPI_UNWEIGHTED}. - *

The number of source/destination nodes is the size of the - * {@code sources}/{@code destinations} argument. - * @param sources ranks of processes for which the calling process - * is a destination - * @param destinations ranks of processes for which the calling process - * is a source - * @param info hints on optimization and interpretation of weights - * @param reorder the process may be reordered (true) or not (false) - * @return communicator with distributed graph topology - * @throws MPIException - */ -public final GraphComm createDistGraphAdjacent( - int[] sources, int[] destinations, Info info, boolean reorder) - throws MPIException -{ - MPI.check(); - - return new GraphComm(createDistGraphAdjacent( - handle, sources, null, destinations, null, - info.handle, reorder, false)); -} - -private native long createDistGraphAdjacent( - long comm, int[] sources, int []sourceweights, int[] destinations, - int[] distweights, long info, boolean reorder, boolean weighted) - throws MPIException; - - -/** - * Perform a prefix reduction on data distributed across the group. - *

Java binding of the MPI operation {@code MPI_SCAN}. - * @param sendbuf send buffer array - * @param recvbuf receive buffer array - * @param count number of items in input buffer - * @param type data type of each item in input buffer - * @param op reduce operation - * @throws MPIException - */ -public final void scan(Object sendbuf, Object recvbuf, - int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = type.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = type.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - op.setDatatype(type); - - scan(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, - count, type.handle, type.baseType, op, op.handle); -} - -/** - * Perform a prefix reduction on data distributed across the group. - *

Java binding of the MPI operation {@code MPI_SCAN} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param recvbuf receive buffer array - * @param count number of items in input buffer - * @param type data type of each item in input buffer - * @param op reduce operation - * @throws MPIException - */ -public final void scan(Object recvbuf, int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - int recvoff = 0; - boolean rdb = false; - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = type.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - op.setDatatype(type); - - scan(handle, null, false, 0, recvbuf, rdb, recvoff, - count, type.handle, type.baseType, op, op.handle); -} - -private native void scan( - long comm, Object sendbuf, boolean sdb, int sendoff, - Object recvbuf, boolean rdb, int recvoff, int count, - long type, int baseType, Op jOp, long hOp) throws MPIException; - -/** - * Perform a prefix reduction on data distributed across the group. - *

Java binding of the MPI operation {@code MPI_ISCAN}. - * @param sendbuf send buffer array - * @param recvbuf receive buffer array - * @param count number of items in input buffer - * @param type data type of each item in input buffer - * @param op reduce operation - * @return communication request - * @throws MPIException - */ -public final Request iScan(Buffer sendbuf, Buffer recvbuf, - int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iScan(handle, sendbuf, recvbuf, count, - type.handle, type.baseType, op, op.handle)); -} - -/** - * Perform a prefix reduction on data distributed across the group. - *

Java binding of the MPI operation {@code MPI_ISCAN} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf send/receive buffer array - * @param count number of items in buffer - * @param type data type of each item in buffer - * @param op reduce operation - * @return communication request - * @throws MPIException - */ -public final Request iScan(Buffer buf, int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - assertDirectBuffer(buf); - - return new Request(iScan( - handle, null, buf, count, - type.handle, type.baseType, op, op.handle)); -} - -private native long iScan( - long comm, Buffer sendbuf, Buffer recvbuf, int count, - long type, int baseType, Op jOp, long hOp) throws MPIException; - -/** - * Perform a prefix reduction on data distributed across the group. - *

Java binding of the MPI operation {@code MPI_EXSCAN}. - * @param sendbuf send buffer array - * @param recvbuf receive buffer array - * @param count number of items in input buffer - * @param type data type of each item in input buffer - * @param op reduce operation - * @throws MPIException - */ -public final void exScan(Object sendbuf, Object recvbuf, - int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - - int sendoff = 0, - recvoff = 0; - - boolean sdb = false, - rdb = false; - - if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) - { - sendoff = type.getOffset(sendbuf); - sendbuf = ((Buffer)sendbuf).array(); - } - - if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) - { - recvoff = type.getOffset(recvbuf); - recvbuf = ((Buffer)recvbuf).array(); - } - - op.setDatatype(type); - - exScan(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, - count, type.handle, type.baseType, op, op.handle); -} - -/** - * Perform a prefix reduction on data distributed across the group. - *

Java binding of the MPI operation {@code MPI_EXSCAN} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf receive buffer array - * @param count number of items in input buffer - * @param type data type of each item in input buffer - * @param op reduce operation - * @throws MPIException - */ -public final void exScan(Object buf, int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - op.setDatatype(type); - - exScan(handle, null, false, 0, buf, db, off, count, - type.handle, type.baseType, op, op.handle); -} - -private native void exScan( - long comm, Object sendbuf, boolean sdb, int sendoff, - Object recvbuf, boolean rdb, int recvoff, int count, - long type, int baseType, Op jOp, long hOp) throws MPIException; - -/** - * Perform a prefix reduction on data distributed across the group. - *

Java binding of the MPI operation {@code MPI_IEXSCAN}. - * @param sendbuf send buffer array - * @param recvbuf receive buffer array - * @param count number of items in input buffer - * @param type data type of each item in input buffer - * @param op reduce operation - * @return communication request - * @throws MPIException - */ -public final Request iExScan(Buffer sendbuf, Buffer recvbuf, - int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - assertDirectBuffer(sendbuf, recvbuf); - - return new Request(iExScan(handle, sendbuf, recvbuf, count, - type.handle, type.baseType, op, op.handle)); -} - -/** - * Perform a prefix reduction on data distributed across the group. - *

Java binding of the MPI operation {@code MPI_IEXSCAN} - * using {@code MPI_IN_PLACE} instead of the send buffer. - * @param buf receive buffer array - * @param count number of items in input buffer - * @param type data type of each item in input buffer - * @param op reduce operation - * @return communication request - * @throws MPIException - */ -public final Request iExScan(Buffer buf, int count, Datatype type, Op op) - throws MPIException -{ - MPI.check(); - op.setDatatype(type); - assertDirectBuffer(buf); - - return new Request(iExScan( - handle, null, buf, count, - type.handle, type.baseType, op, op.handle)); -} - -private native long iExScan( - long comm, Buffer sendbuf, Buffer recvbuf, int count, - long type, int baseType, Op jOp, long hOp) throws MPIException; - -/** - * Java binding of {@code MPI_OPEN_PORT} using {@code MPI_INFO_NULL}. - * @return port name - * @throws MPIException - */ -public static String openPort() throws MPIException -{ - MPI.check(); - return openPort(Info.NULL); -} - -/** - * Java binding of {@code MPI_OPEN_PORT}. - * @param info implementation-specific information - * @return port name - * @throws MPIException - */ -public static String openPort(Info info) throws MPIException -{ - MPI.check(); - return openPort(info.handle); -} - -private native static String openPort(long info) throws MPIException; - -/** - * Java binding of {@code MPI_CLOSE_PORT}. - * @param name port name - * @throws MPIException - */ -public static void closePort(String name) throws MPIException -{ - MPI.check(); - closePort_jni(name); -} - -private native static void closePort_jni(String name) throws MPIException; - -/** - * Java binding of {@code MPI_COMM_ACCEPT} using {@code MPI_INFO_NULL}. 
- * @param port port name - * @param root rank in comm of root node - * @return intercommunicator with client as remote group - */ -public final Intercomm accept(String port, int root) throws MPIException -{ - MPI.check(); - return new Intercomm(accept(handle, port, Info.NULL, root)); -} - -/** - * Java binding of {@code MPI_COMM_ACCEPT}. - * @param port port name - * @param info implementation-specific information - * @param root rank in comm of root node - * @return intercommunicator with client as remote group - * @throws MPIException - */ -public final Intercomm accept(String port, Info info, int root) - throws MPIException -{ - MPI.check(); - return new Intercomm(accept(handle, port, info.handle, root)); -} - -private native long accept(long comm, String port, long info, int root) - throws MPIException; - -/** - * Java binding of {@code MPI_COMM_CONNECT} using {@code MPI_INFO_NULL}. - * @param port port name - * @param root rank in comm of root node - * @return intercommunicator with server as remote group - * @throws MPIException - */ -public final Intercomm connect(String port, int root) throws MPIException -{ - MPI.check(); - return new Intercomm(connect(handle, port, Info.NULL, root)); -} - -/** - * Java binding of {@code MPI_COMM_CONNECT}. - * @param port port name - * @param info implementation-specific information - * @param root rank in comm of root node - * @return intercommunicator with server as remote group - * @throws MPIException - */ -public final Intercomm connect(String port, Info info, int root) - throws MPIException -{ - MPI.check(); - return new Intercomm(connect(handle, port, info.handle, root)); -} - -private native long connect(long comm, String port, long info, int root) - throws MPIException; - -/** - * Java binding of {@code MPI_PUBLISH_NAME} using {@code MPI_INFO_NULL}. 
- * @param service service name - * @param port port name - * @throws MPIException - */ -public static void publishName(String service, String port) - throws MPIException -{ - MPI.check(); - publishName(service, Info.NULL, port); -} - -/** - * Java binding of {@code MPI_PUBLISH_NAME}. - * @param service service name - * @param info implementation-specific information - * @param port port name - * @throws MPIException - */ -public static void publishName(String service, Info info, String port) - throws MPIException -{ - MPI.check(); - publishName(service, info.handle, port); -} - -private native static void publishName(String service, long info, String port) - throws MPIException; - -/** - * Java binding of {@code MPI_UNPUBLISH_NAME} using {@code MPI_INFO_NULL}. - * @param service service name - * @param port port name - * @throws MPIException - */ -public static void unpublishName(String service, String port) - throws MPIException -{ - MPI.check(); - unpublishName(service, Info.NULL, port); -} - -/** - * Java binding of {@code MPI_UNPUBLISH_NAME}. - * @param service service name - * @param info implementation-specific information - * @param port port name - * @throws MPIException - */ -public static void unpublishName(String service, Info info, String port) - throws MPIException -{ - MPI.check(); - unpublishName(service, info.handle, port); -} - -private native static void unpublishName(String service, long info, String port) - throws MPIException; - -/** - * Java binding of {@code MPI_LOOKUP_NAME} using {@code MPI_INFO_NULL}. - * @param service service name - * @return port name - * @throws MPIException - */ -public static String lookupName(String service) throws MPIException -{ - MPI.check(); - return lookupName(service, Info.NULL); -} - -/** - * Java binding of {@code MPI_LOOKUP_NAME}. 
- * @param service service name - * @param info mplementation-specific information - * @return port name - * @throws MPIException - */ -public static String lookupName(String service, Info info) throws MPIException -{ - MPI.check(); - return lookupName(service, info.handle); -} - -private native static String lookupName(String service, long info) - throws MPIException; - -/** - * Java binding of {@code MPI_COMM_SPAWN}. - * This intracommunicator will contain the group of spawned processes. - * @param command name of program to be spawned - * @param argv arguments to command; if this parameter is null, - * {@code MPI_ARGV_NULL} will be used. - * @param maxprocs maximum number of processes to start - * @param info info object telling the runtime where - * and how to start the processes - * @param root rank of process in which previous arguments are examined - * @param errcodes one code per process; if this parameter is null, - * {@code MPI_ERRCODES_IGNORE} will be used. - * @return intercommunicator between original group and the newly spawned group - * @throws MPIException - */ -public final Intercomm spawn(String command, String[] argv, int maxprocs, - Info info, int root, int[] errcodes) - throws MPIException -{ - MPI.check(); - - return new Intercomm(spawn(handle, command, argv, maxprocs, - info.handle, root, errcodes)); -} - -private native long spawn(long comm, String command, String[] argv, - int maxprocs, long info, int root, int[] errcodes) - throws MPIException; - -/** - * Java binding of {@code MPI_COMM_SPAWN_MULTIPLE}. - * This intracommunicator will contain the group of spawned processes. - * @param commands programs to be executed - * @param argv arguments for commands; if this parameter is null, - * {@code MPI_ARGVS_NULL} will be used. 
- * @param maxprocs maximum number of processes to start for each command - * @param info info objects telling the runtime where - * and how to start the processes - * @param root rank of process in which previous arguments are examined - * @param errcodes one code per process; if this parameter is null, - * {@code MPI_ERRCODES_IGNORE} will be used. - * @return intercommunicator between original group and the newly spawned group - * @throws MPIException - */ -public final Intercomm spawnMultiple( - String[] commands, String[][] argv, int[] maxprocs, - Info[] info, int root, int[] errcodes) - throws MPIException -{ - MPI.check(); - - long hInfo[] = new long[info.length]; - - for(int i = 0; i < info.length; i++) - hInfo[i] = info[i].handle; - - return new Intercomm(spawnMultiple(handle, commands, argv, maxprocs, - hInfo, root, errcodes)); -} - -private native long spawnMultiple( - long comm, String[] commands, String[][] argv, int[] maxprocs, - long[] info, int root, int[] errcodes) throws MPIException; + protected Intracomm() + { + } + + protected Intracomm(long handle) + { + super(handle); + } + + protected Intracomm(long[] commRequest) + { + super(commRequest); + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_DUP}. + *

It is recommended to use {@link #dup} instead of {@link #clone} + * because the last can't throw an {@link mpi.MPIException}. + * @return copy of this communicator + */ + @Override public Intracomm clone() + { + try + { + return dup(); + } + catch(MPIException e) + { + throw new RuntimeException(e.getMessage()); + } + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_DUP}. + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public Intracomm dup() throws MPIException + { + MPI.check(); + return new Intracomm(dup(handle)); + } + + /** + * Duplicates this communicator. + *

Java binding of {@code MPI_COMM_IDUP}. + *

The new communicator can't be used before the operation completes. + * The request object must be obtained calling {@link #getRequest}. + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public Intracomm iDup() throws MPIException + { + MPI.check(); + return new Intracomm(iDup(handle)); + } + + /** + * Duplicates this communicator with the info object used in the call. + *

Java binding of {@code MPI_COMM_DUP_WITH_INFO}. + * @param info info object to associate with the new communicator + * @return copy of this communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public Intracomm dupWithInfo(Info info) throws MPIException + { + MPI.check(); + return new Intracomm(dupWithInfo(handle, info.handle)); + } + + /** + * Partition the group associated with this communicator and create + * a new communicator within each subgroup. + *

Java binding of the MPI operation {@code MPI_COMM_SPLIT}. + * @param colour control of subset assignment + * @param key control of rank assignment + * @return new communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Intracomm split(int colour, int key) throws MPIException + { + MPI.check(); + return new Intracomm(split(handle, colour, key)); + } + + private native long split(long comm, int colour, int key) throws MPIException; + + /** + * Partition the group associated with this communicator and create + * a new communicator within each subgroup. + *

Java binding of the MPI operation {@code MPI_COMM_SPLIT_TYPE}. + * @param splitType type of processes to be grouped together + * @param key control of rank assignment + * @param info info argument + * @return new communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Intracomm splitType(int splitType, int key, Info info) throws MPIException + { + MPI.check(); + return new Intracomm(splitType(handle, splitType, key, info.handle)); + } + + private native long splitType(long comm, int colour, int key, long info) throws MPIException; + + /** + * Create a new communicator. + *

Java binding of the MPI operation {@code MPI_COMM_CREATE}. + * @param group group which is a subset of the group of this communicator + * @return new communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Intracomm create(Group group) throws MPIException + { + MPI.check(); + return new Intracomm(create(handle, group.handle)); + } + + private native long create(long comm, long group); + + /** + * Create a new intracommunicator for the given group. + *

Java binding of the MPI operation {@code MPI_COMM_CREATE_GROUP}. + * @param group group which is a subset of the group of this communicator + * @param tag an integer tag + * @return new communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Intracomm createGroup(Group group, int tag) throws MPIException + { + MPI.check(); + return new Intracomm(createGroup(handle, group.handle, tag)); + } + + private native long createGroup(long comm, long group, int tag); + + // Topology Constructors + + /** + * Creates a communicator to which the Cartesian topology + * information is attached. + * Create a cartesian topology communicator whose group is a subset + * of the group of this communicator. + *

Java binding of the MPI operation {@code MPI_CART_CREATE}. + *

The number of dimensions of the Cartesian grid is taken to be the + * size of the {@code dims} argument. The array {@code periods} must + * be the same size. + * @param dims the number of processes in each dimension + * @param periods {@code true} if grid is periodic, + * {@code false} if not, in each dimension + * @param reorder {@code true} if ranking may be reordered, + * {@code false} if not + * @return new cartesian topology communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final CartComm createCart(int[] dims, boolean[] periods, boolean reorder) + throws MPIException + { + MPI.check(); + return new CartComm(createCart(handle, dims, periods, reorder)); + } + + private native long createCart( + long comm, int[] dims, boolean[] periods, boolean reorder) + throws MPIException; + + /** + * Creates a communicator to which the graph topology information is attached. + *

Java binding of the MPI operation {@code MPI_GRAPH_CREATE}. + *

The number of nodes in the graph, nnodes, is taken + * to be the size of the {@code index} argument. + * @param index node degrees + * @param edges graph edges + * @param reorder {@code true} if ranking may be reordered, + * {@code false} if not + * @return new graph topology communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final GraphComm createGraph(int[] index, int[] edges, boolean reorder) + throws MPIException + { + MPI.check(); + return new GraphComm(createGraph(handle, index, edges, reorder)); + } + + private native long createGraph( + long comm, int[] index, int[] edges, boolean reorder) + throws MPIException; + + /** + * Creates a communicator to which the distributed graph topology + * information is attached. + *

Java binding of the MPI operation {@code MPI_DIST_GRAPH_CREATE}. + *

The number of source nodes is the size of the {@code sources} argument. + * @param sources source nodes for which this process specifies edges + * @param degrees number of destinations for each source node + * @param destinations destination nodes for the source nodes + * @param weights weights for source to destination edges + * @param info hints on optimization and interpretation of weights + * @param reorder the process may be reordered (true) or not (false) + * @return communicator with distributed graph topology + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final GraphComm createDistGraph( + int[] sources, int[] degrees, int[] destinations, + int[] weights, Info info, boolean reorder) + throws MPIException + { + MPI.check(); + + return new GraphComm(createDistGraph( + handle, sources, degrees, destinations, + weights, info.handle, reorder, true)); + } + + /** + * Creates a communicator to which the distributed graph topology + * information is attached. + *

Java binding of the MPI operation {@code MPI_DIST_GRAPH_CREATE} + * using {@code MPI_UNWEIGHTED}. + *

The number of source nodes is the size of the {@code sources} argument. + * @param sources source nodes for which this process specifies edges + * @param degrees number of destinations for each source node + * @param destinations destination nodes for the source nodes + * @param info hints on optimization and interpretation of weights + * @param reorder the process may be reordered (true) or not (false) + * @return communicator with distributed graph topology + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final GraphComm createDistGraph( + int[] sources, int[] degrees, int[] destinations, + Info info, boolean reorder) + throws MPIException + { + MPI.check(); + + return new GraphComm(createDistGraph( + handle, sources, degrees, destinations, + null, info.handle, reorder, false)); + } + + private native long createDistGraph( + long comm, int[] sources, int[] degrees, int[] destinations, + int[] weights, long info, boolean reorder, boolean weighted) + throws MPIException; + + + /** + * Creates a communicator to which the distributed graph topology + * information is attached. + *

Java binding of the MPI operation {@code MPI_DIST_GRAPH_CREATE_ADJACENT}. + *

The number of source/destination nodes is the size of the + * {@code sources}/{@code destinations} argument. + * @param sources ranks of processes for which the calling process + * is a destination + * @param sourceWeights weights of the edges into the calling process + * @param destinations ranks of processes for which the calling process + * is a source + * @param destWeights weights of the edges out of the calling process + * @param info hints on optimization and interpretation of weights + * @param reorder the process may be reordered (true) or not (false) + * @return communicator with distributed graph topology + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final GraphComm createDistGraphAdjacent( + int[] sources, int[] sourceWeights, + int[] destinations, int[] destWeights, Info info, boolean reorder) + throws MPIException + { + MPI.check(); + + return new GraphComm(createDistGraphAdjacent( + handle, sources, sourceWeights, destinations, + destWeights, info.handle, reorder, true)); + } + + /** + * Creates a communicator to which the distributed graph topology + * information is attached. + *

Java binding of the MPI operation {@code MPI_DIST_GRAPH_CREATE_ADJACENT} + * using {@code MPI_UNWEIGHTED}. + *

The number of source/destination nodes is the size of the + * {@code sources}/{@code destinations} argument. + * @param sources ranks of processes for which the calling process + * is a destination + * @param destinations ranks of processes for which the calling process + * is a source + * @param info hints on optimization and interpretation of weights + * @param reorder the process may be reordered (true) or not (false) + * @return communicator with distributed graph topology + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final GraphComm createDistGraphAdjacent( + int[] sources, int[] destinations, Info info, boolean reorder) + throws MPIException + { + MPI.check(); + + return new GraphComm(createDistGraphAdjacent( + handle, sources, null, destinations, null, + info.handle, reorder, false)); + } + + private native long createDistGraphAdjacent( + long comm, int[] sources, int []sourceweights, int[] destinations, + int[] distweights, long info, boolean reorder, boolean weighted) + throws MPIException; + + + /** + * Perform a prefix reduction on data distributed across the group. + *

Java binding of the MPI operation {@code MPI_SCAN}. + * @param sendbuf send buffer array + * @param recvbuf receive buffer array + * @param count number of items in input buffer + * @param type data type of each item in input buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void scan(Object sendbuf, Object recvbuf, + int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = type.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = type.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + op.setDatatype(type); + + scan(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, + count, type.handle, type.baseType, op, op.handle); + } + + /** + * Perform a prefix reduction on data distributed across the group. + *

Java binding of the MPI operation {@code MPI_SCAN} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param recvbuf receive buffer array + * @param count number of items in input buffer + * @param type data type of each item in input buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void scan(Object recvbuf, int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + int recvoff = 0; + boolean rdb = false; + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = type.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + op.setDatatype(type); + + scan(handle, null, false, 0, recvbuf, rdb, recvoff, + count, type.handle, type.baseType, op, op.handle); + } + + private native void scan( + long comm, Object sendbuf, boolean sdb, int sendoff, + Object recvbuf, boolean rdb, int recvoff, int count, + long type, int baseType, Op jOp, long hOp) throws MPIException; + + /** + * Perform a prefix reduction on data distributed across the group. + *

Java binding of the MPI operation {@code MPI_ISCAN}. + * @param sendbuf send buffer array + * @param recvbuf receive buffer array + * @param count number of items in input buffer + * @param type data type of each item in input buffer + * @param op reduce operation + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iScan(Buffer sendbuf, Buffer recvbuf, + int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iScan(handle, sendbuf, recvbuf, count, + type.handle, type.baseType, op, op.handle)); + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Perform a prefix reduction on data distributed across the group. + *

Java binding of the MPI operation {@code MPI_ISCAN} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf send/receive buffer array + * @param count number of items in buffer + * @param type data type of each item in buffer + * @param op reduce operation + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iScan(Buffer buf, int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + assertDirectBuffer(buf); + Request req = new Request(iScan( + handle, null, buf, count, + type.handle, type.baseType, op, op.handle)); + req.addSendBufRef(buf); + return req; + } + + private native long iScan( + long comm, Buffer sendbuf, Buffer recvbuf, int count, + long type, int baseType, Op jOp, long hOp) throws MPIException; + + /** + * Perform a prefix reduction on data distributed across the group. + *

Java binding of the MPI operation {@code MPI_EXSCAN}. + * @param sendbuf send buffer array + * @param recvbuf receive buffer array + * @param count number of items in input buffer + * @param type data type of each item in input buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void exScan(Object sendbuf, Object recvbuf, + int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + + int sendoff = 0, + recvoff = 0; + + boolean sdb = false, + rdb = false; + + if(sendbuf instanceof Buffer && !(sdb = ((Buffer)sendbuf).isDirect())) + { + sendoff = type.getOffset(sendbuf); + sendbuf = ((Buffer)sendbuf).array(); + } + + if(recvbuf instanceof Buffer && !(rdb = ((Buffer)recvbuf).isDirect())) + { + recvoff = type.getOffset(recvbuf); + recvbuf = ((Buffer)recvbuf).array(); + } + + op.setDatatype(type); + + exScan(handle, sendbuf, sdb, sendoff, recvbuf, rdb, recvoff, + count, type.handle, type.baseType, op, op.handle); + } + + /** + * Perform a prefix reduction on data distributed across the group. + *

Java binding of the MPI operation {@code MPI_EXSCAN} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf receive buffer array + * @param count number of items in input buffer + * @param type data type of each item in input buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void exScan(Object buf, int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + op.setDatatype(type); + + exScan(handle, null, false, 0, buf, db, off, count, + type.handle, type.baseType, op, op.handle); + } + + private native void exScan( + long comm, Object sendbuf, boolean sdb, int sendoff, + Object recvbuf, boolean rdb, int recvoff, int count, + long type, int baseType, Op jOp, long hOp) throws MPIException; + + /** + * Perform a prefix reduction on data distributed across the group. + *

Java binding of the MPI operation {@code MPI_IEXSCAN}. + * @param sendbuf send buffer array + * @param recvbuf receive buffer array + * @param count number of items in input buffer + * @param type data type of each item in input buffer + * @param op reduce operation + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iExScan(Buffer sendbuf, Buffer recvbuf, + int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + assertDirectBuffer(sendbuf, recvbuf); + Request req = new Request(iExScan(handle, sendbuf, recvbuf, count, + type.handle, type.baseType, op, op.handle)); + req.addSendBufRef(sendbuf); + req.addRecvBufRef(recvbuf); + return req; + } + + /** + * Perform a prefix reduction on data distributed across the group. + *

Java binding of the MPI operation {@code MPI_IEXSCAN} + * using {@code MPI_IN_PLACE} instead of the send buffer. + * @param buf receive buffer array + * @param count number of items in input buffer + * @param type data type of each item in input buffer + * @param op reduce operation + * @return communication request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request iExScan(Buffer buf, int count, Datatype type, Op op) + throws MPIException + { + MPI.check(); + op.setDatatype(type); + assertDirectBuffer(buf); + Request req = new Request(iExScan( + handle, null, buf, count, + type.handle, type.baseType, op, op.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iExScan( + long comm, Buffer sendbuf, Buffer recvbuf, int count, + long type, int baseType, Op jOp, long hOp) throws MPIException; + + /** + * Java binding of {@code MPI_OPEN_PORT} using {@code MPI_INFO_NULL}. + * @return port name + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static String openPort() throws MPIException + { + MPI.check(); + return openPort(Info.NULL); + } + + /** + * Java binding of {@code MPI_OPEN_PORT}. + * @param info implementation-specific information + * @return port name + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static String openPort(Info info) throws MPIException + { + MPI.check(); + return openPort(info.handle); + } + + private native static String openPort(long info) throws MPIException; + + /** + * Java binding of {@code MPI_CLOSE_PORT}. + * @param name port name + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public static void closePort(String name) throws MPIException + { + MPI.check(); + closePort_jni(name); + } + + private native static void closePort_jni(String name) throws MPIException; + + /** + * Java binding of {@code MPI_COMM_ACCEPT} using {@code MPI_INFO_NULL}. + * @param port port name + * @param root rank in comm of root node + * @return intercommunicator with client as remote group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Intercomm accept(String port, int root) throws MPIException + { + MPI.check(); + return new Intercomm(accept(handle, port, Info.NULL, root)); + } + + /** + * Java binding of {@code MPI_COMM_ACCEPT}. + * @param port port name + * @param info implementation-specific information + * @param root rank in comm of root node + * @return intercommunicator with client as remote group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Intercomm accept(String port, Info info, int root) + throws MPIException + { + MPI.check(); + return new Intercomm(accept(handle, port, info.handle, root)); + } + + private native long accept(long comm, String port, long info, int root) + throws MPIException; + + /** + * Java binding of {@code MPI_COMM_CONNECT} using {@code MPI_INFO_NULL}. + * @param port port name + * @param root rank in comm of root node + * @return intercommunicator with server as remote group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Intercomm connect(String port, int root) throws MPIException + { + MPI.check(); + return new Intercomm(connect(handle, port, Info.NULL, root)); + } + + /** + * Java binding of {@code MPI_COMM_CONNECT}. 
+ * @param port port name + * @param info implementation-specific information + * @param root rank in comm of root node + * @return intercommunicator with server as remote group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Intercomm connect(String port, Info info, int root) + throws MPIException + { + MPI.check(); + return new Intercomm(connect(handle, port, info.handle, root)); + } + + private native long connect(long comm, String port, long info, int root) + throws MPIException; + + /** + * Java binding of {@code MPI_PUBLISH_NAME} using {@code MPI_INFO_NULL}. + * @param service service name + * @param port port name + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void publishName(String service, String port) + throws MPIException + { + MPI.check(); + publishName(service, Info.NULL, port); + } + + /** + * Java binding of {@code MPI_PUBLISH_NAME}. + * @param service service name + * @param info implementation-specific information + * @param port port name + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void publishName(String service, Info info, String port) + throws MPIException + { + MPI.check(); + publishName(service, info.handle, port); + } + + private native static void publishName(String service, long info, String port) + throws MPIException; + + /** + * Java binding of {@code MPI_UNPUBLISH_NAME} using {@code MPI_INFO_NULL}. + * @param service service name + * @param port port name + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void unpublishName(String service, String port) + throws MPIException + { + MPI.check(); + unpublishName(service, Info.NULL, port); + } + + /** + * Java binding of {@code MPI_UNPUBLISH_NAME}. 
+ * @param service service name + * @param info implementation-specific information + * @param port port name + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void unpublishName(String service, Info info, String port) + throws MPIException + { + MPI.check(); + unpublishName(service, info.handle, port); + } + + private native static void unpublishName(String service, long info, String port) + throws MPIException; + + /** + * Java binding of {@code MPI_LOOKUP_NAME} using {@code MPI_INFO_NULL}. + * @param service service name + * @return port name + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static String lookupName(String service) throws MPIException + { + MPI.check(); + return lookupName(service, Info.NULL); + } + + /** + * Java binding of {@code MPI_LOOKUP_NAME}. + * @param service service name + * @param info implementation-specific information + * @return port name + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static String lookupName(String service, Info info) throws MPIException + { + MPI.check(); + return lookupName(service, info.handle); + } + + private native static String lookupName(String service, long info) + throws MPIException; + + /** + * Java binding of {@code MPI_COMM_SPAWN}. + * This intracommunicator will contain the group of spawned processes. + * @param command name of program to be spawned + * @param argv arguments to command; if this parameter is null, + * {@code MPI_ARGV_NULL} will be used. + * @param maxprocs maximum number of processes to start + * @param info info object telling the runtime where + * and how to start the processes + * @param root rank of process in which previous arguments are examined + * @param errcodes one code per process; if this parameter is null, + * {@code MPI_ERRCODES_IGNORE} will be used.
+ * @return intercommunicator between original group and the newly spawned group + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Intercomm spawn(String command, String[] argv, int maxprocs, + Info info, int root, int[] errcodes) + throws MPIException + { + MPI.check(); + + return new Intercomm(spawn(handle, command, argv, maxprocs, + info.handle, root, errcodes)); + } + + private native long spawn(long comm, String command, String[] argv, + int maxprocs, long info, int root, int[] errcodes) + throws MPIException; + + /** + * Java binding of {@code MPI_COMM_SPAWN_MULTIPLE}. + * This intracommunicator will contain the group of spawned processes. + * @param commands programs to be executed + * @param argv arguments for commands; if this parameter is null, + * {@code MPI_ARGVS_NULL} will be used. + * @param maxprocs maximum number of processes to start for each command + * @param info info objects telling the runtime where + * and how to start the processes + * @param root rank of process in which previous arguments are examined + * @param errcodes one code per process; if this parameter is null, + * {@code MPI_ERRCODES_IGNORE} will be used. + * @return intercommunicator between original group and the newly spawned group + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public final Intercomm spawnMultiple( + String[] commands, String[][] argv, int[] maxprocs, + Info[] info, int root, int[] errcodes) + throws MPIException + { + MPI.check(); + + long hInfo[] = new long[info.length]; + + for(int i = 0; i < info.length; i++) + hInfo[i] = info[i].handle; + + return new Intercomm(spawnMultiple(handle, commands, argv, maxprocs, + hInfo, root, errcodes)); + } + + private native long spawnMultiple( + long comm, String[] commands, String[][] argv, int[] maxprocs, + long[] info, int root, int[] errcodes) throws MPIException; } // Intracomm diff --git a/ompi/mpi/java/java/LongInt.java b/ompi/mpi/java/java/LongInt.java index 5b80817d49b..2d9a9143f31 100644 --- a/ompi/mpi/java/java/LongInt.java +++ b/ompi/mpi/java/java/LongInt.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,107 +25,110 @@ */ public final class LongInt extends Struct { -private final int lSize, iOff, iSize; + private final int lSize, iOff, iSize; -/** - * The struct object will be created only in MPI class. - * @see MPI#longInt - */ -protected LongInt(int longSize, int intOff, int intSize) -{ - lSize = longSize; - iSize = intSize; - int lOff; + /** + * The struct object will be created only in MPI class. 
+ * @param longSize size of long + * @param intOff int offset + * @param intSize int size + * @see MPI#longInt + */ + protected LongInt(int longSize, int intOff, int intSize) + { + lSize = longSize; + iSize = intSize; + int lOff; - switch(lSize) - { - case 4: lOff = addInt(); break; - case 8: lOff = addLong(); break; - default: throw new AssertionError("Unsupported long size: "+ lSize); - } + switch(lSize) + { + case 4: lOff = addInt(); break; + case 8: lOff = addLong(); break; + default: throw new AssertionError("Unsupported long size: "+ lSize); + } - assert lOff == 0; - setOffset(intOff); + assert lOff == 0; + setOffset(intOff); - switch(iSize) - { - case 4: iOff = addInt(); break; - case 8: iOff = addLong(); break; - default: throw new AssertionError("Unsupported int size: "+ iSize); - } + switch(iSize) + { + case 4: iOff = addInt(); break; + case 8: iOff = addLong(); break; + default: throw new AssertionError("Unsupported int size: "+ iSize); + } - assert(intOff == iOff); -} + assert(intOff == iOff); + } -/** - * Creates a Data object. - * @return new Data object. - */ -@Override protected LongInt.Data newData() -{ - return new LongInt.Data(); -} + /** + * Creates a Data object. + * @return new Data object. + */ + @Override protected LongInt.Data newData() + { + return new LongInt.Data(); + } -/** - * Class for reading/writing data in a struct stored in a byte buffer. - */ -public final class Data extends Struct.Data -{ - /** - * Gets the long value. - * @return long value - */ - public long getValue() - { - switch(lSize) - { - case 8: return getLong(0); - case 4: return getInt(0); - default: throw new AssertionError(); - } - } + /** + * Class for reading/writing data in a struct stored in a byte buffer. + */ + public final class Data extends Struct.Data + { + /** + * Gets the long value. 
+ * @return long value + */ + public long getValue() + { + switch(lSize) + { + case 8: return getLong(0); + case 4: return getInt(0); + default: throw new AssertionError(); + } + } - /** - * Gets the int value. - * @return int value - */ - public int getIndex() - { - switch(iSize) - { - case 4: return getInt(iOff); - case 8: return (int)getLong(iOff); - default: throw new AssertionError(); - } - } + /** + * Gets the int value. + * @return int value + */ + public int getIndex() + { + switch(iSize) + { + case 4: return getInt(iOff); + case 8: return (int)getLong(iOff); + default: throw new AssertionError(); + } + } - /** - * Puts the long value. - * @param v long value - */ - public void putValue(long v) - { - switch(lSize) - { - case 8: putLong(0, v); break; - case 4: putInt(0, (int)v); break; - default: throw new AssertionError(); - } - } + /** + * Puts the long value. + * @param v long value + */ + public void putValue(long v) + { + switch(lSize) + { + case 8: putLong(0, v); break; + case 4: putInt(0, (int)v); break; + default: throw new AssertionError(); + } + } - /** - * Puts the int value. - * @param v int value - */ - public void putIndex(int v) - { - switch(iSize) - { - case 4: putInt(iOff, v); break; - case 8: putLong(iOff, v); break; - default: throw new AssertionError(); - } - } -} // Data + /** + * Puts the int value. + * @param v int value + */ + public void putIndex(int v) + { + switch(iSize) + { + case 4: putInt(iOff, v); break; + case 8: putLong(iOff, v); break; + default: throw new AssertionError(); + } + } + } // Data } // LongInt diff --git a/ompi/mpi/java/java/MPI.java b/ompi/mpi/java/java/MPI.java index 16af6a22c8c..ea37e9f811c 100644 --- a/ompi/mpi/java/java/MPI.java +++ b/ompi/mpi/java/java/MPI.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * * File : MPI.java * Author : Sang Lim, Sung-Hoon Ko, Xinying Li, Bryan Carpenter * (contributions from MAEDA Atusi) @@ -54,920 +56,947 @@ */ public final class MPI { -private static boolean initialized, finalized; -private static byte[] buffer = null; // Buffer allocation -private static final int MAX_PROCESSOR_NAME = 256; -private static final ByteOrder nativeOrder = ByteOrder.nativeOrder(); - -public static final Intracomm COMM_WORLD, COMM_SELF; - -public static final int THREAD_SINGLE, THREAD_FUNNELED, THREAD_SERIALIZED, - THREAD_MULTIPLE; - -public static final int GRAPH, DIST_GRAPH, CART; -public static final int ANY_SOURCE, ANY_TAG; - -public static final Op MAX, MIN, SUM, PROD, LAND, BAND, - LOR, BOR, LXOR, BXOR; - -/** - * Global minimum operator. - *

{@code MINLOC} and {@link #MAXLOC} can be used with each of the following - * datatypes: {@link #INT2}, {@link #SHORT_INT}, {@link #LONG_INT}, - * {@link #FLOAT_INT} and {@link #DOUBLE_INT}. - */ -public static final Op MINLOC; - -/** Global maximum operator. See {@link #MINLOC}.*/ -public static final Op MAXLOC; - -public static final Datatype DATATYPE_NULL; - -public static final Datatype BYTE, CHAR, SHORT, BOOLEAN, - INT, LONG, FLOAT, DOUBLE, PACKED, - FLOAT_COMPLEX, DOUBLE_COMPLEX; - -/** Struct which must be used with {@link #int2}. */ -public static final Datatype INT2; -/** Struct which must be used with {@link #shortInt}. */ -public static final Datatype SHORT_INT; -/** Struct which must be used with {@link #longInt}. */ -public static final Datatype LONG_INT; -/** Struct which must be used with {@link #floatInt}. */ -public static final Datatype FLOAT_INT; -/** Struct which must be used with {@link #doubleInt}. */ -public static final Datatype DOUBLE_INT; - -/** Struct object for {@link #INT2} datatype. */ -public static final Int2 int2; -/** Struct object for {@link #SHORT_INT} datatype. */ -public static final ShortInt shortInt; -/** Struct object for {@link #LONG_INT} datatype. */ -public static final LongInt longInt; -/** Struct object for {@link #FLOAT_INT} datatype. */ -public static final FloatInt floatInt; -/** Struct object for {@link #DOUBLE_INT} datatype. 
*/ -public static final DoubleInt doubleInt; - -public static final Request REQUEST_NULL; -public static final Group GROUP_EMPTY; -public static final Info INFO_ENV, INFO_NULL; - -public static final int PROC_NULL; -public static final int UNDEFINED; -public static final int IDENT, CONGRUENT, SIMILAR, UNEQUAL; -public static final int TAG_UB, HOST, IO, WTIME_IS_GLOBAL; - -public static final int APPNUM, LASTUSEDCODE, UNIVERSE_SIZE, WIN_BASE, - WIN_SIZE, WIN_DISP_UNIT; - -public static final int VERSION, SUBVERSION; -public static final int ROOT, KEYVAL_INVALID, BSEND_OVERHEAD; -public static final int MAX_OBJECT_NAME, MAX_PORT_NAME, MAX_DATAREP_STRING; -public static final int MAX_INFO_KEY, MAX_INFO_VAL; -public static final int ORDER_C, ORDER_FORTRAN; -public static final int DISTRIBUTE_BLOCK, DISTRIBUTE_CYCLIC, DISTRIBUTE_NONE, - DISTRIBUTE_DFLT_DARG; - -public static final int MODE_CREATE, MODE_RDONLY, MODE_WRONLY, MODE_RDWR, - MODE_DELETE_ON_CLOSE, MODE_UNIQUE_OPEN, MODE_EXCL, - MODE_APPEND, MODE_SEQUENTIAL; -public static final int DISPLACEMENT_CURRENT; -public static final int SEEK_SET, SEEK_CUR, SEEK_END; - -public static final int MODE_NOCHECK, MODE_NOPRECEDE, MODE_NOPUT, - MODE_NOSTORE, MODE_NOSUCCEED; -public static final int LOCK_EXCLUSIVE, LOCK_SHARED; - -public static final Errhandler ERRORS_ARE_FATAL, ERRORS_RETURN; - -// Error classes and codes -public static final int SUCCESS; -public static final int ERR_BUFFER; -public static final int ERR_COUNT; -public static final int ERR_TYPE; -public static final int ERR_TAG; -public static final int ERR_COMM; -public static final int ERR_RANK; -public static final int ERR_REQUEST; -public static final int ERR_ROOT; -public static final int ERR_GROUP; -public static final int ERR_OP; -public static final int ERR_TOPOLOGY; -public static final int ERR_DIMS; -public static final int ERR_ARG; -public static final int ERR_UNKNOWN; -public static final int ERR_TRUNCATE; -public static final int ERR_OTHER; -public 
static final int ERR_INTERN; -public static final int ERR_IN_STATUS; -public static final int ERR_PENDING; -public static final int ERR_ACCESS; -public static final int ERR_AMODE; -public static final int ERR_ASSERT; -public static final int ERR_BAD_FILE; -public static final int ERR_BASE; -public static final int ERR_CONVERSION; -public static final int ERR_DISP; -public static final int ERR_DUP_DATAREP; -public static final int ERR_FILE_EXISTS; -public static final int ERR_FILE_IN_USE; -public static final int ERR_FILE; -public static final int ERR_INFO_KEY; -public static final int ERR_INFO_NOKEY; -public static final int ERR_INFO_VALUE; -public static final int ERR_INFO; -public static final int ERR_IO; -public static final int ERR_KEYVAL; -public static final int ERR_LOCKTYPE; -public static final int ERR_NAME; -public static final int ERR_NO_MEM; -public static final int ERR_NOT_SAME; -public static final int ERR_NO_SPACE; -public static final int ERR_NO_SUCH_FILE; -public static final int ERR_PORT; -public static final int ERR_QUOTA; -public static final int ERR_READ_ONLY; -public static final int ERR_RMA_CONFLICT; -public static final int ERR_RMA_SYNC; -public static final int ERR_SERVICE; -public static final int ERR_SIZE; -public static final int ERR_SPAWN; -public static final int ERR_UNSUPPORTED_DATAREP; -public static final int ERR_UNSUPPORTED_OPERATION; -public static final int ERR_WIN; -public static final int ERR_LASTCODE; -public static final int ERR_SYSRESOURCE; - -static -{ - System.loadLibrary("mpi_java"); - - DATATYPE_NULL = new Datatype(); - - BYTE = new Datatype(); - CHAR = new Datatype(); - SHORT = new Datatype(); - BOOLEAN = new Datatype(); - INT = new Datatype(); - LONG = new Datatype(); - FLOAT = new Datatype(); - DOUBLE = new Datatype(); - PACKED = new Datatype(); - INT2 = new Datatype(); - - SHORT_INT = new Datatype(); - LONG_INT = new Datatype(); - FLOAT_INT = new Datatype(); - DOUBLE_INT = new Datatype(); - FLOAT_COMPLEX = new 
Datatype(); - DOUBLE_COMPLEX = new Datatype(); - - int2 = newInt2(); - shortInt = newShortInt(); - longInt = newLongInt(); - floatInt = newFloatInt(); - doubleInt = newDoubleInt(); - - MAX = new Op(1); - MIN = new Op(2); - SUM = new Op(3); - PROD = new Op(4); - LAND = new Op(5); - BAND = new Op(6); - LOR = new Op(7); - BOR = new Op(8); - LXOR = new Op(9); - BXOR = new Op(10); - MINLOC = new Op(11); - MAXLOC = new Op(12); - - GROUP_EMPTY = new Group(Group.getEmpty()); - REQUEST_NULL = new Request(Request.getNull()); - INFO_ENV = Info.newEnv(); - INFO_NULL = new Info(Info.NULL); - - Constant c = new Constant(); - - THREAD_SINGLE = c.THREAD_SINGLE; - THREAD_FUNNELED = c.THREAD_FUNNELED; - THREAD_SERIALIZED = c.THREAD_SERIALIZED; - THREAD_MULTIPLE = c.THREAD_MULTIPLE; - - GRAPH = c.GRAPH; - DIST_GRAPH = c.DIST_GRAPH; - CART = c.CART; - - ANY_SOURCE = c.ANY_SOURCE; - ANY_TAG = c.ANY_TAG; - PROC_NULL = c.PROC_NULL; - - UNDEFINED = c.UNDEFINED; - - IDENT = c.IDENT; - CONGRUENT = c.CONGRUENT; - SIMILAR = c.SIMILAR; - UNEQUAL = c.UNEQUAL; - - TAG_UB = c.TAG_UB; - HOST = c.HOST; - IO = c.IO; - WTIME_IS_GLOBAL = c.WTIME_IS_GLOBAL; - - APPNUM = c.APPNUM; - LASTUSEDCODE = c.LASTUSEDCODE; - UNIVERSE_SIZE = c.UNIVERSE_SIZE; - WIN_BASE = c.WIN_BASE; - WIN_SIZE = c.WIN_SIZE; - WIN_DISP_UNIT = c.WIN_DISP_UNIT; - - VERSION = c.VERSION; - SUBVERSION = c.SUBVERSION; - - ROOT = c.ROOT; - KEYVAL_INVALID = c.KEYVAL_INVALID; - BSEND_OVERHEAD = c.BSEND_OVERHEAD; - - MAX_OBJECT_NAME = c.MAX_OBJECT_NAME; - MAX_PORT_NAME = c.MAX_PORT_NAME; - MAX_DATAREP_STRING = c.MAX_DATAREP_STRING; - - MAX_INFO_KEY = c.MAX_INFO_KEY; - MAX_INFO_VAL = c.MAX_INFO_VAL; - - ORDER_C = c.ORDER_C; - ORDER_FORTRAN = c.ORDER_FORTRAN; - - DISTRIBUTE_BLOCK = c.DISTRIBUTE_BLOCK; - DISTRIBUTE_CYCLIC = c.DISTRIBUTE_CYCLIC; - DISTRIBUTE_NONE = c.DISTRIBUTE_NONE; - DISTRIBUTE_DFLT_DARG = c.DISTRIBUTE_DFLT_DARG; - - MODE_CREATE = c.MODE_CREATE; - MODE_RDONLY = c.MODE_RDONLY; - MODE_WRONLY = c.MODE_WRONLY; - MODE_RDWR = 
c.MODE_RDWR; - MODE_DELETE_ON_CLOSE = c.MODE_DELETE_ON_CLOSE; - MODE_UNIQUE_OPEN = c.MODE_UNIQUE_OPEN; - MODE_EXCL = c.MODE_EXCL; - MODE_APPEND = c.MODE_APPEND; - MODE_SEQUENTIAL = c.MODE_SEQUENTIAL; - - DISPLACEMENT_CURRENT = c.DISPLACEMENT_CURRENT; - - SEEK_SET = c.SEEK_SET; - SEEK_CUR = c.SEEK_CUR; - SEEK_END = c.SEEK_END; - - MODE_NOCHECK = c.MODE_NOCHECK; - MODE_NOPRECEDE = c.MODE_NOPRECEDE; - MODE_NOPUT = c.MODE_NOPUT; - MODE_NOSTORE = c.MODE_NOSTORE; - MODE_NOSUCCEED = c.MODE_NOSUCCEED; - LOCK_EXCLUSIVE = c.LOCK_EXCLUSIVE; - LOCK_SHARED = c.LOCK_SHARED; - - ERRORS_ARE_FATAL = new Errhandler(Errhandler.getFatal()); - ERRORS_RETURN = new Errhandler(Errhandler.getReturn()); - - COMM_WORLD = new Intracomm(); - COMM_SELF = new Intracomm(); - - // Error classes and codes - SUCCESS = c.SUCCESS; - ERR_BUFFER = c.ERR_BUFFER; - ERR_COUNT = c.ERR_COUNT; - ERR_TYPE = c.ERR_TYPE; - ERR_TAG = c.ERR_TAG; - ERR_COMM = c.ERR_COMM; - ERR_RANK = c.ERR_RANK; - ERR_REQUEST = c.ERR_REQUEST; - ERR_ROOT = c.ERR_ROOT; - ERR_GROUP = c.ERR_GROUP; - ERR_OP = c.ERR_OP; - ERR_TOPOLOGY = c.ERR_TOPOLOGY; - ERR_DIMS = c.ERR_DIMS; - ERR_ARG = c.ERR_ARG; - ERR_UNKNOWN = c.ERR_UNKNOWN; - ERR_TRUNCATE = c.ERR_TRUNCATE; - ERR_OTHER = c.ERR_OTHER; - ERR_INTERN = c.ERR_INTERN; - ERR_IN_STATUS = c.ERR_IN_STATUS; - ERR_PENDING = c.ERR_PENDING; - ERR_ACCESS = c.ERR_ACCESS; - ERR_AMODE = c.ERR_AMODE; - ERR_ASSERT = c.ERR_ASSERT; - ERR_BAD_FILE = c.ERR_BAD_FILE; - ERR_BASE = c.ERR_BASE; - ERR_CONVERSION = c.ERR_CONVERSION; - ERR_DISP = c.ERR_DISP; - ERR_DUP_DATAREP = c.ERR_DUP_DATAREP; - ERR_FILE_EXISTS = c.ERR_FILE_EXISTS; - ERR_FILE_IN_USE = c.ERR_FILE_IN_USE; - ERR_FILE = c.ERR_FILE; - ERR_INFO_KEY = c.ERR_INFO_KEY; - ERR_INFO_NOKEY = c.ERR_INFO_NOKEY; - ERR_INFO_VALUE = c.ERR_INFO_VALUE; - ERR_INFO = c.ERR_INFO; - ERR_IO = c.ERR_IO; - ERR_KEYVAL = c.ERR_KEYVAL; - ERR_LOCKTYPE = c.ERR_LOCKTYPE; - ERR_NAME = c.ERR_NAME; - ERR_NO_MEM = c.ERR_NO_MEM; - ERR_NOT_SAME = c.ERR_NOT_SAME; - ERR_NO_SPACE = 
c.ERR_NO_SPACE; - ERR_NO_SUCH_FILE = c.ERR_NO_SUCH_FILE; - ERR_PORT = c.ERR_PORT; - ERR_QUOTA = c.ERR_QUOTA; - ERR_READ_ONLY = c.ERR_READ_ONLY; - ERR_RMA_CONFLICT = c.ERR_RMA_CONFLICT; - ERR_RMA_SYNC = c.ERR_RMA_SYNC; - ERR_SERVICE = c.ERR_SERVICE; - ERR_SIZE = c.ERR_SIZE; - ERR_SPAWN = c.ERR_SPAWN; - ERR_UNSUPPORTED_DATAREP = c.ERR_UNSUPPORTED_DATAREP; - ERR_UNSUPPORTED_OPERATION = c.ERR_UNSUPPORTED_OPERATION; - ERR_WIN = c.ERR_WIN; - ERR_LASTCODE = c.ERR_LASTCODE; - ERR_SYSRESOURCE = c.ERR_SYSRESOURCE; -} - -private static native Int2 newInt2(); -private static native ShortInt newShortInt(); -private static native LongInt newLongInt(); -private static native FloatInt newFloatInt(); -private static native DoubleInt newDoubleInt(); - -private static void initCommon() throws MPIException -{ - initialized = true; - - DATATYPE_NULL.setBasic(Datatype.NULL); - - BYTE.setBasic(Datatype.BYTE); - CHAR.setBasic(Datatype.CHAR); - SHORT.setBasic(Datatype.SHORT); - BOOLEAN.setBasic(Datatype.BOOLEAN); - INT.setBasic(Datatype.INT); - LONG.setBasic(Datatype.LONG); - FLOAT.setBasic(Datatype.FLOAT); - DOUBLE.setBasic(Datatype.DOUBLE); - PACKED.setBasic(Datatype.PACKED); - - INT2.setBasic(Datatype.INT2, MPI.BYTE); - SHORT_INT.setBasic(Datatype.SHORT_INT, MPI.BYTE); - LONG_INT.setBasic(Datatype.LONG_INT, MPI.BYTE); - FLOAT_INT.setBasic(Datatype.FLOAT_INT, MPI.BYTE); - DOUBLE_INT.setBasic(Datatype.DOUBLE_INT, MPI.BYTE); - FLOAT_COMPLEX.setBasic(Datatype.FLOAT_COMPLEX, MPI.FLOAT); - DOUBLE_COMPLEX.setBasic(Datatype.DOUBLE_COMPLEX, MPI.DOUBLE); - - COMM_WORLD.setType(Intracomm.WORLD); - COMM_SELF.setType(Intracomm.SELF); -} - -/** - * Initialize MPI. - *

Java binding of the MPI operation {@code MPI_INIT}. - * @param args arguments to the {@code main} method. - * @return arguments - * @throws MPIException - */ -public static String[] Init(String[] args) throws MPIException -{ - if(initialized) - throw new MPIException("MPI is already initialized."); - - String[] newArgs = Init_jni(args); - initCommon(); - return newArgs; -} - -private static native String [] Init_jni(String[] args); - -/** - * Initialize MPI with threads. - *

Java binding of the MPI operation {@code MPI_INIT_THREAD}. - * @param args arguments to the {@code main} method. - * @param required desired level of thread support - * @return provided level of thread support - * @throws MPIException - */ -public static int InitThread(String[] args, int required) throws MPIException -{ - if(initialized) - throw new MPIException("MPI is already initialized."); - - int provided = InitThread_jni(args, required); - initCommon(); - return provided; -} - -private static native int InitThread_jni(String[] args, int required) - throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_QUERY_THREAD}. - * @return provided level of thread support - * @throws MPIException - */ -public static int queryThread() throws MPIException -{ - MPI.check(); - return queryThread_jni(); -} - -private static native int queryThread_jni() throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_IS_THREAD_MAIN}. - * @return true if it is the main thread - * @throws MPIException - */ -public static boolean isThreadMain() throws MPIException -{ - MPI.check(); - return isThreadMain_jni(); -} - -private static native boolean isThreadMain_jni() throws MPIException; - -/** - * Finalize MPI. - *

Java binding of the MPI operation {@code MPI_FINALIZE}. - * @throws MPIException - */ -public static void Finalize() throws MPIException -{ - check(); - Finalize_jni(); - finalized = true; -} - -private static native void Finalize_jni() throws MPIException; - -/** - * Returns an elapsed time on the calling processor. - *

Java binding of the MPI operation {@code MPI_WTIME}. - * @return time in seconds since an arbitrary time in the past. - * @throws MPIException - */ -public static double wtime() throws MPIException -{ - check(); - return wtime_jni(); -} - -private static native double wtime_jni(); - -/** - * Returns resolution of timer. - *

Java binding of the MPI operation {MPI_WTICK}. - * @return resolution of {@code wtime} in seconds. - * @throws MPIException - */ -public static double wtick() throws MPIException -{ - check(); - return wtick_jni(); -} - -private static native double wtick_jni(); - -/** - * Returns the name of the processor on which it is called. - *

Java binding of the MPI operation {@code MPI_GET_PROCESSOR_NAME}. - * @return A unique specifier for the actual node. - * @throws MPIException - */ -static public String getProcessorName() throws MPIException -{ - check(); - byte[] buf = new byte[MAX_PROCESSOR_NAME]; - int lengh = getProcessorName(buf); - return new String(buf,0,lengh); -} - -static private native int getProcessorName(byte[] buf); - -/** - * Test if MPI has been initialized. - *

Java binding of the MPI operation {@code MPI_INITIALIZED}. - * @return {@code true} if {@code Init} has been called, - * {@code false} otherwise. - * @throws MPIException - */ -static public native boolean isInitialized() throws MPIException; - -/** - * Test if MPI has been finalized. - *

Java binding of the MPI operation {@code MPI_FINALIZED}. - * @return {@code true} if {@code Finalize} has been called, - * {@code false} otherwise. - * @throws MPIException - */ -static public native boolean isFinalized() throws MPIException; - -/** - * Attaches a user-provided buffer for sending. - *

Java binding of the MPI operation {@code MPI_BUFFER_ATTACH}. - * @param buffer initial buffer - * @throws MPIException - */ -static public void attachBuffer(byte[] buffer) throws MPIException -{ - check(); - MPI.buffer = buffer; - attachBuffer_jni(buffer); -} - -static private native void attachBuffer_jni(byte[] buffer); - -/** - * Removes an existing buffer (for use in sending). - *

Java binding of the MPI operation {@code MPI_BUFFER_DETACH}. - * @return initial buffer - * @throws MPIException - */ -static public byte[] detachBuffer() throws MPIException -{ - check(); - detachBuffer_jni(buffer); - byte[] result = MPI.buffer; - MPI.buffer = null; - return result; -} - -static private native void detachBuffer_jni(byte[] buffer); - -/** - * Controls profiling. - *

This method is not implemented. - *

Java binding of the MPI operation {@code MPI_PCONTROL}. - * @param level Profiling level. - * @param obj Profiling information. - */ -public static void pControl(int level, Object obj) -{ - // Nothing to do here. -} - -/** - * Check if MPI has been initialized and hasn't been finalized. - * @throws MPIException - */ -protected static void check() throws MPIException -{ - if(!initialized) - throw new MPIException("MPI is not initialized."); - - if(finalized) - throw new MPIException("MPI is finalized."); -} - -protected static byte[] attrSet(Object value) throws MPIException -{ - try - { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ObjectOutputStream os = new ObjectOutputStream(baos); - os.writeObject(value); - os.close(); - return baos.toByteArray(); - } - catch(IOException ex) - { - MPIException mpiex = new MPIException(ex); - mpiex.setStackTrace(ex.getStackTrace()); - throw mpiex; - } -} - -protected static Object attrGet(byte[] value) throws MPIException -{ - if(value == null) - return null; - - try - { - ByteArrayInputStream bais = new ByteArrayInputStream(value); - ObjectInputStream is = new ObjectInputStream(bais); - Object obj = is.readObject(); - is.close(); - return obj; - } - catch(ClassNotFoundException ex) - { - throw new MPIException(ex); - } - catch(IOException ex) - { - throw new MPIException(ex); - } -} - -/** - * Allocates a new direct byte buffer. - * @param capacity The new buffer's capacity, in bytes - * @return The new byte buffer - */ -public static ByteBuffer newByteBuffer(int capacity) -{ - ByteBuffer buf = ByteBuffer.allocateDirect(capacity); - buf.order(nativeOrder); - return buf; -} - -/** - * Allocates a new direct char buffer. 
- * @param capacity The new buffer's capacity, in chars - * @return The new char buffer - */ -public static CharBuffer newCharBuffer(int capacity) -{ - assert capacity <= Integer.MAX_VALUE / 2; - ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 2); - buf.order(nativeOrder); - return buf.asCharBuffer(); -} - -/** - * Allocates a new direct short buffer. - * @param capacity The new buffer's capacity, in shorts - * @return The new short buffer - */ -public static ShortBuffer newShortBuffer(int capacity) -{ - assert capacity <= Integer.MAX_VALUE / 2; - ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 2); - buf.order(nativeOrder); - return buf.asShortBuffer(); -} - -/** - * Allocates a new direct int buffer. - * @param capacity The new buffer's capacity, in ints - * @return The new int buffer - */ -public static IntBuffer newIntBuffer(int capacity) -{ - assert capacity <= Integer.MAX_VALUE / 4; - ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 4); - buf.order(nativeOrder); - return buf.asIntBuffer(); -} - -/** - * Allocates a new direct long buffer. - * @param capacity The new buffer's capacity, in longs - * @return The new long buffer - */ -public static LongBuffer newLongBuffer(int capacity) -{ - assert capacity <= Integer.MAX_VALUE / 8; - ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 8); - buf.order(nativeOrder); - return buf.asLongBuffer(); -} - -/** - * Allocates a new direct float buffer. - * @param capacity The new buffer's capacity, in floats - * @return The new float buffer - */ -public static FloatBuffer newFloatBuffer(int capacity) -{ - assert capacity <= Integer.MAX_VALUE / 4; - ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 4); - buf.order(nativeOrder); - return buf.asFloatBuffer(); -} - -/** - * Allocates a new direct double buffer. 
- * @param capacity The new buffer's capacity, in doubles - * @return The new double buffer - */ -public static DoubleBuffer newDoubleBuffer(int capacity) -{ - assert capacity <= Integer.MAX_VALUE / 8; - ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 8); - buf.order(nativeOrder); - return buf.asDoubleBuffer(); -} - -/** - * Asserts that a buffer is direct. - * @param buf buffer - */ -protected static void assertDirectBuffer(Buffer buf) -{ - if(!buf.isDirect()) - throw new IllegalArgumentException("The buffer must be direct."); -} - -/** - * Asserts that buffers are direct. - * @param sendbuf - * @param recvbuf - */ -protected static void assertDirectBuffer(Buffer sendbuf, Buffer recvbuf) -{ - if(!sendbuf.isDirect()) - throw new IllegalArgumentException("The send buffer must be direct."); - - if(!recvbuf.isDirect()) - throw new IllegalArgumentException("The recv. buffer must be direct."); -} - -/** - * Checks if an object is a direct buffer. - * @param obj object - * @return true if the object is a direct buffer - */ -protected static boolean isDirectBuffer(Object obj) -{ - return obj instanceof Buffer && ((Buffer)obj).isDirect(); -} - -/** - * Checks if an object is a heap buffer. - * @param obj object - * @return true if the object is a heap buffer - */ -protected static boolean isHeapBuffer(Object obj) -{ - return obj instanceof Buffer && !((Buffer)obj).isDirect(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static ByteBuffer slice(ByteBuffer buf, int offset) -{ - return ((ByteBuffer)buf.clear().position(offset)) - .slice().order(nativeOrder); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static CharBuffer slice(CharBuffer buf, int offset) -{ - return ((CharBuffer)buf.clear().position(offset)).slice(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static ShortBuffer slice(ShortBuffer buf, int offset) -{ - return ((ShortBuffer)buf.clear().position(offset)).slice(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static IntBuffer slice(IntBuffer buf, int offset) -{ - return ((IntBuffer)buf.clear().position(offset)).slice(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static LongBuffer slice(LongBuffer buf, int offset) -{ - return ((LongBuffer)buf.clear().position(offset)).slice(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static FloatBuffer slice(FloatBuffer buf, int offset) -{ - return ((FloatBuffer)buf.clear().position(offset)).slice(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static DoubleBuffer slice(DoubleBuffer buf, int offset) -{ - return ((DoubleBuffer)buf.clear().position(offset)).slice(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static ByteBuffer slice(byte[] buf, int offset) -{ - return ByteBuffer.wrap(buf, offset, buf.length - offset) - .slice().order(nativeOrder); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static CharBuffer slice(char[] buf, int offset) -{ - return CharBuffer.wrap(buf, offset, buf.length - offset).slice(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static ShortBuffer slice(short[] buf, int offset) -{ - return ShortBuffer.wrap(buf, offset, buf.length - offset).slice(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static IntBuffer slice(int[] buf, int offset) -{ - return IntBuffer.wrap(buf, offset, buf.length - offset).slice(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static LongBuffer slice(long[] buf, int offset) -{ - return LongBuffer.wrap(buf, offset, buf.length - offset).slice(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static FloatBuffer slice(float[] buf, int offset) -{ - return FloatBuffer.wrap(buf, offset, buf.length - offset).slice(); -} - -/** - * Creates a new buffer whose content is a shared subsequence of a buffer. - *

The content of the new buffer will start at the specified offset. - * @param buf buffer - * @param offset offset - * @return the new buffer. - */ -public static DoubleBuffer slice(double[] buf, int offset) -{ - return DoubleBuffer.wrap(buf, offset, buf.length - offset).slice(); -} + private static boolean initialized, finalized; + private static byte[] buffer = null; // Buffer allocation + private static final int MAX_PROCESSOR_NAME = 256; + private static final ByteOrder nativeOrder = ByteOrder.nativeOrder(); + + public static final Intracomm COMM_WORLD, COMM_SELF; + + public static final int THREAD_SINGLE, THREAD_FUNNELED, THREAD_SERIALIZED, + THREAD_MULTIPLE; + + public static final int GRAPH, DIST_GRAPH, CART; + public static final int ANY_SOURCE, ANY_TAG; + + public static final Op MAX, MIN, SUM, PROD, LAND, BAND, + LOR, BOR, LXOR, BXOR, REPLACE, NO_OP; + + /** + * Global minimum operator. + *

{@code MINLOC} and {@link #MAXLOC} can be used with each of the following + * datatypes: {@link #INT2}, {@link #SHORT_INT}, {@link #LONG_INT}, + * {@link #FLOAT_INT} and {@link #DOUBLE_INT}. + */ + public static final Op MINLOC; + + /** Global maximum operator. See {@link #MINLOC}.*/ + public static final Op MAXLOC; + + public static final Datatype DATATYPE_NULL; + + public static final Datatype BYTE, CHAR, SHORT, BOOLEAN, + INT, LONG, FLOAT, DOUBLE, PACKED, + FLOAT_COMPLEX, DOUBLE_COMPLEX; + + /** Struct which must be used with {@link #int2}. */ + public static final Datatype INT2; + /** Struct which must be used with {@link #shortInt}. */ + public static final Datatype SHORT_INT; + /** Struct which must be used with {@link #longInt}. */ + public static final Datatype LONG_INT; + /** Struct which must be used with {@link #floatInt}. */ + public static final Datatype FLOAT_INT; + /** Struct which must be used with {@link #doubleInt}. */ + public static final Datatype DOUBLE_INT; + + /** Struct object for {@link #INT2} datatype. */ + public static final Int2 int2; + /** Struct object for {@link #SHORT_INT} datatype. */ + public static final ShortInt shortInt; + /** Struct object for {@link #LONG_INT} datatype. */ + public static final LongInt longInt; + /** Struct object for {@link #FLOAT_INT} datatype. */ + public static final FloatInt floatInt; + /** Struct object for {@link #DOUBLE_INT} datatype. 
*/ + public static final DoubleInt doubleInt; + + public static final Request REQUEST_NULL; + public static final Group GROUP_EMPTY; + public static final Info INFO_ENV, INFO_NULL; + + public static final int PROC_NULL; + public static final int UNDEFINED; + public static final int IDENT, CONGRUENT, SIMILAR, UNEQUAL; + public static final int TAG_UB, HOST, IO, WTIME_IS_GLOBAL; + + public static final int APPNUM, LASTUSEDCODE, UNIVERSE_SIZE, WIN_BASE, + WIN_SIZE, WIN_DISP_UNIT; + + public static final int VERSION, SUBVERSION; + public static final int ROOT, KEYVAL_INVALID, BSEND_OVERHEAD; + public static final int MAX_OBJECT_NAME, MAX_PORT_NAME, MAX_DATAREP_STRING; + public static final int MAX_INFO_KEY, MAX_INFO_VAL; + public static final int ORDER_C, ORDER_FORTRAN; + public static final int DISTRIBUTE_BLOCK, DISTRIBUTE_CYCLIC, DISTRIBUTE_NONE, + DISTRIBUTE_DFLT_DARG; + + public static final int MODE_CREATE, MODE_RDONLY, MODE_WRONLY, MODE_RDWR, + MODE_DELETE_ON_CLOSE, MODE_UNIQUE_OPEN, MODE_EXCL, + MODE_APPEND, MODE_SEQUENTIAL; + public static final int DISPLACEMENT_CURRENT; + public static final int SEEK_SET, SEEK_CUR, SEEK_END; + + public static final int MODE_NOCHECK, MODE_NOPRECEDE, MODE_NOPUT, + MODE_NOSTORE, MODE_NOSUCCEED; + public static final int LOCK_EXCLUSIVE, LOCK_SHARED; + + public static final Errhandler ERRORS_ARE_FATAL, ERRORS_RETURN; + + // Error classes and codes + public static final int SUCCESS; + public static final int ERR_BUFFER; + public static final int ERR_COUNT; + public static final int ERR_TYPE; + public static final int ERR_TAG; + public static final int ERR_COMM; + public static final int ERR_RANK; + public static final int ERR_REQUEST; + public static final int ERR_ROOT; + public static final int ERR_GROUP; + public static final int ERR_OP; + public static final int ERR_TOPOLOGY; + public static final int ERR_DIMS; + public static final int ERR_ARG; + public static final int ERR_UNKNOWN; + public static final int ERR_TRUNCATE; + 
public static final int ERR_OTHER; + public static final int ERR_INTERN; + public static final int ERR_IN_STATUS; + public static final int ERR_PENDING; + public static final int ERR_ACCESS; + public static final int ERR_AMODE; + public static final int ERR_ASSERT; + public static final int ERR_BAD_FILE; + public static final int ERR_BASE; + public static final int ERR_CONVERSION; + public static final int ERR_DISP; + public static final int ERR_DUP_DATAREP; + public static final int ERR_FILE_EXISTS; + public static final int ERR_FILE_IN_USE; + public static final int ERR_FILE; + public static final int ERR_INFO_KEY; + public static final int ERR_INFO_NOKEY; + public static final int ERR_INFO_VALUE; + public static final int ERR_INFO; + public static final int ERR_IO; + public static final int ERR_KEYVAL; + public static final int ERR_LOCKTYPE; + public static final int ERR_NAME; + public static final int ERR_NO_MEM; + public static final int ERR_NOT_SAME; + public static final int ERR_NO_SPACE; + public static final int ERR_NO_SUCH_FILE; + public static final int ERR_PORT; + public static final int ERR_QUOTA; + public static final int ERR_READ_ONLY; + public static final int ERR_RMA_CONFLICT; + public static final int ERR_RMA_SYNC; + public static final int ERR_SERVICE; + public static final int ERR_SIZE; + public static final int ERR_SPAWN; + public static final int ERR_UNSUPPORTED_DATAREP; + public static final int ERR_UNSUPPORTED_OPERATION; + public static final int ERR_WIN; + public static final int ERR_LASTCODE; + public static final int ERR_SYSRESOURCE; + + static + { + System.loadLibrary("mpi_java"); + + DATATYPE_NULL = new Datatype(); + + BYTE = new Datatype(); + CHAR = new Datatype(); + SHORT = new Datatype(); + BOOLEAN = new Datatype(); + INT = new Datatype(); + LONG = new Datatype(); + FLOAT = new Datatype(); + DOUBLE = new Datatype(); + PACKED = new Datatype(); + INT2 = new Datatype(); + + SHORT_INT = new Datatype(); + LONG_INT = new Datatype(); + 
FLOAT_INT = new Datatype(); + DOUBLE_INT = new Datatype(); + FLOAT_COMPLEX = new Datatype(); + DOUBLE_COMPLEX = new Datatype(); + + int2 = newInt2(); + shortInt = newShortInt(); + longInt = newLongInt(); + floatInt = newFloatInt(); + doubleInt = newDoubleInt(); + + MAX = new Op(1); + MIN = new Op(2); + SUM = new Op(3); + PROD = new Op(4); + LAND = new Op(5); + BAND = new Op(6); + LOR = new Op(7); + BOR = new Op(8); + LXOR = new Op(9); + BXOR = new Op(10); + MINLOC = new Op(11); + MAXLOC = new Op(12); + REPLACE = new Op(13); + NO_OP = new Op(14); + + GROUP_EMPTY = new Group(Group.getEmpty()); + REQUEST_NULL = new Request(Request.getNull()); + INFO_ENV = Info.newEnv(); + INFO_NULL = new Info(Info.NULL); + + Constant c = new Constant(); + + THREAD_SINGLE = c.THREAD_SINGLE; + THREAD_FUNNELED = c.THREAD_FUNNELED; + THREAD_SERIALIZED = c.THREAD_SERIALIZED; + THREAD_MULTIPLE = c.THREAD_MULTIPLE; + + GRAPH = c.GRAPH; + DIST_GRAPH = c.DIST_GRAPH; + CART = c.CART; + + ANY_SOURCE = c.ANY_SOURCE; + ANY_TAG = c.ANY_TAG; + PROC_NULL = c.PROC_NULL; + + UNDEFINED = c.UNDEFINED; + + IDENT = c.IDENT; + CONGRUENT = c.CONGRUENT; + SIMILAR = c.SIMILAR; + UNEQUAL = c.UNEQUAL; + + TAG_UB = c.TAG_UB; + HOST = c.HOST; + IO = c.IO; + WTIME_IS_GLOBAL = c.WTIME_IS_GLOBAL; + + APPNUM = c.APPNUM; + LASTUSEDCODE = c.LASTUSEDCODE; + UNIVERSE_SIZE = c.UNIVERSE_SIZE; + WIN_BASE = c.WIN_BASE; + WIN_SIZE = c.WIN_SIZE; + WIN_DISP_UNIT = c.WIN_DISP_UNIT; + + VERSION = c.VERSION; + SUBVERSION = c.SUBVERSION; + + ROOT = c.ROOT; + KEYVAL_INVALID = c.KEYVAL_INVALID; + BSEND_OVERHEAD = c.BSEND_OVERHEAD; + + MAX_OBJECT_NAME = c.MAX_OBJECT_NAME; + MAX_PORT_NAME = c.MAX_PORT_NAME; + MAX_DATAREP_STRING = c.MAX_DATAREP_STRING; + + MAX_INFO_KEY = c.MAX_INFO_KEY; + MAX_INFO_VAL = c.MAX_INFO_VAL; + + ORDER_C = c.ORDER_C; + ORDER_FORTRAN = c.ORDER_FORTRAN; + + DISTRIBUTE_BLOCK = c.DISTRIBUTE_BLOCK; + DISTRIBUTE_CYCLIC = c.DISTRIBUTE_CYCLIC; + DISTRIBUTE_NONE = c.DISTRIBUTE_NONE; + DISTRIBUTE_DFLT_DARG = 
c.DISTRIBUTE_DFLT_DARG; + + MODE_CREATE = c.MODE_CREATE; + MODE_RDONLY = c.MODE_RDONLY; + MODE_WRONLY = c.MODE_WRONLY; + MODE_RDWR = c.MODE_RDWR; + MODE_DELETE_ON_CLOSE = c.MODE_DELETE_ON_CLOSE; + MODE_UNIQUE_OPEN = c.MODE_UNIQUE_OPEN; + MODE_EXCL = c.MODE_EXCL; + MODE_APPEND = c.MODE_APPEND; + MODE_SEQUENTIAL = c.MODE_SEQUENTIAL; + + DISPLACEMENT_CURRENT = c.DISPLACEMENT_CURRENT; + + SEEK_SET = c.SEEK_SET; + SEEK_CUR = c.SEEK_CUR; + SEEK_END = c.SEEK_END; + + MODE_NOCHECK = c.MODE_NOCHECK; + MODE_NOPRECEDE = c.MODE_NOPRECEDE; + MODE_NOPUT = c.MODE_NOPUT; + MODE_NOSTORE = c.MODE_NOSTORE; + MODE_NOSUCCEED = c.MODE_NOSUCCEED; + LOCK_EXCLUSIVE = c.LOCK_EXCLUSIVE; + LOCK_SHARED = c.LOCK_SHARED; + + ERRORS_ARE_FATAL = new Errhandler(Errhandler.getFatal()); + ERRORS_RETURN = new Errhandler(Errhandler.getReturn()); + + COMM_WORLD = new Intracomm(); + COMM_SELF = new Intracomm(); + + // Error classes and codes + SUCCESS = c.SUCCESS; + ERR_BUFFER = c.ERR_BUFFER; + ERR_COUNT = c.ERR_COUNT; + ERR_TYPE = c.ERR_TYPE; + ERR_TAG = c.ERR_TAG; + ERR_COMM = c.ERR_COMM; + ERR_RANK = c.ERR_RANK; + ERR_REQUEST = c.ERR_REQUEST; + ERR_ROOT = c.ERR_ROOT; + ERR_GROUP = c.ERR_GROUP; + ERR_OP = c.ERR_OP; + ERR_TOPOLOGY = c.ERR_TOPOLOGY; + ERR_DIMS = c.ERR_DIMS; + ERR_ARG = c.ERR_ARG; + ERR_UNKNOWN = c.ERR_UNKNOWN; + ERR_TRUNCATE = c.ERR_TRUNCATE; + ERR_OTHER = c.ERR_OTHER; + ERR_INTERN = c.ERR_INTERN; + ERR_IN_STATUS = c.ERR_IN_STATUS; + ERR_PENDING = c.ERR_PENDING; + ERR_ACCESS = c.ERR_ACCESS; + ERR_AMODE = c.ERR_AMODE; + ERR_ASSERT = c.ERR_ASSERT; + ERR_BAD_FILE = c.ERR_BAD_FILE; + ERR_BASE = c.ERR_BASE; + ERR_CONVERSION = c.ERR_CONVERSION; + ERR_DISP = c.ERR_DISP; + ERR_DUP_DATAREP = c.ERR_DUP_DATAREP; + ERR_FILE_EXISTS = c.ERR_FILE_EXISTS; + ERR_FILE_IN_USE = c.ERR_FILE_IN_USE; + ERR_FILE = c.ERR_FILE; + ERR_INFO_KEY = c.ERR_INFO_KEY; + ERR_INFO_NOKEY = c.ERR_INFO_NOKEY; + ERR_INFO_VALUE = c.ERR_INFO_VALUE; + ERR_INFO = c.ERR_INFO; + ERR_IO = c.ERR_IO; + ERR_KEYVAL = c.ERR_KEYVAL; + 
ERR_LOCKTYPE = c.ERR_LOCKTYPE; + ERR_NAME = c.ERR_NAME; + ERR_NO_MEM = c.ERR_NO_MEM; + ERR_NOT_SAME = c.ERR_NOT_SAME; + ERR_NO_SPACE = c.ERR_NO_SPACE; + ERR_NO_SUCH_FILE = c.ERR_NO_SUCH_FILE; + ERR_PORT = c.ERR_PORT; + ERR_QUOTA = c.ERR_QUOTA; + ERR_READ_ONLY = c.ERR_READ_ONLY; + ERR_RMA_CONFLICT = c.ERR_RMA_CONFLICT; + ERR_RMA_SYNC = c.ERR_RMA_SYNC; + ERR_SERVICE = c.ERR_SERVICE; + ERR_SIZE = c.ERR_SIZE; + ERR_SPAWN = c.ERR_SPAWN; + ERR_UNSUPPORTED_DATAREP = c.ERR_UNSUPPORTED_DATAREP; + ERR_UNSUPPORTED_OPERATION = c.ERR_UNSUPPORTED_OPERATION; + ERR_WIN = c.ERR_WIN; + ERR_LASTCODE = c.ERR_LASTCODE; + ERR_SYSRESOURCE = c.ERR_SYSRESOURCE; + + initVersion(); + } + + private static native Int2 newInt2(); + private static native ShortInt newShortInt(); + private static native LongInt newLongInt(); + private static native FloatInt newFloatInt(); + private static native DoubleInt newDoubleInt(); + private static native void initVersion(); + + private static void initCommon() throws MPIException + { + initialized = true; + + DATATYPE_NULL.setBasic(Datatype.NULL); + + BYTE.setBasic(Datatype.BYTE); + CHAR.setBasic(Datatype.CHAR); + SHORT.setBasic(Datatype.SHORT); + BOOLEAN.setBasic(Datatype.BOOLEAN); + INT.setBasic(Datatype.INT); + LONG.setBasic(Datatype.LONG); + FLOAT.setBasic(Datatype.FLOAT); + DOUBLE.setBasic(Datatype.DOUBLE); + PACKED.setBasic(Datatype.PACKED); + + INT2.setBasic(Datatype.INT2, MPI.BYTE); + SHORT_INT.setBasic(Datatype.SHORT_INT, MPI.BYTE); + LONG_INT.setBasic(Datatype.LONG_INT, MPI.BYTE); + FLOAT_INT.setBasic(Datatype.FLOAT_INT, MPI.BYTE); + DOUBLE_INT.setBasic(Datatype.DOUBLE_INT, MPI.BYTE); + FLOAT_COMPLEX.setBasic(Datatype.FLOAT_COMPLEX, MPI.FLOAT); + DOUBLE_COMPLEX.setBasic(Datatype.DOUBLE_COMPLEX, MPI.DOUBLE); + + COMM_WORLD.setType(Intracomm.WORLD); + COMM_SELF.setType(Intracomm.SELF); + } + + /** + * Initialize MPI. + *

Java binding of the MPI operation {@code MPI_INIT}. + * @param args arguments to the {@code main} method. + * @return arguments + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static String[] Init(String[] args) throws MPIException + { + if(initialized) + throw new MPIException("MPI is already initialized."); + + String[] newArgs = Init_jni(args); + initCommon(); + return newArgs; + } + + private static native String [] Init_jni(String[] args); + + /** + * Initialize MPI with threads. + *

Java binding of the MPI operation {@code MPI_INIT_THREAD}. + * @param args arguments to the {@code main} method. + * @param required desired level of thread support + * @return provided level of thread support + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int InitThread(String[] args, int required) throws MPIException + { + if(initialized) + throw new MPIException("MPI is already initialized."); + + int provided = InitThread_jni(args, required); + initCommon(); + return provided; + } + + private static native int InitThread_jni(String[] args, int required) + throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_QUERY_THREAD}. + * @return provided level of thread support + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int queryThread() throws MPIException + { + MPI.check(); + return queryThread_jni(); + } + + private static native int queryThread_jni() throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_IS_THREAD_MAIN}. + * @return true if it is the main thread + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static boolean isThreadMain() throws MPIException + { + MPI.check(); + return isThreadMain_jni(); + } + + private static native boolean isThreadMain_jni() throws MPIException; + + /** + * Finalize MPI. + *

Java binding of the MPI operation {@code MPI_FINALIZE}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void Finalize() throws MPIException + { + check(); + Finalize_jni(); + finalized = true; + } + + private static native void Finalize_jni() throws MPIException; + + /** + * Returns an elapsed time on the calling processor. + *

Java binding of the MPI operation {@code MPI_WTIME}. + * @return time in seconds since an arbitrary time in the past. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static double wtime() throws MPIException + { + check(); + return wtime_jni(); + } + + private static native double wtime_jni(); + + /** + * Returns resolution of timer. + *

Java binding of the MPI operation {@code MPI_WTICK}. + * @return resolution of {@code wtime} in seconds. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static double wtick() throws MPIException + { + check(); + return wtick_jni(); + } + + private static native double wtick_jni(); + + /** + * Returns a version object representing the version of MPI being used. + *

Java binding of the MPI operation {@code MPI_GET_VERSION}. + * @return A version object representing the version and subversion of MPI being used. + */ + public static Version getVersion() { + return getVersionJNI(); + } + + private static native Version getVersionJNI(); + + /** + * Returns the version of the MPI Library + *

Java binding of the MPI operation {@code MPI_GET_LIBRARY_VERSION}. + * @return A string describing the version of the MPI library + */ + public static String getLibVersion() { + return getLibVersionJNI(); + } + + private static native String getLibVersionJNI(); + + /** + * Returns the name of the processor on which it is called. + *

Java binding of the MPI operation {@code MPI_GET_PROCESSOR_NAME}. + * @return A unique specifier for the actual node. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + static public String getProcessorName() throws MPIException + { + check(); + byte[] buf = new byte[MAX_PROCESSOR_NAME]; + int lengh = getProcessorName(buf); + return new String(buf,0,lengh); + } + + static private native int getProcessorName(byte[] buf); + + /** + * Test if MPI has been initialized. + *

Java binding of the MPI operation {@code MPI_INITIALIZED}. + * @return {@code true} if {@code Init} has been called, + * {@code false} otherwise. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + static public native boolean isInitialized() throws MPIException; + + /** + * Test if MPI has been finalized. + *

Java binding of the MPI operation {@code MPI_FINALIZED}. + * @return {@code true} if {@code Finalize} has been called, + * {@code false} otherwise. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + static public native boolean isFinalized() throws MPIException; + + /** + * Attaches a user-provided buffer for sending. + *

Java binding of the MPI operation {@code MPI_BUFFER_ATTACH}. + * @param buffer initial buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + static public void attachBuffer(byte[] buffer) throws MPIException + { + check(); + MPI.buffer = buffer; + attachBuffer_jni(buffer); + } + + static private native void attachBuffer_jni(byte[] buffer); + + /** + * Removes an existing buffer (for use in sending). + *

Java binding of the MPI operation {@code MPI_BUFFER_DETACH}. + * @return initial buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + static public byte[] detachBuffer() throws MPIException + { + check(); + detachBuffer_jni(buffer); + byte[] result = MPI.buffer; + MPI.buffer = null; + return result; + } + + static private native void detachBuffer_jni(byte[] buffer); + + /** + * Controls profiling. + *

This method is not implemented. + *

Java binding of the MPI operation {@code MPI_PCONTROL}. + * @param level Profiling level. + * @param obj Profiling information. + */ + public static void pControl(int level, Object obj) + { + // Nothing to do here. + } + + /** + * Check if MPI has been initialized and hasn't been finalized. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + protected static void check() throws MPIException + { + if(!initialized) + throw new MPIException("MPI is not initialized."); + + if(finalized) + throw new MPIException("MPI is finalized."); + } + + protected static byte[] attrSet(Object value) throws MPIException + { + try + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ObjectOutputStream os = new ObjectOutputStream(baos); + os.writeObject(value); + os.close(); + return baos.toByteArray(); + } + catch(IOException ex) + { + MPIException mpiex = new MPIException(ex); + mpiex.setStackTrace(ex.getStackTrace()); + throw mpiex; + } + } + + protected static Object attrGet(byte[] value) throws MPIException + { + if(value == null) + return null; + + try + { + ByteArrayInputStream bais = new ByteArrayInputStream(value); + ObjectInputStream is = new ObjectInputStream(bais); + Object obj = is.readObject(); + is.close(); + return obj; + } + catch(ClassNotFoundException ex) + { + throw new MPIException(ex); + } + catch(IOException ex) + { + throw new MPIException(ex); + } + } + + /** + * Allocates a new direct byte buffer. + * @param capacity The new buffer's capacity, in bytes + * @return The new byte buffer + */ + public static ByteBuffer newByteBuffer(int capacity) + { + ByteBuffer buf = ByteBuffer.allocateDirect(capacity); + buf.order(nativeOrder); + return buf; + } + + /** + * Allocates a new direct char buffer. 
+ * @param capacity The new buffer's capacity, in chars + * @return The new char buffer + */ + public static CharBuffer newCharBuffer(int capacity) + { + assert capacity <= Integer.MAX_VALUE / 2; + ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 2); + buf.order(nativeOrder); + return buf.asCharBuffer(); + } + + /** + * Allocates a new direct short buffer. + * @param capacity The new buffer's capacity, in shorts + * @return The new short buffer + */ + public static ShortBuffer newShortBuffer(int capacity) + { + assert capacity <= Integer.MAX_VALUE / 2; + ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 2); + buf.order(nativeOrder); + return buf.asShortBuffer(); + } + + /** + * Allocates a new direct int buffer. + * @param capacity The new buffer's capacity, in ints + * @return The new int buffer + */ + public static IntBuffer newIntBuffer(int capacity) + { + assert capacity <= Integer.MAX_VALUE / 4; + ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 4); + buf.order(nativeOrder); + return buf.asIntBuffer(); + } + + /** + * Allocates a new direct long buffer. + * @param capacity The new buffer's capacity, in longs + * @return The new long buffer + */ + public static LongBuffer newLongBuffer(int capacity) + { + assert capacity <= Integer.MAX_VALUE / 8; + ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 8); + buf.order(nativeOrder); + return buf.asLongBuffer(); + } + + /** + * Allocates a new direct float buffer. + * @param capacity The new buffer's capacity, in floats + * @return The new float buffer + */ + public static FloatBuffer newFloatBuffer(int capacity) + { + assert capacity <= Integer.MAX_VALUE / 4; + ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 4); + buf.order(nativeOrder); + return buf.asFloatBuffer(); + } + + /** + * Allocates a new direct double buffer. 
+ * @param capacity The new buffer's capacity, in doubles + * @return The new double buffer + */ + public static DoubleBuffer newDoubleBuffer(int capacity) + { + assert capacity <= Integer.MAX_VALUE / 8; + ByteBuffer buf = ByteBuffer.allocateDirect(capacity * 8); + buf.order(nativeOrder); + return buf.asDoubleBuffer(); + } + + /** + * Asserts that a buffer is direct. + * @param buf buffer + */ + protected static void assertDirectBuffer(Buffer buf) + { + if(!buf.isDirect()) + throw new IllegalArgumentException("The buffer must be direct."); + } + + /** + * Asserts that buffers are direct. + * @param sendbuf The send buffer + * @param recvbuf The receive buffer + */ + protected static void assertDirectBuffer(Buffer sendbuf, Buffer recvbuf) + { + if(!sendbuf.isDirect()) + throw new IllegalArgumentException("The send buffer must be direct."); + + if(!recvbuf.isDirect()) + throw new IllegalArgumentException("The recv. buffer must be direct."); + } + + /** + * Checks if an object is a direct buffer. + * @param obj object + * @return true if the object is a direct buffer + */ + protected static boolean isDirectBuffer(Object obj) + { + return obj instanceof Buffer && ((Buffer)obj).isDirect(); + } + + /** + * Checks if an object is a heap buffer. + * @param obj object + * @return true if the object is a heap buffer + */ + protected static boolean isHeapBuffer(Object obj) + { + return obj instanceof Buffer && !((Buffer)obj).isDirect(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static ByteBuffer slice(ByteBuffer buf, int offset) + { + return ((ByteBuffer)buf.clear().position(offset)) + .slice().order(nativeOrder); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static CharBuffer slice(CharBuffer buf, int offset) + { + return ((CharBuffer)buf.clear().position(offset)).slice(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static ShortBuffer slice(ShortBuffer buf, int offset) + { + return ((ShortBuffer)buf.clear().position(offset)).slice(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static IntBuffer slice(IntBuffer buf, int offset) + { + return ((IntBuffer)buf.clear().position(offset)).slice(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static LongBuffer slice(LongBuffer buf, int offset) + { + return ((LongBuffer)buf.clear().position(offset)).slice(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static FloatBuffer slice(FloatBuffer buf, int offset) + { + return ((FloatBuffer)buf.clear().position(offset)).slice(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static DoubleBuffer slice(DoubleBuffer buf, int offset) + { + return ((DoubleBuffer)buf.clear().position(offset)).slice(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static ByteBuffer slice(byte[] buf, int offset) + { + return ByteBuffer.wrap(buf, offset, buf.length - offset) + .slice().order(nativeOrder); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static CharBuffer slice(char[] buf, int offset) + { + return CharBuffer.wrap(buf, offset, buf.length - offset).slice(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static ShortBuffer slice(short[] buf, int offset) + { + return ShortBuffer.wrap(buf, offset, buf.length - offset).slice(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static IntBuffer slice(int[] buf, int offset) + { + return IntBuffer.wrap(buf, offset, buf.length - offset).slice(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static LongBuffer slice(long[] buf, int offset) + { + return LongBuffer.wrap(buf, offset, buf.length - offset).slice(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static FloatBuffer slice(float[] buf, int offset) + { + return FloatBuffer.wrap(buf, offset, buf.length - offset).slice(); + } + + /** + * Creates a new buffer whose content is a shared subsequence of a buffer. + *

The content of the new buffer will start at the specified offset. + * @param buf buffer + * @param offset offset + * @return the new buffer. + */ + public static DoubleBuffer slice(double[] buf, int offset) + { + return DoubleBuffer.wrap(buf, offset, buf.length - offset).slice(); + } } // MPI diff --git a/ompi/mpi/java/java/MPIException.java b/ompi/mpi/java/java/MPIException.java index 4cce5bfce46..24ec52f9074 100644 --- a/ompi/mpi/java/java/MPIException.java +++ b/ompi/mpi/java/java/MPIException.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : MPIException.java * Author : Bryan Carpenter * Created : Tue Sep 14 13:03:57 EDT 1999 @@ -52,50 +54,50 @@ */ public final class MPIException extends Exception { -private int errorCode, errorClass; + private int errorCode, errorClass; -protected MPIException(int code, int clazz, String message) -{ - super(message); - errorCode = code; - errorClass = clazz; -} + protected MPIException(int code, int clazz, String message) + { + super(message); + errorCode = code; + errorClass = clazz; + } -/** - * Creates an exception. - * @param message message associated to the exception - */ -public MPIException(String message) -{ - super(message); -} + /** + * Creates an exception. + * @param message message associated to the exception + */ + public MPIException(String message) + { + super(message); + } -/** - * Creates an exception: - * @param cause cause associated to the exception - */ -public MPIException(Throwable cause) -{ - super(cause); - setStackTrace(cause.getStackTrace()); -} + /** + * Creates an exception: + * @param cause cause associated to the exception + */ + public MPIException(Throwable cause) + { + super(cause); + setStackTrace(cause.getStackTrace()); + } -/** - * Gets the MPI error code. - * @return error code - */ -public int getErrorCode() -{ - return errorCode; -} + /** + * Gets the MPI error code. + * @return error code + */ + public int getErrorCode() + { + return errorCode; + } -/** - * Gets the MPI error class. 
- * @return error class - */ -public int getErrorClass() -{ - return errorClass; -} + /** + * Gets the MPI error class. + * @return error class + */ + public int getErrorClass() + { + return errorClass; + } } // MPIException diff --git a/ompi/mpi/java/java/Makefile.am b/ompi/mpi/java/java/Makefile.am index 99335c52880..bf7d2aaa3e5 100644 --- a/ompi/mpi/java/java/Makefile.am +++ b/ompi/mpi/java/java/Makefile.am @@ -1,10 +1,12 @@ # -*- makefile -*- # # Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. All rights +# reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -23,39 +25,41 @@ include $(top_srcdir)/Makefile.ompi-rules # just list them here in EXTRA_DIST so that they get picked up by # "make dist". JAVA_SRC_FILES = \ - CartComm.java \ - CartParms.java \ - Comm.java \ + CartComm.java \ + CartParms.java \ + Comm.java \ Constant.java \ - Datatype.java \ + Count.java \ + Datatype.java \ DistGraphNeighbors.java \ DoubleInt.java \ DoubleComplex.java \ - Errhandler.java \ + Errhandler.java \ FloatComplex.java \ FloatInt.java \ File.java \ FileView.java \ - Freeable.java \ - GraphComm.java \ - GraphParms.java \ - Group.java \ + Freeable.java \ + GraphComm.java \ + GraphParms.java \ + Group.java \ Info.java \ Int2.java \ - Intercomm.java \ - Intracomm.java \ + Intercomm.java \ + Intracomm.java \ LongInt.java \ Message.java \ - MPI.java \ - MPIException.java \ - Op.java \ - Prequest.java \ - Request.java \ - ShiftParms.java \ + MPI.java \ + MPIException.java \ + Op.java \ + Prequest.java \ + Request.java \ + ShiftParms.java \ ShortInt.java \ - Status.java \ + Status.java \ Struct.java \ UserFunction.java \ + Version.java \ Win.java EXTRA_DIST = $(JAVA_SRC_FILES) @@ -68,26 +72,28 @@ if OMPI_WANT_JAVA_BINDINGS # we have a specific list of files here, as opposed to deriving them # from JAVA_SRC_FILES. 
JAVA_H = \ - mpi_MPI.h \ - mpi_CartParms.h \ - mpi_CartComm.h \ - mpi_Comm.h \ + mpi_MPI.h \ + mpi_CartParms.h \ + mpi_CartComm.h \ + mpi_Comm.h \ mpi_Constant.h \ - mpi_Datatype.h \ - mpi_Errhandler.h \ + mpi_Count.h \ + mpi_Datatype.h \ + mpi_Errhandler.h \ mpi_File.h \ - mpi_GraphParms.h \ - mpi_GraphComm.h \ - mpi_Group.h \ + mpi_GraphParms.h \ + mpi_GraphComm.h \ + mpi_Group.h \ mpi_Info.h \ - mpi_Intercomm.h \ - mpi_Intracomm.h \ + mpi_Intercomm.h \ + mpi_Intracomm.h \ mpi_Message.h \ - mpi_Op.h \ - mpi_Prequest.h \ - mpi_Request.h \ - mpi_ShiftParms.h \ - mpi_Status.h \ + mpi_Op.h \ + mpi_Prequest.h \ + mpi_Request.h \ + mpi_ShiftParms.h \ + mpi_Status.h \ + mpi_Version.h \ mpi_Win.h # A little verbosity magic; see Makefile.ompi-rules for an explanation. diff --git a/ompi/mpi/java/java/Message.java b/ompi/mpi/java/java/Message.java index 82588b6b75a..9946a671294 100644 --- a/ompi/mpi/java/java/Message.java +++ b/ompi/mpi/java/java/Message.java @@ -5,22 +5,24 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * IMPLEMENTATION DETAILS - * + * * All methods with buffers that can be direct or non direct have * a companion argument 'db' which is true if the buffer is direct. - * + * * Checking if a buffer is direct is faster in Java than C. 
*/ @@ -34,125 +36,128 @@ */ public final class Message { -protected long handle; -private static long NULL, NO_PROC; - -static -{ - init(); -} - -private static native void init(); - -/** - * Creates a {@code MPI_MESSAGE_NULL}. - */ -public Message() -{ - handle = NULL; -} - -/** - * Tests if the message is {@code MPI_MESSAGE_NULL}. - * @return true if the message is {@code MPI_MESSAGE_NULL}. - */ -public boolean isNull() -{ - return handle == NULL; -} - -/** - * Tests if the message is {@code MPI_MESSAGE_NO_PROC}. - * @return true if the message is {@code MPI_MESSAGE_NO_PROC}. - */ -public boolean isNoProc() -{ - return handle == NO_PROC; -} - -/** - * Java binding of {@code MPI_MPROBE}. - * @param source rank of the source - * @param tag message tag - * @param comm communicator - * @return status object - * @throws MPIException - */ -public Status mProbe(int source, int tag, Comm comm) throws MPIException -{ - MPI.check(); - Status status = new Status(); - handle = mProbe(source, tag, comm.handle, status.data); - return status; -} - -private native long mProbe(int source, int tag, long comm, long[] status) - throws MPIException; - -/** - * Java binding of {@code MPI_IMPROBE}. - * @param source rank of the source - * @param tag message tag - * @param comm communicator - * @return status object if there is a message, {@code null} otherwise - * @throws MPIException - */ -public Status imProbe(int source, int tag, Comm comm) throws MPIException -{ - MPI.check(); - return imProbe(source, tag, comm.handle); -} - -private native Status imProbe(int source, int tag, long comm) - throws MPIException; - -/** - * Java binding of {@code MPI_MRECV}. 
- * @param buf receive buffer - * @param count number of elements in receve buffer - * @param type datatype of each receive buffer element - * @return status object - */ -public Status mRecv(Object buf, int count, Datatype type) throws MPIException -{ - MPI.check(); - int off = 0; - boolean db = false; - Status status = new Status(); - - if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) - { - off = type.getOffset(buf); - buf = ((Buffer)buf).array(); - } - - handle = mRecv(handle, buf, db, off, count, - type.handle, type.baseType, status.data); - - return status; -} - -private native long mRecv( - long message, Object buf, boolean db, int offset, int count, - long type, int baseType, long[] status) throws MPIException; - -/** - * Java binding of {@code MPI_IMRECV}. - * @param buf receive buffer - * @param count number of elements in receve buffer - * @param type datatype of each receive buffer element - * @return request object - * @throws MPIException - */ -public Request imRecv(Buffer buf, int count, Datatype type) - throws MPIException -{ - MPI.check(); - assertDirectBuffer(buf); - return new Request(imRecv(handle, buf, count, type.handle)); -} - -private native long imRecv(long message, Object buf, int count, long type) - throws MPIException; + protected long handle; + private static long NULL, NO_PROC; + + static + { + init(); + } + + private static native void init(); + + /** + * Creates a {@code MPI_MESSAGE_NULL}. + */ + public Message() + { + handle = NULL; + } + + /** + * Tests if the message is {@code MPI_MESSAGE_NULL}. + * @return true if the message is {@code MPI_MESSAGE_NULL}. + */ + public boolean isNull() + { + return handle == NULL; + } + + /** + * Tests if the message is {@code MPI_MESSAGE_NO_PROC}. + * @return true if the message is {@code MPI_MESSAGE_NO_PROC}. + */ + public boolean isNoProc() + { + return handle == NO_PROC; + } + + /** + * Java binding of {@code MPI_MPROBE}. 
+ * @param source rank of the source + * @param tag message tag + * @param comm communicator + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status mProbe(int source, int tag, Comm comm) throws MPIException + { + MPI.check(); + Status status = new Status(); + handle = mProbe(source, tag, comm.handle, status.data); + return status; + } + + private native long mProbe(int source, int tag, long comm, long[] status) + throws MPIException; + + /** + * Java binding of {@code MPI_IMPROBE}. + * @param source rank of the source + * @param tag message tag + * @param comm communicator + * @return status object if there is a message, {@code null} otherwise + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status imProbe(int source, int tag, Comm comm) throws MPIException + { + MPI.check(); + return imProbe(source, tag, comm.handle); + } + + private native Status imProbe(int source, int tag, long comm) + throws MPIException; + + /** + * Java binding of {@code MPI_MRECV}. + * @param buf receive buffer + * @param count number of elements in receive buffer + * @param type datatype of each receive buffer element + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Status mRecv(Object buf, int count, Datatype type) throws MPIException + { + MPI.check(); + int off = 0; + boolean db = false; + Status status = new Status(); + + if(buf instanceof Buffer && !(db = ((Buffer)buf).isDirect())) + { + off = type.getOffset(buf); + buf = ((Buffer)buf).array(); + } + + handle = mRecv(handle, buf, db, off, count, + type.handle, type.baseType, status.data); + + return status; + } + + private native long mRecv( + long message, Object buf, boolean db, int offset, int count, + long type, int baseType, long[] status) throws MPIException; + + /** + * Java binding of {@code MPI_IMRECV}.
+ * @param buf receive buffer + * @param count number of elements in receive buffer + * @param type datatype of each receive buffer element + * @return request object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Request imRecv(Buffer buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(imRecv(handle, buf, count, type.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long imRecv(long message, Object buf, int count, long type) + throws MPIException; } // Message diff --git a/ompi/mpi/java/java/Op.java b/ompi/mpi/java/java/Op.java index bc417672425..eb3ccd86638 100644 --- a/ompi/mpi/java/java/Op.java +++ b/ompi/mpi/java/java/Op.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License.
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : Op.java * Author : Xinying Li, Sang LIm * Created : Thu Apr 9 12:22:15 1998 @@ -52,80 +54,80 @@ */ public final class Op implements Freeable { -protected final UserFunction uf; -private boolean commute; -private Datatype datatype; -protected long handle; - -static -{ - init(); -} - -private static native void init(); - -protected Op(int type) -{ - getOp(type); - uf = null; - commute = true; -} - -private native void getOp(int type); - -/** - * Bind a user-defined global reduction operation to an {@code Op} object. - *

Java binding of the MPI operation {@code MPI_OP_CREATE}. - * @param function user defined function - * @param commute {@code true} if commutative, {@code false} otherwise - */ -public Op(UserFunction function, boolean commute) -{ - handle = 0; // When JNI code gets the handle it will be initialized. - uf = function; - this.commute = commute; -} - -protected void setDatatype(Datatype t) -{ - datatype = t; -} - -protected void call(Object invec, Object inoutvec, int count) - throws MPIException -{ - if(datatype.baseType == Datatype.BOOLEAN) - { - uf.call(invec, inoutvec, count, datatype); - } - else - { - uf.call(((ByteBuffer)invec).order(ByteOrder.nativeOrder()), - ((ByteBuffer)inoutvec).order(ByteOrder.nativeOrder()), - count, datatype); - } -} - -/** - * Test if the operation is conmutative. - *

Java binding of the MPI operation {@code MPI_OP_COMMUTATIVE}. - * @return {@code true} if commutative, {@code false} otherwise - */ -public boolean isCommutative() -{ - return commute; -} - -/** - * Java binding of the MPI operation {@code MPI_OP_FREE}. - * @throws MPIException - */ -@Override public native void free() throws MPIException; - -/** - * Test if operation object is null. - * @return true if the operation object is null, false otherwise - */ -public native boolean isNull(); + protected final UserFunction uf; + private boolean commute; + private Datatype datatype; + protected long handle; + + static + { + init(); + } + + private static native void init(); + + protected Op(int type) + { + getOp(type); + uf = null; + commute = true; + } + + private native void getOp(int type); + + /** + * Bind a user-defined global reduction operation to an {@code Op} object. + *

Java binding of the MPI operation {@code MPI_OP_CREATE}. + * @param function user defined function + * @param commute {@code true} if commutative, {@code false} otherwise + */ + public Op(UserFunction function, boolean commute) + { + handle = 0; // When JNI code gets the handle it will be initialized. + uf = function; + this.commute = commute; + } + + protected void setDatatype(Datatype t) + { + datatype = t; + } + + protected void call(Object invec, Object inoutvec, int count) + throws MPIException + { + if(datatype.baseType == Datatype.BOOLEAN) + { + uf.call(invec, inoutvec, count, datatype); + } + else + { + uf.call(((ByteBuffer)invec).order(ByteOrder.nativeOrder()), + ((ByteBuffer)inoutvec).order(ByteOrder.nativeOrder()), + count, datatype); + } + } + + /** + * Test if the operation is commutative. + *

Java binding of the MPI operation {@code MPI_OP_COMMUTATIVE}. + * @return {@code true} if commutative, {@code false} otherwise + */ + public boolean isCommutative() + { + return commute; + } + + /** + * Java binding of the MPI operation {@code MPI_OP_FREE}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public native void free() throws MPIException; + + /** + * Test if operation object is null. + * @return true if the operation object is null, false otherwise + */ + public native boolean isNull(); } // Op diff --git a/ompi/mpi/java/java/Prequest.java b/ompi/mpi/java/java/Prequest.java index 342191b99a6..94b36978392 100644 --- a/ompi/mpi/java/java/Prequest.java +++ b/ompi/mpi/java/java/Prequest.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : Prequest.java * Author : Sang Lim, Xinying Li, Bryan Carpenter * Created : Thu Apr 9 12:22:15 1998 @@ -50,43 +52,45 @@ */ public final class Prequest extends Request { -/** - * Constructor used by {@code sendInit}, etc. - */ -protected Prequest(long handle) -{ - super(handle); -} + /** + * Constructor used by {@code sendInit}, etc. + * @param handle Handle for the Prequest object + */ + protected Prequest(long handle) + { + super(handle); + } -/** - * Activate a persistent communication request. - *

Java binding of the MPI operation {@code MPI_START}. - * The communication is completed by using the request in - * one of the {@code wait} or {@code test} operations. - * On successful completion the request becomes inactive again. - * It can be reactivated by a further call to {@code Start}. - */ -public void start() throws MPIException -{ - handle = start(handle); -} + /** + * Activate a persistent communication request. + *

Java binding of the MPI operation {@code MPI_START}. + * The communication is completed by using the request in + * one of the {@code wait} or {@code test} operations. + * On successful completion the request becomes inactive again. + * It can be reactivated by a further call to {@code Start}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void start() throws MPIException + { + handle = start(handle); + } -private native long start(long request) throws MPIException; + private native long start(long request) throws MPIException; -/** - * Activate a list of communication requests. - *

Java binding of the MPI operation {@code MPI_STARTALL}. - * @param requests array of requests - * @throws MPIException - */ -public static void startAll(Prequest[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - startAll(r); - setHandles(requests, r); -} + /** + * Activate a list of communication requests. + *

Java binding of the MPI operation {@code MPI_STARTALL}. + * @param requests array of requests + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void startAll(Prequest[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + startAll(r); + setHandles(requests, r); + } -private native static void startAll(long[] requests) throws MPIException; + private native static void startAll(long[] requests) throws MPIException; } // Prequest diff --git a/ompi/mpi/java/java/Request.java b/ompi/mpi/java/java/Request.java index b9ddee9f819..a1c59fa4a9e 100644 --- a/ompi/mpi/java/java/Request.java +++ b/ompi/mpi/java/java/Request.java @@ -5,44 +5,46 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ -/* File : Request.java + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * File : Request.java * Author : Sang Lim, Xinying Li, Bryan Carpenter * Created : Thu Apr 9 12:22:15 1998 * Revision : $Revision: 1.11 $ * Updated : $Date: 2001/08/07 16:36:25 $ * Copyright: Northeast Parallel Architectures Center * at Syracuse University 1998 - */ - -/* + * + * + * * Note: in a send request for a buffer containing objects, the primary * `MPI_Request' referenced by `handle' is the request to send the data. * The request to send the header is in the secondary field, `hdrReq'. @@ -50,9 +52,9 @@ * the primary `MPI_Request' is the request to send the header. * The receive of the data is not initiated until a `wait' or `test' * operation succeeds. - */ - -/* + * + * + * * Probably `Request' should be an abstract class, and there should * be several concrete subclasses. At the moment requests are created * in a few different ways, and the differently constructed requests are @@ -61,406 +63,458 @@ package mpi; +import java.nio.Buffer; + /** * Request object. */ public class Request implements Freeable { -protected long handle; - -static -{ - init(); -} - -private static native void init(); - -protected static native long getNull(); - -protected Request(long handle) -{ - this.handle = handle; -} - -/** - * Set the request object to be void. 
- * Java binding of the MPI operation {@code MPI_REQUEST_FREE}. - */ -@Override public void free() throws MPIException -{ - if(!isNull()) - { - MPI.check(); - handle = free(handle); - } -} - -private native long free(long req) throws MPIException; - -/** - * Mark a pending nonblocking communication for cancellation. - * Java binding of the MPI operation {@code MPI_CANCEL}. - */ -public final void cancel() throws MPIException -{ - MPI.check(); - cancel(handle); -} - -private native void cancel(long request) throws MPIException; - -/** - * Test if request object is null. - * @return true if the request object is null, false otherwise - */ -public final boolean isNull() -{ - return handle == 0 || handle == MPI.REQUEST_NULL.handle; -} - -/** - * Blocks until the operation identified by the request is complete. - *

Java binding of the MPI operation {@code MPI_WAIT}. - *

After the call returns, the request object becomes inactive. - * @return status object - * @throws MPIException - */ -public final Status waitStatus() throws MPIException -{ - MPI.check(); - Status status = new Status(); - handle = waitStatus(handle, status.data); - return status; -} - -private native long waitStatus(long request, long[] stat) throws MPIException; - -/** - * Blocks until the operation identified by the request is complete. - *

Java binding of the MPI operation {@code MPI_WAIT}. - *

After the call returns, the request object becomes inactive. - * @throws MPIException - */ -public final void waitFor() throws MPIException -{ - MPI.check(); - handle = waitFor(handle); -} - -private native long waitFor(long request) throws MPIException; - -/** - * Returns a status object if the operation identified by the request - * is complete, or a null reference otherwise. - *

Java binding of the MPI operation {@code MPI_TEST}. - *

After the call, if the operation is complete (ie, if the return - * value is non-null), the request object becomes inactive. - * @return status object - * @throws MPIException - */ -public final Status testStatus() throws MPIException -{ - MPI.check(); - return testStatus(handle); -} - -private native Status testStatus(long request) throws MPIException; - -/** - * Returns true if the operation identified by the request - * is complete, or false otherwise. - *

Java binding of the MPI operation {@code MPI_TEST}. - *

After the call, if the operation is complete (ie, if the return - * value is true), the request object becomes inactive. - * @return true if the operation identified by the request, false otherwise - * @throws MPIException - */ -public final boolean test() throws MPIException -{ - MPI.check(); - return test(handle); -} - -private native boolean test(long handle) throws MPIException; - -/** - * Blocks until one of the operations associated with the active - * requests in the array has completed. - *

Java binding of the MPI operation {@code MPI_WAITANY}. - *

The index in array of {@code requests} for the request that - * completed can be obtained from the returned status object through - * the {@code Status.getIndex()} method. The corresponding element - * of array of {@code requests} becomes inactive. - * @param requests array of requests - * @return status object - * @throws MPIException - */ -public static Status waitAnyStatus(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - Status status = new Status(); - waitAnyStatus(r, status.data); - setHandles(requests, r); - return status; -} - -private static native void waitAnyStatus(long[] requests, long[] status) - throws MPIException; - -/** - * Blocks until one of the operations associated with the active - * requests in the array has completed. - *

Java binding of the MPI operation {@code MPI_WAITANY}. - *

The request that completed becomes inactive. - * @param requests array of requests - * @return The index in array of {@code requests} for the request that - * completed. If all of the requests are MPI_REQUEST_NULL, then index - * is returned as {@code MPI.UNDEFINED}. - * @throws MPIException - */ -public static int waitAny(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - int index = waitAny(r); - setHandles(requests, r); - return index; -} - -private static native int waitAny(long[] requests) throws MPIException; - -/** - * Tests for completion of either one or none of the operations - * associated with active requests. - *

Java binding of the MPI operation {@code MPI_TESTANY}. - *

If some request completed, the index in array of {@code requests} - * for that request can be obtained from the returned status object. - * The corresponding element in array of {@code requests} becomes inactive. - * If no request completed, {testAny} returns {@code null}. - * @param requests array of requests - * @return status object if one request completed, {@code null} otherwise. - * @throws MPIException - */ -public static Status testAnyStatus(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - Status status = testAnyStatus(r); - setHandles(requests, r); - return status; -} - -private static native Status testAnyStatus(long[] requests) throws MPIException; - -/** - * Tests for completion of either one or none of the operations - * associated with active requests. - *

Java binding of the MPI operation {@code MPI_TESTANY}. - *

If some request completed, ii becomes inactive. - * @param requests array of requests - * @return index of operation that completed, or {@code MPI.UNDEFINED} - * if none completed. - * @throws MPIException - */ -public static int testAny(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - int index = testAny(r); - setHandles(requests, r); - return index; -} - -private static native int testAny(long[] requests) throws MPIException; - -/** - * Blocks until all of the operations associated with the active - * requests in the array have completed. - *

Java binding of the MPI operation {@code MPI_WAITALL}. - *

On exit, requests become inactive. If the input value of - * array of {@code requests} contains inactive requests, corresponding - * elements of the status array will contain null status references. - * @param requests array of requests - * @return array of statuses - * @throws MPIException - */ -public static Status[] waitAllStatus(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - Status[] status = waitAllStatus(r); - setHandles(requests, r); - return status; -} - -private static native Status[] waitAllStatus(long[] requests) - throws MPIException; - -/** - * Blocks until all of the operations associated with the active - * requests in the array have completed. - *

Java binding of the MPI operation {@code MPI_WAITALL}. - * @param requests array of requests - * @throws MPIException - */ -public static void waitAll(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - waitAll(r); - setHandles(requests, r); -} - -private static native void waitAll(long[] requests) throws MPIException; - -/** - * Tests for completion of all of the operations associated - * with active requests. - *

Java binding of the MPI operation {@code MPI_TESTALL}. - *

If all operations have completed, the exit value of the argument array - * is as for {@code waitAllStatus}. - * @param requests array of requests - * @return array of statuses if all operations have completed, - * {@code null} otherwise. - * @throws MPIException - */ -public static Status[] testAllStatus(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - Status[] status = testAllStatus(r); - setHandles(requests, r); - return status; -} - -private static native Status[] testAllStatus(long[] requests) - throws MPIException; - -/** - * Tests for completion of all of the operations associated - * with active requests. - *

Java binding of the MPI operation {@code MPI_TESTALL}. - * @param requests array of requests - * @return {@code true} if all operations have completed, - * {@code false} otherwise. - * @throws MPIException - */ -public static boolean testAll(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - boolean completed = testAll(r); - setHandles(requests, r); - return completed; -} - -private static native boolean testAll(long[] requests) throws MPIException; - -/** - * Blocks until at least one of the operations associated with the active - * requests in the array has completed. - *

Java binding of the MPI operation {@code MPI_WAITSOME}. - *

The size of the result array will be the number of operations that - * completed. The index in array of {@code requests} for each request that - * completed can be obtained from the returned status objects through the - * {@code Status.getIndex()} method. The corresponding element in - * array of {@code requests} becomes inactive. - * @param requests array of requests - * @return array of statuses or {@code null} if the number of operations - * completed is {@code MPI_UNDEFINED}. - * @throws MPIException - */ -public static Status[] waitSomeStatus(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - Status[] status = waitSomeStatus(r); - setHandles(requests, r); - return status; -} - -private static native Status[] waitSomeStatus(long[] requests) - throws MPIException; - -/** - * Blocks until at least one of the operations associated with the active - * active requests in the array has completed. - *

Java binding of the MPI operation {@code MPI_WAITSOME}. - *

The size of the result array will be the number of operations that - * completed. The corresponding element in array of {@code requests} becomes - * inactive. - * @param requests array of requests - * @return array of indexes of {@code requests} that completed or {@code null} - * if the number of operations completed is {@code MPI_UNDEFINED}. - * @throws MPIException - */ -public static int[] waitSome(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - int[] indexes = waitSome(r); - setHandles(requests, r); - return indexes; -} - -private static native int[] waitSome(long[] requests) throws MPIException; - -/** - * Behaves like {@code waitSome}, except that it returns immediately. - *

Java binding of the MPI operation {@code MPI_TESTSOME}. - *

If no operation has completed, {@code testSome} returns an array of - * length zero, otherwise the return value are as for {@code waitSome}. - * @param requests array of requests - * @return array of statuses - * @throws MPIException - */ -public static Status[] testSomeStatus(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - Status[] status = testSomeStatus(r); - setHandles(requests, r); - return status; -} - -private static native Status[] testSomeStatus(long[] requests) - throws MPIException; - -/** - * Behaves like {@code waitSome}, except that it returns immediately. - *

Java binding of the MPI operation {@code MPI_TESTSOME}. - *

If no operation has completed, {@code testSome} returns an array of - * length zero, otherwise the return value are as for {@code waitSome}. - * @param requests array of requests - * @return array of indexes of {@code requests} that completed. - * @throws MPIException - */ -public static int[] testSome(Request[] requests) throws MPIException -{ - MPI.check(); - long[] r = getHandles(requests); - int[] indexes = testSome(r); - setHandles(requests, r); - return indexes; -} - -private static native int[] testSome(long[] requests) throws MPIException; - -protected static long[] getHandles(Request[] r) -{ - long[] h = new long[r.length]; - - for(int i = 0; i < r.length; i++) - h[i] = r[i].handle; - - return h; -} - -protected static void setHandles(Request[] r, long[] h) -{ - for(int i = 0; i < r.length; i++) - r[i].handle = h[i]; -} + protected long handle; + protected Buffer sendBuf; + protected Buffer recvBuf; + + static + { + init(); + } + + private static native void init(); + + protected static native long getNull(); + + protected Request(long handle) + { + this.handle = handle; + } + + /** + * Set the request object to be void. + * Java binding of the MPI operation {@code MPI_REQUEST_FREE}. + */ + @Override public void free() throws MPIException + { + if(!isNull()) + { + MPI.check(); + handle = free(handle); + } + } + + private native long free(long req) throws MPIException; + + /** + * Mark a pending nonblocking communication for cancellation. + * Java binding of the MPI operation {@code MPI_CANCEL}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void cancel() throws MPIException + { + MPI.check(); + cancel(handle); + } + + private native void cancel(long request) throws MPIException; + + /** + * Adds a receive buffer to this Request object. 
This method + * should be called by the internal API whenever a persistent + * request is created and any time a request object, that has + * an associated buffer, is returned from an operation to protect + * the buffer from getting prematurely garbage collected. + * @param buf buffer whose reference is stored in this request + */ + protected final void addRecvBufRef(Buffer buf) + { + this.recvBuf = buf; + } + + /** + * Adds a send buffer to this Request object. This method + * should be called by the internal API whenever a persistent + * request is created and any time a request object, that has + * an associated buffer, is returned from an operation to protect + * the buffer from getting prematurely garbage collected. + * @param buf buffer whose reference is stored in this request + */ + protected final void addSendBufRef(Buffer buf) + { + this.sendBuf = buf; + } + + /** + * Test if request object is null. + * @return true if the request object is null, false otherwise + */ + public final boolean isNull() + { + return handle == 0 || handle == MPI.REQUEST_NULL.handle; + } + + /** + * Blocks until the operation identified by the request is complete. + *

Java binding of the MPI operation {@code MPI_WAIT}. + *

After the call returns, the request object becomes inactive. + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Status waitStatus() throws MPIException + { + MPI.check(); + Status status = new Status(); + handle = waitStatus(handle, status.data); + return status; + } + + private native long waitStatus(long request, long[] stat) throws MPIException; + + /** + * Blocks until the operation identified by the request is complete. + *

Java binding of the MPI operation {@code MPI_WAIT}. + *

After the call returns, the request object becomes inactive. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final void waitFor() throws MPIException + { + MPI.check(); + handle = waitFor(handle); + } + + private native long waitFor(long request) throws MPIException; + + /** + * Returns a status object if the operation identified by the request + * is complete, or a null reference otherwise. + *

Java binding of the MPI operation {@code MPI_TEST}. + *

After the call, if the operation is complete (ie, if the return + * value is non-null), the request object becomes inactive. + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Status testStatus() throws MPIException + { + MPI.check(); + return testStatus(handle); + } + + private native Status testStatus(long request) throws MPIException; + + /** + * Returns a status object if the operation identified by the request + * is complete, or a null reference otherwise. + *

Java binding of the MPI operation {@code MPI_REQUEST_GET_STATUS}. + *

After the call, if the operation is complete (ie, if the return + * value is non-null), the request object remains active. + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Status getStatus() throws MPIException + { + MPI.check(); + return getStatus(handle); + } + + private native Status getStatus(long request) throws MPIException; + + /** + * Returns true if the operation identified by the request + * is complete, or false otherwise. + *

Java binding of the MPI operation {@code MPI_TEST}. + *

After the call, if the operation is complete (ie, if the return + * value is true), the request object becomes inactive. + * @return true if the operation identified by the request, false otherwise + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final boolean test() throws MPIException + { + MPI.check(); + return test(handle); + } + + private native boolean test(long handle) throws MPIException; + + /** + * Blocks until one of the operations associated with the active + * requests in the array has completed. + *

Java binding of the MPI operation {@code MPI_WAITANY}. + *

The index in array of {@code requests} for the request that + * completed can be obtained from the returned status object through + * the {@code Status.getIndex()} method. The corresponding element + * of array of {@code requests} becomes inactive. + * @param requests array of requests + * @return status object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Status waitAnyStatus(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + Status status = new Status(); + waitAnyStatus(r, status.data); + setHandles(requests, r); + return status; + } + + private static native void waitAnyStatus(long[] requests, long[] status) + throws MPIException; + + /** + * Blocks until one of the operations associated with the active + * requests in the array has completed. + *

Java binding of the MPI operation {@code MPI_WAITANY}. + *

The request that completed becomes inactive. + * @param requests array of requests + * @return The index in array of {@code requests} for the request that + * completed. If all of the requests are MPI_REQUEST_NULL, then index + * is returned as {@code MPI.UNDEFINED}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int waitAny(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + int index = waitAny(r); + setHandles(requests, r); + return index; + } + + private static native int waitAny(long[] requests) throws MPIException; + + /** + * Tests for completion of either one or none of the operations + * associated with active requests. + *

Java binding of the MPI operation {@code MPI_TESTANY}. + *

If some request completed, the index in array of {@code requests} + * for that request can be obtained from the returned status object. + * The corresponding element in array of {@code requests} becomes inactive. + * If no request completed, {testAny} returns {@code null}. + * @param requests array of requests + * @return status object if one request completed, {@code null} otherwise. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Status testAnyStatus(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + Status status = testAnyStatus(r); + setHandles(requests, r); + return status; + } + + private static native Status testAnyStatus(long[] requests) throws MPIException; + + /** + * Tests for completion of either one or none of the operations + * associated with active requests. + *

Java binding of the MPI operation {@code MPI_TESTANY}. + *

If some request completed, ii becomes inactive. + * @param requests array of requests + * @return index of operation that completed, or {@code MPI.UNDEFINED} + * if none completed. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int testAny(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + int index = testAny(r); + setHandles(requests, r); + return index; + } + + private static native int testAny(long[] requests) throws MPIException; + + /** + * Blocks until all of the operations associated with the active + * requests in the array have completed. + *

Java binding of the MPI operation {@code MPI_WAITALL}. + *

On exit, requests become inactive. If the input value of + * array of {@code requests} contains inactive requests, corresponding + * elements of the status array will contain null status references. + * @param requests array of requests + * @return array of statuses + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Status[] waitAllStatus(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + Status[] status = waitAllStatus(r); + setHandles(requests, r); + return status; + } + + private static native Status[] waitAllStatus(long[] requests) + throws MPIException; + + /** + * Blocks until all of the operations associated with the active + * requests in the array have completed. + *

Java binding of the MPI operation {@code MPI_WAITALL}. + * @param requests array of requests + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void waitAll(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + waitAll(r); + setHandles(requests, r); + } + + private static native void waitAll(long[] requests) throws MPIException; + + /** + * Tests for completion of all of the operations associated + * with active requests. + *

Java binding of the MPI operation {@code MPI_TESTALL}. + *

If all operations have completed, the exit value of the argument array + * is as for {@code waitAllStatus}. + * @param requests array of requests + * @return array of statuses if all operations have completed, + * {@code null} otherwise. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Status[] testAllStatus(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + Status[] status = testAllStatus(r); + setHandles(requests, r); + return status; + } + + private static native Status[] testAllStatus(long[] requests) + throws MPIException; + + /** + * Tests for completion of all of the operations associated + * with active requests. + *

Java binding of the MPI operation {@code MPI_TESTALL}. + * @param requests array of requests + * @return {@code true} if all operations have completed, + * {@code false} otherwise. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static boolean testAll(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + boolean completed = testAll(r); + setHandles(requests, r); + return completed; + } + + private static native boolean testAll(long[] requests) throws MPIException; + + /** + * Blocks until at least one of the operations associated with the active + * requests in the array has completed. + *

Java binding of the MPI operation {@code MPI_WAITSOME}. + *

The size of the result array will be the number of operations that + * completed. The index in array of {@code requests} for each request that + * completed can be obtained from the returned status objects through the + * {@code Status.getIndex()} method. The corresponding element in + * array of {@code requests} becomes inactive. + * @param requests array of requests + * @return array of statuses or {@code null} if the number of operations + * completed is {@code MPI_UNDEFINED}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Status[] waitSomeStatus(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + Status[] status = waitSomeStatus(r); + setHandles(requests, r); + return status; + } + + private static native Status[] waitSomeStatus(long[] requests) + throws MPIException; + + /** + * Blocks until at least one of the operations associated with the active + * active requests in the array has completed. + *

Java binding of the MPI operation {@code MPI_WAITSOME}. + *

The size of the result array will be the number of operations that + * completed. The corresponding element in array of {@code requests} becomes + * inactive. + * @param requests array of requests + * @return array of indexes of {@code requests} that completed or {@code null} + * if the number of operations completed is {@code MPI_UNDEFINED}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int[] waitSome(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + int[] indexes = waitSome(r); + setHandles(requests, r); + return indexes; + } + + private static native int[] waitSome(long[] requests) throws MPIException; + + /** + * Behaves like {@code waitSome}, except that it returns immediately. + *

Java binding of the MPI operation {@code MPI_TESTSOME}. + *

If no operation has completed, {@code testSome} returns an array of + * length zero, otherwise the return value are as for {@code waitSome}. + * @param requests array of requests + * @return array of statuses + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static Status[] testSomeStatus(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + Status[] status = testSomeStatus(r); + setHandles(requests, r); + return status; + } + + private static native Status[] testSomeStatus(long[] requests) + throws MPIException; + + /** + * Behaves like {@code waitSome}, except that it returns immediately. + *

Java binding of the MPI operation {@code MPI_TESTSOME}. + *

If no operation has completed, {@code testSome} returns an array of + * length zero, otherwise the return value are as for {@code waitSome}. + * @param requests array of requests + * @return array of indexes of {@code requests} that completed. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int[] testSome(Request[] requests) throws MPIException + { + MPI.check(); + long[] r = getHandles(requests); + int[] indexes = testSome(r); + setHandles(requests, r); + return indexes; + } + + private static native int[] testSome(long[] requests) throws MPIException; + + protected static long[] getHandles(Request[] r) + { + long[] h = new long[r.length]; + + for(int i = 0; i < r.length; i++) { + if(r[i] != null) + h[i] = r[i].handle; + else + h[i] = 0; + } + + return h; + } + + protected static void setHandles(Request[] r, long[] h) + { + for(int i = 0; i < r.length; i++) + r[i].handle = h[i]; + } } // Request diff --git a/ompi/mpi/java/java/ShiftParms.java b/ompi/mpi/java/java/ShiftParms.java index 30311eda497..37f26572e9e 100644 --- a/ompi/mpi/java/java/ShiftParms.java +++ b/ompi/mpi/java/java/ShiftParms.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. 
- */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * * File : ShiftParms.java * Author : Xinying Li * Created : Thu Apr 9 12:22:15 1998 @@ -50,31 +52,31 @@ */ public final class ShiftParms { -private final int rankSource; -private final int rankDest; + private final int rankSource; + private final int rankDest; -protected ShiftParms(int rankSource, int rankDest) -{ - this.rankSource = rankSource; - this.rankDest = rankDest; -} + protected ShiftParms(int rankSource, int rankDest) + { + this.rankSource = rankSource; + this.rankDest = rankDest; + } -/** - * Gets the source rank. - * @return source rank - */ -public int getRankSource() -{ - return rankSource; -} + /** + * Gets the source rank. + * @return source rank + */ + public int getRankSource() + { + return rankSource; + } -/** - * Gets the destination rank. 
- * @return destination rank - */ -public int getRankDest() -{ - return rankDest; -} + /** + * Gets the destination rank. + * @return destination rank + */ + public int getRankDest() + { + return rankDest; + } } // ShiftParms diff --git a/ompi/mpi/java/java/ShortInt.java b/ompi/mpi/java/java/ShortInt.java index bb764d4bcc3..18c1a421f05 100644 --- a/ompi/mpi/java/java/ShortInt.java +++ b/ompi/mpi/java/java/ShortInt.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,110 +25,113 @@ */ public final class ShortInt extends Struct { -private final int sSize, iOff, iSize; + private final int sSize, iOff, iSize; -/** - * The struct object will be created only in MPI class. - * @see MPI#shortInt - */ -protected ShortInt(int shortSize, int intOff, int intSize) -{ - sSize = shortSize; - iSize = intSize; - int sOff; + /** + * The struct object will be created only in MPI class. 
+ * @param shortSize short size + * @param intOff int offset + * @param intSize int size + * @see MPI#shortInt + */ + protected ShortInt(int shortSize, int intOff, int intSize) + { + sSize = shortSize; + iSize = intSize; + int sOff; - switch(sSize) - { - case 2: sOff = addShort(); break; - case 4: sOff = addInt(); break; - case 8: sOff = addLong(); break; - default: throw new AssertionError("Unsupported short size: "+ sSize); - } + switch(sSize) + { + case 2: sOff = addShort(); break; + case 4: sOff = addInt(); break; + case 8: sOff = addLong(); break; + default: throw new AssertionError("Unsupported short size: "+ sSize); + } - assert sOff == 0; - setOffset(intOff); + assert sOff == 0; + setOffset(intOff); - switch(iSize) - { - case 4: iOff = addInt(); break; - case 8: iOff = addLong(); break; - default: throw new AssertionError("Unsupported int size: "+ iSize); - } + switch(iSize) + { + case 4: iOff = addInt(); break; + case 8: iOff = addLong(); break; + default: throw new AssertionError("Unsupported int size: "+ iSize); + } - assert(intOff == iOff); -} + assert(intOff == iOff); + } -/** - * Creates a Data object. - * @return new Data object. - */ -@Override protected Data newData() -{ - return new Data(); -} + /** + * Creates a Data object. + * @return new Data object. + */ + @Override protected Data newData() + { + return new Data(); + } -/** - * Class for reading/writing data in a struct stored in a byte buffer. - */ -public final class Data extends Struct.Data -{ - /** - * Gets the short value. - * @return short value - */ - public short getValue() - { - switch(sSize) - { - case 2: return getShort(0); - case 4: return (short)getInt(0); - case 8: return (short)getLong(0); - default: throw new AssertionError(); - } - } + /** + * Class for reading/writing data in a struct stored in a byte buffer. + */ + public final class Data extends Struct.Data + { + /** + * Gets the short value. 
+ * @return short value + */ + public short getValue() + { + switch(sSize) + { + case 2: return getShort(0); + case 4: return (short)getInt(0); + case 8: return (short)getLong(0); + default: throw new AssertionError(); + } + } - /** - * Gets the int value. - * @return int value - */ - public int getIndex() - { - switch(iSize) - { - case 4: return getInt(iOff); - case 8: return (int)getLong(iOff); - default: throw new AssertionError(); - } - } + /** + * Gets the int value. + * @return int value + */ + public int getIndex() + { + switch(iSize) + { + case 4: return getInt(iOff); + case 8: return (int)getLong(iOff); + default: throw new AssertionError(); + } + } - /** - * Puts the short value. - * @param v short value - */ - public void putValue(short v) - { - switch(sSize) - { - case 2: putShort(0, v); break; - case 4: putInt(0, v); break; - case 8: putLong(0, v); break; - default: throw new AssertionError(); - } - } + /** + * Puts the short value. + * @param v short value + */ + public void putValue(short v) + { + switch(sSize) + { + case 2: putShort(0, v); break; + case 4: putInt(0, v); break; + case 8: putLong(0, v); break; + default: throw new AssertionError(); + } + } - /** - * Puts the int value. - * @param v int value - */ - public void putIndex(int v) - { - switch(iSize) - { - case 4: putInt(iOff, v); break; - case 8: putLong(iOff, v); break; - default: throw new AssertionError(); - } - } -} // Data + /** + * Puts the int value. + * @param v int value + */ + public void putIndex(int v) + { + switch(iSize) + { + case 4: putInt(iOff, v); break; + case 8: putLong(iOff, v); break; + default: throw new AssertionError(); + } + } + } // Data } // ShortInt diff --git a/ompi/mpi/java/java/Status.java b/ompi/mpi/java/java/Status.java index 393fcd35181..977542b69f9 100644 --- a/ompi/mpi/java/java/Status.java +++ b/ompi/mpi/java/java/Status.java @@ -5,35 +5,37 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ - */ -/* + * + * * This file is almost a complete re-write for Open MPI compared to the * original mpiJava package. Its license and copyright are listed below. * See for more information. - */ -/* - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ -/* + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * * File : Status.java * Author : Sang Lim, Sung-Hoon Ko, Xinying Li, Bryan Carpenter * Created : Thu Apr 9 12:22:15 1998 @@ -50,127 +52,226 @@ */ public final class Status { -protected final long[] data; + protected final long[] data; -static -{ - init(); -} + static + { + init(); + } -private static native void init(); + private static native void init(); -/** - * Status objects must be created only by the MPI methods. - */ -protected Status() -{ - data = new long[6]; -} + /** + * Status objects must be created only by the MPI methods. + */ + protected Status() + { + data = new long[6]; + } -/** - * Returns the number of received entries. - *

Java binding of the MPI operation {@code MPI_GET_COUNT}. - * @param datatype datatype of each item in receive buffer - * @return number of received entries - * @throws MPIException - */ -public int getCount(Datatype datatype) throws MPIException -{ - MPI.check(); - int i = 0; - int source = (int)data[i++]; - int tag = (int)data[i++]; - int error = (int)data[i++]; - int cancelled = (int)data[i++]; - long ucount = data[i++]; - return getCount(source, tag, error, cancelled, ucount, datatype.handle); -} - -private native int getCount( - int source, int tag, int error, - int cancelled, long ucount, long datatype) throws MPIException; + /** + * Returns the number of received entries. + *

Java binding of the MPI operation {@code MPI_GET_COUNT}. + * @param datatype datatype of each item in receive buffer + * @return number of received entries + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getCount(Datatype datatype) throws MPIException + { + MPI.check(); + int i = 0; + int source = (int)data[i++]; + int tag = (int)data[i++]; + int error = (int)data[i++]; + int cancelled = (int)data[i++]; + long ucount = data[i++]; + return getCount(source, tag, error, cancelled, ucount, datatype.handle); + } -/** - * Tests if the communication was cancelled. - *

Java binding of the MPI operation {@code MPI_TEST_CANCELLED}. - * @return true if the operation was succesfully cancelled, false otherwise - * @throws MPIException - */ -public boolean isCancelled() throws MPIException -{ - MPI.check(); - int i = 0; - int source = (int)data[i++]; - int tag = (int)data[i++]; - int error = (int)data[i++]; - int cancelled = (int)data[i++]; - long ucount = data[i++]; - return isCancelled(source, tag, error, cancelled, ucount); -} - -private native boolean isCancelled( - int source, int tag, int error, int cancelled, long ucount) - throws MPIException; + private native int getCount( + int source, int tag, int error, + int cancelled, long ucount, long datatype) throws MPIException; -/** - * Retrieves the number of basic elements from status. - *

Java binding of the MPI operation {@code MPI_GET_ELEMENTS}. - * @param datatype datatype used by receive operation - * @return number of received basic elements - * @throws MPIException - */ -public int getElements(Datatype datatype) throws MPIException -{ - MPI.check(); - int i = 0; - int source = (int)data[i++]; - int tag = (int)data[i++]; - int error = (int)data[i++]; - int cancelled = (int)data[i++]; - long ucount = data[i++]; - return getElements(source, tag, error, cancelled, ucount, datatype.handle); -} - -private native int getElements( - int source, int tag, int error, - int cancelled, long ucount, long datatype) throws MPIException; + /** + * Tests if the communication was cancelled. + *

Java binding of the MPI operation {@code MPI_TEST_CANCELLED}. + * @return true if the operation was succesfully cancelled, false otherwise + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public boolean isCancelled() throws MPIException + { + MPI.check(); + int i = 0; + int source = (int)data[i++]; + int tag = (int)data[i++]; + int error = (int)data[i++]; + int cancelled = (int)data[i++]; + long ucount = data[i++]; + return isCancelled(source, tag, error, cancelled, ucount); + } -/** - * Returns the "source" of message. - *

Java binding of the MPI value {@code MPI_SOURCE}. - * @return source of message - */ -public int getSource() -{ - return (int)data[0]; -} + private native boolean isCancelled( + int source, int tag, int error, int cancelled, long ucount) + throws MPIException; -/** - * Returns the "tag" of message. - *

Java binding of the MPI value {@code MPI_TAG}. - * @return tag of message - */ -public int getTag() -{ - return (int)data[1]; -} + /** + * Retrieves the number of basic elements from status. + *

Java binding of the MPI operation {@code MPI_GET_ELEMENTS}. + * @param datatype datatype used by receive operation + * @return number of received basic elements + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public int getElements(Datatype datatype) throws MPIException + { + MPI.check(); + int i = 0; + int source = (int)data[i++]; + int tag = (int)data[i++]; + int error = (int)data[i++]; + int cancelled = (int)data[i++]; + long ucount = data[i++]; + return getElements(source, tag, error, cancelled, ucount, datatype.handle); + } -/** - * Returns the {@code MPI_ERROR} of message. - * @return error of message. - */ -public int getError() -{ - return (int)data[2]; -} + private native int getElements( + int source, int tag, int error, + int cancelled, long ucount, long datatype) throws MPIException; -/** - * Returns the index of message. - * @return index of message. - */ -public int getIndex() -{ - return (int)data[5]; -} + /** + * Retrieves the number of basic elements from status. + *

Java binding of the MPI operation {@code MPI_GET_ELEMENTS_X}. + * @param datatype datatype used by receive operation + * @return number of received basic elements + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Count getElementsX(Datatype datatype) throws MPIException + { + MPI.check(); + int i = 0; + int source = (int)data[i++]; + int tag = (int)data[i++]; + int error = (int)data[i++]; + int cancelled = (int)data[i++]; + long ucount = data[i++]; + return getElementsX(source, tag, error, cancelled, ucount, datatype.handle); + } + + private native Count getElementsX( + int source, int tag, int error, + int cancelled, long ucount, long datatype) throws MPIException; + + /** + * Sets the number of basic elements for this status object. + *

Java binding of the MPI operation {@code MPI_STATUS_SET_ELEMENTS}. + * @param datatype datatype used by receive operation + * @param count number of elements to associate with the status + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setElements(Datatype datatype, int count) throws MPIException + { + MPI.check(); + int i = 0; + int source = (int)data[i++]; + int tag = (int)data[i++]; + int error = (int)data[i++]; + int cancelled = (int)data[i++]; + long ucount = data[i++]; + data[4] = setElements(source, tag, error, cancelled, ucount, datatype.handle, count); + } + + private native int setElements( + int source, int tag, int error, + int cancelled, long ucount, long datatype, int count) throws MPIException; + + /** + * Sets the number of basic elements for this status object. + *

Java binding of the MPI operation {@code MPI_STATUS_SET_ELEMENTS_X}. + * @param datatype datatype used by receive operation + * @param count number of elements to associate with the status + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setElementsX(Datatype datatype, Count count) throws MPIException + { + MPI.check(); + int i = 0; + int source = (int)data[i++]; + int tag = (int)data[i++]; + int error = (int)data[i++]; + int cancelled = (int)data[i++]; + long ucount = data[i++]; + data[4] = setElementsX(source, tag, error, cancelled, ucount, datatype.handle, count.getCount()); + } + + private native long setElementsX( + int source, int tag, int error, + int cancelled, long ucount, long datatype, long count) throws MPIException; + + /** + * Sets the cancelled flag. + *

Java binding of the MPI operation {@code MPI_STATUS_SET_CANCELLED}. + * @param flag if true indicates request was cancelled + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setCancelled(boolean flag) throws MPIException + { + MPI.check(); + int i = 0; + int source = (int)data[i++]; + int tag = (int)data[i++]; + int error = (int)data[i++]; + int cancelled = (int)data[i++]; + long ucount = data[i++]; + + if(flag) { + setCancelled(source, tag, error, cancelled, ucount, 1); + data[3] = 1; + } else { + setCancelled(source, tag, error, cancelled, ucount, 0); + data[3] = 0; + } + + } + + private native void setCancelled( + int source, int tag, int error, + int cancelled, long ucount, int flag) throws MPIException; + + /** + * Returns the "source" of message. + *

Java binding of the MPI value {@code MPI_SOURCE}. + * @return source of message + */ + public int getSource() + { + return (int)data[0]; + } + + /** + * Returns the "tag" of message. + *

Java binding of the MPI value {@code MPI_TAG}. + * @return tag of message + */ + public int getTag() + { + return (int)data[1]; + } + + /** + * Returns the {@code MPI_ERROR} of message. + * @return error of message. + */ + public int getError() + { + return (int)data[2]; + } + + /** + * Returns the index of message. + * @return index of message. + */ + public int getIndex() + { + return (int)data[5]; + } } // Status diff --git a/ompi/mpi/java/java/Struct.java b/ompi/mpi/java/java/Struct.java index a65d76b4fbc..b7dfefe4d4b 100644 --- a/ompi/mpi/java/java/Struct.java +++ b/ompi/mpi/java/java/Struct.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,769 +28,775 @@ */ public abstract class Struct { -private int extent; -private ArrayList fields = new ArrayList(); - -private Datatype datatype, types[]; -private int offsets[], lengths[]; -private static final String typeMismatch = "Type mismatch"; - -private void commit() throws MPIException -{ - if(datatype == null) - createStruct(); -} - -private void createStruct() throws MPIException -{ - int count = fields.size(); - types = new Datatype[count]; - offsets = new int[count]; - lengths = new int[count]; - - for(int i = 0; i < count; i++) - { - Field f = fields.get(i); - - types[i] = f.type instanceof Struct ? 
((Struct)f.type).datatype - : (Datatype)f.type; - offsets[i] = f.offset; - lengths[i] = f.length; - } - - datatype = Datatype.createStruct(lengths, offsets, types); - datatype.commit(); - extent = datatype.getExtent(); -} - -/** - * Returns the extent of the struct data type. - * @return Extent of the struct data type. - * @throws MPIException - */ -public final int getExtent() throws MPIException -{ - commit(); - return extent; -} - -/** - * Returns the data type of the struct. - * @return The data type of the struct. - * @throws MPIException - */ -public final Datatype getType() throws MPIException -{ - commit(); - return datatype; -} - -/** - * Creates a Data object. - * @return New Data object. - */ -protected abstract Data newData(); - -@SuppressWarnings("unchecked") -private T newData(ByteBuffer buffer, int offset) -{ - Data d = newData(); - d.buffer = buffer; - d.offset = offset; - return (T)d; -} - -/** - * Gets a Data object in order to access to the buffer. - * @param buffer the Data object will read/write on this buffer. - * @return Data object - * @throws MPIException - */ -public final T getData(ByteBuffer buffer) throws MPIException -{ - commit(); - return newData(buffer, 0); -} - -/** - * Gets a Data object in order to access to the struct at the - * specified position of a struct array stored in a Buffer. - * @param buffer The Data object will read/write on this buffer. - * @param index Index of the struct in the buffer. - * @return Data object. - * @throws MPIException - */ -public final T getData(ByteBuffer buffer, int index) - throws MPIException -{ - commit(); - return newData(buffer, index * extent); -} - -/** - * Gets a Data object in order to access to the byte array. - * @param array The Data object will read/write on this byte array. - * @return Data object. 
- * @throws MPIException - */ -public final T getData(byte[] array) throws MPIException -{ - ByteBuffer buffer = ByteBuffer.wrap(array); - buffer.order(ByteOrder.nativeOrder()); - return getData(buffer); -} - -/** - * Gets a Data object in order to access to the struct at the - * specified position of a struct array stored in a byte array. - * @param array The Data object will read/write on this byte array. - * @param index Index of the struct in the array. - * @return Data object. - * @throws MPIException - */ -public final T getData(byte[] array, int index) - throws MPIException -{ - ByteBuffer buffer = ByteBuffer.wrap(array); - buffer.order(ByteOrder.nativeOrder()); - return getData(buffer, index); -} - -private int addField(Object type, int typeExtent, int length) -{ - if(datatype != null) - throw new AssertionError("The struct data type was committed."); - - int offset = extent; - extent += typeExtent * length; - fields.add(new Field(type, offset, length)); - return offset; -} - -/** - * Sets the offset of the next field. - *

The offset must be greater or equal to the accumulated extent. - * @param offset offset of the next field - * @return this object in order to allow adding fields in a chained expression - */ -public final Struct setOffset(int offset) -{ - if(datatype != null) - throw new AssertionError("The struct data type was committed."); - - if(offset < extent) - { - throw new IllegalArgumentException( - "The offset must be greater or equal to the accumulated extent."); - } - - extent = offset; - return this; -} - -/** - * Adds a byte field to this struct. - * @return Offset of the new field. - */ -public final int addByte() -{ - return addByte(1); -} - -/** - * Adds a byte array to this struct. - * @param length Length of the array. - * @return Offset of the new field. - */ -public final int addByte(int length) -{ - return addField(MPI.BYTE, 1, length); -} - -/** - * Adds a char field to this struct. - * @return Offset of the new field. - */ -public final int addChar() -{ - return addChar(1); -} - -/** - * Adds a char array to this struct. - * @param length Length of the array. - * @return Offset of the new field. - */ -public final int addChar(int length) -{ - return addField(MPI.CHAR, 2, length); -} - -/** - * Adds a short field to this struct. - * @return Offset of the new field. - */ -public final int addShort() -{ - return addShort(1); -} - -/** - * Adds a short array to this struct. - * @param length Length of the array. - * @return Offset of the new field. - */ -public final int addShort(int length) -{ - return addField(MPI.SHORT, 2, length); -} - -/** - * Adds an int field to this struct. - * @return Offset of the new field. - */ -public final int addInt() -{ - return addInt(1); -} - -/** - * Adds an int array to this struct. - * @param length Length of the array. - * @return Offset of the new field. - */ -public final int addInt(int length) -{ - return addField(MPI.INT, 4, length); -} - -/** - * Adds a long field to this struct. - * @return Offset of the new field. 
- */ -public final int addLong() -{ - return addLong(1); -} - -/** - * Adds a long array to this struct. - * @param length Length of the array. - * @return Offset of the new field. - */ -public final int addLong(int length) -{ - return addField(MPI.LONG, 8, length); -} - -/** - * Adds a float field to this struct. - * @return Offset of the new field. - */ -public final int addFloat() -{ - return addFloat(1); -} - -/** - * Adds a float array to this struct. - * @param length Length of the array. - * @return Offset of the new field. - */ -public final int addFloat(int length) -{ - return addField(MPI.FLOAT, 4, length); -} - -/** - * Adds a double field to this struct. - * @return Offset of the new field. - */ -public final int addDouble() -{ - return addDouble(1); -} - -/** - * Adds a double array to this struct. - * @param length Length of the array. - * @return Offset of the new field. - */ -public final int addDouble(int length) -{ - return addField(MPI.DOUBLE, 8, length); -} - -/** - * Adds a struct field to this struct. - * @param struct Type of the field. - * @return Offset of the new field. - * @throws MPIException - */ -public final int addStruct(Struct struct) throws MPIException -{ - return addStruct(struct, 1); -} - -/** - * Adds an array of structs to this struct. - * @param struct Type of the array. - * @param length Length of the array. - * @return Offset of the new field. - * @throws MPIException - */ -public final int addStruct(Struct struct, int length) throws MPIException -{ - struct.commit(); - return addField(struct, struct.extent, length); -} - -/** - * Adds a field of the specified data type. - * @param type Data type. - * @return Offset of the new field. - * @throws MPIException - */ -public final int addData(Datatype type) throws MPIException -{ - return addData(type, 1); -} - -/** - * Adds an array of the specified data type. - * @param type Data type. - * @param length Length of the array. - * @return Offset of the new field. 
- * @throws MPIException - */ -public final int addData(Datatype type, int length) throws MPIException -{ - return addField(type, type.getExtent() * type.baseSize, length); -} - -private boolean validType(int fieldOffset, int index, Datatype type) -{ - int i = Arrays.binarySearch(offsets, fieldOffset); - return index >= 0 && index < lengths[i] && type == types[i]; -} - -private static class Field -{ - private Object type; - private int offset, length; - - private Field(Object type, int offset, int length) - { - this.type = type; - this.offset = offset; - this.length = length; - } - -} // Field - -/** - * Base class for reading/writing data in a struct stored in a byte buffer. - */ -public abstract class Data -{ - private ByteBuffer buffer; - private int offset; - - /** - * Gets the buffer where this struct data is stored. - *

The buffer can be used in {@code send}/{@code recv} operations. - * @return Buffer where the struct data is stored. - */ - public final ByteBuffer getBuffer() - { - return offset == 0 ? buffer : MPI.slice(buffer, offset); - } - - /** - * Gets the byte value of a field. - * @param field Offset of the field. - * @return Byte value. - */ - protected final byte getByte(int field) - { - assert validType(field, 0, MPI.BYTE) : typeMismatch; - return buffer.get(offset + field); - } - - /** - * Gets the byte value at the specified position of a byte array. - * @param field Offset of the byte array. - * @param index Index of the byte in the array. - * @return Byte value. - */ - protected final byte getByte(int field, int index) - { - assert validType(field, index, MPI.BYTE) : typeMismatch; - return buffer.get(offset + field + index); - } - - /** - * Puts a byte value in a field. - * @param field Offset of the field. - * @param v Byte value. - */ - protected final void putByte(int field, byte v) - { - assert validType(field, 0, MPI.BYTE) : typeMismatch; - buffer.put(offset + field, v); - } - - /** - * Puts a byte value at the specified position of a byte array. - * @param field Offset of the byte array. - * @param index Index of the byte in the array. - * @param v Byte value. - */ - protected final void putByte(int field, int index, byte v) - { - assert validType(field, index, MPI.BYTE) : typeMismatch; - buffer.put(offset + field + index, v); - } - - /** - * Gets the char value of a field. - * @param field Offset of the field. - * @return Char value. - */ - protected final char getChar(int field) - { - assert validType(field, 0, MPI.CHAR) : typeMismatch; - return buffer.getChar(offset + field); - } - - /** - * Gets the char value at the specified position of a char array. - * @param field Offset of the char array. - * @param index Index of the char in the array. - * @return Char value. 
- */ - protected final char getChar(int field, int index) - { - assert validType(field, index, MPI.CHAR) : typeMismatch; - return buffer.getChar(offset + field + index * 2); - } - - /** - * Puts a char value in a field. - * @param field Offset of the field. - * @param v Char value. - */ - protected final void putChar(int field, char v) - { - assert validType(field, 0, MPI.CHAR) : typeMismatch; - buffer.putChar(offset + field, v); - } - - /** - * Puts a char value at the specified position of a char array. - * @param field Offset of the char array. - * @param index Index of the char in the array. - * @param v Char value. - */ - protected final void putChar(int field, int index, char v) - { - assert validType(field, index, MPI.CHAR) : typeMismatch; - buffer.putChar(offset + field + index * 2, v); - } - - /** - * Gets the short value of a field. - * @param field Offset of the field. - * @return Short value. - */ - protected final short getShort(int field) - { - assert validType(field, 0, MPI.SHORT) : typeMismatch; - return buffer.getShort(offset + field); - } - - /** - * Gets the short value at the specified position of a short array. - * @param field Offset of the short array. - * @param index Index of the short in the array. - * @return Short value. - */ - protected final short getShort(int field, int index) - { - assert validType(field, index, MPI.SHORT) : typeMismatch; - return buffer.getShort(offset + field + index * 2); - } - - /** - * Puts a short value in a field. - * @param field Offset of the field. - * @param v Short value. - */ - protected final void putShort(int field, short v) - { - assert validType(field, 0, MPI.SHORT) : typeMismatch; - buffer.putShort(offset + field, v); - } - - /** - * Puts a short value at the specified position of a short array. - * @param field Offset of the short array. - * @param index Index of the short in the array. - * @param v Short value. 
- */ - protected final void putShort(int field, int index, short v) - { - assert validType(field, index, MPI.SHORT) : typeMismatch; - buffer.putShort(offset + field + index * 2, v); - } - - /** - * Gets the int value of a field. - * @param field Offset of the field. - * @return Int value. - */ - protected final int getInt(int field) - { - assert validType(field, 0, MPI.INT) : typeMismatch; - return buffer.getInt(offset + field); - } - - /** - * Gets the int value at the specified position of an int array. - * @param field Offset of the int array. - * @param index Index of the int in the array. - * @return Int value. - */ - protected final int getInt(int field, int index) - { - assert validType(field, index, MPI.INT) : typeMismatch; - return buffer.getInt(offset + field + index * 4); - } - - /** - * Puts an int value in a field. - * @param field Offset of the field. - * @param v Int value. - */ - protected final void putInt(int field, int v) - { - assert validType(field, 0, MPI.INT) : typeMismatch; - buffer.putInt(offset + field, v); - } - - /** - * Puts an int value at the specified position of an int array. - * @param field Offset of the int array. - * @param index Index of the int in the array. - * @param v Int value. - */ - protected final void putInt(int field, int index, int v) - { - assert validType(field, index, MPI.INT) : typeMismatch; - buffer.putInt(offset + field + index * 4, v); - } - - /** - * Gets the long value of a field. - * @param field Offset of the field. - * @return Long value. - */ - protected final long getLong(int field) - { - assert validType(field, 0, MPI.LONG) : typeMismatch; - return buffer.getLong(offset + field); - } - - /** - * Gets the long value at the specified position of a long array. - * @param field Offset of the long array. - * @param index Index of the long in the array. - * @return Long value. 
- */ - protected final long getLong(int field, int index) - { - assert validType(field, index, MPI.LONG) : typeMismatch; - return buffer.getLong(offset + field + index * 8); - } - - /** - * Puts a long value in a field. - * @param field Offset of the field. - * @param v Long value. - */ - protected final void putLong(int field, long v) - { - assert validType(field, 0, MPI.LONG) : typeMismatch; - buffer.putLong(offset + field, v); - } - - /** - * Puts a long value at the specified position of a long array. - * @param field Offset of the long array. - * @param index Index of the long in the array. - * @param v Long value. - */ - protected final void putLong(int field, int index, long v) - { - assert validType(field, index, MPI.LONG) : typeMismatch; - buffer.putLong(offset + field + index * 8, v); - } - - /** - * Gets the float value of a field. - * @param field Offset of the field. - * @return Float value. - */ - protected final float getFloat(int field) - { - assert validType(field, 0, MPI.FLOAT) : typeMismatch; - return buffer.getFloat(offset + field); - } - - /** - * Gets the float value at the specified position of a float array. - * @param field Offset of the float array. - * @param index Index of the float in the array. - * @return Float value. - */ - protected final float getFloat(int field, int index) - { - assert validType(field, index, MPI.FLOAT) : typeMismatch; - return buffer.getFloat(offset + field + index * 4); - } - - /** - * Puts a float value in a field. - * @param field Offset of the field. - * @param v Float value. - */ - protected final void putFloat(int field, float v) - { - assert validType(field, 0, MPI.FLOAT) : typeMismatch; - buffer.putFloat(offset + field, v); - } - - /** - * Puts a float value at the specified position of a float array. - * @param field Offset of the float array. - * @param index Index of the float in the array. - * @param v Float value. 
- */ - protected final void putFloat(int field, int index, float v) - { - assert validType(field, index, MPI.FLOAT) : typeMismatch; - buffer.putFloat(offset + field + index * 4, v); - } - - /** - * Gets the double value of a field. - * @param field Offset of the field. - * @return Double value. - */ - protected final double getDouble(int field) - { - assert validType(field, 0, MPI.DOUBLE) : typeMismatch; - return buffer.getDouble(offset + field); - } - - /** - * Gets the double value at the specified position of a double array. - * @param field Offset of the double array. - * @param index Index of the double in the array. - * @return Double value. - */ - protected final double getDouble(int field, int index) - { - assert validType(field, index, MPI.DOUBLE) : typeMismatch; - return buffer.getDouble(offset + field + index * 8); - } - - /** - * Puts a double value in a field. - * @param field Offset of the field. - * @param v Double value. - */ - protected final void putDouble(int field, double v) - { - assert validType(field, 0, MPI.DOUBLE) : typeMismatch; - buffer.putDouble(offset + field, v); - } - - /** - * Puts a double value at the specified position of a double array. - * @param field Offset of the double array. - * @param index Index of the double in the array. - * @param v Double value. - */ - protected final void putDouble(int field, int index, double v) - { - assert validType(field, index, MPI.DOUBLE) : typeMismatch; - buffer.putDouble(offset + field + index * 8, v); - } - - /** - * Gets the struct data of a field. - * @param struct Struct type. - * @param field Offset of the field. - * @return Struct data. - */ - protected final - D getData(S struct, int field) - { - Struct s = (Struct)struct; - assert validType(field, 0, s.datatype) : typeMismatch; - return s.newData(buffer, offset + field); - } - - /** - * Gets the struct data at the specified position of a struct array. - * @param struct Struct type. - * @param field Offset of the struct array. 
- * @param index Index of the struct in the array. - * @return Struct data. - */ - protected final - D getData(S struct, int field, int index) - { - Struct s = (Struct)struct; - assert validType(field, index, s.datatype) : typeMismatch; - return s.newData(buffer, offset + field + index * s.extent); - } - - /** - * Gets the buffer of a field. - *

The buffer can be used in {@code send}/{@code recv} operations. - * @param type Data type of the buffer. - * @param field Offset of the field. - * @return Buffer object. - */ - protected final ByteBuffer getBuffer(Datatype type, int field) - { - assert validType(field, 0, type) : typeMismatch; - int position = offset + field; - return position == 0 ? buffer : MPI.slice(buffer, position); - } - - /** - * Gets the buffer data at the specified position of a buffer array. - *

The buffer can be used in {@code send}/{@code recv} operations. - * @param type Data type of the buffer. - * @param field Offset of the buffer array. - * @param index Index of the buffer in the array. - * @return Buffer object. - * @throws MPIException - */ - protected final ByteBuffer getBuffer(Datatype type, int field, int index) - throws MPIException - { - assert validType(field, index, type) : typeMismatch; - - int extent = type.getExtent() * type.baseSize, - position = offset + field + index * extent; - - return position == 0 ? buffer : MPI.slice(buffer, position); - } - -} // Data + private int extent; + private ArrayList fields = new ArrayList(); + + private Datatype datatype, types[]; + private int offsets[], lengths[]; + private static final String typeMismatch = "Type mismatch"; + + private void commit() throws MPIException + { + if(datatype == null) + createStruct(); + } + + private void createStruct() throws MPIException + { + int count = fields.size(); + types = new Datatype[count]; + offsets = new int[count]; + lengths = new int[count]; + + for(int i = 0; i < count; i++) + { + Field f = fields.get(i); + + types[i] = f.type instanceof Struct ? ((Struct)f.type).datatype + : (Datatype)f.type; + offsets[i] = f.offset; + lengths[i] = f.length; + } + + datatype = Datatype.createStruct(lengths, offsets, types); + datatype.commit(); + extent = datatype.getExtent(); + } + + /** + * Returns the extent of the struct data type. + * @return Extent of the struct data type. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final int getExtent() throws MPIException + { + commit(); + return extent; + } + + /** + * Returns the data type of the struct. + * @return The data type of the struct. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Datatype getType() throws MPIException + { + commit(); + return datatype; + } + + /** + * Creates a Data object. 
+ * @return New Data object. + */ + protected abstract Data newData(); + + @SuppressWarnings("unchecked") + private T newData(ByteBuffer buffer, int offset) + { + Data d = newData(); + d.buffer = buffer; + d.offset = offset; + return (T)d; + } + + @SuppressWarnings("javadoc") + /** + * Gets a Data object in order to access to the buffer. + * @param buffer the Data object will read/write on this buffer. + * @return Data object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final T getData(ByteBuffer buffer) throws MPIException + { + commit(); + return newData(buffer, 0); + } + + @SuppressWarnings("javadoc") + /** + * Gets a Data object in order to access to the struct at the + * specified position of a struct array stored in a Buffer. + * @param buffer The Data object will read/write on this buffer. + * @param index Index of the struct in the buffer. + * @return Data object. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final T getData(ByteBuffer buffer, int index) + throws MPIException + { + commit(); + return newData(buffer, index * extent); + } + + @SuppressWarnings("javadoc") + /** + * Gets a Data object in order to access to the byte array. + * @param array The Data object will read/write on this byte array. + * @return Data object. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final T getData(byte[] array) throws MPIException + { + ByteBuffer buffer = ByteBuffer.wrap(array); + buffer.order(ByteOrder.nativeOrder()); + return getData(buffer); + } + + @SuppressWarnings("javadoc") + /** + * Gets a Data object in order to access to the struct at the + * specified position of a struct array stored in a byte array. + * @param array The Data object will read/write on this byte array. + * @param index Index of the struct in the array. + * @return Data object. 
+ * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final T getData(byte[] array, int index) + throws MPIException + { + ByteBuffer buffer = ByteBuffer.wrap(array); + buffer.order(ByteOrder.nativeOrder()); + return getData(buffer, index); + } + + private int addField(Object type, int typeExtent, int length) + { + if(datatype != null) + throw new AssertionError("The struct data type was committed."); + + int offset = extent; + extent += typeExtent * length; + fields.add(new Field(type, offset, length)); + return offset; + } + + /** + * Sets the offset of the next field. + *

The offset must be greater or equal to the accumulated extent. + * @param offset offset of the next field + * @return this object in order to allow adding fields in a chained expression + */ + public final Struct setOffset(int offset) + { + if(datatype != null) + throw new AssertionError("The struct data type was committed."); + + if(offset < extent) + { + throw new IllegalArgumentException( + "The offset must be greater or equal to the accumulated extent."); + } + + extent = offset; + return this; + } + + /** + * Adds a byte field to this struct. + * @return Offset of the new field. + */ + public final int addByte() + { + return addByte(1); + } + + /** + * Adds a byte array to this struct. + * @param length Length of the array. + * @return Offset of the new field. + */ + public final int addByte(int length) + { + return addField(MPI.BYTE, 1, length); + } + + /** + * Adds a char field to this struct. + * @return Offset of the new field. + */ + public final int addChar() + { + return addChar(1); + } + + /** + * Adds a char array to this struct. + * @param length Length of the array. + * @return Offset of the new field. + */ + public final int addChar(int length) + { + return addField(MPI.CHAR, 2, length); + } + + /** + * Adds a short field to this struct. + * @return Offset of the new field. + */ + public final int addShort() + { + return addShort(1); + } + + /** + * Adds a short array to this struct. + * @param length Length of the array. + * @return Offset of the new field. + */ + public final int addShort(int length) + { + return addField(MPI.SHORT, 2, length); + } + + /** + * Adds an int field to this struct. + * @return Offset of the new field. + */ + public final int addInt() + { + return addInt(1); + } + + /** + * Adds an int array to this struct. + * @param length Length of the array. + * @return Offset of the new field. + */ + public final int addInt(int length) + { + return addField(MPI.INT, 4, length); + } + + /** + * Adds a long field to this struct. 
+ * @return Offset of the new field. + */ + public final int addLong() + { + return addLong(1); + } + + /** + * Adds a long array to this struct. + * @param length Length of the array. + * @return Offset of the new field. + */ + public final int addLong(int length) + { + return addField(MPI.LONG, 8, length); + } + + /** + * Adds a float field to this struct. + * @return Offset of the new field. + */ + public final int addFloat() + { + return addFloat(1); + } + + /** + * Adds a float array to this struct. + * @param length Length of the array. + * @return Offset of the new field. + */ + public final int addFloat(int length) + { + return addField(MPI.FLOAT, 4, length); + } + + /** + * Adds a double field to this struct. + * @return Offset of the new field. + */ + public final int addDouble() + { + return addDouble(1); + } + + /** + * Adds a double array to this struct. + * @param length Length of the array. + * @return Offset of the new field. + */ + public final int addDouble(int length) + { + return addField(MPI.DOUBLE, 8, length); + } + + /** + * Adds a struct field to this struct. + * @param struct Type of the field. + * @return Offset of the new field. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final int addStruct(Struct struct) throws MPIException + { + return addStruct(struct, 1); + } + + /** + * Adds an array of structs to this struct. + * @param struct Type of the array. + * @param length Length of the array. + * @return Offset of the new field. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final int addStruct(Struct struct, int length) throws MPIException + { + struct.commit(); + return addField(struct, struct.extent, length); + } + + /** + * Adds a field of the specified data type. + * @param type Data type. + * @return Offset of the new field. + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public final int addData(Datatype type) throws MPIException + { + return addData(type, 1); + } + + /** + * Adds an array of the specified data type. + * @param type Data type. + * @param length Length of the array. + * @return Offset of the new field. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final int addData(Datatype type, int length) throws MPIException + { + return addField(type, type.getExtent() * type.baseSize, length); + } + + private boolean validType(int fieldOffset, int index, Datatype type) + { + int i = Arrays.binarySearch(offsets, fieldOffset); + return index >= 0 && index < lengths[i] && type == types[i]; + } + + private static class Field + { + private Object type; + private int offset, length; + + private Field(Object type, int offset, int length) + { + this.type = type; + this.offset = offset; + this.length = length; + } + + } // Field + + /** + * Base class for reading/writing data in a struct stored in a byte buffer. + */ + public abstract class Data + { + private ByteBuffer buffer; + private int offset; + + /** + * Gets the buffer where this struct data is stored. + *

The buffer can be used in {@code send}/{@code recv} operations. + * @return Buffer where the struct data is stored. + */ + public final ByteBuffer getBuffer() + { + return offset == 0 ? buffer : MPI.slice(buffer, offset); + } + + /** + * Gets the byte value of a field. + * @param field Offset of the field. + * @return Byte value. + */ + protected final byte getByte(int field) + { + assert validType(field, 0, MPI.BYTE) : typeMismatch; + return buffer.get(offset + field); + } + + /** + * Gets the byte value at the specified position of a byte array. + * @param field Offset of the byte array. + * @param index Index of the byte in the array. + * @return Byte value. + */ + protected final byte getByte(int field, int index) + { + assert validType(field, index, MPI.BYTE) : typeMismatch; + return buffer.get(offset + field + index); + } + + /** + * Puts a byte value in a field. + * @param field Offset of the field. + * @param v Byte value. + */ + protected final void putByte(int field, byte v) + { + assert validType(field, 0, MPI.BYTE) : typeMismatch; + buffer.put(offset + field, v); + } + + /** + * Puts a byte value at the specified position of a byte array. + * @param field Offset of the byte array. + * @param index Index of the byte in the array. + * @param v Byte value. + */ + protected final void putByte(int field, int index, byte v) + { + assert validType(field, index, MPI.BYTE) : typeMismatch; + buffer.put(offset + field + index, v); + } + + /** + * Gets the char value of a field. + * @param field Offset of the field. + * @return Char value. + */ + protected final char getChar(int field) + { + assert validType(field, 0, MPI.CHAR) : typeMismatch; + return buffer.getChar(offset + field); + } + + /** + * Gets the char value at the specified position of a char array. + * @param field Offset of the char array. + * @param index Index of the char in the array. + * @return Char value. 
+ */ + protected final char getChar(int field, int index) + { + assert validType(field, index, MPI.CHAR) : typeMismatch; + return buffer.getChar(offset + field + index * 2); + } + + /** + * Puts a char value in a field. + * @param field Offset of the field. + * @param v Char value. + */ + protected final void putChar(int field, char v) + { + assert validType(field, 0, MPI.CHAR) : typeMismatch; + buffer.putChar(offset + field, v); + } + + /** + * Puts a char value at the specified position of a char array. + * @param field Offset of the char array. + * @param index Index of the char in the array. + * @param v Char value. + */ + protected final void putChar(int field, int index, char v) + { + assert validType(field, index, MPI.CHAR) : typeMismatch; + buffer.putChar(offset + field + index * 2, v); + } + + /** + * Gets the short value of a field. + * @param field Offset of the field. + * @return Short value. + */ + protected final short getShort(int field) + { + assert validType(field, 0, MPI.SHORT) : typeMismatch; + return buffer.getShort(offset + field); + } + + /** + * Gets the short value at the specified position of a short array. + * @param field Offset of the short array. + * @param index Index of the short in the array. + * @return Short value. + */ + protected final short getShort(int field, int index) + { + assert validType(field, index, MPI.SHORT) : typeMismatch; + return buffer.getShort(offset + field + index * 2); + } + + /** + * Puts a short value in a field. + * @param field Offset of the field. + * @param v Short value. + */ + protected final void putShort(int field, short v) + { + assert validType(field, 0, MPI.SHORT) : typeMismatch; + buffer.putShort(offset + field, v); + } + + /** + * Puts a short value at the specified position of a short array. + * @param field Offset of the short array. + * @param index Index of the short in the array. + * @param v Short value. 
+ */ + protected final void putShort(int field, int index, short v) + { + assert validType(field, index, MPI.SHORT) : typeMismatch; + buffer.putShort(offset + field + index * 2, v); + } + + /** + * Gets the int value of a field. + * @param field Offset of the field. + * @return Int value. + */ + protected final int getInt(int field) + { + assert validType(field, 0, MPI.INT) : typeMismatch; + return buffer.getInt(offset + field); + } + + /** + * Gets the int value at the specified position of an int array. + * @param field Offset of the int array. + * @param index Index of the int in the array. + * @return Int value. + */ + protected final int getInt(int field, int index) + { + assert validType(field, index, MPI.INT) : typeMismatch; + return buffer.getInt(offset + field + index * 4); + } + + /** + * Puts an int value in a field. + * @param field Offset of the field. + * @param v Int value. + */ + protected final void putInt(int field, int v) + { + assert validType(field, 0, MPI.INT) : typeMismatch; + buffer.putInt(offset + field, v); + } + + /** + * Puts an int value at the specified position of an int array. + * @param field Offset of the int array. + * @param index Index of the int in the array. + * @param v Int value. + */ + protected final void putInt(int field, int index, int v) + { + assert validType(field, index, MPI.INT) : typeMismatch; + buffer.putInt(offset + field + index * 4, v); + } + + /** + * Gets the long value of a field. + * @param field Offset of the field. + * @return Long value. + */ + protected final long getLong(int field) + { + assert validType(field, 0, MPI.LONG) : typeMismatch; + return buffer.getLong(offset + field); + } + + /** + * Gets the long value at the specified position of a long array. + * @param field Offset of the long array. + * @param index Index of the long in the array. + * @return Long value. 
+ */ + protected final long getLong(int field, int index) + { + assert validType(field, index, MPI.LONG) : typeMismatch; + return buffer.getLong(offset + field + index * 8); + } + + /** + * Puts a long value in a field. + * @param field Offset of the field. + * @param v Long value. + */ + protected final void putLong(int field, long v) + { + assert validType(field, 0, MPI.LONG) : typeMismatch; + buffer.putLong(offset + field, v); + } + + /** + * Puts a long value at the specified position of a long array. + * @param field Offset of the long array. + * @param index Index of the long in the array. + * @param v Long value. + */ + protected final void putLong(int field, int index, long v) + { + assert validType(field, index, MPI.LONG) : typeMismatch; + buffer.putLong(offset + field + index * 8, v); + } + + /** + * Gets the float value of a field. + * @param field Offset of the field. + * @return Float value. + */ + protected final float getFloat(int field) + { + assert validType(field, 0, MPI.FLOAT) : typeMismatch; + return buffer.getFloat(offset + field); + } + + /** + * Gets the float value at the specified position of a float array. + * @param field Offset of the float array. + * @param index Index of the float in the array. + * @return Float value. + */ + protected final float getFloat(int field, int index) + { + assert validType(field, index, MPI.FLOAT) : typeMismatch; + return buffer.getFloat(offset + field + index * 4); + } + + /** + * Puts a float value in a field. + * @param field Offset of the field. + * @param v Float value. + */ + protected final void putFloat(int field, float v) + { + assert validType(field, 0, MPI.FLOAT) : typeMismatch; + buffer.putFloat(offset + field, v); + } + + /** + * Puts a float value at the specified position of a float array. + * @param field Offset of the float array. + * @param index Index of the float in the array. + * @param v Float value. 
+ */ + protected final void putFloat(int field, int index, float v) + { + assert validType(field, index, MPI.FLOAT) : typeMismatch; + buffer.putFloat(offset + field + index * 4, v); + } + + /** + * Gets the double value of a field. + * @param field Offset of the field. + * @return Double value. + */ + protected final double getDouble(int field) + { + assert validType(field, 0, MPI.DOUBLE) : typeMismatch; + return buffer.getDouble(offset + field); + } + + /** + * Gets the double value at the specified position of a double array. + * @param field Offset of the double array. + * @param index Index of the double in the array. + * @return Double value. + */ + protected final double getDouble(int field, int index) + { + assert validType(field, index, MPI.DOUBLE) : typeMismatch; + return buffer.getDouble(offset + field + index * 8); + } + + /** + * Puts a double value in a field. + * @param field Offset of the field. + * @param v Double value. + */ + protected final void putDouble(int field, double v) + { + assert validType(field, 0, MPI.DOUBLE) : typeMismatch; + buffer.putDouble(offset + field, v); + } + + /** + * Puts a double value at the specified position of a double array. + * @param field Offset of the double array. + * @param index Index of the double in the array. + * @param v Double value. + */ + protected final void putDouble(int field, int index, double v) + { + assert validType(field, index, MPI.DOUBLE) : typeMismatch; + buffer.putDouble(offset + field + index * 8, v); + } + + @SuppressWarnings("javadoc") + /** + * Gets the struct data of a field. + * @param struct Struct type. + * @param field Offset of the field. + * @return Struct data. + */ + protected final + D getData(S struct, int field) + { + Struct s = (Struct)struct; + assert validType(field, 0, s.datatype) : typeMismatch; + return s.newData(buffer, offset + field); + } + + @SuppressWarnings("javadoc") + /** + * Gets the struct data at the specified position of a struct array. 
+ * @param struct Struct type. + * @param field Offset of the struct array. + * @param index Index of the struct in the array. + * @return Struct data. + */ + protected final + D getData(S struct, int field, int index) + { + Struct s = (Struct)struct; + assert validType(field, index, s.datatype) : typeMismatch; + return s.newData(buffer, offset + field + index * s.extent); + } + + /** + * Gets the buffer of a field. + *

<p>The buffer can be used in {@code send}/{@code recv} operations. + * @param type Data type of the buffer. + * @param field Offset of the field. + * @return Buffer object. + */ + protected final ByteBuffer getBuffer(Datatype type, int field) + { + assert validType(field, 0, type) : typeMismatch; + int position = offset + field; + return position == 0 ? buffer : MPI.slice(buffer, position); + } + + /** + * Gets the buffer data at the specified position of a buffer array. + *

<p>The buffer can be used in {@code send}/{@code recv} operations. + * @param type Data type of the buffer. + * @param field Offset of the buffer array. + * @param index Index of the buffer in the array. + * @return Buffer object. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + protected final ByteBuffer getBuffer(Datatype type, int field, int index) + throws MPIException + { + assert validType(field, index, type) : typeMismatch; + + int extent = type.getExtent() * type.baseSize, + position = offset + field + index * extent; + + return position == 0 ? buffer : MPI.slice(buffer, position); + } + + } // Data } // Struct diff --git a/ompi/mpi/java/java/UserFunction.java b/ompi/mpi/java/java/UserFunction.java index 6a497cda7ea..13a98d67c17 100644 --- a/ompi/mpi/java/java/UserFunction.java +++ b/ompi/mpi/java/java/UserFunction.java @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -32,7 +34,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -*/ + */ /* * File : User_function.java * Author : Xinying Li @@ -52,156 +54,158 @@ */ public abstract class UserFunction { -/** - * User-defined function for a new {@code Op}.
- * @param inVec array of values to combine with {@code inoutvec} elements - * @param inOutVec in-out array of accumulator locations - * @param count number of items in arrays - * @param datatype type of each item - */ -public void call(Object inVec, Object inOutVec, int count, Datatype datatype) - throws MPIException -{ - throw new UnsupportedOperationException("Not supported yet."); -} - -/** - * User-defined function for a new {@code Op}. - * @param in direct byte buffer to combine with {@code inOut} buffer - * @param inOut in-out direct byte buffer of accumulator locations - * @param count number of items in buffers - * @param datatype type of each item - */ -public void call(ByteBuffer in, ByteBuffer inOut, int count, Datatype datatype) - throws MPIException -{ - switch(datatype.baseType) - { - case Datatype.BYTE: - vCall(in, inOut, count, datatype); - break; - case Datatype.CHAR: - vCall(in.asCharBuffer(), inOut.asCharBuffer(), count, datatype); - break; - case Datatype.SHORT: - vCall(in.asShortBuffer(), inOut.asShortBuffer(), count, datatype); - break; - case Datatype.INT: - vCall(in.asIntBuffer(), inOut.asIntBuffer(), count, datatype); - break; - case Datatype.LONG: - vCall(in.asLongBuffer(), inOut.asLongBuffer(), count, datatype); - break; - case Datatype.FLOAT: - vCall(in.asFloatBuffer(), inOut.asFloatBuffer(), count, datatype); - break; - case Datatype.DOUBLE: - vCall(in.asDoubleBuffer(), inOut.asDoubleBuffer(), count, datatype); - break; - case Datatype.PACKED: - vCall(in, inOut, count, datatype); - break; - default: - throw new IllegalArgumentException("Unsupported datatype."); - } -} - -private void vCall(ByteBuffer in, ByteBuffer inOut, - int count, Datatype datatype) throws MPIException -{ - int extent = datatype.getExtent(); - byte[] inVec = new byte[count * extent], - inOutVec = new byte[count * extent]; - - in.get(inVec); - inOut.get(inOutVec); - call(inVec, inOutVec, count, datatype); - inOut.clear(); - inOut.put(inOutVec); -} - -private void 
vCall(CharBuffer inBuf, CharBuffer inOutBuf, - int count, Datatype datatype) throws MPIException -{ - int extent = datatype.getExtent(); - char[] inVec = new char[count * extent], - inOutVec = new char[count * extent]; - - inBuf.get(inVec); - inOutBuf.get(inOutVec); - call(inVec, inOutVec, count, datatype); - inOutBuf.clear(); - inOutBuf.put(inOutVec); -} - -private void vCall(ShortBuffer inBuf, ShortBuffer inOutBuf, - int count, Datatype datatype) throws MPIException -{ - int extent = datatype.getExtent(); - short[] inVec = new short[count * extent], - inOutVec = new short[count * extent]; - - inBuf.get(inVec); - inOutBuf.get(inOutVec); - call(inVec, inOutVec, count, datatype); - inOutBuf.clear(); - inOutBuf.put(inOutVec); -} - -private void vCall(IntBuffer inBuf, IntBuffer inOutBuf, - int count, Datatype datatype) throws MPIException -{ - int extent = datatype.getExtent(); - int[] inVec = new int[count * extent], - inOutVec = new int[count * extent]; - - inBuf.get(inVec); - inOutBuf.get(inOutVec); - call(inVec, inOutVec, count, datatype); - inOutBuf.clear(); - inOutBuf.put(inOutVec); -} - -private void vCall(LongBuffer inBuf, LongBuffer inOutBuf, - int count, Datatype datatype) throws MPIException -{ - int extent = datatype.getExtent(); - long[] inVec = new long[count * extent], - inOutVec = new long[count * extent]; - - inBuf.get(inVec); - inOutBuf.get(inOutVec); - call(inVec, inOutVec, count, datatype); - inOutBuf.clear(); - inOutBuf.put(inOutVec); -} - -private void vCall(FloatBuffer inBuf, FloatBuffer inOutBuf, - int count, Datatype datatype) throws MPIException -{ - int extent = datatype.getExtent(); - float[] inVec = new float[count * extent], - inOutVec = new float[count * extent]; - - inBuf.get(inVec); - inOutBuf.get(inOutVec); - call(inVec, inOutVec, count, datatype); - inOutBuf.clear(); - inOutBuf.put(inOutVec); -} - -private void vCall(DoubleBuffer inBuf, DoubleBuffer inOutBuf, - int count, Datatype datatype) throws MPIException -{ - int extent = 
datatype.getExtent(); - double[] inVec = new double[count * extent], - inOutVec = new double[count * extent]; - - inBuf.get(inVec); - inOutBuf.get(inOutVec); - call(inVec, inOutVec, count, datatype); - inOutBuf.clear(); - inOutBuf.put(inOutVec); -} + /** + * User-defined function for a new {@code Op}. + * @param inVec array of values to combine with {@code inoutvec} elements + * @param inOutVec in-out array of accumulator locations + * @param count number of items in arrays + * @param datatype type of each item + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void call(Object inVec, Object inOutVec, int count, Datatype datatype) + throws MPIException + { + throw new UnsupportedOperationException("Not supported yet."); + } + + /** + * User-defined function for a new {@code Op}. + * @param in direct byte buffer to combine with {@code inOut} buffer + * @param inOut in-out direct byte buffer of accumulator locations + * @param count number of items in buffers + * @param datatype type of each item + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public void call(ByteBuffer in, ByteBuffer inOut, int count, Datatype datatype) + throws MPIException + { + switch(datatype.baseType) + { + case Datatype.BYTE: + vCall(in, inOut, count, datatype); + break; + case Datatype.CHAR: + vCall(in.asCharBuffer(), inOut.asCharBuffer(), count, datatype); + break; + case Datatype.SHORT: + vCall(in.asShortBuffer(), inOut.asShortBuffer(), count, datatype); + break; + case Datatype.INT: + vCall(in.asIntBuffer(), inOut.asIntBuffer(), count, datatype); + break; + case Datatype.LONG: + vCall(in.asLongBuffer(), inOut.asLongBuffer(), count, datatype); + break; + case Datatype.FLOAT: + vCall(in.asFloatBuffer(), inOut.asFloatBuffer(), count, datatype); + break; + case Datatype.DOUBLE: + vCall(in.asDoubleBuffer(), inOut.asDoubleBuffer(), count, datatype); + break; + case Datatype.PACKED: + vCall(in, inOut, count, datatype); + break; + default: + throw new IllegalArgumentException("Unsupported datatype."); + } + } + + private void vCall(ByteBuffer in, ByteBuffer inOut, + int count, Datatype datatype) throws MPIException + { + int extent = datatype.getExtent(); + byte[] inVec = new byte[count * extent], + inOutVec = new byte[count * extent]; + + in.get(inVec); + inOut.get(inOutVec); + call(inVec, inOutVec, count, datatype); + inOut.clear(); + inOut.put(inOutVec); + } + + private void vCall(CharBuffer inBuf, CharBuffer inOutBuf, + int count, Datatype datatype) throws MPIException + { + int extent = datatype.getExtent(); + char[] inVec = new char[count * extent], + inOutVec = new char[count * extent]; + + inBuf.get(inVec); + inOutBuf.get(inOutVec); + call(inVec, inOutVec, count, datatype); + inOutBuf.clear(); + inOutBuf.put(inOutVec); + } + + private void vCall(ShortBuffer inBuf, ShortBuffer inOutBuf, + int count, Datatype datatype) throws MPIException + { + int extent = datatype.getExtent(); + short[] inVec = new short[count * extent], + inOutVec = new short[count * extent]; + + inBuf.get(inVec); + inOutBuf.get(inOutVec); + 
call(inVec, inOutVec, count, datatype); + inOutBuf.clear(); + inOutBuf.put(inOutVec); + } + + private void vCall(IntBuffer inBuf, IntBuffer inOutBuf, + int count, Datatype datatype) throws MPIException + { + int extent = datatype.getExtent(); + int[] inVec = new int[count * extent], + inOutVec = new int[count * extent]; + + inBuf.get(inVec); + inOutBuf.get(inOutVec); + call(inVec, inOutVec, count, datatype); + inOutBuf.clear(); + inOutBuf.put(inOutVec); + } + + private void vCall(LongBuffer inBuf, LongBuffer inOutBuf, + int count, Datatype datatype) throws MPIException + { + int extent = datatype.getExtent(); + long[] inVec = new long[count * extent], + inOutVec = new long[count * extent]; + + inBuf.get(inVec); + inOutBuf.get(inOutVec); + call(inVec, inOutVec, count, datatype); + inOutBuf.clear(); + inOutBuf.put(inOutVec); + } + + private void vCall(FloatBuffer inBuf, FloatBuffer inOutBuf, + int count, Datatype datatype) throws MPIException + { + int extent = datatype.getExtent(); + float[] inVec = new float[count * extent], + inOutVec = new float[count * extent]; + + inBuf.get(inVec); + inOutBuf.get(inOutVec); + call(inVec, inOutVec, count, datatype); + inOutBuf.clear(); + inOutBuf.put(inOutVec); + } + + private void vCall(DoubleBuffer inBuf, DoubleBuffer inOutBuf, + int count, Datatype datatype) throws MPIException + { + int extent = datatype.getExtent(); + double[] inVec = new double[count * extent], + inOutVec = new double[count * extent]; + + inBuf.get(inVec); + inOutBuf.get(inOutVec); + call(inVec, inOutVec, count, datatype); + inOutBuf.clear(); + inOutBuf.put(inOutVec); + } } // UserFunction diff --git a/ompi/mpi/java/java/Version.java b/ompi/mpi/java/java/Version.java new file mode 100644 index 00000000000..e63911210d4 --- /dev/null +++ b/ompi/mpi/java/java/Version.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * + * This file is almost a complete re-write for Open MPI compared to the + * original mpiJava package. Its license and copyright are listed below. + * See <http://www.hpjava.org/mpiJava.html> for more information. + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * + * File : Version.java + * Author : Nathaniel Graham + * Created : Thu Jul 23 09:25 2015 + */ + +package mpi; + +/** + * Version and Subversion for MPI + */ +public final class Version +{ +private final int version; +private final int subVersion; + +protected Version(int version, int subVersion) +{ + this.version = version; + this.subVersion = subVersion; +} + +/** + * Gets the MPI version. + * @return MPI version + */ +public int getVersion() +{ + return version; +} + +/** + * Gets the MPI subversion. + * @return MPI subversion + */ +public int getSubVersion() +{ + return subVersion; +} + +} // Version diff --git a/ompi/mpi/java/java/Win.java b/ompi/mpi/java/java/Win.java index 9d3678236c6..6fa3b181d19 100644 --- a/ompi/mpi/java/java/Win.java +++ b/ompi/mpi/java/java/Win.java @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,384 +25,879 @@ */ public final class Win implements Freeable { -private long handle; - -/** - * Java binding of {@code MPI_WIN_CREATE}. - * @param base initial address of window - * @param size size of window (buffer elements) - * @param dispUnit local unit size for displacements (buffer elements) - * @param info info object - * @param comm communicator - * @throws MPIException - */ -public Win(Buffer base, int size, int dispUnit, Info info, Comm comm) - throws MPIException -{ - if(!base.isDirect()) - throw new IllegalArgumentException("The buffer must be direct."); - - int baseSize; - - if(base instanceof ByteBuffer) - baseSize = 1; - else if(base instanceof CharBuffer || base instanceof ShortBuffer) - baseSize = 2; - else if(base instanceof IntBuffer || base instanceof FloatBuffer) - baseSize = 4; - else if(base instanceof LongBuffer || base instanceof DoubleBuffer) - baseSize = 8; - else - throw new AssertionError(); - - int sizeBytes = size * baseSize, - dispBytes = dispUnit * baseSize; - - handle = createWin(base, sizeBytes, dispBytes, info.handle, comm.handle); -} - -private native long createWin( - Buffer base, int size, int dispUnit, long info, long comm) - throws MPIException; - -private int getBaseType(Datatype orgType, Datatype targetType) -{ - int baseType = orgType.baseType; - - if(baseType != targetType.baseType) - { - throw new IllegalArgumentException( - "Both datatype arguments must be constructed "+ - "from the same predefined datatype."); - } - - return baseType; -} - -/** - * Java binding of the MPI operation {@code MPI_GET_GROUP}. 
- * @return group of processes which share access to the window - * @throws MPIException - */ -public Group getGroup() throws MPIException -{ - MPI.check(); - return new Group(getGroup(handle)); -} - -private native long getGroup(long win) throws MPIException; - -/** - * Java binding of {@code MPI_PUT}. - * @param origin origin buffer - * @param orgCount number of entries in origin buffer - * @param orgType datatype of each entry in origin buffer - * @param targetRank rank of target - * @param targetDisp displacement from start of window to target buffer - * @param targetCount number of entries in target buffer - * @param targetType datatype of each entry in target buffer - * @throws MPIException - */ -public void put(Buffer origin, int orgCount, Datatype orgType, - int targetRank, int targetDisp, int targetCount, - Datatype targetType) - throws MPIException -{ - MPI.check(); - - if(!origin.isDirect()) - throw new IllegalArgumentException("The origin must be direct buffer."); - - put(handle, origin, orgCount, orgType.handle, - targetRank, targetDisp, targetCount, targetType.handle, - getBaseType(orgType, targetType)); -} - -private native void put( - long win, Buffer origin, int orgCount, long orgType, - int targetRank, int targetDisp, int targetCount, long targetType, - int baseType) throws MPIException; - -/** - * Java binding of {@code MPI_GET}. 
- * @param origin origin buffer - * @param orgCount number of entries in origin buffer - * @param orgType datatype of each entry in origin buffer - * @param targetRank rank of target - * @param targetDisp displacement from start of window to target buffer - * @param targetCount number of entries in target buffer - * @param targetType datatype of each entry in target buffer - */ -public void get(Buffer origin, int orgCount, Datatype orgType, - int targetRank, int targetDisp, int targetCount, - Datatype targetType) - throws MPIException -{ - MPI.check(); - - if(!origin.isDirect()) - throw new IllegalArgumentException("The origin must be direct buffer."); - - get(handle, origin, orgCount, orgType.handle, - targetRank, targetDisp, targetCount, targetType.handle, - getBaseType(orgType, targetType)); -} - -private native void get( - long win, Buffer origin, int orgCount, long orgType, - int targetRank, int targetDisp, int targetCount, long targetType, - int baseType) throws MPIException; - -/** - * Java binding of {@code MPI_ACCUMULATE}. 
- * @param origin origin buffer - * @param orgCount number of entries in origin buffer - * @param orgType datatype of each entry in origin buffer - * @param targetRank rank of target - * @param targetDisp displacement from start of window to target buffer - * @param targetCount number of entries in target buffer - * @param targetType datatype of each entry in target buffer - * @param op reduce operation - */ -public void accumulate(Buffer origin, int orgCount, Datatype orgType, - int targetRank, int targetDisp, int targetCount, - Datatype targetType, Op op) - throws MPIException -{ - MPI.check(); - - if(!origin.isDirect()) - throw new IllegalArgumentException("The origin must be direct buffer."); - - accumulate(handle, origin, orgCount, orgType.handle, - targetRank, targetDisp, targetCount, targetType.handle, - op, op.handle, getBaseType(orgType, targetType)); -} - -private native void accumulate( - long win, Buffer origin, int orgCount, long orgType, - int targetRank, int targetDisp, int targetCount, long targetType, - Op jOp, long hOp, int baseType) throws MPIException; - -/** - * Java binding of {@code MPI_WIN_FENCE}. - * @param assertion program assertion - */ -public void fence(int assertion) throws MPIException -{ - MPI.check(); - fence(handle, assertion); -} - -private native void fence(long win, int assertion) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_WIN_START}. - * @param group group of target processes - * @param assertion program assertion - * @throws MPIException - */ -public void start(Group group, int assertion) throws MPIException -{ - MPI.check(); - start(handle, group.handle, assertion); -} - -private native void start(long win, long group, int assertion) - throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_WIN_COMPLETE}. 
- * @throws MPIException - */ -public void complete() throws MPIException -{ - MPI.check(); - complete(handle); -} - -private native void complete(long win) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_WIN_POST}. - * @param group group of origin processes - * @param assertion program assertion - * @throws MPIException - */ -public void post(Group group, int assertion) throws MPIException -{ - MPI.check(); - post(handle, group.handle, assertion); -} - -private native void post(long win, long group, int assertion) - throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_WIN_WAIT}. - * @throws MPIException - */ -public void waitFor() throws MPIException -{ - MPI.check(); - waitFor(handle); -} - -private native void waitFor(long win) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_WIN_TEST}. - * @return true if success - * @throws MPIException - */ -public boolean test() throws MPIException -{ - MPI.check(); - return test(handle); -} - -private native boolean test(long win) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_WIN_LOCK}. - * @param lockType either MPI.LOCK_EXCLUSIVE or MPI.LOCK_SHARED - * @param rank rank of locked window - * @param assertion program assertion - * @throws MPIException - */ -public void lock(int lockType, int rank, int assertion) throws MPIException -{ - MPI.check(); - lock(handle, lockType, rank, assertion); -} - -private native void lock(long win, int lockType, int rank, int assertion) - throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_WIN_UNLOCK}. - * @param rank rank of window - * @throws MPIException - */ -public void unlock(int rank) throws MPIException -{ - MPI.check(); - unlock(handle, rank); -} - -private native void unlock(long win, int rank) throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_WIN_SET_ERRHANDLER}. 
- * @param errhandler new MPI error handler for window - * @throws MPIException - */ -public void setErrhandler(Errhandler errhandler) throws MPIException -{ - MPI.check(); - setErrhandler(handle, errhandler.handle); -} - -private native void setErrhandler(long win, long errhandler) - throws MPIException; - -/** - * Java binding of the MPI operation {@code MPI_WIN_CALL_ERRHANDLER}. - * @param errorCode error code - * @throws MPIException - */ -public void callErrhandler(int errorCode) throws MPIException -{ - callErrhandler(handle, errorCode); -} - -private native void callErrhandler(long handle, int errorCode) - throws MPIException; - -/** - * Create a new attribute key. - *

<p>Java binding of the MPI operation {@code MPI_WIN_CREATE_KEYVAL}. - * @return attribute key for future access - * @throws MPIException - */ -public static int createKeyval() throws MPIException -{ - MPI.check(); - return createKeyval_jni(); -} - -private static native int createKeyval_jni() throws MPIException; - -/** - * Frees an attribute key. - *

<p>Java binding of the MPI operation {@code MPI_WIN_FREE_KEYVAL}. - * @param keyval attribute key - * @throws MPIException - */ -public static void freeKeyval(int keyval) throws MPIException -{ - MPI.check(); - freeKeyval_jni(keyval); -} - -private static native void freeKeyval_jni(int keyval) throws MPIException; - -/** - * Stores attribute value associated with a key. - *

<p>Java binding of the MPI operation {@code MPI_WIN_SET_ATTR}. - * @param keyval attribute key - * @param value attribute value - * @throws MPIException - */ -public void setAttr(int keyval, Object value) throws MPIException -{ - MPI.check(); - setAttr(handle, keyval, MPI.attrSet(value)); -} - -private native void setAttr(long win, int keyval, byte[] value) - throws MPIException; - -/** - * Retrieves attribute value by key. - *

<p>Java binding of the MPI operation {@code MPI_WIN_GET_ATTR}. - * @param keyval attribute key - * @return attribute value or null if no attribute is associated with the key. - * @throws MPIException - */ -public Object getAttr(int keyval) throws MPIException -{ - MPI.check(); - Object obj = getAttr(handle, keyval); - return obj instanceof byte[] ? MPI.attrGet((byte[])obj) : obj; -} - -private native Object getAttr(long win, int keyval) throws MPIException; - -/** - * Deletes an attribute value associated with a key. - *

<p>Java binding of the MPI operation {@code MPI_WIN_DELETE_ATTR}. - * @param keyval attribute key - * @throws MPIException - */ -public void deleteAttr(int keyval) throws MPIException -{ - MPI.check(); - deleteAttr(handle, keyval); -} - -private native void deleteAttr(long win, int keyval) throws MPIException; - -/** - * Java binding of {@code MPI_WIN_FREE}. - * @throws MPIException - */ -@Override public void free() throws MPIException -{ - MPI.check(); - handle = free(handle); -} - -private native long free(long win) throws MPIException; + private long handle; + public static final int WIN_NULL = 0; + public static final int FLAVOR_PRIVATE = 0; + public static final int FLAVOR_SHARED = 1; + + /** + * Java binding of {@code MPI_WIN_CREATE}. + * @param base initial address of window + * @param size size of window (buffer elements) + * @param dispUnit local unit size for displacements (buffer elements) + * @param info info object + * @param comm communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Win(Buffer base, int size, int dispUnit, Info info, Comm comm) + throws MPIException + { + if(!base.isDirect()) + throw new IllegalArgumentException("The buffer must be direct."); + + int baseSize; + + if(base instanceof ByteBuffer) + baseSize = 1; + else if(base instanceof CharBuffer || base instanceof ShortBuffer) + baseSize = 2; + else if(base instanceof IntBuffer || base instanceof FloatBuffer) + baseSize = 4; + else if(base instanceof LongBuffer || base instanceof DoubleBuffer) + baseSize = 8; + else + throw new AssertionError(); + + int sizeBytes = size * baseSize, + dispBytes = dispUnit * baseSize; + + handle = createWin(base, sizeBytes, dispBytes, info.handle, comm.handle); + } + + private native long createWin( + Buffer base, int size, int dispUnit, long info, long comm) + throws MPIException; + + /** + * Java binding of {@code MPI_WIN_ALLOCATE} and {@code MPI_WIN_ALLOCATE_SHARED}.
+ * @param size size of window (buffer elements) + * @param dispUnit local unit size for displacements (buffer elements) + * @param info info object + * @param comm communicator + * @param base initial address of window + * @param flavor FLAVOR_PRIVATE or FLAVOR_SHARED + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Win(int size, int dispUnit, Info info, Comm comm, Buffer base, int flavor) + throws MPIException + { + if(!base.isDirect()) + throw new IllegalArgumentException("The buffer must be direct."); + + int baseSize; + + if(base instanceof ByteBuffer) + baseSize = 1; + else if(base instanceof CharBuffer || base instanceof ShortBuffer) + baseSize = 2; + else if(base instanceof IntBuffer || base instanceof FloatBuffer) + baseSize = 4; + else if(base instanceof LongBuffer || base instanceof DoubleBuffer) + baseSize = 8; + else + throw new AssertionError(); + + int sizeBytes = size * baseSize, + dispBytes = dispUnit * baseSize; + + if(flavor == 0) { + handle = allocateWin(sizeBytes, dispBytes, info.handle, comm.handle, base); + } else if(flavor == 1) { + handle = allocateSharedWin(sizeBytes, dispBytes, info.handle, comm.handle, base); + } + } + + private native long allocateWin( + int size, int dispUnit, long info, long comm, Buffer base) + throws MPIException; + + private native long allocateSharedWin( + int size, int dispUnit, long info, long comm, Buffer base) + throws MPIException; + + /** + * Java binding of {@code MPI_WIN_CREATE_DYNAMIC}. + * @param info info object + * @param comm communicator + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public Win(Info info, Comm comm) + throws MPIException + { + handle = createDynamicWin(info.handle, comm.handle); + } + + private native long createDynamicWin( + long info, long comm) + throws MPIException; + + private int getBaseType(Datatype orgType, Datatype targetType) + { + int baseType = orgType.baseType; + + if(baseType != targetType.baseType) + { + throw new IllegalArgumentException( + "Both datatype arguments must be constructed "+ + "from the same predefined datatype."); + } + + return baseType; + } + + /** + * Java binding of {@code MPI_WIN_ATTACH}. + * @param base initial address of window + * @param size size of window (buffer elements) + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void attach(Buffer base, int size) throws MPIException + { + MPI.check(); + if(!base.isDirect()) + throw new IllegalArgumentException("The buffer must be direct."); + + int baseSize; + + if(base instanceof ByteBuffer) + baseSize = 1; + else if(base instanceof CharBuffer || base instanceof ShortBuffer) + baseSize = 2; + else if(base instanceof IntBuffer || base instanceof FloatBuffer) + baseSize = 4; + else if(base instanceof LongBuffer || base instanceof DoubleBuffer) + baseSize = 8; + else + throw new AssertionError(); + + int sizeBytes = size * baseSize; + + attach(handle, base, sizeBytes); + } + + private native void attach(long win, Buffer base, int size) throws MPIException; + + /** + * Java binding of {@code MPI_WIN_DETACH}. + * @param base initial address of window + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void detach(Buffer base) throws MPIException + { + MPI.check(); + if(!base.isDirect()) + throw new IllegalArgumentException("The buffer must be direct."); + + detach(handle, base); + } + + private native void detach(long win, Buffer base) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_GET_GROUP}.
+ * @return group of processes which share access to the window + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Group getGroup() throws MPIException + { + MPI.check(); + return new Group(getGroup(handle)); + } + + private native long getGroup(long win) throws MPIException; + + /** + * Java binding of {@code MPI_PUT}. + * @param origin origin buffer + * @param orgCount number of entries in origin buffer + * @param orgType datatype of each entry in origin buffer + * @param targetRank rank of target + * @param targetDisp displacement from start of window to target buffer + * @param targetCount number of entries in target buffer + * @param targetType datatype of each entry in target buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void put(Buffer origin, int orgCount, Datatype orgType, + int targetRank, int targetDisp, int targetCount, + Datatype targetType) + throws MPIException + { + MPI.check(); + + if(!origin.isDirect()) + throw new IllegalArgumentException("The origin must be direct buffer."); + + put(handle, origin, orgCount, orgType.handle, + targetRank, targetDisp, targetCount, targetType.handle, + getBaseType(orgType, targetType)); + } + + private native void put( + long win, Buffer origin, int orgCount, long orgType, + int targetRank, int targetDisp, int targetCount, long targetType, + int baseType) throws MPIException; + + /** + * Java binding of {@code MPI_GET}. + * @param origin origin buffer + * @param orgCount number of entries in origin buffer + * @param orgType datatype of each entry in origin buffer + * @param targetRank rank of target + * @param targetDisp displacement from start of window to target buffer + * @param targetCount number of entries in target buffer + * @param targetType datatype of each entry in target buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public void get(Buffer origin, int orgCount, Datatype orgType, + int targetRank, int targetDisp, int targetCount, + Datatype targetType) + throws MPIException + { + MPI.check(); + + if(!origin.isDirect()) + throw new IllegalArgumentException("The origin must be direct buffer."); + + get(handle, origin, orgCount, orgType.handle, + targetRank, targetDisp, targetCount, targetType.handle, + getBaseType(orgType, targetType)); + } + + private native void get( + long win, Buffer origin, int orgCount, long orgType, + int targetRank, int targetDisp, int targetCount, long targetType, + int baseType) throws MPIException; + + /** + * Java binding of {@code MPI_ACCUMULATE}. + * @param origin origin buffer + * @param orgCount number of entries in origin buffer + * @param orgType datatype of each entry in origin buffer + * @param targetRank rank of target + * @param targetDisp displacement from start of window to target buffer + * @param targetCount number of entries in target buffer + * @param targetType datatype of each entry in target buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void accumulate(Buffer origin, int orgCount, Datatype orgType, + int targetRank, int targetDisp, int targetCount, + Datatype targetType, Op op) + throws MPIException + { + MPI.check(); + + if(!origin.isDirect()) + throw new IllegalArgumentException("The origin must be direct buffer."); + + accumulate(handle, origin, orgCount, orgType.handle, + targetRank, targetDisp, targetCount, targetType.handle, + op, op.handle, getBaseType(orgType, targetType)); + } + + private native void accumulate( + long win, Buffer origin, int orgCount, long orgType, + int targetRank, int targetDisp, int targetCount, long targetType, + Op jOp, long hOp, int baseType) throws MPIException; + + /** + * Java binding of {@code MPI_WIN_FENCE}. 
+ * @param assertion program assertion + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void fence(int assertion) throws MPIException + { + MPI.check(); + fence(handle, assertion); + } + + private native void fence(long win, int assertion) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_START}. + * @param group group of target processes + * @param assertion program assertion + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void start(Group group, int assertion) throws MPIException + { + MPI.check(); + start(handle, group.handle, assertion); + } + + private native void start(long win, long group, int assertion) + throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_COMPLETE}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void complete() throws MPIException + { + MPI.check(); + complete(handle); + } + + private native void complete(long win) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_POST}. + * @param group group of origin processes + * @param assertion program assertion + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void post(Group group, int assertion) throws MPIException + { + MPI.check(); + post(handle, group.handle, assertion); + } + + private native void post(long win, long group, int assertion) + throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_WAIT}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void waitFor() throws MPIException + { + MPI.check(); + waitFor(handle); + } + + private native void waitFor(long win) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_TEST}. 
+ * @return true if success + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public boolean test() throws MPIException + { + MPI.check(); + return test(handle); + } + + private native boolean test(long win) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_LOCK}. + * @param lockType either MPI.LOCK_EXCLUSIVE or MPI.LOCK_SHARED + * @param rank rank of locked window + * @param assertion program assertion + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void lock(int lockType, int rank, int assertion) throws MPIException + { + MPI.check(); + lock(handle, lockType, rank, assertion); + } + + private native void lock(long win, int lockType, int rank, int assertion) + throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_UNLOCK}. + * @param rank rank of window + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void unlock(int rank) throws MPIException + { + MPI.check(); + unlock(handle, rank); + } + + private native void unlock(long win, int rank) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_SET_ERRHANDLER}. + * @param errhandler new MPI error handler for window + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setErrhandler(Errhandler errhandler) throws MPIException + { + MPI.check(); + setErrhandler(handle, errhandler.handle); + } + + private native void setErrhandler(long win, long errhandler) + throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_CALL_ERRHANDLER}. + * @param errorCode error code + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public void callErrhandler(int errorCode) throws MPIException + { + callErrhandler(handle, errorCode); + } + + private native void callErrhandler(long handle, int errorCode) + throws MPIException; + + /** + * Create a new attribute key. + *

Java binding of the MPI operation {@code MPI_WIN_CREATE_KEYVAL}. + * @return attribute key for future access + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static int createKeyval() throws MPIException + { + MPI.check(); + return createKeyval_jni(); + } + + private static native int createKeyval_jni() throws MPIException; + + /** + * Frees an attribute key. + *

Java binding of the MPI operation {@code MPI_WIN_FREE_KEYVAL}. + * @param keyval attribute key + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public static void freeKeyval(int keyval) throws MPIException + { + MPI.check(); + freeKeyval_jni(keyval); + } + + private static native void freeKeyval_jni(int keyval) throws MPIException; + + /** + * Stores attribute value associated with a key. + *

Java binding of the MPI operation {@code MPI_WIN_SET_ATTR}. + * @param keyval attribute key + * @param value attribute value + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setAttr(int keyval, Object value) throws MPIException + { + MPI.check(); + setAttr(handle, keyval, MPI.attrSet(value)); + } + + private native void setAttr(long win, int keyval, byte[] value) + throws MPIException; + + /** + * Retrieves attribute value by key. + *

Java binding of the MPI operation {@code MPI_WIN_GET_ATTR}. + * @param keyval attribute key + * @return attribute value or null if no attribute is associated with the key. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Object getAttr(int keyval) throws MPIException + { + MPI.check(); + Object obj = getAttr(handle, keyval); + return obj instanceof byte[] ? MPI.attrGet((byte[])obj) : obj; + } + + private native Object getAttr(long win, int keyval) throws MPIException; + + /** + * Deletes an attribute value associated with a key. + *

Java binding of the MPI operation {@code MPI_WIN_DELETE_ATTR}. + * @param keyval attribute key + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void deleteAttr(int keyval) throws MPIException + { + MPI.check(); + deleteAttr(handle, keyval); + } + + private native void deleteAttr(long win, int keyval) throws MPIException; + + /** + * Java binding of {@code MPI_WIN_FREE}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + @Override public void free() throws MPIException + { + MPI.check(); + handle = free(handle); + } + + private native long free(long win) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_GET_INFO}. + * @return Info Info object associated with this window + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Info getInfo() throws MPIException + { + MPI.check(); + return new Info(getInfo(handle)); + } + + private native long getInfo(long win) + throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_SET_INFO}. + * @param info the new info + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setInfo(Info info) throws MPIException + { + MPI.check(); + setInfo(handle, info.handle); + } + + private native void setInfo(long win, long info) + throws MPIException; + + /** + *

Java binding of the MPI operation {@code MPI_RPUT}. + * @param origin_addr initial address of origin buffer + * @param origin_count number of entries in origin buffer + * @param origin_datatype datatype of each entry in origin buffer + * @param target_rank rank of target + * @param target_disp displacement from start of window to target buffer + * @param target_count number of entries in target buffer + * @param target_datatype datatype of each entry in target buffer + * @return RMA request + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public final Request rPut(Buffer origin_addr, int origin_count, + Datatype origin_datatype, int target_rank, int target_disp, + int target_count, Datatype target_datatype) + throws MPIException + { + if(!origin_addr.isDirect()) + throw new IllegalArgumentException("The origin must be direct buffer."); + Request req = new Request(rPut(handle, origin_addr, origin_count, + origin_datatype.handle, target_rank, target_disp, + target_count, target_datatype.handle, getBaseType(origin_datatype, target_datatype))); + req.addSendBufRef(origin_addr); + return req; + } + + private native long rPut(long win, Buffer origin_addr, int origin_count, + long origin_datatype, int target_rank, int target_disp, + int target_count, long target_datatype, int baseType) + throws MPIException; + + /** + * Java binding of {@code MPI_RGET}. + * @param origin origin buffer + * @param orgCount number of entries in origin buffer + * @param orgType datatype of each entry in origin buffer + * @param targetRank rank of target + * @param targetDisp displacement from start of window to target buffer + * @param targetCount number of entries in target buffer + * @param targetType datatype of each entry in target buffer + * @return RMA request + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public final Request rGet(Buffer origin, int orgCount, Datatype orgType, + int targetRank, int targetDisp, int targetCount, + Datatype targetType) + throws MPIException + { + MPI.check(); + + if(!origin.isDirect()) + throw new IllegalArgumentException("The origin must be direct buffer."); + Request req = new Request(rGet(handle, origin, orgCount, orgType.handle, + targetRank, targetDisp, targetCount, targetType.handle, + getBaseType(orgType, targetType))); + req.addRecvBufRef(origin); + return req; + } + + private native long rGet( + long win, Buffer origin, int orgCount, long orgType, + int targetRank, int targetDisp, int targetCount, long targetType, + int baseType) throws MPIException; + + /** + * Java binding of {@code MPI_RACCUMULATE}. + * @param origin origin buffer + * @param orgCount number of entries in origin buffer + * @param orgType datatype of each entry in origin buffer + * @param targetRank rank of target + * @param targetDisp displacement from start of window to target buffer + * @param targetCount number of entries in target buffer + * @param targetType datatype of each entry in target buffer + * @param op reduce operation + * @return RMA request + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public Request rAccumulate(Buffer origin, int orgCount, Datatype orgType, + int targetRank, int targetDisp, int targetCount, + Datatype targetType, Op op) + throws MPIException + { + MPI.check(); + + if(!origin.isDirect()) + throw new IllegalArgumentException("The origin must be direct buffer."); + Request req = new Request(rAccumulate(handle, origin, orgCount, orgType.handle, + targetRank, targetDisp, targetCount, targetType.handle, + op, op.handle, getBaseType(orgType, targetType))); + req.addSendBufRef(origin); + return req; + } + + private native long rAccumulate( + long win, Buffer origin, int orgCount, long orgType, + int targetRank, int targetDisp, int targetCount, long targetType, + Op jOp, long hOp, int baseType) throws MPIException; + + /** + * Java binding of {@code MPI_GET_ACCUMULATE}. + * @param origin origin buffer + * @param orgCount number of entries in origin buffer + * @param orgType datatype of each entry in origin buffer + * @param resultAddr result buffer + * @param resultCount number of entries in result buffer + * @param resultType datatype of each entry in result buffer + * @param targetRank rank of target + * @param targetDisp displacement from start of window to target buffer + * @param targetCount number of entries in target buffer + * @param targetType datatype of each entry in target buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + + public void getAccumulate(Buffer origin, int orgCount, Datatype orgType, + Buffer resultAddr, int resultCount, Datatype resultType, + int targetRank, int targetDisp, int targetCount, + Datatype targetType, Op op) + throws MPIException + { + MPI.check(); + + if(!origin.isDirect()) + throw new IllegalArgumentException("The origin must be direct buffer."); + + getAccumulate(handle, origin, orgCount, orgType.handle, + resultAddr, resultCount, resultType.handle, + targetRank, targetDisp, targetCount, targetType.handle, + op, op.handle, getBaseType(orgType, targetType)); + } + + private native void getAccumulate( + long win, Buffer origin, int orgCount, long orgType, + Buffer resultAddr, int resultCount, long resultType, + int targetRank, int targetDisp, int targetCount, long targetType, + Op jOp, long hOp, int baseType) throws MPIException; + + /** + * Java binding of {@code MPI_RGET_ACCUMULATE}. + * @param origin origin buffer + * @param orgCount number of entries in origin buffer + * @param orgType datatype of each entry in origin buffer + * @param resultAddr result buffer + * @param resultCount number of entries in result buffer + * @param resultType datatype of each entry in result buffer + * @param targetRank rank of target + * @param targetDisp displacement from start of window to target buffer + * @param targetCount number of entries in target buffer + * @param targetType datatype of each entry in target buffer + * @param op reduce operation + * @return RMA request + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + + public Request rGetAccumulate(Buffer origin, int orgCount, Datatype orgType, + Buffer resultAddr, int resultCount, Datatype resultType, + int targetRank, int targetDisp, int targetCount, + Datatype targetType, Op op) + throws MPIException + { + MPI.check(); + + if(!origin.isDirect()) + throw new IllegalArgumentException("The origin must be direct buffer."); + Request req = new Request(rGetAccumulate(handle, origin, orgCount, orgType.handle, + resultAddr, resultCount, resultType.handle, + targetRank, targetDisp, targetCount, targetType.handle, + op, op.handle, getBaseType(orgType, targetType))); + req.addRecvBufRef(origin); + return req; + } + + private native long rGetAccumulate( + long win, Buffer origin, int orgCount, long orgType, + Buffer resultAddr, int resultCount, long resultType, + int targetRank, int targetDisp, int targetCount, long targetType, + Op jOp, long hOp, int baseType) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_LOCK_ALL}. + * @param assertion program assertion + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void lockAll(int assertion) throws MPIException + { + MPI.check(); + lockAll(handle, assertion); + } + + private native void lockAll(long win, int assertion) + throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_UNLOCK_ALL}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void unlockAll() throws MPIException + { + MPI.check(); + unlockAll(handle); + } + + private native void unlockAll(long win) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_SYNC}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void sync() throws MPIException + { + MPI.check(); + sync(handle); + } + + private native void sync(long win) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_FLUSH}. 
+ * @param targetRank rank of target window + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void flush(int targetRank) throws MPIException + { + MPI.check(); + flush(handle, targetRank); + } + + private native void flush(long win, int targetRank) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_FLUSH_ALL}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void flushAll() throws MPIException + { + MPI.check(); + flushAll(handle); + } + + private native void flushAll(long win) throws MPIException; + + /** + * Java binding of {@code MPI_COMPARE_AND_SWAP}. + * @param origin origin buffer + * @param compareAddr compare buffer + * @param resultAddr result buffer + * @param targetType datatype of each entry in target buffer + * @param targetRank rank of target + * @param targetDisp displacement from start of window to target buffer + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + + public void compareAndSwap(Buffer origin, Buffer compareAddr, Buffer resultAddr, + Datatype targetType, int targetRank, int targetDisp) + throws MPIException + { + MPI.check(); + + if(!origin.isDirect()) + throw new IllegalArgumentException("The origin must be direct buffer."); + + compareAndSwap(handle, origin, compareAddr, resultAddr, + targetType.handle, targetRank, targetDisp); + } + + private native void compareAndSwap( + long win, Buffer origin, Buffer compareAddr, Buffer resultAddr, + long targetType, int targetRank, int targetDisp) throws MPIException; + + /** + * Java binding of {@code MPI_FETCH_AND_OP}. 
+ * @param origin origin buffer + * @param resultAddr result buffer + * @param dataType datatype of entry in origin, result, and target buffers + * @param targetRank rank of target + * @param targetDisp displacement from start of window to target buffer + * @param op reduce operation + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + + public void fetchAndOp(Buffer origin, Buffer resultAddr, Datatype dataType, + int targetRank, int targetDisp, Op op) + throws MPIException + { + MPI.check(); + + if(!origin.isDirect()) + throw new IllegalArgumentException("The origin must be direct buffer."); + + fetchAndOp(handle, origin, resultAddr, dataType.handle, targetRank, + targetDisp, op, op.handle, getBaseType(dataType, dataType)); + } + + private native void fetchAndOp( + long win, Buffer origin, Buffer resultAddr, long targetType, int targetRank, + int targetDisp, Op jOp, long hOp, int baseType) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_FLUSH_LOCAL}. + * @param targetRank rank of target window + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + + public void flushLocal(int targetRank) throws MPIException + { + MPI.check(); + flushLocal(handle, targetRank); + } + + private native void flushLocal(long win, int targetRank) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_FLUSH_LOCAL_ALL}. + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + + public void flushLocalAll() throws MPIException + { + MPI.check(); + flushLocalAll(handle); + } + + private native void flushLocalAll(long win) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_GET_NAME}. + * @return the name associated with this window + * @throws MPIException Signals that an MPI exception of some sort has occurred. 
+ */ + public String getName() throws MPIException + { + MPI.check(); + return getName(handle); + } + + private native String getName(long handle) throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_WIN_SET_NAME}. + * @param name the name to associate with this window + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setName(String name) throws MPIException + { + MPI.check(); + setName(handle, name); + } + + private native void setName(long handle, String name) throws MPIException; } // Win diff --git a/ompi/mpi/man/man3/MPI.3in b/ompi/mpi/man/man3/MPI.3in index 09f1cac1ae2..9e99d67dc4d 100644 --- a/ompi/mpi/man/man3/MPI.3in +++ b/ompi/mpi/man/man3/MPI.3in @@ -1,10 +1,10 @@ .\" -*- nroff -*- -.\" Copyright 2008 Sun Microsystems, Inc. +.\" Copyright 2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation .\" $COPYRIGHT$ .TH MPI 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -MPI \- General information #PACKAGE_NAME# #PACKAGE_VERSION#. +MPI \- General information #PACKAGE_NAME# #PACKAGE_VERSION#. .SH MPI .ft R @@ -37,11 +37,11 @@ The MPI standards are available at the following URL: .SH MAN PAGE SYNTAX .ft R -Man pages for Open MPI and Open MPI I/O routines are named according to C syntax, that is, they begin with the prefix "MPI_", all in uppercase, and the first letter following the "MPI_" prefix is also uppercase. The rest of the letters in the routine are all lowercase, for example, "MPI_Comm_get_attr". +Man pages for Open MPI and Open MPI I/O routines are named according to C syntax, that is, they begin with the prefix "MPI_", all in uppercase, and the first letter following the "MPI_" prefix is also uppercase. The rest of the letters in the routine are all lowercase, for example, "MPI_Comm_get_attr". 
.SH ENVIRONMENT .ft R -To fine-tune your Open MPI environment, you can either use arguments to the \fImpirun\fP, \fIorterun\fP, or \fImpiexec\fP commands, or you can use MCA parameters. +To fine-tune your Open MPI environment, you can either use arguments to the \fImpirun\fP, \fIorterun\fP, or \fImpiexec\fP commands, or you can use MCA parameters. .sp For more information on arguments, see the orterun.1 man page. .sp @@ -59,92 +59,136 @@ called. By default, this error handler aborts the MPI job. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past -an error. +an error. .sp For more information on Open MPI error codes, see \fImpi.h\fP in the \fIinclude\fP directory. .sp Standard error return classes for Open MPI: .sp .nf -MPI_SUCCESS 0 Successful return code. -MPI_ERR_BUFFER 1 Invalid buffer pointer. -MPI_ERR_COUNT 2 Invalid count argument. -MPI_ERR_TYPE 3 Invalid datatype argument. -MPI_ERR_TAG 4 Invalid tag argument. -MPI_ERR_COMM 5 Invalid communicator. - -MPI_ERR_RANK 6 Invalid rank. -MPI_ERR_REQUEST 7 Invalid MPI_Request handle. -MPI_ERR_ROOT 7 Invalid root. -MPI_ERR_GROUP 8 Null group passed to function. -MPI_ERR_OP 9 Invalid operation. -MPI_ERR_TOPOLOGY 10 Invalid topology. - -MPI_ERR_DIMS 11 Illegal dimension argument. -MPI_ERR_ARG 12 Invalid argument. -MPI_ERR_UNKNOWN 13 Unknown error. -MPI_ERR_TRUNCATE 14 Message truncated on receive. -MPI_ERR_OTHER 15 Other error; use Error_string. - -MPI_ERR_INTERN 16 Internal error code. -MPI_ERR_IN_STATUS 17 Look in status for error value. -MPI_ERR_PENDING 18 Pending request. -MPI_ERR_ACCESS 19 Permission denied. -MPI_ERR_AMODE 20 Unsupported amode passed to open. - -MPI_ERR_ASSERT 21 Invalid assert. -MPI_ERR_BAD_FILE 22 Invalid file name (for example, - path name too long). -MPI_ERR_BASE 23 Invalid base. 
-MPI_ERR_CONVERSION 24 An error occurred in a user-supplied - data-conversion function. -MPI_ERR_DISP 25 Invalid displacement. - -MPI_ERR_DUP_DATAREP 26 Conversion functions could not - be registered because a data - representation identifier that was - already defined was passed to - MPI_REGISTER_DATAREP. -MPI_ERR_FILE_EXISTS 27 File exists. -MPI_ERR_FILE_IN_USE 28 File operation could not be - completed, as the file is currently - open by some process. -MPI_ERR_FILE 29 -MPI_ERR_INFO_KEY 30 Illegal info key. - -MPI_ERR_INFO_NOKEY 31 No such key. -MPI_ERR_INFO_VALUE 32 Illegal info value. -MPI_ERR_INFO 33 Invalid info object. -MPI_ERR_IO 34 I/O error. -MPI_ERR_KEYVAL 35 Illegal key value. - -MPI_ERR_LOCKTYPE 36 Invalid locktype. -MPI_ERR_NAME 37 Name not found. -MPI_ERR_NO_MEM 38 Memory exhausted. -MPI_ERR_NOT_SAME 39 -MPI_ERR_NO_SPACE 40 Not enough space. - -MPI_ERR_NO_SUCH_FILE 41 File (or directory) does not exist. -MPI_ERR_PORT 42 Invalid port. -MPI_ERR_QUOTA 43 Quota exceeded. -MPI_ERR_READ_ONLY 44 Read-only file system. -MPI_ERR_RMA_CONFLICT 45 Conflicting accesses to window. - -MPI_ERR_RMA_SYNC 46 Erroneous RMA synchronization. -MPI_ERR_SERVICE 47 Invalid publish/unpublish. -MPI_ERR_SIZE 48 Invalid size. -MPI_ERR_SPAWN 49 Error spawning. -MPI_ERR_UNSUPPORTED_DATAREP - 50 Unsupported datarep passed to - MPI_File_set_view. - +MPI_SUCCESS 0 Successful return code. +MPI_ERR_BUFFER 1 Invalid buffer pointer. +MPI_ERR_COUNT 2 Invalid count argument. +MPI_ERR_TYPE 3 Invalid datatype argument. +MPI_ERR_TAG 4 Invalid tag argument. +MPI_ERR_COMM 5 Invalid communicator. + +MPI_ERR_RANK 6 Invalid rank. +MPI_ERR_REQUEST 7 Invalid MPI_Request handle. +MPI_ERR_ROOT 8 Invalid root. +MPI_ERR_GROUP 9 Null group passed to function. +MPI_ERR_OP 10 Invalid operation. + +MPI_ERR_TOPOLOGY 11 Invalid topology. +MPI_ERR_DIMS 12 Illegal dimension argument. +MPI_ERR_ARG 13 Invalid argument. +MPI_ERR_UNKNOWN 14 Unknown error. +MPI_ERR_TRUNCATE 15 Message truncated on receive. 
+ +MPI_ERR_OTHER 16 Other error; use Error_string. +MPI_ERR_INTERN 17 Internal error code. +MPI_ERR_IN_STATUS 18 Look in status for error value. +MPI_ERR_PENDING 19 Pending request. +MPI_ERR_ACCESS 20 Permission denied. + +MPI_ERR_AMODE 21 Unsupported amode passed to open. +MPI_ERR_ASSERT 22 Invalid assert. +MPI_ERR_BAD_FILE 23 Invalid file name (for example, + path name too long). +MPI_ERR_BASE 24 Invalid base. +MPI_ERR_CONVERSION 25 An error occurred in a user-supplied + data-conversion function. + +MPI_ERR_DISP 26 Invalid displacement. +MPI_ERR_DUP_DATAREP 27 Conversion functions could not + be registered because a data + representation identifier that was + already defined was passed to + MPI_REGISTER_DATAREP. +MPI_ERR_FILE_EXISTS 28 File exists. +MPI_ERR_FILE_IN_USE 29 File operation could not be + completed, as the file is currently + open by some process. +MPI_ERR_FILE 30 Invalid file handle. + +MPI_ERR_INFO_KEY 31 Illegal info key. +MPI_ERR_INFO_NOKEY 32 No such key. +MPI_ERR_INFO_VALUE 33 Illegal info value. +MPI_ERR_INFO 34 Invalid info object. +MPI_ERR_IO 35 I/O error. + +MPI_ERR_KEYVAL 36 Illegal key value. +MPI_ERR_LOCKTYPE 37 Invalid locktype. +MPI_ERR_NAME 38 Name not found. +MPI_ERR_NO_MEM 39 Memory exhausted. +MPI_ERR_NOT_SAME 40 Collective argument not identical + on all processes, or collective + routines called in a different order + by different processes. + +MPI_ERR_NO_SPACE 41 Not enough space. +MPI_ERR_NO_SUCH_FILE 42 File (or directory) does not exist. +MPI_ERR_PORT 43 Invalid port. +MPI_ERR_QUOTA 44 Quota exceeded. +MPI_ERR_READ_ONLY 45 Read-only file system. + +MPI_ERR_RMA_CONFLICT 46 Conflicting accesses to window. +MPI_ERR_RMA_SYNC 47 Erroneous RMA synchronization. +MPI_ERR_SERVICE 48 Invalid publish/unpublish. +MPI_ERR_SIZE 49 Invalid size. +MPI_ERR_SPAWN 50 Error spawning. + +MPI_ERR_UNSUPPORTED_DATAREP + 51 Unsupported datarep passed to + MPI_File_set_view. 
MPI_ERR_UNSUPPORTED_OPERATION - 51 Unsupported operation, such as - seeking on a file that supports - only sequential access. -MPI_ERR_WIN 52 Invalid window. -MPI_ERR_LASTCODE 53 Last error code. - -MPI_ERR_SYSRESOURCE -2 Out of resources + 52 Unsupported operation, such as + seeking on a file that supports + only sequential access. +MPI_ERR_WIN 53 Invalid window. +MPI_T_ERR_MEMORY 54 Out of memory. +MPI_T_ERR_NOT_INITIALIZED 55 Interface not initialized. + +MPI_T_ERR_CANNOT_INIT 56 Interface not in the state to be + initialized. +MPI_T_ERR_INVALID_INDEX 57 The enumeration index is invalid. +MPI_T_ERR_INVALID_ITEM 58 The item index queried is out of + range. +MPI_T_ERR_INVALID_HANDLE 59 The handle is invalid. +MPI_T_ERR_OUT_OF_HANDLES 60 No more handles available. + +MPI_T_ERR_OUT_OF_SESSIONS 61 No more sessions available. +MPI_T_ERR_INVALID_SESSION 62 Session argument is not a valid + session. +MPI_T_ERR_CVAR_SET_NOT_NOW + 63 Variable cannot be set at this + moment. +MPI_T_ERR_CVAR_SET_NEVER 64 Variable cannot be set until end of + execution. +MPI_T_ERR_PVAR_NO_STARTSTOP + 65 Variable cannot be started or stopped. + +MPI_T_ERR_PVAR_NO_WRITE 66 Variable cannot be written or reset. +MPI_T_ERR_PVAR_NO_ATOMIC 67 Variable cannot be read and written + atomically. +MPI_ERR_RMA_RANGE 68 Target memory is not part of the + window (in the case of a window + created with MPI_WIN_CREATE_DYNAMIC, + target memory is not attached). +MPI_ERR_RMA_ATTACH 69 Memory cannot be attached (e.g., + because of resource exhaustion). +MPI_ERR_RMA_FLAVOR 70 Passed window has the wrong flavor + for the called function. + +MPI_ERR_RMA_SHARED 71 Memory cannot be shared (e.g., some + process in the group of the specified + communicator cannot expose shared + memory). +MPI_T_ERR_INVALID 72 Invalid use of the interface or bad + parameter value(s). +MPI_T_ERR_INVALID_NAME 73 The variable or category name is + invalid. + +MPI_ERR_LASTCODE 93 Last error code.
.fi diff --git a/ompi/mpi/man/man3/MPI_Abort.3in b/ompi/mpi/man/man3/MPI_Abort.3in index b10af29d3c3..8b292f9f889 100644 --- a/ompi/mpi/man/man3/MPI_Abort.3in +++ b/ompi/mpi/man/man3/MPI_Abort.3in @@ -19,8 +19,8 @@ int MPI_Abort(MPI_Comm \fIcomm\fP, int\fI errorcode\fP) .ft R .nf INCLUDE 'mpif.h' -MPI_ABORT(\fICOMM\fP, \fIERRORCODE\fP, \fIIERROR\fP) - INTEGER \fICOMM\fP,\fI ERRORCODE\fP,\fI IERROR +MPI_ABORT(\fICOMM\fP, \fIERRORCODE\fP, \fIIERROR\fP) + INTEGER \fICOMM\fP,\fI ERRORCODE\fP,\fI IERROR .fi .SH C++ Syntax @@ -32,17 +32,17 @@ void Comm::Abort(int \fIerrorcode\fP) .SH INPUT PARAMETERS .ft R .TP 1i -comm +comm Communicator of tasks to abort. .TP 1i -errorcode +errorcode Error code to return to invoking environment. .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -61,5 +61,5 @@ Almost all MPI routines return an error value; C routines as the value of the fu .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler -may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Accumulate.3in b/ompi/mpi/man/man3/MPI_Accumulate.3in index ccb25e049fd..4d11966b963 100644 --- a/ompi/mpi/man/man3/MPI_Accumulate.3in +++ b/ompi/mpi/man/man3/MPI_Accumulate.3in @@ -14,8 +14,8 @@ .nf #include int MPI_Accumulate(const void *\fIorigin_addr\fP, int \fIorigin_count\fP, - MPI_Datatype \fIorigin_datatype\fP, int \fItarget_rank\fP, - MPI_Aint \fItarget_disp\fP, int \fItarget_count\fP, + MPI_Datatype \fIorigin_datatype\fP, int \fItarget_rank\fP, + MPI_Aint \fItarget_disp\fP, int \fItarget_count\fP, MPI_Datatype \fItarget_datatype\fP, MPI_Op \fIop\fP, MPI_Win \fIwin\fP) int MPI_Raccumulate(const void *\fIorigin_addr\fP, int \fIorigin_count\fP, @@ -28,11 +28,11 @@ int MPI_Raccumulate(const void *\fIorigin_addr\fP, int \fIorigin_count\fP, .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf INCLUDE 'mpif.h' -MPI_ACCUMULATE(\fIORIGIN_ADDR, ORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, - TARGET_DISP, TARGET_COUNT, TARGET_DATATYPE, OP, WIN, IERROR\fP) - \fIORIGIN_ADDR\fP(*) - INTEGER(KIND=MPI_ADDRESS_KIND) \fITARGET_DISP\fP - INTEGER \fIORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, TARGET_COUNT, +MPI_ACCUMULATE(\fIORIGIN_ADDR, ORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, + TARGET_DISP, TARGET_COUNT, TARGET_DATATYPE, OP, WIN, IERROR\fP) + \fIORIGIN_ADDR\fP(*) + INTEGER(KIND=MPI_ADDRESS_KIND) \fITARGET_DISP\fP + INTEGER \fIORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, TARGET_COUNT, TARGET_DATATYPE, OP, WIN, IERROR \fP MPI_RACCUMULATE(\fIORIGIN_ADDR, ORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, @@ -46,10 +46,10 @@ MPI_RACCUMULATE(\fIORIGIN_ADDR, ORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, .SH C++ Syntax .nf #include -void MPI::Win::Accumulate(const void* \fIorigin_addr\fP, int \fIorigin_count\fP, - const MPI::Datatype& \fIorigin_datatype\fP, int \fItarget_rank\fP, - MPI::Aint \fItarget_disp\fP, int \fItarget_count\fP, const MPI::Datatype& - \fItarget_datatype\fP, const MPI::Op& \fIop\fP) const +void MPI::Win::Accumulate(const void* 
\fIorigin_addr\fP, int \fIorigin_count\fP, + const MPI::Datatype& \fIorigin_datatype\fP, int \fItarget_rank\fP, + MPI::Aint \fItarget_disp\fP, int \fItarget_count\fP, const MPI::Datatype& + \fItarget_datatype\fP, const MPI::Op& \fIop\fP) const .fi .SH INPUT PARAMETERS @@ -72,11 +72,11 @@ Rank of target (nonnegative integer). .ft R .TP 1i target_disp -Displacement from start of window to beginning of target buffer (nonnegative integer). +Displacement from start of window to beginning of target buffer (nonnegative integer). .ft R .TP 1i target_count -Number of entries in target buffer (nonnegative integer). +Number of entries in target buffer (nonnegative integer). .ft R .TP 1i target_datatype @@ -84,11 +84,11 @@ Data type of each entry in target buffer (handle). .ft R .TP 1i op -Reduce operation (handle). +Reduce operation (handle). .ft R .TP 1i win -Window object (handle). +Window object (handle). .SH OUTPUT PARAMETER .ft R @@ -96,17 +96,17 @@ Window object (handle). MPI_Raccumulate: RMA request .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R \fBMPI_Accumulate\fP is a function used for one-sided MPI communication that adds the contents of the origin buffer (as defined by \fIorigin_addr\fP, \fIorigin_count\fP, and \fIorigin_datatype\fP) to the buffer specified by the arguments \fItarget_count\fP and \fItarget_datatype\fP, at offset \fItarget_disp\fP, in the target window specified by \fItarget_rank\fP and \fIwin\fP, using the operation \fIop\fP. The target window can only be accessed by processes within the same node. This is similar to MPI_Put, except that data is combined into the target area instead of overwriting it. .sp -Any of the predefined operations for MPI_Reduce can be used. User-defined functions cannot be used. For example, if \fIop\fP is MPI_SUM, each element of the origin buffer is added to the corresponding element in the target, replacing the former value in the target. 
+Any of the predefined operations for MPI_Reduce can be used. User-defined functions cannot be used. For example, if \fIop\fP is MPI_SUM, each element of the origin buffer is added to the corresponding element in the target, replacing the former value in the target. .sp -Each datatype argument must be a predefined data type or a derived data type, where all basic components are of the same predefined data type. Both datatype arguments must be constructed from the same predefined data type. The operation \fIop\fP applies to elements of that predefined type. The \fItarget_datatype\fP argument must not specify overlapping entries, and the target buffer must fit in the target window. +Each datatype argument must be a predefined data type or a derived data type, where all basic components are of the same predefined data type. Both datatype arguments must be constructed from the same predefined data type. The operation \fIop\fP applies to elements of that predefined type. The \fItarget_datatype\fP argument must not specify overlapping entries, and the target buffer must fit in the target window. .sp -A new predefined operation, MPI_REPLACE, is defined. It corresponds to the associative function f(a, b) =b; that is, the current value in the target memory is replaced by the value supplied by the origin. +A new predefined operation, MPI_REPLACE, is defined. It corresponds to the associative function f(a, b) =b; that is, the current value in the target memory is replaced by the value supplied by the origin. .sp \fBMPI_Raccumulate\fP is similar to \fBMPI_Accumulate\fP, except that it allocates a communication request object and associates it with the request handle (the argument \fIrequest\fP) that can be used to wait or test for completion. The completion of an \fBMPI_Raccumulate\fP operation indicates that the \fIorigin_addr\fP buffer is free to be updated. It does not indicate that the operation has completed at the target window. 
@@ -136,11 +136,11 @@ Almost all MPI routines return an error value; C routines as the value of the fu .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler -may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R .sp -MPI_Put +MPI_Put MPI_Get_accumulate MPI_Reduce diff --git a/ompi/mpi/man/man3/MPI_Add_error_class.3in b/ompi/mpi/man/man3/MPI_Add_error_class.3in index dbb85df2f1a..e1a8ac872a4 100644 --- a/ompi/mpi/man/man3/MPI_Add_error_class.3in +++ b/ompi/mpi/man/man3/MPI_Add_error_class.3in @@ -23,7 +23,7 @@ int MPI_Add_error_class(int *\fIerrorclass\fP) .nf INCLUDE 'mpif.h' MPI_ADD_ERROR_CLASS(\fIERRORCLASS, IERROR\fP) - INTEGER \fIERRORCLASS, IERROR\fP + INTEGER \fIERRORCLASS, IERROR\fP .fi .SH C++ Syntax @@ -40,7 +40,7 @@ New error class (integer). .ft R .TP 1.4i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -78,7 +78,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Add_error_code.3in b/ompi/mpi/man/man3/MPI_Add_error_code.3in index 9412f6c0f37..ab4fa8b2f75 100644 --- a/ompi/mpi/man/man3/MPI_Add_error_code.3in +++ b/ompi/mpi/man/man3/MPI_Add_error_code.3in @@ -45,7 +45,7 @@ Error code returned by an MPI routine or an MPI error class (integer). .ft R .TP 1.4i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION Creates a new error code associated with \fIerrorclass\fP and returns @@ -71,7 +71,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Add_error_string.3in b/ompi/mpi/man/man3/MPI_Add_error_string.3in index baeab402afa..0956bccba63 100644 --- a/ompi/mpi/man/man3/MPI_Add_error_string.3in +++ b/ompi/mpi/man/man3/MPI_Add_error_string.3in @@ -25,7 +25,7 @@ int MPI_Add_error_string(int \fIerrorcode\fP, const char *\fIstring\fP) INCLUDE 'mpif.h' MPI_ADD_ERROR_STRING(\fIERRORCODE, STRING, IERROR\fP) INTEGER \fIERRORCODE, IERROR\fP - CHARACTER*(*) \fISTRING\fP + CHARACTER*(*) \fISTRING\fP .fi .SH C++ Syntax @@ -48,7 +48,7 @@ Text that corresponds to the error code or class (string). .ft R .TP 1.4i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -73,7 +73,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Address.3in b/ompi/mpi/man/man3/MPI_Address.3in index f2a98164e4b..83fef395e36 100644 --- a/ompi/mpi/man/man3/MPI_Address.3in +++ b/ompi/mpi/man/man3/MPI_Address.3in @@ -11,15 +11,15 @@ .ft R .SH C Syntax .nf -#include +#include int MPI_Address(void *\fIlocation\fP, MPI_Aint\fP *address\fP) .fi .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_ADDRESS(\fILOCATION\fP,\fI ADDRESS\fP,\fI IERROR\fP) - \fILOCATION\fP (*) +MPI_ADDRESS(\fILOCATION\fP,\fI ADDRESS\fP,\fI IERROR\fP) + \fILOCATION\fP (*) INTEGER \fIADDRESS\fP,\fI IERROR\fP @@ -38,33 +38,33 @@ Address of location (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Get_address instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Get_address instead. .sp -This deprecated routine is not available in C++. +This deprecated routine is not available in C++. .sp The address of a location in memory can be found by invoking this function. Returns the (byte) address of location. .sp -Example: Using MPI_Address for an array. +Example: Using MPI_Address for an array. .sp .nf -REAL A(100,100) +REAL A(100,100) .fi .br - INTEGER I1, I2, DIFF + INTEGER I1, I2, DIFF .br - CALL MPI_ADDRESS(A(1,1), I1, IERROR) + CALL MPI_ADDRESS(A(1,1), I1, IERROR) .br - CALL MPI_ADDRESS(A(10,10), I2, IERROR) + CALL MPI_ADDRESS(A(10,10), I2, IERROR) .br - DIFF = I2 - I1 + DIFF = I2 - I1 .br -! The value of DIFF is 909*sizeofreal; the values of I1 and I2 are +! The value of DIFF is 909*sizeofreal; the values of I1 and I2 are .br -! implementation dependent. +! implementation dependent. 
.fi .SH NOTES @@ -75,7 +75,7 @@ C users may be tempted to avoid using MPI_Address and rely on the availability of the address operator &. Note, however, that & cast-expression is a pointer, not an address. ANSI C does not require that the value of a pointer (or the pointer -cast to int) be the absolute address of the object pointed at although +cast to int) be the absolute address of the object pointed at although this is commonly the case. Furthermore, referencing may not have a unique definition on machines with a segmented address space. The use of MPI_Address to "reference" C variables guarantees portability to @@ -86,7 +86,7 @@ Almost all MPI routines return an error value; C routines as the value of the fu .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler -may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Get_address diff --git a/ompi/mpi/man/man3/MPI_Aint_add.3in b/ompi/mpi/man/man3/MPI_Aint_add.3in new file mode 100644 index 00000000000..bd0ac974dc5 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Aint_add.3in @@ -0,0 +1,83 @@ +.\" -*- nroff -*- +.\" Copyright 2013-2015 Los Alamos National Security, LLC. All rights reserved. +.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2006-2008 Sun Microsystems, Inc. 
+.\" Copyright (c) 1996 Thinking Machines Corporation +.\" $COPYRIGHT$ +.TH MPI_Aint_add 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Aint_add\fP, \fBMPI_Aint_diff\fP \- Portable functions for +arithmetic on MPI_Aint values. + +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include +MPI_Aint MPI_Aint_add(MPI_Aint \fIbase\fP, MPI_Aint \fIdisp\fP) + +MPI_Aint MPI_Aint_diff(MPI_Aint \fIaddr1\fP, MPI_Aint \fIaddr2\fP) + +.fi +.SH Fortran Syntax +.nf +INCLUDE 'mpif.h' +INTEGER(KIND=MPI_ADDRESS_KIND) MPI_AINT_ADD(\fIBASE, DISP\fP) + INTEGER(KIND=MPI_ADDRESS_KIND) \fIBASE, DISP\fP + +INTEGER(KIND=MPI_ADDRESS_KIND) MPI_AINT_DIFF(\fIADDR1, ADDR2\fP) + INTEGER(KIND=MPI_ADDRESS_KIND) \fIADDR1, ADDR2\fP + +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +base +Base address (integer). +.ft R +.TP 1i +disp +Displacement (integer). +.ft R +.TP 1i +addr1 +Minuend address (integer). +.ft R +.TP +addr2 +Subtrahend address (integer). + +.SH DESCRIPTION +.ft R +\fBMPI_Aint_add\fP produces a new MPI_Aint value that is equivalent to the sum of +the \fIbase\fP and \fIdisp\fP arguments, where \fIbase\fP represents +a base address returned by a call to \fBMPI_Get_address\fP and +\fIdisp\fP represents a signed integer displacement. The resulting +address is valid only at the process that generated \fIbase\fP, and it +must correspond to a location in the same object referenced by +\fIbase\fP, as described in MPI-3.1 \[char167] 4.1.12. The addition is +performed in a manner that results in the correct MPI_Aint +representation of the output address, as if the process that +originally produced \fIbase\fP had called: + +.nf + \fBMPI_Get_address\fP ((char *) \fIbase\fP + \fIdisp\fP, &\fIresult\fP); +.fi +.sp +.ft R +\fBMPI_Aint_diff\fP produces a new MPI_Aint value that is equivalent +to the difference between \fIaddr1\fP and \fIaddr2\fP arguments, where +\fIaddr1\fP and \fIaddr2\fP represent addresses returned by calls to +\fBMPI_Get_address\fP. 
The resulting address is valid only at the +process that generated \fIaddr1\fP and \fIaddr2\fP, and \fIaddr1\fP +and \fIaddr2\fP must correspond to locations in the same object in the +same process, as described in MPI-3.1 \[char167] 4.1.12. The difference is +calculated in a manner that results in the signed difference from +\fIaddr1\fP to \fIaddr2\fP, as if the process that originally produced +the addresses had called (char *) \fIaddr1\fP - (char *) \fIaddr2\fP +on the addresses initially passed to \fBMPI_Get_address\fP. + +.SH SEE ALSO +.ft R +.sp +MPI_Get_address diff --git a/ompi/mpi/man/man3/MPI_Aint_diff.3in b/ompi/mpi/man/man3/MPI_Aint_diff.3in new file mode 100644 index 00000000000..5fb829b5fb7 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Aint_diff.3in @@ -0,0 +1 @@ +.so man3/MPI_Aint_add.3 diff --git a/ompi/mpi/man/man3/MPI_Allgather.3in b/ompi/mpi/man/man3/MPI_Allgather.3in index df6a132820b..59d34c6e482 100644 --- a/ompi/mpi/man/man3/MPI_Allgather.3in +++ b/ompi/mpi/man/man3/MPI_Allgather.3in @@ -15,7 +15,7 @@ #include int MPI_Allgather(const void\fI *sendbuf\fP, int \fI sendcount\fP, MPI_Datatype\fI sendtype\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, - MPI_Datatype\fI recvtype\fP, MPI_Comm\fI comm\fP) + MPI_Datatype\fI recvtype\fP, MPI_Comm\fI comm\fP) int MPI_Iallgather(const void\fI *sendbuf\fP, int \fI sendcount\fP, MPI_Datatype\fI sendtype\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, @@ -25,11 +25,11 @@ int MPI_Iallgather(const void\fI *sendbuf\fP, int \fI sendcount\fP, .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_ALLGATHER(\fISENDBUF\fP,\fI SENDCOUNT\fP,\fI SENDTYPE\fP,\fI RECVBUF\fP,\fI RECVCOUNT\fP,\fI - RECVTYPE\fP,\fI COMM\fP,\fI IERROR\fP) +MPI_ALLGATHER(\fISENDBUF\fP,\fI SENDCOUNT\fP,\fI SENDTYPE\fP,\fI RECVBUF\fP,\fI RECVCOUNT\fP,\fI + RECVTYPE\fP,\fI COMM\fP,\fI IERROR\fP) \fISENDBUF\fP (*), \fIRECVBUF\fP (*) INTEGER \fISENDCOUNT\fP,\fI SENDTYPE\fP,\fI RECVCOUNT\fP,\fI RECVTYPE\fP,\fI COMM\fP, - INTEGER \fIIERROR\fP + INTEGER 
\fIIERROR\fP MPI_IALLGATHER(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, RECVTYPE, COMM, REQUEST, IERROR\fP) @@ -49,25 +49,25 @@ void MPI::Comm::Allgather(const void* \fIsendbuf\fP, int \fIsendcount\fP, const .SH INPUT PARAMETERS .ft R .TP 1i -sendbuf +sendbuf Starting address of send buffer (choice). .TP 1i -sendcount +sendcount Number of elements in send buffer (integer). .TP 1i -sendtype +sendtype Datatype of send buffer elements (handle). .TP 1i -recvbuf +recvbuf Starting address of recv buffer (choice). .TP 1i -recvcount +recvcount Number of elements received from any process (integer). .TP 1i -recvtype +recvtype Datatype of receive buffer elements (handle). .TP 1i -comm +comm Communicator (handle). .SH OUTPUT PARAMETERS @@ -81,19 +81,19 @@ request Request (handle, non-blocking only). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Allgather is similar to MPI_Gather, except that all processes receive the result, instead of just the root. In other words, all processes contribute to the result, and all processes receive the result. +MPI_Allgather is similar to MPI_Gather, except that all processes receive the result, instead of just the root. In other words, all processes contribute to the result, and all processes receive the result. .sp The type signature associated with sendcount, sendtype at a process must be equal to the type signature associated with recvcount, recvtype at any other process. .sp -The outcome of a call to MPI_Allgather(\&...) is as if all processes executed n calls to +The outcome of a call to MPI_Allgather(\&...) is as if all processes executed n calls to .sp .nf MPI_Gather(sendbuf,sendcount,sendtype,recvbuf,recvcount, - recvtype,root,comm), + recvtype,root,comm), .fi .sp .fi @@ -102,39 +102,39 @@ for root = 0 , ..., n-1. The rules for correct usage of MPI_Allgather are easily \fBExample:\fR The all-gather version of Example 1 in MPI_Gather. 
Using MPI_Allgather, we will gather 100 ints from every process in the group to every process. .sp .nf -MPI_Comm comm; - int gsize,sendarray[100]; - int *rbuf; - \&... - MPI_Comm_size( comm, &gsize); - rbuf = (int *)malloc(gsize*100*sizeof(int)); - MPI_Allgather( sendarray, 100, MPI_INT, rbuf, 100, MPI_INT, comm); +MPI_Comm comm; + int gsize,sendarray[100]; + int *rbuf; + \&... + MPI_Comm_size( comm, &gsize); + rbuf = (int *)malloc(gsize*100*sizeof(int)); + MPI_Allgather( sendarray, 100, MPI_INT, rbuf, 100, MPI_INT, comm); .fi .sp -After the call, every process has the group-wide concatenation of the sets of data. +After the call, every process has the group-wide concatenation of the sets of data. .SH USE OF IN-PLACE OPTION When the communicator is an intracommunicator, you can perform an all-gather operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of \fIsendbuf\fR. In this case, \fIsendcount\fR and \fIsendtype\fR are ignored. The input data of each process is assumed to be in the area where that process would receive its own contribution to the receive buffer. Specifically, the outcome of a call to MPI_Allgather that used the in-place option is identical to the case in which all processes executed \fIn\fR calls to .sp .nf - MPI_GATHER ( MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, recvbuf, + MPI_GATHER ( MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, recvbuf, recvcount, recvtype, root, comm ) for root =0, ... , n-1. -.fi +.fi .sp Note that MPI_IN_PLACE is a special kind of value; it has the same restrictions on its use as MPI_BOTTOM. .sp -Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. +Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. 
.sp .SH WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR .sp -When the communicator is an inter-communicator, the gather operation occurs in two phases. The data is gathered from all the members of the first group and received by all the members of the second group. Then the data is gathered from all the members of the second group and received by all the members of the first. The operation, however, need not be symmetric. The number of items sent by the processes in first group need not be equal to the number of items sent by the the processes in the second group. You can move data in only one direction by giving \fIsendcount\fR a value of 0 for communication in the reverse direction. +When the communicator is an inter-communicator, the gather operation occurs in two phases. The data is gathered from all the members of the first group and received by all the members of the second group. Then the data is gathered from all the members of the second group and received by all the members of the first. The operation, however, need not be symmetric. The number of items sent by the processes in the first group need not be equal to the number of items sent by the processes in the second group. You can move data in only one direction by giving \fIsendcount\fR a value of 0 for communication in the reverse direction. .sp The first group defines the root process. The root process uses MPI_ROOT as the value of \fIroot\fR. All other processes in the first group use MPI_PROC_NULL as the value of \fIroot\fR. All processes in the second group use the rank of the root process in the first group as the value of \fIroot\fR. .sp When the communicator is an intra-communicator, these groups are the same, and the operation occurs in a single phase. -.sp +.sp .SH ERRORS @@ -142,7 +142,7 @@ Almost all MPI routines return an error value; C routines as the value of the fu .sp Before the error value is returned, the current MPI error handler is called.
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler -may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Allgatherv.3in b/ompi/mpi/man/man3/MPI_Allgatherv.3in index a315966453a..b075f58cc3d 100644 --- a/ompi/mpi/man/man3/MPI_Allgatherv.3in +++ b/ompi/mpi/man/man3/MPI_Allgatherv.3in @@ -1,7 +1,7 @@ .\" -*- nroff -*- .\" Copyright 2013 Los Alamos National Security, LLC. All rights reserved. .\" Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. -.\" Copyright 2007-2008 Sun Microsystems, Inc. +.\" Copyright 2007-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation .\" $COPYRIGHT$ .TH MPI_Allgatherv 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" @@ -42,7 +42,7 @@ MPI_IALLGATHERV(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, .SH C++ Syntax .nf #include -void MPI::Comm::Allgatherv(const void* \fIsendbuf\fP, int \fIsendcount\fP, +void MPI::Comm::Allgatherv(const void* \fIsendbuf\fP, int \fIsendcount\fP, const MPI::Datatype& \fIsendtype\fP, void* \fIrecvbuf\fP, const int \fIrecvcounts\fP[], const int \fIdispls\fP[], const MPI::Datatype& \fIrecvtype\fP) const = 0 @@ -51,31 +51,31 @@ void MPI::Comm::Allgatherv(const void* \fIsendbuf\fP, int \fIsendcount\fP, .SH INPUT PARAMETERS .ft R .TP 1i -sendbuf +sendbuf Starting address of send buffer (choice). .TP 1i -sendcount +sendcount Number of elements in send buffer (integer). .TP 1i -sendtype +sendtype Datatype of send buffer elements (handle). 
.TP 1i -recvcount +recvcounts Integer array (of length group size) containing the number of elements that are received from each process. .TP 1i -displs +displs Integer array (of length group size). Entry i specifies the displacement (relative to recvbuf) at which to place the incoming data from process i. .TP 1i -recvtype +recvtype Datatype of receive buffer elements (handle). .TP 1i -comm +comm Communicator (handle). .sp .SH OUTPUT PARAMETERS .ft R .TP 1i -recvbuf +recvbuf Address of receive buffer (choice). .TP 1i request @@ -83,52 +83,52 @@ Request (handle, non-blocking only). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Allgatherv is similar to MPI_Allgather in that all processes gather data from all other processes, except that each process can send a different amount of data. The block of data sent from the jth process is received by every process and placed in the jth block of the buffer +MPI_Allgatherv is similar to MPI_Allgather in that all processes gather data from all other processes, except that each process can send a different amount of data. The block of data sent from the jth process is received by every process and placed in the jth block of the buffer .I recvbuf. .sp -The type signature associated with sendcount, sendtype, at process j must be equal to the type signature associated with recvcounts[j], recvtype at any other process. +The type signature associated with sendcount, sendtype, at process j must be equal to the type signature associated with recvcounts[j], recvtype at any other process. .sp -The outcome is as if all processes executed calls to +The outcome is as if all processes executed calls to .nf MPI_Gatherv(sendbuf,sendcount,sendtype,recvbuf,recvcount, displs,recvtype,root,comm) .fi .sp -for root = 0 , ..., n-1. The rules for correct usage of MPI_Allgatherv are easily found from the corresponding rules for MPI_Gatherv. +for root = 0 , ..., n-1.
The rules for correct usage of MPI_Allgatherv are easily found from the corresponding rules for MPI_Gatherv. .SH USE OF IN-PLACE OPTION When the communicator is an intracommunicator, you can perform an all-gather operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of \fIsendbuf\fR. In this case, \fIsendcount\fR and \fIsendtype\fR are ignored. The input data of each process is assumed to be in the area where that process would receive its own contribution to the receive buffer. Specifically, the outcome of a call to MPI_Allgather that used the in-place option is identical to the case in which all processes executed \fIn\fR calls to .sp .nf - MPI_GATHERV ( MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, recvbuf, + MPI_GATHERV ( MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, recvbuf, recvcounts, displs, recvtype, root, comm ) for root =0, ... , n-1. -.fi +.fi .sp Note that MPI_IN_PLACE is a special kind of value; it has the same restrictions on its use as MPI_BOTTOM. .sp -Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. +Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. .sp .SH WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR .sp -When the communicator is an inter-communicator, the gather operation occurs in two phases. The data is gathered from all the members of the first group, concatenated, and received by all the members of the second group. Then the data is gathered from all the members of the second group, concatenated, and received by all the members of the first. The send buffer arguments in the one group must be consistent with the receive buffer arguments in the other group, and vice versa. The operation must exhibit symmetric, full-duplex behavior. 
+When the communicator is an inter-communicator, the gather operation occurs in two phases. The data is gathered from all the members of the first group, concatenated, and received by all the members of the second group. Then the data is gathered from all the members of the second group, concatenated, and received by all the members of the first. The send buffer arguments in the one group must be consistent with the receive buffer arguments in the other group, and vice versa. The operation must exhibit symmetric, full-duplex behavior. .sp The first group defines the root process. The root process uses MPI_ROOT as the value of \fIroot\fR. All other processes in the first group use MPI_PROC_NULL as the value of \fIroot\fR. All processes in the second group use the rank of the root process in the first group as the value of \fIroot\fR. .sp When the communicator is an intra-communicator, these groups are the same, and the operation occurs in a single phase. -.sp +.sp .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler -may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Alloc_mem.3in b/ompi/mpi/man/man3/MPI_Alloc_mem.3in index d7b6c7a4df9..a6a851ccf74 100644 --- a/ompi/mpi/man/man3/MPI_Alloc_mem.3in +++ b/ompi/mpi/man/man3/MPI_Alloc_mem.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Alloc_mem 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Alloc_mem \fP \- Allocates a specified memory segment. +\fBMPI_Alloc_mem \fP \- Allocates a specified memory segment. .SH SYNTAX .ft R @@ -18,9 +18,9 @@ int MPI_Alloc_mem(MPI_Aint \fIsize\fP, MPI_Info \fIinfo\fP, void *\fIbaseptr\fP) .SH Fortran Syntax (see FORTRAN NOTES) .nf INCLUDE 'mpif.h' -MPI_ALLOC_MEM(\fISIZE, INFO, BASEPTR, IERROR\fP) - INTEGER \fIINFO, IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fISIZE, BASEPTR\fP +MPI_ALLOC_MEM(\fISIZE, INFO, BASEPTR, IERROR\fP) + INTEGER \fIINFO, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fISIZE, BASEPTR\fP .fi .SH C++ Syntax @@ -33,45 +33,45 @@ void* MPI::Alloc_mem(MPI::Aint \fIsize\fP, const MPI::Info& \fIinfo\fP) .ft R .TP 1i size -Size of memory segment in bytes (nonnegative integer). +Size of memory segment in bytes (nonnegative integer). .ft R .TP 1i info -Info argument (handle). +Info argument (handle). .SH OUTPUT PARAMETERS .ft R .TP 1i baseptr -Pointer to beginning of memory segment allocated. +Pointer to beginning of memory segment allocated. .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Alloc_mem allocates \fIsize\fP bytes of memory. The starting address -of this memory is returned in the variable \fIbase\fP. +of this memory is returned in the variable \fIbaseptr\fP. .sp .SH FORTRAN NOTES .ft R There is no portable FORTRAN 77 syntax for using MPI_Alloc_mem. There is no portable Fortran syntax for using pointers returned -from MPI_Alloc_mem. However, MPI_Alloc_mem can be used with Sun +from MPI_Alloc_mem. However, MPI_Alloc_mem can be used with Sun Fortran compilers.
.sp -From FORTRAN 77, you can use the following non-standard +From FORTRAN 77, you can use the following non-standard declarations for the SIZE and BASEPTR arguments: .nf INCLUDE "mpif.h" INTEGER*MPI_ADDRESS_KIND SIZE, BASEPTR .fi .sp -From either FORTRAN 77 or Fortran 90, you can use "Cray pointers" -for the BASEPTR argument. Cray pointers are described further in -the Fortran User's Guide and are supported by many Fortran compilers. -For example, +From either FORTRAN 77 or Fortran 90, you can use "Cray pointers" +for the BASEPTR argument. Cray pointers are described further in +the Fortran User's Guide and are supported by many Fortran compilers. +For example, .sp .nf INCLUDE "mpif.h" @@ -93,7 +93,7 @@ Almost all MPI routines return an error value; C routines as the value of the fu .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler -may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Allreduce.3in b/ompi/mpi/man/man3/MPI_Allreduce.3in index 4bb731d95d8..bad1c72be3d 100644 --- a/ompi/mpi/man/man3/MPI_Allreduce.3in +++ b/ompi/mpi/man/man3/MPI_Allreduce.3in @@ -36,8 +36,8 @@ MPI_IALLREDUCE(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, REQUEST, IERROR\f .SH C++ Syntax .nf #include <mpi.h> -void MPI::Comm::Allreduce(const void* \fIsendbuf\fP, void* \fIrecvbuf\fP, - int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP, const +void MPI::Comm::Allreduce(const void* \fIsendbuf\fP, void* \fIrecvbuf\fP, + int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP, const MPI::Op& \fIop\fP) const=0 .fi @@ -70,68 +70,68 @@ Request (handle, non-blocking only). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Same as MPI_Reduce except that the result appears in the receive buffer of all the group members. +Same as MPI_Reduce except that the result appears in the receive buffer of all the group members. .sp \fBExample 1:\fR A routine that computes the product of a vector and an array that are distributed across a group of processes and returns the answer at all nodes (compare with Example 2, with MPI_Reduce, below). .sp .nf -SUBROUTINE PAR_BLAS2(m, n, a, b, c, comm) -REAL a(m), b(m,n) ! local slice of array -REAL c(n) ! result -REAL sum(n) -INTEGER n, comm, i, j, ierr - -! local sum -DO j= 1, n - sum(j) = 0.0 - DO i = 1, m - sum(j) = sum(j) + a(i)*b(i,j) - END DO -END DO - -! global sum -CALL MPI_ALLREDUCE(sum, c, n, MPI_REAL, MPI_SUM, comm, ierr) - -! return result at all nodes +SUBROUTINE PAR_BLAS2(m, n, a, b, c, comm) +REAL a(m), b(m,n) ! local slice of array +REAL c(n) ! result +REAL sum(n) +INTEGER n, comm, i, j, ierr + +! local sum +DO j= 1, n + sum(j) = 0.0 + DO i = 1, m + sum(j) = sum(j) + a(i)*b(i,j) + END DO +END DO + +! global sum +CALL MPI_ALLREDUCE(sum, c, n, MPI_REAL, MPI_SUM, comm, ierr) + +!
return result at all nodes RETURN .fi .sp \fBExample 2:\fR A routine that computes the product of a vector and an array that are distributed across a group of processes and returns the answer at node zero. .sp .nf -SUBROUTINE PAR_BLAS2(m, n, a, b, c, comm) -REAL a(m), b(m,n) ! local slice of array -REAL c(n) ! result -REAL sum(n) -INTEGER n, comm, i, j, ierr - -! local sum -DO j= 1, n - sum(j) = 0.0 - DO i = 1, m - sum(j) = sum(j) + a(i)*b(i,j) - END DO -END DO - -! global sum -CALL MPI_REDUCE(sum, c, n, MPI_REAL, MPI_SUM, 0, comm, ierr) - -! return result at node zero (and garbage at the other nodes) +SUBROUTINE PAR_BLAS2(m, n, a, b, c, comm) +REAL a(m), b(m,n) ! local slice of array +REAL c(n) ! result +REAL sum(n) +INTEGER n, comm, i, j, ierr + +! local sum +DO j= 1, n + sum(j) = 0.0 + DO i = 1, m + sum(j) = sum(j) + a(i)*b(i,j) + END DO +END DO + +! global sum +CALL MPI_REDUCE(sum, c, n, MPI_REAL, MPI_SUM, 0, comm, ierr) + +! return result at node zero (and garbage at the other nodes) RETURN .fi .SH USE OF IN-PLACE OPTION -When the communicator is an intracommunicator, you can perform an all-reduce operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of \fIsendbuf\fR at all processes. +When the communicator is an intracommunicator, you can perform an all-reduce operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of \fIsendbuf\fR at all processes. .sp Note that MPI_IN_PLACE is a special kind of value; it has the same restrictions on its use as MPI_BOTTOM. .sp -Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. +Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. 
.sp .SH WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR -When the communicator is an inter-communicator, the reduce operation occurs in two phases. The data is reduced from all the members of the first group and received by all the members of the second group. Then the data is reduced from all the members of the second group and received by all the members of the first. The operation exhibits a symmetric, full-duplex behavior. +When the communicator is an inter-communicator, the reduce operation occurs in two phases. The data is reduced from all the members of the first group and received by all the members of the second group. Then the data is reduced from all the members of the second group and received by all the members of the first. The operation exhibits a symmetric, full-duplex behavior. .sp When the communicator is an intra-communicator, these groups are the same, and the operation occurs in a single phase. .SH NOTES ON COLLECTIVE OPERATIONS @@ -139,11 +139,11 @@ When the communicator is an intra-communicator, these groups are the same, and t The reduction functions ( .I MPI_Op ) do not return an error value. As a result, -if the functions detect an error, all they can do is either call +if the functions detect an error, all they can do is either call .I MPI_Abort or silently skip the problem. Thus, if you change the error handler from .I MPI_ERRORS_ARE_FATAL -to something else, for example, +to something else, for example, .I MPI_ERRORS_RETURN , then no error may be indicated. @@ -153,6 +153,6 @@ Almost all MPI routines return an error value; C routines as the value of the fu .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler -may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. +may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Alltoall.3in b/ompi/mpi/man/man3/MPI_Alltoall.3in index 8103b622561..f5be926a6d9 100644 --- a/ompi/mpi/man/man3/MPI_Alltoall.3in +++ b/ompi/mpi/man/man3/MPI_Alltoall.3in @@ -46,7 +46,7 @@ MPI_IALLTOALL(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, .nf #include <mpi.h> void MPI::Comm::Alltoall(const void* \fIsendbuf\fP, int \fIsendcount\fP, - const MPI::Datatype& \fIsendtype\fP, void* \fIrecvbuf\fP, + const MPI::Datatype& \fIsendtype\fP, void* \fIrecvbuf\fP, int \fIrecvcount\fP, const MPI::Datatype& \fIrecvtype\fP) .fi @@ -82,7 +82,7 @@ Request (handle, non-blocking only). .ft R .TP 1.2i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -108,7 +108,7 @@ amount of data received, pairwise, between every pair of processes. WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR .sp -When the communicator is an inter-communicator, the gather operation occurs in two phases. The data is gathered from all the members of the first group and received by all the members of the second group. Then the data is gathered from all the members of the second group and received by all the members of the first. The operation exhibits a symmetric, full-duplex behavior. +When the communicator is an inter-communicator, the gather operation occurs in two phases. The data is gathered from all the members of the first group and received by all the members of the second group. Then the data is gathered from all the members of the second group and received by all the members of the first. The operation exhibits a symmetric, full-duplex behavior. .sp The first group defines the root process.
The root process uses MPI_ROOT as the value of \fIroot\fR. All other processes in the first group use MPI_PROC_NULL as the value of \fIroot\fR. All processes in the second group use the rank of the root process in the first group as the value of \fIroot\fR. .sp @@ -117,7 +117,7 @@ When the communicator is an intra-communicator, these groups are the same, and t .SH USE OF IN-PLACE OPTION When the communicator is an intracommunicator, you can perform an all-to-all operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of \fIsendbuf\fR. In this case, \fIsendcount\fR and \fIsendtype\fR are ignored. The input data of each process is assumed to be in the area where that process would receive its own contribution to the receive buffer. -.sp +.sp .SH NOTES .sp All arguments on all processes are significant. The \fIcomm\fP argument, @@ -143,7 +143,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Alltoallv.3in b/ompi/mpi/man/man3/MPI_Alltoallv.3in index 1d4822a9f60..3ae6edbab95 100644 --- a/ompi/mpi/man/man3/MPI_Alltoallv.3in +++ b/ompi/mpi/man/man3/MPI_Alltoallv.3in @@ -39,7 +39,7 @@ MPI_ALLTOALLV(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, INTEGER \fICOMM, IERROR\fP MPI_IALLTOALLV(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, - RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPE, REQUEST, COMM, IERROR\fP) + RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPE, COMM, REQUEST, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPE\fP @@ -126,7 +126,7 @@ communicator \fIcomm\fP) independent point-to-point communications .sp Process j sends the k-th block of its local \fIsendbuf\fP to process k, which places the data in the j-th block of its local -\fIrecvbuf\fP. +\fIrecvbuf\fP. .sp When a pair of processes exchanges data, each may pass different element count and datatype arguments so long as the sender specifies @@ -140,12 +140,12 @@ different amounts of data to different processes in the communicator. .sp WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR .sp -When the communicator is an inter-communicator, the gather operation occurs in two phases. The data is gathered from all the members of the first group and received by all the members of the second group. Then the data is gathered from all the members of the second group and received by all the members of the first. The operation exhibits a symmetric, full-duplex behavior. +When the communicator is an inter-communicator, the gather operation occurs in two phases. The data is gathered from all the members of the first group and received by all the members of the second group. Then the data is gathered from all the members of the second group and received by all the members of the first. The operation exhibits a symmetric, full-duplex behavior. .sp The first group defines the root process. The root process uses MPI_ROOT as the value of \fIroot\fR. 
All other processes in the first group use MPI_PROC_NULL as the value of \fIroot\fR. All processes in the second group use the rank of the root process in the first group as the value of \fIroot\fR. .sp When the communicator is an intra-communicator, these groups are the same, and the operation occurs in a single phase. -.sp +.sp .SH USE OF IN-PLACE OPTION When the communicator is an intracommunicator, you can perform an all-to-all operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of \fIsendbuf\fR. In this case, \fIsendcounts\fR, \fIsdispls\fP, and \fIsendtype\fR are ignored. The input data of each process is assumed to be in the area where that process would receive its own contribution to the receive buffer. @@ -175,7 +175,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Alltoallw.3in b/ompi/mpi/man/man3/MPI_Alltoallw.3in index cf95e513bc6..fbc0c384858 100644 --- a/ompi/mpi/man/man3/MPI_Alltoallw.3in +++ b/ompi/mpi/man/man3/MPI_Alltoallw.3in @@ -141,12 +141,12 @@ communicator. WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR .sp -When the communicator is an inter-communicator, the gather operation occurs in two phases. The data is gathered from all the members of the first group and received by all the members of the second group. Then the data is gathered from all the members of the second group and received by all the members of the first. The operation exhibits a symmetric, full-duplex behavior. +When the communicator is an inter-communicator, the gather operation occurs in two phases. 
The data is gathered from all the members of the first group and received by all the members of the second group. Then the data is gathered from all the members of the second group and received by all the members of the first. The operation exhibits a symmetric, full-duplex behavior. .sp The first group defines the root process. The root process uses MPI_ROOT as the value of \fIroot\fR. All other processes in the first group use MPI_PROC_NULL as the value of \fIroot\fR. All processes in the second group use the rank of the root process in the first group as the value of \fIroot\fR. .sp When the communicator is an intra-communicator, these groups are the same, and the operation occurs in a single phase. -.sp +.sp .SH USE OF IN-PLACE OPTION When the communicator is an intracommunicator, you can perform an all-to-all operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of \fIsendbuf\fR. In this case, \fIsendcounts\fR, \fIsdispls\fP, and \fIsendtypes\fR are ignored. The input data of each process is assumed to be in the area where that process would receive its own contribution to the receive buffer. @@ -176,7 +176,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Attr_delete.3in b/ompi/mpi/man/man3/MPI_Attr_delete.3in index 058d3163985..30ee14ad7ac 100644 --- a/ompi/mpi/man/man3/MPI_Attr_delete.3in +++ b/ompi/mpi/man/man3/MPI_Attr_delete.3in @@ -18,7 +18,7 @@ int MPI_Attr_delete(MPI_Comm\fI comm\fP, int\fI keyval\fP) .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_ATTR_DELETE(\fICOMM\fP,\fI KEYVAL\fP, \fIIERROR\fP) +MPI_ATTR_DELETE(\fICOMM\fP,\fI KEYVAL\fP, \fIIERROR\fP) INTEGER \fICOMM\fP,\fI KEYVAL\fP,\fI IERROR\fP @@ -36,16 +36,16 @@ The key value of the deleted attribute (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION Note that use of this routine is \fIdeprecated\fP as of MPI-2, and was \fIdeleted\fP in MPI-3. Please use MPI_Comm_delete_attr. This function does not have a C++ or mpi_f08 binding. .sp -Delete attribute from cache by key. This function invokes the attribute delete function delete_fn specified when the keyval was created. The call will fail if the delete_fn function returns an error code other than MPI_SUCCESS. +Delete attribute from cache by key. This function invokes the attribute delete function delete_fn specified when the keyval was created. The call will fail if the delete_fn function returns an error code other than MPI_SUCCESS. -Whenever a communicator is replicated using the function MPI_Comm_dup, all callback copy functions for attributes that are currently set are invoked (in arbitrary order). Whenever a communicator is deleted using the function MPI_Comm_free, all callback delete functions for attributes that are currently set are invoked. +Whenever a communicator is replicated using the function MPI_Comm_dup, all callback copy functions for attributes that are currently set are invoked (in arbitrary order). Whenever a communicator is deleted using the function MPI_Comm_free, all callback delete functions for attributes that are currently set are invoked. 
.SH NOTES @@ -61,7 +61,7 @@ Almost all MPI routines return an error value; C routines as the value of the fu .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler -may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Comm_delete_attr diff --git a/ompi/mpi/man/man3/MPI_Attr_get.3in b/ompi/mpi/man/man3/MPI_Attr_get.3in index 3edb4148b92..f355acad902 100644 --- a/ompi/mpi/man/man3/MPI_Attr_get.3in +++ b/ompi/mpi/man/man3/MPI_Attr_get.3in @@ -45,7 +45,7 @@ True if an attribute value was extracted; false if no attribute is associated wi .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -54,14 +54,14 @@ was \fIdeleted\fP in MPI-3. Please use MPI_Comm_create_attr. This function does not have a C++ or mpi_f08 binding. .sp Retrieves attribute value by key. The call is erroneous if there is no key -with value keyval. On the other hand, the call is correct if the key value exists, but no attribute is attached on comm for that key; in such case, the call returns flag = false. In particular MPI_KEYVAL_INVALID is an erroneous key value. +with value keyval. On the other hand, the call is correct if the key value exists, but no attribute is attached on comm for that key; in such case, the call returns flag = false. In particular MPI_KEYVAL_INVALID is an erroneous key value. 
.SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler -may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO diff --git a/ompi/mpi/man/man3/MPI_Attr_put.3in b/ompi/mpi/man/man3/MPI_Attr_put.3in index e78ba8df947..32804686c1f 100644 --- a/ompi/mpi/man/man3/MPI_Attr_put.3in +++ b/ompi/mpi/man/man3/MPI_Attr_put.3in @@ -39,7 +39,7 @@ Attribute value. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -63,7 +63,7 @@ Almost all MPI routines return an error value; C routines as the value of the fu .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler -may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Barrier.3in b/ompi/mpi/man/man3/MPI_Barrier.3in index 3ec2452404a..e87e13e11ba 100644 --- a/ompi/mpi/man/man3/MPI_Barrier.3in +++ b/ompi/mpi/man/man3/MPI_Barrier.3in @@ -14,7 +14,7 @@ #include <mpi.h> int MPI_Barrier(MPI_Comm \fIcomm\fP) -int MPI_Ibarrier(MPI_Comm \fIcomm\fP, MPI_Request \fIrequest\fP) +int MPI_Ibarrier(MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) .fi .SH Fortran Syntax @@ -29,8 +29,8 @@ MPI_IBARRIER(\fICOMM\fP, \fIREQUEST\fP, \fIIERROR\fP) .fi .SH C++ Syntax .nf -import mpi.*; -void MPI.COMM_WORLD.Barrier() +#include <mpi.h> +void MPI::Comm::Barrier() const = 0 .fi .SH INPUT PARAMETER @@ -39,14 +39,14 @@ void MPI.COMM_WORLD.Barrier() comm Communicator (handle). -.SH OUTPUT PARAMETER +.SH OUTPUT PARAMETERS .ft R .TP 1i request Request (handle, non-blocking only). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -55,13 +55,13 @@ barrier. .SH WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR .sp -When the communicator is an inter-communicator, the barrier operation is performed across all processes in both groups. All processes in the first group may exit the barrier when all processes in the second group have entered the barrier. +When the communicator is an inter-communicator, the barrier operation is performed across all processes in both groups. All processes in the first group may exit the barrier when all processes in the second group have entered the barrier. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object.
.sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Bcast.3in b/ompi/mpi/man/man3/MPI_Bcast.3in index 8c033f5c712..c3608a43db3 100644 --- a/ompi/mpi/man/man3/MPI_Bcast.3in +++ b/ompi/mpi/man/man3/MPI_Bcast.3in @@ -24,7 +24,7 @@ int MPI_Ibcast(void \fI*buffer\fP, int\fI count\fP, MPI_Datatype\fI datatype\fP, INCLUDE 'mpif.h' MPI_BCAST(\fIBUFFER\fP,\fI COUNT\fP, \fIDATATYPE\fP,\fI ROOT\fP,\fI COMM\fP,\fI IERROR\fP) \fIBUFFER\fP(*) - INTEGER \fICOUNT\fP,\fI DATATYPE\fP,\fI ROOT\fP,\fI COMM\fP,\fI IERROR\fP + INTEGER \fICOUNT\fP,\fI DATATYPE\fP,\fI ROOT\fP,\fI COMM\fP,\fI IERROR\fP MPI_IBCAST(\fIBUFFER\fP,\fI COUNT\fP, \fIDATATYPE\fP,\fI ROOT\fP,\fI COMM\fP, \fIREQUEST\fP,\fI IERROR\fP) \fIBUFFER\fP(*) @@ -63,21 +63,21 @@ request Request (handle, non-blocking only). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Bcast broadcasts a message from the process with rank root to all processes of the group, itself included. It is called by all members of group using the same arguments for comm, root. On return, the contents of root's communication buffer has been copied to all processes. 
+MPI_Bcast broadcasts a message from the process with rank root to all processes of the group, itself included. It is called by all members of group using the same arguments for comm, root. On return, the contents of root's communication buffer have been copied to all processes. .sp -General, derived datatypes are allowed for datatype. The type signature of count, datatype on any process must be equal to the type signature of count, datatype at the root. This implies that the amount of data sent must be equal to the amount received, pairwise between each process and the root. MPI_Bcast and all other data-movement collective routines make this restriction. Distinct type maps between sender and receiver are still allowed. +General, derived datatypes are allowed for datatype. The type signature of count, datatype on any process must be equal to the type signature of count, datatype at the root. This implies that the amount of data sent must be equal to the amount received, pairwise between each process and the root. MPI_Bcast and all other data-movement collective routines make this restriction. Distinct type maps between sender and receiver are still allowed. .sp \fBExample:\fR Broadcast 100 ints from process 0 to every process in the group. .nf - MPI_Comm comm; - int array[100]; - int root=0; - \&... - MPI_Bcast( array, 100, MPI_INT, root, comm); + MPI_Comm comm; + int array[100]; + int root=0; + \&... + MPI_Bcast( array, 100, MPI_INT, root, comm); .fi .sp As in many of our sample code fragments, we assume that some of the variables (such as comm in the example above) have been assigned appropriate values. @@ -85,7 +85,7 @@ As in many of our sample code fragments, we assume that some of the variables (s .SH WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR .sp When the communicator is an inter-communicator, the root process in the first group broadcasts data to all the processes in the second group. The first group defines the root process.
That process uses MPI_ROOT as the value of its \fIroot\fR argument. The remaining processes use MPI_PROC_NULL as the value of their \fIroot\fR argument. All processes in the second group use the rank of that root process in the first group as the value of their \fIroot\fR argument. The receive buffer arguments of the processes in the second group must be consistent with the send buffer argument of the root process in the first group. -.sp +.sp .SH NOTES This function does not support the in-place option. .sp @@ -96,5 +96,5 @@ This function does not support the in-place option. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Bsend.3in b/ompi/mpi/man/man3/MPI_Bsend.3in index 9e139f19f39..cd258181bb1 100644 --- a/ompi/mpi/man/man3/MPI_Bsend.3in +++ b/ompi/mpi/man/man3/MPI_Bsend.3in @@ -28,7 +28,7 @@ MPI_BSEND(\fIBUF\fP,\fI COUNT\fP,\fIDATATYPE\fP,\fI DEST\fP,\fI TAG\fP,\fI COMM\ .SH C++ Syntax .nf #include <mpi.h> -void Comm::Bsend(const void* \fIbuf\fP, int \fIcount\fP, const +void Comm::Bsend(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi @@ -57,7 +57,7 @@ Communicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -81,7 +81,7 @@ this code does not allocate enough buffer space: .fi because only enough buffer space is provided for a single send, and the -loop may start a second +loop may start a second .I MPI_Bsend before the first is done making use of the buffer. @@ -90,7 +90,7 @@ In C, you can force the messages to be delivered by MPI_Buffer_detach( &b, &n ); MPI_Buffer_attach( b, n ); -(The +(The .I MPI_Buffer_detach will not complete until all buffered messages are delivered.) @@ -100,7 +100,7 @@ delivered.) Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called.
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Bsend_init.3in b/ompi/mpi/man/man3/MPI_Bsend_init.3in index c35e9ebd942..0f4c9f19c72 100644 --- a/ompi/mpi/man/man3/MPI_Bsend_init.3in +++ b/ompi/mpi/man/man3/MPI_Bsend_init.3in @@ -24,14 +24,14 @@ MPI_BSEND_INIT(\fIBUF\fP,\fI COUNT\fP, \fIDATATYPE\fP,\fI DEST\fP,\fI TAG\fP,\fI \fIIERROR\fP) \fIBUF\fP(\fI*\fP) INTEGER \fICOUNT\fP,\fI DATATYPE\fP, \fIDEST\fP,\fI TAG\fP, - INTEGER \fICOMM\fP,\fI REQUEST\fP,\fI IERROR + INTEGER \fICOMM\fP,\fI REQUEST\fP,\fI IERROR .fi .SH C++ Syntax .nf -import mpi.*; -Prequest MPI.COMM_WORLD.Bsend_init(const void* \fIbuf\fP, int \fIcount\fP, const - Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const +#include +Prequest Comm::Bsend_init(const void* \fIbuf\fP, int \fIcount\fP, const + Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi .SH INPUT PARAMETERS @@ -63,19 +63,19 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Creates a persistent communication request for a buffered mode send, and binds to it all the arguments of a send operation. +Creates a persistent communication request for a buffered mode send, and binds to it all the arguments of a send operation. .sp -A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start. +A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. 
C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Buffer_attach.3in b/ompi/mpi/man/man3/MPI_Buffer_attach.3in index 6d662ba7547..5adfa02d262 100644 --- a/ompi/mpi/man/man3/MPI_Buffer_attach.3in +++ b/ompi/mpi/man/man3/MPI_Buffer_attach.3in @@ -42,11 +42,11 @@ Buffer size, in bytes (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Provides to MPI a buffer in the user's memory to be used for buffering outgoing messages. The buffer is used only by messages sent in buffered mode. Only one buffer can be attached to a process at a time. +Provides to MPI a buffer in the user's memory to be used for buffering outgoing messages. The buffer is used only by messages sent in buffered mode. Only one buffer can be attached to a process at a time. 
.SH NOTES .ft R @@ -73,7 +73,7 @@ MPI_BSEND_OVERHEAD gives the maximum amount of buffer space that may be used by Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Buffer_detach.3in b/ompi/mpi/man/man3/MPI_Buffer_detach.3in index 4b1073b46be..98e8acb142a 100644 --- a/ompi/mpi/man/man3/MPI_Buffer_detach.3in +++ b/ompi/mpi/man/man3/MPI_Buffer_detach.3in @@ -40,24 +40,24 @@ Buffer size, in bytes (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Detach the buffer currently associated with MPI. The call returns the address and the size of the detached buffer. This operation will block until all messages currently in the buffer have been transmitted. Upon return of this function, the user may reuse or deallocate the space taken by the buffer. +Detach the buffer currently associated with MPI. 
The call returns the address and the size of the detached buffer. This operation will block until all messages currently in the buffer have been transmitted. Upon return of this function, the user may reuse or deallocate the space taken by the buffer. .sp \fBExample:\fP Calls to attach and detach buffers. .sp .nf - #define BUFFSIZE 10000 - int size - char *buff; - MPI_Buffer_attach( malloc(BUFFSIZE), BUFFSIZE); - /* a buffer of 10000 bytes can now be used by MPI_Bsend */ - MPI_Buffer_detach( &buff, &size); - /* Buffer size reduced to zero */ - MPI_Buffer_attach( buff, size); - /* Buffer of 10000 bytes available again */ + #define BUFFSIZE 10000 + int size; + char *buff; + MPI_Buffer_attach( malloc(BUFFSIZE), BUFFSIZE); + /* a buffer of 10000 bytes can now be used by MPI_Bsend */ + MPI_Buffer_detach( &buff, &size); + /* Buffer size reduced to zero */ + MPI_Buffer_attach( buff, size); + /* Buffer of 10000 bytes available again */ .fi .SH NOTES @@ -89,7 +89,7 @@ MPI_Buffer_detach both have a first argument of type void*, these arguments are Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors.
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Cancel.3in b/ompi/mpi/man/man3/MPI_Cancel.3in index cc1ac1065db..d7e2379cce0 100644 --- a/ompi/mpi/man/man3/MPI_Cancel.3in +++ b/ompi/mpi/man/man3/MPI_Cancel.3in @@ -19,7 +19,7 @@ int MPI_Cancel(MPI_Request\fI *request\fP) .nf INCLUDE 'mpif.h' MPI_CANCEL(\fIREQUEST\fP, \fIIERROR\fP) - INTEGER \fIREQUEST\fP, \fIIERROR\fP + INTEGER \fIREQUEST\fP, \fIIERROR\fP .fi .SH C++ Syntax @@ -38,21 +38,21 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -The MPI_Cancel operation allows pending communications to be canceled. This is required for cleanup. Posting a send or a receive ties up user resources (send or receive buffers), and a cancel may be needed to free these resources gracefully. +The MPI_Cancel operation allows pending communications to be canceled. This is required for cleanup. Posting a send or a receive ties up user resources (send or receive buffers), and a cancel may be needed to free these resources gracefully. .sp -A call to MPI_Cancel marks for cancellation a pending, nonblocking communication operation (send or receive). The cancel call is local. It returns immediately, possibly before the communication is actually canceled. It is still necessary to complete a communication that has been marked for cancellation, using a call to MPI_Request_free, MPI_Wait, or MPI_Test (or any of the derived operations). +A call to MPI_Cancel marks for cancellation a pending, nonblocking communication operation (send or receive). The cancel call is local. It returns immediately, possibly before the communication is actually canceled. 
It is still necessary to complete a communication that has been marked for cancellation, using a call to MPI_Request_free, MPI_Wait, or MPI_Test (or any of the derived operations). .sp If a communication is marked for cancellation, then an MPI_Wait call for that communication is guaranteed to return, irrespective of the activities of other processes (i.e., MPI_Wait behaves as a local function); similarly if MPI_Test is repeatedly called in a busy wait loop for a canceled communication, then MPI_Test will eventually be successful. .sp -MPI_Cancel can be used to cancel a communication that uses a persistent request (see Section 3.9 in the MPI-1 Standard, "Persistent Communication Requests") in the same way it is used for nonpersistent requests. A successful cancellation cancels the active communication, but not the request itself. After the call to MPI_Cancel and the subsequent call to MPI_Wait or MPI_Test, the request becomes inactive and can be activated for a new communication. +MPI_Cancel can be used to cancel a communication that uses a persistent request (see Section 3.9 in the MPI-1 Standard, "Persistent Communication Requests") in the same way it is used for nonpersistent requests. A successful cancellation cancels the active communication, but not the request itself. After the call to MPI_Cancel and the subsequent call to MPI_Wait or MPI_Test, the request becomes inactive and can be activated for a new communication. .sp -The successful cancellation of a buffered send frees the buffer space occupied by the pending message. +The successful cancellation of a buffered send frees the buffer space occupied by the pending message. .sp -Either the cancellation succeeds or the communication succeeds, but not both. 
If a send is marked for cancellation, then it must be the case that either the send completes normally, in which case the message sent is received at the destination process, or that the send is successfully canceled, in which case no part of the message is received at the destination. Then, any matching receive has to be satisfied by another send. If a receive is marked for cancellation, then it must be the case that either the receive completes normally, or that the receive is successfully canceled, in which case no part of the receive buffer is altered. Then, any matching send has to be satisfied by another receive. +Either the cancellation succeeds or the communication succeeds, but not both. If a send is marked for cancellation, then it must be the case that either the send completes normally, in which case the message sent is received at the destination process, or that the send is successfully canceled, in which case no part of the message is received at the destination. Then, any matching receive has to be satisfied by another send. If a receive is marked for cancellation, then it must be the case that either the receive completes normally, or that the receive is successfully canceled, in which case no part of the receive buffer is altered. Then, any matching send has to be satisfied by another receive. .sp If the operation has been canceled, then information to that effect will be returned in the status argument of the operation that completes the communication. @@ -67,7 +67,7 @@ using MPI_Cancel allows the user to cancel these unsatisfied requests. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Cart_coords.3in b/ompi/mpi/man/man3/MPI_Cart_coords.3in index 2be7df13542..e18c5ce4818 100644 --- a/ompi/mpi/man/man3/MPI_Cart_coords.3in +++ b/ompi/mpi/man/man3/MPI_Cart_coords.3in @@ -26,7 +26,7 @@ MPI_CART_COORDS(\fICOMM\fP,\fI RANK\fP,\fI MAXDIMS\fP,\fI COORDS\fP, \fIIERROR\f .SH C++ Syntax .nf #include -void Cartcomm::Get_coords(int \fIrank\fP, int \fImaxdims\fP, +void Cartcomm::Get_coords(int \fIrank\fP, int \fImaxdims\fP, int \fIcoords\fP[]) const .fi @@ -50,7 +50,7 @@ Integer array (of size ndims,which was defined by MPI_Cart_create call) containi .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -60,5 +60,5 @@ MPI_Cart_coords provies a mapping of ranks to Cartesian coordinates. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Cart_create.3in b/ompi/mpi/man/man3/MPI_Cart_create.3in index 165e8058496..5a04beabf40 100644 --- a/ompi/mpi/man/man3/MPI_Cart_create.3in +++ b/ompi/mpi/man/man3/MPI_Cart_create.3in @@ -29,7 +29,7 @@ MPI_CART_CREATE(\fICOMM_OLD\fP,\fI NDIMS\fP,\fI DIMS\fP,\fI PERIODS\fP,\fI REORD .SH C++ Syntax .nf #include -Cartcomm Intracomm.Create_cart(int[] \fIndims\fP, int[] \fIdims\fP[], +Cartcomm Intracomm::Create_cart(int \fIndims\fP, const int \fIdims\fP[], const bool \fIperiods\fP[], bool \fIreorder\fP) const .fi @@ -61,15 +61,15 @@ Communicator with new Cartesian topology (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Cart_create returns a handle to a new communicator to which the Cartesian topology information is attached. If reorder = false then the rank of each process in the new group is identical to its rank in the old group. Otherwise, the function may reorder the processes (possibly so as to choose a good embedding of the virtual topology onto the physical machine). If the total size of the Cartesian grid is smaller than the size of the group of comm, then some processes are returned MPI_COMM_NULL, in analogy to MPI_Comm_split. The call is erroneous if it specifies a grid that is larger than the group size.
+MPI_Cart_create returns a handle to a new communicator to which the Cartesian topology information is attached. If reorder = false then the rank of each process in the new group is identical to its rank in the old group. Otherwise, the function may reorder the processes (possibly so as to choose a good embedding of the virtual topology onto the physical machine). If the total size of the Cartesian grid is smaller than the size of the group of comm, then some processes are returned MPI_COMM_NULL, in analogy to MPI_Comm_split. The call is erroneous if it specifies a grid that is larger than the group size. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Cart_get.3in b/ompi/mpi/man/man3/MPI_Cart_get.3in index 24b33cd2f94..af1b85730ba 100644 --- a/ompi/mpi/man/man3/MPI_Cart_get.3in +++ b/ompi/mpi/man/man3/MPI_Cart_get.3in @@ -27,8 +27,8 @@ MPI_CART_GET(\fICOMM\fP, \fIMAXDIMS\fP, \fIDIMS\fP, \fIPERIODS\fP, \fICOORDS\fP, .SH C++ Syntax .nf #include -void Cartcomm::Get_topo(int \fImaxdims\fP, int \fIdims\fP[], - bool \fIperiods\fP[], int \fIcoords\fP[]) const +void Cartcomm::Get_topo(int \fImaxdims\fP, int \fIdims\fP[], + bool \fIperiods\fP[], int \fIcoords\fP[]) const .fi .SH INPUT PARAMETERS @@ -50,11 +50,11 @@ periods Periodicity (true/false) for each Cartesian dimension (array of logicals). .TP 1i coords -Coordinates of calling process in Cartesian structure (array of integers). +Coordinates of calling process in Cartesian structure (array of integers). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -64,7 +64,7 @@ The functions MPI_Cartdim_get and MPI_Cart_get return the Cartesian topology inf Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Cart_map.3in b/ompi/mpi/man/man3/MPI_Cart_map.3in index b88698d04df..7f99d39a0cb 100644 --- a/ompi/mpi/man/man3/MPI_Cart_map.3in +++ b/ompi/mpi/man/man3/MPI_Cart_map.3in @@ -28,8 +28,8 @@ MPI_CART_MAP(\fICOMM, NDIMS, DIMS, PERIODS, NEWRANK, IERROR\fP) .SH C++ Syntax .nf #include -int Cartcomm::Map(int \fIndims\fP, const int \fIdims\fP[], - const bool \fIperiods\fP[]) const +int Cartcomm::Map(int \fIndims\fP, const int \fIdims\fP[], + const bool \fIperiods\fP[]) const .fi .SH INPUT PARAMETERS @@ -56,19 +56,19 @@ Reordered rank of the calling process; MPI_UNDEFINED if calling process does not .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Cart_map and MPI_Graph_map can be used to implement all other topology functions. In general they will not be called by the user directly, unless he or she is creating additional virtual topology capability other than that provided by MPI. +MPI_Cart_map and MPI_Graph_map can be used to implement all other topology functions. In general they will not be called by the user directly, unless he or she is creating additional virtual topology capability other than that provided by MPI. .sp -MPI_Cart_map computes an "optimal" placement for the calling process on the physical machine. A possible implementation of this function is to always return the rank of the calling process, that is, not to perform any reordering. +MPI_Cart_map computes an "optimal" placement for the calling process on the physical machine. A possible implementation of this function is to always return the rank of the calling process, that is, not to perform any reordering. 
.SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Cart_rank.3in b/ompi/mpi/man/man3/MPI_Cart_rank.3in index 53bd158f1ee..324b54c2e15 100644 --- a/ompi/mpi/man/man3/MPI_Cart_rank.3in +++ b/ompi/mpi/man/man3/MPI_Cart_rank.3in @@ -35,7 +35,7 @@ comm Communicator with Cartesian structure (handle). .TP 1i coords -Integer array (of size ndims, which was defined by MPI_Cart_create call) specifying the Cartesian coordinates of a process. +Integer array (of size ndims, which was defined by MPI_Cart_create call) specifying the Cartesian coordinates of a process. .SH OUTPUT PARAMETER .ft R @@ -45,7 +45,7 @@ Rank of specified process (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R @@ -56,7 +56,7 @@ translates the logical process coordinates to process ranks as they are used by Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .sp diff --git a/ompi/mpi/man/man3/MPI_Cart_shift.3in b/ompi/mpi/man/man3/MPI_Cart_shift.3in index adc9e23ede6..505417f510e 100644 --- a/ompi/mpi/man/man3/MPI_Cart_shift.3in +++ b/ompi/mpi/man/man3/MPI_Cart_shift.3in @@ -12,7 +12,7 @@ .SH C Syntax .nf #include -int MPI_Cart_shift(MPI_Comm \fIcomm\fP, int\fI direction\fP, int\fI disp\fP, +int MPI_Cart_shift(MPI_Comm \fIcomm\fP, int\fI direction\fP, int\fI disp\fP, int\fI *rank_source\fP, int\fI *rank_dest\fP) .fi @@ -28,8 +28,8 @@ MPI_CART_SHIFT(\fICOMM, DIRECTION, DISP, RANK_SOURCE, .SH C++ Syntax .nf #include -void Cartcomm::Shift(int \fIdirection\fP, int \fIdisp\fP, int& \fIrank_source\fP, - int& \fIrank_dest\fP) const +void Cartcomm::Shift(int \fIdirection\fP, int \fIdisp\fP, int& \fIrank_source\fP, + int& \fIrank_dest\fP) const .fi .SH INPUT PARAMETERS @@ -55,42 +55,42 @@ Rank of destination process (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -If the process topology is a Cartesian structure, an MPI_Sendrecv operation is likely to be used along a coordinate direction to perform a shift of data. As input, MPI_Sendrecv takes the rank of a source process for the receive, and the rank of a destination process for the send. If the function MPI_Cart_shift is called for a Cartesian process group, it provides the calling process with the above identifiers, which then can be passed to MPI_Sendrecv. The user specifies the coordinate direction and the size of the step (positive or negative). The function is local. +If the process topology is a Cartesian structure, an MPI_Sendrecv operation is likely to be used along a coordinate direction to perform a shift of data. As input, MPI_Sendrecv takes the rank of a source process for the receive, and the rank of a destination process for the send. 
If the function MPI_Cart_shift is called for a Cartesian process group, it provides the calling process with the above identifiers, which then can be passed to MPI_Sendrecv. The user specifies the coordinate direction and the size of the step (positive or negative). The function is local. .sp -The direction argument indicates the dimension of the shift, i.e., the coordinate whose value is modified by the shift. The coordinates are numbered from 0 to ndims-1, where ndims is the number of dimensions. +The direction argument indicates the dimension of the shift, i.e., the coordinate whose value is modified by the shift. The coordinates are numbered from 0 to ndims-1, where ndims is the number of dimensions. .sp -\fBNote:\fP The direction argument is in the range [0, n-1] for an n-dimensional Cartesian mesh. +\fBNote:\fP The direction argument is in the range [0, n-1] for an n-dimensional Cartesian mesh. .sp Depending on the periodicity of the Cartesian group in the specified coordinate direction, MPI_Cart_shift provides the identifiers for a circular or an end-off shift. In the case of an end-off shift, the value MPI_PROC_NULL may be returned in rank_source or rank_dest, indicating that the source or the destination for the shift is out of range. .sp -\fBExample:\fP The communicator, comm, has a two-dimensional, periodic, Cartesian topology associated with it. A two-dimensional array of REALs is stored one element per process, in variable A. One wishes to skew this array, by shifting column i (vertically, i.e., along the column) by i steps. +\fBExample:\fP The communicator, comm, has a two-dimensional, periodic, Cartesian topology associated with it. A two-dimensional array of REALs is stored one element per process, in variable A. One wishes to skew this array, by shifting column i (vertically, i.e., along the column) by i steps. .sp .nf - \&.... - C find process rank + \&.... 
+ C find process rank CALL MPI_COMM_RANK(comm, rank, ierr)) - C find Cartesian coordinates - CALL MPI_CART_COORDS(comm, rank, maxdims, coords, - ierr) - C compute shift source and destination - CALL MPI_CART_SHIFT(comm, 0, coords(2), source, - dest, ierr) - C skew array - CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, dest, 0, - source, 0, comm, status, + C find Cartesian coordinates + CALL MPI_CART_COORDS(comm, rank, maxdims, coords, + ierr) + C compute shift source and destination + CALL MPI_CART_SHIFT(comm, 0, coords(2), source, + dest, ierr) + C skew array + CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, dest, 0, + source, 0, comm, status, ierr) .fi .SH NOTE -In Fortran, the dimension indicated by DIRECTION = i has DIMS(i+1) nodes, where DIMS is the array that was used to create the grid. In C, the dimension indicated by direction = i is the dimension specified by dims[i]. +In Fortran, the dimension indicated by DIRECTION = i has DIMS(i+1) nodes, where DIMS is the array that was used to create the grid. In C, the dimension indicated by direction = i is the dimension specified by dims[i]. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Cart_sub.3in b/ompi/mpi/man/man3/MPI_Cart_sub.3in index 3d175a7ef13..80f4c9b7b42 100644 --- a/ompi/mpi/man/man3/MPI_Cart_sub.3in +++ b/ompi/mpi/man/man3/MPI_Cart_sub.3in @@ -21,7 +21,7 @@ int MPI_Cart_sub(MPI_Comm \fIcomm\fP, const int\fI remain_dims\fP[], MPI_Comm\fI INCLUDE 'mpif.h' MPI_CART_SUB(\fICOMM, REMAIN_DIMS, COMM_NEW, IERROR\fP) INTEGER \fICOMM, COMM_NEW, IERROR\fP - LOGICAL \fIREMAIN_DIMS\fP(*) + LOGICAL \fIREMAIN_DIMS\fP(*) .fi .SH C++ Syntax @@ -47,16 +47,16 @@ Communicator containing the subgrid that includes the calling process (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -If a Cartesian topology has been created with MPI_Cart_create, the function MPI_Cart_sub can be used to partition the communicator group into subgroups that form lower-dimensional Cartesian subgrids, and to build for each subgroup a communicator with the associated subgrid Cartesian topology. (This function is closely related to MPI_Comm_split.) +If a Cartesian topology has been created with MPI_Cart_create, the function MPI_Cart_sub can be used to partition the communicator group into subgroups that form lower-dimensional Cartesian subgrids, and to build for each subgroup a communicator with the associated subgrid Cartesian topology. (This function is closely related to MPI_Comm_split.) .sp -\fBExample:\fP Assume that MPI_Cart_create( \&..., comm) has defined a (2 x 3 x 4) grid. Let remain_dims = (true, false, true). Then a call to +\fBExample:\fP Assume that MPI_Cart_create( \&..., comm) has defined a (2 x 3 x 4) grid. Let remain_dims = (true, false, true). 
Then a call to .sp .nf - MPI_Cart_sub(comm, remain_dims, comm_new) + MPI_Cart_sub(comm, remain_dims, comm_new) .fi .sp will create three communicators, each with eight processes in a 2 x 4 Cartesian topology. If remain_dims = (false, false, true) then the call to MPI_Cart_sub(comm, remain_dims, comm_new) will create six nonoverlapping communicators, each with four processes, in a one-dimensional Cartesian topology. @@ -65,7 +65,7 @@ will create three communicators, each with eight processes in a 2 x 4 Cartesian Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Cartdim_get.3in b/ompi/mpi/man/man3/MPI_Cartdim_get.3in index 60d2b27df5b..9159adfa425 100644 --- a/ompi/mpi/man/man3/MPI_Cartdim_get.3in +++ b/ompi/mpi/man/man3/MPI_Cartdim_get.3in @@ -42,17 +42,17 @@ Number of dimensions of the Cartesian structure (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R -MPI_Cartdim_get returns the number of dimensions of the Cartesian structure. +MPI_Cartdim_get returns the number of dimensions of the Cartesian structure. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Close_port.3in b/ompi/mpi/man/man3/MPI_Close_port.3in index d76a9f20350..9899fe3572b 100644 --- a/ompi/mpi/man/man3/MPI_Close_port.3in +++ b/ompi/mpi/man/man3/MPI_Close_port.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Close_port 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Close_port \fP \- Releases the specified network address. +\fBMPI_Close_port \fP \- Releases the specified network address. .SH SYNTAX .ft R @@ -21,7 +21,7 @@ int MPI_Close_port(const char *\fIport_name\fP) INCLUDE 'mpif.h' MPI_CLOSE_PORT(\fIPORT_NAME, IERROR\fP) CHARACTER*(*) \fIPORT_NAME\fP - INTEGER \fIIERROR\fP + INTEGER \fIIERROR\fP .fi .SH C++ Syntax @@ -40,7 +40,7 @@ A port (string). 
.ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -50,5 +50,5 @@ MPI_Close_port releases the network address represented by \fIport_name\fP. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Comm_accept.3in b/ompi/mpi/man/man3/MPI_Comm_accept.3in index a487bf8c0c8..021bd0b7152 100644 --- a/ompi/mpi/man/man3/MPI_Comm_accept.3in +++ b/ompi/mpi/man/man3/MPI_Comm_accept.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_accept 3OpenMPI "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_accept \fP \- Establishes communication with a client. +\fBMPI_Comm_accept \fP \- Establishes communication with a client. 
.SH SYNTAX .ft R @@ -21,7 +21,7 @@ int MPI_Comm_accept(const char *\fIport_name\fP, MPI_Info \fIinfo\fP, int \fIroo INCLUDE 'mpif.h' MPI_COMM_ACCEPT(\fIPORT_NAME, INFO, ROOT, COMM, NEWCOMM, IERROR\fP) CHARACTER*(*) \fIPORT_NAME\fP - INTEGER \fIINFO, ROOT, COMM, NEWCOMM, IERROR\fP + INTEGER \fIINFO, ROOT, COMM, NEWCOMM, IERROR\fP .fi .SH C++ Syntax @@ -38,7 +38,7 @@ port_name Port name (string, used only on \fIroot\fP). .TP 1i info -Options given by root for the accept (handle, used only on root). No options currently supported. +Options given by root for the accept (handle, used only on root). No options currently supported. .TP 1i root Rank in \fIcomm\fP of root node (integer). @@ -53,7 +53,7 @@ newcomm Intercommunicator with client as remote group (handle) .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -66,9 +66,9 @@ The \fIport_name\fP must have been established through a call to MPI_Open_port o Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. .sp -See the MPI man page for a full list of MPI error codes. 
+See the MPI man page for a full list of MPI error codes. .SH SEE ALSO MPI_Comm_connect diff --git a/ompi/mpi/man/man3/MPI_Comm_call_errhandler.3in b/ompi/mpi/man/man3/MPI_Comm_call_errhandler.3in index a15121d2489..f5b82b0599d 100644 --- a/ompi/mpi/man/man3/MPI_Comm_call_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_Comm_call_errhandler.3in @@ -15,37 +15,37 @@ error handler assigned to a communicator .SH C Syntax .nf #include -int MPI_Comm_call_errhandler(MPI_Comm \fIcomm\fP, int \fIerrorcode\fP) +int MPI_Comm_call_errhandler(MPI_Comm \fIcomm\fP, int \fIerrorcode\fP) .fi .SH Fortran Syntax .nf INCLUDE 'mpif.h' MPI_COMM_CALL_ERRHANDLER(\fICOMM, ERRORCODE, IERROR\fP) - INTEGER \fICOMM, ERRORCODE, IERROR\fP + INTEGER \fICOMM, ERRORCODE, IERROR\fP .fi .SH C++ Syntax .nf #include -void MPI::Comm::Call_errhandler(int \fIerrorcode\fP) const +void MPI::Comm::Call_errhandler(int \fIerrorcode\fP) const .fi .SH INPUT PARAMETER .ft R .TP 1.4i -comm +comm communicator with error handler (handle). .ft R .TP 1.4i -errorcode +errorcode error code (integer). .SH OUTPUT PARAMETERS .ft R .TP 1.4i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_compare.3in b/ompi/mpi/man/man3/MPI_Comm_compare.3in index 0527da7a055..8ca324755b2 100644 --- a/ompi/mpi/man/man3/MPI_Comm_compare.3in +++ b/ompi/mpi/man/man3/MPI_Comm_compare.3in @@ -41,11 +41,11 @@ Comm2 (handle). .ft R .TP 1i result -Result of comparison (integer). +Result of comparison (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -55,6 +55,6 @@ MPI_IDENT results if and only if comm1 and comm2 are handles for the same object Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Comm_connect.3in b/ompi/mpi/man/man3/MPI_Comm_connect.3in index eaf6537bb18..5de2b72b696 100644 --- a/ompi/mpi/man/man3/MPI_Comm_connect.3in +++ b/ompi/mpi/man/man3/MPI_Comm_connect.3in @@ -6,14 +6,14 @@ .\" $COPYRIGHT$ .TH MPI_Comm_connect 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_connect \fP \- Establishes communication with a server. +\fBMPI_Comm_connect \fP \- Establishes communication with a server. 
.SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Comm_connect(const gchar *\fIport_name\fP, MPI_Info \fIinfo\fP, int \fIroot\fP, +int MPI_Comm_connect(const char *\fIport_name\fP, MPI_Info \fIinfo\fP, int \fIroot\fP, MPI_Comm \fIcomm\fP, MPI_Comm *\fInewcomm\fP) .fi @@ -22,7 +22,7 @@ int MPI_Comm_connect(const gchar *\fIport_name\fP, MPI_Info \fIinfo\fP, int \fIr INCLUDE 'mpif.h' MPI_COMM_CONNECT(\fIPORT_NAME, INFO, ROOT, COMM, NEWCOMM, IERROR\fP) CHARACTER*(*) \fIPORT_NAME\fP - INTEGER \fIINFO, ROOT, COMM, NEWCOMM, IERROR\fP + INTEGER \fIINFO, ROOT, COMM, NEWCOMM, IERROR\fP .fi .SH C++ Syntax @@ -39,7 +39,7 @@ port_name Port name (string, used only on \fIroot\fP). .TP 1i info -Options given by root for the connect (handle, used only on root). No options currently supported. +Options given by root for the connect (handle, used only on root). No options currently supported. .TP 1i root Rank in \fIcomm\fP of root node (integer). @@ -54,26 +54,26 @@ newcomm Intercommunicator with client as remote group (handle) .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Comm_connect establishes communication with a server specified by \fIport_name\fP. It is collective over the calling communicator and returns an intercommunicator in which the remote group participated in an MPI_Comm_accept. The MPI_Comm_connect call must only be called after the MPI_Comm_accept call has been made by the MPI job acting as the server. .sp -If the named port does not exist (or has been closed), MPI_Comm_connect raises an error of class MPI_ERR_PORT. +If the named port does not exist (or has been closed), MPI_Comm_connect raises an error of class MPI_ERR_PORT. .sp -MPI provides no guarantee of fairness in servicing connection attempts. 
That is, connection attempts are not necessarily satisfied in the order in which they were initiated, and competition from other connection attempts may prevent a particular connection attempt from being satisfied. +MPI provides no guarantee of fairness in servicing connection attempts. That is, connection attempts are not necessarily satisfied in the order in which they were initiated, and competition from other connection attempts may prevent a particular connection attempt from being satisfied. + +The \fIport_name\fP parameter is the address of the server. It must be the same as the name returned by MPI_Open_port on the server. -The \fIport_name\fP parameter is the address of the server. It must be the same as the name returned by MPI_Open_port on the server. - .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .sp -See the MPI man page for a full list of MPI error codes. +See the MPI man page for a full list of MPI error codes. 
.SH SEE ALSO MPI_Comm_accept diff --git a/ompi/mpi/man/man3/MPI_Comm_create.3in b/ompi/mpi/man/man3/MPI_Comm_create.3in index 5f236bed889..1a32519c748 100644 --- a/ompi/mpi/man/man3/MPI_Comm_create.3in +++ b/ompi/mpi/man/man3/MPI_Comm_create.3in @@ -48,7 +48,7 @@ New communicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -56,7 +56,7 @@ This function creates a new communicator newcomm with communication group defined by group and a new context. The function sets \fInewcomm\fR to a new communicator that spans all the processes that are in the group. It sets \fInewcomm\fR to MPI_COMM_NULL for -processes that are not in the group. +processes that are not in the group. Each process must call with a \fIgroup\fR argument that is a subgroup of the group associated with \fIcomm\fR; this could be @@ -69,13 +69,13 @@ order. Otherwise the call is erroneous. .LP .SH NOTES -MPI_Comm_create provides a means of making a subset of processes for the purpose of separate MIMD computation, with separate communication space. \fInewcomm\fR, which is created by MPI_Comm_create, can be used in subsequent calls to MPI_Comm_create (or other communicator constructors) to further subdivide a computation into parallel sub-computations. A more general service is provided by MPI_Comm_split. +MPI_Comm_create provides a means of making a subset of processes for the purpose of separate MIMD computation, with separate communication space. \fInewcomm\fR, which is created by MPI_Comm_create, can be used in subsequent calls to MPI_Comm_create (or other communicator constructors) to further subdivide a computation into parallel sub-computations. A more general service is provided by MPI_Comm_split. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_create_errhandler.3in b/ompi/mpi/man/man3/MPI_Comm_create_errhandler.3in index b94d5d9cc0e..ffae73f6659 100644 --- a/ompi/mpi/man/man3/MPI_Comm_create_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_Comm_create_errhandler.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_create_errhandler 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_create_errhandler \fP \- Creates an error handler that can be attached to communicators. +\fBMPI_Comm_create_errhandler \fP \- Creates an error handler that can be attached to communicators. .SH SYNTAX .ft R @@ -54,20 +54,20 @@ MPI error handler (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Comm_create_errhandler creates an error handler that can be attached to communicators. This function is identical to MPI_Errhandler_create, the use of which is deprecated. +MPI_Comm_create_errhandler creates an error handler that can be attached to communicators. 
This function is identical to MPI_Errhandler_create, the use of which is deprecated. .sp -In C, the user routine should be a function of type MPI_Comm_errhandler_function, which is defined as +In C, the user routine should be a function of type MPI_Comm_errhandler_function, which is defined as .sp .nf - typedef void MPI_Comm_errhandler_function(MPI_Comm *, int *, \&...); + typedef void MPI_Comm_errhandler_function(MPI_Comm *, int *, \&...); .fi .sp The first argument is the communicator in use. The second is the error code -to be returned by the MPI routine that raised the error. This typedef replaces MPI_Handler_function, the use of which is deprecated. +to be returned by the MPI routine that raised the error. This typedef replaces MPI_Handler_function, the use of which is deprecated. .sp In Fortran, the user routine should be of this form: .sp @@ -79,14 +79,14 @@ In Fortran, the user routine should be of this form: In C++, the user routine should be of this form: .sp .nf - typedef void MPI::Comm::Errhandler_function(MPI_Comm &, int *, \&...); + typedef void MPI::Comm::Errhandler_function(MPI::Comm &, int *, \&...); .fi .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Comm_create_keyval.3in b/ompi/mpi/man/man3/MPI_Comm_create_keyval.3in index b4d6fcd6cba..ff6be052196 100644 --- a/ompi/mpi/man/man3/MPI_Comm_create_keyval.3in +++ b/ompi/mpi/man/man3/MPI_Comm_create_keyval.3in @@ -5,23 +5,23 @@ .\" $COPYRIGHT$ .TH MPI_Comm_create_keyval 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_create_keyval\fP \- Generates a new attribute key. +\fBMPI_Comm_create_keyval\fP \- Generates a new attribute key. .SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Comm_create_keyval(MPI_Comm_copy_attr_function - *\fIcomm_copy_attr_fn\fP, MPI_Comm_delete_attr_function - *\fIcomm_delete_attr_fn\fP, int *\fIcomm_keyval\fP, - void *\fIextra_state\fP) +int MPI_Comm_create_keyval(MPI_Comm_copy_attr_function + *\fIcomm_copy_attr_fn\fP, MPI_Comm_delete_attr_function + *\fIcomm_delete_attr_fn\fP, int *\fIcomm_keyval\fP, + void *\fIextra_state\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf INCLUDE 'mpif.h' -MPI_COMM_CREATE_KEYVAL(\fICOMM_COPY_ATTR_FN, COMM_DELETE_ATTR_FN, +MPI_COMM_CREATE_KEYVAL(\fICOMM_COPY_ATTR_FN, COMM_DELETE_ATTR_FN, COMM_KEYVAL, EXTRA_STATE, IERROR\fP) EXTERNAL \fICOMM_COPY_ATTR_FN, COMM_DELETE_ATTR_FN\fP @@ -57,24 +57,24 @@ comm_keyval Key value for future access (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -This function replaces MPI_Keyval_create, the use of which is deprecated. The C binding is identical. The Fortran binding differs in that \fIextra_state\fP is an address-sized integer. Also, the copy and delete callback functions have Fortran bindings that are consistent with address-sized attributes. 
+This function replaces MPI_Keyval_create, the use of which is deprecated. The C binding is identical. The Fortran binding differs in that \fIextra_state\fP is an address-sized integer. Also, the copy and delete callback functions have Fortran bindings that are consistent with address-sized attributes. .sp -The argument \fIcomm_copy_attr_fn\fP may be specified as MPI_COMM_NULL_COPY_FN or MPI_COMM_DUP_FN from C, C++, or Fortran. MPI_COMM_NULL_COPY_FN is a function that does nothing more than returning \fIflag\fP = 0 and MPI_SUCCESS. MPI_COMM_DUP_FN is a simple-minded copy function that sets \fIflag\fP = 1, returns the value of \fIattribute_val_in\fP in \fIattribute_val_out\fP, and returns MPI_SUCCESS. These replace the MPI-1 predefined callbacks MPI_NULL_COPY_FN and MPI_DUP_FN, the use of which is deprecated. +The argument \fIcomm_copy_attr_fn\fP may be specified as MPI_COMM_NULL_COPY_FN or MPI_COMM_DUP_FN from C, C++, or Fortran. MPI_COMM_NULL_COPY_FN is a function that does nothing more than returning \fIflag\fP = 0 and MPI_SUCCESS. MPI_COMM_DUP_FN is a simple-minded copy function that sets \fIflag\fP = 1, returns the value of \fIattribute_val_in\fP in \fIattribute_val_out\fP, and returns MPI_SUCCESS. These replace the MPI-1 predefined callbacks MPI_NULL_COPY_FN and MPI_DUP_FN, the use of which is deprecated. 
.sp The C callback functions are: .sp .nf -typedef int MPI_Comm_copy_attr_function(MPI_Comm \fIoldcomm\fP, int \fIcomm_keyval\fP, - void *\fIextra_state\fP, void *\fIattribute_val_in\fP, +typedef int MPI_Comm_copy_attr_function(MPI_Comm \fIoldcomm\fP, int \fIcomm_keyval\fP, + void *\fIextra_state\fP, void *\fIattribute_val_in\fP, void *\fIattribute_val_out\fP, int *\fIflag\fP); .fi and .nf -typedef int MPI_Comm_delete_attr_function(MPI_Comm \fIcomm\fP, int \fIcomm_keyval\fP, +typedef int MPI_Comm_delete_attr_function(MPI_Comm \fIcomm\fP, int \fIcomm_keyval\fP, void *\fIattribute_val\fP, void *\fIextra_state\fP); .fi .sp @@ -83,32 +83,32 @@ which are the same as the MPI-1.1 calls but with a new name. The old names are d The Fortran callback functions are: .sp .nf -SUBROUTINE COMM_COPY_ATTR_FN(\fIOLDCOMM, COMM_KEYVAL, EXTRA_STATE, - ATTRIBUTE_VAL_IN, ATTRIBUTE_VAL_OUT, FLAG, IERROR\fP) - INTEGER \fIOLDCOMM, COMM_KEYVAL, IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE, ATTRIBUTE_VAL_IN, - ATTRIBUTE_VAL_OUT\fP - LOGICAL \fIFLAG\fP +SUBROUTINE COMM_COPY_ATTR_FN(\fIOLDCOMM, COMM_KEYVAL, EXTRA_STATE, + ATTRIBUTE_VAL_IN, ATTRIBUTE_VAL_OUT, FLAG, IERROR\fP) + INTEGER \fIOLDCOMM, COMM_KEYVAL, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE, ATTRIBUTE_VAL_IN, + ATTRIBUTE_VAL_OUT\fP + LOGICAL \fIFLAG\fP .fi and .nf -SUBROUTINE COMM_DELETE_ATTR_FN(\fICOMM, COMM_KEYVAL, ATTRIBUTE_VAL, EXTRA_STATE, - IERROR\fP) - INTEGER \fICOMM, COMM_KEYVAL, IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE_VAL, EXTRA_STATE\fP +SUBROUTINE COMM_DELETE_ATTR_FN(\fICOMM, COMM_KEYVAL, ATTRIBUTE_VAL, EXTRA_STATE, + IERROR\fP) + INTEGER \fICOMM, COMM_KEYVAL, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE_VAL, EXTRA_STATE\fP .fi .sp The C++ callbacks are: .sp .nf -typedef int MPI::Comm::Copy_attr_function(const MPI::Comm& \fIoldcomm\fP, - int \fIcomm_keyval\fP, void* \fIextra_state\fP, void* \fIattribute_val_in\fP, - void* \fIattribute_val_out\fP, bool& \fIflag\fP); 
+typedef int MPI::Comm::Copy_attr_function(const MPI::Comm& \fIoldcomm\fP, + int \fIcomm_keyval\fP, void* \fIextra_state\fP, void* \fIattribute_val_in\fP, + void* \fIattribute_val_out\fP, bool& \fIflag\fP); .fi and .nf -typedef int MPI::Comm::Delete_attr_function(MPI::Comm& \fIcomm\fP, - int \fIcomm_keyval\fP, void* \fIattribute_val\fP, void* \fIextra_state\fP); +typedef int MPI::Comm::Delete_attr_function(MPI::Comm& \fIcomm\fP, + int \fIcomm_keyval\fP, void* \fIattribute_val\fP, void* \fIextra_state\fP); .fi .SH FORTRAN 77 NOTES @@ -128,9 +128,9 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .sp -See the MPI man page for a full list of MPI error codes. +See the MPI man page for a full list of MPI error codes. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_delete_attr.3in b/ompi/mpi/man/man3/MPI_Comm_delete_attr.3in index bb51b04d771..24f26451e83 100644 --- a/ompi/mpi/man/man3/MPI_Comm_delete_attr.3in +++ b/ompi/mpi/man/man3/MPI_Comm_delete_attr.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_delete_attr 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_delete_attr\fP \- Deletes attribute value associated with a key. +\fBMPI_Comm_delete_attr\fP \- Deletes attribute value associated with a key. .SH SYNTAX .ft R @@ -32,7 +32,7 @@ void MPI::Comm::Delete_attr(int \fIcomm_keyval\fP) .ft R .TP 1i comm -Communicator from which the attribute is deleted (handle). +Communicator from which the attribute is deleted (handle). .SH INPUT PARAMETER .ft R @@ -44,15 +44,15 @@ Key value (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Comm_delete_attr deletes an attribute from cache by key. This function invokes the attribute delete function delete_fn specified when the \fIcomm_keyval\fP was created. The call will fail if the delete_fn function returns an error code other than MPI_SUCCESS. +MPI_Comm_delete_attr deletes an attribute from cache by key. This function invokes the attribute delete function delete_fn specified when the \fIcomm_keyval\fP was created. The call will fail if the delete_fn function returns an error code other than MPI_SUCCESS. -Whenever a communicator is replicated using the function MPI_Comm_dup, all callback copy functions for attributes that are currently set are invoked (in arbitrary order). Whenever a communicator is deleted using the function MPI_Comm_free, all callback delete functions for attributes that are currently set are invoked. +Whenever a communicator is replicated using the function MPI_Comm_dup, all callback copy functions for attributes that are currently set are invoked (in arbitrary order). 
Whenever a communicator is deleted using the function MPI_Comm_free, all callback delete functions for attributes that are currently set are invoked. .sp -This function is the same as MPI_Attr_delete but is needed to match the communicator-specific functions introduced in the MPI-2 standard. The use of MPI_Attr_delete is deprecated. +This function is the same as MPI_Attr_delete but is needed to match the communicator-specific functions introduced in the MPI-2 standard. The use of MPI_Attr_delete is deprecated. .SH NOTES @@ -67,5 +67,5 @@ is being invoked. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Comm_disconnect.3in b/ompi/mpi/man/man3/MPI_Comm_disconnect.3in index ba4b17c8793..d1da8f3a548 100644 --- a/ompi/mpi/man/man3/MPI_Comm_disconnect.3in +++ b/ompi/mpi/man/man3/MPI_Comm_disconnect.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_disconnect 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_disconnect\fP \- Deallocates communicator object and sets handle to MPI_COMM_NULL. +\fBMPI_Comm_disconnect\fP \- Deallocates communicator object and sets handle to MPI_COMM_NULL. .SH SYNTAX .ft R @@ -38,27 +38,27 @@ Communicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Comm_disconnect waits for all pending communication on \fIcomm\fP to complete internally, deallocates the communicator object, and sets the handle to MPI_COMM_NULL. It is a collective operation. +MPI_Comm_disconnect waits for all pending communication on \fIcomm\fP to complete internally, deallocates the communicator object, and sets the handle to MPI_COMM_NULL. It is a collective operation. .sp It may not be called with the communicator MPI_COMM_WORLD or MPI_COMM_SELF. .sp -MPI_Comm_disconnect may be called only if all communication is complete and matched, so that buffered data can be delivered to its destination. This requirement is the same as for MPI_Finalize. +MPI_Comm_disconnect may be called only if all communication is complete and matched, so that buffered data can be delivered to its destination. This requirement is the same as for MPI_Finalize. .sp -MPI_Comm_disconnect has the same action as MPI_Comm_free, except that it waits for pending communication to finish internally and enables the guarantee about the behavior of disconnected processes. +MPI_Comm_disconnect has the same action as MPI_Comm_free, except that it waits for pending communication to finish internally and enables the guarantee about the behavior of disconnected processes. 
.SH NOTES .ft R -To disconnect two processes you may need to call MPI_Comm_disconnect, MPI_Win_free, and MPI_File_close to remove all communication paths between the two processes. Note that it may be necessary to disconnect several communicators (or to free several windows or files) before two processes are completely independent. +To disconnect two processes you may need to call MPI_Comm_disconnect, MPI_Win_free, and MPI_File_close to remove all communication paths between the two processes. Note that it may be necessary to disconnect several communicators (or to free several windows or files) before two processes are completely independent. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_dup.3in b/ompi/mpi/man/man3/MPI_Comm_dup.3in index b410dc651db..c0ab489eb94 100644 --- a/ompi/mpi/man/man3/MPI_Comm_dup.3in +++ b/ompi/mpi/man/man3/MPI_Comm_dup.3in @@ -18,8 +18,8 @@ int MPI_Comm_dup(MPI_Comm \fIcomm\fP, MPI_Comm\fI *newcomm\fP) .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_COMM_DUP(\fICOMM, NEWCOMM, IERROR\fP) - INTEGER \fICOMM, NEWCOMM, IERROR\fP +MPI_COMM_DUP(\fICOMM, NEWCOMM, IERROR\fP) + INTEGER \fICOMM, NEWCOMM, IERROR\fP .fi .SH C++ Syntax @@ -34,7 +34,7 @@ Intercomm Intercomm::Dup() const .ft R .TP 1i comm -Communicator (handle). +Communicator (handle). .SH OUTPUT PARAMETERS .ft R @@ -44,7 +44,7 @@ Copy of comm (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -55,7 +55,7 @@ values. For each key value, the respective copy callback function determines the This operation is used to provide a parallel library call with a duplicate communication space that has the same properties as the original communicator. This includes any attributes (see below) and topologies (see Chapter 6, "Process Topologies," in the MPI-1 Standard). This call is valid even if there are pending point-to-point communications involving the communicator comm. A typical call might involve an MPI_Comm_dup at the beginning of the parallel call, and an MPI_Comm_free of that duplicated communicator at the end of the call. Other models of communicator management are also possible. .sp -This call applies to both intra- and intercommunicators. +This call applies to both intra- and intercommunicators. Note that it is not defined by the MPI standard what happens if the attribute copy callback invokes other MPI functions. In Open MPI, it @@ -67,7 +67,7 @@ copy callback is being invoked. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. 
C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO diff --git a/ompi/mpi/man/man3/MPI_Comm_f2c.3in b/ompi/mpi/man/man3/MPI_Comm_f2c.3in index cf7726290d0..734ee978097 100644 --- a/ompi/mpi/man/man3/MPI_Comm_f2c.3in +++ b/ompi/mpi/man/man3/MPI_Comm_f2c.3in @@ -4,7 +4,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_f2c 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_f2c, MPI_Comm_c2f, MPI_File_f2c, MPI_File_c2f, MPI_Info_f2c, MPI_Info_c2f, MPI_Op_f2c, MPI_Op_c2f, MPI_Request_f2c, MPI_Request_c2f, MPI_Type_f2c, MPI_Type_c2f, MPI_Win_f2c, MPI_Win_c2f \fP \- Translates a C handle into a Fortran handle, or vice versa. +\fBMPI_Comm_f2c, MPI_Comm_c2f, MPI_File_f2c, MPI_File_c2f, MPI_Info_f2c, MPI_Info_c2f, MPI_Message_f2c, MPI_Message_c2f, MPI_Op_f2c, MPI_Op_c2f, MPI_Request_f2c, MPI_Request_c2f, MPI_Type_f2c, MPI_Type_c2f, MPI_Win_f2c, MPI_Win_c2f \fP \- Translates a C handle into a Fortran handle, or vice versa. 
.SH SYNTAX .ft R @@ -23,6 +23,9 @@ MPI_Fint MPI_Group_c2f(MPI Group \fIgroup\fP) MPI_Info MPI_Info_f2c(MPI_Fint \fIinfo\fP) MPI_Fint MPI_Info_c2f(MPI_Info \fIinfo\fP) +MPI_Message MPI_Message_f2c(MPI_Fint \fImessage\fP) +MPI_Fint MPI_Message_c2f(MPI_Message \fImessage\fP) + MPI_Op MPI_Op_f2c(MPI_Fint \fIop\fP) MPI_Fint MPI_Op_c2f(MPI_Op \fIop\fP) @@ -40,6 +43,6 @@ MPI_Fint MPI_Win_c2f(MPI_Win \fIwin\fP) .ft R Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition \fIMPI_Fint\fP is provided in C/C++ for an integer of the size that matches a Fortran \fIINTEGER\fP; usually, \fIMPI_Fint\fP will be equivalent to \fIint\fP. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa. .PP -For example, if \fIcomm\fP is a valid Fortran handle to a communicator, then MPI_Comm_f2c returns a valid C handle to that same communicator; if \fIcomm\fP = MPI_COMM_NULL (Fortran value), then MPI_Comm_f2c returns a null C handle; if \fIcomm\fP is an invalid Fortran handle, then MPI_Comm_f2c returns an invalid C handle. +For example, if \fIcomm\fP is a valid Fortran handle to a communicator, then MPI_Comm_f2c returns a valid C handle to that same communicator; if \fIcomm\fP = MPI_COMM_NULL (Fortran value), then MPI_Comm_f2c returns a null C handle; if \fIcomm\fP is an invalid Fortran handle, then MPI_Comm_f2c returns an invalid C handle. .SH NOTE This function does not return an error value. Consequently, the result of calling it before MPI_Init or after MPI_Finalize is undefined. 
diff --git a/ompi/mpi/man/man3/MPI_Comm_free.3in b/ompi/mpi/man/man3/MPI_Comm_free.3in index 472cacb9ad8..ec8acc8b050 100644 --- a/ompi/mpi/man/man3/MPI_Comm_free.3in +++ b/ompi/mpi/man/man3/MPI_Comm_free.3in @@ -19,7 +19,7 @@ int MPI_Comm_free(MPI_Comm *\fIcomm\fP) .nf INCLUDE 'mpif.h' MPI_COMM_FREE(\fICOMM, IERROR\fP) - INTEGER \fICOMM, IERROR\fP + INTEGER \fICOMM, IERROR\fP .fi .SH C++ Syntax @@ -38,11 +38,11 @@ Communicator to be destroyed (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -This operation marks the communicator object for deallocation. The handle is set to MPI_COMM_NULL. Any pending operations that use this communicator will complete normally; the object is actually deallocated only if there are no other active references to it. This call applies to intracommunicators and intercommunicators. Upon actual deallocation, the delete callback functions for all cached attributes (see Section 5.7 in the MPI-1 Standard, "Caching") are called in arbitrary order. +This operation marks the communicator object for deallocation. The handle is set to MPI_COMM_NULL. Any pending operations that use this communicator will complete normally; the object is actually deallocated only if there are no other active references to it. This call applies to intracommunicators and intercommunicators. Upon actual deallocation, the delete callback functions for all cached attributes (see Section 5.7 in the MPI-1 Standard, "Caching") are called in arbitrary order. .SH NOTES @@ -57,7 +57,7 @@ is being invoked. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. 
.sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_free_keyval.3in b/ompi/mpi/man/man3/MPI_Comm_free_keyval.3in index b855c2324e6..e9ca795011f 100644 --- a/ompi/mpi/man/man3/MPI_Comm_free_keyval.3in +++ b/ompi/mpi/man/man3/MPI_Comm_free_keyval.3in @@ -37,14 +37,14 @@ comm_keyval .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Comm_free_keyval frees an extant attribute key. This function sets the value of \fIkeyval\fP to MPI_KEYVAL_INVALID. Note that it is not erroneous to free an attribute key that is in use, because the actual free does not transpire until after all references (in other communicators on the process) to the key have been freed. These references need to be explicitly freed by the program, either via calls to MPI_Comm_delete_attr that free one attribute instance, or by calls to MPI_Comm_free that free all attribute instances associated with the freed communicator. +MPI_Comm_free_keyval frees an extant attribute key. This function sets the value of \fIkeyval\fP to MPI_KEYVAL_INVALID. 
Note that it is not erroneous to free an attribute key that is in use, because the actual free does not transpire until after all references (in other communicators on the process) to the key have been freed. These references need to be explicitly freed by the program, either via calls to MPI_Comm_delete_attr that free one attribute instance, or by calls to MPI_Comm_free that free all attribute instances associated with the freed communicator. .sp -This call is identical to the call MPI_Keyval_free but is needed to match the communicator-specific creation function introduced in the MPI-2 standard. The use of MPI_Keyval_free is deprecated. +This call is identical to the call MPI_Keyval_free but is needed to match the communicator-specific creation function introduced in the MPI-2 standard. The use of MPI_Keyval_free is deprecated. .SH NOTES @@ -55,5 +55,5 @@ Key values are global (they can be used with any and all communicators). Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Comm_get_attr.3in b/ompi/mpi/man/man3/MPI_Comm_get_attr.3in index 6650208c088..c746386b030 100644 --- a/ompi/mpi/man/man3/MPI_Comm_get_attr.3in +++ b/ompi/mpi/man/man3/MPI_Comm_get_attr.3in @@ -12,7 +12,7 @@ .SH C Syntax .nf #include -int MPI_Comm_get_attr(MPI_Comm \fIcomm\fP, int \fIcomm_keyval\fP, +int MPI_Comm_get_attr(MPI_Comm \fIcomm\fP, int \fIcomm_keyval\fP, void *\fIattribute_val\fP, int *\fIflag\fP) .fi @@ -28,7 +28,7 @@ MPI_COMM_GET_ATTR(\fICOMM, COMM_KEYVAL, ATTRIBUTE_VAL, FLAG, IERROR\fP) .SH C++ Syntax .nf #include -bool MPI::Comm::Get_attr(int \fIcomm_keyval\fP, void* \fIattribute_val\fP) +bool MPI::Comm::Get_attr(int \fIcomm_keyval\fP, void* \fIattribute_val\fP) const .fi @@ -48,14 +48,14 @@ attribute_val Attribute value, unless f\fIlag\fP = false. .TP 1i flag -False if no attribute is associated with the key (logical). +False if no attribute is associated with the key (logical). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Comm_get_attr retrieves an attribute value by key. The call is erroneous if there is no key with value \fIkeyval\fP. On the other hand, the call is correct if the key value exists, but no attribute is attached on \fIcomm\fP for that key; in that case, the call returns \fIflag\fP = false. In particular, MPI_KEYVAL_INVALID is an erroneous key value. +MPI_Comm_get_attr retrieves an attribute value by key. The call is erroneous if there is no key with value \fIkeyval\fP. On the other hand, the call is correct if the key value exists, but no attribute is attached on \fIcomm\fP for that key; in that case, the call returns \fIflag\fP = false. In particular, MPI_KEYVAL_INVALID is an erroneous key value. .sp This function replaces MPI_Attr_get, the use of which is deprecated. The C binding is identical. 
The Fortran binding differs in that \fIattribute_val\fP is an address-sized integer. @@ -76,5 +76,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Comm_get_errhandler.3in b/ompi/mpi/man/man3/MPI_Comm_get_errhandler.3in index c775c203abc..50cb3fcf49f 100644 --- a/ompi/mpi/man/man3/MPI_Comm_get_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_Comm_get_errhandler.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_get_errhandler 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_get_errhandler \fP \- Retrieves error handler associated with a communicator. +\fBMPI_Comm_get_errhandler \fP \- Retrieves error handler associated with a communicator. .SH SYNTAX .ft R @@ -33,7 +33,7 @@ MPI::Errhandler MPI::Comm::Get_errhandler() const .ft R .TP 1i comm -Communicator (handle). +Communicator (handle). 
.SH OUTPUT PARAMETERS .ft R @@ -43,18 +43,18 @@ New error handler for communicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Comm_get_errhandler retrieves the error handler currently associated with a communicator. This call is identical to MPI_Errhandler_get, the use of which is deprecated. +MPI_Comm_get_errhandler retrieves the error handler currently associated with a communicator. This call is identical to MPI_Errhandler_get, the use of which is deprecated. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .sp -See the MPI man page for a full list of MPI error codes. +See the MPI man page for a full list of MPI error codes. 
diff --git a/ompi/mpi/man/man3/MPI_Comm_get_info.3in b/ompi/mpi/man/man3/MPI_Comm_get_info.3in index d380f91ba9f..636f5c8ddb6 100644 --- a/ompi/mpi/man/man3/MPI_Comm_get_info.3in +++ b/ompi/mpi/man/man3/MPI_Comm_get_info.3in @@ -35,16 +35,16 @@ info_used New info object returned with all active hints on this communicator. .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). . .SH DESCRIPTION .ft R MPI_Comm_get_info returns a new info object containing the hints of -the communicator associated with -.IR comm . +the communicator associated with +.IR comm . The current setting of all hints actually used by the system related -to this communicator is returned in -.IR info_used . +to this communicator is returned in +.IR info_used . If no such hints exist, a handle to a newly created info object is returned that contains no key/value pair. The user is responsible for freeing info_used via MPI_Info_free. diff --git a/ompi/mpi/man/man3/MPI_Comm_get_name.3in b/ompi/mpi/man/man3/MPI_Comm_get_name.3in index 17f951c680d..c0566643c0b 100644 --- a/ompi/mpi/man/man3/MPI_Comm_get_name.3in +++ b/ompi/mpi/man/man3/MPI_Comm_get_name.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_get_name 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_get_name\fP \- Returns the name that was most recently associated with a communicator. +\fBMPI_Comm_get_name\fP \- Returns the name that was most recently associated with a communicator. .SH SYNTAX .ft R @@ -43,26 +43,26 @@ comm_name Name previously stored on the communicator, or an empty string if no such name exists (string). .TP 1i resultlen -Length of returned name (integer). +Length of returned name (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Comm_get_name returns the last name that was previously associated with the given communicator. The name may be set and retrieved from any language. 
The same name will be returned independent of the language used. \fIcomm_name\fP should be allocated so that it can hold a resulting string of length MPI_MAX_OBJECT_NAME characters. MPI_Comm_get_name returns a copy of the set name in \fIcomm_name\fP. .sp -If the user has not associated a name with a communicator, or an error occurs, MPI_Comm_get_name will return an empty string (all spaces in Fortran, "" in C and C++). The three predefined communicators will have predefined names associated with them. Thus, the names of MPI_COMM_WORLD, MPI_COMM_SELF, and MPI_COMM_PARENT will have the default of MPI_COMM_WORLD, MPI_COMM_SELF, and MPI_COMM_PARENT. The fact that the system may have chosen to give a default name to a communicator does not prevent the user from setting a name on the same communicator; doing this removes the old name and assigns the new one. +If the user has not associated a name with a communicator, or an error occurs, MPI_Comm_get_name will return an empty string (all spaces in Fortran, "" in C and C++). The three predefined communicators will have predefined names associated with them. Thus, the names of MPI_COMM_WORLD, MPI_COMM_SELF, and MPI_COMM_PARENT will have the default of MPI_COMM_WORLD, MPI_COMM_SELF, and MPI_COMM_PARENT. The fact that the system may have chosen to give a default name to a communicator does not prevent the user from setting a name on the same communicator; doing this removes the old name and assigns the new one. .SH NOTES .ft R -It is safe simply to print the string returned by MPI_Comm_get_name, as it is always a valid string even if there was no name. +It is safe simply to print the string returned by MPI_Comm_get_name, as it is always a valid string even if there was no name. .sp -Note that associating a name with a communicator has no effect on the semantics of an MPI program, and will (necessarily) increase the store requirement of the program, since the names must be saved. 
Therefore, there is no requirement that users use these functions to associate names with communicators. However debugging and profiling MPI applications may be made easier if names are associated with communicators, since the debugger or profiler should then be able to present information in a less cryptic manner. +Note that associating a name with a communicator has no effect on the semantics of an MPI program, and will (necessarily) increase the store requirement of the program, since the names must be saved. Therefore, there is no requirement that users use these functions to associate names with communicators. However debugging and profiling MPI applications may be made easier if names are associated with communicators, since the debugger or profiler should then be able to present information in a less cryptic manner. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Comm_get_parent.3in b/ompi/mpi/man/man3/MPI_Comm_get_parent.3in index 296e7dd32a1..7fd445232f7 100644 --- a/ompi/mpi/man/man3/MPI_Comm_get_parent.3in +++ b/ompi/mpi/man/man3/MPI_Comm_get_parent.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_get_parent 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_get_parent\fP \- Returns the parent intercommunicator of current spawned process. +\fBMPI_Comm_get_parent\fP \- Returns the parent intercommunicator of current spawned process. .SH SYNTAX .ft R @@ -36,25 +36,25 @@ parent The parent communicator (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -If a process was started with MPI_Comm_spawn or MPI_Comm_spawn_multiple, MPI_Comm_get_parent returns the "parent" intercommunicator of the current process. This parent intercommunicator is created implicitly inside of MPI_Init and is the same intercommunicator returned by the spawn call made in the parents. +If a process was started with MPI_Comm_spawn or MPI_Comm_spawn_multiple, MPI_Comm_get_parent returns the "parent" intercommunicator of the current process. This parent intercommunicator is created implicitly inside of MPI_Init and is the same intercommunicator returned by the spawn call made in the parents. .sp If the process was not spawned, MPI_Comm_get_parent returns MPI_COMM_NULL. .sp -After the parent communicator is freed or disconnected, MPI_Comm_get_parent returns MPI_COMM_NULL. +After the parent communicator is freed or disconnected, MPI_Comm_get_parent returns MPI_COMM_NULL. .SH NOTES .ft R -MPI_Comm_get_parent returns a handle to a single intercommunicator. Calling MPI_Comm_get_parent a second time returns a handle to the same intercommunicator. Freeing the handle with MPI_Comm_disconnect or MPI_Comm_free will cause other references to the intercommunicator to become invalid (dangling). 
Note that calling MPI_Comm_free on the parent communicator is not useful. +MPI_Comm_get_parent returns a handle to a single intercommunicator. Calling MPI_Comm_get_parent a second time returns a handle to the same intercommunicator. Freeing the handle with MPI_Comm_disconnect or MPI_Comm_free will cause other references to the intercommunicator to become invalid (dangling). Note that calling MPI_Comm_free on the parent communicator is not useful. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_group.3in b/ompi/mpi/man/man3/MPI_Comm_group.3in index 9ca2a9040c3..27342e08ff2 100644 --- a/ompi/mpi/man/man3/MPI_Comm_group.3in +++ b/ompi/mpi/man/man3/MPI_Comm_group.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_group 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_group \fP \- Returns the group associated with a communicator. 
+\fBMPI_Comm_group \fP \- Returns the group associated with a communicator. .SH SYNTAX .ft R @@ -19,7 +19,7 @@ int MPI_Comm_group(MPI_Comm \fIcomm\fP, MPI_Group *\fIgroup\fP) .nf INCLUDE 'mpif.h' MPI_COMM_GROUP(\fICOMM, GROUP, IERROR\fP) - INTEGER \fICOMM, GROUP, IERROR\fP + INTEGER \fICOMM, GROUP, IERROR\fP .fi .SH C++ Syntax @@ -42,15 +42,15 @@ Group in communicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -If the communicator is an intercommunicator (enables communication between two groups of processes), this function returns the local group. To return the remote group, use the MPI_Comm_remote_group function. +If the communicator is an intercommunicator (enables communication between two groups of processes), this function returns the local group. To return the remote group, use the MPI_Comm_remote_group function. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Comm_join.3in b/ompi/mpi/man/man3/MPI_Comm_join.3in index 34d16e94510..7ab0e9e5ef7 100644 --- a/ompi/mpi/man/man3/MPI_Comm_join.3in +++ b/ompi/mpi/man/man3/MPI_Comm_join.3in @@ -43,7 +43,7 @@ intercomm Intercommunicator between processes (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -89,7 +89,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Comm_rank.3in b/ompi/mpi/man/man3/MPI_Comm_rank.3in index 6c163345d17..414ae19120a 100644 --- a/ompi/mpi/man/man3/MPI_Comm_rank.3in +++ b/ompi/mpi/man/man3/MPI_Comm_rank.3in @@ -19,7 +19,7 @@ int MPI_Comm_rank(MPI_Comm \fIcomm\fP, int\fI *rank\fP) .nf INCLUDE 'mpif.h' MPI_COMM_RANK(\fICOMM, RANK, IERROR\fP) - INTEGER \fICOMM, RANK, IERROR\fP + INTEGER \fICOMM, RANK, IERROR\fP .fi .SH C++ Syntax @@ -42,21 +42,21 @@ Rank of the calling process in group of comm (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R This function gives the rank of the process in the particular communicator's group. It is equivalent to accessing the -communicator's group with MPI_Comm_group, computing the rank using MPI_Group_rank, and then freeing the temporary group via MPI_Group_free. +communicator's group with MPI_Comm_group, computing the rank using MPI_Group_rank, and then freeing the temporary group via MPI_Group_free. 
.sp -Many programs will be written with the master-slave model, where one process (such as the rank-zero process) will play a supervisory role, and the other processes will serve as compute nodes. In this framework, MPI_Comm_size and MPI_Comm_rank are useful for determining the roles of the various processes of a communicator. +Many programs will be written with the master-slave model, where one process (such as the rank-zero process) will play a supervisory role, and the other processes will serve as compute nodes. In this framework, MPI_Comm_size and MPI_Comm_rank are useful for determining the roles of the various processes of a communicator. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_remote_group.3in b/ompi/mpi/man/man3/MPI_Comm_remote_group.3in index 080ec198373..b33608e7aaa 100644 --- a/ompi/mpi/man/man3/MPI_Comm_remote_group.3in +++ b/ompi/mpi/man/man3/MPI_Comm_remote_group.3in @@ -19,7 +19,7 @@ int MPI_Comm_remote_group(MPI_Comm \fIcomm\fP, MPI_Group\fI *group\fP) .nf INCLUDE 'mpif.h' MPI_COMM_REMOTE_GROUP(\fICOMM, GROUP, IERROR\fP) - INTEGER \fICOMM, GROUP, IERROR\fP + INTEGER \fICOMM, GROUP, IERROR\fP .fi .SH C++ Syntax @@ -42,7 +42,7 @@ Remote group of communicator. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -55,7 +55,7 @@ MPI_Comm_remote_group) are all local operations. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .sp diff --git a/ompi/mpi/man/man3/MPI_Comm_remote_size.3in b/ompi/mpi/man/man3/MPI_Comm_remote_size.3in index f13d4c337e3..f1f8009b909 100644 --- a/ompi/mpi/man/man3/MPI_Comm_remote_size.3in +++ b/ompi/mpi/man/man3/MPI_Comm_remote_size.3in @@ -19,7 +19,7 @@ int MPI_Comm_remote_size(MPI_Comm \fIcomm\fP, int\fI *size\fP) .nf INCLUDE 'mpif.h' MPI_COMM_REMOTE_SIZE(\fICOMM, SIZE, IERROR\fP) - INTEGER \fICOMM, SIZE, IERROR\fP + INTEGER \fICOMM, SIZE, IERROR\fP .fi .SH C++ Syntax @@ -42,7 +42,7 @@ Number of processes in the remote group of comm (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -54,7 +54,7 @@ The intercommunicator accessors (MPI_Comm_test_inter, MPI_Comm_remote_size, MPI Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_set_attr.3in b/ompi/mpi/man/man3/MPI_Comm_set_attr.3in index f496f5408b1..d5f5e7c9572 100644 --- a/ompi/mpi/man/man3/MPI_Comm_set_attr.3in +++ b/ompi/mpi/man/man3/MPI_Comm_set_attr.3in @@ -48,13 +48,13 @@ Attribute value. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Comm_set_attr stores the stipulated attribute value \fIattribute_val\fP for subsequent retrieval by MPI_Comm_get_attr. If the value is already present, then the outcome is as if MPI_Comm_delete_attr was first called to delete the previous value (and the callback function delete_fn was executed), and a new value was next stored. The call is erroneous if there is no key with value \fIcomm_keyval\fP; in particular MPI_KEYVAL_INVALID is an erroneous key value. The call will fail if the delete_fn function returned an error code other than MPI_SUCCESS. .sp -This function replaces MPI_Attr_put, the use of which is deprecated. The C binding is identical. The Fortran binding differs in that \fIattribute_val\fP is an address-sized integer. +This function replaces MPI_Attr_put, the use of which is deprecated. The C binding is identical. The Fortran binding differs in that \fIattribute_val\fP is an address-sized integer. .SH FORTRAN 77 NOTES .ft R @@ -74,7 +74,7 @@ and gives the length of the declared integer in bytes. Values of the permanent attributes MPI_TAG_UB, MPI_HOST, MPI_IO, and MPI_WTIME_IS_GLOBAL may not be changed. .sp -The type of the attribute value depends on whether C or Fortran is being used. In C, an attribute value is a pointer (void *); in Fortran, it is a single, address-size integer system for which a pointer does not fit in an integer. +The type of the attribute value depends on whether C or Fortran is being used. 
In C, an attribute value is a pointer (void *); in Fortran, it is a single, address-size integer, since there are systems for which a pointer does not fit in a default integer. .sp If an attribute is already present, the delete function (specified when the corresponding keyval was created) will be called. @@ -82,5 +82,5 @@ If an attribute is already present, the delete function (specified when the corr Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Comm_set_errhandler.3in b/ompi/mpi/man/man3/MPI_Comm_set_errhandler.3in index 39db6f7c970..216f652a204 100644 --- a/ompi/mpi/man/man3/MPI_Comm_set_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_Comm_set_errhandler.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_set_errhandler 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_set_errhandler \fP \- Attaches a new error handler to a communicator. +\fBMPI_Comm_set_errhandler \fP \- Attaches a new error handler to a communicator.
.SH SYNTAX .ft R @@ -33,7 +33,7 @@ void MPI::Comm::Set_errhandler(const MPI::Errhandler& \fIerrhandler\fP) .ft R .TP 1i comm -Communicator (handle). +Communicator (handle). .SH OUTPUT PARAMETERS .ft R @@ -43,15 +43,15 @@ New error handler for communicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Comm_set_errhandler attaches a new error handler to a communicator. The error handler must be either a predefined error handler or an error handler created by a call to MPI_Comm_create_errhandler. This call is identical to MPI_Errhandler_set, the use of which is deprecated. +MPI_Comm_set_errhandler attaches a new error handler to a communicator. The error handler must be either a predefined error handler or an error handler created by a call to MPI_Comm_create_errhandler. This call is identical to MPI_Errhandler_set, the use of which is deprecated. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Comm_set_info.3in b/ompi/mpi/man/man3/MPI_Comm_set_info.3in index 99bdb747778..552bb0ce123 100644 --- a/ompi/mpi/man/man3/MPI_Comm_set_info.3in +++ b/ompi/mpi/man/man3/MPI_Comm_set_info.3in @@ -35,17 +35,17 @@ Info object containing hints to be set on .SH OUTPUT PARAMETERS .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). . .SH DESCRIPTION .ft R MPI_COMM_SET_INFO sets new values for the hints of the communicator -associated with +associated with .IR comm . MPI_COMM_SET_INFO is a collective routine. The info object may be different on each process, but any info entries that an implementation requires to be the same on all processes must appear with the same -value in each process's +value in each process's .I info object. . diff --git a/ompi/mpi/man/man3/MPI_Comm_set_name.3in b/ompi/mpi/man/man3/MPI_Comm_set_name.3in index b78c6191236..d40ee07e717 100644 --- a/ompi/mpi/man/man3/MPI_Comm_set_name.3in +++ b/ompi/mpi/man/man3/MPI_Comm_set_name.3in @@ -46,21 +46,21 @@ Character string to be used as the identifier for the communicator (string). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Comm_set_name allows a user to associate a name string with a communicator. The character string that is passed to MPI_Comm_set_name is saved inside the MPI library (so it can be freed by the caller immediately after the call, or allocated on the stack). Leading spaces in \fIname\fP are significant, but trailing ones are not. .sp -MPI_Comm_set_name is a local (noncollective) operation, which affects only the name of the communicator as seen in the process that made the MPI_Comm_set_name call. There is no requirement that the same (or any) name be assigned to a communicator in every process where it exists. 
+MPI_Comm_set_name is a local (noncollective) operation, which affects only the name of the communicator as seen in the process that made the MPI_Comm_set_name call. There is no requirement that the same (or any) name be assigned to a communicator in every process where it exists. .sp The length of the name that can be stored is limited to the value of MPI_MAX_OBJECT_NAME in Fortran and MPI_MAX_OBJECT_NAME-1 in C and C++ (to allow for the null terminator). Attempts to set names longer than this will result in truncation of the name. MPI_MAX_OBJECT_NAME must have a value of at least 64. .SH NOTES .ft R -Since MPI_Comm_set_name is provided to help debug code, it is sensible to give the same name to a communicator in all of the processes where it exists, to avoid confusion. +Since MPI_Comm_set_name is provided to help debug code, it is sensible to give the same name to a communicator in all of the processes where it exists, to avoid confusion. .sp Regarding name length, under circumstances of store exhaustion, an attempt to set a name of any length could fail; therefore, the value of MPI_MAX_OBJECT_NAME should be viewed only as a strict upper bound on the name length, not a guarantee that setting names of less than this length will always succeed. @@ -68,7 +68,7 @@ Regarding name length, under circumstances of store exhaustion, an attempt to se Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_size.3in b/ompi/mpi/man/man3/MPI_Comm_size.3in index fb0ddc14bbf..cd9bbae3fb7 100644 --- a/ompi/mpi/man/man3/MPI_Comm_size.3in +++ b/ompi/mpi/man/man3/MPI_Comm_size.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_size 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_size \fP \- Returns the size of the group associated with a communicator. +\fBMPI_Comm_size \fP \- Returns the size of the group associated with a communicator. .SH SYNTAX .ft R @@ -19,7 +19,7 @@ int MPI_Comm_size(MPI_Comm \fIcomm\fP, int *\fIsize\fP) .nf INCLUDE 'mpif.h' MPI_COMM_SIZE(\fICOMM, SIZE, IERROR\fP) - INTEGER \fICOMM, SIZE, IERROR\fP + INTEGER \fICOMM, SIZE, IERROR\fP .fi .SH C++ Syntax @@ -42,7 +42,7 @@ Number of processes in the group of comm (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -56,17 +56,17 @@ communication between two groups), this function returns the size of the local group. To return the size of the remote group, use the MPI_Comm_remote_size function. .sp -This call is often used with MPI_Comm_rank to determine the amount of concurrency available for a specific library or program. MPI_Comm_rank indicates the rank of the process that calls it in the range from 0 . . . size-1, where size is the return value of MPI_Comm_size. 
+This call is often used with MPI_Comm_rank to determine the amount of concurrency available for a specific library or program. MPI_Comm_rank indicates the rank of the process that calls it in the range from 0 . . . size-1, where size is the return value of MPI_Comm_size. .SH NOTE .ft R -MPI_COMM_NULL is not considered a valid argument to this function. +MPI_COMM_NULL is not considered a valid argument to this function. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_spawn.3in b/ompi/mpi/man/man3/MPI_Comm_spawn.3in index fd1cc867c97..2d4df8a7adf 100644 --- a/ompi/mpi/man/man3/MPI_Comm_spawn.3in +++ b/ompi/mpi/man/man3/MPI_Comm_spawn.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_spawn 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_spawn\fP \- Spawns a number of identical binaries. +\fBMPI_Comm_spawn\fP \- Spawns a number of identical binaries. 
.SH SYNTAX .ft R @@ -14,19 +14,19 @@ .nf #include int MPI_Comm_spawn(const char *\fIcommand\fP, char *\fIargv\fP[], int \fImaxprocs\fP, - MPI_Info \fIinfo\fP, int \fIroot\fP, MPI_Comm \fIcomm\fP, + MPI_Info \fIinfo\fP, int \fIroot\fP, MPI_Comm \fIcomm\fP, MPI_Comm *\fIintercomm\fP, int \fIarray_of_errcodes\fP[]) .fi .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_COMM_SPAWN(\fICOMMAND, ARGV, MAXPROCS, INFO, ROOT, COMM, +MPI_COMM_SPAWN(\fICOMMAND, ARGV, MAXPROCS, INFO, ROOT, COMM, INTERCOMM, ARRAY_OF_ERRCODES, IERROR\fP) - CHARACTER*(*) \fICOMMAND, ARGV(*)\fP - INTEGER \fIINFO, MAXPROCS, ROOT, COMM, INTERCOMM, - ARRAY_OF_ERRCODES(*), IERROR\fP + CHARACTER*(*) \fICOMMAND, ARGV(*)\fP + INTEGER \fIINFO, MAXPROCS, ROOT, COMM, INTERCOMM, + ARRAY_OF_ERRCODES(*), IERROR\fP .fi .SH C++ Syntax @@ -54,7 +54,7 @@ maxprocs Maximum number of processes to start (integer, significant only at \fIroot\fP). .TP 1i info -A set of key-value pairs telling the runtime system where and how to start the processes (handle, significant only at \fIroot\fP). +A set of key-value pairs telling the runtime system where and how to start the processes (handle, significant only at \fIroot\fP). .TP 1i root Rank of process in which previous arguments are examined (integer). @@ -72,17 +72,17 @@ array_of_errcodes One code per process (array of integers). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Comm_spawn tries to start \fImaxprocs\fP identical copies of the MPI program specified by \fIcommand\fP, establishing communication with them and returning an intercommunicator. The spawned processes are referred to as children. The children have their own MPI_COMM_WORLD, which is separate from that of the parents. MPI_Comm_spawn is collective over \fIcomm\fP, and also may not return until MPI_Init has been called in the children. Similarly, MPI_Init in the children may not return until all parents have called MPI_Comm_spawn. 
In this sense, MPI_Comm_spawn in the parents and MPI_Init in the children form a collective operation over the union of parent and child processes. The intercommunicator returned by MPI_Comm_spawn contains the parent processes in the local group and the child processes in the remote group. The ordering of processes in the local and remote groups is the same as the as the ordering of the group of the \fIcomm\fP in the parents and of MPI_COMM_WORLD of the children, respectively. This intercommunicator can be obtained in the children through the function MPI_Comm_get_parent. +MPI_Comm_spawn tries to start \fImaxprocs\fP identical copies of the MPI program specified by \fIcommand\fP, establishing communication with them and returning an intercommunicator. The spawned processes are referred to as children. The children have their own MPI_COMM_WORLD, which is separate from that of the parents. MPI_Comm_spawn is collective over \fIcomm\fP, and also may not return until MPI_Init has been called in the children. Similarly, MPI_Init in the children may not return until all parents have called MPI_Comm_spawn. In this sense, MPI_Comm_spawn in the parents and MPI_Init in the children form a collective operation over the union of parent and child processes. The intercommunicator returned by MPI_Comm_spawn contains the parent processes in the local group and the child processes in the remote group. The ordering of processes in the local and remote groups is the same as the ordering of the group of the \fIcomm\fP in the parents and of MPI_COMM_WORLD of the children, respectively. This intercommunicator can be obtained in the children through the function MPI_Comm_get_parent. .sp The MPI standard allows an implementation to use the MPI_UNIVERSE_SIZE attribute of MPI_COMM_WORLD to specify the number of processes that will be active in a program. Although this implementation of the MPI standard defines MPI_UNIVERSE_SIZE, it does not allow the user to set its value.
If you try to set the value of MPI_UNIVERSE_SIZE, you will get an error message. .sp -The \fIcommand\fP Argument +The \fIcommand\fP Argument .sp -The \fIcommand\fP argument is a string containing the name of a program to be spawned. The string is null-terminated in C. In Fortran, leading and trailing spaces are stripped. MPI looks for the file first in the working directory of the spawning process. +The \fIcommand\fP argument is a string containing the name of a program to be spawned. The string is null-terminated in C. In Fortran, leading and trailing spaces are stripped. MPI looks for the file first in the working directory of the spawning process. .sp The \fIargv\fP Argument .sp @@ -91,8 +91,8 @@ to the program. The first element of \fIargv\fP is the first argument passed to \fIcommand\fP, not, as is conventional in some contexts, the command itself. The argument list is terminated by NULL in C and C++ and an empty string in Fortran (note that it is the MPI application's -responsibility to ensure that the last entry of the -.I argv +responsibility to ensure that the last entry of the +.I argv array is an empty string; the compiler will not automatically insert it). In Fortran, leading and trailing spaces are always stripped, so that a string consisting of all spaces is considered an empty @@ -100,17 +100,17 @@ string. The constant MPI_ARGV_NULL may be used in C, C++ and Fortran to indicate an empty argument list. In C and C++, this constant is the same as NULL. .sp -In C, the MPI_Comm_spawn argument \fIargv\fP differs from the \fIargv\fP argument of \fImain\fP in two respects. First, it is shifted by one element. Specifically, \fIargv\fP[0] of \fImain\fP contains the name of the program (given by \fIcommand\fP). \fIargv\fP[1] of \fImain\fP corresponds to \fIargv\fP[0] in MPI_Comm_spawn, \fIargv\fP[2] of \fImain\fP to \fIargv\fP[1] of MPI_Comm_spawn, and so on. Second, \fIargv\fP of MPI_Comm_spawn must be null-terminated, so that its length can be determined. 
Passing an \fIargv\fP of MPI_ARGV_NULL to MPI_Comm_spawn results in \fImain\fP receiving \fIargc\fP of 1 and an \fIargv\fP whose element 0 is the name of the program. +In C, the MPI_Comm_spawn argument \fIargv\fP differs from the \fIargv\fP argument of \fImain\fP in two respects. First, it is shifted by one element. Specifically, \fIargv\fP[0] of \fImain\fP contains the name of the program (given by \fIcommand\fP). \fIargv\fP[1] of \fImain\fP corresponds to \fIargv\fP[0] in MPI_Comm_spawn, \fIargv\fP[2] of \fImain\fP to \fIargv\fP[1] of MPI_Comm_spawn, and so on. Second, \fIargv\fP of MPI_Comm_spawn must be null-terminated, so that its length can be determined. Passing an \fIargv\fP of MPI_ARGV_NULL to MPI_Comm_spawn results in \fImain\fP receiving \fIargc\fP of 1 and an \fIargv\fP whose element 0 is the name of the program. .sp The \fImaxprocs\fP Argument .sp Open MPI tries to spawn \fImaxprocs\fP processes. If it is unable to spawn \fImaxprocs\fP processes, it raises an error of class MPI_ERR_SPAWN. If MPI is able to spawn the specified number of processes, MPI_Comm_spawn returns successfully and the number of spawned processes, \fIm\fP, is given by the size of the remote group of \fIintercomm\fP. .sp -A spawn call with the default behavior is called hard. A spawn call for which fewer than \fImaxprocs\fP processes may be returned is called soft. +A spawn call with the default behavior is called hard. A spawn call for which fewer than \fImaxprocs\fP processes may be returned is called soft. .sp -The \fIinfo\fP Argument +The \fIinfo\fP Argument .sp -The \fIinfo\fP argument is an opaque handle of type MPI_Info in C, MPI::Info in C++ and INTEGER in Fortran. It is a container for a number of user-speci ed (\fIkey,value\fP) pairs. \fIkey\fP and \fIvalue\fP are strings (null-terminated char* in C, character*(*) in Fortran). Routines to create and manipulate the \fIinfo\fP argument are described in Section 4.10 of the MPI-2 standard. 
+The \fIinfo\fP argument is an opaque handle of type MPI_Info in C, MPI::Info in C++ and INTEGER in Fortran. It is a container for a number of user-specified (\fIkey,value\fP) pairs. \fIkey\fP and \fIvalue\fP are strings (null-terminated char* in C, character*(*) in Fortran). Routines to create and manipulate the \fIinfo\fP argument are described in Section 4.10 of the MPI-2 standard. .sp For the SPAWN calls, \fIinfo\fP provides additional, implementation-dependent instructions to MPI and the runtime system on how to start processes. An application may pass MPI_INFO_NULL in C or Fortran. Portable programs not requiring detailed control over process locations should use MPI_INFO_NULL. .sp @@ -199,26 +199,26 @@ and non-zero values are true). If the string value is (case-insensitive) "yes" or "true", the boolean is true. If the string value is (case-insensitive) "no" or "false", the boolean is false. All other string values are unrecognized, and therefore false. - + .sp The \fIroot\fP Argument .sp -All arguments before the \fIroot\fP argument are examined only on the process whose rank in \fIcomm\fP is equal to \fIroot\fP. The value of these arguments on other processes is ignored. +All arguments before the \fIroot\fP argument are examined only on the process whose rank in \fIcomm\fP is equal to \fIroot\fP. The value of these arguments on other processes is ignored. .sp The \fIarray_of_errcodes\fP Argument .sp -The \fIarray_of_errcodes\fP is an array of length \fImaxprocs\fP in which MPI reports the status of the processes that MPI was requested to start. If all \fImaxprocs\fP processes were spawned, \fIarray_of_errcodes\fP is filled in with the value MPI_SUCCESS. If anyof the processes are \fInot\fP spawned, \fIarray_of_errcodes\fP is filled in with the value MPI_ERR_SPAWN. In C or Fortran, an application may pass MPI_ERRCODES_IGNORE if it is not interested in the error codes. 
In C++ this constant does not exist, and the \fIarray_of_errcodes\fP argument may be omitted from the argument list. +The \fIarray_of_errcodes\fP is an array of length \fImaxprocs\fP in which MPI reports the status of the processes that MPI was requested to start. If all \fImaxprocs\fP processes were spawned, \fIarray_of_errcodes\fP is filled in with the value MPI_SUCCESS. If any of the processes are \fInot\fP spawned, \fIarray_of_errcodes\fP is filled in with the value MPI_ERR_SPAWN. In C or Fortran, an application may pass MPI_ERRCODES_IGNORE if it is not interested in the error codes. In C++ this constant does not exist, and the \fIarray_of_errcodes\fP argument may be omitted from the argument list. .SH NOTES .ft R -Completion of MPI_Comm_spawn in the parent does not necessarily mean that MPI_Init has been called in the children (although the returned intercommunicator can be used immediately). +Completion of MPI_Comm_spawn in the parent does not necessarily mean that MPI_Init has been called in the children (although the returned intercommunicator can be used immediately). .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors.
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3in b/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3in index e7befe19434..f46ec5d19c7 100644 --- a/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3in +++ b/ompi/mpi/man/man3/MPI_Comm_spawn_multiple.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Comm_spawn_multiple 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Comm_spawn_multiple\fP \- Spawns multiple binaries, or the same binary with multiple sets of arguments. +\fBMPI_Comm_spawn_multiple\fP \- Spawns multiple binaries, or the same binary with multiple sets of arguments. .SH SYNTAX .ft R @@ -22,11 +22,11 @@ int MPI_Comm_spawn_multiple(int \fIcount\fP, char *\fIarray_of_commands\fP[], .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_COMM_SPAWN_MULTIPLE(\fICOUNT, ARRAY_OF_COMMANDS, ARRAY_OF_ARGV, - ARRAY_OF_MAXPROCS, ARRAY_OF_INFO, ROOT, COMM, INTERCOMM, +MPI_COMM_SPAWN_MULTIPLE(\fICOUNT, ARRAY_OF_COMMANDS, ARRAY_OF_ARGV, + ARRAY_OF_MAXPROCS, ARRAY_OF_INFO, ROOT, COMM, INTERCOMM, ARRAY_OF_ERRCODES, IERROR\fP) - INTEGER \fICOUNT, ARRAY_OF_INFO(*), ARRAY_OF_MAXPROCS(*), ROOT, - COMM, INTERCOMM, ARRAY_OF_ERRCODES(*), IERROR\fP + INTEGER \fICOUNT, ARRAY_OF_INFO(*), ARRAY_OF_MAXPROCS(*), ROOT, + COMM, INTERCOMM, ARRAY_OF_ERRCODES(*), IERROR\fP CHARACTER*(*) \fIARRAY_OF_COMMANDS\fP(*), \fIARRAY_OF_ARGV\fP(\fICOUNT\fP, *) .fi @@ -51,13 +51,13 @@ count Number of commands (positive integer, significant to MPI only at \fIroot\fP -- see NOTES). .TP 1i array_of_commands -Programs to be executed (array of strings, significant only at \fIroot\fP). +Programs to be executed (array of strings, significant only at \fIroot\fP). 
.TP 1i array_of_argv -Arguments for \fIcommands\fP (array of array of strings, significant only at \fIroot\fP). +Arguments for \fIcommands\fP (array of array of strings, significant only at \fIroot\fP). .TP 1i array_of_maxprocs -Maximum number of processes to start for each command (array of integers, significant only at \fIroot\fP). +Maximum number of processes to start for each command (array of integers, significant only at \fIroot\fP). .TP 1i array_of_info Info objects telling the runtime system where and how to start processes (array of handles, significant only at \fIroot\fP). @@ -78,7 +78,7 @@ array_of_errcodes One code per process (array of integers). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -89,33 +89,33 @@ arrays of the corresponding arguments in MPI_Comm_spawn(3). The next argument, \fIarray_of_info\fP, is an array of \fIinfo\fP arguments, one for each executable. See the INFO ARGUMENTS section for more information. .sp -For the Fortran version of \fIarray_of_argv\fP, the element \fIarray_of_argv\fP(i,j) is the jth argument to command number i. +For the Fortran version of \fIarray_of_argv\fP, the element \fIarray_of_argv\fP(i,j) is the jth argument to command number i. .sp -In any language, an application may use the constant MPI_ARGVS_NULL (which is likely to be (char ***)0 in C) to specify that no arguments should be passed to any commands. The effect of setting individual elements of \fIarray_of_argv\fP to MPI_ARGV_NULL is not defined. To specify arguments for some commands but not others, the commands without arguments should have a corresponding \fIargv\fP whose first element is null ((char *)0 in C and empty string in Fortran). +In any language, an application may use the constant MPI_ARGVS_NULL (which is likely to be (char ***)0 in C) to specify that no arguments should be passed to any commands. 
The effect of setting individual elements of \fIarray_of_argv\fP to MPI_ARGV_NULL is not defined. To specify arguments for some commands but not others, the commands without arguments should have a corresponding \fIargv\fP whose first element is null ((char *)0 in C and empty string in Fortran). .sp All of the spawned processes have the same MPI_COMM_WORLD. Their ranks in MPI_COMM_WORLD correspond directly to the order in which the commands are specified in MPI_Comm_spawn_multiple. Assume that m1 processes are generated by the first command, m2 by the second, etc. The processes corresponding to the first command have ranks 0, 1,..., m1-1. The processes in the second command have ranks m1, m1+1, ..., m1+m2-1. The processes in the third have ranks m1+m2, m1+m2+1, ..., m1+m2+m3-1, etc. .sp -The \fIarray_of_errcodes\fP argument is 1-dimensional array of size +The \fIarray_of_errcodes\fP argument is a 1-dimensional array of size .sp .nf _ count \\ n , - /_ i=1 i + /_ i=1 i .fi .sp -where i is the ith element of \fIarray_of_maxprocs\fP. Command number \fIi\fP corresponds to the i contiguous slots in this array from element +where i is the ith element of \fIarray_of_maxprocs\fP. Command number \fIi\fP corresponds to the i contiguous slots in this array from element .sp .nf _ _ _ \fIi\fP-1 | _ \fIi\fP | \\ n , to | \\ n | -1 /_ \fIj\fP=1 i | /_ \fIj\fP=1 j | - |_ _| -.fi + |_ _| +.fi .sp -Error codes are treated as for MPI_Comm_spawn(3). +Error codes are treated as for MPI_Comm_spawn(3). + - .SH INFO ARGUMENTS The following keys for \fIinfo\fP are recognized in "#PACKAGE_NAME#". (The reserved values mentioned in Section 5.3.4 of the MPI-2 standard are not implemented.)
.sp @@ -196,7 +196,7 @@ env char* Newline-delimited list of envars to be passed to the spawned procs .fi -.sp +.sp \fIbool\fP info keys are actually strings but are evaluated as follows: if the string value is a number, it is converted to an integer and cast to a boolean (meaning that zero integers are false @@ -210,12 +210,12 @@ Note that if any of the info handles have \fIompi_non_mpi\fP set to true, then all info handles must have it set to true. If some are set to true, but others are set to false (or are unset), MPI_ERR_INFO will be returned. - + .sp -Note that in "#PACKAGE_NAME#", the first array location in \fIarray_of_info\fP is applied to all the commands in \fIarray_of_commands\fP. +Note that in "#PACKAGE_NAME#", the first array location in \fIarray_of_info\fP is applied to all the commands in \fIarray_of_commands\fP. .SH NOTES -The argument \fIcount\fP is interpreted by MPI only at the root, as is \fIarray_of_argv\fP. Since the leading dimension of \fIarray_of_argv\fP is \fIcount\fP, a nonpositive value of \fIcount\fP at a nonroot node could theoretically cause a runtime bounds check error, even though \fIarray_of_argv\fP should be ignored by the subroutine. If this happens, you should explicitly supply a reasonable value of \fIcount\fP on the nonroot nodes. +The argument \fIcount\fP is interpreted by MPI only at the root, as is \fIarray_of_argv\fP. Since the leading dimension of \fIarray_of_argv\fP is \fIcount\fP, a nonpositive value of \fIcount\fP at a nonroot node could theoretically cause a runtime bounds check error, even though \fIarray_of_argv\fP should be ignored by the subroutine. If this happens, you should explicitly supply a reasonable value of \fIcount\fP on the nonroot nodes. 
.sp Similar to MPI_Comm_spawn(3), it is the application's responsibility to terminate each individual set of argv in the @@ -226,7 +226,7 @@ compilers will not automatically insert this blank string; the application must ensure to have enough space for an empty string entry as the last element of the array). .sp -Other restrictions apply to the +Other restrictions apply to the .I array_of_argv parameter; see MPI_Comm_spawn(3)'s description of the .I argv @@ -244,7 +244,7 @@ times. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_split.3in b/ompi/mpi/man/man3/MPI_Comm_split.3in index 87d707df15c..438b7b81136 100644 --- a/ompi/mpi/man/man3/MPI_Comm_split.3in +++ b/ompi/mpi/man/man3/MPI_Comm_split.3in @@ -51,26 +51,26 @@ New communicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R -This function partitions the group associated with comm into disjoint subgroups, one for each value of color. Each subgroup contains all processes of the same color. Within each subgroup, the processes are ranked in the order defined by the value of the argument key, with ties broken according to their rank in the old group. A new communicator is created for each subgroup and returned in newcomm. A process may supply the color value MPI_UNDEFINED, in which case newcomm returns MPI_COMM_NULL. This is a collective call, but each process is permitted to provide different values for color and key. +This function partitions the group associated with comm into disjoint subgroups, one for each value of color. Each subgroup contains all processes of the same color. Within each subgroup, the processes are ranked in the order defined by the value of the argument key, with ties broken according to their rank in the old group. A new communicator is created for each subgroup and returned in newcomm. A process may supply the color value MPI_UNDEFINED, in which case newcomm returns MPI_COMM_NULL. This is a collective call, but each process is permitted to provide different values for color and key. .sp -When you call MPI_Comm_split on an inter-communicator, the processes on the left with the same color as those on the right combine to create a new inter-communicator. The key argument describes the relative rank of processes on each side of the inter-communicator. The function returns MPI_COMM_NULL for those colors that are specified on only one side of the inter-communicator, or for those that specify MPI_UNDEFINED as the color. +When you call MPI_Comm_split on an inter-communicator, the processes on the left with the same color as those on the right combine to create a new inter-communicator. The key argument describes the relative rank of processes on each side of the inter-communicator. 
The function returns MPI_COMM_NULL for those colors that are specified on only one side of the inter-communicator, or for those that specify MPI_UNDEFINED as the color. .sp -A call to MPI_Comm_create(\fIcomm\fP, \fIgroup\fP, \fInewcomm\fP) is equivalent to a call to MPI_Comm_split(\fIcomm\fP, \fIcolor\fP,\fI key\fP, \fInewcomm\fP), where all members of \fIgroup\fP provide \fIcolor\fP = 0 and \fIkey\fP = rank in group, and all processes that are not members of \fIgroup\fP provide \fIcolor\fP = MPI_UNDEFINED. The function MPI_Comm_split allows more general partitioning of a group into one or more subgroups with optional reordering. +A call to MPI_Comm_create(\fIcomm\fP, \fIgroup\fP, \fInewcomm\fP) is equivalent to a call to MPI_Comm_split(\fIcomm\fP, \fIcolor\fP,\fI key\fP, \fInewcomm\fP), where all members of \fIgroup\fP provide \fIcolor\fP = 0 and \fIkey\fP = rank in group, and all processes that are not members of \fIgroup\fP provide \fIcolor\fP = MPI_UNDEFINED. The function MPI_Comm_split allows more general partitioning of a group into one or more subgroups with optional reordering. .sp -The value of \fIcolor\fP must be nonnegative or MPI_UNDEFINED. +The value of \fIcolor\fP must be nonnegative or MPI_UNDEFINED. .SH NOTES .ft R This is an extremely powerful mechanism for dividing a single communicating group of processes into k subgroups, with k chosen implicitly by the user (by the number of colors asserted over all -the processes). Each resulting communicator will be nonoverlapping. Such a division could be useful for defining a hierarchy of computations, such as for multigrid or linear algebra. +the processes). Each resulting communicator will be nonoverlapping. Such a division could be useful for defining a hierarchy of computations, such as for multigrid or linear algebra. .sp -Multiple calls to MPI_Comm_split can be used to overcome the requirement that any call have no overlap of the resulting communicators (each process is of only one color per call). 
In this way, multiple overlapping communication structures can be created. Creative use of the color and key in such splitting operations is encouraged. +Multiple calls to MPI_Comm_split can be used to overcome the requirement that any call have no overlap of the resulting communicators (each process is of only one color per call). In this way, multiple overlapping communication structures can be created. Creative use of the color and key in such splitting operations is encouraged. .sp Note that, for a fixed color, the keys need not be unique. It is MPI_Comm_split's responsibility to sort processes in ascending order according to this key, and to break ties in a consistent way. If all the keys are specified in the same way, then all the processes in a given color will have the relative rank order as they did in their parent group. (In general, they will have different ranks.) .sp @@ -81,7 +81,7 @@ means that one needn't really pay attention to the rank-order of the processes i Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Comm_test_inter.3in b/ompi/mpi/man/man3/MPI_Comm_test_inter.3in index 302767b3c63..c128a1a2e27 100644 --- a/ompi/mpi/man/man3/MPI_Comm_test_inter.3in +++ b/ompi/mpi/man/man3/MPI_Comm_test_inter.3in @@ -42,11 +42,11 @@ flag (Logical.) .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -This local routine allows the calling process to determine the type of a communicator. It returns true for an intercommunicator, false for an intracommunicator. +This local routine allows the calling process to determine the type of a communicator. It returns true for an intercommunicator, false for an intracommunicator. .sp The type of communicator also affects the value returned by three other functions. When dealing with an intracommunicator (enables communication within a single group), the functions listed below return the expected values, group size, group, and rank. When dealing with an inter-communicator, however, they return the following values: .sp @@ -58,10 +58,10 @@ MPI_Comm_rank Returns the rank in the local group. .sp To return the remote group and remote group size of an inter-communicator, use the MPI_Comm_remote_group and MPI_Comm_remote_size functions. .sp -The operation MPI_Comm_compare is valid for intercommunicators. Both communicators must be either intra- or intercommunicators, or else MPI_UNEQUAL results. Both corresponding local and remote groups must compare correctly to get the results MPI_CONGRUENT and MPI_SIMILAR. In particular, it is possible for MPI_SIMILAR to result because either the local or remote groups were similar but not identical. +The operation MPI_Comm_compare is valid for intercommunicators. Both communicators must be either intra- or intercommunicators, or else MPI_UNEQUAL results. 
Both corresponding local and remote groups must compare correctly to get the results MPI_CONGRUENT and MPI_SIMILAR. In particular, it is possible for MPI_SIMILAR to result because either the local or remote groups were similar but not identical. .sp The following accessors provide consistent access to the remote group of an -intercommunicator: MPI_Comm_remote_size, MPI_Comm_remote_group. +intercommunicator: MPI_Comm_remote_size, MPI_Comm_remote_group. .sp The intercommunicator accessors (MPI_Comm_test_inter, MPI_Comm_remote_size, MPI_Comm_remote_group) are all local operations. @@ -69,7 +69,7 @@ The intercommunicator accessors (MPI_Comm_test_inter, MPI_Comm_remote_size, MPI_ Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Compare_and_swap.3in b/ompi/mpi/man/man3/MPI_Compare_and_swap.3in index a2ee2a92e85..f0ddef3d58d 100644 --- a/ompi/mpi/man/man3/MPI_Compare_and_swap.3in +++ b/ompi/mpi/man/man3/MPI_Compare_and_swap.3in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright 2013-2014 Los Alamos National Security, LLC. All rights reserved. +.\" Copyright 2013-2015 Los Alamos National Security, LLC. All rights reserved. .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation @@ -13,8 +13,8 @@ .SH C Syntax .nf #include -int MPI_Compare_and_swap(const void *\fIorigin_addr\fP, const void *\fIcompar_addr\fP, - void *\fresult_addr\fP, MPI_Datatype \fdatatype\fP, int \fItarget_rank\fP, +int MPI_Compare_and_swap(const void *\fIorigin_addr\fP, const void *\fIcompare_addr\fP, + void *\fIresult_addr\fP, MPI_Datatype \fIdatatype\fP, int \fItarget_rank\fP, MPI_Aint \fItarget_disp\fP, MPI_Win \fIwin\fP) .fi diff --git a/ompi/mpi/man/man3/MPI_Dims_create.3in b/ompi/mpi/man/man3/MPI_Dims_create.3in index 632e98d6d99..37c1e691c96 100644 --- a/ompi/mpi/man/man3/MPI_Dims_create.3in +++ b/ompi/mpi/man/man3/MPI_Dims_create.3in @@ -46,18 +46,18 @@ Integer array of size ndims specifying the number of nodes in each dimension. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -For Cartesian topologies, the function MPI_Dims_create helps the user select a balanced distribution of processes per coordinate direction, depending on the number of processes in the group to be balanced and optional constraints that can be specified by the user. One use is to partition all the processes (the size of MPI_COMM_WORLD's group) into an n-dimensional topology. 
+For Cartesian topologies, the function MPI_Dims_create helps the user select a balanced distribution of processes per coordinate direction, depending on the number of processes in the group to be balanced and optional constraints that can be specified by the user. One use is to partition all the processes (the size of MPI_COMM_WORLD's group) into an n-dimensional topology. .sp -The entries in the array \fIdims\fP are set to describe a Cartesian grid with \fIndims\fP dimensions and a total of \fInnodes\fP nodes. The dimensions are set to be as close to each other as possible, using an appropriate divisibility algorithm. The caller may further constrain the operation of this routine by specifying elements of array dims. If dims[i] is set to a positive number, the routine will not modify the number of nodes in dimension i; only those entries where dims[i] = 0 are modified by the call. +The entries in the array \fIdims\fP are set to describe a Cartesian grid with \fIndims\fP dimensions and a total of \fInnodes\fP nodes. The dimensions are set to be as close to each other as possible, using an appropriate divisibility algorithm. The caller may further constrain the operation of this routine by specifying elements of array dims. If dims[i] is set to a positive number, the routine will not modify the number of nodes in dimension i; only those entries where dims[i] = 0 are modified by the call. .sp Negative input values of dims[i] are erroneous. An error will occur if -nnodes is not a multiple of ((pi) over (i, dims[i] != 0)) dims[i]. +nnodes is not a multiple of ((pi) over (i, dims[i] != 0)) dims[i]. .sp -For dims[i] set by the call, dims[i] will be ordered in nonincreasing order. Array dims is suitable for use as input to routine MPI_Cart_create. MPI_Dims_create is local. +For dims[i] set by the call, dims[i] will be ordered in nonincreasing order. Array dims is suitable for use as input to routine MPI_Cart_create. MPI_Dims_create is local. 
.sp \fBExample:\fP .nf @@ -77,5 +77,5 @@ call function call on return Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Errhandler_create.3in b/ompi/mpi/man/man3/MPI_Errhandler_create.3in index 29bbcb1de1c..5a7f1fd04cd 100644 --- a/ompi/mpi/man/man3/MPI_Errhandler_create.3in +++ b/ompi/mpi/man/man3/MPI_Errhandler_create.3in @@ -39,34 +39,34 @@ MPI error handler (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_create_errhandler instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_create_errhandler instead. .sp -This deprecated routine is not available in C++. +This deprecated routine is not available in C++. .sp -Registers the user routine function for use as an MPI exception handler. 
Returns in errhandler a handle to the registered exception handler. +Registers the user routine function for use as an MPI exception handler. Returns in errhandler a handle to the registered exception handler. .sp In the C language, the user routine should be a C function of type MPI_Handler_function, which is defined as .sp .nf - typedef void (MPI_Handler_function)(MPI_Comm *, int *, \&...); + typedef void (MPI_Handler_function)(MPI_Comm *, int *, \&...); .fi .sp The first argument is the communicator in use. The second is the error code -to be returned by the MPI routine that raised the error. If the routine would have returned MPI_ERR_IN_STATUS, it is the error code returned in the status for the request that caused the error handler to be invoked. The remaining arguments are stdargs arguments whose number and meaning is implementation-dependent. An implementation should clearly document these arguments. Addresses are used so that the handler may be written in Fortran. +to be returned by the MPI routine that raised the error. If the routine would have returned MPI_ERR_IN_STATUS, it is the error code returned in the status for the request that caused the error handler to be invoked. The remaining arguments are stdargs arguments whose number and meaning is implementation-dependent. An implementation should clearly document these arguments. Addresses are used so that the handler may be written in Fortran. .SH NOTE .ft R -The MPI-1 Standard states that an implementation may make the output value (errhandler) simply the address of the function. However, the action of MPI_Errhandler_ free makes this impossible, since it is required to set the value of the argument to MPI_ERRHANDLER_NULL. In addition, the actual error handler must remain until all communicators that use it are freed. +The MPI-1 Standard states that an implementation may make the output value (errhandler) simply the address of the function. 
However, the action of MPI_Errhandler_free makes this impossible, since it is required to set the value of the argument to MPI_ERRHANDLER_NULL. In addition, the actual error handler must remain until all communicators that use it are freed. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .br diff --git a/ompi/mpi/man/man3/MPI_Errhandler_free.3in b/ompi/mpi/man/man3/MPI_Errhandler_free.3in index 4799c3b0a7b..96300c68730 100644 --- a/ompi/mpi/man/man3/MPI_Errhandler_free.3in +++ b/ompi/mpi/man/man3/MPI_Errhandler_free.3in @@ -19,13 +19,13 @@ int MPI_Errhandler_free(MPI_Errhandler *\fIerrhandler\fP) .nf INCLUDE 'mpif.h' MPI_ERRHANDLER_FREE(\fIERRHANDLER, IERROR\fP) - INTEGER \fIERRHANDLER, IERROR\fP + INTEGER \fIERRHANDLER, IERROR\fP .fi .SH C++ Syntax .nf #include -void Errhandler::Free() +void Errhandler::Free() .fi .SH INPUT PARAMETER @@ -38,17 +38,17 @@ MPI error handler (handle). Set to MPI_ERRHANDLER_NULL on exit.
.ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Marks the error handler associated with errhandler for deallocation and sets errhandler to MPI_ERRHANDLER_NULL. The error handler will be deallocated after all communicators associated with it have been deallocated. +Marks the error handler associated with errhandler for deallocation and sets errhandler to MPI_ERRHANDLER_NULL. The error handler will be deallocated after all communicators associated with it have been deallocated. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Errhandler_get.3in b/ompi/mpi/man/man3/MPI_Errhandler_get.3in index 250c50cefcb..7448fdfc9cf 100644 --- a/ompi/mpi/man/man3/MPI_Errhandler_get.3in +++ b/ompi/mpi/man/man3/MPI_Errhandler_get.3in @@ -19,7 +19,7 @@ int MPI_Errhandler_get(MPI_Comm \fIcomm\fP, MPI_Errhandler\fI *errhandler\fP) .nf INCLUDE 'mpif.h' MPI_ERRHANDLER_GET(\fICOMM, ERRHANDLER, IERROR\fP) - INTEGER \fICOMM, ERRHANDLER, IERROR\fP + INTEGER \fICOMM, ERRHANDLER, IERROR\fP .fi @@ -37,24 +37,24 @@ MPI error handler currently associated with communicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_get_errhandler instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_get_errhandler instead. .sp -This deprecated routine is not available in C++. +This deprecated routine is not available in C++. .sp Returns in errhandler (a handle to) the error handler that is currently associated with communicator comm. .sp -\fBExample:\fP A library function may register at its entry point the current error handler for a communicator, set its own private error handler for this communicator, and restore before exiting the previous error handler. +\fBExample:\fP A library function may register at its entry point the current error handler for a communicator, set its own private error handler for this communicator, and restore before exiting the previous error handler. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. 
.sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Errhandler_set.3in b/ompi/mpi/man/man3/MPI_Errhandler_set.3in index 6bab69e522c..e4c1d3d773a 100644 --- a/ompi/mpi/man/man3/MPI_Errhandler_set.3in +++ b/ompi/mpi/man/man3/MPI_Errhandler_set.3in @@ -35,21 +35,21 @@ New MPI error handler for communicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_set_errhandler instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_set_errhandler instead. .sp -This deprecated routine is not available in C++. +This deprecated routine is not available in C++. .sp -Associates the new error handler errhandler with communicator comm at the calling process. Note that an error handler is always associated with the communicator. +Associates the new error handler errhandler with communicator comm at the calling process. Note that an error handler is always associated with the communicator. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. 
C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Error_class.3in b/ompi/mpi/man/man3/MPI_Error_class.3in index a9e5d86ffa8..bd892fdea0e 100644 --- a/ompi/mpi/man/man3/MPI_Error_class.3in +++ b/ompi/mpi/man/man3/MPI_Error_class.3in @@ -19,7 +19,7 @@ int MPI_Error_class(int \fIerrorcode\fP, int\fI *errorclass\fP) .nf INCLUDE 'mpif.h' MPI_ERROR_CLASS(\fIERRORCODE, ERRORCLASS, IERROR\fP) - INTEGER \fIERRORCODE, ERRORCLASS, IERROR\fP + INTEGER \fIERRORCODE, ERRORCLASS, IERROR\fP .fi .SH C++ Syntax @@ -42,7 +42,7 @@ Error class associated with errorcode. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -52,7 +52,7 @@ The function MPI_Error_class maps each standard error code (error class) onto it Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Error_string.3in b/ompi/mpi/man/man3/MPI_Error_string.3in index 8b78e5a5225..db67d55b878 100644 --- a/ompi/mpi/man/man3/MPI_Error_string.3in +++ b/ompi/mpi/man/man3/MPI_Error_string.3in @@ -20,7 +20,7 @@ int MPI_Error_string(int \fIerrorcode\fP, char\fI *string\fP, int\fI *resultlen\ INCLUDE 'mpif.h' MPI_ERROR_STRING(\fIERRORCODE, STRING, RESULTLEN, IERROR\fP) INTEGER \fIERRORCODE, RESULTLEN, IERROR\fP - CHARACTER*(*) \fISTRING\fP + CHARACTER*(*) \fISTRING\fP .fi .SH C++ Syntax @@ -46,20 +46,20 @@ Length of string. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Returns the error string associated with an error code or class. The argument string must represent storage that is at least MPI_MAX_ERROR_STRING characters long. +Returns the error string associated with an error code or class. The argument string must represent storage that is at least MPI_MAX_ERROR_STRING characters long. 
.sp The number of characters actually written is returned in the output -argument, resultlen. +argument, resultlen. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Exscan.3in b/ompi/mpi/man/man3/MPI_Exscan.3in index 80816172582..e6715ecba0b 100644 --- a/ompi/mpi/man/man3/MPI_Exscan.3in +++ b/ompi/mpi/man/man3/MPI_Exscan.3in @@ -28,7 +28,7 @@ int MPI_Iexscan(const void *\fIsendbuf\fP, void *\fIrecvbuf\fP, int \fIcount\fP, INCLUDE 'mpif.h' MPI_EXSCAN(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fICOUNT, DATATYPE, OP, COMM, IERROR\fP + INTEGER \fICOUNT, DATATYPE, OP, COMM, IERROR\fP MPI_IEXSCAN(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, REQUEST, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP @@ -72,7 +72,7 @@ Request (handle, non-blocking only). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R @@ -90,7 +90,13 @@ as \fIrecvbuf\fP is not significant for process 0. The value of \fIrecvbuf\fP on process 1 is always the value in \fIsendbuf\fP on process 0. .sp -No MPI_IN_PLACE operation is supported. +.SH USE OF IN-PLACE OPTION +The `in place' option for intracommunicators is specified by passing MPI_IN_PLACE in the \fIsendbuf\fP argument. In this case, the input data is taken from the receive buffer, and replaced by the output data. +.sp +Note that MPI_IN_PLACE is a special kind of value; it has the same restrictions on its use as MPI_BOTTOM. +.sp +Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. +.sp .SH NOTES .ft R @@ -132,7 +138,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Fetch_and_op.3in b/ompi/mpi/man/man3/MPI_Fetch_and_op.3in index d703bbd6da1..dfddfe816ab 100644 --- a/ompi/mpi/man/man3/MPI_Fetch_and_op.3in +++ b/ompi/mpi/man/man3/MPI_Fetch_and_op.3in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright 2013-2014 Los Alamos National Security, LLC. All rights reserved. +.\" Copyright 2013-2015 Los Alamos National Security, LLC. All rights reserved. .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. 
.\" Copyright (c) 1996 Thinking Machines Corporation @@ -13,8 +13,8 @@ .SH C Syntax .nf #include -int MPI_Fetch_and_op(const void *\fIorigin_addr\fP, void *\fresult_addr\fP, - MPI_Datatype \fdatatype\fP, int \fItarget_rank\fP, MPI_Aint \fItarget_disp\fP, +int MPI_Fetch_and_op(const void *\fIorigin_addr\fP, void *\fIresult_addr\fP, + MPI_Datatype \fIdatatype\fP, int \fItarget_rank\fP, MPI_Aint \fItarget_disp\fP, MPI_Op \fIop\fP, MPI_Win \fIwin\fP) .fi diff --git a/ompi/mpi/man/man3/MPI_File_call_errhandler.3in b/ompi/mpi/man/man3/MPI_File_call_errhandler.3in index c3ddb674fe3..4b9e356a5c0 100644 --- a/ompi/mpi/man/man3/MPI_File_call_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_File_call_errhandler.3in @@ -45,7 +45,7 @@ MPI error code (integer). .ft R .TP 1.4i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R diff --git a/ompi/mpi/man/man3/MPI_File_close.3in b/ompi/mpi/man/man3/MPI_File_close.3in index 37e70279e60..b55c84730e0 100644 --- a/ompi/mpi/man/man3/MPI_File_close.3in +++ b/ompi/mpi/man/man3/MPI_File_close.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_close 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,16 +12,17 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_close(MPI_File \fI*fh\fP) +.SH C Syntax +#include +int MPI_File_close(MPI_File \fI*fh\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_CLOSE(\fIFH\fP,\fI IERROR\fP) - INTEGER FH, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_CLOSE(\fIFH\fP, \fIIERROR\fP) + INTEGER \fIFH, IERROR\fP .fi .SH C++ Syntax @@ -31,20 +34,20 @@ void MPI::File::Close() .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). 
.SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_File_close first synchronizes file state, then closes the file -associated with -.I fh. +associated with +.I fh. MPI_File_close is a collective routine. The user is responsible for ensuring that all outstanding requests associated with .I fh @@ -54,6 +57,6 @@ have completed before calling MPI_File_close. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_create_errhandler.3in b/ompi/mpi/man/man3/MPI_File_create_errhandler.3in index 46b81c3f3a2..f917634c6f4 100644 --- a/ompi/mpi/man/man3/MPI_File_create_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_File_create_errhandler.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_File_create_errhandler 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_create_errhandler \fP \- Creates an MPI-style error handler that can be attached to a file. 
+\fBMPI_File_create_errhandler \fP \- Creates an MPI-style error handler that can be attached to a file. .SH SYNTAX .ft R @@ -54,21 +54,21 @@ MPI error handler (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Registers the user routine \fIfunction\fP for use as an MPI exception handler. Returns in errhandler a handle to the registered exception handler. +Registers the user routine \fIfunction\fP for use as an MPI exception handler. Returns in errhandler a handle to the registered exception handler. .sp In the C language, the user routine \fIfunction\fP should be a C function of type MPI_File_errhandler_function, which is defined as .sp .nf - typedef void (MPI_File_errhandler_function)(MPI_File *, int *, - \&...); + typedef void (MPI_File_errhandler_function)(MPI_File *, int *, + \&...); .fi .sp The first argument to \fIfunction\fP is the file in use. The second is the error code -to be returned by the MPI routine that raised the error. +to be returned by the MPI routine that raised the error. .sp In the Fortran language, the user routine should be of the form: .sp @@ -80,7 +80,7 @@ In the Fortran language, the user routine should be of the form: In C++, the user routine \fIfunction\fP should be of the form: .sp .nf - typedef void MPI::File::Errhandler_function(MPI::File &, int *, + typedef void MPI::File::Errhandler_function(MPI::File &, int *, ...); .fi .sp @@ -89,6 +89,6 @@ In C++, the user routine \fIfunction\fP should be of the form: Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. 
For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_delete.3in b/ompi/mpi/man/man3/MPI_File_delete.3in index 595d1d9794c..882e6bfa9c5 100644 --- a/ompi/mpi/man/man3/MPI_File_delete.3in +++ b/ompi/mpi/man/man3/MPI_File_delete.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_delete 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,23 +13,24 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_delete(const char \fI*filename\fP, MPI_Info \fIinfo\fP) +.SH C Syntax +#include +int MPI_File_delete(const char \fI*filename\fP, MPI_Info \fIinfo\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_DELETE(\fIFILENAME\fP, \fIINFO\fP, \fIIERROR\fP) - CHARACTER*(*) \fIFILENAME\fP - INTEGER \fIINFO, IERROR\fP +USE MPI +! 
or the older form: INCLUDE 'mpif.h' +MPI_FILE_DELETE(\fIFILENAME\fP, \fIINFO\fP, \fIIERROR\fP) + CHARACTER*(*) \fIFILENAME\fP + INTEGER \fIINFO, IERROR\fP .fi .SH C++ Syntax .nf #include -static void MPI::File::Delete(const char* \fIfilename\fP, const +static void MPI::File::Delete(const char* \fIfilename\fP, const MPI::Info& \fIinfo\fP) .fi @@ -38,17 +41,17 @@ filename Name of file to delete (string). .TP 1i info -Info object (handle). +Info object (handle). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_delete deletes the file identified by the file name +MPI_File_delete deletes the file identified by the file name \fIfilename\fP, provided it is not currently open by any process. It is an error to delete the file with MPI_File_delete if some process has it open, but MPI_File_delete does not check this. If the file does not exist, MPI_File_delete returns an error in the class MPI_ERR_NO_SUCH_FILE. .sp @@ -56,6 +59,6 @@ MPI_File_delete deletes the file identified by the file name Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. 
The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_get_amode.3in b/ompi/mpi/man/man3/MPI_File_get_amode.3in index a75bc401faa..22b1932a431 100644 --- a/ompi/mpi/man/man3/MPI_File_get_amode.3in +++ b/ompi/mpi/man/man3/MPI_File_get_amode.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_get_amode 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,16 +12,17 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_get_amode(MPI_File \fIfh\fP, int \fI*amode\fP) +.SH C Syntax +#include +int MPI_File_get_amode(MPI_File \fIfh\fP, int \fI*amode\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_GET_AMODE(\fIFH\fP,\fI AMODE\fP, \fI IERROR\fP) - INTEGER FH, AMODE, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_GET_AMODE(\fIFH\fP, \fIAMODE\fP, \fIIERROR\fP) + INTEGER \fIFH, AMODE, IERROR\fP .fi .SH C++ Syntax @@ -31,7 +34,7 @@ int MPI::File::Get_amode() const .SH INPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH OUTPUT PARAMETERS @@ -41,19 +44,19 @@ amode File access mode used to open the file (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_get_amode returns, in +MPI_File_get_amode returns, in .I amode, -the access mode associated with the open file +the access mode associated with the open file .I fh. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. 
C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_get_atomicity.3in b/ompi/mpi/man/man3/MPI_File_get_atomicity.3in index 605c70c4692..7a2c5026fab 100644 --- a/ompi/mpi/man/man3/MPI_File_get_atomicity.3in +++ b/ompi/mpi/man/man3/MPI_File_get_atomicity.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_get_atomicity 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,17 +12,18 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_get_atomicity(MPI_File \fIfh\fP, int \fI*flag\fP) +.SH C Syntax +#include +int MPI_File_get_atomicity(MPI_File \fIfh\fP, int \fI*flag\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_GET_ATOMICITY(\fIFH\fP,\fI FLAG\fP,\fI IERROR\fP) - INTEGER \FIFH, IERROR\FP - LOGICAL \FIFLAG\FP +USE MPI +! 
or the older form: INCLUDE 'mpif.h' +MPI_FILE_GET_ATOMICITY(\fIFH\fP, \fIFLAG\fP, \fIIERROR\fP) + INTEGER \fIFH, IERROR\fP + LOGICAL \fIFLAG\fP .fi .SH C++ Syntax @@ -42,23 +45,23 @@ flag true if atomic mode is enabled, false if nonatomic mode is enabled (boolean). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_File_get_atomicity returns the current consistency semantics for data access operations on the set of file handles created by one -collective MPI_File_open. If \fIflag\fP is +collective MPI_File_open. If \fIflag\fP is .I true, -atomic mode is currently enabled; if -.I flag -is +atomic mode is currently enabled; if +.I flag +is .I false, -nonatomic mode is currently enabled. +nonatomic mode is currently enabled. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_File_get_byte_offset.3in b/ompi/mpi/man/man3/MPI_File_get_byte_offset.3in index 1e87940934c..ff7b19545c8 100644 --- a/ompi/mpi/man/man3/MPI_File_get_byte_offset.3in +++ b/ompi/mpi/man/man3/MPI_File_get_byte_offset.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_get_byte_offset 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,31 +12,32 @@ .SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_get_byte_offset(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, - MPI_Offset \fI*disp\fP) +.SH C Syntax +#include <mpi.h> +int MPI_File_get_byte_offset(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, + MPI_Offset \fI*disp\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_GET_BYTE_OFFSET(\fIFH\fP, \fIOFFSET\fP, \fIDISP\fP,\fI IERROR\fP) - INTEGER \fIFH, IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET, DISP\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_GET_BYTE_OFFSET(\fIFH\fP, \fIOFFSET\fP, \fIDISP\fP, \fIIERROR\fP) + INTEGER \fIFH, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET, DISP\fP .fi .SH C++ Syntax .nf #include <mpi.h> -MPI::Offset MPI::File::Get_byte_offset(const MPI::Offset \fIdisp\fP) +MPI::Offset MPI::File::Get_byte_offset(const MPI::Offset \fIdisp\fP) const .fi .SH INPUT PARAMETERS .ft R .TP 1i -fh +fh File handle (handle). .ft R .TP 1i @@ -45,14 +48,14 @@ Offset (integer). .ft R .TP 1i disp -Absolute byte position of offset (integer). +Absolute byte position of offset (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer).
.SH DESCRIPTION .ft R -MPI_File_get_byte_offset converts an offset specified for the current view to its corresponding displacement value, or absolute byte position, from the beginning of the file. The absolute byte position of \fIoffset\fP relative to the current view of \fIfh\fP is returned in \fIdisp\fP. +MPI_File_get_byte_offset converts an offset specified for the current view to its corresponding displacement value, or absolute byte position, from the beginning of the file. The absolute byte position of \fIoffset\fP relative to the current view of \fIfh\fP is returned in \fIdisp\fP. .SH FORTRAN 77 NOTES .ft R @@ -73,6 +76,6 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_File_get_errhandler.3in b/ompi/mpi/man/man3/MPI_File_get_errhandler.3in index 61a592887db..482c0a617cd 100644 --- a/ompi/mpi/man/man3/MPI_File_get_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_File_get_errhandler.3in @@ -5,14 +5,14 @@ .\" $COPYRIGHT$ .TH MPI_File_get_errhandler 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_get_errhandler \fP \- Gets the error handler for a file. +\fBMPI_File_get_errhandler \fP \- Gets the error handler for a file. .SH SYNTAX .ft R .SH C Syntax .nf #include <mpi.h> -int MPI_File_get_errhandler(MPI_File \fIfile\fP, MPI_Errhandler\fI +int MPI_File_get_errhandler(MPI_File \fIfile\fP, MPI_Errhandler\fI *errhandler\fP) .fi @@ -20,7 +20,7 @@ int MPI_File_get_errhandler(MPI_File \fIfile\fP, MPI_Errhandler\fI .nf INCLUDE 'mpif.h' MPI_FILE_GET_ERRHANDLER(\fIFILE, ERRHANDLER, IERROR\fP) - INTEGER \fIFILE, ERRHANDLER, IERROR\fP + INTEGER \fIFILE, ERRHANDLER, IERROR\fP .fi .SH C++ Syntax @@ -33,7 +33,7 @@ MPI::Errhandler MPI::File::Get_errhandler() const .ft R .TP 1i file -File (handle). +File (handle). .SH OUTPUT PARAMETERS .ft R @@ -43,15 +43,15 @@ MPI error handler currently associated with file (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Returns in \fIerrhandler\fP (a handle to) the error handler that is currently associated with file \fIfile\fP. +Returns in \fIerrhandler\fP (a handle to) the error handler that is currently associated with file \fIfile\fP. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called.
For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_get_group.3in b/ompi/mpi/man/man3/MPI_File_get_group.3in index 5335f3f1a6b..c8c4650943c 100644 --- a/ompi/mpi/man/man3/MPI_File_get_group.3in +++ b/ompi/mpi/man/man3/MPI_File_get_group.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_get_group 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,16 +12,17 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_get_group(MPI_File \fIfh\fP, MPI_Group \fI*group\fP) +.SH C Syntax +#include +int MPI_File_get_group(MPI_File \fIfh\fP, MPI_Group \fI*group\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_GET_GROUP(\fIFH\fP,\fI GROUP\fP, \fI IERROR\fP) - INTEGER FH, GROUP, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_GET_GROUP(\fIFH\fP, \fIGROUP\fP, \fIIERROR\fP) + INTEGER \fIFH, GROUP, IERROR\fP .fi .SH C++ Syntax @@ -31,7 +34,7 @@ MPI::Group MPI::File::Get_group() const .SH INPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH OUTPUT PARAMETERS @@ -40,22 +43,22 @@ group Group that opened the file (handle). 
.TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_File_get_group returns a duplicate of the group of the communicator -used to open the file associated with +used to open the file associated with .I fh. -The group is returned in +The group is returned in .I group. -The user is responsible for freeing -.I group, -using MPI_Group_free. +The user is responsible for freeing +.I group, +using MPI_Group_free. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_get_info.3in b/ompi/mpi/man/man3/MPI_File_get_info.3in index 8946709d4d3..f533d749ef4 100644 --- a/ompi/mpi/man/man3/MPI_File_get_info.3in +++ b/ompi/mpi/man/man3/MPI_File_get_info.3in @@ -2,25 +2,28 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. 
.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_get_info 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_get_info\fP \- Returns a new info object containing values for current hints associated with a file. +\fBMPI_File_get_info\fP \- Returns a new info object containing values for current hints associated with a file. .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_get_info(MPI_File \fIfh\fP, MPI_Info \fI*info_used\fP) +.SH C Syntax +#include +int MPI_File_get_info(MPI_File \fIfh\fP, MPI_Info \fI*info_used\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_GET_INFO(\fIFH\fP, \fIINFO_USED\fP, \fIIERROR\fP) - INTEGER FH, INFO_USED, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_GET_INFO(\fIFH\fP, \fIINFO_USED\fP, \fIIERROR\fP) + INTEGER \fIFH, INFO_USED, IERROR\fP .fi .SH C++ Syntax @@ -32,7 +35,7 @@ MPI::Info MPI::File::Get_info() const .SH INPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH OUTPUT PARAMETERS @@ -42,26 +45,26 @@ info_used New info object (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_get_info returns a new info object containing all the hints that the system currently associates with the file \fIfh\fP. The current setting of all hints actually used by the system related to this open file is returned in \fIinfo_used\fP. The user is responsible for freeing \fIinfo_used\fP via MPI_Info_free. +MPI_File_get_info returns a new info object containing all the hints that the system currently associates with the file \fIfh\fP. The current setting of all hints actually used by the system related to this open file is returned in \fIinfo_used\fP. The user is responsible for freeing \fIinfo_used\fP via MPI_Info_free. 
-Note that the set of hints returned in \fIinfo_used\fP may be greater or smaller than the set of hints passed in to MPI_File_open, MPI_File_set_view, and MPI_File_set_info, as the system may not recognize some hints set by the user, and may automatically set other hints that the user has not requested to be set. See the HINTS section for a list of hints that can be set. +Note that the set of hints returned in \fIinfo_used\fP may be greater or smaller than the set of hints passed in to MPI_File_open, MPI_File_set_view, and MPI_File_set_info, as the system may not recognize some hints set by the user, and may automatically set other hints that the user has not requested to be set. See the HINTS section for a list of hints that can be set. .SH HINTS .ft R -The following hints can be used as values for the \fIinfo_used\fP argument. +The following hints can be used as values for the \fIinfo_used\fP argument. .sp SETTABLE HINTS: .sp -- shared_file_timeout: Amount of time (in seconds) to wait for access to the +- shared_file_timeout: Amount of time (in seconds) to wait for access to the shared file pointer before exiting with MPI_ERR_TIMEDOUT. .sp -- rwlock_timeout: Amount of time (in seconds) to wait for obtaining a read or +- rwlock_timeout: Amount of time (in seconds) to wait for obtaining a read or write lock on a contiguous chunk of a UNIX file before exiting with MPI_ERR_TIMEDOUT. -.sp +.sp - noncoll_read_bufsize: Maximum size of the buffer used by MPI I/O to satisfy read requests in the noncollective data-access routines. (See NOTE, below.) @@ -85,13 +88,13 @@ I/O routines can bind an extra thread to an LWP. .sp - mpiio_coll_contiguous: (boolean) controls whether subsequent collective data accesses will request collectively contiguous regions of the file. .sp -NON-SETTABLE HINTS: +NON-SETTABLE HINTS: .sp -- filename: Access this hint to get the name of the file. +- filename: Access this hint to get the name of the file. 
.SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_get_position.3in b/ompi/mpi/man/man3/MPI_File_get_position.3in index c2148477fc4..7ca60ac230a 100644 --- a/ompi/mpi/man/man3/MPI_File_get_position.3in +++ b/ompi/mpi/man/man3/MPI_File_get_position.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_get_position 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,17 +12,18 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_get_position(MPI_File \fIfh\fP, MPI_Offset \fI*offset\fP) +.SH C Syntax +#include +int MPI_File_get_position(MPI_File \fIfh\fP, MPI_Offset \fI*offset\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_GET_POSITION(\fIFH\fP,\fI OFFSET\fP,\fI IERROR\fP) - INTEGER \fIFH, IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_GET_POSITION(\fIFH\fP, \fIOFFSET\fP, \fIIERROR\fP) + INTEGER \fIFH, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax @@ -32,7 +35,7 @@ MPI::Offset MPI::File::Get_position() const .SH INPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH OUTPUT PARAMETERS @@ -42,15 +45,15 @@ offset Offset of the individual file pointer (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_get_position returns, in +MPI_File_get_position returns, in .I offset, -the current position of the individual file pointer in -.I etype -units relative to the current displacement and file type. +the current position of the individual file pointer in +.I etype +units relative to the current displacement and file type. .SH FORTRAN 77 NOTES .ft R @@ -69,5 +72,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. 
The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_get_position_shared.3in b/ompi/mpi/man/man3/MPI_File_get_position_shared.3in index afc927e872a..3936ec65a33 100644 --- a/ompi/mpi/man/man3/MPI_File_get_position_shared.3in +++ b/ompi/mpi/man/man3/MPI_File_get_position_shared.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_get_position_shared 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,17 +12,18 @@ .SH SYNTAX .ft R .nf -C Syntax +.SH C Syntax #include int MPI_File_get_position_shared(MPI_File \fIfh\fP, MPI_Offset \fI*offset\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_GET_POSITION_SHARED(\fIFH\fP,\fI OFFSET\fP,\fI IERROR\fP) - INTEGER \fIFH, IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_GET_POSITION_SHARED(\fIFH\fP, \fIOFFSET\fP, \fIIERROR\fP) + INTEGER \fIFH, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax @@ -32,7 +35,7 @@ MPI::Offset MPI::File::Get_position_shared() const .SH INPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). 
.SH OUTPUT PARAMETERS @@ -42,15 +45,15 @@ offset Offset of the shared file pointer (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_get_position_shared returns, in +MPI_File_get_position_shared returns, in .I offset, -the current position of the shared file pointer in -.I etype -units relative to the current displacement and file type. +the current position of the shared file pointer in +.I etype +units relative to the current displacement and file type. .SH FORTRAN 77 NOTES .ft R @@ -69,5 +72,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_get_size.3in b/ompi/mpi/man/man3/MPI_File_get_size.3in index 52c39b11cd9..40c5ac4cd02 100644 --- a/ompi/mpi/man/man3/MPI_File_get_size.3in +++ b/ompi/mpi/man/man3/MPI_File_get_size.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. 
.\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_get_size 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,17 +12,18 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_get_size(MPI_File \fIfh\fP, MPI_Offset \fI*size\fP) +.SH C Syntax +#include +int MPI_File_get_size(MPI_File \fIfh\fP, MPI_Offset \fI*size\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_GET_SIZE(\fIFH\fP,\fI SIZE\fP, \fI IERROR\fP) - INTEGER \fIFH, ERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fISIZE\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_GET_SIZE(\fIFH\fP, \fISIZE\fP, \fIIERROR\fP) + INTEGER \fIFH, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fISIZE\fP .fi .SH C++ Syntax @@ -32,7 +35,7 @@ MPI::Offset MPI::File::Get_size() const .SH INPUT PARAMETERS .ft R .TP 1i -fh +fh File handle (handle). .TP 1i size @@ -42,14 +45,14 @@ Size of the file in bytes (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_get_size returns, in +MPI_File_get_size returns, in .I size -, the current size in bytes of the file associated with the file handle -\fIfh\fP. Note that the file size returned by Solaris may not represent the number of bytes physically allocated for the file in those cases where all bytes in this file have not been written at least once. +, the current size in bytes of the file associated with the file handle +\fIfh\fP. Note that the file size returned by Solaris may not represent the number of bytes physically allocated for the file in those cases where all bytes in this file have not been written at least once. .SH FORTRAN 77 NOTES .ft R @@ -68,7 +71,7 @@ and gives the length of the declared integer in bytes. 
Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .br diff --git a/ompi/mpi/man/man3/MPI_File_get_type_extent.3in b/ompi/mpi/man/man3/MPI_File_get_type_extent.3in index 5b2375b9513..7dd5ceeffa3 100644 --- a/ompi/mpi/man/man3/MPI_File_get_type_extent.3in +++ b/ompi/mpi/man/man3/MPI_File_get_type_extent.3in @@ -2,32 +2,35 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_get_type_extent 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_get_type_extent\fP \- Returns the extent of the data type in a file. +\fBMPI_File_get_type_extent\fP \- Returns the extent of the data type in a file. 
.SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_get_type_extent(MPI_File \fIfh\fP, MPI_Datatype - \fIdatatype\fP, MPI_Aint \fI*extent\fP) +.SH C Syntax +#include +int MPI_File_get_type_extent(MPI_File \fIfh\fP, MPI_Datatype + \fIdatatype\fP, MPI_Aint \fI*extent\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_GET_TYPE_EXTENT(\fIFH\fP, \fIDATATYPE\fP, \fIEXTENT\fP, \fI IERROR\fP) - INTEGER \fIFH, DATATYPE, IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTENT\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_GET_TYPE_EXTENT(\fIFH\fP, \fIDATATYPE\fP, \fIEXTENT\fP, \fIIERROR\fP) + INTEGER \fIFH, DATATYPE, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTENT\fP .fi .SH C++ Syntax .nf #include -MPI::Aint MPI::File::Get_type_extent(const MPI::Datatype& +MPI::Aint MPI::File::Get_type_extent(const MPI::Datatype& \fIdatatype\fP) const .fi @@ -46,14 +49,14 @@ Data type (handle). .ft R .TP 1i extent -Data type extent (integer). +Data type extent (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_get_type_extent can be used to calculate \fIextent\fP for \fIdatatype\fP in the file. The extent is the same for all processes accessing the file associated with \fIfh\fP. If the current view uses a user-defined data representation, MPI_File_get_type_extent uses the \fIdtype_file_extent_fn\fP callback to calculate the extent. +MPI_File_get_type_extent can be used to calculate \fIextent\fP for \fIdatatype\fP in the file. The extent is the same for all processes accessing the file associated with \fIfh\fP. If the current view uses a user-defined data representation, MPI_File_get_type_extent uses the \fIdtype_file_extent_fn\fP callback to calculate the extent. .SH FORTRAN 77 NOTES .ft R @@ -70,11 +73,11 @@ and gives the length of the declared integer in bytes. 
.SH NOTES .ft R -If the file data representation is other than "native," care must be taken in constructing etypes and file types. Any of the data-type constructor functions may be used; however, for those functions that accept displacements in bytes, the displacements must be specified in terms of their values in the file for the file data representation being used. MPI will interpret these byte displacements as is; no scaling will be done. The function MPI_File_get_type_extent can be used to calculate the extents of data types in the file. For etypes and file types that are portable data types, MPI will scale any displacements in the data types to match the file data representation. Data types passed as arguments to read/write routines specify the data layout in memory; therefore, they must always be constructed using displacements corresponding to displacements in memory. +If the file data representation is other than "native," care must be taken in constructing etypes and file types. Any of the data-type constructor functions may be used; however, for those functions that accept displacements in bytes, the displacements must be specified in terms of their values in the file for the file data representation being used. MPI will interpret these byte displacements as is; no scaling will be done. The function MPI_File_get_type_extent can be used to calculate the extents of data types in the file. For etypes and file types that are portable data types, MPI will scale any displacements in the data types to match the file data representation. Data types passed as arguments to read/write routines specify the data layout in memory; therefore, they must always be constructed using displacements corresponding to displacements in memory. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_get_view.3in b/ompi/mpi/man/man3/MPI_File_get_view.3in index dccbd7a77c3..2ddc05c6340 100644 --- a/ompi/mpi/man/man3/MPI_File_get_view.3in +++ b/ompi/mpi/man/man3/MPI_File_get_view.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_get_view 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,21 +12,22 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_get_view(MPI_File \fIfh\fP, MPI_Offset \fI*disp\fP, - MPI_Datatype \fI*etype\fP, MPI_Datatype \fI*filetype\fP, - char \fI*datarep\fP) +.SH C Syntax +#include +int MPI_File_get_view(MPI_File \fIfh\fP, MPI_Offset \fI*disp\fP, + MPI_Datatype \fI*etype\fP, MPI_Datatype \fI*filetype\fP, + char \fI*datarep\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_GET_VIEW(\fIFH\fP,\fI DISP\fP,\fI ETYPE\fP, - \fI FILETYPE\fP, \fIDATAREP\fP, \fI IERROR\fP) - INTEGER \fIFH, ETYPE, FILETYPE, IERROR\fP - CHARACTER*(*) \fIDATAREP\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIDISP\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_GET_VIEW(\fIFH\fP, \fIDISP\fP, \fIETYPE\fP, + \fIFILETYPE\fP, \fIDATAREP\fP, \fIIERROR\fP) + INTEGER \fIFH, ETYPE, FILETYPE, IERROR\fP + CHARACTER*(*) \fIDATAREP\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIDISP\fP .fi .SH C++ Syntax @@ -44,29 +47,29 @@ File handle (handle). .SH OUTPUT PARAMETERS .ft R .TP 1i -disp +disp Displacement (integer). .TP 1i -etype -Elementary data type (handle). +etype +Elementary data type (handle). .TP 1i filetype File type (handle). See Restrictions, below. .TP 1i datarep -Data representation (string). +Data representation (string). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R The MPI_File_get_view routine returns the process's view of the data in the file. The current values of the displacement, etype, and -filetype are returned in +filetype are returned in .I disp, .I etype, -and +and .I filetype, respectively. .sp @@ -89,5 +92,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_iread.3in b/ompi/mpi/man/man3/MPI_File_iread.3in index 504e746ee8c..fdf62fb9321 100644 --- a/ompi/mpi/man/man3/MPI_File_iread.3in +++ b/ompi/mpi/man/man3/MPI_File_iread.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_iread 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,18 +12,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_iread(MPI_File \fIfh\fP, void \fI*buf\fP, int \fIcount\fP, - MPI_Datatype \fIdatatype\fP, MPI_Request \fI*request\fP) +.SH C Syntax +#include +int MPI_File_iread(MPI_File \fIfh\fP, void \fI*buf\fP, int \fIcount\fP, + MPI_Datatype \fIdatatype\fP, MPI_Request \fI*request\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_IREAD(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, COUNT, DATATYPE, REQUEST, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_IREAD(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP .fi .SH C++ Syntax @@ -34,7 +37,7 @@ MPI::Request MPI::File::Iread(void* \fIbuf\fP, int \fIcount\fP, .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -58,29 +61,29 @@ request Request object (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_iread is a nonblocking version of MPI_File_read. It attempts to read from the file associated with +MPI_File_iread is a nonblocking version of MPI_File_read. It attempts to read from the file associated with .I fh -at the current individual file pointer position maintained by the system in which a total number of +at the current individual file pointer position maintained by the system in which a total number of .I count -data items having +data items having .I datatype -type are read into the user's buffer +type are read into the user's buffer .I buf. The data is taken out of those parts of the file specified by the current view. MPI_File_iread stores the -number of data-type elements actually read in +number of data-type elements actually read in .I status. 
-All other fields of +All other fields of .I status -are undefined. It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. +are undefined. It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_iread_all.3in b/ompi/mpi/man/man3/MPI_File_iread_all.3in new file mode 100644 index 00000000000..7095dca4485 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_File_iread_all.3in @@ -0,0 +1,82 @@ +.\" -*- nroff -*- +.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2006-2008 Sun Microsystems, Inc. +.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
+.\" $COPYRIGHT$ +.TH MPI_File_iread_all 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_File_iread_all\fP \- Reads a file starting at the location specified by the individual file pointer (nonblocking, collective). + +.SH SYNTAX +.ft R +.nf +.SH C Syntax +#include +int MPI_File_iread_all(MPI_File \fIfh\fP, void \fI*buf\fP, int \fIcount\fP, + MPI_Datatype \fIdatatype\fP, MPI_Request \fI*request\fP) + +.fi +.SH Fortran Syntax +.nf +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_IREAD_ALL(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP + +.fi +.SH INPUT/OUTPUT PARAMETER +.ft R +.TP 1i +fh +File handle (handle). + +.SH INPUT PARAMETERS +.ft R +.TP 1i +count +Number of elements in the buffer (integer). +.ft R +.TP 1i +datatype +Data type of each buffer element (handle). + +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +buf +Initial address of buffer (choice). +.ft R +.TP 1i +request +Request object (handle). +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +MPI_File_iread_all is a nonblocking version of MPI_File_read_all. It attempts to read from the file associated with +.I fh +at the current individual file pointer position maintained by the system in which a total number of +.I count +data items having +.I datatype +type are read into the user's buffer +.I buf. +The data is taken out of those parts of the +file specified by the current view. MPI_File_iread_all stores the +number of data-type elements actually read in +.I status. +All other fields of +.I status +are undefined. It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. +.sp +Before the error value is returned, the current MPI error handler is +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. + diff --git a/ompi/mpi/man/man3/MPI_File_iread_at.3in b/ompi/mpi/man/man3/MPI_File_iread_at.3in index ab00512e49e..43ad8ecd081 100644 --- a/ompi/mpi/man/man3/MPI_File_iread_at.3in +++ b/ompi/mpi/man/man3/MPI_File_iread_at.3in @@ -2,28 +2,31 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_iread_at 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_iread_at\fP \- Reads a file at an explicitly specified offset (nonblocking, noncollective). +\fBMPI_File_iread_at\fP \- Reads a file at an explicitly specified offset (nonblocking, noncollective). 
.SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_iread_at(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, - void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, - MPI_Request \fI*request\fP) +.SH C Syntax +#include +int MPI_File_iread_at(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, + void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, + MPI_Request \fI*request\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_IREAD_AT(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_IREAD_AT(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax @@ -41,7 +44,7 @@ File handle (handle). .ft R .TP 1i offset -File offset (integer). +File offset (integer). .ft R .TP 1i count @@ -49,7 +52,7 @@ Number of elements in the buffer (integer). .ft R .TP 1i datatype -Data type of each buffer element (handle). +Data type of each buffer element (handle). .SH OUTPUT PARAMETERS .ft R @@ -62,32 +65,32 @@ request Request object (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_File_iread_at is the nonblocking version of MPI_File_read_at. -MPI_File_iread_at is a nonblocking routine that attempts to read from the file associated with +MPI_File_iread_at is a nonblocking routine that attempts to read from the file associated with .I fh -at the +at the .I offset -position a total number of +position a total number of .I count -data items having +data items having .I datatype -type into the user's buffer +type into the user's buffer .I buf. 
-The +The .I offset is in etype units relative to the current view. That is, holes are not counted when locating an offset. The data is taken out of those parts of the file specified by the current view. MPI_File_iread_at stores the -number of +number of .I datatype -elements actually read in +elements actually read in .I status. -All other fields of +All other fields of .I status are undefined. @@ -108,5 +111,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_iread_at_all.3in b/ompi/mpi/man/man3/MPI_File_iread_at_all.3in new file mode 100644 index 00000000000..25b0250afa5 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_File_iread_at_all.3in @@ -0,0 +1,108 @@ +.\" -*- nroff -*- +.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2006-2008 Sun Microsystems, Inc. 
+.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. +.\" $COPYRIGHT$ +.TH MPI_File_iread_at_all 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_File_iread_at_all\fP \- Reads a file at an explicitly specified offset (nonblocking, collective). + +.SH SYNTAX +.ft R +.nf +.SH C Syntax +#include <mpi.h> +int MPI_File_iread_at_all(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, + void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, + MPI_Request \fI*request\fP) + +.fi +.SH Fortran Syntax (see FORTRAN 77 NOTES) +.nf +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_IREAD_AT_ALL(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP + +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +fh +File handle (handle). +.ft R +.TP 1i +offset +File offset (integer). +.ft R +.TP 1i +count +Number of elements in the buffer (integer). +.ft R +.TP 1i +datatype +Data type of each buffer element (handle). + +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +buf +Initial address of the buffer (choice). +.ft R +.TP 1i +request +Request object (handle). +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +MPI_File_iread_at_all is the nonblocking version of MPI_File_read_at_all. + +MPI_File_iread_at_all is a nonblocking routine that attempts to read from the file associated with +.I fh +at the +.I offset +position a total number of +.I count +data items having +.I datatype +type into the user's buffer +.I buf. +The +.I offset +is in etype units relative to the current view. That is, holes are not counted +when locating an offset. The data is taken out of those parts of the +file specified by the current view. 
MPI_File_iread_at_all stores the +number of +.I datatype +elements actually read in +.I status. +All other fields of +.I status +are undefined. + +.SH FORTRAN 77 NOTES +.ft R +The MPI standard prescribes portable Fortran syntax for +the \fIOFFSET\fP argument only for Fortran 90. Sun FORTRAN 77 +users may use the non-portable syntax +.sp +.nf + INTEGER*MPI_OFFSET_KIND \fIOFFSET\fP +.fi +.sp +where MPI_OFFSET_KIND is a constant defined in mpif.h +and gives the length of the declared integer in bytes. + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. +.sp +Before the error value is returned, the current MPI error handler is +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. + diff --git a/ompi/mpi/man/man3/MPI_File_iread_shared.3in b/ompi/mpi/man/man3/MPI_File_iread_shared.3in index 37410e28fc0..ad31801c01a 100644 --- a/ompi/mpi/man/man3/MPI_File_iread_shared.3in +++ b/ompi/mpi/man/man3/MPI_File_iread_shared.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_iread_shared 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,18 +12,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_iread_shared(MPI_File \fIfh\fP, void \fI*buf\fP, int \fIcount\fP, - MPI_Datatype \fIdatatype\fP, MPI_Request \fI*request\fP) +.SH C Syntax +#include +int MPI_File_iread_shared(MPI_File \fIfh\fP, void \fI*buf\fP, int \fIcount\fP, + MPI_Datatype \fIdatatype\fP, MPI_Request \fI*request\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_IREAD_SHARED(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, COUNT, DATATYPE, REQUEST, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_IREAD_SHARED(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP .fi .SH C++ Syntax @@ -34,7 +37,7 @@ MPI::Request MPI::File::Iread_shared(void* \fIbuf\fP, int \fIcount\fP, .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -58,15 +61,15 @@ request Request object (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_iread_shared is a nonblocking version of the MPI_File_read_shared interface. It uses the shared file pointer to read files. The order of serialization among the processors is not deterministic for this noncollective routine, so you need to use other methods of synchronization to impose a particular order among processors. +MPI_File_iread_shared is a nonblocking version of the MPI_File_read_shared interface. It uses the shared file pointer to read files. The order of serialization among the processors is not deterministic for this noncollective routine, so you need to use other methods of synchronization to impose a particular order among processors. 
.SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_iwrite.3in b/ompi/mpi/man/man3/MPI_File_iwrite.3in index d0df7ea019f..0a761002fe3 100644 --- a/ompi/mpi/man/man3/MPI_File_iwrite.3in +++ b/ompi/mpi/man/man3/MPI_File_iwrite.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_iwrite 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,18 +13,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_iwrite(MPI_File \fIfh\fP, const void \fI*buf\fP, int \fIcount\fP, - MPI_Datatype \fIdatatype\fP, MPI_Request \fI*request\fP) +.SH C Syntax +#include +int MPI_File_iwrite(MPI_File \fIfh\fP, const void \fI*buf\fP, int \fIcount\fP, + MPI_Datatype \fIdatatype\fP, MPI_Request \fI*request\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_IWRITE(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, COUNT, DATATYPE, REQUEST, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_IWRITE(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP .fi .SH C++ Syntax @@ -35,7 +38,7 @@ MPI::Request MPI::File::Iwrite(const void* \fIbuf\fP, int \fIcount\fP, .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -56,36 +59,36 @@ Data type of each buffer element (handle). .ft R .TP 1i request -Request object (handle). +Request object (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_iwrite is a nonblocking version of the MPI_File_write interface. It attempts to write into the file associated with +MPI_File_iwrite is a nonblocking version of the MPI_File_write interface. It attempts to write into the file associated with .I fh -(at the current individual file pointer position maintained by the system) a total number of -.I count +(at the current individual file pointer position maintained by the system) a total number of +.I count data items having -.I datatype -type from the user's buffer +.I datatype +type from the user's buffer .I buf. The data is written into those parts of the file specified by the current view. 
MPI_File_iwrite stores the -number of -.I datatype -elements actually written in -.I status. -All other fields of -.I status +number of +.I datatype +elements actually written in +.I status. +All other fields of +.I status are undefined. .sp -It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was open. +It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was open. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_iwrite_all.3in b/ompi/mpi/man/man3/MPI_File_iwrite_all.3in new file mode 100644 index 00000000000..efc5738b8e3 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_File_iwrite_all.3in @@ -0,0 +1,87 @@ +.\" -*- nroff -*- +.\" Copyright 2013 Los Alamos National Security, LLC. All rights reserved. +.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2006-2008 Sun Microsystems, Inc. 
+.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. +.\" $COPYRIGHT$ +.TH MPI_File_iwrite_all 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_File_iwrite_all\fP \- Writes a file starting at the location specified by the individual file pointer (nonblocking, collective). + +.SH SYNTAX +.ft R +.nf +.SH C Syntax +#include <mpi.h> +int MPI_File_iwrite_all(MPI_File \fIfh\fP, const void \fI*buf\fP, int \fIcount\fP, + MPI_Datatype \fIdatatype\fP, MPI_Request \fI*request\fP) + +.fi +.SH Fortran Syntax +.nf +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_IWRITE_ALL(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP + +.fi +.SH INPUT/OUTPUT PARAMETER +.ft R +.TP 1i +fh +File handle (handle). + +.SH INPUT PARAMETERS +.ft R +.TP 1i +buf +Initial address of buffer (choice). +.ft R +.TP 1i +count +Number of elements in buffer (integer). +.ft R +.TP 1i +datatype +Data type of each buffer element (handle). + +.SH OUTPUT PARAMETER +.ft R +.TP 1i +request +Request object (handle). +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +MPI_File_iwrite_all is a nonblocking version of the MPI_File_write_all interface. It attempts to write into the file associated with +.I fh +(at the current individual file pointer position maintained by the system) a total number of +.I count +data items having +.I datatype +type from the user's buffer +.I buf. +The data is written into those parts of the +file specified by the current view. MPI_File_iwrite_all stores the +number of +.I datatype +elements actually written in +.I status. +All other fields of +.I status +are undefined. +.sp +It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. 
+ +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. +.sp +Before the error value is returned, the current MPI error handler is +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. + diff --git a/ompi/mpi/man/man3/MPI_File_iwrite_at.3in b/ompi/mpi/man/man3/MPI_File_iwrite_at.3in index d4fdac55474..2ab1a594e58 100644 --- a/ompi/mpi/man/man3/MPI_File_iwrite_at.3in +++ b/ompi/mpi/man/man3/MPI_File_iwrite_at.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_iwrite_at 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,19 +13,20 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_iwrite_at(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, - const void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Request \fI*request\fP) +.SH C Syntax +#include +int MPI_File_iwrite_at(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, + const void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Request \fI*request\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_IWRITE_AT(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_IWRITE_AT(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax @@ -36,7 +39,7 @@ MPI::Request MPI::File::Iwrite_at(MPI::Offset \fIoffset\fP, const void* \fIbuf\f .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -64,34 +67,34 @@ request Request object (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_iwrite_at is a nonblocking version of MPI_File_write_at. It attempts to write into the file associated with +MPI_File_iwrite_at is a nonblocking version of MPI_File_write_at. It attempts to write into the file associated with .I fh -(at the -.I offset -position) a total number of -.I count +(at the +.I offset +position) a total number of +.I count data items having -.I datatype -type from the user's buffer +.I datatype +type from the user's buffer .I buf. 
-The offset is in +The offset is in .I etype units relative to the current view. That is, holes are not counted when locating an offset. The data is written into those parts of the file specified by the current view. MPI_File_iwrite_at stores the -number of -.I datatype -elements actually written in -.I status. -All other fields of -.I status -are undefined. The request structure can be passed to MPI_Wait or MPI_Test, which will return a status with the number of bytes actually accessed. +number of +.I datatype +elements actually written in +.I status. +All other fields of +.I status +are undefined. The request structure can be passed to MPI_Wait or MPI_Test, which will return a status with the number of bytes actually accessed. .sp -It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was open. +It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was open. .SH FORTRAN 77 NOTES .ft R @@ -110,5 +113,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. 
The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_iwrite_at_all.3in b/ompi/mpi/man/man3/MPI_File_iwrite_at_all.3in new file mode 100644 index 00000000000..c3477910984 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_File_iwrite_at_all.3in @@ -0,0 +1,110 @@ +.\" -*- nroff -*- +.\" Copyright 2013 Los Alamos National Security, LLC. All rights reserved. +.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2006-2008 Sun Microsystems, Inc. +.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. +.\" $COPYRIGHT$ +.TH MPI_File_iwrite_at_all 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_File_iwrite_at_all\fP \- Writes a file at an explicitly specified offset (nonblocking, collective). + +.SH SYNTAX +.ft R +.nf +.SH C Syntax +#include <mpi.h> +int MPI_File_iwrite_at_all(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, + const void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Request \fI*request\fP) + +.fi +.SH Fortran Syntax (see FORTRAN 77 NOTES) +.nf +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_IWRITE_AT_ALL(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP + +.fi +.SH INPUT/OUTPUT PARAMETER +.ft R +.TP 1i +fh +File handle (handle). + +.SH INPUT PARAMETERS +.ft R +.TP 1i +offset +File offset (integer). +.ft R +.TP 1i +buf +Initial address of buffer (choice). +.ft R +.TP 1i +count +Number of elements in buffer (integer). +.ft R +.TP 1i +datatype +Data type of each buffer element (handle). 
+ +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +request +Request object (handle). +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +MPI_File_iwrite_at_all is a nonblocking version of MPI_File_write_at_all. It attempts to write into the file associated with +.I fh +(at the +.I offset +position) a total number of +.I count +data items having +.I datatype +type from the user's buffer +.I buf. +The offset is in +.I etype +units relative to the current view. That is, holes are not counted +when locating an offset. The data is written into those parts of the +file specified by the current view. MPI_File_iwrite_at_all stores the +number of +.I datatype +elements actually written in +.I status. +All other fields of +.I status +are undefined. The request structure can be passed to MPI_Wait or MPI_Test, which will return a status with the number of bytes actually accessed. +.sp +It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. + +.SH FORTRAN 77 NOTES +.ft R +The MPI standard prescribes portable Fortran syntax for +the \fIOFFSET\fP argument only for Fortran 90. FORTRAN 77 +users may use the non-portable syntax +.sp +.nf + INTEGER*MPI_OFFSET_KIND \fIOFFSET\fP +.fi +.sp +where MPI_OFFSET_KIND is a constant defined in mpif.h +and gives the length of the declared integer in bytes. + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. +.sp +Before the error value is returned, the current MPI error handler is +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. 
The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. + diff --git a/ompi/mpi/man/man3/MPI_File_iwrite_shared.3in b/ompi/mpi/man/man3/MPI_File_iwrite_shared.3in index 053fd5e8531..f1f332cfe2d 100644 --- a/ompi/mpi/man/man3/MPI_File_iwrite_shared.3in +++ b/ompi/mpi/man/man3/MPI_File_iwrite_shared.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_iwrite_shared 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -13,18 +15,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_(MPI_File \fIfh\fP, const void \fI*buf\fP, int \fIcount\fP, MPI_Datatype - \fIdatatype\fP, MPI_Request \fI*request\fP) +.SH C Syntax +#include +int MPI_File_iwrite_shared(MPI_File \fIfh\fP, const void \fI*buf\fP, int \fIcount\fP, MPI_Datatype + \fIdatatype\fP, MPI_Request \fI*request\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_File_(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, COUNT, DATATYPE, REQUEST, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_IWRITE_SHARED(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, REQUEST, IERROR\fP .fi .SH C++ Syntax @@ -37,7 +40,7 @@ MPI::Request MPI::File::Iwrite_shared(const void* \fIbuf\fP, int \fIcount\fP, .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -60,15 +63,15 @@ request Request object (handle). .TP 1i IERROR -Fortran only: Error status (integer). 
+Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_iwrite_shared is a nonblocking routine that uses the shared file pointer to write files. The order of serialization is not deterministic for this noncollective routine, so you need to use other methods of synchronization to impose a particular order. +MPI_File_iwrite_shared is a nonblocking routine that uses the shared file pointer to write files. The order of serialization is not deterministic for this noncollective routine, so you need to use other methods of synchronization to impose a particular order. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_open.3in b/ompi/mpi/man/man3/MPI_File_open.3in index 682e8b37e02..bb783d493d6 100644 --- a/ompi/mpi/man/man3/MPI_File_open.3in +++ b/ompi/mpi/man/man3/MPI_File_open.3in @@ -1,8 +1,10 @@ .\" -*- nroff -*- .\" Copyright 2013 Los Alamos National Security, LLC. All rights reserved. 
-.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_open 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,19 +12,20 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_open(MPI_Comm \fIcomm\fP, const char \fI*filename\fP, - int \fIamode\fP, MPI_Info \fIinfo\fP, - MPI_File \fI*fh\fP) +.SH C Syntax +#include +int MPI_File_open(MPI_Comm \fIcomm\fP, const char \fI*filename\fP, + int \fIamode\fP, MPI_Info \fIinfo\fP, + MPI_File \fI*fh\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_OPEN(\fICOMM\fP,\fI FILENAME\fP,\fI AMODE\fP, \fIINFO\fP,\fI FH\fP,\fI IERROR\fP) - CHARACTER*(*) FILENAME - INTEGER COMM, AMODE, INFO, FH, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_OPEN(\fICOMM\fP, \fIFILENAME\fP, \fIAMODE\fP, \fIINFO\fP, \fIFH\fP, \fIIERROR\fP) + CHARACTER*(*) \fIFILENAME\fP + INTEGER \fICOMM, AMODE, INFO, FH, IERROR\fP .fi .SH C++ Syntax @@ -35,85 +38,88 @@ static MPI::File MPI::File::Open(const MPI::Intracomm& \fIcomm\fP, .SH INPUT PARAMETERS .ft R .TP 1i -comm +comm Communicator (handle). .TP 1i filename -Name of file to open (string). +Name of file to open (string). .TP 1i -amode +amode File access mode (integer). .TP 1i info -Info object (handle). +Info object (handle). .SH OUTPUT PARAMETERS .ft R .TP 1i -fh +fh New file handle (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_open opens the file identified by the filename +MPI_File_open opens the file identified by the filename .I filename -on all processes in the +on all processes in the .I comm communicator group. 
MPI_File_open is a collective routine; all processes must provide the same value for .I amode, and all processes must provide filenames that reference the same -file and which are textually identical. A process can open a file -independently of other processes by using the MPI_COMM_SELF -communicator. The file handle returned, +file and that are textually identical (note: Open MPI I/O plugins may +have restrictions on characters that can be used in filenames. For +example, the ROMIO plugin may disallow the colon (":") character from +appearing in a filename). A process can open a file independently of +other processes by using the MPI_COMM_SELF communicator. The file +handle returned, .I fh, can be subsequently used to access the file until the file is closed using MPI_File_close. Before calling MPI_Finalize, the user is required to close (via MPI_File_close) all files that were opened with MPI_File_open. Note -that the communicator -.I comm +that the communicator +.I comm is unaffected by MPI_File_open and continues to be usable in all MPI -routines. Furthermore, use of -.I comm +routines. Furthermore, use of +.I comm will not interfere with I/O behavior. .sp -Initially, all processes view the file as a linear byte stream; that is, the -.I etype -and +Initially, all processes view the file as a linear byte stream; that is, the +.I etype +and .I filetype are both MPI_BYTE. The file view can be changed via the MPI_File_set_view routine. .sp The following access modes are supported (specified in amode, in a bit-vector OR in one of the following integer constants): .TP .5i - o + o MPI_MODE_APPEND .TP .5i - o + o MPI_MODE_CREATE -- Create the file if it does not exist. .TP .5i - o + o MPI_MODE_DELETE_ON_CLOSE .TP .5i - o -MPI_MODE_EXCL -- Error creating a file that already exists. + o +MPI_MODE_EXCL -- Error creating a file that already exists. .TP .5i - o + o MPI_MODE_RDONLY -- Read only. .TP .5i - o + o MPI_MODE_RDWR -- Reading and writing. 
.TP .5i - o + o MPI_MODE_SEQUENTIAL .TP .5i - o + o MPI_MODE_WRONLY -- Write only. .TP .5i - o + o MPI_MODE_UNIQUE_OPEN .RE .sp @@ -123,26 +129,26 @@ specify MPI_MODE_CREATE in conjunction with MPI_MODE_RDONLY. Errors related to the access mode are raised in the class MPI_ERR_AMODE. .sp On single-node clusters, files are opened by default using nonatomic mode file consistency -semantics. The more stringent atomic-mode consistency semantics, required for atomicity of overlapping accesses, are the default when processors in a communicator group reside on more than one node. +semantics. The more stringent atomic-mode consistency semantics, required for atomicity of overlapping accesses, are the default when processors in a communicator group reside on more than one node. This setting can be changed using MPI_File_set_atomicity. .sp -The MPI_File_open interface allows the user to pass information via the \fIinfo\fP argument. It can be set to MPI_INFO_NULL. See the HINTS section for a list of hints that can be set. +The MPI_File_open interface allows the user to pass information via the \fIinfo\fP argument. It can be set to MPI_INFO_NULL. See the HINTS section for a list of hints that can be set. .SH HINTS .ft R -The following hints can be used as values for the \fIinfo\fP argument. +The following hints can be used as values for the \fIinfo\fP argument. .sp SETTABLE HINTS: .sp - MPI_INFO_NULL .sp -- shared_file_timeout: Amount of time (in seconds) to wait for access to the +- shared_file_timeout: Amount of time (in seconds) to wait for access to the shared file pointer before exiting with MPI_ERR_TIMEDOUT. .sp -- rwlock_timeout: Amount of time (in seconds) to wait for obtaining a read or +- rwlock_timeout: Amount of time (in seconds) to wait for obtaining a read or write lock on a contiguous chunk of a UNIX file before exiting with MPI_ERR_TIMEDOUT. 
-.sp +.sp - noncoll_read_bufsize: Maximum size of the buffer used by MPI I/O to satisfy multiple noncontiguous read requests in the noncollective data-access routines. (See NOTE, below.) @@ -166,13 +172,13 @@ I/O routines can bind an extra thread to an LWP. .sp - mpiio_coll_contiguous: (boolean) controls whether subsequent collective data accesses will request collectively contiguous regions of the file. .sp -NON-SETTABLE HINTS: +NON-SETTABLE HINTS: .sp -- filename: Access this hint to get the name of the file. +- filename: Access this hint to get the name of the file. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_preallocate.3in b/ompi/mpi/man/man3/MPI_File_preallocate.3in index 5ea0f7b475a..799cdf11002 100644 --- a/ompi/mpi/man/man3/MPI_File_preallocate.3in +++ b/ompi/mpi/man/man3/MPI_File_preallocate.3in @@ -2,25 +2,28 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. 
.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_preallocate 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_preallocate\fP \- Preallocates a specified amount of storage space at the beginning of a file (collective). +\fBMPI_File_preallocate\fP \- Preallocates a specified amount of storage space at the beginning of a file (collective). .SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_preallocate(MPI_File \fIfh\fP, MPI_Offset \fIsize\fP) +.SH C Syntax +#include <mpi.h> +int MPI_File_preallocate(MPI_File \fIfh\fP, MPI_Offset \fIsize\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_PREALLOCATE(\fIFH\fP, \fISIZE\fP, \fIIERROR\fP) - INTEGER \fIFH, IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fISIZE\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_PREALLOCATE(\fIFH\fP, \fISIZE\fP, \fIIERROR\fP) + INTEGER \fIFH, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fISIZE\fP .fi .SH C++ Syntax @@ -32,26 +35,26 @@ void MPI::File::Preallocate(MPI::Offset \fIsize\fP) .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETER .ft R .TP 1i size -Size to preallocate file, in bytes (integer). +Size to preallocate file, in bytes (integer). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_preallocate ensures that storage space is allocated for the first \fIsize\fP bytes of the file associated with \fIfh\fP. MPI_File_preallocate can be a very time-consuming operation. +MPI_File_preallocate ensures that storage space is allocated for the first \fIsize\fP bytes of the file associated with \fIfh\fP. MPI_File_preallocate can be a very time-consuming operation.
-MPI_File_preallocate is collective; all processes in the group must pass identical values for \fIsize\fP. Regions of the file that have previously been written are unaffected. For newly allocated regions of the file, MPI_File_preallocate has the same effect as writing undefined data. If size is larger than the current file size, the file size increases to \fIsize\fP. If \fIsize\fP is less than or equal to the current file size, the file size is unchanged. +MPI_File_preallocate is collective; all processes in the group must pass identical values for \fIsize\fP. Regions of the file that have previously been written are unaffected. For newly allocated regions of the file, MPI_File_preallocate has the same effect as writing undefined data. If size is larger than the current file size, the file size increases to \fIsize\fP. If \fIsize\fP is less than or equal to the current file size, the file size is unchanged. The treatment of file pointers, pending nonblocking accesses, and file consistency is the same as with MPI_File_set_size. If MPI_MODE_SEQUENTIAL mode was specified when the file was opened, it is erroneous to call this routine. @@ -70,13 +73,13 @@ and gives the length of the declared integer in bytes. .SH NOTES .ft R -When using the collective routine MPI_File_set_size on a UNIX file, if the size that is set is smaller than the current file size, the file is truncated at the position defined by size. If the size is set to be larger than the current file size, the file size becomes the set size. When the file size is increased this way with MPI_File_set_size, new regions are created in the file with displacements between the old file size and the larger, newly set file size. +When using the collective routine MPI_File_set_size on a UNIX file, if the size that is set is smaller than the current file size, the file is truncated at the position defined by size. If the size is set to be larger than the current file size, the file size becomes the set size. 
When the file size is increased this way with MPI_File_set_size, new regions are created in the file with displacements between the old file size and the larger, newly set file size. .sp -Sun MPI I/O does not necessarily allocate file space for such new regions. You may reserve file space either by using MPI_File_preallocate or by performing a read or write to certain bytes. +Sun MPI I/O does not necessarily allocate file space for such new regions. You may reserve file space either by using MPI_File_preallocate or by performing a read or write to certain bytes. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_read.3in b/ompi/mpi/man/man3/MPI_File_read.3in index 72021ec42cf..086530e3c59 100644 --- a/ompi/mpi/man/man3/MPI_File_read.3in +++ b/ompi/mpi/man/man3/MPI_File_read.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. 
.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_read 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,29 +12,29 @@ .SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_read(MPI_File \fIfh\fP, void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) +.SH C Syntax +#include <mpi.h> +int MPI_File_read(MPI_File \fIfh\fP, void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_READ(\fI FH\fP, \fI BUF\fP, \fICOUNT\fP, - \fI DATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) - BUF(*) - INTEGER FH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), - IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, + \fIDATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax .nf #include <mpi.h> -void MPI::File::Read(void* \fIbuf\fP, int \fIcount\fP, +void MPI::File::Read(void* \fIbuf\fP, int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP, MPI::Status& \fIstatus\fP) -void MPI::File::Read(void* \fIbuf\fP, int \fIcount\fP, +void MPI::File::Read(void* \fIbuf\fP, int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP) .fi @@ -58,31 +60,31 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer).
.SH DESCRIPTION .ft R -MPI_File_read attempts to read from the file associated with +MPI_File_read attempts to read from the file associated with .I fh -(at the current individual file pointer position maintained by the system) a total number of +(at the current individual file pointer position maintained by the system) a total number of .I count -data items having +data items having .I datatype -type into the user's buffer +type into the user's buffer .I buf. The data is taken out of those parts of the file specified by the current view. MPI_File_read stores the -number of data-type elements actually read in +number of data-type elements actually read in .I status. -All other fields of +All other fields of .I status -are undefined. +are undefined. .sp -It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. +It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. 
Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_read_all.3in b/ompi/mpi/man/man3/MPI_File_read_all.3in index 12479e9ae9d..05e6a7a3023 100644 --- a/ompi/mpi/man/man3/MPI_File_read_all.3in +++ b/ompi/mpi/man/man3/MPI_File_read_all.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_read_all 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,20 +12,20 @@ .SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_read_all(MPI_File \fIfh\fP, void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) +.SH C Syntax +#include <mpi.h> +int MPI_File_read_all(MPI_File \fIfh\fP, void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_READ_ALL(\fI FH\fP, \fI BUF\fP, \fICOUNT\fP, - \fI DATATYPE\fP, \fISTATUS\fP, \fI IERROR\fP) - BUF(*) - INTEGER FH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), - IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ_ALL(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, + \fIDATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -58,32 +60,32 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer).
.SH DESCRIPTION .ft R -MPI_File_read_all is a collective routine that attempts to read from the file associated with +MPI_File_read_all is a collective routine that attempts to read from the file associated with .I fh -(at the current individual file pointer position maintained by the system) a total number of +(at the current individual file pointer position maintained by the system) a total number of .I count -data items having +data items having .I datatype -type into the user's buffer +type into the user's buffer .I buf. The data is taken out of those parts of the file specified by the current view. MPI_File_read_all stores the -number of data-type elements actually read in +number of data-type elements actually read in .I status. -All other fields of +All other fields of .I status -are undefined. +are undefined. .sp -It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. +It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. 
The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_read_all_begin.3in b/ompi/mpi/man/man3/MPI_File_read_all_begin.3in index 184dec1bac6..6eed215bc30 100644 --- a/ompi/mpi/man/man3/MPI_File_read_all_begin.3in +++ b/ompi/mpi/man/man3/MPI_File_read_all_begin.3in @@ -2,26 +2,29 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_read_all_begin 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_read_all_begin\fP \- Reads a file starting at the locations specified by individual file pointers; beginning part of a split collective routine (nonblocking). +\fBMPI_File_read_all_begin\fP \- Reads a file starting at the locations specified by individual file pointers; beginning part of a split collective routine (nonblocking). .SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_read_all_begin(MPI_File \fIfh\fP, void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP) +.SH C Syntax +#include <mpi.h> +int MPI_File_read_all_begin(MPI_File \fIfh\fP, void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_READ_ALL_BEGIN(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP,\fI IERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, IERROR\fP +USE MPI +!
or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ_ALL_BEGIN(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, IERROR\fP .fi .SH C++ Syntax @@ -34,7 +37,7 @@ void MPI::File::Read_all_begin(void* \fIbuf\fP, int \fIcount\fP, .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -54,28 +57,28 @@ buf Initial address of buffer (choice). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_read_all_begin is the beginning part of a split collective operation that attempts to read from the file associated with +MPI_File_read_all_begin is the beginning part of a split collective operation that attempts to read from the file associated with .I fh -(at the current individual file pointer position maintained by the system) a total number of +(at the current individual file pointer position maintained by the system) a total number of .I count -data items having +data items having .I datatype -type into the user's buffer +type into the user's buffer .I buf. The data is taken out of those parts of the -file specified by the current view. +file specified by the current view. .SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. +All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_read_all_end.3in b/ompi/mpi/man/man3/MPI_File_read_all_end.3in index 21857c96e2f..226bc4552f6 100644 --- a/ompi/mpi/man/man3/MPI_File_read_all_end.3in +++ b/ompi/mpi/man/man3/MPI_File_read_all_end.3in @@ -2,26 +2,29 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_read_all_end 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_read_all_end\fP \- Reads a file starting at the locations specified by individual file pointers; ending part of a split collective routine (blocking). +\fBMPI_File_read_all_end\fP \- Reads a file starting at the locations specified by individual file pointers; ending part of a split collective routine (blocking). 
.SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_read_all_end(MPI_File \fIfh\fP, void \fI*buf\fP, +.SH C Syntax +#include <mpi.h> +int MPI_File_read_all_end(MPI_File \fIfh\fP, void \fI*buf\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_READ_ALL_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, STATUS(MPI_STATUS_SIZE), IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ_ALL_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -35,7 +38,7 @@ void MPI::File::Read_all_end(void* \fIbuf\fP) .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH OUTPUT PARAMETERS @@ -46,32 +49,32 @@ Initial address of buffer (choice). .ft R .TP 1i status -Status object (status). +Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_read_all_end is the ending part of a split collective operation that stores the number of elements actually read from the file associated with +MPI_File_read_all_end is the ending part of a split collective operation that stores the number of elements actually read from the file associated with .I fh -(at the current individual file pointer position maintained by the system) -into the user's buffer +(at the current individual file pointer position maintained by the system) +into the user's buffer .I buf -in +in .I status. The data is taken out of those parts of the -file specified by the current view. All other fields of +file specified by the current view. All other fields of .I status are undefined. .SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard.
+All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_read_at.3in b/ompi/mpi/man/man3/MPI_File_read_at.3in index 7701de66c0e..11992746ee5 100644 --- a/ompi/mpi/man/man3/MPI_File_read_at.3in +++ b/ompi/mpi/man/man3/MPI_File_read_at.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_read_at 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,28 +12,29 @@ .SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_read_at(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, - void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, +.SH C Syntax +#include <mpi.h> +int MPI_File_read_at(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, + void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_READ_AT(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, - \fI DATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ_AT(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, + \fIDATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax .nf #include <mpi.h> void MPI::File::Read_at(MPI::Offset \fIoffset\fP, void* \fIbuf\fP, - int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP, + int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP, MPI::Status& \fIstatus\fP) void MPI::File::Read_at(MPI::Offset \fIoffset\fP, void* \fIbuf\fP, @@ -63,35 +66,35 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_read_at attempts to read from the file associated with +MPI_File_read_at attempts to read from the file associated with .I fh -(at the +(at the .I offset -position) a total number of +position) a total number of .I count -data items having +data items having .I datatype -type into the user's buffer +type into the user's buffer .I buf. -The +The .I offset -is in +is in .I etype units relative to the current view.
That is, holes are not counted when locating an offset. The data is taken out of those parts of the file specified by the current view. MPI_File_read_at stores the -number of +number of .I datatype -elements actually read in +elements actually read in .I status. -All other fields of +All other fields of .I status -are undefined. It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. +are undefined. It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. .SH FORTRAN 77 NOTES .ft R @@ -110,5 +113,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_File_read_at_all.3in b/ompi/mpi/man/man3/MPI_File_read_at_all.3in index d6d06532f5a..b710c4fd99f 100644 --- a/ompi/mpi/man/man3/MPI_File_read_at_all.3in +++ b/ompi/mpi/man/man3/MPI_File_read_at_all.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_read_at_all 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,30 +12,31 @@ .SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_read_at_all(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, - void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, +.SH C Syntax +#include <mpi.h> +int MPI_File_read_at_all(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, + void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_READ_AT_ALL(\fIFH\fP, \fI OFFSET\fP, \fI BUF\fP, \fICOUNT\fP, - \fI DATATYPE\fP, \fISTATUS\fP, \fI IERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +!
or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ_AT_ALL(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, + \fIDATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax .nf #include <mpi.h> -void MPI::File::Read_at_all(MPI::Offset \fIoffset\fP, void* \fIbuf\fP, +void MPI::File::Read_at_all(MPI::Offset \fIoffset\fP, void* \fIbuf\fP, int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP, MPI::Status& \fIstatus\fP) -void MPI::File::Read_at_all(MPI::Offset \fIoffset\fP, void* \fIbuf\fP, +void MPI::File::Read_at_all(MPI::Offset \fIoffset\fP, void* \fIbuf\fP, int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP) .fi @@ -62,32 +65,32 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_read_at_all is a collective routine that attempts to read from the file associated with +MPI_File_read_at_all is a collective routine that attempts to read from the file associated with .I fh -(at the +(at the .I offset -position) a total number of +position) a total number of .I count -data items having +data items having .I datatype -type into the user's buffer +type into the user's buffer .I buf. -The +The .I offset is in etype units relative to the current view. That is, holes are not counted when locating an offset. The data is taken out of those parts of the file specified by the current view. MPI_File_read_at_all stores the -number of +number of .I datatype -elements actually read in +elements actually read in .I status. -All other fields of +All other fields of .I status -are undefined. It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. +are undefined. It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened.
.SH FORTRAN 77 NOTES .ft R @@ -106,5 +109,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_read_at_all_begin.3in b/ompi/mpi/man/man3/MPI_File_read_at_all_begin.3in index cc6255074d6..8c6ae19e079 100644 --- a/ompi/mpi/man/man3/MPI_File_read_at_all_begin.3in +++ b/ompi/mpi/man/man3/MPI_File_read_at_all_begin.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_read_at_all_begin 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,21 +12,22 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_read_at_all_begin(MPI_File \fIfh\fP, MPI_Offset - \fIoffset\fP, void \fI*buf\fP, int \fIcount\fP, MPI_Datatype +.SH C Syntax +#include +int MPI_File_read_at_all_begin(MPI_File \fIfh\fP, MPI_Offset + \fIoffset\fP, void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_READ_AT_ALL_BEGIN(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, - \fICOUNT\fP, \fIDATATYPE\fP,\fI IERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ_AT_ALL_BEGIN(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, + \fICOUNT\fP, \fIDATATYPE\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax @@ -37,7 +40,7 @@ void MPI::File::Read_at_all_begin(MPI::Offset \fIoffset\fP, void* \fIbuf\fP, .SH INPUT PARAMETERS .ft R .TP 1i -fh +fh File handle (handle). .ft R .TP 1i @@ -59,25 +62,25 @@ buf Initial address of buffer (choice). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_read_at_all_begin is the beginning part of a split collective routine that attempts to read from the file associated with +MPI_File_read_at_all_begin is the beginning part of a split collective routine that attempts to read from the file associated with .I fh -(at the +(at the .I offset -position) a total number of +position) a total number of .I count -data items having +data items having .I datatype -type into the user's buffer +type into the user's buffer .I buf. -The +The .I offset is in etype units relative to the current view. That is, holes are not counted when locating an offset. 
The data is taken out of those parts of the -file specified by the current view. +file specified by the current view. .SH FORTRAN 77 NOTES .ft R @@ -94,11 +97,11 @@ and gives the length of the declared integer in bytes. .SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. +All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_File_read_at_all_end.3in b/ompi/mpi/man/man3/MPI_File_read_at_all_end.3in index 5137787c609..b49ec0fa500 100644 --- a/ompi/mpi/man/man3/MPI_File_read_at_all_end.3in +++ b/ompi/mpi/man/man3/MPI_File_read_at_all_end.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_read_at_all_end 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,18 +12,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_read_at_all_end(MPI_File \fIfh\fP, void \fI*buf\fP, +.SH C Syntax +#include +int MPI_File_read_at_all_end(MPI_File \fIfh\fP, void \fI*buf\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_READ_AT_ALL_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, STATUS(MPI_STATUS_SIZE), IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ_AT_ALL_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -35,7 +38,7 @@ void MPI::File::Read_at_all_end(void* \fIbuf\fP) .SH INPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH OUTPUT PARAMETERS @@ -46,28 +49,28 @@ Initial address of buffer (choice). .ft R .TP 1i status -Status object (status). +Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_read_at_all_end is a split collective routine that stores the number of elements actually read from the file associated with +MPI_File_read_at_all_end is a split collective routine that stores the number of elements actually read from the file associated with .I fh -in +in .I status. 
-MPI_File_read_at_all_end blocks until the operation initiated by MPI_File_read_at_all_begin completes. The data is taken out of those parts of the file specified by the current view. All other fields of +MPI_File_read_at_all_end blocks until the operation initiated by MPI_File_read_at_all_begin completes. The data is taken out of those parts of the file specified by the current view. All other fields of .I status are undefined. .SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. +All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_File_read_ordered.3in b/ompi/mpi/man/man3/MPI_File_read_ordered.3in index 73544b70368..2c347a0479c 100644 --- a/ompi/mpi/man/man3/MPI_File_read_ordered.3in +++ b/ompi/mpi/man/man3/MPI_File_read_ordered.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_read_ordered 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -9,37 +11,38 @@ .SH SYNTAX .ft R -C Syntax +.SH C Syntax .nf - #include - int MPI_File_read_ordered(MPI_File \fIfh\fP, void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP, - MPI_Status \fI*status\fP) +#include +int MPI_File_read_ordered(MPI_File \fIfh\fP, void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP, + MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_READ_ORDERED(\fIFH\fP,\fI BUF\fP,\fI COUNT\fP,\fI DATATYPE\fP, - \fISTATUS\fP,\fI IERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ_ORDERED(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, + \fISTATUS\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax .nf #include -void MPI::File::Read_ordered(void* \fIbuf\fP, int \fIcount\fP, +void MPI::File::Read_ordered(void* \fIbuf\fP, int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP, MPI::Status& \fIstatus\fP) -void MPI::File::Read_ordered(void* \fIbuf\fP, int \fIcount\fP, +void MPI::File::Read_ordered(void* \fIbuf\fP, int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP) .fi .SH INPUT PARAMETERS .ft R .TP 1i -fh +fh File handle (handle). .TP 1i count @@ -58,30 +61,30 @@ status Status object (Status). 
.TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_File_read_ordered is a collective routine. This routine must be called by all processes in the communicator group associated with the -file handle +file handle .I fh. -Each process may pass different argument values for the -.I datatype -and -.I count -arguments. Each process attempts to read, from the file associated with +Each process may pass different argument values for the +.I datatype +and +.I count +arguments. Each process attempts to read, from the file associated with .I fh, -a total number of -.I count -data items having -.I datatype -type into the user's buffer +a total number of +.I count +data items having +.I datatype +type into the user's buffer .I buf. -For each process, the location in the file at which data is read is the position at which the shared file pointer would be after all processes whose ranks within the group are less than that of this process had read their data. MPI_File_read_ordered returns the actual number of -.I datatype -elements read in +For each process, the location in the file at which data is read is the position at which the shared file pointer would be after all processes whose ranks within the group are less than that of this process had read their data. MPI_File_read_ordered returns the actual number of +.I datatype +elements read in .I status. The shared file pointer is updated by the amounts of data requested by all processes of the group. @@ -89,5 +92,5 @@ The shared file pointer is updated by the amounts of data requested by all proce Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. 
.sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_read_ordered_begin.3in b/ompi/mpi/man/man3/MPI_File_read_ordered_begin.3in index 1dee629cf95..7c00f00cd39 100644 --- a/ompi/mpi/man/man3/MPI_File_read_ordered_begin.3in +++ b/ompi/mpi/man/man3/MPI_File_read_ordered_begin.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_read_ordered_begin 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,18 +12,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_read_ordered_begin(MPI_File \fIfh\fP, void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP) +.SH C Syntax +#include +int MPI_File_read_ordered_begin(MPI_File \fIfh\fP, void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_READ_ORDERED_BEGIN(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, COUNT, DATATYPE, IERROR +USE MPI +! 
or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ_ORDERED_BEGIN(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, IERROR\fP .fi .SH C++ Syntax @@ -34,7 +37,7 @@ void MPI::File::Read_ordered_begin(void* \fIbuf\fP, int \fIcount\fP, .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -54,35 +57,35 @@ buf Initial address of buffer (choice). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_File_read_ordered_begin is the beginning part of a split collective, nonblocking routine that must be called by all processes in the communicator group associated with the -file handle +file handle .I fh. -Each process may pass different argument values for the -.I datatype -and -.I count -arguments. Each process attempts to read, from the file associated with +Each process may pass different argument values for the +.I datatype +and +.I count +arguments. Each process attempts to read, from the file associated with .I fh, -a total number of -.I count -data items having -.I datatype -type into the user's buffer +a total number of +.I count +data items having +.I datatype +type into the user's buffer .I buf. -For each process, the location in the file at which data is read is the position at which the shared file pointer would be after all processes whose ranks within the group are less than that of this process had read their data. +For each process, the location in the file at which data is read is the position at which the shared file pointer would be after all processes whose ranks within the group are less than that of this process had read their data. .SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. 
+All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_read_ordered_end.3in b/ompi/mpi/man/man3/MPI_File_read_ordered_end.3in index 0d77dc602e1..43e50a8ad41 100644 --- a/ompi/mpi/man/man3/MPI_File_read_ordered_end.3in +++ b/ompi/mpi/man/man3/MPI_File_read_ordered_end.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_read_ordered_end 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,18 +12,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_read_ordered_end(MPI_File \fIfh\fP, void \fI*buf\fP, - MPI_Status \fI*status\fP) +.SH C Syntax +#include +int MPI_File_read_ordered_end(MPI_File \fIfh\fP, void \fI*buf\fP, + MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_READ_ORDERED_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, STATUS(MPI_STATUS_SIZE), IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ_ORDERED_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -29,13 +32,13 @@ C Syntax #include void MPI::File::Read_ordered_end(void* \fIbuf\fP, MPI::Status& \fIstatus\fP) -void MPI::File::Read_ordered_end(void* \fIbuf\fP) +void MPI::File::Read_ordered_end(void* \fIbuf\fP) .fi .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH OUTPUT PARAMETERS @@ -49,26 +52,26 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_File_read_ordered_end is the ending part of a split collective routine that must be called by all processes in the communicator group associated with the -file handle -.I fh. -MPI_File_rad_ordered_end blocks until the operation initiated by MPI_File_read_ordered_begin completes. It attempts to read the file associated with +file handle +.I fh. +MPI_File_read_ordered_end blocks until the operation initiated by MPI_File_read_ordered_begin completes. It attempts to read the file associated with .I fh -into the user's buffer +into the user's buffer .I buf. -The shared file pointer is updated by the amounts of data requested by all processes of the group.
For each process, the location in the file at which data is read is the position at which the shared file pointer would be after all processes whose ranks within the group are less than that of this process had read their data. +The shared file pointer is updated by the amounts of data requested by all processes of the group. For each process, the location in the file at which data is read is the position at which the shared file pointer would be after all processes whose ranks within the group are less than that of this process had read their data. .SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. +All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. 
The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_read_shared.3in b/ompi/mpi/man/man3/MPI_File_read_shared.3in index 5a451b22c48..17bfe9897c9 100644 --- a/ompi/mpi/man/man3/MPI_File_read_shared.3in +++ b/ompi/mpi/man/man3/MPI_File_read_shared.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_read_shared 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,20 +12,20 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_read_shared(MPI_File \fIfh\fP, void \fI*buf\fP, int \fIcount\fP, -MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) +.SH C Syntax +#include +int MPI_File_read_shared(MPI_File \fIfh\fP, void \fI*buf\fP, int \fIcount\fP, + MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_READ_SHARED(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fISTATUS\fP, +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_READ_SHARED(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) - BUF(*) - INTEGER FH, COUNT, DATATYPE, - STATUS(MPI_STATUS_SIZE), IERROR + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -39,7 +41,7 @@ void MPI::File::Read_shared(void* \fIbuf\fP, int \fIcount\fP, .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -63,7 +65,7 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer).
.SH DESCRIPTION .ft R @@ -73,5 +75,5 @@ MPI_File_read_shared is a blocking routine that uses the shared file pointer to Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_seek.3in b/ompi/mpi/man/man3/MPI_File_seek.3in index 5359954b648..b75c450bf27 100644 --- a/ompi/mpi/man/man3/MPI_File_seek.3in +++ b/ompi/mpi/man/man3/MPI_File_seek.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_seek 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,18 +12,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_seek(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, - int \fIwhence\fP) +.SH C Syntax +#include +int MPI_File_seek(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, + int \fIwhence\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_SEEK(\fIFH\fP,\fI OFFSET\fP,\fI WHENCE\fP,\fI IERROR\fP) - INTEGER \fIFH, WHENCE, IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_SEEK(\fIFH\fP, \fIOFFSET\fP, \fIWHENCE\fP, \fIIERROR\fP) + INTEGER \fIFH, WHENCE, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax @@ -33,10 +36,10 @@ void MPI::File::Seek(MPI::Offset \fIoffset\fP, int \fIwhence\fP) .SH INPUT PARAMETERS .ft R .TP 1i -fh +fh File handle (handle). .TP 1i -offset +offset File offset (integer). .TP 1i whence @@ -46,29 +49,29 @@ Update mode (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_seek updates the individual file pointer according to +MPI_File_seek updates the individual file pointer according to .I whence, which could have the following possible values: .TP - o -MPI_SEEK_SET - The pointer is set to + o +MPI_SEEK_SET - The pointer is set to .I offset. .TP - o -MPI_SEEK_CUR - The pointer is set to the current pointer position plus + o +MPI_SEEK_CUR - The pointer is set to the current pointer position plus .I offset. .TP - o -MPI_SEEK_END - The pointer is set to the end of the file plus + o +MPI_SEEK_END - The pointer is set to the end of the file plus .I offset. .sp .RE -The -.I offset +The +.I offset can be negative, which allows seeking backwards. It is erroneous to seek to a negative position in the file. 
The end of the file is defined to be the location of the next elementary data item @@ -92,5 +95,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_seek_shared.3in b/ompi/mpi/man/man3/MPI_File_seek_shared.3in index d0862c867a5..b3959afa5d4 100644 --- a/ompi/mpi/man/man3/MPI_File_seek_shared.3in +++ b/ompi/mpi/man/man3/MPI_File_seek_shared.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_seek_shared 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,18 +12,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_seek_shared(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, - int \fIwhence\fP) +.SH C Syntax +#include +int MPI_File_seek_shared(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, + int \fIwhence\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_SEEK_SHARED(\fIFH\fP,\fI OFFSET\fP,\fI WHENCE\fP,\fI IERROR\fP) - INTEGER \fIFH, WHENCE, IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_SEEK_SHARED(\fIFH\fP, \fIOFFSET\fP, \fIWHENCE\fP, \fIIERROR\fP) + INTEGER \fIFH, WHENCE, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax @@ -33,10 +36,10 @@ void MPI::File::Seek_shared(MPI::Offset \fIoffset\fP, int \fIwhence\fP) .SH INPUT PARAMETERS .ft R .TP 1i -fh +fh File handle (handle). .TP 1i -offset +offset File offset (integer). .TP 1i whence @@ -46,39 +49,39 @@ Update mode (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_seek_shared updates the shared file pointer according to +MPI_File_seek_shared updates the shared file pointer according to .I whence, which could have the following possible values: .TP - o -MPI_SEEK_SET - The pointer is set to + o +MPI_SEEK_SET - The pointer is set to .I offset. .TP - o -MPI_SEEK_CUR - The pointer is set to the current pointer position plus + o +MPI_SEEK_CUR - The pointer is set to the current pointer position plus .I offset. .TP - o -MPI_SEEK_END - The pointer is set to the end of the file plus + o +MPI_SEEK_END - The pointer is set to the end of the file plus .I offset. 
.sp .RE MPI_File_seek_shared is collective; all the processes in the communicator -group associated with the file handle -.I fh -must call MPI_File_seek_shared with the same -.I offset -and +group associated with the file handle +.I fh +must call MPI_File_seek_shared with the same +.I offset +and .I whence. All processes in the communicator group are synchronized before the shared file pointer is updated. .sp -The -.I offset +The +.I offset can be negative, which allows seeking backwards. It is erroneous to seek to a negative position in the view. The end of the view is defined to be the position of the next elementary data item, relative @@ -102,5 +105,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_File_set_atomicity.3in b/ompi/mpi/man/man3/MPI_File_set_atomicity.3in index 2235d6dc7d0..82be40d0439 100644 --- a/ompi/mpi/man/man3/MPI_File_set_atomicity.3in +++ b/ompi/mpi/man/man3/MPI_File_set_atomicity.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_set_atomicity 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,16 +12,17 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_set_atomicity(MPI_File \fIfh\fP, int \fIflag\fP) +.SH C Syntax +#include +int MPI_File_set_atomicity(MPI_File \fIfh\fP, int \fIflag\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_SET_ATOMICITY(\fIFH\fP,\fI FLAG\fP,\fI IERROR\fP) - INTEGER FH, FLAG, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_SET_ATOMICITY(\fIFH\fP, \fIFLAG\fP, \fIIERROR\fP) + INTEGER \fIFH, FLAG, IERROR\fP .fi .SH C++ Syntax @@ -41,31 +44,31 @@ flag .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R The consistency semantics for data-access operations using the set of file handles created by one collective MPI_File_open is set by collectively -calling MPI_File_set_atomicity. All processes in the group must pass identical values for -.I fh +calling MPI_File_set_atomicity. All processes in the group must pass identical values for +.I fh and .I flag. -If -.I flag -is +If +.I flag +is .I true, -atomic mode is set; if -.I flag -is +atomic mode is set; if +.I flag +is .I false, nonatomic mode is set. .sp -The default value on a call to MPI_File_open in Open MPI is \fItrue\fP for jobs running on more than one node, \fIfalse\fP for jobs running on a single SMP. 
For more information, see the MPI-2 standard. +The default value on a call to MPI_File_open in Open MPI is \fItrue\fP for jobs running on more than one node, \fIfalse\fP for jobs running on a single SMP. For more information, see the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_set_errhandler.3in b/ompi/mpi/man/man3/MPI_File_set_errhandler.3in index a931c21339f..fd0a4a06891 100644 --- a/ompi/mpi/man/man3/MPI_File_set_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_File_set_errhandler.3in @@ -5,14 +5,14 @@ .\" $COPYRIGHT$ .TH MPI_File_set_errhandler 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_set_errhandler \fP \- Sets the error handler for a file. +\fBMPI_File_set_errhandler \fP \- Sets the error handler for a file. 
.SH SYNTAX .ft R .SH C Syntax .nf #include <mpi.h> -int MPI_File_set_errhandler(MPI_File \fIfile\fP, MPI_Errhandler +int MPI_File_set_errhandler(MPI_File \fIfile\fP, MPI_Errhandler \fIerrhandler\fP) .fi @@ -39,22 +39,22 @@ File (handle). .ft R .TP 1i errhandler -New error handler for file (handle). +New error handler for file (handle). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Attaches a new error handler to a file. The error handler must be either a predefined error handler or an error handler created by a call to MPI_File_create_errhandler. +Attaches a new error handler to a file. The error handler must be either a predefined error handler or an error handler created by a call to MPI_File_create_errhandler. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error.
diff --git a/ompi/mpi/man/man3/MPI_File_set_info.3in b/ompi/mpi/man/man3/MPI_File_set_info.3in index ed1bb2a7791..fbf16aae6b2 100644 --- a/ompi/mpi/man/man3/MPI_File_set_info.3in +++ b/ompi/mpi/man/man3/MPI_File_set_info.3in @@ -2,24 +2,27 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_set_info 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_set_info\fP \- Sets new values for hints (collective). +\fBMPI_File_set_info\fP \- Sets new values for hints (collective). .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_set_info(MPI_File \fIfh\fP, MPI_Info \fIinfo\fP) +.SH C Syntax +#include +int MPI_File_set_info(MPI_File \fIfh\fP, MPI_Info \fIinfo\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_SET_INFO(\fIFH\fP, \fIINFO\fP, \fIIERROR\fP) - INTEGER FH, INFO, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_SET_INFO(\fIFH\fP, \fIINFO\fP, \fIIERROR\fP) + INTEGER \fIFH, INFO, IERROR\fP .fi .SH C++ Syntax @@ -31,7 +34,7 @@ void MPI::File::Set_info(const MPI::Info& \fIinfo\fP) .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETER @@ -44,24 +47,24 @@ Info object (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_set_info is a collective routine that sets new values for the hints of the file associated with \fIfh\fP. These hints are set for each file, using the MPI_File_open, MPI_File_delete, MPI_File_set_view, and MPI_File_set_info routines. 
The opaque \fIinfo\fP object, which allows you to provide hints for optimization of your code, may be different on each process, but some \fIinfo\fP entries are required to be the same on all processes: In these cases, they must appear with the same value in each process's info object. See the HINTS section for a list of hints that can be set. +MPI_File_set_info is a collective routine that sets new values for the hints of the file associated with \fIfh\fP. These hints are set for each file, using the MPI_File_open, MPI_File_delete, MPI_File_set_view, and MPI_File_set_info routines. The opaque \fIinfo\fP object, which allows you to provide hints for optimization of your code, may be different on each process, but some \fIinfo\fP entries are required to be the same on all processes: In these cases, they must appear with the same value in each process's info object. See the HINTS section for a list of hints that can be set. .SH HINTS .ft R -The following hints can be used as values for the \fIinfo\fP argument. +The following hints can be used as values for the \fIinfo\fP argument. .sp SETTABLE HINTS: .sp -- shared_file_timeout: Amount of time (in seconds) to wait for access to the +- shared_file_timeout: Amount of time (in seconds) to wait for access to the shared file pointer before exiting with MPI_ERR_TIMEDOUT. .sp -- rwlock_timeout: Amount of time (in seconds) to wait for obtaining a read or +- rwlock_timeout: Amount of time (in seconds) to wait for obtaining a read or write lock on a contiguous chunk of a UNIX file before exiting with MPI_ERR_TIMEDOUT. -.sp +.sp - noncoll_read_bufsize: Maximum size of the buffer used by MPI I/O to satisfy read requests in the noncollective data-access routines. (See NOTE, below.) @@ -85,13 +88,13 @@ I/O routines can bind an extra thread to an LWP. .sp - mpiio_coll_contiguous: (boolean) controls whether subsequent collective data accesses will request collectively contiguous regions of the file. 
.sp -NON-SETTABLE HINTS: +NON-SETTABLE HINTS: .sp -- filename: Access this hint to get the name of the file. +- filename: Access this hint to get the name of the file. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_set_size.3in b/ompi/mpi/man/man3/MPI_File_set_size.3in index fdd7ad94832..5396ea5cedb 100644 --- a/ompi/mpi/man/man3/MPI_File_set_size.3in +++ b/ompi/mpi/man/man3/MPI_File_set_size.3in @@ -2,6 +2,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_set_size 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,17 +12,18 @@ .SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_set_size(MPI_File \fIfh\fP, MPI_Offset \fIsize\fP) +.SH C Syntax +#include <mpi.h> +int MPI_File_set_size(MPI_File \fIfh\fP, MPI_Offset \fIsize\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_SET_SIZE(\fIFH\fP,\fI SIZE\fP, \fI IERROR\fP) - INTEGER \fIFH, IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fISIZE\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_SET_SIZE(\fIFH\fP, \fISIZE\fP, \fIIERROR\fP) + INTEGER \fIFH, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fISIZE\fP .fi .SH C++ Syntax @@ -32,7 +35,7 @@ void MPI::File::Set_size(MPI::Offset \fIsize\fP) .SH INPUT PARAMETERS .ft R .TP 1i -fh +fh File handle (handle). .TP 1i size @@ -42,7 +45,7 @@ Size to truncate or expand file (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -51,14 +54,14 @@ MPI_File_set_size resizes the file associated with the file handle truncating UNIX files as necessary. MPI_File_set_size is collective; all processes in the group must pass identical values for size. .sp -When using MPI_File_set_size on a UNIX file, if \fIsize\fP is larger than the current file size, the file size becomes \fIsize\fP. If \fIsize\fP is smaller than the current file size, the file is truncated at the position defined by \fIsize\fP (from the beginning of the file and measured in bytes). Regions of the file which have been previously written are unaffected. +When using MPI_File_set_size on a UNIX file, if \fIsize\fP is larger than the current file size, the file size becomes \fIsize\fP. If \fIsize\fP is smaller than the current file size, the file is truncated at the position defined by \fIsize\fP (from the beginning of the file and measured in bytes). Regions of the file which have been previously written are unaffected.
.sp MPI_File_set_size does not affect the individual file pointers or the shared file pointer. .sp -Note that the actual amount of storage space cannot be allocated by MPI_File_set_size. Use MPI_File_preallocate to accomplish this. +Note that the actual amount of storage space cannot be allocated by MPI_File_set_size. Use MPI_File_preallocate to accomplish this. .sp -It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. +It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. .SH FORTRAN 77 NOTES .ft R @@ -77,6 +80,6 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_File_set_view.3in b/ompi/mpi/man/man3/MPI_File_set_view.3in index 1a2c36504b9..3dfcdfe06a0 100644 --- a/ompi/mpi/man/man3/MPI_File_set_view.3in +++ b/ompi/mpi/man/man3/MPI_File_set_view.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_set_view 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,27 +12,28 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_set_view(MPI_File \fIfh\fP, MPI_Offset \fIdisp\fP, - MPI_Datatype \fIetype\fP, MPI_Datatype \fIfiletype\fP, - const char \fI*datarep\fP, MPI_Info \fIinfo\fP) +.SH C Syntax +#include +int MPI_File_set_view(MPI_File \fIfh\fP, MPI_Offset \fIdisp\fP, + MPI_Datatype \fIetype\fP, MPI_Datatype \fIfiletype\fP, + const char \fI*datarep\fP, MPI_Info \fIinfo\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_SET_VIEW(\fIFH\fP,\fI DISP\fP,\fI ETYPE\fP, - \fI FILETYPE\fP, \fIDATAREP\fP, \fIINFO\fP,\fI IERROR\fP) - INTEGER \fIFH, ETYPE, FILETYPE, INFO, IERROR\fP - CHARACTER*(*) \fIDATAREP\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIDISP\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_SET_VIEW(\fIFH\fP, \fIDISP\fP, \fIETYPE\fP, + \fIFILETYPE\fP, \fIDATAREP\fP, \fIINFO\fP, \fIIERROR\fP) + INTEGER \fIFH, ETYPE, FILETYPE, INFO, IERROR\fP + CHARACTER*(*) \fIDATAREP\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIDISP\fP .fi .SH C++ Syntax .nf #include -void MPI::File::Set_view(MPI::Offset \fIdisp\fP, +void MPI::File::Set_view(MPI::Offset \fIdisp\fP, const MPI::Datatype& \fIetype\fP, const MPI::Datatype& \fIfiletype\fP, const char* \fIdatarep\fP, const MPI::Info& \fIinfo\fP) @@ -45,81 +48,81 @@ File handle (handle). 
.SH INPUT PARAMETERS .ft R .TP 1i -disp +disp Displacement (integer). .TP 1i -etype -Elementary data type (handle). +etype +Elementary data type (handle). .TP 1i filetype File type (handle). See Restrictions, below. .TP 1i datarep -Data representation (string). +Data representation (string). .TP 1i info -Info object (handle). +Info object (handle). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R The MPI_File_set_view routine changes the process's view of the data in the file -- the beginning of the data accessible in the file through -that view is set to +that view is set to .I disp; -the type of data is set to +the type of data is set to .I etype; -and the distribution of data to processes is set to +and the distribution of data to processes is set to .I filetype. In addition, MPI_File_set_view resets the independent file pointers and -the shared file pointer to zero. MPI_File_set_view is collective across the -.IR fh ; -all processes in the group must pass identical values for -.IR datarep +the shared file pointer to zero. MPI_File_set_view is collective across the +.IR fh ; +all processes in the group must pass identical values for +.IR datarep and provide an .I etype with an identical extent. The values for -.IR disp , +.IR disp , .IR filetype , -and +and .I info may vary. It is erroneous to use the shared file pointer data-access -routines unless identical values for +routines unless identical values for .I disp -and +and .I filetype -are also given. The data types passed in +are also given. The data types passed in .I etype -and +and .I filetype must be committed. .sp -The +The .I disp displacement argument specifies the position (absolute offset in bytes from the beginning of the file) where the view begins. .sp -The MPI_File_set_view interface allows the user to pass a data-representation string to MPI I/O via the \fIdatarep\fP argument. 
To obtain the default value (or "native"), pass NULL. The user can also pass information via the \fIinfo\fP argument. See the HINTS section for a list of hints that can be set. For more information, see the MPI-2 standard. +The MPI_File_set_view interface allows the user to pass a data-representation string to MPI I/O via the \fIdatarep\fP argument. To obtain the default value (or "native"), pass NULL. The user can also pass information via the \fIinfo\fP argument. See the HINTS section for a list of hints that can be set. For more information, see the MPI-2 standard. .SH HINTS .ft R -The following hints can be used as values for the \fIinfo\fP argument. +The following hints can be used as values for the \fIinfo\fP argument. .sp SETTABLE HINTS: -.sp +.sp - MPI_INFO_NULL .sp -- shared_file_timeout: Amount of time (in seconds) to wait for access to the +- shared_file_timeout: Amount of time (in seconds) to wait for access to the shared file pointer before exiting with MPI_ERR_TIMEDOUT. .sp -- rwlock_timeout: Amount of time (in seconds) to wait for obtaining a read or +- rwlock_timeout: Amount of time (in seconds) to wait for obtaining a read or write lock on a contiguous chunk of a UNIX file before exiting with MPI_ERR_TIMEDOUT. -.sp +.sp - noncoll_read_bufsize: Maximum size of the buffer used by MPI I/O to satisfy read requests in the noncollective data-access routines. (See NOTE, below.) @@ -143,9 +146,9 @@ I/O routines can bind an extra thread to an LWP. .sp - mpiio_coll_contiguous: (boolean) controls whether subsequent collective data accesses will request collectively contiguous regions of the file. .sp -NON-SETTABLE HINTS: +NON-SETTABLE HINTS: .sp -- filename: Access this hint to get the name of the file. +- filename: Access this hint to get the name of the file. .SH FORTRAN 77 NOTES .ft R @@ -164,6 +167,6 @@ and gives the length of the declared integer in bytes. 
Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_sync.3in b/ompi/mpi/man/man3/MPI_File_sync.3in index 35883fb2acb..dea6bc388d5 100644 --- a/ompi/mpi/man/man3/MPI_File_sync.3in +++ b/ompi/mpi/man/man3/MPI_File_sync.3in @@ -2,24 +2,27 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_sync 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_sync\fP \- Makes semantics consistent for data-access operations (collective). +\fBMPI_File_sync\fP \- Makes semantics consistent for data-access operations (collective). 
.SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_sync(MPI_File \fIfh\fP) +.SH C Syntax +#include <mpi.h> +int MPI_File_sync(MPI_File \fIfh\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_SYNC(\fIFH\fP, \fIIERROR\fP) - INTEGER FH, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_SYNC(\fIFH\fP, \fIIERROR\fP) + INTEGER \fIFH, IERROR\fP .fi .SH C++ Syntax @@ -38,19 +41,19 @@ File handle (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R Calling MPI_File_sync with .I fh -causes all previous writes to +causes all previous writes to .I fh -by the calling process to be written to permanent storage. If other processes have made updates to permanent storage, then all such updates become visible to subsequent reads of +by the calling process to be written to permanent storage. If other processes have made updates to permanent storage, then all such updates become visible to subsequent reads of .I fh by the calling process. .sp -MPI_File_sync is a collective operation. The user is responsible for ensuring that all nonblocking requests on +MPI_File_sync is a collective operation. The user is responsible for ensuring that all nonblocking requests on .I fh have been completed before calling MPI_File_sync. Otherwise, the call to MPI_File_sync is erroneous. @@ -58,6 +61,6 @@ have been completed before calling MPI_File_sync. Otherwise, the call to MPI_Fil Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN.
The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_write.3in b/ompi/mpi/man/man3/MPI_File_write.3in index 89021362796..c03031b8a15 100644 --- a/ompi/mpi/man/man3/MPI_File_write.3in +++ b/ompi/mpi/man/man3/MPI_File_write.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_write 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,21 +13,21 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_write(MPI_File \fIfh\fP, const void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP, +.SH C Syntax +#include +int MPI_File_write(MPI_File \fIfh\fP, const void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE(\fIFH\fP, \fI BUF\fP, \fICOUNT\fP, - \fI DATATYPE\fP, \fISTATUS\fP, \fI IERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), - IERROR\fP +USE MPI +! 
or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, + \fIDATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -63,33 +65,33 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_write attempts to write into the file associated with +MPI_File_write attempts to write into the file associated with .I fh -(at the current individual file pointer position maintained by the system) a total number of -.I count +(at the current individual file pointer position maintained by the system) a total number of +.I count data items having -.I datatype -type from the user's buffer +.I datatype +type from the user's buffer .I buf. The data is written into those parts of the file specified by the current view. MPI_File_write stores the -number of -.I datatype -elements actually written in -.I status. -All other fields of -.I status +number of +.I datatype +elements actually written in +.I status. +All other fields of +.I status are undefined. .sp -It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. +It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. 
The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_write_all.3in b/ompi/mpi/man/man3/MPI_File_write_all.3in index 9c85de90da0..624e191d006 100644 --- a/ompi/mpi/man/man3/MPI_File_write_all.3in +++ b/ompi/mpi/man/man3/MPI_File_write_all.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_write_all 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,20 +13,20 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_write_all(MPI_File \fIfh\fP, const void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) +.SH C Syntax +#include +int MPI_File_write_all(MPI_File \fIfh\fP, const void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE_ALL(\fIFH\fP, \fI BUF\fP, \fICOUNT\fP, - \fI DATATYPE\fP, \fISTATUS\fP, \fI IERROR\fP) - BUF(*) - INTEGER FH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), - IERROR +USE MPI +! 
or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE_ALL(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, + \fIDATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -59,33 +61,33 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_write_all is a collective routine that attempts to write into the file associated with +MPI_File_write_all is a collective routine that attempts to write into the file associated with .I fh -(at the current individual file pointer position maintained by the system) a total number of -.I count +(at the current individual file pointer position maintained by the system) a total number of +.I count data items having -.I datatype -type from the user's buffer +.I datatype +type from the user's buffer .I buf. The data is written into those parts of the file specified by the current view. MPI_File_write_all stores the -number of -.I datatype -elements actually written in -.I status. -All other fields of -.I status +number of +.I datatype +elements actually written in +.I status. +All other fields of +.I status are undefined. .sp -It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. +It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. 
The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_write_all_begin.3in b/ompi/mpi/man/man3/MPI_File_write_all_begin.3in index 017f0f09981..0f262d9ebf5 100644 --- a/ompi/mpi/man/man3/MPI_File_write_all_begin.3in +++ b/ompi/mpi/man/man3/MPI_File_write_all_begin.3in @@ -3,39 +3,42 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_write_all_begin 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_write_all_begin\fP \- Writes a file starting at the locations specified by individual file pointers; beginning part of a split collective routine (nonblocking). +\fBMPI_File_write_all_begin\fP \- Writes a file starting at the locations specified by individual file pointers; beginning part of a split collective routine (nonblocking). 
.SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_write_all_begin(MPI_File \fIfh\fP, const void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP) +.SH C Syntax +#include +int MPI_File_write_all_begin(MPI_File \fIfh\fP, const void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE_ALL_BEGIN(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, COUNT, DATATYPE, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE_ALL_BEGIN(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, IERROR\fP .fi .SH C++ Syntax .nf #include -void MPI::File::Write_all_begin(const void* \fIbuf\fP, int \fIcount\fP, +void MPI::File::Write_all_begin(const void* \fIbuf\fP, int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP) .fi .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -56,30 +59,30 @@ Data type of each buffer element (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_write_all_begin is the beginning part of a split collective, nonblocking routine that attempts to write into the file associated with +MPI_File_write_all_begin is the beginning part of a split collective, nonblocking routine that attempts to write into the file associated with .I fh -(at the current individual file pointer position maintained by the system) a total number of -.I count +(at the current individual file pointer position maintained by the system) a total number of +.I count data items having -.I datatype -type from the user's buffer +.I datatype +type from the user's buffer .I buf. The data is written into those parts of the -file specified by the current view. +file specified by the current view. 
.SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. +All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_write_all_end.3in b/ompi/mpi/man/man3/MPI_File_write_all_end.3in index f7f9eda1753..fbaa3dde59d 100644 --- a/ompi/mpi/man/man3/MPI_File_write_all_end.3in +++ b/ompi/mpi/man/man3/MPI_File_write_all_end.3in @@ -3,25 +3,28 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. 
.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_write_all_end 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_File_write_all_end\fP \- Writes a file starting at the locations specified by individual file pointers; ending part of a split collective routine (blocking). +\fBMPI_File_write_all_end\fP \- Writes a file starting at the locations specified by individual file pointers; ending part of a split collective routine (blocking). .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_write_all_end(MPI_File \fIfh\fP, const void \fI*buf\fP, MPI_Status \fI*status\fP) +.SH C Syntax +#include +int MPI_File_write_all_end(MPI_File \fIfh\fP, const void \fI*buf\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE_ALL_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, STATUS, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE_ALL_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -35,14 +38,14 @@ void MPI::File::Write_all_end(const void* \fIbuf\fP) .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETER .ft R .TP 1i buf -Initial address of buffer (choice). +Initial address of buffer (choice). .SH OUTPUT PARAMETERS .ft R @@ -51,28 +54,28 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_File_write_all_end is the ending part of a split collective routine that stores the -number of elements actually written into the file associated with +number of elements actually written into the file associated with .I fh -from the user's buffer +from the user's buffer .I buf in -.I status.
MPI_File_write_all_end blocks until the operation initiated by MPI_File_write_all_begin completes. The data is written into those parts of the -file specified by the current view. All other fields of -.I status +file specified by the current view. All other fields of +.I status are undefined. .SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. +All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_File_write_at.3in b/ompi/mpi/man/man3/MPI_File_write_at.3in index c986ee83b28..9ac6cfb6a54 100644 --- a/ompi/mpi/man/man3/MPI_File_write_at.3in +++ b/ompi/mpi/man/man3/MPI_File_write_at.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_write_at 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,29 +13,30 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_write_at(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, const void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) +.SH C Syntax +#include +int MPI_File_write_at(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, const void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE_AT(\fIFH\fP, \fI OFFSET\fP, \fI BUF\fP, \fICOUNT\fP, - \fI DATATYPE\fP, \fISTATUS\fP, \fI IERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +! 
or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE_AT(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, + \fIDATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax .nf #include -void MPI::File::Write_at(MPI::Offset \fIoffset\fP, const void* \fIbuf\fP, +void MPI::File::Write_at(MPI::Offset \fIoffset\fP, const void* \fIbuf\fP, int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP, MPI::Status& \fIstatus\fP) -void MPI::File::Write_at(MPI::Offset \fIoffset\fP, const void* \fIbuf\fP, +void MPI::File::Write_at(MPI::Offset \fIoffset\fP, const void* \fIbuf\fP, int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP) .fi @@ -62,34 +65,34 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_write_at attempts to write into the file associated with +MPI_File_write_at attempts to write into the file associated with .I fh -(at the -.I offset -position) a total number of -.I count +(at the +.I offset +position) a total number of +.I count data items having -.I datatype -type from the user's buffer +.I datatype +type from the user's buffer .I buf. -The offset is in +The offset is in .I etype units relative to the current view. That is, holes are not counted when locating an offset. The data is written into those parts of the file specified by the current view. MPI_File_write_at stores the -number of -.I datatype -elements actually written in -.I status. -All other fields of -.I status +number of +.I datatype +elements actually written in +.I status. +All other fields of +.I status are undefined. .sp -It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. +It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. 
.SH FORTRAN 77 NOTES .ft R @@ -108,7 +111,7 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_File_write_at_all.3in b/ompi/mpi/man/man3/MPI_File_write_at_all.3in index 2fa85bcdb4e..eff151f07c9 100644 --- a/ompi/mpi/man/man3/MPI_File_write_at_all.3in +++ b/ompi/mpi/man/man3/MPI_File_write_at_all.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_write_at_all 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,27 +13,28 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_write_at_all(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, const void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) +.SH C Syntax +#include +int MPI_File_write_at_all(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, const void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE_AT_ALL(\fIFH\fP, \fI OFFSET\fP, \fI BUF\fP, \fICOUNT\fP, - \fI DATATYPE\fP, \fISTATUS\fP, \fI IERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE_AT_ALL(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, + \fIDATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax .nf #include void MPI::File::Write_at_all(MPI::Offset \fIoffset\fP, const void* \fIbuf\fP, - int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP, + int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP, MPI::Status& \fIstatus\fP) void MPI::File::Write_at_all(MPI::Offset \fIoffset\fP, const void* \fIbuf\fP, @@ -63,32 +66,32 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R -MPI_File_write_at_all is a collective routine that attempts to write into the file associated with +MPI_File_write_at_all is a collective routine that attempts to write into the file associated with .I fh -(at the -.I offset -position) a total number of -.I count +(at the +.I offset +position) a total number of +.I count data items having -.I datatype -type from the user's buffer +.I datatype +type from the user's buffer .I buf. The offset is in etype units relative to the current view. That is, holes are not counted when locating an offset. The data is written into those parts of the file specified by the current view. MPI_File_write_at_all stores the -number of -.I datatype -elements actually written in -.I status. -All other fields of -.I status +number of +.I datatype +elements actually written in +.I status. +All other fields of +.I status are undefined. .sp -It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. +It is erroneous to call this function if MPI_MODE_SEQUENTIAL mode was specified when the file was opened. .SH FORTRAN 77 NOTES .ft R @@ -107,5 +110,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. 
For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_write_at_all_begin.3in b/ompi/mpi/man/man3/MPI_File_write_at_all_begin.3in index 65e927b0e4a..6fadc8e97fc 100644 --- a/ompi/mpi/man/man3/MPI_File_write_at_all_begin.3in +++ b/ompi/mpi/man/man3/MPI_File_write_at_all_begin.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_write_at_all_begin 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,33 +13,34 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_write_at_all_begin(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, - const void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP) +.SH C Syntax +#include +int MPI_File_write_at_all_begin(MPI_File \fIfh\fP, MPI_Offset \fIoffset\fP, + const void \fI*buf\fP, int \fIcount\fP, MPI_Datatype \fIdatatype\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE_AT_ALL_BEGIN(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP,\fI IERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, IERROR\fP - INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP +USE MPI +! 
or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE_AT_ALL_BEGIN(\fIFH\fP, \fIOFFSET\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, IERROR\fP + INTEGER(KIND=MPI_OFFSET_KIND) \fIOFFSET\fP .fi .SH C++ Syntax .nf #include -void MPI::File::Write_at_all_begin(MPI::Offset \fIoffset\fP, - const void* \fIbuf\fP, int \fIcount\fP, +void MPI::File::Write_at_all_begin(MPI::Offset \fIoffset\fP, + const void* \fIbuf\fP, int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP) .fi .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -56,29 +59,29 @@ Number of elements in buffer (integer). .ft R .TP 1i datatype -Data type of each buffer element (handle). +Data type of each buffer element (handle). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_File_write_at_all_begin is the beginning part of a split collective, that is, a nonblocking routine that attempts to write into the file associated with +MPI_File_write_at_all_begin is the beginning part of a split collective, that is, a nonblocking routine that attempts to write into the file associated with .I fh -(at the -.I offset -position) a total number of -.I count +(at the +.I offset +position) a total number of +.I count data items having -.I datatype -type from the user's buffer +.I datatype +type from the user's buffer .I buf. The offset is in etype units relative to the current view. That is, holes are not counted when locating an offset. The data is written into those parts of the -file specified by the current view. +file specified by the current view. .SH FORTRAN 77 NOTES .ft R @@ -95,11 +98,11 @@ and gives the length of the declared integer in bytes. .SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. 
These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. +All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_write_at_all_end.3in b/ompi/mpi/man/man3/MPI_File_write_at_all_end.3in index cdfec299b67..e48d1fe2fed 100644 --- a/ompi/mpi/man/man3/MPI_File_write_at_all_end.3in +++ b/ompi/mpi/man/man3/MPI_File_write_at_all_end.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_write_at_all_end 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,18 +13,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_write_at_all_end(MPI_File \fIfh\fP, const void \fI*buf\fP, - MPI_Status \fI*status\fP) +.SH C Syntax +#include +int MPI_File_write_at_all_end(MPI_File \fIfh\fP, const void \fI*buf\fP, + MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE_AT_ALL_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, STATUS(MPI_STATUS_SIZE), IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE_AT_ALL_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -49,30 +52,30 @@ Initial address of buffer (choice). .ft R .TP 1i status -Status object (status). +Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_File_write_at_all_end is the ending part of a split collective routine that stores the -number of elements actually written into the file associated with +number of elements actually written into the file associated with .I fh in -.I status. +.I status. The data is written into those parts of the -file specified by the current view. All other fields of -.I status +file specified by the current view. All other fields of +.I status are undefined. .SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. +All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. 
.SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_write_ordered.3in b/ompi/mpi/man/man3/MPI_File_write_ordered.3in index cb23716a037..9df1ccb6596 100644 --- a/ompi/mpi/man/man3/MPI_File_write_ordered.3in +++ b/ompi/mpi/man/man3/MPI_File_write_ordered.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
.\" $COPYRIGHT$ .TH MPI_File_write_ordered 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -10,21 +12,22 @@ .SH SYNTAX .ft R -C Syntax +.SH C Syntax .nf - #include - int MPI_File_write_ordered(MPI_File \fIfh\fP, const void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP, - MPI_Status \fI*status\fP) +#include +int MPI_File_write_ordered(MPI_File \fIfh\fP, const void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP, + MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE_ORDERED(\fIFH\fP,\fI BUF\fP,\fI COUNT\fP,\fI DATATYPE\fP, - \fISTATUS\fP,\fI IERROR\fP) - \fIBUF\fP(*) - INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE_ORDERED(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, + \fISTATUS\fP, \fIIERROR\fP) + \fIBUF\fP(*) + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -40,7 +43,7 @@ void MPI::File::Write_ordered(const void* \fIbuf\fP, int \fIcount\fP, .SH INPUT PARAMETERS .ft R .TP 1i -fh +fh File handle (handle). .TP 1i buf @@ -59,33 +62,33 @@ status Status object (Status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_File_write_ordered is a collective routine. This routine must be called by all processes in the communicator group associated with -the file handle +the file handle .I fh. Each process may pass different argument values -for the -.I datatype -and -.I count +for the +.I datatype +and +.I count arguments. Each process attempts to -write, into the file associated with +write, into the file associated with .I fh, -a total number of -.I count -data items having datatype type contained in the user's buffer +a total number of +.I count +data items having datatype type contained in the user's buffer .I buf. 
For each process, the location in the file at which data is written is the position at which the shared file pointer would be after all processes whose ranks within the group are less than that of this process had written their data. MPI_File_write_ordered returns the number of -.I datatype -elements written in +.I datatype +elements written in .I status. The shared file pointer is updated by the amounts of data requested by all processes of the @@ -95,5 +98,5 @@ group. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_write_ordered_begin.3in b/ompi/mpi/man/man3/MPI_File_write_ordered_begin.3in index 05f9974ee11..abba6a3b73e 100644 --- a/ompi/mpi/man/man3/MPI_File_write_ordered_begin.3in +++ b/ompi/mpi/man/man3/MPI_File_write_ordered_begin.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. 
.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_write_ordered_begin 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,18 +13,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_File_write_ordered_begin(MPI_File \fIfh\fP, const void \fI*buf\fP, - int \fIcount\fP, MPI_Datatype \fIdatatype\fP) +.SH C Syntax +#include +int MPI_File_write_ordered_begin(MPI_File \fIfh\fP, const void \fI*buf\fP, + int \fIcount\fP, MPI_Datatype \fIdatatype\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE_ORDERED_BEGIN(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, COUNT, DATATYPE, IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE_ORDERED_BEGIN(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, IERROR\fP .fi .SH C++ Syntax @@ -35,7 +38,7 @@ void MPI::File::Write_ordered_begin(const void* \fIbuf\fP, int \fIcount\fP, .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -56,40 +59,40 @@ Data type of each buffer element (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_File_write_ordered_begin is the beginning part of a split collective, nonblocking routine that must be called by all processes in the communicator group associated with -the file handle +the file handle .I fh. Each process may pass different argument values -for the -.I datatype -and -.I count +for the +.I datatype +and +.I count arguments. 
After all processes of the group have issued their respective calls, each process attempts to -write, into the file associated with +write, into the file associated with .I fh, -a total number of -.I count -data items having datatype type contained in the user's buffer +a total number of +.I count +data items having datatype type contained in the user's buffer .I buf. For each process, the location in the file at which data is written is the position at which the shared file pointer would be after all processes whose ranks within the group are less than that of this process had -written their data. +written their data. .SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. +All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. 
The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_File_write_ordered_end.3in b/ompi/mpi/man/man3/MPI_File_write_ordered_end.3in index ed6440c21fd..64a2c9d138b 100644 --- a/ompi/mpi/man/man3/MPI_File_write_ordered_end.3in +++ b/ompi/mpi/man/man3/MPI_File_write_ordered_end.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_write_ordered_end 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,18 +13,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_write_ordered_end(MPI_File \fIfh\fP, const void \fI*buf\fP, - MPI_Status \fI*status\fP) +.SH C Syntax +#include <mpi.h> +int MPI_File_write_ordered_end(MPI_File \fIfh\fP, const void \fI*buf\fP, + MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE_ORDERED_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP,\fI IERROR\fP) - BUF(*) - INTEGER FH, STATUS(MPI_STATUS_SIZE), IERROR +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE_ORDERED_END(\fIFH\fP, \fIBUF\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -36,7 +39,7 @@ void MPI::File::Write_ordered_end(const void* \fIbuf\fP) .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETER @@ -49,29 +52,29 @@ Initial address of buffer (choice). .ft R .TP 1i status -Status object (status). +Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer).
.SH DESCRIPTION .ft R MPI_File_write_ordered_end is the ending part of a split collective routine that must be called by all processes in the communicator group associated with -the file handle +the file handle .I fh. -MPI_File_write_ordered_end returns the number of elements written into the file associated with +MPI_File_write_ordered_end returns the number of elements written into the file associated with .I fh in .I status. .SH NOTES .ft R -All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. +All the nonblocking collective routines for data access are "split" into two routines, each with _begin or _end as a suffix. These split collective routines are subject to the semantic rules described in Section 9.4.5 of the MPI-2 standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_File_write_shared.3in b/ompi/mpi/man/man3/MPI_File_write_shared.3in index 7cb3f40cd92..039fee7efa5 100644 --- a/ompi/mpi/man/man3/MPI_File_write_shared.3in +++ b/ompi/mpi/man/man3/MPI_File_write_shared.3in @@ -3,6 +3,8 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_File_write_shared 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -11,19 +13,19 @@ .SH SYNTAX .ft R .nf -C Syntax - #include <mpi.h> - int MPI_File_write_shared(MPI_File \fIfh\fP, const void \fI*buf\fP, int \fIcount\fP, - MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) +.SH C Syntax +#include <mpi.h> +int MPI_File_write_shared(MPI_File \fIfh\fP, const void \fI*buf\fP, int \fIcount\fP, + MPI_Datatype \fIdatatype\fP, MPI_Status \fI*status\fP) .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_FILE_WRITE_SHARED(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fISTATUS\fP,\fI IERROR\fP) - \fIBUF(*)\fP - INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), - IERROR\fP +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_FILE_WRITE_SHARED(\fIFH\fP, \fIBUF\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fISTATUS\fP, \fIIERROR\fP) + \fIBUF(*)\fP + INTEGER \fIFH, COUNT, DATATYPE, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -39,7 +41,7 @@ void MPI::File::Write_shared(const void* \fIbuf\fP, int \fIcount\fP, .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -fh +fh File handle (handle). .SH INPUT PARAMETERS @@ -63,7 +65,7 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer).
.SH DESCRIPTION .ft R @@ -73,5 +75,5 @@ MPI_File_write_shared is a blocking routine that uses the shared file pointer to Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Finalize.3in b/ompi/mpi/man/man3/MPI_Finalize.3in index 88f2d266f9b..9c6d7cc27cb 100644 --- a/ompi/mpi/man/man3/MPI_Finalize.3in +++ b/ompi/mpi/man/man3/MPI_Finalize.3in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation .\" $COPYRIGHT$ @@ -19,7 +19,7 @@ int MPI_Finalize() .nf INCLUDE 'mpif.h' MPI_FINALIZE(\fIIERROR\fP) - INTEGER \fIIERROR\fP + INTEGER \fIIERROR\fP .fi .SH C++ Syntax @@ -32,26 +32,33 @@ void Finalize() .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R -This routine cleans up all MPI states. Once this routine is called, no MPI routine (not even MPI_Init) may be called, except for MPI_Get_version, MPI_Initialized, and MPI_Finalized. Unless there has been a call to MPI_Abort, you must ensure that all pending communications involving a process are complete before the process calls MPI_Finalize. If the call returns, each process may either continue local computations or exit without participating in further communication with other processes. At the moment when the last process calls MPI_Finalize, all pending sends must be matched by a receive, and all pending receives must be matched by a send. +This routine cleans up all MPI states. Once this routine is called, no MPI routine (not even MPI_Init) may be called, except for MPI_Get_version, MPI_Initialized, and MPI_Finalized. Unless there has been a call to MPI_Abort, you must ensure that all pending communications involving a process are complete before the process calls MPI_Finalize. If the call returns, each process may either continue local computations or exit without participating in further communication with other processes. At the moment when the last process calls MPI_Finalize, all pending sends must be matched by a receive, and all pending receives must be matched by a send. -MPI_Finalize is collective over all connected processes. If no processes were spawned, accepted, or connected, then this means it is collective over MPI_COMM_WORLD. Otherwise, it is collective over the union of all processes that have been and continue to be connected. +MPI_Finalize is collective over all connected processes. If no processes were spawned, accepted, or connected, then this means it is collective over MPI_COMM_WORLD. Otherwise, it is collective over the union of all processes that have been and continue to be connected. .SH NOTES .ft R All processes must call this routine before exiting. 
All processes will still exist but may not make any further MPI calls. MPI_Finalize guarantees that all local actions required by communications the user has completed will, in fact, occur before it returns. However, MPI_Finalize guarantees nothing about pending communications that have \fInot\fP been completed; completion is ensured only by MPI_Wait, MPI_Test, or MPI_Request_free combined with some other verification of completion. .sp -For example, a successful return from a blocking communication operation or from MPI_Wait or MPI_Test means that the communication is completed by the user and the buffer can be reused, but does not guarantee that the local process has no more work to do. Similarly, a successful return from MPI_Request_free with a request handle generated by an MPI_Isend nullifies the handle but does not guarantee that the operation has completed. The MPI_Isend is complete only when a matching receive has completed. +For example, a successful return from a blocking communication operation or from MPI_Wait or MPI_Test means that the communication is completed by the user and the buffer can be reused, but does not guarantee that the local process has no more work to do. Similarly, a successful return from MPI_Request_free with a request handle generated by an MPI_Isend nullifies the handle but does not guarantee that the operation has completed. The MPI_Isend is complete only when a matching receive has completed. .sp -If you would like to cause actions to happen when a process finishes, attach an attribute to MPI_COMM_SELF with a callback function. Then, when MPI_Finalize is called, it will first execute the equivalent of an MPI_Comm_free on MPI_COMM_SELF. This will cause the delete callback function to be executed on all keys associated with MPI_COMM_SELF in an arbitrary order. If no key has been attached to MPI_COMM_SELF, then no callback is invoked. This freeing of MPI_COMM_SELF happens before any other parts of MPI are affected. 
Calling MPI_Finalized will thus return "false" in any of these callback functions. Once you have done this with MPI_COMM_SELF, the results of MPI_Finalize are not specified. +If you would like to cause actions to happen when a process finishes, attach an attribute to MPI_COMM_SELF with a callback function. Then, when MPI_Finalize is called, it will first execute the equivalent of an MPI_Comm_free on MPI_COMM_SELF. This will cause the delete callback function to be executed on all keys associated with MPI_COMM_SELF in an arbitrary order. If no key has been attached to MPI_COMM_SELF, then no callback is invoked. This freeing of MPI_COMM_SELF happens before any other parts of MPI are affected. Calling MPI_Finalized will thus return "false" in any of these callback functions. Once you have done this with MPI_COMM_SELF, the results of MPI_Finalize are not specified. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
+.SH SEE ALSO +.ft R +.nf +MPI_Init +MPI_Init_thread +MPI_Initialized +MPI_Finalized diff --git a/ompi/mpi/man/man3/MPI_Finalized.3in b/ompi/mpi/man/man3/MPI_Finalized.3in index 9fb4f04b5d2..db1b893a349 100644 --- a/ompi/mpi/man/man3/MPI_Finalized.3in +++ b/ompi/mpi/man/man3/MPI_Finalized.3in @@ -1,11 +1,11 @@ .\" -*- nroff -*- -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation .\" $COPYRIGHT$ .TH MPI_Finalized 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Finalized \fP \- Checks whether MPI_Finalize has completed. +\fBMPI_Finalized \fP \- Checks whether MPI has been finalized .SH SYNTAX .ft R @@ -20,7 +20,7 @@ int MPI_Finalized(int \fI*flag\fP) INCLUDE 'mpif.h' MPI_FINALIZED(\fIFLAG\fP, \fIIERROR\fP) LOGICAL \fIFLAG\fP - INTEGER \fIIERROR\fP + INTEGER \fIIERROR\fP .fi .SH C++ Syntax @@ -33,19 +33,29 @@ bool MPI::Is_finalized() .ft R .TP 1i flag -True if MPI was finalized (logical). +True if MPI was finalized, and false otherwise (logical). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -This routine returns "true" if MPI_Finalize has completed. You can call MPI_Finalized before MPI_Init and after MPI_Finalize. +This routine may be used to determine whether MPI has been finalized. +It is one of a small number of routines that may be called before MPI +is initialized and after MPI has been finalized (MPI_Initialized is +another). .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. 
.sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +.SH SEE ALSO +.ft R +.nf +MPI_Init +MPI_Init_thread +MPI_Initialized +MPI_Finalize diff --git a/ompi/mpi/man/man3/MPI_Free_mem.3in b/ompi/mpi/man/man3/MPI_Free_mem.3in index 52872142dc0..56cbdcf3482 100644 --- a/ompi/mpi/man/man3/MPI_Free_mem.3in +++ b/ompi/mpi/man/man3/MPI_Free_mem.3in @@ -18,9 +18,9 @@ int MPI_Free_mem(void *\fIbase\fP) .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_FREE_MEM(\fIBASE, IERROR\fP) - \fIBASE\fP(*) - INTEGER \fIIERROR\fP +MPI_FREE_MEM(\fIBASE, IERROR\fP) + \fIBASE\fP(*) + INTEGER \fIIERROR\fP .fi .SH C++ Syntax @@ -33,23 +33,23 @@ void MPI::Free_mem(void *\fIbase\fP) .ft R .TP 1i base -Initial address of memory segment allocated by MPI_Alloc_mem (choice). +Initial address of memory segment allocated by MPI_Alloc_mem (choice). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Free_mem frees memory that has been allocated by MPI_Alloc_mem. +MPI_Free_mem frees memory that has been allocated by MPI_Alloc_mem. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Gather.3in b/ompi/mpi/man/man3/MPI_Gather.3in index 85c56beb639..e8ae67bb7a9 100644 --- a/ompi/mpi/man/man3/MPI_Gather.3in +++ b/ompi/mpi/man/man3/MPI_Gather.3in @@ -29,7 +29,7 @@ MPI_GATHER(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, RECVTYPE, ROOT, COMM, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, ROOT\fP - INTEGER \fICOMM, IERROR\fP + INTEGER \fICOMM, IERROR\fP MPI_IGATHER(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, RECVTYPE, ROOT, COMM, REQUEST, IERROR\fP) @@ -42,7 +42,7 @@ MPI_IGATHER(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, .nf #include void MPI::Comm::Gather(const void* \fIsendbuf\fP, int \fIsendcount\fP, - const MPI::Datatype& \fIsendtype\fP, void* \fIrecvbuf\fP, + const MPI::Datatype& \fIsendtype\fP, void* \fIrecvbuf\fP, int \fIrecvcount\fP, const MPI::Datatype& \fIrecvtype\fP, int \fIroot\fP, const = 0 @@ -82,48 +82,48 @@ Request (handle, non-blocking only). .ft R .TP 1i IERROR -Fortran only: Error status (integer). 
+Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Each process (root process included) sends the contents of its send buffer to the root process. The root process receives the messages and stores them in rank order. The outcome is as if each of the n processes in the group (including the root process) had executed a call to +Each process (root process included) sends the contents of its send buffer to the root process. The root process receives the messages and stores them in rank order. The outcome is as if each of the n processes in the group (including the root process) had executed a call to .sp .nf MPI_Send(sendbuf, sendcount, sendtype, root, \&...) .fi .sp -and the root had executed n calls to +and the root had executed n calls to .sp .nf - MPI_Recv(recfbuf + i * recvcount * extent(recvtype), \ + MPI_Recv(recvbuf + i * recvcount * extent(recvtype), \ recvcount, recvtype, i, \&...) .fi .sp -where extent(recvtype) is the type extent obtained from a call to MPI_Type_extent(). +where extent(recvtype) is the type extent obtained from a call to MPI_Type_extent(). .sp -An alternative description is that the n messages sent by the processes in the group are concatenated in rank order, and the resulting message is received by the root as if by a call to MPI_RECV(recvbuf, recvcount * n, recvtype, . . . ). +An alternative description is that the n messages sent by the processes in the group are concatenated in rank order, and the resulting message is received by the root as if by a call to MPI_RECV(recvbuf, recvcount * n, recvtype, . . . ). .sp -The receive buffer is ignored for all nonroot processes. +The receive buffer is ignored for all nonroot processes. .sp General, derived datatypes are allowed for both sendtype and recvtype. The type signature of sendcount, sendtype on process i must be equal to the type signature of recvcount, recvtype at the root.
This implies that the amount of data sent must be equal to the amount of data received, pairwise between each process and the root. Distinct type maps between sender and receiver are still allowed. .sp -All arguments to the function are significant on process root, while on other processes, only arguments sendbuf, sendcount, sendtype, root, comm are significant. The arguments root and comm must have identical values on all processes. +All arguments to the function are significant on process root, while on other processes, only arguments sendbuf, sendcount, sendtype, root, comm are significant. The arguments root and comm must have identical values on all processes. .sp -The specification of counts and types should not cause any location on the root to be written more than once. Such a call is erroneous. +The specification of counts and types should not cause any location on the root to be written more than once. Such a call is erroneous. .sp Note that the recvcount argument at the root indicates the number of items it receives from each process, not the total number of items it receives. .sp \fBExample 1:\fP Gather 100 ints from every process in group to root. .sp .nf - MPI_Comm comm; - int gsize,sendarray[100]; - int root, *rbuf; - \&... - MPI_Comm_size( comm, &gsize); - rbuf = (int *)malloc(gsize*100*sizeof(int)); - MPI_Gather( sendarray, 100, MPI_INT, rbuf, 100, MPI_INT, root, comm); + MPI_Comm comm; + int gsize,sendarray[100]; + int root, *rbuf; + \&... + MPI_Comm_size( comm, &gsize); + rbuf = (int *)malloc(gsize*100*sizeof(int)); + MPI_Gather( sendarray, 100, MPI_INT, rbuf, 100, MPI_INT, root, comm); .fi .sp @@ -131,53 +131,53 @@ Note that the recvcount argument at the root indicates the number of items it re \fBExample 2:\fP Previous example modified -- only the root allocates memory for the receive buffer. .sp .nf - MPI_Comm comm; - int gsize,sendarray[100]; - int root, myrank, *rbuf; - \&... 
- MPI_Comm_rank( comm, myrank); - if ( myrank == root) { - MPI_Comm_size( comm, &gsize); - rbuf = (int *)malloc(gsize*100*sizeof(int)); - } - MPI_Gather( sendarray, 100, MPI_INT, rbuf, 100, MPI_INT, root, comm); + MPI_Comm comm; + int gsize,sendarray[100]; + int root, myrank, *rbuf; + \&... + MPI_Comm_rank( comm, &myrank); + if ( myrank == root) { + MPI_Comm_size( comm, &gsize); + rbuf = (int *)malloc(gsize*100*sizeof(int)); + } + MPI_Gather( sendarray, 100, MPI_INT, rbuf, 100, MPI_INT, root, comm); .fi .sp \fBExample 3:\fP Do the same as the previous example, but use a derived -datatype. Note that the type cannot be the entire set of gsize * 100 ints since type matching is defined pairwise between the root and each process in the gather. +datatype. Note that the type cannot be the entire set of gsize * 100 ints since type matching is defined pairwise between the root and each process in the gather. .nf - MPI_Comm comm; - int gsize,sendarray[100]; - int root, *rbuf; - MPI_Datatype rtype; - \&... - MPI_Comm_size( comm, &gsize); - MPI_Type_contiguous( 100, MPI_INT, &rtype ); - MPI_Type_commit( &rtype ); - rbuf = (int *)malloc(gsize*100*sizeof(int)); - MPI_Gather( sendarray, 100, MPI_INT, rbuf, 1, rtype, root, comm); + MPI_Comm comm; + int gsize,sendarray[100]; + int root, *rbuf; + MPI_Datatype rtype; + \&... + MPI_Comm_size( comm, &gsize); + MPI_Type_contiguous( 100, MPI_INT, &rtype ); + MPI_Type_commit( &rtype ); + rbuf = (int *)malloc(gsize*100*sizeof(int)); + MPI_Gather( sendarray, 100, MPI_INT, rbuf, 1, rtype, root, comm); .fi .SH USE OF IN-PLACE OPTION -When the communicator is an intracommunicator, you can perform a gather operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the root process \fIsendbuf\fR. In this case, \fIsendcount\fR and \fIsendtype\fR are ignored, and the contribution of the root process to the gathered vector is assumed to already be in the correct place in the receive buffer.
+When the communicator is an intracommunicator, you can perform a gather operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the root process \fIsendbuf\fR. In this case, \fIsendcount\fR and \fIsendtype\fR are ignored, and the contribution of the root process to the gathered vector is assumed to already be in the correct place in the receive buffer. .sp Note that MPI_IN_PLACE is a special kind of value; it has the same restrictions on its use as MPI_BOTTOM. .sp -Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. +Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. .sp .SH WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR .sp -When the communicator is an inter-communicator, the root process in the first group gathers data from all the processes in the second group. The first group defines the root process. That process uses MPI_ROOT as the value of its \fIroot\fR argument. The remaining processes use MPI_PROC_NULL as the value of their \fIroot\fR argument. All processes in the second group use the rank of that root process in the first group as the value of their \fIroot\fR argument. The send buffer argument of the processes in the first group must be consistent with the receive buffer argument of the root process in the second group. -.sp +When the communicator is an inter-communicator, the root process in the first group gathers data from all the processes in the second group. The first group defines the root process. That process uses MPI_ROOT as the value of its \fIroot\fR argument. The remaining processes use MPI_PROC_NULL as the value of their \fIroot\fR argument. 
All processes in the second group use the rank of that root process in the first group as the value of their \fIroot\fR argument. The send buffer argument of the processes in the first group must be consistent with the receive buffer argument of the root process in the second group. +.sp .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .sp -See the MPI man page for a full list of MPI error codes. +See the MPI man page for a full list of MPI error codes. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Gatherv.3in b/ompi/mpi/man/man3/MPI_Gatherv.3in index 4045c21c0c7..c3c71ea2eca 100644 --- a/ompi/mpi/man/man3/MPI_Gatherv.3in +++ b/ompi/mpi/man/man3/MPI_Gatherv.3in @@ -29,7 +29,7 @@ MPI_GATHERV(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNTS, DISPLS, RECVTYPE, ROOT, COMM, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNTS(*), DISPLS(*)\fP - INTEGER \fIRECVTYPE, ROOT, COMM, IERROR\fP + INTEGER \fIRECVTYPE, ROOT, COMM, IERROR\fP MPI_IGATHERV(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNTS, DISPLS, RECVTYPE, ROOT, COMM, REQUEST, IERROR\fP) @@ -41,7 +41,7 @@ MPI_IGATHERV(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNTS, .SH C++ Syntax .nf #include -void MPI::Comm::Gatherv(const void* \fIsendbuf\fP, int \fIsendcount\fP, +void MPI::Comm::Gatherv(const void* \fIsendbuf\fP, int \fIsendcount\fP, const MPI::Datatype& \fIsendtype\fP, void* \fIrecvbuf\fP, const int \fIrecvcounts\fP[], const int \fIdispls\fP[], const MPI::Datatype& \fIrecvtype\fP, int \fIroot\fP) const = 0 @@ -87,19 +87,19 @@ Request (handle, non-blocking only). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Gatherv extends the functionality of MPI_Gather by allowing a varying count of data from each process, since recvcounts is now an array. It also allows more flexibility as to where the data is placed on the root, by providing the new argument, displs. +MPI_Gatherv extends the functionality of MPI_Gather by allowing a varying count of data from each process, since recvcounts is now an array. It also allows more flexibility as to where the data is placed on the root, by providing the new argument, displs. 
.sp -The outcome is as if each process, including the root process, sends a message to the root, +The outcome is as if each process, including the root process, sends a message to the root, .sp .nf MPI_Send(sendbuf, sendcount, sendtype, root, \&...) .fi .sp -and the root executes n receives, +and the root executes n receives, .sp .nf MPI_Recv(recvbuf + disp[i] * extent(recvtype), \\ @@ -108,237 +108,237 @@ and the root executes n receives, .sp Messages are placed in the receive buffer of the root process in rank order, that is, the data sent from process j is placed in the jth portion of the receive buffer recvbuf on process root. The jth portion of recvbuf begins at offset displs[j] elements (in terms of recvtype) into recvbuf. .sp -The receive buffer is ignored for all nonroot processes. +The receive buffer is ignored for all nonroot processes. .sp The type signature implied by sendcount, sendtype on process i must be equal to the type signature implied by recvcounts[i], recvtype at the root. This implies that the amount of data sent must be equal to the amount of data received, pairwise between each process and the root. Distinct type maps between sender and receiver are still allowed, as illustrated in Example 2, below. .sp -All arguments to the function are significant on process root, while on other processes, only arguments sendbuf, sendcount, sendtype, root, comm are significant. The arguments root and comm must have identical values on all processes. +All arguments to the function are significant on process root, while on other processes, only arguments sendbuf, sendcount, sendtype, root, comm are significant. The arguments root and comm must have identical values on all processes. .sp -The specification of counts, types, and displacements should not cause any location on the root to be written more than once. Such a call is erroneous. 
+The specification of counts, types, and displacements should not cause any location on the root to be written more than once. Such a call is erroneous. .sp \fBExample 1:\fP Now have each process send 100 ints to root, but place each set (of 100) stride ints apart at receiving end. Use MPI_Gatherv and -the displs argument to achieve this effect. Assume stride >= 100. +the displs argument to achieve this effect. Assume stride >= 100. .sp .nf - MPI_Comm comm; - int gsize,sendarray[100]; - int root, *rbuf, stride; - int *displs,i,*rcounts; - - \&... + MPI_Comm comm; + int gsize,sendarray[100]; + int root, *rbuf, stride; + int *displs,i,*rcounts; - MPI_Comm_size(comm, &gsize); - rbuf = (int *)malloc(gsize*stride*sizeof(int)); - displs = (int *)malloc(gsize*sizeof(int)); - rcounts = (int *)malloc(gsize*sizeof(int)); - for (i=0; i -MPI_Get(void *\fIorigin_addr\fP, int \fIorigin_count\fP, MPI_Datatype - \fIorigin_datatype\fP, int \fItarget_rank\fP, MPI_Aint \fItarget_disp\fP, +MPI_Get(void *\fIorigin_addr\fP, int \fIorigin_count\fP, MPI_Datatype + \fIorigin_datatype\fP, int \fItarget_rank\fP, MPI_Aint \fItarget_disp\fP, int \fItarget_count\fP, MPI_Datatype \fItarget_datatype\fP, MPI_Win \fIwin\fP) MPI_Rget(void *\fIorigin_addr\fP, int \fIorigin_count\fP, MPI_Datatype @@ -26,11 +26,11 @@ MPI_Rget(void *\fIorigin_addr\fP, int \fIorigin_count\fP, MPI_Datatype .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf INCLUDE 'mpif.h' -MPI_GET(\fIORIGIN_ADDR, ORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, +MPI_GET(\fIORIGIN_ADDR, ORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, TARGET_DISP, TARGET_COUNT, TARGET_DATATYPE, WIN, IERROR\fP) \fIORIGIN_ADDR\fP(*) INTEGER(KIND=MPI_ADDRESS_KIND) \fITARGET_DISP\fP - INTEGER \fIORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, + INTEGER \fIORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, TARGET_COUNT, TARGET_DATATYPE, WIN, IERROR\fP MPI_RGET(\fIORIGIN_ADDR, ORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, @@ -44,9 +44,9 @@ MPI_RGET(\fIORIGIN_ADDR, ORIGIN_COUNT, 
ORIGIN_DATATYPE, TARGET_RANK, .SH C++ Syntax .nf #include -void MPI::Win::Get(const void *\fIorigin_addr\fP, int \fIorigin_count\fP, - const MPI::Datatype& \fIorigin_datatype\fP, int \fItarget_rank\fP, - MPI::Aint \fItarget_disp\fP, int \fItarget_count\fP, +void MPI::Win::Get(const void *\fIorigin_addr\fP, int \fIorigin_count\fP, + const MPI::Datatype& \fIorigin_datatype\fP, int \fItarget_rank\fP, + MPI::Aint \fItarget_disp\fP, int \fItarget_count\fP, const MPI::Datatype& \fItarget_datatype\fP) const .fi @@ -57,7 +57,7 @@ origin_addr Initial address of origin buffer (choice). .TP 1i origin_count -Number of entries in origin buffer (nonnegative integer). +Number of entries in origin buffer (nonnegative integer). .TP 1i origin_datatype Data type of each entry in origin buffer (handle). @@ -66,13 +66,13 @@ target_rank Rank of target (nonnegative integer). .TP 1i target_disp -Displacement from window start to the beginning of the target buffer (nonnegative integer). +Displacement from window start to the beginning of the target buffer (nonnegative integer). .TP 1i target_count Number of entries in target buffer (nonnegative integer). .TP 1i target datatype -datatype of each entry in target buffer (handle) +datatype of each entry in target buffer (handle) .TP 1i win window object used for communication (handle) @@ -84,7 +84,7 @@ request MPI_Rget: RMA request .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -110,7 +110,7 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Get_address.3in b/ompi/mpi/man/man3/MPI_Get_address.3in index 9d3f155796d..36b268eede7 100644 --- a/ompi/mpi/man/man3/MPI_Get_address.3in +++ b/ompi/mpi/man/man3/MPI_Get_address.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Get_address 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Get_address\fP \- Gets the address of a location in memory. +\fBMPI_Get_address\fP \- Gets the address of a location in memory. .SH SYNTAX .ft R @@ -21,7 +21,8 @@ int MPI_Get_address(const void *\fIlocation\fP, MPI_Aint *\fIaddress\fP) INCLUDE 'mpif.h' MPI_GET_ADDRESS(\fILOCATION, ADDRESS, IERROR\fP) \fILOCATION\fP(*) - INTEGER \fIADDRESS, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIADDRESS\fP + INTEGER \fIIERROR\fP .fi .SH C++ Syntax @@ -43,38 +44,38 @@ address Address of location (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Get_address returns the byte address of a location in memory. .sp -Example: Using MPI_Get_address for an array. +Example: Using MPI_Get_address for an array. 
.sp .nf -EAL A(100,100) +REAL A(100,100) .fi .br - INTEGER I1, I2, DIFF + INTEGER I1, I2, DIFF .br - CALL MPI_GET_ADDRESS(A(1,1), I1, IERROR) + CALL MPI_GET_ADDRESS(A(1,1), I1, IERROR) .br - CALL MPI_GET_ADDRESS(A(10,10), I2, IERROR) + CALL MPI_GET_ADDRESS(A(10,10), I2, IERROR) .br - DIFF = I2 - I1 + DIFF = I2 - I1 .br -! The value of DIFF is 909*sizeofreal; the values of I1 and I2 are +! The value of DIFF is 909*sizeofreal; the values of I1 and I2 are .br -! implementation dependent. +! implementation dependent. .fi .SH NOTES .ft R -Current Fortran MPI codes will run unmodified and will port to any system. However, they may fail if addresses larger than 2^32 - 1 are used in the program. New codes should be written so that they use the new functions. This provides compatibility with C/C++ and avoids errors on 64-bit architectures. However, such newly written codes may need to be (slightly) rewritten to port to old Fortran 77 environments that do not support KIND declarations. +Current Fortran MPI codes will run unmodified and will port to any system. However, they may fail if addresses larger than 2^32 - 1 are used in the program. New codes should be written so that they use the new functions. This provides compatibility with C/C++ and avoids errors on 64-bit architectures. However, such newly written codes may need to be (slightly) rewritten to port to old Fortran 77 environments that do not support KIND declarations. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors.
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Get_count.3in b/ompi/mpi/man/man3/MPI_Get_count.3in index 6a468602986..81eb522a305 100644 --- a/ompi/mpi/man/man3/MPI_Get_count.3in +++ b/ompi/mpi/man/man3/MPI_Get_count.3in @@ -20,7 +20,7 @@ int MPI_Get_count(const MPI_Status *\fIstatus\fP, MPI_Datatype\fI datatype\fP, .nf INCLUDE 'mpif.h' MPI_GET_COUNT(\fISTATUS, DATATYPE, COUNT, IERROR\fP) - INTEGER \fISTATUS(MPI_STATUS_SIZE), DATATYPE, COUNT, IERROR\fP + INTEGER \fISTATUS(MPI_STATUS_SIZE), DATATYPE, COUNT, IERROR\fP .fi .SH C++ Syntax @@ -46,40 +46,40 @@ Number of received elements (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R Returns the number of entries received. (We count entries, each of type datatype, not bytes.) The datatype argument should match the argument -provided by the receive call that set the status variable. (As explained in Section 3.12.5 in the MPI-1 Standard, "Use of General Datatypes in Communication," MPI_Get_count may, in certain situations, return the value MPI_UNDEFINED.) +provided by the receive call that set the status variable. (As explained in Section 3.12.5 in the MPI-1 Standard, "Use of General Datatypes in Communication," MPI_Get_count may, in certain situations, return the value MPI_UNDEFINED.) .sp The datatype argument is passed to MPI_Get_count to improve performance. 
A message might be received without counting the number of elements it contains, and the count value is often not needed. Also, this allows the same function to be used after a call to MPI_Probe. .SH NOTES If the size of the datatype is zero, this routine will return a count of -zero. If the amount of data in +zero. If the amount of data in .I status is not an exact multiple of the -size of +size of .I datatype -(so that +(so that .I count -would not be integral), a +would not be integral), a .I count of .I MPI_UNDEFINED is returned instead. .SH ERRORS -If the value to be returned is larger than can fit into the +If the value to be returned is larger than can fit into the .I count parameter, an MPI_ERR_TRUNCATE exception is invoked. .sp Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Get_elements.3in b/ompi/mpi/man/man3/MPI_Get_elements.3in index 68a54b6b0c1..1f1bd9fa073 100644 --- a/ompi/mpi/man/man3/MPI_Get_elements.3in +++ b/ompi/mpi/man/man3/MPI_Get_elements.3in @@ -51,40 +51,40 @@ count Number of received basic elements (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Get_elements and MPI_Get_elements_x behave different from MPI_Get_count, which returns the number of "top-level entries" received, i.e., the number of "copies" of type datatype. MPI_Get_count may return any integer value k, where 0 =< k =< count. If MPI_Get_count returns k, then the number of basic elements received (and the value returned by MPI_Get_elements and MPI_Get_elements_x) is n * k, where n is the number of basic elements in the type map of datatype. If the number of basic elements received is not a multiple of n, that is, if the receive operation has not received an integral number of datatype "copies," then MPI_Get_count returns the value MPI_UNDEFINED. For both functions, if the \fIcount\fP parameter cannot express the value to be returned (e.g., if the parameter is too small to hold the output value), it is set to MPI_UNDEFINED. .sp -\fBExample:\fP Usage of MPI_Get_count and MPI_Get_element: +\fBExample:\fP Usage of MPI_Get_count and MPI_Get_elements: .sp .nf - \&... - CALL MPI_TYPE_CONTIGUOUS(2, MPI_REAL, Type2, ierr) - CALL MPI_TYPE_COMMIT(Type2, ierr) - \&... - CALL MPI_COMM_RANK(comm, rank, ierr) - IF(rank.EQ.0) THEN - CALL MPI_SEND(a, 2, MPI_REAL, 1, 0, comm, ierr) - CALL MPI_SEND(a, 3, MPI_REAL, 1, 0, comm, ierr) - ELSE - CALL MPI_RECV(a, 2, Type2, 0, 0, comm, stat, ierr) - CALL MPI_GET_COUNT(stat, Type2, i, ierr) ! returns i=1 - CALL MPI_GET_ELEMENTS(stat, Type2, i, ierr) ! returns i=2 - CALL MPI_RECV(a, 2, Type2, 0, 0, comm, stat, ierr) + \&...
+ CALL MPI_TYPE_CONTIGUOUS(2, MPI_REAL, Type2, ierr) + CALL MPI_TYPE_COMMIT(Type2, ierr) + \&... + CALL MPI_COMM_RANK(comm, rank, ierr) + IF(rank.EQ.0) THEN + CALL MPI_SEND(a, 2, MPI_REAL, 1, 0, comm, ierr) + CALL MPI_SEND(a, 3, MPI_REAL, 1, 0, comm, ierr) + ELSE + CALL MPI_RECV(a, 2, Type2, 0, 0, comm, stat, ierr) + CALL MPI_GET_COUNT(stat, Type2, i, ierr) ! returns i=1 + CALL MPI_GET_ELEMENTS(stat, Type2, i, ierr) ! returns i=2 + CALL MPI_RECV(a, 2, Type2, 0, 0, comm, stat, ierr) CALL MPI_GET_COUNT(stat, Type2, i, ierr) ! returns i=MPI_UNDEFINED - CALL MPI_GET_ELEMENTS(stat, Type2, i, ierr) ! returns i=3 - END IF + CALL MPI_GET_ELEMENTS(stat, Type2, i, ierr) ! returns i=3 + END IF .fi .sp -The function MPI_Get_elements can also be used after a probe to find the number of elements in the probed message. Note that the two functions MPI_Get_count and MPI_Get_elements return the same values when they are used with primitive data types. - +The function MPI_Get_elements can also be used after a probe to find the number of elements in the probed message. Note that the two functions MPI_Get_count and MPI_Get_elements return the same values when they are used with primitive data types. + .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH FORTRAN 77 NOTES .ft R diff --git a/ompi/mpi/man/man3/MPI_Get_library_version.3in b/ompi/mpi/man/man3/MPI_Get_library_version.3in index 048ed920385..ebe9346bda6 100644 --- a/ompi/mpi/man/man3/MPI_Get_library_version.3in +++ b/ompi/mpi/man/man3/MPI_Get_library_version.3in @@ -20,7 +20,7 @@ int MPI_Get_library_version(char \fI*version\fP, int \fI*resultlen\fP) INCLUDE 'mpif.h' MPI_GET_LIBRARY_VERSION(\fIVERSION\fP, \fIRESULTLEN\fP, \fIIERROR\fP) CHARACTER*(*) \fINAME\fP - INTEGER \fIRESULTLEN\fP, \fIIERROR\fP + INTEGER \fIRESULTLEN\fP, \fIIERROR\fP .fi .SH C++ Syntax @@ -42,7 +42,7 @@ Length (in characters) of result returned in \fIversion\fP (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R diff --git a/ompi/mpi/man/man3/MPI_Get_processor_name.3in b/ompi/mpi/man/man3/MPI_Get_processor_name.3in index 0ad2f3a7da2..a59997f7dad 100644 --- a/ompi/mpi/man/man3/MPI_Get_processor_name.3in +++ b/ompi/mpi/man/man3/MPI_Get_processor_name.3in @@ -40,14 +40,14 @@ Length (in characters) of result returned in name. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -This routine returns the name of the processor on which it was called at the moment of the call. The name is a character string for maximum flexibility. From this value it must be possible to identify a specific piece of hardware. The argument name must represent storage that is at least MPI_MAX_PROCESSOR_NAME characters long. +This routine returns the name of the processor on which it was called at the moment of the call. 
The name is a character string for maximum flexibility. From this value it must be possible to identify a specific piece of hardware. The argument name must represent storage that is at least MPI_MAX_PROCESSOR_NAME characters long. .sp The number of characters actually written is returned in the output -argument, resultlen. +argument, resultlen. .sp .SH NOTES .ft R @@ -58,7 +58,7 @@ The user must provide at least MPI_MAX_PROCESSOR_NAME space to write the process Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Get_version.3in b/ompi/mpi/man/man3/MPI_Get_version.3in index 3095e232ec5..c7501ca0088 100644 --- a/ompi/mpi/man/man3/MPI_Get_version.3in +++ b/ompi/mpi/man/man3/MPI_Get_version.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Get_version 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Get_version\fP \- Returns the version of the standard corresponding to the current implementation. 
+\fBMPI_Get_version\fP \- Returns the version of the standard corresponding to the current implementation. .SH SYNTAX .ft R @@ -19,7 +19,7 @@ int MPI_Get_version(int \fI*version\fP, int \fI*subversion\fP) .nf INCLUDE 'mpif.h' MPI_GET_VERSION(\fIVERSION\fP, \fISUBVERSION\fP, \fIIERROR\fP) - INTEGER \fIVERSION\fP, \fISUBVERSION\fP, \fIIERROR\fP + INTEGER \fIVERSION\fP, \fISUBVERSION\fP, \fIIERROR\fP .fi .SH C++ Syntax @@ -42,20 +42,20 @@ The minor version number of the corresponding standard (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Since Open MPI is MPI 2.1 compliant, this function will return a version value of 2 and a subversion value of 1 for this release. +Since Open MPI is MPI 3.1 compliant, this function will return a version value of 3 and a subversion value of 1 for this release. .SH NOTE .ft R -MPI_Get_version is one of the few functions that can be called before MPI_Init and after MPI_Finalize. +MPI_Get_version is one of the few functions that can be called before MPI_Init and after MPI_Finalize. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Graph_create.3in b/ompi/mpi/man/man3/MPI_Graph_create.3in index b31c92e55e1..fe586375d6b 100644 --- a/ompi/mpi/man/man3/MPI_Graph_create.3in +++ b/ompi/mpi/man/man3/MPI_Graph_create.3in @@ -30,8 +30,8 @@ MPI_GRAPH_CREATE(\fICOMM_OLD, NNODES, INDEX, EDGES, REORDER, .SH C++ Syntax .nf #include -Graphcomm Intracomm::Create_graph(int \fInnodes\fP, const int \fIindex\fP[], - const int \fIedges\fP[], bool \fIreorder\fP) const +Graphcomm Intracomm::Create_graph(int \fInnodes\fP, const int \fIindex\fP[], + const int \fIedges\fP[], bool \fIreorder\fP) const .fi .SH INPUT PARAMETERS @@ -60,11 +60,11 @@ Communicator with graph topology added (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Graph_create returns a handle to a new communicator to which the graph topology information is attached. If reorder = false then the rank of each process in the new group is identical to its rank in the old group. Otherwise, the function may reorder the processes. If the size, nnodes, of the graph is smaller than the size of the group of comm_old, then some processes are returned MPI_COMM_NULL, in analogy to MPI_Cart_create and MPI_Comm_split. The call is erroneous if it specifies a graph that is larger than the group size of the input communicator. +MPI_Graph_create returns a handle to a new communicator to which the graph topology information is attached. If reorder = false then the rank of each process in the new group is identical to its rank in the old group. Otherwise, the function may reorder the processes. 
If the size, nnodes, of the graph is smaller than the size of the group of comm_old, then some processes are returned MPI_COMM_NULL, in analogy to MPI_Cart_create and MPI_Comm_split. The call is erroneous if it specifies a graph that is larger than the group size of the input communicator. .sp The three parameters nnodes, index, and edges define the graph structure. nnodes is the number of nodes of the graph. The nodes are numbered from 0 to nnodes-1. The ith entry of array index stores the total number of neighbors of the first i graph nodes. The lists of neighbors of nodes 0,\ 1,\ ..., nnodes-1 are stored in consecutive locations in array edges. The array edges is a flattened representation of the edge lists. The total number of entries in index is nnodes and the total number of entries in edges is equal to the number of graph edges. .sp @@ -91,18 +91,18 @@ Then, the input arguments are: Thus, in C, index[0] is the degree of node zero, and index[i] - index[i-1] is the degree of node i, i=1, . . . , nnodes-1; the list of neighbors of node zero is stored in edges[j], for 0 <= j <= index[0] - 1 and the list of -neighbors of node i, i > 0 , is stored in edges[j], index[i-1] <= j <= index[i] - 1. +neighbors of node i, i > 0 , is stored in edges[j], index[i-1] <= j <= index[i] - 1. .sp In Fortran, index(1) is the degree of node zero, and index(i+1) - index(i) is the degree of node i, i=1, . . . , nnodes-1; the list of neighbors of node zero is stored in edges(j), for 1 <= j <= index(1) and the list of -neighbors of node i, i > 0, is stored in edges(j), index(i) + 1 <= j <= index(i + 1). +neighbors of node i, i > 0, is stored in edges(j), index(i) + 1 <= j <= index(i + 1). .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Graph_get.3in b/ompi/mpi/man/man3/MPI_Graph_get.3in index 7f3e747ff1b..4ae1dc2ba76 100644 --- a/ompi/mpi/man/man3/MPI_Graph_get.3in +++ b/ompi/mpi/man/man3/MPI_Graph_get.3in @@ -21,14 +21,14 @@ int MPI_Graph_get(MPI_Comm \fIcomm\fP, int\fI maxindex\fP, int\fI maxedges\fP, INCLUDE 'mpif.h' MPI_GRAPH_GET(\fICOMM, MAXINDEX, MAXEDGES, INDEX, EDGES, IERROR\fP) INTEGER \fICOMM, MAXINDEX, MAXEDGES, INDEX(*)\fP - INTEGER \fIEDGES(*), IERROR\fP + INTEGER \fIEDGES(*), IERROR\fP .fi .SH C++ Syntax .nf #include -void Graphcomm::Get_topo(int \fImaxindex\fP, int \fImaxedges\fP, - int \fIindex\fP[], int \fIedges\fP[]) const +void Graphcomm::Get_topo(int \fImaxindex\fP, int \fImaxedges\fP, + int \fIindex\fP[], int \fIedges\fP[]) const .fi .SH INPUT PARAMETERS @@ -55,7 +55,7 @@ Array of integers containing the graph structure. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R @@ -67,7 +67,7 @@ The information provided by MPI_Graphdims_get can be used to dimension the vecto Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Graph_map.3in b/ompi/mpi/man/man3/MPI_Graph_map.3in index 260bf6095ce..79f4b747bed 100644 --- a/ompi/mpi/man/man3/MPI_Graph_map.3in +++ b/ompi/mpi/man/man3/MPI_Graph_map.3in @@ -21,14 +21,14 @@ int MPI_Graph_map(MPI_Comm \fIcomm\fP, int\fI nnodes\fP, const int\fI index\fP[] .nf INCLUDE 'mpif.h' MPI_GRAPH_MAP(\fICOMM, NNODES, INDEX, EDGES, NEWRANK, IERROR\fP) - INTEGER \fICOMM, NNODES, INDEX(*), EDGES(*), NEWRANK, IERROR\fP + INTEGER \fICOMM, NNODES, INDEX(*), EDGES(*), NEWRANK, IERROR\fP .fi .SH C++ Syntax .nf #include -int Graphcomm::Map(int \fInnodes\fP, const int \fIindex\fP[], - const int \fIedges\fP[]) const +int Graphcomm::Map(int \fInnodes\fP, const int \fIindex\fP[], + const int \fIedges\fP[]) const .fi .SH INPUT PARAMETERS @@ -54,18 +54,18 @@ Reordered rank of the calling process; MPI_UNDEFINED if the calling process does .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Cart_map and MPI_Graph_map can be used to implement all other topology -functions. In general they will not be called by the user directly, unless he or she is creating additional virtual topology capability other than that provided by MPI. +functions. In general they will not be called by the user directly, unless he or she is creating additional virtual topology capability other than that provided by MPI. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .sp diff --git a/ompi/mpi/man/man3/MPI_Graph_neighbors.3in b/ompi/mpi/man/man3/MPI_Graph_neighbors.3in index a5fe5bfc92a..5b0e29195db 100644 --- a/ompi/mpi/man/man3/MPI_Graph_neighbors.3in +++ b/ompi/mpi/man/man3/MPI_Graph_neighbors.3in @@ -20,14 +20,14 @@ int MPI_Graph_neighbors(MPI_Comm \fIcomm\fP, int\fI rank\fP, int\fI maxneighbors .nf INCLUDE 'mpif.h' MPI_GRAPH_NEIGHBORS(\fICOMM, RANK, MAXNEIGHBORS, NEIGHBORS, IERROR\fP) - INTEGER \fICOMM, RANK, MAXNEIGHBORS, NEIGHBORS(*), IERROR\fP + INTEGER \fICOMM, RANK, MAXNEIGHBORS, NEIGHBORS(*), IERROR\fP .fi .SH C++ Syntax .nf #include -void Graphcomm::Get_neighbors(int \fIrank\fP, int \fImaxneighbors\fP, - int \fIneighbors\fP[]) const +void Graphcomm::Get_neighbors(int \fIrank\fP, int \fImaxneighbors\fP, + int \fIneighbors\fP[]) const .fi .SH INPUT PARAMETERS @@ -50,7 +50,7 @@ Ranks of processes that are neighbors to specified process (array of integers). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -73,18 +73,18 @@ topology. The group has 2n members. Each process is labeled by a(1),\ ..., a(n) Suppose that the communicator comm has this topology associated with it. The following code fragment cycles through the three types of neighbors and performs an appropriate permutation for each. .sp .nf -C assume: each process has stored a real number A. 
-C extract neighborhood information - CALL MPI_COMM_RANK(comm, myrank, ierr) - CALL MPI_GRAPH_NEIGHBORS(comm, myrank, 3, neighbors, ierr) -C perform exchange permutation - CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, neighbors(1), 0, - + neighbors(1), 0, comm, status, ierr) -C perform shuffle permutation - CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, neighbors(2), 0, - + neighbors(3), 0, comm, status, ierr) -C perform unshuffle permutation - CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, neighbors(3), 0, +C assume: each process has stored a real number A. +C extract neighborhood information + CALL MPI_COMM_RANK(comm, myrank, ierr) + CALL MPI_GRAPH_NEIGHBORS(comm, myrank, 3, neighbors, ierr) +C perform exchange permutation + CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, neighbors(1), 0, + + neighbors(1), 0, comm, status, ierr) +C perform shuffle permutation + CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, neighbors(2), 0, + + neighbors(3), 0, comm, status, ierr) +C perform unshuffle permutation + CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, neighbors(3), 0, + neighbors(2), 0, comm, status, ierr) .fi @@ -92,7 +92,7 @@ C perform unshuffle permutation Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Graph_neighbors_count.3in b/ompi/mpi/man/man3/MPI_Graph_neighbors_count.3in index ae7f9401d2a..14aca310a3a 100644 --- a/ompi/mpi/man/man3/MPI_Graph_neighbors_count.3in +++ b/ompi/mpi/man/man3/MPI_Graph_neighbors_count.3in @@ -20,7 +20,7 @@ int MPI_Graph_neighbors_count(MPI_Comm \fIcomm\fP, int\fI rank\fP, .nf INCLUDE 'mpif.h' MPI_GRAPH_NEIGHBORS_COUNT(\fICOMM, RANK, NNEIGHBORS, IERROR\fP) - INTEGER \fICOMM, RANK, NNEIGHBORS, IERROR\fP + INTEGER \fICOMM, RANK, NNEIGHBORS, IERROR\fP .fi .SH C++ Syntax @@ -46,17 +46,17 @@ Number of neighbors of specified process (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Graph_neighbors_count and MPI_Graph_neighbors provide adjacency information for a general, graph topology. MPI_Graph_neighbors_count returns the number of neighbors for the process signified by rank. +MPI_Graph_neighbors_count and MPI_Graph_neighbors provide adjacency information for a general graph topology. MPI_Graph_neighbors_count returns the number of neighbors for the process signified by rank. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors.
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Graphdims_get.3in b/ompi/mpi/man/man3/MPI_Graphdims_get.3in index a0139e8bf5a..298c7a2245e 100644 --- a/ompi/mpi/man/man3/MPI_Graphdims_get.3in +++ b/ompi/mpi/man/man3/MPI_Graphdims_get.3in @@ -19,7 +19,7 @@ int MPI_Graphdims_get(MPI_Comm \fIcomm\fP, int\fI *nnodes\fP, int\fI *nedges\fP) .nf INCLUDE 'mpif.h' MPI_GRAPHDIMS_GET(\fICOMM, NNODES, NEDGES, IERROR\fP) - INTEGER \fICOMM, NNODES, NEDGES, IERROR\fP + INTEGER \fICOMM, NNODES, NEDGES, IERROR\fP .fi .SH C++ Syntax @@ -45,7 +45,7 @@ Number of edges in graph (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -57,7 +57,7 @@ The information provided by MPI_Graphdims_get can be used to dimension the vecto Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Grequest_complete.3in b/ompi/mpi/man/man3/MPI_Grequest_complete.3in index db6c8e9f689..9e9cd6cad1d 100644 --- a/ompi/mpi/man/man3/MPI_Grequest_complete.3in +++ b/ompi/mpi/man/man3/MPI_Grequest_complete.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Grequest_complete 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Grequest_complete \fP \- Reports that a generalized request is complete. +\fBMPI_Grequest_complete \fP \- Reports that a generalized request is complete. .SH SYNTAX .ft R @@ -19,7 +19,7 @@ int MPI_Grequest_complete(MPI_Request \fIrequest\fP) .nf INCLUDE 'mpif.h' MPI_GREQUEST_COMPLETE(\fIREQUEST, IERROR\fP) - INTEGER \fIREQUEST, IERROR\fP + INTEGER \fIREQUEST, IERROR\fP .fi .SH C++ Syntax @@ -38,18 +38,18 @@ Generalized request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Grequest_complete informs MPI that the operations represented by the generalized request \fIrequest\fP are complete. A call to MPI_Wait(\fIrequest, status\fP) will return, and a call to MPI_Test(\fIrequest, flag, status\fP) will return flag=true only after a call to MPI_Grequest_complete has declared that these operations are complete. +MPI_Grequest_complete informs MPI that the operations represented by the generalized request \fIrequest\fP are complete. 
A call to MPI_Wait(\fIrequest, status\fP) will return, and a call to MPI_Test(\fIrequest, flag, status\fP) will return flag=true only after a call to MPI_Grequest_complete has declared that these operations are complete. .sp -MPI imposes no restrictions on the code executed by the callback functions. However, new nonblocking operations should be defined so that the general semantic rules about MPI calls such as MPI_Test, MPI_Request_free, or MPI_Cancel still hold. For example, all these calls are supposed to be local and nonblocking. Therefore, the callback functions \fIquery_fn\fP, \fIfree_fn\fP, or \fIcancel_fn\fP should invoke blocking MPI communication calls only if the context is such that these calls are guaranteed to return in finite time. Once MPI_Cancel has been invoked, the canceled operation should complete in finite time, regardless of the state of other processes (the operation has acquired "local" semantics). It should either succeed or fail without side-effects. The user should guarantee these same properties for newly defined operations. +MPI imposes no restrictions on the code executed by the callback functions. However, new nonblocking operations should be defined so that the general semantic rules about MPI calls such as MPI_Test, MPI_Request_free, or MPI_Cancel still hold. For example, all these calls are supposed to be local and nonblocking. Therefore, the callback functions \fIquery_fn\fP, \fIfree_fn\fP, or \fIcancel_fn\fP should invoke blocking MPI communication calls only if the context is such that these calls are guaranteed to return in finite time. Once MPI_Cancel has been invoked, the canceled operation should complete in finite time, regardless of the state of other processes (the operation has acquired "local" semantics). It should either succeed or fail without side-effects. The user should guarantee these same properties for newly defined operations. 
.SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Grequest_start.3in b/ompi/mpi/man/man3/MPI_Grequest_start.3in index 50f7d9c843f..cc14de48d53 100644 --- a/ompi/mpi/man/man3/MPI_Grequest_start.3in +++ b/ompi/mpi/man/man3/MPI_Grequest_start.3in @@ -5,15 +5,15 @@ .\" $COPYRIGHT$ .TH MPI_Grequest_start 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Grequest_start \fP \- Starts a generalized request and returns a handle to it in \fIrequest\fP. +\fBMPI_Grequest_start \fP \- Starts a generalized request and returns a handle to it in \fIrequest\fP. 
.SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Grequest_start(MPI_Grequest_query_function \fI*query_fn\fP, - MPI_Grequest_free_function \fI*free_fn\fP, +int MPI_Grequest_start(MPI_Grequest_query_function \fI*query_fn\fP, + MPI_Grequest_free_function \fI*free_fn\fP, MPI_Grequest_cancel_function \fI*cancel_fn\fP, void \fI*extra_state\fP, MPI_Request \fI*request\fP) @@ -21,11 +21,11 @@ int MPI_Grequest_start(MPI_Grequest_query_function \fI*query_fn\fP, .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf INCLUDE 'mpif.h' -MPI_GREQUEST_START(\fIQUERY_FN, FREE_FN, CANCEL_FN, EXTRA_STATE, +MPI_GREQUEST_START(\fIQUERY_FN, FREE_FN, CANCEL_FN, EXTRA_STATE, REQUEST, IERROR\fP) INTEGER \fIREQUEST, IERROR\fP EXTERNAL \fIQUERY_FN, FREE_FN, CANCEL_FN\fP - INTEGER (KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE\fP .fi .SH C++ Syntax @@ -61,7 +61,7 @@ Generalized request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -91,7 +91,7 @@ and in C++, it is .sp The \fIquery_fn\fP function computes the status that should be returned for the generalized request. The status also includes information about successful/unsuccessful cancellation of the request (result to be returned by MPI_Test_cancelled). .sp -The \fIquery_fn\fP function is invoked by the MPI_{Wait|Test}{any|some|all} call that completed the generalized request associated with this callback. The callback function is also invoked by calls to MPI_Request_get_status if the request is complete when the call occurs. In both cases, the callback is passed a reference to the corresponding status variable passed by the user to the MPI call. If the user provided MPI_STATUS_IGNORE or MPI_STATUSES_IGNORE to the MPI function that causes \fIquery_fn\fP to be called, then MPI will pass a valid status object to \fIquery_fn\fP, and this status will be ignored upon return of the callback function. 
Note that \fIquery_fn\fP is invoked only after MPI_Grequest_complete is called on the request; it may be invoked several times for the same generalized request. Note also that a call to MPI_{Wait|Test}{some|all} may cause multiple invocations of \fIquery_fn\fP callback functions, one for each generalized request that is completed by the MPI call. The order of these invocations is not specified by MPI. +The \fIquery_fn\fP function is invoked by the MPI_{Wait|Test}{any|some|all} call that completed the generalized request associated with this callback. The callback function is also invoked by calls to MPI_Request_get_status if the request is complete when the call occurs. In both cases, the callback is passed a reference to the corresponding status variable passed by the user to the MPI call. If the user provided MPI_STATUS_IGNORE or MPI_STATUSES_IGNORE to the MPI function that causes \fIquery_fn\fP to be called, then MPI will pass a valid status object to \fIquery_fn\fP, and this status will be ignored upon return of the callback function. Note that \fIquery_fn\fP is invoked only after MPI_Grequest_complete is called on the request; it may be invoked several times for the same generalized request. Note also that a call to MPI_{Wait|Test}{some|all} may cause multiple invocations of \fIquery_fn\fP callback functions, one for each generalized request that is completed by the MPI call. The order of these invocations is not specified by MPI. .sp In C, the free function is .sp @@ -115,9 +115,9 @@ And in C++, it is .sp The \fIfree_fn\fP callback function is invoked to clean up user-allocated resources when the generalized request is freed. .sp -The \fIfree_fn\fP function is invoked by the MPI_{Wait|Test}{any|some|all} call that completed the generalized request associated with this callback. \fIfree_fn\fP is invoked after the call to \fIquery_fn\fP for the same request. 
However, if the MPI call completed multiple generalized requests, the order in which \fIfree_fn\fP callback functions are invoked is not specified by MPI. +The \fIfree_fn\fP function is invoked by the MPI_{Wait|Test}{any|some|all} call that completed the generalized request associated with this callback. \fIfree_fn\fP is invoked after the call to \fIquery_fn\fP for the same request. However, if the MPI call completed multiple generalized requests, the order in which \fIfree_fn\fP callback functions are invoked is not specified by MPI. .sp -The \fIfree_fn\fP callback is also invoked for generalized requests that are freed by a call to MPI_Request_free (no call to MPI_{Wait|Test}{any|some|all} will occur for such a request). In this case, the callback function will be called either in the MPI call MPI_Request_free(request) or in the MPI call MPI_Grequest_complete(request), whichever happens last. In other words, in this case the actual freeing code is executed as soon as both calls (MPI_Request_free and MPI_Grequest_complete) have occurred. The \fIrequest\fP is not deallocated until after \fIfree_fn\fP completes. Note that \fIfree_fn\fP will be invoked only once per request by a correct program. +The \fIfree_fn\fP callback is also invoked for generalized requests that are freed by a call to MPI_Request_free (no call to MPI_{Wait|Test}{any|some|all} will occur for such a request). In this case, the callback function will be called either in the MPI call MPI_Request_free(request) or in the MPI call MPI_Grequest_complete(request), whichever happens last. In other words, in this case the actual freeing code is executed as soon as both calls (MPI_Request_free and MPI_Grequest_complete) have occurred. The \fIrequest\fP is not deallocated until after \fIfree_fn\fP completes. Note that \fIfree_fn\fP will be invoked only once per request by a correct program. .sp In C, the cancel function is .sp @@ -160,11 +160,11 @@ and gives the length of the declared integer in bytes. 
Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .sp -All callback functions return an error code. The code is passed back and dealt with as appropriate for the error code by the MPI function that invoked the callback function. For example, if error codes are returned, then the error code returned by the callback function will be returned by the MPI function that invoked the callback function. In the case of a MPI_{Wait|Test}any call that invokes both \fIquery_fn\fP and \fIfree_fn\fP, the MPI call will return the error code returned by the last callback, namely \fIfree_fn\fP. If one or more of the requests in a call to MPI_{Wait|Test}{some|all} has failed, then the MPI call will return MPI_ERR_IN_STATUS. In such a case, if the MPI call was passed an array of statuses, then MPI will return in each of the statuses that correspond to a completed generalized request the error code returned by the corresponding invocation of its \fIfree_fn\fP callback function. 
However, if the MPI function was passed MPI_STATUSES_IGNORE, then the individual error codes returned by each callback function will be lost. +All callback functions return an error code. The code is passed back and dealt with as appropriate for the error code by the MPI function that invoked the callback function. For example, if error codes are returned, then the error code returned by the callback function will be returned by the MPI function that invoked the callback function. In the case of an MPI_{Wait|Test}any call that invokes both \fIquery_fn\fP and \fIfree_fn\fP, the MPI call will return the error code returned by the last callback, namely \fIfree_fn\fP. If one or more of the requests in a call to MPI_{Wait|Test}{some|all} has failed, then the MPI call will return MPI_ERR_IN_STATUS. In such a case, if the MPI call was passed an array of statuses, then MPI will return in each of the statuses that correspond to a completed generalized request the error code returned by the corresponding invocation of its \fIfree_fn\fP callback function. However, if the MPI function was passed MPI_STATUSES_IGNORE, then the individual error codes returned by each callback function will be lost. .sp -See the MPI man page for a full list of MPI error codes. +See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Group_compare.3in b/ompi/mpi/man/man3/MPI_Group_compare.3in index 598e818e853..0dec5e9bc0a 100644 --- a/ompi/mpi/man/man3/MPI_Group_compare.3in +++ b/ompi/mpi/man/man3/MPI_Group_compare.3in @@ -19,7 +19,7 @@ int MPI_Group_compare(MPI_Group \fIgroup1\fP, MPI_Group\fI group2\fP, .nf INCLUDE 'mpif.h' MPI_GROUP_COMPARE(\fIGROUP1, GROUP2, RESULT, IERROR\fP) - INTEGER \fIGROUP1, GROUP2, RESULT, IERROR\fP + INTEGER \fIGROUP1, GROUP2, RESULT, IERROR\fP .fi .SH C++ Syntax @@ -45,15 +45,15 @@ Integer which is MPI_IDENT if the order and members of the two groups are the sa .ft R .TP 1i IERROR -Fortran only: Error status (integer).
+Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_IDENT results if the group members and group order is exactly the same in both groups. This happens for instance if group1 and group2 are the same handle. MPI_SIMILAR results if the group members are the same but the order is different. MPI_UNEQUAL results otherwise. +MPI_IDENT results if the group members and group order are exactly the same in both groups. This happens, for instance, if group1 and group2 are the same handle. MPI_SIMILAR results if the group members are the same but the order is different. MPI_UNEQUAL results otherwise. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error.
diff --git a/ompi/mpi/man/man3/MPI_Group_difference.3in b/ompi/mpi/man/man3/MPI_Group_difference.3in index 769699f0b99..4d7ee723514 100644 --- a/ompi/mpi/man/man3/MPI_Group_difference.3in +++ b/ompi/mpi/man/man3/MPI_Group_difference.3in @@ -20,7 +20,7 @@ int MPI_Group_difference(MPI_Group \fIgroup1\fP, MPI_Group\fI group2\fP, .nf INCLUDE 'mpif.h' MPI_GROUP_DIFFERENCE(\fIGROUP1, GROUP2, NEWGROUP, IERROR\fP) - INTEGER \fIGROUP1, GROUP2, NEWGROUP, IERROR\fP + INTEGER \fIGROUP1, GROUP2, NEWGROUP, IERROR\fP .fi .SH C++ Syntax @@ -46,7 +46,7 @@ Difference group (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -63,7 +63,7 @@ group, ordered as in first group o difference -- all elements of the first group that are not in the second group, ordered as in the first group .LP -Note that for these operations the order of processes in the output group is determined primarily by order in the first group (if possible) and then, if necessary, by order in the second group. Neither union nor intersection are commutative, but both are associative. +Note that for these operations the order of processes in the output group is determined primarily by order in the first group (if possible) and then, if necessary, by order in the second group. Neither union nor intersection is commutative, but both are associative. .sp The new group can be empty, that is, equal to MPI_GROUP_EMPTY. @@ -71,7 +71,7 @@ The new group can be empty, that is, equal to MPI_GROUP_EMPTY. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called.
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Group_free diff --git a/ompi/mpi/man/man3/MPI_Group_excl.3in b/ompi/mpi/man/man3/MPI_Group_excl.3in index 1b6cfd75f4c..eb5ec8c053f 100644 --- a/ompi/mpi/man/man3/MPI_Group_excl.3in +++ b/ompi/mpi/man/man3/MPI_Group_excl.3in @@ -21,7 +21,7 @@ int MPI_Group_excl(MPI_Group \fIgroup\fP, int\fI n\fP, const int\fI ranks\fP[], .nf INCLUDE 'mpif.h' MPI_GROUP_EXCL(\fIGROUP, N, RANKS, NEWGROUP, IERROR\fP) - INTEGER \fIGROUP, N, RANKS(*), NEWGROUP, IERROR\fP + INTEGER \fIGROUP, N, RANKS(*), NEWGROUP, IERROR\fP .fi .SH C++ Syntax @@ -50,21 +50,21 @@ New group derived from above, preserving the order defined by group (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -The function MPI_Group_excl creates a group of processes newgroup that is obtained by deleting from group those processes with ranks ranks[0], \&... ranks[n-1]. The ordering of processes in newgroup is identical to the ordering in group. Each of the n elements of ranks must be a valid rank in group and all elements must be distinct; otherwise, the call is erroneous. If n = 0, then newgroup is identical to group. +The function MPI_Group_excl creates a group of processes newgroup that is obtained by deleting from group those processes with ranks ranks[0], \&... ranks[n-1]. 
The ordering of processes in newgroup is identical to the ordering in group. Each of the n elements of ranks must be a valid rank in group and all elements must be distinct; otherwise, the call is erroneous. If n = 0, then newgroup is identical to group. .SH NOTE .ft R -Currently, each of the ranks to exclude must be a valid rank in the group and all elements must be distinct or the function is erroneous. This restriction is per the draft. +Currently, each of the ranks to exclude must be a valid rank in the group and all elements must be distinct or the function is erroneous. This restriction is per the draft. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Group_free.3in b/ompi/mpi/man/man3/MPI_Group_free.3in index 74eed8709f2..c406a47f37f 100644 --- a/ompi/mpi/man/man3/MPI_Group_free.3in +++ b/ompi/mpi/man/man3/MPI_Group_free.3in @@ -19,7 +19,7 @@ int MPI_Group_free(MPI_Group *\fIgroup\fP) .nf INCLUDE 'mpif.h' MPI_GROUP_FREE(\fIGROUP, IERROR\fP) - INTEGER \fIGROUP, IERROR\fP + INTEGER \fIGROUP, IERROR\fP .fi .SH C++ Syntax @@ -37,11 +37,11 @@ Group (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -This operation marks a group object for deallocation. The handle group is set to MPI_GROUP_NULL by the call. Any ongoing operation using this group will complete normally. +This operation marks a group object for deallocation. The handle group is set to MPI_GROUP_NULL by the call. Any ongoing operation using this group will complete normally. .SH NOTE .ft R @@ -51,5 +51,5 @@ On return, group is set to MPI_GROUP_NULL. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Group_incl.3in b/ompi/mpi/man/man3/MPI_Group_incl.3in index 16e5c148ef5..3326420cc32 100644 --- a/ompi/mpi/man/man3/MPI_Group_incl.3in +++ b/ompi/mpi/man/man3/MPI_Group_incl.3in @@ -21,7 +21,7 @@ int MPI_Group_incl(MPI_Group \fIgroup\fP, int\fI n\fP, const int\fI ranks\fP[], .nf INCLUDE 'mpif.h' MPI_GROUP_INCL(\fIGROUP, N, RANKS, NEWGROUP, IERROR\fP) - INTEGER \fIGROUP, N, RANKS(*), NEWGROUP, IERROR\fP + INTEGER \fIGROUP, N, RANKS(*), NEWGROUP, IERROR\fP .fi .SH C++ Syntax @@ -50,7 +50,7 @@ New group derived from above, in the order defined by ranks (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -65,7 +65,7 @@ duplicates in the list of ranks. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Group_intersection.3in b/ompi/mpi/man/man3/MPI_Group_intersection.3in index dfc394c6186..3bddcd7465a 100644 --- a/ompi/mpi/man/man3/MPI_Group_intersection.3in +++ b/ompi/mpi/man/man3/MPI_Group_intersection.3in @@ -20,7 +20,7 @@ int MPI_Group_intersection(MPI_Group \fIgroup1\fP, MPI_Group\fI group2\fP, .nf INCLUDE 'mpif.h' MPI_GROUP_INTERSECTION(\fIGROUP1, GROUP2, NEWGROUP, IERROR\fP) - INTEGER \fIGROUP1, GROUP2, NEWGROUP, IERROR\fP + INTEGER \fIGROUP1, GROUP2, NEWGROUP, IERROR\fP .fi .SH C++ Syntax @@ -46,7 +46,7 @@ Intersection group (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -61,9 +61,9 @@ intersect -- all elements of the first group that are also in the second group, ordered as in first group. .TP o -difference -- all elements of the first group that are not in the second group, ordered as in the first group. +difference -- all elements of the first group that are not in the second group, ordered as in the first group. .LP -Note that for these operations the order of processes in the output group is determined primarily by order in the first group (if possible) and then, if necessary, by order in the second group. Neither union nor intersection are commutative, but both are associative. +Note that for these operations the order of processes in the output group is determined primarily by order in the first group (if possible) and then, if necessary, by order in the second group. Neither union nor intersection is commutative, but both are associative. .sp The new group can be empty, that is, equal to MPI_GROUP_EMPTY. @@ -71,7 +71,7 @@ The new group can be empty, that is, equal to MPI_GROUP_EMPTY. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors.
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Group_free diff --git a/ompi/mpi/man/man3/MPI_Group_range_excl.3in b/ompi/mpi/man/man3/MPI_Group_range_excl.3in index 96c1824b777..7d4aa8c27ae 100644 --- a/ompi/mpi/man/man3/MPI_Group_range_excl.3in +++ b/ompi/mpi/man/man3/MPI_Group_range_excl.3in @@ -20,7 +20,7 @@ int MPI_Group_range_excl(MPI_Group \fIgroup\fP, int\fI n\fP, int\fI ranges\fP[][ .nf INCLUDE 'mpif.h' MPI_GROUP_RANGE_EXCL(\fIGROUP, N, RANGES, NEWGROUP, IERROR\fP) - INTEGER \fIGROUP, N, RANGES(3,*), NEWGROUP, IERROR\fP + INTEGER \fIGROUP, N, RANGES(3,*), NEWGROUP, IERROR\fP .fi .SH C++ Syntax @@ -49,11 +49,11 @@ New group derived from above, preserving the order in group (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Each computed rank must be a valid rank in group and all computed ranks must be distinct, or else the program is erroneous. +Each computed rank must be a valid rank in group and all computed ranks must be distinct, or else the program is erroneous. 
.sp The functionality of this routine is specified to be equivalent to expanding the array of ranges to an array of the excluded ranks and passing the resulting array of ranks and other arguments to MPI_Group_excl. A call to MPI_Group_excl is equivalent to a call to MPI_Group_range_excl with each rank i in ranks replaced by the triplet (i,i,1) in the argument ranges. @@ -62,12 +62,12 @@ expanding the array of ranges to an array of the excluded ranks and passing the Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R .sp -MPI_Group_excl +MPI_Group_excl .br MPI_Group_free .br diff --git a/ompi/mpi/man/man3/MPI_Group_range_incl.3in b/ompi/mpi/man/man3/MPI_Group_range_incl.3in index 9a2609046d4..1e8f4bd5c39 100644 --- a/ompi/mpi/man/man3/MPI_Group_range_incl.3in +++ b/ompi/mpi/man/man3/MPI_Group_range_incl.3in @@ -20,7 +20,7 @@ int MPI_Group_range_incl(MPI_Group \fIgroup\fP, int\fI n\fP, int\fI ranges\fP[][ .nf INCLUDE 'mpif.h' MPI_GROUP_RANGE_INCL(\fIGROUP, N, RANGES, NEWGROUP, IERROR\fP) - INTEGER \fIGROUP, N, RANGES(3,*), NEWGROUP, IERROR\fP + INTEGER \fIGROUP, N, RANGES(3,*), NEWGROUP, IERROR\fP .fi .SH C++ Syntax @@ -49,11 +49,11 @@ New group derived from above, in the order defined by ranges (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -If ranges consist of the triplets +If ranges consist of the triplets .sp .nf (first1, last1, stride1),\ ..., (firstn, lastn, striden) @@ -83,12 +83,12 @@ This implementation does not currently check to see that the list of ranges to i Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R .sp -MPI_Group_incl +MPI_Group_incl .br MPI_Group_free diff --git a/ompi/mpi/man/man3/MPI_Group_rank.3in b/ompi/mpi/man/man3/MPI_Group_rank.3in index b777420c0eb..35f94efc58a 100644 --- a/ompi/mpi/man/man3/MPI_Group_rank.3in +++ b/ompi/mpi/man/man3/MPI_Group_rank.3in @@ -18,8 +18,8 @@ int MPI_Group_rank(MPI_Group \fIgroup\fP, int *\fIrank\fP) .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_GROUP_RANK(\fIGROUP, RANK, IERROR\fP) - INTEGER \fIGROUP, RANK, IERROR\fP +MPI_GROUP_RANK(\fIGROUP, RANK, IERROR\fP) + INTEGER \fIGROUP, RANK, IERROR\fP .fi .SH C++ Syntax @@ -42,7 +42,7 @@ Rank of the calling process in group, or MPI_UNDEFINED if the process is not a m .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -52,5 +52,5 @@ MPI_Group_rank returns as the output parameter \fIrank\fP the rank of the callin Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Group_size.3in b/ompi/mpi/man/man3/MPI_Group_size.3in index cddca0b2c58..200312d4a60 100644 --- a/ompi/mpi/man/man3/MPI_Group_size.3in +++ b/ompi/mpi/man/man3/MPI_Group_size.3in @@ -42,7 +42,7 @@ Number of processes in the group (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -52,5 +52,5 @@ MPI_Group_size returns in \fIsize\fP the number of processes in the group. Thus, Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Group_translate_ranks.3in b/ompi/mpi/man/man3/MPI_Group_translate_ranks.3in index 7af79b194d3..c700f995d2b 100644 --- a/ompi/mpi/man/man3/MPI_Group_translate_ranks.3in +++ b/ompi/mpi/man/man3/MPI_Group_translate_ranks.3in @@ -22,14 +22,14 @@ int MPI_Group_translate_ranks(MPI_Group \fIgroup1\fP, int\fI n\fP, INCLUDE 'mpif.h' MPI_GROUP_TRANSLATE_RANKS(\fIGROUP1, N, RANKS1, GROUP2, RANKS2, IERROR\fP) - INTEGER \fIGROUP1, N, RANKS1(*), GROUP2, RANKS2(*), IERROR\fP + INTEGER \fIGROUP1, N, RANKS1(*), GROUP2, RANKS2(*), IERROR\fP .fi .SH C++ Syntax .nf #include <mpi.h> -static void Group::Translate_ranks (const Group& \fIgroup1\fP, int \fIn\fP, - const int \fIranks1\fP[], const Group& \fIgroup2\fP, int \fIranks2\fP[]) +static void Group::Translate_ranks (const Group& \fIgroup1\fP, int \fIn\fP, + const int \fIranks1\fP[], const Group& \fIgroup2\fP, int \fIranks2\fP[]) .fi .SH INPUT PARAMETERS @@ -51,11 +51,11 @@ Second group (handle). .ft R .TP 1i ranks2 -Array of corresponding ranks in group2, MPI_UNDEFINED when no correspondence exists. +Array of corresponding ranks in group2, MPI_UNDEFINED when no correspondence exists. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -65,5 +65,5 @@ This function is important for determining the relative numbering of the same pr Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors.
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Group_union.3in b/ompi/mpi/man/man3/MPI_Group_union.3in index f176acb768a..028f9d35f19 100644 --- a/ompi/mpi/man/man3/MPI_Group_union.3in +++ b/ompi/mpi/man/man3/MPI_Group_union.3in @@ -20,7 +20,7 @@ int MPI_Group_union(MPI_Group \fIgroup1\fP, MPI_Group \fIgroup2\fP, .nf INCLUDE 'mpif.h' MPI_GROUP_UNION(\fIGROUP1, GROUP2, NEWGROUP, IERROR\fP) - INTEGER \fIGROUP1, GROUP2, NEWGROUP, IERROR\fP + INTEGER \fIGROUP1, GROUP2, NEWGROUP, IERROR\fP .fi .SH C++ Syntax @@ -46,7 +46,7 @@ Union group (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -61,10 +61,10 @@ intersect -- all elements of the first group that are also in the second group, ordered as in first group. .TP o -difference -- all elements of the first group that are not in the second group, ordered as in the first group. +difference -- all elements of the first group that are not in the second group, ordered as in the first group. .sp .LP -Note that for these operations the order of processes in the output group is determined primarily by order in the first group (if possible) and then, if necessary, by order in the second group. Neither union nor intersection are commutative, but both are associative. 
+Note that for these operations the order of processes in the output group is determined primarily by order in the first group (if possible) and then, if necessary, by order in the second group. Neither union nor intersection are commutative, but both are associative. .sp The new group can be empty, that is, equal to MPI_GROUP_EMPTY. @@ -72,7 +72,7 @@ The new group can be empty, that is, equal to MPI_GROUP_EMPTY. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO MPI_Group_free diff --git a/ompi/mpi/man/man3/MPI_Ibsend.3in b/ompi/mpi/man/man3/MPI_Ibsend.3in index 12975d24fe7..e061c23ee42 100644 --- a/ompi/mpi/man/man3/MPI_Ibsend.3in +++ b/ompi/mpi/man/man3/MPI_Ibsend.3in @@ -22,13 +22,13 @@ int MPI_Ibsend(const void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\ INCLUDE 'mpif.h' MPI_IBSEND(\fIBUF, COUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP) <type> \fIBUF\fP(*) - INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP + INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP .fi .SH C++ Syntax .nf #include <mpi.h> -Request Comm::Ibsend(const void* \fIbuf\fP, int \fIcount\fP, const +Request Comm::Ibsend(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi @@ -61,11 +61,11 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Ibsend posts a buffered-mode, nonblocking send. Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or wait for its completion. +MPI_Ibsend posts a buffered-mode, nonblocking send. Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or wait for its completion. .sp A nonblocking send call indicates that the system may start copying data out of the send buffer. The sender should not modify any part of the send buffer after a nonblocking send operation is called, until the send completes. @@ -73,7 +73,7 @@ A nonblocking send call indicates that the system may start copying data out of Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument.
C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Test diff --git a/ompi/mpi/man/man3/MPI_Improbe.3in b/ompi/mpi/man/man3/MPI_Improbe.3in index 5d16fae4531..9abfcdf8c80 100644 --- a/ompi/mpi/man/man3/MPI_Improbe.3in +++ b/ompi/mpi/man/man3/MPI_Improbe.3in @@ -13,7 +13,7 @@ .SH C Syntax .nf #include <mpi.h> -int MPI_Improbe(int \fIsource\fP, int\fI tag\fP, MPI_Comm\fI comm\fP, +int MPI_Improbe(int \fIsource\fP, int\fI tag\fP, MPI_Comm\fI comm\fP, int\fI *flag\fP, MPI_Message\fI *message\fP, MPI_Status\fI *status\fP) .fi @@ -23,7 +23,7 @@ INCLUDE 'mpif.h' MPI_IMPROBE(\fISOURCE, TAG, COMM, FLAG, MESSAGE, STATUS, IERROR\fP) LOGICAL \fIFLAG\fP INTEGER \fISOURCE, TAG, COMM, MESSAGE\fP - INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -59,7 +59,7 @@ Status object (status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -88,7 +88,7 @@ This is an MPI-3 function and has no C++ binding.
.SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ -functions do not return errors. +functions do not return errors. .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for diff --git a/ompi/mpi/man/man3/MPI_Imrecv.3in b/ompi/mpi/man/man3/MPI_Imrecv.3in index e1ea13b4c8c..8d0dc0aedf1 100644 --- a/ompi/mpi/man/man3/MPI_Imrecv.3in +++ b/ompi/mpi/man/man3/MPI_Imrecv.3in @@ -12,7 +12,7 @@ .SH C Syntax .nf #include <mpi.h> -int MPI_Imrecv(void \fI*buf\fP, int\fI count\fP, MPI_Datatype\fI type\fP, +int MPI_Imrecv(void \fI*buf\fP, int\fI count\fP, MPI_Datatype\fI type\fP, MPI_Message\fI *message\fP, MPI_Request\fI *request\fP) .fi @@ -21,7 +21,7 @@ int MPI_Imrecv(void \fI*buf\fP, int\fI count\fP, MPI_Datatype\fI type\fP, INCLUDE 'mpif.h' MPI_IMRECV(\fIBUF, COUNT, DATATYPE, MESSAGE, REQUEST, IERROR\fP) <type> \fIBUF(*)\fP - INTEGER \fCOUNT, DATATYPE, MESSAGE, REQUEST, IERROR\fP + INTEGER \fICOUNT, DATATYPE, MESSAGE, REQUEST, IERROR\fP .fi .SH C++ Syntax @@ -51,7 +51,7 @@ request Request (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -72,7 +72,7 @@ is possible to cancel the returned request with MPI_Cancel. If MPI_Cancel succeeds, the matched message must be found by a subsequent message probe (MPI_Probe, MPI_Iprobe, MPI_Mprobe, or MPI_Improbe), received by a subsequent receive operation or canceled by the -sender. +sender. .sp Note, however, that is it possible for the cancellation of operations initiated with MPI_Imrecv to fail. An example of a failing case is @@ -87,7 +87,7 @@ This is an MPI-3 function and has no C++ binding. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ -functions do not return errors.
+functions do not return errors. .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for diff --git a/ompi/mpi/man/man3/MPI_Info_create.3in b/ompi/mpi/man/man3/MPI_Info_create.3in index 28c1fa02d73..624c0cf6d3b 100644 --- a/ompi/mpi/man/man3/MPI_Info_create.3in +++ b/ompi/mpi/man/man3/MPI_Info_create.3in @@ -19,7 +19,7 @@ int MPI_Info_create(MPI_Info \fI*info\fP) .nf INCLUDE 'mpif.h' MPI_INFO_CREATE(\fIINFO, IERROR\fP) - INTEGER \fIINFO, IERROR\fP + INTEGER \fIINFO, IERROR\fP .fi .SH C++ Syntax @@ -36,17 +36,17 @@ Info object created (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Info_create creates a new info object. The newly created object contains no key/value pairs. +MPI_Info_create creates a new info object. The newly created object contains no key/value pairs. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft r diff --git a/ompi/mpi/man/man3/MPI_Info_delete.3in b/ompi/mpi/man/man3/MPI_Info_delete.3in index 471e3b2eab8..fa2bdc389e4 100644 --- a/ompi/mpi/man/man3/MPI_Info_delete.3in +++ b/ompi/mpi/man/man3/MPI_Info_delete.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Info_delete 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Info_delete\fP \- Deletes a key/value pair from \fIinfo\fP. +\fBMPI_Info_delete\fP \- Deletes a key/value pair from \fIinfo\fP. .SH SYNTAX .ft R @@ -20,7 +20,7 @@ int MPI_Info_delete(MPI_Info \fIinfo\fP, const char \fI*key\fP) .nf INCLUDE 'mpif.h' MPI_INFO_DELETE(\fIINFO, KEY, IERROR\fP) - INTEGER \fIINFO, IERROR\fP + INTEGER \fIINFO, IERROR\fP CHARACTER*(*) \fIKEY\fP .fi @@ -46,7 +46,7 @@ Key (string). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -56,7 +56,7 @@ MPI_Info_delete deletes a (key,value) pair from \fIinfo\fP. If \fIkey\fP is not Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft r diff --git a/ompi/mpi/man/man3/MPI_Info_dup.3in b/ompi/mpi/man/man3/MPI_Info_dup.3in index 722d8dc1558..7cd21f587b0 100644 --- a/ompi/mpi/man/man3/MPI_Info_dup.3in +++ b/ompi/mpi/man/man3/MPI_Info_dup.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Info_dup 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Info_dup\fP \- Duplicates an info object. +\fBMPI_Info_dup\fP \- Duplicates an info object. .SH SYNTAX .ft R @@ -19,7 +19,7 @@ int MPI_Info_dup(MPI_Info \fIinfo\fP, MPI_Info \fI*newinfo\fP) .nf INCLUDE 'mpif.h' MPI_INFO_DUP(\fIINFO, NEWINFO, IERROR\fP) - INTEGER \fIINFO, NEWINFO, IERROR\fP + INTEGER \fIINFO, NEWINFO, IERROR\fP .fi .SH C++ Syntax @@ -42,17 +42,17 @@ Info object (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Info_dup duplicates an existing info object, creating a new object, with the same (key,value) pairs and the same ordering of keys. +MPI_Info_dup duplicates an existing info object, creating a new object, with the same (key,value) pairs and the same ordering of keys. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft r diff --git a/ompi/mpi/man/man3/MPI_Info_env.3in b/ompi/mpi/man/man3/MPI_Info_env.3in index 67d68a47724..c7ef970fa7d 100644 --- a/ompi/mpi/man/man3/MPI_Info_env.3in +++ b/ompi/mpi/man/man3/MPI_Info_env.3in @@ -17,7 +17,7 @@ The MPI-3 standard established a static MPI_Info object named \fIMPI_INFO_ENV\fP command If available, the value will be set to argv[0]. Note that the value may not always be available - e.g., it is valid for a program to call MPI_Init with NULL parameters, in which case argv[0] will not be set if run as a singleton. This value will never be set in a Fortran program as the argv are not available. .TP 1i -argv +argv The argv given for the application. If no arguments are passed to the application, then this value will not be set. It will also not be set in the case of a singleton that calls MPI_Init with NULL parameters, or a Fortran program. .TP 1i maxprocs @@ -42,7 +42,7 @@ thread_level The requested MPI thread level - note that this may differ from the \fIactual\fP MPI thread level of the application. .TP 1i ompi_num_apps -The number of application contexts in an MPMD job. +The number of application contexts in an MPMD job. This is an Open MPI-specific field and value. 
.TP 1i ompi_np diff --git a/ompi/mpi/man/man3/MPI_Info_free.3in b/ompi/mpi/man/man3/MPI_Info_free.3in index 49b93635429..2b851402ea2 100644 --- a/ompi/mpi/man/man3/MPI_Info_free.3in +++ b/ompi/mpi/man/man3/MPI_Info_free.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Info_free 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Info_free\fP \- Frees an info object. +\fBMPI_Info_free\fP \- Frees an info object. .SH SYNTAX .ft R @@ -19,7 +19,7 @@ int MPI_Info_free(MPI_Info \fI*info\fP) .nf INCLUDE 'mpif.h' MPI_INFO_FREE(\fIINFO, IERROR\fP) - INTEGER \fIINFO, IERROR\fP + INTEGER \fIINFO, IERROR\fP .fi .SH C++ Syntax @@ -38,17 +38,17 @@ Info object (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Info_free frees \fIinfo\fP and sets it to MPI_INFO_NULL. +MPI_Info_free frees \fIinfo\fP and sets it to MPI_INFO_NULL. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft r diff --git a/ompi/mpi/man/man3/MPI_Info_get.3in b/ompi/mpi/man/man3/MPI_Info_get.3in index c6b7366b732..cdb9534d25e 100644 --- a/ompi/mpi/man/man3/MPI_Info_get.3in +++ b/ompi/mpi/man/man3/MPI_Info_get.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Info_get 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Info_get\fP \- Retrieves the value associated with a key in an info object. +\fBMPI_Info_get\fP \- Retrieves the value associated with a key in an info object. .SH SYNTAX .ft R @@ -20,7 +20,7 @@ int MPI_Info_get(MPI_Info \fIinfo\fP, const char \fI*key\fP, int \fIvaluelen\fP, .nf INCLUDE 'mpif.h' MPI_INFO_GET(\fIINFO, KEY, VALUELEN, VALUE, FLAG, IERROR\fP) - INTEGER \fIINFO, VALUELEN, IERROR\fP + INTEGER \fIINFO, VALUELEN, IERROR\fP CHARACTER*(*) \fIKEY, VALUE\fP LOGICAL \fIFLAG\fP @@ -28,7 +28,7 @@ MPI_INFO_GET(\fIINFO, KEY, VALUELEN, VALUE, FLAG, IERROR\fP) .SH C++ Syntax .nf #include <mpi.h> -bool MPI::Info::Get(const char* \fIkey\fP, int \fIvaluelen\fP, +bool MPI::Info::Get(const char* \fIkey\fP, int \fIvaluelen\fP, char* \fIvalue\fP) const .fi @@ -54,23 +54,23 @@ Value (string). .ft R .TP 1i flag -Returns true if key defined, false if not (boolean). +Returns true if key defined, false if not (boolean). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Info_get retrieves the value associated with \fIkey\fP in a previous call to MPI_Info_set. If such a key exists, it sets \fIflag\fP to true and returns the value in \fIvalue\fP; otherwise it sets \fIflag\fP to false and leaves \fIvalue\fP unchanged. \fIvaluelen\fP is the number of characters available in value. If it is less than the actual size of the value, the returned value is truncated. In C, \fIvaluelen\fP should be one less than the amount of allocated space to allow for the null terminator. +MPI_Info_get retrieves the value associated with \fIkey\fP in a previous call to MPI_Info_set.
If such a key exists, it sets \fIflag\fP to true and returns the value in \fIvalue\fP; otherwise it sets \fIflag\fP to false and leaves \fIvalue\fP unchanged. \fIvaluelen\fP is the number of characters available in value. If it is less than the actual size of the value, the returned value is truncated. In C, \fIvaluelen\fP should be one less than the amount of allocated space to allow for the null terminator. .sp -If \fIkey\fP is larger than MPI_MAX_INFO_KEY, the call is erroneous. +If \fIkey\fP is larger than MPI_MAX_INFO_KEY, the call is erroneous. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft r diff --git a/ompi/mpi/man/man3/MPI_Info_get_nkeys.3in b/ompi/mpi/man/man3/MPI_Info_get_nkeys.3in index 50a58e432a0..bad0c34ee7e 100644 --- a/ompi/mpi/man/man3/MPI_Info_get_nkeys.3in +++ b/ompi/mpi/man/man3/MPI_Info_get_nkeys.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Info_get_nkeys 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Info_get_nkeys\fP \- Gets the number of keys currently defined in an info object. +\fBMPI_Info_get_nkeys\fP \- Gets the number of keys currently defined in an info object. .SH SYNTAX .ft R @@ -19,7 +19,7 @@ int MPI_Info_get_nkeys(MPI_Info \fIinfo\fP, int \fI*nkeys\fP) .nf INCLUDE 'mpif.h' MPI_INFO_GET_NKEYS(\fIINFO, NKEYS, IERROR\fP) - INTEGER \fIINFO, NKEYS, IERROR\fP + INTEGER \fIINFO, NKEYS, IERROR\fP .fi .SH C++ Syntax @@ -38,21 +38,21 @@ Info object (handle). .ft R .TP 1i nkeys -Number of defined keys (integer). +Number of defined keys (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Info_get_nkeys returns the number of currently defined keys in \fIinfo\fP. +MPI_Info_get_nkeys returns the number of currently defined keys in \fIinfo\fP. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft r @@ -60,6 +60,6 @@ MPI_Info_get .br MPI_Info_get_nthkey .br -MPI_Info_get_valueln +MPI_Info_get_valuelen .br diff --git a/ompi/mpi/man/man3/MPI_Info_get_nthkey.3in b/ompi/mpi/man/man3/MPI_Info_get_nthkey.3in index 1388b6a85bf..11c9291914a 100644 --- a/ompi/mpi/man/man3/MPI_Info_get_nthkey.3in +++ b/ompi/mpi/man/man3/MPI_Info_get_nthkey.3in @@ -19,7 +19,7 @@ int MPI_Info_get_nthkey(MPI_Info \fIinfo\fP, int \fIn\fP, char \fI*key\fP) .nf INCLUDE 'mpif.h' MPI_INFO_GET_NTHKEY(\fIINFO, N, KEY, IERROR\fP) - INTEGER \fIINFO, N, IERROR\fP + INTEGER \fIINFO, N, IERROR\fP CHARACTER*(*) \fIKEY\fP .fi @@ -43,21 +43,21 @@ Key number (integer). .ft R .TP 1i key -Key (string). +Key (string). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Info_get_nthkey returns the \fIn\fPth defined key in \fIinfo\fP. Keys are numbered 0\...\fIN\fP - 1 where \fIN\fP is the value returned by MPI_Info_get_nkeys. All keys between 0 and \fIN\fP - 1 are guaranteed to be defined. The number of a given key does not change as long as \fIinfo\fP is not modified with MPI_Info_set or MPI_Info_delete. +MPI_Info_get_nthkey returns the \fIn\fPth defined key in \fIinfo\fP. Keys are numbered 0\...\fIN\fP - 1 where \fIN\fP is the value returned by MPI_Info_get_nkeys. All keys between 0 and \fIN\fP - 1 are guaranteed to be defined. The number of a given key does not change as long as \fIinfo\fP is not modified with MPI_Info_set or MPI_Info_delete. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. 
C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft r @@ -65,6 +65,6 @@ MPI_Info_get .br MPI_Info_get_nkeys .br -MPI_Info_get_valueln +MPI_Info_get_valuelen .br diff --git a/ompi/mpi/man/man3/MPI_Info_get_valuelen.3in b/ompi/mpi/man/man3/MPI_Info_get_valuelen.3in index d82d2e403cf..6ede6b37616 100644 --- a/ompi/mpi/man/man3/MPI_Info_get_valuelen.3in +++ b/ompi/mpi/man/man3/MPI_Info_get_valuelen.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Info_get_valuelen 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Info_get_valuelen\fP \- Retrieves the length of the key value associated with an info object. +\fBMPI_Info_get_valuelen\fP \- Retrieves the length of the key value associated with an info object. 
.SH SYNTAX .ft R @@ -21,7 +21,7 @@ int MPI_Info_get_valuelen(MPI_Info \fIinfo\fP, const char \fI*key\fP, .nf INCLUDE 'mpif.h' MPI_INFO_GET_VALUELEN(\fIINFO, KEY, VALUELEN, FLAG, IERROR\fP) - INTEGER \fIINFO, VALUELEN, IERROR\fP + INTEGER \fIINFO, VALUELEN, IERROR\fP LOGICAL \fIFLAG\fP CHARACTER*(*) \fIKEY\fP @@ -29,7 +29,7 @@ MPI_INFO_GET_VALUELEN(\fIINFO, KEY, VALUELEN, FLAG, IERROR\fP) .SH C++ Syntax .nf #include -bool MPI::Info::Get_valuelen(const char* \fIkey\fP, int& \fIvaluelen\fP) +bool MPI::Info::Get_valuelen(const char* \fIkey\fP, int& \fIvaluelen\fP) const .fi @@ -51,23 +51,23 @@ Length of value arg (integer). .ft R .TP 1i flag -Returns true if key defined, false if not (boolean). +Returns true if key defined, false if not (boolean). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Info_get_valuelen retrieves the length of the \fIvalue\fP associated with \fIkey\fP. If \fIkey\fP is defined, \fIvaluelen\fP is set to the length of its associated value and \fIflag\fP is set to true. If \fIkey\fP is not defined, \fIvaluelen\fP is not touched and \fIflag\fP is set to false. The length returned in C or C++ does not include the end-of-string character. +MPI_Info_get_valuelen retrieves the length of the \fIvalue\fP associated with \fIkey\fP. If \fIkey\fP is defined, \fIvaluelen\fP is set to the length of its associated value and \fIflag\fP is set to true. If \fIkey\fP is not defined, \fIvaluelen\fP is not touched and \fIflag\fP is set to false. The length returned in C or C++ does not include the end-of-string character. .sp -If \fIkey\fP is larger than MPI_MAX_INFO_KEY, the call is erroneous. +If \fIkey\fP is larger than MPI_MAX_INFO_KEY, the call is erroneous. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft r diff --git a/ompi/mpi/man/man3/MPI_Info_set.3in b/ompi/mpi/man/man3/MPI_Info_set.3in index 7e47d54caf9..705aeb83f38 100644 --- a/ompi/mpi/man/man3/MPI_Info_set.3in +++ b/ompi/mpi/man/man3/MPI_Info_set.3in @@ -19,7 +19,7 @@ int MPI_Info_set(MPI_Info \fIinfo\fP, char \fI*key\fP, char \fI*value\fP) .nf INCLUDE 'mpif.h' MPI_INFO_SET(\fIINFO, KEY, VALUE, IERROR\fP) - INTEGER \fIINFO, IERROR\fP + INTEGER \fIINFO, IERROR\fP CHARACTER*(*) \fIKEY, VALUE\fP .fi @@ -49,17 +49,17 @@ Value (string). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Info_set adds the (key,value) pair to \fIinfo\fP and overrides the value if a value for the same key was previously set. The \fIkey\fP and \fIvalue\fP parameters are null-terminated strings in C. In Fortran, leading and trailing spaces in \fIkey\fP and \fIvalue\fP are stripped. If either \fIkey\fP or \fIvalue\fP is larger than the allowed maximums, the error MPI_ERR_INFO_KEY or MPI_ERR_INFO_VALUE is raised, respectively. 
+MPI_Info_set adds the (key,value) pair to \fIinfo\fP and overrides the value if a value for the same key was previously set. The \fIkey\fP and \fIvalue\fP parameters are null-terminated strings in C. In Fortran, leading and trailing spaces in \fIkey\fP and \fIvalue\fP are stripped. If either \fIkey\fP or \fIvalue\fP is larger than the allowed maximums, the error MPI_ERR_INFO_KEY or MPI_ERR_INFO_VALUE is raised, respectively. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft r diff --git a/ompi/mpi/man/man3/MPI_Init.3in b/ompi/mpi/man/man3/MPI_Init.3in index f98b7257cdf..d2cc5abc721 100644 --- a/ompi/mpi/man/man3/MPI_Init.3in +++ b/ompi/mpi/man/man3/MPI_Init.3in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. 
.\" Copyright (c) 1996 Thinking Machines Corporation .\" $COPYRIGHT$ @@ -21,7 +21,7 @@ int MPI_Init(int *\fIargc\fP, char ***\fIargv\fP) .nf INCLUDE 'mpif.h' MPI_INIT(\fIIERROR\fP) - INTEGER \fIIERROR\fP + INTEGER \fIIERROR\fP .fi .SH C++ Syntax @@ -44,14 +44,14 @@ C/C++ only: Argument vector. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -This routine, or MPI_Init_thread, must be called before any other MPI -routine (apart from MPI_Initialized) is called. MPI can be initialized -at most once; subsequent calls to MPI_Init or MPI_Init_thread are -erroneous. +This routine, or MPI_Init_thread, must be called before most other MPI +routines are called. There are a small number of exceptions, such as +MPI_Initialized and MPI_Finalized. MPI can be initialized at most +once; subsequent calls to MPI_Init or MPI_Init_thread are erroneous. .sp All MPI programs must contain a call to MPI_Init or MPI_Init_thread. Open MPI accepts the C/C++ \fIargc\fP and \fIargv\fP @@ -63,7 +63,7 @@ them: /* declare variables */ MPI_Init(&argc, &argv); /* parse arguments */ - /* main program */ + /* main program */ MPI_Finalize(); } .fi @@ -92,7 +92,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. @@ -102,4 +102,4 @@ See the MPI man page for a full list of MPI error codes. 
MPI_Init_thread MPI_Initialized MPI_Finalize - +MPI_Finalized diff --git a/ompi/mpi/man/man3/MPI_Init_thread.3in b/ompi/mpi/man/man3/MPI_Init_thread.3in index bbba48bac4d..4383eb0fc24 100644 --- a/ompi/mpi/man/man3/MPI_Init_thread.3in +++ b/ompi/mpi/man/man3/MPI_Init_thread.3in @@ -1,7 +1,7 @@ .\" -*- nroff -*- .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation -.\" Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. .\" $COPYRIGHT$ .TH MPI_Init_thread 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" . @@ -22,7 +22,7 @@ int MPI_Init_thread(int *\fIargc\fP, char ***\fIargv\fP, .nf INCLUDE 'mpif.h' MPI_INIT_THREAD(\fIREQUIRED, PROVIDED, IERROR\fP) - INTEGER \fIREQUIRED, PROVIDED, IERROR\fP + INTEGER \fIREQUIRED, PROVIDED, IERROR\fP .fi .SH C++ Syntax @@ -52,13 +52,14 @@ provided Available level of thread support (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). . . .SH DESCRIPTION .ft R -This routine, or MPI_Init, must be called before any other MPI routine -(apart from MPI_Initialized) is called. MPI can be initialized at most +This routine, or MPI_Init, must be called before most other MPI +routines are called. There are a small number of exceptions, such as +MPI_Initialized and MPI_Finalized. MPI can be initialized at most once; subsequent calls to MPI_Init or MPI_Init_thread are erroneous. .sp MPI_Init_thread, as compared to MPI_Init, has a provision to request a @@ -86,10 +87,10 @@ function. In Open MPI, the value is dependent on how the library was configured and built. Note that there is no guarantee that \fIprovided\fP will be greater than or equal to \fIrequired\fP. .sp -Also note that calling MPI_Init_thread with a -.I required +Also note that calling MPI_Init_thread with a +.I required value of -.I MPI_THREAD_SINGLE +.I MPI_THREAD_SINGLE is equivalent to calling MPI_Init. 
.sp All MPI programs must contain a call to MPI_Init or @@ -102,7 +103,7 @@ them: /* declare variables */ MPI_Init_thread(&argc, &argv, req, &prov); /* parse arguments */ - /* main program */ + /* main program */ MPI_Finalize(); } .fi @@ -162,13 +163,13 @@ by multiple threads. . .PP Note that Open MPI's thread support is in a fairly early stage; the -above devices are likely to -.IR work , +above devices are likely to +.IR work , but the latency is likely to be fairly high. Specifically, efforts so -far have concentrated on -.IR correctness , -not -.I performance +far have concentrated on +.IR correctness , +not +.I performance (yet). . . @@ -185,7 +186,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. . .SH SEE ALSO .ft R @@ -193,4 +194,4 @@ guarantee that an MPI program can continue past an error. MPI_Init MPI_Initialized MPI_Finalize - +MPI_Finalized diff --git a/ompi/mpi/man/man3/MPI_Initialized.3in b/ompi/mpi/man/man3/MPI_Initialized.3in index 24ecae57164..d92cf39b03c 100644 --- a/ompi/mpi/man/man3/MPI_Initialized.3in +++ b/ompi/mpi/man/man3/MPI_Initialized.3in @@ -1,11 +1,11 @@ .\" -*- nroff -*- -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation .\" $COPYRIGHT$ .TH MPI_Initialized 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Initialized\fP \- Indicates whether MPI_Init has been called. 
+\fBMPI_Initialized\fP \- Checks whether MPI has been initialized .SH SYNTAX .ft R @@ -19,8 +19,8 @@ int MPI_Initialized(int *\fIflag\fP) .nf INCLUDE 'mpif.h' MPI_INITIALIZED(\fIFLAG, IERROR\fP) - LOGICAL \fIFLAG\fP - INTEGER \fIIERROR\fP + LOGICAL \fIFLAG\fP + INTEGER \fIIERROR\fP .fi .SH C++ Syntax @@ -33,24 +33,29 @@ bool Is_initialized() .ft R .TP 1i flag -Flag is true if MPI_Init has been called and false otherwise. +True if MPI has been initialized, and false otherwise (logical). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -This routine may be used to determine whether MPI_Init has been called. It is the only routine that may be called before MPI_Init is called. +This routine may be used to determine whether MPI has been +initialized. It is one of a small number of routines that may be +called before MPI is initialized and after MPI has been finalized +(MPI_Finalized is another). .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R -.sp +.nf MPI_Init - +MPI_Init_thread +MPI_Finalize +MPI_Finalized diff --git a/ompi/mpi/man/man3/MPI_Intercomm_create.3in b/ompi/mpi/man/man3/MPI_Intercomm_create.3in index 88001457971..23c32baee87 100644 --- a/ompi/mpi/man/man3/MPI_Intercomm_create.3in +++ b/ompi/mpi/man/man3/MPI_Intercomm_create.3in @@ -22,13 +22,13 @@ INCLUDE 'mpif.h' MPI_INTERCOMM_CREATE(\fILOCAL_COMM, LOCAL_LEADER, PEER_COMM, REMOTE_LEADER, TAG, NEWINTERCOMM, IERROR\fP) INTEGER \fILOCAL_COMM, LOCAL_LEADER, PEER_COMM, REMOTE_LEADER\fP - INTEGER \fITAG, NEWINTERCOMM, IERROR\fP + INTEGER \fITAG, NEWINTERCOMM, IERROR\fP .fi .SH C++ Syntax .nf #include -Intercomm Intracomm::Create_intercomm(int \fIlocal_leader\fP, const +Intercomm Intracomm::Create_intercomm(int \fIlocal_leader\fP, const Comm& \fIpeer_comm\fP, int \fIremote_leader\fP, int \fItag\fP) const .fi @@ -48,7 +48,7 @@ remote_leader Rank of remote group leader in peer_comm; significant only at the local_leader (integer). .TP 1i tag -Message tag used to identify new intercommunicator (integer). +Message tag used to identify new intercommunicator (integer). .SH OUTPUT PARAMETERS .ft R @@ -58,7 +58,7 @@ Created intercommunicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -67,7 +67,7 @@ This call creates an intercommunicator. It is collective over the union of the l This call uses point-to-point communication with communicator peer_comm, and with tag tag between the leaders. Thus, care must be taken that there be no pending communication on peer_comm that could interfere with this communication. -If multiple MPI_Intercomm_creates are being made, they should use different tags (more precisely, they should ensure that the local and remote leaders are using different tags for each MPI_intercomm_create). 
+If multiple MPI_Intercomm_creates are being made, they should use different tags (more precisely, they should ensure that the local and remote leaders are using different tags for each MPI_Intercomm_create). .SH NOTES We recommend using a dedicated peer communicator, such as a duplicate of MPI_COMM_WORLD, to avoid trouble with peer communicators. @@ -76,14 +76,14 @@ The MPI 1.1 Standard contains two mutually exclusive comments on the input intracommunicators. One says that their respective groups must be disjoint; the other that the leaders can be the same process. After some discussion by the MPI Forum, it has been decided that the groups must -be disjoint. Note that the +be disjoint. Note that the .B reason given for this in the standard is .B not -the reason for this choice; rather, the +the reason for this choice; rather, the .B other operations on -intercommunicators (like +intercommunicators (like .I MPI_Intercomm_merge ) do not make sense if the groups are not disjoint. @@ -92,7 +92,7 @@ groups are not disjoint. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors.
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Intercomm_merge diff --git a/ompi/mpi/man/man3/MPI_Intercomm_merge.3in b/ompi/mpi/man/man3/MPI_Intercomm_merge.3in index 6da078e777c..6d1e2b2a326 100644 --- a/ompi/mpi/man/man3/MPI_Intercomm_merge.3in +++ b/ompi/mpi/man/man3/MPI_Intercomm_merge.3in @@ -12,7 +12,7 @@ .SH C Syntax .nf #include -int MPI_Intercomm_merge(MPI_Comm \fIintercomm\fP, int\fI high\fP, +int MPI_Intercomm_merge(MPI_Comm \fIintercomm\fP, int\fI high\fP, MPI_Comm\fI *newintracomm\fP) .fi @@ -20,8 +20,8 @@ int MPI_Intercomm_merge(MPI_Comm \fIintercomm\fP, int\fI high\fP, .nf INCLUDE 'mpif.h' MPI_INTERCOMM_MERGE(\fIINTERCOMM, HIGH, NEWINTRACOMM, IERROR\fP) - INTEGER \fIINTERCOMM, NEWINTRACOMM, IERROR\fP - LOGICAL \fIHIGH\fP + INTEGER \fIINTERCOMM, NEWINTRACOMM, IERROR\fP + LOGICAL \fIHIGH\fP .fi .SH C++ Syntax @@ -47,17 +47,17 @@ Created intracommunicator (type indicator). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -This function creates an intracommunicator from the union of the two groups that are associated with intercomm. All processes should provide the same high value within each of the two groups. If processes in one group provide the value high = false and processes in the other group provide the value high = true, then the union orders the "low" group before the "high" group. If all processes provide the same high argument, then the order of the union is arbitrary. This call is blocking and collective within the union of the two groups. +This function creates an intracommunicator from the union of the two groups that are associated with intercomm. All processes should provide the same high value within each of the two groups. 
If processes in one group provide the value high = false and processes in the other group provide the value high = true, then the union orders the "low" group before the "high" group. If all processes provide the same high argument, then the order of the union is arbitrary. This call is blocking and collective within the union of the two groups. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO MPI_Intercomm_create diff --git a/ompi/mpi/man/man3/MPI_Iprobe.3in b/ompi/mpi/man/man3/MPI_Iprobe.3in index b666543c158..3d09716a85f 100644 --- a/ompi/mpi/man/man3/MPI_Iprobe.3in +++ b/ompi/mpi/man/man3/MPI_Iprobe.3in @@ -21,7 +21,7 @@ int MPI_Iprobe(int \fIsource\fP, int\fI tag\fP, MPI_Comm\fI comm\fP, int\fI *fla INCLUDE 'mpif.h' MPI_IPROBE(\fISOURCE, TAG, COMM, FLAG, STATUS, IERROR\fP) LOGICAL \fIFLAG\fP - INTEGER \fISOURCE, TAG, COMM, STATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER \fISOURCE, TAG, COMM, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -55,13 +55,13 @@ Status object (status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -The MPI_Probe and MPI_Iprobe operations allow checking of incoming messages without actual receipt of them. The user can then decide how to receive them, based on the information returned by the probe (basically, the information returned by status). In particular, the user may allocate memory for the receive buffer, according to the length of the probed message. +The MPI_Probe and MPI_Iprobe operations allow checking of incoming messages without actual receipt of them. The user can then decide how to receive them, based on the information returned by the probe (basically, the information returned by status). In particular, the user may allocate memory for the receive buffer, according to the length of the probed message. .sp -MPI_Iprobe(source, tag, comm, flag, status) returns flag = true if there is a message that can be received and that matches the pattern specified by the arguments source, tag, and comm. The call matches the same message that would have been received by a call to MPI_Recv(\&..., source, tag, comm, status) executed at the same point in the program, and returns in status the same value that would have been returned by MPI_Recv(). Otherwise, the call returns flag = false, and leaves status undefined. 
+MPI_Iprobe(source, tag, comm, flag, status) returns flag = true if there is a message that can be received and that matches the pattern specified by the arguments source, tag, and comm. The call matches the same message that would have been received by a call to MPI_Recv(\&..., source, tag, comm, status) executed at the same point in the program, and returns in status the same value that would have been returned by MPI_Recv(). Otherwise, the call returns flag = false, and leaves status undefined. .sp If MPI_Iprobe returns flag = true, then the content of the status object can be subsequently accessed as described in Section 3.2.5 of the MPI-1 Standard, "Return Status," to find the source, tag, and length of the probed message. .sp @@ -69,18 +69,18 @@ A subsequent receive executed with the same context, and the source and tag retu .sp The source argument of MPI_Probe can be MPI_ANY_SOURCE, and the tag argument can be MPI_ANY_TAG, so that one can probe for messages from an arbitrary source and/or with an arbitrary tag. However, a specific communication context must be provided with the comm argument. .sp -If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. +If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. .sp -It is not necessary to receive a message immediately after it has been probed for, and the same message may be probed for several times before it is received. +It is not necessary to receive a message immediately after it has been probed for, and the same message may be probed for several times before it is received. .sp .SH NOTE -Users of libmpi-mt should remember that two threads may do an MPI_Iprobe that actually returns true for the same message for both threads. 
+Users of libmpi-mt should remember that two threads may do an MPI_Iprobe that actually returns true for the same message for both threads. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Irecv.3in b/ompi/mpi/man/man3/MPI_Irecv.3in index b8c26cb85d6..ccbad0d95f9 100644 --- a/ompi/mpi/man/man3/MPI_Irecv.3in +++ b/ompi/mpi/man/man3/MPI_Irecv.3in @@ -12,7 +12,7 @@ .SH C Syntax .nf #include -int MPI_Irecv(void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\fP, +int MPI_Irecv(void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\fP, int\fI source\fP, int\fI tag\fP, MPI_Comm\fI comm\fP, MPI_Request\fI *request\fP) .fi @@ -22,13 +22,13 @@ INCLUDE 'mpif.h' MPI_IRECV(\fIBUF, COUNT, DATATYPE, SOURCE, TAG, COMM, REQUEST, IERROR\fP) \fIBUF\fP(*) - INTEGER \fICOUNT, DATATYPE, SOURCE, TAG, COMM, REQUEST, IERROR\fP + INTEGER \fICOUNT, DATATYPE, SOURCE, TAG, COMM, REQUEST, IERROR\fP .fi .SH C++ Syntax .nf #include -Request Comm::Irecv(void* \fIbuf\fP, int \fIcount\fP, const Datatype& +Request Comm::Irecv(void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIsource\fP, int \fItag\fP) const .fi @@ -61,11 +61,11 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or wait for its completion. +Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or wait for its completion. .sp A nonblocking receive call indicates that the system may start writing data into the receive buffer. The receiver should not access any part of the receive buffer after a nonblocking receive operation is called, until the receive completes. 
.sp @@ -75,7 +75,7 @@ A receive request can be determined being completed by calling the MPI_Wait, MPI Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO MPI_Recv diff --git a/ompi/mpi/man/man3/MPI_Irsend.3in b/ompi/mpi/man/man3/MPI_Irsend.3in index ad03754f207..fbc144a2088 100644 --- a/ompi/mpi/man/man3/MPI_Irsend.3in +++ b/ompi/mpi/man/man3/MPI_Irsend.3in @@ -22,13 +22,13 @@ int MPI_Irsend(const void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\ INCLUDE 'mpif.h' MPI_IRSEND(\fIBUF, COUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP) \fIBUF\fP(*) - INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP + INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP .fi .SH C++ Syntax .nf #include -Request Comm::Irsend(const void* \fIbuf\fP, int \fIcount\fP, const +Request Comm::Irsend(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi @@ -61,11 +61,11 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Irsend starts a ready-mode nonblocking send. Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or to wait for its completion. +MPI_Irsend starts a ready-mode nonblocking send. Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or to wait for its completion. .sp A nonblocking send call indicates that the system may start copying data out of the send buffer. The sender should not modify any part of the send buffer after a nonblocking send operation is called, until the send completes. @@ -73,7 +73,7 @@ A nonblocking send call indicates that the system may start copying data out of Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. 
C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Rsend diff --git a/ompi/mpi/man/man3/MPI_Is_thread_main.3in b/ompi/mpi/man/man3/MPI_Is_thread_main.3in index 31fa2a43876..86ef8daa4a5 100644 --- a/ompi/mpi/man/man3/MPI_Is_thread_main.3in +++ b/ompi/mpi/man/man3/MPI_Is_thread_main.3in @@ -37,7 +37,7 @@ True if calling thread is main thread (boolean). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -58,7 +58,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. 
diff --git a/ompi/mpi/man/man3/MPI_Isend.3in b/ompi/mpi/man/man3/MPI_Isend.3in index ce79f9569fb..d5a40f2ea64 100644 --- a/ompi/mpi/man/man3/MPI_Isend.3in +++ b/ompi/mpi/man/man3/MPI_Isend.3in @@ -22,13 +22,13 @@ int MPI_Isend(const void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\f INCLUDE 'mpif.h' MPI_ISEND(\fIBUF, COUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP) \fIBUF\fP(*) - INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP + INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP .fi .SH C++ Syntax .nf #include -Request Comm::Isend(const void* \fIbuf\fP, int \fIcount\fP, const +Request Comm::Isend(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi @@ -61,11 +61,11 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Isend starts a standard-mode, nonblocking send. Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or wait for its completion. +MPI_Isend starts a standard-mode, nonblocking send. Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or wait for its completion. .sp A nonblocking send call indicates that the system may start copying data out of the send buffer. The sender should not modify any part of the send buffer after a nonblocking send operation is called, until the send completes. .sp @@ -75,7 +75,7 @@ A send request can be determined being completed by calling the MPI_Wait, MPI_Wa Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Send diff --git a/ompi/mpi/man/man3/MPI_Issend.3in b/ompi/mpi/man/man3/MPI_Issend.3in index 28044169167..0e8e4be4879 100644 --- a/ompi/mpi/man/man3/MPI_Issend.3in +++ b/ompi/mpi/man/man3/MPI_Issend.3in @@ -22,13 +22,13 @@ int MPI_Issend(const void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\ INCLUDE 'mpif.h' MPI_ISSEND(\fIBUF, COUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP) \fIBUF\fP(*) - INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP + INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP .fi .SH C++ Syntax .nf #include -Request Comm::Issend(const void* \fIbuf\fP, int \fIcount\fP, const +Request Comm::Issend(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi @@ -61,13 +61,13 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R Starts a synchronous mode, nonblocking send. 
.sp -Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or wait for its completion. +Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or wait for its completion. .sp A nonblocking send call indicates that the system may start copying data out of the send buffer. The sender should not modify any part of the send buffer after a nonblocking send operation is called, until the send completes. @@ -75,7 +75,7 @@ A nonblocking send call indicates that the system may start copying data out of Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO MPI_Ssend diff --git a/ompi/mpi/man/man3/MPI_Keyval_create.3in b/ompi/mpi/man/man3/MPI_Keyval_create.3in index 7aa32c6290e..4679513eb52 100644 --- a/ompi/mpi/man/man3/MPI_Keyval_create.3in +++ b/ompi/mpi/man/man3/MPI_Keyval_create.3in @@ -45,38 +45,38 @@ Key value for future access (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_create_keyval instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_create_keyval instead. .sp -This deprecated routine is not available in C++. +This deprecated routine is not available in C++. .sp Generates a new attribute key. Keys are locally unique in a process and opaque to the user, though they are explicitly stored in integers. Once allocated, the key value can be used to associate attributes and access them on any locally defined communicator. .sp -The copy_fn function is invoked when a communicator is duplicated by MPI_COMM_DUP. copy_fn should be of type MPI_Copy_function, which is defined as follows: +The copy_fn function is invoked when a communicator is duplicated by MPI_COMM_DUP. 
copy_fn should be of type MPI_Copy_function, which is defined as follows: .sp .nf - typedef int MPI_Copy_function(MPI_Comm oldcomm, int keyval, - void *extra_state, void *attribute_val_in, + typedef int MPI_Copy_function(MPI_Comm oldcomm, int keyval, + void *extra_state, void *attribute_val_in, void *attribute_val_out, int *flag) .fi -A Fortran declaration for such a function is as follows: +A Fortran declaration for such a function is as follows: .sp .nf SUBROUTINE COPY_FUNCTION(OLDCOMM, KEYVAL, EXTRA_STATE, ATTRIBUTE_VAL_IN, ATTRIBUTE_VAL_OUT, FLAG, IERR) INTEGER OLDCOMM, KEYVAL, EXTRA_STATE, - ATTRIBUTE_VAL_IN, ATTRIBUTE_VAL_OUT, IERR - LOGICAL FLAG + ATTRIBUTE_VAL_IN, ATTRIBUTE_VAL_OUT, IERR + LOGICAL FLAG .fi .sp -The copy callback function is invoked for each key value in oldcomm in arbitrary order. Each call to the copy callback is made with a key value and its corresponding attribute. If it returns flag = 0, then the attribute is deleted in the duplicated communicator. Otherwise ( flag = 1), the new attribute value is set to the value returned in attribute_val_out. The function returns MPI_SUCCESS on success and an error code on failure (in which case MPI_Comm_dup will fail). +The copy callback function is invoked for each key value in oldcomm in arbitrary order. Each call to the copy callback is made with a key value and its corresponding attribute. If it returns flag = 0, then the attribute is deleted in the duplicated communicator. Otherwise ( flag = 1), the new attribute value is set to the value returned in attribute_val_out. The function returns MPI_SUCCESS on success and an error code on failure (in which case MPI_Comm_dup will fail). .sp copy_fn may be specified as MPI_NULL_COPY_FN or MPI_DUP_FN from either C or -Fortran; MPI_NULL_COPY_FN is a function that does nothing other than return flag = 0, and MPI_SUCCESS. 
MPI_DUP_FN is a simple-minded copy function that sets flag = 1, returns the value of attribute_val_in in attribute_val_out, and returns MPI_SUCCESS. +Fortran; MPI_NULL_COPY_FN is a function that does nothing other than return flag = 0, and MPI_SUCCESS. MPI_DUP_FN is a simple-minded copy function that sets flag = 1, returns the value of attribute_val_in in attribute_val_out, and returns MPI_SUCCESS. .SH NOTES Key values are global (available for any and all communicators). @@ -84,35 +84,35 @@ Key values are global (available for any and all communicators). There are subtle differences between C and Fortran that require that the copy_fn be written in the same language that MPI_Keyval_create is called from. This should not be a problem for most users; only programmers using both Fortran and C in the same program need to be sure that they follow this rule. .sp Even though both formal arguments attribute_val_in -and attribute_val_out are of type void*, their usage differs. The C copy function is passed by MPI in attribute_val_in the value of the attribute, and in attribute_val_out the address of the attribute, so as to allow the function to return the (new) attribute value. The use of type void* for both is to avoid messy type casts. +and attribute_val_out are of type void*, their usage differs. The C copy function is passed by MPI in attribute_val_in the value of the attribute, and in attribute_val_out the address of the attribute, so as to allow the function to return the (new) attribute value. The use of type void* for both is to avoid messy type casts. .sp -A valid copy function is one that completely duplicates the information by making a full duplicate copy of the data structures implied by an attribute; another might just make another reference to that data structure, while using a reference-count mechanism. Other types of attributes might not copy at all (they might be specific to oldcomm only). 
+A valid copy function is one that completely duplicates the information by making a full duplicate copy of the data structures implied by an attribute; another might just make another reference to that data structure, while using a reference-count mechanism. Other types of attributes might not copy at all (they might be specific to oldcomm only). .sp -Analogous to copy_fn is a callback deletion function, defined as follows. The delete_fn function is invoked when a communicator is deleted by MPI_Comm_free or when a call is made explicitly to MPI_Attr_delete. delete_fn should be of type MPI_Delete_function, which is defined as follows: +Analogous to copy_fn is a callback deletion function, defined as follows. The delete_fn function is invoked when a communicator is deleted by MPI_Comm_free or when a call is made explicitly to MPI_Attr_delete. delete_fn should be of type MPI_Delete_function, which is defined as follows: .sp .nf - typedef int MPI_Delete_function(MPI_Comm comm, int keyval, - void *attribute_val, void *extra_state); + typedef int MPI_Delete_function(MPI_Comm comm, int keyval, + void *attribute_val, void *extra_state); .fi .sp -A Fortran declaration for such a function is as follows: +A Fortran declaration for such a function is as follows: .sp .nf SUBROUTINE DELETE_FUNCTION(COMM, KEYVAL,ATTRIBUTE_VAL, EXTRA_STATE, IERR) - INTEGER COMM, KEYVAL, ATTRIBUTE_VAL, EXTRA_STATE, IERR + INTEGER COMM, KEYVAL, ATTRIBUTE_VAL, EXTRA_STATE, IERR .fi .sp -This function is called by MPI_Comm_free, MPI_Attr_delete, and MPI_Attr_put to do whatever is needed to remove an attribute. The function returns MPI_SUCCESS on success and an error code on failure (in which case MPI_COMM_FREE will fail). +This function is called by MPI_Comm_free, MPI_Attr_delete, and MPI_Attr_put to do whatever is needed to remove an attribute. The function returns MPI_SUCCESS on success and an error code on failure (in which case MPI_COMM_FREE will fail). 
.sp -delete_fn may be specified as MPI_NULL_DELETE_FN from either C or FORTRAN; MPI_NULL_DELETE_FN is a function that does nothing, other than returning MPI_SUCCESS. +delete_fn may be specified as MPI_NULL_DELETE_FN from either C or FORTRAN; MPI_NULL_DELETE_FN is a function that does nothing, other than returning MPI_SUCCESS. .sp -The special key value MPI_KEYVAL_INVALID is never returned by MPI_Keyval_create. Therefore, it can be used for static initialization of key values. +The special key value MPI_KEYVAL_INVALID is never returned by MPI_Keyval_create. Therefore, it can be used for static initialization of key values. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO MPI_Keyval_free diff --git a/ompi/mpi/man/man3/MPI_Keyval_free.3in b/ompi/mpi/man/man3/MPI_Keyval_free.3in index 9d9e877e6d3..7c2d152334a 100644 --- a/ompi/mpi/man/man3/MPI_Keyval_free.3in +++ b/ompi/mpi/man/man3/MPI_Keyval_free.3in @@ -19,7 +19,7 @@ int MPI_Keyval_free(int *\fIkeyval\fP) .nf INCLUDE 'mpif.h' MPI_KEYVAL_FREE(\fIKEYVAL, IERROR\fP) - INTEGER \fIKEYVAL, IERROR\fP + INTEGER \fIKEYVAL, IERROR\fP .fi @@ -33,25 +33,25 @@ Frees the integer key value (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_free_keyval instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_free_keyval instead. .sp -This deprecated routine is not available in C++. +This deprecated routine is not available in C++. .sp -Frees an extant attribute key. This function sets the value of keyval to MPI_KEYVAL_INVALID. Note that it is not erroneous to free an attribute key that is in use, because the actual free does not transpire until after all references (in other communicators on the process) to the key have been freed. These references need to be explicitly freed by the program, either via calls to MPI_Attr_delete that free one attribute instance, or by calls to MPI_Comm_free that free all attribute instances associated with the freed communicator. +Frees an extant attribute key. This function sets the value of keyval to MPI_KEYVAL_INVALID. Note that it is not erroneous to free an attribute key that is in use, because the actual free does not transpire until after all references (in other communicators on the process) to the key have been freed. These references need to be explicitly freed by the program, either via calls to MPI_Attr_delete that free one attribute instance, or by calls to MPI_Comm_free that free all attribute instances associated with the freed communicator. 
.SH NOTE .ft R -Key values are global (they can be used with any and all communicators). +Key values are global (they can be used with any and all communicators). .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Keyval_create diff --git a/ompi/mpi/man/man3/MPI_Lookup_name.3in b/ompi/mpi/man/man3/MPI_Lookup_name.3in index 8dcc9f5b97f..ed944e49798 100644 --- a/ompi/mpi/man/man3/MPI_Lookup_name.3in +++ b/ompi/mpi/man/man3/MPI_Lookup_name.3in @@ -52,7 +52,7 @@ port_name a port name (string). .TP 1.4i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -118,7 +118,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Message_c2f.3in b/ompi/mpi/man/man3/MPI_Message_c2f.3in new file mode 100644 index 00000000000..a13fce697dd --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Message_c2f.3in @@ -0,0 +1 @@ +.so man3/MPI_Comm_f2c.3 diff --git a/ompi/mpi/man/man3/MPI_Message_f2c.3in b/ompi/mpi/man/man3/MPI_Message_f2c.3in new file mode 100644 index 00000000000..a13fce697dd --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Message_f2c.3in @@ -0,0 +1 @@ +.so man3/MPI_Comm_f2c.3 diff --git a/ompi/mpi/man/man3/MPI_Mprobe.3in b/ompi/mpi/man/man3/MPI_Mprobe.3in index 109d9072793..7b8a8e87d64 100644 --- a/ompi/mpi/man/man3/MPI_Mprobe.3in +++ b/ompi/mpi/man/man3/MPI_Mprobe.3in @@ -13,7 +13,7 @@ .SH C Syntax .nf #include -int MPI_Mprobe(int \fIsource\fP, int\fI tag\fP, MPI_Comm\fI comm\fP, +int MPI_Mprobe(int \fIsource\fP, int\fI tag\fP, MPI_Comm\fI comm\fP, MPI_Message\fI *message\fP, MPI_Status\fI *status\fP) .fi @@ -22,7 +22,7 @@ int MPI_Mprobe(int \fIsource\fP, int\fI tag\fP, MPI_Comm\fI comm\fP, INCLUDE 'mpif.h' MPI_MPROBE(\fISOURCE, TAG, COMM, MESSAGE, STATUS, IERROR\fP) INTEGER \fISOURCE, TAG, COMM, MESSAGE\fP - INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -54,7 +54,7 @@ Status object (status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -82,7 +82,7 @@ This is an MPI-3 function and has no C++ binding. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ -functions do not return errors. +functions do not return errors. .sp Before the error value is returned, the current MPI error handler is called. 
By default, this error handler aborts the MPI job, except for diff --git a/ompi/mpi/man/man3/MPI_Mrecv.3in b/ompi/mpi/man/man3/MPI_Mrecv.3in index e9b5285d022..d8005e8b15e 100644 --- a/ompi/mpi/man/man3/MPI_Mrecv.3in +++ b/ompi/mpi/man/man3/MPI_Mrecv.3in @@ -12,7 +12,7 @@ .SH C Syntax .nf #include -int MPI_Mrecv(void \fI*buf\fP, int\fI count\fP, MPI_Datatype\fI type\fP, +int MPI_Mrecv(void \fI*buf\fP, int\fI count\fP, MPI_Datatype\fI type\fP, MPI_Message\fI *message\fP, MPI_Status\fI *status\fP) .fi @@ -22,7 +22,7 @@ INCLUDE 'mpif.h' MPI_MRECV(\fIBUF, COUNT, DATATYPE, MESSAGE, STATUS, IERROR\fP) \fIBUF(*)\fP INTEGER \fCOUNT, DATATYPE, MESSAGE\fP - INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -52,7 +52,7 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -70,7 +70,7 @@ This is an MPI-3 function and has no C++ binding. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ -functions do not return errors. +functions do not return errors. .sp Before the error value is returned, the current MPI error handler is called. 
By default, this error handler aborts the MPI job, except for diff --git a/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in b/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in index 1bc4d9ce6c9..79d4a4df810 100644 --- a/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in +++ b/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in @@ -15,15 +15,15 @@ .nf #include int MPI_Neighbor_alltoallv(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], - const int \fIsdispls\f[]P, MPI_Datatype \fIsendtype\fP, + const int \fIsdispls\fP[], MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, const int\fI recvcounts\fP[], const int \fIrdispls\fP[], MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP) int MPI_Ineighbor_alltoallv(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], - const int \fIsdispls\f[]P, MPI_Datatype \fIsendtype\fP, + const int \fIsdispls\fP[], MPI_Datatype \fIsendtype\fP, void *\fIrecvbuf\fP, const int\fI recvcounts\fP[], const int \fIrdispls\fP[], MPI_Datatype \fIrecvtype\fP, MPI_Comm \fIcomm\fP, - MPI_Request \fI*request\fP) + MPI_Request \fI*request\fP) .fi .SH Fortran Syntax diff --git a/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in b/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in index 7dcfdbc9a92..3db3b9a0cbf 100644 --- a/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in +++ b/ompi/mpi/man/man3/MPI_Neighbor_alltoallw.3in @@ -16,14 +16,14 @@ .nf #include int MPI_Neighbor_alltoallw(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], - const int \fIsdispls\fP[], const MPI_Datatype \fIsendtypes\fP[], - void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const int \fIrdispls\fP[], - const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP) + const MPI_Aint \fIsdispls\fP[], const MPI_Datatype \fIsendtypes\fP[], + void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const MPI_Aint \fIrdispls\fP[], + const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP) int MPI_Ineighbor_alltoallw(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], - const int \fIsdispls\fP[], const 
MPI_Datatype \fIsendtypes\fP[], - void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const int \fIrdispls\fP[], - const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) + const MPI_Aint \fIsdispls\fP[], const MPI_Datatype \fIsendtypes\fP[], + void *\fIrecvbuf\fP, const int \fIrecvcounts\fP[], const MPI_Aint \fIrdispls\fP[], + const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP, MPI_Request \fI*request\fP) .fi .SH Fortran Syntax @@ -33,16 +33,18 @@ MPI_NEIGHBOR_ALLTOALLW(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPES, COMM, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPES(*)\fP - INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPES(*)\fP + INTEGER \fISENDCOUNTS(*), SENDTYPES(*)\fP + INTEGER \fIRECVCOUNTS(*), RECVTYPES(*)\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fISDISPLS(*), RDISPLS(*)\fP INTEGER \fICOMM, IERROR\fP MPI_INEIGHBOR_ALLTOALLW(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPES, COMM, REQUEST, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPES(*)\fP - INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPES(*)\fP + INTEGER \fISENDCOUNTS(*), SENDTYPES(*)\fP + INTEGER \fIRECVCOUNTS(*), RECVTYPES(*)\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fISDISPLS(*), RDISPLS(*)\fP INTEGER \fICOMM, REQUEST, IERROR\fP .fi diff --git a/ompi/mpi/man/man3/MPI_Op_commutative.3in b/ompi/mpi/man/man3/MPI_Op_commutative.3in new file mode 100644 index 00000000000..536cb9a7adb --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Op_commutative.3in @@ -0,0 +1,68 @@ +.\" -*- nroff -*- +.\" Copyright 2015 FUJITSU LIMITED. All rights reserved. +.\" $COPYRIGHT$ +.TH MPI_Op_commutative 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Op_commutative\fP \- Query of commutativity of reduction operation. 
+ +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include <mpi.h> +int MPI_Op_commutative(MPI_Op \fIop\fP, int *\fIcommute\fP) + +.fi +.SH Fortran Syntax +.nf +USE MPI +! or the older form: INCLUDE 'mpif.h' +MPI_OP_COMMUTATIVE(\fIOP, COMMUTE, IERROR\fP) + LOGICAL \fICOMMUTE\fP + INTEGER \fIOP, IERROR\fP + +.fi +.SH Fortran 2008 Syntax +.nf +USE mpi_f08 +MPI_Op_commutative(\fIop\fP, \fIcommute\fP, \fIierror\fP) + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + LOGICAL, INTENT(OUT) :: \fIcommute\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +.fi +.SH C++ Syntax +.nf +#include <mpi.h> +bool Op::Is_commutative() + +.fi +.SH INPUT PARAMETER +.TP 1i +op +Operation (handle). + +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +commute +True if op is commutative, false otherwise (logical). +.ft R +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +Reduction operations can be queried for their commutativity. + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. + +.SH SEE ALSO +.sp +MPI_Op_create + diff --git a/ompi/mpi/man/man3/MPI_Op_create.3in b/ompi/mpi/man/man3/MPI_Op_create.3in index 0bc85b22d68..9c6aa61bb54 100644 --- a/ompi/mpi/man/man3/MPI_Op_create.3in +++ b/ompi/mpi/man/man3/MPI_Op_create.3in @@ -48,29 +48,29 @@ Operation (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer).
+Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Op_create binds a user-defined global operation to an op handle that can subsequently be used in MPI_Reduce, MPI_Allreduce, MPI_Reduce_scatter, and MPI_Scan. The user-defined operation is assumed to be associative. If commute = true, then the operation should be both commutative and associative. If commute = false, then the order of operands is fixed and is defined to be in ascending, process rank order, beginning with process zero. The order of evaluation can be changed, taking advantage of the associativity of the operation. If commute = true then the order of evaluation can be changed, taking advantage of commutativity and associativity. .sp -\fIfunction\fP is the user-defined function, which must have the following four arguments: invec, inoutvec, len, and datatype. +\fIfunction\fP is the user-defined function, which must have the following four arguments: invec, inoutvec, len, and datatype. .sp The ANSI-C prototype for the function is the following: .sp .nf typedef void MPI_User_function(void *invec, void *inoutvec, - int *len, + int *len, MPI_Datatype *datatype); .fi .sp The Fortran declaration of the user-defined function appears below. .sp .nf - FUNCTION USER_FUNCTION( INVEC(*), INOUTVEC(*), LEN, TYPE) - INVEC(LEN), INOUTVEC(LEN) - INTEGER LEN, TYPE + FUNCTION USER_FUNCTION( INVEC(*), INOUTVEC(*), LEN, TYPE) + INVEC(LEN), INOUTVEC(LEN) + INTEGER LEN, TYPE .fi .sp The datatype argument is a handle to the data type that was passed into the @@ -84,67 +84,67 @@ pointwise evaluation of the reduce operator on len elements: i.e, the function returns in inoutvec[i] the value invec[i] o inoutvec[i], for i = 0\,...,\ count-1, where o is the combining operation computed by the function. .sp -By internally comparing the value of the datatype argument to known, global handles, it is possible to overload the use of a single user-defined function for several different data types. 
+By internally comparing the value of the datatype argument to known, global handles, it is possible to overload the use of a single user-defined function for several different data types. .sp -General datatypes may be passed to the user function. However, use of datatypes that are not contiguous is likely to lead to inefficiencies. +General datatypes may be passed to the user function. However, use of datatypes that are not contiguous is likely to lead to inefficiencies. .sp No MPI communication function may be called inside the user function. -MPI_Abort may be called inside the function in case of an error. +MPI_Abort may be called inside the function in case of an error. .SH NOTES Suppose one defines a library of user-defined reduce -functions that are overloaded: The datatype argument is used to select the right execution path at each invocation, according to the types of the operands. The user-defined reduce function cannot "decode" the datatype argument that it is passed, and cannot identify, by itself, the correspondence between the datatype handles and the datatype they represent. This correspondence was established when the datatypes were created. Before the library is used, a library initialization preamble must be executed. This preamble code will define the datatypes that are used by the library and store handles to these datatypes in global, static variables that are shared by the user code and the library code. +functions that are overloaded: The datatype argument is used to select the right execution path at each invocation, according to the types of the operands. The user-defined reduce function cannot "decode" the datatype argument that it is passed, and cannot identify, by itself, the correspondence between the datatype handles and the datatype they represent. This correspondence was established when the datatypes were created. Before the library is used, a library initialization preamble must be executed. 
This preamble code will define the datatypes that are used by the library and store handles to these datatypes in global, static variables that are shared by the user code and the library code. -\fBExample:\fP Example of user-defined reduce: +\fBExample:\fP Example of user-defined reduce: .sp Compute the product of an array of complex numbers, in C. .sp .nf - typedef struct { - double real,imag; - } Complex; - - /* the user-defined function - */ - void myProd( Complex *in, Complex *inout, int *len, - MPI_Datatype *dptr ) - { - int i; - Complex c; - - for (i=0; i< *len; ++i) { - c.real = inout->real*in->real - - inout->imag*in->imag; - c.imag = inout->real*in->imag + - inout->imag*in->real; - *inout = c; - in++; inout++; - } - } - - /* and, to call it\&... - */ + typedef struct { + double real,imag; + } Complex; + + /* the user-defined function + */ + void myProd( Complex *in, Complex *inout, int *len, + MPI_Datatype *dptr ) + { + int i; + Complex c; + + for (i=0; i< *len; ++i) { + c.real = inout->real*in->real - + inout->imag*in->imag; + c.imag = inout->real*in->imag + + inout->imag*in->real; + *inout = c; + in++; inout++; + } + } + + /* and, to call it\&... + */ \&... 
- /* each process has an array of 100 Complexes - */ - Complex a[100], answer[100]; - MPI_Op myOp; - MPI_Datatype ctype; - - /* explain to MPI how type Complex is defined - */ - MPI_Type_contiguous( 2, MPI_DOUBLE, &ctype ); - MPI_Type_commit( &ctype ); - /* create the complex-product user-op - */ - MPI_Op_create( myProd, True, &myOp ); - - MPI_Reduce( a, answer, 100, ctype, myOp, root, comm ); - - /* At this point, the answer, which consists of 100 Complexes, - * resides on process root - */ + /* each process has an array of 100 Complexes + */ + Complex a[100], answer[100]; + MPI_Op myOp; + MPI_Datatype ctype; + + /* explain to MPI how type Complex is defined + */ + MPI_Type_contiguous( 2, MPI_DOUBLE, &ctype ); + MPI_Type_commit( &ctype ); + /* create the complex-product user-op + */ + MPI_Op_create( myProd, True, &myOp ); + + MPI_Reduce( a, answer, 100, ctype, myOp, root, comm ); + + /* At this point, the answer, which consists of 100 Complexes, + * resides on process root + */ .fi .sp The Fortran version of MPI_Reduce will invoke a user-defined reduce function using the Fortran calling conventions and will pass a Fortran-type datatype argument; the C version will use C calling convention and the C representation of a datatype handle. Users who plan to mix languages should define their reduction functions accordingly. @@ -154,11 +154,11 @@ The Fortran version of MPI_Reduce will invoke a user-defined reduce function usi The reduction functions ( .I MPI_Op ) do not return an error value. As a result, -if the functions detect an error, all they can do is either call +if the functions detect an error, all they can do is either call .I MPI_Abort or silently skip the problem. Thus, if you change the error handler from .I MPI_ERRORS_ARE_FATAL -to something else, for example, +to something else, for example, .I MPI_ERRORS_RETURN , then no error may be indicated. @@ -170,7 +170,7 @@ all collective routines return the same error value. 
Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Op_free.3in b/ompi/mpi/man/man3/MPI_Op_free.3in index 332a62fd70f..4573682d2f5 100644 --- a/ompi/mpi/man/man3/MPI_Op_free.3in +++ b/ompi/mpi/man/man3/MPI_Op_free.3in @@ -19,7 +19,7 @@ int MPI_Op_free(MPI_Op *\fIop\fP) .nf INCLUDE 'mpif.h' MPI_OP_FREE(\fIOP, IERROR\fP) - INTEGER \fIOP, IERROR\fP + INTEGER \fIOP, IERROR\fP .fi .SH C++ Syntax @@ -37,17 +37,17 @@ Operation (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Marks a user-defined reduction operation for deallocation and sets \fIop\fP to MPI_OP_NULL. +Marks a user-defined reduction operation for deallocation and sets \fIop\fP to MPI_OP_NULL. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .sp diff --git a/ompi/mpi/man/man3/MPI_Open_port.3in b/ompi/mpi/man/man3/MPI_Open_port.3in index 744bebc868e..be8040b17cc 100644 --- a/ompi/mpi/man/man3/MPI_Open_port.3in +++ b/ompi/mpi/man/man3/MPI_Open_port.3in @@ -20,7 +20,7 @@ int MPI_Open_port(MPI_Info \fIinfo\fP, char *\fIport_name\fP) INCLUDE 'mpif.h' MPI_OPEN_PORT(\fIINFO, PORT_NAME, IERROR\fP) CHARACTER*(*) \fIPORT_NAME\fP - INTEGER \fIINFO, IERROR\fP + INTEGER \fIINFO, IERROR\fP .fi .SH C++ Syntax @@ -42,22 +42,22 @@ port_name Newly established port (string). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Open_port establishes a network address, encoded in the \fIport_name\fP string, at which the server will be able to accept connections from clients. \fIport_name\fP is supplied by the system. +MPI_Open_port establishes a network address, encoded in the \fIport_name\fP string, at which the server will be able to accept connections from clients. \fIport_name\fP is supplied by the system. 
.sp MPI copies a system-supplied port name into \fIport_name\fP. \fIport_name\fP identifies the newly opened port and can be used by a client to contact the server. The maximum size string that may be supplied by the system is MPI_MAX_PORT_NAME. .SH SUPPORTED INFO KEYS -None. +None. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO MPI_Comm_accept diff --git a/ompi/mpi/man/man3/MPI_Pack.3in b/ompi/mpi/man/man3/MPI_Pack.3in index 1ebb54d3a05..25eb7ab345e 100644 --- a/ompi/mpi/man/man3/MPI_Pack.3in +++ b/ompi/mpi/man/man3/MPI_Pack.3in @@ -23,14 +23,14 @@ INCLUDE 'mpif.h' MPI_PACK(\fIINBUF, INCOUNT, DATATYPE, OUTBUF,OUTSIZE, POSITION, COMM, IERROR\fP) \fIINBUF(*), OUTBUF(*)\fP - INTEGER \fIINCOUNT, DATATYPE, OUTSIZE, POSITION, COMM, IERROR\fP + INTEGER \fIINCOUNT, DATATYPE, OUTSIZE, POSITION, COMM, IERROR\fP .fi .SH C++ Syntax .nf #include -void Datatype::Pack(const void* \fIinbuf\fP, int \fIincount\fP, void *\fIoutbuf\fP, - int \fIoutsize\fP, int& \fIposition\fP, const Comm &\fIcomm\fP) const +void Datatype::Pack(const void* \fIinbuf\fP, int \fIincount\fP, void *\fIoutbuf\fP, + int \fIoutsize\fP, int& \fIposition\fP, const Comm &\fIcomm\fP) const .fi .SH INPUT PARAMETERS @@ -65,7 +65,7 @@ Output buffer start (choice). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -76,24 +76,24 @@ The input value of \fIposition\fP is the first location in the output buffer to \fBExample:\fP An example using MPI_Pack: .sp .nf - int position, i, j, a[2]; - char buff[1000]; - - \&.... - - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - if (myrank == 0) - { - / * SENDER CODE */ - - position = 0; - MPI_Pack(&i, 1, MPI_INT, buff, 1000, &position, MPI_COMM_WORLD); - MPI_Pack(&j, 1, MPI_INT, buff, 1000, &position, MPI_COMM_WORLD); - MPI_Send( buff, position, MPI_PACKED, 1, 0, MPI_COMM_WORLD); - } - else /* RECEIVER CODE */ - MPI_Recv( a, 2, MPI_INT, 0, 0, MPI_COMM_WORLD) - + int position, i, j, a[2]; + char buff[1000]; + + \&.... 
+ + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == 0) + { + /* SENDER CODE */ + + position = 0; + MPI_Pack(&i, 1, MPI_INT, buff, 1000, &position, MPI_COMM_WORLD); + MPI_Pack(&j, 1, MPI_INT, buff, 1000, &position, MPI_COMM_WORLD); + MPI_Send( buff, position, MPI_PACKED, 1, 0, MPI_COMM_WORLD); + } + else /* RECEIVER CODE */ + MPI_Recv( a, 2, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } .fi @@ -101,7 +101,7 @@ The input value of \fIposition\fP is the first location in the output buffer to Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Pack_external.3in b/ompi/mpi/man/man3/MPI_Pack_external.3in index b468811ab65..818a7bd7621 100644 --- a/ompi/mpi/man/man3/MPI_Pack_external.3in +++ b/ompi/mpi/man/man3/MPI_Pack_external.3in @@ -17,7 +17,7 @@ #include int MPI_Pack_external(const char *\fIdatarep\fP, const void *\fIinbuf\fP, int \fIincount\fP, MPI_Datatype\fI datatype\fP, - void *\fIoutbuf\fP, MPI_Aint \fIoutsize\fP, + void *\fIoutbuf\fP, MPI_Aint \fIoutsize\fP, MPI_Aint *\fIposition\fP) .fi @@ -28,7 +28,7 @@ MPI_PACK_EXTERNAL(\fIDATAREP, INBUF, INCOUNT, DATATYPE, OUTBUF, OUTSIZE, POSITION, IERROR\fP) INTEGER \fIINCOUNT, DATATYPE, IERROR\fP - INTEGER (KIND=MPI_ADDRESS_KIND) \fIOUTSIZE, POSITION\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIOUTSIZE, POSITION\fP CHARACTER*(*) \fIDATAREP\fP \fIINBUF(*), OUTBUF(*)\fP @@ -38,7 +38,7 @@ MPI_PACK_EXTERNAL(\fIDATAREP, INBUF, INCOUNT, DATATYPE, #include void MPI::Datatype::Pack_external(const char* \fIdatarep\fP, const void* \fIinbuf\fP, int \fIincount\fP, - void* \fIoutbuf\fP, MPI::Aint \fIoutsize\fP, + void* \fIoutbuf\fP, MPI::Aint \fIoutsize\fP, MPI::Aint& \fIposition\fP) const .fi @@ -74,7 +74,7 @@ outbuf Output buffer start (choice). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -100,30 +100,30 @@ to MPI_Pack_external. \fBExample:\fP An example using MPI_Pack_external: .sp .nf - int position, i; - double msg[5]; + int position, i; + double msg[5]; char buf[1000]; \&... 
- MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - if (myrank == 0) { /* SENDER CODE */ + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == 0) { /* SENDER CODE */ position = 0; i = 5; /* number of doubles in msg[] */ MPI_Pack_external("external32", &i, 1, MPI_INT, - buf, 1000, &position); + buf, 1000, &position); MPI_Pack_external("external32", &msg, i, MPI_DOUBLE, - buf, 1000, &position); + buf, 1000, &position); MPI_Send(buf, position, MPI_BYTE, 1, 0, - MPI_COMM_WORLD); - } else { /* RECEIVER CODE */ + MPI_COMM_WORLD); + } else { /* RECEIVER CODE */ MPI_Recv(buf, 1, MPI_BYTE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Unpack_external("external32", buf, 1000, + MPI_Unpack_external("external32", buf, 1000, MPI_INT, &i, 1, &position); - MPI_Unpack_external("external32", buf, 1000, + MPI_Unpack_external("external32", buf, 1000, MPI_DOUBLE, &msg, i, &position); - } + } .fi .SH NOTES @@ -186,7 +186,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. 
diff --git a/ompi/mpi/man/man3/MPI_Pack_external_size.3in b/ompi/mpi/man/man3/MPI_Pack_external_size.3in index 42cba5de194..edc1e34389f 100644 --- a/ompi/mpi/man/man3/MPI_Pack_external_size.3in +++ b/ompi/mpi/man/man3/MPI_Pack_external_size.3in @@ -15,7 +15,7 @@ to write to a portable format .SH C Syntax .nf #include -int MPI_Pack_external_size(char *\fIdatarep\fP, int \fIincount\fP, +int MPI_Pack_external_size(char *\fIdatarep\fP, int \fIincount\fP, MPI_Datatype \fIdatatype\fP, MPI_Aint *\fIsize\fP) .fi @@ -24,15 +24,15 @@ int MPI_Pack_external_size(char *\fIdatarep\fP, int \fIincount\fP, INCLUDE 'mpif.h' MPI_PACK_EXTERNAL_SIZE(\fIDATAREP, INCOUNT, DATATYPE, SIZE, IERROR\fP) - INTEGER \fIINCOUNT, DATATYPE, IERROR\fP - INTEGER (KIND=MPI_ADDRESS_KIND) \fISIZE\fP + INTEGER \fIINCOUNT, DATATYPE, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fISIZE\fP CHARACTER*(*) \fIDATAREP\fP .fi .SH C++ Syntax .nf #include -MPI::Aint MPI::Datatype::Pack_external_size(const char* \fIdatarep\fP, +MPI::Aint MPI::Datatype::Pack_external_size(const char* \fIdatarep\fP, int \fIincount\fP) const .fi @@ -55,7 +55,7 @@ size Upper bound on size of packed message, in bytes (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -90,7 +90,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. 
diff --git a/ompi/mpi/man/man3/MPI_Pack_size.3in b/ompi/mpi/man/man3/MPI_Pack_size.3in index 8fa9233e396..8a38b4bccfb 100644 --- a/ompi/mpi/man/man3/MPI_Pack_size.3in +++ b/ompi/mpi/man/man3/MPI_Pack_size.3in @@ -20,7 +20,7 @@ int MPI_Pack_size(int \fIincount\fP, MPI_Datatype\fI datatype\fP, MPI_Comm\fI co .nf INCLUDE 'mpif.h' MPI_PACK_SIZE(\fIINCOUNT, DATATYPE, COMM, SIZE, IERROR\fP) - INTEGER \fIINCOUNT, DATATYPE, COMM, SIZE, IERROR\fP + INTEGER \fIINCOUNT, DATATYPE, COMM, SIZE, IERROR\fP .fi .SH C++ Syntax @@ -49,20 +49,20 @@ Upper bound on size of packed message, in bytes (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Pack_size allows the application to find out how much space is needed to pack a message. A call to MPI_Pack_size(incount, datatype, comm, size) returns in size an -upper bound on the increment in position that would occur in a call to MPI_Pack, with the same values for \fIincount\fP, \fIdatatype\fP, and \fIcomm\fP. +upper bound on the increment in position that would occur in a call to MPI_Pack, with the same values for \fIincount\fP, \fIdatatype\fP, and \fIcomm\fP. .sp -\fBRationale:\fP The call returns an upper bound, rather than an exact bound, since the exact amount of space needed to pack the message may depend on the context (e.g., first message packed in a packing unit may take more space). +\fBRationale:\fP The call returns an upper bound, rather than an exact bound, since the exact amount of space needed to pack the message may depend on the context (e.g., first message packed in a packing unit may take more space). .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. 
.sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Pcontrol.3in b/ompi/mpi/man/man3/MPI_Pcontrol.3in index c748e7a2737..2f80b0fcad5 100644 --- a/ompi/mpi/man/man3/MPI_Pcontrol.3in +++ b/ompi/mpi/man/man3/MPI_Pcontrol.3in @@ -35,9 +35,9 @@ Profiling level. .SH DESCRIPTION .ft R -MPI libraries themselves make no use of this routine; they simply return immediately to the user code. However the presence of calls to this routine allows a profiling package to be explicitly called by the user. +MPI libraries themselves make no use of this routine; they simply return immediately to the user code. However the presence of calls to this routine allows a profiling package to be explicitly called by the user. .sp -Since MPI has no control of the implementation of the profiling code, we are unable to specify precisely the semantics that will be provided by calls to MPI_Pcontrol. This vagueness extends to the number of arguments to the function, and their datatypes. +Since MPI has no control of the implementation of the profiling code, we are unable to specify precisely the semantics that will be provided by calls to MPI_Pcontrol. This vagueness extends to the number of arguments to the function, and their datatypes. 
.sp However to provide some level of portability of user codes to different profiling libraries, we request the following meanings for certain values of level: @@ -53,16 +53,16 @@ level==2 Profile buffers are flushed. (This may be a no-op in some profilers). .TP o -All other values of level have profile library-defined effects and additional arguments. +All other values of level have profile library-defined effects and additional arguments. .LP .sp We also request that the default state after MPI_Init has been called is for profiling to be enabled at the normal default level (i.e., as if MPI_Pcontrol had just been called with the argument 1). This allows users to link with a profiling library and obtain profile output without having to modify their source code at all. .sp -The provision of MPI_Pcontrol as a no-op in the standard MPI library allows users to modify their source code to obtain more detailed profiling information, but still be able to link exactly the same code against the standard MPI library. +The provision of MPI_Pcontrol as a no-op in the standard MPI library allows users to modify their source code to obtain more detailed profiling information, but still be able to link exactly the same code against the standard MPI library. .SH NOTES .ft R -This routine provides a common interface for profiling control. The interpretation of level and any other arguments is left to the profiling library. +This routine provides a common interface for profiling control. The interpretation of level and any other arguments is left to the profiling library. .sp This function does not return an error value. Consequently, the result of calling it before MPI_Init or after MPI_Finalize is undefined. 
diff --git a/ompi/mpi/man/man3/MPI_Probe.3in b/ompi/mpi/man/man3/MPI_Probe.3in index 85e6f10579f..03a5811689d 100644 --- a/ompi/mpi/man/man3/MPI_Probe.3in +++ b/ompi/mpi/man/man3/MPI_Probe.3in @@ -19,7 +19,7 @@ int MPI_Probe(int \fIsource\fP, int\fI tag\fP, MPI_Comm\fI comm\fP, MPI_Status\f .nf INCLUDE 'mpif.h' MPI_PROBE(\fISOURCE, TAG, COMM, STATUS, IERROR\fP) - INTEGER \fISOURCE, TAG, COMM, STATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER \fISOURCE, TAG, COMM, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -50,71 +50,71 @@ Status object (status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -The MPI_Probe and MPI_Iprobe operations allow checking of incoming messages, without actual receipt of them. The user can then decide how to receive them, based on the information returned by the probe in the status variable. For example, the user may allocate memory for the receive buffer, according to the length of the probed message. +The MPI_Probe and MPI_Iprobe operations allow checking of incoming messages, without actual receipt of them. The user can then decide how to receive them, based on the information returned by the probe in the status variable. For example, the user may allocate memory for the receive buffer, according to the length of the probed message. .sp MPI_Probe behaves like MPI_Iprobe except that it is a blocking call that returns only after a matching message has been found. .sp -If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. +If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. 
.sp -The semantics of MPI_Probe and MPI_Iprobe guarantee progress: If a call to MPI_Probe has been issued by a process, and a send that matches the probe has been initiated by some process, then the call to MPI_Probe will return, unless the message is received by another concurrent receive operation (that is executed by another thread at the probing process). Similarly, if a process busy waits with MPI_Iprobe and a matching message has been issued, then the call to MPI_Iprobe will eventually return flag = true unless the message is received by another concurrent receive operation. +The semantics of MPI_Probe and MPI_Iprobe guarantee progress: If a call to MPI_Probe has been issued by a process, and a send that matches the probe has been initiated by some process, then the call to MPI_Probe will return, unless the message is received by another concurrent receive operation (that is executed by another thread at the probing process). Similarly, if a process busy waits with MPI_Iprobe and a matching message has been issued, then the call to MPI_Iprobe will eventually return flag = true unless the message is received by another concurrent receive operation. .sp -\fBExample 1:\fP Use blocking probe to wait for an incoming message. +\fBExample 1:\fP Use blocking probe to wait for an incoming message. .sp .nf -CALL MPI_COMM_RANK(comm, rank, ierr) - IF (rank.EQ.0) THEN - CALL MPI_SEND(i, 1, MPI_INTEGER, 2, 0, comm, ierr) - ELSE IF(rank.EQ.1) THEN - CALL MPI_SEND(x, 1, MPI_REAL, 2, 0, comm, ierr) - ELSE ! rank.EQ.2 - DO i=1, 2 - CALL MPI_PROBE(MPI_ANY_SOURCE, 0, - comm, status, ierr) - IF (status(MPI_SOURCE) = 0) THEN -100 CALL MPI_RECV(i, 1, MPI_INTEGER, 0, 0, status, ierr) - ELSE -200 CALL MPI_RECV(x, 1, MPI_REAL, 1, 0, status, ierr) - END IF - END DO - END IF +CALL MPI_COMM_RANK(comm, rank, ierr) + IF (rank.EQ.0) THEN + CALL MPI_SEND(i, 1, MPI_INTEGER, 2, 0, comm, ierr) + ELSE IF(rank.EQ.1) THEN + CALL MPI_SEND(x, 1, MPI_REAL, 2, 0, comm, ierr) + ELSE ! 
rank.EQ.2 + DO i=1, 2 + CALL MPI_PROBE(MPI_ANY_SOURCE, 0, + comm, status, ierr) + IF (status(MPI_SOURCE) .EQ. 0) THEN +100 CALL MPI_RECV(i, 1, MPI_INTEGER, 0, 0, comm, status, ierr) + ELSE +200 CALL MPI_RECV(x, 1, MPI_REAL, 1, 0, comm, status, ierr) + END IF + END DO + END IF .fi .sp Each message is received with the right type. .sp -\fBExample 2:\fP A program similar to the previous example, but with a problem. +\fBExample 2:\fP A program similar to the previous example, but with a problem. .sp .nf -CALL MPI_COMM_RANK(comm, rank, ierr) - IF (rank.EQ.0) THEN - CALL MPI_SEND(i, 1, MPI_INTEGER, 2, 0, comm, ierr) - ELSE IF(rank.EQ.1) THEN - CALL MPI_SEND(x, 1, MPI_REAL, 2, 0, comm, ierr) - ELSE - DO i=1, 2 - CALL MPI_PROBE(MPI_ANY_SOURCE, 0, - comm, status, ierr) - IF (status(MPI_SOURCE) = 0) THEN -100 CALL MPI_RECV(i, 1, MPI_INTEGER, MPI_ANY_SOURCE, - 0, status, ierr) - ELSE -200 CALL MPI_RECV(x, 1, MPI_REAL, MPI_ANY_SOURCE, - 0, status, ierr) - END IF - END DO +CALL MPI_COMM_RANK(comm, rank, ierr) + IF (rank.EQ.0) THEN + CALL MPI_SEND(i, 1, MPI_INTEGER, 2, 0, comm, ierr) + ELSE IF(rank.EQ.1) THEN + CALL MPI_SEND(x, 1, MPI_REAL, 2, 0, comm, ierr) + ELSE + DO i=1, 2 + CALL MPI_PROBE(MPI_ANY_SOURCE, 0, + comm, status, ierr) + IF (status(MPI_SOURCE) .EQ. 0) THEN +100 CALL MPI_RECV(i, 1, MPI_INTEGER, MPI_ANY_SOURCE, + 0, comm, status, ierr) + ELSE +200 CALL MPI_RECV(x, 1, MPI_REAL, MPI_ANY_SOURCE, + 0, comm, status, ierr) + END IF + END DO END IF .fi .sp -We slightly modified Example 2, using MPI_ANY_SOURCE as the source argument in the two receive calls in statements labeled 100 and 200. The program is now incorrect: The receive operation may receive a message that is distinct from the message probed by the preceding call to MPI_Probe. +We slightly modified Example 1, using MPI_ANY_SOURCE as the source argument in the two receive calls in statements labeled 100 and 200. 
The program is now incorrect: The receive operation may receive a message that is distinct from the message probed by the preceding call to MPI_Probe. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Publish_name.3in b/ompi/mpi/man/man3/MPI_Publish_name.3in index 53adc8d4d06..bdc2609453f 100644 --- a/ompi/mpi/man/man3/MPI_Publish_name.3in +++ b/ompi/mpi/man/man3/MPI_Publish_name.3in @@ -81,7 +81,7 @@ ompi_unique bool If set to true, return an error if the value. .fi -.sp +.sp \fIbool\fP info keys are actually strings but are evaluated as follows: if the string value is a number, it is converted to an integer and cast to a boolean (meaning that zero integers are false @@ -138,7 +138,7 @@ local scope, processes in job3 cannot access that data. 
In contrast, if the data had been published using global scope, then any process in job3 could access the data, provided that mpirun was given knowledge of how to contact the central server and the process could establish communication -with it. +with it. .SH ERRORS .ft R @@ -153,7 +153,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Put.3in b/ompi/mpi/man/man3/MPI_Put.3in index 2ca7b00dd9b..be9fdf90ace 100644 --- a/ompi/mpi/man/man3/MPI_Put.3in +++ b/ompi/mpi/man/man3/MPI_Put.3in @@ -14,7 +14,7 @@ .nf #include MPI_Put(const void *\fIorigin_addr\fP, int \fIorigin_count\fP, MPI_Datatype - \fIorigin_datatype\fP, int \fItarget_rank\fP, MPI_Aint \fItarget_disp\fP, + \fIorigin_datatype\fP, int \fItarget_rank\fP, MPI_Aint \fItarget_disp\fP, int \fItarget_count\fP, MPI_Datatype \fItarget_datatype\fP, MPI_Win \fIwin\fP) MPI_Rput(const void *\fIorigin_addr\fP, int \fIorigin_count\fP, MPI_Datatype @@ -26,11 +26,11 @@ MPI_Rput(const void *\fIorigin_addr\fP, int \fIorigin_count\fP, MPI_Datatype .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf INCLUDE 'mpif.h' -MPI_PUT(\fIORIGIN_ADDR, ORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, +MPI_PUT(\fIORIGIN_ADDR, ORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, TARGET_DISP, TARGET_COUNT, TARGET_DATATYPE, WIN, IERROR\fP) \fIORIGIN_ADDR\fP(*) INTEGER(KIND=MPI_ADDRESS_KIND) \fITARGET_DISP\fP - INTEGER \fIORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, TARGET_COUNT, + INTEGER \fIORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, TARGET_COUNT, TARGET_DATATYPE, WIN, IERROR\fP MPI_RPUT(\fIORIGIN_ADDR, ORIGIN_COUNT, 
ORIGIN_DATATYPE, TARGET_RANK, @@ -44,9 +44,9 @@ MPI_RPUT(\fIORIGIN_ADDR, ORIGIN_COUNT, ORIGIN_DATATYPE, TARGET_RANK, .SH C++ Syntax .nf #include -void MPI::Win::Put(const void* \fIorigin_addr\fP, int \fIorigin_count\fP, const - MPI::Datatype& \fIorigin_datatype\fP, int \fItarget_rank\fP, MPI::Aint - \fItarget_disp\fP, int \fItarget_count\fP, const MPI::Datatype& +void MPI::Win::Put(const void* \fIorigin_addr\fP, int \fIorigin_count\fP, const + MPI::Datatype& \fIorigin_datatype\fP, int \fItarget_rank\fP, MPI::Aint + \fItarget_disp\fP, int \fItarget_count\fP, const MPI::Datatype& \fItarget_datatype\fP) const .fi @@ -84,7 +84,7 @@ request MPI_Rput: RMA request .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -98,7 +98,7 @@ The communication must satisfy the same constraints as for a similar message-pas .sp The \fItarget_datatype\fP argument is a handle to a datatype object defined at the origin process. However, this object is interpreted at the target process: The outcome is as if the target datatype object were defined at the target process, by the same sequence of calls used to define it at the origin process. The target data type must contain only relative displacements, not absolute addresses. The same holds for get and accumulate. .sp -\fBMPI_Rput\bP is similar to \fBMPI_Put\fI, except that it allocates a communication request object and associates it with the request handle (the argument \fIrequest\fP). The completion of an MPI_Rput operation (i.e., after the corresponding test or wait) indicates that the sender is now free to update the locations in the \fIorigin_addr\fP buffer. It does not indicate that the data is available at the target window. If remote completion is required, \fBMPI_Win_flush\fP, \fBMPI_Win_flush_all\fP, \fBMPI_Win_unlock\fP, or \fBMPI_Win_unlock_all\fP can be used. 
+\fBMPI_Rput\fP is similar to \fBMPI_Put\fP, except that it allocates a communication request object and associates it with the request handle (the argument \fIrequest\fP). The completion of an MPI_Rput operation (i.e., after the corresponding test or wait) indicates that the sender is now free to update the locations in the \fIorigin_addr\fP buffer. It does not indicate that the data is available at the target window. If remote completion is required, \fBMPI_Win_flush\fP, \fBMPI_Win_flush_all\fP, \fBMPI_Win_unlock\fP, or \fBMPI_Win_unlock_all\fP can be used. .SH NOTES The \fItarget_datatype\fP argument is a handle to a datatype object that is defined at the origin process, even though it defines a data layout in the target process memory. This does not cause problems in a homogeneous or heterogeneous environment, as long as only portable data types are used (portable data types are defined in Section 2.4 of the MPI-2 Standard). @@ -122,7 +122,7 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Query_thread.3in b/ompi/mpi/man/man3/MPI_Query_thread.3in index 3cbb8d7f05c..952194e208f 100644 --- a/ompi/mpi/man/man3/MPI_Query_thread.3in +++ b/ompi/mpi/man/man3/MPI_Query_thread.3in @@ -34,10 +34,10 @@ int MPI::Query_thread() .ft R .TP 1i provided -C/Fortran only: Level of thread support (integer). +C/Fortran only: Level of thread support (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -82,7 +82,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. 
diff --git a/ompi/mpi/man/man3/MPI_Recv.3in b/ompi/mpi/man/man3/MPI_Recv.3in index 7113ffa2ff4..e6bbfe4d473 100644 --- a/ompi/mpi/man/man3/MPI_Recv.3in +++ b/ompi/mpi/man/man3/MPI_Recv.3in @@ -12,7 +12,7 @@ .SH C Syntax .nf #include -int MPI_Recv(void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\fP, +int MPI_Recv(void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\fP, int\fI source\fP, int\fI tag\fP, MPI_Comm\fI comm\fP, MPI_Status\fI *status\fP) .fi @@ -20,19 +20,19 @@ int MPI_Recv(void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\fP, .nf INCLUDE 'mpif.h' MPI_RECV(\fIBUF, COUNT, DATATYPE, SOURCE, TAG, COMM, STATUS, IERROR\fP) - \fIBUF\fP(*) - INTEGER \fICOUNT, DATATYPE, SOURCE, TAG, COMM\fP + \fIBUF\fP(*) + INTEGER \fICOUNT, DATATYPE, SOURCE, TAG, COMM\fP INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax .nf #include -void Comm::Recv(void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, +void Comm::Recv(void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIsource\fP, int \fItag\fP, Status& \fIstatus\fP) const -void Comm::Recv(void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, - int \fIsource\fP, int \fItag\fP) const +void Comm::Recv(void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, + int \fIsource\fP, int \fItag\fP) const .fi .SH INPUT PARAMETERS @@ -64,7 +64,7 @@ Status object (status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -72,7 +72,7 @@ This basic receive operation, MPI_Recv, is blocking: it returns only after the r .sp The blocking semantics of this call are described in Section 3.4 of the MPI-1 Standard, "Communication Modes." .sp -The receive buffer contains a number (defined by the value of \fIcount\fP) of consecutive elements. The first element in the set of elements is located at \fIaddress_buf\fP. The type of each of these elements is specified by \fIdatatype\fP. 
+The receive buffer contains a number (defined by the value of \fIcount\fP) of consecutive elements. The first element in the set of elements is located at the address \fIbuf\fP. The type of each of these elements is specified by \fIdatatype\fP. .sp The length of the received message must be less than or equal to the length of the receive buffer. An MPI_ERR_TRUNCATE is returned upon the overflow condition. .sp @@ -82,31 +82,31 @@ those locations corresponding to the (shorter) received message are modified. .SH NOTES The \fIcount\fP argument indicates the maximum number of entries of type \fIdatatype\fP that can be received in a message. Once a message is received, use the MPI_Get_count function to determine the actual number of entries within that message. .sp -To receive messages of unknown length, use the MPI_Probe function. (For more information about MPI_Probe and MPI_Cancel, see their respective man pages; also, see Section 3.8 of the MPI-1 Standard, "Probe and Cancel.") +To receive messages of unknown length, use the MPI_Probe function. (For more information about MPI_Probe and MPI_Cancel, see their respective man pages; also, see Section 3.8 of the MPI-1 Standard, "Probe and Cancel.") .sp -A message can be received by a receive operation only if it is addressed to the receiving process, and if its source, tag, and communicator (comm) values match the source, tag, and comm values specified by the receive operation. The receive operation may specify a wildcard value for source and/or tag, indicating that any source and/or tag are acceptable. The wildcard value for source is source = MPI_ANY_SOURCE. The wildcard value for tag is tag = MPI_ANY_TAG. There is no wildcard value for comm. The scope of these wildcards is limited to the proceses in the group of the specified communicator.
+A message can be received by a receive operation only if it is addressed to the receiving process, and if its source, tag, and communicator (comm) values match the source, tag, and comm values specified by the receive operation. The receive operation may specify a wildcard value for source and/or tag, indicating that any source and/or tag are acceptable. The wildcard value for source is source = MPI_ANY_SOURCE. The wildcard value for tag is tag = MPI_ANY_TAG. There is no wildcard value for comm. The scope of these wildcards is limited to the processes in the group of the specified communicator. .sp -The message tag is specified by the tag argument of the receive operation. +The message tag is specified by the tag argument of the receive operation. .sp -The argument source, if different from MPI_ANY_SOURCE, is specified as a rank within the process group associated with that same communicator (remote process group, for intercommunicators). Thus, the range of valid values for the source argument is {0,...,n-1} {MPI_ANY_SOURCE}, where n is the number of processes in this group. +The argument source, if different from MPI_ANY_SOURCE, is specified as a rank within the process group associated with that same communicator (remote process group, for intercommunicators). Thus, the range of valid values for the source argument is {0,...,n-1} \(cu {MPI_ANY_SOURCE}, where n is the number of processes in this group. .sp -Note the asymmetry between send and receive operations: A receive operation may accept messages from an arbitrary sender; on the other hand, a send operation must specify a unique receiver.
This matches a "push" communication mechanism, where data transfer is effected by the sender (rather than a "pull" mechanism, where data transfer is effected by the receiver). .sp -Source = destination is allowed, that is, a process can send a message to itself. However, it is not recommended for a process to send messages to itself using the blocking send and receive operations described above, since this may lead to deadlock. See Section 3.5 of the MPI-1 Standard, "Semantics of Point-to-Point Communication." +Source = destination is allowed, that is, a process can send a message to itself. However, it is not recommended for a process to send messages to itself using the blocking send and receive operations described above, since this may lead to deadlock. See Section 3.5 of the MPI-1 Standard, "Semantics of Point-to-Point Communication." .sp -If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. +If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R .nf MPI_Irecv -MPI_Probe +MPI_Probe diff --git a/ompi/mpi/man/man3/MPI_Recv_init.3in b/ompi/mpi/man/man3/MPI_Recv_init.3in index 4f3e7ba6849..e98fdbffd84 100644 --- a/ompi/mpi/man/man3/MPI_Recv_init.3in +++ b/ompi/mpi/man/man3/MPI_Recv_init.3in @@ -22,13 +22,13 @@ INCLUDE 'mpif.h' MPI_RECV_INIT(\fIBUF, COUNT, DATATYPE, SOURCE, TAG, COMM, REQUEST, IERROR\fP) \fIBUF\fP(*) - INTEGER \fICOUNT, DATATYPE, SOURCE, TAG, COMM, REQUEST, IERROR\fP + INTEGER \fICOUNT, DATATYPE, SOURCE, TAG, COMM, REQUEST, IERROR\fP .fi .SH C++ Syntax .nf #include -Prequest Comm::Recv_init(void* \fIbuf\fP, int \fIcount\fP, const +Prequest Comm::Recv_init(void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIsource\fP, int \fItag\fP) const .fi @@ -63,21 +63,21 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Creates a persistent communication request for a receive operation. The argument \fIbuf\fP is marked as OUT because the user gives permission to write on the receive buffer by passing the argument to MPI_Recv_init. +Creates a persistent communication request for a receive operation. The argument \fIbuf\fP is marked as OUT because the user gives permission to write on the receive buffer by passing the argument to MPI_Recv_init. .sp -A persistent communication request is inactive after it is created -- no active communication is attached to the request. +A persistent communication request is inactive after it is created -- no active communication is attached to the request. 
.sp -A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start or MPI_Startall. +A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start or MPI_Startall. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Reduce.3in b/ompi/mpi/man/man3/MPI_Reduce.3in index e8d9796c6cd..5e0c12fbac8 100644 --- a/ompi/mpi/man/man3/MPI_Reduce.3in +++ b/ompi/mpi/man/man3/MPI_Reduce.3in @@ -28,7 +28,7 @@ INCLUDE 'mpif.h' MPI_REDUCE(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, ROOT, COMM, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fICOUNT, DATATYPE, OP, ROOT, COMM, IERROR\fP + INTEGER \fICOUNT, DATATYPE, OP, ROOT, COMM, IERROR\fP MPI_IREDUCE(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, ROOT, COMM, REQUEST, IERROR\fP) @@ -41,7 +41,7 @@ MPI_IREDUCE(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, ROOT, COMM, #include void MPI::Intracomm::Reduce(const void* \fIsendbuf\fP, void* \fIrecvbuf\fP, int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP, const MPI::Op& \fIop\fP, - int \fIroot\fP) const + int \fIroot\fP) const .fi .SH INPUT PARAMETERS @@ -76,134 +76,134 @@ Request (handle, non-blocking only). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R The global reduce functions (MPI_Reduce, MPI_Op_create, MPI_Op_free, MPI_Allreduce, MPI_Reduce_scatter, MPI_Scan) perform a global reduce operation (such as sum, max, logical AND, etc.) across all the members of a group. The reduction operation can be either one of a predefined list of operations, or a user-defined operation. The global reduction functions come in several flavors: a reduce that returns the result of the reduction at one node, an all-reduce that returns this result at all nodes, and a scan (parallel prefix) operation. In addition, a reduce-scatter operation combines the functionality of a reduce and a scatter operation. .sp -MPI_Reduce combines the elements provided in the input buffer of each process in the group, using the operation op, and returns the combined value in the output buffer of the process with rank root. 
The input buffer is defined by the arguments sendbuf, count, and datatype; the output buffer is defined by the arguments recvbuf, count, and datatype; both have the same number of elements, with the same type. The routine is called by all group members using the same arguments for count, datatype, op, root, and comm. Thus, all processes provide input buffers and output buffers of the same length, with elements of the same type. Each process can provide one element, or a sequence of elements, in which case the combine operation is executed element-wise on each entry of the sequence. For example, if the operation is MPI_MAX and the send buffer contains two elements that are floating-point numbers (count = 2 and datatype = MPI_FLOAT), then recvbuf(1) = global max (sendbuf(1)) and recvbuf(2) = global max(sendbuf(2)). +MPI_Reduce combines the elements provided in the input buffer of each process in the group, using the operation op, and returns the combined value in the output buffer of the process with rank root. The input buffer is defined by the arguments sendbuf, count, and datatype; the output buffer is defined by the arguments recvbuf, count, and datatype; both have the same number of elements, with the same type. The routine is called by all group members using the same arguments for count, datatype, op, root, and comm. Thus, all processes provide input buffers and output buffers of the same length, with elements of the same type. Each process can provide one element, or a sequence of elements, in which case the combine operation is executed element-wise on each entry of the sequence. For example, if the operation is MPI_MAX and the send buffer contains two elements that are floating-point numbers (count = 2 and datatype = MPI_FLOAT), then recvbuf(1) = global max (sendbuf(1)) and recvbuf(2) = global max(sendbuf(2)). 
.sp .SH USE OF IN-PLACE OPTION -When the communicator is an intracommunicator, you can perform a reduce operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the root process \fIsendbuf\fR. In this case, the input data is taken at the root from the receive buffer, where it will be replaced by the output data. +When the communicator is an intracommunicator, you can perform a reduce operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the root process \fIsendbuf\fR. In this case, the input data is taken at the root from the receive buffer, where it will be replaced by the output data. .sp Note that MPI_IN_PLACE is a special kind of value; it has the same restrictions on its use as MPI_BOTTOM. .sp -Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. +Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. .sp .SH WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR .sp -When the communicator is an inter-communicator, the root process in the first group combines data from all the processes in the second group and then performs the \fIop\fR operation. The first group defines the root process. That process uses MPI_ROOT as the value of its \fIroot\fR argument. The remaining processes use MPI_PROC_NULL as the value of their \fIroot\fR argument. All processes in the second group use the rank of that root process in the first group as the value of their \fIroot\fR argument. Only the send buffer arguments are significant in the second group, and only the receive buffer arguments are significant in the root process of the first group. 
-.sp +When the communicator is an inter-communicator, the root process in the first group combines data from all the processes in the second group and then performs the \fIop\fR operation. The first group defines the root process. That process uses MPI_ROOT as the value of its \fIroot\fR argument. The remaining processes use MPI_PROC_NULL as the value of their \fIroot\fR argument. All processes in the second group use the rank of that root process in the first group as the value of their \fIroot\fR argument. Only the send buffer arguments are significant in the second group, and only the receive buffer arguments are significant in the root process of the first group. +.sp .SH PREDEFINED REDUCE OPERATIONS .sp The set of predefined operations provided by MPI is listed below (Predefined Reduce Operations). That section also enumerates the datatypes each operation can be applied to. In addition, users may define their own operations that can be overloaded to operate on several datatypes, either basic or derived. This is further explained in the description of the user-defined operations (see the man pages for MPI_Op_create and MPI_Op_free). .sp -The operation op is always assumed to be associative. All predefined operations are also assumed to be commutative. Users may define operations that are assumed to be associative, but not commutative. The ``canonical'' evaluation order of a reduction is determined by the ranks of the processes in the group. However, the implementation can take advantage of associativity, or associativity and commutativity, in order to change the order of evaluation. This may change the result of the reduction for operations that are not strictly associative and commutative, such as floating point addition. +The operation op is always assumed to be associative. All predefined operations are also assumed to be commutative. Users may define operations that are assumed to be associative, but not commutative. 
The ``canonical'' evaluation order of a reduction is determined by the ranks of the processes in the group. However, the implementation can take advantage of associativity, or associativity and commutativity, in order to change the order of evaluation. This may change the result of the reduction for operations that are not strictly associative and commutative, such as floating point addition. .sp Predefined operators work only with the MPI types listed below (Predefined Reduce Operations, and the section MINLOC and MAXLOC, below). User-defined operators may operate on general, derived datatypes. In this case, each argument that the reduce operation is applied to is one element described by such a datatype, which may contain several basic values. This is further explained in Section 4.9.4 of the MPI Standard, "User-Defined Operations." The following predefined operations are supplied for MPI_Reduce and related functions MPI_Allreduce, MPI_Reduce_scatter, and MPI_Scan. These operations are invoked by placing the following in op: .sp .nf - Name Meaning + Name Meaning --------- -------------------- - MPI_MAX maximum - MPI_MIN minimum - MPI_SUM sum - MPI_PROD product - MPI_LAND logical and - MPI_BAND bit-wise and - MPI_LOR logical or - MPI_BOR bit-wise or - MPI_LXOR logical xor - MPI_BXOR bit-wise xor - MPI_MAXLOC max value and location - MPI_MINLOC min value and location + MPI_MAX maximum + MPI_MIN minimum + MPI_SUM sum + MPI_PROD product + MPI_LAND logical and + MPI_BAND bit-wise and + MPI_LOR logical or + MPI_BOR bit-wise or + MPI_LXOR logical xor + MPI_BXOR bit-wise xor + MPI_MAXLOC max value and location + MPI_MINLOC min value and location .fi .sp The two operations MPI_MINLOC and MPI_MAXLOC are discussed separately below (MINLOC and MAXLOC). For the other predefined operations, we enumerate below the allowed combinations of op and datatype arguments. 
First, define groups of MPI basic datatypes in the following way: .sp .nf - C integer: MPI_INT, MPI_LONG, MPI_SHORT, - MPI_UNSIGNED_SHORT, MPI_UNSIGNED, - MPI_UNSIGNED_LONG - Fortran integer: MPI_INTEGER - Floating-point: MPI_FLOAT, MPI_DOUBLE, MPI_REAL, - MPI_DOUBLE_PRECISION, MPI_LONG_DOUBLE - Logical: MPI_LOGICAL - Complex: MPI_COMPLEX - Byte: MPI_BYTE + C integer: MPI_INT, MPI_LONG, MPI_SHORT, + MPI_UNSIGNED_SHORT, MPI_UNSIGNED, + MPI_UNSIGNED_LONG + Fortran integer: MPI_INTEGER + Floating-point: MPI_FLOAT, MPI_DOUBLE, MPI_REAL, + MPI_DOUBLE_PRECISION, MPI_LONG_DOUBLE + Logical: MPI_LOGICAL + Complex: MPI_COMPLEX + Byte: MPI_BYTE .fi .sp Now, the valid datatypes for each option is specified below. .sp .nf - Op Allowed Types + Op Allowed Types ---------------- --------------------------- - MPI_MAX, MPI_MIN C integer, Fortran integer, - floating-point + MPI_MAX, MPI_MIN C integer, Fortran integer, + floating-point - MPI_SUM, MPI_PROD C integer, Fortran integer, - floating-point, complex + MPI_SUM, MPI_PROD C integer, Fortran integer, + floating-point, complex - MPI_LAND, MPI_LOR, C integer, logical + MPI_LAND, MPI_LOR, C integer, logical MPI_LXOR - MPI_BAND, MPI_BOR, C integer, Fortran integer, byte + MPI_BAND, MPI_BOR, C integer, Fortran integer, byte MPI_BXOR .fi .sp -\fBExample 1:\fR A routine that computes the dot product of two vectors that are distributed across a group of processes and returns the answer at process zero. +\fBExample 1:\fR A routine that computes the dot product of two vectors that are distributed across a group of processes and returns the answer at process zero. .sp .nf - SUBROUTINE PAR_BLAS1(m, a, b, c, comm) - REAL a(m), b(m) ! local slice of array - REAL c ! result (at process zero) - REAL sum - INTEGER m, comm, i, ierr - - ! local sum - sum = 0.0 - DO i = 1, m - sum = sum + a(i)*b(i) - END DO - - ! 
global sum - CALL MPI_REDUCE(sum, c, 1, MPI_REAL, MPI_SUM, 0, comm, ierr) - RETURN + SUBROUTINE PAR_BLAS1(m, a, b, c, comm) + REAL a(m), b(m) ! local slice of array + REAL c ! result (at process zero) + REAL sum + INTEGER m, comm, i, ierr + + ! local sum + sum = 0.0 + DO i = 1, m + sum = sum + a(i)*b(i) + END DO + + ! global sum + CALL MPI_REDUCE(sum, c, 1, MPI_REAL, MPI_SUM, 0, comm, ierr) + RETURN .fi .sp \fBExample 2:\fR A routine that computes the product of a vector and an array that are distributed across a group of processes and returns the answer at process zero. .sp .nf - SUBROUTINE PAR_BLAS2(m, n, a, b, c, comm) - REAL a(m), b(m,n) ! local slice of array - REAL c(n) ! result - REAL sum(n) - INTEGER n, comm, i, j, ierr - - ! local sum - DO j= 1, n - sum(j) = 0.0 - DO i = 1, m - sum(j) = sum(j) + a(i)*b(i,j) - END DO - END DO - - ! global sum - CALL MPI_REDUCE(sum, c, n, MPI_REAL, MPI_SUM, 0, comm, ierr) - - ! return result at process zero (and garbage at the other nodes) + SUBROUTINE PAR_BLAS2(m, n, a, b, c, comm) + REAL a(m), b(m,n) ! local slice of array + REAL c(n) ! result + REAL sum(n) + INTEGER n, comm, i, j, ierr + + ! local sum + DO j= 1, n + sum(j) = 0.0 + DO i = 1, m + sum(j) = sum(j) + a(i)*b(i,j) + END DO + END DO + + ! global sum + CALL MPI_REDUCE(sum, c, n, MPI_REAL, MPI_SUM, 0, comm, ierr) + + ! return result at process zero (and garbage at the other nodes) RETURN .fi .SH MINLOC AND MAXLOC .ft R -The operator MPI_MINLOC is used to compute a global minimum and also an index attached to the minimum value. MPI_MAXLOC similarly computes a global maximum and index. One application of these is to compute a global minimum (maximum) and the rank of the process containing this value. +The operator MPI_MINLOC is used to compute a global minimum and also an index attached to the minimum value. MPI_MAXLOC similarly computes a global maximum and index. 
One application of these is to compute a global minimum (maximum) and the rank of the process containing this value. .sp -The operation that defines MPI_MAXLOC is +The operation that defines MPI_MAXLOC is .sp .nf ( u ) ( v ) ( w ) @@ -220,7 +220,7 @@ and ( k = ( min(i, j) if u = v ( - ( j if u < v) + ( j if u < v) MPI_MINLOC is defined similarly: @@ -239,7 +239,7 @@ and ( k = ( min(i, j) if u = v ( - ( j if u > v) + ( j if u > v) .fi @@ -271,145 +271,145 @@ provides nine such predefined datatypes. The operations MPI_MAXLOC and MPI_MINLOC can be used with each of the following datatypes: .sp .nf - Fortran: - Name Description - MPI_2REAL pair of REALs - MPI_2DOUBLE_PRECISION pair of DOUBLE-PRECISION variables - MPI_2INTEGER pair of INTEGERs - - C: - Name Description - MPI_FLOAT_INT float and int - MPI_DOUBLE_INT double and int - MPI_LONG_INT long and int - MPI_2INT pair of ints - MPI_SHORT_INT short and int + Fortran: + Name Description + MPI_2REAL pair of REALs + MPI_2DOUBLE_PRECISION pair of DOUBLE-PRECISION variables + MPI_2INTEGER pair of INTEGERs + + C: + Name Description + MPI_FLOAT_INT float and int + MPI_DOUBLE_INT double and int + MPI_LONG_INT long and int + MPI_2INT pair of ints + MPI_SHORT_INT short and int MPI_LONG_DOUBLE_INT long double and int .fi .sp The data type MPI_2REAL is equivalent to: .nf - MPI_TYPE_CONTIGUOUS(2, MPI_REAL, MPI_2REAL) + MPI_TYPE_CONTIGUOUS(2, MPI_REAL, MPI_2REAL) .fi .sp Similar statements apply for MPI_2INTEGER, MPI_2DOUBLE_PRECISION, and MPI_2INT. -.sp +.sp The datatype MPI_FLOAT_INT is as if defined by the following sequence of instructions. .sp .nf - type[0] = MPI_FLOAT - type[1] = MPI_INT - disp[0] = 0 - disp[1] = sizeof(float) - block[0] = 1 - block[1] = 1 + type[0] = MPI_FLOAT + type[1] = MPI_INT + disp[0] = 0 + disp[1] = sizeof(float) + block[0] = 1 + block[1] = 1 MPI_TYPE_STRUCT(2, block, disp, type, MPI_FLOAT_INT) .fi .sp -Similar statements apply for MPI_LONG_INT and MPI_DOUBLE_INT. 
+Similar statements apply for MPI_LONG_INT and MPI_DOUBLE_INT. .sp \fBExample 3:\fR Each process has an array of 30 doubles, in C. For each of the 30 locations, compute the value and rank of the process containing the largest value. .sp .nf - \&... - /* each process has an array of 30 double: ain[30] - */ - double ain[30], aout[30]; - int ind[30]; - struct { - double val; - int rank; - } in[30], out[30]; - int i, myrank, root; - - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - for (i=0; i<30; ++i) { - in[i].val = ain[i]; - in[i].rank = myrank; - } - MPI_Reduce( in, out, 30, MPI_DOUBLE_INT, MPI_MAXLOC, root, comm ); - /* At this point, the answer resides on process root - */ - if (myrank == root) { - /* read ranks out - */ - for (i=0; i<30; ++i) { - aout[i] = out[i].val; - ind[i] = out[i].rank; - } - } + \&... + /* each process has an array of 30 double: ain[30] + */ + double ain[30], aout[30]; + int ind[30]; + struct { + double val; + int rank; + } in[30], out[30]; + int i, myrank, root; + + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + for (i=0; i<30; ++i) { + in[i].val = ain[i]; + in[i].rank = myrank; + } + MPI_Reduce( in, out, 30, MPI_DOUBLE_INT, MPI_MAXLOC, root, comm ); + /* At this point, the answer resides on process root + */ + if (myrank == root) { + /* read ranks out + */ + for (i=0; i<30; ++i) { + aout[i] = out[i].val; + ind[i] = out[i].rank; + } + } .fi .sp .fi -\fBExample 4:\fR Same example, in Fortran. +\fBExample 4:\fR Same example, in Fortran. .sp .nf - \&... - ! each process has an array of 30 double: ain(30) - - DOUBLE PRECISION ain(30), aout(30) - INTEGER ind(30); - DOUBLE PRECISION in(2,30), out(2,30) - INTEGER i, myrank, root, ierr; - - MPI_COMM_RANK(MPI_COMM_WORLD, myrank); - DO I=1, 30 - in(1,i) = ain(i) - in(2,i) = myrank ! myrank is coerced to a double - END DO - - MPI_REDUCE( in, out, 30, MPI_2DOUBLE_PRECISION, MPI_MAXLOC, root, - comm, ierr ); - ! At this point, the answer resides on process root - - IF (myrank .EQ. root) THEN - ! 
read ranks out - DO I= 1, 30 - aout(i) = out(1,i) - ind(i) = out(2,i) ! rank is coerced back to an integer - END DO - END IF + \&... + ! each process has an array of 30 double: ain(30) + + DOUBLE PRECISION ain(30), aout(30) + INTEGER ind(30); + DOUBLE PRECISION in(2,30), out(2,30) + INTEGER i, myrank, root, ierr; + + MPI_COMM_RANK(MPI_COMM_WORLD, myrank); + DO I=1, 30 + in(1,i) = ain(i) + in(2,i) = myrank ! myrank is coerced to a double + END DO + + MPI_REDUCE( in, out, 30, MPI_2DOUBLE_PRECISION, MPI_MAXLOC, root, + comm, ierr ); + ! At this point, the answer resides on process root + + IF (myrank .EQ. root) THEN + ! read ranks out + DO I= 1, 30 + aout(i) = out(1,i) + ind(i) = out(2,i) ! rank is coerced back to an integer + END DO + END IF .fi .sp \fBExample 5:\fR Each process has a nonempty array of values. Find the minimum global value, the rank of the process that holds it, and its index on this process. .sp .nf - #define LEN 1000 - - float val[LEN]; /* local array of values */ - int count; /* local number of values */ - int myrank, minrank, minindex; - float minval; - - struct { - float value; - int index; - } in, out; - - /* local minloc */ - in.value = val[0]; - in.index = 0; - for (i=1; i < count; i++) - if (in.value > val[i]) { - in.value = val[i]; - in.index = i; - } - - /* global minloc */ - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - in.index = myrank*LEN + in.index; - MPI_Reduce( in, out, 1, MPI_FLOAT_INT, MPI_MINLOC, root, comm ); - /* At this point, the answer resides on process root - */ - if (myrank == root) { - /* read answer out - */ - minval = out.value; - minrank = out.index / LEN; + #define LEN 1000 + + float val[LEN]; /* local array of values */ + int count; /* local number of values */ + int myrank, minrank, minindex; + float minval; + + struct { + float value; + int index; + } in, out; + + /* local minloc */ + in.value = val[0]; + in.index = 0; + for (i=1; i < count; i++) + if (in.value > val[i]) { + in.value = val[i]; + in.index = i; + } + + 
/* global minloc */ + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + in.index = myrank*LEN + in.index; + MPI_Reduce( in, out, 1, MPI_FLOAT_INT, MPI_MINLOC, root, comm ); + /* At this point, the answer resides on process root + */ + if (myrank == root) { + /* read answer out + */ + minval = out.value; + minrank = out.index / LEN; minindex = out.index % LEN; .fi .sp @@ -419,11 +419,11 @@ All MPI objects (e.g., MPI_Datatype, MPI_Comm) are of type INTEGER in Fortran. The reduction functions ( .I MPI_Op ) do not return an error value. As a result, -if the functions detect an error, all they can do is either call +if the functions detect an error, all they can do is either call .I MPI_Abort or silently skip the problem. Thus, if you change the error handler from .I MPI_ERRORS_ARE_FATAL -to something else, for example, +to something else, for example, .I MPI_ERRORS_RETURN , then no error may be indicated. @@ -435,7 +435,7 @@ all collective routines return the same error value. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Reduce_local.3in b/ompi/mpi/man/man3/MPI_Reduce_local.3in index e3fbc6b13fb..6e9b20c1eb7 100644 --- a/ompi/mpi/man/man3/MPI_Reduce_local.3in +++ b/ompi/mpi/man/man3/MPI_Reduce_local.3in @@ -22,7 +22,7 @@ int MPI_Reduce_local(const void *\fIinbuf\fP, void *\fIinoutbuf\fP, int\fI count INCLUDE 'mpif.h' MPI_REDUCE_LOCAL(\fIINBUF, INOUTBUF, COUNT, DATATYPE, OP, IERROR\fP) \fIINBUF(*), INOUTBUF(*)\fP - INTEGER \fICOUNT, DATATYPE, OP, IERROR\fP + INTEGER \fICOUNT, DATATYPE, OP, IERROR\fP .fi .SH C++ Syntax @@ -55,84 +55,82 @@ Address of in/out buffer (choice). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -.I The MPI_Reduce_local function is proposed for MPI-2.2 and (as of 10 Jan 2009) has not yet been ratified. Use at your own risk. See https://svn.mpi-forum.org/trac/mpi-forum-web/ticket/24. -.sp The global reduce functions (MPI_Reduce_local, MPI_Op_create, MPI_Op_free, MPI_Allreduce, MPI_Reduce_scatter, MPI_Scan) perform a global reduce operation (such as sum, max, logical AND, etc.) across all the members of a group. The reduction operation can be either one of a predefined list of operations, or a user-defined operation. The global reduction functions come in several flavors: a reduce that returns the result of the reduction at one node, an all-reduce that returns this result at all nodes, and a scan (parallel prefix) operation. In addition, a reduce-scatter operation combines the functionality of a reduce and a scatter operation. .sp -MPI_Reduce_local combines the elements provided in the input and input/output buffers of the local process, using the operation op, and returns the combined value in the inout/output buffer.
The input buffer is defined by the arguments inbuf, count, and datatype; the output buffer is defined by the arguments inoutbuf, count, and datatype; both have the same number of elements, with the same type. The routine is a local call. The process can provide one element, or a sequence of elements, in which case the combine operation is executed element-wise on each entry of the sequence. For example, if the operation is MPI_MAX and the input buffer contains two elements that are floating-point numbers (count = 2 and datatype = MPI_FLOAT), then inoutbuf(1) = global max (inbuf(1)) and inoutbuf(2) = global max(inbuf(2)). +MPI_Reduce_local combines the elements provided in the input and input/output buffers of the local process, using the operation op, and returns the combined value in the input/output buffer. The input buffer is defined by the arguments inbuf, count, and datatype; the output buffer is defined by the arguments inoutbuf, count, and datatype; both have the same number of elements, with the same type. The routine is a local call. The process can provide one element, or a sequence of elements, in which case the combine operation is executed element-wise on each entry of the sequence. For example, if the operation is MPI_MAX and the input buffer contains two elements that are floating-point numbers (count = 2 and datatype = MPI_FLOAT), then inoutbuf(1) = global max (inbuf(1)) and inoutbuf(2) = global max(inbuf(2)). .sp .SH USE OF IN-PLACE OPTION The use of MPI_IN_PLACE is disallowed with MPI_Reduce_local. -.sp +.sp .SH PREDEFINED REDUCE OPERATIONS .sp The set of predefined operations provided by MPI is listed below (Predefined Reduce Operations). That section also enumerates the datatypes each operation can be applied to. In addition, users may define their own operations that can be overloaded to operate on several datatypes, either basic or derived.
This is further explained in the description of the user-defined operations (see the man pages for MPI_Op_create and MPI_Op_free). .sp -The operation op is always assumed to be associative. All predefined operations are also assumed to be commutative. Users may define operations that are assumed to be associative, but not commutative. The ``canonical'' evaluation order of a reduction is determined by the ranks of the processes in the group. However, the implementation can take advantage of associativity, or associativity and commutativity, in order to change the order of evaluation. This may change the result of the reduction for operations that are not strictly associative and commutative, such as floating point addition. +The operation op is always assumed to be associative. All predefined operations are also assumed to be commutative. Users may define operations that are assumed to be associative, but not commutative. The ``canonical'' evaluation order of a reduction is determined by the ranks of the processes in the group. However, the implementation can take advantage of associativity, or associativity and commutativity, in order to change the order of evaluation. This may change the result of the reduction for operations that are not strictly associative and commutative, such as floating point addition. .sp Predefined operators work only with the MPI types listed below (Predefined Reduce Operations, and the section MINLOC and MAXLOC, below). User-defined operators may operate on general, derived datatypes. In this case, each argument that the reduce operation is applied to is one element described by such a datatype, which may contain several basic values. This is further explained in Section 4.9.4 of the MPI Standard, "User-Defined Operations." The following predefined operations are supplied for MPI_Reduce_local and related functions MPI_Allreduce, MPI_Reduce_scatter, and MPI_Scan. 
These operations are invoked by placing the following in op: .sp .nf - Name Meaning + Name Meaning --------- -------------------- - MPI_MAX maximum - MPI_MIN minimum - MPI_SUM sum - MPI_PROD product - MPI_LAND logical and - MPI_BAND bit-wise and - MPI_LOR logical or - MPI_BOR bit-wise or - MPI_LXOR logical xor - MPI_BXOR bit-wise xor - MPI_MAXLOC max value and location - MPI_MINLOC min value and location + MPI_MAX maximum + MPI_MIN minimum + MPI_SUM sum + MPI_PROD product + MPI_LAND logical and + MPI_BAND bit-wise and + MPI_LOR logical or + MPI_BOR bit-wise or + MPI_LXOR logical xor + MPI_BXOR bit-wise xor + MPI_MAXLOC max value and location + MPI_MINLOC min value and location .fi .sp The two operations MPI_MINLOC and MPI_MAXLOC are discussed separately below (MINLOC and MAXLOC). For the other predefined operations, we enumerate below the allowed combinations of op and datatype arguments. First, define groups of MPI basic datatypes in the following way: .sp .nf - C integer: MPI_INT, MPI_LONG, MPI_SHORT, - MPI_UNSIGNED_SHORT, MPI_UNSIGNED, - MPI_UNSIGNED_LONG - Fortran integer: MPI_INTEGER - Floating-point: MPI_FLOAT, MPI_DOUBLE, MPI_REAL, - MPI_DOUBLE_PRECISION, MPI_LONG_DOUBLE - Logical: MPI_LOGICAL - Complex: MPI_COMPLEX - Byte: MPI_BYTE + C integer: MPI_INT, MPI_LONG, MPI_SHORT, + MPI_UNSIGNED_SHORT, MPI_UNSIGNED, + MPI_UNSIGNED_LONG + Fortran integer: MPI_INTEGER + Floating-point: MPI_FLOAT, MPI_DOUBLE, MPI_REAL, + MPI_DOUBLE_PRECISION, MPI_LONG_DOUBLE + Logical: MPI_LOGICAL + Complex: MPI_COMPLEX + Byte: MPI_BYTE .fi .sp Now, the valid datatypes for each option are specified below.
.sp .nf - Op Allowed Types + Op Allowed Types ---------------- --------------------------- - MPI_MAX, MPI_MIN C integer, Fortran integer, - floating-point + MPI_MAX, MPI_MIN C integer, Fortran integer, + floating-point - MPI_SUM, MPI_PROD C integer, Fortran integer, - floating-point, complex + MPI_SUM, MPI_PROD C integer, Fortran integer, + floating-point, complex - MPI_LAND, MPI_LOR, C integer, logical + MPI_LAND, MPI_LOR, C integer, logical MPI_LXOR - MPI_BAND, MPI_BOR, C integer, Fortran integer, byte + MPI_BAND, MPI_BOR, C integer, Fortran integer, byte MPI_BXOR .fi .sp .SH MINLOC AND MAXLOC .ft R -The operator MPI_MINLOC is used to compute a global minimum and also an index attached to the minimum value. MPI_MAXLOC similarly computes a global maximum and index. One application of these is to compute a global minimum (maximum) and the rank of the process containing this value. +The operator MPI_MINLOC is used to compute a global minimum and also an index attached to the minimum value. MPI_MAXLOC similarly computes a global maximum and index. One application of these is to compute a global minimum (maximum) and the rank of the process containing this value. .sp -The operation that defines MPI_MAXLOC is +The operation that defines MPI_MAXLOC is .sp .nf ( u ) ( v ) ( w ) @@ -149,7 +147,7 @@ and ( k = ( min(i, j) if u = v ( - ( j if u < v) + ( j if u < v) MPI_MINLOC is defined similarly: @@ -168,7 +166,7 @@ and ( k = ( min(i, j) if u = v ( - ( j if u > v) + ( j if u > v) .fi @@ -200,44 +198,44 @@ provides nine such predefined datatypes. 
The operations MPI_MAXLOC and MPI_MINLOC can be used with each of the following datatypes: .sp .nf - Fortran: - Name Description - MPI_2REAL pair of REALs - MPI_2DOUBLE_PRECISION pair of DOUBLE-PRECISION variables - MPI_2INTEGER pair of INTEGERs - - C: - Name Description - MPI_FLOAT_INT float and int - MPI_DOUBLE_INT double and int - MPI_LONG_INT long and int - MPI_2INT pair of ints - MPI_SHORT_INT short and int + Fortran: + Name Description + MPI_2REAL pair of REALs + MPI_2DOUBLE_PRECISION pair of DOUBLE-PRECISION variables + MPI_2INTEGER pair of INTEGERs + + C: + Name Description + MPI_FLOAT_INT float and int + MPI_DOUBLE_INT double and int + MPI_LONG_INT long and int + MPI_2INT pair of ints + MPI_SHORT_INT short and int MPI_LONG_DOUBLE_INT long double and int .fi .sp The data type MPI_2REAL is equivalent to: .nf - MPI_TYPE_CONTIGUOUS(2, MPI_REAL, MPI_2REAL) + MPI_TYPE_CONTIGUOUS(2, MPI_REAL, MPI_2REAL) .fi .sp Similar statements apply for MPI_2INTEGER, MPI_2DOUBLE_PRECISION, and MPI_2INT. -.sp +.sp The datatype MPI_FLOAT_INT is as if defined by the following sequence of instructions. .sp .nf - type[0] = MPI_FLOAT - type[1] = MPI_INT - disp[0] = 0 - disp[1] = sizeof(float) - block[0] = 1 - block[1] = 1 + type[0] = MPI_FLOAT + type[1] = MPI_INT + disp[0] = 0 + disp[1] = sizeof(float) + block[0] = 1 + block[1] = 1 MPI_TYPE_STRUCT(2, block, disp, type, MPI_FLOAT_INT) .fi .sp -Similar statements apply for MPI_LONG_INT and MPI_DOUBLE_INT. +Similar statements apply for MPI_LONG_INT and MPI_DOUBLE_INT. .sp All MPI objects (e.g., MPI_Datatype, MPI_Comm) are of type INTEGER in Fortran. .SH NOTES ON COLLECTIVE OPERATIONS @@ -245,11 +243,11 @@ All MPI objects (e.g., MPI_Datatype, MPI_Comm) are of type INTEGER in Fortran. The reduction operators ( .I MPI_Op ) do not return an error value. As a result, -if the functions detect an error, all they can do is either call +if the functions detect an error, all they can do is either call .I MPI_Abort or silently skip the problem. 
Thus, if you change the error handler from .I MPI_ERRORS_ARE_FATAL -to something else, for example, +to something else, for example, .I MPI_ERRORS_RETURN , then no error may be indicated. @@ -261,7 +259,7 @@ all collective routines return the same error value. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Reduce_scatter.3in b/ompi/mpi/man/man3/MPI_Reduce_scatter.3in index 7fc18df555b..51bf5fd69f9 100644 --- a/ompi/mpi/man/man3/MPI_Reduce_scatter.3in +++ b/ompi/mpi/man/man3/MPI_Reduce_scatter.3in @@ -72,7 +72,7 @@ Request (handle, non-blocking only). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R @@ -86,22 +86,22 @@ the receive buffer defined by \fIrecvbuf\fP, \fIrecvcounts\fP[i], and \fIdatatyp .SH USE OF IN-PLACE OPTION -When the communicator is an intracommunicator, you can perform a reduce-scatter operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the \fIsendbuf\fR. In this case, the input data is taken from the top of the receive buffer. The area occupied by the input data may be either longer or shorter than the data filled by the output data. +When the communicator is an intracommunicator, you can perform a reduce-scatter operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the \fIsendbuf\fR. In this case, the input data is taken from the top of the receive buffer. The area occupied by the input data may be either longer or shorter than the data filled by the output data. .sp .SH WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR .sp -When the communicator is an inter-communicator, the reduce-scatter operation occurs in two phases. First, the result of the reduction performed on the data provided by the processes in the first group is scattered among the processes in the second group. Then the reverse occurs: the reduction performed on the data provided by the processes in the second group is scattered among the processes in the first group. For each group, all processes provide the same \fIrecvcounts\fR argument, and the sum of the \fIrecvcounts\fR values should be the same for both groups. -.sp +When the communicator is an inter-communicator, the reduce-scatter operation occurs in two phases. First, the result of the reduction performed on the data provided by the processes in the first group is scattered among the processes in the second group. Then the reverse occurs: the reduction performed on the data provided by the processes in the second group is scattered among the processes in the first group. 
For each group, all processes provide the same \fIrecvcounts\fR argument, and the sum of the \fIrecvcounts\fR values should be the same for both groups. +.sp .SH NOTES ON COLLECTIVE OPERATIONS The reduction functions ( .I MPI_Op ) do not return an error value. As a result, -if the functions detect an error, all they can do is either call +if the functions detect an error, all they can do is either call .I MPI_Abort or silently skip the problem. Thus, if you change the error handler from .I MPI_ERRORS_ARE_FATAL -to something else, for example, +to something else, for example, .I MPI_ERRORS_RETURN , then no error may be indicated. @@ -113,5 +113,5 @@ all collective routines return the same error value. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Register_datarep.3in b/ompi/mpi/man/man3/MPI_Register_datarep.3in index 8fb2b23074b..fb0c49b6e3f 100644 --- a/ompi/mpi/man/man3/MPI_Register_datarep.3in +++ b/ompi/mpi/man/man3/MPI_Register_datarep.3in @@ -3,33 +3,37 @@ .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. .\" $COPYRIGHT$ .TH MPI_Register_datarep 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Register_datarep\fP \- Defines data representation. +\fBMPI_Register_datarep\fP \- Defines data representation. .SH SYNTAX .ft R .nf -C Syntax - #include - int MPI_Register_datarep(const char \fI*datarep\fP, - MPI_Datarep_conversion_function \fI*read_conversion_fn\fP, - MPI_Datarep_conversion_function \fI*write_conversion_fn\fP, - MPI_Datarep_extent_function \fI*dtype_file_extent_fn\fP, - void \fI*extra_state\fP) +.SH C Syntax +#include +int MPI_Register_datarep(const char \fI*datarep\fP, + MPI_Datarep_conversion_function \fI*read_conversion_fn\fP, + MPI_Datarep_conversion_function \fI*write_conversion_fn\fP, + MPI_Datarep_extent_function \fI*dtype_file_extent_fn\fP, + void \fI*extra_state\fP) + .fi .SH Fortran Syntax .nf - INCLUDE 'mpif.h' - MPI_REGISTER_DATAREP(\fIDATAREP\fP, \fIREAD_CONVERSION_FN\fP, - \fIWRITE_CONVERSION_FN\fP, \fIDTYPE_FILE_EXTENT_FN\fP, - \fIEXTRA_STATE\fP,\fI IERROR\fP) - CHARACTER*(*) \fIDATAREP\fP - EXTERNAL \fIREAD_CONVERSION_FN, WRITE_CONVERSION_FN, - DTYPE_FILE_EXTENT_FN\fP - INTEGER \fIIERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE\fP +USE MPI +! 
or the older form: INCLUDE 'mpif.h' +MPI_REGISTER_DATAREP(\fIDATAREP\fP, \fIREAD_CONVERSION_FN\fP, + \fIWRITE_CONVERSION_FN\fP, \fIDTYPE_FILE_EXTENT_FN\fP, + \fIEXTRA_STATE\fP, \fIIERROR\fP) + CHARACTER*(*) \fIDATAREP\fP + EXTERNAL \fIREAD_CONVERSION_FN, WRITE_CONVERSION_FN, DTYPE_FILE_EXTENT_FN\fP + INTEGER \fIIERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE\fP + .fi .SH C++ Syntax .nf @@ -61,33 +65,33 @@ Function invoked to get the extent of a data type as represented in the file (fu .ft R .TP 1i extra_state -Extra state. +Extra state. .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Register_datarep defines a data representation. It associates the data representation's identifier (a string) with the functions that convert from file representation to the native representation and vice versa, with the function that gets the extent of a data type as represented in the file, as well as with "extra state," which is used for passing arguments. Once a data representation has been registered using this routine, you may specify its identifier as an argument to MPI_File_set_view, causing subsequent data-access operations to call the specified conversion functions. +MPI_Register_datarep defines a data representation. It associates the data representation's identifier (a string) with the functions that convert from file representation to the native representation and vice versa, with the function that gets the extent of a data type as represented in the file, as well as with "extra state," which is used for passing arguments. Once a data representation has been registered using this routine, you may specify its identifier as an argument to MPI_File_set_view, causing subsequent data-access operations to call the specified conversion functions. 
-The call associates \fIread_conversion_fn\fP, \fIwrite_conversion_fn\fP, and \fIdtype_file_extent_fn\fP with the data representation identifier \fIdatarep\fP. \fIdatarep\fP can then be used as an argument to MPI_File_set_view, causing subsequent data access operations to call the conversion functions to convert all data items accessed between file data representation and native representation. MPI_Register_datarep is a local operation and only registers the data representation for the calling MPI process. If \fIdatarep\fP is already defined, an error in the error class MPI_ERR_DUP_DATAREP is raised using the default file error handler. The length of a data representation string is limited to the value of MPI_MAX_DATAREP_STRING. MPI_MAX_DATAREP_STRING must have a value of at least 64. No routines are provided to delete data representations and free the associated resources; it is not expected that an application will generate them in significant numbers. +The call associates \fIread_conversion_fn\fP, \fIwrite_conversion_fn\fP, and \fIdtype_file_extent_fn\fP with the data representation identifier \fIdatarep\fP. \fIdatarep\fP can then be used as an argument to MPI_File_set_view, causing subsequent data access operations to call the conversion functions to convert all data items accessed between file data representation and native representation. MPI_Register_datarep is a local operation and only registers the data representation for the calling MPI process. If \fIdatarep\fP is already defined, an error in the error class MPI_ERR_DUP_DATAREP is raised using the default file error handler. The length of a data representation string is limited to the value of MPI_MAX_DATAREP_STRING. MPI_MAX_DATAREP_STRING must have a value of at least 64. No routines are provided to delete data representations and free the associated resources; it is not expected that an application will generate them in significant numbers. 
.SH NOTES .ft R -The Fortran version of each MPI I/O routine includes a final argument, -IERROR, which is not defined in the PARAMETERS sections. This argument is used to return the error status of the routine in the manner typical for Fortran library routines. +The Fortran version of each MPI I/O routine includes a final argument, +IERROR, which is not defined in the PARAMETERS sections. This argument is used to return the error status of the routine in the manner typical for Fortran library routines. .sp The C version of each routine returns an error status as an integer return value. .sp -Error classes are found in mpi.h (for C), mpif.h (for Fortran), and mpi++.h (for C++). +Error classes are found in mpi.h (for C), mpif.h (for Fortran), and mpi++.h (for C++). .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. +called. For MPI I/O function errors, the default error handler is set to MPI_ERRORS_RETURN. The error handler may be changed with MPI_File_set_errhandler; the predefined error handler MPI_ERRORS_ARE_FATAL may be used to make I/O errors fatal. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Request_free.3in b/ompi/mpi/man/man3/MPI_Request_free.3in index 93d3d494f1a..e64f2b22a92 100644 --- a/ompi/mpi/man/man3/MPI_Request_free.3in +++ b/ompi/mpi/man/man3/MPI_Request_free.3in @@ -19,7 +19,7 @@ int MPI_Request_free(MPI_Request *request) .nf \s-1INCLUDE\s0 'mpif.h' MPI_REQUEST_FREE(REQUEST, IERROR) - INTEGER REQUEST, IERROR + INTEGER REQUEST, IERROR .fi .SH C++ Syntax @@ -39,54 +39,54 @@ request This operation allows a request object to be deallocated without waiting for the associated communication to complete. .sp MPI_Request_free marks the request object for deallocation and sets request -to MPI_REQUEST_NULL. Any ongoing communication that is associated with the request will be allowed to complete. The request will be deallocated only after its completion. +to MPI_REQUEST_NULL. Any ongoing communication that is associated with the request will be allowed to complete. The request will be deallocated only after its completion. .SH NOTES -Once a request is freed by a call to MPI_Request_free, it is not possible to check for the successful completion of the associated communication with calls to MPI_Wait or MPI_Test. Also, if an error occurs subsequently during the communication, an error code cannot be returned to the user -- such an error must be treated as fatal. Questions arise as to how one knows when the operations have completed when using MPI_Request_free. Depending on the program logic, there may be other ways in which the program knows that certain operations have completed and this makes usage of MPI_Request_free practical. For example, an active send request could be freed when the logic of the program is such that the receiver sends a reply to the message sent -- the arrival of the reply informs the sender that the send has completed and the send buffer can be reused. 
An active receive request should never be freed, as the receiver will have no way to verify that the receive has completed and the receive buffer can be reused. +Once a request is freed by a call to MPI_Request_free, it is not possible to check for the successful completion of the associated communication with calls to MPI_Wait or MPI_Test. Also, if an error occurs subsequently during the communication, an error code cannot be returned to the user -- such an error must be treated as fatal. Questions arise as to how one knows when the operations have completed when using MPI_Request_free. Depending on the program logic, there may be other ways in which the program knows that certain operations have completed and this makes usage of MPI_Request_free practical. For example, an active send request could be freed when the logic of the program is such that the receiver sends a reply to the message sent -- the arrival of the reply informs the sender that the send has completed and the send buffer can be reused. An active receive request should never be freed, as the receiver will have no way to verify that the receive has completed and the receive buffer can be reused. .sp -\fBExample:\fR +\fBExample:\fR .sp .nf - CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank) - IF(rank.EQ.0) THEN - DO i=1, n - CALL MPI_ISEND(outval, 1, MPI_REAL, 1, 0, req, ierr) - CALL MPI_REQUEST_FREE(req, ierr) - CALL MPI_IRECV(inval, 1, MPI_REAL, 1, 0, req, ierr) - CALL MPI_WAIT(req, status, ierr) - END DO - ELSE ! 
rank.EQ.1 - CALL MPI_IRECV(inval, 1, MPI_REAL, 0, 0, req, ierr) - CALL MPI_WAIT(req, status) - DO I=1, n-1 - CALL MPI_ISEND(outval, 1, MPI_REAL, 0, 0, req, ierr) - CALL MPI_REQUEST_FREE(req, ierr) - CALL MPI_IRECV(inval, 1, MPI_REAL, 0, 0, req, ierr) - CALL MPI_WAIT(req, status, ierr) - END DO - CALL MPI_ISEND(outval, 1, MPI_REAL, 0, 0, req, ierr) - CALL MPI_WAIT(req, status) - END IF + CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank) + IF(rank.EQ.0) THEN + DO i=1, n + CALL MPI_ISEND(outval, 1, MPI_REAL, 1, 0, req, ierr) + CALL MPI_REQUEST_FREE(req, ierr) + CALL MPI_IRECV(inval, 1, MPI_REAL, 1, 0, req, ierr) + CALL MPI_WAIT(req, status, ierr) + END DO + ELSE ! rank.EQ.1 + CALL MPI_IRECV(inval, 1, MPI_REAL, 0, 0, req, ierr) + CALL MPI_WAIT(req, status) + DO I=1, n-1 + CALL MPI_ISEND(outval, 1, MPI_REAL, 0, 0, req, ierr) + CALL MPI_REQUEST_FREE(req, ierr) + CALL MPI_IRECV(inval, 1, MPI_REAL, 0, 0, req, ierr) + CALL MPI_WAIT(req, status, ierr) + END DO + CALL MPI_ISEND(outval, 1, MPI_REAL, 0, 0, req, ierr) + CALL MPI_WAIT(req, status) + END IF .fi .sp This routine is normally used to free persistent requests created with -either +either .I MPI_Recv_init -or +or .I MPI_Send_init and friends. However, it can be -used to free a request created with +used to free a request created with .I MPI_Irecv -or +or .I MPI_Isend and friends; in that case the use can not use the test/wait routines on the request. -It +It .B is permitted to free an active request. However, once freed, you can not -use the request in a wait or test routine (e.g., +use the request in a wait or test routine (e.g., .I MPI_Wait ). @@ -94,7 +94,7 @@ use the request in a wait or test routine (e.g., Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Isend @@ -132,4 +132,4 @@ MPI_Testany MPI_Testsome - + diff --git a/ompi/mpi/man/man3/MPI_Request_get_status.3in b/ompi/mpi/man/man3/MPI_Request_get_status.3in index 01bb9ae08ed..62809ca6ad6 100644 --- a/ompi/mpi/man/man3/MPI_Request_get_status.3in +++ b/ompi/mpi/man/man3/MPI_Request_get_status.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Request_get_status 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Request_get_status\fP \- Access information associated with a request without freeing the request. +\fBMPI_Request_get_status\fP \- Access information associated with a request without freeing the request. .SH SYNTAX .ft R @@ -19,7 +19,7 @@ int MPI_Request_get_status(MPI_Request \fIrequest\fP, int \fI*flag\fP, MPI_Statu .nf INCLUDE 'mpif.h' MPI_REQUEST_GET_STATUS(\fIREQUEST\fP, \fIFLAG\fP, \fISTATUS\fP, \fIIERROR\fP) - INTEGER REQUEST, STATUS(MPI_STATUS_SIZE), IERROR + INTEGER REQUEST, STATUS(MPI_STATUS_SIZE), IERROR LOGICAL FLAG .fi @@ -49,13 +49,13 @@ MPI_Status object if flag is true (status). 
.SH DESCRIPTION .ft R -MPI_Request_get_status sets \fIflag\fP=\fItrue\fP if the operation is complete or sets \fIflag\fP=\fIfalse\fP if it is not complete. If the operation is complete, it returns in \fIstatus\fP the request status. It does not deallocate or inactivate the request; a subsequent call to test, wait, or free should be executed with that request. +MPI_Request_get_status sets \fIflag\fP=\fItrue\fP if the operation is complete or sets \fIflag\fP=\fIfalse\fP if it is not complete. If the operation is complete, it returns in \fIstatus\fP the request status. It does not deallocate or inactivate the request; a subsequent call to test, wait, or free should be executed with that request. .sp -If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. +If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Rsend.3in b/ompi/mpi/man/man3/MPI_Rsend.3in index 58255c85d57..814321e0986 100644 --- a/ompi/mpi/man/man3/MPI_Rsend.3in +++ b/ompi/mpi/man/man3/MPI_Rsend.3in @@ -22,13 +22,13 @@ int MPI_Rsend(const void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\f INCLUDE 'mpif.h' MPI_RSEND(\fIBUF, COUNT, DATATYPE, DEST, TAG, COMM, IERROR\fP) \fIBUF\fP(*) - INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, IERROR\fP + INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, IERROR\fP .fi .SH C++ Syntax .nf #include -void Comm::Rsend(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& +void Comm::Rsend(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi @@ -57,7 +57,7 @@ Communicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -69,5 +69,5 @@ ready send is called. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Rsend_init.3in b/ompi/mpi/man/man3/MPI_Rsend_init.3in index a64fffdcfc8..9a0d1e8ee42 100644 --- a/ompi/mpi/man/man3/MPI_Rsend_init.3in +++ b/ompi/mpi/man/man3/MPI_Rsend_init.3in @@ -23,13 +23,13 @@ INCLUDE 'mpif.h' MPI_RSEND_INIT(\fIBUF, COUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP) <type> \fIBUF\fP(*) - INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP + INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP .fi .SH C++ Syntax .nf #include <mpi.h> -Prequest Comm::Rsend_init(const void* \fIbuf\fP, int \fIcount\fP, const +Prequest Comm::Rsend_init(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi @@ -62,19 +62,19 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Creates a persistent communication object for a ready mode send operation, and binds to it all the arguments of a send operation. +Creates a persistent communication object for a ready mode send operation, and binds to it all the arguments of a send operation. .sp -A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start. +A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors.
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Scan.3in b/ompi/mpi/man/man3/MPI_Scan.3in index b8b73813834..87b90dd3993 100644 --- a/ompi/mpi/man/man3/MPI_Scan.3in +++ b/ompi/mpi/man/man3/MPI_Scan.3in @@ -72,7 +72,7 @@ Request (handle, non-blocking only). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -110,7 +110,7 @@ logical(j). The operator that produces this effect is where .sp ( u + v if i = j - w = ( + w = ( ( v if i != j .fi .sp @@ -122,7 +122,7 @@ given below. double val; int log; } SegScanPair; - + /* * the user-defined function */ @@ -131,7 +131,7 @@ given below. 
{ int i; SegScanPair c; - + for (i = 0; i < *len; ++i) { if (in->log == inout->log) c.val = in->val + inout->val; @@ -152,36 +152,36 @@ we must be careful to specify that it is noncommutative, as in the following: .sp .nf - int i, base; - SeqScanPair a, answer; - MPI_Op myOp; - MPI_Datatype type[2] = {MPI_DOUBLE, MPI_INT}; - MPI_Aint disp[2]; - int blocklen[2] = {1, 1}; - MPI_Datatype sspair; + int i, base; + SegScanPair a, answer; + MPI_Op myOp; + MPI_Datatype type[2] = {MPI_DOUBLE, MPI_INT}; + MPI_Aint disp[2]; + int blocklen[2] = {1, 1}; + MPI_Datatype sspair; /* - * explain to MPI how type SegScanPair is defined - */ - MPI_Get_address(a, disp); - MPI_Get_address(a.log, disp + 1); - base = disp[0]; + * explain to MPI how type SegScanPair is defined + */ + MPI_Get_address(&a, disp); + MPI_Get_address(&a.log, disp + 1); + base = disp[0]; for (i = 0; i < 2; ++i) - disp[i] -= base; - MPI_Type_struct(2, blocklen, disp, type, &sspair); + disp[i] -= base; + MPI_Type_struct(2, blocklen, disp, type, &sspair); MPI_Type_commit(&sspair); /* * create the segmented-scan user-op * noncommutative - set commute (arg 2) to 0 - */ + */ MPI_Op_create((MPI_User_function *)segScan, 0, &myOp); - \&... + \&... MPI_Scan(a, answer, 1, sspair, myOp, comm); .fi .SH USE OF IN-PLACE OPTION -When the communicator is an intracommunicator, you can perform a scanning operation in place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the \fIsendbuf\fR argument. The input data is taken from the receive buffer and replaced by the output data. +When the communicator is an intracommunicator, you can perform a scanning operation in place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the \fIsendbuf\fR argument. The input data is taken from the receive buffer and replaced by the output data. .SH NOTES ON COLLECTIVE OPERATIONS .ft R @@ -207,7 +207,7 @@ called.
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Scatter.3in b/ompi/mpi/man/man3/MPI_Scatter.3in index 48563b622f5..5fca42bdb7f 100644 --- a/ompi/mpi/man/man3/MPI_Scatter.3in +++ b/ompi/mpi/man/man3/MPI_Scatter.3in @@ -28,8 +28,8 @@ INCLUDE 'mpif.h' MPI_SCATTER(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, RECVTYPE, ROOT, COMM, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP - INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, ROOT\fP - INTEGER \fICOMM, IERROR\fP + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, ROOT\fP + INTEGER \fICOMM, IERROR\fP MPI_ISCATTER(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, RECVTYPE, ROOT, COMM, REQUEST, IERROR\fP) @@ -43,8 +43,8 @@ MPI_ISCATTER(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, #include void MPI::Comm::Scatter(const void* \fIsendbuf\fP, int \fIsendcount\fP, const MPI::Datatype& \fIsendtype\fP, void* \fIrecvbuf\fP, - int \fIrecvcount\fP, const MPI::Datatype& \fIrecvtype\fP, - int \fIroot\fP) const + int \fIrecvcount\fP, const MPI::Datatype& \fIrecvtype\fP, + int \fIroot\fP) const .fi .SH INPUT PARAMETERS @@ -58,7 +58,7 @@ Number of elements sent to each process (integer, significant only at root). .TP 1i sendtype -Datatype of send buffer elements (handle, significant only at root). +Datatype of send buffer elements (handle, significant only at root). .TP 1i recvcount Number of elements in receive buffer (integer). @@ -83,20 +83,20 @@ Request (handle, non-blocking only). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R -MPI_Scatter is the inverse operation to MPI_Gather. +MPI_Scatter is the inverse operation to MPI_Gather. .sp -The outcome is as if the root executed n send operations, +The outcome is as if the root executed n send operations, .sp .nf MPI_Send(sendbuf + i * sendcount * extent(sendtype), sendcount, sendtype, i, \&...) .fi .sp -and each process executed a receive, +and each process executed a receive, .sp .nf MPI_Recv(recvbuf, recvcount, recvtype, i, \&...). @@ -107,7 +107,7 @@ MPI_Send(\fIsendbuf\fP, \fIsendcount\fP * \fIn\fP,\ \fIsendtype\fP, \&...). This into \fIn\fP equal segments, the ith segment is sent to the ith process in the group, and each process receives this message as above. .sp -The send buffer is ignored for all nonroot processes. +The send buffer is ignored for all nonroot processes. .sp The type signature associated with \fIsendcount\fP, \fIsendtype\fP at the root must be equal to the type signature associated with \fIrecvcount\fP, \fIrecvtype\fP at all @@ -129,36 +129,36 @@ to achieve symmetry with MPI_Gather, where the corresponding restriction (a multiple-write restriction) is necessary. .sp \fBExample:\fR The reverse of Example 1 in the MPI_Gather manpage. Scatter -sets of 100 ints from the root to each process in the group. +sets of 100 ints from the root to each process in the group. .sp .nf MPI_Comm comm; - int gsize,*sendbuf; - int root, rbuf[100]; - \&... - MPI_Comm_size(comm, &gsize); - sendbuf = (int *)malloc(gsize*100*sizeof(int)); - \&... - MPI_Scatter(sendbuf, 100, MPI_INT, rbuf, 100, + int gsize,*sendbuf; + int root, rbuf[100]; + \&... + MPI_Comm_size(comm, &gsize); + sendbuf = (int *)malloc(gsize*100*sizeof(int)); + \&... + MPI_Scatter(sendbuf, 100, MPI_INT, rbuf, 100, MPI_INT, root, comm); .fi .SH USE OF IN-PLACE OPTION -When the communicator is an intracommunicator, you can perform a gather operation in-place (the output buffer is used as the input buffer). 
Use the variable MPI_IN_PLACE as the value of the root process \fIrecvbuf\fR. In this case, \fIrecvcount\fR and \fIrecvtype\fR are ignored, and the root process sends no data to itself. +When the communicator is an intracommunicator, you can perform a scatter operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the root process \fIrecvbuf\fR. In this case, \fIrecvcount\fR and \fIrecvtype\fR are ignored, and the root process sends no data to itself. .sp Note that MPI_IN_PLACE is a special kind of value; it has the same restrictions on its use as MPI_BOTTOM. .sp -Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. +Because the in-place option converts the receive buffer into a send-and-receive buffer, a Fortran binding that includes INTENT must mark these as INOUT, not OUT. .sp .SH WHEN COMMUNICATOR IS AN INTER-COMMUNICATOR .sp -When the communicator is an inter-communicator, the root process in the first group sends data to all processes in the second group. The first group defines the root process. That process uses MPI_ROOT as the value of its \fIroot\fR argument. The remaining processes use MPI_PROC_NULL as the value of their \fIroot\fR argument. All processes in the second group use the rank of that root process in the first group as the value of their \fIroot\fR argument. The receive buffer argument of the root process in the first group must be consistent with the receive buffer argument of the processes in the second group. -.sp +When the communicator is an inter-communicator, the root process in the first group sends data to all processes in the second group. The first group defines the root process. That process uses MPI_ROOT as the value of its \fIroot\fR argument. The remaining processes use MPI_PROC_NULL as the value of their \fIroot\fR argument. 
All processes in the second group use the rank of that root process in the first group as the value of their \fIroot\fR argument. The receive buffer argument of the root process in the first group must be consistent with the receive buffer argument of the processes in the second group. +.sp .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Scatterv.3in b/ompi/mpi/man/man3/MPI_Scatterv.3in index 6eca5e9e574..ca922ec18dc 100644 --- a/ompi/mpi/man/man3/MPI_Scatterv.3in +++ b/ompi/mpi/man/man3/MPI_Scatterv.3in @@ -29,7 +29,7 @@ MPI_SCATTERV(\fISENDBUF, SENDCOUNTS, DISPLS, SENDTYPE, RECVBUF, RECVCOUNT, RECVTYPE, ROOT, COMM, IERROR\fP) \fISENDBUF(*), RECVBUF(*)\fP INTEGER \fISENDCOUNTS(*), DISPLS(*), SENDTYPE\fP - INTEGER \fIRECVCOUNT, RECVTYPE, ROOT, COMM, IERROR\fP + INTEGER \fIRECVCOUNT, RECVTYPE, ROOT, COMM, IERROR\fP MPI_ISCATTERV(\fISENDBUF, SENDCOUNTS, DISPLS, SENDTYPE, RECVBUF, RECVCOUNT, RECVTYPE, ROOT, COMM, REQUEST, IERROR\fP) @@ -44,7 +44,7 @@ MPI_ISCATTERV(\fISENDBUF, SENDCOUNTS, DISPLS, SENDTYPE, RECVBUF, void MPI::Comm::Scatterv(const void* \fIsendbuf\fP, const int \fIsendcounts\fP[], const int \fIdispls\fP[], const MPI::Datatype& \fIsendtype\fP, void* \fIrecvbuf\fP, int \fIrecvcount\fP, const MPI::Datatype& - \fIrecvtype\fP, int \fIroot\fP) const + \fIrecvtype\fP, int \fIroot\fP) const .fi .SH INPUT PARAMETERS @@ -87,11 +87,11 @@ Request (handle, non-blocking only). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Scatterv is the inverse operation to MPI_Gatherv. +MPI_Scatterv is the inverse operation to MPI_Gatherv. .sp MPI_Scatterv extends the functionality of MPI_Scatter by allowing a varying count of data to be sent to each process, since \fIsendcounts\fP is now an array. @@ -128,81 +128,81 @@ location on the root to be read more than once. .sp \fBExample 1:\fR The reverse of Example 5 in the MPI_Gatherv manpage. We have a varying stride between blocks at sending (root) side, at the -receiving side we receive 100 - \fIi\fP elements into the \fIi\fPth column of a 100 x 150 C array at process \fIi\fP. +receiving side we receive 100 - \fIi\fP elements into the \fIi\fPth column of a 100 x 150 C array at process \fIi\fP. 
.sp .nf - MPI_Comm comm; - int gsize,recvarray[100][150],*rptr; - int root, *sendbuf, myrank, bufsize, *stride; - MPI_Datatype rtype; - int i, *displs, *scounts, offset; - \&... - MPI_Comm_size( comm, &gsize); - MPI_Comm_rank( comm, &myrank ); - - stride = (int *)malloc(gsize*sizeof(int)); - \&... - /* stride[i] for i = 0 to gsize-1 is set somehow - * sendbuf comes from elsewhere - */ - \&... - displs = (int *)malloc(gsize*sizeof(int)); - scounts = (int *)malloc(gsize*sizeof(int)); - offset = 0; - for (i=0; i= 100. +MPI_Scatterv, where \fIstride\fP >= 100. .sp .nf - MPI_Comm comm; - int gsize,*sendbuf; - int root, rbuf[100], i, *displs, *scounts; - - \&... - - MPI_Comm_size(comm, &gsize); - sendbuf = (int *)malloc(gsize*stride*sizeof(int)); - \&... - displs = (int *)malloc(gsize*sizeof(int)); - scounts = (int *)malloc(gsize*sizeof(int)); - for (i=0; i \fIBUF(*)\fP - INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, IERROR\fP + INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, IERROR\fP .fi .SH C++ Syntax .nf #include -void Comm::Send(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& +void Comm::Send(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi @@ -57,21 +57,21 @@ Communicator (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Send performs a standard-mode, blocking send. +MPI_Send performs a standard-mode, blocking send. .SH NOTE .ft R -This routine will block until the message is sent to the destination. For an in-depth explanation of the semantics of the standard-mode send, refer to the MPI-1 Standard. +This routine will block until the message is sent to the destination. For an in-depth explanation of the semantics of the standard-mode send, refer to the MPI-1 Standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. 
C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Send_init.3in b/ompi/mpi/man/man3/MPI_Send_init.3in index 7b64db5815d..529e7e56813 100644 --- a/ompi/mpi/man/man3/MPI_Send_init.3in +++ b/ompi/mpi/man/man3/MPI_Send_init.3in @@ -24,14 +24,14 @@ MPI_SEND_INIT(\fIBUF, COUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP) <type> \fIBUF\fP(*) INTEGER \fIREQUEST, COUNT, DATATYPE, DEST, TAG\fP - INTEGER \fICOMM, REQUEST, IERROR\fP + INTEGER \fICOMM, IERROR\fP .fi .SH C++ Syntax .nf #include <mpi.h> -Prequest Comm::Send_init(const void* \fIbuf\fP, int \fIcount\fP, const - Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const +Prequest Comm::Send_init(const void* \fIbuf\fP, int \fIcount\fP, const + Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi .SH INPUT PARAMETERS @@ -63,19 +63,19 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer).
.SH DESCRIPTION .ft R -Creates a persistent communication request for a standard mode send operation, and binds to it all the arguments of a send operation. +Creates a persistent communication request for a standard mode send operation, and binds to it all the arguments of a send operation. .sp -A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start or MPI_Startall. +A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start or MPI_Startall. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Sendrecv.3in b/ompi/mpi/man/man3/MPI_Sendrecv.3in index 2e3fe8814bd..6097ba5fdc6 100644 --- a/ompi/mpi/man/man3/MPI_Sendrecv.3in +++ b/ompi/mpi/man/man3/MPI_Sendrecv.3in @@ -15,7 +15,7 @@ #include int MPI_Sendrecv(const void *\fIsendbuf\fP, int\fI sendcount\fP, MPI_Datatype\fI sendtype\fP, int\fI dest\fP, int\fI sendtag\fP, void\fI *recvbuf\fP, int\fI recvcount\fP, - MPI_Datatype\fI recvtype\fP, int\fI source\fP, int\fI recvtag\fP, + MPI_Datatype\fI recvtype\fP, int\fI source\fP, int\fI recvtag\fP, MPI_Comm\fI comm\fP, MPI_Status\fI *status\fP) .fi @@ -28,21 +28,21 @@ MPI_SENDRECV(\fISENDBUF, SENDCOUNT, SENDTYPE, DEST, SENDTAG, \fISENDBUF(*), RECVBUF(*)\fP INTEGER \fISENDCOUNT, SENDTYPE, DEST, SENDTAG\fP INTEGER \fIRECVCOUNT, RECVTYPE, SOURCE, RECVTAG, COMM\fP - INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax .nf #include -void Comm::Sendrecv(const void *\fIsendbuf\fP, int \fIsendcount\fP, const - Datatype& \fIsendtype\fP, int \fIdest\fP, int \fIsendtag\fP, void *\fIrecvbuf\fP, - int \fIrecvcount\fP, const Datatype& \fIrecvtype\fP, int \fIsource\fP, - int \fIrecvtag\fP, Status& \fIstatus\fP) const +void Comm::Sendrecv(const void *\fIsendbuf\fP, int \fIsendcount\fP, const + Datatype& \fIsendtype\fP, int \fIdest\fP, int \fIsendtag\fP, void *\fIrecvbuf\fP, + int \fIrecvcount\fP, const Datatype& \fIrecvtype\fP, int \fIsource\fP, + int \fIrecvtag\fP, Status& \fIstatus\fP) const -void Comm::Sendrecv(const void *\fIsendbuf\fP, int \fIsendcount\fP, const - Datatype& \fIsendtype\fP, int \fIdest\fP, int \fIsendtag\fP, void *\fIrecvbuf\fP, - int \fIrecvcount\fP, const Datatype& \fIrecvtype\fP, int \fIsource\fP, - int \fIrecvtag\fP) const +void Comm::Sendrecv(const void *\fIsendbuf\fP, int \fIsendcount\fP, const + Datatype& \fIsendtype\fP, int \fIdest\fP, int \fIsendtag\fP, void *\fIrecvbuf\fP, + int \fIrecvcount\fP, const Datatype& \fIrecvtype\fP, int 
\fIsource\fP, + int \fIrecvtag\fP) const .fi .SH INPUT PARAMETERS @@ -89,23 +89,23 @@ Status object (status). This refers to the receive operation. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R The send-receive operations combine in one call the sending of a message to one destination and the receiving of another message, from another process. The two (source and destination) are possibly the same. A send-receive operation is useful for executing a shift operation across a chain of processes. If blocking sends and receives are used for such a shift, then one needs to order the sends and receives correctly (for example, even processes send, then receive; odd processes receive first, then send) in order to prevent cyclic dependencies that may lead to deadlock. When a send-receive operation is used, the communication subsystem takes care of these issues. The send-receive operation can be used in conjunction with the functions described in Chapter 6 of the MPI-1 Standard, "Process Topologies," in order to perform shifts on various logical topologies. Also, a send-receive operation is useful for implementing remote procedure calls. .sp -A message sent by a send-receive operation can be received by a regular receive operation or probed by a probe operation; a send-receive operation can receive a message sent by a regular send operation. +A message sent by a send-receive operation can be received by a regular receive operation or probed by a probe operation; a send-receive operation can receive a message sent by a regular send operation. .sp -MPI_Sendrecv executes a blocking send and receive operation. Both send and receive use the same communicator, but possibly different tags. The send buffer and receive buffers must be disjoint, and may have different lengths and datatypes. +MPI_Sendrecv executes a blocking send and receive operation. 
Both send and receive use the same communicator, but possibly different tags. The send buffer and receive buffers must be disjoint, and may have different lengths and datatypes. .sp -If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. +If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Sendrecv_replace.3in b/ompi/mpi/man/man3/MPI_Sendrecv_replace.3in index 73a35ec16b1..7d332fd1ecf 100644 --- a/ompi/mpi/man/man3/MPI_Sendrecv_replace.3in +++ b/ompi/mpi/man/man3/MPI_Sendrecv_replace.3in @@ -31,13 +31,13 @@ MPI_SENDRECV_REPLACE(\fIBUF, COUNT, DATATYPE, DEST, SENDTAG, SOURCE, .SH C++ Syntax .nf #include -void Comm::Sendrecv_replace(void* \fIbuf\fP, int \fIcount\fP, const - Datatype& \fIdatatype\fP, int \fIdest\fP, int \fIsendtag\fP, int \fIsource\fP, - int \fIrecvtag\fP, Status& \fIstatus\fP) const +void Comm::Sendrecv_replace(void* \fIbuf\fP, int \fIcount\fP, const + Datatype& \fIdatatype\fP, int \fIdest\fP, int \fIsendtag\fP, int \fIsource\fP, + int \fIrecvtag\fP, Status& \fIstatus\fP) const -void Comm::Sendrecv_replace(void* \fIbuf\fP, int \fIcount\fP, const - Datatype& \fIdatatype\fP, int \fIdest\fP, int \fIsendtag\fP, int \fIsource\fP, - int \fIrecvtag\fP) const +void Comm::Sendrecv_replace(void* \fIbuf\fP, int \fIcount\fP, const + Datatype& \fIdatatype\fP, int \fIdest\fP, int \fIsendtag\fP, int \fIsource\fP, + int \fIrecvtag\fP) const .fi .SH INPUT/OUTPUT PARAMETER @@ -77,23 +77,23 @@ status Status object (status). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R The send-receive operations combine in one call the sending of a message to one destination and the receiving of another message, from another process. The two (source and destination) are possibly the same. A send-receive operation is useful for executing a shift operation across a chain of processes. If blocking sends and receives are used for such a shift, then one needs to order the sends and receives correctly (for example, even processes send, then receive; odd processes receive first, then send) in order to prevent cyclic dependencies that may lead to deadlock. When a send-receive operation is used, the communication subsystem takes care of these issues. 
The send-receive operation can be used in conjunction with the functions described in Chapter 6 of the MPI Standard, "Process Topologies," in order to perform shifts on various logical topologies. Also, a send-receive operation is useful for implementing remote procedure calls. .sp -A message sent by a send-receive operation can be received by a regular receive operation or probed by a probe operation; a send-receive operation can receive a message sent by a regular send operation. +A message sent by a send-receive operation can be received by a regular receive operation or probed by a probe operation; a send-receive operation can receive a message sent by a regular send operation. .sp MPI_Sendrecv_replace executes a blocking send and receive. The same buffer is used both for the send and for the receive, so that the message sent is replaced by the message received. .sp -The semantics of a send-receive operation is what would be obtained if the caller forked two concurrent threads, one to execute the send, and one to execute the receive, followed by a join of these two threads. +The semantics of a send-receive operation is what would be obtained if the caller forked two concurrent threads, one to execute the send, and one to execute the receive, followed by a join of these two threads. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Sizeof.3in b/ompi/mpi/man/man3/MPI_Sizeof.3in index 3dc19907f94..211eea85cc9 100644 --- a/ompi/mpi/man/man3/MPI_Sizeof.3in +++ b/ompi/mpi/man/man3/MPI_Sizeof.3in @@ -16,7 +16,7 @@ INCLUDE 'mpif.h' MPI_SIZEOF(\fIX, SIZE, IERROR\fP) \fIX\fP -INTEGER \fISIZE, IERROR\fP +INTEGER \fISIZE, IERROR\fP .fi .SH INPUT PARAMETER @@ -33,7 +33,7 @@ Size of machine representation of that type (integer). .ft R .TP 1i IERROR -Error status (integer). +Error status (integer). .SH DESCRIPTION .ft R @@ -59,7 +59,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Ssend.3in b/ompi/mpi/man/man3/MPI_Ssend.3in index 5bcd8c02950..6b29c5cfff2 100644 --- a/ompi/mpi/man/man3/MPI_Ssend.3in +++ b/ompi/mpi/man/man3/MPI_Ssend.3in @@ -28,7 +28,7 @@ MPI_SSEND(\fIBUF, COUNT, DATATYPE, DEST, TAG, COMM, IERROR\fP) .SH C++ Syntax .nf #include -void Comm::Ssend(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& +void Comm::Ssend(const void* \fIbuf\fP, int \fIcount\fP, const Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi @@ -57,15 +57,15 @@ Communicator (handle). 
.ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Ssend performs a synchronous-mode, blocking send. See the MPI-1 Standard for more detailed information about such sends. +MPI_Ssend performs a synchronous-mode, blocking send. See the MPI-1 Standard for more detailed information about such sends. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Ssend_init.3in b/ompi/mpi/man/man3/MPI_Ssend_init.3in index 743e0ffe1a8..f31ec9ba802 100644 --- a/ompi/mpi/man/man3/MPI_Ssend_init.3in +++ b/ompi/mpi/man/man3/MPI_Ssend_init.3in @@ -23,14 +23,14 @@ INCLUDE 'mpif.h' MPI_SSEND_INIT(\fIBUF, COUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP) \fIBUF\fP(*) - INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP + INTEGER \fICOUNT, DATATYPE, DEST, TAG, COMM, REQUEST, IERROR\fP .fi .SH C++ Syntax .nf #include -Prequest Comm::Ssend_init(const void* \fIbuf\fP, int \fIcount\fP, const - Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const +Prequest Comm::Ssend_init(const void* \fIbuf\fP, int \fIcount\fP, const + Datatype& \fIdatatype\fP, int \fIdest\fP, int \fItag\fP) const .fi .SH INPUT PARAMETERS @@ -62,19 +62,19 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Creates a persistent communication object for a synchronous mode send operation, and binds to it all the arguments of a send operation. +Creates a persistent communication object for a synchronous mode send operation, and binds to it all the arguments of a send operation. .sp -A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start. +A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Start.3in b/ompi/mpi/man/man3/MPI_Start.3in index 22ad9ad28b4..09add808bb7 100644 --- a/ompi/mpi/man/man3/MPI_Start.3in +++ b/ompi/mpi/man/man3/MPI_Start.3in @@ -38,11 +38,11 @@ Communication request (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start. +A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start. .sp The argument, request, is a handle returned by one of the persistent communication-request initialization functions (MPI_Send_init, MPI_Bsend_init, MPI_Ssend_init, MPI_Rsend_init, MPI_Recv_init). The associated request should be inactive and becomes active once the call is made. .sp @@ -54,7 +54,7 @@ The call is local, with semantics similar to the nonblocking communication opera Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Startall.3in b/ompi/mpi/man/man3/MPI_Startall.3in index 66e59cc4e37..94ba56f445e 100644 --- a/ompi/mpi/man/man3/MPI_Startall.3in +++ b/ompi/mpi/man/man3/MPI_Startall.3in @@ -19,13 +19,13 @@ int MPI_Startall(int \fIcount\fP, MPI_Request\fI array_of_requests[]\fP) .nf INCLUDE 'mpif.h' MPI_STARTALL(\fICOUNT, ARRAY_OF_REQUESTS, IERROR\fP) - INTEGER \fICOUNT, ARRAY_OF_REQUESTS(*), IERROR\fP + INTEGER \fICOUNT, ARRAY_OF_REQUESTS(*), IERROR\fP .fi .SH C++ Syntax .nf #include -static void Prequest::Startall(int \fIcount\fP, Prequest \fIarray_of_requests\fP[]) +static void Prequest::Startall(int \fIcount\fP, Prequest \fIarray_of_requests\fP[]) .fi .SH INPUT PARAMETER @@ -44,31 +44,31 @@ Array of requests (array of handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Starts all communications associated with requests in array_of_requests. A call to MPI_Startall(count, array_of_requests) has the same effect as calls to MPI_Start (&array_of_requests[i]), executed for i=0 ,..., count-1, in some arbitrary order. +Starts all communications associated with requests in array_of_requests. 
A call to MPI_Startall(count, array_of_requests) has the same effect as calls to MPI_Start (&array_of_requests[i]), executed for i=0 ,..., count-1, in some arbitrary order. .sp A communication started with a call to MPI_Start or MPI_Startall is completed by a call to MPI_Wait, MPI_Test, or one of the derived functions MPI_Waitany, MPI_Testany, MPI_Waitall, MPI_Testall, MPI_Waitsome, MPI_Testsome (these are described in Section 3.7.5 of the MPI-1 Standard, "Multiple Completions"). The request becomes inactive after successful completion by such a call. The request is not deallocated, and it can be activated anew by another MPI_Start or MPI_Startall call. .sp -A persistent request is deallocated by a call to MPI_Request_free (see Section 3.7.3 of the MPI-1 Standard, "Communication Completion"). +A persistent request is deallocated by a call to MPI_Request_free (see Section 3.7.3 of the MPI-1 Standard, "Communication Completion"). .sp The call to MPI_Request_free can occur at any point in the program after the persistent request was created. However, the request will be deallocated only after it becomes inactive. Active receive requests should not be freed. Otherwise, it will not be possible to check that the receive has completed. It is preferable, in general, to free requests when they are inactive. If this rule is followed, then the persistent communication request functions will be invoked in a sequence of the form, .br .sp - Create (Start Complete)* Free + Create (Start Complete)* Free .br .sp where * indicates zero or more repetitions. If the same communication object is used in several concurrent threads, it is the user's responsibility to coordinate calls so that the correct sequence is obeyed. .sp -A send operation initiated with MPI_Start can be matched with any receive operation and, likewise, a receive operation initiated with MPI_Start can receive messages generated by any send operation. 
+A send operation initiated with MPI_Start can be matched with any receive operation and, likewise, a receive operation initiated with MPI_Start can receive messages generated by any send operation. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Status_f2c.3in b/ompi/mpi/man/man3/MPI_Status_f2c.3in index c2fa300d268..c28748be543 100644 --- a/ompi/mpi/man/man3/MPI_Status_f2c.3in +++ b/ompi/mpi/man/man3/MPI_Status_f2c.3in @@ -18,13 +18,13 @@ int MPI_Status_c2f(const MPI_Status \fI*c_status\fP, MPI_Fint \fI*f_status\fP) .fi .SH DESCRIPTION .ft R -These two procedures are provided in C to convert from a Fortran status (which is an array of integers) to a C status (which is a structure), and vice versa. The conversion occurs on all the information in \fIstatus\fP, including that which is hidden. That is, no status information is lost in the conversion. 
+These two procedures are provided in C to convert from a Fortran status (which is an array of integers) to a C status (which is a structure), and vice versa. The conversion occurs on all the information in \fIstatus\fP, including that which is hidden. That is, no status information is lost in the conversion. .sp -When using MPI_Status_f2c, if \fIf_status\fP is a valid Fortran status, but not the Fortran value of MPI_STATUS_IGNORE or MPI_STATUSES_IGNORE, then MPI_Status_f2c returns in \fIc_status\fP a valid C status with the same content. If \fIf_status\fP is the Fortran value of MPI_STATUS_IGNORE or MPI_STATUSES_IGNORE, or if \fIf_status\fP is not a valid Fortran status, then the call is erroneous. +When using MPI_Status_f2c, if \fIf_status\fP is a valid Fortran status, but not the Fortran value of MPI_STATUS_IGNORE or MPI_STATUSES_IGNORE, then MPI_Status_f2c returns in \fIc_status\fP a valid C status with the same content. If \fIf_status\fP is the Fortran value of MPI_STATUS_IGNORE or MPI_STATUSES_IGNORE, or if \fIf_status\fP is not a valid Fortran status, then the call is erroneous. .sp When using MPI_Status_c2f, the opposite conversion is applied. If \fIc_status\fP is MPI_STATUS_IGNORE or MPI_STATUSES_IGNORE, or if \fIc_status\fP is not a valid C status, then the call is erroneous. .sp -The C status has the same source, tag and error code values as the Fortran status, and returns the same answers when queried for count, elements, and cancellation. The conversion function may be called with a Fortran status argument that has an undefined error field, in which case the value of the error field in the C status argument is undefined. +The C status has the same source, tag and error code values as the Fortran status, and returns the same answers when queried for count, elements, and cancellation. 
The conversion function may be called with a Fortran status argument that has an undefined error field, in which case the value of the error field in the C status argument is undefined. .sp diff --git a/ompi/mpi/man/man3/MPI_Status_set_cancelled.3in b/ompi/mpi/man/man3/MPI_Status_set_cancelled.3in index 689168bd604..b27b183b51d 100644 --- a/ompi/mpi/man/man3/MPI_Status_set_cancelled.3in +++ b/ompi/mpi/man/man3/MPI_Status_set_cancelled.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Status_set_cancelled 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Status_set_cancelled\fP \- Sets \fIstatus\fP to indicate a request has been canceled. +\fBMPI_Status_set_cancelled\fP \- Sets \fIstatus\fP to indicate a request has been canceled. .SH SYNTAX .ft R @@ -45,19 +45,19 @@ If true, indicates request was canceled (logical). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -If \fIflag\fP is set to true, then a subsequent call to MPI_Test_cancelled(\fIstatus, flag\fP) will also return \fIflag\fP = true; otherwise it will return false. +If \fIflag\fP is set to true, then a subsequent call to MPI_Test_cancelled(\fIstatus, flag\fP) will also return \fIflag\fP = true; otherwise it will return false. .SH NOTES .ft R -Users are advised not to reuse the status fields for values other than those for which they were intended. Doing so may lead to unexpected results when using the status object. For example, calling MPI_Get_elements may cause an error if the value is out of range, or it may be impossible to detect such an error. The \fIextra_state\fP argument provided with a generalized request can be used to return information that does not logically belong in \fIstatus\fP. Furthermore, modifying the values in a status set internally by MPI, such as MPI_Recv, may lead to unpredictable results and is strongly discouraged. 
+Users are advised not to reuse the status fields for values other than those for which they were intended. Doing so may lead to unexpected results when using the status object. For example, calling MPI_Get_elements may cause an error if the value is out of range, or it may be impossible to detect such an error. The \fIextra_state\fP argument provided with a generalized request can be used to return information that does not logically belong in \fIstatus\fP. Furthermore, modifying the values in a status set internally by MPI, such as MPI_Recv, may lead to unpredictable results and is strongly discouraged. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Status_set_elements.3in b/ompi/mpi/man/man3/MPI_Status_set_elements.3in index b550aeec5b1..47a12c7bfcd 100644 --- a/ompi/mpi/man/man3/MPI_Status_set_elements.3in +++ b/ompi/mpi/man/man3/MPI_Status_set_elements.3in @@ -13,8 +13,8 @@ .SH C Syntax .nf #include -int MPI_Status_set_elements(MPI_Status *\fIstatus\fP, MPI_Datatype \fIdatatype\fP, int \fIcount\fP) -int MPI_Status_set_elements_x(MPI_Status *\fIstatus\fP, MPI_Datatype \fIdatatype\fP, MPI_Count \fIcount\fP) +int MPI_Status_set_elements(MPI_Status *\fIstatus\fP, MPI_Datatype \fIdatatype\fP, int \fIcount\fP) +int MPI_Status_set_elements_x(MPI_Status *\fIstatus\fP, MPI_Datatype \fIdatatype\fP, MPI_Count \fIcount\fP) .fi .SH Fortran Syntax @@ -53,23 +53,23 @@ Number of elements to associate with \fIstatus\fP (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Status_set_elements modifies the opaque part of \fIstatus\fP so that a call to MPI_Get_elements or MPI_Get_elements_x will return \fIcount\fP. MPI_Get_count will return a compatible value. .sp -A subsequent call to MPI_Get_count(\fIstatus, datatype, count\fP), to MPI_Get_elements(\fIstatus, datatype, count\fP), or to MPI_Get_elements_x(\fIstatus, datatype, count\fP) must use a data-type argument that has the same type signature as the data-type argument that was used in the call to MPI_Status_set_elements. +A subsequent call to MPI_Get_count(\fIstatus, datatype, count\fP), to MPI_Get_elements(\fIstatus, datatype, count\fP), or to MPI_Get_elements_x(\fIstatus, datatype, count\fP) must use a data-type argument that has the same type signature as the data-type argument that was used in the call to MPI_Status_set_elements. .SH NOTES .ft R -Users are advised not to reuse the status fields for values other than those for which they were intended. Doing so may lead to unexpected results when using the status object. 
For example, calling MPI_Get_elements may cause an error if the value is out of range, or it may be impossible to detect such an error. The \fIextra_state\fP argument provided with a generalized request can be used to return information that does not logically belong in \fIstatus\fP. Furthermore, modifying the values in a status set internally by MPI, such as MPI_Recv, may lead to unpredictable results and is strongly discouraged. +Users are advised not to reuse the status fields for values other than those for which they were intended. Doing so may lead to unexpected results when using the status object. For example, calling MPI_Get_elements may cause an error if the value is out of range, or it may be impossible to detect such an error. The \fIextra_state\fP argument provided with a generalized request can be used to return information that does not logically belong in \fIstatus\fP. Furthermore, modifying the values in a status set internally by MPI, such as MPI_Recv, may lead to unpredictable results and is strongly discouraged. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH FORTRAN 77 NOTES .ft R diff --git a/ompi/mpi/man/man3/MPI_T_pvar_get_info.3in b/ompi/mpi/man/man3/MPI_T_pvar_get_info.3in index abe6133cbbd..86e7dbaa7a1 100644 --- a/ompi/mpi/man/man3/MPI_T_pvar_get_info.3in +++ b/ompi/mpi/man/man3/MPI_T_pvar_get_info.3in @@ -87,7 +87,7 @@ behavior. The class returned in the \fIvar_class\fP parameter may be one of the .TP 2 MPI_T_PVAR_CLASS_STATE Variable represents a set of discrete states that may be described by an enumerator. Variables of this class -must be represented by an MPI_INT. The starting value is the current state of the variable. +must be represented by an MPI_INT. The starting value is the current state of the variable. .TP 2 MPI_T_PVAR_CLASS_LEVEL Variable represents the current utilization level of a resource. Variables of this class must be represented diff --git a/ompi/mpi/man/man3/MPI_T_pvar_start.3in b/ompi/mpi/man/man3/MPI_T_pvar_start.3in index d3a11c6f1eb..450638149aa 100644 --- a/ompi/mpi/man/man3/MPI_T_pvar_start.3in +++ b/ompi/mpi/man/man3/MPI_T_pvar_start.3in @@ -38,7 +38,7 @@ MPI_T_pvar_stop stops the performance variable with the handle specified in \fIh The special value MPI_T_PVAR_ALL_HANDLES can be passed in \fIhandle\fP to stop all non-continuous handles in the session specified in \fIsession\fP. -Continuous performance variables can neither be started nor stopped. +Continuous performance variables can neither be started nor stopped. 
.SH ERRORS .ft R diff --git a/ompi/mpi/man/man3/MPI_Test.3in b/ompi/mpi/man/man3/MPI_Test.3in index 7c4ec14572c..08adf28a86e 100644 --- a/ompi/mpi/man/man3/MPI_Test.3in +++ b/ompi/mpi/man/man3/MPI_Test.3in @@ -20,7 +20,7 @@ int MPI_Test(MPI_Request *\fIrequest\fP, int\fI *flag\fP, MPI_Status\fI *status\ INCLUDE 'mpif.h' MPI_TEST(\fIREQUEST, FLAG, STATUS, IERROR\fP) LOGICAL \fIFLAG\fP - INTEGER \fIREQUEST, STATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER \fIREQUEST, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -48,15 +48,15 @@ Status object (status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -A call to MPI_Test returns flag = true if the operation identified by request is complete. In such a case, the status object is set to contain information on the completed operation; if the communication object was created by a nonblocking send or receive, then it is deallocated and the request handle is set to MPI_REQUEST_NULL. The call returns flag = false, otherwise. In this case, the value of the status object is undefined. MPI_Test is a local operation. +A call to MPI_Test returns flag = true if the operation identified by request is complete. In such a case, the status object is set to contain information on the completed operation; if the communication object was created by a nonblocking send or receive, then it is deallocated and the request handle is set to MPI_REQUEST_NULL. The call returns flag = false, otherwise. In this case, the value of the status object is undefined. MPI_Test is a local operation. .sp The return status object for a receive operation carries information that can be accessed as described in Section 3.2.5 of the MPI-1 Standard, "Return Status." The status object for a send operation carries information that can be accessed by a call to MPI_Test_cancelled (see Section 3.8 of the MPI-1 Standard, "Probe and Cancel"). 
.sp -If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. +If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. .sp One is allowed to call MPI_Test with a null or inactive \fIrequest\fP argument. In such a case the operation returns with \fIflag\fP = true and empty \fIstatus\fP. .sp @@ -64,7 +64,7 @@ The functions MPI_Wait and MPI_Test can be used to complete both sends and receives. .SH NOTES -The use of the nonblocking MPI_Test call allows the user to schedule alternative activities within a single thread of execution. An event-driven thread scheduler can be emulated with periodic calls to MPI_Test. +The use of the nonblocking MPI_Test call allows the user to schedule alternative activities within a single thread of execution. An event-driven thread scheduler can be emulated with periodic calls to MPI_Test. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. diff --git a/ompi/mpi/man/man3/MPI_Test_cancelled.3in b/ompi/mpi/man/man3/MPI_Test_cancelled.3in index c8d979de0e0..e227f937c0b 100644 --- a/ompi/mpi/man/man3/MPI_Test_cancelled.3in +++ b/ompi/mpi/man/man3/MPI_Test_cancelled.3in @@ -21,7 +21,7 @@ int MPI_Test_cancelled(const MPI_Status *\fIstatus\fP, int \fI*flag\fP) INCLUDE 'mpif.h' MPI_TEST_CANCELLED(\fISTATUS, FLAG, IERROR\fP) LOGICAL \fIFLAG\fP - INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -40,11 +40,11 @@ Status object (status). 
.ft R .TP 1i flag -True if operation was cancelled (logical). +True if operation was cancelled (logical). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -52,13 +52,13 @@ Returns \fIflag\fP = true if the communication associated with the status object was canceled successfully. In such a case, all other fields of status (such as \fIcount\fP or \fItag\fP) are undefined. Otherwise, returns \fIflag\fP = false. If a receive operation might be canceled, one should call MPI_Test_cancelled first, to check whether the operation was canceled, before checking on the other fields of the return status. .SH NOTES -Cancel can be an expensive operation that should be used only exceptionally. +Cancel can be an expensive operation that should be used only exceptionally. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Testall.3in b/ompi/mpi/man/man3/MPI_Testall.3in index 0462e09e5bc..87ba0a08546 100644 --- a/ompi/mpi/man/man3/MPI_Testall.3in +++ b/ompi/mpi/man/man3/MPI_Testall.3in @@ -29,8 +29,8 @@ MPI_TESTALL(\fICOUNT, ARRAY_OF_REQUESTS, FLAG, ARRAY_OF_STATUSES, .SH C++ Syntax .nf #include -static bool Request::Testall(int \fIcount\fP, Request - \fIarray_of_requests\fP[], Status \fIarray_of_statuses\fP[]) +static bool Request::Testall(int \fIcount\fP, Request + \fIarray_of_requests\fP[], Status \fIarray_of_statuses\fP[]) static bool Request::Testall(int \fIcount\fP, Request \fIarray_of_requests\fP[]) @@ -55,7 +55,7 @@ Array of status objects (array of status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -63,7 +63,7 @@ Returns \fIflag\fP = true if all communications associated with active handles i .sp Otherwise, \fIflag\fP = false is returned, no request is modified and the values of the status entries are undefined. This is a local operation. .sp -If your application does not need to examine the \fIarray_of_statuses\fP field, you can save resources by using the predefined constant MPI_STATUSES_IGNORE can be used as a special value for the \fIarray_of_statuses\fP argument. +If your application does not need to examine the \fIarray_of_statuses\fP field, you can save resources by using the predefined constant MPI_STATUSES_IGNORE as a special value for the \fIarray_of_statuses\fP argument. .sp Errors that occurred during the execution of MPI_Testall are handled in the same manner as errors in MPI_Waitall. 
diff --git a/ompi/mpi/man/man3/MPI_Testany.3in b/ompi/mpi/man/man3/MPI_Testany.3in index e307f24c403..31465e6afd3 100644 --- a/ompi/mpi/man/man3/MPI_Testany.3in +++ b/ompi/mpi/man/man3/MPI_Testany.3in @@ -4,14 +4,14 @@ .\" $COPYRIGHT$ .TH MPI_Testany 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Testany\fP \- Tests for completion of any one previously initiated communication in a list. +\fBMPI_Testany\fP \- Tests for completion of any one previously initiated communication in a list. .SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Testany(int \fIcount\fP, MPI_Request\fI array_of_requests[]\fP, +int MPI_Testany(int \fIcount\fP, MPI_Request\fI array_of_requests[]\fP, int\fI *index\fP, int\fI *flag\fP, MPI_Status\fI *status\fP) .fi @@ -19,19 +19,19 @@ int MPI_Testany(int \fIcount\fP, MPI_Request\fI array_of_requests[]\fP, .nf INCLUDE 'mpif.h' MPI_TESTANY(\fICOUNT, ARRAY_OF_REQUESTS, INDEX, FLAG, STATUS, IERROR\fP) - LOGICAL \fIFLAG\fP - INTEGER \fICOUNT, ARRAY_OF_REQUESTS(*), INDEX\fP - INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP + LOGICAL \fIFLAG\fP + INTEGER \fICOUNT, ARRAY_OF_REQUESTS(*), INDEX\fP + INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax .nf #include -static bool Request::Testany(int \fIcount\fP, Request \fIarray_of_requests\fP[], - int& \fIindex\fP, Status& \fIstatus\fP) +static bool Request::Testany(int \fIcount\fP, Request \fIarray_of_requests\fP[], + int& \fIindex\fP, Status& \fIstatus\fP) -static bool Request::Testany(int \fIcount\fP, Request \fIarray_of_requests\fP[], - int& \fIindex\fP) +static bool Request::Testany(int \fIcount\fP, Request \fIarray_of_requests\fP[], + int& \fIindex\fP) .fi .SH INPUT PARAMETERS @@ -58,7 +58,7 @@ Status object (status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -69,7 +69,7 @@ The array may contain null or inactive handles. 
If the array contains no active If the array of requests contains active handles then the execution of MPI_Testany(count, array_of_requests, index, status) has the same effect as the execution of MPI_Test(&\fIarray_of_requests[i\fP], \fIflag\fP, \fIstatus\fP), for \fIi\fP=0,1,...,count-1, in some arbitrary order, until one call returns \fIflag\fP = true, or all fail. In the former case, \fIindex\fP is set to the last value of \fIi\fP, and in the latter case, it is set to MPI_UNDEFINED. MPI_Testany with an array containing one active entry is equivalent to MPI_Test. .sp -If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. +If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. diff --git a/ompi/mpi/man/man3/MPI_Testsome.3in b/ompi/mpi/man/man3/MPI_Testsome.3in index 767ff8f31ef..5b79c586fb9 100644 --- a/ompi/mpi/man/man3/MPI_Testsome.3in +++ b/ompi/mpi/man/man3/MPI_Testsome.3in @@ -1,11 +1,11 @@ .\" -*- nroff -*- .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation -.\" Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. .\" $COPYRIGHT$ .TH MPI_Testsome 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Testsome\fP \- Tests for completion of one or more previously initiated communications in a list. 
+\fBMPI_Testsome\fP \- Tests for completion of one or more previously initiated communications in a list. .SH SYNTAX .ft R @@ -13,7 +13,8 @@ .nf #include int MPI_Testsome(int \fIincount\fP, MPI_Request \fIarray_of_requests[]\fP, - int\fI *outcount\fP, int\fI array_of_indices[]\fP, MPI_Status\fI array_of_statuses[]\fP) + int\fI *outcount\fP, int\fI array_of_indices[]\fP, + MPI_Status\fI array_of_statuses[]\fP) .fi .SH Fortran Syntax @@ -23,17 +24,18 @@ MPI_TESTSOME(\fIINCOUNT, ARRAY_OF_REQUESTS, OUTCOUNT, ARRAY_OF_INDICES, ARRAY_OF_STATUSES, IERROR\fP) INTEGER \fIINCOUNT, ARRAY_OF_REQUESTS(*)\fP INTEGER \fIOUTCOUNT, ARRAY_OF_INDICES(*)\fP - INTEGER \fIARRAY_OF_STATUSES(MPI_STATUS_SIZE,*), IERROR\fP + INTEGER \fIARRAY_OF_STATUSES(MPI_STATUS_SIZE,*), IERROR\fP .fi .SH C++ Syntax .nf #include -static int Request::Testsome(int \fIincount\fP, Request - \fIarray_of_requests\fP[], int \fIarray_of_indices\fP[], Status \fIarray_of_statuses\fP[]) +static int Request::Testsome(int \fIincount\fP, Request + \fIarray_of_requests\fP[], int \fIarray_of_indices\fP[], + Status \fIarray_of_statuses\fP[]) -static int Request::Testsome(int \fIincount\fP, Request - \fIarray_of_requests\fP[], int \fIarray_of_indices\fP[]) +static int Request::Testsome(int \fIincount\fP, Request + \fIarray_of_requests\fP[], int \fIarray_of_indices\fP[]) .fi .SH INPUT PARAMETERS @@ -59,24 +61,37 @@ Array of status objects for operations that completed (array of status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Behaves like MPI_Waitsome, except that it returns immediately. If no operation has completed it returns outcount = 0. If there is no active handle in the list, it returns outcount = MPI_UNDEFINED. +Behaves like MPI_Waitsome, except that it returns immediately. +.sp +Returns in outcount the number of requests from the list +array_of_requests that have completed. 
Returns in the first outcount +locations of the array array_of_indices the indices of these +operations (index within the array array_of_requests; the array is +indexed from 0 in C and from 1 in Fortran). Returns in the first +outcount locations of the array array_of_statuses the status for these +completed operations. If a request that completed was allocated by a +nonblocking communication call, then it is deallocated, and the +associated handle is set to MPI_REQUEST_NULL. +.sp +If no operation has completed it returns outcount = 0. If there is no +active handle in the list, it returns outcount = MPI_UNDEFINED. .sp MPI_Testsome is a local operation, which returns immediately, whereas MPI_Waitsome blocks until a communication completes, if it was passed a list that contains at least one active handle. Both calls fulfill a fairness requirement: If a request for a receive repeatedly appears in a list of requests passed to MPI_Waitsome or MPI_Testsome, and a matching send has been posted, then the receive will eventually succeed unless the send is satisfied by another receive; send requests also fulfill this fairness requirement. .sp Errors that occur during the execution of MPI_Testsome are handled as for -MPI_Waitsome. +MPI_Waitsome. .sp -If your application does not need to examine the \fIarray_of_statuses\fP field, you can save resources by using the predefined constant MPI_STATUSES_IGNORE can be used as a special value for the \fIarray_of_statuses\fP argument. +If your application does not need to examine the \fIarray_of_statuses\fP field, you can save resources by using the predefined constant MPI_STATUSES_IGNORE as a special value for the \fIarray_of_statuses\fP argument. .SH NOTES The use of MPI_Testsome is likely to be more -efficient than the use of MPI_Testany. The former returns information on all completed communications; with the latter, a new call is required for each communication that completes. +efficient than the use of MPI_Testany. 
The former returns information on all completed communications; with the latter, a new call is required for each communication that completes. .sp -A server with multiple clients can use MPI_Waitsome so as not to starve any client. Clients send messages to the server with service requests. The server calls MPI_Waitsome with one receive request for each client, then handles all receives that have completed. If a call to MPI_Waitany is used instead, then one client could starve while requests from another client always sneak in first. +A server with multiple clients can use MPI_Waitsome so as not to starve any client. Clients send messages to the server with service requests. The server calls MPI_Waitsome with one receive request for each client, then handles all receives that have completed. If a call to MPI_Waitany is used instead, then one client could starve while requests from another client always sneak in first. .SH ERRORS For each invocation of MPI_Testsome, if one or more requests generate diff --git a/ompi/mpi/man/man3/MPI_Topo_test.3in b/ompi/mpi/man/man3/MPI_Topo_test.3in index 438e7f7e0ef..ff4cdabed7d 100644 --- a/ompi/mpi/man/man3/MPI_Topo_test.3in +++ b/ompi/mpi/man/man3/MPI_Topo_test.3in @@ -42,7 +42,7 @@ Topology type of communicator comm (choice). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -53,6 +53,7 @@ The output value \fItop_type\fP is one of the following: .nf MPI_GRAPH graph topology MPI_CART Cartesian topology + MPI_DIST_GRAPH distributed graph topology MPI_UNDEFINED no topology .fi @@ -60,7 +61,7 @@ The output value \fItop_type\fP is one of the following: Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Graph_create diff --git a/ompi/mpi/man/man3/MPI_Type_commit.3in b/ompi/mpi/man/man3/MPI_Type_commit.3in index 362cc4a133c..b556e4f266a 100644 --- a/ompi/mpi/man/man3/MPI_Type_commit.3in +++ b/ompi/mpi/man/man3/MPI_Type_commit.3in @@ -19,7 +19,7 @@ int MPI_Type_commit(MPI_Datatype *\fIdatatype\fP) .nf INCLUDE 'mpif.h' MPI_TYPE_COMMIT(\fIDATATYPE, IERROR\fP) - INTEGER \fIDATATYPE, IERROR\fP + INTEGER \fIDATATYPE, IERROR\fP .fi .SH C++ Syntax @@ -38,20 +38,20 @@ Data type (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -The commit operation commits the data type. A data type is the formal description of a communication buffer, not the content of that buffer. After a data type has been committed, it can be repeatedly reused to communicate the changing content of a buffer or, indeed, the content of different buffers, with different starting addresses. +The commit operation commits the data type. A data type is the formal description of a communication buffer, not the content of that buffer. 
After a data type has been committed, it can be repeatedly reused to communicate the changing content of a buffer or, indeed, the content of different buffers, with different starting addresses. .sp -\fBExample:\fP The following Fortran code fragment gives examples of using MPI_Type_commit. +\fBExample:\fP The following Fortran code fragment gives examples of using MPI_Type_commit. .sp .nf - INTEGER type1, type2 - CALL MPI_TYPE_CONTIGUOUS(5, MPI_REAL, type1, ierr) - ! new type object created - CALL MPI_TYPE_COMMIT(type1, ierr) - ! now type1 can be used for communication + INTEGER type1, type2 + CALL MPI_TYPE_CONTIGUOUS(5, MPI_REAL, type1, ierr) + ! new type object created + CALL MPI_TYPE_COMMIT(type1, ierr) + ! now type1 can be used for communication .fi .sp If the data type specified in \fIdatatype\fP is already committed, it is equivalent to a no-op. @@ -60,5 +60,5 @@ If the data type specified in \fIdatatype\fP is already committed, it is equival Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Type_contiguous.3in b/ompi/mpi/man/man3/MPI_Type_contiguous.3in index 64b0d4f4bcf..cbe055ed855 100644 --- a/ompi/mpi/man/man3/MPI_Type_contiguous.3in +++ b/ompi/mpi/man/man3/MPI_Type_contiguous.3in @@ -20,7 +20,7 @@ int MPI_Type_contiguous(int \fIcount\fP, MPI_Datatype\fI oldtype\fP, .nf INCLUDE 'mpif.h' MPI_TYPE_CONTIGUOUS(\fICOUNT, OLDTYPE, NEWTYPE, IERROR\fP) - INTEGER \fICOUNT, OLDTYPE, NEWTYPE, IERROR\fP + INTEGER \fICOUNT, OLDTYPE, NEWTYPE, IERROR\fP .fi .SH C++ Syntax @@ -32,49 +32,49 @@ Datatype Datatype::Create_contiguous(int \fIcount\fP) const .SH INPUT PARAMETERS .ft R .TP 1i -count +count Replication count (nonnegative integer). .TP 1i -oldtype +oldtype Old datatype (handle). .sp .SH OUTPUT PARAMETERS .ft R .TP 1i -newtype +newtype New datatype (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -The simplest datatype constructor is MPI_Type_contiguous, which allows replication of a datatype into contiguous locations. +The simplest datatype constructor is MPI_Type_contiguous, which allows replication of a datatype into contiguous locations. .sp -\fInewtype\fP is the datatype obtained by concatenating \fIcount\fP copies of \fIoldtype\fP. Concatenation is defined using the extent of \fIoldtype\fP as the size of the concatenated copies. +\fInewtype\fP is the datatype obtained by concatenating \fIcount\fP copies of \fIoldtype\fP. Concatenation is defined using the extent of \fIoldtype\fP as the size of the concatenated copies. .sp -\fBExample:\fR Let oldtype have type map {(double, 0), (char, 8)}, with extent 16, and let count = 3. The type map of the datatype returned by newtype is +\fBExample:\fR Let oldtype have type map {(double, 0), (char, 8)}, with extent 16, and let count = 3. 
The type map of the datatype returned by newtype is .sp .nf - {(double, 0), (char, 8), (double, 16), (char, 24), + {(double, 0), (char, 8), (double, 16), (char, 24), (double, 32), (char, 40)]; .fi .sp i.e., alternating double and char elements, with displacements 0, 8, 16, 24, 32, 40. .sp -In general, assume that the type map of oldtype is +In general, assume that the type map of oldtype is .sp .nf {(type(0), disp(0)),...,(type(n-1), disp(n-1))}, .fi .sp -with extent ex. Then newtype has a type map with count times n entries defined by: +with extent ex. Then newtype has a type map with count times n entries defined by: .sp .nf {(type(0), disp(0)), ...,(type(n-1), disp(n-1)), - (type(0), disp(0) + ex), ...,(type(n-1), - disp(n-1) + ex), ...,(type(0), disp(0) + ex * (count - 1)), + (type(0), disp(0) + ex), ...,(type(n-1), + disp(n-1) + ex), ...,(type(0), disp(0) + ex * (count - 1)), ...,(type(n-1), disp(n-1) + ex * (count - 1))}. .fi .sp @@ -84,5 +84,5 @@ For more information about derived datatypes, see Section 3.12 of the MPI-1 Stan Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Type_create_darray.3in b/ompi/mpi/man/man3/MPI_Type_create_darray.3in index abc08f47db0..4efdd79aa51 100644 --- a/ompi/mpi/man/man3/MPI_Type_create_darray.3in +++ b/ompi/mpi/man/man3/MPI_Type_create_darray.3in @@ -13,7 +13,7 @@ .SH C Syntax .nf #include -int MPI_Type_create_darray(int \fIsize\fP, int \fIrank\fP, int \fIndims\fP, +int MPI_Type_create_darray(int \fIsize\fP, int \fIrank\fP, int \fIndims\fP, const int \fIarray_of_gsizes\fP[], const int \fIarray_of_distribs\fP[], const int \fIarray_of_dargs\fP[], const int \fIarray_of_psizes\fP[], int \fIorder\fP, MPI_Datatype \fIoldtype\fP, MPI_Datatype \fI*newtype\fP) @@ -22,21 +22,21 @@ int MPI_Type_create_darray(int \fIsize\fP, int \fIrank\fP, int \fIndims\fP, .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_TYPE_CREATE_DARRAY(\fISIZE, RANK, NDIMS, ARRAY_OF_GSIZES, - ARRAY_OF_DISTRIBS, ARRAY_OF_DARGS, ARRAY_OF_PSIZES, ORDER, +MPI_TYPE_CREATE_DARRAY(\fISIZE, RANK, NDIMS, ARRAY_OF_GSIZES, + ARRAY_OF_DISTRIBS, ARRAY_OF_DARGS, ARRAY_OF_PSIZES, ORDER, OLDTYPE, NEWTYPE, IERROR\fP) INTEGER \fISIZE, RANK, NDIMS, ARRAY_OF_GSIZES(*), ARRAY_OF_DISTRIBS(*), - ARRAY_OF_DARGS(*), ARRAY_OF_PSIZES(*), ORDER, OLDTYPE, + ARRAY_OF_DARGS(*), ARRAY_OF_PSIZES(*), ORDER, OLDTYPE, NEWTYPE, IERROR\fP .fi .SH C++ Syntax .nf #include -MPI::Datatype MPI::Datatype::Create_darray(int \fIsize\fP, int \fIrank\fP, - int \fIndims\fP, const int \fIarray_of_gsizes\fP[], - const int \fIarray_of_distribs\fP[], const int \fIarray_of_dargs\fP[], +MPI::Datatype MPI::Datatype::Create_darray(int \fIsize\fP, int \fIrank\fP, + int \fIndims\fP, const int \fIarray_of_gsizes\fP[], + const int \fIarray_of_distribs\fP[], const int \fIarray_of_dargs\fP[], const int \fIarray_of_psizes\fP[], int 
\fIorder\fP) const .fi @@ -54,7 +54,7 @@ Number of array dimensions as well as process grid dimensions (positive integer) .sp .TP 1i array_of_gsizes -Number of elements of type \fIoldtype\fP in each dimension of global array (array of positive integers). +Number of elements of type \fIoldtype\fP in each dimension of global array (array of positive integers). .sp .TP 1i array_of_distribs @@ -81,12 +81,12 @@ newtype New data type (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Type_create_darray can be used to generate the data types corresponding to the distribution of an ndims-dimensional array of \fIoldtype\fP elements onto an \fIndims\fP-dimensional grid of logical processes. Unused dimensions of \fIarray_of_psizes\fP should be set to 1. For a call to MPI_Type_create_darray to be correct, the equation +MPI_Type_create_darray can be used to generate the data types corresponding to the distribution of an ndims-dimensional array of \fIoldtype\fP elements onto an \fIndims\fP-dimensional grid of logical processes. Unused dimensions of \fIarray_of_psizes\fP should be set to 1. For a call to MPI_Type_create_darray to be correct, the equation .sp .nf \fIndims\fP-1 @@ -95,25 +95,25 @@ MPI_Type_create_darray can be used to generate the data types corresponding to t .fi .sp -must be satisfied. The ordering of processes in the process grid is assumed to be row-major, as in the case of virtual Cartesian process topologies in MPI-1. +must be satisfied. The ordering of processes in the process grid is assumed to be row-major, as in the case of virtual Cartesian process topologies in MPI-1. .sp -Each dimension of the array can be distributed in one of three ways: +Each dimension of the array can be distributed in one of three ways: .sp .nf -- MPI_DISTRIBUTE_BLOCK - Block distribution -- MPI_DISTRIBUTE_CYCLIC - Cyclic distribution -- MPI_DISTRIBUTE_NONE - Dimension not distributed. 
+- MPI_DISTRIBUTE_BLOCK - Block distribution +- MPI_DISTRIBUTE_CYCLIC - Cyclic distribution +- MPI_DISTRIBUTE_NONE - Dimension not distributed. .fi .sp The constant MPI_DISTRIBUTE_DFLT_DARG specifies a default distribution argument. The distribution argument for a dimension that is not distributed is ignored. For any dimension \fIi\fP in which the distribution is MPI_DISTRIBUTE_BLOCK, it erroneous to specify \fIarray_of_dargs[i]\fP \fI*\fP \fIarray_of_psizes[i]\fP < \fIarray_of_gsizes[i]\fP. .sp -For example, the HPF layout ARRAY(CYCLIC(15)) corresponds to MPI_DISTRIBUTE_CYCLIC with a distribution argument of 15, and the HPF layout ARRAY(BLOCK) corresponds to MPI_DISTRIBUTE_BLOCK with a distribution argument of MPI_DISTRIBUTE_DFLT_DARG. +For example, the HPF layout ARRAY(CYCLIC(15)) corresponds to MPI_DISTRIBUTE_CYCLIC with a distribution argument of 15, and the HPF layout ARRAY(BLOCK) corresponds to MPI_DISTRIBUTE_BLOCK with a distribution argument of MPI_DISTRIBUTE_DFLT_DARG. .sp -The \fIorder\fP argument is used as in MPI_TYPE_CREATE_SUBARRAY to specify the storage order. Therefore, arrays described by this type constructor may be stored in Fortran (column-major) or C (row-major) order. Valid values for order are MPI_ORDER_FORTRAN and MPI_ORDER_C. +The \fIorder\fP argument is used as in MPI_TYPE_CREATE_SUBARRAY to specify the storage order. Therefore, arrays described by this type constructor may be stored in Fortran (column-major) or C (row-major) order. Valid values for order are MPI_ORDER_FORTRAN and MPI_ORDER_C. .sp -This routine creates a new MPI data type with a typemap defined in terms of a function called "cyclic()" (see below). +This routine creates a new MPI data type with a typemap defined in terms of a function called "cyclic()" (see below). .sp -Without loss of generality, it suffices to define the typemap for the MPI_DISTRIBUTE_CYCLIC case where MPI_DISTRIBUTE_DFLT_DARG is not used. 
+Without loss of generality, it suffices to define the typemap for the MPI_DISTRIBUTE_CYCLIC case where MPI_DISTRIBUTE_DFLT_DARG is not used. .sp MPI_DISTRIBUTE_BLOCK and MPI_DISTRIBUTE_NONE can be reduced to the MPI_DISTRIBUTE_CYCLIC case for dimension \fIi\fP as follows. .sp @@ -132,12 +132,12 @@ Finally, MPI_DISTRIBUTE_CYCLIC with \fIarray_of_dargs[i]\fP equal to MPI_DISTRIB .SH NOTES .ft R -For both Fortran and C arrays, the ordering of processes in the process grid is assumed to be row-major. This is consistent with the ordering used in virtual Cartesian process topologies in MPI-1. To create such virtual process topologies, or to find the coordinates of a process in the process grid, etc., users may use the corresponding functions provided in MPI-1. +For both Fortran and C arrays, the ordering of processes in the process grid is assumed to be row-major. This is consistent with the ordering used in virtual Cartesian process topologies in MPI-1. To create such virtual process topologies, or to find the coordinates of a process in the process grid, etc., users may use the corresponding functions provided in MPI-1. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Type_create_f90_complex.3in b/ompi/mpi/man/man3/MPI_Type_create_f90_complex.3in index dccc72d1b6f..b99d34069f2 100644 --- a/ompi/mpi/man/man3/MPI_Type_create_f90_complex.3in +++ b/ompi/mpi/man/man3/MPI_Type_create_f90_complex.3in @@ -49,7 +49,7 @@ newtype New data type (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -123,7 +123,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Type_create_f90_integer.3in b/ompi/mpi/man/man3/MPI_Type_create_f90_integer.3in index 3ecb3edab17..e989ebbb2db 100644 --- a/ompi/mpi/man/man3/MPI_Type_create_f90_integer.3in +++ b/ompi/mpi/man/man3/MPI_Type_create_f90_integer.3in @@ -45,7 +45,7 @@ newtype New data type (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -116,7 +116,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. 
.sp See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Type_create_f90_real.3in b/ompi/mpi/man/man3/MPI_Type_create_f90_real.3in index 99d2e254138..784027af2d7 100644 --- a/ompi/mpi/man/man3/MPI_Type_create_f90_real.3in +++ b/ompi/mpi/man/man3/MPI_Type_create_f90_real.3in @@ -48,7 +48,7 @@ newtype New data type (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -122,7 +122,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Type_create_hvector.3in b/ompi/mpi/man/man3/MPI_Type_create_hvector.3in index e83db8a4cb3..5738354ee3e 100644 --- a/ompi/mpi/man/man3/MPI_Type_create_hvector.3in +++ b/ompi/mpi/man/man3/MPI_Type_create_hvector.3in @@ -5,21 +5,21 @@ .\" $COPYRIGHT$ .TH MPI_Type_create_hvector 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_create_hvector\fP \- Creates a vector (strided) data type with offset in bytes. +\fBMPI_Type_create_hvector\fP \- Creates a vector (strided) data type with offset in bytes. 
.SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Type_create_hvector(int \fIcount\fP, int \fIblocklength\fP, - MPI_Aint \fIstride\fP, MPI_Datatype \fIoldtype\fP, MPI_Datatype *\fInewtype\fP) +int MPI_Type_create_hvector(int \fIcount\fP, int \fIblocklength\fP, + MPI_Aint \fIstride\fP, MPI_Datatype \fIoldtype\fP, MPI_Datatype *\fInewtype\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf INCLUDE 'mpif.h' -MPI_TYPE_CREATE_HVECTOR(\fICOUNT, BLOCKLENGTH, STRIDE, OLDTYPE, +MPI_TYPE_CREATE_HVECTOR(\fICOUNT, BLOCKLENGTH, STRIDE, OLDTYPE, NEWTYPE, IERROR\fP) INTEGER \fICOUNT, BLOCKLENGTH, OLDTYPE, NEWTYPE, IERROR\fP @@ -29,8 +29,8 @@ MPI_TYPE_CREATE_HVECTOR(\fICOUNT, BLOCKLENGTH, STRIDE, OLDTYPE, .SH C++ Syntax .nf #include -MPI::Datatype MPI::Datatype::Create_hvector(int \fIcount\fP, - int \fIblocklength\fP, MPI::Aint \fIstride\fP) const +MPI::Datatype MPI::Datatype::Create_hvector(int \fIcount\fP, + int \fIblocklength\fP, MPI::Aint \fIstride\fP) const .fi .SH INPUT PARAMETERS @@ -55,10 +55,10 @@ newtype New data type (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION -MPI_Type_create_hvector creates a vector (strided) data type with offset in bytes. +MPI_Type_create_hvector creates a vector (strided) data type with offset in bytes. .PP NOTE \- This routine replaces MPI_Type_hvector, which is deprecated. See the man page MPI_Type_hvector(3) for information about that routine. @@ -79,7 +79,7 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_create_indexed_block.3in b/ompi/mpi/man/man3/MPI_Type_create_indexed_block.3in index 0da8c481813..71cac9a4b9f 100644 --- a/ompi/mpi/man/man3/MPI_Type_create_indexed_block.3in +++ b/ompi/mpi/man/man3/MPI_Type_create_indexed_block.3in @@ -21,7 +21,7 @@ int MPI_Type_create_hindexed_block(int \fIcount\fP, int \fIblocklength\fP, const .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_TYPE_CREATE_INDEXED_BLOCK(\fICOUNT, BLOCKLENGTH, +MPI_TYPE_CREATE_INDEXED_BLOCK(\fICOUNT, BLOCKLENGTH, ARRAY_OF_DISPLACEMENTS, OLDTYPE, NEWTYPE, IERROR\fP) INTEGER \fICOUNT, BLOCKLENGTH, ARRAY_OF_DISPLACEMENTS(*), OLDTYPE, NEWTYPE, IERROR \fP @@ -64,7 +64,7 @@ newtype New data type (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -74,7 +74,7 @@ MPI_Type_create_indexed_block and MPI_Type_create_hindexed_block create an index Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. 
.sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_create_keyval.3in b/ompi/mpi/man/man3/MPI_Type_create_keyval.3in index 758267df326..14e664ff606 100644 --- a/ompi/mpi/man/man3/MPI_Type_create_keyval.3in +++ b/ompi/mpi/man/man3/MPI_Type_create_keyval.3in @@ -5,22 +5,22 @@ .\" $COPYRIGHT$ .TH MPI_Type_create_keyval 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_create_keyval\fP \- Generates a new attribute key for caching on data types. +\fBMPI_Type_create_keyval\fP \- Generates a new attribute key for caching on data types. 
.SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Type_create_keyval(MPI_Type_copy_attr_function *\fItype_copy_attr_fn\fP, - MPI_Type_delete_attr_function *\fItype_delete_attr_fn\fP, +int MPI_Type_create_keyval(MPI_Type_copy_attr_function *\fItype_copy_attr_fn\fP, + MPI_Type_delete_attr_function *\fItype_delete_attr_fn\fP, int *\fItype_keyval\fP, void *\fIextra_state\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf INCLUDE 'mpif.h' -MPI_TYPE_CREATE_KEYVAL(\fITYPE_COPY_ATTR_FN, TYPE_DELETE_ATTR_FN, +MPI_TYPE_CREATE_KEYVAL(\fITYPE_COPY_ATTR_FN, TYPE_DELETE_ATTR_FN, TYPE_KEYVAL, EXTRA_STATE, IERROR\fP) EXTERNAL \fITYPE_COPY_ATTR_FN, TYPE_DELETE_ATTR_FN\fP INTEGER \fITYPE_KEYVAL, IERROR \fP @@ -54,58 +54,58 @@ type_keyval Key value for future access (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Type_create_keyval generates a new attribute key for caching on data types. This routine partially replaces MPI_Keyval_create. +MPI_Type_create_keyval generates a new attribute key for caching on data types. This routine partially replaces MPI_Keyval_create. .sp The argument \fItype_copy_attr_fn\fP may be specified as MPI_TYPE_NULL_COPY_FN or MPI_TYPE_DUP_FN from C, C++, or Fortran. MPI_TYPE_NULL_COPY_FN is a function that does nothing other than returning \fIflag\fP = 0 and MPI_SUCCESS. MPI_TYPE_DUP_FN is a simple-minded copy function that sets \fIflag\fP = 1, returns the value of \fIattribute_val_in\fP in \fIattribute_val_out\fP, and returns MPI_SUCCESS. .sp The argument \fItype_delete_attr_fn\fP may be specified as MPI_TYPE_NULL_DELETE_FN from C, C++, or Fortran. MPI_TYPE_NULL_DELETE_FN is a function that does nothing beyond returning MPI_SUCCESS. 
-The C callback functions are: +The C callback functions are: .sp .nf -typedef int MPI_Type_copy_attr_function(MPI_Datatype \fIoldtype\fP, - int \fItype_keyval\fP, void *\fIextra_state\fP, void *\fIattribute_val_in\fP, - void *\fIattribute_val_out\fP, int *\fIflag\fP); +typedef int MPI_Type_copy_attr_function(MPI_Datatype \fIoldtype\fP, + int \fItype_keyval\fP, void *\fIextra_state\fP, void *\fIattribute_val_in\fP, + void *\fIattribute_val_out\fP, int *\fIflag\fP); .fi and .nf -typedef int MPI_Type_delete_attr_function(MPI_Datatype \fItype\fP, int \fItype_keyval\fP, - void *\fIattribute_val\fP, void *\fIextra_state\fP); +typedef int MPI_Type_delete_attr_function(MPI_Datatype \fItype\fP, int \fItype_keyval\fP, + void *\fIattribute_val\fP, void *\fIextra_state\fP); .fi .sp The Fortran callback functions are: .sp .nf -SUBROUTINE TYPE_COPY_ATTR_FN(\fIOLDTYPE, TYPE_KEYVAL, EXTRA_STATE, - ATTRIBUTE_VAL_IN, ATTRIBUTE_VAL_OUT, FLAG, IERROR\fP) +SUBROUTINE TYPE_COPY_ATTR_FN(\fIOLDTYPE, TYPE_KEYVAL, EXTRA_STATE, + ATTRIBUTE_VAL_IN, ATTRIBUTE_VAL_OUT, FLAG, IERROR\fP) INTEGER \fIOLDTYPE, TYPE KEYVAL, IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE, - ATTRIBUTE_VAL_IN, ATTRIBUTE_VAL_OUT\fP - LOGICAL \fIFLAG\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE, + ATTRIBUTE_VAL_IN, ATTRIBUTE_VAL_OUT\fP + LOGICAL \fIFLAG\fP .fi and .nf -SUBROUTINE TYPE_DELETE_ATTR_FN(\fITYPE, TYPE_KEYVAL, ATTRIBUTE_VAL, EXTRA_STATE, - IERROR\fP) - INTEGER \fITYPE, TYPE_KEYVAL, IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE VAL, EXTRA_STATE\fP +SUBROUTINE TYPE_DELETE_ATTR_FN(\fITYPE, TYPE_KEYVAL, ATTRIBUTE_VAL, EXTRA_STATE, + IERROR\fP) + INTEGER \fITYPE, TYPE_KEYVAL, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE VAL, EXTRA_STATE\fP .fi -.sp +.sp The C++ callbacks are: .sp .nf -typedef int MPI::Datatype::Copy_attr_function(const MPI::Datatype& \fIoldtype\fP, - int \fItype_keyval\fP, void* \fIextra_state\fP, - const void* \fIattribute_val_in\fP, void* 
\fIattribute_val_out\fP, - bool& \fIflag\fP); +typedef int MPI::Datatype::Copy_attr_function(const MPI::Datatype& \fIoldtype\fP, + int \fItype_keyval\fP, void* \fIextra_state\fP, + const void* \fIattribute_val_in\fP, void* \fIattribute_val_out\fP, + bool& \fIflag\fP); .fi and .nf -typedef int MPI::Datatype::Delete_attr_function(MPI::Datatype& \fItype\fP, - int \fItype_keyval\fP, void* \fIattribute_val\fP, void* \fIextra_state\fP); +typedef int MPI::Datatype::Delete_attr_function(MPI::Datatype& \fItype\fP, + int \fItype_keyval\fP, void* \fIattribute_val\fP, void* \fIextra_state\fP); .fi .sp @@ -126,7 +126,7 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_create_resized.3in b/ompi/mpi/man/man3/MPI_Type_create_resized.3in index 79486e30b88..567b49c7611 100644 --- a/ompi/mpi/man/man3/MPI_Type_create_resized.3in +++ b/ompi/mpi/man/man3/MPI_Type_create_resized.3in @@ -5,14 +5,14 @@ .\" $COPYRIGHT$ .TH MPI_Type_create_resized 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_create_resized\fP \- Returns a new data type with new extent and upper and lower bounds. +\fBMPI_Type_create_resized\fP \- Returns a new data type with new extent and upper and lower bounds. .SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Type_create_resized(MPI_Datatype \fIoldtype\fP, MPI_Aint\fI lb\fP, +int MPI_Type_create_resized(MPI_Datatype \fIoldtype\fP, MPI_Aint\fI lb\fP, MPI_Aint \fIextent\fP, MPI_Datatype *\fInewtype\fP) .fi @@ -27,21 +27,21 @@ MPI_TYPE_CREATE_RESIZED(\fIOLDTYPE, LB, EXTENT, NEWTYPE, IERROR\fP) .SH C++ Syntax .nf #include -MPI::Datatype MPI::Datatype::Create_resized(const MPI::Aint \fIlb\fP, +MPI::Datatype MPI::Datatype::Create_resized(const MPI::Aint \fIlb\fP, const MPI::Aint \fIextent\fP) const .fi .SH INPUT PARAMETERS .ft R .TP 1i -oldtype +oldtype Input data type (handle). .TP 1i lb New lower bound of data type (integer). .TP 1i extent -New extent of data type (integer). +New extent of data type (integer). .SH OUTPUT PARAMETERS .ft R @@ -50,11 +50,11 @@ newtype Output data type (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Type_create_resized returns in \fInewtype\fP a handle to a new data type that is identical to \fIoldtype\fP, except that the lower bound of this new data type is set to be \fIlb\fP, and its upper bound is set to be \fIlb\fP + \fIextent\fP. Any previous \fIlb\fP and \fIub\fP markers are erased, and a new pair of lower bound and upper bound markers are put in the positions indicated by the \fIlb\fP and \fIextent\fP arguments. 
This affects the behavior of the data type when used in communication operations, with \fIcount\fP > 1, and when used in the construction of new derived data types. +MPI_Type_create_resized returns in \fInewtype\fP a handle to a new data type that is identical to \fIoldtype\fP, except that the lower bound of this new data type is set to be \fIlb\fP, and its upper bound is set to be \fIlb\fP + \fIextent\fP. Any previous \fIlb\fP and \fIub\fP markers are erased, and a new pair of lower bound and upper bound markers are put in the positions indicated by the \fIlb\fP and \fIextent\fP arguments. This affects the behavior of the data type when used in communication operations, with \fIcount\fP > 1, and when used in the construction of new derived data types. .SH FORTRAN 77 NOTES .ft R @@ -73,13 +73,13 @@ and gives the length of the declared integer in bytes. .SH NOTE .ft R -Use of MPI_Type_create_resized is strongly recommended over the old MPI-1 functions MPI_Type_extent and MPI_Type_lb. +Use of MPI_Type_create_resized is strongly recommended over the old MPI-1 functions MPI_Type_extent and MPI_Type_lb. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO diff --git a/ompi/mpi/man/man3/MPI_Type_create_struct.3in b/ompi/mpi/man/man3/MPI_Type_create_struct.3in index 993e3c381c0..accaee061af 100644 --- a/ompi/mpi/man/man3/MPI_Type_create_struct.3in +++ b/ompi/mpi/man/man3/MPI_Type_create_struct.3in @@ -6,14 +6,14 @@ .\" $COPYRIGHT$ .TH MPI_Type_create_struct 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_create_struct\fP \- Creates a structured data type. +\fBMPI_Type_create_struct\fP \- Creates a structured data type. .SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Type_create_struct(int \fIcount\fP, int \fIarray_of_blocklengths\fP[], +int MPI_Type_create_struct(int \fIcount\fP, int \fIarray_of_blocklengths\fP[], const MPI_Aint \fIarray_of_displacements\fP[], const MPI_Datatype \fIarray_of_types\fP[], MPI_Datatype *\fInewtype\fP) @@ -21,7 +21,7 @@ int MPI_Type_create_struct(int \fIcount\fP, int \fIarray_of_blocklengths\fP[], .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf INCLUDE 'mpif.h' -MPI_TYPE_CREATE_STRUCT(\fICOUNT, ARRAY_OF_BLOCKLENGTHS, +MPI_TYPE_CREATE_STRUCT(\fICOUNT, ARRAY_OF_BLOCKLENGTHS, ARRAY_OF_DISPLACEMENTS, ARRAY_OF_TYPES, NEWTYPE, IERROR\fP) INTEGER \fICOUNT, ARRAY_OF_BLOCKLENGTHS(*), ARRAY_OF_TYPES(*),\fP INTEGER \fINEWTYPE, IERROR \fP @@ -33,7 +33,7 @@ MPI_TYPE_CREATE_STRUCT(\fICOUNT, ARRAY_OF_BLOCKLENGTHS, #include static MPI::Datatype MPI::Datatype::Create_struct(int \fIcount\fP, const int \fIarray_of_blocklengths\fP[], const MPI::Aint - \fIarray_of_displacements\fP[], const MPI::Datatype + \fIarray_of_displacements\fP[], const MPI::Datatype \fIarray_of_types\fP[]) .fi @@ -41,7 +41,7 @@ static MPI::Datatype MPI::Datatype::Create_struct(int \fIcount\fP, .ft R .TP 1i count -Number of blocks (integer) -- also number of 
entries in arrays \fIarray_of_types\fP, \fIarray_of_displacements\fP, and \fIarray_of_blocklengths\fP. +Number of blocks (integer) -- also number of entries in arrays \fIarray_of_types\fP, \fIarray_of_displacements\fP, and \fIarray_of_blocklengths\fP. .TP 1i array_of_blocklengths Number of elements in each block (array of integers). @@ -59,10 +59,10 @@ newtype New data type (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION -MPI_Type_create_struct creates a structured data type. This routine replaces MPI_Type_struct, which is now deprecated. +MPI_Type_create_struct creates a structured data type. This routine replaces MPI_Type_struct, which is now deprecated. .PP NOTE \- This routine replaces MPI_Type_struct, which is deprecated. See the man page MPI_Type_struct(3) for information about that routine. @@ -83,7 +83,7 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_create_subarray.3in b/ompi/mpi/man/man3/MPI_Type_create_subarray.3in index c1e62af2a00..665f3c3f859 100644 --- a/ompi/mpi/man/man3/MPI_Type_create_subarray.3in +++ b/ompi/mpi/man/man3/MPI_Type_create_subarray.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Type_create_subarray 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_create_subarray\fP \- Creates a data type describing an \fIn\fP-dimensional subarray of an \fIn\fP-dimensional array. +\fBMPI_Type_create_subarray\fP \- Creates a data type describing an \fIn\fP-dimensional subarray of an \fIn\fP-dimensional array. .SH SYNTAX .ft R @@ -19,8 +19,8 @@ int MPI_Type_create_subarray(int \fIndims\fP, const int \fIarray_of_sizes[]\fP, .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_TYPE_CREATE_SUBARRAY(\fINDIMS, ARRAY_OF_SIZES, ARRAY_OF_SUBSIZES, - ARRAY_OF_STARTS, ORDER, OLDTYPE, NEWTYPE, IERROR\fP) +MPI_TYPE_CREATE_SUBARRAY(\fINDIMS, ARRAY_OF_SIZES, ARRAY_OF_SUBSIZES, + ARRAY_OF_STARTS, ORDER, OLDTYPE, NEWTYPE, IERROR\fP) INTEGER \fINDIMS, ARRAY_OF_SIZES(*), ARRAY_OF_SUBSIZES(*), ARRAY_OF_STARTS(*), ORDER, OLDTYPE, NEWTYPE, IERROR\fP @@ -29,8 +29,8 @@ MPI_TYPE_CREATE_SUBARRAY(\fINDIMS, ARRAY_OF_SIZES, ARRAY_OF_SUBSIZES, .SH C++ Syntax .nf #include -MPI::Datatype MPI::Datatype::Create_subarray(int \fIndims\fP, - const int \fIarray_of_sizes\fP[], const int \fIarray_of_subsizes[]\fP, +MPI::Datatype MPI::Datatype::Create_subarray(int \fIndims\fP, + const int \fIarray_of_sizes\fP[], const int \fIarray_of_subsizes[]\fP, const int \fIarray_of_starts[]\fP, int \fIorder\fP) const .fi @@ -63,37 +63,37 @@ newtype New data type (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R -The subarray type constructor creates an MPI data type describing an \fIn\fP-dimensional subarray of an \fIn\fP-dimensional array. The subarray may be situated anywhere within the full array, and may be of any nonzero size up to the size of the larger array as long as it is confined within this array. This type constructor facilitates creating file types to access arrays distributed in blocks among processes to a single file that contains the global array. +The subarray type constructor creates an MPI data type describing an \fIn\fP-dimensional subarray of an \fIn\fP-dimensional array. The subarray may be situated anywhere within the full array, and may be of any nonzero size up to the size of the larger array as long as it is confined within this array. This type constructor facilitates creating file types to access arrays distributed in blocks among processes to a single file that contains the global array. .sp -This type constructor can handle arrays with an arbitrary number of dimensions and works for both C- and Fortran-ordered matrices (that is, row-major or column-major). Note that a C program may use Fortran order and a Fortran program may use C order. +This type constructor can handle arrays with an arbitrary number of dimensions and works for both C- and Fortran-ordered matrices (that is, row-major or column-major). Note that a C program may use Fortran order and a Fortran program may use C order. .sp -The \fIndims\fP parameter specifies the number of dimensions in the full data array and gives the number of elements in \fIarray_of_sizes\fP, \fIarray_of_subsizes\fP, and \fIarray_of_starts\fP. +The \fIndims\fP parameter specifies the number of dimensions in the full data array and gives the number of elements in \fIarray_of_sizes\fP, \fIarray_of_subsizes\fP, and \fIarray_of_starts\fP. 
.sp -The number of elements of type \fIoldtype\fP in each dimension of the \fIn\fP-dimensional array and the requested subarray are specified by \fIarray_of_sizes\fP and \fIarray_of_subsizes\fP, respectively. For any dimension \fIi\fP, it is erroneous to specify \fIarray_of_subsizes[i]\fP < 1 or \fIarray_of_subsizes[i]\fP > \fIarray of sizes[i]\fP. +The number of elements of type \fIoldtype\fP in each dimension of the \fIn\fP-dimensional array and the requested subarray are specified by \fIarray_of_sizes\fP and \fIarray_of_subsizes\fP, respectively. For any dimension \fIi\fP, it is erroneous to specify \fIarray_of_subsizes[i]\fP < 1 or \fIarray_of_subsizes[i]\fP > \fIarray_of_sizes[i]\fP. .sp -The \fIarray_of_starts\fP contains the starting coordinates of each dimension of the subarray. Arrays are assumed to be indexed starting from zero. For any dimension \fIi\fP, it is erroneous to specify +The \fIarray_of_starts\fP contains the starting coordinates of each dimension of the subarray. Arrays are assumed to be indexed starting from zero. For any dimension \fIi\fP, it is erroneous to specify .sp .nf -\fIarray_of_starts[i]\fP < 0 +\fIarray_of_starts[i]\fP < 0 .fi .sp -or +or .sp .nf -\fIarray_of_starts[i]\fP > (\fIarray_of_sizes[i]\fP - \fIarray_of_subsizes[i]\fP). +\fIarray_of_starts[i]\fP > (\fIarray_of_sizes[i]\fP - \fIarray_of_subsizes[i]\fP). .fi .sp -The \fIorder\fP argument specifies the storage order for the subarray as well as the full array. It must be set to one of the following: +The \fIorder\fP argument specifies the storage order for the subarray as well as the full array.
It must be set to one of the following: .sp - MPI_ORDER_C: The ordering used by C arrays, (that is, row-major order) .sp - MPI_ORDER_FORTRAN: The ordering used by Fortran arrays, (that is, column-major order) .sp -A \fIndims\fP-dimensional subarray (\fInewtype\fP) with no extra padding can be defined by the function Subarray() as follows: +A \fIndims\fP-dimensional subarray (\fInewtype\fP) with no extra padding can be defined by the function Subarray() as follows: .sp .nf newtype = Subarray(ndims, {size , size ,\..., size }, @@ -111,21 +111,21 @@ Let the typemap of \fIoldtype\fP have the form: 0 0 1 1 n-1 n-1 .fi .sp -where type\fIi\fP is a predefined MPI data type, and let \fIex\fP be the extent of \fIoldtype\fP. +where type\fIi\fP is a predefined MPI data type, and let \fIex\fP be the extent of \fIoldtype\fP. .sp -The Subarray() function is defined recursively in three equations on page 72 of the MPI-2 standard. +The Subarray() function is defined recursively in three equations on page 72 of the MPI-2 standard. .sp For an example use of MPI_Type_create_subarray in the context of I/O, see Section 9.9.2 of the MPI-2 standard. .SH NOTES .ft R -In a Fortran program with arrays indexed starting from 1, if the starting coordinate of a particular dimension of the subarray is \fIn\fP, then the entry in array of starts for that dimension is \fIn\fP-1. +In a Fortran program with arrays indexed starting from 1, if the starting coordinate of a particular dimension of the subarray is \fIn\fP, then the entry in array of starts for that dimension is \fIn\fP-1. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Type_delete_attr.3in b/ompi/mpi/man/man3/MPI_Type_delete_attr.3in index fef9d77ccd8..fccc7b352e5 100644 --- a/ompi/mpi/man/man3/MPI_Type_delete_attr.3in +++ b/ompi/mpi/man/man3/MPI_Type_delete_attr.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Type_delete_attr 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_delete_attr\fP \- Deletes a datatype-caching attribute value associated with a key. +\fBMPI_Type_delete_attr\fP \- Deletes a datatype-caching attribute value associated with a key. .SH SYNTAX .ft R @@ -38,17 +38,17 @@ Data type from which the attribute is deleted (handle).n .ft R .TP 1i type_keyval -Key value (integer). +Key value (integer). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Type_delete_attr deletes a datatype-caching attribute value associated with a key. This routines partially replaces MPI_Attr_delete, which is now deprecated. +MPI_Type_delete_attr deletes a datatype-caching attribute value associated with a key. This routine partially replaces MPI_Attr_delete, which is now deprecated. .SH NOTES @@ -63,5 +63,5 @@ is being invoked.
Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Type_dup.3in b/ompi/mpi/man/man3/MPI_Type_dup.3in index 25290deeca1..21dcdb486aa 100644 --- a/ompi/mpi/man/man3/MPI_Type_dup.3in +++ b/ompi/mpi/man/man3/MPI_Type_dup.3in @@ -41,7 +41,7 @@ newtype Copy of \fItype\fP (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -60,7 +60,7 @@ copy callback is being invoked. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_extent.3in b/ompi/mpi/man/man3/MPI_Type_extent.3in index 6cca4cbb9d8..09e787526b4 100644 --- a/ompi/mpi/man/man3/MPI_Type_extent.3in +++ b/ompi/mpi/man/man3/MPI_Type_extent.3in @@ -19,34 +19,34 @@ int MPI_Type_extent(MPI_Datatype \fIdatatype\fP, MPI_Aint\fI *extent\fP) .nf INCLUDE 'mpif.h' MPI_TYPE_EXTENT(\fIDATATYPE, EXTENT, IERROR\fP) - INTEGER \fIDATATYPE, EXTENT, IERROR\fP + INTEGER \fIDATATYPE, EXTENT, IERROR\fP .fi .SH INPUT PARAMETER .ft R .TP 1i -datatype +datatype Datatype (handle). .sp .SH OUTPUT PARAMETERS .ft R .TP 1i -extent +extent Datatype extent (integer). .sp .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Type_get_extent instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Type_get_extent instead. .sp -This deprecated routine is not available in C++. +This deprecated routine is not available in C++. .sp -MPI_Type_extent returns the extent of a data type, the difference between the upper and lower bounds of the data type. +MPI_Type_extent returns the extent of a data type, the difference between the upper and lower bounds of the data type. 
.sp In general, if .sp @@ -54,7 +54,7 @@ In general, if Typemap = {(type(0), disp(0)), ..., (type(n-1), disp(n-1))} .fi .sp -then the lower bound of Typemap is defined to be +then the lower bound of Typemap is defined to be .sp .nf ( min(j) disp(j) if no entry has @@ -71,19 +71,19 @@ Similarly, the upper bound of Typemap is defined to be (max(j) {disp(j) such that type(j) = ub} otherwise .fi .sp -Then +Then .sp .nf extent(Typemap) = ub(Typemap) - lb(Typemap) .fi .sp -If type(i) requires alignment to a byte address that is a multiple of k(i), then e is the least nonnegative increment needed to round extent(Typemap) to the next multiple of max(i) k(i). +If type(i) requires alignment to a byte address that is a multiple of k(i), then e is the least nonnegative increment needed to round extent(Typemap) to the next multiple of max(i) k(i). .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_free.3in b/ompi/mpi/man/man3/MPI_Type_free.3in index d2d044a5bdc..7cf0b0a2277 100644 --- a/ompi/mpi/man/man3/MPI_Type_free.3in +++ b/ompi/mpi/man/man3/MPI_Type_free.3in @@ -19,7 +19,7 @@ int MPI_Type_free(MPI_Datatype *\fIdatatype\fP) .nf INCLUDE 'mpif.h' MPI_TYPE_FREE(\fIDATATYPE, IERROR\fP) - INTEGER \fIDATATYPE, IERROR\fP + INTEGER \fIDATATYPE, IERROR\fP .fi .SH C++ Syntax @@ -31,18 +31,18 @@ void Datatype::Free() .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -datatype +datatype Datatype that is freed (handle). .sp .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Marks the datatype object associated with datatype for de-allocation and sets datatype to MPI_DATATYPE_NULL. Any communication that is currently using this datatype will complete normally. Derived datatypes that were defined from the freed datatype are not affected. +Marks the datatype object associated with datatype for de-allocation and sets datatype to MPI_DATATYPE_NULL. Any communication that is currently using this datatype will complete normally. Derived datatypes that were defined from the freed datatype are not affected. .sp Freeing a datatype does not affect any other datatype that was built from the freed datatype. The system behaves as if input datatype arguments to derived datatype constructors are passed by value. @@ -50,7 +50,7 @@ Freeing a datatype does not affect any other datatype that was built from the fr Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Type_free_keyval.3in b/ompi/mpi/man/man3/MPI_Type_free_keyval.3in index 9f6510b0680..8ee94a56457 100644 --- a/ompi/mpi/man/man3/MPI_Type_free_keyval.3in +++ b/ompi/mpi/man/man3/MPI_Type_free_keyval.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Type_free_keyval 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_free_keyval\fP \- Frees a previously created type key value. +\fBMPI_Type_free_keyval\fP \- Frees a previously created type key value. .SH SYNTAX .ft R @@ -38,7 +38,7 @@ Key value to free (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -47,7 +47,7 @@ Fortran only: Error status (integer). Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_get_attr.3in b/ompi/mpi/man/man3/MPI_Type_get_attr.3in index 386adc2747c..d00d826dff0 100644 --- a/ompi/mpi/man/man3/MPI_Type_get_attr.3in +++ b/ompi/mpi/man/man3/MPI_Type_get_attr.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Type_get_attr 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_get_attr\fP \- Returns the attribute associated with a data type. +\fBMPI_Type_get_attr\fP \- Returns the attribute associated with a data type. .SH SYNTAX .ft R @@ -20,8 +20,8 @@ int MPI_Type_get_attr(MPI_Datatype \fItype\fP, int \fItype_keyval\fP, void *\fIa INCLUDE 'mpif.h' MPI_TYPE_GET_ATTR(\fITYPE, TYPE_KEYVAL, ATTRIBUTE_VAL, FLAG, IERROR\fP) INTEGER \fITYPE, TYPE_KEYVAL, IERROR \fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE_VAL\fP - LOGICAL \fIFLAG\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE_VAL\fP + LOGICAL \fIFLAG\fP .fi .SH C++ Syntax @@ -34,7 +34,7 @@ bool MPI::Datatype::Get_attr(int \fItype_keyval\fP, void* \fIattribute_val\fP) c .ft R .TP 1i type -Data type to which the attribute is attached (handle). +Data type to which the attribute is attached (handle). .TP 1i type_keyval Key value (integer). @@ -49,10 +49,10 @@ flag "false" if no attribute is associated with the key (logical). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION -For the given data type, MPI_Type_get_attr returns an attribute value that corresponds to the specified key value. +For the given data type, MPI_Type_get_attr returns an attribute value that corresponds to the specified key value. .SH FORTRAN 77 NOTES .ft R @@ -71,7 +71,7 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_get_contents.3in b/ompi/mpi/man/man3/MPI_Type_get_contents.3in index fd94f651427..b07a17922ab 100644 --- a/ompi/mpi/man/man3/MPI_Type_get_contents.3in +++ b/ompi/mpi/man/man3/MPI_Type_get_contents.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Type_get_contents 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_get_contents\fP \- Returns information about arguments used in creation of a data type. +\fBMPI_Type_get_contents\fP \- Returns information about arguments used in creation of a data type. 
.SH SYNTAX .ft R @@ -24,7 +24,7 @@ MPI_TYPE_GET_CONTENTS(\fIDATATYPE, MAX_INTEGERS, MAX_ADDRESSES, ARRAY_OF_DATATYPES, IERROR\fP) INTEGER \fIDATATYPE, MAX_INTEGERS, MAX_ADDRESSES, MAX_DATATYPES\fP INTEGER \fIARRAY_OF_INTEGERS(*), ARRAY_OF_DATATYPES(*), IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIARRAY_OF_ADDRESSES\fP(*) + INTEGER(KIND=MPI_ADDRESS_KIND) \fIARRAY_OF_ADDRESSES\fP(*) .fi .SH C++ Syntax @@ -64,23 +64,23 @@ array_of_datatypes Contains data-type arguments used in constructing \fIdatatype\fP (array of integers). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -For the given data type, MPI_Type_get_envelope returns information on the number and type of input arguments used in the call that created the data type. The number-of-arguments values returned can be used to provide sufficiently large arrays in the decoding routine MPI_Type_get_contents. This call and the meaning of the returned values is described below. The combiner reflects the MPI data type constructor call that was used in creating \fIdatatype\fP. +For the given data type, MPI_Type_get_envelope returns information on the number and type of input arguments used in the call that created the data type. The number-of-arguments values returned can be used to provide sufficiently large arrays in the decoding routine MPI_Type_get_contents. This call and the meaning of the returned values is described below. The combiner reflects the MPI data type constructor call that was used in creating \fIdatatype\fP. The parameter \fIdatatype\fP must be a predefined unnamed or a derived data type. The call is erroneous if \fIdatatype\fP is a predefined named data type. 
.sp The values given for \fImax_integers\fP, \fImax_addresses\fP, and \fImax_datatypes\fP must be at least as large as the value returned in \fInum_integers\fP, \fInum_addresses\fP, and \fInum_datatypes\fP, respectively, in the call MPI_Type_get_envelope for the same \fIdatatype\fP argument. .sp -The data types returned in \fIarray_of_datatypes\fP are handles to data-type objects that are equivalent to the data types used in the original construction call. If these were derived data types, then the returned data types are new data-type objects, and the user is responsible for freeing these datatypes with MPI_Type_free. If these were predefined data types, then the returned data type is equal to that (constant) predefined data type and cannot be freed. +The data types returned in \fIarray_of_datatypes\fP are handles to data-type objects that are equivalent to the data types used in the original construction call. If these were derived data types, then the returned data types are new data-type objects, and the user is responsible for freeing these datatypes with MPI_Type_free. If these were predefined data types, then the returned data type is equal to that (constant) predefined data type and cannot be freed. .sp -The committed state of returned derived data types is undefined, that is, the data types may or may not be committed. Furthermore, the content of attributes of returned data types is undefined. +The committed state of returned derived data types is undefined, that is, the data types may or may not be committed. Furthermore, the content of attributes of returned data types is undefined. .sp -Note that MPI_Type_get_contents can be invoked with a data-type argument that was constructed using MPI_Type_create_f90_real, MPI_Type_create_f90_integer, or MPI_Type_create_f90_complex (an unnamed predefined data type). In such a case, an empty \fIarray_of_datatypes\fP is returned. 
+Note that MPI_Type_get_contents can be invoked with a data-type argument that was constructed using MPI_Type_create_f90_real, MPI_Type_create_f90_integer, or MPI_Type_create_f90_complex (an unnamed predefined data type). In such a case, an empty \fIarray_of_datatypes\fP is returned. .sp -In the MPI-1 data-type constructor calls, the address arguments in Fortran are of type INTEGER. In the new MPI-2 calls, the address arguments are of type INTEGER(KIND=MPI ADDRESS KIND). The call MPI_Type_get_contents returns all addresses in an argument of type INTEGER(KIND=MPI ADDRESS KIND). This is true even if the old MPI-1 calls were used. Thus, the location of values returned can be thought of as being returned by the C bindings. It can also be determined by examining the new MPI-2 calls for data-type constructors for the deprecated MPI-1 calls that involve addresses. +In the MPI-1 data-type constructor calls, the address arguments in Fortran are of type INTEGER. In the new MPI-2 calls, the address arguments are of type INTEGER(KIND=MPI_ADDRESS_KIND). The call MPI_Type_get_contents returns all addresses in an argument of type INTEGER(KIND=MPI_ADDRESS_KIND). This is true even if the old MPI-1 calls were used. Thus, the location of values returned can be thought of as being returned by the C bindings. It can also be determined by examining the new MPI-2 calls for data-type constructors for the deprecated MPI-1 calls that involve addresses. .SH FORTRAN 77 NOTES .ft R @@ -99,7 +99,7 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft r diff --git a/ompi/mpi/man/man3/MPI_Type_get_envelope.3in b/ompi/mpi/man/man3/MPI_Type_get_envelope.3in index 87e082200bc..3c7e5df195e 100644 --- a/ompi/mpi/man/man3/MPI_Type_get_envelope.3in +++ b/ompi/mpi/man/man3/MPI_Type_get_envelope.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Type_get_envelope 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_get_envelope\fP \- Returns information about input arguments associated with a data type. +\fBMPI_Type_get_envelope\fP \- Returns information about input arguments associated with a data type. 
.SH SYNTAX .ft R @@ -16,7 +16,7 @@ int MPI_Type_get_envelope(MPI_Datatype \fIdatatype\fP, int *\fInum_integers\fP, int *\fInum_addresses\fP, int *\fInum_datatypes\fP, int *\fIcombiner\fP) .fi -.SH Fortran Syntax +.SH Fortran Syntax .nf INCLUDE 'mpif.h' MPI_TYPE_GET_ENVELOPE(\fIDATATYPE, NUM_INTEGERS, NUM_ADDRESSES, @@ -28,8 +28,8 @@ MPI_TYPE_GET_ENVELOPE(\fIDATATYPE, NUM_INTEGERS, NUM_ADDRESSES, .SH C++ Syntax .nf #include -void MPI::Datatype::Get_envelope(int& \fInum_integers\fP, - int& \fInum_addresses\fP, int& \fInum_datatypes\fP, +void MPI::Datatype::Get_envelope(int& \fInum_integers\fP, + int& \fInum_addresses\fP, int& \fInum_datatypes\fP, int& \fIcombiner\fP) const .fi @@ -52,14 +52,14 @@ num_datatypes Number of input data types used in the call constructing \fIcombiner\fP (nonnegative integer). .TP 1i combiner -Combiner (state). +Combiner (state). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -For the given data type, MPI_Type_get_envelope returns information on the number and type of input arguments used in the call that created the data type. The number-of-arguments values returned can be used to provide sufficiently large arrays in the decoding routine MPI_Type_get_contents. This call and the meaning of the returned values is described below. The combiner reflects the MPI data type constructor call that was used in creating \fIdatatype\fP. +For the given data type, MPI_Type_get_envelope returns information on the number and type of input arguments used in the call that created the data type. The number-of-arguments values returned can be used to provide sufficiently large arrays in the decoding routine MPI_Type_get_contents. This call and the meaning of the returned values is described below. The combiner reflects the MPI data type constructor call that was used in creating \fIdatatype\fP. 
.SH NOTES .ft R @@ -96,13 +96,13 @@ MPI_COMBINER_RESIZED MPI_Type_create_resized .sp If \fIcombiner\fP is MPI_COMBINER_NAMED, then \fIdatatype\fP is a named predefined data type. .sp -The actual arguments used in the creation call for a data type can be obtained from the call MPI_Type_get_contents. +The actual arguments used in the creation call for a data type can be obtained from the call MPI_Type_get_contents. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft r diff --git a/ompi/mpi/man/man3/MPI_Type_get_extent.3in b/ompi/mpi/man/man3/MPI_Type_get_extent.3in index 88bf87cf6d6..f3283e83f89 100644 --- a/ompi/mpi/man/man3/MPI_Type_get_extent.3in +++ b/ompi/mpi/man/man3/MPI_Type_get_extent.3in @@ -6,16 +6,16 @@ .\" $COPYRIGHT$ .TH MPI_Type_get_extent 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_get_extent\fP, \fBMPI_Type_get_extent_x\fP \- Returns the lower bound and extent of a data type. 
+\fBMPI_Type_get_extent\fP, \fBMPI_Type_get_extent_x\fP \- Returns the lower bound and extent of a data type. .SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Type_get_extent(MPI_Datatype \fIdatatype\fP, MPI_Aint\fI *lb\fP, +int MPI_Type_get_extent(MPI_Datatype \fIdatatype\fP, MPI_Aint\fI *lb\fP, MPI_Aint *\fIextent\fP) -int MPI_Type_get_extent_x(MPI_Datatype \fIdatatype\fP, MPI_Count\fI *lb\fP, +int MPI_Type_get_extent_x(MPI_Datatype \fIdatatype\fP, MPI_Count\fI *lb\fP, MPI_Count *\fIextent\fP) .fi @@ -23,24 +23,24 @@ int MPI_Type_get_extent_x(MPI_Datatype \fIdatatype\fP, MPI_Count\fI *lb\fP, .nf INCLUDE 'mpif.h' MPI_TYPE_GET_EXTENT(\fIDATATYPE, LB, EXTENT, IERROR\fP) - INTEGER \fIDATATYPE, IERROR\fP + INTEGER \fIDATATYPE, IERROR\fP INTEGER(KIND=MPI_ADDRESS_KIND) \fILB, EXTENT\fP MPI_TYPE_GET_EXTENT_X(\fIDATATYPE, LB, EXTENT, IERROR\fP) - INTEGER \fIDATATYPE, IERROR\fP + INTEGER \fIDATATYPE, IERROR\fP INTEGER(KIND=MPI_COUNT_KIND) \fILB, EXTENT\fP .fi .SH C++ Syntax .nf #include -void MPI::Datatype::Get_extent(MPI::Aint& \fIlb\fP, MPI::Aint& \fIextent\fP) +void MPI::Datatype::Get_extent(MPI::Aint& \fIlb\fP, MPI::Aint& \fIextent\fP) const .fi .SH INPUT PARAMETER .ft R .TP 1i -datatype +datatype Data type (handle). .sp .SH OUTPUT PARAMETERS @@ -49,12 +49,12 @@ Data type (handle). lb Lower bound of data type (integer). .TP 1i -extent +extent Data type extent (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -62,7 +62,7 @@ MPI_Type_get_extent returns the lower bound and the extent of \fIdatatype\fP. Fo .SH NOTE .ft R -Use of MPI_Type_get_extent is strongly recommended over the old MPI-1 functions MPI_Type_extent and MPI_Type_lb. +Use of MPI_Type_get_extent is strongly recommended over the old MPI-1 functions MPI_Type_extent and MPI_Type_lb. .SH FORTRAN 77 NOTES .ft R @@ -93,4 +93,4 @@ and give the length of the declared integer in bytes. 
Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Type_get_name.3in b/ompi/mpi/man/man3/MPI_Type_get_name.3in index 12ed816c8df..28edd2d7761 100644 --- a/ompi/mpi/man/man3/MPI_Type_get_name.3in +++ b/ompi/mpi/man/man3/MPI_Type_get_name.3in @@ -5,14 +5,14 @@ .\" $COPYRIGHT$ .TH MPI_Type_get_name 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_get_name\fP \- Gets the name of a data type. +\fBMPI_Type_get_name\fP \- Gets the name of a data type. 
.SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Type_get_name(MPI_Datatype \fItype\fP, char *\fItype_name\fP, +int MPI_Type_get_name(MPI_Datatype \fItype\fP, char *\fItype_name\fP, int *\fIresultlen\fP) .fi @@ -27,7 +27,7 @@ TYPE_GET_NAME(\fITYPE, TYPE_NAME, RESULTLEN, IERROR\fP) .SH C++ Syntax .nf #include -void MPI::Datatype::Get_name(char* \fItype_name\fP, int& \fIresultlen\fP) +void MPI::Datatype::Get_name(char* \fItype_name\fP, int& \fIresultlen\fP) const .fi @@ -35,7 +35,7 @@ void MPI::Datatype::Get_name(char* \fItype_name\fP, int& \fIresultlen\fP) .ft R .TP 1i type -Data type whose name is to be returned (handle). +Data type whose name is to be returned (handle). .SH OUTPUT PARAMETERS .ft R @@ -47,17 +47,17 @@ resultlen Length of returned name (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Type_get_name returns the printable identifier associated with an MPI data type. +MPI_Type_get_name returns the printable identifier associated with an MPI data type. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_get_true_extent.3in b/ompi/mpi/man/man3/MPI_Type_get_true_extent.3in index 70819543410..8b1587f9fe6 100644 --- a/ompi/mpi/man/man3/MPI_Type_get_true_extent.3in +++ b/ompi/mpi/man/man3/MPI_Type_get_true_extent.3in @@ -6,16 +6,16 @@ .\" $COPYRIGHT$ .TH MPI_Type_get_true_extent 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_get_true_extent\fP, \fBMPI_Type_get_true_extent_x\fP \- Returns the true lower bound and extent of a data type's corresponding typemap, ignoring MPI_UB and MPI_LB markers. +\fBMPI_Type_get_true_extent\fP, \fBMPI_Type_get_true_extent_x\fP \- Returns the true lower bound and extent of a data type's corresponding typemap, ignoring MPI_UB and MPI_LB markers. 
.SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Type_get_true_extent(MPI_Datatype \fIdatatype\fP, +int MPI_Type_get_true_extent(MPI_Datatype \fIdatatype\fP, MPI_Aint *\fItrue_lb\fP, MPI_Aint *\fItrue_extent\fP) -int MPI_Type_get_true_extent_x(MPI_Datatype \fIdatatype\fP, +int MPI_Type_get_true_extent_x(MPI_Datatype \fIdatatype\fP, MPI_Count *\fItrue_lb\fP, MPI_Count *\fItrue_extent\fP) .fi @@ -23,24 +23,24 @@ int MPI_Type_get_true_extent_x(MPI_Datatype \fIdatatype\fP, .nf INCLUDE 'mpif.h' MPI_TYPE_GET_TRUE_EXTENT(\fIDATATYPE, TRUE_LB, TRUE_EXTENT, IERROR\fP) - INTEGER \fIDATATYPE, IERROR\fP + INTEGER \fIDATATYPE, IERROR\fP INTEGER(KIND=MPI_ADDRESS_KIND) \fITRUE_LB, TRUE_EXTENT\fP MPI_TYPE_GET_TRUE_EXTENT_X(\fIDATATYPE, TRUE_LB, TRUE_EXTENT, IERROR\fP) - INTEGER \fIDATATYPE, IERROR\fP + INTEGER \fIDATATYPE, IERROR\fP INTEGER(KIND=MPI_COUNT_KIND) \fITRUE_LB, TRUE_EXTENT\fP .fi .SH C++ Syntax .nf #include -void MPI::Datatype::Get_true_extent(MPI::Aint& \fItrue_lb\fP, +void MPI::Datatype::Get_true_extent(MPI::Aint& \fItrue_lb\fP, MPI::Aint& \fItrue_extent\fP) const .fi .SH INPUT PARAMETER .ft R .TP 1i -datatype +datatype Data type for which information is wanted (handle). .sp .SH OUTPUT PARAMETERS @@ -54,7 +54,7 @@ True size of data type (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -62,7 +62,7 @@ The \fItrue_lb\fP parameter returns the offset of the lowest unit of store that .sp The \fItrue_extent\fP is the minimum number of bytes of memory necessary to hold a data type, uncompressed. .sp -See § 4.1.8 of the MPI-3 standard for more detailed definitions of these parameters in relation to the typemap. +See § 4.1.8 of the MPI-3 standard for more detailed definitions of these parameters in relation to the typemap. 
.SH FORTRAN 77 NOTES .ft R @@ -91,5 +91,5 @@ where MPI_ADDRESS_KIND and MPI_COUNT_KIND are constants defined in mpif.h and gi Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Type_hindexed.3in b/ompi/mpi/man/man3/MPI_Type_hindexed.3in index 4280cc504a5..6eb3fd2ca7c 100644 --- a/ompi/mpi/man/man3/MPI_Type_hindexed.3in +++ b/ompi/mpi/man/man3/MPI_Type_hindexed.3in @@ -31,39 +31,39 @@ MPI_TYPE_HINDEXED(\fICOUNT, ARRAY_OF_BLOCKLENGTHS, .SH INPUT PARAMETERS .ft R .TP 1i -count +count Number of blocks -- also number of entries in array_of_displacements and array_of_blocklengths (integer). .TP 1i array_of_blocklengths Number of elements in each block (array of nonnegative integers). .TP 1i -array_of_displacements -Byte displacement of each block (C: array of +array_of_displacements +Byte displacement of each block (C: array of .IR MPI_Aint , Fortran: array of integer). .TP 1i -oldtype +oldtype Old datatype (handle). 
.sp .SH OUTPUT PARAMETERS .ft R .TP 1i -newtype +newtype New datatype (handle). .sp .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Use MPI_Type_create_hindexed instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Use MPI_Type_create_hindexed instead. .sp -This deprecated routine is not available in C++. +This deprecated routine is not available in C++. .sp -The function is identical to MPI_Type_indexed, except that block displacements in array_of_displacements are specified in bytes, rather than in multiples of the oldtype extent. +The function is identical to MPI_Type_indexed, except that block displacements in array_of_displacements are specified in bytes, rather than in multiples of the oldtype extent. .sp Assume that oldtype has type map .sp @@ -72,13 +72,13 @@ Assume that oldtype has type map .fi .sp with extent ex. Let B be the array_of_blocklength argument and D be the -array_of_displacements argument. The newly created datatype has +array_of_displacements argument. The newly created datatype has .nf n x S^count-1 (i=0) B[i] entries: {(type(0), disp(0) + D[0]),...,(type(n-1), disp(n-1) + D[0]),..., - (type(0), disp(0) + (D[0] + B[0]-1)* ex),..., + (type(0), disp(0) + (D[0] + B[0]-1)* ex),..., type(n-1), disp(n-1) + (D[0]+ B[0]-1)* ex),..., (type(0), disp(0) + D[count-1]),...,(type(n-1), disp(n-1) + D[count-1]),..., (type(0), disp(0) + D[count-1] + (B[count-1] -1)* ex),..., @@ -89,7 +89,7 @@ n x S^count-1 Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_hvector.3in b/ompi/mpi/man/man3/MPI_Type_hvector.3in index 0e323b0cd58..6f4aa826510 100644 --- a/ompi/mpi/man/man3/MPI_Type_hvector.3in +++ b/ompi/mpi/man/man3/MPI_Type_hvector.3in @@ -21,44 +21,44 @@ int MPI_Type_hvector(int \fIcount\fP, int\fI blocklength\fP, MPI_Aint\fI stride\ INCLUDE 'mpif.h' MPI_TYPE_HVECTOR(\fICOUNT, BLOCKLENGTH, STRIDE, OLDTYPE, NEWTYPE, IERROR\fP) - INTEGER \fICOUNT, BLOCKLENGTH, STRIDE, OLDTYPE\fP - INTEGER \fINEWTYPE, IERROR\fP + INTEGER \fICOUNT, BLOCKLENGTH, STRIDE, OLDTYPE\fP + INTEGER \fINEWTYPE, IERROR\fP .fi .SH INPUT PARAMETERS .ft R .TP 1i -count +count Number of blocks (nonnegative integer). .TP 1i -blocklength +blocklength Number of elements in each block (nonnegative integer). .TP 1i -stride +stride Number of bytes between start of each block (integer). .TP 1i -oldtype +oldtype Old datatype (handle). .sp .SH OUTPUT PARAMETERS .ft R .TP 1i -newtype +newtype New datatype (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Use MPI_Type_create_hvector instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Use MPI_Type_create_hvector instead. 
.sp -This deprecated routine is not available in C++. +This deprecated routine is not available in C++. .sp The function MPI_Type_hvector is identical to MPI_Type_vector, except that stride is given in bytes, rather than in elements. The use for both types -of vector constructors is illustrated in the examples in Section 3.12.7 of the MPI-1 Standard. +of vector constructors is illustrated in the examples in Section 3.12.7 of the MPI-1 Standard. .sp Assume that oldtype has type map .sp @@ -70,15 +70,15 @@ with extent ex. Let bl be the blocklength. The newly created datatype has a type .sp .nf {(type(0), disp(0)), ..., (type(n-1), disp(n-1)), - (type(0), disp(0) + ex), ..., (type(n-1), disp(n-1) + ex), - ..., (type(0), disp(0) + (bl -1) * ex),...,(type(n-1), - disp(n-1) + (bl -1) * ex), (type(0), disp(0) + stride), - ...,(type(n-1), disp(n-1) + stride), ..., (type(0), - disp(0) + stride + (bl - 1) * ex), ..., (type(n-1), - disp(n-1) + stride + (bl -1) * ex), ..., (type(0), - disp(0) + stride * (count -1)), ...,(type(n-1), - disp(n-1) + stride * (count -1)), ..., (type(0), - disp(0) + stride * (count -1) + (bl -1) * ex), ..., + (type(0), disp(0) + ex), ..., (type(n-1), disp(n-1) + ex), + ..., (type(0), disp(0) + (bl -1) * ex),...,(type(n-1), + disp(n-1) + (bl -1) * ex), (type(0), disp(0) + stride), + ...,(type(n-1), disp(n-1) + stride), ..., (type(0), + disp(0) + stride + (bl - 1) * ex), ..., (type(n-1), + disp(n-1) + stride + (bl -1) * ex), ..., (type(0), + disp(0) + stride * (count -1)), ...,(type(n-1), + disp(n-1) + stride * (count -1)), ..., (type(0), + disp(0) + stride * (count -1) + (bl -1) * ex), ..., (type(n-1), disp(n-1) + stride * (count -1) + (bl -1) * ex)} .fi @@ -86,7 +86,7 @@ with extent ex. Let bl be the blocklength. The newly created datatype has a type Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_indexed.3in b/ompi/mpi/man/man3/MPI_Type_indexed.3in index 4c231268d74..4e840d40471 100644 --- a/ompi/mpi/man/man3/MPI_Type_indexed.3in +++ b/ompi/mpi/man/man3/MPI_Type_indexed.3in @@ -17,7 +17,7 @@ int MPI_Type_indexed(int \fIcount\fP, const int\fI array_of_blocklengths[]\fP, const int\fI array_of_displacements[]\fP, MPI_Datatype\fI oldtype\fP, MPI_Datatype\fI *newtype\fP) -int MPI_Type_create_hindexed(int \fIcount\fP, +int MPI_Type_create_hindexed(int \fIcount\fP, const int\fI array_of_blocklengths[]\fP, const MPI_Aint\fI array_of_displacements[]\fP, MPI_Datatype\fI oldtype\fP, MPI_Datatype\fI *newtype\fP) @@ -43,9 +43,9 @@ MPI_TYPE_CREATE_HINDEXED(\fICOUNT, ARRAY_OF_BLOCKLENGTHS, .SH C++ Syntax .nf #include -Datatype Datatype::Create_indexed(int \fIcount\fP, - const int \fIarray_of_blocklengths\fP[], - const int \fIarray_of_displacements\fP[]) const +Datatype Datatype::Create_indexed(int \fIcount\fP, + const int \fIarray_of_blocklengths\fP[], + const int \fIarray_of_displacements\fP[]) const Datatype 
Datatype::Create_hindexed(int \fIcount\fP, const int \fIarray_of_blocklengths\fP[], @@ -55,77 +55,77 @@ Datatype Datatype::Create_hindexed(int \fIcount\fP, .SH INPUT PARAMETERS .ft R .TP 1i -count +count Number of blocks -- also number of entries in array_of_displacements and array_of_blocklengths (nonnegative integer). .TP 1i -array_of_blocklengths +array_of_blocklengths Number of elements per block (array of nonnegative integers). .TP 1i -array_of_displacements +array_of_displacements Displacement for each block, in multiples of oldtype extent for MPI_Type_indexed and bytes for MPI_Type_create_hindexed (array of integer for .BR MPI_TYPE_INDEXED , -array of +array of .I MPI_Aint -for +for .BR MPI_TYPE_CREATE_HINDEXED ). .TP 1i -oldtype +oldtype Old datatype (handle). .sp .SH OUTPUT PARAMETERS .ft R .TP 1i -newtype +newtype New datatype (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -The function MPI_Type_indexed allows replication of an old datatype into a sequence of blocks (each block is a concatenation of the old datatype), where each block can contain a different number of copies and have a different displacement. All block displacements are multiples of the old data type's extent. +The function MPI_Type_indexed allows replication of an old datatype into a sequence of blocks (each block is a concatenation of the old datatype), where each block can contain a different number of copies and have a different displacement. All block displacements are multiples of the old data type's extent. .sp -\fBExample:\fP Let oldtype have type map {(double, 0), (char, 8)}, with extent 16. Let B = (3, 1) and let D = (4, 0). A call to MPI_Type_indexed(2, B, D, oldtype, newtype) returns a datatype with type map +\fBExample:\fP Let oldtype have type map {(double, 0), (char, 8)}, with extent 16. Let B = (3, 1) and let D = (4, 0). 
A call to MPI_Type_indexed(2, B, D, oldtype, newtype) returns a datatype with type map .sp .nf - {(double, 64), (char, 72), (double, 80), (char, 88), - (double, 96), (char, 104), + {(double, 64), (char, 72), (double, 80), (char, 88), + (double, 96), (char, 104), (double, 0), (char, 8)} .fi .sp That is, three copies of the old type starting at displacement 4 x 16 = 64, and one copy starting at displacement 0. .sp -In general, assume that oldtype has type map +In general, assume that oldtype has type map .sp .nf {(type(0), disp(0)), ..., (type(n-1), disp(n-1))}, .fi .sp with extent ex. Let B be the array_of_blocklength argument and D be the -array_of_displacements argument. The newly created datatype has +array_of_displacements argument. The newly created datatype has .br .nf n x S ^count-1 i = 0 B[i] entries: - {(type(0), disp(0) + D[0]* ex), ..., + {(type(0), disp(0) + D[0]* ex), ..., (type(n-1), disp(n-1) + D[0]* ex), ..., - (type(0), disp(0) + (D[0] + B[0]-1)* ex), ..., + (type(0), disp(0) + (D[0] + B[0]-1)* ex), ..., (type(n-1), disp(n-1) + (D[0]+ B[0]-1)* ex), ..., - (type(0), disp(0) + D[count-1]* ex), ..., + (type(0), disp(0) + D[count-1]* ex), ..., (type(n-1), disp(n-1) + D[count-1]* ex), ..., (type(0), disp(0) + (D[count-1] + B[count-1] -1)* ex), ..., (type(n-1), disp(n-1) + (D[count-1] + B[count-1] -1)* ex)} .fi .sp -A call to MPI_Type_vector(count, blocklength, stride, oldtype, newtype) is equivalent to a call to MPI_Type_indexed(count, B, D, oldtype, newtype) where +A call to MPI_Type_vector(count, blocklength, stride, oldtype, newtype) is equivalent to a call to MPI_Type_indexed(count, B, D, oldtype, newtype) where .sp .nf - D[j] = j * stride, j = 0,..., count-1 + D[j] = j * stride, j = 0,..., count-1 and @@ -138,7 +138,7 @@ The function MPI_Type_create_hindexed is identical to MPI_Type_indexed, except t Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. 
C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_lb.3in b/ompi/mpi/man/man3/MPI_Type_lb.3in index 81f08b161d7..a2e7118aea5 100644 --- a/ompi/mpi/man/man3/MPI_Type_lb.3in +++ b/ompi/mpi/man/man3/MPI_Type_lb.3in @@ -19,31 +19,31 @@ int MPI_Type_lb(MPI_Datatype \fIdatatype\fP, MPI_Aint\fI *displacement\fP) .nf INCLUDE 'mpif.h' MPI_TYPE_LB(\fIDATATYPE, DISPLACEMENT, IERROR\fP) - INTEGER \fIDATATYPE, DISPLACEMENT, IERROR\fP + INTEGER \fIDATATYPE, DISPLACEMENT, IERROR\fP .fi .SH INPUT PARAMETER .ft R .TP 1i -datatype +datatype Datatype (handle). .sp .SH OUTPUT PARAMETERS .ft R .TP 1i -displacement +displacement Displacement of lower bound from origin, in bytes (integer). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Type_get_extent instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Type_get_extent instead. .sp -This deprecated routine is not available in C++. 
+This deprecated routine is not available in C++. .sp MPI_Type_lb returns the lower bound of a data type. This may differ from zero if the type was constructed using MPI_LB. .sp @@ -55,7 +55,7 @@ In general, if Typemap = {(type0, disp0), ..., (type(n-1), disp(n-1)} .fi .sp -then the lower bound of Typemap is defined to be +then the lower bound of Typemap is defined to be .nf (min(j) disp(j) if no entry has @@ -70,19 +70,19 @@ Similarly, the upper bound of Typemap is defined to be ub(Typemap) = ( basic type ub (max(j) {disp(j) such that type(j) = ub} otherwise -Then +Then extent(Typemap) = ub(Typemap) - lb(Typemap) .fi .sp If type(i) requires alignment to a byte address that is a multiple of k(i), -then e is the least nonnegative increment needed to round extent(Typemap) to the next multiple of max(i) k(i). +then e is the least nonnegative increment needed to round extent(Typemap) to the next multiple of max(i) k(i). .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_match_size.3in b/ompi/mpi/man/man3/MPI_Type_match_size.3in index b9b5083ef10..5cb4de8e7f0 100644 --- a/ompi/mpi/man/man3/MPI_Type_match_size.3in +++ b/ompi/mpi/man/man3/MPI_Type_match_size.3in @@ -22,7 +22,7 @@ int MPI_Type_match_size(int \fItypeclass\fP, int \fIsize\fP, .nf INCLUDE 'mpif.h' MPI_TYPE_MATCH_SIZE(\fITYPECLASS, SIZE, TYPE, IERROR\fP) - INTEGER \fITYPECLASS, SIZE, TYPE, IERROR\fP + INTEGER \fITYPECLASS, SIZE, TYPE, IERROR\fP .fi .SH C++ Syntax @@ -49,7 +49,7 @@ Datatype with correct type and size (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -83,7 +83,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Type_set_attr.3in b/ompi/mpi/man/man3/MPI_Type_set_attr.3in index 2d9ca5cf2ed..705f290f081 100644 --- a/ompi/mpi/man/man3/MPI_Type_set_attr.3in +++ b/ompi/mpi/man/man3/MPI_Type_set_attr.3in @@ -5,14 +5,14 @@ .\" $COPYRIGHT$ .TH MPI_Type_set_attr 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_set_attr\fP \- Sets a key value/attribute pair to a data type. +\fBMPI_Type_set_attr\fP \- Sets a key value/attribute pair to a data type. 
.SH SYNTAX .ft R .SH C Syntax .nf #include -int MPI_Type_set_attr(MPI_Datatype \fItype\fP, int \fItype_keyval\fP, +int MPI_Type_set_attr(MPI_Datatype \fItype\fP, int \fItype_keyval\fP, void *\fIattribute_val\fP) .fi @@ -34,7 +34,7 @@ void MPI::Datatype::Set_attr(int \fItype_keyval\fP, const void* \fIattribute_val .ft R .TP 1i type -Data type to which attribute will be attached (handle). +Data type to which attribute will be attached (handle). .SH INPUT PARAMETERS .ft R @@ -49,10 +49,10 @@ Attribute value. .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION -For the given data type, MPI_Type_set_attr sets the key value to the value of the specified attribute. +For the given data type, MPI_Type_set_attr sets the key value to the value of the specified attribute. .SH FORTRAN 77 NOTES .ft R @@ -71,7 +71,7 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_set_name.3in b/ompi/mpi/man/man3/MPI_Type_set_name.3in index 83aaf14d961..8c47be73589 100644 --- a/ompi/mpi/man/man3/MPI_Type_set_name.3in +++ b/ompi/mpi/man/man3/MPI_Type_set_name.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Type_set_name 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Type_set_name\fP \- Sets the name of a data type. +\fBMPI_Type_set_name\fP \- Sets the name of a data type. .SH SYNTAX .ft R @@ -40,24 +40,24 @@ Data type for which the identifier is to be set (handle). .ft R .TP 1i type_name -The character string remembered as the name (string). +The character string remembered as the name (string). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Type_set_name associates a printable identifier with an MPI data type. +MPI_Type_set_name associates a printable identifier with an MPI data type. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_size.3in b/ompi/mpi/man/man3/MPI_Type_size.3in index db1c5d95e6a..87883a7c9fb 100644 --- a/ompi/mpi/man/man3/MPI_Type_size.3in +++ b/ompi/mpi/man/man3/MPI_Type_size.3in @@ -37,20 +37,20 @@ int Datatype::Get_size() const .SH INPUT PARAMETER .ft R .TP 1i -datatype +datatype Datatype (handle). .sp .SH OUTPUT PARAMETERS .ft R .TP 1i -size +size Datatype size (integer). .sp .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -60,7 +60,7 @@ MPI_Type_size returns the total size, in bytes, of the entries in the type signa Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH FORTRAN 77 NOTES .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_struct.3in b/ompi/mpi/man/man3/MPI_Type_struct.3in index 21fe9b2efe7..7bc9a0cbe88 100644 --- a/ompi/mpi/man/man3/MPI_Type_struct.3in +++ b/ompi/mpi/man/man3/MPI_Type_struct.3in @@ -25,14 +25,14 @@ MPI_TYPE_STRUCT(\fICOUNT, ARRAY_OF_BLOCKLENGTHS, NEWTYPE, IERROR\fP) INTEGER \fICOUNT, ARRAY_OF_BLOCKLENGTHS(*)\fP INTEGER \fIARRAY_OF_DISPLACEMENTS(*)\fP - INTEGER \fIARRAY_OF_TYPES(*), NEWTYPE, IERROR\fP + INTEGER \fIARRAY_OF_TYPES(*), NEWTYPE, IERROR\fP .fi .SH INPUT PARAMETERS .ft R .TP 1i -count +count Number of blocks (integer) also number of entries in arrays array_of_types, array_of_displacements, and array_of_blocklengths. .TP 1i @@ -42,29 +42,29 @@ Number of elements in each block (array). array_of_displacements Byte displacement of each block (array). .TP 1i -array_of_types +array_of_types Type of elements in each block (array of handles to datatype objects). .sp .SH OUTPUT PARAMETERS .ft R .TP 1i -newtype +newtype New datatype (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Use MPI_Type_create_struct instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Use MPI_Type_create_struct instead. .sp -This deprecated routine is not available in C++. +This deprecated routine is not available in C++. .sp -MPI_Type_struct is the most general type constructor. It further generalizes MPI_Type_hindexed in that it allows each block to consist of replications of different datatypes. +MPI_Type_struct is the most general type constructor. It further generalizes MPI_Type_hindexed in that it allows each block to consist of replications of different datatypes. .sp -\fBExample:\fP Let type1 have type map +\fBExample:\fP Let type1 have type map .nf {(double, 0), (char, 8)} @@ -73,24 +73,24 @@ MPI_Type_struct is the most general type constructor. 
It further generalizes MPI with extent 16. Let B = (2, 1, 3), D = (0, 16, 26), and T = (MPI_FLOAT, type1, MPI_CHAR). Then a call to MPI_Type_struct(3, B, D, T, newtype) returns a datatype with type map .nf - {(float, 0), (float,4), (double, 16), (char, 24), + {(float, 0), (float,4), (double, 16), (char, 24), (char, 26), (char, 27), (char, 28)} .fi -That is, two copies of MPI_FLOAT starting at 0, followed by one copy of type1 starting at 16, followed by three copies of MPI_CHAR, starting at 26. (We assume that a float occupies 4 bytes.) +That is, two copies of MPI_FLOAT starting at 0, followed by one copy of type1 starting at 16, followed by three copies of MPI_CHAR, starting at 26. (We assume that a float occupies 4 bytes.) .sp For more information, see section 3.12.1 of the MPI-1.1 Standard. .SH NOTES If an upperbound is set explicitly by using the MPI datatype MPI_UB, the corresponding index must be positive. .sp -The MPI-1 Standard originally made vague statements about padding and alignment; this was intended to allow the simple definition of structures that could be sent with a count greater than one. For example, +The MPI-1 Standard originally made vague statements about padding and alignment; this was intended to allow the simple definition of structures that could be sent with a count greater than one. For example, .nf struct {int a; char b;} foo; .fi -may have +may have .nf - sizeof(foo) = sizeof(int) + sizeof(char); + sizeof(foo) = sizeof(int) + sizeof(char); .fi defining the extent of a datatype as including an epsilon, which would have allowed an implementation to make the extent an MPI datatype for this structure equal to 2*sizeof(int). However, since different systems might define different paddings, a clarification to the standard made epsilon zero. Thus, if you define a structure datatype and wish to send or receive multiple items, you should explicitly include an MPI_UB entry as the last member of the structure. 
For example, the following code can be used for the structure foo: .nf @@ -106,7 +106,7 @@ defining the extent of a datatype as including an epsilon, which would have allo Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_ub.3in b/ompi/mpi/man/man3/MPI_Type_ub.3in index 7bb13524877..6f2808ff411 100644 --- a/ompi/mpi/man/man3/MPI_Type_ub.3in +++ b/ompi/mpi/man/man3/MPI_Type_ub.3in @@ -19,33 +19,33 @@ int MPI_Type_ub(MPI_Datatype \fIdatatype\fP, MPI_Aint\fI *displacement\fP) .nf INCLUDE 'mpif.h' MPI_TYPE_UB(\fIDATATYPE, DISPLACEMENT, IERROR\fP) - INTEGER \fIDATATYPE, DISPLACEMENT, IERROR\fP + INTEGER \fIDATATYPE, DISPLACEMENT, IERROR\fP .fi .SH INPUT PARAMETER .ft R .TP 1i -datatype +datatype Datatype (handle). .sp .SH OUTPUT PARAMETERS .ft R .TP 1i -displacement +displacement Displacement of upper bound from origin, in bytes (integer). .sp .ft R .TP 1i IERROR -Fortran only: Error status (integer). 
+Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Type_get_extent instead. +Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Type_get_extent instead. .sp -This deprecated routine is not available in C++. +This deprecated routine is not available in C++. .sp MPI_Type_ub returns the upper bound of a data type. This will differ from zero if the type was constructed using MPI_UB. The upper bound will take into account any alignment considerations. .sp @@ -57,7 +57,7 @@ In general, if Typemap = {(type(0), disp(0)), ..., (type(n-1), disp(n-1))} .fi -then the lower bound of Typemap is defined to be +then the lower bound of Typemap is defined to be .nf (min(j) disp(j) if no entry has @@ -73,19 +73,19 @@ Similarly, the upper bound of Typemap is defined to be (max(j) {disp(j) such that type(j) = ub} otherwise .fi -Then +Then .nf extent(Typemap) = ub(Typemap) - lb(Typemap) .fi -If type(i) requires alignment to a byte address that is a multiple of k(i), then e is the least nonnegative increment needed to round extent(Typemap) to the next multiple of max(i) k(i). +If type(i) requires alignment to a byte address that is a multiple of k(i), then e is the least nonnegative increment needed to round extent(Typemap) to the next multiple of max(i) k(i). .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Type_vector.3in b/ompi/mpi/man/man3/MPI_Type_vector.3in index 3951709c75e..fed088578da 100644 --- a/ompi/mpi/man/man3/MPI_Type_vector.3in +++ b/ompi/mpi/man/man3/MPI_Type_vector.3in @@ -21,62 +21,62 @@ int MPI_Type_vector(int \fIcount\fP, int\fI blocklength\fP, int\fI stride\fP, INCLUDE 'mpif.h' MPI_TYPE_VECTOR(\fICOUNT, BLOCKLENGTH, STRIDE, OLDTYPE, NEWTYPE, IERROR\fP) - INTEGER \fICOUNT, BLOCKLENGTH, STRIDE, OLDTYPE\fP - INTEGER \fINEWTYPE, IERROR\fP + INTEGER \fICOUNT, BLOCKLENGTH, STRIDE, OLDTYPE\fP + INTEGER \fINEWTYPE, IERROR\fP .fi .SH C++ Syntax .nf #include -Datatype Datatype::Create_vector(int \fIcount\fP, int \fIblocklength\fP, +Datatype Datatype::Create_vector(int \fIcount\fP, int \fIblocklength\fP, int \fIstride\fP) const .fi .SH INPUT PARAMETERS .ft R .TP 1i -count +count Number of blocks (nonnegative integer). .TP 1i -blocklength +blocklength Number of elements in each block (nonnegative integer). .TP 1i -stride +stride Number of elements between start of each block (integer). .TP 1i -oldtype +oldtype Old datatype (handle). .sp .SH OUTPUT PARAMETERS .ft R .TP 1i -newtype +newtype New datatype (handle). .sp .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R The function MPI_Type_vector is a general constructor that allows replication of a datatype into locations that consist of equally spaced blocks. Each block is obtained by concatenating the same number of copies of the old datatype. The spacing between blocks is a multiple of the extent of the old datatype. .sp -\fBExample 1:\fP Assume, again, that oldtype has type map {(double, 0), (char, 8)}, with extent 16. A call to MPI_Type_vector(2, 3, 4, oldtype, newtype) will create the datatype with type map +\fBExample 1:\fP Assume, again, that oldtype has type map {(double, 0), (char, 8)}, with extent 16. A call to MPI_Type_vector(2, 3, 4, oldtype, newtype) will create the datatype with type map .nf - {(double, 0), (char, 8), (double, 16), (char, 24), - (double, 32), (char, 40), - (double, 64), (char, 72), + {(double, 0), (char, 8), (double, 16), (char, 24), + (double, 32), (char, 40), + (double, 64), (char, 72), (double, 80), (char, 88), (double, 96), (char, 104)} .fi .sp -That is, two blocks with three copies each of the old type, with a stride of 4 elements (4 x 6 bytes) between the blocks. +That is, two blocks with three copies each of the old type, with a stride of 4 elements (4 x 16 bytes) between the blocks. .sp \fBExample 2:\fP A call to MPI_Type_vector(3, 1, -2, oldtype, newtype) will create the datatype .nf - {(double, 0), (char, 8), (double, -32), (char, -24), + {(double, 0), (char, 8), (double, -32), (char, -24), (double, -64), (char, -56)} .fi @@ -91,11 +91,11 @@ with extent ex. Let bl be the blocklength. The newly created datatype has a type
The newly created datatype has a type {(type(0), disp(0)), ..., (type(n-1), disp(n-1)), (type(0), disp(0) + ex), ..., (type(n-1), disp(n-1) + ex), ..., - (type(0), disp(0) + (bl -1) * ex),..., + (type(0), disp(0) + (bl -1) * ex),..., (type(n-1), disp(n-1) + (bl -1)* ex), - (type(0), disp(0) + stride * ex),..., (type(n-1), + (type(0), disp(0) + stride * ex),..., (type(n-1), disp(n-1) + stride * ex), ..., - (type(0), disp(0) + (stride + bl - 1) * ex), ..., + (type(0), disp(0) + (stride + bl - 1) * ex), ..., (type(n-1), disp(n-1) + (stride + bl -1) * ex), ..., (type(0), disp(0) + stride * (count -1) * ex), ..., (type(n-1), disp(n-1) + stride * (count -1) * ex), ..., @@ -109,7 +109,7 @@ A call to MPI_Type_contiguous(count, oldtype, newtype) is equivalent to a call t Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Unpack.3in b/ompi/mpi/man/man3/MPI_Unpack.3in index 00ecf22f095..9e8d558b719 100644 --- a/ompi/mpi/man/man3/MPI_Unpack.3in +++ b/ompi/mpi/man/man3/MPI_Unpack.3in @@ -14,86 +14,86 @@ .nf #include int MPI_Unpack(const void *\fIinbuf\fP, int\fI insize\fP, int\fI *position\fP, - void\fI *outbuf\fP, int\fI outcount\fP, MPI_Datatype\fI datatype\fP, + void\fI *outbuf\fP, int\fI outcount\fP, MPI_Datatype\fI datatype\fP, MPI_Comm\fI comm\fP) .fi .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_UNPACK(\fIINBUF, INSIZE, POSITION, OUTBUF, OUTCOUNT, +MPI_UNPACK(\fIINBUF, INSIZE, POSITION, OUTBUF, OUTCOUNT, DATATYPE, COMM, IERROR\fP) \fIINBUF(*), OUTBUF(*)\fP - INTEGER \fIINSIZE, POSITION, OUTCOUNT, DATATYPE, - COMM, IERROR\fP + INTEGER \fIINSIZE, POSITION, OUTCOUNT, DATATYPE, + COMM, IERROR\fP .fi .SH C++ Syntax .nf #include -void Datatype::Unpack(const void* \fIinbuf\fP, int \fIinsize\fP, - void *\fIoutbuf\fP, int \fIoutcount\fP, int& \fIposition\fP, +void Datatype::Unpack(const void* \fIinbuf\fP, int \fIinsize\fP, + void *\fIoutbuf\fP, int \fIoutcount\fP, int& \fIposition\fP, const Comm& \fIcomm\fP) const .fi .SH INPUT PARAMETERS .ft R .TP 1i -inbuf +inbuf Input buffer start (choice). .TP 1i -insize +insize Size of input buffer, in bytes (integer). .TP 1i -outcount +outcount Number of items to be unpacked (integer). .TP 1i -datatype +datatype Datatype of each output data item (handle). .TP 1i -comm +comm Communicator for packed message (handle). .sp .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -position +position Current position in bytes (integer). .SH OUTPUT PARAMETERS .ft R .TP 1i -outbuf +outbuf Output buffer start (choice). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Unpacks a message into the receive buffer specified by outbuf, outcount, datatype from the buffer space specified by inbuf and insize. 
The output buffer can be any communication buffer allowed in MPI_Recv. The input buffer is a contiguous storage area containing insize bytes, starting at address inbuf. The input value of position is the first location in the input buffer occupied by the packed message. \fIposition\fP is incremented by the size of the packed message, so that the output value of position is the first location in the input buffer after the locations occupied by the message that was unpacked. \fIcomm\fP is the communicator used to receive the packed message. +Unpacks a message into the receive buffer specified by outbuf, outcount, datatype from the buffer space specified by inbuf and insize. The output buffer can be any communication buffer allowed in MPI_Recv. The input buffer is a contiguous storage area containing insize bytes, starting at address inbuf. The input value of position is the first location in the input buffer occupied by the packed message. \fIposition\fP is incremented by the size of the packed message, so that the output value of position is the first location in the input buffer after the locations occupied by the message that was unpacked. \fIcomm\fP is the communicator used to receive the packed message. .SH NOTES Note the difference between MPI_Recv and MPI_Unpack: In MPI_Recv, the \fIcount\fP argument specifies the maximum number of items that can be received. The actual number of items received is determined by the length of the incoming message. In MPI_Unpack, the count argument specifies the actual number of items that are to be unpacked; the "size" of the corresponding message is the increment in position. The reason for this change is that the "incoming message size" is not predetermined since the user decides how much to unpack; nor is it easy to determine the "message size" from the number of items to be unpacked. 
.sp -To understand the behavior of pack and unpack, it is convenient to think of the data part of a message as being the sequence obtained by concatenating the successive values sent in that message. The pack operation stores this sequence in the buffer space, as if sending the message to that buffer. The unpack operation retrieves this sequence from buffer space, as if receiving a message from that buffer. (It is helpful to think of internal Fortran files or sscanf in C for a similar function.) +To understand the behavior of pack and unpack, it is convenient to think of the data part of a message as being the sequence obtained by concatenating the successive values sent in that message. The pack operation stores this sequence in the buffer space, as if sending the message to that buffer. The unpack operation retrieves this sequence from buffer space, as if receiving a message from that buffer. (It is helpful to think of internal Fortran files or sscanf in C for a similar function.) .sp Several messages can be successively packed into one packing unit. This is effected by several successive related calls to MPI_Pack, where the first call provides position = 0, and each successive call inputs the value of position that was output by the previous call, and the same values for outbuf, outcount, and comm. This packing unit now contains the equivalent information that would have been stored in a message by one send call with a send buffer that is the "concatenation" of the individual send buffers. .sp -A packing unit can be sent using type MPI_Packed. Any point-to-point or collective communication function can be used to move the sequence of bytes that forms the packing unit from one process to another. This packing unit can now be received using any receive operation, with any datatype: The type-matching rules are relaxed for messages sent with type MPI_Packed. +A packing unit can be sent using type MPI_Packed. 
Any point-to-point or collective communication function can be used to move the sequence of bytes that forms the packing unit from one process to another. This packing unit can now be received using any receive operation, with any datatype: The type-matching rules are relaxed for messages sent with type MPI_Packed. .sp A message sent with any type (including MPI_Packed) can be received using the type MPI_Packed. Such a message can then be unpacked by calls to MPI_Unpack. .sp A packing unit (or a message created by a regular, "typed" send) can be unpacked into several successive messages. This is effected by several successive related calls to MPI_Unpack, where the first call provides position = 0, and each successive call inputs the value of position that was output by the previous call, and the same values for inbuf, insize, and comm. .sp -The concatenation of two packing units is not necessarily a packing unit; nor is a substring of a packing unit necessarily a packing unit. Thus, one cannot concatenate two packing units and then unpack the result as one packing unit; nor can one unpack a substring of a packing unit as a separate packing unit. Each packing unit that was created by a related sequence of pack calls or by a regular send must be unpacked as a unit, by a sequence of related unpack calls. +The concatenation of two packing units is not necessarily a packing unit; nor is a substring of a packing unit necessarily a packing unit. Thus, one cannot concatenate two packing units and then unpack the result as one packing unit; nor can one unpack a substring of a packing unit as a separate packing unit. Each packing unit that was created by a related sequence of pack calls or by a regular send must be unpacked as a unit, by a sequence of related unpack calls. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO .ft R diff --git a/ompi/mpi/man/man3/MPI_Unpack_external.3in b/ompi/mpi/man/man3/MPI_Unpack_external.3in index 8cd584fc15c..457616f079b 100644 --- a/ompi/mpi/man/man3/MPI_Unpack_external.3in +++ b/ompi/mpi/man/man3/MPI_Unpack_external.3in @@ -28,7 +28,7 @@ MPI_UNPACK_EXTERNAL(\fIDATAREP, INBUF, INSIZE, POSITION, OUTBUF, OUTCOUNT, DATATYPE, IERROR\fP) INTEGER \fIOUTCOUNT, DATATYPE, IERROR\fP - INTEGER (KIND=MPI_ADDRESS_KIND) \fIINSIZE, POSITION\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIINSIZE, POSITION\fP CHARACTER*(*) \fIDATAREP\fP \fIINBUF(*), OUTBUF(*)\fP @@ -49,33 +49,33 @@ datarep Data Representation (string). .ft R .TP 1i -inbuf +inbuf Input buffer start (choice). .TP 1i -insize +insize Size of input buffer, in bytes (integer). .TP 1i -outcount +outcount Number of items to be unpacked (integer). .TP 1i -datatype +datatype Datatype of each output data item (handle). .SH INPUT/OUTPUT PARAMETER .ft R .TP 1i -position +position Current position in buffer, in bytes (integer). .SH OUTPUT PARAMETERS .ft R .TP 1i -outbuf +outbuf Output buffer start (choice). 
.ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R diff --git a/ompi/mpi/man/man3/MPI_Unpublish_name.3in b/ompi/mpi/man/man3/MPI_Unpublish_name.3in index edd5f5b16cb..dc8a92704d8 100644 --- a/ompi/mpi/man/man3/MPI_Unpublish_name.3in +++ b/ompi/mpi/man/man3/MPI_Unpublish_name.3in @@ -78,7 +78,7 @@ ompi_global_scope bool If set to true, unpublish the name from .fi -.sp +.sp \fIbool\fP info keys are actually strings but are evaluated as follows: if the string value is a number, it is converted to an integer and cast to a boolean (meaning that zero integers are false @@ -116,7 +116,7 @@ called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not -guarantee that an MPI program can continue past an error. +guarantee that an MPI program can continue past an error. .sp See the MPI man page for a full list of MPI error codes. diff --git a/ompi/mpi/man/man3/MPI_Wait.3in b/ompi/mpi/man/man3/MPI_Wait.3in index 8d68464fdb8..658a699e113 100644 --- a/ompi/mpi/man/man3/MPI_Wait.3in +++ b/ompi/mpi/man/man3/MPI_Wait.3in @@ -18,7 +18,7 @@ int MPI_Wait(MPI_Request *\fIrequest\fP, MPI_Status\fI *status\fP) .nf INCLUDE 'mpif.h' MPI_WAIT(\fIREQUEST, STATUS, IERROR\fP) - INTEGER \fIREQUEST, STATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER \fIREQUEST, STATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax @@ -26,51 +26,51 @@ MPI_WAIT(\fIREQUEST, STATUS, IERROR\fP) #include void Request::Wait(Status& \fIstatus\fP) -void Request::Wait() +void Request::Wait() .fi .SH INPUT PARAMETER .ft R .TP 1i -request +request Request (handle). .sp .SH OUTPUT PARAMETERS .ft R .TP 1i -status +status Status object (status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R -A call to MPI_Wait returns when the operation identified by request is complete. If the communication object associated with this request was created by a nonblocking send or receive call, then the object is deallocated by the call to MPI_Wait and the request handle is set to MPI_REQUEST_NULL. +A call to MPI_Wait returns when the operation identified by request is complete. If the communication object associated with this request was created by a nonblocking send or receive call, then the object is deallocated by the call to MPI_Wait and the request handle is set to MPI_REQUEST_NULL. .sp The call returns, in status, information on the completed operation. The content of the status object for a receive operation can be accessed as described in Section 3.2.5 of the MPI-1 Standard, "Return Status." The status object for a send operation may be queried by a call to MPI_Test_cancelled (see Section 3.8 of the MPI-1 Standard, "Probe and Cancel"). .sp -If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. +If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. .sp -One is allowed to call MPI_Wait with a null or inactive request argument. In this case the operation returns immediately with empty status. +One is allowed to call MPI_Wait with a null or inactive request argument. In this case the operation returns immediately with empty status. .SH NOTES -Successful return of MPI_Wait after an MPI_Ibsend implies that the user send buffer can be reused i.e., data has been sent out or copied into a buffer attached with MPI_Buffer_attach. Note that, at this point, we can no longer cancel the send (for more information, see Section 3.8 of the MPI-1 Standard, "Probe and Cancel"). 
If a matching receive is never posted, then the buffer cannot be freed. This runs somewhat counter to the stated goal of MPI_Cancel (always being able to free program space that was committed to the communication subsystem). +Successful return of MPI_Wait after an MPI_Ibsend implies that the user send buffer can be reused i.e., data has been sent out or copied into a buffer attached with MPI_Buffer_attach. Note that, at this point, we can no longer cancel the send (for more information, see Section 3.8 of the MPI-1 Standard, "Probe and Cancel"). If a matching receive is never posted, then the buffer cannot be freed. This runs somewhat counter to the stated goal of MPI_Cancel (always being able to free program space that was committed to the communication subsystem). .sp -Example: Simple usage of nonblocking operations and MPI_Wait. +Example: Simple usage of nonblocking operations and MPI_Wait. .sp .nf - CALL MPI_COMM_RANK(comm, rank, ierr) - IF(rank.EQ.0) THEN - CALL MPI_ISEND(a(1), 10, MPI_REAL, 1, tag, comm, request, ierr) - **** do some computation **** - CALL MPI_WAIT(request, status, ierr) - ELSE - CALL MPI_IRECV(a(1), 15, MPI_REAL, 0, tag, comm, request, ierr) - **** do some computation **** - CALL MPI_WAIT(request, status, ierr) - END IF + CALL MPI_COMM_RANK(comm, rank, ierr) + IF(rank.EQ.0) THEN + CALL MPI_ISEND(a(1), 10, MPI_REAL, 1, tag, comm, request, ierr) + **** do some computation **** + CALL MPI_WAIT(request, status, ierr) + ELSE + CALL MPI_IRECV(a(1), 15, MPI_REAL, 0, tag, comm, request, ierr) + **** do some computation **** + CALL MPI_WAIT(request, status, ierr) + END IF .fi .SH ERRORS diff --git a/ompi/mpi/man/man3/MPI_Waitall.3in b/ompi/mpi/man/man3/MPI_Waitall.3in index d86f69f2182..e3834036299 100644 --- a/ompi/mpi/man/man3/MPI_Waitall.3in +++ b/ompi/mpi/man/man3/MPI_Waitall.3in @@ -21,14 +21,14 @@ int MPI_Waitall(int \fIcount\fP, MPI_Request\fI array_of_requests[]\fP, INCLUDE 'mpif.h' MPI_WAITALL(\fICOUNT, ARRAY_OF_REQUESTS, ARRAY_OF_STATUSES, 
IERROR\fP) INTEGER \fICOUNT, ARRAY_OF_REQUESTS(*)\fP - INTEGER \fIARRAY_OF_STATUSES(MPI_STATUS_SIZE,*), IERROR\fP + INTEGER \fIARRAY_OF_STATUSES(MPI_STATUS_SIZE,*), IERROR\fP .fi .SH C++ Syntax .nf #include -static void Request::Waitall(int \fIcount\fP, Request \fIarray_of_requests\fP[], - Status \fIarray_of_statuses\fP[]) +static void Request::Waitall(int \fIcount\fP, Request \fIarray_of_requests\fP[], + Status \fIarray_of_statuses\fP[]) static void Request::Waitall(int \fIcount\fP, Request \fIarray_of_requests\fP[]) @@ -36,7 +36,7 @@ static void Request::Waitall(int \fIcount\fP, Request \fIarray_of_requests\fP[]) .SH INPUT PARAMETERS .ft R .TP 1i -count +count Lists length (integer). .TP 1i array_of_requests @@ -45,22 +45,22 @@ Array of requests (array of handles). .SH OUTPUT PARAMETERS .ft R .TP 1i -array_of_statuses +array_of_statuses Array of status objects (array of status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Blocks until all communication operations associated with active handles in the list complete, and returns the status of all these operations (this includes the case where no handle in the list is active). Both arrays have the same number of valid entries. The ith entry in array_of_statuses is set to the return status of the ith operation. Requests that were created by nonblocking communication operations are deallocated, and the corresponding handles in the array are set to MPI_REQUEST_NULL. The list may contain null or inactive handles. The call sets to empty the status of each such entry. +Blocks until all communication operations associated with active handles in the list complete, and returns the status of all these operations (this includes the case where no handle in the list is active). Both arrays have the same number of valid entries. The ith entry in array_of_statuses is set to the return status of the ith operation. 
Requests that were created by nonblocking communication operations are deallocated, and the corresponding handles in the array are set to MPI_REQUEST_NULL. The list may contain null or inactive handles. The call sets to empty the status of each such entry. .sp The error-free execution of MPI_Waitall(count, array_of_requests, array_of_statuses) has the same effect as the execution of MPI_Wait(&array_of_request[i], &array_of_statuses[i]), for i=0,...,count-1, in some arbitrary order. MPI_Waitall with an array of length 1 is equivalent to MPI_Wait. .sp -When one or more of the communications completed by a call to MPI_Waitall fail, it is desirable to return specific information on each communication. The function MPI_Waitall will return in such case the error code MPI_ERR_IN_STATUS and will set the error field of each status to a specific error code. This code will be MPI_SUCCESS if the specific communication completed; it will be another specific error code if it failed; or it can be MPI_ERR_PENDING if it has neither failed nor completed. The function MPI_Waitall will return MPI_SUCCESS if no request had an error, or will return another error code if it failed for other reasons (such as invalid arguments). In such cases, it will not update the error fields of the statuses. +When one or more of the communications completed by a call to MPI_Waitall fail, it is desirable to return specific information on each communication. The function MPI_Waitall will return in such case the error code MPI_ERR_IN_STATUS and will set the error field of each status to a specific error code. This code will be MPI_SUCCESS if the specific communication completed; it will be another specific error code if it failed; or it can be MPI_ERR_PENDING if it has neither failed nor completed. The function MPI_Waitall will return MPI_SUCCESS if no request had an error, or will return another error code if it failed for other reasons (such as invalid arguments). 
In such cases, it will not update the error fields of the statuses. .sp -If your application does not need to examine the \fIarray_of_statuses\fP field, you can save resources by using the predefined constant MPI_STATUSES_IGNORE can be used as a special value for the \fIarray_of_statuses\fP argument. +If your application does not need to examine the \fIarray_of_statuses\fP field, you can save resources by using the predefined constant MPI_STATUSES_IGNORE as a special value for the \fIarray_of_statuses\fP argument. .SH ERRORS For each invocation of MPI_Waitall, if one or more requests generate diff --git a/ompi/mpi/man/man3/MPI_Waitany.3in b/ompi/mpi/man/man3/MPI_Waitany.3in index d43c0bea7f3..08cca9d0fdd 100644 --- a/ompi/mpi/man/man3/MPI_Waitany.3in +++ b/ompi/mpi/man/man3/MPI_Waitany.3in @@ -20,13 +20,13 @@ int MPI_Waitany(int \fIcount\fP, MPI_Request\fI array_of_requests[]\fP, INCLUDE 'mpif.h' MPI_WAITANY(\fICOUNT, ARRAY_OF_REQUESTS, INDEX, STATUS, IERROR\fP) INTEGER \fICOUNT, ARRAY_OF_REQUESTS(*), INDEX\fP - INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP + INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP .fi .SH C++ Syntax .nf #include -static int Request::Waitany(int \fIcount\fP, Request +static int Request::Waitany(int \fIcount\fP, Request \fIarray_of_requests\fP[], Status& \fIstatus\fP) static int Request::Waitany(int \fIcount\fP, Request \fIarray_of_requests\fP[]) @@ -35,7 +35,7 @@ static int Request::Waitany(int \fIcount\fP, Request \fIarray_of_requests\fP[]) .SH INPUT PARAMETERS .ft R .TP 1i -count +count List length (integer). .TP 1i array_of_requests @@ -45,50 +45,50 @@ Array of requests (array of handles). .SH OUTPUT PARAMETERS .ft R .TP 1i -index +index Index of handle for operation that completed (integer). In the range 0 to count-1. In Fortran, the range is 1 to count. .TP 1i -status +status Status object (status). .sp .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R -A call to MPI_Waitany can be used to wait for the completion of one out of several requests. +A call to MPI_Waitany can be used to wait for the completion of one out of several requests. .sp -The array_of_requests list may contain null or inactive handles. If the list contains no active handles (list has length zero or all entries are null or inactive), then the call returns immediately with index = MPI_UNDEFINED, and an empty status. +The array_of_requests list may contain null or inactive handles. If the list contains no active handles (list has length zero or all entries are null or inactive), then the call returns immediately with index = MPI_UNDEFINED, and an empty status. .sp The execution of MPI_Waitany(count, array_of_requests, index, status) has the same effect as the execution of MPI_Wait(&array_of_requests[i], status), where i is the value returned by index (unless the value of index is MPI_UNDEFINED). MPI_Waitany with an array containing one active entry is equivalent to MPI_Wait. .sp -If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. +If your application does not need to examine the \fIstatus\fP field, you can save resources by using the predefined constant MPI_STATUS_IGNORE as a special value for the \fIstatus\fP argument. .sp -\fBExample:\fR Client-server code (starvation can occur). +\fBExample:\fR Client-server code (starvation can occur). .sp .nf - CALL MPI_COMM_SIZE(comm, size, ierr) - CALL MPI_COMM_RANK(comm, rank, ierr) - IF(rank .GT 0) THEN ! client code - DO WHILE(.TRUE.) - CALL MPI_ISEND(a, n, MPI_REAL, 0, tag, comm, request, ierr) - CALL MPI_WAIT(request, status, ierr) - END DO - ELSE ! rank=0 -- server code - DO i=1, size-1 - CALL MPI_IRECV(a(1,i), n, MPI_REAL, i tag, - comm, request_list(i), ierr) - END DO - DO WHILE(.TRUE.) 
- CALL MPI_WAITANY(size-1, request_list, index, status, ierr) - CALL DO_SERVICE(a(1,index)) ! handle one message - CALL MPI_IRECV(a(1, index), n, MPI_REAL, index, tag, - comm, request_list(index), ierr) - END DO - END IF + CALL MPI_COMM_SIZE(comm, size, ierr) + CALL MPI_COMM_RANK(comm, rank, ierr) + IF(rank .GT. 0) THEN ! client code + DO WHILE(.TRUE.) + CALL MPI_ISEND(a, n, MPI_REAL, 0, tag, comm, request, ierr) + CALL MPI_WAIT(request, status, ierr) + END DO + ELSE ! rank=0 -- server code + DO i=1, size-1 + CALL MPI_IRECV(a(1,i), n, MPI_REAL, i, tag, + comm, request_list(i), ierr) + END DO + DO WHILE(.TRUE.) + CALL MPI_WAITANY(size-1, request_list, index, status, ierr) + CALL DO_SERVICE(a(1,index)) ! handle one message + CALL MPI_IRECV(a(1, index), n, MPI_REAL, index, tag, + comm, request_list(index), ierr) + END DO + END IF .fi .sp diff --git a/ompi/mpi/man/man3/MPI_Waitsome.3in b/ompi/mpi/man/man3/MPI_Waitsome.3in index ba09f37e1ce..273ab0f1b5e 100644 --- a/ompi/mpi/man/man3/MPI_Waitsome.3in +++ b/ompi/mpi/man/man3/MPI_Waitsome.3in @@ -31,78 +31,78 @@ MPI_WAITSOME(\fIINCOUNT, ARRAY_OF_REQUESTS, OUTCOUNT, .SH C++ Syntax .nf #include -static int Request::Waitsome(int \fIincount\fP, Request - \fIarray_of_requests\fP[], int \fIarray_of_indices\fP[], Status \fIarray_of_statuses\fP[]) +static int Request::Waitsome(int \fIincount\fP, Request + \fIarray_of_requests\fP[], int \fIarray_of_indices\fP[], Status \fIarray_of_statuses\fP[]) -static int Request::Waitsome(int \fIincount\fP, Request - \fIarray_of_requests\fP[], int \fIarray_of_indices\fP[]) +static int Request::Waitsome(int \fIincount\fP, Request + \fIarray_of_requests\fP[], int \fIarray_of_indices\fP[]) .fi .SH INPUT PARAMETERS .ft R .TP 1i -incount +incount Length of array_of_requests (integer). .TP 1i -array_of_requests +array_of_requests Array of requests (array of handles). .SH OUTPUT PARAMETERS .ft R .TP 1i -outcount +outcount Number of completed requests (integer). 
.TP 1i -array_of_indices +array_of_indices Array of indices of operations that completed (array of integers). .TP 1i -array_of_statuses +array_of_statuses Array of status objects for operations that completed (array of status). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Waits until at least one of the operations associated with active handles in the list have completed. Returns in outcount the number of requests from the list array_of_requests that have completed. Returns in the first outcount locations of the array array_of_indices the indices of these operations (index within the array array_of_requests; the array is indexed from 0 in C and from 1 in Fortran). Returns in the first outcount locations of the array array_of_status the status for these completed operations. If a request that completed was allocated by a nonblocking communication call, then it is deallocated, and the associated handle is set to MPI_REQUEST_NULL. +Waits until at least one of the operations associated with active handles in the list have completed. Returns in outcount the number of requests from the list array_of_requests that have completed. Returns in the first outcount locations of the array array_of_indices the indices of these operations (index within the array array_of_requests; the array is indexed from 0 in C and from 1 in Fortran). Returns in the first outcount locations of the array array_of_status the status for these completed operations. If a request that completed was allocated by a nonblocking communication call, then it is deallocated, and the associated handle is set to MPI_REQUEST_NULL. .sp -If the list contains no active handles, then the call returns immediately with outcount = MPI_UNDEFINED. +If the list contains no active handles, then the call returns immediately with outcount = MPI_UNDEFINED. 
.sp -When one or more of the communications completed by MPI_Waitsome fails, then it is desirable to return specific information on each communication. The arguments outcount, array_of_indices, and array_of_statuses will be adjusted to indicate completion of all communications that have succeeded or failed. The call will return the error code MPI_ERR_IN_STATUS and the error field of each status returned will be set to indicate success or to indicate the specific error that occurred. The call will return MPI_SUCCESS if no request resulted in an error, and will return another error code if it failed for other reasons (such as invalid arguments). In such cases, it will not update the error fields of the statuses. +When one or more of the communications completed by MPI_Waitsome fails, then it is desirable to return specific information on each communication. The arguments outcount, array_of_indices, and array_of_statuses will be adjusted to indicate completion of all communications that have succeeded or failed. The call will return the error code MPI_ERR_IN_STATUS and the error field of each status returned will be set to indicate success or to indicate the specific error that occurred. The call will return MPI_SUCCESS if no request resulted in an error, and will return another error code if it failed for other reasons (such as invalid arguments). In such cases, it will not update the error fields of the statuses. .sp -If your application does not need to examine the \fIarray_of_statuses\fP field, you can save resources by using the predefined constant MPI_STATUSES_IGNORE can be used as a special value for the \fIarray_of_statuses\fP argument. -.sp -\fBExample:\fR Same code as the example in the MPI_Waitany man page, but using MPI_Waitsome. 
+If your application does not need to examine the \fIarray_of_statuses\fP field, you can save resources by using the predefined constant MPI_STATUSES_IGNORE as a special value for the \fIarray_of_statuses\fP argument. 
+.sp +\fBExample:\fR Same code as the example in the MPI_Waitany man page, but using MPI_Waitsome. .sp .nf - CALL MPI_COMM_SIZE(comm, size, ierr) - CALL MPI_COMM_RANK(comm, rank, ierr) - IF(rank .GT. 0) THEN ! client code - DO WHILE(.TRUE.) - CALL MPI_ISEND(a, n, MPI_REAL, 0, tag, comm, request, ierr) - CALL MPI_WAIT(request, status, ierr) - END DO - ELSE ! rank=0 -- server code - DO i=1, size-1 - CALL MPI_IRECV(a(1,i), n, MPI_REAL, i, tag, - comm, requests(i), ierr) - END DO - DO WHILE(.TRUE.) - CALL MPI_WAITSOME(size, request_list, numdone, - indices, statuses, ierr) - DO i=1, numdone - CALL DO_SERVICE(a(1, indices(i))) - CALL MPI_IRECV(a(1, indices(i)), n, MPI_REAL, 0, tag, - comm, requests(indices(i)), ierr) - END DO - END DO + CALL MPI_COMM_SIZE(comm, size, ierr) + CALL MPI_COMM_RANK(comm, rank, ierr) + IF(rank .GT. 0) THEN ! client code + DO WHILE(.TRUE.) + CALL MPI_ISEND(a, n, MPI_REAL, 0, tag, comm, request, ierr) + CALL MPI_WAIT(request, status, ierr) + END DO + ELSE ! rank=0 -- server code + DO i=1, size-1 + CALL MPI_IRECV(a(1,i), n, MPI_REAL, i, tag, + comm, requests(i), ierr) + END DO + DO WHILE(.TRUE.) + CALL MPI_WAITSOME(size, request_list, numdone, + indices, statuses, ierr) + DO i=1, numdone + CALL DO_SERVICE(a(1, indices(i))) + CALL MPI_IRECV(a(1, indices(i)), n, MPI_REAL, 0, tag, + comm, requests(indices(i)), ierr) + END DO + END DO END IF .fi .sp .SH NOTES .ft R -The array of indices are in the range 0 to incount-1 for C and in the range 1 to incount for Fortran. +The array of indices are in the range 0 to incount-1 for C and in the range 1 to incount for Fortran. .SH ERRORS For each invocation of MPI_Waitsome, if one or more requests generate diff --git a/ompi/mpi/man/man3/MPI_Win_allocate.3in b/ompi/mpi/man/man3/MPI_Win_allocate.3in new file mode 100644 index 00000000000..a6dc76f9b86 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Win_allocate.3in @@ -0,0 +1,106 @@ +.\" -*- nroff -*- +.\" Copyright 2015 Los Alamos National Security, LLC. 
All rights reserved. +.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2007-2008 Sun Microsystems, Inc. +.\" Copyright (c) 1996 Thinking Machines Corporation +.\" $COPYRIGHT$ +.TH MPI_Win_allocate 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Win_allocate\fP \- One-sided MPI call that allocates memory and +returns a window object for RMA operations. + +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include +int MPI_Win_allocate (MPI_Aint \fIsize\fP, int \fIdisp_unit\fP, MPI_Info \fIinfo\fP, + MPI_Comm \fIcomm\fP, void *\fIbaseptr\fP, MPI_Win *\fIwin\fP) + +.fi +.SH Fortran Syntax +.nf +INCLUDE 'mpif.h' +MPI_WIN_ALLOCATE(\fISIZE, DISP_UNIT, INFO, COMM, BASEPTR, WIN, IERROR\fP) + INTEGER(KIND=MPI_ADDRESS_KIND) \fISIZE, BASEPTR\fP + INTEGER \fIDISP_UNIT, INFO, COMM, WIN, IERROR\fP + +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +size +Size of window in bytes (nonnegative integer). +.TP 1i +disp_unit +Local unit size for displacements, in bytes (positive integer). +.TP 1i +info +Info argument (handle). +.TP 1i +comm +Communicator (handle). + +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +baseptr +Initial address of window. +.TP 1i +win +Window object returned by the call (handle). +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +\fBMPI_Win_allocate\fP is a collective call executed by all processes +in the group of \fIcomm\fP. On each process, it allocates memory of at +least \fIsize\fP bytes, returns a pointer to it, and returns a window +object that can be used by all processes in \fIcomm\fP to perform RMA +operations. The returned memory consists of \fIsize\fP bytes local to +each process, starting at address \fIbaseptr\fP and is associated with +the window as if the user called \fBMPI_Win_create\fP on existing +memory. 
The \fIsize\fP argument may be different at each process and +\fIsize\fP = 0 is valid; however, a library might allocate and expose +more memory in order to create a fast, globally symmetric +allocation. The discussion of and rationales for \fBMPI_Alloc_mem\fP and +\fBMPI_Free_mem\fP in MPI-3.1 \[char167] 8.2 also apply to +\fBMPI_Win_allocate\fP; in particular, see the rationale in MPI-3.1 +\[char167] 8.2 for an explanation of the type used for \fIbaseptr\fP. +.sp +The displacement unit argument is provided to facilitate address +arithmetic in RMA operations: the target displacement argument of an +RMA operation is scaled by the factor \fIdisp_unit\fP specified by the +target process, at window creation. +.sp +For supported info keys see \fBMPI_Win_create\fP. +.sp + +.SH NOTES +Common choices for \fIdisp_unit\fP are 1 (no scaling), and (in C +syntax) \fIsizeof(type)\fP, for a window that consists of an array of +elements of type \fItype\fP. The latter choice will allow one to use +array indices in RMA calls, and have those scaled correctly to byte +displacements, even in a heterogeneous environment. +.sp + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +MPI_Comm_set_errhandler; the predefined error handler +MPI_ERRORS_RETURN may be used to cause error values to be +returned. Note that MPI does not guarantee that an MPI program can +continue past an error. 
+ +.SH SEE ALSO +.ft R +.sp +MPI_Alloc_mem +MPI_Free_mem +MPI_Win_create +MPI_Win_allocate_shared diff --git a/ompi/mpi/man/man3/MPI_Win_allocate_shared.3in b/ompi/mpi/man/man3/MPI_Win_allocate_shared.3in new file mode 100644 index 00000000000..e929ea54689 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Win_allocate_shared.3in @@ -0,0 +1,131 @@ +.\" -*- nroff -*- +.\" Copyright 2015-2016 Los Alamos National Security, LLC. All rights reserved. +.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2007-2008 Sun Microsystems, Inc. +.\" Copyright (c) 1996 Thinking Machines Corporation +.\" $COPYRIGHT$ +.TH MPI_Win_allocate_shared 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Win_allocate_shared\fP \- One-sided MPI call that allocates +shared memory and returns a window object for RMA operations. + +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include +int MPI_Win_allocate_shared (MPI_Aint \fIsize\fP, int \fIdisp_unit\fP, MPI_Info \fIinfo\fP, + MPI_Comm \fIcomm\fP, void *\fIbaseptr\fP, MPI_Win *\fIwin\fP) + +.fi +.SH Fortran Syntax +.nf +INCLUDE 'mpif.h' +MPI_WIN_ALLOCATE_SHARED(\fISIZE, DISP_UNIT, INFO, COMM, BASEPTR, WIN, IERROR\fP) + INTEGER(KIND=MPI_ADDRESS_KIND) \fISIZE, BASEPTR\fP + INTEGER \fIDISP_UNIT, INFO, COMM, WIN, IERROR\fP + +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +size +Size of window in bytes (nonnegative integer). +.TP 1i +disp_unit +Local unit size for displacements, in bytes (positive integer). +.TP 1i +info +Info argument (handle). +.TP 1i +comm +Communicator (handle). + +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +baseptr +Initial address of window. +.TP 1i +win +Window object returned by the call (handle). +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +\fBMPI_Win_allocate_shared\fP is a collective call executed by all +processes in the group of \fIcomm\fP.
On each process, it allocates +memory of at least \fIsize\fP bytes that is shared among all processes +in \fIcomm\fP, and returns a pointer to the locally allocated segment +in \fIbaseptr\fP that can be used for load/store accesses on the +calling process. The locally allocated memory can be the target of +load/store accesses by remote processes; the base pointers for other +processes can be queried using the function +\fBMPI_Win_shared_query\fP. The call also returns a window object that +can be used by all processes in \fIcomm\fP to perform RMA +operations. The \fIsize\fP argument may be different at each process +and \fIsize\fP = 0 is valid. It is the user's responsibility to ensure +that the communicator \fIcomm\fP represents a group of processes that +can create a shared memory segment that can be accessed by all +processes in the group. The discussions of rationales for +\fBMPI_Alloc_mem\fP and \fBMPI_Free_mem\fP in MPI-3.1 \[char167] 8.2 +also apply to \fBMPI_Win_allocate_shared\fP; in particular, see the +rationale in MPI-3.1 \[char167] 8.2 for an explanation of the type +used for \fIbaseptr\fP. The allocated memory is contiguous across +process ranks unless the info key \fIalloc_shared_noncontig\fP is +specified. Contiguous across process ranks means that the first +address in the memory segment of process i is consecutive with the +last address in the memory segment of process i - 1. This may enable +the user to calculate remote address offsets with local information +only. +.sp +The following info keys are supported: +.ft R +.TP 1i +alloc_shared_noncontig +If not set to \fItrue\fP, the allocation strategy is to allocate +contiguous memory across process ranks. This may limit the performance +on some architectures because it does not allow the implementation to +modify the data layout (e.g., padding to reduce access latency). +.sp +.TP 1i +blocking_fence +If set to \fItrue\fP, the osc/sm component will use \fBMPI_Barrier\fP +for \fBMPI_Win_fence\fP. 
If set to \fIfalse\fP a condition variable +and counter will be used instead. The default value is +\fIfalse\fP. This info key is Open MPI specific. +.sp +.TP 1i +For additional supported info keys see \fBMPI_Win_create\fP. +.sp + +.SH NOTES +Common choices for \fIdisp_unit\fP are 1 (no scaling), and (in C +syntax) \fIsizeof(type)\fP, for a window that consists of an array of +elements of type \fItype\fP. The latter choice will allow one to use +array indices in RMA calls, and have those scaled correctly to byte +displacements, even in a heterogeneous environment. +.sp + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +MPI_Comm_set_errhandler; the predefined error handler +MPI_ERRORS_RETURN may be used to cause error values to be +returned. Note that MPI does not guarantee that an MPI program can +continue past an error. + +.SH SEE ALSO +.ft R +.sp +MPI_Alloc_mem +MPI_Free_mem +MPI_Win_allocate +MPI_Win_create +MPI_Win_shared_query diff --git a/ompi/mpi/man/man3/MPI_Win_attach.3in b/ompi/mpi/man/man3/MPI_Win_attach.3in new file mode 100644 index 00000000000..9b31ca67ca4 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Win_attach.3in @@ -0,0 +1,67 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. +.\" $COPYRIGHT$ +.TH MPI_Win_attach 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Win_attach, MPI_Win_detach\fP \- One-sided MPI calls that attach / detach a memory region to / from a window object for RMA operations.
+ +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include +int MPI_Win_attach(MPI_Win \fIwin\fP, void *\fIbase\fP, MPI_Aint \fIsize\fP) + +int MPI_Win_detach(MPI_Win \fIwin\fP, const void *\fIbase\fP) +.fi +.SH Fortran Syntax +.nf +INCLUDE 'mpif.h' +MPI_WIN_ATTACH(\fIWIN, BASE, SIZE, IERROR\fP) + \fIBASE\fP(*) + INTEGER(KIND=MPI_ADDRESS_KIND) \fISIZE\fP + INTEGER \fIWIN, IERROR\fP + +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +win +A window that was created with +.I MPI_Win_create_dynamic + +.TP 1i +base +Initial address of window (choice). +.TP 1i +size +Size of window in bytes (nonnegative integer). + +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +win +Window object returned by the call (handle). +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +MPI_Win_attach is a one-sided MPI communication call that attaches a local memory region of \fIsize\fP bytes, starting at address \fIbase\fP, to the window \fIwin\fP for RMA access. It is a local call, not a collective one. The window \fIwin\fP must have been created with MPI_Win_create_dynamic. Multiple non-overlapping memory regions may be attached to the same dynamic window; attaching overlapping memory regions to the same window is erroneous. A process may elect to expose no memory by specifying \fIsize\fP = 0. MPI_Win_detach detaches a previously attached memory region beginning at \fIbase\fP; the arguments \fIbase\fP and \fIwin\fP must match the arguments passed to a previous call to MPI_Win_attach. +.sp +If the \fIbase\fP value used by MPI_Win_attach was allocated by MPI_Alloc_mem, the size of the window can be no larger than the value set by the MPI_ALLOC_MEM function. +.sp + +.SH NOTES +Use memory allocated by MPI_Alloc_mem to guarantee properly aligned window boundaries (such as word, double-word, cache line, page frame, and so on). +.sp + + + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for I/O function errors.
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Win_call_errhandler.3in b/ompi/mpi/man/man3/MPI_Win_call_errhandler.3in index 2c109e13afa..38a5eed882b 100644 --- a/ompi/mpi/man/man3/MPI_Win_call_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_Win_call_errhandler.3in @@ -15,14 +15,14 @@ error handler assigned to a window .SH C Syntax .nf #include -int MPI_Win_call_errhandler(MPI_Win \fIwin\fP, int \fIerrorcode\fP) +int MPI_Win_call_errhandler(MPI_Win \fIwin\fP, int \fIerrorcode\fP) .fi .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_WIN_CALL_ERRHANDLER(\fIWIN, ERRORCODE, IERROR\fP) - INTEGER \fIWIN, ERRORCODE, IERROR\fP +MPI_WIN_CALL_ERRHANDLER(\fIWIN, ERRORCODE, IERROR\fP) + INTEGER \fIWIN, ERRORCODE, IERROR\fP .fi .SH C++ Syntax @@ -45,7 +45,7 @@ MPI error code (integer). .ft R .TP 1.4i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R diff --git a/ompi/mpi/man/man3/MPI_Win_complete.3in b/ompi/mpi/man/man3/MPI_Win_complete.3in index fd5639d2428..9863b7e4d58 100644 --- a/ompi/mpi/man/man3/MPI_Win_complete.3in +++ b/ompi/mpi/man/man3/MPI_Win_complete.3in @@ -24,7 +24,7 @@ MPI_WIN_COMPLETE(WIN, IERROR) .SH C++ Syntax .nf #include -void MPI::Win::Complete() const +void MPI::Win::Complete() const .fi .SH INPUT PARAMETERS @@ -37,7 +37,7 @@ Window object (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -48,7 +48,7 @@ MPI_Win_complete is a one-sided MPI communication synchronization call, completi Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Win_start diff --git a/ompi/mpi/man/man3/MPI_Win_create.3in b/ompi/mpi/man/man3/MPI_Win_create.3in index fe48bcf98ca..563e7984beb 100644 --- a/ompi/mpi/man/man3/MPI_Win_create.3in +++ b/ompi/mpi/man/man3/MPI_Win_create.3in @@ -1,4 +1,6 @@ .\" -*- nroff -*- +.\" Copyright 2015 Los Alamos National Security, LLC. All rights +.\" reserved. .\" Copyright 2010 Cisco Systems, Inc. All rights reserved. .\" Copyright 2007-2008 Sun Microsystems, Inc. 
.\" Copyright (c) 1996 Thinking Machines Corporation @@ -12,7 +14,7 @@ .SH C Syntax .nf #include -MPI_Win_create(void *\fIbase\fP, MPI_Aint \fIsize\fP, int \fIdisp_unit\fP, +MPI_Win_create(void *\fIbase\fP, MPI_Aint \fIsize\fP, int \fIdisp_unit\fP, MPI_Info \fIinfo\fP, MPI_Comm \fIcomm\fP, MPI_Win *\fIwin\fP) .fi @@ -28,8 +30,8 @@ MPI_WIN_CREATE(\fIBASE, SIZE, DISP_UNIT, INFO, COMM, WIN, IERROR\fP) .SH C++ Syntax .nf #include -static MPI::Win MPI::Win::Create(const void* \fIbase\fP, - MPI::Aint \fIsize\fP, int \fIdisp_unit\fP, const +static MPI::Win MPI::Win::Create(const void* \fIbase\fP, + MPI::Aint \fIsize\fP, int \fIdisp_unit\fP, const MPI::Info& \fIinfo\fP, const MPI::Intracomm& \fIcomm\fP) .fi @@ -55,10 +57,10 @@ Communicator (handle). .ft R .TP 1i win -Window object returned by the call (handle). +Window object returned by the call (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -71,12 +73,12 @@ The displacement unit argument is provided to facilitate address arithmetic in R The following info keys are supported: .ft R .TP 1i -no_locks +no_locks If set to \fItrue\fP, then the implementation may assume that the local window is never locked (by a call to MPI_Win_lock or MPI_Win_lock_all). Setting this value if only active synchronization may allow the implementation to enable certain optimizations. -.sp +.sp .TP 1i accumulate_ordering By default, accumulate operations from one initiator to one target on @@ -87,7 +89,7 @@ required orderings consisting of \fIrar\fP, \fIwar\fP, \fIraw\fP, and \fIwaw\fP read-after-read, write-after-read, read-after-write, and write-after-write, respectively. Looser ordering constraints are likely to result in improved performance. -.sp +.sp .TP 1i accumulate_ops If set to \fIsame_op\fP, the implementation will assume that all concurrent @@ -95,14 +97,25 @@ accumulate calls to the same target address will use the same operation. 
If set to \fIsame_op_no_op\fP, then the implementation will assume that all concurrent accumulate calls to the same target address will use the same operation or MPI_NO_OP. The default is \fIsame_op_no_op\fP. -.sp - +.sp +.TP 1i +same_size +If set to \fItrue\fP, then the implementation may assume that the argument +\fIsize\fP is identical on all processes, and that all processes have +provided this info key with the same value. +.sp +.TP 1i +same_disp_unit +If set to \fItrue\fP, then the implementation may assume that the argument +\fIdisp_unit\fP is identical on all processes, and that all processes have +provided this info key with the same value. +.sp .SH NOTES -Common choices for \fIdisp_unit\fP are 1 (no scaling), and (in C syntax) \fIsizeof(type)\fP, for a window that consists of an array of elements of type \fItype\fP. The later choice will allow one to use array indices in RMA calls, and have those scaled correctly to byte displacements, even in a heterogeneous environment. +Common choices for \fIdisp_unit\fP are 1 (no scaling), and (in C syntax) \fIsizeof(type)\fP, for a window that consists of an array of elements of type \fItype\fP. The later choice will allow one to use array indices in RMA calls, and have those scaled correctly to byte displacements, even in a heterogeneous environment. .sp Use memory allocated by MPI_Alloc_mem to guarantee properly aligned window boundaries (such as word, double-word, cache line, page frame, and so on). .sp - + .SH FORTRAN 77 NOTES @@ -122,7 +135,12 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. 
By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. - - +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +.SH SEE ALSO +.ft R +.sp +MPI_Alloc_mem +MPI_Free_mem +MPI_Win_allocate +MPI_Win_allocate_shared diff --git a/ompi/mpi/man/man3/MPI_Win_create_dynamic.3in b/ompi/mpi/man/man3/MPI_Win_create_dynamic.3in new file mode 100644 index 00000000000..65c371887a7 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Win_create_dynamic.3in @@ -0,0 +1,78 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. +.\" $COPYRIGHT$ +.TH MPI_Win_create_dynamic 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Win_create_dynamic\fP \- One-sided MPI call that returns a window object for RMA operations. + +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include +MPI_Win_create_dynamic(MPI_Info \fIinfo\fP, MPI_Comm \fIcomm\fP, MPI_Win *\fIwin\fP) + +.fi +.SH Fortran Syntax +.nf +INCLUDE 'mpif.h' +MPI_WIN_CREATE_DYNAMIC(\fIINFO, COMM, WIN, IERROR\fP) + INTEGER \fIINFO, COMM, WIN, IERROR\fP + +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +info +Info argument (handle). +.TP 1i +comm +Communicator (handle). + +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +win +Window object returned by the call (handle). +.TP 1i +IERROR +Fortran only: Error status (integer). 
+ +.SH DESCRIPTION +.ft R +MPI_Win_create_dynamic is a one-sided MPI communication collective call executed by all processes in the group of \fIcomm\fP. It returns a window object without memory attached that can be used by these processes to perform RMA operations. +.sp +The following info keys are supported: +.ft R +.TP 1i +no_locks +If set to \fItrue\fP, then the implementation may assume that the local +window is never locked (by a call to MPI_Win_lock or +MPI_Win_lock_all). Setting this value if only active synchronization +is used may allow the implementation to enable certain optimizations. +.sp +.TP 1i +accumulate_ordering +By default, accumulate operations from one initiator to one target on +the same window are strictly ordered. If the info key +accumulate_ordering is set to \fInone\fP, no ordering of accumulate +operations is guaranteed. The key can also be a comma-separated list of +required orderings consisting of \fIrar\fP, \fIwar\fP, \fIraw\fP, and \fIwaw\fP for +read-after-read, write-after-read, read-after-write, and +write-after-write, respectively. Looser ordering constraints are +likely to result in improved performance. +.sp +.TP 1i +accumulate_ops +If set to \fIsame_op\fP, the implementation will assume that all concurrent +accumulate calls to the same target address will use the same +operation. If set to \fIsame_op_no_op\fP, then the implementation will +assume that all concurrent accumulate calls to the same target address +will use the same operation or MPI_NO_OP. The default is \fIsame_op_no_op\fP. + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for I/O function errors.
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Win_create_errhandler.3in b/ompi/mpi/man/man3/MPI_Win_create_errhandler.3in index d917e1f30af..da7588b5b7f 100644 --- a/ompi/mpi/man/man3/MPI_Win_create_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_Win_create_errhandler.3in @@ -12,23 +12,23 @@ .SH C Syntax .nf #include -int MPI_Win_create_errhandler(MPI_Win_errhandler_function *\fIfunction\fP, - MPI_Errhandler *\fIerrhandler\fP) +int MPI_Win_create_errhandler(MPI_Win_errhandler_function *\fIfunction\fP, + MPI_Errhandler *\fIerrhandler\fP) .fi .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_WIN_CREATE_ERRHANDLER(\fIFUNCTION, ERRHANDLER, IERROR\fP) - EXTERNAL \fIFUNCTION\fP - INTEGER \fIERRHANDLER, IERROR\fP +MPI_WIN_CREATE_ERRHANDLER(\fIFUNCTION, ERRHANDLER, IERROR\fP) + EXTERNAL \fIFUNCTION\fP + INTEGER \fIERRHANDLER, IERROR\fP .fi .SH C++ Syntax .nf #include static MPI::Errhandler MPI::Win::Create_errhandler(MPI::Win:: - errhandler_function* \fIfunction\fP) + errhandler_function* \fIfunction\fP) .fi .SH DEPRECATED TYPE NAME NOTE @@ -43,7 +43,7 @@ typedefs to the _function names). .ft R .TP 1i function -User-defined error-handling procedure (function). +User-defined error-handling procedure (function). .SH OUTPUT PARAMETERS .ft R @@ -52,17 +52,17 @@ errhandler MPI error handler (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R -MPI_Win_create_errhandler should be, in C, a function of type MPI_Win_errhandler_function, which is defined as +MPI_Win_create_errhandler should be, in C, a function of type MPI_Win_errhandler_function, which is defined as .sp .nf typedef void MPI_Win_errhandler_function(MPI Win *, int *, ...); .fi .sp -The first argument is the window in use, the second is the error code to be returned. +The first argument is the window in use, the second is the error code to be returned. .sp In Fortran, the user routine should be of the form: .sp @@ -81,5 +81,5 @@ typedef void MPI::Win::errhandler_function(MPI::Win &, int *, ...); Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Win_create_keyval.3in b/ompi/mpi/man/man3/MPI_Win_create_keyval.3in index 8ad742e32a0..1b26bf818d9 100644 --- a/ompi/mpi/man/man3/MPI_Win_create_keyval.3in +++ b/ompi/mpi/man/man3/MPI_Win_create_keyval.3in @@ -12,28 +12,28 @@ .SH C Syntax .nf #include -int MPI_Win_create_keyval(MPI_Win_copy_attr_function *\fIwin_copy_attr_fn\fP, - MPI_Win_delete_attr_function *\fIwin_delete_attr_fn\fP, - int *\fIwin_keyval\fP, void *\fIextra_state\fP) +int MPI_Win_create_keyval(MPI_Win_copy_attr_function *\fIwin_copy_attr_fn\fP, + MPI_Win_delete_attr_function *\fIwin_delete_attr_fn\fP, + int *\fIwin_keyval\fP, void *\fIextra_state\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf INCLUDE 'mpif.h' -MPI_WIN_CREATE_KEYVAL(\fIWIN_COPY_ATTR_FN, WIN_DELETE_ATTR_FN, - WIN_KEYVAL, EXTRA_STATE, IERROR\fP) - EXTERNAL \fIWIN_COPY_ATTR_FN, WIN_DELETE_ATTR_FN\fP - INTEGER \fIWIN_KEYVAL, IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE\fP +MPI_WIN_CREATE_KEYVAL(\fIWIN_COPY_ATTR_FN, WIN_DELETE_ATTR_FN, + WIN_KEYVAL, EXTRA_STATE, IERROR\fP) + EXTERNAL \fIWIN_COPY_ATTR_FN, WIN_DELETE_ATTR_FN\fP + INTEGER \fIWIN_KEYVAL, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE\fP .fi .SH C++ Syntax .nf #include -static int MPI::Win::Create_keyval(MPI::Win::Copy_attr_function* - \fIwin_copy_attr_fn\fP, - MPI::Win::Delete_attr_function* \fIwin_delete_attr_fn\fP, - void* \fIextra_state\fP) +static int MPI::Win::Create_keyval(MPI::Win::Copy_attr_function* + \fIwin_copy_attr_fn\fP, + MPI::Win::Delete_attr_function* \fIwin_delete_attr_fn\fP, + void* \fIextra_state\fP) .fi .SH INPUT PARAMETERS @@ -43,73 +43,73 @@ win_copy_attr_fn Copy callback function for \fIwin_keyval\fP (function). .TP 1i win_delete_attr_fn -Delete callback function for \fIwin_keyval\fP (function). +Delete callback function for \fIwin_keyval\fP (function). .TP 1i extra_state -Extra state for callback functions. +Extra state for callback functions. 
.SH OUTPUT PARAMETERS .ft R .TP 1i win_keyval -Key value for future access (integer). +Key value for future access (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -The argument \fIwin_copy_attr_fn\fP may be specified as MPI_WIN_NULL_COPY_FN or MPI_WIN_DUP_FN from either C, C++, or Fortran. MPI_WIN_NULL_COPY_FN is a function that serves only to return \fIflag\fP = 0 and MPI_SUCCESS. MPI_WIN_DUP_FN is a simple-minded copy function that sets \fIflag\fP = 1, returns the value of \fIattribute_val_in\fP in \fIattribute_val_out\fP, and returns MPI_SUCCESS. +The argument \fIwin_copy_attr_fn\fP may be specified as MPI_WIN_NULL_COPY_FN or MPI_WIN_DUP_FN from either C, C++, or Fortran. MPI_WIN_NULL_COPY_FN is a function that serves only to return \fIflag\fP = 0 and MPI_SUCCESS. MPI_WIN_DUP_FN is a simple-minded copy function that sets \fIflag\fP = 1, returns the value of \fIattribute_val_in\fP in \fIattribute_val_out\fP, and returns MPI_SUCCESS. .sp -The argument \fIwin_delete_attr_fn\fP may be specified as MPI_WIN_NULL_DELETE_FN from either C, C++, or Fortran. MPI_WIN_NULL_DELETE_FN is a function that serves only to return MPI_SUCCESS. +The argument \fIwin_delete_attr_fn\fP may be specified as MPI_WIN_NULL_DELETE_FN from either C, C++, or Fortran. MPI_WIN_NULL_DELETE_FN is a function that serves only to return MPI_SUCCESS. 
.sp The C callback functions are: .sp .nf -typedef int MPI_Win_copy_attr_function(MPI_Win \fIoldwin\fP, int \fIwin_keyval\fP, - void *\fIextra_state\fP, void *\fIattribute_val_in\fP, - void *\fIattribute_val_out\fP, int *\fIflag\fP); +typedef int MPI_Win_copy_attr_function(MPI_Win \fIoldwin\fP, int \fIwin_keyval\fP, + void *\fIextra_state\fP, void *\fIattribute_val_in\fP, + void *\fIattribute_val_out\fP, int *\fIflag\fP); .fi .sp and .sp .nf -typedef int MPI_Win_delete_attr_function(MPI_Win \fIwin\fP, int \fIwin_keyval\fP, - void *\fIattribute_val\fP, void *\fIextra_state\fP); +typedef int MPI_Win_delete_attr_function(MPI_Win \fIwin\fP, int \fIwin_keyval\fP, + void *\fIattribute_val\fP, void *\fIextra_state\fP); .fi .sp The Fortran callback functions are: .sp .nf -SUBROUTINE WIN_COPY_ATTR_FN(\fIOLDWIN, WIN_KEYVAL, EXTRA_STATE, - ATTRIBUTE_VAL_IN, ATTRIBUTE_VAL_OUT, FLAG, IERROR\fP) - INTEGER \fIOLDWIN, WIN_KEYVAL, IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE, ATTRIBUTE_VAL_IN, - ATTRIBUTE_VAL_OUT\fP - LOGICAL \fIFLAG\fP +SUBROUTINE WIN_COPY_ATTR_FN(\fIOLDWIN, WIN_KEYVAL, EXTRA_STATE, + ATTRIBUTE_VAL_IN, ATTRIBUTE_VAL_OUT, FLAG, IERROR\fP) + INTEGER \fIOLDWIN, WIN_KEYVAL, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIEXTRA_STATE, ATTRIBUTE_VAL_IN, + ATTRIBUTE_VAL_OUT\fP + LOGICAL \fIFLAG\fP .fi .sp and .sp .nf -SUBROUTINE WIN_DELETE_ATTR_FN(\fIWIN, WIN_KEYVAL, ATTRIBUTE_VAL, - EXTRA_STATE, IERROR\fP) - INTEGER \fIWIN, WIN_KEYVAL, IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE_VAL, EXTRA_STATE\fP +SUBROUTINE WIN_DELETE_ATTR_FN(\fIWIN, WIN_KEYVAL, ATTRIBUTE_VAL, + EXTRA_STATE, IERROR\fP) + INTEGER \fIWIN, WIN_KEYVAL, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE_VAL, EXTRA_STATE\fP .fi .sp The C++ callbacks are: .sp .nf -typedef int MPI::Win::Copy_attr_function(const MPI::Win& \fIoldwin\fP, - int \fIwin_keyval\fP, void* \fIextra_state\fP, void* \fIattribute_val_in\fP, - void* \fIattribute_val_out\fP, bool& \fIflag\fP); +typedef int 
MPI::Win::Copy_attr_function(const MPI::Win& \fIoldwin\fP, + int \fIwin_keyval\fP, void* \fIextra_state\fP, void* \fIattribute_val_in\fP, + void* \fIattribute_val_out\fP, bool& \fIflag\fP); .fi .sp and .sp .nf -typedef int MPI::Win::Delete_attr_function(MPI::Win& \fIwin\fP, int \fIwin_keyval\fP, void* \fIattribute_val\fP, void* \fIextra_state\fP); +typedef int MPI::Win::Delete_attr_function(MPI::Win& \fIwin\fP, int \fIwin_keyval\fP, void* \fIattribute_val\fP, void* \fIextra_state\fP); .fi .SH FORTRAN 77 NOTES @@ -129,5 +129,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Win_delete_attr.3in b/ompi/mpi/man/man3/MPI_Win_delete_attr.3in index 8777c1549bf..14b591e9127 100644 --- a/ompi/mpi/man/man3/MPI_Win_delete_attr.3in +++ b/ompi/mpi/man/man3/MPI_Win_delete_attr.3in @@ -18,8 +18,8 @@ int MPI_Win_delete_attr(MPI_Win \fIwin\fP, int \fIwin_keyval\fP) .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_WIN_DELETE_ATTR(\fIWIN, WIN_KEYVAL, IERROR\fP) - INTEGER \fIWIN, WIN_KEYVAL, IERROR\fP +MPI_WIN_DELETE_ATTR(\fIWIN, WIN_KEYVAL, IERROR\fP) + INTEGER \fIWIN, WIN_KEYVAL, IERROR\fP .fi .SH C++ Syntax @@ -32,19 +32,19 @@ void MPI::Win::Delete_attr(int \fIwin_keyval\fP) .ft R .TP 1i win -Window from which the attribute is deleted (handle). +Window from which the attribute is deleted (handle). .SH INPUT PARAMETER .ft R .TP 1i win_keyval -Key value (integer). +Key value (integer). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH NOTES Note that it is not defined by the MPI standard what happens if the @@ -57,5 +57,5 @@ is being invoked. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Win_detach.3in b/ompi/mpi/man/man3/MPI_Win_detach.3in new file mode 100644 index 00000000000..ff60c711116 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Win_detach.3in @@ -0,0 +1 @@ +.so man3/MPI_Win_attach.3 diff --git a/ompi/mpi/man/man3/MPI_Win_fence.3in b/ompi/mpi/man/man3/MPI_Win_fence.3in index 07914931db5..23472a886ee 100644 --- a/ompi/mpi/man/man3/MPI_Win_fence.3in +++ b/ompi/mpi/man/man3/MPI_Win_fence.3in @@ -18,8 +18,8 @@ int MPI_Win_fence(int \fIassert\fP, MPI_Win \fIwin\fP) .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_WIN_FENCE(\fIASSERT, WIN, IERROR\fP) - INTEGER \fIASSERT, WIN, IERROR\fP +MPI_WIN_FENCE(\fIASSERT, WIN, IERROR\fP) + INTEGER \fIASSERT, WIN, IERROR\fP .fi .SH C++ Syntax @@ -35,24 +35,24 @@ assert Program assertion (integer). .TP 1i win -Window object (handle). +Window object (handle). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Win_fence synchronizes RMA calls on \fIwin\fP. The call is collective on the group of \fIwin\fP. All RMA operations on \fIwin\fP originating at a given process and started before the fence call will complete at that process before the fence call returns. They will be completed at their target before the fence call returns at the target. RMA operations on \fIwin\fP started by a process after the fence call returns will access their target window only after MPI_Win_fence has been called by the target process. +MPI_Win_fence synchronizes RMA calls on \fIwin\fP. The call is collective on the group of \fIwin\fP.
All RMA operations on \fIwin\fP originating at a given process and started before the fence call will complete at that process before the fence call returns. They will be completed at their target before the fence call returns at the target. RMA operations on \fIwin\fP started by a process after the fence call returns will access their target window only after MPI_Win_fence has been called by the target process. .sp -The call completes an RMA access epoch if it was preceded by another fence call and the local process issued RMA communication calls on \fIwin\fP between these two calls. The call completes an RMA exposure epoch if it was preceded by another fence call and the local window was the target of RMA accesses between these two calls. The call starts an RMA access epoch if it is followed by another fence call and by RMA communication calls issued between these two fence calls. The call starts an exposure epoch if it is followed by another fence call and the local window is the target of RMA accesses between these two fence calls. Thus, the fence call is equivalent to calls to a subset of \fIpost, start, complete, wait\fP. +The call completes an RMA access epoch if it was preceded by another fence call and the local process issued RMA communication calls on \fIwin\fP between these two calls. The call completes an RMA exposure epoch if it was preceded by another fence call and the local window was the target of RMA accesses between these two calls. The call starts an RMA access epoch if it is followed by another fence call and by RMA communication calls issued between these two fence calls. The call starts an exposure epoch if it is followed by another fence call and the local window is the target of RMA accesses between these two fence calls. Thus, the fence call is equivalent to calls to a subset of \fIpost, start, complete, wait\fP. 
.sp -A fence call usually entails a barrier synchronization: a process completes a call to MPI_Win_fence only after all other processes in the group have entered their matching call. However, a call to MPI_Win_fence that is known not to end any epoch (in particular, a call with \fIassert\fP = MPI_MODE_NOPRECEDE) does not necessarily act as a barrier. +A fence call usually entails a barrier synchronization: a process completes a call to MPI_Win_fence only after all other processes in the group have entered their matching call. However, a call to MPI_Win_fence that is known not to end any epoch (in particular, a call with \fIassert\fP = MPI_MODE_NOPRECEDE) does not necessarily act as a barrier. .SH NOTE -Calls to MPI_Win_fence should both precede and follow calls to put, get or accumulate that are synchronized with fence calls. +Calls to MPI_Win_fence should both precede and follow calls to put, get or accumulate that are synchronized with fence calls. .sp @@ -60,7 +60,7 @@ Calls to MPI_Win_fence should both precede and follow calls to put, get or accum Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Win_create diff --git a/ompi/mpi/man/man3/MPI_Win_free.3in b/ompi/mpi/man/man3/MPI_Win_free.3in index 54521d61e0d..a52822fde9e 100644 --- a/ompi/mpi/man/man3/MPI_Win_free.3in +++ b/ompi/mpi/man/man3/MPI_Win_free.3in @@ -32,13 +32,13 @@ void MPI::Win::Free() .ft R .TP 1i win -Window object (handle). +Window object (handle). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -48,7 +48,7 @@ MPI_Win_free frees the window object \fIwin\fP and returns a null handle (equal Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO MPI_Win_create diff --git a/ompi/mpi/man/man3/MPI_Win_free_keyval.3in b/ompi/mpi/man/man3/MPI_Win_free_keyval.3in index 71aaf7223bd..1b881c179ca 100644 --- a/ompi/mpi/man/man3/MPI_Win_free_keyval.3in +++ b/ompi/mpi/man/man3/MPI_Win_free_keyval.3in @@ -18,8 +18,8 @@ int MPI_Win_free_keyval(int *\fIwin_keyval\fP) .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_WIN_FREE_KEYVAL(\fIWIN_KEYVAL, IERROR\fP) - INTEGER \fIWIN_KEYVAL, IERROR\fP +MPI_WIN_FREE_KEYVAL(\fIWIN_KEYVAL, IERROR\fP) + INTEGER \fIWIN_KEYVAL, IERROR\fP .fi .SH C++ Syntax @@ -32,17 +32,17 @@ static void MPI::Win::Free_keyval(int& \fIwin_keyval\fP) .ft R .TP 1i win_keyval -Key value (integer). +Key value (integer). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Win_get_attr.3in b/ompi/mpi/man/man3/MPI_Win_get_attr.3in index 7a544fb0359..d0d110c746c 100644 --- a/ompi/mpi/man/man3/MPI_Win_get_attr.3in +++ b/ompi/mpi/man/man3/MPI_Win_get_attr.3in @@ -12,30 +12,30 @@ .SH C Syntax .nf #include -int MPI_Win_get_attr(MPI_Win \fIwin\fP, int \fIwin_keyval\fP, - void *\fIattribute_val\fP, int *\fIflag\fP) +int MPI_Win_get_attr(MPI_Win \fIwin\fP, int \fIwin_keyval\fP, + void *\fIattribute_val\fP, int *\fIflag\fP) .fi .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf INCLUDE 'mpif.h' -MPI_WIN_GET_ATTR(\fIWIN, WIN_KEYVAL, ATTRIBUTE_VAL, FLAG, IERROR\fP) - INTEGER \fIWIN, WIN_KEYVAL, IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE_VAL\fP - LOGICAL \fIFLAG\fP +MPI_WIN_GET_ATTR(\fIWIN, WIN_KEYVAL, ATTRIBUTE_VAL, FLAG, IERROR\fP) + INTEGER \fIWIN, WIN_KEYVAL, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE_VAL\fP + LOGICAL \fIFLAG\fP .fi .SH C++ Syntax .nf #include -bool MPI::Win::Get_attr(int win_keyval, void* attribute_val) const +bool MPI::Win::Get_attr(int win_keyval, void* attribute_val) const .fi .SH INPUT PARAMETERS .ft R .TP 1i win -Window to which the attribute is attached (handle). +Window to which the attribute is attached (handle). .TP 1i win_keyval Key value (integer). @@ -47,10 +47,10 @@ attribute_val Attribute value, unless \fIag\fP = false .TP 1i flag -False if no attribute is associated with the key (logical). +False if no attribute is associated with the key (logical). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -62,7 +62,7 @@ the \fIATTRIBUTE_VAL\fP argument only for Fortran 90. FORTRAN 77 users may use the non-portable syntax .sp .nf - INTEGER*MPI_ADDRESS_KIND \fIATTRIBUTE_VAL\fP + INTEGER*MPI_ADDRESS_KIND \fIATTRIBUTE_VAL\fP .fi .sp where MPI_ADDRESS_KIND is a constant defined in mpif.h @@ -72,5 +72,5 @@ and gives the length of the declared integer in bytes. 
Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Win_get_errhandler.3in b/ompi/mpi/man/man3/MPI_Win_get_errhandler.3in index d75ccd7728e..725bc07b9d9 100644 --- a/ompi/mpi/man/man3/MPI_Win_get_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_Win_get_errhandler.3in @@ -18,7 +18,7 @@ int MPI_Win_get_errhandler(MPI_Win \fIwin\fP, MPI_Errhandler *\fIerrhandler\fP) .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_WIN_GET_ERRHANDLER(\fIWIN, ERRHANDLER, IERROR\fP) +MPI_WIN_GET_ERRHANDLER(\fIWIN, ERRHANDLER, IERROR\fP) INTEGER \fIWIN, ERRHANDLER, IERROR\fP .fi @@ -38,10 +38,10 @@ Window (handle). .ft R .TP 1i errhandler -Error handler currently associated with window (handle). +Error handler currently associated with window (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.SH DESCRIPTION .ft R @@ -51,6 +51,6 @@ MPI_Win_get_errhandler retrieves the error handler currently associated with a w Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Win_get_group.3in b/ompi/mpi/man/man3/MPI_Win_get_group.3in index 9b8f7bec6ec..7754e761ddc 100644 --- a/ompi/mpi/man/man3/MPI_Win_get_group.3in +++ b/ompi/mpi/man/man3/MPI_Win_get_group.3in @@ -5,7 +5,7 @@ .\" $COPYRIGHT$ .TH MPI_Win_get_group 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Win_get_group\fP \- Returns a duplicate of the group of the communicator used to create the window. +\fBMPI_Win_get_group\fP \- Returns a duplicate of the group of the communicator used to create the window. .SH SYNTAX .ft R @@ -32,24 +32,24 @@ MPI::Group MPI::Win::Get_group() const .ft R .TP 1i win -Window object (handle). +Window object (handle). .SH OUTPUT PARAMETERS .ft R .TP 1i group -Group of processes that share access to the window (handle). 
+Group of processes that share access to the window (handle). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Win_get_group returns a duplicate of the group of the communicator used to create the window associated with \fIwin\fP. The group is returned in \fIgroup\fP. +MPI_Win_get_group returns a duplicate of the group of the communicator used to create the window associated with \fIwin\fP. The group is returned in \fIgroup\fP. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Win_get_info.3in b/ompi/mpi/man/man3/MPI_Win_get_info.3in new file mode 100644 index 00000000000..a7e16776a02 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Win_get_info.3in @@ -0,0 +1,65 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. 
+.\" $COPYRIGHT$ +.TH MPI_Win_get_info 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Win_get_info\fP \- Retrieves active window info hints +. +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include <mpi.h> +int MPI_Win_get_info(MPI_Win \fIwin\fP, MPI_Info \fI*info_used\fP) +. +.fi +.SH Fortran Syntax +.nf +INCLUDE 'mpif.h' +MPI_WIN_GET_INFO(\fIWIN, INFO_USED, IERROR\fP) + INTEGER \fIWIN, INFO_USED, IERROR \fP +. +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +win +Window from which to receive active info hints +. +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +info_used +New info object returned with all active hints on this window. +.TP 1i +IERROR +Fortran only: Error status (integer). +. +.SH DESCRIPTION +.ft R +MPI_Win_get_info returns a new info object containing the hints of +the window associated with +.IR win . +The current setting of all hints actually used by the system related +to this window is returned in +.IR info_used . +If no such hints exist, a handle to a newly created info object is +returned that contains no key/value pair. The user is responsible for +freeing info_used via MPI_Info_free. +. +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +MPI_Comm_set_errhandler; the predefined error handler +MPI_ERRORS_RETURN may be used to cause error values to be +returned. Note that MPI does not guarantee that an MPI program can +continue past an error. +. 
+.SH SEE ALSO +MPI_Win_set_info, +MPI_Win_free diff --git a/ompi/mpi/man/man3/MPI_Win_get_name.3in b/ompi/mpi/man/man3/MPI_Win_get_name.3in index 05351662192..d3c8493aeb1 100644 --- a/ompi/mpi/man/man3/MPI_Win_get_name.3in +++ b/ompi/mpi/man/man3/MPI_Win_get_name.3in @@ -18,8 +18,8 @@ int MPI_Win_get_name(MPI_Win \fIwin\fP, char *\fIwin_name\fP, int *\fIresultlen\ .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_WIN_GET_NAME(\fIWIN, WIN_NAME, RESULTLEN, IERROR\fP) - INTEGER \fIWIN, RESULTLEN, IERROR\fP +MPI_WIN_GET_NAME(\fIWIN, WIN_NAME, RESULTLEN, IERROR\fP) + INTEGER \fIWIN, RESULTLEN, IERROR\fP CHARACTER*(*) \fIWIN_NAME\fP .fi @@ -33,19 +33,19 @@ void MPI::Win::Get_name(char* \fIwin_name\fP, int& \fIresultlen\fP) const .ft R .TP 1i win -Window whose name is to be returned (handle). +Window whose name is to be returned (handle). .SH OUTPUT PARAMETERS .ft R .TP 1i win_name -the Tame previously stored on the window, or an empty string if no such name exists (string). +the name previously stored on the window, or an empty string if no such name exists (string). .TP 1i resultlen -Length of returned name (integer). +Length of returned name (integer). .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -54,6 +54,6 @@ Fortran only: Error status (integer). Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. 
Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Win_lock.3in b/ompi/mpi/man/man3/MPI_Win_lock.3in index 5c42fce8012..401dce58cea 100644 --- a/ompi/mpi/man/man3/MPI_Win_lock.3in +++ b/ompi/mpi/man/man3/MPI_Win_lock.3in @@ -19,8 +19,8 @@ int MPI_Win_lock(int \fIlock_type\fP, int \fIrank\fP, int \fIassert\fP, MPI_Win .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_WIN_LOCK(\fILOCK_TYPE, RANK, ASSERT, WIN, IERROR\fP) - INTEGER \fILOCK_TYPE, RANK, ASSERT, WIN, IERROR\fP +MPI_WIN_LOCK(\fILOCK_TYPE, RANK, ASSERT, WIN, IERROR\fP) + INTEGER \fILOCK_TYPE, RANK, ASSERT, WIN, IERROR\fP .fi .SH C++ Syntax @@ -33,47 +33,47 @@ void MPI::Win::Lock(int \fIlock_type\fP, int \fIrank\fP, int \fIassert\fP) const .ft R .TP 1i lock_type -Either MPI_LOCK_EXCLUSIVE or MPI_LOCK_SHARED (state). +Either MPI_LOCK_EXCLUSIVE or MPI_LOCK_SHARED (state). .TP 1i rank -Rank of locked window (nonnegative integer). +Rank of locked window (nonnegative integer). .TP 1i assert -Program assertion (integer). +Program assertion (integer). .TP 1i win -Window object (handle). +Window object (handle). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R Starts an RMA access epoch. Locks ensure that only the windows created by specific processes can be accessed by those processes (and by no other processes) during that epoch. 
.sp -Locks are used to protect accesses to the locked target window effected by RMA calls issued between the lock and unlock call, and to protect local load/store accesses to a locked local window executed between the lock and unlock call. -Accesses that are protected by an exclusive lock will not be concurrent at the window site with other accesses to the same window that are lock protected. Accesses that are protected by a shared lock will not be concurrent at the window site with accesses protected by an exclusive lock to the same window. +Locks are used to protect accesses to the locked target window effected by RMA calls issued between the lock and unlock call, and to protect local load/store accesses to a locked local window executed between the lock and unlock call. +Accesses that are protected by an exclusive lock will not be concurrent at the window site with other accesses to the same window that are lock protected. Accesses that are protected by a shared lock will not be concurrent at the window site with accesses protected by an exclusive lock to the same window. .sp The \fIassert\fP argument is used to provide assertions on the context of the call that may be used for various optimizations. (See Section 6.4.4 of the MPI-2 Standard.) A value of \fIassert\fP = 0 is always valid. .sp -.ft -.SH NOTES +.ft +.SH NOTES .ft R In a client/server environment in which clients connect to a server and create windows that span both the client and the server, if a client or server that has obtained a lock on such a window and then terminates abnormally, the server or other clients -may hang in a MPI_Win_lock call, failing to notice that the peer MPI job +may hang in a MPI_Win_lock call, failing to notice that the peer MPI job has terminated. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Win_unlock diff --git a/ompi/mpi/man/man3/MPI_Win_post.3in b/ompi/mpi/man/man3/MPI_Win_post.3in index 14bd9e3def6..191acacc252 100644 --- a/ompi/mpi/man/man3/MPI_Win_post.3in +++ b/ompi/mpi/man/man3/MPI_Win_post.3in @@ -45,7 +45,7 @@ Window object (handle) .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION @@ -55,7 +55,7 @@ Starts an RMA exposure epoch for the local window associated with \fIwin\fP. Onl Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Win_start diff --git a/ompi/mpi/man/man3/MPI_Win_set_attr.3in b/ompi/mpi/man/man3/MPI_Win_set_attr.3in index 0ea79b6ca7e..be68982c6f2 100644 --- a/ompi/mpi/man/man3/MPI_Win_set_attr.3in +++ b/ompi/mpi/man/man3/MPI_Win_set_attr.3in @@ -18,9 +18,9 @@ int MPI_Win_set_attr(MPI_Win \fIwin\fP, int \fIwin_keyval\fP, void *\fIattribute .SH Fortran Syntax (see FORTRAN 77 NOTES) .nf INCLUDE 'mpif.h' -MPI_WIN_SET_ATTR(\fIWIN, WIN_KEYVAL, ATTRIBUTE_VAL, IERROR\fP) - INTEGER \fIWIN, WIN_KEYVAL, IERROR\fP - INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE_VAL\fP +MPI_WIN_SET_ATTR(\fIWIN, WIN_KEYVAL, ATTRIBUTE_VAL, IERROR\fP) + INTEGER \fIWIN, WIN_KEYVAL, IERROR\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fIATTRIBUTE_VAL\fP .fi .SH C++ Syntax @@ -33,22 +33,22 @@ void MPI::Win::Set_attr(int \fIwin_keyval\fP, const void* \fIattribute_val\fP) .ft R .TP 1i win -Window to which attribute will be attached (handle). +Window to which attribute will be attached (handle). .SH INPUT PARAMETERS .ft R .TP 1i win_keyval -Key value (integer). +Key value (integer). .TP 1i attribute_val -Attribute value. +Attribute value. .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -60,7 +60,7 @@ the \fIATTRIBUTE_VAL\fP argument only for Fortran 90. 
FORTRAN 77 users may use the non-portable syntax .sp .nf - INTEGER*MPI_ADDRESS_KIND \fIATTRIBUTE_VAL\fP + INTEGER*MPI_ADDRESS_KIND \fIATTRIBUTE_VAL\fP .fi .sp where MPI_ADDRESS_KIND is a constant defined in mpif.h @@ -70,5 +70,5 @@ and gives the length of the declared integer in bytes. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
diff --git a/ompi/mpi/man/man3/MPI_Win_set_errhandler.3in b/ompi/mpi/man/man3/MPI_Win_set_errhandler.3in index f626e7ffa56..d23e705b2cb 100644 --- a/ompi/mpi/man/man3/MPI_Win_set_errhandler.3in +++ b/ompi/mpi/man/man3/MPI_Win_set_errhandler.3in @@ -18,8 +18,8 @@ int MPI_Win_set_errhandler(MPI_Win \fIwin\fP, MPI_Errhandler \fIerrhandler\fP) .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_WIN_SET_ERRHANDLER(\fIWIN, ERRHANDLER, IERROR\fP) - INTEGER \fIWIN, ERRHANDLER, IERROR\fP +MPI_WIN_SET_ERRHANDLER(\fIWIN, ERRHANDLER, IERROR\fP) + INTEGER \fIWIN, ERRHANDLER, IERROR\fP .fi .SH C++ Syntax @@ -32,28 +32,28 @@ void MPI::Win::Set_errhandler(const MPI::Errhandler& \fIerrhandler\fP) .ft R .TP 1i win -Window (handle). +Window (handle). .SH INPUT PARAMETER .ft R .TP 1i errhandler -New error handler for window (handle). +New error handler for window (handle). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Win_set_errhandler attaches a new error handler to a window. The error handler must be either a predefined error handler or an error handler created by a call to MPI_Win_create_errhandler. +MPI_Win_set_errhandler attaches a new error handler to a window. The error handler must be either a predefined error handler or an error handler created by a call to MPI_Win_create_errhandler. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. 
The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Win_set_info.3in b/ompi/mpi/man/man3/MPI_Win_set_info.3in new file mode 100644 index 00000000000..8a6111b7f8e --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Win_set_info.3in @@ -0,0 +1,67 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. +.\" $COPYRIGHT$ +.TH MPI_Win_set_info 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Win_set_info\fP \- Set window info hints +. +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include <mpi.h> +int MPI_Win_set_info(MPI_Win \fIwin\fP, MPI_Info \fIinfo\fP) +. +.fi +.SH Fortran Syntax +.nf +INCLUDE 'mpif.h' +MPI_WIN_SET_INFO(\fIWIN, INFO, IERROR\fP) + INTEGER \fIWIN, INFO, IERROR \fP +. +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +win +Window on which to set info hints +.TP 1i +info +Info object containing hints to be set on +.I win +. +.SH OUTPUT PARAMETERS +.TP 1i +IERROR +Fortran only: Error status (integer). +. +.SH DESCRIPTION +.ft R +MPI_WIN_SET_INFO sets new values for the hints of the window +associated with +.IR win . +MPI_WIN_SET_INFO is a collective routine. The info object may be +different on each process, but any info entries that an implementation +requires to be the same on all processes must appear with the same +value in each process's +.I info +object. +. 
+.SH ERRORS +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +MPI_Comm_set_errhandler; the predefined error handler +MPI_ERRORS_RETURN may be used to cause error values to be +returned. Note that MPI does not guarantee that an MPI program can +continue past an error. +. +.SH SEE ALSO +MPI_Win_get_info, +MPI_Info_create, +MPI_Info_set, +MPI_Info_free diff --git a/ompi/mpi/man/man3/MPI_Win_set_name.3in b/ompi/mpi/man/man3/MPI_Win_set_name.3in index 8a5119eed39..05ea4c234ff 100644 --- a/ompi/mpi/man/man3/MPI_Win_set_name.3in +++ b/ompi/mpi/man/man3/MPI_Win_set_name.3in @@ -19,9 +19,9 @@ int MPI_Win_set_name(MPI_Win \fIwin\fP, const char *\fIwin_name\fP) .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_WIN_SET_NAME(\fIWIN, WIN_NAME, IERROR\fP) - INTEGER \fIWIN, IERROR\fP - CHARACTER*(*) \fIWIN_NAME\fP +MPI_WIN_SET_NAME(\fIWIN, WIN_NAME, IERROR\fP) + INTEGER \fIWIN, IERROR\fP + CHARACTER*(*) \fIWIN_NAME\fP .fi .SH C++ Syntax @@ -34,19 +34,19 @@ void MPI::Win::Set_name(const char* \fIwin_name\fP) .ft R .TP 1i win -Window whose identifier is to be set (handle). +Window whose identifier is to be set (handle). .SH INPUT PARAMETER .ft R .TP 1i win_name -The character string used as the name (string). +The character string used as the name (string). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -55,6 +55,6 @@ Fortran only: Error status (integer). Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. 
If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. diff --git a/ompi/mpi/man/man3/MPI_Win_shared_query.3in b/ompi/mpi/man/man3/MPI_Win_shared_query.3in new file mode 100644 index 00000000000..77d109efb85 --- /dev/null +++ b/ompi/mpi/man/man3/MPI_Win_shared_query.3in @@ -0,0 +1,87 @@ +.\" -*- nroff -*- +.\" Copyright 2015 Los Alamos National Security, LLC. All rights reserved. +.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright 2007-2008 Sun Microsystems, Inc. 
+.\" Copyright (c) 1996 Thinking Machines Corporation +.\" $COPYRIGHT$ +.TH MPI_Win_shared_query 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPI_Win_shared_query\fP \- Query a shared memory window + +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include <mpi.h> +int MPI_Win_shared_query (MPI_Win \fIwin\fP, int \fIrank\fP, MPI_Aint *\fIsize\fP, + int *\fIdisp_unit\fP, void *\fIbaseptr\fP) + +.fi +.SH Fortran Syntax +.nf +INCLUDE 'mpif.h' +MPI_WIN_SHARED_QUERY(WIN, RANK, SIZE, DISP_UNIT, BASEPTR, IERROR) + INTEGER WIN, RANK, DISP_UNIT, IERROR + INTEGER(KIND=MPI_ADDRESS_KIND) SIZE, BASEPTR + +.fi +.SH INPUT PARAMETERS +.ft R +.TP 1i +win +Shared memory window object (handle). +.TP 1i +rank +Rank in the group of window \fIwin\fP (non-negative integer) +or MPI_PROC_NULL. + +.SH OUTPUT PARAMETERS +.ft R +.TP 1i +size +Size of the window segment (non-negative integer). +.TP 1i +disp_unit +Local unit size for displacements, in bytes (positive integer). +.TP 1i +baseptr +Address for load/store access to window segment +(choice). +.TP 1i +IERROR +Fortran only: Error status (integer). + +.SH DESCRIPTION +.ft R +\fBMPI_Win_shared_query\fP queries the process-local address for +remote memory segments created with MPI_Win_allocate_shared. This +function can return different process-local addresses for the same +physical memory on different processes. The returned memory can be +used for load/store accesses subject to the constraints defined in +MPI-3.1 \[char167] 11.7. This function can only be called with windows +of flavor MPI_WIN_FLAVOR_SHARED. If the passed window is not of flavor +MPI_WIN_FLAVOR_SHARED, the error MPI_ERR_RMA_FLAVOR is raised. When +rank is MPI_PROC_NULL, the \fIbaseptr\fP, \fIdisp_unit\fP, and +\fIsize\fP returned are the pointer, disp_unit, and size of the memory +segment belonging to the lowest rank that specified \fIsize\fP > 0.
If +all processes in the group attached to the window specified \fIsize\fP += 0, then the call returns \fIsize\fP = 0 and a \fIbaseptr\fP as if +\fBMPI_Alloc_mem\fP was called with \fIsize\fP = 0. + +.SH ERRORS +Almost all MPI routines return an error value; C routines as the value +of the function and Fortran routines in the last argument. +.sp +Before the error value is returned, the current MPI error handler is +called. By default, this error handler aborts the MPI job, except for +I/O function errors. The error handler may be changed with +MPI_Comm_set_errhandler; the predefined error handler +MPI_ERRORS_RETURN may be used to cause error values to be +returned. Note that MPI does not guarantee that an MPI program can +continue past an error. + +.SH SEE ALSO +.ft R +.sp +MPI_Alloc_mem +MPI_Win_allocate_shared diff --git a/ompi/mpi/man/man3/MPI_Win_start.3in b/ompi/mpi/man/man3/MPI_Win_start.3in index c4f4b3d2a64..fb2f4086255 100644 --- a/ompi/mpi/man/man3/MPI_Win_start.3in +++ b/ompi/mpi/man/man3/MPI_Win_start.3in @@ -44,19 +44,19 @@ Window object (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R MPI_Win_start is a one-sided MPI communication synchronization call that starts an RMA access epoch for \fIwin\fP. RMA calls issued on \fIwin\fP during this epoch must access only windows at processes in \fIgroup\fP. Each process in \fIgroup\fP must issue a matching call to MPI_Win_post. MPI_Win_start -is allowed to block until the corresponding MPI_Win_post calls have been executed, but is not required to. +is allowed to block until the corresponding MPI_Win_post calls have been executed, but is not required to. .sp The \fIassert\fP argument is used to provide assertions on the context of the call that may be used for various optimizations. (See Section 6.4.4 of the MPI-2 Standard.) A value of \fIassert\fP = 0 is always valid. 
The following assertion value is supported: .sp .TP 1i -MPI_MODE_NOCHECK +MPI_MODE_NOCHECK When this value is passed in to this call, the library assumes that the post call on the target has been called and it is not necessary for the library to check to see if such a call has been made. @@ -65,7 +65,7 @@ for the library to check to see if such a call has been made. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. .SH SEE ALSO MPI_Win_post diff --git a/ompi/mpi/man/man3/MPI_Win_test.3in b/ompi/mpi/man/man3/MPI_Win_test.3in index cf316106fc8..78dbaa09c8d 100644 --- a/ompi/mpi/man/man3/MPI_Win_test.3in +++ b/ompi/mpi/man/man3/MPI_Win_test.3in @@ -25,7 +25,7 @@ MPI_WIN_TEST(\fI WIN, FLAG, IERROR\fP) .SH C++ Syntax .nf #include -bool MPI::Win::Test() const +bool MPI::Win::Test() const .fi .SH INPUT PARAMETERS @@ -39,7 +39,7 @@ Window object (handle) .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). 
.TP 1i flag The returning state of the test for epoch closure. @@ -47,7 +47,7 @@ The returning state of the test for epoch closure. .SH DESCRIPTION .ft R -MPI_Win_test is a one-sided MPI communication synchronization call, a +MPI_Win_test is a one-sided MPI communication synchronization call, a nonblocking version of MPI_Win_wait. It returns \fIflag = true\fP if MPI_Win_wait would return, \fIflag = false\fP otherwise. The effect of return of MPI_Win_test with \fIflag = true\fP is the same as the effect of a return of MPI_Win_wait. If \fIflag = false\fP is returned, then the call has no visible effect. .sp @@ -59,7 +59,7 @@ the call has returned \fIflag = true\fP, it must not be invoked anew, until the Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO MPI_Win_post diff --git a/ompi/mpi/man/man3/MPI_Win_unlock.3in b/ompi/mpi/man/man3/MPI_Win_unlock.3in index 099ad7732f8..5778ab2a737 100644 --- a/ompi/mpi/man/man3/MPI_Win_unlock.3in +++ b/ompi/mpi/man/man3/MPI_Win_unlock.3in @@ -6,7 +6,7 @@ .\" $COPYRIGHT$ .TH MPI_Win_unlock 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Win_unlock\fP \- Completes an RMA access epoch started by a call to MPI_Win_lock. +\fBMPI_Win_unlock\fP \- Completes an RMA access epoch started by a call to MPI_Win_lock. .SH SYNTAX .ft R @@ -19,8 +19,8 @@ int MPI_Win_unlock(int \fIrank\fP, MPI_Win \fIwin\fP) .SH Fortran Syntax .nf INCLUDE 'mpif.h' -MPI_WIN_UNLOCK(\fIRANK, WIN, IERROR\fP) - INTEGER \fIRANK, WIN, IERROR\fP +MPI_WIN_UNLOCK(\fIRANK, WIN, IERROR\fP) + INTEGER \fIRANK, WIN, IERROR\fP .fi .SH C++ Syntax @@ -33,28 +33,28 @@ void MPI::Win::Unlock(int \fIrank\fP) const .ft R .TP 1i rank -Rank of window (nonnegative integer). +Rank of window (nonnegative integer). .TP 1i win -Window object (handle). +Window object (handle). .SH OUTPUT PARAMETER .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R -MPI_Win_unlock completes an RMA access epoch started by a call to MPI_Win_lock. RMA operations issued during this period will have completed both at the origin and at the target when the call returns. +MPI_Win_unlock completes an RMA access epoch started by a call to MPI_Win_lock. RMA operations issued during this period will have completed both at the origin and at the target when the call returns. .sp -Locks are used to protect accesses to the locked target window effected by RMA calls issued between the lock and unlock call, and to protect local load/store accesses to a locked local window executed between the lock and unlock call. Accesses that are protected by an exclusive lock will not be concurrent at the window site with other accesses to the same window that are lock protected. 
Accesses that are protected by a shared lock will not be concurrent at the window site with accesses protected by an exclusive lock to the same window. +Locks are used to protect accesses to the locked target window effected by RMA calls issued between the lock and unlock call, and to protect local load/store accesses to a locked local window executed between the lock and unlock call. Accesses that are protected by an exclusive lock will not be concurrent at the window site with other accesses to the same window that are lock protected. Accesses that are protected by a shared lock will not be concurrent at the window site with accesses protected by an exclusive lock to the same window. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
.SH SEE ALSO MPI_Win_lock diff --git a/ompi/mpi/man/man3/MPI_Win_wait.3in b/ompi/mpi/man/man3/MPI_Win_wait.3in index bd55677b5ba..bb6e8c6fcb4 100644 --- a/ompi/mpi/man/man3/MPI_Win_wait.3in +++ b/ompi/mpi/man/man3/MPI_Win_wait.3in @@ -25,7 +25,7 @@ MPI_WIN_WAIT(\fI WIN, IERROR\fP) .SH C++ Syntax .nf #include -void MPI::Win::Wait() const +void MPI::Win::Wait() const .fi .SH INPUT PARAMETERS @@ -38,7 +38,7 @@ Window object (handle). .ft R .TP 1i IERROR -Fortran only: Error status (integer). +Fortran only: Error status (integer). .SH DESCRIPTION .ft R @@ -54,8 +54,8 @@ call returns, all these RMA accesses will have completed at the target window. Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is -called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. - +called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Win_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. 
+ .SH SEE ALSO MPI_Win_post .br diff --git a/ompi/mpi/man/man3/MPI_Wtick.3in b/ompi/mpi/man/man3/MPI_Wtick.3in index 78c5fe3e7c9..29bf4052248 100644 --- a/ompi/mpi/man/man3/MPI_Wtick.3in +++ b/ompi/mpi/man/man3/MPI_Wtick.3in @@ -1,6 +1,7 @@ .\" -*- nroff -*- .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright (c) 2017 Cisco Systems, Inc. .\" $COPYRIGHT$ .TH MPI_Wtick 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME @@ -38,15 +39,12 @@ successive clock ticks. For example, if the clock is implemented by the hardware as a counter that is incremented every millisecond, the value returned by MPI_Wtick should be 10^-3. .PP -Note that on POSIX platforms, Open MPI should always return 10^-6 for -MPI_Wtick. The returned value may be different on Windows platforms. -.PP .SH NOTE This function does not return an error value. Consequently, the result of calling it before MPI_Init or after MPI_Finalize is undefined. -.SH SEE ALSO +.SH SEE ALSO .ft R .sp MPI_Wtime diff --git a/ompi/mpi/man/man3/MPI_Wtime.3in b/ompi/mpi/man/man3/MPI_Wtime.3in index b41a6bad044..c9c51a5b0f2 100644 --- a/ompi/mpi/man/man3/MPI_Wtime.3in +++ b/ompi/mpi/man/man3/MPI_Wtime.3in @@ -33,34 +33,34 @@ Time in seconds since an arbitrary time in the past. .SH DESCRIPTION .ft R -MPI_Wtime returns a floating-point number of seconds, representing elapsed wall-clock time since some time in the past. +MPI_Wtime returns a floating-point number of seconds, representing elapsed wall-clock time since some time in the past. .PP -The "time in the past" is guaranteed not to change during the life of the process. The user is responsible for converting large numbers of seconds to other units if they are preferred. +The "time in the past" is guaranteed not to change during the life of the process. The user is responsible for converting large numbers of seconds to other units if they are preferred. 
.PP -This function is portable (it returns seconds, not "ticks"), it allows high resolution, and carries no unnecessary baggage. One would use it like this: +This function is portable (it returns seconds, not "ticks"), it allows high resolution, and carries no unnecessary baggage. One would use it like this: .sp .nf - { - double starttime, endtime; - starttime = MPI_Wtime(); - \&.... stuff to be timed \&... - endtime = MPI_Wtime(); - printf("That took %f seconds\\n",endtime-starttime); - } + { + double starttime, endtime; + starttime = MPI_Wtime(); + \&.... stuff to be timed \&... + endtime = MPI_Wtime(); + printf("That took %f seconds\\n",endtime-starttime); + } .fi .PP -The times returned are local to the node that called them. There is no requirement that different nodes return the "same" time. -.SH NOTES +The times returned are local to the node that called them. There is no requirement that different nodes return the "same" time. +.SH NOTES The boolean variable MPI_WTIME_IS_GLOBAL, a predefined attribute key that indicates whether clocks are synchronized, does not have a valid value in Open MPI, as the clocks are not guaranteed to be synchronized. .PP -This function is intended to be a high-resolution, elapsed (or wall) clock. See MPI_Wtick to determine the resolution of MPI_Wtime. +This function is intended to be a high-resolution, elapsed (or wall) clock. See MPI_Wtick to determine the resolution of MPI_Wtime. .PP On POSIX platforms, this function may utilize a timer that is cheaper to invoke than the gettimeofday() system call, but will fall back to gettimeofday() if a cheap high-resolution timer is not available. The ompi_info command can be consulted to see if Open MPI supports a -native high-resolution timer on your platform; see the value for "MPI_WTIME +native high-resolution timer on your platform; see the value for "MPI_WTIME support" (or "options:mpi-wtime" when viewing the parsable output). 
If this value is "native", a method that is likely to be cheaper than gettimeofday() will be used to obtain the time when diff --git a/ompi/mpi/man/man3/Makefile.extra b/ompi/mpi/man/man3/Makefile.extra index b683d915927..4fe77c71907 100644 --- a/ompi/mpi/man/man3/Makefile.extra +++ b/ompi/mpi/man/man3/Makefile.extra @@ -3,9 +3,9 @@ # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -19,6 +19,8 @@ mpi_api_man_pages = \ mpi/man/man3/MPI_Add_error_code.3 \ mpi/man/man3/MPI_Add_error_string.3 \ mpi/man/man3/MPI_Address.3 \ + mpi/man/man3/MPI_Aint_add.3 \ + mpi/man/man3/MPI_Aint_diff.3 \ mpi/man/man3/MPI_Allgather.3 \ mpi/man/man3/MPI_Iallgather.3 \ mpi/man/man3/MPI_Allgatherv.3 \ @@ -124,9 +126,13 @@ mpi_api_man_pages = \ mpi/man/man3/MPI_File_get_view.3 \ mpi/man/man3/MPI_File_iread.3 \ mpi/man/man3/MPI_File_iread_at.3 \ + mpi/man/man3/MPI_File_iread_all.3 \ + mpi/man/man3/MPI_File_iread_at_all.3 \ mpi/man/man3/MPI_File_iread_shared.3 \ mpi/man/man3/MPI_File_iwrite.3 \ mpi/man/man3/MPI_File_iwrite_at.3 \ + mpi/man/man3/MPI_File_iwrite_all.3 \ + mpi/man/man3/MPI_File_iwrite_at_all.3 \ mpi/man/man3/MPI_File_iwrite_shared.3 \ mpi/man/man3/MPI_File_open.3 \ mpi/man/man3/MPI_File_preallocate.3 \ @@ -229,6 +235,8 @@ mpi_api_man_pages = \ mpi/man/man3/MPI_Keyval_create.3 \ mpi/man/man3/MPI_Keyval_free.3 \ mpi/man/man3/MPI_Lookup_name.3 \ + mpi/man/man3/MPI_Message_c2f.3 \ + mpi/man/man3/MPI_Message_f2c.3 \ mpi/man/man3/MPI_Mprobe.3 \ mpi/man/man3/MPI_Mrecv.3 \ mpi/man/man3/MPI_Neighbor_allgather.3 \ @@ -242,6 +250,7 @@ mpi_api_man_pages = \ mpi/man/man3/MPI_Neighbor_alltoallw.3 \ mpi/man/man3/MPI_Ineighbor_alltoallw.3 \ mpi/man/man3/MPI_Op_c2f.3 \ + mpi/man/man3/MPI_Op_commutative.3 \ mpi/man/man3/MPI_Op_create.3 \ mpi/man/man3/MPI_Open_port.3 \ mpi/man/man3/MPI_Op_f2c.3 \ @@ -377,13 +386,18 @@ 
mpi_api_man_pages = \ mpi/man/man3/MPI_Waitall.3 \ mpi/man/man3/MPI_Waitany.3 \ mpi/man/man3/MPI_Waitsome.3 \ + mpi/man/man3/MPI_Win_allocate.3 \ + mpi/man/man3/MPI_Win_allocate_shared.3 \ + mpi/man/man3/MPI_Win_attach.3 \ mpi/man/man3/MPI_Win_c2f.3 \ mpi/man/man3/MPI_Win_call_errhandler.3 \ mpi/man/man3/MPI_Win_complete.3 \ mpi/man/man3/MPI_Win_create.3 \ + mpi/man/man3/MPI_Win_create_dynamic.3 \ mpi/man/man3/MPI_Win_create_errhandler.3 \ mpi/man/man3/MPI_Win_create_keyval.3 \ mpi/man/man3/MPI_Win_delete_attr.3 \ + mpi/man/man3/MPI_Win_detach.3 \ mpi/man/man3/MPI_Win_f2c.3 \ mpi/man/man3/MPI_Win_fence.3 \ mpi/man/man3/MPI_Win_flush.3 \ @@ -395,13 +409,16 @@ mpi_api_man_pages = \ mpi/man/man3/MPI_Win_get_attr.3 \ mpi/man/man3/MPI_Win_get_errhandler.3 \ mpi/man/man3/MPI_Win_get_group.3 \ + mpi/man/man3/MPI_Win_get_info.3 \ mpi/man/man3/MPI_Win_get_name.3 \ mpi/man/man3/MPI_Win_lock.3 \ mpi/man/man3/MPI_Win_lock_all.3 \ mpi/man/man3/MPI_Win_post.3 \ mpi/man/man3/MPI_Win_set_attr.3 \ mpi/man/man3/MPI_Win_set_errhandler.3 \ + mpi/man/man3/MPI_Win_set_info.3 \ mpi/man/man3/MPI_Win_set_name.3 \ + mpi/man/man3/MPI_Win_shared_query.3 \ mpi/man/man3/MPI_Win_start.3 \ mpi/man/man3/MPI_Win_sync.3 \ mpi/man/man3/MPI_Win_test.3 \ diff --git a/ompi/mpi/tool/Makefile.am b/ompi/mpi/tool/Makefile.am index 4f6741d7115..c749cb3eb5a 100644 --- a/ompi/mpi/tool/Makefile.am +++ b/ompi/mpi/tool/Makefile.am @@ -2,14 +2,14 @@ # Copyright (c) 2004-2006 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights # reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpi/tool/category_get_index.c b/ompi/mpi/tool/category_get_index.c index 3fab242d71e..6edb6f2af4d 100644 --- a/ompi/mpi/tool/category_get_index.c +++ b/ompi/mpi/tool/category_get_index.c @@ -36,6 +36,9 @@ int MPI_T_category_get_index (const char *name, int *category_index) mpit_lock (); ret = mca_base_var_group_find_by_name (name, category_index); mpit_unlock (); + if (OPAL_SUCCESS != ret) { + return MPI_T_ERR_INVALID_NAME; + } - return ompit_opal_to_mpit_error (ret); + return MPI_SUCCESS; } diff --git a/ompi/mpi/tool/cvar_get_index.c b/ompi/mpi/tool/cvar_get_index.c index 7ac919b0ac2..e587adf7f34 100644 --- a/ompi/mpi/tool/cvar_get_index.c +++ b/ompi/mpi/tool/cvar_get_index.c @@ -36,6 +36,9 @@ int MPI_T_cvar_get_index (const char *name, int *cvar_index) mpit_lock (); ret = mca_base_var_find_by_name (name, cvar_index); mpit_unlock (); + if (OPAL_SUCCESS != ret) { + return MPI_T_ERR_INVALID_NAME; + } - return ompit_opal_to_mpit_error (ret); + return MPI_SUCCESS; } diff --git a/ompi/mpi/tool/cvar_get_num.c b/ompi/mpi/tool/cvar_get_num.c index 2e3f0bace03..7ece8df6d84 100644 --- a/ompi/mpi/tool/cvar_get_num.c +++ b/ompi/mpi/tool/cvar_get_num.c @@ -33,6 +33,6 @@ int MPI_T_cvar_get_num (int *num_cvar) { mpit_lock (); *num_cvar = mca_base_var_get_count(); mpit_unlock (); - + return MPI_SUCCESS; } diff --git a/ompi/mpi/tool/cvar_read.c b/ompi/mpi/tool/cvar_read.c index 2b7e839fd51..843ca493f82 100644 --- a/ompi/mpi/tool/cvar_read.c +++ b/ompi/mpi/tool/cvar_read.c @@ -56,7 +56,7 @@ int MPI_T_cvar_read (MPI_T_cvar_handle handle, void *buf) ((unsigned long long *) buf)[0] = value->ullval; break; case MCA_BASE_VAR_TYPE_SIZE_T: - ((int *) buf)[0] = value->sizetval; + ((size_t *) buf)[0] = value->sizetval; break; case MCA_BASE_VAR_TYPE_BOOL: ((int *) buf)[0] = value->boolval; diff --git a/ompi/mpi/tool/enum_get_item.c b/ompi/mpi/tool/enum_get_item.c index 40c52aaefec..f86f3abecd4 100644 
--- a/ompi/mpi/tool/enum_get_item.c +++ b/ompi/mpi/tool/enum_get_item.c @@ -46,7 +46,7 @@ int MPI_T_enum_get_item(MPI_T_enum enumtype, int index, int *value, char *name, } rc = enumtype->get_value(enumtype, index, value, &tmp); - if (OPAL_SUCCESS != rc) { + if (OPAL_SUCCESS != rc) { rc = MPI_ERR_OTHER; break; } diff --git a/ompi/mpi/tool/init_thread.c b/ompi/mpi/tool/init_thread.c index 478d3e9b162..8f0fb6b3c62 100644 --- a/ompi/mpi/tool/init_thread.c +++ b/ompi/mpi/tool/init_thread.c @@ -32,18 +32,8 @@ extern volatile int32_t initted; int MPI_T_init_thread (int required, int *provided) { - static volatile int32_t first_init = 1; int rc = MPI_SUCCESS; - if (opal_atomic_cmpset (&first_init, 1, 0) == 1) { - OBJ_CONSTRUCT(&mpit_big_lock, opal_mutex_t); - initted = 1; - } - - while (!initted) { - usleep (10); - } - mpit_lock (); do { diff --git a/ompi/mpi/tool/mpit_common.c b/ompi/mpi/tool/mpit_common.c index 785b8c0d831..22b54db180d 100644 --- a/ompi/mpi/tool/mpit_common.c +++ b/ompi/mpi/tool/mpit_common.c @@ -13,27 +13,26 @@ #include "ompi/mpi/tool/mpit-internal.h" -opal_mutex_t mpit_big_lock = {{0}}; +opal_mutex_t mpit_big_lock = OPAL_MUTEX_STATIC_INIT; volatile uint32_t mpit_init_count = 0; -volatile int32_t initted = 0; void mpit_lock (void) { - if (initted) { - opal_mutex_lock (&mpit_big_lock); - } + opal_mutex_lock (&mpit_big_lock); } void mpit_unlock (void) { - if (initted) { - opal_mutex_unlock (&mpit_big_lock); - } + opal_mutex_unlock (&mpit_big_lock); } int ompit_var_type_to_datatype (mca_base_var_type_t type, MPI_Datatype *datatype) { + if (!datatype) { + return OMPI_SUCCESS; + } + switch (type) { case MCA_BASE_VAR_TYPE_INT: *datatype = MPI_INT; @@ -61,6 +60,7 @@ int ompit_var_type_to_datatype (mca_base_var_type_t type, MPI_Datatype *datatype break; case MCA_BASE_VAR_TYPE_STRING: + case MCA_BASE_VAR_TYPE_VERSION_STRING: *datatype = MPI_CHAR; break; case MCA_BASE_VAR_TYPE_BOOL: diff --git a/ompi/mpi/tool/profile/Makefile.am 
b/ompi/mpi/tool/profile/Makefile.am index ef8eea80faf..d530d1c450a 100644 --- a/ompi/mpi/tool/profile/Makefile.am +++ b/ompi/mpi/tool/profile/Makefile.am @@ -6,19 +6,21 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 Sandia National Laboratories. All rights reserved. -# Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. +# Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,7 +28,7 @@ include $(top_srcdir)/Makefile.ompi-rules # # OMPI_PROFILING_DEFINES flag s enabled when we want our MPI_* symbols -# to be replaced by PMPI_*. In other words, this flag decides +# to be replaced by PMPI_*. In other words, this flag decides # whether "profile/defines.h" is included or not. "profile/defines.h" # replaces all MPI_* symbols with PMPI_* symbols. In this directory # we definately need it to be 1. @@ -38,10 +40,7 @@ AM_CPPFLAGS = -DOMPI_PROFILING_DEFINES=1 # Further, this build HAS to go through if profiling is required.
# -noinst_LTLIBRARIES = -if BUILD_PMPI_BINDINGS_LAYER -noinst_LTLIBRARIES += libmpi_pmpit.la -endif +noinst_LTLIBRARIES = libmpi_pmpit.la headers = defines.h diff --git a/ompi/mpi/tool/profile/defines.h b/ompi/mpi/tool/profile/defines.h index ae6dd4d7f4a..44ef3a56eca 100644 --- a/ompi/mpi/tool/profile/defines.h +++ b/ompi/mpi/tool/profile/defines.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,18 +15,18 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #ifndef OMPIT_PROFILE_DEFINES_H #define OMPIT_PROFILE_DEFINES_H /* - * This file is included in the top directory only if + * This file is included in the top directory only if * profiling is required. Once profiling is required, - * this file will replace all MPI_* symbols with + * this file will replace all MPI_* symbols with * PMPI_* symbols */ #define MPI_T_category_changed PMPI_T_category_changed diff --git a/ompi/mpi/tool/pvar_get_index.c b/ompi/mpi/tool/pvar_get_index.c index 03d15da91a3..88e71c5b4fe 100644 --- a/ompi/mpi/tool/pvar_get_index.c +++ b/ompi/mpi/tool/pvar_get_index.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ @@ -21,7 +21,7 @@ #endif -int MPI_T_pvar_get_index (const char *name, int *pvar_index) +int MPI_T_pvar_get_index (const char *name, int var_class, int *pvar_index) { int ret; @@ -34,8 +34,11 @@ int MPI_T_pvar_get_index (const char *name, int *pvar_index) } mpit_lock (); - ret = mca_base_pvar_find_by_name (name, pvar_index); + ret = mca_base_pvar_find_by_name (name, var_class, pvar_index); mpit_unlock (); + if (OPAL_SUCCESS != ret) { + return MPI_T_ERR_INVALID_NAME; + } - return ompit_opal_to_mpit_error (ret); + return MPI_SUCCESS; } diff --git a/ompi/mpi/tool/pvar_session_free.c b/ompi/mpi/tool/pvar_session_free.c index 41aa3bef6b4..98ec4a98a94 100644 --- a/ompi/mpi/tool/pvar_session_free.c +++ b/ompi/mpi/tool/pvar_session_free.c @@ -23,14 +23,19 @@ int MPI_T_pvar_session_free(MPI_T_pvar_session *session) { + int ret = MPI_SUCCESS; + if (!mpit_is_initialized ()) { return MPI_T_ERR_NOT_INITIALIZED; } - if (NULL != *session) { + /* Check that this is a valid session */ + if (MPI_T_PVAR_SESSION_NULL == *session) { + ret = MPI_T_ERR_INVALID_SESSION; + } else { OBJ_RELEASE(*session); - *session = NULL; + *session = MPI_T_PVAR_SESSION_NULL; } - return MPI_SUCCESS; + return ret; } diff --git a/ompi/mpiext/Makefile.am b/ompi/mpiext/Makefile.am index b19c2917bca..bbbdec3531c 100644 --- a/ompi/mpiext/Makefile.am +++ b/ompi/mpiext/Makefile.am @@ -2,10 +2,11 @@ # Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -15,5 +16,5 @@ headers += \ mpiext/mpiext.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ mpiext/mpiext.c diff --git a/ompi/mpiext/affinity/Makefile.am b/ompi/mpiext/affinity/Makefile.am index 8fa81d74862..de819bd32cf 100644 --- a/ompi/mpiext/affinity/Makefile.am +++ b/ompi/mpiext/affinity/Makefile.am @@ -4,9 +4,9 @@ # Corporation. All rights reserved. # Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpiext/affinity/c/Makefile.am b/ompi/mpiext/affinity/c/Makefile.am index c0471d46dd7..46573f7a461 100644 --- a/ompi/mpiext/affinity/c/Makefile.am +++ b/ompi/mpiext/affinity/c/Makefile.am @@ -4,9 +4,9 @@ # Corporation. All rights reserved. # Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -48,3 +48,6 @@ nodist_man_MANS = OMPI_Affinity_str.3 # Man page sources EXTRA_DIST = $(nodist_man_MANS:.3=.3in) example.c + +distclean-local: + rm -f $(nodist_man_MANS) diff --git a/ompi/mpiext/affinity/c/OMPI_Affinity_str.3in b/ompi/mpiext/affinity/c/OMPI_Affinity_str.3in index a5cbc748dd0..1c4431729b6 100644 --- a/ompi/mpiext/affinity/c/OMPI_Affinity_str.3in +++ b/ompi/mpiext/affinity/c/OMPI_Affinity_str.3in @@ -33,7 +33,7 @@ string as human-readable resource names, such as "socket 0, core 0". OMPI_AFFINITY_LAYOUT_FMT returns ASCII art representing where this MPI process is bound relative to the machine resource layout. For example -"[. B][. .]" shows the process that called the routine is bound to +"[. B][. .]" shows the process that called the routine is bound to socket 0, core 1 in a system with 2 sockets, each containing 2 cores. See below for more output examples. @@ -66,7 +66,7 @@ prettyprint information about three things: . .TP Where Open MPI bound this process. 
-The string returned in +The string returned in .B ompi_bound will either indicate that Open MPI did not bind this process to @@ -110,7 +110,7 @@ all processors in the system). MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); - OMPI_Affinity_str(OMPI_AFFINITY_RSRC_STRING_FMT, + OMPI_Affinity_str(OMPI_AFFINITY_RSRC_STRING_FMT, ompi_bound, current_binding, exists); printf("rank %d: \\n" " ompi_bound: %s\\n" @@ -122,24 +122,24 @@ all processors in the system). .PP Output of mpirun -np 2 -bind-to-core a.out: .nf -rank 0: - ompi_bound: socket 0[core 0] - current_binding: socket 0[core 0] +rank 0: + ompi_bound: socket 0[core 0] + current_binding: socket 0[core 0] exists: socket 0 has 4 cores -rank 1: - ompi_bound: socket 0[core 1] - current_binding: socket 0[core 1] +rank 1: + ompi_bound: socket 0[core 1] + current_binding: socket 0[core 1] exists: socket 0 has 4 cores .fi .PP Output of mpirun -np 2 -bind-to-socket a.out: .nf -rank 0: - ompi_bound: socket 0[core 0-3] +rank 0: + ompi_bound: socket 0[core 0-3] current_binding: Not bound (or bound to all available processors) exists: socket 0 has 4 cores -rank 1: - ompi_bound: socket 0[core 0-3] +rank 1: + ompi_bound: socket 0[core 0-3] current_binding: Not bound (or bound to all available processors) exists: socket 0 has 4 cores .fi @@ -156,7 +156,7 @@ rank 1: MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); - OMPI_Affinity_str(OMPI_AFFINITY_LAYOUT_FMT, + OMPI_Affinity_str(OMPI_AFFINITY_LAYOUT_FMT, ompi_bound, current_binding, exists); printf("rank %d: \\n" " ompi_bound: %s\\n" @@ -168,11 +168,11 @@ rank 1: .PP Output of mpirun -np 2 -bind-to-core a.out: .nf -rank 0: +rank 0: ompi_bound: [B . . .] current_binding: [B . . .] exists: [. . . .] -rank 1: +rank 1: ompi_bound: [. B . .] current_binding: [. B . .] exists: [. . . .] @@ -180,11 +180,11 @@ rank 1: .PP Output of mpirun -np 2 -bind-to-socket a.out: .nf -rank 0: +rank 0: ompi_bound: [B B B B] current_binding: [B B B B] exists: [. . . .] 
-rank 1: +rank 1: ompi_bound: [B B B B] current_binding: [B B B B] exists: [. . . .] diff --git a/ompi/mpiext/affinity/c/example.c b/ompi/mpiext/affinity/c/example.c index ace36443ea2..b72590034f4 100644 --- a/ompi/mpiext/affinity/c/example.c +++ b/ompi/mpiext/affinity/c/example.c @@ -8,10 +8,10 @@ int main(int argc, char* argv[]) char ompi_bound[OMPI_AFFINITY_STRING_MAX]; char current_binding[OMPI_AFFINITY_STRING_MAX]; char exists[OMPI_AFFINITY_STRING_MAX]; - + MPI_Init(NULL, NULL); MPI_Comm_rank(MPI_COMM_WORLD, &rank); - + OMPI_Affinity_str(OMPI_AFFINITY_RSRC_STRING_FMT, ompi_bound, current_binding, exists); printf("rank %d (resource string): \n" diff --git a/ompi/mpiext/affinity/c/mpiext_affinity_c.h b/ompi/mpiext/affinity/c/mpiext_affinity_c.h index 7b2a0167978..bf94f283c43 100644 --- a/ompi/mpiext/affinity/c/mpiext_affinity_c.h +++ b/ompi/mpiext/affinity/c/mpiext_affinity_c.h @@ -4,9 +4,9 @@ * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/ompi/mpiext/affinity/c/mpiext_affinity_str.c b/ompi/mpiext/affinity/c/mpiext_affinity_str.c index de59be38b72..75bf194e165 100644 --- a/ompi/mpiext/affinity/c/mpiext_affinity_str.c +++ b/ompi/mpiext/affinity/c/mpiext_affinity_str.c @@ -2,14 +2,18 @@ * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * Simple routine to expose three things to the MPI process: @@ -17,7 +21,7 @@ * 1. What processor(s) Open MPI bound this process to * 2. What processor(s) this process is bound to * 3. What processor(s) exist on this host - * + * * Note that 1 and 2 may be different! */ @@ -38,6 +42,7 @@ static const char FUNC_NAME[] = "OMPI_Affinity"; static const char ompi_nobind_str[] = "Open MPI did not bind this process"; static const char not_bound_str[] = "Not bound (i.e., bound to all processors)"; + static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]); static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]); static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]); @@ -45,10 +50,9 @@ static int get_layout_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]); static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]); static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]); -/*---------------------------------------------------------------------------*/ int OMPI_Affinity_str(ompi_affinity_fmt_t fmt_type, - char ompi_bound[OMPI_AFFINITY_STRING_MAX], + char ompi_bound[OMPI_AFFINITY_STRING_MAX], char current_binding[OMPI_AFFINITY_STRING_MAX], char exists[OMPI_AFFINITY_STRING_MAX]) { @@ -59,30 +63,27 @@ int OMPI_Affinity_str(ompi_affinity_fmt_t fmt_type, /* If we have no hwloc support, return nothing */ if (NULL == opal_hwloc_topology) { - strncpy(ompi_bound, "Not supported", OMPI_AFFINITY_STRING_MAX); - strncpy(current_binding, "Not supported", OMPI_AFFINITY_STRING_MAX); - strncpy(exists, "Not supported", OMPI_AFFINITY_STRING_MAX); return MPI_SUCCESS; } /* Otherwise, return useful information */ switch (fmt_type) { case OMPI_AFFINITY_RSRC_STRING_FMT: - if (OMPI_SUCCESS != (ret = get_rsrc_ompi_bound(ompi_bound)) || - OMPI_SUCCESS != (ret = get_rsrc_current_binding(current_binding)) || - 
OMPI_SUCCESS != (ret = get_rsrc_exists(exists))) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME); - } - break; + if (OMPI_SUCCESS != (ret = get_rsrc_ompi_bound(ompi_bound)) || + OMPI_SUCCESS != (ret = get_rsrc_current_binding(current_binding)) || + OMPI_SUCCESS != (ret = get_rsrc_exists(exists))) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME); + } + break; case OMPI_AFFINITY_LAYOUT_FMT: - if (OMPI_SUCCESS != (ret = get_layout_ompi_bound(ompi_bound)) || - OMPI_SUCCESS != (ret = get_layout_current_binding(current_binding)) || - OMPI_SUCCESS != (ret = get_layout_exists(exists))) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME); - } - break; + if (OMPI_SUCCESS != (ret = get_layout_ompi_bound(ompi_bound)) || + OMPI_SUCCESS != (ret = get_layout_current_binding(current_binding)) || + OMPI_SUCCESS != (ret = get_layout_exists(exists))) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME); + } + break; default: - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } return MPI_SUCCESS; @@ -106,7 +107,7 @@ static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]) if (NULL == orte_proc_applied_binding) { ret = OPAL_ERR_NOT_BOUND; } else { - ret = opal_hwloc_base_cset2str(str, OMPI_AFFINITY_STRING_MAX, + ret = opal_hwloc_base_cset2str(str, OMPI_AFFINITY_STRING_MAX, opal_hwloc_topology, orte_proc_applied_binding); } @@ -134,7 +135,7 @@ static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) /* get our bindings */ boundset = hwloc_bitmap_alloc(); - if (hwloc_get_cpubind(opal_hwloc_topology, boundset, + if (hwloc_get_cpubind(opal_hwloc_topology, boundset, HWLOC_CPUBIND_PROCESS) < 0) { /* we are NOT bound if get_cpubind fails, nor can we be bound - the environment does not support it */ @@ -171,7 +172,7 @@ static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) } -/* +/* * Prettyprint 
a list of all available sockets and cores. Note that * this is *everything* -- not just the ones that are available to * this process. @@ -185,7 +186,7 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) hwloc_obj_t socket, core, c2; str[0] = '\0'; - for (socket = hwloc_get_obj_by_type(opal_hwloc_topology, + for (socket = hwloc_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_SOCKET, 0); NULL != socket; socket = socket->next_cousin) { /* If this isn't the first socket, add a delimiter */ @@ -204,17 +205,17 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) socket->cpuset, HWLOC_OBJ_CORE); core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, - socket->cpuset, + socket->cpuset, HWLOC_OBJ_CORE, 0); if (NULL != core) { - num_pus = + num_pus = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology, core->cpuset, HWLOC_OBJ_PU); - + /* Only 1 core */ if (1 == num_cores) { - strncat(str, "1 core with ", + strncat(str, "1 core with ", OMPI_AFFINITY_STRING_MAX - strlen(str)); if (1 == num_pus) { strncat(str, "1 hwt", @@ -223,26 +224,26 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) snprintf(tmp, stmp, "%d hwts", num_pus); strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str)); } - } - + } + /* Multiple cores */ else { bool same = true; - + snprintf(tmp, stmp, "%d cores", num_cores); strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str)); - + /* Do all the cores have the same number of PUs? 
*/ for (c2 = core; NULL != c2; c2 = c2->next_cousin) { if (hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology, core->cpuset, - HWLOC_OBJ_PU) != + HWLOC_OBJ_PU) != num_pus) { same = false; break; } } - + /* Yes, they all have the same number of PUs */ if (same) { snprintf(tmp, stmp, ", each with %d hwt", num_pus); @@ -251,26 +252,26 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) strncat(str, "s", OMPI_AFFINITY_STRING_MAX - strlen(str)); } } - + /* No, they have differing numbers of PUs */ else { bool first = true; - + strncat(str, "with (", OMPI_AFFINITY_STRING_MAX - strlen(str)); for (c2 = core; NULL != c2; c2 = c2->next_cousin) { if (!first) { - strncat(str, ", ", + strncat(str, ", ", OMPI_AFFINITY_STRING_MAX - strlen(str)); } first = false; - + i = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology, core->cpuset, HWLOC_OBJ_PU); snprintf(tmp, stmp, "%d", i); strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str)); } - strncat(str, ") hwts", + strncat(str, ") hwts", OMPI_AFFINITY_STRING_MAX - strlen(str)); } } @@ -299,7 +300,7 @@ static int get_layout_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]) if (NULL == orte_proc_applied_binding) { ret = OPAL_ERR_NOT_BOUND; } else { - ret = opal_hwloc_base_cset2mapstr(str, OMPI_AFFINITY_STRING_MAX, + ret = opal_hwloc_base_cset2mapstr(str, OMPI_AFFINITY_STRING_MAX, opal_hwloc_topology, orte_proc_applied_binding); } @@ -327,7 +328,7 @@ static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) /* get our bindings */ boundset = hwloc_bitmap_alloc(); - if (hwloc_get_cpubind(opal_hwloc_topology, boundset, + if (hwloc_get_cpubind(opal_hwloc_topology, boundset, HWLOC_CPUBIND_PROCESS) < 0) { /* we are NOT bound if get_cpubind fails, nor can we be bound - the environment does not support it */ @@ -370,7 +371,7 @@ static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) * * Example: [../..] * Key: [] - signifies socket - * / - signifies core + * / - signifies core * . 
- signifies PU */ static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]) @@ -382,20 +383,20 @@ static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]) str[0] = '\0'; /* Iterate over all existing sockets */ - for (socket = hwloc_get_obj_by_type(opal_hwloc_topology, + for (socket = hwloc_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_SOCKET, 0); - NULL != socket; + NULL != socket; socket = socket->next_cousin) { strncat(str, "[", len - strlen(str)); /* Iterate over all existing cores in this socket */ core_index = 0; for (core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, - socket->cpuset, + socket->cpuset, HWLOC_OBJ_CORE, core_index); - NULL != core; + NULL != core; core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, - socket->cpuset, + socket->cpuset, HWLOC_OBJ_CORE, ++core_index)) { if (core_index > 0) { strncat(str, "/", len - strlen(str)); @@ -404,11 +405,11 @@ static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]) /* Iterate over all existing PUs in this core */ pu_index = 0; for (pu = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, - core->cpuset, + core->cpuset, HWLOC_OBJ_PU, pu_index); - NULL != pu; + NULL != pu; pu = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, - core->cpuset, + core->cpuset, HWLOC_OBJ_PU, ++pu_index)) { strncat(str, ".", len - strlen(str)); } diff --git a/ompi/mpiext/affinity/configure.m4 b/ompi/mpiext/affinity/configure.m4 index 51925443428..55ca956e414 100644 --- a/ompi/mpiext/affinity/configure.m4 +++ b/ompi/mpiext/affinity/configure.m4 @@ -2,11 +2,11 @@ # # Copyright (c) 2004-2009 The Trustees of Indiana University. # All rights reserved. -# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -16,6 +16,10 @@ AC_DEFUN([OMPI_MPIEXT_affinity_CONFIG], [ AC_CONFIG_FILES([ompi/mpiext/affinity/Makefile]) AC_CONFIG_FILES([ompi/mpiext/affinity/c/Makefile]) - # This example can always build, so we just execute $1. - $1 + # This example can always build, so we just execute $1 if it was + # requested. + AS_IF([test "$ENABLE_affinity" = "1" || \ + test "$ENABLE_EXT_ALL" = "1"], + [$1], + [$2]) ]) diff --git a/ompi/mpiext/cr/Makefile.am b/ompi/mpiext/cr/Makefile.am index 97a15dc4ffc..1ab8b748181 100644 --- a/ompi/mpiext/cr/Makefile.am +++ b/ompi/mpiext/cr/Makefile.am @@ -4,9 +4,9 @@ # Corporation. All rights reserved. # Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpiext/cr/c/Makefile.am b/ompi/mpiext/cr/c/Makefile.am index f2667667f8b..fe54fe557b1 100644 --- a/ompi/mpiext/cr/c/Makefile.am +++ b/ompi/mpiext/cr/c/Makefile.am @@ -4,9 +4,9 @@ # Corporation. All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpiext/cr/c/checkpoint.c b/ompi/mpiext/cr/c/checkpoint.c index a3221b5f9e3..fbd956eb35b 100644 --- a/ompi/mpiext/cr/c/checkpoint.c +++ b/ompi/mpiext/cr/c/checkpoint.c @@ -4,9 +4,9 @@ * Corporation. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -62,7 +62,7 @@ int OMPI_CR_Checkpoint(char **handle, int *seq, MPI_Info *info) ret = orte_snapc.request_op(datum); if( OMPI_SUCCESS != ret ) { OBJ_RELEASE(datum); - OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_OTHER, + OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_OTHER, FUNC_NAME); } OPAL_CR_EXIT_LIBRARY(); diff --git a/ompi/mpiext/cr/c/inc_register_callback.c b/ompi/mpiext/cr/c/inc_register_callback.c index 9572c8b6f79..37ee7605533 100644 --- a/ompi/mpiext/cr/c/inc_register_callback.c +++ b/ompi/mpiext/cr/c/inc_register_callback.c @@ -4,9 +4,9 @@ * Corporation. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,7 +29,7 @@ int OMPI_CR_INC_register_callback(OMPI_CR_INC_callback_event_t event, int rc; if ( MPI_PARAM_CHECK ) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } OPAL_CR_ENTER_LIBRARY(); diff --git a/ompi/mpiext/cr/c/migrate.c b/ompi/mpiext/cr/c/migrate.c index b6d7aeb81d5..00c014e1422 100644 --- a/ompi/mpiext/cr/c/migrate.c +++ b/ompi/mpiext/cr/c/migrate.c @@ -4,9 +4,9 @@ * Corporation. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -104,7 +104,7 @@ int OMPI_CR_Migrate(MPI_Comm comm, char *hostname, int rank, MPI_Info *info) OPAL_CR_ENTER_LIBRARY(); ret = orte_snapc.request_op(datum); if( OMPI_SUCCESS != ret ) { - OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_OTHER, + OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_OTHER, FUNC_NAME); } OPAL_CR_EXIT_LIBRARY(); diff --git a/ompi/mpiext/cr/c/mpiext_cr_c.h b/ompi/mpiext/cr/c/mpiext_cr_c.h index cf1801b6898..d54d6794a61 100644 --- a/ompi/mpiext/cr/c/mpiext_cr_c.h +++ b/ompi/mpiext/cr/c/mpiext_cr_c.h @@ -3,9 +3,9 @@ * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/ompi/mpiext/cr/c/quiesce_checkpoint.c b/ompi/mpiext/cr/c/quiesce_checkpoint.c index 5f782d82b43..85ff1f48e89 100644 --- a/ompi/mpiext/cr/c/quiesce_checkpoint.c +++ b/ompi/mpiext/cr/c/quiesce_checkpoint.c @@ -4,9 +4,9 @@ * Corporation. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -55,7 +55,7 @@ int OMPI_CR_Quiesce_checkpoint(MPI_Comm commP, char **handle, int *seq, MPI_Info ret = orte_snapc.request_op(datum); /*ret = ompi_crcp_base_quiesce_start(info);*/ if( OMPI_SUCCESS != ret ) { - OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OTHER, + OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OTHER, FUNC_NAME); } OPAL_CR_EXIT_LIBRARY(); diff --git a/ompi/mpiext/cr/c/quiesce_end.c b/ompi/mpiext/cr/c/quiesce_end.c index 32189238eb0..7d716f2f7ab 100644 --- a/ompi/mpiext/cr/c/quiesce_end.c +++ b/ompi/mpiext/cr/c/quiesce_end.c @@ -4,9 +4,9 @@ * Corporation. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -55,7 +55,7 @@ int OMPI_CR_Quiesce_end(MPI_Comm commP, MPI_Info *info) ret = orte_snapc.request_op(datum); /*ret = ompi_crcp_base_quiesce_end(info);*/ if( OMPI_SUCCESS != ret ) { - OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_OTHER, + OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_OTHER, FUNC_NAME); } OPAL_CR_EXIT_LIBRARY(); diff --git a/ompi/mpiext/cr/c/quiesce_start.c b/ompi/mpiext/cr/c/quiesce_start.c index b81e12a72b9..9b61ebe6d0a 100644 --- a/ompi/mpiext/cr/c/quiesce_start.c +++ b/ompi/mpiext/cr/c/quiesce_start.c @@ -7,9 +7,9 @@ * reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -64,7 +64,7 @@ int OMPI_CR_Quiesce_start(MPI_Comm commP, MPI_Info *info) /*ret = ompi_crcp_base_quiesce_start(info);*/ if( OMPI_SUCCESS != ret ) { OBJ_RELEASE(datum); - OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_OTHER, + OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_OTHER, FUNC_NAME); } diff --git a/ompi/mpiext/cr/c/restart.c b/ompi/mpiext/cr/c/restart.c index a16ebed8b03..38814b08de4 100644 --- a/ompi/mpiext/cr/c/restart.c +++ b/ompi/mpiext/cr/c/restart.c @@ -4,9 +4,9 @@ * Corporation. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -53,7 +53,7 @@ int OMPI_CR_Restart(char *handle, int seq, MPI_Info *info) OPAL_CR_ENTER_LIBRARY(); ret = orte_snapc.request_op(datum); if( OMPI_SUCCESS != ret ) { - OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_OTHER, + OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_OTHER, FUNC_NAME); } OPAL_CR_EXIT_LIBRARY(); diff --git a/ompi/mpiext/cr/c/self_register_checkpoint.c b/ompi/mpiext/cr/c/self_register_checkpoint.c index 97ab37523b8..a7bdc6330e8 100644 --- a/ompi/mpiext/cr/c/self_register_checkpoint.c +++ b/ompi/mpiext/cr/c/self_register_checkpoint.c @@ -4,9 +4,9 @@ * Corporation. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,7 +29,7 @@ int OMPI_CR_self_register_checkpoint_callback(OMPI_CR_self_checkpoint_fn functio int rc; if ( MPI_PARAM_CHECK ) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } OPAL_CR_ENTER_LIBRARY(); diff --git a/ompi/mpiext/cr/c/self_register_continue.c b/ompi/mpiext/cr/c/self_register_continue.c index 062a2b3f0a2..166063699df 100644 --- a/ompi/mpiext/cr/c/self_register_continue.c +++ b/ompi/mpiext/cr/c/self_register_continue.c @@ -4,9 +4,9 @@ * Corporation. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,7 +29,7 @@ int OMPI_CR_self_register_continue_callback(OMPI_CR_self_continue_fn function) int rc; if ( MPI_PARAM_CHECK ) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } OPAL_CR_ENTER_LIBRARY(); diff --git a/ompi/mpiext/cr/c/self_register_restart.c b/ompi/mpiext/cr/c/self_register_restart.c index c197d276f58..6e7675c3f65 100644 --- a/ompi/mpiext/cr/c/self_register_restart.c +++ b/ompi/mpiext/cr/c/self_register_restart.c @@ -4,9 +4,9 @@ * Corporation. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -29,7 +29,7 @@ int OMPI_CR_self_register_restart_callback(OMPI_CR_self_restart_fn function) int rc; if ( MPI_PARAM_CHECK ) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } OPAL_CR_ENTER_LIBRARY(); diff --git a/ompi/mpiext/cr/configure.m4 b/ompi/mpiext/cr/configure.m4 index dc228b255da..ca07c5cb944 100644 --- a/ompi/mpiext/cr/configure.m4 +++ b/ompi/mpiext/cr/configure.m4 @@ -2,11 +2,11 @@ # # Copyright (c) 2004-2010 The Trustees of Indiana University. # All rights reserved. -# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -16,8 +16,25 @@ AC_DEFUN([OMPI_MPIEXT_cr_CONFIG],[ AC_CONFIG_FILES([ompi/mpiext/cr/Makefile]) AC_CONFIG_FILES([ompi/mpiext/cr/c/Makefile]) - # If we don't want FT, don't compile this component - AS_IF([test "$opal_want_ft_cr" = "1"], - [$1], - [$2]) + OPAL_VAR_SCOPE_PUSH([ompi_mpi_ext_cr_happy]) + + # If we don't want FT, don't compile this extension + AS_IF([test "$ENABLE_cr" = "1" || \ + test "$ENABLE_EXT_ALL" = "1"], + [ompi_mpi_ext_cr_happy=1], + [ompi_mpi_ext_cr_happy=0]) + + AS_IF([test "$ompi_mpi_ext_cr_happy" = "1" && \ + test "$opal_want_ft_cr" = "1"], + [$1], + [ # Error if the user specifically asked for this extension, + # but we can't build it. + AS_IF([test "$ENABLE_cr" = "1"], + [AC_MSG_WARN([Requested "cr" MPI extension, but cannot build it]) + AC_MSG_WARN([because fault tolerance is not enabled.]) + AC_MSG_WARN([Try again with --enable-ft]) + AC_MSG_ERROR([Cannot continue])]) + $2]) + + OPAL_VAR_SCOPE_POP ]) diff --git a/ompi/mpiext/cuda/Makefile.am b/ompi/mpiext/cuda/Makefile.am new file mode 100644 index 00000000000..3d8db46ce94 --- /dev/null +++ b/ompi/mpiext/cuda/Makefile.am @@ -0,0 +1,24 @@ +# +# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 NVIDIA, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This Makefile is not traversed during a normal "make all" in an OMPI +# build. It *is* traversed during "make dist", however. So you can +# put EXTRA_DIST targets in here. +# +# You can also use this as a convenience for building this MPI +# extension (i.e., "make all" in this directory to invoke "make all" +# in all the subdirectories).
+ +SUBDIRS = c + +EXTRA_DIST = README.txt diff --git a/ompi/mpiext/cuda/README.txt b/ompi/mpiext/cuda/README.txt new file mode 100644 index 00000000000..cc46fc3ef95 --- /dev/null +++ b/ompi/mpiext/cuda/README.txt @@ -0,0 +1,11 @@ +# Copyright (c) 2015 NVIDIA, Inc. All rights reserved. + +$COPYRIGHT$ + +Rolf vandeVaart + + +This extension provides a macro for compile time check of CUDA aware support. +It also provides a function for runtime check of CUDA aware support. + +See MPIX_Query_cuda_support(3) for more details. diff --git a/ompi/mpiext/cuda/c/MPIX_Query_cuda_support.3in b/ompi/mpiext/cuda/c/MPIX_Query_cuda_support.3in new file mode 100644 index 00000000000..676ec570bc9 --- /dev/null +++ b/ompi/mpiext/cuda/c/MPIX_Query_cuda_support.3in @@ -0,0 +1,34 @@ +.\" Copyright 2007-2010 Oracle and/or its affiliates. All rights reserved. +.\" Copyright (c) 1996 Thinking Machines Corporation +.\" Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2015 NVIDIA, Inc. All rights reserved. +.TH MPIx_CUDA_SUPPORT 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME +\fBMPIX_Query_cuda_support\fP \- Returns 1 if there is CUDA aware support and 0 if there is not. + +.SH SYNTAX +.ft R +.SH C Syntax +.nf +#include +#include + +int MPIX_Query_cuda_support(void) +.fi +.SH Fortran Syntax +There is no Fortran binding for this function. +. +.SH C++ Syntax +There is no C++ binding for this function. +. +.SH DESCRIPTION +.ft R + +.SH Examples +.ft R + +.SH See Also +.ft R +.nf + +.fi diff --git a/ompi/mpiext/cuda/c/Makefile.am b/ompi/mpiext/cuda/c/Makefile.am new file mode 100644 index 00000000000..41f0ab5fd52 --- /dev/null +++ b/ompi/mpiext/cuda/c/Makefile.am @@ -0,0 +1,48 @@ +# +# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 NVIDIA, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This file builds the C bindings for MPI extensions. It must be +# present in all MPI extensions. + +# We must set these #defines so that the inner OMPI MPI prototype +# header files do the Right Thing. +AM_CPPFLAGS = -DOMPI_PROFILE_LAYER=0 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1 + +include $(top_srcdir)/Makefile.ompi-rules + +# Convenience libtool library that will be slurped up into libmpi.la. +noinst_LTLIBRARIES = libmpiext_cuda_c.la + +# This is where the top-level header file (that is included in +# ) must be installed. +ompidir = $(ompiincludedir)/ompi/mpiext/cuda/c + +# This is the header file that is installed. +ompi_HEADERS = mpiext_cuda_c.h + +# Sources for the convenience libtool library. Other than the one +# header file, all source files in the extension have no file naming +# conventions. +libmpiext_cuda_c_la_SOURCES = \ + $(ompi_HEADERS) \ + mpiext_cuda.c +libmpiext_cuda_c_la_LDFLAGS = -module -avoid-version + +# Man page installation +nodist_man_MANS = MPIX_Query_cuda_support.3 + +# Man page sources +EXTRA_DIST = $(nodist_man_MANS:.3=.3in) + +distclean-local: + rm -f $(nodist_man_MANS) diff --git a/ompi/mpiext/cuda/c/mpiext_cuda.c b/ompi/mpiext/cuda/c/mpiext_cuda.c new file mode 100644 index 00000000000..499d1441c39 --- /dev/null +++ b/ompi/mpiext/cuda/c/mpiext_cuda.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 NVIDIA, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "ompi_config.h" + +#include +#include + +#include "opal/constants.h" +#include "ompi/mpiext/cuda/c/mpiext_cuda_c.h" + +/* If CUDA-aware support is configured in, return 1. Otherwise, return 0. + * This API may be extended to return more features in the future. */ +int MPIX_Query_cuda_support(void) +{ + return OPAL_CUDA_SUPPORT; +} diff --git a/ompi/mpiext/cuda/c/mpiext_cuda_c.h.in b/ompi/mpiext/cuda/c/mpiext_cuda_c.h.in new file mode 100644 index 00000000000..0a95eeb3cb3 --- /dev/null +++ b/ompi/mpiext/cuda/c/mpiext_cuda_c.h.in @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2004-2009 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 NVIDIA, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#define MPIX_CUDA_AWARE_SUPPORT @MPIX_CUDA_AWARE_SUPPORT@ +OMPI_DECLSPEC int MPIX_Query_cuda_support(void); diff --git a/ompi/mpiext/cuda/configure.m4 b/ompi/mpiext/cuda/configure.m4 new file mode 100644 index 00000000000..cba2953a3f5 --- /dev/null +++ b/ompi/mpiext/cuda/configure.m4 @@ -0,0 +1,31 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2010 The Trustees of Indiana University. +# All rights reserved. +# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 NVIDIA, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# OMPI_MPIEXT_cuda_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([OMPI_MPIEXT_cuda_CONFIG],[ + AC_CONFIG_FILES([ompi/mpiext/cuda/Makefile]) + AC_CONFIG_FILES([ompi/mpiext/cuda/c/Makefile]) + AC_CONFIG_HEADER([ompi/mpiext/cuda/c/mpiext_cuda_c.h]) + + AC_DEFINE_UNQUOTED([MPIX_CUDA_AWARE_SUPPORT],[$CUDA_SUPPORT], + [Macro that is set to 1 when CUDA-aware support is configured in and 0 when it is not]) + + # We compile this whether CUDA support was requested or not. It allows + # us to detect if we have CUDA support. + AS_IF([test "$ENABLE_cuda" = "1" || \ + test "$ENABLE_EXT_ALL" = "1"], + [$1], + [$2]) +]) diff --git a/ompi/mpiext/example/Makefile.am b/ompi/mpiext/example/Makefile.am index c5e7ca73a63..8b2b03942db 100644 --- a/ompi/mpiext/example/Makefile.am +++ b/ompi/mpiext/example/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpiext/example/README.txt b/ompi/mpiext/example/README.txt index bb93902b0ee..13e237df3cc 100644 --- a/ompi/mpiext/example/README.txt +++ b/ompi/mpiext/example/README.txt @@ -3,7 +3,7 @@ Copyright (C) 2012 Cisco Systems, Inc. All rights reserved. $COPYRIGHT$ This example MPI extension shows how to make an MPI extension for Open -MPI. +MPI. An MPI extension provides new top-level APIs in Open MPI that are available to user-level applications (vs.
adding new code/APIs that is @@ -74,7 +74,7 @@ Under this top-level directory, the extension *must* have a directory named "c" (for the C bindings) that: - contains a file named mpiext__c.h -- installs mpiext__c.h to +- installs mpiext__c.h to $includedir/openmpi/mpiext//c - builds a Libtool convenience library named libmpiext__c.la @@ -82,7 +82,7 @@ Optionally, the extension may have a director named "mpif-h" (for the Fortran mpif.h bindings) that: - contains a file named mpiext__mpifh.h -- installs mpiext__mpih.h to +- installs mpiext__mpih.h to $includedir/openmpi/mpiext//mpif-h - builds a Libtool convenience library named libmpiext__mpifh.la @@ -133,6 +133,6 @@ file. This are reasons for this strange ordering, but suffice it to say that "make dist" doesn't have the same ordering requiements as "make all", and is therefore easier to have a "normal" Automake-usual top-down -sequential directory traversal. +sequential directory traversal. Enjoy! diff --git a/ompi/mpiext/example/c/Makefile.am b/ompi/mpiext/example/c/Makefile.am index 088ab79877c..7f9e74df6c8 100644 --- a/ompi/mpiext/example/c/Makefile.am +++ b/ompi/mpiext/example/c/Makefile.am @@ -5,9 +5,9 @@ # Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpiext/example/c/mpiext_example_c.h b/ompi/mpiext/example/c/mpiext_example_c.h index 8a69fa7341c..abaf87845d1 100644 --- a/ompi/mpiext/example/c/mpiext_example_c.h +++ b/ompi/mpiext/example/c/mpiext_example_c.h @@ -4,9 +4,9 @@ * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/ompi/mpiext/example/c/mpiext_example_module.c b/ompi/mpiext/example/c/mpiext_example_module.c index a0006b6234d..4ef52a36d7d 100644 --- a/ompi/mpiext/example/c/mpiext_example_module.c +++ b/ompi/mpiext/example/c/mpiext_example_module.c @@ -5,9 +5,9 @@ * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,7 +25,7 @@ #include "ompi/mpiext/mpiext.h" #include "ompi/mpiext/example/c/mpiext_example_c.h" -/* +/* * The init/fini functions and the component struct are not required, * but optional. If an extension would like to have init/fini, in * addition to providing the hooks below, adding the line in diff --git a/ompi/mpiext/example/c/mpiext_example_progress.c b/ompi/mpiext/example/c/mpiext_example_progress.c index 00cbaca7ffe..23680472db9 100644 --- a/ompi/mpiext/example/c/mpiext_example_progress.c +++ b/ompi/mpiext/example/c/mpiext_example_progress.c @@ -4,10 +4,12 @@ * Corporation. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,7 +38,7 @@ int OMPI_Example_global = 42; * Just to make the extension "interesting", we pass in an integer and * an MPI handle. */ -int OMPI_Progress(int count, MPI_Comm comm) +int OMPI_Progress(int count, MPI_Comm comm) { char name[MPI_MAX_OBJECT_NAME]; int len; @@ -44,11 +46,7 @@ int OMPI_Progress(int count, MPI_Comm comm) /* Just as an example, get the name of the communicator and print it out. Use the PMPI name when possible so that these invocations don't show up in profiling tools. 
*/ -#if OMPI_ENABLE_MPI_PROFILING PMPI_Comm_get_name(comm, name, &len); -#else - MPI_Comm_get_name(comm, name, &len); -#endif printf("Count = %d, comm = %s\n", count, name); diff --git a/ompi/mpiext/example/c/progress.c b/ompi/mpiext/example/c/progress.c index 5d38c672941..60c47028cbf 100644 --- a/ompi/mpiext/example/c/progress.c +++ b/ompi/mpiext/example/c/progress.c @@ -4,9 +4,9 @@ * Corporation. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" @@ -18,7 +18,7 @@ static const char FUNC_NAME[] = "OMPI_Progress"; -/* +/* * The init/fini functions and the component struct are not required, * but optional. If an extension would like to have init/fini, in * addition to providing the hooks below, adding the line in @@ -45,7 +45,7 @@ ompi_mpiext_component_t ompi_mpiext_example = { }; -int OMPI_Progress(int count) +int OMPI_Progress(int count) { printf("Count = %d!\n", count); diff --git a/ompi/mpiext/example/configure.m4 b/ompi/mpiext/example/configure.m4 index 1ae1e9f67c5..096a01a849f 100644 --- a/ompi/mpiext/example/configure.m4 +++ b/ompi/mpiext/example/configure.m4 @@ -2,11 +2,11 @@ # # Copyright (c) 2004-2009 The Trustees of Indiana University. # All rights reserved. -# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -20,8 +20,14 @@ AC_DEFUN([OMPI_MPIEXT_example_CONFIG],[ AC_CONFIG_FILES([ompi/mpiext/example/use-mpi/Makefile]) AC_CONFIG_FILES([ompi/mpiext/example/use-mpi-f08/Makefile]) - # This example can always build, so we just execute $1. - $1 + # If your extension can build, run $1. Otherwise, run $2. For + # the purposes of this example, we don't want it to build in most + # cases. 
So only build if someone specifies an --enable-mpi-ext + # value that contains the token "example". + AS_IF([test "$ENABLE_example" = "1" || \ + test "$ENABLE_EXT_ALL" = "1"], + [$1], + [$2]) ]) # only need to set this if the component needs init/finalize hooks diff --git a/ompi/mpiext/example/mpif-h/Makefile.am b/ompi/mpiext/example/mpif-h/Makefile.am index 004bae04071..fdd1c2a257a 100644 --- a/ompi/mpiext/example/mpif-h/Makefile.am +++ b/ompi/mpiext/example/mpif-h/Makefile.am @@ -5,9 +5,9 @@ # Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpiext/example/mpif-h/mpiext_example_mpifh.h b/ompi/mpiext/example/mpif-h/mpiext_example_mpifh.h index fb8898d9bf1..7785d351728 100644 --- a/ompi/mpiext/example/mpif-h/mpiext_example_mpifh.h +++ b/ompi/mpiext/example/mpif-h/mpiext_example_mpifh.h @@ -3,9 +3,9 @@ ! Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. ! Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. ! $COPYRIGHT$ -! +! ! Additional copyrights may follow -! +! ! $HEADER$ ! diff --git a/ompi/mpiext/example/mpif-h/mpiext_example_progress_f.c b/ompi/mpiext/example/mpif-h/mpiext_example_progress_f.c index f102db653ca..395e9932b97 100644 --- a/ompi/mpiext/example/mpif-h/mpiext_example_progress_f.c +++ b/ompi/mpiext/example/mpif-h/mpiext_example_progress_f.c @@ -2,9 +2,9 @@ * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/mpiext/example/tests/Makefile b/ompi/mpiext/example/tests/Makefile index 9b1cfcb8351..ec66818e21e 100644 --- a/ompi/mpiext/example/tests/Makefile +++ b/ompi/mpiext/example/tests/Makefile @@ -1,9 +1,9 @@ # # Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpiext/example/tests/Makefile.include b/ompi/mpiext/example/tests/Makefile.include index 06bb7769763..62d5ec4be5e 100644 --- a/ompi/mpiext/example/tests/Makefile.include +++ b/ompi/mpiext/example/tests/Makefile.include @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpiext/example/tests/progress_mpifh.F90 b/ompi/mpiext/example/tests/progress_mpifh.F90 index 54f436d6464..5da795f8a0f 100644 --- a/ompi/mpiext/example/tests/progress_mpifh.F90 +++ b/ompi/mpiext/example/tests/progress_mpifh.F90 @@ -9,16 +9,16 @@ program main implicit none include 'mpif.h' include 'mpif-ext.h' - + integer ierr, rank, size - + call MPI_INIT(ierr) call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr) call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr) - + write(*, '("Hello, world, I am ", i2, " of ", i2)') rank, size call OMPI_PROGRESS(3, MPI_COMM_WORLD, ierr) - + call MPI_FINALIZE(ierr) end program - + diff --git a/ompi/mpiext/example/tests/progress_usempi.F90 b/ompi/mpiext/example/tests/progress_usempi.F90 index d39a2767503..64da098fcf0 100644 --- a/ompi/mpiext/example/tests/progress_usempi.F90 +++ b/ompi/mpiext/example/tests/progress_usempi.F90 @@ -9,16 +9,16 @@ program main use mpi use mpi_ext implicit none - + integer ierr, rank, size - + call MPI_INIT(ierr) call 
MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr) call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr) - + write(*, '("Hello, world, I am ", i2, " of ", i2)') rank, size call OMPI_PROGRESS(3, MPI_COMM_WORLD, ierr) - + call MPI_FINALIZE(ierr) end program - + diff --git a/ompi/mpiext/example/tests/progress_usempif08.F90 b/ompi/mpiext/example/tests/progress_usempif08.F90 index 8e3d866654f..badc16733f4 100644 --- a/ompi/mpiext/example/tests/progress_usempif08.F90 +++ b/ompi/mpiext/example/tests/progress_usempif08.F90 @@ -9,16 +9,16 @@ program main use mpi_f08 use mpi_f08_ext implicit none - + integer ierr, rank, size - + call MPI_INIT(ierr) call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr) call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr) - + write(*, '("Hello, world, I am ", i2, " of ", i2)') rank, size call OMPI_PROGRESS(3, MPI_COMM_WORLD, ierr) - + call MPI_FINALIZE(ierr) end program - + diff --git a/ompi/mpiext/example/use-mpi-f08/Makefile.am b/ompi/mpiext/example/use-mpi-f08/Makefile.am index 9eb064a9bb3..656a036f098 100644 --- a/ompi/mpiext/example/use-mpi-f08/Makefile.am +++ b/ompi/mpiext/example/use-mpi-f08/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpiext/example/use-mpi-f08/mpiext_example_progress_f08.F90 b/ompi/mpiext/example/use-mpi-f08/mpiext_example_progress_f08.F90 index 310de796576..550a27c50a4 100644 --- a/ompi/mpiext/example/use-mpi-f08/mpiext_example_progress_f08.F90 +++ b/ompi/mpiext/example/use-mpi-f08/mpiext_example_progress_f08.F90 @@ -16,9 +16,9 @@ subroutine OMPI_Progress_f08(count, comm, ierror) ! We use the "only" clause just to be a little nice in the scope of ! things that we grab from that file. use :: mpi_f08_types, only : MPI_Comm - + implicit none - + ! Prototype the back-end function in mpif-h that we'll be invoking ! at the bottom of this subroutine. This is a little klunky and ! 
for demonstration purposes only; real extensions might want to @@ -34,17 +34,17 @@ subroutine OMPI_Progress_f(count, comm, ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine OMPI_Progress_f end interface - + ! Types for this subroutine's parameters and local variables. TYPE(MPI_Comm), INTENT(IN) :: comm INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: count, c_ierror - + ! Here we call the the back-end C function in the mpif.h bindings, ! but convert the mpi_f08-style MPI handles to mpif.h-style handles ! (by taking the MPI_VAL member out of its "struct"). call OMPI_Progress_f(count, comm%MPI_VAL, c_ierror) - + ! ierror is optional in the mpi_f08 bindings, so keep that ! convention here, too -- assign to ierror *if it was provided*. if (present(ierror)) ierror = c_ierror diff --git a/ompi/mpiext/example/use-mpi-f08/mpiext_example_usempif08.h b/ompi/mpiext/example/use-mpi-f08/mpiext_example_usempif08.h index 8c054ac6f25..f696605affc 100644 --- a/ompi/mpiext/example/use-mpi-f08/mpiext_example_usempif08.h +++ b/ompi/mpiext/example/use-mpi-f08/mpiext_example_usempif08.h @@ -2,9 +2,9 @@ ! ! Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. ! $COPYRIGHT$ -! +! ! Additional copyrights may follow -! +! ! $HEADER$ ! diff --git a/ompi/mpiext/example/use-mpi/Makefile.am b/ompi/mpiext/example/use-mpi/Makefile.am index f4593fb8ee8..00a6d8c7522 100644 --- a/ompi/mpiext/example/use-mpi/Makefile.am +++ b/ompi/mpiext/example/use-mpi/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/mpiext/example/use-mpi/mpiext_example_usempi.h b/ompi/mpiext/example/use-mpi/mpiext_example_usempi.h index 7f7aff289cf..a0d9bd787e6 100644 --- a/ompi/mpiext/example/use-mpi/mpiext_example_usempi.h +++ b/ompi/mpiext/example/use-mpi/mpiext_example_usempi.h @@ -2,9 +2,9 @@ ! ! Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. ! $COPYRIGHT$ -! 
+! ! Additional copyrights may follow -! +! ! $HEADER$ ! diff --git a/ompi/op/Makefile.am b/ompi/op/Makefile.am index 657929cf350..5599c31311b 100644 --- a/ompi/op/Makefile.am +++ b/ompi/op/Makefile.am @@ -6,15 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -23,4 +24,4 @@ headers += op/op.h -libmpi_la_SOURCES += op/op.c +lib@OMPI_LIBMPI_NAME@_la_SOURCES += op/op.c diff --git a/ompi/op/op.c b/ompi/op/op.c index d7a0427d25d..f1f4bf6f26f 100644 --- a/ompi/op/op.c +++ b/ompi/op/op.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -17,9 +17,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -56,7 +56,7 @@ static void ompi_op_destruct(ompi_op_t *eh); /* * Class instance */ -OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, +OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, ompi_op_construct, ompi_op_destruct); @@ -218,49 +218,49 @@ int ompi_op_init(void) /* Create the intrinsic ops */ - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_null.op, OMPI_OP_BASE_FORTRAN_NULL, FLAGS, "MPI_NULL") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_max.op, OMPI_OP_BASE_FORTRAN_MAX, FLAGS, "MPI_MAX") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_min.op, OMPI_OP_BASE_FORTRAN_MIN, FLAGS, "MPI_MIN") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_sum.op, OMPI_OP_BASE_FORTRAN_SUM, FLAGS_NO_FLOAT, "MPI_SUM") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_prod.op, OMPI_OP_BASE_FORTRAN_PROD, FLAGS_NO_FLOAT, "MPI_PROD") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_land.op, OMPI_OP_BASE_FORTRAN_LAND, FLAGS, "MPI_LAND") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_band.op, OMPI_OP_BASE_FORTRAN_BAND, FLAGS, "MPI_BAND") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_lor.op, OMPI_OP_BASE_FORTRAN_LOR, FLAGS, "MPI_LOR") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_bor.op, OMPI_OP_BASE_FORTRAN_BOR, FLAGS, "MPI_BOR") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_lxor.op, OMPI_OP_BASE_FORTRAN_LXOR, FLAGS, "MPI_LXOR") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_bxor.op, OMPI_OP_BASE_FORTRAN_BXOR, FLAGS, "MPI_BXOR") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_maxloc.op, OMPI_OP_BASE_FORTRAN_MAXLOC, FLAGS, "MPI_MAXLOC") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_minloc.op, OMPI_OP_BASE_FORTRAN_MINLOC, FLAGS, "MPI_MINLOC") || - OMPI_SUCCESS != + OMPI_SUCCESS 
!= add_intrinsic(&ompi_mpi_op_replace.op, OMPI_OP_BASE_FORTRAN_REPLACE, FLAGS, "MPI_REPLACE") || - OMPI_SUCCESS != + OMPI_SUCCESS != add_intrinsic(&ompi_mpi_op_no_op.op, OMPI_OP_BASE_FORTRAN_NO_OP, FLAGS, "MPI_NO_OP")) { return OMPI_ERROR; @@ -356,6 +356,9 @@ ompi_op_t *ompi_op_create_user(bool commute, new_op->o_flags |= OMPI_OP_FLAGS_COMMUTE; } + strncpy(new_op->o_name, "USER OP", sizeof(new_op->o_name) - 1); + new_op->o_name[sizeof(new_op->o_name) - 1] = '\0'; + /* Set the user-defined callback function. The "fort_fn" member is part of a union, so it doesn't matter if this is a C or Fortan callback; we'll call the right flavor (per o_flags) at @@ -378,7 +381,7 @@ void ompi_op_set_cxx_callback(ompi_op_t *op, MPI_User_function *fn) /* The OMPI C++ intercept was previously stored in op->o_func.fort_fn by ompi_op_create_user(). So save that in cxx.intercept_fn and put the user's fn in cxx.user_fn. */ - op->o_func.cxx_data.intercept_fn = + op->o_func.cxx_data.intercept_fn = (ompi_op_cxx_handler_fn_t *) op->o_func.fort_fn; op->o_func.cxx_data.user_fn = fn; } @@ -395,7 +398,7 @@ void ompi_op_set_java_callback(ompi_op_t *op, void *jnienv, /* The OMPI Java intercept was previously stored in op->o_func.fort_fn by ompi_op_create_user(). So save that in cxx.intercept_fn and put the user's fn in cxx.user_fn. */ - op->o_func.java_data.intercept_fn = + op->o_func.java_data.intercept_fn = (ompi_op_java_handler_fn_t *) op->o_func.fort_fn; op->o_func.java_data.jnienv = jnienv; op->o_func.java_data.object = object; @@ -433,7 +436,7 @@ static int add_intrinsic(ompi_op_t *op, int fort_handle, int flags, } else { return OMPI_SUCCESS; } -} +} /* @@ -443,9 +446,14 @@ static void ompi_op_construct(ompi_op_t *new_op) { int i; + /* Provide a default of a high value. Useful for non-predefined ops. 
*/ + new_op->op_type = OMPI_OP_NUM_OF_TYPES; + new_op->o_flags = 0; + new_op->o_name[0] = '\0'; + /* assign entry in fortran <-> c translation array */ - new_op->o_f_to_c_index = + new_op->o_f_to_c_index = opal_pointer_array_add(ompi_op_f_to_c_table, new_op); /* Set everything to NULL so that we can intelligently free diff --git a/ompi/op/op.h b/ompi/op/op.h index 8f5ff16e506..a99f64e9521 100644 --- a/ompi/op/op.h +++ b/ompi/op/op.h @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,17 +6,19 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 UT-Battelle, LLC * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -180,14 +182,14 @@ struct ompi_op_t { int baseType; } java_data; } o_func; - + /** 3-buffer functions, which is only for intrinsic ops. No need for the C/C++/Fortran user-defined functions. 
*/ ompi_op_base_op_3buff_fns_t o_3buff_intrinsic; }; /** - * Convenience typedef + * Convenience typedef */ typedef struct ompi_op_t ompi_op_t; OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_op_t); @@ -594,6 +596,13 @@ static inline void ompi_op_reduce(ompi_op_t * op, void *source, return; } +static inline void ompi_3buff_op_user (ompi_op_t *op, void * restrict source1, void * restrict source2, + void * restrict result, int count, struct ompi_datatype_t *dtype) +{ + ompi_datatype_copy_content_same_ddt (dtype, count, result, source1); + op->o_func.c_fn (source2, result, &count, &dtype); +} + /** * Perform a reduction operation. * @@ -628,10 +637,14 @@ static inline void ompi_3buff_op_reduce(ompi_op_t * op, void *source1, src2 = source2; tgt = target; - op->o_3buff_intrinsic.fns[ompi_op_ddt_map[dtype->id]](src1, src2, - tgt, &count, - &dtype, - op->o_3buff_intrinsic.modules[ompi_op_ddt_map[dtype->id]]); + if (OPAL_LIKELY(ompi_op_is_intrinsic (op))) { + op->o_3buff_intrinsic.fns[ompi_op_ddt_map[dtype->id]](src1, src2, + tgt, &count, + &dtype, + op->o_3buff_intrinsic.modules[ompi_op_ddt_map[dtype->id]]); + } else { + ompi_3buff_op_user (op, src1, src2, tgt, count, dtype); + } } END_C_DECLS diff --git a/ompi/patterns/comm/Makefile.am b/ompi/patterns/comm/Makefile.am index ae4ae27c31d..9a733aff78a 100644 --- a/ompi/patterns/comm/Makefile.am +++ b/ompi/patterns/comm/Makefile.am @@ -1,8 +1,9 @@ # Copyright (c) 2013 Oak Ridge National Laboratory. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -10,7 +11,7 @@ headers += \ patterns/comm/coll_ops.h \ patterns/comm/commpatterns.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ patterns/comm/allreduce.c \ patterns/comm/allgather.c \ patterns/comm/bcast.c diff --git a/ompi/patterns/comm/allgather.c b/ompi/patterns/comm/allgather.c index a12a198c836..48321bf3cf4 100644 --- a/ompi/patterns/comm/allgather.c +++ b/ompi/patterns/comm/allgather.c @@ -6,9 +6,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file */ @@ -43,7 +43,7 @@ OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, OPAL_PTRDIFF_TYPE dt_extent; char *src_buf_current; char *dest_buf_current; - struct iovec send_iov[2] = {{0,0},{0,0}}, + struct iovec send_iov[2] = {{0,0},{0,0}}, recv_iov[2] = {{0,0},{0,0}}; ompi_request_t *requests[4]; @@ -76,7 +76,7 @@ OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, /* get my reduction communication pattern */ memset(&my_exchange_node, 0, sizeof(netpatterns_pair_exchange_node_t)); - rc = netpatterns_setup_recursive_doubling_tree_node(n_peers, + rc = netpatterns_setup_recursive_doubling_tree_node(n_peers, my_rank_in_group, &my_exchange_node); if(OMPI_SUCCESS != rc){ return rc; @@ -136,7 +136,7 @@ OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, msg_cnt=0; /* - * Power of 2 data segment + * Power of 2 data segment */ /* post non-blocking receive */ if(pair_rank > my_rank_in_group ){ @@ -226,7 +226,7 @@ OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, goto Error; } msg_cnt++; - if( iovec_len > 1 ) { + if( iovec_len > 1 ) { rc=MCA_PML_CALL(isend(send_iov[1].iov_base, send_iov[1].iov_len,dtype,ranks_in_comm[pair_rank], 
-OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD, @@ -241,7 +241,7 @@ OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, if(pair_rank < my_rank_in_group ){ src_buf_current-=current_data_extent; local_data_start_rank-=proc_block; - } + } proc_block*=2; current_data_extent*=2; current_data_count*=2; @@ -254,8 +254,8 @@ OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, if(0 < my_exchange_node.n_extra_sources) { if ( EXTRA_NODE == my_exchange_node.node_type ) { - /* - ** receive the data + /* + ** receive the data ** */ extra_rank=my_exchange_node.rank_extra_source; diff --git a/ompi/patterns/comm/allreduce.c b/ompi/patterns/comm/allreduce.c index 92b06472182..2fbf9e21773 100644 --- a/ompi/patterns/comm/allreduce.c +++ b/ompi/patterns/comm/allreduce.c @@ -6,9 +6,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file */ @@ -28,9 +28,9 @@ /** * All-reduce for contigous primitive types */ -OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, +OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, ompi_datatype_t *dtype, int my_rank_in_group, - struct ompi_op_t *op, int n_peers,int *ranks_in_comm, + struct ompi_op_t *op, int n_peers,int *ranks_in_comm, ompi_communicator_t *comm) { /* local variables */ @@ -55,7 +55,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, if( OMPI_SUCCESS != rc ) { goto Error; } - + /* 1 process special case */ if(1 == n_peers) { /* place my data in the correct destination buffer */ @@ -108,7 +108,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, if(0 < my_exchange_node.n_extra_sources) { if ( EXCHANGE_NODE == my_exchange_node.node_type ) { - + /* ** Receive data from extra node */ @@ -134,7 +134,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, 
void *rbuf, int count, } else { - + /* ** Send data to "partner" node */ @@ -208,8 +208,8 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, if(0 < my_exchange_node.n_extra_sources) { if ( EXTRA_NODE == my_exchange_node.node_type ) { - /* - ** receive the data + /* + ** receive the data ** */ extra_rank=my_exchange_node.rank_extra_source; rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer], @@ -245,7 +245,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, /* copy data from the temp buffer into the output buffer */ rbuf_current = (char *) rbuf + count_processed * dt_size; memcpy(rbuf_current,scratch_bufers[send_buffer], count_this_stripe*dt_size); - + /* update the count of elements processed */ count_processed += count_this_stripe; } diff --git a/ompi/patterns/comm/bcast.c b/ompi/patterns/comm/bcast.c index 01110a63311..2a25d495db6 100644 --- a/ompi/patterns/comm/bcast.c +++ b/ompi/patterns/comm/bcast.c @@ -6,9 +6,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file */ diff --git a/ompi/patterns/comm/coll_ops.h b/ompi/patterns/comm/coll_ops.h index 5682fbc2e1e..846e5660cc4 100644 --- a/ompi/patterns/comm/coll_ops.h +++ b/ompi/patterns/comm/coll_ops.h @@ -27,7 +27,7 @@ BEGIN_C_DECLS OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, - ompi_datatype_t *dtype, int my_rank_in_group, int n_peers, + ompi_datatype_t *dtype, int my_rank_in_group, int n_peers, int *ranks_in_comm,ompi_communicator_t *comm); OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, ompi_datatype_t *dtype, int my_rank_in_group, diff --git a/ompi/patterns/net/Makefile.am b/ompi/patterns/net/Makefile.am index a31fe2b8d42..d9b07fd1422 100644 --- a/ompi/patterns/net/Makefile.am +++ b/ompi/patterns/net/Makefile.am @@ -1,8 +1,9 @@ # Copyright (c) 2013 Oak Ridge National Laboratory. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -11,7 +12,7 @@ headers += \ patterns/net/netpatterns_knomial_tree.h \ patterns/net/coll_ops.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ patterns/net/netpatterns_base.c \ patterns/net/netpatterns_multinomial_tree.c \ patterns/net/netpatterns_nary_tree.c \ diff --git a/ompi/patterns/net/allreduce.c b/ompi/patterns/net/allreduce.c index b0b97605a24..1f0cc0b4a89 100644 --- a/ompi/patterns/net/allreduce.c +++ b/ompi/patterns/net/allreduce.c @@ -4,9 +4,9 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file */ @@ -20,7 +20,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/mca/rte/rte.h" -void send_completion(nt status, struct ompi_process_name_t* peer, struct iovec* msg, +void send_completion(nt status, struct ompi_process_name_t* peer, struct iovec* msg, int count, ompi_rml_tag_t tag, void* cbdata) { /* set send completion flag */ @@ -28,7 +28,7 @@ void send_completion(nt status, struct ompi_process_name_t* peer, struct iovec* } -void recv_completion(nt status, struct ompi_process_name_t* peer, struct iovec* msg, +void recv_completion(nt status, struct ompi_process_name_t* peer, struct iovec* msg, int count, ompi_rml_tag_t tag, void* cbdata) { /* set receive completion flag */ @@ -48,7 +48,7 @@ static void op_reduce(int op_type,(void *)src_dest_buf,(void *) src_buf, int cou case OP_SUM: - + switch (data_type) { case TYPE_INT4: int *int_src_ptr=(int *)src_ptr; @@ -76,7 +76,7 @@ static void op_reduce(int op_type,(void *)src_dest_buf,(void *) src_buf, int cou * All-reduce for contigous primitive types */ static -comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, +comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, int op_type, opal_list_t *peers) { /* local variables */ @@ -157,7 +157,7 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, return ret; } - /* setup flags for non-blocking communications */ + /* setup flags for non-blocking communications */ recv_done=&recv_completion_flag; send_done=&send_completion_flag; @@ -185,11 +185,11 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, if(0 < my_exchange_node->n_extra_sources) { if ( EXCHANGE_NODE == my_exchange_node->node_type ) { - + /* ** Receive data from extra node */ - + extra_rank=my_exchange_node.rank_extra_source; recv_iov.iov_base=scratch_bufers[recv_buffer]; recv_iov.iov_len=count_this_stripe*dt_size; @@ -207,7 
+207,7 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, } else { - + /* ** Send data to "partner" node */ @@ -240,7 +240,7 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, /* loop over data exchanges */ for(exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) { - /* debug + /* debug t4=opal_sys_timer_get_cycles(); end debug */ @@ -251,7 +251,7 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, /* is the remote data read */ pair_rank=my_exchange_node->rank_exchanges[exchange]; - *recv_done=0; + *recv_done=0; *send_done=0; MB(); @@ -271,14 +271,14 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, while(!(*recv_done) ) { opal_progress(); } - + /* reduce the data */ if( 0 < count_this_stripe ) { op_reduce(op_type,(void *)scratch_bufers[recv_buffer], (void *)scratch_bufers[send_buffer], n_my_count,TYPE_INT4); } - + /* get ready for next step */ index_read=(exchange&1); index_write=((exchange+1)&1); @@ -287,15 +287,15 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, while(!(*send_done) ) { opal_progress(); } - + } /* copy data in from the "extra" source, if need be */ if(0 < my_exchange_node->n_extra_sources) { if ( EXTRA_NODE == my_exchange_node->node_type ) { - /* - ** receive the data + /* + ** receive the data ** */ extra_rank=my_exchange_node->rank_extra_source; @@ -334,7 +334,7 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, /* copy data from the temp buffer into the output buffer */ rbuf_current=(char *)rbuf+count_processed*dt_size; memcopy(scratch_bufers[recv_buffer],rbuf_current,count_this_stripe*dt_size); - + /* update the count of elements processed */ count_processed+=count_this_stripe; } diff --git a/ompi/patterns/net/netpatterns.h b/ompi/patterns/net/netpatterns.h index 330d090eda4..e2ebb6ad627 100644 --- a/ompi/patterns/net/netpatterns.h +++ b/ompi/patterns/net/netpatterns.h @@ 
-36,7 +36,7 @@ OMPI_DECLSPEC extern int netpatterns_base_err(const char*, ...) __opal_attribute netpatterns_base_err args; \ netpatterns_base_err("\n"); \ } \ - } while(0); + } while(0); #else #define NETPATTERNS_VERBOSE(args) #endif @@ -116,7 +116,7 @@ struct netpatterns_narray_knomial_tree_node_t { /* Knomial recursive gather information */ struct netpatterns_k_exchange_node_t k_node; }; -typedef struct netpatterns_narray_knomial_tree_node_t +typedef struct netpatterns_narray_knomial_tree_node_t netpatterns_narray_knomial_tree_node_t; @@ -133,7 +133,7 @@ OMPI_DECLSPEC int netpatterns_setup_narray_knomial_tree( int tree_order, int my_ OMPI_DECLSPEC void netpatterns_cleanup_narray_knomial_tree (netpatterns_narray_knomial_tree_node_t *my_node); /* setup an multi-nomial tree - for each node in the tree - * this returns it's parent, and it's children + * this returns it's parent, and it's children */ OMPI_DECLSPEC int netpatterns_setup_multinomial_tree(int tree_order, int num_nodes, netpatterns_tree_node_t *tree_nodes); diff --git a/ompi/patterns/net/netpatterns_base.c b/ompi/patterns/net/netpatterns_base.c index 078bb1a4fc2..bc51490def5 100644 --- a/ompi/patterns/net/netpatterns_base.c +++ b/ompi/patterns/net/netpatterns_base.c @@ -3,9 +3,9 @@ * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include @@ -47,7 +47,7 @@ static int was_called = 0; if (0 == was_called) { was_called = 1; - + return netpatterns_register_mca_params(); } diff --git a/ompi/patterns/net/netpatterns_knomial_tree.c b/ompi/patterns/net/netpatterns_knomial_tree.c index 491b5fda0c4..f09ef968fb7 100644 --- a/ompi/patterns/net/netpatterns_knomial_tree.c +++ b/ompi/patterns/net/netpatterns_knomial_tree.c @@ -45,7 +45,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( int k_temp2; int myid, reindex_myid = 0; int base, peer_base,base_temp; - int peer; + int peer; int *prev_data = NULL; int *current_data = NULL; int *group_info = NULL; @@ -64,8 +64,8 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( /* k-nomial radix */ exchange_node->tree_order = tree_order; - /* Calculate the number of levels in the tree for - * the largest power of tree_order less than or + /* Calculate the number of levels in the tree for + * the largest power of tree_order less than or * equal to the group size */ n_levels = 0; @@ -74,8 +74,8 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( cnt *= tree_order; n_levels++; } - /* this is the actual number of recusive k-ing steps - * we will perform, the last step may not be a full + /* this is the actual number of recusive k-ing steps + * we will perform, the last step may not be a full * step depending on the outcome of the next conditional */ pow_k = n_levels; @@ -91,7 +91,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( exchange_node->log_tree_order = pow_k; exchange_node->n_largest_pow_tree_order = cnt; - + /* find the number of complete groups of size tree_order, tree_order^2, tree_order^3,...,tree_order^pow_k */ /* I don't think we need to cache this info this group_info array */ group_info = (int *) calloc(pow_k , sizeof(int)); @@ -110,11 +110,11 @@ OMPI_DECLSPEC int 
netpatterns_setup_recursive_knomial_allgather_tree_node( } knt--; /*fprintf(stderr,"Maximal power of k is %d and the number of incomplete groups is %d \n", knt+1 ,tree_order - group_info[knt] );*/ - + /* k_temp is a synonym for cnt which is the largest full power of k group */ - /* now, start the calculation to find the first stray rank aka "extra" rank */ + /* now, start the calculation to find the first stray rank aka "extra" rank */ stray = 0; - /*fprintf(stderr,"Maximal power of k %d, first stragler rank is %d and the number of straglers is %d\n",cnt, + /*fprintf(stderr,"Maximal power of k %d, first stragler rank is %d and the number of straglers is %d\n",cnt, cnt*group_info[knt], num_nodes - cnt*group_info[knt]);*/ @@ -212,9 +212,9 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( } } - /* intialize the payload array - This is the money struct, just need to initialize this with - the subgroup information */ + /* intialize the payload array + This is the money struct, just need to initialize this with + the subgroup information */ /* for(i = 0; i < num_nodes; i++){ prev_data[i] = 1; @@ -227,8 +227,8 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( current_data[i] = hier_ranks[i]; } - /* everyone will need to do this loop over all ranks - * Phase I calculate the contribution from the extra ranks + /* everyone will need to do this loop over all ranks + * Phase I calculate the contribution from the extra ranks */ for( myid = 0; myid < num_nodes; myid++) { /* get my new rank */ @@ -268,7 +268,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( break; } } - if( reindex_myid < stray ) { + if( reindex_myid < stray ) { /* now start the actual algorithm */ FIND_BASE(base,reindex_myid,i+1,tree_order); for( j = 0; j < ( tree_order - 1 ); j ++ ) { @@ -280,19 +280,19 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( exchange_node->payload_info[i][j].r_len = 
prev_data[exchange_node->reindex_map[peer]]; /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_len %d\n",i,j,prev_data[exchange_node->reindex_map[peer]]);*/ if( i > 0 ) { - + /* find my len and offset */ FIND_BASE(peer_base,peer,i,tree_order); /* I do not want to mess with this, but it seems that I have no choice */ ex_node = exchange_node->reindex_map[peer_base]; /* now, find out how far down the line this guy really is */ knt2 =0; - for(kk = 0; kk < ex_node; kk++){ + for(kk = 0; kk < ex_node; kk++){ knt2 += hier_ranks[kk]; } - exchange_node->payload_info[i][j].r_offset = knt2; + exchange_node->payload_info[i][j].r_offset = knt2; /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/ - + FIND_BASE(base_temp,reindex_myid,i,tree_order); ex_node = exchange_node->reindex_map[base_temp]; knt2 = 0; @@ -315,7 +315,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( for(kk = 0; kk < myid; kk++){ knt2 += hier_ranks[kk]; } - exchange_node->payload_info[i][j].s_offset = knt2; + exchange_node->payload_info[i][j].s_offset = knt2; /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j, exchange_node->payload_info[i][j].s_offset);*/ } /* how much I am to receive from this peer on this level */ @@ -345,7 +345,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( prev_data[j] = current_data[j]; } /* fprintf(stderr,"\n");*/ - + } @@ -368,7 +368,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( exchange_node->n_extra_sources = 0; for( i = stray; i < num_nodes; i++) { if(exchange_node->reindex_myid == ( i - cnt )) { - /* then I am a proxy rank and there is only a + /* then I am a proxy rank and there is only a * single extra source */ exchange_node->n_extra_sources = 1; @@ -407,7 +407,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( /* initialize this */ 
exchange_node->n_actual_exchanges = 0; /* Allocate 2 dimension array thak keeps - rank exchange information for each step*/ + rank exchange information for each step*/ exchange_node->rank_exchanges = (int **) malloc (exchange_node->n_exchanges * sizeof(int *)); if(NULL == exchange_node->rank_exchanges) { @@ -434,12 +434,12 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( if ( peer < stray ) { exchange_node->rank_exchanges[i][j] = exchange_node->reindex_map[peer]; /* an actual exchange occurs, bump the counter */ - + } else { /* out of range, skip it - do not bump the n_actual_exchanges counter */ exchange_node->rank_exchanges[i][j] = -1; } - + } k_temp1 *= tree_order; k_temp2 *= tree_order; @@ -465,7 +465,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( */ /* do we need this one */ exchange_node->n_tags = tree_order * n_levels + 1; - + free(prev_data); free(current_data); free(group_info); @@ -538,7 +538,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node( /* local variables */ int i, j, tmp, cnt; int n_levels; - int k_base, kpow_num, peer; + int k_base, kpow_num, peer; NETPATTERNS_VERBOSE( ("Enter netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", @@ -624,7 +624,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node( if (EXCHANGE_NODE == exchange_node->node_type) { exchange_node->n_exchanges = n_levels; /* Allocate 2 dimension array thak keeps - rank exchange information for each step*/ + rank exchange information for each step*/ exchange_node->rank_exchanges = (int **) malloc (exchange_node->n_exchanges * sizeof(int *)); if(NULL == exchange_node->rank_exchanges) { @@ -638,18 +638,18 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node( } } /* fill in exchange partners */ - for(i = 0, kpow_num = 1; i < exchange_node->n_exchanges; + for(i = 0, kpow_num = 1; i < exchange_node->n_exchanges; i++, kpow_num *= tree_order) { 
k_base = node_rank / (kpow_num * tree_order); for(j = 1; j < tree_order; j++) { peer = node_rank + kpow_num * j; if (k_base != peer/(kpow_num * tree_order)) { /* Wraparound the number */ - peer = k_base * (kpow_num * tree_order) + + peer = k_base * (kpow_num * tree_order) + peer % (kpow_num * tree_order); } exchange_node->rank_exchanges[i][j - 1] = peer; - NETPATTERNS_VERBOSE(("rank_exchanges#(%d,%d)/%d = %d", + NETPATTERNS_VERBOSE(("rank_exchanges#(%d,%d)/%d = %d", i, j, tree_order, peer)); } } @@ -696,7 +696,7 @@ OMPI_DECLSPEC void netpatterns_cleanup_recursive_knomial_tree_node( } } -#if 1 +#if 1 OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order, netpatterns_pair_exchange_node_t *exchange_node) { @@ -858,7 +858,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_tree_node(int num_nodes, return netpatterns_setup_recursive_doubling_n_tree_node(num_nodes, node_rank, 2, exchange_node); } -#if 0 +#if 0 /*OMPI_DECLSPEC int old_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,*/ OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank,int tree_order, netpatterns_pair_exchange_node_t *exchange_node) diff --git a/ompi/patterns/net/netpatterns_knomial_tree.h b/ompi/patterns/net/netpatterns_knomial_tree.h index 6ff89cd692e..a5736a1d877 100644 --- a/ompi/patterns/net/netpatterns_knomial_tree.h +++ b/ompi/patterns/net/netpatterns_knomial_tree.h @@ -44,7 +44,7 @@ struct netpatterns_pair_exchange_node_t { /* number of extra sources of data - outside largest power of 2 in * this group */ int n_extra_sources; - + /* rank of the extra source */ /* deprecated */ int rank_extra_source; int *rank_extra_sources_array; @@ -133,28 +133,28 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( OMPI_DECLSPEC void netpatterns_cleanup_recursive_knomial_allgather_tree_node( netpatterns_k_exchange_node_t *exchange_node); -/* Input: 
k_exchange_node structure - Output: index in rank_exchanges array that points - to the "start_point" for outgoing send. +/* Input: k_exchange_node structure + Output: index in rank_exchanges array that points + to the "start_point" for outgoing send. Please see below example of usage: - for (i = start_point ; i > 0; i--) - for (k = 0; k < tree_radix; k++) + for (i = start_point ; i > 0; i--) + for (k = 0; k < tree_radix; k++) send messages to exchange_node->rank_exchanges[i][k]; */ -static inline __opal_attribute_always_inline__ +static inline __opal_attribute_always_inline__ int netpatterns_get_knomial_level( - int my_rank, int src_rank, + int my_rank, int src_rank, int radix, int size, int *k_level) { - int distance, + int distance, pow_k; int logk_level = 0; /* Calculate disctance from source of data */ - distance = src_rank - my_rank; + distance = src_rank - my_rank; /* Wrap around */ if (0 > distance) { @@ -175,7 +175,7 @@ int netpatterns_get_knomial_level( /* Input: my_rank, root, radix, size * Output: source of the data, offset in power of K */ -static inline __opal_attribute_always_inline__ +static inline __opal_attribute_always_inline__ int netpatterns_get_knomial_data_source( int my_rank, int root, int radix, int size, int *k_level, int *logk_level) @@ -184,12 +184,12 @@ int netpatterns_get_knomial_data_source( int step = 0; /* Calculate source of the data */ - while((0 == (root - my_rank) % level) + while((0 == (root - my_rank) % level) && (level <= size)) { level *= radix; ++step; - } - + } + *k_level = level/radix; *logk_level = step; return my_rank - (my_rank % level - root % level); @@ -199,7 +199,7 @@ int netpatterns_get_knomial_data_source( * k_level - that you get from netpatterns_get_knomial_data_source * k_step - some integer * Output: peer - next children in the tree - * Usage: + * Usage: * src = netpatterns_get_knomial_data_source( * my_rank, root, radix, size, * &k_level, &logk_level) diff --git 
a/ompi/patterns/net/netpatterns_multinomial_tree.c b/ompi/patterns/net/netpatterns_multinomial_tree.c index 14f5c42296e..54fc41f4c98 100644 --- a/ompi/patterns/net/netpatterns_multinomial_tree.c +++ b/ompi/patterns/net/netpatterns_multinomial_tree.c @@ -51,9 +51,9 @@ OMPI_DECLSPEC int netpatterns_setup_multinomial_tree(int tree_order, int num_nod /* cummulative count of ranks */ while( 0 < result ) { result-=cnt; - cnt*=tree_order; + cnt*=tree_order; n_lvls_in_tree++; - }; + }; /* loop over tree levels */ n_nodes_in_this_level=1; @@ -65,7 +65,7 @@ OMPI_DECLSPEC int netpatterns_setup_multinomial_tree(int tree_order, int num_nod for ( node=0 ; node < n_nodes_in_this_level ; node++ ) { /* get node index */ node_index++; - + /* break if reach group size */ if( node_index == num_nodes) { break; diff --git a/ompi/patterns/net/netpatterns_nary_tree.c b/ompi/patterns/net/netpatterns_nary_tree.c index d0e0b1a37e1..8635bb13a53 100644 --- a/ompi/patterns/net/netpatterns_nary_tree.c +++ b/ompi/patterns/net/netpatterns_nary_tree.c @@ -88,7 +88,7 @@ int netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes, /* tree_order consecutive ranks have the same parent */ my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order; } - + /* figure out number of levels in the tree */ n_lvls_in_tree=0; result=num_nodes; @@ -138,7 +138,7 @@ int netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes, for (lvl= start_index ; lvl <= end_index ; lvl++ ) { my_node->children_ranks[lvl-start_index]=lvl; } - } + } } /* set node type */ if( 0 == my_node->n_parents ) { @@ -226,13 +226,13 @@ int netpatterns_setup_narray_knomial_tree( cnt*=tree_order; } - my_node->rank_on_level = - my_rank_in_my_level = + my_node->rank_on_level = + my_rank_in_my_level = my_rank-cum_cnt; my_node->level_size = cnt; rc = netpatterns_setup_recursive_knomial_tree_node( - my_node->level_size, my_node->rank_on_level, + my_node->level_size, my_node->rank_on_level, tree_order, 
&my_node->k_node); if (OMPI_SUCCESS != rc) { goto Error; @@ -241,7 +241,7 @@ int netpatterns_setup_narray_knomial_tree( /* tree_order consecutive ranks have the same parent */ my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order; } - + /* figure out number of levels in the tree */ n_lvls_in_tree=0; result=num_nodes; @@ -296,7 +296,7 @@ int netpatterns_setup_narray_knomial_tree( for (lvl= start_index ; lvl <= end_index ; lvl++ ) { my_node->children_ranks[lvl-start_index]=lvl; } - } + } } /* set node type */ if( 0 == my_node->n_parents ) { @@ -345,7 +345,7 @@ static int fill_in_node_data(int tree_order, int num_nodes, int my_node, /* local variables */ int rc, num_ranks_per_child, num_children, n_extra; int child, rank, n_to_offset, n_ranks_to_child; - + /* figure out who are my children */ num_ranks_per_child=num_nodes/tree_order; if( num_ranks_per_child ) { @@ -393,7 +393,7 @@ static int fill_in_node_data(int tree_order, int num_nodes, int my_node, rank=my_node+1+child*num_ranks_per_child; rank+=n_to_offset; - /* set parent information */ + /* set parent information */ nodes_data[rank].n_parents=1; nodes_data[rank].parent_rank=my_node; @@ -401,7 +401,7 @@ static int fill_in_node_data(int tree_order, int num_nodes, int my_node, if(n_extra && (child < n_extra) ) { n_ranks_to_child++; } - + /* set child information */ nodes_data[my_node].children_ranks[child]=rank; @@ -431,7 +431,7 @@ static int fill_in_node_data(int tree_order, int num_nodes, int my_node, * appropriate communication pattern for such roots. 
*/ OMPI_DECLSPEC int netpatterns_setup_narray_tree_contigous_ranks( - int tree_order, int num_nodes, + int tree_order, int num_nodes, netpatterns_tree_node_t **tree_nodes) { /* local variables */ @@ -448,7 +448,7 @@ OMPI_DECLSPEC int netpatterns_setup_narray_tree_contigous_ranks( } (*tree_nodes)[0].n_parents=0; - rc=fill_in_node_data(tree_order, + rc=fill_in_node_data(tree_order, num_descendent_ranks, 0, *tree_nodes); /* successful return */ diff --git a/ompi/peruse/Makefile.am b/ompi/peruse/Makefile.am index 8cc29d03b48..9b2d043ce43 100644 --- a/ompi/peruse/Makefile.am +++ b/ompi/peruse/Makefile.am @@ -3,12 +3,13 @@ # Copyright (c) 2004-2006 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -20,7 +21,7 @@ if WANT_PERUSE # do NOT want this nobase - we want the peruse stripped off... include_HEADERS += peruse/peruse.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ peruse/peruse.c \ peruse/peruse_module.c endif diff --git a/ompi/peruse/peruse-internal.h b/ompi/peruse/peruse-internal.h index 005ab1e6c57..ce5008d0217 100644 --- a/ompi/peruse/peruse-internal.h +++ b/ompi/peruse/peruse-internal.h @@ -2,12 +2,12 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #ifndef _PERUSE_INTERNAL_H_ diff --git a/ompi/peruse/peruse.c b/ompi/peruse/peruse.c index 19d76631ea5..ef9680b79a6 100644 --- a/ompi/peruse/peruse.c +++ b/ompi/peruse/peruse.c @@ -2,19 +2,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" -#ifdef HAVE_STRING_H # include -#endif #include "mpi.h" #include "ompi/peruse/peruse.h" #include "ompi/peruse/peruse-internal.h" diff --git a/ompi/peruse/peruse.h b/ompi/peruse/peruse.h index 924a7e5868b..f1f0d817219 100644 --- a/ompi/peruse/peruse.h +++ b/ompi/peruse/peruse.h @@ -2,14 +2,14 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/peruse/peruse_module.c b/ompi/peruse/peruse_module.c index e540409e955..5f19168700f 100644 --- a/ompi/peruse/peruse_module.c +++ b/ompi/peruse/peruse_module.c @@ -3,19 +3,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" -#ifdef HAVE_STDLIB_H # include -#endif #include "mpi.h" #include "ompi/peruse/peruse.h" #include "ompi/peruse/peruse-internal.h" diff --git a/ompi/proc/Makefile.am b/ompi/proc/Makefile.am index 9b245d43e9c..e9ad85d6f73 100644 --- a/ompi/proc/Makefile.am +++ b/ompi/proc/Makefile.am @@ -6,14 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -22,5 +23,5 @@ headers += \ proc/proc.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ proc/proc.c diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c index 701fb4f7d14..d05cfce5792 100644 --- a/ompi/proc/proc.c +++ b/ompi/proc/proc.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -9,12 +10,14 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2014 Intel, Inc. 
All rights reserved - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,21 +36,24 @@ #include "opal/dss/dss.h" #include "opal/util/arch.h" #include "opal/util/show_help.h" -#include "opal/mca/dstore/dstore.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/pmix/pmix.h" +#include "opal/util/argv.h" #include "ompi/proc/proc.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/runtime/mpiruntime.h" #include "ompi/runtime/params.h" -static opal_list_t ompi_proc_list; +opal_list_t ompi_proc_list = {{0}}; static opal_mutex_t ompi_proc_lock; +static opal_hash_table_t ompi_proc_hash; + ompi_proc_t* ompi_proc_local_proc = NULL; static void ompi_proc_construct(ompi_proc_t* proc); static void ompi_proc_destruct(ompi_proc_t* proc); +static ompi_proc_t *ompi_proc_for_name_nolock (const opal_process_name_t proc_name); OBJ_CLASS_INSTANCE( ompi_proc_t, @@ -82,51 +88,234 @@ void ompi_proc_destruct(ompi_proc_t* proc) if (NULL != proc->super.proc_hostname) { free(proc->super.proc_hostname); } - OPAL_THREAD_LOCK(&ompi_proc_lock); + opal_mutex_lock (&ompi_proc_lock); opal_list_remove_item(&ompi_proc_list, (opal_list_item_t*)proc); - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_hash_table_remove_value_ptr (&ompi_proc_hash, &proc->super.proc_name, sizeof (proc->super.proc_name)); + opal_mutex_unlock (&ompi_proc_lock); } +/** + * Allocate a new ompi_proc_T for the given jobid/vpid + * + * @param[in] jobid Job identifier + * @param[in] vpid Process identifier + * @param[out] procp New ompi_proc_t 
structure + * + * This function allocates a new ompi_proc_t and inserts it into + * the process list and hash table. + */ +static int ompi_proc_allocate (ompi_jobid_t jobid, ompi_vpid_t vpid, ompi_proc_t **procp) { + ompi_proc_t *proc = OBJ_NEW(ompi_proc_t); + + opal_list_append(&ompi_proc_list, (opal_list_item_t*)proc); -int ompi_proc_init(void) + OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = jobid; + OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid = vpid; + + opal_hash_table_set_value_ptr (&ompi_proc_hash, &proc->super.proc_name, sizeof (proc->super.proc_name), + proc); + + *procp = proc; + + return OMPI_SUCCESS; +} + +/** + * Finish setting up an ompi_proc_t + * + * @param[in] proc ompi process structure + * + * This function contains the core code of ompi_proc_complete_init() and + * ompi_proc_refresh(). The tasks performed by this function include + * retrieving the hostname (if below the modex cutoff), determining the + * remote architecture, and calculating the locality of the process. 
+ */ +int ompi_proc_complete_init_single (ompi_proc_t *proc) { - ompi_vpid_t i; -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT + uint16_t u16, *u16ptr; int ret; + + u16ptr = &u16; + + if ((OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid == OMPI_PROC_MY_NAME->jobid) && + (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid == OMPI_PROC_MY_NAME->vpid)) { + /* nothing else to do */ + return OMPI_SUCCESS; + } + + /* get the locality information - all RTEs are required + * to provide this information at startup */ + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16); + if (OPAL_SUCCESS != ret) { + proc->super.proc_flags = OPAL_PROC_NON_LOCAL; + } else { + proc->super.proc_flags = u16; + } + + /* we can retrieve the hostname at no cost because it + * was provided at startup - but make it optional so + * we don't chase after it if some system doesn't + * provide it */ + proc->super.proc_hostname = NULL; + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name, + (char**)&(proc->super.proc_hostname), OPAL_STRING); + +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT + /* get the remote architecture - this might force a modex except + * for those environments where the RM provides it */ + { + uint32_t *ui32ptr; + ui32ptr = &(proc->super.proc_arch); + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name, + (void**)&ui32ptr, OPAL_UINT32); + if (OPAL_SUCCESS == ret) { + /* if arch is different than mine, create a new convertor for this proc */ + if (proc->super.proc_arch != opal_local_arch) { + OBJ_RELEASE(proc->super.proc_convertor); + proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0); + } + } else if (OMPI_ERR_NOT_IMPLEMENTED == ret) { + proc->super.proc_arch = opal_local_arch; + } else { + return ret; + } + } +#else + /* must be same arch as my own */ + proc->super.proc_arch = opal_local_arch; #endif + return OMPI_SUCCESS; +} + +opal_proc_t *ompi_proc_lookup (const 
opal_process_name_t proc_name) +{ + ompi_proc_t *proc = NULL; + int ret; + + /* try to lookup the value in the hash table */ + ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc); + + if (OPAL_SUCCESS == ret) { + return &proc->super; + } + + return NULL; +} + +static ompi_proc_t *ompi_proc_for_name_nolock (const opal_process_name_t proc_name) +{ + ompi_proc_t *proc = NULL; + int ret; + + /* double-check that another competing thread has not added this proc */ + ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc); + if (OPAL_SUCCESS == ret) { + goto exit; + } + + /* allocate a new ompi_proc_t object for the process and insert it into the process table */ + ret = ompi_proc_allocate (proc_name.jobid, proc_name.vpid, &proc); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + /* allocation fail */ + goto exit; + } + + /* finish filling in the important proc data fields */ + ret = ompi_proc_complete_init_single (proc); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + goto exit; + } +exit: + return proc; +} + +opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name) +{ + ompi_proc_t *proc = NULL; + int ret; + + /* try to lookup the value in the hash table */ + ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc); + if (OPAL_SUCCESS == ret) { + return &proc->super; + } + + opal_mutex_lock (&ompi_proc_lock); + proc = ompi_proc_for_name_nolock (proc_name); + opal_mutex_unlock (&ompi_proc_lock); + + return (opal_proc_t *) proc; +} + +int ompi_proc_init(void) +{ + int opal_proc_hash_init_size = (ompi_process_info.num_procs < ompi_add_procs_cutoff) ? 
ompi_process_info.num_procs : + 1024; + ompi_proc_t *proc; + int ret; + OBJ_CONSTRUCT(&ompi_proc_list, opal_list_t); OBJ_CONSTRUCT(&ompi_proc_lock, opal_mutex_t); + OBJ_CONSTRUCT(&ompi_proc_hash, opal_hash_table_t); - /* create proc structures and find self */ - for( i = 0; i < ompi_process_info.num_procs; i++ ) { - ompi_proc_t *proc = OBJ_NEW(ompi_proc_t); - opal_list_append(&ompi_proc_list, (opal_list_item_t*)proc); + ret = opal_hash_table_init (&ompi_proc_hash, opal_proc_hash_init_size); + if (OPAL_SUCCESS != ret) { + return ret; + } - OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = OMPI_PROC_MY_NAME->jobid; - OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid = i; + /* create a proc for the local process */ + ret = ompi_proc_allocate (OMPI_PROC_MY_NAME->jobid, OMPI_PROC_MY_NAME->vpid, &proc); + if (OMPI_SUCCESS != ret) { + return OMPI_ERR_OUT_OF_RESOURCE; + } - if (i == OMPI_PROC_MY_NAME->vpid) { - ompi_proc_local_proc = proc; - proc->super.proc_flags = OPAL_PROC_ALL_LOCAL; - proc->super.proc_hostname = strdup(ompi_process_info.nodename); - proc->super.proc_arch = opal_local_arch; - /* Register the local proc with OPAL */ - opal_proc_local_set(&proc->super); + /* set local process data */ + ompi_proc_local_proc = proc; + proc->super.proc_flags = OPAL_PROC_ALL_LOCAL; + proc->super.proc_hostname = strdup(ompi_process_info.nodename); + proc->super.proc_arch = opal_local_arch; + /* Register the local proc with OPAL */ + opal_proc_local_set(&proc->super); #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - /* add our arch to the modex */ - OPAL_MODEX_SEND_VALUE(ret, PMIX_SYNC_REQD, PMIX_GLOBAL, - OPAL_DSTORE_ARCH, &opal_local_arch, OPAL_UINT32); - if (OPAL_SUCCESS != ret) { + /* add our arch to the modex */ + OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, + OPAL_PMIX_ARCH, &opal_local_arch, OPAL_UINT32); + if (OPAL_SUCCESS != ret) { + return ret; + } +#endif + + if (ompi_process_info.num_procs < ompi_add_procs_cutoff) { + /* create proc structures and find self */ + for 
(ompi_vpid_t i = 0 ; i < ompi_process_info.num_procs ; ++i ) { + if (i == OMPI_PROC_MY_NAME->vpid) { + continue; + } + + ret = ompi_proc_allocate (OMPI_PROC_MY_NAME->jobid, i, &proc); + if (OMPI_SUCCESS != ret) { return ret; } -#endif } } return OMPI_SUCCESS; } +static int ompi_proc_compare_vid (opal_list_item_t **a, opal_list_item_t **b) +{ + ompi_proc_t *proca = (ompi_proc_t *) *a; + ompi_proc_t *procb = (ompi_proc_t *) *b; + + if (proca->super.proc_name.vpid > procb->super.proc_name.vpid) { + return 1; + } else { + return -1; + } + + /* they should never be equal */ +} /** * The process creation is split into two steps. The second step @@ -141,81 +330,42 @@ int ompi_proc_complete_init(void) { ompi_proc_t *proc; int ret, errcode = OMPI_SUCCESS; - opal_list_t myvals; - opal_value_t *kv; - OPAL_THREAD_LOCK(&ompi_proc_lock); + opal_mutex_lock (&ompi_proc_lock); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { - if (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid != OMPI_PROC_MY_NAME->vpid) { - /* get the locality information - do not use modex recv for - * this request as that will automatically cause the hostname - * to be loaded as well. All RTEs are required to provide this - * information at startup for procs on our node. Thus, not - * finding the info indicates that the proc is non-local. 
- */ - OBJ_CONSTRUCT(&myvals, opal_list_t); - if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_internal, - &proc->super.proc_name, - OPAL_DSTORE_LOCALITY, &myvals))) { - proc->super.proc_flags = OPAL_PROC_NON_LOCAL; - } else { - kv = (opal_value_t*)opal_list_get_first(&myvals); - proc->super.proc_flags = kv->data.uint16; - } - OPAL_LIST_DESTRUCT(&myvals); - - if (ompi_process_info.num_procs < ompi_direct_modex_cutoff) { - /* IF the number of procs falls below the specified cutoff, - * then we assume the job is small enough that retrieving - * the hostname (which will typically cause retrieval of - * ALL modex info for this proc) will have no appreciable - * impact on launch scaling - */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_HOSTNAME, (opal_proc_t*)&proc->super, - (char**)&(proc->super.proc_hostname), OPAL_STRING); - if (OPAL_SUCCESS != ret) { - errcode = ret; - break; - } - } else { - /* just set the hostname to NULL for now - we'll fill it in - * as modex_recv's are called for procs we will talk to, thus - * avoiding retrieval of ALL modex info for this proc until - * required. Transports that delay calling modex_recv until - * first message will therefore scale better than those that - * call modex_recv on all procs during init. 
- */ - proc->super.proc_hostname = NULL; - } -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - /* get the remote architecture - this might force a modex except - * for those environments where the RM provides it */ - { - uint32_t *ui32ptr; - ui32ptr = &(proc->super.proc_arch); - OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_ARCH, (opal_proc_t*)&proc->super, - (void**)&ui32ptr, OPAL_UINT32); - if (OPAL_SUCCESS == ret) { - /* if arch is different than mine, create a new convertor for this proc */ - if (proc->super.proc_arch != opal_local_arch) { - OBJ_RELEASE(proc->super.proc_convertor); - proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0); - } - } else if (OMPI_ERR_NOT_IMPLEMENTED == ret) { - proc->super.proc_arch = opal_local_arch; - } else { - errcode = ret; - break; + ret = ompi_proc_complete_init_single (proc); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + errcode = ret; + break; + } + } + opal_mutex_unlock (&ompi_proc_lock); + + if (ompi_process_info.num_procs >= ompi_add_procs_cutoff) { + char *val; + /* retrieve the local peers */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS, + ORTE_PROC_MY_NAME, &val, OPAL_STRING); + if (OPAL_SUCCESS == ret && NULL != val) { + char **peers = opal_argv_split(val, ','); + int i; + free(val); + for (i=0; NULL != peers[i]; i++) { + ompi_vpid_t local_rank = strtoul(peers[i], NULL, 10); + opal_process_name_t proc_name = {.vpid = local_rank, .jobid = OMPI_PROC_MY_NAME->jobid}; + + if (OMPI_PROC_MY_NAME->vpid == local_rank) { + continue; } + (void) ompi_proc_for_name (proc_name); } -#else - /* must be same arch as my own */ - proc->super.proc_arch = opal_local_arch; -#endif + opal_argv_free(peers); } } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + + opal_list_sort (&ompi_proc_list, ompi_proc_compare_vid); + return errcode; } @@ -251,11 +401,17 @@ int ompi_proc_finalize (void) /* now destruct the list and thread lock */ OBJ_DESTRUCT(&ompi_proc_list); OBJ_DESTRUCT(&ompi_proc_lock); + OBJ_DESTRUCT(&ompi_proc_hash); return 
OMPI_SUCCESS; } -ompi_proc_t** ompi_proc_world(size_t *size) +int ompi_proc_world_size (void) +{ + return ompi_process_info.num_procs; +} + +ompi_proc_t **ompi_proc_get_allocated (size_t *size) { ompi_proc_t **procs; ompi_proc_t *proc; @@ -271,10 +427,8 @@ ompi_proc_t** ompi_proc_world(size_t *size) my_name = *OMPI_CAST_RTE_NAME(&ompi_proc_local_proc->super.proc_name); /* First count how many match this jobid */ - OPAL_THREAD_LOCK(&ompi_proc_lock); - for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); - proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); - proc = (ompi_proc_t*)opal_list_get_next(proc)) { + opal_mutex_lock (&ompi_proc_lock); + OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, OMPI_CAST_RTE_NAME(&proc->super.proc_name), &my_name)) { ++count; } @@ -283,15 +437,13 @@ ompi_proc_t** ompi_proc_world(size_t *size) /* allocate an array */ procs = (ompi_proc_t**) malloc(count * sizeof(ompi_proc_t*)); if (NULL == procs) { - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return NULL; } /* now save only the procs that match this jobid */ count = 0; - for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); - proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); - proc = (ompi_proc_t*)opal_list_get_next(proc)) { + OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, &my_name)) { /* DO NOT RETAIN THIS OBJECT - the reference count on this * object will be adjusted by external callers. 
The intent @@ -310,9 +462,55 @@ ompi_proc_t** ompi_proc_world(size_t *size) procs[count++] = proc; } } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); + + *size = count; + return procs; +} + +ompi_proc_t **ompi_proc_world (size_t *size) +{ + ompi_proc_t **procs; + size_t count = 0; + + /* check bozo case */ + if (NULL == ompi_proc_local_proc) { + return NULL; + } + + /* First count how many match this jobid (we already know this from our process info) */ + count = ompi_process_info.num_procs; + + /* allocate an array */ + procs = (ompi_proc_t **) malloc (count * sizeof(ompi_proc_t*)); + if (NULL == procs) { + return NULL; + } + + /* now get/allocate all the procs in this jobid */ + for (size_t i = 0 ; i < count ; ++i) { + opal_process_name_t name = {.jobid = OMPI_CAST_RTE_NAME(&ompi_proc_local_proc->super.proc_name)->jobid, + .vpid = i}; + + /* DO NOT RETAIN THIS OBJECT - the reference count on this + * object will be adjusted by external callers. The intent + * here is to allow the reference count to drop to zero if + * the app no longer desires to communicate with this proc. + * For example, the proc may call comm_disconnect on all + * communicators involving this proc. In such cases, we want + * the proc object to be removed from the list. By not incrementing + * the reference count here, we allow this to occur. + * + * We don't implement that yet, but we are still safe for now as + * the OBJ_NEW in ompi_proc_init owns the initial reference + * count which cannot be released until ompi_proc_finalize is + * called. 
+ */ + procs[i] = (ompi_proc_t*)ompi_proc_for_name (name); + } *size = count; + return procs; } @@ -328,10 +526,8 @@ ompi_proc_t** ompi_proc_all(size_t* size) return NULL; } - OPAL_THREAD_LOCK(&ompi_proc_lock); - for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); - proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); - proc = (ompi_proc_t*)opal_list_get_next(proc)) { + opal_mutex_lock (&ompi_proc_lock); + OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { /* We know this isn't consistent with the behavior in ompi_proc_world, * but we are leaving the RETAIN for now because the code using this function * assumes that the results need to be released when done. It will @@ -341,7 +537,7 @@ ompi_proc_t** ompi_proc_all(size_t* size) OBJ_RETAIN(proc); procs[count++] = proc; } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); *size = count; return procs; } @@ -372,16 +568,14 @@ ompi_proc_t * ompi_proc_find ( const ompi_process_name_t * name ) /* return the proc-struct which matches this jobid+process id */ mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID; - OPAL_THREAD_LOCK(&ompi_proc_lock); - for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); - proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); - proc = (ompi_proc_t*)opal_list_get_next(proc)) { + opal_mutex_lock (&ompi_proc_lock); + OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, name)) { rproc = proc; break; } } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return rproc; } @@ -390,19 +584,12 @@ ompi_proc_t * ompi_proc_find ( const ompi_process_name_t * name ) int ompi_proc_refresh(void) { ompi_proc_t *proc = NULL; - opal_list_item_t *item = NULL; ompi_vpid_t i = 0; int ret=OMPI_SUCCESS; - opal_list_t myvals; - opal_value_t *kv; - - OPAL_THREAD_LOCK(&ompi_proc_lock); - for( item = opal_list_get_first(&ompi_proc_list), i = 0; - item != 
opal_list_get_end(&ompi_proc_list); - item = opal_list_get_next(item), ++i ) { - proc = (ompi_proc_t*)item; + opal_mutex_lock (&ompi_proc_lock); + OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { /* Does not change: proc->super.proc_name.vpid */ OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = OMPI_PROC_MY_NAME->jobid; @@ -416,81 +603,27 @@ int ompi_proc_refresh(void) proc->super.proc_arch = opal_local_arch; opal_proc_local_set(&proc->super); } else { - /* get the locality information - do not use modex recv for - * this request as that will automatically cause the hostname - * to be loaded as well. All RTEs are required to provide this - * information at startup for procs on our node. Thus, not - * finding the info indicates that the proc is non-local. - */ - OBJ_CONSTRUCT(&myvals, opal_list_t); - if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_internal, - &proc->super.proc_name, - OPAL_DSTORE_LOCALITY, &myvals))) { - proc->super.proc_flags = OPAL_PROC_NON_LOCAL; - } else { - kv = (opal_value_t*)opal_list_get_first(&myvals); - proc->super.proc_flags = kv->data.uint16; - } - OPAL_LIST_DESTRUCT(&myvals); - - if (ompi_process_info.num_procs < ompi_direct_modex_cutoff) { - /* IF the number of procs falls below the specified cutoff, - * then we assume the job is small enough that retrieving - * the hostname (which will typically cause retrieval of - * ALL modex info for this proc) will have no appreciable - * impact on launch scaling - */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_HOSTNAME, (opal_proc_t*)&proc->super, - (char**)&(proc->super.proc_hostname), OPAL_STRING); - if (OMPI_SUCCESS != ret) { - break; - } - } else { - /* just set the hostname to NULL for now - we'll fill it in - * as modex_recv's are called for procs we will talk to, thus - * avoiding retrieval of ALL modex info for this proc until - * required. 
Transports that delay calling modex_recv until - * first message will therefore scale better than those that - * call modex_recv on all procs during init. - */ - proc->super.proc_hostname = NULL; - } -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - { - /* get the remote architecture */ - uint32_t* uiptr = &(proc->super.proc_arch); - OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_ARCH, (opal_proc_t*)&proc->super, - (void**)&uiptr, OPAL_UINT32); - if (OMPI_SUCCESS != ret) { - break; - } - /* if arch is different than mine, create a new convertor for this proc */ - if (proc->super.proc_arch != opal_local_arch) { - OBJ_RELEASE(proc->super.proc_convertor); - proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0); - } + ret = ompi_proc_complete_init_single (proc); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + break; } -#else - /* must be same arch as my own */ - proc->super.proc_arch = opal_local_arch; -#endif } } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); - return ret; + return ret; } int ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, - bool full_info, opal_buffer_t* buf) { - int i, rc; - - OPAL_THREAD_LOCK(&ompi_proc_lock); - + int rc; + char *nspace; + + opal_mutex_lock (&ompi_proc_lock); + /* cycle through the provided array, packing the OMPI level * data for each proc. This data may or may not be included * in any subsequent modex operation, so we include it here @@ -503,121 +636,87 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, * reduced. For now, just go ahead and pack the info so it * can be sent. 
*/ - for (i=0; isuper.proc_name), 1, OMPI_NAME); + for (int i = 0 ; i < proclistsize ; ++i) { + ompi_proc_t *proc = proclist[i]; + + if (ompi_proc_is_sentinel (proc)) { + proc = ompi_proc_for_name_nolock (ompi_proc_sentinel_to_name ((uintptr_t) proc)); + } + + /* send proc name */ + rc = opal_dss.pack(buf, &(proc->super.proc_name), 1, OMPI_NAME); if(rc != OPAL_SUCCESS) { OMPI_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return rc; } - if (full_info) { - int32_t num_entries; - opal_value_t *kv; - opal_list_t data; - - /* fetch all info we know about the peer - while - * the remote procs may already know some of it, we cannot - * be certain they do. So we must include a full dump of - * everything we know about this proc - */ - OBJ_CONSTRUCT(&data, opal_list_t); - rc = opal_dstore.fetch(opal_dstore_internal, - &proclist[i]->super.proc_name, - NULL, &data); - if (OPAL_SUCCESS != rc) { - OMPI_ERROR_LOG(rc); - num_entries = 0; - } else { - /* count the number of entries we will send */ - num_entries = opal_list_get_size(&data); - } - - /* put the number of entries into the buffer */ - rc = opal_dss.pack(buf, &num_entries, 1, OPAL_INT32); - if (OPAL_SUCCESS != rc) { - OMPI_ERROR_LOG(rc); - break; - } - - /* if there are entries, store them */ - while (NULL != (kv = (opal_value_t*)opal_list_remove_first(&data))) { - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &kv, 1, OPAL_VALUE))) { - OMPI_ERROR_LOG(rc); - break; - } - OBJ_RELEASE(kv); - } - OBJ_DESTRUCT(&data); - - } else { - rc = opal_dss.pack(buf, &(proclist[i]->super.proc_arch), 1, OPAL_UINT32); - if(rc != OPAL_SUCCESS) { - OMPI_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_proc_lock); - return rc; - } - rc = opal_dss.pack(buf, &(proclist[i]->super.proc_hostname), 1, OPAL_STRING); - if(rc != OPAL_SUCCESS) { - OMPI_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_proc_lock); - return rc; - } + /* retrieve and send the corresponding nspace for this job + * as the remote side may not 
know the translation */ + nspace = (char*)opal_pmix.get_nspace(proc->super.proc_name.jobid); + rc = opal_dss.pack(buf, &nspace, 1, OPAL_STRING); + if(rc != OPAL_SUCCESS) { + OMPI_ERROR_LOG(rc); + opal_mutex_unlock (&ompi_proc_lock); + return rc; + } + /* pack architecture flag */ + rc = opal_dss.pack(buf, &(proc->super.proc_arch), 1, OPAL_UINT32); + if(rc != OPAL_SUCCESS) { + OMPI_ERROR_LOG(rc); + opal_mutex_unlock (&ompi_proc_lock); + return rc; + } + /* pass the name of the host this proc is on */ + rc = opal_dss.pack(buf, &(proc->super.proc_hostname), 1, OPAL_STRING); + if(rc != OPAL_SUCCESS) { + OMPI_ERROR_LOG(rc); + opal_mutex_unlock (&ompi_proc_lock); + return rc; } } - OPAL_THREAD_UNLOCK(&ompi_proc_lock); + opal_mutex_unlock (&ompi_proc_lock); return OMPI_SUCCESS; } -static ompi_proc_t * +ompi_proc_t * ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew) { ompi_proc_t *proc, *rproc = NULL; ompi_rte_cmp_bitmask_t mask; - + /* return the proc-struct which matches this jobid+process id */ mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID; - OPAL_THREAD_LOCK(&ompi_proc_lock); - for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); - proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); - proc = (ompi_proc_t*)opal_list_get_next(proc)) { + opal_mutex_lock (&ompi_proc_lock); + OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, name)) { rproc = proc; *isnew = false; break; } } - + /* if we didn't find this proc in the list, create a new * proc_t and append it to the list */ if (NULL == rproc) { *isnew = true; rproc = OBJ_NEW(ompi_proc_t); - if (NULL != rproc) { - opal_list_append(&ompi_proc_list, (opal_list_item_t*)rproc); - *OMPI_CAST_RTE_NAME(&rproc->super.proc_name) = *name; - } - /* caller had better fill in the rest of the proc, or there's - going to be pain later... 
*/ + ompi_proc_allocate (name->jobid, name->vpid, &rproc); } - - OPAL_THREAD_UNLOCK(&ompi_proc_lock); - + + opal_mutex_unlock (&ompi_proc_lock); + return rproc; } int -ompi_proc_unpack(opal_buffer_t* buf, +ompi_proc_unpack(opal_buffer_t* buf, int proclistsize, ompi_proc_t ***proclist, - bool full_info, int *newproclistsize, ompi_proc_t ***newproclist) { - int i; size_t newprocs_len = 0; ompi_proc_t **plist=NULL, **newprocs = NULL; - opal_list_t myvals; - opal_value_t *kv; /* do not free plist *ever*, since it is used in the remote group structure of a communicator */ @@ -635,13 +734,14 @@ ompi_proc_unpack(opal_buffer_t* buf, /* cycle through the array of provided procs and unpack * their info - as packed by ompi_proc_pack */ - for ( i=0; idata.uint32; - } else { - new_arch = opal_local_arch; - } - OPAL_LIST_DESTRUCT(&myvals); -#else - new_arch = opal_local_arch; -#endif - if (ompi_process_info.num_procs < ompi_direct_modex_cutoff) { - /* retrieve the hostname */ - OBJ_CONSTRUCT(&myvals, opal_list_t); - rc = opal_dstore.fetch(opal_dstore_internal, - &new_name, - OPAL_DSTORE_HOSTNAME, &myvals); - if( OPAL_SUCCESS == rc ) { - kv = (opal_value_t*)opal_list_get_first(&myvals); - new_hostname = strdup(kv->data.string); - } else { - new_hostname = NULL; - } - OPAL_LIST_DESTRUCT(&myvals); - } else { - /* just set the hostname to NULL for now - we'll fill it in - * as modex_recv's are called for procs we will talk to - */ - new_hostname = NULL; - } - } /* update all the values */ plist[i]->super.proc_arch = new_arch; /* if arch is different than mine, create a new convertor for this proc */ @@ -753,7 +792,7 @@ ompi_proc_unpack(opal_buffer_t* buf, #else opal_show_help("help-mpi-runtime.txt", "heterogeneous-support-unavailable", - true, ompi_process_info.nodename, + true, ompi_process_info.nodename, new_hostname == NULL ? 
"" : new_hostname); free(plist); @@ -770,28 +809,6 @@ ompi_proc_unpack(opal_buffer_t* buf, /* Save the hostname */ plist[i]->super.proc_hostname = new_hostname; } - - } else { - if (full_info) { - int32_t num_recvd_entries; - int32_t j, cnt; - - /* discard all keys: they are already locally known */ - cnt = 1; - if (OPAL_SUCCESS == (rc = opal_dss.unpack(buf, &num_recvd_entries, &cnt, OPAL_INT32))) { - for (j = 0; j < num_recvd_entries; j++) { - opal_value_t *kv; - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &kv, &cnt, OPAL_VALUE))) { - OMPI_ERROR_LOG(rc); - continue; - } - OBJ_RELEASE(kv); - } - } else { - OMPI_ERROR_LOG(rc); - } - } } } diff --git a/ompi/proc/proc.h b/ompi/proc/proc.h index 6fa7c14b141..1d42ac5f30e 100644 --- a/ompi/proc/proc.h +++ b/ompi/proc/proc.h @@ -5,18 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -47,7 +49,7 @@ BEGIN_C_DECLS /** * Remote Open MPI process structure - * + * * Remote Open MPI process structure. Each process contains exactly * one ompi_proc_t structure for each remote process it knows about. * @@ -59,15 +61,20 @@ BEGIN_C_DECLS * should call OMPI_REQUIRE_ENDPOINT_TAG(). 
Requests which * share the same name will have the same value, allowing * cross-component sharing of endpoint data. The tag may be referenced - * by the pre-processor define OMPI_PROC_ENDPOINT_TAG_. Adding + * by the pre-processor define OMPI_PROC_ENDPOINT_TAG_. Adding * a tag increases the memory consumed by Open MPI, so should only be done * if unavoidable. */ + +#define OMPI_PROC_PADDING_SIZE 16 + struct ompi_proc_t { opal_proc_t super; /* endpoint data */ void *proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MAX]; + + char padding[OMPI_PROC_PADDING_SIZE]; /* for future extensions (OSHMEM uses this area also)*/ }; typedef struct ompi_proc_t ompi_proc_t; OBJ_CLASS_DECLARATION(ompi_proc_t); @@ -83,7 +90,7 @@ OBJ_CLASS_DECLARATION(ompi_proc_t); * Please use ompi_proc_local() instead. */ OMPI_DECLSPEC extern ompi_proc_t* ompi_proc_local_proc; - +OMPI_DECLSPEC extern opal_list_t ompi_proc_list; /* ******************************************************************** */ @@ -118,6 +125,18 @@ OMPI_DECLSPEC int ompi_proc_init(void); */ OMPI_DECLSPEC int ompi_proc_complete_init(void); +/** + * Complete filling up the proc information (arch, name and locality) for + * a given proc. This function is to be called only after the modex exchange + * has been completed. + * + * @param[in] proc the proc whose information will be filled up + * + * @retval OMPI_SUCCESS All information correctly set. + * @retval OMPI_ERROR Some info could not be initialized. + */ +OMPI_DECLSPEC int ompi_proc_complete_init_single(ompi_proc_t* proc); + /** * Finalize the OMPI Process subsystem * @@ -136,7 +155,10 @@ OMPI_DECLSPEC int ompi_proc_finalize(void); * Returns the list of proc instances associated with this job. Given * the current association between a job and an MPI_COMM_WORLD, this * function provides the process instances for the current - * MPI_COMM_WORLD. + * MPI_COMM_WORLD. 
Use this function only if absolutely needed as it + * will cause ompi_proc_t objects to be allocated for every process in + * the job. If you only need the allocated ompi_proc_t objects call + * ompi_proc_get_allocated() instead. * * @note The reference count of each process in the array is * NOT incremented - the caller is responsible for ensuring the @@ -150,6 +172,36 @@ OMPI_DECLSPEC int ompi_proc_finalize(void); */ OMPI_DECLSPEC ompi_proc_t** ompi_proc_world(size_t* size); +/** + * Returns the number of processes in the associated with this job. + * + * Returns the list of proc instances associated with this job. Given + * the current association between a job and an MPI_COMM_WORLD, this + * function provides the number of processes for the current + * MPI_COMM_WORLD. + */ + +OMPI_DECLSPEC int ompi_proc_world_size (void); + +/** + * Returns the list of proc instances associated with this job. + * + * Returns the list of proc instances associated with this job that have + * already been allocated. Given the current association between a job + * and an MPI_COMM_WORLD, this function provides the allocated process + * instances for the current MPI_COMM_WORLD. + * + * @note The reference count of each process in the array is + * NOT incremented - the caller is responsible for ensuring the + * correctness of the reference count once they are done with + * the array. + * + * @param[in] size Number of processes in the ompi_proc_t array + * + * @return Array of pointers to allocated proc instances in the current + * MPI_COMM_WORLD, or NULL if there is an internal failure. + */ +OMPI_DECLSPEC ompi_proc_t **ompi_proc_get_allocated (size_t *size); /** * Returns the list of all known proc instances. 
@@ -200,14 +252,14 @@ OMPI_DECLSPEC ompi_proc_t** ompi_proc_self(size_t* size); * * @return Pointer to the local process structure */ -static inline ompi_proc_t* ompi_proc_local(void) -{ +static inline ompi_proc_t* ompi_proc_local(void) +{ return ompi_proc_local_proc; } /** - * Returns the proc instance for a given name + * Returns the proc instance for a given name * * Returns the proc instance for the specified process name. The * reference count for the proc instance is not incremented by this @@ -219,6 +271,8 @@ static inline ompi_proc_t* ompi_proc_local(void) */ OMPI_DECLSPEC ompi_proc_t * ompi_proc_find ( const ompi_process_name_t* name ); +OMPI_DECLSPEC ompi_proc_t * ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew); + /** * Pack proc list into portable buffer * @@ -228,17 +282,17 @@ OMPI_DECLSPEC ompi_proc_t * ompi_proc_find ( const ompi_process_name_t* name ); * process name, the architecture, and the hostname. Ordering is * maintained. The buffer is packed to be sent to a remote node with * different architecture (endian or word size). - * + * * @param[in] proclist List of process pointers * @param[in] proclistsize Length of the proclist array - * @param[in,out] buf An opal_buffer containing the packed names. + * @param[in,out] buf An opal_buffer containing the packed names. 
* The buffer must be constructed but empty when * passed to this function * @retval OMPI_SUCCESS Success * @retval OMPI_ERROR Unspecified error */ -OMPI_DECLSPEC int ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, - bool full_info, +OMPI_DECLSPEC int ompi_proc_pack(ompi_proc_t **proclist, + int proclistsize, opal_buffer_t *buf); @@ -281,10 +335,11 @@ OMPI_DECLSPEC int ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, * OMPI_SUCCESS on success * OMPI_ERROR else */ -OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf, - int proclistsize, ompi_proc_t ***proclist, - bool full_info, - int *newproclistsize, ompi_proc_t ***newproclist); +OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf, + int proclistsize, + ompi_proc_t ***proclist, + int *newproclistsize, + ompi_proc_t ***newproclist); /** * Refresh the OMPI process subsystem @@ -301,6 +356,94 @@ OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf, */ OMPI_DECLSPEC int ompi_proc_refresh(void); +/** + * Get the ompi_proc_t for a given process name + * + * @param[in] proc_name opal process name + * + * @returns cached or new ompi_proc_t for the given process name + * + * This function looks up the given process name in the hash of existing + * ompi_proc_t structures. If no ompi_proc_t structure exists matching the + * given name a new ompi_proc_t is allocated, initialized, and returned. + * + * @note The ompi_proc_t is added to the local list of processes but is not + * added to any communicator. ompi_comm_peer_lookup is responsible for caching + * the ompi_proc_t on a communicator. 
+ */ +OMPI_DECLSPEC opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name); + + +OMPI_DECLSPEC opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name); + +/** + * Check if an ompi_proc_t is a sentinel + */ +static inline bool ompi_proc_is_sentinel (ompi_proc_t *proc) +{ + return (intptr_t) proc & 0x1; +} + +#if OPAL_SIZEOF_PROCESS_NAME_T == SIZEOF_VOID_P +/* + * we assume an ompi_proc_t is at least aligned on two bytes, + * so if the LSB of a pointer to an ompi_proc_t is 1, we have to handle + * this pointer as a sentinel instead of a pointer. + * a sentinel can be seen as an uint64_t with the following format : + * - bit 0 : 1 + * - bits 1-15 : local jobid + * - bits 16-31 : job family + * - bits 32-63 : vpid + */ +static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name) +{ + uintptr_t tmp, sentinel = 0; + /* local jobid must fit in 15 bits */ + assert(! (OMPI_LOCAL_JOBID(name.jobid) & 0x8000)); + sentinel |= 0x1; + tmp = (uintptr_t)OMPI_LOCAL_JOBID(name.jobid); + sentinel |= ((tmp << 1) & 0xfffe); + tmp = (uintptr_t)OMPI_JOB_FAMILY(name.jobid); + sentinel |= ((tmp << 16) & 0xffff0000); + tmp = (uintptr_t)name.vpid; + sentinel |= ((tmp << 32) & 0xffffffff00000000); + return sentinel; +} + +static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel) +{ + opal_process_name_t name; + uint32_t local, family; + uint32_t vpid; + assert(sentinel & 0x1); + local = (sentinel >> 1) & 0x7fff; + family = (sentinel >> 16) & 0xffff; + vpid = (sentinel >> 32) & 0xffffffff; + name.jobid = OMPI_CONSTRUCT_JOBID(family,local); + name.vpid = vpid; + return name; +} +#elif 4 == SIZEOF_VOID_P +/* + * currently, a sentinel is only made from the current jobid aka OMPI_PROC_MY_NAME->jobid + * so we only store the first 31 bits of the vpid + */ +static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name) +{ + assert(OMPI_PROC_MY_NAME->jobid == name.jobid); + return (uintptr_t)((name.vpid <<1) | 0x1); +} 
+ +static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel) +{ + opal_process_name_t name; + name.jobid = OMPI_PROC_MY_NAME->jobid; + name.vpid = sentinel >> 1; + return name; +} +#else +#error unsupported pointer size +#endif END_C_DECLS diff --git a/ompi/request/Makefile.am b/ompi/request/Makefile.am index 14732387213..e4a1254e04a 100644 --- a/ompi/request/Makefile.am +++ b/ompi/request/Makefile.am @@ -6,14 +6,17 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -25,7 +28,7 @@ headers += \ request/request.h \ request/request_dbg.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ request/grequest.c \ request/request.c \ request/req_test.c \ diff --git a/ompi/request/grequest.c b/ompi/request/grequest.c index 6ac5e9a9517..10d8885d82d 100644 --- a/ompi/request/grequest.c +++ b/ompi/request/grequest.c @@ -2,19 +2,19 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -50,10 +50,10 @@ static int ompi_grequest_cancel(ompi_request_t* req, int flag) if (greq->greq_cancel.c_cancel != NULL) { if (greq->greq_funcs_are_c) { - rc = greq->greq_cancel.c_cancel(greq->greq_state, - greq->greq_base.req_complete); + rc = greq->greq_cancel.c_cancel(greq->greq_state, + REQUEST_COMPLETE(&greq->greq_base)); } else { - fflag = (ompi_fortran_logical_t) greq->greq_base.req_complete; + fflag = (ompi_fortran_logical_t) REQUEST_COMPLETE(&greq->greq_base); greq->greq_cancel.f_cancel((MPI_Aint*)greq->greq_state, &fflag, &ierr); rc = OMPI_FINT_2_INT(ierr); } @@ -89,7 +89,7 @@ static void ompi_grequest_construct(ompi_grequest_t* greq) * object. * * 2. Call MPI_REQUEST_FREE and then (!) -- with some other - * still-valid copy of the handler -- call MPI_GREQUEST_COMPLETE. + * still-valid copy of the handler -- call MPI_GREQUEST_COMPLETE. * * 3. Reverse the order of #2 -- call MPI_GREQUEST_COMPLETE and then * MPI_REQUEST_FREE. @@ -161,7 +161,7 @@ int ompi_grequest_start( greq->greq_state = gstate; greq->greq_query.c_query = gquery_fn; greq->greq_free.c_free = gfree_fn; - greq->greq_cancel.c_cancel = gcancel_fn; + greq->greq_cancel.c_cancel = gcancel_fn; greq->greq_base.req_status = ompi_status_empty; *request = &greq->greq_base; @@ -181,9 +181,7 @@ int ompi_grequest_complete(ompi_request_t *req) { int rc; - OPAL_THREAD_LOCK(&ompi_request_lock); rc = ompi_request_complete(req, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); OBJ_RELEASE(req); return rc; } @@ -192,7 +190,7 @@ int ompi_grequest_complete(ompi_request_t *req) /* * Grequest queries are invoked in two places: * - * 1. MPI_TEST* / MPI_WAIT*, when requests have completed. + * 1. 
MPI_TEST* / MPI_WAIT*, when requests have completed. * * 2. MPI_REQUEST_GET_STATUS, when requests may or may not have * completed. diff --git a/ompi/request/grequest.h b/ompi/request/grequest.h index 9ef38639cd6..c1b7fb71c4e 100644 --- a/ompi/request/grequest.h +++ b/ompi/request/grequest.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,19 +29,19 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_grequest_t); /** * Fortran type for generalized request query function */ -typedef void (MPI_F_Grequest_query_function)(MPI_Aint *extra_state, - MPI_Fint *status, +typedef void (MPI_F_Grequest_query_function)(MPI_Aint *extra_state, + MPI_Fint *status, MPI_Fint *ierr); /** * Fortran type for generalized request free function */ -typedef void (MPI_F_Grequest_free_function)(MPI_Aint *extra_state, +typedef void (MPI_F_Grequest_free_function)(MPI_Aint *extra_state, MPI_Fint *ierr); /** * Fortran type for generalized request cancel function */ -typedef void (MPI_F_Grequest_cancel_function)(MPI_Aint *extra_state, - ompi_fortran_logical_t *complete, +typedef void (MPI_F_Grequest_cancel_function)(MPI_Aint *extra_state, + ompi_fortran_logical_t *complete, MPI_Fint *ierr); /** diff --git a/ompi/request/req_test.c b/ompi/request/req_test.c index b4853cdd6bd..241f7910593 100644 --- a/ompi/request/req_test.c +++ b/ompi/request/req_test.c @@ -1,8 +1,9 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of 
Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -12,6 +13,8 @@ * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,13 +28,12 @@ #include "ompi/request/request_default.h" #include "ompi/request/grequest.h" -#include "ompi/mca/crcp/crcp.h" - -int ompi_request_default_test( ompi_request_t ** rptr, - int *completed, - ompi_status_public_t * status ) +int ompi_request_default_test(ompi_request_t ** rptr, + int *completed, + ompi_status_public_t * status ) { ompi_request_t *request = *rptr; + #if OPAL_ENABLE_PROGRESS_THREADS == 0 int do_it_once = 0; @@ -46,8 +48,7 @@ int ompi_request_default_test( ompi_request_t ** rptr, return OMPI_SUCCESS; } - if (request->req_complete) { - OMPI_CRCP_REQUEST_COMPLETE(request); + if( REQUEST_COMPLETE(request) ) { *completed = true; /* For a generalized request, we *have* to call the query_fn @@ -117,8 +118,7 @@ int ompi_request_default_test_any( continue; } - if( request->req_complete ) { - OMPI_CRCP_REQUEST_COMPLETE(request); + if( REQUEST_COMPLETE(request) ) { *index = i; *completed = true; @@ -193,8 +193,7 @@ int ompi_request_default_test_all( request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE || - request->req_complete) { - OMPI_CRCP_REQUEST_COMPLETE(request); + REQUEST_COMPLETE(request) ) { num_completed++; } } @@ -295,8 +294,7 @@ int ompi_request_default_test_some( 
num_requests_null_inactive++; continue; } - if (true == request->req_complete) { - OMPI_CRCP_REQUEST_COMPLETE(request); + if( REQUEST_COMPLETE(request) ) { indices[num_requests_done++] = i; } } diff --git a/ompi/request/req_wait.c b/ompi/request/req_wait.c index 8c4e2ce9331..21afa95f348 100644 --- a/ompi/request/req_wait.c +++ b/ompi/request/req_wait.c @@ -1,21 +1,25 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,13 +29,8 @@ #include "ompi/request/request_default.h" #include "ompi/request/grequest.h" -#include "opal/runtime/opal_cr.h" -#include "ompi/mca/crcp/crcp.h" #include "ompi/mca/pml/base/pml_base_request.h" -#if OPAL_ENABLE_PROGRESS_THREADS -static int ompi_progress_thread_count=0; -#endif int ompi_request_default_wait( ompi_request_t ** req_ptr, @@ -41,10 +40,7 @@ int ompi_request_default_wait( ompi_request_wait_completion(req); -#if OPAL_ENABLE_FT_CR == 1 - OMPI_CRCP_REQUEST_COMPLETE(req); -#endif - + /* return status. If it's a generalized request, we *have* to invoke the query_fn, even if the user procided STATUS_IGNORE. MPI-2:8.2. */ @@ -83,136 +79,105 @@ int ompi_request_default_wait( } -int ompi_request_default_wait_any( - size_t count, - ompi_request_t ** requests, - int *index, - ompi_status_public_t * status) +int ompi_request_default_wait_any(size_t count, + ompi_request_t ** requests, + int *index, + ompi_status_public_t * status) { -#if OPAL_ENABLE_PROGRESS_THREADS - int c; -#endif - size_t i=0, num_requests_null_inactive=0; + size_t i, completed = count, num_requests_null_inactive = 0; int rc = OMPI_SUCCESS; - int completed = -1; - ompi_request_t **rptr=NULL; ompi_request_t *request=NULL; + ompi_wait_sync_t sync; -#if OPAL_ENABLE_PROGRESS_THREADS - /* poll for completion */ - OPAL_THREAD_ADD32(&ompi_progress_thread_count,1); - for (c = 0; completed < 0 && c < opal_progress_spin_count; c++) { - rptr = requests; - num_requests_null_inactive = 0; - for (i = 0; i < count; i++, rptr++) { - request = *rptr; - /* - * Check for null or completed persistent request. 
- * For MPI_REQUEST_NULL, the req_state is always OMPI_REQUEST_INACTIVE - */ - if( request->req_state == OMPI_REQUEST_INACTIVE ) { - num_requests_null_inactive++; - continue; - } - if (true == request->req_complete) { - completed = i; - OPAL_THREAD_ADD32(&ompi_progress_thread_count,-1); - goto finished; - } - } - if( num_requests_null_inactive == count ) { - OPAL_THREAD_ADD32(&ompi_progress_thread_count,-1); - goto finished; - } - opal_progress(); - } - OPAL_THREAD_ADD32(&ompi_progress_thread_count,-1); -#endif - - /* give up and sleep until completion */ - OPAL_THREAD_LOCK(&ompi_request_lock); - ompi_request_waiting++; - do { - rptr = requests; - num_requests_null_inactive = 0; - for (i = 0; i < count; i++, rptr++) { - request = *rptr; + WAIT_SYNC_INIT(&sync, 1); - /* Sanity test */ - if( NULL == request) { - continue; - } + num_requests_null_inactive = 0; + for (i = 0; i < count; i++) { + request = requests[i]; - /* - * Check for null or completed persistent request. - * For MPI_REQUEST_NULL, the req_state is always OMPI_REQUEST_INACTIVE. - */ - if( request->req_state == OMPI_REQUEST_INACTIVE ) { - num_requests_null_inactive++; - continue; - } - if (request->req_complete == true) { - completed = i; - break; - } - } - if(num_requests_null_inactive == count) - break; - if (completed < 0) { - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); + /* Check for null or completed persistent request. For + * MPI_REQUEST_NULL, the req_state is always OMPI_REQUEST_INACTIVE. 
+ */ + if( request->req_state == OMPI_REQUEST_INACTIVE ) { + num_requests_null_inactive++; + continue; } - } while (completed < 0); - ompi_request_waiting--; - OPAL_THREAD_UNLOCK(&ompi_request_lock); -#if OPAL_ENABLE_PROGRESS_THREADS -finished: -#endif /* OPAL_ENABLE_PROGRESS_THREADS */ + if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) { + assert(REQUEST_COMPLETE(request)); + completed = i; + *index = i; + goto after_sync_wait; + } + } if(num_requests_null_inactive == count) { *index = MPI_UNDEFINED; if (MPI_STATUS_IGNORE != status) { *status = ompi_status_empty; } - } else { - assert( true == request->req_complete ); - /* Per note above, we have to call gen request query_fn even - if STATUS_IGNORE was provided */ - if (OMPI_REQUEST_GEN == request->req_type) { - rc = ompi_grequest_invoke_query(request, &request->req_status); - } - if (MPI_STATUS_IGNORE != status) { - /* Do *NOT* set status->MPI_ERROR here! See MPI-1.1 doc, - sec 3.2.5, p.22 */ - int old_error = status->MPI_ERROR; - *status = request->req_status; - status->MPI_ERROR = old_error; + /* No signal-in-flight can be in this case */ + WAIT_SYNC_RELEASE_NOWAIT(&sync); + return rc; + } + + SYNC_WAIT(&sync); + + after_sync_wait: + /* recheck the complete status and clean up the sync primitives. + * Do it backward to return the earliest complete request to the + * user. + */ + for(i = completed-1; (i+1) > 0; i--) { + request = requests[i]; + + if( request->req_state == OMPI_REQUEST_INACTIVE ) { + continue; } - rc = request->req_status.MPI_ERROR; - if( request->req_persistent ) { - request->req_state = OMPI_REQUEST_INACTIVE; - } else if (MPI_SUCCESS == rc) { - /* Only free the request if there is no error on it */ - /* If there's an error while freeing the request, - assume that the request is still there. Otherwise, - Bad Things will happen later! */ - rc = ompi_request_free(rptr); + /* Atomically mark the request as pending. 
If this succeed then + * the request was not completed, and it is now marked as pending. + * Otherwise, the request has been completed meanwhile, and it + * has been atomically marked as REQUEST_COMPLETE. + */ + if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { + *index = i; } - *index = completed; } -#if OPAL_ENABLE_FT_CR == 1 - if( opal_cr_is_enabled) { - rptr = requests; - for (i = 0; i < count; i++, rptr++) { - request = *rptr; - if( true == request->req_complete) { - OMPI_CRCP_REQUEST_COMPLETE(request); - } - } + if( *index == (int)completed ) { + /* Only one request has triggered. There was no in-flight + * completions. Drop the signalled flag so we won't block + * in WAIT_SYNC_RELEASE + */ + WAIT_SYNC_SIGNALLED(&sync); } -#endif + request = requests[*index]; + assert( REQUEST_COMPLETE(request) ); + /* Per note above, we have to call gen request query_fn even + if STATUS_IGNORE was provided */ + if (OMPI_REQUEST_GEN == request->req_type) { + rc = ompi_grequest_invoke_query(request, &request->req_status); + } + if (MPI_STATUS_IGNORE != status) { + /* Do *NOT* set status->MPI_ERROR here! See MPI-1.1 doc, + sec 3.2.5, p.22 */ + int old_error = status->MPI_ERROR; + *status = request->req_status; + status->MPI_ERROR = old_error; + } + rc = request->req_status.MPI_ERROR; + if( request->req_persistent ) { + request->req_state = OMPI_REQUEST_INACTIVE; + } else if (MPI_SUCCESS == rc) { + /* Only free the request if there is no error on it */ + /* If there's an error while freeing the request, + assume that the request is still there. Otherwise, + Bad Things will happen later! 
*/ + rc = ompi_request_free(&requests[*index]); + } + + WAIT_SYNC_RELEASE(&sync); return rc; } @@ -221,111 +186,44 @@ int ompi_request_default_wait_all( size_t count, ompi_request_t ** requests, ompi_status_public_t * statuses ) { - size_t completed = 0, i, failed = 0; + size_t i, completed = 0, failed = 0; ompi_request_t **rptr; ompi_request_t *request; int mpi_error = OMPI_SUCCESS; + ompi_wait_sync_t sync; + WAIT_SYNC_INIT(&sync, count); rptr = requests; for (i = 0; i < count; i++) { request = *rptr++; - if (request->req_complete == true) { + if( request->req_state == OMPI_REQUEST_INACTIVE ) { + completed++; + continue; + } + + if (!OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync)) { if( OPAL_UNLIKELY( MPI_SUCCESS != request->req_status.MPI_ERROR ) ) { failed++; } completed++; } } - if( failed > 0 ) { goto finish; } - /* if all requests have not completed -- defer acquiring lock - * unless required - */ - if (completed != count) { - /* - * acquire lock and test for completion - if all requests are - * not completed pend on condition variable until a request - * completes - */ - OPAL_THREAD_LOCK(&ompi_request_lock); - ompi_request_waiting++; -#if OPAL_ENABLE_MULTI_THREADS - /* - * confirm the status of the pending requests. We have to do it before - * taking the condition or otherwise we can miss some requests completion (the - * one that happpens between our initial test and the aquisition of the lock). 
- */ - rptr = requests; - for( completed = i = 0; i < count; i++ ) { - request = *rptr++; - if (request->req_complete == true) { - if( MPI_SUCCESS != request->req_status.MPI_ERROR ) { - failed++; - } - completed++; - } - } - if( failed > 0 ) { - ompi_request_waiting--; - OPAL_THREAD_UNLOCK(&ompi_request_lock); - goto finish; - } -#endif /* OPAL_ENABLE_MULTI_THREADS */ - while( completed != count ) { - /* check number of pending requests */ - size_t start = ompi_request_completed; - size_t pending = count - completed; - size_t start_failed = ompi_request_failed; - /* - * wait until at least pending requests complete - */ - while (pending > ompi_request_completed - start) { - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - /* - * Check for failed requests. If one request fails, then - * this operation completes in error marking the remaining - * requests as PENDING. - */ - if( OPAL_UNLIKELY( 0 < (ompi_request_failed - start_failed) ) ) { - failed += (ompi_request_failed - start_failed); - ompi_request_waiting--; - OPAL_THREAD_UNLOCK(&ompi_request_lock); - goto finish; - } - } - /* - * confirm that all pending operations have completed. - */ - rptr = requests; - for( failed = completed = i = 0; i < count; i++ ) { - request = *rptr++; - if (request->req_complete == true) { - if( MPI_SUCCESS != request->req_status.MPI_ERROR ) { - failed++; - } - completed++; - } - } - } - ompi_request_waiting--; - OPAL_THREAD_UNLOCK(&ompi_request_lock); + if( 0 != completed ) { + wait_sync_update(&sync, completed, OPAL_SUCCESS); } -#if OPAL_ENABLE_FT_CR == 1 - if( opal_cr_is_enabled) { - rptr = requests; - for (i = 0; i < count; i++, rptr++) { - request = *rptr; - if( true == request->req_complete) { - OMPI_CRCP_REQUEST_COMPLETE(request); - } - } + /* wait until all requests complete or until an error is triggered. 
*/ + mpi_error = SYNC_WAIT(&sync); + if( OPAL_SUCCESS != mpi_error ) { + /* if we are in an error case, increase the failed to ensure + proper cleanup during the requests completion. */ + failed++; } -#endif finish: rptr = requests; @@ -334,50 +232,50 @@ int ompi_request_default_wait_all( size_t count, for( i = 0; i < count; i++, rptr++ ) { request = *rptr; - /* - * Assert only if no requests were failed. - * Since some may still be pending. - */ - if( 0 >= failed ) { - assert( true == request->req_complete ); - } - if( request->req_state == OMPI_REQUEST_INACTIVE ) { statuses[i] = ompi_status_empty; continue; } - if (OMPI_REQUEST_GEN == request->req_type) { - ompi_grequest_invoke_query(request, &request->req_status); - } - statuses[i] = request->req_status; - /* - * Per MPI 2.2 p 60: - * Allows requests to be marked as MPI_ERR_PENDING if they are - * "neither failed nor completed." Which can only happen if - * there was an error in one of the other requests. - */ if( OPAL_UNLIKELY(0 < failed) ) { - if( !request->req_complete ) { + /* if we have failed requests we skipped the waiting on the sync. Thus, + * some of the requests might not be properly completed, in which case + * we must detach all requests from the sync. However, if we can succesfully + * mark the request as pending then it is neither failed nor complete, and + * we must stop altering it. + */ + if( OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { + /* + * Per MPI 2.2 p 60: + * Allows requests to be marked as MPI_ERR_PENDING if they are + * "neither failed nor completed." Which can only happen if + * there was an error in one of the other requests. 
+ */ statuses[i].MPI_ERROR = MPI_ERR_PENDING; mpi_error = MPI_ERR_IN_STATUS; continue; } } + assert( REQUEST_COMPLETE(request) ); + + if (OMPI_REQUEST_GEN == request->req_type) { + ompi_grequest_invoke_query(request, &request->req_status); + } + + statuses[i] = request->req_status; if( request->req_persistent ) { request->req_state = OMPI_REQUEST_INACTIVE; continue; - } else { - /* Only free the request if there is no error on it */ - if (MPI_SUCCESS == request->req_status.MPI_ERROR) { - /* If there's an error while freeing the request, - assume that the request is still there. - Otherwise, Bad Things will happen later! */ - int tmp = ompi_request_free(rptr); - if (OMPI_SUCCESS == mpi_error && OMPI_SUCCESS != tmp) { - mpi_error = tmp; - } + } + /* Only free the request if there is no error on it */ + if (MPI_SUCCESS == request->req_status.MPI_ERROR) { + /* If there's an error while freeing the request, + assume that the request is still there. + Otherwise, Bad Things will happen later! */ + int tmp = ompi_request_free(rptr); + if (OMPI_SUCCESS == mpi_error && OMPI_SUCCESS != tmp) { + mpi_error = tmp; } } if( statuses[i].MPI_ERROR != OMPI_SUCCESS) { @@ -385,36 +283,44 @@ int ompi_request_default_wait_all( size_t count, } } } else { + int rc; /* free request if required */ for( i = 0; i < count; i++, rptr++ ) { - int rc; request = *rptr; + if( request->req_state == OMPI_REQUEST_INACTIVE ) { + rc = ompi_status_empty.MPI_ERROR; + goto absorb_error_and_continue; + } /* * Assert only if no requests were failed. * Since some may still be pending. */ - if( 0 >= failed ) { - assert( true == request->req_complete ); - } else { + if( OPAL_UNLIKELY(0 < failed) ) { /* If the request is still pending due to a failed request * then skip it in this loop. 
*/ - if( !request->req_complete ) { - continue; - } - } + if( OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { + /* + * Per MPI 2.2 p 60: + * Allows requests to be marked as MPI_ERR_PENDING if they are + * "neither failed nor completed." Which can only happen if + * there was an error in one of the other requests. + */ + rc = MPI_ERR_PENDING; + goto absorb_error_and_continue; + } + } + assert( REQUEST_COMPLETE(request) ); /* Per note above, we have to call gen request query_fn even if STATUSES_IGNORE was provided */ if (OMPI_REQUEST_GEN == request->req_type) { rc = ompi_grequest_invoke_query(request, &request->req_status); } - if( request->req_state == OMPI_REQUEST_INACTIVE ) { - rc = ompi_status_empty.MPI_ERROR; - } else { - rc = request->req_status.MPI_ERROR; - } + + rc = request->req_status.MPI_ERROR; + if( request->req_persistent ) { request->req_state = OMPI_REQUEST_INACTIVE; } else if (MPI_SUCCESS == rc) { @@ -424,6 +330,7 @@ int ompi_request_default_wait_all( size_t count, mpi_error = tmp; } } + absorb_error_and_continue: /* * Per MPI 2.2 p34: * "It is possible for an MPI function to return MPI_ERR_IN_STATUS @@ -436,154 +343,126 @@ int ompi_request_default_wait_all( size_t count, } } } + WAIT_SYNC_RELEASE(&sync); return mpi_error; } -int ompi_request_default_wait_some( - size_t count, - ompi_request_t ** requests, - int * outcount, - int * indices, - ompi_status_public_t * statuses) +int ompi_request_default_wait_some(size_t count, + ompi_request_t ** requests, + int * outcount, + int * indices, + ompi_status_public_t * statuses) { -#if OPAL_ENABLE_PROGRESS_THREADS - int c; -#endif - size_t i, num_requests_null_inactive=0, num_requests_done=0; + size_t num_requests_null_inactive=0, num_requests_done=0; int rc = MPI_SUCCESS; - ompi_request_t **rptr=NULL; - ompi_request_t *request=NULL; + ompi_request_t **rptr = NULL; + ompi_request_t *request = NULL; + ompi_wait_sync_t sync; + size_t sync_sets = 0, sync_unsets = 0; + + 
WAIT_SYNC_INIT(&sync, 1); *outcount = 0; - for (i = 0; i < count; i++){ - indices[i] = 0; - } -#if OPAL_ENABLE_PROGRESS_THREADS - /* poll for completion */ - OPAL_THREAD_ADD32(&ompi_progress_thread_count,1); - for (c = 0; c < opal_progress_spin_count; c++) { - rptr = requests; - num_requests_null_inactive = 0; - num_requests_done = 0; - for (i = 0; i < count; i++, rptr++) { - request = *rptr; - /* - * Check for null or completed persistent request. - * For MPI_REQUEST_NULL, the req_state is always OMPI_REQUEST_INACTIVE - */ - if (request->req_state == OMPI_REQUEST_INACTIVE ) { - num_requests_null_inactive++; - continue; - } - if (true == request->req_complete) { - indices[i] = 1; - num_requests_done++; - } - } - if (num_requests_null_inactive == count || - num_requests_done > 0) { - OPAL_THREAD_ADD32(&ompi_progress_thread_count,-1); - goto finished; + rptr = requests; + num_requests_null_inactive = 0; + num_requests_done = 0; + for (size_t i = 0; i < count; i++, rptr++) { + request = *rptr; + /* + * Check for null or completed persistent request. + * For MPI_REQUEST_NULL, the req_state is always OMPI_REQUEST_INACTIVE. + */ + if( request->req_state == OMPI_REQUEST_INACTIVE ) { + num_requests_null_inactive++; + continue; } - opal_progress(); - } - OPAL_THREAD_ADD32(&ompi_progress_thread_count,-1); -#endif - /* - * We only get here when outcount still is 0. - * give up and sleep until completion - */ - OPAL_THREAD_LOCK(&ompi_request_lock); - ompi_request_waiting++; - do { - rptr = requests; - num_requests_null_inactive = 0; - num_requests_done = 0; - for (i = 0; i < count; i++, rptr++) { - request = *rptr; - /* - * Check for null or completed persistent request. - * For MPI_REQUEST_NULL, the req_state is always OMPI_REQUEST_INACTIVE. 
- */ - if( request->req_state == OMPI_REQUEST_INACTIVE ) { - num_requests_null_inactive++; - continue; - } - if (request->req_complete == true) { - indices[i] = 1; - num_requests_done++; - } - } - if (num_requests_null_inactive == count || - num_requests_done > 0) - break; - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - } while (1); - ompi_request_waiting--; - OPAL_THREAD_UNLOCK(&ompi_request_lock); - -#if OPAL_ENABLE_PROGRESS_THREADS -finished: -#endif /* OPAL_ENABLE_PROGRESS_THREADS */ - -#if OPAL_ENABLE_FT_CR == 1 - if( opal_cr_is_enabled) { - rptr = requests; - for (i = 0; i < count; i++, rptr++) { - request = *rptr; - if( true == request->req_complete) { - OMPI_CRCP_REQUEST_COMPLETE(request); - } + if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) { + /* If the request is completed go ahead and mark it as such */ + assert( REQUEST_COMPLETE(request) ); + num_requests_done++; } } -#endif + sync_sets = count - num_requests_null_inactive - num_requests_done; if(num_requests_null_inactive == count) { *outcount = MPI_UNDEFINED; - } else { - /* - * Compress the index array. + /* nobody will signall us */ + WAIT_SYNC_RELEASE_NOWAIT(&sync); + return rc; + } + + if( 0 == num_requests_done ) { + /* One completed request is enough to satisfy the some condition */ + SYNC_WAIT(&sync); + } + + /* Do the final counting and */ + /* Clean up the synchronization primitives */ + + rptr = requests; + num_requests_done = 0; + for (size_t i = 0; i < count; i++, rptr++) { + request = *rptr; + + if( request->req_state == OMPI_REQUEST_INACTIVE ) { + continue; + } + /* Atomically mark the request as pending. If this succeed + * then the request was not completed, and it is now marked as + * pending. Otherwise, the request is complete )either it was + * before or it has been meanwhile). The major drawback here + * is that we will do all the atomics operations in all cases. 
*/ - for (i = 0, num_requests_done = 0; i < count; i++) { - if (0 != indices[i]) { - indices[num_requests_done++] = i; - } + if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { + indices[num_requests_done] = i; + num_requests_done++; } + } + sync_unsets = count - num_requests_null_inactive - num_requests_done; - *outcount = num_requests_done; + if( sync_sets == sync_unsets ){ + /* nobody knows about us, + * set signa-in-progress flag to false + */ + WAIT_SYNC_SIGNALLED(&sync); + } - for (i = 0; i < num_requests_done; i++) { - request = requests[indices[i]]; - assert( true == request->req_complete ); - /* Per note above, we have to call gen request query_fn even - if STATUS_IGNORE was provided */ - if (OMPI_REQUEST_GEN == request->req_type) { - ompi_grequest_invoke_query(request, &request->req_status); - } - if (MPI_STATUSES_IGNORE != statuses) { - statuses[i] = request->req_status; - } + WAIT_SYNC_RELEASE(&sync); - if (MPI_SUCCESS != request->req_status.MPI_ERROR) { - rc = MPI_ERR_IN_STATUS; - } + *outcount = num_requests_done; - if( request->req_persistent ) { - request->req_state = OMPI_REQUEST_INACTIVE; - } else { - /* Only free the request if there was no error */ - if (MPI_SUCCESS == request->req_status.MPI_ERROR) { - int tmp; - tmp = ompi_request_free(&(requests[indices[i]])); - if (OMPI_SUCCESS != tmp) { - return tmp; - } + for (size_t i = 0; i < num_requests_done; i++) { + request = requests[indices[i]]; + assert( REQUEST_COMPLETE(request) ); + /* Per note above, we have to call gen request query_fn even + if STATUS_IGNORE was provided */ + if (OMPI_REQUEST_GEN == request->req_type) { + ompi_grequest_invoke_query(request, &request->req_status); + } + if (MPI_STATUSES_IGNORE != statuses) { + statuses[i] = request->req_status; + } + + if (MPI_SUCCESS != request->req_status.MPI_ERROR) { + rc = MPI_ERR_IN_STATUS; + } + + if( request->req_persistent ) { + request->req_state = OMPI_REQUEST_INACTIVE; + } else { + /* Only free the 
request if there was no error */ + if (MPI_SUCCESS == request->req_status.MPI_ERROR) { + int tmp; + tmp = ompi_request_free(&(requests[indices[i]])); + if (OMPI_SUCCESS != tmp) { + return tmp; } } } } + return rc; } diff --git a/ompi/request/request.c b/ompi/request/request.c index 86155d7290b..8a73624ba36 100644 --- a/ompi/request/request.c +++ b/ompi/request/request.c @@ -3,10 +3,10 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -18,9 +18,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,11 +33,6 @@ #include "ompi/constants.h" opal_pointer_array_t ompi_request_f_to_c_table = {{0}}; -size_t ompi_request_waiting = 0; -size_t ompi_request_completed = 0; -size_t ompi_request_failed = 0; -opal_mutex_t ompi_request_lock = {{0}}; -opal_condition_t ompi_request_cond = {{0}}; ompi_predefined_request_t ompi_request_null = {{{{{0}}}}}; ompi_predefined_request_t *ompi_request_null_addr = &ompi_request_null; ompi_request_t ompi_request_empty = {{{{0}}}}; @@ -109,8 +104,6 @@ OBJ_CLASS_INSTANCE( int ompi_request_init(void) { - OBJ_CONSTRUCT(&ompi_request_lock, opal_mutex_t); - OBJ_CONSTRUCT(&ompi_request_cond, opal_condition_t); OBJ_CONSTRUCT(&ompi_request_null, ompi_request_t); OBJ_CONSTRUCT(&ompi_request_f_to_c_table, opal_pointer_array_t); @@ -125,7 +118,7 @@ int ompi_request_init(void) ompi_request_null.request.req_status._ucount = 0; ompi_request_null.request.req_status._cancelled = 0; - ompi_request_null.request.req_complete = true; + ompi_request_null.request.req_complete = REQUEST_COMPLETED; ompi_request_null.request.req_state = OMPI_REQUEST_INACTIVE; ompi_request_null.request.req_persistent = false; ompi_request_null.request.req_f_to_c_index = @@ -157,7 +150,7 @@ int ompi_request_init(void) ompi_request_empty.req_status._ucount = 0; ompi_request_empty.req_status._cancelled = 0; - ompi_request_empty.req_complete = true; + ompi_request_empty.req_complete = REQUEST_COMPLETED; ompi_request_empty.req_state = OMPI_REQUEST_ACTIVE; ompi_request_empty.req_persistent = false; ompi_request_empty.req_f_to_c_index = @@ -186,8 +179,6 @@ int ompi_request_finalize(void) OBJ_DESTRUCT( &ompi_request_null.request ); OMPI_REQUEST_FINI( &ompi_request_empty ); OBJ_DESTRUCT( &ompi_request_empty ); - OBJ_DESTRUCT( &ompi_request_cond ); - OBJ_DESTRUCT( &ompi_request_lock ); OBJ_DESTRUCT( &ompi_request_f_to_c_table ); return OMPI_SUCCESS; } diff --git a/ompi/request/request.h 
b/ompi/request/request.h index df58a7e1b94..9587486ec8c 100644 --- a/ompi/request/request.h +++ b/ompi/request/request.h @@ -3,22 +3,22 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -35,6 +35,7 @@ #include "opal/class/opal_free_list.h" #include "opal/class/opal_pointer_array.h" #include "opal/threads/condition.h" +#include "opal/threads/wait_sync.h" #include "ompi/constants.h" BEGIN_C_DECLS @@ -46,7 +47,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_request_t); /* * The following include pulls in shared typedefs with debugger plugins. - * For more information on why we do this see the Notice to developers + * For more information on why we do this see the Notice to developers * comment at the top of the ompi_msgq_dll.c file. */ @@ -62,12 +63,14 @@ typedef int (*ompi_request_free_fn_t)(struct ompi_request_t** rptr); /* * Optional function to cancel a pending request. 
*/ -typedef int (*ompi_request_cancel_fn_t)(struct ompi_request_t* request, int flag); +typedef int (*ompi_request_cancel_fn_t)(struct ompi_request_t* request, int flag); /* * Optional function called when the request is completed from the MPI - * library perspective. This function is not allowed to release any - * ressources related to the request. + * library perspective. This function is allowed to release the request if + * the request will not be used with ompi_request_wait* or ompi_request_test. + * If the function reposts (using start) a request or calls ompi_request_free() + * on the request it *MUST* return 1. It should return 0 otherwise. */ typedef int (*ompi_request_complete_fn_t)(struct ompi_request_t* request); @@ -96,13 +99,13 @@ typedef union ompi_mpi_object_t { } ompi_mpi_object_t; /** - * Main top-level request struct definition + * Main top-level request struct definition */ struct ompi_request_t { opal_free_list_item_t super; /**< Base type */ ompi_request_type_t req_type; /**< Enum indicating the type of the request */ ompi_status_public_t req_status; /**< Completion status */ - volatile bool req_complete; /**< Flag indicating wether request has completed */ + volatile void *req_complete; /**< Flag indicating wether request has completed */ volatile ompi_request_state_t req_state; /**< enum indicate state of the request */ bool req_persistent; /**< flag indicating if the this is a persistent request */ int req_f_to_c_index; /**< Index in Fortran <-> C translation array */ @@ -118,6 +121,7 @@ struct ompi_request_t { */ typedef struct ompi_request_t ompi_request_t; + /** * Padded struct to maintain back compatibiltiy. * See ompi/communicator/communicator.h comments with struct ompi_communicator_t @@ -138,13 +142,18 @@ typedef struct ompi_predefined_request_t ompi_predefined_request_t; * performance path (since requests may be re-used, it is possible * that we will have to initialize a request multiple times). 
*/ -#define OMPI_REQUEST_INIT(request, persistent) \ - do { \ - (request)->req_complete = false; \ - (request)->req_state = OMPI_REQUEST_INACTIVE; \ - (request)->req_persistent = (persistent); \ - } while (0); +#define OMPI_REQUEST_INIT(request, persistent) \ + do { \ + (request)->req_complete = \ + (persistent) ? REQUEST_COMPLETED : REQUEST_PENDING; \ + (request)->req_state = OMPI_REQUEST_INACTIVE; \ + (request)->req_persistent = (persistent); \ + (request)->req_complete_cb = NULL; \ + (request)->req_complete_cb_data = NULL; \ + } while (0); + +#define REQUEST_COMPLETE(req) (REQUEST_COMPLETED == (req)->req_complete) /** * Finalize a request. This is a macro to avoid function call * overhead, since this is typically invoked in the critical @@ -153,7 +162,7 @@ typedef struct ompi_predefined_request_t ompi_predefined_request_t; * * When finalizing a request, if MPI_Request_f2c() was previously * invoked on that request, then this request was added to the f2c - * table, and we need to remove it + * table, and we need to remove it * * This function should be called only from the MPI layer. It should * never be called from the PML. It take care of the upper level clean-up. @@ -168,7 +177,7 @@ do { \ (request)->req_f_to_c_index, NULL); \ (request)->req_f_to_c_index = MPI_UNDEFINED; \ } \ -} while (0); +} while (0); /** * Non-blocking test for request completion. @@ -306,18 +315,12 @@ typedef struct ompi_request_fns_t { /** * Globals used for tracking requests and request completion. 
*/ -OMPI_DECLSPEC extern opal_pointer_array_t ompi_request_f_to_c_table; -OMPI_DECLSPEC extern size_t ompi_request_waiting; -OMPI_DECLSPEC extern size_t ompi_request_completed; -OMPI_DECLSPEC extern size_t ompi_request_failed; -OMPI_DECLSPEC extern int32_t ompi_request_poll; -OMPI_DECLSPEC extern opal_mutex_t ompi_request_lock; -OMPI_DECLSPEC extern opal_condition_t ompi_request_cond; +OMPI_DECLSPEC extern opal_pointer_array_t ompi_request_f_to_c_table; OMPI_DECLSPEC extern ompi_predefined_request_t ompi_request_null; OMPI_DECLSPEC extern ompi_predefined_request_t *ompi_request_null_addr; -OMPI_DECLSPEC extern ompi_request_t ompi_request_empty; -OMPI_DECLSPEC extern ompi_status_public_t ompi_status_empty; -OMPI_DECLSPEC extern ompi_request_fns_t ompi_request_functions; +OMPI_DECLSPEC extern ompi_request_t ompi_request_empty; +OMPI_DECLSPEC extern ompi_status_public_t ompi_status_empty; +OMPI_DECLSPEC extern ompi_request_fns_t ompi_request_functions; /** * Initialize the MPI_Request subsystem; invoked during MPI_INIT. 
@@ -365,55 +368,67 @@ static inline int ompi_request_free(ompi_request_t** request) #define ompi_request_wait_all (ompi_request_functions.req_wait_all) #define ompi_request_wait_some (ompi_request_functions.req_wait_some) - /** * Wait a particular request for completion */ + static inline void ompi_request_wait_completion(ompi_request_t *req) { - if(false == req->req_complete) { -#if OPAL_ENABLE_PROGRESS_THREADS - if(opal_progress_spin(&req->req_complete)) { - return; + if (opal_using_threads () && !REQUEST_COMPLETE(req)) { + ompi_wait_sync_t sync; + WAIT_SYNC_INIT(&sync, 1); + + if (OPAL_ATOMIC_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) { + SYNC_WAIT(&sync); + } else { + /* completed before we had a chance to swap in the sync object */ + WAIT_SYNC_SIGNALLED(&sync); } -#endif - OPAL_THREAD_LOCK(&ompi_request_lock); - ompi_request_waiting++; - while(false == req->req_complete) { - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); + + assert(REQUEST_COMPLETE(req)); + WAIT_SYNC_RELEASE(&sync); + } else { + while(!REQUEST_COMPLETE(req)) { + opal_progress(); } - ompi_request_waiting--; - OPAL_THREAD_UNLOCK(&ompi_request_lock); } } /** * Signal or mark a request as complete. If with_signal is true this will - * wake any thread pending on the request and ompi_request_lock should be - * held while calling this function. If with_signal is false, there will - * signal generated, and no lock required. This is a special case when - * the function is called from the critical path for small messages, where - * we know the current execution flow created the request, and is still - * in the _START macro. + * wake any thread pending on the request. If with_signal is false, the + * opposite will be true, the request will simply be marked as completed + * and no effort will be made to correctly (atomically) handle the associated + * synchronization primitive. 
This is a special case when the function + * is called from the critical path for small messages, where we know + * the current execution flow created the request, and no synchronized wait + * has been set. + * BEWARE: The error code should be set on the request prior to calling + * this function, or the synchronization primitive might not be correctly + * triggered. */ static inline int ompi_request_complete(ompi_request_t* request, bool with_signal) { - ompi_request_complete_fn_t tmp = request->req_complete_cb; - if( NULL != tmp ) { + int rc = 0; + + if( NULL != request->req_complete_cb) { + rc = request->req_complete_cb( request ); request->req_complete_cb = NULL; - tmp( request ); } - ompi_request_completed++; - request->req_complete = true; - if( OPAL_UNLIKELY(MPI_SUCCESS != request->req_status.MPI_ERROR) ) { - ompi_request_failed++; - } - if(with_signal && ompi_request_waiting) { - /* Broadcast the condition, otherwise if there is already a thread - * waiting on another request it can use all signals. - */ - opal_condition_broadcast(&ompi_request_cond); + + if (0 == rc) { + if( OPAL_LIKELY(with_signal) ) { + if(!OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, REQUEST_COMPLETED)) { + ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete, + REQUEST_COMPLETED); + /* In the case where another thread concurrently changed the request to REQUEST_PENDING */ + if( REQUEST_PENDING != tmp_sync ) + wait_sync_update(tmp_sync, 1, request->req_status.MPI_ERROR); + } + } else + request->req_complete = REQUEST_COMPLETED; } + return OMPI_SUCCESS; } diff --git a/ompi/request/request_dbg.h b/ompi/request/request_dbg.h index c25ae29f835..7251b96dc5b 100644 --- a/ompi/request/request_dbg.h +++ b/ompi/request/request_dbg.h @@ -2,9 +2,9 @@ /* * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #ifndef OMPI_REQUEST_DBG_H @@ -12,7 +12,7 @@ /* * This file contains definitions used by both OMPI and debugger plugins. - * For more information on why we do this see the Notice to developers + * For more information on why we do this see the Notice to developers * comment at the top of the ompi_msgq_dll.c file. */ diff --git a/ompi/request/request_default.h b/ompi/request/request_default.h index e7f82a4b6c8..01319098705 100644 --- a/ompi/request/request_default.h +++ b/ompi/request/request_default.h @@ -3,9 +3,9 @@ * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/runtime/Makefile.am b/ompi/runtime/Makefile.am index 128601e1ed8..396dc70a239 100644 --- a/ompi/runtime/Makefile.am +++ b/ompi/runtime/Makefile.am @@ -5,16 +5,19 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -24,15 +27,13 @@ dist_ompidata_DATA += runtime/help-mpi-runtime.txt headers += \ runtime/mpiruntime.h \ - runtime/ompi_cr.h \ runtime/params.h \ runtime/ompi_info_support.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ runtime/ompi_mpi_abort.c \ runtime/ompi_mpi_init.c \ runtime/ompi_mpi_finalize.c \ runtime/ompi_mpi_params.c \ runtime/ompi_mpi_preconnect.c \ - runtime/ompi_cr.c \ runtime/ompi_info_support.c diff --git a/ompi/runtime/help-mpi-runtime.txt b/ompi/runtime/help-mpi-runtime.txt index 927087f5c3d..f2028417b98 100644 --- a/ompi/runtime/help-mpi-runtime.txt +++ b/ompi/runtime/help-mpi-runtime.txt @@ -6,16 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for Open MPI. @@ -50,21 +50,31 @@ You may wish to try to narrow down the problem; WARNING: The MCA parameter mpi_param_check has been set to true, but parameter checking has been compiled out of Open MPI. The mpi_param_check value has therefore been ignored. +# +[mpi_init: invoked multiple times] +Open MPI has detected that this process has attempted to initialize +MPI (via MPI_INIT or MPI_INIT_THREAD) more than once. This is +erroneous. 
+# +[mpi_init: already finalized] +Open MPI has detected that this process has attempted to initialize +MPI (via MPI_INIT or MPI_INIT_THREAD) after MPI_FINALIZE has been +called. This is erroneous. +# +[mpi_finalize: not initialized] +The function MPI_FINALIZE was invoked before MPI was initialized in a +process on host %s, PID %d. + +This indicates an erroneous MPI program; MPI must be initialized +before it can be finalized. +# [mpi_finalize:invoked_multiple_times] The function MPI_FINALIZE was invoked multiple times in a single -process on host %s, PID %d. +process on host %s, PID %d. This indicates an erroneous MPI program; MPI_FINALIZE is only allowed to be invoked exactly once in a process. # -[proc:heterogeneous-support-unavailable] -The build of Open MPI running on host %s was not -compiled with heterogeneous support. A process running on host -%s appears to have a different architecture, -which will not work. Please recompile Open MPI with the -configure option --enable-heterogeneous or use a homogeneous -environment. -# [sparse groups enabled but compiled out] WARNING: The MCA parameter mpi_use_sparse_group_storage has been set to true, but sparse group support was not compiled into Open MPI. The @@ -80,15 +90,6 @@ Node: %s In order to operate in a heterogeneous environment, please reconfigure Open MPI with --enable-heterogeneous. # -[ompi mpi abort:cannot guarantee all killed] -An MPI process is aborting at a time when it cannot guarantee that all -of its peer processes in the job will be killed properly. You should -double check that everything has shut down cleanly. - - Reason: %s - Local host: %s - PID: %d -# [no cuda support] The user requested CUDA support with the --mca mpi_cuda_support 1 flag but the library was not compiled with any support. 
diff --git a/ompi/runtime/mpiruntime.h b/ompi/runtime/mpiruntime.h index 9dd87420f2a..7efde6fd257 100644 --- a/ompi/runtime/mpiruntime.h +++ b/ompi/runtime/mpiruntime.h @@ -5,20 +5,20 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 University of Houston. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -35,6 +35,7 @@ #include "opal/class/opal_list.h" #include "opal/class/opal_hash_table.h" +#include "opal/threads/mutex.h" BEGIN_C_DECLS @@ -47,14 +48,18 @@ struct ompi_predefined_datatype_t; /* Global variables and symbols for the MPI layer */ -/** Did mpi start to initialize? */ -OMPI_DECLSPEC extern bool ompi_mpi_init_started; -/** Is mpi initialized? */ -OMPI_DECLSPEC extern bool ompi_mpi_initialized; -/** Has mpi been finalized? */ -OMPI_DECLSPEC extern bool ompi_mpi_finalized; +/** Mutex to protect all the _init and _finalize variables */ +OMPI_DECLSPEC extern opal_mutex_t ompi_mpi_bootstrap_mutex; +/** Did MPI start to initialize? */ +OMPI_DECLSPEC extern volatile bool ompi_mpi_init_started; /** Has the RTE been initialized? */ -OMPI_DECLSPEC extern bool ompi_rte_initialized; +OMPI_DECLSPEC extern volatile bool ompi_rte_initialized; +/** Is MPI fully initialized? 
*/ +OMPI_DECLSPEC extern volatile bool ompi_mpi_initialized; +/** Did MPI start to finalize? */ +OMPI_DECLSPEC extern volatile bool ompi_mpi_finalize_started; +/** Has MPI been fully finalized? */ +OMPI_DECLSPEC extern volatile bool ompi_mpi_finalized; /** Do we have multiple threads? */ OMPI_DECLSPEC extern bool ompi_mpi_thread_multiple; @@ -128,7 +133,7 @@ OMPI_DECLSPEC extern opal_list_t ompi_registered_datareps; /** In ompi_mpi_init: the lists of Fortran 90 mathing datatypes. * We need these lists and hashtables in order to satisfy the new - * requirements introduced in MPI 2-1 Sect. 10.2.5, + * requirements introduced in MPI 2-1 Sect. 10.2.5, * MPI_TYPE_CREATE_F90_xxxx, page 295, line 47. */ extern opal_hash_table_t ompi_mpi_f90_integer_hashtable; diff --git a/ompi/runtime/ompi_cr.c b/ompi/runtime/ompi_cr.c deleted file mode 100644 index 8278a01b760..00000000000 --- a/ompi/runtime/ompi_cr.c +++ /dev/null @@ -1,508 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** @file - * - * OMPI Layer Checkpoint/Restart Runtime functions - * - */ - -#include "ompi_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_STAT_H -#include /* for mkfifo */ -#endif /* HAVE_SYS_STAT_H */ - -#include "opal/mca/event/event.h" -#include "opal/util/output.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "opal/mca/installdirs/installdirs.h" -#include "opal/runtime/opal_cr.h" -#include "opal/mca/btl/base/base.h" - -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/snapc.h" -#include "orte/mca/snapc/base/base.h" -#endif - -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" -#include "ompi/communicator/communicator.h" -#include "ompi/runtime/ompi_cr.h" -#if OPAL_ENABLE_CRDEBUG == 1 -#include "ompi/debuggers/debuggers.h" -#endif - -#if OPAL_ENABLE_CRDEBUG == 1 -OMPI_DECLSPEC int MPIR_checkpointable = 0; -OMPI_DECLSPEC char * MPIR_controller_hostname = NULL; -OMPI_DECLSPEC char * MPIR_checkpoint_command = NULL; -OMPI_DECLSPEC char * MPIR_restart_command = NULL; -OMPI_DECLSPEC char * MPIR_checkpoint_listing_command = NULL; -#endif - -/************* - * Local functions - *************/ -static int ompi_cr_coord_pre_ckpt(void); -static int ompi_cr_coord_pre_restart(void); -static int ompi_cr_coord_pre_continue(void); - -static int ompi_cr_coord_post_ckpt(void); -static int ompi_cr_coord_post_restart(void); -static int ompi_cr_coord_post_continue(void); - -/************* - * Local vars - *************/ -static opal_cr_coord_callback_fn_t prev_coord_callback = NULL; - -int ompi_cr_output = -1; -int ompi_cr_verbosity = 0; - -#define NUM_COLLECTIVES 16 - -#define 
SIGNAL(comm, modules, highest_module, msg, ret, func) \ - do { \ - bool found = false; \ - int k; \ - mca_coll_base_module_t *my_module = \ - comm->c_coll.coll_ ## func ## _module; \ - if (NULL != my_module) { \ - for (k = 0 ; k < highest_module ; ++k) { \ - if (my_module == modules[k]) found = true; \ - } \ - if (!found) { \ - modules[highest_module++] = my_module; \ - if (NULL != my_module->ft_event) { \ - ret = my_module->ft_event(msg); \ - if( OMPI_SUCCESS != ret ) { \ - return ret; \ - } \ - } \ - } \ - } \ - } while (0) - - -static int -notify_collectives(int msg) -{ - mca_coll_base_module_t *modules[NUM_COLLECTIVES]; - int i, max, ret, highest_module = 0; - - memset(&modules, 0, sizeof(mca_coll_base_module_t*) * NUM_COLLECTIVES); - - max = opal_pointer_array_get_size(&ompi_mpi_communicators); - for (i = 0 ; i < max ; ++i) { - ompi_communicator_t *comm = - (ompi_communicator_t *)opal_pointer_array_get_item(&ompi_mpi_communicators, i); - if (NULL == comm) continue; - - SIGNAL(comm, modules, highest_module, msg, ret, allgather); - SIGNAL(comm, modules, highest_module, msg, ret, allgatherv); - SIGNAL(comm, modules, highest_module, msg, ret, allreduce); - SIGNAL(comm, modules, highest_module, msg, ret, alltoall); - SIGNAL(comm, modules, highest_module, msg, ret, alltoallv); - SIGNAL(comm, modules, highest_module, msg, ret, alltoallw); - SIGNAL(comm, modules, highest_module, msg, ret, barrier); - SIGNAL(comm, modules, highest_module, msg, ret, bcast); - SIGNAL(comm, modules, highest_module, msg, ret, exscan); - SIGNAL(comm, modules, highest_module, msg, ret, gather); - SIGNAL(comm, modules, highest_module, msg, ret, gatherv); - SIGNAL(comm, modules, highest_module, msg, ret, reduce); - SIGNAL(comm, modules, highest_module, msg, ret, reduce_scatter); - SIGNAL(comm, modules, highest_module, msg, ret, scan); - SIGNAL(comm, modules, highest_module, msg, ret, scatter); - SIGNAL(comm, modules, highest_module, msg, ret, scatterv); - } - - return OMPI_SUCCESS; -} - - -/* 
- * CR Init - */ -int ompi_cr_init(void) -{ - /* - * Register some MCA variables - */ - ompi_cr_verbosity = 0; - (void) mca_base_var_register("ompi", "ompi", "cr", "verbose", - "Verbose output for the OMPI Checkpoint/Restart functionality", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_cr_verbosity); - if(0 != ompi_cr_verbosity) { - ompi_cr_output = opal_output_open(NULL); - opal_output_set_verbosity(ompi_cr_output, ompi_cr_verbosity); - } else { - ompi_cr_output = opal_cr_output; - } - - opal_output_verbose(10, ompi_cr_output, - "ompi_cr: init: ompi_cr_init()"); - - /* Register the OMPI interlevel coordination callback */ - opal_cr_reg_coord_callback(ompi_cr_coord, &prev_coord_callback); - -#if OPAL_ENABLE_CRDEBUG == 1 - /* Check for C/R enabled debugging */ - if( MPIR_debug_with_checkpoint ) { - char *uri = NULL; - char *sep = NULL; - char *hostname = NULL; - - /* Mark as debuggable with C/R */ - MPIR_checkpointable = 1; - - /* Set the checkpoint and restart commands */ - /* Add the full path to the binary */ - asprintf(&MPIR_checkpoint_command, - "%s/ompi-checkpoint --crdebug --hnp-jobid %u", - opal_install_dirs.bindir, - ORTE_PROC_MY_HNP->jobid); - asprintf(&MPIR_restart_command, - "%s/ompi-restart --crdebug ", - opal_install_dirs.bindir); - asprintf(&MPIR_checkpoint_listing_command, - "%s/ompi-checkpoint -l --crdebug ", - opal_install_dirs.bindir); - - /* Set contact information for HNP */ - uri = strdup(ompi_process_info.my_hnp_uri); - hostname = strchr(uri, ';') + 1; - sep = strchr(hostname, ';'); - if (sep) { - *sep = 0; - } - if (strncmp(hostname, "tcp://", 6) == 0) { - hostname += 6; - sep = strchr(hostname, ':'); - *sep = 0; - MPIR_controller_hostname = strdup(hostname); - } else { - MPIR_controller_hostname = strdup("localhost"); - } - - /* Cleanup */ - if( NULL != uri ) { - free(uri); - uri = NULL; - } - } -#endif - - return OMPI_SUCCESS; -} - -/* - * Finalize - */ -int ompi_cr_finalize(void) -{ - 
opal_output_verbose(10, ompi_cr_output, - "ompi_cr: finalize: ompi_cr_finalize()"); - - return OMPI_SUCCESS; -} - -/* - * Interlayer coordination callback - */ -int ompi_cr_coord(int state) -{ - int ret, exit_status = OMPI_SUCCESS; - - opal_output_verbose(10, ompi_cr_output, - "ompi_cr: coord: ompi_cr_coord(%s)\n", - opal_crs_base_state_str((opal_crs_state_type_t)state)); - - /* - * Before calling the previous callback, we have the opportunity to - * take action given the state. - */ - if(OPAL_CRS_CHECKPOINT == state) { - /* Do Checkpoint Phase work */ - ret = ompi_cr_coord_pre_ckpt(); - if( ret == OMPI_EXISTS) { - return ret; - } - else if( ret != OMPI_SUCCESS) { - return ret; - } - } - else if (OPAL_CRS_CONTINUE == state ) { - /* Do Continue Phase work */ - ompi_cr_coord_pre_continue(); - } - else if (OPAL_CRS_RESTART == state ) { - /* Do Restart Phase work */ - ompi_cr_coord_pre_restart(); - } - else if (OPAL_CRS_TERM == state ) { - /* Do Continue Phase work in prep to terminate the application */ - } - else { - /* We must have been in an error state from the checkpoint - * recreate everything, as in the Continue Phase - */ - } - - /* - * Call the previous callback, which should be ORTE [which will handle OPAL] - */ - if(OMPI_SUCCESS != (ret = prev_coord_callback(state)) ) { - exit_status = ret; - goto cleanup; - } - - - /* - * After calling the previous callback, we have the opportunity to - * take action given the state to tidy up. 
- */ - if(OPAL_CRS_CHECKPOINT == state) { - /* Do Checkpoint Phase work */ - ompi_cr_coord_post_ckpt(); - } - else if (OPAL_CRS_CONTINUE == state ) { - /* Do Continue Phase work */ - ompi_cr_coord_post_continue(); - -#if OPAL_ENABLE_CRDEBUG == 1 - /* - * If C/R enabled debugging, - * wait here for debugger to attach - */ - if( MPIR_debug_with_checkpoint ) { - MPIR_checkpoint_debugger_breakpoint(); - } -#endif - } - else if (OPAL_CRS_RESTART == state ) { - /* Do Restart Phase work */ - ompi_cr_coord_post_restart(); - -#if OPAL_ENABLE_CRDEBUG == 1 - /* - * If C/R enabled debugging, - * wait here for debugger to attach - */ - if( MPIR_debug_with_checkpoint ) { - MPIR_checkpoint_debugger_breakpoint(); - } -#endif - } - else if (OPAL_CRS_TERM == state ) { - /* Do Continue Phase work in prep to terminate the application */ - } - else { - /* We must have been in an error state from the checkpoint - * recreate everything, as in the Continue Phase - */ - } - - cleanup: - return exit_status; -} - -/************* - * Pre Lower Layer - *************/ -static int ompi_cr_coord_pre_ckpt(void) { - int ret, exit_status = OMPI_SUCCESS; - - /* - * All the checkpoint heavey lifting in here... - */ - opal_output_verbose(10, ompi_cr_output, - "ompi_cr: coord_pre_ckpt: ompi_cr_coord_pre_ckpt()\n"); - - /* - * Notify Collectives - * - Need to do this on a per communicator basis - * Traverse all communicators... 
- */ - if (OMPI_SUCCESS != (ret = notify_collectives(OPAL_CR_CHECKPOINT))) { - goto cleanup; - } - - /* - * Notify PML - * - Will notify BML and BTL's - */ - if( OMPI_SUCCESS != (ret = mca_pml.pml_ft_event(OPAL_CRS_CHECKPOINT))) { - exit_status = ret; - goto cleanup; - } - - cleanup: - - return exit_status; -} - -static int ompi_cr_coord_pre_restart(void) { - int ret, exit_status = OMPI_SUCCESS; - - opal_output_verbose(10, ompi_cr_output, - "ompi_cr: coord_pre_restart: ompi_cr_coord_pre_restart()"); - - /* - * Notify PML - * - Will notify BML and BTL's - * - The intention here is to have the PML shutdown all the old components - * and handles. On the second pass (once ORTE is restarted) we can - * reconnect processes. - */ - if( OMPI_SUCCESS != (ret = mca_pml.pml_ft_event(OPAL_CRS_RESTART_PRE))) { - exit_status = ret; - goto cleanup; - } - - cleanup: - return exit_status; -} - -static int ompi_cr_coord_pre_continue(void) { -#if OPAL_ENABLE_FT_CR == 1 - int ret, exit_status = OMPI_SUCCESS; - - /* - * Can not really do much until ORTE is up and running, - * so defer action until the post_continue function. - */ - opal_output_verbose(10, ompi_cr_output, - "ompi_cr: coord_pre_continue: ompi_cr_coord_pre_continue()"); - - if (opal_cr_continue_like_restart) { - /* Mimic ompi_cr_coord_pre_restart(); */ - if( OMPI_SUCCESS != (ret = mca_pml.pml_ft_event(OPAL_CRS_CONTINUE))) { - exit_status = ret; - goto cleanup; - } - } - else { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3); - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1); - } - - cleanup: - return exit_status; -#else - return OMPI_SUCCESS; -#endif -} - -/************* - * Post Lower Layer - *************/ -static int ompi_cr_coord_post_ckpt(void) { - /* - * Now that ORTE/OPAL are shutdown, we really can't do much - * so assume pre_ckpt took care of everything. 
- */ - opal_output_verbose(10, ompi_cr_output, - "ompi_cr: coord_post_ckpt: ompi_cr_coord_post_ckpt()"); - - return OMPI_SUCCESS; -} - -static int ompi_cr_coord_post_restart(void) { - int ret, exit_status = OMPI_SUCCESS; - - opal_output_verbose(10, ompi_cr_output, - "ompi_cr: coord_post_restart: ompi_cr_coord_post_restart()"); - - /* - * Notify PML - * - Will notify BML and BTL's - */ - if( OMPI_SUCCESS != (ret = mca_pml.pml_ft_event(OPAL_CRS_RESTART))) { - exit_status = ret; - goto cleanup; - } - - /* - * Notify Collectives - * - Need to do this on a per communicator basis - * Traverse all communicators... - */ - if (OMPI_SUCCESS != (ret = notify_collectives(OPAL_CRS_RESTART))) { - goto cleanup; - } - - cleanup: - - return exit_status; -} - -static int ompi_cr_coord_post_continue(void) { - int ret, exit_status = OMPI_SUCCESS; - - opal_output_verbose(10, ompi_cr_output, - "ompi_cr: coord_post_continue: ompi_cr_coord_post_continue()"); - - /* - * Notify PML - * - Will notify BML and BTL's - */ - if( OMPI_SUCCESS != (ret = mca_pml.pml_ft_event(OPAL_CRS_CONTINUE))) { - exit_status = ret; - goto cleanup; - } - - /* - * Notify Collectives - * - Need to do this on a per communicator basis - * Traverse all communicators... - */ - if (OMPI_SUCCESS != (ret = notify_collectives(OPAL_CRS_CONTINUE))) { - goto cleanup; - } - - cleanup: - - return exit_status; -} diff --git a/ompi/runtime/ompi_cr.h b/ompi/runtime/ompi_cr.h deleted file mode 100644 index e23e56edc16..00000000000 --- a/ompi/runtime/ompi_cr.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * Checkpoint/Restart Functionality for the OMPI layer - */ - -#ifndef OMPI_CR_H -#define OMPI_CR_H - -#include "ompi_config.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/runtime/orte_cr.h" -#endif - -BEGIN_C_DECLS - - /* - * Initialization called in ompi_init() - */ - OMPI_DECLSPEC int ompi_cr_init(void); - - /* - * Finalization called in ompi_finalize() - */ - OMPI_DECLSPEC int ompi_cr_finalize(void); - - /* - * Interlayer Coodination Callback - */ - OMPI_DECLSPEC int ompi_cr_coord(int state); - - /* - * A general output handle to use for FT related messages - */ - OMPI_DECLSPEC extern int ompi_cr_output; - -#if OPAL_ENABLE_CRDEBUG == 1 - OMPI_DECLSPEC extern int MPIR_checkpointable; - OMPI_DECLSPEC extern char * MPIR_controller_hostname; - OMPI_DECLSPEC extern char * MPIR_checkpoint_command; - OMPI_DECLSPEC extern char * MPIR_restart_command; - OMPI_DECLSPEC extern char * MPIR_checkpoint_listing_command; -#endif - -END_C_DECLS - -#endif /* OMPI_CR_H */ diff --git a/ompi/runtime/ompi_mpi_abort.c b/ompi/runtime/ompi_mpi_abort.c index 3c8882436fa..32b8120a983 100644 --- a/ompi/runtime/ompi_mpi_abort.c +++ b/ompi/runtime/ompi_mpi_abort.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,18 +6,22 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
- * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,6 +42,7 @@ #include #include "opal/mca/backtrace/backtrace.h" +#include "opal/runtime/opal_params.h" #include "ompi/communicator/communicator.h" #include "ompi/runtime/mpiruntime.h" @@ -88,7 +94,7 @@ static void try_kill_peers(ompi_communicator_t *comm, } else { assert(count <= nprocs); procs[count++] = - *OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i)->super.proc_name); + *OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i, true)->super.proc_name); } } @@ -96,7 +102,7 @@ static void try_kill_peers(ompi_communicator_t *comm, for (i = 0; i < ompi_comm_remote_size(comm); ++i) { assert(count <= nprocs); procs[count++] = - *OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i)->super.proc_name); + *OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i, true)->super.proc_name); } if (nprocs > 0) { @@ -113,7 +119,7 @@ int ompi_mpi_abort(struct ompi_communicator_t* comm, int errcode) { - char *msg, *host, hostname[MAXHOSTNAMELEN]; + char *msg, *host, hostname[OPAL_MAXHOSTNAMELEN]; pid_t pid = 0; /* Protection for recursive invocation */ @@ -134,13 +140,13 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, /* Should we print a stack trace? Not aggregated because they might be different on all processes. 
*/ - if (ompi_mpi_abort_print_stack) { + if (opal_abort_print_stack) { char **messages; int len, i; - if (OMPI_SUCCESS == opal_backtrace_buffer(&messages, &len)) { + if (OPAL_SUCCESS == opal_backtrace_buffer(&messages, &len)) { for (i = 0; i < len; ++i) { - fprintf(stderr, "[%s:%d] [%d] func:%s\n", host, (int) pid, + fprintf(stderr, "[%s:%d] [%d] func:%s\n", host, (int) pid, i, messages[i]); fflush(stderr); } @@ -156,9 +162,9 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, /* Notify the debugger that we're about to abort */ if (errcode < 0 || - asprintf(&msg, "[%s:%d] aborting with MPI error %s%s", - host, (int) pid, ompi_mpi_errnum_get_string(errcode), - ompi_mpi_abort_print_stack ? + asprintf(&msg, "[%s:%d] aborting with MPI error %s%s", + host, (int) pid, ompi_mpi_errnum_get_string(errcode), + opal_abort_print_stack ? " (stack trace available on stderr)" : "") < 0) { msg = NULL; } @@ -169,20 +175,20 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, /* Should we wait for a while before aborting? */ - if (0 != ompi_mpi_abort_delay) { - if (ompi_mpi_abort_delay < 0) { - fprintf(stderr ,"[%s:%d] Looping forever (MCA parameter mpi_abort_delay is < 0)\n", + if (0 != opal_abort_delay) { + if (opal_abort_delay < 0) { + fprintf(stderr ,"[%s:%d] Looping forever (MCA parameter opal_abort_delay is < 0)\n", host, (int) pid); fflush(stderr); - while (1) { - sleep(5); + while (1) { + sleep(5); } } else { fprintf(stderr, "[%s:%d] Delaying for %d seconds before aborting\n", - host, (int) pid, ompi_mpi_abort_delay); + host, (int) pid, opal_abort_delay); do { sleep(1); - } while (--ompi_mpi_abort_delay > 0); + } while (--opal_abort_delay > 0); } } @@ -190,9 +196,9 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, killing everyone. Sorry, Charlie... 
*/ if (!ompi_rte_initialized) { fprintf(stderr, "[%s:%d] Local abort %s completed successfully, but am not able to aggregate error messages, and not able to guarantee that all other processes were killed!\n", - host, (int) pid, ompi_mpi_finalized ? + host, (int) pid, ompi_mpi_finalized ? "after MPI_FINALIZE started" : "before MPI_INIT completed"); - exit(errcode == 0 ? 1 : errcode); + _exit(errcode == 0 ? 1 : errcode); } /* If OMPI is initialized and we have a non-NULL communicator, diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index c2473b65350..959e2f68082 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -6,22 +6,22 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights - * reserved. + * Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -78,48 +78,59 @@ #include "ompi/mca/io/base/base.h" #include "ompi/mca/pml/base/pml_base_bsend.h" #include "ompi/runtime/params.h" -#include "ompi/mca/dpm/base/base.h" -#include "ompi/mca/pubsub/base/base.h" +#include "ompi/dpm/dpm.h" #include "ompi/mpiext/mpiext.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" -#endif -#include "ompi/runtime/ompi_cr.h" - extern bool ompi_enable_timing; extern bool ompi_enable_timing_ext; +static void fence_cbfunc(int status, void *cbdata) +{ + volatile bool *active = (volatile bool*)cbdata; + *active = false; +} + int ompi_mpi_finalize(void) { - int ret; - static int32_t finalize_has_already_started = 0; + int ret = MPI_SUCCESS; opal_list_item_t *item; ompi_proc_t** procs; size_t nprocs; + volatile bool active; OPAL_TIMING_DECLARE(tm); OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); - /* Be a bit social if an erroneous program calls MPI_FINALIZE in two different threads, otherwise we may deadlock in ompi_comm_free() (or run into other nasty lions, tigers, or - bears) */ - - if (! opal_atomic_cmpset_32(&finalize_has_already_started, 0, 1)) { - /* Note that if we're already finalized, we cannot raise an - MPI exception. The best that we can do is write something - to stderr. */ - char hostname[MAXHOSTNAMELEN]; + bears). + + This lock is held for the duration of ompi_mpi_init() and + ompi_mpi_finalize(). Hence, if we get it, then no other thread + is inside the critical section (and we don't have to check the + *_started bool variables). */ + opal_mutex_lock(&ompi_mpi_bootstrap_mutex); + if (!ompi_mpi_initialized || ompi_mpi_finalized) { + /* Note that if we're not initialized or already finalized, we + cannot raise an MPI exception. The best that we can do is + write something to stderr. 
*/ + char hostname[OPAL_MAXHOSTNAMELEN]; pid_t pid = getpid(); gethostname(hostname, sizeof(hostname)); - opal_show_help("help-mpi-runtime.txt", - "mpi_finalize:invoked_multiple_times", - true, hostname, pid); + if (ompi_mpi_initialized) { + opal_show_help("help-mpi-runtime.txt", + "mpi_finalize: not initialized", + true, hostname, pid); + } else if (ompi_mpi_finalized) { + opal_show_help("help-mpi-runtime.txt", + "mpi_finalize:invoked_multiple_times", + true, hostname, pid); + } + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); return MPI_ERR_OTHER; } + ompi_mpi_finalize_started = true; ompi_mpiext_fini(); @@ -157,7 +168,7 @@ int ompi_mpi_finalize(void) /* NOTE: MPI-2.1 requires that MPI_FINALIZE is "collective" across *all* connected processes. This only means that all processes have to call it. It does *not* mean that all connected - processes need to synchronize (either directly or indirectly). + processes need to synchronize (either directly or indirectly). For example, it is quite easy to construct complicated scenarios where one job is "connected" to another job via @@ -209,7 +220,7 @@ int ompi_mpi_finalize(void) have many other, much higher priority issues to handle that deal with non-erroneous cases. */ - /* Wait for everyone to reach this point. This is a grpcomm + /* Wait for everyone to reach this point. This is a PMIx barrier instead of an MPI barrier for (at least) two reasons: 1. An MPI barrier doesn't ensure that all messages have been @@ -229,7 +240,27 @@ int ompi_mpi_finalize(void) del_procs behavior around May of 2014 (see https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for more details). */ - opal_pmix.fence(NULL, 0); + if (NULL != opal_pmix.fence_nb) { + active = true; + /* Note that use of the non-blocking PMIx fence will + * allow us to lazily cycle calling + * opal_progress(), which will allow any other pending + * communications/actions to complete. See + * https://github.com/open-mpi/ompi/issues/1576 for the + * original bug report. 
*/ + opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active); + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } else { + /* However, we cannot guarantee that the provided PMIx has + * fence_nb. If it doesn't, then do the best we can: an MPI + * barrier on COMM_WORLD (which isn't the best because of the + * reasons cited above), followed by a blocking PMIx fence + * (which does not call opal_progress()). */ + ompi_communicator_t *comm = &ompi_mpi_comm_world.comm; + comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module); + + opal_pmix.fence(NULL, 0); + } /* check for timing request - get stop time and report elapsed time if so */ @@ -238,13 +269,6 @@ int ompi_mpi_finalize(void) OPAL_TIMING_REPORT(ompi_enable_timing_ext, &tm); OPAL_TIMING_RELEASE(&tm); - /* - * Shutdown the Checkpoint/Restart Mech. - */ - if (OMPI_SUCCESS != (ret = ompi_cr_finalize())) { - OMPI_ERROR_LOG(ret); - } - /* Shut down any bindings-specific issues: C++, F77, F90 */ /* Remove all memory associated by MPI_REGISTER_DATAREP (per @@ -269,40 +293,43 @@ int ompi_mpi_finalize(void) /* free file resources */ if (OMPI_SUCCESS != (ret = ompi_file_finalize())) { - return ret; + goto done; } /* free window resources */ if (OMPI_SUCCESS != (ret = ompi_win_finalize())) { - return ret; + goto done; } if (OMPI_SUCCESS != (ret = ompi_osc_base_finalize())) { - return ret; + goto done; } /* free communicator resources. this MUST come before finalizing the PML * as this will call into the pml */ if (OMPI_SUCCESS != (ret = ompi_comm_finalize())) { - return ret; + goto done; } + /* call del_procs on all allocated procs even though some may not be known + * to the pml layer. the pml layer is expected to be resilient and ignore + * any unknown procs. 
*/ nprocs = 0; - procs = ompi_proc_world(&nprocs); + procs = ompi_proc_get_allocated (&nprocs); MCA_PML_CALL(del_procs(procs, nprocs)); free(procs); - /* free pml resource */ - if(OMPI_SUCCESS != (ret = mca_pml_base_finalize())) { - return ret; + /* free pml resource */ + if(OMPI_SUCCESS != (ret = mca_pml_base_finalize())) { + goto done; } /* free requests */ if (OMPI_SUCCESS != (ret = ompi_request_finalize())) { - return ret; + goto done; } if (OMPI_SUCCESS != (ret = ompi_message_finalize())) { - return ret; + goto done; } /* If requested, print out a list of memory allocated by ALLOC_MEM @@ -315,74 +342,59 @@ int ompi_mpi_finalize(void) shut down MCA types having to do with communications */ if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pml_base_framework) ) ) { OMPI_ERROR_LOG(ret); - return ret; + goto done; } /* shut down buffered send code */ mca_pml_base_bsend_fini(); -#if OPAL_ENABLE_FT_CR == 1 - /* - * Shutdown the CRCP Framework, must happen after PML shutdown - */ - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_crcp_base_framework) ) ) { - OMPI_ERROR_LOG(ret); - return ret; - } -#endif - /* Free secondary resources */ /* free attr resources */ if (OMPI_SUCCESS != (ret = ompi_attr_finalize())) { - return ret; + goto done; } /* free group resources */ if (OMPI_SUCCESS != (ret = ompi_group_finalize())) { - return ret; + goto done; } - /* finalize the pubsub functions */ - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pubsub_base_framework) ) ) { - return ret; - } - - /* finalize the DPM framework */ - if ( OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_dpm_base_framework))) { - return ret; + /* finalize the DPM subsystem */ + if ( OMPI_SUCCESS != (ret = ompi_dpm_finalize())) { + goto done; } - + /* free internal error resources */ if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) { - return ret; + goto done; } - + /* free error code resources */ if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_finalize())) { - 
return ret; + goto done; } /* free errhandler resources */ if (OMPI_SUCCESS != (ret = ompi_errhandler_finalize())) { - return ret; + goto done; } /* Free all other resources */ /* free op resources */ if (OMPI_SUCCESS != (ret = ompi_op_finalize())) { - return ret; + goto done; } /* free ddt resources */ if (OMPI_SUCCESS != (ret = ompi_datatype_finalize())) { - return ret; + goto done; } /* free info resources */ if (OMPI_SUCCESS != (ret = ompi_info_finalize())) { - return ret; + goto done; } /* Close down MCA modules */ @@ -394,32 +406,32 @@ int ompi_mpi_finalize(void) ompi_io_base_framework.framework_refcnt = 1; if (OMPI_SUCCESS != mca_base_framework_close(&ompi_io_base_framework)) { - return ret; + goto done; } } (void) mca_base_framework_close(&ompi_topo_base_framework); if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_osc_base_framework))) { - return ret; + goto done; } if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_coll_base_framework))) { - return ret; + goto done; } if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_bml_base_framework))) { - return ret; + goto done; } if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_mpool_base_framework))) { - return ret; + goto done; } if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_rcache_base_framework))) { - return ret; + goto done; } if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_allocator_base_framework))) { - return ret; + goto done; } /* free proc resources */ if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) { - return ret; + goto done; } if (NULL != ompi_mpi_main_thread) { @@ -430,21 +442,24 @@ int ompi_mpi_finalize(void) /* Leave the RTE */ if (OMPI_SUCCESS != (ret = ompi_rte_finalize())) { - return ret; + goto done; } ompi_rte_initialized = false; /* now close the rte framework */ if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_rte_base_framework) ) ) { OMPI_ERROR_LOG(ret); - return ret; + goto done; } if (OPAL_SUCCESS != (ret = 
opal_finalize_util())) { - return ret; + goto done; } /* All done */ - return MPI_SUCCESS; + done: + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); + + return ret; } diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 596449ded3a..823e0e56c1d 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -6,25 +6,26 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2006-2009 University of Houston. All rights reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -58,6 +59,7 @@ #include "opal/mca/rcache/base/base.h" #include "opal/mca/rcache/rcache.h" #include "opal/mca/mpool/base/base.h" +#include "opal/mca/btl/base/base.h" #include "opal/mca/pmix/pmix.h" #include "opal/util/timings.h" @@ -89,24 +91,18 @@ #include "ompi/debuggers/debuggers.h" #include "ompi/proc/proc.h" #include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/mca/dpm/base/base.h" -#include "ompi/mca/pubsub/base/base.h" +#include "ompi/dpm/dpm.h" #include "ompi/mpiext/mpiext.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" -#endif -#include "ompi/runtime/ompi_cr.h" - -#if defined(MEMORY_LINUX_PTMALLOC2) && MEMORY_LINUX_PTMALLOC2 -#include "opal/mca/memory/linux/memory_linux.h" +/* newer versions of gcc have poisoned this deprecated feature */ +#if HAVE___MALLOC_INITIALIZE_HOOK +#include "opal/mca/memory/base/base.h" /* So this sucks, but with OPAL in its own library that is brought in implicity from libmpi, there are times when the malloc initialize hook in the memory component doesn't work. So we have to do it from here, since any MPI code is going to call MPI_Init... 
*/ -OPAL_DECLSPEC void (*__malloc_initialize_hook) (void) = - opal_memory_linux_malloc_init_hook; +OPAL_DECLSPEC void (*__malloc_initialize_hook) (void) = + opal_memory_base_malloc_init_hook; #endif /* This is required for the boundaries of the hash tables used to store @@ -125,10 +121,12 @@ const char ompi_version_string[] = OMPI_IDENT_STRING; * Global variables and symbols for the MPI layer */ -bool ompi_mpi_init_started = false; -bool ompi_mpi_initialized = false; -bool ompi_mpi_finalized = false; -bool ompi_rte_initialized = false; +opal_mutex_t ompi_mpi_bootstrap_mutex = OPAL_MUTEX_STATIC_INIT; +volatile bool ompi_mpi_init_started = false; +volatile bool ompi_mpi_initialized = false; +volatile bool ompi_mpi_finalize_started = false; +volatile bool ompi_mpi_finalized = false; +volatile bool ompi_rte_initialized = false; bool ompi_mpi_thread_multiple = false; int ompi_mpi_thread_requested = MPI_THREAD_SINGLE; @@ -238,7 +236,7 @@ MPI_Fint *MPI_F_STATUSES_IGNORE = NULL; The values are *NOT* initialized. We do not use the values of these constants; only their addresses (because they're always - passed by reference by Fortran). + passed by reference by Fortran). Initializing upon instantiation these can reveal size and/or alignment differences between Fortran and C (!) which can cause @@ -257,30 +255,7 @@ MPI_Fint *MPI_F_STATUSES_IGNORE = NULL; ompi/include/mpif-common.h. 
*/ -#define INST(type, value, upper_case, lower_case, single_u, double_u) \ -type lower_case = value; \ -type upper_case = value; \ -type single_u = value; \ -type double_u = value - -INST(int, -1, MPI_FORTRAN_BOTTOM, mpi_fortran_bottom, - mpi_fortran_bottom_, mpi_fortran_bottom__); -INST(int, -1, MPI_FORTRAN_IN_PLACE, mpi_fortran_in_place, - mpi_fortran_in_place_, mpi_fortran_in_place__); -INST(int, -1, MPI_FORTRAN_UNWEIGHTED, mpi_fortran_unweighted, - mpi_fortran_unweighted_, mpi_fortran_unweighted__); -INST(int, -1, MPI_FORTRAN_WEIGHTS_EMPTY, mpi_fortran_weights_empty, - mpi_fortran_weights_empty_, mpi_fortran_weights_empty__); -INST(char *, NULL, MPI_FORTRAN_ARGV_NULL, mpi_fortran_argv_null, - mpi_fortran_argv_null_, mpi_fortran_argv_null__); -INST(char *, NULL, MPI_FORTRAN_ARGVS_NULL, mpi_fortran_argvs_null, - mpi_fortran_argvs_null_, mpi_fortran_argvs_null__); -INST(int *, NULL, MPI_FORTRAN_ERRCODES_IGNORE, mpi_fortran_errcodes_ignore, - mpi_fortran_errcodes_ignore_, mpi_fortran_errcodes_ignore__); -INST(int *, NULL, MPI_FORTRAN_STATUS_IGNORE, mpi_fortran_status_ignore, - mpi_fortran_status_ignore_, mpi_fortran_status_ignore__); -INST(int *, NULL, MPI_FORTRAN_STATUSES_IGNORE, mpi_fortran_statuses_ignore, - mpi_fortran_statuses_ignore_, mpi_fortran_statuses_ignore__); +#include "mpif-c-constants.h" /* * Hash tables for MPI_Type_create_f90* functions @@ -321,6 +296,19 @@ _process_name_compare(const opal_process_name_t p1, const opal_process_name_t p2 return ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, o1, o2); } +static int _convert_string_to_process_name(opal_process_name_t *name, + const char* name_string) +{ + return ompi_rte_convert_string_to_process_name(name, name_string); +} + +static int _convert_process_name_to_string(char** name_string, + const opal_process_name_t *name) +{ + return ompi_rte_convert_process_name_to_string(name_string, name); +} + + void ompi_mpi_thread_level(int requested, int *provided) { /** @@ -348,7 +336,7 @@ void 
ompi_mpi_thread_level(int requested, int *provided) ompi_mpi_main_thread = opal_thread_get_self(); } - ompi_mpi_thread_multiple = (ompi_mpi_thread_provided == + ompi_mpi_thread_multiple = (ompi_mpi_thread_provided == MPI_THREAD_MULTIPLE); } @@ -380,6 +368,12 @@ static int ompi_register_mca_variables(void) return OMPI_SUCCESS; } +static void fence_release(int status, void *cbdata) +{ + volatile bool *active = (volatile bool*)cbdata; + *active = false; +} + int ompi_mpi_init(int argc, char **argv, int requested, int *provided) { int ret; @@ -387,28 +381,62 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) size_t nprocs; char *error = NULL; char *cmd=NULL, *av=NULL; + volatile bool active; OPAL_TIMING_DECLARE(tm); OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); /* bitflag of the thread level support provided. To be used * for the modex in order to work in heterogeneous environments. */ - uint8_t threadlevel_bf; + uint8_t threadlevel_bf; - /* Indicate that we have *started* MPI_INIT*. MPI_FINALIZE has - something sorta similar in a static local variable in - ompi_mpi_finalize(). */ + /* Ensure that we were not already initialized or finalized. + + This lock is held for the duration of ompi_mpi_init() and + ompi_mpi_finalize(). Hence, if we get it, then no other thread + is inside the critical section (and we don't have to check the + *_started bool variables). 
*/ + opal_mutex_lock(&ompi_mpi_bootstrap_mutex); + if (ompi_mpi_finalized) { + opal_show_help("help-mpi-runtime.txt", + "mpi_init: already finalized", true); + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); + return MPI_ERR_OTHER; + } else if (ompi_mpi_initialized) { + opal_show_help("help-mpi-runtime.txt", + "mpi_init: invoked multiple times", true); + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); + return MPI_ERR_OTHER; + } + + /* Indicate that we have *started* MPI_INIT* */ ompi_mpi_init_started = true; - /* Setup enough to check get/set MCA params */ + /* Figure out the final MPI thread levels. If we were not + compiled for support for MPI threads, then don't allow + MPI_THREAD_MULTIPLE. Set this stuff up here early in the + process so that other components can make decisions based on + this value. */ + ompi_mpi_thread_level(requested, provided); + + /* Setup enough to check get/set MCA params */ if (OPAL_SUCCESS != (ret = opal_init_util(&argc, &argv))) { error = "ompi_mpi_init: opal_init_util failed"; goto error; } + /* If thread support was enabled, then setup OPAL to allow for them. This must be done + * early to prevent a race condition that can occur with orte_init(). 
*/ + if (*provided != MPI_THREAD_SINGLE) { + opal_set_using_threads(true); + } + /* Convince OPAL to use our naming scheme */ opal_process_name_print = _process_name_print_for_opal; opal_compare_proc = _process_name_compare; + opal_convert_string_to_process_name = _convert_string_to_process_name; + opal_convert_process_name_to_string = _convert_process_name_to_string; + opal_proc_for_name = ompi_proc_for_name; /* Register MCA variables */ if (OPAL_SUCCESS != (ret = ompi_register_mca_variables())) { @@ -479,11 +507,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } ompi_rte_initialized = true; - + /* check for timing request - get stop time and report elapsed time if so */ OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex")); -#if OPAL_HAVE_HWLOC /* if hwloc is available but didn't get setup for some * reason, do so now */ @@ -493,7 +520,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } } -#endif /* Register the default errhandler callback - RTE will ignore if it * doesn't support this capability @@ -501,13 +527,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_rte_register_errhandler(ompi_errhandler_runtime_callback, OMPI_RTE_ERRHANDLER_LAST); - /* Figure out the final MPI thread levels. If we were not - compiled for support for MPI threads, then don't allow - MPI_THREAD_MULTIPLE. Set this stuff up here early in the - process so that other components can make decisions based on - this value. 
*/ - - ompi_mpi_thread_level(requested, provided); /* determine the bitflag belonging to the threadlevel_support provided */ memset ( &threadlevel_bf, 0, sizeof(uint8_t)); @@ -515,7 +534,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) #if OMPI_ENABLE_THREAD_MULTIPLE /* add this bitflag to the modex */ - OPAL_MODEX_SEND_STRING(ret, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND_STRING(ret, OPAL_PMIX_GLOBAL, "MPI_THREAD_LEVEL", &threadlevel_bf, sizeof(uint8_t)); if (OPAL_SUCCESS != ret) { error = "ompi_mpi_init: modex send thread level"; @@ -523,13 +542,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } #endif - /* If thread support was enabled, then setup OPAL to allow for - them. */ - if ((OPAL_ENABLE_PROGRESS_THREADS == 1) || - (*provided != MPI_THREAD_SINGLE)) { - opal_set_using_threads(true); - } - /* initialize datatypes. This step should be done early as it will * create the local convertor and local arch used in the proc * init. 
@@ -553,7 +565,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) error = "ompi_op_base_open() failed"; goto error; } - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = ompi_op_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { error = "ompi_op_base_find_available() failed"; @@ -582,6 +594,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) error = "mca_bml_base_open() failed"; goto error; } + if (OMPI_SUCCESS != (ret = mca_bml_base_init (1, ompi_mpi_thread_multiple))) { + error = "mca_bml_base_init() failed"; + goto error; + } if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_pml_base_framework, 0))) { error = "mca_pml_base_open() failed"; goto error; @@ -596,13 +612,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } -#if OPAL_ENABLE_FT_CR == 1 - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_crcp_base_framework, 0))) { - error = "ompi_crcp_base_open() failed"; - goto error; - } -#endif - /* In order to reduce the common case for MPI apps (where they don't use MPI-2 IO or MPI-1 topology functions), the io and topo frameworks are initialized lazily, at the first use of @@ -611,14 +620,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Select which MPI components to use */ - if (OMPI_SUCCESS != - (ret = mca_mpool_base_init(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { - error = "mca_mpool_base_init() failed"; - goto error; - } - - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { error = "mca_pml_base_select() failed"; @@ -630,41 +632,43 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* exchange connection info - this function may also act as a barrier * if data exchange is required. The modex occurs solely across procs - * in our job, so no proc array is passed. 
If a barrier is required, - * the "fence" function will perform it internally - */ - OPAL_FENCE(NULL, 0, NULL, NULL); + * in our job. If a barrier is required, the "modex" function will + * perform it internally */ + active = true; + opal_pmix.commit(); + if (!opal_pmix_base_async_modex) { + if (NULL != opal_pmix.fence_nb) { + opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data, + fence_release, (void*)&active); + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } else { + opal_pmix.fence(NULL, opal_pmix_collect_all_data); + } + } OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier")); /* select buffered send allocator component to be used */ if( OMPI_SUCCESS != - (ret = mca_pml_base_bsend_init(ompi_mpi_thread_multiple))) { + (ret = mca_pml_base_bsend_init(ompi_mpi_thread_multiple))) { error = "mca_pml_base_bsend_init() failed"; goto error; } - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = mca_coll_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { error = "mca_coll_base_find_available() failed"; goto error; } - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = ompi_osc_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { error = "ompi_osc_base_find_available() failed"; goto error; } -#if OPAL_ENABLE_FT_CR == 1 - if (OMPI_SUCCESS != (ret = ompi_crcp_base_select() ) ) { - error = "ompi_crcp_base_select() failed"; - goto error; - } -#endif - /* io and topo components are not selected here -- see comment above about the io and topo frameworks being loaded lazily */ @@ -697,13 +701,13 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) error = "ompi_mpi_errcode_init() failed"; goto error; } - + /* initialize internal error codes */ if (OMPI_SUCCESS != (ret = ompi_errcode_intern_init())) { error = "ompi_errcode_intern_init() failed"; goto error; } - + /* initialize groups */ if (OMPI_SUCCESS != (ret = ompi_group_init())) { error = "ompi_group_init() failed"; @@ -749,10 +753,21 @@ int 
ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } - /* add all ompi_proc_t's to PML */ - if (NULL == (procs = ompi_proc_world(&nprocs))) { - error = "ompi_proc_world() failed"; - goto error; + /* some btls/mtls require we call add_procs with all procs in the job. + * since the btls/mtls have no visibility here it is up to the pml to + * convey this requirement */ + if (mca_pml_base_requires_world ()) { + if (NULL == (procs = ompi_proc_world (&nprocs))) { + error = "ompi_proc_get_allocated () failed"; + goto error; + } + } else { + /* add all allocated ompi_proc_t's to PML (below the add_procs limit this + * behaves identically to ompi_proc_world ()) */ + if (NULL == (procs = ompi_proc_get_allocated (&nprocs))) { + error = "ompi_proc_get_allocated () failed"; + goto error; + } } ret = MCA_PML_CALL(add_procs(procs, nprocs)); free(procs); @@ -776,8 +791,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) * Dump all MCA parameters if requested */ if (ompi_mpi_show_mca_params) { - ompi_show_all_mca_params(ompi_mpi_comm_world.comm.c_my_rank, - nprocs, + ompi_show_all_mca_params(ompi_mpi_comm_world.comm.c_my_rank, + nprocs, ompi_process_info.nodename); } @@ -790,7 +805,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* wait for everyone to reach this point - this is a hard * barrier requirement at this time, though we hope to relax * it at a later point */ - opal_pmix.fence(NULL, 0); + active = true; + opal_pmix.commit(); + if (NULL != opal_pmix.fence_nb) { + opal_pmix.fence_nb(NULL, false, + fence_release, (void*)&active); + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } else { + opal_pmix.fence(NULL, false); + } /* check for timing request - get stop time and report elapsed time if so, then start the clock again */ @@ -805,7 +828,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) latency. 
*/ opal_progress_set_event_flag(OPAL_EVLOOP_NONBLOCK); #endif - + /* wire up the mpi interface, if requested. Do this after the non-block switch for non-TCP performance. Do before the polling change as anyone with a complex wire-up is going to be @@ -815,39 +838,24 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } - /* Setup the publish/subscribe (PUBSUB) framework */ - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_pubsub_base_framework, 0))) { - error = "mca_pubsub_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = ompi_pubsub_base_select())) { - error = "ompi_pubsub_base_select() failed"; - goto error; - } - - /* Setup the dynamic process management (DPM) framework */ - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_dpm_base_framework, 0))) { - error = "ompi_dpm_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = ompi_dpm_base_select())) { - error = "ompi_dpm_base_select() failed"; + /* Setup the dynamic process management (DPM) subsystem */ + if (OMPI_SUCCESS != (ret = ompi_dpm_init())) { + error = "ompi_dpm_init() failed"; goto error; } - /* Determine the overall threadlevel support of all processes - in MPI_COMM_WORLD. This has to be done before calling + /* Determine the overall threadlevel support of all processes + in MPI_COMM_WORLD. This has to be done before calling coll_base_comm_select, since some of the collective components e.g. hierarch, might create subcommunicators. The threadlevel requested by all processes is required in order to know which cid allocation algorithm can be used. */ - if ( OMPI_SUCCESS != - ( ret = ompi_comm_cid_init ())) { - error = "ompi_mpi_init: ompi_comm_cid_init failed"; - goto error; + if (OMPI_SUCCESS != ( ret = ompi_comm_cid_init ())) { + error = "ompi_mpi_init: ompi_comm_cid_init failed"; + goto error; } - /* Init coll for the comms. This has to be after dpm_base_select, + /* Init coll for the comms. 
This has to be after dpm_base_select, (since dpm.mark_dyncomm is not set in the communicator creation function else), but before dpm.dyncom_init, since this function might require collective for the CID allocation. */ @@ -857,7 +865,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } - if (OMPI_SUCCESS != + if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(MPI_COMM_SELF))) { error = "mca_coll_base_comm_select(MPI_COMM_SELF) failed"; goto error; @@ -866,31 +874,21 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Check whether we have been spawned or not. We introduce that at the very end, since we need collectives, datatypes, ptls etc. up and running here.... */ - if (OMPI_SUCCESS != (ret = ompi_dpm.dyn_init())) { - error = "ompi_comm_dyn_init() failed"; + if (OMPI_SUCCESS != (ret = ompi_dpm_dyn_init())) { + error = "ompi_dpm_dyn_init() failed"; goto error; } - /* - * Startup the Checkpoint/Restart Mech. - * Note: Always do this so tools don't hang when - * in a non-checkpointable build - */ - if (OMPI_SUCCESS != (ret = ompi_cr_init())) { - error = "ompi_cr_init"; - goto error; - } - - /* Undo OPAL calling opal_progress_event_users_increment() during - opal_init, to get better latency when not using TCP. Do - this *after* dyn_init, as dyn init uses lots of RTE - communication and we don't want to hinder the performance of - that code. */ - opal_progress_event_users_decrement(); + /* Undo OPAL calling opal_progress_event_users_increment() during + opal_init, to get better latency when not using TCP. Do + this *after* dyn_init, as dyn init uses lots of RTE + communication and we don't want to hinder the performance of + that code. 
*/ + opal_progress_event_users_decrement(); /* see if yield_when_idle was specified - if so, use it */ opal_progress_set_yield_when_idle(ompi_mpi_yield_when_idle); - + /* negative value means use default - just don't do anything */ if (ompi_mpi_event_tick_rate >= 0) { opal_progress_set_event_poll_rate(ompi_mpi_event_tick_rate); @@ -916,6 +914,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) "mpi_init:startup:internal-failure", true, "MPI_INIT", "MPI_INIT", error, err_msg, ret); } + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); return ret; } @@ -945,5 +944,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) OPAL_TIMING_REPORT(ompi_enable_timing_ext, &tm); OPAL_TIMING_RELEASE(&tm); + opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); return MPI_SUCCESS; } diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index b3938be92e7..366e4972527 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -6,30 +6,28 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "ompi_config.h" -#ifdef HAVE_STRING_H #include -#endif -#ifdef HAVE_TIME_H #include -#endif /* HAVE_TIME_H */ #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" @@ -57,8 +55,6 @@ int ompi_debug_show_mpi_alloc_mem_leaks = 0; bool ompi_debug_no_free_handles = false; bool ompi_mpi_show_mca_params = false; char *ompi_mpi_show_mca_params_file = NULL; -bool ompi_mpi_abort_print_stack = false; -int ompi_mpi_abort_delay = 0; bool ompi_mpi_keep_fqdn_hostnames = false; bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE); bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE); @@ -69,6 +65,10 @@ char *ompi_mpi_show_mca_params_string = NULL; bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE); bool ompi_mpi_preconnect_mpi = false; +#define OMPI_ADD_PROCS_CUTOFF_DEFAULT 0 +uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT; +bool ompi_mpi_dynamics_enabled = true; + static bool show_default_mca_params = false; static bool show_file_mca_params = false; static bool show_enviro_mca_params = false; @@ -89,12 +89,12 @@ int ompi_mpi_register_params(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mpi_param_check); if (ompi_mpi_param_check && !MPI_PARAM_CHECK) { - opal_show_help("help-mpi-runtime.txt", + opal_show_help("help-mpi-runtime.txt", "mpi-param-check-enabled-but-compiled-out", true); ompi_mpi_param_check = false; } - + /* * opal_progress: decide whether to yield and the event library * tick rate @@ -125,7 +125,7 @@ int ompi_mpi_register_params(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &ompi_debug_show_handle_leaks); - + /* Whether or not to free MPI handles. Useless without run-time param checking, so implicitly set that to true if we don't want to free the handles. 
*/ @@ -167,7 +167,7 @@ int ompi_mpi_register_params(void) if (NULL != ompi_mpi_show_mca_params_string) { char **args; int i; - + ompi_mpi_show_mca_params = true; args = opal_argv_split(ompi_mpi_show_mca_params_string, ','); if (NULL == args) { @@ -205,35 +205,8 @@ int ompi_mpi_register_params(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &ompi_mpi_show_mca_params_file); - - /* User-level process pinning controls */ - /* MPI_ABORT controls */ - ompi_mpi_abort_delay = 0; - (void) mca_base_var_register("ompi", "mpi", NULL, "abort_delay", - "If nonzero, print out an identifying message when MPI_ABORT is invoked (hostname, PID of the process that called MPI_ABORT) and delay for that many seconds before exiting (a negative delay value means to never abort). This allows attaching of a debugger before quitting the job.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_mpi_abort_delay); - - ompi_mpi_abort_print_stack = false; - (void) mca_base_var_register("ompi", "mpi", NULL, "abort_print_stack", - "If nonzero, print out a stack trace when MPI_ABORT is invoked", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, - /* If we do not have stack trace - capability, make this a constant - MCA variable */ -#if OPAL_WANT_PRETTY_PRINT_STACKTRACE - 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, -#else - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_CONSTANT, -#endif - &ompi_mpi_abort_print_stack); + /* User-level process pinning controls */ ompi_mpi_preconnect_mpi = false; value = mca_base_var_register("ompi", "mpi", NULL, "preconnect_mpi", @@ -269,7 +242,7 @@ int ompi_mpi_register_params(void) ompi_mpi_have_sparse_group_storage ? 
MCA_BASE_VAR_SCOPE_READONLY : MCA_BASE_VAR_SCOPE_CONSTANT, &ompi_use_sparse_group_storage); if (ompi_use_sparse_group_storage && !ompi_mpi_have_sparse_group_storage) { - opal_show_help("help-mpi-runtime.txt", + opal_show_help("help-mpi-runtime.txt", "sparse groups enabled but compiled out", true); ompi_use_sparse_group_storage = false; @@ -292,6 +265,27 @@ int ompi_mpi_register_params(void) ompi_rte_abort(1, NULL); } + ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT; + (void) mca_base_var_register ("ompi", "mpi", NULL, "add_procs_cutoff", + "Maximum world size for pre-allocating resources for all " + "remote processes. Increasing this limit may improve " + "communication performance at the cost of memory usage", + MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, + 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, + &ompi_add_procs_cutoff); + + value = mca_base_var_find ("opal", "opal", NULL, "abort_delay"); + if (0 <= value) { + (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_delay", + MCA_BASE_VAR_SYN_FLAG_DEPRECATED); + } + + value = mca_base_var_find ("opal", "opal", NULL, "abort_print_stack"); + if (0 <= value) { + (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_print_stack", + MCA_BASE_VAR_SYN_FLAG_DEPRECATED); + } + return OMPI_SUCCESS; } @@ -305,9 +299,9 @@ int ompi_show_all_mca_params(int32_t rank, int requested, char *nodename) { if (rank != 0) { return OMPI_SUCCESS; } - + timestamp = time(NULL); - + /* Open the file if one is specified */ if (0 != strlen(ompi_mpi_show_mca_params_file)) { if ( NULL == (fp = fopen(ompi_mpi_show_mca_params_file, "w")) ) { @@ -331,37 +325,37 @@ int ompi_show_all_mca_params(int32_t rank, int requested, char *nodename) { if (MCA_BASE_VAR_FLAG_INTERNAL & var->mbv_flags) { continue; } - + /* is this a default value and we are not displaying * defaults, ignore this one */ if (MCA_BASE_VAR_SOURCE_DEFAULT == var->mbv_source && !show_default_mca_params) { continue; } - + /* is this a file 
value and we are not displaying files, * ignore it */ if ((MCA_BASE_VAR_SOURCE_FILE == var->mbv_source || - MCA_BASE_VAR_SOURCE_OVERRIDE == var->mbv_source) && + MCA_BASE_VAR_SOURCE_OVERRIDE == var->mbv_source) && !show_file_mca_params) { continue; } - + /* is this an enviro value and we are not displaying enviros, * ignore it */ if (MCA_BASE_VAR_SOURCE_ENV == var->mbv_source && !show_enviro_mca_params) { continue; } - + /* is this an API value and we are not displaying APIs, * ignore it */ if (MCA_BASE_VAR_SOURCE_OVERRIDE == var->mbv_source && !show_override_mca_params) { continue; } - + ret = mca_base_var_dump (i, &var_dump, MCA_BASE_VAR_DUMP_SIMPLE); if (OPAL_SUCCESS != ret) { continue; @@ -376,11 +370,11 @@ int ompi_show_all_mca_params(int32_t rank, int requested, char *nodename) { free (var_dump[0]); free (var_dump); } - + /* Close file, cleanup allocated memory*/ if (0 != strlen(ompi_mpi_show_mca_params_file)) { fclose(fp); } - + return OMPI_SUCCESS; } diff --git a/ompi/runtime/ompi_mpi_preconnect.c b/ompi/runtime/ompi_mpi_preconnect.c index 12960b1b52e..79008c26439 100644 --- a/ompi/runtime/ompi_mpi_preconnect.c +++ b/ompi/runtime/ompi_mpi_preconnect.c @@ -6,11 +6,11 @@ * reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -60,13 +60,13 @@ ompi_init_preconnect_mpi(void) ret = MCA_PML_CALL(isend(outbuf, 1, MPI_CHAR, next, 1, MCA_PML_BASE_SEND_COMPLETE, - MPI_COMM_WORLD, + MPI_COMM_WORLD, &requests[1])); if (OMPI_SUCCESS != ret) return ret; ret = MCA_PML_CALL(irecv(inbuf, 1, MPI_CHAR, prev, 1, - MPI_COMM_WORLD, + MPI_COMM_WORLD, &requests[0])); if(OMPI_SUCCESS != ret) return ret; diff --git a/ompi/runtime/params.h b/ompi/runtime/params.h index ff46f2a4a70..495f0f36fa7 100644 --- a/ompi/runtime/params.h +++ b/ompi/runtime/params.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,19 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. + * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -84,7 +85,7 @@ OMPI_DECLSPEC extern bool ompi_debug_no_free_handles; /** * Whether or not to print MCA parameters on MPI_INIT * - * This is good debugging for user applications to see exactly which + * This is good debugging for user applications to see exactly which * MCA parameters are being used in the current program execution. 
*/ OMPI_DECLSPEC extern bool ompi_mpi_show_mca_params; @@ -123,11 +124,16 @@ OMPI_DECLSPEC extern bool ompi_have_sparse_group_storage; */ OMPI_DECLSPEC extern bool ompi_use_sparse_group_storage; -/* +/** * Cutoff point for retrieving hostnames */ OMPI_DECLSPEC extern uint32_t ompi_direct_modex_cutoff; +/** + * Cutoff point for calling add_procs for all processes + */ +OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff; + /** * Register MCA parameters used by the MPI layer. * @@ -139,8 +145,8 @@ OMPI_DECLSPEC extern uint32_t ompi_direct_modex_cutoff; OMPI_DECLSPEC int ompi_mpi_register_params(void); /** - * Display all MCA parameters used - * + * Display all MCA parameters used + * * @returns OMPI_SUCCESS * * Displays in key = value format diff --git a/ompi/tools/Makefile.am b/ompi/tools/Makefile.am index 9728a09e8b6..634a5f4fa94 100644 --- a/ompi/tools/Makefile.am +++ b/ompi/tools/Makefile.am @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/tools/mpisync/Makefile.am b/ompi/tools/mpisync/Makefile.am index e7ced3c7b21..50619e0aad8 100644 --- a/ompi/tools/mpisync/Makefile.am +++ b/ompi/tools/mpisync/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,11 +14,12 @@ # Copyright (c) 2012 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2014 Artem Polyakov +# Copyright (c) 2016 IBM Corporation. All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -51,7 +52,7 @@ EXTRA_DIST = $(man_pages:.1=.1in)\ if OPAL_INSTALL_TIMING_BINARIES -bin_PROGRAMS = mpisync +bin_PROGRAMS = mpisync bin_SCRIPTS = mpirun_prof ompi_timing_post nodist_man_MANS = $(man_pages) @@ -71,7 +72,7 @@ mpisync_SOURCES = \ mpigclock.c \ sync.c -mpisync_LDADD = $(top_builddir)/ompi/libmpi.la +mpisync_LDADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la if OMPI_RTE_ORTE mpisync_LDADD += $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la endif diff --git a/ompi/tools/mpisync/hpctimer.c b/ompi/tools/mpisync/hpctimer.c index 97a3ff3f430..7d6a85b60a6 100644 --- a/ompi/tools/mpisync/hpctimer.c +++ b/ompi/tools/mpisync/hpctimer.c @@ -34,9 +34,9 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -209,8 +209,8 @@ static double hpctimer_wtime_gettimeofday(void) return (double)tv.tv_sec + 1E-6 * tv.tv_usec; } -/* - * hpctimer_wtime_tsc: Returns TSC-based walltime in seconds. +/* + * hpctimer_wtime_tsc: Returns TSC-based walltime in seconds. */ static double hpctimer_wtime_tsc(void) { @@ -227,38 +227,38 @@ static int hpctimer_tsc_initialize(void) { hpctimer_overhead = hpctimer_measure_overhead(); hpctimer_freq = hpctimer_calibrate_sleep(hpctimer_overhead); - return HPCTIMER_SUCCESS; + return HPCTIMER_SUCCESS; } -/* - * hpctimer_gettsc: Returns TSC value. +/* + * hpctimer_gettsc: Returns TSC value. 
*/ static __inline__ uint64_t hpctimer_gettsc(void) { #if defined(__x86_64__) - uint32_t low, high; - __asm__ __volatile__( - "xorl %%eax, %%eax\n" - "cpuid\n" - ::: "%rax", "%rbx", "%rcx", "%rdx" - ); - __asm__ __volatile__( - "rdtsc\n" - : "=a" (low), "=d" (high) - ); - return ((uint64_t)high << 32) | low; + uint32_t low, high; + __asm__ __volatile__( + "xorl %%eax, %%eax\n" + "cpuid\n" + ::: "%rax", "%rbx", "%rcx", "%rdx" + ); + __asm__ __volatile__( + "rdtsc\n" + : "=a" (low), "=d" (high) + ); + return ((uint64_t)high << 32) | low; #elif defined(__i386__) uint64_t tsc; - __asm__ __volatile__( - "xorl %%eax, %%eax\n" - "cpuid\n" - ::: "%eax", "%ebx", "%ecx", "%edx" - ); - __asm__ __volatile__( - "rdtsc\n" - : "=A" (tsc) - ); + __asm__ __volatile__( + "xorl %%eax, %%eax\n" + "cpuid\n" + ::: "%eax", "%ebx", "%ecx", "%edx" + ); + __asm__ __volatile__( + "rdtsc\n" + : "=A" (tsc) + ); return tsc; #else # error "Unsupported platform" @@ -285,7 +285,7 @@ static uint64_t hpctimer_measure_overhead(void) return overhead; } -/* +/* * hpctimer_calibrate_adaptive: Returns number of TSC tics per second. * Adaptive algorithm based on sleep. */ @@ -297,7 +297,7 @@ static uint64_t hpctimer_calibrate_adaptive(uint64_t overhead) }; int i; uint64_t count, freq; - + freq = (uint64_t)(~0x01); for (i = 0; i < TSC_CALIBRATE_NTESTS; i++) { count = hpctimer_gettsc(); @@ -314,7 +314,7 @@ static uint64_t hpctimer_calibrate_adaptive(uint64_t overhead) } */ -/* +/* * hpctimer_calibrate_sleep: Returns number of TSC tics per second. */ static uint64_t hpctimer_calibrate_sleep(uint64_t overhead) @@ -328,7 +328,7 @@ static uint64_t hpctimer_calibrate_sleep(uint64_t overhead) return count / delay; } -/* +/* * hpctimer_calibrate_loop: Returns number of TSC tics per second. 
*/ /* @@ -341,9 +341,9 @@ static uint64_t hpctimer_calibrate_loop(uint64_t overhead) struct timeval tv1, tv2; int i, j; __volatile__ int dummy = 0; - + for (i = 0; i < TSC_CALIBRATE_NTESTS; i++) { - gettimeofday(&tv1, NULL); + gettimeofday(&tv1, NULL); count = hpctimer_gettsc(); for (j = 0; j < 10000000; j++) { dummy++; @@ -355,7 +355,7 @@ static uint64_t hpctimer_calibrate_loop(uint64_t overhead) if (count < countmin) countmin = count; } - return countmin * 1000000 / (tv2.tv_sec * 1000000 + tv2.tv_usec - + return countmin * 1000000 / (tv2.tv_sec * 1000000 + tv2.tv_usec - tv1.tv_sec * 1000000 - tv1.tv_usec); } */ diff --git a/ompi/tools/mpisync/hpctimer.h b/ompi/tools/mpisync/hpctimer.h index 2cc2d56b49a..4763424c07b 100644 --- a/ompi/tools/mpisync/hpctimer.h +++ b/ompi/tools/mpisync/hpctimer.h @@ -34,9 +34,9 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/tools/mpisync/mpigclock.c b/ompi/tools/mpisync/mpigclock.c index 5af0861347b..4d78bbfc77f 100644 --- a/ompi/tools/mpisync/mpigclock.c +++ b/ompi/tools/mpisync/mpigclock.c @@ -34,9 +34,9 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/tools/mpisync/mpigclock.h b/ompi/tools/mpisync/mpigclock.h index 51d106507b4..41b90112bc6 100644 --- a/ompi/tools/mpisync/mpigclock.h +++ b/ompi/tools/mpisync/mpigclock.h @@ -34,9 +34,9 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/tools/mpisync/mpisync.1in b/ompi/tools/mpisync/mpisync.1in index 3585bb5701f..219d89f034f 100644 --- a/ompi/tools/mpisync/mpisync.1in +++ b/ompi/tools/mpisync/mpisync.1in @@ -25,14 +25,14 @@ The name of output file where offsets related to HNP will be written Print help information .PP .BR ompi_timing_post -takes the timing output file as input parameter. The events are sorted by the timestamps. Next, the timestamps are replaced with time offsets relative to the +takes the timing output file as input parameter. The events are sorted by the timestamps. Next, the timestamps are replaced with time offsets relative to the .BR first : .BR previous event. .PP -.BR mpirun_prof -is a wrapper around +.BR mpirun_prof +is a wrapper around .BR mpirun that performs clock synchronisation and post-processing of the timing output file. diff --git a/ompi/tools/mpisync/ompi_timing_post b/ompi/tools/mpisync/ompi_timing_post index 0df0b9109c2..e5b9f56957a 100755 --- a/ompi/tools/mpisync/ompi_timing_post +++ b/ompi/tools/mpisync/ompi_timing_post @@ -18,9 +18,9 @@ fi initfile=$1 postfile=$2 -# 1. Filter OPAL_TRACE entrieas only +# 1. Filter OPAL_TRACE entrieas only # and put the timestamp to the first place -#.2. Sort considering that we dealing with +#.2. Sort considering that we dealing with # floating point numbers # 3. Return to initial field order and count relative fields cat $initfile | \ @@ -33,8 +33,8 @@ awk 'BEGIN { FPAT = "([^ ]+)|(\"[^\"]+\")" } print $0 } }' | sort --general-numeric-sort | \ -awk 'BEGIN { - FPAT = "([^ ]+)|(\"[^\"]+\")" +awk 'BEGIN { + FPAT = "([^ ]+)|(\"[^\"]+\")" first = 0 prev = 0 } diff --git a/ompi/tools/mpisync/sync.c b/ompi/tools/mpisync/sync.c index 8896973a916..a1331f900b4 100644 --- a/ompi/tools/mpisync/sync.c +++ b/ompi/tools/mpisync/sync.c @@ -2,12 +2,14 @@ * Copyright (C) 2014 Artem Polyakov * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ +#include "opal_config.h" + #include #include #include @@ -35,7 +37,7 @@ int parse_opts(int rank, int argc, char **argv) { while (1) { int option_index = 0; - static struct option long_options[] = { + static struct option long_options[] = { {"output", required_argument, 0, 'o' }, {"help", required_argument, 0, 'h' }, { 0, 0, 0, 0 } }; @@ -69,7 +71,7 @@ int main(int argc, char **argv) MPI_Comm comm = MPI_COMM_WORLD; int rank, commsize; double offs = 0, rtt = 0; - char hname[1024]; + char hname[OPAL_MAXHOSTNAMELEN]; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &commsize); @@ -93,7 +95,7 @@ int main(int argc, char **argv) exit(1); } - if( gethostname(hname, 1024) ){ + if( gethostname(hname, sizeof(hname)) ){ perror("Cannot get hostname. Abort"); MPI_Abort(MPI_COMM_WORLD, 1); } @@ -114,13 +116,13 @@ int main(int argc, char **argv) fprintf(stderr, "Fail to allocate memory. Abort\n"); MPI_Abort(MPI_COMM_WORLD, 1); } - char *hnames = malloc(1024*commsize); + char *hnames = malloc(OPAL_MAXHOSTNAMELEN * commsize); if( hnames == NULL ){ fprintf(stderr, "Fail to allocate memory. 
Abort\n"); MPI_Abort(MPI_COMM_WORLD, 1); } - MPI_Gather(hname,1024,MPI_CHAR,hnames,1024,MPI_CHAR, 0, MPI_COMM_WORLD); + MPI_Gather(hname,sizeof(hname),MPI_CHAR,hnames,sizeof(hname),MPI_CHAR, 0, MPI_COMM_WORLD); MPI_Gather(send,2,MPI_DOUBLE,measure,2, MPI_DOUBLE, 0, MPI_COMM_WORLD); char tmpname[128]; FILE *fp = fopen(filename,"w"); @@ -129,14 +131,14 @@ int main(int argc, char **argv) MPI_Abort(MPI_COMM_WORLD, 1); } double (*m)[2] = (void*)measure; - char (*h)[1024] = (void*)hnames; + char (*h)[OPAL_MAXHOSTNAMELEN] = (void*)hnames; int i; for(i=0; i #endif #include -#ifdef HAVE_SIGNAL_H #include -#endif #include "opal/version.h" #include "opal/mca/installdirs/installdirs.h" #include "opal/class/opal_object.h" #include "opal/class/opal_pointer_array.h" #include "opal/runtime/opal.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/runtime/opal_cr.h" -#endif #include "opal/mca/base/base.h" #include "opal/runtime/opal_info_support.h" #include "opal/util/argv.h" @@ -84,7 +79,7 @@ int main(int argc, char *argv[]) /* Initialize the argv parsing handle */ if (OPAL_SUCCESS != opal_init_util(&argc, &argv)) { - opal_show_help("help-opal_info.txt", "lib-call-fail", true, + opal_show_help("help-opal_info.txt", "lib-call-fail", true, "opal_init_util", __FILE__, __LINE__, NULL); exit(ret); } @@ -92,7 +87,7 @@ int main(int argc, char *argv[]) ompi_info_cmd_line = OBJ_NEW(opal_cmd_line_t); if (NULL == ompi_info_cmd_line) { ret = errno; - opal_show_help("help-opal_info.txt", "lib-call-fail", true, + opal_show_help("help-opal_info.txt", "lib-call-fail", true, "opal_cmd_line_create", __FILE__, __LINE__, NULL); exit(ret); } @@ -113,7 +108,7 @@ int main(int argc, char *argv[]) /* setup the mca_types array */ OBJ_CONSTRUCT(&mca_types, opal_pointer_array_t); opal_pointer_array_init(&mca_types, 256, INT_MAX, 128); - + /* add in the opal frameworks */ opal_info_register_types(&mca_types); @@ -121,7 +116,7 @@ int main(int argc, char *argv[]) /* add in the orte frameworks */ 
orte_info_register_types(&mca_types); #endif - + ompi_info_register_types(&mca_types); /* init the component map */ @@ -133,12 +128,12 @@ int main(int argc, char *argv[]) if (OMPI_ERR_BAD_PARAM == ret) { /* output what we got */ opal_info_do_params(true, opal_cmd_line_is_taken(ompi_info_cmd_line, "internal"), - &mca_types, NULL); + &mca_types, &component_map, NULL); } exit(1); } - /* Execute the desired action(s) */ + /* Execute the desired action(s) */ want_all = opal_cmd_line_is_taken(ompi_info_cmd_line, "all"); if (want_all) { opal_info_out("Package", "package", OPAL_PACKAGE_STRING); @@ -163,16 +158,16 @@ int main(int argc, char *argv[]) if (want_all || opal_cmd_line_is_taken(ompi_info_cmd_line, "param") || opal_cmd_line_is_taken(ompi_info_cmd_line, "params")) { opal_info_do_params(want_all, opal_cmd_line_is_taken(ompi_info_cmd_line, "internal"), - &mca_types, ompi_info_cmd_line); + &mca_types, &component_map, ompi_info_cmd_line); acted = true; } if (opal_cmd_line_is_taken(ompi_info_cmd_line, "type")) { opal_info_do_type(ompi_info_cmd_line); acted = true; } - + /* If no command line args are specified, show default set */ - + if (!acted) { opal_info_out("Package", "package", OPAL_PACKAGE_STRING); ompi_info_show_ompi_version(opal_info_ver_full); @@ -184,9 +179,9 @@ int main(int argc, char *argv[]) opal_info_component_all, opal_info_ver_full, opal_info_ver_all); } - + /* All done */ - + if (NULL != app_env) { opal_argv_free(app_env); } @@ -208,6 +203,6 @@ int main(int argc, char *argv[]) /* Put our own call to opal_finalize_util() here because we called it up above (and it refcounts) */ opal_finalize_util(); - + return 0; } diff --git a/ompi/tools/ompi_info/ompi_info.h b/ompi/tools/ompi_info/ompi_info.h index c420bb844c9..5da105c76b6 100644 --- a/ompi/tools/ompi_info/ompi_info.h +++ b/ompi/tools/ompi_info/ompi_info.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/ompi/tools/ompi_info/param.c b/ompi/tools/ompi_info/param.c index 12f40c2027d..3d5e811b846 100644 --- a/ompi/tools/ompi_info/param.c +++ b/ompi/tools/ompi_info/param.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,10 +13,13 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved + * Copyright (c) 2015 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -73,8 +76,8 @@ static void append(char *dest, size_t max, int *first, char *src) * FALSE -> display selected options * * This function displays all the options with which the current - * installation of ompi was configured. There are many options here - * that are carried forward from OMPI-7 and are not mca parameters + * installation of ompi was configured. There are many options here + * that are carried forward from OMPI-7 and are not mca parameters * in OMPI-10. 
I have to dig through the invalid options and replace * them with OMPI-10 options. */ @@ -102,6 +105,7 @@ void ompi_info_do_config(bool want_all) char *fortran_have_abstract; char *fortran_have_asynchronous; char *fortran_have_procedure; + char *fortran_have_use_only; char *fortran_have_c_funloc; char *fortran_08_using_wrappers_for_choice_buffer_functions; char *fortran_build_sizeof; @@ -126,8 +130,6 @@ void ompi_info_do_config(bool want_all) char *have_mpi_io; char *wtime_support; char *symbol_visibility; - char *ft_support; - char *crdebug_support; char *topology_support; /* Do a little preprocessor trickery here to figure opal_info_out the @@ -153,14 +155,14 @@ void ompi_info_do_config(bool want_all) #else paramcheck = "runtime"; #endif - + /* setup the strings that don't require allocations*/ cxx = OMPI_BUILD_CXX_BINDINGS ? "yes" : "no"; if (OMPI_BUILD_FORTRAN_BINDINGS >= OMPI_FORTRAN_USEMPI_BINDINGS) { if (OMPI_FORTRAN_HAVE_IGNORE_TKR) { fortran_usempi = "yes (full: ignore TKR)"; } else { - fortran_usempi = "yes (limited: overloading)"; + fortran_usempi = "yes (limited: overloading)"; } } else { fortran_usempi = "no"; @@ -168,7 +170,7 @@ void ompi_info_do_config(bool want_all) fortran_usempif08 = OMPI_BUILD_FORTRAN_BINDINGS >= OMPI_FORTRAN_USEMPIF08_BINDINGS ? "yes" : "no"; fortran_have_f08_assumed_rank = OMPI_FORTRAN_HAVE_F08_ASSUMED_RANK ? "yes" : "no"; - fortran_build_f08_subarrays = OMPI_BUILD_FORTRAN_F08_SUBARRAYS ? + fortran_build_f08_subarrays = OMPI_BUILD_FORTRAN_F08_SUBARRAYS ? "yes" : "no"; fortran_have_optional_args = OMPI_FORTRAN_HAVE_OPTIONAL_ARGS ? "yes" : "no"; @@ -181,15 +183,16 @@ void ompi_info_do_config(bool want_all) "yes" : "no"; fortran_have_bind_c_sub = OMPI_FORTRAN_HAVE_BIND_C_SUB ? "yes" : "no"; fortran_have_bind_c_type = OMPI_FORTRAN_HAVE_BIND_C_TYPE ? "yes" : "no"; - fortran_have_bind_c_type_name = OMPI_FORTRAN_HAVE_BIND_C_TYPE_NAME ? + fortran_have_bind_c_type_name = OMPI_FORTRAN_HAVE_BIND_C_TYPE_NAME ? 
"yes" : "no"; fortran_have_private = OMPI_FORTRAN_HAVE_PRIVATE ? "yes" : "no"; fortran_have_protected = OMPI_FORTRAN_HAVE_PROTECTED ? "yes" : "no"; fortran_have_abstract = OMPI_FORTRAN_HAVE_ABSTRACT ? "yes" : "no"; fortran_have_asynchronous = OMPI_FORTRAN_HAVE_ASYNCHRONOUS ? "yes" : "no"; fortran_have_procedure = OMPI_FORTRAN_HAVE_PROCEDURE ? "yes" : "no"; + fortran_have_use_only = OMPI_FORTRAN_HAVE_USE_ONLY ? "yes" : "no"; fortran_have_c_funloc = OMPI_FORTRAN_HAVE_C_FUNLOC ? "yes" : "no"; - fortran_08_using_wrappers_for_choice_buffer_functions = + fortran_08_using_wrappers_for_choice_buffer_functions = OMPI_FORTRAN_NEED_WRAPPER_ROUTINES ? "yes" : "no"; fortran_build_sizeof = OMPI_FORTRAN_BUILD_SIZEOF ? "yes" : "no"; @@ -207,13 +210,14 @@ void ompi_info_do_config(bool want_all) OMPI_FORTRAN_HAVE_ABSTRACT && OMPI_FORTRAN_HAVE_ASYNCHRONOUS && OMPI_FORTRAN_HAVE_PROCEDURE && + OMPI_FORTRAN_HAVE_USE_ONLY && OMPI_FORTRAN_HAVE_C_FUNLOC && OMPI_FORTRAN_NEED_WRAPPER_ROUTINES) { fortran_usempif08_compliance = "The mpi_f08 module is available, and is fully compliant. w00t!"; } else { int first = 1; snprintf(f08_msg, sizeof(f08_msg), - "The mpi_f08 module is available, but due to limitations in the %s compiler, does not support the following: ", + "The mpi_f08 module is available, but due to limitations in the %s compiler, does not support the following: ", OMPI_FC); if (!OMPI_BUILD_FORTRAN_F08_SUBARRAYS) { append(f08_msg, sizeof(f08_msg), &first, "array subsections"); @@ -233,6 +237,9 @@ void ompi_info_do_config(bool want_all) if (!OMPI_FORTRAN_HAVE_PROCEDURE) { append(f08_msg, sizeof(f08_msg), &first, "PROCEDUREs"); } + if (!OMPI_FORTRAN_HAVE_USE_ONLY) { + append(f08_msg, sizeof(f08_msg), &first, "USE_ONLY"); + } if (!OMPI_FORTRAN_HAVE_C_FUNLOC) { append(f08_msg, sizeof(f08_msg), &first, "C_FUNLOCs"); } @@ -252,12 +259,12 @@ void ompi_info_do_config(bool want_all) memdebug = OPAL_ENABLE_MEM_DEBUG ? "yes" : "no"; debug = OPAL_ENABLE_DEBUG ? 
"yes" : "no"; mpi_interface_warning = OMPI_WANT_MPI_INTERFACE_WARNING ? "yes" : "no"; - cprofiling = OMPI_ENABLE_MPI_PROFILING ? "yes" : "no"; - cxxprofiling = (OMPI_BUILD_CXX_BINDINGS && OMPI_ENABLE_MPI_PROFILING) ? "yes" : "no"; + cprofiling = "yes"; + cxxprofiling = OMPI_BUILD_CXX_BINDINGS ? "yes" : "no"; cxxexceptions = (OMPI_BUILD_CXX_BINDINGS && OMPI_HAVE_CXX_EXCEPTION_SUPPORT) ? "yes" : "no"; - fortran_mpifh_profiling = (OMPI_ENABLE_MPI_PROFILING && OMPI_BUILD_FORTRAN_BINDINGS >= OMPI_FORTRAN_MPIFH_BINDINGS) ? "yes" : "no"; - fortran_usempi_profiling = (OMPI_ENABLE_MPI_PROFILING && OMPI_BUILD_FORTRAN_BINDINGS >= OMPI_FORTRAN_USEMPI_BINDINGS) ? "yes" : "no"; - fortran_usempif08_profiling = (OMPI_ENABLE_MPI_PROFILING && OMPI_BUILD_FORTRAN_BINDINGS >= OMPI_FORTRAN_USEMPIF08_BINDINGS) ? "yes" : "no"; + fortran_mpifh_profiling = (OMPI_BUILD_FORTRAN_BINDINGS >= OMPI_FORTRAN_MPIFH_BINDINGS) ? "yes" : "no"; + fortran_usempi_profiling = (OMPI_BUILD_FORTRAN_BINDINGS >= OMPI_FORTRAN_USEMPI_BINDINGS) ? "yes" : "no"; + fortran_usempif08_profiling = (OMPI_BUILD_FORTRAN_BINDINGS >= OMPI_FORTRAN_USEMPIF08_BINDINGS) ? "yes" : "no"; have_dl = OPAL_HAVE_DL_SUPPORT ? "yes" : "no"; #if OMPI_RTE_ORTE mpirun_prefix_by_default = ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT ? "yes" : "no"; @@ -266,8 +273,8 @@ void ompi_info_do_config(bool want_all) have_mpi_io = OMPI_PROVIDE_MPI_FILE_INTERFACE ? "yes" : "no"; wtime_support = OPAL_TIMER_USEC_NATIVE ? "native" : "gettimeofday"; symbol_visibility = OPAL_C_HAVE_VISIBILITY ? "yes" : "no"; - topology_support = OPAL_HAVE_HWLOC ? "yes" : "no"; - + topology_support = "yes"; + /* setup strings that require allocation */ if (OMPI_BUILD_FORTRAN_BINDINGS >= OMPI_FORTRAN_MPIFH_BINDINGS) { (void)asprintf(&fortran_mpifh, "yes (%s)", @@ -303,48 +310,42 @@ void ompi_info_do_config(bool want_all) OPAL_ENABLE_MULTI_THREADS ? "yes" : "no", OPAL_ENABLE_PROGRESS_THREADS ? 
"yes" : "no"); #endif - - (void)asprintf(&ft_support, "%s (checkpoint thread: %s)", - OPAL_ENABLE_FT ? "yes" : "no", OPAL_ENABLE_FT_THREAD ? "yes" : "no"); - - (void)asprintf(&crdebug_support, "%s", - OPAL_ENABLE_CRDEBUG ? "yes" : "no"); /* output values */ opal_info_out("Configured by", "config:user", OPAL_CONFIGURE_USER); opal_info_out("Configured on", "config:timestamp", OPAL_CONFIGURE_DATE); opal_info_out("Configure host", "config:host", OPAL_CONFIGURE_HOST); - + opal_info_out("Built by", "build:user", OMPI_BUILD_USER); opal_info_out("Built on", "build:timestamp", OMPI_BUILD_DATE); opal_info_out("Built host", "build:host", OMPI_BUILD_HOST); - + opal_info_out("C bindings", "bindings:c", "yes"); opal_info_out("C++ bindings", "bindings:cxx", cxx); opal_info_out("Fort mpif.h", "bindings:mpif.h", fortran_mpifh); free(fortran_mpifh); - opal_info_out("Fort use mpi", "bindings:use_mpi", + opal_info_out("Fort use mpi", "bindings:use_mpi", fortran_usempi); - opal_info_out("Fort use mpi size", "bindings:use_mpi:size", + opal_info_out("Fort use mpi size", "bindings:use_mpi:size", ompi_info_deprecated_value); - opal_info_out("Fort use mpi_f08", "bindings:use_mpi_f08", + opal_info_out("Fort use mpi_f08", "bindings:use_mpi_f08", fortran_usempif08); - opal_info_out("Fort mpi_f08 compliance", "bindings:use_mpi_f08:compliance", + opal_info_out("Fort mpi_f08 compliance", "bindings:use_mpi_f08:compliance", fortran_usempif08_compliance); - opal_info_out("Fort mpi_f08 subarrays", "bindings:use_mpi_f08:subarrays-supported", + opal_info_out("Fort mpi_f08 subarrays", "bindings:use_mpi_f08:subarrays-supported", fortran_build_f08_subarrays); opal_info_out("Java bindings", "bindings:java", java); - opal_info_out("Wrapper compiler rpath", "compiler:all:rpath", + opal_info_out("Wrapper compiler rpath", "compiler:all:rpath", WRAPPER_RPATH_SUPPORT); opal_info_out("C compiler", "compiler:c:command", OPAL_CC); - opal_info_out("C compiler absolute", "compiler:c:absolute", + opal_info_out("C 
compiler absolute", "compiler:c:absolute", OPAL_CC_ABSOLUTE); - opal_info_out("C compiler family name", "compiler:c:familyname", + opal_info_out("C compiler family name", "compiler:c:familyname", _STRINGIFY(OPAL_BUILD_PLATFORM_COMPILER_FAMILYNAME)); - opal_info_out("C compiler version", "compiler:c:version", + opal_info_out("C compiler version", "compiler:c:version", _STRINGIFY(OPAL_BUILD_PLATFORM_COMPILER_VERSION_STR)); - + if (want_all) { opal_info_out_int("C char size", "compiler:c:sizeof:char", sizeof(char)); /* JMS: should be fixed in MPI-2.2 to differentiate between C @@ -378,15 +379,15 @@ void ompi_info_do_config(bool want_all) opal_info_out("C++ compiler", "compiler:cxx:command", OMPI_CXX); opal_info_out("C++ compiler absolute", "compiler:cxx:absolute", OMPI_CXX_ABSOLUTE); opal_info_out("Fort compiler", "compiler:fortran:command", OMPI_FC); - opal_info_out("Fort compiler abs", "compiler:fortran:absolute", + opal_info_out("Fort compiler abs", "compiler:fortran:absolute", OMPI_FC_ABSOLUTE); opal_info_out("Fort ignore TKR", "compiler:fortran:ignore_tkr", fortran_have_ignore_tkr); free(fortran_have_ignore_tkr); - opal_info_out("Fort 08 assumed shape", + opal_info_out("Fort 08 assumed shape", "compiler:fortran:f08_assumed_rank", fortran_have_f08_assumed_rank); - opal_info_out("Fort optional args", + opal_info_out("Fort optional args", "compiler:fortran:optional_arguments", fortran_have_optional_args); opal_info_out("Fort INTERFACE", @@ -398,161 +399,164 @@ void ompi_info_do_config(bool want_all) opal_info_out("Fort STORAGE_SIZE", "compiler:fortran:storage_size", fortran_have_storage_size); - opal_info_out("Fort BIND(C) (all)", + opal_info_out("Fort BIND(C) (all)", "compiler:fortran:bind_c", fortran_have_bind_c); opal_info_out("Fort ISO_C_BINDING", "compiler:fortran:iso_c_binding", fortran_have_iso_c_binding); - opal_info_out("Fort SUBROUTINE BIND(C)", + opal_info_out("Fort SUBROUTINE BIND(C)", "compiler:fortran:subroutine_bind_c", fortran_have_bind_c_sub); - 
opal_info_out("Fort TYPE,BIND(C)", + opal_info_out("Fort TYPE,BIND(C)", "compiler:fortran:type_bind_c", fortran_have_bind_c_type); - opal_info_out("Fort T,BIND(C,name=\"a\")", + opal_info_out("Fort T,BIND(C,name=\"a\")", "compiler:fortran:type_name_bind_c", fortran_have_bind_c_type_name); - opal_info_out("Fort PRIVATE", + opal_info_out("Fort PRIVATE", "compiler:fortran:private", fortran_have_private); - opal_info_out("Fort PROTECTED", + opal_info_out("Fort PROTECTED", "compiler:fortran:protected", fortran_have_protected); - opal_info_out("Fort ABSTRACT", + opal_info_out("Fort ABSTRACT", "compiler:fortran:abstract", fortran_have_abstract); - opal_info_out("Fort ASYNCHRONOUS", + opal_info_out("Fort ASYNCHRONOUS", "compiler:fortran:asynchronous", fortran_have_asynchronous); - opal_info_out("Fort PROCEDURE", + opal_info_out("Fort PROCEDURE", "compiler:fortran:procedure", fortran_have_procedure); + opal_info_out("Fort USE...ONLY", + "compiler:fortran:use_only", + fortran_have_use_only); opal_info_out("Fort C_FUNLOC", "compiler:fortran:c_funloc", fortran_have_c_funloc); - opal_info_out("Fort f08 using wrappers", + opal_info_out("Fort f08 using wrappers", "compiler:fortran:08_wrappers", fortran_08_using_wrappers_for_choice_buffer_functions); opal_info_out("Fort MPI_SIZEOF", "compiler:fortran:mpi_sizeof", fortran_build_sizeof); - + if (want_all) { - + /* Will always have the size of Fortran integer */ - - opal_info_out_int("Fort integer size", "compiler:fortran:sizeof:integer", + + opal_info_out_int("Fort integer size", "compiler:fortran:sizeof:integer", OMPI_SIZEOF_FORTRAN_INTEGER); - - opal_info_out_int("Fort logical size", "compiler:fortran:sizeof:logical", + + opal_info_out_int("Fort logical size", "compiler:fortran:sizeof:logical", OMPI_SIZEOF_FORTRAN_LOGICAL); opal_info_out_int("Fort logical value true", "compiler:fortran:value:true", OMPI_FORTRAN_VALUE_TRUE); - - + + /* May or may not have the other Fortran sizes */ - + if (OMPI_BUILD_FORTRAN_BINDINGS >= 
OMPI_FORTRAN_MPIFH_BINDINGS) { - opal_info_out("Fort have integer1", "compiler:fortran:have:integer1", + opal_info_out("Fort have integer1", "compiler:fortran:have:integer1", OMPI_HAVE_FORTRAN_INTEGER1 ? "yes" : "no"); - opal_info_out("Fort have integer2", "compiler:fortran:have:integer2", + opal_info_out("Fort have integer2", "compiler:fortran:have:integer2", OMPI_HAVE_FORTRAN_INTEGER2 ? "yes" : "no"); - opal_info_out("Fort have integer4", "compiler:fortran:have:integer4", + opal_info_out("Fort have integer4", "compiler:fortran:have:integer4", OMPI_HAVE_FORTRAN_INTEGER4 ? "yes" : "no"); - opal_info_out("Fort have integer8", "compiler:fortran:have:integer8", + opal_info_out("Fort have integer8", "compiler:fortran:have:integer8", OMPI_HAVE_FORTRAN_INTEGER8 ? "yes" : "no"); - opal_info_out("Fort have integer16", "compiler:fortran:have:integer16", + opal_info_out("Fort have integer16", "compiler:fortran:have:integer16", OMPI_HAVE_FORTRAN_INTEGER16 ? "yes" : "no"); - - opal_info_out("Fort have real4", "compiler:fortran:have:real4", + + opal_info_out("Fort have real4", "compiler:fortran:have:real4", OMPI_HAVE_FORTRAN_REAL4 ? "yes" : "no"); - opal_info_out("Fort have real8", "compiler:fortran:have:real8", + opal_info_out("Fort have real8", "compiler:fortran:have:real8", OMPI_HAVE_FORTRAN_REAL8 ? "yes" : "no"); - opal_info_out("Fort have real16", "compiler:fortran:have:real16", + opal_info_out("Fort have real16", "compiler:fortran:have:real16", OMPI_HAVE_FORTRAN_REAL16 && OMPI_REAL16_MATCHES_C ? "yes" : "no"); - - opal_info_out("Fort have complex8", "compiler:fortran:have:complex8", + + opal_info_out("Fort have complex8", "compiler:fortran:have:complex8", OMPI_HAVE_FORTRAN_COMPLEX8 ? "yes" : "no"); - opal_info_out("Fort have complex16", "compiler:fortran:have:complex16", + opal_info_out("Fort have complex16", "compiler:fortran:have:complex16", OMPI_HAVE_FORTRAN_COMPLEX16 ? 
"yes" : "no"); - opal_info_out("Fort have complex32", "compiler:fortran:have:complex32", + opal_info_out("Fort have complex32", "compiler:fortran:have:complex32", OMPI_HAVE_FORTRAN_COMPLEX32 && OMPI_REAL16_MATCHES_C ? "yes" : "no"); - - opal_info_out_int("Fort integer1 size", "compiler:fortran:sizeof:integer1", + + opal_info_out_int("Fort integer1 size", "compiler:fortran:sizeof:integer1", OMPI_HAVE_FORTRAN_INTEGER1 ? OMPI_SIZEOF_FORTRAN_INTEGER1 : -1); - opal_info_out_int("Fort integer2 size", "compiler:fortran:sizeof:integer2", + opal_info_out_int("Fort integer2 size", "compiler:fortran:sizeof:integer2", OMPI_HAVE_FORTRAN_INTEGER2 ? OMPI_SIZEOF_FORTRAN_INTEGER2 : -1); - opal_info_out_int("Fort integer4 size", "compiler:fortran:sizeof:integer4", + opal_info_out_int("Fort integer4 size", "compiler:fortran:sizeof:integer4", OMPI_HAVE_FORTRAN_INTEGER4 ? OMPI_SIZEOF_FORTRAN_INTEGER4 : -1); - opal_info_out_int("Fort integer8 size", "compiler:fortran:sizeof:integer8", + opal_info_out_int("Fort integer8 size", "compiler:fortran:sizeof:integer8", OMPI_HAVE_FORTRAN_INTEGER8 ? OMPI_SIZEOF_FORTRAN_INTEGER8 : -1); - opal_info_out_int("Fort integer16 size", "compiler:fortran:sizeof:integer16", + opal_info_out_int("Fort integer16 size", "compiler:fortran:sizeof:integer16", OMPI_HAVE_FORTRAN_INTEGER16 ? OMPI_SIZEOF_FORTRAN_INTEGER16 : -1); - - opal_info_out_int("Fort real size", "compiler:fortran:sizeof:real", + + opal_info_out_int("Fort real size", "compiler:fortran:sizeof:real", OMPI_SIZEOF_FORTRAN_REAL); - opal_info_out_int("Fort real4 size", "compiler:fortran:sizeof:real4", + opal_info_out_int("Fort real4 size", "compiler:fortran:sizeof:real4", OMPI_HAVE_FORTRAN_REAL4 ? OMPI_SIZEOF_FORTRAN_REAL4 : -1); - opal_info_out_int("Fort real8 size", "compiler:fortran:sizeof:real8", + opal_info_out_int("Fort real8 size", "compiler:fortran:sizeof:real8", OMPI_HAVE_FORTRAN_REAL8 ? 
OMPI_SIZEOF_FORTRAN_REAL8 : -1); - opal_info_out_int("Fort real16 size", "compiler:fortran:sizeof:real17", + opal_info_out_int("Fort real16 size", "compiler:fortran:sizeof:real17", OMPI_HAVE_FORTRAN_REAL16 ? OMPI_SIZEOF_FORTRAN_REAL16 : -1); - - opal_info_out_int("Fort dbl prec size", + + opal_info_out_int("Fort dbl prec size", "compiler:fortran:sizeof:double_precision", OMPI_SIZEOF_FORTRAN_DOUBLE_PRECISION); - - opal_info_out_int("Fort cplx size", "compiler:fortran:sizeof:complex", + + opal_info_out_int("Fort cplx size", "compiler:fortran:sizeof:complex", OMPI_SIZEOF_FORTRAN_COMPLEX); opal_info_out_int("Fort dbl cplx size", - "compiler:fortran:sizeof:double_complex", + "compiler:fortran:sizeof:double_complex", OMPI_HAVE_FORTRAN_DOUBLE_COMPLEX ? OMPI_SIZEOF_FORTRAN_DOUBLE_COMPLEX : -1); - opal_info_out_int("Fort cplx8 size", "compiler:fortran:sizeof:complex8", + opal_info_out_int("Fort cplx8 size", "compiler:fortran:sizeof:complex8", OMPI_HAVE_FORTRAN_COMPLEX8 ? OMPI_SIZEOF_FORTRAN_COMPLEX8 : -1); - opal_info_out_int("Fort cplx16 size", "compiler:fortran:sizeof:complex16", + opal_info_out_int("Fort cplx16 size", "compiler:fortran:sizeof:complex16", OMPI_HAVE_FORTRAN_COMPLEX16 ? OMPI_SIZEOF_FORTRAN_COMPLEX16 : -1); - opal_info_out_int("Fort cplx32 size", "compiler:fortran:sizeof:complex32", + opal_info_out_int("Fort cplx32 size", "compiler:fortran:sizeof:complex32", OMPI_HAVE_FORTRAN_COMPLEX32 ? OMPI_SIZEOF_FORTRAN_COMPLEX32 : -1); - - opal_info_out_int("Fort integer align", "compiler:fortran:align:integer", + + opal_info_out_int("Fort integer align", "compiler:fortran:align:integer", OMPI_ALIGNMENT_FORTRAN_INTEGER); - opal_info_out_int("Fort integer1 align", "compiler:fortran:align:integer1", + opal_info_out_int("Fort integer1 align", "compiler:fortran:align:integer1", OMPI_HAVE_FORTRAN_INTEGER1 ? 
OMPI_ALIGNMENT_FORTRAN_INTEGER1 : -1); - opal_info_out_int("Fort integer2 align", "compiler:fortran:align:integer2", + opal_info_out_int("Fort integer2 align", "compiler:fortran:align:integer2", OMPI_HAVE_FORTRAN_INTEGER2 ? OMPI_ALIGNMENT_FORTRAN_INTEGER2 : -1); - opal_info_out_int("Fort integer4 align", "compiler:fortran:align:integer4", + opal_info_out_int("Fort integer4 align", "compiler:fortran:align:integer4", OMPI_HAVE_FORTRAN_INTEGER4 ? OMPI_ALIGNMENT_FORTRAN_INTEGER4 : -1); - opal_info_out_int("Fort integer8 align", "compiler:fortran:align:integer8", + opal_info_out_int("Fort integer8 align", "compiler:fortran:align:integer8", OMPI_HAVE_FORTRAN_INTEGER8 ? OMPI_ALIGNMENT_FORTRAN_INTEGER8 : -1); - opal_info_out_int("Fort integer16 align", "compiler:fortran:align:integer16", + opal_info_out_int("Fort integer16 align", "compiler:fortran:align:integer16", OMPI_HAVE_FORTRAN_INTEGER16 ? OMPI_ALIGNMENT_FORTRAN_INTEGER16 : -1); - - opal_info_out_int("Fort real align", "compiler:fortran:align:real", + + opal_info_out_int("Fort real align", "compiler:fortran:align:real", OMPI_ALIGNMENT_FORTRAN_REAL); - opal_info_out_int("Fort real4 align", "compiler:fortran:align:real4", + opal_info_out_int("Fort real4 align", "compiler:fortran:align:real4", OMPI_HAVE_FORTRAN_REAL4 ? OMPI_ALIGNMENT_FORTRAN_REAL4 : -1); - opal_info_out_int("Fort real8 align", "compiler:fortran:align:real8", + opal_info_out_int("Fort real8 align", "compiler:fortran:align:real8", OMPI_HAVE_FORTRAN_REAL8 ? OMPI_ALIGNMENT_FORTRAN_REAL8 : -1); - opal_info_out_int("Fort real16 align", "compiler:fortran:align:real16", + opal_info_out_int("Fort real16 align", "compiler:fortran:align:real16", OMPI_HAVE_FORTRAN_REAL16 ? 
OMPI_ALIGNMENT_FORTRAN_REAL16 : -1); - - opal_info_out_int("Fort dbl prec align", + + opal_info_out_int("Fort dbl prec align", "compiler:fortran:align:double_precision", OMPI_ALIGNMENT_FORTRAN_DOUBLE_PRECISION); - - opal_info_out_int("Fort cplx align", "compiler:fortran:align:complex", + + opal_info_out_int("Fort cplx align", "compiler:fortran:align:complex", OMPI_ALIGNMENT_FORTRAN_COMPLEX); opal_info_out_int("Fort dbl cplx align", - "compiler:fortran:align:double_complex", + "compiler:fortran:align:double_complex", OMPI_HAVE_FORTRAN_DOUBLE_COMPLEX ? OMPI_ALIGNMENT_FORTRAN_DOUBLE_COMPLEX : -1); - opal_info_out_int("Fort cplx8 align", "compiler:fortran:align:complex8", + opal_info_out_int("Fort cplx8 align", "compiler:fortran:align:complex8", OMPI_HAVE_FORTRAN_COMPLEX8 ? OMPI_ALIGNMENT_FORTRAN_COMPLEX8 : -1); - opal_info_out_int("Fort cplx16 align", "compiler:fortran:align:complex16", + opal_info_out_int("Fort cplx16 align", "compiler:fortran:align:complex16", OMPI_HAVE_FORTRAN_COMPLEX16 ? OMPI_ALIGNMENT_FORTRAN_COMPLEX16 : -1); - opal_info_out_int("Fort cplx32 align", "compiler:fortran:align:complex32", + opal_info_out_int("Fort cplx32 align", "compiler:fortran:align:complex32", OMPI_HAVE_FORTRAN_COMPLEX32 ? 
OMPI_ALIGNMENT_FORTRAN_COMPLEX32 : -1); - + } else { opal_info_out("Fort real size", "compiler:fortran:sizeof:real", "skipped"); opal_info_out("Fort dbl prec size", @@ -560,57 +564,57 @@ void ompi_info_do_config(bool want_all) opal_info_out("Fort cplx size", "compiler:fortran:sizeof:complex", "skipped"); opal_info_out("Fort dbl cplx size", "compiler:fortran:sizeof:double_complex", "skipped"); - + opal_info_out("Fort integer align", "compiler:fortran:align:integer", "skipped"); opal_info_out("Fort real align", "compiler:fortran:align:real", "skipped"); - opal_info_out("Fort dbl prec align", + opal_info_out("Fort dbl prec align", "compiler:fortran:align:double_precision","skipped"); opal_info_out("Fort cplx align", "compiler:fortran:align:complex", "skipped"); opal_info_out("Fort dbl cplx align", "compiler:fortran:align:double_complex", "skipped"); } } - + opal_info_out("C profiling", "option:profiling:c", cprofiling); opal_info_out("C++ profiling", "option:profiling:cxx", cxxprofiling); opal_info_out("Fort mpif.h profiling", "option:profiling:mpif.h", fortran_mpifh_profiling); opal_info_out("Fort use mpi profiling", "option:profiling:use_mpi", fortran_usempi_profiling); - opal_info_out("Fort use mpi_f08 prof", + opal_info_out("Fort use mpi_f08 prof", "option:profiling:use_mpi_f08", fortran_usempif08_profiling); - + opal_info_out("C++ exceptions", "option:cxx_exceptions", cxxexceptions); opal_info_out("Thread support", "option:threads", threads); free(threads); opal_info_out("Sparse Groups", "option:sparse:groups", sparse_groups); - + if (want_all) { - + /* Don't display the build CPPFLAGS or CXXCPPFLAGS because they're * just -I$(top_srcdir)/include, etc. Hence, they're a) boring, * and c) specific for ompi_info. 
*/ - + opal_info_out("Build CFLAGS", "option:build:cflags", OMPI_BUILD_CFLAGS); opal_info_out("Build CXXFLAGS", "option:build:cxxflags", OMPI_BUILD_CXXFLAGS); opal_info_out("Build FCFLAGS", "option:build:fcflags", OMPI_BUILD_FCFLAGS); opal_info_out("Build LDFLAGS", "option:build:ldflags", OMPI_BUILD_LDFLAGS); opal_info_out("Build LIBS", "option:build:libs", OMPI_BUILD_LIBS); - - opal_info_out("Wrapper extra CFLAGS", "option:wrapper:extra_cflags", + + opal_info_out("Wrapper extra CFLAGS", "option:wrapper:extra_cflags", WRAPPER_EXTRA_CFLAGS); - opal_info_out("Wrapper extra CXXFLAGS", "option:wrapper:extra_cxxflags", + opal_info_out("Wrapper extra CXXFLAGS", "option:wrapper:extra_cxxflags", WRAPPER_EXTRA_CXXFLAGS); - opal_info_out("Wrapper extra FCFLAGS", "option:wrapper:extra_fcflags", + opal_info_out("Wrapper extra FCFLAGS", "option:wrapper:extra_fcflags", WRAPPER_EXTRA_FCFLAGS); - opal_info_out("Wrapper extra LDFLAGS", "option:wrapper:extra_ldflags", + opal_info_out("Wrapper extra LDFLAGS", "option:wrapper:extra_ldflags", WRAPPER_EXTRA_LDFLAGS); opal_info_out("Wrapper extra LIBS", "option:wrapper:extra_libs", WRAPPER_EXTRA_LIBS); } - + opal_info_out("Internal debug support", "option:debug", debug); opal_info_out("MPI interface warnings", "option:mpi-interface-warning", mpi_interface_warning); opal_info_out("MPI parameter check", "option:mpi-param-check", paramcheck); @@ -619,41 +623,35 @@ void ompi_info_do_config(bool want_all) opal_info_out("dl support", "option:dlopen", have_dl); opal_info_out("Heterogeneous support", "options:heterogeneous", heterogeneous); #if OMPI_RTE_ORTE - opal_info_out("mpirun default --prefix", "mpirun:prefix_by_default", + opal_info_out("mpirun default --prefix", "mpirun:prefix_by_default", mpirun_prefix_by_default); #endif opal_info_out("MPI I/O support", "options:mpi-io", have_mpi_io); opal_info_out("MPI_WTIME support", "options:mpi-wtime", wtime_support); opal_info_out("Symbol vis. 
support", "options:visibility", symbol_visibility); - opal_info_out("Host topology support", "options:host-topology", + opal_info_out("Host topology support", "options:host-topology", topology_support); - - opal_info_out("MPI extensions", "options:mpi_ext", OMPI_MPIEXT_COMPONENTS); - - opal_info_out("FT Checkpoint support", "options:ft_support", ft_support); - free(ft_support); - opal_info_out("C/R Enabled Debugging", "options:crdebug_support", crdebug_support); - free(crdebug_support); + opal_info_out("MPI extensions", "options:mpi_ext", OMPI_MPIEXT_COMPONENTS); - opal_info_out_int("MPI_MAX_PROCESSOR_NAME", "options:mpi-max-processor-name", + opal_info_out_int("MPI_MAX_PROCESSOR_NAME", "options:mpi-max-processor-name", MPI_MAX_PROCESSOR_NAME); - opal_info_out_int("MPI_MAX_ERROR_STRING", "options:mpi-max-error-string", + opal_info_out_int("MPI_MAX_ERROR_STRING", "options:mpi-max-error-string", MPI_MAX_ERROR_STRING); - opal_info_out_int("MPI_MAX_OBJECT_NAME", "options:mpi-max-object-name", + opal_info_out_int("MPI_MAX_OBJECT_NAME", "options:mpi-max-object-name", MPI_MAX_OBJECT_NAME); - opal_info_out_int("MPI_MAX_INFO_KEY", "options:mpi-max-info-key", + opal_info_out_int("MPI_MAX_INFO_KEY", "options:mpi-max-info-key", MPI_MAX_INFO_KEY); - opal_info_out_int("MPI_MAX_INFO_VAL", "options:mpi-max-info-val", + opal_info_out_int("MPI_MAX_INFO_VAL", "options:mpi-max-info-val", MPI_MAX_INFO_VAL); - opal_info_out_int("MPI_MAX_PORT_NAME", "options:mpi-max-port-name", + opal_info_out_int("MPI_MAX_PORT_NAME", "options:mpi-max-port-name", MPI_MAX_PORT_NAME); #if OMPI_PROVIDE_MPI_FILE_INTERFACE - opal_info_out_int("MPI_MAX_DATAREP_STRING", "options:mpi-max-datarep-string", + opal_info_out_int("MPI_MAX_DATAREP_STRING", "options:mpi-max-datarep-string", MPI_MAX_DATAREP_STRING); #else - opal_info_out("MPI_MAX_DATAREP_STRING", "options:mpi-max-datarep-string", + opal_info_out("MPI_MAX_DATAREP_STRING", "options:mpi-max-datarep-string", "IO interface not provided"); #endif - + } diff 
--git a/ompi/tools/wrappers/Makefile.am b/ompi/tools/wrappers/Makefile.am index b1b2ee99aec..9f973785048 100644 --- a/ompi/tools/wrappers/Makefile.am +++ b/ompi/tools/wrappers/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -15,9 +15,9 @@ # Copyright (c) 2014 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/ompi/tools/wrappers/mpic++-wrapper-data.txt.in b/ompi/tools/wrappers/mpic++-wrapper-data.txt.in index 6f5c2b99623..082d3a94dc6 100644 --- a/ompi/tools/wrappers/mpic++-wrapper-data.txt.in +++ b/ompi/tools/wrappers/mpic++-wrapper-data.txt.in @@ -20,10 +20,10 @@ linker_flags=@OMPI_WRAPPER_EXTRA_LDFLAGS@ # intentionally only link in the MPI libraries (ORTE, OPAL, etc. are # pulled in implicitly) because we intend MPI applications to only use # the MPI API. 
-libs=@OMPI_WRAPPER_CXX_LIB@ -lmpi -libs_static=@OMPI_WRAPPER_CXX_LIB@ -lmpi -l@ORTE_LIB_PREFIX@open-rte -l@OPAL_LIB_PREFIX@open-pal @OMPI_WRAPPER_EXTRA_LIBS@ -dyn_lib_file=libmpi.@OPAL_DYN_LIB_SUFFIX@ -static_lib_file=libmpi.a +libs=@OMPI_WRAPPER_CXX_LIB@ -l@OMPI_LIBMPI_NAME@ +libs_static=@OMPI_WRAPPER_CXX_LIB@ -l@OMPI_LIBMPI_NAME@ -l@ORTE_LIB_PREFIX@open-rte -l@OPAL_LIB_PREFIX@open-pal @OMPI_WRAPPER_EXTRA_LIBS@ +dyn_lib_file=lib@OMPI_LIBMPI_NAME@.@OPAL_DYN_LIB_SUFFIX@ +static_lib_file=lib@OMPI_LIBMPI_NAME@.a required_file=@OMPI_WRAPPER_CXX_REQUIRED_FILE@ includedir=${includedir} libdir=${libdir} diff --git a/ompi/tools/wrappers/mpicc-wrapper-data.txt.in b/ompi/tools/wrappers/mpicc-wrapper-data.txt.in index b025f216087..5f5db45b024 100644 --- a/ompi/tools/wrappers/mpicc-wrapper-data.txt.in +++ b/ompi/tools/wrappers/mpicc-wrapper-data.txt.in @@ -20,10 +20,10 @@ linker_flags=@OMPI_WRAPPER_EXTRA_LDFLAGS@ # intentionally only link in the MPI libraries (ORTE, OPAL, etc. are # pulled in implicitly) because we intend MPI applications to only use # the MPI API. 
-libs=-lmpi -libs_static=-lmpi -l@ORTE_LIB_PREFIX@open-rte -l@OPAL_LIB_PREFIX@open-pal @OMPI_WRAPPER_EXTRA_LIBS@ -dyn_lib_file=libmpi.@OPAL_DYN_LIB_SUFFIX@ -static_lib_file=libmpi.a +libs=-l@OMPI_LIBMPI_NAME@ +libs_static=-l@OMPI_LIBMPI_NAME@ -l@ORTE_LIB_PREFIX@open-rte -l@OPAL_LIB_PREFIX@open-pal @OMPI_WRAPPER_EXTRA_LIBS@ +dyn_lib_file=lib@OMPI_LIBMPI_NAME@.@OPAL_DYN_LIB_SUFFIX@ +static_lib_file=lib@OMPI_LIBMPI_NAME@.a required_file= includedir=${includedir} libdir=${libdir} diff --git a/ompi/tools/wrappers/mpifort-wrapper-data.txt.in b/ompi/tools/wrappers/mpifort-wrapper-data.txt.in index f9c79151b21..aeb3e750651 100644 --- a/ompi/tools/wrappers/mpifort-wrapper-data.txt.in +++ b/ompi/tools/wrappers/mpifort-wrapper-data.txt.in @@ -13,16 +13,16 @@ compiler_env=FC compiler_flags_env=FCFLAGS compiler=@FC@ preprocessor_flags= -compiler_flags=@OMPI_WRAPPER_EXTRA_FCFLAGS@ +compiler_flags=@OMPI_WRAPPER_EXTRA_FCFLAGS@ linker_flags=@OMPI_WRAPPER_EXTRA_LDFLAGS@ # Note that per https://svn.open-mpi.org/trac/ompi/ticket/3422, we # intentionally only link in the MPI libraries (ORTE, OPAL, etc. are # pulled in implicitly) because we intend MPI applications to only use # the MPI API. 
-libs=@OMPI_FORTRAN_USEMPIF08_LIB@ @OMPI_FORTRAN_USEMPI_LIB@ -lmpi_mpifh -lmpi -libs_static=@OMPI_FORTRAN_USEMPIF08_LIB@ @OMPI_FORTRAN_USEMPI_LIB@ -lmpi_mpifh -lmpi -l@ORTE_LIB_PREFIX@open-rte -l@OPAL_LIB_PREFIX@open-pal @OMPI_WRAPPER_EXTRA_LIBS@ -dyn_lib_file=libmpi.@OPAL_DYN_LIB_SUFFIX@ -static_lib_file=libmpi.a +libs=@OMPI_FORTRAN_USEMPIF08_LIB@ @OMPI_FORTRAN_USEMPI_LIB@ -l@OMPI_LIBMPI_NAME@_mpifh -l@OMPI_LIBMPI_NAME@ +libs_static=@OMPI_FORTRAN_USEMPIF08_LIB@ @OMPI_FORTRAN_USEMPI_LIB@ -l@OMPI_LIBMPI_NAME@_mpifh -l@OMPI_LIBMPI_NAME@ -l@ORTE_LIB_PREFIX@open-rte -l@OPAL_LIB_PREFIX@open-pal @OMPI_WRAPPER_EXTRA_LIBS@ +dyn_lib_file=lib@OMPI_LIBMPI_NAME@.@OPAL_DYN_LIB_SUFFIX@ +static_lib_file=lib@OMPI_LIBMPI_NAME@.a required_file=@OMPI_WRAPPER_FORTRAN_REQUIRED_FILE@ includedir=${includedir} libdir=${libdir} diff --git a/ompi/tools/wrappers/mpijavac.1 b/ompi/tools/wrappers/mpijavac.1 index 39c35c405f1..15ffe26ef16 100644 --- a/ompi/tools/wrappers/mpijavac.1 +++ b/ompi/tools/wrappers/mpijavac.1 @@ -9,11 +9,11 @@ mpijava [-showme|-showme:compile|-showme:link] ... . .SH OPTIONS .TP ---showme +--showme This option comes in several different variants (see below). None of the variants invokes the underlying compiler; they all provide information on how the underlying compiler would have been invoked had -.I --showme +.I --showme not been used. The basic .I --showme @@ -137,10 +137,10 @@ line. . . .SH ENVIRONMENT VARIABLES -.PP +.PP By default, the wrappers use the compilers that were selected when Open MPI was configured. These compilers were either found automatically by Open MPI's "configure" script, or were selected by -the user in the CC, CXX, F77, JAVAC, and/or FC environment variables +the user in the CC, CXX, F77, JAVAC, and/or FC environment variables before "configure" was invoked. Additionally, other arguments specific to the compiler may have been selected by configure. 
diff --git a/ompi/tools/wrappers/ompi-c.pc.in b/ompi/tools/wrappers/ompi-c.pc.in index 083fbd3c0f2..be29ad77f2c 100644 --- a/ompi/tools/wrappers/ompi-c.pc.in +++ b/ompi/tools/wrappers/ompi-c.pc.in @@ -1,4 +1,5 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # Name: Open MPI Description: Portable high-performance MPI implementation @@ -9,12 +10,13 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ includedir=@includedir@ libdir=@libdir@ +pkgincludedir=@opalincludedir@ # # Note that -lopen-pal and all the EXTRA_LIBS are only necessary when # static linking (they're pulled in by libopen-rte.so's implicit # dependencies), so only list these in Libs.private. # -Libs: -L${libdir} @OMPI_PKG_CONFIG_LDFLAGS@ -lmpi -Libs.private: @OMPI_WRAPPER_EXTRA_LIBS@ +Libs: -L${libdir} @OMPI_PKG_CONFIG_LDFLAGS@ -l@OMPI_LIBMPI_NAME@ +Libs.private: -lopen-rte -lopen-pal @OMPI_WRAPPER_EXTRA_LIBS@ # Cflags: -I${includedir} @OMPI_WRAPPER_EXTRA_CPPFLAGS@ @OMPI_WRAPPER_EXTRA_CFLAGS@ diff --git a/ompi/tools/wrappers/ompi-cxx.pc.in b/ompi/tools/wrappers/ompi-cxx.pc.in index db2176ba5b6..4b1936639f3 100644 --- a/ompi/tools/wrappers/ompi-cxx.pc.in +++ b/ompi/tools/wrappers/ompi-cxx.pc.in @@ -1,4 +1,5 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # Name: Open MPI Description: Portable high-performance MPI implementation @@ -9,12 +10,13 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ includedir=@includedir@ libdir=@libdir@ +pkgincludedir=@opalincludedir@ # # Note that -lopen-pal and all the EXTRA_LIBS are only necessary when # static linking (they're pulled in by libopen-rte.so's implicit # dependencies), so only list these in Libs.private. 
# -Libs: -L${libdir} @OMPI_PKG_CONFIG_LDFLAGS@ @OMPI_WRAPPER_CXX_LIB@ -lmpi -Libs.private: @OMPI_WRAPPER_EXTRA_LIBS@ +Libs: -L${libdir} @OMPI_PKG_CONFIG_LDFLAGS@ @OMPI_WRAPPER_CXX_LIB@ -l@OMPI_LIBMPI_NAME@ +Libs.private: -lopen-rte -lopen-pal @OMPI_WRAPPER_EXTRA_LIBS@ # Cflags: -I${includedir} @OMPI_WRAPPER_EXTRA_CPPFLAGS@ @OMPI_WRAPPER_EXTRA_CXXFLAGS@ diff --git a/ompi/tools/wrappers/ompi-fort.pc.in b/ompi/tools/wrappers/ompi-fort.pc.in index 66641153250..5635870b80e 100644 --- a/ompi/tools/wrappers/ompi-fort.pc.in +++ b/ompi/tools/wrappers/ompi-fort.pc.in @@ -1,4 +1,5 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # Name: Open MPI Description: Portable high-performance MPI implementation @@ -9,11 +10,12 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ includedir=@includedir@ libdir=@libdir@ +pkgincludedir=@opalincludedir@ # # Note that -lopen-pal and all the EXTRA_LIBS are only necessary when # static linking (they're pulled in by libopen-rte.so's implicit # dependencies), so only list these in Libs.private. # -Libs: -L${libdir} @OMPI_PKG_CONFIG_LDFLAGS@ @OMPI_FORTRAN_USEMPIF08_LIB@ @OMPI_FORTRAN_USEMPI_LIB@ -lmpi_mpifh -lmpi -Libs.private: @OMPI_WRAPPER_EXTRA_LIBS@ +Libs: -L${libdir} @OMPI_PKG_CONFIG_LDFLAGS@ @OMPI_FORTRAN_USEMPIF08_LIB@ @OMPI_FORTRAN_USEMPI_LIB@ -l@OMPI_LIBMPI_NAME@_mpifh -l@OMPI_LIBMPI_NAME@ +Libs.private: -lopen-rte -lopen-pal @OMPI_WRAPPER_EXTRA_LIBS@ Cflags: -I${includedir} @OMPI_WRAPPER_EXTRA_CPPFLAGS@ @OMPI_WRAPPER_EXTRA_FCFLAGS@ diff --git a/ompi/tools/wrappers/ompi.pc.in b/ompi/tools/wrappers/ompi.pc.in index 083fbd3c0f2..c6961869ee8 100644 --- a/ompi/tools/wrappers/ompi.pc.in +++ b/ompi/tools/wrappers/ompi.pc.in @@ -1,4 +1,5 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# Name: Open MPI Description: Portable high-performance MPI implementation @@ -9,12 +10,13 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ includedir=@includedir@ libdir=@libdir@ +pkgincludedir=@opalincludedir@ # # Note that -lopen-pal and all the EXTRA_LIBS are only necessary when # static linking (they're pulled in by libopen-rte.so's implicit # dependencies), so only list these in Libs.private. # -Libs: -L${libdir} @OMPI_PKG_CONFIG_LDFLAGS@ -lmpi +Libs: -L${libdir} @OMPI_PKG_CONFIG_LDFLAGS@ -l@OMPI_LIBMPI_NAME@ Libs.private: @OMPI_WRAPPER_EXTRA_LIBS@ # Cflags: -I${includedir} @OMPI_WRAPPER_EXTRA_CPPFLAGS@ @OMPI_WRAPPER_EXTRA_CFLAGS@ diff --git a/ompi/tools/wrappers/ompi_wrapper_script.in b/ompi/tools/wrappers/ompi_wrapper_script.in index 47f59027fa5..2d0cfb2ae39 100644 --- a/ompi/tools/wrappers/ompi_wrapper_script.in +++ b/ompi/tools/wrappers/ompi_wrapper_script.in @@ -7,6 +7,7 @@ # Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2013 Sandia National Laboratories. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -46,6 +47,7 @@ my $cxx_lib = "@OMPI_WRAPPER_CXX_LIB@"; my $fc_module_flag = "@OMPI_FC_MODULE_FLAG@"; my $dynamic_lib_suffix = "@OPAL_DYN_LIB_SUFFIX@"; my $fortran_libs = "@OMPI_FORTRAN_USEMPIF08_LIB@ @OMPI_FORTRAN_USEMPI_LIB@"; +my $ompi_libmpi_name = "@OMPI_LIBMPI_NAME@"; # Someone might want to fix for windows my $include_flag = "-I"; @@ -61,15 +63,15 @@ my $linker_flags = $libdir_flag . $libdir . " " . $extra_ldflags; # intentionally only link in the MPI libraries (ORTE, OPAL, etc. are # pulled in implicitly) because we intend MPI applications to only use # the MPI API. -my $libs = "-lmpi " . $extra_libs; -my $libs_static = "-lmpi -lopen-rte -lopen-pal " . $extra_libs; +my $libs = "-l".$ompi_libmpi_name." " . $extra_libs; +my $libs_static = "-l".$ompi_libmpi_name." 
-lopen-rte -lopen-pal " . $extra_libs; my $have_dynamic = 0; -if (-e $libdir . "/libmpi." . $dynamic_lib_suffix) { +if (-e $libdir . "/lib".$ompi_libmpi_name."." . $dynamic_lib_suffix) { $have_dynamic = 1; } my $have_static = 0; -if (-e $libdir . "/libmpi.a") { +if (-e $libdir . "/lib".$ompi_libmpi_name.".a") { $have_static = 1; } @@ -114,7 +116,7 @@ if (basename($0) eq "mpicc") { $comp_flags = $extra_cxxflags; $comp_flags_prefix = $extra_cxxflags_prefix; $libs = $cxx_lib . " " . $libs; -} +} # mpifort is now preferred; mpif77/mpif90 are legacy names elsif (basename($0) eq "mpifort" || basename($0) eq "mpif77" || basename($0) eq "mpif90") { @@ -123,7 +125,7 @@ elsif (basename($0) eq "mpifort" || # no extra includes for Fortran. $comp_flags = $extra_fcflags; $comp_flags_prefix = $extra_fcflags_prefix; - $libs = $fortran_libs . " -lmpi_mpifh " . $libs; + $libs = $fortran_libs . " -l".$ompi_libmpi_name."_mpifh " . $libs; } if ($lang eq "none") { diff --git a/ompi/win/Makefile.am b/ompi/win/Makefile.am index f0eaf6e7848..67126c71ec0 100644 --- a/ompi/win/Makefile.am +++ b/ompi/win/Makefile.am @@ -6,21 +6,22 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This makefile.am does not stand on its own - it is included from ompi/Makefile.am headers += \ - win/win.h + win/win.h -libmpi_la_SOURCES += \ +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ win/win.c diff --git a/ompi/win/win.c b/ompi/win/win.c index 066175afe21..3b3d2b9ba04 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -1,25 +1,25 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* +/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -35,15 +35,34 @@ #include "ompi/mca/osc/base/base.h" #include "ompi/mca/osc/osc.h" +#include "ompi/runtime/params.h" /* * Table for Fortran <-> C communicator handle conversion. Note that * these are not necessarily global. 
*/ -opal_pointer_array_t ompi_mpi_windows = {{0}}; +opal_pointer_array_t ompi_mpi_windows = {{0}}; ompi_predefined_win_t ompi_mpi_win_null = {{{0}}}; ompi_predefined_win_t *ompi_mpi_win_null_addr = &ompi_mpi_win_null; +mca_base_var_enum_t *ompi_win_accumulate_ops = NULL; +mca_base_var_enum_flag_t *ompi_win_accumulate_order = NULL; + +static mca_base_var_enum_value_t accumulate_ops_values[] = { + {.value = OMPI_WIN_ACCUMULATE_OPS_SAME_OP_NO_OP, .string = "same_op_no_op",}, + {.value = OMPI_WIN_ACCUMULATE_OPS_SAME_OP, .string = "same_op",}, + {.value = -1, .string = NULL}, +}; + +static mca_base_var_enum_value_flag_t accumulate_order_flags[] = { + {.flag = OMPI_WIN_ACC_ORDER_NONE, .string = "none", .conflicting_flag = OMPI_WIN_ACC_ORDER_RAR | + OMPI_WIN_ACC_ORDER_WAR | OMPI_WIN_ACC_ORDER_RAW | OMPI_WIN_ACC_ORDER_WAW}, + {.flag = OMPI_WIN_ACC_ORDER_RAR, .string = "rar", .conflicting_flag = OMPI_WIN_ACC_ORDER_NONE}, + {.flag = OMPI_WIN_ACC_ORDER_WAR, .string = "war", .conflicting_flag = OMPI_WIN_ACC_ORDER_NONE}, + {.flag = OMPI_WIN_ACC_ORDER_RAW, .string = "raw", .conflicting_flag = OMPI_WIN_ACC_ORDER_NONE}, + {.flag = OMPI_WIN_ACC_ORDER_WAW, .string = "waw", .conflicting_flag = OMPI_WIN_ACC_ORDER_NONE}, + {0}, +}; static void ompi_win_construct(ompi_win_t *win); static void ompi_win_destruct(ompi_win_t *win); @@ -54,6 +73,10 @@ OBJ_CLASS_INSTANCE(ompi_win_t, opal_object_t, int ompi_win_init(void) { + int ret; + + assert (sizeof (ompi_predefined_win_t) >= sizeof (ompi_win_t)); + /* setup window Fortran array */ OBJ_CONSTRUCT(&ompi_mpi_windows, opal_pointer_array_t); if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_mpi_windows, 0, @@ -69,36 +92,93 @@ ompi_win_init(void) ompi_win_set_name(&ompi_mpi_win_null.win, "MPI_WIN_NULL"); opal_pointer_array_set_item(&ompi_mpi_windows, 0, &ompi_mpi_win_null.win); + ret = mca_base_var_enum_create ("accumulate_ops", accumulate_ops_values, &ompi_win_accumulate_ops); + if (OPAL_SUCCESS != ret) { + return ret; + } + + ret = 
mca_base_var_enum_create_flag ("accumulate_order", accumulate_order_flags, &ompi_win_accumulate_order); + if (OPAL_SUCCESS != ret) { + return ret; + } + return OMPI_SUCCESS; } +static void ompi_win_dump (ompi_win_t *win) +{ + opal_output(0, "Dumping information for window: %s\n", win->w_name); + opal_output(0," Fortran window handle: %d, window size: %d\n", + win->w_f_to_c_index, ompi_group_size (win->w_group)); +} -int -ompi_win_finalize(void) +int ompi_win_finalize(void) { + size_t size = opal_pointer_array_get_size (&ompi_mpi_windows); + /* start at 1 to skip win null */ + for (size_t i = 1 ; i < size ; ++i) { + ompi_win_t *win = + (ompi_win_t *) opal_pointer_array_get_item (&ompi_mpi_windows, i); + if (NULL != win) { + if (ompi_debug_show_handle_leaks && !ompi_win_invalid(win)){ + opal_output(0,"WARNING: MPI_Win still allocated in MPI_Finalize\n"); + ompi_win_dump (win); + } + ompi_win_free (win); + } + } + OBJ_DESTRUCT(&ompi_mpi_win_null.win); OBJ_DESTRUCT(&ompi_mpi_windows); + OBJ_RELEASE(ompi_win_accumulate_ops); + OBJ_RELEASE(ompi_win_accumulate_order); return OMPI_SUCCESS; } -static ompi_win_t * -alloc_window(struct ompi_communicator_t *comm) +static int alloc_window(struct ompi_communicator_t *comm, ompi_info_t *info, int flavor, ompi_win_t **win_out) { ompi_win_t *win; ompi_group_t *group; + int acc_ops, acc_order, flag, ret; /* create the object */ win = OBJ_NEW(ompi_win_t); - if (NULL == win) return NULL; + if (NULL == win) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + ret = ompi_info_get_value_enum (info, "accumulate_ops", &acc_ops, + OMPI_WIN_ACCUMULATE_OPS_SAME_OP_NO_OP, + ompi_win_accumulate_ops, &flag); + if (OMPI_SUCCESS != ret) { + OBJ_RELEASE(win); + return ret; + } + + win->w_acc_ops = (ompi_win_accumulate_ops_t)acc_ops; + + ret = ompi_info_get_value_enum (info, "accumulate_order", &acc_order, + OMPI_WIN_ACC_ORDER_RAR | OMPI_WIN_ACC_ORDER_WAR | + OMPI_WIN_ACC_ORDER_RAW | OMPI_WIN_ACC_ORDER_WAW, + &(ompi_win_accumulate_order->super), &flag); + 
if (OMPI_SUCCESS != ret) { + OBJ_RELEASE(win); + return ret; + } + + win->w_acc_order = acc_order; + + win->w_flavor = flavor; /* setup data that is independent of osc component */ group = comm->c_local_group; OBJ_RETAIN(group); - ompi_group_increment_proc_count(group); win->w_group = group; - return win; + *win_out = win; + + return OMPI_SUCCESS; } static int @@ -107,27 +187,27 @@ config_window(void *base, size_t size, int disp_unit, { int ret; - ret = ompi_attr_set_c(WIN_ATTR, win, &win->w_keyhash, + ret = ompi_attr_set_c(WIN_ATTR, win, &win->w_keyhash, MPI_WIN_BASE, base, true); if (OMPI_SUCCESS != ret) return ret; - ret = ompi_attr_set_fortran_mpi2(WIN_ATTR, win, - &win->w_keyhash, + ret = ompi_attr_set_fortran_mpi2(WIN_ATTR, win, + &win->w_keyhash, MPI_WIN_SIZE, size, true); if (OMPI_SUCCESS != ret) return ret; - ret = ompi_attr_set_fortran_mpi2(WIN_ATTR, win, - &win->w_keyhash, + ret = ompi_attr_set_fortran_mpi1(WIN_ATTR, win, + &win->w_keyhash, MPI_WIN_DISP_UNIT, disp_unit, true); if (OMPI_SUCCESS != ret) return ret; - ret = ompi_attr_set_fortran_mpi2(WIN_ATTR, win, + ret = ompi_attr_set_fortran_mpi1(WIN_ATTR, win, &win->w_keyhash, MPI_WIN_CREATE_FLAVOR, flavor, true); if (OMPI_SUCCESS != ret) return ret; - ret = ompi_attr_set_fortran_mpi2(WIN_ATTR, win, + ret = ompi_attr_set_fortran_mpi1(WIN_ATTR, win, &win->w_keyhash, MPI_WIN_MODEL, model, true); if (OMPI_SUCCESS != ret) return ret; @@ -139,7 +219,7 @@ config_window(void *base, size_t size, int disp_unit, } int -ompi_win_create(void *base, size_t size, +ompi_win_create(void *base, size_t size, int disp_unit, ompi_communicator_t *comm, ompi_info_t *info, ompi_win_t** newwin) @@ -148,8 +228,10 @@ ompi_win_create(void *base, size_t size, int model; int ret; - win = alloc_window(comm); - if (NULL == win) return OMPI_ERR_OUT_OF_RESOURCE; + ret = alloc_window (comm, info, MPI_WIN_FLAVOR_CREATE, &win); + if (OMPI_SUCCESS != ret) { + return ret; + } ret = ompi_osc_base_select(win, &base, size, disp_unit, comm, 
info, MPI_WIN_FLAVOR_CREATE, &model); if (OMPI_SUCCESS != ret) { @@ -178,8 +260,10 @@ ompi_win_allocate(size_t size, int disp_unit, ompi_info_t *info, int ret; void *base; - win = alloc_window(comm); - if (NULL == win) return OMPI_ERR_OUT_OF_RESOURCE; + ret = alloc_window (comm, info, MPI_WIN_FLAVOR_ALLOCATE, &win); + if (OMPI_SUCCESS != ret) { + return ret; + } ret = ompi_osc_base_select(win, &base, size, disp_unit, comm, info, MPI_WIN_FLAVOR_ALLOCATE, &model); if (OMPI_SUCCESS != ret) { @@ -209,8 +293,10 @@ ompi_win_allocate_shared(size_t size, int disp_unit, ompi_info_t *info, int ret; void *base; - win = alloc_window(comm); - if (NULL == win) return OMPI_ERR_OUT_OF_RESOURCE; + ret = alloc_window (comm, info, MPI_WIN_FLAVOR_SHARED, &win); + if (OMPI_SUCCESS != ret) { + return ret; + } ret = ompi_osc_base_select(win, &base, size, disp_unit, comm, info, MPI_WIN_FLAVOR_SHARED, &model); if (OMPI_SUCCESS != ret) { @@ -238,8 +324,10 @@ ompi_win_create_dynamic(ompi_info_t *info, ompi_communicator_t *comm, ompi_win_t int model; int ret; - win = alloc_window(comm); - if (NULL == win) return OMPI_ERR_OUT_OF_RESOURCE; + ret = alloc_window (comm, info, MPI_WIN_FLAVOR_DYNAMIC, &win); + if (OMPI_SUCCESS != ret) { + return ret; + } ret = ompi_osc_base_select(win, MPI_BOTTOM, 0, 1, comm, info, MPI_WIN_FLAVOR_DYNAMIC, &model); if (OMPI_SUCCESS != ret) { @@ -306,7 +394,6 @@ ompi_win_get_name(ompi_win_t *win, char *win_name, int *length) int ompi_win_group(ompi_win_t *win, ompi_group_t **group) { OBJ_RETAIN(win->w_group); - ompi_group_increment_proc_count(win->w_group); *group = win->w_group; return OMPI_SUCCESS; @@ -346,7 +433,6 @@ ompi_win_destruct(ompi_win_t *win) } if (NULL != win->w_group) { - ompi_group_decrement_proc_count(win->w_group); OBJ_RELEASE(win->w_group); } diff --git a/ompi/win/win.h b/ompi/win/win.h index 7a8baec0d18..bd49bb69279 100644 --- a/ompi/win/win.h +++ b/ompi/win/win.h @@ -1,23 +1,23 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* 
+/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -41,6 +41,34 @@ BEGIN_C_DECLS #define OMPI_WIN_FREED 0x00000001 #define OMPI_WIN_INVALID 0x00000002 #define OMPI_WIN_NO_LOCKS 0x00000004 +#define OMPI_WIN_SAME_DISP 0x00000008 +#define OMPI_WIN_SAME_SIZE 0x00000010 + +enum ompi_win_accumulate_ops_t { + OMPI_WIN_ACCUMULATE_OPS_SAME_OP_NO_OP, + OMPI_WIN_ACCUMULATE_OPS_SAME_OP, +}; +typedef enum ompi_win_accumulate_ops_t ompi_win_accumulate_ops_t; + +/** + * Accumulate ordering flags. The default accumulate ordering in + * MPI-3.1 is rar,war,raw,waw. 
+ */ +enum ompi_win_accumulate_order_flags_t { + /** no accumulate ordering (not valid with any other flag) */ + OMPI_WIN_ACC_ORDER_NONE = 0x01, + /** read-after-read ordering */ + OMPI_WIN_ACC_ORDER_RAR = 0x02, + /** write-after-read ordering */ + OMPI_WIN_ACC_ORDER_WAR = 0x04, + /** read-after-write ordering */ + OMPI_WIN_ACC_ORDER_RAW = 0x08, + /** write-after-write ordering */ + OMPI_WIN_ACC_ORDER_WAW = 0x10, +}; + +OMPI_DECLSPEC extern mca_base_var_enum_t *ompi_win_accumulate_ops; +OMPI_DECLSPEC extern mca_base_var_enum_flag_t *ompi_win_accumulate_order; OMPI_DECLSPEC extern opal_pointer_array_t ompi_mpi_windows; @@ -57,6 +85,12 @@ struct ompi_win_t { /* Information about the state of the window. */ uint16_t w_flags; + /** Window flavor */ + uint16_t w_flavor; + + /** Accumulate ops */ + ompi_win_accumulate_ops_t w_acc_ops; + /* Attributes */ opal_hash_table_t *w_keyhash; @@ -71,6 +105,9 @@ struct ompi_win_t { /* one sided interface */ ompi_osc_base_module_t *w_osc_module; + + /** Accumulate ordering (see ompi_win_accumulate_order_flags_t above) */ + int32_t w_acc_order; }; typedef struct ompi_win_t ompi_win_t; OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_win_t); @@ -94,7 +131,7 @@ OMPI_DECLSPEC extern ompi_predefined_win_t *ompi_mpi_win_null_addr; int ompi_win_init(void); int ompi_win_finalize(void); -int ompi_win_create(void *base, size_t size, int disp_unit, +int ompi_win_create(void *base, size_t size, int disp_unit, ompi_communicator_t *comm, ompi_info_t *info, ompi_win_t **newwin); int ompi_win_allocate(size_t size, int disp_unit, ompi_info_t *info, @@ -114,7 +151,7 @@ OMPI_DECLSPEC int ompi_win_group(ompi_win_t *win, ompi_group_t **group); to the defintion of an "invalid" communicator. See a big comment in ompi/communicator/communicator.h about this.
*/ static inline int ompi_win_invalid(ompi_win_t *win) { - if (NULL == win || + if (NULL == win || MPI_WIN_NULL == win || (OMPI_WIN_INVALID & win->w_flags) || (OMPI_WIN_FREED & win->w_flags)) { @@ -125,7 +162,7 @@ static inline int ompi_win_invalid(ompi_win_t *win) { } static inline int ompi_win_peer_invalid(ompi_win_t *win, int peer) { - if (win->w_group->grp_proc_count <= peer) return true; + if (win->w_group->grp_proc_count <= peer || peer < 0) return true; return false; } diff --git a/opal/Makefile.am b/opal/Makefile.am index dddfe4b6cb4..127ac4fa598 100644 --- a/opal/Makefile.am +++ b/opal/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2009 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -75,6 +75,7 @@ nobase_opal_HEADERS = $(headers) endif include class/Makefile.am +include errhandler/Makefile.am include memoryhooks/Makefile.am include runtime/Makefile.am include threads/Makefile.am diff --git a/opal/asm/Makefile.am b/opal/asm/Makefile.am index e056a80171d..73eebed27d6 100644 --- a/opal/asm/Makefile.am +++ b/opal/asm/Makefile.am @@ -5,15 +5,17 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -63,8 +65,7 @@ EXTRA_DIST = \ generate-all-asm.pl \ base/aix.conf \ base/default.conf \ - base/ALPHA.asm \ - base/AMD64.asm \ + base/X86_64.asm \ base/ARM.asm \ base/IA32.asm \ base/IA64.asm \ diff --git a/opal/asm/asm-data.txt b/opal/asm/asm-data.txt index 3f597fd8bfe..198c9f6c886 100644 --- a/opal/asm/asm-data.txt +++ b/opal/asm/asm-data.txt @@ -5,15 +5,17 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -28,23 +30,14 @@ # Assembly Format field: # config_file-text-global-label_suffix-gsym-lsym-type-size-align_log-ppc_r_reg-64_bit-gnu_stack -###################################################################### -# -# Alpha -# -###################################################################### - -ALPHA default-.text-.globl-:--$-@-1-1-1-1-1 alpha-linux - - ###################################################################### # # AMD Opteron / Intel EM64T # ###################################################################### -AMD64 default-.text-.globl-:--.L-@-1-0-1-1-1 amd64-linux -AMD64 default-.text-.globl-:--.L-@-1-0-1-1-0 amd64-linux-nongas +X86_64 default-.text-.globl-:--.L-@-1-0-1-1-1 x86_64-linux +X86_64 default-.text-.globl-:--.L-@-1-0-1-1-0 x86_64-linux-nongas ###################################################################### @@ -81,7 +74,7 @@ IA64 default-.text-.globl-:--.L-@-1-0-1-1-0 ia64-linux-nongas ###################################################################### # -# PowerPC / POWER +# PowerPC / POWER # ###################################################################### diff --git a/opal/asm/asm.c b/opal/asm/asm.c index ded09c26d23..766f50f394c 100644 --- a/opal/asm/asm.c +++ b/opal/asm/asm.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,7 +30,7 @@ /* have to fix if you change LOCKS_TABLE_SIZE */ static opal_atomic_lock_t locks_table[LOCKS_TABLE_SIZE] = { - { { OPAL_ATOMIC_UNLOCKED } }, + { { OPAL_ATOMIC_UNLOCKED } }, { { OPAL_ATOMIC_UNLOCKED } }, { { OPAL_ATOMIC_UNLOCKED } }, { { OPAL_ATOMIC_UNLOCKED } }, diff --git a/opal/asm/base/ALPHA.asm b/opal/asm/base/ALPHA.asm deleted file mode 100644 index a16f5b1367b..00000000000 --- a/opal/asm/base/ALPHA.asm +++ /dev/null @@ -1,225 +0,0 @@ - .set noreorder - .set volatile - .set noat - .set nomacro - .text - .align 2 - .align 4 - .globl opal_atomic_mb - .ent opal_atomic_mb -$opal_atomic_mb..ng: -opal_atomic_mb: - .eflag 48 - .frame $30,0,$26,0 - .prologue 0 - .set macro - mb - .set nomacro - ret $31,($26),1 - .end opal_atomic_mb - .align 2 - .align 4 - .globl opal_atomic_rmb - .ent opal_atomic_rmb -$opal_atomic_rmb..ng: -opal_atomic_rmb: - .eflag 48 - .frame $30,0,$26,0 - .prologue 0 - .set macro - mb - .set nomacro - ret $31,($26),1 - .end opal_atomic_rmb - .align 2 - .align 4 - .globl opal_atomic_wmb - .ent opal_atomic_wmb -$opal_atomic_wmb..ng: -opal_atomic_wmb: - .eflag 48 - .frame $30,0,$26,0 - .prologue 0 - .set macro - wmb - .set nomacro - ret $31,($26),1 - .end opal_atomic_wmb - .align 2 - .align 4 - .globl opal_atomic_cmpset_32 - .ent opal_atomic_cmpset_32 -$opal_atomic_cmpset_32..ng: -opal_atomic_cmpset_32: - .eflag 48 - .frame $30,0,$26,0 - .prologue 0 - .set macro - 1: ldl_l $0, 0($16) - cmpeq $0, $17, $0 - beq $0, 2f - mov $18, $0 - stl_c $0, 0($16) - beq $0, 1b - jmp 3f -2: mov $31, $0 -3: - - .set nomacro - addl $31,$0,$0 - ret $31,($26),1 - .end opal_atomic_cmpset_32 - .align 2 - .align 4 - .globl opal_atomic_cmpset_acq_32 - .ent opal_atomic_cmpset_acq_32 -$opal_atomic_cmpset_acq_32..ng: -opal_atomic_cmpset_acq_32: - .eflag 48 - .frame $30,0,$26,0 - .prologue 0 - .set macro - 1: ldl_l $0, 0($16) - cmpeq $0, $17, $0 - beq $0, 2f - mov $18, $0 - stl_c $0, 
0($16) - beq $0, 1b - jmp 3f -2: mov $31, $0 -3: - - .set nomacro - addl $31,$0,$0 - .set macro - mb - .set nomacro - ret $31,($26),1 - .end opal_atomic_cmpset_acq_32 - .align 2 - .align 4 - .globl opal_atomic_cmpset_rel_32 - .ent opal_atomic_cmpset_rel_32 -$opal_atomic_cmpset_rel_32..ng: -opal_atomic_cmpset_rel_32: - .eflag 48 - .frame $30,0,$26,0 - .prologue 0 - .set macro - wmb - 1: ldl_l $0, 0($16) - cmpeq $0, $17, $0 - beq $0, 2f - mov $18, $0 - stl_c $0, 0($16) - beq $0, 1b - jmp 3f -2: mov $31, $0 -3: - - .set nomacro - addl $31,$0,$0 - ret $31,($26),1 - .end opal_atomic_cmpset_rel_32 - .align 2 - .align 4 - .globl opal_atomic_cmpset_64 - .ent opal_atomic_cmpset_64 -$opal_atomic_cmpset_64..ng: -opal_atomic_cmpset_64: - .eflag 48 - .frame $30,0,$26,0 - .prologue 0 - .set macro - 1: ldq_l $0, 0($16) - cmpeq $0, $17, $0 - beq $0, 2f - mov $18, $0 - stq_c $0, 0($16) - beq $0, 1b - jmp 3f -2: mov $31, $0 -3: - - .set nomacro - addl $31,$0,$0 - ret $31,($26),1 - .end opal_atomic_cmpset_64 - .align 2 - .align 4 - .globl opal_atomic_cmpset_acq_64 - .ent opal_atomic_cmpset_acq_64 -$opal_atomic_cmpset_acq_64..ng: -opal_atomic_cmpset_acq_64: - .eflag 48 - .frame $30,0,$26,0 - .prologue 0 - .set macro - 1: ldq_l $0, 0($16) - cmpeq $0, $17, $0 - beq $0, 2f - mov $18, $0 - stq_c $0, 0($16) - beq $0, 1b - jmp 3f -2: mov $31, $0 -3: - - .set nomacro - addl $31,$0,$0 - .set macro - mb - .set nomacro - ret $31,($26),1 - .end opal_atomic_cmpset_acq_64 - .align 2 - .align 4 - .globl opal_atomic_cmpset_rel_64 - .ent opal_atomic_cmpset_rel_64 -$opal_atomic_cmpset_rel_64..ng: -opal_atomic_cmpset_rel_64: - .eflag 48 - .frame $30,0,$26,0 - .prologue 0 - .set macro - wmb - 1: ldq_l $0, 0($16) - cmpeq $0, $17, $0 - beq $0, 2f - mov $18, $0 - stq_c $0, 0($16) - beq $0, 1b - jmp 3f -2: mov $31, $0 -3: - - .set nomacro - addl $31,$0,$0 - ret $31,($26),1 - .end opal_atomic_cmpset_rel_64 - .align 2 - .align 4 - .globl opal_sys_timer_get_cycles - .ent opal_sys_timer_get_cycles - 
$opal_sys_timer_get_cycles..ng: - opal_sys_timer_get_cycles: - .eflag 48 - .frame $30,0,$26,0 - .prologue 0 - .set macro - wmb - 1: ldq_l $0, 0($16) - cmpeq $0, $17, $0 - beq $0, 2f - mov $18, $0 - stq_c $0, 0($16) - beq $0, 1b - jmp 3f - 2: mov $31, $0 - 3: - - .set nomacro - rpcc $0 - ret - .end opal_sys_timer_get_cycles - diff --git a/opal/asm/base/AMD64.asm b/opal/asm/base/AMD64.asm deleted file mode 100644 index 827998b79b3..00000000000 --- a/opal/asm/base/AMD64.asm +++ /dev/null @@ -1,52 +0,0 @@ -START_FILE - TEXT - -START_FUNC(opal_atomic_mb) - pushq %rbp - movq %rsp, %rbp - leave - ret -END_FUNC(opal_atomic_mb) - - -START_FUNC(opal_atomic_rmb) - pushq %rbp - movq %rsp, %rbp - leave - ret -END_FUNC(opal_atomic_rmb) - - -START_FUNC(opal_atomic_wmb) - pushq %rbp - movq %rsp, %rbp - leave - ret -END_FUNC(opal_atomic_wmb) - - -START_FUNC(opal_atomic_cmpset_32) - movl %esi, %eax - lock; cmpxchgl %edx,(%rdi) - sete %dl - movzbl %dl, %eax - ret -END_FUNC(opal_atomic_cmpset_32) - - -START_FUNC(opal_atomic_cmpset_64) - movq %rsi, %rax - lock; cmpxchgq %rdx,(%rdi) - sete %dl - movzbl %dl, %eax - ret -END_FUNC(opal_atomic_cmpset_64) - - -START_FUNC(opal_sys_timer_get_cycles) - rdtsc - salq $32, %rdx - mov %eax, %eax - orq %rdx, %rax - ret -END_FUNC(opal_sys_timer_get_cycles) diff --git a/opal/asm/base/IA32.asm b/opal/asm/base/IA32.asm index b487ca4b07c..f82b9ce2d15 100644 --- a/opal/asm/base/IA32.asm +++ b/opal/asm/base/IA32.asm @@ -61,12 +61,12 @@ START_FUNC(opal_atomic_cmpset_64) movl -32(%ebp), %esi movl -28(%ebp), %ecx movl %ebx, %eax - push %ebx - movl %esi, %ebx + push %ebx + movl %esi, %ebx lock; cmpxchg8b (%edi) - sete %dl - pop %ebx - + sete %dl + pop %ebx + movzbl %dl, %eax movl -12(%ebp), %ebx movl -8(%ebp), %esi diff --git a/opal/asm/base/MIPS.asm b/opal/asm/base/MIPS.asm index 3dd758c0b48..0a82a173dbc 100644 --- a/opal/asm/base/MIPS.asm +++ b/opal/asm/base/MIPS.asm @@ -6,7 +6,7 @@ START_FILE #include #endif #include - + TEXT ALIGN(8) @@ -17,11 +17,11 @@ 
LEAF(opal_atomic_mb) sync #ifdef __linux__ .set mips0 -#endif +#endif j ra END(opal_atomic_mb) - + ALIGN(8) LEAF(opal_atomic_rmb) #ifdef __linux__ @@ -33,8 +33,8 @@ LEAF(opal_atomic_rmb) #endif j ra END(opal_atomic_rmb) - - + + LEAF(opal_atomic_wmb) #ifdef __linux__ .set mips2 @@ -48,54 +48,54 @@ END(opal_atomic_wmb) LEAF(opal_atomic_cmpset_32) - .set noreorder -retry1: + .set noreorder +retry1: #ifdef __linux__ .set mips2 #endif - ll $3, 0($4) + ll $3, 0($4) #ifdef __linux__ .set mips0 #endif - bne $3, $5, done1 - or $2, $6, 0 + bne $3, $5, done1 + or $2, $6, 0 #ifdef __linux__ .set mips2 #endif - sc $2, 0($4) + sc $2, 0($4) #ifdef __linux__ .set mips0 #endif beqz $2, retry1 -done1: +done1: xor $3,$3,$5 j ra sltu $2,$3,1 - .set reorder + .set reorder END(opal_atomic_cmpset_32) LEAF(opal_atomic_cmpset_acq_32) - .set noreorder -retry2: + .set noreorder +retry2: #ifdef __linux__ .set mips2 #endif - ll $3, 0($4) + ll $3, 0($4) #ifdef __linux__ .set mips0 #endif - bne $3, $5, done2 - or $2, $6, 0 + bne $3, $5, done2 + or $2, $6, 0 #ifdef __linux__ .set mips2 #endif - sc $2, 0($4) + sc $2, 0($4) #ifdef __linux__ .set mips0 #endif - beqz $2, retry2 -done2: + beqz $2, retry2 +done2: #ifdef __linux__ .set mips2 #endif @@ -106,12 +106,12 @@ done2: xor $3,$3,$5 j ra sltu $2,$3,1 - .set reorder + .set reorder END(opal_atomic_cmpset_acq_32) - + LEAF(opal_atomic_cmpset_rel_32) - .set noreorder + .set noreorder #ifdef __linux__ .set mips2 #endif @@ -119,78 +119,78 @@ LEAF(opal_atomic_cmpset_rel_32) #ifdef __linux__ .set mips0 #endif -retry3: +retry3: #ifdef __linux__ .set mips2 #endif - ll $3, 0($4) + ll $3, 0($4) #ifdef __linux__ .set mips0 #endif - bne $3, $5, done3 - or $2, $6, 0 + bne $3, $5, done3 + or $2, $6, 0 #ifdef __linux__ .set mips2 #endif - sc $2, 0($4) + sc $2, 0($4) #ifdef __linux__ .set mips0 #endif - beqz $2, retry3 -done3: + beqz $2, retry3 +done3: xor $3,$3,$5 j ra sltu $2,$3,1 - .set reorder + .set reorder END(opal_atomic_cmpset_rel_32) - -#ifdef __mips64 + 
+#ifdef __mips64 LEAF(opal_atomic_cmpset_64) - .set noreorder -retry4: - lld $3, 0($4) - bne $3, $5, done4 - or $2, $6, 0 - scd $2, 0($4) - beqz $2, retry4 -done4: + .set noreorder +retry4: + lld $3, 0($4) + bne $3, $5, done4 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry4 +done4: xor $3,$3,$5 j ra sltu $2,$3,1 - .set reorder + .set reorder END(opal_atomic_cmpset_64) LEAF(opal_atomic_cmpset_acq_64) - .set noreorder -retry5: - lld $3, 0($4) - bne $3, $5, done5 - or $2, $6, 0 - scd $2, 0($4) - beqz $2, retry5 -done5: + .set noreorder +retry5: + lld $3, 0($4) + bne $3, $5, done5 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry5 +done5: sync xor $3,$3,$5 j ra sltu $2,$3,1 - .set reorder + .set reorder END(opal_atomic_cmpset_acq_64) LEAF(opal_atomic_cmpset_rel_64) - .set noreorder + .set noreorder sync -retry6: - lld $3, 0($4) - bne $3, $5, done6 - or $2, $6, 0 - scd $2, 0($4) - beqz $2, retry6 -done6: +retry6: + lld $3, 0($4) + bne $3, $5, done6 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry6 +done6: xor $3,$3,$5 j ra sltu $2,$3,1 - .set reorder + .set reorder END(opal_atomic_cmpset_rel_64) #endif /* __mips64 */ diff --git a/opal/asm/base/POWERPC32.asm b/opal/asm/base/POWERPC32.asm index 66b7315299d..6939fef8f86 100644 --- a/opal/asm/base/POWERPC32.asm +++ b/opal/asm/base/POWERPC32.asm @@ -21,10 +21,10 @@ END_FUNC(opal_atomic_wmb) START_FUNC(opal_atomic_cmpset_32) - LSYM(1) lwarx r0, 0, r3 - cmpw 0, r0, r4 + LSYM(1) lwarx r0, 0, r3 + cmpw 0, r0, r4 bne- REFLSYM(2) - stwcx. r5, 0, r3 + stwcx. r5, 0, r3 bne- REFLSYM(1) LSYM(2) xor r3,r0,r4 @@ -35,12 +35,12 @@ END_FUNC(opal_atomic_cmpset_32) START_FUNC(opal_atomic_cmpset_acq_32) - LSYM(3) lwarx r0, 0, r3 - cmpw 0, r0, r4 - bne- REFLSYM(4) - stwcx. r5, 0, r3 + LSYM(3) lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- REFLSYM(4) + stwcx. 
r5, 0, r3 bne- REFLSYM(3) - sync + sync LSYM(4) xor r3,r0,r4 subfic r5,r3,0 @@ -52,12 +52,12 @@ END_FUNC(opal_atomic_cmpset_acq_32) START_FUNC(opal_atomic_cmpset_rel_32) eieio - LSYM(5) lwarx r0, 0, r3 - cmpw 0, r0, r4 + LSYM(5) lwarx r0, 0, r3 + cmpw 0, r0, r4 bne- REFLSYM(6) - stwcx. r5, 0, r3 + stwcx. r5, 0, r3 bne- REFLSYM(5) - sync + sync LSYM(6) xor r3,r0,r4 subfic r5,r3,0 @@ -73,9 +73,9 @@ START_FUNC(opal_atomic_cmpset_64) stw r7,-20(r1) ld r5,-32(r1) ld r7,-24(r1) - LSYM(7) ldarx r9, 0, r3 - cmpd 0, r9, r5 - bne- REFLSYM(8) + LSYM(7) ldarx r9, 0, r3 + cmpd 0, r9, r5 + bne- REFLSYM(8) stdcx. r7, 0, r3 bne- REFLSYM(7) LSYM(8) @@ -94,10 +94,10 @@ START_FUNC(opal_atomic_cmpset_acq_64) ld r5,-32(r1) ld r7,-24(r1) - LSYM(9) ldarx r9, 0, r3 + LSYM(9) ldarx r9, 0, r3 cmpd 0, r9, r5 - bne- REFLSYM(10) - stdcx. r7, 0, r3 + bne- REFLSYM(10) + stdcx. r7, 0, r3 bne- REFLSYM(9) LSYM(10) xor r3,r5,r9 @@ -118,10 +118,10 @@ START_FUNC(opal_atomic_cmpset_rel_64) ld r7,-24(r1) eieio - LSYM(11) ldarx r9, 0, r3 - cmpd 0, r9, r5 - bne- REFLSYM(12) - stdcx. r7, 0, r3 + LSYM(11) ldarx r9, 0, r3 + cmpd 0, r9, r5 + bne- REFLSYM(12) + stdcx. r7, 0, r3 bne- REFLSYM(11) LSYM(12) xor r3,r5,r9 @@ -135,9 +135,9 @@ END_FUNC(opal_atomic_cmpset_rel_64) START_FUNC(opal_atomic_add_32) - LSYM(13) lwarx r0, 0, r3 - add r0, r4, r0 - stwcx. r0, 0, r3 + LSYM(13) lwarx r0, 0, r3 + add r0, r4, r0 + stwcx. r0, 0, r3 bne- REFLSYM(13) mr r3,r0 blr @@ -146,9 +146,9 @@ END_FUNC(opal_atomic_add_32) START_FUNC(opal_atomic_sub_32) LSYM(14) lwarx r0,0,r3 - subf r0,r4,r0 - stwcx. r0,0,r3 - bne- REFLSYM(14) + subf r0,r4,r0 + stwcx. 
r0,0,r3 + bne- REFLSYM(14) mr r3,r0 blr END_FUNC(opal_atomic_sub_32) diff --git a/opal/asm/base/POWERPC64.asm b/opal/asm/base/POWERPC64.asm index 5e86e2d4abc..28da3f4d8e0 100644 --- a/opal/asm/base/POWERPC64.asm +++ b/opal/asm/base/POWERPC64.asm @@ -21,10 +21,10 @@ END_FUNC(opal_atomic_wmb) START_FUNC(opal_atomic_cmpset_32) - LSYM(1) lwarx r0, 0, r3 - cmpw 0, r0, r4 - bne- REFLSYM(2) - stwcx. r5, 0, r3 + LSYM(1) lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- REFLSYM(2) + stwcx. r5, 0, r3 bne- REFLSYM(1) LSYM(2) cmpw cr7,r0,r4 @@ -32,7 +32,7 @@ START_FUNC(opal_atomic_cmpset_32) rlwinm r3,r3,31,1 blr END_FUNC(opal_atomic_cmpset_32) - + START_FUNC(opal_atomic_cmpset_acq_32) mflr r0 @@ -77,10 +77,10 @@ END_FUNC(opal_atomic_cmpset_rel_32) START_FUNC(opal_atomic_cmpset_64) - LSYM(3) ldarx r0, 0, r3 - cmpd 0, r0, r4 + LSYM(3) ldarx r0, 0, r3 + cmpd 0, r0, r4 bne- REFLSYM(4) - stdcx. r5, 0, r3 + stdcx. r5, 0, r3 bne- REFLSYM(3) LSYM(4) xor r3,r4,r0 @@ -121,11 +121,11 @@ END_FUNC(opal_atomic_cmpset_rel_64) START_FUNC(opal_atomic_add_32) - LSYM(5) lwarx r0, 0, r3 - add r0, r4, r0 - stwcx. r0, 0, r3 + LSYM(5) lwarx r0, 0, r3 + add r0, r4, r0 + stwcx. r0, 0, r3 bne- REFLSYM(5) - + mr r3,r0 blr END_FUNC(opal_atomic_add_32) @@ -133,10 +133,10 @@ END_FUNC(opal_atomic_add_32) START_FUNC(opal_atomic_sub_32) LSYM(6) lwarx r0,0,r3 - subf r0,r4,r0 - stwcx. r0,0,r3 + subf r0,r4,r0 + stwcx. 
r0,0,r3 bne- REFLSYM(6) - + mr r3,r0 blr END_FUNC(opal_atomic_sub_32) diff --git a/opal/asm/base/SPARCV9_32.asm b/opal/asm/base/SPARCV9_32.asm index b39f9ed98ad..eb004a80653 100644 --- a/opal/asm/base/SPARCV9_32.asm +++ b/opal/asm/base/SPARCV9_32.asm @@ -3,7 +3,7 @@ START_FILE ALIGN(4) - + START_FUNC(opal_atomic_mb) !#PROLOGUE# 0 !#PROLOGUE# 1 @@ -76,10 +76,10 @@ START_FUNC(opal_atomic_cmpset_64) st %i1, [%fp-32] st %i2, [%fp-28] std %o4, [%fp-24] - ldx [%fp-24], %g1 - ldx [%fp-32], %g2 - casxa [%i0] 0x80, %g2, %g1 - stx %g1, [%fp-24] + ldx [%fp-24], %g1 + ldx [%fp-32], %g2 + casxa [%i0] 0x80, %g2, %g1 + stx %g1, [%fp-24] ld [%fp-24], %i5 ld [%fp-32], %g1 @@ -107,10 +107,10 @@ START_FUNC(opal_atomic_cmpset_acq_64) mov %i4, %o3 std %o4, [%fp-32] std %o2, [%fp-24] - ldx [%fp-24], %g1 - ldx [%fp-32], %g2 - casxa [%i0] 0x80, %g2, %g1 - stx %g1, [%fp-24] + ldx [%fp-24], %g1 + ldx [%fp-32], %g2 + casxa [%i0] 0x80, %g2, %g1 + stx %g1, [%fp-24] ld [%fp-24], %i5 ld [%fp-32], %g1 @@ -140,10 +140,10 @@ START_FUNC(opal_atomic_cmpset_rel_64) membar #StoreStore std %o4, [%fp-32] std %o2, [%fp-24] - ldx [%fp-24], %g1 - ldx [%fp-32], %g2 - casxa [%i0] 0x80, %g2, %g1 - stx %g1, [%fp-24] + ldx [%fp-24], %g1 + ldx [%fp-32], %g2 + casxa [%i0] 0x80, %g2, %g1 + stx %g1, [%fp-24] ld [%fp-24], %i5 ld [%fp-32], %g1 diff --git a/opal/asm/base/SPARCV9_64.asm b/opal/asm/base/SPARCV9_64.asm index 72b4557b0cc..9820ab34ce1 100644 --- a/opal/asm/base/SPARCV9_64.asm +++ b/opal/asm/base/SPARCV9_64.asm @@ -3,7 +3,7 @@ START_FILE ALIGN(4) - + START_FUNC(opal_atomic_mb) !#PROLOGUE# 0 !#PROLOGUE# 1 diff --git a/opal/asm/base/X86_64.asm b/opal/asm/base/X86_64.asm new file mode 100644 index 00000000000..2468b638f64 --- /dev/null +++ b/opal/asm/base/X86_64.asm @@ -0,0 +1,52 @@ +START_FILE + TEXT + +START_FUNC(opal_atomic_mb) + pushq %rbp + movq %rsp, %rbp + leave + ret +END_FUNC(opal_atomic_mb) + + +START_FUNC(opal_atomic_rmb) + pushq %rbp + movq %rsp, %rbp + leave + ret +END_FUNC(opal_atomic_rmb) + + 
+START_FUNC(opal_atomic_wmb) + pushq %rbp + movq %rsp, %rbp + leave + ret +END_FUNC(opal_atomic_wmb) + + +START_FUNC(opal_atomic_cmpset_32) + movl %esi, %eax + lock; cmpxchgl %edx,(%rdi) + sete %dl + movzbl %dl, %eax + ret +END_FUNC(opal_atomic_cmpset_32) + + +START_FUNC(opal_atomic_cmpset_64) + movq %rsi, %rax + lock; cmpxchgq %rdx,(%rdi) + sete %dl + movzbl %dl, %eax + ret +END_FUNC(opal_atomic_cmpset_64) + + +START_FUNC(opal_sys_timer_get_cycles) + rdtsc + salq $32, %rdx + mov %eax, %eax + orq %rdx, %rax + ret +END_FUNC(opal_sys_timer_get_cycles) diff --git a/opal/asm/base/default.conf b/opal/asm/base/default.conf index 8f764a311ee..c54f085cf99 100644 --- a/opal/asm/base/default.conf +++ b/opal/asm/base/default.conf @@ -10,7 +10,7 @@ sub start_func($) my $ret = ""; $ret = "\t$GLOBAL $GSYM$func_name\n"; - if (! $TYPE eq "") { + if (! $TYPE eq "") { $ret .= "\t.type $GSYM$func_name, $TYPE" . "function\n"; } $ret .= "$GSYM$func_name$SUFFIX\n"; diff --git a/opal/asm/generate-asm.pl b/opal/asm/generate-asm.pl index c2f9a55f0e2..6c904a77f36 100644 --- a/opal/asm/generate-asm.pl +++ b/opal/asm/generate-asm.pl @@ -2,9 +2,9 @@ # # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -14,12 +14,12 @@ my $basedir = shift; my $output = shift; -if ( ! $asmarch) { +if ( ! 
$asmarch) { print "usage: generate-asm.pl [ASMARCH] [ASMFORMAT] [BASEDIR] [OUTPUT NAME]\n"; exit(1); } -open(INPUT, "$basedir/$asmarch.asm") || +open(INPUT, "$basedir/$asmarch.asm") || die "Could not open $basedir/$asmarch.asm: $!\n"; open(OUTPUT, ">$output") || die "Could not open $output: $!\n"; @@ -100,9 +100,9 @@ $delete = 1; } } - if (/^\#END_64BIT/) { + if (/^\#END_64BIT/) { $_ = ""; - $delete = 0; + $delete = 0; } if ($delete == 0) { diff --git a/opal/class/Makefile.am b/opal/class/Makefile.am index 5b9776f0043..e98f955de8d 100644 --- a/opal/class/Makefile.am +++ b/opal/class/Makefile.am @@ -6,7 +6,7 @@ # Copyright (c) 2004-2007 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights # reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/class/opal_bitmap.c b/opal/class/opal_bitmap.c index 530dc51f4d9..11d2a21bb38 100644 --- a/opal/class/opal_bitmap.c +++ b/opal/class/opal_bitmap.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,12 +37,12 @@ static void opal_bitmap_construct(opal_bitmap_t *bm); static void opal_bitmap_destruct(opal_bitmap_t *bm); -OBJ_CLASS_INSTANCE(opal_bitmap_t, opal_object_t, +OBJ_CLASS_INSTANCE(opal_bitmap_t, opal_object_t, opal_bitmap_construct, opal_bitmap_destruct); -static void -opal_bitmap_construct(opal_bitmap_t *bm) +static void +opal_bitmap_construct(opal_bitmap_t *bm) { bm->bitmap = NULL; bm->array_size = 0; @@ -109,41 +109,41 @@ int opal_bitmap_set_bit(opal_bitmap_t *bm, int bit) { int index, offset, new_size; - + if ((bit < 0) || (NULL == bm) || (bit > bm->max_size)) { return OPAL_ERR_BAD_PARAM; } - + index = bit / SIZE_OF_BASE_TYPE; offset = bit % SIZE_OF_BASE_TYPE; - + if (index >= bm->array_size) { - + /* We need to allocate more space for the bitmap, since we are out of range. We don't throw any error here, because this is valid and we simply expand the bitmap */ - + new_size = (int)(((size_t)index / bm->array_size + 1 ) * bm->array_size); if( new_size > bm->max_size ) new_size = bm->max_size; - + /* New size is just a multiple of the original size to fit in the index. 
*/ bm->bitmap = (uint64_t*)realloc(bm->bitmap, new_size*sizeof(uint64_t)); if (NULL == bm->bitmap) { return OPAL_ERR_OUT_OF_RESOURCE; } - + /* zero out the new elements */ memset(&bm->bitmap[bm->array_size], 0, (new_size - bm->array_size) * sizeof(uint64_t)); - + /* Update the array_size */ bm->array_size = new_size; } - + /* Now set the bit */ bm->bitmap[index] |= (1UL << offset); - + return OPAL_SUCCESS; } @@ -152,14 +152,14 @@ int opal_bitmap_clear_bit(opal_bitmap_t *bm, int bit) { int index, offset; - + if ((bit < 0) || NULL == bm || (bit >= (bm->array_size * SIZE_OF_BASE_TYPE))) { return OPAL_ERR_BAD_PARAM; } - - index = bit / SIZE_OF_BASE_TYPE; + + index = bit / SIZE_OF_BASE_TYPE; offset = bit % SIZE_OF_BASE_TYPE; - + bm->bitmap[index] &= ~(1UL << offset); return OPAL_SUCCESS; } @@ -169,18 +169,18 @@ bool opal_bitmap_is_set_bit(opal_bitmap_t *bm, int bit) { int index, offset; - + if ((bit < 0) || NULL == bm || (bit >= (bm->array_size * SIZE_OF_BASE_TYPE))) { return false; } - - index = bit / SIZE_OF_BASE_TYPE; + + index = bit / SIZE_OF_BASE_TYPE; offset = bit % SIZE_OF_BASE_TYPE; - + if (0 != (bm->bitmap[index] & (1UL << offset))) { return true; } - + return false; } @@ -191,7 +191,7 @@ opal_bitmap_clear_all_bits(opal_bitmap_t *bm) if (NULL == bm) { return OPAL_ERR_BAD_PARAM; } - + memset(bm->bitmap, 0, bm->array_size * sizeof(uint64_t)); return OPAL_SUCCESS; } @@ -203,9 +203,9 @@ opal_bitmap_set_all_bits(opal_bitmap_t *bm) if (NULL == bm) { return OPAL_ERR_BAD_PARAM; } - + memset(bm->bitmap, 0xff, bm->array_size * sizeof(uint64_t)); - + return OPAL_SUCCESS; } @@ -215,25 +215,25 @@ opal_bitmap_find_and_set_first_unset_bit(opal_bitmap_t *bm, int *position) { int i = 0; uint64_t temp, all_ones = 0xffffffffffffffffUL; - + if (NULL == bm) { return OPAL_ERR_BAD_PARAM; } - + /* Neglect all which don't have an unset bit */ *position = 0; while((i < bm->array_size) && (bm->bitmap[i] == all_ones)) { ++i; } - + if (i == bm->array_size) { /* increase the bitmap size 
then */ *position = bm->array_size * SIZE_OF_BASE_TYPE; return opal_bitmap_set_bit(bm, *position); } - + /* This one has an unset bit, find its bit number */ - + temp = bm->bitmap[i]; bm->bitmap[i] |= (bm->bitmap[i] + 1); /* Set the first zero bit */ temp ^= bm->bitmap[i]; /* Compute the change: the first unset bit in the original number */ diff --git a/opal/class/opal_bitmap.h b/opal/class/opal_bitmap.h index 5f2515997d0..cda93888451 100644 --- a/opal/class/opal_bitmap.h +++ b/opal/class/opal_bitmap.h @@ -6,16 +6,16 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -93,7 +93,7 @@ OPAL_DECLSPEC int opal_bitmap_init (opal_bitmap_t *bm, int size); * @return OPAL error code or success * */ -OPAL_DECLSPEC int opal_bitmap_set_bit(opal_bitmap_t *bm, int bit); +OPAL_DECLSPEC int opal_bitmap_set_bit(opal_bitmap_t *bm, int bit); /** @@ -130,8 +130,8 @@ OPAL_DECLSPEC bool opal_bitmap_is_set_bit(opal_bitmap_t *bm, int bit); * @return err OPAL_SUCCESS on success */ -OPAL_DECLSPEC int opal_bitmap_find_and_set_first_unset_bit(opal_bitmap_t *bm, - int *position); +OPAL_DECLSPEC int opal_bitmap_find_and_set_first_unset_bit(opal_bitmap_t *bm, + int *position); /** @@ -139,7 +139,7 @@ OPAL_DECLSPEC int opal_bitmap_find_and_set_first_unset_bit(opal_bitmap_t *bm, * * @param bitmap The input bitmap (IN) * @return OPAL error code if bm is NULL - * + * */ OPAL_DECLSPEC int opal_bitmap_clear_all_bits(opal_bitmap_t *bm); @@ -214,7 +214,7 @@ OPAL_DECLSPEC int opal_bitmap_bitwise_xor_inplace(opal_bitmap_t *dest, opal_bitm /** * If the bitmaps are different - * + * * @param left Pointer to a bitmap * @param right Pointer to another bitmap * @return true if different, false if the same diff --git a/opal/class/opal_fifo.h b/opal/class/opal_fifo.h index 8c866aa5b22..ad9cbdbcbb4 100644 --- a/opal/class/opal_fifo.h +++ b/opal/class/opal_fifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reseved. 
* $COPYRIGHT$ * @@ -101,7 +101,8 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, if (&fifo->opal_fifo_ghost == tail.data.item) { /* update the head */ - fifo->opal_fifo_head.data.item = item; + opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}; + opal_update_counted_pointer (&fifo->opal_fifo_head, head, item); } else { /* update previous item */ tail.data.item->opal_list_next = item; @@ -189,6 +190,8 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, item->opal_list_next = &fifo->opal_fifo_ghost; + opal_atomic_wmb (); + /* try to get the tail */ tail_item = opal_atomic_swap_ptr (&fifo->opal_fifo_tail.data.item, item); @@ -214,6 +217,27 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) { opal_list_item_t *item, *next; +#if OPAL_HAVE_ATOMIC_LLSC_PTR + /* use load-linked store-conditional to avoid ABA issues */ + do { + item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item); + if (&fifo->opal_fifo_ghost == item) { + if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) { + return NULL; + } + + /* fifo does not appear empty. wait for the fifo to be made + * consistent by conflicting thread. 
*/ + continue; + } + + next = (opal_list_item_t *) item->opal_list_next; + if (opal_atomic_sc_ptr (&fifo->opal_fifo_head.data.item, next)) { + break; + } + } while (1); +#else + /* protect against ABA issues by "locking" the head */ do { if (opal_atomic_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) { break; @@ -232,6 +256,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) next = (opal_list_item_t *) item->opal_list_next; fifo->opal_fifo_head.data.item = next; +#endif if (&fifo->opal_fifo_ghost == next) { if (!opal_atomic_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, &fifo->opal_fifo_ghost)) { diff --git a/opal/class/opal_free_list.c b/opal/class/opal_free_list.c index b1caf188f3a..dd686d30998 100644 --- a/opal/class/opal_free_list.c +++ b/opal/class/opal_free_list.c @@ -6,19 +6,19 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved. * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,6 +28,9 @@ #include "opal/align.h" #include "opal/util/output.h" #include "opal/mca/mpool/mpool.h" +#include "opal/mca/mpool/base/base.h" +#include "opal/mca/rcache/rcache.h" +#include "opal/util/sys_limits.h" typedef struct opal_free_list_item_t opal_free_list_memory_t; @@ -49,17 +52,22 @@ static void opal_free_list_construct(opal_free_list_t* fl) fl->fl_payload_buffer_alignment = 0; fl->fl_frag_class = OBJ_CLASS(opal_free_list_item_t); fl->fl_mpool = NULL; + fl->fl_rcache = NULL; /* default flags */ - fl->fl_mpool_reg_flags = MCA_MPOOL_FLAGS_CACHE_BYPASS | - MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM; + fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS | + MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM; fl->ctx = NULL; OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t); } static void opal_free_list_allocation_release (opal_free_list_t *fl, opal_free_list_memory_t *fl_mem) { + if (NULL != fl->fl_rcache) { + fl->fl_rcache->rcache_deregister (fl->fl_rcache, fl_mem->registration); + } + if (NULL != fl->fl_mpool) { - fl->fl_mpool->mpool_free (fl->fl_mpool, fl_mem->ptr, fl_mem->registration); + fl->fl_mpool->mpool_free (fl->fl_mpool, fl_mem->ptr); } else if (fl_mem->ptr) { free (fl_mem->ptr); } @@ -108,8 +116,9 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_ opal_class_t *frag_class, size_t payload_buffer_size, size_t payload_buffer_alignment, int num_elements_to_alloc, int max_elements_to_alloc, int num_elements_per_alloc, - mca_mpool_base_module_t* mpool, int mpool_reg_flags, - void *unused0, opal_free_list_item_init_fn_t item_init, void *ctx) + mca_mpool_base_module_t *mpool, int rcache_reg_flags, + mca_rcache_base_module_t *rcache, opal_free_list_item_init_fn_t item_init, + void *ctx) { /* alignment must be more than zero and power of two */ if (frag_alignment <= 1 || (frag_alignment & (frag_alignment - 1))) { @@ -121,6 +130,10 @@ int opal_free_list_init 
(opal_free_list_t *flist, size_t frag_size, size_t frag_ return OPAL_ERROR; } + if (frag_class && frag_size < frag_class->cls_sizeof) { + frag_size = frag_class->cls_sizeof; + } + if (frag_size > flist->fl_frag_size) { flist->fl_frag_size = frag_size; } @@ -133,11 +146,12 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_ flist->fl_max_to_alloc = max_elements_to_alloc; flist->fl_num_allocated = 0; flist->fl_num_per_alloc = num_elements_per_alloc; - flist->fl_mpool = mpool; + flist->fl_mpool = mpool ? mpool : mca_mpool_base_default_module; + flist->fl_rcache = rcache; flist->fl_frag_alignment = frag_alignment; flist->fl_payload_buffer_alignment = payload_buffer_alignment; flist->item_init = item_init; - flist->fl_mpool_reg_flags |= mpool_reg_flags; + flist->fl_rcache_reg_flags |= rcache_reg_flags; flist->ctx = ctx; if (num_elements_to_alloc) { @@ -149,10 +163,10 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements) { - unsigned char *ptr, *mpool_alloc_ptr = NULL, *payload_ptr = NULL; + unsigned char *ptr, *payload_ptr = NULL; opal_free_list_memory_t *alloc_ptr; - size_t alloc_size, head_size, elem_size = 0; - mca_mpool_base_registration_t *reg = NULL; + size_t alloc_size, head_size, elem_size = 0, buffer_size, align; + mca_rcache_base_registration_t *reg = NULL; int rc = OPAL_SUCCESS; if (flist->fl_max_to_alloc && (flist->fl_num_allocated + num_elements) > @@ -164,9 +178,30 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements) return OPAL_ERR_TEMP_OUT_OF_RESOURCE; } - head_size = (NULL == flist->fl_mpool) ? 
flist->fl_frag_size: - flist->fl_frag_class->cls_sizeof; - head_size = OPAL_ALIGN(head_size, flist->fl_frag_alignment, size_t); + head_size = OPAL_ALIGN(flist->fl_frag_size, flist->fl_frag_alignment, size_t); + + /* NTH: calculate allocation alignment first as it might change the number of elements */ + if (0 != flist->fl_payload_buffer_size) { + elem_size = OPAL_ALIGN(flist->fl_payload_buffer_size, + flist->fl_payload_buffer_alignment, size_t); + + /* elem_size should not be 0 here */ + assert (elem_size > 0); + + buffer_size = num_elements * elem_size; + align = flist->fl_payload_buffer_alignment; + + if (MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM & flist->fl_rcache_reg_flags) { + size_t pagesize = opal_getpagesize (); + /* CUDA cannot handle registering overlapping regions, so make + * sure each region is page sized and page aligned. */ + align = OPAL_ALIGN(align, pagesize, size_t); + buffer_size = OPAL_ALIGN(buffer_size, pagesize, size_t); + + /* avoid wasting space in the buffer */ + num_elements = buffer_size / elem_size; + } + } /* calculate head allocation size */ alloc_size = num_elements * head_size + sizeof(opal_free_list_memory_t) + @@ -178,37 +213,23 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements) } if (0 != flist->fl_payload_buffer_size) { - elem_size = OPAL_ALIGN(flist->fl_payload_buffer_size, - flist->fl_payload_buffer_alignment, size_t); - - /* elem_size should not be 0 here */ - assert (elem_size > 0); - /* allocate the rest from the mpool (or use memalign/malloc) */ - if(flist->fl_mpool != NULL) { - payload_ptr = mpool_alloc_ptr = - (unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool, - num_elements * elem_size, - flist->fl_payload_buffer_alignment, - flist->fl_mpool_reg_flags, ®); - } else { -#ifdef HAVE_POSIX_MEMALIGN - posix_memalign ((void **) &mpool_alloc_ptr, flist->fl_payload_buffer_alignment, - num_elements * elem_size); - payload_ptr = mpool_alloc_ptr; -#else - mpool_alloc_ptr = (unsigned char *) malloc 
(num_elements * elem_size + - flist->fl_payload_buffer_alignment); - payload_ptr = (unsigned char *) OPAL_ALIGN((uintptr_t)mpool_alloc_ptr, - flist->fl_payload_buffer_alignment, - uintptr_t); -#endif - } - - if(NULL == mpool_alloc_ptr) { + payload_ptr = (unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool, buffer_size, align, 0); + if (NULL == payload_ptr) { free(alloc_ptr); return OPAL_ERR_TEMP_OUT_OF_RESOURCE; } + + if (flist->fl_rcache) { + rc = flist->fl_rcache->rcache_register (flist->fl_rcache, payload_ptr, num_elements * elem_size, + flist->fl_rcache_reg_flags, MCA_RCACHE_ACCESS_ANY, ®); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + free (alloc_ptr); + flist->fl_mpool->mpool_free (flist->fl_mpool, payload_ptr); + + return rc; + } + } } /* make the alloc_ptr a list item, save the chunk in the allocations list, @@ -217,7 +238,7 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements) opal_list_append(&(flist->fl_allocations), (opal_list_item_t*)alloc_ptr); alloc_ptr->registration = reg; - alloc_ptr->ptr = mpool_alloc_ptr; + alloc_ptr->ptr = payload_ptr; ptr = (unsigned char*)alloc_ptr + sizeof(opal_free_list_memory_t); ptr = OPAL_ALIGN_PTR(ptr, flist->fl_frag_alignment, unsigned char*); diff --git a/opal/class/opal_free_list.h b/opal/class/opal_free_list.h index 496c3ed8625..3a196141cc1 100644 --- a/opal/class/opal_free_list.h +++ b/opal/class/opal_free_list.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -77,6 +77,8 @@ struct opal_free_list_t { /** mpool to use for free list buffer allocation (posix_memalign/malloc * are used if this is NULL) */ struct mca_mpool_base_module_t *fl_mpool; + /** registration cache */ + struct mca_rcache_base_module_t *fl_rcache; /** Multi-threaded lock. Used when the free list is empty. */ opal_mutex_t fl_lock; /** Multi-threaded condition. Used when threads are waiting on free @@ -84,8 +86,8 @@ struct opal_free_list_t { opal_condition_t fl_condition; /** List of free list allocation */ opal_list_t fl_allocations; - /** Flags to pass to the mpool register function */ - int fl_mpool_reg_flags; + /** Flags to pass to the rcache register function */ + int fl_rcache_reg_flags; /** Free list item initialization function */ opal_free_list_item_init_fn_t item_init; /** Initialization function context */ @@ -96,12 +98,12 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_free_list_t); struct mca_mpool_base_registration_t; struct opal_free_list_item_t -{ - opal_list_item_t super; - struct mca_mpool_base_registration_t *registration; +{ + opal_list_item_t super; + struct mca_rcache_base_registration_t *registration; void *ptr; -}; -typedef struct opal_free_list_item_t opal_free_list_item_t; +}; +typedef struct opal_free_list_item_t opal_free_list_item_t; OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_free_list_item_t); @@ -118,12 +120,12 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_free_list_item_t); * @param max_elements_to_alloc (IN) Maximum number of elements to allocate. * @param num_elements_per_alloc (IN) Number of elements to grow by per allocation. * @param mpool (IN) Optional memory pool for allocations. - * @param mpool_reg_flags (IN) Flags to pass to mpool registration function. - * @param unused0 (IN) Future. Must be NULL. + * @param rcache_reg_flags (IN) Flags to pass to rcache registration function. + * @param rcache (IN) Optional registration cache. 
* @param item_init (IN) Optional item initialization function * @param ctx (IN) Initialization function context. */ - + OPAL_DECLSPEC int opal_free_list_init (opal_free_list_t *free_list, size_t frag_size, size_t frag_alignment, @@ -134,8 +136,8 @@ OPAL_DECLSPEC int opal_free_list_init (opal_free_list_t *free_list, int max_elements_to_alloc, int num_elements_per_alloc, struct mca_mpool_base_module_t *mpool, - int mpool_reg_flags, - void *unused0, + int rcache_reg_flags, + struct mca_rcache_base_module_t *rcache, opal_free_list_item_init_fn_t item_init, void *ctx); @@ -174,13 +176,13 @@ OPAL_DECLSPEC int opal_free_list_resize_mt (opal_free_list_t *flist, size_t size /** - * Attemp to obtain an item from a free list. + * Attemp to obtain an item from a free list. * * @param fl (IN) Free list. * @param item (OUT) Allocated item. * - * If the requested item is not available the free list is grown to - * accomodate the request - unless the max number of allocations has + * If the requested item is not available the free list is grown to + * accomodate the request - unless the max number of allocations has * been reached. If this is the case - a NULL pointer is returned * to the caller. This function comes in three flavor: thread safe * (opal_free_list_get_mt), single threaded (opal_free_list_get_st), @@ -233,8 +235,8 @@ static inline opal_free_list_item_t *opal_free_list_get (opal_free_list_t *flist * @param fl (IN) Free list. * @param item (OUT) Allocated item. * - * If the requested item is not available the free list is grown to - * accomodate the request - unless the max number of allocations has + * If the requested item is not available the free list is grown to + * accomodate the request - unless the max number of allocations has * been reached. In this case the caller is blocked until an item * is returned to the list. */ @@ -306,7 +308,7 @@ static inline opal_free_list_item_t *opal_free_list_wait (opal_free_list_t *fl) } /** - * Return an item to a free list. 
+ * Return an item to a free list. * * @param fl (IN) Free list. * @param item (OUT) Allocated item. @@ -361,5 +363,5 @@ static inline void opal_free_list_return (opal_free_list_t *flist, opal_free_list_return (fl, item) END_C_DECLS -#endif +#endif diff --git a/opal/class/opal_graph.c b/opal/class/opal_graph.c index e27af310a28..77089f4dfaf 100644 --- a/opal/class/opal_graph.c +++ b/opal/class/opal_graph.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,17 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -149,13 +152,7 @@ static void opal_graph_construct(opal_graph_t *graph) static void opal_graph_destruct(opal_graph_t *graph) { - opal_adjacency_list_t *aj_list; - - while (false == opal_list_is_empty(graph->adjacency_list)) { - aj_list = (opal_adjacency_list_t *)opal_list_remove_first(graph->adjacency_list); - OBJ_RELEASE(aj_list); - } - OBJ_RELEASE(graph->adjacency_list); + OPAL_LIST_RELEASE(graph->adjacency_list); graph->number_of_vertices = 0; graph->number_of_edges = 0; } @@ -174,21 +171,14 @@ static void opal_adjacency_list_construct(opal_adjacency_list_t *aj_list) static void opal_adjacency_list_destruct(opal_adjacency_list_t *aj_list) { - opal_graph_edge_t *edge; - aj_list->vertex = NULL; - while (false == opal_list_is_empty(aj_list->edges)) { - edge = (opal_graph_edge_t *)opal_list_remove_first(aj_list->edges); - OBJ_RELEASE(edge); - } - OBJ_RELEASE(aj_list->edges); - + OPAL_LIST_RELEASE(aj_list->edges); } /** * This function deletes all the edges that are connected *to* a * vertex. - * + * * @param graph * @param vertex */ @@ -230,7 +220,7 @@ static void delete_all_edges_conceded_to_vertex(opal_graph_t *graph, opal_graph_ /** * This graph API adds a vertex to graph. The most common use * for this API is while building a graph. - * + * * @param graph The graph that the vertex will be added to. * @param vertex The vertex we want to add. 
*/ @@ -256,7 +246,7 @@ void opal_graph_add_vertex(opal_graph_t *graph, opal_graph_vertex_t *vertex) aj_list->vertex = vertex; /* point the vertex to the adjacency list of the vertex (for easy searching) */ vertex->in_adj_list = aj_list; - /* Append the new creates adjacency list to the graph */ + /* Append the new creates adjacency list to the graph */ opal_list_append(graph->adjacency_list, (opal_list_item_t*)aj_list); /* point the vertex to the graph it belongs to (mostly for debug uses)*/ vertex->in_graph = graph; @@ -269,14 +259,14 @@ void opal_graph_add_vertex(opal_graph_t *graph, opal_graph_vertex_t *vertex) * This graph API adds an edge (connection between two * vertices) to a graph. The most common use * for this API is while building a graph. - * + * * @param graph The graph that this edge will be added to. * @param edge The edge that we want to add. - * + * * @return int Success or error. this API can return an error if * one of the vertices is not in the graph. */ -int opal_graph_add_edge(opal_graph_t *graph, opal_graph_edge_t *edge) +int opal_graph_add_edge(opal_graph_t *graph, opal_graph_edge_t *edge) { opal_adjacency_list_t *aj_list, *start_aj_list= NULL; opal_list_item_t *item; @@ -322,7 +312,7 @@ int opal_graph_add_edge(opal_graph_t *graph, opal_graph_edge_t *edge) * graph. while removing vertices from a graph, we should also * remove the connections from and to the vertices that we are * removing. - * + * * @param graph The graph that this edge will be remove from. * @param edge the edge that we want to remove. */ @@ -339,7 +329,7 @@ void opal_graph_remove_edge (opal_graph_t *graph, opal_graph_edge_t *edge) * This graph API remove a vertex from graph. The most common * use for this API is while distracting a graph or while * removing relevant vertices from a graph. - * + * * @param graph The graph that the vertex will be remove from. * @param vertex The vertex we want to remove. 
*/ @@ -347,16 +337,10 @@ void opal_graph_remove_edge (opal_graph_t *graph, opal_graph_edge_t *edge) void opal_graph_remove_vertex(opal_graph_t *graph, opal_graph_vertex_t *vertex) { opal_adjacency_list_t *adj_list; - opal_graph_edge_t *edge; - /** - * remove all the edges of this vertex and destruct them. - */ + /* do not need to remove all the edges of this vertex and destruct them as + * they will be released in the destructor for adj_list */ adj_list = vertex->in_adj_list; - while (false == opal_list_is_empty(adj_list->edges)) { - edge = (opal_graph_edge_t *)opal_list_remove_first(adj_list->edges); - OBJ_RELEASE(edge); - } /** * remove the adjscency list of this vertex from the graph and * destruct it. @@ -375,11 +359,11 @@ void opal_graph_remove_vertex(opal_graph_t *graph, opal_graph_vertex_t *vertex) /** * This graph API tell us if two vertices are adjacent - * + * * @param graph The graph that the vertices belongs to. * @param vertex1 first vertex. * @param vertex2 second vertex. - * + * * @return uint32_t the weight of the connection between the two * vertices or infinity if the vertices are not * connected. @@ -432,9 +416,9 @@ uint32_t opal_graph_adjacent(opal_graph_t *graph, opal_graph_vertex_t *vertex1, /** * This Graph API returns the order of the graph (number of * vertices) - * + * * @param graph - * + * * @return int */ int opal_graph_get_order(opal_graph_t *graph) @@ -447,7 +431,7 @@ int opal_graph_get_order(opal_graph_t *graph) * edges) * * @param graph - * + * * @return int */ int opal_graph_get_size(opal_graph_t *graph) @@ -461,7 +445,7 @@ int opal_graph_get_size(opal_graph_t *graph) * @param graph the graph we searching in. * @param vertex_data the vertex data we are searching according * to. - * + * * @return opal_graph_vertex_t* The vertex founded or NULL. 
*/ opal_graph_vertex_t *opal_graph_find_vertex(opal_graph_t *graph, void *vertex_data) @@ -492,12 +476,12 @@ opal_graph_vertex_t *opal_graph_find_vertex(opal_graph_t *graph, void *vertex_da /** * This graph API returns an array of pointers of all the * vertices in the graph. - * - * + * + * * @param graph * @param vertices_list an array of pointers of all the * vertices in the graph vertices. - * + * * @return int returning the graph order (the * number of vertices in the returned array) */ @@ -528,20 +512,20 @@ int opal_graph_get_graph_vertices(opal_graph_t *graph, opal_pointer_array_t *ver /** * This graph API returns all the adjacents of a vertex and the * distance (weight) of those adjacents and the vertex. - * + * * @param graph * @param vertex The reference vertex * @param adjacents An allocated pointer array of vertices and * their distance from the reference vertex. * Note that this pointer should be free after * usage by the user - * + * * @return int the number of adjacents in the list. */ int opal_graph_get_adjacent_vertices(opal_graph_t *graph, opal_graph_vertex_t *vertex, opal_value_array_t *adjacents) { opal_adjacency_list_t *adj_list; - opal_graph_edge_t *edge; + opal_graph_edge_t *edge; int adjacents_number; opal_list_item_t *item; vertex_distance_from_t distance_from; @@ -576,11 +560,11 @@ int opal_graph_get_adjacent_vertices(opal_graph_t *graph, opal_graph_vertex_t *v /** * This graph API finds the shortest path between two vertices. - * + * * @param graph * @param vertex1 The start vertex. * @param vertex2 The end vertex. - * + * * @return uint32_t the distance between the two vertices. */ @@ -632,10 +616,10 @@ uint32_t opal_graph_spf(opal_graph_t *graph, opal_graph_vertex_t *vertex1, opal_ * Compare the distance between two vertex distance items. this * function is used for sorting an array of vertices distance by * qsort function. 
- * + * * @param item1 a void pointer to vertex distance structure * @param item2 a void pointer to vertex distance structure - * + * * @return int 1 - the first item weight is higher then the * second item weight. 0 - the weights are equal. -1 - * the second item weight is higher the the first item @@ -666,12 +650,12 @@ static int compare_vertex_distance(const void *item1, const void *item2) * This graph API returns the distance (weight) from a reference * vertex to all other vertices in the graph using the Dijkstra * algorithm - * + * * @param graph * @param vertex The reference vertex. * @param distance_array An array of vertices and * their distance from the reference vertex. - * + * * @return uint32_t the size of the distance array */ uint32_t opal_graph_dijkstra(opal_graph_t *graph, opal_graph_vertex_t *vertex, opal_value_array_t *distance_array) @@ -758,7 +742,7 @@ uint32_t opal_graph_dijkstra(opal_graph_t *graph, opal_graph_vertex_t *vertex, o * This graph API duplicates a graph. Note that this API does * not copy the graph but builds a new graph while coping just * the vertex data. - * + * * @param dest The new created graph. * @param src The graph we want to duplicate. */ diff --git a/opal/class/opal_graph.h b/opal/class/opal_graph.h index 68743728f3b..47b5f54bdb9 100644 --- a/opal/class/opal_graph.h +++ b/opal/class/opal_graph.h @@ -5,24 +5,24 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** - * @file + * @file * The opal_graph interface is used to provide a generic graph infrastructure * to Open-MPI. The graph is represented as an adjacentcy list. * The graph is a list of vertices. The graph is a weighted directional graph. - * Each vertex contains a pointer to a vertex data. - * This pointer can point to the structure that this vertex belongs to. + * Each vertex contains a pointer to a vertex data. + * This pointer can point to the structure that this vertex belongs to. */ #ifndef OPAL_GRAPH_H #define OPAL_GRAPH_H @@ -58,7 +58,7 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_graph_t); * * @param dst The destination pointer of vertex_data * @param src The source pointer of the vertex_data - * + * * */ typedef void (*opal_graph_copy_vertex_data)(void **dst, void *src); @@ -66,7 +66,7 @@ typedef void (*opal_graph_copy_vertex_data)(void **dst, void *src); /** * free vertex data. * @param vertex_data - * + * * The vertex data can point to the structure that this vertex * belongs to. */ @@ -79,8 +79,8 @@ typedef void *(*opal_graph_alloc_vertex_data)(void); /** * Compare two vertices data. - * - *@param vertex_data1 + * + *@param vertex_data1 *@param vertex_data2 * *@return int The comparition results. 1- vertex_data1 is bigger @@ -92,7 +92,7 @@ typedef int (*opal_graph_compare_vertex_data)(void *vertex_data1, void *vertex_ /** * print a vertex data. 
- * + * * @param vertex_data */ typedef char *(*opal_graph_print_vertex)(void *vertex_data); @@ -127,7 +127,7 @@ typedef struct opal_graph_vertex_t opal_graph_vertex_t; */ struct opal_adjacency_list_t { opal_list_item_t super; /* A pointer to vertex parent */ - opal_graph_vertex_t *vertex; /* The adjacency_list is for adjacent of this vertex */ + opal_graph_vertex_t *vertex; /* The adjacency_list is for adjacent of this vertex */ opal_list_t *edges; /* An edge list for all the adjacent and their weights */ }; @@ -172,7 +172,7 @@ struct opal_graph_t { /** * A type for graph class */ -typedef struct opal_graph_t opal_graph_t; +typedef struct opal_graph_t opal_graph_t; /** * This structure represent the distance (weight) of a vertex @@ -191,7 +191,7 @@ typedef struct vertex_distance_from_t vertex_distance_from_t; /** * This graph API adds a vertex to graph. The most common use * for this API is while building a graph. - * + * * @param graph The graph that the vertex will be added to. * @param vertex The vertex we want to add. */ @@ -201,7 +201,7 @@ OPAL_DECLSPEC void opal_graph_add_vertex(opal_graph_t *graph, opal_graph_vertex_ * This graph API remove a vertex from graph. The most common * use for this API is while distracting a graph or while * removing relevant vertices from a graph. - * + * * @param graph The graph that the vertex will be remove from. * @param vertex The vertex we want to remove. */ @@ -211,14 +211,14 @@ OPAL_DECLSPEC void opal_graph_remove_vertex(opal_graph_t *graph, opal_graph_vert * This graph API adds an edge (connection between two * vertices) to a graph. The most common use * for this API is while building a graph. - * + * * @param graph The graph that this edge will be added to. * @param edge The edge that we want to add. - * + * * @return int Success or error. this API can return an error if * one of the vertices is not in the graph. 
*/ -OPAL_DECLSPEC int opal_graph_add_edge(opal_graph_t *graph, opal_graph_edge_t *edge); +OPAL_DECLSPEC int opal_graph_add_edge(opal_graph_t *graph, opal_graph_edge_t *edge); /** * This graph API removes an edge (a connection between two @@ -227,7 +227,7 @@ OPAL_DECLSPEC int opal_graph_add_edge(opal_graph_t *graph, opal_graph_edge_t *ed * graph. while removing vertices from a graph, we should also * remove the connections from and to the vertices that we are * removing. - * + * * @param graph The graph that this edge will be remove from. * @param edge the edge that we want to remove. */ @@ -235,11 +235,11 @@ OPAL_DECLSPEC void opal_graph_remove_edge (opal_graph_t *graph, opal_graph_edge_ /** * This graph API tell us if two vertices are adjacent - * + * * @param graph The graph that the vertices belongs to. * @param vertex1 first vertex. * @param vertex2 second vertex. - * + * * @return uint32_t the weight of the connection between the two * vertices or infinity if the vertices are not * connected. @@ -249,9 +249,9 @@ OPAL_DECLSPEC uint32_t opal_graph_adjacent(opal_graph_t *graph, opal_graph_verte /** * This Graph API returns the order of the graph (number of * vertices) - * + * * @param graph - * + * * @return int */ OPAL_DECLSPEC int opal_graph_get_order(opal_graph_t *graph); @@ -261,7 +261,7 @@ OPAL_DECLSPEC int opal_graph_get_order(opal_graph_t *graph); * edges) * * @param graph - * + * * @return int */ OPAL_DECLSPEC int opal_graph_get_size(opal_graph_t *graph); @@ -272,7 +272,7 @@ OPAL_DECLSPEC int opal_graph_get_size(opal_graph_t *graph); * @param graph the graph we searching in. * @param vertex_data the vertex data we are searching according * to. - * + * * @return opal_graph_vertex_t* The vertex founded or NULL. 
*/ OPAL_DECLSPEC opal_graph_vertex_t *opal_graph_find_vertex(opal_graph_t *graph, void *vertex_data); @@ -281,12 +281,12 @@ OPAL_DECLSPEC opal_graph_vertex_t *opal_graph_find_vertex(opal_graph_t *graph, v /** * This graph API returns an array of pointers of all the * vertices in the graph. - * - * + * + * * @param graph * @param vertices_list an array of pointers of all the * vertices in the graph vertices. - * + * * @return int returning the graph order (the * number of vertices in the returned array) */ @@ -295,14 +295,14 @@ OPAL_DECLSPEC int opal_graph_get_graph_vertices(opal_graph_t *graph, opal_pointe /** * This graph API returns all the adjacent of a vertex and the * distance (weight) of those adjacent and the vertex. - * + * * @param graph * @param vertex The reference vertex * @param adjacent An allocated pointer array of vertices and * their distance from the reference vertex. * Note that this pointer should be free after * usage by the user - * + * * @return int the number of adjacent in the list. */ OPAL_DECLSPEC int opal_graph_get_adjacent_vertices(opal_graph_t *graph, opal_graph_vertex_t *vertex, opal_value_array_t *adjacent); @@ -311,7 +311,7 @@ OPAL_DECLSPEC int opal_graph_get_adjacent_vertices(opal_graph_t *graph, opal_gra * This graph API duplicates a graph. Note that this API does * not copy the graph but builds a new graph while coping just * the vertices data. - * + * * @param dest The new created graph. * @param src The graph we want to duplicate. */ @@ -319,11 +319,11 @@ OPAL_DECLSPEC void opal_graph_duplicate(opal_graph_t **dest, opal_graph_t *src); /** * This graph API finds the shortest path between two vertices. - * + * * @param graph * @param vertex1 The start vertex. * @param vertex2 The end vertex. - * + * * @return uint32_t the distance between the two vertices. 
*/ OPAL_DECLSPEC uint32_t opal_graph_spf(opal_graph_t *graph, opal_graph_vertex_t *vertex1, opal_graph_vertex_t *vertex2); @@ -332,12 +332,12 @@ OPAL_DECLSPEC uint32_t opal_graph_spf(opal_graph_t *graph, opal_graph_vertex_t * * This graph API returns the distance (weight) from a reference * vertex to all other vertices in the graph using the Dijkstra * algorithm - * + * * @param graph * @param vertex The reference vertex. * @param distance_array An array of vertices and * their distance from the reference vertex. - * + * * @return uint32_t the size of the distance array */ OPAL_DECLSPEC uint32_t opal_graph_dijkstra(opal_graph_t *graph, opal_graph_vertex_t *vertex, opal_value_array_t *distance_array); diff --git a/opal/class/opal_hash_table.c b/opal/class/opal_hash_table.c index dcc246efa5e..628440dc1fa 100644 --- a/opal/class/opal_hash_table.c +++ b/opal/class/opal_hash_table.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,9 +16,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,9 +33,9 @@ /* * opal_hash_table_t - * + * * Sketch: [Contributed by David Linden of Hewlett-Packard] - * + * * This has been found to be good for search and insert and * (seldom-)remove, all with probablistic O(1) time. Having a good * distribution of the hash indices is important, so even if you know @@ -55,7 +55,7 @@ * because searching will eventually find an invalid element. 
At * maximum density, assuming random usage of the elements, the * expected search length is 1/(1-density); for a density of 1/2, this - * is 2. + * is 2. * * I believe this blinded bucket/element scheme is actually more * storage-efficient than a bucket having a linear list of elements. @@ -87,7 +87,7 @@ #define HASH_MULTIPLIER 31 -/* +/* * Define the structs that are opaque in the .h */ @@ -108,7 +108,7 @@ typedef struct opal_hash_element_t opal_hash_element_t; struct opal_hash_type_methods_t { /* Frees any storage associated with the element * The value is not owned by the hash table - * The key,key_size of pointer keys is + * The key,key_size of pointer keys is */ void (*elt_destructor)(opal_hash_element_t * elt); /* Hash the key of the element -- for growing and adjusting-after-removal */ @@ -121,7 +121,7 @@ static void opal_hash_table_construct(opal_hash_table_t* ht); static void opal_hash_table_destruct(opal_hash_table_t* ht); OBJ_CLASS_INSTANCE( - opal_hash_table_t, + opal_hash_table_t, opal_object_t, opal_hash_table_construct, opal_hash_table_destruct @@ -144,11 +144,11 @@ opal_hash_table_destruct(opal_hash_table_t* ht) free(ht->ht_table); } -/* +/* * Init, etc */ -static size_t +static size_t opal_hash_round_capacity_up(size_t capacity) { /* round up to (1 mod 30) */ @@ -200,7 +200,7 @@ opal_hash_table_remove_all(opal_hash_table_t* ht) ht->ht_size = 0; /* the tests reuse the hash table for different types after removing all */ /* so we should allow that by forgetting what type it used to be */ - ht->ht_type_methods = NULL; + ht->ht_type_methods = NULL; return OPAL_SUCCESS; } @@ -212,10 +212,10 @@ opal_hash_grow(opal_hash_table_t * ht) opal_hash_element_t* new_table; size_t old_capacity; size_t new_capacity; - + old_table = ht->ht_table; old_capacity = ht->ht_capacity; - + new_capacity = old_capacity * ht->ht_growth_numer / ht->ht_growth_denom; new_capacity = opal_hash_round_capacity_up(new_capacity); @@ -317,13 +317,13 @@ 
opal_hash_table_remove_elt_at(opal_hash_table_t * ht, size_t ii) /***************************************************************************/ -static uint64_t +static uint64_t opal_hash_hash_elt_uint32(opal_hash_element_t * elt) { return elt->key.u32; } -static const struct opal_hash_type_methods_t +static const struct opal_hash_type_methods_t opal_hash_type_methods_uint32 = { NULL, opal_hash_hash_elt_uint32 @@ -450,13 +450,13 @@ opal_hash_table_remove_value_uint32(opal_hash_table_t * ht, uint32_t key) /***************************************************************************/ -static uint64_t +static uint64_t opal_hash_hash_elt_uint64(opal_hash_element_t * elt) { return elt->key.u64; } -static const struct opal_hash_type_methods_t +static const struct opal_hash_type_methods_t opal_hash_type_methods_uint64 = { NULL, opal_hash_hash_elt_uint64 @@ -584,13 +584,13 @@ opal_hash_table_remove_value_uint64(opal_hash_table_t * ht, uint64_t key) /***************************************************************************/ /* helper function used in several places */ -static uint64_t +static uint64_t opal_hash_hash_key_ptr(const void * key, size_t key_size) { uint64_t hash; const unsigned char *scanner; size_t ii; - + hash = 0; scanner = (const unsigned char *)key; for (ii = 0; ii < key_size; ii += 1) { @@ -601,7 +601,7 @@ opal_hash_hash_key_ptr(const void * key, size_t key_size) /* ptr methods */ -static void +static void opal_hash_destruct_elt_ptr(opal_hash_element_t * elt) { elt->key.ptr.key_size = 0; @@ -612,20 +612,20 @@ opal_hash_destruct_elt_ptr(opal_hash_element_t * elt) } } -static uint64_t +static uint64_t opal_hash_hash_elt_ptr(opal_hash_element_t * elt) { return opal_hash_hash_key_ptr(elt->key.ptr.key, elt->key.ptr.key_size); } -static const struct opal_hash_type_methods_t +static const struct opal_hash_type_methods_t opal_hash_type_methods_ptr = { opal_hash_destruct_elt_ptr, opal_hash_hash_elt_ptr }; int /* OPAL_ return code */ 
-opal_hash_table_get_value_ptr(opal_hash_table_t * ht, +opal_hash_table_get_value_ptr(opal_hash_table_t * ht, const void * key, size_t key_size, void * *value) { @@ -663,8 +663,8 @@ opal_hash_table_get_value_ptr(opal_hash_table_t * ht, } int /* OPAL_ return code */ -opal_hash_table_set_value_ptr(opal_hash_table_t * ht, - const void * key, size_t key_size, +opal_hash_table_set_value_ptr(opal_hash_table_t * ht, + const void * key, size_t key_size, void * value) { int rc; @@ -707,7 +707,7 @@ opal_hash_table_set_value_ptr(opal_hash_table_t * ht, } else if (elt->key.ptr.key_size == key_size && 0 == memcmp(elt->key.ptr.key, key, key_size)) { /* replace existing value */ - elt->value = value; + elt->value = value; return OPAL_SUCCESS; } else { /* keep looking */ @@ -716,7 +716,7 @@ opal_hash_table_set_value_ptr(opal_hash_table_t * ht, } int /* OPAL_ return code */ -opal_hash_table_remove_value_ptr(opal_hash_table_t * ht, +opal_hash_table_remove_value_ptr(opal_hash_table_t * ht, const void * key, size_t key_size) { size_t ii, capacity = ht->ht_capacity; @@ -755,7 +755,7 @@ opal_hash_table_remove_value_ptr(opal_hash_table_t * ht, /* Traversals */ static int /* OPAL_ return code */ -opal_hash_table_get_next_elt(opal_hash_table_t *ht, +opal_hash_table_get_next_elt(opal_hash_table_t *ht, opal_hash_element_t * prev_elt, /* NULL means find first */ opal_hash_element_t * *next_elt) { @@ -773,16 +773,16 @@ opal_hash_table_get_next_elt(opal_hash_table_t *ht, } int /* OPAL_ return code */ -opal_hash_table_get_first_key_uint32(opal_hash_table_t * ht, - uint32_t *key, void * *value, +opal_hash_table_get_first_key_uint32(opal_hash_table_t * ht, + uint32_t *key, void * *value, void * *node) { return opal_hash_table_get_next_key_uint32(ht, key, value, NULL, node); } int /* OPAL_ return code */ -opal_hash_table_get_next_key_uint32(opal_hash_table_t * ht, - uint32_t *key, void * *value, +opal_hash_table_get_next_key_uint32(opal_hash_table_t * ht, + uint32_t *key, void * *value, void * 
in_node, void * *out_node) { opal_hash_element_t * elt; @@ -796,16 +796,16 @@ opal_hash_table_get_next_key_uint32(opal_hash_table_t * ht, } int /* OPAL_ return code */ -opal_hash_table_get_first_key_ptr(opal_hash_table_t * ht, - void * *key, size_t *key_size, void * *value, +opal_hash_table_get_first_key_ptr(opal_hash_table_t * ht, + void * *key, size_t *key_size, void * *value, void * *node) { return opal_hash_table_get_next_key_ptr(ht, key, key_size, value, NULL, node); } int /* OPAL_ return code */ -opal_hash_table_get_next_key_ptr(opal_hash_table_t * ht, - void * *key, size_t *key_size, void * *value, +opal_hash_table_get_next_key_ptr(opal_hash_table_t * ht, + void * *key, size_t *key_size, void * *value, void * in_node, void * *out_node) { opal_hash_element_t * elt; @@ -820,16 +820,16 @@ opal_hash_table_get_next_key_ptr(opal_hash_table_t * ht, } int /* OPAL_ return code */ -opal_hash_table_get_first_key_uint64(opal_hash_table_t * ht, - uint64_t *key, void * *value, +opal_hash_table_get_first_key_uint64(opal_hash_table_t * ht, + uint64_t *key, void * *value, void * *node) { return opal_hash_table_get_next_key_uint64(ht, key, value, NULL, node); } int /* OPAL_ return code */ -opal_hash_table_get_next_key_uint64(opal_hash_table_t * ht, - uint64_t *key, void * *value, +opal_hash_table_get_next_key_uint64(opal_hash_table_t * ht, + uint64_t *key, void * *value, void * in_node, void * *out_node) { opal_hash_element_t * elt; @@ -849,7 +849,7 @@ static void opal_proc_table_construct(opal_proc_table_t* pt); static void opal_proc_table_destruct(opal_proc_table_t* pt); OBJ_CLASS_INSTANCE( - opal_proc_table_t, + opal_proc_table_t, opal_hash_table_t, opal_proc_table_construct, opal_proc_table_destruct @@ -866,7 +866,7 @@ opal_proc_table_destruct(opal_proc_table_t* pt) { } -/* +/* * Init, etc */ @@ -886,7 +886,7 @@ int opal_proc_table_remove_all(opal_proc_table_t *pt) { void * node; rc = opal_hash_table_get_first_key_uint32(&pt->super, &jobid, (void **)&vpids, &node); - + if 
(OPAL_SUCCESS == rc) { do { if (NULL != vpids) { @@ -901,7 +901,7 @@ int opal_proc_table_remove_all(opal_proc_table_t *pt) { return rc; } -int opal_proc_table_get_value(opal_proc_table_t* pt, opal_process_name_t key, +int opal_proc_table_get_value(opal_proc_table_t* pt, opal_process_name_t key, void** ptr) { int rc; opal_hash_table_t * vpids; diff --git a/opal/class/opal_hash_table.h b/opal/class/opal_hash_table.h index ddf29a1b91f..c621883f238 100644 --- a/opal/class/opal_hash_table.h +++ b/opal/class/opal_hash_table.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -17,14 +17,14 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ -/** @file +/** @file * * A hash table that may be indexed with either fixed length * (e.g. uint32_t/uint64_t) or arbitrary size binary key @@ -37,16 +37,14 @@ #include "opal_config.h" -#ifdef HAVE_STDINT_H #include -#endif #include "opal/class/opal_list.h" #include "opal/util/proc.h" BEGIN_C_DECLS OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_hash_table_t); - + struct opal_hash_table_t { opal_object_t super; /**< subclass of opal_object_t */ @@ -60,14 +58,14 @@ struct opal_hash_table_t }; typedef struct opal_hash_table_t opal_hash_table_t; - - + + /** * Initializes the table size, must be called before using * the table. * * @param table The input hash table (IN). 
- * @param size The size of the table, which will be rounded up + * @param size The size of the table, which will be rounded up * (if required) to the next highest power of two (IN). * @return OPAL error code. * @@ -116,7 +114,7 @@ OPAL_DECLSPEC int opal_hash_table_remove_all(opal_hash_table_t *ht); * */ -OPAL_DECLSPEC int opal_hash_table_get_value_uint32(opal_hash_table_t* table, uint32_t key, +OPAL_DECLSPEC int opal_hash_table_get_value_uint32(opal_hash_table_t* table, uint32_t key, void** ptr); /** @@ -194,7 +192,7 @@ OPAL_DECLSPEC int opal_hash_table_remove_value_uint64(opal_hash_table_t *table, * */ -OPAL_DECLSPEC int opal_hash_table_get_value_ptr(opal_hash_table_t *table, const void* key, +OPAL_DECLSPEC int opal_hash_table_get_value_ptr(opal_hash_table_t *table, const void* key, size_t keylen, void **ptr); /** @@ -248,11 +246,11 @@ OPAL_DECLSPEC int opal_hash_table_get_first_key_uint32(opal_hash_table_t *table, /** - * Get the next 32 bit key from the hash table, knowing the current key + * Get the next 32 bit key from the hash table, knowing the current key * @param table The hash table pointer (IN) * @param key The key (OUT) * @param value The value corresponding to this key (OUT) - * @param in_node The node pointer from previous call to either get_first + * @param in_node The node pointer from previous call to either get_first or get_next (IN) * @param out_node The pointer to the hash table internal node which stores * the key-value pair (this is required for subsequent calls @@ -284,11 +282,11 @@ OPAL_DECLSPEC int opal_hash_table_get_first_key_uint64(opal_hash_table_t *table, /** - * Get the next 64 bit key from the hash table, knowing the current key + * Get the next 64 bit key from the hash table, knowing the current key * @param table The hash table pointer (IN) * @param key The key (OUT) * @param value The value corresponding to this key (OUT) - * @param in_node The node pointer from previous call to either get_first + * @param in_node The node 
pointer from previous call to either get_first or get_next (IN) * @param out_node The pointer to the hash table internal node which stores * the key-value pair (this is required for subsequent calls @@ -296,7 +294,7 @@ OPAL_DECLSPEC int opal_hash_table_get_first_key_uint64(opal_hash_table_t *table, * @return OPAL error code * */ - + OPAL_DECLSPEC int opal_hash_table_get_next_key_uint64(opal_hash_table_t *table, uint64_t *key, void **value, void *in_node, void **out_node); @@ -321,12 +319,12 @@ OPAL_DECLSPEC int opal_hash_table_get_first_key_ptr(opal_hash_table_t *table, vo /** - * Get the next ptr bit key from the hash table, knowing the current key + * Get the next ptr bit key from the hash table, knowing the current key * @param table The hash table pointer (IN) * @param key The key (OUT) * @param key_size The key size (OUT) * @param value The value corresponding to this key (OUT) - * @param in_node The node pointer from previous call to either get_first + * @param in_node The node pointer from previous call to either get_first or get_next (IN) * @param out_node The pointer to the hash table internal node which stores * the key-value pair (this is required for subsequent calls @@ -342,7 +340,7 @@ OPAL_DECLSPEC int opal_hash_table_get_next_key_ptr(opal_hash_table_t *table, voi OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_proc_table_t); - + struct opal_proc_table_t { opal_hash_table_t super; /**< subclass of opal_object_t */ @@ -355,16 +353,16 @@ struct opal_proc_table_t }; typedef struct opal_proc_table_t opal_proc_table_t; - - + + /** * Initializes the table size, must be called before using * the table. * * @param pt The input hash table (IN). - * @param jobids The size of the jobids table, which will be rounded up + * @param jobids The size of the jobids table, which will be rounded up * (if required) to the next highest power of two (IN). 
- * @param vpids The size of the vpids table, which will be rounded up + * @param vpids The size of the vpids table, which will be rounded up * (if required) to the next highest power of two (IN). * @return OPAL error code. * @@ -395,7 +393,7 @@ OPAL_DECLSPEC int opal_proc_table_remove_all(opal_proc_table_t *pt); * */ -OPAL_DECLSPEC int opal_proc_table_get_value(opal_proc_table_t* pt, opal_process_name_t key, +OPAL_DECLSPEC int opal_proc_table_get_value(opal_proc_table_t* pt, opal_process_name_t key, void** ptr); /** @@ -443,16 +441,16 @@ OPAL_DECLSPEC int opal_proc_table_get_first_key(opal_proc_table_t *pt, opal_proc /** - * Get the next opal_process_name_t key from the hash table, knowing the current key + * Get the next opal_process_name_t key from the hash table, knowing the current key * @param pt The hash table pointer (IN) * @param key The key (OUT) * @param value The value corresponding to this key (OUT) - * @param in_node1 The first node pointer from previous call to either get_first + * @param in_node1 The first node pointer from previous call to either get_first or get_next (IN) * @param out_node1 The first pointer to the hash table internal node which stores * the key-value pair (this is required for subsequent calls * to get_next_key) (OUT) - * @param in_node2 The second node pointer from previous call to either get_first + * @param in_node2 The second node pointer from previous call to either get_first or get_next (IN) * @param out_node2 The second pointer to the hash table internal node which stores * the key-value pair (this is required for subsequent calls diff --git a/opal/class/opal_hotel.c b/opal/class/opal_hotel.c index 5c0fae78f09..2a02c7e552f 100644 --- a/opal/class/opal_hotel.c +++ b/opal/class/opal_hotel.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. 
All rights reserved + * Copyright (c) 2015 Intel, Inc. All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -19,20 +20,31 @@ static void local_eviction_callback(int fd, short flags, void *arg) { - opal_hotel_room_eviction_callback_arg_t *eargs = + opal_hotel_room_eviction_callback_arg_t *eargs = (opal_hotel_room_eviction_callback_arg_t*) arg; void *occupant = eargs->hotel->rooms[eargs->room_num].occupant; - /* Remove the occupant from the room and invoke the user callback - to tell them that they were evicted */ - opal_hotel_checkout(eargs->hotel, eargs->room_num); - eargs->hotel->evict_callback_fn(eargs->hotel, - eargs->room_num, - occupant); + /* Remove the occurpant from the room. + + Do not change this logic without also changing the same logic + in opal_hotel_checkout() and + opal_hotel_checkout_and_return_occupant(). */ + opal_hotel_t *hotel = eargs->hotel; + opal_hotel_room_t *room = &(hotel->rooms[eargs->room_num]); + room->occupant = NULL; + hotel->last_unoccupied_room++; + assert(hotel->last_unoccupied_room < hotel->num_rooms); + hotel->unoccupied_rooms[hotel->last_unoccupied_room] = eargs->room_num; + + /* Invoke the user callback to tell them that they were evicted */ + hotel->evict_callback_fn(hotel, + eargs->room_num, + occupant); } int opal_hotel_init(opal_hotel_t *h, int num_rooms, + opal_event_base_t *evbase, uint32_t eviction_timeout, int eviction_event_priority, opal_hotel_eviction_callback_fn_t evict_callback_fn) @@ -46,12 +58,13 @@ int opal_hotel_init(opal_hotel_t *h, int num_rooms, } h->num_rooms = num_rooms; + h->evbase = evbase; h->eviction_timeout.tv_usec = eviction_timeout % 1000000; h->eviction_timeout.tv_sec = eviction_timeout / 1000000; h->evict_callback_fn = evict_callback_fn; h->rooms = (opal_hotel_room_t*)malloc(num_rooms * sizeof(opal_hotel_room_t)); if (NULL != evict_callback_fn) { - h->eviction_args = + h->eviction_args = (opal_hotel_room_eviction_callback_arg_t*)malloc(num_rooms * 
sizeof(opal_hotel_room_eviction_callback_arg_t)); } h->unoccupied_rooms = (int*) malloc(num_rooms * sizeof(int)); @@ -69,14 +82,16 @@ int opal_hotel_init(opal_hotel_t *h, int num_rooms, h->eviction_args[i].room_num = i; /* Create this room's event (but don't add it) */ - opal_event_set(opal_event_base, - &(h->rooms[i].eviction_timer_event), - -1, 0, local_eviction_callback, - &(h->eviction_args[i])); - - /* Set the priority so it gets serviced properly */ - opal_event_set_priority(&(h->rooms[i].eviction_timer_event), - eviction_event_priority); + if (NULL != h->evbase) { + opal_event_set(h->evbase, + &(h->rooms[i].eviction_timer_event), + -1, 0, local_eviction_callback, + &(h->eviction_args[i])); + + /* Set the priority so it gets serviced properly */ + opal_event_set_priority(&(h->rooms[i].eviction_timer_event), + eviction_event_priority); + } } return OPAL_SUCCESS; @@ -85,6 +100,7 @@ int opal_hotel_init(opal_hotel_t *h, int num_rooms, static void constructor(opal_hotel_t *h) { h->num_rooms = 0; + h->evbase = NULL; h->eviction_timeout.tv_sec = 0; h->eviction_timeout.tv_usec = 0; h->evict_callback_fn = NULL; @@ -99,9 +115,11 @@ static void destructor(opal_hotel_t *h) int i; /* Go through all occupied rooms and destroy their events */ - for (i = 0; i < h->num_rooms; ++i) { - if (NULL != h->rooms[i].occupant) { - opal_event_del(&(h->rooms[i].eviction_timer_event)); + if (NULL != h->evbase) { + for (i = 0; i < h->num_rooms; ++i) { + if (NULL != h->rooms[i].occupant) { + opal_event_del(&(h->rooms[i].eviction_timer_event)); + } } } diff --git a/opal/class/opal_hotel.h b/opal/class/opal_hotel.h index f8ecd4c0cb5..202783edcfe 100644 --- a/opal/class/opal_hotel.h +++ b/opal/class/opal_hotel.h @@ -1,7 +1,6 @@ /* - * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved - * Copyright (c) 2015 Intel, Inc. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -111,6 +110,8 @@ typedef struct opal_hotel_t { /* Max number of rooms in the hotel */ int num_rooms; + /* event base to be used for eviction timeout */ + opal_event_base_t *evbase; struct timeval eviction_timeout; opal_hotel_eviction_callback_fn_t evict_callback_fn; @@ -133,6 +134,7 @@ OBJ_CLASS_DECLARATION(opal_hotel_t); * * @param hotel Pointer to a hotel (IN) * @param num_rooms The total number of rooms in the hotel (IN) + * @param evbase Pointer to event base used for eviction timeout * @param eviction_timeout Max length of a stay at the hotel before * the eviction callback is invoked (in microseconds) * @param eviction_event_priority Event lib priority for the eviction timeout @@ -143,10 +145,16 @@ OBJ_CLASS_DECLARATION(opal_hotel_t); * will be set - occupants will remain checked into the hotel until * explicitly checked out. * + * Also note: the eviction_callback_fn should absolutely not call any + * of the hotel checkout functions. Specifically: the occupant has + * already been ("forcibly") checked out *before* the + * eviction_callback_fn is invoked. + * * @return OPAL_SUCCESS if all initializations were succesful. Otherwise, * the error indicate what went wrong in the function. 
*/ OPAL_DECLSPEC int opal_hotel_init(opal_hotel_t *hotel, int num_rooms, + opal_event_base_t *evbase, uint32_t eviction_timeout, int eviction_event_priority, opal_hotel_eviction_callback_fn_t evict_callback_fn); @@ -188,8 +196,10 @@ static inline int opal_hotel_checkin(opal_hotel_t *hotel, room->occupant = occupant; /* Assign the event and make it pending */ - opal_event_add(&(room->eviction_timer_event), - &(hotel->eviction_timeout)); + if (NULL != hotel->evbase) { + opal_event_add(&(room->eviction_timer_event), + &(hotel->eviction_timeout)); + } return OPAL_SUCCESS; } @@ -211,8 +221,10 @@ static inline void opal_hotel_checkin_with_res(opal_hotel_t *hotel, room->occupant = occupant; /* Assign the event and make it pending */ - opal_event_add(&(room->eviction_timer_event), - &(hotel->eviction_timeout)); + if (NULL != hotel->evbase) { + opal_event_add(&(room->eviction_timer_event), + &(hotel->eviction_timeout)); + } } /** @@ -236,9 +248,13 @@ static inline void opal_hotel_checkout(opal_hotel_t *hotel, int room_num) /* If there's an occupant in the room, check them out */ room = &(hotel->rooms[room_num]); if (OPAL_LIKELY(NULL != room->occupant)) { + /* Do not change this logic without also changing the same + logic in opal_hotel_checkout_and_return_occupant() and + opal_hotel.c:local_eviction_callback(). 
*/ room->occupant = NULL; - opal_event_del(&(room->eviction_timer_event)); - + if (NULL != hotel->evbase) { + opal_event_del(&(room->eviction_timer_event)); + } hotel->last_unoccupied_room++; assert(hotel->last_unoccupied_room < hotel->num_rooms); hotel->unoccupied_rooms[hotel->last_unoccupied_room] = room_num; @@ -271,17 +287,21 @@ static inline void opal_hotel_checkout_and_return_occupant(opal_hotel_t *hotel, room = &(hotel->rooms[room_num]); if (OPAL_LIKELY(NULL != room->occupant)) { opal_output (10, "checking out occupant %p from room num %d", room->occupant, room_num); + /* Do not change this logic without also changing the same + logic in opal_hotel_checkout() and + opal_hotel.c:local_eviction_callback(). */ *occupant = room->occupant; room->occupant = NULL; - opal_event_del(&(room->eviction_timer_event)); + if (NULL != hotel->evbase) { + opal_event_del(&(room->eviction_timer_event)); + } hotel->last_unoccupied_room++; assert(hotel->last_unoccupied_room < hotel->num_rooms); hotel->unoccupied_rooms[hotel->last_unoccupied_room] = room_num; } else { - opal_output( 0, " OOPS there is no occupant in room_num %d", room_num); - } - + *occupant = NULL; + } } /** @@ -299,17 +319,31 @@ static inline bool opal_hotel_is_empty (opal_hotel_t *hotel) } /** - * Destroy a hotel. + * Access the occupant of a room, but leave them checked into their room. * * @param hotel Pointer to hotel (IN) + * @param room Room number to checkout (IN) + * @param void * occupant (OUT) * - * @return OPAL_SUCCESS Always - * - * The hotel (and all of its rooms) is destroyed. No further eviction - * callbacks will be invoked. + * This accessor function is typically used to cycle across the occupants + * to check for someone already present that matches a description. 
*/ -OPAL_DECLSPEC int opal_hotel_finalize(opal_hotel_t *hotel); +static inline void opal_hotel_knock(opal_hotel_t *hotel, int room_num, void **occupant) +{ + opal_hotel_room_t *room; + + /* Bozo check */ + assert(room_num < hotel->num_rooms); + + *occupant = NULL; + /* If there's an occupant in the room, have them come to the door */ + room = &(hotel->rooms[room_num]); + if (OPAL_LIKELY(NULL != room->occupant)) { + opal_output (10, "occupant %p in room num %d responded to knock", room->occupant, room_num); + *occupant = room->occupant; + } +} END_C_DECLS diff --git a/opal/class/opal_lifo.h b/opal/class/opal_lifo.h index ca66a6e9a5d..a4e106343a8 100644 --- a/opal/class/opal_lifo.h +++ b/opal/class/opal_lifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reseved. * $COPYRIGHT$ * @@ -25,6 +25,7 @@ #define OPAL_LIFO_H_HAS_BEEN_INCLUDED #include "opal_config.h" +#include #include "opal/class/opal_list.h" #include "opal/sys/atomic.h" @@ -180,6 +181,52 @@ static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo, } while (1); } +#if OPAL_HAVE_ATOMIC_LLSC_PTR + +static inline void _opal_lifo_release_cpu (void) +{ + /* NTH: there are many ways to cause the current thread to be suspended. This one + * should work well in most cases. Another approach would be to use poll (NULL, 0, ) but + * the interval will be forced to be in ms (instead of ns or us). Note that there + * is a performance improvement for the lifo test when this call is made on detection + * of contention but it may not translate into actually MPI or application performance + * improvements. */ + static struct timespec interval = { .tv_sec = 0, .tv_nsec = 100 }; + nanosleep (&interval, NULL); +} + +/* Retrieve one element from the LIFO. 
If we reach the ghost element then the LIFO + * is empty so we return NULL. + */ +static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) +{ + opal_list_item_t *item, *next; + int attempt = 0; + + do { + if (++attempt == 5) { + /* deliberatly suspend this thread to allow other threads to run. this should + * only occur during periods of contention on the lifo. */ + _opal_lifo_release_cpu (); + attempt = 0; + } + + item = (opal_list_item_t *) opal_atomic_ll_ptr (&lifo->opal_lifo_head.data.item); + if (&lifo->opal_lifo_ghost == item) { + return NULL; + } + + next = (opal_list_item_t *) item->opal_list_next; + } while (!opal_atomic_sc_ptr (&lifo->opal_lifo_head.data.item, next)); + + opal_atomic_wmb (); + + item->opal_list_next = NULL; + return item; +} + +#else + /* Retrieve one element from the LIFO. If we reach the ghost element then the LIFO * is empty so we return NULL. */ @@ -187,20 +234,22 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) { opal_list_item_t *item; while ((item = (opal_list_item_t *) lifo->opal_lifo_head.data.item) != &lifo->opal_lifo_ghost) { - opal_atomic_rmb(); - /* ensure it is safe to pop the head */ if (opal_atomic_swap_32((volatile int32_t *) &item->item_free, 1)) { continue; } + opal_atomic_wmb (); + /* try to swap out the head pointer */ if (opal_atomic_cmpset_ptr (&lifo->opal_lifo_head.data.item, item, (void *) item->opal_list_next)) { break; } + /* NTH: don't need another atomic here */ item->item_free = 0; + /* Do some kind of pause to release the bus */ } @@ -214,6 +263,8 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) return item; } +#endif /* OPAL_HAVE_ATOMIC_LLSC_PTR */ + #endif /* single-threaded versions of the lifo functions */ diff --git a/opal/class/opal_list.c b/opal/class/opal_list.c index e40aa5abe25..e0a5112c38a 100644 --- a/opal/class/opal_list.c +++ b/opal/class/opal_list.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2014 The University of Tennessee and 
The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -114,11 +114,11 @@ bool opal_list_insert(opal_list_t *list, opal_list_item_t *item, long long idx) /* Adds item to list at index and retains item. */ int i; volatile opal_list_item_t *ptr, *next; - + if ( idx >= (long long)list->opal_list_length ) { return false; } - + if ( 0 == idx ) { opal_list_prepend(list, item); @@ -188,7 +188,7 @@ opal_list_transfer(opal_list_item_t *pos, opal_list_item_t *begin, void -opal_list_join(opal_list_t *thislist, opal_list_item_t *pos, +opal_list_join(opal_list_t *thislist, opal_list_item_t *pos, opal_list_t *xlist) { if (0 != opal_list_get_size(xlist)) { @@ -206,14 +206,14 @@ void opal_list_splice(opal_list_t *thislist, opal_list_item_t *pos, opal_list_t *xlist, opal_list_item_t *first, opal_list_item_t *last) -{ +{ size_t change = 0; opal_list_item_t *tmp; if (first != last) { /* figure out how many things we are going to move (have to do * first, since last might be end and then we wouldn't be able - * to run the loop) + * to run the loop) */ for (tmp = first ; tmp != last ; tmp = opal_list_get_next(tmp)) { change++; @@ -237,7 +237,7 @@ int opal_list_sort(opal_list_t* list, opal_list_item_compare_fn_t compare) if (0 == list->opal_list_length) { return OPAL_SUCCESS; } - items = (opal_list_item_t**)malloc(sizeof(opal_list_item_t*) * + items = (opal_list_item_t**)malloc(sizeof(opal_list_item_t*) * list->opal_list_length); if (NULL == items) { @@ -247,8 +247,8 @@ int opal_list_sort(opal_list_t* list, opal_list_item_compare_fn_t compare) while(NULL 
!= (item = opal_list_remove_first(list))) { items[index++] = item; } - - qsort(items, index, sizeof(opal_list_item_t*), + + qsort(items, index, sizeof(opal_list_item_t*), (int(*)(const void*,const void*))compare); for (i=0; iopal_list_sentinel.opal_list_next == + return (list->opal_list_sentinel.opal_list_next == &(list->opal_list_sentinel) ? true : false); } @@ -407,7 +407,7 @@ static inline opal_list_item_t* opal_list_get_end(opal_list_t* list) * * @returns The size of the list (size_t) * - * This is an O(1) lookup to return the size of the list. + * This is an O(1) lookup to return the size of the list. * * This is an inlined function in compilers that support inlining, so * it's usually a cheap operation. @@ -424,7 +424,7 @@ static inline size_t opal_list_get_size(opal_list_t* list) #if OPAL_ENABLE_DEBUG && 0 /* not sure if we really want this running in devel, as it does * slow things down. Wanted for development of splice / join to - * make sure length was reset properly + * make sure length was reset properly */ size_t check_len = 0; opal_list_item_t *item; @@ -589,8 +589,8 @@ static inline void _opal_list_append(opal_list_t *list, opal_list_item_t *item * This is an inlined function in compilers that support inlining, so * it's usually a cheap operation. 
*/ -static inline void opal_list_prepend(opal_list_t *list, - opal_list_item_t *item) +static inline void opal_list_prepend(opal_list_t *list, + opal_list_item_t *item) { opal_list_item_t* sentinel = &(list->opal_list_sentinel); #if OPAL_ENABLE_DEBUG @@ -602,16 +602,16 @@ static inline void opal_list_prepend(opal_list_t *list, /* reset item's next pointer */ item->opal_list_next = sentinel->opal_list_next; - + /* reset item's previous pointer */ item->opal_list_prev = sentinel; - + /* reset previous first element's previous poiner */ sentinel->opal_list_next->opal_list_prev = item; - + /* reset head's next pointer */ sentinel->opal_list_next = item; - + /* increment list element counter */ list->opal_list_length++; @@ -652,7 +652,7 @@ static inline opal_list_item_t *opal_list_remove_first(opal_list_t *list) if ( 0 == list->opal_list_length ) { return (opal_list_item_t *)NULL; } - + #if OPAL_ENABLE_DEBUG /* Spot check: ensure that the first item is only on this list */ @@ -661,16 +661,16 @@ static inline opal_list_item_t *opal_list_remove_first(opal_list_t *list) /* reset list length counter */ list->opal_list_length--; - + /* get pointer to first element on the list */ item = list->opal_list_sentinel.opal_list_next; - + /* reset previous pointer of next item on the list */ item->opal_list_next->opal_list_prev = item->opal_list_prev; - + /* reset the head next pointer */ list->opal_list_sentinel.opal_list_next = item->opal_list_next; - + #if OPAL_ENABLE_DEBUG assert( list == item->opal_list_item_belong_to ); item->opal_list_item_belong_to = NULL; @@ -714,7 +714,7 @@ static inline opal_list_item_t *opal_list_remove_last(opal_list_t *list) if ( 0 == list->opal_list_length ) { return (opal_list_item_t *)NULL; } - + #if OPAL_ENABLE_DEBUG /* Spot check: ensure that the first item is only on this list */ @@ -723,16 +723,16 @@ static inline opal_list_item_t *opal_list_remove_last(opal_list_t *list) /* reset list length counter */ list->opal_list_length--; - + /* get item 
*/ item = list->opal_list_sentinel.opal_list_prev; - + /* reset previous pointer on next to last pointer */ item->opal_list_prev->opal_list_next = item->opal_list_next; - + /* reset tail's previous pointer */ list->opal_list_sentinel.opal_list_prev = item->opal_list_prev; - + #if OPAL_ENABLE_DEBUG assert( list == item->opal_list_item_belong_to ); item->opal_list_next = item->opal_list_prev = (opal_list_item_t *)NULL; @@ -807,7 +807,7 @@ static inline void opal_list_insert_pos(opal_list_t *list, opal_list_item_t *pos * If index is greater than the length of the list, no action is * performed and false is returned. */ - OPAL_DECLSPEC bool opal_list_insert(opal_list_t *list, opal_list_item_t *item, + OPAL_DECLSPEC bool opal_list_insert(opal_list_t *list, opal_list_item_t *item, long long idx); @@ -820,7 +820,7 @@ static inline void opal_list_insert_pos(opal_list_t *list, opal_list_item_t *pos * @param xlist List container for list being spliced from * * Join a list into another list. All of the elements of \c xlist - * are inserted before \c pos and removed from \c xlist. + * are inserted before \c pos and removed from \c xlist. * * This operation is an O(1) operation. Both \c thislist and \c * xlist must be valid list containsers. \c xlist will be empty @@ -828,7 +828,7 @@ static inline void opal_list_insert_pos(opal_list_t *list, opal_list_item_t *pos * containers remain valid, including those that point to elements * in \c xlist. 
*/ - OPAL_DECLSPEC void opal_list_join(opal_list_t *thislist, opal_list_item_t *pos, + OPAL_DECLSPEC void opal_list_join(opal_list_t *thislist, opal_list_item_t *pos, opal_list_t *xlist); @@ -839,7 +839,7 @@ static inline void opal_list_insert_pos(opal_list_t *list, opal_list_item_t *pos * @param pos List item in \c thislist marking the position before * which items are inserted * @param xlist List container for list being spliced from - * @param first List item in \c xlist marking the start of elements + * @param first List item in \c xlist marking the start of elements * to be copied into \c thislist * @param last List item in \c xlist marking the end of elements * to be copied into \c thislist diff --git a/opal/class/opal_object.c b/opal/class/opal_object.c index 24a07131906..e3121850ada 100644 --- a/opal/class/opal_object.c +++ b/opal/class/opal_object.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** * @file - * + * * Implementation of opal_object_t, the base opal foundation class */ @@ -118,7 +118,7 @@ void opal_class_initialize(opal_class_t *cls) * plus for each a NULL-sentinel */ - cls->cls_construct_array = + cls->cls_construct_array = (void (**)(opal_object_t*))malloc((cls_construct_array_count + cls_destruct_array_count + 2) * sizeof(opal_construct_t) ); @@ -169,8 +169,8 @@ int opal_class_finalize(void) if (NULL != classes) { for (i = 0; i < num_classes; ++i) { - if (NULL != classes[i]) { - free(classes[i]); + if (NULL != classes[i]) { + free(classes[i]); } } free(classes); diff --git a/opal/class/opal_object.h b/opal/class/opal_object.h index 02d9b17ada7..7e919098f8f 100644 --- a/opal/class/opal_object.h +++ b/opal/class/opal_object.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,8 +11,14 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ + * * Additional copyrights may follow + * * $HEADER$ */ @@ -116,11 +123,9 @@ #include "opal_config.h" #include -#ifdef HAVE_STDLIB_H #include -#endif /* HAVE_STDLIB_H */ -#include "opal/sys/atomic.h" +#include "opal/threads/thread_usage.h" BEGIN_C_DECLS @@ -165,9 +170,20 @@ struct opal_class_t { * @param NAME Name of the class to initialize */ #if OPAL_ENABLE_DEBUG -#define OPAL_OBJ_STATIC_INIT(BASE_CLASS) { OPAL_OBJ_MAGIC_ID, OBJ_CLASS(BASE_CLASS), 1, __FILE__, __LINE__ } +#define OPAL_OBJ_STATIC_INIT(BASE_CLASS) \ + { \ + .obj_magic_id = OPAL_OBJ_MAGIC_ID, \ + .obj_class = OBJ_CLASS(BASE_CLASS), \ + .obj_reference_count = 1, \ + .cls_init_file_name = __FILE__, \ + .cls_init_lineno = __LINE__, \ + } #else -#define OPAL_OBJ_STATIC_INIT(BASE_CLASS) { OBJ_CLASS(BASE_CLASS), 1 } +#define OPAL_OBJ_STATIC_INIT(BASE_CLASS) \ + { \ + .obj_class = OBJ_CLASS(BASE_CLASS), \ + .obj_reference_count = 1, \ + } #endif /** @@ -302,14 +318,12 @@ static inline opal_object_t *opal_obj_new_debug(opal_class_t* type, const char* * to NULL. 
* * @param object Pointer to the object - * - * */ #if OPAL_ENABLE_DEBUG #define OBJ_RELEASE(object) \ do { \ - assert(OPAL_OBJ_MAGIC_ID == ((opal_object_t *) (object))->obj_magic_id); \ assert(NULL != ((opal_object_t *) (object))->obj_class); \ + assert(OPAL_OBJ_MAGIC_ID == ((opal_object_t *) (object))->obj_magic_id); \ if (0 == opal_obj_update((opal_object_t *) (object), -1)) { \ OBJ_SET_MAGIC_ID((object), 0); \ opal_obj_run_destructors((opal_object_t *) (object)); \ @@ -494,7 +508,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls) static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__; static inline int opal_obj_update(opal_object_t *object, int inc) { - return opal_atomic_add_32(&(object->obj_reference_count), inc); + return OPAL_THREAD_ADD32(&object->obj_reference_count, inc); } END_C_DECLS diff --git a/opal/class/opal_pointer_array.c b/opal/class/opal_pointer_array.c index f18b41c077c..3c948d30032 100644 --- a/opal/class/opal_pointer_array.c +++ b/opal/class/opal_pointer_array.c @@ -6,14 +6,14 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -75,21 +75,21 @@ int opal_pointer_array_init(opal_pointer_array_t* array, int max_size, int block_size) { size_t num_bytes; - + /* check for errors */ if (NULL == array || max_size < block_size) { return OPAL_ERR_BAD_PARAM; } - + array->max_size = max_size; array->block_size = block_size; - + num_bytes = (0 < initial_allocation ? 
initial_allocation : block_size); array->number_free = num_bytes; array->size = num_bytes; num_bytes *= sizeof(void*); - /* Allocate and set the array to NULL */ + /* Allocate and set the array to NULL */ array->addr = (void **)calloc(num_bytes, 1); if (NULL == array->addr) { /* out of memory */ return OPAL_ERR_OUT_OF_RESOURCE; @@ -114,8 +114,8 @@ int opal_pointer_array_add(opal_pointer_array_t *table, void *ptr) if (table->number_free == 0) { /* need to grow table */ - if (!grow_table(table, - (NULL == table->addr ? TABLE_INIT : table->size * TABLE_GROW), + if (!grow_table(table, + (NULL == table->addr ? TABLE_INIT : table->size * TABLE_GROW), INT_MAX)) { OPAL_THREAD_UNLOCK(&(table->lock)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -192,17 +192,17 @@ int opal_pointer_array_set_item(opal_pointer_array_t *table, int index, /* Reset lowest_free if required */ if ( index == table->lowest_free ) { int i; - + table->lowest_free = table->size; for ( i=index + 1; isize; i++) { if ( NULL == table->addr[i] ){ table->lowest_free = i; break; - } + } } } } - table->addr[index] = value; + table->addr[index] = value; #if 0 opal_output(0,"opal_pointer_array_set_item: OUT: " @@ -227,10 +227,10 @@ int opal_pointer_array_set_item(opal_pointer_array_t *table, int index, * @return true/false True if element could be reserved * False if element could not be reserved (e.g.in use). * - * In contrary to array_set, this function does not allow to overwrite + * In contrary to array_set, this function does not allow to overwrite * a value, unless the previous value is NULL ( equiv. to free ). */ -bool opal_pointer_array_test_and_set_item (opal_pointer_array_t *table, +bool opal_pointer_array_test_and_set_item (opal_pointer_array_t *table, int index, void *value) { assert(table != NULL); @@ -262,7 +262,7 @@ bool opal_pointer_array_test_and_set_item (opal_pointer_array_t *table, } } - /* + /* * allow a specific index to be changed. 
*/ table->addr[index] = value; @@ -276,7 +276,7 @@ bool opal_pointer_array_test_and_set_item (opal_pointer_array_t *table, if ( NULL == table->addr[i] ){ table->lowest_free = i; break; - } + } } } @@ -328,7 +328,7 @@ static bool grow_table(opal_pointer_array_t *table, int soft, int hard) if (p == NULL) { return false; } - + new_size_int = (int) new_size; table->number_free += new_size_int - table->size; table->addr = (void**)p; diff --git a/opal/class/opal_pointer_array.h b/opal/class/opal_pointer_array.h index 03e7bd7af7b..8271fe50ba1 100644 --- a/opal/class/opal_pointer_array.h +++ b/opal/class/opal_pointer_array.h @@ -6,14 +6,14 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file @@ -107,7 +107,7 @@ OPAL_DECLSPEC int opal_pointer_array_add(opal_pointer_array_t *array, void *ptr) * * @return Error code. (-1) indicates an error. */ -OPAL_DECLSPEC int opal_pointer_array_set_item(opal_pointer_array_t *array, +OPAL_DECLSPEC int opal_pointer_array_set_item(opal_pointer_array_t *array, int index, void *value); /** @@ -119,7 +119,7 @@ OPAL_DECLSPEC int opal_pointer_array_set_item(opal_pointer_array_t *array, * @return Error code. NULL indicates an error. 
*/ -static inline void *opal_pointer_array_get_item(opal_pointer_array_t *table, +static inline void *opal_pointer_array_get_item(opal_pointer_array_t *table, int element_index) { void *p; @@ -172,10 +172,10 @@ OPAL_DECLSPEC int opal_pointer_array_set_size(opal_pointer_array_t *array, int s * @return true/false True if element could be reserved * False if element could not be reserved (e.g., in use). * - * In contrary to array_set, this function does not allow to overwrite + * In contrary to array_set, this function does not allow to overwrite * a value, unless the previous value is NULL ( equiv. to free ). */ -OPAL_DECLSPEC bool opal_pointer_array_test_and_set_item (opal_pointer_array_t *table, +OPAL_DECLSPEC bool opal_pointer_array_test_and_set_item (opal_pointer_array_t *table, int index, void *value); @@ -190,7 +190,7 @@ static inline void opal_pointer_array_remove_all(opal_pointer_array_t *array) int i; if( array->number_free == array->size ) return; /* nothing to do here this time (the array is already empty) */ - + OPAL_THREAD_LOCK(&array->lock); array->lowest_free = 0; array->number_free = array->size; diff --git a/opal/class/opal_rb_tree.c b/opal/class/opal_rb_tree.c index eef78e03d4f..cdbaa187642 100644 --- a/opal/class/opal_rb_tree.c +++ b/opal/class/opal_rb_tree.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* @@ -459,7 +459,7 @@ int opal_rb_tree_traverse(opal_rb_tree_t *tree, } -static void inorder_traversal(opal_rb_tree_t *tree, +static void inorder_traversal(opal_rb_tree_t *tree, opal_rb_tree_condition_fn_t cond, opal_rb_tree_action_fn_t action, opal_rb_tree_node_t * node) diff --git a/opal/class/opal_rb_tree.h b/opal/class/opal_rb_tree.h index 1c7cf9e8492..c405904b699 100644 --- a/opal/class/opal_rb_tree.h +++ b/opal/class/opal_rb_tree.h @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/opal/class/opal_ring_buffer.c b/opal/class/opal_ring_buffer.c index 77767726bef..e06b00eb889 100644 --- a/opal/class/opal_ring_buffer.c +++ b/opal/class/opal_ring_buffer.c @@ -6,15 +6,15 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -74,8 +74,8 @@ int opal_ring_buffer_init(opal_ring_buffer_t* ring, int size) if (NULL == ring) { return OPAL_ERR_BAD_PARAM; } - - /* Allocate and set the ring to NULL */ + + /* Allocate and set the ring to NULL */ ring->addr = (char **)calloc(size * sizeof(char*), 1); if (NULL == ring->addr) { /* out of memory */ return OPAL_ERR_OUT_OF_RESOURCE; @@ -88,7 +88,7 @@ int opal_ring_buffer_init(opal_ring_buffer_t* ring, int size) void* opal_ring_buffer_push(opal_ring_buffer_t *ring, void *ptr) { char *p=NULL; - + OPAL_ACQUIRE_THREAD(&(ring->lock), &(ring->cond), &(ring->in_use)); if (NULL != ring->addr[ring->head]) { p = (char*)ring->addr[ring->head]; diff --git a/opal/class/opal_ring_buffer.h b/opal/class/opal_ring_buffer.h index a0e23865af5..7a841b3bbcb 100644 --- a/opal/class/opal_ring_buffer.h +++ b/opal/class/opal_ring_buffer.h @@ -6,15 +6,15 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file diff --git a/opal/class/opal_tree.c b/opal/class/opal_tree.c index 516b23db603..fdd41ea20a1 100644 --- a/opal/class/opal_tree.c +++ b/opal/class/opal_tree.c @@ -8,9 +8,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -95,9 +95,9 @@ static void opal_tree_construct(opal_tree_t *tree) tree->opal_tree_sentinel.opal_tree_parent = &tree->opal_tree_sentinel; tree->opal_tree_sentinel.opal_tree_num_ancestors = -1; - tree->opal_tree_sentinel.opal_tree_next_sibling = + tree->opal_tree_sentinel.opal_tree_next_sibling = &tree->opal_tree_sentinel; - tree->opal_tree_sentinel.opal_tree_prev_sibling = + tree->opal_tree_sentinel.opal_tree_prev_sibling = &tree->opal_tree_sentinel; tree->opal_tree_sentinel.opal_tree_first_child = &tree->opal_tree_sentinel; @@ -122,7 +122,7 @@ static void opal_tree_destruct(opal_tree_t *tree) /* * initialize tree container */ -void opal_tree_init(opal_tree_t *tree, opal_tree_comp_fn_t comp, +void opal_tree_init(opal_tree_t *tree, opal_tree_comp_fn_t comp, opal_tree_item_serialize_fn_t serialize, opal_tree_item_deserialize_fn_t deserialize, opal_tree_get_key_fn_t get_key) @@ -139,7 +139,7 @@ void opal_tree_init(opal_tree_t *tree, opal_tree_comp_fn_t comp, static int count_descendants(opal_tree_item_t* item) { int current_count = 0; - + /* loop over all siblings for descendants to count */ while (item) { current_count += count_descendants(opal_tree_get_first_child(item)); @@ -157,7 +157,7 @@ size_t opal_tree_get_size(opal_tree_t* tree) #if OPAL_ENABLE_DEBUG /* not sure if we really want this running in devel, as it does * slow things down. 
Wanted for development of splice / join to - * make sure length was reset properly + * make sure length was reset properly */ size_t check_len = 0; opal_tree_item_t *root; @@ -167,7 +167,7 @@ size_t opal_tree_get_size(opal_tree_t* tree) root = opal_tree_get_root(tree); check_len = count_descendants(root); } - + if (check_len != tree->opal_tree_num_items) { fprintf(stderr," Error :: opal_tree_get_size - opal_tree_num_items does not match actual tree length\n"); fflush(stderr); @@ -175,13 +175,13 @@ size_t opal_tree_get_size(opal_tree_t* tree) } #endif - return tree->opal_tree_num_items; + return tree->opal_tree_num_items; } /* * add item to parent's child list */ -void opal_tree_add_child(opal_tree_item_t *parent_item, +void opal_tree_add_child(opal_tree_item_t *parent_item, opal_tree_item_t *new_item) { #if OPAL_ENABLE_DEBUG @@ -200,7 +200,7 @@ void opal_tree_add_child(opal_tree_item_t *parent_item, } else { /* no children existing on parent */ parent_item->opal_tree_first_child = new_item; - } + } parent_item->opal_tree_last_child = new_item; parent_item->opal_tree_num_children++; new_item->opal_tree_container = parent_item->opal_tree_container; @@ -216,7 +216,7 @@ void opal_tree_add_child(opal_tree_item_t *parent_item, #endif } -/* +/* * check to see if item is in tree */ #if OPAL_ENABLE_DEBUG @@ -277,7 +277,7 @@ opal_tree_item_t *opal_tree_remove_subtree(opal_tree_item_t *item) - If I have no children, then my immediate sibling */ if (item->opal_tree_parent->opal_tree_first_child == item) { if (item->opal_tree_num_children > 0) { - parent_item->opal_tree_first_child = + parent_item->opal_tree_first_child = item->opal_tree_next_sibling; } else { parent_item->opal_tree_first_child = @@ -285,7 +285,7 @@ opal_tree_item_t *opal_tree_remove_subtree(opal_tree_item_t *item) } } else if (parent_item->opal_tree_last_child == item) { if (item->opal_tree_num_children > 0) { - parent_item->opal_tree_last_child = + parent_item->opal_tree_last_child = 
item->opal_tree_last_child; } else { parent_item->opal_tree_last_child = @@ -364,11 +364,11 @@ int opal_tree_remove_item(opal_tree_t *tree, } else { /* There were multiple children. If I was the first or last, then ensure the parent gets a valid first or last child: - - If I have children, then my first/last + - If I have children, then my first/last - If I have no childen, then my immediate sibling */ if (parent_item->opal_tree_first_child == item) { if (item->opal_tree_num_children > 0) { - parent_item->opal_tree_first_child = + parent_item->opal_tree_first_child = item->opal_tree_first_child; } else { parent_item->opal_tree_first_child = @@ -376,7 +376,7 @@ int opal_tree_remove_item(opal_tree_t *tree, } } else if (parent_item->opal_tree_last_child == item) { if (item->opal_tree_num_children > 0) { - parent_item->opal_tree_last_child = + parent_item->opal_tree_last_child = item->opal_tree_last_child; } else { parent_item->opal_tree_last_child = @@ -395,12 +395,12 @@ static char *end_lvl = "]"; static char *end_stream = "E"; /* - * add item to opal buffer that represents all items of a sub-tree from the + * add item to opal buffer that represents all items of a sub-tree from the * item passed in on down. We exit out of converting tree items once we've * done the last child of the tree_item and we are at depth 1. 
*/ -static int add_tree_item2buf(opal_tree_item_t *tree_item, - opal_buffer_t *buf, +static int add_tree_item2buf(opal_tree_item_t *tree_item, + opal_buffer_t *buf, opal_tree_item_serialize_fn_t fn, int depth ) @@ -410,7 +410,7 @@ static int add_tree_item2buf(opal_tree_item_t *tree_item, do { /* add start delim to buffer */ - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &start_lvl, 1, OPAL_STRING))){ return(rc); } @@ -419,30 +419,30 @@ static int add_tree_item2buf(opal_tree_item_t *tree_item, if ((first_child = opal_tree_get_first_child(tree_item))) { /* add items for our children */ - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != (rc = add_tree_item2buf(first_child, buf, fn, depth+1))){ return(rc); } - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &end_lvl, 1, OPAL_STRING))){ return(rc); } } else { /* end item entry */ - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &end_lvl, 1, OPAL_STRING))){ return(rc); } } - /* advance to next sibling, if none we'll drop out of + /* advance to next sibling, if none we'll drop out of * loop and return to our parent */ tree_item = opal_tree_get_next_sibling(tree_item); } while (tree_item && 1 < depth); - + return(OPAL_SUCCESS); -} +} /* * serialize tree data @@ -452,19 +452,19 @@ int opal_tree_serialize(opal_tree_item_t *start_item, opal_buffer_t *buffer) int rc; if (OPAL_SUCCESS != - (rc = add_tree_item2buf(start_item, buffer, + (rc = add_tree_item2buf(start_item, buffer, start_item->opal_tree_container->serialize, 1))){ return(rc); } - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &end_stream, 1, OPAL_STRING))){ return(rc); - } + } return(OPAL_SUCCESS); } -static int deserialize_add_tree_item(opal_buffer_t *data, +static int deserialize_add_tree_item(opal_buffer_t *data, opal_tree_item_t *parent_item, opal_tree_item_deserialize_fn_t deserialize, char **curr_delim, @@ -513,15 +513,15 @@ static int deserialize_add_tree_item(opal_buffer_t *data, } 
return(OPAL_SUCCESS); } - + /* * deserialize tree data */ -int opal_tree_deserialize(opal_buffer_t *serialized_data, +int opal_tree_deserialize(opal_buffer_t *serialized_data, opal_tree_item_t *start_item) { char * null = NULL; - deserialize_add_tree_item(serialized_data, + deserialize_add_tree_item(serialized_data, start_item, start_item->opal_tree_container->deserialize, &null, @@ -655,7 +655,7 @@ static opal_tree_item_t *find_in_descendants(opal_tree_item_t* item, void *key) while (!result && item) { /* check for item match */ - result = (item->opal_tree_container->comp(item, key) == 0) ? + result = (item->opal_tree_container->comp(item, key) == 0) ? item : NULL; if (!result && (first_child = opal_tree_get_first_child(item))) { /* search descendants for match */ @@ -675,7 +675,7 @@ static opal_tree_item_t *find_in_descendants(opal_tree_item_t* item, void *key) opal_tree_item_t *opal_tree_find_with(opal_tree_item_t *item, void *key) { opal_tree_item_t *curr_item = item, *result = NULL; - + if (!opal_tree_is_empty(item->opal_tree_container)) { /* check my descendant for a match */ result = find_in_descendants(opal_tree_get_first_child(item), key); @@ -686,12 +686,12 @@ opal_tree_item_t *opal_tree_find_with(opal_tree_item_t *item, void *key) result = find_in_descendants(curr_item, key); } } - + /* check my ancestors (uncles) for match */ curr_item = item; while (!result && curr_item && curr_item->opal_tree_num_ancestors > 0){ curr_item = opal_tree_get_next_sibling(item->opal_tree_parent); - while (NULL == curr_item && + while (NULL == curr_item && item->opal_tree_parent->opal_tree_num_ancestors > 0) { item = item->opal_tree_parent; curr_item = opal_tree_get_next_sibling(item->opal_tree_parent); @@ -701,7 +701,7 @@ opal_tree_item_t *opal_tree_find_with(opal_tree_item_t *item, void *key) result = find_in_descendants(curr_item, key); } } - } + } return(result); } diff --git a/opal/class/opal_tree.h b/opal/class/opal_tree.h index 870b729609c..e1a42a33559 100644 --- 
a/opal/class/opal_tree.h +++ b/opal/class/opal_tree.h @@ -3,13 +3,13 @@ * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** - * @file + * @file * * The opal_tree_t interface is used to provide a generic * tree list container for Open MPI. It was inspired by the opal_list_t @@ -19,14 +19,14 @@ * The general idea is a user creates an class instance that has two * components. A tree structure component as defined by opal_tree_item_t * that links all the items together to form the tree. Then there is - * a user specific data component which the user defines what is stored at + * a user specific data component which the user defines what is stored at * each item. When a user create a type to be used for a OBJ_CLASS_INSTANCE * it will contain the opal_tree_item_t followed by any user specific * data. Then the opal_tree_item_t objects can be put in an * opal_tree_t. Hence, you create a new type that derives from * opal_tree_item_t; this new type can then be used with opal_tree_t * containers. - * + * * NOTE: opal_tree_item_t instances can only be on \em one tree at a * time. Specifically, if you add an opal_tree_item_t to one tree, * and then add it to another tree (without first removing it from the @@ -87,7 +87,7 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_tree_item_t); /** * \internal - * + * * Struct of an opal_tree_item_t */ typedef struct opal_tree_item_t @@ -143,8 +143,8 @@ typedef struct opal_tree_item_t typedef int (*opal_tree_comp_fn_t)(opal_tree_item_t *item, void *key); /** - * The serialize function typedef. This function is called by the - * opal tree serialize code to serialize a tree item's user specific + * The serialize function typedef. This function is called by the + * opal tree serialize code to serialize a tree item's user specific * data of a class type. 
* * @params item - item to serialize the user specific data from @@ -152,12 +152,12 @@ typedef int (*opal_tree_comp_fn_t)(opal_tree_item_t *item, void *key); * * @returns OPAL_SUCCESS - when successfully serialized item */ -typedef int (*opal_tree_item_serialize_fn_t)(opal_tree_item_t *item, +typedef int (*opal_tree_item_serialize_fn_t)(opal_tree_item_t *item, opal_buffer_t *buffer); /** * The deserialize function typedef. This function is called by the - * opal tree deserialize code to deserialize a tree item's user + * opal tree deserialize code to deserialize a tree item's user * specific data. * * @params buffer - the opal_buffer_t to deserialized data. @@ -223,7 +223,7 @@ static inline opal_tree_item_t *opal_tree_get_parent(opal_tree_item_t *item) * * This function is safe to be called with a null item pointer. */ -static inline opal_tree_item_t *opal_tree_get_next_sibling(opal_tree_item_t +static inline opal_tree_item_t *opal_tree_get_next_sibling(opal_tree_item_t *item) { return ((item) ? item->opal_tree_next_sibling : NULL); @@ -255,7 +255,7 @@ static inline opal_tree_item_t *opal_tree_get_prev_sibling(opal_tree_item_t * This function is safe to be called with a null item pointer. * */ -static inline opal_tree_item_t *opal_tree_get_first_child(opal_tree_item_t +static inline opal_tree_item_t *opal_tree_get_first_child(opal_tree_item_t *item) { return ((item) ? item->opal_tree_first_child : NULL); @@ -271,7 +271,7 @@ static inline opal_tree_item_t *opal_tree_get_first_child(opal_tree_item_t * This function is safe to be called with a null item pointer. * */ -static inline opal_tree_item_t *opal_tree_get_last_child(opal_tree_item_t +static inline opal_tree_item_t *opal_tree_get_last_child(opal_tree_item_t *item) { return ((item) ? 
item->opal_tree_last_child : NULL); @@ -308,7 +308,7 @@ static inline bool opal_tree_is_empty(opal_tree_t* tree) * * @returns A pointer to the first item in the tree * - * This is an O(1) operation to return the first item in the tree. + * This is an O(1) operation to return the first item in the tree. * * This is an inlined function in compilers that support inlining, so * it's usually a cheap operation. @@ -336,15 +336,15 @@ static inline opal_tree_item_t* opal_tree_get_root(opal_tree_t* tree) * * @returns The size of the tree (size_t) * - * This is an O(1) (in non-debug mode) lookup to return the - * size of the list. + * This is an O(1) (in non-debug mode) lookup to return the + * size of the list. */ OPAL_DECLSPEC size_t opal_tree_get_size(opal_tree_t* tree); /* Functions to manage the tree */ /** - * Initialize tree container; must be called before using + * Initialize tree container; must be called before using * the tree. * * @param tree The tree to initialize @@ -353,12 +353,12 @@ OPAL_DECLSPEC size_t opal_tree_get_size(opal_tree_t* tree); * @param deserialize De-serialization function to attach to tree. * */ -OPAL_DECLSPEC void opal_tree_init(opal_tree_t *tree, - opal_tree_comp_fn_t comp, +OPAL_DECLSPEC void opal_tree_init(opal_tree_t *tree, + opal_tree_comp_fn_t comp, opal_tree_item_serialize_fn_t serialize, opal_tree_item_deserialize_fn_t deserialize, opal_tree_get_key_fn_t get_key); - + /** * Add new item as child to its parent item * @@ -367,7 +367,7 @@ OPAL_DECLSPEC void opal_tree_init(opal_tree_t *tree, * * The new_item is added at the end of the child list of the parent_item. */ -OPAL_DECLSPEC void opal_tree_add_child(opal_tree_item_t *parent_item, +OPAL_DECLSPEC void opal_tree_add_child(opal_tree_item_t *parent_item, opal_tree_item_t *new_item); /** @@ -382,9 +382,9 @@ OPAL_DECLSPEC void opal_tree_add_child(opal_tree_item_t *parent_item, * item and all children below it will be removed from the tree. 
This * means the item's siblings pointers and potentially the parents first * and last pointers will be updated to skip over the item. The tree container - * will also have its num_items adjusted to reflect the number of items - * that were removed. The tree item (and all children below it) that is - * returned is now "owned" by the caller -- they are responsible for + * will also have its num_items adjusted to reflect the number of items + * that were removed. The tree item (and all children below it) that is + * returned is now "owned" by the caller -- they are responsible for * OBJ_RELEASE()'ing it. * * With ENABLE_DEBUG on this routine will validate whether the item is actually @@ -407,28 +407,28 @@ OPAL_DECLSPEC int opal_tree_remove_item(opal_tree_t *tree, * Serialize tree data * * @param start_item The item of a tree to start serializing data - * @param buffer The opal buffer that contains the serialized + * @param buffer The opal buffer that contains the serialized * data stream of the tree * * @returns OPAL_SUCCESS if data has been successfully converted. * * This routine walks the tree starting at start_item until it has serialized * all children items of start_item and creates a bytestream of data, - * using the opal_dss.pack routine, that can be sent over a network. - * The format of the bytestream represents the tree parent/child relationship + * using the opal_dss.pack routine, that can be sent over a network. + * The format of the bytestream represents the tree parent/child relationship * of each item in the tree plus the data inside the tree. This routine calls - * the tree's serialization method to serialize the user specific data for + * the tree's serialization method to serialize the user specific data for * each item. 
* */ -OPAL_DECLSPEC int opal_tree_serialize(opal_tree_item_t *start_item, +OPAL_DECLSPEC int opal_tree_serialize(opal_tree_item_t *start_item, opal_buffer_t *buffer); /** * De-serialize tree data * * @param buffer The opal buffer that is to be deserialized - * @param start_item The item in the tree the data should be + * @param start_item The item in the tree the data should be * deserialized into * * @returns Status of call OPAL_SUCCESS if everything worked @@ -437,7 +437,7 @@ OPAL_DECLSPEC int opal_tree_serialize(opal_tree_item_t *start_item, * opal_tree_serialize() function and deserializes it into the * tree given. If the tree already has data in it, this routine * will start adding the new data as a new child of the root - * item. This routine calls the tree's de-serialization + * item. This routine calls the tree's de-serialization * method to deserialize the user specific data for each item. * */ @@ -512,8 +512,8 @@ OPAL_DECLSPEC int opal_tree_compare(opal_tree_t *left, opal_tree_t *right); * @param key the key we are wanting to match with * * @returns A pointer to the next item that in the tree (starting from item) - * that matches the key based on a depth first search of the tree. A null - * pointer is returned if we've reached the end of the tree and have not + * that matches the key based on a depth first search of the tree. A null + * pointer is returned if we've reached the end of the tree and have not * matched the key. * * This routine uses the tree container's comp function to determine the @@ -523,7 +523,7 @@ OPAL_DECLSPEC int opal_tree_compare(opal_tree_t *left, opal_tree_t *right); * and NULL pointer is always returned for this function. 
* */ -OPAL_DECLSPEC opal_tree_item_t *opal_tree_find_with(opal_tree_item_t *item, +OPAL_DECLSPEC opal_tree_item_t *opal_tree_find_with(opal_tree_item_t *item, void *key); END_C_DECLS diff --git a/opal/class/opal_value_array.c b/opal/class/opal_value_array.c index 7e95b96964a..a9615dc1cba 100644 --- a/opal/class/opal_value_array.c +++ b/opal/class/opal_value_array.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/class/opal_value_array.h b/opal/class/opal_value_array.h index 2059cd44617..de59eb358af 100644 --- a/opal/class/opal_value_array.h +++ b/opal/class/opal_value_array.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -51,7 +51,7 @@ typedef struct opal_value_array_t opal_value_array_t; OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_value_array_t); /** - * Initialize the array to hold items by value. This routine must + * Initialize the array to hold items by value. This routine must * be called prior to using the array. * * @param array The array to initialize (IN). 
@@ -66,7 +66,7 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_value_array_t); static inline int opal_value_array_init(opal_value_array_t *array, size_t item_sizeof) { array->array_item_sizeof = item_sizeof; - array->array_alloc_size = 1; + array->array_alloc_size = 1; array->array_size = 0; array->array_items = (unsigned char*)realloc(array->array_items, item_sizeof * array->array_alloc_size); return (NULL != array->array_items) ? OPAL_SUCCESS : OPAL_ERR_OUT_OF_RESOURCE; @@ -121,15 +121,15 @@ static inline size_t opal_value_array_get_size(opal_value_array_t* array) * Note that resizing the array to a smaller size may not change * the underlying memory allocated by the array. However, setting * the size larger than the current allocation will grow it. In either - * case, if the routine is successful, opal_value_array_get_size() will + * case, if the routine is successful, opal_value_array_get_size() will * return the new size. */ OPAL_DECLSPEC int opal_value_array_set_size(opal_value_array_t* array, size_t size); -/** - * Macro to retrieve an item from the array by value. +/** + * Macro to retrieve an item from the array by value. * * @param array The input array (IN). * @param item_type The C datatype of the array item (IN). @@ -137,8 +137,8 @@ OPAL_DECLSPEC int opal_value_array_set_size(opal_value_array_t* array, size_t si * * @returns item The requested item. * - * Note that this does not change the size of the array - this macro is - * strictly for performance - the user assumes the responsibility of + * Note that this does not change the size of the array - this macro is + * strictly for performance - the user assumes the responsibility of * ensuring the array index is valid (0 <= item index < array size). */ @@ -164,7 +164,7 @@ static inline void* opal_value_array_get_item(opal_value_array_t *array, size_t return array->array_items + (item_index * array->array_item_sizeof); } -/** +/** * Macro to set an array element by value. 
* * @param array The input array (IN). @@ -172,8 +172,8 @@ static inline void* opal_value_array_get_item(opal_value_array_t *array, size_t * @param item_index The array index (IN). * @param item_value The new value for the specified index (IN). * - * Note that this does not change the size of the array - this macro is - * strictly for performance - the user assumes the responsibility of + * Note that this does not change the size of the array - this macro is + * strictly for performance - the user assumes the responsibility of * ensuring the array index is valid (0 <= item index < array size). * * It is safe to free the item after returning from this call; it is @@ -183,12 +183,12 @@ static inline void* opal_value_array_get_item(opal_value_array_t *array, size_t #define OPAL_VALUE_ARRAY_SET_ITEM(array, item_type, item_index, item_value) \ (((item_type*)((array)->array_items))[item_index] = item_value) -/** +/** * Set an array element by value. * * @param array The input array (IN). * @param item_index The array index (IN). - * @param item_value A pointer to the item, which is copied into + * @param item_value A pointer to the item, which is copied into * the array. * * @return OPAL error code. @@ -200,7 +200,7 @@ static inline void* opal_value_array_get_item(opal_value_array_t *array, size_t static inline int opal_value_array_set_item(opal_value_array_t *array, size_t item_index, const void* item) { int rc; - if(item_index >= array->array_size && + if(item_index >= array->array_size && (rc = opal_value_array_set_size(array, item_index+1)) != OPAL_SUCCESS) return rc; memcpy(array->array_items + (item_index * array->array_item_sizeof), item, array->array_item_sizeof); @@ -209,13 +209,13 @@ static inline int opal_value_array_set_item(opal_value_array_t *array, size_t it /** - * Appends an item to the end of the array. + * Appends an item to the end of the array. * * @param array The input array (IN). 
- * @param item A pointer to the item to append, which is copied + * @param item A pointer to the item to append, which is copied * into the array. * - * @return OPAL error code + * @return OPAL error code * * This will grow the array if it is not large enough to contain the * item. It is safe to free the item after returning from this call; @@ -229,7 +229,7 @@ static inline int opal_value_array_append_item(opal_value_array_t *array, const /** - * Remove a specific item from the array. + * Remove a specific item from the array. * * @param array The input array (IN). * @param item_index The index to remove, which must be less than @@ -247,8 +247,8 @@ static inline int opal_value_array_remove_item(opal_value_array_t *array, size_t opal_output(0, "opal_value_array_remove_item: invalid index %lu\n", (unsigned long)item_index); return OPAL_ERR_BAD_PARAM; } -#endif - memmove(array->array_items+(array->array_item_sizeof * item_index), +#endif + memmove(array->array_items+(array->array_item_sizeof * item_index), array->array_items+(array->array_item_sizeof * (item_index+1)), array->array_item_sizeof * (array->array_size - item_index - 1)); array->array_size--; @@ -257,7 +257,7 @@ static inline int opal_value_array_remove_item(opal_value_array_t *array, size_t /** * Get the base pointer of the underlying array. - * + * * @param array The input array (IN). * @param array_type The C datatype of the array (IN). * diff --git a/opal/datatype/Makefile.am b/opal/datatype/Makefile.am index 55b2d3c025d..6002a739f20 100644 --- a/opal/datatype/Makefile.am +++ b/opal/datatype/Makefile.am @@ -6,7 +6,7 @@ # Copyright (c) 2004-2010 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -16,9 +16,9 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 9a211ba845d..46aff829723 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2013 Research Organization for Information Science + * Copyright (c) 2013-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -25,10 +25,7 @@ #include #include - -#ifdef HAVE_STDINT_H #include -#endif #include "opal/prefetch.h" #include "opal/util/arch.h" @@ -214,7 +211,7 @@ opal_convertor_t* opal_convertor_create( int32_t remote_arch, int32_t mode ) assert( (CONVERTOR)->bConverted < (CONVERTOR)->local_size ); \ } while(0) -/** +/** * Return 0 if everything went OK and if there is still room before the complete * conversion of the data (need additional call with others input buffers ) * 1 if everything went fine and the data was completly converted @@ -452,16 +449,17 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, /** - * Compute the remote size. 
+ * Compute the remote size. If necessary remove the homogeneous flag + * and redirect the convertor description toward the non-optimized + * datatype representation. */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT #define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \ { \ if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \ opal_convertor_master_t* master; \ int i; \ uint32_t mask = datatype->bdt_used; \ - convertor->flags ^= CONVERTOR_HOMOGENEOUS; \ + convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \ master = convertor->master; \ convertor->remote_size = 0; \ for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \ @@ -475,10 +473,6 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, convertor->use_desc = &(datatype->desc); \ } \ } -#else -#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \ - assert(0 == (bdt_mask)) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ /** * This macro will initialize a convertor based on a previously created @@ -511,16 +505,13 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \ convertor->pDesc = (opal_datatype_t*)datatype; \ convertor->bConverted = 0; \ - /* By default consider the optimized description */ \ convertor->use_desc = &(datatype->opt_desc); \ \ convertor->remote_size = convertor->local_size; \ if( OPAL_LIKELY(convertor->remoteArch == opal_local_arch) ) { \ - if( (convertor->flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_NO_GAPS)) == OPAL_DATATYPE_FLAG_NO_GAPS ) { \ - return OPAL_SUCCESS; \ - } \ - if( ((convertor->flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_CONTIGUOUS)) \ - == OPAL_DATATYPE_FLAG_CONTIGUOUS) && (1 == count) ) { \ + if( !(convertor->flags & CONVERTOR_WITH_CHECKSUM) && \ + ((convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS) || \ + ((convertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && (1 == count))) ) { \ return OPAL_SUCCESS; 
\ } \ } \ @@ -532,8 +523,9 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, /* For predefined datatypes (contiguous) do nothing more */ \ /* if checksum is enabled then always continue */ \ if( ((convertor->flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_NO_GAPS)) \ - == OPAL_DATATYPE_FLAG_NO_GAPS) && \ - (convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) { \ + == OPAL_DATATYPE_FLAG_NO_GAPS) && \ + ((convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) == \ + (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) { \ return OPAL_SUCCESS; \ } \ convertor->flags &= ~CONVERTOR_NO_OP; \ @@ -566,26 +558,24 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor, OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) { -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { convertor->fAdvance = opal_unpack_general_checksum; - } else -#endif - if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - convertor->fAdvance = opal_unpack_homogeneous_contig_checksum; } else { - convertor->fAdvance = opal_generic_simple_unpack_checksum; + if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { + convertor->fAdvance = opal_unpack_homogeneous_contig_checksum; + } else { + convertor->fAdvance = opal_generic_simple_unpack_checksum; + } } } else { -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { convertor->fAdvance = opal_unpack_general; - } else -#endif - if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - convertor->fAdvance = opal_unpack_homogeneous_contig; } else { - convertor->fAdvance = opal_generic_simple_unpack; + if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { + convertor->fAdvance = opal_unpack_homogeneous_contig; + } else { + convertor->fAdvance = opal_generic_simple_unpack; + } } } return OPAL_SUCCESS; @@ -605,24 +595,32 @@ int32_t 
opal_convertor_prepare_for_send( opal_convertor_t* convertor, OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) { - if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size) - || (1 >= convertor->count) ) - convertor->fAdvance = opal_pack_homogeneous_contig_checksum; - else - convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps_checksum; + if( CONVERTOR_SEND_CONVERSION == (convertor->flags & (CONVERTOR_SEND_CONVERSION|CONVERTOR_HOMOGENEOUS)) ) { + convertor->fAdvance = opal_pack_general_checksum; } else { - convertor->fAdvance = opal_generic_simple_pack_checksum; + if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { + if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size) + || (1 >= convertor->count) ) + convertor->fAdvance = opal_pack_homogeneous_contig_checksum; + else + convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps_checksum; + } else { + convertor->fAdvance = opal_generic_simple_pack_checksum; + } } } else { - if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size) - || (1 >= convertor->count) ) - convertor->fAdvance = opal_pack_homogeneous_contig; - else - convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps; + if( CONVERTOR_SEND_CONVERSION == (convertor->flags & (CONVERTOR_SEND_CONVERSION|CONVERTOR_HOMOGENEOUS)) ) { + convertor->fAdvance = opal_pack_general; } else { - convertor->fAdvance = opal_generic_simple_pack; + if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { + if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size) + || (1 >= convertor->count) ) + convertor->fAdvance = opal_pack_homogeneous_contig; + else + convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps; + } else { + convertor->fAdvance = opal_generic_simple_pack; + } } } return OPAL_SUCCESS; @@ -678,15 
+676,33 @@ int opal_convertor_clone( const opal_convertor_t* source, void opal_convertor_dump( opal_convertor_t* convertor ) { - printf( "Convertor %p count %d stack position %d bConverted %ld\n", (void*)convertor, - convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted ); - printf( "\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n", - (unsigned long)convertor->local_size, (unsigned long)convertor->remote_size, - convertor->flags, convertor->stack_size, convertor->partial_length ); + opal_output( 0, "Convertor %p count %d stack position %d bConverted %ld\n" + "\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n" + "\tremote_arch %u local_arch %u\n", + (void*)convertor, + convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted, + (unsigned long)convertor->local_size, (unsigned long)convertor->remote_size, + convertor->flags, convertor->stack_size, convertor->partial_length, + convertor->remoteArch, opal_local_arch ); + if( convertor->flags & CONVERTOR_RECV ) opal_output( 0, "unpack "); + if( convertor->flags & CONVERTOR_SEND ) opal_output( 0, "pack "); + if( convertor->flags & CONVERTOR_SEND_CONVERSION ) opal_output( 0, "conversion "); + if( convertor->flags & CONVERTOR_HOMOGENEOUS ) opal_output( 0, "homogeneous " ); + else opal_output( 0, "heterogeneous "); + if( convertor->flags & CONVERTOR_NO_OP ) opal_output( 0, "no_op "); + if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) opal_output( 0, "checksum "); + if( convertor->flags & CONVERTOR_CUDA ) opal_output( 0, "CUDA "); + if( convertor->flags & CONVERTOR_CUDA_ASYNC ) opal_output( 0, "CUDA Async "); + if( convertor->flags & CONVERTOR_COMPLETED ) opal_output( 0, "COMPLETED "); + opal_datatype_dump( convertor->pDesc ); - printf( "Actual stack representation\n" ); - opal_datatype_dump_stack( convertor->pStack, convertor->stack_pos, - convertor->pDesc->desc.desc, convertor->pDesc->name ); + if( !((0 == convertor->stack_pos) 
&& + ((size_t)convertor->pStack[convertor->stack_pos].index > convertor->pDesc->desc.length)) ) { + /* only if the convertor is completely initialized */ + opal_output( 0, "Actual stack representation\n" ); + opal_datatype_dump_stack( convertor->pStack, convertor->stack_pos, + convertor->pDesc->desc.desc, convertor->pDesc->name ); + } } diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h index 0629b69aa2f..7c5de1af39b 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -175,9 +175,7 @@ static inline int opal_convertor_cleanup( opal_convertor_t* convertor ) */ static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConvertor ) { -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT if (OPAL_UNLIKELY(0 == (pConvertor->flags & CONVERTOR_HOMOGENEOUS))) return 1; -#endif #if OPAL_CUDA_SUPPORT if( pConvertor->flags & (CONVERTOR_CUDA | CONVERTOR_CUDA_UNIFIED)) return 1; #endif @@ -235,7 +233,7 @@ OPAL_DECLSPEC int32_t opal_convertor_prepare_for_send( opal_convertor_t* convert int32_t count, const void* pUserBuf); -static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_convertor_t* pSrcConv, +static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_convertor_t* pSrcConv, const struct opal_datatype_t* datatype, int32_t count, const void* pUserBuf, @@ -256,7 +254,7 @@ OPAL_DECLSPEC int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convert const struct opal_datatype_t* datatype, int32_t count, const void* pUserBuf ); -static inline int32_t opal_convertor_copy_and_prepare_for_recv( const opal_convertor_t* pSrcConv, +static inline int32_t opal_convertor_copy_and_prepare_for_recv( const opal_convertor_t* pSrcConv, const struct opal_datatype_t* datatype, int32_t count, const void* pUserBuf, diff --git a/opal/datatype/opal_convertor_raw.c b/opal/datatype/opal_convertor_raw.c index 9b75fc81492..b57d5aa1ded 100644 --- a/opal/datatype/opal_convertor_raw.c +++ 
b/opal/datatype/opal_convertor_raw.c @@ -34,7 +34,7 @@ * length we're working on are local. */ int32_t -opal_convertor_raw( opal_convertor_t* pConvertor, +opal_convertor_raw( opal_convertor_t* pConvertor, struct iovec* iov, uint32_t* iov_count, size_t* length ) { diff --git a/opal/datatype/opal_copy_functions_heterogeneous.c b/opal/datatype/opal_copy_functions_heterogeneous.c index 3cefc2cf147..956a1d46bcb 100644 --- a/opal/datatype/opal_copy_functions_heterogeneous.c +++ b/opal/datatype/opal_copy_functions_heterogeneous.c @@ -16,9 +16,7 @@ #include "opal_config.h" #include -#ifdef HAVE_STDINT_H #include -#endif #include "opal/util/arch.h" @@ -332,7 +330,7 @@ COPY_TYPE_HETEROGENEOUS( float12, long double ) #if SIZEOF_FLOAT == 16 COPY_TYPE_HETEROGENEOUS( float16, float ) -#elif SIZEOF_DOUBLE == 8 +#elif SIZEOF_DOUBLE == 16 COPY_TYPE_HETEROGENEOUS( float16, double ) #elif HAVE_LONG_DOUBLE && SIZEOF_LONG_DOUBLE == 16 COPY_TYPE_HETEROGENEOUS( float16, long double ) diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index cf00a690c56..25f014ead0d 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2010 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -329,6 +329,25 @@ OPAL_DECLSPEC opal_datatype_t* opal_datatype_create_from_packed_description( void** packed_buffer, struct opal_proc_t* remote_processor ); +/* Compute the span in memory of count datatypes. This function help with temporary + * memory allocations for receiving already typed data (such as those used for reduce + * operations). 
This span is the distance between the minimum and the maximum byte + * in the memory layout of count datatypes, or in other terms the memory needed to + * allocate count times the datatype without the gap in the beginning and at the end. + * + * Returns: the memory span of count repetition of the datatype, and in the gap + * argument, the number of bytes of the gap at the beginning. + */ +static inline OPAL_PTRDIFF_TYPE +opal_datatype_span( const opal_datatype_t* pData, int64_t count, + OPAL_PTRDIFF_TYPE* gap) +{ + OPAL_PTRDIFF_TYPE extent = (pData->ub - pData->lb); + OPAL_PTRDIFF_TYPE true_extent = (pData->true_ub - pData->true_lb); + *gap = pData->true_lb; + return true_extent + (count - 1) * extent; +} + #if OPAL_ENABLE_DEBUG /* * Set a breakpoint to this function in your favorite debugger diff --git a/opal/datatype/opal_datatype_add.c b/opal/datatype/opal_datatype_add.c index d6a1458b65b..890f5503bbd 100644 --- a/opal/datatype/opal_datatype_add.c +++ b/opal/datatype/opal_datatype_add.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -278,27 +278,14 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA */ if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) { pdtBase->btypes[pdtAdd->id] += count; + pLast->elem.common.type = pdtAdd->id; + pLast->elem.count = count; + pLast->elem.disp = disp; + pLast->elem.extent = extent; + pdtBase->desc.used++; + pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED); if( (extent != (OPAL_PTRDIFF_TYPE)pdtAdd->size) && (count > 1) ) { /* gaps around the datatype */ - localFlags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED | OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS); - CREATE_LOOP_START( pLast, count, 2, extent, localFlags ); - pLast++; - pLast->elem.common.type = pdtAdd->id; - pLast->elem.count = 1; - pLast->elem.disp = disp; - pLast->elem.extent = pdtAdd->size; - pLast->elem.common.flags = localFlags | OPAL_DATATYPE_FLAG_CONTIGUOUS; - pLast++; - CREATE_LOOP_END( pLast, 2, disp, pdtAdd->size, localFlags ); - pdtBase->desc.used += 3; - pdtBase->btypes[OPAL_DATATYPE_LOOP] = 1; - pdtBase->btypes[OPAL_DATATYPE_END_LOOP] = 1; - } else { - pLast->elem.common.type = pdtAdd->id; - pLast->elem.count = count; - pLast->elem.disp = disp; - pLast->elem.extent = extent; - pdtBase->desc.used++; - pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED); + pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS); } } else { /* keep trace of the total number of basic datatypes in the datatype definition */ diff --git a/opal/datatype/opal_datatype_copy.c b/opal/datatype/opal_datatype_copy.c index f41233a794e..1e2c5f70000 100644 --- a/opal/datatype/opal_datatype_copy.c +++ b/opal/datatype/opal_datatype_copy.c @@ -13,6 +13,8 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. 
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,9 +25,6 @@ #include "opal_config.h" #include -#ifdef HAVE_ALLOCA_H -#include -#endif #include #include "opal/prefetch.h" @@ -94,7 +93,7 @@ static size_t opal_datatype_memop_block_size = 128 * 1024; } \ } while(0) #else -#define SET_CUDA_COPY_FCT(cuda_device_bufs, fct, copy_function) +#define SET_CUDA_COPY_FCT(cuda_device_bufs, fct, copy_function) #endif int32_t opal_datatype_copy_content_same_ddt( const opal_datatype_t* datatype, int32_t count, diff --git a/opal/datatype/opal_datatype_copy.h b/opal/datatype/opal_datatype_copy.h index 7519674765c..d4ed216a5d3 100644 --- a/opal/datatype/opal_datatype_copy.h +++ b/opal/datatype/opal_datatype_copy.h @@ -4,6 +4,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -11,6 +13,10 @@ * $HEADER$ */ +#ifdef HAVE_ALLOCA_H +#include +#endif + #if !defined(MEM_OP_NAME) #error #endif /* !defined((MEM_OP_NAME) */ diff --git a/opal/datatype/opal_datatype_cuda.c b/opal/datatype/opal_datatype_cuda.c index d62af3fa8b9..71b60e60801 100644 --- a/opal/datatype/opal_datatype_cuda.c +++ b/opal/datatype/opal_datatype_cuda.c @@ -42,7 +42,7 @@ void opal_cuda_add_initialization_function(int (*fptr)(opal_common_cuda_function * for all future calls. 
*/ void mca_cuda_convertor_init(opal_convertor_t* convertor, const void *pUserBuf) -{ +{ /* Only do the initialization on the first GPU access */ if (!initialized) { opal_cuda_support_init(); @@ -98,7 +98,7 @@ void *opal_cuda_memcpy(void *dest, const void *src, size_t size, opal_convertor_ if (!(convertor->flags & CONVERTOR_CUDA)) { return memcpy(dest, src, size); } - + if (convertor->flags & CONVERTOR_CUDA_ASYNC) { res = ftable.gpu_cu_memcpy_async(dest, (void *)src, size, convertor); } else { @@ -151,7 +151,7 @@ void *opal_cuda_memmove(void *dest, void *src, size_t size) /** * This function gets called once to check if the program is running in a cuda - * environment. + * environment. */ static void opal_cuda_support_init(void) { diff --git a/opal/datatype/opal_datatype_dump.c b/opal/datatype/opal_datatype_dump.c index c1b86cb4398..30575674196 100644 --- a/opal/datatype/opal_datatype_dump.c +++ b/opal/datatype/opal_datatype_dump.c @@ -89,8 +89,8 @@ int opal_datatype_dump_data_desc( dt_elem_desc_t* pDesc, int nbElems, char* ptr, (int)pDesc->end_loop.items, (long)pDesc->end_loop.first_elem_disp, (int)pDesc->end_loop.size ); else - index += snprintf( ptr + index, length - index, "count %d disp 0x%lx (%ld) extent %d (size %ld)\n", - (int)pDesc->elem.count, (long)pDesc->elem.disp, (long)pDesc->elem.disp, + index += snprintf( ptr + index, length - index, "count %d disp 0x%lx (%ld) blen %d extent %d (size %ld)\n", + (int)pDesc->elem.count, (long)pDesc->elem.disp, (long)pDesc->elem.disp, (int)pDesc->elem.blocklen, (int)pDesc->elem.extent, (long)(pDesc->elem.count * opal_datatype_basicDatatypes[pDesc->elem.common.type]->size) ); pDesc++; diff --git a/opal/datatype/opal_datatype_fake_stack.c b/opal/datatype/opal_datatype_fake_stack.c index 1cb436381bb..4f72b343672 100644 --- a/opal/datatype/opal_datatype_fake_stack.c +++ b/opal/datatype/opal_datatype_fake_stack.c @@ -176,7 +176,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, int32_t cnt 
= (int32_t)(resting_place / basic_type->size); loop_length += (cnt * basic_type->size); resting_place -= (cnt * basic_type->size); - PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElems->elem.common.type, + PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElems->elem.common.type, pElems->elem.count - cnt, pElems->elem.disp + cnt * pElems->elem.extent ); pConvertor->bConverted = starting_point - resting_place; diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index a113db7c348..cc1352776ac 100644 --- a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -26,12 +26,8 @@ #include "opal_config.h" -#ifdef HAVE_STDARG_H #include -#endif -#ifdef HAVE_STRING_H #include -#endif #if defined(VERBOSE) #include "opal/util/output.h" @@ -70,7 +66,7 @@ static inline void DUMP( char* fmt, ... ) # define __opal_attribute_unused_tmp__ __opal_attribute_unused__ # else # define __opal_attribute_unused_tmp__ -# endif +# endif static inline void DUMP( char* fmt __opal_attribute_unused_tmp__, ... ) { #if defined(__PGI) diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index b52719bcfc3..5b66e4df595 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -11,7 +11,9 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -22,9 +24,6 @@ #include "opal_config.h" #include -#ifdef HAVE_ALLOCA_H -#include -#endif #include #include "opal/datatype/opal_datatype.h" diff --git a/opal/datatype/opal_datatype_pack.c b/opal/datatype/opal_datatype_pack.c index 45f1213b811..c9fbbc761cd 100644 --- a/opal/datatype/opal_datatype_pack.c +++ b/opal/datatype/opal_datatype_pack.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -11,7 +11,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2017 Cisco Systems, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -42,10 +42,12 @@ #define opal_pack_homogeneous_contig_function opal_pack_homogeneous_contig_checksum #define opal_pack_homogeneous_contig_with_gaps_function opal_pack_homogeneous_contig_with_gaps_checksum #define opal_generic_simple_pack_function opal_generic_simple_pack_checksum +#define opal_pack_general_function opal_pack_general_checksum #else #define opal_pack_homogeneous_contig_function opal_pack_homogeneous_contig #define opal_pack_homogeneous_contig_with_gaps_function opal_pack_homogeneous_contig_with_gaps #define opal_generic_simple_pack_function opal_generic_simple_pack +#define opal_pack_general_function opal_pack_general #endif /* defined(CHECKSUM) */ @@ -393,3 +395,200 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); return 0; } + +/* + * Remember that the first item in the stack (i.e. position 0) is the number + * of times the datatype is involved in the operation (i.e. the count argument + * in the MPI_ call). + */ +/* Convert data from multiple input buffers (as received from the network layer) + * to a contiguous output buffer with a predefined size. + * return OPAL_SUCCESS if everything went OK and if there is still room before the complete + * conversion of the data (need additional call with other input buffers ) + * 1 if everything went fine and the data was completely converted + * -1 if something went wrong. 
+ */ + +static inline void +pack_predefined_heterogeneous( opal_convertor_t* CONVERTOR, + const dt_elem_desc_t* ELEM, + uint32_t* COUNT, + unsigned char** SOURCE, + unsigned char** DESTINATION, + size_t* SPACE ) +{ + uint32_t _count = *(COUNT); + size_t _r_blength, _l_blength; + const ddt_elem_desc_t* _elem = &((ELEM)->elem); + unsigned char* _source = (*SOURCE) + _elem->disp; + const opal_convertor_master_t* master = (CONVERTOR)->master; + OPAL_PTRDIFF_TYPE advance; + + _r_blength = master->remote_sizes[_elem->common.type]; + _l_blength = opal_datatype_basicDatatypes[_elem->common.type]->size; + if( (_count * _r_blength) > *(SPACE) ) { + _count = (uint32_t)(*(SPACE) / _r_blength); + if( 0 == _count ) return; /* nothing to do */ + } + + OPAL_DATATYPE_SAFEGUARD_POINTER( _source, (_count * _elem->extent), (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack [l %s r %s] memcpy( %p, %p, %lu ) => space %lu\n", + ((OPAL_PTRDIFF_TYPE)_l_blength == _elem->extent) ? "cont" : "----", + ((OPAL_PTRDIFF_TYPE)_r_blength == _elem->extent) ? 
"cont" : "----", + *(DESTINATION), _source, (unsigned long)_r_blength, + (unsigned long)(*(SPACE)) ); ); + master->pFunctions[_elem->common.type]( CONVERTOR, _count, + _source, *SPACE, _elem->extent, + *DESTINATION, *SPACE, _r_blength, + &advance ); + _r_blength *= _count; /* update the remote length to encompass all the elements */ + *(SOURCE) += _count * _elem->extent; + *(DESTINATION) += _r_blength; + *(SPACE) -= _r_blength; + *(COUNT) -= _count; +} + +int32_t +opal_pack_general_function( opal_convertor_t* pConvertor, + struct iovec* iov, uint32_t* out_size, + size_t* max_data ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ + uint32_t pos_desc; /* actual position in the description of the derived datatype */ + uint32_t count_desc; /* the number of items already done in the actual pos_desc */ + size_t total_packed = 0; /* total amount packed this time */ + dt_elem_desc_t* description; + dt_elem_desc_t* pElem; + const opal_datatype_t *pData = pConvertor->pDesc; + unsigned char *conv_ptr, *iov_ptr; + size_t iov_len_local; + uint32_t iov_count; + int type; + + DO_DEBUG( opal_output( 0, "opal_convertor_general_pack( %p:%p, {%p, %lu}, %d )\n", + (void*)pConvertor, (void*)pConvertor->pBaseBuf, + iov[0].iov_base, (unsigned long)iov[0].iov_len, *out_size ); ); + + description = pConvertor->use_desc->desc; + + /* For the first step we have to add both displacement to the source. After in the + * main while loop we will set back the conv_ptr to the correct value. 
This is + * due to the fact that the convertor can stop in the middle of a data with a count + */ + pStack = pConvertor->pStack + pConvertor->stack_pos; + pos_desc = pStack->index; + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + count_desc = (uint32_t)pStack->count; + pStack--; + pConvertor->stack_pos--; + pElem = &(description[pos_desc]); + + DO_DEBUG( opal_output( 0, "pack start pos_desc %d count_desc %d disp %ld\n" + "stack_pos %d pos_desc %d count_desc %d disp %ld\n", + pos_desc, count_desc, (long)(conv_ptr - pConvertor->pBaseBuf), + pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); + + for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { + iov_ptr = (unsigned char *) iov[iov_count].iov_base; + iov_len_local = iov[iov_count].iov_len; + while( 1 ) { + while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + type = description[pos_desc].elem.common.type; + /* now here we have a basic datatype */ + DO_DEBUG( opal_output( 0, "pack (%p:%ld, %d, %ld) -> (%p, %ld) type %s\n", + pConvertor->pBaseBuf, conv_ptr + pElem->elem.disp - pConvertor->pBaseBuf, + count_desc, description[pos_desc].elem.extent, + iov_ptr, iov_len_local, + opal_datatype_basicDatatypes[type]->name ); ); + + pack_predefined_heterogeneous( pConvertor, pElem, &count_desc, + &conv_ptr, &iov_ptr, &iov_len_local); +#if 0 + PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc, + conv_ptr, iov_ptr, iov_len_local ); +#endif + if( 0 == count_desc ) { /* completed */ + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + pos_desc++; /* advance to the next data */ + UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); + continue; + } + goto complete_loop; + } + if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ + DO_DEBUG( opal_output( 0, "pack end_loop count %d stack_pos %d" + " pos_desc %d disp %ld space %lu\n", + (int)pStack->count, pConvertor->stack_pos, + pos_desc, (long)pStack->disp, (unsigned 
long)iov_len_local ); ); + if( --(pStack->count) == 0 ) { /* end of loop */ + if( 0 == pConvertor->stack_pos ) { + /* we lie about the size of the next element in order to + * make sure we exit the main loop. + */ + *out_size = iov_count; + goto complete_loop; /* completed */ + } + pConvertor->stack_pos--; + pStack--; + pos_desc++; + } else { + pos_desc = pStack->index + 1; + if( pStack->index == -1 ) { + pStack->disp += (pData->ub - pData->lb); + } else { + assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type ); + pStack->disp += description[pStack->index].loop.extent; + } + } + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); + DO_DEBUG( opal_output( 0, "pack new_loop count %d stack_pos %d pos_desc %d count_desc %d disp %ld space %lu\n", + (int)pStack->count, pConvertor->stack_pos, pos_desc, + count_desc, (long)pStack->disp, (unsigned long)iov_len_local ); ); + } + if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { + OPAL_PTRDIFF_TYPE local_disp = (OPAL_PTRDIFF_TYPE)conv_ptr; +#if 0 + if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { + PACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc, + conv_ptr, iov_ptr, iov_len_local ); + if( 0 == count_desc ) { /* completed */ + pos_desc += pElem->loop.items + 1; + goto update_loop_description; + } + /* Save the stack with the correct last_count value. 
*/ + } +#endif /* in a heterogeneous environment we can't handle the contiguous loops */ + local_disp = (OPAL_PTRDIFF_TYPE)conv_ptr - local_disp; + PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, + pStack->disp + local_disp); + pos_desc++; +#if 0 + // This label currently in another if 0'ed out block + update_loop_description: /* update the current state */ +#endif + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); + DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" ); + continue; + } + } + complete_loop: + iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ + total_packed += iov[iov_count].iov_len; + } + *max_data = total_packed; + pConvertor->bConverted += total_packed; /* update the already converted bytes */ + *out_size = iov_count; + if( pConvertor->bConverted == pConvertor->local_size ) { + pConvertor->flags |= CONVERTOR_COMPLETED; + return 1; + } + /* Save the global position for the next round */ + PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc, + conv_ptr - pConvertor->pBaseBuf ); + DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n", + pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); + return 0; +} diff --git a/opal/datatype/opal_datatype_position.c b/opal/datatype/opal_datatype_position.c index f187c6749df..f5e51b86f9d 100644 --- a/opal/datatype/opal_datatype_position.c +++ b/opal/datatype/opal_datatype_position.c @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,10 +26,6 @@ #include #include -#ifdef HAVE_ALLOCA_H -#include -#endif - #include "opal/datatype/opal_datatype.h" #include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_datatype_internal.h" diff --git a/opal/datatype/opal_datatype_prototypes.h b/opal/datatype/opal_datatype_prototypes.h index bcfb59b9b31..668397112b8 100644 --- a/opal/datatype/opal_datatype_prototypes.h +++ b/opal/datatype/opal_datatype_prototypes.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. @@ -24,6 +24,14 @@ BEGIN_C_DECLS */ OPAL_DECLSPEC int32_t +opal_pack_general( opal_convertor_t* pConvertor, + struct iovec* iov, uint32_t* out_size, + size_t* max_data ); +OPAL_DECLSPEC int32_t +opal_pack_general_checksum( opal_convertor_t* pConvertor, + struct iovec* iov, uint32_t* out_size, + size_t* max_data ); +OPAL_DECLSPEC int32_t opal_unpack_general( opal_convertor_t* pConvertor, struct iovec* iov, uint32_t* out_size, size_t* max_data ); diff --git a/opal/datatype/opal_datatype_resize.c b/opal/datatype/opal_datatype_resize.c index 71347d0b5de..b239c675b02 100644 --- a/opal/datatype/opal_datatype_resize.c +++ b/opal/datatype/opal_datatype_resize.c @@ -4,6 +4,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,9 +23,6 @@ int32_t opal_datatype_resize( opal_datatype_t* type, OPAL_PTRDIFF_TYPE lb, OPAL_ type->lb = lb; type->ub = lb + extent; - type->true_lb += lb; - type->true_ub += lb; - type->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS; if( (extent == (OPAL_PTRDIFF_TYPE)type->size) && (type->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) { diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index d43805ef215..e5c05e14e2d 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -379,7 +379,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { OPAL_PTRDIFF_TYPE local_disp = (OPAL_PTRDIFF_TYPE)conv_ptr; if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - UNPACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc, + UNPACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc, iov_ptr, conv_ptr, iov_len_local ); if( 0 == count_desc ) { /* completed */ pos_desc += pElem->loop.items + 1; diff --git a/opal/dss/Makefile.am b/opal/dss/Makefile.am index 0f34606d9c0..329e14ff706 100644 --- a/opal/dss/Makefile.am +++ b/opal/dss/Makefile.am @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ # diff --git a/opal/dss/dss.h b/opal/dss/dss.h index fc081f154c1..35e3589577d 100644 --- a/opal/dss/dss.h +++ b/opal/dss/dss.h @@ -41,7 +41,8 @@ OPAL_DECLSPEC int opal_value_load(opal_value_t *kv, void *data, opal_data_type_t type); OPAL_DECLSPEC int opal_value_unload(opal_value_t *kv, void **data, opal_data_type_t type); - +OPAL_DECLSPEC int opal_value_xfer(opal_value_t *dest, + opal_value_t *src); /** * Top-level interface function to pack one or more values into a * buffer. diff --git a/opal/dss/dss_compare.c b/opal/dss/dss_compare.c index 15d858d3988..0329d2b03ba 100644 --- a/opal/dss/dss_compare.c +++ b/opal/dss/dss_compare.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
diff --git a/opal/dss/dss_copy.c b/opal/dss/dss_copy.c index bb0d3b9efe3..f0d1544ba59 100644 --- a/opal/dss/dss_copy.c +++ b/opal/dss/dss_copy.c @@ -201,14 +201,14 @@ int opal_dss_copy_pstat(opal_pstats_t **dest, opal_pstats_t *src, opal_data_type_t type) { opal_pstats_t *p; - + /* create the new object */ *dest = OBJ_NEW(opal_pstats_t); if (NULL == *dest) { return OPAL_ERR_OUT_OF_RESOURCE; } p = *dest; - + /* copy the individual fields */ memcpy(p->node, src->node, sizeof(src->node)); p->rank = src->rank; @@ -223,7 +223,7 @@ int opal_dss_copy_pstat(opal_pstats_t **dest, opal_pstats_t *src, p->peak_vsize = src->peak_vsize; p->processor = src->processor; p->sample_time.tv_sec = src->sample_time.tv_sec; - p->sample_time.tv_usec = src->sample_time.tv_usec; + p->sample_time.tv_usec = src->sample_time.tv_usec; return OPAL_SUCCESS; } @@ -232,14 +232,14 @@ int opal_dss_copy_node_stat(opal_node_stats_t **dest, opal_node_stats_t *src, opal_data_type_t type) { opal_node_stats_t *p; - + /* create the new object */ *dest = OBJ_NEW(opal_node_stats_t); if (NULL == *dest) { return OPAL_ERR_OUT_OF_RESOURCE; } p = *dest; - + /* copy the individual fields */ p->la = src->la; p->la5 = src->la5; @@ -247,7 +247,7 @@ int opal_dss_copy_node_stat(opal_node_stats_t **dest, opal_node_stats_t *src, p->total_mem = src->total_mem; p->free_mem = src->free_mem; p->sample_time.tv_sec = src->sample_time.tv_sec; - p->sample_time.tv_usec = src->sample_time.tv_usec; + p->sample_time.tv_usec = src->sample_time.tv_usec; return OPAL_SUCCESS; } @@ -256,14 +256,14 @@ int opal_dss_copy_value(opal_value_t **dest, opal_value_t *src, opal_data_type_t type) { opal_value_t *p; - + /* create the new object */ *dest = OBJ_NEW(opal_value_t); if (NULL == *dest) { return OPAL_ERR_OUT_OF_RESOURCE; } p = *dest; - + /* copy the type and key */ if (NULL != src->key) { p->key = strdup(src->key); diff --git a/opal/dss/dss_internal.h b/opal/dss/dss_internal.h index 0c262bd8123..e9152f7199b 100644 --- 
a/opal/dss/dss_internal.h +++ b/opal/dss/dss_internal.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -36,12 +36,10 @@ #include "opal/dss/dss.h" #include "opal/util/proc.h" -#ifdef HAVE_STRING_H -# if !defined(STDC_HEADERS) && HAVE_MEMORY_H -# include -# endif -# include +#if !defined(STDC_HEADERS) && HAVE_MEMORY_H +# include #endif +#include BEGIN_C_DECLS @@ -50,7 +48,7 @@ BEGIN_C_DECLS */ #define OPAL_DSS_DEFAULT_INITIAL_SIZE 128 /* - * The default threshold size when we switch from doubling the + * The default threshold size when we switch from doubling the * buffer size to addatively increasing it */ #define OPAL_DSS_DEFAULT_THRESHOLD_SIZE 1024 @@ -155,7 +153,7 @@ BEGIN_C_DECLS ret = OPAL_ERR_NOT_FOUND; \ } \ } while (0) - + /* NOTE: do not need to deal with endianness here, as the unpacking of the underling sender-side type will do that for us. Repeat: the data in tmpbuf[] is already in host byte order. 
*/ @@ -169,8 +167,8 @@ BEGIN_C_DECLS } \ free(tmpbuf); \ } while (0) - - + + /** * Internal struct used for holding registered dss functions */ @@ -258,10 +256,10 @@ void opal_dss_dump_data_types(int output); /* * Specialized functions */ -OPAL_DECLSPEC int opal_dss_pack_buffer(opal_buffer_t *buffer, const void *src, +OPAL_DECLSPEC int opal_dss_pack_buffer(opal_buffer_t *buffer, const void *src, int32_t num_vals, opal_data_type_t type); -OPAL_DECLSPEC int opal_dss_unpack_buffer(opal_buffer_t *buffer, void *dst, +OPAL_DECLSPEC int opal_dss_unpack_buffer(opal_buffer_t *buffer, void *dst, int32_t *num_vals, opal_data_type_t type); /* diff --git a/opal/dss/dss_internal_functions.c b/opal/dss/dss_internal_functions.c index d8e31a28505..09fef892597 100644 --- a/opal/dss/dss_internal_functions.c +++ b/opal/dss/dss_internal_functions.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -44,7 +44,7 @@ char* opal_dss_buffer_extend(opal_buffer_t *buffer, size_t bytes_to_add) required = buffer->bytes_used + bytes_to_add; if(required >= (size_t)opal_dss_threshold_size) { - to_alloc = ((required + opal_dss_threshold_size - 1) + to_alloc = ((required + opal_dss_threshold_size - 1) / opal_dss_threshold_size) * opal_dss_threshold_size; } else { to_alloc = buffer->bytes_allocated; @@ -53,7 +53,7 @@ char* opal_dss_buffer_extend(opal_buffer_t *buffer, size_t bytes_to_add) } while(to_alloc < required) { to_alloc <<= 1; - } + } } if (NULL != buffer->base_ptr) { @@ -67,14 +67,14 @@ char* opal_dss_buffer_extend(opal_buffer_t *buffer, size_t bytes_to_add) buffer->bytes_used = 0; buffer->base_ptr = (char*)malloc(to_alloc); } - - if (NULL == buffer->base_ptr) { - return NULL; + + if (NULL == buffer->base_ptr) { + return NULL; } buffer->pack_ptr = ((char*) buffer->base_ptr) + pack_offset; buffer->unpack_ptr = ((char*) buffer->base_ptr) + unpack_offset; buffer->bytes_allocated = to_alloc; - + /* All done */ return buffer->pack_ptr; @@ -87,13 +87,13 @@ char* opal_dss_buffer_extend(opal_buffer_t *buffer, size_t bytes_to_add) bool opal_dss_too_small(opal_buffer_t *buffer, size_t bytes_reqd) { size_t bytes_remaining_packed; - + if (buffer->pack_ptr < buffer->unpack_ptr) { return true; } bytes_remaining_packed = buffer->pack_ptr - buffer->unpack_ptr; - + if (bytes_remaining_packed < bytes_reqd) { /* don't error log this - it could be that someone is trying to * simply read until the buffer is empty @@ -109,11 +109,11 @@ int opal_dss_store_data_type(opal_buffer_t *buffer, opal_data_type_t type) opal_dss_type_info_t *info; /* Lookup the pack function for the actual opal_data_type type and call it */ - + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, OPAL_DATA_TYPE_T))) { return OPAL_ERR_PACK_FAILURE; } - + return info->odti_pack_fn(buffer, &type, 1, 
OPAL_DATA_TYPE_T); } @@ -121,12 +121,12 @@ int opal_dss_get_data_type(opal_buffer_t *buffer, opal_data_type_t *type) { opal_dss_type_info_t *info; int32_t n=1; - + /* Lookup the unpack function for the actual opal_data_type type and call it */ - + if (NULL == (info = (opal_dss_type_info_t*)opal_pointer_array_get_item(&opal_dss_types, OPAL_DATA_TYPE_T))) { return OPAL_ERR_PACK_FAILURE; } - + return info->odti_unpack_fn(buffer, type, &n, OPAL_DATA_TYPE_T); } diff --git a/opal/dss/dss_load_unload.c b/opal/dss/dss_load_unload.c index 038aaae84d9..1b375997cc5 100644 --- a/opal/dss/dss_load_unload.c +++ b/opal/dss/dss_load_unload.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ - + /* * DSS Buffer Operations */ @@ -41,7 +41,7 @@ int opal_dss_unload(opal_buffer_t *buffer, void **payload, if (NULL == payload) { return OPAL_ERR_BAD_PARAM; } - + /* anything in the buffer - if not, nothing to do */ if (NULL == buffer->base_ptr || 0 == buffer->bytes_used) { *payload = NULL; @@ -49,14 +49,29 @@ int opal_dss_unload(opal_buffer_t *buffer, void **payload, return OPAL_SUCCESS; } + /* if nothing has been unpacked, we can pass the entire + * region back and protect it - no need to copy. 
This is + * an optimization */ + if (buffer->unpack_ptr == buffer->base_ptr) { + *payload = buffer->base_ptr; + *bytes_used = buffer->bytes_used; + buffer->base_ptr = NULL; + buffer->unpack_ptr = NULL; + buffer->pack_ptr = NULL; + buffer->bytes_used = 0; + return OPAL_SUCCESS; + } + /* okay, we have something to provide - pass it back */ - *payload = buffer->base_ptr; - *bytes_used = buffer->bytes_used; - - /* dereference everything in buffer */ - buffer->base_ptr = NULL; - buffer->pack_ptr = buffer->unpack_ptr = NULL; - buffer->bytes_allocated = buffer->bytes_used = 0; + *bytes_used = buffer->bytes_used - (buffer->unpack_ptr - buffer->base_ptr); + if (0 == (*bytes_used)) { + *payload = NULL; + } else { + /* we cannot just set the pointer as it might be + * partway in a malloc'd region */ + *payload = (void*)malloc(*bytes_used); + memcpy(*payload, buffer->unpack_ptr, *bytes_used); + } /* All done */ @@ -71,7 +86,7 @@ int opal_dss_load(opal_buffer_t *buffer, void *payload, if (NULL == buffer) { return OPAL_ERR_BAD_PARAM; } - + /* check if buffer already has payload - free it if so */ if (NULL != buffer->base_ptr) { free(buffer->base_ptr); @@ -88,10 +103,10 @@ int opal_dss_load(opal_buffer_t *buffer, void *payload, } /* populate the buffer */ - buffer->base_ptr = (char*)payload; + buffer->base_ptr = (char*)payload; /* set pack/unpack pointers */ - buffer->pack_ptr = ((char*)buffer->base_ptr) + bytes_used; + buffer->pack_ptr = ((char*)buffer->base_ptr) + bytes_used; buffer->unpack_ptr = buffer->base_ptr; /* set counts for size and space */ @@ -99,7 +114,7 @@ int opal_dss_load(opal_buffer_t *buffer, void *payload, /* All done */ - return OPAL_SUCCESS; + return OPAL_SUCCESS; } @@ -116,7 +131,7 @@ int opal_dss_copy_payload(opal_buffer_t *dest, opal_buffer_t *src) if (NULL == dest || NULL == src) { return OPAL_ERR_BAD_PARAM; } - + /* if the dest is already populated, check to ensure that both * source and dest are of the same buffer type */ @@ -125,12 +140,12 @@ int 
opal_dss_copy_payload(opal_buffer_t *dest, opal_buffer_t *src) return OPAL_ERR_BUFFER; } } - + /* either the dest was empty or the two types already match - * either way, just ensure the two types DO match */ dest->type = src->type; - + /* compute how much of the src buffer remains unpacked * buffer->bytes_used is the total number of bytes in the buffer that * have been packed. However, we may have already unpacked some of @@ -139,24 +154,24 @@ int opal_dss_copy_payload(opal_buffer_t *dest, opal_buffer_t *src) * beyond the unpack_ptr */ bytes_left = src->bytes_used - (src->unpack_ptr - src->base_ptr); - + /* if nothing is left, then nothing to do */ if (0 == bytes_left) { return OPAL_SUCCESS; } - + /* add room to the dest for the src buffer's payload */ if (NULL == (dst_ptr = opal_dss_buffer_extend(dest, bytes_left))) { return OPAL_ERR_OUT_OF_RESOURCE; } - + /* copy the src payload to the specified location in dest */ memcpy(dst_ptr, src->unpack_ptr, bytes_left); - + /* adjust the dest buffer's bookkeeping */ dest->bytes_used += bytes_left; dest->pack_ptr = ((char*)dest->pack_ptr) + bytes_left; - + return OPAL_SUCCESS; } @@ -363,3 +378,104 @@ int opal_value_unload(opal_value_t *kv, } return OPAL_SUCCESS; } + +int opal_value_xfer(opal_value_t *dest, + opal_value_t *src) +{ + opal_byte_object_t *boptr; + + if (NULL != src->key) { + dest->key = strdup(src->key); + } + dest->type = src->type; + + switch (src->type) { + case OPAL_BOOL: + dest->data.flag = src->data.flag; + break; + case OPAL_BYTE: + dest->data.byte = src->data.byte; + break; + case OPAL_STRING: + if (NULL != dest->data.string) { + free(dest->data.string); + } + if (NULL != src->data.string) { + dest->data.string = strdup(src->data.string); + } else { + dest->data.string = NULL; + } + break; + case OPAL_SIZE: + dest->data.size = src->data.size; + break; + case OPAL_PID: + dest->data.pid = src->data.pid; + break; + + case OPAL_INT: + dest->data.integer = src->data.integer; + break; + case OPAL_INT8: + 
dest->data.int8 = src->data.int8; + break; + case OPAL_INT16: + dest->data.int16 = src->data.int16; + break; + case OPAL_INT32: + dest->data.int32 = src->data.int32; + break; + case OPAL_INT64: + dest->data.int64 = src->data.int64; + break; + + case OPAL_UINT: + dest->data.uint = src->data.uint; + break; + case OPAL_UINT8: + dest->data.uint8 = src->data.uint8; + break; + case OPAL_UINT16: + dest->data.uint16 = src->data.uint16; + break; + case OPAL_UINT32: + dest->data.uint32 = src->data.uint32; + break; + case OPAL_UINT64: + dest->data.uint64 = src->data.uint64; + break; + + case OPAL_BYTE_OBJECT: + if (NULL != dest->data.bo.bytes) { + free(dest->data.bo.bytes); + } + boptr = &src->data.bo; + if (NULL != boptr && NULL != boptr->bytes && 0 < boptr->size) { + dest->data.bo.bytes = (uint8_t *) malloc(boptr->size); + memcpy(dest->data.bo.bytes, boptr->bytes, boptr->size); + dest->data.bo.size = boptr->size; + } else { + dest->data.bo.bytes = NULL; + dest->data.bo.size = 0; + } + break; + + case OPAL_FLOAT: + dest->data.fval = src->data.fval; + break; + + case OPAL_TIMEVAL: + dest->data.tv.tv_sec = src->data.tv.tv_sec; + dest->data.tv.tv_usec = src->data.tv.tv_usec; + break; + + case OPAL_PTR: + dest->data.ptr = src->data.ptr; + break; + + default: + OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED); + return OPAL_ERR_NOT_SUPPORTED; + } + return OPAL_SUCCESS; +} diff --git a/opal/dss/dss_open_close.c b/opal/dss/dss_open_close.c index ebec81a7199..329402adb0c 100644 --- a/opal/dss/dss_open_close.c +++ b/opal/dss/dss_open_close.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -279,7 +279,7 @@ int opal_dss_register_vars (void) return ret; } - /* the threshold as to where to stop doubling the size of the buffer + /* the threshold as to where to stop doubling the size of the buffer * allocated memory and start doing additive increases */ opal_dss_threshold_size = OPAL_DSS_DEFAULT_THRESHOLD_SIZE; ret = mca_base_var_register ("opal", "dss", NULL, "buffer_threshold_size", NULL, @@ -577,7 +577,7 @@ int opal_dss_open(void) "OPAL_TIME", &tmp))) { return rc; } - + tmp = OPAL_NAME; if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_dss_pack_name, opal_dss_unpack_name, diff --git a/opal/dss/dss_pack.c b/opal/dss/dss_pack.c index 0d54a12d3e6..a68ad12930c 100644 --- a/opal/dss/dss_pack.c +++ b/opal/dss/dss_pack.c @@ -423,11 +423,11 @@ int opal_dss_pack_data_type(opal_buffer_t *buffer, const void *src, int32_t num_ opal_data_type_t type) { int ret; - + /* Turn around and pack the real type */ if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, src, num_vals, OPAL_DATA_TYPE_T))) { } - + return ret; } @@ -469,9 +469,9 @@ int opal_dss_pack_pstat(opal_buffer_t *buffer, const void *src, int32_t i; int ret; char *cptr; - + ptr = (opal_pstats_t **) src; - + for (i = 0; i < num_vals; ++i) { cptr = ptr[i]->node; if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, &cptr, 1, OPAL_STRING))) { @@ -622,7 +622,7 @@ int opal_dss_pack_node_stat(opal_buffer_t *buffer, const void *src, opal_netstats_t *ns; ptr = (opal_node_stats_t **) src; - + for (i = 0; i < num_vals; ++i) { if (OPAL_SUCCESS != (ret = opal_dss_pack_float(buffer, &ptr[i]->la, 1, OPAL_FLOAT))) { return ret; @@ -702,7 +702,7 @@ int opal_dss_pack_value(opal_buffer_t *buffer, const void *src, int ret; ptr = (opal_value_t **) src; - + for (i = 0; i < num_vals; ++i) { /* pack the key and type */ if (OPAL_SUCCESS != (ret = opal_dss_pack_string(buffer, &ptr[i]->key, 1, OPAL_STRING))) { 
@@ -1132,7 +1132,7 @@ int opal_dss_pack_buffer_contents(opal_buffer_t *buffer, const void *src, int ret; ptr = (opal_buffer_t **) src; - + for (i = 0; i < num_vals; ++i) { /* pack the number of bytes */ OPAL_OUTPUT((opal_dss_verbose, "opal_dss_pack_buffer_contents: bytes_used %u\n", (unsigned)ptr[i]->bytes_used)); diff --git a/opal/dss/dss_peek.c b/opal/dss/dss_peek.c index 78ec5f38cba..84561df397e 100644 --- a/opal/dss/dss_peek.c +++ b/opal/dss/dss_peek.c @@ -40,7 +40,7 @@ int opal_dss_peek(opal_buffer_t *buffer, opal_data_type_t *type, *num_vals = 0; return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; } - + /* if this is NOT a fully described buffer, then that is as much as * we can do - there is no way we can tell the caller what type is * in the buffer since that info wasn't stored. diff --git a/opal/dss/dss_print.c b/opal/dss/dss_print.c index 432cc5dfb37..ece4572eec4 100644 --- a/opal/dss/dss_print.c +++ b/opal/dss/dss_print.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -643,7 +643,7 @@ int opal_dss_print_pstat(char **output, char *prefix, opal_pstats_t *src, opal_d /* deal with NULL prefix */ if (NULL == prefix) asprintf(&prefx, " "); else prefx = prefix; - + /* if src is NULL, just print data type and return */ if (NULL == src) { asprintf(output, "%sData type: OPAL_PSTATS\tValue: NULL pointer", prefx); @@ -674,7 +674,7 @@ int opal_dss_print_node_stat(char **output, char *prefix, opal_node_stats_t *src /* deal with NULL prefix */ if (NULL == prefix) asprintf(&prefx, " "); else prefx = prefix; - + /* if src is NULL, just print data type and return */ if (NULL == src) { asprintf(output, "%sData type: OPAL_NODE_STATS\tValue: NULL pointer", prefx); @@ -693,7 +693,7 @@ int opal_dss_print_node_stat(char **output, char *prefix, opal_node_stats_t *src if (prefx != prefix) { free(prefx); } - + return OPAL_SUCCESS; } @@ -708,7 +708,7 @@ int opal_dss_print_value(char **output, char *prefix, opal_value_t *src, opal_da /* deal with NULL prefix */ if (NULL == prefix) asprintf(&prefx, " "); else prefx = prefix; - + /* if src is NULL, just print data type and return */ if (NULL == src) { asprintf(output, "%sData type: OPAL_VALUE\tValue: NULL pointer", prefx); @@ -717,7 +717,7 @@ int opal_dss_print_value(char **output, char *prefix, opal_value_t *src, opal_da } return OPAL_SUCCESS; } - + switch (src->type) { case OPAL_BOOL: asprintf(output, "%sOPAL_VALUE: Data type: OPAL_BOOL\tKey: %s\tValue: %s", diff --git a/opal/dss/dss_register.c b/opal/dss/dss_register.c index d41ef168178..8e89bc573e7 100644 --- a/opal/dss/dss_register.c +++ b/opal/dss/dss_register.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -74,6 +74,6 @@ int opal_dss_register(opal_dss_pack_fn_t pack_fn, info->odti_compare_fn = compare_fn; info->odti_print_fn = print_fn; info->odti_structured = structured; - + return opal_pointer_array_set_item(&opal_dss_types, *type, info); } diff --git a/opal/dss/dss_types.h b/opal/dss/dss_types.h index 4bf25bff970..c2612231f35 100644 --- a/opal/dss/dss_types.h +++ b/opal/dss/dss_types.h @@ -14,7 +14,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -47,6 +47,7 @@ typedef struct { opal_jobid_t jobid; opal_vpid_t vpid; } opal_process_name_t; +#define OPAL_SIZEOF_PROCESS_NAME_T 8 BEGIN_C_DECLS @@ -378,7 +379,7 @@ struct opal_buffer_t { /** Where the next data will be unpacked from (within the allocated memory starting as base_ptr) */ char *unpack_ptr; - + /** Number of bytes allocated (starting at base_ptr) */ size_t bytes_allocated; /** Number of bytes used by the buffer (i.e., amount of data -- diff --git a/opal/dss/dss_unpack.c b/opal/dss/dss_unpack.c index 7bd0a0bae4d..eb1a8f9869a 100644 --- a/opal/dss/dss_unpack.c +++ b/opal/dss/dss_unpack.c @@ -194,7 +194,7 @@ int opal_dss_unpack_int(opal_buffer_t *buffer, void *dest, /* slow path - types are different sizes */ UNPACK_SIZE_MISMATCH(int, remote_type, ret); } - + return ret; } @@ -227,7 +227,7 @@ int opal_dss_unpack_sizet(opal_buffer_t *buffer, void *dest, /* slow path - types are different sizes */ UNPACK_SIZE_MISMATCH(size_t, remote_type, ret); } - + return ret; } @@ -260,7 +260,7 @@ int opal_dss_unpack_pid(opal_buffer_t *buffer, void *dest, /* slow path - types are different sizes */ UNPACK_SIZE_MISMATCH(pid_t, remote_type, ret); } - + return ret; } @@ -588,10 
+588,10 @@ int opal_dss_unpack_pstat(opal_buffer_t *buffer, void *dest, int32_t i, n, m; int ret; char *cptr; - + ptr = (opal_pstats_t **) dest; n = *num_vals; - + for (i = 0; i < n; ++i) { /* allocate the new object */ ptr[i] = OBJ_NEW(opal_pstats_t); @@ -668,7 +668,7 @@ int opal_dss_unpack_pstat(opal_buffer_t *buffer, void *dest, return ret; } } - + return OPAL_SUCCESS; } @@ -856,10 +856,10 @@ int opal_dss_unpack_node_stat(opal_buffer_t *buffer, void *dest, opal_node_stats_t **ptr; int32_t i, n, m; int ret; - + ptr = (opal_node_stats_t **) dest; n = *num_vals; - + for (i = 0; i < n; ++i) { /* allocate the new object */ ptr[i] = OBJ_NEW(opal_node_stats_t); @@ -937,7 +937,7 @@ int opal_dss_unpack_node_stat(opal_buffer_t *buffer, void *dest, return ret; } } - + return OPAL_SUCCESS; } @@ -953,7 +953,7 @@ int opal_dss_unpack_value(opal_buffer_t *buffer, void *dest, ptr = (opal_value_t **) dest; n = *num_vals; - + for (i = 0; i < n; ++i) { /* allocate the new object */ ptr[i] = OBJ_NEW(opal_value_t); @@ -1395,7 +1395,7 @@ int opal_dss_unpack_buffer_contents(opal_buffer_t *buffer, void *dest, ptr = (opal_buffer_t **) dest; n = *num_vals; - + for (i = 0; i < n; ++i) { /* allocate the new object */ ptr[i] = OBJ_NEW(opal_buffer_t); diff --git a/opal/errhandler/Makefile.am b/opal/errhandler/Makefile.am new file mode 100644 index 00000000000..b6e3eab4d5a --- /dev/null +++ b/opal/errhandler/Makefile.am @@ -0,0 +1,17 @@ +# -*- makefile -*- +# +# Copyright (c) 2015 Intel, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from opal/Makefile.am + +headers += \ + errhandler/opal_errhandler.h + +lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \ + errhandler/opal_errhandler.c diff --git a/opal/errhandler/opal_errhandler.c b/opal/errhandler/opal_errhandler.c new file mode 100644 index 00000000000..fdd00d67674 --- /dev/null +++ b/opal/errhandler/opal_errhandler.c @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/errhandler/opal_errhandler.h" + +opal_errhandler_fn_t errhandler = NULL; +void *cbdata = NULL; + +void opal_register_errhandler(opal_errhandler_fn_t newerr, void *cbd) +{ + errhandler = newerr; + cbdata = cbd; +} + +void opal_deregister_errhandler(void) +{ + errhandler = NULL; + cbdata = NULL; +} + +void opal_invoke_errhandler(int status, opal_proc_t *proc) +{ + if (NULL != errhandler) { + errhandler(status, proc, cbdata); + } +} diff --git a/opal/errhandler/opal_errhandler.h b/opal/errhandler/opal_errhandler.h new file mode 100644 index 00000000000..4a1646f52b8 --- /dev/null +++ b/opal/errhandler/opal_errhandler.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_ERRHANDLER_H +#define OPAL_ERRHANDLER_H + +#include "opal_config.h" + +#include "opal/util/proc.h" + +typedef void (*opal_errhandler_fn_t)(int status, opal_proc_t *proc, void *cbdata); + +OPAL_DECLSPEC void opal_register_errhandler(opal_errhandler_fn_t errhandler, void *cbdata); + +OPAL_DECLSPEC void opal_deregister_errhandler(void); + +OPAL_DECLSPEC void opal_invoke_errhandler(int status, opal_proc_t *proc); + +#endif diff --git a/opal/etc/Makefile.am b/opal/etc/Makefile.am index af4ce30a427..22c35bc73bd 100644 --- a/opal/etc/Makefile.am +++ b/opal/etc/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -53,7 +53,7 @@ install-data-local: # Only remove if exactly the same as what in our tree -# NOTE TO READER: Bourne shell if ... fi evaluates the body if +# NOTE TO READER: Bourne shell if ... fi evaluates the body if # the return of the evaluted command is 0 (as opposed to non-zero # as used by everyone else) uninstall-local: diff --git a/opal/etc/openmpi-mca-params.conf b/opal/etc/openmpi-mca-params.conf index 7a1f92367aa..e4914804723 100644 --- a/opal/etc/openmpi-mca-params.conf +++ b/opal/etc/openmpi-mca-params.conf @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -54,5 +54,5 @@ # Change component loading path # component_path = /usr/local/lib/openmpi:~/my_openmpi_components -# See "ompi_info --param all all" for a full listing of Open MPI MCA -# parameters available and their default values. +# See "ompi_info --param all all --level 9" for a full listing of Open +# MPI MCA parameters available and their default values. diff --git a/opal/include/Makefile.am b/opal/include/Makefile.am index 5d18694ba72..464faafd0ae 100644 --- a/opal/include/Makefile.am +++ b/opal/include/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -28,7 +28,7 @@ include opal/Makefile.am EXTRA_DIST = $(headers) -if WANT_INSTALL_HEADERS +if WANT_INSTALL_HEADERS opaldir = $(opalincludedir) nobase_dist_opal_HEADERS = $(headers) nobase_nodist_opal_HEADERS = $(nodist_headers) diff --git a/opal/include/opal/Makefile.am b/opal/include/opal/Makefile.am index 140c5ab7055..f0389a25815 100644 --- a/opal/include/opal/Makefile.am +++ b/opal/include/opal/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/align.h b/opal/include/opal/align.h index c445b062176..ea68d33bc6c 100644 --- a/opal/include/opal/align.h +++ b/opal/include/opal/align.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,15 +13,17 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #ifndef OPAL_ALIGN_H #define OPAL_ALIGN_H +#define OPAL_DOWN_ALIGN(x,a,t) ((x) & ~(((t)(a)-1))) +#define OPAL_DOWN_ALIGN_PTR(x,a,t) ((t)OPAL_DOWN_ALIGN((uintptr_t)x, a, uintptr_t)) #define OPAL_ALIGN(x,a,t) (((x)+((t)(a)-1)) & ~(((t)(a)-1))) #define OPAL_ALIGN_PTR(x,a,t) ((t)OPAL_ALIGN((uintptr_t)x, a, uintptr_t)) #define OPAL_ALIGN_PAD_AMOUNT(x,s) ((~((uintptr_t)(x))+1) & ((uintptr_t)(s)-1)) diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index a5cf60331b2..82c046946f8 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -5,26 +5,26 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #ifndef OPAL_CONSTANTS_H #define OPAL_CONSTANTS_H - -/* error codes - don't forget to update opal/rutime/opal_init.c when + +/* error codes - don't forget to update opal/rutime/opal_init.c when adding to this list */ #define OPAL_ERR_BASE 0 /* internal use only */ - + enum { OPAL_SUCCESS = (OPAL_ERR_BASE), @@ -79,7 +79,8 @@ enum { OPAL_ERR_CONNECTION_FAILED = (OPAL_ERR_BASE - 49), OPAL_ERR_AUTHENTICATION_FAILED = (OPAL_ERR_BASE - 50), OPAL_ERR_COMM_FAILURE = (OPAL_ERR_BASE - 51), - OPAL_ERR_SERVER_NOT_AVAIL = (OPAL_ERR_BASE - 52) + OPAL_ERR_SERVER_NOT_AVAIL = (OPAL_ERR_BASE - 52), + OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/include/opal/hash_string.h b/opal/include/opal/hash_string.h index b6b835ced52..c4af2153806 100644 --- a/opal/include/opal/hash_string.h +++ b/opal/include/opal/hash_string.h @@ -3,9 +3,9 @@ * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/include/opal/opal_socket_errno.h b/opal/include/opal/opal_socket_errno.h index 54fb822a1ec..1e3f346a0cb 100644 --- a/opal/include/opal/opal_socket_errno.h +++ b/opal/include/opal/opal_socket_errno.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #ifndef OPAL_GET_SOCKET_ERROR_H diff --git a/opal/include/opal/prefetch.h b/opal/include/opal/prefetch.h index 95ea8211a11..059b3a023f4 100644 --- a/opal/include/opal/prefetch.h +++ b/opal/include/opal/prefetch.h @@ -2,9 +2,9 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/include/opal/sys/Makefile.am b/opal/include/opal/sys/Makefile.am index 6141e12ce3e..230abe81e79 100644 --- a/opal/include/opal/sys/Makefile.am +++ b/opal/include/opal/sys/Makefile.am @@ -5,16 +5,20 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 Sandia National Laboratories. All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -27,9 +31,9 @@ headers += \ opal/sys/timer.h \ opal/sys/cma.h -include opal/sys/alpha/Makefile.am -include opal/sys/amd64/Makefile.am +include opal/sys/x86_64/Makefile.am include opal/sys/arm/Makefile.am +include opal/sys/arm64/Makefile.am include opal/sys/ia32/Makefile.am include opal/sys/ia64/Makefile.am include opal/sys/mips/Makefile.am @@ -37,3 +41,4 @@ include opal/sys/osx/Makefile.am include opal/sys/powerpc/Makefile.am include opal/sys/sparcv9/Makefile.am include opal/sys/sync_builtin/Makefile.am +include opal/sys/gcc_builtin/Makefile.am diff --git a/opal/include/opal/sys/alpha/Makefile.am b/opal/include/opal/sys/alpha/Makefile.am deleted file mode 100644 index 4457ad82a28..00000000000 --- a/opal/include/opal/sys/alpha/Makefile.am +++ /dev/null @@ -1,21 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am - -headers += opal/sys/alpha/atomic.h diff --git a/opal/include/opal/sys/alpha/atomic.h b/opal/include/opal/sys/alpha/atomic.h deleted file mode 100644 index df1d788c1db..00000000000 --- a/opal/include/opal/sys/alpha/atomic.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_SYS_ARCH_ATOMIC_H -#define OPAL_SYS_ARCH_ATOMIC_H 1 - -/* - * On alpha, everything is load-locked, store-conditional... - */ - -#define MB() __asm__ __volatile__ ("mb"); -#define RMB() __asm__ __volatile__ ("mb"); -#define WMB() __asm__ __volatile__ ("wmb"); - -/********************************************************************** - * - * Define constants for PowerPC 32 - * - *********************************************************************/ -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 - -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 - -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 - - -/********************************************************************** - * - * Memory Barriers - * - *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY - -static inline void opal_atomic_mb(void) -{ - MB(); -} - - -static inline void opal_atomic_rmb(void) -{ - RMB(); -} - - -static inline void opal_atomic_wmb(void) -{ - WMB(); -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - - -/********************************************************************** - * - * Atomic math operations - * - *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY - -static inline int opal_atomic_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - int32_t ret; - - __asm __volatile__ ( - "1: ldl_l %0, %1 \n\t" - "cmpeq %0, %2, %0 \n\t" - "beq %0, 2f \n\t" - "mov %3, %0 \n\t" - "stl_c %0, %1 \n\t" - "beq %0, 1b \n\t" - "jmp 3f \n" - "2: mov $31, %0 \n" - "3: \n" - : 
"=&r" (ret), "+m" (*addr) - : "r" (oldval), "r" (newval) - : "memory"); - - return ret; -} - - -static inline int opal_atomic_cmpset_acq_32(volatile int32_t *addr, - int32_t oldval, - int32_t newval) -{ - int rc; - - rc = opal_atomic_cmpset_32(addr, oldval, newval); - opal_atomic_rmb(); - - return rc; -} - - -static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr, - int32_t oldval, - int32_t newval) -{ - opal_atomic_wmb(); - return opal_atomic_cmpset_32(addr, oldval, newval); -} - - -static inline int opal_atomic_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - int32_t ret; - - __asm__ __volatile__ ( - "1: ldq_l %0, %1 \n\t" - "cmpeq %0, %2, %0 \n\t" - "beq %0, 2f \n\t" - "mov %3, %0 \n\t" - "stq_c %0, %1 \n\t" - "beq %0, 1b \n\t" - "jmp 3f \n" - "2: mov $31, %0 \n" - "3: \n" - : "=&r" (ret), "+m" (*addr) - : "r" (oldval), "r" (newval) - : "memory"); - - return ret; -} - - -static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr, - int64_t oldval, - int64_t newval) -{ - int rc; - - rc = opal_atomic_cmpset_64(addr, oldval, newval); - opal_atomic_rmb(); - - return rc; -} - - -static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr, - int64_t oldval, - int64_t newval) -{ - opal_atomic_wmb(); - return opal_atomic_cmpset_64(addr, oldval, newval); -} - - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - - -#endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/alpha/update.sh b/opal/include/opal/sys/alpha/update.sh deleted file mode 100644 index 366d534549a..00000000000 --- a/opal/include/opal/sys/alpha/update.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFILE=/tmp/opal_atomic_$$.c - -trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 - -echo Updating asm.s from atomic.h using gcc - -cat > $CFILE< -#include -#define static -#define inline -#define OPAL_GCC_INLINE_ASSEMBLY 1 -#include "atomic.h" -EOF - -gcc -O3 -I. -S $CFILE -o asm.s diff --git a/opal/include/opal/sys/amd64/Makefile.am b/opal/include/opal/sys/amd64/Makefile.am deleted file mode 100644 index 08a222adab7..00000000000 --- a/opal/include/opal/sys/amd64/Makefile.am +++ /dev/null @@ -1,23 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am - -headers += \ - opal/sys/amd64/atomic.h \ - opal/sys/amd64/timer.h diff --git a/opal/include/opal/sys/amd64/atomic.h b/opal/include/opal/sys/amd64/atomic.h deleted file mode 100644 index 6b5dc1b205d..00000000000 --- a/opal/include/opal/sys/amd64/atomic.h +++ /dev/null @@ -1,274 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2010 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef OPAL_SYS_ARCH_ATOMIC_H -#define OPAL_SYS_ARCH_ATOMIC_H 1 - -/* - * On amd64, we use cmpxchg. - */ - - -#define SMPLOCK "lock; " -#define MB() __asm__ __volatile__("": : :"memory") - - -/********************************************************************** - * - * Define constants for AMD64 / x86_64 / EM64T / ... - * - *********************************************************************/ -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 - -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 - -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 - -/********************************************************************** - * - * Memory Barriers - * - *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY - -static inline void opal_atomic_mb(void) -{ - MB(); -} - - -static inline void opal_atomic_rmb(void) -{ - MB(); -} - - -static inline void opal_atomic_wmb(void) -{ - MB(); -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - - -/********************************************************************** - * - * Atomic math operations - * - *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY - -static inline int opal_atomic_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - unsigned char ret; - __asm__ __volatile__ ( - SMPLOCK "cmpxchgl %3,%2 \n\t" - "sete %0 \n\t" - : "=qm" (ret), "+a" (oldval), "+m" (*addr) 
- : "q"(newval) - : "memory", "cc"); - - return (int)ret; -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#define opal_atomic_cmpset_acq_32 opal_atomic_cmpset_32 -#define opal_atomic_cmpset_rel_32 opal_atomic_cmpset_32 - -#if OPAL_GCC_INLINE_ASSEMBLY - -static inline int opal_atomic_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - unsigned char ret; - __asm__ __volatile__ ( - SMPLOCK "cmpxchgq %3,%2 \n\t" - "sete %0 \n\t" - : "=qm" (ret), "+a" (oldval), "+m" (*((volatile long*)addr)) - : "q"(newval) - : "memory", "cc" - ); - - return (int)ret; -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#define opal_atomic_cmpset_acq_64 opal_atomic_cmpset_64 -#define opal_atomic_cmpset_rel_64 opal_atomic_cmpset_64 - -#if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T - -static inline int opal_atomic_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, - opal_int128_t newval) -{ - unsigned char ret; - - /* cmpxchg16b compares the value at the address with eax:edx (low:high). if the values are - * the same the contents of ebx:ecx are stores at the address. in all cases the value stored - * at the address is returned in eax:edx. 
*/ - __asm__ __volatile__ (SMPLOCK "cmpxchg16b (%%rsi) \n\t" - "sete %0 \n\t" - : "=qm" (ret) - : "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]), - "a" (((int64_t *)&oldval)[0]), "d" (((int64_t *)&oldval)[1]) - : "memory", "cc"); - - return (int) ret; -} - -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - - -#if OPAL_GCC_INLINE_ASSEMBLY - -#define OPAL_HAVE_ATOMIC_SWAP_32 1 - -#define OPAL_HAVE_ATOMIC_SWAP_64 1 - -static inline int32_t opal_atomic_swap_32( volatile int32_t *addr, - int32_t newval) -{ - int32_t oldval; - - __asm__ __volatile__("xchg %1, %0" : - "=r" (oldval), "=m" (*addr) : - "0" (newval), "m" (*addr) : - "memory"); - return oldval; -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#if OPAL_GCC_INLINE_ASSEMBLY - -static inline int64_t opal_atomic_swap_64( volatile int64_t *addr, - int64_t newval) -{ - int64_t oldval; - - __asm__ __volatile__("xchgq %1, %0" : - "=r" (oldval) : - "m" (*addr), "0" (newval) : - "memory"); - return oldval; -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - - - -#if OPAL_GCC_INLINE_ASSEMBLY - -#define OPAL_HAVE_ATOMIC_MATH_32 1 -#define OPAL_HAVE_ATOMIC_MATH_64 1 - -#define OPAL_HAVE_ATOMIC_ADD_32 1 - -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type int - * - * Atomically adds @i to @v. - */ -static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) -{ - int ret = i; - __asm__ __volatile__( - SMPLOCK "xaddl %1,%0" - :"=m" (*v), "+r" (ret) - :"m" (*v) - :"memory", "cc" - ); - return (ret+i); -} - -#define OPAL_HAVE_ATOMIC_ADD_64 1 - -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type int - * - * Atomically adds @i to @v. 
- */ -static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i) -{ - int64_t ret = i; - __asm__ __volatile__( - SMPLOCK "xaddq %1,%0" - :"=m" (*v), "+r" (ret) - :"m" (*v) - :"memory", "cc" - ); - return (ret+i); -} - -#define OPAL_HAVE_ATOMIC_SUB_32 1 - -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type int - * - * Atomically subtracts @i from @v. - */ -static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) -{ - int ret = -i; - __asm__ __volatile__( - SMPLOCK "xaddl %1,%0" - :"=m" (*v), "+r" (ret) - :"m" (*v) - :"memory", "cc" - ); - return (ret-i); -} - -#define OPAL_HAVE_ATOMIC_SUB_64 1 - -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type int - * - * Atomically subtracts @i from @v. - */ -static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i) -{ - int64_t ret = -i; - __asm__ __volatile__( - SMPLOCK "xaddq %1,%0" - :"=m" (*v), "+r" (ret) - :"m" (*v) - :"memory", "cc" - ); - return (ret-i); -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/amd64/timer.h b/opal/include/opal/sys/amd64/timer.h deleted file mode 100644 index 2014217f04e..00000000000 --- a/opal/include/opal/sys/amd64/timer.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_SYS_ARCH_TIMER_H -#define OPAL_SYS_ARCH_TIMER_H 1 - - -typedef uint64_t opal_timer_t; - -/* Using RDTSC(P) results in non-monotonic timers across cores */ -#undef OPAL_TIMER_MONOTONIC -#define OPAL_TIMER_MONOTONIC 0 - -#if OPAL_GCC_INLINE_ASSEMBLY - -/** - * http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html - */ -static inline opal_timer_t -opal_sys_timer_get_cycles(void) -{ - unsigned l, h; -#if !OPAL_ASSEMBLY_SUPPORTS_RDTSCP - __asm__ __volatile__ ("cpuid\n\t" - "rdtsc\n\t" - : "=a" (l), "=d" (h) - :: "rbx", "rcx"); -#else - /* If we need higher accuracy we should implement the algorithm proposed - * on the Intel document referenced above. However, in the context of MPI - * this function will be used as the backend for MPI_Wtime and as such - * can afford a small inaccuracy. - */ - __asm__ __volatile__ ("rdtscp\n\t" - "mov %%edx, %0\n\t" - "mov %%eax, %1\n\t" - "cpuid\n\t" - : "=r" (h), "=r" (l) - :: "rax", "rbx", "rcx", "rdx"); -#endif - return ((opal_timer_t)l) | (((opal_timer_t)h) << 32); -} - -#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1 - -#else - -opal_timer_t opal_sys_timer_get_cycles(void); - -#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1 - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#endif /* ! OPAL_SYS_ARCH_TIMER_H */ diff --git a/opal/include/opal/sys/amd64/update.sh b/opal/include/opal/sys/amd64/update.sh deleted file mode 100644 index 9784b86f45d..00000000000 --- a/opal/include/opal/sys/amd64/update.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFILE=/tmp/opal_atomic_$$.c - -trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 - -echo Updating asm.s from atomic.h and timer.h using gcc - -cat > $CFILE< -#include -#define static -#define inline -#define OPAL_GCC_INLINE_ASSEMBLY 1 -#include "atomic.h" -#include "timer.h" -EOF - -gcc -O3 -I. -S $CFILE -o asm.s diff --git a/opal/include/opal/sys/architecture.h b/opal/include/opal/sys/architecture.h index 69a3a6b132a..6341fc354fb 100644 --- a/opal/include/opal/sys/architecture.h +++ b/opal/include/opal/sys/architecture.h @@ -5,16 +5,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,8 +33,7 @@ #define OPAL_UNSUPPORTED 0000 #define OPAL_IA32 0010 #define OPAL_IA64 0020 -#define OPAL_AMD64 0030 -#define OPAL_ALPHA 0040 +#define OPAL_X86_64 0030 #define OPAL_POWERPC32 0050 #define OPAL_POWERPC64 0051 #define OPAL_SPARC 0060 @@ -38,9 +41,11 @@ #define OPAL_SPARCV9_64 0062 #define OPAL_MIPS 0070 #define OPAL_ARM 0100 +#define OPAL_ARM64 0101 #define OPAL_BUILTIN_SYNC 0200 #define OPAL_BUILTIN_OSX 0201 -#define OPAL_BUILTIN_NO 0202 +#define OPAL_BUILTIN_GCC 0202 +#define OPAL_BUILTIN_NO 0203 /* Formats */ #define OPAL_DEFAULT 1000 /* standard for given architecture */ diff --git a/opal/include/opal/sys/arm/Makefile.am b/opal/include/opal/sys/arm/Makefile.am index 68da5aca528..602cce94bed 100644 --- a/opal/include/opal/sys/arm/Makefile.am +++ b/opal/include/opal/sys/arm/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2008 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/arm/atomic.h b/opal/include/opal/sys/arm/atomic.h index 210ef9dbea3..81f81ddd536 100644 --- a/opal/include/opal/sys/arm/atomic.h +++ b/opal/include/opal/sys/arm/atomic.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 ARM ltd. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/include/opal/sys/arm/timer.h b/opal/include/opal/sys/arm/timer.h index 646f974a045..b93689c908d 100644 --- a/opal/include/opal/sys/arm/timer.h +++ b/opal/include/opal/sys/arm/timer.h @@ -3,9 +3,9 @@ * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/include/opal/sys/arm/update.sh b/opal/include/opal/sys/arm/update.sh index cf42acc814d..94d8ed2714b 100644 --- a/opal/include/opal/sys/arm/update.sh +++ b/opal/include/opal/sys/arm/update.sh @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/arm64/Makefile.am b/opal/include/opal/sys/arm64/Makefile.am new file mode 100644 index 00000000000..33db6ecb014 --- /dev/null +++ b/opal/include/opal/sys/arm64/Makefile.am @@ -0,0 +1,24 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2008 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. 
+# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am + +headers += \ + opal/sys/arm64/atomic.h \ + opal/sys/arm64/timer.h + diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h new file mode 100644 index 00000000000..2f7f7d32aac --- /dev/null +++ b/opal/include/opal/sys/arm64/atomic.h @@ -0,0 +1,301 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010 ARM ltd. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OPAL_SYS_ARCH_ATOMIC_H) + +#define OPAL_SYS_ARCH_ATOMIC_H 1 + +#if OPAL_GCC_INLINE_ASSEMBLY + +#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 +#define OPAL_HAVE_ATOMIC_LLSC_32 1 +#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_SWAP_32 1 +#define OPAL_HAVE_ATOMIC_MATH_32 1 +#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_SWAP_64 1 +#define OPAL_HAVE_ATOMIC_LLSC_64 1 +#define OPAL_HAVE_ATOMIC_ADD_32 1 +#define OPAL_HAVE_ATOMIC_SUB_32 1 +#define OPAL_HAVE_ATOMIC_ADD_64 1 +#define OPAL_HAVE_ATOMIC_SUB_64 1 + +#define MB() __asm__ __volatile__ ("dmb sy" : : : "memory") +#define RMB() __asm__ __volatile__ ("dmb ld" : : : "memory") +#define WMB() __asm__ __volatile__ ("dmb st" : : : "memory") + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ + +static inline void opal_atomic_mb (void) +{ + MB(); +} + +static inline void opal_atomic_rmb (void) +{ + RMB(); +} + +static inline void opal_atomic_wmb (void) +{ + WMB(); +} + +static inline void opal_atomic_isync (void) +{ + __asm__ __volatile__ ("isb"); +} + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + +static inline int opal_atomic_cmpset_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + int32_t ret, tmp; + + __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" + " cmp %w0, %w3 \n" + " bne 2f \n" + " stxr %w1, %w4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + +static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) +{ + int32_t ret, tmp; + + __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" + " stlxr %w1, 
%w3, [%2] \n" + " cbnz %w1, 1b \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (newval) + : "cc", "memory"); + + return ret; +} + +/* these two functions aren't inlined in the non-gcc case because then + there would be two function calls (since neither cmpset_32 nor + atomic_?mb can be inlined). Instead, we "inline" them by hand in + the assembly, meaning there is one function call overhead instead + of two */ +static inline int opal_atomic_cmpset_acq_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + int32_t ret, tmp; + + __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" + " cmp %w0, %w3 \n" + " bne 2f \n" + " stxr %w1, %w4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + + +static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + int32_t ret, tmp; + + __asm__ __volatile__ ("1: ldxr %w0, [%2] \n" + " cmp %w0, %w3 \n" + " bne 2f \n" + " stlxr %w1, %w4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + +static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) +{ + int32_t ret; + + __asm__ __volatile__ ("ldaxr %w0, [%1] \n" + : "=&r" (ret) + : "r" (addr)); + + return ret; +} + +static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) +{ + int ret; + + __asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" + : "=&r" (ret) + : "r" (addr), "r" (newval) + : "cc", "memory"); + + return ret == 0; +} + +static inline int opal_atomic_cmpset_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + int64_t ret; + int tmp; + + __asm__ __volatile__ ("1: ldaxr %0, [%2] \n" + " cmp %0, %3 \n" + " bne 2f \n" + " stxr %w1, %4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + 
+ return (ret == oldval); +} + +static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) +{ + int64_t ret; + int tmp; + + __asm__ __volatile__ ("1: ldaxr %0, [%2] \n" + " stlxr %w1, %3, [%2] \n" + " cbnz %w1, 1b \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (newval) + : "cc", "memory"); + + return ret; +} + +/* these two functions aren't inlined in the non-gcc case because then + there would be two function calls (since neither cmpset_64 nor + atomic_?mb can be inlined). Instead, we "inline" them by hand in + the assembly, meaning there is one function call overhead instead + of two */ +static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + int64_t ret; + int tmp; + + __asm__ __volatile__ ("1: ldaxr %0, [%2] \n" + " cmp %0, %3 \n" + " bne 2f \n" + " stxr %w1, %4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + + +static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + int64_t ret; + int tmp; + + __asm__ __volatile__ ("1: ldxr %0, [%2] \n" + " cmp %0, %3 \n" + " bne 2f \n" + " stlxr %w1, %4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + +static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr) +{ + int64_t ret; + + __asm__ __volatile__ ("ldaxr %0, [%1] \n" + : "=&r" (ret) + : "r" (addr)); + + return ret; +} + +static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval) +{ + int ret; + + __asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" + : "=&r" (ret) + : "r" (addr), "r" (newval) + : "cc", "memory"); + + return ret == 0; +} + +#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ + static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \ + 
{ \ + type newval; \ + int32_t tmp; \ + \ + __asm__ __volatile__("1: ldxr %" reg "0, [%2] \n" \ + " " inst " %" reg "0, %" reg "0, %" reg "3 \n" \ + " stxr %w1, %" reg "0, [%2] \n" \ + " cbnz %w1, 1b \n" \ + : "=&r" (newval), "=&r" (tmp) \ + : "r" (addr), "r" (value) \ + : "cc", "memory"); \ + \ + return newval; \ + } + +OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w") +OPAL_ASM_MAKE_ATOMIC(int32_t, 32, sub, "sub", "w") +OPAL_ASM_MAKE_ATOMIC(int64_t, 64, add, "add", "") +OPAL_ASM_MAKE_ATOMIC(int64_t, 64, sub, "sub", "") + +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + +#endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/arm64/timer.h b/opal/include/opal/sys/arm64/timer.h new file mode 100644 index 00000000000..5da3ffecba8 --- /dev/null +++ b/opal/include/opal/sys/arm64/timer.h @@ -0,0 +1,45 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2008 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2016 Broadcom Limited. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_SYS_ARCH_TIMER_H +#define OPAL_SYS_ARCH_TIMER_H 1 + +#include + +typedef uint64_t opal_timer_t; + +static inline opal_timer_t +opal_sys_timer_get_cycles(void) +{ + opal_timer_t ret; + + __asm__ __volatile__ ("isb" ::: "memory"); + __asm__ __volatile__ ("mrs %0, CNTVCT_EL0" : "=r" (ret)); + + return ret; +} + + +static inline opal_timer_t +opal_sys_timer_freq(void) +{ + opal_timer_t freq; + __asm__ __volatile__ ("mrs %0, CNTFRQ_EL0" : "=r" (freq)); + return (opal_timer_t)(freq); +} + +#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1 + +#endif /* ! 
OPAL_SYS_ARCH_TIMER_H */ diff --git a/opal/include/opal/sys/arm64/update.sh b/opal/include/opal/sys/arm64/update.sh new file mode 100644 index 00000000000..94d8ed2714b --- /dev/null +++ b/opal/include/opal/sys/arm64/update.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +CFILE=/tmp/opal_atomic_$$.c + +trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 + +echo Updating atomic.s from atomic.h using gcc + +cat > $CFILE< +#include +#define static +#define inline +#define OPAL_GCC_INLINE_ASSEMBLY 1 +#include "../architecture.h" +#include "atomic.h" +EOF + +gcc -O1 -I. -S $CFILE -o atomic.s diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index cae28a49b10..1622d4f8303 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,16 +6,20 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,7 +41,7 @@ * - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers * - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks * - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomicly" - * - \c OPAL_HAVE_ATOMIC_MATH_64 if 32 bit add/sub/cmpset can be done "atomicly" + * - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomicly" * * Note that for the Atomic math, atomic add/sub may be implemented as * C code using opal_atomic_cmpset. The appearance of atomic @@ -103,8 +108,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; /********************************************************************** * * Set or unset these macros in the architecture-specific atomic.h - * files if we need to specify them as inline or non-inline - * + * files if we need to specify them as inline or non-inline + * *********************************************************************/ #if !OPAL_GCC_INLINE_ASSEMBLY #define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 0 @@ -128,24 +133,34 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; #define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 1 #endif +/** + * Enumeration of lock states + */ +enum { + OPAL_ATOMIC_UNLOCKED = 0, + OPAL_ATOMIC_LOCKED = 1 +}; + /********************************************************************** * * Load the appropriate architecture files and set some reasonable * default values for our support * - *********************************************************************/ + *********************************************************************/ #if defined(DOXYGEN) -/* 
don't include system-level gorp when generating doxygen files */ +/* don't include system-level gorp when generating doxygen files */ #elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_SYNC #include "opal/sys/sync_builtin/atomic.h" +#elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_GCC +#include "opal/sys/gcc_builtin/atomic.h" #elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_OSX #include "opal/sys/osx/atomic.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_ALPHA -#include "opal/sys/alpha/atomic.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_AMD64 -#include "opal/sys/amd64/atomic.h" +#elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 +#include "opal/sys/x86_64/atomic.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_ARM #include "opal/sys/arm/atomic.h" +#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +#include "opal/sys/arm64/atomic.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 #include "opal/sys/ia32/atomic.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_IA64 @@ -177,6 +192,12 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; #ifndef OPAL_HAVE_ATOMIC_CMPSET_128 #define OPAL_HAVE_ATOMIC_CMPSET_128 0 #endif +#ifndef OPAL_HAVE_ATOMIC_LLSC_32 +#define OPAL_HAVE_ATOMIC_LLSC_32 0 +#endif +#ifndef OPAL_HAVE_ATOMIC_LLSC_64 +#define OPAL_HAVE_ATOMIC_LLSC_64 0 +#endif #endif /* DOXYGEN */ /********************************************************************** @@ -206,7 +227,7 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; */ #if OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER -static inline +static inline #endif void opal_atomic_mb(void); @@ -221,7 +242,7 @@ void opal_atomic_mb(void); */ #if OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER -static inline +static inline #endif void opal_atomic_rmb(void); @@ -236,7 +257,7 @@ void opal_atomic_rmb(void); */ #if OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER -static inline +static inline #endif void opal_atomic_wmb(void); @@ -257,15 +278,6 @@ void opal_atomic_wmb(void); #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) -/** - * Enumeration of lock states - */ -enum { - 
OPAL_ATOMIC_UNLOCKED = 0, - OPAL_ATOMIC_LOCKED = 1 -}; - - /** * Initialize a lock to value * @@ -273,7 +285,7 @@ enum { * @param value Initial value to set lock to */ #if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 -static inline +static inline #endif void opal_atomic_init(opal_atomic_lock_t* lock, int32_t value); @@ -332,19 +344,19 @@ void opal_atomic_unlock(opal_atomic_lock_t *lock); #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_32 #if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 -static inline +static inline #endif int opal_atomic_cmpset_32(volatile int32_t *addr, int32_t oldval, int32_t newval); #if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 -static inline +static inline #endif int opal_atomic_cmpset_acq_32(volatile int32_t *addr, int32_t oldval, int32_t newval); #if OPAL_HAVE_INLINE_ATOMIC_CMPSET_32 -static inline +static inline #endif int opal_atomic_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, int32_t newval); @@ -357,19 +369,19 @@ int opal_atomic_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_64 #if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 -static inline +static inline #endif int opal_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval, int64_t newval); #if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 -static inline +static inline #endif int opal_atomic_cmpset_acq_64(volatile int64_t *addr, int64_t oldval, int64_t newval); #if OPAL_HAVE_INLINE_ATOMIC_CMPSET_64 -static inline +static inline #endif int opal_atomic_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, int64_t newval); @@ -483,27 +495,27 @@ opal_atomic_sub_size_t(volatile size_t *addr, int delta) static inline */ static inline int opal_atomic_cmpset_xx(volatile void* addr, int64_t oldval, int64_t newval, size_t length); -static inline int opal_atomic_cmpset_acq_xx(volatile void* addr, - int64_t oldval, int64_t newval, +static inline int opal_atomic_cmpset_acq_xx(volatile void* addr, + int64_t oldval, int64_t newval, size_t length); -static inline int 
opal_atomic_cmpset_rel_xx(volatile void* addr, - int64_t oldval, int64_t newval, +static inline int opal_atomic_cmpset_rel_xx(volatile void* addr, + int64_t oldval, int64_t newval, size_t length); -static inline int opal_atomic_cmpset_ptr(volatile void* addr, - void* oldval, +static inline int opal_atomic_cmpset_ptr(volatile void* addr, + void* oldval, void* newval); -static inline int opal_atomic_cmpset_acq_ptr(volatile void* addr, - void* oldval, +static inline int opal_atomic_cmpset_acq_ptr(volatile void* addr, + void* oldval, void* newval); -static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr, - void* oldval, +static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr, + void* oldval, void* newval); /** * Atomic compare and set of pointer with relaxed semantics. This * macro detect at compile time the type of the first argument and - * choose the correct function to be called. + * choose the correct function to be called. * * \note This macro should only be used for integer types. * @@ -519,7 +531,7 @@ static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr, /** * Atomic compare and set of pointer with acquire semantics. This - * macro detect at compile time the type of the first argument + * macro detect at compile time the type of the first argument * and choose the correct function to be called. * * \note This macro should only be used for integer types. @@ -537,7 +549,7 @@ static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr, /** * Atomic compare and set of pointer with release semantics. This - * macro detect at compile time the type of the first argument + * macro detect at compile time the type of the first argument * and choose the correct function to b * * \note This macro should only be used for integer types. 
@@ -556,9 +568,9 @@ static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr, #if defined(DOXYGEN) || (OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64) -static inline void opal_atomic_add_xx(volatile void* addr, +static inline void opal_atomic_add_xx(volatile void* addr, int32_t value, size_t length); -static inline void opal_atomic_sub_xx(volatile void* addr, +static inline void opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length); #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 static inline int32_t opal_atomic_add_ptr( volatile void* addr, void* delta ); @@ -572,7 +584,7 @@ static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); /** * Atomically increment the content depending on the type. This - * macro detect at compile time the type of the first argument + * macro detect at compile time the type of the first argument * and choose the correct function to be called. * * \note This macro should only be used for integer types. @@ -586,7 +598,7 @@ static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta ); /** * Atomically decrement the content depending on the type. This - * macro detect at compile time the type of the first argument + * macro detect at compile time the type of the first argument * and choose the correct function to be called. * * \note This macro should only be used for integer types. diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index 496fecdcc40..16b03b485f3 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -6,25 +6,23 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /* Inline C implementation of the functions defined in atomic.h */ -#ifdef HAVE_STDLIB_H #include -#endif /********************************************************************** * @@ -61,7 +59,7 @@ static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int delta) { int32_t oldval; - + do { oldval = *addr; } while (0 == opal_atomic_cmpset_32(addr, oldval, oldval + delta)); @@ -76,7 +74,7 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta) { int32_t oldval; - + do { oldval = *addr; } while (0 == opal_atomic_cmpset_32(addr, oldval, oldval - delta)); @@ -108,7 +106,7 @@ static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) { int64_t oldval; - + do { oldval = *addr; } while (0 == opal_atomic_cmpset_64(addr, oldval, oldval + delta)); @@ -216,15 +214,15 @@ opal_atomic_cmpset_rel_xx(volatile void* addr, int64_t oldval, static inline int -opal_atomic_cmpset_ptr(volatile void* addr, - void* oldval, +opal_atomic_cmpset_ptr(volatile void* addr, + void* oldval, void* newval) { #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 - return opal_atomic_cmpset_32((int32_t*) addr, (unsigned long) oldval, + return opal_atomic_cmpset_32((int32_t*) addr, (unsigned long) oldval, (unsigned long) newval); #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 - return opal_atomic_cmpset_64((int64_t*) addr, (unsigned long) oldval, + return opal_atomic_cmpset_64((int64_t*) addr, (unsigned long) oldval, (unsigned long) newval); #else abort(); @@ -233,15 +231,15 @@ opal_atomic_cmpset_ptr(volatile void* addr, static inline int 
-opal_atomic_cmpset_acq_ptr(volatile void* addr, - void* oldval, +opal_atomic_cmpset_acq_ptr(volatile void* addr, + void* oldval, void* newval) { #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 - return opal_atomic_cmpset_acq_32((int32_t*) addr, (unsigned long) oldval, + return opal_atomic_cmpset_acq_32((int32_t*) addr, (unsigned long) oldval, (unsigned long) newval); #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 - return opal_atomic_cmpset_acq_64((int64_t*) addr, (unsigned long) oldval, + return opal_atomic_cmpset_acq_64((int64_t*) addr, (unsigned long) oldval, (unsigned long) newval); #else abort(); @@ -249,15 +247,15 @@ opal_atomic_cmpset_acq_ptr(volatile void* addr, } -static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr, - void* oldval, +static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr, + void* oldval, void* newval) { #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_CMPSET_32 - return opal_atomic_cmpset_rel_32((int32_t*) addr, (unsigned long) oldval, + return opal_atomic_cmpset_rel_32((int32_t*) addr, (unsigned long) oldval, (unsigned long) newval); #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_CMPSET_64 - return opal_atomic_cmpset_rel_64((int64_t*) addr, (unsigned long) oldval, + return opal_atomic_cmpset_rel_64((int64_t*) addr, (unsigned long) oldval, (unsigned long) newval); #else abort(); @@ -276,6 +274,30 @@ static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr, #endif /* (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) */ +#if (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64) + +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32 + +#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_32((int32_t *) addr) +#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_32((int32_t *) addr, (int32_t) newval) + +#define OPAL_HAVE_ATOMIC_LLSC_PTR 1 + +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64 + +#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_64((int64_t *) addr) +#define 
opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_64((int64_t *) addr, (int64_t) newval) + +#define OPAL_HAVE_ATOMIC_LLSC_PTR 1 + +#endif + +#endif /* (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)*/ + +#if !defined(OPAL_HAVE_ATOMIC_LLSC_PTR) +#define OPAL_HAVE_ATOMIC_LLSC_PTR 0 +#endif + #if OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64 @@ -325,19 +347,19 @@ opal_atomic_sub_xx(volatile void* addr, int32_t value, size_t length) } #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 -static inline int32_t opal_atomic_add_ptr( volatile void* addr, +static inline int32_t opal_atomic_add_ptr( volatile void* addr, void* delta ) { return opal_atomic_add_32((int32_t*) addr, (unsigned long) delta); } #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64 -static inline int64_t opal_atomic_add_ptr( volatile void* addr, +static inline int64_t opal_atomic_add_ptr( volatile void* addr, void* delta ) { return opal_atomic_add_64((int64_t*) addr, (unsigned long) delta); } #else -static inline int32_t opal_atomic_add_ptr( volatile void* addr, +static inline int32_t opal_atomic_add_ptr( volatile void* addr, void* delta ) { abort(); @@ -346,19 +368,19 @@ static inline int32_t opal_atomic_add_ptr( volatile void* addr, #endif #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 -static inline int32_t opal_atomic_sub_ptr( volatile void* addr, +static inline int32_t opal_atomic_sub_ptr( volatile void* addr, void* delta ) { return opal_atomic_sub_32((int32_t*) addr, (unsigned long) delta); } #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32 -static inline int64_t opal_atomic_sub_ptr( volatile void* addr, +static inline int64_t opal_atomic_sub_ptr( volatile void* addr, void* delta ) { return opal_atomic_sub_64((int64_t*) addr, (unsigned long) delta); } #else -static inline int32_t opal_atomic_sub_ptr( volatile void* addr, +static inline int32_t opal_atomic_sub_ptr( volatile void* addr, void* delta ) { abort(); @@ -375,7 +397,7 @@ static inline int32_t opal_atomic_sub_ptr( volatile 
void* addr, *********************************************************************/ #ifdef OPAL_NEED_INLINE_ATOMIC_SPINLOCKS -/* +/* * Lock initialization function. It set the lock to UNLOCKED. */ static inline void diff --git a/opal/include/opal/sys/cma.h b/opal/include/opal/sys/cma.h index 6592bd95feb..91187af4c5d 100644 --- a/opal/include/opal/sys/cma.h +++ b/opal/include/opal/sys/cma.h @@ -1,20 +1,27 @@ /* - * Copyright (c) 2011-2012 IBM Corporation. All rights reserved. - * + * Copyright (c) 2011-2012 IBM Corporation. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ */ /** @file * - * Cross Memory Attach syscall definitions. + * Cross Memory Attach syscall definitions. * - * These are only needed temporarily until these new syscalls + * These are only needed temporarily until these new syscalls * are incorporated into glibc */ #ifndef OPAL_SYS_CMA_H #define OPAL_SYS_CMA_H 1 +#if !defined(OPAL_ASSEMBLY_ARCH) +/* need opal_config.h for the assembly architecture */ #include "opal_config.h" +#endif #include "opal/sys/architecture.h" @@ -30,7 +37,7 @@ /* Cross Memory Attach is so far only supported under linux */ -#if OPAL_ASSEMBLY_ARCH == OPAL_AMD64 +#if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 #define __NR_process_vm_readv 310 #define __NR_process_vm_writev 311 #elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 @@ -45,14 +52,49 @@ #elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 #define __NR_process_vm_readv 351 #define __NR_process_vm_writev 352 +#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM + +#define __NR_process_vm_readv 376 +#define __NR_process_vm_writev 377 + +#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 + +/* ARM64 uses the asm-generic syscall numbers */ + +#define __NR_process_vm_readv 270 +#define __NR_process_vm_writev 271 + +#elif OPAL_ASSEMBLY_ARCH == OPAL_MIPS + +#if _MIPS_SIM == _MIPS_SIM_ABI32 + +#define 
__NR_process_vm_readv 4345 +#define __NR_process_vm_writev 4346 + +#elif _MIPS_SIM == _MIPS_SIM_ABI64 + +#define __NR_process_vm_readv 5304 +#define __NR_process_vm_writev 5305 + +#elif _MIPS_SIM == _MIPS_SIM_NABI32 + +#define __NR_process_vm_readv 6309 +#define __NR_process_vm_writev 6310 + +#else + +#error "Unsupported MIPS architecture for process_vm_readv and process_vm_writev syscalls" + +#endif + #else #error "Unsupported architecture for process_vm_readv and process_vm_writev syscalls" #endif static inline ssize_t -process_vm_readv(pid_t pid, - const struct iovec *lvec, +process_vm_readv(pid_t pid, + const struct iovec *lvec, unsigned long liovcnt, const struct iovec *rvec, unsigned long riovcnt, @@ -62,8 +104,8 @@ process_vm_readv(pid_t pid, } static inline ssize_t -process_vm_writev(pid_t pid, - const struct iovec *lvec, +process_vm_writev(pid_t pid, + const struct iovec *lvec, unsigned long liovcnt, const struct iovec *rvec, unsigned long riovcnt, diff --git a/opal/include/opal/sys/gcc_builtin/Makefile.am b/opal/include/opal/sys/gcc_builtin/Makefile.am new file mode 100644 index 00000000000..a717786a5e9 --- /dev/null +++ b/opal/include/opal/sys/gcc_builtin/Makefile.am @@ -0,0 +1,25 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011 Sandia National Laboratories. All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am + +headers += \ + opal/sys/gcc_builtin/atomic.h diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h new file mode 100644 index 00000000000..82b75f47d8d --- /dev/null +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -0,0 +1,214 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2013 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_SYS_ARCH_ATOMIC_H +#define OPAL_SYS_ARCH_ATOMIC_H 1 + +#include + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 + +#define OPAL_HAVE_ATOMIC_MATH_32 1 +#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_ADD_32 1 +#define OPAL_HAVE_ATOMIC_SUB_32 1 +#define OPAL_HAVE_ATOMIC_SWAP_32 1 +#define OPAL_HAVE_ATOMIC_MATH_64 1 +#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_ADD_64 1 +#define OPAL_HAVE_ATOMIC_SUB_64 1 +#define OPAL_HAVE_ATOMIC_SWAP_64 1 + + +static inline void opal_atomic_mb(void) +{ + __atomic_thread_fence (__ATOMIC_SEQ_CST); +} + +static inline void opal_atomic_rmb(void) +{ + __atomic_thread_fence (__ATOMIC_ACQUIRE); +} + +static inline void opal_atomic_wmb(void) +{ + __atomic_thread_fence (__ATOMIC_RELEASE); +} + +#define MB() opal_atomic_mb() + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + +static inline int opal_atomic_cmpset_acq_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + + +static inline int opal_atomic_cmpset_rel_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_RELEASE, __ATOMIC_RELAXED); +} + +static inline int opal_atomic_cmpset_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + +static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newval) +{ + int32_t oldval; + 
__atomic_exchange (addr, &newval, &oldval, __ATOMIC_RELAXED); + return oldval; +} + +static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int32_t delta) +{ + return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); +} + +static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) +{ + return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); +} + +static inline int opal_atomic_cmpset_acq_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + +static inline int opal_atomic_cmpset_rel_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_RELEASE, __ATOMIC_RELAXED); +} + + +static inline int opal_atomic_cmpset_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + +static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) +{ + int64_t oldval; + __atomic_exchange (addr, &newval, &oldval, __ATOMIC_RELAXED); + return oldval; +} + +static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) +{ + return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); +} + +static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) +{ + return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); +} + +#if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128 + +#define OPAL_HAVE_ATOMIC_CMPSET_128 1 + +static inline int opal_atomic_cmpset_128 (volatile opal_int128_t *addr, + opal_int128_t oldval, opal_int128_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + +#elif defined(OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128) && OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 + +#define OPAL_HAVE_ATOMIC_CMPSET_128 1 + +/* __atomic 
version is not lock-free so use legacy __sync version */ + +static inline int opal_atomic_cmpset_128 (volatile opal_int128_t *addr, + opal_int128_t oldval, opal_int128_t newval) +{ + return __sync_bool_compare_and_swap (addr, oldval, newval); +} + +#endif + +#if defined(__HLE__) + +#include + +#define OPAL_HAVE_ATOMIC_SPINLOCKS 1 + +static inline void opal_atomic_init (opal_atomic_lock_t* lock, int32_t value) +{ + lock->u.lock = value; +} + +static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) +{ + int ret = __atomic_exchange_n (&lock->u.lock, OPAL_ATOMIC_LOCKED, + __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE); + if (OPAL_ATOMIC_LOCKED == ret) { + /* abort the transaction */ + _mm_pause (); + return 1; + } + + return 0; +} + +static inline void opal_atomic_lock (opal_atomic_lock_t *lock) +{ + while (OPAL_ATOMIC_LOCKED == __atomic_exchange_n (&lock->u.lock, OPAL_ATOMIC_LOCKED, + __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE)) { + /* abort the transaction */ + _mm_pause (); + } +} + +static inline void opal_atomic_unlock (opal_atomic_lock_t *lock) +{ + __atomic_store_n (&lock->u.lock, OPAL_ATOMIC_UNLOCKED, + __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE); +} + +#endif + +#endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/ia32/Makefile.am b/opal/include/opal/sys/ia32/Makefile.am index 92820a6d47f..b3a0efcd554 100644 --- a/opal/include/opal/sys/ia32/Makefile.am +++ b/opal/include/opal/sys/ia32/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/ia32/atomic.h b/opal/include/opal/sys/ia32/atomic.h index d5528a5baf2..a923e67d122 100644 --- a/opal/include/opal/sys/ia32/atomic.h +++ b/opal/include/opal/sys/ia32/atomic.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,9 +16,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -96,7 +96,7 @@ static inline int opal_atomic_cmpset_32(volatile int32_t *addr, : "=qm" (ret), "+a" (oldval), "+m" (*addr) : "q"(newval) : "memory", "cc"); - + return (int)ret; } @@ -119,16 +119,16 @@ static inline int opal_atomic_cmpset_32(volatile int32_t *addr, #endif /* On Linux the EBX register is used by the shared libraries - * to keep the global offset. In same time this register is + * to keep the global offset. In same time this register is * required by the cmpxchg8b instruction (as an input parameter). - * This conflict force us to save the EBX before the cmpxchg8b + * This conflict force us to save the EBX before the cmpxchg8b * and to restore it afterward. */ static inline int opal_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval, int64_t newval) { - /* + /* * Compare EDX:EAX with m64. If equal, set ZF and load ECX:EBX into * m64. Else, clear ZF and load m64 into EDX:EAX. 
*/ diff --git a/opal/include/opal/sys/ia32/timer.h b/opal/include/opal/sys/ia32/timer.h index 2a74dbba190..bb4c94d71ff 100644 --- a/opal/include/opal/sys/ia32/timer.h +++ b/opal/include/opal/sys/ia32/timer.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -35,9 +35,9 @@ opal_sys_timer_get_cycles(void) int tmp; __asm__ __volatile__( - "xchg{l} {%%}ebx, %1\n" + "xchgl %%ebx, %1\n" "cpuid\n" - "xchg{l} {%%}ebx, %1\n" + "xchgl %%ebx, %1\n" "rdtsc\n" : "=A"(ret), "=r"(tmp) :: "ecx"); diff --git a/opal/include/opal/sys/ia32/update.sh b/opal/include/opal/sys/ia32/update.sh index ee9754f61e7..8d7107f362b 100644 --- a/opal/include/opal/sys/ia32/update.sh +++ b/opal/include/opal/sys/ia32/update.sh @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/ia64/Makefile.am b/opal/include/opal/sys/ia64/Makefile.am index eb23679428b..b189dc22d44 100644 --- a/opal/include/opal/sys/ia64/Makefile.am +++ b/opal/include/opal/sys/ia64/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/ia64/atomic.h b/opal/include/opal/sys/ia64/atomic.h index e0068f66d52..eee362543b0 100644 --- a/opal/include/opal/sys/ia64/atomic.h +++ b/opal/include/opal/sys/ia64/atomic.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/include/opal/sys/ia64/timer.h b/opal/include/opal/sys/ia64/timer.h index ec37131e8a2..36356730aec 100644 --- a/opal/include/opal/sys/ia64/timer.h +++ b/opal/include/opal/sys/ia64/timer.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/include/opal/sys/ia64/update.sh b/opal/include/opal/sys/ia64/update.sh index 793b2f669e0..0f2f4af1eea 100644 --- a/opal/include/opal/sys/ia64/update.sh +++ b/opal/include/opal/sys/ia64/update.sh @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/mips/Makefile.am b/opal/include/opal/sys/mips/Makefile.am index 0ec5c443d34..cf7f925b209 100644 --- a/opal/include/opal/sys/mips/Makefile.am +++ b/opal/include/opal/sys/mips/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2008 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/mips/atomic.h b/opal/include/opal/sys/mips/atomic.h index c31ce153aff..a4714f413a7 100644 --- a/opal/include/opal/sys/mips/atomic.h +++ b/opal/include/opal/sys/mips/atomic.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/include/opal/sys/mips/timer.h b/opal/include/opal/sys/mips/timer.h index 646f974a045..b93689c908d 100644 --- a/opal/include/opal/sys/mips/timer.h +++ b/opal/include/opal/sys/mips/timer.h @@ -3,9 +3,9 @@ * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/include/opal/sys/mips/update.sh b/opal/include/opal/sys/mips/update.sh index cf42acc814d..94d8ed2714b 100644 --- a/opal/include/opal/sys/mips/update.sh +++ b/opal/include/opal/sys/mips/update.sh @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/osx/Makefile.am b/opal/include/opal/sys/osx/Makefile.am index 2427b77d5f4..012ada40296 100644 --- a/opal/include/opal/sys/osx/Makefile.am +++ b/opal/include/opal/sys/osx/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/osx/atomic.h b/opal/include/opal/sys/osx/atomic.h index 029d98ca007..748fffdcef3 100644 --- a/opal/include/opal/sys/osx/atomic.h +++ b/opal/include/opal/sys/osx/atomic.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2010 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/include/opal/sys/powerpc/Makefile.am b/opal/include/opal/sys/powerpc/Makefile.am index 210028f737d..612dd2e4d7f 100644 --- a/opal/include/opal/sys/powerpc/Makefile.am +++ b/opal/include/opal/sys/powerpc/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 80b8b07626c..639101d2ec0 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,6 +42,8 @@ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_SWAP_32 1 +#define OPAL_HAVE_ATOMIC_LLSC_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -47,6 +52,11 @@ #if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT #define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_SWAP_64 1 +#define OPAL_HAVE_ATOMIC_LLSC_64 1 +#define OPAL_HAVE_ATOMIC_MATH_64 1 +#define OPAL_HAVE_ATOMIC_ADD_64 1 +#define OPAL_HAVE_ATOMIC_SUB_64 1 #endif @@ -114,6 +124,16 @@ void opal_atomic_wmb(void) #define OPAL_ASM_ADDR(a) (a) #endif +#if defined(__PGI) +/* work-around for bug in PGI 16.5-16.7 where the compiler fails to + * correctly emit load instructions for 64-bit operands. without this + * it will emit lwz instead of ld to load the 64-bit operand. */ +#define OPAL_ASM_VALUE64(x) (void *)(intptr_t) (x) +#else +#define OPAL_ASM_VALUE64(x) x +#endif + + static inline int opal_atomic_cmpset_32(volatile int32_t *addr, int32_t oldval, int32_t newval) { @@ -133,6 +153,32 @@ static inline int opal_atomic_cmpset_32(volatile int32_t *addr, return (ret == oldval); } +static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) +{ + int32_t ret; + + __asm__ __volatile__ ("lwarx %0, 0, %1 \n\t" + : "=&r" (ret) + : "r" (addr) + ); + return ret; +} + +static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) +{ + int32_t ret, foo; + + __asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t" + " li %0,0 \n\t" + " bne- 1f \n\t" + " ori %0,%0,1 \n\t" + "1:" + : "=r" (ret), "=m" (*addr), "=r" (foo) + : "r" (addr), "r" (newval) + : "cc", "memory"); + return ret; +} + /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_32 nor atomic_?mb can be inlined). 
Instead, we "inline" them by hand in @@ -157,12 +203,59 @@ static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr, return opal_atomic_cmpset_32(addr, oldval, newval); } +static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) +{ + int32_t ret; + + __asm__ __volatile__ ("1: lwarx %0, 0, %2 \n\t" + " stwcx. %3, 0, %2 \n\t" + " bne- 1b \n\t" + : "=&r" (ret), "=m" (*addr) + : "r" (addr), "r" (newval) + : "cc", "memory"); + + return ret; +} + #endif /* OPAL_GCC_INLINE_ASSEMBLY */ #if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) #if OPAL_GCC_INLINE_ASSEMBLY + +static inline int64_t opal_atomic_add_64 (volatile int64_t* v, int64_t inc) +{ + int64_t t; + + __asm__ __volatile__("1: ldarx %0, 0, %3 \n\t" + " add %0, %2, %0 \n\t" + " stdcx. %0, 0, %3 \n\t" + " bne- 1b \n\t" + : "=&r" (t), "=m" (*v) + : "r" (OPAL_ASM_VALUE64(inc)), "r" OPAL_ASM_ADDR(v), "m" (*v) + : "cc"); + + return t; +} + + +static inline int64_t opal_atomic_sub_64 (volatile int64_t* v, int64_t dec) +{ + int64_t t; + + __asm__ __volatile__( + "1: ldarx %0,0,%3 \n\t" + " subf %0,%2,%0 \n\t" + " stdcx. 
%0,0,%3 \n\t" + " bne- 1b \n\t" + : "=&r" (t), "=m" (*v) + : "r" (OPAL_ASM_VALUE64(dec)), "r" OPAL_ASM_ADDR(v), "m" (*v) + : "cc"); + + return t; +} + static inline int opal_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval, int64_t newval) { @@ -176,12 +269,38 @@ static inline int opal_atomic_cmpset_64(volatile int64_t *addr, " bne- 1b \n\t" "2:" : "=&r" (ret), "=m" (*addr) - : "r" (addr), "r" (oldval), "r" (newval), "m" (*addr) + : "r" (addr), "r" (OPAL_ASM_VALUE64(oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr) : "cc", "memory"); - + return (ret == oldval); } +static inline int64_t opal_atomic_ll_64(volatile int64_t *addr) +{ + int64_t ret; + + __asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" + : "=&r" (ret) + : "r" (addr) + ); + return ret; +} + +static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval) +{ + int32_t ret; + + __asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t" + " li %0,0 \n\t" + " bne- 1f \n\t" + " ori %0,%0,1 \n\t" + "1:" + : "=r" (ret) + : "r" (addr), "r" (OPAL_ASM_VALUE64(newval)) + : "cc", "memory"); + return ret; +} + /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_64 nor atomic_?mb can be inlined). Instead, we "inline" them by hand in @@ -206,6 +325,20 @@ static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr, return opal_atomic_cmpset_64(addr, oldval, newval); } +static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval) +{ + int64_t ret; + + __asm__ __volatile__ ("1: ldarx %0, 0, %2 \n\t" + " stdcx. 
%3, 0, %2 \n\t" + " bne- 1b \n\t" + : "=&r" (ret), "=m" (*addr) + : "r" (addr), "r" (OPAL_ASM_VALUE64(newval)) + : "cc", "memory"); + + return ret; +} + #endif /* OPAL_GCC_INLINE_ASSEMBLY */ #elif (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32) && OPAL_ASM_SUPPORT_64BIT @@ -246,10 +379,10 @@ static inline int opal_atomic_cmpset_64(volatile int64_t *addr, "subfic r9,r5,0 \n\t" "adde %0,r9,r5 \n\t" : "=&r" (ret) - : "r"OPAL_ASM_ADDR(addr), + : "r"OPAL_ASM_ADDR(addr), "m"(oldval), "m"(newval) : "r4", "r5", "r9", "cc", "memory"); - + return ret; } diff --git a/opal/include/opal/sys/powerpc/timer.h b/opal/include/opal/sys/powerpc/timer.h index 7155edbcf4e..543352d7af0 100644 --- a/opal/include/opal/sys/powerpc/timer.h +++ b/opal/include/opal/sys/powerpc/timer.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/include/opal/sys/powerpc/update.sh b/opal/include/opal/sys/powerpc/update.sh index 8c2ae0c8698..095868d4fb5 100644 --- a/opal/include/opal/sys/powerpc/update.sh +++ b/opal/include/opal/sys/powerpc/update.sh @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/sparcv9/Makefile.am b/opal/include/opal/sys/sparcv9/Makefile.am index 6703f1d7490..a16ecf17ea9 100644 --- a/opal/include/opal/sys/sparcv9/Makefile.am +++ b/opal/include/opal/sys/sparcv9/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/sparcv9/atomic.h b/opal/include/opal/sys/sparcv9/atomic.h index 4ee217b5909..57fb722b1d9 100644 --- a/opal/include/opal/sys/sparcv9/atomic.h +++ b/opal/include/opal/sys/sparcv9/atomic.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -169,10 +169,10 @@ static inline int opal_atomic_cmpset_acq_64( volatile int64_t *addr, int64_t oldval, int64_t newval) { int rc; - + rc = opal_atomic_cmpset_64(addr, oldval, newval); opal_atomic_rmb(); - + return rc; } diff --git a/opal/include/opal/sys/sparcv9/timer.h b/opal/include/opal/sys/sparcv9/timer.h index e9ce7faea8b..5e79c6d1c9a 100644 --- a/opal/include/opal/sys/sparcv9/timer.h +++ b/opal/include/opal/sys/sparcv9/timer.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -46,7 +46,7 @@ opal_sys_timer_get_cycles(void) __asm__ __volatile__("rd %%tick, %0 \n" "srlx %0, 32, %1 " : - "=r"(a), "=r"(b) + "=r"(a), "=r"(b) ); ret = (0x00000000FFFFFFFF & a) | (((opal_timer_t) b) << 32); diff --git a/opal/include/opal/sys/sparcv9/update.sh b/opal/include/opal/sys/sparcv9/update.sh index 2f3da1f3303..69c7931863d 100644 --- a/opal/include/opal/sys/sparcv9/update.sh +++ b/opal/include/opal/sys/sparcv9/update.sh @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/sync_builtin/Makefile.am b/opal/include/opal/sys/sync_builtin/Makefile.am index fbb8a608f54..1c2686811dd 100644 --- a/opal/include/opal/sys/sync_builtin/Makefile.am +++ b/opal/include/opal/sys/sync_builtin/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2011 Sandia National Laboratories. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/include/opal/sys/sync_builtin/atomic.h b/opal/include/opal/sys/sync_builtin/atomic.h index 43bccef637d..0f18039ff66 100644 --- a/opal/include/opal/sys/sync_builtin/atomic.h +++ b/opal/include/opal/sys/sync_builtin/atomic.h @@ -6,17 +6,17 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,7 +31,7 @@ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 static inline void opal_atomic_mb(void) -{ +{ __sync_synchronize(); } @@ -41,7 +41,7 @@ static inline void opal_atomic_rmb(void) } static inline void opal_atomic_wmb(void) -{ +{ __sync_synchronize(); } @@ -86,6 +86,8 @@ static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) return __sync_sub_and_fetch(addr, delta); } +#if OPAL_ASM_SYNC_HAVE_64BIT + #define OPAL_HAVE_ATOMIC_CMPSET_64 1 static inline int opal_atomic_cmpset_acq_64( volatile int64_t *addr, int64_t oldval, int64_t newval) @@ -105,17 +107,6 @@ static inline int opal_atomic_cmpset_64( volatile int64_t *addr, return __sync_bool_compare_and_swap(addr, oldval, newval); } -#if OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 -static inline int opal_atomic_cmpset_128 (volatile opal_int128_t *addr, - opal_int128_t oldval, opal_int128_t newval) -{ - return __sync_bool_compare_and_swap(addr, oldval, newval); -} - -#define OPAL_HAVE_ATOMIC_CMPSET_128 1 - -#endif - #define OPAL_HAVE_ATOMIC_MATH_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) @@ -129,4 +120,17 @@ static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) return __sync_sub_and_fetch(addr, delta); } +#endif + +#if OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 +static inline int opal_atomic_cmpset_128 (volatile opal_int128_t *addr, + opal_int128_t oldval, opal_int128_t newval) +{ + return __sync_bool_compare_and_swap(addr, oldval, newval); +} + +#define OPAL_HAVE_ATOMIC_CMPSET_128 1 + +#endif + #endif /* ! 
OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/timer.h b/opal/include/opal/sys/timer.h index 4e4fdbb6bbd..014903dbe01 100644 --- a/opal/include/opal/sys/timer.h +++ b/opal/include/opal/sys/timer.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,14 +6,17 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2016 Broadcom Limited. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -79,11 +83,13 @@ BEGIN_C_DECLS opal/mca/timer/linux/configure.m4. Or not. 
*/ #if defined(DOXYGEN) -/* don't include system-level gorp when generating doxygen files */ -#elif OPAL_ASSEMBLY_ARCH == OPAL_AMD64 -#include "opal/sys/amd64/timer.h" +/* don't include system-level gorp when generating doxygen files */ +#elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 +#include "opal/sys/x86_64/timer.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_ARM #include "opal/sys/arm/timer.h" +#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +#include "opal/sys/arm64/timer.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 #include "opal/sys/ia32/timer.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_IA64 @@ -108,6 +114,17 @@ typedef long opal_timer_t; #endif #endif +#ifndef OPAL_HAVE_SYS_TIMER_IS_MONOTONIC + +#define OPAL_HAVE_SYS_TIMER_IS_MONOTONIC 1 + +static inline bool opal_sys_timer_is_monotonic (void) +{ + return OPAL_TIMER_MONOTONIC; +} + +#endif + END_C_DECLS #endif /* OPAL_SYS_TIMER_H */ diff --git a/opal/include/opal/sys/x86_64/Makefile.am b/opal/include/opal/sys/x86_64/Makefile.am new file mode 100644 index 00000000000..dfdd2392391 --- /dev/null +++ b/opal/include/opal/sys/x86_64/Makefile.am @@ -0,0 +1,25 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am + +headers += \ + opal/sys/x86_64/atomic.h \ + opal/sys/x86_64/timer.h diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h new file mode 100644 index 00000000000..41b47839d3b --- /dev/null +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -0,0 +1,276 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2010 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. + * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#ifndef OPAL_SYS_ARCH_ATOMIC_H +#define OPAL_SYS_ARCH_ATOMIC_H 1 + +/* + * On x86_64, we use cmpxchg. + */ + + +#define SMPLOCK "lock; " +#define MB() __asm__ __volatile__("": : :"memory") + + +/********************************************************************** + * + * Define constants for AMD64 / x86_64 / EM64T / ... 
+ * + *********************************************************************/ +#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 + +#define OPAL_HAVE_ATOMIC_CMPSET_32 1 + +#define OPAL_HAVE_ATOMIC_CMPSET_64 1 + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#if OPAL_GCC_INLINE_ASSEMBLY + +static inline void opal_atomic_mb(void) +{ + MB(); +} + + +static inline void opal_atomic_rmb(void) +{ + MB(); +} + + +static inline void opal_atomic_wmb(void) +{ + MB(); +} + +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ +#if OPAL_GCC_INLINE_ASSEMBLY + +static inline int opal_atomic_cmpset_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + unsigned char ret; + __asm__ __volatile__ ( + SMPLOCK "cmpxchgl %3,%2 \n\t" + "sete %0 \n\t" + : "=qm" (ret), "+a" (oldval), "+m" (*addr) + : "q"(newval) + : "memory", "cc"); + + return (int)ret; +} + +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + +#define opal_atomic_cmpset_acq_32 opal_atomic_cmpset_32 +#define opal_atomic_cmpset_rel_32 opal_atomic_cmpset_32 + +#if OPAL_GCC_INLINE_ASSEMBLY + +static inline int opal_atomic_cmpset_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + unsigned char ret; + __asm__ __volatile__ ( + SMPLOCK "cmpxchgq %3,%2 \n\t" + "sete %0 \n\t" + : "=qm" (ret), "+a" (oldval), "+m" (*((volatile long*)addr)) + : "q"(newval) + : "memory", "cc" + ); + + return (int)ret; +} + +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + +#define opal_atomic_cmpset_acq_64 opal_atomic_cmpset_64 +#define opal_atomic_cmpset_rel_64 opal_atomic_cmpset_64 + +#if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T + +static inline int opal_atomic_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, + 
opal_int128_t newval) +{ + unsigned char ret; + + /* cmpxchg16b compares the value at the address with rax:rdx (low:high). if the values are + * the same the contents of rbx:rcx are stored at the address. in all cases the value stored + * at the address is returned in rax:rdx. */ + __asm__ __volatile__ (SMPLOCK "cmpxchg16b (%%rsi) \n\t" + "sete %0 \n\t" + : "=qm" (ret) + : "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]), + "a" (((int64_t *)&oldval)[0]), "d" (((int64_t *)&oldval)[1]) + : "memory", "cc"); + + return (int) ret; +} + +#define OPAL_HAVE_ATOMIC_CMPSET_128 1 + +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + + +#if OPAL_GCC_INLINE_ASSEMBLY + +#define OPAL_HAVE_ATOMIC_SWAP_32 1 + +#define OPAL_HAVE_ATOMIC_SWAP_64 1 + +static inline int32_t opal_atomic_swap_32( volatile int32_t *addr, + int32_t newval) +{ + int32_t oldval; + + __asm__ __volatile__("xchg %1, %0" : + "=r" (oldval), "=m" (*addr) : + "0" (newval), "m" (*addr) : + "memory"); + return oldval; +} + +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + +#if OPAL_GCC_INLINE_ASSEMBLY + +static inline int64_t opal_atomic_swap_64( volatile int64_t *addr, + int64_t newval) +{ + int64_t oldval; + + __asm__ __volatile__("xchgq %1, %0" : + "=r" (oldval) : + "m" (*addr), "0" (newval) : + "memory"); + return oldval; +} + +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + + + +#if OPAL_GCC_INLINE_ASSEMBLY + +#define OPAL_HAVE_ATOMIC_MATH_32 1 +#define OPAL_HAVE_ATOMIC_MATH_64 1 + +#define OPAL_HAVE_ATOMIC_ADD_32 1 + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type int + * + * Atomically adds @i to @v. 
+ */ +static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) +{ + int ret = i; + __asm__ __volatile__( + SMPLOCK "xaddl %1,%0" + :"=m" (*v), "+r" (ret) + :"m" (*v) + :"memory", "cc" + ); + return (ret+i); +} + +#define OPAL_HAVE_ATOMIC_ADD_64 1 + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type int64_t + * + * Atomically adds @i to @v. + */ +static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i) +{ + int64_t ret = i; + __asm__ __volatile__( + SMPLOCK "xaddq %1,%0" + :"=m" (*v), "+r" (ret) + :"m" (*v) + :"memory", "cc" + ); + return (ret+i); +} + +#define OPAL_HAVE_ATOMIC_SUB_32 1 + +/** + * atomic_sub - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type int + * + * Atomically subtracts @i from @v. + */ +static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) +{ + int ret = -i; + __asm__ __volatile__( + SMPLOCK "xaddl %1,%0" + :"=m" (*v), "+r" (ret) + :"m" (*v) + :"memory", "cc" + ); + return (ret-i); +} + +#define OPAL_HAVE_ATOMIC_SUB_64 1 + +/** + * atomic_sub - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type int64_t + * + * Atomically subtracts @i from @v. + */ +static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i) +{ + int64_t ret = -i; + __asm__ __volatile__( + SMPLOCK "xaddq %1,%0" + :"=m" (*v), "+r" (ret) + :"m" (*v) + :"memory", "cc" + ); + return (ret-i); +} + +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + +#endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/x86_64/timer.h b/opal/include/opal/sys/x86_64/timer.h new file mode 100644 index 00000000000..56b4e542955 --- /dev/null +++ b/opal/include/opal/sys/x86_64/timer.h @@ -0,0 +1,91 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_SYS_ARCH_TIMER_H +#define OPAL_SYS_ARCH_TIMER_H 1 + + +typedef uint64_t opal_timer_t; + +/* Using RDTSC(P) results in non-monotonic timers across cores */ +#undef OPAL_TIMER_MONOTONIC +#define OPAL_TIMER_MONOTONIC 0 + +#if OPAL_GCC_INLINE_ASSEMBLY + +/** + * http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html + */ +static inline opal_timer_t +opal_sys_timer_get_cycles(void) +{ + unsigned l, h; +#if !OPAL_ASSEMBLY_SUPPORTS_RDTSCP + __asm__ __volatile__ ("cpuid\n\t" + "rdtsc\n\t" + : "=a" (l), "=d" (h) + :: "rbx", "rcx"); +#else + /* If we need higher accuracy we should implement the algorithm proposed + * in the Intel document referenced above. However, in the context of MPI + * this function will be used as the backend for MPI_Wtime and as such + * can afford a small inaccuracy. 
+ */ + __asm__ __volatile__ ("rdtscp\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + "cpuid\n\t" + : "=r" (h), "=r" (l) + :: "rax", "rbx", "rcx", "rdx"); +#endif + return ((opal_timer_t)l) | (((opal_timer_t)h) << 32); +} + +static inline bool opal_sys_timer_is_monotonic (void) +{ + int64_t tmp; + int32_t cpuid1, cpuid2; + const int32_t level = 0x80000007; + + /* cpuid clobbers ebx but it must be restored for -fPIC so save + * then restore ebx */ + __asm__ volatile ("xchg %%rbx, %2\n" + "cpuid\n" + "xchg %%rbx, %2\n": + "=a" (cpuid1), "=d" (cpuid2), "=r" (tmp) : + "a" (level) : + "ecx", "ebx"); + /* bit 8 of edx contains the invariant tsc flag */ + return !!(cpuid2 & (1 << 8)); +} + +#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1 +#define OPAL_HAVE_SYS_TIMER_IS_MONOTONIC 1 + +#else + +opal_timer_t opal_sys_timer_get_cycles(void); + +#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1 + +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + +#endif /* ! OPAL_SYS_ARCH_TIMER_H */ diff --git a/opal/include/opal/sys/x86_64/update.sh b/opal/include/opal/sys/x86_64/update.sh new file mode 100644 index 00000000000..dbef4d61cd0 --- /dev/null +++ b/opal/include/opal/sys/x86_64/update.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +CFILE=/tmp/opal_atomic_$$.c + +trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 + +echo Updating asm.s from atomic.h and timer.h using gcc + +cat > $CFILE< +#include +#define static +#define inline +#define OPAL_GCC_INLINE_ASSEMBLY 1 +#include "atomic.h" +#include "timer.h" +EOF + +gcc -O3 -I. -S $CFILE -o asm.s diff --git a/opal/include/opal/types.h b/opal/include/opal/types.h index 7738f6807df..2089057365a 100644 --- a/opal/include/opal/types.h +++ b/opal/include/opal/types.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,9 +22,7 @@ #include "opal_config.h" -#ifdef HAVE_STDINT_H #include -#endif #ifdef HAVE_SYS_TYPES_H #include #endif @@ -152,7 +150,7 @@ static inline void* opal_ptr_ltop( uint64_t value ) #if defined(WORDS_BIGENDIAN) || !defined(HAVE_UNIX_BYTESWAP) static inline uint16_t opal_swap_bytes2(uint16_t val) __opal_attribute_const__; -static inline uint16_t opal_swap_bytes2(uint16_t val) +static inline uint16_t opal_swap_bytes2(uint16_t val) { union { uint16_t bigval; uint8_t arrayval[2]; @@ -197,7 +195,7 @@ static inline uint64_t opal_swap_bytes8(uint64_t val) r.arrayval[5] = w.arrayval[2]; r.arrayval[6] = w.arrayval[1]; r.arrayval[7] = w.arrayval[0]; - + return r.bigval; } diff --git a/opal/include/opal/version.h.in b/opal/include/opal/version.h.in index 9b4e3ea2155..ac9c429e52d 100644 --- a/opal/include/opal/version.h.in +++ b/opal/include/opal/version.h.in @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * This file should be included by any file that needs full diff --git a/opal/include/opal_config_bottom.h b/opal/include/opal_config_bottom.h index 3cfc7f8b8a7..a9c1c78e328 100644 --- a/opal/include/opal_config_bottom.h +++ b/opal/include/opal_config_bottom.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,9 +16,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * This file is included at the bottom of opal_config.h, and is @@ -30,7 +30,7 @@ * need to #ifndef/#endif protection here. */ -#ifndef OPAL_CONFIG_H +#ifndef OPAL_CONFIG_H #error "opal_config_bottom.h should only be included from opal_config.h" #endif @@ -241,10 +241,6 @@ # define OPAL_MODULE_DECLSPEC # endif -/* - * Do we have ? - */ -#ifdef HAVE_STDINT_H #if !defined(__STDC_LIMIT_MACROS) && (defined(c_plusplus) || defined (__cplusplus)) /* When using a C++ compiler, the max / min value #defines for std types are only included if __STDC_LIMIT_MACROS is set before @@ -252,10 +248,7 @@ #define __STDC_LIMIT_MACROS #endif #include "opal_config.h" -#include -#else #include "opal_stdint.h" -#endif /*********************************************************************** * @@ -346,6 +339,14 @@ typedef unsigned char bool; #define OPAL_PATH_SEP "/" #define OPAL_ENV_SEP ':' +#if defined(MAXHOSTNAMELEN) +#define OPAL_MAXHOSTNAMELEN (MAXHOSTNAMELEN + 1) +#elif defined(HOST_NAME_MAX) +#define OPAL_MAXHOSTNAMELEN (HOST_NAME_MAX + 1) +#else +/* SUSv2 guarantees that "Host names are limited to 255 bytes". */ +#define OPAL_MAXHOSTNAMELEN (255 + 1) +#endif /* * Do we want memory debugging? @@ -547,14 +548,9 @@ static inline uint16_t ntohs(uint16_t netvar) { return netvar; } #ifdef HAVE_HOSTLIB_H /* gethostname() */ #include - -#ifndef MAXHOSTNAMELEN -#define MAXHOSTNAMELEN 64 #endif #endif -#endif - /* If we're in C++, then just undefine restrict and then define it to nothing. 
"restrict" is not part of the C++ language, and we don't have a corresponding AC_CXX_RESTRICT to figure out what the C++ @@ -570,7 +566,7 @@ static inline uint16_t ntohs(uint16_t netvar) { return netvar; } want to protect others from the autoconf/automake-generated PACKAGE_ macros in opal_config.h. We can't put these undef's directly in opal_config.h because they'll be turned into #defines' - via autoconf. + via autoconf. So put them here in case any only else includes OMPI/ORTE/OPAL's config.h files. */ diff --git a/opal/include/opal_config_top.h b/opal/include/opal_config_top.h index 9b1837dbc04..1ce5267c389 100644 --- a/opal/include/opal_config_top.h +++ b/opal/include/opal_config_top.h @@ -1,9 +1,9 @@ /* * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * This file is included at the top of opal_config.h, and is @@ -15,7 +15,7 @@ * need to #ifndef/#endif protection here. */ -#ifndef OPAL_CONFIG_H +#ifndef OPAL_CONFIG_H #error "opal_config_top.h should only be included from opal_config.h" #endif @@ -24,7 +24,7 @@ you include a .h file from another project that defines these macros (e.g., gmp.h) and then include OMPI/ORTE/OPAL's config.h, you'll get a preprocessor conflict. So put these undef's here to - protect us from other package's PACKAGE_ macros. + protect us from other package's PACKAGE_ macros. Note that we can't put them directly in opal_config.h (e.g., via AH_TOP) because they will be turned into #define's by autoconf. 
*/ diff --git a/opal/include/opal_stdint.h b/opal/include/opal_stdint.h index d08b22073b9..4089cb55a99 100644 --- a/opal/include/opal_stdint.h +++ b/opal/include/opal_stdint.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,14 +6,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * This file includes the C99 stdint.h file if available, and otherwise @@ -27,113 +32,12 @@ * Include what we can and define what is missing. 
*/ #include +#include #ifdef HAVE_SYS_TYPES_H #include #endif -/* 8-bit */ - -#if SIZEOF_CHAR == 1 - -#ifndef HAVE_INT8_T -typedef signed char int8_t; -#endif - -#ifndef HAVE_UINT8_T -typedef unsigned char uint8_t; -#endif - -#else - -#error Failed to define 8-bit types - -#endif - -/* 16-bit */ - -#if SIZEOF_SHORT == 2 - -#ifndef HAVE_INT16_T -typedef signed short int16_t; -#endif - -#ifndef HAVE_UINT16_T -typedef unsigned short uint16_t; -#endif - -#else - -#error Failed to define 16-bit types - -#endif - -/* 32-bit */ - -#if SIZEOF_INT == 4 - -#ifndef HAVE_INT32_T -typedef signed int int32_t; -#endif - -#ifndef HAVE_UINT32_T -typedef unsigned int uint32_t; -#endif - -#elif SIZEOF_LONG == 4 - -#ifndef HAVE_INT32_T -typedef signed long int32_t; -#endif - -#ifndef HAVE_UINT32_T -typedef unsigned long uint32_t; -#endif - -#else - -#error Failed to define 32-bit types - -#endif - -/* 64-bit */ - -#if SIZEOF_INT == 8 - -#ifndef HAVE_INT64_T -typedef signed int int64_t; -#endif - -#ifndef HAVE_UINT64_T -typedef unsigned int uint64_t; -#endif - -#elif SIZEOF_LONG == 8 - -#ifndef HAVE_INT64_T -typedef signed long int64_t; -#endif - -#ifndef HAVE_UINT64_T -typedef unsigned long uint64_t; -#endif - -#elif HAVE_LONG_LONG && SIZEOF_LONG_LONG == 8 - -#ifndef HAVE_INT64_T -typedef signed long long int64_t; -#endif - -#ifndef HAVE_UINT64_T -typedef unsigned long long uint64_t; -#endif - -#else - -#error Failed to define 64-bit types - -#endif - /* 128-bit */ #ifdef HAVE_INT128_T @@ -150,7 +54,7 @@ typedef uint128_t opal_uint128_t; /* Clang won't quietly accept "-pedantic", but GCC versions older than ~4.8 * won't quietly accept "-Wpedanic". The whole "#pragma GCC diagnostic ..." * facility only was added to GCC as of version 4.6. 
*/ -#if defined(__clang__) +#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 6) #pragma GCC diagnostic ignored "-Wpedantic" #else #pragma GCC diagnostic ignored "-pedantic" @@ -204,143 +108,8 @@ typedef unsigned long long uintptr_t; #endif -/* fix up some constants that may be missing */ -#ifndef SIZE_MAX -# if SIZEOF_VOID_P == SIZEOF_INT -# define SIZE_MAX UINT_MAX -# elif SIZEOF_VOID_P == SIZEOF_LONG -# define SIZE_MAX ULONG_MAX -# else -# error Failed to find value for SIZE_MAX -# endif -#endif /* ifndef SIZE_MAX */ - - /* inttypes.h printf specifiers */ -#ifdef HAVE_INTTYPES_H # include -#else - -# if SIZEOF_LONG == 8 -# define __PRI64_PREFIX "l" -# define __PRIPTR_PREFIX "l" -# else -# define __PRI64_PREFIX "ll" -# define __PRIPTR_PREFIX -# endif - -/* Decimal notation. */ -# define PRId8 "d" -# define PRId16 "d" -# define PRId32 "d" -# define PRId64 __PRI64_PREFIX "d" - -# define PRIdLEAST8 "d" -# define PRIdLEAST16 "d" -# define PRIdLEAST32 "d" -# define PRIdLEAST64 __PRI64_PREFIX "d" - -# define PRIdFAST8 "d" -# define PRIdFAST16 __PRIPTR_PREFIX "d" -# define PRIdFAST32 __PRIPTR_PREFIX "d" -# define PRIdFAST64 __PRI64_PREFIX "d" - -# define PRIi8 "i" -# define PRIi16 "i" -# define PRIi32 "i" -# define PRIi64 __PRI64_PREFIX "i" - -# define PRIiLEAST8 "i" -# define PRIiLEAST16 "i" -# define PRIiLEAST32 "i" -# define PRIiLEAST64 __PRI64_PREFIX "i" - -# define PRIiFAST8 "i" -# define PRIiFAST16 __PRIPTR_PREFIX "i" -# define PRIiFAST32 __PRIPTR_PREFIX "i" -# define PRIiFAST64 __PRI64_PREFIX "i" - -/* Octal notation. */ -# define PRIo8 "o" -# define PRIo16 "o" -# define PRIo32 "o" -# define PRIo64 __PRI64_PREFIX "o" - -# define PRIoLEAST8 "o" -# define PRIoLEAST16 "o" -# define PRIoLEAST32 "o" -# define PRIoLEAST64 __PRI64_PREFIX "o" - -# define PRIoFAST8 "o" -# define PRIoFAST16 __PRIPTR_PREFIX "o" -# define PRIoFAST32 __PRIPTR_PREFIX "o" -# define PRIoFAST64 __PRI64_PREFIX "o" - -/* Unsigned integers. 
*/ -# define PRIu8 "u" -# define PRIu16 "u" -# define PRIu32 "u" -# define PRIu64 __PRI64_PREFIX "u" - -# define PRIuLEAST8 "u" -# define PRIuLEAST16 "u" -# define PRIuLEAST32 "u" -# define PRIuLEAST64 __PRI64_PREFIX "u" - -# define PRIuFAST8 "u" -# define PRIuFAST16 __PRIPTR_PREFIX "u" -# define PRIuFAST32 __PRIPTR_PREFIX "u" -# define PRIuFAST64 __PRI64_PREFIX "u" - -/* lowercase hexadecimal notation. */ -# define PRIx8 "x" -# define PRIx16 "x" -# define PRIx32 "x" -# define PRIx64 __PRI64_PREFIX "x" - -# define PRIxLEAST8 "x" -# define PRIxLEAST16 "x" -# define PRIxLEAST32 "x" -# define PRIxLEAST64 __PRI64_PREFIX "x" - -# define PRIxFAST8 "x" -# define PRIxFAST16 __PRIPTR_PREFIX "x" -# define PRIxFAST32 __PRIPTR_PREFIX "x" -# define PRIxFAST64 __PRI64_PREFIX "x" - -/* UPPERCASE hexadecimal notation. */ -# define PRIX8 "X" -# define PRIX16 "X" -# define PRIX32 "X" -# define PRIX64 __PRI64_PREFIX "X" - -# define PRIXLEAST8 "X" -# define PRIXLEAST16 "X" -# define PRIXLEAST32 "X" -# define PRIXLEAST64 __PRI64_PREFIX "X" - -# define PRIXFAST8 "X" -# define PRIXFAST16 __PRIPTR_PREFIX "X" -# define PRIXFAST32 __PRIPTR_PREFIX "X" -# define PRIXFAST64 __PRI64_PREFIX "X" - -/* Macros for printing `intmax_t' and `uintmax_t'. */ -# define PRIdMAX __PRI64_PREFIX "d" -# define PRIiMAX __PRI64_PREFIX "i" -# define PRIoMAX __PRI64_PREFIX "o" -# define PRIuMAX __PRI64_PREFIX "u" -# define PRIxMAX __PRI64_PREFIX "x" -# define PRIXMAX __PRI64_PREFIX "X" - -/* Macros for printing `intptr_t' and `uintptr_t'. 
*/ -# define PRIdPTR __PRIPTR_PREFIX "d" -# define PRIiPTR __PRIPTR_PREFIX "i" -# define PRIoPTR __PRIPTR_PREFIX "o" -# define PRIuPTR __PRIPTR_PREFIX "u" -# define PRIxPTR __PRIPTR_PREFIX "x" -# define PRIXPTR __PRIPTR_PREFIX "X" - -#endif #ifndef PRIsize_t # if defined(ACCEPT_C99) diff --git a/opal/mca/Makefile.am b/opal/mca/Makefile.am index 8d3fca3fb4f..89dcb0f652b 100644 --- a/opal/mca/Makefile.am +++ b/opal/mca/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/allocator/Makefile.am b/opal/mca/allocator/Makefile.am index e8c1be97c51..2e9ddc087a9 100644 --- a/opal/mca/allocator/Makefile.am +++ b/opal/mca/allocator/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/allocator/allocator.h b/opal/mca/allocator/allocator.h index a4c11749eb5..77180462c3d 100644 --- a/opal/mca/allocator/allocator.h +++ b/opal/mca/allocator/allocator.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -19,7 +19,7 @@ * $HEADER$ */ /** - * @file + * @file * The public definition of the MCA Allocator framework. */ #ifndef MCA_ALLOCATOR_H @@ -27,7 +27,6 @@ #include "opal_config.h" #include "opal/mca/mca.h" -#include "opal/mca/mpool/mpool.h" BEGIN_C_DECLS @@ -38,18 +37,16 @@ struct mca_allocator_base_module_t; * The allocate function typedef for the function to be provided by the component. 
*/ typedef void* (*mca_allocator_base_module_alloc_fn_t)( - struct mca_allocator_base_module_t*, - size_t size, - size_t align, - mca_mpool_base_registration_t** registration); - + struct mca_allocator_base_module_t*, + size_t size, + size_t align); + /** * The realloc function typedef */ typedef void* (*mca_allocator_base_module_realloc_fn_t)( - struct mca_allocator_base_module_t*, - void*, size_t, - mca_mpool_base_registration_t** registration); + struct mca_allocator_base_module_t*, + void*, size_t); /** * Free function typedef @@ -63,16 +60,16 @@ typedef void(*mca_allocator_base_module_free_fn_t)( */ typedef int (*mca_allocator_base_module_compact_fn_t)( - struct mca_allocator_base_module_t* allocator + struct mca_allocator_base_module_t* allocator ); - + /** * cleanup (free) any resources held by allocator */ typedef int (*mca_allocator_base_module_finalize_fn_t)( - struct mca_allocator_base_module_t* allocator + struct mca_allocator_base_module_t* allocator ); /** @@ -81,16 +78,16 @@ typedef int (*mca_allocator_base_module_finalize_fn_t)( struct mca_allocator_base_module_t { mca_allocator_base_module_alloc_fn_t alc_alloc; /**< Allocate memory */ - mca_allocator_base_module_realloc_fn_t alc_realloc; + mca_allocator_base_module_realloc_fn_t alc_realloc; /**< Reallocate memory */ - mca_allocator_base_module_free_fn_t alc_free; + mca_allocator_base_module_free_fn_t alc_free; /**< Free memory */ - mca_allocator_base_module_compact_fn_t alc_compact; + mca_allocator_base_module_compact_fn_t alc_compact; /**< Return memory */ - mca_allocator_base_module_finalize_fn_t alc_finalize; + mca_allocator_base_module_finalize_fn_t alc_finalize; /**< Finalize and free everything */ /* memory pool and resources */ - struct mca_mpool_base_module_t* alc_mpool; + void *alc_context; }; /** * Convenience typedef. @@ -103,30 +100,27 @@ typedef struct mca_allocator_base_module_t mca_allocator_base_module_t; * provided by the module to the allocator framework. 
*/ -typedef void* (*mca_allocator_base_component_segment_alloc_fn_t)( - struct mca_mpool_base_module_t* module, - size_t* size, - mca_mpool_base_registration_t** registration); +typedef void* (*mca_allocator_base_component_segment_alloc_fn_t)(void *ctx, + size_t *size); /** - * A function to free memory from the control of the allocator framework + * A function to free memory from the control of the allocator framework * back to the system. This function is to be provided by the module to the * allocator framework. */ -typedef void (*mca_allocator_base_component_segment_free_fn_t)( - struct mca_mpool_base_module_t* module, - void* segment); +typedef void (*mca_allocator_base_component_segment_free_fn_t)(void *ctx, + void *segment); /** - * The function used to initialize the component. + * The function used to initialize the component. */ -typedef struct mca_allocator_base_module_t* +typedef struct mca_allocator_base_module_t* (*mca_allocator_base_component_init_fn_t)( bool enable_mpi_threads, mca_allocator_base_component_segment_alloc_fn_t segment_alloc, - mca_allocator_base_component_segment_free_fn_t segment_free, - struct mca_mpool_base_module_t* mpool + mca_allocator_base_component_segment_free_fn_t segment_free, + void *context ); /** @@ -134,11 +128,11 @@ typedef struct mca_allocator_base_module_t* * describes the component. */ struct mca_allocator_base_component_2_0_0_t { - mca_base_component_t allocator_version; + mca_base_component_t allocator_version; /**< The version of the component */ - mca_base_component_data_t allocator_data; + mca_base_component_data_t allocator_data; /**< The component metadata */ - mca_allocator_base_component_init_fn_t allocator_init; + mca_allocator_base_component_init_fn_t allocator_init; /**< The component initialization function. 
*/ }; diff --git a/opal/mca/allocator/base/Makefile.am b/opal/mca/allocator/base/Makefile.am index bda5d3cedbf..55ad8cfc293 100644 --- a/opal/mca/allocator/base/Makefile.am +++ b/opal/mca/allocator/base/Makefile.am @@ -5,19 +5,19 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # headers += \ - base/base.h + base/base.h libmca_allocator_la_SOURCES += \ - base/allocator_base_frame.c + base/allocator_base_frame.c diff --git a/opal/mca/allocator/base/allocator_base_frame.c b/opal/mca/allocator/base/allocator_base_frame.c index a87c2e19e8d..cf8559af2af 100644 --- a/opal/mca/allocator/base/allocator_base_frame.c +++ b/opal/mca/allocator/base/allocator_base_frame.c @@ -5,23 +5,21 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" #include -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/mca/mca.h" #include "opal/mca/base/base.h" diff --git a/opal/mca/allocator/base/base.h b/opal/mca/allocator/base/base.h index 1d52a941b8b..acdb44de79c 100644 --- a/opal/mca/allocator/base/base.h +++ b/opal/mca/allocator/base/base.h @@ -5,14 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -26,17 +28,18 @@ #include "opal/class/opal_list.h" #include "opal/mca/mca.h" #include "opal/mca/allocator/allocator.h" +#include "opal/mca/base/mca_base_framework.h" BEGIN_C_DECLS /** * Structure which describes a selected module. */ struct mca_allocator_base_selected_module_t { - opal_list_item_t super; + opal_list_item_t super; /**< Makes this an object of type opal_list_item */ - mca_allocator_base_component_t *allocator_component; + mca_allocator_base_component_t *allocator_component; /**< Info about the module */ - mca_allocator_base_module_t *allocator_module; + mca_allocator_base_module_t *allocator_module; /**< The function pointers for all the module's functions. */ }; /** @@ -45,10 +48,10 @@ struct mca_allocator_base_selected_module_t { typedef struct mca_allocator_base_selected_module_t mca_allocator_base_selected_module_t; /** - * Declaces mca_mpool_base_selected_module_t as a class. + * Declaces mca_allocator_base_selected_module_t as a class. 
*/ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_allocator_base_selected_module_t); - + OPAL_DECLSPEC mca_allocator_base_component_t* mca_allocator_component_lookup(const char* name); diff --git a/opal/mca/allocator/base/owner.txt b/opal/mca/allocator/base/owner.txt index 44825b1295e..2d23c9be654 100644 --- a/opal/mca/allocator/base/owner.txt +++ b/opal/mca/allocator/base/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: ? +owner: project status: maintenance diff --git a/opal/mca/allocator/basic/Makefile.am b/opal/mca/allocator/basic/Makefile.am index 2ff200ad1ca..48d497723bc 100644 --- a/opal/mca/allocator/basic/Makefile.am +++ b/opal/mca/allocator/basic/Makefile.am @@ -5,21 +5,21 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # sources = \ allocator_basic.c \ - allocator_basic.h + allocator_basic.h # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/opal/mca/allocator/basic/allocator_basic.c b/opal/mca/allocator/basic/allocator_basic.c index c212ac7483a..72c26db098d 100644 --- a/opal/mca/allocator/basic/allocator_basic.c +++ b/opal/mca/allocator/basic/allocator_basic.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,9 +16,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,13 +29,13 @@ mca_allocator_base_component_t mca_allocator_basic_component = { - + /* First, the mca_base_module_t struct containing meta information about the module itself */ - + { MCA_ALLOCATOR_BASE_VERSION_2_0_0, - + "basic", /* MCA module name */ OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, @@ -77,8 +77,8 @@ int mca_allocator_basic_component_close(void) mca_allocator_base_module_t* mca_allocator_basic_component_init( bool enable_mpi_threads, mca_allocator_base_component_segment_alloc_fn_t segment_alloc, - mca_allocator_base_component_segment_free_fn_t segment_free, - struct mca_mpool_base_module_t* mpool) + mca_allocator_base_component_segment_free_fn_t segment_free, + void *context) { mca_allocator_basic_module_t *module = (mca_allocator_basic_module_t *) malloc(sizeof(mca_allocator_basic_module_t)); @@ -91,13 +91,13 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init( module->super.alc_free = mca_allocator_basic_free; module->super.alc_compact = mca_allocator_basic_compact; module->super.alc_finalize = mca_allocator_basic_finalize; - module->super.alc_mpool = mpool; + module->super.alc_context = context; module->seg_alloc = segment_alloc; module->seg_free = segment_free; OBJ_CONSTRUCT(&module->seg_list, opal_list_t); OBJ_CONSTRUCT(&module->seg_lock, opal_mutex_t); OBJ_CONSTRUCT(&module->seg_descriptors, opal_free_list_t); - + opal_free_list_init (&module->seg_descriptors, sizeof(mca_allocator_basic_segment_t), opal_cache_line_size, @@ -110,13 +110,13 @@ mca_allocator_base_module_t* 
mca_allocator_basic_component_init( return &module->super; } - + /** * Combine adjacent segments together. */ static void mca_allocator_basic_combine_prev( - mca_allocator_basic_module_t* module, + mca_allocator_basic_module_t* module, mca_allocator_basic_segment_t* seg) { opal_list_item_t* item = opal_list_get_prev(seg); @@ -132,7 +132,7 @@ static void mca_allocator_basic_combine_prev( } static void mca_allocator_basic_combine_next( - mca_allocator_basic_module_t* module, + mca_allocator_basic_module_t* module, mca_allocator_basic_segment_t* seg) { opal_list_item_t *item = opal_list_get_next(seg); @@ -161,10 +161,9 @@ static void mca_allocator_basic_combine_next( */ void *mca_allocator_basic_alloc( - mca_allocator_base_module_t * base, + mca_allocator_base_module_t * base, size_t size, - size_t align, - mca_mpool_base_registration_t** registration) + size_t align) { mca_allocator_basic_module_t* module = (mca_allocator_basic_module_t*)base; mca_allocator_basic_segment_t* seg; @@ -198,7 +197,7 @@ void *mca_allocator_basic_alloc( /* request additional block */ allocated_size = size; - if(NULL == (addr = (unsigned char *)module->seg_alloc(module->super.alc_mpool, &allocated_size, registration))) { + if(NULL == (addr = (unsigned char *)module->seg_alloc(module->super.alc_context, &allocated_size))) { OPAL_THREAD_UNLOCK(&module->seg_lock); return NULL; } @@ -237,16 +236,15 @@ void *mca_allocator_basic_alloc( */ void * mca_allocator_basic_realloc( - mca_allocator_base_module_t * base, - void * ptr, - size_t size, - mca_mpool_base_registration_t** registration) + mca_allocator_base_module_t * base, + void * ptr, + size_t size) { unsigned char* addr = ((unsigned char*)ptr) - sizeof(size_t); size_t alloc_size = *(size_t*)addr; if(size <= alloc_size) return ptr; - addr = (unsigned char *)mca_allocator_basic_alloc(base,size,0,registration); + addr = (unsigned char *)mca_allocator_basic_alloc(base, size, 0); if(addr == NULL) return addr; memcpy(addr,ptr,alloc_size); @@ -285,7 
+283,7 @@ void mca_allocator_basic_free( mca_allocator_basic_combine_next(module, seg); OPAL_THREAD_UNLOCK(&module->seg_lock); return; - } + } /* otherwise continue to check next larger entry */ } else { diff --git a/opal/mca/allocator/basic/allocator_basic.h b/opal/mca/allocator/basic/allocator_basic.h index 9b4fa93c1c3..aa257457dbc 100644 --- a/opal/mca/allocator/basic/allocator_basic.h +++ b/opal/mca/allocator/basic/allocator_basic.h @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,8 +23,8 @@ * A generic memory basic allocator. 
**/ -#ifndef ALLOCATOR_BASIC_H -#define ALLOCATOR_BASIC_H +#ifndef ALLOCATOR_BASIC_H +#define ALLOCATOR_BASIC_H #include "opal_config.h" #include @@ -76,10 +76,10 @@ int mca_allocator_basic_component_close(void); mca_allocator_base_module_t* mca_allocator_basic_component_init( bool enable_mpi_threads, mca_allocator_base_component_segment_alloc_fn_t segment_alloc, - mca_allocator_base_component_segment_free_fn_t segment_free, - struct mca_mpool_base_module_t* module + mca_allocator_base_component_segment_free_fn_t segment_free, + void *ctx ); - + /** * Accepts a request for memory in a specific region defined by the * mca_allocator_basic_options_t struct and returns a pointer to memory in that @@ -92,10 +92,9 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init( * @retval NULL if the allocation was unsuccessful */ void * mca_allocator_basic_alloc( - mca_allocator_base_module_t * mem, - size_t size, - size_t align, - mca_mpool_base_registration_t** registration); + mca_allocator_base_module_t * mem, + size_t size, + size_t align); /** * Attempts to resize the passed region of memory into a larger or a smaller @@ -112,10 +111,9 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init( * */ void * mca_allocator_basic_realloc( - mca_allocator_base_module_t * mem, - void * ptr, - size_t size, - mca_mpool_base_registration_t** registration); + mca_allocator_base_module_t * mem, + void * ptr, + size_t size); /** * Frees the passed region of memory diff --git a/opal/mca/allocator/bucket/Makefile.am b/opal/mca/allocator/bucket/Makefile.am index dac7e6d0aa7..2726a044c1c 100644 --- a/opal/mca/allocator/bucket/Makefile.am +++ b/opal/mca/allocator/bucket/Makefile.am @@ -5,22 +5,22 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # sources = \ allocator_bucket.c \ allocator_bucket_alloc.c \ - allocator_bucket_alloc.h + allocator_bucket_alloc.h # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/opal/mca/allocator/bucket/allocator_bucket.c b/opal/mca/allocator/bucket/allocator_bucket.c index 19ef519eb2e..edaa6403031 100644 --- a/opal/mca/allocator/bucket/allocator_bucket.c +++ b/opal/mca/allocator/bucket/allocator_bucket.c @@ -6,32 +6,32 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reseved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" +#include "opal/mca/base/mca_base_var.h" #include "opal/mca/allocator/allocator.h" #include "opal/constants.h" #include "opal/mca/allocator/bucket/allocator_bucket_alloc.h" -#include "opal/mca/mpool/mpool.h" +#include "opal/mca/base/mca_base_var.h" struct mca_allocator_base_module_t* mca_allocator_bucket_module_init( bool enable_mpi_threads, mca_allocator_base_component_segment_alloc_fn_t segment_alloc, - mca_allocator_base_component_segment_free_fn_t segment_free, - struct mca_mpool_base_module_t* mpool - ); + mca_allocator_base_component_segment_free_fn_t segment_free, + void *context); int mca_allocator_bucket_module_open(void); @@ -39,8 +39,7 @@ int mca_allocator_bucket_module_close(void); void * mca_allocator_bucket_alloc_wrapper( struct mca_allocator_base_module_t* allocator, - size_t size, size_t align, - mca_mpool_base_registration_t** registration); + size_t size, size_t align); static int mca_allocator_num_buckets; @@ -65,8 +64,8 @@ int mca_allocator_bucket_finalize(struct mca_allocator_base_module_t* allocator) struct mca_allocator_base_module_t* mca_allocator_bucket_module_init( bool enable_mpi_threads, mca_allocator_base_component_segment_alloc_fn_t segment_alloc, - mca_allocator_base_component_segment_free_fn_t segment_free, - struct mca_mpool_base_module_t* mpool) + mca_allocator_base_component_segment_free_fn_t segment_free, + void *context) { size_t alloc_size = sizeof(mca_allocator_bucket_t); mca_allocator_bucket_t * retval; @@ -74,9 +73,9 @@ struct mca_allocator_base_module_t* mca_allocator_bucket_module_init( if(NULL == allocator) { return NULL; } - retval = mca_allocator_bucket_init((mca_allocator_base_module_t *) allocator, - mca_allocator_num_buckets, - segment_alloc, + retval = mca_allocator_bucket_init((mca_allocator_base_module_t *) allocator, + mca_allocator_num_buckets, + segment_alloc, segment_free); if(NULL == retval) { 
free(allocator); @@ -87,7 +86,7 @@ struct mca_allocator_base_module_t* mca_allocator_bucket_module_init( allocator->super.alc_free = mca_allocator_bucket_free; allocator->super.alc_compact = mca_allocator_bucket_cleanup; allocator->super.alc_finalize = mca_allocator_bucket_finalize; - allocator->super.alc_mpool = mpool; + allocator->super.alc_context = context; return (mca_allocator_base_module_t *) allocator; } @@ -110,18 +109,17 @@ int mca_allocator_bucket_module_close(void) { void * mca_allocator_bucket_alloc_wrapper( struct mca_allocator_base_module_t* allocator, - size_t size, - size_t align, - mca_mpool_base_registration_t** registration) + size_t size, + size_t align) { if(0 == align){ - return mca_allocator_bucket_alloc(allocator, size, registration); + return mca_allocator_bucket_alloc(allocator, size); } - return mca_allocator_bucket_alloc_align(allocator, size, align, registration); -} + return mca_allocator_bucket_alloc_align(allocator, size, align); +} -mca_allocator_base_component_t mca_allocator_bucket_component = { +mca_allocator_base_component_t mca_allocator_bucket_component = { /* First, the mca_base_module_t struct containing meta information about the module itself */ diff --git a/opal/mca/allocator/bucket/allocator_bucket_alloc.c b/opal/mca/allocator/bucket/allocator_bucket_alloc.c index bd2634d9e42..be3db944d06 100644 --- a/opal/mca/allocator/bucket/allocator_bucket_alloc.c +++ b/opal/mca/allocator/bucket/allocator_bucket_alloc.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 IBM Corp., All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -71,10 +71,8 @@ mca_allocator_bucket_t * mca_allocator_bucket_init( * region or NULL if there was an error * */ -void * mca_allocator_bucket_alloc( - mca_allocator_base_module_t * mem, - size_t size, - mca_mpool_base_registration_t** registration) +void * mca_allocator_bucket_alloc(mca_allocator_base_module_t * mem, + size_t size) { mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem; /* initialize for the later bit shifts */ @@ -101,36 +99,36 @@ void * mca_allocator_bucket_alloc( mem_options->buckets[bucket_num].free_chunk = chunk->u.next_free; chunk->u.bucket = bucket_num; /* go past the header */ - chunk += 1; + chunk += 1; /*release the lock */ OPAL_THREAD_UNLOCK(&(mem_options->buckets[bucket_num].lock)); return((void *) chunk); } /* figure out the size of bucket we need */ allocated_size = bucket_size; - /* we have to add in the size of the segment header into the + /* we have to add in the size of the segment header into the * amount we need to request */ allocated_size += sizeof(mca_allocator_bucket_segment_head_t); /* attempt to get the memory */ segment_header = (mca_allocator_bucket_segment_head_t *) - mem_options->get_mem_fn(mem_options->super.alc_mpool, &allocated_size, registration); + mem_options->get_mem_fn(mem_options->super.alc_context, &allocated_size); if(NULL == segment_header) { /* release the lock */ - OPAL_THREAD_UNLOCK(&(mem_options->buckets[bucket_num].lock)); + OPAL_THREAD_UNLOCK(&(mem_options->buckets[bucket_num].lock)); return(NULL); } /* if were allocated more memory then we actually need, then we will try to * break it up into multiple chunks in the current bucket */ allocated_size -= (sizeof(mca_allocator_bucket_segment_head_t) + bucket_size); - chunk = first_chunk = segment_header->first_chunk = - (mca_allocator_bucket_chunk_header_t *) (segment_header + 1); + chunk = first_chunk = segment_header->first_chunk = + 
(mca_allocator_bucket_chunk_header_t *) (segment_header + 1); /* add the segment into the segment list */ segment_header->next_segment = mem_options->buckets[bucket_num].segment_head; mem_options->buckets[bucket_num].segment_head = segment_header; if(allocated_size >= bucket_size) { - mem_options->buckets[bucket_num].free_chunk = + mem_options->buckets[bucket_num].free_chunk = (mca_allocator_bucket_chunk_header_t *) ((char *) chunk + bucket_size); - chunk->next_in_segment = (mca_allocator_bucket_chunk_header_t *) + chunk->next_in_segment = (mca_allocator_bucket_chunk_header_t *) ((char *)chunk + bucket_size); while(allocated_size >= bucket_size) { chunk = (mca_allocator_bucket_chunk_header_t *) ((char *) chunk + bucket_size); @@ -153,13 +151,10 @@ void * mca_allocator_bucket_alloc( /* * allocates an aligned region of memory */ -void * mca_allocator_bucket_alloc_align( - mca_allocator_base_module_t * mem, - size_t size, - size_t alignment, - mca_mpool_base_registration_t** registration) +void * mca_allocator_bucket_alloc_align(mca_allocator_base_module_t * mem, + size_t size, size_t alignment) { - mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem; + mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem; int bucket_num = 1; void * ptr; size_t aligned_max_size, bucket_size; @@ -168,16 +163,16 @@ void * mca_allocator_bucket_alloc_align( mca_allocator_bucket_chunk_header_t * first_chunk; mca_allocator_bucket_segment_head_t * segment_header; char * aligned_memory; - + /* since we do not have a way to get pre aligned memory, we need to request * a chunk then return an aligned spot in it. 
In the worst case we need * the requested size plus the alignment and the header size */ aligned_max_size = size + alignment + sizeof(mca_allocator_bucket_chunk_header_t) - + sizeof(mca_allocator_bucket_segment_head_t); + + sizeof(mca_allocator_bucket_segment_head_t); bucket_size = size + sizeof(mca_allocator_bucket_chunk_header_t); - allocated_size = aligned_max_size; - /* get some memory */ - ptr = mem_options->get_mem_fn(mem_options->super.alc_mpool, &allocated_size, registration); + allocated_size = aligned_max_size; + /* get some memory */ + ptr = mem_options->get_mem_fn(mem_options->super.alc_context, &allocated_size); if(NULL == ptr) { return(NULL); } @@ -188,7 +183,7 @@ void * mca_allocator_bucket_alloc_align( /* we want to align the memory right after the header, so we go past the header */ aligned_memory = (char *) (first_chunk + 1); - /* figure out how much the alignment is off by */ + /* figure out how much the alignment is off by */ alignment_off = ((size_t) aligned_memory) % alignment; aligned_memory += (alignment - alignment_off); /* we now have an aligned piece of memory. 
Now we have to put the chunk @@ -199,7 +194,7 @@ void * mca_allocator_bucket_alloc_align( bucket_num++; } bucket_size = 1; - bucket_size <<= MCA_ALLOCATOR_BUCKET_1_BITSHIFTS + bucket_num; + bucket_size <<= MCA_ALLOCATOR_BUCKET_1_BITSHIFTS + bucket_num; /* if were allocated more memory then we actually need, then we will try to * break it up into multiple chunks in the current bucket */ @@ -236,11 +231,8 @@ void * mca_allocator_bucket_alloc_align( /* * function to reallocate the segment of memory */ -void * mca_allocator_bucket_realloc( - mca_allocator_base_module_t * mem, - void * ptr, - size_t size, - mca_mpool_base_registration_t** registration) +void * mca_allocator_bucket_realloc(mca_allocator_base_module_t * mem, + void * ptr, size_t size) { mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem; /* initialize for later bit shifts */ @@ -261,7 +253,7 @@ void * mca_allocator_bucket_realloc( return(ptr); } /* we need a new space in memory, so let's get it */ - ret_ptr = mca_allocator_bucket_alloc((mca_allocator_base_module_t *) mem_options, size, registration); + ret_ptr = mca_allocator_bucket_alloc((mca_allocator_base_module_t *) mem_options, size); if(NULL == ret_ptr) { /* we were unable to get a larger area of memory */ return(NULL); @@ -270,7 +262,7 @@ void * mca_allocator_bucket_realloc( memcpy(ret_ptr, ptr, bucket_size); /* free the old area in memory */ mca_allocator_bucket_free((mca_allocator_base_module_t *) mem_options, ptr); - return(ret_ptr); + return(ret_ptr); } @@ -281,10 +273,10 @@ void * mca_allocator_bucket_realloc( void mca_allocator_bucket_free(mca_allocator_base_module_t * mem, void * ptr) { mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem; - mca_allocator_bucket_chunk_header_t * chunk = (mca_allocator_bucket_chunk_header_t *) ptr - 1; + mca_allocator_bucket_chunk_header_t * chunk = (mca_allocator_bucket_chunk_header_t *) ptr - 1; int bucket_num = chunk->u.bucket; 
OPAL_THREAD_LOCK(&(mem_options->buckets[bucket_num].lock)); - chunk->u.next_free = mem_options->buckets[bucket_num].free_chunk; + chunk->u.next_free = mem_options->buckets[bucket_num].free_chunk; mem_options->buckets[bucket_num].free_chunk = chunk; OPAL_THREAD_UNLOCK(&(mem_options->buckets[bucket_num].lock)); } @@ -321,7 +313,7 @@ int mca_allocator_bucket_cleanup(mca_allocator_base_module_t * mem) empty = true; segment = mem_options->buckets[i].segment_head; while( (true == empty) && (NULL != segment) ) { - first_chunk = segment->first_chunk; + first_chunk = segment->first_chunk; chunk = first_chunk; /* determine if the segment is free */ do { @@ -341,7 +333,7 @@ int mca_allocator_bucket_cleanup(mca_allocator_base_module_t * mem) next_segment = segment->next_segment; /* free the memory */ if(mem_options->free_mem_fn) - mem_options->free_mem_fn(mem->alc_mpool, segment); + mem_options->free_mem_fn(mem->alc_context, segment); segment = next_segment; } mem_options->buckets[i].free_chunk = NULL; @@ -349,7 +341,7 @@ int mca_allocator_bucket_cleanup(mca_allocator_base_module_t * mem) } else { /* traverse the list of segment headers until we hit NULL */ while(NULL != *segment_header) { - first_chunk = (*segment_header)->first_chunk; + first_chunk = (*segment_header)->first_chunk; chunk = first_chunk; empty = true; /* determine if the segment is free */ @@ -370,7 +362,7 @@ int mca_allocator_bucket_cleanup(mca_allocator_base_module_t * mem) while(next_chunk->u.next_free != chunk) { next_chunk = next_chunk->u.next_free; } - next_chunk->u.next_free = chunk->u.next_free; + next_chunk->u.next_free = chunk->u.next_free; } } while((chunk = chunk->next_in_segment) != first_chunk); /* set the segment list to point to the next segment */ @@ -378,7 +370,7 @@ int mca_allocator_bucket_cleanup(mca_allocator_base_module_t * mem) *segment_header = segment->next_segment; /* free the memory */ if(mem_options->free_mem_fn) - mem_options->free_mem_fn(mem->alc_mpool, segment); + 
mem_options->free_mem_fn(mem->alc_context, segment); } else { /* go to next segment */ segment_header = &((*segment_header)->next_segment); diff --git a/opal/mca/allocator/bucket/allocator_bucket_alloc.h b/opal/mca/allocator/bucket/allocator_bucket_alloc.h index 0d07e846596..fe0b66e881e 100644 --- a/opal/mca/allocator/bucket/allocator_bucket_alloc.h +++ b/opal/mca/allocator/bucket/allocator_bucket_alloc.h @@ -1,18 +1,21 @@ -/** +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -20,8 +23,8 @@ * A generic memory bucket allocator. **/ -#ifndef ALLOCATOR_BUCKET_ALLOC_H -#define ALLOCATOR_BUCKET_ALLOC_H +#ifndef ALLOCATOR_BUCKET_ALLOC_H +#define ALLOCATOR_BUCKET_ALLOC_H #include "opal_config.h" #include @@ -35,7 +38,7 @@ BEGIN_C_DECLS * Structure for the header of each memory chunk */ struct mca_allocator_bucket_chunk_header_t { - struct mca_allocator_bucket_chunk_header_t * next_in_segment; + struct mca_allocator_bucket_chunk_header_t * next_in_segment; /**< The next chunk in the memory segment */ /** * Union which holds either a pointer to the next free chunk @@ -48,42 +51,42 @@ struct mca_allocator_bucket_chunk_header_t { * chunks a list of free elements.
*/ union u { - struct mca_allocator_bucket_chunk_header_t * next_free; + struct mca_allocator_bucket_chunk_header_t * next_free; /**< if the chunk is free this will point to the next free chunk in the bucket */ int bucket; /**< the bucket number it belongs to */ } u; /**< the union */ }; /** * Typedef so we don't have to use struct - */ + */ typedef struct mca_allocator_bucket_chunk_header_t mca_allocator_bucket_chunk_header_t; /** - * Structure that heads each segment + * Structure that heads each segment */ struct mca_allocator_bucket_segment_head_t { struct mca_allocator_bucket_chunk_header_t * first_chunk; /**< the first chunk of the header */ - struct mca_allocator_bucket_segment_head_t * next_segment; /**< the next segment in the + struct mca_allocator_bucket_segment_head_t * next_segment; /**< the next segment in the bucket */ }; /** * Typedef so we don't have to use struct */ typedef struct mca_allocator_bucket_segment_head_t mca_allocator_bucket_segment_head_t; - + /** * Structure for each bucket */ struct mca_allocator_bucket_bucket_t { mca_allocator_bucket_chunk_header_t * free_chunk; /**< the first free chunk of memory */ - opal_mutex_t lock; /**< the lock on the bucket */ + opal_mutex_t lock; /**< the lock on the bucket */ mca_allocator_bucket_segment_head_t * segment_head; /**< the list of segment headers */ }; /** * Typedef so we don't have to use struct */ typedef struct mca_allocator_bucket_bucket_t mca_allocator_bucket_bucket_t; - + /** * Structure that holds the necessary information for each area of memory */ @@ -91,7 +94,7 @@ struct mca_allocator_bucket_t { mca_allocator_base_module_t super; /**< makes this a child of class mca_allocator_t */ mca_allocator_bucket_bucket_t * buckets; /**< the array of buckets */ int num_buckets; /**< the number of buckets */ - mca_allocator_base_component_segment_alloc_fn_t get_mem_fn; + mca_allocator_base_component_segment_alloc_fn_t get_mem_fn; /**< pointer to the function to get more memory */ 
mca_allocator_base_component_segment_free_fn_t free_mem_fn; /**< pointer to the function to free memory */ @@ -130,9 +133,8 @@ typedef struct mca_allocator_bucket_t mca_allocator_bucket_t; * @retval NULL if the allocation was unsuccessful */ void * mca_allocator_bucket_alloc( - mca_allocator_base_module_t * mem, - size_t size, - mca_mpool_base_registration_t** registration); + mca_allocator_base_module_t * mem, + size_t size); /** * Accepts a request for memory in a specific region defined by the @@ -143,7 +145,7 @@ typedef struct mca_allocator_bucket_t mca_allocator_bucket_t; * memory. * @param size The size of the requested area of memory * @param alignment The requested alignment of the new area of memory. This - * MUST be a power of 2. + * MUST be a power of 2. * * @retval Pointer to the area of memory if the allocation was successful * @retval NULL if the allocation was unsuccessful @@ -151,9 +153,8 @@ typedef struct mca_allocator_bucket_t mca_allocator_bucket_t; */ void * mca_allocator_bucket_alloc_align( mca_allocator_base_module_t * mem, - size_t size, - size_t alignment, - mca_mpool_base_registration_t** registration); + size_t size, + size_t alignment); /** * Attempts to resize the passed region of memory into a larger or a smaller @@ -170,10 +171,9 @@ typedef struct mca_allocator_bucket_t mca_allocator_bucket_t; * */ void * mca_allocator_bucket_realloc( - mca_allocator_base_module_t * mem, - void * ptr, - size_t size, - mca_mpool_base_registration_t** registration); + mca_allocator_base_module_t * mem, + void * ptr, + size_t size); /** * Frees the passed region of memory diff --git a/opal/mca/backtrace/Makefile.am b/opal/mca/backtrace/Makefile.am index 93f6f6fb968..110acdb7de7 100644 --- a/opal/mca/backtrace/Makefile.am +++ b/opal/mca/backtrace/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/backtrace/backtrace.h b/opal/mca/backtrace/backtrace.h index 6b223b28bde..9ca5658cdea 100644 --- a/opal/mca/backtrace/backtrace.h +++ b/opal/mca/backtrace/backtrace.h @@ -6,16 +6,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,6 +27,7 @@ #include "opal/mca/mca.h" #include "opal/mca/base/base.h" +#include "opal/util/stacktrace.h" BEGIN_C_DECLS @@ -39,6 +41,8 @@ BEGIN_C_DECLS /* * Print back trace to FILE file with a prefix for each line. * First strip lines are not printed. + * If 'file' is NULL then the component should try to use the file descriptor + * saved in opal_stacktrace_output_fileno * * \note some attempts made to be signal safe. 
*/ diff --git a/opal/mca/backtrace/base/Makefile.am b/opal/mca/backtrace/base/Makefile.am index ed88ad439ea..a4acf9a658a 100644 --- a/opal/mca/backtrace/base/Makefile.am +++ b/opal/mca/backtrace/base/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/backtrace/base/backtrace_component.c b/opal/mca/backtrace/base/backtrace_component.c index 8fb321cfe17..995fa2328c6 100644 --- a/opal/mca/backtrace/base/backtrace_component.c +++ b/opal/mca/backtrace/base/backtrace_component.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/backtrace/base/base.h b/opal/mca/backtrace/base/base.h index a1e7fbaa72d..0015b781079 100644 --- a/opal/mca/backtrace/base/base.h +++ b/opal/mca/backtrace/base/base.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/opal/mca/backtrace/configure.m4 b/opal/mca/backtrace/configure.m4 index 6d94b88403f..0ee4261f3e8 100644 --- a/opal/mca/backtrace/configure.m4 +++ b/opal/mca/backtrace/configure.m4 @@ -6,14 +6,14 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/opal/mca/backtrace/execinfo/Makefile.am b/opal/mca/backtrace/execinfo/Makefile.am index 0fae37c363f..91338fdbe08 100644 --- a/opal/mca/backtrace/execinfo/Makefile.am +++ b/opal/mca/backtrace/execinfo/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/backtrace/execinfo/backtrace_execinfo.c b/opal/mca/backtrace/execinfo/backtrace_execinfo.c index 7666406cb50..0f17c514c1d 100644 --- a/opal/mca/backtrace/execinfo/backtrace_execinfo.c +++ b/opal/mca/backtrace/execinfo/backtrace_execinfo.c @@ -5,15 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,12 +35,16 @@ int opal_backtrace_print(FILE *file, char *prefix, int strip) { - int i, fd, len; + int i, len; int trace_size; void * trace[32]; char buf[6]; + int fd = opal_stacktrace_output_fileno; + + if( NULL != file ) { + fd = fileno(file); + } - fd = fileno (file); if (-1 == fd) { return OPAL_ERR_BAD_PARAM; } @@ -60,7 +65,7 @@ opal_backtrace_print(FILE *file, char *prefix, int strip) int -opal_backtrace_buffer(char ***message_out, int *len_out) +opal_backtrace_buffer(char ***message_out, int *len_out) { int trace_size; void * trace[32]; diff --git a/opal/mca/backtrace/execinfo/backtrace_execinfo_component.c b/opal/mca/backtrace/execinfo/backtrace_execinfo_component.c index 1bda3dd60b8..cb44d2ce692 100644 --- a/opal/mca/backtrace/execinfo/backtrace_execinfo_component.c +++ b/opal/mca/backtrace/execinfo/backtrace_execinfo_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/backtrace/execinfo/configure.m4 b/opal/mca/backtrace/execinfo/configure.m4 index 4d173931fd0..5ad80d2e43f 100644 --- a/opal/mca/backtrace/execinfo/configure.m4 +++ b/opal/mca/backtrace/execinfo/configure.m4 @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # AC_DEFUN([MCA_opal_backtrace_execinfo_PRIORITY], [30]) @@ -25,7 +25,7 @@ AC_DEFUN([MCA_opal_backtrace_execinfo_COMPILE_MODE], [ AC_MSG_RESULT([$$4]) ]) -# MCA_backtrace_execinfo_CONFIG(action-if-can-compile, +# MCA_backtrace_execinfo_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_backtrace_execinfo_CONFIG],[ @@ -37,6 +37,6 @@ AC_DEFUN([MCA_opal_backtrace_execinfo_CONFIG],[ [backtrace_execinfo_happy="yes"], [backtrace_execinfo_happy="no"]) - AS_IF([test "$backtrace_execinfo_happy" = "yes"], + AS_IF([test "$backtrace_execinfo_happy" = "yes"], [$1], [$2]) ]) diff --git a/opal/mca/backtrace/none/Makefile.am b/opal/mca/backtrace/none/Makefile.am index a8ca9f4a9a1..e6748bbe92d 100644 --- a/opal/mca/backtrace/none/Makefile.am +++ b/opal/mca/backtrace/none/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/backtrace/none/backtrace_none.c b/opal/mca/backtrace/none/backtrace_none.c index abfa740fea4..ba343f22dbf 100644 --- a/opal/mca/backtrace/none/backtrace_none.c +++ b/opal/mca/backtrace/none/backtrace_none.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" diff --git a/opal/mca/backtrace/none/backtrace_none_component.c b/opal/mca/backtrace/none/backtrace_none_component.c index 735d7b37d2c..9f820ea695a 100644 --- a/opal/mca/backtrace/none/backtrace_none_component.c +++ b/opal/mca/backtrace/none/backtrace_none_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/backtrace/none/configure.m4 b/opal/mca/backtrace/none/configure.m4 index 5191f257753..6882fd0c015 100644 --- a/opal/mca/backtrace/none/configure.m4 +++ b/opal/mca/backtrace/none/configure.m4 @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # AC_DEFUN([MCA_opal_backtrace_none_PRIORITY], [0]) @@ -26,7 +26,7 @@ AC_DEFUN([MCA_opal_backtrace_none_COMPILE_MODE], [ ]) -# MCA_backtrace_none_CONFIG(action-if-can-compile, +# MCA_backtrace_none_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_backtrace_none_CONFIG],[ diff --git a/opal/mca/backtrace/printstack/Makefile.am b/opal/mca/backtrace/printstack/Makefile.am index 0db500da440..a32e038e427 100644 --- a/opal/mca/backtrace/printstack/Makefile.am +++ b/opal/mca/backtrace/printstack/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/backtrace/printstack/backtrace_printstack.c b/opal/mca/backtrace/printstack/backtrace_printstack.c index 1c73b4d6765..214cacfb14b 100644 --- a/opal/mca/backtrace/printstack/backtrace_printstack.c +++ b/opal/mca/backtrace/printstack/backtrace_printstack.c @@ -5,15 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" @@ -27,7 +28,13 @@ int opal_backtrace_print(FILE *file, char *prefix, int strip) { - printstack(fileno(file)); + int fd = opal_stacktrace_output_fileno; + + if( NULL != file ) { + fd = fileno(file); + } + + printstack(fd); return OPAL_SUCCESS; } diff --git a/opal/mca/backtrace/printstack/backtrace_printstack_component.c b/opal/mca/backtrace/printstack/backtrace_printstack_component.c index 77f179accb2..3a37269969d 100644 --- a/opal/mca/backtrace/printstack/backtrace_printstack_component.c +++ b/opal/mca/backtrace/printstack/backtrace_printstack_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/backtrace/printstack/configure.m4 b/opal/mca/backtrace/printstack/configure.m4 index f44ccf65612..4a158e156b7 100644 --- a/opal/mca/backtrace/printstack/configure.m4 +++ b/opal/mca/backtrace/printstack/configure.m4 @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. 
All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # AC_DEFUN([MCA_opal_backtrace_printstack_PRIORITY], [30]) @@ -26,7 +26,7 @@ AC_DEFUN([MCA_opal_backtrace_printstack_COMPILE_MODE], [ ]) -# MCA_backtrace_printstack_CONFIG(action-if-can-compile, +# MCA_backtrace_printstack_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_backtrace_printstack_CONFIG],[ @@ -38,6 +38,6 @@ AC_DEFUN([MCA_opal_backtrace_printstack_CONFIG],[ [backtrace_printstack_happy="yes"], [backtrace_printstack_happy="no"]) - AS_IF([test "$backtrace_printstack_happy" = "yes"], + AS_IF([test "$backtrace_printstack_happy" = "yes"], [$1], [$2]) ]) diff --git a/opal/mca/base/Makefile.am b/opal/mca/base/Makefile.am index 08bbcaca60b..bd6145d354a 100644 --- a/opal/mca/base/Makefile.am +++ b/opal/mca/base/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/base/base.h b/opal/mca/base/base.h index 936f72684b9..1fdcbd899d7 100644 --- a/opal/mca/base/base.h +++ b/opal/mca/base/base.h @@ -6,17 +6,19 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -35,6 +37,7 @@ #include "opal/mca/base/mca_base_var.h" #include "opal/mca/base/mca_base_framework.h" #include "opal/util/cmd_line.h" +#include "opal/util/output.h" BEGIN_C_DECLS @@ -55,7 +58,7 @@ struct mca_base_component_priority_list_item_t { mca_base_component_list_item_t super; int cpli_priority; }; -typedef struct mca_base_component_priority_list_item_t +typedef struct mca_base_component_priority_list_item_t mca_base_component_priority_list_item_t; OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_base_component_priority_list_item_t); @@ -69,6 +72,30 @@ OPAL_DECLSPEC extern bool mca_base_component_disable_dlopen; OPAL_DECLSPEC extern char *mca_base_system_default_path; OPAL_DECLSPEC extern char *mca_base_user_default_path; +/* + * Standard verbosity levels + */ +enum { + /** total silence */ + MCA_BASE_VERBOSE_NONE = -1, + /** only errors are printed */ + MCA_BASE_VERBOSE_ERROR = 0, + /** emit messages about component selection, open, and unloading */ + MCA_BASE_VERBOSE_COMPONENT = 10, + /** also emit warnings */ + MCA_BASE_VERBOSE_WARN = 20, + /** also emit general, user-relevant information, such as rationale as to why certain choices + * or code paths were taken, information gleaned from probing the local system, etc. */ + MCA_BASE_VERBOSE_INFO = 40, + /** also emit relevant tracing information (e.g., which functions were invoked / + * call stack entry/exit info) */ + MCA_BASE_VERBOSE_TRACE = 60, + /** also emit Open MPI-developer-level (i.e.,
highly detailed) information */ + MCA_BASE_VERBOSE_DEBUG = 80, + /** also output anything else that might be useful */ + MCA_BASE_VERBOSE_MAX = 100, +}; + /* * Public functions */ @@ -78,7 +105,7 @@ OPAL_DECLSPEC extern char *mca_base_user_default_path; * * @return OPAL_SUCCESS Upon success * @return OPAL_ERROR Upon failure - * + * * This function starts up the entire MCA. It initializes a bunch * of built-in MCA parameters, and initialized the MCA component * repository. @@ -96,7 +123,7 @@ OPAL_DECLSPEC int mca_base_open(void); * @return OPAL_ERROR Upon failure * * This function closes down the entire MCA. It clears all MCA - * parameters and closes down the MCA component respository. + * parameters and closes down the MCA component repository. * * It must be the last MCA function invoked. It is normally invoked * during the finalize stage. @@ -110,7 +137,8 @@ OPAL_DECLSPEC int mca_base_close(void); OPAL_DECLSPEC int mca_base_select(const char *type_name, int output_id, opal_list_t *components_available, mca_base_module_t **best_module, - mca_base_component_t **best_component); + mca_base_component_t **best_component, + int *priority_out); /** * A function for component query functions to discover if they have @@ -215,7 +243,7 @@ OPAL_DECLSPEC void mca_base_component_close (const mca_base_component_t *compone */ void mca_base_component_unload (const mca_base_component_t *component, int output_id); -OPAL_DECLSPEC int mca_base_components_close(int output_id, opal_list_t *components_available, +OPAL_DECLSPEC int mca_base_components_close(int output_id, opal_list_t *components_available, const mca_base_component_t *skip); OPAL_DECLSPEC int mca_base_framework_components_close (struct mca_base_framework_t *framework, diff --git a/opal/mca/base/help-mca-base.txt b/opal/mca/base/help-mca-base.txt index 746904486fb..63bc471c1bd 100644 --- a/opal/mca/base/help-mca-base.txt +++ b/opal/mca/base/help-mca-base.txt @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of
Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI MCA error messages. diff --git a/opal/mca/base/help-mca-var.txt b/opal/mca/base/help-mca-var.txt index 8ebacee954f..316342bb005 100644 --- a/opal/mca/base/help-mca-var.txt +++ b/opal/mca/base/help-mca-var.txt @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI MCA error messages. diff --git a/opal/mca/base/mca_base_close.c b/opal/mca/base/mca_base_close.c index 11486e722ee..352ed01bec1 100644 --- a/opal/mca/base/mca_base_close.c +++ b/opal/mca/base/mca_base_close.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/base/mca_base_cmd_line.c b/opal/mca/base/mca_base_cmd_line.c index 1b272fc7270..d8319167011 100644 --- a/opal/mca/base/mca_base_cmd_line.c +++ b/opal/mca/base/mca_base_cmd_line.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,7 +31,7 @@ #include "opal/constants.h" -/* +/* * Private variables */ @@ -64,7 +64,7 @@ int mca_base_cmd_line_setup(opal_cmd_line_t *cmd) } { - opal_cmd_line_init_t entry = + opal_cmd_line_init_t entry = {"mca_base_param_file_prefix", '\0', "am", NULL, 1, NULL, OPAL_CMD_LINE_TYPE_STRING, "Aggregate MCA parameter set file list" @@ -113,7 +113,7 @@ int mca_base_cmd_line_process_args(opal_cmd_line_t *cmd, num_insts = opal_cmd_line_get_ninsts(cmd, OPAL_MCA_CMD_LINE_ID); params = values = NULL; for (i = 0; i < num_insts; ++i) { - if (OPAL_SUCCESS != (rc = process_arg(opal_cmd_line_get_param(cmd, OPAL_MCA_CMD_LINE_ID, i, 0), + if (OPAL_SUCCESS != (rc = process_arg(opal_cmd_line_get_param(cmd, OPAL_MCA_CMD_LINE_ID, i, 0), opal_cmd_line_get_param(cmd, OPAL_MCA_CMD_LINE_ID, i, 1), ¶ms, &values))) { return rc; @@ -130,7 +130,7 @@ int mca_base_cmd_line_process_args(opal_cmd_line_t *cmd, num_insts = opal_cmd_line_get_ninsts(cmd, "g"OPAL_MCA_CMD_LINE_ID); params = values = NULL; for (i = 0; i < num_insts; ++i) { - if (OPAL_SUCCESS != (rc = process_arg(opal_cmd_line_get_param(cmd, "g"OPAL_MCA_CMD_LINE_ID, i, 0), + if (OPAL_SUCCESS != (rc = process_arg(opal_cmd_line_get_param(cmd, "g"OPAL_MCA_CMD_LINE_ID, i, 0), opal_cmd_line_get_param(cmd, "g"OPAL_MCA_CMD_LINE_ID, i, 1), ¶ms, &values))) { return rc; @@ -190,7 +190,7 @@ static int process_arg(const char *param, const char *value, /* If we didn't already have an value for the same param, save this one away */ - + opal_argv_append_nosize(params, param); opal_argv_append_nosize(values, p1); free(p1); diff --git a/opal/mca/base/mca_base_component_compare.c b/opal/mca/base/mca_base_component_compare.c index 5e89028fdc5..aeec7bc7158 100644 --- a/opal/mca/base/mca_base_component_compare.c +++ b/opal/mca/base/mca_base_component_compare.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research 
Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,7 +36,7 @@ * may help the gentle reader to consider this an inverse comparison. * :-) */ -int +int mca_base_component_compare_priority(mca_base_component_priority_list_item_t *a, mca_base_component_priority_list_item_t *b) { @@ -53,7 +53,7 @@ mca_base_component_compare_priority(mca_base_component_priority_list_item_t *a, } -int mca_base_component_compare(const mca_base_component_t* aa, +int mca_base_component_compare(const mca_base_component_t* aa, const mca_base_component_t* bb) { int val; @@ -72,22 +72,22 @@ int mca_base_component_compare(const mca_base_component_t* aa, /* The names were equal, so compare the versions */ - if (aa->mca_component_major_version > + if (aa->mca_component_major_version > bb->mca_component_major_version) { return -1; - } else if (aa->mca_component_major_version < + } else if (aa->mca_component_major_version < bb->mca_component_major_version) { return 1; - } else if (aa->mca_component_minor_version > + } else if (aa->mca_component_minor_version > bb->mca_component_minor_version) { return -1; - } else if (aa->mca_component_minor_version < + } else if (aa->mca_component_minor_version < bb->mca_component_minor_version) { return 1; - } else if (aa->mca_component_release_version > + } else if (aa->mca_component_release_version > bb->mca_component_release_version) { return -1; - } else if (aa->mca_component_release_version < + } else if (aa->mca_component_release_version < bb->mca_component_release_version) { return 1; } @@ -97,11 +97,11 @@ int mca_base_component_compare(const mca_base_component_t* aa, /** - * compare but exclude the release 
version - declare compatible + * compare but exclude the release version - declare compatible * if the major/minor version are the same. */ int mca_base_component_compatible( - const mca_base_component_t* aa, + const mca_base_component_t* aa, const mca_base_component_t* bb) { int val; @@ -120,16 +120,16 @@ int mca_base_component_compatible( /* The names were equal, so compare the versions */ - if (aa->mca_component_major_version > + if (aa->mca_component_major_version > bb->mca_component_major_version) { return -1; - } else if (aa->mca_component_major_version < + } else if (aa->mca_component_major_version < bb->mca_component_major_version) { return 1; - } else if (aa->mca_component_minor_version > + } else if (aa->mca_component_minor_version > bb->mca_component_minor_version) { return -1; - } else if (aa->mca_component_minor_version < + } else if (aa->mca_component_minor_version < bb->mca_component_minor_version) { return 1; } @@ -142,7 +142,7 @@ int mca_base_component_compatible( */ char * mca_base_component_to_string(const mca_base_component_t *a) { char * str = NULL; - if(0 > asprintf(&str, "%s.%s.%d.%d", a->mca_type_name, + if(0 > asprintf(&str, "%s.%s.%d.%d", a->mca_type_name, a->mca_component_name, a->mca_component_major_version, a->mca_component_minor_version)) { return NULL; diff --git a/opal/mca/base/mca_base_component_find.c b/opal/mca/base/mca_base_component_find.c index 67e6d14853c..899673dfaf9 100644 --- a/opal/mca/base/mca_base_component_find.c +++ b/opal/mca/base/mca_base_component_find.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
@@ -14,10 +14,12 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -133,8 +135,8 @@ int mca_base_component_find (const char *directory, mca_base_framework_t *framew find_dyn_components(directory, framework, (const char**)requested_component_names, include_mode); } else { - opal_output_verbose(40, 0, - "mca: base: component_find: dso loading for %s MCA components disabled", + opal_output_verbose (MCA_BASE_VERBOSE_INFO, 0, + "mca: base: component_find: dso loading for %s MCA components disabled", framework->framework_name); } #endif @@ -193,11 +195,11 @@ int mca_base_components_filter (mca_base_framework_t *framework, uint32_t filter if (!can_use || (filter_flags & dummy->data.param_field) != filter_flags) { if (can_use && (filter_flags & MCA_BASE_METADATA_PARAM_CHECKPOINT) && !(MCA_BASE_METADATA_PARAM_CHECKPOINT & dummy->data.param_field)) { - opal_output_verbose(10, output_id, - "mca: base: components_filter: " - "(%s) Component %s is *NOT* Checkpointable - Disabled", - component->reserved, - component->mca_component_name); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca: base: components_filter: " + "(%s) Component %s is *NOT* Checkpointable - Disabled", + component->reserved, + component->mca_component_name); } opal_list_remove_item (components, &cli->super); @@ -206,11 +208,11 @@ int mca_base_components_filter (mca_base_framework_t *framework, uint32_t filter OBJ_RELEASE(cli); } else if (filter_flags & MCA_BASE_METADATA_PARAM_CHECKPOINT) { - opal_output_verbose(10, output_id, - "mca: base: components_filter: " - "(%s) Component %s is Checkpointable", - component->reserved, - component->mca_component_name); + opal_output_verbose 
(MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca: base: components_filter: " + "(%s) Component %s is Checkpointable", + component->reserved, + component->mca_component_name); } } @@ -272,7 +274,7 @@ static bool use_component(const bool include_mode, { bool found = false; const char **req_comp_name = requested_component_names; - + /* * If no selection is specified then we use all components * we can find. @@ -320,7 +322,7 @@ static int component_find_check (mca_base_framework_t *framework, char **request bool found = false; OPAL_LIST_FOREACH(cli, components, mca_base_component_list_item_t) { - if (0 == strcmp(requested_component_names[i], + if (0 == strcmp(requested_component_names[i], cli->cli_component->mca_component_name)) { found = true; break; @@ -328,9 +330,9 @@ static int component_find_check (mca_base_framework_t *framework, char **request } if (!found) { - char h[MAXHOSTNAMELEN]; + char h[OPAL_MAXHOSTNAMELEN]; gethostname(h, sizeof(h)); - opal_show_help("help-mca-base.txt", + opal_show_help("help-mca-base.txt", "find-available:not-valid", true, h, framework->framework_name, requested_component_names[i]); return OPAL_ERR_NOT_FOUND; @@ -364,7 +366,7 @@ int mca_base_component_parse_requested (const char *requested, bool *include_mod /* Double check to ensure that the user did not specify the negate character anywhere else in the value. */ if (NULL != strstr (requested, negate)) { - opal_show_help("help-mca-base.txt", + opal_show_help("help-mca-base.txt", "framework-param:too-many-negates", true, requested_orig); return OPAL_ERROR; diff --git a/opal/mca/base/mca_base_component_repository.c b/opal/mca/base/mca_base_component_repository.c index fc618ce0be8..9761bd995fc 100644 --- a/opal/mca/base/mca_base_component_repository.c +++ b/opal/mca/base/mca_base_component_repository.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,9 +16,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,6 +30,9 @@ #include #include #include +#ifdef HAVE_UNISTD_H +#include +#endif #include "opal/class/opal_list.h" #include "opal/mca/mca.h" @@ -314,10 +317,10 @@ int mca_base_component_repository_open (mca_base_framework_t *framework, char *struct_name = NULL; int vl, ret; - opal_output_verbose(40, 0, "mca_base_component_repository_open: examining dynamic %s MCA component \"%s\" at path %s", - ri->ri_type, ri->ri_name, ri->ri_path); + opal_output_verbose(MCA_BASE_VERBOSE_INFO, 0, "mca_base_component_repository_open: examining dynamic " + "%s MCA component \"%s\" at path %s", ri->ri_type, ri->ri_name, ri->ri_path); - vl = mca_base_component_show_load_errors ? 0 : 40; + vl = mca_base_component_show_load_errors ? MCA_BASE_VERBOSE_ERROR : MCA_BASE_VERBOSE_INFO; /* Ensure that this component is not already loaded (should only happen if it was statically loaded). 
It's an error if it's already @@ -327,7 +330,7 @@ int mca_base_component_repository_open (mca_base_framework_t *framework, OPAL_LIST_FOREACH(mitem, &framework->framework_components, mca_base_component_list_item_t) { if (0 == strcmp(mitem->cli_component->mca_component_name, ri->ri_name)) { - opal_output_verbose(40, 0, "mca_base_component_repository_open: already loaded (ignored)"); + opal_output_verbose (MCA_BASE_VERBOSE_INFO, 0, "mca_base_component_repository_open: already loaded (ignored)"); return OPAL_ERR_BAD_PARAM; } } @@ -336,7 +339,7 @@ int mca_base_component_repository_open (mca_base_framework_t *framework, mitem = NULL; if (NULL != ri->ri_dlhandle) { - opal_output_verbose(40, 0, "mca_base_component_repository_open: already loaded. returning cached component"); + opal_output_verbose (MCA_BASE_VERBOSE_INFO, 0, "mca_base_component_repository_open: already loaded. returning cached component"); mitem = OBJ_NEW(mca_base_component_list_item_t); if (NULL == mitem) { return OPAL_ERR_OUT_OF_RESOURCE; @@ -445,8 +448,8 @@ int mca_base_component_repository_open (mca_base_framework_t *framework, ri->ri_component_struct = mitem->cli_component = component_struct; opal_list_append(&framework->framework_components, &mitem->super); - opal_output_verbose(40, 0, "mca_base_component_repository_open: opened dynamic %s MCA component \"%s\"", - ri->ri_type, ri->ri_name); + opal_output_verbose (MCA_BASE_VERBOSE_INFO, 0, "mca_base_component_repository_open: opened dynamic %s MCA " + "component \"%s\"", ri->ri_type, ri->ri_name); return OPAL_SUCCESS; } while (0); diff --git a/opal/mca/base/mca_base_component_repository.h b/opal/mca/base/mca_base_component_repository.h index 8ba27f1da93..d480bb8cf4b 100644 --- a/opal/mca/base/mca_base_component_repository.h +++ b/opal/mca/base/mca_base_component_repository.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/base/mca_base_components_close.c b/opal/mca/base/mca_base_components_close.c index ed0fd380cbc..b79522fd034 100644 --- a/opal/mca/base/mca_base_components_close.c +++ b/opal/mca/base/mca_base_components_close.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,9 +33,9 @@ void mca_base_component_unload (const mca_base_component_t *component, int outpu int ret; /* Unload */ - opal_output_verbose(10, output_id, - "mca: base: close: unloading component %s", - component->mca_component_name); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca: base: close: unloading component %s", + component->mca_component_name); ret = mca_base_var_group_find (component->mca_project_name, component->mca_type_name, component->mca_component_name); @@ -51,9 +51,9 @@ void mca_base_component_close (const mca_base_component_t *component, int output /* Close */ if (NULL != component->mca_close_component) { component->mca_close_component(); - opal_output_verbose(10, output_id, - "mca: base: close: component %s closed", - component->mca_component_name); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca: base: close: component %s closed", + component->mca_component_name); } mca_base_component_unload (component, output_id); @@ -67,7 +67,7 @@ int mca_base_framework_components_close (mca_base_framework_t *framework, skip); } -int mca_base_components_close(int output_id, opal_list_t *components, +int mca_base_components_close(int output_id, opal_list_t *components, const mca_base_component_t *skip) { mca_base_component_list_item_t *cli, *next; diff --git a/opal/mca/base/mca_base_components_open.c b/opal/mca/base/mca_base_components_open.c index 76475333003..b18540d2068 100644 --- a/opal/mca/base/mca_base_components_open.c +++ b/opal/mca/base/mca_base_components_open.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * All rights reserved. * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,9 +33,6 @@ #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "opal/constants.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/runtime/opal_params.h" -#endif /* * Local functions @@ -95,13 +92,8 @@ static int open_components(mca_base_framework_t *framework) * * NTH: Logic moved to mca_base_components_filter. */ -#if (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1) - if (opal_base_distill_checkpoint_ready) { - open_only_flags |= MCA_BASE_METADATA_PARAM_CHECKPOINT; - } -#endif /* (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1) */ - /* If mca_base_framework_register_components was called with the MCA_BASE_COMPONENTS_ALL flag + /* If mca_base_framework_register_components was called with the MCA_BASE_COMPONENTS_ALL flag we need to trim down and close any extra components we do not want open */ ret = mca_base_components_filter (framework, open_only_flags); if (OPAL_SUCCESS != ret) { @@ -109,49 +101,50 @@ static int open_components(mca_base_framework_t *framework) } /* Announce */ - opal_output_verbose(10, output_id, "mca: base: components_open: opening %s components", - framework->framework_name); - + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, "mca: base: components_open: opening %s components", + framework->framework_name); + /* Traverse the list of components */ OPAL_LIST_FOREACH_SAFE(cli, next, components, mca_base_component_list_item_t) { const mca_base_component_t *component = cli->cli_component; - - opal_output_verbose(10, output_id, - "mca: base: components_open: found loaded component %s", - component->mca_component_name); + + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca: base: components_open: found loaded component %s", + 
component->mca_component_name); if (NULL != component->mca_open_component) { /* Call open if register didn't call it already */ ret = component->mca_open_component(); if (OPAL_SUCCESS == ret) { - opal_output_verbose(10, output_id, - "mca: base: components_open: " - "component %s open function successful", - component->mca_component_name); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca: base: components_open: " + "component %s open function successful", + component->mca_component_name); } else { if (OPAL_ERR_NOT_AVAILABLE != ret) { /* If the component returns OPAL_ERR_NOT_AVAILABLE, it's a cue to "silently ignore me" -- it's not a failure, it's just a way for the component to say - "nope!". + "nope!". Otherwise, however, display an error. We may end up displaying this twice, but it may go to separate streams. So better to be redundant than to not display the error in the stream where it was expected. */ - + if (mca_base_component_show_load_errors) { - opal_output(0, "mca: base: components_open: " - "component %s / %s open function failed", - component->mca_type_name, - component->mca_component_name); + opal_output_verbose (MCA_BASE_VERBOSE_ERROR, output_id, + "mca: base: components_open: component %s " + "/ %s open function failed", + component->mca_type_name, + component->mca_component_name); } - opal_output_verbose(10, output_id, - "mca: base: components_open: " - "component %s open function failed", - component->mca_component_name); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca: base: components_open: " + "component %s open function failed", + component->mca_component_name); } mca_base_component_close (component, output_id); @@ -161,8 +154,8 @@ static int open_components(mca_base_framework_t *framework) } } } - + /* All done */ - + return OPAL_SUCCESS; } diff --git a/opal/mca/base/mca_base_components_register.c b/opal/mca/base/mca_base_components_register.c index 54a06b29660..1598a533030 100644 --- 
a/opal/mca/base/mca_base_components_register.c +++ b/opal/mca/base/mca_base_components_register.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -74,27 +74,27 @@ static int register_components(mca_base_framework_t *framework) mca_base_component_t *component; mca_base_component_list_item_t *cli, *next; int output_id = framework->framework_output; - + /* Announce */ - opal_output_verbose(10, output_id, - "mca: base: components_register: registering framework %s components", - framework->framework_name); - + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca: base: components_register: registering framework %s components", + framework->framework_name); + /* Traverse the list of found components */ - + OPAL_LIST_FOREACH_SAFE(cli, next, &framework->framework_components, mca_base_component_list_item_t) { component = (mca_base_component_t *)cli->cli_component; - opal_output_verbose(10, output_id, + opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, output_id, "mca: base: components_register: found loaded component %s", component->mca_component_name); /* Call the component's MCA parameter registration function (or open if register doesn't exist) */ if (NULL == component->mca_register_component_params) { - opal_output_verbose(10, output_id, - "mca: base: components_register: " - "component %s has no register or open function", - component->mca_component_name); + opal_output_verbose 
(MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca: base: components_register: " + "component %s has no register or open function", + component->mca_component_name); ret = OPAL_SUCCESS; } else { ret = component->mca_register_component_params(); @@ -105,25 +105,26 @@ static int register_components(mca_base_framework_t *framework) /* If the component returns OPAL_ERR_NOT_AVAILABLE, it's a cue to "silently ignore me" -- it's not a failure, it's just a way for the component to say - "nope!". - + "nope!". + Otherwise, however, display an error. We may end up displaying this twice, but it may go to separate streams. So better to be redundant than to not display the error in the stream where it was expected. */ - + if (mca_base_component_show_load_errors) { - opal_output(0, "mca: base: components_register: " - "component %s / %s register function failed", - component->mca_type_name, - component->mca_component_name); + opal_output_verbose (MCA_BASE_VERBOSE_ERROR, output_id, + "mca: base: components_register: component %s " + "/ %s register function failed", + component->mca_type_name, + component->mca_component_name); } - opal_output_verbose(10, output_id, - "mca: base: components_register: " - "component %s register function failed", - component->mca_component_name); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca: base: components_register: " + "component %s register function failed", + component->mca_component_name); } opal_list_remove_item (&framework->framework_components, &cli->super); @@ -134,7 +135,7 @@ static int register_components(mca_base_framework_t *framework) } if (NULL != component->mca_register_component_params) { - opal_output_verbose (10, output_id, "mca: base: components_register: " + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, "mca: base: components_register: " "component %s register function successful", component->mca_component_name); } @@ -153,8 +154,8 @@ static int register_components(mca_base_framework_t 
*framework) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_CONSTANT, &component->mca_component_release_version); } - + /* All done */ - + return OPAL_SUCCESS; } diff --git a/opal/mca/base/mca_base_components_select.c b/opal/mca/base/mca_base_components_select.c index 2ec5af33664..cf45bf903ec 100644 --- a/opal/mca/base/mca_base_components_select.c +++ b/opal/mca/base/mca_base_components_select.c @@ -1,11 +1,15 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,19 +34,21 @@ int mca_base_select(const char *type_name, int output_id, opal_list_t *components_available, mca_base_module_t **best_module, - mca_base_component_t **best_component) + mca_base_component_t **best_component, + int *priority_out) { mca_base_component_list_item_t *cli = NULL; mca_base_component_t *component = NULL; mca_base_module_t *module = NULL; int priority = 0, best_priority = INT32_MIN; + int rc; *best_module = NULL; *best_component = NULL; - opal_output_verbose(10, output_id, - "mca:base:select: Auto-selecting %s components", - type_name); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca:base:select: Auto-selecting %s components", + type_name); /* * Traverse the list of available components. @@ -55,37 +61,48 @@ int mca_base_select(const char *type_name, int output_id, * If there is a query function then use it. */ if (NULL == component->mca_query_component) { - opal_output_verbose(5, output_id, - "mca:base:select:(%5s) Skipping component [%s]. 
It does not implement a query function", - type_name, component->mca_component_name ); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca:base:select:(%5s) Skipping component [%s]. It does not implement a query function", + type_name, component->mca_component_name ); continue; } /* * Query this component for the module and priority */ - opal_output_verbose(5, output_id, - "mca:base:select:(%5s) Querying component [%s]", - type_name, component->mca_component_name); - - component->mca_query_component(&module, &priority); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca:base:select:(%5s) Querying component [%s]", + type_name, component->mca_component_name); + + rc = component->mca_query_component(&module, &priority); + if (OPAL_ERR_FATAL == rc) { + /* a fatal error was detected by this component - e.g., the + * user specified a required element and the component could + * not find it. In this case, we must not continue as we might + * find some other component that could run, causing us to do + * something the user didn't want */ + return rc; + } else if (OPAL_SUCCESS != rc) { + /* silently skip this component */ + continue; + } /* * If no module was returned, then skip component */ if (NULL == module) { - opal_output_verbose(5, output_id, - "mca:base:select:(%5s) Skipping component [%s]. Query failed to return a module", - type_name, component->mca_component_name ); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca:base:select:(%5s) Skipping component [%s]. 
Query failed to return a module", + type_name, component->mca_component_name ); continue; } /* * Determine if this is the best module we have seen by looking the priority */ - opal_output_verbose(5, output_id, - "mca:base:select:(%5s) Query of component [%s] set priority to %d", - type_name, component->mca_component_name, priority); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca:base:select:(%5s) Query of component [%s] set priority to %d", + type_name, component->mca_component_name, priority); if (priority > best_priority) { best_priority = priority; *best_component = component; @@ -93,13 +110,16 @@ int mca_base_select(const char *type_name, int output_id, } } + if (priority_out) { + *priority_out = best_priority; + } /* * Finished querying all components. * Make sure we found something in the process. */ if (NULL == *best_component) { - opal_output_verbose(5, output_id, + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, "mca:base:select:(%5s) No component selected!", type_name); /* @@ -111,9 +131,9 @@ int mca_base_select(const char *type_name, int output_id, return OPAL_ERR_NOT_FOUND; } - opal_output_verbose(5, output_id, - "mca:base:select:(%5s) Selected component [%s]", - type_name, (*best_component)->mca_component_name); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, output_id, + "mca:base:select:(%5s) Selected component [%s]", + type_name, (*best_component)->mca_component_name); /* * Close the non-selected components diff --git a/opal/mca/base/mca_base_framework.c b/opal/mca/base/mca_base_framework.c index 2fd9662c78f..a1e49e4d5b0 100644 --- a/opal/mca/base/mca_base_framework.c +++ b/opal/mca/base/mca_base_framework.c @@ -92,14 +92,20 @@ int mca_base_framework_register (struct mca_base_framework_t *framework, } /* register a verbosity variable for this framework */ - asprintf (&desc, "Verbosity level for the %s framework (0 = no verbosity)", - framework->framework_name); + ret = asprintf (&desc, "Verbosity level for the %s 
framework (default: 0)", + framework->framework_name); + if (0 > ret) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + framework->framework_verbose = MCA_BASE_VERBOSE_ERROR; ret = mca_base_framework_var_register (framework, "verbose", desc, - MCA_BASE_VAR_TYPE_INT, NULL, 0, + MCA_BASE_VAR_TYPE_INT, + &mca_base_var_enum_verbose, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL, - &framework->framework_verbose); + &framework->framework_verbose); free(desc); if (0 > ret) { return ret; diff --git a/opal/mca/base/mca_base_framework.h b/opal/mca/base/mca_base_framework.h index 6c7036ac80b..c5009ac3823 100644 --- a/opal/mca/base/mca_base_framework.h +++ b/opal/mca/base/mca_base_framework.h @@ -69,7 +69,7 @@ typedef int (*mca_base_framework_register_params_fn_t) (mca_base_register_flag_t * * This function is invoked during opal_init() and during the * initialization of the special case of the ompi_info command. - * + * * This function fills in the components framework value, which * is a list of all components that were successfully opened. * This variable should \em only be used by other framework base @@ -130,7 +130,7 @@ typedef struct mca_base_framework_t { char *framework_name; /** Description of this framework or NULL */ const char *framework_description; - /** Framework register function or NULL if the framework + /** Framework register function or NULL if the framework and all its components have nothing to register */ mca_base_framework_register_params_fn_t framework_register; /** Framework open function or NULL */ @@ -143,7 +143,7 @@ typedef struct mca_base_framework_t { int framework_refcnt; /** List of static components */ const mca_base_component_t **framework_static_components; - /** Component selection. This will be registered with the MCA + /** Component selection. This will be registered with the MCA variable system and should be either NULL (all components) or a heap allocated, comma-delimited list of components. 
*/ char *framework_selection; diff --git a/opal/mca/base/mca_base_list.c b/opal/mca/base/mca_base_list.c index 3a015581be3..0c97e69a0ce 100644 --- a/opal/mca/base/mca_base_list.c +++ b/opal/mca/base/mca_base_list.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +22,7 @@ #include "opal/mca/base/base.h" -/* +/* * Local functions */ static void cl_constructor(opal_object_t *obj); @@ -32,14 +32,14 @@ static void cpl_constructor(opal_object_t *obj); /* * Class instance of the mca_base_component_list_item_t class */ -OBJ_CLASS_INSTANCE(mca_base_component_list_item_t, +OBJ_CLASS_INSTANCE(mca_base_component_list_item_t, opal_list_item_t, cl_constructor, NULL); /* * Class instance of the mca_base_component_priority_list_item_t class */ -OBJ_CLASS_INSTANCE(mca_base_component_priority_list_item_t, +OBJ_CLASS_INSTANCE(mca_base_component_priority_list_item_t, mca_base_component_list_item_t, cpl_constructor, NULL); @@ -58,7 +58,7 @@ static void cl_constructor(opal_object_t *obj) */ static void cpl_constructor(opal_object_t *obj) { - mca_base_component_priority_list_item_t *cpli = + mca_base_component_priority_list_item_t *cpli = (mca_base_component_priority_list_item_t *) obj; cpli->cpli_priority = -1; } diff --git a/opal/mca/base/mca_base_open.c b/opal/mca/base/mca_base_open.c index 1701e4c771d..0e7144ac1a6 100644 --- a/opal/mca/base/mca_base_open.c +++ b/opal/mca/base/mca_base_open.c @@ -6,17 +6,18 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research 
Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -60,13 +61,13 @@ static void parse_verbose(char *e, opal_output_stream_t *lds); /* - * Main MCA initialization. + * Main MCA initialization. */ int mca_base_open(void) { char *value; opal_output_stream_t lds; - char hostname[64]; + char hostname[OPAL_MAXHOSTNAMELEN]; int var_id; if (mca_base_opened++) { @@ -78,7 +79,7 @@ int mca_base_open(void) mca_base_system_default_path = strdup(opal_install_dirs.opallibdir); asprintf(&mca_base_user_default_path, "%s"OPAL_PATH_SEP".openmpi"OPAL_PATH_SEP"components", opal_home_directory()); #else - asprintf(&mca_base_system_default_path, "%s", opal_install_dirs.opallibdir); + asprintf(&mca_base_system_default_path, "%s", opal_install_dirs.opallibdir); #endif /* see if the user wants to override the defaults */ @@ -121,7 +122,13 @@ int mca_base_open(void) MCA_BASE_VAR_SYN_FLAG_DEPRECATED); /* What verbosity level do we want for the default 0 stream? */ - mca_base_verbose = "stderr"; + char *str = getenv("OPAL_OUTPUT_INTERNAL_TO_STDOUT"); + if (NULL != str && str[0] == '1') { + mca_base_verbose = "stdout"; + } + else { + mca_base_verbose = "stderr"; + } var_id = mca_base_var_register("opal", "mca", "base", "verbose", "Specifies where the default error output stream goes (this is separate from distinct help messages). 
Accepts a comma-delimited list of: stderr, stdout, syslog, syslogpri:, syslogid: (where str is the prefix string for all syslog notices), file[:filename] (if filename is not specified, a default filename is used), fileappend (if not specified, the file is opened for truncation), level[:N] (if specified, integer verbose level; otherwise, 0 is implied)", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, @@ -137,10 +144,10 @@ int mca_base_open(void) } else { set_defaults(&lds); } - gethostname(hostname, 64); + gethostname(hostname, sizeof(hostname)); asprintf(&lds.lds_prefix, "[%s:%05d] ", hostname, getpid()); opal_output_reopen(0, &lds); - opal_output_verbose(5, 0, "mca: base: opening components"); + opal_output_verbose (MCA_BASE_VERBOSE_COMPONENT, 0, "mca: base: opening components"); free(lds.lds_prefix); /* Open up the component repository */ @@ -225,7 +232,7 @@ static void parse_verbose(char *e, opal_output_stream_t *lds) lds->lds_want_file = true; lds->lds_want_file_append = 1; have_output = true; - } + } else if (strncasecmp(ptr, "level", 5) == 0) { lds->lds_verbose_level = 0; diff --git a/opal/mca/base/mca_base_parse_paramfile.c b/opal/mca/base/mca_base_parse_paramfile.c index d8271522c77..80ea86e1d5c 100644 --- a/opal/mca/base/mca_base_parse_paramfile.c +++ b/opal/mca/base/mca_base_parse_paramfile.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/base/mca_base_pvar.c b/opal/mca/base/mca_base_pvar.c index 8d4758c5ede..eaf7c0ec6d3 100644 --- a/opal/mca/base/mca_base_pvar.c +++ b/opal/mca/base/mca_base_pvar.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ @@ -76,7 +76,7 @@ int mca_base_pvar_find (const char *project, const char *framework, const char * return OPAL_ERROR; } - ret = mca_base_pvar_find_by_name (full_name, &index); + ret = mca_base_pvar_find_by_name (full_name, MCA_BASE_PVAR_CLASS_ANY, &index); free (full_name); /* NTH: should we verify the name components match the returned variable? */ @@ -84,8 +84,9 @@ int mca_base_pvar_find (const char *project, const char *framework, const char * return (OPAL_SUCCESS != ret) ? 
ret : index; } -int mca_base_pvar_find_by_name (const char *full_name, int *index) +int mca_base_pvar_find_by_name (const char *full_name, int var_class, int *index) { + mca_base_pvar_t *pvar; void *tmp; int rc; @@ -95,6 +96,15 @@ int mca_base_pvar_find_by_name (const char *full_name, int *index) return rc; } + rc = mca_base_pvar_get_internal ((int)(uintptr_t) tmp, &pvar, false); + if (OPAL_SUCCESS != rc) { + return rc; + } + + if (MCA_BASE_PVAR_CLASS_ANY != var_class && pvar->var_class != var_class) { + return OPAL_ERR_NOT_FOUND; + } + *index = (int)(uintptr_t) tmp; return OPAL_SUCCESS; @@ -169,7 +179,7 @@ int mca_base_pvar_register (const char *project, const char *framework, const ch int bind, mca_base_pvar_flag_t flags, mca_base_get_value_fn_t get_value, mca_base_set_value_fn_t set_value, mca_base_notify_fn_t notify, void *ctx) { - int ret, group_index; + int ret, group_index, pvar_index; mca_base_pvar_t *pvar; /* assert on usage errors */ @@ -274,15 +284,18 @@ int mca_base_pvar_register (const char *project, const char *framework, const ch } } - /* add this performance variable to the MCA variable group */ - ret = mca_base_var_group_add_pvar (group_index, pvar_count); - if (0 > ret) { + pvar_index = opal_pointer_array_add (®istered_pvars, pvar); + if (0 > pvar_index) { break; } + pvar->pvar_index = pvar_index; - ret = opal_pointer_array_add (®istered_pvars, pvar); - if (0 > ret) { - break; + /* add this performance variable to the MCA variable group */ + if (0 <= group_index) { + ret = mca_base_var_group_add_pvar (group_index, pvar_index); + if (0 > ret) { + break; + } } opal_hash_table_set_value_ptr (&mca_base_pvar_index_hash, pvar->name, strlen (pvar->name), @@ -317,7 +330,7 @@ int mca_base_pvar_register (const char *project, const char *framework, const ch if (!(flags & MCA_BASE_PVAR_FLAG_READONLY)) { pvar->set_value = set_value ? 
set_value : mca_base_pvar_default_set_value; } - + pvar->ctx = ctx; pvar->pvar_index = pvar_count; @@ -447,7 +460,7 @@ int mca_base_pvar_handle_alloc (mca_base_pvar_session_t *session, int index, voi } pvar_handle->obj_handle = obj_handle; - pvar_handle->pvar = pvar; + pvar_handle->pvar = pvar; *handle = pvar_handle; @@ -492,7 +505,7 @@ int mca_base_pvar_handle_alloc (mca_base_pvar_session_t *session, int index, voi ret = OPAL_ERR_OUT_OF_RESOURCE; break; } - + pvar_handle->last_value = calloc (*count, datatype_size); if (NULL == pvar_handle->last_value) { ret = OPAL_ERR_OUT_OF_RESOURCE; @@ -699,7 +712,7 @@ int mca_base_pvar_handle_write_value (mca_base_pvar_handle_t *handle, const void if (OPAL_SUCCESS != ret) { return ret; } - + memmove (handle->current_value, value, handle->count * var_type_sizes[handle->pvar->type]); return OPAL_SUCCESS; diff --git a/opal/mca/base/mca_base_pvar.h b/opal/mca/base/mca_base_pvar.h index 3cefae23312..44f23b3dfc1 100644 --- a/opal/mca/base/mca_base_pvar.h +++ b/opal/mca/base/mca_base_pvar.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * * Additional copyrights may follow @@ -91,6 +91,8 @@ enum { MCA_BASE_PVAR_CLASS_GENERIC }; +#define MCA_BASE_PVAR_CLASS_ANY -1 + /* * Reserved bindings; passed when registering a new pvar. OPAL will * ignore any other binding type. @@ -356,7 +358,7 @@ OPAL_DECLSPEC int mca_base_pvar_find (const char *project, const char *framework * * See mca_base_pvar_find(). 
*/ -OPAL_DECLSPEC int mca_base_pvar_find_by_name (const char *full_name, int *index); +OPAL_DECLSPEC int mca_base_pvar_find_by_name (const char *full_name, int var_class, int *index); /**************************************************************************** * The following functions are the back-end to the MPI_T API functions @@ -369,7 +371,7 @@ OPAL_DECLSPEC int mca_base_pvar_find_by_name (const char *full_name, int *index) * @param[out] count Number of registered performance variables. * * This function can be called before mca_base_pvar_init() and after - * mca_base_pvar_finalize(). + * mca_base_pvar_finalize(). */ OPAL_DECLSPEC int mca_base_pvar_get_count (int *count); diff --git a/opal/mca/base/mca_base_var.c b/opal/mca/base/mca_base_var.c index f2a6f7a406f..5a5c4f915ca 100644 --- a/opal/mca/base/mca_base_var.c +++ b/opal/mca/base/mca_base_var.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science @@ -724,7 +724,7 @@ static int var_set_from_string (mca_base_var_t *var, char *src) case MCA_BASE_VAR_TYPE_BOOL: case MCA_BASE_VAR_TYPE_SIZE_T: ret = int_from_string(src, var->mbv_enumerator, &int_value); - if (OPAL_ERR_VALUE_OUT_OF_BOUNDS == ret || + if (OPAL_SUCCESS != ret || (MCA_BASE_VAR_TYPE_INT == var->mbv_type && ((int) int_value != (int64_t) int_value)) || (MCA_BASE_VAR_TYPE_UNSIGNED_INT == var->mbv_type && ((unsigned int) int_value != int_value))) { if (var->mbv_enumerator) { @@ -852,7 +852,7 @@ int mca_base_var_deregister(int vari) var->mbv_storage->stringval) { free (var->mbv_storage->stringval); var->mbv_storage->stringval = NULL; - } else if (MCA_BASE_VAR_TYPE_BOOL != var->mbv_type && NULL != var->mbv_enumerator) { + } else if (var->mbv_enumerator && !var->mbv_enumerator->enum_is_static) { OBJ_RELEASE(var->mbv_enumerator); } @@ -1486,7 +1486,9 @@ static int register_variable (const char *project_name, const char *framework_na OBJ_RELEASE (var->mbv_enumerator); } - OBJ_RETAIN(enumerator); + if (!enumerator->enum_is_static) { + OBJ_RETAIN(enumerator); + } } var->mbv_enumerator = enumerator; @@ -1524,7 +1526,7 @@ int mca_base_var_register (const char *project_name, const char *framework_name, mca_base_var_scope_t scope, void *storage) { /* Only integer variables can have enumerator */ - assert (NULL == enumerator || MCA_BASE_VAR_TYPE_INT == type); + assert (NULL == enumerator || (MCA_BASE_VAR_TYPE_INT == type || MCA_BASE_VAR_TYPE_UNSIGNED_INT == type)); return register_variable (project_name, framework_name, component_name, variable_name, description, type, enumerator, @@ -1674,7 +1676,7 @@ static int var_set_from_env (mca_base_var_t *var, mca_base_var_t *original) const char *new_variable = "None (going away)"; if (is_synonym) { - new_variable = var->mbv_full_name; + new_variable = original->mbv_full_name; } switch (var->mbv_source) { @@ -1847,7 +1849,7 @@ static void 
var_destructor(mca_base_var_t *var) } /* don't release the boolean enumerator */ - if (MCA_BASE_VAR_TYPE_BOOL != var->mbv_type && NULL != var->mbv_enumerator) { + if (var->mbv_enumerator && !var->mbv_enumerator->enum_is_static) { OBJ_RELEASE(var->mbv_enumerator); } @@ -1860,6 +1862,7 @@ static void var_destructor(mca_base_var_t *var) if (NULL != var->mbv_long_name) { free(var->mbv_long_name); } + if (NULL != var->mbv_description) { free(var->mbv_description); } @@ -1924,7 +1927,6 @@ static char *source_name(mca_base_var_t *var) static int var_value_string (mca_base_var_t *var, char **value_string) { const mca_base_var_storage_t *value; - const char *tmp; int ret; assert (MCA_BASE_VAR_TYPE_MAX > var->mbv_type); @@ -1971,19 +1973,14 @@ static int var_value_string (mca_base_var_t *var, char **value_string) } else { /* we use an enumerator to handle string->bool and bool->string conversion */ if (MCA_BASE_VAR_TYPE_BOOL == var->mbv_type) { - ret = var->mbv_enumerator->string_from_value(var->mbv_enumerator, value->boolval, &tmp); + ret = var->mbv_enumerator->string_from_value(var->mbv_enumerator, value->boolval, value_string); } else { - ret = var->mbv_enumerator->string_from_value(var->mbv_enumerator, value->intval, &tmp); + ret = var->mbv_enumerator->string_from_value(var->mbv_enumerator, value->intval, value_string); } if (OPAL_SUCCESS != ret) { return ret; } - - *value_string = strdup (tmp); - if (NULL == *value_string) { - ret = OPAL_ERR_OUT_OF_RESOURCE; - } } return ret; diff --git a/opal/mca/base/mca_base_var.h b/opal/mca/base/mca_base_var.h index f815f6563ce..46b8a2ce760 100644 --- a/opal/mca/base/mca_base_var.h +++ b/opal/mca/base/mca_base_var.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,13 +14,13 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** @file +/** @file * This file presents the MCA variable interface. * * Note that there are two scopes for MCA variables: "normal" and @@ -135,16 +135,16 @@ typedef enum { MCA_BASE_VAR_SCOPE_READONLY, /** The value of this variable may be changed locally. */ MCA_BASE_VAR_SCOPE_LOCAL, - /** The value of this variable must be set to a consistent value + /** The value of this variable must be set to a consistent value within a group */ MCA_BASE_VAR_SCOPE_GROUP, - /** The value of this variable must be set to the same value + /** The value of this variable must be set to the same value within a group */ MCA_BASE_VAR_SCOPE_GROUP_EQ, - /** The value of this variable must be set to a consistent value + /** The value of this variable must be set to a consistent value for all processes */ MCA_BASE_VAR_SCOPE_ALL, - /** The value of this variable must be set to the same value + /** The value of this variable must be set to the same value for all processes */ MCA_BASE_VAR_SCOPE_ALL_EQ, MCA_BASE_VAR_SCOPE_MAX @@ -397,7 +397,7 @@ OPAL_DECLSPEC int mca_base_var_init(void); * {storage} points to a (char *), the pointed-to string will be * duplicated and maintained internally by the MCA variable system; * the caller may free the original string after this function returns - * successfully. + * successfully. */ OPAL_DECLSPEC int mca_base_var_register (const char *project_name, const char *framework_name, const char *component_name, const char *variable_name, @@ -452,12 +452,12 @@ OPAL_DECLSPEC int mca_base_framework_var_register (const mca_base_framework_t *f * variable. * @returns OPAL_ERR_OUT_OF_RESOURCE If memory could not be allocated. * @returns OPAL_ERROR For all other errors. 
- * + * * Upon success, this function creates a synonym MCA variable * that will be treated almost exactly like the original. The * type (int or string) is irrelevant; this function simply * creates a new name that by which the same variable value is - * accessible. + * accessible. * * Note that the original variable name has precendence over all * synonyms. For example, consider the case if variable is diff --git a/opal/mca/base/mca_base_var_enum.c b/opal/mca/base/mca_base_var_enum.c index 6eddd3451d2..9c10d93c312 100644 --- a/opal/mca/base/mca_base_var_enum.c +++ b/opal/mca/base/mca_base_var_enum.c @@ -6,23 +6,25 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" #include "opal/mca/base/mca_base_var_enum.h" +#include "opal/mca/base/base.h" +#include "opal/util/argv.h" #include #include @@ -33,6 +35,15 @@ static void mca_base_var_enum_destructor (mca_base_var_enum_t *enumerator); OBJ_CLASS_INSTANCE(mca_base_var_enum_t, opal_object_t, mca_base_var_enum_constructor, mca_base_var_enum_destructor); +static void mca_base_var_enum_flag_constructor (mca_base_var_enum_flag_t *enumerator); +static void mca_base_var_enum_flag_destructor (mca_base_var_enum_flag_t *enumerator); +OBJ_CLASS_INSTANCE(mca_base_var_enum_flag_t, opal_object_t, mca_base_var_enum_flag_constructor, + mca_base_var_enum_flag_destructor); + +static int enum_dump (mca_base_var_enum_t *self, char **out); +static int enum_get_count (mca_base_var_enum_t *self, int *count); +static int enum_get_value (mca_base_var_enum_t *self, int index, int *value, const char **string_value); + static int mca_base_var_enum_bool_get_count (mca_base_var_enum_t *enumerator, int *count) { *count = 2; @@ -80,10 +91,10 @@ static int mca_base_var_enum_bool_vfs (mca_base_var_enum_t *self, const char *st } static int mca_base_var_enum_bool_sfv (mca_base_var_enum_t *self, const int value, - const char **string_value) + char **string_value) { if (string_value) { - *string_value = value ? "true" : "false"; + *string_value = strdup (value ? 
"true" : "false"); } return OPAL_SUCCESS; @@ -97,6 +108,7 @@ static int mca_base_var_enum_bool_dump (mca_base_var_enum_t *self, char **out) mca_base_var_enum_t mca_base_var_enum_bool = { .super = OPAL_OBJ_STATIC_INIT(opal_object_t), + .enum_is_static = true, .enum_name = "boolean", .get_count = mca_base_var_enum_bool_get_count, .get_value = mca_base_var_enum_bool_get_value, @@ -105,6 +117,112 @@ mca_base_var_enum_t mca_base_var_enum_bool = { .dump = mca_base_var_enum_bool_dump }; +/* verbosity enumerator */ +static mca_base_var_enum_value_t verbose_values[] = { + {MCA_BASE_VERBOSE_NONE, "none"}, + {MCA_BASE_VERBOSE_ERROR, "error"}, + {MCA_BASE_VERBOSE_COMPONENT, "component"}, + {MCA_BASE_VERBOSE_WARN, "warn"}, + {MCA_BASE_VERBOSE_INFO, "info"}, + {MCA_BASE_VERBOSE_TRACE, "trace"}, + {MCA_BASE_VERBOSE_DEBUG, "debug"}, + {MCA_BASE_VERBOSE_MAX, "max"}, + {-1, NULL} +}; + +static int mca_base_var_enum_verbose_vfs (mca_base_var_enum_t *self, const char *string_value, + int *value) +{ + char *tmp; + int v; + + /* skip whitespace */ + string_value += strspn (string_value, " \t\n\v\f\r"); + + v = strtol (string_value, &tmp, 10); + if (*tmp != '\0') { + for (int i = 0 ; verbose_values[i].string ; ++i) { + if (0 == strcmp (verbose_values[i].string, string_value)) { + *value = verbose_values[i].value; + return OPAL_SUCCESS; + } + } + + return OPAL_ERR_NOT_FOUND; + } else if (v < MCA_BASE_VERBOSE_NONE) { + v = MCA_BASE_VERBOSE_NONE; + } else if (v > MCA_BASE_VERBOSE_MAX) { + v = MCA_BASE_VERBOSE_MAX; + } + + *value = v; + + return OPAL_SUCCESS; +} + +static int mca_base_var_enum_verbose_sfv (mca_base_var_enum_t *self, const int value, + char **string_value) +{ + int ret; + + if (value < 0 || value > 100) { + return OPAL_ERR_VALUE_OUT_OF_BOUNDS; + } + + for (int i = 0 ; verbose_values[i].string ; ++i) { + if (verbose_values[i].value == value) { + if (string_value) { + *string_value = strdup (verbose_values[i].string); + } + return OPAL_SUCCESS; + } + } + + if (string_value) { + 
ret = asprintf (string_value, "%d", value); + if (0 > ret) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + } + + return OPAL_SUCCESS; +} + +static int mca_base_var_enum_verbose_dump (mca_base_var_enum_t *self, char **out) +{ + char *tmp; + int ret; + + ret = enum_dump (self, out); + if (OPAL_SUCCESS != ret) { + return ret; + } + + ret = asprintf (&tmp, "%s, 0 - 100", *out); + free (*out); + if (0 > ret) { + *out = NULL; + return OPAL_ERR_OUT_OF_RESOURCE; + } + + *out = tmp; + + return OPAL_SUCCESS; +} + +mca_base_var_enum_t mca_base_var_enum_verbose = { + .super = OPAL_OBJ_STATIC_INIT(opal_object_t), + .enum_is_static = true, + .enum_name = "verbosity", + .get_count = enum_get_count, + .get_value = enum_get_value, + .value_from_string = mca_base_var_enum_verbose_vfs, + .string_from_value = mca_base_var_enum_verbose_sfv, + .dump = mca_base_var_enum_verbose_dump, + .enum_value_count = 8, + .enum_values = verbose_values, +}; + int mca_base_var_enum_create (const char *name, const mca_base_var_enum_value_t *values, mca_base_var_enum_t **enumerator) { @@ -143,6 +261,52 @@ int mca_base_var_enum_create (const char *name, const mca_base_var_enum_value_t return OPAL_SUCCESS; } +int mca_base_var_enum_create_flag (const char *name, const mca_base_var_enum_value_flag_t *flags, mca_base_var_enum_flag_t **enumerator) +{ + mca_base_var_enum_flag_t *new_enum; + int i; + + *enumerator = NULL; + + new_enum = OBJ_NEW(mca_base_var_enum_flag_t); + if (NULL == new_enum) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + new_enum->super.enum_name = strdup (name); + if (NULL == new_enum->super.enum_name) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + for (i = 0 ; flags[i].string ; ++i); + new_enum->super.enum_value_count = i; + + /* make a copy of the values */ + new_enum->enum_flags = calloc (new_enum->super.enum_value_count + 1, sizeof (*new_enum->enum_flags)); + if (NULL == new_enum->enum_flags) { + OBJ_RELEASE(new_enum); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + int all_flags = 0; + for (i = 0 ; 
i < new_enum->super.enum_value_count ; ++i) { + new_enum->enum_flags[i].flag = flags[i].flag; + new_enum->enum_flags[i].string = strdup (flags[i].string); + new_enum->enum_flags[i].conflicting_flag = flags[i].conflicting_flag; + /* ensure flags are only set a single bit, doesn't conflict with itself, and + * hasn't already been specified. */ + assert (!(flags[i].flag & (flags[i].flag - 1))); + assert (!(flags[i].flag & flags[i].conflicting_flag)); + assert (!(all_flags & flags[i].flag)); + assert (flags[i].flag); + all_flags |= flags[i].flag; + } + + *enumerator = new_enum; + + return OPAL_SUCCESS; +} + static int enum_dump (mca_base_var_enum_t *self, char **out) { int i; @@ -193,7 +357,7 @@ static int enum_get_value (mca_base_var_enum_t *self, int index, int *value, con } if (string_value) { - *string_value = self->enum_values[index].string; + *string_value = strdup (self->enum_values[index].string); } return OPAL_SUCCESS; @@ -230,7 +394,7 @@ static int enum_value_from_string(mca_base_var_enum_t *self, const char *string_ return OPAL_SUCCESS; } -static int enum_string_from_value(mca_base_var_enum_t *self, const int value, const char **string_value) { +static int enum_string_from_value(mca_base_var_enum_t *self, const int value, char **string_value) { int count, ret, i; ret = self->get_count(self, &count); @@ -249,7 +413,7 @@ static int enum_string_from_value(mca_base_var_enum_t *self, const int value, co } if (string_value) { - *string_value = self->enum_values[i].string; + *string_value = strdup (self->enum_values[i].string); } return OPAL_SUCCESS; @@ -264,6 +428,7 @@ static void mca_base_var_enum_constructor (mca_base_var_enum_t *enumerator) enumerator->value_from_string = enum_value_from_string; enumerator->string_from_value = enum_string_from_value; enumerator->dump = enum_dump; + enumerator->enum_is_static = false; } static void mca_base_var_enum_destructor (mca_base_var_enum_t *enumerator) @@ -280,3 +445,187 @@ static void mca_base_var_enum_destructor 
(mca_base_var_enum_t *enumerator) free (enumerator->enum_values); } } + +static int enum_get_value_flag (mca_base_var_enum_t *self, int index, int *value, const char **string_value) +{ + mca_base_var_enum_flag_t *flag_enum = (mca_base_var_enum_flag_t *) self; + int count, ret; + + ret = self->get_count(self, &count); + if (OPAL_SUCCESS != ret) { + return ret; + } + + if (index >= count) { + return OPAL_ERR_VALUE_OUT_OF_BOUNDS; + } + + if (value) { + *value = flag_enum->enum_flags[index].flag; + } + + if (string_value) { + *string_value = strdup (flag_enum->enum_flags[index].string); + } + + return OPAL_SUCCESS; +} + +static int enum_value_from_string_flag (mca_base_var_enum_t *self, const char *string_value, int *value_out) { + mca_base_var_enum_flag_t *flag_enum = (mca_base_var_enum_flag_t *) self; + int value, count, ret, flag; + char **flags; + bool is_int; + char *tmp; + + ret = self->get_count(self, &count); + if (OPAL_SUCCESS != ret) { + return ret; + } + + flags = opal_argv_split (string_value, ','); + if (NULL == flags) { + return OPAL_ERR_BAD_PARAM; + } + + flag = 0; + + for (int i = 0 ; flags[i] ; ++i) { + value = strtol (flags[i], &tmp, 0); + is_int = tmp[0] == '\0'; + + bool found = false, conflict = false; + for (int j = 0 ; j < count ; ++j) { + if ((is_int && (value & flag_enum->enum_flags[j].flag)) || + 0 == strcasecmp (flags[i], flag_enum->enum_flags[j].string)) { + found = true; + + if (flag & flag_enum->enum_flags[j].conflicting_flag) { + conflict = true; + } else { + flag |= flag_enum->enum_flags[j].flag; + } + + if (is_int) { + value &= ~flag_enum->enum_flags[j].flag; + if (0 == value) { + break; + } + } else { + break; + } + } + } + + if (!found || conflict || (is_int && value)) { + opal_argv_free (flags); + return !found ? 
OPAL_ERR_VALUE_OUT_OF_BOUNDS : OPAL_ERR_BAD_PARAM; + } + } + + opal_argv_free (flags); + + *value_out = flag; + + return OPAL_SUCCESS; +} + +static int enum_string_from_value_flag (mca_base_var_enum_t *self, const int value, char **string_value) { + mca_base_var_enum_flag_t *flag_enum = (mca_base_var_enum_flag_t *) self; + int count, ret, current; + char *out = NULL, *tmp; + + ret = self->get_count(self, &count); + if (OPAL_SUCCESS != ret) { + return ret; + } + + current = value; + for (int i = 0 ; i < count ; ++i) { + if (!(flag_enum->enum_flags[i].flag & current)) { + continue; + } + + tmp = out; + + ret = asprintf (&out, "%s%s%s", tmp ? tmp : "", tmp ? "," : "", flag_enum->enum_flags[i].string); + free (tmp); + + if (0 > ret) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + if (value & flag_enum->enum_flags[i].conflicting_flag) { + free (out); + return OPAL_ERR_BAD_PARAM; + } + + current &= ~flag_enum->enum_flags[i].flag; + } + + if (current) { + free (out); + return OPAL_ERR_VALUE_OUT_OF_BOUNDS; + } + + if (string_value) { + *string_value = out ? out : strdup (""); + } else { + free (out); + } + + return OPAL_SUCCESS; +} + +static int enum_dump_flag (mca_base_var_enum_t *self, char **out) +{ + mca_base_var_enum_flag_t *flag_enum = (mca_base_var_enum_flag_t *) self; + char *tmp; + int ret; + + *out = NULL; + + if (NULL == self) { + return OPAL_ERROR; + } + + *out = strdup ("Comma-delimited list of: "); + if (NULL == *out) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + for (int i = 0; i < self->enum_value_count ; ++i) { + tmp = *out; + + ret = asprintf (out, "%s%s0x%x:\"%s\"", tmp, i ? 
", " : " ", flag_enum->enum_flags[i].flag, + flag_enum->enum_flags[i].string); + free (tmp); + if (0 > ret) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + } + + return OPAL_SUCCESS; +} + +static void mca_base_var_enum_flag_constructor (mca_base_var_enum_flag_t *enumerator) +{ + enumerator->enum_flags = NULL; + enumerator->super.get_value = enum_get_value_flag; + enumerator->super.get_count = enum_get_count; + enumerator->super.value_from_string = enum_value_from_string_flag; + enumerator->super.string_from_value = enum_string_from_value_flag; + enumerator->super.dump = enum_dump_flag; + enumerator->super.enum_is_static = false; +} + +static void mca_base_var_enum_flag_destructor (mca_base_var_enum_flag_t *enumerator) +{ + /* release the copy of the values */ + if (enumerator->enum_flags) { + for (int i = 0 ; i < enumerator->super.enum_value_count ; ++i) { + free ((void *) enumerator->enum_flags[i].string); + } + free (enumerator->enum_flags); + } +} diff --git a/opal/mca/base/mca_base_var_enum.h b/opal/mca/base/mca_base_var_enum.h index a4cf7d312e3..a7fc90379bf 100644 --- a/opal/mca/base/mca_base_var_enum.h +++ b/opal/mca/base/mca_base_var_enum.h @@ -6,17 +6,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -85,10 +85,11 @@ typedef int (*mca_base_var_enum_dump_fn_t)(mca_base_var_enum_t *self, char **out * * @long This function returns the string value for a given interger value in the * {string_value} parameter. The {string_value} parameter may be NULL in which case - * no string is returned. + * no string is returned. If a string is returned in {string_value} the caller + * must free the string with free(). */ typedef int (*mca_base_var_enum_sfv_fn_t)(mca_base_var_enum_t *self, const int value, - const char **string_value); + char **string_value); /** * The default enumerator class takes in a list of integer-string pairs. If a @@ -102,10 +103,15 @@ struct mca_base_var_enum_value_t { typedef struct mca_base_var_enum_value_t mca_base_var_enum_value_t; -/* enumerator base class */ +/** + * enumerator base class + */ struct mca_base_var_enum_t { opal_object_t super; + /** Is the enumerator statically allocated */ + bool enum_is_static; + /** Name of this enumerator. This value is duplicated from the argument provided to mca_base_var_enum_create() */ char *enum_name; @@ -132,6 +138,36 @@ struct mca_base_var_enum_t { mca_base_var_enum_value_t *enum_values; }; + +/** + * The default flag enumerator class takes in a list of integer-string pairs. If a + * string is read from an environment variable or a file value the matching + * flag value is used for the MCA variable. The conflicting_flag is used to + * indicate any flags that should conflict. 
+ */ +struct mca_base_var_enum_value_flag_t { + /** flag value (must be power-of-two) */ + int flag; + /** corresponding string name */ + const char *string; + /** conflicting flag(s) if any */ + int conflicting_flag; +}; + +typedef struct mca_base_var_enum_value_flag_t mca_base_var_enum_value_flag_t; + +/** + * flag enumerator base class + */ +struct mca_base_var_enum_flag_t { + /** use the existing enumerator interface */ + mca_base_var_enum_t super; + /** flag value(s) */ + mca_base_var_enum_value_flag_t *enum_flags; +}; + +typedef struct mca_base_var_enum_flag_t mca_base_var_enum_flag_t; + /** * Object declaration for mca_base_var_enum_t */ @@ -149,14 +185,12 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_base_var_enum_t); * @retval opal error code On error * * This function creates a value enumerator for integer variables. The - * value array is stored by reference in the enumerator so it should - * not be allocated on the stack. The OUT enumerator value will be a - * newly OBJ_NEW'ed object that should be released by the caller via - * OBJ_RELEASE. + * OUT enumerator value will be a newly OBJ_NEW'ed object that should + * be released by the caller via OBJ_RELEASE. * * Note that the output enumerator can be OBJ_RELEASE'd after it has - * been used in a pvar registration, because variables that use the - * enumerator will OBJ_RETAIN it. + * been used in a cvar or pvar registration, because the variable + * registration functions will OBJ_RETAIN the enumerator.
* * Note that all the strings in the values[] array are strdup'ed into * internal storage, meaning that the caller can free all of the @@ -166,6 +200,33 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_base_var_enum_t); OPAL_DECLSPEC int mca_base_var_enum_create (const char *name, const mca_base_var_enum_value_t values[], mca_base_var_enum_t **enumerator); +/** + * Create a new default flag enumerator + * + * @param[in] name Name for this enumerator + * @param[in] flags List of flags terminated with a NULL .string + * member. + * @param[out] enumerator Newly created enumerator. + * + * @retval OPAL_SUCCESS On success + * @retval opal error code On error + * + * This function creates a flag enumerator for integer variables. The + * OUT enumerator value will be a newly OBJ_NEW'ed object that should + * be released by the caller via OBJ_RELEASE. + * + * Note that the output enumerator can be OBJ_RELEASE'd after it has + * been used in a cvar or pvar registration, because the variable + * registration functions will OBJ_RETAIN the enumerator. + * + * Note that all the strings in the flags[] array are strdup'ed into + * internal storage, meaning that the caller can free all of the + * strings passed in flags[] after mca_base_var_enum_create_flag() + * returns. + */ +OPAL_DECLSPEC int mca_base_var_enum_create_flag (const char *name, const mca_base_var_enum_value_flag_t flags[], + mca_base_var_enum_flag_t **enumerator); + /* standard enumerators.
it is invalid to call OBJ_RELEASE on any of these enumerators */ /** * Boolean enumerator @@ -176,5 +237,9 @@ OPAL_DECLSPEC int mca_base_var_enum_create (const char *name, const mca_base_var */ extern mca_base_var_enum_t mca_base_var_enum_bool; +/** + * Verbosity level enumerator + */ +extern mca_base_var_enum_t mca_base_var_enum_verbose; #endif /* !defined(MCA_BASE_VAR_ENUM_H) */ diff --git a/opal/mca/base/mca_base_var_group.c b/opal/mca/base/mca_base_var_group.c index 64f9defd6fd..6fec2e21067 100644 --- a/opal/mca/base/mca_base_var_group.c +++ b/opal/mca/base/mca_base_var_group.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -218,6 +218,12 @@ static int group_register (const char *project_name, const char *framework_name, return -1; } + /* avoid groups of the form opal_opal, ompi_ompi, etc */ + if (NULL != project_name && NULL != framework_name && + (0 == strcmp (project_name, framework_name))) { + project_name = NULL; + } + group_id = group_find (project_name, framework_name, component_name, true); if (0 <= group_id) { ret = mca_base_var_group_get_internal (group_id, &group, true); @@ -274,12 +280,6 @@ static int group_register (const char *project_name, const char *framework_name, } } - /* avoid groups of the form opal_opal, ompi_ompi, etc */ - if (NULL != project_name && NULL != framework_name && - (0 == strcmp (project_name, framework_name))) { - project_name = NULL; - } - /* build the group name */ ret = mca_base_var_generate_full_name4 (NULL, project_name, framework_name, component_name, &group->group_full_name); diff --git a/opal/mca/base/mca_base_var_group.h b/opal/mca/base/mca_base_var_group.h index b33bc25be62..8217b476d88 100644 --- a/opal/mca/base/mca_base_var_group.h +++ b/opal/mca/base/mca_base_var_group.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -165,7 +165,7 @@ OPAL_DECLSPEC int mca_base_var_group_get_count (void); /** * Get a relative timestamp for the MCA group system * - * @retval stamp + * @retval stamp * * This value will change if groups or variables are either added or removed. */ diff --git a/opal/mca/base/mca_base_vari.h b/opal/mca/base/mca_base_vari.h index 0cb9fcd8e90..25dc108ab95 100644 --- a/opal/mca/base/mca_base_vari.h +++ b/opal/mca/base/mca_base_vari.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,14 +14,14 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** - * @file +/** + * @file * * This is the private declarations for the MCA variable system. * This file is internal to the MCA variable system and should not @@ -81,7 +81,7 @@ extern bool mca_base_var_initialized; struct mca_base_var_file_value_t { /** Allow this to be an OPAL OBJ */ opal_list_item_t super; - + /** Parameter name */ char *mbvfv_var; /** Parameter value */ @@ -169,5 +169,5 @@ OPAL_DECLSPEC int mca_base_pvar_init (void); OPAL_DECLSPEC int mca_base_pvar_finalize (void); END_C_DECLS - + #endif /* OPAL_MCA_BASE_VAR_INTERNAL_H */ diff --git a/opal/mca/btl/Makefile.am b/opal/mca/btl/Makefile.am index 976dab28d09..a235089a8f5 100644 --- a/opal/mca/btl/Makefile.am +++ b/opal/mca/btl/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/btl/base/Makefile.am b/opal/mca/btl/base/Makefile.am index 26de26c1ae2..9284793c0ba 100644 --- a/opal/mca/btl/base/Makefile.am +++ b/opal/mca/btl/base/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/btl/base/base.h b/opal/mca/btl/base/base.h index f2d3db463e6..c66f0c81bbd 100644 --- a/opal/mca/btl/base/base.h +++ b/opal/mca/btl/base/base.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,16 +6,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. 
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -26,7 +29,9 @@ #include "opal_config.h" #include "opal/class/opal_list.h" #include "opal/mca/mca.h" +#include "opal/mca/base/mca_base_framework.h" #include "opal/mca/btl/btl.h" +#include "opal/mca/base/mca_base_var_enum.h" BEGIN_C_DECLS @@ -38,17 +43,17 @@ struct mca_btl_base_selected_module_t { typedef struct mca_btl_base_selected_module_t mca_btl_base_selected_module_t; -/* holds the recv call back function to be called by the btl on - * a receive. - */ -struct mca_btl_base_recv_reg_t { - mca_btl_base_module_recv_cb_fn_t cbfunc; - void* cbdata; -}; -typedef struct mca_btl_base_recv_reg_t mca_btl_base_recv_reg_t; +/* holds the recv call back function to be called by the btl on + * a receive. + */ +struct mca_btl_base_recv_reg_t { + mca_btl_base_module_recv_cb_fn_t cbfunc; + void* cbdata; +}; +typedef struct mca_btl_base_recv_reg_t mca_btl_base_recv_reg_t; -OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_base_selected_module_t); +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_base_selected_module_t); /* * Global functions for MCA: overall BTL open and close @@ -75,6 +80,9 @@ OPAL_DECLSPEC extern bool mca_btl_base_thread_multiple_override; OPAL_DECLSPEC extern mca_base_framework_t opal_btl_base_framework; +extern mca_base_var_enum_flag_t *mca_btl_base_flag_enum; +extern mca_base_var_enum_flag_t *mca_btl_base_atomic_enum; + END_C_DECLS - + #endif /* MCA_BTL_BASE_H */ diff --git a/opal/mca/btl/base/btl_base_error.c b/opal/mca/btl/base/btl_base_error.c index ec437b1afa5..9b525c32390 100644 --- a/opal/mca/btl/base/btl_base_error.c +++ b/opal/mca/btl/base/btl_base_error.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -57,7 +57,7 @@ int mca_btl_base_out(const char* fmt, ...) } -void mca_btl_base_error_no_nics(const char* transport, +void mca_btl_base_error_no_nics(const char* transport, const char* nic_name) { char *procid; diff --git a/opal/mca/btl/base/btl_base_error.h b/opal/mca/btl/base/btl_base_error.h index 58ac9378cf7..fb18eb8a055 100644 --- a/opal/mca/btl/base/btl_base_error.h +++ b/opal/mca/btl/base/btl_base_error.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -83,9 +83,9 @@ OPAL_DECLSPEC extern int mca_btl_base_out(const char*, ...) __opal_attribute_for mca_btl_base_err args; \ mca_btl_base_err("\n"); \ } \ - } while(0); + } while(0); #else -#define BTL_VERBOSE(args) +#define BTL_VERBOSE(args) #endif #endif @@ -93,7 +93,7 @@ OPAL_DECLSPEC extern int mca_btl_base_out(const char*, ...) 
__opal_attribute_for BEGIN_C_DECLS -OPAL_DECLSPEC extern void mca_btl_base_error_no_nics(const char* transport, +OPAL_DECLSPEC extern void mca_btl_base_error_no_nics(const char* transport, const char* nic_name); END_C_DECLS diff --git a/opal/mca/btl/base/btl_base_frame.c b/opal/mca/btl/base/btl_base_frame.c index 4e7d6a334c7..f5f15c86544 100644 --- a/opal/mca/btl/base/btl_base_frame.c +++ b/opal/mca/btl/base/btl_base_frame.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,7 +6,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,10 +14,12 @@ * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,6 +33,45 @@ #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/base.h" +mca_base_var_enum_flag_t *mca_btl_base_flag_enum = NULL; +mca_base_var_enum_flag_t *mca_btl_base_atomic_enum = NULL; + +mca_base_var_enum_value_flag_t mca_btl_base_flag_enum_flags[] = { + {MCA_BTL_FLAGS_SEND, "send", 0}, + {MCA_BTL_FLAGS_PUT, "put", 0}, + {MCA_BTL_FLAGS_GET, "get", 0}, + {MCA_BTL_FLAGS_SEND_INPLACE, "inplace", 0}, + {MCA_BTL_FLAGS_SIGNALED, "signaled", 0}, + {MCA_BTL_FLAGS_ATOMIC_OPS, "atomics", 0}, + {MCA_BTL_FLAGS_ATOMIC_FOPS, "fetching-atomics", 0}, + {MCA_BTL_FLAGS_SINGLE_ADD_PROCS, "static", 0}, + {MCA_BTL_FLAGS_CUDA_PUT, "cuda-put", 0}, + {MCA_BTL_FLAGS_CUDA_GET, "cuda-get", 0}, + {MCA_BTL_FLAGS_CUDA_COPY_ASYNC_SEND, "cuda-async-send", 0}, + {MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV, "cuda-async-recv", 0}, + {MCA_BTL_FLAGS_FAILOVER_SUPPORT, "failover", 0}, + {MCA_BTL_FLAGS_NEED_ACK, "need-ack", 0}, + {MCA_BTL_FLAGS_NEED_CSUM, "need-csum", 0}, + {MCA_BTL_FLAGS_HETEROGENEOUS_RDMA, "hetero-rdma", 0}, + {0, NULL, 0} +}; + +mca_base_var_enum_value_flag_t mca_btl_base_atomic_enum_flags[] = { + {MCA_BTL_ATOMIC_SUPPORTS_ADD, "add", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_AND, "and", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_OR, "or", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_XOR, "xor", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_LAND, "land", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_LOR, "lor", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_LXOR, "lxor", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_SWAP, "swap", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_MIN, "min", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_MAX, "max", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_CSWAP, "compare-and-swap", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_GLOB, "global"}, + {0, NULL, 0} +}; + mca_btl_active_message_callback_t mca_btl_base_active_message_trigger[MCA_BTL_TAG_MAX] = {{0}}; /* @@ -104,6 +146,9 @@ static int mca_btl_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_base_warn_component_unused); + (void) 
mca_base_var_enum_create_flag ("btl_flags", mca_btl_base_flag_enum_flags, &mca_btl_base_flag_enum); + (void) mca_base_var_enum_create_flag ("btl_atomic_flags", mca_btl_base_atomic_enum_flags, &mca_btl_base_atomic_enum); + return OPAL_SUCCESS; } @@ -116,8 +161,8 @@ static int mca_btl_base_open(mca_base_open_flag_t flags) int ret; /* Open up all available components */ - - if (OPAL_SUCCESS != + + if (OPAL_SUCCESS != (ret = mca_base_framework_components_open(&opal_btl_base_framework, flags))) { return ret; } @@ -159,6 +204,14 @@ static int mca_btl_base_close(void) OBJ_DESTRUCT(&mca_btl_base_modules_initialized); + if (mca_btl_base_flag_enum) { + OBJ_RELEASE(mca_btl_base_flag_enum); + } + + if (mca_btl_base_atomic_enum) { + OBJ_RELEASE(mca_btl_base_atomic_enum); + } + #if 0 /* restore event processing */ opal_event_enable(); diff --git a/opal/mca/btl/base/btl_base_mca.c b/opal/mca/btl/base/btl_base_mca.c index 5c14a32aaf0..7988096f957 100644 --- a/opal/mca/btl/base/btl_base_mca.c +++ b/opal/mca/btl/base/btl_base_mca.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,11 +14,13 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,14 +30,13 @@ #include "opal/util/output.h" #include "opal/constants.h" +#include "opal/mca/base/mca_base_var.h" #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/base.h" int mca_btl_base_param_register(mca_base_component_t *version, mca_btl_base_module_t *module) { - char *msg; - /* If this is ever triggered change the uint32_ts in mca_btl_base_module_t to unsigned ints */ assert(sizeof(unsigned int) == sizeof(uint32_t)); @@ -46,33 +47,14 @@ int mca_btl_base_param_register(mca_base_component_t *version, MCA_BASE_VAR_SCOPE_READONLY, &module->btl_exclusivity); - asprintf(&msg, "BTL bit flags (general flags: SEND=%d, PUT=%d, GET=%d, SEND_INPLACE=%d, HETEROGENEOUS_RDMA=%d, " - "ATOMIC_OPS=%d; flags only used by the \"dr\" PML (ignored by others): ACK=%d, CHECKSUM=%d, " - "RDMA_COMPLETION=%d; flags only used by the \"bfo\" PML (ignored by others): FAILOVER_SUPPORT=%d)", - MCA_BTL_FLAGS_SEND, - MCA_BTL_FLAGS_PUT, - MCA_BTL_FLAGS_GET, - MCA_BTL_FLAGS_SEND_INPLACE, - MCA_BTL_FLAGS_HETEROGENEOUS_RDMA, - MCA_BTL_FLAGS_ATOMIC_OPS, - MCA_BTL_FLAGS_NEED_ACK, - MCA_BTL_FLAGS_NEED_CSUM, - MCA_BTL_FLAGS_RDMA_COMPLETION, - MCA_BTL_FLAGS_FAILOVER_SUPPORT); - (void) mca_base_component_var_register(version, "flags", msg, - MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &module->btl_flags); - free(msg); - - asprintf (&msg, "BTL atomic bit flags (general flags: ADD=%d, AND=%d, OR=%d, XOR=%d", - MCA_BTL_ATOMIC_SUPPORTS_ADD, MCA_BTL_ATOMIC_SUPPORTS_AND, MCA_BTL_ATOMIC_SUPPORTS_OR, - MCA_BTL_ATOMIC_SUPPORTS_XOR); - (void) mca_base_component_var_register(version, "atomic_flags", msg, MCA_BASE_VAR_TYPE_UNSIGNED_INT, - NULL, 0, MCA_BASE_VAR_FLAG_DEFAULT_ONLY, OPAL_INFO_LVL_5, + (void) mca_base_component_var_register(version, "flags", "BTL bit flags (general flags: send, put, get, in-place, hetero-rdma, " + "atomics, fetching-atomics)", 
MCA_BASE_VAR_TYPE_UNSIGNED_INT, + &mca_btl_base_flag_enum->super, 0, 0, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, &module->btl_flags); + + (void) mca_base_component_var_register(version, "atomic_flags", "BTL atomic support flags", MCA_BASE_VAR_TYPE_UNSIGNED_INT, + &mca_btl_base_atomic_enum->super, 0, MCA_BASE_VAR_FLAG_DEFAULT_ONLY, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_CONSTANT, &module->btl_atomic_flags); - free(msg); (void) mca_base_component_var_register(version, "rndv_eager_limit", "Size (in bytes, including header) of \"phase 1\" fragment sent for all large messages (must be >= 0 and <= eager_limit)", MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, @@ -135,6 +117,14 @@ int mca_btl_base_param_register(mca_base_component_t *version, MCA_BASE_VAR_SCOPE_READONLY, &module->btl_cuda_rdma_limit); #endif /* OPAL_CUDA_GDR_SUPPORT */ +#if OPAL_CUDA_SUPPORT + module->btl_cuda_max_send_size = 0; + (void) mca_base_component_var_register(version, "cuda_max_send_size", "Maximum size (in bytes) of a single GPU \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1) (only valid on smcuda btl)", + MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, + OPAL_INFO_LVL_4, + MCA_BASE_VAR_SCOPE_READONLY, + &module->btl_cuda_max_send_size); +#endif /* OPAL_CUDA_SUPPORT */ (void) mca_base_component_var_register(version, "max_send_size", "Maximum size (in bytes) of a single \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1)", MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, @@ -179,9 +169,9 @@ int mca_btl_base_param_register(mca_base_component_t *version, /* Verify btl parameters make sense */ int mca_btl_base_param_verify(mca_btl_base_module_t *module) { - if (module->btl_min_rdma_pipeline_size < + if (module->btl_min_rdma_pipeline_size < (module->btl_eager_limit + module->btl_rdma_pipeline_send_length)) { - module->btl_min_rdma_pipeline_size = + module->btl_min_rdma_pipeline_size = module->btl_eager_limit + module->btl_rdma_pipeline_send_length; } 
diff --git a/opal/mca/btl/base/btl_base_select.c b/opal/mca/btl/base/btl_base_select.c index 6783a5cda8d..642901fecdb 100644 --- a/opal/mca/btl/base/btl_base_select.c +++ b/opal/mca/btl/base/btl_base_select.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -17,9 +17,9 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -69,7 +69,7 @@ int mca_btl_base_select(bool enable_progress_threads, /* if there is an include list - item must be in the list to be included */ if ( NULL != include ) { - char** argv = include; + char** argv = include; bool found = false; while(argv && *argv) { if(strcmp(component->btl_version.mca_component_name,*argv) == 0) { @@ -84,7 +84,7 @@ int mca_btl_base_select(bool enable_progress_threads, /* otherwise - check the exclude list to see if this item has been specifically excluded */ } else if ( NULL != exclude ) { - char** argv = exclude; + char** argv = exclude; bool found = false; while(argv && *argv) { if(strcmp(component->btl_version.mca_component_name,*argv) == 0) { @@ -98,7 +98,7 @@ int mca_btl_base_select(bool enable_progress_threads, } } - opal_output_verbose(10, opal_btl_base_framework.framework_output, + opal_output_verbose(10, opal_btl_base_framework.framework_output, "select: initializing %s component %s", component->btl_version.mca_type_name, component->btl_version.mca_component_name); @@ -122,7 +122,7 @@ int mca_btl_base_select(bool enable_progress_threads, OBJ_RELEASE(cli); 
mca_base_component_close((mca_base_component_t *) component, opal_btl_base_framework.framework_output); - } + } /* Otherwise, if it initialized properly, save it. */ diff --git a/opal/mca/btl/base/help-mpi-btl-base.txt b/opal/mca/btl/base/help-mpi-btl-base.txt index 91a3e4146e5..5531b1dd3ec 100644 --- a/opal/mca/btl/base/help-mpi-btl-base.txt +++ b/opal/mca/btl/base/help-mpi-btl-base.txt @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # [btl:no-nics] @@ -26,3 +26,6 @@ Module: %s Another transport will be used instead, although this may result in lower performance. + +NOTE: You can disable this warning by setting the MCA parameter +btl_base_warn_component_unused to 0. diff --git a/opal/mca/btl/base/owner.txt b/opal/mca/btl/base/owner.txt index 5994893ef5a..bf08d5fcc99 100644 --- a/opal/mca/btl/base/owner.txt +++ b/opal/mca/btl/base/owner.txt @@ -1,5 +1,5 @@ # -# owner/status file +# owner/status file # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # diff --git a/opal/mca/btl/btl.h b/opal/mca/btl/btl.h index b41e54b353e..19af3630084 100644 --- a/opal/mca/btl/btl.h +++ b/opal/mca/btl/btl.h @@ -3,14 +3,14 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2008 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 NVIDIA Corporation. All rights reserved. @@ -122,8 +122,7 @@ #include "opal/datatype/opal_convertor.h" #include "opal/mca/mca.h" #include "opal/mca/mpool/mpool.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" +#include "opal/mca/rcache/rcache.h" BEGIN_C_DECLS @@ -134,7 +133,6 @@ BEGIN_C_DECLS struct mca_btl_base_module_t; struct mca_btl_base_endpoint_t; struct mca_btl_base_descriptor_t; -struct mca_mpool_base_resources_t; struct opal_proc_t; /** @@ -231,12 +229,18 @@ typedef uint8_t mca_btl_base_tag_t; */ #define MCA_BTL_FLAGS_SIGNALED 0x4000 - /** The BTL supports network atomic operations */ #define MCA_BTL_FLAGS_ATOMIC_OPS 0x08000 /** The BTL supports fetching network atomic operations */ #define MCA_BTL_FLAGS_ATOMIC_FOPS 0x10000 +/** The BTL requires add_procs to be with all procs including non-local. Shared-memory + * BTLs should not set this flag. */ +#define MCA_BTL_FLAGS_SINGLE_ADD_PROCS 0x20000 + +/* The BTL is using progress thread and need the protection on matching */ +#define MCA_BTL_FLAGS_BTL_PROGRESS_THREAD_ENABLED 0x40000 + /* Default exclusivity levels */ #define MCA_BTL_EXCLUSIVITY_HIGH (64*1024) /* internal loopback */ #define MCA_BTL_EXCLUSIVITY_DEFAULT 1024 /* GM/IB/etc. 
*/ @@ -247,28 +251,29 @@ typedef uint8_t mca_btl_base_tag_t; #define MCA_BTL_ERROR_FLAGS_NONFATAL 0x2 #define MCA_BTL_ERROR_FLAGS_ADD_CUDA_IPC 0x4 -/** registration flags */ +/** Registration flags. The access flags are a 1-1 mapping with the rcache + * access flags. */ enum { /** Allow local write on the registered region. If a region is registered * with this flag the registration can be used as the local handle for a * btl_get operation. */ - MCA_BTL_REG_FLAG_LOCAL_WRITE = 0x00000001, + MCA_BTL_REG_FLAG_LOCAL_WRITE = MCA_RCACHE_ACCESS_LOCAL_WRITE, /** Allow remote read on the registered region. If a region is registered * with this flag the registration can be used as the remote handle for a * btl_get operation. */ - MCA_BTL_REG_FLAG_REMOTE_READ = 0x00000002, + MCA_BTL_REG_FLAG_REMOTE_READ = MCA_RCACHE_ACCESS_REMOTE_READ, /** Allow remote write on the registered region. If a region is registered * with this flag the registration can be used as the remote handle for a * btl_put operation. */ - MCA_BTL_REG_FLAG_REMOTE_WRITE = 0x00000004, + MCA_BTL_REG_FLAG_REMOTE_WRITE = MCA_RCACHE_ACCESS_REMOTE_WRITE, /** Allow remote atomic operations on the registered region. If a region is * registered with this flag the registration can be used as the remote * handle for a btl_atomic_op or btl_atomic_fop operation. */ - MCA_BTL_REG_FLAG_REMOTE_ATOMIC = 0x00000008, + MCA_BTL_REG_FLAG_REMOTE_ATOMIC = MCA_RCACHE_ACCESS_REMOTE_ATOMIC, /** Allow any btl operation on the registered region. If a region is registered * with this flag the registration can be used as the local or remote handle for * any btl operation.
*/ + MCA_BTL_REG_FLAG_ACCESS_ANY = MCA_RCACHE_ACCESS_ANY, #if OPAL_CUDA_GDR_SUPPORT /** Region is in GPU memory */ MCA_BTL_REG_FLAG_CUDA_GPU_MEM = 0x00010000, @@ -285,12 +290,45 @@ enum { MCA_BTL_ATOMIC_SUPPORTS_OR = 0x00000400, /** The btl supports atomic bitwise exclusive or */ MCA_BTL_ATOMIC_SUPPORTS_XOR = 0x00000800, + + /** The btl supports logical and */ + MCA_BTL_ATOMIC_SUPPORTS_LAND = 0x00001000, + /** The btl supports logical or */ + MCA_BTL_ATOMIC_SUPPORTS_LOR = 0x00002000, + /** The btl supports logical exclusive or */ + MCA_BTL_ATOMIC_SUPPORTS_LXOR = 0x00004000, + + /** The btl supports atomic swap */ + MCA_BTL_ATOMIC_SUPPORTS_SWAP = 0x00010000, + + /** The btl supports atomic min */ + MCA_BTL_ATOMIC_SUPPORTS_MIN = 0x00100000, + /** The btl supports atomic max */ + MCA_BTL_ATOMIC_SUPPORTS_MAX = 0x00200000, + + /** The btl supports 32-bit integer operations. Keep in mind the btl may + * support only a subset of the available atomics. */ + MCA_BTL_ATOMIC_SUPPORTS_32BIT = 0x01000000, + + /** The btl supports floating-point operations. Keep in mind the btl may + * support only a subset of the available atomics and may not support + * both 64 or 32-bit floating point.
*/ + MCA_BTL_ATOMIC_SUPPORTS_FLOAT = 0x02000000, + /** The btl supports atomic compare-and-swap */ MCA_BTL_ATOMIC_SUPPORTS_CSWAP = 0x10000000, + /** The btl guarantees global atomicity (can mix btl atomics with cpu atomics) */ MCA_BTL_ATOMIC_SUPPORTS_GLOB = 0x20000000, }; +enum { + /** Use 32-bit atomics */ + MCA_BTL_ATOMIC_FLAG_32BIT = 0x00000001, + /** Use floating-point atomics */ + MCA_BTL_ATOMIC_FLAG_FLOAT = 0x00000002, +}; + enum mca_btl_base_atomic_op_t { /** Atomic add: (*remote_address) = (*remote_address) + operand */ MCA_BTL_ATOMIC_ADD = 0x0001, @@ -300,6 +338,20 @@ enum mca_btl_base_atomic_op_t { MCA_BTL_ATOMIC_OR = 0x0012, /** Atomic xor: (*remote_address) = (*remote_address) ^ operand */ MCA_BTL_ATOMIC_XOR = 0x0014, + /** Atomic logical and: (*remote_address) = (*remote_address) && operand */ + MCA_BTL_ATOMIC_LAND = 0x0015, + /** Atomic logical or: (*remote_address) = (*remote_address) || operand */ + MCA_BTL_ATOMIC_LOR = 0x0016, + /** Atomic logical xor: (*remote_address) = (*remote_address) != operand */ + MCA_BTL_ATOMIC_LXOR = 0x0017, + /** Atomic swap: (*remote_address) = operand */ + MCA_BTL_ATOMIC_SWAP = 0x001a, + /** Atomic min */ + MCA_BTL_ATOMIC_MIN = 0x0020, + /** Atomic max */ + MCA_BTL_ATOMIC_MAX = 0x0021, + + MCA_BTL_ATOMIC_LAST, }; typedef enum mca_btl_base_atomic_op_t mca_btl_base_atomic_op_t; @@ -605,12 +657,15 @@ typedef int (*mca_btl_base_module_finalize_fn_t)( * modex_recv() function. The BTL may utilize this information to * determine reachability of each peer process. * - * For each process that is reachable by the BTL, the bit corresponding to the index - * into the proc array (nprocs) should be set in the reachable bitmask. The BTL - * will return an array of pointers to a data structure defined - * by the BTL that is then returned to the BTL on subsequent calls to the BTL data - * transfer functions (e.g btl_send). This may be used by the BTL to cache any addressing - * or connection information (e.g. 
TCP socket, IB queue pair). + * The caller may pass a "reachable" bitmap pointer. If it is not + * NULL, for each process that is reachable by the BTL, the bit + * corresponding to the index into the proc array (nprocs) should be + * set in the reachable bitmask. The BTL will return an array of + * pointers to a data structure defined by the BTL that is then + * returned to the BTL on subsequent calls to the BTL data transfer + * functions (e.g btl_send). This may be used by the BTL to cache any + * addressing or connection information (e.g. TCP socket, IB queue + * pair). */ typedef int (*mca_btl_base_module_add_procs_fn_t)( struct mca_btl_base_module_t* btl, @@ -966,7 +1021,7 @@ typedef int (*mca_btl_base_module_get_fn_t) (struct mca_btl_base_module_t *btl, * (remote_address, remote_address + 8) * @param op (IN) Operation to perform * @param operand (IN) Operand for the operation - * @param flags (IN) Flags for this put operation + * @param flags (IN) Flags for this atomic operation * @param order (IN) Ordering * @param cbfunc (IN) Function to call on completion (if queued) * @param cbcontext (IN) Context for the callback @@ -1010,7 +1065,7 @@ typedef int (*mca_btl_base_module_atomic_op64_fn_t) (struct mca_btl_base_module_ * (remote_address, remote_address + 8) * @param op (IN) Operation to perform * @param operand (IN) Operand for the operation - * @param flags (IN) Flags for this put operation + * @param flags (IN) Flags for this atomic operation * @param order (IN) Ordering * @param cbfunc (IN) Function to call on completion (if queued) * @param cbcontext (IN) Context for the callback @@ -1056,7 +1111,7 @@ typedef int (*mca_btl_base_module_atomic_fop64_fn_t) (struct mca_btl_base_module * (remote_address, remote_address + 8) * @param compare (IN) Operand for the operation * @param value (IN) Value to store on success - * @param flags (IN) Flags for this put operation + * @param flags (IN) Flags for this atomic operation * @param order (IN) Ordering * @param 
cbfunc (IN) Function to call on completion (if queued) * @param cbcontext (IN) Context for the callback @@ -1170,6 +1225,9 @@ struct mca_btl_base_module_t { size_t btl_cuda_eager_limit; /**< switch from eager to RDMA */ size_t btl_cuda_rdma_limit; /**< switch from RDMA to rndv pipeline */ #endif /* OPAL_CUDA_GDR_SUPPORT */ +#if OPAL_CUDA_SUPPORT + size_t btl_cuda_max_send_size; /**< set if CUDA max send_size is different from host max send size */ +#endif /* OPAL_CUDA_SUPPORT */ }; typedef struct mca_btl_base_module_t mca_btl_base_module_t; diff --git a/opal/mca/btl/openib/Makefile.am b/opal/mca/btl/openib/Makefile.am index 8ec0d4398ff..9c4237085a5 100644 --- a/opal/mca/btl/openib/Makefile.am +++ b/opal/mca/btl/openib/Makefile.am @@ -55,8 +55,6 @@ sources = \ btl_openib_async.h \ btl_openib_xrc.c \ btl_openib_xrc.h \ - btl_openib_fd.h \ - btl_openib_fd.c \ btl_openib_ip.h \ btl_openib_ip.c \ btl_openib_put.c \ @@ -68,13 +66,6 @@ sources = \ connect/btl_openib_connect_empty.h \ connect/connect.h -# If we have failover support, build that file -if MCA_btl_openib_enable_failover -sources += \ - btl_openib_failover.c \ - btl_openib_failover.h -endif - # If we have rdmacm support, build that CPC if MCA_btl_openib_have_rdmacm sources += \ diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index 53e22100aad..ddc9720acb3 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -10,14 +10,14 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2006-2015 Mellanox Technologies. All rights reserved. 
+ * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -41,10 +41,6 @@ #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/btl_base_error.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/runtime/opal_cr.h" -#endif - #include "btl_openib_ini.h" #include "btl_openib.h" @@ -57,7 +53,7 @@ #include "opal/datatype/opal_convertor.h" #include "opal/mca/mpool/base/base.h" #include "opal/mca/mpool/mpool.h" -#include "opal/mca/mpool/grdma/mpool_grdma.h" +#include "opal/mca/rcache/rcache.h" #if OPAL_CUDA_SUPPORT #include "opal/datatype/opal_datatype_cuda.h" @@ -84,9 +80,7 @@ #ifdef HAVE_UNISTD_H #include #endif -#ifdef OPAL_HAVE_HWLOC -#include "opal/mca/hwloc/hwloc.h" -#endif +#include "opal/mca/hwloc/hwloc-internal.h" #ifndef MIN #define MIN(a,b) ((a)<(b)?(a):(b)) @@ -113,7 +107,7 @@ mca_btl_openib_module_t mca_btl_openib_module = { .btl_get = mca_btl_openib_get, .btl_dump = mca_btl_base_dump, .btl_register_error = mca_btl_openib_register_error_cb, /* error call back registration */ - .btl_ft_event = mca_btl_openib_ft_event, + .btl_ft_event = NULL, .btl_register_mem = mca_btl_openib_register_mem, .btl_deregister_mem = mca_btl_openib_deregister_mem, #if HAVE_DECL_IBV_ATOMIC_HCA @@ -212,7 +206,6 @@ static int adjust_cq(mca_btl_openib_device_t *device, const int cq) return OPAL_ERROR; } - OPAL_THREAD_LOCK(&device->device_lock); if (!device->progress) { int rc; device->progress = true; @@ -221,7 +214,6 @@ static int adjust_cq(mca_btl_openib_device_t 
*device, const int cq) return rc; } } - OPAL_THREAD_UNLOCK(&device->device_lock); #endif } #ifdef HAVE_IBV_RESIZE_CQ @@ -358,8 +350,10 @@ static int create_srq(mca_btl_openib_module_t *openib_btl) } else #endif { + opal_mutex_lock(&openib_btl->device->device_lock); openib_btl->qps[qp].u.srq_qp.srq = ibv_create_srq(openib_btl->device->ib_pd, &attr); + opal_mutex_unlock(&openib_btl->device->device_lock); } if (NULL == openib_btl->qps[qp].u.srq_qp.srq) { mca_btl_openib_show_init_error(__FILE__, __LINE__, @@ -405,47 +399,73 @@ static int create_srq(mca_btl_openib_module_t *openib_btl) } } + openib_btl->srqs_created = true; + return OPAL_SUCCESS; } -static int mca_btl_openib_size_queues(struct mca_btl_openib_module_t* openib_btl, size_t nprocs) +static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl) +{ + int rc = OPAL_SUCCESS; + opal_mutex_lock(&openib_btl->ib_lock); + if (!openib_btl->srqs_created && + (mca_btl_openib_component.num_srq_qps > 0 || + mca_btl_openib_component.num_xrc_qps > 0)) { + rc = create_srq(openib_btl); + } + opal_mutex_unlock(&openib_btl->ib_lock); + return rc; +} + + +static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl) { uint32_t send_cqes, recv_cqes; - int rc = OPAL_SUCCESS, qp; + int rc = OPAL_SUCCESS; mca_btl_openib_device_t *device = openib_btl->device; + uint32_t requested[BTL_OPENIB_MAX_CQ]; + + opal_mutex_lock(&openib_btl->ib_lock); + + for (int cq = 0 ; cq < BTL_OPENIB_MAX_CQ ; ++cq) { + requested[cq] = 0; + } /* figure out reasonable sizes for completion queues */ - for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { - if(BTL_OPENIB_QP_TYPE_SRQ(qp)) { + for (int qp = 0 ; qp < mca_btl_openib_component.num_qps ; qp++) { + if (BTL_OPENIB_QP_TYPE_SRQ(qp)) { send_cqes = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max; recv_cqes = mca_btl_openib_component.qp_infos[qp].rd_num; } else { send_cqes = (mca_btl_openib_component.qp_infos[qp].rd_num + - 
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * nprocs; + mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * openib_btl->num_peers; recv_cqes = send_cqes; } - openib_btl->device->cq_size[qp_cq_prio(qp)] += recv_cqes; - openib_btl->device->cq_size[BTL_OPENIB_LP_CQ] += send_cqes; - } - rc = adjust_cq(device, BTL_OPENIB_HP_CQ); - if (OPAL_SUCCESS != rc) { - goto out; + requested[qp_cq_prio(qp)] += recv_cqes; + requested[BTL_OPENIB_LP_CQ] += send_cqes; } - rc = adjust_cq(device, BTL_OPENIB_LP_CQ); - if (OPAL_SUCCESS != rc) { - goto out; - } + opal_mutex_lock (&openib_btl->device->device_lock); + for (int cq = 0 ; cq < BTL_OPENIB_MAX_CQ ; ++cq) { + if (requested[cq] < mca_btl_openib_component.ib_cq_size[cq]) { + requested[cq] = mca_btl_openib_component.ib_cq_size[cq]; + } else if (requested[cq] > (uint32_t) openib_btl->device->ib_dev_attr.max_cqe) { + requested[cq] = openib_btl->device->ib_dev_attr.max_cqe; + } - if (0 == openib_btl->num_peers && - (mca_btl_openib_component.num_srq_qps > 0 || - mca_btl_openib_component.num_xrc_qps > 0)) { - rc = create_srq(openib_btl); + if (openib_btl->device->cq_size[cq] < requested[cq]) { + openib_btl->device->cq_size[cq] = requested[cq]; + + rc = adjust_cq (device, cq); + if (OPAL_SUCCESS != rc) { + break; + } + } } + opal_mutex_unlock (&openib_btl->device->device_lock); + opal_mutex_unlock(&openib_btl->ib_lock); - openib_btl->num_peers += nprocs; -out: return rc; } @@ -608,10 +628,12 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl, static int prepare_device_for_use (mca_btl_openib_device_t *device) { mca_btl_openib_frag_init_data_t *init_data; - int rc, length; + int rc = OPAL_SUCCESS, length; + + opal_mutex_lock(&device->device_lock); if (device->ready_for_use) { - return OPAL_SUCCESS; + goto exit; } /* For each btl module that we made - find every @@ -632,7 +654,8 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) sizeof(mca_btl_openib_device_qp_t)); if (NULL == 
device->qps) { BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__)); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } for (int qp_index = 0 ; qp_index < mca_btl_openib_component.num_qps ; qp_index++) { @@ -640,28 +663,10 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) OBJ_CONSTRUCT(&device->qps[qp_index].recv_free, opal_free_list_t); } - if(mca_btl_openib_component.use_async_event_thread) { - mca_btl_openib_async_cmd_t async_command = {.a_cmd = OPENIB_ASYNC_CMD_FD_ADD, - .fd = device->ib_dev_context->async_fd, - .qp = NULL}; + device->got_fatal_event = false; + device->got_port_event = false; + mca_btl_openib_async_add_device (device); - /* start the async even thread if it is not already started */ - if (start_async_event_thread() != OPAL_SUCCESS) - return OPAL_ERROR; - - device->got_fatal_event = false; - device->got_port_event = false; - if (write(mca_btl_openib_component.async_pipe[1], - &async_command, sizeof(mca_btl_openib_async_cmd_t))<0){ - BTL_ERROR(("Failed to write to pipe [%d]",errno)); - return OPAL_ERROR; - } - /* wait for ok from thread */ - if (OPAL_SUCCESS != - btl_openib_async_command_done(device->ib_dev_context->async_fd)) { - return OPAL_ERROR; - } - } #if OPAL_ENABLE_PROGRESS_THREADS == 1 /* Prepare data for thread, but not starting it */ OBJ_CONSTRUCT(&device->thread, opal_thread_t); @@ -682,13 +687,15 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) mca_btl_openib_component.num_xrc_qps, ibv_get_device_name(device->ib_dev), opal_process_info.nodename); - return OPAL_ERROR; + rc = OPAL_ERROR; + goto exit; } if (MCA_BTL_XRC_ENABLED) { if (OPAL_SUCCESS != mca_btl_openib_open_xrc_domain(device)) { BTL_ERROR(("XRC Internal error. 
Failed to open xrc domain")); - return OPAL_ERROR; + rc = OPAL_ERROR; + goto exit; } } #endif @@ -703,7 +710,8 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) sizeof(mca_btl_openib_endpoint_t*)); if(NULL == device->eager_rdma_buffers) { BTL_ERROR(("Memory allocation fails")); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } } @@ -716,7 +724,8 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) device->eager_rdma_buffers = NULL; } BTL_ERROR(("Memory allocation fails")); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } length = sizeof(mca_btl_openib_header_t) + @@ -732,7 +741,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) mca_btl_openib_component.buffer_alignment, mca_btl_openib_component.ib_free_list_num, -1, mca_btl_openib_component.ib_free_list_inc, - device->mpool, 0, NULL, mca_btl_openib_frag_init, + device->mpool, 0, device->rcache, mca_btl_openib_frag_init, init_data); if (OPAL_SUCCESS != rc) { /* If we're "out of memory", this usually means that we ran @@ -744,7 +753,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) "opal_free_list_init", ibv_get_device_name(device->ib_dev)); } - return rc; + goto exit; } /* setup all the qps */ @@ -752,7 +761,8 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t)); if (NULL == init_data) { BTL_ERROR(("Memory allocation fails")); - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto exit; } /* Initialize pool of send fragments */ @@ -772,7 +782,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) mca_btl_openib_component.ib_free_list_num, mca_btl_openib_component.ib_free_list_max, mca_btl_openib_component.ib_free_list_inc, - device->mpool, 0, NULL, mca_btl_openib_frag_init, + device->mpool, 0, device->rcache, 
mca_btl_openib_frag_init, init_data); if (OPAL_SUCCESS != rc) { /* If we're "out of memory", this usually means that we @@ -785,7 +795,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) "opal_free_list_init", ibv_get_device_name(device->ib_dev)); } - return OPAL_ERROR; + goto exit; } init_data = (mca_btl_openib_frag_init_data_t *) malloc(sizeof(mca_btl_openib_frag_init_data_t)); @@ -805,17 +815,208 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) mca_btl_openib_component.ib_free_list_num, mca_btl_openib_component.ib_free_list_max, mca_btl_openib_component.ib_free_list_inc, - device->mpool, 0, NULL, mca_btl_openib_frag_init, + device->mpool, 0, device->rcache, mca_btl_openib_frag_init, init_data)) { - return OPAL_ERROR; + rc = OPAL_ERROR; + goto exit; } } device->ready_for_use = true; +exit: + opal_mutex_unlock(&device->device_lock); + return rc; +} + +static int init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_openib_proc_t* ib_proc, + volatile mca_btl_base_endpoint_t **endpoint_ptr, + int local_port_cnt, int btl_rank) +{ + int rem_port_cnt, matching_port = -1, j, rc; + mca_btl_base_endpoint_t *endpoint; + opal_btl_openib_connect_base_module_t *local_cpc; + opal_btl_openib_connect_base_module_data_t *remote_cpc_data; + + *endpoint_ptr = NULL; + + /* check if the remote proc has any ports that: + - on the same subnet as the local proc, and + - on that subnet, has a CPC in common with the local proc + */ + + rem_port_cnt = 0; + BTL_VERBOSE(("got %d port_infos ", ib_proc->proc_port_count)); + for (j = 0; j < (int) ib_proc->proc_port_count; j++){ + BTL_VERBOSE(("got a subnet %016" PRIx64, + ib_proc->proc_ports[j].pm_port_info.subnet_id)); + if (ib_proc->proc_ports[j].pm_port_info.subnet_id == + openib_btl->port_info.subnet_id) { + BTL_VERBOSE(("Got a matching subnet!")); + if (rem_port_cnt == btl_rank) { + matching_port = j; + } + rem_port_cnt++; + } else { + if 
(mca_btl_openib_component.allow_different_subnets) { + BTL_VERBOSE(("Using different subnets!")); + if (rem_port_cnt == btl_rank) { + matching_port = j; + } + rem_port_cnt++; + } + } + } + + if (0 == rem_port_cnt) { + /* no use trying to communicate with this endpoint */ + BTL_VERBOSE(("No matching subnet id/CPC was found, moving on.. ")); + return OPAL_ERROR; + } + + /* If this process has multiple ports on a single subnet ID, + and the remote proc also has multiple ports on this same + subnet ID, the default connection pattern is: + + LOCAL REMOTE PEER + 1st port on subnet X <--> 1st port on subnet X + 2nd port on subnet X <--> 2nd port on subnet X + 3rd port on subnet X <--> 3rd port on subnet X + ...etc. + + Note that the port numbers may not be contiguous, and they + may not be the same on either side. Hence the "1st", "2nd", + "3rd", etc. notation, above. + + Hence, if the local "rank" of this module's port on the + subnet ID is greater than the total number of ports on the + peer on this same subnet, then we have no match. So skip + this connection. */ + if (rem_port_cnt < local_port_cnt && btl_rank >= rem_port_cnt) { + BTL_VERBOSE(("Not enough remote ports on this subnet id, moving on.. ")); + return OPAL_ERROR; + } + + /* Now that we have verified that we're on the same subnet and + the remote peer has enough ports, see if that specific port + on the peer has a matching CPC. */ + assert(btl_rank <= ib_proc->proc_port_count); + assert(matching_port != -1); + if (OPAL_SUCCESS != + opal_btl_openib_connect_base_find_match(openib_btl, + &(ib_proc->proc_ports[matching_port]), + &local_cpc, + &remote_cpc_data)) { + return OPAL_ERROR; + } + + /* The btl_proc datastructure is shared by all IB BTL + * instances that are trying to reach this destination. + * Cache the peer instance on the btl_proc. 
+ */ + endpoint = OBJ_NEW(mca_btl_openib_endpoint_t); + assert(((opal_object_t*)endpoint)->obj_reference_count == 1); + if(NULL == endpoint) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + +#if HAVE_XRC + if (MCA_BTL_XRC_ENABLED) { + int rem_port_cnt = 0; + for(j = 0; j < (int) ib_proc->proc_port_count; j++) { + if(ib_proc->proc_ports[j].pm_port_info.subnet_id == + openib_btl->port_info.subnet_id) { + if (rem_port_cnt == btl_rank) + break; + else + rem_port_cnt ++; + } else { + if (mca_btl_openib_component.allow_different_subnets) { + if (rem_port_cnt == btl_rank) + break; + else + rem_port_cnt ++; + } + } + } + + assert(rem_port_cnt == btl_rank); + /* Push the subnet/lid/jobid to xrc hash */ + rc = mca_btl_openib_ib_address_add_new( + ib_proc->proc_ports[j].pm_port_info.lid, + ib_proc->proc_ports[j].pm_port_info.subnet_id, + ib_proc->proc_opal->proc_name.jobid, endpoint); + if (OPAL_SUCCESS != rc ) { + return OPAL_ERROR; + } + } +#endif + mca_btl_openib_endpoint_init(openib_btl, endpoint, + local_cpc, + &(ib_proc->proc_ports[matching_port]), + remote_cpc_data); + + rc = mca_btl_openib_proc_insert(ib_proc, endpoint); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(endpoint); + return OPAL_ERROR; + } + + if(OPAL_SUCCESS != mca_btl_openib_tune_endpoint(openib_btl, endpoint)) { + OBJ_RELEASE(endpoint); + return OPAL_ERROR; + } + + /* protect device because several endpoints for different ib_proc's + * may be simultaneously initialized */ + opal_mutex_lock(&openib_btl->device->device_lock); + endpoint->index = opal_pointer_array_add(openib_btl->device->endpoints, (void*)endpoint); + opal_mutex_unlock(&openib_btl->device->device_lock); + + if( 0 > endpoint->index ) { + OBJ_RELEASE(endpoint); + return OPAL_ERROR; + } + + /* Tell the selected CPC that it won. NOTE: This call is + outside of / separate from mca_btl_openib_endpoint_init() + because this function likely needs the endpoint->index. 
*/ + if (NULL != local_cpc->cbm_endpoint_init) { + rc = local_cpc->cbm_endpoint_init(endpoint); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(endpoint); + return OPAL_ERROR; + } + } + + *endpoint_ptr = endpoint; return OPAL_SUCCESS; } +static int get_openib_btl_params(mca_btl_openib_module_t* openib_btl, int *port_cnt_ptr) +{ + int port_cnt = 0, rank = -1, j; + for(j=0; j < mca_btl_openib_component.ib_num_btls; j++){ + if(mca_btl_openib_component.openib_btls[j]->port_info.subnet_id + == openib_btl->port_info.subnet_id) { + if(openib_btl == mca_btl_openib_component.openib_btls[j]) { + rank = port_cnt; + } + port_cnt++; + } else { + if (mca_btl_openib_component.allow_different_subnets) { + if (openib_btl == mca_btl_openib_component.openib_btls[j]) { + rank = port_cnt; + } + port_cnt++; + } + } + } + *port_cnt_ptr = port_cnt; + return rank; +} + /* * add a proc to this btl module * creates an endpoint that is setup on the @@ -829,22 +1030,15 @@ int mca_btl_openib_add_procs( opal_bitmap_t* reachable) { mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*)btl; - int i,j, rc, local_procs; - int rem_subnet_id_port_cnt; + size_t nprocs_new_loc = 0, nprocs_new = 0; + int i,j, rc; int lcl_subnet_id_port_cnt = 0; int btl_rank = 0; - mca_btl_base_endpoint_t* endpoint; - opal_btl_openib_connect_base_module_t *local_cpc; - opal_btl_openib_connect_base_module_data_t *remote_cpc_data; + volatile mca_btl_base_endpoint_t* endpoint; - for(j=0; j < mca_btl_openib_component.ib_num_btls; j++){ - if(mca_btl_openib_component.openib_btls[j]->port_info.subnet_id - == openib_btl->port_info.subnet_id) { - if(openib_btl == mca_btl_openib_component.openib_btls[j]) { - btl_rank = lcl_subnet_id_port_cnt; - } - lcl_subnet_id_port_cnt++; - } + btl_rank = get_openib_btl_params(openib_btl, &lcl_subnet_id_port_cnt); + if( 0 > btl_rank ){ + return OPAL_ERR_NOT_FOUND; } #if HAVE_XRC @@ -864,22 +1058,20 @@ int mca_btl_openib_add_procs( return rc; } - rc = mca_btl_openib_size_queues(openib_btl, 
nprocs); - if (OPAL_SUCCESS != rc) { - BTL_ERROR(("error creating cqs")); - return rc; + if (0 == openib_btl->num_peers) { + /* ensure completion queues are created before attempting to + * make a loop-back queue pair */ + rc = openib_btl_size_queues(openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("error creating cqs")); + return rc; + } } - for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) { + /* prepare all proc's and account them properly */ + for (i = 0, nprocs_new_loc = 0 ; i < (int) nprocs; i++) { struct opal_proc_t* proc = procs[i]; mca_btl_openib_proc_t* ib_proc; - int remote_matching_port; - - opal_output(-1, "add procs: adding proc %d", i); - - if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { - local_procs ++; - } #if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE) /* Most current iWARP adapters (June 2008) cannot handle @@ -893,161 +1085,199 @@ int mca_btl_openib_add_procs( } #endif - if(NULL == (ib_proc = mca_btl_openib_proc_create(proc))) { + if(NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc)) ) { /* if we don't have connection info for this process, it's * okay because some other method might be able to reach it, * so just mark it as unreachable by us */ continue; } - /* check if the remote proc has any ports that: - - on the same subnet as the local proc, and - - on that subnet, has a CPC in common with the local proc - */ - remote_matching_port = -1; - rem_subnet_id_port_cnt = 0; - BTL_VERBOSE(("got %d port_infos ", ib_proc->proc_port_count)); - for (j = 0; j < (int) ib_proc->proc_port_count; j++){ - BTL_VERBOSE(("got a subnet %016" PRIx64, - ib_proc->proc_ports[j].pm_port_info.subnet_id)); - if (ib_proc->proc_ports[j].pm_port_info.subnet_id == - openib_btl->port_info.subnet_id) { - BTL_VERBOSE(("Got a matching subnet!")); - if (rem_subnet_id_port_cnt == btl_rank) { - remote_matching_port = j; - } - rem_subnet_id_port_cnt++; + /* account this openib_btl in this proc */ + rc = mca_btl_openib_proc_reg_btl(ib_proc, openib_btl); + + 
opal_mutex_unlock( &ib_proc->proc_lock ); + + switch( rc ){ + case OPAL_SUCCESS: + /* this is a new process to this openib btl */ + nprocs_new++; + if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { + nprocs_new_loc ++; } + break; + case OPAL_ERR_RESOURCE_BUSY: + /* process was accounted earlier in this openib btl */ + break; + default: + /* unexpected error, e.g. out of mem */ + return rc; } + } - if (0 == rem_subnet_id_port_cnt) { - /* no use trying to communicate with this endpoint */ - BTL_VERBOSE(("No matching subnet id/CPC was found, moving on.. ")); - continue; + if (nprocs_new) { + opal_atomic_add_32 (&openib_btl->num_peers, nprocs_new); + + /* adjust cq sizes given the new procs */ + rc = openib_btl_size_queues (openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("error creating cqs")); + return rc; } + } - /* If this process has multiple ports on a single subnet ID, - and the report proc also has multiple ports on this same - subnet ID, the default connection pattern is: - - LOCAL REMOTE PEER - 1st port on subnet X <--> 1st port on subnet X - 2nd port on subnet X <--> 2nd port on subnet X - 3nd port on subnet X <--> 3nd port on subnet X - ...etc. - - Note that the port numbers may not be contiguous, and they - may not be the same on either side. Hence the "1st", "2nd", - "3rd, etc. notation, above. - - Hence, if the local "rank" of this module's port on the - subnet ID is greater than the total number of ports on the - peer on this same subnet, then we have no match. So skip - this connection. */ - if (rem_subnet_id_port_cnt < lcl_subnet_id_port_cnt && - btl_rank >= rem_subnet_id_port_cnt) { - BTL_VERBOSE(("Not enough remote ports on this subnet id, moving on.. 
")); + rc = openib_btl_prepare (openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("could not prepare openib btl module for use")); + return rc; + } + + opal_mutex_lock(&openib_btl->device->device_lock); + openib_btl->local_procs += nprocs_new_loc; + if( 0 < nprocs_new_loc ){ + openib_btl->device->mem_reg_max = openib_btl->device->mem_reg_max_total / openib_btl->local_procs; + } + opal_mutex_unlock(&openib_btl->device->device_lock); + + /* prepare endpoints */ + for (i = 0, nprocs_new_loc = 0 ; i < (int) nprocs; i++) { + struct opal_proc_t* proc = procs[i]; + mca_btl_openib_proc_t* ib_proc; + bool found_existing = false; + + opal_output(-1, "add procs: adding proc %d", i); + +#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE) + /* Most current iWARP adapters (June 2008) cannot handle + talking to other processes on the same host (!) -- so mark + them as unreachable (need to use sm). So for the moment, + we'll just mark any local peer on an iWARP NIC as + unreachable. See trac ticket #1352. */ + if (IBV_TRANSPORT_IWARP == openib_btl->device->ib_dev->transport_type && + OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { continue; } +#endif - /* Now that we have verified that we're on the same subnet and - the remote peer has enough ports, see if that specific port - on the peer has a matching CPC. */ - assert(btl_rank <= ib_proc->proc_port_count); - assert(remote_matching_port != -1); - if (OPAL_SUCCESS != - opal_btl_openib_connect_base_find_match(openib_btl, - &(ib_proc->proc_ports[remote_matching_port]), - &local_cpc, - &remote_cpc_data)) { + if(NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc)) ) { + /* if we don't have connection info for this process, it's + * okay because some other method might be able to reach it, + * so just mark it as unreachable by us */ continue; } - OPAL_THREAD_LOCK(&ib_proc->proc_lock); - - /* The btl_proc datastructure is shared by all IB BTL - * instances that are trying to reach this destination. 
- * Cache the peer instance on the btl_proc. - */ - endpoint = OBJ_NEW(mca_btl_openib_endpoint_t); - assert(((opal_object_t*)endpoint)->obj_reference_count == 1); - if(NULL == endpoint) { - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - return OPAL_ERR_OUT_OF_RESOURCE; + found_existing = false; + + for (j = 0 ; j < (int) ib_proc->proc_endpoint_count ; ++j) { + endpoint = ib_proc->proc_endpoints[j]; + if (endpoint->endpoint_btl == openib_btl) { + found_existing = true; + break; + } } -#if HAVE_XRC - if (MCA_BTL_XRC_ENABLED) { - int rem_port_cnt = 0; - for(j = 0; j < (int) ib_proc->proc_port_count; j++) { - if(ib_proc->proc_ports[j].pm_port_info.subnet_id == - openib_btl->port_info.subnet_id) { - if (rem_port_cnt == btl_rank) - break; - else - rem_port_cnt ++; - } + if( !found_existing ) { + rc = init_ib_proc_nolock(openib_btl, ib_proc, &endpoint, + lcl_subnet_id_port_cnt, btl_rank); + if( OPAL_SUCCESS == rc ){ + found_existing = true; } + } + opal_mutex_unlock( &ib_proc->proc_lock ); - assert(rem_port_cnt == btl_rank); - /* Push the subnet/lid/jobid to xrc hash */ - rc = mca_btl_openib_ib_address_add_new( - ib_proc->proc_ports[j].pm_port_info.lid, - ib_proc->proc_ports[j].pm_port_info.subnet_id, - proc->proc_name.jobid, endpoint); - if (OPAL_SUCCESS != rc ) { - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - return OPAL_ERROR; + if (found_existing) { + if (reachable) { + opal_bitmap_set_bit(reachable, i); } + peers[i] = (mca_btl_base_endpoint_t*)endpoint; } -#endif - mca_btl_openib_endpoint_init(openib_btl, endpoint, - local_cpc, - &(ib_proc->proc_ports[remote_matching_port]), - remote_cpc_data); - rc = mca_btl_openib_proc_insert(ib_proc, endpoint); + } + + return OPAL_SUCCESS; +} + +struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_module_t *btl, struct opal_proc_t *proc) +{ + mca_btl_openib_module_t *openib_btl = (mca_btl_openib_module_t *) btl; + volatile mca_btl_base_endpoint_t *endpoint = NULL; + int local_port_cnt = 0, btl_rank, rc; + 
mca_btl_openib_proc_t *ib_proc; + + rc = prepare_device_for_use (openib_btl->device); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("could not prepare openib device for use")); + return NULL; + } + + if (NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc))) { + /* if we don't have connection info for this process, it's + * okay because some other method might be able to reach it, + * so just mark it as unreachable by us */ + return NULL; + } + + rc = mca_btl_openib_proc_reg_btl(ib_proc, openib_btl); + + switch( rc ){ + case OPAL_SUCCESS: + /* unlock first to avoid possible deadlocks */ + opal_mutex_unlock(&ib_proc->proc_lock); + + /* this is a new process to this openib btl + * account this procs if need */ + opal_atomic_add_32 (&openib_btl->num_peers, 1); + rc = openib_btl_size_queues(openib_btl); if (OPAL_SUCCESS != rc) { - OBJ_RELEASE(endpoint); - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - continue; + BTL_ERROR(("error creating cqs")); + return NULL; } - if(OPAL_SUCCESS != mca_btl_openib_tune_endpoint(openib_btl, endpoint)) { - OBJ_RELEASE(endpoint); - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - return OPAL_ERROR; + if( OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags) ) { + opal_mutex_lock(&openib_btl->ib_lock); + openib_btl->local_procs += 1; + openib_btl->device->mem_reg_max = openib_btl->device->mem_reg_max_total / openib_btl->local_procs; + opal_mutex_unlock(&openib_btl->ib_lock); } - endpoint->index = opal_pointer_array_add(openib_btl->device->endpoints, (void*)endpoint); - if( 0 > endpoint->index ) { - OBJ_RELEASE(endpoint); - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - continue; - } + /* lock process back */ + opal_mutex_lock(&ib_proc->proc_lock); + break; + case OPAL_ERR_RESOURCE_BUSY: + /* process was accounted earlier in this openib btl */ + break; + default: + /* unexpected error, e.g. out of mem */ + BTL_ERROR(("Unexpected OPAL error %d", rc)); + return NULL; + } - /* Tell the selected CPC that it won. 
NOTE: This call is - outside of / separate from mca_btl_openib_endpoint_init() - because this function likely needs the endpoint->index. */ - if (NULL != local_cpc->cbm_endpoint_init) { - rc = local_cpc->cbm_endpoint_init(endpoint); - if (OPAL_SUCCESS != rc) { - OBJ_RELEASE(endpoint); - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); - continue; - } + rc = openib_btl_prepare(openib_btl); + if (OPAL_SUCCESS != rc) { + BTL_ERROR(("could not prepare openib btl structure for use")); + goto exit; + } + + for (size_t j = 0 ; j < ib_proc->proc_endpoint_count ; ++j) { + endpoint = ib_proc->proc_endpoints[j]; + if (endpoint->endpoint_btl == openib_btl) { + goto exit; } + } - opal_bitmap_set_bit(reachable, i); - OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); + endpoint = NULL; - peers[i] = endpoint; + btl_rank = get_openib_btl_params(openib_btl, &local_port_cnt); + if( 0 > btl_rank ){ + goto exit; } - openib_btl->local_procs += local_procs; - openib_btl->device->mem_reg_max /= openib_btl->local_procs; + (void)init_ib_proc_nolock(openib_btl, ib_proc, &endpoint, + local_port_cnt, btl_rank); - return OPAL_SUCCESS; +exit: + opal_mutex_unlock(&ib_proc->proc_lock); + + return (struct mca_btl_base_endpoint_t *)endpoint; } /* @@ -1257,7 +1487,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc( to_com_frag(sfrag)->sg_entry.addr = (uint64_t)(uintptr_t)sfrag->hdr; } - cfrag->hdr = (mca_btl_openib_header_coalesced_t*)((unsigned char*)(sfrag->hdr + 1) + + cfrag->hdr = (mca_btl_openib_header_coalesced_t*)((unsigned char*)(sfrag->hdr + 1) + sfrag->coalesced_length + to_base_frag(sfrag)->segment.seg_len); cfrag->hdr = (mca_btl_openib_header_coalesced_t*)BTL_OPENIB_ALIGN_COALESCE_HDR(cfrag->hdr); @@ -1616,23 +1846,13 @@ int mca_btl_openib_sendi( struct mca_btl_base_module_t* btl, assert(max_data == payload_size); } -#if BTL_OPENIB_FAILOVER_ENABLED - send_signaled = 1; -#else send_signaled = qp_need_signal(ep, qp, payload_size + header_size, do_rdma); -#endif ib_rc = post_send(ep, to_send_frag(item), 
do_rdma, send_signaled); if (!ib_rc) { if (0 == send_signaled) { MCA_BTL_IB_FRAG_RETURN(frag); } -#if BTL_OPENIB_FAILOVER_ENABLED - else { - /* Return up in case needed for failover */ - *descriptor = (struct mca_btl_base_descriptor_t *) frag; - } -#endif OPAL_THREAD_UNLOCK(&ep->endpoint_lock); return OPAL_SUCCESS; @@ -1703,18 +1923,20 @@ static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_ mca_btl_base_endpoint_t *endpoint, void *base, size_t size, uint32_t flags) { + mca_btl_openib_module_t *openib_module = (mca_btl_openib_module_t *) btl; mca_btl_openib_reg_t *reg; uint32_t mflags = 0; + int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; int rc; #if OPAL_CUDA_GDR_SUPPORT if (flags & MCA_BTL_REG_FLAG_CUDA_GPU_MEM) { - mflags |= MCA_MPOOL_FLAGS_CUDA_GPU_MEM; + mflags |= MCA_RCACHE_FLAGS_CUDA_GPU_MEM; } #endif /* OPAL_CUDA_GDR_SUPPORT */ - rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, mflags, - (mca_mpool_base_registration_t **) ®); + rc = openib_module->device->rcache->rcache_register (openib_module->device->rcache, base, size, mflags, + access_flags, (mca_rcache_base_registration_t **) ®); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == reg)) { return NULL; } @@ -1724,60 +1946,11 @@ static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_ static int mca_btl_openib_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle) { + mca_btl_openib_module_t *openib_module = (mca_btl_openib_module_t *) btl; mca_btl_openib_reg_t *reg = (mca_btl_openib_reg_t *)((intptr_t) handle - offsetof (mca_btl_openib_reg_t, btl_handle)); - btl->btl_mpool->mpool_deregister (btl->btl_mpool, (mca_mpool_base_registration_t *) reg); - - return OPAL_SUCCESS; -} - -#if OPAL_ENABLE_FT_CR == 0 -int mca_btl_openib_ft_event(int state) { - return OPAL_SUCCESS; -} -#else -int mca_btl_openib_ft_event(int state) { - int i; - - if(OPAL_CRS_CHECKPOINT == state) { - /* Continue must reconstruct the 
routes (including modex), since we - * have to tear down the devices completely. */ - opal_cr_continue_like_restart = true; - - /* - * To keep the node from crashing we need to call ibv_close_device - * before the checkpoint is taken. To do this we need to tear - * everything down, and rebuild it all on continue/restart. :( - */ - - /* Shutdown all modules - * - Do this backwards since the openib_finalize function also loops - * over this variable. - */ - for (i = 0; i < mca_btl_openib_component.ib_num_btls; ++i ) { - mca_btl_openib_finalize_resources( &(mca_btl_openib_component.openib_btls[i])->super); - } - - mca_btl_openib_component.devices_count = 0; - mca_btl_openib_component.ib_num_btls = 0; - OBJ_DESTRUCT(&mca_btl_openib_component.ib_procs); - - opal_btl_openib_connect_base_finalize(); - } - else if(OPAL_CRS_CONTINUE == state) { - ; /* Cleared by forcing the modex, no work needed */ - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } + openib_module->device->rcache->rcache_deregister (openib_module->device->rcache, (mca_rcache_base_registration_t *) reg); return OPAL_SUCCESS; } -#endif /* OPAL_ENABLE_FT_CR */ diff --git a/opal/mca/btl/openib/btl_openib.h b/opal/mca/btl/openib/btl_openib.h index f7741e5cd9e..4c3a7c6005f 100644 --- a/opal/mca/btl/openib/btl_openib.h +++ b/opal/mca/btl/openib/btl_openib.h @@ -12,14 +12,16 @@ * All rights reserved. * Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -45,16 +47,18 @@ #include "opal/mca/event/event.h" #include "opal/threads/threads.h" #include "opal/mca/btl/btl.h" +#include "opal/mca/rcache/rcache.h" #include "opal/mca/mpool/mpool.h" #include "opal/mca/btl/base/btl_base_error.h" #include "opal/mca/btl/base/base.h" +#include "opal/runtime/opal_progress_threads.h" #include "connect/connect.h" BEGIN_C_DECLS -#define HAVE_XRC (1 == OPAL_HAVE_CONNECTX_XRC) -#define ENABLE_DYNAMIC_SL (1 == OPAL_ENABLE_DYNAMIC_SL) +#define HAVE_XRC (OPAL_HAVE_CONNECTX_XRC || OPAL_HAVE_CONNECTX_XRC_DOMAINS) +#define ENABLE_DYNAMIC_SL OPAL_ENABLE_DYNAMIC_SL #define MCA_BTL_IB_LEAVE_PINNED 1 #define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll @@ -80,6 +84,12 @@ BEGIN_C_DECLS * Infiniband (IB) BTL component. 
*/ +enum { + BTL_OPENIB_HP_CQ, + BTL_OPENIB_LP_CQ, + BTL_OPENIB_MAX_CQ, +}; + typedef enum { MCA_BTL_OPENIB_TRANSPORT_IB, MCA_BTL_OPENIB_TRANSPORT_IWARP, @@ -183,8 +193,11 @@ struct mca_btl_openib_component_t { opal_mutex_t ib_lock; /**< lock for accessing module state */ - char* ib_mpool_name; - /**< name of ib memory pool */ + char* ib_mpool_hints; + /**< hints for selecting an mpool component */ + + char *ib_rcache_name; + /**< name of ib registration cache */ uint8_t num_pp_qps; /**< number of pp qp's */ uint8_t num_srq_qps; /**< number of srq qp's */ @@ -201,7 +214,7 @@ struct mca_btl_openib_component_t { uint32_t reg_mru_len; /**< Length of the registration cache most recently used list */ uint32_t use_srq; /**< Use the Shared Receive Queue (SRQ mode) */ - uint32_t ib_cq_size[2]; /**< Max outstanding CQE on the CQ */ + uint32_t ib_cq_size[BTL_OPENIB_MAX_CQ]; /**< Max outstanding CQE on the CQ */ int ib_max_inline_data; /**< Max size of inline data */ unsigned int ib_pkey_val; @@ -227,14 +240,9 @@ struct mca_btl_openib_component_t { int apm_ports; unsigned int buffer_alignment; /**< Preferred communication buffer alignment in Bytes (must be power of two) */ int32_t error_counter; /**< Counts number on error events that we got on all devices */ - int async_pipe[2]; /**< Pipe for comunication with async event thread */ - int async_comp_pipe[2]; /**< Pipe for async thread comunication with main thread */ - pthread_t async_thread; /**< Async thread that will handle fatal errors */ + opal_event_base_t *async_evbase; /**< Async event base */ bool use_async_event_thread; /**< Use the async event handler */ mca_btl_openib_srq_manager_t srq_manager; /**< Hash table for all BTL SRQs */ -#if BTL_OPENIB_FAILOVER_ENABLED - bool port_error_failover; /**< Report port errors to speed up failover */ -#endif /* declare as an int instead of btl_openib_device_type_t since there is no guarantee about the size of an enum. 
this value will be registered as an integer with the MCA variable system */ @@ -292,20 +300,15 @@ struct mca_btl_openib_component_t { char* default_recv_qps; /** GID index to use */ int gid_index; + /* Whether we want to allow connecting processes from different subnets. + * set to 'no' by default */ + bool allow_different_subnets; /** Whether we want a dynamically resizing srq, enabled by default */ bool enable_srq_resize; bool allow_max_memory_registration; int memory_registration_verbose_level; int memory_registration_verbose; int ignore_locality; -#if BTL_OPENIB_FAILOVER_ENABLED - int verbose_failover; -#endif -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED - int use_memalign; - size_t memalign_threshold; - void* (*previous_malloc_hook)(size_t __size, const void*); -#endif #if OPAL_CUDA_SUPPORT bool cuda_async_send; bool cuda_async_recv; @@ -316,6 +319,7 @@ struct mca_btl_openib_component_t { #if HAVE_DECL_IBV_LINK_LAYER_ETHERNET bool rroce_enable; #endif + unsigned int num_default_gid_btls; /* numbers of btl in the default subnet */ }; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t; OPAL_MODULE_DECLSPEC extern mca_btl_openib_component_t mca_btl_openib_component; @@ -372,11 +376,15 @@ typedef struct mca_btl_openib_device_t { #endif opal_mutex_t device_lock; /* device level lock */ struct ibv_context *ib_dev_context; +#if HAVE_DECL_IBV_EXP_QUERY_DEVICE + struct ibv_exp_device_attr ib_exp_dev_attr; +#endif struct ibv_device_attr ib_dev_attr; struct ibv_pd *ib_pd; - struct ibv_cq *ib_cq[2]; - uint32_t cq_size[2]; + struct ibv_cq *ib_cq[BTL_OPENIB_MAX_CQ]; + uint32_t cq_size[BTL_OPENIB_MAX_CQ]; mca_mpool_base_module_t *mpool; + mca_rcache_base_module_t *rcache; /* MTU for this device */ uint32_t mtu; /* Whether this device supports eager RDMA */ @@ -407,9 +415,11 @@ typedef struct mca_btl_openib_device_t { /* Maximum value supported by this device for max_inline_data */ uint32_t max_inline_data; /* Registration limit and current count */ - uint64_t 
mem_reg_max, mem_reg_active; + uint64_t mem_reg_max, mem_reg_max_total, mem_reg_active; /* Device is ready for use */ bool ready_for_use; + /* Async event */ + opal_event_t async_event; } mca_btl_openib_device_t; OBJ_CLASS_DECLARATION(mca_btl_openib_device_t); @@ -459,6 +469,7 @@ struct mca_btl_openib_module_t { mca_btl_base_module_t super; bool btl_inited; + bool srqs_created; /** Common information about all ports */ mca_btl_openib_modex_message_t port_info; @@ -489,6 +500,8 @@ struct mca_btl_openib_module_t { mca_btl_openib_module_qp_t * qps; int local_procs; /** number of local procs */ + + bool atomic_ops_be; /** atomic result is big endian */ }; typedef struct mca_btl_openib_module_t mca_btl_openib_module_t; @@ -500,7 +513,7 @@ struct mca_btl_base_registration_handle_t { }; struct mca_btl_openib_reg_t { - mca_mpool_base_registration_t base; + mca_rcache_base_registration_t base; struct ibv_mr *mr; mca_btl_base_registration_handle_t btl_handle; }; @@ -837,26 +850,12 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src( extern void mca_btl_openib_frag_progress_pending_put_get( struct mca_btl_base_endpoint_t*, const int); -/** - * Fault Tolerance Event Notification Function - * - * @param state (IN) Checkpoint State - * @return OPAL_SUCCESS or failure status - */ -extern int mca_btl_openib_ft_event(int state); - - /** * Show an error during init, particularly when running out of * registered memory. 
*/ void mca_btl_openib_show_init_error(const char *file, int line, const char *func, const char *dev); - -#define BTL_OPENIB_HP_CQ 0 -#define BTL_OPENIB_LP_CQ 1 - - /** * Post to Shared Receive Queue with certain priority * @@ -874,6 +873,18 @@ int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp); const char* btl_openib_get_transport_name(mca_btl_openib_transport_type_t transport_type); +/** + * Get an endpoint for a process + * + * @param btl (IN) BTL module + * @param proc (IN) opal process object + * + * This function will return an existing endpoint if one exists otherwise it will allocate + * a new endpoint and return it. + */ +struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_module_t *btl, + struct opal_proc_t *proc); + /** * Get a transport type of btl. */ @@ -895,6 +906,15 @@ static inline int qp_cq_prio(const int qp) #define BTL_OPENIB_RDMA_QP(QP) \ ((QP) == mca_btl_openib_component.rdma_qp) +/** + * Run function as part of opal_progress() + * + * @param[in] fn function to run + * @param[in] arg function data + */ +int mca_btl_openib_run_in_main (void *(*fn)(void *), void *arg); + + END_C_DECLS #endif /* MCA_BTL_IB_H */ diff --git a/opal/mca/btl/openib/btl_openib_async.c b/opal/mca/btl/openib/btl_openib_async.c index 7f6ec499d9b..3662624292e 100644 --- a/opal/mca/btl/openib/btl_openib_async.c +++ b/opal/mca/btl/openib/btl_openib_async.c @@ -36,6 +36,10 @@ #include "btl_openib_proc.h" #include "btl_openib_endpoint.h" +static opal_list_t ignore_qp_err_list; +static opal_mutex_t ignore_qp_err_list_lock; +static int32_t btl_openib_async_device_count = 0; + struct mca_btl_openib_async_poll { int active_poll_size; int poll_size; @@ -50,14 +54,7 @@ typedef struct { OBJ_CLASS_INSTANCE(mca_btl_openib_qp_list, opal_list_item_t, NULL, NULL); -static int return_status = OPAL_ERROR; - -static int btl_openib_async_poll_init(struct mca_btl_openib_async_poll *hcas_poll); -static int btl_openib_async_commandh(struct 
mca_btl_openib_async_poll *hcas_poll, opal_list_t *ignore_qp_err_list); -static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *hcas_poll, int index, - opal_list_t *ignore_qp_err_list); static const char *openib_event_to_str (enum ibv_event_type event); -static int send_command_comp(int in); /* Function converts event to string (name) * Open Fabris don't have function that do this job :( @@ -122,7 +119,7 @@ static mca_btl_openib_endpoint_t * qp2endpoint(struct ibv_qp *qp, mca_btl_openib return NULL; } -#if HAVE_XRC && !OPAL_HAVE_CONNECTX_XRC_DOMAINS +#if OPAL_HAVE_CONNECTX_XRC /* XRC recive QP to endpoint */ static mca_btl_openib_endpoint_t * xrc_qp2endpoint(uint32_t qp_num, mca_btl_openib_device_t *device) { @@ -138,132 +135,6 @@ static mca_btl_openib_endpoint_t * xrc_qp2endpoint(uint32_t qp_num, mca_btl_open #endif /* Function inits mca_btl_openib_async_poll */ -static int btl_openib_async_poll_init(struct mca_btl_openib_async_poll *devices_poll) -{ - devices_poll->active_poll_size = 1; - devices_poll->poll_size = 4; - devices_poll->async_pollfd = malloc(sizeof(struct pollfd) * devices_poll->poll_size); - if (NULL == devices_poll->async_pollfd) { - BTL_ERROR(("Failed malloc: %s:%d" - , __FILE__, __LINE__)); - return OPAL_ERROR; - } - /* Creating comunication channel with the main thread */ - devices_poll->async_pollfd[0].fd = mca_btl_openib_component.async_pipe[0]; - devices_poll->async_pollfd[0].events = POLLIN; - devices_poll->async_pollfd[0].revents = 0; - return OPAL_SUCCESS; -} - -/* Send command completion to main thread */ -static int send_command_comp(int in) -{ - if (write(mca_btl_openib_component.async_comp_pipe[1], &in, sizeof(int)) < 0) { - BTL_ERROR(("Write failed [%d]",errno)); - return OPAL_ERROR; - } - return OPAL_SUCCESS; -} - -/* Function handle async thread commands */ -static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_poll, opal_list_t *ignore_qp_err_list) -{ - struct pollfd *async_pollfd_tmp; - 
mca_btl_openib_async_cmd_t cmd; - int fd,flags,j,ret; - /* Got command from main thread */ - ret = read(devices_poll->async_pollfd[0].fd, &cmd, sizeof(mca_btl_openib_async_cmd_t)); - if (sizeof(mca_btl_openib_async_cmd_t) != ret) { - BTL_ERROR(("Read failed [%d]",errno)); - return OPAL_ERROR; - } - - BTL_VERBOSE(("Got cmd %d", cmd.a_cmd)); - if (OPENIB_ASYNC_CMD_FD_ADD == cmd.a_cmd) { - fd = cmd.fd; - BTL_VERBOSE(("Got fd %d", fd)); - BTL_VERBOSE(("Adding device [%d] to async event poll[%d]", - fd, devices_poll->active_poll_size)); - flags = fcntl(fd, F_GETFL); - if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) { - BTL_ERROR(("Failed to change file descriptor of async event")); - return OPAL_ERROR; - } - if ((devices_poll->active_poll_size + 1) > devices_poll->poll_size) { - devices_poll->poll_size+=devices_poll->poll_size; - async_pollfd_tmp = malloc(sizeof(struct pollfd) * devices_poll->poll_size); - if (NULL == async_pollfd_tmp) { - BTL_ERROR(("Failed malloc: %s:%d. " - "Fatal error, stoping asynch event thread" - , __FILE__, __LINE__)); - return OPAL_ERROR; - } - memcpy (async_pollfd_tmp,devices_poll->async_pollfd, - sizeof(struct pollfd) * (devices_poll->active_poll_size)); - free(devices_poll->async_pollfd); - devices_poll->async_pollfd = async_pollfd_tmp; - } - devices_poll->async_pollfd[devices_poll->active_poll_size].fd = fd; - devices_poll->async_pollfd[devices_poll->active_poll_size].events = POLLIN; - devices_poll->async_pollfd[devices_poll->active_poll_size].revents = 0; - devices_poll->active_poll_size++; - if (OPAL_SUCCESS != send_command_comp(fd)) { - return OPAL_ERROR; - } - } else if (OPENIB_ASYNC_CMD_FD_REMOVE == cmd.a_cmd) { - bool fd_found = false; - - fd = cmd.fd; - BTL_VERBOSE(("Got fd %d", fd)); - - /* Removing device from poll */ - BTL_VERBOSE(("Removing device [%d] from async event poll [%d]", - fd, devices_poll->active_poll_size)); - if (devices_poll->active_poll_size > 1) { - for (j=0; (j < devices_poll->active_poll_size || !fd_found); 
j++) { - if (devices_poll->async_pollfd[j].fd == fd) { - devices_poll->async_pollfd[j].fd = - devices_poll->async_pollfd[devices_poll->active_poll_size-1].fd; - devices_poll->async_pollfd[j].events = - devices_poll->async_pollfd[devices_poll->active_poll_size-1].events; - devices_poll->async_pollfd[j].revents = - devices_poll->async_pollfd[devices_poll->active_poll_size-1].revents; - fd_found = true; - } - } - if (!fd_found) { - BTL_ERROR(("Requested FD[%d] was not found in poll array",fd)); - return OPAL_ERROR; - } - } - devices_poll->active_poll_size--; - if (OPAL_SUCCESS != send_command_comp(fd)) { - return OPAL_ERROR; - } - } else if (OPENIB_ASYNC_IGNORE_QP_ERR == cmd.a_cmd) { - mca_btl_openib_qp_list *new_qp; - new_qp = OBJ_NEW(mca_btl_openib_qp_list); - BTL_VERBOSE(("Ignore errors on QP %p", (void *)cmd.qp)); - new_qp->qp = cmd.qp; - opal_list_append(ignore_qp_err_list, (opal_list_item_t *)new_qp); - send_command_comp(OPENIB_ASYNC_IGNORE_QP_ERR); - - } else if (OPENIB_ASYNC_THREAD_EXIT == cmd.a_cmd) { - /* Got 0 - command to close the thread */ - opal_list_item_t *item; - BTL_VERBOSE(("Async event thread exit")); - free(devices_poll->async_pollfd); - return_status = OPAL_SUCCESS; - - while ((item = opal_list_remove_first(ignore_qp_err_list))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(ignore_qp_err_list); - - pthread_exit(&return_status); - } - return OPAL_SUCCESS; -} /* The main idea of resizing SRQ algorithm - We create a SRQ with size = rd_num, but for efficient usage of resources @@ -323,238 +194,118 @@ static int btl_openib_async_srq_limit_event(struct ibv_srq* srq) } /* Function handle async device events */ -static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_poll, int index, - opal_list_t *ignore_qp_err_list) +static void btl_openib_async_device (int fd, short flags, void *arg) { - int j; - mca_btl_openib_device_t *device = NULL; + mca_btl_openib_device_t *device = (mca_btl_openib_device_t *) arg; struct ibv_async_event event; 
int event_type; - /* We need to find correct device and process this event */ - for (j=0; j < mca_btl_openib_component.ib_num_btls; j++) { - if (mca_btl_openib_component.openib_btls[j]->device->ib_dev_context->async_fd == - devices_poll->async_pollfd[index].fd ) { - device = mca_btl_openib_component.openib_btls[j]->device; - break; + if (ibv_get_async_event((struct ibv_context *)device->ib_dev_context,&event) < 0) { + if (EWOULDBLOCK != errno) { + BTL_ERROR(("Failed to get async event")); } + + return; } - if (NULL != device) { - if (ibv_get_async_event((struct ibv_context *)device->ib_dev_context,&event) < 0) { - if (EWOULDBLOCK == errno) { - /* No event found ? - * It was handled by somebody other */ - return OPAL_SUCCESS; - } else { - BTL_ERROR(("Failed to get async event")); - return OPAL_ERROR; - } - } - event_type = event.event_type; -#if HAVE_XRC - /* is it XRC event ?*/ -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS -#else - bool xrc_event = false; - if (IBV_XRC_QP_EVENT_FLAG & event.event_type) { - xrc_event = true; - /* Clean the bitnd handel as usual */ - event_type ^= IBV_XRC_QP_EVENT_FLAG; - } -#endif -#endif - switch(event_type) { - case IBV_EVENT_PATH_MIG: - BTL_ERROR(("Alternative path migration event reported")); - if (APM_ENABLED) { - BTL_ERROR(("Trying to find additional path...")); -#if HAVE_XRC && !OPAL_HAVE_CONNECTX_XRC_DOMAINS - if (xrc_event) - mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num, - xrc_qp2endpoint(event.element.xrc_qp_num, device)); - else + event_type = event.event_type; +#if OPAL_HAVE_CONNECTX_XRC + /* is it XRC event ?*/ + bool xrc_event = false; + if (IBV_XRC_QP_EVENT_FLAG & event.event_type) { + xrc_event = true; + /* Clean the bitnd handel as usual */ + event_type ^= IBV_XRC_QP_EVENT_FLAG; + } #endif - mca_btl_openib_load_apm(event.element.qp, - qp2endpoint(event.element.qp, device)); - } - break; - case IBV_EVENT_DEVICE_FATAL: - /* Set the flag to fatal */ - device->got_fatal_event = true; - /* It is not critical to protect 
the counter */ - OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); - /* fall through */ - case IBV_EVENT_CQ_ERR: - case IBV_EVENT_QP_FATAL: - if (event_type == IBV_EVENT_QP_FATAL) { - opal_list_item_t *item; - mca_btl_openib_qp_list *qp_item; - bool in_ignore_list = false; - - BTL_VERBOSE(("QP is in err state %p", (void *)event.element.qp)); - - /* look through ignore list */ - for (item = opal_list_get_first(ignore_qp_err_list); - item != opal_list_get_end(ignore_qp_err_list); - item = opal_list_get_next(item)) { - qp_item = (mca_btl_openib_qp_list *)item; - if (qp_item->qp == event.element.qp) { - BTL_VERBOSE(("QP %p is in error ignore list", - (void *)event.element.qp)); - in_ignore_list = true; - break; - } - } - if (in_ignore_list) - break; - } - - case IBV_EVENT_QP_REQ_ERR: - case IBV_EVENT_QP_ACCESS_ERR: - case IBV_EVENT_PATH_MIG_ERR: - case IBV_EVENT_SRQ_ERR: - opal_show_help("help-mpi-btl-openib.txt", "of error event", - true,opal_process_info.nodename, (int)getpid(), - event_type, - openib_event_to_str((enum ibv_event_type)event_type)); - break; - case IBV_EVENT_PORT_ERR: - opal_show_help("help-mpi-btl-openib.txt", "of error event", - true,opal_process_info.nodename, (int)getpid(), - event_type, - openib_event_to_str((enum ibv_event_type)event_type)); - /* Set the flag to indicate port error */ - device->got_port_event = true; - OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); - break; - case IBV_EVENT_COMM_EST: - case IBV_EVENT_PORT_ACTIVE: - case IBV_EVENT_SQ_DRAINED: - case IBV_EVENT_LID_CHANGE: - case IBV_EVENT_PKEY_CHANGE: - case IBV_EVENT_SM_CHANGE: - case IBV_EVENT_QP_LAST_WQE_REACHED: -#if HAVE_DECL_IBV_EVENT_CLIENT_REREGISTER - case IBV_EVENT_CLIENT_REREGISTER: + switch(event_type) { + case IBV_EVENT_PATH_MIG: + BTL_ERROR(("Alternative path migration event reported")); + if (APM_ENABLED) { + BTL_ERROR(("Trying to find additional path...")); +#if OPAL_HAVE_CONNECTX_XRC + if (xrc_event) + 
mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num, + xrc_qp2endpoint(event.element.xrc_qp_num, device)); + else #endif - break; - /* The event is signaled when number of prepost receive WQEs is going - under predefined threshold - srq_limit */ - case IBV_EVENT_SRQ_LIMIT_REACHED: - if(OPAL_SUCCESS != - btl_openib_async_srq_limit_event(event.element.srq)) { - return OPAL_ERROR; + mca_btl_openib_load_apm(event.element.qp, + qp2endpoint(event.element.qp, device)); + } + break; + case IBV_EVENT_DEVICE_FATAL: + /* Set the flag to fatal */ + device->got_fatal_event = true; + /* It is not critical to protect the counter */ + OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); + /* fall through */ + case IBV_EVENT_CQ_ERR: + case IBV_EVENT_QP_FATAL: + if (event_type == IBV_EVENT_QP_FATAL) { + mca_btl_openib_qp_list *qp_item; + bool in_ignore_list = false; + + BTL_VERBOSE(("QP is in err state %p", (void *)event.element.qp)); + + /* look through ignore list */ + opal_mutex_lock (&ignore_qp_err_list_lock); + OPAL_LIST_FOREACH(qp_item, &ignore_qp_err_list, mca_btl_openib_qp_list) { + if (qp_item->qp == event.element.qp) { + BTL_VERBOSE(("QP %p is in error ignore list", + (void *)event.element.qp)); + in_ignore_list = true; + break; } + } + opal_mutex_unlock (&ignore_qp_err_list_lock); + if (in_ignore_list) { break; - default: - opal_show_help("help-mpi-btl-openib.txt", "of unknown event", - true,opal_process_info.nodename, (int)getpid(), - event_type); - } - ibv_ack_async_event(&event); - } else { - /* if (device == NULL), then failed to locate the device! - This should never happen... */ - BTL_ERROR(("Failed to find device with FD %d. 
" - "Fatal error, stoping asynch event thread", - devices_poll->async_pollfd[index].fd)); - return OPAL_ERROR; - } - return OPAL_SUCCESS; -} - -/* This Async event thread is handling all async event of - * all btls/devices in openib component - */ -static void* btl_openib_async_thread(void * async) -{ - int rc; - int i; - struct mca_btl_openib_async_poll devices_poll; - opal_list_t ignore_qp_err_list; - - OBJ_CONSTRUCT(&ignore_qp_err_list, opal_list_t); - - if (OPAL_SUCCESS != btl_openib_async_poll_init(&devices_poll)) { - BTL_ERROR(("Fatal error, stoping asynch event thread")); - pthread_exit(&return_status); - } - - while(1) { - rc = poll(devices_poll.async_pollfd, devices_poll.active_poll_size, -1); - if (rc < 0) { - if (errno != EINTR) { - BTL_ERROR(("Poll failed. Fatal error, stoping asynch event thread")); - pthread_exit(&return_status); - } else { - /* EINTR - we got interupt */ - continue; } } - for(i = 0; i < devices_poll.active_poll_size; i++) { - switch (devices_poll.async_pollfd[i].revents) { - case 0: - /* no events */ - break; - case POLLIN: -#if defined(__SVR4) && defined(__sun) - /* - * Need workaround for Solaris IB user verbs since - * "Poll on IB async fd returns POLLRDNORM revent even though it is masked out" - */ - case POLLIN | POLLRDNORM: + /* fall through */ + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_PATH_MIG_ERR: + case IBV_EVENT_SRQ_ERR: + opal_show_help("help-mpi-btl-openib.txt", "of error event", + true,opal_process_info.nodename, (int)getpid(), + event_type, + openib_event_to_str((enum ibv_event_type)event_type)); + break; + case IBV_EVENT_PORT_ERR: + opal_show_help("help-mpi-btl-openib.txt", "of error event", + true,opal_process_info.nodename, (int)getpid(), + event_type, + openib_event_to_str((enum ibv_event_type)event_type)); + /* Set the flag to indicate port error */ + device->got_port_event = true; + OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); + break; + case 
IBV_EVENT_COMM_EST: + case IBV_EVENT_PORT_ACTIVE: + case IBV_EVENT_SQ_DRAINED: + case IBV_EVENT_LID_CHANGE: + case IBV_EVENT_PKEY_CHANGE: + case IBV_EVENT_SM_CHANGE: + case IBV_EVENT_QP_LAST_WQE_REACHED: +#if HAVE_DECL_IBV_EVENT_CLIENT_REREGISTER + case IBV_EVENT_CLIENT_REREGISTER: #endif - /* Processing our event */ - if (0 == i) { - /* 0 poll we use for comunication with main thread */ - if (OPAL_SUCCESS != btl_openib_async_commandh(&devices_poll, - &ignore_qp_err_list)) { - free(devices_poll.async_pollfd); - BTL_ERROR(("Failed to process async thread process. " - "Fatal error, stoping asynch event thread")); - pthread_exit(&return_status); - } - } else { - /* We get device event */ - if (btl_openib_async_deviceh(&devices_poll, i, - &ignore_qp_err_list)) { - free(devices_poll.async_pollfd); - BTL_ERROR(("Failed to process async thread process. " - "Fatal error, stoping asynch event thread")); - pthread_exit(&return_status); - } - } - break; - default: - /* Get event other than POLLIN - * this case should not never happend */ - BTL_ERROR(("Got unexpected event %d. " - "Fatal error, stoping asynch event thread", - devices_poll.async_pollfd[i].revents)); - free(devices_poll.async_pollfd); - pthread_exit(&return_status); - } - } - } - return PTHREAD_CANCELED; -} + break; + /* The event is signaled when number of prepost receive WQEs is going + under predefined threshold - srq_limit */ + case IBV_EVENT_SRQ_LIMIT_REACHED: + (void) btl_openib_async_srq_limit_event (event.element.srq); -int btl_openib_async_command_done(int exp) -{ - int comp; - if (read(mca_btl_openib_component.async_comp_pipe[0], &comp, - sizeof(int)) < (int) sizeof (int)){ - BTL_ERROR(("Failed to read from pipe")); - return OPAL_ERROR; - } - if (exp != comp){ - BTL_ERROR(("Get wrong completion on async command. 
Waiting for %d and got %d", - exp, comp)); - return OPAL_ERROR; + break; + default: + opal_show_help("help-mpi-btl-openib.txt", "of unknown event", + true,opal_process_info.nodename, (int)getpid(), + event_type); } - return OPAL_SUCCESS; + + ibv_ack_async_event(&event); } static void apm_update_attr(struct ibv_qp_attr *attr, enum ibv_qp_attr_mask *mask) @@ -653,7 +404,7 @@ void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep) qp->qp_num, strerror(errno), errno)); } -#if HAVE_XRC && ! OPAL_HAVE_CONNECTX_XRC_DOMAINS +#if OPAL_HAVE_CONNECTX_XRC void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t *ep) { struct ibv_qp_init_attr qp_init_attr; @@ -688,34 +439,70 @@ void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t } #endif -int start_async_event_thread(void) +int mca_btl_openib_async_init (void) { - if (0 != mca_btl_openib_component.async_thread) { + if (!mca_btl_openib_component.use_async_event_thread || + mca_btl_openib_component.async_evbase) { return OPAL_SUCCESS; } + mca_btl_openib_component.async_evbase = opal_progress_thread_init (NULL); + + OBJ_CONSTRUCT(&ignore_qp_err_list, opal_list_t); + OBJ_CONSTRUCT(&ignore_qp_err_list_lock, opal_mutex_t); + /* Set the error counter to zero */ mca_btl_openib_component.error_counter = 0; - /* Create pipe for communication with async event thread */ - if (pipe(mca_btl_openib_component.async_pipe)) { - BTL_ERROR(("Failed to create pipe for communication with " - "async event thread")); - return OPAL_ERROR; + return OPAL_SUCCESS; +} + +void mca_btl_openib_async_fini (void) +{ + if (mca_btl_openib_component.async_evbase) { + OPAL_LIST_DESTRUCT(&ignore_qp_err_list); + OBJ_DESTRUCT(&ignore_qp_err_list_lock); + opal_progress_thread_finalize (NULL); + mca_btl_openib_component.async_evbase = NULL; } +} - if (pipe(mca_btl_openib_component.async_comp_pipe)) { - BTL_ERROR(("Failed to create comp pipe for communication with " - "main thread")); - return 
OPAL_ERROR; +void mca_btl_openib_async_add_device (mca_btl_openib_device_t *device) +{ + if (mca_btl_openib_component.async_evbase) { + if (1 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, 1)) { + mca_btl_openib_async_init (); + } + opal_event_set (mca_btl_openib_component.async_evbase, &device->async_event, + device->ib_dev_context->async_fd, OPAL_EV_READ | OPAL_EV_PERSIST, + btl_openib_async_device, device); + opal_event_add (&device->async_event, 0); } +} - /* Starting async event thread for the component */ - if (pthread_create(&mca_btl_openib_component.async_thread, NULL, - (void*(*)(void*)) btl_openib_async_thread, NULL)) { - BTL_ERROR(("Failed to create async event thread")); - return OPAL_ERROR; +void mca_btl_openib_async_rem_device (mca_btl_openib_device_t *device) +{ + if (mca_btl_openib_component.async_evbase) { + opal_event_del (&device->async_event); + if (0 == OPAL_THREAD_ADD32 (&btl_openib_async_device_count, -1)) { + mca_btl_openib_async_fini (); + } } +} - return OPAL_SUCCESS; +void mca_btl_openib_async_add_qp_ignore (struct ibv_qp *qp) +{ + if (mca_btl_openib_component.async_evbase) { + mca_btl_openib_qp_list *new_qp = OBJ_NEW(mca_btl_openib_qp_list); + if (OPAL_UNLIKELY(NULL == new_qp)) { + /* can't allocate a small object. not much more can be done */ + return; + } + + BTL_VERBOSE(("Ignoring errors on QP %p", (void *) qp)); + new_qp->qp = qp; + opal_mutex_lock (&ignore_qp_err_list_lock); + opal_list_append (&ignore_qp_err_list, (opal_list_item_t *) new_qp); + opal_mutex_unlock (&ignore_qp_err_list_lock); + } } diff --git a/opal/mca/btl/openib/btl_openib_async.h b/opal/mca/btl/openib/btl_openib_async.h index c9243821d38..b62fdbec3fb 100644 --- a/opal/mca/btl/openib/btl_openib_async.h +++ b/opal/mca/btl/openib/btl_openib_async.h @@ -3,6 +3,8 @@ * Copyright (c) 2014 Bull SAS. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * received. * $COPYRIGHT$ * * Additional copyrights may follow @@ -16,29 +18,42 @@ #define MCA_BTL_OPENIB_ASYNC_H #include "btl_openib_endpoint.h" -int start_async_event_thread(void); void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep); -int btl_openib_async_command_done(int exp); -#if HAVE_XRC && ! OPAL_HAVE_CONNECTX_XRC_DOMAINS +#if OPAL_HAVE_CONNECTX_XRC void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t *ep); #endif #define APM_ENABLED (0 != mca_btl_openib_component.apm_lmc || 0 != mca_btl_openib_component.apm_ports) -/* - * Command types for communicating with the async thread +/** + * Initialize the async event base + */ +int mca_btl_openib_async_init (void); + +/** + * Finalize the async event base + */ +void mca_btl_openib_async_fini (void); + +/** + * Register a device with the async event base + * + * @param[in] device device to register + */ +void mca_btl_openib_async_add_device (mca_btl_openib_device_t *device); + +/** + * Deregister a device with the async event base + * + * @param[in] device device to deregister + */ +void mca_btl_openib_async_rem_device (mca_btl_openib_device_t *device); + +/** + * Ignore error events on a queue pair + * + * @param[in] qp queue pair to ignore */ -typedef enum { - OPENIB_ASYNC_CMD_FD_ADD, - OPENIB_ASYNC_CMD_FD_REMOVE, - OPENIB_ASYNC_IGNORE_QP_ERR, - OPENIB_ASYNC_THREAD_EXIT -} btl_openib_async_cmd_type_t; - -typedef struct { - btl_openib_async_cmd_type_t a_cmd; - int fd; - struct ibv_qp *qp; -} mca_btl_openib_async_cmd_t; +void mca_btl_openib_async_add_qp_ignore (struct ibv_qp *qp); #endif diff --git a/opal/mca/btl/openib/btl_openib_atomic.c b/opal/mca/btl/openib/btl_openib_atomic.c index f96ce4c681b..ec0eb644f1a 100644 --- a/opal/mca/btl/openib/btl_openib_atomic.c +++ b/opal/mca/btl/openib/btl_openib_atomic.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - 
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -22,11 +22,12 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *remote_handle, enum ibv_wr_opcode opcode, - int64_t operand, int operand2, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, + int64_t operand, int64_t operand2, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { mca_btl_openib_get_frag_t* frag = NULL; int qp = order; + int32_t rkey; int rc; frag = to_get_frag(alloc_recv_user_frag()); @@ -61,22 +62,19 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st frag->sr_desc.wr.atomic.compare_add = operand; frag->sr_desc.wr.atomic.swap = operand2; + rkey = remote_handle->rkey; + #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT if((endpoint->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN) != (opal_proc_local_get()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) { - frag->sr_desc.wr.atomic.rkey = opal_swap_bytes4 (remote_handle->rkey); - } else -#endif - { - frag->sr_desc.wr.atomic.rkey = remote_handle->rkey; - } - -#if HAVE_XRC - if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { - frag->sr_desc.xrc_remote_srq_num=endpoint->rem_info.rem_srqs[qp].rem_srq_num; + rkey = opal_swap_bytes4 (rkey); } #endif + frag->sr_desc.wr.atomic.rkey = rkey; + + /* NTH: the SRQ# is set in mca_btl_get_internal */ + if (endpoint->endpoint_state != MCA_BTL_IB_CONNECTED) { OPAL_THREAD_LOCK(&endpoint->endpoint_lock); rc = check_endpoint_state(endpoint, &to_base_frag(frag)->base, &endpoint->pending_get_frags); @@ -114,7 +112,7 @@ int 
mca_btl_openib_atomic_fop (struct mca_btl_base_module_t *btl, struct mca_btl void *cbcontext, void *cbdata) { - if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op)) { + if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op || (MCA_BTL_ATOMIC_FLAG_32BIT & flags))) { return OPAL_ERR_NOT_SUPPORTED; } @@ -130,6 +128,10 @@ int mca_btl_openib_atomic_cswap (struct mca_btl_base_module_t *btl, struct mca_b uint64_t value, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { + if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_FLAG_32BIT & flags)) { + return OPAL_ERR_NOT_SUPPORTED; + } + return mca_btl_openib_atomic_internal (btl, endpoint, local_address, remote_address, local_handle, remote_handle, IBV_WR_ATOMIC_CMP_AND_SWP, compare, value, flags, order, cbfunc, cbcontext, cbdata); diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index 56beb0b9c4b..44792a13ab8 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -10,16 +10,16 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2006-2015 Mellanox Technologies. All rights reserved. * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved - * Copyright (c) 2013-2014 Intel, Inc. 
All rights reserved - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. * $COPYRIGHT$ @@ -42,22 +42,8 @@ #include #include #include -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED -/* - * The include of malloc.h below breaks abstractions in OMPI (by - * directly including a header file from another component), but has - * been ruled "ok" because the openib component is only supported on - * Linux. - * - * The malloc hooks in newer glibc were deprecated, including stock - * malloc.h causes compilation warnings. Instead, we use the internal - * linux component malloc.h which does not cause these warnings. - * Internally, OMPI uses the built-in ptmalloc from the linux memory - * component anyway. - */ -#include "opal/mca/memory/linux/malloc.h" -#endif +#include "opal/mca/memory/memory.h" #include "opal/mca/event/event.h" #include "opal/align.h" #include "opal/util/output.h" @@ -73,7 +59,7 @@ know its exact path. We have to rely on the framework header files to find the right hwloc verbs helper file for us. 
*/ #define OPAL_HWLOC_WANT_VERBS_HELPER 1 -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/installdirs/installdirs.h" #include "opal_stdint.h" @@ -81,7 +67,8 @@ #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/base.h" #include "opal/mca/mpool/base/base.h" -#include "opal/mca/mpool/grdma/mpool_grdma.h" +#include "opal/mca/rcache/rcache.h" +#include "opal/mca/rcache/base/base.h" #include "opal/mca/common/cuda/common_cuda.h" #include "opal/mca/common/verbs/common_verbs.h" #include "opal/runtime/opal_params.h" @@ -97,10 +84,6 @@ #include "btl_openib_ini.h" #include "btl_openib_mca.h" #include "btl_openib_xrc.h" -#include "btl_openib_fd.h" -#if BTL_OPENIB_FAILOVER_ENABLED -#include "btl_openib_failover.h" -#endif #include "btl_openib_async.h" #include "connect/base.h" #include "btl_openib_ip.h" @@ -124,7 +107,6 @@ static void btl_openib_handle_incoming_completion(mca_btl_base_module_t* btl, * Local variables */ static mca_btl_openib_device_t *receive_queues_device = NULL; -static bool malloc_hook_set = false; static int num_devices_intentionally_ignored = 0; mca_btl_openib_component_t mca_btl_openib_component = { @@ -148,30 +130,6 @@ mca_btl_openib_component_t mca_btl_openib_component = { } }; -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED -/* This is a memory allocator hook. The purpose of this is to make - * every malloc aligned since this speeds up IB HCA work. - * There two basic cases here: - * - * 1. Memory manager for Open MPI is enabled. Then memalign below will - * be overridden by __memalign_hook which is set to - * opal_memory_linux_memalign_hook. Thus, _malloc_hook is going to - * use opal_memory_linux_memalign_hook. - * - * 2. No memory manager support. The memalign below is just regular glibc - * memalign which will be called through __malloc_hook instead of malloc. 
- */ -static void *btl_openib_malloc_hook(size_t sz, const void* caller) -{ - if (sz < mca_btl_openib_component.memalign_threshold && - malloc_hook_set) { - return mca_btl_openib_component.previous_malloc_hook(sz, caller); - } else { - return memalign(mca_btl_openib_component.use_memalign, sz); - } -} -#endif - static int btl_openib_component_register(void) { int ret; @@ -187,7 +145,7 @@ static int btl_openib_component_register(void) mca_btl_openib_module.super.btl_eager_limit; /* if_include and if_exclude need to be mutually exclusive */ - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != mca_base_var_check_exclusive("ompi", mca_btl_openib_component.super.btl_version.mca_type_name, mca_btl_openib_component.super.btl_version.mca_component_name, @@ -200,6 +158,10 @@ static int btl_openib_component_register(void) return OPAL_ERR_NOT_AVAILABLE; } +#if OPAL_CUDA_SUPPORT + mca_common_cuda_register_mca_variables(); +#endif + return OPAL_SUCCESS; } @@ -217,6 +179,7 @@ static int btl_openib_component_open(void) /* initialize state */ mca_btl_openib_component.ib_num_btls = 0; + mca_btl_openib_component.num_default_gid_btls = 0; mca_btl_openib_component.openib_btls = NULL; OBJ_CONSTRUCT(&mca_btl_openib_component.devices, opal_pointer_array_t); mca_btl_openib_component.devices_count = 0; @@ -241,47 +204,18 @@ static int btl_openib_component_close(void) { int rc = OPAL_SUCCESS; - /* Tell the async thread to shutdown */ - if (mca_btl_openib_component.use_async_event_thread && - 0 != mca_btl_openib_component.async_thread) { - mca_btl_openib_async_cmd_t async_command = {.a_cmd = OPENIB_ASYNC_THREAD_EXIT, - .fd = -1, .qp = NULL}; - if (write(mca_btl_openib_component.async_pipe[1], &async_command, - sizeof(mca_btl_openib_async_cmd_t)) < 0) { - BTL_ERROR(("Failed to communicate with async event thread")); - rc = OPAL_ERROR; - } else { - if (pthread_join(mca_btl_openib_component.async_thread, NULL)) { - BTL_ERROR(("Failed to stop OpenIB async event thread")); - rc = OPAL_ERROR; - } - } - 
close(mca_btl_openib_component.async_pipe[0]); - close(mca_btl_openib_component.async_pipe[1]); - close(mca_btl_openib_component.async_comp_pipe[0]); - close(mca_btl_openib_component.async_comp_pipe[1]); - } + /* remove the async event from the event base */ + mca_btl_openib_async_fini (); OBJ_DESTRUCT(&mca_btl_openib_component.srq_manager.lock); OBJ_DESTRUCT(&mca_btl_openib_component.srq_manager.srq_addr_table); opal_btl_openib_connect_base_finalize(); - opal_btl_openib_fd_finalize(); opal_btl_openib_ini_finalize(); if (NULL != mca_btl_openib_component.default_recv_qps) { free(mca_btl_openib_component.default_recv_qps); } - -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED - /* Must check to see whether the malloc hook was set before - assigning it back because ompi_info will call _register() and - then _close() (which won't set the hook) */ - if (malloc_hook_set) { - __malloc_hook = mca_btl_openib_component.previous_malloc_hook; - malloc_hook_set = false; - } -#endif /* close memory registration debugging output */ opal_output_close (mca_btl_openib_component.memory_registration_verbose); @@ -447,7 +381,7 @@ static int btl_openib_modex_send(void) } /* All done -- send it! 
*/ - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_btl_openib_component.super.btl_version, message, msg_size); free(message); @@ -565,28 +499,35 @@ static void btl_openib_control(mca_btl_base_module_t* btl, mca_btl_openib_endpoint_connected(ep); } break; -#if BTL_OPENIB_FAILOVER_ENABLED - case MCA_BTL_OPENIB_CONTROL_EP_BROKEN: - case MCA_BTL_OPENIB_CONTROL_EP_EAGER_RDMA_ERROR: - btl_openib_handle_failover_control_messages(ctl_hdr, ep); - break; -#endif default: BTL_ERROR(("Unknown message type received by BTL")); break; } } -static int openib_reg_mr(void *reg_data, void *base, size_t size, - mca_mpool_base_registration_t *reg) +static int openib_reg_mr (void *reg_data, void *base, size_t size, + mca_rcache_base_registration_t *reg) { mca_btl_openib_device_t *device = (mca_btl_openib_device_t*)reg_data; mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg; - enum ibv_access_flags access_flag = (enum ibv_access_flags) (IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ); + enum ibv_access_flags access_flag = 0; + + if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_READ) { + access_flag |= IBV_ACCESS_REMOTE_READ; + } + + if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_WRITE) { + access_flag |= IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE; + } + + if (reg->access_flags & MCA_RCACHE_ACCESS_LOCAL_WRITE) { + access_flag |= IBV_ACCESS_LOCAL_WRITE; + } #if HAVE_DECL_IBV_ATOMIC_HCA - access_flag |= IBV_ACCESS_REMOTE_ATOMIC; + if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_ATOMIC) { + access_flag |= IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_LOCAL_WRITE; + } #endif if (device->mem_reg_max && @@ -597,7 +538,7 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size, device->mem_reg_active += size; #if HAVE_DECL_IBV_ACCESS_SO - if (reg->flags & MCA_MPOOL_FLAGS_SO_MEM) { + if (reg->flags & MCA_RCACHE_FLAGS_SO_MEM) { access_flag |= IBV_ACCESS_SO; } #endif @@ -619,16 +560,16 @@ static int 
openib_reg_mr(void *reg_data, void *base, size_t size, (int) (reg->bound - reg->base + 1), reg->flags)); #if OPAL_CUDA_SUPPORT - if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) { - mca_common_cuda_register(base, size, - openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name); + if (reg->flags & MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM) { + mca_common_cuda_register (base, size, + openib_reg->base.rcache->rcache_component->rcache_version.mca_component_name); } #endif return OPAL_SUCCESS; } -static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg) +static int openib_dereg_mr(void *reg_data, mca_rcache_base_registration_t *reg) { mca_btl_openib_device_t *device = (mca_btl_openib_device_t*)reg_data; mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg; @@ -645,9 +586,9 @@ static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg) } #if OPAL_CUDA_SUPPORT - if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) { + if (reg->flags & MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM) { mca_common_cuda_unregister(openib_reg->base.base, - openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name); + openib_reg->base.rcache->rcache_component->rcache_version.mca_component_name); } #endif @@ -739,13 +680,17 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, ibv_get_device_name(device->ib_dev), port_num, subnet_id)); #endif - if(mca_btl_openib_component.ib_num_btls > 0 && + if(mca_btl_openib_component.num_default_gid_btls > 0 && IB_DEFAULT_GID_PREFIX == subnet_id && mca_btl_openib_component.warn_default_gid_prefix) { opal_show_help("help-mpi-btl-openib.txt", "default subnet prefix", true, opal_process_info.nodename); } + if (IB_DEFAULT_GID_PREFIX == subnet_id) { + mca_btl_openib_component.num_default_gid_btls++; + } + lmc = (1 << ib_port_attr->lmc); lmc_step = 1; @@ -825,13 +770,41 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, 
openib_btl->super.btl_get_local_registration_threshold = 0; #if HAVE_DECL_IBV_ATOMIC_HCA - if (openib_btl->device->ib_dev_attr.atomic_cap == IBV_ATOMIC_NONE) { + openib_btl->atomic_ops_be = false; + +#ifdef HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_EXT_ATOM + /* check that 8-byte atomics are supported */ + if (!(device->ib_exp_dev_attr.ext_atom.log_atomic_arg_sizes & (1<<3ull))) { openib_btl->super.btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_FOPS; openib_btl->super.btl_atomic_flags = 0; openib_btl->super.btl_atomic_fop = NULL; openib_btl->super.btl_atomic_cswap = NULL; - } else if (IBV_ATOMIC_GLOB == openib_btl->device->ib_dev_attr.atomic_cap) { + } +#endif + +#ifdef HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_EXP_ATOMIC_CAP + switch (openib_btl->device->ib_exp_dev_attr.exp_atomic_cap) +#else + switch (openib_btl->device->ib_dev_attr.atomic_cap) +#endif + { + case IBV_ATOMIC_GLOB: openib_btl->super.btl_flags |= MCA_BTL_ATOMIC_SUPPORTS_GLOB; + break; +#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE + case IBV_EXP_ATOMIC_HCA_REPLY_BE: + openib_btl->atomic_ops_be = true; + break; +#endif + case IBV_ATOMIC_HCA: + break; + case IBV_ATOMIC_NONE: + default: + /* no atomics or an unsupported atomic type */ + openib_btl->super.btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_FOPS; + openib_btl->super.btl_atomic_flags = 0; + openib_btl->super.btl_atomic_fop = NULL; + openib_btl->super.btl_atomic_cswap = NULL; } #endif @@ -902,7 +875,8 @@ static void device_construct(mca_btl_openib_device_t *device) device->ib_dev_context = NULL; device->ib_pd = NULL; device->mpool = NULL; -#if OPAL_ENABLE_PROGRESS_THREADS + device->rcache = NULL; +#if OPAL_ENABLE_PROGRESS_THREADS == 1 device->ib_channel = NULL; #endif device->btls = 0; @@ -922,10 +896,6 @@ static void device_construct(mca_btl_openib_device_t *device) device->xrc_fd = -1; #endif device->qps = NULL; - mca_btl_openib_component.async_pipe[0] = - mca_btl_openib_component.async_pipe[1] = -1; - mca_btl_openib_component.async_comp_pipe[0] = - 
mca_btl_openib_component.async_comp_pipe[1] = -1; OBJ_CONSTRUCT(&device->device_lock, opal_mutex_t); OBJ_CONSTRUCT(&device->send_free_control, opal_free_list_t); device->max_inline_data = 0; @@ -936,8 +906,8 @@ static void device_destruct(mca_btl_openib_device_t *device) { int i; -#if OPAL_ENABLE_PROGRESS_THREADS - if(device->progress) { +#if OPAL_ENABLE_PROGRESS_THREADS == 1 + if (device->progress) { device->progress = false; if (pthread_cancel(device->thread.t_handle)) { BTL_ERROR(("Failed to cancel OpenIB progress thread")); @@ -945,27 +915,15 @@ static void device_destruct(mca_btl_openib_device_t *device) } opal_thread_join(&device->thread, NULL); } + if (ibv_destroy_comp_channel(device->ib_channel)) { BTL_VERBOSE(("Failed to close comp_channel")); goto device_error; } #endif + /* signaling to async_tread to stop poll for this device */ - if (mca_btl_openib_component.use_async_event_thread && - -1 != mca_btl_openib_component.async_pipe[1]) { - mca_btl_openib_async_cmd_t async_command = {.a_cmd = OPENIB_ASYNC_CMD_FD_REMOVE, - .fd = device->ib_dev_context->async_fd, - .qp = NULL}; - if (write(mca_btl_openib_component.async_pipe[1], &async_command, - sizeof(mca_btl_openib_async_cmd_t)) < 0){ - BTL_ERROR(("Failed to write to pipe")); - goto device_error; - } - /* wait for ok from thread */ - if (OPAL_SUCCESS != btl_openib_async_command_done(device->ib_dev_context->async_fd)){ - goto device_error; - } - } + mca_btl_openib_async_rem_device (device); if(device->eager_rdma_buffers) { int i; @@ -1000,17 +958,13 @@ static void device_destruct(mca_btl_openib_device_t *device) } } - if (OPAL_SUCCESS != mca_mpool_base_module_destroy(device->mpool)) { - BTL_VERBOSE(("Failed to release mpool")); + if (OPAL_SUCCESS != mca_rcache_base_module_destroy (device->rcache)) { + BTL_VERBOSE(("failed to release registration cache")); goto device_error; } #if HAVE_XRC - if (!mca_btl_openib_xrc_check_api()) { - return; - } - if (MCA_BTL_XRC_ENABLED) { if (OPAL_SUCCESS != 
mca_btl_openib_close_xrc_domain(device)) { BTL_VERBOSE(("XRC Internal error. Failed to close xrc domain")); @@ -1222,6 +1176,16 @@ static void init_apm_port(mca_btl_openib_device_t *device, int port, uint16_t li } } +static int get_var_source (const char *var_name, mca_base_var_source_t *source) +{ + int vari = mca_base_var_find ("opal", "btl", "openib", var_name); + if (0 > vari) { + return vari; + } + + return mca_base_var_get_value (vari, NULL, source, NULL); +} + static int setup_qps(void) { char **queues, **params = NULL; @@ -1430,6 +1394,33 @@ static int setup_qps(void) mca_btl_openib_module.super.btl_max_send_size) ? mca_btl_openib_module.super.btl_eager_limit : mca_btl_openib_module.super.btl_max_send_size; + + if (max_qp_size < max_size_needed) { + mca_base_var_source_t eager_source = MCA_BASE_VAR_SOURCE_DEFAULT; + mca_base_var_source_t max_send_source = MCA_BASE_VAR_SOURCE_DEFAULT; + + (void) get_var_source ("max_send_size", &max_send_source); + (void) get_var_source ("eager_limit", &eager_source); + + /* the largest queue pair is too small for either the max send size or eager + * limit. check where we got the max_send_size and eager_limit and adjust if + * the user did not specify one or the other. */ + if (mca_btl_openib_module.super.btl_eager_limit > max_qp_size && + MCA_BASE_VAR_SOURCE_DEFAULT == eager_source) { + mca_btl_openib_module.super.btl_eager_limit = max_qp_size; + } + + if (mca_btl_openib_module.super.btl_max_send_size > max_qp_size && + MCA_BASE_VAR_SOURCE_DEFAULT == max_send_source) { + mca_btl_openib_module.super.btl_max_send_size = max_qp_size; + } + + max_size_needed = (mca_btl_openib_module.super.btl_eager_limit > + mca_btl_openib_module.super.btl_max_send_size) ? 
+ mca_btl_openib_module.super.btl_eager_limit : + mca_btl_openib_module.super.btl_max_send_size; + } + if (max_qp_size < max_size_needed) { opal_show_help("help-mpi-btl-openib.txt", "biggest qp size is too small", true, @@ -1508,7 +1499,6 @@ static uint64_t read_module_param(char *file, uint64_t value, uint64_t max) /* calculate memory registation limits */ static uint64_t calculate_total_mem (void) { -#if OPAL_HAVE_HWLOC hwloc_obj_t machine; machine = hwloc_get_next_obj_by_type (opal_hwloc_topology, HWLOC_OBJ_MACHINE, NULL); @@ -1517,9 +1507,6 @@ static uint64_t calculate_total_mem (void) } return machine->memory.total_memory; -#else - return 0; -#endif } @@ -1601,7 +1588,7 @@ static uint64_t calculate_max_reg (const char *device_name) static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) { - struct mca_mpool_base_resources_t mpool_resources; + mca_rcache_base_resources_t rcache_resources; mca_btl_openib_device_t *device; uint8_t i, k = 0; int ret = -1, port_cnt; @@ -1617,7 +1604,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) } /* Find out if this device supports RC QPs */ - if (OPAL_SUCCESS != opal_common_verbs_qp_test(dev_context, + if (OPAL_SUCCESS != opal_common_verbs_qp_test(dev_context, OPAL_COMMON_VERBS_FLAGS_RC)) { ibv_close_device(dev_context); BTL_VERBOSE(("openib: RC QPs not supported -- skipping %s", @@ -1634,7 +1621,8 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) } device->mem_reg_active = 0; - device->mem_reg_max = calculate_max_reg(ibv_get_device_name(ib_dev)); + device->mem_reg_max_total = calculate_max_reg(ibv_get_device_name(ib_dev)); + device->mem_reg_max = device->mem_reg_max_total; if(( 0 == device->mem_reg_max) && mca_btl_openib_component.abort_not_enough_reg_mem) { return OPAL_ERROR; } @@ -1653,7 +1641,14 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) ibv_get_device_name(device->ib_dev), strerror(errno))); goto error; 
} - +#if HAVE_DECL_IBV_EXP_QUERY_DEVICE + device->ib_exp_dev_attr.comp_mask = IBV_EXP_DEVICE_ATTR_RESERVED - 1; + if(ibv_exp_query_device(device->ib_dev_context, &device->ib_exp_dev_attr)){ + BTL_ERROR(("error obtaining device attributes for %s errno says %s", + ibv_get_device_name(device->ib_dev), strerror(errno))); + goto error; + } +#endif if(ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)){ BTL_ERROR(("error obtaining device attributes for %s errno says %s", ibv_get_device_name(device->ib_dev), strerror(errno))); @@ -1812,20 +1807,25 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) "eager RDMA and progress threads", true); } - asprintf (&mpool_resources.pool_name, "verbs.%" PRIu64, device->ib_dev_attr.node_guid); - mpool_resources.reg_data = (void*)device; - mpool_resources.sizeof_reg = sizeof(mca_btl_openib_reg_t); - mpool_resources.register_mem = openib_reg_mr; - mpool_resources.deregister_mem = openib_dereg_mr; - device->mpool = - mca_mpool_base_module_create(mca_btl_openib_component.ib_mpool_name, - device, &mpool_resources); - if(NULL == device->mpool){ + asprintf (&rcache_resources.cache_name, "verbs.%" PRIu64, device->ib_dev_attr.node_guid); + rcache_resources.reg_data = (void*)device; + rcache_resources.sizeof_reg = sizeof(mca_btl_openib_reg_t); + rcache_resources.register_mem = openib_reg_mr; + rcache_resources.deregister_mem = openib_dereg_mr; + device->rcache = + mca_rcache_base_module_create (mca_btl_openib_component.ib_rcache_name, + device, &rcache_resources); + if (NULL == device->rcache) { /* Don't print an error message here -- we'll get one from mpool_create anyway */ goto error; } + device->mpool = mca_mpool_base_module_lookup (mca_btl_openib_component.ib_mpool_hints); + if (NULL == device->mpool) { + goto error; + } + #if OPAL_ENABLE_PROGRESS_THREADS device->ib_channel = ibv_create_comp_channel(device->ib_dev_context); if (NULL == device->ib_channel) { @@ -2123,18 +2123,15 @@ static int 
init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) { /* we need to read this MCA param at this point in case someone * altered it via MPI_T */ - int index; mca_base_var_source_t source; - index = mca_base_var_find("opal","btl","openib","receive_queues"); - if (index >= 0) { - if (OPAL_SUCCESS != (ret = mca_base_var_get_value(index, NULL, &source, NULL))) { - BTL_ERROR(("mca_base_var_get_value failed to get value for receive_queues: %s:%d", - __FILE__, __LINE__)); - goto error; - } else { - mca_btl_openib_component.receive_queues_source = source; - } + + if (OPAL_SUCCESS != (ret = get_var_source ("receive_queues", &source))) { + BTL_ERROR(("mca_base_var_get_value failed to get value for receive_queues: %s:%d", + __FILE__, __LINE__)); + goto error; } + + mca_btl_openib_component.receive_queues_source = source; } /* If the MCA param was specified, skip all the checks */ @@ -2219,19 +2216,6 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) } error: -#if OPAL_ENABLE_PROGRESS_THREADS - if (device->ib_channel) { - ibv_destroy_comp_channel(device->ib_channel); - } -#endif - if (device->mpool) { - mca_mpool_base_module_destroy(device->mpool); - } - - if (device->ib_pd) { - ibv_dealloc_pd(device->ib_pd); - } - if (OPAL_SUCCESS != ret) { opal_show_help("help-mpi-btl-openib.txt", "error in device init", true, @@ -2239,9 +2223,6 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) ibv_get_device_name(device->ib_dev)); } - if (device->ib_dev_context) { - ibv_close_device(device->ib_dev_context); - } if (NULL != allowed_ports) { free(allowed_ports); } @@ -2291,8 +2272,8 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl) openib_btl->super.btl_eager_limit, mca_btl_openib_component.buffer_alignment, size_t); - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "[rank=%d] openib: using port %s:%d", + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "[rank=%d] 
openib: using port %s:%d", OPAL_PROC_MY_NAME.vpid, ibv_get_device_name(openib_btl->device->ib_dev), openib_btl->port_num); @@ -2329,7 +2310,6 @@ static float get_ib_dev_distance(struct ibv_device *dev) return distance; } -#if OPAL_HAVE_HWLOC float a, b; int i; hwloc_cpuset_t my_cpuset = NULL, ibv_cpuset = NULL; @@ -2340,8 +2320,8 @@ static float get_ib_dev_distance(struct ibv_device *dev) static const struct hwloc_distances_s *hwloc_distances = NULL; if (NULL == hwloc_distances) { - hwloc_distances = - hwloc_get_whole_distance_matrix_by_type(opal_hwloc_topology, + hwloc_distances = + hwloc_get_whole_distance_matrix_by_type(opal_hwloc_topology, HWLOC_OBJ_NODE); } @@ -2363,6 +2343,13 @@ static float get_ib_dev_distance(struct ibv_device *dev) goto out; } + opal_output_verbose(5, opal_btl_base_framework.framework_output, + "hwloc_distances->nbobjs=%d", hwloc_distances->nbobjs); + for (i = 0; i < (int)(2 * hwloc_distances->nbobjs); i++) { + opal_output_verbose(5, opal_btl_base_framework.framework_output, + "hwloc_distances->latency[%d]=%f", i, hwloc_distances->latency[i]); + } + /* If ibv_obj is a NUMA node or below, we're good. */ switch (ibv_obj->type) { case HWLOC_OBJ_NODE: @@ -2375,9 +2362,10 @@ static float get_ib_dev_distance(struct ibv_device *dev) } break; - default: + default: /* If it's above a NUMA node, then I don't know how to compute the distance... */ + opal_output_verbose(5, opal_btl_base_framework.framework_output, "ibv_obj->type set to NULL"); ibv_obj = NULL; break; } @@ -2387,6 +2375,8 @@ static float get_ib_dev_distance(struct ibv_device *dev) goto out; } + opal_output_verbose(5, opal_btl_base_framework.framework_output, + "ibv_obj->logical_index=%d", ibv_obj->logical_index); /* This function is only called if the process is bound, so let's find out where we are bound to. For the moment, we only care about the NUMA node to which we are bound. 
*/ @@ -2413,35 +2403,37 @@ static float get_ib_dev_distance(struct ibv_device *dev) my_obj = my_obj->parent; } if (NULL != my_obj) { + opal_output_verbose(5, opal_btl_base_framework.framework_output, + "my_obj->logical_index=%d", my_obj->logical_index); /* Distance may be asymetrical, so calculate both of them and take the max */ a = hwloc_distances->latency[my_obj->logical_index + - (ibv_obj->logical_index * + (ibv_obj->logical_index * hwloc_distances->nbobjs)]; b = hwloc_distances->latency[ibv_obj->logical_index + - (my_obj->logical_index * + (my_obj->logical_index * hwloc_distances->nbobjs)]; distance = (a > b) ? a : b; } break; - default: + default: /* If the obj is above a NUMA node, then we're bound to more than one NUMA node. Find the max distance. */ i = 0; for (node_obj = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, - ibv_obj->cpuset, + ibv_obj->cpuset, HWLOC_OBJ_NODE, i); - NULL != node_obj; + NULL != node_obj; node_obj = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, - ibv_obj->cpuset, + ibv_obj->cpuset, HWLOC_OBJ_NODE, ++i)) { a = hwloc_distances->latency[node_obj->logical_index + - (ibv_obj->logical_index * + (ibv_obj->logical_index * hwloc_distances->nbobjs)]; b = hwloc_distances->latency[ibv_obj->logical_index + - (node_obj->logical_index * + (node_obj->logical_index * hwloc_distances->nbobjs)]; a = (a > b) ? a : b; distance = (a > distance) ? a : distance; @@ -2456,7 +2448,6 @@ static float get_ib_dev_distance(struct ibv_device *dev) if (NULL != my_cpuset) { hwloc_bitmap_free(my_cpuset); } -#endif return distance; } @@ -2472,15 +2463,18 @@ sort_devs_by_distance(struct ibv_device **ib_devs, int count) for (i = 0; i < count; i++) { devs[i].ib_dev = ib_devs[i]; + opal_output_verbose(5, opal_btl_base_framework.framework_output, + "Checking distance from this process to device=%s", ibv_get_device_name(ib_devs[i])); /* If we're not bound, just assume that the device is close. 
*/ devs[i].distance = 0; -#if OPAL_HAVE_HWLOC if (opal_process_info.cpuset) { /* If this process is bound to one or more PUs, we can get an accurate distance. */ devs[i].distance = get_ib_dev_distance(ib_devs[i]); } -#endif + opal_output_verbose(5, opal_btl_base_framework.framework_output, + "Process is %s: distance to device is %f", + (opal_process_info.cpuset ? "bound" : "not bound"), devs[i].distance); } qsort(devs, count, sizeof(struct dev_distance), compare_distance); @@ -2512,7 +2506,7 @@ btl_openib_component_init(int *num_btl_modules, mca_btl_openib_frag_init_data_t *init_data; struct dev_distance *dev_sorted; float distance; - int index, value; + int index; bool found; mca_base_var_source_t source; int list_count = 0; @@ -2521,25 +2515,12 @@ btl_openib_component_init(int *num_btl_modules, *num_btl_modules = 0; num_devs = 0; -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED /* If we got this far, then setup the memory alloc hook (because we're most likely going to be using this component). The hook is to be set up as early as possible in this function since we - want most of the allocated resources be aligned.*/ - if (mca_btl_openib_component.use_memalign > 0 && - (opal_mem_hooks_support_level() & - (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_CHUNK_SUPPORT)) != 0) { - mca_btl_openib_component.previous_malloc_hook = __malloc_hook; - __malloc_hook = btl_openib_malloc_hook; - malloc_hook_set = true; - } -#endif - /* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */ - if (enable_mpi_threads && !mca_btl_base_thread_multiple_override) { - opal_output_verbose(5, opal_btl_base_framework.framework_output, - "btl:openib: MPI_THREAD_MULTIPLE not suppported; skipping this component"); - goto no_btls; - } + want most of the allocated resources be aligned. + */ + opal_memory->memoryc_set_alignment(32, mca_btl_openib_module.super.btl_eager_limit); /* Per https://svn.open-mpi.org/trac/ompi/ticket/1305, check to see if $sysfsdir/class/infiniband exists. 
If it does not, @@ -2555,44 +2536,6 @@ btl_openib_component_init(int *num_btl_modules, goto no_btls; } - /* Initialize FD listening */ - if (OPAL_SUCCESS != opal_btl_openib_fd_init()) { - goto no_btls; - } - - /* If we are using ptmalloc2 and there are no posix threads - available, this will cause memory corruption. Refuse to run. - Right now, ptmalloc2 is the only memory manager that we have on - OS's that support OpenFabrics that provide both FREE and MUNMAP - support, so the following test is [currently] good enough... */ - value = opal_mem_hooks_support_level(); - - /* If we have a memory manager available, and - opal_leave_pinned==-1, then unless the user explicitly set - opal_leave_pinned_pipeline==0, then set opal_leave_pinned to 1. - - We have a memory manager if we have both FREE and MUNMAP - support */ - if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == - ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) { - if (0 == opal_leave_pinned_pipeline && - -1 == opal_leave_pinned) { - opal_leave_pinned = 1; - } - } else { - opal_leave_pinned = 0; - opal_leave_pinned_pipeline = 0; - } - -#if OPAL_CUDA_SUPPORT - if (mca_btl_openib_component.cuda_want_gdr && (0 == opal_leave_pinned)) { - opal_show_help("help-mpi-btl-openib.txt", - "CUDA_gdr_and_nopinned", true, - opal_process_info.nodename); - goto no_btls; - } -#endif /* OPAL_CUDA_SUPPORT */ - index = mca_base_var_find("ompi", "btl", "openib", "max_inline_data"); if (index >= 0) { if (OPAL_SUCCESS == mca_base_var_get_value(index, NULL, &source, NULL)) { @@ -2734,7 +2677,7 @@ btl_openib_component_init(int *num_btl_modules, OBJ_CONSTRUCT(&btl_list, opal_list_t); OBJ_CONSTRUCT(&mca_btl_openib_component.ib_lock, opal_mutex_t); - mca_btl_openib_component.async_thread = 0; + distance = dev_sorted[0].distance; for (found = false, i = 0; i < num_devs && (-1 == mca_btl_openib_component.ib_max_btls || @@ -2742,8 +2685,8 @@ btl_openib_component_init(int *num_btl_modules, 
mca_btl_openib_component.ib_max_btls); i++) { if (0 != mca_btl_openib_component.ib_num_btls && (dev_sorted[i].distance - distance) > EPS) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "[rank=%d] openib: skipping device %s; it is too far away", + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "[rank=%d] openib: skipping device %s; it is too far away", OPAL_PROC_MY_NAME.vpid, ibv_get_device_name(dev_sorted[i].ib_dev)); break; @@ -2820,7 +2763,7 @@ btl_openib_component_init(int *num_btl_modules, ignored, warn about it */ if (num_devices_intentionally_ignored < num_devs) { opal_show_help("help-mpi-btl-openib.txt", - "no active ports found", true, + "no active ports found", true, opal_process_info.nodename); } goto no_btls; @@ -2929,6 +2872,15 @@ btl_openib_component_init(int *num_btl_modules, mca_btl_openib_component.if_exclude_list = NULL; } +#if OPAL_CUDA_SUPPORT + if (mca_btl_openib_component.cuda_want_gdr && (0 == opal_leave_pinned)) { + opal_show_help("help-mpi-btl-openib.txt", + "CUDA_gdr_and_nopinned", true, + opal_process_info.nodename); + goto no_btls; + } +#endif /* OPAL_CUDA_SUPPORT */ + mca_btl_openib_component.memory_registration_verbose = opal_output_open(NULL); opal_output_set_verbosity (mca_btl_openib_component.memory_registration_verbose, mca_btl_openib_component.memory_registration_verbose_level); @@ -2945,13 +2897,6 @@ btl_openib_component_init(int *num_btl_modules, mca_btl_openib_component.ib_num_btls = 0; btl_openib_modex_send(); -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED - /*Unset malloc hook since the component won't start*/ - if (malloc_hook_set) { - __malloc_hook = mca_btl_openib_component.previous_malloc_hook; - malloc_hook_set = false; - } -#endif if (NULL != btls) { free(btls); } @@ -3110,8 +3055,8 @@ static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, reg = mca_btl_base_active_message_trigger + hdr->tag; reg->cbfunc( &openib_btl->super, hdr->tag, des, reg->cbdata ); #if 
OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */ - if (des->des_flags & MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC) { - /* Since ASYNC flag is set, we know this descriptor is being used + if (des->des_flags & MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC) { + /* Since ASYNC flag is set, we know this descriptor is being used * for asynchronous copy and cannot be freed yet. Therefore, set * up callback for PML to call when complete, add argument into * descriptor and return. */ @@ -3376,12 +3321,15 @@ progress_pending_frags_wqe(mca_btl_base_endpoint_t *ep, const int qpn) frag = opal_list_remove_first(&ep->qps[qpn].no_wqe_pending_frags[i]); if(NULL == frag) break; + assert(0 == frag->opal_list_item_refcount); tmp_ep = to_com_frag(frag)->endpoint; ret = mca_btl_openib_endpoint_post_send(tmp_ep, to_send_frag(frag)); if (OPAL_SUCCESS != ret) { /* NTH: this handles retrying if we are out of credits but other errors are not * handled (maybe abort?). */ - opal_list_prepend (&ep->qps[qpn].no_wqe_pending_frags[i], (opal_list_item_t *) frag); + if (OPAL_ERR_RESOURCE_BUSY != ret) { + opal_list_prepend (&ep->qps[qpn].no_wqe_pending_frags[i], (opal_list_item_t *) frag); + } break; } } @@ -3464,6 +3412,11 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, mca_btl_openib_get_frag_t *get_frag = to_get_frag(des); + /* check if atomic result needs to be byte swapped (mlx5) */ + if (openib_btl->atomic_ops_be && IBV_WC_RDMA_READ != wc->opcode) { + *((int64_t *) frag->sg_entry.addr) = ntoh64 (*((int64_t *) frag->sg_entry.addr)); + } + get_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr, get_frag->cb.local_handle, get_frag->cb.context, get_frag->cb.data, OPAL_SUCCESS); @@ -3484,20 +3437,8 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, opal_list_item_t *i; while((i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) { btl_ownership = (to_base_frag(i)->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); -#if 
BTL_OPENIB_FAILOVER_ENABLED - /* The check for the callback flag is only needed when running - * with the failover case because there is a chance that a fragment - * generated from a sendi call (which does not set the flag) gets - * coalesced. In normal operation, this cannot happen as the sendi - * call will never queue up a fragment which could potentially become - * a coalesced fragment. It will revert to a regular send. */ - if (to_base_frag(i)->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { -#endif to_base_frag(i)->base.des_cbfunc(&openib_btl->super, endpoint, &to_base_frag(i)->base, OPAL_SUCCESS); -#if BTL_OPENIB_FAILOVER_ENABLED - } -#endif if( btl_ownership ) { mca_btl_openib_free(&openib_btl->super, &to_base_frag(i)->base); } @@ -3622,14 +3563,9 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, } } -#if BTL_OPENIB_FAILOVER_ENABLED - mca_btl_openib_handle_endpoint_error(openib_btl, des, qp, - remote_proc, endpoint); -#else if(openib_btl) openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL, (struct opal_proc_t*)remote_proc, NULL); -#endif } static int poll_device(mca_btl_openib_device_t* device, int count) @@ -3798,7 +3734,9 @@ static int btl_openib_component_progress(void) for(i = 0; i < mca_btl_openib_component.devices_count; i++) { mca_btl_openib_device_t *device = (mca_btl_openib_device_t *) opal_pointer_array_get_item(&mca_btl_openib_component.devices, i); - count += progress_one_device(device); + if (NULL != device) { + count += progress_one_device(device); + } } #if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_SEND */ @@ -3807,7 +3745,7 @@ static int btl_openib_component_progress(void) * The only thing that gets completed here are asynchronous copies * so there is no need to free anything. 
*/ - { + { int local_count = 0; mca_btl_base_descriptor_t *frag; while (local_count < 10 && (1 == progress_one_cuda_dtoh_event(&frag))) { @@ -3838,9 +3776,6 @@ static int btl_openib_component_progress(void) if(openib_btl->device->got_port_event) { /* These are non-fatal so just ignore it. */ openib_btl->device->got_port_event = false; -#if BTL_OPENIB_FAILOVER_ENABLED - mca_btl_openib_handle_btl_error(openib_btl); -#endif } } return count; @@ -3915,3 +3850,42 @@ int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp) return OPAL_ERROR; } + +struct mca_btl_openib_event_t { + opal_event_t super; + void *(*fn)(void *); + void *arg; + opal_event_t *event; +}; + +typedef struct mca_btl_openib_event_t mca_btl_openib_event_t; + +static void *mca_btl_openib_run_once_cb (int fd, int flags, void *context) +{ + mca_btl_openib_event_t *event = (mca_btl_openib_event_t *) context; + void *ret; + + ret = event->fn (event->arg); + opal_event_del (&event->super); + free (event); + return ret; +} + +int mca_btl_openib_run_in_main (void *(*fn)(void *), void *arg) +{ + mca_btl_openib_event_t *event = malloc (sizeof (mca_btl_openib_event_t)); + + if (OPAL_UNLIKELY(NULL == event)) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + event->fn = fn; + event->arg = arg; + + opal_event_set (opal_sync_event_base, &event->super, -1, OPAL_EV_READ, + mca_btl_openib_run_once_cb, event); + + opal_event_active (&event->super, OPAL_EV_READ, 1); + + return OPAL_SUCCESS; +} diff --git a/opal/mca/btl/openib/btl_openib_eager_rdma.h b/opal/mca/btl/openib/btl_openib_eager_rdma.h index b11378cdd11..0ba5a030d4c 100644 --- a/opal/mca/btl/openib/btl_openib_eager_rdma.h +++ b/opal/mca/btl/openib/btl_openib_eager_rdma.h @@ -1,5 +1,8 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006-2007 Voltaire All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -17,6 +20,7 @@ BEGIN_C_DECLS struct mca_btl_openib_eager_rdma_local_t { opal_ptr_t base; /**< buffer for RDMAing eager messages */ + void *alloc_base; /**< allocated base */ mca_btl_openib_recv_frag_t *frags; mca_btl_openib_reg_t *reg; uint16_t head; /**< RDMA buffer to poll */ @@ -81,17 +85,33 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo mca_btl_openib_component.eager_rdma_num) \ (I) = 0; \ } while (0) -#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \ - do { \ - int32_t new_head; \ - do { \ - OLD_HEAD = HEAD; \ - new_head = OLD_HEAD + 1; \ - if(new_head == mca_btl_openib_component.eager_rdma_num) \ - new_head = 0; \ - } while(!OPAL_ATOMIC_CMPSET_32(&HEAD, OLD_HEAD, new_head)); \ + + +#if OPAL_ENABLE_DEBUG + +/** + * @brief read and increment the remote head index and generate a sequence + * number + */ + +#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD, SEQ) \ + do { \ + (SEQ) = OPAL_THREAD_ADD32(&(HEAD), 1) - 1; \ + (OLD_HEAD) = (SEQ) % mca_btl_openib_component.eager_rdma_num; \ } while(0) +#else + +/** + * @brief read and increment the remote head index + */ + +#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \ + do { \ + (OLD_HEAD) = (OPAL_THREAD_ADD32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \ + } while(0) + +#endif END_C_DECLS #endif diff --git a/opal/mca/btl/openib/btl_openib_endpoint.c b/opal/mca/btl/openib/btl_openib_endpoint.c index a4f84e08927..32d70bec81b 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.c +++ b/opal/mca/btl/openib/btl_openib_endpoint.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved. @@ -347,14 +347,17 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint) * was not in "connect" or "bad" flow (failed to allocate memory) * and changed the pointer back to NULL */ - if(!opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, - (void*)1)) { - if ((void*)1 != endpoint->eager_rdma_local.base.pval && - NULL != endpoint->eager_rdma_local.base.pval) { - endpoint->endpoint_btl->super.btl_mpool->mpool_free(endpoint->endpoint_btl->super.btl_mpool, - endpoint->eager_rdma_local.base.pval, - (mca_mpool_base_registration_t*)endpoint->eager_rdma_local.reg); - pval_clean=true; + if(!opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, (void*)1)) { + if (NULL != endpoint->eager_rdma_local.reg) { + endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache, + &endpoint->eager_rdma_local.reg->base); + endpoint->eager_rdma_local.reg = NULL; + } + + void *alloc_base = opal_atomic_swap_ptr (&endpoint->eager_rdma_local.alloc_base, NULL); + if (alloc_base) { + endpoint->endpoint_btl->super.btl_mpool->mpool_free (endpoint->endpoint_btl->super.btl_mpool, alloc_base); + pval_clean = true; } } else { pval_clean=true; @@ -579,7 +582,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint) opal_output(-1, "Now we are CONNECTED"); if (MCA_BTL_XRC_ENABLED) { - OPAL_THREAD_LOCK(&endpoint->ib_addr->addr_lock); + opal_mutex_lock (&endpoint->ib_addr->addr_lock); if (MCA_BTL_IB_ADDR_CONNECTED == endpoint->ib_addr->status) { /* We are not xrc master */ /* set our qp pointer to master qp */ @@ -608,10 +611,6 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint) endpoint->endpoint_state = MCA_BTL_IB_CONNECTED; endpoint->endpoint_btl->device->non_eager_rdma_endpoints++; - /* The connection is correctly setup. 
Now we can decrease the - event trigger. */ - opal_progress_event_users_decrement(); - if(MCA_BTL_XRC_ENABLED) { if (master) { while (NULL != (ep_item = opal_list_remove_first(&endpoint->ib_addr->pending_ep))) { @@ -622,15 +621,14 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint) } } } - OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock); + opal_mutex_unlock (&endpoint->ib_addr->addr_lock); } /* Process pending packet on the endpoint */ /* While there are frags in the list, process them */ - while (!opal_list_is_empty(&(endpoint->pending_lazy_frags))) { - frag_item = opal_list_remove_first(&(endpoint->pending_lazy_frags)); + while (NULL != (frag_item = opal_list_remove_first(&(endpoint->pending_lazy_frags)))) { frag = to_send_frag(frag_item); /* We need to post this one */ @@ -861,10 +859,10 @@ void mca_btl_openib_endpoint_connect_eager_rdma( mca_btl_openib_endpoint_t* endpoint) { mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl; - char *buf; + char *buf, *alloc_base; mca_btl_openib_recv_frag_t *headers_buf; - int i; - uint32_t flag = MCA_MPOOL_FLAGS_CACHE_BYPASS; + int i, rc; + uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS; /* Set local rdma pointer to 1 temporarily so other threads will not try * to enter the function */ @@ -890,19 +888,26 @@ void mca_btl_openib_endpoint_connect_eager_rdma( The following flag will be interpreted and the appropriate steps will be taken when the memory is registered in openib_reg_mr(). 
*/ - flag |= MCA_MPOOL_FLAGS_SO_MEM; + flag |= MCA_RCACHE_FLAGS_SO_MEM; #endif - buf = (char *) openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool, - openib_btl->eager_rdma_frag_size * - mca_btl_openib_component.eager_rdma_num, - mca_btl_openib_component.buffer_alignment, - flag, - (mca_mpool_base_registration_t**)&endpoint->eager_rdma_local.reg); + alloc_base = buf = (char *) openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool, + openib_btl->eager_rdma_frag_size * + mca_btl_openib_component.eager_rdma_num, + mca_btl_openib_component.buffer_alignment, + 0); if(!buf) goto free_headers_buf; + rc = openib_btl->device->rcache->rcache_register (openib_btl->device->rcache, buf, openib_btl->eager_rdma_frag_size * + mca_btl_openib_component.eager_rdma_num, flag, MCA_RCACHE_ACCESS_ANY, + (mca_rcache_base_registration_t**)&endpoint->eager_rdma_local.reg); + if (OPAL_SUCCESS != rc) { + openib_btl->super.btl_mpool->mpool_free (openib_btl->super.btl_mpool, alloc_base); + goto free_headers_buf; + } + buf = buf + openib_btl->eager_rdma_frag_size - sizeof(mca_btl_openib_footer_t) - openib_btl->super.btl_eager_limit - sizeof(mca_btl_openib_header_t); @@ -913,7 +918,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma( mca_btl_openib_frag_init_data_t init_data; item = (opal_free_list_item_t*)&headers_buf[i]; - item->registration = (mca_mpool_base_registration_t *)endpoint->eager_rdma_local.reg; + item->registration = (mca_rcache_base_registration_t *)endpoint->eager_rdma_local.reg; item->ptr = buf + i * openib_btl->eager_rdma_frag_size; OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_t); @@ -941,6 +946,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma( /* set local rdma pointer to real value */ (void)opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, (void*)1, buf); + endpoint->eager_rdma_local.alloc_base = alloc_base; if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) { mca_btl_openib_device_t *device = 
endpoint->endpoint_btl->device; @@ -957,8 +963,9 @@ void mca_btl_openib_endpoint_connect_eager_rdma( return; } - openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool, - buf, (mca_mpool_base_registration_t*)endpoint->eager_rdma_local.reg); + openib_btl->device->rcache->rcache_deregister (openib_btl->device->rcache, + (mca_rcache_base_registration_t*)endpoint->eager_rdma_local.reg); + openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool, buf); free_headers_buf: free(headers_buf); unlock_rdma_local: diff --git a/opal/mca/btl/openib/btl_openib_endpoint.h b/opal/mca/btl/openib/btl_openib_endpoint.h index 3b55c570199..c55df2b6b18 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.h +++ b/opal/mca/btl/openib/btl_openib_endpoint.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved. @@ -313,7 +313,7 @@ static inline int qp_need_signal(mca_btl_openib_endpoint_t *ep, const int qp, si { /* note that size here is payload only */ - if (ep->qps[qp].qp->sd_wqe <= 0 || + if (ep->qps[qp].qp->sd_wqe <= 0 || size + sizeof(mca_btl_openib_header_t) + (rdma ? sizeof(mca_btl_openib_footer_t) : 0) > ep->qps[qp].ib_inline_max || (!BTL_OPENIB_QP_TYPE_PP(qp) && ep->endpoint_btl->qps[qp].u.srq_qp.sd_credits <= 0)) { ep->qps[qp].qp->wqe_count = QP_TX_BATCH_COUNT; @@ -498,13 +498,6 @@ static inline int check_endpoint_state(mca_btl_openib_endpoint_t *ep, if (OPAL_SUCCESS == rc) { rc = OPAL_ERR_RESOURCE_BUSY; } - /* - * As long as we expect a message from the peer (in order - * to setup the connection) let the event engine pool the - * OOB events. 
Note: we increment it once peer active - * connection. - */ - opal_progress_event_users_increment(); /* fall through */ default: opal_list_append(pending_list, (opal_list_item_t *)des); @@ -561,7 +554,7 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep, BTL_OPENIB_HEADER_HTON(*frag->hdr); if(rdma) { - int32_t head; + int32_t head; mca_btl_openib_footer_t* ftr = (mca_btl_openib_footer_t*)(((char*)frag->hdr) + sg->length + BTL_OPENIB_FTR_PADDING(sg->length) - sizeof(mca_btl_openib_footer_t)); @@ -569,24 +562,18 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep, MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(ftr, sg->length); MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(ftr); #if OPAL_ENABLE_DEBUG - do { - ftr->seq = ep->eager_rdma_remote.seq; - } while (!OPAL_ATOMIC_CMPSET_32((int32_t*) &ep->eager_rdma_remote.seq, - (int32_t) ftr->seq, - (int32_t) (ftr->seq+1))); + /* NTH: generate the sequence from the remote head index to ensure that the + * wrong sequence isn't set. The way this code used to look the sequence number + * and head were updated independently and it led to false positives for incorrect + * sequence numbers. */ + MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head, ftr->seq); +#else + MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head); #endif if(ep->nbo) BTL_OPENIB_FOOTER_HTON(*ftr); sr_desc->wr.rdma.rkey = ep->eager_rdma_remote.rkey; - MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head); -#if BTL_OPENIB_FAILOVER_ENABLED - /* frag->ftr is unused on the sending fragment, so use it - * to indicate it is an eager fragment. A non-zero value - * indicates it is eager, and the value indicates the - * location in the eager RDMA array that it lives. 
*/ - frag->ftr = (mca_btl_openib_footer_t*)(long)(1 + head); -#endif sr_desc->wr.rdma.remote_addr = ep->eager_rdma_remote.base.lval + head * openib_btl->eager_rdma_frag_size + diff --git a/opal/mca/btl/openib/btl_openib_failover.c b/opal/mca/btl/openib/btl_openib_failover.c deleted file mode 100644 index 7e38953a253..00000000000 --- a/opal/mca/btl/openib/btl_openib_failover.c +++ /dev/null @@ -1,790 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * Functions specific to implementing failover support. - * - * This file is conditionally copiled into the BTL when one configures - * it in with --enable-openib-failover. When this file is compiled - * in, the multi-BTL configurations can handle errors. The - * requirement is that there needs to be more than one openib BTL in - * use so that all the traffic can move to the other BTL. This does - * not support failing over to a different BTL like TCP. 
- */ - -#include "opal_config.h" -#include "opal_stdint.h" - -#include "btl_openib.h" -#include "btl_openib_endpoint.h" -#include "btl_openib_proc.h" -#include "btl_openib_failover.h" - -static void error_out_all_pending_frags(mca_btl_base_endpoint_t *ep, - struct mca_btl_base_module_t* module, - bool errout); -static void mca_btl_openib_endpoint_notify(mca_btl_openib_endpoint_t *endpoint, - uint8_t type, int index); - -/* debug functions that are normally not needed */ -void mca_btl_openib_dump_all_local_rdma_frags(mca_btl_openib_device_t *device); -void mca_btl_openib_dump_all_internal_queues(bool errout); -static void dump_local_rdma_frags(mca_btl_openib_endpoint_t * endpoint); - -/** - * This function is called when we get an error on the completion - * event of a fragment. We check to see what type of fragment it is - * and act accordingly. In most cases, we first call up into the PML - * and have it map out this connection for any future communication. - * In addition, this function will possibly send some control messages - * over the other openib BTL. The first control message will tell the - * remote side to also map out this connection. The second control - * message makes sure the eager RDMA connection remains in a sane - * state. See that function for more details. - * @param openib_btl Pointer to BTL that had the error - * @param des Pointer to descriptor that had the error - * @param qp Queue pair that had the error - * @param remote_proc Pointer to process that had the error - * @param endpoint Pointer to endpoint that had the error - */ -void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl, - mca_btl_base_descriptor_t *des, - int qp, - opal_proc_t* remote_proc, - mca_btl_openib_endpoint_t* endpoint) -{ - char *btlname = NULL; - int btl_ownership; - /* Since this BTL supports failover, it will call the PML error handler - * function with the NONFATAL flag. 
If the PML is running with failover - * support, then it will map out the endpoint for further communication - * and return control here. If the PML does not have failover support, - * it will abort the job and control will not return here. */ - - /* Note: At this point, what needs to be done is based on the type - * of openib fragment that got the error. Also note that in the wc - * struct, when wc->status != IBV_WC_SUCCESS, these are the only - * valid fields: wc->wr_id, wc->status, wc->vendor_err, wc->qp_num. - * This means that one cannot key off of the wc->opcode to see what - * operation was done. The important information needs to be read - * from the fragment. */ - - /* Cannot issue callback to SRQ errors because the shared receive - * queue is shared and is not specific to a connection. There is no - * way to figure out what type of message created the error because - * we need the information in the wc->imm_data field which does not - * exist when we have an error. So, nothing to do here but return. */ - if ((openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_RECV) && - !BTL_OPENIB_QP_TYPE_PP(qp)) { - opal_output_verbose(20, mca_btl_openib_component.verbose_failover, - "SRQ RECV type=%d", openib_frag_type(des)); - /* Need to think about returning any shared resources of the - * SRQ. For now, we do nothing as we rarely see an error on - * the SRQ. */ - return; - } - assert(NULL != remote_proc); - - /* Create a nice string to help with debug */ - if (NULL != openib_btl) { - asprintf(&btlname, "lid=%d:name=%s", - openib_btl->lid, openib_btl->device->ib_dev->name); - } - - /* The next set of errors are associated with an endpoint, but not - * with a PML descriptor. They are not associated with a PML - * descriptor because: - * A. It was a receive - * B. It was some type of openib specific control message. - * Therefore, just drop the fragments and call up into the PML to - * disable this endpoint for future communication. 
*/ - if (((openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_RECV) && - (BTL_OPENIB_QP_TYPE_PP(qp))) || - (openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_CONTROL) || - (openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_EAGER_RDMA)) { - openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_NONFATAL, - remote_proc, btlname); - /* Now that this connection has been mapped out at the PML layer, - * we change the state in the BTL layer. The change in the PML - * layer should prevent that we ever try to send on this BTL - * again. If we do, then this is an error case. */ - if (MCA_BTL_IB_FAILED != endpoint->endpoint_state) { - endpoint->endpoint_state = MCA_BTL_IB_FAILED; - mca_btl_openib_endpoint_notify(endpoint, MCA_BTL_OPENIB_CONTROL_EP_BROKEN, 0); - error_out_all_pending_frags(endpoint, &openib_btl->super, true); - } - opal_output_verbose(60, mca_btl_openib_component.verbose_failover, - "MCA_BTL_OPENIG_FRAG=%d, " - "dropping since connection is broken (des=%lx)", - openib_frag_type(des), (long unsigned int) des); - if (NULL != btlname) free(btlname); - return; - } - - /* These are RDMA read type fragments. Just continue with processing */ - if (openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_RECV_USER) { - OPAL_THREAD_ADD32(&endpoint->get_tokens, 1); - opal_output_verbose(20, mca_btl_openib_component.verbose_failover, - "OPENIB_FRAG_RECV_USER fragment, " - "btl=%lx, continue with callbacks", - (long unsigned int) &openib_btl->super); - } - - /* If we are at this point, we have completed a send, RDMA read or - * RDMA write. Call the PML callback function to map out this - * btl for further sending. We just call this every time we get an - * error even though it is not necessary. Subsequent calls with - * the same remote_proc argument will not actually map anything out. 
*/ - openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_NONFATAL, - remote_proc, btlname); - if (NULL != btlname) free(btlname); - - /* Since we believe we have done a send, read or write, then the - * des_segments fields should have valid data. */ - assert(des->des_segments != NULL); - - /* If the endpoint is not yet in the MCA_BTL_IB_CLOSED state, then - * change the status. Since this connection was mapped out in the - * PML layer, no more attempts should be made to send on it. In - * addition, send a message to other end of the connection letting - * it know that this side is now broken. This is needed in the case - * of a spurious error which may not cause the remote side to detect - * the error. */ - if (MCA_BTL_IB_FAILED != endpoint->endpoint_state) { - endpoint->endpoint_state = MCA_BTL_IB_FAILED; - mca_btl_openib_endpoint_notify(endpoint, MCA_BTL_OPENIB_CONTROL_EP_BROKEN, 0); - } - - /* Now, call the callback function associated with the fragment. - * In case the fragments were coalesced we need to pull them apart - * and call the callback function for each one. */ - if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) { - opal_list_item_t *i; - while((i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) { - btl_ownership = (to_base_frag(i)->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - to_base_frag(i)->base.des_cbfunc(&openib_btl->super, endpoint, - &to_base_frag(i)->base, OPAL_ERROR); - if( btl_ownership ) { - mca_btl_openib_free(&openib_btl->super, &to_base_frag(i)->base); - } - } - } - - /* This must be a MCA_BTL_OPENIB_FRAG_SEND, MCA_BTL_OPENIB_FRAG_SEND_USER - * or MCA_BTL_OPENIB_FRAG_RECV_USER. */ - btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - des->des_cbfunc(&openib_btl->super, endpoint, des, OPAL_ERROR); - if( btl_ownership ) { - mca_btl_openib_free(&openib_btl->super, des); - } - - /* Here we send another control message to notify the remote side - * we had an error on a eager fragment. 
A non-zero value for the - * ftr variable indicates that this was an eager RDMA fragment. - * We need to do this in case the eager RDMA fragment after this - * one actually made it successfully. */ - if (0 != to_send_frag(des)->ftr) { - mca_btl_openib_endpoint_notify(endpoint, - MCA_BTL_OPENIB_CONTROL_EP_EAGER_RDMA_ERROR, - (long)to_send_frag(des)->ftr - 1); - } - - /* We know we have completed a send so return some resources even - * though connection is broken. With SRQ, the resources are shared - * so if we do not return the credits we may not be allowed to send - * anymore. */ - qp_put_wqe(endpoint, qp); - if((openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) && !BTL_OPENIB_QP_TYPE_PP(qp)) { - OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); - } - - /* There are several queues associated with an endpoint that may - * have some unsent fragments sitting in them. Remove them and - * call the callback functions with an error so the PML can send - * them down a different path. This really only needs to be called - * once on an endpoint, but for now, just call it a bunch of times. - * The first time through will remove the unsent fragments so - * subsequent calls are no-ops. */ - if (endpoint) { - error_out_all_pending_frags(endpoint, &openib_btl->super, true); - } -} - -/** - * This functions allows an error to map out the entire BTL. First a - * call is made up to the PML to map out all connections from this BTL. - * Then a message is sent to all the endpoints connected to this BTL. - * This function is enabled by the btl_openib_port_error_failover - * MCA parameter. If that parameter is not set, then this function - * does not do anything. - * @param openib_btl Pointer to BTL that had the error - */ -void mca_btl_openib_handle_btl_error(mca_btl_openib_module_t* openib_btl) { - mca_btl_base_endpoint_t* endpoint; - int i; - - /* Check to see that the flag is set for the entire map out. 
*/ - if(mca_btl_openib_component.port_error_failover) { - /* Since we are not specifying a specific connection to bring down, - * the PML layer will may out the entire BTL for future communication. */ - char *btlname = NULL; - asprintf(&btlname, "lid=%d:name=%s", - openib_btl->lid, openib_btl->device->ib_dev->name); - openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_NONFATAL, - NULL, btlname); - if (NULL != btlname) free(btlname); - - /* Now send out messages to all endpoints that we are disconnecting. - * Only do this to endpoints that are connected. Otherwise, the - * remote side does not yet have the information on this endpoint. */ - for (i = 0; i < opal_pointer_array_get_size(openib_btl->device->endpoints); i++) { - endpoint = (mca_btl_openib_endpoint_t*) - opal_pointer_array_get_item(openib_btl->device->endpoints, i); - if (NULL == endpoint) { - continue; - } - if (MCA_BTL_IB_CONNECTED == endpoint->endpoint_state) { - mca_btl_openib_endpoint_notify(endpoint, MCA_BTL_OPENIB_CONTROL_EP_BROKEN, 0); - endpoint->endpoint_state = MCA_BTL_IB_FAILED; - error_out_all_pending_frags(endpoint, &openib_btl->super, true); - } - } - } -} - -/** - * This function gets called when a control message is received that - * is one of the following types: - * MCA_BTL_OPENIB_CONTROL_EP_BROKEN - * MCA_BTL_OPENIB_CONTROL_EP_EAGER_RDMA_ERROR message - * Note that we are using the working connection to send information - * about the broken connection. That is why we have to look at the - * various information in the control message to figure out which - * endpoint is broken. It is (obviously) not the one the message was - * received on, because we would not have received the message in that - * case. In the case of the BROKEN message, that means the remote - * side is notifying us that it has brought down its half of the - * connection. Therefore, we need to bring out half down. 
This is - * done because it has been observed that there are cases where only - * one side of the connection actually sees the error. This means we - * can be left in a state where one side believes it has two BTLs, but - * the other side believes it only has one. This can cause problems. - * In the case of the EAGER_RDMA_ERROR, see elsewhere in the code what - * we are doing. - * @param ctl_hdr Pointer control header that was received - */ -void btl_openib_handle_failover_control_messages(mca_btl_openib_control_header_t *ctl_hdr, - mca_btl_openib_endpoint_t* ep) -{ - mca_btl_openib_broken_connection_header_t *bc_hdr = - (mca_btl_openib_broken_connection_header_t*)ctl_hdr; - int i; - int found = false; - - if(ep->nbo) { - BTL_OPENIB_BROKEN_CONNECTION_HEADER_NTOH((*bc_hdr)); - } - - opal_output_verbose(30, mca_btl_openib_component.verbose_failover, - "IB: Control message received from %d: lid=%d,subnet=0x%" PRIx64 "", - bc_hdr->vpid, bc_hdr->lid, bc_hdr->subnet_id); - - /* Now we walk through all the endpoints on all the BTLs to - * find out which one to map out. */ - for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { - mca_btl_openib_module_t* newbtl; - int j; - - newbtl = mca_btl_openib_component.openib_btls[i]; - /* Now, find the endpoint associated with it */ - for (j = 0; j < opal_pointer_array_get_size(newbtl->device->endpoints); j++) { - mca_btl_base_endpoint_t* newep; - newep = (mca_btl_openib_endpoint_t*) - opal_pointer_array_get_item(newbtl->device->endpoints, j); - if (NULL == newep) { - continue; - } - /* Now compare the LID, subnet ID, and the vpid we received - * from the remote side and try to match it to an endpoint. 
*/ - if ((bc_hdr->lid == newep->rem_info.rem_lid) && - (bc_hdr->subnet_id == newep->rem_info.rem_subnet_id) && - (bc_hdr->vpid == newep->endpoint_proc->proc_opal->proc_name.vpid)) { - opal_output_verbose(30, mca_btl_openib_component.verbose_failover, - "IB: Control message received from %d: " - "found match: lid=%d," - "subnet=0x%" PRIx64 ",endpoint_state=%d", - newep->endpoint_proc->proc_opal->proc_name.vpid, - newep->rem_info.rem_lid, - newep->rem_info.rem_subnet_id, - newep->endpoint_state); - found = true; - /* At this point, we have found the endpoint. Now decode the - * message type and do the appropriate action. */ - if (MCA_BTL_OPENIB_CONTROL_EP_BROKEN == ctl_hdr->type) { - /* Now that we found a match, check the state of the - * endpoint to see it is already in a failed state. - * If not, then notify the upper layer and error out - * any pending fragments. */ - if (MCA_BTL_IB_FAILED == newep->endpoint_state) { - return; - } else { - char *btlname = NULL; - opal_proc_t* remote_proc = NULL; - - asprintf(&btlname, "lid=%d:name=%s", - newbtl->lid, newbtl->device->ib_dev->name); - - remote_proc = newep->endpoint_proc->proc_opal; - - opal_output_verbose(10, mca_btl_openib_component.verbose_failover, - "IB: Control message received from %d: " - "bringing down connection,lid=%d," - "subnet=0x%" PRIx64 ",endpoint_state=%d", - newep->endpoint_proc->proc_opal->proc_name.vpid, - newep->rem_info.rem_lid, - newep->rem_info.rem_subnet_id, - newep->endpoint_state); - newbtl->error_cb(&newbtl->super, MCA_BTL_ERROR_FLAGS_NONFATAL, - remote_proc, btlname); - if (NULL != btlname) free(btlname); - - error_out_all_pending_frags(newep, &newbtl->super, true); - newep->endpoint_state = MCA_BTL_IB_FAILED; - return; - } - } else { /* MCA_BTL_OPENIB_CONTROL_EP_EAGER_RDMA_ERROR message */ - /* If we are still pointing at the location where - * we detected an error on the remote side, then - * bump the index by one. 
*/ - if (newep->eager_rdma_local.head == (uint16_t)bc_hdr->index) { - /* Adjust the local head by one just in case */ - MCA_BTL_OPENIB_RDMA_NEXT_INDEX(newep->eager_rdma_local.head); - opal_output_verbose(20, mca_btl_openib_component.verbose_failover, - "IB: rank=%d, control message (remote=%d), " - "moved local head by one (new=%d)", - OPAL_PROC_MY_NAME.vpid, - newep->endpoint_proc->proc_opal->proc_name.vpid, - newep->eager_rdma_local.head); - } else { - opal_output_verbose(20, mca_btl_openib_component.verbose_failover, - "IB: rank=%d, control message (remote=%d), " - "did not move local head by one (still=%d)", - OPAL_PROC_MY_NAME.vpid, - newep->endpoint_proc->proc_opal->proc_name.vpid, - newep->eager_rdma_local.head); - } - } - break; /* since we found the endpoint */ - } - } - } - if (false == found) { - opal_output_verbose(30, mca_btl_openib_component.verbose_failover, - "IB: Control message: no match found"); - } -} - -/** - * This function will find all the pending fragments on an endpoint - * and call the callback function with OPAL_ERROR. It walks through - * each qp with each priority and looks for both no_credits_pending_frags - * and no_wqe_pending_frags. It then looks for any pending_lazy_frags, - * pending_put_frags, and pending_get_frags. This function is only - * called when running with failover support enabled. Note that - * the errout parameter allows the function to also be used as a - * debugging tool to see if there are any fragments on any of the - * queues. 
- * @param ep Pointer to endpoint that had error - * @param module Pointer to module that had error - * @param errout Boolean which says whether to error them out or not - */ -static void error_out_all_pending_frags(mca_btl_base_endpoint_t *ep, - struct mca_btl_base_module_t* module, - bool errout) -{ - int qp, pri, len, total, btl_ownership; - - opal_list_item_t *item; - mca_btl_openib_com_frag_t* frag; - mca_btl_base_descriptor_t *des; - int verbose = 10; /* Verbosity level unless debugging */ - - /* If debugging, drop verbosity level so we can see the output - * regardless of the level the program was run with. */ - if (false == errout) { - verbose = 0; - } - - total = 0; - /* Traverse all QPs and all priorities and move to other endpoint */ - for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) { - for (pri = 0; pri < 2; ++pri) { - /* All types of qp's have a no_wqe_pending_frags list */ - len = opal_list_get_size(&ep->qps[qp].no_wqe_pending_frags[pri]); - if (len > 0) { - total += len; - opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover, - "IB: Checking for no_wqe_pending_frags qp=%d, " - "pri=%d, list size=%d", - qp, pri, len); - if (true == errout) { - while (NULL != (item = opal_list_remove_first(&ep->qps[qp]. 
- no_wqe_pending_frags[pri]))) { - frag = (mca_btl_openib_com_frag_t *) item; - des = (mca_btl_base_descriptor_t *)frag; - - /* Error out any coalesced frags if they exist */ - if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) { - opal_list_item_t *i; - while((i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) { - opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover, - "IB: Found coalesced frag in no_wqe_pending_frags"); - btl_ownership = (to_base_frag(i)->base.des_flags & - MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - to_base_frag(i)->base.des_cbfunc(module, ep, - &to_base_frag(i)->base, OPAL_ERROR); - if( btl_ownership ) { - mca_btl_openib_free(module, &to_base_frag(i)->base); - } - } - } - btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - des->des_cbfunc(module, ep, des, OPAL_ERROR); - if( btl_ownership ) { - mca_btl_openib_free(module, des); - } - } - } - } - if (BTL_OPENIB_QP_TYPE_PP(qp)) { - len = opal_list_get_size(&ep->qps[qp].no_credits_pending_frags[pri]); - if (len > 0) { - total += len; - opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover, - "IB: Checking for no_credits_pending_frags qp=%d, " - "pri=%d, list size=%d", - qp, pri, len); - if (true == errout) { - while (NULL != (item = opal_list_remove_first(&ep->qps[qp]. 
- no_credits_pending_frags[pri]))) { - frag = (mca_btl_openib_com_frag_t *) item; - des = (mca_btl_base_descriptor_t *)frag; - - /* Error out any coalesced frags if they exist */ - if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) { - opal_list_item_t *i; - while((i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) { - opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover, - "IB: Found coalesced frag in " - "no_credits_pending_frags"); - btl_ownership = (to_base_frag(i)->base.des_flags & - MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - to_base_frag(i)->base.des_cbfunc(module, ep, - &to_base_frag(i)->base, OPAL_ERROR); - if( btl_ownership ) { - mca_btl_openib_free(module, &to_base_frag(i)->base); - } - } - } - btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - des->des_cbfunc(module, ep, des, OPAL_ERROR); - if( btl_ownership ) { - mca_btl_openib_free(module, des); - } - } - } - } - - } else if (BTL_OPENIB_QP_TYPE_SRQ(qp)) { - len = opal_list_get_size(&ep->endpoint_btl->qps[qp].u.srq_qp.pending_frags[pri]); - if (len > 0) { - total += len; - opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover, - "IB: Checking for srq pending_frags qp=%d, pri=%d, " - "list size=%d", - qp, pri, len); - if (true == errout) { - while (NULL != (item = opal_list_remove_first(&ep->endpoint_btl->qps[qp]. 
- u.srq_qp.pending_frags[pri]))) { - frag = (mca_btl_openib_com_frag_t *) item; - des = (mca_btl_base_descriptor_t *)frag; - - /* Error out any coalesced frags if they exist */ - if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) { - opal_list_item_t *i; - while((i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) { - opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover, - "IB: Found coalesced frag in SRQ pending_frags"); - btl_ownership = (to_base_frag(i)->base.des_flags & - MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - to_base_frag(i)->base.des_cbfunc(module, ep, - &to_base_frag(i)->base, OPAL_ERROR); - if( btl_ownership ) { - mca_btl_openib_free(module, &to_base_frag(i)->base); - } - } - } - btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - des->des_cbfunc(module, ep, des, OPAL_ERROR); - if( btl_ownership ) { - mca_btl_openib_free(module, des); - } - } - } - } - } - } - } - - /* Check for any frags from a connection that was never made. Not sure if this - * can actually happen. 
*/ - len = opal_list_get_size(&ep->pending_lazy_frags); - - if (len > 0) { - total += len; - opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover, - "IB: Checking for pending_lazy_frags, list size=%d", len); - if (true == errout) { - while (NULL != (item = opal_list_remove_first(&(ep->pending_lazy_frags)))) { - frag = (mca_btl_openib_com_frag_t *) item; - des = (mca_btl_base_descriptor_t *)frag; - des->des_cbfunc(module, ep, des, OPAL_ERROR); - } - } - } - - len = opal_list_get_size(&ep->pending_put_frags); - if (len > 0) { - total += len; - opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover, - "IB: Checking for pending_put_frags, list size=%d", len); - if (true == errout) { - while (NULL != (item = opal_list_remove_first(&(ep->pending_put_frags)))) { - frag = (mca_btl_openib_com_frag_t *) item; - des = (mca_btl_base_descriptor_t *)frag; - des->des_cbfunc(module, ep, des, OPAL_ERROR); - } - } - } - - len = opal_list_get_size(&ep->pending_get_frags); - if (len > 0) { - total += len; - opal_output_verbose(verbose, mca_btl_openib_component.verbose_failover, - "IB: Checking for pending_get_frags, list size=%d", len); - if (true == errout) { - while (NULL != (item = opal_list_remove_first(&(ep->pending_put_frags)))) { - frag = (mca_btl_openib_com_frag_t *) item; - des = (mca_btl_base_descriptor_t *)frag; - des->des_cbfunc(module, ep, des, OPAL_ERROR); - } - } - } - - opal_output_verbose(verbose + 30, mca_btl_openib_component.verbose_failover, - "IB: Finished checking for pending_frags, total moved=%d", - total); -} - -/* local callback function for completion of a failover control message */ -static void mca_btl_openib_endpoint_notify_cb(mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - int status) -{ - MCA_BTL_IB_FRAG_RETURN(descriptor); -} - -/** - * This function is used to send a message to the remote side - * indicating the endpoint is broken and telling 
the remote side to - * brings its endpoint down as well. This is needed because there are - * cases where only one side of the connection determines that the - * there was a problem. - * @param endpoint Pointer to endpoint with error - * @param type Type of message to be sent, can be one of two types - * @param index When sending RDMA error message, index is non zero - */ -static void mca_btl_openib_endpoint_notify(mca_btl_base_endpoint_t* endpoint, uint8_t type, int index) -{ - mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl; - mca_btl_openib_module_t* newbtl = NULL; - bool found = false; - mca_btl_openib_broken_connection_header_t *bc_hdr; - mca_btl_openib_send_control_frag_t* frag; - mca_btl_base_endpoint_t* newep; - int i, rc; - opal_proc_t* remote_proc = endpoint->endpoint_proc->proc_opal; - - /* First, find a different BTL than this one that got the - * error to send the message over. */ - for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { - if (mca_btl_openib_component.openib_btls[i] != openib_btl) { - newbtl = mca_btl_openib_component.openib_btls[i]; - break; - } - } - if (NULL == newbtl) { - opal_output_verbose(20, mca_btl_openib_component.verbose_failover, - "IB: Endpoint Notify: No BTL found"); - /* If we cannot find one, then just return. */ - return; - } - - /* Now, find the endpoint associated with it. The device - * associated with the BTL has the list of all the - * endpoints. */ - for (i = 0; i < opal_pointer_array_get_size(newbtl->device->endpoints); i++) { - newep = (mca_btl_openib_endpoint_t*) - opal_pointer_array_get_item(newbtl->device->endpoints, i); - if (NULL == newep) { - continue; - } - if (newep->endpoint_proc->proc_opal == remote_proc) { - found = true; - break; - } - } - if (false == found) { - opal_output_verbose(20, mca_btl_openib_component.verbose_failover, - "IB: Endpoint Notify: No endpoint found"); - /* If we cannot find a match, then just return. 
*/ - return; - } - - frag = alloc_control_frag(newbtl); - if(NULL == frag) { - opal_output_verbose(20, mca_btl_openib_component.verbose_failover, - "IB: Endpoint Notify: No frag space"); - /* If no frag available, then just return. */ - return; - } - - to_base_frag(frag)->base.des_cbfunc = - mca_btl_openib_endpoint_notify_cb; - to_base_frag(frag)->base.des_cbdata = NULL; - to_base_frag(frag)->base.des_flags |= MCA_BTL_DES_FLAGS_PRIORITY|MCA_BTL_DES_SEND_ALWAYS_CALLBACK; - to_base_frag(frag)->base.order = mca_btl_openib_component.credits_qp; - to_base_frag(frag)->segment.seg_len = - sizeof(mca_btl_openib_broken_connection_header_t); - to_com_frag(frag)->endpoint = newep; - - frag->hdr->tag = MCA_BTL_TAG_IB; - bc_hdr = (mca_btl_openib_broken_connection_header_t*)to_base_frag(frag)->segment.seg_addr.pval; - bc_hdr->control.type = type; - bc_hdr->lid = endpoint->endpoint_btl->port_info.lid; - bc_hdr->subnet_id = endpoint->endpoint_btl->port_info.subnet_id; - bc_hdr->vpid = OPAL_PROC_MY_NAME.vpid; - bc_hdr->index = index; - - if(newep->nbo) { - BTL_OPENIB_BROKEN_CONNECTION_HEADER_HTON((*bc_hdr)); - } - rc = mca_btl_openib_endpoint_send(newep, frag); - if (OPAL_SUCCESS == rc || OPAL_ERR_RESOURCE_BUSY == rc) { - return; - } - - MCA_BTL_IB_FRAG_RETURN(frag); - BTL_ERROR(("Error sending BROKEN CONNECTION buffer (%s)", strerror(errno))); - return; -} - -/* - * Function used for debugging problems in eager rdma. 
- */ -static void dump_local_rdma_frags(mca_btl_openib_endpoint_t * endpoint) { - mca_btl_openib_recv_frag_t *headers_buf = endpoint->eager_rdma_local.frags; - mca_btl_openib_recv_frag_t * frag; - mca_btl_openib_control_header_t* chdr; - int i, size; - - opal_output(0, "Head = %d", endpoint->eager_rdma_local.head); - - for (i = 0; i < mca_btl_openib_component.eager_rdma_num; i++) { - frag = &headers_buf[i]; - size = MCA_BTL_OPENIB_RDMA_FRAG_GET_SIZE(frag->ftr); - - frag->hdr = (mca_btl_openib_header_t*)(((char*)frag->ftr) - - size + sizeof(mca_btl_openib_footer_t)); - to_base_frag(frag)->segment.seg_addr.pval = - ((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t); - - chdr = to_base_frag(frag)->segment.seg_addr.pval; - if ((MCA_BTL_TAG_IB == frag->hdr->tag) && - (MCA_BTL_OPENIB_CONTROL_CREDITS == chdr->type)) { - opal_output(0, "tag[%d] is credit message", i); - } else { - opal_output(0, "frag[%d] size=%d,tag=%d,ftr->u.buf=%d", i, size, frag->hdr->tag, - frag->ftr->u.buf[3]); - } - } -} - -/* - * Function used for debugging problems in eager rdma. - */ -void mca_btl_openib_dump_all_local_rdma_frags(mca_btl_openib_device_t *device) { - int i, c; - mca_btl_openib_endpoint_t* endpoint; - - c = device->eager_rdma_buffers_count; - opal_output(0, "rank=%d, device=%s", OPAL_PROC_MY_NAME.vpid, device->ib_dev->name); - - for(i = 0; i < c; i++) { - endpoint = device->eager_rdma_buffers[i]; - - if(!endpoint) - continue; - - dump_local_rdma_frags(endpoint); - } -} - -/** - * This function is a debugging tool. If you notify a hang, you can - * call this function from a debugger and see if there are any - * messages stuck in any of the queues. If you call it with - * errout=true, then it will error them out. Otherwise, it will - * just print out the size of the queues with data in them. 
- */ -void mca_btl_openib_dump_all_internal_queues(bool errout) { - int i, j, num_eps; - mca_btl_openib_module_t* btl; - int total; - mca_btl_base_endpoint_t* ep; - struct mca_btl_base_module_t* module; - - for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { - btl = mca_btl_openib_component.openib_btls[i]; - module = &btl->super; - num_eps = opal_pointer_array_get_size(btl->device->endpoints); - - /* Now, find the endpoint associated with it */ - for (j = 0; j < num_eps; j++) { - ep = (mca_btl_openib_endpoint_t*) - opal_pointer_array_get_item(btl->device->endpoints, j); - if (NULL == ep) { - continue; - } - - total = 0; - error_out_all_pending_frags(ep, module, errout); - } - } -} - diff --git a/opal/mca/btl/openib/btl_openib_failover.h b/opal/mca/btl/openib/btl_openib_failover.h deleted file mode 100644 index afb77a42b95..00000000000 --- a/opal/mca/btl/openib/btl_openib_failover.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * Functions called by BTL to handle error events - */ - -#ifndef MCA_BTL_IB_FAILOVER_H -#define MCA_BTL_IB_FAILOVER_H - -BEGIN_C_DECLS - -void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl, - mca_btl_base_descriptor_t *des, - int qp, - opal_proc_t* remote_proc, - mca_btl_openib_endpoint_t* endpoint); -void mca_btl_openib_handle_btl_error(mca_btl_openib_module_t* openib_btl); -void btl_openib_handle_failover_control_messages(mca_btl_openib_control_header_t *ctl_hdr, - mca_btl_openib_endpoint_t* ep); - -END_C_DECLS - -#endif diff --git a/opal/mca/btl/openib/btl_openib_fd.c b/opal/mca/btl/openib/btl_openib_fd.c deleted file mode 100644 index 76f87b09f00..00000000000 --- a/opal/mca/btl/openib/btl_openib_fd.c +++ /dev/null @@ -1,693 +0,0 @@ -/* - * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * Note: this file is a little fast-n-loose -- - * it uses this value in run-time "if" conditionals (vs. compile-time - * #if conditionals). We also don't protect including . - * That's because this component currently only compiles on Linux and - * Solaris, and both of these OS's have pthreads. Using the run-time - * conditionals gives us better compile-time checking, even of code - * that isn't activated. - * - * Note, too, that the functionality in this file does *not* require - * all the heavyweight OMPI thread infrastructure (e.g., from - * --enable-mpi-thread-multiple or --enable-progress-threads). All work that - * is done in a separate progress thread is very carefully segregated - * from that of the main thread, and communication back to the main - * thread - */ - -#include "opal_config.h" - -#include -#include -#include -#include - -#include "opal/class/opal_list.h" -#include "opal/mca/event/event.h" -#include "opal/util/output.h" -#include "opal/util/fd.h" -#include "opal/threads/threads.h" - -#include "btl_openib_fd.h" - - -typedef union { - opal_btl_openib_fd_event_callback_fn_t *event; - opal_btl_openib_fd_main_callback_fn_t *main; -} callback_u_t; - -/* - * Data for each registered item - */ -typedef struct { - opal_list_item_t super; - bool ri_event_used; - opal_event_t ri_event; - int ri_fd; - int ri_flags; - callback_u_t ri_callback; - void *ri_context; -} registered_item_t; - -static OBJ_CLASS_INSTANCE(registered_item_t, opal_list_item_t, NULL, NULL); - -/* - * Command types - */ -typedef enum { - /* Read by service thread */ - CMD_TIME_TO_QUIT, - CMD_ADD_FD, - CMD_REMOVE_FD, - ACK_RAN_FUNCTION, - - /* Read by service and main threads */ - CMD_CALL_FUNCTION, - CMD_MAX 
-} cmd_type_t; - -/* - * Commands. Fields ordered to avoid memory holes (and valgrind warnings). - */ -typedef struct { - callback_u_t pc_fn; - void *pc_context; - int pc_fd; - int pc_flags; - cmd_type_t pc_cmd; - char end; -} cmd_t; - -/* - * Queued up list of commands to send to the main thread - */ -typedef struct { - opal_list_item_t super; - cmd_t cli_cmd; -} cmd_list_item_t; - -static OBJ_CLASS_INSTANCE(cmd_list_item_t, opal_list_item_t, NULL, NULL); - -static bool initialized = false; -static int cmd_size = 0; -static fd_set read_fds, write_fds; -static int max_fd; -static opal_list_t registered_items; - -/* These items are only used in the threaded version */ -/* Owned by the main thread */ -static pthread_t thread; -static opal_event_t main_thread_event; -static int pipe_to_service_thread[2] = { -1, -1 }; - -/* Owned by the service thread */ -static int pipe_to_main_thread[2] = { -1, -1 }; -static const size_t max_outstanding_to_main_thread = 32; -static size_t waiting_for_ack_from_main_thread = 0; -static opal_list_t pending_to_main_thread; - - -/* - * Write a command to the main thread, or queue it up if the pipe is full - */ -static int write_to_main_thread(cmd_t *cmd) -{ - /* Note that if we write too much to the main thread pipe and the - main thread doesn't check it often, we could fill up the pipe - and cause this thread to block. Bad! So we do some simple - counting here and ensure that we don't fill the pipe. If we - are in danger of that, then queue up the commands here in the - service thread. The main thread will ACK every CALL_FUNCTION - command, so we have a built-in mechanism to wake up the service - thread to drain any queued-up commands. 
*/ - if (opal_list_get_size(&pending_to_main_thread) > 0 || - waiting_for_ack_from_main_thread >= max_outstanding_to_main_thread) { - cmd_list_item_t *cli = OBJ_NEW(cmd_list_item_t); - if (NULL == cli) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - memcpy(&cli->cli_cmd, cmd, cmd_size); - opal_list_append(&pending_to_main_thread, &(cli->super)); - } else { - OPAL_OUTPUT((-1, "fd: writing to main thread")); - opal_fd_write(pipe_to_main_thread[1], cmd_size, cmd); - ++waiting_for_ack_from_main_thread; - } - - return OPAL_SUCCESS; -} - -static void service_fd_callback(int fd, short event, void *context) -{ - registered_item_t *ri = (registered_item_t*) context; - ri->ri_callback.event(fd, event, ri->ri_context); -} - - -/* - * Add an fd to the listening set - */ -static int service_pipe_cmd_add_fd(bool use_libevent, cmd_t *cmd) -{ - registered_item_t *ri = OBJ_NEW(registered_item_t); - if (NULL == ri) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - ri->ri_event_used = false; - ri->ri_fd = cmd->pc_fd; - ri->ri_flags = cmd->pc_flags; - ri->ri_callback.event = cmd->pc_fn.event; - ri->ri_context = cmd->pc_context; - - if (use_libevent) { - /* Make an event for this fd */ - ri->ri_event_used = true; - opal_event_set(opal_event_base, &ri->ri_event, ri->ri_fd, - ri->ri_flags | OPAL_EV_PERSIST, service_fd_callback, - ri); - opal_event_add(&ri->ri_event, 0); - } else { - /* Add the fd to the relevant fd local sets and update max_fd */ - if (OPAL_EV_READ & ri->ri_flags) { - FD_SET(ri->ri_fd, &read_fds); - } - if (OPAL_EV_WRITE & cmd->pc_flags) { - FD_SET(ri->ri_fd, &write_fds); - } - max_fd = (max_fd > ri->ri_fd) ? 
max_fd : ri->ri_fd + 1; - } - - opal_list_append(®istered_items, &ri->super); - return OPAL_SUCCESS; -} - -/* - * Run a function - */ -static int service_pipe_cmd_call_function(cmd_t *cmd) -{ - cmd_t local_cmd; - - OPAL_OUTPUT((-1, "fd service thread: calling function!")); - /* Call the function */ - if (NULL != cmd->pc_fn.main) { - cmd->pc_fn.main(cmd->pc_context); - } - - /* Now ACK that we ran the function */ - memset(&local_cmd, 0, cmd_size); - local_cmd.pc_cmd = ACK_RAN_FUNCTION; - opal_fd_write(pipe_to_main_thread[1], cmd_size, &local_cmd); - - /* Done */ - return OPAL_SUCCESS; -} - -/* - * Remove an fd from the listening set - */ -static int service_pipe_cmd_remove_fd(cmd_t *cmd) -{ - int i; - opal_list_item_t *item; - registered_item_t *ri; - - OPAL_OUTPUT((-1, "service thread got unmonitor fd %d", cmd->pc_fd)); - /* Go through the list of registered fd's and find the fd to - remove */ - for (item = opal_list_get_first(®istered_items); - NULL != opal_list_get_end(®istered_items); - item = opal_list_get_next(item)) { - ri = (registered_item_t*) item; - if (cmd->pc_fd == ri->ri_fd) { - /* Found it. The item knows if it was used as a libevent - event or an entry in the local fd sets. 
*/ - if (ri->ri_event_used) { - /* Remove this event from libevent */ - opal_event_del(&ri->ri_event); - } else { - /* Remove this item from the fd_sets and recalculate - MAX_FD */ - FD_CLR(cmd->pc_fd, &read_fds); - FD_CLR(cmd->pc_fd, &write_fds); - for (max_fd = i = pipe_to_service_thread[0]; i < FD_SETSIZE; ++i) { - if (FD_ISSET(i, &read_fds) || FD_ISSET(i, &write_fds)) { - max_fd = i + 1; - } - } - } - - /* Let the caller know that we have stopped monitoring - this fd (if they care) */ - if (NULL != cmd->pc_fn.event) { - cmd->pc_fn.event(cmd->pc_fd, 0, cmd->pc_context); - } - - /* Remove this item from the list of registered items and - release it */ - opal_list_remove_item(®istered_items, item); - OBJ_RELEASE(item); - return OPAL_SUCCESS; - } - } - - /* This shouldn't happen */ - return OPAL_ERR_NOT_FOUND; -} - - -/* - * Call a function and ACK that we ran it - */ -static int main_pipe_cmd_call_function(cmd_t *cmd) -{ - cmd_t local_cmd; - - OPAL_OUTPUT((-1, "fd main thread: calling function!")); - /* Call the function */ - if (NULL != cmd->pc_fn.main) { - cmd->pc_fn.main(cmd->pc_context); - } - - /* Now ACK that we ran the function */ - memset(&local_cmd, 0, cmd_size); - local_cmd.pc_cmd = ACK_RAN_FUNCTION; - opal_fd_write(pipe_to_service_thread[1], cmd_size, &local_cmd); - - /* Done */ - return OPAL_SUCCESS; -} - - -/* - * Act on pipe commands - */ -static bool service_pipe_cmd(void) -{ - bool ret = false; - cmd_t cmd; - cmd_list_item_t *cli; - - opal_fd_read(pipe_to_service_thread[0], cmd_size, &cmd); - switch (cmd.pc_cmd) { - case CMD_ADD_FD: - OPAL_OUTPUT((-1, "fd service thread: CMD_ADD_FD")); - if (OPAL_SUCCESS != service_pipe_cmd_add_fd(false, &cmd)) { - ret = true; - } - break; - - case CMD_REMOVE_FD: - OPAL_OUTPUT((-1, "fd service thread: CMD_REMOVE_FD")); - if (OPAL_SUCCESS != service_pipe_cmd_remove_fd(&cmd)) { - ret = true; - } - break; - - case CMD_CALL_FUNCTION: - OPAL_OUTPUT((-1, "fd service thread: CMD_RUN_FUNCTION")); - if (OPAL_SUCCESS != 
service_pipe_cmd_call_function(&cmd)) { - ret = true; - } - break; - - case CMD_TIME_TO_QUIT: - OPAL_OUTPUT((-1, "fd service thread: CMD_TIME_TO_QUIT")); - ret = true; - break; - - case ACK_RAN_FUNCTION: - /* We don't have a guarantee that the main thread will check - its pipe frequently, so we do some simple counting to - ensure we just don't have too many outstanding commands to - the main thread at any given time. The main thread will - ACK every CALL_FUNCTION command, so this thread will always - wake up and continue to drain any queued up functions. */ - cli = (cmd_list_item_t*) opal_list_remove_first(&pending_to_main_thread); - if (NULL != cli) { - OPAL_OUTPUT((-1, "sending queued up cmd function to main thread")); - opal_fd_write(pipe_to_main_thread[1], cmd_size, &(cli->cli_cmd)); - OBJ_RELEASE(cli); - } else { - --waiting_for_ack_from_main_thread; - } - break; - - default: - OPAL_OUTPUT((-1, "fd service thread: unknown pipe command!")); - break; - } - - return ret; -} - - -/* - * Main thread logic - */ -static void *service_thread_start(void *context) -{ - int rc, flags; - fd_set read_fds_copy, write_fds_copy; - opal_list_item_t *item; - registered_item_t *ri; - - /* Make an fd set that we can select() on */ - FD_ZERO(&write_fds); - FD_ZERO(&read_fds); - FD_SET(pipe_to_service_thread[0], &read_fds); - max_fd = pipe_to_service_thread[0] + 1; - - OPAL_OUTPUT((-1, "fd service thread running")); - - /* Main loop waiting for commands over the fd's */ - while (1) { - memcpy(&read_fds_copy, &read_fds, sizeof(read_fds)); - memcpy(&write_fds_copy, &write_fds, sizeof(write_fds)); - OPAL_OUTPUT((-1, "fd service thread blocking on select...")); - rc = select(max_fd, &read_fds_copy, &write_fds_copy, NULL, NULL); - if (0 != rc && EAGAIN == errno) { - continue; - } - - OPAL_OUTPUT((-1, "fd service thread woke up!")); - - if (0 > rc) { - if (EBADF == errno) { - /* We are assuming we lost a socket so set rc to 1 so we'll - * try to read a command off the service pipe to 
receive a - * rm command (corresponding to the socket that went away). - * If the EBADF is from the service pipe then the error - * condition will be handled by the service_pipe_cmd(). - */ - OPAL_OUTPUT((-1,"fd service thread: non-EAGAIN from select %d", errno)); - rc = 1; - } - } - if (rc > 0) { - if (FD_ISSET(pipe_to_service_thread[0], &read_fds_copy)) { - OPAL_OUTPUT((-1, "fd service thread: pipe command")); - if (service_pipe_cmd()) { - break; - } - OPAL_OUTPUT((-1, "fd service thread: back from pipe command")); - /* Continue to the top of the loop to see if there are more - * commands on the pipe. This is done to reset the fds - * list just in case the last select incurred an EBADF. - * Please do not remove this continue thinking one is trying - * to enforce a fairness of reading the sockets or we'll - * end up with segv's below when select incurs an EBADF. - */ - continue; - } - - /* Go through all the registered events and see who had - activity */ - if (!opal_list_is_empty(®istered_items)) { - for (item = opal_list_get_first(®istered_items); - item != opal_list_get_end(®istered_items); - item = opal_list_get_next(item)) { - ri = (registered_item_t*) item; - flags = 0; - - /* See if this fd was ready for reading or writing - (fd's will only be in the read_fds or write_fds - set depending on what they registered for) */ - if (FD_ISSET(ri->ri_fd, &read_fds_copy)) { - flags |= OPAL_EV_READ; - } - if (FD_ISSET(ri->ri_fd, &write_fds_copy)) { - flags |= OPAL_EV_WRITE; - } - - /* If either was ready, invoke the callback */ - if (0 != flags) { - OPAL_OUTPUT((-1, "fd service thread: invoking callback for registered fd %d", ri->ri_fd)); - ri->ri_callback.event(ri->ri_fd, flags, - ri->ri_context); - OPAL_OUTPUT((-1, "fd service thread: back from callback for registered fd %d", ri->ri_fd)); - } - } - } - } - } - - /* All done */ - OPAL_OUTPUT((-1, "fd service thread: exiting")); - opal_atomic_wmb(); - return NULL; -} - - -static void main_thread_event_callback(int fd, 
short event, void *context) -{ - cmd_t cmd; - - OPAL_OUTPUT((-1, "main thread -- reading command")); - opal_fd_read(pipe_to_main_thread[0], cmd_size, &cmd); - switch (cmd.pc_cmd) { - case CMD_CALL_FUNCTION: - OPAL_OUTPUT((-1, "fd main thread: calling command")); - main_pipe_cmd_call_function(&cmd); - break; - - default: - OPAL_OUTPUT((-1, "fd main thread: unknown pipe command: %d", - cmd.pc_cmd)); - break; - } -} - -/****************************************************************** - * Main interface calls - ******************************************************************/ - -/* - * Initialize - * Called by main thread - */ -int opal_btl_openib_fd_init(void) -{ - if (!initialized) { - cmd_t bogus; - - OBJ_CONSTRUCT(®istered_items, opal_list_t); - - /* Calculate the real size of the cmd struct */ - cmd_size = (int) (&(bogus.end) - ((char*) &bogus)); - - OBJ_CONSTRUCT(&pending_to_main_thread, opal_list_t); - - /* Create pipes to communicate between the two threads */ - if (0 != pipe(pipe_to_service_thread)) { - return OPAL_ERR_IN_ERRNO; - } - if (0 != pipe(pipe_to_main_thread)) { - return OPAL_ERR_IN_ERRNO; - } - - /* Create a libevent event that is used in the main thread - to watch its pipe */ - opal_event_set(opal_event_base, &main_thread_event, pipe_to_main_thread[0], - OPAL_EV_READ | OPAL_EV_PERSIST, - main_thread_event_callback, NULL); - opal_event_add(&main_thread_event, 0); - - /* Start the service thread */ - if (0 != pthread_create(&thread, NULL, service_thread_start, - NULL)) { - int errno_save = errno; - opal_event_del(&main_thread_event); - close(pipe_to_service_thread[0]); - close(pipe_to_service_thread[1]); - close(pipe_to_main_thread[0]); - close(pipe_to_main_thread[1]); - errno = errno_save; - return OPAL_ERR_IN_ERRNO; - } - - initialized = true; - } - return OPAL_SUCCESS; -} - - -/* - * Start monitoring an fd - * Called by main or service thread; callback will be in service thread - */ -int opal_btl_openib_fd_monitor(int fd, int flags, - 
opal_btl_openib_fd_event_callback_fn_t *callback, - void *context) -{ - cmd_t cmd; - - /* Sanity check */ - if (fd < 0 || 0 == flags || NULL == callback) { - return OPAL_ERR_BAD_PARAM; - } - - cmd.pc_cmd = CMD_ADD_FD; - cmd.pc_fd = fd; - cmd.pc_flags = flags; - cmd.pc_fn.event = callback; - cmd.pc_context = context; - /* For the threaded version, write a command down the pipe */ - OPAL_OUTPUT((-1, "main thread sending monitor fd %d", fd)); - opal_fd_write(pipe_to_service_thread[1], cmd_size, &cmd); - - return OPAL_SUCCESS; -} - - -/* - * Stop monitoring an fd - * Called by main or service thread; callback will be in service thread - */ -int opal_btl_openib_fd_unmonitor(int fd, - opal_btl_openib_fd_event_callback_fn_t *callback, - void *context) -{ - cmd_t cmd; - - /* Sanity check */ - if (fd < 0) { - return OPAL_ERR_BAD_PARAM; - } - - cmd.pc_cmd = CMD_REMOVE_FD; - cmd.pc_fd = fd; - cmd.pc_flags = 0; - cmd.pc_fn.event = callback; - cmd.pc_context = context; - /* For the threaded version, write a command down the pipe */ - OPAL_OUTPUT((-1, "main thread sending unmonitor fd %d", fd)); - opal_fd_write(pipe_to_service_thread[1], cmd_size, &cmd); - - return OPAL_SUCCESS; -} - -/* - * Run in the service thread - * Called by main thread; callback will be in service thread - */ -int opal_btl_openib_fd_run_in_service(opal_btl_openib_fd_main_callback_fn_t *callback, - void *context) -{ - cmd_t cmd; - - cmd.pc_cmd = CMD_CALL_FUNCTION; - cmd.pc_fd = -1; - cmd.pc_flags = 0; - cmd.pc_fn.main = callback; - cmd.pc_context = context; - /* For the threaded version, write a command down the pipe */ - OPAL_OUTPUT((-1, "main thread sending 'run in service'")); - opal_fd_write(pipe_to_service_thread[1], cmd_size, &cmd); - - return OPAL_SUCCESS; -} - -/* - * Run a function in the main thread - * Called by service thread - */ -int opal_btl_openib_fd_run_in_main(opal_btl_openib_fd_main_callback_fn_t *callback, - void *context) -{ - cmd_t cmd; - - OPAL_OUTPUT((-1, "run in main -- sending 
command")); - /* For the threaded version, write a command down the pipe */ - cmd.pc_cmd = CMD_CALL_FUNCTION; - cmd.pc_fd = -1; - cmd.pc_flags = 0; - cmd.pc_fn.main = callback; - cmd.pc_context = context; - write_to_main_thread(&cmd); - - return OPAL_SUCCESS; -} - - -int -opal_btl_openib_fd_main_thread_drain(void) -{ - int nfds, ret; - fd_set rfds; - struct timeval tv; - - while (1) { - FD_ZERO(&rfds); - FD_SET(pipe_to_main_thread[0], &rfds); - nfds = pipe_to_main_thread[0] + 1; - - tv.tv_sec = 0; - tv.tv_usec = 0; - - ret = select(nfds, &rfds, NULL, NULL, &tv); - if (ret > 0) { - main_thread_event_callback(pipe_to_main_thread[0], 0, NULL); - return 0; - } else { - return ret; - } - } -} - - -/* - * Finalize - * Called by main thread - */ -int opal_btl_openib_fd_finalize(void) -{ - if (initialized) { - /* For the threaded version, send a command down the pipe */ - cmd_t cmd; - OPAL_OUTPUT((-1, "shutting down openib fd")); - /* Check if the thread exists before asking it to quit */ - if (ESRCH != pthread_kill(thread, 0)) { - memset(&cmd, 0, cmd_size); - cmd.pc_cmd = CMD_TIME_TO_QUIT; - if (OPAL_SUCCESS != opal_fd_write(pipe_to_service_thread[1], - cmd_size, &cmd)) { - /* We cancel the thread if there's an error - * sending the "quit" cmd. This only ever happens on - * a "restart" which could result in dangling - * fds. 
OMPI must not rely on the checkpointer to - * save/restore any fds or connections - */ - pthread_cancel(thread); - } - - pthread_join(thread, NULL); - opal_atomic_rmb(); - } - - opal_event_del(&main_thread_event); - - close(pipe_to_service_thread[0]); - close(pipe_to_service_thread[1]); - close(pipe_to_main_thread[0]); - close(pipe_to_main_thread[1]); - OBJ_DESTRUCT(&pending_to_main_thread); - OBJ_DESTRUCT(&registered_items); - } - initialized = false; - - return OPAL_SUCCESS; -} diff --git a/opal/mca/btl/openib/btl_openib_fd.h b/opal/mca/btl/openib/btl_openib_fd.h deleted file mode 100644 index 6dc63a2468e..00000000000 --- a/opal/mca/btl/openib/btl_openib_fd.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_BTL_OPENIB_FD_H_ -#define OPAL_BTL_OPENIB_FD_H_ - -#include "opal_config.h" - -BEGIN_C_DECLS - -/** - * Typedef for fd callback function - */ -typedef void *(opal_btl_openib_fd_event_callback_fn_t)(int fd, int flags, - void *context); - -/** - * Typedef for generic callback function - */ -typedef void *(opal_btl_openib_fd_main_callback_fn_t)(void *context); - -/** - * Initialize fd monitoring. - * Called by the main thread. - */ -int opal_btl_openib_fd_init(void); - -/** - * Start monitoring an fd. - * Called by main or service thread; callback will be in service thread. - */ -int opal_btl_openib_fd_monitor(int fd, int flags, - opal_btl_openib_fd_event_callback_fn_t *callback, - void *context); - -/** - * Stop monitoring an fd. - * Called by main or service thread; callback will be in service thread. - */ -int opal_btl_openib_fd_unmonitor(int fd, - opal_btl_openib_fd_event_callback_fn_t *callback, - void *context); - -/** - * Run a function in the service thread. - * Called by the main thread. 
- */ -int opal_btl_openib_fd_run_in_service(opal_btl_openib_fd_main_callback_fn_t callback, - void *context); - -/** - * Run a function in the main thread. - * Called by the service thread. - */ -int opal_btl_openib_fd_run_in_main(opal_btl_openib_fd_main_callback_fn_t callback, - void *context); - -/** - * Drain all pending messages from the main thread's pipe. - * Likely only useful during finalize, when the event library - * won't fire callbacks. - */ -int opal_btl_openib_fd_main_thread_drain(void); - -/** - * Finalize fd monitoring. - * Called by the main thread. - */ -int opal_btl_openib_fd_finalize(void); - -END_C_DECLS - -#endif /* OPAL_BTL_OPENIB_FD_H_ */ diff --git a/opal/mca/btl/openib/btl_openib_frag.h b/opal/mca/btl/openib/btl_openib_frag.h index 82d727ee442..d140fe4a8a8 100644 --- a/opal/mca/btl/openib/btl_openib_frag.h +++ b/opal/mca/btl/openib/btl_openib_frag.h @@ -39,7 +39,7 @@ struct mca_btl_openib_header_t { mca_btl_base_tag_t tag; uint8_t cm_seen; uint16_t credits; -#if OPAL_OPENIB_PAD_HDR +#if OPAL_OPENIB_PAD_HDR uint8_t padding[4]; #endif }; @@ -81,19 +81,19 @@ typedef struct mca_btl_openib_header_coalesced_t { #if OPAL_OPENIB_PAD_HDR /* BTL_OPENIB_FTR_PADDING - * This macro is used to keep the pointer to openib footers aligned for - * systems like SPARC64 that take a big performance hit when addresses - * are not aligned (and by default sigbus instead of coercing the type on + * This macro is used to keep the pointer to openib footers aligned for + * systems like SPARC64 that take a big performance hit when addresses + * are not aligned (and by default sigbus instead of coercing the type on * an unaligned address). * - * We assure alignment of a packet's structures when OPAL_OPENIB_PAD_HDR + * We assure alignment of a packet's structures when OPAL_OPENIB_PAD_HDR * is set to 1. 
When this is the case then several structures are padded * to assure alignment and the mca_btl_openib_footer_t structure itself - * will uses the BTL_OPENIB_FTR_PADDING macro to shift the location of the + * will uses the BTL_OPENIB_FTR_PADDING macro to shift the location of the * pointer to assure proper alignment after the PML Header and data. - * For example sending a 1 byte data packet the memory layout without + * For example sending a 1 byte data packet the memory layout without * footer alignment would look something like the following: - * + * * 0x00 : mca_btl_openib_coalesced_header_t (12 bytes + 4 byte pad) * 0x10 : mca_btl_openib_control_header_t (1 byte + 7 byte pad) * 0x18 : mca_btl_openib_header_t (4 bytes + 4 byte pad) @@ -102,9 +102,9 @@ typedef struct mca_btl_openib_header_coalesced_t { * 0x31 : end of packet * * By applying the BTL_OPENIB_FTR_PADDING() in the progress_one_device - * and post_send routines we adjust the pointer to mca_btl_openib_footer_t - * from 0x29 to 0x2C thus correctly aligning the start of the - * footer pointer. This adjustment will cause the padding field of + * and post_send routines we adjust the pointer to mca_btl_openib_footer_t + * from 0x29 to 0x2C thus correctly aligning the start of the + * footer pointer. This adjustment will cause the padding field of * mca_btl_openib_footer_t to overlap with the neighboring memory but since * we never use the padding we do not end up inadvertently overwriting * memory that does not belong to the fragment. @@ -113,15 +113,15 @@ typedef struct mca_btl_openib_header_coalesced_t { OPAL_ALIGN_PAD_AMOUNT(size, sizeof(uint64_t)) /* BTL_OPENIB_ALIGN_COALESCE_HDR - * This macro is used in btl_openib.c, while creating a coalesce fragment, + * This macro is used in btl_openib.c, while creating a coalesce fragment, * to align the coalesce headers. 
*/ #define BTL_OPENIB_ALIGN_COALESCE_HDR(ptr) \ OPAL_ALIGN_PTR(ptr, sizeof(uint32_t), unsigned char*) /* BTL_OPENIB_COALESCE_HDR_PADDING - * This macro is used in btl_openib_component.c, while parsing an incoming - * coalesce fragment, to determine the padding amount used to align the + * This macro is used in btl_openib_component.c, while parsing an incoming + * coalesce fragment, to determine the padding amount used to align the * mca_btl_openib_coalesce_hdr_t. */ #define BTL_OPENIB_COALESCE_HDR_PADDING(ptr) \ @@ -143,12 +143,12 @@ struct mca_btl_openib_footer_t { #if OPAL_OPENIB_PAD_HDR #if OPAL_ENABLE_DEBUG /* this footer needs to be of a 8-byte multiple so by adding the - * seq field you throw this off and you cannot just remove the + * seq field you throw this off and you cannot just remove the * padding because the padding is needed in order to adjust the alignment * and not overwrite other packets. */ uint8_t padding[12]; -#else +#else uint8_t padding[8]; #endif #endif @@ -190,10 +190,6 @@ typedef struct mca_btl_openib_footer_t mca_btl_openib_footer_t; #define MCA_BTL_OPENIB_CONTROL_RDMA 1 #define MCA_BTL_OPENIB_CONTROL_COALESCED 2 #define MCA_BTL_OPENIB_CONTROL_CTS 3 -#if BTL_OPENIB_FAILOVER_ENABLED -#define MCA_BTL_OPENIB_CONTROL_EP_BROKEN 4 -#define MCA_BTL_OPENIB_CONTROL_EP_EAGER_RDMA_ERROR 5 -#endif struct mca_btl_openib_control_header_t { uint8_t type; @@ -243,32 +239,6 @@ do { \ (h).rdma_credits = ntohs((h).rdma_credits); \ } while (0) -#if BTL_OPENIB_FAILOVER_ENABLED -struct mca_btl_openib_broken_connection_header_t { - mca_btl_openib_control_header_t control; - uint32_t lid; - uint64_t subnet_id; - uint32_t vpid; - uint32_t index; /* for eager RDMA only */ -}; -typedef struct mca_btl_openib_broken_connection_header_t mca_btl_openib_broken_connection_header_t; - -#define BTL_OPENIB_BROKEN_CONNECTION_HEADER_HTON(h) \ - do { \ - (h).lid = htonl((h).lid); \ - (h).subnet_id = hton64((h).subnet_id); \ - (h).vpid = htonl((h).vpid); \ - (h).index = 
htonl((h).index); \ - } while (0) - -#define BTL_OPENIB_BROKEN_CONNECTION_HEADER_NTOH(h) \ - do { \ - (h).lid = ntohl((h).lid); \ - (h).subnet_id = ntoh64((h).subnet_id); \ - (h).vpid = ntohl((h).vpid); \ - (h).index = ntohl((h).index); \ - } while (0) -#endif enum mca_btl_openib_frag_type_t { MCA_BTL_OPENIB_FRAG_RECV, MCA_BTL_OPENIB_FRAG_RECV_USER, diff --git a/opal/mca/btl/openib/btl_openib_get.c b/opal/mca/btl/openib/btl_openib_get.c index 2d335619c19..c8bc78105db 100644 --- a/opal/mca/btl/openib/btl_openib_get.c +++ b/opal/mca/btl/openib/btl_openib_get.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved. @@ -92,16 +92,6 @@ int mca_btl_openib_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint frag->sr_desc.wr.rdma.rkey = remote_handle->rkey; } -#if HAVE_XRC - if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS - frag->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; -#else - frag->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; -#endif - } -#endif - if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) { OPAL_THREAD_LOCK(&ep->endpoint_lock); rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_get_frags); @@ -138,6 +128,19 @@ int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base int qp = to_base_frag(frag)->base.order; struct ibv_send_wr *bad_wr; +#if HAVE_XRC + if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { + /* NTH: the remote SRQ number is only available once the endpoint is connected. 
By + * setting the value here instead of mca_btl_openib_get we guarantee the rem_srqs + * array is initialized. */ +#if OPAL_HAVE_CONNECTX_XRC_DOMAINS + frag->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; +#else + frag->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; +#endif + } +#endif + /* check for a send wqe */ if (qp_get_wqe(ep, qp) < 0) { qp_put_wqe(ep, qp); diff --git a/opal/mca/btl/openib/btl_openib_ini.c b/opal/mca/btl/openib/btl_openib_ini.c index a61cf28e270..e6bc6e89c66 100644 --- a/opal/mca/btl/openib/btl_openib_ini.c +++ b/opal/mca/btl/openib/btl_openib_ini.c @@ -13,7 +13,7 @@ * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -421,7 +421,7 @@ static int parse_line(parsed_section_values_t *sv) /* Have no idea what this parameter is. Not an error -- just ignore it */ if (!showed_unknown_field_warning) { - opal_show_help("help-mpi-btl-openib.txt", + opal_show_help("help-mpi-btl-openib.txt", "ini file:unknown field", true, ini_filename, btl_openib_ini_yynewlines, key_buffer); @@ -510,7 +510,7 @@ static void reset_values(opal_btl_openib_ini_values_t *v) v->rdmacm_reject_causes_connect_error = false; v->rdmacm_reject_causes_connect_error_set = false; - v->ignore_device = false; + v->ignore_device = false; v->ignore_device_set = false; } diff --git a/opal/mca/btl/openib/btl_openib_mca.c b/opal/mca/btl/openib/btl_openib_mca.c index 790e3a06103..037d7d8d07c 100644 --- a/opal/mca/btl/openib/btl_openib_mca.c +++ b/opal/mca/btl/openib/btl_openib_mca.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights * reserved. @@ -89,11 +89,6 @@ static mca_base_var_enum_value_t device_type_values[] = { static int btl_openib_cq_size; static bool btl_openib_have_fork_support = OPAL_HAVE_IBV_FORK_INIT; -#if BTL_OPENIB_FAILOVER_ENABLED -static int btl_openib_verbose_failover; -static bool btl_openib_failover_enabled = true; -#endif - /* * utility routine for string parameter registration */ @@ -316,9 +311,12 @@ int btl_openib_register_mca_params(void) "(must be >= 1)", 32, &mca_btl_openib_component.ib_free_list_inc, REGINT_GE_ONE)); - CHECK(reg_string("mpool", NULL, - "Name of the memory pool to be used (it is unlikely that you will ever want to change this)", - "grdma", &mca_btl_openib_component.ib_mpool_name, + CHECK(reg_string("mpool_hints", NULL, "hints for selecting a memory pool (default: none)", + NULL, &mca_btl_openib_component.ib_mpool_hints, + 0)); + CHECK(reg_string("rcache", NULL, + "Name of the registration cache to be used (it is unlikely that you will ever want to change this)", + "grdma", &mca_btl_openib_component.ib_rcache_name, 0)); CHECK(reg_int("reg_mru_len", NULL, "Length of the registration cache most recently used list " @@ -357,7 +355,7 @@ int btl_openib_register_mca_params(void) "InfiniBand outstanding atomic reads " "(must be >= 0)", 4, &mca_btl_openib_component.ib_qp_ous_rd_atom, 0)); - + asprintf(&msg, "OpenFabrics MTU, in bytes (if not specified in INI files). 
Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes", IBV_MTU_256, IBV_MTU_512, @@ -394,7 +392,7 @@ int btl_openib_register_mca_params(void) "InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * (2^btl_openib_ib_timeout) " "(must be >= 0 and <= 31)", 20, &mca_btl_openib_component.ib_timeout, 0)); - + CHECK(reg_uint("ib_retry_count", NULL, "InfiniBand transmit retry count " "(must be >= 0 and <= 7)", @@ -470,30 +468,6 @@ int btl_openib_register_mca_params(void) "If nonzero, use the thread that will handle InfiniBand asynchronous events", true, &mca_btl_openib_component.use_async_event_thread)); -#if BTL_OPENIB_FAILOVER_ENABLED - /* failover specific output */ - CHECK(reg_int("verbose_failover", NULL, - "Output some verbose OpenIB BTL failover information " - "(0 = no output, nonzero = output)", 0, &btl_openib_verbose_failover, 0)); - mca_btl_openib_component.verbose_failover = opal_output_open(NULL); - opal_output_set_verbosity(mca_btl_openib_component.verbose_failover, btl_openib_verbose_failover); - - CHECK(reg_bool("port_error_failover", NULL, - "If nonzero, asynchronous port errors will trigger failover", - 0, &mca_btl_openib_component.port_error_failover)); - - /* Make non writeable parameter that indicates failover is configured in. */ - tmp = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version, - "failover_enabled", - "openib failover is configured: run with bfo PML to support failover between openib BTLs", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_CONSTANT, - &btl_openib_failover_enabled); - if (0 > tmp) ret = tmp; -#endif - CHECK(reg_bool("enable_srq_resize", NULL, "Enable/Disable on demand SRQ resize. 
" "(0 = without resizing, nonzero = with resizing)", 1, @@ -565,11 +539,8 @@ int btl_openib_register_mca_params(void) mca_btl_openib_module.super.btl_rdma_pipeline_frag_size = 1024 * 1024; mca_btl_openib_module.super.btl_min_rdma_pipeline_size = 256 * 1024; mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA | - MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA; -#if BTL_OPENIB_FAILOVER_ENABLED - mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_FAILOVER_SUPPORT; -#endif - + MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA | + MCA_BTL_FLAGS_SEND; #if HAVE_DECL_IBV_ATOMIC_HCA mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_ATOMIC_FOPS; mca_btl_openib_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_CSWAP; @@ -662,7 +633,7 @@ int btl_openib_register_mca_params(void) } asprintf(&default_qps, - "P,128,256,192,128:S,%u,1024,1008,64:S,%u,1024,1008,64:S,%u,1024,1008,64", + "S,128,256,192,128:S,%u,1024,1008,64:S,%u,1024,1008,64:S,%u,1024,1008,64", mid_qp_size, (uint32_t)mca_btl_openib_module.super.btl_eager_limit, (uint32_t)mca_btl_openib_module.super.btl_max_send_size); @@ -702,26 +673,10 @@ int btl_openib_register_mca_params(void) 0, &mca_btl_openib_component.gid_index, REGINT_GE_ZERO)); -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED - CHECK(reg_int("memalign", NULL, - "[64 | 32 | 0] - Enable (64bit or 32bit)/Disable(0) memory" - "alignment for all malloc calls if btl openib is used.", - 32, &mca_btl_openib_component.use_memalign, - REGINT_GE_ZERO)); - - mca_btl_openib_component.memalign_threshold = - mca_btl_openib_module.super.btl_eager_limit; - tmp = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version, - "memalign_threshold", - "Allocating memory more than btl_openib_memalign_threshhold" - "bytes will automatically be algined to the value of btl_openib_memalign bytes." 
- "memalign_threshhold defaults to the same value as mca_btl_openib_eager_limit.", - MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_btl_openib_component.memalign_threshold); - if (0 > tmp) ret = tmp; -#endif + CHECK(reg_bool("allow_different_subnets", NULL, + "Allow connecting processes from different IB subnets." + "(0 = do not allow; 1 = allow)", + false, &mca_btl_openib_component.allow_different_subnets)); /* Register any MCA params for the connect pseudo-components */ if (OPAL_SUCCESS == ret) { @@ -815,16 +770,10 @@ int btl_openib_verify_mca_params (void) } } #endif /* Workaround */ -#endif - -#if BTL_OPENIB_MALLOC_HOOKS_ENABLED - if (mca_btl_openib_component.use_memalign != 32 - && mca_btl_openib_component.use_memalign != 64 - && mca_btl_openib_component.use_memalign != 0){ - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "Wrong btl_openib_memalign parameter value. Allowed values: 64, 32, 0.", - "btl_openib_memalign is reset to 32"); - mca_btl_openib_component.use_memalign = 32; + if (0 != mca_btl_openib_module.super.btl_cuda_max_send_size) { + opal_show_help("help-mpi-btl-openib.txt", "do_not_set_openib_value", + true, opal_process_info.nodename); + mca_btl_openib_module.super.btl_cuda_max_send_size = 0; } #endif diff --git a/opal/mca/btl/openib/btl_openib_proc.c b/opal/mca/btl/openib/btl_openib_proc.c index 2d622fec3b8..8dfdf7ade46 100644 --- a/opal/mca/btl/openib/btl_openib_proc.c +++ b/opal/mca/btl/openib/btl_openib_proc.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -9,9 +10,15 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +36,23 @@ #include "connect/base.h" #include "connect/connect.h" +static void mca_btl_openib_proc_btl_construct(mca_btl_openib_proc_btlptr_t* elem); +static void mca_btl_openib_proc_btl_destruct(mca_btl_openib_proc_btlptr_t* elem); + +OBJ_CLASS_INSTANCE(mca_btl_openib_proc_btlptr_t, + opal_list_item_t, mca_btl_openib_proc_btl_construct, + mca_btl_openib_proc_btl_destruct); + +static void mca_btl_openib_proc_btl_construct(mca_btl_openib_proc_btlptr_t* elem) +{ + elem->openib_btl = NULL; +} + +static void mca_btl_openib_proc_btl_destruct(mca_btl_openib_proc_btlptr_t* elem) +{ + elem->openib_btl = NULL; +} + static void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* proc); static void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* proc); @@ -44,10 +68,7 @@ void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* ib_proc) ib_proc->proc_endpoints = 0; ib_proc->proc_endpoint_count = 0; OBJ_CONSTRUCT(&ib_proc->proc_lock, opal_mutex_t); - /* add to list of all proc instance */ - OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock); - opal_list_append(&mca_btl_openib_component.ib_procs, &ib_proc->super); - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); + OBJ_CONSTRUCT(&ib_proc->openib_btls, opal_list_t); } /* @@ -56,10 +77,7 @@ void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* ib_proc) void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* ib_proc) { - /* remove from list of all proc instances */ - 
OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock); - opal_list_remove_item(&mca_btl_openib_component.ib_procs, &ib_proc->super); - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); + mca_btl_openib_proc_btlptr_t* elem; /* release resources */ if(NULL != ib_proc->proc_endpoints) { @@ -77,6 +95,13 @@ void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* ib_proc) free(ib_proc->proc_ports); } OBJ_DESTRUCT(&ib_proc->proc_lock); + + elem = (mca_btl_openib_proc_btlptr_t*)opal_list_remove_first(&ib_proc->openib_btls); + while( NULL != elem ){ + OBJ_RELEASE(elem); + elem = (mca_btl_openib_proc_btlptr_t*)opal_list_remove_first(&ib_proc->openib_btls); + } + OBJ_DESTRUCT(&ib_proc->openib_btls); } @@ -84,26 +109,38 @@ void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* ib_proc) * Look for an existing IB process instances based on the associated * opal_proc_t instance. */ -static mca_btl_openib_proc_t* mca_btl_openib_proc_lookup_proc(opal_proc_t* proc) +static mca_btl_openib_proc_t* ibproc_lookup_no_lock(opal_proc_t* proc) { mca_btl_openib_proc_t* ib_proc; - OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock); - for(ib_proc = (mca_btl_openib_proc_t*) opal_list_get_first(&mca_btl_openib_component.ib_procs); ib_proc != (mca_btl_openib_proc_t*) opal_list_get_end(&mca_btl_openib_component.ib_procs); ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) { if(ib_proc->proc_opal == proc) { - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); return ib_proc; } } - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); return NULL; } +static mca_btl_openib_proc_t* ibproc_lookup_and_lock(opal_proc_t* proc) +{ + mca_btl_openib_proc_t* ib_proc; + + /* get the process from the list */ + opal_mutex_lock(&mca_btl_openib_component.ib_lock); + ib_proc = ibproc_lookup_no_lock(proc); + opal_mutex_unlock(&mca_btl_openib_component.ib_lock); + if( NULL != ib_proc ){ + /* if we were able to find it - lock it. 
+ * NOTE: we want to lock it outside of list locked region */ + opal_mutex_lock(&ib_proc->proc_lock); + } + return ib_proc; +} + static void inline unpack8(char **src, uint8_t *value) { /* Copy one character */ @@ -120,9 +157,9 @@ static void inline unpack8(char **src, uint8_t *value) * associated w/ a given destination on this datastructure. */ -mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc) +mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc) { - mca_btl_openib_proc_t* module_proc = NULL; + mca_btl_openib_proc_t *ib_proc = NULL, *ib_proc_ret = NULL; size_t msg_size; uint32_t size; int rc, i, j; @@ -130,34 +167,45 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc) char *offset; int modex_message_size; mca_btl_openib_modex_message_t dummy; + bool is_new = false; /* Check if we have already created a IB proc * structure for this ompi process */ - module_proc = mca_btl_openib_proc_lookup_proc(proc); - if (NULL != module_proc) { + ib_proc = ibproc_lookup_and_lock(proc); + if (NULL != ib_proc) { /* Gotcha! */ - return module_proc; + return ib_proc; } - /* Oops! First time, gotta create a new IB proc + /* All initialization has to be an atomic operation. we do the following assumption: + * - we let all concurent threads to try to do the initialization; + * - when one has finished it locks ib_lock and checks if corresponding + * process is still missing; + * - if so - new proc is added, otherwise - initialized proc struct is released. + */ + + /* First time, gotta create a new IB proc * out of the opal_proc ... 
*/ - module_proc = OBJ_NEW(mca_btl_openib_proc_t); + ib_proc = OBJ_NEW(mca_btl_openib_proc_t); + if (NULL == ib_proc) { + return NULL; + } + /* Initialize number of peer */ - module_proc->proc_endpoint_count = 0; - module_proc->proc_opal = proc; + ib_proc->proc_endpoint_count = 0; + ib_proc->proc_opal = proc; /* query for the peer address info */ OPAL_MODEX_RECV(rc, &mca_btl_openib_component.super.btl_version, - proc, &message, &msg_size); + &proc->proc_name, &message, &msg_size); if (OPAL_SUCCESS != rc) { BTL_VERBOSE(("[%s:%d] opal_modex_recv failed for peer %s", __FILE__, __LINE__, OPAL_NAME_PRINT(proc->proc_name))); - OBJ_RELEASE(module_proc); - return NULL; + goto err_exit; } if (0 == msg_size) { - return NULL; + goto err_exit; } /* Message was packed in btl_openib_component.c; the format is @@ -166,46 +214,46 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc) /* Unpack the number of modules in the message */ offset = (char *) message; - unpack8(&offset, &(module_proc->proc_port_count)); - BTL_VERBOSE(("unpack: %d btls", module_proc->proc_port_count)); - if (module_proc->proc_port_count > 0) { - module_proc->proc_ports = (mca_btl_openib_proc_modex_t *) + unpack8(&offset, &(ib_proc->proc_port_count)); + BTL_VERBOSE(("unpack: %d btls", ib_proc->proc_port_count)); + if (ib_proc->proc_port_count > 0) { + ib_proc->proc_ports = (mca_btl_openib_proc_modex_t *) malloc(sizeof(mca_btl_openib_proc_modex_t) * - module_proc->proc_port_count); + ib_proc->proc_port_count); } else { - module_proc->proc_ports = NULL; + ib_proc->proc_ports = NULL; } /* Loop over unpacking all the ports */ - for (i = 0; i < module_proc->proc_port_count; i++) { + for (i = 0; i < ib_proc->proc_port_count; i++) { /* Unpack the modex comment message struct */ size = modex_message_size; - memcpy(&(module_proc->proc_ports[i].pm_port_info), offset, size); + memcpy(&(ib_proc->proc_ports[i].pm_port_info), offset, size); #if !defined(WORDS_BIGENDIAN) && 
OPAL_ENABLE_HETEROGENEOUS_SUPPORT - MCA_BTL_OPENIB_MODEX_MSG_NTOH(module_proc->proc_ports[i].pm_port_info); + MCA_BTL_OPENIB_MODEX_MSG_NTOH(ib_proc->proc_ports[i].pm_port_info); #endif offset += size; BTL_VERBOSE(("unpacked btl %d: modex message, offset now %d", i, (int)(offset-((char*)message)))); /* Unpack the number of CPCs that follow */ - unpack8(&offset, &(module_proc->proc_ports[i].pm_cpc_data_count)); + unpack8(&offset, &(ib_proc->proc_ports[i].pm_cpc_data_count)); BTL_VERBOSE(("unpacked btl %d: number of cpcs to follow %d (offset now %d)", - i, module_proc->proc_ports[i].pm_cpc_data_count, + i, ib_proc->proc_ports[i].pm_cpc_data_count, (int)(offset-((char*)message)))); - module_proc->proc_ports[i].pm_cpc_data = (opal_btl_openib_connect_base_module_data_t *) - calloc(module_proc->proc_ports[i].pm_cpc_data_count, + ib_proc->proc_ports[i].pm_cpc_data = (opal_btl_openib_connect_base_module_data_t *) + calloc(ib_proc->proc_ports[i].pm_cpc_data_count, sizeof(opal_btl_openib_connect_base_module_data_t)); - if (NULL == module_proc->proc_ports[i].pm_cpc_data) { - return NULL; + if (NULL == ib_proc->proc_ports[i].pm_cpc_data) { + goto err_exit; } /* Unpack the CPCs */ - for (j = 0; j < module_proc->proc_ports[i].pm_cpc_data_count; ++j) { + for (j = 0; j < ib_proc->proc_ports[i].pm_cpc_data_count; ++j) { uint8_t u8; opal_btl_openib_connect_base_module_data_t *cpcd; - cpcd = module_proc->proc_ports[i].pm_cpc_data + j; + cpcd = ib_proc->proc_ports[i].pm_cpc_data + j; unpack8(&offset, &u8); BTL_VERBOSE(("unpacked btl %d: cpc %d: index %d (offset now %d)", i, j, u8, (int)(offset-(char*)message))); @@ -224,7 +272,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc) cpcd->cbm_modex_message = malloc(cpcd->cbm_modex_message_len); if (NULL == cpcd->cbm_modex_message) { BTL_ERROR(("Failed to malloc")); - return NULL; + goto err_exit; } memcpy(cpcd->cbm_modex_message, offset, cpcd->cbm_modex_message_len); @@ -238,20 +286,51 @@ mca_btl_openib_proc_t* 
mca_btl_openib_proc_create(opal_proc_t* proc) } } - if (0 == module_proc->proc_port_count) { - module_proc->proc_endpoints = NULL; + if (0 == ib_proc->proc_port_count) { + ib_proc->proc_endpoints = NULL; } else { - module_proc->proc_endpoints = (mca_btl_base_endpoint_t**) - malloc(module_proc->proc_port_count * + ib_proc->proc_endpoints = (volatile mca_btl_base_endpoint_t**) + malloc(ib_proc->proc_port_count * sizeof(mca_btl_base_endpoint_t*)); } - if (NULL == module_proc->proc_endpoints) { - OBJ_RELEASE(module_proc); - return NULL; + if (NULL == ib_proc->proc_endpoints) { + goto err_exit; } BTL_VERBOSE(("unpacking done!")); - return module_proc; + + /* Finally add this process to the initialized procs list */ + opal_mutex_lock(&mca_btl_openib_component.ib_lock); + + ib_proc_ret = ibproc_lookup_no_lock(proc); + if (NULL == ib_proc_ret) { + /* if process can't be found in this list - insert it locked + * it is safe to lock ib_proc here because this thread is + * the only one who knows about it so far */ + opal_mutex_lock(&ib_proc->proc_lock); + opal_list_append(&mca_btl_openib_component.ib_procs, &ib_proc->super); + ib_proc_ret = ib_proc; + is_new = true; + } else { + /* otherwise - release module_proc */ + OBJ_RELEASE(ib_proc); + } + opal_mutex_unlock(&mca_btl_openib_component.ib_lock); + + /* if we haven't insert the process - lock it here so we + * won't lock mca_btl_openib_component.ib_lock */ + if( !is_new ){ + opal_mutex_lock(&ib_proc_ret->proc_lock); + } + + return ib_proc_ret; + +err_exit: + + BTL_ERROR(("%d: error exit from mca_btl_openib_proc_create", OPAL_PROC_MY_NAME.vpid)); + + OBJ_RELEASE(ib_proc); + return NULL; } int mca_btl_openib_proc_remove(opal_proc_t *proc, @@ -262,7 +341,7 @@ int mca_btl_openib_proc_remove(opal_proc_t *proc, /* Remove endpoint from the openib BTL version of the proc as well */ - ib_proc = mca_btl_openib_proc_lookup_proc(proc); + ib_proc = ibproc_lookup_and_lock(proc); if (NULL != ib_proc) { for (i = 0; i < 
ib_proc->proc_endpoint_count; ++i) { if (ib_proc->proc_endpoints[i] == endpoint) { @@ -270,6 +349,7 @@ int mca_btl_openib_proc_remove(opal_proc_t *proc, if (i == ib_proc->proc_endpoint_count - 1) { --ib_proc->proc_endpoint_count; } + opal_mutex_unlock(&ib_proc->proc_lock); return OPAL_SUCCESS; } } @@ -310,3 +390,27 @@ int mca_btl_openib_proc_insert(mca_btl_openib_proc_t* module_proc, module_proc->proc_endpoints[module_proc->proc_endpoint_count++] = module_endpoint; return OPAL_SUCCESS; } + +int mca_btl_openib_proc_reg_btl(mca_btl_openib_proc_t* ib_proc, + mca_btl_openib_module_t* openib_btl) +{ + mca_btl_openib_proc_btlptr_t* elem; + + + for(elem = (mca_btl_openib_proc_btlptr_t*)opal_list_get_first(&ib_proc->openib_btls); + elem != (mca_btl_openib_proc_btlptr_t*)opal_list_get_end(&ib_proc->openib_btls); + elem = (mca_btl_openib_proc_btlptr_t*)opal_list_get_next(elem)) { + if(elem->openib_btl == openib_btl) { + /* this is normal return meaning that this BTL has already touched this ib_proc */ + return OPAL_ERR_RESOURCE_BUSY; + } + } + + elem = OBJ_NEW(mca_btl_openib_proc_btlptr_t); + if( NULL == elem ){ + return OPAL_ERR_OUT_OF_RESOURCE; + } + elem->openib_btl = openib_btl; + opal_list_append(&ib_proc->openib_btls, &elem->super); + return OPAL_SUCCESS; +} diff --git a/opal/mca/btl/openib/btl_openib_proc.h b/opal/mca/btl/openib/btl_openib_proc.h index 0ca3c1358da..576018e5aac 100644 --- a/opal/mca/btl/openib/btl_openib_proc.h +++ b/opal/mca/btl/openib/btl_openib_proc.h @@ -11,6 +11,8 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -52,6 +54,19 @@ typedef struct mca_btl_openib_proc_modex_t { uint8_t pm_cpc_data_count; } mca_btl_openib_proc_modex_t; +/** + * The list element to hold pointers to openin_btls that are using this + * ib_proc. 
+ */ + +struct mca_btl_openib_proc_btlptr_t { + opal_list_item_t super; + mca_btl_openib_module_t* openib_btl; +}; +typedef struct mca_btl_openib_proc_btlptr_t mca_btl_openib_proc_btlptr_t; + +OBJ_CLASS_DECLARATION(mca_btl_openib_proc_btlptr_t); + /** * Represents the state of a remote process and the set of addresses * that it exports. Also cache an instance of mca_btl_base_endpoint_t for @@ -71,11 +86,14 @@ struct mca_btl_openib_proc_t { /** length of proc_ports array */ uint8_t proc_port_count; + /** list of openib_btl's that touched this proc **/ + opal_list_t openib_btls; + /** array of endpoints that have been created to access this proc */ - struct mca_btl_base_endpoint_t **proc_endpoints; + volatile struct mca_btl_base_endpoint_t **proc_endpoints; /** number of endpoints (length of proc_endpoints array) */ - size_t proc_endpoint_count; + volatile size_t proc_endpoint_count; /** lock to protect against concurrent access to proc state */ opal_mutex_t proc_lock; @@ -84,10 +102,13 @@ typedef struct mca_btl_openib_proc_t mca_btl_openib_proc_t; OBJ_CLASS_DECLARATION(mca_btl_openib_proc_t); -mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc); +mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc); int mca_btl_openib_proc_insert(mca_btl_openib_proc_t*, mca_btl_base_endpoint_t*); int mca_btl_openib_proc_remove(opal_proc_t* proc, mca_btl_base_endpoint_t* module_endpoint); +int mca_btl_openib_proc_reg_btl(mca_btl_openib_proc_t* ib_proc, + mca_btl_openib_module_t* openib_btl); + END_C_DECLS diff --git a/opal/mca/btl/openib/btl_openib_put.c b/opal/mca/btl/openib/btl_openib_put.c index d7839c1c2ce..83260e05446 100644 --- a/opal/mca/btl/openib/btl_openib_put.c +++ b/opal/mca/btl/openib/btl_openib_put.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. 
- * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved. @@ -49,7 +49,7 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint qp = mca_btl_openib_component.rdma_qp; } - if (OPAL_UNLIKELY((ep->qps[qp].ib_inline_max < size && !local_handle) || !remote_handle || + if (OPAL_UNLIKELY((btl->btl_put_local_registration_threshold < size && !local_handle) || !remote_handle || size > btl->btl_put_limit)) { return OPAL_ERR_BAD_PARAM; } @@ -85,7 +85,6 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint /* post descriptor */ to_out_frag(frag)->sr_desc.opcode = IBV_WR_RDMA_WRITE; - to_out_frag(frag)->sr_desc.send_flags = ib_send_flags(size, &(ep->qps[qp]), 1); to_out_frag(frag)->sr_desc.wr.rdma.remote_addr = remote_address; qp_inflight_wqe_to_frag(ep, qp, to_com_frag(frag)); @@ -101,16 +100,6 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint to_out_frag(frag)->sr_desc.wr.rdma.rkey = remote_handle->rkey; } -#if HAVE_XRC - if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS - to_out_frag(frag)->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; -#else - to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; -#endif - } -#endif - if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) { OPAL_THREAD_LOCK(&ep->endpoint_lock); rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_put_frags); @@ -150,6 +139,24 @@ int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base struct ibv_send_wr *bad_wr; int rc; + /* NTH: the inline send size and remote SRQ number are only available once the endpoint is + * connected. 
By setting these values here instead of mca_btl_openib_put we guarantee + * both fields are initialized */ + to_out_frag(frag)->sr_desc.send_flags = ib_send_flags (to_com_frag(frag)->sg_entry.length, + &(ep->qps[qp]), 1); + +#if HAVE_XRC + if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) { +#if OPAL_HAVE_CONNECTX_XRC + to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num; +#elif OPAL_HAVE_CONNECTX_XRC_DOMAINS + to_out_frag(frag)->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num; +#else +#error "that should never happen" +#endif + } +#endif + /* check for a send wqe */ if (qp_get_wqe(ep, qp) < 0) { qp_put_wqe(ep, qp); @@ -161,7 +168,7 @@ int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base if (0 != (rc = ibv_post_send(ep->qps[qp].qp->lcl_qp, &to_out_frag(frag)->sr_desc, &bad_wr))) { qp_put_wqe(ep, qp); - return OPAL_ERROR;; + return OPAL_ERROR; } return OPAL_SUCCESS; diff --git a/opal/mca/btl/openib/btl_openib_xrc.c b/opal/mca/btl/openib/btl_openib_xrc.c index 8a837817aa5..1952c31b12f 100644 --- a/opal/mca/btl/openib/btl_openib_xrc.c +++ b/opal/mca/btl/openib/btl_openib_xrc.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. @@ -5,6 +6,8 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -39,29 +42,6 @@ OBJ_CLASS_INSTANCE(ib_address_t, ib_address_constructor, ib_address_destructor); -/* run-time check for which libibverbs XRC API we really have underneath */ -bool mca_btl_openib_xrc_check_api() -{ - void *lib = dlopen(NULL, RTLD_NOW); /* current program */ - if (!lib) { - BTL_ERROR(("XRC error: could not find XRC API version")); - return false; - } - -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS - if (NULL != dlsym(lib, "ibv_open_xrcd")) { - BTL_ERROR(("XRC error: bad XRC API (require XRC from OFED 3.12+)")); - return false; - } -#else - if (NULL != dlsym(lib, "ibv_create_xrc_rcv_qp")) { - BTL_ERROR(("XRC error: bad XRC API (require XRC from OFED pre 3.12).")); - return false; - } -#endif - return true; -} - /* This func. opens XRC domain */ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device) { @@ -145,7 +125,10 @@ static void ib_address_constructor(ib_address_t *ib_addr) ib_addr->lid = 0; ib_addr->status = MCA_BTL_IB_ADDR_CLOSED; ib_addr->qp = NULL; - OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_mutex_t); + /* NTH: make the addr_lock recursive because mca_btl_openib_endpoint_connected can call + * into the CPC with the lock held. The alternative would be to drop the lock but the + * lock is never obtained in a critical path. 
*/ + OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_recursive_mutex_t); OBJ_CONSTRUCT(&ib_addr->pending_ep, opal_list_t); } diff --git a/opal/mca/btl/openib/btl_openib_xrc.h b/opal/mca/btl/openib/btl_openib_xrc.h index 4f537b3e8b0..72e1509c1c6 100644 --- a/opal/mca/btl/openib/btl_openib_xrc.h +++ b/opal/mca/btl/openib/btl_openib_xrc.h @@ -51,6 +51,4 @@ int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_device_t *device); int mca_btl_openib_ib_address_add_new (uint16_t lid, uint64_t s_id, opal_jobid_t ep_jobid, mca_btl_openib_endpoint_t *ep); -bool mca_btl_openib_xrc_check_api(void); - #endif diff --git a/opal/mca/btl/openib/configure.m4 b/opal/mca/btl/openib/configure.m4 index bbcca40757a..d91c8edd78b 100644 --- a/opal/mca/btl/openib/configure.m4 +++ b/opal/mca/btl/openib/configure.m4 @@ -26,10 +26,10 @@ # MCA_btl_openib_POST_CONFIG([should_build]) # ------------------------------------------ AC_DEFUN([MCA_opal_btl_openib_POST_CONFIG], [ - AM_CONDITIONAL([MCA_btl_openib_have_xrc], [test $1 -eq 1 -a "x$btl_openib_have_xrc" = "x1"]) - AM_CONDITIONAL([MCA_btl_openib_have_rdmacm], [test $1 -eq 1 -a "x$btl_openib_have_rdmacm" = "x1"]) - AM_CONDITIONAL([MCA_btl_openib_have_dynamic_sl], [test $1 -eq 1 -a "x$btl_openib_have_opensm_devel" = "x1"]) - AM_CONDITIONAL([MCA_btl_openib_have_udcm], [test $1 -eq 1 -a "x$btl_openib_have_udcm" = "x1"]) + AM_CONDITIONAL([MCA_btl_openib_have_xrc], [test $1 -eq 1 && test "x$btl_openib_have_xrc" = "x1"]) + AM_CONDITIONAL([MCA_btl_openib_have_rdmacm], [test $1 -eq 1 && test "x$btl_openib_have_rdmacm" = "x1"]) + AM_CONDITIONAL([MCA_btl_openib_have_dynamic_sl], [test $1 -eq 1 && test "x$btl_openib_have_opensm_devel" = "x1"]) + AM_CONDITIONAL([MCA_btl_openib_have_udcm], [test $1 -eq 1 && test "x$btl_openib_have_udcm" = "x1"]) ]) @@ -39,23 +39,24 @@ AC_DEFUN([MCA_opal_btl_openib_POST_CONFIG], [ AC_DEFUN([MCA_opal_btl_openib_CONFIG],[ AC_CONFIG_FILES([opal/mca/btl/openib/Makefile]) - OPAL_VAR_SCOPE_PUSH([cpcs LDFLAGS_save LIBS_save]) + 
OPAL_VAR_SCOPE_PUSH([cpcs btl_openib_LDFLAGS_save btl_openib_LIBS_save]) cpcs="oob" OPAL_CHECK_OPENFABRICS([btl_openib], [btl_openib_happy="yes" OPAL_CHECK_OPENFABRICS_CM([btl_openib])], [btl_openib_happy="no"]) + OPAL_CHECK_EXP_VERBS([btl_openib], [], []) AS_IF([test "$btl_openib_happy" = "yes"], [# With the new openib flags, look for ibv_fork_init - LDFLAGS_save="$LDFLAGS" - LIBS_save="$LIBS" + btl_openib_LDFLAGS_save="$LDFLAGS" + btl_openib_LIBS_save="$LIBS" LDFLAGS="$LDFLAGS $btl_openib_LDFLAGS" LIBS="$LIBS $btl_openib_LIBS" AC_CHECK_FUNCS([ibv_fork_init]) - LDFLAGS="$LDFLAGS_save" - LIBS="$LIBS_save" + LDFLAGS="$btl_openib_LDFLAGS_save" + LIBS="$btl_openib_LIBS_save" $1], [$2]) @@ -103,47 +104,9 @@ AC_DEFUN([MCA_opal_btl_openib_CONFIG],[ AC_MSG_CHECKING([which openib btl cpcs will be built]) AC_MSG_RESULT([$cpcs])]) - # Enable openib device failover. It is disabled by default. - AC_MSG_CHECKING([whether openib failover is enabled]) - AC_ARG_ENABLE([btl-openib-failover], - [AC_HELP_STRING([--enable-btl-openib-failover], - [enable openib BTL failover (default: disabled)])]) - if test "$enable_btl_openib_failover" = "yes"; then - AC_MSG_RESULT([yes]) - btl_openib_failover_enabled=1 - else - AC_MSG_RESULT([no]) - btl_openib_failover_enabled=0 - fi - AC_DEFINE_UNQUOTED([BTL_OPENIB_FAILOVER_ENABLED], [$btl_openib_failover_enabled], - [enable openib BTL failover]) - AM_CONDITIONAL([MCA_btl_openib_enable_failover], [test "x$btl_openib_failover_enabled" = "x1"]) - - # Check for __malloc_hook availability - AC_ARG_ENABLE(btl-openib-malloc-alignment, - AC_HELP_STRING([--enable-btl-openib-malloc-alignment], [Enable support for allocated memory alignment. 
Default: enabled if supported, disabled otherwise.])) - - btl_openib_malloc_hooks_enabled=0 - AS_IF([test "$enable_btl_openib_malloc_alignment" != "no"], - [AC_CHECK_HEADER([malloc.h], - [AC_CHECK_FUNC([__malloc_hook], - [AC_CHECK_FUNC([__realloc_hook], - [AC_CHECK_FUNC([__free_hook], - [btl_openib_malloc_hooks_enabled=1])])])])]) - - AS_IF([test "$enable_btl_openib_malloc_alignment" = "yes" -a "$btl_openib_malloc_hooks_enabled" = "0"], - [AC_MSG_ERROR([openib malloc alignment is requested but __malloc_hook is not available])]) - AC_MSG_CHECKING([whether the openib BTL will use malloc hooks]) - AS_IF([test "$btl_openib_malloc_hooks_enabled" = "0"], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([yes])]) - - AC_DEFINE_UNQUOTED(BTL_OPENIB_MALLOC_HOOKS_ENABLED, [$btl_openib_malloc_hooks_enabled], - [Whether the openib BTL malloc hooks are enabled]) - # make sure that CUDA-aware checks have been done AC_REQUIRE([OPAL_CHECK_CUDA]) - + # substitute in the things needed to build openib AC_SUBST([btl_openib_CFLAGS]) AC_SUBST([btl_openib_CPPFLAGS]) diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_base.c b/opal/mca/btl/openib/connect/btl_openib_connect_base.c index eadde149838..ca67d0f3635 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_base.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_base.c @@ -3,7 +3,7 @@ * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c index 3d47502b764..48133b25962 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c @@ -1,11 +1,12 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2008 Chelsio, Inc. All rights reserved. * Copyright (c) 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. + * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * Copyright (c) 2014 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights @@ -54,8 +55,8 @@ #include "opal/util/error.h" #include "opal/util/show_help.h" #include "opal/util/proc.h" +#include "opal/runtime/opal_progress_threads.h" -#include "btl_openib_fd.h" #include "btl_openib_proc.h" #include "btl_openib_endpoint.h" #include "connect/connect.h" @@ -184,6 +185,7 @@ typedef struct { #endif uint32_t rem_index; uint8_t qpnum; + opal_process_name_t rem_name; } __opal_attribute_packed__ private_data_t; #if !BTL_OPENIB_RDMACM_IB_ADDR @@ -211,8 +213,12 @@ static uint32_t rdmacm_addr = 0; static int rdmacm_resolve_timeout = 30000; static int rdmacm_resolve_max_retry_count = 20; static bool rdmacm_reject_causes_connect_error = false; +static pthread_cond_t rdmacm_disconnect_cond; +static pthread_mutex_t rdmacm_disconnect_lock; static volatile int disconnect_callbacks = 0; static bool rdmacm_component_initialized = false; +static opal_event_base_t *rdmacm_event_base = NULL; +static opal_event_t rdmacm_event; /* Calculate the *real* length of the message (not aligned/rounded up) */ @@ -371,68 +377,23 @@ static char *stringify(uint32_t addr) * the rdma_cm event id */ static mca_btl_openib_endpoint_t *rdmacm_find_endpoint(rdmacm_contents_t *contents, - struct rdma_cm_id *id, -#if BTL_OPENIB_RDMACM_IB_ADDR - uint64_t rem_port) -#else - uint16_t rem_port) -#endif + opal_process_name_t rem_name) { - int i; + mca_btl_openib_module_t *btl = contents->openib_btl; mca_btl_openib_endpoint_t *ep = NULL; - opal_pointer_array_t *endpoints = contents->openib_btl->device->endpoints; - - struct sockaddr *peeraddr = rdma_get_peer_addr(id); -#if BTL_OPENIB_RDMACM_IB_ADDR - union ibv_gid *ep_gid, peer_gid; - memcpy(peer_gid.raw, ((struct sockaddr_ib *) peeraddr)->sib_addr.sib_raw, sizeof peer_gid); -#else - uint32_t peeripaddr = ((struct sockaddr_in *) peeraddr)->sin_addr.s_addr; - -#if OPAL_ENABLE_DEBUG - char *a; -#endif - - OPAL_OUTPUT((-1, "remote peer requesting connection: %s port %d", - a = stringify(peeripaddr), rem_port)); -#if OPAL_ENABLE_DEBUG 
- free(a); -#endif -#endif - - for (i = 0; i < opal_pointer_array_get_size(endpoints); i++) { - mca_btl_openib_endpoint_t *endpoint; - modex_message_t *message; - - endpoint = (mca_btl_openib_endpoint_t *) opal_pointer_array_get_item(endpoints, i); - if (NULL == endpoint) { - continue; - } - - message = (modex_message_t *) endpoint->endpoint_remote_cpc_data->cbm_modex_message; -#if !BTL_OPENIB_RDMACM_IB_ADDR - OPAL_OUTPUT((-1, "message ipaddr = %s port %d", - a = stringify(message->ipaddr), message->tcp_port)); -#if OPAL_ENABLE_DEBUG - free(a); -#endif -#endif + opal_proc_t *opal_proc; -#if BTL_OPENIB_RDMACM_IB_ADDR - ep_gid = (union ibv_gid *) message->gid; - if (ep_gid->global.interface_id == peer_gid.global.interface_id && - ep_gid->global.subnet_prefix == peer_gid.global.subnet_prefix && - message->service_id == rem_port) { -#else - if (message->ipaddr == peeripaddr && message->tcp_port == rem_port) { -#endif - ep = endpoint; - break; - } + opal_proc = opal_proc_for_name (rem_name); + if (NULL == opal_proc) { + BTL_ERROR(("could not get proc associated with remote peer %s", + opal_process_name_print (rem_name))); + return NULL; } + ep = mca_btl_openib_get_ep (&btl->super, opal_proc); if (NULL == ep) { - BTL_ERROR(("can't find suitable endpoint for this peer")); + BTL_ERROR(("could not find endpoint for peer %s", + opal_process_name_print (rem_name))); } return ep; @@ -981,6 +942,7 @@ static int handle_connect_request(struct rdma_cm_event *event) rdmacm_contents_t *contents = listener_context->contents; mca_btl_openib_endpoint_t *endpoint; struct rdma_conn_param conn_param; + opal_process_name_t rem_name; modex_message_t *message; private_data_t msg; int rc = -1, qpnum; @@ -994,10 +956,11 @@ static int handle_connect_request(struct rdma_cm_event *event) qpnum = ((private_data_t *)event->param.conn.private_data)->qpnum; rem_port = ((private_data_t *)event->param.conn.private_data)->rem_port; rem_index = ((private_data_t 
*)event->param.conn.private_data)->rem_index; + rem_name = ((private_data_t *)event->param.conn.private_data)->rem_name; /* Determine which endpoint the remote side is trying to connect to; use the listener's context->contents to figure it out */ - endpoint = rdmacm_find_endpoint(contents, event->id, rem_port); + endpoint = rdmacm_find_endpoint(contents, rem_name); if (NULL == endpoint) { #if !BTL_OPENIB_RDMACM_IB_ADDR struct sockaddr *peeraddr = rdma_get_peer_addr(event->id); @@ -1011,7 +974,7 @@ static int handle_connect_request(struct rdma_cm_event *event) ((struct sockaddr_in *)peeraddr)->sin_addr.s_addr; c->peer_tcp_port = rdma_get_dst_port(event->id); } - opal_btl_openib_fd_run_in_main(show_help_cant_find_endpoint, c); + show_help_cant_find_endpoint (c); #else BTL_ERROR(("Cannot find endpoint.")); #endif @@ -1140,6 +1103,7 @@ static int handle_connect_request(struct rdma_cm_event *event) /* Fill the private data being sent to the other side */ msg.qpnum = qpnum; msg.rem_index = endpoint->index; + msg.rem_name = OPAL_PROC_MY_NAME; /* Accepting the connection will result in a RDMA_CM_EVENT_ESTABLISHED event on both the client and server @@ -1160,19 +1124,6 @@ static int handle_connect_request(struct rdma_cm_event *event) return OPAL_ERROR; } -/* - * Invoked by service thread - */ -static void *rdmacm_unmonitor(int fd, int flags, void *context) -{ - volatile int *barrier = (volatile int *) context; - - OPAL_OUTPUT((-1, "SERVICE rdmacm unlocking main thread")); - *barrier = 1; - - return NULL; -} - /* * Runs in service thread * @@ -1181,24 +1132,36 @@ static void *rdmacm_unmonitor(int fd, int flags, void *context) * in the service thread while rdma_disconnect() is still running in * the main thread (which causes all manner of Bad Things to occur). 
*/ -static void *call_disconnect_callback(void *v) +static void *call_disconnect_callback(int fd, int flags, void *v) { + rdmacm_contents_t *contents = (rdmacm_contents_t *) v; void *tmp = NULL; id_context_t *context = (id_context_t*) v; - OPAL_OUTPUT((-1, "SERVICE Service thread calling disconnect on ID %p", - (void*) context->id)); + opal_list_item_t *item; + + pthread_mutex_lock (&rdmacm_disconnect_lock); + while (NULL != (item = opal_list_remove_first(&contents->ids))) { + context = (id_context_t *) item; - if (!context->already_disconnected) { - tmp = context->id; - rdma_disconnect(context->id); - context->already_disconnected = true; + OPAL_OUTPUT((-1, "RDMACM Event thread calling disconnect on ID %p", + (void*) context->id)); + + if (!context->already_disconnected) { + tmp = context->id; + rdma_disconnect(context->id); + context->already_disconnected = true; + } + + OBJ_RELEASE(context); + + OPAL_OUTPUT((-1, "RDMACM Event thread disconnect on ID %p done", + (void*) tmp)); } - OBJ_RELEASE(context); /* Tell the main thread that we're done */ - (void)opal_atomic_add(&disconnect_callbacks, 1); - OPAL_OUTPUT((-1, "SERVICE Service thread disconnect on ID %p done; count=%d", - (void*) tmp, disconnect_callbacks)); + pthread_cond_signal(&rdmacm_disconnect_cond); + pthread_mutex_unlock(&rdmacm_disconnect_lock); + return NULL; } @@ -1212,8 +1175,8 @@ static void *call_disconnect_callback(void *v) */ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint) { - int num_to_wait_for; - opal_list_item_t *item, *item2; + rdmacm_contents_t *contents = NULL, *item; + opal_event_t event; BTL_VERBOSE(("Start disconnecting...")); OPAL_OUTPUT((-1, "MAIN Endpoint finalizing")); @@ -1232,35 +1195,29 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint) * main thread and service thread. 
*/ opal_mutex_lock(&client_list_lock); - num_to_wait_for = disconnect_callbacks = 0; - for (item = opal_list_get_first(&client_list); - item != opal_list_get_end(&client_list); - item = opal_list_get_next(item)) { - rdmacm_contents_t *contents = (rdmacm_contents_t *) item; - - if (endpoint == contents->endpoint) { - while (NULL != - (item2 = opal_list_remove_first(&(contents->ids)))) { - /* Fun race condition: we cannot call - rdma_disconnect() here in the main thread, because - if we do, there is a nonzero chance that the - DISCONNECT event will be delivered and get executed - in the service thread immediately. If this all - happens before rdma_disconnect() returns, all - manner of Bad Things can/will occur. So just - invoke rdma_disconnect() in the service thread - where we guarantee that we won't be processing an - event when it is called. */ - OPAL_OUTPUT((-1, "MAIN Main thread calling disconnect on ID %p", - (void*) ((id_context_t*) item2)->id)); - ++num_to_wait_for; - opal_btl_openib_fd_run_in_service(call_disconnect_callback, - item2); - } + OPAL_LIST_FOREACH(item, &client_list, rdmacm_contents_t) { + if (endpoint == item->endpoint) { + contents = item; + opal_list_remove_item(&client_list, (opal_list_item_t *) contents); + contents->on_client_list = false; + + /* Fun race condition: we cannot call + rdma_disconnect() in this thread, because + if we do, there is a nonzero chance that the + DISCONNECT event will be delivered and get executed + in the rdcm event thread immediately. If this all + happens before rdma_disconnect() returns, all + manner of Bad Things can/will occur. So just + invoke rdma_disconnect() in the rdmacm event thread + where we guarantee that we won't be processing an + event when it is called. 
*/ + + opal_event_set (rdmacm_event_base, &event, -1, OPAL_EV_READ, + call_disconnect_callback, contents); + opal_event_active (&event, OPAL_EV_READ, 1); + /* remove_item returns the item before the item removed, meaning that the for list is still safe */ - item = opal_list_remove_item(&client_list, item); - contents->on_client_list = false; break; } } @@ -1269,10 +1226,13 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint) opal_atomic_wmb(); opal_mutex_unlock(&client_list_lock); - /* Now wait for all the disconnect callbacks to occur */ - while (num_to_wait_for != disconnect_callbacks) { - opal_btl_openib_fd_main_thread_drain(); - sched_yield(); + if (NULL != contents) { + /* Now wait for all the disconnect callbacks to occur */ + pthread_mutex_lock(&rdmacm_disconnect_lock); + while (opal_list_get_size (&contents->ids)) { + pthread_cond_wait (&rdmacm_disconnect_cond, &rdmacm_disconnect_lock); + } + pthread_mutex_unlock(&rdmacm_disconnect_lock); } OPAL_OUTPUT((-1, "MAIN Endpoint finished finalizing")); @@ -1355,7 +1315,7 @@ static int rdmacm_connect_endpoint(id_context_t *context, /* Ensure that all the writes back to the endpoint and associated data structures have completed */ opal_atomic_wmb(); - opal_btl_openib_fd_run_in_main(local_endpoint_cpc_complete, endpoint); + mca_btl_openib_run_in_main (local_endpoint_cpc_complete, endpoint); return OPAL_SUCCESS; } @@ -1619,6 +1579,7 @@ static int finish_connect(id_context_t *context) msg.qpnum = context->qpnum; msg.rem_index = contents->endpoint->index; + msg.rem_name = OPAL_PROC_MY_NAME; #if BTL_OPENIB_RDMACM_IB_ADDR memset(msg.librdmacm_header, 0, sizeof(msg.librdmacm_header)); msg.rem_port = contents->service_id; @@ -1668,9 +1629,8 @@ static int finish_connect(id_context_t *context) /* * Runs in main thread */ -static void *show_help_rdmacm_event_error(void *c) +static void *show_help_rdmacm_event_error (struct rdma_cm_event *event) { - struct rdma_cm_event *event = (struct 
rdma_cm_event*) c; id_context_t *context = (id_context_t*) event->id->context; if (RDMA_CM_EVENT_DEVICE_REMOVAL == event->event) { @@ -1802,7 +1762,7 @@ static int event_handler(struct rdma_cm_event *event) case RDMA_CM_EVENT_CONNECT_RESPONSE: case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_DEVICE_REMOVAL: - opal_btl_openib_fd_run_in_main(show_help_rdmacm_event_error, event); + show_help_rdmacm_event_error (event); rc = OPAL_ERROR; break; @@ -1817,7 +1777,7 @@ static int event_handler(struct rdma_cm_event *event) rc = resolve_route(context); break; } - opal_btl_openib_fd_run_in_main(show_help_rdmacm_event_error, event); + show_help_rdmacm_event_error (event); rc = OPAL_ERROR; break; @@ -1833,7 +1793,7 @@ static int event_handler(struct rdma_cm_event *event) } /* - * Runs in service thread + * Runs in event thread */ static inline void rdmamcm_event_error(struct rdma_cm_event *event) { @@ -1843,12 +1803,12 @@ static inline void rdmamcm_event_error(struct rdma_cm_event *event) endpoint = ((id_context_t *)event->id->context)->contents->endpoint; } - opal_btl_openib_fd_run_in_main(mca_btl_openib_endpoint_invoke_error, - endpoint); + mca_btl_openib_run_in_main (mca_btl_openib_endpoint_invoke_error, + endpoint); } /* - * Runs in service thread + * Runs in event thread */ static void *rdmacm_event_dispatch(int fd, int flags, void *context) { @@ -2043,6 +2003,15 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, opal_btl_ struct sockaddr_in sin; #endif + /* RDMACM is not supported for MPI_THREAD_MULTIPLE */ + if (opal_using_threads()) { + BTL_VERBOSE(("rdmacm CPC is not supported with MPI_THREAD_MULTIPLE; skipped on %s:%d", + ibv_get_device_name(openib_btl->device->ib_dev), + openib_btl->port_num)); + rc = OPAL_ERR_NOT_SUPPORTED; + goto out; + } + /* RDMACM is not supported if we have any XRC QPs */ if (mca_btl_openib_component.num_xrc_qps > 0) { BTL_VERBOSE(("rdmacm CPC not supported with XRC receive queues, please try xoob CPC; skipped on %s:%d", 
@@ -2051,6 +2020,11 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, opal_btl_ rc = OPAL_ERR_NOT_SUPPORTED; goto out; } + if (!BTL_OPENIB_QP_TYPE_PP(0)) { + BTL_VERBOSE(("rdmacm CPC only supported when the first QP is a PP QP; skipped")); + rc = OPAL_ERR_NOT_SUPPORTED; + goto out; + } BTL_VERBOSE(("rdmacm_component_query")); @@ -2072,6 +2046,7 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, opal_btl_ selected if QP 0 is PP */ (*cpc)->cbm_uses_cts = true; + /* Start monitoring the fd associated with the cm_device */ server = OBJ_NEW(rdmacm_contents_t); if (NULL == server) { rc = OPAL_ERR_OUT_OF_RESOURCE; @@ -2106,7 +2081,8 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, opal_btl_ sin.sin_addr.s_addr = rdmacm_addr; sin.sin_port = (uint16_t) rdmacm_port; #else - rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num, 0, &server->gid); + rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num, + mca_btl_openib_component.gid_index, &server->gid); if (0 != rc) { BTL_ERROR(("local gid query failed")); goto out4; @@ -2220,9 +2196,7 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, opal_btl_ */ static int rdmacm_component_finalize(void) { - volatile int barrier = 0; opal_list_item_t *item, *item2; - int rc; BTL_VERBOSE(("rdmacm_component_finalize")); @@ -2232,36 +2206,20 @@ static int rdmacm_component_finalize(void) return OPAL_SUCCESS; } - if (NULL != event_channel) { - rc = opal_btl_openib_fd_unmonitor(event_channel->fd, - rdmacm_unmonitor, (void*) &barrier); - if (OPAL_SUCCESS != rc) { - BTL_ERROR(("Error disabling fd monitor")); - } - - /* Wait for the service thread to stop monitoring the fd */ - OPAL_OUTPUT((-1, "MAIN rdmacm_component_finalize: waiting for thread to finish")); - while (0 == barrier) { - sched_yield(); - } - OPAL_OUTPUT((-1, "MAIN rdmacm_component_finalize: thread finished")); + if (rdmacm_event_base) { + 
opal_event_del (&rdmacm_event); + opal_progress_thread_finalize (NULL); + rdmacm_event_base = NULL; } - /* The service thread is no longer running; no need to lock access + /* The event thread is no longer running; no need to lock access to the client_list */ - for (item = opal_list_remove_first(&client_list); - NULL != item; - item = opal_list_remove_first(&client_list)) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&client_list); + OPAL_LIST_DESTRUCT(&client_list); /* For each of the items in the server list, there's only one item in the "ids" list -- the server listener. So explicitly destroy its RDMA ID context. */ - for (item = opal_list_remove_first(&server_listener_list); - NULL != item; - item = opal_list_remove_first(&server_listener_list)) { + while (NULL != (item = opal_list_remove_first(&server_listener_list))) { rdmacm_contents_t *contents = (rdmacm_contents_t*) item; item2 = opal_list_remove_first(&(contents->ids)); OBJ_RELEASE(item2); @@ -2277,6 +2235,9 @@ static int rdmacm_component_finalize(void) mca_btl_openib_free_rdma_addr_list(); + pthread_cond_destroy (&rdmacm_disconnect_cond); + pthread_mutex_destroy (&rdmacm_disconnect_lock); + return OPAL_SUCCESS; } @@ -2326,10 +2287,22 @@ static int rdmacm_component_init(void) return OPAL_ERR_UNREACH; } - /* Start monitoring the fd associated with the cm_device */ - opal_btl_openib_fd_monitor(event_channel->fd, OPAL_EV_READ, - rdmacm_event_dispatch, NULL); + rdmacm_event_base = opal_progress_thread_init (NULL); + if (NULL == rdmacm_event_base) { + opal_output_verbose (5, opal_btl_base_framework.framework_output, + "openib BTL: could not create rdmacm event thread"); + return OPAL_ERR_UNREACH; + } + + opal_event_set (rdmacm_event_base, &rdmacm_event, event_channel->fd, + OPAL_EV_READ | OPAL_EV_PERSIST, rdmacm_event_dispatch, NULL); + + opal_event_add (&rdmacm_event, 0); + + pthread_cond_init (&rdmacm_disconnect_cond, NULL); + pthread_mutex_init (&rdmacm_disconnect_lock, NULL); rdmacm_component_initialized = 
true; + return OPAL_SUCCESS; } diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c index 85113739a02..7920fd7aa35 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c @@ -3,17 +3,18 @@ * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2009 Mellanox Technologies. All rights reserved. * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -66,14 +67,13 @@ #include "opal/util/error.h" #include "opal/util/alfg.h" #include "opal_stdint.h" +#include "opal/class/opal_fifo.h" #include "btl_openib_endpoint.h" #include "btl_openib_proc.h" -#include "btl_openib_fd.h" #include "btl_openib_async.h" #include "connect/connect.h" -#include "opal/mca/mpool/grdma/mpool_grdma.h" #include "opal/util/sys_limits.h" #if (ENABLE_DYNAMIC_SL) @@ -149,9 +149,7 @@ typedef struct udcm_module { opal_mutex_t cm_send_lock; /* Receive queue */ - opal_mutex_t cm_recv_msg_queue_lock; - opal_list_t cm_recv_msg_queue; - bool cm_message_event_active; + opal_fifo_t cm_recv_msg_fifo; /* The associated BTL */ struct mca_btl_openib_module_t *btl; @@ -159,8 +157,20 @@ typedef struct udcm_module { /* This module's modex message */ modex_msg_t modex; - /** The channel is being monitored */ - bool channel_monitored; + /* channel monitoring */ + + /** channel event base */ + opal_event_base_t 
*channel_evbase; + + /** channel monitoring event */ + opal_event_t channel_event; + + /* message processing */ + /** message event is active */ + int32_t cm_message_event_active; + + /** message event */ + opal_event_t cm_message_event; } udcm_module_t; /* @@ -218,6 +228,7 @@ typedef struct udcm_msg_hdr { union { /* UDCM_MESSAGE_CONNECT */ struct msg_connect { + opal_process_name_t rem_name; int32_t rem_ep_index; uint8_t rem_port_num; } req; @@ -228,6 +239,7 @@ typedef struct udcm_msg_hdr { #if HAVE_XRC /* UDCM_MESSAGE_XCONNECT, UDCM_MESSAGE_XCONNECT2 */ struct msg_xrc_connect { + opal_process_name_t rem_name; int32_t rem_ep_index; uint8_t rem_port_num; uint32_t rem_qp_num; @@ -287,7 +299,7 @@ static OBJ_CLASS_INSTANCE(udcm_message_sent_t, opal_list_item_t, /*--------------------------------------------------------------------*/ static void udcm_component_register(void); -static int udcm_component_query(mca_btl_openib_module_t *btl, +static int udcm_component_query(mca_btl_openib_module_t *btl, opal_btl_openib_connect_base_module_t **cpc); static int udcm_component_finalize(void); @@ -302,10 +314,10 @@ static int udcm_module_finalize(mca_btl_openib_module_t *btl, opal_btl_openib_connect_base_module_t *cpc); static void *udcm_cq_event_dispatch(int fd, int flags, void *context); -static void *udcm_message_callback (void *context); +static void *udcm_message_callback (int fd, int flags, void *context); static void udcm_set_message_timeout (udcm_message_sent_t *message); -static void udcm_cancel_message_timeout (udcm_message_sent_t *message); +static void udcm_free_message (udcm_message_sent_t *message); static int udcm_module_init (udcm_module_t *m, mca_btl_openib_module_t *btl); @@ -331,11 +343,7 @@ static int udcm_xrc_start_connect (opal_btl_openib_connect_base_module_t *cpc, static int udcm_xrc_restart_connect (mca_btl_base_endpoint_t *lcl_ep); static int udcm_xrc_send_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t rem_qp_num, uint32_t rem_psn); static int
udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep); -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t qp_num); -#else -static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep); -#endif static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, uint32_t rem_qp_num, uint32_t rem_psn); static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_endpoint_t *rem_ep, uint8_t msg_type); @@ -426,7 +434,7 @@ static void udcm_component_register(void) &udcm_max_retry); } -static int udcm_component_query(mca_btl_openib_module_t *btl, +static int udcm_component_query(mca_btl_openib_module_t *btl, opal_btl_openib_connect_base_module_t **cpc) { udcm_module_t *m = NULL; @@ -492,8 +500,8 @@ static int udcm_component_query(mca_btl_openib_module_t *btl, btl->port_num)); } else { BTL_VERBOSE(("unavailable for use on %s:%d; fatal error %d (%s)", - ibv_get_device_name(btl->device->ib_dev), - btl->port_num, rc, + ibv_get_device_name(btl->device->ib_dev), + btl->port_num, rc, opal_strerror(rc))); } @@ -517,27 +525,24 @@ static int udcm_component_finalize(void) static int udcm_endpoint_init_self_xrc (struct mca_btl_base_endpoint_t *lcl_ep) { udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep); + int32_t recv_qpn; int rc; opal_mutex_lock (&udep->udep_lock); do { -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS - rc = udcm_xrc_recv_qp_connect (lcl_ep, lcl_ep->qps[0].qp->lcl_qp->qp_num); -#else - lcl_ep->xrc_recv_qp_num = lcl_ep->qps[0].qp->lcl_qp->qp_num; - rc = udcm_xrc_recv_qp_connect (lcl_ep); -#endif - if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("error connecting loopback XRC receive queue pair")); + if (OPAL_SUCCESS != (rc = udcm_endpoint_init_data (lcl_ep))) { + BTL_VERBOSE(("error initializing loopback endpoint cpc data")); break; } - rc = mca_btl_openib_endpoint_post_recvs (lcl_ep); + rc = udcm_xrc_send_qp_create (lcl_ep); if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("error posting 
receives for loopback queue pair")); + BTL_VERBOSE(("error creating send queue pair for loopback endpoint")); break; } + lcl_ep->rem_info.rem_index = lcl_ep->index; + rc = udcm_xrc_recv_qp_create (lcl_ep, lcl_ep->qps[0].qp->lcl_qp->qp_num, lcl_ep->qps[0].qp->lcl_psn); if (OPAL_SUCCESS != rc) { @@ -545,15 +550,38 @@ static int udcm_endpoint_init_self_xrc (struct mca_btl_base_endpoint_t *lcl_ep) break; } - rc = udcm_xrc_send_qp_connect (lcl_ep, lcl_ep->qps[0].qp->lcl_qp->qp_num, - lcl_ep->qps[0].qp->lcl_psn); + for (int i = 0 ; i < mca_btl_openib_component.num_xrc_qps ; ++i) { + uint32_t srq_num; +#if OPAL_HAVE_CONNECTX_XRC_DOMAINS + if (ibv_get_srq_num(lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq, &srq_num)) { + BTL_ERROR(("BTL openib UDCM internal error: can't get srq num")); + } +#else + srq_num = lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq->xrc_srq_num; +#endif + lcl_ep->rem_info.rem_srqs[i].rem_srq_num = srq_num; + } + +#if OPAL_HAVE_CONNECTX_XRC_DOMAINS + recv_qpn = lcl_ep->xrc_recv_qp->qp_num; +#else + recv_qpn = lcl_ep->xrc_recv_qp_num; +#endif + + lcl_ep->ib_addr->remote_xrc_rcv_qp_num = recv_qpn; + lcl_ep->rem_info.rem_qps[0].rem_psn = lcl_ep->xrc_recv_psn; + lcl_ep->rem_info.rem_qps[0].rem_qp_num = recv_qpn; + + rc = udcm_xrc_send_qp_connect (lcl_ep, recv_qpn, lcl_ep->xrc_recv_psn); if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("error creating loopback XRC send queue pair")); + BTL_VERBOSE(("error connecting loopback XRC send queue pair")); break; } - lcl_ep->endpoint_state = MCA_BTL_IB_CONNECTED; + BTL_VERBOSE(("successfully created loopback queue pair")); + /* need to hold the endpoint lock before calling udcm_finish_connection */ + OPAL_THREAD_LOCK(&lcl_ep->endpoint_lock); rc = udcm_finish_connection (lcl_ep); } while (0); opal_mutex_unlock (&udep->udep_lock); @@ -592,8 +620,8 @@ static int udcm_endpoint_init_self (struct mca_btl_base_endpoint_t *lcl_ep) break; } - lcl_ep->endpoint_state = MCA_BTL_IB_CONNECTED; - + /* need to hold the endpoint lock before calling 
udcm_finish_connection */ + OPAL_THREAD_LOCK(&lcl_ep->endpoint_lock); rc = udcm_finish_connection (lcl_ep); return OPAL_SUCCESS; @@ -605,7 +633,7 @@ static int udcm_endpoint_init_self (struct mca_btl_base_endpoint_t *lcl_ep) static int udcm_endpoint_init (struct mca_btl_base_endpoint_t *lcl_ep) { - udcm_endpoint_t *udep = lcl_ep->endpoint_local_cpc_data = + udcm_endpoint_t *udep = lcl_ep->endpoint_local_cpc_data = calloc(1, sizeof(udcm_endpoint_t)); if (NULL == udep) { BTL_ERROR(("malloc failed!")); @@ -630,7 +658,7 @@ static int udcm_endpoint_init (struct mca_btl_base_endpoint_t *lcl_ep) static int udcm_endpoint_finalize(struct mca_btl_base_endpoint_t *lcl_ep) { udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep); - + /* Free the stuff we allocated in udcm_endpoint_init */ if (NULL != udep) { if (udep->ah) { @@ -655,8 +683,7 @@ static int udcm_module_init (udcm_module_t *m, mca_btl_openib_module_t *btl) OBJ_CONSTRUCT(&m->cm_lock, opal_mutex_t); OBJ_CONSTRUCT(&m->cm_send_lock, opal_mutex_t); - OBJ_CONSTRUCT(&m->cm_recv_msg_queue, opal_list_t); - OBJ_CONSTRUCT(&m->cm_recv_msg_queue_lock, opal_mutex_t); + OBJ_CONSTRUCT(&m->cm_recv_msg_fifo, opal_fifo_t); OBJ_CONSTRUCT(&m->flying_messages, opal_list_t); OBJ_CONSTRUCT(&m->cm_timeout_lock, opal_mutex_t); @@ -728,15 +755,23 @@ static int udcm_module_init (udcm_module_t *m, mca_btl_openib_module_t *btl) m->cm_exiting = false; /* Monitor the fd associated with the completion channel */ - opal_btl_openib_fd_monitor(m->cm_channel->fd, OPAL_EV_READ, - udcm_cq_event_dispatch, m); - m->channel_monitored = true; + m->channel_evbase = opal_progress_thread_init (NULL); + + opal_event_set (m->channel_evbase, &m->channel_event, + m->cm_channel->fd, OPAL_EV_READ | OPAL_EV_PERSIST, + udcm_cq_event_dispatch, m); + + opal_event_add (&m->channel_event, 0); udcm_timeout_tv.tv_sec = udcm_timeout / 1000000; udcm_timeout_tv.tv_usec = udcm_timeout - 1000000 * udcm_timeout_tv.tv_sec; - m->cm_message_event_active = false; + 
m->cm_message_event_active = 0; + + /* set up the message event */ + opal_event_set (opal_sync_event_base, &m->cm_message_event, -1, + OPAL_EV_READ, udcm_message_callback, m); /* Finally, request CQ notification */ if (0 != ibv_req_notify_cq (m->cm_recv_cq, 0)) { @@ -756,7 +791,7 @@ udcm_module_start_connect(opal_btl_openib_connect_base_module_t *cpc, udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep); int rc = OPAL_SUCCESS; - BTL_VERBOSE(("endpoint %p (lid %d, ep index %d)", + BTL_VERBOSE(("endpoint %p (lid %d, ep index %d)", (void*)lcl_ep, lcl_ep->endpoint_btl->port_info.lid, lcl_ep->index)); @@ -799,21 +834,11 @@ udcm_module_start_connect(opal_btl_openib_connect_base_module_t *cpc, return rc; } -static void *udcm_unmonitor(int fd, int flags, void *context) -{ - volatile int *barrier = (volatile int *)context; - - *barrier = 1; - - return NULL; -} - static int udcm_module_finalize(mca_btl_openib_module_t *btl, opal_btl_openib_connect_base_module_t *cpc) { udcm_module_t *m = (udcm_module_t *) cpc; opal_list_item_t *item; - volatile int barrier = 0; if (NULL == m) { return OPAL_SUCCESS; @@ -821,27 +846,19 @@ static int udcm_module_finalize(mca_btl_openib_module_t *btl, m->cm_exiting = true; - if (m->channel_monitored) { - /* stop monitoring the channel's fd before destroying the listen qp */ - opal_btl_openib_fd_unmonitor(m->cm_channel->fd, udcm_unmonitor, (void *)&barrier); - - while (0 == barrier) { - sched_yield(); - } + if (m->channel_evbase) { + opal_event_del (&m->channel_event); + opal_progress_thread_finalize (NULL); } opal_mutex_lock (&m->cm_lock); - opal_mutex_lock (&m->cm_recv_msg_queue_lock); - /* clear message queue */ - while ((item = opal_list_remove_first(&m->cm_recv_msg_queue))) { + while (NULL != (item = opal_fifo_pop_atomic (&m->cm_recv_msg_fifo))) { OBJ_RELEASE(item); } - opal_mutex_unlock (&m->cm_recv_msg_queue_lock); - - OBJ_DESTRUCT(&m->cm_recv_msg_queue); + OBJ_DESTRUCT(&m->cm_recv_msg_fifo); opal_mutex_lock (&m->cm_timeout_lock); while 
((item = opal_list_remove_first(&m->flying_messages))) { @@ -885,7 +902,6 @@ static int udcm_module_finalize(mca_btl_openib_module_t *btl, opal_mutex_unlock (&m->cm_lock); OBJ_DESTRUCT(&m->cm_send_lock); OBJ_DESTRUCT(&m->cm_lock); - OBJ_DESTRUCT(&m->cm_recv_msg_queue_lock); OBJ_DESTRUCT(&m->cm_timeout_lock); return OPAL_SUCCESS; @@ -915,7 +931,7 @@ static int udcm_module_create_listen_qp (udcm_module_t *m) init_attr.cap.max_recv_wr = udcm_recv_count; init_attr.cap.max_send_wr = 1; - qp = ibv_create_qp(m->btl->device->ib_pd, &init_attr); + qp = ibv_create_qp(m->btl->device->ib_pd, &init_attr); if (NULL == qp) { BTL_VERBOSE(("could not create UD listen queue pair")); return OPAL_ERROR; @@ -934,7 +950,7 @@ static int udcm_module_create_listen_qp (udcm_module_t *m) BTL_ERROR(("error modifying qp to INIT errno says %s", strerror(errno))); return OPAL_ERROR; - } + } /* Move listen QP to RTR */ attr.qp_state = IBV_QPS_RTR; @@ -974,24 +990,7 @@ static void udcm_module_destroy_listen_qp (udcm_module_t *m) return; } - if (mca_btl_openib_component.use_async_event_thread && - -1 != mca_btl_openib_component.async_pipe[1]) { - /* Tell the openib async thread to ignore ERR state on the QP - we are about to manually set the ERR state on */ - mca_btl_openib_async_cmd_t async_command; - async_command.a_cmd = OPENIB_ASYNC_IGNORE_QP_ERR; - async_command.qp = m->listen_qp; - if (write(mca_btl_openib_component.async_pipe[1], - &async_command, sizeof(mca_btl_openib_async_cmd_t))<0){ - BTL_ERROR(("Failed to write to pipe [%d]",errno)); - return; - } - /* wait for ok from thread */ - if (OPAL_SUCCESS != - btl_openib_async_command_done(OPENIB_ASYNC_IGNORE_QP_ERR)) { - BTL_ERROR(("Command to openib async thread to ignore QP ERR state failed")); - } - } + mca_btl_openib_async_add_qp_ignore (m->listen_qp); do { /* Move listen QP into the ERR state to cancel all outstanding @@ -1321,7 +1320,11 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_ uint32_t 
max_send_wr) { udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep); +#if HAVE_DECL_IBV_EXP_CREATE_QP + struct ibv_exp_qp_init_attr init_attr; +#else struct ibv_qp_init_attr init_attr; +#endif size_t req_inline; int rc; @@ -1331,7 +1334,7 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_ init_attr.send_cq = m->btl->device->ib_cq[BTL_OPENIB_LP_CQ]; init_attr.recv_cq = m->btl->device->ib_cq[qp_cq_prio(qp)]; init_attr.srq = srq; - init_attr.cap.max_inline_data = req_inline = + init_attr.cap.max_inline_data = req_inline = max_inline_size(qp, m->btl->device); init_attr.cap.max_send_sge = 1; init_attr.cap.max_recv_sge = 1; /* we do not use SG list */ @@ -1342,15 +1345,45 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_ } init_attr.cap.max_send_wr = max_send_wr; +#if HAVE_DECL_IBV_EXP_CREATE_QP + /* use expanded verbs qp create to enable use of mlx5 atomics */ + init_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD; + init_attr.pd = m->btl->device->ib_pd; + +#if HAVE_DECL_IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG + init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG; + init_attr.max_atomic_arg = sizeof (int64_t); +#endif + +#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE + if (IBV_EXP_ATOMIC_HCA_REPLY_BE == m->btl->device->ib_exp_dev_attr.exp_atomic_cap) { + init_attr.exp_create_flags = IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY; + init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS; + } +#endif + + while (NULL == (lcl_ep->qps[qp].qp->lcl_qp = ibv_exp_create_qp (m->btl->device->ib_dev_context, + &init_attr))) { + /* NTH: this process may be out of registered memory. try evicting an item from + the lru of this btl's mpool */ + if (false == m->btl->device->rcache->rcache_evict (m->btl->device->rcache)) { + break; + } + } + +#else + while (NULL == (lcl_ep->qps[qp].qp->lcl_qp = ibv_create_qp(m->btl->device->ib_pd, &init_attr))) { /* NTH: this process may be out of registered memory. 
try evicting an item from the lru of this btl's mpool */ - if (false == mca_mpool_grdma_evict (m->btl->super.btl_mpool)) { + if (false == m->btl->device->rcache->rcache_evict (m->btl->device->rcache)) { break; } } +#endif + if (NULL == lcl_ep->qps[qp].qp->lcl_qp) { opal_show_help("help-mpi-btl-openib-cpc-base.txt", "ibv_create_qp failed", true, opal_process_info.nodename, @@ -1423,7 +1456,7 @@ static int udcm_rc_qp_create_all (mca_btl_base_endpoint_t *lcl_ep) pp_qp_num = 1; } - for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) { + for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) { struct ibv_srq *srq = NULL; uint32_t max_recv_wr, max_send_wr; int32_t rd_rsv, rd_num_credits; @@ -1437,7 +1470,7 @@ static int udcm_rc_qp_create_all (mca_btl_base_endpoint_t *lcl_ep) } if (BTL_OPENIB_QP_TYPE_PP(qp)) { - max_recv_wr = mca_btl_openib_component.qp_infos[qp].rd_num + + max_recv_wr = mca_btl_openib_component.qp_infos[qp].rd_num + rd_rsv; max_send_wr = mca_btl_openib_component.qp_infos[qp].rd_num + rd_num_credits; @@ -1469,36 +1502,26 @@ static int udcm_rc_qp_create_all (mca_btl_base_endpoint_t *lcl_ep) /* JMS: optimization target -- can we send something in private data to find the proc directly instead of having to search through *all* procs? 
*/ -static mca_btl_openib_endpoint_t *udcm_find_endpoint (opal_pointer_array_t *endpoints, +static mca_btl_openib_endpoint_t *udcm_find_endpoint (struct mca_btl_openib_module_t *btl, uint32_t qp_num, uint16_t lid, udcm_msg_hdr_t *msg_hdr) { - uint8_t port_num; - int i; - - port_num = msg_hdr->data.req.rem_port_num; - - for (i = 0 ; i < opal_pointer_array_get_size (endpoints) ; ++i) { - mca_btl_openib_endpoint_t *endpoint; - modex_msg_t *msg; - - endpoint = (mca_btl_openib_endpoint_t *) - opal_pointer_array_get_item (endpoints, i); - if (NULL == endpoint) { - continue; - } - - msg = UDCM_ENDPOINT_REM_MODEX(endpoint); + mca_btl_base_endpoint_t *endpoint; + struct opal_proc_t *opal_proc; - if (msg->mm_qp_num == qp_num && msg->mm_port_num == port_num && - msg->mm_lid == lid) - return endpoint; + opal_proc = opal_proc_for_name (msg_hdr->data.req.rem_name); + if (NULL == opal_proc) { + BTL_ERROR(("could not get proc associated with remote peer")); + return NULL; } - BTL_ERROR(("could not find endpoint with port: %d, lid: %d, msg_type: %d", - port_num, lid, msg_hdr->type)); + endpoint = mca_btl_openib_get_ep (&btl->super, opal_proc); + if (NULL == endpoint) { + BTL_ERROR(("could not find endpoint with port: %d, lid: %d, msg_type: %d", + msg_hdr->data.req.rem_port_num, lid, msg_hdr->type)); + } - return NULL; + return endpoint; } static int udcm_endpoint_init_data (mca_btl_base_endpoint_t *lcl_ep) @@ -1674,6 +1697,7 @@ static int udcm_send_request (mca_btl_base_endpoint_t *lcl_ep, msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index); msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num; + msg->data->hdr.data.req.rem_name = OPAL_PROC_MY_NAME; for (i = 0 ; i < mca_btl_openib_component.num_qps ; ++i) { msg->data->qps[i].psn = htonl(lcl_ep->qps[i].qp->lcl_psn); @@ -1683,7 +1707,7 @@ static int udcm_send_request (mca_btl_base_endpoint_t *lcl_ep, if (0 != (rc = udcm_post_send (lcl_ep, msg->data, m->msg_length, 0))) { BTL_VERBOSE(("error posting REQ")); - 
udcm_cancel_message_timeout (msg); + udcm_free_message (msg); return rc; } @@ -1706,7 +1730,7 @@ static int udcm_send_complete (mca_btl_base_endpoint_t *lcl_ep, if (0 != rc) { BTL_VERBOSE(("error posting complete")); - udcm_cancel_message_timeout (msg); + udcm_free_message (msg); return rc; } @@ -1732,7 +1756,7 @@ static int udcm_send_reject (mca_btl_base_endpoint_t *lcl_ep, if (0 != rc) { BTL_VERBOSE(("error posting rejection")); - udcm_cancel_message_timeout (msg); + udcm_free_message (msg); return rc; } @@ -1977,8 +2001,7 @@ static int udcm_process_messages (struct ibv_cq *event_cq, udcm_module_t *m) lcl_ep = message->hdr.lcl_ep; if (NULL == lcl_ep) { - lcl_ep = udcm_find_endpoint (m->btl->device->endpoints, wc[i].src_qp, - wc[i].slid, &message->hdr); + lcl_ep = udcm_find_endpoint (m->btl, wc[i].src_qp, wc[i].slid, &message->hdr); } if (NULL == lcl_ep ) { @@ -2083,9 +2106,7 @@ static int udcm_process_messages (struct ibv_cq *event_cq, udcm_module_t *m) /* Copy just the message header */ memcpy (&item->msg_hdr, &message->hdr, sizeof (message->hdr)); - opal_mutex_lock(&m->cm_recv_msg_queue_lock); - opal_list_append (&m->cm_recv_msg_queue, &item->super); - opal_mutex_unlock(&m->cm_recv_msg_queue_lock); + opal_fifo_push_atomic (&m->cm_recv_msg_fifo, &item->super); udcm_send_ack (lcl_ep, message->hdr.rem_ctx); @@ -2093,13 +2114,11 @@ static int udcm_process_messages (struct ibv_cq *event_cq, udcm_module_t *m) udcm_module_post_one_recv (m, msg_num); } - opal_mutex_lock (&m->cm_recv_msg_queue_lock); - if (opal_list_get_size (&m->cm_recv_msg_queue) && - !m->cm_message_event_active) { - m->cm_message_event_active = true; - opal_btl_openib_fd_run_in_main (udcm_message_callback, (void *) m); + opal_atomic_wmb (); + + if (0 == opal_atomic_swap_32 (&m->cm_message_event_active, 1)) { + opal_event_active (&m->cm_message_event, OPAL_EV_READ, 1); } - opal_mutex_unlock (&m->cm_recv_msg_queue_lock); return count; } @@ -2147,18 +2166,19 @@ static void *udcm_cq_event_dispatch(int 
fd, int flags, void *context) return NULL; } -static void *udcm_message_callback (void *context) +static void *udcm_message_callback (int fd, int flags, void *context) { udcm_module_t *m = (udcm_module_t *) context; udcm_message_recv_t *item; BTL_VERBOSE(("running message thread")); - opal_mutex_lock(&m->cm_recv_msg_queue_lock); - while ((item = (udcm_message_recv_t *) - opal_list_remove_first (&m->cm_recv_msg_queue))) { + /* Mark that the callback was started */ + opal_atomic_swap_32 (&m->cm_message_event_active, 0); + opal_atomic_wmb (); + + while ((item = (udcm_message_recv_t *) opal_fifo_pop_atomic (&m->cm_recv_msg_fifo))) { mca_btl_openib_endpoint_t *lcl_ep = item->msg_hdr.lcl_ep; - opal_mutex_unlock(&m->cm_recv_msg_queue_lock); OPAL_THREAD_LOCK(&lcl_ep->endpoint_lock); @@ -2194,15 +2214,10 @@ static void *udcm_message_callback (void *context) } OBJ_RELEASE (item); - - opal_mutex_lock(&m->cm_recv_msg_queue_lock); } BTL_VERBOSE(("exiting message thread")); - m->cm_message_event_active = false; - opal_mutex_unlock(&m->cm_recv_msg_queue_lock); - return NULL; } @@ -2212,7 +2227,7 @@ static void udcm_sent_message_constructor (udcm_message_sent_t *message) { memset ((char *)message + sizeof (message->super), 0, sizeof (*message) - sizeof (message->super)); - opal_event_evtimer_set(opal_event_base, &message->event, udcm_send_timeout, message); + opal_event_evtimer_set(opal_sync_event_base, &message->event, udcm_send_timeout, message); } static void udcm_sent_message_destructor (udcm_message_sent_t *message) @@ -2221,10 +2236,8 @@ static void udcm_sent_message_destructor (udcm_message_sent_t *message) free (message->data); } - if (message->event_active) { - opal_event_evtimer_del (&message->event); - message->event_active = false; - } + opal_event_evtimer_del (&message->event); + message->event_active = false; } /* mark: message timeout code */ @@ -2267,9 +2280,9 @@ static void udcm_send_timeout (evutil_socket_t fd, short event, void *arg) 
UDCM_ENDPOINT_REM_MODEX(lcl_ep)->mm_qp_num); /* We are running in the timeout thread. Invoke the error in the - main thread */ - opal_btl_openib_fd_run_in_main(mca_btl_openib_endpoint_invoke_error, - lcl_ep); + * "main thread" because it may call up into the pml or another + * component that may not have threading support enabled. */ + mca_btl_openib_run_in_main (mca_btl_openib_endpoint_invoke_error, lcl_ep); break; } @@ -2279,8 +2292,7 @@ static void udcm_send_timeout (evutil_socket_t fd, short event, void *arg) if (0 != udcm_post_send (lcl_ep, msg->data, msg->length, 0)) { BTL_VERBOSE(("error reposting message")); - opal_btl_openib_fd_run_in_main(mca_btl_openib_endpoint_invoke_error, - lcl_ep); + mca_btl_openib_run_in_main (mca_btl_openib_endpoint_invoke_error, lcl_ep); break; } } while (0); @@ -2303,21 +2315,22 @@ static void udcm_set_message_timeout (udcm_message_sent_t *message) opal_mutex_unlock (&m->cm_timeout_lock); } -static void udcm_cancel_message_timeout (udcm_message_sent_t *message) +static void udcm_free_message (udcm_message_sent_t *message) { udcm_module_t *m = UDCM_ENDPOINT_MODULE(message->endpoint); - BTL_VERBOSE(("cancelling timeout for message %p", (void *) message)); + BTL_VERBOSE(("releasing message %p", (void *) message)); opal_mutex_lock (&m->cm_timeout_lock); - opal_list_remove_item (&m->flying_messages, &message->super); - - /* start the event */ - opal_event_evtimer_del (&message->event); - message->event_active = false; + if (message->event_active) { + opal_list_remove_item (&m->flying_messages, &message->super); + message->event_active = false; + } opal_mutex_unlock (&m->cm_timeout_lock); + + OBJ_RELEASE(message); } /* mark: xrc connection support */ @@ -2608,11 +2621,7 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep) /* mark: xrc receive qp */ /* Recv qp connect */ -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t qp_num) -#else -static int 
udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep) -#endif { mca_btl_openib_module_t *openib_btl = lcl_ep->endpoint_btl; @@ -2626,9 +2635,9 @@ static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep) BTL_VERBOSE(("Connecting Recv QP\n")); lcl_ep->xrc_recv_qp = ibv_open_qp(openib_btl->device->ib_dev_context, &attr); if (NULL == lcl_ep->xrc_recv_qp) { /* failed to regester the qp, so it is already die and we should create new one */ - /* Return NOT READY !!!*/ - BTL_ERROR(("Failed to register qp_num: %d , get error: %s (%d)\n. Replying with RNR", - lcl_ep->xrc_recv_qp->qp_num, strerror(errno), errno)); + /* Return NOT READY !!!*/ + BTL_VERBOSE(("Failed to register qp_num: %d, get error: %s (%d)\n. Replying with RNR", + qp_num, strerror(errno), errno)); return OPAL_ERROR; } else { BTL_VERBOSE(("Connected to XRC Recv qp [%d]", lcl_ep->xrc_recv_qp->qp_num)); @@ -2636,13 +2645,16 @@ static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep) } #else int ret; + /* silence unused variable warning */ + (void) qp_num; + BTL_VERBOSE(("Connecting receive qp: %d", lcl_ep->xrc_recv_qp_num)); ret = ibv_reg_xrc_rcv_qp(openib_btl->device->xrc_domain, lcl_ep->xrc_recv_qp_num); if (ret) { /* failed to regester the qp, so it is already die and we should create new one */ /* Return NOT READY !!!*/ lcl_ep->xrc_recv_qp_num = 0; - BTL_ERROR(("Failed to register qp_num: %d , get error: %s (%d). Replying with RNR", - lcl_ep->xrc_recv_qp_num, strerror(ret), ret)); + BTL_VERBOSE(("Failed to register qp_num: %d , get error: %s (%d). 
Replying with RNR", + lcl_ep->xrc_recv_qp_num, strerror(ret), ret)); return OPAL_ERROR; } #endif @@ -2818,8 +2830,9 @@ static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_ return rc; } - msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index); - msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num; + msg->data->hdr.data.xreq.rem_ep_index = htonl(lcl_ep->index); + msg->data->hdr.data.xreq.rem_port_num = m->modex.mm_port_num; + msg->data->hdr.data.xreq.rem_name = OPAL_PROC_MY_NAME; if (UDCM_MESSAGE_XCONNECT == msg_type) { BTL_VERBOSE(("Sending XConnect with qp: %d, psn: %d", lcl_ep->qps[0].qp->lcl_qp->qp_num, @@ -2834,7 +2847,7 @@ static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_ if (0 != (rc = udcm_post_send (lcl_ep, msg->data, sizeof (udcm_msg_hdr_t), 0))) { BTL_VERBOSE(("error posting XREQ")); - udcm_cancel_message_timeout (msg); + udcm_free_message (msg); return rc; } @@ -2887,7 +2900,7 @@ static int udcm_xrc_send_xresponse (mca_btl_base_endpoint_t *lcl_ep, mca_btl_bas if (0 != rc) { BTL_VERBOSE(("error posting complete")); - udcm_cancel_message_timeout (msg); + udcm_free_message (msg); return rc; } @@ -2904,6 +2917,9 @@ static int udcm_xrc_handle_xconnect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg int response_type; int rc = OPAL_ERROR; + /* sanity check on message type */ + assert (UDCM_MESSAGE_XCONNECT == msg_hdr->type || UDCM_MESSAGE_XCONNECT2 == msg_hdr->type); + do { if (NULL == udep) { break; @@ -2920,11 +2936,7 @@ static int udcm_xrc_handle_xconnect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg if (UDCM_MESSAGE_XCONNECT2 == msg_hdr->type) { response_type = UDCM_MESSAGE_XRESPONSE2; -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS rc = udcm_xrc_recv_qp_connect (lcl_ep, msg_hdr->data.xreq.rem_qp_num); -#else - rc = udcm_xrc_recv_qp_connect (lcl_ep); -#endif if (OPAL_SUCCESS != rc) { /* return not ready. 
remote side will retry */ rej_reason = UDCM_REJ_NOT_READY; @@ -2960,7 +2972,9 @@ static int udcm_xrc_handle_xconnect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg return OPAL_SUCCESS; } while (0); - opal_mutex_unlock (&udep->udep_lock); + if (udep) { + opal_mutex_unlock (&udep->udep_lock); + } /* Reject the request */ BTL_VERBOSE(("rejecting request for reason %d", rej_reason)); diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.h b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.h index db6a0762fe2..a0fc2b062bd 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.h +++ b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.h @@ -4,9 +4,9 @@ * right reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/btl/openib/connect/help-mpi-btl-openib-cpc-base.txt b/opal/mca/btl/openib/connect/help-mpi-btl-openib-cpc-base.txt index b0abf2193e8..18dc23cb6e4 100644 --- a/opal/mca/btl/openib/connect/help-mpi-btl-openib-cpc-base.txt +++ b/opal/mca/btl/openib/connect/help-mpi-btl-openib-cpc-base.txt @@ -2,9 +2,9 @@ # # Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI's OpenFabrics IB CPC @@ -21,7 +21,7 @@ support) will be disabled for this port. CPCs attempted: %s # [cpc name not found] -An invalid CPC name was specified via the btl_openib_cpc_%s MCA +An invalid CPC name was specified via the btl_openib_cpc_%s MCA parameter. Local host: %s diff --git a/opal/mca/btl/openib/connect/help-mpi-btl-openib-cpc-rdmacm.txt b/opal/mca/btl/openib/connect/help-mpi-btl-openib-cpc-rdmacm.txt index 65c35ed91a1..3c292643626 100644 --- a/opal/mca/btl/openib/connect/help-mpi-btl-openib-cpc-rdmacm.txt +++ b/opal/mca/btl/openib/connect/help-mpi-btl-openib-cpc-rdmacm.txt @@ -2,9 +2,9 @@ # # Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI's OpenFabrics RDMA CM diff --git a/opal/mca/btl/openib/help-mpi-btl-openib.txt b/opal/mca/btl/openib/help-mpi-btl-openib.txt index 94dcc7b8f50..7266893b6ea 100644 --- a/opal/mca/btl/openib/help-mpi-btl-openib.txt +++ b/opal/mca/btl/openib/help-mpi-btl-openib.txt @@ -700,3 +700,9 @@ with CUDA GPU Direct RDMA. Either disable GPU Direct RDMA support or enable "leave pinned" support. Deactivating the openib BTL. Local host: %s +# +[do_not_set_openib_value] +Open MPI has detected that you have attempted to set the btl_openib_cuda_max_send_size +value. This is not supported. Setting back to default value of 0. + + Local host: %s diff --git a/opal/mca/btl/openib/mca-btl-openib-device-params.ini b/opal/mca/btl/openib/mca-btl-openib-device-params.ini index 132510d0273..d8d3372b393 100644 --- a/opal/mca/btl/openib/mca-btl-openib-device-params.ini +++ b/opal/mca/btl/openib/mca-btl-openib-device-params.ini @@ -181,6 +181,15 @@ max_inline_data = 256 ############################################################################ +[Mellanox ConnectX5] +vendor_id = 0x2c9,0x5ad,0x66a,0x8f1,0x1708,0x03ba,0x15b3,0x119f +vendor_part_id = 4119,4121 +use_eager_rdma = 1 +mtu = 4096 +max_inline_data = 256 + +############################################################################ + [IBM eHCA 4x and 12x] vendor_id = 0x5076 vendor_part_id = 0 @@ -253,6 +262,14 @@ mtu = 2048 receive_queues = P,65536,64 max_inline_data = 280 +[Chelsio T6] +vendor_id = 0x1425 +vendor_part_id = 0x6400,0x6401,0x6402,0x6403,0x6404,0x6405,0x6406,0x6407,0x6408,0x6409,0x640d,0x6410,0x6411,0x6414,0x6415 +use_eager_rdma = 1 +mtu = 2048 +receive_queues = P,65536,64 +max_inline_data = 280 + ############################################################################ # I'm *assuming* that 0x4040 is the PCI ID... 
@@ -278,6 +295,13 @@ mtu = 2048 receive_queues = P,65536,256,192,128 max_inline_data = 64 +[Intel HFI1] +vendor_id = 0x1175 +vendor_part_id = 9456,9457 +use_eager_rdma = 1 +mtu = 4096 +max_inline_data = 0 + ############################################################################ # Intel has several OUI's, including 0x8086. Amusing. :-) Intel has diff --git a/opal/mca/btl/openib/owner.txt b/opal/mca/btl/openib/owner.txt index 57b23bfe7a9..92eb51d94bd 100644 --- a/opal/mca/btl/openib/owner.txt +++ b/opal/mca/btl/openib/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner:MELLANOX +owner:Chelsio status:maintenance diff --git a/opal/mca/btl/portals4/Makefile.am b/opal/mca/btl/portals4/Makefile.am index 50fee3795a9..d7cc49eca3a 100644 --- a/opal/mca/btl/portals4/Makefile.am +++ b/opal/mca/btl/portals4/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -13,9 +13,9 @@ # Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved. # Copyright (c) 2014 Bull SAS. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/btl/portals4/btl_portals4.c b/opal/mca/btl/portals4/btl_portals4.c index 5b7b70eb5fd..b4504d502ce 100644 --- a/opal/mca/btl/portals4/btl_portals4.c +++ b/opal/mca/btl/portals4/btl_portals4.c @@ -91,7 +91,7 @@ btl_portals4_init_interface(void) opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlEQAlloc failed for NI %d: %d", __FILE__, __LINE__, interface, ret); - goto error; + goto error; } mca_btl_portals4_component.eqs_h[interface] = portals4_btl->recv_eq_h; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, @@ -99,7 +99,6 @@ btl_portals4_init_interface(void) /* Create recv_idx portal table entry */ ret = PtlPTAlloc(portals4_btl->portals_ni_h, - PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE, portals4_btl->recv_eq_h, REQ_BTL_TABLE_ID, @@ -108,7 +107,7 @@ btl_portals4_init_interface(void) opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlPTAlloc failed for NI %d: %d", __FILE__, __LINE__, interface, ret); - goto error; + goto error; } OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlPTAlloc (recv_idx) OK for NI %d recv_idx=%d", interface, portals4_btl->recv_idx)); @@ -148,7 +147,7 @@ btl_portals4_init_interface(void) ret = PtlMDBind(portals4_btl->portals_ni_h, &md, - &portals4_btl->send_md_h); + &portals4_btl->send_md_h); if (PTL_OK != ret) { opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlMDBind failed for NI %d: %d\n", @@ -217,114 +216,236 @@ btl_portals4_init_interface(void) return OPAL_ERROR; } -int -mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t** btl_peer_data, - opal_bitmap_t* reachable) +static int +create_endpoint(int interface, + opal_proc_t *proc, + mca_btl_base_endpoint_t **endpoint) { - struct mca_btl_portals4_module_t* portals4_btl = (struct 
mca_btl_portals4_module_t*) btl_base; int ret; - struct opal_proc_t *curr_proc = NULL; + size_t size; ptl_process_t *id; - size_t i, size; - bool need_activate = false; - ptl_process_t *maptable; - opal_output_verbose(50, opal_btl_base_framework.framework_output, - "mca_btl_portals4_add_procs: Adding %d procs (%d) for NI %d", (int) nprocs, - (int) portals4_btl->portals_num_procs, portals4_btl->interface_num); + OPAL_MODEX_RECV(ret, &mca_btl_portals4_component.super.btl_version, + &proc->proc_name, (void**) &id, &size); + + if (OPAL_ERR_NOT_FOUND == ret) { + OPAL_OUTPUT_VERBOSE((30, opal_btl_base_framework.framework_output, + "btl/portals4: Portals 4 BTL not available on peer: %s", opal_strerror(ret))); + return ret; + } else if (OPAL_SUCCESS != ret) { + opal_output_verbose(0, opal_btl_base_framework.framework_output, + "btl/portals4: opal_modex_recv failed: %s", opal_strerror(ret)); + return ret; + } + if (size < sizeof(ptl_process_t)) { /* no available connection */ + return OPAL_ERROR; + } + if ((size % sizeof(ptl_process_t)) != 0) { + opal_output_verbose(0, opal_btl_base_framework.framework_output, + "btl/portals4: invalid format in modex"); + return OPAL_ERROR; + } + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, + "btl/portals4: %d NI(s) declared in the modex", (int) (size/sizeof(ptl_process_t)))); - if (mca_btl_portals4_component.use_logical) { - maptable = malloc(sizeof(ptl_process_t) * nprocs); - if (NULL == maptable) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: malloc failed\n", - __FILE__, __LINE__); + *endpoint = malloc(sizeof(mca_btl_base_endpoint_t)); + if (NULL == *endpoint) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + (*endpoint)->ptl_proc = id[interface]; + + return OPAL_SUCCESS; +} + +static int +create_peer_and_endpoint(int interface, + opal_proc_t *proc, + ptl_process_t *phys_peer, + mca_btl_base_endpoint_t **endpoint) +{ + int ret; + size_t size; + ptl_process_t *id; + + OPAL_MODEX_RECV(ret, 
&mca_btl_portals4_component.super.btl_version, + &proc->proc_name, (void**) &id, &size); + + if (OPAL_ERR_NOT_FOUND == ret) { + OPAL_OUTPUT_VERBOSE((30, opal_btl_base_framework.framework_output, + "btl/portals4: Portals 4 BTL not available on peer: %s", opal_strerror(ret))); + return ret; + } else if (OPAL_SUCCESS != ret) { + opal_output_verbose(0, opal_btl_base_framework.framework_output, + "btl/portals4: opal_modex_recv failed: %s", opal_strerror(ret)); + return ret; + } + if (size < sizeof(ptl_process_t)) { /* no available connection */ + return OPAL_ERROR; + } + if ((size % sizeof(ptl_process_t)) != 0) { + opal_output_verbose(0, opal_btl_base_framework.framework_output, + "btl/portals4: invalid format in modex"); + return OPAL_ERROR; + } + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, + "btl/portals4: %d NI(s) declared in the modex", (int) (size/sizeof(ptl_process_t)))); + + /* + * check if create_endpoint() already created the endpoint. + * if not, create it here. + */ + if (NULL == *endpoint) { + *endpoint = malloc(sizeof(mca_btl_base_endpoint_t)); + if (NULL == *endpoint) { return OPAL_ERR_OUT_OF_RESOURCE; } } + /* + * regardless of who created the endpoint, set the rank here + * because we are using logical mapping. 
+ */ + (*endpoint)->ptl_proc.rank = proc->proc_name.vpid; + + phys_peer->phys.pid = id[interface].phys.pid; + phys_peer->phys.nid = id[interface].phys.nid; + opal_output_verbose(50, opal_btl_base_framework.framework_output, + "logical: global rank=%d pid=%d nid=%d\n", + proc->proc_name.vpid, phys_peer->phys.pid, phys_peer->phys.nid); - if (0 == portals4_btl->portals_num_procs) { - need_activate = true; + return OPAL_SUCCESS; +} + +static int +create_maptable(struct mca_btl_portals4_module_t *portals4_btl, + size_t nprocs, + opal_proc_t **procs, + mca_btl_base_endpoint_t **endpoint) +{ + int ret; + ptl_process_t *maptable; + + maptable = malloc(sizeof(ptl_process_t) * nprocs); + if (NULL == maptable) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: malloc failed\n", + __FILE__, __LINE__); + return OPAL_ERR_OUT_OF_RESOURCE; } - for (i = 0 ; i < nprocs ; ++i) { + for (uint32_t i = 0 ; i < nprocs ; i++) { + struct opal_proc_t *curr_proc; + curr_proc = procs[i]; /* portals doesn't support heterogeneous yet... 
*/ if (opal_proc_local_get()->proc_arch != curr_proc->proc_arch) { - continue; + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "Portals 4 BTL does not support heterogeneous operations."); + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "Proc %s architecture %x, mine %x.", + OPAL_NAME_PRINT(curr_proc->proc_name), + curr_proc->proc_arch, opal_proc_local_get()->proc_arch); + return OPAL_ERR_NOT_SUPPORTED; } - OPAL_MODEX_RECV(ret, &mca_btl_portals4_component.super.btl_version, - curr_proc, (void**) &id, &size); - - if (OPAL_ERR_NOT_FOUND == ret) { - OPAL_OUTPUT_VERBOSE((30, opal_btl_base_framework.framework_output, - "btl/portals4: Portals 4 BTL not available on peer: %s", opal_strerror(ret))); - continue; - } else if (OPAL_SUCCESS != ret) { - opal_output_verbose(0, opal_btl_base_framework.framework_output, - "btl/portals4: opal_modex_recv failed: %s", opal_strerror(ret)); + ret = create_peer_and_endpoint(portals4_btl->interface_num, + curr_proc, + &maptable[i], + &endpoint[i]); + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: create_maptable::create_peer_and_endpoint failed: %d\n", + __FILE__, __LINE__, ret); return ret; } - if (size < sizeof(ptl_process_t)) { /* no available connection */ - return OPAL_ERROR; - } - if ((size % sizeof(ptl_process_t)) != 0) { - opal_output_verbose(0, opal_btl_base_framework.framework_output, - "btl/portals4: invalid format in modex"); - return OPAL_ERROR; - } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "btl/portals4: %d NI(s) declared in the modex", (int) (size/sizeof(ptl_process_t)))); + } - btl_peer_data[i] = malloc(sizeof(mca_btl_base_endpoint_t)); - if (NULL == btl_peer_data[i]) return OPAL_ERROR; + ret = PtlSetMap(portals4_btl->portals_ni_h, + nprocs, + maptable); + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: logical mapping failed: %d\n", + 
__FILE__, __LINE__, ret); + return ret; + } + opal_output_verbose(90, opal_btl_base_framework.framework_output, + "logical mapping OK\n"); + free(maptable); - /* The modex may receive more than one id (this is the - normal case if there is more than one interface). Store the id of the corresponding - interface */ + return OPAL_SUCCESS; +} - if (mca_btl_portals4_component.use_logical) { - btl_peer_data[i]->ptl_proc.rank = i; - maptable[i].phys.pid = id[portals4_btl->interface_num].phys.pid; - maptable[i].phys.nid = id[portals4_btl->interface_num].phys.nid; - opal_output_verbose(50, opal_btl_base_framework.framework_output, - "logical: global rank=%d pid=%d nid=%d\n", - (int)i, maptable[i].phys.pid, maptable[i].phys.nid); - } else { - btl_peer_data[i]->ptl_proc = id[portals4_btl->interface_num]; +#define NEED_ALL_PROCS (mca_btl_portals4_component.use_logical) + +int +mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base, + size_t nprocs, + struct opal_proc_t **procs, + struct mca_btl_base_endpoint_t** btl_peer_data, + opal_bitmap_t* reachable) +{ + struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base; + int ret; + size_t i; + bool need_activate = false; + + opal_output_verbose(50, opal_btl_base_framework.framework_output, + "mca_btl_portals4_add_procs: Adding %d procs (%d) for NI %d", + (int) nprocs, + (int) portals4_btl->portals_num_procs, + portals4_btl->interface_num); + + if (0 == portals4_btl->portals_num_procs) { + need_activate = true; + } + + /* + * The PML handed us a list of procs that need Portals4 + * peer info. Complete those procs here. + */ + for (i = 0 ; i < nprocs ; ++i) { + struct opal_proc_t *curr_proc = procs[i]; + + /* portals doesn't support heterogeneous yet... 
*/ + if (opal_proc_local_get()->proc_arch != curr_proc->proc_arch) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "Portals 4 BTL does not support heterogeneous operations."); + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "Proc %s architecture %x, mine %x.", + OPAL_NAME_PRINT(curr_proc->proc_name), + curr_proc->proc_arch, opal_proc_local_get()->proc_arch); + return OPAL_ERR_NOT_SUPPORTED; } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "add_procs: rank=%x nid=%x pid=%x for NI %d\n", - btl_peer_data[i]->ptl_proc.rank, - btl_peer_data[i]->ptl_proc.phys.nid, - btl_peer_data[i]->ptl_proc.phys.pid, - portals4_btl->interface_num)); + ret = create_endpoint(portals4_btl->interface_num, + curr_proc, + &btl_peer_data[i]); OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, 1); /* and here we can reach */ opal_bitmap_set_bit(reachable, i); - } - if (mca_btl_portals4_component.use_logical) { - ret = PtlSetMap(portals4_btl->portals_ni_h, nprocs, maptable); - if (OPAL_SUCCESS != ret) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: logical mapping failed: %d\n", - __FILE__, __LINE__, ret); - return ret; - } - opal_output_verbose(90, opal_btl_base_framework.framework_output, - "logical mapping OK\n"); - free(maptable); + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, + "add_procs: rank=%lx nid=%x pid=%x for NI %d", + i, + btl_peer_data[i]->ptl_proc.phys.nid, + btl_peer_data[i]->ptl_proc.phys.pid, + portals4_btl->interface_num)); } if (need_activate && portals4_btl->portals_num_procs > 0) { + if (mca_btl_portals4_component.use_logical) { + ret = create_maptable(portals4_btl, nprocs, procs, btl_peer_data); + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: mca_btl_portals4_add_procs::create_maptable() failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + } + ret = btl_portals4_init_interface(); if 
(OPAL_SUCCESS != ret) { opal_output_verbose(1, opal_btl_base_framework.framework_output, @@ -333,6 +454,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base, return ret; } } + return OPAL_SUCCESS; } @@ -385,7 +507,6 @@ mca_btl_portals4_alloc(struct mca_btl_base_module_t* btl_base, size : portals4_btl->super.btl_max_send_size ; } - frag->md_h = PTL_INVALID_HANDLE; frag->base.des_segment_count = 1; frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK; frag->base.order = MCA_BTL_NO_ORDER; @@ -469,7 +590,7 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base, ret = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); *size = max_data; - if ( ret < 0 ) { + if (ret < 0) { mca_btl_portals4_free(btl_base, (mca_btl_base_descriptor_t *) frag); return NULL; } @@ -502,53 +623,52 @@ mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base, } handle->key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1); + handle->remote_offset = 0; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld\n", - portals4_btl->interface_num, base, size, (void *)handle, handle->key)); + "mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld flags=%d", + portals4_btl->interface_num, base, size, (void *)handle, handle->key, flags)); + + /* create a match entry */ + me.start = base; + me.length = size; + me.ct_handle = PTL_CT_NONE; + me.min_free = 0; + me.uid = PTL_UID_ANY; + me.options = PTL_ME_OP_GET | + PTL_ME_EVENT_LINK_DISABLE | + PTL_ME_EVENT_COMM_DISABLE | + PTL_ME_EVENT_UNLINK_DISABLE; - if (MCA_BTL_FLAGS_PUT == flags) { - /* create a match entry */ - me.start = base; - me.length = size; - me.ct_handle = PTL_CT_NONE; - me.min_free = 0; - me.uid = PTL_UID_ANY; - me.options = PTL_ME_OP_GET | PTL_ME_USE_ONCE | - PTL_ME_EVENT_LINK_DISABLE | - PTL_ME_EVENT_COMM_DISABLE | - PTL_ME_EVENT_UNLINK_DISABLE; - - if 
(mca_btl_portals4_component.use_logical) { - me.match_id.rank = endpoint->ptl_proc.rank; - } else { - me.match_id.phys.nid = endpoint->ptl_proc.phys.nid; - me.match_id.phys.pid = endpoint->ptl_proc.phys.pid; - } - me.match_bits = handle->key; - me.ignore_bits = BTL_PORTALS4_PROTOCOL_MASK | - BTL_PORTALS4_CONTEXT_MASK | - BTL_PORTALS4_SOURCE_MASK; - me.ignore_bits = 0; + if (mca_btl_portals4_component.use_logical) { + me.match_id.rank = endpoint->ptl_proc.rank; + } else { + me.match_id.phys.nid = endpoint->ptl_proc.phys.nid; + me.match_id.phys.pid = endpoint->ptl_proc.phys.pid; + } + me.match_bits = handle->key; + me.ignore_bits = BTL_PORTALS4_PROTOCOL_MASK | + BTL_PORTALS4_CONTEXT_MASK | + BTL_PORTALS4_SOURCE_MASK; + me.ignore_bits = 0; - ret = PtlMEAppend(portals4_btl->portals_ni_h, - portals4_btl->recv_idx, - &me, - PTL_PRIORITY_LIST, - handle, - &(handle->me_h)); - if (PTL_OK != ret) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: PtlMEAppend failed: %d\n", - __FILE__, __LINE__, ret); - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); - return NULL; - } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "PtlMEAppend (mca_btl_portals4_register_mem) handle=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x match_bits=%lx\n", - (void *)handle, handle->me_h, me.start, me.length, - me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits)); + ret = PtlMEAppend(portals4_btl->portals_ni_h, + portals4_btl->recv_idx, + &me, + PTL_PRIORITY_LIST, + handle, + &(handle->me_h)); + if (PTL_OK != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: PtlMEAppend failed: %d\n", + __FILE__, __LINE__, ret); + OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + return NULL; } + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, + "PtlMEAppend (mca_btl_portals4_register_mem) handle=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x 
match_bits=%lx\n", + (void *)handle, handle->me_h, me.start, me.length, + me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits)); return handle; } @@ -556,11 +676,22 @@ int mca_btl_portals4_deregister_mem(mca_btl_base_module_t *btl_base, mca_btl_base_registration_handle_t *handle) { + int ret; struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t*) btl_base; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "mca_btl_portals4_deregister_mem NI=%d handle=%p key=%ld\n", - portals4_btl->interface_num, (void *)handle, handle->key)); + "mca_btl_portals4_deregister_mem NI=%d handle=%p key=%ld me_h=%d\n", + portals4_btl->interface_num, (void *)handle, handle->key, handle->me_h)); + + if (!PtlHandleIsEqual(handle->me_h, PTL_INVALID_HANDLE)) { + ret = PtlMEUnlink(handle->me_h); + if (PTL_OK != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: PtlMEUnlink failed: %d\n",__FILE__, __LINE__, ret); + return OPAL_ERROR; + } + handle->me_h = PTL_INVALID_HANDLE; + } free(handle); diff --git a/opal/mca/btl/portals4/btl_portals4.h b/opal/mca/btl/portals4/btl_portals4.h index 9af415b5802..92c294b80a0 100644 --- a/opal/mca/btl/portals4/btl_portals4.h +++ b/opal/mca/btl/portals4/btl_portals4.h @@ -42,12 +42,15 @@ BEGIN_C_DECLS struct mca_btl_portals4_component_t { /* base BTL component */ mca_btl_base_component_2_0_0_t super; - + unsigned int num_btls; unsigned int max_btls; /* Maximum number of accepted Portals4 cards */ struct mca_btl_portals4_module_t** btls; /* array of available BTL modules */ + /* add_procs() can get called multiple times. this prevents multiple calls to portals4_init_interface(). 
*/ + int need_init; + /* Use the logical to physical table to accelerate portals4 adressing: 1 (true) : 0 (false) */ int use_logical; @@ -76,6 +79,9 @@ struct mca_btl_portals4_component_t { /** Event queue handles table used in PtlEQPoll */ ptl_handle_eq_t *eqs_h; + + /** Upper limit for message sizes */ + unsigned long portals_max_msg_size; }; typedef struct mca_btl_portals4_component_t mca_btl_portals4_component_t; @@ -191,17 +197,17 @@ int mca_btl_portals4_del_procs(struct mca_btl_base_module_t* btl_base, struct opal_proc_t **procs, struct mca_btl_base_endpoint_t** peers); -mca_btl_base_descriptor_t* -mca_btl_portals4_alloc(struct mca_btl_base_module_t* btl_base, +mca_btl_base_descriptor_t* +mca_btl_portals4_alloc(struct mca_btl_base_module_t* btl_base, struct mca_btl_base_endpoint_t* endpoint, uint8_t order, size_t size, - uint32_t flags); + uint32_t flags); -int mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base, - mca_btl_base_descriptor_t* des); +int mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base, + mca_btl_base_descriptor_t* des); -mca_btl_base_descriptor_t* +mca_btl_base_descriptor_t* mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base, struct mca_btl_base_endpoint_t* peer, struct opal_convertor_t* convertor, @@ -212,7 +218,7 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base, int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base, struct mca_btl_base_endpoint_t* btl_peer, - struct mca_btl_base_descriptor_t* descriptor, + struct mca_btl_base_descriptor_t* descriptor, mca_btl_base_tag_t tag); @@ -224,7 +230,7 @@ int mca_btl_portals4_sendi(struct mca_btl_base_module_t* btl_base, size_t payload_size, uint8_t order, uint32_t flags, - mca_btl_base_tag_t tag, + mca_btl_base_tag_t tag, mca_btl_base_descriptor_t** des); int mca_btl_portals4_put(struct mca_btl_base_module_t* btl_base, @@ -252,6 +258,8 @@ struct mca_btl_base_registration_handle_t { ptl_match_bits_t key; /** Portals4 me_h */ 
ptl_handle_me_t me_h; + /** Remote offset */ + ptl_size_t remote_offset; }; /* diff --git a/opal/mca/btl/portals4/btl_portals4_component.c b/opal/mca/btl/portals4/btl_portals4_component.c index 33086c24e2e..eda9cd81f70 100644 --- a/opal/mca/btl/portals4/btl_portals4_component.c +++ b/opal/mca/btl/portals4/btl_portals4_component.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. @@ -61,7 +61,7 @@ mca_btl_portals4_component_t mca_btl_portals4_component = { /* The component is not checkpoint ready */ .param_field = MCA_BASE_METADATA_PARAM_NONE }, - + .btl_init = mca_btl_portals4_component_init, .btl_progress = mca_btl_portals4_component_progress, } @@ -201,6 +201,18 @@ mca_btl_portals4_component_register(void) OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, &(mca_btl_portals4_component.portals_recv_mds_size)); + + mca_btl_portals4_component.portals_max_msg_size = PTL_SIZE_MAX; + (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version, + "max_msg_size", + "Max size supported by portals4 (above that, a message is cut into messages less than that size)", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, + NULL, + 0, + 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &(mca_btl_portals4_component.portals_max_msg_size)); return OPAL_SUCCESS; } @@ -216,16 +228,21 @@ mca_btl_portals4_component_open(void) mca_btl_portals4_module.super.btl_eager_limit = 32 * 1024; mca_btl_portals4_module.super.btl_rndv_eager_limit = 32 * 1024; mca_btl_portals4_module.super.btl_max_send_size = 64 * 1024; + if (mca_btl_portals4_module.super.btl_max_send_size > 
mca_btl_portals4_component.portals_max_msg_size) + mca_btl_portals4_module.super.btl_max_send_size = mca_btl_portals4_component.portals_max_msg_size; mca_btl_portals4_module.super.btl_rdma_pipeline_send_length = 64 * 1024; mca_btl_portals4_module.super.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0; mca_btl_portals4_module.super.btl_flags = MCA_BTL_FLAGS_RDMA | - MCA_BTL_FLAGS_RDMA_MATCHED; + MCA_BTL_FLAGS_RDMA_MATCHED | + MCA_BTL_FLAGS_SEND; mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); mca_btl_portals4_module.super.btl_get_limit = SIZE_MAX; + if (mca_btl_portals4_module.super.btl_get_limit > mca_btl_portals4_component.portals_max_msg_size) + mca_btl_portals4_module.super.btl_get_limit = mca_btl_portals4_component.portals_max_msg_size; mca_btl_portals4_module.super.btl_put_limit = 0; /* not implemented */ mca_btl_portals4_module.super.btl_get_alignment = 0; mca_btl_portals4_module.super.btl_put_alignment = 0; @@ -251,6 +268,15 @@ mca_btl_portals4_component_open(void) mca_btl_portals4_module.portals_outstanding_ops = 0; mca_btl_portals4_module.recv_idx = (ptl_pt_index_t) ~0UL; + if (1 == mca_btl_portals4_component.use_logical) { + /* + * set the MCA_BTL_FLAGS_SINGLE_ADD_PROCS flag here in the default + * module, so it gets copied into the module for each Portals4 + * interface during init(). 
+ */ + mca_btl_portals4_module.super.btl_flags |= MCA_BTL_FLAGS_SINGLE_ADD_PROCS; + } + return OPAL_SUCCESS; } @@ -283,6 +309,7 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls, mca_btl_base_module_t **btls = NULL; unsigned int ret, interface; ptl_handle_ni_t *portals4_nis_h = NULL; + ptl_ni_limits_t portals4_ni_limits ; ptl_process_t *ptl_process_ids = NULL; opal_output_verbose(50, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_init\n"); @@ -315,14 +342,14 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls, PTL_NI_LOGICAL | PTL_NI_MATCHING, PTL_PID_ANY, /* let library assign our pid */ NULL, /* no desired limits */ - NULL, /* actual limits */ + &portals4_ni_limits, /* actual limits */ &portals4_nis_h[*num_btls] /* our interface handle */ ); else ret = PtlNIInit((1 == mca_btl_portals4_component.max_btls) ? PTL_IFACE_DEFAULT : interface, PTL_NI_PHYSICAL | PTL_NI_MATCHING, PTL_PID_ANY, /* let library assign our pid */ NULL, /* no desired limits */ - NULL, /* actual limits */ + &portals4_ni_limits, /* actual limits */ &portals4_nis_h[*num_btls] /* our interface handle */ ); if (PTL_OK != ret) { @@ -330,7 +357,15 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls, "%s:%d: PtlNIInit failed for NI %d: %d\n", __FILE__, __LINE__, interface, ret); } else { - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlNIInit OK for NI %d\n", *num_btls)); + if (mca_btl_portals4_component.portals_max_msg_size > portals4_ni_limits.max_msg_size) + mca_btl_portals4_component.portals_max_msg_size = portals4_ni_limits.max_msg_size; + if (mca_btl_portals4_module.super.btl_max_send_size > portals4_ni_limits.max_msg_size) + mca_btl_portals4_module.super.btl_max_send_size = portals4_ni_limits.max_msg_size; + if (mca_btl_portals4_module.super.btl_get_limit > portals4_ni_limits.max_msg_size) + mca_btl_portals4_module.super.btl_get_limit = 
portals4_ni_limits.max_msg_size; + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlNIInit OK for NI %d max_msg_size=%ld", + *num_btls, mca_btl_portals4_component.portals_max_msg_size)); + (*num_btls)++; } } @@ -422,7 +457,7 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls, ptl_process_ids[interface].rank, ptl_process_ids[interface].phys.nid, ptl_process_ids[interface].phys.pid)); } - OPAL_MODEX_SEND(ret, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND(ret, OPAL_PMIX_GLOBAL, &mca_btl_portals4_component.super.btl_version, ptl_process_ids, mca_btl_portals4_component.num_btls * sizeof(ptl_process_t)); if (OPAL_SUCCESS != ret) { @@ -435,12 +470,14 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls, ptl_process_ids = NULL; btls = malloc(mca_btl_portals4_component.num_btls * sizeof(mca_btl_portals4_module_t*) ); - memcpy(btls , mca_btl_portals4_component.btls, + memcpy(btls , mca_btl_portals4_component.btls, mca_btl_portals4_component.num_btls*sizeof(mca_btl_portals4_module_t*) ); opal_output_verbose(1, opal_btl_base_framework.framework_output, "The btl portals4 component has been initialized and uses %d NI(s)", mca_btl_portals4_component.num_btls); + mca_btl_portals4_component.need_init = 1; + return btls; error: @@ -680,20 +717,18 @@ mca_btl_portals4_component_progress(void) /* The distant PtlMEAppend is not finished (distant PTL_EVENT_LINK not received) */ /* Re-issue the PtlGet (see btl_portals4_rdma.c) */ - ret = PtlGet(frag->md_h, - 0, + ret = PtlGet(portals4_btl->send_md_h, + (ptl_size_t) frag->addr, frag->length, frag->peer_proc, portals4_btl->recv_idx, frag->match_bits, /* match bits */ - 0, + 0, // Warning : should be ev.remote_offset but it is not defined, frag); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: Re-issued PtlGet failed: %d", __FILE__, __LINE__, ret); - PtlMDRelease(frag->md_h); - frag->md_h = 
PTL_INVALID_HANDLE; return OPAL_ERROR; } @@ -712,11 +747,9 @@ mca_btl_portals4_component_progress(void) frag->rdma_cb.context, frag->rdma_cb.data, OPAL_SUCCESS); - PtlMDRelease(frag->md_h); - frag->md_h = PTL_INVALID_HANDLE; OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag); - OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); + OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops)); goto done; diff --git a/opal/mca/btl/portals4/btl_portals4_frag.c b/opal/mca/btl/portals4/btl_portals4_frag.c index 5358553c4a1..6aa20a14af4 100644 --- a/opal/mca/btl/portals4/btl_portals4_frag.c +++ b/opal/mca/btl/portals4/btl_portals4_frag.c @@ -5,27 +5,27 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" -#include "btl_portals4.h" -#include "btl_portals4_frag.h" +#include "btl_portals4.h" +#include "btl_portals4_frag.h" static void -mca_btl_portals4_frag_common_send_constructor(mca_btl_portals4_frag_t* frag) -{ +mca_btl_portals4_frag_common_send_constructor(mca_btl_portals4_frag_t* frag) +{ frag->base.des_flags = 0; frag->base.des_segments = &frag->segments[0].base; frag->base.des_segment_count = 2; @@ -38,10 +38,10 @@ mca_btl_portals4_frag_common_send_constructor(mca_btl_portals4_frag_t* frag) } static void -mca_btl_portals4_frag_eager_constructor(mca_btl_portals4_frag_t* frag) -{ - frag->size = mca_btl_portals4_module.super.btl_eager_limit; - mca_btl_portals4_frag_common_send_constructor(frag); +mca_btl_portals4_frag_eager_constructor(mca_btl_portals4_frag_t* frag) +{ + frag->size = mca_btl_portals4_module.super.btl_eager_limit; + mca_btl_portals4_frag_common_send_constructor(frag); frag->type = BTL_PORTALS4_FRAG_TYPE_EAGER; } @@ -55,41 +55,41 @@ mca_btl_portals4_frag_eager_destructor(mca_btl_portals4_frag_t* frag) } static void -mca_btl_portals4_frag_max_constructor(mca_btl_portals4_frag_t* frag) -{ - frag->size = mca_btl_portals4_module.super.btl_max_send_size; - mca_btl_portals4_frag_common_send_constructor(frag); +mca_btl_portals4_frag_max_constructor(mca_btl_portals4_frag_t* frag) +{ + frag->size = mca_btl_portals4_module.super.btl_max_send_size; + mca_btl_portals4_frag_common_send_constructor(frag); frag->type = BTL_PORTALS4_FRAG_TYPE_MAX; } static void -mca_btl_portals4_frag_user_constructor(mca_btl_portals4_frag_t* frag) -{ +mca_btl_portals4_frag_user_constructor(mca_btl_portals4_frag_t* frag) +{ frag->base.des_flags = 0; - frag->size = 0; + frag->size = 0; frag->type = BTL_PORTALS4_FRAG_TYPE_USER; } OBJ_CLASS_INSTANCE( - mca_btl_portals4_frag_t, - mca_btl_base_descriptor_t, - NULL, - NULL); + mca_btl_portals4_frag_t, + mca_btl_base_descriptor_t, + 
NULL, + NULL); OBJ_CLASS_INSTANCE( - mca_btl_portals4_frag_eager_t, - mca_btl_base_descriptor_t, - mca_btl_portals4_frag_eager_constructor, + mca_btl_portals4_frag_eager_t, + mca_btl_base_descriptor_t, + mca_btl_portals4_frag_eager_constructor, mca_btl_portals4_frag_eager_destructor); OBJ_CLASS_INSTANCE( - mca_btl_portals4_frag_max_t, - mca_btl_base_descriptor_t, - mca_btl_portals4_frag_max_constructor, - NULL); + mca_btl_portals4_frag_max_t, + mca_btl_base_descriptor_t, + mca_btl_portals4_frag_max_constructor, + NULL); OBJ_CLASS_INSTANCE( - mca_btl_portals4_frag_user_t, - mca_btl_base_descriptor_t, - mca_btl_portals4_frag_user_constructor, - NULL); + mca_btl_portals4_frag_user_t, + mca_btl_base_descriptor_t, + mca_btl_portals4_frag_user_constructor, + NULL); diff --git a/opal/mca/btl/portals4/btl_portals4_frag.h b/opal/mca/btl/portals4/btl_portals4_frag.h index 814a80a82ee..8f3c6e49ac3 100644 --- a/opal/mca/btl/portals4/btl_portals4_frag.h +++ b/opal/mca/btl/portals4/btl_portals4_frag.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,7 +28,7 @@ BEGIN_C_DECLS struct mca_btl_portals4_segment_t { - mca_btl_base_segment_t base; + mca_btl_base_segment_t base; ptl_match_bits_t key; }; typedef struct mca_btl_portals4_segment_t mca_btl_portals4_segment_t; @@ -37,19 +37,18 @@ typedef struct mca_btl_portals4_segment_t mca_btl_portals4_segment_t; * Portals send fragment derived type */ struct mca_btl_portals4_frag_t { - mca_btl_base_descriptor_t base; - mca_btl_portals4_segment_t segments[1]; + mca_btl_base_descriptor_t base; + mca_btl_portals4_segment_t segments[1]; /* needed for retransmit case */ - struct mca_btl_base_endpoint_t *endpoint; + struct mca_btl_base_endpoint_t *endpoint; /* needed for retransmit case */ mca_btl_base_header_t hdr; /* handle to use for communication */ ptl_handle_me_t me_h; - /* handle to use for communication */ - ptl_handle_md_t md_h; /* size of the allocated memory region -- not the amount of data we need to send */ - size_t size; + void *addr; + size_t size; /* match bits for retransmit case */ ptl_match_bits_t match_bits; /* length for retransmit case */ @@ -65,22 +64,22 @@ struct mca_btl_portals4_frag_t { mca_btl_base_registration_handle_t *local_handle; } rdma_cb; - enum { BTL_PORTALS4_FRAG_TYPE_EAGER, + enum { BTL_PORTALS4_FRAG_TYPE_EAGER, BTL_PORTALS4_FRAG_TYPE_MAX, BTL_PORTALS4_FRAG_TYPE_USER } type; unsigned char data[16]; }; -typedef struct mca_btl_portals4_frag_t mca_btl_portals4_frag_t; +typedef struct mca_btl_portals4_frag_t mca_btl_portals4_frag_t; OBJ_CLASS_DECLARATION(mca_btl_portals4_frag_t); -typedef struct mca_btl_portals4_frag_t mca_btl_portals4_frag_eager_t; -OBJ_CLASS_DECLARATION(mca_btl_portals4_frag_eager_t); +typedef struct mca_btl_portals4_frag_t mca_btl_portals4_frag_eager_t; +OBJ_CLASS_DECLARATION(mca_btl_portals4_frag_eager_t); -typedef struct mca_btl_portals4_frag_t mca_btl_portals4_frag_max_t; -OBJ_CLASS_DECLARATION(mca_btl_portals4_frag_max_t); +typedef 
struct mca_btl_portals4_frag_t mca_btl_portals4_frag_max_t; +OBJ_CLASS_DECLARATION(mca_btl_portals4_frag_max_t); -typedef struct mca_btl_portals4_frag_t mca_btl_portals4_frag_user_t; -OBJ_CLASS_DECLARATION(mca_btl_portals4_frag_user_t); +typedef struct mca_btl_portals4_frag_t mca_btl_portals4_frag_user_t; +OBJ_CLASS_DECLARATION(mca_btl_portals4_frag_user_t); /* * Macros to allocate/return descriptors from module specific diff --git a/opal/mca/btl/portals4/btl_portals4_rdma.c b/opal/mca/btl/portals4/btl_portals4_rdma.c index 76898a41e11..33fb9ab326e 100644 --- a/opal/mca/btl/portals4/btl_portals4_rdma.c +++ b/opal/mca/btl/portals4/btl_portals4_rdma.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -50,7 +50,6 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, { mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base; mca_btl_portals4_frag_t *frag = NULL; - ptl_md_t md; int ret; /* reserve space in the event queue for rdma operations immediately */ @@ -78,49 +77,29 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base, frag->endpoint = btl_peer; frag->hdr.tag = MCA_BTL_TAG_MAX; - /* Bind the memory */ - md.start = (void *)local_address; - md.length = size; - md.options = 0; - md.eq_handle = portals4_btl->recv_eq_h; - md.ct_handle = PTL_CT_NONE; - - ret = PtlMDBind(portals4_btl->portals_ni_h, - &md, - &frag->md_h); - - if (OPAL_UNLIKELY(PTL_OK != ret)) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: PtlMDBind failed: %d", - __FILE__, __LINE__, ret); - return OPAL_ERROR; - } - frag->match_bits = remote_handle->key; - frag->length = md.length; + frag->addr = local_address; + frag->length = size; frag->peer_proc = btl_peer->ptl_proc; - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n", - md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits)); + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet offset=%p length=%ld remote_offset=%p nid=%x pid=%x match_bits=%lx", + local_address, size, (void*)local_handle->remote_offset, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits)); - ret = PtlGet(frag->md_h, - 0, - md.length, + ret = PtlGet(portals4_btl->send_md_h, + (ptl_size_t) local_address, + size, btl_peer->ptl_proc, portals4_btl->recv_idx, frag->match_bits, /* match bits */ - 0, + local_handle->remote_offset, frag); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlGet failed: %d", 
__FILE__, __LINE__, ret); - PtlMDRelease(frag->md_h); - frag->md_h = PTL_INVALID_HANDLE; return OPAL_ERROR; } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "SUCCESS: PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n", - md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits)); + local_handle->remote_offset += size; return OPAL_SUCCESS; } diff --git a/opal/mca/btl/portals4/btl_portals4_recv.c b/opal/mca/btl/portals4/btl_portals4_recv.c index f557975e3cc..c8dbfa3cc26 100644 --- a/opal/mca/btl/portals4/btl_portals4_recv.c +++ b/opal/mca/btl/portals4/btl_portals4_recv.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/btl/portals4/btl_portals4_recv.h b/opal/mca/btl/portals4/btl_portals4_recv.h index 5375fb75ff4..be7e98ca7cf 100644 --- a/opal/mca/btl/portals4/btl_portals4_recv.h +++ b/opal/mca/btl/portals4/btl_portals4_recv.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,19 +21,19 @@ #define OPAL_BTL_PORTALS4_RECV_H #include "btl_portals4_frag.h" - + struct mca_btl_portals4_recv_block_t { opal_list_item_t base; - + mca_btl_portals4_module_t *btl; - - void *start; + + void *start; size_t length; ptl_handle_me_t me_h; volatile bool full; volatile int32_t pending; -}; +}; typedef struct mca_btl_portals4_recv_block_t mca_btl_portals4_recv_block_t; OBJ_CLASS_DECLARATION(mca_btl_portals4_recv_block_t); @@ -85,7 +85,7 @@ mca_btl_portals4_activate_block(mca_btl_portals4_recv_block_t *block) me.options = PTL_ME_OP_PUT | PTL_ME_MANAGE_LOCAL | - PTL_ME_EVENT_LINK_DISABLE | + PTL_ME_EVENT_LINK_DISABLE | PTL_ME_MAY_ALIGN; if (mca_btl_portals4_component.use_logical) { diff --git a/opal/mca/btl/portals4/btl_portals4_send.c b/opal/mca/btl/portals4/btl_portals4_send.c index 0b50a20337e..1f50fb2ef58 100644 --- a/opal/mca/btl/portals4/btl_portals4_send.c +++ b/opal/mca/btl/portals4/btl_portals4_send.c @@ -5,19 +5,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -29,7 +29,7 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base, struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, + struct mca_btl_base_descriptor_t* descriptor, mca_btl_base_tag_t tag) { struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base; @@ -62,7 +62,7 @@ int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base, OPAL_OUTPUT_VERBOSE((50, opal_btl_base_framework.framework_output, "PtlPut frag=%p rank=%x pid=%x tag=%x len=%ld match_bits=%lx\n", - (void*)frag, endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag, + (void*)frag, endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag, put_length, (uint64_t)match_bits)); ret = PtlPut(portals4_btl->send_md_h, @@ -96,7 +96,7 @@ int mca_btl_portals4_sendi(struct mca_btl_base_module_t* btl_base, size_t payload_size, uint8_t order, uint32_t flags, - mca_btl_base_tag_t tag, + mca_btl_base_tag_t tag, mca_btl_base_descriptor_t** des) { opal_output(opal_btl_base_framework.framework_output, "mca_btl_portals_sendi is not implemented"); diff --git a/opal/mca/btl/portals4/configure.m4 b/opal/mca/btl/portals4/configure.m4 index 7ad3a3a0655..cc7c0dbda2c 100644 --- a/opal/mca/btl/portals4/configure.m4 +++ b/opal/mca/btl/portals4/configure.m4 @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,13 +14,13 @@ # Copyright (c) 2010 Sandia National Laboratories. All rights reserved. # Copyright (c) 2014 Bull SAS. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # -# MCA_btl_portals4_CONFIG(action-if-can-compile, +# MCA_btl_portals4_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_btl_portals4_CONFIG],[ diff --git a/opal/mca/btl/scif/Makefile.am b/opal/mca/btl/scif/Makefile.am index 4a0ea7da322..da1c9f7f5a7 100644 --- a/opal/mca/btl/scif/Makefile.am +++ b/opal/mca/btl/scif/Makefile.am @@ -2,9 +2,9 @@ # # Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights # reserved. -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -39,7 +39,7 @@ mcacomponentdir = $(opallibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_btl_scif_la_SOURCES = $(scif_SOURCES) nodist_mca_btl_scif_la_SOURCES = $(scif_nodist_SOURCES) -mca_btl_scif_la_LIBADD = $(btl_scif_LIBS) +mca_btl_scif_la_LIBADD = $(btl_scif_LIBS) mca_btl_scif_la_LDFLAGS = -module -avoid-version $(btl_scif_LDFLAGS) noinst_LTLIBRARIES = $(component_noinst) diff --git a/opal/mca/btl/scif/btl_scif.h b/opal/mca/btl/scif/btl_scif.h index 496f508e31b..9b5917224cc 100644 --- a/opal/mca/btl/scif/btl_scif.h +++ b/opal/mca/btl/scif/btl_scif.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -16,9 +16,6 @@ #include "opal_config.h" -#include "opal/mca/mpool/mpool.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/mpool/grdma/mpool_grdma.h" #include "opal/util/output.h" #include "opal_stdint.h" #include "opal/util/proc.h" @@ -26,6 +23,8 @@ #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/base.h" #include "opal/mca/btl/base/btl_base_error.h" +#include "opal/mca/rcache/rcache.h" +#include "opal/mca/rcache/base/base.h" #include #include @@ -93,6 +92,8 @@ typedef struct mca_btl_scif_module_t { volatile bool exiting; bool listening; + + mca_rcache_base_module_t *rcache; } mca_btl_scif_module_t; typedef struct mca_btl_scif_component_t { @@ -133,7 +134,7 @@ typedef struct mca_btl_scif_component_t { int mca_btl_scif_module_init (void); /** - * BML->BTL notification of change in the process list. + * BML->BTL notification of change in the process list. * * location: btl_scif_add_procs.c * @@ -235,12 +236,12 @@ struct mca_btl_scif_registration_handle_t { typedef struct mca_btl_scif_registration_handle_t mca_btl_scif_registration_handle_t; typedef struct mca_btl_scif_reg_t { - mca_mpool_base_registration_t base; + mca_rcache_base_registration_t base; /** per-endpoint btl handles for this registration */ mca_btl_scif_registration_handle_t *handles; } mca_btl_scif_reg_t; -/* Global structures */ +/* Global structures */ OPAL_MODULE_DECLSPEC extern mca_btl_scif_component_t mca_btl_scif_component; OPAL_MODULE_DECLSPEC extern mca_btl_scif_module_t mca_btl_scif_module; diff --git a/opal/mca/btl/scif/btl_scif_add_procs.c b/opal/mca/btl/scif/btl_scif_add_procs.c index 49099f5bdc6..b29d694fc65 100644 --- a/opal/mca/btl/scif/btl_scif_add_procs.c +++ b/opal/mca/btl/scif/btl_scif_add_procs.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -17,8 +17,7 @@ #include "btl_scif.h" #include "btl_scif_frag.h" -static int -mca_btl_scif_setup_mpools (mca_btl_scif_module_t *scif_module); +static int mca_btl_scif_setup_rcache (mca_btl_scif_module_t *scif_module); static void *mca_btl_scif_connect_accept (void *arg); int mca_btl_scif_add_procs(struct mca_btl_base_module_t* btl, @@ -48,16 +47,16 @@ int mca_btl_scif_add_procs(struct mca_btl_base_module_t* btl, procs_on_board++; } - /* allocate space for the detected peers and setup the mpool */ + /* allocate space for the detected peers and setup the rcache */ if (NULL == scif_module->endpoints) { scif_module->endpoints = calloc (procs_on_board, sizeof (mca_btl_base_endpoint_t)); if (OPAL_UNLIKELY(NULL == scif_module->endpoints)) { return OPAL_ERR_OUT_OF_RESOURCE; } - rc = mca_btl_scif_setup_mpools (scif_module); + rc = mca_btl_scif_setup_rcache (scif_module); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - BTL_ERROR(("btl/scif error setting up mpools/free lists")); + BTL_ERROR(("btl/scif error setting up rcache or free lists")); return rc; } } @@ -157,7 +156,7 @@ int mca_btl_scif_del_procs (struct mca_btl_base_module_t *btl, return OPAL_SUCCESS; } -static int scif_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg) +static int scif_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg) { mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *)reg; size_t size = (size_t)((uintptr_t) reg->bound - (uintptr_t) reg->base); @@ -178,7 +177,7 @@ static int scif_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg) } static int scif_reg_mem (void *reg_data, void *base, size_t size, - mca_mpool_base_registration_t *reg) + mca_rcache_base_registration_t *reg) { mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *)reg; int rc = OPAL_SUCCESS; @@ -186,7 +185,7 @@ static int scif_reg_mem (void *reg_data, void *base, size_t size, 
scif_reg->handles = calloc (mca_btl_scif_module.endpoint_count, sizeof (scif_reg->handles[0])); - /* intialize all scif offsets to -1 and initialize the pointer back to the mpool registration */ + /* intialize all scif offsets to -1 and initialize the pointer back to the rcache registration */ for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) { scif_reg->handles[i].btl_handle.scif_offset = -1; scif_reg->handles[i].btl_handle.scif_base = (intptr_t) base; @@ -211,22 +210,20 @@ static int scif_reg_mem (void *reg_data, void *base, size_t size, return rc; } -static int -mca_btl_scif_setup_mpools (mca_btl_scif_module_t *scif_module) +static int mca_btl_scif_setup_rcache (mca_btl_scif_module_t *scif_module) { - struct mca_mpool_base_resources_t mpool_resources; + mca_rcache_base_resources_t rcache_resources; int rc; - /* initialize the grdma mpool */ - mpool_resources.pool_name = "scif"; - mpool_resources.reg_data = (void *) scif_module; - mpool_resources.sizeof_reg = sizeof (mca_btl_scif_reg_t); - mpool_resources.register_mem = scif_reg_mem; - mpool_resources.deregister_mem = scif_dereg_mem; - scif_module->super.btl_mpool = - mca_mpool_base_module_create("grdma", scif_module, &mpool_resources); - if (NULL == scif_module->super.btl_mpool) { - BTL_ERROR(("error creating grdma mpool")); + /* initialize the grdma rcache */ + rcache_resources.cache_name = "scif"; + rcache_resources.reg_data = (void *) scif_module; + rcache_resources.sizeof_reg = sizeof (mca_btl_scif_reg_t); + rcache_resources.register_mem = scif_reg_mem; + rcache_resources.deregister_mem = scif_dereg_mem; + scif_module->rcache = mca_rcache_base_module_create ("grdma", scif_module, &rcache_resources); + if (NULL == scif_module->rcache) { + BTL_ERROR(("error creating grdma rcache")); return OPAL_ERROR; } diff --git a/opal/mca/btl/scif/btl_scif_component.c b/opal/mca/btl/scif/btl_scif_component.c index 36db8804451..42ef9b6473f 100644 --- a/opal/mca/btl/scif/btl_scif_component.c +++ 
b/opal/mca/btl/scif/btl_scif_component.c @@ -4,7 +4,7 @@ * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -158,7 +158,7 @@ static int btl_scif_component_register(void) NULL, NULL, NULL, &mca_btl_scif_component.put_count); #endif - mca_btl_scif_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH; + mca_btl_scif_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 1; mca_btl_scif_module.super.btl_eager_limit = 1 * 1024; mca_btl_scif_module.super.btl_rndv_eager_limit = 1 * 1024; mca_btl_scif_module.super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024; @@ -216,7 +216,7 @@ static int mca_btl_scif_modex_send (void) memset(&modex, 0, sizeof(mca_btl_scif_modex_t)); modex.port_id = mca_btl_scif_module.port_id; - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_LOCAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_LOCAL, &mca_btl_scif_component.super.btl_version, &modex, sizeof (modex)); return rc; diff --git a/opal/mca/btl/scif/btl_scif_endpoint.h b/opal/mca/btl/scif/btl_scif_endpoint.h index 39b6782a478..c04ea35405b 100644 --- a/opal/mca/btl/scif/btl_scif_endpoint.h +++ b/opal/mca/btl/scif/btl_scif_endpoint.h @@ -10,7 +10,7 @@ * $HEADER$ */ -#ifndef MCA_BTL_SCIF_ENDPOINT_H +#ifndef MCA_BTL_SCIF_ENDPOINT_H #define MCA_BTL_SCIF_ENDPOINT_H #include "btl_scif.h" @@ -81,7 +81,7 @@ static inline int mca_btl_scif_ep_init (mca_btl_scif_endpoint_t *endpoint, endpoint->state = MCA_BTL_SCIF_EP_STATE_INIT; OPAL_MODEX_RECV(rc, &mca_btl_scif_component.super.btl_version, - peer_proc, (void **) &modex, &msg_size); + &peer_proc->proc_name, (void **) &modex, &msg_size); if (OPAL_SUCCESS != rc) { return rc; } diff --git a/opal/mca/btl/scif/btl_scif_frag.h b/opal/mca/btl/scif/btl_scif_frag.h index aea6005457c..d17ea2a5cec 100644 --- 
a/opal/mca/btl/scif/btl_scif_frag.h +++ b/opal/mca/btl/scif/btl_scif_frag.h @@ -60,8 +60,8 @@ static inline int mca_btl_scif_frag_alloc (mca_btl_base_endpoint_t *ep, static inline int mca_btl_scif_frag_return (mca_btl_scif_base_frag_t *frag) { if (frag->registration) { - frag->endpoint->btl->super.btl_mpool->mpool_deregister(frag->endpoint->btl->super.btl_mpool, - &frag->registration->base); + frag->endpoint->btl->rcache->rcache_deregister (frag->endpoint->btl->rcache, + &frag->registration->base); frag->registration = NULL; } diff --git a/opal/mca/btl/scif/btl_scif_get.c b/opal/mca/btl/scif/btl_scif_get.c index 131352b3276..3b68dfe8c95 100644 --- a/opal/mca/btl/scif/btl_scif_get.c +++ b/opal/mca/btl/scif/btl_scif_get.c @@ -41,7 +41,7 @@ int mca_btl_scif_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t roffset = remote_handle->scif_offset + (off_t)(remote_address - remote_handle->scif_base); loffset = local_handle->scif_offset + (off_t)((intptr_t)local_address - local_handle->scif_base); - + if (mca_btl_scif_component.rma_use_cpu) { scif_flags = SCIF_RMA_USECPU; } diff --git a/opal/mca/btl/scif/btl_scif_module.c b/opal/mca/btl/scif/btl_scif_module.c index 67e57dd2c91..e5d3f09da8a 100644 --- a/opal/mca/btl/scif/btl_scif_module.c +++ b/opal/mca/btl/scif/btl_scif_module.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -123,6 +123,11 @@ mca_btl_scif_module_finalize (struct mca_btl_base_module_t *btl) scif_module->endpoints = NULL; } + if (NULL != scif_module->rcache) { + mca_rcache_base_module_destroy (scif_module->rcache); + scif_module->rcache = NULL; + } + /* close the listening endpoint */ if (mca_btl_scif_module.listening && -1 != mca_btl_scif_module.scif_fd) { /* wake up the scif thread */ @@ -180,7 +185,9 @@ static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca mca_btl_base_endpoint_t *endpoint, void *base, size_t size, uint32_t flags) { + mca_btl_scif_module_t *scif_module = &mca_btl_scif_module; mca_btl_scif_reg_t *scif_reg; + int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; int rc; if (MCA_BTL_ENDPOINT_ANY == endpoint) { @@ -199,8 +206,8 @@ static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca } } - rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0, - (mca_mpool_base_registration_t **) &scif_reg); + rc = scif_module->rcache->rcache_register (scif_module->rcache, base, size, 0, access_flags, + (mca_rcache_base_registration_t **) &scif_reg); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { return NULL; } @@ -209,7 +216,7 @@ static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca if ((off_t) -1 == scif_reg->handles[endpoint->id].btl_handle.scif_offset) { size_t seg_size = (size_t)((uintptr_t) scif_reg->base.bound - (uintptr_t) scif_reg->base.base) + 1; - /* NTH: until we determine a way to pass permissions to the mpool just make all segments + /* NTH: until we determine a way to pass permissions to the rcache just make all segments * read/write */ scif_reg->handles[endpoint->id].btl_handle.scif_offset = scif_register (endpoint->scif_epd, scif_reg->base.base, seg_size, 0, SCIF_PROT_READ | @@ -224,9 +231,10 @@ static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca static int mca_btl_scif_deregister_mem (struct mca_btl_base_module_t *btl, 
mca_btl_base_registration_handle_t *handle) { mca_btl_scif_registration_handle_t *scif_handle = (mca_btl_scif_registration_handle_t *) handle; + mca_btl_scif_module_t *scif_module = &mca_btl_scif_module; mca_btl_scif_reg_t *scif_reg = scif_handle->reg; - btl->btl_mpool->mpool_deregister (btl->btl_mpool, &scif_reg->base); + scif_module->rcache->rcache_deregister (scif_module->rcache, &scif_reg->base); return OPAL_SUCCESS; } diff --git a/opal/mca/btl/scif/btl_scif_send.c b/opal/mca/btl/scif/btl_scif_send.c index e7109b59ed3..008e23b439c 100644 --- a/opal/mca/btl/scif/btl_scif_send.c +++ b/opal/mca/btl/scif/btl_scif_send.c @@ -107,7 +107,7 @@ static void mark_buffer (struct mca_btl_base_endpoint_t *endpoint) endpoint->send_buffer.start = endpoint->send_buffer.startp[0]; } else { MB(); - endpoint->send_buffer.endp[0] = endpoint->send_buffer.end; + endpoint->send_buffer.endp[0] = endpoint->send_buffer.end; } } diff --git a/opal/mca/btl/scif/configure.m4 b/opal/mca/btl/scif/configure.m4 index 33b292197a8..f8b814e2e70 100644 --- a/opal/mca/btl/scif/configure.m4 +++ b/opal/mca/btl/scif/configure.m4 @@ -2,6 +2,9 @@ # # Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -20,20 +23,22 @@ AC_DEFUN([MCA_opal_btl_scif_CONFIG],[ opal_btl_scif_happy="no" if test "$with_scif" != "no" ; then - if test -n "$with_scif" -a "$with_scif" != "yes" ; then + if test -n "$with_scif" && test "$with_scif" != "yes" ; then opal_check_scif_dir=$with_scif fi OPAL_CHECK_PACKAGE([btl_scif], [scif.h], [scif], [scif_open], [], [$opal_check_scif_dir], [], [opal_btl_scif_happy="yes"], []) - if test "$opal_btl_scif_happy" != "yes" -a -n "$with_scif" ; then + if test "$opal_btl_scif_happy" != "yes" && test -n "$with_scif" ; then AC_MSG_ERROR([SCIF support requested but not found. Aborting]) fi fi AS_IF([test "$opal_btl_scif_happy" = "yes"], [$1], [$2]) + OPAL_SUMMARY_ADD([[Transports]],[[Intel SCIF]],[[btl_scif]],[$opal_btl_scif_happy]) + # substitute in the things needed to build scif AC_SUBST([btl_scif_CPPFLAGS]) AC_SUBST([btl_scif_LDFLAGS]) diff --git a/opal/mca/btl/scif/owner.txt b/opal/mca/btl/scif/owner.txt index 98709a8170b..30615e90eb7 100644 --- a/opal/mca/btl/scif/owner.txt +++ b/opal/mca/btl/scif/owner.txt @@ -1,5 +1,5 @@ # -# owner/status file +# owner/status file # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # diff --git a/opal/mca/btl/self/Makefile.am b/opal/mca/btl/self/Makefile.am index 4cecee7c6cf..e35fb91d803 100644 --- a/opal/mca/btl/self/Makefile.am +++ b/opal/mca/btl/self/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -22,7 +22,7 @@ libmca_btl_self_la_sources = \ btl_self.h \ btl_self_component.c \ btl_self_frag.c \ - btl_self_frag.h + btl_self_frag.h # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/opal/mca/btl/self/btl_self.c b/opal/mca/btl/self/btl_self.c index 26f2a88f8ed..5296ef1f9e5 100644 --- a/opal/mca/btl/self/btl_self.c +++ b/opal/mca/btl/self/btl_self.c @@ -6,17 +6,19 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,68 +26,55 @@ #include #include -#include -#include -#include -#include #include "opal/class/opal_bitmap.h" #include "opal/datatype/opal_convertor.h" -#include "opal/sys/atomic.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/mpool/base/base.h" #include "btl_self.h" #include "btl_self_frag.h" #include "opal/util/proc.h" -static int mca_btl_self_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); - -static int mca_btl_self_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); - -mca_btl_base_module_t mca_btl_self = { - .btl_component = &mca_btl_self_component.super, - .btl_add_procs = mca_btl_self_add_procs, - .btl_del_procs = mca_btl_self_del_procs, - .btl_finalize = mca_btl_self_finalize, - .btl_alloc = mca_btl_self_alloc, - .btl_free = mca_btl_self_free, - .btl_prepare_src = mca_btl_self_prepare_src, - .btl_send = mca_btl_self_send, - .btl_put = mca_btl_self_put, - .btl_get = mca_btl_self_get, - .btl_dump = mca_btl_base_dump, - .btl_ft_event = mca_btl_self_ft_event, -}; - - -int mca_btl_self_add_procs( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t **peers, - opal_bitmap_t* reachability ) +/** + * PML->BTL notification of change in the process list. + * PML->BTL Notification that a receive fragment has been matched. 
+ * Called for message that is send from process with the virtual + * address of the shared memory segment being different than that of + * the receiver. + * + * @param btl (IN) + * @param proc (IN) + * @param peer (OUT) + * @return OPAL_SUCCESS or error status on failure. + * + */ +static int mca_btl_self_add_procs (struct mca_btl_base_module_t *btl, size_t nprocs, + struct opal_proc_t **procs, + struct mca_btl_base_endpoint_t **peers, + opal_bitmap_t* reachability) { - int i; - - for( i = 0; i < (int)nprocs; i++ ) { + for (int i = 0; i < (int)nprocs; i++ ) { if( 0 == opal_compare_proc(procs[i]->proc_name, OPAL_PROC_MY_NAME) ) { opal_bitmap_set_bit( reachability, i ); + /* need to return something to keep the bml from ignoring us */ + peers[i] = (struct mca_btl_base_endpoint_t *) 1; break; /* there will always be only one ... */ } } + return OPAL_SUCCESS; } - -int mca_btl_self_del_procs( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t **peers ) +/** + * PML->BTL notification of change in the process list. + * + * @param btl (IN) BTL instance + * @param proc (IN) Peer process + * @param peer (IN) Peer addressing information. + * @return Status indicating if cleanup was successful + * + */ +static int mca_btl_self_del_procs (struct mca_btl_base_module_t *btl, size_t nprocs, + struct opal_proc_t **procs, + struct mca_btl_base_endpoint_t **peers) { return OPAL_SUCCESS; } @@ -104,7 +93,7 @@ int mca_btl_self_del_procs( struct mca_btl_base_module_t* btl, * */ -int mca_btl_self_finalize(struct mca_btl_base_module_t* btl) +static int mca_btl_self_finalize(struct mca_btl_base_module_t* btl) { return OPAL_SUCCESS; } @@ -116,125 +105,93 @@ int mca_btl_self_finalize(struct mca_btl_base_module_t* btl) * @param btl (IN) BTL module * @param size (IN) Request segment size. 
*/ -mca_btl_base_descriptor_t* mca_btl_self_alloc( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - uint8_t order, - size_t size, - uint32_t flags) +static mca_btl_base_descriptor_t *mca_btl_self_alloc (struct mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, + uint8_t order, size_t size, uint32_t flags) { - mca_btl_self_frag_t* frag = NULL; + mca_btl_self_frag_t *frag = NULL; - if(size <= mca_btl_self.btl_eager_limit) { + if (size <= MCA_BTL_SELF_MAX_INLINE_SIZE) { + MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag); + } else if (size <= mca_btl_self.btl_eager_limit) { MCA_BTL_SELF_FRAG_ALLOC_EAGER(frag); } else if (size <= btl->btl_max_send_size) { MCA_BTL_SELF_FRAG_ALLOC_SEND(frag); } + if( OPAL_UNLIKELY(NULL == frag) ) { - return NULL; + return NULL; } - - frag->segment.seg_len = size; - frag->base.des_flags = flags; - frag->base.des_segments = &(frag->segment); + + frag->segments[0].seg_len = size; frag->base.des_segment_count = 1; - return (mca_btl_base_descriptor_t*)frag; + frag->base.des_flags = flags; + + return &frag->base; } - + /** * Return a segment allocated by this BTL. * * @param btl (IN) BTL module * @param segment (IN) Allocated segment. 
*/ -int mca_btl_self_free( struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* des ) +static int mca_btl_self_free (struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des) { - mca_btl_self_frag_t* frag = (mca_btl_self_frag_t*)des; - - frag->base.des_segments = NULL; - frag->base.des_segment_count = 0; + MCA_BTL_SELF_FRAG_RETURN((mca_btl_self_frag_t *) des); - if(frag->size == mca_btl_self.btl_eager_limit) { - MCA_BTL_SELF_FRAG_RETURN_EAGER(frag); - } else if (frag->size == mca_btl_self.btl_max_send_size) { - MCA_BTL_SELF_FRAG_RETURN_SEND(frag); - } else { - MCA_BTL_SELF_FRAG_RETURN_RDMA(frag); - } return OPAL_SUCCESS; } /** - * Prepare data for send/put + * Prepare data for send * * @param btl (IN) BTL module */ -struct mca_btl_base_descriptor_t* -mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct opal_convertor_t* convertor, - uint8_t order, - size_t reserve, - size_t* size, - uint32_t flags ) +static struct mca_btl_base_descriptor_t *mca_btl_self_prepare_src (struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t *endpoint, + struct opal_convertor_t *convertor, + uint8_t order, size_t reserve, + size_t *size, uint32_t flags) { - mca_btl_self_frag_t* frag; - struct iovec iov; - uint32_t iov_count = 1; - size_t max_data = *size; - int rc; - - /* non-contigous data */ - if( opal_convertor_need_buffers(convertor) || - max_data < mca_btl_self.btl_max_send_size || - reserve != 0 ) { + bool inline_send = !opal_convertor_need_buffers(convertor); + size_t buffer_len = reserve + (inline_send ? 
0 : *size); + mca_btl_self_frag_t *frag; - MCA_BTL_SELF_FRAG_ALLOC_SEND(frag); - if(OPAL_UNLIKELY(NULL == frag)) { - return NULL; - } + frag = (mca_btl_self_frag_t *) mca_btl_self_alloc (btl, endpoint, order, buffer_len, flags); + if (OPAL_UNLIKELY(NULL == frag)) { + return NULL; + } - if(reserve + max_data > frag->size) { - max_data = frag->size - reserve; - } - iov.iov_len = max_data; - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)(frag+1) + reserve); + /* non-contigous data */ + if (OPAL_UNLIKELY(!inline_send)) { + struct iovec iov = {.iov_len = *size, .iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->data + reserve)}; + size_t max_data = *size; + uint32_t iov_count = 1; + int rc; - rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); + rc = opal_convertor_pack (convertor, &iov, &iov_count, &max_data); if(rc < 0) { - MCA_BTL_SELF_FRAG_RETURN_SEND(frag); + mca_btl_self_free (btl, &frag->base); return NULL; } - frag->segment.seg_addr.pval = frag+1; - frag->segment.seg_len = reserve + max_data; + *size = max_data; + frag->segments[0].seg_len = reserve + max_data; } else { - MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag); - if(OPAL_UNLIKELY(NULL == frag)) { - return NULL; - } - iov.iov_len = max_data; - iov.iov_base = NULL; + void *data_ptr; - /* convertor should return offset into users buffer */ - rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); - if(rc < 0) { - MCA_BTL_SELF_FRAG_RETURN_RDMA(frag); - return NULL; - } - frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) iov.iov_base; - frag->segment.seg_len = max_data; - *size = max_data; + opal_convertor_get_current_pointer (convertor, &data_ptr); + + frag->segments[1].seg_addr.pval = data_ptr; + frag->segments[1].seg_len = *size; + frag->base.des_segment_count = 2; } - frag->base.des_flags = flags; - frag->base.des_segments = &frag->segment; - frag->base.des_segment_count = 1; return &frag->base; } - + /** * Initiate a send to the peer. 
* @@ -242,10 +199,10 @@ mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl, * @param peer (IN) BTL peer addressing */ -int mca_btl_self_send( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* des, - mca_btl_base_tag_t tag ) +static int mca_btl_self_send (struct mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, + struct mca_btl_base_descriptor_t *des, + mca_btl_base_tag_t tag) { mca_btl_active_message_callback_t* reg; int btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); @@ -264,6 +221,39 @@ int mca_btl_self_send( struct mca_btl_base_module_t* btl, return 1; } +static int mca_btl_self_sendi (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, + struct opal_convertor_t *convertor, void *header, size_t header_size, + size_t payload_size, uint8_t order, uint32_t flags, mca_btl_base_tag_t tag, + mca_btl_base_descriptor_t **descriptor) +{ + mca_btl_base_descriptor_t *frag; + + if (!payload_size || !opal_convertor_need_buffers(convertor)) { + void *data_ptr = NULL; + if (payload_size) { + opal_convertor_get_current_pointer (convertor, &data_ptr); + } + + mca_btl_base_segment_t segments[2] = {{.seg_addr.pval = header, .seg_len = header_size}, + {.seg_addr.pval = data_ptr, .seg_len = payload_size}}; + mca_btl_base_descriptor_t des = {.des_segments = segments, .des_segment_count = payload_size ? 
2 : 1, + .des_flags = 0}; + + (void) mca_btl_self_send (btl, endpoint, &des, tag); + return OPAL_SUCCESS; + } + + frag = mca_btl_self_prepare_src (btl, endpoint, convertor, order, header_size, &payload_size, + flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); + if (NULL == frag) { + *descriptor = NULL; + return OPAL_ERR_OUT_OF_RESOURCE; + } + + memcpy (frag->des_segments[0].seg_addr.pval, header, header_size); + (void) mca_btl_self_send (btl, endpoint, frag, tag); + return OPAL_SUCCESS; +} static int mca_btl_self_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, @@ -289,22 +279,18 @@ static int mca_btl_self_get (mca_btl_base_module_t *btl, struct mca_btl_base_end return OPAL_SUCCESS; } -int mca_btl_self_ft_event(int state) { - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OPAL_SUCCESS; -} +/* btl self module */ +mca_btl_base_module_t mca_btl_self = { + .btl_component = &mca_btl_self_component.super, + .btl_add_procs = mca_btl_self_add_procs, + .btl_del_procs = mca_btl_self_del_procs, + .btl_finalize = mca_btl_self_finalize, + .btl_alloc = mca_btl_self_alloc, + .btl_free = mca_btl_self_free, + .btl_prepare_src = mca_btl_self_prepare_src, + .btl_send = mca_btl_self_send, + .btl_sendi = mca_btl_self_sendi, + .btl_put = mca_btl_self_put, + .btl_get = mca_btl_self_get, + .btl_dump = mca_btl_base_dump, +}; diff --git a/opal/mca/btl/self/btl_self.h b/opal/mca/btl/self/btl_self.h index e3988e30299..ac4cab6eb6a 100644 --- a/opal/mca/btl/self/btl_self.h +++ b/opal/mca/btl/self/btl_self.h @@ -6,16 +6,16 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -26,18 +26,18 @@ #include "opal_config.h" -#ifdef HAVE_STDLIB_H #include -#endif /* HAVE_STDLIB_H */ #ifdef HAVE_SYS_TYPES_H #include #endif /* HAVE_SYS_TYPES_H */ -#include "opal/mca/event/event.h" + #include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" +#include "opal/mca/btl/base/base.h" BEGIN_C_DECLS +#define MCA_BTL_SELF_MAX_INLINE_SIZE 128 + /** * Shared Memory (SELF) BTL module. */ @@ -46,7 +46,6 @@ struct mca_btl_self_component_t { int free_list_num; /**< initial size of free lists */ int free_list_max; /**< maximum size of free lists */ int free_list_inc; /**< number of elements to alloc when growing free lists */ - opal_mutex_t self_lock; opal_free_list_t self_frags_eager; /**< free list of self first */ opal_free_list_t self_frags_send; /**< free list of self second */ opal_free_list_t self_frags_rdma; /**< free list of self second */ @@ -54,146 +53,8 @@ struct mca_btl_self_component_t { typedef struct mca_btl_self_component_t mca_btl_self_component_t; OPAL_MODULE_DECLSPEC extern mca_btl_self_component_t mca_btl_self_component; -/** - * Register shared memory module parameters with the MCA framework - */ -int mca_btl_self_component_open(void); - -/** - * Any final cleanup before being unloaded. - */ -int mca_btl_self_component_close(void); - -/** - * SELF module initialization. - * - * @param num_btls (OUT) Number of BTLs returned in BTL array. 
- * @param enable_progress_threads (IN) Flag indicating whether BTL is allowed to have progress threads - * @param enable_mpi_threads (IN) Flag indicating whether BTL must support multilple simultaneous invocations from different threads - * - */ -mca_btl_base_module_t** mca_btl_self_component_init( - int *num_btls, - bool enable_progress_threads, - bool enable_mpi_threads -); - extern mca_btl_base_module_t mca_btl_self; - -/** - * Cleanup any resources held by the BTL. - * - * @param btl BTL instance. - * @return OPAL_SUCCESS or error status on failure. - */ - -int mca_btl_self_finalize( - struct mca_btl_base_module_t* btl -); - - -/** - * PML->BTL notification of change in the process list. - * PML->BTL Notification that a receive fragment has been matched. - * Called for message that is send from process with the virtual - * address of the shared memory segment being different than that of - * the receiver. - * - * @param btl (IN) - * @param proc (IN) - * @param peer (OUT) - * @return OPAL_SUCCESS or error status on failure. - * - */ - -int mca_btl_self_add_procs( - struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t** peers, - struct opal_bitmap_t* reachability -); - - -/** - * PML->BTL notification of change in the process list. - * - * @param btl (IN) BTL instance - * @param proc (IN) Peer process - * @param peer (IN) Peer addressing information. - * @return Status indicating if cleanup was successful - * - */ -int mca_btl_self_del_procs( - struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t **peers -); - - -/** - * Allocate a segment. - * - * @param btl (IN) BTL module - * @param size (IN) Request segment size. 
- */ -mca_btl_base_descriptor_t* mca_btl_self_alloc( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - uint8_t order, - size_t size, - uint32_t flags -); - -/** - * Return a segment allocated by this BTL. - * - * @param btl (IN) BTL module - * @param segment (IN) Allocated segment. - */ -int mca_btl_self_free( - struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* segment -); - -/** - * Pack data - * - * @param btl (IN) BTL module - * @param peer (IN) BTL peer addressing - */ -struct mca_btl_base_descriptor_t* mca_btl_self_prepare_src( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct opal_convertor_t* convertor, - uint8_t order, - size_t reserve, - size_t* size, - uint32_t flags -); - -/** - * Initiate a send to the peer. - * - * @param btl (IN) BTL module - * @param peer (IN) BTL peer addressing - */ -int mca_btl_self_send( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - mca_btl_base_tag_t tag -); - -/** - * Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OPAL_SUCCESS or failure status - */ -int mca_btl_self_ft_event(int state); - END_C_DECLS #endif diff --git a/opal/mca/btl/self/btl_self_component.c b/opal/mca/btl/self/btl_self_component.c index 3690cda80dd..99c0983a59d 100644 --- a/opal/mca/btl/self/btl_self_component.c +++ b/opal/mca/btl/self/btl_self_component.c @@ -6,39 +6,43 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ - -#include "opal/runtime/opal.h" -#include "opal/mca/event/event.h" + #include "btl_self.h" #include "btl_self_frag.h" +#include "opal/mca/base/mca_base_var.h" static int mca_btl_self_component_register(void); +static int mca_btl_self_component_open(void); +static int mca_btl_self_component_close(void); + +/** + * SELF module initialization. + * + * @param num_btls (OUT) Number of BTLs returned in BTL array. + * @param enable_progress_threads (IN) Flag indicating whether BTL is allowed to have progress threads + * @param enable_mpi_threads (IN) Flag indicating whether BTL must support multilple simultaneous invocations from different threads + * + */ +static mca_btl_base_module_t **mca_btl_self_component_init (int *num_btls, + bool enable_progress_threads, + bool enable_mpi_threads); /* - * Shared Memory (SELF) component instance. + * Shared Memory (SELF) component instance. 
*/ mca_btl_self_component_t mca_btl_self_component = { @@ -78,14 +82,15 @@ static int mca_btl_self_component_register(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_self_component.free_list_num); - mca_btl_self_component.free_list_max = -1; + /* NTH: free list buffers are not released until we tear down so DO NOT make them unlimited here */ + mca_btl_self_component.free_list_max = 64; (void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_max", "Maximum number of fragments", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_self_component.free_list_max); - mca_btl_self_component.free_list_inc = 32; + mca_btl_self_component.free_list_inc = 8; (void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_inc", "Increment by this number of fragments", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -94,25 +99,23 @@ static int mca_btl_self_component_register(void) &mca_btl_self_component.free_list_inc); mca_btl_self.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH; - mca_btl_self.btl_eager_limit = 128 * 1024; + mca_btl_self.btl_eager_limit = 1024; mca_btl_self.btl_rndv_eager_limit = 128 * 1024; - mca_btl_self.btl_max_send_size = 256 * 1024; + mca_btl_self.btl_max_send_size = 16 * 1024; mca_btl_self.btl_rdma_pipeline_send_length = INT_MAX; mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_self.btl_min_rdma_pipeline_size = 0; - mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE; + mca_btl_self.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND; mca_btl_self.btl_bandwidth = 100; mca_btl_self.btl_latency = 0; - mca_btl_base_param_register(&mca_btl_self_component.super.btl_version, - &mca_btl_self); + mca_btl_base_param_register (&mca_btl_self_component.super.btl_version, &mca_btl_self); return OPAL_SUCCESS; } -int mca_btl_self_component_open(void) +static int mca_btl_self_component_open(void) { /* initialize 
objects */ - OBJ_CONSTRUCT(&mca_btl_self_component.self_lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_eager, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_send, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_rdma, opal_free_list_t); @@ -125,64 +128,67 @@ int mca_btl_self_component_open(void) * component cleanup - sanity checking of queue lengths */ -int mca_btl_self_component_close(void) +static int mca_btl_self_component_close(void) { - OBJ_DESTRUCT(&mca_btl_self_component.self_lock); OBJ_DESTRUCT(&mca_btl_self_component.self_frags_eager); OBJ_DESTRUCT(&mca_btl_self_component.self_frags_send); OBJ_DESTRUCT(&mca_btl_self_component.self_frags_rdma); return OPAL_SUCCESS; } - /* * SELF component initialization */ -mca_btl_base_module_t** mca_btl_self_component_init( int *num_btls, - bool enable_progress_threads, - bool enable_mpi_threads ) +static mca_btl_base_module_t **mca_btl_self_component_init (int *num_btls, + bool enable_progress_threads, + bool enable_mpi_threads) { mca_btl_base_module_t **btls = NULL; - *num_btls = 0; + int ret; - /* allocate the Shared Memory PTL */ - *num_btls = 1; - btls = (mca_btl_base_module_t**)malloc((*num_btls)*sizeof(mca_btl_base_module_t*)); - if (NULL == btls) { + /* initialize free lists */ + ret = opal_free_list_init (&mca_btl_self_component.self_frags_eager, + sizeof (mca_btl_self_frag_eager_t) + mca_btl_self.btl_eager_limit, + opal_cache_line_size, OBJ_CLASS(mca_btl_self_frag_eager_t), 0, + opal_cache_line_size, mca_btl_self_component.free_list_num, + mca_btl_self_component.free_list_max, + mca_btl_self_component.free_list_inc, + NULL, 0, NULL, NULL, NULL); + if (OPAL_SUCCESS != ret) { return NULL; } - /* initialize free lists */ - opal_free_list_init (&mca_btl_self_component.self_frags_eager, - sizeof(mca_btl_self_frag_eager_t) + mca_btl_self.btl_eager_limit, - opal_cache_line_size, - OBJ_CLASS(mca_btl_self_frag_eager_t), - 0,opal_cache_line_size, - 
mca_btl_self_component.free_list_num, - mca_btl_self_component.free_list_max, - mca_btl_self_component.free_list_inc, - NULL, 0, NULL, NULL, NULL); - opal_free_list_init (&mca_btl_self_component.self_frags_send, - sizeof(mca_btl_self_frag_send_t) + mca_btl_self.btl_max_send_size, - opal_cache_line_size, - OBJ_CLASS(mca_btl_self_frag_send_t), - 0,opal_cache_line_size, - mca_btl_self_component.free_list_num, - mca_btl_self_component.free_list_max, - mca_btl_self_component.free_list_inc, - NULL, 0, NULL, NULL, NULL); - opal_free_list_init (&mca_btl_self_component.self_frags_rdma, - sizeof(mca_btl_self_frag_rdma_t), - opal_cache_line_size, - OBJ_CLASS(mca_btl_self_frag_rdma_t), - 0,opal_cache_line_size, - mca_btl_self_component.free_list_num, - mca_btl_self_component.free_list_max, - mca_btl_self_component.free_list_inc, - NULL, 0, NULL, NULL, NULL); + ret = opal_free_list_init (&mca_btl_self_component.self_frags_send, + sizeof (mca_btl_self_frag_send_t) + mca_btl_self.btl_max_send_size, + opal_cache_line_size, OBJ_CLASS(mca_btl_self_frag_send_t), 0, + opal_cache_line_size, mca_btl_self_component.free_list_num, + mca_btl_self_component.free_list_max, + mca_btl_self_component.free_list_inc, + NULL, 0, NULL, NULL, NULL); + if (OPAL_SUCCESS != ret) { + return NULL; + } + + ret = opal_free_list_init (&mca_btl_self_component.self_frags_rdma, + sizeof (mca_btl_self_frag_rdma_t) + MCA_BTL_SELF_MAX_INLINE_SIZE, + opal_cache_line_size, OBJ_CLASS(mca_btl_self_frag_rdma_t), 0, + opal_cache_line_size, mca_btl_self_component.free_list_num, + mca_btl_self_component.free_list_max, + mca_btl_self_component.free_list_inc, + NULL, 0, NULL, NULL, NULL); + if (OPAL_SUCCESS != ret) { + return NULL; + } /* get pointer to the btls */ - btls[0] = (mca_btl_base_module_t *)(&mca_btl_self); + btls = (mca_btl_base_module_t **) malloc (sizeof (mca_btl_base_module_t *)); + if (NULL == btls) { + return NULL; + } + + btls[0] = &mca_btl_self; + *num_btls = 1; + return btls; } diff --git 
a/opal/mca/btl/self/btl_self_endpoint.h b/opal/mca/btl/self/btl_self_endpoint.h deleted file mode 100644 index 6ef32fa79bf..00000000000 --- a/opal/mca/btl/self/btl_self_endpoint.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_BTL_SELF_ENDPOINT_H -#define MCA_BTL_SELF_ENDPOINT_H - -#if OPAL_ENABLE_PROGRESS_THREADS == 1 -#include "opal/mca/event/event.h" -#endif - -/** - * An abstraction that represents a connection to a endpoint process. - * An instance of mca_ptl_base_endpoint_t is associated w/ each process - * and BTL pair at startup. - */ - -struct mca_btl_base_endpoint_t { - int my_selfp_rank; /**< My SELFP process rank. Used for accessing - * SELFP specfic data structures. */ - int peer_selfp_rank; /**< My peer's SELFP process rank. Used for accessing - * SELFP specfic data structures. */ -}; - -#endif - diff --git a/opal/mca/btl/self/btl_self_frag.c b/opal/mca/btl/self/btl_self_frag.c index 95186ac67d4..eff0fbc4bec 100644 --- a/opal/mca/btl/self/btl_self_frag.c +++ b/opal/mca/btl/self/btl_self_frag.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -9,6 +10,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,31 +24,32 @@ static inline void mca_btl_self_frag_constructor(mca_btl_self_frag_t* frag) { - frag->segment.seg_addr.pval = frag+1; - frag->segment.seg_len = (uint32_t)frag->size; - frag->base.des_segments = &frag->segment; - frag->base.des_segment_count = 1; - frag->base.des_flags = 0; + frag->base.des_flags = 0; + frag->segments[0].seg_addr.pval = (void *) frag->data; + frag->segments[0].seg_len = (uint32_t) frag->size; + frag->base.des_segments = frag->segments; + frag->base.des_segment_count = 1; } static void mca_btl_self_frag_eager_constructor(mca_btl_self_frag_t* frag) { + frag->list = &mca_btl_self_component.self_frags_eager; frag->size = mca_btl_self.btl_eager_limit; mca_btl_self_frag_constructor(frag); } static void mca_btl_self_frag_send_constructor(mca_btl_self_frag_t* frag) { + frag->list = &mca_btl_self_component.self_frags_send; frag->size = mca_btl_self.btl_max_send_size; mca_btl_self_frag_constructor(frag); } static void mca_btl_self_frag_rdma_constructor(mca_btl_self_frag_t* frag) { - frag->size = 0; - frag->segment.seg_addr.pval = frag+1; - frag->segment.seg_len = (uint32_t)frag->size; - frag->base.des_flags = 0; + frag->list = &mca_btl_self_component.self_frags_rdma; + frag->size = MCA_BTL_SELF_MAX_INLINE_SIZE; + mca_btl_self_frag_constructor(frag); } OBJ_CLASS_INSTANCE( mca_btl_self_frag_eager_t, diff --git a/opal/mca/btl/self/btl_self_frag.h b/opal/mca/btl/self/btl_self_frag.h index 6116322ec51..38236f1d7f5 100644 --- a/opal/mca/btl/self/btl_self_frag.h +++ b/opal/mca/btl/self/btl_self_frag.h @@ -6,16 +6,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -33,9 +33,11 @@ */ struct mca_btl_self_frag_t { mca_btl_base_descriptor_t base; - mca_btl_base_segment_t segment; + mca_btl_base_segment_t segments[2]; struct mca_btl_base_endpoint_t *endpoint; + opal_free_list_t *list; size_t size; + unsigned char data[]; }; typedef struct mca_btl_self_frag_t mca_btl_self_frag_t; typedef struct mca_btl_self_frag_t mca_btl_self_frag_eager_t; @@ -47,43 +49,27 @@ OBJ_CLASS_DECLARATION(mca_btl_self_frag_send_t); OBJ_CLASS_DECLARATION(mca_btl_self_frag_rdma_t); #define MCA_BTL_SELF_FRAG_ALLOC_EAGER(frag) \ -{ \ - frag = (mca_btl_self_frag_t *) \ - opal_free_list_get (&mca_btl_self_component.self_frags_eager); \ -} + { \ + frag = (mca_btl_self_frag_t *) \ + opal_free_list_get (&mca_btl_self_component.self_frags_eager); \ + } -#define MCA_BTL_SELF_FRAG_RETURN_EAGER(frag) \ -{ \ - opal_free_list_return (&mca_btl_self_component.self_frags_eager, \ - (opal_free_list_item_t*)(frag)); \ - frag->segment.seg_addr.pval = frag+1; \ -} - -#define MCA_BTL_SELF_FRAG_ALLOC_SEND(frag) \ -{ \ - frag = (mca_btl_self_frag_t *) \ - opal_free_list_get (&mca_btl_self_component.self_frags_send); \ -} - -#define MCA_BTL_SELF_FRAG_RETURN_SEND(frag) \ -{ \ - opal_free_list_return (&mca_btl_self_component.self_frags_send, \ - (opal_free_list_item_t*)(frag)); \ - frag->segment.seg_addr.pval = frag+1; \ -} #define MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag) \ -{ \ - frag = (mca_btl_self_frag_t *) \ - opal_free_list_get (&mca_btl_self_component.self_frags_rdma); \ -} + { \ + frag = (mca_btl_self_frag_t *) \ + opal_free_list_get 
(&mca_btl_self_component.self_frags_rdma); \ + } -#define MCA_BTL_SELF_FRAG_RETURN_RDMA(frag) \ -{ \ - opal_free_list_return (&mca_btl_self_component.self_frags_rdma, \ - (opal_free_list_item_t*)(frag)); \ - frag->segment.seg_addr.pval = frag+1; \ -} +#define MCA_BTL_SELF_FRAG_ALLOC_SEND(frag) \ + { \ + frag = (mca_btl_self_frag_t *) \ + opal_free_list_get (&mca_btl_self_component.self_frags_send); \ + } -#endif +#define MCA_BTL_SELF_FRAG_RETURN(frag) \ + { \ + opal_free_list_return ((frag)->list, (opal_free_list_item_t*)(frag)); \ + } +#endif /* MCA_BTL_SELF_SEND_FRAG_H */ diff --git a/opal/mca/btl/sm/Makefile.am b/opal/mca/btl/sm/Makefile.am index 5e33da0f868..06a064751b9 100644 --- a/opal/mca/btl/sm/Makefile.am +++ b/opal/mca/btl/sm/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2009 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -27,7 +27,7 @@ libmca_btl_sm_la_sources = \ btl_sm_endpoint.h \ btl_sm_fifo.h \ btl_sm_frag.c \ - btl_sm_frag.h + btl_sm_frag.h # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/opal/mca/btl/sm/btl_sm.c b/opal/mca/btl/sm/btl_sm.c index 1179c3ddb21..842a07b91dd 100644 --- a/opal/mca/btl/sm/btl_sm.c +++ b/opal/mca/btl/sm/btl_sm.c @@ -13,10 +13,10 @@ * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2010-2015 Los Alamos National Security, LLC. - * All rights reserved. + * All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -30,6 +30,9 @@ #include #include +#ifdef HAVE_UNISTD_H +#include +#endif #ifdef HAVE_FCNTL_H #include #endif /* HAVE_FCNTL_H */ @@ -53,20 +56,10 @@ #include "opal/datatype/opal_convertor.h" #include "opal/mca/btl/btl.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/mpool/sm/mpool_sm.h" #include "opal/align.h" #include "opal/util/sys_limits.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/util/basename.h" -#include "opal/mca/crs/base/base.h" -#include "opal/util/basename.h" -#include "orte/mca/sstore/sstore.h" -#include "opal/runtime/opal_cr.h" -#endif - #include "btl_sm.h" #include "btl_sm_endpoint.h" #include "btl_sm_frag.h" @@ -87,7 +80,7 @@ mca_btl_sm_t mca_btl_sm = { .btl_sendi = mca_btl_sm_sendi, .btl_dump = mca_btl_sm_dump, .btl_register_error = mca_btl_sm_register_error_cb, /* register error */ - .btl_ft_event = mca_btl_sm_ft_event + .btl_ft_event = NULL } }; @@ -108,7 +101,7 @@ static void *mpool_calloc(size_t nmemb, size_t size) size_t bsize = nmemb * size; mca_mpool_base_module_t *mpool = mca_btl_sm_component.sm_mpool; - buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0, NULL); + buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0); if (NULL == buf) return NULL; @@ -119,7 +112,7 @@ static void *mpool_calloc(size_t nmemb, size_t size) static int setup_mpool_base_resources(mca_btl_sm_component_t *comp_ptr, - mca_mpool_base_resources_t *out_res) + mca_common_sm_mpool_resources_t *out_res) { int rc = OPAL_SUCCESS; int fd = -1; @@ 
-219,14 +212,13 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl, size_t length, length_payload; sm_fifo_t *my_fifos; int my_mem_node, num_mem_nodes, i, rc; - mca_mpool_base_resources_t *res = NULL; + mca_common_sm_mpool_resources_t *res = NULL; mca_btl_sm_component_t* m = &mca_btl_sm_component; /* Assume we don't have hwloc support and fill in dummy info */ mca_btl_sm_component.mem_node = my_mem_node = 0; mca_btl_sm_component.num_mem_nodes = num_mem_nodes = 1; -#if OPAL_HAVE_HWLOC /* If we have hwloc support, then get accurate information */ if (NULL != opal_hwloc_topology) { i = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology, @@ -276,7 +268,6 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl, } } } -#endif if (NULL == (res = calloc(1, sizeof(*res)))) { return OPAL_ERR_OUT_OF_RESOURCE; @@ -290,15 +281,14 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl, /* Disable memory binding, because each MPI process will claim pages in the * mpool for their local NUMA node */ res->mem_node = -1; + res->allocator = mca_btl_sm_component.allocator; if (OPAL_SUCCESS != (rc = setup_mpool_base_resources(m, res))) { free(res); return rc; } /* now that res is fully populated, create the thing */ - mca_btl_sm_component.sm_mpools[0] = - mca_mpool_base_module_create(mca_btl_sm_component.sm_mpool_name, - sm_btl, res); + mca_btl_sm_component.sm_mpools[0] = common_sm_mpool_create (res); /* Sanity check to ensure that we found it */ if (NULL == mca_btl_sm_component.sm_mpools[0]) { free(res); @@ -404,7 +394,7 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl, mca_btl_sm_component.sm_free_list_inc, mca_btl_sm_component.sm_mpool, 0, NULL, NULL, NULL); if ( OPAL_SUCCESS != i ) - return i; + return i; mca_btl_sm_component.num_outstanding_frags = 0; @@ -469,7 +459,7 @@ int mca_btl_sm_add_procs( bool have_connected_peer = false; char **bases; /* for easy access to the mpool_sm_module */ - mca_mpool_sm_module_t *sm_mpool_modp = NULL; + mca_common_sm_mpool_module_t *sm_mpool_modp = NULL; /* 
initializion */ @@ -547,7 +537,7 @@ int mca_btl_sm_add_procs( } bases = mca_btl_sm_component.shm_bases; - sm_mpool_modp = (mca_mpool_sm_module_t *)mca_btl_sm_component.sm_mpool; + sm_mpool_modp = (mca_common_sm_mpool_module_t *)mca_btl_sm_component.sm_mpool; /* initialize own FIFOs */ /* @@ -899,7 +889,9 @@ int mca_btl_sm_sendi( struct mca_btl_base_module_t* btl, /* note that frag==NULL is equivalent to rc returning an error code */ MCA_BTL_SM_FRAG_ALLOC_EAGER(frag); if( OPAL_UNLIKELY(NULL == frag) ) { - *descriptor = NULL; + if (NULL != descriptor) { + *descriptor = NULL; + } return OPAL_ERR_OUT_OF_RESOURCE; } @@ -1085,7 +1077,7 @@ int mca_btl_sm_get_sync (mca_btl_base_module_t *btl, struct mca_btl_base_endpoin if (OPAL_LIKELY(mca_btl_sm_component.use_knem)) { struct knem_cmd_inline_copy icopy; struct knem_cmd_param_iovec recv_iovec; - + /* Fill in the ioctl data fields. There's no async completion, so we don't need to worry about getting a slot, etc. */ recv_iovec.base = (uintptr_t) local_address; @@ -1167,7 +1159,7 @@ int mca_btl_sm_get_async (mca_btl_base_module_t *btl, struct mca_btl_base_endpoi mca_btl_sm_frag_t* frag; struct knem_cmd_inline_copy icopy; struct knem_cmd_param_iovec recv_iovec; - + /* If we have no knem slots available, fall back to synchronous */ if (sm_btl->knem_status_num_used >= mca_btl_sm_component.knem_max_simultaneous) { @@ -1197,7 +1189,7 @@ int mca_btl_sm_get_async (mca_btl_base_module_t *btl, struct mca_btl_base_endpoi icopy.local_iovec_nr = 1; icopy.write = 0; icopy.async_status_index = sm_btl->knem_status_first_avail++; - if (sm_btl->knem_status_first_avail >= + if (sm_btl->knem_status_first_avail >= mca_btl_sm_component.knem_max_simultaneous) { sm_btl->knem_status_first_avail = 0; } @@ -1213,7 +1205,7 @@ int mca_btl_sm_get_async (mca_btl_base_module_t *btl, struct mca_btl_base_endpoi } sm_btl->knem_frag_array[icopy.async_status_index] = frag; - if (OPAL_LIKELY(0 == ioctl(sm_btl->knem_fd, + if (OPAL_LIKELY(0 == 
ioctl(sm_btl->knem_fd, KNEM_CMD_INLINE_COPY, &icopy))) { if (icopy.current_status != KNEM_STATUS_PENDING) { MCA_BTL_SM_FRAG_RETURN(frag); @@ -1248,75 +1240,16 @@ void mca_btl_sm_dump(struct mca_btl_base_module_t* btl, if( NULL != endpoint ) { mca_btl_base_err("BTL SM %p endpoint %p [smp_rank %d] [peer_rank %d]\n", - (void*) btl, (void*) endpoint, + (void*) btl, (void*) endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank); for(item = opal_list_get_first(&endpoint->pending_sends); - item != opal_list_get_end(&endpoint->pending_sends); + item != opal_list_get_end(&endpoint->pending_sends); item = opal_list_get_next(item)) { frag = (mca_btl_sm_frag_t*)item; mca_btl_base_err(" | frag %p size %lu (hdr frag %p len %lu rank %d tag %d)\n", (void*) frag, frag->size, (void*) frag->hdr->frag, - frag->hdr->len, frag->hdr->my_smp_rank, + frag->hdr->len, frag->hdr->my_smp_rank, frag->hdr->tag); } } } - -#if OPAL_ENABLE_FT_CR == 0 -int mca_btl_sm_ft_event(int state) { - return OPAL_SUCCESS; -} -#else -int mca_btl_sm_ft_event(int state) { - /* Notify mpool */ - if( NULL != mca_btl_sm_component.sm_mpool && - NULL != mca_btl_sm_component.sm_mpool->mpool_ft_event) { - mca_btl_sm_component.sm_mpool->mpool_ft_event(state); - } - - if(OPAL_CRS_CHECKPOINT == state) { - if( NULL != mca_btl_sm_component.sm_seg ) { - /* On restart we need the old file names to exist (not necessarily - * contain content) so the CRS component does not fail when searching - * for these old file handles. The restart procedure will make sure - * these files get cleaned up appropriately. 
- */ - /* Disabled to get FT code compiled again - * TODO: FIXIT soon - orte_sstore.set_attr(orte_sstore_handle_current, - SSTORE_METADATA_LOCAL_TOUCH, - mca_btl_sm_component.sm_seg->shmem_ds.seg_name); - */ - } - } - else if(OPAL_CRS_CONTINUE == state) { - if (opal_cr_continue_like_restart) { - if( NULL != mca_btl_sm_component.sm_seg ) { - /* Add shared memory file */ - opal_crs_base_cleanup_append(mca_btl_sm_component.sm_seg->shmem_ds.seg_name, false); - } - - /* Clear this so we force the module to re-init the sm files */ - mca_btl_sm_component.sm_mpool = NULL; - } - } - else if(OPAL_CRS_RESTART == state || - OPAL_CRS_RESTART_PRE == state) { - if( NULL != mca_btl_sm_component.sm_seg ) { - /* Add shared memory file */ - opal_crs_base_cleanup_append(mca_btl_sm_component.sm_seg->shmem_ds.seg_name, false); - } - - /* Clear this so we force the module to re-init the sm files */ - mca_btl_sm_component.sm_mpool = NULL; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OPAL_SUCCESS; -} -#endif /* OPAL_ENABLE_FT_CR */ diff --git a/opal/mca/btl/sm/btl_sm.h b/opal/mca/btl/sm/btl_sm.h index 09e03fbbe96..ab96dcfeb70 100644 --- a/opal/mca/btl/sm/btl_sm.h +++ b/opal/mca/btl/sm/btl_sm.h @@ -13,7 +13,7 @@ * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. - * All rights reserved. + * All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -31,9 +31,7 @@ #include #include #include -#ifdef HAVE_STDINT_H #include -#endif /* HAVE_STDINT_H */ #ifdef HAVE_SCHED_H #include #endif /* HAVE_SCHED_H */ @@ -87,23 +85,23 @@ BEGIN_C_DECLS struct sm_fifo_t { /* This queue pointer is used only by the heads. */ - volatile void **queue; + volatile void **queue; char pad0[SM_CACHE_LINE_PAD - sizeof(void **)]; /* This lock is used by the heads. 
*/ - opal_atomic_lock_t head_lock; + opal_atomic_lock_t head_lock; char pad1[SM_CACHE_LINE_PAD - sizeof(opal_atomic_lock_t)]; /* This index is used by the head holding the head lock. */ - volatile int head; + volatile int head; char pad2[SM_CACHE_LINE_PAD - sizeof(int)]; /* This mask is used "read only" by all processes. */ - unsigned int mask; + unsigned int mask; char pad3[SM_CACHE_LINE_PAD - sizeof(int)]; /* The following are used only by the tail. */ volatile void **queue_recv; opal_atomic_lock_t tail_lock; volatile int tail; int num_to_clear; - int lazy_free; + int lazy_free; char pad4[SM_CACHE_LINE_PAD - sizeof(void **) - sizeof(opal_atomic_lock_t) - sizeof(int) * 3]; @@ -169,7 +167,7 @@ struct mca_btl_sm_component_t { int num_pending_sends; /**< total number on all of my pending-send queues */ int mem_node; int num_mem_nodes; - + #if OPAL_ENABLE_PROGRESS_THREADS == 1 char sm_fifo_path[PATH_MAX]; /**< path to fifo used to signal this process */ int sm_fifo_fd; /**< file descriptor corresponding to opened fifo */ @@ -214,6 +212,12 @@ struct mca_btl_sm_component_t { char *sm_mpool_rndv_file_name; char *sm_ctl_file_name; char *sm_rndv_file_name; + + /** minimum size of a btl/sm mpool */ + unsigned long mpool_min_size; + + /** allocator name to use with the mpool */ + char *allocator; }; typedef struct mca_btl_sm_component_t mca_btl_sm_component_t; OPAL_MODULE_DECLSPEC extern mca_btl_sm_component_t mca_btl_sm_component; @@ -283,7 +287,7 @@ static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool, /* allocate the queue in the receiver's address space */ fifo->queue_recv = (volatile void **)mpool->mpool_alloc( - mpool, sizeof(void *) * qsize, opal_cache_line_size, 0, NULL); + mpool, sizeof(void *) * qsize, opal_cache_line_size, 0); if(NULL == fifo->queue_recv) { return OPAL_ERR_OUT_OF_RESOURCE; } @@ -529,13 +533,6 @@ extern void mca_btl_sm_dump(struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, int verbose); -/** - * 
Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OPAL_SUCCESS or failure status - */ -int mca_btl_sm_ft_event(int state); - #if OPAL_ENABLE_PROGRESS_THREADS == 1 void mca_btl_sm_component_event_thread(opal_object_t*); #endif diff --git a/opal/mca/btl/sm/btl_sm_component.c b/opal/mca/btl/sm/btl_sm_component.c index 2d6b01e8652..74f13edf902 100644 --- a/opal/mca/btl/sm/btl_sm_component.c +++ b/opal/mca/btl/sm/btl_sm_component.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. - * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. @@ -30,9 +30,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #ifdef HAVE_FCNTL_H #include #endif /* HAVE_FCNTL_H */ @@ -56,10 +54,6 @@ #include "opal/mca/common/sm/common_sm.h" #include "opal/mca/btl/base/btl_base_error.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/runtime/opal_cr.h" -#endif - #include "btl_sm.h" #include "btl_sm_frag.h" #include "btl_sm_fifo.h" @@ -223,6 +217,19 @@ static int sm_register(void) 0, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_sm_component.knem_max_simultaneous); + mca_btl_sm_component.allocator = "bucket"; + (void) mca_base_component_var_register (&mca_btl_sm_component.super.btl_version, "allocator", + "Name of allocator component to use for btl/sm allocations", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_sm_component.allocator); + + mca_btl_sm_component.mpool_min_size = 134217728; + (void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, "min_size", + "Minimum size 
of the common/sm mpool shared memory file", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, + &mca_btl_sm_component.mpool_min_size); + /* CMA parameters */ mca_btl_sm_component.use_cma = 0; (void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, @@ -236,9 +243,6 @@ static int sm_register(void) mca_btl_sm_param_register_int("free_list_max", -1, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_free_list_max); mca_btl_sm_param_register_int("free_list_inc", 64, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_free_list_inc); mca_btl_sm_param_register_int("max_procs", -1, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_max_procs); - /* there is no practical use for the mpool name parameter since mpool resources differ - between components */ - mca_btl_sm_component.sm_mpool_name = "sm"; mca_btl_sm_param_register_uint("fifo_size", 4096, OPAL_INFO_LVL_4, &mca_btl_sm_component.fifo_size); mca_btl_sm_param_register_int("num_fifos", 1, OPAL_INFO_LVL_4, &mca_btl_sm_component.nfifos); @@ -334,7 +338,7 @@ static int mca_btl_sm_component_close(void) mca_btl_sm.knem_frag_array = NULL; } if (NULL != mca_btl_sm.knem_status_array) { - munmap(mca_btl_sm.knem_status_array, + munmap(mca_btl_sm.knem_status_array, mca_btl_sm_component.knem_max_simultaneous); mca_btl_sm.knem_status_array = NULL; } @@ -369,17 +373,7 @@ static int mca_btl_sm_component_close(void) * to it are gone - no error checking, since we want all procs * to call this, so that in an abnormal termination scenario, * this file will still get cleaned up */ -#if OPAL_ENABLE_FT_CR == 1 - /* Only unlink the file if we are *not* restarting - * If we are restarting the file will be unlinked at a later time. 
- */ - if(OPAL_CR_STATUS_RESTART_PRE != opal_cr_checkpointing_state && - OPAL_CR_STATUS_RESTART_POST != opal_cr_checkpointing_state ) { - unlink(mca_btl_sm_component.sm_seg->shmem_ds.seg_name); - } -#else unlink(mca_btl_sm_component.sm_seg->shmem_ds.seg_name); -#endif OBJ_RELEASE(mca_btl_sm_component.sm_seg); } @@ -400,7 +394,7 @@ static int mca_btl_sm_component_close(void) #endif CLEANUP: - + #if OPAL_CUDA_SUPPORT mca_common_cuda_fini(); #endif /* OPAL_CUDA_SUPPORT */ @@ -409,7 +403,7 @@ static int mca_btl_sm_component_close(void) return return_value; } -/* +/* * Returns the number of processes on the node. */ static inline int @@ -458,41 +452,6 @@ create_and_attach(mca_btl_sm_component_t *comp_ptr, return OPAL_SUCCESS; } -/* - * SKG - I'm not happy with this, but I can't figure out a better way of - * finding the sm mpool's minimum size 8-|. The way I see it. This BTL only - * uses the sm mpool, so maybe this isn't so bad... - * - * The problem is the we need to size the mpool resources at sm BTL component - * init. That means we need to know the mpool's minimum size at create. 
- */ -static int -get_min_mpool_size(mca_btl_sm_component_t *comp_ptr, - size_t *out_size) -{ - const char *type_name = "mpool"; - const char *param_name = "min_size"; - const mca_base_var_storage_t *min_size; - int id = 0; - - if (0 > (id = mca_base_var_find("ompi", type_name, comp_ptr->sm_mpool_name, - param_name))) { - opal_output(0, "mca_base_var_find: failure looking for %s_%s_%s\n", - type_name, comp_ptr->sm_mpool_name, param_name); - return OPAL_ERR_NOT_FOUND; - } - - if (OPAL_SUCCESS != mca_base_var_get_value(id, &min_size, NULL, NULL)) { - opal_output(0, "mca_base_var_get_value failure\n"); - return OPAL_ERROR; - } - - /* the min_size variable is an unsigned long long */ - *out_size = (size_t) min_size->ullval; - - return OPAL_SUCCESS; -} - static int get_mpool_res_size(int32_t max_procs, size_t *out_res_size) @@ -609,24 +568,21 @@ create_rndv_file(mca_btl_sm_component_t *comp_ptr, int rc = OPAL_SUCCESS; int fd = -1; char *fname = NULL; + char *tmpfname = NULL; /* used as a temporary store so we can extract shmem_ds info */ mca_common_sm_module_t *tmp_modp = NULL; if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) { - size_t min_size = 0; /* get the segment size for the sm mpool. */ if (OPAL_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs, &size))) { /* rc is already set */ goto out; } - /* do we need to update the size based on the sm mpool's min size? */ - if (OPAL_SUCCESS != (rc = get_min_mpool_size(comp_ptr, &min_size))) { - goto out; - } + /* update size if less than required minimum */ - if (size < min_size) { - size = min_size; + if (size < mca_btl_sm_component.mpool_min_size) { + size = mca_btl_sm_component.mpool_min_size; } /* we only need the shmem_ds info at this point. initilization will be * completed in the mpool module code. the idea is that we just need this @@ -665,8 +621,19 @@ create_rndv_file(mca_btl_sm_component_t *comp_ptr, * file containing all the meta info required for attach. 
*/ /* now just write the contents of tmp_modp->shmem_ds to the full - * sizeof(opal_shmem_ds_t), so we know where the mpool_res_size starts. */ - if (-1 == (fd = open(fname, O_CREAT | O_RDWR, 0600))) { + * sizeof(opal_shmem_ds_t), so we know where the mpool_res_size + * starts. Note that we write into a temporary file first and + * then do a rename(2) to move the full file into its final + * destination. This avoids a race condition where a peer process + * might open/read part of the file before this processes finishes + * writing it (see + * https://github.com/open-mpi/ompi/issues/1230). */ + asprintf(&tmpfname, "%s.tmp", fname); + if (NULL == tmpfname) { + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto out; + } + if (-1 == (fd = open(tmpfname, O_CREAT | O_RDWR, 0600))) { int err = errno; opal_show_help("help-mpi-btl-sm.txt", "sys call fail", true, "open(2)", strerror(err), err); @@ -692,11 +659,20 @@ create_rndv_file(mca_btl_sm_component_t *comp_ptr, /* only do this for the mpool case */ OBJ_RELEASE(tmp_modp); } + (void)close(fd); + fd = -1; + if (0 != rename(tmpfname, fname)) { + rc = OPAL_ERR_IN_ERRNO; + goto out; + } out: if (-1 != fd) { (void)close(fd); } + if (NULL != tmpfname) { + free(tmpfname); + } return rc; } @@ -980,7 +956,7 @@ mca_btl_sm_component_init(int *num_btls, mca_btl_sm.knem_frag_array = NULL; } if (NULL != mca_btl_sm.knem_status_array) { - munmap(mca_btl_sm.knem_status_array, + munmap(mca_btl_sm.knem_status_array, mca_btl_sm_component.knem_max_simultaneous); mca_btl_sm.knem_status_array = NULL; } @@ -1034,22 +1010,22 @@ void mca_btl_sm_component_event_thread(opal_object_t* thread) } #endif -void btl_sm_process_pending_sends(struct mca_btl_base_endpoint_t *ep) -{ - btl_sm_pending_send_item_t *si; - int rc; +void btl_sm_process_pending_sends(struct mca_btl_base_endpoint_t *ep) +{ + btl_sm_pending_send_item_t *si; + int rc; while ( 0 < opal_list_get_size(&ep->pending_sends) ) { /* Note that we access the size of ep->pending_sends unlocked - as it 
doesn't really matter if the result is wrong as + as it doesn't really matter if the result is wrong as opal_list_remove_first is called with a lock and we handle it not finding an item to process */ OPAL_THREAD_LOCK(&ep->endpoint_lock); - si = (btl_sm_pending_send_item_t*)opal_list_remove_first(&ep->pending_sends); + si = (btl_sm_pending_send_item_t*)opal_list_remove_first(&ep->pending_sends); OPAL_THREAD_UNLOCK(&ep->endpoint_lock); if(NULL == si) return; /* Another thread got in before us. Thats ok. */ - + OPAL_THREAD_ADD32(&mca_btl_sm_component.num_pending_sends, -1); MCA_BTL_SM_FIFO_WRITE(ep, ep->my_smp_rank, ep->peer_smp_rank, si->data, @@ -1060,7 +1036,7 @@ void btl_sm_process_pending_sends(struct mca_btl_base_endpoint_t *ep) if ( OPAL_SUCCESS != rc ) return; } -} +} int mca_btl_sm_component_progress(void) { @@ -1212,13 +1188,13 @@ int mca_btl_sm_component_progress(void) return nevents; } while (mca_btl_sm.knem_status_num_used > 0 && - KNEM_STATUS_PENDING != + KNEM_STATUS_PENDING != mca_btl_sm.knem_status_array[mca_btl_sm.knem_status_first_used]) { - if (KNEM_STATUS_SUCCESS == + if (KNEM_STATUS_SUCCESS == mca_btl_sm.knem_status_array[mca_btl_sm.knem_status_first_used]) { /* Handle the completed fragment */ - frag = + frag = mca_btl_sm.knem_frag_array[mca_btl_sm.knem_status_first_used]; frag->cb.func (&mca_btl_sm.super, frag->endpoint, frag->cb.local_address, frag->cb.local_handle, @@ -1230,7 +1206,7 @@ int mca_btl_sm_component_progress(void) ++nevents; --mca_btl_sm.knem_status_num_used; ++mca_btl_sm.knem_status_first_used; - if (mca_btl_sm.knem_status_first_used >= + if (mca_btl_sm.knem_status_first_used >= mca_btl_sm_component.knem_max_simultaneous) { mca_btl_sm.knem_status_first_used = 0; } diff --git a/opal/mca/btl/sm/help-mpi-btl-sm.txt b/opal/mca/btl/sm/help-mpi-btl-sm.txt index 601d3e46315..3cb288cd0da 100644 --- a/opal/mca/btl/sm/help-mpi-btl-sm.txt +++ b/opal/mca/btl/sm/help-mpi-btl-sm.txt @@ -7,9 +7,9 @@ # Copyright (c) 2012-2013 Los Alamos National 
Security, LLC. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI's shared memory support. diff --git a/opal/mca/btl/smcuda/Makefile.am b/opal/mca/btl/smcuda/Makefile.am index 271c07c2f84..077ddc792f4 100644 --- a/opal/mca/btl/smcuda/Makefile.am +++ b/opal/mca/btl/smcuda/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2009 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -27,7 +27,7 @@ libmca_btl_smcuda_la_sources = \ btl_smcuda_endpoint.h \ btl_smcuda_fifo.h \ btl_smcuda_frag.c \ - btl_smcuda_frag.h + btl_smcuda_frag.h # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/opal/mca/btl/smcuda/README b/opal/mca/btl/smcuda/README index b978f762107..8b9bcf91296 100644 --- a/opal/mca/btl/smcuda/README +++ b/opal/mca/btl/smcuda/README @@ -34,7 +34,7 @@ with the OB1 PML and uses flags that it sends in the BML layer. OTHER CONSIDERATIONS CUDA IPC is not necessarily supported by all GPUs on a node. In NUMA -nodes, CUDA IPC may only work between GPUs that are not connected +nodes, CUDA IPC may only work between GPUs that are not connected over the IOH. In addition, we want to check for CUDA IPC support lazily, when the first GPU access occurs, rather than during MPI_Init() time. This complicates the design. 
diff --git a/opal/mca/btl/smcuda/btl_smcuda.c b/opal/mca/btl/smcuda/btl_smcuda.c index c4930ab8097..8be1a4de53c 100644 --- a/opal/mca/btl/smcuda/btl_smcuda.c +++ b/opal/mca/btl/smcuda/btl_smcuda.c @@ -12,12 +12,13 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2015 Los Alamos National Security, LLC. - * All rights reserved. + * Copyright (c) 2010-2016 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -52,18 +53,13 @@ #include "opal/datatype/opal_convertor.h" #include "opal/mca/btl/btl.h" +#include "opal/mca/common/sm/common_sm_mpool.h" + #if OPAL_CUDA_SUPPORT #include "opal/mca/common/cuda/common_cuda.h" #endif /* OPAL_CUDA_SUPPORT */ #include "opal/mca/mpool/base/base.h" -#include "opal/mca/mpool/sm/mpool_sm.h" - -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/mca/crs/base/base.h" -#include "opal/util/basename.h" -#include "orte/mca/sstore/sstore.h" -#include "opal/runtime/opal_cr.h" -#endif +#include "opal/mca/rcache/base/base.h" #include "btl_smcuda.h" #include "btl_smcuda_endpoint.h" @@ -96,7 +92,7 @@ mca_btl_smcuda_t mca_btl_smcuda = { .btl_sendi = mca_btl_smcuda_sendi, .btl_dump = mca_btl_smcuda_dump, .btl_register_error = mca_btl_smcuda_register_error_cb, - .btl_ft_event = mca_btl_smcuda_ft_event + .btl_ft_event = NULL } }; @@ -121,7 +117,7 @@ static void *mpool_calloc(size_t nmemb, size_t size) size_t bsize = nmemb * size; mca_mpool_base_module_t *mpool = mca_btl_smcuda_component.sm_mpool; - buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0, NULL); + buf = 
mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0); if (NULL == buf) return NULL; @@ -132,7 +128,7 @@ static void *mpool_calloc(size_t nmemb, size_t size) static int setup_mpool_base_resources(mca_btl_smcuda_component_t *comp_ptr, - mca_mpool_base_resources_t *out_res) + mca_common_sm_mpool_resources_t *out_res) { int rc = OPAL_SUCCESS; int fd = -1; @@ -227,14 +223,13 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, size_t length, length_payload; sm_fifo_t *my_fifos; int my_mem_node, num_mem_nodes, i, rc; - mca_mpool_base_resources_t *res = NULL; + mca_common_sm_mpool_resources_t *res = NULL; mca_btl_smcuda_component_t* m = &mca_btl_smcuda_component; /* Assume we don't have hwloc support and fill in dummy info */ mca_btl_smcuda_component.mem_node = my_mem_node = 0; mca_btl_smcuda_component.num_mem_nodes = num_mem_nodes = 1; -#if OPAL_HAVE_HWLOC /* If we have hwloc support, then get accurate information */ if (NULL != opal_hwloc_topology) { i = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology, @@ -284,7 +279,6 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, } } } -#endif if (NULL == (res = calloc(1, sizeof(*res)))) { return OPAL_ERR_OUT_OF_RESOURCE; @@ -298,15 +292,14 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, /* Disable memory binding, because each MPI process will claim pages in the * mpool for their local NUMA node */ res->mem_node = -1; + res->allocator = mca_btl_smcuda_component.allocator; if (OPAL_SUCCESS != (rc = setup_mpool_base_resources(m, res))) { free(res); return rc; } /* now that res is fully populated, create the thing */ - mca_btl_smcuda_component.sm_mpools[0] = - mca_mpool_base_module_create(mca_btl_smcuda_component.sm_mpool_name, - smcuda_btl, res); + mca_btl_smcuda_component.sm_mpools[0] = common_sm_mpool_create (res); /* Sanity check to ensure that we found it */ if (NULL == mca_btl_smcuda_component.sm_mpools[0]) { free(res); @@ -336,7 +329,7 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t 
*smcuda_btl, #if OPAL_CUDA_SUPPORT /* Register the entire shared memory region with the CUDA library which will * force it to be pinned. This aproach was chosen as there is no way for this - * local process to know which parts of the memory are being utilized by a + * local process to know which parts of the memory are being utilized by a * remote process. */ opal_output_verbose(10, opal_btl_base_framework.framework_output, "btl:smcuda: CUDA cuMemHostRegister address=%p, size=%d", @@ -346,10 +339,9 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, /* Create a local memory pool that sends handles to the remote * side. Note that the res argument is not really used, but * needed to satisfy function signature. */ - smcuda_btl->super.btl_mpool = mca_mpool_base_module_create("gpusm", - smcuda_btl, - res); - if (NULL == smcuda_btl->super.btl_mpool) { + mca_rcache_base_resources_t rcache_res; + smcuda_btl->rcache = mca_rcache_base_module_create("gpusm", smcuda_btl, &rcache_res); + if (NULL == smcuda_btl->rcache) { return OPAL_ERR_OUT_OF_RESOURCE; } #endif /* OPAL_CUDA_SUPPORT */ @@ -432,7 +424,7 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, mca_btl_smcuda_component.sm_free_list_inc, mca_btl_smcuda_component.sm_mpool, 0, NULL, NULL, NULL); if ( OPAL_SUCCESS != i ) - return i; + return i; mca_btl_smcuda_component.num_outstanding_frags = 0; @@ -480,16 +472,9 @@ create_sm_endpoint(int local_proc, struct opal_proc_t *proc) } #endif #if OPAL_CUDA_SUPPORT - { - mca_mpool_base_resources_t resources; /* unused, but needed */ - - /* Create a remote memory pool on the endpoint. Note that the resources - * argument is just to satisfy the function signature. The rcuda mpool - * actually takes care of filling in the resources. */ - ep->mpool = mca_mpool_base_module_create("rgpusm", - NULL, - &resources); - } + /* Create a remote memory pool on the endpoint. The rgpusm component + * does not take any resources. They are filled in internally. 
*/ + ep->rcache = mca_rcache_base_module_create ("rgpusm", NULL, NULL); #endif /* OPAL_CUDA_SUPPORT */ return ep; } @@ -508,7 +493,7 @@ int mca_btl_smcuda_add_procs( bool have_connected_peer = false; char **bases; /* for easy access to the mpool_sm_module */ - mca_mpool_sm_module_t *sm_mpool_modp = NULL; + mca_common_sm_mpool_module_t *sm_mpool_modp = NULL; /* initializion */ @@ -585,7 +570,7 @@ int mca_btl_smcuda_add_procs( } bases = mca_btl_smcuda_component.shm_bases; - sm_mpool_modp = (mca_mpool_sm_module_t *)mca_btl_smcuda_component.sm_mpool; + sm_mpool_modp = (mca_common_sm_mpool_module_t *)mca_btl_smcuda_component.sm_mpool; /* initialize own FIFOs */ /* @@ -694,6 +679,13 @@ int mca_btl_smcuda_del_procs( struct opal_proc_t **procs, struct mca_btl_base_endpoint_t **peers) { + for (size_t i = 0 ; i < nprocs ; ++i) { + if (peers[i]->rcache) { + mca_rcache_base_module_destroy (peers[i]->rcache); + peers[i]->rcache = NULL; + } + } + return OPAL_SUCCESS; } @@ -885,6 +877,13 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl, if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) { mca_btl_smcuda_send_cuda_ipc_request(btl, endpoint); } + /* We do not want to use this path when we have CUDA IPC support */ + if ((convertor->flags & CONVERTOR_CUDA) && (IPC_ACKED == endpoint->ipcstate)) { + if (NULL != descriptor) { + *descriptor = mca_btl_smcuda_alloc(btl, endpoint, order, payload_size+header_size, flags); + } + return OPAL_ERR_RESOURCE_BUSY; + } #endif /* OPAL_CUDA_SUPPORT */ /* this check should be unnecessary... turn into an assertion? 
*/ @@ -1003,15 +1002,17 @@ static struct mca_btl_base_registration_handle_t *mca_btl_smcuda_register_mem ( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t *endpoint, void *base, size_t size, uint32_t flags) { - mca_mpool_common_cuda_reg_t *reg; - int mpool_flags = 0; + mca_btl_smcuda_t *smcuda_module = (mca_btl_smcuda_t *) btl; + mca_rcache_common_cuda_reg_t *reg; + int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; + int rcache_flags = 0; if (MCA_BTL_REG_FLAG_CUDA_GPU_MEM & flags) { - mpool_flags |= MCA_MPOOL_FLAGS_CUDA_GPU_MEM; + rcache_flags |= MCA_RCACHE_FLAGS_CUDA_GPU_MEM; } - btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, mpool_flags, - (mca_mpool_base_registration_t **) ®); + smcuda_module->rcache->rcache_register (smcuda_module->rcache, base, size, rcache_flags, + access_flags, (mca_rcache_base_registration_t **) ®); if (OPAL_UNLIKELY(NULL == reg)) { return NULL; } @@ -1022,10 +1023,11 @@ static struct mca_btl_base_registration_handle_t *mca_btl_smcuda_register_mem ( static int mca_btl_smcuda_deregister_mem (struct mca_btl_base_module_t* btl, struct mca_btl_base_registration_handle_t *handle) { - mca_mpool_common_cuda_reg_t *reg = (mca_mpool_common_cuda_reg_t *) - ((intptr_t) handle - offsetof (mca_mpool_common_cuda_reg_t, data)); + mca_btl_smcuda_t *smcuda_module = (mca_btl_smcuda_t *) btl; + mca_rcache_common_cuda_reg_t *reg = (mca_rcache_common_cuda_reg_t *) + ((intptr_t) handle - offsetof (mca_rcache_common_cuda_reg_t, data)); - btl->btl_mpool->mpool_deregister (btl->btl_mpool, ®->base); + smcuda_module->rcache->rcache_deregister (smcuda_module->rcache, ®->base); return OPAL_SUCCESS; } @@ -1036,8 +1038,8 @@ int mca_btl_smcuda_get_cuda (struct mca_btl_base_module_t *btl, struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_mpool_common_cuda_reg_t rget_reg; - mca_mpool_common_cuda_reg_t *reg_ptr = 
&rget_reg; + mca_rcache_common_cuda_reg_t rget_reg; + mca_rcache_common_cuda_reg_t *reg_ptr = &rget_reg; int rc, done; void *remote_memory_address; size_t offset; @@ -1059,11 +1061,11 @@ int mca_btl_smcuda_get_cuda (struct mca_btl_base_module_t *btl, frag->base.des_cbdata = cbdata; frag->base.des_context = cbcontext; frag->local_handle = local_handle; - + /* Set to 0 for debugging since it is a list item but I am not * intializing it properly and it is annoying to see all the * garbage in the debugger. */ - + memset(&rget_reg, 0, sizeof(rget_reg)); memcpy(&rget_reg.data.memHandle, remote_handle->reg_data.memHandle, sizeof(remote_handle->reg_data.memHandle)); @@ -1080,15 +1082,16 @@ int mca_btl_smcuda_get_cuda (struct mca_btl_base_module_t *btl, * remote memory which may lie somewhere in the middle. This is taken care of * a few lines down. Note that we hand in the peer rank just for debugging * support. */ - rc = ep->mpool->mpool_register(ep->mpool, remote_handle->reg_data.memh_seg_addr.pval, - remote_handle->reg_data.memh_seg_len, ep->peer_smp_rank, - (mca_mpool_base_registration_t **)®_ptr); + rc = ep->rcache->rcache_register (ep->rcache, remote_handle->reg_data.memh_seg_addr.pval, + remote_handle->reg_data.memh_seg_len, ep->peer_smp_rank, + MCA_RCACHE_ACCESS_LOCAL_WRITE, + (mca_rcache_base_registration_t **)®_ptr); if (OPAL_SUCCESS != rc) { opal_output(0, "Failed to register remote memory, rc=%d", rc); return rc; } - frag->registration = (mca_mpool_base_registration_t *)reg_ptr; + frag->registration = (mca_rcache_base_registration_t *)reg_ptr; frag->endpoint = ep; /* The registration has given us back the memory block that this @@ -1105,7 +1108,7 @@ int mca_btl_smcuda_get_cuda (struct mca_btl_base_module_t *btl, /* The remote side posted an IPC event to make sure we do not start our * copy until IPC event completes. This is to ensure that the data being sent * is available in the sender's GPU buffer. 
Therefore, do a stream synchronize - * on the IPC event that we received. Note that we pull it from + * on the IPC event that we received. Note that we pull it from * rget_reg, not reg_ptr, as we do not cache the event. */ mca_common_wait_stream_synchronize(&rget_reg); @@ -1167,7 +1170,7 @@ static void mca_btl_smcuda_send_cuda_ipc_request(struct mca_btl_base_module_t* b if ( mca_btl_smcuda_component.num_outstanding_frags * 2 > (int) mca_btl_smcuda_component.fifo_size ) { mca_btl_smcuda_component_progress(); } - + if (0 != (res = mca_common_cuda_get_device(&mydevnum))) { opal_output(0, "Cannot determine device. IPC cannot be set."); endpoint->ipcstate = IPC_BAD; @@ -1222,76 +1225,18 @@ void mca_btl_smcuda_dump(struct mca_btl_base_module_t* btl, mca_btl_smcuda_frag_t* frag; mca_btl_base_err("BTL SM %p endpoint %p [smp_rank %d] [peer_rank %d]\n", - (void*) btl, (void*) endpoint, + (void*) btl, (void*) endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank); if( NULL != endpoint ) { for(item = opal_list_get_first(&endpoint->pending_sends); - item != opal_list_get_end(&endpoint->pending_sends); + item != opal_list_get_end(&endpoint->pending_sends); item = opal_list_get_next(item)) { frag = (mca_btl_smcuda_frag_t*)item; mca_btl_base_err(" | frag %p size %lu (hdr frag %p len %lu rank %d tag %d)\n", (void*) frag, frag->size, (void*) frag->hdr->frag, - frag->hdr->len, frag->hdr->my_smp_rank, + frag->hdr->len, frag->hdr->my_smp_rank, frag->hdr->tag); } } } -#if OPAL_ENABLE_FT_CR == 0 -int mca_btl_smcuda_ft_event(int state) { - return OPAL_SUCCESS; -} -#else -int mca_btl_smcuda_ft_event(int state) { - /* Notify mpool */ - if( NULL != mca_btl_smcuda_component.sm_mpool && - NULL != mca_btl_smcuda_component.sm_mpool->mpool_ft_event) { - mca_btl_smcuda_component.sm_mpool->mpool_ft_event(state); - } - - if(OPAL_CRS_CHECKPOINT == state) { - if( NULL != mca_btl_smcuda_component.sm_seg ) { - /* On restart we need the old file names to exist (not necessarily - * contain content) so 
the CRS component does not fail when searching - * for these old file handles. The restart procedure will make sure - * these files get cleaned up appropriately. - */ - /* Disabled to get FT code compiled again - * TODO: FIXIT soon - orte_sstore.set_attr(orte_sstore_handle_current, - SSTORE_METADATA_LOCAL_TOUCH, - mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name); - */ - } - } - else if(OPAL_CRS_CONTINUE == state) { - if (opal_cr_continue_like_restart) { - if( NULL != mca_btl_smcuda_component.sm_seg ) { - /* Add shared memory file */ - opal_crs_base_cleanup_append(mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name, false); - } - - /* Clear this so we force the module to re-init the sm files */ - mca_btl_smcuda_component.sm_mpool = NULL; - } - } - else if(OPAL_CRS_RESTART == state || - OPAL_CRS_RESTART_PRE == state) { - if( NULL != mca_btl_smcuda_component.sm_seg ) { - /* Add shared memory file */ - opal_crs_base_cleanup_append(mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name, false); - } - - /* Clear this so we force the module to re-init the sm files */ - mca_btl_smcuda_component.sm_mpool = NULL; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OPAL_SUCCESS; -} -#endif /* OPAL_ENABLE_FT_CR */ diff --git a/opal/mca/btl/smcuda/btl_smcuda.h b/opal/mca/btl/smcuda/btl_smcuda.h index b94b6e7fc6a..c2f32b0f103 100644 --- a/opal/mca/btl/smcuda/btl_smcuda.h +++ b/opal/mca/btl/smcuda/btl_smcuda.h @@ -13,7 +13,7 @@ * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. - * All rights reserved. + * All rights reserved. * Copyright (c) 2012-2013 NVIDIA Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -31,9 +31,7 @@ #include #include #include -#ifdef HAVE_STDINT_H #include -#endif /* HAVE_STDINT_H */ #ifdef HAVE_SCHED_H #include #endif /* HAVE_SCHED_H */ @@ -83,23 +81,23 @@ BEGIN_C_DECLS struct sm_fifo_t { /* This queue pointer is used only by the heads. */ - volatile void **queue; + volatile void **queue; char pad0[SM_CACHE_LINE_PAD - sizeof(void **)]; /* This lock is used by the heads. */ - opal_atomic_lock_t head_lock; + opal_atomic_lock_t head_lock; char pad1[SM_CACHE_LINE_PAD - sizeof(opal_atomic_lock_t)]; /* This index is used by the head holding the head lock. */ - volatile int head; + volatile int head; char pad2[SM_CACHE_LINE_PAD - sizeof(int)]; /* This mask is used "read only" by all processes. */ - unsigned int mask; + unsigned int mask; char pad3[SM_CACHE_LINE_PAD - sizeof(int)]; /* The following are used only by the tail. */ volatile void **queue_recv; opal_atomic_lock_t tail_lock; volatile int tail; int num_to_clear; - int lazy_free; + int lazy_free; char pad4[SM_CACHE_LINE_PAD - sizeof(void **) - sizeof(opal_atomic_lock_t) - sizeof(int) * 3]; @@ -165,7 +163,7 @@ struct mca_btl_smcuda_component_t { int num_pending_sends; /**< total number on all of my pending-send queues */ int mem_node; int num_mem_nodes; - + #if OPAL_ENABLE_PROGRESS_THREADS == 1 char sm_fifo_path[PATH_MAX]; /**< path to fifo used to signal this process */ int sm_fifo_fd; /**< file descriptor corresponding to opened fifo */ @@ -208,6 +206,8 @@ struct mca_btl_smcuda_component_t { int use_cuda_ipc; int use_cuda_ipc_same_gpu; #endif /* OPAL_CUDA_SUPPORT */ + unsigned long mpool_min_size; + char *allocator; }; typedef struct mca_btl_smcuda_component_t mca_btl_smcuda_component_t; OPAL_MODULE_DECLSPEC extern mca_btl_smcuda_component_t mca_btl_smcuda_component; @@ -219,7 +219,7 @@ struct mca_btl_smcuda_t { mca_btl_base_module_t super; /**< base BTL interface */ bool btl_inited; /**< flag indicating if btl has been inited */ mca_btl_base_module_error_cb_fn_t error_cb; - 
+ mca_rcache_base_module_t *rcache; }; typedef struct mca_btl_smcuda_t mca_btl_smcuda_t; OPAL_MODULE_DECLSPEC extern mca_btl_smcuda_t mca_btl_smcuda; @@ -256,7 +256,7 @@ static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool, /* allocate the queue in the receiver's address space */ fifo->queue_recv = (volatile void **)mpool->mpool_alloc( - mpool, sizeof(void *) * qsize, opal_cache_line_size, 0, NULL); + mpool, sizeof(void *) * qsize, opal_cache_line_size, 0); if(NULL == fifo->queue_recv) { return OPAL_ERR_OUT_OF_RESOURCE; } @@ -517,13 +517,6 @@ extern void mca_btl_smcuda_dump(struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, int verbose); -/** - * Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OPAL_SUCCESS or failure status - */ -int mca_btl_smcuda_ft_event(int state); - #if OPAL_ENABLE_PROGRESS_THREADS == 1 void mca_btl_smcuda_component_event_thread(opal_object_t*); #endif diff --git a/opal/mca/btl/smcuda/btl_smcuda_component.c b/opal/mca/btl/smcuda/btl_smcuda_component.c index 38c4418adfb..705c658484d 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_component.c +++ b/opal/mca/btl/smcuda/btl_smcuda_component.c @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2015 Los Alamos National Security, LLC. - * All rights reserved. + * Copyright (c) 2010-2016 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ @@ -27,9 +27,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #ifdef HAVE_FCNTL_H #include #endif /* HAVE_FCNTL_H */ @@ -57,9 +55,6 @@ #if OPAL_CUDA_SUPPORT #include "opal/mca/common/cuda/common_cuda.h" #endif /* OPAL_CUDA_SUPPORT */ -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/runtime/opal_cr.h" -#endif #include "btl_smcuda.h" #include "btl_smcuda_frag.h" @@ -143,6 +138,13 @@ static int mca_btl_smcuda_component_verify(void) { static int smcuda_register(void) { /* register SM component parameters */ + mca_btl_smcuda_component.mpool_min_size = 134217728; + (void) mca_base_component_var_register(&mca_btl_smcuda_component.super.btl_version, "min_size", + "Minimum size of the common/sm mpool shared memory file", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, + &mca_btl_smcuda_component.mpool_min_size); + mca_btl_smcuda_param_register_int("free_list_num", 8, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_num); mca_btl_smcuda_param_register_int("free_list_max", -1, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_max); mca_btl_smcuda_param_register_int("free_list_inc", 64, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_inc); @@ -158,6 +160,12 @@ static int smcuda_register(void) /* default number of extra procs to allow for future growth */ mca_btl_smcuda_param_register_int("sm_extra_procs", 0, OPAL_INFO_LVL_9, &mca_btl_smcuda_component.sm_extra_procs); + mca_btl_smcuda_component.allocator = "bucket"; + (void) mca_base_component_var_register (&mca_btl_smcuda_component.super.btl_version, "allocator", + "Name of allocator component to use for btl/smcuda allocations", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_smcuda_component.allocator); + #if OPAL_CUDA_SUPPORT /* Lower priority when CUDA support is not requested */ if (opal_cuda_support) { @@ -175,7 +183,7 @@ static 
int smcuda_register(void) #endif /* OPAL_CUDA_SUPPORT */ mca_btl_smcuda.super.btl_eager_limit = 4*1024; mca_btl_smcuda.super.btl_rndv_eager_limit = 4*1024; - mca_btl_smcuda.super.btl_max_send_size = 128*1024; + mca_btl_smcuda.super.btl_max_send_size = 32*1024; mca_btl_smcuda.super.btl_rdma_pipeline_send_length = 64*1024; mca_btl_smcuda.super.btl_rdma_pipeline_frag_size = 64*1024; mca_btl_smcuda.super.btl_min_rdma_pipeline_size = 64*1024; @@ -187,7 +195,18 @@ static int smcuda_register(void) /* Call the BTL based to register its MCA params */ mca_btl_base_param_register(&mca_btl_smcuda_component.super.btl_version, &mca_btl_smcuda.super); - +#if OPAL_CUDA_SUPPORT + /* If user has not set the value, then set to the defalt */ + if (0 == mca_btl_smcuda.super.btl_cuda_max_send_size) { + mca_btl_smcuda.super.btl_cuda_max_send_size = 128*1024; + } + /* If user has not set the value, then set to magic number which will be converted to the minimum + * size needed to fit the PML header (see pml_ob1.c) */ + if (0 == mca_btl_smcuda.super.btl_cuda_eager_limit) { + mca_btl_smcuda.super.btl_cuda_eager_limit = SIZE_MAX; /* magic number */ + } + mca_common_cuda_register_mca_variables(); +#endif /* OPAL_CUDA_SUPPORT */ return mca_btl_smcuda_component_verify(); } @@ -216,6 +235,17 @@ static int mca_btl_smcuda_component_open(void) mca_btl_smcuda_component.max_frag_size = mca_btl_smcuda.super.btl_max_send_size; mca_btl_smcuda_component.eager_limit = mca_btl_smcuda.super.btl_eager_limit; +#if OPAL_CUDA_SUPPORT + /* Possibly adjust max_frag_size if the cuda size is bigger */ + if (mca_btl_smcuda.super.btl_cuda_max_send_size > mca_btl_smcuda.super.btl_max_send_size) { + mca_btl_smcuda_component.max_frag_size = mca_btl_smcuda.super.btl_cuda_max_send_size; + } + opal_output_verbose(10, opal_btl_base_framework.framework_output, + "btl: smcuda: cuda_max_send_size=%d, max_send_size=%d, max_frag_size=%d", + (int)mca_btl_smcuda.super.btl_cuda_max_send_size, 
(int)mca_btl_smcuda.super.btl_max_send_size, + (int)mca_btl_smcuda_component.max_frag_size); +#endif /* OPAL_CUDA_SUPPORT */ + /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_frags_eager, opal_free_list_t); @@ -257,17 +287,7 @@ static int mca_btl_smcuda_component_close(void) * to it are gone - no error checking, since we want all procs * to call this, so that in an abnormal termination scenario, * this file will still get cleaned up */ -#if OPAL_ENABLE_FT_CR == 1 - /* Only unlink the file if we are *not* restarting - * If we are restarting the file will be unlinked at a later time. - */ - if(OPAL_CR_STATUS_RESTART_PRE != opal_cr_checkpointing_state && - OPAL_CR_STATUS_RESTART_POST != opal_cr_checkpointing_state ) { - unlink(mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name); - } -#else unlink(mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name); -#endif OBJ_RELEASE(mca_btl_smcuda_component.sm_seg); } @@ -297,7 +317,7 @@ static int mca_btl_smcuda_component_close(void) return return_value; } -/* +/* * Returns the number of processes on the node. */ static inline int @@ -346,41 +366,6 @@ create_and_attach(mca_btl_smcuda_component_t *comp_ptr, return OPAL_SUCCESS; } -/* - * SKG - I'm not happy with this, but I can't figure out a better way of - * finding the sm mpool's minimum size 8-|. The way I see it. This BTL only - * uses the sm mpool, so maybe this isn't so bad... - * - * The problem is the we need to size the mpool resources at sm BTL component - * init. That means we need to know the mpool's minimum size at create. 
- */ -static int -get_min_mpool_size(mca_btl_smcuda_component_t *comp_ptr, - size_t *out_size) -{ - const char *type_name = "mpool"; - const char *param_name = "min_size"; - const mca_base_var_storage_t *min_size; - int id = 0; - - if (0 > (id = mca_base_var_find("ompi", type_name, comp_ptr->sm_mpool_name, - param_name))) { - opal_output(0, "mca_base_var_find: failure looking for %s_%s_%s\n", - type_name, comp_ptr->sm_mpool_name, param_name); - return OPAL_ERR_NOT_FOUND; - } - - if (OPAL_SUCCESS != mca_base_var_get_value(id, &min_size, NULL, NULL)) { - opal_output(0, "mca_base_var_get_value failure\n"); - return OPAL_ERROR; - } - - /* the min_size variable is an unsigned long long */ - *out_size = (size_t) min_size->ullval; - - return OPAL_SUCCESS; -} - static int get_mpool_res_size(int32_t max_procs, size_t *out_res_size) @@ -501,21 +486,18 @@ create_rndv_file(mca_btl_smcuda_component_t *comp_ptr, mca_common_sm_module_t *tmp_modp = NULL; if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) { - size_t min_size = 0; /* get the segment size for the sm mpool. */ if (OPAL_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs, &size))) { /* rc is already set */ goto out; } - /* do we need to update the size based on the sm mpool's min size? */ - if (OPAL_SUCCESS != (rc = get_min_mpool_size(comp_ptr, &min_size))) { - goto out; - } + /* update size if less than required minimum */ - if (size < min_size) { - size = min_size; + if (size < mca_btl_smcuda_component.mpool_min_size) { + size = mca_btl_smcuda_component.mpool_min_size; } + /* we only need the shmem_ds info at this point. initilization will be * completed in the mpool module code. the idea is that we just need this * info so we can populate the rndv file (or modex when we have it). 
*/ @@ -784,8 +766,8 @@ static void btl_smcuda_control(mca_btl_base_module_t* btl, smcuda_btl->error_cb(&smcuda_btl->super, MCA_BTL_ERROR_FLAGS_ADD_CUDA_IPC, ep_proc, (char *)&mca_btl_smcuda_component.cuda_ipc_output); opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, - "Sending CUDA IPC ACK: myrank=%d, mydev=%d, peerrank=%d, peerdev=%d", - endpoint->my_smp_rank, mydevnum, endpoint->peer_smp_rank, + "Sending CUDA IPC ACK: myrank=%d, mydev=%d, peerrank=%d, peerdev=%d", + endpoint->my_smp_rank, mydevnum, endpoint->peer_smp_rank, ctrlhdr.cudev); mca_btl_smcuda_send_cuda_ipc_ack(btl, endpoint, 1); } @@ -969,22 +951,22 @@ void mca_btl_smcuda_component_event_thread(opal_object_t* thread) } #endif -void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep) -{ - btl_smcuda_pending_send_item_t *si; - int rc; +void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep) +{ + btl_smcuda_pending_send_item_t *si; + int rc; while ( 0 < opal_list_get_size(&ep->pending_sends) ) { /* Note that we access the size of ep->pending_sends unlocked - as it doesn't really matter if the result is wrong as + as it doesn't really matter if the result is wrong as opal_list_remove_first is called with a lock and we handle it not finding an item to process */ OPAL_THREAD_LOCK(&ep->endpoint_lock); - si = (btl_smcuda_pending_send_item_t*)opal_list_remove_first(&ep->pending_sends); + si = (btl_smcuda_pending_send_item_t*)opal_list_remove_first(&ep->pending_sends); OPAL_THREAD_UNLOCK(&ep->endpoint_lock); if(NULL == si) return; /* Another thread got in before us. Thats ok. 
*/ - + OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, -1); MCA_BTL_SMCUDA_FIFO_WRITE(ep, ep->my_smp_rank, ep->peer_smp_rank, si->data, @@ -995,7 +977,7 @@ void btl_smcuda_process_pending_sends(struct mca_btl_base_endpoint_t *ep) if ( OPAL_SUCCESS != rc ) return; } -} +} int mca_btl_smcuda_component_progress(void) { @@ -1141,8 +1123,8 @@ int mca_btl_smcuda_component_progress(void) OPAL_SUCCESS); if(frag->registration != NULL) { - frag->endpoint->mpool->mpool_deregister(frag->endpoint->mpool, - (mca_mpool_base_registration_t*)frag->registration); + frag->endpoint->rcache->rcache_deregister (frag->endpoint->rcache, + (mca_rcache_base_registration_t*)frag->registration); frag->registration = NULL; MCA_BTL_SMCUDA_FRAG_RETURN(frag); } diff --git a/opal/mca/btl/smcuda/btl_smcuda_endpoint.h b/opal/mca/btl/smcuda/btl_smcuda_endpoint.h index cead5ec7a5c..1dfb359e17f 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_endpoint.h +++ b/opal/mca/btl/smcuda/btl_smcuda_endpoint.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2012 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,7 +38,7 @@ struct mca_btl_base_endpoint_t { int peer_smp_rank; /**< My peer's SMP process rank. Used for accessing * SMP specfic data structures. 
*/ #if OPAL_CUDA_SUPPORT - mca_mpool_base_module_t *mpool; /**< mpool for remotely registered memory */ + mca_rcache_base_module_t *rcache; /**< rcache for remotely registered memory */ #endif /* OPAL_CUDA_SUPPORT */ #if OPAL_ENABLE_PROGRESS_THREADS == 1 int fifo_fd; /**< pipe/fifo used to signal endpoint that data is queued */ diff --git a/opal/mca/btl/smcuda/btl_smcuda_frag.h b/opal/mca/btl/smcuda/btl_smcuda_frag.h index c570d65bdcd..78cc9c39012 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_frag.h +++ b/opal/mca/btl/smcuda/btl_smcuda_frag.h @@ -54,7 +54,7 @@ typedef struct mca_btl_smcuda_hdr_t mca_btl_smcuda_hdr_t; #if OPAL_CUDA_SUPPORT struct mca_btl_base_registration_handle_t { - mca_mpool_common_cuda_reg_data_t reg_data; + mca_rcache_common_cuda_reg_data_t reg_data; }; #endif @@ -63,9 +63,9 @@ struct mca_btl_smcuda_segment_t { #if OPAL_CUDA_SUPPORT uint8_t key[128]; /* 64 bytes for CUDA mem handle, 64 bytes for CUDA event handle */ /** Address of the entire memory handle */ - opal_ptr_t memh_seg_addr; + opal_ptr_t memh_seg_addr; /** Length in bytes of entire memory handle */ - uint32_t memh_seg_len; + uint32_t memh_seg_len; #endif /* OPAL_CUDA_SUPPORT */ }; typedef struct mca_btl_smcuda_segment_t mca_btl_smcuda_segment_t; @@ -78,7 +78,7 @@ struct mca_btl_smcuda_frag_t { mca_btl_base_segment_t segment; struct mca_btl_base_endpoint_t *endpoint; #if OPAL_CUDA_SUPPORT - struct mca_mpool_base_registration_t *registration; + struct mca_rcache_base_registration_t *registration; struct mca_btl_base_registration_handle_t *local_handle; #endif /* OPAL_CUDA_SUPPORT */ size_t size; diff --git a/opal/mca/btl/smcuda/configure.m4 b/opal/mca/btl/smcuda/configure.m4 index d56cb178580..016f691944f 100644 --- a/opal/mca/btl/smcuda/configure.m4 +++ b/opal/mca/btl/smcuda/configure.m4 @@ -4,7 +4,7 @@ # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012-2013 NVIDIA Corporation. 
All rights reserved. +# Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -21,8 +21,8 @@ AC_DEFUN([MCA_opal_btl_smcuda_CONFIG],[ # make sure that CUDA-aware checks have been done AC_REQUIRE([OPAL_CHECK_CUDA]) - # Only build if CUDA 4.1 support is available - AS_IF([test "x$CUDA_SUPPORT_41" = "x1"], + # Only build if CUDA support is available + AS_IF([test "x$CUDA_SUPPORT" = "x1"], [$1], [$2]) diff --git a/opal/mca/btl/smcuda/help-mpi-btl-smcuda.txt b/opal/mca/btl/smcuda/help-mpi-btl-smcuda.txt index 99fdfb40b27..dcba31b824f 100644 --- a/opal/mca/btl/smcuda/help-mpi-btl-smcuda.txt +++ b/opal/mca/btl/smcuda/help-mpi-btl-smcuda.txt @@ -6,9 +6,9 @@ # Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2014 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI's smcuda BTL. diff --git a/opal/mca/btl/tcp/Makefile.am b/opal/mca/btl/tcp/Makefile.am index cc61d81fea3..0d806cb6b65 100644 --- a/opal/mca/btl/tcp/Makefile.am +++ b/opal/mca/btl/tcp/Makefile.am @@ -5,16 +5,18 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. +# All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -31,9 +33,7 @@ sources = \ btl_tcp_frag.h \ btl_tcp_hdr.h \ btl_tcp_proc.c \ - btl_tcp_proc.h \ - btl_tcp_ft.c \ - btl_tcp_ft.h + btl_tcp_proc.h # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/opal/mca/btl/tcp/btl_tcp.c b/opal/mca/btl/tcp/btl_tcp.c index 979c65d1a8b..b83f0e26bd4 100644 --- a/opal/mca/btl/tcp/btl_tcp.c +++ b/opal/mca/btl/tcp/btl_tcp.c @@ -6,17 +6,20 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights - * reserved. + * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +27,13 @@ #include #include "opal/class/opal_bitmap.h" #include "opal/mca/btl/btl.h" -#include "opal/datatype/opal_convertor.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/mpool/mpool.h" +#include "opal/datatype/opal_convertor.h" +#include "opal/mca/mpool/base/base.h" +#include "opal/mca/mpool/mpool.h" +#include "opal/mca/btl/base/btl_base_error.h" #include "btl_tcp.h" -#include "btl_tcp_frag.h" +#include "btl_tcp_frag.h" #include "btl_tcp_proc.h" #include "btl_tcp_endpoint.h" @@ -45,7 +49,7 @@ mca_btl_tcp_module_t mca_btl_tcp_module = { .btl_send = mca_btl_tcp_send, .btl_put = mca_btl_tcp_put, .btl_dump = mca_btl_base_dump, - .btl_ft_event = mca_btl_tcp_ft_event + .btl_ft_event = NULL } }; @@ -53,10 +57,10 @@ mca_btl_tcp_module_t mca_btl_tcp_module = { * */ -int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t** peers, +int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl, + size_t nprocs, + struct opal_proc_t **procs, + struct mca_btl_base_endpoint_t** peers, opal_bitmap_t* reachable ) { mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*)btl; @@ -72,6 +76,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl, struct opal_proc_t* opal_proc = procs[i]; mca_btl_tcp_proc_t* tcp_proc; mca_btl_base_endpoint_t* tcp_endpoint; + bool existing_found = false; /* Do not create loopback TCP connections */ if( my_proc == opal_proc ) { @@ -83,35 +88,50 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl, } /* - * Check to make sure that the peer has at least as many interface - * addresses exported as we are trying to use. If not, then + * Check to make sure that the peer has at least as many interface + * addresses exported as we are trying to use. If not, then * don't bind this BTL instance to the proc. 
*/ OPAL_THREAD_LOCK(&tcp_proc->proc_lock); - /* The btl_proc datastructure is shared by all TCP BTL - * instances that are trying to reach this destination. - * Cache the peer instance on the btl_proc. - */ - tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t); - if(NULL == tcp_endpoint) { - OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock); - return OPAL_ERR_OUT_OF_RESOURCE; + for (uint32_t j = 0 ; j < (uint32_t)tcp_proc->proc_endpoint_count ; ++j) { + tcp_endpoint = tcp_proc->proc_endpoints[j]; + if (tcp_endpoint->endpoint_btl == tcp_btl) { + existing_found = true; + break; + } } - tcp_endpoint->endpoint_btl = tcp_btl; - rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint); - if(rc != OPAL_SUCCESS) { - OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock); - OBJ_RELEASE(tcp_endpoint); - continue; + if (!existing_found) { + /* The btl_proc datastructure is shared by all TCP BTL + * instances that are trying to reach this destination. + * Cache the peer instance on the btl_proc. + */ + tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t); + if(NULL == tcp_endpoint) { + OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + tcp_endpoint->endpoint_btl = tcp_btl; + rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint); + if(rc != OPAL_SUCCESS) { + OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock); + OBJ_RELEASE(tcp_endpoint); + continue; + } + + opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint); } - opal_bitmap_set_bit(reachable, i); OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock); + + if (NULL != reachable) { + opal_bitmap_set_bit(reachable, i); + } + peers[i] = tcp_endpoint; - opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint); /* we increase the count of MPI users of the event library once per peer, so that we are used until we aren't @@ -122,14 +142,14 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl, return OPAL_SUCCESS; } -int mca_btl_tcp_del_procs(struct mca_btl_base_module_t* btl, - size_t nprocs, - struct 
opal_proc_t **procs, +int mca_btl_tcp_del_procs(struct mca_btl_base_module_t* btl, + size_t nprocs, + struct opal_proc_t **procs, struct mca_btl_base_endpoint_t ** endpoints) { mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*)btl; size_t i; - for(i=0; iendpoint_proc != mca_btl_tcp_proc_local()) { opal_list_remove_item(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint); @@ -156,22 +176,22 @@ mca_btl_base_descriptor_t* mca_btl_tcp_alloc( uint32_t flags) { mca_btl_tcp_frag_t* frag = NULL; - - if(size <= btl->btl_eager_limit) { - MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag); - } else if (size <= btl->btl_max_send_size) { - MCA_BTL_TCP_FRAG_ALLOC_MAX(frag); + + if(size <= btl->btl_eager_limit) { + MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag); + } else if (size <= btl->btl_max_send_size) { + MCA_BTL_TCP_FRAG_ALLOC_MAX(frag); } if( OPAL_UNLIKELY(NULL == frag) ) { return NULL; } - + frag->segments[0].seg_len = size; frag->segments[0].seg_addr.pval = frag+1; frag->base.des_segments = frag->segments; frag->base.des_segment_count = 1; - frag->base.des_flags = flags; + frag->base.des_flags = flags; frag->base.order = MCA_BTL_NO_ORDER; frag->btl = (mca_btl_tcp_module_t*)btl; return (mca_btl_base_descriptor_t*)frag; @@ -183,12 +203,12 @@ mca_btl_base_descriptor_t* mca_btl_tcp_alloc( */ int mca_btl_tcp_free( - struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* des) + struct mca_btl_base_module_t* btl, + mca_btl_base_descriptor_t* des) { - mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*)des; - MCA_BTL_TCP_FRAG_RETURN(frag); - return OPAL_SUCCESS; + mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*)des; + MCA_BTL_TCP_FRAG_RETURN(frag); + return OPAL_SUCCESS; } /** @@ -223,7 +243,7 @@ mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src( if (max_data+reserve <= btl->btl_eager_limit) { MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag); } else { - /* + /* * otherwise pack as much data as we can into a fragment * that is the max send size. 
*/ @@ -244,13 +264,13 @@ mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src( } iov.iov_len = max_data; iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)(frag->segments[0].seg_addr.pval)) + reserve); - + rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); if( OPAL_UNLIKELY(rc < 0) ) { mca_btl_tcp_free(btl, &frag->base); return NULL; } - + frag->segments[0].seg_len += max_data; } else { @@ -287,11 +307,11 @@ mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src( int mca_btl_tcp_send( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, + struct mca_btl_base_descriptor_t* descriptor, mca_btl_base_tag_t tag ) { - mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; - mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*)descriptor; + mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; + mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*)descriptor; int i; frag->btl = tcp_btl; @@ -334,7 +354,7 @@ int mca_btl_tcp_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; + mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; mca_btl_tcp_frag_t *frag = NULL; int i; @@ -399,7 +419,7 @@ int mca_btl_tcp_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; + mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; mca_btl_tcp_frag_t* frag = NULL; int rc; @@ -457,7 +477,7 @@ int mca_btl_tcp_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t int mca_btl_tcp_finalize(struct mca_btl_base_module_t* btl) { - 
mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; + mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; opal_list_item_t* item; for( item = opal_list_remove_first(&tcp_btl->tcp_endpoints); item != NULL; @@ -469,3 +489,34 @@ int mca_btl_tcp_finalize(struct mca_btl_base_module_t* btl) free(tcp_btl); return OPAL_SUCCESS; } + +void mca_btl_tcp_dump(struct mca_btl_base_module_t* base_btl, + struct mca_btl_base_endpoint_t* endpoint, + int verbose) +{ + mca_btl_tcp_module_t* btl = (mca_btl_tcp_module_t*)base_btl; + mca_btl_base_err("%s TCP %p kernel_id %d\n" +#if MCA_BTL_TCP_STATISTICS + " | statistics: sent %lu recv %lu\n" +#endif /* MCA_BTL_TCP_STATISTICS */ + " | latency %u bandwidth %u\n", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (void*)btl, btl->tcp_ifkindex, +#if MCA_BTL_TCP_STATISTICS + btl->tcp_bytes_sent, btl->btl_bytes_recv, +#endif /* MCA_BTL_TCP_STATISTICS */ + btl->super.btl_latency, btl->super.btl_bandwidth); +#if OPAL_ENABLE_DEBUG && WANT_PEER_DUMP + if( NULL != endpoint ) { + MCA_BTL_TCP_ENDPOINT_DUMP(10, endpoint, false, "TCP"); + + } else if( verbose ) { + opal_list_item_t *item; + + for(item = opal_list_get_first(&btl->tcp_endpoints); + item != opal_list_get_end(&btl->tcp_endpoints); + item = opal_list_get_next(item)) { + MCA_BTL_TCP_ENDPOINT_DUMP(10, (mca_btl_base_endpoint_t*)item, false, "TCP"); + } + } +#endif /* OPAL_ENABLE_DEBUG && WANT_PEER_DUMP */ +} diff --git a/opal/mca/btl/tcp/btl_tcp.h b/opal/mca/btl/tcp/btl_tcp.h index 178f4b56271..3ad2f08f0fa 100644 --- a/opal/mca/btl/tcp/btl_tcp.h +++ b/opal/mca/btl/tcp/btl_tcp.h @@ -3,22 +3,22 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -37,17 +37,80 @@ #ifdef HAVE_NETINET_IN_H #include #endif +#ifdef HAVE_UNISTD_H +#include +#endif /* Open MPI includes */ #include "opal/mca/event/event.h" +#include "opal/class/opal_free_list.h" #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/base.h" #include "opal/mca/mpool/mpool.h" #include "opal/class/opal_hash_table.h" +#include "opal/util/fd.h" #define MCA_BTL_TCP_STATISTICS 0 BEGIN_C_DECLS +#if (HAVE_PTHREAD_H == 1) +#define MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD 1 +#else +#define MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD 0 +#endif /* (HAVE_PTHREAD_H == 1) */ + +extern opal_event_base_t* mca_btl_tcp_event_base; + +#define MCA_BTL_TCP_COMPLETE_FRAG_SEND(frag) \ + do { \ + int btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); \ + if( frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK ) { \ + frag->base.des_cbfunc(&frag->endpoint->endpoint_btl->super, frag->endpoint, \ + &frag->base, frag->rc); \ + } \ + if( btl_ownership ) { \ + MCA_BTL_TCP_FRAG_RETURN(frag); \ + } \ + } while (0) +#define MCA_BTL_TCP_RECV_TRIGGER_CB(frag) \ + do { \ + if( MCA_BTL_TCP_HDR_TYPE_SEND == frag->hdr.type ) { \ + mca_btl_active_message_callback_t* reg; \ + reg = mca_btl_base_active_message_trigger + frag->hdr.base.tag; \ + 
reg->cbfunc(&frag->endpoint->endpoint_btl->super, frag->hdr.base.tag, &frag->base, reg->cbdata); \ + } \ + } while (0) + +#if MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD +extern opal_list_t mca_btl_tcp_ready_frag_pending_queue; +extern opal_mutex_t mca_btl_tcp_ready_frag_mutex; +extern int mca_btl_tcp_pipe_to_progress[2]; +extern int mca_btl_tcp_progress_thread_trigger; + +#define MCA_BTL_TCP_CRITICAL_SECTION_ENTER(name) \ + opal_mutex_atomic_lock((name)) +#define MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(name) \ + opal_mutex_atomic_unlock((name)) + +#define MCA_BTL_TCP_ACTIVATE_EVENT(event, value) \ + do { \ + if(0 < mca_btl_tcp_progress_thread_trigger) { \ + opal_event_t* _event = (opal_event_t*)(event); \ + (void) opal_fd_write( mca_btl_tcp_pipe_to_progress[1], sizeof(opal_event_t*), \ + &_event); \ + } \ + else { \ + opal_event_add(event, (value)); \ + } \ + } while (0) +#else +#define MCA_BTL_TCP_CRITICAL_SECTION_ENTER(name) +#define MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(name) +#define MCA_BTL_TCP_ACTIVATE_EVENT(event, value) \ + do { \ + opal_event_add(event, (value)); \ + } while (0) +#endif /* MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD */ /** * TCP BTL component. 
@@ -66,6 +129,7 @@ struct mca_btl_tcp_component_t { int tcp_endpoint_cache; /**< amount of cache on each endpoint */ opal_proc_table_t tcp_procs; /**< hash table of tcp proc structures */ opal_mutex_t tcp_lock; /**< lock for accessing module state */ + opal_list_t tcp_events; opal_event_t tcp_recv_event; /**< recv event for IPv4 listen socket */ int tcp_listen_sd; /**< IPv4 listen socket for incoming connection requests */ @@ -92,6 +156,14 @@ struct mca_btl_tcp_component_t { opal_free_list_t tcp_frag_max; opal_free_list_t tcp_frag_user; + int tcp_enable_progress_thread; /** Support for tcp progress thread flag */ + +#if MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD + opal_event_t tcp_recv_thread_async_event; + opal_mutex_t tcp_frag_eager_mutex; + opal_mutex_t tcp_frag_max_mutex; + opal_mutex_t tcp_frag_user_mutex; +#endif /* Do we want to use TCP_NODELAY? */ int tcp_not_use_nodelay; @@ -99,7 +171,7 @@ struct mca_btl_tcp_component_t { * that are not found? */ bool report_all_unfound_interfaces; -}; +}; typedef struct mca_btl_tcp_component_t mca_btl_tcp_component_t; OPAL_MODULE_DECLSPEC extern mca_btl_tcp_component_t mca_btl_tcp_component; @@ -122,7 +194,7 @@ struct mca_btl_tcp_module_t { size_t tcp_bytes_recv; size_t tcp_send_handler; #endif -}; +}; typedef struct mca_btl_tcp_module_t mca_btl_tcp_module_t; extern mca_btl_tcp_module_t mca_btl_tcp_module; @@ -130,28 +202,21 @@ extern mca_btl_tcp_module_t mca_btl_tcp_module; /** * TCP component initialization. - * + * * @param num_btl_modules (OUT) Number of BTLs returned in BTL array. * @param allow_multi_user_threads (OUT) Flag indicating wether BTL supports user threads (TRUE) * @param have_hidden_threads (OUT) Flag indicating wether BTL uses threads (TRUE) */ extern mca_btl_base_module_t** mca_btl_tcp_component_init( - int *num_btl_modules, + int *num_btl_modules, bool allow_multi_user_threads, bool have_hidden_threads ); -/** - * TCP component progress. 
- */ -extern int mca_btl_tcp_component_progress(void); - - - /** * Cleanup any resources held by the BTL. - * + * * @param btl BTL instance. * @return OPAL_SUCCESS or error status on failure. */ @@ -163,14 +228,14 @@ extern int mca_btl_tcp_finalize( /** * PML->BTL notification of change in the process list. - * + * * @param btl (IN) * @param nprocs (IN) Number of processes * @param procs (IN) Set of processes * @param peers (OUT) Set of (optional) peer addressing info. * @param peers (IN/OUT) Set of processes that are reachable via this BTL. * @return OPAL_SUCCESS or error status on failure. - * + * */ extern int mca_btl_tcp_add_procs( @@ -250,7 +315,7 @@ extern mca_btl_base_descriptor_t* mca_btl_tcp_alloc( struct mca_btl_base_endpoint_t* endpoint, uint8_t order, size_t size, - uint32_t flags); + uint32_t flags); /** @@ -261,9 +326,9 @@ extern mca_btl_base_descriptor_t* mca_btl_tcp_alloc( */ extern int mca_btl_tcp_free( - struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* des); - + struct mca_btl_base_module_t* btl, + mca_btl_base_descriptor_t* des); + /** * Prepare a descriptor for send/rdma using the supplied @@ -276,7 +341,7 @@ extern int mca_btl_tcp_free( * @param endpoint (IN) BTL peer addressing * @param convertor (IN) Data type convertor * @param reserve (IN) Additional bytes requested by upper layer to precede user data - * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) + * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) */ mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src( @@ -289,13 +354,10 @@ mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src( uint32_t flags ); - -/** - * Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OPAL_SUCCESS or failure status - */ -int mca_btl_tcp_ft_event(int state); +extern void +mca_btl_tcp_dump(struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + int 
verbose); END_C_DECLS #endif diff --git a/opal/mca/btl/tcp/btl_tcp_addr.h b/opal/mca/btl/tcp/btl_tcp_addr.h index d9de87eec0c..aa53535cce0 100644 --- a/opal/mca/btl/tcp/btl_tcp_addr.h +++ b/opal/mca/btl/tcp/btl_tcp_addr.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -53,13 +53,13 @@ struct mca_btl_tcp_addr_t { uint32_t _pad[3]; } _addr__inet; } _union_inet; - } addr_inet; + } addr_inet; #endif in_port_t addr_port; /**< listen port */ uint16_t addr_ifkindex; /**< remote interface index assigned with this address */ unsigned short addr_inuse; /**< local meaning only */ - uint8_t addr_family; /**< AF_INET or AF_INET6 */ + uint8_t addr_family; /**< AF_INET or AF_INET6 */ }; typedef struct mca_btl_tcp_addr_t mca_btl_tcp_addr_t; diff --git a/opal/mca/btl/tcp/btl_tcp_component.c b/opal/mca/btl/tcp/btl_tcp_component.c index 8f5fed354bd..142de0ed76e 100644 --- a/opal/mca/btl/tcp/btl_tcp_component.c +++ b/opal/mca/btl/tcp/btl_tcp_component.c @@ -3,10 +3,10 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,15 +14,15 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Laboratory * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -61,30 +61,46 @@ #include "opal/util/argv.h" #include "opal/util/net.h" #include "opal/util/proc.h" +#include "opal/util/net.h" +#include "opal/util/fd.h" #include "opal/util/show_help.h" #include "opal/constants.h" #include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" -#include "opal/mca/mpool/base/base.h" +#include "opal/mca/btl/base/base.h" +#include "opal/mca/mpool/base/base.h" #include "opal/mca/btl/base/btl_base_error.h" #include "opal/mca/pmix/pmix.h" +#include "opal/threads/threads.h" +#include "opal/constants.h" +#include "opal/mca/btl/btl.h" +#include "opal/mca/btl/base/base.h" +#include "opal/mca/btl/base/btl_base_error.h" #include "btl_tcp.h" #include "btl_tcp_addr.h" #include "btl_tcp_proc.h" #include "btl_tcp_frag.h" -#include "btl_tcp_endpoint.h" +#include "btl_tcp_endpoint.h" #if OPAL_CUDA_SUPPORT #include "opal/mca/common/cuda/common_cuda.h" #endif /* OPAL_CUDA_SUPPORT */ -/* +/* * Local functions */ static int mca_btl_tcp_component_register(void); static int mca_btl_tcp_component_open(void); static int mca_btl_tcp_component_close(void); +opal_event_base_t* mca_btl_tcp_event_base = NULL; +#if MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD +int mca_btl_tcp_progress_thread_trigger = -1; +int mca_btl_tcp_pipe_to_progress[2] = { -1, -1 }; +static 
opal_thread_t mca_btl_tcp_progress_thread = { { 0 } }; +opal_list_t mca_btl_tcp_ready_frag_pending_queue = { { 0 } }; +opal_mutex_t mca_btl_tcp_ready_frag_mutex = OPAL_MUTEX_STATIC_INIT; +#endif /* MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD */ + mca_btl_tcp_component_t mca_btl_tcp_component = { .super = { /* First, the mca_base_component_t struct containing meta information @@ -102,6 +118,7 @@ mca_btl_tcp_component_t mca_btl_tcp_component = { }, .btl_init = mca_btl_tcp_component_init, + .btl_progress = NULL, } }; @@ -110,7 +127,7 @@ mca_btl_tcp_component_t mca_btl_tcp_component = { */ static inline char* mca_btl_tcp_param_register_string( - const char* param_name, + const char* param_name, const char* help_string, const char* default_value, int level, @@ -125,7 +142,7 @@ static inline char* mca_btl_tcp_param_register_string( } static inline int mca_btl_tcp_param_register_int( - const char* param_name, + const char* param_name, const char* help_string, int default_value, int level, @@ -140,7 +157,7 @@ static inline int mca_btl_tcp_param_register_int( } static inline unsigned int mca_btl_tcp_param_register_uint( - const char* param_name, + const char* param_name, const char* help_string, unsigned int default_value, int level, @@ -165,8 +182,25 @@ struct mca_btl_tcp_event_t { }; typedef struct mca_btl_tcp_event_t mca_btl_tcp_event_t; -OBJ_CLASS_INSTANCE( mca_btl_tcp_event_t, opal_list_item_t, - NULL, NULL); +static void mca_btl_tcp_event_construct(mca_btl_tcp_event_t* event) +{ + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_lock); + opal_list_append(&mca_btl_tcp_component.tcp_events, &event->item); + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_lock); +} + +static void mca_btl_tcp_event_destruct(mca_btl_tcp_event_t* event) +{ + MCA_BTL_TCP_CRITICAL_SECTION_ENTER(&mca_btl_tcp_component.tcp_lock); + opal_list_remove_item(&mca_btl_tcp_component.tcp_events, &event->item); + MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(&mca_btl_tcp_component.tcp_lock); +} 
+ +OBJ_CLASS_INSTANCE( + mca_btl_tcp_event_t, + opal_list_item_t, + mca_btl_tcp_event_construct, + mca_btl_tcp_event_destruct); /* @@ -207,7 +241,7 @@ static int mca_btl_tcp_component_register(void) /* register TCP component parameters */ mca_btl_tcp_param_register_uint("links", NULL, 1, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_num_links); mca_btl_tcp_param_register_string("if_include", "Comma-delimited list of devices and/or CIDR notation of networks to use for MPI communication (e.g., \"eth0,192.168.0.0/16\"). Mutually exclusive with btl_tcp_if_exclude.", "", OPAL_INFO_LVL_1, &mca_btl_tcp_component.tcp_if_include); - mca_btl_tcp_param_register_string("if_exclude", "Comma-delimited list of devices and/or CIDR notation of networks to NOT use for MPI communication -- all devices not matching these specifications will be used (e.g., \"eth0,192.168.0.0/16\"). If set to a non-default value, it is mutually exclusive with btl_tcp_if_include.", + mca_btl_tcp_param_register_string("if_exclude", "Comma-delimited list of devices and/or CIDR notation of networks to NOT use for MPI communication -- all devices not matching these specifications will be used (e.g., \"eth0,192.168.0.0/16\"). If set to a non-default value, it is mutually exclusive with btl_tcp_if_include.", "127.0.0.1/8,sppp", OPAL_INFO_LVL_1, &mca_btl_tcp_component.tcp_if_exclude); @@ -221,12 +255,13 @@ static int mca_btl_tcp_component_register(void) " used to reduce the number of syscalls, by replacing them with memcpy." 
" Every read will read the expected data plus the amount of the" " endpoint_cache", 30*1024, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_endpoint_cache); - mca_btl_tcp_param_register_int ("use_nagle", "Whether to use Nagle's algorithm or not (using Nagle's algorithm may increase short message latency)", 0, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_not_use_nodelay); - mca_btl_tcp_param_register_int( "port_min_v4", + mca_btl_tcp_param_register_int ("use_nagle", "Whether to use Nagle's algorithm or not (using Nagle's algorithm may increase short message latency)", + 0, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_not_use_nodelay); + mca_btl_tcp_param_register_int( "port_min_v4", "The minimum port where the TCP BTL will try to bind (default 1024)", 1024, OPAL_INFO_LVL_2, &mca_btl_tcp_component.tcp_port_min); - asprintf( &message, + asprintf( &message, "The number of ports where the TCP BTL will try to bind (default %d)." " This parameter together with the port min, define a range of ports" " where Open MPI will open sockets.", @@ -239,7 +274,7 @@ static int mca_btl_tcp_component_register(void) mca_btl_tcp_param_register_int( "port_min_v6", "The minimum port where the TCP BTL will try to bind (default 1024)", 1024, OPAL_INFO_LVL_2, & mca_btl_tcp_component.tcp6_port_min ); - asprintf( &message, + asprintf( &message, "The number of ports where the TCP BTL will try to bind (default %d)." 
" This parameter together with the port min, define a range of ports" " where Open MPI will open sockets.", @@ -250,6 +285,19 @@ static int mca_btl_tcp_component_register(void) free(message); #endif + /* Check if we should support async progress */ + mca_btl_tcp_param_register_int ("progress_thread", NULL, 0, OPAL_INFO_LVL_1, + &mca_btl_tcp_component.tcp_enable_progress_thread); +#if !defined(MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD) + if( mca_btl_tcp_component.tcp_enable_progress_thread ) { + opal_show_help("help-mpi-btl-tcp.txt", + "unsuported progress thread", + true, "progress thread", + opal_process_info.nodename, + mca_btl_tcp_component.tcp_if_seq, + "Progress thread support compiled out"); + } +#endif /* !defined(MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD) */ mca_btl_tcp_component.report_all_unfound_interfaces = false; (void) mca_base_component_var_register(&mca_btl_tcp_component.super.btl_version, "warn_all_unfound_interfaces", @@ -269,7 +317,8 @@ static int mca_btl_tcp_component_register(void) MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_NEED_ACK | - MCA_BTL_FLAGS_HETEROGENEOUS_RDMA; + MCA_BTL_FLAGS_HETEROGENEOUS_RDMA | + MCA_BTL_FLAGS_SEND; mca_btl_tcp_module.super.btl_bandwidth = 100; mca_btl_tcp_module.super.btl_latency = 100; @@ -296,18 +345,27 @@ static int mca_btl_tcp_component_open(void) mca_btl_tcp_component.tcp_num_btls=0; mca_btl_tcp_component.tcp_addr_count = 0; mca_btl_tcp_component.tcp_btls=NULL; - - /* initialize objects */ + + /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_procs, opal_proc_table_t); + OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_events, opal_list_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_eager, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_max, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_user, opal_free_list_t); opal_proc_table_init(&mca_btl_tcp_component.tcp_procs, 16, 256); +#if 
MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD + OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_eager_mutex, opal_mutex_t); + OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_max_mutex, opal_mutex_t); + OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_user_mutex, opal_mutex_t); + OBJ_CONSTRUCT(&mca_btl_tcp_ready_frag_mutex, opal_mutex_t); + OBJ_CONSTRUCT(&mca_btl_tcp_ready_frag_pending_queue, opal_list_t); +#endif /* MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD */ + /* if_include and if_exclude need to be mutually exclusive */ - if (OPAL_SUCCESS != - mca_base_var_check_exclusive("ompi", + if (OPAL_SUCCESS != + mca_base_var_check_exclusive("opal", mca_btl_tcp_component.super.btl_version.mca_type_name, mca_btl_tcp_component.super.btl_version.mca_component_name, "if_include", @@ -318,7 +376,7 @@ static int mca_btl_tcp_component_open(void) "open" failing is not printed */ return OPAL_ERR_NOT_AVAILABLE; } - + return OPAL_SUCCESS; } @@ -329,9 +387,51 @@ static int mca_btl_tcp_component_open(void) static int mca_btl_tcp_component_close(void) { - if (NULL != mca_btl_tcp_component.tcp_btls) + opal_list_item_t *item; + +#if MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD + /** + * If we have a progress thread we should shut it down before + * moving forward with the TCP tearing down process. 
+ */ + if( (NULL != mca_btl_tcp_event_base) && + (mca_btl_tcp_event_base != opal_sync_event_base) ) { + /* Turn of the progress thread before moving forward */ + if( -1 != mca_btl_tcp_progress_thread_trigger ) { + void* ret = NULL; /* not currently used */ + + mca_btl_tcp_progress_thread_trigger = 0; + /* Let the progress thread know that we're going away */ + if( -1 != mca_btl_tcp_pipe_to_progress[1] ) { + close(mca_btl_tcp_pipe_to_progress[1]); + mca_btl_tcp_pipe_to_progress[1] = -1; + } + /* wait until the TCP progress thread completes */ + opal_thread_join(&mca_btl_tcp_progress_thread, &ret); + assert( -1 == mca_btl_tcp_progress_thread_trigger ); + } + opal_event_del(&mca_btl_tcp_component.tcp_recv_thread_async_event); + opal_event_base_free(mca_btl_tcp_event_base); + mca_btl_tcp_event_base = NULL; + + /* Close the remaining pipes */ + if( -1 != mca_btl_tcp_pipe_to_progress[0] ) { + close(mca_btl_tcp_pipe_to_progress[0]); + mca_btl_tcp_pipe_to_progress[0] = -1; + } + } + + OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_eager_mutex); + OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_max_mutex); + + OBJ_DESTRUCT(&mca_btl_tcp_ready_frag_mutex); + OBJ_DESTRUCT(&mca_btl_tcp_ready_frag_pending_queue); +#endif + + if (NULL != mca_btl_tcp_component.tcp_btls) { free(mca_btl_tcp_component.tcp_btls); - + } + if (mca_btl_tcp_component.tcp_listen_sd >= 0) { opal_event_del(&mca_btl_tcp_component.tcp_recv_event); CLOSE_THE_SOCKET(mca_btl_tcp_component.tcp_listen_sd); @@ -345,6 +445,14 @@ static int mca_btl_tcp_component_close(void) } #endif + /* remove all pending events. Do not lock the tcp_events list as + the event themselves will unregister during the destructor. 
*/ + while( NULL != (item = opal_list_remove_first(&mca_btl_tcp_component.tcp_events)) ) { + mca_btl_tcp_event_t* event = (mca_btl_tcp_event_t*)item; + opal_event_del(&event->event); + OBJ_RELEASE(event); + } + /* release resources */ OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_procs); OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_eager); @@ -448,7 +556,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) str = strchr(argv[i], '/'); if (NULL == str) { opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude", - true, name, opal_process_info.nodename, + true, name, opal_process_info.nodename, tmp, "Invalid specification (missing \"/\")"); free(argv[i]); free(tmp); @@ -459,7 +567,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) /* Now convert the IPv4 address */ ((struct sockaddr*) &argv_inaddr)->sa_family = AF_INET; - ret = inet_pton(AF_INET, argv[i], + ret = inet_pton(AF_INET, argv[i], &((struct sockaddr_in*) &argv_inaddr)->sin_addr); free(argv[i]); @@ -470,16 +578,16 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) free(tmp); continue; } - opal_output_verbose(20, opal_btl_base_framework.framework_output, + opal_output_verbose(20, opal_btl_base_framework.framework_output, "btl: tcp: Searching for %s address+prefix: %s / %u", name, opal_net_get_hostname((struct sockaddr*) &argv_inaddr), argv_prefix); - + /* Go through all interfaces and see if we can find a match */ - for (if_index = opal_ifbegin(); if_index >= 0; + for (if_index = opal_ifbegin(); if_index >= 0; if_index = opal_ifnext(if_index)) { - opal_ifindextoaddr(if_index, + opal_ifindextoaddr(if_index, (struct sockaddr*) &if_inaddr, sizeof(if_inaddr)); if (opal_net_samenetwork((struct sockaddr*) &argv_inaddr, @@ -488,7 +596,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) break; } } - + /* If we didn't find a match, keep trying */ if (if_index < 0) { if (reqd || 
mca_btl_tcp_component.report_all_unfound_interfaces) { @@ -503,7 +611,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) /* We found a match; get the name and replace it in the argv */ opal_ifindextoname(if_index, if_name, sizeof(if_name)); - opal_output_verbose(20, opal_btl_base_framework.framework_output, + opal_output_verbose(20, opal_btl_base_framework.framework_output, "btl: tcp: Found match: %s (%s)", opal_net_get_hostname((struct sockaddr*) &if_inaddr), if_name); @@ -523,7 +631,7 @@ static char **split_and_resolve(char **orig_str, char *name, bool reqd) /* * Create a TCP BTL instance for either: * (1) all interfaces specified by the user - * (2) all available interfaces + * (2) all available interfaces * (3) all available interfaces except for those excluded by the user */ @@ -610,7 +718,7 @@ static int mca_btl_tcp_component_create_instances(void) goto cleanup; } - /* if the interface list was not specified by the user, create + /* if the interface list was not specified by the user, create * a BTL for each interface that was not excluded. 
*/ exclude = split_and_resolve(&mca_btl_tcp_component.tcp_if_exclude, @@ -651,14 +759,42 @@ static int mca_btl_tcp_component_create_instances(void) return ret; } +#if MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD +static void* mca_btl_tcp_progress_thread_engine(opal_object_t *obj) +{ + opal_thread_t* current_thread = (opal_thread_t*)obj; + + while( 1 == (*((int*)current_thread->t_arg)) ) { + opal_event_loop(mca_btl_tcp_event_base, OPAL_EVLOOP_ONCE); + } + (*((int*)current_thread->t_arg)) = -1; + return NULL; +} + +static void mca_btl_tcp_component_event_async_handler(int fd, short unused, void *context) +{ + opal_event_t* event; + int rc; + + rc = read(fd, (void*)&event, sizeof(opal_event_t*)); + assert( fd == mca_btl_tcp_pipe_to_progress[0] ); + if( 0 == rc ) { + /* The main thread closed the pipe to trigger the shutdown procedure */ + opal_thread_t* current_thread = (opal_thread_t*)context; + (*((int*)current_thread->t_arg)) = 0; + } else { + opal_event_add(event, 0); + } +} +#endif + /* * Create a listen socket and bind to all interfaces */ static int mca_btl_tcp_component_create_listen(uint16_t af_family) { - int flags; - int sd; + int flags, sd, rc; struct sockaddr_storage inaddr; opal_socklen_t addrlen; @@ -677,17 +813,16 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family) #if OPAL_ENABLE_IPV6 { struct addrinfo hints, *res = NULL; - int error; memset (&hints, 0, sizeof(hints)); hints.ai_family = af_family; hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_PASSIVE; - if ((error = getaddrinfo(NULL, "0", &hints, &res))) { + if ((rc = getaddrinfo(NULL, "0", &hints, &res))) { opal_output (0, - "mca_btl_tcp_create_listen: unable to resolve. %s\n", - gai_strerror (error)); + "mca_btl_tcp_create_listen: unable to resolve. 
%s\n", + gai_strerror (rc)); CLOSE_THE_SOCKET(sd); return OPAL_ERROR; } @@ -727,7 +862,7 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family) { int index, range, port; - + #if OPAL_ENABLE_IPV6 if (AF_INET6 == af_family) { range = mca_btl_tcp_component.tcp6_port_range; @@ -793,7 +928,7 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family) /* setup listen backlog to maximum allowed by kernel */ if(listen(sd, SOMAXCONN) < 0) { - BTL_ERROR(("listen() failed: %s (%d)", + BTL_ERROR(("listen() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); CLOSE_THE_SOCKET(sd); return OPAL_ERROR; @@ -815,38 +950,101 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family) } } - /* register listen port */ + if(mca_btl_tcp_component.tcp_enable_progress_thread){ + /* Declare our intent to use threads. */ + opal_event_use_threads(); + if( NULL == mca_btl_tcp_event_base ) { + /* fall back to only one event base (the one shared by the entire Open MPI framework) */ + + if( NULL == (mca_btl_tcp_event_base = opal_event_base_create()) ) { + BTL_ERROR(("BTL TCP failed to create progress event base")); + goto move_forward_with_no_thread; + } + opal_event_base_priority_init(mca_btl_tcp_event_base, OPAL_EVENT_NUM_PRI); + + /* construct the thread object */ + OBJ_CONSTRUCT(&mca_btl_tcp_progress_thread, opal_thread_t); + + /** + * Create a pipe to communicate between the main thread and the progress thread. 
+ */ + if (0 != pipe(mca_btl_tcp_pipe_to_progress)) { + opal_event_base_free(mca_btl_tcp_event_base); + /* fall back to only one event base (the one shared by the entire Open MPI framework */ + mca_btl_tcp_event_base = opal_sync_event_base; + mca_btl_tcp_progress_thread_trigger = -1; /* thread not started */ + goto move_forward_with_no_thread; + } + /* setup the receiving end of the pipe as non-blocking */ + if((flags = fcntl(mca_btl_tcp_pipe_to_progress[0], F_GETFL, 0)) < 0) { + BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)", + strerror(opal_socket_errno), opal_socket_errno)); + } else { + flags |= O_NONBLOCK; + if(fcntl(mca_btl_tcp_pipe_to_progress[0], F_SETFL, flags) < 0) + BTL_ERROR(("fcntl(F_SETFL) failed: %s (%d)", + strerror(opal_socket_errno), opal_socket_errno)); + } + /* Progress thread event */ + opal_event_set(mca_btl_tcp_event_base, &mca_btl_tcp_component.tcp_recv_thread_async_event, + mca_btl_tcp_pipe_to_progress[0], + OPAL_EV_READ|OPAL_EV_PERSIST, + mca_btl_tcp_component_event_async_handler, + &mca_btl_tcp_progress_thread ); + opal_event_add(&mca_btl_tcp_component.tcp_recv_thread_async_event, 0); + + /* fork off a thread to progress it */ + mca_btl_tcp_progress_thread.t_run = mca_btl_tcp_progress_thread_engine; + mca_btl_tcp_progress_thread.t_arg = &mca_btl_tcp_progress_thread_trigger; + mca_btl_tcp_progress_thread_trigger = 1; /* thread up and running */ + if( OPAL_SUCCESS != (rc = opal_thread_start(&mca_btl_tcp_progress_thread)) ) { + BTL_ERROR(("BTL TCP progress thread initialization failed (%d)", rc)); + opal_event_base_free(mca_btl_tcp_event_base); + /* fall back to only one event base (the one shared by the entire Open MPI framework */ + mca_btl_tcp_event_base = opal_sync_event_base; + mca_btl_tcp_progress_thread_trigger = -1; /* thread not started */ + goto move_forward_with_no_thread; + } + /* We have async progress, the rest of the library should now protect itself against races */ + opal_set_using_threads(true); + } + } + else { + 
move_forward_with_no_thread: + mca_btl_tcp_event_base = opal_sync_event_base; + } + + if (AF_INET == af_family) { + opal_event_set(mca_btl_tcp_event_base, &mca_btl_tcp_component.tcp_recv_event, + mca_btl_tcp_component.tcp_listen_sd, + OPAL_EV_READ|OPAL_EV_PERSIST, + mca_btl_tcp_component_accept_handler, + 0 ); + MCA_BTL_TCP_ACTIVATE_EVENT(&mca_btl_tcp_component.tcp_recv_event, 0); + } #if OPAL_ENABLE_IPV6 if (AF_INET6 == af_family) { - opal_event_set(opal_event_base, &mca_btl_tcp_component.tcp6_recv_event, - mca_btl_tcp_component.tcp6_listen_sd, - OPAL_EV_READ|OPAL_EV_PERSIST, - mca_btl_tcp_component_accept_handler, - 0 ); - opal_event_add(&mca_btl_tcp_component.tcp6_recv_event, 0); - } else -#endif - { - opal_event_set(opal_event_base, &mca_btl_tcp_component.tcp_recv_event, - mca_btl_tcp_component.tcp_listen_sd, - OPAL_EV_READ|OPAL_EV_PERSIST, - mca_btl_tcp_component_accept_handler, - 0 ); - opal_event_add(&mca_btl_tcp_component.tcp_recv_event, 0); + opal_event_set(mca_btl_tcp_event_base, &mca_btl_tcp_component.tcp6_recv_event, + mca_btl_tcp_component.tcp6_listen_sd, + OPAL_EV_READ|OPAL_EV_PERSIST, + mca_btl_tcp_component_accept_handler, + 0 ); + MCA_BTL_TCP_ACTIVATE_EVENT(&mca_btl_tcp_component.tcp6_recv_event, 0); } +#endif return OPAL_SUCCESS; } /* * Register TCP module addressing information. The MCA framework - * will make this available to all peers. + * will make this available to all peers. 
*/ static int mca_btl_tcp_component_exchange(void) { int rc = 0, index; size_t i = 0; - size_t size = mca_btl_tcp_component.tcp_addr_count * + size_t size = mca_btl_tcp_component.tcp_addr_count * mca_btl_tcp_component.tcp_num_links * sizeof(mca_btl_tcp_addr_t); /* adi@2007-04-12: * @@ -876,10 +1074,10 @@ static int mca_btl_tcp_component_exchange(void) continue; } - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != opal_ifindextoaddr(index, (struct sockaddr*) &my_ss, sizeof (my_ss))) { - opal_output (0, + opal_output (0, "btl_tcp_component: problems getting address for index %i (kernel index %i)\n", index, opal_ifindextokindex (index)); continue; @@ -891,7 +1089,7 @@ static int mca_btl_tcp_component_exchange(void) memcpy(&addrs[current_addr].addr_inet, &((struct sockaddr_in6*)&my_ss)->sin6_addr, sizeof(addrs[0].addr_inet)); - addrs[current_addr].addr_port = + addrs[current_addr].addr_port = mca_btl_tcp_component.tcp6_listen_port; addrs[current_addr].addr_family = MCA_BTL_TCP_AF_INET6; xfer_size += sizeof (mca_btl_tcp_addr_t); @@ -903,10 +1101,10 @@ static int mca_btl_tcp_component_exchange(void) #endif if ((AF_INET == my_ss.ss_family) && (4 != mca_btl_tcp_component.tcp_disable_family)) { - memcpy(&addrs[current_addr].addr_inet, + memcpy(&addrs[current_addr].addr_inet, &((struct sockaddr_in*)&my_ss)->sin_addr, sizeof(addrs[0].addr_inet)); - addrs[current_addr].addr_port = + addrs[current_addr].addr_port = mca_btl_tcp_component.tcp_listen_port; addrs[current_addr].addr_family = MCA_BTL_TCP_AF_INET; xfer_size += sizeof (mca_btl_tcp_addr_t); @@ -917,7 +1115,7 @@ static int mca_btl_tcp_component_exchange(void) } } /* end of for opal_ifbegin() */ } /* end of for tcp_num_btls */ - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_btl_tcp_component.super.btl_version, addrs, xfer_size); free(addrs); @@ -932,17 +1130,18 @@ static int mca_btl_tcp_component_exchange(void) * (2) setup TCP listen socket for incoming connection attempts * (3) 
register BTL parameters with the MCA */ -mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules, +mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules, bool enable_progress_threads, bool enable_mpi_threads) { int ret = OPAL_SUCCESS; + unsigned int i; mca_btl_base_module_t **btls; *num_btl_modules = 0; /* initialize free lists */ opal_free_list_init( &mca_btl_tcp_component.tcp_frag_eager, - sizeof (mca_btl_tcp_frag_eager_t) + + sizeof (mca_btl_tcp_frag_eager_t) + mca_btl_tcp_module.super.btl_eager_limit, opal_cache_line_size, OBJ_CLASS (mca_btl_tcp_frag_eager_t), @@ -953,7 +1152,7 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules, NULL, 0, NULL, NULL, NULL ); opal_free_list_init( &mca_btl_tcp_component.tcp_frag_max, - sizeof (mca_btl_tcp_frag_max_t) + + sizeof (mca_btl_tcp_frag_max_t) + mca_btl_tcp_module.super.btl_max_send_size, opal_cache_line_size, OBJ_CLASS (mca_btl_tcp_frag_max_t), @@ -996,13 +1195,19 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules, if(OPAL_SUCCESS != (ret = mca_btl_tcp_component_exchange() )) { return 0; } - - btls = (mca_btl_base_module_t **)malloc(mca_btl_tcp_component.tcp_num_btls * + btls = (mca_btl_base_module_t **)malloc(mca_btl_tcp_component.tcp_num_btls * sizeof(mca_btl_base_module_t*)); if(NULL == btls) { return NULL; } + /* Register the btl to support the progress_thread */ + if (0 < mca_btl_tcp_progress_thread_trigger) { + for( i = 0; i < mca_btl_tcp_component.tcp_num_btls; i++) { + mca_btl_tcp_component.tcp_btls[i]->super.btl_flags |= MCA_BTL_FLAGS_BTL_PROGRESS_THREAD_ENABLED; + } + } + #if OPAL_CUDA_SUPPORT mca_common_cuda_stage_one_init(); #endif /* OPAL_CUDA_SUPPORT */ @@ -1035,11 +1240,11 @@ static void mca_btl_tcp_component_accept_handler( int incoming_sd, if(sd < 0) { if(opal_socket_errno == EINTR) continue; - if (opal_socket_errno != EAGAIN && + if (opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) { 
opal_show_help("help-mpi-btl-tcp.txt", "accept failed", true, opal_process_info.nodename, - getpid(), + getpid(), opal_socket_errno, strerror(opal_socket_errno)); } @@ -1047,28 +1252,30 @@ static void mca_btl_tcp_component_accept_handler( int incoming_sd, } mca_btl_tcp_set_socket_options(sd); + assert( NULL != mca_btl_tcp_event_base ); /* wait for receipt of peers process identifier to complete this connection */ event = OBJ_NEW(mca_btl_tcp_event_t); - opal_event_set(opal_event_base, &event->event, sd, OPAL_EV_READ, mca_btl_tcp_component_recv_handler, event); + opal_event_set(mca_btl_tcp_event_base, &(event->event), sd, + OPAL_EV_READ, mca_btl_tcp_component_recv_handler, event); opal_event_add(&event->event, 0); } } /** - * Event callback when there is data available on the registered + * Event callback when there is data available on the registered * socket to recv. This callback is triggered only once per lifetime * for any socket, in the beginning when we setup the handshake * protocol. 
*/ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user) { + mca_btl_tcp_event_t *event = (mca_btl_tcp_event_t *)user; opal_process_name_t guid; struct sockaddr_storage addr; - int retval; - mca_btl_tcp_proc_t* btl_proc; opal_socklen_t addr_len = sizeof(addr); - mca_btl_tcp_event_t *event = (mca_btl_tcp_event_t *)user; + mca_btl_tcp_proc_t* btl_proc; + int retval; OBJ_RELEASE(event); @@ -1091,7 +1298,7 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user) strerror(opal_socket_errno), opal_socket_errno)); } } - + /* lookup the corresponding process */ btl_proc = mca_btl_tcp_proc_lookup(&guid); if(NULL == btl_proc) { @@ -1101,13 +1308,12 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user) /* lookup peer address */ if(getpeername(sd, (struct sockaddr*)&addr, &addr_len) != 0) { - BTL_ERROR(("getpeername() failed: %s (%d)", + BTL_ERROR(("getpeername() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); CLOSE_THE_SOCKET(sd); return; } - /* are there any existing peer instances will to accept this connection */ + /* are there any existing peer instances willing to accept this connection */ (void)mca_btl_tcp_proc_accept(btl_proc, (struct sockaddr*)&addr, sd); } - diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.c b/opal/mca/btl/tcp/btl_tcp_endpoint.c index e2676ef40ef..5ca5a354d1b 100644 --- a/opal/mca/btl/tcp/btl_tcp_endpoint.c +++ b/opal/mca/btl/tcp/btl_tcp_endpoint.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -48,9 +48,7 @@ #ifdef HAVE_SYS_TIME_H #include #endif /* HAVE_SYS_TIME_H */ -#ifdef HAVE_TIME_H #include -#endif /* HAVE_TIME_H */ #include "opal/mca/event/event.h" #include "opal/util/net.h" @@ -118,16 +116,11 @@ static void mca_btl_tcp_endpoint_connected(mca_btl_base_endpoint_t*); static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user); static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user); -/* - * Diagnostics: change this to "1" to enable the function - * mca_btl_tcp_endpoint_dump(), below - */ -#define WANT_PEER_DUMP 0 /* * diagnostics */ -#if WANT_PEER_DUMP +#if OPAL_ENABLE_DEBUG && WANT_PEER_DUMP #define DEBUG_LENGTH 1024 /** @@ -138,7 +131,7 @@ static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user); * might access freed memory. Thus, the caller should lock the endpoint prior * to the call. */ -static void +void mca_btl_tcp_endpoint_dump(int level, const char* fname, int lineno, @@ -258,7 +251,7 @@ mca_btl_tcp_endpoint_dump(int level, if (used >= DEBUG_LENGTH) goto out; #if MCA_BTL_TCP_ENDPOINT_CACHE used += snprintf(&outmsg[used], DEBUG_LENGTH - used, "\n\t[cache %p used %lu/%lu]", - btl_endpoint->endpoint_cache, btl_endpoint->endpoint_cache_pos - btl_endpoint->endpoint_cache, + (void*)btl_endpoint->endpoint_cache, btl_endpoint->endpoint_cache_pos - btl_endpoint->endpoint_cache, btl_endpoint->endpoint_cache_length); if (used >= DEBUG_LENGTH) goto out; #endif /* MCA_BTL_TCP_ENDPOINT_CACHE */ @@ -291,13 +284,7 @@ mca_btl_tcp_endpoint_dump(int level, (NULL != btl_endpoint->endpoint_proc ? 
OPAL_NAME_PRINT(btl_endpoint->endpoint_proc->proc_opal->proc_name) : "unknown remote"), outmsg); } -#endif /* WANT_PEER_DUMP */ - -#if OPAL_ENABLE_DEBUG && WANT_PEER_DUMP -#define MCA_BTL_TCP_ENDPOINT_DUMP(LEVEL, ENDPOINT, INFO, MSG) mca_btl_tcp_endpoint_dump((LEVEL), __FILE__, __LINE__, __func__, (ENDPOINT), (INFO), (MSG)) -#else -#define MCA_BTL_TCP_ENDPOINT_DUMP(LEVEL, ENDPOINT, INFO, MSG) -#endif /* OPAL_ENABLE_DEBUG && WANT_PEER_DUMP */ +#endif /* OPAL_ENABLE_DEBUG && WANT_PEER_DUMP */ /* * Initialize events to be used by the endpoint instance for TCP select/poll callbacks. @@ -311,7 +298,7 @@ static inline void mca_btl_tcp_endpoint_event_init(mca_btl_base_endpoint_t* btl_ btl_endpoint->endpoint_cache_pos = btl_endpoint->endpoint_cache; #endif /* MCA_BTL_TCP_ENDPOINT_CACHE */ - opal_event_set(opal_event_base, &btl_endpoint->endpoint_recv_event, + opal_event_set(mca_btl_tcp_event_base, &btl_endpoint->endpoint_recv_event, btl_endpoint->endpoint_sd, OPAL_EV_READ|OPAL_EV_PERSIST, mca_btl_tcp_endpoint_recv_handler, @@ -322,7 +309,7 @@ static inline void mca_btl_tcp_endpoint_event_init(mca_btl_base_endpoint_t* btl_ * will be fired only once, and when the endpoint is marked as * CONNECTED the event should be recreated with the correct flags. 
*/ - opal_event_set(opal_event_base, &btl_endpoint->endpoint_send_event, + opal_event_set(mca_btl_tcp_event_base, &btl_endpoint->endpoint_send_event, btl_endpoint->endpoint_sd, OPAL_EV_WRITE, mca_btl_tcp_endpoint_send_handler, @@ -370,8 +357,8 @@ int mca_btl_tcp_endpoint_send(mca_btl_base_endpoint_t* btl_endpoint, mca_btl_tcp } else { btl_endpoint->endpoint_send_frag = frag; MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "event_add(send) [endpoint_send]"); - opal_event_add(&btl_endpoint->endpoint_send_event, 0); frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + MCA_BTL_TCP_ACTIVATE_EVENT(&btl_endpoint->endpoint_send_event, 0); } } else { MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "send fragment enqueued [endpoint_send]"); @@ -511,7 +498,7 @@ void mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, assert(btl_endpoint->endpoint_sd_next == -1); btl_endpoint->endpoint_sd_next = sd; - opal_event_evtimer_set(opal_event_base, &btl_endpoint->endpoint_accept_event, + opal_event_evtimer_set(mca_btl_tcp_event_base, &btl_endpoint->endpoint_accept_event, mca_btl_tcp_endpoint_complete_accept, btl_endpoint); opal_event_add(&btl_endpoint->endpoint_accept_event, &now); } @@ -524,6 +511,7 @@ void mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, */ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint) { + MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, false, "[close]"); if(btl_endpoint->endpoint_sd < 0) return; btl_endpoint->endpoint_retries++; @@ -531,14 +519,16 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint) opal_event_del(&btl_endpoint->endpoint_recv_event); MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, false, "event_del(send) [close]"); opal_event_del(&btl_endpoint->endpoint_send_event); - CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd); - btl_endpoint->endpoint_sd = -1; + #if MCA_BTL_TCP_ENDPOINT_CACHE free( btl_endpoint->endpoint_cache ); btl_endpoint->endpoint_cache = NULL; 
btl_endpoint->endpoint_cache_pos = NULL; btl_endpoint->endpoint_cache_length = 0; #endif /* MCA_BTL_TCP_ENDPOINT_CACHE */ + + CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd); + btl_endpoint->endpoint_sd = -1; /** * If we keep failing to connect to the peer let the caller know about * this situation by triggering all the pending fragments callback and @@ -572,7 +562,7 @@ static void mca_btl_tcp_endpoint_connected(mca_btl_base_endpoint_t* btl_endpoint MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, true, "READY [endpoint_connected]"); /* Create the send event in a persistent manner. */ - opal_event_set(opal_event_base, &btl_endpoint->endpoint_send_event, + opal_event_set(mca_btl_tcp_event_base, &btl_endpoint->endpoint_send_event, btl_endpoint->endpoint_sd, OPAL_EV_WRITE | OPAL_EV_PERSIST, mca_btl_tcp_endpoint_send_handler, @@ -696,9 +686,9 @@ void mca_btl_tcp_set_socket_options(int sd) /* * Start a connection to the endpoint. This will likely not complete, * as the socket is set to non-blocking, so register for event - * notification of connect completion. On connection we send - * our globally unique process identifier to the endpoint and wait for - * the endpoints response. + * notification of connect completion. On connection we send our + * globally unique process identifier to the endpoint and wait for + * the endpoint response. 
*/ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpoint) { @@ -762,7 +752,7 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo if(opal_socket_errno == EINPROGRESS || opal_socket_errno == EWOULDBLOCK) { btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECTING; MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "event_add(send) [start_connect]"); - opal_event_add(&btl_endpoint->endpoint_send_event, 0); + MCA_BTL_TCP_ACTIVATE_EVENT(&btl_endpoint->endpoint_send_event, 0); return OPAL_SUCCESS; } } @@ -792,6 +782,8 @@ static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_e opal_socklen_t so_length = sizeof(so_error); struct sockaddr_storage endpoint_addr; + opal_event_del(&btl_endpoint->endpoint_send_event); + mca_btl_tcp_proc_tosocks(btl_endpoint->endpoint_addr, &endpoint_addr); /* check connect completion status */ diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.h b/opal/mca/btl/tcp/btl_tcp_endpoint.h index 65337967508..5e405511911 100644 --- a/opal/mca/btl/tcp/btl_tcp_endpoint.h +++ b/opal/mca/btl/tcp/btl_tcp_endpoint.h @@ -2,17 +2,17 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -61,7 +61,7 @@ struct mca_btl_base_endpoint_t { struct mca_btl_tcp_frag_t* endpoint_send_frag; /**< current send frag being processed */ struct mca_btl_tcp_frag_t* endpoint_recv_frag; /**< current recv frag being processed */ mca_btl_tcp_state_t endpoint_state; /**< current state of the connection */ - size_t endpoint_retries; /**< number of connection retries attempted */ + uint32_t endpoint_retries; /**< number of connection retries attempted */ opal_list_t endpoint_frags; /**< list of pending frags to send */ opal_mutex_t endpoint_send_lock; /**< lock for concurrent access to endpoint state */ opal_mutex_t endpoint_recv_lock; /**< lock for concurrent access to endpoint state */ @@ -81,5 +81,20 @@ int mca_btl_tcp_endpoint_send(mca_btl_base_endpoint_t*, struct mca_btl_tcp_frag void mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t*, struct sockaddr*, int); void mca_btl_tcp_endpoint_shutdown(mca_btl_base_endpoint_t*); +/* + * Diagnostics: change this to "1" to enable the function + * mca_btl_tcp_endpoint_dump(), below + */ +#define WANT_PEER_DUMP 0 + +#if OPAL_ENABLE_DEBUG && WANT_PEER_DUMP +#define MCA_BTL_TCP_ENDPOINT_DUMP(LEVEL, ENDPOINT, INFO, MSG) mca_btl_tcp_endpoint_dump((LEVEL), __FILE__, __LINE__, __func__, (ENDPOINT), (INFO), (MSG)) +void mca_btl_tcp_endpoint_dump(int level, const char* fname, int lineno, const char* funcname, + mca_btl_base_endpoint_t* btl_endpoint, bool full_info, const char* msg); +#else +#define MCA_BTL_TCP_ENDPOINT_DUMP(LEVEL, ENDPOINT, INFO, MSG) +#endif /* OPAL_ENABLE_DEBUG && WANT_PEER_DUMP */ + END_C_DECLS + #endif diff --git a/opal/mca/btl/tcp/btl_tcp_frag.c b/opal/mca/btl/tcp/btl_tcp_frag.c index 02e82df4034..67b1e21b915 100644 --- a/opal/mca/btl/tcp/btl_tcp_frag.c +++ b/opal/mca/btl/tcp/btl_tcp_frag.c @@ -3,18 +3,18 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. 
All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2012 Oracle and/or all its affiliates. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -44,8 +44,12 @@ #include "opal/opal_socket_errno.h" #include "opal/mca/btl/base/btl_base_error.h" +#include "opal/util/show_help.h" + #include "btl_tcp_frag.h" #include "btl_tcp_endpoint.h" +#include "btl_tcp_proc.h" + static void mca_btl_tcp_frag_eager_constructor(mca_btl_tcp_frag_t* frag) { @@ -108,7 +112,7 @@ size_t mca_btl_tcp_frag_dump(mca_btl_tcp_frag_t* frag, char* msg, char* buf, siz bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t* frag, int sd) { - int cnt=-1; + ssize_t cnt = -1; size_t i, num_vecs; /* non-blocking write, but continue if interrupted */ @@ -141,7 +145,7 @@ bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t* frag, int sd) /* if the write didn't complete - update the iovec state */ num_vecs = frag->iov_cnt; for(i=0; i= (int)frag->iov_ptr->iov_len) { + if(cnt >= (ssize_t)frag->iov_ptr->iov_len) { cnt -= frag->iov_ptr->iov_len; frag->iov_ptr++; frag->iov_idx++; @@ -150,6 +154,9 @@ bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t* frag, int sd) frag->iov_ptr->iov_base = (opal_iov_base_ptr_t) (((unsigned char*)frag->iov_ptr->iov_base) + cnt); frag->iov_ptr->iov_len -= cnt; + OPAL_OUTPUT_VERBOSE((100, 
opal_btl_base_framework.framework_output, + "%s:%d write %ld bytes on socket %d\n", + __FILE__, __LINE__, cnt, sd)); break; } } @@ -158,9 +165,9 @@ bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t* frag, int sd) bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) { - int cnt, dont_copy_data = 0; - size_t i, num_vecs; mca_btl_base_endpoint_t* btl_endpoint = frag->endpoint; + int i, num_vecs, dont_copy_data = 0; + ssize_t cnt; repeat: num_vecs = frag->iov_cnt; @@ -172,7 +179,7 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) * is still some data pending. */ cnt = length = btl_endpoint->endpoint_cache_length; - for( i = 0; i < frag->iov_cnt; i++ ) { + for( i = 0; i < (int)frag->iov_cnt; i++ ) { if( length > frag->iov_ptr[i].iov_len ) length = frag->iov_ptr[i].iov_len; if( (0 == dont_copy_data) || (length < frag->iov_ptr[i].iov_len) ) { @@ -204,25 +211,35 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) cnt = -1; while( cnt < 0 ) { cnt = readv(sd, frag->iov_ptr, num_vecs); - if( 0 < cnt ) goto advance_iov_position; - if( cnt == 0 ) { - btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; - mca_btl_tcp_endpoint_close(btl_endpoint); - return false; - } - switch(opal_socket_errno) { - case EINTR: - continue; - case EWOULDBLOCK: - return false; - case EFAULT: + if( 0 < cnt ) goto advance_iov_position; + if( cnt == 0 ) { + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; + mca_btl_tcp_endpoint_close(btl_endpoint); + return false; + } + switch(opal_socket_errno) { + case EINTR: + continue; + case EWOULDBLOCK: + return false; + case EFAULT: BTL_ERROR(("mca_btl_tcp_frag_recv: readv error (%p, %lu)\n\t%s(%lu)\n", frag->iov_ptr[0].iov_base, (unsigned long) frag->iov_ptr[0].iov_len, strerror(opal_socket_errno), (unsigned long) frag->iov_cnt)); btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; - mca_btl_tcp_endpoint_close(btl_endpoint); - return false; - default: + mca_btl_tcp_endpoint_close(btl_endpoint); + return false; + + case ECONNRESET: + 
opal_show_help("help-mpi-btl-tcp.txt", "peer hung up", + true, opal_process_info.nodename, + getpid(), + btl_endpoint->endpoint_proc->proc_opal->proc_hostname); + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; + mca_btl_tcp_endpoint_close(btl_endpoint); + return false; + + default: BTL_ERROR(("mca_btl_tcp_frag_recv: readv failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); @@ -236,17 +253,17 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) /* if the read didn't complete - update the iovec state */ num_vecs = frag->iov_cnt; for( i = 0; i < num_vecs; i++ ) { - if( cnt < (int)frag->iov_ptr->iov_len ) { + if( cnt < (ssize_t)frag->iov_ptr->iov_len ) { frag->iov_ptr->iov_base = (opal_iov_base_ptr_t) (((unsigned char*)frag->iov_ptr->iov_base) + cnt); frag->iov_ptr->iov_len -= cnt; cnt = 0; break; - } - cnt -= frag->iov_ptr->iov_len; - frag->iov_idx++; - frag->iov_ptr++; - frag->iov_cnt--; + } + cnt -= frag->iov_ptr->iov_len; + frag->iov_idx++; + frag->iov_ptr++; + frag->iov_cnt--; } #if MCA_BTL_TCP_ENDPOINT_CACHE btl_endpoint->endpoint_cache_length = cnt; @@ -264,11 +281,13 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) frag->iov[1].iov_len = frag->hdr.size; frag->iov_cnt++; #ifndef __sparc +#if !MCA_BTL_TCP_SUPPORT_PROGRESS_THREAD /* The following cannot be done for sparc code * because it causes alignment errors when accessing * structures later on in the btl and pml code. */ dont_copy_data = 1; +#endif #endif goto repeat; } @@ -296,3 +315,4 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) } return false; } + diff --git a/opal/mca/btl/tcp/btl_tcp_frag.h b/opal/mca/btl/tcp/btl_tcp_frag.h index 9802069f4fa..b73da8f6edb 100644 --- a/opal/mca/btl/tcp/btl_tcp_frag.h +++ b/opal/mca/btl/tcp/btl_tcp_frag.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -56,6 +56,7 @@ struct mca_btl_tcp_frag_t { size_t iov_cnt; size_t iov_idx; size_t size; + uint16_t next_step; int rc; opal_free_list_t* my_list; /* fake rdma completion */ @@ -126,8 +127,6 @@ do { \ bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t*, int sd); bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t*, int sd); - -size_t mca_btl_tcp_frag_dump(mca_btl_tcp_frag_t*, char*, char*, size_t); - +size_t mca_btl_tcp_frag_dump(mca_btl_tcp_frag_t* frag, char* msg, char* buf, size_t length); END_C_DECLS #endif diff --git a/opal/mca/btl/tcp/btl_tcp_ft.c b/opal/mca/btl/tcp/btl_tcp_ft.c deleted file mode 100644 index 8f61a83d9fa..00000000000 --- a/opal/mca/btl/tcp/btl_tcp_ft.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "opal_config.h" - -#include - - -#include "btl_tcp.h" -#include "btl_tcp_frag.h" -#include "btl_tcp_proc.h" -#include "btl_tcp_endpoint.h" - -#include "btl_tcp_ft.h" - -int mca_btl_tcp_ft_event(int state) -{ - - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OPAL_SUCCESS; -} - diff --git a/opal/mca/btl/tcp/btl_tcp_ft.h b/opal/mca/btl/tcp/btl_tcp_ft.h deleted file mode 100644 index bbca866e73c..00000000000 --- a/opal/mca/btl/tcp/btl_tcp_ft.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_BTL_TCP_FT_H -#define MCA_BTL_TCP_FT_H - -#ifdef HAVE_SYS_TYPES_H -#include -#endif - -BEGIN_C_DECLS - - -END_C_DECLS - -#endif - diff --git a/opal/mca/btl/tcp/btl_tcp_hdr.h b/opal/mca/btl/tcp/btl_tcp_hdr.h index 0f08c873780..b7977762012 100644 --- a/opal/mca/btl/tcp/btl_tcp_hdr.h +++ b/opal/mca/btl/tcp/btl_tcp_hdr.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +22,7 @@ #include "opal_config.h" #include "opal/mca/btl/base/base.h" -#include "btl_tcp.h" +#include "btl_tcp.h" BEGIN_C_DECLS @@ -38,9 +38,9 @@ struct mca_btl_tcp_hdr_t { mca_btl_base_header_t base; uint8_t type; uint16_t count; - uint32_t size; -}; -typedef struct mca_btl_tcp_hdr_t mca_btl_tcp_hdr_t; + uint32_t size; +}; +typedef struct mca_btl_tcp_hdr_t mca_btl_tcp_hdr_t; #define MCA_BTL_TCP_HDR_HTON(hdr) \ do { \ diff --git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c index b6094345779..c2b55d88e2a 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.c +++ b/opal/mca/btl/tcp/btl_tcp_proc.c @@ -6,19 +6,21 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2010 Oracle and/or its affiliates. All rights reserved - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights + * reserved. 
+ * Copyright (c) 2015-2017 Cisco Systems, Inc. All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,6 +41,7 @@ #include "opal/util/if.h" #include "opal/util/net.h" #include "opal/util/proc.h" +#include "opal/util/show_help.h" #include "btl_tcp.h" #include "btl_tcp_proc.h" @@ -46,21 +49,24 @@ static void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* proc); static void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* proc); -static mca_btl_tcp_interface_t** local_interfaces = NULL; -static int local_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX]; -static size_t num_local_interfaces, max_local_interfaces; -static mca_btl_tcp_interface_t** peer_interfaces = NULL; -static size_t num_peer_interfaces, max_peer_interfaces; -static int peer_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX]; -static unsigned int *best_assignment; -static int max_assignment_weight; -static int max_assignment_cardinality; -static enum mca_btl_tcp_connection_quality **weights; -static struct mca_btl_tcp_addr_t ***best_addr; - -OBJ_CLASS_INSTANCE( mca_btl_tcp_proc_t, - opal_list_item_t, - mca_btl_tcp_proc_construct, +struct mca_btl_tcp_proc_data_t { + mca_btl_tcp_interface_t** local_interfaces; + int local_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX]; + size_t num_local_interfaces, max_local_interfaces; + size_t num_peer_interfaces; + int peer_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX]; + unsigned int *best_assignment; + int max_assignment_weight; + int max_assignment_cardinality; + enum mca_btl_tcp_connection_quality **weights; + struct mca_btl_tcp_addr_t ***best_addr; +}; + +typedef struct mca_btl_tcp_proc_data_t mca_btl_tcp_proc_data_t; + +OBJ_CLASS_INSTANCE( mca_btl_tcp_proc_t, + opal_list_item_t, + mca_btl_tcp_proc_construct, mca_btl_tcp_proc_destruct ); void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* tcp_proc) @@ -82,7 +88,7 @@ void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* tcp_proc) if( NULL != tcp_proc->proc_opal ) { /* remove from list 
of all proc instances */ OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock); - opal_proc_table_remove_value(&mca_btl_tcp_component.tcp_procs, + opal_proc_table_remove_value(&mca_btl_tcp_component.tcp_procs, tcp_proc->proc_opal->proc_name); OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); OBJ_RELEASE(tcp_proc->proc_opal); @@ -101,7 +107,7 @@ void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* tcp_proc) /* * Create a TCP process structure. There is a one-to-one correspondence * between a opal_proc_t and a mca_btl_tcp_proc_t instance. We cache - * additional data (specifically the list of mca_btl_tcp_endpoint_t instances, + * additional data (specifically the list of mca_btl_tcp_endpoint_t instances, * and published addresses) associated w/ a given destination on this * datastructure. */ @@ -113,59 +119,62 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc) int rc; OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock); - rc = opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs, + rc = opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs, proc->proc_name, (void**)&btl_proc); if(OPAL_SUCCESS == rc) { OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); return btl_proc; } - btl_proc = OBJ_NEW(mca_btl_tcp_proc_t); - if(NULL == btl_proc) - return NULL; - btl_proc->proc_opal = proc; - OBJ_RETAIN(btl_proc->proc_opal); + do { /* This loop is only necessary so that we can break out of the serial code */ + btl_proc = OBJ_NEW(mca_btl_tcp_proc_t); + if(NULL == btl_proc) { + rc = OPAL_ERR_OUT_OF_RESOURCE; + break; + } - /* add to hash table of all proc instance */ - opal_proc_table_set_value(&mca_btl_tcp_component.tcp_procs, - proc->proc_name, btl_proc); - OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); + /* Retain the proc, but don't store the ref into the btl_proc just yet. This + * provides a way to release the btl_proc in case of failure without having to + * unlock the mutex. 
+ */ + OBJ_RETAIN(proc); + + /* lookup tcp parameters exported by this proc */ + OPAL_MODEX_RECV(rc, &mca_btl_tcp_component.super.btl_version, + &proc->proc_name, (uint8_t**)&btl_proc->proc_addrs, &size); + if(rc != OPAL_SUCCESS) { + if(OPAL_ERR_NOT_FOUND != rc) + BTL_ERROR(("opal_modex_recv: failed with return value=%d", rc)); + break; + } - /* lookup tcp parameters exported by this proc */ - OPAL_MODEX_RECV(rc, &mca_btl_tcp_component.super.btl_version, - proc, (uint8_t**)&btl_proc->proc_addrs, &size); - if(rc != OPAL_SUCCESS) { - if(OPAL_ERR_NOT_FOUND != rc) - BTL_ERROR(("opal_modex_recv: failed with return value=%d", rc)); - OBJ_RELEASE(btl_proc); - return NULL; - } - if(0 != (size % sizeof(mca_btl_tcp_addr_t))) { - BTL_ERROR(("opal_modex_recv: invalid size %lu: btl-size: %lu\n", - (unsigned long) size, (unsigned long)sizeof(mca_btl_tcp_addr_t))); - return NULL; - } - btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_addr_t); - - /* allocate space for endpoint array - one for each exported address */ - btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**) - malloc((1 + btl_proc->proc_addr_count) * - sizeof(mca_btl_base_endpoint_t*)); - if(NULL == btl_proc->proc_endpoints) { - OBJ_RELEASE(btl_proc); - return NULL; - } + if(0 != (size % sizeof(mca_btl_tcp_addr_t))) { + BTL_ERROR(("opal_modex_recv: invalid size %lu: btl-size: %lu\n", + (unsigned long) size, (unsigned long)sizeof(mca_btl_tcp_addr_t))); + rc = OPAL_ERROR; + break; + } + + btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_addr_t); + + /* allocate space for endpoint array - one for each exported address */ + btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**) + malloc((1 + btl_proc->proc_addr_count) * + sizeof(mca_btl_base_endpoint_t*)); + if(NULL == btl_proc->proc_endpoints) { + rc = OPAL_ERR_OUT_OF_RESOURCE; + break; + } + + if(NULL == mca_btl_tcp_component.tcp_local && (proc == opal_proc_local_get())) { + mca_btl_tcp_component.tcp_local = btl_proc; + } - if(NULL == 
mca_btl_tcp_component.tcp_local && (proc == opal_proc_local_get())) { - mca_btl_tcp_component.tcp_local = btl_proc; - } - { /* convert the OPAL addr_family field to OS constants, * so we can check for AF_INET (or AF_INET6) and don't have * to deal with byte ordering anymore. */ - unsigned int i; - for (i = 0; i < btl_proc->proc_addr_count; i++) { + for (unsigned int i = 0; i < btl_proc->proc_addr_count; i++) { if (MCA_BTL_TCP_AF_INET == btl_proc->proc_addrs[i].addr_family) { btl_proc->proc_addrs[i].addr_family = AF_INET; } @@ -175,55 +184,71 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc) } #endif } + } while (0); + + if (OPAL_SUCCESS == rc) { + btl_proc->proc_opal = proc; /* link with the proc */ + /* add to hash table of all proc instance. */ + opal_proc_table_set_value(&mca_btl_tcp_component.tcp_procs, + proc->proc_name, btl_proc); + } else { + if (btl_proc) { + OBJ_RELEASE(btl_proc); /* release the local proc */ + OBJ_RELEASE(proc); /* and the ref on the OMPI proc */ + btl_proc = NULL; + } } + + OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); + return btl_proc; } -static void evaluate_assignment(int *a) { +static void evaluate_assignment(mca_btl_tcp_proc_data_t *proc_data, int *a) { size_t i; - unsigned int max_interfaces = num_local_interfaces; + unsigned int max_interfaces = proc_data->num_local_interfaces; int assignment_weight = 0; int assignment_cardinality = 0; - if(max_interfaces < num_peer_interfaces) { - max_interfaces = num_peer_interfaces; + if(max_interfaces < proc_data->num_peer_interfaces) { + max_interfaces = proc_data->num_peer_interfaces; } for(i = 0; i < max_interfaces; ++i) { - if(0 < weights[i][a[i]-1]) { + if(0 < proc_data->weights[i][a[i]-1]) { ++assignment_cardinality; - assignment_weight += weights[i][a[i]-1]; + assignment_weight += proc_data->weights[i][a[i]-1]; } } /* * check wether current solution beats all previous solutions */ - if(assignment_cardinality > max_assignment_cardinality - || 
(assignment_cardinality == max_assignment_cardinality - && assignment_weight > max_assignment_weight)) { + if(assignment_cardinality > proc_data->max_assignment_cardinality + || (assignment_cardinality == proc_data->max_assignment_cardinality + && assignment_weight > proc_data->max_assignment_weight)) { for(i = 0; i < max_interfaces; ++i) { - best_assignment[i] = a[i]-1; + proc_data->best_assignment[i] = a[i]-1; } - max_assignment_weight = assignment_weight; - max_assignment_cardinality = assignment_cardinality; + proc_data->max_assignment_weight = assignment_weight; + proc_data->max_assignment_cardinality = assignment_cardinality; } } -static void visit(int k, int level, int siz, int *a) +static void visit(mca_btl_tcp_proc_data_t *proc_data, int k, int level, int siz, int *a) { level = level+1; a[k] = level; if (level == siz) { - evaluate_assignment(a); + evaluate_assignment(proc_data, a); } else { int i; for ( i = 0; i < siz; i++) if (a[i] == 0) - visit(i, level, siz, a); + visit(proc_data, i, level, siz, a); } level = level-1; a[k] = 0; @@ -241,23 +266,25 @@ static void mca_btl_tcp_initialise_interface(mca_btl_tcp_interface_t* tcp_interf tcp_interface->inuse = 0; } -static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(void) +static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(mca_btl_tcp_proc_data_t *proc_data) { struct sockaddr_storage local_addr; char local_if_name[IF_NAMESIZE]; char **include, **exclude, **argv; int idx; + mca_btl_tcp_interface_t * local_interface; - if( NULL != local_interfaces ) - return local_interfaces; + assert (NULL == proc_data->local_interfaces); + if( NULL != proc_data->local_interfaces ) + return proc_data->local_interfaces; - max_local_interfaces = MAX_KERNEL_INTERFACES; - num_local_interfaces = 0; - local_interfaces = (mca_btl_tcp_interface_t**)calloc( max_local_interfaces, sizeof(mca_btl_tcp_interface_t*) ); - if( NULL == local_interfaces ) + proc_data->max_local_interfaces = 
MAX_KERNEL_INTERFACES; + proc_data->num_local_interfaces = 0; + proc_data->local_interfaces = (mca_btl_tcp_interface_t**)calloc( proc_data->max_local_interfaces, sizeof(mca_btl_tcp_interface_t*) ); + if( NULL == proc_data->local_interfaces ) return NULL; - memset(local_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX); + memset(proc_data->local_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX); /* Collect up the list of included and excluded interfaces, if any */ include = opal_argv_split(mca_btl_tcp_component.tcp_if_include,','); @@ -317,25 +344,26 @@ static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(void) } kindex = opal_ifindextokindex(idx); - index = local_kindex_to_index[kindex]; + index = proc_data->local_kindex_to_index[kindex]; /* create entry for this kernel index previously not seen */ if(-1 == index) { - index = num_local_interfaces++; - local_kindex_to_index[kindex] = index; - - if( num_local_interfaces == max_local_interfaces ) { - max_local_interfaces <<= 1; - local_interfaces = (mca_btl_tcp_interface_t**)realloc( local_interfaces, - max_local_interfaces * sizeof(mca_btl_tcp_interface_t*) ); - if( NULL == local_interfaces ) + index = proc_data->num_local_interfaces++; + proc_data->local_kindex_to_index[kindex] = index; + + if( proc_data->num_local_interfaces == proc_data->max_local_interfaces ) { + proc_data->max_local_interfaces <<= 1; + proc_data->local_interfaces = (mca_btl_tcp_interface_t**)realloc( proc_data->local_interfaces, + proc_data->max_local_interfaces * sizeof(mca_btl_tcp_interface_t*) ); + if( NULL == proc_data->local_interfaces ) goto cleanup; } - local_interfaces[index] = (mca_btl_tcp_interface_t *) malloc(sizeof(mca_btl_tcp_interface_t)); - assert(NULL != local_interfaces[index]); - mca_btl_tcp_initialise_interface(local_interfaces[index], kindex, index); + proc_data->local_interfaces[index] = (mca_btl_tcp_interface_t *) malloc(sizeof(mca_btl_tcp_interface_t)); + assert(NULL != 
proc_data->local_interfaces[index]); + mca_btl_tcp_initialise_interface(proc_data->local_interfaces[index], kindex, index); } + local_interface = proc_data->local_interfaces[proc_data->local_kindex_to_index[kindex]]; switch(local_addr.ss_family) { case AF_INET: /* if AF is disabled, skip it completely */ @@ -343,12 +371,12 @@ static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(void) continue; } - local_interfaces[local_kindex_to_index[kindex]]->ipv4_address = + local_interface->ipv4_address = (struct sockaddr_storage*) malloc(sizeof(local_addr)); - memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv4_address, + memcpy(local_interface->ipv4_address, &local_addr, sizeof(local_addr)); - opal_ifindextomask(idx, - &local_interfaces[local_kindex_to_index[kindex]]->ipv4_netmask, + opal_ifindextomask(idx, + &local_interface->ipv4_netmask, sizeof(int)); break; case AF_INET6: @@ -357,12 +385,12 @@ static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(void) continue; } - local_interfaces[local_kindex_to_index[kindex]]->ipv6_address + local_interface->ipv6_address = (struct sockaddr_storage*) malloc(sizeof(local_addr)); - memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv6_address, + memcpy(local_interface->ipv6_address, &local_addr, sizeof(local_addr)); - opal_ifindextomask(idx, - &local_interfaces[local_kindex_to_index[kindex]]->ipv6_netmask, + opal_ifindextomask(idx, + &local_interface->ipv6_netmask, sizeof(int)); break; default: @@ -378,14 +406,14 @@ static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(void) opal_argv_free(exclude); } - return local_interfaces; + return proc_data->local_interfaces; } /* * Note that this routine must be called with the lock on the process - * already held. Insert a btl instance into the proc array and assign + * already held. Insert a btl instance into the proc array and assign * it an address. 
*/ -int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, +int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, mca_btl_base_endpoint_t* btl_endpoint ) { struct sockaddr_storage endpoint_addr_ss; @@ -393,6 +421,10 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, unsigned int perm_size; int rc, *a = NULL; size_t i, j; + mca_btl_tcp_interface_t** peer_interfaces; + mca_btl_tcp_proc_data_t _proc_data, *proc_data=&_proc_data; + size_t max_peer_interfaces; + memset(proc_data, 0, sizeof(mca_btl_tcp_proc_data_t)); if (NULL == (proc_hostname = opal_get_proc_hostname(btl_proc->proc_opal))) { return OPAL_ERR_UNREACH; @@ -414,21 +446,17 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, btl_proc->proc_endpoints[btl_proc->proc_endpoint_count++] = btl_endpoint; /* sanity checks */ - if( NULL == local_interfaces ) { - if( NULL == mca_btl_tcp_retrieve_local_interfaces() ) - return OPAL_ERR_OUT_OF_RESOURCE; - } - if( 0 == num_local_interfaces ) { + if( NULL == mca_btl_tcp_retrieve_local_interfaces(proc_data) ) + return OPAL_ERR_OUT_OF_RESOURCE; + if( 0 == proc_data->num_local_interfaces ) { return OPAL_ERR_UNREACH; } - if( NULL == peer_interfaces ) { - max_peer_interfaces = max_local_interfaces; - peer_interfaces = (mca_btl_tcp_interface_t**)malloc( max_peer_interfaces * sizeof(mca_btl_tcp_interface_t*) ); - } - num_peer_interfaces = 0; - memset(peer_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX); - memset(peer_interfaces, 0, max_peer_interfaces * sizeof(mca_btl_tcp_interface_t*)); + max_peer_interfaces = proc_data->max_local_interfaces; + peer_interfaces = (mca_btl_tcp_interface_t**)calloc( max_peer_interfaces, sizeof(mca_btl_tcp_interface_t*) ); + assert(NULL != peer_interfaces); + proc_data->num_peer_interfaces = 0; + memset(proc_data->peer_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX); /* * identify all kernel interfaces and the associated addresses of @@ -443,12 +471,12 @@ int mca_btl_tcp_proc_insert( 
mca_btl_tcp_proc_t* btl_proc, mca_btl_tcp_proc_tosocks (endpoint_addr, &endpoint_addr_ss); - index = peer_kindex_to_index[endpoint_addr->addr_ifkindex]; + index = proc_data->peer_kindex_to_index[endpoint_addr->addr_ifkindex]; if(-1 == index) { - index = num_peer_interfaces++; - peer_kindex_to_index[endpoint_addr->addr_ifkindex] = index; - if( num_peer_interfaces == max_peer_interfaces ) { + index = proc_data->num_peer_interfaces++; + proc_data->peer_kindex_to_index[endpoint_addr->addr_ifkindex] = index; + if( proc_data->num_peer_interfaces == max_peer_interfaces ) { max_peer_interfaces <<= 1; peer_interfaces = (mca_btl_tcp_interface_t**)realloc( peer_interfaces, max_peer_interfaces * sizeof(mca_btl_tcp_interface_t*) ); @@ -456,10 +484,10 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, return OPAL_ERR_OUT_OF_RESOURCE; } peer_interfaces[index] = (mca_btl_tcp_interface_t *) malloc(sizeof(mca_btl_tcp_interface_t)); - mca_btl_tcp_initialise_interface(peer_interfaces[index], + mca_btl_tcp_initialise_interface(peer_interfaces[index], endpoint_addr->addr_ifkindex, index); - } - + } + /* * in case one of the peer addresses is already in use, * mark the complete peer interface as 'not available' @@ -472,13 +500,13 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, case AF_INET: peer_interfaces[index]->ipv4_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss)); peer_interfaces[index]->ipv4_endpoint_addr = endpoint_addr; - memcpy(peer_interfaces[index]->ipv4_address, + memcpy(peer_interfaces[index]->ipv4_address, &endpoint_addr_ss, sizeof(endpoint_addr_ss)); break; case AF_INET6: peer_interfaces[index]->ipv6_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss)); peer_interfaces[index]->ipv6_endpoint_addr = endpoint_addr; - memcpy(peer_interfaces[index]->ipv6_address, + memcpy(peer_interfaces[index]->ipv6_address, &endpoint_addr_ss, sizeof(endpoint_addr_ss)); break; default: @@ -492,101 +520,82 @@ int 
mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, } /* - * assign weights to each possible pair of interfaces + * assign weights to each possible pair of interfaces */ - perm_size = num_local_interfaces; - if(num_peer_interfaces > perm_size) { - perm_size = num_peer_interfaces; + perm_size = proc_data->num_local_interfaces; + if(proc_data->num_peer_interfaces > perm_size) { + perm_size = proc_data->num_peer_interfaces; } - weights = (enum mca_btl_tcp_connection_quality**) malloc(perm_size + proc_data->weights = (enum mca_btl_tcp_connection_quality**) malloc(perm_size * sizeof(enum mca_btl_tcp_connection_quality*)); - - best_addr = (mca_btl_tcp_addr_t ***) malloc(perm_size + assert(NULL != proc_data->weights); + + proc_data->best_addr = (mca_btl_tcp_addr_t ***) malloc(perm_size * sizeof(mca_btl_tcp_addr_t **)); + assert(NULL != proc_data->best_addr); for(i = 0; i < perm_size; ++i) { - weights[i] = (enum mca_btl_tcp_connection_quality*) malloc(perm_size * + proc_data->weights[i] = (enum mca_btl_tcp_connection_quality*) calloc(perm_size, sizeof(enum mca_btl_tcp_connection_quality)); - memset(weights[i], 0, perm_size * sizeof(enum mca_btl_tcp_connection_quality)); + assert(NULL != proc_data->weights[i]); - best_addr[i] = (mca_btl_tcp_addr_t **) malloc(perm_size * + proc_data->best_addr[i] = (mca_btl_tcp_addr_t **) calloc(perm_size, sizeof(mca_btl_tcp_addr_t *)); - memset(best_addr[i], 0, perm_size * sizeof(mca_btl_tcp_addr_t *)); + assert(NULL != proc_data->best_addr[i]); } - - for(i=0; inum_local_interfaces; ++i ) { + mca_btl_tcp_interface_t* local_interface = proc_data->local_interfaces[i]; + for( j = 0; j < proc_data->num_peer_interfaces; ++j ) { /* initially, assume no connection is possible */ - weights[i][j] = CQ_NO_CONNECTION; + proc_data->weights[i][j] = CQ_NO_CONNECTION; /* check state of ipv4 address pair */ - if(NULL != local_interfaces[i]->ipv4_address && + if(NULL != proc_data->local_interfaces[i]->ipv4_address && NULL != 
peer_interfaces[j]->ipv4_address) { - /* check for loopback */ - if ((opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address) && - !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address)) || - (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address) && - !opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address)) || - (opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address) && - !opal_ifislocal(proc_hostname))) { - - /* No connection is possible on these interfaces */ - - /* check for RFC1918 */ - } else if(opal_net_addr_isipv4public((struct sockaddr*) local_interfaces[i]->ipv4_address) && - opal_net_addr_isipv4public((struct sockaddr*) peer_interfaces[j]->ipv4_address)) { - if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address, + if(opal_net_addr_isipv4public((struct sockaddr*) local_interface->ipv4_address) && + opal_net_addr_isipv4public((struct sockaddr*) peer_interfaces[j]->ipv4_address)) { + if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv4_address, (struct sockaddr*) peer_interfaces[j]->ipv4_address, - local_interfaces[i]->ipv4_netmask)) { - weights[i][j] = CQ_PUBLIC_SAME_NETWORK; + local_interface->ipv4_netmask)) { + proc_data->weights[i][j] = CQ_PUBLIC_SAME_NETWORK; } else { - weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK; + proc_data->weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK; } - best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr; + proc_data->best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr; continue; + } + if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv4_address, + (struct sockaddr*) peer_interfaces[j]->ipv4_address, + local_interface->ipv4_netmask)) { + proc_data->weights[i][j] = CQ_PRIVATE_SAME_NETWORK; } else { - if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address, - (struct sockaddr*) peer_interfaces[j]->ipv4_address, - local_interfaces[i]->ipv4_netmask)) { - 
weights[i][j] = CQ_PRIVATE_SAME_NETWORK; - } else { - weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK; - } - best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr; - continue; + proc_data->weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK; } + proc_data->best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr; + continue; } /* check state of ipv6 address pair - ipv6 is always public, * since link-local addresses are skipped in opal_ifinit() */ - if(NULL != local_interfaces[i]->ipv6_address && + if(NULL != local_interface->ipv6_address && NULL != peer_interfaces[j]->ipv6_address) { - /* check for loopback */ - if ((opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address) && - !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address)) || - (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address) && - !opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address)) || - (opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address) && - !opal_ifislocal(proc_hostname))) { - - /* No connection is possible on these interfaces */ - - } else if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv6_address, - (struct sockaddr*) peer_interfaces[j]->ipv6_address, - local_interfaces[i]->ipv6_netmask)) { - weights[i][j] = CQ_PUBLIC_SAME_NETWORK; + if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv6_address, + (struct sockaddr*) peer_interfaces[j]->ipv6_address, + local_interface->ipv6_netmask)) { + proc_data->weights[i][j] = CQ_PUBLIC_SAME_NETWORK; } else { - weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK; + proc_data->weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK; } - best_addr[i][j] = peer_interfaces[j]->ipv6_endpoint_addr; + proc_data->best_addr[i][j] = peer_interfaces[j]->ipv6_endpoint_addr; continue; - } + } } /* for each peer interface */ } /* for each local interface */ @@ -596,7 +605,7 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, * interfaces */ - 
best_assignment = (unsigned int *) malloc (perm_size * sizeof(int)); + proc_data->best_assignment = (unsigned int *) malloc (perm_size * sizeof(int)); a = (int *) malloc(perm_size * sizeof(int)); if (NULL == a) { @@ -605,25 +614,26 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, /* Can only find the best set of connections when the number of * interfaces is not too big. When it gets larger, we fall back - * to a simpler and faster (and not as optimal) algorithm. - * See ticket https://svn.open-mpi.org/trac/ompi/ticket/2031 + * to a simpler and faster (and not as optimal) algorithm. + * See ticket https://svn.open-mpi.org/trac/ompi/ticket/2031 * for more details about this issue. */ if (perm_size <= MAX_PERMUTATION_INTERFACES) { memset(a, 0, perm_size * sizeof(int)); - max_assignment_cardinality = -1; - max_assignment_weight = -1; - visit(0, -1, perm_size, a); + proc_data->max_assignment_cardinality = -1; + proc_data->max_assignment_weight = -1; + visit(proc_data, 0, -1, perm_size, a); rc = OPAL_ERR_UNREACH; for(i = 0; i < perm_size; ++i) { - if(best_assignment[i] > num_peer_interfaces - || weights[i][best_assignment[i]] == CQ_NO_CONNECTION - || peer_interfaces[best_assignment[i]]->inuse - || NULL == peer_interfaces[best_assignment[i]]) { + unsigned int best = proc_data->best_assignment[i]; + if(best > proc_data->num_peer_interfaces + || proc_data->weights[i][best] == CQ_NO_CONNECTION + || peer_interfaces[best]->inuse + || NULL == peer_interfaces[best]) { continue; - } - peer_interfaces[best_assignment[i]]->inuse++; - btl_endpoint->endpoint_addr = best_addr[i][best_assignment[i]]; + } + peer_interfaces[best]->inuse++; + btl_endpoint->endpoint_addr = proc_data->best_addr[i][best]; btl_endpoint->endpoint_addr->addr_inuse++; rc = OPAL_SUCCESS; break; @@ -634,11 +644,11 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, /* Find the best connection that is not in use. Save away * the indices of the best location. 
*/ max = CQ_NO_CONNECTION; - for(i=0; inum_local_interfaces; ++i) { + for(j=0; jnum_peer_interfaces; ++j) { if (!peer_interfaces[j]->inuse) { - if (weights[i][j] > max) { - max = weights[i][j]; + if (proc_data->weights[i][j] > max) { + max = proc_data->weights[i][j]; i_max = i; j_max = j; } @@ -649,18 +659,18 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, rc = OPAL_ERR_UNREACH; if (CQ_NO_CONNECTION != max) { peer_interfaces[j_max]->inuse++; - btl_endpoint->endpoint_addr = best_addr[i_max][j_max]; + btl_endpoint->endpoint_addr = proc_data->best_addr[i_max][j_max]; btl_endpoint->endpoint_addr->addr_inuse++; rc = OPAL_SUCCESS; } } for(i = 0; i < perm_size; ++i) { - free(weights[i]); - free(best_addr[i]); + free(proc_data->weights[i]); + free(proc_data->best_addr[i]); } - for(i = 0; i < num_peer_interfaces; ++i) { + for(i = 0; i < proc_data->num_peer_interfaces; ++i) { if(NULL != peer_interfaces[i]->ipv4_address) { free(peer_interfaces[i]->ipv4_address); } @@ -670,25 +680,22 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, free(peer_interfaces[i]); } free(peer_interfaces); - peer_interfaces = NULL; - max_peer_interfaces = 0; - for(i = 0; i < num_local_interfaces; ++i) { - if(NULL != local_interfaces[i]->ipv4_address) { - free(local_interfaces[i]->ipv4_address); + for(i = 0; i < proc_data->num_local_interfaces; ++i) { + if(NULL != proc_data->local_interfaces[i]->ipv4_address) { + free(proc_data->local_interfaces[i]->ipv4_address); } - if(NULL != local_interfaces[i]->ipv6_address) { - free(local_interfaces[i]->ipv6_address); + if(NULL != proc_data->local_interfaces[i]->ipv6_address) { + free(proc_data->local_interfaces[i]->ipv6_address); } - free(local_interfaces[i]); + free(proc_data->local_interfaces[i]); } - free(local_interfaces); - local_interfaces = NULL; - max_local_interfaces = 0; + free(proc_data->local_interfaces); + proc_data->max_local_interfaces = 0; - free(weights); - free(best_addr); - free(best_assignment); + 
free(proc_data->weights); + free(proc_data->best_addr); + free(proc_data->best_assignment); free(a); return rc; @@ -734,10 +741,35 @@ int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t* btl_proc, mca_btl_base_endpoint_ mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name) { mca_btl_tcp_proc_t* proc = NULL; + OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock); - opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs, + opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs, *name, (void**)&proc); OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); + if (OPAL_UNLIKELY(NULL == proc)) { + mca_btl_base_endpoint_t *endpoint; + opal_proc_t *opal_proc; + + BTL_VERBOSE(("adding tcp proc for unknown peer {.jobid = 0x%x, .vpid = 0x%x}", + name->jobid, name->vpid)); + + opal_proc = opal_proc_for_name (*name); + if (NULL == opal_proc) { + return NULL; + } + + /* try adding this proc to each btl until */ + for( uint32_t i = 0; i < mca_btl_tcp_component.tcp_num_btls; ++i ) { + endpoint = NULL; + (void) mca_btl_tcp_add_procs (&mca_btl_tcp_component.tcp_btls[i]->super, 1, &opal_proc, + &endpoint, NULL); + if (NULL != endpoint && NULL == proc) { + /* get the proc and continue on (could probably just break here) */ + proc = endpoint->endpoint_proc; + } + } + } + return proc; } @@ -747,9 +779,8 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name) */ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr, int sd) { - size_t i; OPAL_THREAD_LOCK(&btl_proc->proc_lock); - for( i = 0; i < btl_proc->proc_endpoint_count; i++ ) { + for( size_t i = 0; i < btl_proc->proc_endpoint_count; i++ ) { mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i]; /* Check all conditions before going to try to accept the connection. 
*/ if( btl_endpoint->endpoint_addr->addr_family != addr->sa_family ) { @@ -781,9 +812,38 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr OPAL_THREAD_UNLOCK(&btl_proc->proc_lock); return; } - OPAL_THREAD_UNLOCK(&btl_proc->proc_lock); /* No further use of this socket. Close it */ CLOSE_THE_SOCKET(sd); + { + size_t len = 1024; + char* addr_str = (char*)malloc(len); + if( NULL != addr_str ) { + memset(addr_str, 0, len); + for (size_t i = 0; i < btl_proc->proc_endpoint_count; i++) { + mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i]; + if (btl_endpoint->endpoint_addr->addr_family != addr->sa_family) { + continue; + } + + if (addr_str[0] != '\0') { + strncat(addr_str, ", ", len); + len -= 2; + } + strncat(addr_str, inet_ntop(AF_INET6, (void*)(struct in6_addr*)&btl_endpoint->endpoint_addr->addr_inet, + addr_str + 1024 - len, INET6_ADDRSTRLEN), len); + len = 1024 - strlen(addr_str); + } + } + opal_show_help("help-mpi-btl-tcp.txt", "dropped inbound connection", + true, opal_process_info.nodename, + getpid(), + btl_proc->proc_opal->proc_hostname, + OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name), + opal_net_get_hostname((struct sockaddr*)addr), + addr_str); + free(addr_str); + } + OPAL_THREAD_UNLOCK(&btl_proc->proc_lock); } /* @@ -818,7 +878,7 @@ bool mca_btl_tcp_proc_tosocks(mca_btl_tcp_addr_t* proc_addr, opal_output( 0, "mca_btl_tcp_proc: unknown af_family received: %d\n", proc_addr->addr_family ); return false; - } + } return true; } diff --git a/opal/mca/btl/tcp/btl_tcp_proc.h b/opal/mca/btl/tcp/btl_tcp_proc.h index 2f7312f7be9..5e56cd7deab 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.h +++ b/opal/mca/btl/tcp/btl_tcp_proc.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -44,16 +44,16 @@ struct mca_btl_tcp_proc_t { struct mca_btl_tcp_addr_t* proc_addrs; /**< array of addresses exported by peer */ - size_t proc_addr_count; + size_t proc_addr_count; /**< number of addresses published by endpoint */ - struct mca_btl_base_endpoint_t **proc_endpoints; - /**< array of endpoints that have been created to access this proc */ + struct mca_btl_base_endpoint_t **proc_endpoints; + /**< array of endpoints that have been created to access this proc */ - size_t proc_endpoint_count; + size_t proc_endpoint_count; /**< number of endpoints */ - opal_mutex_t proc_lock; + opal_mutex_t proc_lock; /**< lock to protect against concurrent access to proc state */ }; typedef struct mca_btl_tcp_proc_t mca_btl_tcp_proc_t; @@ -95,7 +95,7 @@ typedef struct mca_btl_tcp_interface_t mca_btl_tcp_interface_t; * describes the quality of a possible connection between a local and * a remote network interface */ -enum mca_btl_tcp_connection_quality { +enum mca_btl_tcp_connection_quality { CQ_NO_CONNECTION, CQ_PRIVATE_DIFFERENT_NETWORK, CQ_PRIVATE_SAME_NETWORK, diff --git a/opal/mca/btl/tcp/configure.m4 b/opal/mca/btl/tcp/configure.m4 index e333cc8b450..9eb66631ce1 100644 --- a/opal/mca/btl/tcp/configure.m4 +++ b/opal/mca/btl/tcp/configure.m4 @@ -6,15 +6,17 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -24,11 +26,15 @@ AC_DEFUN([MCA_opal_btl_tcp_CONFIG],[ AC_CONFIG_FILES([opal/mca/btl/tcp/Makefile]) # check for sockaddr_in (a good sign we have TCP) - AC_CHECK_TYPES([struct sockaddr_in], - [$1], - [$2], + AC_CHECK_TYPES([struct sockaddr_in], + [opal_btl_tcp_happy=yes + $1], + [opal_btl_tcp_happy=no + $2], [AC_INCLUDES_DEFAULT #ifdef HAVE_NETINET_IN_H #include -#endif]) +#endif + ]) + OPAL_SUMMARY_ADD([[Transports]],[[TCP]],[[btl_tcp]],[$opal_btl_tcp_happy]) ])dnl diff --git a/opal/mca/btl/tcp/help-mpi-btl-tcp.txt b/opal/mca/btl/tcp/help-mpi-btl-tcp.txt index b53e6cc511d..5fbb63763bb 100644 --- a/opal/mca/btl/tcp/help-mpi-btl-tcp.txt +++ b/opal/mca/btl/tcp/help-mpi-btl-tcp.txt @@ -1,10 +1,13 @@ # -*- text -*- # -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2015-2016 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI's TCP support @@ -56,11 +59,50 @@ most common causes when it does occur are: * The operating system ran out of file descriptors * The operating system ran out of memory -Your Open MPI job will likely hang until the failure resason is fixed -(e.g., more file descriptors and/or memory becomes available), and may -eventually timeout / abort. +Your Open MPI job will likely hang (or crash) until the failure +resason is fixed (e.g., more file descriptors and/or memory becomes +available), and may eventually timeout / abort. Local host: %s PID: %d Errno: %d (%s) # +[unsuported progress thread] +WARNING: Support for the TCP progress thread has not been compiled in. +Fall back to the normal progress. + + Local host: %s + Value: %s + Message: %s +# +[peer hung up] +An MPI communication peer process has unexpectedly disconnected. This +usually indicates a failure in the peer process (e.g., a crash or +otherwise exiting without calling MPI_FINALIZE first). + +Although this local MPI process will likely now behave unpredictably +(it may even hang or crash), the root cause of this problem is the +failure of the peer -- that is what you need to investigate. For +example, there may be a core file that you can examine. More +generally: such peer hangups are frequently caused by application bugs +or other external events. + + Local host: %s + Local PID: %d + Peer host: %s +# +[dropped inbound connection] +Open MPI detected an inbound MPI TCP connection request from a peer +that appears to be part of this MPI job (i.e., it identified itself as +part of this Open MPI job), but it is from an IP address that is +unexpected. This is highly unusual. + +The inbound connection has been dropped, and the peer should simply +try again with a different IP interface (i.e., the job should +hopefully be able to continue). 
+ + Local host: %s + Local PID: %d + Peer hostname: %s (%s) + Source IP of socket: %s + Known IPs of peer: %s diff --git a/opal/mca/btl/template/Makefile.am b/opal/mca/btl/template/Makefile.am index 2a7a074a9a8..4257b99fb98 100644 --- a/opal/mca/btl/template/Makefile.am +++ b/opal/mca/btl/template/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/btl/template/btl_template.c b/opal/mca/btl/template/btl_template.c index 8387c6e239c..80da0983e31 100644 --- a/opal/mca/btl/template/btl_template.c +++ b/opal/mca/btl/template/btl_template.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,12 +23,12 @@ #include #include "opal/class/opal_bitmap.h" #include "opal/mca/btl/btl.h" -#include "opal/datatype/opal_convertor.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/mpool/mpool.h" +#include "opal/datatype/opal_convertor.h" +#include "opal/mca/mpool/base/base.h" +#include "opal/mca/mpool/mpool.h" #include "btl_template.h" -#include "btl_template_frag.h" +#include "btl_template_frag.h" #include "btl_template_proc.h" #include "btl_template_endpoint.h" @@ -48,7 +48,7 @@ mca_btl_template_module_t mca_btl_template_module = { .btl_get = mca_btl_template_get, .btl_register_mem = mca_btl_template_register_mem, .btl_deregister_mem = mca_btl_template_deregister_mem, - .btl_ft_event = mca_btl_template_ft_event + .btl_ft_event = NULL } }; @@ -57,10 +57,10 @@ mca_btl_template_module_t mca_btl_template_module = { */ int mca_btl_template_add_procs( - struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **opal_procs, - struct mca_btl_base_endpoint_t** peers, + struct mca_btl_base_module_t* btl, + size_t nprocs, + struct opal_proc_t **opal_procs, + struct mca_btl_base_endpoint_t** peers, opal_bitmap_t* reachable) { mca_btl_template_module_t* template_btl = (mca_btl_template_module_t*)btl; @@ -85,15 +85,15 @@ int mca_btl_template_add_procs( } /* - * Check to make sure that the peer has at least as many interface - * addresses exported as we are trying to use. If not, then + * Check to make sure that the peer has at least as many interface + * addresses exported as we are trying to use. If not, then * don't bind this BTL instance to the proc. */ OPAL_THREAD_LOCK(&template_proc->proc_lock); /* The btl_proc datastructure is shared by all TEMPLATE BTL - * instances that are trying to reach this destination. + * instances that are trying to reach this destination. * Cache the peer instance on the btl_proc. 
*/ template_endpoint = OBJ_NEW(mca_btl_template_endpoint_t); @@ -118,9 +118,9 @@ int mca_btl_template_add_procs( return OPAL_SUCCESS; } -int mca_btl_template_del_procs(struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, +int mca_btl_template_del_procs(struct mca_btl_base_module_t* btl, + size_t nprocs, + struct opal_proc_t **procs, struct mca_btl_base_endpoint_t ** peers) { /* TODO */ @@ -133,9 +133,9 @@ int mca_btl_template_del_procs(struct mca_btl_base_module_t* btl, */ int mca_btl_template_register( - struct mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_module_recv_cb_fn_t cbfunc, + struct mca_btl_base_module_t* btl, + mca_btl_base_tag_t tag, + mca_btl_base_module_recv_cb_fn_t cbfunc, void* cbdata) { return OPAL_SUCCESS; @@ -156,20 +156,20 @@ mca_btl_base_descriptor_t* mca_btl_template_alloc( size_t size, uint32_t flags) { - mca_btl_template_module_t* template_btl = (mca_btl_template_module_t*) btl; + mca_btl_template_module_t* template_btl = (mca_btl_template_module_t*) btl; mca_btl_template_frag_t* frag = NULL; - - if(size <= btl->btl_eager_limit){ - MCA_BTL_TEMPLATE_FRAG_ALLOC_EAGER(template_btl, frag); - } else { - MCA_BTL_TEMPLATE_FRAG_ALLOC_MAX(template_btl, frag); + + if(size <= btl->btl_eager_limit){ + MCA_BTL_TEMPLATE_FRAG_ALLOC_EAGER(template_btl, frag); + } else { + MCA_BTL_TEMPLATE_FRAG_ALLOC_MAX(template_btl, frag); } if( OPAL_UNLIKELY(NULL != frag) ) { return NULL; } - + frag->segment.seg_len = size; - frag->base.des_flags = 0; + frag->base.des_flags = 0; return (mca_btl_base_descriptor_t*)frag; } @@ -179,23 +179,23 @@ mca_btl_base_descriptor_t* mca_btl_template_alloc( */ int mca_btl_template_free( - struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* des) + struct mca_btl_base_module_t* btl, + mca_btl_base_descriptor_t* des) { - mca_btl_template_frag_t* frag = (mca_btl_template_frag_t*)des; + mca_btl_template_frag_t* frag = (mca_btl_template_frag_t*)des; if(frag->size == 0) { #if 
MCA_BTL_HAS_MPOOL OBJ_RELEASE(frag->registration); #endif - MCA_BTL_TEMPLATE_FRAG_RETURN_USER(btl, frag); - } else if(frag->size == btl->btl_eager_limit){ - MCA_BTL_TEMPLATE_FRAG_RETURN_EAGER(btl, frag); + MCA_BTL_TEMPLATE_FRAG_RETURN_USER(btl, frag); + } else if(frag->size == btl->btl_eager_limit){ + MCA_BTL_TEMPLATE_FRAG_RETURN_EAGER(btl, frag); } else if(frag->size == btl->btl_max_send_size) { - MCA_BTL_TEMPLATE_FRAG_RETURN_EAGER(btl, frag); + MCA_BTL_TEMPLATE_FRAG_RETURN_EAGER(btl, frag); } else { return OPAL_ERR_BAD_PARAM; } - return OPAL_SUCCESS; + return OPAL_SUCCESS; } /** @@ -220,7 +220,7 @@ mca_btl_base_descriptor_t* mca_btl_template_prepare_src( uint32_t iov_count = 1; size_t max_data = *size; int rc; - + /* * if we aren't pinning the data and the requested size is less @@ -245,12 +245,12 @@ mca_btl_base_descriptor_t* mca_btl_template_prepare_src( frag->segment.seg_len = max_data + reserve; } - /* + /* * otherwise pack as much data as we can into a fragment * that is the max send size. */ else { - + MCA_BTL_TEMPLATE_FRAG_ALLOC_MAX(btl, frag); if(OPAL_UNLIKELY(NULL == frag)) { return NULL; @@ -260,10 +260,10 @@ mca_btl_base_descriptor_t* mca_btl_template_prepare_src( } iov.iov_len = max_data; iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve; - + rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); *size = max_data; - + if( rc < 0 ) { MCA_BTL_TEMPLATE_FRAG_RETURN_MAX(btl, frag); return NULL; @@ -287,16 +287,16 @@ mca_btl_base_descriptor_t* mca_btl_template_prepare_src( * @param tag (IN) The tag value used to notify the peer. 
*/ -int mca_btl_template_send( +int mca_btl_template_send( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, + struct mca_btl_base_descriptor_t* descriptor, mca_btl_base_tag_t tag) - + { /* mca_btl_template_module_t* template_btl = (mca_btl_template_module_t*) btl; */ - mca_btl_template_frag_t* frag = (mca_btl_template_frag_t*)descriptor; - frag->endpoint = endpoint; + mca_btl_template_frag_t* frag = (mca_btl_template_frag_t*)descriptor; + frag->endpoint = endpoint; /* TODO */ return OPAL_ERR_NOT_IMPLEMENTED; } @@ -318,7 +318,7 @@ int mca_btl_template_put (struct mca_btl_base_module_t *btl, { /* mca_btl_template_module_t* template_btl = (mca_btl_template_module_t*) btl; */ /* TODO */ - return OPAL_ERR_NOT_IMPLEMENTED; + return OPAL_ERR_NOT_IMPLEMENTED; } @@ -339,7 +339,7 @@ int mca_btl_template_get (struct mca_btl_base_module_t *btl, { /* mca_btl_template_module_t* template_btl = (mca_btl_template_module_t*) btl; */ /* TODO */ - return OPAL_ERR_NOT_IMPLEMENTED; + return OPAL_ERR_NOT_IMPLEMENTED; } /** @@ -396,7 +396,7 @@ int mca_btl_template_deregister_mem (struct mca_btl_base_module_t* btl, int mca_btl_template_finalize(struct mca_btl_base_module_t* btl) { - mca_btl_template_module_t* template_btl = (mca_btl_template_module_t*) btl; + mca_btl_template_module_t* template_btl = (mca_btl_template_module_t*) btl; OBJ_DESTRUCT(&template_btl->template_lock); OBJ_DESTRUCT(&template_btl->template_frag_eager); OBJ_DESTRUCT(&template_btl->template_frag_max); @@ -405,22 +405,3 @@ int mca_btl_template_finalize(struct mca_btl_base_module_t* btl) return OPAL_SUCCESS; } -int mca_btl_template_ft_event(int state) { - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OPAL_SUCCESS; -} diff --git a/opal/mca/btl/template/btl_template.h 
b/opal/mca/btl/template/btl_template.h index efd7b889d3d..06268e0e033 100644 --- a/opal/mca/btl/template/btl_template.h +++ b/opal/mca/btl/template/btl_template.h @@ -6,16 +6,16 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -32,7 +32,7 @@ #include "opal/mca/event/event.h" #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/base.h" -#include "opal/mca/mpool/mpool.h" +#include "opal/mca/mpool/mpool.h" BEGIN_C_DECLS @@ -44,7 +44,7 @@ BEGIN_C_DECLS struct mca_btl_template_component_t { mca_btl_base_component_3_0_0_t super; /**< base BTL component */ - + uint32_t template_num_btls; /**< number of hcas available to the TEMPLATE component */ @@ -66,12 +66,12 @@ struct mca_btl_template_component_t { opal_mutex_t template_lock; /**< lock for accessing module state */ - char* template_mpool_name; - /**< name of memory pool */ + char* template_mpool_name; + /**< name of memory pool */ bool leave_pinned; /**< pin memory on first use and leave pinned */ -}; +}; typedef struct mca_btl_template_component_t mca_btl_template_component_t; OPAL_MODULE_DECLSPEC extern mca_btl_template_component_t mca_btl_template_component; @@ -93,20 +93,20 @@ struct mca_btl_template_module_t { #if MCA_BTL_HAS_MPOOL struct mca_mpool_base_module_t* template_mpool; #endif -}; +}; typedef struct mca_btl_template_module_t mca_btl_template_module_t; extern mca_btl_template_module_t mca_btl_template_module; /** * TEMPLATE component initialization. 
- * + * * @param num_btl_modules (OUT) Number of BTLs returned in BTL array. * @param allow_multi_user_threads (OUT) Flag indicating wether BTL supports user threads (TRUE) * @param have_hidden_threads (OUT) Flag indicating wether BTL uses threads (TRUE) */ extern mca_btl_base_module_t** mca_btl_template_component_init( - int *num_btl_modules, + int *num_btl_modules, bool allow_multi_user_threads, bool have_hidden_threads ); @@ -121,7 +121,7 @@ extern int mca_btl_template_component_progress(void); /** * Cleanup any resources held by the BTL. - * + * * @param btl BTL instance. * @return OPAL_SUCCESS or error status on failure. */ @@ -133,14 +133,14 @@ extern int mca_btl_template_finalize( /** * PML->BTL notification of change in the process list. - * + * * @param btl (IN) * @param nprocs (IN) Number of processes * @param procs (IN) Set of processes * @param peers (OUT) Set of (optional) peer addressing info. * @param peers (IN/OUT) Set of processes that are reachable via this BTL. * @return OPAL_SUCCESS or error status on failure. - * + * */ extern int mca_btl_template_add_procs( @@ -182,7 +182,7 @@ extern int mca_btl_template_del_procs( extern int mca_btl_template_send( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* btl_peer, - struct mca_btl_base_descriptor_t* descriptor, + struct mca_btl_base_descriptor_t* descriptor, mca_btl_base_tag_t tag ); @@ -308,11 +308,11 @@ int mca_btl_template_deregister_mem (struct mca_btl_base_module_t* btl, */ extern int mca_btl_template_register( - struct mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_module_recv_cb_fn_t cbfunc, - void* cbdata); - + struct mca_btl_base_module_t* btl, + mca_btl_base_tag_t tag, + mca_btl_base_module_recv_cb_fn_t cbfunc, + void* cbdata); + /** * Allocate a descriptor with a segment of the requested size. 
* Note that the BTL layer may choose to return a smaller size @@ -327,7 +327,7 @@ extern mca_btl_base_descriptor_t* mca_btl_template_alloc( struct mca_btl_base_endpoint_t* endpoint, uint8_t order, size_t size, - uint32_t flags); + uint32_t flags); /** @@ -338,9 +338,9 @@ extern mca_btl_base_descriptor_t* mca_btl_template_alloc( */ extern int mca_btl_template_free( - struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* des); - + struct mca_btl_base_module_t* btl, + mca_btl_base_descriptor_t* des); + /** * Prepare a descriptor for send/rdma using the supplied @@ -353,7 +353,7 @@ extern int mca_btl_template_free( * @param endpoint (IN) BTL peer addressing * @param convertor (IN) Data type convertor * @param reserve (IN) Additional bytes requested by upper layer to precede user data - * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) + * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) */ mca_btl_base_descriptor_t* mca_btl_template_prepare_src( diff --git a/opal/mca/btl/template/btl_template_component.c b/opal/mca/btl/template/btl_template_component.c index 53f39383a3f..9bb2bad342a 100644 --- a/opal/mca/btl/template/btl_template_component.c +++ b/opal/mca/btl/template/btl_template_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +24,12 @@ #include "opal/constants.h" #include "opal/mca/event/event.h" #include "opal/mca/btl/btl.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/btl/base/base.h" +#include "opal/mca/mpool/base/base.h" +#include "opal/mca/btl/base/base.h" #include "btl_template.h" #include "btl_template_frag.h" -#include "btl_template_endpoint.h" +#include "btl_template_endpoint.h" /** * Register any MCA parameters associated with this component @@ -74,12 +74,12 @@ static int mca_btl_template_component_open(void) } static int mca_btl_template_component_register(void) -{ +{ /* initialize state */ mca_btl_template_component.template_num_btls=0; mca_btl_template_component.template_btls=NULL; - - /* initialize objects */ + + /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_template_component.template_procs, opal_list_t); /* register TEMPLATE component parameters */ @@ -137,7 +137,7 @@ static int mca_btl_template_component_close(void) * (3) register BTL parameters with the MCA */ -mca_btl_base_module_t** mca_btl_template_component_init(int *num_btl_modules, +mca_btl_base_module_t** mca_btl_template_component_init(int *num_btl_modules, bool enable_progress_threads, bool enable_mpi_threads) { diff --git a/opal/mca/btl/template/btl_template_endpoint.c b/opal/mca/btl/template/btl_template_endpoint.c index b9795b9dbff..71d205de07e 100644 --- a/opal/mca/btl/template/btl_template_endpoint.c +++ b/opal/mca/btl/template/btl_template_endpoint.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +21,7 @@ #include #include #include "btl_template.h" -#include "btl_template_endpoint.h" +#include "btl_template_endpoint.h" #include "btl_template_proc.h" #include "btl_template_frag.h" @@ -48,8 +48,8 @@ static void mca_btl_template_endpoint_destruct(mca_btl_base_endpoint_t* endpoint OBJ_CLASS_INSTANCE( - mca_btl_template_endpoint_t, - opal_list_item_t, - mca_btl_template_endpoint_construct, + mca_btl_template_endpoint_t, + opal_list_item_t, + mca_btl_template_endpoint_construct, mca_btl_template_endpoint_destruct); diff --git a/opal/mca/btl/template/btl_template_endpoint.h b/opal/mca/btl/template/btl_template_endpoint.h index 54fa21ac0d8..9e497a46cef 100644 --- a/opal/mca/btl/template/btl_template_endpoint.h +++ b/opal/mca/btl/template/btl_template_endpoint.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/btl/template/btl_template_frag.c b/opal/mca/btl/template/btl_template_frag.c index eef57b93c63..b22d38138ce 100644 --- a/opal/mca/btl/template/btl_template_frag.c +++ b/opal/mca/btl/template/btl_template_frag.c @@ -1,47 +1,47 @@ -#include "btl_template_frag.h" +#include "btl_template_frag.h" -static void mca_btl_template_frag_eager_constructor(mca_btl_template_frag_t* frag) -{ +static void mca_btl_template_frag_eager_constructor(mca_btl_template_frag_t* frag) +{ frag->registration = NULL; - frag->size = mca_btl_template_module.super.btl_eager_limit; + frag->size = mca_btl_template_module.super.btl_eager_limit; } -static void mca_btl_template_frag_max_constructor(mca_btl_template_frag_t* frag) -{ +static void mca_btl_template_frag_max_constructor(mca_btl_template_frag_t* frag) +{ frag->registration = NULL; - frag->size = mca_btl_template_module.super.btl_max_send_size; + frag->size = mca_btl_template_module.super.btl_max_send_size; } -static void mca_btl_template_frag_user_constructor(mca_btl_template_frag_t* frag) -{ - frag->size = 0; +static void mca_btl_template_frag_user_constructor(mca_btl_template_frag_t* frag) +{ + frag->size = 0; frag->registration = NULL; } OBJ_CLASS_INSTANCE( - mca_btl_template_frag_t, - mca_btl_base_descriptor_t, - NULL, - NULL); + mca_btl_template_frag_t, + mca_btl_base_descriptor_t, + NULL, + NULL); OBJ_CLASS_INSTANCE( - mca_btl_template_frag_eager_t, - mca_btl_base_descriptor_t, - mca_btl_template_frag_eager_constructor, - NULL); + mca_btl_template_frag_eager_t, + mca_btl_base_descriptor_t, + mca_btl_template_frag_eager_constructor, + NULL); OBJ_CLASS_INSTANCE( - mca_btl_template_frag_max_t, - mca_btl_base_descriptor_t, - mca_btl_template_frag_max_constructor, - NULL); + mca_btl_template_frag_max_t, + mca_btl_base_descriptor_t, + mca_btl_template_frag_max_constructor, + NULL); OBJ_CLASS_INSTANCE( - mca_btl_template_frag_user_t, - 
mca_btl_base_descriptor_t, - mca_btl_template_frag_user_constructor, - NULL); + mca_btl_template_frag_user_t, + mca_btl_base_descriptor_t, + mca_btl_template_frag_user_constructor, + NULL); diff --git a/opal/mca/btl/template/btl_template_frag.h b/opal/mca/btl/template/btl_template_frag.h index d09e23b5335..aab1f1502ca 100644 --- a/opal/mca/btl/template/btl_template_frag.h +++ b/opal/mca/btl/template/btl_template_frag.h @@ -6,16 +6,16 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,7 +25,7 @@ #define MCA_BTL_TEMPLATE_FRAG_ALIGN (8) #include "opal_config.h" -#include "btl_template.h" +#include "btl_template.h" BEGIN_C_DECLS @@ -33,29 +33,29 @@ BEGIN_C_DECLS * TEMPLATE send fratemplateent derived type. 
*/ struct mca_btl_template_frag_t { - mca_btl_base_descriptor_t base; - mca_btl_base_segment_t segment; - struct mca_btl_base_endpoint_t *endpoint; + mca_btl_base_descriptor_t base; + mca_btl_base_segment_t segment; + struct mca_btl_base_endpoint_t *endpoint; mca_btl_base_header_t *hdr; - size_t size; + size_t size; #if MCA_BTL_HAS_MPOOL struct mca_mpool_base_registration_t* registration; #endif -}; -typedef struct mca_btl_template_frag_t mca_btl_template_frag_t; -OBJ_CLASS_DECLARATION(mca_btl_template_frag_t); +}; +typedef struct mca_btl_template_frag_t mca_btl_template_frag_t; +OBJ_CLASS_DECLARATION(mca_btl_template_frag_t); -typedef struct mca_btl_template_frag_t mca_btl_template_frag_eager_t; - -OBJ_CLASS_DECLARATION(mca_btl_template_frag_eager_t); +typedef struct mca_btl_template_frag_t mca_btl_template_frag_eager_t; -typedef struct mca_btl_template_frag_t mca_btl_template_frag_max_t; - -OBJ_CLASS_DECLARATION(mca_btl_template_frag_max_t); +OBJ_CLASS_DECLARATION(mca_btl_template_frag_eager_t); -typedef struct mca_btl_template_frag_t mca_btl_template_frag_user_t; - -OBJ_CLASS_DECLARATION(mca_btl_template_frag_user_t); +typedef struct mca_btl_template_frag_t mca_btl_template_frag_max_t; + +OBJ_CLASS_DECLARATION(mca_btl_template_frag_max_t); + +typedef struct mca_btl_template_frag_t mca_btl_template_frag_user_t; + +OBJ_CLASS_DECLARATION(mca_btl_template_frag_user_t); /* diff --git a/opal/mca/btl/template/btl_template_proc.c b/opal/mca/btl/template/btl_template_proc.c index a08c7036d2f..76f6f52f677 100644 --- a/opal/mca/btl/template/btl_template_proc.c +++ b/opal/mca/btl/template/btl_template_proc.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,8 +25,8 @@ static void mca_btl_template_proc_construct(mca_btl_template_proc_t* proc); static void mca_btl_template_proc_destruct(mca_btl_template_proc_t* proc); -OBJ_CLASS_INSTANCE(mca_btl_template_proc_t, - opal_list_item_t, mca_btl_template_proc_construct, +OBJ_CLASS_INSTANCE(mca_btl_template_proc_t, + opal_list_item_t, mca_btl_template_proc_construct, mca_btl_template_proc_destruct); void mca_btl_template_proc_construct(mca_btl_template_proc_t* template_proc) @@ -93,7 +93,7 @@ static mca_btl_template_proc_t* mca_btl_template_proc_lookup_opal(opal_proc_t* o /* * Create a TEMPLATE process structure. There is a one-to-one correspondence * between a opal_proc_t and a mca_btl_template_proc_t instance. We cache - * additional data (specifically the list of mca_btl_template_endpoint_t instances, + * additional data (specifically the list of mca_btl_template_endpoint_t instances, * and published addresses) associated w/ a given destination on this * datastructure. */ @@ -127,7 +127,7 @@ mca_btl_template_proc_t* mca_btl_template_proc_create(opal_proc_t* opal_proc) module_proc->proc_addr_count = 1; /* XXX: Right now, there can be only 1 peer associated - * with a proc. Needs a little bit change in + * with a proc. Needs a little bit change in * mca_btl_template_proc_t to allow on demand increasing of * number of endpoints for this proc */ @@ -144,10 +144,10 @@ mca_btl_template_proc_t* mca_btl_template_proc_create(opal_proc_t* opal_proc) /* * Note that this routine must be called with the lock on the process - * already held. Insert a btl instance into the proc array and assign + * already held. Insert a btl instance into the proc array and assign * it an address. 
*/ -int mca_btl_template_proc_insert(mca_btl_template_proc_t* module_proc, +int mca_btl_template_proc_insert(mca_btl_template_proc_t* module_proc, mca_btl_base_endpoint_t* module_endpoint) { /* insert into endpoint array */ diff --git a/opal/mca/btl/template/btl_template_proc.h b/opal/mca/btl/template/btl_template_proc.h index 09e9afded8b..5e19d8400c8 100644 --- a/opal/mca/btl/template/btl_template_proc.h +++ b/opal/mca/btl/template/btl_template_proc.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,22 +33,22 @@ BEGIN_C_DECLS * BTL instance that attempts to open a connection to the process. 
*/ struct mca_btl_template_proc_t { - opal_list_item_t super; + opal_list_item_t super; /**< allow proc to be placed on a list */ - opal_proc_t *proc_opal; + opal_proc_t *proc_opal; /**< pointer to corresponding opal_proc_t */ - size_t proc_addr_count; + size_t proc_addr_count; /**< number of addresses published by endpoint */ - struct mca_btl_base_endpoint_t **proc_endpoints; - /**< array of endpoints that have been created to access this proc */ + struct mca_btl_base_endpoint_t **proc_endpoints; + /**< array of endpoints that have been created to access this proc */ - size_t proc_endpoint_count; + size_t proc_endpoint_count; /**< number of endpoints */ - opal_mutex_t proc_lock; + opal_mutex_t proc_lock; /**< lock to protect against concurrent access to proc state */ }; typedef struct mca_btl_template_proc_t mca_btl_template_proc_t; diff --git a/opal/mca/btl/template/netpipe-btl-template.txt b/opal/mca/btl/template/netpipe-btl-template.txt index 8550098d914..2581918fdc2 100644 --- a/opal/mca/btl/template/netpipe-btl-template.txt +++ b/opal/mca/btl/template/netpipe-btl-template.txt @@ -1,12 +1,12 @@ # -# For your btl you may want special mca parameters for a benchmark -# such as netpipe, or better yet, your application, one can always +# For your btl you may want special mca parameters for a benchmark +# such as netpipe, or better yet, your application, one can always # dream. # # Example: -# btl_template_flags=1 -# btl_template_eager_limit=4096 +# btl_template_flags=1 +# btl_template_eager_limit=4096 diff --git a/opal/mca/btl/ugni/Makefile.am b/opal/mca/btl/ugni/Makefile.am index cff4f734a47..371b83e2cf0 100644 --- a/opal/mca/btl/ugni/Makefile.am +++ b/opal/mca/btl/ugni/Makefile.am @@ -3,9 +3,9 @@ # Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. 
-# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/btl/ugni/btl_ugni.h b/opal/mca/btl/ugni/btl_ugni.h index d6ea8a68114..bc9c6b67b50 100644 --- a/opal/mca/btl/ugni/btl_ugni.h +++ b/opal/mca/btl/ugni/btl_ugni.h @@ -25,7 +25,8 @@ #include "opal/mca/mpool/mpool.h" #include "opal/mca/mpool/base/base.h" -#include "opal/mca/mpool/udreg/mpool_udreg.h" +#include "opal/mca/rcache/base/base.h" +#include "opal/mca/rcache/udreg/rcache_udreg.h" #include "opal/util/output.h" #include "opal_stdint.h" #include "opal/mca/btl/btl.h" @@ -49,15 +50,15 @@ /* ompi and smsg endpoint attributes */ typedef struct mca_btl_ugni_endpoint_attr_t { - uint64_t proc_id; + opal_process_name_t proc_name; uint32_t index; gni_smsg_attr_t smsg_attr; gni_mem_handle_t rmt_irq_mem_hndl; } mca_btl_ugni_endpoint_attr_t; enum { - MCA_BTL_UGNI_MPOOL_UDREG, - MCA_BTL_UGNI_MPOOL_GRDMA + MCA_BTL_UGNI_RCACHE_UDREG, + MCA_BTL_UGNI_RCACHE_GRDMA }; typedef struct mca_btl_ugni_module_t { @@ -67,6 +68,7 @@ typedef struct mca_btl_ugni_module_t { opal_common_ugni_device_t *device; + opal_mutex_t endpoint_lock; size_t endpoint_count; opal_pointer_array_t endpoints; opal_hash_table_t id_to_endpoint; @@ -85,7 +87,7 @@ typedef struct mca_btl_ugni_module_t { opal_free_list_t post_descriptors; - mca_mpool_base_module_t *smsg_mpool; + mca_mpool_base_module_t *mpool; opal_free_list_t smsg_mboxes; gni_ep_handle_t wildcard_ep; @@ -127,6 +129,8 @@ typedef struct mca_btl_ugni_module_t { int nlocal_procs; volatile int active_send_count; + + mca_rcache_base_module_t *rcache; } mca_btl_ugni_module_t; typedef struct mca_btl_ugni_component_t { @@ -176,8 +180,11 @@ typedef struct mca_btl_ugni_component_t { /* Page size to use for SMSG allocations (udreg mpool) */ unsigned int smsg_page_size; - /* mpool type (grdma or udreg) */ - int mpool_type; + /* rcache type (grdma or udreg) */ + int rcache_type; + + /* memory pool hints */ + char *mpool_hints; /* Number of mailboxes to allocate in each block */ 
unsigned int mbox_increment; @@ -194,7 +201,7 @@ int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module, opal_common_ugni_device_t *device); /** - * BML->BTL notification of change in the process list. + * BML->BTL notification of change in the process list. * * location: btl_ugni_add_procs.c * @@ -229,6 +236,8 @@ mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl, struct opal_proc_t **procs, struct mca_btl_base_endpoint_t **peers); +struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc); + /** * Initiate an asynchronous send. * @@ -246,7 +255,7 @@ mca_btl_ugni_send (struct mca_btl_base_module_t *btl, mca_btl_base_tag_t tag); /** - * Initiate an immediate blocking send. + * Initiate an immediate blocking send. * * location: btl_ugni_sendi.c * @@ -309,11 +318,11 @@ struct mca_btl_base_registration_handle_t { }; typedef struct mca_btl_ugni_reg_t { - mca_mpool_base_registration_t base; + mca_rcache_base_registration_t base; mca_btl_base_registration_handle_t handle; } mca_btl_ugni_reg_t; -/* Global structures */ +/* Global structures */ OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component; OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module; diff --git a/opal/mca/btl/ugni/btl_ugni_add_procs.c b/opal/mca/btl/ugni/btl_ugni_add_procs.c index b7e5d1f8280..0bd1b0005b0 100644 --- a/opal/mca/btl/ugni/btl_ugni_add_procs.c +++ b/opal/mca/btl/ugni/btl_ugni_add_procs.c @@ -3,7 +3,7 @@ * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -18,7 +18,7 @@ #include "btl_ugni_smsg.h" #include "opal/include/opal/align.h" -#include "opal/mca/dstore/dstore.h" +#include "opal/mca/pmix/pmix.h" #define INITIAL_GNI_EPS 10000 @@ -28,13 +28,11 @@ static void mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_procs); static int mca_btl_ugni_smsg_setup (int nprocs); -int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t **peers, - opal_bitmap_t *reachable) { +int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs, + struct opal_proc_t **procs, + struct mca_btl_base_endpoint_t **peers, + opal_bitmap_t *reachable) { mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl; - size_t i; int rc; void *mmap_start_addr; @@ -59,36 +57,45 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl, } } - for (i = 0 ; i < nprocs ; ++i) { + for (size_t i = 0 ; i < nprocs ; ++i) { struct opal_proc_t *opal_proc = procs[i]; uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name); - if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) { - ugni_module->nlocal_procs++; + /* check for an existing endpoint */ + OPAL_THREAD_LOCK(&ugni_module->endpoint_lock); + if (OPAL_SUCCESS != opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) (peers + i))) { + if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) { + ugni_module->nlocal_procs++; - /* ugni is allowed on local processes to provide support for network - * atomic operations */ - } + /* ugni is allowed on local processes to provide support for network + * atomic operations */ + } - /* Create and Init endpoints */ - rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - BTL_ERROR(("btl/ugni error initializing endpoint")); - return rc; - } + /* Create and 
Init endpoints */ + rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock); + BTL_ERROR(("btl/ugni error initializing endpoint")); + return rc; + } - /* go ahead and connect the local endpoint for RDMA/CQ write */ - if (opal_proc == opal_proc_local_get ()) { - ugni_module->local_ep = peers[i]; - } + /* go ahead and connect the local endpoint for RDMA/CQ write */ + if (opal_proc == opal_proc_local_get ()) { + ugni_module->local_ep = peers[i]; + } - /* Add this endpoint to the pointer array. */ - BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i])); - opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]); + /* Add this endpoint to the pointer array. */ + BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i])); + opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]); - /* Set the reachable bit */ - rc = opal_bitmap_set_bit (reachable, i); - ++ugni_module->endpoint_count; + ++ugni_module->endpoint_count; + } + OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock); + + /* Set the reachable bit if necessary */ + if (reachable) { + rc = opal_bitmap_set_bit (reachable, i); + } } mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs); @@ -224,21 +231,71 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl, return OPAL_SUCCESS; } -static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size, - mca_mpool_base_registration_t *reg) + +struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc) +{ + mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) module; + uint64_t proc_id = mca_btl_ugni_proc_name_to_id(proc->proc_name); + mca_btl_base_endpoint_t *ep; + int rc; + + 
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock); + + do { + rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep); + if (OPAL_SUCCESS == rc) { + break; + } + + /* Create and Init endpoints */ + rc = mca_btl_ugni_init_ep (ugni_module, &ep, ugni_module, proc); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + BTL_ERROR(("btl/ugni error initializing endpoint")); + break; + } + + /* Add this endpoint to the pointer array. */ + BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) ep)); + opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, ep); + } while (0); + + OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock); + + return ep; +} + + +static int ugni_reg_mem (void *reg_data, void *base, size_t size, + mca_rcache_base_registration_t *reg) { mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data; mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg; + gni_cq_handle_t cq = NULL; gni_return_t rc; + int flags; if (ugni_module->reg_count >= ugni_module->reg_max) { return OPAL_ERR_OUT_OF_RESOURCE; } - + + if (reg->access_flags & (MCA_RCACHE_ACCESS_REMOTE_WRITE | MCA_RCACHE_ACCESS_LOCAL_WRITE | + MCA_RCACHE_ACCESS_REMOTE_ATOMIC)) { + flags = GNI_MEM_READWRITE; + } else { + flags = GNI_MEM_READ_ONLY; + } + + if (!(reg->flags & MCA_RCACHE_FLAGS_SO_MEM)) { + flags |= GNI_MEM_RELAXED_PI_ORDERING; + } + + if (reg->flags & MCA_RCACHE_FLAGS_RESV0) { + cq = ugni_module->smsg_remote_cq; + } + OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base, - size, NULL, GNI_MEM_READWRITE | GNI_MEM_RELAXED_PI_ORDERING, - -1, &(ugni_reg->handle.gni_handle)); + size, cq, flags, -1, &(ugni_reg->handle.gni_handle)); OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock); if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) { @@ -250,24 +307,8 @@ static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size, return OPAL_SUCCESS; } 
- -static int ugni_reg_smsg_mem (void *reg_data, void *base, size_t size, - mca_mpool_base_registration_t *reg) -{ - mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data; - mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg; - gni_return_t rc; - - OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); - rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base, - size, ugni_module->smsg_remote_cq, GNI_MEM_READWRITE, -1, - &(ugni_reg->handle.gni_handle)); - OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock); - return opal_common_rc_ugni_to_opal (rc); -} - static int -ugni_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg) +ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg) { mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data; mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *)reg; @@ -288,12 +329,11 @@ ugni_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg) static int mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module) { - struct mca_mpool_base_resources_t mpool_resources; - unsigned int mbox_increment, nprocs; - const char *mpool_name; + mca_rcache_udreg_resources_t rcache_resources; + unsigned int mbox_increment; + uint32_t nprocs, *u32; + char *rcache_name; int rc; - opal_list_t vals; - opal_value_t *kv; rc = opal_pointer_array_init (&ugni_module->pending_smsg_frags_bb, 0, 1 << 30, 32768); @@ -302,16 +342,13 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module) } /* determine how many procs are in the job (might want to check universe size here) */ - OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME, - OPAL_DSTORE_UNIV_SIZE, &vals)) { - /* the number of procs in the job is in the uint32 field */ - kv = (opal_value_t*)opal_list_get_first(&vals); - nprocs = kv->data.uint32; - } else { + u32 = &nprocs; + OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_UNIV_SIZE, &OPAL_PROC_MY_NAME, + &u32, 
OPAL_UINT32); + if (OPAL_SUCCESS != rc) { + /* take a wild conservative guess */ nprocs = 512; } - OPAL_LIST_DESTRUCT(&vals); rc = mca_btl_ugni_smsg_setup (nprocs); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { @@ -357,43 +394,35 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module) return rc; } - mpool_resources.pool_name = "ompi.ugni"; - mpool_resources.reg_data = (void *) ugni_module; - mpool_resources.sizeof_reg = sizeof (mca_btl_ugni_reg_t); - mpool_resources.register_mem = ugni_reg_rdma_mem; - mpool_resources.deregister_mem = ugni_dereg_mem; + ugni_module->super.btl_mpool = mca_mpool_base_module_lookup (mca_btl_ugni_component.mpool_hints); + if (NULL == ugni_module->super.btl_mpool) { + BTL_ERROR(("could not find mpool matching hints %s", mca_btl_ugni_component.mpool_hints)); + return OPAL_ERROR; + } - if (MCA_BTL_UGNI_MPOOL_UDREG == mca_btl_ugni_component.mpool_type) { + rcache_resources.base.cache_name = "ompi.ugni"; + rcache_resources.base.reg_data = (void *) ugni_module; + rcache_resources.base.sizeof_reg = sizeof (mca_btl_ugni_reg_t); + rcache_resources.base.register_mem = ugni_reg_mem; + rcache_resources.base.deregister_mem = ugni_dereg_mem; + + if (MCA_BTL_UGNI_RCACHE_UDREG == mca_btl_ugni_component.rcache_type) { /* additional settings for the udreg mpool */ /* 4k should be large enough for any Gemini/Ares system */ - mpool_resources.max_entries = 4096; - mpool_resources.use_kernel_cache = true; - - /* request a specific page size. this request may not be honored if the - * page size does not exist. 
*/ - mpool_resources.page_size = mca_btl_ugni_component.smsg_page_size; + rcache_resources.max_entries = 4096; + rcache_resources.use_kernel_cache = true; - mpool_resources.use_evict_w_unreg = false; - mpool_name = "udreg"; + rcache_resources.use_evict_w_unreg = false; + rcache_name = "udreg"; } else { - mpool_name = "grdma"; + rcache_name = "grdma"; } - ugni_module->super.btl_mpool = - mca_mpool_base_module_create(mpool_name, ugni_module->device, &mpool_resources); - - mpool_resources.register_mem = ugni_reg_smsg_mem; - - ugni_module->smsg_mpool = - mca_mpool_base_module_create(mpool_name, ugni_module->device, &mpool_resources); - - if (NULL == ugni_module->super.btl_mpool) { - BTL_ERROR(("error creating rdma mpool")); - return OPAL_ERROR; - } + ugni_module->rcache = + mca_rcache_base_module_create (rcache_name, ugni_module->device, &rcache_resources.base); - if (NULL == ugni_module->smsg_mpool) { - BTL_ERROR(("error creating smsg mpool")); + if (NULL == ugni_module->rcache) { + BTL_ERROR(("error creating registration cache")); return OPAL_ERROR; } @@ -404,7 +433,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module) mca_btl_ugni_component.ugni_eager_num, mca_btl_ugni_component.ugni_eager_max, mca_btl_ugni_component.ugni_eager_inc, - ugni_module->super.btl_mpool, 0, NULL, + ugni_module->super.btl_mpool, 0, ugni_module->rcache, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init, (void *) ugni_module); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { @@ -419,7 +448,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module) mca_btl_ugni_component.ugni_eager_num, mca_btl_ugni_component.ugni_eager_max, mca_btl_ugni_component.ugni_eager_inc, - ugni_module->super.btl_mpool, 0, NULL, + ugni_module->super.btl_mpool, 0, ugni_module->rcache, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init, (void *) ugni_module); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { @@ -440,12 +469,14 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module) mbox_increment 
= mca_btl_ugni_component.mbox_increment; } + /* use the MCA_RCACHE_FLAGS_RESV0 to signal this is smsg memory */ rc = opal_free_list_init (&ugni_module->smsg_mboxes, sizeof (mca_btl_ugni_smsg_mbox_t), 8, OBJ_CLASS(mca_btl_ugni_smsg_mbox_t), mca_btl_ugni_component.smsg_mbox_size, 128, - 32, -1, mbox_increment, ugni_module->smsg_mpool, - 0, NULL, NULL, NULL); + 32, -1, mbox_increment, ugni_module->super.btl_mpool, + MCA_RCACHE_FLAGS_SO_MEM | MCA_RCACHE_FLAGS_RESV0, + ugni_module->rcache, NULL, NULL); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { BTL_ERROR(("error creating smsg mailbox free list")); return rc; diff --git a/opal/mca/btl/ugni/btl_ugni_atomic.c b/opal/mca/btl/ugni/btl_ugni_atomic.c index 981bc759ee9..3c62670da89 100644 --- a/opal/mca/btl/ugni/btl_ugni_atomic.c +++ b/opal/mca/btl/ugni/btl_ugni_atomic.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -11,18 +11,66 @@ #include "btl_ugni_rdma.h" -static gni_fma_cmd_type_t famo_cmds[] = { - [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_FADD, - [MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_FAND, - [MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_FOR, - [MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_FXOR, +static gni_fma_cmd_type_t amo_cmds[][MCA_BTL_ATOMIC_LAST] = { + [OPAL_INT32] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_IADD_S, + [MCA_BTL_ATOMIC_LAND] = GNI_FMA_ATOMIC2_AND_S, + [MCA_BTL_ATOMIC_LOR] = GNI_FMA_ATOMIC2_OR_S, + [MCA_BTL_ATOMIC_LXOR] = GNI_FMA_ATOMIC2_XOR_S, + [MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_SWAP_S, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_IMIN_S, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_IMAX_S, + }, + [OPAL_INT64] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_ADD, + [MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_AND, + [MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_OR, + [MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_XOR, + [MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_SWAP, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_IMIN, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_IMAX, + }, + [OPAL_FLOAT] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FPADD_S, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FPMIN_S, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FPMAX_S, + }, + [OPAL_DOUBLE] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FPADD, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FPMIN, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FPMAX, + }, }; -static gni_fma_cmd_type_t amo_cmds[] = { - [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_ADD, - [MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_AND, - [MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_OR, - [MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_XOR, +static gni_fma_cmd_type_t famo_cmds[][MCA_BTL_ATOMIC_LAST] = { + [OPAL_INT32] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FIADD_S, + [MCA_BTL_ATOMIC_LAND] = GNI_FMA_ATOMIC2_FAND_S, + [MCA_BTL_ATOMIC_LOR] = GNI_FMA_ATOMIC2_FOR_S, + [MCA_BTL_ATOMIC_LXOR] = GNI_FMA_ATOMIC2_FXOR_S, + [MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_FSWAP_S, + [MCA_BTL_ATOMIC_MIN] = 
GNI_FMA_ATOMIC2_FIMIN_S, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FIMAX_S, + }, + [OPAL_INT64] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_FADD, + [MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_FAND, + [MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_FOR, + [MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_FXOR, + [MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_FSWAP, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FIMIN, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FIMAX, + }, + [OPAL_FLOAT] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FFPADD_S, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FFPMIN_S, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FFPMAX_S, + }, + [OPAL_DOUBLE] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FFPADD, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FFPMIN, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FFPMAX, + }, }; int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, @@ -32,7 +80,20 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end { gni_mem_handle_t dummy = {0, 0}; mca_btl_ugni_post_descriptor_t *post_desc; - int rc; + int gni_op, rc, type; + size_t size; + + size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8; + if (MCA_BTL_ATOMIC_FLAG_FLOAT & flags) { + type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_FLOAT : OPAL_DOUBLE; + } else { + type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 
OPAL_INT32 : OPAL_INT64; + } + + gni_op = amo_cmds[type][op]; + if (0 == gni_op) { + return OPAL_ERR_NOT_SUPPORTED; + } rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { @@ -45,8 +106,8 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end } init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, 0, dummy, remote_address, - remote_handle->gni_handle, 8, 0); - post_desc->desc.base.amo_cmd = amo_cmds[op]; + remote_handle->gni_handle, size, 0); + post_desc->desc.base.amo_cmd = gni_op; post_desc->desc.base.first_operand = operand; @@ -54,6 +115,10 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base); OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock); if (GNI_RC_SUCCESS != rc) { + mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc); + if (GNI_RC_ILLEGAL_OP == rc) { + return OPAL_ERR_NOT_SUPPORTED; + } return OPAL_ERR_OUT_OF_RESOURCE; } @@ -67,7 +132,20 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en void *cbcontext, void *cbdata) { mca_btl_ugni_post_descriptor_t *post_desc; - int rc; + int gni_op, rc, type; + size_t size; + + size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8; + if (MCA_BTL_ATOMIC_FLAG_FLOAT & flags) { + type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_FLOAT : OPAL_DOUBLE; + } else { + type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 
OPAL_INT32 : OPAL_INT64; + } + + gni_op = famo_cmds[type][op]; + if (0 == gni_op) { + return OPAL_ERR_NOT_SUPPORTED; + } rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { @@ -81,8 +159,8 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle, - remote_address, remote_handle->gni_handle, 8, 0); - post_desc->desc.base.amo_cmd = famo_cmds[op]; + remote_address, remote_handle->gni_handle, size, 0); + post_desc->desc.base.amo_cmd = gni_op; post_desc->desc.base.first_operand = operand; @@ -91,6 +169,9 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock); if (GNI_RC_SUCCESS != rc) { mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc); + if (GNI_RC_ILLEGAL_OP == rc) { + return OPAL_ERR_NOT_SUPPORTED; + } return OPAL_ERR_OUT_OF_RESOURCE; } @@ -103,7 +184,11 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_ int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { mca_btl_ugni_post_descriptor_t *post_desc; - int rc; + int gni_op, rc; + size_t size; + + gni_op = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? GNI_FMA_ATOMIC2_CSWAP_S : GNI_FMA_ATOMIC_CSWAP; + size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 
4 : 8; rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { @@ -117,8 +202,8 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_ init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle, - remote_address, remote_handle->gni_handle, 8, 0); - post_desc->desc.base.amo_cmd = GNI_FMA_ATOMIC_CSWAP; + remote_address, remote_handle->gni_handle, size, 0); + post_desc->desc.base.amo_cmd = gni_op; post_desc->desc.base.first_operand = compare; post_desc->desc.base.second_operand = value; diff --git a/opal/mca/btl/ugni/btl_ugni_component.c b/opal/mca/btl/ugni/btl_ugni_component.c index 1e0fc91555d..dcd6acc3ff5 100644 --- a/opal/mca/btl/ugni/btl_ugni_component.c +++ b/opal/mca/btl/ugni/btl_ugni_component.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. 
* $COPYRIGHT$ @@ -15,6 +15,11 @@ #include "btl_ugni_rdma.h" #include "btl_ugni_smsg.h" +#include "opal/util/sys_limits.h" + +#include +#include + #include "opal/memoryhooks/memory.h" #include "opal/runtime/opal_params.h" @@ -25,6 +30,7 @@ static int btl_ugni_component_open(void); static int btl_ugni_component_close(void); static mca_btl_base_module_t **mca_btl_ugni_component_init(int *, bool, bool); static int mca_btl_ugni_component_progress(void); +static unsigned long mca_btl_ugni_ugni_page_size = 0; mca_btl_ugni_component_t mca_btl_ugni_component = { .super = { @@ -44,9 +50,9 @@ mca_btl_ugni_component_t mca_btl_ugni_component = { } }; -mca_base_var_enum_value_t mpool_values[] = { - {MCA_BTL_UGNI_MPOOL_UDREG, "udreg"}, - {MCA_BTL_UGNI_MPOOL_GRDMA, "grdma"}, +mca_base_var_enum_value_t rcache_values[] = { + {MCA_BTL_UGNI_RCACHE_UDREG, "udreg"}, + {MCA_BTL_UGNI_RCACHE_GRDMA, "grdma"}, {-1, NULL} /* sentinal */ }; @@ -55,6 +61,7 @@ btl_ugni_component_register(void) { mca_base_var_enum_t *new_enum; gni_nic_device_t device_type; + char *mpool_hints_tmp = NULL; int rc; (void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version, @@ -174,10 +181,29 @@ btl_ugni_component_register(void) MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment); + /* determine if there are get alignment restrictions */ + GNI_GetDeviceType (&device_type); + + mca_btl_ugni_component.smsg_page_size = 2 << 20; + if (GNI_DEVICE_GEMINI == device_type) { + if (access ("/sys/class/gemini/ghal0/mrt", R_OK)) { + int fd = open ("/sys/class/gemini/ghal0/mrt", O_RDONLY); + char buffer[10]; + + if (0 <= fd) { + memset (buffer, 0, sizeof (buffer)); + read (fd, buffer, sizeof (buffer) - 1); + close (fd); + mca_btl_ugni_ugni_page_size = strtol (buffer, NULL, 10) * 1024; + mca_btl_ugni_component.smsg_page_size = mca_btl_ugni_ugni_page_size; + } + } + } + (void) 
mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, "smsg_page_size", "Page size to use for SMSG " - "mailbox allocation (default 2M)", + "mailbox allocation (default: detect)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, @@ -202,21 +228,40 @@ btl_ugni_component_register(void) MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL, &mca_btl_ugni_progress_thread_wakeups); - /* btl/ugni can only support only a fixed set of mpools (these mpools have compatible resource + /* btl/ugni can support only a fixed set of rcache components (these rcache components have compatible resource * structures) */ - rc = mca_base_var_enum_create ("btl_ugni_mpool", mpool_values, &new_enum); + rc = mca_base_var_enum_create ("btl_ugni_rcache", rcache_values, &new_enum); if (OPAL_SUCCESS != rc) { return rc; } - mca_btl_ugni_component.mpool_type = MCA_BTL_UGNI_MPOOL_UDREG; + mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_UDREG; (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, - "mpool", "mpool to use", MCA_BASE_VAR_TYPE_INT, new_enum, + "rcache", "registration cache to use", MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_type); + MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rcache_type); OBJ_RELEASE(new_enum); - mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH; + if (mca_btl_ugni_ugni_page_size) { + rc = asprintf (&mpool_hints_tmp, "page_size=%lu", mca_btl_ugni_ugni_page_size); + if (rc < 0) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + mca_btl_ugni_component.mpool_hints = mpool_hints_tmp; + } else { + mca_btl_ugni_component.mpool_hints = "page_size=2M"; + } + + (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, + "mpool_hints", "hints to use when selecting a memory pool (default: " +
"\"page_size=2M\")", MCA_BASE_VAR_TYPE_STRING, NULL, 0, + MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_hints); + free (mpool_hints_tmp); + + /* ensure we lose send exclusivity to sm and vader if they are enabled */ + mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 2; /* smsg threshold */ mca_btl_ugni_module.super.btl_eager_limit = 8 * 1024; @@ -227,14 +272,11 @@ btl_ugni_component_register(void) mca_btl_ugni_module.super.btl_get_limit = 1 * 1024 * 1024; - /* determine if there are get alignment restrictions */ - GNI_GetDeviceType (&device_type); - - if (GNI_DEVICE_GEMINI == device_type) { - mca_btl_ugni_module.super.btl_get_alignment = 4; - } else { - mca_btl_ugni_module.super.btl_get_alignment = 0; - } + /* + * see def. of ALIGNMENT_MASK to figure this one out + */ + /* both gemini and aries have a 4-byte alignment requirement on remote addresses */ + mca_btl_ugni_module.super.btl_get_alignment = 4; /* threshold for put */ mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024; @@ -246,6 +288,13 @@ btl_ugni_component_register(void) MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR | MCA_BTL_ATOMIC_SUPPORTS_CSWAP; + if (GNI_DEVICE_ARIES == device_type) { + /* aries supports additional atomic operations */ + mca_btl_ugni_module.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MIN | MCA_BTL_ATOMIC_SUPPORTS_MAX | + MCA_BTL_ATOMIC_SUPPORTS_LAND | MCA_BTL_ATOMIC_SUPPORTS_LOR | MCA_BTL_ATOMIC_SUPPORTS_LXOR | + MCA_BTL_ATOMIC_SUPPORTS_32BIT | MCA_BTL_ATOMIC_SUPPORTS_FLOAT; + } + mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */ @@ -286,26 +335,6 @@ btl_ugni_component_close(void) return OPAL_SUCCESS; } -static void mca_btl_ugni_autoset_leave_pinned (void) { - if (MCA_BTL_UGNI_MPOOL_UDREG != mca_btl_ugni_component.mpool_type) { - int
value = opal_mem_hooks_support_level(); - if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == - ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) { - /* Set leave pinned to 1 if leave pinned pipeline is not set */ - if (-1 == opal_leave_pinned) { - opal_leave_pinned = !opal_leave_pinned_pipeline; - } - } else { - opal_leave_pinned = 0; - opal_leave_pinned_pipeline = 0; - } - } else if (-1 == opal_leave_pinned) { - /* if udreg is in use we can set leave pinned without checking for the - * memory hooks. */ - opal_leave_pinned = !opal_leave_pinned_pipeline; - } -} - static mca_btl_base_module_t ** mca_btl_ugni_component_init (int *num_btl_modules, bool enable_progress_threads, @@ -358,7 +387,11 @@ mca_btl_ugni_component_init (int *num_btl_modules, return NULL; } - mca_btl_ugni_autoset_leave_pinned (); + if (mca_btl_ugni_component.smsg_page_size != (unsigned long) opal_getpagesize ()) { + if (mca_btl_ugni_ugni_page_size > mca_btl_ugni_component.smsg_page_size) { + mca_btl_ugni_component.smsg_page_size = mca_btl_ugni_ugni_page_size; + } + } mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit; @@ -386,8 +419,8 @@ mca_btl_ugni_component_init (int *num_btl_modules, static inline int mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module) { + uint64_t datagram_id, data, proc_id; uint32_t remote_addr, remote_id; - uint64_t datagram_id, data; mca_btl_base_endpoint_t *ep; gni_post_state_t post_state; gni_ep_handle_t handle; @@ -397,8 +430,8 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module) /* check for datagram completion */ OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); /* TODO: may not need lock for this function */ grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id); - OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock); if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) { + OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock); return 0; } @@ -414,10 
+447,9 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module) } /* wait for the incoming datagram to complete (in case it isn't) */ - OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); /* TODO: may not need lock for this function */ grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state, &remote_addr, &remote_id); - OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock); + OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock); if (GNI_RC_SUCCESS != grc) { BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc)); return opal_common_rc_ugni_to_opal (grc); @@ -425,15 +457,24 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module) /* if this is a wildcard endpoint lookup the remote peer by the proc id we received */ if (handle == ugni_module->wildcard_ep) { - BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64, ugni_module->wc_remote_attr.proc_id)); - rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, - ugni_module->wc_remote_attr.proc_id, - (void *) &ep); + proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name); + + BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64, + proc_id)); + + OPAL_THREAD_LOCK(&ugni_module->endpoint_lock); + rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep); + OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock); + /* check if the endpoint is known */ if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) { - BTL_ERROR(("received connection attempt from an unknown peer. 
rc: %d, ep: %p, id: 0x%" PRIx64, - rc, (void *) ep, ugni_module->wc_remote_attr.proc_id)); - return OPAL_ERR_NOT_FOUND; + struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name); + BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}", + ugni_module->wc_remote_attr.proc_name.jobid, ugni_module->wc_remote_attr.proc_name.vpid)); + ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc); + if (OPAL_UNLIKELY(NULL == ep)) { + return rc; + } } } else { BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep)); @@ -447,7 +488,14 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module) data, (void *) ep, remote_id)); /* NTH: TODO -- error handling */ + opal_mutex_lock (&ep->lock); + if (handle != ugni_module->wildcard_ep) { + /* directed post complete */ + ep->dg_posted = false; + } + (void) mca_btl_ugni_ep_connect_progress (ep); + opal_mutex_unlock (&ep->lock); if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) { /* process messages waiting in the endpoint's smsg mailbox */ @@ -576,32 +624,21 @@ mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module) int count; OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock); - count = opal_list_get_size(&ugni_module->ep_wait_list); - OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock); + count = opal_list_get_size(&ugni_module->ep_wait_list); do { - OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock); endpoint = (mca_btl_base_endpoint_t *) opal_list_remove_first (&ugni_module->ep_wait_list); - OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock); if (endpoint != NULL) { - - endpoint->wait_listed = false; - rc = mca_btl_ugni_progress_send_wait_list (endpoint); - if (OPAL_SUCCESS != rc && false == endpoint->wait_listed) { - - endpoint->wait_listed = true; - OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock); + if (OPAL_SUCCESS != rc) { opal_list_append (&ugni_module->ep_wait_list, &endpoint->super); - 
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock); + } else { + endpoint->wait_listed = false; } } - - --count; - if (count == 0) break; - - } while (endpoint != NULL) ; + } while (endpoint != NULL && --count > 0) ; + OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock); return rc; } diff --git a/opal/mca/btl/ugni/btl_ugni_endpoint.c b/opal/mca/btl/ugni/btl_ugni_endpoint.c index 49551f8a8db..e06d3666192 100644 --- a/opal/mca/btl/ugni/btl_ugni_endpoint.c +++ b/opal/mca/btl/ugni/btl_ugni_endpoint.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011-2013 UT-Battelle, LLC. All rights reserved. * $COPYRIGHT$ @@ -17,7 +17,7 @@ static void mca_btl_ugni_ep_construct (mca_btl_base_endpoint_t *ep) { memset ((char *) ep + sizeof(ep->super), 0, sizeof (*ep) - sizeof (ep->super)); OBJ_CONSTRUCT(&ep->frag_wait_list, opal_list_t); - OBJ_CONSTRUCT(&ep->lock, opal_mutex_t); + OBJ_CONSTRUCT(&ep->lock, opal_recursive_mutex_t); } static void mca_btl_ugni_ep_destruct (mca_btl_base_endpoint_t *ep) @@ -158,9 +158,11 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) { rc = mca_btl_ugni_progress_send_wait_list (ep); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - ep->wait_listed = true; OPAL_THREAD_LOCK(&ep->btl->ep_wait_list_lock); - opal_list_append (&ep->btl->ep_wait_list, &ep->super); + if (false == ep->wait_listed) { + opal_list_append (&ep->btl->ep_wait_list, &ep->super); + ep->wait_listed = true; + } OPAL_THREAD_UNLOCK(&ep->btl->ep_wait_list_lock); } @@ -198,11 +200,17 @@ int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) { if (GNI_SMSG_TYPE_INVALID == ep->remote_attr.smsg_attr.msg_type) { /* use datagram to exchange connection information with the remote peer */ - rc = mca_btl_ugni_directed_ep_post (ep); - if (OPAL_SUCCESS == rc) 
{ - rc = OPAL_ERR_RESOURCE_BUSY; + if (!ep->dg_posted) { + rc = mca_btl_ugni_directed_ep_post (ep); + if (OPAL_SUCCESS == rc) { + ep->dg_posted = true; + rc = OPAL_ERR_RESOURCE_BUSY; + } + + return rc; } - return rc; + + return OPAL_SUCCESS; } return mca_btl_ugni_ep_connect_finish (ep); diff --git a/opal/mca/btl/ugni/btl_ugni_endpoint.h b/opal/mca/btl/ugni/btl_ugni_endpoint.h index 79908471f90..308bae9ac8b 100644 --- a/opal/mca/btl/ugni/btl_ugni_endpoint.h +++ b/opal/mca/btl/ugni/btl_ugni_endpoint.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * $COPYRIGHT$ @@ -10,13 +10,14 @@ * $HEADER$ */ -#ifndef MCA_BTL_UGNI_ENDPOINT_H +#ifndef MCA_BTL_UGNI_ENDPOINT_H #define MCA_BTL_UGNI_ENDPOINT_H #include "btl_ugni.h" enum mca_btl_ugni_endpoint_state_t { MCA_BTL_UGNI_EP_STATE_INIT = 0, + MCA_BTL_UGNI_EP_STATE_START, MCA_BTL_UGNI_EP_STATE_RDMA, MCA_BTL_UGNI_EP_STATE_CONNECTING, MCA_BTL_UGNI_EP_STATE_CONNECTED @@ -30,7 +31,10 @@ typedef struct mca_btl_base_endpoint_t { opal_proc_t *peer_proc; - opal_mutex_t lock; + /** may need to lock recursively as the modex lookup could call opal_progress + * and hence our progress function. if this changes modify this mutex to not + * be recursive. also need to update the constructor function. 
*/ + opal_recursive_mutex_t lock; mca_btl_ugni_endpoint_state_t state; opal_common_ugni_endpoint_t *common; @@ -48,6 +52,8 @@ typedef struct mca_btl_base_endpoint_t { opal_list_t frag_wait_list; bool wait_listed; + /** protect against race on connection */ + bool dg_posted; int32_t smsg_progressing; @@ -74,7 +80,6 @@ static inline int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module, endpoint->btl = btl; endpoint->peer_proc = peer_proc; - endpoint->common = NULL; endpoint->index = opal_pointer_array_add (&ugni_module->endpoints, endpoint); *ep = endpoint; @@ -116,6 +121,7 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep switch (ep->state) { case MCA_BTL_UGNI_EP_STATE_INIT: case MCA_BTL_UGNI_EP_STATE_RDMA: + case MCA_BTL_UGNI_EP_STATE_START: rc = mca_btl_ugni_ep_connect_progress (ep); if (OPAL_SUCCESS != rc) { break; @@ -139,7 +145,15 @@ static inline int mca_btl_ugni_ep_connect_rdma (mca_btl_base_endpoint_t *ep) { return OPAL_SUCCESS; } - /* get the modex info for this endpoint and setup a ugni endpoint */ + /* protect against re-entry from opal_progress */ + if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_START == ep->state)) { + return OPAL_ERR_RESOURCE_BUSY; + } + + ep->state = MCA_BTL_UGNI_EP_STATE_START; + + /* get the modex info for this endpoint and setup a ugni endpoint. this call may lead + * to re-entry through opal_progress(). */ rc = opal_common_ugni_endpoint_for_proc (ep->btl->device, ep->peer_proc, &ep->common); if (OPAL_SUCCESS != rc) { assert (0); diff --git a/opal/mca/btl/ugni/btl_ugni_frag.h b/opal/mca/btl/ugni/btl_ugni_frag.h index c912b9abc52..8257ee2d701 100644 --- a/opal/mca/btl/ugni/btl_ugni_frag.h +++ b/opal/mca/btl/ugni/btl_ugni_frag.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2013 The University of Tennessee and The University @@ -66,6 +66,7 @@ struct mca_btl_ugni_base_frag_t; typedef struct mca_btl_ugni_base_frag_t { mca_btl_base_descriptor_t base; + volatile int32_t ref_cnt; uint32_t msg_id; uint16_t hdr_size; uint16_t flags; @@ -148,6 +149,7 @@ static inline int mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep, if (OPAL_LIKELY(NULL != *frag)) { (*frag)->my_list = list; (*frag)->endpoint = ep; + (*frag)->ref_cnt = 1; return OPAL_SUCCESS; } @@ -157,8 +159,8 @@ static inline int mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep, static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag) { if (frag->registration) { - frag->endpoint->btl->super.btl_mpool->mpool_deregister(frag->endpoint->btl->super.btl_mpool, - (mca_mpool_base_registration_t *) frag->registration); + frag->endpoint->btl->rcache->rcache_deregister (frag->endpoint->btl->rcache, + (mca_rcache_base_registration_t *) frag->registration); frag->registration = NULL; } @@ -169,10 +171,16 @@ static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag) return OPAL_SUCCESS; } -static inline void mca_btl_ugni_frag_complete (mca_btl_ugni_base_frag_t *frag, int rc) { - frag->flags |= MCA_BTL_UGNI_FRAG_COMPLETE; +static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, int rc) { + int32_t ref_cnt; - BTL_VERBOSE(("frag complete. 
flags = %d", frag->base.des_flags)); + opal_atomic_mb (); + + ref_cnt = OPAL_THREAD_ADD32(&frag->ref_cnt, -1); + if (ref_cnt) { + assert (ref_cnt > 0); + return false; + } /* call callback if specified */ if (frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { @@ -182,6 +190,20 @@ static inline void mca_btl_ugni_frag_complete (mca_btl_ugni_base_frag_t *frag, i if (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) { mca_btl_ugni_frag_return (frag); } + + return true; +} + +static inline void mca_btl_ugni_frag_complete (mca_btl_ugni_base_frag_t *frag, int rc) { + BTL_VERBOSE(("frag complete. flags = %d", frag->base.des_flags)); + + frag->flags |= MCA_BTL_UGNI_FRAG_COMPLETE; + + mca_btl_ugni_frag_del_ref (frag, rc); +} + +static inline bool mca_btl_ugni_frag_check_complete (mca_btl_ugni_base_frag_t *frag) { + return !!(MCA_BTL_UGNI_FRAG_COMPLETE & frag->flags); } #define MCA_BTL_UGNI_FRAG_ALLOC_SMSG(ep, frag) \ diff --git a/opal/mca/btl/ugni/btl_ugni_get.c b/opal/mca/btl/ugni/btl_ugni_get.c index f244035a56c..e152939d96a 100644 --- a/opal/mca/btl/ugni/btl_ugni_get.c +++ b/opal/mca/btl/ugni/btl_ugni_get.c @@ -13,6 +13,12 @@ #include "btl_ugni_rdma.h" #include "btl_ugni_smsg.h" +/* + * taken from osc_rdma_comm.h, ugh. + */ + +#define ALIGNMENT_MASK(x) ((x) ? 
(x) - 1 : 0) + int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, @@ -21,7 +27,8 @@ int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t bool check; /* Check if the get is aligned/sized on a multiple of 4 */ - check = !!((remote_address | (uint64_t)(intptr_t) local_address | size) & (mca_btl_ugni_module.super.btl_get_alignment - 1)); + check = !!((remote_address | (uint64_t)(intptr_t) local_address | size) & + ALIGNMENT_MASK(mca_btl_ugni_module.super.btl_get_alignment)); if (OPAL_UNLIKELY(check || size > mca_btl_ugni_module.super.btl_get_limit)) { BTL_VERBOSE(("RDMA/FMA Get not available due to size or alignment restrictions")); diff --git a/opal/mca/btl/ugni/btl_ugni_module.c b/opal/mca/btl/ugni/btl_ugni_module.c index 4977659fc15..a884ef59f99 100644 --- a/opal/mca/btl/ugni/btl_ugni_module.c +++ b/opal/mca/btl/ugni/btl_ugni_module.c @@ -91,6 +91,7 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module, OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t); OBJ_CONSTRUCT(&ugni_module->ep_wait_list_lock,opal_mutex_t); OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t); + OBJ_CONSTRUCT(&ugni_module->endpoint_lock, opal_mutex_t); OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t); OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t); OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, opal_free_list_t); @@ -208,22 +209,14 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl) OBJ_DESTRUCT(&ugni_module->smsg_mboxes); OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb); OBJ_DESTRUCT(&ugni_module->id_to_endpoint); + OBJ_DESTRUCT(&ugni_module->endpoint_lock); OBJ_DESTRUCT(&ugni_module->endpoints); OBJ_DESTRUCT(&ugni_module->eager_get_pending); 
OBJ_DESTRUCT(&ugni_module->eager_get_pending_lock); - if (ugni_module->initialized) { - /* need to tear down the mpools *after* the free lists */ - if (NULL != ugni_module->smsg_mpool) { - (void) mca_mpool_base_module_destroy (ugni_module->smsg_mpool); - ugni_module->smsg_mpool = NULL; - } - - if (NULL != ugni_module->super.btl_mpool) { - (void) mca_mpool_base_module_destroy (ugni_module->super.btl_mpool); - ugni_module->super.btl_mpool = NULL; - } + if (ugni_module->rcache) { + mca_rcache_base_module_destroy (ugni_module->rcache); } ugni_module->initialized = false; @@ -301,11 +294,13 @@ static mca_btl_base_registration_handle_t * mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *base, size_t size, uint32_t flags) { + mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl; mca_btl_ugni_reg_t *reg; + int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; int rc; - rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0, - (mca_mpool_base_registration_t **) ®); + rc = ugni_module->rcache->rcache_register (ugni_module->rcache, base, size, 0, access_flags, + (mca_rcache_base_registration_t **) ®); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { return NULL; } @@ -315,10 +310,11 @@ mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t * static int mca_btl_ugni_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle) { + mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl; mca_btl_ugni_reg_t *reg = (mca_btl_ugni_reg_t *)((intptr_t) handle - offsetof (mca_btl_ugni_reg_t, handle)); - (void) btl->btl_mpool->mpool_deregister (btl->btl_mpool, ®->base); + (void) ugni_module->rcache->rcache_deregister (ugni_module->rcache, ®->base); return OPAL_SUCCESS; } diff --git a/opal/mca/btl/ugni/btl_ugni_prepare.h b/opal/mca/btl/ugni/btl_ugni_prepare.h index bd46aa227a9..093c9f6cb02 100644 --- a/opal/mca/btl/ugni/btl_ugni_prepare.h +++ 
b/opal/mca/btl/ugni/btl_ugni_prepare.h @@ -57,6 +57,7 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl, uint32_t flags) { bool use_eager_get = (*size + reserve) > mca_btl_ugni_component.smsg_max_data; + mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl; mca_btl_ugni_base_frag_t *frag = NULL; mca_btl_ugni_reg_t *registration = NULL; void *data_ptr; @@ -74,8 +75,9 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl, (unsigned int)(*size + reserve))); if (OPAL_UNLIKELY(true == use_eager_get)) { - rc = btl->btl_mpool->mpool_register(btl->btl_mpool, data_ptr, *size, 0, - (mca_mpool_base_registration_t **)®istration); + rc = ugni_module->rcache->rcache_register (ugni_module->rcache, data_ptr, *size, 0, + MCA_RCACHE_ACCESS_REMOTE_READ, + (mca_rcache_base_registration_t **)®istration); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { mca_btl_ugni_frag_return (frag); return NULL; diff --git a/opal/mca/btl/ugni/btl_ugni_progress_thread.c b/opal/mca/btl/ugni/btl_ugni_progress_thread.c index 559f522468f..2af2a4ad754 100644 --- a/opal/mca/btl/ugni/btl_ugni_progress_thread.c +++ b/opal/mca/btl/ugni/btl_ugni_progress_thread.c @@ -76,7 +76,7 @@ int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t *btl) goto fn_exit; } - rc = pthread_create(&mca_btl_ugni_progress_thread_id, + rc = pthread_create(&mca_btl_ugni_progress_thread_id, &attr, mca_btl_ugni_prog_thread_fn, (void *)btl); if (0 != rc) { BTL_ERROR(("btl/ugni pthread_create returned %s ",strerror(rc))); diff --git a/opal/mca/btl/ugni/btl_ugni_send.c b/opal/mca/btl/ugni/btl_ugni_send.c index d20881aca0a..45e17ec13f9 100644 --- a/opal/mca/btl/ugni/btl_ugni_send.c +++ b/opal/mca/btl/ugni/btl_ugni_send.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science @@ -25,7 +25,6 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl, mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) descriptor; size_t size = frag->segments[0].seg_len + frag->segments[1].seg_len; mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl; - int flags_save = frag->base.des_flags; int rc; /* tag and len are at the same location in eager and smsg frag hdrs */ @@ -43,42 +42,48 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl, BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor, OPAL_PROC_MY_NAME.vpid, endpoint->common->ep_rem_id, size)); - /* temporarily disable ownership and callback flags so we can reliably check the complete flag */ - frag->base.des_flags &= ~(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK); + /* add a reference to prevent the fragment from being returned until after the + * completion flag is checked. */ + ++frag->ref_cnt; frag->flags &= ~MCA_BTL_UGNI_FRAG_COMPLETE; rc = mca_btl_ugni_send_frag (endpoint, frag); - - if (OPAL_LIKELY(frag->flags & MCA_BTL_UGNI_FRAG_COMPLETE)) { + if (OPAL_LIKELY(mca_btl_ugni_frag_check_complete (frag))) { /* fast path: remote side has received the frag */ - frag->base.des_flags = flags_save; - mca_btl_ugni_frag_complete (frag, OPAL_SUCCESS); + (void) mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS); return 1; } - if ((OPAL_SUCCESS == rc) && (frag->flags & MCA_BTL_UGNI_FRAG_BUFFERED) && (flags_save & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) { + if ((OPAL_SUCCESS == rc) && (frag->flags & MCA_BTL_UGNI_FRAG_BUFFERED) && (frag->flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) { /* fast(ish) path: btl owned buffered frag. 
report send as complete */ - frag->base.des_flags = flags_save & ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + bool call_callback = !!(frag->flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK); + frag->flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK; - if (OPAL_LIKELY(flags_save & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) { + if (call_callback) { frag->base.des_cbfunc(&frag->endpoint->btl->super, frag->endpoint, &frag->base, rc); } + (void) mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS); + return 1; } /* slow(ish) path: remote side hasn't received the frag. call the frag's callback when we get the local smsg/msgq or remote rdma completion */ - frag->base.des_flags = flags_save | MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + + mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS); if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) { /* queue up request */ if (false == endpoint->wait_listed) { OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock); - opal_list_append (&ugni_module->ep_wait_list, &endpoint->super); + if (false == endpoint->wait_listed) { + opal_list_append (&ugni_module->ep_wait_list, &endpoint->super); + endpoint->wait_listed = true; + } OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock); - endpoint->wait_listed = true; } OPAL_THREAD_LOCK(&endpoint->lock); diff --git a/opal/mca/btl/ugni/btl_ugni_smsg.c b/opal/mca/btl/ugni/btl_ugni_smsg.c index f4f255edfb1..b7848bfc66e 100644 --- a/opal/mca/btl/ugni/btl_ugni_smsg.c +++ b/opal/mca/btl/ugni/btl_ugni_smsg.c @@ -16,8 +16,8 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) { struct mca_btl_ugni_reg_t *ugni_reg = (struct mca_btl_ugni_reg_t *) mbox->super.registration; - struct mca_mpool_base_registration_t *base_reg = - (struct mca_mpool_base_registration_t *) ugni_reg; + mca_rcache_base_registration_t *base_reg = + (mca_rcache_base_registration_t *) ugni_reg; /* initialize mailbox attributes */ mbox->attr.smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; @@ 
-27,7 +27,7 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) { mbox->attr.smsg_attr.msg_buffer = base_reg->base; mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size; mbox->attr.smsg_attr.mem_hndl = ugni_reg->handle.gni_handle; - mbox->attr.proc_id = mca_btl_ugni_proc_name_to_id (OPAL_PROC_MY_NAME); + mbox->attr.proc_name = OPAL_PROC_MY_NAME; mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl; } diff --git a/opal/mca/btl/usnic/Makefile.am b/opal/mca/btl/usnic/Makefile.am index 9cc9140a085..76f49a08aef 100644 --- a/opal/mca/btl/usnic/Makefile.am +++ b/opal/mca/btl/usnic/Makefile.am @@ -12,6 +12,8 @@ # Copyright (c) 2006 Sandia National Laboratories. All rights # reserved. # Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -19,7 +21,7 @@ # $HEADER$ # -AM_CPPFLAGS = -DBTL_IN_OPAL=1 $(opal_btl_usnic_CPPFLAGS) +AM_CPPFLAGS = -DBTL_IN_OPAL=1 $(opal_common_libfabric_CPPFLAGS) -DOMPI_LIBMPI_NAME=\"$(OMPI_LIBMPI_NAME)\" EXTRA_DIST = README.txt README.test @@ -48,6 +50,7 @@ sources = \ btl_usnic_frag.h \ btl_usnic_graph.h \ btl_usnic_graph.c \ + btl_usnic_hwloc.c \ btl_usnic_hwloc.h \ btl_usnic_map.c \ btl_usnic_mca.c \ @@ -65,10 +68,6 @@ sources = \ btl_usnic_test.h \ $(test_sources) -if OPAL_HAVE_HWLOC -sources += btl_usnic_hwloc.c -endif - # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
@@ -91,12 +90,12 @@ mca_btl_usnic_la_SOURCES = $(component_sources) mca_btl_usnic_la_LDFLAGS = \ $(opal_btl_usnic_LDFLAGS) \ -module -avoid-version -mca_btl_usnic_la_LIBADD = $(opal_btl_usnic_LIBS) +mca_btl_usnic_la_LIBADD = \ + $(OPAL_TOP_BUILDDIR)/opal/mca/common/libfabric/lib@OPAL_LIB_PREFIX@mca_common_libfabric.la noinst_LTLIBRARIES = $(lib) libmca_btl_usnic_la_SOURCES = $(lib_sources) libmca_btl_usnic_la_LDFLAGS = -module -avoid-version $(opal_btl_usnic_LDFLAGS) -libmca_btl_usnic_la_LIBADD = $(opal_btl_usnic_LIBS) if OPAL_BTL_USNIC_BUILD_UNIT_TESTS usnic_btl_run_tests_CPPFLAGS = \ diff --git a/opal/mca/btl/usnic/README.txt b/opal/mca/btl/usnic/README.txt index 6166ce37f54..ab0b7d12b76 100644 --- a/opal/mca/btl/usnic/README.txt +++ b/opal/mca/btl/usnic/README.txt @@ -315,7 +315,7 @@ have to make 3 changes in the resulting code in master: *** Note 2: CARE MUST BE TAKEN WHEN COPYING THE OTHER DIRECTION! It is *not* as simple as simple s/opal/ompi/gi in configure.m4 and Makefile.am. It certainly can be done, but there's a few strings - that need to stay "opal" or "OPAL" (e.g., OPAL_HAVE_HWLOC). + that need to stay "opal" or "OPAL" (e.g., OPAL_HAVE_FOO). Hence, the string replace will likely need to be done via manual inspection. @@ -335,3 +335,40 @@ libfabric abstractions: fi_fabric: corresponds to a VIC PF fi_domain: corresponds to a VIC VF fi_endpoint: resources inside the VIC VF (basically a QP) + +====================================== + +MPI_THREAD_MULTIPLE support + +In order to make usnic btl thread-safe, the mutex locks are issued +to protect the critical path. ie; libfabric routines, book keeping, etc. + +The said lock is btl_usnic_lock. It is a RECURSIVE lock, meaning that +the same thread can take the lock again even if it already has the lock to +allow the callback function to post another segment right away if we know +that the current segment is completed inline. 
(So we can call send in send +without deadlocking) + +These two functions taking care of hotel checkin/checkout and we +have to protect that part. So we take the mutex lock before we enter the +function. + +- opal_btl_usnic_check_rts() +- opal_btl_usnic_handle_ack() + +We also have to protect the call to libfabric routines + +- opal_btl_usnic_endpoint_send_segment() (fi_send) +- opal_btl_usnic_recv_call() (fi_recvmsg) + +have to be protected as well. + +Also cclient connection checking (opal_btl_usnic_connectivity_ping) has to be +protected. This happens only in the beginning but cclient communicate with cagent +through opal_fd_read/write() and if two or more clients do opal_fd_write() at the +same time, the data might be corrupt. + +With this concept, many functions in btl/usnic that make calls to the +listed functions are protected by OPAL_THREAD_LOCK macro which will only +be active if the user specify MPI_Init_thread() with MPI_THREAD_MULTIPLE +support. diff --git a/opal/mca/btl/usnic/btl_usnic.h b/opal/mca/btl/usnic/btl_usnic.h index aafd7ff055a..e8f6dafa2de 100644 --- a/opal/mca/btl/usnic/btl_usnic.h +++ b/opal/mca/btl/usnic/btl_usnic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,7 +12,9 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -37,7 +40,13 @@ #include "opal/mca/btl/btl.h" #include "opal/mca/btl/base/btl_base_error.h" #include "opal/mca/btl/base/base.h" +#include "opal/mca/rcache/rcache.h" + +#include "btl_usnic_compat.h" + +#if RCACHE_VERSION < 30 #include "opal/mca/mpool/grdma/mpool_grdma.h" +#endif #else #include "ompi/mca/btl/btl.h" #include "ompi/mca/btl/base/btl_base_error.h" @@ -45,8 +54,6 @@ #include "ompi/mca/mpool/grdma/mpool_grdma.h" #endif -#include "btl_usnic_compat.h" - BEGIN_C_DECLS /* @@ -56,6 +63,10 @@ BEGIN_C_DECLS * at other times as needed or as tuning dictates. */ extern uint64_t opal_btl_usnic_ticks; + +/* Lock for MPU_THREAD_MULTIPLE support */ +extern opal_recursive_mutex_t btl_usnic_lock; + static inline uint64_t get_nsec(void) { @@ -93,7 +104,7 @@ extern opal_rng_buff_t opal_btl_usnic_rand_buff; /* Set to >0 to randomly drop received frags. The higher the number, the more frequent the drops. */ -#define WANT_RECV_FRAG_DROPS 0 +#define WANT_RECV_DROPS 0 /* Set to >0 to randomly fail to send an ACK, mimicing a lost ACK. The higher the number, the more frequent the failed-to-send-ACK. */ #define WANT_FAIL_TO_SEND_ACK 0 @@ -102,10 +113,10 @@ extern opal_rng_buff_t opal_btl_usnic_rand_buff; the failed-to-resend-frag. 
*/ #define WANT_FAIL_TO_RESEND_FRAG 0 -#if WANT_RECV_FRAG_DROPS > 0 -#define FAKE_RECV_FRAG_DROP (opal_rand(&opal_btl_usnic_rand_buff) < WANT_RECV_FRAG_DROPS) +#if WANT_RECV_DROPS > 0 +#define FAKE_RECV_DROP (opal_rand(&opal_btl_usnic_rand_buff) < WANT_RECV_DROPS) #else -#define FAKE_RECV_FRAG_DROP 0 +#define FAKE_RECV_DROP 0 #endif #if WANT_FAIL_TO_SEND_ACK > 0 @@ -153,8 +164,16 @@ typedef struct opal_btl_usnic_component_t { /** list of usnic proc structures */ opal_list_t usnic_procs; +#if RCACHE_VERSION == 30 + /** memory pool hints */ + char* usnic_mpool_hints; + + /** registration cache name */ + char *usnic_rcache_name; +#else /** name of memory pool */ char* usnic_mpool_name; +#endif char *if_include; char *if_exclude; @@ -181,6 +200,9 @@ typedef struct opal_btl_usnic_component_t { /** max completion queue entries per module */ int32_t cq_num; + /** max number of entries in AV EQ */ + int32_t av_eq_num; + /** retrans characteristics */ int retrans_timeout; @@ -213,6 +235,15 @@ typedef struct opal_btl_usnic_component_t { /* Prefix for the connectivity map filename (map will be output if the prefix is non-NULL) */ char *connectivity_map_prefix; + + /** Offset into the send buffer where the payload will go. For + libfabric v1.0.0 / API v1.0, this is 0. For libfabric >=v1.1 + / API >=v1.1, this is the endpoint.msg_prefix_size (i.e., + component.transport_header_len). */ + uint32_t prefix_send_offset; + + /* OPAL async progress event base */ + opal_event_base_t *opal_evbase; } opal_btl_usnic_component_t; OPAL_MODULE_DECLSPEC extern opal_btl_usnic_component_t mca_btl_usnic_component; diff --git a/opal/mca/btl/usnic/btl_usnic_ack.c b/opal/mca/btl/usnic/btl_usnic_ack.c index 03492bc5489..b7415e427d7 100644 --- a/opal/mca/btl/usnic/btl_usnic_ack.c +++ b/opal/mca/btl/usnic/btl_usnic_ack.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -26,18 +26,24 @@ #include "btl_usnic_connectivity.h" /* - * Force a retrans of a segment + * Special case: we know exactly which segment is missing at the + * receive; explicitly force retrans of that segment. */ static void -opal_btl_usnic_force_retrans( +opal_btl_usnic_fast_retrans( opal_btl_usnic_endpoint_t *endpoint, opal_btl_usnic_seq_t ack_seq) { opal_btl_usnic_send_segment_t *sseg; int is; - is = WINDOW_SIZE_MOD(ack_seq+1); + is = WINDOW_SIZE_MOD(ack_seq + 1); sseg = endpoint->endpoint_sent_segs[is]; + + // If the sseg is NULL, then there's nothing to retransmit. If + // the hotel room is -1, the segment has already been queued up + // for retransmit and there's nothing additional we need to do + // here. if (sseg == NULL || sseg->ss_hotel_room == -1) { return; } @@ -79,12 +85,14 @@ opal_btl_usnic_handle_ack( #endif ++module->stats.num_old_dup_acks; return; + } - /* A duplicate ACK means next seg was lost */ - } else if (ack_seq == endpoint->endpoint_ack_seq_rcvd) { + /* A duplicate ACK means the sender did not receive the next + seg that we sent */ + else if (ack_seq == endpoint->endpoint_ack_seq_rcvd) { ++module->stats.num_dup_acks; - opal_btl_usnic_force_retrans(endpoint, ack_seq); + opal_btl_usnic_fast_retrans(endpoint, ack_seq); return; } @@ -114,12 +122,11 @@ opal_btl_usnic_handle_ack( already been evicted and queued for resend. If it's not in the hotel, don't check it out! 
*/ if (OPAL_LIKELY(sseg->ss_hotel_room != -1)) { - opal_hotel_checkout(&endpoint->endpoint_hotel, sseg->ss_hotel_room); sseg->ss_hotel_room = -1; - + } /* hotel_room == -1 means queued for resend, remove it */ - } else { + else { opal_list_remove_item((&module->pending_resend_segs), &sseg->ss_base.us_list.super); } @@ -191,25 +198,31 @@ opal_btl_usnic_handle_ack( /* * Send an ACK */ -void +int opal_btl_usnic_ack_send( opal_btl_usnic_module_t *module, opal_btl_usnic_endpoint_t *endpoint) { opal_btl_usnic_ack_segment_t *ack; + /* If we don't have any send credits in the priority channel, + don't send it */ + if (module->mod_channels[USNIC_PRIORITY_CHANNEL].credits < 1) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + /* Get an ACK frag. If we don't get one, just discard this ACK. */ ack = opal_btl_usnic_ack_segment_alloc(module); if (OPAL_UNLIKELY(NULL == ack)) { - opal_output(0, "====================== No frag for sending the ACK -- skipped"); - abort(); + return OPAL_ERR_OUT_OF_RESOURCE; } + --module->mod_channels[USNIC_PRIORITY_CHANNEL].credits; + /* send the seq of the lowest item in the window that we've received */ ack->ss_base.us_btl_header->ack_seq = - endpoint->endpoint_next_contig_seq_to_recv - 1; - + SEQ_DIFF(endpoint->endpoint_next_contig_seq_to_recv, 1); ack->ss_len = sizeof(opal_btl_usnic_btl_header_t); #if MSGDEBUG1 @@ -241,7 +254,7 @@ opal_btl_usnic_ack_send( /* Stats */ ++module->stats.num_ack_sends; - return; + return OPAL_SUCCESS; } /* @@ -251,7 +264,9 @@ void opal_btl_usnic_ack_complete(opal_btl_usnic_module_t *module, opal_btl_usnic_ack_segment_t *ack) { + ++module->mod_channels[USNIC_PRIORITY_CHANNEL].credits; opal_btl_usnic_ack_segment_return(module, ack); + ++module->mod_channels[ack->ss_channel].credits; } /*****************************************************************************/ @@ -292,4 +307,3 @@ opal_btl_usnic_ack_timeout( /* Stats */ ++module->stats.num_timeout_retrans; } - diff --git a/opal/mca/btl/usnic/btl_usnic_ack.h 
b/opal/mca/btl/usnic/btl_usnic_ack.h index 0aaf8306d70..e1d4fe6fc87 100644 --- a/opal/mca/btl/usnic/btl_usnic_ack.h +++ b/opal/mca/btl/usnic/btl_usnic_ack.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -67,7 +67,7 @@ void opal_btl_usnic_ack_complete(opal_btl_usnic_module_t *module, /* * Send an ACK */ -void opal_btl_usnic_ack_send(opal_btl_usnic_module_t *module, +int opal_btl_usnic_ack_send(opal_btl_usnic_module_t *module, opal_btl_usnic_endpoint_t *endpoint); /* @@ -92,7 +92,7 @@ opal_btl_usnic_piggyback_ack( if (endpoint->endpoint_ack_needed) { opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint); sseg->ss_base.us_btl_header->ack_seq = - endpoint->endpoint_next_contig_seq_to_recv - 1; + SEQ_DIFF(endpoint->endpoint_next_contig_seq_to_recv, 1); sseg->ss_base.us_btl_header->ack_present = 1; #if MSGDEBUG1 opal_output(0, "Piggy-backing ACK for sequence %"UDSEQ"\n", diff --git a/opal/mca/btl/usnic/btl_usnic_cagent.c b/opal/mca/btl/usnic/btl_usnic_cagent.c index dd1ba507ab9..386aec0a260 100644 --- a/opal/mca/btl/usnic/btl_usnic_cagent.c +++ b/opal/mca/btl/usnic/btl_usnic_cagent.c @@ -1,5 +1,7 @@ /* - * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -14,7 +16,9 @@ #include #include #include +#ifdef HAVE_ALLOCA_H #include +#endif #include "opal_stdint.h" #include "opal/threads/mutex.h" @@ -40,12 +44,12 @@ static opal_event_t ipc_event; static struct timeval ack_timeout; static opal_list_t udp_port_listeners; static opal_list_t ipc_listeners; +static volatile int ipc_accepts = 0; /* JMS The pings_pending and ping_results should probably both be hash tables for more efficient lookups */ static opal_list_t pings_pending; static opal_list_t ping_results; -static volatile bool agent_thread_time_to_exit = false; -static opal_event_base_t *evbase = NULL; +static volatile bool agent_initialized = false; /* @@ -93,6 +97,10 @@ typedef enum { AGENT_MSG_TYPE_ACK } agent_udp_message_type_t; +// Arbitrary 64 bit numbers +#define MAGIC_ORIGINATOR 0x9a9e2fbce63a11e5 +#define MAGIC_TARGET 0x60735c68f368aace + /* * Ping and ACK messages */ @@ -106,6 +114,11 @@ typedef struct { uint32_t src_ipv4_addr; uint32_t src_udp_port; + /* A magic number that helps determine that the sender was Open + MPI */ + uint64_t magic_number; + uint32_t major_version, minor_version; + /* If this is a PING, the message should be this size. If this is an ACK, we are ACKing a ping of this size. 
*/ uint32_t size; @@ -195,7 +208,7 @@ static void udp_port_listener_destructor(agent_udp_port_listener_t *obj) } /* If the "active" flag is set, then the event is active and the - item is on the ipc_listeners list */ + item is on the udp_port_listeners list */ if (obj->active) { opal_event_del(&obj->event); opal_list_remove_item(&udp_port_listeners, &obj->super); @@ -298,9 +311,16 @@ static void agent_sendto(int fd, char *buffer, ssize_t numbytes, } else if (rc < 0) { if (errno == EAGAIN || errno == EINTR) { continue; + } else if (errno == EPERM) { + // We're sending too fast + usleep(5); + continue; } - ABORT("Unexpected sendto() error"); + char *msg; + asprintf(&msg, "Unexpected sendto() error: errno=%d (%s)", + errno, strerror(errno)); + ABORT(msg); /* Will not return */ } @@ -316,58 +336,6 @@ static void agent_sendto(int fd, char *buffer, ssize_t numbytes, * All of the following functions run in agent thread **************************************************************************/ -/* - * A dummy function invoked in an event just for the purposes of - * waking up the agent main thread (in case it was blocked in the - * event loop with no other events to wake it up). - */ -static void agent_thread_noop(int fd, short flags, void *context) -{ - /* Intentionally a no op */ -} - -/* - * Check to ensure that we expected to receive a ping from this sender - * on the interface in which it was received (i.e., did the usnic - * module corresponding to the received interface choose to pair - * itself with the sender's interface). If not, discard it. - * - * Note that there may be a race condition here. We may get a ping - * before we've setup endpoints on the module in question. It's no - * problem -- if we don't find it, we'll drop the PING and let the - * sender try again later. 
- */ -static bool agent_thread_is_ping_expected(opal_btl_usnic_module_t *module, - uint32_t src_ipv4_addr) -{ - bool found = false; - opal_list_item_t *item; - - /* If we have a NULL value for the module, it means that the MPI - process that is the agent hasn't submitted the LISTEN command - yet (which can happen for a fast sender / slow receiver). So - just return "ping is not [yet] expected". */ - if (NULL == module) { - return false; - } - - opal_mutex_lock(&module->all_endpoints_lock); - if (module->all_endpoints_constructed) { - OPAL_LIST_FOREACH(item, &module->all_endpoints, opal_list_item_t) { - opal_btl_usnic_endpoint_t *ep; - ep = container_of(item, opal_btl_usnic_endpoint_t, - endpoint_endpoint_li); - if (src_ipv4_addr == ep->endpoint_remote_modex.ipv4_addr) { - found = true; - break; - } - } - } - opal_mutex_unlock(&module->all_endpoints_lock); - - return found; -} - /* * Handle an incoming PING message (send an ACK) */ @@ -410,18 +378,24 @@ static void agent_thread_handle_ping(agent_udp_port_listener_t *listener, return; } - /* Finally, check that the ping is from an interface that the - module expects */ - if (!agent_thread_is_ping_expected(listener->module, - src_addr_in->sin_addr.s_addr)) { + if (msg->magic_number != MAGIC_ORIGINATOR) { + opal_output_verbose(20, USNIC_OUT, + "usNIC connectivity got bad ping (magic number: %" PRIu64 ", discarded)", + msg->magic_number); + return; + } + if (msg->major_version != OPAL_MAJOR_VERSION || + msg->minor_version != OPAL_MINOR_VERSION) { opal_output_verbose(20, USNIC_OUT, - "usNIC connectivity got bad ping (from unexpected address: listener %s not paired with peer interface %s, discarded)", - listener->ipv4_addr_str, - real_ipv4_addr_str); + "usNIC connectivity got bad ping (originator version: %d.%d, expected %d.%d, discarded)", + msg->major_version, msg->minor_version, + OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION); return; } - /* Ok, this is a good ping. Send the ACK back */ + /* Ok, this is a good ping. 
Send the ACK back. The PING sender + will verify that the ACK came back from the IP address that it + expected. */ opal_output_verbose(20, USNIC_OUT, "usNIC connectivity got PING (size=%ld) from %s; sending ACK", @@ -429,10 +403,11 @@ static void agent_thread_handle_ping(agent_udp_port_listener_t *listener, /* Send back an ACK. No need to allocate a new buffer; just re-use the same buffer we just got. Note that msg->size is - already set. */ + already set. We simply echo back the sender's IP address/port + in the msg (the sender will use the msg fields and the + recvfrom() src_addr to check for a match). */ msg->message_type = AGENT_MSG_TYPE_ACK; - msg->src_ipv4_addr = listener->ipv4_addr; - msg->src_udp_port = listener->udp_port; + msg->magic_number = MAGIC_TARGET; agent_sendto(listener->fd, (char*) listener->buffer, sizeof(*msg), from); } @@ -456,12 +431,22 @@ static void agent_thread_handle_ack(agent_udp_port_listener_t *listener, (int) numbytes, str, (int) sizeof(*msg)); return; } + if (msg->magic_number != MAGIC_TARGET) { + opal_output_verbose(20, USNIC_OUT, + "usNIC connectivity got bad ACK (magic number: %" PRIu64 ", discarded)", + msg->magic_number); + return; + } - /* Find the pending ping request that this ACK is for */ + /* Find the pending ping request (on this interface) for this ACK. + If we don't find a match, we'll drop it. 
*/ agent_ping_t *ap; + uint32_t src_in_port = ntohs(src_addr_in->sin_port); OPAL_LIST_FOREACH(ap, &pings_pending, agent_ping_t) { - if (ap->dest_ipv4_addr == msg->src_ipv4_addr && - ap->dest_udp_port == msg->src_udp_port) { + if (ap->dest_ipv4_addr == src_addr_in->sin_addr.s_addr && + ap->dest_udp_port == src_in_port && + ap->src_ipv4_addr == msg->src_ipv4_addr && + ap->src_udp_port == msg->src_udp_port) { /* Found it -- indicate that it has been acked */ for (int i = 0; i < NUM_PING_SIZES; ++i) { if (ap->sizes[i] == msg->size) { @@ -697,7 +682,8 @@ static void agent_thread_cmd_listen(agent_ipc_listener_t *ipc_listener) } /* Create a listening event */ - opal_event_set(evbase, &udp_listener->event, udp_listener->fd, + opal_event_set(mca_btl_usnic_component.opal_evbase, + &udp_listener->event, udp_listener->fd, OPAL_EV_READ | OPAL_EV_PERSIST, agent_thread_receive_ping, udp_listener); opal_event_add(&udp_listener->event, 0); @@ -801,7 +787,7 @@ static void agent_thread_send_ping(int fd, short flags, void *context) } /* Set a timer to check if these pings are ACKed */ - opal_event_set(evbase, &ap->timer, + opal_event_set(mca_btl_usnic_component.opal_evbase, &ap->timer, -1, 0, agent_thread_send_ping, ap); opal_event_add(&ap->timer, &ack_timeout); ap->timer_active = true; @@ -891,9 +877,8 @@ static void agent_thread_cmd_ping(agent_ipc_listener_t *ipc_listener) all IP options are enabled, which is 60 bytes), and then also subtract off the UDP header (which is 8 bytes). So we need to subtract off 68 bytes from the MTU, and that's the largest ping - payload we can send. - max_msg_size allows for minimal UDP header, be more conservative */ - ap->sizes[1] = cmd.max_msg_size - (68 - 42); + payload we can send. */ + ap->sizes[1] = cmd.max_msg_size - 68; /* Allocate a buffer for each size. Make sure the smallest size is at least sizeof(agent_udp_message_t). 
*/ @@ -911,6 +896,9 @@ static void agent_thread_cmd_ping(agent_ipc_listener_t *ipc_listener) msg->message_type = AGENT_MSG_TYPE_PING; msg->src_ipv4_addr = ap->src_ipv4_addr; msg->src_udp_port = ap->src_udp_port; + msg->magic_number = MAGIC_ORIGINATOR; + msg->major_version = OPAL_MAJOR_VERSION; + msg->minor_version = OPAL_MINOR_VERSION; msg->size = ap->sizes[i]; } @@ -1029,6 +1017,9 @@ static void agent_thread_accept(int fd, short flags, void *context) return; } + /* Remember how many accepts we have successfully completed */ + ++ipc_accepts; + /* Make a listener object for this peer */ listener = OBJ_NEW(agent_ipc_listener_t); listener->client_fd = client_fd; @@ -1043,7 +1034,8 @@ static void agent_thread_accept(int fd, short flags, void *context) } /* Add this IPC listener to the event base */ - opal_event_set(evbase, &listener->event, client_fd, + opal_event_set(mca_btl_usnic_component.opal_evbase, + &listener->event, client_fd, OPAL_EV_READ | OPAL_EV_PERSIST, agent_thread_ipc_receive, listener); opal_event_add(&listener->event, 0); @@ -1057,24 +1049,90 @@ static void agent_thread_accept(int fd, short flags, void *context) } /* - * Agent progress thread main entry point + * Tear down all active events. + * + * This is done as an event callback in the agent threaf so that there + * is no race condition in the teardown. Specifically: the progress + * thread will only fire one event at a time. Therefore, this one + * event can "atomically" delete all the events and data structures + * and not have to worry about concurrent access from some event + * firing in the middle of the teardown process. 
*/ -static void *agent_thread_main(opal_object_t *obj) +static void agent_thread_finalize(int fd, short flags, void *context) { - while (!agent_thread_time_to_exit) { - opal_event_loop(evbase, OPAL_EVLOOP_ONCE); + /* Free the event that triggered this call */ + free(context); + + /* Ensure that all the local IPC clients have connected to me (so + that we don't shut down before someone tries to connect to me), + or 10 seconds have passed (i.e., if 10 seconds pass and they + don't all connect to me, then something else is wrong, and we + should just give up). */ + static bool first = true; + static time_t timestamp = 0; + if (first) { + timestamp = time(NULL); + first = false; + } + + if (ipc_accepts < opal_process_info.num_local_peers && + time(NULL) < timestamp + 10) { + opal_output_verbose(20, USNIC_OUT, + "usNIC connectivity agent delaying shutdown until all clients connect..."); + + opal_event_t *ev = calloc(sizeof(*ev), 1); + struct timeval finalize_retry = { + .tv_sec = 0, + .tv_usec = 10000 + }; + + opal_event_set(mca_btl_usnic_component.opal_evbase, + ev, -1, 0, agent_thread_finalize, ev); + opal_event_add(ev, &finalize_retry); + return; + } + if (ipc_accepts < opal_process_info.num_local_peers) { + opal_output_verbose(20, USNIC_OUT, + "usNIC connectivity agent: only %d of %d clients connected, but timeout has expired -- exiting anyway", ipc_accepts, opal_process_info.num_local_peers); } - return NULL; + /* Remove the agent listening event from the opal async event + base */ + opal_event_del(&ipc_event); + + /* Shut down all active udp_port_listeners */ + agent_udp_port_listener_t *udp_listener, *ulnext; + OPAL_LIST_FOREACH_SAFE(udp_listener, ulnext, &udp_port_listeners, + agent_udp_port_listener_t) { + OBJ_RELEASE(udp_listener); + } + + /* Destroy the pending pings and ping results */ + agent_ping_t *request, *pnext; + OPAL_LIST_FOREACH_SAFE(request, pnext, &pings_pending, agent_ping_t) { + opal_list_remove_item(&pings_pending, &request->super); + 
OBJ_RELEASE(request); + } + + OPAL_LIST_FOREACH_SAFE(request, pnext, &ping_results, agent_ping_t) { + opal_list_remove_item(&ping_results, &request->super); + OBJ_RELEASE(request); + } + + /* Shut down all active ipc_listeners */ + agent_ipc_listener_t *ipc_listener, *inext; + OPAL_LIST_FOREACH_SAFE(ipc_listener, inext, &ipc_listeners, + agent_ipc_listener_t) { + OBJ_RELEASE(ipc_listener); + } + + agent_initialized = false; } /************************************************************************** * All of the following functions run in the main application thread **************************************************************************/ -static bool agent_initialized = false; -static opal_thread_t agent_thread; - /* * Setup the agent and start its event loop running in a dedicated * thread @@ -1090,9 +1148,6 @@ int opal_btl_usnic_connectivity_agent_init(void) return OPAL_SUCCESS; } - /* Create the event base */ - evbase = opal_event_base_create(); - /* Make a struct timeval for use with timer events. Note that the MCA param is expressed in terms of *milli*seconds, but the timeval timeout is expressed in terms of *micro*seconds. 
*/ @@ -1154,23 +1209,12 @@ int opal_btl_usnic_connectivity_agent_init(void) } /* Add the socket to the event base */ - opal_event_set(evbase, &ipc_event, ipc_accept_fd, + opal_event_set(mca_btl_usnic_component.opal_evbase, + &ipc_event, ipc_accept_fd, OPAL_EV_READ | OPAL_EV_PERSIST, agent_thread_accept, NULL); opal_event_add(&ipc_event, 0); - /* Spawn the agent thread event loop */ - OBJ_CONSTRUCT(&agent_thread, opal_thread_t); - agent_thread.t_run = agent_thread_main; - agent_thread.t_arg = NULL; - int ret; - ret = opal_thread_start(&agent_thread); - if (OPAL_SUCCESS != ret) { - OPAL_ERROR_LOG(ret); - ABORT("Failed to start usNIC agent thread"); - /* Will not return */ - } - opal_output_verbose(20, USNIC_OUT, "usNIC connectivity agent initialized"); agent_initialized = true; @@ -1182,45 +1226,26 @@ int opal_btl_usnic_connectivity_agent_init(void) */ int opal_btl_usnic_connectivity_agent_finalize(void) { - agent_initialized = false; - /* Only do this if I have the agent running */ - if (NULL == evbase) { + if (!agent_initialized) { return OPAL_SUCCESS; } - /* Shut down the event loop. Send it a no-op event so that it - wakes up and exits the loop. 
*/ - opal_event_t ev; - agent_thread_time_to_exit = true; - opal_event_set(evbase, &ev, -1, OPAL_EV_WRITE, agent_thread_noop, NULL); - opal_event_active(&ev, OPAL_EV_WRITE, 1); - opal_thread_join(&agent_thread, NULL); - - /* Shut down all active udp_port_listeners */ - agent_udp_port_listener_t *udp_listener, *ulnext; - OPAL_LIST_FOREACH_SAFE(udp_listener, ulnext, &udp_port_listeners, - agent_udp_port_listener_t) { - OBJ_RELEASE(udp_listener); - } - - /* Destroy the pending pings and ping results */ - agent_ping_t *request, *pnext; - OPAL_LIST_FOREACH_SAFE(request, pnext, &pings_pending, agent_ping_t) { - opal_list_remove_item(&pings_pending, &request->super); - OBJ_RELEASE(request); - } - - OPAL_LIST_FOREACH_SAFE(request, pnext, &ping_results, agent_ping_t) { - opal_list_remove_item(&ping_results, &request->super); - OBJ_RELEASE(request); - } - - /* Shut down all active ipc_listeners */ - agent_ipc_listener_t *ipc_listener, *inext; - OPAL_LIST_FOREACH_SAFE(ipc_listener, inext, &ipc_listeners, - agent_ipc_listener_t) { - OBJ_RELEASE(ipc_listener); + /* Submit an event to the async thread and tell it to delete all + the usNIC events. See the rationale for doing this in the + comment in the agent_thread_finalize() function. */ + opal_event_t *ev = calloc(sizeof(*ev), 1); + opal_event_set(mca_btl_usnic_component.opal_evbase, + ev, -1, OPAL_EV_WRITE, agent_thread_finalize, ev); + opal_event_active(ev, OPAL_EV_WRITE, 1); + + /* Wait for the event to fire and complete */ + while (agent_initialized) { + struct timespec tp = { + .tv_sec = 0, + .tv_nsec = 1000 + }; + nanosleep(&tp, NULL); } /* Close the local IPC socket and remove the file */ diff --git a/opal/mca/btl/usnic/btl_usnic_cclient.c b/opal/mca/btl/usnic/btl_usnic_cclient.c index 13c9331cc18..77615937e47 100644 --- a/opal/mca/btl/usnic/btl_usnic_cclient.c +++ b/opal/mca/btl/usnic/btl_usnic_cclient.c @@ -1,5 +1,7 @@ /* - * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2014-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -16,7 +18,9 @@ #include #include #include +#ifdef HAVE_ALLOCA_H #include +#endif #include #include "opal_stdint.h" @@ -101,7 +105,23 @@ int opal_btl_usnic_connectivity_client_init(void) address.sun_family = AF_UNIX; strncpy(address.sun_path, ipc_filename, sizeof(address.sun_path) - 1); - if (0 != connect(agent_fd, (struct sockaddr*) &address, sizeof(address))) { + int count = 0; + while (1) { + int ret = connect(agent_fd, (struct sockaddr*) &address, + sizeof(address)); + if (0 == ret) { + break; + } + + // If we get ECONNREFUSED, delay a little and try again + if (ECONNREFUSED == errno) { + if (count < mca_btl_usnic_component.connectivity_num_retries) { + usleep(100); + ++count; + continue; + } + } + OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO); ABORT("connect() failed"); /* Will not return */ @@ -177,7 +197,7 @@ int opal_btl_usnic_connectivity_listen(opal_btl_usnic_module_t *module) /* Ensure to NULL-terminate the passed strings */ strncpy(cmd.nodename, opal_process_info.nodename, CONNECTIVITY_NODENAME_LEN - 1); - strncpy(cmd.usnic_name, module->fabric_info->fabric_attr->name, + strncpy(cmd.usnic_name, module->linux_device_name, CONNECTIVITY_IFNAME_LEN - 1); if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) { @@ -214,6 +234,9 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port, return OPAL_SUCCESS; } + /* Protect opal_fd_write for multithreaded case */ + OPAL_THREAD_LOCK(&btl_usnic_lock); + /* Send the PING command */ int id = CONNECTIVITY_AGENT_CMD_PING; if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(id), &id)) { @@ -240,6 +263,9 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port, /* Will not return */ } + /* Unlock and return */ + OPAL_THREAD_UNLOCK(&btl_usnic_lock); + 
return OPAL_SUCCESS; } diff --git a/opal/mca/btl/usnic/btl_usnic_compat.c b/opal/mca/btl/usnic/btl_usnic_compat.c index fb08c8378fa..de649cb5147 100644 --- a/opal/mca/btl/usnic/btl_usnic_compat.c +++ b/opal/mca/btl/usnic/btl_usnic_compat.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,7 +42,7 @@ void usnic_compat_modex_send(int *rc, opal_btl_usnic_modex_t *modexes, size_t size) { - OPAL_MODEX_SEND(*rc, PMIX_SYNC_REQD, PMIX_REMOTE, component, + OPAL_MODEX_SEND(*rc, OPAL_PMIX_REMOTE, component, modexes, size); } @@ -51,7 +52,8 @@ void usnic_compat_modex_recv(int *rc, opal_btl_usnic_modex_t **modexes, size_t *size) { - OPAL_MODEX_RECV(*rc, component, proc, (uint8_t**) modexes, size); + OPAL_MODEX_RECV(*rc, component, &proc->proc_name, + (uint8_t**) modexes, size); } uint64_t usnic_compat_rte_hash_name(opal_process_name_t *pname) @@ -148,6 +150,82 @@ int usnic_compat_free_list_init(opal_free_list_t *free_list, mpool); } +static volatile bool agent_thread_time_to_exit = false; +static opal_thread_t agent_thread; +static opal_event_t blocker; // event to block on +static opal_event_base_t *agent_evbase = NULL; + +static struct timeval long_timeout = { + .tv_sec = 3600, + .tv_usec = 0 +}; + +/* + * If this event is fired, just restart it so that this event base + * continues to have something to block on. 
+ */ +static void blocker_timeout_cb(int fd, short args, void *cbdata) +{ + opal_event_add(&blocker, &long_timeout); +} + +/* + * Agent progress thread main entry point + */ +static void *agent_thread_main(opal_object_t *obj) +{ + while (!agent_thread_time_to_exit) { + opal_event_loop(agent_evbase, OPAL_EVLOOP_ONCE); + } + + return NULL; +} + +opal_event_base_t *opal_progress_thread_init(const char *name) +{ + assert(NULL == name); + + /* Create the event base */ + agent_evbase = opal_event_base_create(); + if (NULL == agent_evbase) { + return NULL; + } + + /* add an event to the new event base (if there are no events, + opal_event_loop() will return immediately) */ + opal_event_set(agent_evbase, &blocker, -1, OPAL_EV_PERSIST, + blocker_timeout_cb, NULL); + opal_event_add(&blocker, &long_timeout); + + /* Spawn the agent thread event loop */ + OBJ_CONSTRUCT(&agent_thread, opal_thread_t); + agent_thread.t_run = agent_thread_main; + agent_thread.t_arg = NULL; + int ret; + ret = opal_thread_start(&agent_thread); + if (OPAL_SUCCESS != ret) { + OPAL_ERROR_LOG(ret); + ABORT("Failed to start usNIC agent thread"); + /* Will not return */ + } + + return agent_evbase; +} + +int opal_progress_thread_finalize(const char *name) +{ + assert(NULL == name); + + agent_thread_time_to_exit = true; + + /* break the event loop - this will cause the loop to exit upon + completion of any current event */ + opal_event_base_loopbreak(agent_evbase); + opal_thread_join(&agent_thread, NULL); + + return OPAL_SUCCESS; +} + #endif /* OMPI version */ /************************************************************************/ @@ -431,6 +509,7 @@ opal_btl_usnic_prepare_src( size_t* size, uint32_t flags) { + OPAL_THREAD_LOCK(&btl_usnic_lock); opal_btl_usnic_module_t *module = (opal_btl_usnic_module_t*) base_module; opal_btl_usnic_send_frag_t *frag; uint32_t payload_len; @@ -457,7 +536,7 @@ opal_btl_usnic_prepare_src( #if MSGDEBUG2 opal_output(0, "prep_src: %s %s frag %p, size=%d+%u (was %u), 
conv=%p\n", - module->fabric_info->fabric_attr->name, + module->linux_device_name, (reserve + *size) <= module->max_frag_payload?"small":"large", (void *)frag, (int)reserve, (unsigned)*size, (unsigned)osize, (void *)convertor); @@ -474,6 +553,7 @@ opal_btl_usnic_prepare_src( #endif #endif + OPAL_THREAD_UNLOCK(&btl_usnic_lock); return &frag->sf_base.uf_base; } @@ -643,7 +723,7 @@ opal_btl_usnic_prepare_src(struct mca_btl_base_module_t *base_module, #if MSGDEBUG2 opal_output(0, "prep_src: %s %s frag %p, size=%d+%u (was %u), conv=%p\n", - module->fabric_info->fabric_attr->name, + module->linux_device_name, (reserve + *size) <= module->max_frag_payload?"small":"large", (void *)frag, (int)reserve, (unsigned)*size, (unsigned)osize, (void *)convertor); diff --git a/opal/mca/btl/usnic/btl_usnic_compat.h b/opal/mca/btl/usnic/btl_usnic_compat.h index 632612e7d9a..da99d13be26 100644 --- a/opal/mca/btl/usnic/btl_usnic_compat.h +++ b/opal/mca/btl/usnic/btl_usnic_compat.h @@ -1,5 +1,8 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -8,11 +11,13 @@ */ /* This header contains macros to help minimize usnic BTL differences - * between v1.7/v1.8 and v1.9/v2.0. */ + * between v1.7/v1.8, v1.9/v2.0, and v2.0/v2.1. 
*/ #ifndef BTL_USNIC_COMPAT_H #define BTL_USNIC_COMPAT_H +#include "opal/mca/rcache/rcache.h" + /************************************************************************/ /* v2.0 and beyond */ @@ -31,6 +36,19 @@ /* Free lists are unified into OPAL free lists */ # include "opal/class/opal_free_list.h" +/* Inclue the progress thread stuff */ +# include "opal/runtime/opal_progress_threads.h" + +/* Hwloc support is now guaranteed, and the rest of the code base does + not define OPAL_HAVE_HWLOC any more (because it would always be 1). + + Note: The usnic BTL still uses OPAL_HAVE_HWLOC because Cisco + continues to sync it against a v1.10-based tree (where + OPAL_HAVE_HWLOC may still be 0 or 1). Once Cisco stops syncing the + usnic BTL against v1.10.x, all the OPAL_HAVE_HWLOC code in the + usnic BTL can go away. */ +# define OPAL_HAVE_HWLOC 1 + # define USNIC_OUT opal_btl_base_framework.framework_output /* JMS Really want to be able to get the job size somehow... But for now, so that we can compile, just set it to a constant :-( */ @@ -96,6 +114,8 @@ usnic_compat_proc_name_compare(opal_process_name_t a, # define proc_bound() (ompi_rte_proc_is_bound) # define opal_proc_local_get() ompi_proc_local() +# define opal_sync_event_base opal_event_base + # define opal_process_info orte_process_info # define opal_proc_t ompi_proc_t @@ -185,6 +205,25 @@ usnic_compat_proc_name_compare(opal_process_name_t a, return ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, &a, &b); } +/* Hotels in v1.8 */ +# include "opal/class/opal_hotel.h" + +/* + * Performance critical; needs to be inline + */ +static inline int +usnic_compat_opal_hotel_init(opal_hotel_t *hotel, int num_rooms, + opal_event_base_t *evbase, + uint32_t eviction_timeout, + int eviction_event_priority, + opal_hotel_eviction_callback_fn_t evict_callback_fn) +{ + return opal_hotel_init(hotel, num_rooms, eviction_timeout, + eviction_event_priority, evict_callback_fn); +} +#define opal_hotel_init usnic_compat_opal_hotel_init + + /* * 
Replicate functions that exist on master */ @@ -208,6 +247,17 @@ int usnic_compat_free_list_init(opal_free_list_t *free_list, opal_free_list_item_init_fn_t item_init, void *ctx); +/* + * Start the connectivity checker progress thread + */ +opal_event_base_t *opal_progress_thread_init(const char *name); + +/* + * Stop the connectivity checker progress thread + */ +int opal_progress_thread_finalize(const char *name); + + /************************************************************************/ #else @@ -313,4 +363,18 @@ opal_btl_usnic_put(struct mca_btl_base_module_t *base_module, #endif /* BTL_VERSION */ +#if defined(RCACHE_MAJOR_VERSION) && RCACHE_MAJOR_VERSION >= 3 + +#define RCACHE_VERSION 30 + +/* these structures got renamed with the mpool/rcache rewrite */ +#define mca_mpool_base_registration_t mca_rcache_base_registration_t +#define mca_mpool_base_resources_t mca_rcache_base_resources_t + +#else + +#define RCACHE_VERSION 20 + +#endif + #endif /* BTL_USNIC_COMPAT_H */ diff --git a/opal/mca/btl/usnic/btl_usnic_component.c b/opal/mca/btl/usnic/btl_usnic_component.c index c2bd218e2b1..f841cb585b2 100644 --- a/opal/mca/btl/usnic/btl_usnic_component.c +++ b/opal/mca/btl/usnic/btl_usnic_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
@@ -86,8 +86,11 @@ #define OPAL_BTL_USNIC_NUM_COMPLETIONS 500 +/* MPI_THREAD_MULTIPLE_SUPPORT */ +opal_recursive_mutex_t btl_usnic_lock = OPAL_RECURSIVE_MUTEX_STATIC_INIT; + /* RNG buffer definition */ -opal_rng_buff_t opal_btl_usnic_rand_buff; +opal_rng_buff_t opal_btl_usnic_rand_buff = {{0}}; /* simulated clock */ uint64_t opal_btl_usnic_ticks = 0; @@ -163,6 +166,7 @@ static int usnic_component_open(void) mca_btl_usnic_component.usnic_all_modules = NULL; mca_btl_usnic_component.usnic_active_modules = NULL; mca_btl_usnic_component.transport_header_len = -1; + mca_btl_usnic_component.prefix_send_offset = 0; /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_usnic_component.usnic_procs, opal_list_t); @@ -209,6 +213,9 @@ static int usnic_component_close(void) opal_btl_usnic_connectivity_client_finalize(); opal_btl_usnic_connectivity_agent_finalize(); } + if (mca_btl_usnic_component.opal_evbase) { + opal_progress_thread_finalize(NULL); + } free(mca_btl_usnic_component.usnic_all_modules); free(mca_btl_usnic_component.usnic_active_modules); @@ -218,6 +225,8 @@ static int usnic_component_close(void) opal_btl_usnic_cleanup_tests(); #endif + OBJ_DESTRUCT(&btl_usnic_lock); + return OPAL_SUCCESS; } @@ -318,9 +327,7 @@ static int check_usnic_config(opal_btl_usnic_module_t *module, char str[128]; unsigned unlp; struct fi_usnic_info *uip; - struct fi_info *info; - info = module->fabric_info; uip = &module->usnic_info; /* Note: we add one to num_local_procs to account for *this* @@ -333,11 +340,11 @@ static int check_usnic_config(opal_btl_usnic_module_t *module, 1. num_vfs (i.e., "usNICs") >= num_local_procs (to ensure that each MPI process will be able to have its own protection domain), and - 2. num_vfs * num_qps_per_vf >= num_local_procs * NUM_CHANNELS + 2. num_qps_per_vf >= NUM_CHANNELS (to ensure that each MPI process will be able to get the number of QPs it needs -- we know that every VF will have the same number of QPs), and - 3. 
num_vfs * num_cqs_per_vf >= num_local_procs * NUM_CHANNELS + 3. num_cqs_per_vf >= NUM_CHANNELS (to ensure that each MPI process will be able to get the number of CQs that it needs) */ if (uip->ui.v1.ui_num_vf < unlp) { @@ -346,19 +353,17 @@ static int check_usnic_config(opal_btl_usnic_module_t *module, goto error; } - if (uip->ui.v1.ui_num_vf * uip->ui.v1.ui_qp_per_vf < - unlp * USNIC_NUM_CHANNELS) { - snprintf(str, sizeof(str), "Not enough WQ/RQ (found %d, need %d)", - uip->ui.v1.ui_num_vf * uip->ui.v1.ui_qp_per_vf, - unlp * USNIC_NUM_CHANNELS); + if (uip->ui.v1.ui_qp_per_vf < USNIC_NUM_CHANNELS) { + snprintf(str, sizeof(str), "Not enough transmit/receive queues per usNIC (found %d, need %d)", + uip->ui.v1.ui_qp_per_vf, + USNIC_NUM_CHANNELS); goto error; } - if (uip->ui.v1.ui_num_vf * uip->ui.v1.ui_cq_per_vf < - unlp * USNIC_NUM_CHANNELS) { + if (uip->ui.v1.ui_cq_per_vf < USNIC_NUM_CHANNELS) { snprintf(str, sizeof(str), - "Not enough CQ per usNIC (found %d, need %d)", - uip->ui.v1.ui_num_vf * uip->ui.v1.ui_cq_per_vf, - unlp * USNIC_NUM_CHANNELS); + "Not enough completion queues per usNIC (found %d, need %d)", + uip->ui.v1.ui_cq_per_vf, + USNIC_NUM_CHANNELS); goto error; } @@ -371,7 +376,7 @@ static int check_usnic_config(opal_btl_usnic_module_t *module, "not enough usnic resources", true, opal_process_info.nodename, - info->fabric_attr->name, + module->linux_device_name, str); return OPAL_ERROR; } @@ -536,10 +541,12 @@ static bool filter_module(opal_btl_usnic_module_t *module, struct fi_usnic_info *uip; struct fi_info *info; bool match; + const char *linux_device_name; info = module->fabric_info; uip = &module->usnic_info; src = info->src_addr; + linux_device_name = module->linux_device_name; module_mask = src->sin_addr.s_addr & uip->ui.v1.ui_netmask_be; match = false; for (i = 0; i < filter->n_elt; ++i) { @@ -552,7 +559,7 @@ static bool filter_module(opal_btl_usnic_module_t *module, } } else { - if (strcmp(filter->elts[i].if_name, info->fabric_attr->name) == 0) { 
+ if (strcmp(filter->elts[i].if_name, linux_device_name) == 0) { match = true; break; } @@ -604,23 +611,101 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, int min_distance, num_local_procs; struct fi_info *info_list; struct fi_info *info; - struct fi_info hints = {0}; - struct fi_ep_attr ep_attr = {0}; - struct fi_fabric_attr fabric_attr = {0}; struct fid_fabric *fabric; struct fid_domain *domain; int ret; *num_btl_modules = 0; - /* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */ + /* MPI_THREAD_MULTIPLE is only supported in 2.0+ */ if (want_mpi_threads && !mca_btl_base_thread_multiple_override) { + if (OPAL_MAJOR_VERSION >= 2) { + opal_output_verbose(5, USNIC_OUT, + "btl:usnic: MPI_THREAD_MULTIPLE support is in testing phase."); + } + else { + opal_output_verbose(5, USNIC_OUT, + "btl:usnic: MPI_THREAD_MULTIPLE is not supported in version < 2."); + return NULL; + } + } + + OBJ_CONSTRUCT(&btl_usnic_lock, opal_recursive_mutex_t); + + /* There are multiple dimensions to consider when requesting an + API version number from libfabric: + + 1. This code understands libfabric API versions v1.3 through + v1.4. + + 2. Open MPI may be *compiled* against one version of libfabric, + but may be *running* with another. + + 3. There were usnic-specific bugs in Libfabric prior to + libfabric v1.3.0 (where "v1.3.0" is the tarball/package + version, not the API version; but happily, the API version + was also 1.3 in Libfabric v1.3.0): + + - In libfabric v1.0.0 (i.e., API v1.0), the usnic provider + did not check the value of the "version" parameter passed + into fi_getinfo() + - If you pass FI_VERSION(1,0) to libfabric v1.1.0 (i.e., API + v1.1), the usnic provider will disable FI_MSG_PREFIX + support (on the assumption that the application will not + handle FI_MSG_PREFIX properly). This can happen if you + compile OMPI against libfabric v1.0.0 (i.e., API v1.0) and + run OMPI against libfabric v1.1.0 (i.e., API v1.1). 
+ - Some critical AV bug fixes were included in libfabric + v1.3.0; prior versions can fail in fi_av_* operations in + unexpected ways (libnl: you win again!). + + So always request a minimum API version of v1.3. + + Note that the FI_MAJOR_VERSION and FI_MINOR_VERSION in + represent the API version, not the Libfabric + package (i.e., tarball) version. As of Libfabric v1.3, there + is currently no way to know a) what package version of + Libfabric you were compiled against, and b) what package + version of Libfabric you are running with. + + Also note that the usnic provider changed the strings in the + fabric and domain names in API v1.4. With API <= v1.3: + + - fabric name is "usnic_X" (device name) + - domain name is NULL + + With libfabric API >= v1.4, all Libfabric IP-based providers + (including usnic) follow the same convention: + + - fabric name is "a.b.c.d/e" (CIDR notation of network) + - domain name is "usnic_X" (device name) + + NOTE: The configure.m4 in this component will require libfabric + >= v1.1.0 (i.e., it won't accept v1.0.0) because it needs + access to the usNIC extension header structures that only + became available in v1.1.0.*/ + + /* First, check to see if the libfabric we are running with is <= + libfabric v1.3. If so, don't bother going further. */ + uint32_t libfabric_api; + libfabric_api = fi_version(); + if (libfabric_api < FI_VERSION(1, 3)) { opal_output_verbose(5, USNIC_OUT, - "btl:usnic: MPI_THREAD_MULTIPLE not supported; skipping this component"); + "btl:usnic: disqualifiying myself because Libfabric does not support v1.3 of the API (v1.3 is *required* for correct usNIC functionality)."); return NULL; } - /* We only want providers named "usnic that are of type EP_DGRAM */ + /* Libfabric API 1.3 is fine. Above that, we know that Open MPI + works with libfabric API v1.4, so just use that. 
*/ + if (libfabric_api > FI_VERSION(1, 3)) { + libfabric_api = FI_VERSION(1, 4); + } + + struct fi_info hints = {0}; + struct fi_ep_attr ep_attr = {0}; + struct fi_fabric_attr fabric_attr = {0}; + + /* We only want providers named "usnic" that are of type EP_DGRAM */ fabric_attr.prov_name = "usnic"; ep_attr.type = FI_EP_DGRAM; @@ -630,10 +715,10 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, hints.ep_attr = &ep_attr; hints.fabric_attr = &fabric_attr; - ret = fi_getinfo(FI_VERSION(1, 0), NULL, 0, 0, &hints, &info_list); + ret = fi_getinfo(libfabric_api, NULL, 0, 0, &hints, &info_list); if (0 != ret) { opal_output_verbose(5, USNIC_OUT, - "btl:usnic: disqualifiying myself due to fi_getinfo failure: %s (%d)", strerror(-ret), ret); + "btl:usnic: disqualifiying myself due to fi_getinfo(3) failure: %s (%d)", strerror(-ret), ret); return NULL; } @@ -664,7 +749,6 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, opal_output_verbose(5, USNIC_OUT, "btl:usnic: usNIC fabrics found"); - /* libnl initialization */ opal_proc_t *me = opal_proc_local_get(); opal_process_name_t *name = &(me->proc_name); mca_btl_usnic_component.my_hashed_rte_name = @@ -728,13 +812,21 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, i < mca_btl_usnic_component.max_modules); ++i, info = info->next) { + // The fabric/domain names changed at libfabric API v1.4 (see above). 
+ char *linux_device_name; + if (libfabric_api <= FI_VERSION(1, 3)) { + linux_device_name = info->fabric_attr->name; + } else { + linux_device_name = info->domain_attr->name; + } + ret = fi_fabric(info->fabric_attr, &fabric, NULL); if (0 != ret) { opal_show_help("help-mpi-btl-usnic.txt", "libfabric API failed", true, opal_process_info.nodename, - info->fabric_attr->name, + linux_device_name, "fi_fabric()", __FILE__, __LINE__, ret, strerror(-ret)); @@ -748,7 +840,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, "libfabric API failed", true, opal_process_info.nodename, - info->fabric_attr->name, + linux_device_name, "fi_domain()", __FILE__, __LINE__, ret, strerror(-ret)); @@ -757,8 +849,8 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, opal_memchecker_base_mem_defined(&domain, sizeof(domain)); opal_output_verbose(5, USNIC_OUT, - "btl:usnic: found: usNIC direct device %s", - info->fabric_attr->name); + "btl:usnic: found: usNIC device %s", + linux_device_name); /* Save a little info on the module that we have already gathered. The rest of the module will be filled in @@ -769,6 +861,12 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, module->fabric = fabric; module->domain = domain; module->fabric_info = info; + module->libfabric_api = libfabric_api; + module->linux_device_name = strdup(linux_device_name); + if (NULL == module->linux_device_name) { + OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); + goto error; + } /* Obtain usnic-specific device info (e.g., netmask) that doesn't come in the normal fi_getinfo(). 
This allows us to @@ -778,27 +876,27 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, if (ret != 0) { opal_output_verbose(5, USNIC_OUT, "btl:usnic: device %s fabric_open_ops failed %d (%s)", - info->fabric_attr->name, ret, fi_strerror(-ret)); + module->linux_device_name, ret, fi_strerror(-ret)); fi_close(&domain->fid); fi_close(&fabric->fid); continue; } ret = - module->usnic_fabric_ops->getinfo(FI_EXT_USNIC_INFO_VERSION, + module->usnic_fabric_ops->getinfo(1, fabric, &module->usnic_info); if (ret != 0) { opal_output_verbose(5, USNIC_OUT, "btl:usnic: device %s usnic_getinfo failed %d (%s)", - info->fabric_attr->name, ret, fi_strerror(-ret)); + module->linux_device_name, ret, fi_strerror(-ret)); fi_close(&domain->fid); fi_close(&fabric->fid); continue; } opal_output_verbose(5, USNIC_OUT, "btl:usnic: device %s usnic_info: link speed=%d, netmask=0x%x, ifname=%s, num_vf=%d, qp/vf=%d, cq/vf=%d", - info->fabric_attr->name, + module->linux_device_name, (unsigned int) module->usnic_info.ui.v1.ui_link_speed, (unsigned int) module->usnic_info.ui.v1.ui_netmask_be, module->usnic_info.ui.v1.ui_ifname, @@ -812,7 +910,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, opal_output_verbose(5, USNIC_OUT, "btl:usnic: %s %s due to %s", (keep_module ? "keeping" : "skipping"), - info->fabric_attr->name, + module->linux_device_name, (filter_incl ? 
"if_include" : "if_exclude")); if (!keep_module) { fi_close(&domain->fid); @@ -830,7 +928,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, check_usnic_config(module, num_local_procs) != OPAL_SUCCESS) { opal_output_verbose(5, USNIC_OUT, "btl:usnic: device %s is not provisioned with enough resources -- skipping", - info->fabric_attr->name); + module->linux_device_name); fi_close(&domain->fid); fi_close(&fabric->fid); @@ -844,7 +942,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, opal_output_verbose(5, USNIC_OUT, "btl:usnic: device %s looks good!", - info->fabric_attr->name); + module->linux_device_name); /* Let this module advance to the next round! */ btls[j++] = &(module->super); @@ -861,8 +959,10 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, checking agent and client. */ if (mca_btl_usnic_component.num_modules > 0 && mca_btl_usnic_component.connectivity_enabled) { + mca_btl_usnic_component.opal_evbase = opal_progress_thread_init(NULL); if (OPAL_SUCCESS != opal_btl_usnic_connectivity_agent_init() || OPAL_SUCCESS != opal_btl_usnic_connectivity_client_init()) { + opal_progress_thread_finalize(NULL); return NULL; } } @@ -892,13 +992,14 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, btls[num_final_modules++] = &(module->super); /* Output all of this module's values. 
*/ - const char *devname = module->fabric_info->fabric_attr->name; + const char *devname = module->linux_device_name; opal_output_verbose(5, USNIC_OUT, - "btl:usnic: %s num sqe=%d, num rqe=%d, num cqe=%d", + "btl:usnic: %s num sqe=%d, num rqe=%d, num cqe=%d, num aveqe=%d", devname, module->sd_num, module->rd_num, - module->cq_num); + module->cq_num, + module->av_eq_num); opal_output_verbose(5, USNIC_OUT, "btl:usnic: %s priority MTU = %" PRIsize_t, devname, @@ -972,7 +1073,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, } /* start timer to guarantee synthetic clock advances */ - opal_event_set(opal_event_base, &usnic_clock_timer_event, + opal_event_set(opal_sync_event_base, &usnic_clock_timer_event, -1, 0, usnic_clock_callback, &usnic_clock_timeout); usnic_clock_timer_event_set = true; @@ -1087,6 +1188,11 @@ static int usnic_handle_completion( seg = (opal_btl_usnic_segment_t*)completion->op_context; rseg = (opal_btl_usnic_recv_segment_t*)seg; + /* Make the completion be Valgrind-defined */ + opal_memchecker_base_mem_defined(seg, sizeof(*seg)); + + OPAL_THREAD_LOCK(&btl_usnic_lock); + /* Handle work completions */ switch(seg->us_type) { @@ -1094,27 +1200,20 @@ static int usnic_handle_completion( case OPAL_BTL_USNIC_SEG_ACK: opal_btl_usnic_ack_complete(module, (opal_btl_usnic_ack_segment_t *)seg); -{ opal_btl_usnic_send_segment_t *sseg = (opal_btl_usnic_send_segment_t *)seg; -++module->mod_channels[sseg->ss_channel].credits; -} break; - /**** Send of frag segment completion ****/ + /**** Send of frag segment completion (i.e., the MPI message's + one-and-only segment has completed sending) ****/ case OPAL_BTL_USNIC_SEG_FRAG: opal_btl_usnic_frag_send_complete(module, (opal_btl_usnic_frag_segment_t*)seg); -{ opal_btl_usnic_send_segment_t *sseg = (opal_btl_usnic_send_segment_t *)seg; -++module->mod_channels[sseg->ss_channel].credits; -} break; - /**** Send of chunk segment completion ****/ + /**** Send of chunk segment completion (i.e., part 
of a large MPI + message is done sending) ****/ case OPAL_BTL_USNIC_SEG_CHUNK: opal_btl_usnic_chunk_send_complete(module, (opal_btl_usnic_chunk_segment_t*)seg); -{ opal_btl_usnic_send_segment_t *sseg = (opal_btl_usnic_send_segment_t *)seg; -++module->mod_channels[sseg->ss_channel].credits; -} break; /**** Receive completions ****/ @@ -1126,6 +1225,8 @@ static int usnic_handle_completion( BTL_ERROR(("Unhandled completion segment type %d", seg->us_type)); break; } + + OPAL_THREAD_UNLOCK(&btl_usnic_lock); return 1; } @@ -1139,23 +1240,32 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module, if (cq_ret != -FI_EAVAIL) { BTL_ERROR(("%s: cq_read ret = %d (%s)", - module->fabric_info->fabric_attr->name, cq_ret, + module->linux_device_name, cq_ret, fi_strerror(-cq_ret))); channel->chan_error = true; } rc = fi_cq_readerr(channel->cq, &err_entry, 0); - if (rc != sizeof(err_entry)) { - BTL_ERROR(("%s: cq_readerr ret = %d", - module->fabric_info->fabric_attr->name, rc)); + if (rc == -FI_EAGAIN) { + return; + } else if (rc != 1) { + BTL_ERROR(("%s: cq_readerr ret = %d (expected 1)", + module->linux_device_name, rc)); channel->chan_error = true; - } else if (err_entry.prov_errno == 1) { + } + + /* Silently count CRC errors. Truncation errors are usually a + different symptom of a CRC error. */ + else if (FI_ECRC == err_entry.prov_errno || + FI_ETRUNC == err_entry.prov_errno) { #if MSGDEBUG1 static int once = 0; if (once++ == 0) { - BTL_ERROR(("%s: Channel %d, CRC error", - module->fabric_info->fabric_attr->name, - channel->chan_index)); + BTL_ERROR(("%s: Channel %d, %s", + module->linux_device_name, + channel->chan_index, + FI_ECRC == err_entry.prov_errno ? 
+ "CRC error" : "message truncation")); } #endif @@ -1171,23 +1281,10 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module, rseg->rs_next = channel->repost_recv_head; channel->repost_recv_head = rseg; } - } else if (FI_ETRUNC == err_entry.prov_errno) { - /* This error is usually a different symptom of a CRC error */ -#if MSGDEBUG1 - static int once = 0; - if (once++ == 0) { - BTL_ERROR(("%s: Channel %d, message truncation", - module->fabric_info->fabric_attr->name, - channel->chan_index)); - } -#endif - - /* silently count CRC errors */ - ++module->stats.num_crc_errors; } else { BTL_ERROR(("%s: CQ[%d] prov_err = %d", - module->fabric_info->fabric_attr->name, channel->chan_index, - err_entry.prov_errno)); + module->linux_device_name, channel->chan_index, + err_entry.prov_errno)); channel->chan_error = true; } } @@ -1399,7 +1496,7 @@ void opal_btl_usnic_component_debug(void) module = mca_btl_usnic_component.usnic_active_modules[i]; opal_output(0, "active_modules[%d]=%p %s max{frag,chunk,tiny}=%llu,%llu,%llu\n", - i, (void *)module, module->fabric_info->fabric_attr->name, + i, (void *)module, module->linux_device_name, (unsigned long long)module->max_frag_payload, (unsigned long long)module->max_chunk_payload, (unsigned long long)module->max_tiny_payload); diff --git a/opal/mca/btl/usnic/btl_usnic_endpoint.c b/opal/mca/btl/usnic/btl_usnic_endpoint.c index 998e4576c3f..40a1faee1bb 100644 --- a/opal/mca/btl/usnic/btl_usnic_endpoint.c +++ b/opal/mca/btl/usnic/btl_usnic_endpoint.c @@ -14,6 +14,7 @@ * Copyright (c) 2007 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -86,6 +87,7 @@ static void endpoint_construct(mca_btl_base_endpoint_t* endpoint) OBJ_CONSTRUCT(&endpoint->endpoint_hotel, opal_hotel_t); opal_hotel_init(&endpoint->endpoint_hotel, WINDOW_SIZE, + opal_sync_event_base, mca_btl_usnic_component.retrans_timeout, 0, opal_btl_usnic_ack_timeout); diff --git a/opal/mca/btl/usnic/btl_usnic_endpoint.h b/opal/mca/btl/usnic/btl_usnic_endpoint.h index c76eee6d95a..faebb9b6d66 100644 --- a/opal/mca/btl/usnic/btl_usnic_endpoint.h +++ b/opal/mca/btl/usnic/btl_usnic_endpoint.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -69,6 +69,7 @@ typedef struct opal_btl_usnic_modex_t { uint32_t ipv4_addr; /* Stored in host order */ uint32_t ports[USNIC_NUM_CHANNELS]; + /* Stored in network order */ uint32_t netmask; /* Stored in host order */ uint32_t connectivity_udp_port; @@ -159,6 +160,8 @@ typedef struct mca_btl_base_endpoint_t { opal_btl_usnic_seq_t endpoint_next_seq_to_send; /* n_t */ opal_btl_usnic_seq_t endpoint_ack_seq_rcvd; /* n_a */ + /* Table where sent segments sit while waiting for their ACKs. + When a segment is ACKed, it is removed from this table. */ struct opal_btl_usnic_send_segment_t *endpoint_sent_segs[WINDOW_SIZE]; /* Values for the current proc to receive from this endpoint on diff --git a/opal/mca/btl/usnic/btl_usnic_frag.c b/opal/mca/btl/usnic/btl_usnic_frag.c index 5944e02cbad..6a8129a3f7e 100644 --- a/opal/mca/btl/usnic/btl_usnic_frag.c +++ b/opal/mca/btl/usnic/btl_usnic_frag.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,36 +30,35 @@ #include "btl_usnic_ack.h" static void -common_send_seg_helper( - opal_btl_usnic_send_segment_t *seg, - int offset) +common_send_seg_helper(opal_btl_usnic_send_segment_t *seg) { opal_btl_usnic_segment_t *bseg; - bseg = &seg->ss_base; - - bseg->us_btl_header = (opal_btl_usnic_btl_header_t *) - (((char*) bseg->us_list.ptr) + offset); - bseg->us_btl_header->sender = mca_btl_usnic_component.my_hashed_rte_name; - + /* send ptr for fi_send(). ss_len will be filled in right before + the actual send. */ + seg->ss_ptr = (uint8_t *) seg->ss_base.us_list.ptr; seg->ss_send_posted = 0; seg->ss_ack_pending = false; - /* send ptr, len will be filled in just before send */ - seg->ss_ptr = (uint8_t *)bseg->us_btl_header; + /* Offset the BTL header by (prefix_send_offset) bytes into the + raw buffer */ + bseg = &seg->ss_base; + bseg->us_btl_header = (opal_btl_usnic_btl_header_t *) + (seg->ss_ptr + mca_btl_usnic_component.prefix_send_offset); + bseg->us_btl_header->sender = mca_btl_usnic_component.my_hashed_rte_name; } static void chunk_seg_constructor( - opal_btl_usnic_send_segment_t *seg) + opal_btl_usnic_chunk_segment_t *cseg) { opal_btl_usnic_segment_t *bseg; - bseg = &seg->ss_base; + bseg = &cseg->ss_base; bseg->us_type = OPAL_BTL_USNIC_SEG_CHUNK; /* some more common initializaiton */ - common_send_seg_helper(seg, mca_btl_usnic_component.transport_header_len); + common_send_seg_helper(cseg); /* payload starts next byte beyond BTL chunk header */ bseg->us_payload.raw = (uint8_t *)(bseg->us_btl_chunk_header + 1); @@ -69,15 +68,15 @@ chunk_seg_constructor( static void frag_seg_constructor( - opal_btl_usnic_send_segment_t *seg) + opal_btl_usnic_frag_segment_t *fseg) { opal_btl_usnic_segment_t *bseg; - bseg = &seg->ss_base; + bseg = &fseg->ss_base; bseg->us_type = OPAL_BTL_USNIC_SEG_FRAG; /* some more common initializaiton */ - 
common_send_seg_helper(seg, mca_btl_usnic_component.transport_header_len); + common_send_seg_helper(fseg); /* payload starts next byte beyond BTL header */ bseg->us_payload.raw = (uint8_t *)(bseg->us_btl_header + 1); @@ -87,7 +86,7 @@ frag_seg_constructor( static void ack_seg_constructor( - opal_btl_usnic_send_segment_t *ack) + opal_btl_usnic_ack_segment_t *ack) { opal_btl_usnic_segment_t *bseg; @@ -95,7 +94,7 @@ ack_seg_constructor( bseg->us_type = OPAL_BTL_USNIC_SEG_ACK; /* some more common initializaiton */ - common_send_seg_helper(ack, mca_btl_usnic_component.transport_header_len); + common_send_seg_helper(ack); /* ACK value embedded in BTL header */ bseg->us_btl_header->payload_type = OPAL_BTL_USNIC_PAYLOAD_TYPE_ACK; @@ -176,12 +175,13 @@ send_frag_constructor(opal_btl_usnic_send_frag_t *frag) static void send_frag_destructor(opal_btl_usnic_send_frag_t *frag) { - mca_btl_base_descriptor_t *desc; - +#if OPAL_ENABLE_DEBUG /* make sure nobody twiddled these values after the constructor */ + mca_btl_base_descriptor_t *desc; desc = &frag->sf_base.uf_base; assert(desc->USNIC_SEND_LOCAL == frag->sf_base.uf_local_seg); assert(0 == frag->sf_base.uf_local_seg[0].seg_len); +#endif /* PML may change desc->des_remote to point elsewhere, cannot assert that it * still points to our embedded segment */ diff --git a/opal/mca/btl/usnic/btl_usnic_frag.h b/opal/mca/btl/usnic/btl_usnic_frag.h index c1f67037bf2..771a031ca17 100644 --- a/opal/mca/btl/usnic/btl_usnic_frag.h +++ b/opal/mca/btl/usnic/btl_usnic_frag.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -370,6 +370,7 @@ opal_btl_usnic_small_send_frag_alloc(opal_btl_usnic_module_t *module) /* this belongs in constructor... 
*/ frag->ssf_base.sf_base.uf_freelist = &(module->small_send_frags); + frag->ssf_segment.ss_send_posted = 0; assert(frag); assert(OPAL_BTL_USNIC_FRAG_SMALL_SEND == frag->ssf_base.sf_base.uf_type); @@ -480,6 +481,14 @@ opal_btl_usnic_frag_return( } } + /* Reset the "send_posted" flag on the embedded segment for small + fragments */ + else if (frag->uf_type == OPAL_BTL_USNIC_FRAG_SMALL_SEND) { + opal_btl_usnic_small_send_frag_t *sfrag; + sfrag = (opal_btl_usnic_small_send_frag_t *) frag; + sfrag->ssf_segment.ss_send_posted = 0; + } + USNIC_COMPAT_FREE_LIST_RETURN(frag->uf_freelist, &(frag->uf_base.super)); } diff --git a/opal/mca/btl/usnic/btl_usnic_hwloc.c b/opal/mca/btl/usnic/btl_usnic_hwloc.c index 79dac73892a..75ca62a04df 100644 --- a/opal/mca/btl/usnic/btl_usnic_hwloc.c +++ b/opal/mca/btl/usnic/btl_usnic_hwloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -7,21 +7,9 @@ * $HEADER$ */ -/* - * This file is only compiled (via AM_CONDITIONAL) if OPAL_HAVE_HWLOC - * is set. - */ - #include "opal_config.h" -/* Define this before including hwloc.h so that we also get the hwloc - verbs helper header file, too. We have to do this level of - indirection because the hwloc subsystem is a component -- we don't - know its exact path. We have to rely on the framework header files - to find the right hwloc verbs helper file for us. 
*/ -#define OPAL_HWLOC_WANT_VERBS_HELPER 1 -#include "opal/mca/hwloc/hwloc.h" - +#include "opal/mca/hwloc/hwloc-internal.h" #include "opal/constants.h" #if BTL_IN_OPAL @@ -174,7 +162,7 @@ static hwloc_obj_t find_device_numa(opal_btl_usnic_module_t *module) if (obj->type != HWLOC_OBJ_NODE) { opal_output_verbose(5, USNIC_OUT, "btl:usnic:filter_numa: could not find NUMA node for %s; filtering by NUMA distance not possible", - module->fabric_info->fabric_attr->name); + module->linux_device_name); return NULL; } @@ -230,7 +218,7 @@ int opal_btl_usnic_hwloc_distance(opal_btl_usnic_module_t *module) opal_output_verbose(5, USNIC_OUT, "btl:usnic:filter_numa: %s is distance %d from me", - module->fabric_info->fabric_attr->name, + module->linux_device_name, module->numa_distance); } diff --git a/opal/mca/btl/usnic/btl_usnic_hwloc.h b/opal/mca/btl/usnic/btl_usnic_hwloc.h index 4dc5b3689d5..afb31504198 100644 --- a/opal/mca/btl/usnic/btl_usnic_hwloc.h +++ b/opal/mca/btl/usnic/btl_usnic_hwloc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/btl/usnic/btl_usnic_map.c b/opal/mca/btl/usnic/btl_usnic_map.c index ce2aca6abea..c9cbd8a83c2 100644 --- a/opal/mca/btl/usnic/btl_usnic_map.c +++ b/opal/mca/btl/usnic/btl_usnic_map.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * @@ -30,8 +30,8 @@ static int map_compare_modules(const void *aa, const void *bb) opal_btl_usnic_module_t *a = *((opal_btl_usnic_module_t**) aa); opal_btl_usnic_module_t *b = *((opal_btl_usnic_module_t**) bb); - return strcmp(a->fabric_info->fabric_attr->name, - b->fabric_info->fabric_attr->name); + return strcmp(a->linux_device_name, + b->linux_device_name); } /* @@ -74,7 +74,7 @@ static int map_output_modules(FILE *fp) prefix_len); fprintf(fp, "device=%s,ip=%s,mss=%" PRIsize_t "\n", - modules[i]->fabric_info->fabric_attr->name, + modules[i]->linux_device_name, ipv4, modules[i]->fabric_info->ep_attr->max_msg_size); } @@ -102,8 +102,8 @@ static int map_compare_endpoints(const void *aa, const void *bb) return -1; } - return strcmp(a->endpoint_module->fabric_info->fabric_attr->name, - b->endpoint_module->fabric_info->fabric_attr->name); + return strcmp(a->endpoint_module->linux_device_name, + b->endpoint_module->linux_device_name); } /* @@ -148,7 +148,7 @@ static int map_output_endpoints(FILE *fp, opal_btl_usnic_proc_t *proc) eps[i]->endpoint_remote_modex.netmask); fprintf(fp, "device=%s@peer_ip=%s", - eps[i]->endpoint_module->fabric_info->fabric_attr->name, + eps[i]->endpoint_module->linux_device_name, ipv4); ++num_output; } diff --git a/opal/mca/btl/usnic/btl_usnic_mca.c b/opal/mca/btl/usnic/btl_usnic_mca.c index 087951c235d..84f987cf22c 100644 --- a/opal/mca/btl/usnic/btl_usnic_mca.c +++ b/opal/mca/btl/usnic/btl_usnic_mca.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,9 +12,10 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights + * Copyright (c) 2008-2016 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,9 +25,7 @@ #include "opal_config.h" -#ifdef HAVE_STRING_H #include -#endif #include #include "opal/mca/base/mca_base_var.h" @@ -163,6 +163,7 @@ int opal_btl_usnic_component_register(void) static int prio_sd_num; static int prio_rd_num; static int cq_num; + static int av_eq_num; static int udp_port_base; static int max_tiny_msg_size; static int eager_limit; @@ -203,9 +204,20 @@ int opal_btl_usnic_component_register(void) 0, &stats_relative, 0, OPAL_INFO_LVL_4)); mca_btl_usnic_component.stats_relative = (bool) stats_relative; +#if RCACHE_VERSION == 30 + CHECK(reg_string("mpool_hints", "Hints to use when selecting mpool", + NULL, &mca_btl_usnic_component.usnic_mpool_hints, + REGSTR_EMPTY_OK, + OPAL_INFO_LVL_5)); + + CHECK(reg_string("rcache", "Name of the registration cache to be used", + "grdma", &mca_btl_usnic_component.usnic_rcache_name, 0, + OPAL_INFO_LVL_5)); +#else CHECK(reg_string("mpool", "Name of the memory pool to be used", "grdma", &mca_btl_usnic_component.usnic_mpool_name, 0, OPAL_INFO_LVL_5)); +#endif want_numa_device_assignment = OPAL_HAVE_HWLOC ? 1 : -1; CHECK(reg_int("want_numa_device_assignment", @@ -236,12 +248,16 @@ int opal_btl_usnic_component_register(void) -1, &cq_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5)); mca_btl_usnic_component.cq_num = (int32_t) cq_num; + CHECK(reg_int("av_eq_num", "Number of event queue entries for peer address resolution", + 1024, &av_eq_num, REGINT_GE_ONE, OPAL_INFO_LVL_5)); + mca_btl_usnic_component.av_eq_num = (int32_t) av_eq_num; + CHECK(reg_int("base_udp_port", "Base UDP port to use for usNIC communications. If 0, system will pick the port number. 
If non-zero, it will be added to each process' local rank to obtain the final port number (default: 0)", 0, &udp_port_base, REGINT_GE_ZERO, OPAL_INFO_LVL_5)); mca_btl_usnic_component.udp_port_base = (int) udp_port_base; CHECK(reg_int("retrans_timeout", "Number of microseconds before retransmitting a frame", - 1000, &mca_btl_usnic_component.retrans_timeout, + 5000, &mca_btl_usnic_component.retrans_timeout, REGINT_GE_ONE, OPAL_INFO_LVL_5)); CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)", diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index 2b36af126bd..efad1ed2b7c 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Intel, Inc. 
All rights reserved * $COPYRIGHT$ @@ -44,6 +44,8 @@ #include "opal/mca/btl/base/btl_base_error.h" #include "opal/mca/mpool/base/base.h" #include "opal/mca/mpool/mpool.h" +#include "opal/mca/rcache/base/base.h" +#include "opal/mca/rcache/rcache.h" #else #include "ompi/mca/btl/btl.h" #include "ompi/mca/btl/base/btl_base_error.h" @@ -67,15 +69,40 @@ static void finalize_one_channel(opal_btl_usnic_module_t *module, struct opal_btl_usnic_channel_t *channel); +static int channel_addr2str(opal_btl_usnic_module_t *module, int channel, + char *str, size_t len_param) +{ + size_t len; + + len = len_param; + fi_av_straddr(module->av, module->mod_channels[channel].info->src_addr, + str, &len); + if (len > len_param) { + opal_show_help("help-mpi-btl-usnic.txt", + "libfabric API failed", + true, + opal_process_info.nodename, + module->linux_device_name, + "fi_av_straddr", __FILE__, __LINE__, + FI_ENODATA, + "Failed to convert address to string: buffer too short"); + + return OPAL_ERR_OUT_OF_RESOURCE; + } + + return OPAL_SUCCESS; +} + /* - * Loop over all procs sent to us in add_procs and see if we want to - * add a proc/endpoint for them. + * Loop over a block of procs sent to us in add_procs and see if we + * want to add a proc/endpoint for them. 
*/ -static int add_procs_create_endpoints(opal_btl_usnic_module_t *module, - size_t nprocs, - opal_proc_t **procs, - mca_btl_base_endpoint_t **endpoints) +static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module, + size_t block_offset, + size_t block_len, + opal_proc_t **procs, + mca_btl_base_endpoint_t **endpoints) { int rc; opal_proc_t* my_proc; @@ -87,8 +114,8 @@ static int add_procs_create_endpoints(opal_btl_usnic_module_t *module, return OPAL_ERR_OUT_OF_RESOURCE; } - /* Loop over the procs we were given */ - for (size_t i = 0; i < nprocs; i++) { + /* Loop over a block in the procs we were given */ + for (size_t i = block_offset; i < (block_offset + block_len); i++) { struct opal_proc_t* opal_proc = procs[i]; opal_btl_usnic_proc_t* usnic_proc; mca_btl_base_endpoint_t* usnic_endpoint; @@ -97,11 +124,18 @@ static int add_procs_create_endpoints(opal_btl_usnic_module_t *module, /* Do not create loopback usnic connections */ if (opal_proc == my_proc) { + opal_output_verbose(75, USNIC_OUT, + "btl:usnic:add_procs:%s: not connecting to self", + module->linux_device_name); continue; } /* usNIC does not support loopback to the same machine */ if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) { + opal_output_verbose(75, USNIC_OUT, + "btl:usnic:add_procs:%s: not connecting to %s on same server", + module->linux_device_name, + usnic_compat_proc_name_print(&opal_proc->proc_name)); continue; } @@ -114,6 +148,11 @@ static int add_procs_create_endpoints(opal_btl_usnic_module_t *module, if (OPAL_ERR_UNREACH == rc) { /* If the peer doesn't have usnic modex info, then we just skip it */ + opal_output_verbose(75, USNIC_OUT, + "btl:usnic:add_procs:%s: peer %s on %s does not have usnic modex info; skipping", + module->linux_device_name, + usnic_compat_proc_name_print(&opal_proc->proc_name), + opal_get_proc_hostname(opal_proc)); continue; } else if (OPAL_SUCCESS != rc) { return OPAL_ERR_OUT_OF_RESOURCE; @@ -126,8 +165,10 @@ static int 
add_procs_create_endpoints(opal_btl_usnic_module_t *module, &usnic_endpoint); if (OPAL_SUCCESS != rc) { opal_output_verbose(5, USNIC_OUT, - "btl:usnic:%s: unable to create endpoint for module=%p proc=%p\n", - __func__, (void *)module, (void *)usnic_proc); + "btl:usnic:add_procs:%s: unable to create endpoint to peer %s on %s", + module->linux_device_name, + usnic_compat_proc_name_print(&opal_proc->proc_name), + opal_get_proc_hostname(opal_proc)); OBJ_RELEASE(usnic_proc); continue; } @@ -142,11 +183,29 @@ static int add_procs_create_endpoints(opal_btl_usnic_module_t *module, modex->ipv4_addr, modex->netmask); + char local_pri_addr[64] = {0}; + rc = channel_addr2str(module, USNIC_PRIORITY_CHANNEL, + local_pri_addr, sizeof(local_pri_addr)); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(usnic_proc); + continue; + } + + char local_data_addr[64] = {0}; + rc = channel_addr2str(module, USNIC_DATA_CHANNEL, + local_data_addr, sizeof(local_data_addr)); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(usnic_proc); + continue; + } + opal_output_verbose(5, USNIC_OUT, - "btl:usnic: new usnic peer endpoint: %s, proirity port %d, data port %d", - str, - modex->ports[USNIC_PRIORITY_CHANNEL], - modex->ports[USNIC_DATA_CHANNEL]); + "btl:usnic:add_procs:%s: new usnic peer endpoint: pri=%s:%d, data=%s:%d (local: pri=%s, data=%s)", + module->linux_device_name, + str, modex->ports[USNIC_PRIORITY_CHANNEL], + str, modex->ports[USNIC_DATA_CHANNEL], + local_pri_addr, + local_data_addr); endpoints[i] = usnic_endpoint; ++num_created; @@ -179,14 +238,14 @@ static void add_procs_warn_unreachable(opal_btl_usnic_module_t *module, opal_output_verbose(15, USNIC_OUT, "btl:usnic: %s (which is %s) couldn't reach peer %s", - module->fabric_info->fabric_attr->name, + module->linux_device_name, module->if_ipv4_addr_str, remote); opal_show_help("help-mpi-btl-usnic.txt", "unreachable peer IP", true, opal_process_info.nodename, module->if_ipv4_addr_str, - module->fabric_info->fabric_attr->name, + 
module->linux_device_name, opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal), remote); } @@ -195,9 +254,10 @@ static void add_procs_warn_unreachable(opal_btl_usnic_module_t *module, * invoked. Go reap them all. */ static int -add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, - size_t array_len, - struct mca_btl_base_endpoint_t **endpoints) +add_procs_block_reap_fi_av_inserts(opal_btl_usnic_module_t *module, + size_t block_offset, + size_t block_len, + struct mca_btl_base_endpoint_t **endpoints) { int ret = OPAL_SUCCESS; int num_left; @@ -205,12 +265,11 @@ add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, uint32_t event; struct fi_eq_entry entry; struct fi_eq_err_entry err_entry; - bool error_occurred = false; /* compute num fi_av_insert completions we are waiting for */ num_left = 0; - for (i = 0; i < array_len; ++i) { + for (i = block_offset; i < (block_offset + block_len); ++i) { if (NULL != endpoints[i]) { num_left += USNIC_NUM_CHANNELS; } @@ -266,7 +325,7 @@ add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, We therefore only want to print a pretty warning about (and OBJ_RELEASE) that endpoint the *first* time it is reported. 
*/ - for (i = 0; i < array_len; ++i) { + for (i = block_offset; i < (block_offset + block_len); ++i) { if (endpoints[i] == context->endpoint) { add_procs_warn_unreachable(module, context->endpoint); @@ -285,7 +344,7 @@ add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, "libfabric API failed", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "async insertion result", __FILE__, __LINE__, err_entry.err, "Failed to insert address to AV"); @@ -309,7 +368,7 @@ add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_eq_readerr()", __FILE__, __LINE__, ret, "Returned != sizeof(err_entry)"); @@ -330,7 +389,7 @@ add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_eq_sread()", __FILE__, __LINE__, ret, "Returned != (sizeof(entry) or -FI_EAVAIL)"); @@ -348,7 +407,7 @@ add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, - If an otherwise-valid endpoint has no dest, that means we timed out trying to resolve it, so just release that endpoint. */ size_t num_endpoints_created = 0; - for (i = 0; i < array_len; i++) { + for (i = block_offset; i < (block_offset + block_len); i++) { if (NULL != endpoints[i]) { bool happy; @@ -382,6 +441,79 @@ add_procs_reap_fi_av_inserts(opal_btl_usnic_module_t *module, return ret; } +/* + * Create endpoints for the procs we were given in add_procs. + */ +static int add_procs_create_endpoints(struct opal_btl_usnic_module_t* module, + size_t nprocs, + struct opal_proc_t **procs, + struct mca_btl_base_endpoint_t** endpoints) +{ + /* We need to ensure that we don't overrun the libfabric AV EQ. 
+ Divide up all the peer address resolutions we need to do into a + series of blocks; insert and complete each block before moving + to the next (note: if performance mandates it, we can move to a + sliding window style of AV inserts to get better concurrency of + AV resolution). */ + + /* Leave a few empty slots in the AV EQ, just for good measure */ + if (module->av_eq_size < 8) { + opal_show_help("help-mpi-btl-usnic.txt", "fi_av_eq too small", + true, + opal_process_info.nodename, + module->av_eq_size, + 8); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + size_t eq_size = module->av_eq_size - 8; + size_t block_len = eq_size; + size_t num_av_inserts = nprocs * USNIC_NUM_CHANNELS; + size_t num_blocks = num_av_inserts / block_len; + if (num_av_inserts % block_len != 0) { + ++num_blocks; + } + + /* Per above, the blocks are expressed in terms of number of AV + inserts. Convert them to be expressed in terms of number of + procs. */ + block_len /= USNIC_NUM_CHANNELS; + + /* Per above, loop over creating the endpoints so that we do not + overrun the libfabric AV EQ. */ + int rc; + for (size_t block_offset = 0, block = 0; block < num_blocks; + block_offset += block_len, ++block) { + /* Adjust for the last block */ + if (block_len > (nprocs - block_offset)) { + block_len = nprocs - block_offset; + } + + /* First, create endpoints (and procs, if they're not already + created) for the usnic-reachable procs we were given. */ + rc = add_procs_block_create_endpoints(module, + block_offset, block_len, + procs, endpoints); + if (OPAL_SUCCESS != rc) { + return rc; + } + + /* For each endpoint that was created, we initiated the + process to create NUM_CHANNELS fi_addrs. Go finish all of + those. This will be the final determination of whether we + can use the endpoint or not because we'll find out if each + endpoint is reachable or not. 
*/ + rc = add_procs_block_reap_fi_av_inserts(module, + block_offset, block_len, + endpoints); + if (OPAL_SUCCESS != rc) { + return rc; + } + } + + return OPAL_SUCCESS; +} + /* * Add procs to this BTL module, receiving endpoint information from * the modex. This is done in 2 phases: @@ -408,26 +540,16 @@ static int usnic_add_procs(struct mca_btl_base_module_t* base_module, opal_btl_usnic_module_t* module = (opal_btl_usnic_module_t*) base_module; int rc; - /* First, create endpoints (and procs, if they're not already - created) for all the usnic-reachable procs we were given. */ + /* Go create the endpoints (including all relevant address + resolution) */ rc = add_procs_create_endpoints(module, nprocs, procs, endpoints); if (OPAL_SUCCESS != rc) { goto fail; } - /* For each endpoint that was created, we initiated the process to - create NUM_CHANNELS fi_addrs. Go finish all of those. This - will be the final determination of whether we can use the - endpoint or not because we'll find out if each endpoint is - reachable or not. 
*/ - rc = add_procs_reap_fi_av_inserts(module, nprocs, endpoints); - if (OPAL_SUCCESS != rc) { - goto fail; - } - /* Find all the endpoints with a complete set of USD destinations and mark them as reachable */ - for (size_t i = 0; i < nprocs; ++i) { + for (size_t i = 0; NULL != reachable && i < nprocs; ++i) { if (NULL != endpoints[i]) { bool happy = true; for (int channel = 0; channel < USNIC_NUM_CHANNELS; ++channel) { @@ -805,7 +927,11 @@ static int usnic_finalize(struct mca_btl_base_module_t* btl) OBJ_DESTRUCT(&module->chunk_segs); OBJ_DESTRUCT(&module->senders); +#if RCACHE_VERSION == 30 + mca_rcache_base_module_destroy(module->rcache); +#else mca_mpool_base_module_destroy(module->super.btl_mpool); +#endif if (NULL != module->av) { fi_close(&module->av->fid); @@ -819,6 +945,8 @@ static int usnic_finalize(struct mca_btl_base_module_t* btl) fi_close(&module->domain->fid); fi_close(&module->fabric->fid); + free(module->linux_device_name); + return OPAL_SUCCESS; } @@ -859,7 +987,7 @@ usnic_do_resends( /* resends are always standard segments */ sseg->ss_channel = USNIC_DATA_CHANNEL; - /* re-send the segment */ + /* re-send the segment (we have a send credit available) */ opal_btl_usnic_post_segment(module, endpoint, sseg); /* consume a send credit for this endpoint. May send us @@ -889,6 +1017,9 @@ usnic_do_resends( * endpoint_send_segment() it. Takes care of subsequent frag * cleanup/bookkeeping (dequeue, descriptor callback, etc.) if this frag was * completed by this segment. + * + * ASSUMES THAT THE CALLER HAS ALREADY CHECKED TO SEE IF WE HAVE + * A SEND CREDIT! */ static void usnic_handle_large_send( @@ -942,7 +1073,8 @@ usnic_handle_large_send( /* payload length into the header*/ sseg->ss_base.us_btl_header->payload_len = payload_len; - /* do the send */ + // We assume that the caller has checked to see that we have a + // send credit, so do the send. 
opal_btl_usnic_endpoint_send_segment(module, sseg); /* do fragment bookkeeping */ @@ -1001,6 +1133,7 @@ opal_btl_usnic_module_progress_sends( /* * Handle all the retransmits we can */ + OPAL_THREAD_LOCK(&btl_usnic_lock); if (OPAL_UNLIKELY(!opal_list_is_empty(&module->pending_resend_segs))) { usnic_do_resends(module); } @@ -1053,7 +1186,7 @@ opal_btl_usnic_module_progress_sends( sseg->ss_base.us_btl_header->tag); #endif - /* post the send */ + /* post the send (we have a send credit available) */ opal_btl_usnic_endpoint_send_segment(module, sseg); /* don't do callback yet if this is a put */ @@ -1104,12 +1237,18 @@ opal_btl_usnic_module_progress_sends( /* Is it time to send ACK? */ if (endpoint->endpoint_acktime == 0 || endpoint->endpoint_acktime <= get_nsec()) { - opal_btl_usnic_ack_send(module, endpoint); - opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint); + if (OPAL_LIKELY(opal_btl_usnic_ack_send(module, endpoint) == OPAL_SUCCESS)) { + opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint); + } else { + // If we fail, it means we're out of send credits on + // the ACK channel + break; + } } endpoint = next_endpoint; } + OPAL_THREAD_UNLOCK(&btl_usnic_lock); } /* @@ -1144,6 +1283,7 @@ usnic_send( opal_btl_usnic_module_t *module; opal_btl_usnic_send_segment_t *sseg; + OPAL_THREAD_LOCK(&btl_usnic_lock); endpoint = (opal_btl_usnic_endpoint_t *)base_endpoint; module = (opal_btl_usnic_module_t *)base_module; frag = (opal_btl_usnic_send_frag_t*) descriptor; @@ -1178,8 +1318,8 @@ usnic_send( if (frag->sf_base.uf_type == OPAL_BTL_USNIC_FRAG_SMALL_SEND && frag->sf_ack_bytes_left < module->max_tiny_payload && WINDOW_OPEN(endpoint) && - (get_send_credits(&module->mod_channels[USNIC_PRIORITY_CHANNEL]) >= - module->mod_channels[USNIC_PRIORITY_CHANNEL].fastsend_wqe_thresh)) { + (get_send_credits(&module->mod_channels[USNIC_DATA_CHANNEL]) >= + module->mod_channels[USNIC_DATA_CHANNEL].fastsend_wqe_thresh)) { size_t payload_len; sfrag = 
(opal_btl_usnic_small_send_frag_t *)frag; @@ -1205,13 +1345,13 @@ usnic_send( /* assign length */ sseg->ss_len = sizeof(opal_btl_usnic_btl_header_t) + frag->sf_size; - sseg->ss_channel = USNIC_PRIORITY_CHANNEL; + sseg->ss_channel = USNIC_DATA_CHANNEL; sseg->ss_base.us_btl_header->tag = tag; #if MSGDEBUG1 opal_output(0, "INLINE send, sseg=%p", (void *)sseg); #endif - /* post the segment now */ + /* post the segment now (we have a send credit available) */ opal_btl_usnic_endpoint_send_segment(module, sseg); /* If we own the frag and callback was requested, callback now, @@ -1252,6 +1392,7 @@ usnic_send( ++module->stats.pml_module_sends; + OPAL_THREAD_UNLOCK(&btl_usnic_lock); return rc; } @@ -1334,7 +1475,7 @@ static void module_async_event_callback(int fd, short flags, void *arg) opal_show_help("help-mpi-btl-usnic.txt", "libfabric API failed", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_eq_read()", __FILE__, __LINE__, ret, "Failed to get domain event"); @@ -1353,7 +1494,7 @@ static void module_async_event_callback(int fd, short flags, void *arg) ignore it. 
*/ opal_output_verbose(10, USNIC_OUT, "btl:usnic: got LINK_UP on %s", - module->fabric_info->fabric_attr->name); + module->linux_device_name); break; case 1: // USD_EVENT_LINK_DOWN: @@ -1372,7 +1513,7 @@ static void module_async_event_callback(int fd, short flags, void *arg) opal_show_help("help-mpi-btl-usnic.txt", "async event", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, str, entry.data); fatal = true; } @@ -1403,7 +1544,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_dupinfo() failed", __FILE__, __LINE__, -1, "Unknown"); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1421,14 +1562,14 @@ static int create_ep(opal_btl_usnic_module_t* module, opal_process_info.my_local_rank); } - rc = fi_getinfo(FI_VERSION(1, 0), NULL, 0, 0, hint, &channel->info); + rc = fi_getinfo(module->libfabric_api, NULL, 0, 0, hint, &channel->info); fi_freeinfo(hint); if (0 != rc) { opal_show_help("help-mpi-btl-usnic.txt", "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_getinfo() failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1450,12 +1591,13 @@ static int create_ep(opal_btl_usnic_module_t* module, sa = (struct sockaddr *)channel->info->src_addr; assert(AF_INET == sa->sa_family); } +#endif + sin = (struct sockaddr_in *)channel->info->src_addr; assert(sizeof(struct sockaddr_in) == channel->info->src_addrlen); /* no matter the version of libfabric, this should hold */ assert(0 == sin->sin_port); -#endif rc = fi_endpoint(module->domain, channel->info, &channel->ep, NULL); if (0 != rc || NULL == channel->ep) { @@ -1463,12 +1605,37 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - 
module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_endpoint() failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; } + /* Check to ensure that the RX/TX queue lengths are at least as + long as we asked for */ + if ((int) channel->info->rx_attr->size < channel->chan_rd_num) { + rc = FI_ETOOSMALL; + opal_show_help("help-mpi-btl-usnic.txt", + "internal error during init", + true, + opal_process_info.nodename, + module->linux_device_name, + "endpoint RX queue length is too short", __FILE__, __LINE__, + rc, fi_strerror(rc)); + return OPAL_ERR_OUT_OF_RESOURCE; + } + if ((int) channel->info->tx_attr->size < channel->chan_sd_num) { + rc = FI_ETOOSMALL; + opal_show_help("help-mpi-btl-usnic.txt", + "internal error during init", + true, + opal_process_info.nodename, + module->linux_device_name, + "endpoint TX queue length is too short", __FILE__, __LINE__, + rc, fi_strerror(rc)); + return OPAL_ERR_OUT_OF_RESOURCE; + } + /* attach CQ to EP */ rc = fi_ep_bind(channel->ep, &channel->cq->fid, FI_SEND); if (0 != rc) { @@ -1476,7 +1643,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_ep_bind() SCQ to EP failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1487,7 +1654,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_ep_bind() RCQ to EP failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1498,7 +1665,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_ep_bind() AV to EP failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); 
return OPAL_ERR_OUT_OF_RESOURCE; @@ -1511,7 +1678,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_enable() failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1533,7 +1700,7 @@ static int create_ep(opal_btl_usnic_module_t* module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_getname() failed", __FILE__, __LINE__, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; @@ -1541,9 +1708,20 @@ static int create_ep(opal_btl_usnic_module_t* module, assert(0 != sin->sin_port); } - /* actual sizes */ - channel->chan_rd_num = channel->info->rx_attr->size; - channel->chan_sd_num = channel->info->tx_attr->size; + char *str; + if (USNIC_PRIORITY_CHANNEL == channel->chan_index) { + str = "priority"; + } else if (USNIC_DATA_CHANNEL == channel->chan_index) { + str = "data"; + } else { + str = "UNKNOWN"; + } + opal_output_verbose(15, USNIC_OUT, + "btl:usnic:create_ep:%s: new usnic local endpoint channel %s: %s:%d", + module->linux_device_name, + str, + inet_ntoa(sin->sin_addr), + ntohs(sin->sin_port)); return OPAL_SUCCESS; } @@ -1587,7 +1765,8 @@ static int init_one_channel(opal_btl_usnic_module_t *module, int index, int max_msg_size, int rd_num, - int sd_num) + int sd_num, + int cq_num) { int i; int rc; @@ -1608,6 +1787,7 @@ static int init_one_channel(opal_btl_usnic_module_t *module, channel->fastsend_wqe_thresh = sd_num - 10; channel->credits = sd_num; + channel->rx_post_cnt = 0; /* We did math up in component_init() to know that there should be enough CQs available. 
So if create_cq() fails, then either the @@ -1616,15 +1796,30 @@ static int init_one_channel(opal_btl_usnic_module_t *module, memset(&cq_attr, 0, sizeof(cq_attr)); cq_attr.format = FI_CQ_FORMAT_CONTEXT; cq_attr.wait_obj = FI_WAIT_NONE; - cq_attr.size = module->cq_num; + cq_attr.size = cq_num; rc = fi_cq_open(module->domain, &cq_attr, &channel->cq, NULL); if (0 != rc) { opal_show_help("help-mpi-btl-usnic.txt", "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, - "failed to create CQ", __FILE__, __LINE__); + module->linux_device_name, + "failed to create CQ", __FILE__, __LINE__, + rc, fi_strerror(-rc)); + goto error; + } + + /* Ensure that we got a CQ that is at least as long as we asked + for */ + if ((int) cq_attr.size < cq_num) { + rc = FI_ETOOSMALL; + opal_show_help("help-mpi-btl-usnic.txt", + "internal error during init", + true, + opal_process_info.nodename, + module->linux_device_name, + "created CQ is too small", __FILE__, __LINE__, + rc, fi_strerror(rc)); goto error; } @@ -1634,6 +1829,18 @@ static int init_one_channel(opal_btl_usnic_module_t *module, goto error; } + assert(channel->info->ep_attr->msg_prefix_size == + (uint32_t) mca_btl_usnic_component.transport_header_len); + + opal_output_verbose(15, USNIC_OUT, + "btl:usnic:init_one_channel:%s: channel %s, rx queue size=%" PRIsize_t ", tx queue size=%" PRIsize_t ", cq size=%" PRIsize_t ", send credits=%d", + module->linux_device_name, + (index == USNIC_PRIORITY_CHANNEL) ? "priority" : "data", + channel->info->rx_attr->size, + channel->info->tx_attr->size, + cq_attr.size, + channel->credits); + /* * Initialize pool of receive segments. 
Round MTU up to cache * line size so that each segment is guaranteed to start on a @@ -1652,9 +1859,9 @@ static int init_one_channel(opal_btl_usnic_module_t *module, rd_num /* num erorments to alloc */, rd_num /* max elements to alloc */, rd_num /* num elements per alloc */, - module->super.btl_mpool /* mpool for reg */, + module->super.btl_mpool /* mpool for (1.x, 2.0: reg, 2.1+: allocation) */, 0 /* mpool reg flags */, - NULL /* unused0 */, + module->rcache /* registration cache for 2.1+ */, NULL /* item_init */, NULL /* item_init_context */); channel->recv_segs.ctx = module; /* must come after @@ -1676,7 +1883,7 @@ static int init_one_channel(opal_btl_usnic_module_t *module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "Failed to get receive buffer from freelist", __FILE__, __LINE__); goto error; @@ -1692,7 +1899,7 @@ static int init_one_channel(opal_btl_usnic_module_t *module, "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "Failed to post receive buffer", __FILE__, __LINE__); goto error; @@ -1759,7 +1966,7 @@ static void init_local_modex_part1(opal_btl_usnic_module_t *module) opal_output_verbose(5, USNIC_OUT, "btl:usnic: %s IP charactertics: %s, %u Mbps", - module->fabric_info->fabric_attr->name, + module->linux_device_name, module->if_ipv4_addr_str, modex->link_speed_mbps); } @@ -1777,6 +1984,33 @@ static void init_find_transport_header_len(opal_btl_usnic_module_t *module) module->fabric_info->ep_attr->msg_prefix_size; mca_btl_usnic_component.transport_protocol = module->fabric_info->ep_attr->protocol; + + /* The usnic provider in libfabric v1.0.0 (i.e., API v1.0) treated + FI_MSG_PREFIX inconsistently between senders and receivers. It + was corrected in libfabric v1.1.0 (i.e., API v1.1), meaning + that FI_MSG_PREFIX is treated consistently between senders and + receivers. 
+ + So check what version of the libfabric API we have, and set up + to use the "old" (inconsistent) MSG_PREFIX behavior, or the + "new" MSG_PREFIX (consistent) behavior. + + NOTE: This is a little redundant; we're setting a + component-level attribute during each module's setup. We do + this here (and not earlier, when we check fi_version() during + the component setup) because we can't obtain the value of the + endpoint msg_prefix_size until we set up the first module. + Also, it's safe because each module will set the component + attribute to the same value. So it's ok. */ + uint32_t libfabric_api; + libfabric_api = fi_version(); + if (1 == FI_MAJOR(libfabric_api) && + 0 == FI_MINOR(libfabric_api)) { + mca_btl_usnic_component.prefix_send_offset = 0; + } else { + mca_btl_usnic_component.prefix_send_offset = + module->fabric_info->ep_attr->msg_prefix_size; + } } /* @@ -1784,6 +2018,11 @@ static void init_find_transport_header_len(opal_btl_usnic_module_t *module) */ static void init_queue_lengths(opal_btl_usnic_module_t *module) { + bool cq_is_sum = false; + if (-1 == mca_btl_usnic_component.cq_num) { + cq_is_sum = true; + } + if (-1 == mca_btl_usnic_component.sd_num) { module->sd_num = module->fabric_info->tx_attr->size; } else { @@ -1794,11 +2033,12 @@ static void init_queue_lengths(opal_btl_usnic_module_t *module) } else { module->rd_num = mca_btl_usnic_component.rd_num; } - if (-1 == mca_btl_usnic_component.cq_num) { + if (cq_is_sum) { module->cq_num = module->rd_num + module->sd_num; } else { module->cq_num = mca_btl_usnic_component.cq_num; } + module->av_eq_num = mca_btl_usnic_component.av_eq_num; /* * Queue sizes for priority channel scale with # of endpoint.
A @@ -1828,6 +2068,11 @@ static void init_queue_lengths(opal_btl_usnic_module_t *module) module->fabric_info->rx_attr->size) { module->prio_rd_num = module->fabric_info->rx_attr->size; } + if (cq_is_sum) { + module->prio_cq_num = module->prio_rd_num + module->prio_sd_num; + } else { + module->prio_cq_num = module->cq_num; + } } static void init_payload_lengths(opal_btl_usnic_module_t *module) @@ -1835,13 +2080,15 @@ static void init_payload_lengths(opal_btl_usnic_module_t *module) /* Find the max payload this port can handle */ module->max_frag_payload = module->local_modex.max_msg_size - /* start with the MTU */ - sizeof(opal_btl_usnic_btl_header_t); /* subtract size of - the BTL header */ + sizeof(opal_btl_usnic_btl_header_t) - /* subtract size of + the BTL header */ + mca_btl_usnic_component.prefix_send_offset; /* same, but use chunk header */ module->max_chunk_payload = module->local_modex.max_msg_size - - sizeof(opal_btl_usnic_btl_chunk_header_t); + sizeof(opal_btl_usnic_btl_chunk_header_t) - + mca_btl_usnic_component.prefix_send_offset; /* Priorirty queue MTU and max size */ if (0 == module->max_tiny_msg_size) { @@ -1940,17 +2187,35 @@ static int init_mpool(opal_btl_usnic_module_t *module) mpool_resources.sizeof_reg = sizeof(opal_btl_usnic_reg_t); mpool_resources.register_mem = usnic_reg_mr; mpool_resources.deregister_mem = usnic_dereg_mr; +#if RCACHE_VERSION == 30 + mpool_resources.cache_name = mca_btl_usnic_component.usnic_rcache_name; + module->rcache = + mca_rcache_base_module_create (mca_btl_usnic_component.usnic_rcache_name, + &module->super, &mpool_resources); + if (NULL == module->rcache) { + opal_show_help("help-mpi-btl-usnic.txt", + "internal error during init", + true, + opal_process_info.nodename, + module->linux_device_name, + "create rcache", __FILE__, __LINE__); + return OPAL_ERROR; + } + module->super.btl_mpool = + mca_mpool_base_module_lookup (mca_btl_usnic_component.usnic_mpool_hints); +#else asprintf(&mpool_resources.pool_name, "%s", - 
module->fabric_info->fabric_attr->name); + module->linux_device_name); module->super.btl_mpool = mca_mpool_base_module_create(mca_btl_usnic_component.usnic_mpool_name, &module->super, &mpool_resources); +#endif if (NULL == module->super.btl_mpool) { opal_show_help("help-mpi-btl-usnic.txt", "internal error during init", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "create mpool", __FILE__, __LINE__); return OPAL_ERROR; } @@ -1984,12 +2249,15 @@ static int init_channels(opal_btl_usnic_module_t *module) } memset(&eq_attr, 0, sizeof(eq_attr)); - eq_attr.size = 1024; + eq_attr.size = module->av_eq_num; eq_attr.wait_obj = FI_WAIT_UNSPEC; rc = fi_eq_open(module->fabric, &eq_attr, &module->av_eq, NULL); if (rc != OPAL_SUCCESS) { goto destroy; } + // Save the size of the created EQ + module->av_eq_size = eq_attr.size; + eq_attr.wait_obj = FI_WAIT_FD; rc = fi_eq_open(module->fabric, &eq_attr, &module->dom_eq, NULL); if (rc != OPAL_SUCCESS) { @@ -2010,14 +2278,14 @@ static int init_channels(opal_btl_usnic_module_t *module) rc = init_one_channel(module, USNIC_PRIORITY_CHANNEL, module->max_tiny_msg_size, - module->prio_rd_num, module->prio_sd_num); + module->prio_rd_num, module->prio_sd_num, module->prio_cq_num); if (rc != OPAL_SUCCESS) { goto destroy; } rc = init_one_channel(module, USNIC_DATA_CHANNEL, module->fabric_info->ep_attr->max_msg_size, - module->rd_num, module->sd_num); + module->rd_num, module->sd_num, module->cq_num); if (rc != OPAL_SUCCESS) { goto destroy; } @@ -2060,15 +2328,16 @@ static void init_async_event(opal_btl_usnic_module_t *module) "libfabric API failed", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_control(eq, FI_GETWAIT)", __FILE__, __LINE__, ret, fi_strerror(-ret)); return; } - /* Get the fd to receive events on this device */ - opal_event_set(opal_event_base, &(module->device_async_event), fd, + /* Get the fd to receive events 
on this device. Keep this in the + sync event base (not the async event base) */ + opal_event_set(opal_sync_event_base, &(module->device_async_event), fd, OPAL_EV_READ | OPAL_EV_PERSIST, module_async_event_callback, module); opal_event_add(&(module->device_async_event), NULL); @@ -2093,11 +2362,10 @@ static void init_random_objects(opal_btl_usnic_module_t *module) static void init_freelists(opal_btl_usnic_module_t *module) { - int rc; + int rc __opal_attribute_unused__; uint32_t segsize; segsize = (module->local_modex.max_msg_size + - module->fabric_info->ep_attr->msg_prefix_size + opal_cache_line_size - 1) & ~(opal_cache_line_size - 1); @@ -2105,7 +2373,7 @@ static void init_freelists(opal_btl_usnic_module_t *module) OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t); rc = usnic_compat_free_list_init(&module->small_send_frags, sizeof(opal_btl_usnic_small_send_frag_t) + - mca_btl_usnic_component.transport_header_len, + mca_btl_usnic_component.prefix_send_offset, opal_cache_line_size, OBJ_CLASS(opal_btl_usnic_small_send_frag_t), segsize, @@ -2115,7 +2383,7 @@ static void init_freelists(opal_btl_usnic_module_t *module) module->sd_num / 2, module->super.btl_mpool, 0 /* mpool reg flags */, - NULL /* unused0 */, + module->rcache, NULL /* item_init */, NULL /* item_init_context */); assert(OPAL_SUCCESS == rc); @@ -2123,7 +2391,7 @@ static void init_freelists(opal_btl_usnic_module_t *module) OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t); rc = usnic_compat_free_list_init(&module->large_send_frags, sizeof(opal_btl_usnic_large_send_frag_t) + - mca_btl_usnic_component.transport_header_len, + mca_btl_usnic_component.prefix_send_offset, opal_cache_line_size, OBJ_CLASS(opal_btl_usnic_large_send_frag_t), 0, /* payload size */ @@ -2141,7 +2409,7 @@ static void init_freelists(opal_btl_usnic_module_t *module) OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t); rc = usnic_compat_free_list_init(&module->put_dest_frags, sizeof(opal_btl_usnic_put_dest_frag_t) 
+ - mca_btl_usnic_component.transport_header_len, + mca_btl_usnic_component.prefix_send_offset, opal_cache_line_size, OBJ_CLASS(opal_btl_usnic_put_dest_frag_t), 0, /* payload size */ @@ -2160,7 +2428,7 @@ static void init_freelists(opal_btl_usnic_module_t *module) OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t); rc = usnic_compat_free_list_init(&module->chunk_segs, sizeof(opal_btl_usnic_chunk_segment_t) + - mca_btl_usnic_component.transport_header_len, + mca_btl_usnic_component.prefix_send_offset, opal_cache_line_size, OBJ_CLASS(opal_btl_usnic_chunk_segment_t), segsize, @@ -2170,7 +2438,7 @@ static void init_freelists(opal_btl_usnic_module_t *module) module->sd_num / 2, module->super.btl_mpool, 0 /* mpool reg flags */, - NULL /* unused0 */, + module->rcache, NULL /* item_init */, NULL /* item_init_context */); assert(OPAL_SUCCESS == rc); @@ -2178,12 +2446,11 @@ static void init_freelists(opal_btl_usnic_module_t *module) /* ACK segments freelist */ uint32_t ack_segment_len; ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) + - module->fabric_info->ep_attr->msg_prefix_size + opal_cache_line_size - 1) & ~(opal_cache_line_size - 1); OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t); rc = usnic_compat_free_list_init(&module->ack_segs, sizeof(opal_btl_usnic_ack_segment_t) + - mca_btl_usnic_component.transport_header_len, + mca_btl_usnic_component.prefix_send_offset, opal_cache_line_size, OBJ_CLASS(opal_btl_usnic_ack_segment_t), ack_segment_len, @@ -2193,7 +2460,7 @@ static void init_freelists(opal_btl_usnic_module_t *module) module->sd_num / 2, module->super.btl_mpool, 0 /* mpool reg flags */, - NULL /* unused0 */, + module->rcache, NULL /* item_init */, NULL /* item_init_context */); assert(OPAL_SUCCESS == rc); @@ -2250,7 +2517,11 @@ int opal_btl_usnic_module_init(opal_btl_usnic_module_t *module) int ret; if (OPAL_SUCCESS != (ret = init_mpool(module)) || OPAL_SUCCESS != (ret = init_channels(module))) { +#if RCACHE_VERSION == 30 + 
mca_rcache_base_module_destroy (module->rcache); +#else mca_mpool_base_module_destroy(module->super.btl_mpool); +#endif return ret; } @@ -2308,8 +2579,11 @@ opal_btl_usnic_module_t opal_btl_usnic_module_template = { .btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT, .btl_flags = MCA_BTL_FLAGS_SEND | - MCA_BTL_FLAGS_PUT | - MCA_BTL_FLAGS_SEND_INPLACE, + MCA_BTL_FLAGS_SEND_INPLACE | + /* Need to set FLAGS_SINGLE_ADD_PROCS until + btl_recv.h:lookup_sender() can handle an incoming + message with an unknown sender. */ + MCA_BTL_FLAGS_SINGLE_ADD_PROCS, .btl_add_procs = usnic_add_procs, .btl_del_procs = usnic_del_procs, diff --git a/opal/mca/btl/usnic/btl_usnic_module.h b/opal/mca/btl/usnic/btl_usnic_module.h index ea6e44cfab8..17de3d090be 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.h +++ b/opal/mca/btl/usnic/btl_usnic_module.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,7 +12,9 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -68,7 +71,8 @@ typedef struct opal_btl_usnic_channel_t { int chan_rd_num; int chan_sd_num; - int credits; /* RFXXX until libfab credits fixed */ + int credits; + uint32_t rx_post_cnt; /* fastsend enabled if num_credits_available >= fastsend_wqe_thresh */ unsigned fastsend_wqe_thresh; @@ -99,8 +103,10 @@ typedef struct opal_btl_usnic_module_t { /* Cache for use during component_init to associate a module with the libfabric device that it came from. 
*/ + uint32_t libfabric_api; struct fid_fabric *fabric; struct fid_domain *domain; + char *linux_device_name; struct fi_info *fabric_info; struct fi_usnic_ops_fabric *usnic_fabric_ops; struct fi_usnic_ops_av *usnic_av_ops; @@ -109,6 +115,8 @@ typedef struct opal_btl_usnic_module_t { struct fid_eq *av_eq; struct fid_av *av; + size_t av_eq_size; + mca_btl_base_module_error_cb_fn_t pml_error_callback; /* Information about the events */ @@ -126,8 +134,10 @@ typedef struct opal_btl_usnic_module_t { int sd_num; int rd_num; int cq_num; + int av_eq_num; int prio_sd_num; int prio_rd_num; + int prio_cq_num; /* * Fragments larger than max_frag_payload will be broken up into @@ -193,6 +203,9 @@ typedef struct opal_btl_usnic_module_t { /* Performance / debugging statistics */ opal_btl_usnic_module_stats_t stats; + + /** registration cache module (v2.1+) */ + mca_rcache_base_module_t *rcache; } opal_btl_usnic_module_t; struct opal_btl_usnic_frag_t; diff --git a/opal/mca/btl/usnic/btl_usnic_proc.c b/opal/mca/btl/usnic/btl_usnic_proc.c index 9d71a6ed9d5..f0fefbff964 100644 --- a/opal/mca/btl/usnic/btl_usnic_proc.c +++ b/opal/mca/btl/usnic/btl_usnic_proc.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * $COPYRIGHT$ * @@ -643,7 +643,7 @@ static int match_modex(opal_btl_usnic_module_t *module, opal_show_help("help-mpi-btl-usnic.txt", "MTU mismatch", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, module->fabric_info->ep_attr->max_msg_size, (NULL == proc->proc_opal->proc_hostname) ? 
"unknown" : proc->proc_opal->proc_hostname, @@ -700,7 +700,7 @@ static int start_av_insert(opal_btl_usnic_module_t *module, opal_show_help("help-mpi-btl-usnic.txt", "libfabric API failed", true, opal_process_info.nodename, - module->fabric_info->fabric_attr->name, + module->linux_device_name, "fi_av_insert()", __FILE__, __LINE__, ret, "Failed to initiate AV insert"); diff --git a/opal/mca/btl/usnic/btl_usnic_recv.c b/opal/mca/btl/usnic/btl_usnic_recv.c index 0f94c131be5..443e2b0e961 100644 --- a/opal/mca/btl/usnic/btl_usnic_recv.c +++ b/opal/mca/btl/usnic/btl_usnic_recv.c @@ -77,7 +77,7 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module, /* Find out who sent this segment */ endpoint = seg->rs_endpoint; - if (FAKE_RECV_FRAG_DROP || OPAL_UNLIKELY(NULL == endpoint)) { + if (FAKE_RECV_DROP || OPAL_UNLIKELY(NULL == endpoint)) { /* No idea who this was from, so drop it */ #if MSGDEBUG1 opal_output(0, "=== Unknown sender; dropped: seq %" UDSEQ, @@ -340,8 +340,9 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module, opal_output(0, " Received ACK for sequence number %" UDSEQ " from %s to %s\n", bseg->us_btl_header->ack_seq, remote_ip, local_ip); #endif + OPAL_THREAD_LOCK(&btl_usnic_lock); opal_btl_usnic_handle_ack(endpoint, ack_seq); - + OPAL_THREAD_UNLOCK(&btl_usnic_lock); goto repost; } diff --git a/opal/mca/btl/usnic/btl_usnic_recv.h b/opal/mca/btl/usnic/btl_usnic_recv.h index 227c5e62fdc..70ffa7d4db2 100644 --- a/opal/mca/btl/usnic/btl_usnic_recv.h +++ b/opal/mca/btl/usnic/btl_usnic_recv.h @@ -23,12 +23,27 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module, static inline int opal_btl_usnic_post_recv_list(opal_btl_usnic_channel_t *channel) { + struct iovec iov; + struct fi_msg msg; + uint64_t flag; opal_btl_usnic_recv_segment_t *rseg; int rc; + msg.msg_iov = &iov; + msg.iov_count = 1; for (rseg = channel->repost_recv_head; NULL != rseg; rseg = rseg->rs_next) { - rc = fi_recv(channel->ep, rseg->rs_protocol_header, - rseg->rs_len, 
NULL, FI_ADDR_UNSPEC, rseg); + msg.context = rseg; + iov.iov_base = rseg->rs_protocol_header; + iov.iov_len = rseg->rs_len; + + ++channel->rx_post_cnt; + if (OPAL_UNLIKELY((channel->rx_post_cnt & 15) == 0)) { + flag = 0; + } else { + flag = FI_MORE; + } + + rc = fi_recvmsg(channel->ep, &msg, flag); if (0 != rc) { return rc; } @@ -142,8 +157,10 @@ opal_btl_usnic_check_rx_seq( #if MSGDEBUG1 opal_output(0, "Handle piggy-packed ACK seq %"UDSEQ"\n", seg->rs_base.us_btl_header->ack_seq); #endif + OPAL_THREAD_LOCK(&btl_usnic_lock); opal_btl_usnic_handle_ack(endpoint, seg->rs_base.us_btl_header->ack_seq); + OPAL_THREAD_UNLOCK(&btl_usnic_lock); } /* Do we have room in the endpoint's receiver window? @@ -267,6 +284,9 @@ opal_btl_usnic_recv_fast(opal_btl_usnic_module_t *module, int delta; int i; + /* Make the whole payload Valgrind defined */ + opal_memchecker_base_mem_defined(seg->rs_protocol_header, seg->rs_len); + bseg = &seg->rs_base; /* Find out who sent this segment */ @@ -286,10 +306,6 @@ opal_btl_usnic_dump_hex(bseg->us_btl_header, bseg->us_btl_header->payload_len + bseg->us_btl_header->payload_type) && seg->rs_base.us_btl_header->put_addr == NULL) { - /* Valgrind help */ - opal_memchecker_base_mem_defined( - (void*)(seg->rs_protocol_header), seg->rs_len); - seq = seg->rs_base.us_btl_header->pkt_seq; delta = SEQ_DIFF(seq, endpoint->endpoint_next_contig_seq_to_recv); if (delta < 0 || delta >= WINDOW_SIZE) { @@ -382,6 +398,9 @@ opal_btl_usnic_recv(opal_btl_usnic_module_t *module, opal_btl_usnic_endpoint_t *endpoint; int rc; + /* Make the whole payload Valgrind defined */ + opal_memchecker_base_mem_defined(seg->rs_protocol_header, seg->rs_len); + bseg = &seg->rs_base; /* Find out who sent this segment */ diff --git a/opal/mca/btl/usnic/btl_usnic_send.c b/opal/mca/btl/usnic/btl_usnic_send.c index aff68722aa1..3bfb68a8e7b 100644 --- a/opal/mca/btl/usnic/btl_usnic_send.c +++ b/opal/mca/btl/usnic/btl_usnic_send.c @@ -11,7 +11,7 @@ * All rights reserved. 
* Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -43,8 +43,9 @@ /* - * This function is called when a send of a full-fragment segment completes - * Return the WQE and also return the segment if no ACK pending + * This function is called when a send of a segment completes that is + * the one-and-only segment of an MPI message. Return the WQE and + * also return the segment if no ACK pending. */ void opal_btl_usnic_frag_send_complete(opal_btl_usnic_module_t *module, @@ -59,18 +60,27 @@ opal_btl_usnic_frag_send_complete(opal_btl_usnic_module_t *module, --frag->sf_seg_post_cnt; /* checks for returnability made inside */ + opal_btl_usnic_endpoint_t *ep = frag->sf_endpoint; opal_btl_usnic_send_frag_return_cond(module, frag); + // In a short frag segment, the sseg is embedded in the frag. So + // there's no need to return the sseg (because we already returned + // the frag). + /* do bookkeeping */ - ++frag->sf_endpoint->endpoint_send_credits; + ++ep->endpoint_send_credits; /* see if this endpoint needs to be made ready-to-send */ - opal_btl_usnic_check_rts(frag->sf_endpoint); + opal_btl_usnic_check_rts(ep); + + ++module->mod_channels[sseg->ss_channel].credits; } /* - * This function is called when a send segment completes - * Return the WQE and also return the segment if no ACK pending + * This function is called when a send segment completes that is part + * of a larger MPI message (ie., there may still be other chunk + * segments that have not yet completed sending). Return the WQE and + * also return the segment if no ACK pending. 
*/ void opal_btl_usnic_chunk_send_complete(opal_btl_usnic_module_t *module, @@ -97,6 +107,8 @@ opal_btl_usnic_chunk_send_complete(opal_btl_usnic_module_t *module, /* see if this endpoint needs to be made ready-to-send */ opal_btl_usnic_check_rts(frag->sf_endpoint); + + ++module->mod_channels[sseg->ss_channel].credits; } /* Responsible for completing non-fastpath parts of a put or send operation, diff --git a/opal/mca/btl/usnic/btl_usnic_send.h b/opal/mca/btl/usnic/btl_usnic_send.h index 02fd2e90795..4d093b80156 100644 --- a/opal/mca/btl/usnic/btl_usnic_send.h +++ b/opal/mca/btl/usnic/btl_usnic_send.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -52,7 +52,10 @@ opal_btl_usnic_check_rts( } /* - * Common point for posting a segment + * Common point for posting a segment. + * + * ASSUMES THAT THE CALLER HAS ALREADY CHECKED TO SEE IF WE HAVE + * A SEND CREDIT! */ static inline void opal_btl_usnic_post_segment( @@ -76,10 +79,13 @@ opal_btl_usnic_post_segment( sseg->ss_len); #endif + assert(channel_id == USNIC_DATA_CHANNEL); + assert(channel->credits > 1); + /* Send the segment */ ret = fi_send(channel->ep, sseg->ss_ptr, - sseg->ss_len, + sseg->ss_len + mca_btl_usnic_component.prefix_send_offset, NULL, endpoint->endpoint_remote_addrs[channel_id], sseg); @@ -103,6 +109,9 @@ opal_btl_usnic_post_segment( /* * Common point for posting an ACK + * + * ASSUMES THAT THE CALLER HAS ALREADY CHECKED TO SEE IF WE HAVE + * A SEND CREDIT! 
*/ static inline void opal_btl_usnic_post_ack( @@ -126,9 +135,12 @@ opal_btl_usnic_post_ack( sseg->ss_len); #endif + assert(channel_id == USNIC_PRIORITY_CHANNEL); + assert(channel->credits > 1); + ret = fi_send(channel->ep, sseg->ss_ptr, - sseg->ss_len, + sseg->ss_len + mca_btl_usnic_component.prefix_send_offset, NULL, endpoint->endpoint_remote_addrs[channel_id], sseg); @@ -212,7 +224,7 @@ opal_btl_usnic_endpoint_send_segment( "CHUNK" : "FRAG", sseg->ss_base.us_btl_header->pkt_seq, sseg->ss_base.us_btl_header->sender, - endpoint->endpoint_module->fabric_info->fabric_attr->name, + endpoint->endpoint_module->linux_device_name, local_ip, module->local_modex.ports[sseg->ss_channel], (void*)sseg, @@ -226,11 +238,12 @@ opal_btl_usnic_endpoint_send_segment( /* do the actual send */ opal_btl_usnic_post_segment(module, endpoint, sseg); - /* Track this header by stashing in an array on the endpoint that - is the same length as the sender's window (i.e., WINDOW_SIZE). - To find a unique slot in this array, use (seq % WINDOW_SIZE). - */ + /* Stash this segment in an array on the endpoint that is the same + length as the sender's window (i.e., WINDOW_SIZE) until it + receives its ACK. To find a unique slot in this array, use + (seq % WINDOW_SIZE). */ sfi = WINDOW_SIZE_MOD(sseg->ss_base.us_btl_header->pkt_seq); + assert(NULL == endpoint->endpoint_sent_segs[sfi]); endpoint->endpoint_sent_segs[sfi] = sseg; sseg->ss_ack_pending = true; diff --git a/opal/mca/btl/usnic/btl_usnic_stats.c b/opal/mca/btl/usnic/btl_usnic_stats.c index 6e9051284ef..cc8913f41eb 100644 --- a/opal/mca/btl/usnic/btl_usnic_stats.c +++ b/opal/mca/btl/usnic/btl_usnic_stats.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -82,10 +82,12 @@ void opal_btl_usnic_print_stats( char tmp[128], str[2048]; /* The usuals */ - snprintf(str, sizeof(str), "%s:MCW:%3u, ST(P+D)/F/C/R(T+F)/A:%8lu(%8u+%8u)/%8lu/%8lu/%4lu(%4lu+%4lu)/%8lu, RcvTot/Chk/F/C/L/H/D/BF/A:%8lu/%c%c/%8lu/%8lu/%4lu+%2lu/%4lu/%4lu/%6lu OA/DA %4lu/%4lu CRC:%4lu ", + snprintf(str, sizeof(str), "%s:MCW:%3u, %s, ST(P+D)/F/C/R(T+F)/A:%8lu(%8u+%8u)/%8lu/%8lu/%4lu(%4lu+%4lu)/%8lu, RcvTot/Chk/F/C/L/H/D/BF/A:%8lu/%c%c/%8lu/%8lu/%4lu+%2lu/%4lu/%4lu/%6lu OA/DA %4lu/%4lu CRC:%4lu ", prefix, opal_proc_local_get()->proc_name.vpid, + module->linux_device_name, + module->stats.num_total_sends, module->mod_channels[USNIC_PRIORITY_CHANNEL].num_channel_sends, module->mod_channels[USNIC_DATA_CHANNEL].num_channel_sends, @@ -121,6 +123,11 @@ void opal_btl_usnic_print_stats( module->stats.num_crc_errors); + // Shouldn't happen, but just in case the string ever grows long + // enough to someday potentially get truncated by snprintf, ensure + // that the string is terminated. 
+ str[sizeof(str) - 1] = '\0'; + /* If our PML calls were 0, then show send and receive window extents instead */ if (module->stats.pml_module_sends + @@ -143,8 +150,9 @@ void opal_btl_usnic_print_stats( /* Number of un-acked sends (i.e., sends for which we're still waiting for ACK) */ send_unacked = - endpoint->endpoint_next_seq_to_send - - endpoint->endpoint_ack_seq_rcvd - 1; + SEQ_DIFF(endpoint->endpoint_next_seq_to_send, + SEQ_DIFF(endpoint->endpoint_ack_seq_rcvd, 1)); + if (send_unacked > su_max) su_max = send_unacked; if (send_unacked < su_min) su_min = send_unacked; @@ -194,11 +202,6 @@ static void usnic_stats_callback(int fd, short flags, void *arg) opal_btl_usnic_print_stats(module, tmp, /*reset=*/mca_btl_usnic_component.stats_relative); - - /* In OMPI v1.6, we have to re-add this event (because there's an - old libevent in OMPI v1.6) */ - opal_event_add(&(module->stats.timer_event), - &(module->stats.timeout)); } /* @@ -212,7 +215,8 @@ int opal_btl_usnic_stats_init(opal_btl_usnic_module_t *module) module->stats.timeout.tv_sec = mca_btl_usnic_component.stats_frequency; module->stats.timeout.tv_usec = 0; - opal_event_set(opal_event_base, &(module->stats.timer_event), + opal_event_set(mca_btl_usnic_component.opal_evbase, + &(module->stats.timer_event), -1, EV_TIMEOUT | EV_PERSIST, &usnic_stats_callback, module); opal_event_add(&(module->stats.timer_event), @@ -395,7 +399,7 @@ static void setup_mpit_pvars_enum(void) devices[i].value = i; rc = asprintf(&str, "%s,%hhu.%hhu.%hhu.%hhu/%" PRIu32, - m->fabric_info->fabric_attr->name, + m->linux_device_name, c[0], c[1], c[2], c[3], usnic_netmask_to_cidrlen(sin->sin_addr.s_addr)); assert(rc > 0); diff --git a/opal/mca/btl/usnic/btl_usnic_util.c b/opal/mca/btl/usnic/btl_usnic_util.c index a66303d8867..17eeb7650db 100644 --- a/opal/mca/btl/usnic/btl_usnic_util.c +++ b/opal/mca/btl/usnic/btl_usnic_util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,13 +25,17 @@ void opal_btl_usnic_exit(opal_btl_usnic_module_t *module) if (NULL == module) { /* Find the first module with an error callback */ for (int i = 0; i < mca_btl_usnic_component.num_modules; ++i) { - if (NULL != mca_btl_usnic_component.usnic_active_modules[i]->pml_error_callback) { + if (NULL != mca_btl_usnic_component.usnic_active_modules && + NULL != mca_btl_usnic_component.usnic_active_modules[i] && + NULL != mca_btl_usnic_component.usnic_active_modules[i]->pml_error_callback) { module = mca_btl_usnic_component.usnic_active_modules[i]; break; } } /* If we didn't find a PML error callback, just exit. */ if (NULL == module) { + fprintf(stderr, "*** The Open MPI usnic BTL is aborting the MPI job (via exit(3)).\n"); + fflush(stderr); exit(1); } } @@ -45,7 +49,7 @@ void opal_btl_usnic_exit(opal_btl_usnic_module_t *module) module->pml_error_callback(&module->super, MCA_BTL_ERROR_FLAGS_FATAL, (opal_proc_t*) opal_proc_local_get(), - "usnic"); + "The usnic BTL is aborting the MPI job (via PML error callback)."); } /* If the PML error callback returns (or if there wasn't one), @@ -63,7 +67,7 @@ void opal_btl_usnic_util_abort(const char *msg, const char *file, int line) opal_show_help("help-mpi-btl-usnic.txt", "internal error after init", true, opal_process_info.nodename, - msg, file, line); + file, line, msg); opal_btl_usnic_exit(NULL); /* Never returns */ @@ -113,24 +117,27 @@ opal_btl_usnic_dump_hex(void *vaddr, int len) * using inet_ntop()). 
*/ void opal_btl_usnic_snprintf_ipv4_addr(char *out, size_t maxlen, - uint32_t addr, uint32_t netmask) + uint32_t addr_be, uint32_t netmask_be) { int prefixlen; + uint32_t netmask = ntohl(netmask_be); + uint32_t addr = ntohl(addr_be); uint8_t *p = (uint8_t*) &addr; + if (netmask != 0) { prefixlen = 33 - ffs(netmask); snprintf(out, maxlen, "%u.%u.%u.%u/%u", - p[0], - p[1], - p[2], p[3], + p[2], + p[1], + p[0], prefixlen); } else { snprintf(out, maxlen, "%u.%u.%u.%u", - p[0], - p[1], + p[3], p[2], - p[3]); + p[1], + p[0]); } } diff --git a/opal/mca/btl/usnic/btl_usnic_util.h b/opal/mca/btl/usnic/btl_usnic_util.h index 4fd08f93b93..389deafd652 100644 --- a/opal/mca/btl/usnic/btl_usnic_util.h +++ b/opal/mca/btl/usnic/btl_usnic_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -113,7 +113,7 @@ void opal_btl_usnic_util_abort(const char *msg, const char *file, int line); * expected to be in network byte order. */ void opal_btl_usnic_snprintf_ipv4_addr(char *out, size_t maxlen, - uint32_t addr, uint32_t netmask); + uint32_t addr_be, uint32_t netmask_be); void opal_btl_usnic_snprintf_bool_array(char *s, size_t slen, bool a[], size_t alen); diff --git a/opal/mca/btl/usnic/configure.m4 b/opal/mca/btl/usnic/configure.m4 index 62d63e5fbb5..406a8ffa06a 100644 --- a/opal/mca/btl/usnic/configure.m4 +++ b/opal/mca/btl/usnic/configure.m4 @@ -47,7 +47,7 @@ AC_DEFUN([MCA_opal_btl_usnic_CONFIG],[ ]) AC_DEFUN([_OPAL_BTL_USNIC_DO_CONFIG],[ - OPAL_VAR_SCOPE_PUSH([unit_tests opal_btl_usnic_CPPFLAGS_save]) + OPAL_VAR_SCOPE_PUSH([unit_tests]) # see README.test for information about this scheme AC_ARG_ENABLE([opal-btl-usnic-unit-tests], @@ -95,14 +95,37 @@ AC_DEFUN([_OPAL_BTL_USNIC_DO_CONFIG],[ # The usnic BTL requires libfabric support. 
AS_IF([test "$opal_btl_usnic_happy" = "yes"], - [OPAL_CHECK_LIBFABRIC([opal_btl_usnic], - [opal_btl_usnic_happy=yes], - [opal_btl_usnic_happy=no])]) + [AC_MSG_CHECKING([whether libfabric support is available]) + AS_IF([test "$opal_common_libfabric_happy" = "yes"], + [opal_btl_usnic_happy=yes], + [opal_btl_usnic_happy=no]) + AC_MSG_RESULT([$opal_btl_usnic_happy]) + ]) + + # The usnic BTL requires at least libfabric v1.1 (there was a + # critical bug in libfabric v1.0). + AS_IF([test "$opal_btl_usnic_happy" = "yes"], + [AC_MSG_CHECKING([whether libfabric is >= v1.1]) + opal_btl_usnic_CPPFLAGS_save=$CPPFLAGS + CPPFLAGS="$opal_common_libfabric_CPPFLAGS $CPPFLAGS" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], +[[ +#if !defined(FI_MAJOR_VERSION) +#error your version of libfabric is too old +#elif FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION) < FI_VERSION(1, 1) +#error your version of libfabric is too old +#endif +]])], + [opal_btl_usnic_happy=yes], + [opal_btl_usnic_happy=no]) + AC_MSG_RESULT([$opal_btl_usnic_happy]) + CPPFLAGS=$opal_btl_usnic_CPPFLAGS_save + ]) # Make sure we can find the libfabric usnic extensions header AS_IF([test "$opal_btl_usnic_happy" = "yes" ], [opal_btl_usnic_CPPFLAGS_save=$CPPFLAGS - CPPFLAGS="$opal_btl_usnic_CPPFLAGS $CPPFLAGS" + CPPFLAGS="$opal_common_libfabric_CPPFLAGS $CPPFLAGS" AC_CHECK_HEADER([rdma/fi_ext_usnic.h], [], [opal_btl_usnic_happy=no]) diff --git a/opal/mca/btl/usnic/help-mpi-btl-usnic.txt b/opal/mca/btl/usnic/help-mpi-btl-usnic.txt index d6efab02681..a10a905a064 100644 --- a/opal/mca/btl/usnic/help-mpi-btl-usnic.txt +++ b/opal/mca/btl/usnic/help-mpi-btl-usnic.txt @@ -1,6 +1,6 @@ # -*- text -*- # -# Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved. 
# # $COPYRIGHT$ # @@ -18,7 +18,7 @@ This means that you have either not provisioned enough usNICs on this VIC, or there are not enough total receive, transmit, or completion queues on the provisioned usNICs. On each VIC in a given server, you need to provision at least as many usNICs as MPI processes on that -server. In each usNIC, you need to provision at least two each of the +server. In each usNIC, you need to provision enough of each of the following: send queues, receive queues, and completion queues. Open MPI will skip this usNIC interface in the usnic BTL, which may @@ -77,10 +77,9 @@ something wrong with the usNIC or OpenFabrics configuration on this server. Server: %s - Message: %s File: %s Line: %d - Error: %s + Message: %s # [check_reg_mem_basics fail] The usNIC BTL failed to initialize while trying to register some @@ -241,6 +240,19 @@ abort. usNIC interface: %s Current ARP timeout: %d (btl_usnic_arp_timeout MCA param) # +[fi_av_eq too small] +The usnic BTL was told to create an address resolution queue that was +too small via the mca_btl_usnic_av_eq_num MCA parameter. This +parameter controls how many outstanding peer address resolutions can +be outstanding at a time. Larger values allow more concurrent address +resolutions, but consume more memory. + + Server: %s + av_eq_num param value: %d + av_eq_num minimum value: %d + +Your job will likely either perform poorly, or will abort. +# [unreachable peer IP] WARNING: Open MPI failed to find a route to a peer IP address via a specific usNIC interface. This usually indicates a problem in the IP diff --git a/opal/mca/btl/usnic/test/usnic_btl_run_tests.c b/opal/mca/btl/usnic/test/usnic_btl_run_tests.c index 3abfdf48fdd..5b79e85fb78 100644 --- a/opal/mca/btl/usnic/test/usnic_btl_run_tests.c +++ b/opal/mca/btl/usnic/test/usnic_btl_run_tests.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -38,7 +39,7 @@ int main(int argc, char **argv) char *to; int path_len; - mpi_handle = dlopen("libmpi.so", RTLD_NOW|RTLD_GLOBAL); + mpi_handle = dlopen("lib" OMPI_LIBMPI_NAME ".so", RTLD_NOW|RTLD_GLOBAL); if (mpi_handle == NULL) { fprintf(stderr, "mpi_handle=NULL dlerror()=%s\n", dlerror()); abort(); diff --git a/opal/mca/btl/vader/Makefile.am b/opal/mca/btl/vader/Makefile.am index a8ce0dfb111..deaf5e06cb2 100644 --- a/opal/mca/btl/vader/Makefile.am +++ b/opal/mca/btl/vader/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2009 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -13,9 +13,9 @@ # Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights # reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/btl/vader/btl_vader.h b/opal/mca/btl/vader/btl_vader.h index bbaaf7eb6cf..5290a7faa78 100644 --- a/opal/mca/btl/vader/btl_vader.h +++ b/opal/mca/btl/vader/btl_vader.h @@ -14,6 +14,8 @@ * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. 
+ * * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,9 +34,7 @@ #include #include -#ifdef HAVE_STDINT_H # include -#endif /* HAVE_STDINT_H */ #ifdef HAVE_SCHED_H # include #endif /* HAVE_SCHED_H */ @@ -47,8 +47,8 @@ #include "opal/class/opal_free_list.h" #include "opal/sys/atomic.h" #include "opal/mca/btl/btl.h" -#include "opal/mca/mpool/mpool.h" -#include "opal/mca/mpool/base/base.h" +#include "opal/mca/rcache/rcache.h" +#include "opal/mca/rcache/base/rcache_base_vma.h" #include "opal/mca/btl/base/base.h" #include "opal/mca/rcache/rcache.h" #include "opal/mca/rcache/base/base.h" @@ -104,6 +104,7 @@ struct mca_btl_vader_component_t { int vader_free_list_inc; /**< number of elements to alloc when growing free lists */ #if OPAL_BTL_VADER_HAVE_XPMEM xpmem_segid_t my_seg_id; /**< this rank's xpmem segment id */ + mca_rcache_base_vma_module_t *vma_module; /**< registration cache for xpmem segments */ #endif opal_shmem_ds_t seg_ds; /**< this rank's shared memory segment (when not using xpmem) */ @@ -152,6 +153,9 @@ struct mca_btl_vader_t { mca_btl_base_module_error_cb_fn_t error_cb; #if OPAL_BTL_VADER_HAVE_KNEM int knem_fd; + + /* registration cache */ + mca_rcache_base_module_t *knem_rcache; #endif }; typedef struct mca_btl_vader_t mca_btl_vader_t; @@ -265,6 +269,14 @@ mca_btl_base_descriptor_t* mca_btl_vader_alloc (struct mca_btl_base_module_t* bt struct mca_btl_base_endpoint_t* endpoint, uint8_t order, size_t size, uint32_t flags); +/** + * Return a segment allocated by this BTL. + * + * @param btl (IN) BTL module + * @param segment (IN) Allocated segment. 
+ */ +int mca_btl_vader_free (struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des); + END_C_DECLS diff --git a/opal/mca/btl/vader/btl_vader_component.c b/opal/mca/btl/vader/btl_vader_component.c index 2289991e332..23a93a3bbf8 100644 --- a/opal/mca/btl/vader/btl_vader_component.c +++ b/opal/mca/btl/vader/btl_vader_component.c @@ -15,7 +15,7 @@ * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -239,8 +239,10 @@ static int mca_btl_vader_component_register (void) mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_rdma_pipeline_frag_size = mca_btl_vader.super.btl_eager_limit; + mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND; + if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) { - mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE; + mca_btl_vader.super.btl_flags |= MCA_BTL_FLAGS_RDMA; /* Single copy mechanisms should provide better bandwidth */ mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */ @@ -248,7 +250,6 @@ static int mca_btl_vader_component_register (void) mca_btl_vader.super.btl_get = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma; mca_btl_vader.super.btl_put = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma; } else { - mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE; mca_btl_vader.super.btl_bandwidth = 10000; /* Mbs */ } @@ -296,10 +297,13 @@ static int mca_btl_vader_component_close(void) OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints); OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments); - if (NULL != 
mca_btl_vader_component.my_segment) { + if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism && + NULL != mca_btl_vader_component.my_segment) { munmap (mca_btl_vader_component.my_segment, mca_btl_vader_component.segment_size); } + mca_btl_vader_component.my_segment = NULL; + #if OPAL_BTL_VADER_HAVE_KNEM mca_btl_vader_knem_fini (); #endif @@ -327,7 +331,7 @@ static int mca_btl_base_vader_modex_send (void) } #endif - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_LOCAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_LOCAL, &mca_btl_vader_component.super.btl_version, &modex, modex_size); return rc; @@ -569,13 +573,14 @@ void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, struct mca_btl_ba segments[0].seg_len = hdr->len; if (hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY) { - mca_mpool_base_registration_t *xpmem_reg; + mca_rcache_base_registration_t *xpmem_reg; xpmem_reg = vader_get_registation (endpoint, hdr->sc_iov.iov_base, hdr->sc_iov.iov_len, 0, &segments[1].seg_addr.pval); + assert (NULL != xpmem_reg); - segments[1].seg_len = hdr->sc_iov.iov_len; + segments[1].seg_len = hdr->sc_iov.iov_len; frag.des_segment_count = 2; /* recv upcall */ @@ -629,22 +634,21 @@ static void mca_btl_vader_progress_waiting (mca_btl_base_endpoint_t *ep) return; } - OPAL_THREAD_LOCK(&ep->lock); + OPAL_THREAD_LOCK(&ep->pending_frags_lock); OPAL_LIST_FOREACH_SAFE(frag, next, &ep->pending_frags, mca_btl_vader_frag_t) { - OPAL_THREAD_UNLOCK(&ep->lock); ret = vader_fifo_write_ep (frag->hdr, ep); if (!ret) { + OPAL_THREAD_UNLOCK(&ep->pending_frags_lock); return; } - OPAL_THREAD_LOCK(&ep->lock); (void) opal_list_remove_first (&ep->pending_frags); } ep->waiting = false; opal_list_remove_item (&mca_btl_vader_component.pending_endpoints, &ep->super); - OPAL_THREAD_UNLOCK(&ep->lock); + OPAL_THREAD_UNLOCK(&ep->pending_frags_lock); } /** diff --git a/opal/mca/btl/vader/btl_vader_endpoint.h b/opal/mca/btl/vader/btl_vader_endpoint.h index bc57d2d95a5..d3a39e08f24 100644 --- 
a/opal/mca/btl/vader/btl_vader_endpoint.h +++ b/opal/mca/btl/vader/btl_vader_endpoint.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -62,7 +62,7 @@ typedef struct mca_btl_base_endpoint_t { int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing * SMP specfic data structures. */ - uint32_t send_count; /**< number of fragments sent to this peer */ + volatile size_t send_count; /**< number of fragments sent to this peer */ char *segment_base; /**< start of the peer's segment (in the address space * of this process) */ @@ -74,7 +74,6 @@ typedef struct mca_btl_base_endpoint_t { union { #if OPAL_BTL_VADER_HAVE_XPMEM struct { - struct mca_rcache_base_module_t *rcache; xpmem_apid_t apid; /**< xpmem apid for remote peer */ } xpmem; #endif @@ -84,6 +83,7 @@ typedef struct mca_btl_base_endpoint_t { } other; } segment_data; + opal_mutex_t pending_frags_lock; /**< protect pending_frags */ opal_list_t pending_frags; /**< fragments pending fast box space */ bool waiting; /**< endpoint is on the component wait list */ } mca_btl_base_endpoint_t; @@ -94,15 +94,15 @@ OBJ_CLASS_DECLARATION(mca_btl_vader_endpoint_t); static inline void mca_btl_vader_endpoint_setup_fbox_recv (struct mca_btl_base_endpoint_t *endpoint, void *base) { - endpoint->fbox_in.buffer = base; endpoint->fbox_in.startp = (uint32_t *) base; endpoint->fbox_in.start = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_in.seq = 0; + opal_atomic_wmb (); + endpoint->fbox_in.buffer = base; } static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, void *base) { - endpoint->fbox_out.buffer = base; endpoint->fbox_out.start = MCA_BTL_VADER_FBOX_ALIGNMENT; 
endpoint->fbox_out.end = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.startp = (uint32_t *) base; @@ -111,6 +111,9 @@ static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_e /* zero out the first header in the fast box */ memset ((char *) base + MCA_BTL_VADER_FBOX_ALIGNMENT, 0, MCA_BTL_VADER_FBOX_ALIGNMENT); + + opal_atomic_wmb (); + endpoint->fbox_out.buffer = base; } #endif /* MCA_BTL_VADER_ENDPOINT_H */ diff --git a/opal/mca/btl/vader/btl_vader_fbox.h b/opal/mca/btl/vader/btl_vader_fbox.h index d646263054a..6f09cb6c513 100644 --- a/opal/mca/btl/vader/btl_vader_fbox.h +++ b/opal/mca/btl/vader/btl_vader_fbox.h @@ -117,6 +117,7 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t if (OPAL_UNLIKELY(buffer_free < size)) { ep->fbox_out.end = (hbs << 31) | end; + opal_atomic_wmb (); OPAL_THREAD_UNLOCK(&ep->lock); return NULL; } @@ -141,6 +142,7 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t /* align the buffer */ ep->fbox_out.end = ((uint32_t) hbs << 31) | end; + opal_atomic_wmb (); OPAL_THREAD_UNLOCK(&ep->lock); return dst + sizeof (mca_btl_vader_fbox_hdr_t); @@ -247,6 +249,7 @@ static inline bool mca_btl_vader_check_fboxes (void) /* save where we left off */ /* let the sender know where we stopped */ + opal_atomic_mb (); ep->fbox_in.start = ep->fbox_in.startp[0] = ((uint32_t) hbs << 31) | start; processed = true; } @@ -258,8 +261,7 @@ static inline bool mca_btl_vader_check_fboxes (void) static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mca_btl_vader_hdr_t *hdr) { - if (NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == ++ep->send_count) { - + if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_SIZE_T (&ep->send_count, 1))) { /* protect access to mca_btl_vader_component.segment_offset */ OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); diff --git 
a/opal/mca/btl/vader/btl_vader_fifo.h b/opal/mca/btl/vader/btl_vader_fifo.h index d63ffb49ad2..5f6488b44bf 100644 --- a/opal/mca/btl/vader/btl_vader_fifo.h +++ b/opal/mca/btl/vader/btl_vader_fifo.h @@ -13,7 +13,7 @@ * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2014 Los Alamos National Security, LLC. - * All rights reserved. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -150,7 +150,7 @@ static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo, struct m } opal_atomic_wmb (); - return hdr; + return hdr; } static inline void vader_fifo_init (vader_fifo_t *fifo) diff --git a/opal/mca/btl/vader/btl_vader_get.c b/opal/mca/btl/vader/btl_vader_get.c index ce8d7b89d84..f77a1df8216 100644 --- a/opal/mca/btl/vader/btl_vader_get.c +++ b/opal/mca/btl/vader/btl_vader_get.c @@ -38,7 +38,7 @@ int mca_btl_vader_get_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_mpool_base_registration_t *reg; + mca_rcache_base_registration_t *reg; void *rem_ptr; /* silence warning about unused arguments */ diff --git a/opal/mca/btl/vader/btl_vader_knem.c b/opal/mca/btl/vader/btl_vader_knem.c index 157dc04ae20..96a7e775272 100644 --- a/opal/mca/btl/vader/btl_vader_knem.c +++ b/opal/mca/btl/vader/btl_vader_knem.c @@ -19,12 +19,11 @@ #include #include "opal/util/show_help.h" -#include "opal/mca/mpool/grdma/mpool_grdma.h" -OBJ_CLASS_INSTANCE(mca_btl_vader_registration_handle_t, mca_mpool_base_registration_t, NULL, NULL); +OBJ_CLASS_INSTANCE(mca_btl_vader_registration_handle_t, mca_rcache_base_registration_t, NULL, NULL); static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size, - mca_mpool_base_registration_t *reg) + mca_rcache_base_registration_t *reg) { 
mca_btl_vader_registration_handle_t *knem_reg = (mca_btl_vader_registration_handle_t *) reg; struct knem_cmd_create_region knem_cr; @@ -35,8 +34,15 @@ static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size, knem_cr.iovec_array = (uintptr_t) &knem_iov; knem_cr.iovec_nr = 1; - /* TODO -- set proper access flags when the protection is passed down */ - knem_cr.protection = PROT_READ | PROT_WRITE; + knem_cr.protection = 0; + + if (reg->access_flags & (MCA_RCACHE_ACCESS_LOCAL_WRITE | MCA_RCACHE_ACCESS_REMOTE_WRITE)) { + knem_cr.protection |= PROT_WRITE; + } + + if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_READ) { + knem_cr.protection |= PROT_READ; + } /* Vader will explicitly destroy this cookie */ knem_cr.flags = 0; @@ -50,7 +56,7 @@ static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size, return OPAL_SUCCESS; } -static int mca_btl_vader_knem_dereg (void *reg_data, mca_mpool_base_registration_t *reg) +static int mca_btl_vader_knem_dereg (void *reg_data, mca_rcache_base_registration_t *reg) { mca_btl_vader_registration_handle_t *knem_reg = (mca_btl_vader_registration_handle_t *) reg; @@ -65,11 +71,14 @@ mca_btl_vader_register_mem_knem (struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t *endpoint, void *base, size_t size, uint32_t flags) { + mca_btl_vader_t *vader_module = (mca_btl_vader_t *) btl; mca_btl_vader_registration_handle_t *reg = NULL; + int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; int rc; - rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, 0, - (mca_mpool_base_registration_t **) &reg); + rc = vader_module->knem_rcache->rcache_register (vader_module->knem_rcache, base, size, 0, + access_flags, + (mca_rcache_base_registration_t **) &reg); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { return NULL; } @@ -80,18 +89,19 @@ mca_btl_vader_register_mem_knem (struct mca_btl_base_module_t* btl, static int mca_btl_vader_deregister_mem_knem (struct mca_btl_base_module_t *btl, struct 
mca_btl_base_registration_handle_t *handle) { + mca_btl_vader_t *vader_module = (mca_btl_vader_t *) btl; mca_btl_vader_registration_handle_t *reg = (mca_btl_vader_registration_handle_t *)((intptr_t) handle - offsetof (mca_btl_vader_registration_handle_t, btl_handle)); - btl->btl_mpool->mpool_deregister (btl->btl_mpool, ®->base); + vader_module->knem_rcache->rcache_deregister (vader_module->knem_rcache, ®->base); return OPAL_SUCCESS; } int mca_btl_vader_knem_init (void) { - mca_mpool_base_resources_t mpool_resources = { - .pool_name = "vader", .reg_data = NULL, + mca_rcache_base_resources_t rcache_resources = { + .cache_name = "vader", .reg_data = NULL, .sizeof_reg = sizeof (mca_btl_vader_registration_handle_t), .register_mem = mca_btl_vader_knem_reg, .deregister_mem = mca_btl_vader_knem_dereg @@ -99,6 +109,7 @@ int mca_btl_vader_knem_init (void) struct knem_cmd_info knem_info; int rc; + signal (SIGSEGV, SIG_DFL); /* Open the knem device. Try to print a helpful message if we fail to open it. */ mca_btl_vader.knem_fd = open("/dev/knem", O_RDWR); @@ -122,6 +133,7 @@ int mca_btl_vader_knem_init (void) do { /* Check that the ABI if kernel module running is the same * as what we were compiled against. 
*/ + memset (&knem_info, 0, sizeof (knem_info)); rc = ioctl(mca_btl_vader.knem_fd, KNEM_CMD_GET_INFO, &knem_info); if (rc < 0) { opal_show_help("help-btl-vader.txt", "knem get ABI fail", @@ -153,9 +165,9 @@ int mca_btl_vader_knem_init (void) mca_btl_vader.super.btl_deregister_mem = mca_btl_vader_deregister_mem_knem; mca_btl_vader.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); - mca_btl_vader.super.btl_mpool = mca_mpool_base_module_create ("grdma", NULL, - &mpool_resources); - if (NULL == mca_btl_vader.super.btl_mpool) { + mca_btl_vader.knem_rcache = mca_rcache_base_module_create ("grdma", NULL, + &rcache_resources); + if (NULL == mca_btl_vader.knem_rcache) { return OPAL_ERR_OUT_OF_RESOURCE; } @@ -174,9 +186,9 @@ int mca_btl_vader_knem_fini (void) mca_btl_vader.knem_fd = -1; } - if (mca_btl_vader.super.btl_mpool) { - (void) mca_mpool_base_module_destroy (mca_btl_vader.super.btl_mpool); - mca_btl_vader.super.btl_mpool = NULL; + if (mca_btl_vader.knem_rcache) { + (void) mca_rcache_base_module_destroy (mca_btl_vader.knem_rcache); + mca_btl_vader.knem_rcache = NULL; } return OPAL_SUCCESS; diff --git a/opal/mca/btl/vader/btl_vader_knem.h b/opal/mca/btl/vader/btl_vader_knem.h index 8d3b8402099..76fa6e10548 100644 --- a/opal/mca/btl/vader/btl_vader_knem.h +++ b/opal/mca/btl/vader/btl_vader_knem.h @@ -24,7 +24,7 @@ struct mca_btl_base_registration_handle_t { }; struct mca_btl_vader_registration_handle_t { - mca_mpool_base_registration_t base; + mca_rcache_base_registration_t base; mca_btl_base_registration_handle_t btl_handle; }; typedef struct mca_btl_vader_registration_handle_t mca_btl_vader_registration_handle_t; diff --git a/opal/mca/btl/vader/btl_vader_module.c b/opal/mca/btl/vader/btl_vader_module.c index deec2ed4f46..6a1c1f2c822 100644 --- a/opal/mca/btl/vader/btl_vader_module.c +++ b/opal/mca/btl/vader/btl_vader_module.c @@ -14,9 +14,11 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,8 +45,6 @@ static int vader_register_error_cb (struct mca_btl_base_module_t* btl, static int vader_finalize (struct mca_btl_base_module_t* btl); -static int vader_free (struct mca_btl_base_module_t* btl, mca_btl_base_descriptor_t* des); - static struct mca_btl_base_descriptor_t *vader_prepare_src ( struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, @@ -60,8 +60,6 @@ static int vader_add_procs(struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t** peers, struct opal_bitmap_t* reachability); -static int vader_ft_event (int state); - mca_btl_vader_t mca_btl_vader = { { &mca_btl_vader_component.super, @@ -69,13 +67,13 @@ mca_btl_vader_t mca_btl_vader = { .btl_del_procs = vader_del_procs, .btl_finalize = vader_finalize, .btl_alloc = mca_btl_vader_alloc, - .btl_free = vader_free, + .btl_free = mca_btl_vader_free, .btl_prepare_src = vader_prepare_src, .btl_send = mca_btl_vader_send, .btl_sendi = mca_btl_vader_sendi, .btl_dump = mca_btl_base_dump, .btl_register_error = vader_register_error_cb, - .btl_ft_event = vader_ft_event + .btl_ft_event = NULL } }; @@ -147,6 +145,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) /* set flag indicating btl has been inited */ vader_btl->btl_inited = true; +#if OPAL_BTL_VADER_HAVE_XPMEM + if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { + mca_btl_vader_component.vma_module = mca_rcache_base_vma_module_alloc (); + } +#endif + return OPAL_SUCCESS; } @@ -162,7 +166,8 @@ static int init_vader_endpoint (struct 
mca_btl_base_endpoint_t *ep, struct opal_ ep->peer_smp_rank = remote_rank; if (remote_rank != MCA_BTL_VADER_LOCAL_RANK) { - OPAL_MODEX_RECV(rc, &component->super.btl_version, proc, (void **) &modex, &msg_size); + OPAL_MODEX_RECV(rc, &component->super.btl_version, + &proc->proc_name, (void **) &modex, &msg_size); if (OPAL_SUCCESS != rc) { return rc; } @@ -172,9 +177,8 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_ if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { /* always use xpmem if it is available */ ep->segment_data.xpmem.apid = xpmem_get (modex->xpmem.seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666); - ep->segment_data.xpmem.rcache = mca_rcache_base_module_create("vma"); (void) vader_get_registation (ep, modex->xpmem.segment_base, mca_btl_vader_component.segment_size, - MCA_MPOOL_FLAGS_PERSIST, (void **) &ep->segment_base); + MCA_RCACHE_FLAGS_PERSIST, (void **) &ep->segment_base); } else { #endif /* store a copy of the segment information for detach */ @@ -355,6 +359,12 @@ static int vader_finalize(struct mca_btl_base_module_t *btl) opal_shmem_segment_detach (&mca_btl_vader_component.seg_ds); } +#if OPAL_BTL_VADER_HAVE_XPMEM + if (NULL != mca_btl_vader_component.vma_module) { + OBJ_RELEASE(mca_btl_vader_component.vma_module); + } +#endif + return OPAL_SUCCESS; } @@ -410,7 +420,7 @@ mca_btl_base_descriptor_t *mca_btl_vader_alloc(struct mca_btl_base_module_t *btl * @param btl (IN) BTL module * @param segment (IN) Allocated segment. 
*/ -static int vader_free (struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des) +int mca_btl_vader_free (struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des) { MCA_BTL_VADER_FRAG_RETURN((mca_btl_vader_frag_t *) des); @@ -435,6 +445,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_ int rc; opal_convertor_get_current_pointer (convertor, &data_ptr); + assert (NULL != data_ptr); /* in place send fragment */ if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) { @@ -522,52 +533,24 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_ return &frag->base; } -/** - * Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OPAL_SUCCESS or failure status - */ -static int vader_ft_event (int state) -{ - return OPAL_SUCCESS; -} - static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep) { OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t); + OBJ_CONSTRUCT(&ep->pending_frags_lock, opal_mutex_t); ep->fifo = NULL; } +#if OPAL_BTL_VADER_HAVE_XPMEM +#endif + static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep) { OBJ_DESTRUCT(&ep->pending_frags); + OBJ_DESTRUCT(&ep->pending_frags_lock); #if OPAL_BTL_VADER_HAVE_XPMEM if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { - if (ep->segment_data.xpmem.rcache) { - /* clean out the registration cache */ - const int nregs = 100; - mca_mpool_base_registration_t *regs[nregs]; - int reg_cnt; - - do { - reg_cnt = ep->segment_data.xpmem.rcache->rcache_find_all(ep->segment_data.xpmem.rcache, 0, (size_t)-1, - regs, nregs); - - for (int i = 0 ; i < reg_cnt ; ++i) { - /* otherwise dereg will fail on assert */ - regs[i]->ref_count = 0; - OBJ_RELEASE(regs[i]); - } - } while (reg_cnt == nregs); - - ep->segment_data.xpmem.rcache = NULL; - } - - if (ep->segment_base) { - xpmem_release (ep->segment_data.xpmem.apid); - ep->segment_data.xpmem.apid = 0; - } + 
mca_btl_vader_xpmem_cleanup_endpoint (ep); } else #endif if (ep->segment_data.other.seg_ds) { diff --git a/opal/mca/btl/vader/btl_vader_put.c b/opal/mca/btl/vader/btl_vader_put.c index 3107f420b33..c3d21124126 100644 --- a/opal/mca/btl/vader/btl_vader_put.c +++ b/opal/mca/btl/vader/btl_vader_put.c @@ -40,7 +40,7 @@ int mca_btl_vader_put_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_mpool_base_registration_t *reg; + mca_rcache_base_registration_t *reg; void *rem_ptr; reg = vader_get_registation (endpoint, (void *)(intptr_t) remote_address, size, 0, &rem_ptr); diff --git a/opal/mca/btl/vader/btl_vader_send.c b/opal/mca/btl/vader/btl_vader_send.c index 59a10c366ad..08bfa5a6238 100644 --- a/opal/mca/btl/vader/btl_vader_send.c +++ b/opal/mca/btl/vader/btl_vader_send.c @@ -57,7 +57,7 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl, /* post the relative address of the descriptor into the peer's fifo */ if (opal_list_get_size (&endpoint->pending_frags) || !vader_fifo_write_ep (frag->hdr, endpoint)) { frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; - OPAL_THREAD_LOCK(&endpoint->lock); + OPAL_THREAD_LOCK(&endpoint->pending_frags_lock); opal_list_append (&endpoint->pending_frags, (opal_list_item_t *) frag); if (!endpoint->waiting) { OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); @@ -65,7 +65,7 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl, OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock); endpoint->waiting = true; } - OPAL_THREAD_UNLOCK(&endpoint->lock); + OPAL_THREAD_UNLOCK(&endpoint->pending_frags_lock); return OPAL_SUCCESS; } diff --git a/opal/mca/btl/vader/btl_vader_sendi.c b/opal/mca/btl/vader/btl_vader_sendi.c index be9768d53c0..4b48560fb20 100644 --- a/opal/mca/btl/vader/btl_vader_sendi.c +++ b/opal/mca/btl/vader/btl_vader_sendi.c @@ -14,6 +14,8 @@ * 
Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -105,6 +107,8 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl, if (!vader_fifo_write_ep (frag->hdr, endpoint)) { if (descriptor) { *descriptor = &frag->base; + } else { + mca_btl_vader_free (btl, &frag->base); } return OPAL_ERR_OUT_OF_RESOURCE; } diff --git a/opal/mca/btl/vader/btl_vader_xpmem.c b/opal/mca/btl/vader/btl_vader_xpmem.c index 8ad57bb44a6..09203c92023 100644 --- a/opal/mca/btl/vader/btl_vader_xpmem.c +++ b/opal/mca/btl/vader/btl_vader_xpmem.c @@ -32,118 +32,153 @@ int mca_btl_vader_xpmem_init (void) return OPAL_SUCCESS; } +struct vader_check_reg_ctx_t { + mca_rcache_base_vma_module_t *vma_module; + mca_btl_base_endpoint_t *ep; + mca_rcache_base_registration_t **reg; + uintptr_t base; + uintptr_t bound; +}; +typedef struct vader_check_reg_ctx_t vader_check_reg_ctx_t; + +static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx) +{ + vader_check_reg_ctx_t *vader_ctx = (vader_check_reg_ctx_t *) ctx; + + if ((intptr_t) reg->alloc_base != vader_ctx->ep->peer_smp_rank || + (reg->flags & MCA_RCACHE_FLAGS_PERSIST)) { + /* ignore this registration */ + return OPAL_SUCCESS; + } + + vader_ctx->reg[0] = reg; + + if (vader_ctx->bound <= (uintptr_t) reg->bound && vader_ctx->base >= (uintptr_t) reg->base) { + (void)opal_atomic_add (®->ref_count, 1); + return 1; + } + + /* remove this pointer from the rcache and decrement its reference count + (so it is detached later) */ + mca_rcache_base_vma_delete (vader_ctx->vma_module, reg); + + return 2; +} + /* look up the remote pointer in the peer rcache and attach if * necessary */ -mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr, - size_t size, int flags, void **local_ptr) 
+mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr, + size_t size, int flags, void **local_ptr) { - struct mca_rcache_base_module_t *rcache = ep->segment_data.xpmem.rcache; - mca_mpool_base_registration_t *regs[10], *reg = NULL; + mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module; + uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align; + mca_rcache_base_registration_t *reg = NULL; + vader_check_reg_ctx_t check_ctx = {.ep = ep, .reg = ®, .vma_module = vma_module}; xpmem_addr_t xpmem_addr; uintptr_t base, bound; int rc, i; - /* protect rcache access */ - OPAL_THREAD_LOCK(&ep->lock); - - /* use btl/self for self communication */ - assert (ep->peer_smp_rank != MCA_BTL_VADER_LOCAL_RANK); - - base = (uintptr_t) down_align_addr(rem_ptr, mca_btl_vader_component.log_attach_align); - bound = (uintptr_t) up_align_addr((void *)((uintptr_t) rem_ptr + size - 1), - mca_btl_vader_component.log_attach_align) + 1; + base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t); + bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1; if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) { bound = VADER_MAX_ADDRESS; } - /* several segments may match the base pointer */ - rc = rcache->rcache_find_all (rcache, (void *) base, bound - base, regs, 10); - for (i = 0 ; i < rc ; ++i) { - if (bound <= (uintptr_t)regs[i]->bound && base >= (uintptr_t)regs[i]->base) { - (void)opal_atomic_add (®s[i]->ref_count, 1); - reg = regs[i]; - goto reg_found; - } - - if (regs[i]->flags & MCA_MPOOL_FLAGS_PERSIST) { - continue; - } - - /* remove this pointer from the rcache and decrement its reference count - (so it is detached later) */ - rc = rcache->rcache_delete (rcache, regs[i]); - if (OPAL_UNLIKELY(0 != rc)) { - /* someone beat us to it? 
*/ - break; - } + check_ctx.base = base; + check_ctx.bound = bound; + /* several segments may match the base pointer */ + rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, vader_check_reg, &check_ctx); + if (2 == rc) { /* start the new segment from the lower of the two bases */ - base = (uintptr_t) regs[i]->base < base ? (uintptr_t) regs[i]->base : base; - - (void)opal_atomic_add (®s[i]->ref_count, -1); + base = (uintptr_t) reg->base < base ? (uintptr_t) reg->base : base; - if (OPAL_LIKELY(0 == regs[i]->ref_count)) { + if (OPAL_LIKELY(0 == opal_atomic_add_32 (®->ref_count, -1))) { /* this pointer is not in use */ - (void) xpmem_detach (regs[i]->alloc_base); - OBJ_RELEASE(regs[i]); + (void) xpmem_detach (reg->rcache_context); + OBJ_RELEASE(reg); } - break; + reg = NULL; } - reg = OBJ_NEW(mca_mpool_base_registration_t); - if (OPAL_LIKELY(NULL != reg)) { - /* stick around for awhile */ - reg->ref_count = 2; - reg->base = (unsigned char *) base; - reg->bound = (unsigned char *) bound; - reg->flags = flags; + if (NULL == reg) { + reg = OBJ_NEW(mca_rcache_base_registration_t); + if (OPAL_LIKELY(NULL != reg)) { + /* stick around for awhile */ + reg->ref_count = 2; + reg->base = (unsigned char *) base; + reg->bound = (unsigned char *) bound; + reg->flags = flags; + reg->alloc_base = (void *) (intptr_t) ep->peer_smp_rank; #if defined(HAVE_SN_XPMEM_H) - xpmem_addr.id = ep->segment_data.xpmem.apid; + xpmem_addr.id = ep->segment_data.xpmem.apid; #else - xpmem_addr.apid = ep->segment_data.xpmem.apid; + xpmem_addr.apid = ep->segment_data.xpmem.apid; #endif - xpmem_addr.offset = base; + xpmem_addr.offset = base; - reg->alloc_base = xpmem_attach (xpmem_addr, bound - base, NULL); - if (OPAL_UNLIKELY((void *)-1 == reg->alloc_base)) { - OPAL_THREAD_UNLOCK(&ep->lock); - OBJ_RELEASE(reg); - return NULL; - } + reg->rcache_context = xpmem_attach (xpmem_addr, bound - base, NULL); + if (OPAL_UNLIKELY((void *)-1 == reg->rcache_context)) { + OBJ_RELEASE(reg); + 
return NULL; + } - opal_memchecker_base_mem_defined (reg->alloc_base, bound - base); + opal_memchecker_base_mem_defined (reg->rcache_context, bound - base); - rcache->rcache_insert (rcache, reg, 0); + mca_rcache_base_vma_insert (vma_module, reg, 0); + } } -reg_found: opal_atomic_wmb (); - *local_ptr = (void *) ((uintptr_t) reg->alloc_base + + *local_ptr = (void *) ((uintptr_t) reg->rcache_context + (ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base)); - OPAL_THREAD_UNLOCK(&ep->lock); - return reg; } -void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep) +void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep) { - struct mca_rcache_base_module_t *rcache = ep->segment_data.xpmem.rcache; + mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module; int32_t ref_count; ref_count = opal_atomic_add_32 (®->ref_count, -1); - if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_MPOOL_FLAGS_PERSIST))) { + if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) { /* protect rcache access */ - OPAL_THREAD_LOCK(&ep->lock); - rcache->rcache_delete (rcache, reg); - OPAL_THREAD_UNLOCK(&ep->lock); + mca_rcache_base_vma_delete (vma_module, reg); - opal_memchecker_base_mem_noaccess (reg->alloc_base, (uintptr_t)(reg->bound - reg->base)); - (void)xpmem_detach (reg->alloc_base); + opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base)); + (void)xpmem_detach (reg->rcache_context); OBJ_RELEASE (reg); } } +static int mca_btl_vader_endpoint_xpmem_rcache_cleanup (mca_rcache_base_registration_t *reg, void *ctx) +{ + mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module; + mca_btl_vader_endpoint_t *ep = (mca_btl_vader_endpoint_t *) ctx; + if ((intptr_t) reg->alloc_base == ep->peer_smp_rank) { + /* otherwise dereg will fail on assert */ + reg->ref_count = 0; + (void) 
mca_rcache_base_vma_delete (vma_module, reg); + OBJ_RELEASE(reg); + } + + return OPAL_SUCCESS; +} + +void mca_btl_vader_xpmem_cleanup_endpoint (struct mca_btl_base_endpoint_t *ep) +{ + /* clean out the registration cache */ + (void) mca_rcache_base_vma_iterate (mca_btl_vader_component.vma_module, + NULL, (size_t) -1, + mca_btl_vader_endpoint_xpmem_rcache_cleanup, + (void *) ep); + if (ep->segment_base) { + xpmem_release (ep->segment_data.xpmem.apid); + ep->segment_data.xpmem.apid = 0; + } +} + #endif /* OPAL_BTL_VADER_HAVE_XPMEM */ diff --git a/opal/mca/btl/vader/btl_vader_xpmem.h b/opal/mca/btl/vader/btl_vader_xpmem.h index e040e26f309..fa47773697c 100644 --- a/opal/mca/btl/vader/btl_vader_xpmem.h +++ b/opal/mca/btl/vader/btl_vader_xpmem.h @@ -1,7 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2016 ARM, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,23 +26,33 @@ typedef int64_t xpmem_apid_t; #endif +#include +#include + /* look up the remote pointer in the peer rcache and attach if * necessary */ /* largest address we can attach to using xpmem */ +#if defined(__x86_64__) #define VADER_MAX_ADDRESS ((uintptr_t)0x7ffffffff000ul) +#else +#define VADER_MAX_ADDRESS XPMEM_MAXADDR_SIZE +#endif + +struct mca_btl_base_endpoint_t; int mca_btl_vader_xpmem_init (void); -mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr, - size_t size, int flags, void **local_ptr); +mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr, + size_t size, int flags, void **local_ptr); -void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint); +void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint); 
+void mca_btl_vader_xpmem_cleanup_endpoint (struct mca_btl_base_endpoint_t *ep); #else -static inline mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr, - size_t size, int flags, void **local_ptr) +static inline mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr, + size_t size, int flags, void **local_ptr) { (void) endpoint; (void) rem_ptr; @@ -51,7 +62,7 @@ static inline mca_mpool_base_registration_t *vader_get_registation (struct mca_b return NULL; } -static inline void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint) +static inline void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint) { (void) reg; (void) endpoint; diff --git a/opal/mca/btl/vader/configure.m4 b/opal/mca/btl/vader/configure.m4 index bd6fa606632..328d8f3094a 100644 --- a/opal/mca/btl/vader/configure.m4 +++ b/opal/mca/btl/vader/configure.m4 @@ -3,9 +3,11 @@ # Copyright (c) 2009 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -13,47 +15,6 @@ # $HEADER$ # -# OPAL_CHECK_XPMEM(prefix, [action-if-found], [action-if-not-found]) -# -------------------------------------------------------- -# check if XPMEM support can be found. 
sets prefix_{CPPFLAGS, -# LDFLAGS, LIBS} as needed and runs action-if-found if there is -# support, otherwise executes action-if-not-found -AC_DEFUN([OPAL_CHECK_XPMEM], [ - OPAL_VAR_SCOPE_PUSH([opal_check_xpmem_happy]) - AC_ARG_WITH([xpmem], - [AC_HELP_STRING([--with-xpmem(=DIR)], - [Build with XPMEM kernel module support, searching for headers in DIR])]) - OPAL_CHECK_WITHDIR([xpmem], [$with_xpmem], [include/xpmem.h]) - - AC_ARG_WITH([xpmem-libdir], - [AC_HELP_STRING([--with-xpmem-libdir=DIR], - [Search for XPMEM library in DIR])]) - OPAL_CHECK_WITHDIR([xpmem-libdir], [$with_xpmem_libdir], [libxpmem.*]) - - opal_check_xpmem_happy="no" - - if test ! "$with_xpmem" = "no" ; then - if test ! -z "$with_xpmem" -a "$with_xpmem" != "yes" ; then - opal_check_xpmem_dir="$with_xpmem" - fi - - if test ! -z "$with_xpmem_libdir" -a "$with_xpmem_libdir" != "yes" ; then - opal_check_xpmem_libdir="$with_xpmem_libdir" - fi - - OPAL_CHECK_PACKAGE([$1],[xpmem.h],[xpmem],[xpmem_make],[], - [$opal_check_xpmem_dir],[$opal_check_xpmem_libdir], [opal_check_xpmem_happy="yes"], []) - - if test "$opal_check_xpmem_happy" = "no" -a -n "$with_xpmem" -a "$with_xpmem" != "yes" ; then - AC_MSG_ERROR([XPMEM support requested but not found. 
Aborting]) - fi - fi - - AS_IF([test "$opal_check_xpmem_happy" = "yes"], [$2], [$3]) - - OPAL_VAR_SCOPE_POP -])dnl - # MCA_btl_vader_CONFIG([action-if-can-compile], # [action-if-cant-compile]) # ------------------------------------------------ @@ -64,11 +25,7 @@ AC_DEFUN([MCA_opal_btl_vader_CONFIG],[ # Check for single-copy APIs - OPAL_CHECK_CRAY_XPMEM([btl_vader], [btl_vader_xpmem_happy=1], [btl_vader_xpmem_happy=0]) - - AS_IF([test "$btl_vader_xpmem_happy" -eq 0], - [OPAL_CHECK_XPMEM([btl_vader], [btl_vader_xpmem_happy=1], [btl_vader_xpmem_happy=0])],[]) - + OPAL_CHECK_XPMEM([btl_vader], [btl_vader_xpmem_happy=1], [btl_vader_xpmem_happy=0]) OPAL_CHECK_KNEM([btl_vader], [btl_vader_knem_happy=1],[btl_vader_knem_happy=0]) OPAL_CHECK_CMA([btl_vader], [AC_CHECK_HEADER([sys/prctl.h]) btl_vader_cma_happy=1], [btl_vader_cma_happy=0]) @@ -86,7 +43,9 @@ AC_DEFUN([MCA_opal_btl_vader_CONFIG],[ # always happy [$1] - # substitute in the things needed to build with XPMEM support + OPAL_SUMMARY_ADD([[Transports]],[[Shared memory/copy in+copy out]],[$1],[yes]) + +# substitute in the things needed to build with XPMEM support AC_SUBST([btl_vader_CFLAGS]) AC_SUBST([btl_vader_CPPFLAGS]) AC_SUBST([btl_vader_LDFLAGS]) diff --git a/opal/mca/common/Makefile.am b/opal/mca/common/Makefile.am index 33bbb5f2a33..4567c654307 100644 --- a/opal/mca/common/Makefile.am +++ b/opal/mca/common/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/common/cuda/Makefile.am b/opal/mca/common/cuda/Makefile.am index 77ed48d1da1..38b0434c1d5 100644 --- a/opal/mca/common/cuda/Makefile.am +++ b/opal/mca/common/cuda/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2013 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -82,7 +82,7 @@ endif lib@OPAL_LIB_PREFIX@mca_common_cuda_la_SOURCES = $(headers) $(sources) lib@OPAL_LIB_PREFIX@mca_common_cuda_la_LDFLAGS = \ - -version-info $(libmca_common_cuda_so_version) + -version-info $(libmca_opal_common_cuda_so_version) lib@OPAL_LIB_PREFIX@mca_common_cuda_la_LIBADD = $(common_cuda_LIBS) lib@OPAL_LIB_PREFIX@mca_common_cuda_noinst_la_SOURCES = $(headers) $(sources) diff --git a/opal/mca/common/cuda/common_cuda.c b/opal/mca/common/cuda/common_cuda.c index 966d9bdf0b8..2ce3b20539f 100644 --- a/opal/mca/common/cuda/common_cuda.c +++ b/opal/mca/common/cuda/common_cuda.c @@ -10,7 +10,9 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,9 +22,7 @@ /** * This file contains various support functions for doing CUDA - * operations. Some of the features are only available in CUDA 4.1 - * and later, so some code is conditionalized around the - * OPAL_CUDA_SUPPORT_41 macro. + * operations. */ #include "opal_config.h" @@ -36,8 +36,9 @@ #include "opal/util/output.h" #include "opal/util/show_help.h" #include "opal/util/proc.h" +#include "opal/util/argv.h" -#include "opal/mca/mpool/base/base.h" +#include "opal/mca/rcache/base/base.h" #include "opal/runtime/opal_params.h" #include "opal/mca/timer/base/base.h" #include "opal/mca/dl/base/base.h" @@ -88,13 +89,11 @@ struct cudaFunctionTable { int (*cuEventDestroy)(CUevent); int (*cuStreamWaitEvent)(CUstream, CUevent, unsigned int); int (*cuMemGetAddressRange)(CUdeviceptr*, size_t*, CUdeviceptr); -#if OPAL_CUDA_SUPPORT_41 int (*cuIpcGetEventHandle)(CUipcEventHandle*, CUevent); int (*cuIpcOpenEventHandle)(CUevent*, CUipcEventHandle); int (*cuIpcOpenMemHandle)(CUdeviceptr*, CUipcMemHandle, unsigned int); int (*cuIpcCloseMemHandle)(CUdeviceptr); int (*cuIpcGetMemHandle)(CUipcMemHandle*, CUdeviceptr); -#endif /* OPAL_CUDA_SUPPORT_41 */ int (*cuCtxGetDevice)(CUdevice *); int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice); int (*cuDeviceGet)(CUdevice *, int); @@ -108,13 +107,14 @@ struct cudaFunctionTable { #if OPAL_CUDA_GET_ATTRIBUTES int (*cuPointerGetAttributes)(unsigned int, CUpointer_attribute *, void **, CUdeviceptr); #endif /* OPAL_CUDA_GET_ATTRIBUTES */ -} cudaFunctionTable; +}; typedef struct cudaFunctionTable cudaFunctionTable_t; -cudaFunctionTable_t cuFunc; +static cudaFunctionTable_t cuFunc; static int stage_one_init_ref_count = 0; static bool stage_three_init_complete = false; static bool common_cuda_initialized = false; +static bool common_cuda_mca_parames_registered = false; static int mca_common_cuda_verbose; static int mca_common_cuda_output = 0; bool 
mca_common_cuda_enabled = false; @@ -125,6 +125,7 @@ static CUstream ipcStream = NULL; static CUstream dtohStream = NULL; static CUstream htodStream = NULL; static CUstream memcpyStream = NULL; +static int mca_common_cuda_gpu_mem_check_workaround = (CUDA_VERSION > 7000) ? 0 : 1; static opal_mutex_t common_cuda_init_lock; static opal_mutex_t common_cuda_htod_lock; static opal_mutex_t common_cuda_dtoh_lock; @@ -154,7 +155,6 @@ OBJ_CLASS_INSTANCE(common_cuda_mem_regs_t, NULL, NULL); -#if OPAL_CUDA_SUPPORT_41 static int mca_common_cuda_async = 1; static int mca_common_cuda_cumemcpy_async; #if OPAL_ENABLE_DEBUG @@ -174,13 +174,13 @@ struct mca_btl_base_descriptor_t **cuda_event_dtoh_frag_array = NULL; struct mca_btl_base_descriptor_t **cuda_event_htod_frag_array = NULL; /* First free/available location in cuda_event_status_array */ -int cuda_event_ipc_first_avail, cuda_event_dtoh_first_avail, cuda_event_htod_first_avail; +static int cuda_event_ipc_first_avail, cuda_event_dtoh_first_avail, cuda_event_htod_first_avail; /* First currently-being used location in the cuda_event_status_array */ -int cuda_event_ipc_first_used, cuda_event_dtoh_first_used, cuda_event_htod_first_used; +static int cuda_event_ipc_first_used, cuda_event_dtoh_first_used, cuda_event_htod_first_used; /* Number of status items currently in use */ -int cuda_event_ipc_num_used, cuda_event_dtoh_num_used, cuda_event_htod_num_used; +static int cuda_event_ipc_num_used, cuda_event_dtoh_num_used, cuda_event_htod_num_used; /* Size of array holding events */ int cuda_event_max = 400; @@ -221,44 +221,14 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ; #define CUDA_DUMP_EVTHANDLE(a) #endif /* OPAL_ENABLE_DEBUG */ -#endif /* OPAL_CUDA_SUPPORT_41 */ - - -/** - * This is the first stage of initialization. This function is - * called explicitly by any BTLs that can support CUDA-aware. - * It is called during the component open phase of initialization. 
- * This function will register some mca variables and then open - * and load the symbols needed from the CUDA driver library. Look for - * the SONAME of the library which is libcuda.so.1. In most cases, - * this will result in the library found. However, there are some - * setups that require the extra steps for searching. Any failure - * will result in this initialization failing and status will be set - * showing that. - */ -int mca_common_cuda_stage_one_init(void) +/* This is a seperate function so we can see these variables with ompi_info and + * also set them with the tools interface */ +void mca_common_cuda_register_mca_variables(void) { - int retval, i, j; - char *cudalibs[] = {"libcuda.so.1", "libcuda.dylib", NULL}; - char *searchpaths[] = {"", "/usr/lib64", NULL}; - char **errmsgs = NULL; - char *errmsg = NULL; - int errsize; - bool stage_one_init_passed = false; - stage_one_init_ref_count++; - if (stage_one_init_ref_count > 1) { - opal_output_verbose(10, mca_common_cuda_output, - "CUDA: stage_one_init_ref_count is now %d, no need to init", - stage_one_init_ref_count); - return OPAL_SUCCESS; + if (false == common_cuda_mca_parames_registered) { + common_cuda_mca_parames_registered = true; } - - OBJ_CONSTRUCT(&common_cuda_init_lock, opal_mutex_t); - OBJ_CONSTRUCT(&common_cuda_htod_lock, opal_mutex_t); - OBJ_CONSTRUCT(&common_cuda_dtoh_lock, opal_mutex_t); - OBJ_CONSTRUCT(&common_cuda_ipc_lock, opal_mutex_t); - /* Set different levels of verbosity in the cuda related code. */ mca_common_cuda_verbose = 0; (void) mca_base_var_register("ompi", "mpi", "common_cuda", "verbose", @@ -268,7 +238,7 @@ int mca_common_cuda_stage_one_init(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_common_cuda_verbose); - /* Control whether system buffers get CUDA pinned or not. Allows for + /* Control whether system buffers get CUDA pinned or not. Allows for * performance analysis. 
*/ mca_common_cuda_register_memory = true; (void) mca_base_var_register("ompi", "mpi", "common_cuda", "register_memory", @@ -289,7 +259,6 @@ int mca_common_cuda_stage_one_init(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_common_cuda_warning); -#if OPAL_CUDA_SUPPORT_41 /* Use this flag to test async vs sync copies */ mca_common_cuda_async = 1; (void) mca_base_var_register("ompi", "mpi", "common_cuda", "memcpy_async", @@ -306,7 +275,6 @@ int mca_common_cuda_stage_one_init(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &cuda_event_max); -#endif /* OPAL_CUDA_SUPPORT_41 */ /* Use this flag to test cuMemcpyAsync vs cuMemcpy */ mca_common_cuda_cumemcpy_async = 1; @@ -328,6 +296,50 @@ int mca_common_cuda_stage_one_init(void) &mca_common_cuda_cumemcpy_timing); #endif /* OPAL_ENABLE_DEBUG */ + (void) mca_base_var_register("ompi", "mpi", "common_cuda", "gpu_mem_check_workaround", + "Set to 0 to disable GPU memory check workaround. A user would rarely have to do this.", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_common_cuda_gpu_mem_check_workaround); +} + +/** + * This is the first stage of initialization. This function is called + * explicitly by any BTLs that can support CUDA-aware. It is called during + * the component open phase of initialization. This function will look for + * the SONAME of the library which is libcuda.so.1. In most cases, this will + * result in the library found. However, there are some setups that require + * the extra steps for searching. This function will then load the symbols + * needed from the CUDA driver library. Any failure will result in this + * initialization failing and status will be set showing that. 
+ */ +int mca_common_cuda_stage_one_init(void) +{ + int retval, i, j; + char *cudalibs[] = {"libcuda.so.1", "libcuda.dylib", NULL}; + char *searchpaths[] = {"", "/usr/lib64", NULL}; + char **errmsgs = NULL; + char *errmsg = NULL; + int errsize; + bool stage_one_init_passed = false; + + stage_one_init_ref_count++; + if (stage_one_init_ref_count > 1) { + opal_output_verbose(10, mca_common_cuda_output, + "CUDA: stage_one_init_ref_count is now %d, no need to init", + stage_one_init_ref_count); + return OPAL_SUCCESS; + } + + /* This is a no-op in most cases as the parameters were registered earlier */ + mca_common_cuda_register_mca_variables(); + + OBJ_CONSTRUCT(&common_cuda_init_lock, opal_mutex_t); + OBJ_CONSTRUCT(&common_cuda_htod_lock, opal_mutex_t); + OBJ_CONSTRUCT(&common_cuda_dtoh_lock, opal_mutex_t); + OBJ_CONSTRUCT(&common_cuda_ipc_lock, opal_mutex_t); + mca_common_cuda_output = opal_output_open(NULL); opal_output_set_verbosity(mca_common_cuda_output, mca_common_cuda_verbose); @@ -423,7 +435,7 @@ int mca_common_cuda_stage_one_init(void) } opal_argv_free(errmsgs); free(errmsg); - + if (true != stage_one_init_passed) { return 1; } @@ -447,13 +459,11 @@ int mca_common_cuda_stage_one_init(void) OPAL_CUDA_DLSYM(libcuda_handle, cuMemFree); OPAL_CUDA_DLSYM(libcuda_handle, cuMemAlloc); OPAL_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange); -#if OPAL_CUDA_SUPPORT_41 OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle); OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle); OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle); OPAL_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle); OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle); -#endif /* OPAL_CUDA_SUPPORT_41 */ OPAL_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice); OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer); OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceGet); @@ -496,7 +506,7 @@ static int mca_common_cuda_stage_two_init(opal_common_cuda_function_table_t *fta /** * This is the last phase of initialization. 
This is triggered when we examine * a buffer pointer and determine it is a GPU buffer. We then assume the user - * has selected their GPU and we can go ahead with all the CUDA related + * has selected their GPU and we can go ahead with all the CUDA related * initializations. If we get an error, just return. Cleanup of resources * will happen when fini is called. */ @@ -577,7 +587,6 @@ static int mca_common_cuda_stage_three_init(void) return OPAL_ERROR; } -#if OPAL_CUDA_SUPPORT_41 if (true == mca_common_cuda_enabled) { /* Set up an array to store outstanding IPC async copy events */ cuda_event_ipc_num_used = 0; @@ -615,7 +624,6 @@ static int mca_common_cuda_stage_three_init(void) } } -#endif /* OPAL_CUDA_SUPPORT_41 */ if (true == mca_common_cuda_enabled) { /* Set up an array to store outstanding async dtoh events. Used on the * sending side for asynchronous copies. */ @@ -704,7 +712,7 @@ static int mca_common_cuda_stage_three_init(void) OPAL_PROC_MY_HOSTNAME, res, mem_reg->msg); } else { opal_output_verbose(20, mca_common_cuda_output, - "CUDA: cuMemHostRegister OK on mpool %s: " + "CUDA: cuMemHostRegister OK on rcache %s: " "address=%p, bufsize=%d", mem_reg->msg, mem_reg->ptr, (int)mem_reg->amount); } @@ -764,6 +772,9 @@ static int mca_common_cuda_stage_three_init(void) "CUDA: cuMemHostRegister OK on test region"); } + opal_output_verbose(20, mca_common_cuda_output, + "CUDA: the extra gpu memory check is %s", (mca_common_cuda_gpu_mem_check_workaround == 1) ? "on":"off"); + opal_output_verbose(30, mca_common_cuda_output, "CUDA: initialized"); opal_atomic_mb(); /* Make sure next statement does not get reordered */ @@ -784,7 +795,7 @@ static int mca_common_cuda_stage_three_init(void) * Cleanup all CUDA resources. * * Note: Still figuring out how to get cuMemHostUnregister called from the smcuda sm - * mpool. Looks like with the memory pool from openib (grdma), the unregistering is + * rcache. 
Looks like with the memory pool from openib (grdma), the unregistering is * called as the free list is destructed. Not true for the sm mpool. This means we * are currently still leaking some host memory we registered with CUDA. */ @@ -832,7 +843,7 @@ void mca_common_cuda_fini(void) if (NULL != cuda_event_ipc_array[i]) { cuFunc.cuEventDestroy(cuda_event_ipc_array[i]); } - } + } } free(cuda_event_ipc_array); } @@ -899,7 +910,7 @@ void mca_common_cuda_fini(void) stage_one_init_ref_count); } stage_one_init_ref_count--; -} +} /** * Call the CUDA register function so we pin the memory in the CUDA @@ -934,11 +945,11 @@ void mca_common_cuda_register(void *ptr, size_t amount, char *msg) { /* If registering the memory fails, print a message and continue. * This is not a fatal error. */ opal_show_help("help-mpi-common-cuda.txt", "cuMemHostRegister failed", - true, ptr, amount, + true, ptr, amount, OPAL_PROC_MY_HOSTNAME, res, msg); } else { opal_output_verbose(20, mca_common_cuda_output, - "CUDA: cuMemHostRegister OK on mpool %s: " + "CUDA: cuMemHostRegister OK on rcache %s: " "address=%p, bufsize=%d", msg, ptr, (int)amount); } @@ -973,26 +984,25 @@ void mca_common_cuda_unregister(void *ptr, char *msg) { /* If unregistering the memory fails, just continue. This is during * shutdown. Only print when running in verbose mode. */ opal_output_verbose(20, mca_common_cuda_output, - "CUDA: cuMemHostUnregister failed: ptr=%p, res=%d, mpool=%s", + "CUDA: cuMemHostUnregister failed: ptr=%p, res=%d, rcache=%s", ptr, res, msg); } else { opal_output_verbose(20, mca_common_cuda_output, - "CUDA: cuMemHostUnregister OK on mpool %s: " + "CUDA: cuMemHostUnregister OK on rcache %s: " "address=%p", msg, ptr); } } } -#if OPAL_CUDA_SUPPORT_41 /* * Get the memory handle of a local section of memory that can be sent * to the remote size so it can access the memory. This is the * registration function for the sending side of a message transfer. 
*/ -int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *newreg, - mca_mpool_base_registration_t *hdrreg) +int cuda_getmemhandle(void *base, size_t size, mca_rcache_base_registration_t *newreg, + mca_rcache_base_registration_t *hdrreg) { CUmemorytype memType; @@ -1001,7 +1011,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne CUdeviceptr pbase; size_t psize; - mca_mpool_common_cuda_reg_t *cuda_reg = (mca_mpool_common_cuda_reg_t*)newreg; + mca_rcache_common_cuda_reg_t *cuda_reg = (mca_rcache_common_cuda_reg_t*)newreg; memHandle = (CUipcMemHandle *)cuda_reg->data.memHandle; /* We should only be there if this is a CUDA device pointer */ @@ -1080,27 +1090,27 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne * This function is called by the local side that called the cuda_getmemhandle. * There is nothing to be done so just return. */ -int cuda_ungetmemhandle(void *reg_data, mca_mpool_base_registration_t *reg) +int cuda_ungetmemhandle(void *reg_data, mca_rcache_base_registration_t *reg) { - CUDA_DUMP_EVTHANDLE((100, ((mca_mpool_common_cuda_reg_t *)reg)->data.evtHandle, "cuda_ungetmemhandle")); opal_output_verbose(10, mca_common_cuda_output, "CUDA: cuda_ungetmemhandle (no-op): base=%p", reg->base); + CUDA_DUMP_MEMHANDLE((100, ((mca_rcache_common_cuda_reg_t *)reg)->data.memHandle, "cuda_ungetmemhandle")); return OPAL_SUCCESS; } -/* +/* * Open a memory handle that refers to remote memory so we can get an address * that works on the local side. This is the registration function for the * remote side of a transfer. newreg contains the new handle. hddrreg contains * the memory handle that was received from the remote side. 
*/ -int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *newreg, - mca_mpool_base_registration_t *hdrreg) +int cuda_openmemhandle(void *base, size_t size, mca_rcache_base_registration_t *newreg, + mca_rcache_base_registration_t *hdrreg) { CUresult result; CUipcMemHandle *memHandle; - mca_mpool_common_cuda_reg_t *cuda_newreg = (mca_mpool_common_cuda_reg_t*)newreg; + mca_rcache_common_cuda_reg_t *cuda_newreg = (mca_rcache_common_cuda_reg_t*)newreg; /* Save in local variable to avoid ugly casting */ memHandle = (CUipcMemHandle *)cuda_newreg->data.memHandle; @@ -1134,13 +1144,13 @@ int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *n return OPAL_SUCCESS; } -/* - * Close a memory handle that refers to remote memory. +/* + * Close a memory handle that refers to remote memory. */ -int cuda_closememhandle(void *reg_data, mca_mpool_base_registration_t *reg) +int cuda_closememhandle(void *reg_data, mca_rcache_base_registration_t *reg) { CUresult result; - mca_mpool_common_cuda_reg_t *cuda_reg = (mca_mpool_common_cuda_reg_t*)reg; + mca_rcache_common_cuda_reg_t *cuda_reg = (mca_rcache_common_cuda_reg_t*)reg; /* Only attempt to close if we have valid context. This can change if a call * to the fini function is made and we discover context is gone. */ @@ -1203,7 +1213,7 @@ void mca_common_cuda_destruct_event(uintptr_t event) * Put remote event on stream to ensure that the the start of the * copy does not start until the completion of the event. */ -void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg) +void mca_common_wait_stream_synchronize(mca_rcache_common_cuda_reg_t *rget_reg) { #if OPAL_CUDA_SYNC_MEMOPS /* No need for any of this with SYNC_MEMOPS feature */ @@ -1253,7 +1263,7 @@ void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg) * Start the asynchronous copy. Then record and save away an event that will * be queried to indicate the copy has completed. 
*/ -int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg, +int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg, struct mca_btl_base_descriptor_t *frag, int *done) { CUresult result; @@ -1371,7 +1381,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg, cuda_event_ipc_first_used = 0; } *done = 1; - } + } OPAL_THREAD_UNLOCK(&common_cuda_ipc_lock); return OPAL_SUCCESS; } @@ -1459,7 +1469,7 @@ int mca_common_cuda_record_htod_event(char *msg, struct mca_btl_base_descriptor_ return OPAL_ERROR; } cuda_event_htod_frag_array[cuda_event_htod_first_avail] = frag; - + /* Bump up the first available slot and number used by 1 */ cuda_event_htod_first_avail++; if (cuda_event_htod_first_avail >= cuda_event_max) { @@ -1631,10 +1641,10 @@ int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **frag) { /** * Need to make sure the handle we are retrieving from the cache is still - * valid. Compare the cached handle to the one received. + * valid. Compare the cached handle to the one received. */ -int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg, - mca_mpool_common_cuda_reg_t *old_reg) +int mca_common_cuda_memhandle_matches(mca_rcache_common_cuda_reg_t *new_reg, + mca_rcache_common_cuda_reg_t *old_reg) { if (0 == memcmp(new_reg->data.memHandle, old_reg->data.memHandle, sizeof(new_reg->data.memHandle))) { @@ -1642,16 +1652,16 @@ int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg, } else { return 0; } - + } /* - * Function to dump memory handle information. This is based on + * Function to dump memory handle information. This is based on * definitions from cuiinterprocess_private.h. 
*/ static void cuda_dump_memhandle(int verbose, void *memHandle, char *str) { - struct InterprocessMemHandleInternal + struct InterprocessMemHandleInternal { /* The first two entries are the CUinterprocessCtxHandle */ int64_t ctxId; /* unique (within a process) id of the sharing context */ @@ -1670,26 +1680,23 @@ static void cuda_dump_memhandle(int verbose, void *memHandle, char *str) { } memcpy(&memH, memHandle, sizeof(memH)); opal_output_verbose(verbose, mca_common_cuda_output, - "%s:ctxId=%d, pid=%d, size=%d, blocksize=%d, offset=%d, gpuId=%d, " - "subDeviceIndex=%d, serial=%d", - str, (int)memH.ctxId, memH.pid, (int)memH.size, (int)memH.blocksize, (int)memH.offset, - memH.gpuId, memH.subDeviceIndex, (int)memH.serial); + "%s:ctxId=0x%" PRIx64 ", pid=%d, size=%" PRIu64 ", blocksize=%" PRIu64 ", offset=%" + PRIu64 ", gpuId=%d, subDeviceIndex=%d, serial=%" PRIu64, + str, memH.ctxId, memH.pid, memH.size, memH.blocksize, memH.offset, + memH.gpuId, memH.subDeviceIndex, memH.serial); } /* - * Function to dump memory handle information. This is based on + * Function to dump memory handle information. This is based on * definitions from cuiinterprocess_private.h. 
*/ static void cuda_dump_evthandle(int verbose, void *evtHandle, char *str) { - struct InterprocessEventHandleInternal + struct InterprocessEventHandleInternal { - /* The first two entries are the CUinterprocessCtxHandle */ - int64_t ctxId; /* unique (within a process) id of the sharing context */ - int pid; /* pid of sharing context */ - - int pad; /* pad to match the structure */ - int index; + unsigned long pid; + unsigned long serial; + int index; } evtH; if (NULL == str) { @@ -1697,17 +1704,17 @@ static void cuda_dump_evthandle(int verbose, void *evtHandle, char *str) { } memcpy(&evtH, evtHandle, sizeof(evtH)); opal_output_verbose(verbose, mca_common_cuda_output, - "CUDA: %s:ctxId=%d, pid=%d, index=%d", - str, (int)evtH.ctxId, evtH.pid, (int)evtH.index); + "CUDA: %s:pid=%lu, serial=%lu, index=%d", + str, evtH.pid, evtH.serial, evtH.index); } /* Return microseconds of elapsed time. Microseconds are relevant when - * trying to understand the fixed overhead of the communication. Used + * trying to understand the fixed overhead of the communication. Used * when trying to time various functions. * * Cut and past the following to get timings where wanted. 
- * + * * clock_gettime(CLOCK_MONOTONIC, &ts_start); * FUNCTION OF INTEREST * clock_gettime(CLOCK_MONOTONIC, &ts_end); @@ -1721,24 +1728,25 @@ static float mydifftime(opal_timer_t ts_start, opal_timer_t ts_end) { } #endif /* OPAL_ENABLE_DEBUG */ -#endif /* OPAL_CUDA_SUPPORT_41 */ - /* Routines that get plugged into the opal datatype code */ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t *convertor) { int res; CUmemorytype memType = 0; CUdeviceptr dbuf = (CUdeviceptr)pUserBuf; - CUcontext ctx = NULL; + CUcontext ctx = NULL, memCtx = NULL; #if OPAL_CUDA_GET_ATTRIBUTES uint32_t isManaged = 0; /* With CUDA 7.0, we can get multiple attributes with a single call */ CUpointer_attribute attributes[3] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE, CU_POINTER_ATTRIBUTE_CONTEXT, CU_POINTER_ATTRIBUTE_IS_MANAGED}; - void *attrdata[] = {(void *)&memType, (void *)&ctx, (void *)&isManaged}; + void *attrdata[] = {(void *)&memType, (void *)&memCtx, (void *)&isManaged}; res = cuFunc.cuPointerGetAttributes(3, attributes, attrdata, dbuf); + OPAL_OUTPUT_VERBOSE((101, mca_common_cuda_output, + "dbuf=%p, memType=%d, memCtx=%p, isManaged=%d, res=%d", + (void *)dbuf, (int)memType, (void *)memCtx, isManaged, res)); /* Mark unified memory buffers with a flag. This will allow all unified * memory to be forced through host buffers. Note that this memory can @@ -1774,6 +1782,7 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t } /* Must be a device pointer */ assert(memType == CU_MEMORYTYPE_DEVICE); +#endif /* OPAL_CUDA_GET_ATTRIBUTES */ /* This piece of code was added in to handle in a case involving * OMP threads. The user had initialized CUDA and then spawned @@ -1784,25 +1793,25 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t * and set the current context to that. It is rare that we will not * have a context. 
*/ res = cuFunc.cuCtxGetCurrent(&ctx); -#endif /* OPAL_CUDA_GET_ATTRIBUTES */ if (OPAL_UNLIKELY(NULL == ctx)) { if (CUDA_SUCCESS == res) { - res = cuFunc.cuPointerGetAttribute(&ctx, +#if !OPAL_CUDA_GET_ATTRIBUTES + res = cuFunc.cuPointerGetAttribute(&memCtx, CU_POINTER_ATTRIBUTE_CONTEXT, dbuf); - if (res != CUDA_SUCCESS) { + if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) { opal_output(0, "CUDA: error calling cuPointerGetAttribute: " "res=%d, ptr=%p aborting...", res, pUserBuf); return OPAL_ERROR; + } +#endif /* OPAL_CUDA_GET_ATTRIBUTES */ + res = cuFunc.cuCtxSetCurrent(memCtx); + if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) { + opal_output(0, "CUDA: error calling cuCtxSetCurrent: " + "res=%d, ptr=%p aborting...", res, pUserBuf); + return OPAL_ERROR; } else { - res = cuFunc.cuCtxSetCurrent(ctx); - if (res != CUDA_SUCCESS) { - opal_output(0, "CUDA: error calling cuCtxSetCurrent: " - "res=%d, ptr=%p aborting...", res, pUserBuf); - return OPAL_ERROR; - } else { - opal_output_verbose(10, mca_common_cuda_output, - "CUDA: cuCtxSetCurrent passed: ptr=%p", pUserBuf); - } + OPAL_OUTPUT_VERBOSE((10, mca_common_cuda_output, + "CUDA: cuCtxSetCurrent passed: ptr=%p", pUserBuf)); } } else { /* Print error and proceed */ @@ -1812,6 +1821,26 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t } } + /* WORKAROUND - There are times when the above code determines a piece of memory + * is GPU memory, but it actually is not. That has been seen on multi-GPU systems + * with 6 or 8 GPUs on them. Therefore, we will do this extra check. Note if we + * made it this far, then the assumption at this point is we have GPU memory. + * Unfortunately, this extra call is costing us another 100 ns almost doubling + * the cost of this entire function. 
*/ + if (OPAL_LIKELY(mca_common_cuda_gpu_mem_check_workaround)) { + CUdeviceptr pbase; + size_t psize; + res = cuFunc.cuMemGetAddressRange(&pbase, &psize, dbuf); + if (CUDA_SUCCESS != res) { + opal_output_verbose(5, mca_common_cuda_output, + "CUDA: cuMemGetAddressRange failed on this pointer: res=%d, buf=%p " + "Overriding check and setting to host pointer. ", + res, (void *)dbuf); + /* This cannot be GPU memory if the previous call failed */ + return 0; + } + } + /* First access on a device pointer finalizes CUDA support initialization. * If initialization fails, disable support. */ if (!stage_three_init_complete) { @@ -1819,13 +1848,14 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t opal_cuda_support = 0; } } + return 1; } static int mca_common_cuda_cu_memcpy_async(void *dest, const void *src, size_t size, opal_convertor_t* convertor) { - return cuFunc.cuMemcpyAsync((CUdeviceptr)dest, (CUdeviceptr)src, size, + return cuFunc.cuMemcpyAsync((CUdeviceptr)dest, (CUdeviceptr)src, size, (CUstream)convertor->stream); } @@ -1881,7 +1911,7 @@ static int mca_common_cuda_cu_memcpy(void *dest, const void *src, size_t size) accum, (int)size, src, memTypeSrc, dest, memTypeDst); } } -#endif +#endif return OPAL_SUCCESS; } @@ -1978,7 +2008,7 @@ int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base) * not matching the BUFFER_ID of the buffer we are checking. Return false * if the registration is still good. */ -bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg) +bool mca_common_cuda_previously_freed_memory(mca_rcache_base_registration_t *reg) { int res; unsigned long long bufID; @@ -2010,7 +2040,7 @@ bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg) * Also set SYNC_MEMOPS on any GPU registration to ensure that * synchronous copies complete before the buffer is accessed. 
*/ -void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg) +void mca_common_cuda_get_buffer_id(mca_rcache_base_registration_t *reg) { int res; unsigned long long bufID = 0; @@ -2031,4 +2061,4 @@ void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg) true, OPAL_PROC_MY_HOSTNAME, res, dbuf); } } -#endif /* OPAL_CUDA_GDR_SUPPORT */ +#endif /* OPAL_CUDA_GDR_SUPPORT */ diff --git a/opal/mca/common/cuda/common_cuda.h b/opal/mca/common/cuda/common_cuda.h index 7da817de512..3ff95405299 100644 --- a/opal/mca/common/cuda/common_cuda.h +++ b/opal/mca/common/cuda/common_cuda.h @@ -28,29 +28,29 @@ #define MEMHANDLE_SIZE 8 #define EVTHANDLE_SIZE 8 -struct mca_mpool_common_cuda_reg_data_t { +struct mca_rcache_common_cuda_reg_data_t { uint64_t memHandle[MEMHANDLE_SIZE]; uint64_t evtHandle[EVTHANDLE_SIZE]; uint64_t event; opal_ptr_t memh_seg_addr; size_t memh_seg_len; }; -typedef struct mca_mpool_common_cuda_reg_data_t mca_mpool_common_cuda_reg_data_t; +typedef struct mca_rcache_common_cuda_reg_data_t mca_rcache_common_cuda_reg_data_t; -struct mca_mpool_common_cuda_reg_t { - mca_mpool_base_registration_t base; - mca_mpool_common_cuda_reg_data_t data; +struct mca_rcache_common_cuda_reg_t { + mca_rcache_base_registration_t base; + mca_rcache_common_cuda_reg_data_t data; }; -typedef struct mca_mpool_common_cuda_reg_t mca_mpool_common_cuda_reg_t; +typedef struct mca_rcache_common_cuda_reg_t mca_rcache_common_cuda_reg_t; extern bool mca_common_cuda_enabled; -OPAL_DECLSPEC int mca_common_cuda_register_mca_variables(void); +OPAL_DECLSPEC void mca_common_cuda_register_mca_variables(void); OPAL_DECLSPEC void mca_common_cuda_register(void *ptr, size_t amount, char *msg); OPAL_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg); -OPAL_DECLSPEC void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg); +OPAL_DECLSPEC void mca_common_wait_stream_synchronize(mca_rcache_common_cuda_reg_t *rget_reg); OPAL_DECLSPEC int 
mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg, struct mca_btl_base_descriptor_t *, int *done); @@ -69,26 +69,26 @@ OPAL_DECLSPEC int progress_one_cuda_ipc_event(struct mca_btl_base_descriptor_t * OPAL_DECLSPEC int progress_one_cuda_dtoh_event(struct mca_btl_base_descriptor_t **); OPAL_DECLSPEC int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **); -OPAL_DECLSPEC int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg, - mca_mpool_common_cuda_reg_t *old_reg); +OPAL_DECLSPEC int mca_common_cuda_memhandle_matches(mca_rcache_common_cuda_reg_t *new_reg, + mca_rcache_common_cuda_reg_t *old_reg); OPAL_DECLSPEC void mca_common_cuda_construct_event_and_handle(uintptr_t *event, void *handle); OPAL_DECLSPEC void mca_common_cuda_destruct_event(uintptr_t event); -OPAL_DECLSPEC int cuda_getmemhandle(void *base, size_t, mca_mpool_base_registration_t *newreg, - mca_mpool_base_registration_t *hdrreg); -OPAL_DECLSPEC int cuda_ungetmemhandle(void *reg_data, mca_mpool_base_registration_t *reg); -OPAL_DECLSPEC int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *newreg, - mca_mpool_base_registration_t *hdrreg); -OPAL_DECLSPEC int cuda_closememhandle(void *reg_data, mca_mpool_base_registration_t *reg); +OPAL_DECLSPEC int cuda_getmemhandle(void *base, size_t, mca_rcache_base_registration_t *newreg, + mca_rcache_base_registration_t *hdrreg); +OPAL_DECLSPEC int cuda_ungetmemhandle(void *reg_data, mca_rcache_base_registration_t *reg); +OPAL_DECLSPEC int cuda_openmemhandle(void *base, size_t size, mca_rcache_base_registration_t *newreg, + mca_rcache_base_registration_t *hdrreg); +OPAL_DECLSPEC int cuda_closememhandle(void *reg_data, mca_rcache_base_registration_t *reg); OPAL_DECLSPEC int mca_common_cuda_get_device(int *devicenum); OPAL_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2); OPAL_DECLSPEC int mca_common_cuda_stage_one_init(void); OPAL_DECLSPEC int 
mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base); OPAL_DECLSPEC void mca_common_cuda_fini(void); #if OPAL_CUDA_GDR_SUPPORT -OPAL_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg); -OPAL_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg); +OPAL_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_rcache_base_registration_t *reg); +OPAL_DECLSPEC void mca_common_cuda_get_buffer_id(mca_rcache_base_registration_t *reg); #endif /* OPAL_CUDA_GDR_SUPPORT */ /** * Return: 0 if no packing is required for sending (the upper layer diff --git a/opal/mca/common/cuda/help-mpi-common-cuda.txt b/opal/mca/common/cuda/help-mpi-common-cuda.txt index f4e0f23b471..039766256b2 100644 --- a/opal/mca/common/cuda/help-mpi-common-cuda.txt +++ b/opal/mca/common/cuda/help-mpi-common-cuda.txt @@ -41,13 +41,13 @@ NOTE: You can turn off this warning by setting the MCA parameter The call to cuMemHostRegister(%p, %d, 0) failed. Host: %s cuMemHostRegister return value: %d - Memory Pool: %s + Registration cache: %s # [cuMemHostRegister failed] The call to cuMemHostRegister(%p, %d, 0) failed. Host: %s cuMemHostRegister return value: %d - Memory Pool: %s + Registration cache: %s # [cuIpcGetMemHandle failed] The call to cuIpcGetMemHandle failed. This means the GPU RDMA protocol @@ -83,7 +83,7 @@ for this is not enough free device memory. Try to reduce the device memory footprint of your application. # [cuIpcCloseMemHandle failed] -The call to cuIpcCloseMemHandle failed. This is a warning and the program +The call to cuIpcCloseMemHandle failed. This is a warning and the program will continue to run. cuIpcCloseMemHandle return value: %d address: %p @@ -147,17 +147,17 @@ cause the program to abort. cuStreamCreate return value: %d Check the cuda.h file for what the return vale means. 
# -[dlopen disabled] +[dlopen disabled] Open MPI was compiled without dynamic library support (e.g., with the - --disable-dlopen flag), and therefore cannot utilize CUDA support. + --disable-dlopen flag), and therefore cannot utilize CUDA support. If you need CUDA support, reconfigure Open MPI with dynamic library support enabled. # [dlopen failed] -The library attempted to open the following supporting CUDA libraries, +The library attempted to open the following supporting CUDA libraries, but each of them failed. CUDA-aware support is disabled. %s -If you are not interested in CUDA-aware support, then run with +If you are not interested in CUDA-aware support, then run with --mca mpi_cuda_support 0 to suppress this message. If you are interested in CUDA-aware support, then try setting LD_LIBRARY_PATH to the location of libcuda.so.1 to get passed this issue. diff --git a/opal/mca/common/libfabric/Makefile.am b/opal/mca/common/libfabric/Makefile.am new file mode 100644 index 00000000000..5da6be35cd6 --- /dev/null +++ b/opal/mca/common/libfabric/Makefile.am @@ -0,0 +1,102 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2013 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# A word of explanation... 
+# +# This library is linked against various MCA components because the +# support for libfabric is needed in various places. +# +# Note that building this common component statically and linking +# against other dynamic components is *not* supported! + +AM_CPPFLAGS = $(opal_common_libfabric_CPPFLAGS) + +# Header files + +headers = \ + common_libfabric.h + +# Source files + +sources = \ + common_libfabric.c + +# As per above, we'll either have an installable or noinst result. +# The installable one should follow the same MCA prefix naming rules +# (i.e., libmca_<type>_<name>.la). The noinst one can be named +# whatever it wants, although libmca_<type>_<name>_noinst.la is +# recommended. + +# To simplify components that link to this library, we will *always* +# have an output libtool library named libmca_<type>_<name>.la -- even +# for case 2) described above (i.e., so there's no conditional logic +# necessary in component Makefile.am's that link to this library). +# Hence, if we're creating a noinst version of this library (i.e., +# case 2), we sym link it to the libmca_<type>_<name>.la name +# (libtool will do the Right Things under the covers). See the +# all-local and clean-local rules, below, for how this is effected. 
+ +lib_LTLIBRARIES = +noinst_LTLIBRARIES = +comp_inst = lib@OPAL_LIB_PREFIX@mca_common_libfabric.la +comp_noinst = lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst.la + +if MCA_BUILD_opal_common_libfabric_DSO +lib_LTLIBRARIES += $(comp_inst) +else +noinst_LTLIBRARIES += $(comp_noinst) +endif + +lib@OPAL_LIB_PREFIX@mca_common_libfabric_la_SOURCES = $(headers) $(sources) +lib@OPAL_LIB_PREFIX@mca_common_libfabric_la_LDFLAGS = \ + $(opal_common_libfabric_LDFLAGS) \ + -version-info $(libmca_opal_common_libfabric_so_version) +lib@OPAL_LIB_PREFIX@mca_common_libfabric_la_LIBADD = $(opal_common_libfabric_LIBS) + +lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst_la_SOURCES = $(headers) $(sources) +lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst_la_LDFLAGS = $(opal_common_libfabric_LDFLAGS) +lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst_la_LIBADD = $(opal_common_libfabric_LIBS) + +# Conditionally install the header files + +if WANT_INSTALL_HEADERS +opaldir = $(opalincludedir)/$(subdir) +opal_HEADERS = $(headers) +endif + +# These two rules will sym link the "noinst" libtool library filename +# to the installable libtool library filename in the case where we are +# compiling this component statically (case 2), described above). + +V=0 +OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$V) +ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$AM_DEFAULT_VERBOSITY) +ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(comp_inst)`; + +all-local: + $(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \ + rm -f "$(comp_inst)"; \ + $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \ + fi + +clean-local: + if test -z "$(lib_LTLIBRARIES)"; then \ + rm -f "$(comp_inst)"; \ + fi diff --git a/opal/mca/common/libfabric/common_libfabric.c b/opal/mca/common/libfabric/common_libfabric.c new file mode 100644 index 00000000000..cb989af93c5 --- /dev/null +++ b/opal/mca/common/libfabric/common_libfabric.c @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" + +#include +#include + +#include "common_libfabric.h" + +int mca_common_libfabric_register_mca_variables(void) +{ + return OPAL_SUCCESS; +} diff --git a/opal/mca/common/libfabric/common_libfabric.h b/opal/mca/common/libfabric/common_libfabric.h new file mode 100644 index 00000000000..10bc05598f8 --- /dev/null +++ b/opal/mca/common/libfabric/common_libfabric.h @@ -0,0 +1,16 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MCA_COMMON_LIBFABRIC_H +#define OPAL_MCA_COMMON_LIBFABRIC_H + +OPAL_DECLSPEC int mca_common_libfabric_register_mca_variables(void); + +#endif /* OPAL_MCA_COMMON_LIBFABRIC_H */ diff --git a/opal/mca/common/libfabric/configure.m4 b/opal/mca/common/libfabric/configure.m4 new file mode 100644 index 00000000000..49e7d46c895 --- /dev/null +++ b/opal/mca/common/libfabric/configure.m4 @@ -0,0 +1,30 @@ +# -*- shell-script -*- +# +# Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2013 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AC_DEFUN([MCA_opal_common_libfabric_CONFIG],[ + AC_CONFIG_FILES([opal/mca/common/libfabric/Makefile]) + + # Check for libfabric. Note that $opal_common_libfabric_happy is + # used in other configure.m4's to know if libfabric configured + # successfully. 
+ OPAL_CHECK_LIBFABRIC([opal_common_libfabric], + [opal_common_libfabric_happy=yes + common_libfabric_WRAPPER_EXTRA_LDFLAGS=$opal_common_libfabric_LDFLAGS + common_libfabric_WRAPPER_EXTRA_LIBS=$opal_common_libfabric_LIBS + $1], + [opal_common_libfabric_happy=no + $2]) + +])dnl diff --git a/opal/mca/common/libfabric/owner.txt b/opal/mca/common/libfabric/owner.txt new file mode 100644 index 00000000000..5fe87e2d40c --- /dev/null +++ b/opal/mca/common/libfabric/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: Intel +status:active diff --git a/opal/mca/common/sm/Makefile.am b/opal/mca/common/sm/Makefile.am index e2b116c0f8b..ba57c100da5 100644 --- a/opal/mca/common/sm/Makefile.am +++ b/opal/mca/common/sm/Makefile.am @@ -5,24 +5,24 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2010-2013 Los Alamos National Security, LLC. +# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2015 Los Alamos National Security, LLC. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # A word of explanation... # # This library is linked against various MCA components because all -# shared-memory based components (e.g., mpool, ptl, etc.) need to +# shared-memory based components (e.g., btl/sm, btl/smcuda, etc.) need to # share some common code and data. There's two cases: # # 1. libmca_common_sm.la is a shared library. 
By linking that shared @@ -44,12 +44,14 @@ # Header files headers = \ - common_sm.h + common_sm.h \ + common_sm_mpool.h # Source files sources = \ - common_sm.c + common_sm.c \ + common_sm_mpool.c # Help file @@ -84,7 +86,7 @@ endif lib@OPAL_LIB_PREFIX@mca_common_sm_la_SOURCES = \ $(headers) $(sources) lib@OPAL_LIB_PREFIX@mca_common_sm_la_LDFLAGS = \ - -version-info $(libmca_common_sm_so_version) + -version-info $(libmca_opal_common_sm_so_version) lib@OPAL_LIB_PREFIX@mca_common_sm_noinst_la_SOURCES = \ $(headers) $(sources) diff --git a/opal/mca/common/sm/common_sm.c b/opal/mca/common/sm/common_sm.c index 44854c679a2..6c076d3f386 100644 --- a/opal/mca/common/sm/common_sm.c +++ b/opal/mca/common/sm/common_sm.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2013 Los Alamos National Security, LLC. + * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014 Intel, Inc. 
All rights reserved * $COPYRIGHT$ @@ -36,19 +37,14 @@ #include "opal/util/show_help.h" #include "opal/util/error.h" #include "opal/mca/shmem/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/runtime/opal_cr.h" -#endif +#include "common_sm.h" #include "opal/constants.h" -#include "opal/mca/mpool/sm/mpool_sm.h" -OBJ_CLASS_INSTANCE( - mca_common_sm_module_t, - opal_list_item_t, - NULL, - NULL -); + +OBJ_CLASS_INSTANCE(mca_common_sm_module_t,opal_list_item_t, + NULL, NULL); + /* ////////////////////////////////////////////////////////////////////////// */ /* static utility functions */ @@ -258,13 +254,10 @@ mca_common_sm_local_proc_reorder(opal_proc_t **procs, * * @retval addr virtual address */ -void * -mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool, - size_t *size, - mca_mpool_base_registration_t **registration) +void *mca_common_sm_seg_alloc (void *ctx, size_t *size) { - mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t *)mpool; - mca_common_sm_seg_header_t *seg = sm_module->sm_common_module->module_seg; + mca_common_sm_module_t *sm_module = (mca_common_sm_module_t *) ctx; + mca_common_sm_seg_header_t *seg = sm_module->module_seg; void *addr; opal_atomic_lock(&seg->seg_lock); @@ -275,7 +268,7 @@ mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool, size_t fixup; /* add base address to segment offset */ - addr = sm_module->sm_common_module->module_data_addr + seg->seg_offset; + addr = sm_module->module_data_addr + seg->seg_offset; seg->seg_offset += *size; /* fix up seg_offset so next allocation is aligned on a @@ -286,9 +279,7 @@ mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool, seg->seg_offset += sizeof(long) - fixup; } } - if (NULL != registration) { - *registration = NULL; - } + opal_atomic_unlock(&seg->seg_lock); return addr; } diff --git a/opal/mca/common/sm/common_sm.h b/opal/mca/common/sm/common_sm.h index 7c4e6310dba..819b82f6eef 100644 --- a/opal/mca/common/sm/common_sm.h +++ 
b/opal/mca/common/sm/common_sm.h @@ -32,7 +32,7 @@ #include "opal/mca/btl/base/base.h" #include "opal/util/proc.h" #include "opal/mca/btl/base/btl_base_error.h" -#include "opal/mca/mpool/mpool.h" +#include "common_sm_mpool.h" BEGIN_C_DECLS @@ -66,6 +66,8 @@ typedef struct mca_common_sm_module_t { unsigned char *module_data_addr; /* shared memory backing facility object that encapsulates shmem info */ opal_shmem_ds_t shmem_ds; + /* memory pool interface to shared-memory region */ + mca_mpool_base_module_t *mpool; } mca_common_sm_module_t; OBJ_CLASS_DECLARATION(mca_common_sm_module_t); @@ -126,10 +128,7 @@ mca_common_sm_module_unlink(mca_common_sm_module_t *modp); /** * callback from the sm mpool */ -OPAL_DECLSPEC extern void * -mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool, - size_t *size, - mca_mpool_base_registration_t **registration); +OPAL_DECLSPEC extern void *mca_common_sm_seg_alloc (void *ctx, size_t *size); /** * This function will release all local resources attached to the @@ -150,6 +149,7 @@ mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module); */ OPAL_DECLSPEC extern mca_common_sm_module_t *mca_common_sm_module; + END_C_DECLS #endif /* _COMMON_SM_H_ */ diff --git a/opal/mca/common/sm/common_sm_mpool.c b/opal/mca/common/sm/common_sm_mpool.c new file mode 100644 index 00000000000..545c6ad8a6b --- /dev/null +++ b/opal/mca/common/sm/common_sm_mpool.c @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2009-2017 Cisco Systems, Inc. 
All rights reserved + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include +#include "common_sm_mpool.h" +#include "opal/mca/common/sm/common_sm.h" +#include "opal/mca/common/cuda/common_cuda.h" +#include "opal/mca/allocator/base/base.h" +#ifdef HAVE_UNISTD_H +#include +#endif +#include "opal/mca/hwloc/base/base.h" + +static void sm_module_finalize(mca_mpool_base_module_t* module); + +/* + * Returns base address of shared memory mapping. + */ +static void *mca_common_sm_mpool_base (mca_mpool_base_module_t *mpool); + +/** + * Allocate block of shared memory. + */ +static void *mca_common_sm_mpool_alloc (mca_mpool_base_module_t *mpool, + size_t size, size_t align, + uint32_t flags); + +/** + * free function typedef + */ +static void mca_common_sm_mpool_free(mca_mpool_base_module_t *mpool, + void *addr); + +/* + * Initializes the mpool module. 
+ */ +static void mca_common_sm_mpool_module_init(mca_common_sm_mpool_module_t* mpool) +{ + mpool->super.mpool_base = mca_common_sm_mpool_base; + mpool->super.mpool_alloc = mca_common_sm_mpool_alloc; + mpool->super.mpool_free = mca_common_sm_mpool_free; + mpool->super.mpool_finalize = sm_module_finalize; + mpool->super.mpool_ft_event = NULL; + mpool->super.flags = 0; + + mpool->sm_size = 0; + mpool->sm_allocator = NULL; + mpool->sm_mmap = NULL; + mpool->sm_common_module = NULL; + mpool->mem_node = -1; +} + +mca_mpool_base_module_t *common_sm_mpool_create (mca_common_sm_mpool_resources_t *resources) +{ + mca_common_sm_mpool_module_t *mpool_module; + mca_allocator_base_component_t* allocator_component; + + /* Make a new mpool module; give up if the allocation fails */ + mpool_module = (mca_common_sm_mpool_module_t *) malloc (sizeof (*mpool_module)); + if (NULL == mpool_module) { return NULL; } + mca_common_sm_mpool_module_init(mpool_module); + /* set sm_size */ + mpool_module->sm_size = resources->size; + + allocator_component = mca_allocator_component_lookup(resources->allocator); + + /* if specified allocator cannot be loaded - look for an alternative */ + if (NULL == allocator_component) { + if (opal_list_get_size(&opal_allocator_base_framework.framework_components) > 0) { /* a component exists: fall back to the first one */ + mca_base_component_list_item_t *item = + (mca_base_component_list_item_t *) + opal_list_get_first(&opal_allocator_base_framework.framework_components); + allocator_component = + (mca_allocator_base_component_t *)item->cli_component; + opal_output( + 0, "mca_common_sm_mpool_init: " + "unable to locate allocator: %s - using %s\n", + resources->allocator, + allocator_component->allocator_version.mca_component_name); + } else { + opal_output(0, "mca_common_sm_mpool_init: " + "unable to locate allocator: %s\n", + resources->allocator); + free(mpool_module); + return NULL; + } + } + + mpool_module->mem_node = resources->mem_node; + + if (NULL == (mpool_module->sm_common_module = + mca_common_sm_module_attach(&resources->bs_meta_buf, + sizeof(mca_common_sm_module_t), 8))) {
+ opal_output(0, "mca_common_sm_mpool_init: " + "unable to create shared memory mapping (%s)", + resources->bs_meta_buf.seg_name); + free(mpool_module); + return NULL; + } + + /* setup allocator */ + mpool_module->sm_allocator = + allocator_component->allocator_init (true, mca_common_sm_seg_alloc, + NULL, mpool_module->sm_common_module); + if (NULL == mpool_module->sm_allocator) { + opal_output(0, "mca_common_sm_mpool_init: unable to initialize allocator"); + free(mpool_module); + return NULL; + } + + return &mpool_module->super; +} + + +/* + * base address of shared memory mapping + */ +static void *mca_common_sm_mpool_base(mca_mpool_base_module_t *mpool) +{ + mca_common_sm_mpool_module_t *sm_mpool = (mca_common_sm_mpool_module_t *) mpool; + return (NULL != sm_mpool->sm_common_module) ? + sm_mpool->sm_common_module->module_seg_addr : NULL; +} + +/** + * allocate function: returns whatever the allocator returns; a non-NULL block is bound to mem_node when mem_node >= 0 + */ +static void *mca_common_sm_mpool_alloc (mca_mpool_base_module_t* mpool, + size_t size, size_t align, uint32_t flags) +{ + mca_common_sm_mpool_module_t* mpool_sm = (mca_common_sm_mpool_module_t*)mpool; + opal_hwloc_base_memory_segment_t mseg; + + mseg.mbs_start_addr = + mpool_sm->sm_allocator->alc_alloc(mpool_sm->sm_allocator, size, align); + + if (NULL != mseg.mbs_start_addr && mpool_sm->mem_node >= 0) { /* never try to bind a failed (NULL) allocation */ + mseg.mbs_len = size; + opal_hwloc_base_membind(&mseg, 1, mpool_sm->mem_node); + } + + return mseg.mbs_start_addr; +} + +/** + * free function: hands addr back to the allocator it was obtained from + */ +static void mca_common_sm_mpool_free(mca_mpool_base_module_t *mpool, void *addr) +{ + mca_common_sm_mpool_module_t* mpool_sm = (mca_common_sm_mpool_module_t*)mpool; + mpool_sm->sm_allocator->alc_free(mpool_sm->sm_allocator, addr); +} + +static void sm_module_finalize(mca_mpool_base_module_t* module) +{ + mca_common_sm_mpool_module_t *sm_module = (mca_common_sm_mpool_module_t*) module; + + if (NULL != sm_module->sm_common_module) { + if (OPAL_SUCCESS == + mca_common_sm_fini(sm_module->sm_common_module)) { + unlink(sm_module->sm_common_module->shmem_ds.seg_name); + } +
OBJ_RELEASE(sm_module->sm_common_module); + sm_module->sm_common_module = NULL; + } +} diff --git a/opal/mca/common/sm/common_sm_mpool.h b/opal/mca/common/sm/common_sm_mpool.h new file mode 100644 index 00000000000..8d70bd51b19 --- /dev/null +++ b/opal/mca/common/sm/common_sm_mpool.h @@ -0,0 +1,62 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2015 Los Alamos National Security, LLC. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + */ +#ifndef MCA_COMMON_SM_MPOOL_H +#define MCA_COMMON_SM_MPOOL_H + +#include "opal_config.h" + +#include "opal/mca/event/event.h" +#include "opal/mca/shmem/shmem.h" + +#include "opal/mca/mpool/mpool.h" +#include "opal/mca/allocator/allocator.h" + +BEGIN_C_DECLS + +struct mca_common_sm_module_t; + +typedef struct mca_common_sm_mpool_resources_t { + size_t size; + int32_t mem_node; + const char *allocator; + /* backing store metadata */ + opal_shmem_ds_t bs_meta_buf; +} mca_common_sm_mpool_resources_t; + +typedef struct mca_common_sm_mpool_module_t { + mca_mpool_base_module_t super; + long sm_size; + mca_allocator_base_module_t *sm_allocator; + struct mca_common_sm_mpool_mmap_t *sm_mmap; + struct mca_common_sm_module_t *sm_common_module; + int32_t mem_node; +} mca_common_sm_mpool_module_t; + +OPAL_DECLSPEC mca_mpool_base_module_t *common_sm_mpool_create (mca_common_sm_mpool_resources_t *); + +END_C_DECLS + +#endif diff --git a/opal/mca/common/ugni/Makefile.am b/opal/mca/common/ugni/Makefile.am index 42ba0f864a5..ac7482c345c 100644 --- a/opal/mca/common/ugni/Makefile.am +++ b/opal/mca/common/ugni/Makefile.am @@ -3,9 +3,9 @@ # Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. 
-# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -35,7 +35,7 @@ lib@OPAL_LIB_PREFIX@mca_common_ugni_la_SOURCES = $(headers) $(ugni_SOURCES) nodist_lib@OPAL_LIB_PREFIX@mca_common_ugni_la_SOURCES = $(ugni_nodist_SOURCES) lib@OPAL_LIB_PREFIX@mca_common_ugni_la_LIBADD = $(common_ugni_LIBS) lib@OPAL_LIB_PREFIX@mca_common_ugni_la_LDFLAGS = \ - -version-info $(libmca_common_ugni_so_version) \ + -version-info $(libmca_opal_common_ugni_so_version) \ $(common_ugni_LDFLAGS) lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_SOURCES = \ diff --git a/opal/mca/common/ugni/common_ugni.c b/opal/mca/common/ugni/common_ugni.c index 4cd86993c26..2877d046d0e 100644 --- a/opal/mca/common/ugni/common_ugni.c +++ b/opal/mca/common/ugni/common_ugni.c @@ -3,7 +3,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -193,7 +193,7 @@ static int opal_common_ugni_send_modex (int my_cdm_id) * new ranks started on the same nodes as the spawnee ranks, etc. 
*/ - OPAL_MODEX_SEND(rc, PMIX_ASYNC_RDY, PMIX_GLOBAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &opal_common_ugni_component, modex_msg, total_msg_size); @@ -258,7 +258,8 @@ int opal_common_ugni_init (void) /* Create a communication domain */ modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED | - GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL; + GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL | + GNI_CDM_MODE_FMA_SHARED; /* collect uGNI information */ rc = get_ptag(&opal_common_ugni_module.ptag); diff --git a/opal/mca/common/ugni/common_ugni.h b/opal/mca/common/ugni/common_ugni.h index 50a2dbdd2bd..5f39fd18513 100644 --- a/opal/mca/common/ugni/common_ugni.h +++ b/opal/mca/common/ugni/common_ugni.h @@ -39,7 +39,7 @@ typedef struct opal_common_ugni_modex_t opal_common_ugni_modex_t; struct opal_common_ugni_device_t { opal_object_t super; - + gni_nic_handle_t dev_handle; /* Minor number of the Gemini NIC */ diff --git a/opal/mca/common/ugni/common_ugni_ep.c b/opal/mca/common/ugni/common_ugni_ep.c index e1b64460204..dadf39ac526 100644 --- a/opal/mca/common/ugni/common_ugni_ep.c +++ b/opal/mca/common/ugni/common_ugni_ep.c @@ -3,7 +3,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -34,7 +34,7 @@ int opal_common_ugni_endpoint_for_proc (opal_common_ugni_device_t *dev, opal_pro /* Receive the modex */ OPAL_MODEX_RECV(rc, &opal_common_ugni_component, - peer_proc, (void **)&modex, &msg_size); + &peer_proc->proc_name, (void **)&modex, &msg_size); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { OPAL_OUTPUT((-1, "btl/ugni error receiving modex")); return rc; diff --git a/opal/mca/common/ugni/common_ugni_ep.h b/opal/mca/common/ugni/common_ugni_ep.h index e6de548fcf2..99f8d07ced9 100644 --- a/opal/mca/common/ugni/common_ugni_ep.h +++ b/opal/mca/common/ugni/common_ugni_ep.h @@ -25,7 +25,7 @@ typedef struct opal_common_ugni_endpoint_t opal_common_ugni_endpoint_t; OBJ_CLASS_DECLARATION(opal_common_ugni_endpoint_t); -/* +/* * Get (and retain) a reference to an endpoint to peer_proc. This endpoint * needs to be returned with opal_common_ugni_endpoint_return. * diff --git a/opal/mca/common/verbs/Makefile.am b/opal/mca/common/verbs/Makefile.am index b03abce0160..8cd08eb2ac0 100644 --- a/opal/mca/common/verbs/Makefile.am +++ b/opal/mca/common/verbs/Makefile.am @@ -3,9 +3,9 @@ # Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. # Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -17,7 +17,6 @@ headers = \ sources = \ common_verbs_basics.c \ common_verbs_devlist.c \ - common_verbs_fake.c \ common_verbs_find_max_inline.c \ common_verbs_find_ports.c \ common_verbs_mca.c \ @@ -50,7 +49,7 @@ endif lib@OPAL_LIB_PREFIX@mca_common_verbs_la_SOURCES = $(headers) $(sources) lib@OPAL_LIB_PREFIX@mca_common_verbs_la_CPPFLAGS = $(common_verbs_CPPFLAGS) lib@OPAL_LIB_PREFIX@mca_common_verbs_la_LDFLAGS = \ - -version-info $(libmca_common_verbs_so_version) \ + -version-info $(libmca_opal_common_verbs_so_version) \ $(common_verbs_LDFLAGS) lib@OPAL_LIB_PREFIX@mca_common_verbs_la_LIBADD = $(common_verbs_LIBS) lib@OPAL_LIB_PREFIX@mca_common_verbs_noinst_la_SOURCES = $(headers) $(sources) diff --git a/opal/mca/common/verbs/common_verbs.h b/opal/mca/common/verbs/common_verbs.h index 795f89a6637..36ce3d85d1f 100644 --- a/opal/mca/common/verbs/common_verbs.h +++ b/opal/mca/common/verbs/common_verbs.h @@ -7,9 +7,9 @@ * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -92,7 +92,7 @@ enum { }; /** - * Find a list of ibv_device ports that match a specific criteria. + * Find a list of ibv_device ports that match a specific criteria. * * @param if_include (IN): comma-delimited list of interfaces to use * @param if_exclude (IN): comma-delimited list of interfaces to NOT use @@ -122,8 +122,8 @@ enum { * port_items referring to it have been freed). */ OPAL_DECLSPEC opal_list_t * -opal_common_verbs_find_ports(const char *if_include, - const char *if_exclude, +opal_common_verbs_find_ports(const char *if_include, + const char *if_exclude, int flags, int verbose_stream); @@ -163,7 +163,7 @@ opal_common_verbs_find_max_inline(struct ibv_device *device, * Flags can be the logical OR of OPAL_COMMON_VERBS_FLAGS_RC and/or * OPAL_COMMON_VERBS_FLAGS_UD. All other values are ignored. 
*/ -OPAL_DECLSPEC int opal_common_verbs_qp_test(struct ibv_context *device_context, +OPAL_DECLSPEC int opal_common_verbs_qp_test(struct ibv_context *device_context, int flags); /* * ibv_fork_init testing - if fork support is requested then ibv_fork_init @@ -180,11 +180,6 @@ OPAL_DECLSPEC int opal_common_verbs_qp_test(struct ibv_context *device_context, */ int opal_common_verbs_fork_test(void); -/* - * Register fake verbs drivers - */ -void opal_common_verbs_register_fake_drivers(void); - END_C_DECLS #endif diff --git a/opal/mca/common/verbs/common_verbs_basics.c b/opal/mca/common/verbs/common_verbs_basics.c index e79a16d6a8c..bd23f08ea3d 100644 --- a/opal/mca/common/verbs/common_verbs_basics.c +++ b/opal/mca/common/verbs/common_verbs_basics.c @@ -1,10 +1,10 @@ /* - * Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,6 +21,10 @@ #include #endif +#if OPAL_COMMON_VERBS_USNIC_HAPPY +#include "opal/mca/common/verbs_usnic/common_verbs_usnic.h" +#endif + /* This is crummy, but doesn't work on all platforms with all compilers. Specifically, trying to include it on RHEL4U3 with the PGI 32 bit compiler will cause problems because @@ -89,10 +93,14 @@ int opal_common_verbs_fork_test(void) } #endif - /* Now rgister any necessary fake libibverbs drivers. We +#if OPAL_COMMON_VERBS_USNIC_HAPPY + /* Now register any necessary fake libibverbs drivers. We piggyback loading these fake drivers on the fork test because - they must be loaded before ibv_get_device_list() is invoked. */ - opal_common_verbs_register_fake_drivers(); + they must be loaded before ibv_get_device_list() is invoked. + Note that this routine is in a different common component (see + comments over there for an explanation why). 
*/ + opal_common_verbs_usnic_register_fake_drivers(); +#endif return ret; } diff --git a/opal/mca/common/verbs/common_verbs_fake.c b/opal/mca/common/verbs/common_verbs_fake.c deleted file mode 100644 index 1b9f65102d4..00000000000 --- a/opal/mca/common/verbs/common_verbs_fake.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * The code in this file prevents spurious libibverbs warnings on - * stderr about devices that it doesn't recognize. - * - * Specifically, Cisco usNIC devices are exposed through the Linux - * InfiniBand kernel interface (i.e., they show up in - * /sys/class/infiniband). However, the userspace side of these - * drivers is not exposed through libibverbs (i.e., there is no - * libibverbs provider/plugin for usNIC). Therefore, when - * ibv_get_device_list() is invoked, libibverbs cannot find a plugin - * for usnic devices. This causes libibverbs to emit a spurious - * warning message on stderr. - * - * To avoid these extra stderr warnings, we insert a fake usnic verbs - * libibverbs provider that safely squelches these warnings. - * - * More specifically: the userspace side of usNIC is exposed through - * libfabric; we don't need libibverbs warnings about not being able - * to find a usnic driver. 
- */ - -#include "opal_config.h" - -#include -#include -#include -#include -#ifdef HAVE_INFINIBAND_DRIVER_H -#include -#endif - -#include "common_verbs.h" - -/***********************************************************************/ - -#define PCI_VENDOR_ID_CISCO (0x1137) - -static struct ibv_context *fake_alloc_context(struct ibv_device *ibdev, - int cmd_fd) -{ - /* Nothing to do here */ - return NULL; -} - -static void fake_free_context(struct ibv_context *ibctx) -{ - /* Nothing to do here */ -} - -/* Put just enough in here to convince libibverbs that this is a valid - device, and a little extra just in case someone looks at this - struct in a debugger. */ -static struct ibv_device fake_dev = { - .ops = { - .alloc_context = fake_alloc_context, - .free_context = fake_free_context - }, - .name = "fake ibv_device inserted by Open MPI for non-verbs devices" -}; - -static struct ibv_device *fake_driver_init(const char *uverbs_sys_path, - int abi_version) -{ - char value[8]; - int vendor; - - /* This function should only be invoked for - /sys/class/infiniband/usnic_X devices, but double check just to - be absolutely sure: read the vendor ID and ensure that it is - Cisco. 
*/ - if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", - value, sizeof(value)) < 0) { - return NULL; - } - sscanf(value, "%i", &vendor); - - if (vendor == PCI_VENDOR_ID_CISCO) { - return &fake_dev; - } - - /* We didn't find a device that we want to support */ - return NULL; -} - - -void opal_common_verbs_register_fake_drivers(void) -{ - /* No need to do this more than once */ - static bool already_done = false; - if (already_done) { - return; - } - already_done = true; - - /* If there are any usnic devices, then register a fake driver */ - DIR *class_dir; - class_dir = opendir("/sys/class/infiniband"); - if (NULL == class_dir) { - return; - } - - bool found = false; - struct dirent *dent; - while ((dent = readdir(class_dir)) != NULL) { - if (strncmp(dent->d_name, "usnic_", 6) == 0) { - found = true; - break; - } - } - closedir(class_dir); - - if (found) { - ibv_register_driver("usnic_verbs", fake_driver_init); - } -} diff --git a/opal/mca/common/verbs/common_verbs_find_max_inline.c b/opal/mca/common/verbs/common_verbs_find_max_inline.c index 576dd453d1d..4ea1c7a66bf 100644 --- a/opal/mca/common/verbs/common_verbs_find_max_inline.c +++ b/opal/mca/common/verbs/common_verbs_find_max_inline.c @@ -57,7 +57,7 @@ int opal_common_verbs_find_max_inline(struct ibv_device *device, struct ibv_cq *cq; struct ibv_qp_init_attr init_attr; uint32_t max_inline_data; - + *max_inline_arg = 0; /* Make a dummy CQ */ @@ -74,7 +74,7 @@ int opal_common_verbs_find_max_inline(struct ibv_device *device, ibv_get_device_name(device)); return OPAL_ERR_NOT_AVAILABLE; } - + /* Setup the QP attributes */ memset(&init_attr, 0, sizeof(init_attr)); init_attr.qp_type = IBV_QPT_RC; @@ -84,7 +84,7 @@ int opal_common_verbs_find_max_inline(struct ibv_device *device, init_attr.cap.max_send_sge = 1; init_attr.cap.max_recv_sge = 1; init_attr.cap.max_recv_wr = 1; - + /* Loop over max_inline_data values; just check powers of 2 -- that's good enough */ init_attr.cap.max_inline_data = max_inline_data = 1 
<< 20; @@ -100,7 +100,7 @@ int opal_common_verbs_find_max_inline(struct ibv_device *device, max_inline_data >>= 1; init_attr.cap.max_inline_data = max_inline_data; } - + /* Destroy the temp CQ */ ibv_destroy_cq(cq); diff --git a/opal/mca/common/verbs/common_verbs_find_ports.c b/opal/mca/common/verbs/common_verbs_find_ports.c index fc2575c54f7..baf6698fc8e 100644 --- a/opal/mca/common/verbs/common_verbs_find_ports.c +++ b/opal/mca/common/verbs/common_verbs_find_ports.c @@ -105,7 +105,7 @@ OBJ_CLASS_INSTANCE(opal_common_verbs_port_item_t, * Given a list of include or exclude items (never both), determine * whether we want the current port or not. */ -static bool want_this_port(char **include_list, char **exclude_list, +static bool want_this_port(char **include_list, char **exclude_list, opal_common_verbs_device_item_t *di, int port) { int i; @@ -134,7 +134,7 @@ static bool want_this_port(char **include_list, char **exclude_list, /* Didn't find it. So we don't want it. */ return false; - } + } /* Search the exclude list */ else { @@ -166,7 +166,7 @@ static const char *link_layer_to_str(int link_type) switch(link_type) { case IBV_LINK_LAYER_INFINIBAND: return "IB"; case IBV_LINK_LAYER_ETHERNET: return "IWARP"; - case IBV_LINK_LAYER_UNSPECIFIED: + case IBV_LINK_LAYER_UNSPECIFIED: default: return "unspecified"; } } @@ -212,8 +212,8 @@ static void check_sanity(char ***if_sanity_list, const char *dev_name, int port) /* * Find a list of ibv_ports matching a set of criteria. 
*/ -opal_list_t *opal_common_verbs_find_ports(const char *if_include, - const char *if_exclude, +opal_list_t *opal_common_verbs_find_ports(const char *if_include, + const char *if_exclude, int flags, int stream) { @@ -255,12 +255,12 @@ opal_list_t *opal_common_verbs_find_ports(const char *if_include, } if (NULL != if_include) { - opal_output_verbose(5, stream, "finding verbs interfaces, including %s", + opal_output_verbose(5, stream, "finding verbs interfaces, including %s", if_include); if_include_list = opal_argv_split(if_include, ','); if_sanity_list = opal_argv_copy(if_include_list); } else if (NULL != if_exclude) { - opal_output_verbose(5, stream, "finding verbs interfaces, excluding %s", + opal_output_verbose(5, stream, "finding verbs interfaces, excluding %s", if_exclude); if_exclude_list = opal_argv_split(if_exclude, ','); if_sanity_list = opal_argv_copy(if_exclude_list); @@ -415,7 +415,7 @@ opal_list_t *opal_common_verbs_find_ports(const char *if_include, OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) == 0) { /* If they specified neither link layer, then we want this port */ want = true; - } + } #if HAVE_DECL_IBV_LINK_LAYER_ETHERNET else if (flags & OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB) { if (IBV_LINK_LAYER_INFINIBAND == port_attr.link_layer) { @@ -445,7 +445,7 @@ opal_list_t *opal_common_verbs_find_ports(const char *if_include, if (NULL == pi) { goto err_free_port_list; } - pi->device = di; + pi->device = di; pi->port_num = j; pi->port_attr = port_attr; OBJ_RETAIN(di); diff --git a/opal/mca/common/verbs/common_verbs_qp_type.c b/opal/mca/common/verbs/common_verbs_qp_type.c index 2e5132e15ac..9f5b6a70871 100644 --- a/opal/mca/common/verbs/common_verbs_qp_type.c +++ b/opal/mca/common/verbs/common_verbs_qp_type.c @@ -12,9 +12,7 @@ #include "opal/constants.h" #include -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include #include "common_verbs.h" @@ -42,7 +40,7 @@ static bool make_qp(struct ibv_pd *pd, struct ibv_cq *cq, enum ibv_qp_type type) 
qpia.cap.max_inline_data = 0; qpia.qp_type = type; qpia.sq_sig_all = 0; - + qp = ibv_create_qp(pd, &qpia); if (NULL != qp) { ibv_destroy_qp(qp); @@ -59,9 +57,9 @@ int opal_common_verbs_qp_test(struct ibv_context *device_context, int flags) struct ibv_cq *cq = NULL; /* Bozo check */ - if (NULL == device_context || + if (NULL == device_context || (0 == (flags & (OPAL_COMMON_VERBS_FLAGS_RC | OPAL_COMMON_VERBS_FLAGS_UD)))) { - return OPAL_ERR_BAD_PARAM; + return OPAL_ERR_BAD_PARAM; } /* Try to make both the PD and CQ */ diff --git a/opal/mca/common/verbs/configure.m4 b/opal/mca/common/verbs/configure.m4 index b70332a81fc..1b70b0d4ca9 100644 --- a/opal/mca/common/verbs/configure.m4 +++ b/opal/mca/common/verbs/configure.m4 @@ -20,7 +20,7 @@ # $HEADER$ # -# MCA_opal_common_verbs_CONFIG([action-if-can-compile], +# MCA_opal_common_verbs_CONFIG([action-if-can-compile], # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_common_verbs_CONFIG],[ diff --git a/opal/mca/common/verbs/help-opal-common-verbs.txt b/opal/mca/common/verbs/help-opal-common-verbs.txt index e8c3ec257f6..854b182e2fe 100644 --- a/opal/mca/common/verbs/help-opal-common-verbs.txt +++ b/opal/mca/common/verbs/help-opal-common-verbs.txt @@ -2,9 +2,9 @@ # Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # [ibv_open_device fail] diff --git a/opal/mca/common/verbs_usnic/Makefile.am b/opal/mca/common/verbs_usnic/Makefile.am new file mode 100644 index 00000000000..182628d0c7a --- /dev/null +++ b/opal/mca/common/verbs_usnic/Makefile.am @@ -0,0 +1,40 @@ +# +# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. +# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. +# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers = common_verbs_usnic.h + +sources = common_verbs_usnic_fake.c + +# This component is always linked statically. It has code that is +# registered as a driver for libibverbs. There is no corresponding +# *un*register API in libibverbs, so this code can never be dlclosed. +# And therefore it must be in the libopen-pal library, not a DSO or +# dependent library. + +noinst_LTLIBRARIES = lib@OPAL_LIB_PREFIX@mca_common_verbs_usnic.la + +lib@OPAL_LIB_PREFIX@mca_common_verbs_usnic_la_SOURCES = \ + $(headers) $(sources) +lib@OPAL_LIB_PREFIX@mca_common_verbs_usnic_la_CPPFLAGS = \ + $(common_verbs_usnic_CPPFLAGS) +lib@OPAL_LIB_PREFIX@mca_common_verbs_usnic_la_LDFLAGS = \ + $(common_verbs_usnic_LDFLAGS) +lib@OPAL_LIB_PREFIX@mca_common_verbs_usnic_la_LIBADD = \ + $(common_verbs_usnic_LIBS) + +# Conditionally install the header files + +if WANT_INSTALL_HEADERS +opaldir = $(opalincludedir)/opal/mca/common/verbs_usnic +opal_HEADERS = $(headers) +else +opaldir = $(includedir) +endif diff --git a/opal/mca/common/verbs_usnic/common_verbs_usnic.h b/opal/mca/common/verbs_usnic/common_verbs_usnic.h new file mode 100644 index 00000000000..da2be73d63b --- /dev/null +++ b/opal/mca/common/verbs_usnic/common_verbs_usnic.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef _COMMON_VERBS_USNIC_H_ +#define _COMMON_VERBS_USNIC_H_ + +#include "opal_config.h" + +#include +#include + +BEGIN_C_DECLS + +/* + * Register fake verbs drivers + */ +void opal_common_verbs_usnic_register_fake_drivers(void); + +END_C_DECLS + +#endif diff --git a/opal/mca/common/verbs_usnic/common_verbs_usnic_fake.c b/opal/mca/common/verbs_usnic/common_verbs_usnic_fake.c new file mode 100644 index 00000000000..c21a07c3d2c --- /dev/null +++ b/opal/mca/common/verbs_usnic/common_verbs_usnic_fake.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* + * The code in this file prevents spurious libibverbs warnings on + * stderr about devices that it doesn't recognize. + * + * Specifically, Cisco usNIC devices are exposed through the Linux + * InfiniBand kernel interface (i.e., they show up in + * /sys/class/infiniband). However, the userspace side of these + * drivers is not exposed through libibverbs (i.e., there is no + * libibverbs provider/plugin for usNIC). Therefore, when + * ibv_get_device_list() is invoked, libibverbs cannot find a plugin + * for usnic devices. This causes libibverbs to emit a spurious + * warning message on stderr. + * + * To avoid these extra stderr warnings, we insert a fake usnic verbs + * libibverbs provider that safely squelches these warnings. + * + * More specifically: the userspace side of usNIC is exposed through + * libfabric; we don't need libibverbs warnings about not being able + * to find a usnic driver. + * + * Note: this code is statically linked into libopen-pal. It is + * registered via ibv_register_driver(), and there is no corresponding + * *un*register IBV API. 
Hence, we cannot allow this code to be + * dlclosed (e.g., if it is a DSO or a dependent common library) -- it + * must be in libopen-pal itself, which will stay resident in the MPI + * application. + */ + +#include "opal_config.h" + +#include +#include +#include +#include +#include +#ifdef HAVE_INFINIBAND_DRIVER_H +#include +#endif + +#include "common_verbs_usnic.h" + +/***********************************************************************/ + +#define PCI_VENDOR_ID_CISCO (0x1137) + +static struct ibv_context *fake_alloc_context(struct ibv_device *ibdev, + int cmd_fd) +{ + /* Nothing to do here */ + return NULL; +} + +static void fake_free_context(struct ibv_context *ibctx) +{ + /* Nothing to do here */ +} + +/* Put just enough in here to convince libibverbs that this is a valid + device, and a little extra just in case someone looks at this + struct in a debugger. */ +static struct ibv_device fake_dev = { + .ops = { + .alloc_context = fake_alloc_context, + .free_context = fake_free_context + }, + .name = "fake ibv_device inserted by Open MPI for non-verbs devices" +}; + +static struct ibv_device *fake_driver_init(const char *uverbs_sys_path, + int abi_version) +{ + char value[8]; + int vendor; + + /* This function should only be invoked for + /sys/class/infiniband/usnic_X devices, but double check just to + be absolutely sure: read the vendor ID and ensure that it is + Cisco. 
*/ + if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", + value, sizeof(value)) < 0) { + return NULL; + } + sscanf(value, "%i", &vendor); + + if (vendor == PCI_VENDOR_ID_CISCO) { + return &fake_dev; + } + + /* We didn't find a device that we want to support */ + return NULL; +} + + +void opal_common_verbs_usnic_register_fake_drivers(void) +{ + /* No need to do this more than once */ + static bool already_done = false; + if (already_done) { + return; + } + already_done = true; + + /* If there are any usnic devices, then register a fake driver */ + DIR *class_dir; + class_dir = opendir("/sys/class/infiniband"); + if (NULL == class_dir) { + return; + } + + bool found = false; + struct dirent *dent; + while ((dent = readdir(class_dir)) != NULL) { + if (strncmp(dent->d_name, "usnic_", 6) == 0) { + found = true; + break; + } + } + closedir(class_dir); + + if (found) { + ibv_register_driver("usnic_verbs", fake_driver_init); + } +} diff --git a/opal/mca/common/verbs_usnic/configure.m4 b/opal/mca/common/verbs_usnic/configure.m4 new file mode 100644 index 00000000000..68fed9404b8 --- /dev/null +++ b/opal/mca/common/verbs_usnic/configure.m4 @@ -0,0 +1,84 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. +# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# This component is a workaround to a bug in libibverbs that prints a +# dire warning that usNIC devices are not supported (of course not -- +# usNIC devices provide functionality through libfabric, not +# libibverbs). This component was written before a better workaround +# was created: a "no op" libibverbs plugin for usNIC devices +# (https://github.com/cisco/libusnic_verbs, and is also available in +# binary form on cisco.com). +# +# Hence, this component no longer builds by default. It's still +# available if a user specifically asks for it (e.g., if they do not +# want to install the "no op" libibverbs plugin), but it's not the +# default. This component also has the side-effect of making +# libopen-pal.so depend on libibverbs.so, which can be annoying for +# packagers (which is another reason it isn't built by default any +# more). +# +# This component must be linked statically into libopen-pal because it +# registers a provider for libibverbs at run time, and there's no +# libibverbs API to *un*register a plugin. Hence, we can't allow this +# code to be dlclosed/removed from the process. Hence: it must be +# compiled statically into libopen-pal. +# +AC_DEFUN([MCA_opal_common_verbs_usnic_COMPILE_MODE], [ + AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) + $4="static" + AC_MSG_RESULT([$$4]) +]) + +# MCA_opal_common_verbs_usnic_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_common_verbs_usnic_CONFIG],[ + AC_CONFIG_FILES([opal/mca/common/verbs_usnic/Makefile]) + common_verbs_usnic_happy=0 + + AC_ARG_WITH(verbs-usnic, + AC_HELP_STRING([--with-verbs-usnic], + [Add support in Open MPI to defeat a seemingly dire warning message from libibverbs that Cisco usNIC devices are not supported. 
This support is not compiled by default because you can also avoid this libibverbs bug by installing the libibverbs_usnic "no no" plugin, available from https://github.com/cisco/libusnic_verbs or in binary form from cisco.com])) + + AS_IF([test "$with_verbs_usnic" = "yes"], + [common_verbs_usnic_happy=1]) + + AS_IF([test $common_verbs_usnic_happy -eq 1], + [OPAL_CHECK_OPENFABRICS([common_verbs_usnic], + [common_verbs_usnic_happy=1], + [common_verbs_usnic_happy=0]) + ]) + + AC_DEFINE_UNQUOTED([OPAL_COMMON_VERBS_USNIC_HAPPY], + [$common_verbs_usnic_happy], + [Whether the common/usnic_verbs component is being built or not]) + + AS_IF([test $common_verbs_usnic_happy -eq 1], + [$1], + [$2]) + + # substitute in the things needed to build openib + AC_SUBST([common_verbs_usnic_CPPFLAGS]) + AC_SUBST([common_verbs_usnic_LDFLAGS]) + AC_SUBST([common_verbs_usnic_LIBS]) +])dnl diff --git a/opal/mca/common/verbs_usnic/owner.txt b/opal/mca/common/verbs_usnic/owner.txt new file mode 100644 index 00000000000..4fcebe52449 --- /dev/null +++ b/opal/mca/common/verbs_usnic/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: Cisco +status: maintenance diff --git a/opal/mca/compress/Makefile.am b/opal/mca/compress/Makefile.am deleted file mode 100644 index f276264fa61..00000000000 --- a/opal/mca/compress/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_compress.la -libmca_compress_la_SOURCES = - -# local files -headers = compress.h -libmca_compress_la_SOURCES += $(headers) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -opaldir = $(opalincludedir)/$(subdir) -nobase_opal_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h - rm -f $(nodist_man_MANS) diff --git a/opal/mca/compress/base/Makefile.am b/opal/mca/compress/base/Makefile.am deleted file mode 100644 index 2bdf725a8bc..00000000000 --- a/opal/mca/compress/base/Makefile.am +++ /dev/null @@ -1,20 +0,0 @@ -# -# Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h - -libmca_compress_la_SOURCES += \ - base/compress_base_open.c \ - base/compress_base_close.c \ - base/compress_base_select.c \ - base/compress_base_fns.c diff --git a/opal/mca/compress/base/base.h b/opal/mca/compress/base/base.h deleted file mode 100644 index 238532f1eda..00000000000 --- a/opal/mca/compress/base/base.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef OPAL_COMPRESS_BASE_H -#define OPAL_COMPRESS_BASE_H - -#include "opal_config.h" -#include "opal/mca/compress/compress.h" -#include "opal/util/opal_environ.h" -#include "opal/runtime/opal_cr.h" - -#include "opal/mca/base/base.h" - -/* - * Global functions for MCA overall COMPRESS - */ - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - /** - * Initialize the COMPRESS MCA framework - * - * @retval OPAL_SUCCESS Upon success - * @retval OPAL_ERROR Upon failures - * - * This function is invoked during opal_init(); - */ - OPAL_DECLSPEC int opal_compress_base_open(mca_base_open_flag_t flags); - - /** - * Select an available component. - * - * @retval OPAL_SUCCESS Upon Success - * @retval OPAL_NOT_FOUND If no component can be selected - * @retval OPAL_ERROR Upon other failure - * - */ - OPAL_DECLSPEC int opal_compress_base_select(void); - - /** - * Finalize the COMPRESS MCA framework - * - * @retval OPAL_SUCCESS Upon success - * @retval OPAL_ERROR Upon failures - * - * This function is invoked during opal_finalize(); - */ - OPAL_DECLSPEC int opal_compress_base_close(void); - - /** - * Globals - */ - OPAL_DECLSPEC extern mca_base_framework_t opal_compress_base_framework; - OPAL_DECLSPEC extern opal_compress_base_component_t opal_compress_base_selected_component; - OPAL_DECLSPEC extern opal_compress_base_module_t opal_compress; - - /** - * - */ - OPAL_DECLSPEC int opal_compress_base_tar_create(char ** target); - OPAL_DECLSPEC int opal_compress_base_tar_extract(char ** target); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* OPAL_COMPRESS_BASE_H */ diff --git a/opal/mca/compress/base/compress_base_close.c b/opal/mca/compress/base/compress_base_close.c deleted file mode 100644 index b8ad5dfc75c..00000000000 --- a/opal/mca/compress/base/compress_base_close.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of 
Indiana University. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/include/opal/constants.h" -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" - -int opal_compress_base_close(void) -{ - /* Compression currently only used with C/R */ - if( !opal_cr_is_enabled ) { - opal_output_verbose(10, opal_compress_base_framework.framework_output, - "compress:open: FT is not enabled, skipping!"); - return OPAL_SUCCESS; - } - - /* Call the component's finalize routine */ - if( NULL != opal_compress.finalize ) { - opal_compress.finalize(); - } - - /* Close all available modules that are open */ - return mca_base_framework_components_close (&opal_compress_base_framework, NULL); -} diff --git a/opal/mca/compress/base/compress_base_fns.c b/opal/mca/compress/base/compress_base_fns.c deleted file mode 100644 index f06b32fa0d7..00000000000 --- a/opal/mca/compress/base/compress_base_fns.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#if HAVE_SYS_TYPES_H -#include -#endif -#if HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_SYS_STAT_H -#include -#endif - -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/include/opal/constants.h" -#include "opal/util/os_dirpath.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" - -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" - -/****************** - * Local Function Defs - ******************/ - -/****************** - * Object stuff - ******************/ - -int opal_compress_base_tar_create(char ** target) -{ - int exit_status = OPAL_SUCCESS; - char *tar_target = NULL; - char **argv = NULL; - pid_t child_pid = 0; - int status = 0; - - asprintf(&tar_target, "%s.tar", *target); - - child_pid = fork(); - if( 0 == child_pid ) { /* Child */ - char *cmd; - asprintf(&cmd, "tar -cf %s %s", tar_target, *target); - - argv = opal_argv_split(cmd, ' '); - status = execvp(argv[0], argv); - - opal_output(0, "compress:base: Tar:: Failed to exec child [%s] status = %d\n", cmd, status); - exit(OPAL_ERROR); - } - else if(0 < child_pid) { - waitpid(child_pid, &status, 0); - - if( !WIFEXITED(status) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - free(*target); - *target = strdup(tar_target); - } - else { - exit_status = OPAL_ERROR; - goto cleanup; - } - - cleanup: - if( NULL != tar_target ) { - free(tar_target); - } - - return exit_status; -} - -int opal_compress_base_tar_extract(char ** target) -{ - int exit_status = OPAL_SUCCESS; - char **argv = NULL; - pid_t child_pid = 0; - int status = 0; - - child_pid = fork(); - if( 0 == child_pid ) { /* Child */ - char *cmd; - asprintf(&cmd, "tar -xf %s", *target); - - argv = opal_argv_split(cmd, ' '); - status = execvp(argv[0], argv); - - opal_output(0, "compress:base: 
Tar:: Failed to exec child [%s] status = %d\n", cmd, status); - exit(OPAL_ERROR); - } - else if(0 < child_pid) { - waitpid(child_pid, &status, 0); - - if( !WIFEXITED(status) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* Strip off the '.tar' */ - (*target)[strlen(*target)-4] = '\0'; - } - else { - exit_status = OPAL_ERROR; - goto cleanup; - } - - cleanup: - - return exit_status; -} - -/****************** - * Local Functions - ******************/ diff --git a/opal/mca/compress/base/compress_base_open.c b/opal/mca/compress/base/compress_base_open.c deleted file mode 100644 index dfa49004099..00000000000 --- a/opal/mca/compress/base/compress_base_open.c +++ /dev/null @@ -1,63 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/mca/base/base.h" -#include "opal/mca/compress/base/base.h" - -#include "opal/mca/compress/base/static-components.h" - -/* - * Globals - */ -opal_compress_base_module_t opal_compress = { - NULL, /* init */ - NULL, /* finalize */ - NULL, /* compress */ - NULL, /* compress_nb */ - NULL, /* decompress */ - NULL /* decompress_nb */ -}; - -opal_compress_base_component_t opal_compress_base_selected_component = {{0}}; - -static int opal_compress_base_register(mca_base_register_flag_t flags); - -MCA_BASE_FRAMEWORK_DECLARE(opal, compress, "COMPRESS MCA", - opal_compress_base_register, opal_compress_base_open, - opal_compress_base_close, mca_compress_base_static_components, 0); - -static int opal_compress_base_register(mca_base_register_flag_t flags) -{ - return OPAL_SUCCESS; -} - -/** - * Function for finding and opening either all MCA components, - * or the one that was specifically requested via a MCA parameter. - */ -int opal_compress_base_open(mca_base_open_flag_t flags) -{ - /* Compression currently only used with C/R */ - if(!opal_cr_is_enabled) { - opal_output_verbose(10, opal_compress_base_framework.framework_output, - "compress:open: FT is not enabled, skipping!"); - return OPAL_SUCCESS; - } - - /* Open up all available components */ - return mca_base_framework_components_open(&opal_compress_base_framework, flags); -} diff --git a/opal/mca/compress/base/compress_base_select.c b/opal/mca/compress/base/compress_base_select.c deleted file mode 100644 index 6dc43825bb5..00000000000 --- a/opal/mca/compress/base/compress_base_select.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#ifdef HAVE_UNISTD_H -#include "unistd.h" -#endif - -#include "opal/include/opal/constants.h" -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" - -int opal_compress_base_select(void) -{ - int ret, exit_status = OPAL_SUCCESS; - opal_compress_base_component_t *best_component = NULL; - opal_compress_base_module_t *best_module = NULL; - - /* Compression currently only used with C/R */ - if( !opal_cr_is_enabled ) { - opal_output_verbose(10, opal_compress_base_framework.framework_output, - "compress:open: FT is not enabled, skipping!"); - return OPAL_SUCCESS; - } - - /* - * Select the best component - */ - if( OPAL_SUCCESS != mca_base_select("compress", opal_compress_base_framework.framework_output, - &opal_compress_base_framework.framework_components, - (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { - /* This will only happen if no component was selected */ - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* Save the winner */ - opal_compress_base_selected_component = *best_component; - - /* Initialize the winner */ - if (NULL != best_module) { - if (OPAL_SUCCESS != (ret = best_module->init()) ) { - exit_status = ret; - goto cleanup; - } - opal_compress = *best_module; - } - - cleanup: - return exit_status; -} diff --git a/opal/mca/compress/base/owner.txt b/opal/mca/compress/base/owner.txt deleted file mode 100644 index b1efc765f07..00000000000 --- a/opal/mca/compress/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner:project -status:maintenance diff --git a/opal/mca/compress/bzip/Makefile.am b/opal/mca/compress/bzip/Makefile.am deleted file mode 100644 index ae36f5efcaa..00000000000 --- a/opal/mca/compress/bzip/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - compress_bzip.h \ - compress_bzip_component.c \ - compress_bzip_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_compress_bzip_DSO -component_noinst = -component_install = mca_compress_bzip.la -else -component_noinst = libmca_compress_bzip.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_compress_bzip_la_SOURCES = $(sources) -mca_compress_bzip_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_compress_bzip_la_SOURCES = $(sources) -libmca_compress_bzip_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/compress/bzip/compress_bzip.h b/opal/mca/compress/bzip/compress_bzip.h deleted file mode 100644 index 448430c263d..00000000000 --- a/opal/mca/compress/bzip/compress_bzip.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * BZIP COMPRESS component - * - * Uses the bzip library - */ - -#ifndef MCA_COMPRESS_BZIP_EXPORT_H -#define MCA_COMPRESS_BZIP_EXPORT_H - -#include "opal_config.h" - -#include "opal/util/output.h" - -#include "opal/mca/mca.h" -#include "opal/mca/compress/compress.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - /* - * Local Component structures - */ - struct opal_compress_bzip_component_t { - opal_compress_base_component_t super; /** Base COMPRESS component */ - - }; - typedef struct opal_compress_bzip_component_t opal_compress_bzip_component_t; - OPAL_MODULE_DECLSPEC extern opal_compress_bzip_component_t mca_compress_bzip_component; - - int opal_compress_bzip_component_query(mca_base_module_t **module, int *priority); - - /* - * Module functions - */ - int opal_compress_bzip_module_init(void); - int opal_compress_bzip_module_finalize(void); - - /* - * Actual funcationality - */ - int opal_compress_bzip_compress(char *fname, char **cname, char **postfix); - int opal_compress_bzip_compress_nb(char *fname, char **cname, char **postfix, pid_t *child_pid); - int opal_compress_bzip_decompress(char *cname, char **fname); - int opal_compress_bzip_decompress_nb(char *cname, char **fname, pid_t *child_pid); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* MCA_COMPRESS_BZIP_EXPORT_H */ diff --git a/opal/mca/compress/bzip/compress_bzip_component.c b/opal/mca/compress/bzip/compress_bzip_component.c deleted file mode 100644 index c43242813af..00000000000 --- a/opal/mca/compress/bzip/compress_bzip_component.c +++ /dev/null @@ -1,149 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/constants.h" -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" -#include "compress_bzip.h" - -/* - * Public string for version number - */ -const char *opal_compress_bzip_component_version_string = -"OPAL COMPRESS bzip MCA component version " OPAL_VERSION; - -/* - * Local functionality - */ -static int compress_bzip_register (void); -static int compress_bzip_open(void); -static int compress_bzip_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -opal_compress_bzip_component_t mca_compress_bzip_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component itbzip - */ - .base_version = { - OPAL_COMPRESS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "bzip", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = compress_bzip_open, - .mca_close_component = compress_bzip_close, - .mca_query_component = opal_compress_bzip_component_query, - .mca_register_component_params = compress_bzip_register - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .verbose = 0, - .output_handle = -1, - } -}; - -/* - * Bzip module - */ -static opal_compress_base_module_t loc_module = { - /** Initialization Function */ - opal_compress_bzip_module_init, - /** Finalization Function */ - opal_compress_bzip_module_finalize, - - /** Compress Function */ - opal_compress_bzip_compress, - opal_compress_bzip_compress_nb, - - /** Decompress Function */ - opal_compress_bzip_decompress, - opal_compress_bzip_decompress_nb -}; - -static int compress_bzip_register 
(void) -{ - int ret; - - mca_compress_bzip_component.super.priority = 10; - ret = mca_base_component_var_register (&mca_compress_bzip_component.super.base_version, - "priority", "Priority of the COMPRESS bzip component " - "(default: 10)", MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_compress_bzip_component.super.priority); - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register (&mca_compress_bzip_component.super.base_version, - "verbose", - "Verbose level for the COMPRESS bzip component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_compress_bzip_component.super.verbose); - return (0 > ret) ? ret : OPAL_SUCCESS; -} - -static int compress_bzip_open(void) -{ - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if ( 0 != mca_compress_bzip_component.super.verbose) { - mca_compress_bzip_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_compress_bzip_component.super.output_handle, - mca_compress_bzip_component.super.verbose); - } else { - mca_compress_bzip_component.super.output_handle = opal_compress_base_framework.framework_output; - } - - /* - * Debug output - */ - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: open()"); - opal_output_verbose(20, mca_compress_bzip_component.super.output_handle, - "compress:bzip: open: priority = %d", - mca_compress_bzip_component.super.priority); - opal_output_verbose(20, mca_compress_bzip_component.super.output_handle, - "compress:bzip: open: verbosity = %d", - mca_compress_bzip_component.super.verbose); - return OPAL_SUCCESS; -} - -static int compress_bzip_close(void) -{ - return OPAL_SUCCESS; -} - -int opal_compress_bzip_component_query(mca_base_module_t **module, int *priority) -{ - *module = (mca_base_module_t 
*)&loc_module; - *priority = mca_compress_bzip_component.super.priority; - - return OPAL_SUCCESS; -} - diff --git a/opal/mca/compress/bzip/compress_bzip_module.c b/opal/mca/compress/bzip/compress_bzip_module.c deleted file mode 100644 index f2b2b529349..00000000000 --- a/opal/mca/compress/bzip/compress_bzip_module.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include -#if HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ - -#include "opal/util/opal_environ.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/util/opal_environ.h" - -#include "opal/constants.h" -#include "opal/util/basename.h" - -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" -#include "opal/runtime/opal_cr.h" - -#include "compress_bzip.h" - -static bool is_directory(char *fname ); - -int opal_compress_bzip_module_init(void) -{ - return OPAL_SUCCESS; -} - -int opal_compress_bzip_module_finalize(void) -{ - return OPAL_SUCCESS; -} - -int opal_compress_bzip_compress(char * fname, char **cname, char **postfix) -{ - pid_t child_pid = 0; - int status = 0; - - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: compress(%s)", - fname); - - opal_compress_bzip_compress_nb(fname, cname, postfix, &child_pid); - waitpid(child_pid, &status, 0); - - if( WIFEXITED(status) ) { - return OPAL_SUCCESS; - } else { - return OPAL_ERROR; - } -} - -int opal_compress_bzip_compress_nb(char * fname, char **cname, char **postfix, pid_t *child_pid) -{ - 
char **argv = NULL; - char * base_fname = NULL; - char * dir_fname = NULL; - int status; - bool is_dir; - - is_dir = is_directory(fname); - - *child_pid = fork(); - if( *child_pid == 0 ) { /* Child */ - char * cmd; - - dir_fname = opal_dirname(fname); - base_fname = opal_basename(fname); - - chdir(dir_fname); - - if( is_dir ) { -#if 0 - opal_compress_base_tar_create(&base_fname); - asprintf(cname, "%s.bz2", base_fname); - asprintf(&cmd, "bzip2 %s", base_fname); -#else - asprintf(cname, "%s.tar.bz2", base_fname); - asprintf(&cmd, "tar -jcf %s %s", *cname, base_fname); -#endif - } else { - asprintf(cname, "%s.bz2", base_fname); - asprintf(&cmd, "bzip2 %s", base_fname); - } - - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: compress_nb(%s -> [%s])", - fname, *cname); - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: compress_nb() command [%s]", - cmd); - - argv = opal_argv_split(cmd, ' '); - status = execvp(argv[0], argv); - - opal_output(0, "compress:bzip: compress_nb: Failed to exec child [%s] status = %d\n", cmd, status); - exit(OPAL_ERROR); - } - else if( *child_pid > 0 ) { - if( is_dir ) { - *postfix = strdup(".tar.bz2"); - } else { - *postfix = strdup(".bz2"); - } - asprintf(cname, "%s%s", fname, *postfix); - } - else { - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -} - -int opal_compress_bzip_decompress(char * cname, char **fname) -{ - pid_t child_pid = 0; - int status = 0; - - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: decompress(%s)", - cname); - - opal_compress_bzip_decompress_nb(cname, fname, &child_pid); - waitpid(child_pid, &status, 0); - - if( WIFEXITED(status) ) { - return OPAL_SUCCESS; - } else { - return OPAL_ERROR; - } -} - -int opal_compress_bzip_decompress_nb(char * cname, char **fname, pid_t *child_pid) -{ - char **argv = NULL; - char * dir_cname = NULL; - pid_t loc_pid = 0; - int status; - bool is_tar = false; 
- - if( 0 == strncmp(&(cname[strlen(cname)-8]), ".tar.bz2", strlen(".tar.bz2")) ) { - is_tar = true; - } - - *fname = strdup(cname); - if( is_tar ) { - (*fname)[strlen(cname)-8] = '\0'; - } else { - (*fname)[strlen(cname)-4] = '\0'; - } - - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: decompress_nb(%s -> [%s])", - cname, *fname); - - *child_pid = fork(); - if( *child_pid == 0 ) { /* Child */ - dir_cname = opal_dirname(cname); - - chdir(dir_cname); - - /* Fork(bunzip) */ - loc_pid = fork(); - if( loc_pid == 0 ) { /* Child */ - char * cmd; - asprintf(&cmd, "bunzip2 %s", cname); - - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: decompress_nb() command [%s]", - cmd); - - argv = opal_argv_split(cmd, ' '); - status = execvp(argv[0], argv); - - opal_output(0, "compress:bzip: decompress_nb: Failed to exec child [%s] status = %d\n", cmd, status); - exit(OPAL_ERROR); - } - else if( loc_pid > 0 ) { /* Parent */ - waitpid(loc_pid, &status, 0); - if( !WIFEXITED(status) ) { - opal_output(0, "compress:bzip: decompress_nb: Failed to bunzip the file [%s] status = %d\n", cname, status); - exit(OPAL_ERROR); - } - } - else { - exit(OPAL_ERROR); - } - - /* tar_decompress */ - if( is_tar ) { - /* Strip off '.bz2' leaving just '.tar' */ - cname[strlen(cname)-4] = '\0'; - opal_compress_base_tar_extract(&cname); - } - - /* Once this child is done, then directly exit */ - exit(OPAL_SUCCESS); - } - else if( *child_pid > 0 ) { - ; - } - else { - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -} - -static bool is_directory(char *fname ) { - struct stat file_status; - int rc; - - if(0 != (rc = stat(fname, &file_status) ) ) { - return false; - } - if(S_ISDIR(file_status.st_mode)) { - return true; - } - - return false; -} diff --git a/opal/mca/compress/bzip/owner.txt b/opal/mca/compress/bzip/owner.txt deleted file mode 100644 index b1efc765f07..00000000000 --- a/opal/mca/compress/bzip/owner.txt +++ 
/dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner:project -status:maintenance diff --git a/opal/mca/compress/compress.h b/opal/mca/compress/compress.h deleted file mode 100644 index e3688e980ae..00000000000 --- a/opal/mca/compress/compress.h +++ /dev/null @@ -1,137 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Compression Framework - * - * General Description: - * - * The OPAL Compress framework has been created to provide an abstract interface - * to the compression agent library on the host machine. This fromework is useful - * when distributing files that can be compressed before sending to dimish the - * load on the network. - * - */ - -#ifndef MCA_COMPRESS_H -#define MCA_COMPRESS_H - -#include "opal_config.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/class/opal_object.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/** - * Module initialization function. - * Returns OPAL_SUCCESS - */ -typedef int (*opal_compress_base_module_init_fn_t) - (void); - -/** - * Module finalization function. 
- * Returns OPAL_SUCCESS - */ -typedef int (*opal_compress_base_module_finalize_fn_t) - (void); - -/** - * Compress the file provided - * - * Arguments: - * fname = Filename to compress - * cname = Compressed filename - * postfix = postfix added to filename to create compressed filename - * Returns: - * OPAL_SUCCESS on success, ow OPAL_ERROR - */ -typedef int (*opal_compress_base_module_compress_fn_t) - (char * fname, char **cname, char **postfix); - -typedef int (*opal_compress_base_module_compress_nb_fn_t) - (char * fname, char **cname, char **postfix, pid_t *child_pid); - -/** - * Decompress the file provided - * - * Arguments: - * fname = Filename to compress - * cname = Compressed filename - * Returns: - * OPAL_SUCCESS on success, ow OPAL_ERROR - */ -typedef int (*opal_compress_base_module_decompress_fn_t) - (char * cname, char **fname); -typedef int (*opal_compress_base_module_decompress_nb_fn_t) - (char * cname, char **fname, pid_t *child_pid); - -/** - * Structure for COMPRESS components. 
- */ -struct opal_compress_base_component_2_0_0_t { - /** MCA base component */ - mca_base_component_t base_version; - /** MCA base data */ - mca_base_component_data_t base_data; - - /** Verbosity Level */ - int verbose; - /** Output Handle for opal_output */ - int output_handle; - /** Default Priority */ - int priority; -}; -typedef struct opal_compress_base_component_2_0_0_t opal_compress_base_component_2_0_0_t; -typedef struct opal_compress_base_component_2_0_0_t opal_compress_base_component_t; - -/** - * Structure for COMPRESS modules - */ -struct opal_compress_base_module_1_0_0_t { - /** Initialization Function */ - opal_compress_base_module_init_fn_t init; - /** Finalization Function */ - opal_compress_base_module_finalize_fn_t finalize; - - /** Compress interface */ - opal_compress_base_module_compress_fn_t compress; - opal_compress_base_module_compress_nb_fn_t compress_nb; - - /** Decompress Interface */ - opal_compress_base_module_decompress_fn_t decompress; - opal_compress_base_module_decompress_nb_fn_t decompress_nb; -}; -typedef struct opal_compress_base_module_1_0_0_t opal_compress_base_module_1_0_0_t; -typedef struct opal_compress_base_module_1_0_0_t opal_compress_base_module_t; - -OPAL_DECLSPEC extern opal_compress_base_module_t opal_compress; - -/** - * Macro for use in components that are of type COMPRESS - */ -#define OPAL_COMPRESS_BASE_VERSION_2_0_0 \ - OPAL_MCA_BASE_VERSION_2_1_0("compress", 2, 0, 0) - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* OPAL_COMPRESS_H */ - diff --git a/opal/mca/compress/gzip/Makefile.am b/opal/mca/compress/gzip/Makefile.am deleted file mode 100644 index 24f0d056cb1..00000000000 --- a/opal/mca/compress/gzip/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - compress_gzip.h \ - compress_gzip_component.c \ - compress_gzip_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_compress_gzip_DSO -component_noinst = -component_install = mca_compress_gzip.la -else -component_noinst = libmca_compress_gzip.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_compress_gzip_la_SOURCES = $(sources) -mca_compress_gzip_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_compress_gzip_la_SOURCES = $(sources) -libmca_compress_gzip_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/compress/gzip/compress_gzip.h b/opal/mca/compress/gzip/compress_gzip.h deleted file mode 100644 index d470cbae2f0..00000000000 --- a/opal/mca/compress/gzip/compress_gzip.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * GZIP COMPRESS component - * - * Uses the gzip library - */ - -#ifndef MCA_COMPRESS_GZIP_EXPORT_H -#define MCA_COMPRESS_GZIP_EXPORT_H - -#include "opal_config.h" - -#include "opal/util/output.h" - -#include "opal/mca/mca.h" -#include "opal/mca/compress/compress.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - /* - * Local Component structures - */ - struct opal_compress_gzip_component_t { - opal_compress_base_component_t super; /** Base COMPRESS component */ - - }; - typedef struct opal_compress_gzip_component_t opal_compress_gzip_component_t; - OPAL_MODULE_DECLSPEC extern opal_compress_gzip_component_t mca_compress_gzip_component; - - int opal_compress_gzip_component_query(mca_base_module_t **module, int *priority); - - /* - * Module functions - */ - int opal_compress_gzip_module_init(void); - int opal_compress_gzip_module_finalize(void); - - /* - * Actual funcationality - */ - int opal_compress_gzip_compress(char *fname, char **cname, char **postfix); - int opal_compress_gzip_compress_nb(char *fname, char **cname, char **postfix, pid_t *child_pid); - int opal_compress_gzip_decompress(char *cname, char **fname); - int opal_compress_gzip_decompress_nb(char *cname, char **fname, pid_t *child_pid); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* MCA_COMPRESS_GZIP_EXPORT_H */ diff --git a/opal/mca/compress/gzip/compress_gzip_component.c b/opal/mca/compress/gzip/compress_gzip_component.c deleted file mode 100644 index 368ba687a2f..00000000000 --- a/opal/mca/compress/gzip/compress_gzip_component.c +++ /dev/null @@ -1,150 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/constants.h" -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" -#include "compress_gzip.h" - -/* - * Public string for version number - */ -const char *opal_compress_gzip_component_version_string = -"OPAL COMPRESS gzip MCA component version " OPAL_VERSION; - -/* - * Local functionality - */ -static int compress_gzip_register (void); -static int compress_gzip_open(void); -static int compress_gzip_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -opal_compress_gzip_component_t mca_compress_gzip_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component itgzip - */ - .base_version = { - OPAL_COMPRESS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "gzip", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = compress_gzip_open, - .mca_close_component = compress_gzip_close, - .mca_query_component = opal_compress_gzip_component_query, - .mca_register_component_params = compress_gzip_register - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .verbose = 0, - .output_handle = -1, - } -}; - -/* - * Gzip module - */ -static opal_compress_base_module_t loc_module = { - /** Initialization Function */ - opal_compress_gzip_module_init, - /** Finalization Function */ - opal_compress_gzip_module_finalize, - - /** Compress Function */ - opal_compress_gzip_compress, - opal_compress_gzip_compress_nb, - - /** Decompress Function */ - opal_compress_gzip_decompress, - opal_compress_gzip_decompress_nb -}; - -static int compress_gzip_register 
(void) -{ - int ret; - - mca_compress_gzip_component.super.priority = 15; - ret = mca_base_component_var_register (&mca_compress_gzip_component.super.base_version, - "priority", "Priority of the COMPRESS gzip component " - "(default: 15)", MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_compress_gzip_component.super.priority); - if (0 > ret) { - return ret; - } - - mca_compress_gzip_component.super.verbose = 0; - ret = mca_base_component_var_register (&mca_compress_gzip_component.super.base_version, - "verbose", - "Verbose level for the COMPRESS gzip component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_compress_gzip_component.super.verbose); - return (0 > ret) ? ret : OPAL_SUCCESS; -} - -static int compress_gzip_open(void) -{ - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if ( 0 != mca_compress_gzip_component.super.verbose) { - mca_compress_gzip_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_compress_gzip_component.super.output_handle, - mca_compress_gzip_component.super.verbose); - } else { - mca_compress_gzip_component.super.output_handle = opal_compress_base_framework.framework_output; - } - - /* - * Debug output - */ - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: open()"); - opal_output_verbose(20, mca_compress_gzip_component.super.output_handle, - "compress:gzip: open: priority = %d", - mca_compress_gzip_component.super.priority); - opal_output_verbose(20, mca_compress_gzip_component.super.output_handle, - "compress:gzip: open: verbosity = %d", - mca_compress_gzip_component.super.verbose); - return OPAL_SUCCESS; -} - -static int compress_gzip_close(void) -{ - return OPAL_SUCCESS; -} - -int opal_compress_gzip_component_query(mca_base_module_t **module, 
int *priority) -{ - *module = (mca_base_module_t *)&loc_module; - *priority = mca_compress_gzip_component.super.priority; - - return OPAL_SUCCESS; -} - diff --git a/opal/mca/compress/gzip/compress_gzip_module.c b/opal/mca/compress/gzip/compress_gzip_module.c deleted file mode 100644 index 2093ddd79b3..00000000000 --- a/opal/mca/compress/gzip/compress_gzip_module.c +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include -#if HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ - -#include "opal/util/opal_environ.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/util/opal_environ.h" - -#include "opal/constants.h" -#include "opal/util/basename.h" - -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" -#include "opal/runtime/opal_cr.h" - -#include "compress_gzip.h" - -static bool is_directory(char *fname ); - -int opal_compress_gzip_module_init(void) -{ - return OPAL_SUCCESS; -} - -int opal_compress_gzip_module_finalize(void) -{ - return OPAL_SUCCESS; -} - -int opal_compress_gzip_compress(char * fname, char **cname, char **postfix) -{ - pid_t child_pid = 0; - int status = 0; - - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: compress(%s)", - fname); - - opal_compress_gzip_compress_nb(fname, cname, postfix, &child_pid); - waitpid(child_pid, &status, 0); - - if( WIFEXITED(status) ) { - return OPAL_SUCCESS; - } else { - return OPAL_ERROR; - } -} - -int opal_compress_gzip_compress_nb(char * fname, char 
**cname, char **postfix, pid_t *child_pid) -{ - char **argv = NULL; - char * base_fname = NULL; - char * dir_fname = NULL; - int status; - bool is_dir; - - is_dir = is_directory(fname); - - *child_pid = fork(); - if( *child_pid == 0 ) { /* Child */ - char * cmd = NULL; - - dir_fname = opal_dirname(fname); - base_fname = opal_basename(fname); - - chdir(dir_fname); - - if( is_dir ) { -#if 0 - opal_compress_base_tar_create(&base_fname); - asprintf(cname, "%s.gz", base_fname); - asprintf(&cmd, "gzip %s", base_fname); -#else - asprintf(cname, "%s.tar.gz", base_fname); - asprintf(&cmd, "tar -zcf %s %s", *cname, base_fname); -#endif - } else { - asprintf(cname, "%s.gz", base_fname); - asprintf(&cmd, "gzip %s", base_fname); - } - - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: compress_nb(%s -> [%s])", - fname, *cname); - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: compress_nb() command [%s]", - cmd); - - argv = opal_argv_split(cmd, ' '); - status = execvp(argv[0], argv); - - opal_output(0, "compress:gzip: compress_nb: Failed to exec child [%s] status = %d\n", cmd, status); - exit(OPAL_ERROR); - } - else if( *child_pid > 0 ) { - if( is_dir ) { - *postfix = strdup(".tar.gz"); - } else { - *postfix = strdup(".gz"); - } - asprintf(cname, "%s%s", fname, *postfix); - - } - else { - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -} - -int opal_compress_gzip_decompress(char * cname, char **fname) -{ - pid_t child_pid = 0; - int status = 0; - - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: decompress(%s)", - cname); - - opal_compress_gzip_decompress_nb(cname, fname, &child_pid); - waitpid(child_pid, &status, 0); - - if( WIFEXITED(status) ) { - return OPAL_SUCCESS; - } else { - return OPAL_ERROR; - } -} - -int opal_compress_gzip_decompress_nb(char * cname, char **fname, pid_t *child_pid) -{ - char **argv = NULL; - char * dir_cname = NULL; - pid_t 
loc_pid = 0; - int status; - bool is_tar = false; - - if( 0 == strncmp(&(cname[strlen(cname)-7]), ".tar.gz", strlen(".tar.gz")) ) { - is_tar = true; - } - - *fname = strdup(cname); - if( is_tar ) { - /* Strip off '.tar.gz' */ - (*fname)[strlen(cname)-7] = '\0'; - } else { - /* Strip off '.gz' */ - (*fname)[strlen(cname)-3] = '\0'; - } - - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: decompress_nb(%s -> [%s])", - cname, *fname); - - *child_pid = fork(); - if( *child_pid == 0 ) { /* Child */ - char * cmd; - dir_cname = opal_dirname(cname); - - chdir(dir_cname); - - /* Fork(gunzip) */ - loc_pid = fork(); - if( loc_pid == 0 ) { /* Child */ - asprintf(&cmd, "gunzip %s", cname); - - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: decompress_nb() command [%s]", - cmd); - - argv = opal_argv_split(cmd, ' '); - status = execvp(argv[0], argv); - - opal_output(0, "compress:gzip: decompress_nb: Failed to exec child [%s] status = %d\n", cmd, status); - exit(OPAL_ERROR); - } - else if( loc_pid > 0 ) { /* Parent */ - waitpid(loc_pid, &status, 0); - if( !WIFEXITED(status) ) { - opal_output(0, "compress:gzip: decompress_nb: Failed to bunzip the file [%s] status = %d\n", cname, status); - exit(OPAL_ERROR); - } - } - else { - exit(OPAL_ERROR); - } - - /* tar_decompress */ - if( is_tar ) { - /* Strip off '.gz' leaving just '.tar' */ - cname[strlen(cname)-3] = '\0'; - opal_compress_base_tar_extract(&cname); - } - - /* Once this child is done, then directly exit */ - exit(OPAL_SUCCESS); - } - else if( *child_pid > 0 ) { - ; - } - else { - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -} - -static bool is_directory(char *fname ) { - struct stat file_status; - int rc; - - if(0 != (rc = stat(fname, &file_status) ) ) { - return false; - } - if(S_ISDIR(file_status.st_mode)) { - return true; - } - - return false; -} diff --git a/opal/mca/compress/gzip/owner.txt b/opal/mca/compress/gzip/owner.txt deleted 
file mode 100644 index b1efc765f07..00000000000 --- a/opal/mca/compress/gzip/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner:project -status:maintenance diff --git a/opal/mca/crs/Makefile.am b/opal/mca/crs/Makefile.am deleted file mode 100644 index 830f3160dd9..00000000000 --- a/opal/mca/crs/Makefile.am +++ /dev/null @@ -1,50 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -# main library setup -noinst_LTLIBRARIES = libmca_crs.la -libmca_crs_la_SOURCES = - -# local files -headers = crs.h -libmca_crs_la_SOURCES += $(headers) - -# Manual pages -nodist_man_MANS = opal_crs.7 -EXTRA_DIST = $(nodist_man_MANS:.7=.7in) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -opaldir = $(opalincludedir)/$(subdir) -nobase_opal_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h - rm -f $(nodist_man_MANS) diff --git a/opal/mca/crs/base/Makefile.am b/opal/mca/crs/base/Makefile.am deleted file mode 100644 index 00847b4cc52..00000000000 --- a/opal/mca/crs/base/Makefile.am +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h - -libmca_crs_la_SOURCES += \ - base/crs_base_open.c \ - base/crs_base_close.c \ - base/crs_base_select.c \ - base/crs_base_fns.c diff --git a/opal/mca/crs/base/base.h b/opal/mca/crs/base/base.h deleted file mode 100644 index 244f12b5d39..00000000000 --- a/opal/mca/crs/base/base.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef OPAL_CRS_BASE_H -#define OPAL_CRS_BASE_H - -#include "opal_config.h" -#include "opal/mca/base/base.h" -#include "opal/mca/crs/crs.h" -#include "opal/util/opal_environ.h" -#include "opal/runtime/opal_cr.h" - -/* - * Global functions for MCA overall CRS - */ - -BEGIN_C_DECLS - -/* Some local strings to use genericly with the local metadata file */ -#define CRS_METADATA_BASE ("# ") -#define CRS_METADATA_COMP ("# OPAL CRS Component: ") -#define CRS_METADATA_PID ("# PID: ") -#define CRS_METADATA_CONTEXT ("# CONTEXT: ") -#define CRS_METADATA_MKDIR ("# MKDIR: ") -#define CRS_METADATA_TOUCH ("# TOUCH: ") - - /** - * Initialize the CRS MCA framework - * - * @retval OPAL_SUCCESS Upon success - * @retval OPAL_ERROR Upon failures - * - * This function is invoked during opal_init(); - */ - OPAL_DECLSPEC int opal_crs_base_open(mca_base_open_flag_t flags); - - /** - * Select an available component. 
- * - * @retval OPAL_SUCCESS Upon Success - * @retval OPAL_NOT_FOUND If no component can be selected - * @retval OPAL_ERROR Upon other failure - * - */ - OPAL_DECLSPEC int opal_crs_base_select(void); - - /** - * Finalize the CRS MCA framework - * - * @retval OPAL_SUCCESS Upon success - * @retval OPAL_ERROR Upon failures - * - * This function is invoked during opal_finalize(); - */ - OPAL_DECLSPEC int opal_crs_base_close(void); - - /** - * Globals - */ - OPAL_DECLSPEC extern mca_base_framework_t opal_crs_base_framework; - OPAL_DECLSPEC extern opal_crs_base_component_t opal_crs_base_selected_component; - OPAL_DECLSPEC extern opal_crs_base_module_t opal_crs; - - /** - * Some utility functions - */ - OPAL_DECLSPEC char * opal_crs_base_state_str(opal_crs_state_type_t state); - - /* - * Extract the expected component and pid from the metadata - */ - OPAL_DECLSPEC int opal_crs_base_extract_expected_component(FILE *metadata, char ** component_name, int *prev_pid); - - /* - * Read a token to the metadata file - */ - OPAL_DECLSPEC int opal_crs_base_metadata_read_token(FILE *metadata, char * token, char ***value); - - /* - * Register a file for cleanup. - * Useful in C/R when files only need to temporarily exist for restart - */ - OPAL_DECLSPEC int opal_crs_base_cleanup_append(char* filename, bool is_dir); - - /* - * Flush the cleanup of all registered files. 
- */ - OPAL_DECLSPEC int opal_crs_base_cleanup_flush(void); - - /* - * Copy the options structure - */ - OPAL_DECLSPEC int opal_crs_base_copy_options(opal_crs_base_ckpt_options_t *from, - opal_crs_base_ckpt_options_t *to); - /* - * Clear the options structure - */ - OPAL_DECLSPEC int opal_crs_base_clear_options(opal_crs_base_ckpt_options_t *target); - - /* - * CRS self application interface functions - */ - typedef int (*opal_crs_base_self_checkpoint_fn_t)(char **restart_cmd); - typedef int (*opal_crs_base_self_restart_fn_t)(void); - typedef int (*opal_crs_base_self_continue_fn_t)(void); - - extern opal_crs_base_self_checkpoint_fn_t crs_base_self_checkpoint_fn; - extern opal_crs_base_self_restart_fn_t crs_base_self_restart_fn; - extern opal_crs_base_self_continue_fn_t crs_base_self_continue_fn; - - OPAL_DECLSPEC int opal_crs_base_self_register_checkpoint_callback - (opal_crs_base_self_checkpoint_fn_t function); - OPAL_DECLSPEC int opal_crs_base_self_register_restart_callback - (opal_crs_base_self_restart_fn_t function); - OPAL_DECLSPEC int opal_crs_base_self_register_continue_callback - (opal_crs_base_self_continue_fn_t function); - -END_C_DECLS - -#endif /* OPAL_CRS_BASE_H */ diff --git a/opal/mca/crs/base/crs_base_close.c b/opal/mca/crs/base/crs_base_close.c deleted file mode 100644 index c978b882054..00000000000 --- a/opal/mca/crs/base/crs_base_close.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/constants.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -int opal_crs_base_close(void) -{ - if( !opal_cr_is_enabled ) { - opal_output_verbose(10, opal_crs_base_framework.framework_output, - "crs:close: FT is not enabled, skipping!"); - return OPAL_SUCCESS; - } - - /* Call the component's finalize routine */ - if( NULL != opal_crs.crs_finalize ) { - opal_crs.crs_finalize(); - } - - /* Close all available modules that are open */ - return mca_base_framework_components_close (&opal_crs_base_framework, - NULL); -} diff --git a/opal/mca/crs/base/crs_base_fns.c b/opal/mca/crs/base/crs_base_fns.c deleted file mode 100644 index fbcadb3356c..00000000000 --- a/opal/mca/crs/base/crs_base_fns.c +++ /dev/null @@ -1,454 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#ifdef HAVE_STRING_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_SYS_STAT_H -#include -#endif - -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/constants.h" -#include "opal/util/os_dirpath.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -opal_crs_base_self_checkpoint_fn_t crs_base_self_checkpoint_fn = NULL; -opal_crs_base_self_restart_fn_t crs_base_self_restart_fn = NULL; -opal_crs_base_self_continue_fn_t crs_base_self_continue_fn = NULL; - -/****************** - * Local Functions - ******************/ -static int metadata_extract_next_token(FILE *file, char **token, char **value); - -static char **cleanup_file_argv = NULL; -static char **cleanup_dir_argv = NULL; - -/****************** - * Object stuff - ******************/ -static void opal_crs_base_construct(opal_crs_base_snapshot_t *snapshot) -{ - snapshot->component_name = NULL; - - snapshot->metadata_filename = NULL; - snapshot->metadata = NULL; - snapshot->snapshot_directory = NULL; - - snapshot->cold_start = false; -} - -static void opal_crs_base_destruct( opal_crs_base_snapshot_t *snapshot) -{ - if(NULL != snapshot->metadata_filename ) { - free(snapshot->metadata_filename); - snapshot->metadata_filename = NULL; - } - - if(NULL != snapshot->metadata) { - fclose(snapshot->metadata); - snapshot->metadata = NULL; - } - - if(NULL != snapshot->snapshot_directory ) { - free(snapshot->snapshot_directory); - snapshot->snapshot_directory = NULL; - } -} - -OBJ_CLASS_INSTANCE(opal_crs_base_snapshot_t, - opal_list_item_t, - opal_crs_base_construct, - opal_crs_base_destruct); - -static void opal_crs_base_ckpt_options_construct(opal_crs_base_ckpt_options_t *opts) { - 
opal_crs_base_clear_options(opts); -} - -static void opal_crs_base_ckpt_options_destruct(opal_crs_base_ckpt_options_t *opts) { - opal_crs_base_clear_options(opts); -} - -OBJ_CLASS_INSTANCE(opal_crs_base_ckpt_options_t, - opal_object_t, - opal_crs_base_ckpt_options_construct, - opal_crs_base_ckpt_options_destruct); - -/* - * Utility functions - */ -int opal_crs_base_metadata_read_token(FILE *metadata, char * token, char ***value) { - int argc = 0; - - /* Dummy check */ - if (NULL == token || NULL == metadata) { - return OPAL_ERROR; - } - - /* - * Extract each token and make the records - */ - rewind(metadata); - do { - char *loc_token = NULL, *loc_value = NULL; - - /* Get next token */ - if( OPAL_SUCCESS != metadata_extract_next_token(metadata, &loc_token, &loc_value) ) { - break; - } - - /* Check token to see if it matches */ - if(0 == strncmp(token, loc_token, strlen(loc_token)) ) { - opal_argv_append(&argc, value, loc_value); - } - - free (loc_token); - free (loc_value); - } while (0 == feof(metadata)); - - return OPAL_SUCCESS; -} - -int opal_crs_base_extract_expected_component(FILE *metadata, char ** component_name, int *prev_pid) -{ - int exit_status = OPAL_SUCCESS; - char **pid_argv = NULL; - char **name_argv = NULL; - - /* Dummy check */ - if( NULL == metadata ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - opal_crs_base_metadata_read_token(metadata, CRS_METADATA_PID, &pid_argv); - if( NULL != pid_argv && NULL != pid_argv[0] ) { - *prev_pid = atoi(pid_argv[0]); - } else { - opal_output(0, "Error: expected_component: PID information unavailable!"); - exit_status = OPAL_ERROR; - goto cleanup; - } - - opal_crs_base_metadata_read_token(metadata, CRS_METADATA_COMP, &name_argv); - if( NULL != name_argv && NULL != name_argv[0] ) { - *component_name = strdup(name_argv[0]); - } else { - opal_output(0, "Error: expected_component: Component Name information unavailable!"); - exit_status = OPAL_ERROR; - goto cleanup; - } - - cleanup: - if( NULL != pid_argv ) { - 
opal_argv_free(pid_argv); - pid_argv = NULL; - } - - if( NULL != name_argv ) { - opal_argv_free(name_argv); - name_argv = NULL; - } - - return exit_status; -} - -int opal_crs_base_cleanup_append(char* filename, bool is_dir) -{ - if( NULL == filename ) { - return OPAL_SUCCESS; - } - - if( is_dir ) { - opal_output_verbose(15, opal_crs_base_framework.framework_output, - "opal:crs: cleanup_append: Append Dir <%s>\n", - filename); - opal_argv_append_nosize(&cleanup_dir_argv, filename); - } else { - opal_output_verbose(15, opal_crs_base_framework.framework_output, - "opal:crs: cleanup_append: Append File <%s>\n", - filename); - opal_argv_append_nosize(&cleanup_file_argv, filename); - } - - return OPAL_SUCCESS; -} - -int opal_crs_base_cleanup_flush(void) -{ - int argc, i; - - /* - * Cleanup files first - */ - if( NULL != cleanup_file_argv ) { - argc = opal_argv_count(cleanup_file_argv); - for( i = 0; i < argc; ++i) { - opal_output_verbose(15, opal_crs_base_framework.framework_output, - "opal:crs: cleanup_flush: Remove File <%s>\n", cleanup_file_argv[i]); - unlink(cleanup_file_argv[i]); - } - - opal_argv_free(cleanup_file_argv); - cleanup_file_argv = NULL; - } - - /* - * Try to cleanup directories next - */ - if( NULL != cleanup_dir_argv ) { - argc = opal_argv_count(cleanup_dir_argv); - for( i = 0; i < argc; ++i) { - opal_output_verbose(15, opal_crs_base_framework.framework_output, - "opal:crs: cleanup_flush: Remove Dir <%s>\n", cleanup_dir_argv[i]); - opal_os_dirpath_destroy(cleanup_dir_argv[i], true, NULL); - } - - opal_argv_free(cleanup_dir_argv); - cleanup_dir_argv = NULL; - } - - return OPAL_SUCCESS; -} - -char * opal_crs_base_state_str(opal_crs_state_type_t state) -{ - char *str = NULL; - - switch(state) { - case OPAL_CRS_CHECKPOINT: - str = strdup("Checkpoint"); - break; - case OPAL_CRS_RESTART: - str = strdup("Restart"); - break; - case OPAL_CRS_CONTINUE: - str = strdup("Continue"); - break; - case OPAL_CRS_TERM: - str = strdup("Terminate"); - break; - case 
OPAL_CRS_RUNNING: - str = strdup("Running"); - break; - case OPAL_CRS_ERROR: - str = strdup("Error"); - break; - default: - str = strdup("Unknown"); - break; - } - - return str; -} - -int opal_crs_base_copy_options(opal_crs_base_ckpt_options_t *from, - opal_crs_base_ckpt_options_t *to) -{ - if( NULL == from ) { - opal_output(opal_crs_base_framework.framework_output, - "opal:crs:base: copy_options: Error: from value is NULL\n"); - return OPAL_ERROR; - } - - if( NULL == to ) { - opal_output(opal_crs_base_framework.framework_output, - "opal:crs:base: copy_options: Error: to value is NULL\n"); - return OPAL_ERROR; - } - - to->term = from->term; - to->stop = from->stop; - - to->inc_prep_only = from->inc_prep_only; - to->inc_recover_only = from->inc_recover_only; - -#if OPAL_ENABLE_CRDEBUG == 1 - to->attach_debugger = from->attach_debugger; - to->detach_debugger = from->detach_debugger; -#endif - - return OPAL_SUCCESS; -} - -int opal_crs_base_clear_options(opal_crs_base_ckpt_options_t *target) -{ - if( NULL == target ) { - opal_output(opal_crs_base_framework.framework_output, - "opal:crs:base: copy_options: Error: target value is NULL\n"); - return OPAL_ERROR; - } - - target->term = false; - target->stop = false; - - target->inc_prep_only = false; - target->inc_recover_only = false; - -#if OPAL_ENABLE_CRDEBUG == 1 - target->attach_debugger = false; - target->detach_debugger = false; -#endif - - return OPAL_SUCCESS; -} - -int opal_crs_base_self_register_checkpoint_callback(opal_crs_base_self_checkpoint_fn_t function) -{ - crs_base_self_checkpoint_fn = function; - return OPAL_SUCCESS; -} - -int opal_crs_base_self_register_restart_callback(opal_crs_base_self_restart_fn_t function) -{ - crs_base_self_restart_fn = function; - return OPAL_SUCCESS; -} - -int opal_crs_base_self_register_continue_callback(opal_crs_base_self_continue_fn_t function) -{ - crs_base_self_continue_fn = function; - return OPAL_SUCCESS; -} - - -/****************** - * Local Functions - 
******************/ -static int metadata_extract_next_token(FILE *file, char **token, char **value) -{ - int exit_status = OPAL_SUCCESS; - const int max_len = 256; - /* NTH: as long as max_len remains small (256 bytes) there is no need - * to allocate line on the heap */ - char line[256]; - int line_len = 0, value_len; - char *local_value = NULL; - bool end_of_line = false; - char *tmp; - - /* - * If we are at the end of the file, then just return - */ - do { - /* - * Other wise grab the next token/value pair - */ - if (NULL == fgets(line, max_len, file) ) { - /* the calling code doesn't distinguish error types so - * returning OPAL_ERROR on error or EOF is ok. if this - * changes re-add the check for EOF. */ - return OPAL_ERROR; - } - - line_len = strlen(line); - - /* Strip off the new line if it is there */ - end_of_line = ('\n' == line[line_len-1]); - - if (end_of_line) { - line[--line_len] = '\0'; - } - - /* Ignore lines with just '#' too */ - } while (line_len <= 2); - - /* - * Extract the token from the set - */ - tmp = strchr (line, ':'); - if (!tmp) { - /* no separator */ - return OPAL_ERROR; - } - - *tmp = '\0'; - - *token = strdup (line); - if (NULL == *token) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - local_value = strdup (tmp + 1); - if (NULL == local_value) { - free(*token); - *token = NULL; - return OPAL_ERR_OUT_OF_RESOURCE; - } - - value_len = strlen (local_value) + 1; - - /* - * Extract the value from the set - */ - while(!end_of_line) { - if (NULL == fgets(line, max_len, file) ) { - exit_status = OPAL_ERROR; - break; - } - - line_len = strlen(line); - - /* Strip off the new line if it is there */ - end_of_line = ('\n' == line[line_len-1]); - - if (end_of_line) { - line[--line_len] = '\0'; - } - - value_len += line_len; - - tmp = (char *) realloc(local_value, value_len); - if (NULL == tmp) { - exit_status = OPAL_ERR_OUT_OF_RESOURCE; - break; - } - local_value = tmp; - - strcat (local_value, line); - } - - if (OPAL_SUCCESS == exit_status) { - *value 
= local_value; - } else { - free (local_value); - } - - return exit_status; -} diff --git a/opal/mca/crs/base/crs_base_open.c b/opal/mca/crs/base/crs_base_open.c deleted file mode 100644 index 81576950dc5..00000000000 --- a/opal/mca/crs/base/crs_base_open.c +++ /dev/null @@ -1,91 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/mca/base/base.h" -#include "opal/mca/crs/base/base.h" - -#include "opal/mca/crs/base/static-components.h" - -/* - * Globals - */ -opal_crs_base_module_t opal_crs = { - NULL, /* crs_init */ - NULL, /* crs_finalize */ - NULL, /* crs_checkpoint */ - NULL, /* crs_restart_cmd */ - NULL, /* crs_disable_checkpoint */ - NULL, /* crs_enable_checkpoint */ - NULL, /* crs_prelaunch */ - NULL /* crs_reg_thread */ -}; - -opal_crs_base_component_t opal_crs_base_selected_component = {{0}}; - -extern bool opal_crs_base_do_not_select; -static int opal_crs_base_register(mca_base_register_flag_t flags); - -/* Use default select */ -MCA_BASE_FRAMEWORK_DECLARE(opal, crs, "Checkpoint and Restart Service (CRS)", - opal_crs_base_register, opal_crs_base_open, - opal_crs_base_close, mca_crs_base_static_components, 0); - -static int opal_crs_base_register(mca_base_register_flag_t flags) -{ - int ret; - /* - * Note: If we are a tool, then we will manually run the selection routine - * for the checkpointer. The tool will set the MCA parameter - * 'crs_base_do_not_select' before opal_init and then reset it after to - * disable the selection logic. - * This is useful for opal_restart because it reads the metadata file - * that indicates the checkpointer to be used after calling opal_init. - * Therefore it would need to select a specific module, but it doesn't - * know which one until later. It will set the MCA parameter 'crs' - * before calling select. - */ - ret = mca_base_framework_var_register(&opal_crs_base_framework, "do_not_select", - "Do not do the selection of the CRS component", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE | - MCA_BASE_VAR_FLAG_INTERNAL, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, &opal_crs_base_do_not_select); - - return (0 > ret) ? 
ret : OPAL_SUCCESS; -} - -/** - * Function for finding and opening either all MCA components, - * or the one that was specifically requested via a MCA parameter. - */ -int opal_crs_base_open(mca_base_open_flag_t flags) -{ - if(!opal_cr_is_enabled) { - opal_output_verbose(10, opal_crs_base_framework.framework_output, - "crs:open: FT is not enabled, skipping!"); - return OPAL_SUCCESS; - } - - return mca_base_framework_components_open(&opal_crs_base_framework, flags); -} diff --git a/opal/mca/crs/base/crs_base_select.c b/opal/mca/crs/base/crs_base_select.c deleted file mode 100644 index 09d0e09b54f..00000000000 --- a/opal/mca/crs/base/crs_base_select.c +++ /dev/null @@ -1,78 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#ifdef HAVE_UNISTD_H -#include "unistd.h" -#endif - -#include "opal/constants.h" -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -bool opal_crs_base_do_not_select = false; - -int opal_crs_base_select(void) -{ - opal_crs_base_component_t *best_component = NULL; - opal_crs_base_module_t *best_module = NULL; - int ret; - - if( !opal_cr_is_enabled ) { - opal_output_verbose(10, opal_crs_base_framework.framework_output, - "crs:select: FT is not enabled, skipping!"); - return OPAL_SUCCESS; - } - - if( opal_crs_base_do_not_select ) { - opal_output_verbose(10, opal_crs_base_framework.framework_output, - "crs:select: Not selecting at this time!"); - return OPAL_SUCCESS; - } - - /* - * Select the best component - */ - if( OPAL_SUCCESS != mca_base_select("crs", opal_crs_base_framework.framework_output, - &opal_crs_base_framework.framework_components, - (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { - /* This will only happen if no component was selected */ - return OPAL_ERROR; - } - - /* best_module and best_component should not be NULL here */ - - /* Save the winner */ - opal_crs_base_selected_component = *best_component; - opal_crs = *best_module; - - /* Initialize the winner */ - if (OPAL_SUCCESS != (ret = opal_crs.crs_init()) ) { - return ret; - } - - return OPAL_SUCCESS; -} diff --git a/opal/mca/crs/base/owner.txt b/opal/mca/crs/base/owner.txt deleted file mode 100644 index 2d23c9be654..00000000000 --- a/opal/mca/crs/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: project -status: maintenance diff --git a/opal/mca/crs/blcr/Makefile.am b/opal/mca/crs/blcr/Makefile.am deleted file mode 100644 index 78fa768c015..00000000000 --- a/opal/mca/crs/blcr/Makefile.am +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFLAGS = $(crs_blcr_CFLAGS) -AM_CPPFLAGS = $(crs_blcr_CPPFLAGS) - -dist_opaldata_DATA = help-opal-crs-blcr.txt - -sources = \ - crs_blcr.h \ - crs_blcr_component.c \ - crs_blcr_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_opal_crs_blcr_DSO -component_noinst = -component_install = mca_crs_blcr.la -else -component_noinst = libmca_crs_blcr.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_crs_blcr_la_SOURCES = $(sources) -mca_crs_blcr_la_LDFLAGS = -module -avoid-version $(crs_blcr_LDFLAGS) -mca_crs_blcr_la_LIBADD = $(crs_blcr_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_crs_blcr_la_SOURCES = $(sources) -libmca_crs_blcr_la_LDFLAGS = -module -avoid-version $(crs_blcr_LDFLAGS) -libmca_crs_blcr_la_LIBADD = $(crs_blcr_LIBS) diff --git a/opal/mca/crs/blcr/configure.m4 b/opal/mca/crs/blcr/configure.m4 deleted file mode 100644 index 6948639207e..00000000000 --- a/opal/mca/crs/blcr/configure.m4 +++ /dev/null @@ -1,202 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_crs_blcr_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_opal_crs_blcr_CONFIG],[ - AC_CONFIG_FILES([opal/mca/crs/blcr/Makefile]) - - AC_ARG_WITH([blcr], - [AC_HELP_STRING([--with-blcr(=DIR)], - [Path to BLCR Installation])]) - OPAL_CHECK_WITHDIR([blcr], [$with_blcr], [include/libcr.h]) - AC_ARG_WITH([blcr-libdir], - [AC_HELP_STRING([--with-blcr-libdir=DIR], - [Search for BLCR libraries in DIR])]) - OPAL_CHECK_WITHDIR([blcr-libdir], [$with_blcr_libdir], [libcr.*]) - - check_crs_blcr_good="no" - - # If we do not want FT, don't compile this component - # - # If we wanted BLCR, but did not specify the FT option, - # error out with a warning for the user - AS_IF([test "$opal_want_ft_cr" = "0"], - [$2 - check_crs_blcr_good="no" - AS_IF([test ! -z "$with_blcr" -a "$with_blcr" != "no"], - [AC_MSG_WARN([BLCR support requested, but FT support not requested. You need to specify the --with-ft=cr configure option.]) - AC_MSG_ERROR([Aborting.])]) - ], - [check_crs_blcr_good="yes"]) - - # If we do not want BLCR, then do not compile it - AS_IF([test "$with_blcr" = "no" -o "$check_crs_blcr_good" = "no"], - [$2 - check_crs_blcr_good="no"], - [check_crs_blcr_good="yes"]) - - # Defaults - check_crs_blcr_dir_msg="compiler default" - check_crs_blcr_libdir_msg="linker default" - check_crs_blcr_dir="" - check_crs_blcr_libdir="" - - # Determine the search paths for the headers and libraries - AS_IF([test "$check_crs_blcr_good" != "yes"], [$2], - [AS_IF([test ! -z "$with_blcr" -a "$with_blcr" != "yes"], - [check_crs_blcr_dir="$with_blcr" - check_crs_blcr_dir_msg="$with_blcr (from --with-blcr)"]) - AS_IF([test ! 
-z "$with_blcr_libdir" -a "$with_blcr_libdir" != "yes"], - [check_crs_blcr_libdir="$with_blcr_libdir" - check_crs_blcr_libdir_msg="$with_blcr_libdir (from --with-blcr-libdir)"]) - ]) - - AS_IF([test "$check_crs_blcr_good" != "yes"], [$2], - [AC_MSG_CHECKING([for BLCR dir]) - AC_MSG_RESULT([$check_crs_blcr_dir_msg]) - AC_MSG_CHECKING([for BLCR library dir]) - AC_MSG_RESULT([$check_crs_blcr_libdir_msg]) - OPAL_CHECK_PACKAGE([crs_blcr_check], - [libcr.h], - [cr], - [cr_init], - [], - [$check_crs_blcr_dir], - [$check_crs_blcr_libdir], - [check_crs_blcr_good="yes"], - [check_crs_blcr_good="no"]) - ]) - - crs_blcr_save_CFLAGS="$CFLAGS" - crs_blcr_save_CPPFLAGS="$CPPFLAGS" - crs_blcr_save_LDFLAGS="$LDFLAGS" - crs_blcr_save_LIBS="$LIBS" - - crs_blcr_CFLAGS="$CFLAGS $crs_blcr_check_CFLAGS" - crs_blcr_CPPFLAGS="$CPPFLAGS $crs_blcr_check_CPPFLAGS" - crs_blcr_LDFLAGS="$LDFLAGS $crs_blcr_check_LDFLAGS" - crs_blcr_LIBS="$LIBS $crs_blcr_check_LIBS" - - # Check to see if we found the BLCR libcr.h library - AS_IF([test "$check_crs_blcr_good" != "yes"], [$2], - [ - # - # Since BLCR libraries are not fully ISO99 C compliant - # -pedantic and -Wundef raise a bunch of warnings, so - # we just strip them off for this component - AC_MSG_WARN([Removed -pedantic and -Wundef from CFLAGS for blcr component because libcr.h is not really ANSI C]) - # Strip off problematic arguments - crs_blcr_CFLAGS="`echo $crs_blcr_CFLAGS | sed 's/-pedantic//g'`" - crs_blcr_CFLAGS="`echo $crs_blcr_CFLAGS | sed 's/-Wundef//g'`" - crs_blcr_CPPFLAGS="`echo $crs_blcr_CPPFLAGS | sed 's/-pedantic//g'`" - crs_blcr_CPPFLAGS="`echo $crs_blcr_CPPFLAGS | sed 's/-Wundef//g'`" - crs_blcr_LDFLAGS="$crs_blcr_LDFLAGS" - crs_blcr_LIBS="$crs_blcr_LIBS" - $1]) - - # - # Check for version difference which may have: - # - working cr_request_file - # - working cr_request_checkpoint (which should be used instead of cr_request_file) - # - 'requester' parameter to checkpoint_info - # - AS_IF([test "$check_crs_blcr_good" != "yes"], 
[$2], [ - CFLAGS="$crs_blcr_CFLAGS" - CPPFLAGS="$crs_blcr_CPPFLAGS" - LDFLAGS="$crs_blcr_LDFLAGS" - LIBS="$crs_blcr_LIBS" - # - # First look for the cr_request_file function - # - crs_blcr_have_working_cr_request=0 - AC_MSG_CHECKING(for BLCR working cr_request) - OPAL_SEARCH_LIBS_COMPONENT([crs_blcr], [cr_request_file],[cr], - [AC_TRY_COMPILE([#include ], - [#if CR_RELEASE_MAJOR <= 0 && CR_RELEASE_MINOR < 6 - #error Version earlier than 0.6.0 - #endif - ], - [crs_blcr_have_working_cr_request=1 - ], - [crs_blcr_have_working_cr_request=0 - AC_MSG_WARN([This BLCR version does not contain a known working version of cr_request_file]) - ])], - [crs_blcr_have_working_cr_request=0 - AC_MSG_WARN([This BLCR version does not contain the cr_request_file function]) - ]) - AC_DEFINE_UNQUOTED([CRS_BLCR_HAVE_CR_REQUEST], [$crs_blcr_have_working_cr_request], - [BLCR cr_request_file check]) - - # - # Look for the cr_request_checkpoint function - # - crs_blcr_have_cr_request_checkpoint=0 - AC_MSG_CHECKING(for BLCR cr_request_checkpoint) - OPAL_SEARCH_LIBS_COMPONENT([crs_blcr], - [cr_request_checkpoint],[cr], - [crs_blcr_have_cr_request_checkpoint=1 - ], - [crs_blcr_have_cr_request_checkpoint=0 - AC_MSG_WARN([This BLCR version does not contain the cr_request_checkpoint function]) - ]) - AC_DEFINE_UNQUOTED([CRS_BLCR_HAVE_CR_REQUEST_CHECKPOINT], [$crs_blcr_have_cr_request_checkpoint], - [BLCR cr_request_checkpoint check]) - - # - # Look for the cr_checkpoint_info.requester member - # - crs_blcr_have_info_requester=0 - AC_CHECK_MEMBER([struct cr_checkpoint_info.requester], - [crs_blcr_have_info_requester=1], - [AC_MSG_WARN([This BLCR version does not contain a 'requester' member of the 'cr_checkpoint_info' struct])], - [#include ]) - AC_DEFINE_UNQUOTED([CRS_BLCR_HAVE_INFO_REQUESTER], [$crs_blcr_have_info_requester], - [BLCRs cr_checkpoint_info.requester member availability]) - $1]) - - # - # Require either a working cr_request_file() or cr_request_checkpoint() function - # - AS_IF([test 
"$crs_blcr_have_working_cr_request" = "0" -a "$crs_blcr_have_cr_request_checkpoint" = "0"], - [$2 - check_crs_blcr_good="no" - AC_MSG_WARN([The BLCR CRS component requires either the cr_request_checkpoint() or cr_request_file() functions])]) - - # - # Reset the flags - # - CFLAGS="$crs_blcr_save_CFLAGS" - CPPFLAGS="$crs_blcr_save_CPPFLAGS" - LDFLAGS="$crs_blcr_save_LDFLAGS" - LIBS="$crs_blcr_save_LIBS" - - # - AS_IF([test "$check_crs_blcr_good" = "yes"], - [ AC_SUBST([crs_blcr_CFLAGS]) - AC_SUBST([crs_blcr_CPPFLAGS]) - AC_SUBST([crs_blcr_LDFLAGS]) - AC_SUBST([crs_blcr_LIBS]) - $1], - [AS_IF([test ! -z "$with_blcr" -a "$with_blcr" != "no"], - [AC_MSG_WARN([BLCR support requested but not found. Perhaps you need to specify the location of the BLCR libraries.]) - AC_MSG_ERROR([Aborting.])]) - $3]) - -])dnl diff --git a/opal/mca/crs/blcr/crs_blcr.h b/opal/mca/crs/blcr/crs_blcr.h deleted file mode 100644 index c1c6e5d0b40..00000000000 --- a/opal/mca/crs/blcr/crs_blcr.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * BLCR CRS component - * - */ - -#ifndef MCA_CRS_BLCR_EXPORT_H -#define MCA_CRS_BLCR_EXPORT_H - -#include "opal_config.h" - - -#include "opal/mca/mca.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/base/base.h" - -#include - -BEGIN_C_DECLS - - /* - * Local Component structures - */ - struct opal_crs_blcr_component_t { - /** Base CRS component */ - opal_crs_base_component_t super; - }; - typedef struct opal_crs_blcr_component_t opal_crs_blcr_component_t; - OPAL_MODULE_DECLSPEC extern opal_crs_blcr_component_t mca_crs_blcr_component; - - int opal_crs_blcr_component_query(mca_base_module_t **module, int *priority); - - extern bool opal_crs_blcr_dev_null; - - /* - * Module functions - */ - int opal_crs_blcr_module_init(void); - int opal_crs_blcr_module_finalize(void); - - /* - * Actual funcationality - */ - int opal_crs_blcr_checkpoint( pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state); - - int opal_crs_blcr_restart( opal_crs_base_snapshot_t *snapshot, - bool spawn_child, - pid_t *child_pid); - - int opal_crs_blcr_disable_checkpoint(void); - int opal_crs_blcr_enable_checkpoint(void); - - int opal_crs_blcr_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env); - - int opal_crs_blcr_reg_thread(void); - -END_C_DECLS - -#endif /* MCA_CRS_BLCR_EXPORT_H */ diff --git a/opal/mca/crs/blcr/crs_blcr_component.c b/opal/mca/crs/blcr/crs_blcr_component.c deleted file mode 100644 index 65b71c64cd0..00000000000 --- a/opal/mca/crs/blcr/crs_blcr_component.c +++ /dev/null @@ -1,145 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/util/output.h" - -#include "opal/constants.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "crs_blcr.h" - -/* - * Local functionality - */ -static int crs_blcr_register (void); -static int crs_blcr_open(void); -static int crs_blcr_close(void); - -bool opal_crs_blcr_dev_null = false; - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -opal_crs_blcr_component_t mca_crs_blcr_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component itself - */ - .base_version = { - OPAL_CRS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "blcr", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = crs_blcr_open, - .mca_close_component = crs_blcr_close, - .mca_query_component = opal_crs_blcr_component_query, - .mca_register_component_params = crs_blcr_register - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .verbose = 0, - .output_handle = -1, - } -}; - -static int crs_blcr_register (void) -{ - int ret; - - mca_crs_blcr_component.super.priority = 10; - ret = mca_base_component_var_register (&mca_crs_blcr_component.super.base_version, - "priority", "Priority of the CRS blcr component " - "(default: 10)". 
MCA_BASE_VAR_TYPE_INT, NULL, - MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_crs_blcr_component.super.priority); - if (0 > ret) { - return ret; - } - - mca_crs_blcr_component.super.verbose = 0; - ret = mca_base_component_var_register (&mca_crs_blcr_component.super.base_version, - "verbose", - "Verbose level for the CRS blcr component", - MCA_BASE_VAR_TYPE_INT, NULL, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_blcr_component.super.verbose); - if (0 > ret) { - return ret; - } - - opal_crs_blcr_dev_null = false; - ret = mca_base_component_var_register (&mca_crs_blcr_component.super.base_version, - "dev_null", - "Not for general use! For debugging only! Save checkpoint to /dev/null. [Default = disabled]", - MCA_BASE_VAR_TYPE_BOOL, NULL, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_crs_blcr_dev_null); - return (0 > ret) ? ret : OPAL_SUCCESS -} - -static int crs_blcr_open(void) -{ - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if ( 0 != mca_crs_blcr_component.super.verbose) { - mca_crs_blcr_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_crs_blcr_component.super.output_handle, - mca_crs_blcr_component.super.verbose); - } else { - mca_crs_blcr_component.super.output_handle = opal_crs_base_framework.framework_output; - } - - /* - * Debug output - */ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: open()"); - opal_output_verbose(20, mca_crs_blcr_component.super.output_handle, - "crs:blcr: open: priority = %d", - mca_crs_blcr_component.super.priority); - opal_output_verbose(20, mca_crs_blcr_component.super.output_handle, - "crs:blcr: open: verbosity = %d", - mca_crs_blcr_component.super.verbose); - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: open: dev_null = %s", - 
(opal_crs_blcr_dev_null == true ? "True" : "False")); - - return OPAL_SUCCESS; -} - -static int crs_blcr_close(void) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: close()"); - - return OPAL_SUCCESS; -} diff --git a/opal/mca/crs/blcr/crs_blcr_module.c b/opal/mca/crs/blcr/crs_blcr_module.c deleted file mode 100644 index c73bad1ce48..00000000000 --- a/opal/mca/crs/blcr/crs_blcr_module.c +++ /dev/null @@ -1,865 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "opal/util/show_help.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/constants.h" - -#include "opal/mca/base/mca_base_var.h" - -#include "opal/threads/threads.h" -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" - -#include "opal/mca/event/event.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "crs_blcr.h" - -/* - * Blcr module - */ -static opal_crs_base_module_t blcr_module = { - /** Initialization Function */ - opal_crs_blcr_module_init, - /** Finalization Function */ - opal_crs_blcr_module_finalize, - - /** Checkpoint interface */ - opal_crs_blcr_checkpoint, - - /** Restart Command Access */ - opal_crs_blcr_restart, - - /** Disable checkpoints */ - opal_crs_blcr_disable_checkpoint, - 
/** Enable checkpoints */ - opal_crs_blcr_enable_checkpoint, - - /** Prelaunch */ - opal_crs_blcr_prelaunch, - - /** Register Thread */ - opal_crs_blcr_reg_thread -}; - -/*************************** - * Snapshot Class Functions - ***************************/ -OBJ_CLASS_DECLARATION(opal_crs_blcr_snapshot_t); - -struct opal_crs_blcr_snapshot_t { - /** Base CRS snapshot type */ - opal_crs_base_snapshot_t super; - char * context_filename; -}; -typedef struct opal_crs_blcr_snapshot_t opal_crs_blcr_snapshot_t; - -void opal_crs_blcr_construct(opal_crs_blcr_snapshot_t *obj); -void opal_crs_blcr_destruct( opal_crs_blcr_snapshot_t *obj); - -OBJ_CLASS_INSTANCE(opal_crs_blcr_snapshot_t, - opal_crs_base_snapshot_t, - opal_crs_blcr_construct, - opal_crs_blcr_destruct); - -/****************** - * Local Functions - ******************/ -static int blcr_get_checkpoint_filename(char **fname, pid_t pid); -static int opal_crs_blcr_thread_callback(void *arg); -static int opal_crs_blcr_signal_callback(void *arg); - -static int opal_crs_blcr_restart_cmd(char *fname, char **cmd); - -static int blcr_cold_start(opal_crs_blcr_snapshot_t *snapshot); - -#if OPAL_ENABLE_CRDEBUG == 1 -static void MPIR_checkpoint_debugger_crs_hook(cr_hook_event_t event); -#endif - -/************************* - * Local Global Variables - *************************/ -#if OPAL_ENABLE_CRDEBUG == 1 -static opal_thread_t *checkpoint_thread_id = NULL; -static bool blcr_crdebug_refreshed_env = false; -#endif - -static cr_client_id_t client_id; -static cr_callback_id_t cr_thread_callback_id; -static cr_callback_id_t cr_signal_callback_id; -static int blcr_current_state = OPAL_CRS_NONE; - -static char *blcr_restart_cmd = NULL; -static char *blcr_checkpoint_cmd = NULL; - -static opal_condition_t blcr_cond; -static opal_mutex_t blcr_lock; - -static pid_t my_pid = -1; - -void opal_crs_blcr_construct(opal_crs_blcr_snapshot_t *snapshot) { - snapshot->context_filename = NULL; - snapshot->super.component_name = 
strdup(mca_crs_blcr_component.super.base_version.mca_component_name); -} - -void opal_crs_blcr_destruct( opal_crs_blcr_snapshot_t *snapshot) { - if(NULL != snapshot->context_filename) { - free(snapshot->context_filename); - snapshot->context_filename = NULL; - } -} - -/***************** - * MCA Functions - *****************/ -int opal_crs_blcr_component_query(mca_base_module_t **module, int *priority) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: component_query()"); - - *priority = mca_crs_blcr_component.super.priority; - *module = (mca_base_module_t *)&blcr_module; - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_module_init(void) -{ - void *crs_blcr_thread_callback_arg = NULL; - void *crs_blcr_signal_callback_arg = NULL; - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: module_init()"); - - blcr_restart_cmd = strdup("cr_restart"); - blcr_checkpoint_cmd = strdup("cr_checkpoint"); - - my_pid = getpid(); - - if( !opal_cr_is_tool ) { - /* We need to make the lock and condition variable before - * starting the thread, since the thread uses these vars. 
- */ - OBJ_CONSTRUCT(&blcr_lock, opal_mutex_t); - OBJ_CONSTRUCT(&blcr_cond, opal_condition_t); - - /* - * Initialize BLCR - */ - client_id = cr_init(); - if (0 > client_id) { - opal_output(mca_crs_blcr_component.super.output_handle, - "Error: crs:blcr: module_init: cr_init failed (%d)\n", client_id); - return OPAL_ERROR; - } - } - -#if OPAL_ENABLE_CRDEBUG == 1 - blcr_crdebug_refreshed_env = false; -#endif - - blcr_restart_cmd = strdup("cr_restart"); - blcr_checkpoint_cmd = strdup("cr_checkpoint"); - - if( !opal_cr_is_tool ) { - /* - * Register the thread handler - */ - cr_thread_callback_id = cr_register_callback(opal_crs_blcr_thread_callback, - crs_blcr_thread_callback_arg, - CR_THREAD_CONTEXT); - /* - * Register the signal handler - * - even though we do not use it - */ - cr_signal_callback_id = cr_register_callback(opal_crs_blcr_signal_callback, - crs_blcr_signal_callback_arg, - CR_SIGNAL_CONTEXT); - -#if OPAL_ENABLE_CRDEBUG == 1 - /* - * Checkpoint/restart enabled debugging hooks - * "NO_CALLBACKS" -> non-MPI threads - * "SIGNAL_CONTEXT" -> MPI threads - * "THREAD_CONTEXT" -> BLCR threads - */ - cr_register_hook(CR_HOOK_CONT_NO_CALLBACKS, MPIR_checkpoint_debugger_crs_hook); - cr_register_hook(CR_HOOK_CONT_SIGNAL_CONTEXT, MPIR_checkpoint_debugger_crs_hook); - - cr_register_hook(CR_HOOK_RSTRT_NO_CALLBACKS, MPIR_checkpoint_debugger_crs_hook); - cr_register_hook(CR_HOOK_RSTRT_SIGNAL_CONTEXT, MPIR_checkpoint_debugger_crs_hook); -#endif - } - - /* - * Now that we are done with init, set the state to running - */ - blcr_current_state = OPAL_CRS_RUNNING; - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: module_init() --> Finished [%d]", - opal_cr_is_tool); - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env) -{ - char * tmp_env_var = NULL; - - (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var); - 
opal_setenv(tmp_env_var, - "0", true, env); - free(tmp_env_var); - tmp_env_var = NULL; - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_reg_thread(void) -{ - cr_client_id_t loc_client_id; - - /* - * Initialize BLCR - */ - loc_client_id = cr_init(); - if (0 > loc_client_id) { - opal_output(mca_crs_blcr_component.super.output_handle, - "Error: crs:blcr: reg_thread: cr_init failed (%d)\n", loc_client_id); - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_module_finalize(void) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: module_finalize()"); - - /* Cleanup some memory */ - if( NULL != blcr_restart_cmd ) { - free(blcr_restart_cmd); - blcr_restart_cmd = NULL; - } - if( NULL != blcr_checkpoint_cmd ) { - free(blcr_checkpoint_cmd); - blcr_checkpoint_cmd = NULL; - } - - if( !opal_cr_is_tool ) { - OBJ_DESTRUCT(&blcr_lock); - OBJ_DESTRUCT(&blcr_cond); - - if( OPAL_CRS_RUNNING == blcr_current_state ) { - /* Unload the thread callback */ - cr_replace_callback(cr_thread_callback_id, NULL, NULL, CR_THREAD_CONTEXT); - /* Unload the signal callback */ - cr_replace_callback(cr_signal_callback_id, NULL, NULL, CR_SIGNAL_CONTEXT); - } - -#if OPAL_ENABLE_CRDEBUG == 1 - /* - * Checkpoint/restart enabled debugging hooks - */ - cr_register_hook(CR_HOOK_CONT_NO_CALLBACKS, NULL); - cr_register_hook(CR_HOOK_CONT_SIGNAL_CONTEXT, NULL); - - cr_register_hook(CR_HOOK_RSTRT_NO_CALLBACKS, NULL); - cr_register_hook(CR_HOOK_RSTRT_SIGNAL_CONTEXT, NULL); -#endif - } - - /* BLCR does not have a finalization routine */ - blcr_current_state = OPAL_CRS_NONE; - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_checkpoint(pid_t pid, - opal_crs_base_snapshot_t *base_snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state) -{ - int ret, exit_status = OPAL_SUCCESS; - opal_crs_blcr_snapshot_t *snapshot = NULL; -#if CRS_BLCR_HAVE_CR_REQUEST_CHECKPOINT == 1 - cr_checkpoint_args_t cr_args; - static cr_checkpoint_handle_t 
cr_handle = (cr_checkpoint_handle_t)(-1); -#endif - int fd = 0; - char *loc_fname = NULL; - - if( pid != my_pid ) { - opal_output(0, "crs:blcr: checkpoint(%d, ---): Checkpointing of peers not allowed!", pid); - exit_status = OPAL_ERROR; - goto cleanup; - } - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(%d, ---)", pid); - - snapshot = (opal_crs_blcr_snapshot_t *)base_snapshot; - - /* - * Update the snapshot metadata - */ - snapshot->super.component_name = strdup(mca_crs_blcr_component.super.base_version.mca_component_name); - blcr_get_checkpoint_filename(&(snapshot->context_filename), pid); - - if( NULL == snapshot->super.metadata ) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a")) ) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_COMP, snapshot->super.component_name); - fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_CONTEXT, snapshot->context_filename); - - fclose(snapshot->super.metadata ); - snapshot->super.metadata = NULL; - - /* - * If we can checkpointing ourselves do so: - * use cr_request_checkpoint() if available, and cr_request_file() if not - */ - if( opal_crs_blcr_dev_null ) { - loc_fname = strdup("/dev/null"); - } else { - asprintf(&loc_fname, "%s/%s", snapshot->super.snapshot_directory, snapshot->context_filename); - } - -#if OPAL_ENABLE_CRDEBUG == 1 - /* Make sure to identify the checkpointing thread, so that it is not - * prevented from requesting the checkpoint after the debugger detaches - */ - opal_cr_debug_set_current_ckpt_thread_self(); - checkpoint_thread_id = opal_thread_get_self(); - blcr_crdebug_refreshed_env = false; - - /* If checkpoint/restart enabled debugging then mark detachment place */ - if( 
MPIR_debug_with_checkpoint ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Detaching debugger..."); - MPIR_checkpoint_debugger_detach(); - } -#endif - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint SELF <%s>", - loc_fname); - -#if CRS_BLCR_HAVE_CR_REQUEST_CHECKPOINT == 1 || CRS_BLCR_HAVE_CR_REQUEST == 1 -#if CRS_BLCR_HAVE_CR_REQUEST_CHECKPOINT == 1 - fd = open(loc_fname, - O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, - S_IRUSR | S_IWUSR); - if( fd < 0 ) { - *state = OPAL_CRS_ERROR; - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Error: Unable to open checkpoint file (%s) for pid (%d)", - loc_fname, pid); - exit_status = OPAL_ERROR; - goto cleanup; - } - - cr_initialize_checkpoint_args_t(&cr_args); - cr_args.cr_scope = CR_SCOPE_PROC; - cr_args.cr_fd = fd; - if( options->stop ) { - cr_args.cr_signal = SIGSTOP; - } - - ret = cr_request_checkpoint(&cr_args, &cr_handle); - if( ret < 0 ) { - close(cr_args.cr_fd); - *state = OPAL_CRS_ERROR; - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Error: Unable to checkpoint pid (%d) to file (%s)", - pid, loc_fname); - exit_status = ret; - goto cleanup; - } - - /* Wait for checkpoint to finish */ - do { - ret = cr_poll_checkpoint(&cr_handle, NULL); - if( ret < 0 ) { - /* Check if restarting. This is not an error. 
*/ - if( (ret == CR_POLL_CHKPT_ERR_POST) && (errno == CR_ERESTARTED) ) { - ret = 0; - break; - } - /* If Call was interrupted by a signal, retry the call */ - else if (errno == EINTR) { - ; - } - /* Otherwise this is a real error that we need to deal with */ - else { - *state = OPAL_CRS_ERROR; - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Error: Unable to checkpoint pid (%d) to file (%s) - poll failed with (%d)", - pid, loc_fname, ret); - exit_status = ret; - goto cleanup; - } - } - } while( ret < 0 ); - - /* Close the file */ - close(cr_args.cr_fd); -#else - /* Request a checkpoint be taken of the current process. - * Since we are not guaranteed to finish the checkpoint before this - * returns, we also need to wait for it. - */ - cr_request_file(loc_fname); - - /* Wait for checkpoint to finish */ - do { - usleep(1000); /* JJH Do we really want to sleep? */ - } while(CR_STATE_IDLE != cr_status()); -#endif -#endif - - *state = blcr_current_state; - free(loc_fname); - - cleanup: - if( NULL != snapshot->super.metadata ) { - fclose(snapshot->super.metadata ); - snapshot->super.metadata = NULL; - } - - return exit_status; -} - -int opal_crs_blcr_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid) -{ - opal_crs_blcr_snapshot_t *snapshot = OBJ_NEW(opal_crs_blcr_snapshot_t); - char **cr_argv = NULL; - char *cr_cmd = NULL; - char *cr_full_cmd = NULL; - int ret; - int exit_status = OPAL_SUCCESS; - int status; - - snapshot->super = *base_snapshot; - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: restart(--, %d)", spawn_child); - - /* - * If we need to reconstruct the snapshot, - */ - if(snapshot->super.cold_start) { - if( OPAL_SUCCESS != (ret = blcr_cold_start(snapshot)) ) { - exit_status = OPAL_ERROR; - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: Unable to reconstruct the snapshot."); - goto cleanup; - } - } - - - /* - * Get the 
restart command - */ - if ( OPAL_SUCCESS != (ret = opal_crs_blcr_restart_cmd(snapshot->context_filename, &cr_cmd)) ) { - exit_status = ret; - goto cleanup; - } - if ( NULL == (cr_argv = opal_argv_split(cr_cmd, ' ')) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* Need to shutdown the event engine before this. - * for some reason the BLCR checkpointer and our event engine don't get - * along very well. - */ - opal_progress_finalize(); - (void) mca_base_framework_close(&opal_event_base_framework); - - if (!spawn_child) { - cr_full_cmd = opal_argv_join(cr_argv, ' '); - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: SELF: exec :(%s, %s):", - blcr_restart_cmd, cr_full_cmd); - - status = execvp(blcr_restart_cmd, cr_argv); - - if(status < 0) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: SELF: Child failed to execute :(%d):", status); - } - opal_show_help("help-opal-crs-blcr.txt", "blcr:restart_failed_exec", true, - status, - blcr_restart_cmd, - cr_full_cmd); - - exit_status = status; - goto cleanup; - } - /* - * Restart by starting a new process - */ - else { - *child_pid = fork(); - - if( 0 == *child_pid) { - /* Child Process */ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: CHILD: exec :(%s, %s):", - blcr_restart_cmd, - opal_argv_join(cr_argv, ' ')); - - status = execvp(blcr_restart_cmd, cr_argv); - - if(status < 0) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: CHILD: Child failed to execute :(%d):", status); - } - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: CHILD: execvp returned %d", status); - - exit_status = status; - goto cleanup; - } - else if(*child_pid > 0) { - /* Parent is done once it is started. 
*/ - ; - } - else { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: CHILD: fork failed :(%d):", *child_pid); - } - } - - cleanup: - if(NULL != cr_cmd) - free(cr_cmd); - if(NULL != cr_argv) - opal_argv_free(cr_argv); - - return exit_status; -} - -int opal_crs_blcr_disable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: disable_checkpoint()"); - /* - * Enter the BLCR Critical Section - */ - cr_enter_cs(client_id); - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_enable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: enable_checkpoint()"); - /* - * Leave the BLCR Critical Section - */ - cr_leave_cs(client_id); - - return OPAL_SUCCESS; -} - -/***************************** - * Local Function Definitions - *****************************/ -static int opal_crs_blcr_thread_callback(void *arg) { - const struct cr_checkpoint_info *ckpt_info = cr_get_checkpoint_info(); - int ret; - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: thread_callback()"); - - OPAL_THREAD_LOCK(&blcr_lock); - blcr_current_state = OPAL_CRS_CHECKPOINT; - - /* - * Allow the checkpoint to be taken, if we requested it - */ -#if CRS_BLCR_HAVE_INFO_REQUESTER == 1 - if( ckpt_info->requester != my_pid ) { - ret = cr_checkpoint(CR_CHECKPOINT_OMIT); - blcr_current_state = OPAL_CRS_RUNNING; - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: thread_callback(); WARNING: An external agent attempted to checkpoint this process " - "when it did not expect to be checkpointed. Skipping this checkpoint request." 
- " [%d != %d].", ckpt_info->requester, my_pid); - return 0; - } - else -#endif - { - if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_CRS_PRE_CKPT, - OPAL_CR_INC_STATE_PREPARE)) ) { - ; - } - - ret = cr_checkpoint(0); - } - - /* - * Restarting - */ - if ( 0 < ret ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: thread_callback: Restarting."); - blcr_current_state = OPAL_CRS_RESTART; - } - /* - * Continuing - */ - else { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: thread_callback: Continue."); - blcr_current_state = OPAL_CRS_CONTINUE; - } - - if( OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_CRS_POST_CKPT, - (blcr_current_state == OPAL_CRS_CONTINUE ? - OPAL_CR_INC_STATE_CONTINUE : - OPAL_CR_INC_STATE_RESTART))) ) { - ; - } - - OPAL_THREAD_UNLOCK(&blcr_lock); - opal_condition_signal(&blcr_cond); - - return 0; -} - -static int opal_crs_blcr_signal_callback(void *arg) { - const struct cr_checkpoint_info *ckpt_info = cr_get_checkpoint_info(); - int ret; - - /* - * Allow the checkpoint to be taken, if we requested it - */ -#if CRS_BLCR_HAVE_INFO_REQUESTER == 1 - if( ckpt_info->requester != my_pid ) { - ret = cr_checkpoint(CR_CHECKPOINT_OMIT); - return 0; - } - else -#endif - { - ret = cr_checkpoint(0); - } - - return 0; -} - -static int opal_crs_blcr_restart_cmd(char *fname, char **cmd) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: restart_cmd(%s, ---)", fname); - - if (NULL == fname) { - opal_output_verbose(10, opal_crs_base_framework.framework_output, - "crs:blcr: restart_cmd: Error: filename is NULL!"); - return OPAL_CRS_ERROR; - } - - asprintf(cmd, "%s %s", blcr_restart_cmd, fname); - - return OPAL_SUCCESS; -} - -static int blcr_get_checkpoint_filename(char **fname, pid_t pid) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: get_checkpoint_filename(--, %d)", pid); - - asprintf(fname, 
"ompi_blcr_context.%d", pid); - - return OPAL_SUCCESS; -} - -static int blcr_cold_start(opal_crs_blcr_snapshot_t *snapshot) { - int ret, exit_status = OPAL_SUCCESS; - char **tmp_argv = NULL; - char * component_name = NULL; - int prev_pid; - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: cold_start()"); - - /* - * Find the snapshot directory, read the metadata file - */ - if( NULL == snapshot->super.metadata ) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "r")) ) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot->super.metadata, - &component_name, &prev_pid) ) ) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_cold_start: Error: Failed to extract the metadata from the local snapshot (%s). 
Returned %d.", - snapshot->super.metadata_filename, ret); - exit_status = ret; - goto cleanup; - } - - snapshot->super.component_name = strdup(component_name); - - /* Compare the component strings to make sure this is our snapshot before going further */ - if ( 0 != strncmp(mca_crs_blcr_component.super.base_version.mca_component_name, - component_name, strlen(component_name)) ) { - exit_status = OPAL_ERROR; - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_cold_start: Error: This snapshot (%s) is not intended for us (%s)\n", - component_name, mca_crs_blcr_component.super.base_version.mca_component_name); - goto cleanup; - } - - /* - * Context Filename - */ - opal_crs_base_metadata_read_token(snapshot->super.metadata, CRS_METADATA_CONTEXT, &tmp_argv); - if( NULL == tmp_argv ) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_cold_start: Error: Failed to read the %s token from the local checkpoint in %s", - CRS_METADATA_CONTEXT, snapshot->super.snapshot_directory); - exit_status = OPAL_ERROR; - goto cleanup; - } - asprintf(&snapshot->context_filename, "%s/%s", snapshot->super.snapshot_directory, tmp_argv[0]); - - /* - * Reset the cold_start flag - */ - snapshot->super.cold_start = false; - - cleanup: - if(NULL != tmp_argv) { - opal_argv_free(tmp_argv); - tmp_argv = NULL; - } - - if( NULL != snapshot->super.metadata ) { - fclose(snapshot->super.metadata); - snapshot->super.metadata = NULL; - } - - return exit_status; -} - -#if OPAL_ENABLE_CRDEBUG == 1 -static void MPIR_checkpoint_debugger_crs_hook(cr_hook_event_t event) { - opal_thread_t *my_thread_id = NULL; - my_thread_id = opal_thread_get_self(); - - /* Non-MPI threads */ - if(event == CR_HOOK_RSTRT_NO_CALLBACKS ) { - /* wait for the MPI thread to refresh the environment for us */ - while(!blcr_crdebug_refreshed_env) { - sched_yield(); - } - } - /* MPI threads */ - else if(event == CR_HOOK_RSTRT_SIGNAL_CONTEXT ) { - if( 
opal_thread_self_compare(checkpoint_thread_id) ) { - opal_cr_refresh_environ(my_pid); - blcr_crdebug_refreshed_env = true; - } else { - while(!blcr_crdebug_refreshed_env) { - sched_yield(); - } - } - } - - /* - * Some debugging output - */ - /* Non-MPI threads */ - if( event == CR_HOOK_CONT_NO_CALLBACKS ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: MPIR_checkpoint_debugger_crs_hook: Waiting in Continue (Non-MPI). (%d)", - (int)my_thread_id->t_handle); - } - else if(event == CR_HOOK_RSTRT_NO_CALLBACKS ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: MPIR_checkpoint_debugger_crs_hook: Waiting in Restart (Non-MPI). (%d)", - (int)my_thread_id->t_handle); - } - /* MPI Threads */ - else if( event == CR_HOOK_CONT_SIGNAL_CONTEXT ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: MPIR_checkpoint_debugger_crs_hook: Waiting in Continue (MPI)."); - } - else if(event == CR_HOOK_RSTRT_SIGNAL_CONTEXT ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: MPIR_checkpoint_debugger_crs_hook: Waiting in Restart (MPI)."); - } - - /* - * Enter the breakpoint function. - * If no debugger intends on attaching, then this function is expected to - * return immediately. - * - * If this is an MPI thread then odds are that this is the checkpointing - * thread, in which case this function will return immediately allowing - * it to prepare the MPI library before signaling to the debugger that - * it is safe to attach, if necessary. 
- */ - MPIR_checkpoint_debugger_waitpoint(); - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: MPIR_checkpoint_debugger_crs_hook: Finished..."); - } -#endif diff --git a/opal/mca/crs/blcr/help-opal-crs-blcr.txt b/opal/mca/crs/blcr/help-opal-crs-blcr.txt deleted file mode 100644 index efb015d716b..00000000000 --- a/opal/mca/crs/blcr/help-opal-crs-blcr.txt +++ /dev/null @@ -1,28 +0,0 @@ - -*- text -*- -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open PAL CRS framework. -# -[blcr:restart_failed_exec] -Error: BLCR was not able to restart the process because exec failed. - Check the installation of BLCR on all of the machines in your - system. The following information may be of help: - Return Code : %d - BLCR Restart Command : %s - Restart Command Line : %s diff --git a/opal/mca/crs/criu/Makefile.am b/opal/mca/crs/criu/Makefile.am deleted file mode 100644 index 4754afe1296..00000000000 --- a/opal/mca/crs/criu/Makefile.am +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. 
-# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Hochschule Esslingen. All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFLAGS = $(crs_criu_CFLAGS) -AM_CPPFLAGS = $(crs_criu_CPPFLAGS) - -sources = \ - crs_criu.h \ - crs_criu_component.c \ - crs_criu_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_crs_criu_DSO -component_noinst = -component_install = mca_crs_criu.la -else -component_noinst = libmca_crs_criu.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_crs_criu_la_SOURCES = $(sources) -mca_crs_criu_la_LDFLAGS = -module -avoid-version $(crs_criu_LDFLAGS) -mca_crs_criu_la_LIBADD = $(crs_criu_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_crs_criu_la_SOURCES = $(sources) -libmca_crs_criu_la_LDFLAGS = -module -avoid-version $(crs_criu_LDFLAGS) -libmca_crs_criu_la_LIBADD = $(crs_criu_LIBS) diff --git a/opal/mca/crs/criu/configure.m4 b/opal/mca/crs/criu/configure.m4 deleted file mode 100644 index d1f28bbdc18..00000000000 --- a/opal/mca/crs/criu/configure.m4 +++ /dev/null @@ -1,91 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. -# Copyright (c) 2014 Hochschule Esslingen. 
All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_crs_criu_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_opal_crs_criu_CONFIG],[ - OPAL_VAR_SCOPE_PUSH([check_crs_criu_good check_crs_criu_dir_msg check_crs_criu_libdir_msg check_crs_criu_dir check_crs_criu_libdir]) - AC_CONFIG_FILES([opal/mca/crs/criu/Makefile]) - - AC_ARG_WITH([criu], - [AC_HELP_STRING([--with-criu(=DIR)], - [Path to CRIU Installation])]) - OPAL_CHECK_WITHDIR([criu], [$with_criu], [include/criu/criu.h]) - AC_ARG_WITH([criu-libdir], - [AC_HELP_STRING([--with-criu-libdir=DIR], - [Search for CRIU libraries in DIR])]) - OPAL_CHECK_WITHDIR([criu-libdir], [$with_criu_libdir], [libcriu.*]) - - # If we do not want FT or CRIU, don't compile this component - AS_IF([test "$opal_want_ft_cr" = "1" && test "$with_criu" = "yes"], - [check_crs_criu_good=yes], - [check_crs_criu_good=no]) - - # Defaults - check_crs_criu_dir_msg="compiler default" - check_crs_criu_libdir_msg="linker default" - check_crs_criu_dir="" - check_crs_criu_libdir="" - - # Determine the search paths for the headers and libraries - AS_IF([test $check_crs_criu_good = yes], - [AS_IF([test ! -z "$with_criu" -a "$with_criu" != "yes"], - [check_crs_criu_dir="$with_criu" - check_crs_criu_dir_msg="$with_criu (from --with-criu)"]) - AS_IF([test ! 
-z "$with_criu_libdir" -a "$with_criu_libdir" != "yes"], - [check_crs_criu_libdir="$with_criu_libdir" - check_crs_criu_libdir_msg="$with_criu_libdir (from --with-criu-libdir)"]) - ]) - - AS_IF([test $check_crs_criu_good = yes], - [AC_MSG_CHECKING([for CRIU dir]) - AC_MSG_RESULT([$check_crs_criu_dir_msg]) - AC_MSG_CHECKING([for CRIU library dir]) - AC_MSG_RESULT([$check_crs_criu_libdir_msg]) - OPAL_CHECK_PACKAGE([crs_criu_check], - [criu/criu.h], - [criu], - [criu_init_opts], - [], - [$check_crs_criu_dir], - [$check_crs_criu_libdir], - [check_crs_criu_good="yes"], - [check_crs_criu_good="no"]) - ]) - - crs_criu_CFLAGS="$CFLAGS $crs_criu_check_CFLAGS" - crs_criu_CPPFLAGS="$CPPFLAGS $crs_criu_check_CPPFLAGS" - crs_criu_LDFLAGS="$LDFLAGS $crs_criu_check_LDFLAGS" - crs_criu_LIBS="$LIBS $crs_criu_check_LIBS" - - AS_IF([test $check_crs_criu_good = yes], - [ AC_SUBST([crs_criu_CFLAGS]) - AC_SUBST([crs_criu_CPPFLAGS]) - AC_SUBST([crs_criu_LDFLAGS]) - AC_SUBST([crs_criu_LIBS]) - $1], - [AS_IF([test ! -z "$with_criu" && test "$with_criu" != "no"], - [AC_MSG_WARN([CRIU support requested but not found. Perhaps you need to enable FT support, or specify the location of the CRIU libraries...?]) - AC_MSG_ERROR([Aborting.])]) - $2]) - - OPAL_VAR_SCOPE_POP -])dnl diff --git a/opal/mca/crs/criu/crs_criu.h b/opal/mca/crs/criu/crs_criu.h deleted file mode 100644 index 96dbbd4598e..00000000000 --- a/opal/mca/crs/criu/crs_criu.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * CRIU CRS component - support checkpoint/restart using CRIU - */ - -#ifndef MCA_CRS_CRIU_EXPORT_H -#define MCA_CRS_CRIU_EXPORT_H - -#include "opal_config.h" - - -#include "opal/mca/mca.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/base/base.h" - -#include - -BEGIN_C_DECLS - -#define LOG_FILE ("criu.log") - -/* Local Component structures */ -struct opal_crs_criu_component_t { - /* Base CRS component */ - opal_crs_base_component_t super; - - /* criu log file */ - char *log_file; - /* criu log level */ - int log_level; - /* criu tcp established */ - bool tcp_established; - /* criu shell job */ - bool shell_job; - /* criu external unix sockets */ - bool ext_unix_sk; - /* criu leave tasks in running state after checkpoint */ - bool leave_running; -}; -typedef struct opal_crs_criu_component_t opal_crs_criu_component_t; - -OPAL_MODULE_DECLSPEC extern opal_crs_criu_component_t mca_crs_criu_component; - -int opal_crs_criu_component_query(mca_base_module_t **module, int *priority); - -/* - * Module functions - */ -int opal_crs_criu_module_init(void); -int opal_crs_criu_module_finalize(void); -int opal_crs_criu_checkpoint(pid_t pid, opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state); - -int opal_crs_criu_restart(opal_crs_base_snapshot_t *snapshot, - bool spawn_child, pid_t *child_pid); - -int opal_crs_criu_disable_checkpoint(void); -int opal_crs_criu_enable_checkpoint(void); - -int opal_crs_criu_prelaunch(int32_t rank, char *base_snapshot_dir, char **app, - char **cwd, char ***argv, char ***env); - -int opal_crs_criu_reg_thread(void); - - -END_C_DECLS - -#endif /* MCA_CRS_CRIU_EXPORT_H */ diff --git a/opal/mca/crs/criu/crs_criu_component.c b/opal/mca/crs/criu/crs_criu_component.c deleted file mode 100644 index e56be920c70..00000000000 --- a/opal/mca/crs/criu/crs_criu_component.c +++ /dev/null @@ -1,213 
+0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/util/output.h" - -#include "opal/constants.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "crs_criu.h" - -/* Local functionality */ -static int crs_criu_register(void); -static int crs_criu_open(void); -static int crs_criu_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -opal_crs_criu_component_t mca_crs_criu_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component itself - */ - .base_version = { - OPAL_CRS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "criu", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = crs_criu_open, - .mca_close_component = crs_criu_close, - .mca_query_component = opal_crs_criu_component_query, - .mca_register_component_params = crs_criu_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .verbose = 0, - .output_handle = -1, - }, - /* criu log file */ - 
LOG_FILE, - /* criu log level */ - 0, - /* criu tcp established */ - true, - /* criu shell job */ - true, - /* criu external unix sockets */ - true, - /* criu leave tasks in running state after checkpoint */ - true -}; - -static int crs_criu_register(void) -{ - int ret; - - mca_base_component_t *component = &mca_crs_criu_component.super.base_version; - - mca_crs_criu_component.super.priority = 10; - ret = mca_base_component_var_register(component, "priority", - "Priority of the CRS criu component (default: 10)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_crs_criu_component.super.priority); - if (0 > ret) { - return ret; - } - - mca_crs_criu_component.super.verbose = 0; - ret = mca_base_component_var_register(component, "verbose", - "Verbose level for the CRS criu component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.super.verbose); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "log", "Name of CRIU logfile (default: criu.log)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.log_file); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "log_level", - "Verbose level for the CRS criu component (default: 0)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.log_level); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "tcp_established", - "Checkpoint/restore established TCP connections (default: true)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.tcp_established); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "shell_job", - "Allow to 
dump and restore shell jobs (default: true)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.shell_job); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "ext_unix_sk", - "Allow external unix connections (default: true)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.ext_unix_sk); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "leave_running", - "Leave tasks in running state after checkpoint (default: true)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.leave_running); - - return (0 > ret) ? ret : OPAL_SUCCESS; -} - -static int crs_criu_open(void) -{ - int oh; - - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if (0 != mca_crs_criu_component.super.verbose) { - mca_crs_criu_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_crs_criu_component.super.output_handle, - mca_crs_criu_component.super.verbose); - } else { - mca_crs_criu_component.super.output_handle = opal_crs_base_framework.framework_output; - } - - oh = mca_crs_criu_component.super.output_handle; - /* - * Debug output - */ - opal_output_verbose(10, oh, "crs:criu: open()"); - opal_output_verbose(20, oh, "crs:criu: open: priority = %d", - mca_crs_criu_component.super.priority); - opal_output_verbose(20, oh, "crs:criu: open: verbosity = %d", - mca_crs_criu_component.super.verbose); - opal_output_verbose(20, oh, "crs:criu: open: log_file = %s", - mca_crs_criu_component.log_file); - opal_output_verbose(20, oh, "crs:criu: open: log_level = %d", - mca_crs_criu_component.log_level); - opal_output_verbose(20, oh, "crs:criu: open: tcp_established = %d", 
- mca_crs_criu_component.tcp_established); - opal_output_verbose(20, oh, "crs:criu: open: shell_job = %d", - mca_crs_criu_component.shell_job); - opal_output_verbose(20, oh, "crs:criu: open: ext_unix_sk = %d", - mca_crs_criu_component.ext_unix_sk); - opal_output_verbose(20, oh, "crs:criu: open: leave_running = %d", - mca_crs_criu_component.leave_running); - - return OPAL_SUCCESS; -} - -static int crs_criu_close(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: close()"); - - return OPAL_SUCCESS; -} diff --git a/opal/mca/crs/criu/crs_criu_module.c b/opal/mca/crs/criu/crs_criu_module.c deleted file mode 100644 index e4b12c4717c..00000000000 --- a/opal/mca/crs/criu/crs_criu_module.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include - -#include "opal/util/show_help.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/constants.h" - -#include "opal/mca/base/mca_base_var.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "crs_criu.h" - -/* CRIU module */ -static opal_crs_base_module_t criu_module = { - /* Initialization Function */ - opal_crs_criu_module_init, - /* Finalization Function */ - opal_crs_criu_module_finalize, - - /* Checkpoint interface */ - opal_crs_criu_checkpoint, - - /* Restart Command Access */ - opal_crs_criu_restart, - - /* Disable checkpoints */ - opal_crs_criu_disable_checkpoint, - /* Enable checkpoints */ - opal_crs_criu_enable_checkpoint, - - /* Prelaunch */ - opal_crs_criu_prelaunch, - - /* Register Thread */ - opal_crs_criu_reg_thread -}; - -/* Snapshot Class Functions */ -OBJ_CLASS_DECLARATION(opal_crs_criu_snapshot_t); - -struct opal_crs_criu_snapshot_t { - /* Base CRS snapshot type */ - opal_crs_base_snapshot_t super; -}; -typedef struct opal_crs_criu_snapshot_t opal_crs_criu_snapshot_t; - -void opal_crs_criu_construct(opal_crs_criu_snapshot_t *obj); -void opal_crs_criu_destruct(opal_crs_criu_snapshot_t *obj); - -OBJ_CLASS_INSTANCE(opal_crs_criu_snapshot_t, - opal_crs_base_snapshot_t, - opal_crs_criu_construct, - opal_crs_criu_destruct); - -void opal_crs_criu_construct(opal_crs_criu_snapshot_t *snapshot) -{ - snapshot->super.component_name = strdup(mca_crs_criu_component.super.base_version.mca_component_name); -} - -void opal_crs_criu_destruct(opal_crs_criu_snapshot_t *snapshot) -{ -} - -int opal_crs_criu_component_query(mca_base_module_t **module, int *priority) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: component_query()"); - - *priority = mca_crs_criu_component.super.priority; - *module = (mca_base_module_t 
*)&criu_module; - - return OPAL_SUCCESS; -} - -int opal_crs_criu_module_init(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: module_init()"); - - return OPAL_SUCCESS; -} - -int opal_crs_criu_module_finalize(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: module_finalize()"); - - return OPAL_SUCCESS; -} - -static void criu_error(int ret, pid_t pid) -{ - switch (ret) { - case -EBADE: - opal_output(0, "crs:criu:(PID:%d):RPC has returned fail", pid); - break; - case -ECONNREFUSED: - opal_output(0, "crs:criu:(PID:%d):Unable to connect to CRIU", pid); - break; - case -ECOMM: - opal_output(0, "crs:criu:(PID:%d):Unable to send/recv msg to/from CRIU", pid); - break; - case -EINVAL: - opal_output(0, "crs:criu:(PID:%d):CRIU doesn't support this type of request." - "You should probably update CRIU", pid); - break; - case -EBADMSG: - opal_output(0, "crs:criu:(PID:%d):Unexpected response from CRIU." - "You should probably update CRIU", pid); - break; - default: - opal_output(0, "crs:criu:(PID:%d):Unknown error type code." 
- "You should probably update CRIU", pid); - } -} - -int opal_crs_criu_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state) -{ - int ret; - int fd = 0; - int oh = mca_crs_criu_component.super.output_handle; - opal_crs_criu_snapshot_t *snapshot = NULL; - char *dest = NULL; - - opal_output_verbose(10, oh, "crs:criu: checkpoint(%d, ---)", pid); - - snapshot = (opal_crs_criu_snapshot_t *)base_snapshot; - snapshot->super.component_name = strdup(mca_crs_criu_component.super.base_version.mca_component_name); - - if (NULL == snapshot->super.metadata) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a"))) { - opal_output(oh, "crs:criu: checkpoint(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - *state = OPAL_CRS_ERROR; - goto cleanup; - } - } - fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_COMP, snapshot->super.component_name); - - fclose(snapshot->super.metadata); - snapshot->super.metadata = NULL; - - ret = criu_init_opts(); - - if (ret < 0) { - criu_error(ret, pid); - *state = OPAL_CRS_ERROR; - goto cleanup; - } - - opal_output_verbose(10, oh, "crs:criu: criu_init_opts() returned %d", ret); - - dest = snapshot->super.snapshot_directory; - opal_output_verbose(10, oh, "crs:criu: opening snapshot directory %s", dest); - fd = open(dest, O_DIRECTORY); - - if (fd < 0) { - *state = OPAL_CRS_ERROR; - opal_output(oh, "crs:criu: checkpoint(): Error: Unable to open checkpoint " - "directory (%s) for pid (%d)", dest, pid); - goto cleanup; - } - - /* http://criu.org/C_API */ - criu_set_images_dir_fd(fd); - criu_set_pid(pid); - - criu_set_log_file(mca_crs_criu_component.log_file); - criu_set_log_level(mca_crs_criu_component.log_level); - criu_set_tcp_established(mca_crs_criu_component.tcp_established); - criu_set_shell_job(mca_crs_criu_component.shell_job); - criu_set_ext_unix_sk(mca_crs_criu_component.ext_unix_sk); - 
criu_set_leave_running(mca_crs_criu_component.leave_running); - ret = criu_dump(); - - if (ret < 0) { - criu_error(ret, pid); - *state = OPAL_CRS_ERROR; - goto cleanup; - } - - *state = OPAL_CRS_CONTINUE; - - cleanup: - - if (fd > 0) { - close(fd); - } - - if (OPAL_CRS_ERROR == *state) { - return OPAL_ERROR; - } - return OPAL_SUCCESS; -} - -int opal_crs_criu_restart(opal_crs_base_snapshot_t *snapshot, - bool spawn_child, pid_t *child_pid) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: %s", __func__); - return OPAL_SUCCESS; -} - -int opal_crs_criu_disable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: %s", __func__); - return OPAL_SUCCESS; -} - -int opal_crs_criu_enable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: %s", __func__); - return OPAL_SUCCESS; -} - -int opal_crs_criu_prelaunch(int32_t rank, char *base_snapshot_dir, - char **app, char **cwd, char ***argv, - char ***env) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: %s", __func__); - return OPAL_SUCCESS; -} - -int opal_crs_criu_reg_thread(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: %s", __func__); - return OPAL_SUCCESS; -} diff --git a/opal/mca/crs/criu/owner.txt b/opal/mca/crs/criu/owner.txt deleted file mode 100644 index 0cc0384f0eb..00000000000 --- a/opal/mca/crs/criu/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: CISCO -status: maintenance diff --git a/opal/mca/crs/crs.h b/opal/mca/crs/crs.h deleted file mode 100644 index 08c239e480c..00000000000 --- a/opal/mca/crs/crs.h +++ /dev/null @@ -1,307 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Checkpoint and Restart Service (CRS) Interface - * - * General Description: - * - * The OPAL Checkpoint and Restart Service (CRS) has been created to create an - * abstract notion of a single process checkpointer for upper levels to - * incorporate checkpoint/restart calls genericly into their code. This keeps - * the upper levels from becoming too tied to a specfic checkpoint and restart - * implementation. - * - * This interface will change in the future to allow for some additional - * specialized functionality such as memory inclusion/exclusion, explicit - * restarting while running, and others. - * - * Words to the Wise: - * - * The CRS module must adhere to the API exactly inorder to be fully supported. 
- * How the module goes about conforming to the API is an internal module issue - * and in no cases should the module impose restrictions upon the upper layers - * as this is an API violation. - * - */ - -#ifndef MCA_CRS_H -#define MCA_CRS_H - -#include "opal_config.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/class/opal_object.h" - -BEGIN_C_DECLS - -/** - * States of the module - */ -enum opal_crs_state_type_t { - OPAL_CRS_NONE = 0, - OPAL_CRS_CHECKPOINT = 1, - OPAL_CRS_RESTART_PRE = 2, - OPAL_CRS_RESTART = 3, /* RESTART_POST */ - OPAL_CRS_CONTINUE = 4, - OPAL_CRS_TERM = 5, - OPAL_CRS_RUNNING = 6, - OPAL_CRS_ERROR = 7, - OPAL_CRS_STATE_MAX = 8 -}; -typedef enum opal_crs_state_type_t opal_crs_state_type_t; - -/* - * Possible checkpoint options - */ -struct opal_crs_base_ckpt_options_1_0_0_t { - /** Parent is an object type */ - opal_object_t super; - - /** Terminate after checkpoint */ - bool term; - /** Send SIGSTOP after checkpoint */ - bool stop; - - /** INC Prep Only */ - bool inc_prep_only; - - /** INC Recover Only */ - bool inc_recover_only; - -#if OPAL_ENABLE_CRDEBUG == 1 - /** Wait for debugger to attach after checkpoint */ - bool attach_debugger; - /** Do not wait for debugger to reattach after checkpoint */ - bool detach_debugger; -#endif -}; -typedef struct opal_crs_base_ckpt_options_1_0_0_t opal_crs_base_ckpt_options_1_0_0_t; -typedef struct opal_crs_base_ckpt_options_1_0_0_t opal_crs_base_ckpt_options_t; -OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_crs_base_ckpt_options_t); - -/** - * Structure for Single process snapshot - * Each component is assumed to have extened this definition - * in the same way they exten the opal_crs_base_compoinent_t below. 
- */ -struct opal_crs_base_snapshot_1_0_0_t { - /** This is an object, so must have super */ - opal_list_item_t super; - - /** MCA Component name */ - char * component_name; - - /** Metadata filename */ - char * metadata_filename; - - /** Metadata fd */ - FILE * metadata; - - /** Absolute path the the snapshot directory */ - char * snapshot_directory; - - /** Cold Start: - * If we are restarting cold, then we need to recreate this structure - * opal_restart would set this, and let the component do the heavy lifting - * of recreating the structure, sicne it doesn't know exactly how to. - */ - bool cold_start; -}; -typedef struct opal_crs_base_snapshot_1_0_0_t opal_crs_base_snapshot_1_0_0_t; -typedef struct opal_crs_base_snapshot_1_0_0_t opal_crs_base_snapshot_t; - -OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_crs_base_snapshot_t); - -/** - * Module initialization function. - * Returns OPAL_SUCCESS - */ -typedef int (*opal_crs_base_module_init_fn_t) - (void); - -/** - * Module finalization function. - * Returns OPAL_SUCCESS - */ -typedef int (*opal_crs_base_module_finalize_fn_t) - (void); - -/** - * Call the underlying checkpointer. - * Returns OPAL_SUCCESS upon success, and OPAL_ERROR otherwise. - * - * Arguments: - * pid = PID of the process to checkpoint, or 0 if checkpointing self. - * fname = the filename where the checkpoint has been written. - * state = The state at which the checkpoint is exiting - * - OPAL_CRS_CONTINUE - * Continuing after a checkpoint has been taken - * - OPAL_CRS_RESTART - * Restarting from a checkpoint - * - OPAL_CRS_ERROR - * Checkpoint was not successful. - * - * The 'fname' string is owned by the caller: if appropriate, it must be eventually - * freed by the caller. 
- */ -typedef int (*opal_crs_base_module_checkpoint_fn_t) - (pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state); - -/** - * Call the underlying restart command for this process - * Returns OPAL_SUCCESS or OPAL_CRS_ERROR - * - * Arguments: - * fname = Checkpoint filename - * spawn_child = true if the restarted process should be forked as a new process, - * in which case 'child_pid' will be returned. - * false if the restarted process should overwrite the current - * process space. - * child_pid = PID of the child that was started, if applicable - * - */ -typedef int (*opal_crs_base_module_restart_fn_t) - (opal_crs_base_snapshot_t *snapshot, - bool spawn_child, - pid_t *child_pid); - -/** - * Disable the checkpointer - * Returns OPAL_SUCCESS or OPAL_CRS_ERROR - * - * This should set a flag/mutex to disallow checkpoints to occur. - * If a checkpoint were to occur while checkpoints are disabled, - * they should block until reenabled. - * A quality module implementation would notify the user that the - * checkpoint has been delayed until the program is out of this critical - * section of code. - */ -typedef int (*opal_crs_base_module_disable_checkpoint_fn_t) - (void); - -/** - * Enable the checkpointer - * Returns OPAL_SUCCESS or OPAL_CRS_ERROR - * - * This should set a flag/mutex to allow checkpoints to occur - */ -typedef int (*opal_crs_base_module_enable_checkpoint_fn_t) - (void); - -/** - * Prepare the CRS component for process launch. 
- * Some CRS components need to take action before the - * process is ever launched to do such things as: - * - seed the process environment - * - LD_PRELOAD - * - Analyze the binary before launch - * - * @param rank Rank of the process to be started - * @param app Absolute pathname of argv[0] - * @param argv Standard argv-style array, including a final NULL pointer - * @param env Standard environ-style array, including a final NULL pointer - */ -typedef int (*opal_crs_base_module_prelaunch_fn_t) - (int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env); - -/** - * Register another thread that may call this library. - * Some CR systems require that each thread that will call into their library - * register individually before doing so. - * - * Returns OPAL_SUCCESS or OPAL_ERROR - */ -typedef int (*opal_crs_base_module_reg_thread_fn_t) - (void); - -/** - * Structure for CRS components. - */ -struct opal_crs_base_component_2_0_0_t { - /** MCA base component */ - mca_base_component_t base_version; - /** MCA base data */ - mca_base_component_data_t base_data; - - /** Verbosity Level */ - int verbose; - /** Output Handle for opal_output */ - int output_handle; - /** Default Priority */ - int priority; -}; -typedef struct opal_crs_base_component_2_0_0_t opal_crs_base_component_2_0_0_t; -typedef struct opal_crs_base_component_2_0_0_t opal_crs_base_component_t; - -/** - * Structure for CRS modules - */ -struct opal_crs_base_module_1_0_0_t { - /** Initialization Function */ - opal_crs_base_module_init_fn_t crs_init; - /** Finalization Function */ - opal_crs_base_module_finalize_fn_t crs_finalize; - - /** Checkpoint interface */ - opal_crs_base_module_checkpoint_fn_t crs_checkpoint; - - /** Restart Interface */ - opal_crs_base_module_restart_fn_t crs_restart; - - /** Disable checkpoints */ - opal_crs_base_module_disable_checkpoint_fn_t crs_disable_checkpoint; - /** Enable checkpoints */ - 
opal_crs_base_module_enable_checkpoint_fn_t crs_enable_checkpoint; - - /** Pre Launch */ - opal_crs_base_module_prelaunch_fn_t crs_prelaunch; - - /** Per thread registration */ - opal_crs_base_module_reg_thread_fn_t crs_reg_thread; -}; -typedef struct opal_crs_base_module_1_0_0_t opal_crs_base_module_1_0_0_t; -typedef struct opal_crs_base_module_1_0_0_t opal_crs_base_module_t; - -OPAL_DECLSPEC extern opal_crs_base_module_t opal_crs; - -/** - * Macro for use in components that are of type CRS - */ -#define OPAL_CRS_BASE_VERSION_2_0_0 \ - OPAL_MCA_BASE_VERSION_2_1_0("crs", 2, 0, 0) - -END_C_DECLS - -#endif /* OPAL_CRS_H */ - diff --git a/opal/mca/crs/dmtcp/Makefile.am b/opal/mca/crs/dmtcp/Makefile.am deleted file mode 100644 index 166c047a761..00000000000 --- a/opal/mca/crs/dmtcp/Makefile.am +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFLAGS = $(crs_dmtcp_CFLAGS) -AM_CPPFLAGS = $(crs_dmtcp_CPPFLAGS) - -sources = \ - crs_dmtcp.h \ - crs_dmtcp_component.c \ - crs_dmtcp_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_opal_crs_dmtcp_DSO -component_noinst = -component_install = mca_crs_dmtcp.la -else -component_noinst = libmca_crs_dmtcp.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_crs_dmtcp_la_SOURCES = $(sources) -mca_crs_dmtcp_la_LDFLAGS = -module -avoid-version $(crs_dmtcp_LDFLAGS) -mca_crs_dmtcp_la_LIBADD = $(crs_dmtcp_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_crs_dmtcp_la_SOURCES = $(sources) -libmca_crs_dmtcp_la_LDFLAGS = -module -avoid-version $(crs_dmtcp_LDFLAGS) -libmca_crs_dmtcp_la_LIBADD = $(crs_dmtcp_LIBS) diff --git a/opal/mca/crs/dmtcp/configure.m4 b/opal/mca/crs/dmtcp/configure.m4 deleted file mode 100644 index 420fb554c93..00000000000 --- a/opal/mca/crs/dmtcp/configure.m4 +++ /dev/null @@ -1,138 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_opal_crs_dmtcp_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_opal_crs_dmtcp_CONFIG],[ - AC_CONFIG_FILES([opal/mca/crs/dmtcp/Makefile]) - - OPAL_VAR_SCOPE_PUSH([opal_check_crs_dmtcp_good opal_opal_check_crs_dmtcp_save_CPPFLAGS opal_opal_check_crs_dmtcp_save_LDFLAGS opal_opal_check_crs_dmtcp_save_LIBS opal_check_crs_dmtcp_dir_msg opal_check_crs_dmtcp_libdir_msg opal_check_crs_dmtcp_dir opal_check_crs_dmtcp_libdir]) - - - opal_check_crs_dmtcp_good="no" - - # Configure option to specify where to look for DMTCP headers - # --with-dmtcp(=DIR) - AC_ARG_WITH([dmtcp], - [AC_HELP_STRING([--with-dmtcp(=DIR)], - [Path to DMTCP Installation])]) - OPAL_CHECK_WITHDIR([dmtcp], [$with_dmtcp], [include/mtcp.h]) - - # Configure option to specify where to look for DMTCP libraries - # (Default: $with_dmtcp/lib) - # --with-dmtcp-libdir=DIR - 
AC_ARG_WITH([dmtcp-libdir], - [AC_HELP_STRING([--with-dmtcp-libdir=DIR], - [Search for DMTCP libraries in DIR])]) - OPAL_CHECK_WITHDIR([dmtcp-libdir], [$with_dmtcp_libdir], [libmtcp.so]) - - # - # Check if Open MPI was compiled with Checkpoint/Restart support - # If not, then we do not compile this component - # - AS_IF([test "$opal_want_ft" = "0"], - [opal_check_crs_dmtcp_good="no"], - [opal_check_crs_dmtcp_good="yes"]) - - # - # Check if the user explicitly requested -not- to build the DMTCP component - # If so, the we do not compile this component - # - AS_IF([test "$with_dmtcp" = "no" -o "$opal_check_crs_dmtcp_good" = "no"], - [opal_check_crs_dmtcp_good="no"], - [opal_check_crs_dmtcp_good="yes"]) - - # Save some flags - opal_opal_check_crs_dmtcp_save_CPPFLAGS=$CPPFLAGS - opal_opal_check_crs_dmtcp_save_LDFLAGS=$LDFLAGS - opal_opal_check_crs_dmtcp_save_LIBS=$LIBS - - # - # Now to check if the library is usable - # - opal_check_crs_dmtcp_dir_msg="compiler default" - opal_check_crs_dmtcp_libdir_msg="linker default" - opal_check_crs_dmtcp_dir="" - opal_check_crs_dmtcp_libdir="" - - # Determine the search paths for the headers and libraries - AS_IF([test "$opal_check_crs_dmtcp_good" = "yes"], - [AS_IF([test ! -z "$with_dmtcp" -a "$with_dmtcp" != "yes"], - [opal_check_crs_dmtcp_dir="$with_dmtcp" - opal_check_crs_dmtcp_dir_msg="$with_dmtcp (from --with-dmtcp)"]) - AS_IF([test ! -z "$with_dmtcp_libdir" -a "$with_dmtcp_libdir" != "yes"], - [opal_check_crs_dmtcp_libdir="$with_dmtcp_libdir" - opal_check_crs_dmtcp_libdir_msg="$with_dmtcp_libdir (from --with-dmtcp-libdir)"]) - ]) - - # Look for DMTCP. 
- AS_IF([test "$opal_check_crs_dmtcp_good" = "yes"], - [AC_MSG_CHECKING([for DMTCP dir]) - AC_MSG_RESULT([$opal_check_crs_dmtcp_dir_msg]) - AC_MSG_CHECKING([for DMTCP library dir]) - AC_MSG_RESULT([$opal_check_crs_dmtcp_libdir_msg]) - OPAL_CHECK_PACKAGE([crs_dmtcp_check], - [mtcp.h], - [mtcp], - [mtcp_init], - [], - [$opal_check_crs_dmtcp_dir], - [$opal_check_crs_dmtcp_libdir], - [opal_check_crs_dmtcp_good="yes"], - [opal_check_crs_dmtcp_good="no"]) - ]) - - # When we restart a thread, we use execlp() to exec the "mtcp_restart" - # command. We don't care what its path is, but it does need to exist in - # the PATH. - AC_CHECK_PROG([mtcp_restart_command_exists], ["mtcp_restart"], ["yes"], ["no"]) - AS_IF([test "$mtcp_restart_command_exists" = "no"], - [opal_check_crs_dmtcp_good="no" - AS_IF([test ! -z "$with_dmtcp" -a "$with_dmtcp" != "no"], - [AC_MSG_WARN([mtcp_restart not found in PATH.]) - AC_MSG_ERROR([Aborting.])])]) - - # - # If '-lmtcp' or - # '-I' or '-L' was needed to link to MTCP, then OPAL_CHECK_PACKAGE - # sets the crs_mtcp_check_* variables, which we use below. - # - - crs_dmtcp_CFLAGS="$CFLAGS $crs_dmtcp_check_CFLAGS" - crs_dmtcp_CPPFLAGS="$CPPFLAGS $crs_dmtcp_check_CPPFLAGS" - crs_dmtcp_LDFLAGS="$LDFLAGS $crs_dmtcp_check_LDFLAGS" - crs_dmtcp_LIBS="$crs_dmtcp_check_LIBS $LIBS" - - AS_IF([test "$opal_check_crs_dmtcp_good" = "yes"], - [$1]) - - CPPFLAGS=$opal_opal_check_crs_dmtcp_save_CPPFLAGS - LDFLAGS="$crs_dmtcp_check_LDFLAGS $opal_opal_check_crs_dmtcp_save_LDFLAGS" - LIBS="$crs_dmtcp_LIBS $opal_opal_check_crs_dmtcp_save_LIBS" - - AC_SUBST([crs_dmtcp_CFLAGS]) - AC_SUBST([crs_dmtcp_CPPFLAGS]) - AC_SUBST([crs_dmtcp_LDFLAGS]) - AC_SUBST([crs_dmtcp_LIBS]) - - # If all is good at this point then post any compiler options to - # the build environment. If all is not good at this point and - # DMTCP was explicitly requested, then error out. - - AS_IF([test "$opal_check_crs_dmtcp_good" = "yes"], - [$1], - [AS_IF([test ! 
-z "$with_dmtcp" -a "$with_dmtcp" != "no"], - [AC_MSG_WARN([DMTCP support requested but not found. Perhaps you need to specify the location of the DMTCP libraries.]) - AC_MSG_ERROR([Aborting.])]) - $2]) - OPAL_VAR_SCOPE_POP -])dnl diff --git a/opal/mca/crs/dmtcp/crs_dmtcp.h b/opal/mca/crs/dmtcp/crs_dmtcp.h deleted file mode 100644 index a3b2837a75f..00000000000 --- a/opal/mca/crs/dmtcp/crs_dmtcp.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010-2011 Alex Brick . - * All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * DMTCP CRS component - * - */ - -#ifndef MCA_CRS_DMTCP_EXPORT_H -#define MCA_CRS_DMTCP_EXPORT_H - -#include "opal_config.h" - - -#include "opal/mca/mca.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/base/base.h" - -/* JJH NOTE: Include your library header here */ -/* #include */ -#include - -BEGIN_C_DECLS - - /* - * Local Component Structure - */ - struct opal_crs_dmtcp_component_t { - /** Base CRS component */ - opal_crs_base_component_t super; - - /** JJH: Add additional items here as needed internally */ - }; - typedef struct opal_crs_dmtcp_component_t opal_crs_dmtcp_component_t; - OPAL_MODULE_DECLSPEC extern opal_crs_dmtcp_component_t mca_crs_dmtcp_component; - - /* - * Component query command - * - Called during opal_init() to determine if this component should be selected. 
- */ - int opal_crs_dmtcp_component_query(mca_base_module_t **module, int *priority); - - /* - * Module functions - */ - int opal_crs_dmtcp_module_init(void); - int opal_crs_dmtcp_module_finalize(void); - - /* - * Actual CRS funcationality - */ - int opal_crs_dmtcp_checkpoint( pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state); - - int opal_crs_dmtcp_restart( opal_crs_base_snapshot_t *snapshot, - bool spawn_child, - pid_t *child_pid); - - int opal_crs_dmtcp_disable_checkpoint(void); - int opal_crs_dmtcp_enable_checkpoint(void); - - int opal_crs_dmtcp_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env); - - int opal_crs_dmtcp_reg_thread(void); - -END_C_DECLS - -#endif /* MCA_CRS_DMTCP_EXPORT_H */ diff --git a/opal/mca/crs/dmtcp/crs_dmtcp_component.c b/opal/mca/crs/dmtcp/crs_dmtcp_component.c deleted file mode 100644 index 76f25020278..00000000000 --- a/opal/mca/crs/dmtcp/crs_dmtcp_component.c +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010-2011 Alex Brick . - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/util/output.h" - -#include "opal/constants.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "crs_dmtcp.h" - -/* - * Local functionality - */ -static int crs_dmtcp_register (void); -static int crs_dmtcp_open(void); -static int crs_dmtcp_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -opal_crs_dmtcp_component_t mca_crs_dmtcp_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component itself - */ - .base_version = { - OPAL_CRS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "dmtcp", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = crs_dmtcp_open, - .mca_close_component = crs_dmtcp_close, - .mca_query_component = opal_crs_dmtcp_component_query, - .mca_register_component_params = crs_dmtcp_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .verbose = 0, - .output_handle = -1 - } -}; - -static int crs_dmtcp_register (void) -{ - int ret; - /* - * User can adjust the relative priority of this component with respect - * to other CRS components available for selection. 
- */ - mca_crs_dmtcp_component.super.priority = 20 - ret = mca_base_component_var_register (&mca_crs_dmtcp_component.super.base_version, - "priority", "Priority of the CRS dmtcp component " - "(default: 20)", MCA_BASE_VAR_TYPE_INT, NULL, - MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_crs_dmtcp_component.super.priority); - if (0 > ret) { - return ret; - } - - /* - * Adjust the verbosity level for this component. Default off or 0. - */ - mca_crs_dmtcp_component.super.verbose = 0; - ret = mca_base_component_var_register (&mca_crs_dmtcp_component.super.base_version, - "verbose", - "Verbose level for the CRS dmtcp component", - MCA_BASE_VAR_TYPE_INT, NULL,MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_dmtcp_component.super.verbose); - return (0 > ret) ? ret : OPAL_SUCCESS; -} - -static int crs_dmtcp_open(void) -{ - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if ( 0 != mca_crs_dmtcp_component.super.verbose) { - mca_crs_dmtcp_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_crs_dmtcp_component.super.output_handle, - mca_crs_dmtcp_component.super.verbose); - } else { - mca_crs_dmtcp_component.super.output_handle = opal_crs_base_framework.framework_output; - } - - /* - * Debug output - */ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: open()"); - opal_output_verbose(20, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: open: priority = %d", - mca_crs_dmtcp_component.super.priority); - opal_output_verbose(20, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: open: verbosity = %d", - mca_crs_dmtcp_component.super.verbose); - - return OPAL_SUCCESS; -} - -static int crs_dmtcp_close(void) -{ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: close()"); - - return OPAL_SUCCESS; -} diff --git 
a/opal/mca/crs/dmtcp/crs_dmtcp_module.c b/opal/mca/crs/dmtcp/crs_dmtcp_module.c deleted file mode 100644 index e18626ff577..00000000000 --- a/opal/mca/crs/dmtcp/crs_dmtcp_module.c +++ /dev/null @@ -1,709 +0,0 @@ -/* - * Copyright (c) 2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010-2011 Alex Brick . - * All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/constants.h" - -#include "opal/mca/base/mca_base_var.h" - -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" - -#include "opal/mca/event/event.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "crs_dmtcp.h" - -#define MTCP_RESTART_COMMAND "mtcp_restart" - -/* - * DMTCP module - */ -static opal_crs_base_module_t dmtcp_module = { - /** Initialization Function */ - opal_crs_dmtcp_module_init, - /** Finalization Function */ - opal_crs_dmtcp_module_finalize, - - /** Checkpoint interface */ - opal_crs_dmtcp_checkpoint, - - /** Restart Command Access */ - opal_crs_dmtcp_restart, - - /** Disable checkpoints */ - opal_crs_dmtcp_disable_checkpoint, - /** Enable checkpoints */ - opal_crs_dmtcp_enable_checkpoint, - - /** Prelaunch */ - opal_crs_dmtcp_prelaunch, - - /** Register Thread */ - opal_crs_dmtcp_reg_thread -}; - -/*************************** - * Snapshot Class Functions - ***************************/ -OBJ_CLASS_DECLARATION(opal_crs_dmtcp_snapshot_t); - -struct opal_crs_dmtcp_snapshot_t { - /** Base CRS snapshot type */ - opal_crs_base_snapshot_t super; - char * context_filename; -}; -typedef struct opal_crs_dmtcp_snapshot_t opal_crs_dmtcp_snapshot_t; - -void opal_crs_dmtcp_construct(opal_crs_dmtcp_snapshot_t *obj); -void opal_crs_dmtcp_destruct(opal_crs_dmtcp_snapshot_t 
*obj); - -OBJ_CLASS_INSTANCE(opal_crs_dmtcp_snapshot_t, - opal_crs_base_snapshot_t, - opal_crs_dmtcp_construct, - opal_crs_dmtcp_destruct); - -/****************** - * Local Functions - ******************/ -static int dmtcp_cold_start(opal_crs_dmtcp_snapshot_t *snapshot); -static int dmtcp_generate_full_ckpt_path(opal_crs_dmtcp_snapshot_t *snapshot); -static void dmtcp_sleep_between_ckpt_callback(int interval); -static void dmtcp_pre_ckpt_callback(char **ckpt_filename); -static void dmtcp_post_ckpt_callback(int is_restarting, - char *mtcp_restore_argv_start_addr); -static int dmtcp_should_ckpt_fd_callback(int fd); - -/************************* - * Local Global Variables - *************************/ -static char *full_ckpt_path = NULL; -static pthread_cond_t checkpoint_cond = PTHREAD_COND_INITIALIZER; -static pthread_cond_t checkpoint_done_cond = PTHREAD_COND_INITIALIZER; -static pthread_mutex_t checkpoint_mutex = PTHREAD_MUTEX_INITIALIZER; -static int post_ckpt_state; - -void opal_crs_dmtcp_construct(opal_crs_dmtcp_snapshot_t *snapshot) { - snapshot->context_filename = NULL; - snapshot->super.component_name = - strdup(mca_crs_dmtcp_component.super.base_version.mca_component_name); -} - -void opal_crs_dmtcp_destruct( opal_crs_dmtcp_snapshot_t *snapshot) { - if(NULL != snapshot->context_filename) { - free(snapshot->context_filename); - snapshot->context_filename = NULL; - } -} - -/***************** - * MCA Functions - *****************/ -int opal_crs_dmtcp_component_query(mca_base_module_t **module, int *priority) -{ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: component_query()"); - - *priority = mca_crs_dmtcp_component.super.priority; - *module = (mca_base_module_t *)&dmtcp_module; - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_module_init(void) -{ - char *temp_checkpoint_name; - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: module_init()"); - - /* - * JJH NOTE: Call any initialization 
routines you require - */ - mtcp_set_callbacks(dmtcp_sleep_between_ckpt_callback, /* sleep_between_ckpt */ - dmtcp_pre_ckpt_callback, /* pre_ckpt */ - dmtcp_post_ckpt_callback, /* post_ckpt */ - dmtcp_should_ckpt_fd_callback, /* ckpt_fd */ - NULL); /* write_ckpt_header */ - - /* This serves to simply initialize MTCP. The checkpoint file will - * actually be set by our pre_ckpt callback (which takes it from the - * snapshot given to the CRS checkpoint function), and the interval will be - * ignored, substituted for a synchronization signal that is handled by our - * sleep_between_ckpt callback. - */ - - asprintf(&temp_checkpoint_name, "checkpoint.dmtcp.%ld", syscall(SYS_getpid)); - mtcp_init(temp_checkpoint_name, 0, 1); - mtcp_ok(); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: leaving module_init()"); - - free(temp_checkpoint_name); - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_module_finalize(void) -{ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: module_finalize()"); - - /* - * JJH NOTE: Call any finalization routines you require - */ - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env) -{ - char * tmp_env_var = NULL; - - /* - * The below should be left untouched for now - */ - (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var); - opal_setenv(tmp_env_var, - "0", true, env); - free(tmp_env_var); - tmp_env_var = NULL; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: leaving module_prelaunch()"); - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_reg_thread(void) -{ - /* - * JJH NOTE: If you require that all threads that may call into MTCP - * explicitly register with MTCP, then place the necessary - * initialization here. 
- */ - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: leaving module_reg_thread()"); - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_checkpoint(pid_t pid, - opal_crs_base_snapshot_t *base_snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state) -{ - int unlock_retval, exit_status = OPAL_SUCCESS; - char buf[BUFSIZ]; - opal_crs_dmtcp_snapshot_t *snapshot; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: about to lock mutex for checkpoint()"); - - pthread_mutex_lock(&checkpoint_mutex); - snapshot = (opal_crs_dmtcp_snapshot_t *) base_snapshot; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: checkpoint(%d, ---)", pid); - - /* Are we checkpointing ourselves or a peer. - * JJH NOTE: This will only ever be called when pid == getpid() - * This is an old interface argument, that is no longer used. - */ - - /* bricka (2010-05-14): According to crs.h, 0 also indicates checkpointing - * self. - */ - if((pid != 0) && (pid != syscall(SYS_getpid)) ) { - /* MTCP can only checkpoint a single process: we can only checkpoint - * ourself. */ - *state = OPAL_CRS_ERROR; - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* the metadata file should always be NULL at this point */ - if ( NULL != snapshot->super.metadata) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: checkpoint(): Error: Metadata file already open"); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * Update the snapshot metadata with the component name so opal-restart can - * pick the correct CRS to restart with. 
- */ - snapshot->super.component_name = strdup(mca_crs_dmtcp_component.super.base_version.mca_component_name); - - if( NULL == snapshot->super.metadata ) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a")) ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: checkpoint(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - - /* The filename of the checkpoint will be changed by our pre_ckpt hook - * based on the options given to this function. */ - if(dmtcp_generate_full_ckpt_path(snapshot) == -1) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: unable to generate context filename."); - - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * JJH NOTE: You can write however much or little data you want to the - * metadata file. The metadata file is stored with the local - * checkpoint, and provided at restart time to help the - * CRS component deteremine how to restart from any files - * that is left in this directory during checkpoint. - * Use the command below to write key/value strings to the - * metadata file. - * (Just as we did above with the component name). - */ - if ( 0 > fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_COMP, snapshot->super.component_name)) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: unable to print component name to metadata"); - } - - if ( 0 > fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_CONTEXT, snapshot->context_filename)) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: unable to print context name to metadata"); - } - - fclose(snapshot->super.metadata ); - snapshot->super.metadata = NULL; - - /* - * JJH NOTE: Setup and request a checkpoint of this process. 
- */ - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: will checkpoint to file: %s", - full_ckpt_path); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: about to signal checkpoint"); - - /* Now that we have set the requested filename, we simply need to start - * the checkpoint. */ - pthread_cond_signal(&checkpoint_cond); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: signalled checkpoint"); - - /* We want to wait for the checkpoint to finish before we continue (in - * particular, we need the post_ckpt hook to happen so that we know the - * status of the checkpoint) - */ - pthread_cond_wait(&checkpoint_done_cond, &checkpoint_mutex); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: received checkpoint_done signal"); - - /* We have now been checkpointed. Note that the state of the checkpoint - * (OPAL_CRS_CONTINUE, etc.) has been recorded by the post_ckpt hook. 
- */ - *state = post_ckpt_state; - exit_status = OPAL_SUCCESS; - - free(full_ckpt_path); - - cleanup: - unlock_retval = pthread_mutex_unlock(&checkpoint_mutex); - - if( 0 != unlock_retval ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: unable to unlock mutex at end of checkpoint: %s", - strerror_r(unlock_retval, buf, BUFSIZ)); - - exit_status = OPAL_ERROR; - } - - if( NULL != snapshot->super.metadata ) { - fclose(snapshot->super.metadata ); - snapshot->super.metadata = NULL; - } - - return exit_status; -} - -int opal_crs_dmtcp_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid) -{ - int ret, exit_status = OPAL_SUCCESS; - int exec_status; - - opal_crs_dmtcp_snapshot_t *snapshot = OBJ_NEW(opal_crs_dmtcp_snapshot_t); - snapshot->super = *base_snapshot; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: restart(--, %d)", spawn_child); - - /* - * JJH NOTE: 'cold_start' indicates that this process is being restarted from - * opal-restart instead of from within an already running process. - * In the current code base, this is always set to true since it - * does not allow a process to request a restart of itself. - */ - if(snapshot->super.cold_start) { - /* - * Read the metadata left by the checkpoint() of this process - */ - if( OPAL_SUCCESS != (ret = dmtcp_cold_start(snapshot)) ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_restart: Unable to reconstruct the snapshot."); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - - /* JJH NOTE: Nearly all of the time the 'spawn_child' argument is set to - * 'false' indicating that the restart function is expected to - * call exec() directly. It is only set to 'true' if the user - * explicitly tells opal-restart to spawn off the child, which - * rarely/never happens. So I would not worry about that option. 
- */ - if( spawn_child ) { - pid_t child_pid = fork(); - - if(child_pid > 0) - goto cleanup; - else if(child_pid < 0) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_restart: Unable to spawn child."); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - - /* - * JJH NOTE: Restart the process by replacing this process - */ - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_restart: About to invoke command: %s with argv: %s %s", - MTCP_RESTART_COMMAND, - MTCP_RESTART_COMMAND, - snapshot->context_filename); - - exec_status = execlp(MTCP_RESTART_COMMAND, MTCP_RESTART_COMMAND, snapshot->context_filename, NULL); - - /* If we get down here, something has broken. */ - - if(exec_status < 0) - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_restart: error in replacing process: %s", - strerror(errno)); - else - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_restart: exec() returned!"); - - exit_status = OPAL_ERROR; - goto cleanup; - - cleanup: - return exit_status; -} - -int opal_crs_dmtcp_disable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: disable_checkpoint()"); - - /* - * JJH NOTE: Enter a critical section. This is not really used in the code - * at the moment. - */ - mtcp_no(); - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_enable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: enable_checkpoint()"); - /* - * JJH NOTE: Leave a critical section. This is not really used in the code - * at the moment. 
- */ - mtcp_ok(); - - return OPAL_SUCCESS; -} - -/***************************** - * Local Function Definitions - *****************************/ -static int dmtcp_cold_start(opal_crs_dmtcp_snapshot_t *snapshot) { - int ret, exit_status = OPAL_SUCCESS; - char **tmp_argv = NULL; - char * component_name = NULL; - int prev_pid; - - /* - * Find the snapshot directory, read the metadata file for - * component name and previous pid - */ - if( NULL == snapshot->super.metadata ) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "r")) ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_cold_start(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot->super.metadata, - &component_name, &prev_pid) ) ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_cold_start: Error: Failed to extract the metadata from the local snapshot (%s). Returned %d.", - snapshot->super.metadata_filename, ret); - exit_status = ret; - goto cleanup; - } - - snapshot->super.component_name = strdup(component_name); - - /* - * Compare the component strings to make sure this is our snapshot before going further. - * JJH NOTE: This will nearly always be true since opal-restart also checks this metadata. 
- */ - if ( 0 != strncmp(mca_crs_dmtcp_component.super.base_version.mca_component_name, - component_name, strlen(component_name)) ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_cold_start: Error: This snapshot (%s) is not intended for us (%s)\n", - component_name, mca_crs_dmtcp_component.super.base_version.mca_component_name); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * Read context information from the metadata file - */ - opal_crs_base_metadata_read_token(snapshot->super.metadata, CRS_METADATA_CONTEXT, &tmp_argv); - if( NULL == tmp_argv ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_cold_start: Error: Failed to read the %s token from the local checkpoint in %s", - CRS_METADATA_CONTEXT, snapshot->super.snapshot_directory); - exit_status = OPAL_ERROR; - goto cleanup; - } - - asprintf(&(snapshot->context_filename), "%s/%s", snapshot->super.snapshot_directory, tmp_argv[0]); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: cold_start(%s)", snapshot->context_filename); - - /* - * Reset the cold_start flag - */ - snapshot->super.cold_start = false; - - cleanup: - if(NULL != tmp_argv) { - opal_argv_free(tmp_argv); - tmp_argv = NULL; - } - - if( NULL != snapshot->super.metadata ) { - fclose(snapshot->super.metadata); - snapshot->super.metadata = NULL; - } - - return exit_status; -} - -/** - * Given a snapshot, generate the context filename and its full path. - * - * @param snapshot the snapshot with request information - */ -static int dmtcp_generate_full_ckpt_path(opal_crs_dmtcp_snapshot_t *snapshot) -{ - int retval; - retval = asprintf(&(snapshot->context_filename), "ompi_dmtcp_context.%ld", syscall(SYS_getpid)); - if(retval == -1) - return -1; - - return asprintf(&full_ckpt_path, "%s/%s", snapshot->super.snapshot_directory, snapshot->context_filename); -} - -/** - * This is a callback function to call the actual checkpointing routine. 
- * Instead of waiting for a specific interval as MTCP does, we will wait on a - * synchronization signal that will allow us to checkpoint on demand. The - * argument to this function will be ignored. - */ -static void dmtcp_sleep_between_ckpt_callback(int interval) -{ - int signal_retval; - char buf[BUFSIZ]; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: called sleep_between_ckpt callback"); - - pthread_mutex_lock(&checkpoint_mutex); - - /* If the MPI checkpoint thread is waiting on the checkpoint_done_cond and - * this thread is here, it means that a checkpoint has just completed. - * Let's signal the MPI checkpoint thread to resume. */ - signal_retval = pthread_cond_signal(&checkpoint_done_cond); - - if( 0 != signal_retval) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: post_ckpt_callback(): Unable to signal checkpoint done: %s", - strerror_r(signal_retval, buf, BUFSIZ)); - } - - /* now we simply wait for the signal to checkpoint */ - pthread_cond_wait(&checkpoint_cond, &checkpoint_mutex); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: received sync signal to checkpoint."); - - /* We have now been instructed to checkpoint, so we return. Note that the - * mutex is still locked: the post_ckpt callback will unlock it. */ -} - -/** - * This is a callback function that is invoked before the checkpoint actually - * occurs. It enables us to do any logging that is necessary, as well as change - * the filename that the checkpoint will be written to. We expect that this - * filename will be pulled from the checkpoint options. - * - * @param ckpt_filename a pointer in which to store the desired checkpoint - * filename - */ -static void dmtcp_pre_ckpt_callback(char **ckpt_filename) -{ - *ckpt_filename = full_ckpt_path; -} - -/** - * This is a callback function that is invoked after the checkpoint has - * finished. 
It enables us to do any logging that is necessary, as well as - * report whether this is called from a restart or a checkpoint. We will report - * this status, signal the CRS code to continue running, and then release the - * mutex that we are holding. - * - * @param is_restarting whether or not this is being called as part of a restart - * @param mtcp_restore_argv_start_addr unused - */ -static void dmtcp_post_ckpt_callback(int is_restarting, char *mtcp_restore_argv_start_addr) -{ - int unlock_retval; - char buf[BUFSIZ]; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: in post_ckpt_callback, restarting: %d", is_restarting); - if(is_restarting) - post_ckpt_state = OPAL_CRS_RESTART; - else - post_ckpt_state = OPAL_CRS_CONTINUE; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: unlocking at end of post_ckpt_callback"); - - unlock_retval = pthread_mutex_unlock(&checkpoint_mutex); - - if( 0 != unlock_retval) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: post_ckpt_callback(): Unable to unlock mutex: %s", - strerror_r(unlock_retval, buf, BUFSIZ)); - } -} - -/** - * This is a callback function that is invoked by DMTCP to see if it should - * checkpoint the given file descriptor. - * - * If the file descriptor is a socket, named-pipe or pseudo-terminal, DMTCP - * should skip checkpointing them. - * - * If we can't determine the type of fd (stat and/or readlink failed), we ask - * DMTCP to try to checkpoint them anyways with the assumption that DMTCP would - * warn users of any such case. - * - * @param fd file descriptor to checkpoint - * @return: 1 if DMTCP should ckpt the file descriptor, 0 otherwise. 
- */ -static int dmtcp_should_ckpt_fd_callback(int fd) -{ - struct stat stat_buf; - char device_name[PATH_MAX]; - char proc_filename[64]; - char buf[BUFSIZ]; - - if (fstat(fd, &stat_buf) != 0) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: should_ckpt_fd_callback(): error stat()'ing %d: %s", - fd, strerror_r(errno, buf, BUFSIZ)); - return 1; - /* Don't checkpoint sockets and FIFOs */ - } else if (S_ISSOCK(stat_buf.st_mode) || S_ISFIFO(stat_buf.st_mode)) { - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: skipping checkpointing socket/fifo: %d", - fd); - return 0; - } - - memset(device_name, 0, sizeof device_name); - sprintf(proc_filename, "/proc/self/fd/%d", fd); - if (readlink(proc_filename, device_name, sizeof(device_name) - 1) <= 0) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: should_ckpt_fd_callback(): readlink(%d) failed: %s", - fd, strerror_r(errno, buf, BUFSIZ)); - return 1; - } - - /* Don't checkpoint ptys */ - if (strstr(device_name, "/dev/pts/") == 0 || - strstr(device_name, "/dev/pty") == 0 || - strstr(device_name, "/dev/tty") == 0) { - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: skipping checkpointing %s", - device_name); - return 0; - } - - /* Checkpoint fd by default */ - return 1; -} diff --git a/opal/mca/crs/dmtcp/owner.txt b/opal/mca/crs/dmtcp/owner.txt deleted file mode 100644 index b6eb68b0d24..00000000000 --- a/opal/mca/crs/dmtcp/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: UTK -status: inactive diff --git a/opal/mca/crs/none/Makefile.am b/opal/mca/crs/none/Makefile.am deleted file mode 100644 index 123735766b4..00000000000 --- a/opal/mca/crs/none/Makefile.am +++ /dev/null @@ -1,40 +0,0 @@ -# -# Copyright (c) 2004-2008 The Trustees of Indiana University. -# All rights reserved. 
-# Copyright (c) 2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_opaldata_DATA = help-opal-crs-none.txt - -sources = \ - crs_none.h \ - crs_none_component.c \ - crs_none_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_crs_none_DSO -component_noinst = -component_install = mca_crs_none.la -else -component_noinst = libmca_crs_none.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_crs_none_la_SOURCES = $(sources) -mca_crs_none_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_crs_none_la_SOURCES = $(sources) -libmca_crs_none_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/crs/none/crs_none.h b/opal/mca/crs/none/crs_none.h deleted file mode 100644 index 922b92f6fd3..00000000000 --- a/opal/mca/crs/none/crs_none.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * NONE CRS component - * - * Simple, braindead implementation. 
- */ - -#ifndef MCA_CRS_NONE_EXPORT_H -#define MCA_CRS_NONE_EXPORT_H - -#include "opal_config.h" - - -#include "opal/mca/mca.h" -#include "opal/mca/crs/crs.h" - -BEGIN_C_DECLS - - /* - * Local Component structures - */ - struct opal_crs_none_component_t { - opal_crs_base_component_t super; /** Base CRS component */ - - }; - typedef struct opal_crs_none_component_t opal_crs_none_component_t; - OPAL_MODULE_DECLSPEC extern opal_crs_none_component_t mca_crs_none_component; - - int opal_crs_none_component_query(mca_base_module_t **module, int *priority); - - /* - * Module functions - */ - int opal_crs_none_module_init(void); - int opal_crs_none_module_finalize(void); - - /* - * Actual funcationality - */ - int opal_crs_none_checkpoint( pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state); - - int opal_crs_none_restart( opal_crs_base_snapshot_t *snapshot, bool spawn_child, pid_t *child_pid); - - int opal_crs_none_disable_checkpoint(void); - int opal_crs_none_enable_checkpoint(void); - - int opal_crs_none_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env); - - int opal_crs_none_reg_thread(void); - - extern bool opal_crs_none_select_warning; - -END_C_DECLS - -#endif /* MCA_CRS_NONE_EXPORT_H */ diff --git a/opal/mca/crs/none/crs_none_component.c b/opal/mca/crs/none/crs_none_component.c deleted file mode 100644 index e7ce2ee9079..00000000000 --- a/opal/mca/crs/none/crs_none_component.c +++ /dev/null @@ -1,138 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/constants.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "crs_none.h" - -/* - * Public string for version number - */ -const char *opal_crs_none_component_version_string = -"OPAL CRS none MCA component version " OPAL_VERSION; - -/* - * Local functionality - */ -static int crs_none_register (void); -static int crs_none_open(void); -static int crs_none_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -opal_crs_none_component_t mca_crs_none_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component itnone - */ - .base_version = { - OPAL_CRS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "none", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = crs_none_open, - .mca_close_component = crs_none_close, - .mca_query_component = opal_crs_none_component_query, - .mca_register_component_params = crs_none_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .verbose = 0, - .output_handle = -1, - .priority = 1, - } -}; - -/* - * None module - */ -static opal_crs_base_module_t loc_module = { - /** Initialization Function */ - opal_crs_none_module_init, - /** Finalization Function */ - opal_crs_none_module_finalize, - - /** Checkpoint interface */ - opal_crs_none_checkpoint, - - /** Restart Command Access */ - opal_crs_none_restart, - - /** Disable checkpoints */ - opal_crs_none_disable_checkpoint, - /** Enable checkpoints */ - opal_crs_none_enable_checkpoint, - - /** Prelaunch */ - opal_crs_none_prelaunch, - - /** 
Register Thread */ - opal_crs_none_reg_thread -}; - -bool opal_crs_none_select_warning = false; - -static int crs_none_register (void) -{ - int ret; - - (void) mca_base_component_var_register (&mca_crs_none_component.super.base_version, - "priority", "Priority of the crs none " - "component", MCA_BASE_VAR_TYPE_INT, NULL, - 0, MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_CONSTANT, - &mca_crs_none_component.super.priority); - - opal_crs_none_select_warning = false; - ret = mca_base_component_var_register (&mca_crs_none_component.super.base_version, - "select_warning", - "Enable warning when the 'none' component is selected when checkpoint/restart functionality is requested." - "[Default = disabled/no-warning]", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, - &opal_crs_none_select_warning); - return (0 > ret) ? ret : OPAL_SUCCESS; -} - -static int crs_none_open(void) -{ - return OPAL_SUCCESS; -} - -static int crs_none_close(void) -{ - return OPAL_SUCCESS; -} - -int opal_crs_none_component_query(mca_base_module_t **module, int *priority) -{ - *module = (mca_base_module_t *)&loc_module; - *priority = mca_crs_none_component.super.priority; - - return OPAL_SUCCESS; -} - diff --git a/opal/mca/crs/none/crs_none_module.c b/opal/mca/crs/none/crs_none_module.c deleted file mode 100644 index c05327359f2..00000000000 --- a/opal/mca/crs/none/crs_none_module.c +++ /dev/null @@ -1,196 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#ifdef HAVE_STRING_H -#include -#endif -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ - -#include "opal/util/opal_environ.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/util/show_help.h" -#include "opal/util/opal_environ.h" - -#include "opal/constants.h" -#include "opal/mca/base/mca_base_var.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "opal/runtime/opal_cr.h" - -#include "crs_none.h" - -int opal_crs_none_module_init(void) -{ - /* - * If not a tool, and requesting C/R support print a warning. - */ - if( opal_crs_none_select_warning && - !opal_cr_is_tool && opal_cr_is_enabled ) { - opal_show_help("help-opal-crs-none.txt", "none:select-warning", - true); - } - - return OPAL_SUCCESS; -} - -int opal_crs_none_module_finalize(void) -{ - return OPAL_SUCCESS; -} - -int opal_crs_none_checkpoint(pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state) -{ - *state = OPAL_CRS_CONTINUE; - - snapshot->component_name = strdup("none"); - snapshot->cold_start = false; - - /* - * Update the snapshot metadata - */ - if( NULL == snapshot->metadata ) { - if (NULL == (snapshot->metadata = fopen(snapshot->metadata_filename, "a")) ) { - opal_output(0, - "crs:none: checkpoint(): Error: Unable to open the file (%s)", - snapshot->metadata_filename); - return OPAL_ERROR; - } - } - fprintf(snapshot->metadata, "%s%s\n", CRS_METADATA_COMP, snapshot->component_name); - fclose(snapshot->metadata); - snapshot->metadata = NULL; - - if( options->stop ) { - opal_output(0, - "crs:none: checkpoint(): Error: SIGSTOP Not currently supported!"); - } - - return OPAL_SUCCESS; -} - -int opal_crs_none_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid) -{ - int exit_status = OPAL_SUCCESS; - char **tmp_argv = NULL; 
- char **cr_argv = NULL; - int status; - - *child_pid = getpid(); - - if( NULL == base_snapshot->metadata ) { - if (NULL == (base_snapshot->metadata = fopen(base_snapshot->metadata_filename, "a")) ) { - opal_output(0, - "crs:none: checkpoint(): Error: Unable to open the file (%s)", - base_snapshot->metadata_filename); - return OPAL_ERROR; - } - } - - opal_crs_base_metadata_read_token(base_snapshot->metadata, CRS_METADATA_CONTEXT, &tmp_argv); - - if( NULL == tmp_argv ) { - opal_output(opal_crs_base_framework.framework_output, - "crs:none: none_restart: Error: Failed to read the %s token from the local checkpoint in %s", - CRS_METADATA_CONTEXT, base_snapshot->metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - - if( opal_argv_count(tmp_argv) <= 0 ) { - opal_output_verbose(10, opal_crs_base_framework.framework_output, - "crs:none: none_restart: No command line to exec, so just returning"); - exit_status = OPAL_SUCCESS; - goto cleanup; - } - - if ( NULL == (cr_argv = opal_argv_split(tmp_argv[0], ' ')) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - if( !spawn_child ) { - opal_output_verbose(10, opal_crs_base_framework.framework_output, - "crs:none: none_restart: exec :(%s, %s):", - cr_argv[0], tmp_argv[0]); - - status = execvp(cr_argv[0], cr_argv); - - if(status < 0) { - opal_output(opal_crs_base_framework.framework_output, - "crs:none: none_restart: Child failed to execute :(%d):", status); - } - opal_output(opal_crs_base_framework.framework_output, - "crs:none: none_restart: execvp returned %d", status); - exit_status = status; - goto cleanup; - } else { - opal_output(opal_crs_base_framework.framework_output, - "crs:none: none_restart: Spawn not implemented"); - exit_status = OPAL_ERR_NOT_IMPLEMENTED; - goto cleanup; - } - - cleanup: - if (cr_argv) { - opal_argv_free (cr_argv); - } - - fclose(base_snapshot->metadata); - - return exit_status; -} - -int opal_crs_none_disable_checkpoint(void) -{ - return OPAL_SUCCESS; -} - -int 
opal_crs_none_enable_checkpoint(void) -{ - return OPAL_SUCCESS; -} - -int opal_crs_none_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env) -{ - char * tmp_env_var = NULL; - - (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var); - opal_setenv(tmp_env_var, - "0", true, env); - free(tmp_env_var); - tmp_env_var = NULL; - - return OPAL_SUCCESS; -} - -int opal_crs_none_reg_thread(void) -{ - return OPAL_SUCCESS; -} diff --git a/opal/mca/crs/none/help-opal-crs-none.txt b/opal/mca/crs/none/help-opal-crs-none.txt deleted file mode 100644 index 097de36c743..00000000000 --- a/opal/mca/crs/none/help-opal-crs-none.txt +++ /dev/null @@ -1,20 +0,0 @@ - -*- text -*- -# -# Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open PAL CRS framework. -# -[none:select-warning] -Error: 'none' component selected. - Checkpoint/Restart functionality may not work properly. - Make sure that you have configured with and are using a fully functional - CRS component. - To disable this warning set the following MCA parmeter: - --mca crs_none_select_warning 0 diff --git a/opal/mca/crs/none/owner.txt b/opal/mca/crs/none/owner.txt deleted file mode 100644 index c47a2d510b1..00000000000 --- a/opal/mca/crs/none/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: UTK -status: maintenance diff --git a/opal/mca/crs/opal_crs.7in b/opal/mca/crs/opal_crs.7in deleted file mode 100644 index e94453db3fe..00000000000 --- a/opal/mca/crs/opal_crs.7in +++ /dev/null @@ -1,179 +0,0 @@ -.\" -.\" Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana -.\" University Research and Technology -.\" Corporation. All rights reserved. -.\" Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. -.\" -.\" Man page for OPAL's CRS Functionality -.\" -.\" .TH name section center-footer left-footer center-header -.TH OPAL_CRS 7 "#OPAL_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" - -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -OPAL_CRS \- Open PAL MCA Checkpoint/Restart Service (CRS): Overview of Open PAL's -CRS framework, and selected modules. #PACKAGE_NAME# #PACKAGE_VERSION#. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -.PP -Open PAL can involuntarily checkpoint and restart sequential programs. -Doing so requires that Open PAL was compiled with thread support and -that the back-end checkpointing systems are available at run-time. -. -.SS Phases of Checkpoint / Restart -.PP -Open PAL defines three phases for checkpoint / restart support in a -procress: -. -.TP 4 -Checkpoint -When the checkpoint request arrives, the procress is notified of the -request before the checkpoint is taken. -. -.TP 4 -Continue -After a checkpoint has successfully completed, the same process as the -checkpoint is notified of its successful continuation of execution. -. -.TP 4 -Restart -After a checkpoint has successfully completed, a new / restarted -process is notified of its successful restart. -. -.PP -The Continue and Restart phases are identical except for the process -in which they are invoked. The Continue phase is invoked in the same process -as the Checkpoint phase was invoked. 
The Restart phase is only invoked in newly -restarted processes. -. -.\" ************************** -.\" General Process Requirements Section -.\" ************************** -.SH GENERAL PROCESS REQUIREMENTS -.PP -In order for a process to use the Open PAL CRS components it must adhear to a -few programmatic requirements. -.PP -First, the program must call \fIOPAL_INIT\fR early in its execution. This -should only be called once, and it is not possible to checkpoint the process -without it first having called this function. -.PP -The program must call \fIOPAL_FINALIZE\fR before termination. This does a -significant amount of cleanup. If it is not called, then it is very likely that -remnants are left in the filesystem. -.PP -To checkpoint and restart a process you must use the Open PAL tools to do -so. Using the backend checkpointer's checkpoint and restart tools will lead -to undefined behavior. -To checkpoint a process use \fIopal_checkpoint\fR (opal_checkpoint(1)). -To restart a process use \fIopal_restart\fR (opal_restart(1)). -. -.\" ********************************** -.\" Available Components Section -.\" ********************************** -.SH AVAILABLE COMPONENTS -.PP -Open PAL ships with two CRS components: \fIself\fR and \fIblcr\fR. -. -.PP -The following MCA parameters apply to all components: -. -.TP 4 -crs_base_verbose -Set the verbosity level for all components. Default is 0, or silent except on error. -. -.\" Self Component -.\" ****************** -.SS self CRS Component -.PP -The \fIself\fR component invokes user-defined functions to save and restore -checkpoints. It is simply a mechanism for user-defined functions to be invoked -at Open PAL's Checkpoint, Continue, and Restart phases. Hence, the only data -that is saved during the checkpoint is what is written in the user's checkpoint -function. No libary state is saved at all. -. -.PP -As such, the model for the \fIself\fR component is slightly differnt than for -other components. 
Specifically, the Restart function is not invoked in the same -process image of the process that was checkpointed. The Restart phase is -invoked during \fBOPAL_INIT\fR of the new instance of the applicaiton (i.e., it -starts over from main()). -. -.PP -The \fIself\fR component has the following MCA parameters: -.TP 4 -crs_self_prefix -Speficy a string prefix for the name of the checkpoint, continue, and restart -functions that Open PAL will invoke during the respective stages. That is, -by specifying "-mca crs_self_prefix foo" means that Open PAL expects to find -three functions at run-time: - - int foo_checkpoint() - - int foo_continue() - - int foo_restart() - -By default, the prefix is set to "opal_crs_self_user". -. -.TP 4 -crs_self_priority -Set the \fIself\fR components default priority -. -.TP 4 -crs_self_verbose -Set the verbosity level. Default is 0, or silent except on error. -. -.TP 4 -crs_self_do_restart -This is mostly internally used. A general user should never need to set this -value. This is set to non-0 when a the new process should invoke the restart -callback in \fIOPAL_INIT\fR. Default is 0, or normal execution. -. -.\" BLCR Component -.\" ****************** -.SS blcr CRS Component -.PP -The Berkeley Lab Checkpoint/Restart (BLCR) single-process checkpoint is a -software system developed at Lawrence Berkeley National Laboratory. See the -project website for more details: - - \fI http://ftg.lbl.gov/CheckpointRestart/CheckpointRestart.shtml \fR -. -.PP -The \fIblcr\fR component has the following MCA parameters: -.TP 4 -crs_blcr_priority -Set the \fIblcr\fR components default priority. -. -.TP 4 -crs_blcr_verbose -Set the verbosity level. Default is 0, or silent except on error. -. -.\" Special 'none' option -.\" ************************ -.SS none CRS Component -.PP -The \fInone\fP component simply selects no CRS component. All of the CRS -function calls return immediately with OPAL_SUCCESS. -. 
-.PP -This component is the last component to be selected by default. This means that if -another component is available, and the \fInone\fP component was not explicity -requested then OPAL will attempt to activate all of the available components -before falling back to this component. -. -.\" ************************** -.\" See Also Section -.\" ************************** -. -.SH SEE ALSO - opal_checkpoint(1), opal_restart(1) -.\", orte_crs(7), ompi_crs(7) diff --git a/opal/mca/crs/self/Makefile.am b/opal/mca/crs/self/Makefile.am deleted file mode 100644 index a8bb36b8172..00000000000 --- a/opal/mca/crs/self/Makefile.am +++ /dev/null @@ -1,44 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_opaldata_DATA = help-opal-crs-self.txt - -sources = \ - crs_self.h \ - crs_self_component.c \ - crs_self_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_opal_crs_self_DSO -component_noinst = -component_install = mca_crs_self.la -else -component_noinst = libmca_crs_self.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_crs_self_la_SOURCES = $(sources) -mca_crs_self_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_crs_self_la_SOURCES = $(sources) -libmca_crs_self_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/crs/self/configure.m4 b/opal/mca/crs/self/configure.m4 deleted file mode 100644 index 03b6d36307d..00000000000 --- a/opal/mca/crs/self/configure.m4 +++ /dev/null @@ -1,46 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_crs_self_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_opal_crs_self_CONFIG],[ - AC_CONFIG_FILES([opal/mca/crs/self/Makefile]) - - # If we don't want FT, don't compile this component - AS_IF([test "$opal_want_ft_cr" = "1"], - [crs_self_good="yes"], - [crs_self_good="no"]) - - # We need the dlfcn.h so we can access dlsym and friends - AS_IF([test "$crs_self_good" = "yes"], - [AC_CHECK_HEADER([dlfcn.h], - [crs_self_good="yes"], - [crs_self_good="no"])]) - - # If they did not ask for dlopen support, - # they probably do not want this component either - AS_IF([test "$crs_self_good" = "yes"], - [AS_IF([test "$OPAL_ENABLE_DLOPEN_SUPPORT" = "1"], - [crs_self_good="yes"], - [crs_self_good="no"])]) - - AS_IF([test "$crs_self_good" = "yes"], - [$1], - [$2]) - -])dnl diff --git a/opal/mca/crs/self/crs_self.h b/opal/mca/crs/self/crs_self.h deleted file mode 100644 index 3abe40f664a..00000000000 --- a/opal/mca/crs/self/crs_self.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * SELF CRS component - * - * Simple, braindead implementation. 
- */ - -#ifndef MCA_CRS_SELF_EXPORT_H -#define MCA_CRS_SELF_EXPORT_H - -#include "opal_config.h" - - -#include "opal/mca/mca.h" -#include "opal/mca/crs/crs.h" - -BEGIN_C_DECLS - -#define PREFIX_DEFAULT ("opal_crs_self_user") -#define SUFFIX_CHECKPOINT ("checkpoint") -#define SUFFIX_CONTINUE ("continue") -#define SUFFIX_RESTART ("restart") - - typedef int (*opal_crs_self_checkpoint_callback_fn_t)(char **restart_cmd); - typedef int (*opal_crs_self_continue_callback_fn_t)(void); - typedef int (*opal_crs_self_restart_callback_fn_t)(void); - - /* - * Local Component structures - */ - struct opal_crs_self_component_t { - opal_crs_base_component_t super; /** Base CRS component */ - - char *prefix; /** Prefix for user callbacks */ - bool do_restart; /** Start by calling user restart routine in opal_init? */ - bool can_checkpoint; /** If checkpointing is enabled */ - - /** User defined functions */ - opal_crs_self_checkpoint_callback_fn_t ucb_checkpoint_fn; - opal_crs_self_continue_callback_fn_t ucb_continue_fn; - opal_crs_self_restart_callback_fn_t ucb_restart_fn; - }; - typedef struct opal_crs_self_component_t opal_crs_self_component_t; - OPAL_MODULE_DECLSPEC extern opal_crs_self_component_t mca_crs_self_component; - - int opal_crs_self_component_query(mca_base_module_t **module, int *priority); - - /* - * Module functions - */ - int opal_crs_self_module_init(void); - int opal_crs_self_module_finalize(void); - - /* - * Actual funcationality - */ - int opal_crs_self_checkpoint( pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state); - - int opal_crs_self_restart( opal_crs_base_snapshot_t *snapshot, bool spawn_child, pid_t *child_pid); - - int opal_crs_self_disable_checkpoint(void); - int opal_crs_self_enable_checkpoint(void); - - int opal_crs_self_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env); - - int opal_crs_self_reg_thread(void); - - 
-END_C_DECLS - -#endif /* MCA_CRS_SELF_EXPORT_H */ diff --git a/opal/mca/crs/self/crs_self_component.c b/opal/mca/crs/self/crs_self_component.c deleted file mode 100644 index 7a684fbc986..00000000000 --- a/opal/mca/crs/self/crs_self_component.c +++ /dev/null @@ -1,182 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, Inc. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/constants.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "opal/util/output.h" -#include "crs_self.h" - -/* - * Public string for version number - */ -const char *opal_crs_self_component_version_string = -"OPAL CRS self MCA component version " OPAL_VERSION; - -/* - * Local functionality - */ -static int crs_self_register (void); -static int crs_self_open(void); -static int crs_self_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -opal_crs_self_component_t mca_crs_self_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component itself - */ - .base_version = { - OPAL_CRS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "self", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - 
.mca_open_component = crs_self_open, - .mca_close_component = crs_self_close, - .mca_query_component = opal_crs_self_component_query, - .mca_register_component_params = crs_self_register, - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - /* Verbosity level */ - 0, - /* opal_output handler */ - -1 - }, - /* Default prefix */ - PREFIX_DEFAULT, - /* If we are restarting right out of the gate */ - false, - /* Checkpointing enabled */ - true, - /* Callbacks */ - NULL, - NULL, - NULL -}; - -static int crs_self_register (void) -{ - int ret; - - /* Default priority */ - mca_crs_self_component.super.priority = 20; - ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version, - "priority", "Priority of the CRS self component " - "(default: 20)", MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_crs_self_component.super.priority); - if (0 > ret) { - return ret; - } - - mca_crs_self_component.super.verbose = 0; - ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version, - "verbose", - "Verbose level for the CRS self component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_self_component.super.verbose); - if (0 > ret) { - return ret; - } - - /* - * Handler names - */ - mca_crs_self_component.prefix = NULL; - ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version, - "prefix", - "Prefix for user defined callback functions", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_self_component.prefix); - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version, - "do_restart", - "Start execution by calling restart callback", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - 
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_self_component.do_restart); - return (0 > ret) ? ret : OPAL_SUCCESS; -} - -static int crs_self_open(void) -{ - /* - * This should be the last componet to ever get used since - * it doesn't do anything. - */ - - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if ( 0 != mca_crs_self_component.super.verbose) { - mca_crs_self_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_crs_self_component.super.output_handle, - mca_crs_self_component.super.verbose); - } else { - mca_crs_self_component.super.output_handle = opal_crs_base_framework.framework_output; - } - - /* - * Debug Output - */ - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: open()"); - opal_output_verbose(20, mca_crs_self_component.super.output_handle, - "crs:self: open: priority = %d", - mca_crs_self_component.super.priority); - opal_output_verbose(20, mca_crs_self_component.super.output_handle, - "crs:self: open: verbosity = %d", - mca_crs_self_component.super.verbose); - opal_output_verbose(20, mca_crs_self_component.super.output_handle, - "crs:self: open: prefix = %s", - mca_crs_self_component.prefix); - opal_output_verbose(20, mca_crs_self_component.super.output_handle, - "crs:self: open: do_restart = %d", - mca_crs_self_component.do_restart); - - return OPAL_SUCCESS; -} - -static int crs_self_close(void) -{ - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: close()"); - - return OPAL_SUCCESS; -} diff --git a/opal/mca/crs/self/crs_self_module.c b/opal/mca/crs/self/crs_self_module.c deleted file mode 100644 index 3d83653a2e2..00000000000 --- a/opal/mca/crs/self/crs_self_module.c +++ /dev/null @@ -1,757 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. 
- * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H -#include -#endif -#ifdef HAVE_DLFCN_H -#include -#endif - -#include "opal/util/opal_environ.h" -#include "opal/util/output.h" -#include "opal/util/show_help.h" -#include "opal/util/argv.h" -#include "opal/util/opal_environ.h" - -#include "opal/constants.h" -#include "opal/mca/base/mca_base_var.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "opal/runtime/opal_cr.h" - -#include "crs_self.h" - -/* - * Self module - */ -static opal_crs_base_module_t loc_module = { - /** Initialization Function */ - opal_crs_self_module_init, - /** Finalization Function */ - opal_crs_self_module_finalize, - - /** Checkpoint interface */ - opal_crs_self_checkpoint, - - /** Restart Command Access */ - opal_crs_self_restart, - - /** Disable checkpoints */ - opal_crs_self_disable_checkpoint, - /** Enable checkpoints */ - opal_crs_self_enable_checkpoint, - - /** Prelaunch */ - opal_crs_self_prelaunch, - - /** Register Thread */ - opal_crs_self_reg_thread -}; - -/* - * Snapshot structure - */ -OBJ_CLASS_DECLARATION(opal_crs_self_snapshot_t); - -struct opal_crs_self_snapshot_t { - /** Base CRS snapshot type */ - opal_crs_base_snapshot_t super; - /** Command Line used to restart the app */ - char * cmd_line; -}; -typedef struct opal_crs_self_snapshot_t opal_crs_self_snapshot_t; - -static void 
opal_crs_self_construct(opal_crs_self_snapshot_t *obj); -static void opal_crs_self_destruct( opal_crs_self_snapshot_t *obj); - -OBJ_CLASS_INSTANCE(opal_crs_self_snapshot_t, - opal_crs_base_snapshot_t, - opal_crs_self_construct, - opal_crs_self_destruct); - - -typedef void (*opal_crs_self_dlsym_dummy_fn_t)(void); - -/************************************ - * Locally Global vars & functions :) - ************************************/ -static int crs_self_find_function(char *prefix, char *suffix, - opal_crs_self_dlsym_dummy_fn_t *fn_ptr); - -static int self_update_snapshot_metadata(opal_crs_self_snapshot_t *snapshot); - -static int opal_crs_self_restart_cmd(opal_crs_self_snapshot_t *snapshot, char **cmd); -static int self_cold_start(opal_crs_self_snapshot_t *snapshot); - -void opal_crs_self_construct(opal_crs_self_snapshot_t *snapshot) -{ - snapshot->cmd_line = NULL; -} - -void opal_crs_self_destruct( opal_crs_self_snapshot_t *snapshot) -{ - if(NULL != snapshot->cmd_line) - free(snapshot->cmd_line); -} - -static int opal_crs_self_extract_callbacks(void); - -/* - * MCA Functions - */ -int opal_crs_self_component_query(mca_base_module_t **module, int *priority) -{ - int ret; - - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: component_query()"); - - /* - * If this is a tool, then return a module with the lowest priority. - * This allows 'mpirun' to select the 'none' component since it has - * a priority higher than 0. - * But also allows 'opal-restart' to select this component if needed - * since it only ever requests that a specific component be opened - * that is defined in the snapshot metadata file. 
- */ - if( opal_cr_is_tool ) { - *priority = 0; - *module = (mca_base_module_t *)&loc_module; - return OPAL_SUCCESS; - } - - /* - * Extract the user level callbacks if they exist - */ - ret = opal_crs_self_extract_callbacks(); - - if( OPAL_SUCCESS != ret || - !mca_crs_self_component.can_checkpoint ) { - *priority = -1; - *module = NULL; - return OPAL_ERROR; - } - else { - *priority = mca_crs_self_component.super.priority; - *module = (mca_base_module_t *)&loc_module; - return OPAL_SUCCESS; - } -} - -static int opal_crs_self_extract_callbacks(void) -{ - opal_crs_self_dlsym_dummy_fn_t loc_fn; - - /* - * Find the function names - */ - crs_self_find_function(mca_crs_self_component.prefix, - SUFFIX_CHECKPOINT, - &loc_fn); - mca_crs_self_component.ucb_checkpoint_fn = (opal_crs_self_checkpoint_callback_fn_t)loc_fn; - - crs_self_find_function(mca_crs_self_component.prefix, - SUFFIX_CONTINUE, - &loc_fn); - mca_crs_self_component.ucb_continue_fn = (opal_crs_self_continue_callback_fn_t)loc_fn; - - crs_self_find_function(mca_crs_self_component.prefix, - SUFFIX_RESTART, - &loc_fn); - mca_crs_self_component.ucb_restart_fn = (opal_crs_self_restart_callback_fn_t)loc_fn; - - /* - * Sanity check - */ - mca_crs_self_component.can_checkpoint = true; - - if(NULL == mca_crs_self_component.ucb_checkpoint_fn) { - mca_crs_self_component.can_checkpoint = false; - } - if(NULL == mca_crs_self_component.ucb_continue_fn) { - } - if(NULL == mca_crs_self_component.ucb_restart_fn) { - } - - return OPAL_SUCCESS; -} - -int opal_crs_self_module_init(void) -{ - bool callback_matched = true; - - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: module_init()"); - - if( opal_cr_is_tool ) { - return OPAL_SUCCESS; - } - - /* - * Sanity check - */ - if(NULL == mca_crs_self_component.ucb_checkpoint_fn) { - callback_matched = false; - mca_crs_self_component.can_checkpoint = false; - } - if(NULL == mca_crs_self_component.ucb_continue_fn) { - callback_matched = false; - } - 
if(NULL == mca_crs_self_component.ucb_restart_fn) { - callback_matched = false; - } - if( !callback_matched ) { - if( 1 <= mca_crs_self_component.super.verbose ) { - opal_show_help("help-opal-crs-self.txt", "self:no_callback", false, - "checkpoint", mca_crs_self_component.prefix, SUFFIX_CHECKPOINT, - "continue ", mca_crs_self_component.prefix, SUFFIX_CONTINUE, - "restart ", mca_crs_self_component.prefix, SUFFIX_RESTART, - PREFIX_DEFAULT); - } - } - - /* - * If the user requested that we do_restart, then call their callback - */ - if(mca_crs_self_component.do_restart) { - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: module_init: Call their restart function"); - if( NULL != mca_crs_self_component.ucb_restart_fn) - mca_crs_self_component.ucb_restart_fn(); - } - - return OPAL_SUCCESS; -} - -int opal_crs_self_module_finalize(void) -{ - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: module_finalize()"); - - return OPAL_SUCCESS; -} - - -int opal_crs_self_checkpoint(pid_t pid, - opal_crs_base_snapshot_t *base_snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state) -{ - opal_crs_self_snapshot_t *snapshot = OBJ_NEW(opal_crs_self_snapshot_t); - int ret, exit_status = OPAL_SUCCESS; - char * restart_cmd = NULL; - - /* - * This function should never be called by a tool - */ - if( opal_cr_is_tool ) { - return OPAL_ERR_NOT_SUPPORTED; - } - - if( options->stop ) { - opal_output(0, - "crs:self: checkpoint(): Error: SIGSTOP Not currently supported!"); - } - - /* - * Setup for snapshot directory creation - */ - snapshot->super = *base_snapshot; -#if 0 - snapshot->super.snapshot_directory = strdup(base_snapshot->snapshot_directory); - snapshot->super.metadata_filename = strdup(base_snapshot->metadata_filename); -#endif - - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: checkpoint(%d, ---)", pid); - - if(!mca_crs_self_component.can_checkpoint) { - 
opal_show_help("help-opal-crs-self.txt", "self:ckpt_disabled", false); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * Update the snapshot metadata - */ - snapshot->super.component_name = strdup(mca_crs_self_component.super.base_version.mca_component_name); - if( NULL == snapshot->super.metadata ) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a")) ) { - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: checkpoint(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_COMP, snapshot->super.component_name); - - /* - * Call the user callback function - */ - if(NULL != mca_crs_self_component.ucb_checkpoint_fn) { - mca_crs_self_component.ucb_checkpoint_fn(&restart_cmd); - } - - /* - * Save the restart command - */ - if( NULL == restart_cmd) { - *state = OPAL_CRS_ERROR; - opal_show_help("help-opal-crs-self.txt", "self:no-restart-cmd", - true); - exit_status = OPAL_ERROR; - goto cleanup; - } - else { - snapshot->cmd_line = strdup(restart_cmd); - - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: checkpoint: Restart Command (%s)", snapshot->cmd_line); - } - - /* - * The best we can do is update the metadata file with the - * application argv and argc we started with. 
- */ - if( OPAL_SUCCESS != (ret = self_update_snapshot_metadata(snapshot)) ) { - *state = OPAL_CRS_ERROR; - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: checkpoint(): Error: Unable to update metadata for snapshot (%s).", - snapshot->super.metadata_filename); - exit_status = ret; - goto cleanup; - } - - - *state = OPAL_CRS_CONTINUE; - - /* - * Call their continue routine for completeness - */ - if(NULL != mca_crs_self_component.ucb_continue_fn) { - mca_crs_self_component.ucb_continue_fn(); - } - - base_snapshot = &(snapshot->super); - - cleanup: - if( NULL != restart_cmd) { - free(restart_cmd); - restart_cmd = NULL; - } - - return exit_status; -} - -/* - * Notice that the user restart callback is not called here, but always from - * opal_init for the self module. - */ -int opal_crs_self_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid) -{ - opal_crs_self_snapshot_t *snapshot = OBJ_NEW(opal_crs_self_snapshot_t); - char **cr_argv = NULL; - char * cr_cmd = NULL; - int ret; - int exit_status = OPAL_SUCCESS; - int status; - - snapshot->super = *base_snapshot; - - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: restart(%d)", spawn_child); - - /* - * If we need to reconstruct the snapshot - */ - if(snapshot->super.cold_start) { - if( OPAL_SUCCESS != (ret = self_cold_start(snapshot)) ){ - exit_status = ret; - opal_output(mca_crs_self_component.super.output_handle, - "crs:blcr: blcr_restart: Unable to reconstruct the snapshot."); - goto cleanup; - } - } - - /* - * JJH: Check to make sure the application exists? 
- */ - - /* - * Get the restart command - */ - if ( OPAL_SUCCESS != (ret = opal_crs_self_restart_cmd(snapshot, &cr_cmd)) ) { - exit_status = ret; - goto cleanup; - } - if ( NULL == (cr_argv = opal_argv_split(cr_cmd, ' ')) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - - if (!spawn_child) { - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: self_restart: SELF: exec :(%s, %s):", - strdup(cr_argv[0]), - opal_argv_join(cr_argv, ' ')); - - status = execvp(strdup(cr_argv[0]), cr_argv); - - if(status < 0) { - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: self_restart: SELF: Child failed to execute :(%d):", status); - } - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: self_restart: SELF: execvp returned %d", status); - exit_status = status; - goto cleanup; - } - else { - *child_pid = fork(); - if( *child_pid == 0) { - /* Child Process */ - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: self_restart: CHILD: exec :(%s, %s):", - strdup(cr_argv[0]), - opal_argv_join(cr_argv, ' ')); - - status = execvp(strdup(cr_argv[0]), cr_argv); - - if(status < 0) { - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: self_restart: CHILD: Child failed to execute :(%d):", status); - } - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: self_restart: CHILD: execvp returned %d", status); - exit_status = status; - goto cleanup; - } - else if(*child_pid > 0) { - /* Parent is done once it is started. 
*/ - ; - } - else { - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: self_restart: CHILD: fork failed :(%d):", *child_pid); - } - } - - cleanup: - if( NULL != cr_cmd) - free(cr_cmd); - if( NULL != cr_argv) - opal_argv_free(cr_argv); - - return exit_status; -} - -int opal_crs_self_disable_checkpoint(void) -{ - /* - * This function should never be called by a tool - */ - if( opal_cr_is_tool ) { - return OPAL_ERR_NOT_SUPPORTED; - } - - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: disable_checkpoint()"); - - mca_crs_self_component.can_checkpoint = false; - - return OPAL_SUCCESS; -} - -int opal_crs_self_enable_checkpoint(void) -{ - /* - * This function should never be called by a tool - */ - if( opal_cr_is_tool ) { - return OPAL_ERR_NOT_SUPPORTED; - } - - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: enable_checkpoint()"); - - mca_crs_self_component.can_checkpoint = true; - - return OPAL_SUCCESS; -} - -int opal_crs_self_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env) -{ - char * tmp_env_var = NULL; - - /* - * This function should never be called by a tool - */ - if( opal_cr_is_tool ) { - return OPAL_ERR_NOT_SUPPORTED; - } - - (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var); - opal_setenv(tmp_env_var, - "0", true, env); - free(tmp_env_var); - tmp_env_var = NULL; - - return OPAL_SUCCESS; -} - -int opal_crs_self_reg_thread(void) -{ - /* - * This function should never be called by a tool - */ - if( opal_cr_is_tool ) { - return OPAL_ERR_NOT_SUPPORTED; - } - - return OPAL_SUCCESS; -} - -/****************** - * Local functions - ******************/ -static int crs_self_find_function(char *prefix, char *suffix, - opal_crs_self_dlsym_dummy_fn_t *fn_ptr) { - char *func_to_find = NULL; - - if( NULL == prefix || 0 >= strlen(prefix) ) { - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: 
crs_self_find_function: Error: prefix is NULL or empty string!"); - *fn_ptr = NULL; - return OPAL_ERROR; - } - if( NULL == suffix || 0 >= strlen(suffix) ) { - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: crs_self_find_function: Error: suffix is NULL or empty string!"); - *fn_ptr = NULL; - return OPAL_ERROR; - } - - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: crs_self_find_function(--, %s, %s)", - prefix, suffix); - - asprintf(&func_to_find, "%s_%s", prefix, suffix); - - /* The RTLD_DEFAULT is a special handle that searches the default libraries - * including the current application for the indicated symbol. This allows - * us to not have to dlopen/dlclose the executable. A bit of short hand - * really. - */ - *((void**) fn_ptr) = dlsym(RTLD_DEFAULT, func_to_find); - if( NULL == fn_ptr) { - opal_output_verbose(12, mca_crs_self_component.super.output_handle, - "crs:self: crs_self_find_function: WARNING: Function \"%s\" not found", - func_to_find); - } - else { - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: crs_self_find_function: Found function \"%s\"", - func_to_find); - } - - if( NULL == func_to_find) { - free(func_to_find); - } - - return OPAL_SUCCESS; -} - -/* - * Self is a special case. The 'fname' here is the command line that the user - * wishes to execute. This function takes this command line and adds - * -mca crs_self_do_restart 1 - * Which will trigger the restart callback once the program has been run. 
- * - * For example, The user starts their program with: - * $ my_prog arg1 arg2 - * - * They checkpoint it: - * $ opal_checkpoint -mca crs self 1234 - * - * They restart it: - * $ opal_restart -mca crs self my_prog arg1 arg2 - * - * fname is then: - * fname = "my_prog arg1 arg2" - * - * This funciton translates that to the command: - * cmd = "my_prog arg1 arg2 -mca crs self -mca crs_self_do_restart 1" - * - * Which will cause the program "my_prog" to call their restart function - * upon opal_init time. - * - * Note: The user could bypass the opal_restart routine safely by simply calling - * $ my_prog arg1 arg2 -mca crs self -mca crs_self_do_restart 1 - * However, for consistency sake, we should not encourage this as it won't work for - * all of the other checkpointers. - */ -static int opal_crs_self_restart_cmd(opal_crs_self_snapshot_t *snapshot, char **cmd) -{ - char * tmp_env_var = NULL; - - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: restart_cmd(%s, ---)", snapshot->cmd_line); - - (void) mca_base_var_env_name("crs", &tmp_env_var); - opal_setenv(tmp_env_var, - "self", - true, &environ); - free(tmp_env_var); - tmp_env_var = NULL; - - (void) mca_base_var_env_name("crs_self_do_restart", &tmp_env_var); - opal_setenv(tmp_env_var, - "1", - true, &environ); - free(tmp_env_var); - tmp_env_var = NULL; - - (void) mca_base_var_env_name("crs_self_prefix", &tmp_env_var); - opal_setenv(tmp_env_var, - mca_crs_self_component.prefix, - true, &environ); - free(tmp_env_var); - tmp_env_var = NULL; - - /* Instead of adding it to the command line, we should use the environment - * to pass the values. 
This allow sthe OPAL application to be braindead - * WRT MCA parameters - * add_args = strdup("-mca crs self -mca crs_self_do_restart 1"); - */ - - asprintf(cmd, "%s", snapshot->cmd_line); - - return OPAL_SUCCESS; -} - -static int self_cold_start(opal_crs_self_snapshot_t *snapshot) { - int ret, exit_status = OPAL_SUCCESS; - char **tmp_argv = NULL; - char * component_name = NULL; - int prev_pid; - - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: cold_start()"); - - /* - * Find the snapshot directory, read the metadata file - */ - if( NULL == snapshot->super.metadata ) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a")) ) { - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: checkpoint(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot->super.metadata, - &component_name, &prev_pid) ) ) { - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: self_cold_start: Error: Failed to extract the metadata from the local snapshot (%s). Returned %d.", - snapshot->super.metadata_filename, ret); - exit_status = ret; - goto cleanup; - } - - snapshot->super.component_name = strdup(component_name); - - /* Compare the strings to make sure this is our snapshot before going further */ - if ( 0 != strncmp(mca_crs_self_component.super.base_version.mca_component_name, - component_name, strlen(component_name)) ) { - exit_status = OPAL_ERROR; - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: self_cold_start: Error: This snapshot (%s) is not intended for us (%s)\n", - component_name, mca_crs_self_component.super.base_version.mca_component_name); - goto cleanup; - } - - /* - * Restart command - * JJH: Command lines limited to 256 chars. 
- */ - opal_crs_base_metadata_read_token(snapshot->super.metadata, CRS_METADATA_CONTEXT, &tmp_argv); - if( NULL == tmp_argv ) { - opal_output(mca_crs_self_component.super.output_handle, - "crs:self: self_cold_start: Error: Failed to read the %s token from the local checkpoint in %s", - CRS_METADATA_CONTEXT, snapshot->super.snapshot_directory); - exit_status = OPAL_ERROR; - goto cleanup; - } - asprintf(&snapshot->cmd_line, "%s", tmp_argv[0]); - - /* - * Reset the cold_start flag - */ - snapshot->super.cold_start = false; - - cleanup: - if(NULL != tmp_argv) { - opal_argv_free(tmp_argv); - tmp_argv = NULL; - } - - return exit_status; - -} - -static int self_update_snapshot_metadata(opal_crs_self_snapshot_t *snapshot) { - int exit_status = OPAL_SUCCESS; - - if(NULL == snapshot->cmd_line) { - opal_show_help("help-opal-crs-self.txt", "self:no-restart-cmd", - true); - exit_status = OPAL_ERROR; - goto cleanup; - } - - opal_output_verbose(10, mca_crs_self_component.super.output_handle, - "crs:self: update_snapshot_metadata(%s)", - snapshot->super.metadata_filename); - - /* - * Append to the metadata file the command line to restart with - * - How user wants us to restart - */ - fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_CONTEXT, snapshot->cmd_line); - - cleanup: - return exit_status; -} diff --git a/opal/mca/crs/self/help-opal-crs-self.txt b/opal/mca/crs/self/help-opal-crs-self.txt deleted file mode 100644 index b6eacb6f1e2..00000000000 --- a/opal/mca/crs/self/help-opal-crs-self.txt +++ /dev/null @@ -1,49 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. 
-# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open PAL CRS framework. -# -[self:no_callback] -Error: We were unable to find one of the following callbacks: - %s = %s_%s - %s = %s_%s - %s = %s_%s - Possible causes of this problem are: - - Linker did not export all of the proper symbols. - This is usally enabled with a flag such as '-export'. - - The function does not exist in your program. - - If you have a custom prefix for your functions you may specify - them with the 'crs_self_prefix' mca parameter. For example, - $ prog -mca crs_self_prefix my_prefix - This will then look for the following functions: - int my_prefix_checkpoint(void) - int my_prefix_continue(void) - int my_prefix_restart(void) - Otherwise it will search for functions with the default prefix of: - '%s' -# -[self:ckpt_disabled] -Error: The checkpointer is currently disabled. - Unable to proceed with the checkpoint! -# -[self:no-restart-cmd] -Error: The checkpointer was unable to determine how to restart your - application. Specify the restart command in the checkpoint - function. diff --git a/opal/mca/crs/self/owner.txt b/opal/mca/crs/self/owner.txt deleted file mode 100644 index c47a2d510b1..00000000000 --- a/opal/mca/crs/self/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: UTK -status: maintenance diff --git a/opal/mca/dl/base/base.h b/opal/mca/dl/base/base.h index 0a0ad79dcf2..af79f26b83c 100644 --- a/opal/mca/dl/base/base.h +++ b/opal/mca/dl/base/base.h @@ -3,6 +3,8 @@ * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -16,7 +18,6 @@ #include "opal_config.h" #include "opal/mca/dl/dl.h" #include "opal/util/opal_environ.h" -#include "opal/runtime/opal_cr.h" #include "opal/mca/base/base.h" diff --git a/opal/mca/dl/base/dl_base_select.c b/opal/mca/dl/base/dl_base_select.c index 4abcccdbb0f..8db0b5e99b7 100644 --- a/opal/mca/dl/base/dl_base_select.c +++ b/opal/mca/dl/base/dl_base_select.c @@ -1,9 +1,12 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University. * All rights reserved. * * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * * Additional copyrights may follow * @@ -37,7 +40,7 @@ int opal_dl_base_select(void) opal_dl_base_framework.framework_output, &opal_dl_base_framework.framework_components, (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { + (mca_base_component_t **) &best_component, NULL) ) { /* This will only happen if no component was selected */ exit_status = OPAL_ERROR; goto cleanup; diff --git a/opal/mca/dl/dlopen/dl_dlopen_module.c b/opal/mca/dl/dlopen/dl_dlopen_module.c index 7a7ab2d73c2..192e7af4400 100644 --- a/opal/mca/dl/dlopen/dl_dlopen_module.c +++ b/opal/mca/dl/dlopen/dl_dlopen_module.c @@ -1,5 +1,9 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,7 +33,6 @@ static void do_dlopen(const char *fname, int flags, void **handle, char **err_msg) { - assert(fname); assert(handle); *handle = dlopen(fname, flags); @@ -47,7 +50,6 @@ static void do_dlopen(const char *fname, int flags, static int dlopen_open(const char *fname, bool use_ext, bool private_namespace, opal_dl_handle_t **handle, char **err_msg) { - assert(fname); assert(handle); *handle = NULL; @@ -63,7 +65,7 @@ static int dlopen_open(const char *fname, bool use_ext, bool private_namespace, /* If the caller wants to use filename extensions, loop through them */ void *local_handle = NULL; - if (use_ext) { + if (use_ext && NULL != fname) { int i; char *ext; @@ -106,7 +108,12 @@ static int dlopen_open(const char *fname, bool use_ext, bool private_namespace, (*handle)->dlopen_handle = local_handle; #if OPAL_ENABLE_DEBUG - (*handle)->filename = strdup(fname); + if( NULL != fname ) { + (*handle)->filename = strdup(fname); + } + else { + (*handle)->filename = strdup("(null)"); + } #endif } return (NULL != local_handle) ? OPAL_SUCCESS : OPAL_ERROR; @@ -202,6 +209,7 @@ static int dlopen_foreachfile(const char *search_path, /* Skip libtool files */ if (strcmp(ptr, ".la") == 0 || strcmp(ptr, ".lo") == 0) { + free (abs_name); continue; } diff --git a/opal/mca/dl/libltdl/dl_libltdl_module.c b/opal/mca/dl/libltdl/dl_libltdl_module.c index ce853ac6c49..4704d000984 100644 --- a/opal/mca/dl/libltdl/dl_libltdl_module.c +++ b/opal/mca/dl/libltdl/dl_libltdl_module.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -18,7 +19,6 @@ static int libltdl_open(const char *fname, bool use_ext, bool private_namespace, opal_dl_handle_t **handle, char **err_msg) { - assert(fname); assert(handle); *handle = NULL; @@ -53,7 +53,12 @@ static int libltdl_open(const char *fname, bool use_ext, bool private_namespace, (*handle)->ltdl_handle = local_handle; #if OPAL_ENABLE_DEBUG - (*handle)->filename = strdup(fname); + if( NULL != fname ) { + (*handle)->filename = strdup(fname); + } + else { + (*handle)->filename = strdup("(null)"); + } #endif return OPAL_SUCCESS; diff --git a/opal/mca/dstore/Makefile.am b/opal/mca/dstore/Makefile.am deleted file mode 100644 index c9a80176153..00000000000 --- a/opal/mca/dstore/Makefile.am +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(LTDLINCL) - -# main library setup -noinst_LTLIBRARIES = libmca_dstore.la -libmca_dstore_la_SOURCES = - -# pkgdata setup -dist_opaldata_DATA = - -# local files -headers = dstore.h dstore_types.h -libmca_dstore_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -opaldir = $(opalincludedir)/$(subdir) -nobase_opal_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/opal/mca/dstore/base/Makefile.am b/opal/mca/dstore/base/Makefile.am deleted file mode 100644 index 0eba3ce4a72..00000000000 --- a/opal/mca/dstore/base/Makefile.am +++ /dev/null @@ -1,20 +0,0 @@ -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_opaldata_DATA += base/help-dstore-base.txt - -headers += \ - base/base.h - -libmca_dstore_la_SOURCES += \ - base/dstore_base_frame.c \ - base/dstore_base_select.c \ - base/dstore_base_stubs.c diff --git a/opal/mca/dstore/base/base.h b/opal/mca/dstore/base/base.h deleted file mode 100644 index b05f2043778..00000000000 --- a/opal/mca/dstore/base/base.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef MCA_DSTORE_BASE_H -#define MCA_DSTORE_BASE_H - -#include "opal_config.h" -#include "opal/types.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/mca_base_framework.h" -#include "opal/mca/event/event.h" -#include "opal/class/opal_hash_table.h" -#include "opal/class/opal_list.h" -#include "opal/class/opal_pointer_array.h" -#include "opal/dss/dss.h" -#include "opal/util/proc.h" - -#include "opal/mca/dstore/dstore.h" - -BEGIN_C_DECLS - -OPAL_DECLSPEC extern mca_base_framework_t opal_dstore_base_framework; - -/** - * Select a dstore module - */ -OPAL_DECLSPEC int opal_dstore_base_select(void); - -/* DSTORE is an oddball framework in that it: - * - * has an active storage component that issues handle-specific - * modules. This is done to provide separate storage areas that - * are isolated from each other, and thus don't have to worry - * about overlapping keys - * - * a backfill module used to attempt to retrieve data that has - * been requested, but that the handle-specific storage module - * does not contain. 
This is used in situations where data has - * not been provided at startup, and we need to retrieve it - * solely on-demand - */ -typedef struct { - opal_dstore_base_component_t *storage_component; - opal_dstore_base_module_t *backfill_module; - opal_pointer_array_t handles; // array of open datastore handles - opal_list_t available_components; -} opal_dstore_base_t; - -OPAL_DECLSPEC extern opal_dstore_base_t opal_dstore_base; - -typedef struct { - opal_object_t super; - char *name; - opal_dstore_base_module_t *module; - opal_dstore_base_component_t *storage_component; -} opal_dstore_handle_t; -OBJ_CLASS_DECLARATION(opal_dstore_handle_t); - -/** - * Data for a particular opal process - * The name association is maintained in the - * proc_data hash table. - */ -typedef struct { - /** Structure can be put on lists (including in hash tables) */ - opal_list_item_t super; - bool loaded; - /* List of opal_value_t structures containing all data - received from this process, sorted by key. */ - opal_list_t data; -} opal_dstore_proc_data_t; -OBJ_CLASS_DECLARATION(opal_dstore_proc_data_t); - -/** - * Attribute structure to update tracker object - * (used in dstore sm component) - */ -typedef struct { - opal_list_item_t super; - uint32_t jobid; - char *connection_info; -} opal_dstore_attr_t; -OBJ_CLASS_DECLARATION(opal_dstore_attr_t); - -typedef struct { - int32_t seg_index; - uint32_t offset; - int32_t data_size; -} meta_info; - -#define META_OFFSET 65536 - -OPAL_DECLSPEC int opal_dstore_base_open(const char *name, char* desired_components, opal_list_t *attrs); -OPAL_DECLSPEC int opal_dstore_base_update(int dstorehandle, opal_list_t *attrs); -OPAL_DECLSPEC int opal_dstore_base_close(int dstorehandle); -OPAL_DECLSPEC int opal_dstore_base_store(int dstorehandle, - const opal_process_name_t *id, - opal_value_t *kv); -OPAL_DECLSPEC int opal_dstore_base_fetch(int dstorehandle, - const opal_process_name_t *id, - const char *key, - opal_list_t *kvs); -OPAL_DECLSPEC int 
opal_dstore_base_remove_data(int dstorehandle, - const opal_process_name_t *id, - const char *key); -OPAL_DECLSPEC int opal_dstore_base_get_handle(int dstorehandle, void **dhdl); - -/* support */ -OPAL_DECLSPEC opal_dstore_proc_data_t* opal_dstore_base_lookup_proc(opal_proc_table_t *jtable, - opal_process_name_t id, bool create); - -OPAL_DECLSPEC opal_value_t* opal_dstore_base_lookup_keyval(opal_dstore_proc_data_t *proc_data, - const char *key); - - -END_C_DECLS - -#endif diff --git a/opal/mca/dstore/base/dstore_base_frame.c b/opal/mca/dstore/base/dstore_base_frame.c deleted file mode 100644 index 47d2db5313e..00000000000 --- a/opal/mca/dstore/base/dstore_base_frame.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "opal_config.h" -#include "opal/constants.h" - -#include "opal/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" -#include "opal/dss/dss_types.h" - -#include "opal/mca/dstore/base/base.h" - - -/* - * The following file was created by configure. It contains extern - * dstorements and the definition of an array of pointers to each - * module's public mca_base_module_t struct. 
- */ - -#include "opal/mca/dstore/base/static-components.h" - -opal_dstore_base_API_t opal_dstore = { - opal_dstore_base_open, - opal_dstore_base_update, - opal_dstore_base_close, - opal_dstore_base_store, - opal_dstore_base_fetch, - opal_dstore_base_remove_data, - opal_dstore_base_get_handle -}; -opal_dstore_base_t opal_dstore_base = {0}; - -int opal_dstore_internal = -1; - -static int opal_dstore_base_frame_close(void) -{ - opal_dstore_handle_t *hdl; - opal_list_item_t *item; - int i; - - /* cycle across all the active dstore handles and let them cleanup - order - * doesn't matter in this case - */ - for (i=0; i < opal_dstore_base.handles.size; i++) { - if (NULL != (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, i))) { - OBJ_RELEASE(hdl); - } - } - OBJ_DESTRUCT(&opal_dstore_base.handles); - - for (item = opal_list_remove_first(&opal_dstore_base.available_components); - NULL != item; - item = opal_list_remove_first(&opal_dstore_base.available_components)) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&opal_dstore_base.available_components); - - /* let the backfill module finalize, should it wish to do so */ - if (NULL != opal_dstore_base.backfill_module && NULL != opal_dstore_base.backfill_module->finalize) { - opal_dstore_base.backfill_module->finalize((struct opal_dstore_base_module_t*)opal_dstore_base.backfill_module); - } - - return mca_base_framework_components_close(&opal_dstore_base_framework, NULL); -} - -static int opal_dstore_base_frame_open(mca_base_open_flag_t flags) -{ - OBJ_CONSTRUCT(&opal_dstore_base.handles, opal_pointer_array_t); - opal_pointer_array_init(&opal_dstore_base.handles, 5, INT_MAX, 1); - - OBJ_CONSTRUCT(&opal_dstore_base.available_components, opal_list_t); - - /* Open up all available components */ - return mca_base_framework_components_open(&opal_dstore_base_framework, flags); -} - -MCA_BASE_FRAMEWORK_DECLARE(opal, dstore, NULL, NULL, - opal_dstore_base_frame_open, - opal_dstore_base_frame_close, - 
mca_dstore_base_static_components, 0); - -/*** CLASS INSTANCES ***/ -static void hdl_con(opal_dstore_handle_t *p) -{ - p->name = NULL; - p->module = NULL; - p->storage_component = NULL; -} -static void hdl_des(opal_dstore_handle_t *p) -{ - opal_dstore_base_module_t *mod; - - if (NULL != p->name) { - free(p->name); - } - if (NULL != p->module) { - mod = (opal_dstore_base_module_t*)p->module; - if (NULL != mod->finalize) { - mod->finalize((struct opal_dstore_base_module_t*)mod); - } - free(mod); - } -} -OBJ_CLASS_INSTANCE(opal_dstore_handle_t, - opal_object_t, - hdl_con, hdl_des); - -static void proc_data_construct(opal_dstore_proc_data_t *ptr) -{ - ptr->loaded = false; - OBJ_CONSTRUCT(&ptr->data, opal_list_t); -} - -static void proc_data_destruct(opal_dstore_proc_data_t *ptr) -{ - OPAL_LIST_DESTRUCT(&ptr->data); -} -OBJ_CLASS_INSTANCE(opal_dstore_proc_data_t, - opal_list_item_t, - proc_data_construct, - proc_data_destruct); - -static void attr_construct(opal_dstore_attr_t *attr) -{ - attr->connection_info = NULL; -} - -static void attr_destruct(opal_dstore_attr_t *attr) -{ - if (NULL != attr->connection_info) { - free(attr->connection_info); - } -} -OBJ_CLASS_INSTANCE(opal_dstore_attr_t, - opal_list_item_t, - attr_construct, attr_destruct); - - diff --git a/opal/mca/dstore/base/dstore_base_select.c b/opal/mca/dstore/base/dstore_base_select.c deleted file mode 100644 index 8864c51aff6..00000000000 --- a/opal/mca/dstore/base/dstore_base_select.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" -#include "opal/constants.h" - -#include "opal/class/opal_list.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_component_repository.h" -#include "opal/util/output.h" - -#include "opal/mca/dstore/base/base.h" - -static bool selected = false; - -int -opal_dstore_base_select(void) -{ - mca_base_component_list_item_t *cli, *copy_cli; - mca_base_component_t *cmp; - mca_base_module_t *md; - int priority, cmp_pri, mod_pri; - opal_dstore_base_module_t *mod=NULL; - opal_dstore_base_component_t *comp=NULL; - - if (selected) { - /* ensure we don't do this twice */ - return OPAL_SUCCESS; - } - selected = true; - - /* Query all available components and ask if they have a module */ - cmp_pri = -100000; - mod_pri = -100000; - OPAL_LIST_FOREACH(cli, &opal_dstore_base_framework.framework_components, mca_base_component_list_item_t) { - cmp = (mca_base_component_t*)cli->cli_component; - - opal_output_verbose(5, opal_dstore_base_framework.framework_output, - "mca:dstore:select: checking available component %s", - cmp->mca_component_name); - - /* If there's no query function, skip it */ - if (NULL == cmp->mca_query_component) { - opal_output_verbose(5, opal_dstore_base_framework.framework_output, - "mca:dstore:select: Skipping component [%s]. 
It does not implement a query function", - cmp->mca_component_name ); - continue; - } - - /* Query the component */ - opal_output_verbose(5, opal_dstore_base_framework.framework_output, - "mca:dstore:select: Querying component [%s]", - cmp->mca_component_name); - - /* If the component reports failure, then skip component - however, - * it is okay to return a NULL module */ - if (OPAL_SUCCESS != cmp->mca_query_component(&md, &priority)) { - opal_output_verbose(5, opal_dstore_base_framework.framework_output, - "mca:dstore:select: Skipping component [%s] - not available", - cmp->mca_component_name ); - continue; - } - - copy_cli = OBJ_NEW(mca_base_component_list_item_t); - if (NULL != copy_cli) { - copy_cli->cli_component = cmp; - opal_list_append(&opal_dstore_base.available_components, (opal_list_item_t *)copy_cli); - } - /* track the highest priority component that returned a NULL module - this - * will become our storage element */ - if (NULL == md) { - if (0 < priority && priority > cmp_pri) { - comp = (opal_dstore_base_component_t*)cmp; - cmp_pri = priority; - } - } else { - /* track the highest priority module that was returned - this - * will become our backfill element */ - if (priority > mod_pri) { - mod = (opal_dstore_base_module_t*)md; - mod_pri = priority; - } - } - } - - if (NULL == comp) { - /* no components available - that's bad */ - return OPAL_ERROR; - } - opal_dstore_base.storage_component = comp; - - /* it's okay not to have a backfill module */ - opal_dstore_base.backfill_module = mod; - - return OPAL_SUCCESS;; -} diff --git a/opal/mca/dstore/base/dstore_base_stubs.c b/opal/mca/dstore/base/dstore_base_stubs.c deleted file mode 100644 index ef92897f731..00000000000 --- a/opal/mca/dstore/base/dstore_base_stubs.c +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel Inc. All rights reserved - * Copyright (c) 2014 Mellanox Technologies, Inc. 
- * All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "opal_config.h" -#include "opal/constants.h" -#include "opal_stdint.h" - -#include "opal/mca/mca.h" -#include "opal/util/argv.h" -#include "opal/util/error.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" -#include "opal/dss/dss_types.h" - -#include "opal/mca/dstore/base/base.h" - - -int opal_dstore_base_open(const char *name, char* desired_components, opal_list_t *attrs) -{ - opal_dstore_handle_t *hdl; - int index; - opal_dstore_base_module_t *mod; - int i; - mca_base_component_list_item_t* cli; - char** tokens; - - if (NULL != desired_components) { - tokens = opal_argv_split(desired_components, ','); - for (i = 0; NULL != tokens[i]; i++) { - OPAL_LIST_FOREACH(cli, &opal_dstore_base.available_components, mca_base_component_list_item_t) { - if (0 == strncmp(tokens[i], cli->cli_component->mca_component_name, strlen(tokens[i]))) { - if (NULL != ((opal_dstore_base_component_t*)cli->cli_component)->create_handle && NULL != (mod = ((opal_dstore_base_component_t*)cli->cli_component)->create_handle(attrs))) { - /* have our module, so create a new dstore_handle */ - hdl = OBJ_NEW(opal_dstore_handle_t); - if (NULL != name) { - hdl->name = strdup(name); - } - hdl->module = mod; - hdl->storage_component = (opal_dstore_base_component_t*)cli->cli_component; - if (0 > (index = opal_pointer_array_add(&opal_dstore_base.handles, hdl))) { - OPAL_ERROR_LOG(index); - OBJ_RELEASE(hdl); - } - opal_argv_free(tokens); - opal_output_verbose(1, opal_dstore_base_framework.framework_output, - "Created handle for %s dstore to component %s", - (NULL == hdl->name) ? 
"NULL" : hdl->name, - cli->cli_component->mca_component_name); - return index; - } - } - } - } - opal_argv_free(tokens); - } else { - OPAL_LIST_FOREACH(cli, &opal_dstore_base.available_components, mca_base_component_list_item_t) { - if (NULL != ((opal_dstore_base_component_t*)cli->cli_component)->create_handle && NULL != (mod = ((opal_dstore_base_component_t*)cli->cli_component)->create_handle(attrs))) { - /* have our module, so create a new dstore_handle */ - hdl = OBJ_NEW(opal_dstore_handle_t); - if (NULL != name) { - hdl->name = strdup(name); - } - hdl->module = mod; - hdl->storage_component = (opal_dstore_base_component_t*)cli->cli_component; - if (0 > (index = opal_pointer_array_add(&opal_dstore_base.handles, hdl))) { - OPAL_ERROR_LOG(index); - OBJ_RELEASE(hdl); - } - opal_output_verbose(1, opal_dstore_base_framework.framework_output, - "Created handle for %s dstore to component %s", - (NULL == hdl->name) ? "NULL" : hdl->name, - cli->cli_component->mca_component_name); - return index; - } - } - } - - /* if we get here, then we were unable to create a module - * for this scope - */ - return OPAL_ERROR; -} - -int opal_dstore_base_update(int dstorehandle, opal_list_t *attrs) -{ - int rc; - opal_dstore_handle_t *hdl; - - if (dstorehandle < 0) { - return OPAL_ERR_NOT_INITIALIZED; - } - - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - return OPAL_ERR_NOT_FOUND; - } - - if (NULL == hdl->storage_component->update_handle) { - return OPAL_SUCCESS; - } - - if (OPAL_SUCCESS != (rc = hdl->storage_component->update_handle(dstorehandle, attrs))) { - OPAL_ERROR_LOG(rc); - } - - return rc; -} - -int opal_dstore_base_close(int dstorehandle) -{ - opal_dstore_handle_t *hdl; - int i; - - /* if the handle is -1, then close all handles */ - if (dstorehandle < 0) { - for (i=0; i < opal_dstore_base.handles.size; i++) { - if (NULL != (hdl = 
(opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, i))) { - OBJ_RELEASE(hdl); - opal_pointer_array_set_item(&opal_dstore_base.handles, i, NULL); - } - } - return OPAL_SUCCESS; - } - - /* get the datastore handle */ - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - return OPAL_ERR_NOT_FOUND; - } - opal_pointer_array_set_item(&opal_dstore_base.handles, dstorehandle, NULL); - /* release the handle - this will also finalize and free the module */ - OBJ_RELEASE(hdl); - - return OPAL_SUCCESS; -} - - -int opal_dstore_base_store(int dstorehandle, - const opal_process_name_t *id, - opal_value_t *kv) -{ - opal_dstore_handle_t *hdl; - - if (dstorehandle < 0) { - return OPAL_ERR_NOT_INITIALIZED; - } - - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - return OPAL_ERR_NOT_FOUND; - } - - opal_output_verbose(1, opal_dstore_base_framework.framework_output, - "storing data in %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name); - - return hdl->module->store((struct opal_dstore_base_module_t*)hdl->module, id, kv); -} - -int opal_dstore_base_fetch(int dstorehandle, - const opal_process_name_t *id, - const char *key, - opal_list_t *kvs) -{ - opal_dstore_handle_t *hdl; - int rc; - - if (dstorehandle < 0) { - return OPAL_ERR_NOT_INITIALIZED; - } - - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - return OPAL_ERR_NOT_FOUND; - } - - opal_output_verbose(1, opal_dstore_base_framework.framework_output, - "fetching data from %s dstore", (NULL == hdl->name) ? 
"NULL" : hdl->name); - - if (OPAL_SUCCESS == (rc = hdl->module->fetch((struct opal_dstore_base_module_t*)hdl->module, id, key, kvs))) { - /* found the data, so we can just return it */ - return rc; - } - - /* if the storage module didn't find it, then let the backfill module try - * to retrieve it if we have one */ - if (NULL != opal_dstore_base.backfill_module) { - rc = opal_dstore_base.backfill_module->fetch((struct opal_dstore_base_module_t*)opal_dstore_base.backfill_module, id, key, kvs); - } - return rc; -} - -int opal_dstore_base_remove_data(int dstorehandle, - const opal_process_name_t *id, - const char *key) -{ - opal_dstore_handle_t *hdl; - - if (dstorehandle < 0) { - return OPAL_ERR_NOT_INITIALIZED; - } - - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - return OPAL_ERR_NOT_FOUND; - } - - opal_output_verbose(1, opal_dstore_base_framework.framework_output, - "removing data from %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name); - - return hdl->module->remove((struct opal_dstore_base_module_t*)hdl->module, id, key); -} - -int opal_dstore_base_get_handle(int dstorehandle, void **dhdl) -{ - opal_dstore_handle_t *hdl; - - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - return OPAL_ERR_NOT_FOUND; - } - - *dhdl = (void*)hdl; - return OPAL_SUCCESS; -} - -/** - * Find data for a given key in a given proc_data_t - * container. - */ -opal_value_t* opal_dstore_base_lookup_keyval(opal_dstore_proc_data_t *proc_data, - const char *key) -{ - opal_value_t *kv; - - OPAL_LIST_FOREACH(kv, &proc_data->data, opal_value_t) { - if (0 == strcmp(key, kv->key)) { - return kv; - } - } - return NULL; -} - - -/** - * Find proc_data_t container associated with given - * opal_process_name_t. 
- */ -opal_dstore_proc_data_t* opal_dstore_base_lookup_proc(opal_proc_table_t *ptable, - opal_process_name_t id, bool create) -{ - opal_dstore_proc_data_t *proc_data = NULL; - - opal_proc_table_get_value(ptable, id, (void**)&proc_data); - if (NULL == proc_data && create) { - proc_data = OBJ_NEW(opal_dstore_proc_data_t); - if (NULL == proc_data) { - opal_output(0, "dstore:hash:lookup_opal_proc: unable to allocate proc_data_t\n"); - return NULL; - } - opal_proc_table_set_value(ptable, id, proc_data); - } - - return proc_data; -} - diff --git a/opal/mca/dstore/base/help-dstore-base.txt b/opal/mca/dstore/base/help-dstore-base.txt deleted file mode 100644 index f603f2eaf31..00000000000 --- a/opal/mca/dstore/base/help-dstore-base.txt +++ /dev/null @@ -1,19 +0,0 @@ - -*- text -*- -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for OPAL Errmgr HNP module. -# -[errmgr-hnp:unknown-job-error] -An error has occurred in an unknown job. This generally should not happen -except due to an internal OPAL error. - -Job state: %s - -This information should probably be repopald to the OMPI developers. diff --git a/opal/mca/dstore/base/owner.txt b/opal/mca/dstore/base/owner.txt deleted file mode 100644 index e6150b6b0fc..00000000000 --- a/opal/mca/dstore/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: project -status: active diff --git a/opal/mca/dstore/dstore.h b/opal/mca/dstore/dstore.h deleted file mode 100644 index 198a56b4ed3..00000000000 --- a/opal/mca/dstore/dstore.h +++ /dev/null @@ -1,222 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, Inc. 
All rights - * reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Database Framework - used for internal storage of - * information relating to modex and other OMPI operations - * - */ - -#ifndef OPAL_DB_H -#define OPAL_DB_H - -#include "opal_config.h" -#include "opal/types.h" - -#include "opal/mca/mca.h" -#include "opal/mca/event/event.h" -#include "opal/dss/dss_types.h" -#include "opal/util/proc.h" - -#include "opal/mca/dstore/dstore_types.h" - -/** - * DATABASE DESIGN - * - * Each API function is treated as blocking. - * - */ - -BEGIN_C_DECLS - -/* declare a global handle until such time - * as someone figures out how to separate the various - * datastore channels - */ -OPAL_DECLSPEC extern int opal_dstore_peer; -OPAL_DECLSPEC extern int opal_dstore_internal; -OPAL_DECLSPEC extern int opal_dstore_nonpeer; - -/**** DEFINE THE PUBLIC API'S ****/ -/* - * Open a database - * - * Open a database for access. The name field is purely for - * debug purposes and has no implementation relevance. - * Just like the standard POSIX file open, the call will return - * a unique "handle" that must be provided with any subsequent - * call to store or fetch data from this database. - * - * The attributes parameter can be used to pass any desired - * optional directives to the active storage component. These - * are passed as a list of opal_value_t's. - * - * NOTE: calls to these APIs must be thread-protected as there - * is NO internal thread safety. 
- */ -typedef int (*opal_dstore_base_API_open_fn_t)(const char *name, char* desired_components, - opal_list_t *attributes); - -/* - * Update an existing handle - * - * Sometimes an existing handle requires an update to its attributes, so - * provide an API for doing so - */ -typedef int (*opal_dstore_base_API_update_fn_t)(int dstorehandle, - opal_list_t *attributes); - -/* - * Close a database handle - * - * Close the specified database handle. A -1 handle indicates - * that ALL open database handles are to be closed. - */ -typedef int (*opal_dstore_base_API_close_fn_t)(int dstorehandle); - -/* - * Store a data value against the primary key - overwrites any data - * of matching key that is already present. The data is copied into the database - * and therefore does not need to be preserved by the caller. - */ -typedef int (*opal_dstore_base_API_store_fn_t)(int dstorehandle, - const opal_process_name_t *id, - opal_value_t *kv); - -/* - * Retrieve data - * - * Retrieve data for the given primary key associated with the specified key. Wildcards - * are supported here as well. Caller is responsible for releasing the returned list - * of opal_value_t objects. - */ -typedef int (*opal_dstore_base_API_fetch_fn_t)(int dstorehandle, - const opal_process_name_t *id, - const char *key, - opal_list_t *kvs); - -/* - * Delete data - * - * Delete the data for the given primary key that is associated with the specified key. - * If a NULL key is provided, all data for the given primary key will be deleted. - */ -typedef int (*opal_dstore_base_API_remove_fn_t)(int dstorehandle, - const opal_process_name_t *id, - const char *key); - - -/* - * Get active dstore handle - * Get dstore handle asocciated with the passed id. 
- */ -typedef int (*opal_dstore_base_API_get_handle_fn_t)(int dstorehandle, void **dhdl); - - -/* - * the standard public API data structure - */ -typedef struct { - opal_dstore_base_API_open_fn_t open; - opal_dstore_base_API_update_fn_t update; - opal_dstore_base_API_close_fn_t close; - opal_dstore_base_API_store_fn_t store; - opal_dstore_base_API_fetch_fn_t fetch; - opal_dstore_base_API_remove_fn_t remove; - opal_dstore_base_API_get_handle_fn_t get_handle; -} opal_dstore_base_API_t; - - - -/**** DEFINE THE MODULE API'S ****/ -/* Note that each datastore handle will be associated with - * a single active module. Thus, storing and fetching data - * from that module does not require that we pass in the - * handle itself. - * - * NOTE: the call to actually store/fetch data in a given - * datastore handle must be protected against threaded operations - * as there is NO thread protection inside the various modules. - */ -struct opal_dstore_base_module_t; - -/* - * Initialize the module - */ -typedef int (*opal_dstore_base_module_init_fn_t)(struct opal_dstore_base_module_t *mod); - -/* - * Finalize the module - */ -typedef void (*opal_dstore_base_module_finalize_fn_t)(struct opal_dstore_base_module_t *mod); - -/* store the data in this module */ -typedef int (*opal_dstore_base_module_store_fn_t)(struct opal_dstore_base_module_t *mod, - const opal_process_name_t *id, - opal_value_t *kv); - -/* fetch data from the module */ -typedef int (*opal_dstore_base_module_fetch_fn_t)(struct opal_dstore_base_module_t *mod, - const opal_process_name_t *id, - const char *key, - opal_list_t *kvs); - -/* remove data */ -typedef int (*opal_dstore_base_module_remove_fn_t)(struct opal_dstore_base_module_t *mod, - const opal_process_name_t *id, - const char *key); - -/* - * the standard module data structure - */ -typedef struct { - opal_dstore_base_module_init_fn_t init; - opal_dstore_base_module_finalize_fn_t finalize; - opal_dstore_base_module_store_fn_t store; - 
opal_dstore_base_module_fetch_fn_t fetch; - opal_dstore_base_module_remove_fn_t remove; -} opal_dstore_base_module_t; - -/* - * the component data structure - */ - -/* create and return a datastore module */ -typedef opal_dstore_base_module_t* (*mca_dstore_base_component_create_hdl_fn_t)(opal_list_t *attributes); - -/* update an existing handle */ -typedef int (*mca_dstore_base_component_update_hdl_fn_t)(int hdl, opal_list_t *attributes); - -/* provide a chance for the component to finalize */ -typedef void (*mca_dstore_base_component_finalize_fn_t)(void); - -typedef struct { - mca_base_component_t base_version; - mca_base_component_data_t base_data; - mca_dstore_base_component_create_hdl_fn_t create_handle; - mca_dstore_base_component_update_hdl_fn_t update_handle; - mca_dstore_base_component_finalize_fn_t finalize; -} opal_dstore_base_component_t; - -/* - * Macro for use in components that are of type dstore - */ -#define OPAL_DSTORE_BASE_VERSION_2_0_0 \ - OPAL_MCA_BASE_VERSION_2_1_0("dstore", 2, 0, 0) - -/* Global structure for accessing store functions */ -OPAL_DECLSPEC extern opal_dstore_base_API_t opal_dstore; /* holds base function pointers */ - -END_C_DECLS - -#endif diff --git a/opal/mca/dstore/dstore_types.h b/opal/mca/dstore/dstore_types.h deleted file mode 100644 index 3a957c394a4..00000000000 --- a/opal/mca/dstore/dstore_types.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The OPAL Database Framework - * - */ - -#ifndef OPAL_DSTORE_TYPES_H -#define OPAL_DSTORE_TYPES_H - -#include "opal_config.h" -#include "opal/types.h" - -#include "opal/dss/dss_types.h" -#include "opal/mca/pmix/pmix.h" - -BEGIN_C_DECLS - -/* some values are provided by an external entity such - * as the resource manager. 
These values enter the - * system via the PMIx interface at startup, but are - * not explicitly retrieved by processes. Instead, procs - * access them after RTE-init has stored them. For ease-of-use, - * we define equivalent dstore names here. PMIx attributes - * not listed here should be directly accessed via the - * OPAL pmix framework */ -#define OPAL_DSTORE_CPUSET PMIX_CPUSET -#define OPAL_DSTORE_CREDENTIAL PMIX_CREDENTIAL -#define OPAL_DSTORE_TMPDIR PMIX_TMPDIR -#define OPAL_DSTORE_JOBID PMIX_JOBID -#define OPAL_DSTORE_APPNUM PMIX_APPNUM -#define OPAL_DSTORE_RANK PMIX_RANK -#define OPAL_DSTORE_GLOBAL_RANK PMIX_GLOBAL_RANK -#define OPAL_DSTORE_LOCALRANK PMIX_LOCAL_RANK -#define OPAL_DSTORE_NODERANK PMIX_NODE_RANK -#define OPAL_DSTORE_LOCALLDR PMIX_LOCALLDR -#define OPAL_DSTORE_APPLDR PMIX_APPLDR -#define OPAL_DSTORE_LOCAL_PEERS PMIX_LOCAL_PEERS -#define OPAL_DSTORE_UNIV_SIZE PMIX_UNIV_SIZE -#define OPAL_DSTORE_JOB_SIZE PMIX_JOB_SIZE -#define OPAL_DSTORE_LOCAL_SIZE PMIX_LOCAL_SIZE -#define OPAL_DSTORE_NODE_SIZE PMIX_NODE_SIZE -#define OPAL_DSTORE_MAX_PROCS PMIX_MAX_PROCS -#define OPAL_DSTORE_NPROC_OFFSET PMIX_NPROC_OFFSET -#define OPAL_DSTORE_HOSTNAME PMIX_HOSTNAME -#define OPAL_DSTORE_NODEID PMIX_NODE_ID - -/* some OPAL-appropriate key definitions */ -#define OPAL_DSTORE_LOCALITY "opal.locality" // (uint16_t) relative locality of a peer -/* proc-specific scratch dirs */ -#define OPAL_DSTORE_JOB_SDIR "opal.job.session.dir" // (char*) job-level session dir -#define OPAL_DSTORE_MY_SDIR "opal.my.session.dir" // (char*) session dir for this proc -#define OPAL_DSTORE_URI "opal.uri" // (char*) uri of specified proc -#define OPAL_DSTORE_ARCH "opal.arch" // (uint32_t) arch for specified proc - -END_C_DECLS - -#endif diff --git a/opal/mca/dstore/hash/Makefile.am b/opal/mca/dstore/hash/Makefile.am deleted file mode 100644 index eb935eb0cc2..00000000000 --- a/opal/mca/dstore/hash/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2010 Cisco Systems, Inc. 
All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - dstore_hash.h \ - dstore_hash_component.c \ - dstore_hash.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_dstore_hash_DSO -component_noinst = -component_install = mca_dstore_hash.la -else -component_noinst = libmca_dstore_hash.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_dstore_hash_la_SOURCES = $(sources) -mca_dstore_hash_la_LDFLAGS = -module -avoid-version -mca_dstore_hash_la_LIBADD = $(dstore_hash_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_dstore_hash_la_SOURCES =$(sources) -libmca_dstore_hash_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/dstore/hash/dstore_hash.c b/opal/mca/dstore/hash/dstore_hash.c deleted file mode 100644 index 6a749038354..00000000000 --- a/opal/mca/dstore/hash/dstore_hash.c +++ /dev/null @@ -1,270 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "opal_config.h" -#include "opal/constants.h" - -#include -#include - -#include "opal_stdint.h" -#include "opal/class/opal_hash_table.h" -#include "opal/class/opal_pointer_array.h" -#include "opal/dss/dss_types.h" -#include "opal/util/error.h" -#include "opal/util/output.h" -#include "opal/util/proc.h" -#include "opal/util/show_help.h" - -#include "opal/mca/dstore/base/base.h" -#include "dstore_hash.h" - -static int init(struct opal_dstore_base_module_t *imod); -static void finalize(struct opal_dstore_base_module_t *imod); -static int store(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *proc, - opal_value_t *val); -static int fetch(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *proc, - const char *key, - opal_list_t *kvs); -static int remove_data(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *proc, const char *key); - -mca_dstore_hash_module_t opal_dstore_hash_module = { - { - init, - finalize, - store, - fetch, - remove_data - } -}; - -/* Initialize our hash table */ -static int init(struct opal_dstore_base_module_t *imod) -{ - mca_dstore_hash_module_t *mod; - - mod = (mca_dstore_hash_module_t*)imod; - OBJ_CONSTRUCT(&mod->ptable, opal_proc_table_t); - opal_proc_table_init(&mod->ptable, 16, 256); - return OPAL_SUCCESS; -} - -static void finalize(struct opal_dstore_base_module_t *imod) -{ - opal_dstore_proc_data_t *proc_data; - opal_process_name_t key; - void *node1, *node2; - mca_dstore_hash_module_t *mod; - - mod = (mca_dstore_hash_module_t*)imod; - - /* to assist in getting a clean valgrind, cycle thru the hash table - * and release all data stored in it - */ - if (OPAL_SUCCESS == opal_proc_table_get_first_key(&mod->ptable, &key, - (void**)&proc_data, - &node1, &node2)) { - if (NULL != proc_data) { - OBJ_RELEASE(proc_data); - } - while (OPAL_SUCCESS == opal_proc_table_get_next_key(&mod->ptable, &key, - 
(void**)&proc_data, - node1, &node1, - node2, &node2)) { - if (NULL != proc_data) { - OBJ_RELEASE(proc_data); - } - } - } - OBJ_DESTRUCT(&mod->ptable); -} - - - -static int store(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *id, - opal_value_t *val) -{ - opal_dstore_proc_data_t *proc_data; - opal_value_t *kv; - mca_dstore_hash_module_t *mod; - int rc; - - mod = (mca_dstore_hash_module_t*)imod; - - opal_output_verbose(1, opal_dstore_base_framework.framework_output, - "%s dstore:hash:store storing data for proc %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(*id)); - - /* lookup the proc data object for this proc */ - if (NULL == (proc_data = opal_dstore_base_lookup_proc(&mod->ptable, *id, true))) { - /* unrecoverable error */ - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, - "%s dstore:hash:store: storing data for proc %s unrecoverably failed", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(*id))); - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* see if we already have this key in the data - means we are updating - * a pre-existing value - */ - kv = opal_dstore_base_lookup_keyval(proc_data, val->key); -#if OPAL_ENABLE_DEBUG - char *_data_type = opal_dss.lookup_data_type(val->type); - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, - "%s dstore:hash:store: %s key %s[%s] for proc %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == kv ? 
"storing" : "updating"), - val->key, _data_type, OPAL_NAME_PRINT(*id))); - free (_data_type); -#endif - - if (NULL != kv) { - opal_list_remove_item(&proc_data->data, &kv->super); - OBJ_RELEASE(kv); - } - /* create the copy */ - if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&kv, val, OPAL_VALUE))) { - OPAL_ERROR_LOG(rc); - return rc; - } - opal_list_append(&proc_data->data, &kv->super); - - return OPAL_SUCCESS; -} - -static int fetch(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *id, - const char *key, opal_list_t *kvs) -{ - opal_dstore_proc_data_t *proc_data; - opal_value_t *kv, *knew; - mca_dstore_hash_module_t *mod; - int rc; - - mod = (mca_dstore_hash_module_t*)imod; - - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, - "%s dstore:hash:fetch: searching for key %s on proc %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == key) ? "NULL" : key, OPAL_NAME_PRINT(*id))); - - /* lookup the proc data object for this proc */ - if (NULL == (proc_data = opal_dstore_base_lookup_proc(&mod->ptable, *id, true))) { - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, - "%s dstore_hash:fetch data for proc %s not found", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(*id))); - return OPAL_ERR_NOT_FOUND; - } - - /* if the key is NULL, that we want everything */ - if (NULL == key) { - /* must provide an output list or this makes no sense */ - if (NULL == kvs) { - OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM); - return OPAL_ERR_BAD_PARAM; - } - OPAL_LIST_FOREACH(kv, &proc_data->data, opal_value_t) { - /* copy the value */ - if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&knew, kv, OPAL_VALUE))) { - OPAL_ERROR_LOG(rc); - return rc; - } - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, - "%s dstore:hash:fetch: adding data for key %s on proc %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == kv->key) ? 
"NULL" : kv->key, - OPAL_NAME_PRINT(*id))); - - /* add it to the output list */ - opal_list_append(kvs, &knew->super); - } - return OPAL_SUCCESS; - } - - /* find the value */ - if (NULL == (kv = opal_dstore_base_lookup_keyval(proc_data, key))) { - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, - "%s dstore_hash:fetch key %s for proc %s not found", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == key) ? "NULL" : key, - OPAL_NAME_PRINT(*id))); - return OPAL_ERR_NOT_FOUND; - } - - /* if the user provided a NULL list object, then they - * just wanted to know if the key was present */ - if (NULL == kvs) { - return OPAL_SUCCESS; - } - - /* create the copy */ - if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&knew, kv, OPAL_VALUE))) { - OPAL_ERROR_LOG(rc); - return rc; - } - /* add it to the output list */ - opal_list_append(kvs, &knew->super); - - return OPAL_SUCCESS; -} - -static int remove_data(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *id, const char *key) -{ - opal_dstore_proc_data_t *proc_data; - opal_value_t *kv; - mca_dstore_hash_module_t *mod; - - mod = (mca_dstore_hash_module_t*)imod; - - /* lookup the specified proc */ - if (NULL == (proc_data = opal_dstore_base_lookup_proc(&mod->ptable, *id, false))) { - /* no data for this proc */ - return OPAL_SUCCESS; - } - - /* if key is NULL, remove all data for this proc */ - if (NULL == key) { - while (NULL != (kv = (opal_value_t *) opal_list_remove_first(&proc_data->data))) { - OBJ_RELEASE(kv); - } - /* remove the proc_data object itself from the jtable */ - opal_proc_table_remove_value(&mod->ptable, *id); - /* cleanup */ - OBJ_RELEASE(proc_data); - return OPAL_SUCCESS; - } - - /* remove this item */ - OPAL_LIST_FOREACH(kv, &proc_data->data, opal_value_t) { - if (0 == strcmp(key, kv->key)) { - opal_list_remove_item(&proc_data->data, &kv->super); - OBJ_RELEASE(kv); - break; - } - } - - return OPAL_SUCCESS; -} - diff --git a/opal/mca/dstore/hash/dstore_hash.h 
b/opal/mca/dstore/hash/dstore_hash.h deleted file mode 100644 index 688eeddf3f6..00000000000 --- a/opal/mca/dstore/hash/dstore_hash.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_DSTORE_HASH_H -#define OPAL_DSTORE_HASH_H - -#include "opal/class/opal_hash_table.h" -#include "opal/mca/dstore/dstore.h" - -BEGIN_C_DECLS - - -OPAL_MODULE_DECLSPEC extern opal_dstore_base_component_t mca_dstore_hash_component; - -typedef struct { - opal_dstore_base_module_t api; - opal_proc_table_t ptable; -} mca_dstore_hash_module_t; -OPAL_MODULE_DECLSPEC extern mca_dstore_hash_module_t opal_dstore_hash_module; - -END_C_DECLS - -#endif /* OPAL_DSTORE_HASH_H */ diff --git a/opal/mca/dstore/hash/dstore_hash_component.c b/opal/mca/dstore/hash/dstore_hash_component.c deleted file mode 100644 index bbed898a678..00000000000 --- a/opal/mca/dstore/hash/dstore_hash_component.c +++ /dev/null @@ -1,84 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. 
Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. - */ - -#include "opal_config.h" -#include "opal/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/util/error.h" - -#include "opal/mca/dstore/dstore.h" -#include "opal/mca/dstore/base/base.h" -#include "dstore_hash.h" - -static opal_dstore_base_module_t *component_create(opal_list_t *attrs); -static int dstore_hash_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ -opal_dstore_base_component_t mca_dstore_hash_component = { - .base_version = { - OPAL_DSTORE_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "hash", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_query_component = dstore_hash_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - .create_handle = component_create, -}; - -static int dstore_hash_query(mca_base_module_t **module, int *priority) -{ - /* we are always available, but only as storage */ - *priority = 80; - *module = NULL; - return OPAL_SUCCESS; -} - -/* this component ignores any input attributes */ -static opal_dstore_base_module_t *component_create(opal_list_t *attrs) -{ - mca_dstore_hash_module_t *mod; - - mod = (mca_dstore_hash_module_t*)malloc(sizeof(mca_dstore_hash_module_t)); - if (NULL == mod) { - OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); - return NULL; - } - /* copy the APIs across */ - memcpy(mod, &opal_dstore_hash_module.api, sizeof(opal_dstore_base_module_t)); - /* let the module init itself */ - if (OPAL_SUCCESS != mod->api.init((struct 
opal_dstore_base_module_t*)mod)) { - /* release the module and return the error */ - free(mod); - return NULL; - } - return (opal_dstore_base_module_t*)mod; -} diff --git a/opal/mca/dstore/hash/owner.txt b/opal/mca/dstore/hash/owner.txt deleted file mode 100644 index e6150b6b0fc..00000000000 --- a/opal/mca/dstore/hash/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: project -status: active diff --git a/opal/mca/event/Makefile.am b/opal/mca/event/Makefile.am index 64b79705a10..3c2a1ec2334 100644 --- a/opal/mca/event/Makefile.am +++ b/opal/mca/event/Makefile.am @@ -1,9 +1,9 @@ # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/event/base/Makefile.am b/opal/mca/event/base/Makefile.am index 2705498e01e..afe829d5482 100644 --- a/opal/mca/event/base/Makefile.am +++ b/opal/mca/event/base/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/event/base/README.openmpi b/opal/mca/event/base/README.openmpi index 1aa893afc1e..d566e5705a6 100644 --- a/opal/mca/event/base/README.openmpi +++ b/opal/mca/event/base/README.openmpi @@ -27,7 +27,7 @@ through a set of wrappers - this is done for three reasons: support provide erroneous results on some platforms (as determined by our empirical testing). OPAL therefore provides enhanced tests to correctly assess those environments. - + 3. Enable greater flexibility in configuring Libevent for the specific environment. 
In particular, OPAL has no need of Libevent's dns, http, and rpc events, so configuration options to remove that code diff --git a/opal/mca/event/base/base.h b/opal/mca/event/base/base.h index f0c4a1d8b7d..fd06d0fe1af 100644 --- a/opal/mca/event/base/base.h +++ b/opal/mca/event/base/base.h @@ -3,9 +3,9 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/event/base/event_base_frame.c b/opal/mca/event/base/event_base_frame.c index 96e0c1af44d..f7f584fc179 100644 --- a/opal/mca/event/base/event_base_frame.c +++ b/opal/mca/event/base/event_base_frame.c @@ -1,10 +1,10 @@ /* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -38,7 +38,7 @@ * and opens event components, etc. * * This function is invoked during opal_init(). - * + * * This function fills in the internal global variable * opal_event_base_components_opened, which is a list of all * event components that were successfully opened. 
This @@ -60,11 +60,11 @@ static int opal_event_base_close(void) return mca_base_framework_components_close (&opal_event_base_framework, NULL); } - + /* * Globals */ -opal_event_base_t *opal_event_base=NULL; +opal_event_base_t *opal_sync_event_base=NULL; static int opal_event_base_open(mca_base_open_flag_t flags) { @@ -84,13 +84,13 @@ static int opal_event_base_open(mca_base_open_flag_t flags) opal_event_use_threads(); /* get our event base */ - if (NULL == (opal_event_base = opal_event_base_create())) { + if (NULL == (opal_sync_event_base = opal_event_base_create())) { return OPAL_ERROR; } /* set the number of priorities */ if (0 < OPAL_EVENT_NUM_PRI) { - opal_event_base_priority_init(opal_event_base, OPAL_EVENT_NUM_PRI); + opal_event_base_priority_init(opal_sync_event_base, OPAL_EVENT_NUM_PRI); } return rc; diff --git a/opal/mca/event/base/owner.txt b/opal/mca/event/base/owner.txt index 36389e7f941..dcc598ef943 100644 --- a/opal/mca/event/base/owner.txt +++ b/opal/mca/event/base/owner.txt @@ -1,5 +1,5 @@ # -# owner/status file +# owner/status file # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # diff --git a/opal/mca/event/configure.m4 b/opal/mca/event/configure.m4 index 81c912f0a01..f8040446009 100644 --- a/opal/mca/event/configure.m4 +++ b/opal/mca/event/configure.m4 @@ -2,9 +2,9 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -22,7 +22,7 @@ m4_define(MCA_opal_event_CONFIGURE_MODE, STOP_AT_FIRST) AC_DEFUN([MCA_opal_event_CONFIG],[ opal_event_base_include= - + # configure all the components MCA_CONFIGURE_FRAMEWORK($1, $2, 1) @@ -47,8 +47,8 @@ AC_DEFUN([MCA_opal_event_CONFIG],[ [AS_IF([test "$OPAL_HAVE_WORKING_EVENTOPS" = "1"], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no])])]) - AC_DEFINE_UNQUOTED(OPAL_HAVE_WORKING_EVENTOPS, - [$OPAL_HAVE_WORKING_EVENTOPS], + AC_DEFINE_UNQUOTED(OPAL_HAVE_WORKING_EVENTOPS, + [$OPAL_HAVE_WORKING_EVENTOPS], [Whether our event component has working event operations or not (if not, then assumedly it only has working timers and signals)]) # The winning component will have told us where their header file diff --git a/opal/mca/event/event.h b/opal/mca/event/event.h index abf2f6feaca..b0958b64e09 100644 --- a/opal/mca/event/event.h +++ b/opal/mca/event/event.h @@ -2,12 +2,12 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow */ @@ -22,12 +22,8 @@ #ifdef HAVE_SYS_TIME_H #include #endif -#ifdef HAVE_STDINT_H #include -#endif -#ifdef HAVE_STDARG_H #include -#endif #include "opal/class/opal_pointer_array.h" diff --git a/opal/mca/event/external/Makefile.am b/opal/mca/event/external/Makefile.am index debbb53f1b6..3c9ade76513 100644 --- a/opal/mca/event/external/Makefile.am +++ b/opal/mca/event/external/Makefile.am @@ -1,10 +1,10 @@ # # Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2013 Los Alamos National Security, LLC. 
All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/event/external/configure.m4 b/opal/mca/event/external/configure.m4 index 110fe554cab..cc789e3726c 100644 --- a/opal/mca/event/external/configure.m4 +++ b/opal/mca/event/external/configure.m4 @@ -1,12 +1,14 @@ # -*- shell-script -*- # -# Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -29,9 +31,9 @@ AC_DEFUN([MCA_opal_event_external_COMPILE_MODE], [ # --------------------------------- AC_DEFUN([MCA_opal_event_external_POST_CONFIG],[ # If we won, then do all the rest of the setup - AS_IF([test "$1" = "1"], - [AC_DEFINE_UNQUOTED([EVENT_EXTERNAL_EVENT_VERSION], - [external], + AS_IF([test "$1" = "1"], + [AC_DEFINE_UNQUOTED([EVENT_EXTERNAL_EVENT_VERSION], + [external], [Version of event]) # Set this variable so that the framework m4 knows what @@ -71,7 +73,7 @@ AC_DEFUN([MCA_opal_event_external_CONFIG],[ # Make sure the user didn't specify --with-libevent=internal and # --with-libevent-libdir=whatever (because you can only specify # --with-libevent-libdir when external libevent is being used). - AS_IF([test "$with_libevent" = "internal" -a "$with_libevent_libdir" != ""], + AS_IF([test "$with_libevent" = "internal" && test -n "$with_libevent_libdir"], [AC_MSG_WARN([Both --with-libevent=internal and --with-libevent-libdir=DIR]) AC_MSG_WARN([were specified, which does not make sense.]) AC_MSG_ERROR([Cannot continue])]) @@ -80,24 +82,24 @@ AC_DEFUN([MCA_opal_event_external_CONFIG],[ # but hopefully slightly more clear...) 
opal_event_external_want=no AS_IF([test "$with_libevent" = "external"], [opal_event_external_want=yes]) - AS_IF([test "$with_libevent_libdir" != ""], [opal_event_external_want=yes]) - AS_IF([test "$with_libevent" != "" -a "$with_libevent" != "no" -a "$with_libevent" != "internal"], [opal_event_external_want=yes]) + AS_IF([test -n "$with_libevent_libdir"], [opal_event_external_want=yes]) + AS_IF([test -n "$with_libevent" && test "$with_libevent" != "no" && test "$with_libevent" != "internal"], [opal_event_external_want=yes]) # If we want external support, try it AS_IF([test "$opal_event_external_want" = "yes"], [ # Error out if the specified dir does not exist - OPAL_CHECK_WITHDIR([libevent-libdir], [$with_libevent_libdir], + OPAL_CHECK_WITHDIR([libevent-libdir], [$with_libevent_libdir], [libevent.*]) AC_MSG_CHECKING([for external libevent in]) - AS_IF([test "$with_libevent" != "external" -a "$with_libevent" != "yes"], + AS_IF([test "$with_libevent" != "external" && test "$with_libevent" != "yes"], [opal_event_dir=$with_libevent AC_MSG_RESULT([$opal_event_dir]) - OPAL_CHECK_WITHDIR([libevent], [$with_libdir], + OPAL_CHECK_WITHDIR([libevent], [$with_libdir], [include/event.h]) ], [AC_MSG_RESULT([(default search paths)])]) - AS_IF([test ! -z "$with_libevent_libdir" -a "$with_libevent_libdir" != "yes"], + AS_IF([test ! -z "$with_libevent_libdir" && test "$with_libevent_libdir" != "yes"], [opal_event_libdir="$with_libevent_libdir"]) opal_event_external_CPPFLAGS_save=$CPPFLAGS diff --git a/opal/mca/event/external/event_external_component.c b/opal/mca/event/external/event_external_component.c index acf2120c40d..7856b7b06b8 100644 --- a/opal/mca/event/external/event_external_component.c +++ b/opal/mca/event/external/event_external_component.c @@ -1,13 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -16,6 +17,10 @@ #include "opal/mca/event/event.h" +#include "event.h" + +#include "opal/util/argv.h" + /* * Public string showing the sysinfo ompi_linux component version number */ @@ -27,7 +32,9 @@ const char *opal_event_external_component_version_string = * Local function */ static int event_external_open(void); +static int event_external_register (void); +char *event_module_include = NULL; /* * Instantiate the public struct with all of our public information @@ -49,6 +56,7 @@ const opal_event_component_t mca_event_external_component = { /* Component open and close functions */ .mca_open_component = event_external_open, + .mca_register_component_params = event_external_register }, .base_data = { /* The component is checkpoint ready */ @@ -62,3 +70,47 @@ static int event_external_open(void) eliminate the whole file */ return OPAL_SUCCESS; } + +static int event_external_register (void) { + const char **all_available_eventops; + char *avail = NULL; + char *help_msg = NULL; + int ret; + + // Get supported methods + all_available_eventops = event_get_supported_methods(); + +#ifdef __APPLE__ + event_module_include ="select"; +#else + event_module_include = "poll"; +#endif + + avail = opal_argv_join(all_available_eventops, ','); + asprintf( &help_msg, + "Comma-delimited list of libevent subsystems " + "to use (%s -- available on your platform)", + avail ); + + ret = mca_base_component_var_register (&mca_event_external_component.base_version, + "event_include", help_msg, + MCA_BASE_VAR_TYPE_STRING, NULL, 0, + MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &event_module_include); + free(help_msg); /* release the help message */ + free(avail); + avail = NULL; + + if (0 > ret) { + return ret; + } + + ret = 
mca_base_var_register_synonym (ret, "opal", "opal", "event", "include", 0); + if (0 > ret) { + return ret; + } + + return OPAL_SUCCESS; +} diff --git a/opal/mca/event/external/event_external_module.c b/opal/mca/event/external/event_external_module.c index 8761a4e22a0..1bbe9bdd6fe 100644 --- a/opal/mca/event/external/event_external_module.c +++ b/opal/mca/event/external/event_external_module.c @@ -4,6 +4,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. */ #include "opal_config.h" #include "opal/constants.h" @@ -12,12 +13,63 @@ #include "opal/mca/event/base/base.h" #include "external.h" +#include "opal/util/argv.h" + +extern char *event_module_include; +static struct event_config *config = NULL; + +opal_event_base_t* opal_event_base_create(void) +{ + opal_event_base_t *base; + + base = event_base_new_with_config(config); + if (NULL == base) { + /* there is no backend method that does what we want */ + opal_output(0, "No event method available"); + } + return base; +} + int opal_event_init(void) { + const char **all_available_eventops = NULL; + char **includes=NULL; + bool dumpit=false; + int i, j; + if (opal_output_get_verbosity(opal_event_base_framework.framework_output) > 4) { event_enable_debug_mode(); } + all_available_eventops = event_get_supported_methods(); + + if (NULL == event_module_include) { + /* Shouldn't happen, but... 
*/ + event_module_include = strdup("select"); + } + includes = opal_argv_split(event_module_include,','); + + /* get a configuration object */ + config = event_config_new(); + /* cycle thru the available subsystems */ + for (i = 0 ; NULL != all_available_eventops[i] ; ++i) { + /* if this module isn't included in the given ones, + * then exclude it + */ + dumpit = true; + for (j=0; NULL != includes[j]; j++) { + if (0 == strcmp("all", includes[j]) || + 0 == strcmp(all_available_eventops[i], includes[j])) { + dumpit = false; + break; + } + } + if (dumpit) { + event_config_avoid_method(config, all_available_eventops[i]); + } + } + opal_argv_free(includes); + return OPAL_SUCCESS; } diff --git a/opal/mca/event/external/external.h b/opal/mca/event/external/external.h index 183ded78dfa..bfebe9c5f94 100644 --- a/opal/mca/event/external/external.h +++ b/opal/mca/event/external/external.h @@ -1,11 +1,15 @@ /* - * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. + * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * When this component is used, this file is included in the rest of @@ -16,6 +20,8 @@ #ifndef MCA_OPAL_EVENT_EXTERNAL_H #define MCA_OPAL_EVENT_EXTERNAL_H +#include "opal_config.h" + #include "event.h" #include "event2/event.h" #include "event2/thread.h" @@ -27,7 +33,7 @@ BEGIN_C_DECLS typedef struct event_base opal_event_base_t; typedef struct event opal_event_t; -OPAL_DECLSPEC extern opal_event_base_t *opal_event_base; +OPAL_DECLSPEC extern opal_event_base_t *opal_sync_event_base; #define OPAL_EV_TIMEOUT EV_TIMEOUT #define OPAL_EV_READ EV_READ @@ -40,7 +46,7 @@ OPAL_DECLSPEC extern opal_event_base_t *opal_event_base; #define OPAL_EVLOOP_NONBLOCK EVLOOP_NONBLOCK /**< Do not block. */ /* Global function to create and release an event base */ -#define opal_event_base_create() event_base_new() +OPAL_DECLSPEC opal_event_base_t* opal_event_base_create(void); #define opal_event_base_free(x) event_base_free(x) @@ -80,7 +86,7 @@ OPAL_DECLSPEC opal_event_t* opal_event_alloc(void); #define opal_event_free(x) event_free((x)) /* Timer APIs */ -#define opal_event_evtimer_new(b, cb, arg) opal_event_new((b), -1, 0, (cb), (arg)) +#define opal_event_evtimer_new(b, cb, arg) opal_event_new((b), -1, 0, (cb), (arg)) #define opal_event_evtimer_add(x, tv) opal_event_add((x), (tv)) diff --git a/opal/mca/event/libevent2022/Makefile.am b/opal/mca/event/libevent2022/Makefile.am index 143f5f951e3..5fff134f99d 100644 --- a/opal/mca/event/libevent2022/Makefile.am +++ b/opal/mca/event/libevent2022/Makefile.am @@ -4,10 +4,12 @@ # Copyright (c) 2012-2013 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2015 Intel, Inc. All rights reserved +# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -68,17 +70,12 @@ nobase_opal_HEADERS = $(headers) nobase_nodist_opal_HEADERS = libevent/include/event2/event-config.h endif -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). +# Make the output library in this directory, and name it +# libmca__.la because build is forced to be static-only -if MCA_BUILD_opal_event_libevent2022_DSO -component_noinst = -component_install = mca_event_libevent2022.la -else component_noinst = libmca_event_libevent2022.la component_install = -endif + # We only ever build this component statically @@ -86,4 +83,5 @@ noinst_LTLIBRARIES = $(component_noinst) libmca_event_libevent2022_la_SOURCES =$(sources) libmca_event_libevent2022_la_LDFLAGS = -module -avoid-version libmca_event_libevent2022_la_LIBADD = $(builddir)/libevent/libevent.la +libmca_event_libevent2022_la_DEPENDENCIES = $(builddir)/libevent/libevent.la diff --git a/opal/mca/event/libevent2022/configure.m4 b/opal/mca/event/libevent2022/configure.m4 index 995070a61f0..b3c375062e4 100644 --- a/opal/mca/event/libevent2022/configure.m4 +++ b/opal/mca/event/libevent2022/configure.m4 @@ -1,13 +1,15 @@ # -*- shell-script -*- # -# Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights reserved. # Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # AC_DEFUN([MCA_opal_event_libevent2022_PRIORITY], [80]) @@ -23,7 +25,7 @@ AC_DEFUN([MCA_opal_event_libevent2022_COMPILE_MODE], [ AC_DEFUN([MCA_opal_event_libevent2022_POST_CONFIG], [ AM_CONDITIONAL(OPAL_EVENT_HAVE_THREAD_SUPPORT, test "$enable_event_thread_support" = "yes") - AS_IF([test "$1" = "1"], + AS_IF([test "$1" = "1"], [ # Build libevent/include/event2/event-config.h. If we # don't do it here, then libevent's Makefile.am will build # it during "make all", which is too late for us (because @@ -80,7 +82,7 @@ EOF ]) ]) -# MCA_event_libevent2022_CONFIG([action-if-can-compile], +# MCA_event_libevent2022_CONFIG([action-if-can-compile], # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_event_libevent2022_CONFIG],[ @@ -89,15 +91,6 @@ AC_DEFUN([MCA_opal_event_libevent2022_CONFIG],[ AC_CONFIG_FILES([opal/mca/event/libevent2022/Makefile]) libevent_basedir="opal/mca/event/libevent2022" - # If we're not building externally, configure this component - AS_IF([test "$with_libevent" = "internal" -o "$with_libevent" = "" -o "$with_libevent" = "yes"], - [MCA_opal_event_libevent2022_DO_THE_CONFIG], - [AC_MSG_WARN([using an external libevent; disqualifiying this component]) - $2]) - OPAL_VAR_SCOPE_POP -]) - -AC_DEFUN([MCA_opal_event_libevent2022_DO_THE_CONFIG], [ CFLAGS_save="$CFLAGS" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS" CPPFLAGS_save="$CPPFLAGS" @@ -165,8 +158,13 @@ AC_DEFUN([MCA_opal_event_libevent2022_DO_THE_CONFIG], [ AC_MSG_RESULT([$event_args]) - OPAL_CONFIG_SUBDIR([$libevent_basedir/libevent], - [$event_args $opal_subdir_args], + # We define "random" to be "opal_random" so that Libevent will not + # use random(3) internally (and potentially unexpectedly perturb + # values returned by rand(3) to the application). 
+ + CPPFLAGS="$CPPFLAGS -Drandom=opal_random" + OPAL_CONFIG_SUBDIR([$libevent_basedir/libevent], + [$event_args $opal_subdir_args 'CPPFLAGS=$CPPFLAGS'], [libevent_happy="yes"], [libevent_happy="no"]) if test "$libevent_happy" = "no"; then AC_MSG_WARN([Event library failed to configure]) @@ -176,7 +174,7 @@ AC_DEFUN([MCA_opal_event_libevent2022_DO_THE_CONFIG], [ # Finally, add some flags to the wrapper compiler if we're # building with developer headers so that our headers can # be found. - event_libevent2022_WRAPPER_EXTRA_CPPFLAGS='-I${includedir}/openmpi/opal/mca/event/libevent2022/libevent -I${includedir}/openmpi/opal/mca/event/libevent2022/libevent/include' + event_libevent2022_WRAPPER_EXTRA_CPPFLAGS='-I${pkgincludedir}/opal/mca/event/libevent2022/libevent -I${pkgincludedir}/opal/mca/event/libevent2022/libevent/include' CFLAGS="$CFLAGS_save" CPPFLAGS="$CPPFLAGS_save" @@ -186,9 +184,25 @@ AC_DEFUN([MCA_opal_event_libevent2022_DO_THE_CONFIG], [ # libevent/include/event2/event-config.h!). Otherwise, set it to # 0. libevent_file=$libevent_basedir/libevent/config.h - AS_IF([test "$libevent_happy" = "yes" -a -r $libevent_file], - [OPAL_HAVE_WORKING_EVENTOPS=`grep HAVE_WORKING_EVENTOPS $libevent_file | awk '{print [$]3 }'` - $1], - [$2 - OPAL_HAVE_WORKING_EVENTOPS=0]) + + # If we are not building the internal libevent, then indicate that + # this component should not be built. NOTE: we still did all the + # above configury so that all the proper GNU Autotools + # infrastructure is setup properly (e.g., w.r.t. SUBDIRS=libevent in + # this directory's Makefile.am, we still need the Autotools "make + # distclean" infrastructure to work properly). 
+ + AS_IF([test "$with_libevent" != "internal" && test -n "$with_libevent" && test "$with_libevent" != "yes"], + [AC_MSG_WARN([using an external libevent; disqualifying this component]) + libevent_happy=no], + + [AS_IF([test "$libevent_happy" = "yes" && test -r $libevent_file], + [OPAL_HAVE_WORKING_EVENTOPS=`grep HAVE_WORKING_EVENTOPS $libevent_file | awk '{print [$]3 }'` + $1], + [$2 + OPAL_HAVE_WORKING_EVENTOPS=0]) + ] + ) + + OPAL_VAR_SCOPE_POP ]) diff --git a/opal/mca/event/libevent2022/libevent/ChangeLog b/opal/mca/event/libevent2022/libevent/ChangeLog index 8c0a9f8c4a7..5e60a730bfa 100644 --- a/opal/mca/event/libevent2022/libevent/ChangeLog +++ b/opal/mca/event/libevent2022/libevent/ChangeLog @@ -1308,11 +1308,11 @@ Changes in 2.0.1-alpha (17 Apr 2009): o Correct handling of trailing headers in chunked replies; from Scott Lamb. o Support multi-line HTTP headers; based on a patch from Moshe Litvin o Reject negative Content-Length headers; anonymous bug report - o Detect CLOCK_MONOTONIC at runtime for evdns; anonymous bug report + o Detect CLOCK_MONOTONIC at runtime for evdns; anonymous bug report o Various HTTP correctness fixes from Scott Lamb o Fix a bug where deleting signals with the kqueue backend would cause subsequent adds to fail o Support multiple events listening on the same signal; make signals regular events that go on the same event queue; problem report by Alexander Drozdov. - o Fix a problem with epoll() and reinit; problem report by Alexander Drozdov. + o Fix a problem with epoll() and reinit; problem report by Alexander Drozdov. o Fix off-by-one errors in devpoll; from Ian Bell o Make event_add not change any state if it fails; reported by Ian Bell. o Fix a bug where headers arriving in multiple packets were not parsed; fix from Jiang Hong; test by me. 
diff --git a/opal/mca/event/libevent2022/libevent/Doxyfile b/opal/mca/event/libevent2022/libevent/Doxyfile index 5d3865e7e79..64aa8122708 100644 --- a/opal/mca/event/libevent2022/libevent/Doxyfile +++ b/opal/mca/event/libevent2022/libevent/Doxyfile @@ -14,7 +14,7 @@ # Project related configuration options #--------------------------------------------------------------------------- -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded # by quotes) that should identify the project. PROJECT_NAME = libevent @@ -23,33 +23,33 @@ PROJECT_NAME = libevent OUTPUT_DIRECTORY = doxygen/ -# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen -# will interpret the first line (until the first dot) of a JavaDoc-style -# comment as the brief description. If set to NO, the JavaDoc -# comments will behave just like the Qt-style comments (thus requiring an +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like the Qt-style comments (thus requiring an # explicit @brief command for a brief description. JAVADOC_AUTOBRIEF = YES -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C -# sources only. Doxygen will then generate output that is more tailored for C. -# For instance, some of the names that are used will be different. The list +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = YES -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the -# brief documentation of file, namespace and class members alphabetically -# by member name. 
If set to NO (the default) the members will appear in +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = YES -# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag -# can be used to strip a user-defined part of the path. Stripping is -# only done if one of the specified strings matches the left-hand part of -# the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the # path to strip. STRIP_FROM_PATH = include/ @@ -58,9 +58,9 @@ STRIP_FROM_PATH = include/ # configuration options related to the input files #--------------------------------------------------------------------------- -# The INPUT tag can be used to specify the files and/or directories that contain -# documented source files. You may enter file names like "myfile.cpp" or -# directories like "/usr/src/myproject". Separate the files or directories +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories # with spaces. 
INPUT = \ @@ -87,7 +87,7 @@ INPUT = \ # configuration options related to the HTML output #--------------------------------------------------------------------------- -# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES @@ -96,74 +96,74 @@ GENERATE_HTML = YES # configuration options related to the LaTeX output #--------------------------------------------------------------------------- -# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = YES -# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex -# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. LATEX_CMD_NAME = latex -# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to -# generate index for LaTeX. If left blank `makeindex' will be used as the +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex -# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact -# LaTeX documents. This may be useful for small projects and may help to +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. 
This may be useful for small projects and may help to # save some trees in general. COMPACT_LATEX = NO -# The PAPER_TYPE tag can be used to set the paper type that is used -# by the printer. Possible values are: a4, a4wide, letter, legal and +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4wide -# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX # packages that should be included in the LaTeX output. -EXTRA_PACKAGES = +EXTRA_PACKAGES = -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for -# the generated latex document. The header should contain everything until -# the first chapter. If it is left blank doxygen will generate a +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! -LATEX_HEADER = +LATEX_HEADER = -# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated -# is prepared for conversion to pdf (using ps2pdf). The pdf file will -# contain links (just like the HTML output) instead of page references +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = NO -# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of -# plain latex in the generated Makefile. 
Set this option to YES to get a +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. -# command to the generated LaTeX files. This will instruct LaTeX to keep -# running if errors occur, instead of asking the user for help. +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO -# If LATEX_HIDE_INDICES is set to YES then doxygen will not -# include the index chapters (such as File Index, Compound Index, etc.) +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) # in the output. LATEX_HIDE_INDICES = NO @@ -172,86 +172,86 @@ LATEX_HIDE_INDICES = NO # configuration options related to the man page output #--------------------------------------------------------------------------- -# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO -# The MAN_EXTENSION tag determines the extension that is added to +# The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 -# If the MAN_LINKS tag is set to YES and Doxygen generates man output, -# then it will generate one additional man file for each entity -# documented in the real man page(s). 
These additional files -# only source the real man page, but without them the man command +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = YES #--------------------------------------------------------------------------- -# Configuration options related to the preprocessor +# Configuration options related to the preprocessor #--------------------------------------------------------------------------- -# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will -# evaluate all C-preprocessor directives found in the sources and include +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES -# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro -# names in the source code. If set to NO (the default) only conditional -# compilation will be performed. Macro expansion can be done in a controlled +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO -# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES -# then the macro expansion is limited to the macros specified with the +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. 
EXPAND_ONLY_PREDEF = NO -# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files # in the INCLUDE_PATH (see below) will be search if a #include is found. SEARCH_INCLUDES = YES -# The INCLUDE_PATH tag can be used to specify one or more directories that -# contain include files that are not input files but should be processed by +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by # the preprocessor. -INCLUDE_PATH = +INCLUDE_PATH = -# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard -# patterns (like *.h and *.hpp) to filter out the header-files in the -# directories. If left blank, the patterns specified with FILE_PATTERNS will +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. -INCLUDE_FILE_PATTERNS = +INCLUDE_FILE_PATTERNS = -# The PREDEFINED tag can be used to specify one or more macro names that -# are defined before the preprocessor is started (similar to the -D option of -# gcc). The argument of the tag is a list of macros of the form: name -# or name=definition (no spaces). If the definition and the = are -# omitted =1 is assumed. To prevent a macro definition from being -# undefined via #undef or recursively expanded use the := operator +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. 
To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator # instead of the = operator. PREDEFINED = TAILQ_ENTRY RB_ENTRY _EVENT_DEFINED_TQENTRY _EVENT_IN_DOXYGEN -# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then -# this tag can be used to specify a list of macro names that should be expanded. -# The macro definition that is found in the sources will be used. +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition. -EXPAND_AS_DEFINED = +EXPAND_AS_DEFINED = -# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then -# doxygen's preprocessor will remove all function-like macros that are alone -# on a line, have an all uppercase name, and do not end with a semicolon. Such -# function macros are typically used for boiler-plate code, and will confuse +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse # the parser if not removed. 
SKIP_FUNCTION_MACROS = YES diff --git a/opal/mca/event/libevent2022/libevent/Makefile.nmake b/opal/mca/event/libevent2022/libevent/Makefile.nmake index 4cd04c13c9c..009d977880e 100644 --- a/opal/mca/event/libevent2022/libevent/Makefile.nmake +++ b/opal/mca/event/libevent2022/libevent/Makefile.nmake @@ -28,7 +28,7 @@ all: static_libs tests static_libs: $(STATIC_LIBS) libevent_core.lib: $(CORE_OBJS) $(WIN_OBJS) - lib $(LIBFLAGS) $(CORE_OBJS) $(WIN_OBJS) /out:libevent_core.lib + lib $(LIBFLAGS) $(CORE_OBJS) $(WIN_OBJS) /out:libevent_core.lib libevent_extras.lib: $(EXTRA_OBJS) lib $(LIBFLAGS) $(EXTRA_OBJS) /out:libevent_extras.lib diff --git a/opal/mca/event/libevent2022/libevent/bufferevent_openssl.c b/opal/mca/event/libevent2022/libevent/bufferevent_openssl.c index 7582d9b4be0..fad31c35125 100644 --- a/opal/mca/event/libevent2022/libevent/bufferevent_openssl.c +++ b/opal/mca/event/libevent2022/libevent/bufferevent_openssl.c @@ -788,7 +788,7 @@ consider_reading(struct bufferevent_openssl *bev_ssl) if (bev_ssl->bev.read_suspended) break; - + /* Read all pending data. This won't hit the network * again, and will (most importantly) put us in a state * where we don't need to read anything else until the diff --git a/opal/mca/event/libevent2022/libevent/configure.ac b/opal/mca/event/libevent2022/libevent/configure.ac index f04e6989d77..5dd85aff473 100644 --- a/opal/mca/event/libevent2022/libevent/configure.ac +++ b/opal/mca/event/libevent2022/libevent/configure.ac @@ -597,7 +597,7 @@ if test "$enable_epoll" != "no" ; then # the libevent library. Badness ensues. Therefore, check to # see that this struct gets correctly passed between userspace # and the kernel. - + # In this test, we use epoll in Level Triggered mode. We create # a pipe and the write only file descriptor of the pipe is # added to the epoll set. 
The test is successful if @@ -610,8 +610,6 @@ if test "$enable_epoll" != "no" ; then AC_INCLUDES_DEFAULT #include ],[[ -int main(int argc, char **argv) -{ struct epoll_event epevin; struct epoll_event epevout; int res; @@ -639,7 +637,6 @@ int main(int argc, char **argv) } } /* SUCCESS */ -} ]])], [haveepoll=yes # OMPI: Don't use AC_LIBOBJ @@ -671,9 +668,6 @@ AC_INCLUDES_DEFAULT #include #include ],[[ -int -main(int argc, char **argv) -{ struct epoll_event epevin; struct epoll_event epevout; int res; @@ -689,7 +683,7 @@ main(int argc, char **argv) memset(&epevin.data.ptr, 5, sizeof(epevin.data.ptr)); epevin.events = EPOLLIN | EPOLLOUT; - if (syscall(__NR_epoll_ctl, epfd, + if (syscall(__NR_epoll_ctl, epfd, EPOLL_CTL_ADD, fildes[[1]], &epevin) == -1) exit(1); @@ -702,7 +696,6 @@ main(int argc, char **argv) } } /* SUCCESS */ -} ]])], [haveepollsyscall=yes # OMPI: don't use AC_LIBOBJ diff --git a/opal/mca/event/libevent2022/libevent/evdns.c b/opal/mca/event/libevent2022/libevent/evdns.c index 60b10485116..f55a50a0bc5 100644 --- a/opal/mca/event/libevent2022/libevent/evdns.c +++ b/opal/mca/event/libevent2022/libevent/evdns.c @@ -48,6 +48,8 @@ * Version: 0.1b */ +#include "opal_config.h" + #include #include "event2/event-config.h" @@ -121,10 +123,6 @@ #define EVDNS_LOG_WARN 1 #define EVDNS_LOG_MSG 2 -#ifndef HOST_NAME_MAX -#define HOST_NAME_MAX 255 -#endif - #include #undef MIN @@ -3108,7 +3106,7 @@ evdns_search_ndots_set(const int ndots) { static void search_set_from_hostname(struct evdns_base *base) { - char hostname[HOST_NAME_MAX + 1], *domainname; + char hostname[OPAL_MAXHOSTNAMELEN], *domainname; ASSERT_LOCKED(base); search_postfix_clear(base); diff --git a/opal/mca/event/libevent2022/libevent/event.c b/opal/mca/event/libevent2022/libevent/event.c index dfee932868a..b9f47c42a08 100644 --- a/opal/mca/event/libevent2022/libevent/event.c +++ b/opal/mca/event/libevent2022/libevent/event.c @@ -1566,8 +1566,11 @@ event_base_loop(struct event_base *base, int flags) 
EVBASE_ACQUIRE_LOCK(base, th_base_lock); if (base->running_loop) { - event_warnx("%s: reentrant invocation. Only one event_base_loop" - " can run on each event_base at once.", __func__); +/***** OMPI change ****/ +#if OPAL_ENABLE_DEBUG + event_warnx("%s: reentrant invocation. Only one event_base_loop" + " can run on each event_base at once.", __func__); +#endif EVBASE_RELEASE_LOCK(base, th_base_lock); return -1; } diff --git a/opal/mca/event/libevent2022/libevent/http.c b/opal/mca/event/libevent2022/libevent/http.c index 377597eabe8..8340583c5a6 100644 --- a/opal/mca/event/libevent2022/libevent/http.c +++ b/opal/mca/event/libevent2022/libevent/http.c @@ -1950,7 +1950,7 @@ evhttp_get_body(struct evhttp_connection *evcon, struct evhttp_request *req) now, just optimistically tell the client to send their message body. */ if (req->ntoread > 0) { - /* ntoread is ev_int64_t, max_body_size is ev_uint64_t */ + /* ntoread is ev_int64_t, max_body_size is ev_uint64_t */ if ((req->evcon->max_body_size <= EV_INT64_MAX) && (ev_uint64_t)req->ntoread > req->evcon->max_body_size) { evhttp_send_error(req, HTTP_ENTITYTOOLARGE, NULL); return; diff --git a/opal/mca/event/libevent2022/libevent/opal_rename.h b/opal/mca/event/libevent2022/libevent/opal_rename.h index 78480ee0d25..be9b64ba248 100644 --- a/opal/mca/event/libevent2022/libevent/opal_rename.h +++ b/opal/mca/event/libevent2022/libevent/opal_rename.h @@ -7,9 +7,9 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/opal/mca/event/libevent2022/libevent/test/regress.gen.c b/opal/mca/event/libevent2022/libevent/test/regress.gen.c index 141f4f42529..0c4097114cc 100644 --- a/opal/mca/event/libevent2022/libevent/test/regress.gen.c +++ b/opal/mca/event/libevent2022/libevent/test/regress.gen.c @@ -88,7 +88,7 @@ msg_run_expand_to_hold_more(struct msg *msg) msg->run_num_allocated = tobe_allocated; return 0;} -struct run* +struct run* msg_run_add(struct msg *msg) { if (++msg->run_length >= msg->run_num_allocated) { diff --git a/opal/mca/event/libevent2022/libevent2022.h b/opal/mca/event/libevent2022/libevent2022.h index 8ee0079b3e1..23deade6e86 100644 --- a/opal/mca/event/libevent2022/libevent2022.h +++ b/opal/mca/event/libevent2022/libevent2022.h @@ -1,14 +1,14 @@ /* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * When this component is used, this file is included in the rest of @@ -67,7 +67,7 @@ BEGIN_C_DECLS typedef struct event_base opal_event_base_t; typedef struct event opal_event_t; -OPAL_DECLSPEC extern opal_event_base_t *opal_event_base; +OPAL_DECLSPEC extern opal_event_base_t *opal_sync_event_base; #define OPAL_EV_TIMEOUT EV_TIMEOUT #define OPAL_EV_READ EV_READ @@ -120,7 +120,7 @@ OPAL_DECLSPEC opal_event_t* opal_event_alloc(void); #define opal_event_free(x) event_free((x)) /* Timer APIs */ -#define opal_event_evtimer_new(b, cb, arg) opal_event_new((b), -1, 0, (cb), (arg)) +#define opal_event_evtimer_new(b, cb, arg) opal_event_new((b), -1, 0, (cb), (arg)) #define opal_event_evtimer_add(x, tv) opal_event_add((x), (tv)) diff --git a/opal/mca/event/libevent2022/libevent2022_component.c b/opal/mca/event/libevent2022/libevent2022_component.c index c87e51772cc..caf5b82fc40 100644 --- a/opal/mca/event/libevent2022/libevent2022_component.c +++ b/opal/mca/event/libevent2022/libevent2022_component.c @@ -1,13 +1,13 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -121,7 +121,7 @@ const opal_event_component_t mca_event_libevent2022_component = { }; static int libevent2022_register (void) -{ +{ const struct eventop** _eventop = eventops; char available_eventops[1024] = "none"; char *help_msg = NULL; @@ -144,7 +144,7 @@ static int libevent2022_register (void) * won't be used with libevent. 
For example, we currently have * ompi_mpi_init() set to use "all" (to include epoll and friends) * so that the TCP BTL can be a bit more scalable -- because we - * *know* that MPI apps don't use pty's with libevent. + * *know* that MPI apps don't use pty's with libevent. * Note that other tools explicitly *do* use pty's with libevent: * * - orted @@ -170,7 +170,7 @@ static int libevent2022_register (void) event_module_include = "poll"; #endif - asprintf( &help_msg, + asprintf( &help_msg, "Comma-delimited list of libevent subsystems " "to use (%s -- available on your platform)", available_eventops ); @@ -197,6 +197,6 @@ static int libevent2022_register (void) } static int libevent2022_open(void) -{ +{ return OPAL_SUCCESS; } diff --git a/opal/mca/hwloc/Makefile.am b/opal/mca/hwloc/Makefile.am index 14b13c796d7..fdda561a64f 100644 --- a/opal/mca/hwloc/Makefile.am +++ b/opal/mca/hwloc/Makefile.am @@ -1,18 +1,22 @@ # -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # +EXTRA_DIST = autogen.options + # main library setup noinst_LTLIBRARIES = libmca_hwloc.la libmca_hwloc_la_SOURCES = # local files -headers = hwloc.h +headers = hwloc-internal.h libmca_hwloc_la_SOURCES += $(headers) # Conditionally install the header files diff --git a/opal/mca/hwloc/README.txt b/opal/mca/hwloc/README.txt index 480b6e930b2..f2554c185da 100644 --- a/opal/mca/hwloc/README.txt +++ b/opal/mca/hwloc/README.txt @@ -13,7 +13,7 @@ Notes for hwloc component maintainers: 1a. As a consequence, if you're adding a new hwloc version component, you'll need to .ompi_ignore all others while you're testing the new - one. + one. 2. 
If someone wants to fix #1 someday, we might be able to do what we do for libevent: OPAL_CONFIG_SUBDIR (instead of slurping in hwloc's diff --git a/opal/mca/hwloc/autogen.options b/opal/mca/hwloc/autogen.options new file mode 100644 index 00000000000..2c5ecb747b6 --- /dev/null +++ b/opal/mca/hwloc/autogen.options @@ -0,0 +1,13 @@ +# Copyright (c) 2017 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# + +# Per https://github.com/open-mpi/ompi/issues/2616, we cannot have an +# "hwloc.h" in the framework directory (due to conflicts of finding +# the system hwloc.h file when using "--with-hwloc=external"). Hence, +# we need to deviate from the framework norm of having +# framework/framework.h (i.e., hwloc/hwloc.h). Instead, we'll have +# hwloc/hwloc-internal.h to make sure that this header file can never +# be mistaken for a system-level hwloc.h. + +framework_header = hwloc-internal.h diff --git a/opal/mca/hwloc/base/Makefile.am b/opal/mca/hwloc/base/Makefile.am index 877bfe8c894..f5ae3d0445a 100644 --- a/opal/mca/hwloc/base/Makefile.am +++ b/opal/mca/hwloc/base/Makefile.am @@ -1,9 +1,10 @@ # # Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -13,12 +14,8 @@ headers += \ base/base.h libmca_hwloc_la_SOURCES += \ - base/hwloc_base_frame.c - -if OPAL_HAVE_HWLOC -libmca_hwloc_la_SOURCES += \ + base/hwloc_base_frame.c \ base/hwloc_base_dt.c \ base/hwloc_base_util.c \ base/hwloc_base_maffinity.c -endif diff --git a/opal/mca/hwloc/base/base.h b/opal/mca/hwloc/base/base.h index 3552c069ba8..01fcc441f9a 100644 --- a/opal/mca/hwloc/base/base.h +++ b/opal/mca/hwloc/base/base.h @@ -1,10 +1,10 @@ /* - * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2011-2017 Cisco Systems, Inc. 
All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -15,7 +15,7 @@ #include "opal/dss/dss_types.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" /* * Global functions for MCA overall hwloc open and close @@ -57,7 +57,6 @@ opal_hwloc_print_buffers_t *opal_hwloc_get_print_buffer(void); extern char* opal_hwloc_print_null; OPAL_DECLSPEC char* opal_hwloc_base_print_locality(opal_hwloc_locality_t locality); -#if OPAL_HAVE_HWLOC OPAL_DECLSPEC extern char *opal_hwloc_base_slot_list; OPAL_DECLSPEC extern char *opal_hwloc_base_cpu_set; OPAL_DECLSPEC extern hwloc_cpuset_t opal_hwloc_base_given_cpus; @@ -90,7 +89,7 @@ OPAL_DECLSPEC int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *poli /** * Loads opal_hwloc_my_cpuset (global variable in - * opal/mca/hwloc/hwloc.h) for this process. opal_hwloc_my_cpuset + * opal/mca/hwloc/hwloc-internal.h) for this process. opal_hwloc_my_cpuset * will be loaded with this process' binding, or, if the process is * not bound, use the hwloc root object's (available and online) * cpuset. @@ -173,8 +172,8 @@ OPAL_DECLSPEC unsigned int opal_hwloc_base_get_obj_idx(hwloc_topology_t topo, hwloc_obj_t obj, opal_hwloc_resource_type_t rtype); -OPAL_DECLSPEC int opal_hwloc_get_sorted_numa_list(hwloc_topology_t topo, - char* device_name, +OPAL_DECLSPEC int opal_hwloc_get_sorted_numa_list(hwloc_topology_t topo, + char* device_name, opal_list_t *sorted_list); /** @@ -209,7 +208,7 @@ OPAL_DECLSPEC char* opal_hwloc_base_check_on_coprocessor(void); */ OPAL_DECLSPEC int opal_hwloc_base_report_bind_failure(const char *file, int line, - const char *msg, + const char *msg, int rc); /** @@ -256,7 +255,7 @@ OPAL_DECLSPEC int opal_hwloc_base_cset2str(char *str, int len, hwloc_cpuset_t cpuset); /** - * Make a prettyprint string for a cset in a map format. + * Make a prettyprint string for a cset in a map format. 
* Example: [B./..] * Key: [] - signifies socket * / - divider between cores @@ -276,7 +275,6 @@ OPAL_DECLSPEC hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, * if responsible for freeing the returned string */ OPAL_DECLSPEC char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo); -#endif END_C_DECLS diff --git a/opal/mca/hwloc/base/help-opal-hwloc-base.txt b/opal/mca/hwloc/base/help-opal-hwloc-base.txt index 7e9274f1637..a2c6af0c444 100644 --- a/opal/mca/hwloc/base/help-opal-hwloc-base.txt +++ b/opal/mca/hwloc/base/help-opal-hwloc-base.txt @@ -3,9 +3,9 @@ # Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI's hwloc base support diff --git a/opal/mca/hwloc/base/hwloc_base_dt.c b/opal/mca/hwloc/base/hwloc_base_dt.c index 10e3af81233..13763ea895f 100644 --- a/opal/mca/hwloc/base/hwloc_base_dt.c +++ b/opal/mca/hwloc/base/hwloc_base_dt.c @@ -2,7 +2,7 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow */ @@ -168,7 +168,7 @@ int opal_hwloc_compare(const hwloc_topology_t topo1, char *x1=NULL, *x2=NULL; int l1, l2; int s; - + /* stop stupid compiler warnings */ t1 = (hwloc_topology_t)topo1; t2 = (hwloc_topology_t)topo2; @@ -181,7 +181,7 @@ int opal_hwloc_compare(const hwloc_topology_t topo1, } else if (d2 > d1) { return OPAL_VALUE2_GREATER; } - + /* do the comparison the "cheat" way - get an xml representation * of each tree, and strcmp! This will work fine for inventory @@ -205,7 +205,7 @@ int opal_hwloc_compare(const hwloc_topology_t topo1, } else if (s < 0) { return OPAL_VALUE2_GREATER; } - + /* compare the available support - hwloc unfortunately does * not include this info in its xml support! 
*/ diff --git a/opal/mca/hwloc/base/hwloc_base_frame.c b/opal/mca/hwloc/base/hwloc_base_frame.c index 5164329b9af..a4b9048e631 100644 --- a/opal/mca/hwloc/base/hwloc_base_frame.c +++ b/opal/mca/hwloc/base/hwloc_base_frame.c @@ -1,10 +1,12 @@ /* - * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -20,7 +22,7 @@ #include "opal/mca/base/base.h" #include "opal/threads/tsd.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/hwloc/base/base.h" @@ -36,7 +38,6 @@ * Globals */ bool opal_hwloc_base_inited = false; -#if OPAL_HAVE_HWLOC hwloc_topology_t opal_hwloc_topology=NULL; hwloc_cpuset_t opal_hwloc_my_cpuset=NULL; hwloc_cpuset_t opal_hwloc_base_given_cpus=NULL; @@ -58,9 +59,7 @@ hwloc_obj_type_t opal_hwloc_levels[] = { }; bool opal_hwloc_use_hwthreads_as_cpus = false; char *opal_hwloc_base_topo_file = NULL; -#endif -#if OPAL_HAVE_HWLOC static mca_base_var_enum_value_t hwloc_base_map[] = { {OPAL_HWLOC_BASE_MAP_NONE, "none"}, {OPAL_HWLOC_BASE_MAP_LOCAL_ONLY, "local_only"}, @@ -73,7 +72,6 @@ static mca_base_var_enum_value_t hwloc_failure_action[] = { {OPAL_HWLOC_BASE_MBFA_ERROR, "error"}, {0, NULL} }; -#endif static int opal_hwloc_base_register(mca_base_register_flag_t flags); static int opal_hwloc_base_open(mca_base_open_flag_t flags); @@ -82,15 +80,12 @@ static int opal_hwloc_base_close(void); MCA_BASE_FRAMEWORK_DECLARE(opal, hwloc, NULL, opal_hwloc_base_register, opal_hwloc_base_open, opal_hwloc_base_close, mca_hwloc_base_static_components, 0); -#if OPAL_HAVE_HWLOC static char *opal_hwloc_base_binding_policy = NULL; static 
bool opal_hwloc_base_bind_to_core = false; static bool opal_hwloc_base_bind_to_socket = false; -#endif static int opal_hwloc_base_register(mca_base_register_flag_t flags) { -#if OPAL_HAVE_HWLOC mca_base_var_enum_t *new_enum; int ret; @@ -172,142 +167,132 @@ static int opal_hwloc_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_topo_file); -#endif /* register parameters */ return OPAL_SUCCESS; } static int opal_hwloc_base_open(mca_base_open_flag_t flags) { + int rc; + opal_data_type_t tmp; + if (opal_hwloc_base_inited) { return OPAL_SUCCESS; } opal_hwloc_base_inited = true; -#if OPAL_HAVE_HWLOC - { - int rc; - opal_data_type_t tmp; + if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&opal_hwloc_binding_policy, + opal_hwloc_base_binding_policy))) { + return rc; + } - if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&opal_hwloc_binding_policy, - opal_hwloc_base_binding_policy))) { - return rc; + if (opal_hwloc_base_bind_to_core) { + opal_show_help("help-opal-hwloc-base.txt", "deprecated", true, + "--bind-to-core", "--bind-to core", + "hwloc_base_bind_to_core", "hwloc_base_binding_policy=core"); + /* set binding policy to core - error if something else already set */ + if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) && + OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) != OPAL_BIND_TO_CORE) { + /* error - cannot redefine the default ranking policy */ + opal_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, + "core", opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); + return OPAL_ERR_BAD_PARAM; } + OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE); + } - if (opal_hwloc_base_bind_to_core) { - opal_show_help("help-opal-hwloc-base.txt", "deprecated", true, - "--bind-to-core", "--bind-to core", - "hwloc_base_bind_to_core", "hwloc_base_binding_policy=core"); - /* set binding policy to core - error if 
something else already set */ - if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) && - OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) != OPAL_BIND_TO_CORE) { - /* error - cannot redefine the default ranking policy */ - opal_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, - "core", opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); - return OPAL_ERR_BAD_PARAM; - } - OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE); + if (opal_hwloc_base_bind_to_socket) { + opal_show_help("help-opal-hwloc-base.txt", "deprecated", true, + "--bind-to-socket", "--bind-to socket", + "hwloc_base_bind_to_socket", "hwloc_base_binding_policy=socket"); + /* set binding policy to socket - error if something else already set */ + if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) && + OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) != OPAL_BIND_TO_SOCKET) { + /* error - cannot redefine the default ranking policy */ + opal_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, + "socket", opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); + return OPAL_ERR_SILENT; } + OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_SOCKET); + } - if (opal_hwloc_base_bind_to_socket) { - opal_show_help("help-opal-hwloc-base.txt", "deprecated", true, - "--bind-to-socket", "--bind-to socket", - "hwloc_base_bind_to_socket", "hwloc_base_binding_policy=socket"); - /* set binding policy to socket - error if something else already set */ - if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) && - OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) != OPAL_BIND_TO_SOCKET) { - /* error - cannot redefine the default ranking policy */ - opal_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, - "socket", opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); - return OPAL_ERR_SILENT; - } - OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_SOCKET); + /* did the user provide a slot list? 
*/ + if (NULL != opal_hwloc_base_slot_list) { + /* if we already were given a policy, then this is an error */ + if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) { + opal_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, + "socket", opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); + return OPAL_ERR_SILENT; } + OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CPUSET); + } - /* did the user provide a slot list? */ - if (NULL != opal_hwloc_base_slot_list) { - /* if we already were given a policy, then this is an error */ - if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) { - opal_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, - "socket", opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); - return OPAL_ERR_SILENT; - } + /* cpu allocation specification */ + if (NULL != opal_hwloc_base_cpu_set) { + if (!OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) { + /* it is okay if a binding policy was already given - just ensure that + * we do bind to the given cpus if provided, otherwise this would be + * ignored if someone didn't also specify a binding policy + */ OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CPUSET); } + } - /* cpu allocation specification */ - if (NULL != opal_hwloc_base_cpu_set) { - if (!OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) { - /* it is okay if a binding policy was already given - just ensure that - * we do bind to the given cpus if provided, otherwise this would be - * ignored if someone didn't also specify a binding policy - */ - OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CPUSET); - } - } - - /* if we are binding to hwthreads, then we must use hwthreads as cpus */ - if (OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) == OPAL_BIND_TO_HWTHREAD) { - opal_hwloc_use_hwthreads_as_cpus = true; - } + /* if we are binding to hwthreads, then we must use hwthreads as cpus */ + if 
(OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) == OPAL_BIND_TO_HWTHREAD) { + opal_hwloc_use_hwthreads_as_cpus = true; + } - /* to support tools such as ompi_info, add the components - * to a list - */ - if (OPAL_SUCCESS != - mca_base_framework_components_open(&opal_hwloc_base_framework, flags)) { - return OPAL_ERROR; - } + /* to support tools such as ompi_info, add the components + * to a list + */ + if (OPAL_SUCCESS != + mca_base_framework_components_open(&opal_hwloc_base_framework, flags)) { + return OPAL_ERROR; + } - /* declare the hwloc data types */ - tmp = OPAL_HWLOC_TOPO; - if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_hwloc_pack, - opal_hwloc_unpack, - (opal_dss_copy_fn_t)opal_hwloc_copy, - (opal_dss_compare_fn_t)opal_hwloc_compare, - (opal_dss_print_fn_t)opal_hwloc_print, - OPAL_DSS_STRUCTURED, - "OPAL_HWLOC_TOPO", &tmp))) { - return rc; - } + /* declare the hwloc data types */ + tmp = OPAL_HWLOC_TOPO; + if (OPAL_SUCCESS != (rc = opal_dss.register_type(opal_hwloc_pack, + opal_hwloc_unpack, + (opal_dss_copy_fn_t)opal_hwloc_copy, + (opal_dss_compare_fn_t)opal_hwloc_compare, + (opal_dss_print_fn_t)opal_hwloc_print, + OPAL_DSS_STRUCTURED, + "OPAL_HWLOC_TOPO", &tmp))) { + return rc; } -#endif return OPAL_SUCCESS; } static int opal_hwloc_base_close(void) { + int ret; if (!opal_hwloc_base_inited) { return OPAL_SUCCESS; } -#if OPAL_HAVE_HWLOC - { - int ret; - - /* no need to close the component as it was statically opened */ + /* no need to close the component as it was statically opened */ - /* for support of tools such as ompi_info */ - ret = mca_base_framework_components_close (&opal_hwloc_base_framework, NULL); - if (OPAL_SUCCESS != ret) { - return ret; - } - - /* free memory */ - if (NULL != opal_hwloc_my_cpuset) { - hwloc_bitmap_free(opal_hwloc_my_cpuset); - opal_hwloc_my_cpuset = NULL; - } + /* for support of tools such as ompi_info */ + ret = mca_base_framework_components_close (&opal_hwloc_base_framework, NULL); + if (OPAL_SUCCESS != ret) { + 
return ret; + } - /* destroy the topology */ - if (NULL != opal_hwloc_topology) { - opal_hwloc_base_free_topology(opal_hwloc_topology); - opal_hwloc_topology = NULL; - } + /* free memory */ + if (NULL != opal_hwloc_my_cpuset) { + hwloc_bitmap_free(opal_hwloc_my_cpuset); + opal_hwloc_my_cpuset = NULL; + } + /* destroy the topology */ + if (NULL != opal_hwloc_topology) { + opal_hwloc_base_free_topology(opal_hwloc_topology); + opal_hwloc_topology = NULL; } -#endif + /* All done */ opal_hwloc_base_inited = false; @@ -322,7 +307,7 @@ static void buffer_cleanup(void *value) { int i; opal_hwloc_print_buffers_t *ptr; - + if (NULL != value) { ptr = (opal_hwloc_print_buffers_t*)value; for (i=0; i < OPAL_HWLOC_PRINT_NUM_BUFS; i++) { @@ -335,7 +320,7 @@ opal_hwloc_print_buffers_t *opal_hwloc_get_print_buffer(void) { opal_hwloc_print_buffers_t *ptr; int ret, i; - + if (!fns_init) { /* setup the print_args function */ if (OPAL_SUCCESS != (ret = opal_tsd_key_create(&print_tsd_key, buffer_cleanup))) { @@ -343,10 +328,10 @@ opal_hwloc_print_buffers_t *opal_hwloc_get_print_buffer(void) } fns_init = true; } - + ret = opal_tsd_getspecific(print_tsd_key, (void**)&ptr); if (OPAL_SUCCESS != ret) return NULL; - + if (NULL == ptr) { ptr = (opal_hwloc_print_buffers_t*)malloc(sizeof(opal_hwloc_print_buffers_t)); for (i=0; i < OPAL_HWLOC_PRINT_NUM_BUFS; i++) { @@ -355,7 +340,7 @@ opal_hwloc_print_buffers_t *opal_hwloc_get_print_buffer(void) ptr->cntr = 0; ret = opal_tsd_setspecific(print_tsd_key, (void*)ptr); } - + return (opal_hwloc_print_buffers_t*) ptr; } @@ -441,11 +426,10 @@ char* opal_hwloc_base_print_locality(opal_hwloc_locality_t locality) ptr->buffers[ptr->cntr][idx++] = 'K'; ptr->buffers[ptr->cntr][idx++] = '\0'; } - + return ptr->buffers[ptr->cntr]; } -#if OPAL_HAVE_HWLOC static void obj_data_const(opal_hwloc_obj_data_t *ptr) { ptr->available = NULL; @@ -515,7 +499,7 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec int i; opal_binding_policy_t 
tmp; char **tmpvals, **quals; - + /* set default */ tmp = 0; @@ -586,4 +570,3 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec *policy = tmp; return OPAL_SUCCESS; } -#endif diff --git a/opal/mca/hwloc/base/hwloc_base_maffinity.c b/opal/mca/hwloc/base/hwloc_base_maffinity.c index 98ed295597e..26298bdca1d 100644 --- a/opal/mca/hwloc/base/hwloc_base_maffinity.c +++ b/opal/mca/hwloc/base/hwloc_base_maffinity.c @@ -1,9 +1,10 @@ /* - * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -12,7 +13,7 @@ #include "opal/constants.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/hwloc/base/base.h" @@ -42,21 +43,21 @@ int opal_hwloc_base_set_process_membind_policy(void) policy = HWLOC_MEMBIND_BIND; flags = HWLOC_MEMBIND_STRICT; break; - + case OPAL_HWLOC_BASE_MAP_NONE: default: policy = HWLOC_MEMBIND_DEFAULT; flags = 0; break; } - + cpuset = hwloc_bitmap_alloc(); if (NULL == cpuset) { rc = OPAL_ERR_OUT_OF_RESOURCE; } else { int e; hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0); - rc = hwloc_set_membind(opal_hwloc_topology, + rc = hwloc_set_membind(opal_hwloc_topology, cpuset, policy, flags); e = errno; hwloc_bitmap_free(cpuset); @@ -69,7 +70,7 @@ int opal_hwloc_base_set_process_membind_policy(void) rc = 0; } } - + return (0 == rc) ? 
OPAL_SUCCESS : OPAL_ERROR; } @@ -99,10 +100,10 @@ int opal_hwloc_base_memory_set(opal_hwloc_base_memory_segment_t *segments, } hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0); for (i = 0; i < num_segments; ++i) { - if (0 != hwloc_set_area_membind(opal_hwloc_topology, + if (0 != hwloc_set_area_membind(opal_hwloc_topology, segments[i].mbs_start_addr, segments[i].mbs_len, cpuset, - HWLOC_MEMBIND_BIND, + HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_STRICT)) { rc = OPAL_ERROR; msg = "hwloc_set_area_membind() failure"; @@ -151,10 +152,10 @@ int opal_hwloc_base_membind(opal_hwloc_base_memory_segment_t *segs, } hwloc_bitmap_set(cpuset, node_id); for(i = 0; i < count; i++) { - if (0 != hwloc_set_area_membind(opal_hwloc_topology, + if (0 != hwloc_set_area_membind(opal_hwloc_topology, segs[i].mbs_start_addr, segs[i].mbs_len, cpuset, - HWLOC_MEMBIND_BIND, + HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_STRICT)) { rc = OPAL_ERROR; msg = "hwloc_set_area_membind() failure"; diff --git a/opal/mca/hwloc/base/hwloc_base_proc_mempolicy.c b/opal/mca/hwloc/base/hwloc_base_proc_mempolicy.c index d6d2d46d1a1..54c55648a78 100644 --- a/opal/mca/hwloc/base/hwloc_base_proc_mempolicy.c +++ b/opal/mca/hwloc/base/hwloc_base_proc_mempolicy.c @@ -1,9 +1,9 @@ /* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2017 Cisco Systems, Inc. 
All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -12,7 +12,7 @@ #include "opal/constants.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/hwloc/base/base.h" @@ -42,21 +42,21 @@ int opal_hwloc_base_set_process_membind_policy(void) policy = HWLOC_MEMBIND_BIND; flags = HWLOC_MEMBIND_STRICT; break; - + case OPAL_HWLOC_BASE_MAP_NONE: default: policy = HWLOC_MEMBIND_DEFAULT; flags = 0; break; } - + cpuset = hwloc_bitmap_alloc(); if (NULL == cpuset) { rc = OPAL_ERR_OUT_OF_RESOURCE; } else { int e; hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0); - rc = hwloc_set_membind(opal_hwloc_topology, + rc = hwloc_set_membind(opal_hwloc_topology, cpuset, policy, flags); e = errno; hwloc_bitmap_free(cpuset); @@ -69,6 +69,6 @@ int opal_hwloc_base_set_process_membind_policy(void) rc = 0; } } - + return (0 == rc) ? OPAL_SUCCESS : OPAL_ERROR; } diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index 8dfd3044303..e91c18aa838 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,20 +6,20 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. + * Copyright (c) 2011-2017 Cisco Systems, Inc. 
All rights reserved + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -40,7 +41,7 @@ #include "opal/util/show_help.h" #include "opal/threads/tsd.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/hwloc/base/base.h" /* @@ -56,10 +57,10 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, { hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE; hwloc_obj_t obj; - + /* hwloc isn't able to find cores on all platforms. Example: PPC64 running RHEL 5.4 (linux kernel 2.6.18) only reports NUMA - nodes and PU's. Fine. + nodes and PU's. Fine. However, note that hwloc_get_obj_by_type() will return NULL in 2 (effectively) different cases: @@ -70,7 +71,7 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, So first we have to see if we can find *any* cores by looking for the 0th core. If we find it, then try to find the Nth core. Otherwise, try to find the Nth PU. */ - if (NULL == hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0)) { + if (opal_hwloc_use_hwthreads_as_cpus || (NULL == hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0))) { obj_type = HWLOC_OBJ_PU; } @@ -90,10 +91,10 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, } return obj; } - + opal_output_verbose(5, opal_hwloc_base_framework.framework_output, "Searching for %d LOGICAL PU", lid); - + /* Now do the actual lookup. 
*/ obj = hwloc_get_obj_by_type(topo, obj_type, lid); OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, @@ -246,7 +247,7 @@ int opal_hwloc_base_get_topology(void) if (NULL == opal_hwloc_base_topo_file) { if (0 != hwloc_topology_init(&opal_hwloc_topology) || - 0 != hwloc_topology_set_flags(opal_hwloc_topology, + 0 != hwloc_topology_set_flags(opal_hwloc_topology, (HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES)) || 0 != hwloc_topology_load(opal_hwloc_topology)) { @@ -379,8 +380,8 @@ void opal_hwloc_base_get_local_cpuset(void) } /* get the cpus we are bound to */ - if (hwloc_get_cpubind(opal_hwloc_topology, - opal_hwloc_my_cpuset, + if (hwloc_get_cpubind(opal_hwloc_topology, + opal_hwloc_my_cpuset, HWLOC_CPUBIND_PROCESS) < 0) { /* we are not bound - use the root's available cpuset */ root = hwloc_get_root_obj(opal_hwloc_topology); @@ -398,7 +399,7 @@ int opal_hwloc_base_report_bind_failure(const char *file, if (!already_reported && OPAL_HWLOC_BASE_MBFA_SILENT != opal_hwloc_base_mbfa) { - char hostname[64]; + char hostname[OPAL_MAXHOSTNAMELEN]; gethostname(hostname, sizeof(hostname)); opal_show_help("help-opal-hwloc-base.txt", "mbind failure", true, @@ -491,6 +492,13 @@ static void df_search_cores(hwloc_obj_t obj, unsigned int *cnt) obj->userdata = (void*)data; } if (NULL == opal_hwloc_base_cpu_set) { + if (!hwloc_bitmap_intersects(obj->cpuset, obj->allowed_cpuset)) { + /* + * do not count not allowed cores (e.g. 
cores with zero allowed PU) + * if SMT is enabled, do count cores with at least one allowed hwthread + */ + return; + } data->npus = 1; } *cnt += data->npus; @@ -759,7 +767,7 @@ static hwloc_obj_t df_search(hwloc_topology_t topo, return obj; } } - + return NULL; } @@ -864,7 +872,7 @@ static hwloc_obj_t df_search_min_bound(hwloc_topology_t topo, data = OBJ_NEW(opal_hwloc_obj_data_t); start->userdata = data; } - + OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, "hwloc:base:min_bound_under_obj object %s:%u nbound %u min %u", hwloc_obj_type_string(target), start->logical_index, @@ -892,7 +900,7 @@ static hwloc_obj_t df_search_min_bound(hwloc_topology_t topo, break; } } - + return save; } @@ -1058,7 +1066,7 @@ static int socket_to_cpu_set(char *cpus, res = opal_hwloc_base_get_available_cpus(topo, obj); hwloc_bitmap_or(cpumask, cpumask, res); break; - + case 2: /* range of sockets was given */ lower_range = atoi(range[0]); upper_range = atoi(range[1]); @@ -1098,7 +1106,7 @@ static int socket_core_to_cpu_set(char *socket_core_list, socket_core = opal_argv_split(socket_core_list, ':'); socket_id = atoi(socket_core[0]); - + /* get the object for this socket id */ if (NULL == (socket = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, rtype))) { @@ -1153,7 +1161,7 @@ static int socket_core_to_cpu_set(char *socket_core_list, } opal_argv_free(list); break; - + case 2: /* range of core id's was given */ opal_output_verbose(5, opal_hwloc_base_framework.framework_output, "range of cores given: start %s stop %s", @@ -1176,7 +1184,7 @@ static int socket_core_to_cpu_set(char *socket_core_list, hwloc_bitmap_or(cpumask, cpumask, res); } break; - + default: opal_argv_free(range); opal_argv_free(socket_core); @@ -1210,14 +1218,14 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str, if (NULL == slot_str || 0 == strlen(slot_str)) { return OPAL_ERR_BAD_PARAM; } - + opal_output_verbose(5, opal_hwloc_base_framework.framework_output, "slot 
assignment: slot_list == %s", slot_str); /* split at ';' */ item = opal_argv_split(slot_str, ';'); - + /* start with a clean mask */ hwloc_bitmap_zero(cpumask); /* loop across the items and accumulate the mask */ @@ -1299,7 +1307,7 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str, } opal_argv_free(list); break; - + case 2: /* range of core id's was given */ lower_range = atoi(range[0]); upper_range = atoi(range[1]); @@ -1317,7 +1325,7 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str, hwloc_bitmap_or(cpumask, cpumask, pucpus); } break; - + default: opal_argv_free(range); opal_argv_free(item); @@ -1349,7 +1357,7 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top * NOTE: we may alter that latter part as hwloc's ability to * sense multi-cu, multi-cluster systems grows */ - locality = OPAL_PROC_ON_NODE; + locality = OPAL_PROC_ON_NODE | OPAL_PROC_ON_HOST | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER; /* if either cpuset is NULL, then that isn't bound */ if (NULL == cpuset1 || NULL == cpuset2) { @@ -1397,25 +1405,25 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top shared = true; switch(obj->type) { case HWLOC_OBJ_NODE: - locality = OPAL_PROC_ON_NUMA; + locality |= OPAL_PROC_ON_NUMA; break; case HWLOC_OBJ_SOCKET: - locality = OPAL_PROC_ON_SOCKET; + locality |= OPAL_PROC_ON_SOCKET; break; case HWLOC_OBJ_CACHE: if (3 == obj->attr->cache.depth) { - locality = OPAL_PROC_ON_L3CACHE; + locality |= OPAL_PROC_ON_L3CACHE; } else if (2 == obj->attr->cache.depth) { - locality = OPAL_PROC_ON_L2CACHE; + locality |= OPAL_PROC_ON_L2CACHE; } else { - locality = OPAL_PROC_ON_L1CACHE; + locality |= OPAL_PROC_ON_L1CACHE; } break; case HWLOC_OBJ_CORE: - locality = OPAL_PROC_ON_CORE; + locality |= OPAL_PROC_ON_CORE; break; case HWLOC_OBJ_PU: - locality = OPAL_PROC_ON_HWTHREAD; + locality |= OPAL_PROC_ON_HWTHREAD; break; default: /* just ignore it */ @@ -1505,7 +1513,7 @@ static char *hwloc_getline(FILE *fp) 
buff = strdup(input); return buff; } - + return NULL; } @@ -1700,29 +1708,26 @@ static char *bitmap2rangestr(int bitmap) /* * Make a map of socket/core/hwthread tuples */ -static int build_map(int *num_sockets_arg, int *num_cores_arg, +static int build_map(int *num_sockets_arg, int *num_cores_arg, hwloc_cpuset_t cpuset, int ***map, hwloc_topology_t topo) { - static int num_sockets = -1, num_cores = -1; + int num_sockets, num_cores; int socket_index, core_index, pu_index; hwloc_obj_t socket, core, pu; int **data; - /* Find out how many sockets we have (cached so that we don't have - to look this up every time) */ - if (num_sockets < 0) { - num_sockets = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET); - /* some systems (like the iMac) only have one - * socket and so don't report a socket - */ - if (0 == num_sockets) { - num_sockets = 1; - } - /* Lazy: take the total number of cores that we have in the - topology; that'll be more than the max number of cores - under any given socket */ - num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); + /* Find out how many sockets we have */ + num_sockets = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET); + /* some systems (like the iMac) only have one + * socket and so don't report a socket + */ + if (0 == num_sockets) { + num_sockets = 1; } + /* Lazy: take the total number of cores that we have in the + topology; that'll be more than the max number of cores + under any given socket */ + num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); *num_sockets_arg = num_sockets; *num_cores_arg = num_cores; @@ -1744,11 +1749,11 @@ static int build_map(int *num_sockets_arg, int *num_cores_arg, the socket/core/pu triples */ for (pu_index = 0, pu = hwloc_get_obj_inside_cpuset_by_type(topo, - cpuset, HWLOC_OBJ_PU, + cpuset, HWLOC_OBJ_PU, pu_index); NULL != pu; pu = hwloc_get_obj_inside_cpuset_by_type(topo, - cpuset, HWLOC_OBJ_PU, + cpuset, HWLOC_OBJ_PU, ++pu_index)) { /* Go upward and find the core this PU belongs to */ 
core = pu; @@ -1791,7 +1796,7 @@ int opal_hwloc_base_cset2str(char *str, int len, int ret, socket_index, core_index; char tmp[BUFSIZ]; const int stmp = sizeof(tmp) - 1; - int **map; + int **map=NULL; hwloc_obj_t root; opal_hwloc_topo_data_t *sum; @@ -1819,7 +1824,6 @@ int opal_hwloc_base_cset2str(char *str, int len, if (OPAL_SUCCESS != (ret = build_map(&num_sockets, &num_cores, cpuset, &map, topo))) { return ret; } - /* Iterate over the data matrix and build up the string */ first = true; for (socket_index = 0; socket_index < num_sockets; ++socket_index) { @@ -1830,21 +1834,25 @@ int opal_hwloc_base_cset2str(char *str, int len, } first = false; - snprintf(tmp, stmp, "socket %d[core %d[hwt %s]]", + snprintf(tmp, stmp, "socket %d[core %d[hwt %s]]", socket_index, core_index, bitmap2rangestr(map[socket_index][core_index])); strncat(str, tmp, len - strlen(str)); } } } - free(map[0]); - free(map); + if (NULL != map) { + if (NULL != map[0]) { + free(map[0]); + } + free(map); + } return OPAL_SUCCESS; } /* - * Make a prettyprint string for a cset in a map format. + * Make a prettyprint string for a cset in a map format. * Example: [B./..] 
* Key: [] - signifies socket * / - divider between cores @@ -1885,18 +1893,18 @@ int opal_hwloc_base_cset2mapstr(char *str, int len, /* Iterate over all existing sockets */ for (socket = hwloc_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0); - NULL != socket; + NULL != socket; socket = socket->next_cousin) { strncat(str, "[", len - strlen(str)); /* Iterate over all existing cores in this socket */ core_index = 0; for (core = hwloc_get_obj_inside_cpuset_by_type(topo, - socket->cpuset, + socket->cpuset, HWLOC_OBJ_CORE, core_index); - NULL != core; + NULL != core; core = hwloc_get_obj_inside_cpuset_by_type(topo, - socket->cpuset, + socket->cpuset, HWLOC_OBJ_CORE, ++core_index)) { if (core_index > 0) { strncat(str, "/", len - strlen(str)); @@ -1905,11 +1913,11 @@ int opal_hwloc_base_cset2mapstr(char *str, int len, /* Iterate over all existing PUs in this core */ pu_index = 0; for (pu = hwloc_get_obj_inside_cpuset_by_type(topo, - core->cpuset, + core->cpuset, HWLOC_OBJ_PU, pu_index); - NULL != pu; + NULL != pu; pu = hwloc_get_obj_inside_cpuset_by_type(topo, - core->cpuset, + core->cpuset, HWLOC_OBJ_PU, ++pu_index)) { /* Is this PU in the cpuset? 
*/ @@ -2015,7 +2023,7 @@ static void sort_by_dist(hwloc_topology_t topo, char* device_name, opal_list_t * } } -static int find_devices(hwloc_topology_t topo, char** device_name) +static int find_devices(hwloc_topology_t topo, char** device_name) { hwloc_obj_t device_obj = NULL; int count = 0; @@ -2049,7 +2057,7 @@ int opal_hwloc_get_sorted_numa_list(hwloc_topology_t topo, char* device_name, op item = opal_list_get_next(item)) { sum = (opal_hwloc_summary_t*)item; if (HWLOC_OBJ_NODE == sum->type) { - if (opal_list_get_size(&sum->sorted_by_dist_list) > 0) { + if (opal_list_get_size(&sum->sorted_by_dist_list) > 0) { OPAL_LIST_FOREACH(numa, &(sum->sorted_by_dist_list), opal_rmaps_numa_node_t) { copy_numa = OBJ_NEW(opal_rmaps_numa_node_t); copy_numa->index = numa->index; @@ -2095,7 +2103,7 @@ char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo) char *sig=NULL, *arch=NULL; hwloc_obj_t obj; unsigned i; - + nnuma = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_AVAILABLE); nsocket = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET, 0, OPAL_HWLOC_AVAILABLE); nl3 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CACHE, 3, OPAL_HWLOC_AVAILABLE); diff --git a/opal/mca/hwloc/configure.m4 b/opal/mca/hwloc/configure.m4 index b5d19e92a41..1f933c34f30 100644 --- a/opal/mca/hwloc/configure.m4 +++ b/opal/mca/hwloc/configure.m4 @@ -1,10 +1,10 @@ dnl -*- shell-script -*- dnl -dnl Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2010-2017 Cisco Systems, Inc. All rights reserved dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -13,7 +13,7 @@ dnl # their priorities in their configure.m4 files. They must also set # the shell variable $opal_hwloc_base_include to a header file # name (relative to the top OMPI source directory) that will be -# included in opal/mca/hwloc/hwloc.h. +# included in opal/mca/hwloc/hwloc-internal.h. dnl We only want one winning component (vs. 
STOP_AT_FIRST_PRIORITY, dnl which will allow all components of the same priority who succeed to @@ -24,9 +24,7 @@ m4_define(MCA_opal_hwloc_CONFIGURE_MODE, STOP_AT_FIRST) # available. As such, we may need to artificially force this # framework to be configured first. Hence, we move the entirety of # the hwloc framework's m4 to a separate macro and AC REQUIRE it. -# Other components can do this as well. This will guarantee that -# OPAL_HAVE_HWLOC is set to 0 or 1 *before* some component needs to -# check it. +# Other components can do this as well. AC_DEFUN([MCA_opal_hwloc_CONFIG],[ # Use a crude shell variable to know whether this component is @@ -48,10 +46,10 @@ AC_DEFUN([MCA_opal_hwloc_CONFIG_REQUIRE],[ [echo " " echo "==> Pre-emptively configuring the hwloc framework to satisfy dependencies."]) - # See if we want hwloc, and if so, internal vs external + # See if we want internal vs external hwloc AC_ARG_WITH(hwloc, AC_HELP_STRING([--with-hwloc(=DIR)], - [Build hwloc support. DIR can take one of three values: "internal", "external", or a valid directory name. "internal" (or no DIR value) forces Open MPI to use its internal copy of hwloc. "external" forces Open MPI to use an external installation of hwloc. Supplying a valid directory name also forces Open MPI to use an external installation of hwloc, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries.])) + [Build hwloc support. DIR can take one of three values: "internal", "external", or a valid directory name. "internal" (or no DIR value) forces Open MPI to use its internal copy of hwloc. "external" forces Open MPI to use an external installation of hwloc. Supplying a valid directory name also forces Open MPI to use an external installation of hwloc, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. Note that Open MPI no longer supports --without-hwloc.])) # Whether to enable or disable PCI support in embedded hwloc # support. 
@@ -70,12 +68,18 @@ AC_DEFUN([MCA_opal_hwloc_CONFIG_REQUIRE],[ enable_pci=]) ]) - # set defaults of not having any support + # check for error + AS_IF([test "$with_hwloc" = "no"], + [AC_MSG_WARN([Open MPI requires HWLOC support. It can be built]) + AC_MSG_WARN([with either its own internal copy of HWLOC, or with]) + AC_MSG_WARN([an external copy that you supply.]) + AC_MSG_ERROR([Cannot continue])]) + + # set default opal_hwloc_base_enable_xml=0 - OPAL_HAVE_HWLOC=0 - # Configure all the components - always have to do this, even if - # we configure --without-hwloc. Note that instead of passing in + # Configure all the components - always have to do this. Note that + # instead of passing in # the traditional $1 and $2 as the first arguments, we hard-code # "opal" and "hwloc", because this macro is invoked via AC # REQUIRE. @@ -88,9 +92,8 @@ AC_DEFUN([MCA_opal_hwloc_CONFIG_REQUIRE],[ # component's configure.m4 output. echo " " - # Unless --with-hwloc[=] was given, it's ok to have no hwloc - # component. - AS_IF([test "$with_hwloc" = "no" -o "$with_hwloc" = ""], [], + # If we aren't given a specific component, then we must find one + AS_IF([test "$with_hwloc" = ""], [], [ # STOP_AT_FIRST_PRIORITY will guarantee that we find at most # one. We need to check here that we found *at least* one. AS_IF([test "$MCA_opal_hwloc_STATIC_COMPONENTS" = ""], @@ -101,8 +104,6 @@ AC_DEFUN([MCA_opal_hwloc_CONFIG_REQUIRE],[ # If we have a winning component, do some more logic AS_IF([test "$MCA_opal_hwloc_STATIC_COMPONENTS" != ""], [ # We had a winner -- w00t! 
- OPAL_HAVE_HWLOC=1 - # The winning component will have told us where their header file # is located AC_MSG_CHECKING([for winning hwloc component header file]) @@ -142,10 +143,6 @@ AC_DEFUN([MCA_opal_hwloc_CONFIG_REQUIRE],[ OPAL_VAR_SCOPE_POP ]) - AM_CONDITIONAL(OPAL_HAVE_HWLOC, test $OPAL_HAVE_HWLOC -eq 1) - AC_DEFINE_UNQUOTED(OPAL_HAVE_HWLOC, $OPAL_HAVE_HWLOC, - [Whether we have hwloc support or not]) - # Similar to above, if this m4 is being invoked "early" via AC # REQUIRE, print out a nice banner that we have now finished # pre-emption and are returning to the Normal Order Of Things. diff --git a/opal/mca/hwloc/external/Makefile.am b/opal/mca/hwloc/external/Makefile.am index 3dc62c8c2d6..81f551b3883 100644 --- a/opal/mca/hwloc/external/Makefile.am +++ b/opal/mca/hwloc/external/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/hwloc/external/configure.m4 b/opal/mca/hwloc/external/configure.m4 index 07868e8d4e2..c3f17deb4b6 100644 --- a/opal/mca/hwloc/external/configure.m4 +++ b/opal/mca/hwloc/external/configure.m4 @@ -1,7 +1,7 @@ # -*- shell-script -*- # -# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Research Organization for Information Science +# Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2014-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# # $COPYRIGHT$ @@ -38,7 +38,7 @@ AC_DEFUN([MCA_opal_hwloc_external_POST_CONFIG],[ [Version of hwloc]) # Set this variable so that the framework m4 knows what - # file to include in opal/mca/hwloc/hwloc.h + # file to include in opal/mca/hwloc/hwloc-internal.h opal_hwloc_external_basedir=opal/mca/hwloc/external opal_hwloc_base_include="$opal_hwloc_external_basedir/external.h" @@ -61,22 +61,17 @@ AC_DEFUN([MCA_opal_hwloc_external_POST_CONFIG],[ # OPAL_HWLOC_WANT_VERBS_HELPER is set, that file will # include the external hwloc/openfabrics-verbs.h file (via # the MCA_hwloc_external_openfabrics_helper define). + AS_IF([test "$opal_hwloc_dir" != ""], + [opal_hwloc_include="$opal_hwloc_dir/include/hwloc.h" + opal_hwloc_openfabrics_include="$opal_hwloc_dir/include/hwloc/openfabrics-verbs.h"], + [opal_hwloc_include="hwloc.h" + opal_hwloc_openfabrics_include="hwloc/openfabrics-verbs.h"]) AC_DEFINE_UNQUOTED(MCA_hwloc_external_header, - ["$opal_hwloc_dir/include/hwloc.h"], + ["$opal_hwloc_include"], [Location of external hwloc header]) AC_DEFINE_UNQUOTED(MCA_hwloc_external_openfabrics_header, - ["$opal_hwloc_dir/include/hwloc/openfabrics-verbs.h"], - [Location of external hwloc header]) - - # These flags need to get passed to the wrapper compilers - # (this is unnecessary for the internal/embedded hwloc) - - # Finally, add some flags to the wrapper compiler if we're - # building with developer headers so that our headers can - # be found. 
- hwloc_external_WRAPPER_EXTRA_CPPFLAGS=$opal_hwloc_external_CPPFLAGS - hwloc_external_WRAPPER_EXTRA_LDFLAGS=$opal_hwloc_external_LDFLAGS - hwloc_external_WRAPPER_EXTRA_LIBS=$opal_hwloc_external_LIBS + ["$opal_hwloc_openfabrics_include"], + [Location of external hwloc OpenFabrics header]) ]) OPAL_VAR_SCOPE_POP ])dnl @@ -183,6 +178,17 @@ AC_DEFUN([MCA_opal_hwloc_external_CONFIG],[ AC_MSG_ERROR([Cannot continue])]) AS_IF([test "$opal_hwloc_dir" != ""], [CFLAGS=$opal_hwloc_external_CFLAGS_save]) + + # These flags need to get passed to the wrapper compilers + # (this is unnecessary for the internal/embedded hwloc) + + # Finally, add some flags to the wrapper compiler if we're + # building with developer headers so that our headers can + # be found. + hwloc_external_WRAPPER_EXTRA_CPPFLAGS=$opal_hwloc_external_CPPFLAGS + hwloc_external_WRAPPER_EXTRA_LDFLAGS=$opal_hwloc_external_LDFLAGS + hwloc_external_WRAPPER_EXTRA_LIBS=$opal_hwloc_external_LIBS + $1], [$2]) diff --git a/opal/mca/hwloc/external/external.h b/opal/mca/hwloc/external/external.h index 25071e5cb3e..ed76ece744e 100644 --- a/opal/mca/hwloc/external/external.h +++ b/opal/mca/hwloc/external/external.h @@ -1,14 +1,16 @@ /* - * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * When this component is used, this file is included in the rest of - * the OPAL/ORTE/OMPI code base via opal/mca/hwloc/hwloc.h. As such, + * the OPAL/ORTE/OMPI code base via opal/mca/hwloc/hwloc-internal.h. As such, * this header represents the public interface to this static component. */ @@ -25,7 +27,7 @@ BEGIN_C_DECLS functions in that file that invoke ibv_* functions. 
Some linkers (e.g., Solaris Studio Compilers) will instantiate those static inline functions even if we don't use them, and therefore we need - to be able to resolve the ibv_* symbols at link time. + to be able to resolve the ibv_* symbols at link time. Since -libverbs is only specified in places where we use other ibv_* functions (e.g., the OpenFabrics-based BTLs), that means that diff --git a/opal/mca/hwloc/external/hwloc_external_component.c b/opal/mca/hwloc/external/hwloc_external_component.c index 690beab2bf8..025267bbe53 100644 --- a/opal/mca/hwloc/external/hwloc_external_component.c +++ b/opal/mca/hwloc/external/hwloc_external_component.c @@ -1,20 +1,20 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" #include "opal/constants.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" /* * Public string showing the sysinfo ompi_linux component version number diff --git a/opal/mca/hwloc/hwloc-internal.h b/opal/mca/hwloc/hwloc-internal.h new file mode 100644 index 00000000000..a074be86e03 --- /dev/null +++ b/opal/mca/hwloc/hwloc-internal.h @@ -0,0 +1,223 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + */ + +#ifndef OPAL_MCA_HWLOC_H +#define OPAL_MCA_HWLOC_H + +#include "opal_config.h" + +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_TIME_H +#include +#endif +#include +#include + +#include "opal/class/opal_list.h" +#include "opal/class/opal_value_array.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +BEGIN_C_DECLS + +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#undef WIN32_LEAN_AND_MEAN +typedef unsigned char u_char; +typedef unsigned short u_short; +#endif + +/** + * Structure for hwloc components. + */ +struct opal_hwloc_base_component_2_0_0_t { + /** MCA base component */ + mca_base_component_t base_version; + /** MCA base data */ + mca_base_component_data_t base_data; +}; + +/** + * Convenience typedef + */ +typedef struct opal_hwloc_base_component_2_0_0_t opal_hwloc_base_component_2_0_0_t; +typedef struct opal_hwloc_base_component_2_0_0_t opal_hwloc_component_t; + +/** + * Macro for use in components that are of type hwloc + */ +#define OPAL_HWLOC_BASE_VERSION_2_0_0 \ + OPAL_MCA_BASE_VERSION_2_1_0("hwloc", 2, 0, 0) + + +/* ******************************************************************** */ +/* Although we cannot bind if --without-hwloc is set, + * we do still need to know some basic locality data + * like on_node and not_on_node. 
So ensure that we + * always have access to that much info by including + * the definitions here, outside the if-have-hwloc test + */ +typedef uint16_t opal_hwloc_locality_t; +#define OPAL_HWLOC_LOCALITY_T OPAL_UINT16 + +/** Process locality definitions */ +enum { + OPAL_PROC_LOCALITY_UNKNOWN = 0x0000, + OPAL_PROC_NON_LOCAL = 0x8000, + OPAL_PROC_ON_CLUSTER = 0x0001, + OPAL_PROC_ON_CU = 0x0002, + OPAL_PROC_ON_HOST = 0x0004, + OPAL_PROC_ON_BOARD = 0x0008, + OPAL_PROC_ON_NODE = 0x000c, // same host and board + OPAL_PROC_ON_NUMA = 0x0010, + OPAL_PROC_ON_SOCKET = 0x0020, + OPAL_PROC_ON_L3CACHE = 0x0040, + OPAL_PROC_ON_L2CACHE = 0x0080, + OPAL_PROC_ON_L1CACHE = 0x0100, + OPAL_PROC_ON_CORE = 0x0200, + OPAL_PROC_ON_HWTHREAD = 0x0400, + OPAL_PROC_ALL_LOCAL = 0x0fff, +}; + +/** Process locality macros */ +#define OPAL_PROC_ON_LOCAL_CLUSTER(n) (!!((n) & OPAL_PROC_ON_CLUSTER)) +#define OPAL_PROC_ON_LOCAL_CU(n) (!!((n) & OPAL_PROC_ON_CU)) +#define OPAL_PROC_ON_LOCAL_HOST(n) (!!((n) & OPAL_PROC_ON_HOST)) +#define OPAL_PROC_ON_LOCAL_BOARD(n) (!!((n) & OPAL_PROC_ON_BOARD)) +#define OPAL_PROC_ON_LOCAL_NODE(n) (OPAL_PROC_ON_LOCAL_HOST(n) && OPAL_PROC_ON_LOCAL_BOARD(n)) +#define OPAL_PROC_ON_LOCAL_NUMA(n) (!!((n) & OPAL_PROC_ON_NUMA)) +#define OPAL_PROC_ON_LOCAL_SOCKET(n) (!!((n) & OPAL_PROC_ON_SOCKET)) +#define OPAL_PROC_ON_LOCAL_L3CACHE(n) (!!((n) & OPAL_PROC_ON_L3CACHE)) +#define OPAL_PROC_ON_LOCAL_L2CACHE(n) (!!((n) & OPAL_PROC_ON_L2CACHE)) +#define OPAL_PROC_ON_LOCAL_L1CACHE(n) (!!((n) & OPAL_PROC_ON_L1CACHE)) +#define OPAL_PROC_ON_LOCAL_CORE(n) (!!((n) & OPAL_PROC_ON_CORE)) +#define OPAL_PROC_ON_LOCAL_HWTHREAD(n) (!!((n) & OPAL_PROC_ON_HWTHREAD)) + +/* ******************************************************************** */ + +/** + * Struct used to describe a section of memory (starting address + * and length). This is really the same thing as an iovec, but + * we include a separate type for it for at least 2 reasons: + * + * 1. 
Some OS's iovec definitions are exceedingly lame (e.g., + * Solaris 9 has the length argument as an int, instead of a + * size_t). + * + * 2. We reserve the right to expand/change this struct in the + * future. + */ +typedef struct { + /** Starting address of segment */ + void *mbs_start_addr; + /** Length of segment */ + size_t mbs_len; +} opal_hwloc_base_memory_segment_t; + +/* include implementation to call */ +#include MCA_hwloc_IMPLEMENTATION_HEADER + +/* define type of processor info requested */ +typedef uint8_t opal_hwloc_resource_type_t; +#define OPAL_HWLOC_PHYSICAL 1 +#define OPAL_HWLOC_LOGICAL 2 +#define OPAL_HWLOC_AVAILABLE 3 + +/* structs for storing info on objects */ +typedef struct { + opal_object_t super; + hwloc_cpuset_t available; + bool npus_calculated; + unsigned int npus; + unsigned int idx; + unsigned int num_bound; +} opal_hwloc_obj_data_t; +OBJ_CLASS_DECLARATION(opal_hwloc_obj_data_t); + +typedef struct { + opal_list_item_t super; + hwloc_obj_type_t type; + unsigned cache_level; + unsigned int num_objs; + opal_hwloc_resource_type_t rtype; + opal_list_t sorted_by_dist_list; +} opal_hwloc_summary_t; +OBJ_CLASS_DECLARATION(opal_hwloc_summary_t); + +typedef struct { + opal_object_t super; + hwloc_cpuset_t available; + opal_list_t summaries; + + /** \brief Additional space for custom data */ + void *userdata; +} opal_hwloc_topo_data_t; +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_hwloc_topo_data_t); + +/* define binding policies */ +typedef uint16_t opal_binding_policy_t; +#define OPAL_BINDING_POLICY OPAL_UINT16 + +/* binding directives */ +#define OPAL_BIND_IF_SUPPORTED 0x1000 +#define OPAL_BIND_ALLOW_OVERLOAD 0x2000 +#define OPAL_BIND_GIVEN 0x4000 +/* binding policies - any changes in these + * values must be reflected in orte/mca/rmaps/rmaps.h + */ +#define OPAL_BIND_TO_NONE 1 +#define OPAL_BIND_TO_BOARD 2 +#define OPAL_BIND_TO_NUMA 3 +#define OPAL_BIND_TO_SOCKET 4 +#define OPAL_BIND_TO_L3CACHE 5 +#define OPAL_BIND_TO_L2CACHE 6 +#define 
OPAL_BIND_TO_L1CACHE 7 +#define OPAL_BIND_TO_CORE 8 +#define OPAL_BIND_TO_HWTHREAD 9 +#define OPAL_BIND_TO_CPUSET 10 +#define OPAL_GET_BINDING_POLICY(pol) \ + ((pol) & 0x0fff) +#define OPAL_SET_BINDING_POLICY(target, pol) \ + (target) = (pol) | (((target) & 0xf000) | OPAL_BIND_GIVEN) +#define OPAL_SET_DEFAULT_BINDING_POLICY(target, pol) \ + do { \ + if (!OPAL_BINDING_POLICY_IS_SET((target))) { \ + (target) = (pol) | (((target) & 0xf000) | \ + OPAL_BIND_IF_SUPPORTED); \ + } \ + } while(0); + +/* check if policy is set */ +#define OPAL_BINDING_POLICY_IS_SET(pol) \ + ((pol) & 0x4000) +/* macro to detect if binding was qualified */ +#define OPAL_BINDING_REQUIRED(n) \ + (!(OPAL_BIND_IF_SUPPORTED & (n))) +/* macro to detect if binding is forced */ +#define OPAL_BIND_OVERLOAD_ALLOWED(n) \ + (OPAL_BIND_ALLOW_OVERLOAD & (n)) + +/* some global values */ +OPAL_DECLSPEC extern hwloc_topology_t opal_hwloc_topology; +OPAL_DECLSPEC extern opal_binding_policy_t opal_hwloc_binding_policy; +OPAL_DECLSPEC extern hwloc_cpuset_t opal_hwloc_my_cpuset; +OPAL_DECLSPEC extern bool opal_hwloc_report_bindings; +OPAL_DECLSPEC extern hwloc_obj_type_t opal_hwloc_levels[]; +OPAL_DECLSPEC extern bool opal_hwloc_use_hwthreads_as_cpus; + +END_C_DECLS + +#endif /* OPAL_HWLOC_H_ */ diff --git a/opal/mca/hwloc/hwloc.h b/opal/mca/hwloc/hwloc.h deleted file mode 100644 index 319fa909a82..00000000000 --- a/opal/mca/hwloc/hwloc.h +++ /dev/null @@ -1,230 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - */ - -#ifndef OPAL_MCA_HWLOC_H -#define OPAL_MCA_HWLOC_H - -#include "opal_config.h" - -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_TIME_H -#include -#endif -#ifdef HAVE_STDINT_H -#include -#endif -#ifdef HAVE_STDARG_H -#include -#endif - -#include "opal/class/opal_list.h" -#include "opal/class/opal_value_array.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" - -BEGIN_C_DECLS - -#ifdef WIN32 -#define WIN32_LEAN_AND_MEAN -#include -#undef WIN32_LEAN_AND_MEAN -typedef unsigned char u_char; -typedef unsigned short u_short; -#endif - -/** - * Structure for hwloc components. - */ -struct opal_hwloc_base_component_2_0_0_t { - /** MCA base component */ - mca_base_component_t base_version; - /** MCA base data */ - mca_base_component_data_t base_data; -}; - -/** - * Convenience typedef - */ -typedef struct opal_hwloc_base_component_2_0_0_t opal_hwloc_base_component_2_0_0_t; -typedef struct opal_hwloc_base_component_2_0_0_t opal_hwloc_component_t; - -/** - * Macro for use in components that are of type hwloc - */ -#define OPAL_HWLOC_BASE_VERSION_2_0_0 \ - OPAL_MCA_BASE_VERSION_2_1_0("hwloc", 2, 0, 0) - - -/* ******************************************************************** */ -/* Although we cannot bind if --without-hwloc is set, - * we do still need to know some basic locality data - * like on_node and not_on_node. 
So ensure that we - * always have access to that much info by including - * the definitions here, outside the if-have-hwloc test - */ -typedef uint16_t opal_hwloc_locality_t; -#define OPAL_HWLOC_LOCALITY_T OPAL_UINT16 - -/** Process locality definitions */ -enum { - OPAL_PROC_LOCALITY_UNKNOWN = 0x0000, - OPAL_PROC_NON_LOCAL = 0x8000, - OPAL_PROC_ON_CLUSTER = 0x0001, - OPAL_PROC_ON_CU = 0x0003, - OPAL_PROC_ON_HOST = 0x0007, - OPAL_PROC_ON_BOARD = 0x000f, - OPAL_PROC_ON_NODE = 0x000f, // same host and board - OPAL_PROC_ON_NUMA = 0x001f, - OPAL_PROC_ON_SOCKET = 0x003f, - OPAL_PROC_ON_L3CACHE = 0x007f, - OPAL_PROC_ON_L2CACHE = 0x00ff, - OPAL_PROC_ON_L1CACHE = 0x01ff, - OPAL_PROC_ON_CORE = 0x03ff, - OPAL_PROC_ON_HWTHREAD = 0x07ff, - OPAL_PROC_ALL_LOCAL = 0x0fff -}; - -/** Process locality macros */ -#define OPAL_PROC_ON_LOCAL_CLUSTER(n) ((n) & OPAL_PROC_ON_CLUSTER) -#define OPAL_PROC_ON_LOCAL_CU(n) (!(((n) & OPAL_PROC_ON_CU) ^ OPAL_PROC_ON_CU)) -#define OPAL_PROC_ON_LOCAL_HOST(n) (!(((n) & OPAL_PROC_ON_HOST) ^ OPAL_PROC_ON_HOST)) -#define OPAL_PROC_ON_LOCAL_BOARD(n) (!(((n) & OPAL_PROC_ON_BOARD) ^ OPAL_PROC_ON_BOARD)) -#define OPAL_PROC_ON_LOCAL_NODE(n) (OPAL_PROC_ON_LOCAL_HOST(n) && OPAL_PROC_ON_LOCAL_BOARD(n)) -#define OPAL_PROC_ON_LOCAL_NUMA(n) (!(((n) & OPAL_PROC_ON_NUMA) ^ OPAL_PROC_ON_NUMA)) -#define OPAL_PROC_ON_LOCAL_SOCKET(n) (!(((n) & OPAL_PROC_ON_SOCKET) ^ OPAL_PROC_ON_SOCKET)) -#define OPAL_PROC_ON_LOCAL_L3CACHE(n) (!(((n) & OPAL_PROC_ON_L3CACHE) ^ OPAL_PROC_ON_L3CACHE)) -#define OPAL_PROC_ON_LOCAL_L2CACHE(n) (!(((n) & OPAL_PROC_ON_L2CACHE) ^ OPAL_PROC_ON_L2CACHE)) -#define OPAL_PROC_ON_LOCAL_L1CACHE(n) (!(((n) & OPAL_PROC_ON_L1CACHE) ^ OPAL_PROC_ON_L1CACHE)) -#define OPAL_PROC_ON_LOCAL_CORE(n) (!(((n) & OPAL_PROC_ON_CORE) ^ OPAL_PROC_ON_CORE)) -#define OPAL_PROC_ON_LOCAL_HWTHREAD(n) (!(((n) & OPAL_PROC_ON_HWTHREAD) ^ OPAL_PROC_ON_HWTHREAD)) - -/* ******************************************************************** */ - -/** - * Struct used to describe a 
section of memory (starting address - * and length). This is really the same thing as an iovec, but - * we include a separate type for it for at least 2 reasons: - * - * 1. Some OS's iovec definitions are exceedingly lame (e.g., - * Solaris 9 has the length argument as an int, instead of a - * size_t). - * - * 2. We reserve the right to expand/change this struct in the - * future. - */ -typedef struct { - /** Starting address of segment */ - void *mbs_start_addr; - /** Length of segment */ - size_t mbs_len; -} opal_hwloc_base_memory_segment_t; - -/* include implementation to call */ -#if OPAL_HAVE_HWLOC -#include MCA_hwloc_IMPLEMENTATION_HEADER - -/* define type of processor info requested */ -typedef uint8_t opal_hwloc_resource_type_t; -#define OPAL_HWLOC_PHYSICAL 1 -#define OPAL_HWLOC_LOGICAL 2 -#define OPAL_HWLOC_AVAILABLE 3 - -/* structs for storing info on objects */ -typedef struct { - opal_object_t super; - hwloc_cpuset_t available; - bool npus_calculated; - unsigned int npus; - unsigned int idx; - unsigned int num_bound; -} opal_hwloc_obj_data_t; -OBJ_CLASS_DECLARATION(opal_hwloc_obj_data_t); - -typedef struct { - opal_list_item_t super; - hwloc_obj_type_t type; - unsigned cache_level; - unsigned int num_objs; - opal_hwloc_resource_type_t rtype; - opal_list_t sorted_by_dist_list; -} opal_hwloc_summary_t; -OBJ_CLASS_DECLARATION(opal_hwloc_summary_t); - -typedef struct { - opal_object_t super; - hwloc_cpuset_t available; - opal_list_t summaries; - - /** \brief Additional space for custom data */ - void *userdata; -} opal_hwloc_topo_data_t; -OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_hwloc_topo_data_t); - -/* define binding policies */ -typedef uint16_t opal_binding_policy_t; -#define OPAL_BINDING_POLICY OPAL_UINT16 - -/* binding directives */ -#define OPAL_BIND_IF_SUPPORTED 0x1000 -#define OPAL_BIND_ALLOW_OVERLOAD 0x2000 -#define OPAL_BIND_GIVEN 0x4000 -/* binding policies - any changes in these - * values must be reflected in orte/mca/rmaps/rmaps.h - */ 
-#define OPAL_BIND_TO_NONE 1 -#define OPAL_BIND_TO_BOARD 2 -#define OPAL_BIND_TO_NUMA 3 -#define OPAL_BIND_TO_SOCKET 4 -#define OPAL_BIND_TO_L3CACHE 5 -#define OPAL_BIND_TO_L2CACHE 6 -#define OPAL_BIND_TO_L1CACHE 7 -#define OPAL_BIND_TO_CORE 8 -#define OPAL_BIND_TO_HWTHREAD 9 -#define OPAL_BIND_TO_CPUSET 10 -#define OPAL_GET_BINDING_POLICY(pol) \ - ((pol) & 0x0fff) -#define OPAL_SET_BINDING_POLICY(target, pol) \ - (target) = (pol) | (((target) & 0xf000) | OPAL_BIND_GIVEN) -#define OPAL_SET_DEFAULT_BINDING_POLICY(target, pol) \ - do { \ - if (!OPAL_BINDING_POLICY_IS_SET((target))) { \ - (target) = (pol) | (((target) & 0xf000) | \ - OPAL_BIND_IF_SUPPORTED); \ - } \ - } while(0); - -/* check if policy is set */ -#define OPAL_BINDING_POLICY_IS_SET(pol) \ - ((pol) & 0x4000) -/* macro to detect if binding was qualified */ -#define OPAL_BINDING_REQUIRED(n) \ - (!(OPAL_BIND_IF_SUPPORTED & (n))) -/* macro to detect if binding is forced */ -#define OPAL_BIND_OVERLOAD_ALLOWED(n) \ - (OPAL_BIND_ALLOW_OVERLOAD & (n)) - -/* some global values */ -OPAL_DECLSPEC extern hwloc_topology_t opal_hwloc_topology; -OPAL_DECLSPEC extern opal_binding_policy_t opal_hwloc_binding_policy; -OPAL_DECLSPEC extern hwloc_cpuset_t opal_hwloc_my_cpuset; -OPAL_DECLSPEC extern bool opal_hwloc_report_bindings; -OPAL_DECLSPEC extern hwloc_obj_type_t opal_hwloc_levels[]; -OPAL_DECLSPEC extern bool opal_hwloc_use_hwthreads_as_cpus; - -#endif - -END_C_DECLS - -#endif /* OPAL_HWLOC_H_ */ diff --git a/opal/mca/hwloc/hwloc1110/Makefile.am b/opal/mca/hwloc/hwloc1110/Makefile.am deleted file mode 100644 index 4fb47162acd..00000000000 --- a/opal/mca/hwloc/hwloc1110/Makefile.am +++ /dev/null @@ -1,83 +0,0 @@ -# -# Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2015 Intel, Inc. All right reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Due to what might be a bug in Automake, we need to remove stamp-h? -# files manually. 
See -# http://debbugs.gnu.org/cgi/bugreport.cgi?bug=19418. -DISTCLEANFILES = \ - hwloc/include/hwloc/autogen/stamp-h? \ - hwloc/include/private/autogen/stamp-h? - -# Need to include these files so that these directories are carried in -# the tarball (in case someone invokes autogen.sh on a dist tarball). -EXTRA_DIST = \ - hwloc/doc/README.txt \ - hwloc/tests/README.txt \ - hwloc/utils/README.txt - -SUBDIRS = hwloc - -# Headers and sources -headers = hwloc1110.h -sources = hwloc1110_component.c - -# We only ever build this component statically -noinst_LTLIBRARIES = libmca_hwloc_hwloc1110.la -libmca_hwloc_hwloc1110_la_SOURCES = $(headers) $(sources) -nodist_libmca_hwloc_hwloc1110_la_SOURCES = $(nodist_headers) -libmca_hwloc_hwloc1110_la_LDFLAGS = -module -avoid-version $(opal_hwloc_hwloc1110_LDFLAGS) -libmca_hwloc_hwloc1110_la_LIBADD = $(opal_hwloc_hwloc1110_LIBS) -libmca_hwloc_hwloc1110_la_DEPENDENCIES = \ - $(HWLOC_top_builddir)/src/libhwloc_embedded.la - -# Since the rest of the code base includes the underlying hwloc.h, we -# also have to install the underlying header files when -# --with-devel-headers is specified. hwloc doesn't support this; the -# least gross way to make this happen is just to list all of hwloc's -# header files here. 
:-( -headers += \ - hwloc/include/hwloc.h \ - hwloc/include/hwloc/bitmap.h \ - hwloc/include/hwloc/cuda.h \ - hwloc/include/hwloc/cudart.h \ - hwloc/include/hwloc/deprecated.h \ - hwloc/include/hwloc/diff.h \ - hwloc/include/hwloc/gl.h \ - hwloc/include/hwloc/helper.h \ - hwloc/include/hwloc/inlines.h \ - hwloc/include/hwloc/intel-mic.h \ - hwloc/include/hwloc/myriexpress.h \ - hwloc/include/hwloc/nvml.h \ - hwloc/include/hwloc/opencl.h \ - hwloc/include/hwloc/openfabrics-verbs.h \ - hwloc/include/hwloc/plugins.h \ - hwloc/include/hwloc/rename.h \ - hwloc/include/private/private.h \ - hwloc/include/private/debug.h \ - hwloc/include/private/misc.h \ - hwloc/include/private/cpuid-x86.h -nodist_headers = hwloc/include/hwloc/autogen/config.h - -if HWLOC_HAVE_LINUX -headers += \ - hwloc/include/hwloc/linux.h \ - hwloc/include/hwloc/linux-libnuma.h -endif HWLOC_HAVE_LINUX - -if HWLOC_HAVE_SCHED_SETAFFINITY -headers += hwloc/include/hwloc/glibc-sched.h -endif HWLOC_HAVE_SCHED_SETAFFINITY - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -opaldir = $(opalincludedir)/$(subdir) -nobase_opal_HEADERS = $(headers) -nobase_nodist_opal_HEADERS = $(nodist_headers) -endif diff --git a/opal/mca/hwloc/hwloc1110/README-ompi.txt b/opal/mca/hwloc/hwloc1110/README-ompi.txt deleted file mode 100644 index 948285aaff0..00000000000 --- a/opal/mca/hwloc/hwloc1110/README-ompi.txt +++ /dev/null @@ -1,35 +0,0 @@ -Applied the following patches from the upstream hwloc 1.9 branch after -the v1.9.1 release: - -All relevant commits up to open-mpi/hwloc@4e23b12 (i.e., the HEAD as -of 27 March 2015). "Relevant" commits are defined as those that -included files that are embedded in the Open MPI tree (e.g., updates -to files in docs/, utils/, etc. aren't relevant because they are not -embedded in the Open MPI tree). 
To be specific, the following commits -have been cherry-picked over to Open MPI: - -* open-mpi/hwloc@7c03216 v1.9.1 released, doing 1.9.2rc1 now -* open-mpi/hwloc@b35ced8 misc.h: Fix hwloc_strncasecmp() build under strict flags on BSD -* open-mpi/hwloc@d8c3f3d misc.h: Fix hwloc_strncasecmp() with some icc -* open-mpi/hwloc@f705a23 Use gcc's __asm__ version of the asm extension, which can be used in all standards -* open-mpi/hwloc@307726a configure: fix the check for X11/Xutil.h -* open-mpi/hwloc@ec58c05 errors: improve the advice to send hwloc-gather-topology files in the OS error message -* open-mpi/hwloc@35c743d NEWS update -* open-mpi/hwloc@868170e API: clearly state that os_index isn't unique while logical_index is -* open-mpi/hwloc@851532d x86 and OSF: Don't forget to set NUMA node nodeset -* open-mpi/hwloc@790aa2e cpuid-x86: Fix duplicate asm labels in case of heavy inlining on x86-32 -* open-mpi/hwloc@dd09aa5 debug: fix an overzealous assertion about the parent cpuset vs its children -* open-mpi/hwloc@769b9b5 core: fix the merging of identical objects in presence of Misc objects -* open-mpi/hwloc@71da0f1 core: reorder children in merge_useless_child() as well -* open-mpi/hwloc@c9cef07 hpux: improve hwloc_hpux_find_ldom() looking for NUMA node -* open-mpi/hwloc@cdffea6 x86: use ulong for cache sizes, uint won't be enough in the near future -* open-mpi/hwloc@55b0676 x86: use Group instead of Misc for unknown x2apic levels -* open-mpi/hwloc@7764ce5 synthetic: Misc levels are not allowed in the synthetic description -* open-mpi/hwloc@5b2dce1 error: point to the FAQ when displaying the big OS error message -* open-mpi/hwloc@c7bd9e6 pci: fix SR-IOV VF vendor/device names -* open-mpi/hwloc@a0f72ef distances: when we fail to insert an intermediate group, don't try to group further above -* open-mpi/hwloc@e419811 AIX: Fix PU os_index -* open-mpi/hwloc@08ab793 groups: add complete sets when inserting distance/pci groups -* open-mpi/hwloc@c66e714 core: only update 
root->complete sets if insert succeeds -* open-mpi/hwloc@01da9b9 bitmap: fix a corner case in hwloc_bitmap_isincluded() with infinite sets -* open-mpi/hwloc@e7b192b pci: fix bridge depth diff --git a/opal/mca/hwloc/hwloc1110/configure.m4 b/opal/mca/hwloc/hwloc1110/configure.m4 deleted file mode 100644 index 72a249f8f9e..00000000000 --- a/opal/mca/hwloc/hwloc1110/configure.m4 +++ /dev/null @@ -1,170 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# -# Priority -# -AC_DEFUN([MCA_opal_hwloc_hwloc1110_PRIORITY], [90]) - -# -# Force this component to compile in static-only mode -# -AC_DEFUN([MCA_opal_hwloc_hwloc1110_COMPILE_MODE], [ - AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) - $4="static" - AC_MSG_RESULT([$$4]) -]) - -# Include hwloc m4 files -m4_include(opal/mca/hwloc/hwloc1110/hwloc/config/hwloc.m4) -m4_include(opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_pkg.m4) -m4_include(opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_check_attributes.m4) -m4_include(opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_check_visibility.m4) -m4_include(opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_check_vendor.m4) -m4_include(opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_components.m4) - -# MCA_hwloc_hwloc1110_POST_CONFIG() -# --------------------------------- -AC_DEFUN([MCA_opal_hwloc_hwloc1110_POST_CONFIG],[ - OPAL_VAR_SCOPE_PUSH([opal_hwloc_hwloc1110_basedir]) - - # If we won, then do all the rest of the setup - AS_IF([test "$1" = "1" && test "$opal_hwloc_hwloc1110_support" = "yes"], - [ - # Set this variable so that the framework m4 knows what - # file to include in opal/mca/hwloc/hwloc.h - opal_hwloc_hwloc1110_basedir=opal/mca/hwloc/hwloc1110 - opal_hwloc_base_include="$opal_hwloc_hwloc1110_basedir/hwloc1110.h" - - # Add some stuff to CPPFLAGS so that the rest of the source - # tree can be 
built - file=$opal_hwloc_hwloc1110_basedir/hwloc - CPPFLAGS="$CPPFLAGS -I$OPAL_TOP_SRCDIR/$file/include" - AS_IF([test "$OPAL_TOP_BUILDDIR" != "$OPAL_TOP_SRCDIR"], - [CPPFLAGS="$CPPFLAGS -I$OPAL_TOP_BUILDDIR/$file/include"]) - unset file - ]) - OPAL_VAR_SCOPE_POP - - # This must be run unconditionally - HWLOC_DO_AM_CONDITIONALS -])dnl - - -# MCA_hwloc_hwloc1110_CONFIG([action-if-found], [action-if-not-found]) -# -------------------------------------------------------------------- -AC_DEFUN([MCA_opal_hwloc_hwloc1110_CONFIG],[ - # Hwloc needs to know if we have Verbs support - AC_REQUIRE([OPAL_CHECK_VERBS_DIR]) - - AC_CONFIG_FILES([opal/mca/hwloc/hwloc1110/Makefile]) - - OPAL_VAR_SCOPE_PUSH([HWLOC_VERSION opal_hwloc_hwloc1110_save_CPPFLAGS opal_hwloc_hwloc1110_save_LDFLAGS opal_hwloc_hwloc1110_save_LIBS opal_hwloc_hwloc1110_save_cairo opal_hwloc_hwloc1110_save_xml opal_hwloc_hwloc1110_basedir opal_hwloc_hwloc1110_file opal_hwloc_hwloc1110_save_cflags CPPFLAGS_save LIBS_save]) - - # default to this component not providing support - opal_hwloc_hwloc1110_basedir=opal/mca/hwloc/hwloc1110 - opal_hwloc_hwloc1110_support=no - - if test "$with_hwloc" = "internal" -o "$with_hwloc" = "" -o "$with_hwloc" = "yes"; then - opal_hwloc_hwloc1110_save_CPPFLAGS=$CPPFLAGS - opal_hwloc_hwloc1110_save_LDFLAGS=$LDFLAGS - opal_hwloc_hwloc1110_save_LIBS=$LIBS - - # Run the hwloc configuration - set the prefix to minimize - # the chance that someone will use the internal symbols - HWLOC_SET_SYMBOL_PREFIX([opal_hwloc1110_]) - - # save XML or graphical options - opal_hwloc_hwloc1110_save_cairo=$enable_cairo - opal_hwloc_hwloc1110_save_xml=$enable_xml - opal_hwloc_hwloc1110_save_static=$enable_static - opal_hwloc_hwloc1110_save_shared=$enable_shared - opal_hwloc_hwloc1110_save_plugins=$enable_plugins - - # never enable hwloc's graphical option - enable_cairo=no - - # never enable hwloc's plugin system - enable_plugins=no - enable_static=yes - enable_shared=no - - # Override -- disable hwloc's 
libxml2 support, but enable the - # native hwloc XML support - enable_libxml2=no - enable_xml=yes - - # hwloc checks for compiler visibility, and its needs to do - # this without "picky" flags. - opal_hwloc_hwloc1110_save_cflags=$CFLAGS - CFLAGS=$OPAL_CFLAGS_BEFORE_PICKY - HWLOC_SETUP_CORE([opal/mca/hwloc/hwloc1110/hwloc], - [AC_MSG_CHECKING([whether hwloc configure succeeded]) - AC_MSG_RESULT([yes]) - HWLOC_VERSION="internal v`$srcdir/$opal_hwloc_hwloc1110_basedir/hwloc/config/hwloc_get_version.sh $srcdir/$opal_hwloc_hwloc1110_basedir/hwloc/VERSION`" - - # Build flags for our Makefile.am - opal_hwloc_hwloc1110_LDFLAGS='$(HWLOC_EMBEDDED_LDFLAGS)' - opal_hwloc_hwloc1110_LIBS='$(OPAL_TOP_BUILDDIR)/'"$opal_hwloc_hwloc1110_basedir"'/hwloc/src/libhwloc_embedded.la $(HWLOC_EMBEDDED_LIBS)' - opal_hwloc_hwloc1110_support=yes - - AC_DEFINE_UNQUOTED([HWLOC_HWLOC1110_HWLOC_VERSION], - ["$HWLOC_VERSION"], - [Version of hwloc]) - - # Do we have verbs support? - CPPFLAGS_save=$CPPFLAGS - AS_IF([test "$opal_want_verbs" = "yes"], - [CPPFLAGS="-I$opal_verbs_dir/include $CPPFLAGS"]) - AC_CHECK_HEADERS([infiniband/verbs.h]) - CPPFLAGS=$CPPFLAGS_save - ], - [AC_MSG_CHECKING([whether hwloc configure succeeded]) - AC_MSG_RESULT([no]) - opal_hwloc_hwloc1110_support=no]) - CFLAGS=$opal_hwloc_hwloc1110_save_cflags - - # Restore some env variables, if necessary - AS_IF([test -n "$opal_hwloc_hwloc1110_save_cairo"], - [enable_cairo=$opal_hwloc_hwloc1110_save_cairo]) - AS_IF([test -n "$opal_hwloc_hwloc1110_save_xml"], - [enable_xml=$opal_hwloc_hwloc1110_save_xml]) - AS_IF([test -n "$opal_hwloc_hwloc1110_save_static"], - [enable_static=$opal_hwloc_hwloc1110_save_static]) - AS_IF([test -n "$opal_hwloc_hwloc1110_save_shared"], - [enable_shared=$opal_hwloc_hwloc1110_save_shared]) - AS_IF([test -n "$opal_hwloc_hwloc1110_save_plugins"], - [enable_plugins=$opal_hwloc_hwloc1110_save_shared]) - - CPPFLAGS=$opal_hwloc_hwloc1110_save_CPPFLAGS - LDFLAGS=$opal_hwloc_hwloc1110_save_LDFLAGS - 
LIBS=$opal_hwloc_hwloc1110_save_LIBS - - AC_SUBST([opal_hwloc_hwloc1110_CFLAGS]) - AC_SUBST([opal_hwloc_hwloc1110_CPPFLAGS]) - AC_SUBST([opal_hwloc_hwloc1110_LDFLAGS]) - AC_SUBST([opal_hwloc_hwloc1110_LIBS]) - - # Finally, add some flags to the wrapper compiler so that our - # headers can be found. - hwloc_hwloc1110_WRAPPER_EXTRA_LDFLAGS="$HWLOC_EMBEDDED_LDFLAGS" - hwloc_hwloc1110_WRAPPER_EXTRA_LIBS="$HWLOC_EMBEDDED_LIBS" - hwloc_hwloc1110_WRAPPER_EXTRA_CPPFLAGS='-I${includedir}/openmpi/'"$opal_hwloc_hwloc1110_basedir/hwloc/include" - fi - - # Done! - AS_IF([test "$opal_hwloc_hwloc1110_support" = "yes"], - [$1], - [$2]) - - OPAL_VAR_SCOPE_POP -])dnl diff --git a/opal/mca/hwloc/hwloc1110/hwloc/NEWS b/opal/mca/hwloc/hwloc1110/hwloc/NEWS deleted file mode 100644 index 91f8c654f44..00000000000 --- a/opal/mca/hwloc/hwloc1110/hwloc/NEWS +++ /dev/null @@ -1,1158 +0,0 @@ -Copyright © 2009 CNRS -Copyright © 2009-2015 Inria. All rights reserved. -Copyright © 2009-2013 Université Bordeaux -Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. - -$COPYRIGHT$ - -Additional copyrights may follow - -$HEADER$ - -=========================================================================== - -This file contains the main features as well as overviews of specific -bug fixes (and other actions) for each version of hwloc since version -0.9 (as initially released as "libtopology", then re-branded to "hwloc" -in v0.9.1). - - -Version 1.11.0 --------------- -* API - + Socket objects are renamed into Package to align with the terminology - used by processor vendors. The old HWLOC_OBJ_SOCKET type and "Socket" - name are still supported for backward compatibility. - + HWLOC_OBJ_NODE is replaced with HWLOC_OBJ_NUMANODE for clarification. - HWLOC_OBJ_NODE is still supported for backward compatibility. - "Node" and "NUMANode" strings are supported as in earlier releases. -* Detection improvements - + Add support for Intel Knights Landing Xeon Phi. 
- Thanks to Grzegorz Andrejczuk and Lukasz Anaczkowski. - + Add Vendor, Model, Revision, SerialNumber, Type and LinuxDeviceID - info attributes to Block OS devices on Linux. Thanks to Vineet Pedaballe - for the help. - - Add --disable-libudev to avoid dependency on the libudev library. - + Add "MemoryDevice" Misc objects with information about DIMMs, on Linux - when privileged and when I/O is enabled. - Thanks to Vineet Pedaballe for the help. - + Add a PCISlot attribute to PCI devices on Linux when supported to - identify the physical PCI slot where the board is plugged. - + Add CPUStepping info attribute on x86 processors, - thanks to Thomas Röhl for the suggestion. - + Ignore the device-tree on non-Power architectures to avoid buggy - detection on ARM. Thanks to Orion Poplawski for reporting the issue. - + Work-around buggy Xeon E5v3 BIOS reporting invalid PCI-NUMA affinity - for the PCI links on the second processor. - + Add support for CUDA compute capability 5.x, thanks Benjamin Worpitz. - + Many fixes to the x86 backend - - Add L1i and fix L2/L3 type on old AMD processors without topoext support. - - Fix Intel CPU family and model numbers when basic family isn't 6 or 15. - - Fix package IDs on recent AMD processors. - - Fix misc issues due to incomplete APIC IDs on x2APIC processors. - - Avoid buggy discovery on old SGI Altix UVs with non-unique APIC IDs. - + Gather total machine memory on NetBSD. -* Tools - + lstopo - - Collapse identical PCI devices unless --no-collapse is given. - This avoids gigantic outputs when a PCI device contains dozens of - identical virtual functions. - - The ASCII art output is now called "ascii", for instance in - "lstopo -.ascii". - The former "txt" extension is retained for backward compatibility. - - Automatically scales graphical box width to the inner text in Cairo, - ASCII and Windows outputs. - - Add --rect to lstopo to force rectangular layout even for NUMA nodes. 
- - Objects may have a Type info attribute to specific a better type name - and display it in lstopo. - + hwloc-annotate - - May now operate on all types of objects, including I/O. - - May now insert Misc objects in the topology. - - Do not drop instruction caches and I/O devices from the output anymore. - + Fix lstopo path in hwloc-gather-topology after install. -* Misc - + Fix hwloc/cudart.h for machines with multiple PCI domains, - thanks to Imre Kerr for reporting the problem. - + Fix PCI Bridge-specific depth attribute. - + Fix hwloc_bitmap_intersect() for two infinite bitmaps. - + Improve the performance of object insertion by cpuset for large - topologies. - + Prefix verbose XML import errors with the source name. - + Improve pkg-config checks and error messages. - + Fix excluding after a component with an argument in the HWLOC_COMPONENTS - environment variable. - + Fix the recommended way in documentation and examples to allocate memory - on some node, it should use HWLOC_MEMBIND_BIND. - Thanks to Nicolas Bouzat for reporting the issue. - + Add a "Miscellaneous objects" section in the documentation. - + Add a FAQ entry "What happens to my topology if I disable symmetric - multithreading, hyper-threading, etc. ?" to the documentation. - - -Version 1.10.1 --------------- -* Actually remove disallowed NUMA nodes from nodesets when the whole-system - flag isn't enabled. -* Fix the gathering of PCI domains. Thanks to James Custer for reporting - the issue and providing a patch. -* Fix the merging of identical parent and child in presence of Misc objects. - Thanks to Dave Love for reporting the issue. -* Fix some misordering of children when merging with ignore_keep_structure() - in partially allowed topologies. -* Fix an overzealous assertion in the debug code when running on a single-PU - host with I/O. Thanks to Thomas Van Doren for reporting the issue. -* Don't forget to setup NUMA node object nodesets in x86 backend (for BSDs) - and OSF/Tru64 backend. 
-* Fix cpuid-x86 build error with gcc -O3 on x86-32. Thanks to Thomas Van Doren - for reporting the issue. -* Fix support for future very large caches in the x86 backend. -* Fix vendor/device names for SR-IOV PCI devices on Linux. -* Fix an unlikely crash in case of buggy hierarchical distance matrix. -* Fix PU os_index on some AIX releases. Thanks to Hendryk Bockelmann and - Erik Schnetter for helping debugging. -* Fix hwloc_bitmap_isincluded() in case of infinite sets. -* Change hwloc-ls.desktop into a lstopo.desktop and only install it if - lstopo is built with Cairo/X11 support. It cannot work with a non-graphical - lstopo or hwloc-ls. -* Add support for the renaming of Socket into Package in future releases. -* Add support for the replacement of HWLOC_OBJ_NODE with HWLOC_OBJ_NUMANODE - in future releases. -* Clarify the documentation of distance matrices in hwloc.h and in the manpage - of the hwloc-distances. Thanks to Dave Love for the suggestion. -* Improve some error messages by displaying more information about the - hwloc library in use. -* Document how to deal with the ABI break when upgrading to the upcoming 2.0 - See "How do I handle ABI breaks and API upgrades ?" in the FAQ. - - -Version 1.10.0 --------------- -* API - + Add hwloc_topology_export_synthetic() to export a topology to a - synthetic string without using lstopo. See the Synthetic topologies - section in the documentation. - + Add hwloc_topology_set/get_userdata() to let the application save - a private pointer in the topology whenever it needs a way to find - its own object corresponding to a topology. - + Add hwloc_get_numanode_obj_by_os_index() and document that this function - as well as hwloc_get_pu_obj_by_os_index() are good at converting - nodesets and cpusets into objects. - + hwloc_distrib() does not ignore any objects anymore when there are - too many of them. They get merged with others instead. - Thanks to Tim Creech for reporting the issue. 
-* Tools - + hwloc-bind --get now executes the command after displaying - the binding instead of ignoring the command entirely. - Thanks to John Donners for the suggestion. - + Clarify that memory sizes shown in lstopo are local by default - unless specified (total memory added in the root object). -* Synthetic topologies - + Synthetic topology descriptions may now specify attributes such as - memory sizes and OS indexes. See the Synthetic topologies section - in the documentation. - + lstopo now exports in this fully-detailed format by default. - The new option --export-synthetic-flags may be used to revert - back the old format. -* Documentation - + Add the doc/examples/ subdirectory with several real-life examples, - including the already existing hwloc-hello.C for basics. - Thanks to Rob Aulwes for the suggestion. - + Improve the documentation of CPU and memory binding in the API. - + Add a FAQ entry about operating system errors, especially on AMD - platforms with buggy cache information. - + Add a FAQ entry about loading many topologies in a single program. -* Misc - + Work around buggy Linux kernels reporting 2 sockets instead - 1 socket with 2 NUMA nodes for each Xeon E5 v3 (Haswell) processor. - + pciutils/libpci support is now removed since libpciaccess works - well and there's also a Linux-specific PCI backend. For the record, - pciutils was GPL and therefore disabled by default since v1.6.2. - + Add --disable-cpuid configure flag to work around buggy processor - simulators reporting invalid CPUID information. - Thanks for Andrew Friedley for reporting the issue. - + Fix a racy use of libltdl when manipulating multiple topologies in - different threads. - Thanks to Andra Hugo for reporting the issue and testing patches. - + Fix some build failures in private/misc.h. - Thanks to Pavan Balaji and Ralph Castain for the reports. - + Fix failures to detect X11/Xutil.h on some Solaris platforms. - Thanks to Siegmar Gross for reporting the failure. 
- + The plugin ABI has changed, this release will not load plugins - built against previous hwloc releases. - - -Version 1.9.1 -------------- -* Fix a crash when the PCI locality is invalid. Attach to the root object - instead. Thanks to Nicolas Denoyelle for reporting the issue. -* Fix -f in lstopo manpage. Thanks to Jirka Hladky for reporting the issue. -* Fix hwloc_obj_type_sscanf() and others when strncasecmp() is not properly - available. Thanks to Nick Papior Andersen for reporting the problem. -* Mark Linux file descriptors as close-on-exec to avoid leaks on exec. -* Fix some minor memory leaks. - - -Version 1.9.0 -------------- -* API - + Add hwloc_obj_type_sscanf() to extend hwloc_obj_type_of_string() with - type-specific attributes such as Cache/Group depth and Cache type. - hwloc_obj_type_of_string() is moved to hwloc/deprecated.h. - + Add hwloc_linux_get_tid_last_cpu_location() for retrieving the - last CPU where a Linux thread given by TID ran. - + Add hwloc_distrib() to extend the old hwloc_distribute[v]() functions. - hwloc_distribute[v]() is moved to hwloc/deprecated.h. - + Don't mix total and local memory when displaying verbose object attributes - with hwloc_obj_attr_snprintf() or in lstopo. -* Backends - + Add CPUVendor, CPUModelNumber and CPUFamilyNumber info attributes for - x86, ia64 and Xeon Phi sockets on Linux, to extend the x86-specific - support added in v1.8.1. Requested by Ralph Castain. - + Add many CPU- and Platform-related info attributes on ARM and POWER - platforms, in the Machine and Socket objects. - + Add CUDA info attributes describing the number of multiprocessors and - cores and the size of the global, shared and L2 cache memories in CUDA - OS devices. - + Add OpenCL info attributes describing the number of compute units and - the global memory size in OpenCL OS devices. - + The synthetic backend now accepts extended types such as L2Cache, L1i or - Group3. lstopo also exports synthetic strings using these extended types. 
-* Tools - + lstopo - - Do not overwrite output files by default anymore. - Pass -f or --force to enforce it. - - Display OpenCL, CUDA and Xeon Phi numbers of cores and memory sizes - in the graphical output. - - Fix export to stdout when specifying a Cairo-based output type - with --of. - + hwloc-ps - - Add -e or --get-last-cpu-location to report where processes/threads - run instead of where they are bound. - - Report locations as likely-more-useful objects such as Cores or Sockets - instead of Caches when possible. - + hwloc-bind - - Fix failure on Windows when not using --pid. - - Add -e as a synonym to --get-last-cpu-location. - + hwloc-distrib - - Add --reverse to distribute using last objects first and singlify - into last bits first. Thanks to Jirka Hladky for the suggestion. - + hwloc-info - - Report unified caches when looking for data or instruction cache - ancestor objects. -* Misc - + Add experimental Visual Studio support under contrib/windows. - Thanks to Eloi Gaudry for his help and for providing the first draft. - + Fix some overzealous assertions and warnings about the ordering of - objects on a level with respect to cpusets. The ordering is only - guaranteed for complete cpusets (based on the first bit in sets). - + Fix some memory leaks when importing xml diffs and when exporting a - "too complex" entry. - - -Version 1.8.1 -------------- -* Fix the cpuid code on Windows 64bits so that the x86 backend gets - enabled as expected and can populate CPU information. - Thanks to Robin Scher for reporting the problem. -* Add CPUVendor/CPUModelNumber/CPUFamilyNumber attributes when running - on x86 architecture. Thanks to Ralph Castain for the suggestion. -* Work around buggy BIOS reporting duplicate NUMA nodes on Linux. - Thanks to Jeff Becker for reporting the problem and testing the patch. -* Add a name to the lstopo graphical window. Thanks to Michael Prokop - for reporting the issue. 
- - -Version 1.8.0 -------------- -* New components - + Add the "linuxpci" component that always works on Linux even when - libpciaccess and libpci aren't available (and even with a modified - file-system root). By default the old "pci" component runs first - because "linuxpci" lacks device names (obj->name is always NULL). -* API - + Add the topology difference API in hwloc/diff.h for manipulating - many similar topologies. - + Add hwloc_topology_dup() for duplicating an entire topology. - + hwloc.h and hwloc/helper.h have been reorganized to clarify the - documentation sections. The actual inline code has moved out of hwloc.h - into the new hwloc/inlines.h. - + Deprecated functions are now in hwloc/deprecated.h, and not in the - official documentation anymore. -* Tools - + Add hwloc-diff and hwloc-patch tools together with the new diff API. - + Add hwloc-compress-dir to (de)compress an entire directory of XML files - using hwloc-diff and hwloc-patch. - + Object colors in the graphical output of lstopo may be changed by adding - a "lstopoStyle" info attribute. See CUSTOM COLORS in the lstopo(1) manpage - for details. Thanks to Jirka Hladky for discussing the idea. - + hwloc-gather-topology may now gather I/O-related files on Linux when - --io is given. Only the linuxpci component supports discovering I/O - objects from these extended tarballs. - + hwloc-annotate now supports --ri to remove/replace info attributes with - a given name. - + hwloc-info supports "root" and "all" special locations for dumping - information about the root object. - + lstopo now supports --append-legend to append custom lines of text - to the legend in the graphical output. Thanks to Jirka Hladky for - discussing the idea. - + hwloc-calc and friends have a more robust parsing of locations given - on the command-line and they report useful error messages about it. 
- + Add --whole-system to hwloc-bind, hwloc-calc, hwloc-distances and - hwloc-distrib, and add --restrict to hwloc-bind for uniformity among - tools. -* Misc - + Calling hwloc_topology_load() or hwloc_topology_set_*() on an already - loaded topology now returns an error (deprecated since release 1.6.1). - + Fix the initialisation of cpusets and nodesets in Group objects added - when inserting PCI hostbridges. - + Never merge Group objects that were added explicitly by the user with - hwloc_custom_insert_group_object_by_parent(). - + Add a sanity check during dynamic plugin loading to prevent some - crashes when hwloc is dynamically loaded by another plugin mechanisms. - + Add --with-hwloc-plugins-path to specify the install/load directories - of plugins. - + Add the MICSerialNumber info attribute to the root object when running - hwloc inside a Xeon Phi to match the same attribute in the MIC OS device - when running in the host. - - -Version 1.7.2 -------------- -* Do not create invalid block OS devices on very old Linux kernel such - as RHEL4 2.6.9. -* Fix PCI subvendor/device IDs. -* Fix the management of Misc objects inserted by parent. - Thanks to Jirka Hladky for reporting the problem. -* Add a PortState into attribute to OpenFabrics OS devices. -* Add a MICSerialNumber info attribute to Xeon PHI/MIC OS devices. -* Improve verbose error messages when failing to load from XML. - - -Version 1.7.1 -------------- -* Fix a failed assertion in the distance grouping code when loading a XML - file that already contains some groups. - Thanks to Laercio Lima Pilla for reporting the problem. -* Remove unexpected Group objects when loading XML topologies with I/O - objects and NUMA distances. - Thanks to Elena Elkina for reporting the problem and testing patches. -* Fix PCI link speed discovery when using libpciaccess. -* Fix invalid libpciaccess virtual function device/vendor IDs when using - SR-IOV PCI devices on Linux. 
-* Fix GL component build with old NVCtrl releases. - Thanks to Jirka Hladky for reporting the problem. -* Fix embedding breakage caused by libltdl. - Thanks to Pavan Balaji for reporting the problem. -* Always use the system-wide libltdl instead of shipping one inside hwloc. -* Document issues when enabling plugins while embedding hwloc in another - project, in the documentation section Embedding hwloc in Other Software. -* Add a FAQ entry "How to get useful topology information on NetBSD?" - in the documentation. -* Somes fixes in the renaming code for embedding. -* Miscellaneous minor build fixes. - - -Version 1.7.0 -------------- -* New operating system backends - + Add BlueGene/Q compute node kernel (CNK) support. See the FAQ in the - documentation for details. Thanks to Jeff Hammond, Christopher Samuel - and Erik Schnetter for their help. - + Add NetBSD support, thanks to Aleksej Saushev. -* New I/O device discovery - + Add co-processor OS devices such as "mic0" for Intel Xeon Phi (MIC) - on Linux. Thanks to Jerome Vienne for helping. - + Add co-processor OS devices such as "cuda0" for NVIDIA CUDA-capable GPUs. - + Add co-processor OS devices such as "opencl0d0" for OpenCL GPU devices - on the AMD OpenCL implementation. - + Add GPU OS devices such as ":0.0" for NVIDIA X11 displays. - + Add GPU OS devices such as "nvml0" for NVIDIA GPUs. - Thanks to Marwan Abdellah and Stefan Eilemann for helping. - These new OS devices have some string info attributes such as CoProcType, - GPUModel, etc. to better identify them. - See the I/O Devices and Attributes documentation sections for details. -* New components - + Add the "opencl", "cuda", "nvml" and "gl" components for I/O device - discovery. - + "nvml" also improves the discovery of NVIDIA GPU PCIe link speed. - All of these new components may be built as plugins. They may also be - disabled entirely by passing --disable-opencl/cuda/nvml/gl to configure. 
- See the I/O Devices, Components and Plugins, and FAQ documentation - sections for details. -* API - + Add hwloc_topology_get_flags(). - + Add hwloc/plugins.h for building external plugins. - See the Adding new discovery components and plugins section. -* Interoperability - + Add hwloc/opencl.h, hwloc/nvml.h, hwloc/gl.h and hwloc/intel-mic.h - to retrieve the locality of OS devices that correspond to AMD OpenCL - GPU devices or indexes, to NVML devices or indexes, to NVIDIA X11 - displays, or to Intel Xeon Phi (MIC) device indexes. - + Add new helpers in hwloc/cuda.h and hwloc/cudart.h to convert - between CUDA devices or indexes and hwloc OS devices. - + Add hwloc_ibv_get_device_osdev() and clarify the requirements - of the OpenFabrics Verbs helpers in hwloc/openfabrics-verbs.h. -* Tools - + hwloc-info is not only a synonym of lstopo -s anymore, it also - dumps information about objects given on the command-line. -* Documentation - + Add a section "Existing components and plugins". - + Add a list of common OS devices in section "Software devices". - + Add a new FAQ entry "Why is lstopo slow?" about lstopo slowness - issues because of GPUs. - + Clarify the documentation of inline helpers in hwloc/myriexpress.h - and hwloc/openfabrics-verbs.h. -* Misc - + Improve cache detection on AIX. - + The HWLOC_COMPONENTS variable now excludes the components whose - names are prefixed with '-'. - + lstopo --ignore PU now works when displaying the topology in - graphical and textual mode (not when exporting to XML). - + Make sure I/O options always appear in lstopo usage, not only when - using pciutils/libpci. - + Remove some unneeded Linux specific includes from some interoperability - headers. - + Fix some inconsistencies in hwloc-distrib and hwloc-assembler-remote - manpages. Thanks to Guy Streeter for the report. - + Fix a memory leak on AIX when getting memory binding. - + Fix many small memory leaks on Linux. 
- + The `libpci' component is now called `pci' but the old name is still - accepted in the HWLOC_COMPONENTS variable for backward compatibility. - - -Version 1.6.2 -------------- -* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. - pciutils/libpci is only used if --enable-libpci is given to configure - because its GPL license may taint hwloc. See the Installation section - in the documentation for details. -* Fix get_cpubind on Solaris when bound to a single PU with - processor_bind(). Thanks to Eugene Loh for reporting the problem - and providing a patch. - - -Version 1.6.1 -------------- -* Fix some crash or buggy detection in the x86 backend when Linux - cgroups/cpusets restrict the available CPUs. -* Fix the pkg-config output with --libs --static. - Thanks to Erik Schnetter for reporting one of the problems. -* Fix the output of hwloc-calc -H --hierarchical when using logical - indexes in the output. -* Calling hwloc_topology_load() multiple times on the same topology - is officially deprecated. hwloc will warn in such cases. -* Add some documentation about existing plugins/components, package - dependencies, and I/O devices specification on the command-line. - - -Version 1.6.0 -------------- -* Major changes - + Reorganize the backend infrastructure to support dynamic selection - of components and dynamic loading of plugins. For details, see the - new documentation section Components and plugins. - - The HWLOC_COMPONENTS variable lets one replace the default discovery - components. - - Dynamic loading of plugins may be enabled with --enable-plugins - (except on AIX and Windows). It will build libxml2 and libpci - support as separated modules. This helps reducing the dependencies - of the core hwloc library when distributed as a binary package. -* Backends - + Add CPUModel detection on Darwin and x86/FreeBSD. - Thanks to Robin Scher for providing ways to implement this. 
- + The x86 backend now adds CPUModel info attributes to socket objects - created by other backends that do not natively support this attribute. - + Fix detection on FreeBSD in case of cpuset restriction. Thanks to - Sebastian Kuzminsky for reporting the problem. -* XML - + Add hwloc_topology_set_userdata_import/export_callback(), - hwloc_export_obj_userdata() and _userdata_base64() to let - applications specify how to save/restore the custom data they placed - in the userdata private pointer field of hwloc objects. -* Tools - + Add hwloc-annotate program to add string info attributes to XML - topologies. - + Add --pid-cmd to hwloc-ps to append the output of a command to each - PID line. May be used for showing Open MPI process ranks, see the - hwloc-ps(1) manpage for details. - + hwloc-bind now exits with an error if binding fails; the executable - is not launched unless binding suceeeded or --force was given. - + Add --quiet to hwloc-calc and hwloc-bind to hide non-fatal error - messages. - + Fix command-line pid support in windows tools. - + All programs accept --verbose as a synonym to -v. -* Misc - + Fix some DIR descriptor leaks on Linux. - + Fix I/O device lists when some were filtered out after a XML import. - + Fix the removal of I/O objects when importing a I/O-enabled XML topology - without any I/O topology flag. - + When merging objects with HWLOC_IGNORE_TYPE_KEEP_STRUCTURE or - lstopo --merge, compare object types before deciding which one of two - identical object to remove (e.g. keep sockets in favor of caches). - + Add some GUID- and LID-related info attributes to OpenFabrics - OS devices. - + Only add CPUType socket attributes on Solaris/Sparc. Other cases - don't report reliable information (Solaris/x86), and a replacement - is available as the Architecture string info in the Machine object. - + Add missing Backend string info on Solaris in most cases. 
- + Document object attributes and string infos in a new Attributes - section in the documentation. - + Add a section about Synthetic topologies in the documentation. - - -Version 1.5.2 (some of these changes are in v1.6.2 but not in v1.6) -------------- -* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. - pciutils/libpci is only used if --enable-libpci is given to configure - because its GPL license may taint hwloc. See the Installation section - in the documentation for details. -* Fix get_cpubind on Solaris when bound to a single PU with - processor_bind(). Thanks to Eugene Loh for reporting the problem - and providing a patch. -* Fix some DIR descriptor leaks on Linux. -* Fix I/O device lists when some were filtered out after a XML import. -* Add missing Backend string info on Solaris in most cases. -* Fix the removal of I/O objects when importing a I/O-enabled XML topology - without any I/O topology flag. -* Fix the output of hwloc-calc -H --hierarchical when using logical - indexes in the output. -* Fix the pkg-config output with --libs --static. - Thanks to Erik Schnetter for reporting one of the problems. - - -Version 1.5.1 -------------- -* Fix block OS device detection on Linux kernel 3.3 and later. - Thanks to Guy Streeter for reporting the problem and testing the fix. -* Fix the cpuid code in the x86 backend (for FreeBSD). Thanks to - Sebastian Kuzminsky for reporting problems and testing patches. -* Fix 64bit detection on FreeBSD. -* Fix some corner cases in the management of the thissystem flag with - respect to topology flags and environment variables. -* Fix some corner cases in command-line parsing checks in hwloc-distrib - and hwloc-distances. -* Make sure we do not miss some block OS devices on old Linux kernels - when a single PCI device has multiple IDE hosts/devices behind it. -* Do not disable I/O devices or instruction caches in hwloc-assembler output. 
- - -Version 1.5.0 -------------- -* Backends - + Do not limit the number of processors to 1024 on Solaris anymore. - + Gather total machine memory on FreeBSD. Thanks to Cyril Roelandt. - + XML topology files do not depend on the locale anymore. Float numbers - such as NUMA distances or PCI link speeds now always use a dot as a - decimal separator. - + Add instruction caches detection on Linux, AIX, Windows and Darwin. - + Add get_last_cpu_location() support for the current thread on AIX. - + Support binding on AIX when threads or processes were bound with - bindprocessor(). Thanks to Hendryk Bockelmann for reporting the issue - and testing patches, and to Farid Parpia for explaining the binding - interfaces. - + Improve AMD topology detection in the x86 backend (for FreeBSD) using - the topoext feature. -* API - + Increase HWLOC_API_VERSION to 0x00010500 so that API changes may be - detected at build-time. - + Add a cache type attribute describind Data, Instruction and Unified - caches. Caches with different types but same depth (for instance L1d - and L1i) are placed on different levels. - + Add hwloc_get_cache_type_depth() to retrieve the hwloc level depth of - of the given cache depth and type, for instance L1i or L2. - It helps disambiguating the case where hwloc_get_type_depth() returns - HWLOC_TYPE_DEPTH_MULTIPLE. - + Instruction caches are ignored unless HWLOC_TOPOLOGY_FLAG_ICACHES is - passed to hwloc_topology_set_flags() before load. - + Add hwloc_ibv_get_device_osdev_by_name() OpenFabrics helper in - openfabrics-verbs.h to find the hwloc OS device object corresponding to - an OpenFabrics device. -* Tools - + Add lstopo-no-graphics, a lstopo built without graphical support to - avoid dependencies on external libraries such as Cairo and X11. When - supported, graphical outputs are only available in the original lstopo - program. 
- - Packagers splitting lstopo and lstopo-no-graphics into different - packages are advised to use the alternatives system so that lstopo - points to the best available binary. - + Instruction caches are enabled in lstopo by default. Use --no-icaches - to disable them. - + Add -t/--threads to show threads in hwloc-ps. -* Removal of obsolete components - + Remove the old cpuset interface (hwloc/cpuset.h) which is deprecated and - superseded by the bitmap API (hwloc/bitmap.h) since v1.1. - hwloc_cpuset and nodeset types are still defined, but all hwloc_cpuset_* - compatibility wrappers are now gone. - + Remove Linux libnuma conversion helpers for the deprecated and - broken nodemask_t interface. - + Remove support for "Proc" type name, it was superseded by "PU" in v1.0. - + Remove hwloc-mask symlinks, it was replaced by hwloc-calc in v1.0. -* Misc - + Fix PCIe 3.0 link speed computation. - + Non-printable characters are dropped from strings during XML export. - + Fix importing of escaped characters with the minimalistic XML backend. - + Assert hwloc_is_thissystem() in several I/O related helpers. - + Fix some memory leaks in the x86 backend for FreeBSD. - + Minor fixes to ease native builds on Windows. - + Limit the number of retries when operating on all threads within a - process on Linux if the list of threads is heavily getting modified. - - -Version 1.4.3 -------------- -* This release is only meant to fix the pciutils license issue when upgrading - to hwloc v1.5 or later is not possible. It contains several other minor - fixes but ignores many of them that are only in v1.5 or later. -* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. - pciutils/libpci is only used if --enable-libpci is given to configure - because its GPL license may taint hwloc. See the Installation section - in the documentation for details. -* Fix PCIe 3.0 link speed computation. -* Fix importing of escaped characters with the minimalistic XML backend. 
-* Fix a memory leak in the x86 backend. - - -Version 1.4.2 -------------- -* Fix build on Solaris 9 and earlier when fabsf() is not a compiler - built-in. Thanks to Igor Galić for reporting the problem. -* Fix support for more than 32 processors on Windows. Thanks to Hartmut - Kaiser for reporting the problem. -* Fix process-wide binding and cpulocation routines on Linux when some - threads disappear in the meantime. Thanks to Vlad Roubtsov for reporting - the issue. -* Make installed scripts executable. Thanks to Jirka Hladky for reporting - the problem. -* Fix libtool revision management when building for Windows. This fix was - also released as hwloc v1.4.1.1 Windows builds. Thanks to Hartmut Kaiser - for reporting the problem. -* Fix the __hwloc_inline keyword in public headers when compiling with a - C++ compiler. -* Add Port info attribute to network OS devices inside OpenFabrics PCI - devices so as to identify which interface corresponds to which port. -* Document requirements for interoperability helpers: I/O devices discovery - is required for some of them; the topology must match the current host - for most of them. - - -Version 1.4.1 -------------- -* This release contains all changes from v1.3.2. -* Fix hwloc_alloc_membind, thanks Karl Napf for reporting the issue. -* Fix memory leaks in some get_membind() functions. -* Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h) - in case of out-of-order NUMA node ids. -* Fix some overzealous assertions in the distance grouping code. -* Workaround BIOS reporting empty I/O locality in CUDA and OpenFabrics - helpers on Linux. Thanks to Albert Solernou for reporting the problem. -* Install a valgrind suppressions file hwloc-valgrind.supp (see the FAQ). -* Fix memory binding documentation. Thanks to Karl Napf for reporting the - issues. 
- - -Version 1.4.0 (does not contain all v1.3.2 changes) -------------- -* Major features - + Add "custom" interface and "assembler" tools to build multi-node - topology. See the Multi-node Topologies section in the documentation - for details. -* Interface improvements - + Add symmetric_subtree object attribute to ease assumptions when consulting - regular symmetric topologies. - + Add a CPUModel and CPUType info attribute to Socket objects on Linux - and Solaris. - + Add hwloc_get_obj_index_inside_cpuset() to retrieve the "logical" index - of an object within a subtree of the topology. - + Add more NVIDIA CUDA helpers in cuda.h and cudart.h to find hwloc objects - corresponding to CUDA devices. -* Discovery improvements - + Add a group object above partial distance matrices to make sure - the matrices are available in the final topology, except when this - new object would contradict the existing hierarchy. - + Grouping by distances now also works when loading from XML. - + Fix some corner cases in object insertion, for instance when dealing - with NUMA nodes without any CPU. -* Backends - + Implement hwloc_get_area_membind() on Linux. - + Honor I/O topology flags when importing from XML. - + Further improve XML-related error checking and reporting. - + Hide synthetic topology error messages unless HWLOC_SYNTHETIC_VERBOSE=1. -* Tools - + Add synthetic exporting of symmetric topologies to lstopo. - + lstopo --horiz and --vert can now be applied to some specific object types. - + lstopo -v -p now displays distance matrices with physical indexes. - + Add hwloc-distances utility to list distances. -* Documentation - + Fix and/or document the behavior of most inline functions in hwloc/helper.h - when the topology contains some I/O or Misc objects. - + Backend documentation enhancements. -* Bug fixes - + Fix missing last bit in hwloc_linux_get_thread_cpubind(). - Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue. 
- + Fix FreeBSD build without cpuid support. - + Fix several Windows build issues. - + Fix inline keyword definition in public headers. - + Fix dependencies in the embedded library. - + Improve visibility support detection. Thanks to Dave Love for providing - the patch. - + Remove references to internal symbols in the tools. - - -Version 1.3.3 -------------- -* This release is only meant to fix the pciutils license issue when upgrading - to hwloc v1.4 or later is not possible. It contains several other minor - fixes but ignores many of them that are only in v1.4 or later. -* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. - pciutils/libpci is only used if --enable-libpci is given to configure - because its GPL license may taint hwloc. See the Installation section - in the documentation for details. - - -Version 1.3.2 -------------- -* Fix missing last bit in hwloc_linux_get_thread_cpubind(). - Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue. -* Fix build with -mcmodel=medium. Thanks to Devendar Bureddy for reporting - the issue. -* Fix build with Solaris Studio 12 compiler when XML is disabled. - Thanks to Paul H. Hargrove for reporting the problem. -* Fix installation with old GNU sed, for instance on Red Hat 8. - Thanks to Paul H. Hargrove for reporting the problem. -* Fix PCI locality when Linux cgroups restrict the available CPUs. -* Fix floating point issue when grouping by distance on mips64 architecture. - Thanks to Paul H. Hargrove for reporting the problem. -* Fix conversion from/to Linux libnuma when some NUMA nodes have no memory. -* Fix support for gccfss compilers with broken ffs() support. Thanks to - Paul H. Hargrove for reporting the problem and providing a patch. -* Fix FreeBSD build without cpuid support. -* Fix several Windows build issues. -* Fix inline keyword definition in public headers. -* Fix dependencies in the embedded library. 
-* Detect when a compiler such as xlc may not report compile errors - properly, causing some configure checks to be wrong. Thanks to - Paul H. Hargrove for reporting the problem and providing a patch. -* Improve visibility support detection. Thanks to Dave Love for providing - the patch. -* Remove references to internal symbols in the tools. -* Fix installation on systems with limited command-line size. - Thanks to Paul H. Hargrove for reporting the problem. -* Further improve XML-related error checking and reporting. - - -Version 1.3.1 -------------- -* Fix pciutils detection with pkg-config when not installed in standard - directories. -* Fix visibility options detection with the Solaris Studio compiler. - Thanks to Igor Galić and Terry Dontje for reporting the problems. -* Fix support for old Linux sched.h headers such as those found - on Red Hat 8. Thanks to Paul H. Hargrove for reporting the problems. -* Fix inline and attribute support for Solaris compilers. Thanks to - Dave Love for reporting the problems. -* Print a short summary at the end of the configure output. Thanks to - Stefan Eilemann for the suggestion. -* Add --disable-libnuma configure option to disable libnuma-based - memory binding support on Linux. Thanks to Rayson Ho for the - suggestion. -* Make hwloc's configure script properly obey $PKG_CONFIG. Thanks to - Nathan Phillip Brink for raising the issue. -* Silence some harmless pciutils warnings, thanks to Paul H. Hargrove - for reporting the problem. -* Fix the documentation with respect to hwloc_pid_t and hwloc_thread_t - being either pid_t and pthread_t on Unix, or HANDLE on Windows. - - -Version 1.3.0 -------------- -* Major features - + Add I/O devices and bridges to the topology using the pciutils - library. Only enabled after setting the relevant flag with - hwloc_topology_set_flags() before hwloc_topology_load(). See the - I/O Devices section in the documentation for details. 
-* Discovery improvements - + Add associativity to the cache attributes. - + Add support for s390/z11 "books" on Linux. - + Add the HWLOC_GROUPING_ACCURACY environment variable to relax - distance-based grouping constraints. See the Environment Variables - section in the documentation for details about grouping behavior - and configuration. - + Allow user-given distance matrices to remove or replace those - discovered by the OS backend. -* XML improvements - + XML is now always supported: a minimalistic custom import/export - code is used when libxml2 is not available. It is only guaranteed - to read XML files generated by hwloc. - + hwloc_topology_export_xml() and export_xmlbuffer() now return an - integer. - + Add hwloc_free_xmlbuffer() to free the buffer allocated by - hwloc_topology_export_xmlbuffer(). - + Hide XML topology error messages unless HWLOC_XML_VERBOSE=1. -* Minor API updates - + Add hwloc_obj_add_info to customize object info attributes. -* Tools - + lstopo now displays I/O devices by default. Several options are - added to configure the I/O discovery. - + hwloc-calc and hwloc-bind now accept I/O devices as input. - + Add --restrict option to hwloc-calc and hwloc-distribute. - + Add --sep option to change the output field separator in hwloc-calc. - + Add --whole-system option to hwloc-ps. - - -Version 1.2.2 -------------- -* Fix build on AIX 5.2, thanks Utpal Kumar Ray for the report. -* Fix XML import of very large page sizes or counts on 32bits platform, - thanks to Karsten Hopp for the RedHat ticket. -* Fix crash when administrator limitations such as Linux cgroup require - to restrict distance matrices. Thanks to Ake Sandgren for reporting the - problem. -* Fix the removal of objects such as AMD Magny-Cours dual-node sockets - in case of administrator restrictions. -* Improve error reporting and messages in case of wrong synthetic topology - description. -* Several other minor internal fixes and documentation improvements. 
- - -Version 1.2.1 -------------- -* Improve support of AMD Bulldozer "Compute-Unit" modules by detecting - logical processors with different core IDs on Linux. -* Fix hwloc-ps crash when listing processes from another Linux cpuset. - Thanks to Carl Smith for reporting the problem. -* Fix build on AIX and Solaris. Thanks to Carl Smith and Andreas Kupries - for reporting the problems. -* Fix cache size detection on Darwin. Thanks to Erkcan Özcan for reporting - the problem. -* Make configure fail if --enable-xml or --enable-cairo is given and - proper support cannot be found. Thanks to Andreas Kupries for reporting - the XML problem. -* Fix spurious L1 cache detection on AIX. Thanks to Hendryk Bockelmann - for reporting the problem. -* Fix hwloc_get_last_cpu_location(THREAD) on Linux. Thanks to Gabriele - Fatigati for reporting the problem. -* Fix object distance detection on Solaris. -* Add pthread_self weak symbol to ease static linking. -* Minor documentation fixes. - - -Version 1.2.0 -------------- -* Major features - + Expose latency matrices in the API as an array of distance structures - within objects. Add several helpers to find distances. - + Add hwloc_topology_set_distance_matrix() and environment variables - to provide a matrix of distances between a given set of objects. - + Add hwloc_get_last_cpu_location() and hwloc_get_proc_last_cpu_location() - to retrieve the processors where a process or thread recently ran. - - Add the corresponding --get-last-cpu-location option to hwloc-bind. - + Add hwloc_topology_restrict() to restrict an existing topology to a - given cpuset. - - Add the corresponding --restrict option to lstopo. -* Minor API updates - + Add hwloc_bitmap_list_sscanf/snprintf/asprintf to convert between bitmaps - and strings such as 4-5,7-9,12,15- - + hwloc_bitmap_set/clr_range() now support infinite ranges. - + Clarify the difference between inserting Misc objects by cpuset or by - parent. 
- + hwloc_insert_misc_object_by_cpuset() now returns NULL in case of error. -* Discovery improvements - + x86 backend (for freebsd): add x2APIC support - + Support standard device-tree phandle, to get better support on e.g. ARM - systems providing it. - + Detect cache size on AIX. Thanks Christopher and IBM. - + Improve grouping to support asymmetric topologies. -* Tools - + Command-line tools now support "all" and "root" special locations - consisting in the entire topology, as well as type names with depth - attributes such as L2 or Group4. - + hwloc-calc improvements: - - Add --number-of/-N option to report the number of objects of a given - type or depth. - - -I is now equivalent to --intersect for listing the indexes of - objects of a given type or depth that intersects the input. - - Add -H to report the output as a hierarchical combination of types - and depths. - + Add --thissystem to lstopo. - + Add lstopo-win, a console-less lstopo variant on Windows. -* Miscellaneous - + Remove C99 usage from code base. - + Rename hwloc-gather-topology.sh into hwloc-gather-topology - + Fix AMD cache discovery on freebsd when there is no L3 cache, thanks - Andriy Gapon for the fix. - - -Version 1.1.2 -------------- -* Fix a segfault in the distance-based grouping code when some objects - are not placed in any group. Thanks to Bernd Kallies for reporting - the problem and providing a patch. -* Fix the command-line parsing of hwloc-bind --mempolicy interleave. - Thanks to Guy Streeter for reporting the problem. -* Stop truncating the output in hwloc_obj_attr_snprintf() and in the - corresponding lstopo output. Thanks to Guy Streeter for reporting the - problem. -* Fix object levels ordering in synthetic topologies. -* Fix potential incoherency between device tree and kernel information, - when SMT is disabled on Power machines. -* Fix and document the behavior of hwloc_topology_set_synthetic() in case - of invalid argument. Thanks to Guy Streeter for reporting the problem. 
-* Add some verbose error message reporting when it looks like the OS - gives erroneous information. -* Do not include unistd.h and stdint.h in public headers on Windows. -* Move config.h files into their own subdirectories to avoid name - conflicts when AC_CONFIG_HEADERS adds -I's for them. -* Remove the use of declaring variables inside "for" loops. -* Some other minor fixes. -* Many minor documentation fixes. - - -Version 1.1.1 -------------- -* Add hwloc_get_api_version() which returns the version of hwloc used - at runtime. Thanks to Guy Streeter for the suggestion. -* Fix the number of hugepages reported for NUMA nodes on Linux. -* Fix hwloc_bitmap_to_ulong() right after allocating the bitmap. - Thanks to Bernd Kallies for reporting the problem. -* Fix hwloc_bitmap_from_ith_ulong() to properly zero the first ulong. - Thanks to Guy Streeter for reporting the problem. -* Fix hwloc_get_membind_nodeset() on Linux. - Thanks to Bernd Kallies for reporting the problem and providing a patch. -* Fix some file descriptor leaks in the Linux discovery. -* Fix the minimum width of NUMA nodes, caches and the legend in the graphical - lstopo output. Thanks to Jirka Hladky for reporting the problem. -* Various fixes to bitmap conversion from/to taskset-strings. -* Fix and document snprintf functions behavior when the buffer size is too - small or zero. Thanks to Guy Streeter for reporting the problem. -* Fix configure to avoid spurious enabling of the cpuid backend. - Thanks to Tim Anderson for reporting the problem. -* Cleanup error management in hwloc-gather-topology.sh. - Thanks to Jirka Hladky for reporting the problem and providing a patch. -* Add a manpage and usage for hwloc-gather-topology.sh on Linux. - Thanks to Jirka Hladky for providing a patch. -* Memory binding documentation enhancements. - - -Version 1.1.0 -------------- - -* API - + Increase HWLOC_API_VERSION to 0x00010100 so that API changes may be - detected at build-time. 
- + Add a memory binding interface. - + The cpuset API (hwloc/cpuset.h) is now deprecated. It is replaced by - the bitmap API (hwloc/bitmap.h) which offers the same features with more - generic names since it applies to CPU sets, node sets and more. - Backward compatibility with the cpuset API and ABI is still provided but - it will be removed in a future release. - Old types (hwloc_cpuset_t, ...) are still available as a way to clarify - what kind of hwloc_bitmap_t each API function manipulates. - Upgrading to the new API only requires to replace hwloc_cpuset_ function - calls with the corresponding hwloc_bitmap_ calls, with the following - renaming exceptions: - - hwloc_cpuset_cpu -> hwloc_bitmap_only - - hwloc_cpuset_all_but_cpu -> hwloc_bitmap_allbut - - hwloc_cpuset_from_string -> hwloc_bitmap_sscanf - + Add an `infos' array in each object to store couples of info names and - values. It enables generic storage of things like the old dmi board infos - that were previously stored in machine specific attributes. - + Add linesize cache attribute. -* Features - + Bitmaps (and thus CPU sets and node sets) are dynamically (re-)allocated, - the maximal number of CPUs (HWLOC_NBMAXCPUS) has been removed. - + Improve the distance-based grouping code to better support irregular - distance matrices. - + Add support for device-tree to get cache information (useful on Power - architectures). -* Helpers - + Add NVIDIA CUDA helpers in cuda.h and cudart.h to ease interoperability - with CUDA Runtime and Driver APIs. - + Add Myrinet Express helper in myriexpress.h to ease interoperability. -* Tools - + lstopo now displays physical/OS indexes by default in graphical mode - (use -l to switch back to logical indexes). The textual output still uses - logical by default (use -p to switch to physical indexes). - + lstopo prefixes logical indexes with `L#' and physical indexes with `P#'. 
- Physical indexes are also printed as `P#N' instead of `phys=N' within - object attributes (in parentheses). - + Add a legend at the bottom of the lstopo graphical output, use --no-legend - to remove it. - + Add hwloc-ps to list process' bindings. - + Add --membind and --mempolicy options to hwloc-bind. - + Improve tools command-line options by adding a generic --input option - (and more) which replaces the old --xml, --synthetic and --fsys-root. - + Cleanup lstopo output configuration by adding --output-format. - + Add --intersect in hwloc-calc, and replace --objects with --largest. - + Add the ability to work on standard input in hwloc-calc. - + Add --from, --to and --at in hwloc-distrib. - + Add taskset-specific functions and command-line tools options to - manipulate CPU set strings in the format of the taskset program. - + Install hwloc-gather-topology.sh on Linux. - - -Version 1.0.3 -------------- - -* Fix support for Linux cpuset when emulated by a cgroup mount point. -* Remove unneeded runtime dependency on libibverbs.so in the library and - all utils programs. -* Fix hwloc_cpuset_to_linux_libnuma_ulongs in case of non-linear OS-indexes - for NUMA nodes. -* lstopo now displays physical/OS indexes by default in graphical mode - (use -l to switch back to logical indexes). The textual output still uses - logical by default (use -p to switch to physical indexes). - - -Version 1.0.2 -------------- - -* Public headers can now be included directly from C++ programs. -* Solaris fix for non-contiguous cpu numbers. Thanks to Rolf vandeVaart for - reporting the issue. -* Darwin 10.4 fix. Thanks to Olivier Cessenat for reporting the issue. -* Revert 1.0.1 patch that ignored sockets with unknown ID values since it - only slightly helped POWER7 machines with old Linux kernels while it - prevents recent kernels from getting the complete POWER7 topology. -* Fix hwloc_get_common_ancestor_obj(). -* Remove arch-specific bits in public headers. 
-* Some fixes in the lstopo graphical output. -* Various man page clarifications and minor updates. - - -Version 1.0.1 -------------- - -* Various Solaris fixes. Thanks to Yannick Martin for reporting the issue. -* Fix "non-native" builds on x86 platforms (e.g., when building 32 - bit executables with compilers that natively build 64 bit). -* Ignore sockets with unknown ID values (which fixes issues on POWER7 - machines). Thanks to Greg Bauer for reporting the issue. -* Various man page clarifications and minor updates. -* Fixed memory leaks in hwloc_setup_group_from_min_distance_clique(). -* Fix cache type filtering on MS Windows 7. Thanks to Αλέξανδρος - Παπαδογιαννάκ for reporting the issue. -* Fixed warnings when compiling with -DNDEBUG. - - -Version 1.0.0 -------------- - -* The ABI of the library has changed. -* Backend updates - + Add FreeBSD support. - + Add x86 cpuid based backend. - + Add Linux cgroup support to the Linux cpuset code. - + Support binding of entire multithreaded process on Linux. - + Fix and enable Group support in Windows. - + Cleanup XML export/import. -* Objects - + HWLOC_OBJ_PROC is renamed into HWLOC_OBJ_PU for "Processing Unit", - its stringified type name is now "PU". - + Use new HWLOC_OBJ_GROUP objects instead of MISC when grouping - objects according to NUMA distances or arbitrary OS aggregation. - + Rework memory attributes. - + Add different cpusets in each object to specify processors that - are offline, unavailable, ... - + Cleanup the storage of object names and DMI infos. -* Features - + Add support for looking up specific PID topology information. - + Add hwloc_topology_export_xml() to export the topology in a XML file. - + Add hwloc_topology_get_support() to retrieve the supported features - for the current topology context. - + Support non-SYSTEM object as the root of the tree, use MACHINE in - most common cases. - + Add hwloc_get_*cpubind() routines to retrieve the current binding - of processes and threads. 
-* API - + Add HWLOC_API_VERSION to help detect the currently used API version. - + Add missing ending "e" to *compare* functions. - + Add several routines to emulate PLPA functions. - + Rename and rework the cpuset and/or/xor/not/clear operators to output - their result in a dedicated argument instead of modifying one input. - + Deprecate hwloc_obj_snprintf() in favor of hwloc_obj_type/attr_snprintf(). - + Clarify the use of parent and ancestor in the API, do not use father. - + Replace hwloc_get_system_obj() with hwloc_get_root_obj(). - + Return -1 instead of HWLOC_OBJ_TYPE_MAX in the API since the latter - isn't public. - + Relax constraints in hwloc_obj_type_of_string(). - + Improve displaying of memory sizes. - + Add 0x prefix to cpuset strings. -* Tools - + lstopo now displays logical indexes by default, use --physical to - revert back to OS/physical indexes. - + Add colors in the lstopo graphical outputs to distinguish between online, - offline, reserved, ... objects. - + Extend lstopo to show cpusets, filter objects by type, ... - + Renamed hwloc-mask into hwloc-calc which supports many new options. -* Documentation - + Add a hwloc(7) manpage containing general information. - + Add documentation about how to switch from PLPA to hwloc. - + Cleanup the distributed documentation files. -* Miscellaneous - + Many compilers warning fixes. - + Cleanup the ABI by using the visibility attribute. - + Add project embedding support. - - -Version 0.9.4 (unreleased) --------------------------- - -* Fix reseting colors to normal in lstopo -.txt output. -* Fix Linux pthread_t binding error report. - - -Version 0.9.3 -------------- - -* Fix autogen.sh to work with Autoconf 2.63. 
-* Fix various crashes in particular conditions: - - xml files with root attributes - - offline CPUs - - partial sysfs support - - unparseable /proc/cpuinfo - - ignoring NUMA level while Misc level have been generated -* Tweak documentation a bit -* Do not require the pthread library for binding the current thread on Linux -* Do not erroneously consider the sched_setaffinity prototype is the old version - when there is actually none. -* Fix _syscall3 compilation on archs for which we do not have the - sched_setaffinity system call number. -* Fix AIX binding. -* Fix libraries dependencies: now only lstopo depends on libtermcap, fix - binutils-gold link -* Have make check always build and run hwloc-hello.c -* Do not limit size of a cpuset. - - -Version 0.9.2 -------------- - -* Trivial documentation changes. - - -Version 0.9.1 -------------- - -* Re-branded to "hwloc" and moved to the Open MPI project, relicensed under the - BSD license. -* The prefix of all functions and tools is now hwloc, and some public - functions were also renamed for real. -* Group NUMA nodes into Misc objects according to their physical distance - that may be reported by the OS/BIOS. - May be ignored by setting HWLOC_IGNORE_DISTANCES=1 in the environment. -* Ignore offline CPUs on Solaris. -* Improved binding support on AIX. -* Add HP-UX support. -* CPU sets are now allocated/freed dynamically. -* Add command line options to tune the lstopo graphical output, add - semi-graphical textual output -* Extend topobind to support multiple cpusets or objects on the command - line as topomask does. -* Add an Infiniband-specific helper hwloc/openfabrics-verbs.h to retrieve - the physical location of IB devices. - - -Version 0.9 (libtopology) -------------------------- - -* First release. 
diff --git a/opal/mca/hwloc/hwloc1110/hwloc/README b/opal/mca/hwloc/hwloc1110/hwloc/README deleted file mode 100644 index 9c3ae62d28a..00000000000 --- a/opal/mca/hwloc/hwloc1110/hwloc/README +++ /dev/null @@ -1,446 +0,0 @@ -Introduction - -hwloc provides command line tools and a C API to obtain the -hierarchical map of key computing elements, such as: NUMA memory nodes, -shared caches, processor packages, processor cores, processing units -(logical processors or "threads") and even I/O devices. hwloc also -gathers various attributes such as cache and memory information, and is -portable across a variety of different operating systems and platforms. -Additionally it may assemble the topologies of multiple machines into a -single one so as to let applications consult the topology of an entire -fabric or cluster at once. - -hwloc primarily aims at helping high-performance computing (HPC) -applications, but is also applicable to any project seeking to exploit -code and/or data locality on modern computing platforms. - -Note that the hwloc project represents the merger of the libtopology -project from inria and the Portable Linux Processor Affinity (PLPA) -sub-project from Open MPI. Both of these prior projects are now -deprecated. The first hwloc release was essentially a "re-branding" of -the libtopology code base, but with both a few genuinely new features -and a few PLPA-like features added in. Prior releases of hwloc included -documentation about switching from PLPA to hwloc; this documentation -has been dropped on the assumption that everyone who was using PLPA has -already switched to hwloc. - -hwloc supports the following operating systems: - * Linux (including old kernels not having sysfs topology information, - with knowledge of cpusets, offline CPUs, ScaleMP vSMP, NumaScale - NumaConnect, and Kerrighed support) on all supported hardware, - including Intel Xeon Phi (either standalone or as a coprocessor). 
- * Solaris - * AIX - * Darwin / OS X - * FreeBSD and its variants (such as kFreeBSD/GNU) - * NetBSD - * OSF/1 (a.k.a., Tru64) - * HP-UX - * Microsoft Windows - * IBM BlueGene/Q Compute Node Kernel (CNK) - -Since it uses standard Operating System information, hwloc's support is -mostly independant from the processor type (x86, powerpc, ...) and just -relies on the Operating System support. The only exception to this is -kFreeBSD, which does not support topology information, and hwloc thus -uses an x86-only CPUID-based backend (which can be used for other OSes -too, see the Components and plugins section). - -To check whether hwloc works on a particular machine, just try to build -it and run lstopo or lstopo-no-graphics. If some things do not look -right (e.g. bogus or missing cache information), see Questions and Bugs -below. - -hwloc only reports the number of processors on unsupported operating -systems; no topology information is available. - -For development and debugging purposes, hwloc also offers the ability -to work on "fake" topologies: - * Symmetrical tree of resources generated from a list of level - arities - * Remote machine simulation through the gathering of Linux sysfs - topology files - -hwloc can display the topology in a human-readable format, either in -graphical mode (X11), or by exporting in one of several different -formats, including: plain text, PDF, PNG, and FIG (see CLI Examples -below). Note that some of the export formats require additional support -libraries. - -hwloc offers a programming interface for manipulating topologies and -objects. It also brings a powerful CPU bitmap API that is used to -describe topology objects location on physical/logical processors. See -the Programming Interface below. It may also be used to binding -applications onto certain cores or memory nodes. Several utility -programs are also provided to ease command-line manipulation of -topology objects, binding of processes, and so on. 
- -Perl bindings are available from Bernd Kallies on CPAN. - -Python bindings are available from Guy Streeter: - * Fedora RPM and tarball. - * git tree (html). - -Installation - -hwloc (http://www.open-mpi.org/projects/hwloc/) is available under the -BSD license. It is hosted as a sub-project of the overall Open MPI -project (http://www.open-mpi.org/). Note that hwloc does not require -any functionality from Open MPI -- it is a wholly separate (and much -smaller!) project and code base. It just happens to be hosted as part -of the overall Open MPI project. - -Nightly development snapshots are available on the web site. -Additionally, the code can be directly cloned from Git: -shell$ git clone https://github.com/open-mpi/hwloc.git -shell$ cd hwloc -shell$ ./autogen.sh - -Note that GNU Autoconf >=2.63, Automake >=1.10 and Libtool >=2.2.6 are -required when building from a Git clone. - -Installation by itself is the fairly common GNU-based process: -shell$ ./configure --prefix=... -shell$ make -shell$ make install - -The hwloc command-line tool "lstopo" produces human-readable topology -maps, as mentioned above. It can also export maps to the "fig" file -format. Support for PDF, Postscript, and PNG exporting is provided if -the "Cairo" development package (usually cairo-devel or libcairo2-dev) -can be found in "lstopo" when hwloc is configured and build. - -The hwloc core may also benefit from the following development -packages: - * libnuma for memory binding and migration support on Linux - (numactl-devel or libnuma-dev package). - * libpciaccess for full I/O device discovery (libpciaccess-devel or - libpciaccess-dev package). On Linux, PCI discovery may still be - performed (without vendor/device names) even if libpciaccess cannot - be used. - * the AMD OpenCL implementation for OpenCL device discovery. - * the NVIDIA CUDA Toolkit for CUDA device discovery. - * the NVIDIA Tesla Development Kit for NVML device discovery. 
- * the NV-CONTROL X extension library (NVCtrl) for NVIDIA display - discovery. - * libxml2 for full XML import/export support (otherwise, the internal - minimalistic parser will only be able to import XML files that were - exported by the same hwloc release). See Importing and exporting - topologies from/to XML files for details. The relevant development - package is usually libxml2-devel or libxml2-dev. - * libudev on Linux for easier discovery of OS device information - (otherwise hwloc will try to manually parse udev raw files). The - relevant development package is usually libudev-devel or - libudev-dev. - * libtool's ltdl library for dynamic plugin loading. The relevant - development package is usually libtool-ltdl-devel or libltdl-dev. - -PCI and XML support may be statically built inside the main hwloc -library, or as separate dynamically-loaded plugins (see the Components -and plugins section). - -Note that because of the possibility of GPL taint, the pciutils library -libpci will not be used (remember that hwloc is BSD-licensed). - -Also note that if you install supplemental libraries in non-standard -locations, hwloc's configure script may not be able to find them -without some help. You may need to specify additional CPPFLAGS, -LDFLAGS, or PKG_CONFIG_PATH values on the configure command line. - -For example, if libpciaccess was installed into /opt/pciaccess, hwloc's -configure script may not find it be default. Try adding PKG_CONFIG_PATH -to the ./configure command line, like this: -./configure PKG_CONFIG_PATH=/opt/pciaccess/lib/pkgconfig ... 
- -CLI Examples - -On a 4-package 2-core machine with hyper-threading, the lstopo tool may -show the following graphical output: - - dudley.png - -Here's the equivalent output in textual form: -Machine (16GB) - Package L#0 + L3 L#0 (4096KB) - L2 L#0 (1024KB) + L1 L#0 (16KB) + Core L#0 - PU L#0 (P#0) - PU L#1 (P#8) - L2 L#1 (1024KB) + L1 L#1 (16KB) + Core L#1 - PU L#2 (P#4) - PU L#3 (P#12) - Package L#1 + L3 L#1 (4096KB) - L2 L#2 (1024KB) + L1 L#2 (16KB) + Core L#2 - PU L#4 (P#1) - PU L#5 (P#9) - L2 L#3 (1024KB) + L1 L#3 (16KB) + Core L#3 - PU L#6 (P#5) - PU L#7 (P#13) - Package L#2 + L3 L#2 (4096KB) - L2 L#4 (1024KB) + L1 L#4 (16KB) + Core L#4 - PU L#8 (P#2) - PU L#9 (P#10) - L2 L#5 (1024KB) + L1 L#5 (16KB) + Core L#5 - PU L#10 (P#6) - PU L#11 (P#14) - Package L#3 + L3 L#3 (4096KB) - L2 L#6 (1024KB) + L1 L#6 (16KB) + Core L#6 - PU L#12 (P#3) - PU L#13 (P#11) - L2 L#7 (1024KB) + L1 L#7 (16KB) + Core L#7 - PU L#14 (P#7) - PU L#15 (P#15) - -Note that there is also an equivalent output in XML that is meant for -exporting/importing topologies but it is hardly readable to -human-beings (see Importing and exporting topologies from/to XML files -for details). 
- -On a 4-package 2-core Opteron NUMA machine, the lstopo tool may show -the following graphical output: - - hagrid.png - -Here's the equivalent output in textual form: -Machine (32GB) - NUMANode L#0 (P#0 8190MB) + Package L#0 - L2 L#0 (1024KB) + L1 L#0 (64KB) + Core L#0 + PU L#0 (P#0) - L2 L#1 (1024KB) + L1 L#1 (64KB) + Core L#1 + PU L#1 (P#1) - NUMANode L#1 (P#1 8192MB) + Package L#1 - L2 L#2 (1024KB) + L1 L#2 (64KB) + Core L#2 + PU L#2 (P#2) - L2 L#3 (1024KB) + L1 L#3 (64KB) + Core L#3 + PU L#3 (P#3) - NUMANode L#2 (P#2 8192MB) + Package L#2 - L2 L#4 (1024KB) + L1 L#4 (64KB) + Core L#4 + PU L#4 (P#4) - L2 L#5 (1024KB) + L1 L#5 (64KB) + Core L#5 + PU L#5 (P#5) - NUMANode L#3 (P#3 8192MB) + Package L#3 - L2 L#6 (1024KB) + L1 L#6 (64KB) + Core L#6 + PU L#6 (P#6) - L2 L#7 (1024KB) + L1 L#7 (64KB) + Core L#7 + PU L#7 (P#7) - -On a 2-package quad-core Xeon (pre-Nehalem, with 2 dual-core dies into -each package): - - emmett.png - -Here's the same output in textual form: -Machine (16GB) - Package L#0 - L2 L#0 (4096KB) - L1 L#0 (32KB) + Core L#0 + PU L#0 (P#0) - L1 L#1 (32KB) + Core L#1 + PU L#1 (P#4) - L2 L#1 (4096KB) - L1 L#2 (32KB) + Core L#2 + PU L#2 (P#2) - L1 L#3 (32KB) + Core L#3 + PU L#3 (P#6) - Package L#1 - L2 L#2 (4096KB) - L1 L#4 (32KB) + Core L#4 + PU L#4 (P#1) - L1 L#5 (32KB) + Core L#5 + PU L#5 (P#5) - L2 L#3 (4096KB) - L1 L#6 (32KB) + Core L#6 + PU L#6 (P#3) - L1 L#7 (32KB) + Core L#7 + PU L#7 (P#7) - -Programming Interface - -The basic interface is available in hwloc.h. Some higher-level -functions are available in hwloc/helper.h to reduce the need to -manually manipulate objects and follow links between them. -Documentation for all these is provided later in this document. -Developers may also want to look at hwloc/inlines.h which contains the -actual inline code of some hwloc.h routines, and at this document, -which provides good higher-level topology traversal examples. 
- -To precisely define the vocabulary used by hwloc, a Terms and -Definitions section is available and should probably be read first. - -Each hwloc object contains a cpuset describing the list of processing -units that it contains. These bitmaps may be used for CPU binding and -Memory binding. hwloc offers an extensive bitmap manipulation interface -in hwloc/bitmap.h. - -Moreover, hwloc also comes with additional helpers for interoperability -with several commonly used environments. See the Interoperability With -Other Software section for details. - -The complete API documentation is available in a full set of HTML -pages, man pages, and self-contained PDF files (formatted for both both -US letter and A4 formats) in the source tarball in doc/doxygen-doc/. - -NOTE: If you are building the documentation from a Git clone, you will -need to have Doxygen and pdflatex installed -- the documentation will -be built during the normal "make" process. The documentation is -installed during "make install" to $prefix/share/doc/hwloc/ and your -systems default man page tree (under $prefix, of course). - -Portability - -As shown in CLI Examples, hwloc can obtain information on a wide -variety of hardware topologies. However, some platforms and/or -operating system versions will only report a subset of this -information. For example, on an PPC64-based system with 32 cores (each -with 2 hardware threads) running a default 2.6.18-based kernel from -RHEL 5.4, hwloc is only able to glean information about NUMA nodes and -processor units (PUs). No information about caches, packages, or cores -is available. - -Similarly, Operating System have varying support for CPU and memory -binding, e.g. while some Operating Systems provide interfaces for all -kinds of CPU and memory bindings, some others provide only interfaces -for a limited number of kinds of CPU and memory binding, and some do -not provide any binding interface at all. 
Hwloc's binding functions -would then simply return the ENOSYS error (Function not implemented), -meaning that the underlying Operating System does not provide any -interface for them. CPU binding and Memory binding provide more -information on which hwloc binding functions should be preferred -because interfaces for them are usually available on the supported -Operating Systems. - -Here's the graphical output from lstopo on this platform when -Simultaneous Multi-Threading (SMT) is enabled: - - ppc64-with-smt.png - -And here's the graphical output from lstopo on this platform when SMT -is disabled: - - ppc64-without-smt.png - -Notice that hwloc only sees half the PUs when SMT is disabled. PU #15, -for example, seems to change location from NUMA node #0 to #1. In -reality, no PUs "moved" -- they were simply re-numbered when hwloc only -saw half as many. Hence, PU #15 in the SMT-disabled picture probably -corresponds to PU #30 in the SMT-enabled picture. - -This same "PUs have disappeared" effect can be seen on other platforms --- even platforms / OSs that provide much more information than the -above PPC64 system. This is an unfortunate side-effect of how operating -systems report information to hwloc. - -Note that upgrading the Linux kernel on the same PPC64 system mentioned -above to 2.6.34, hwloc is able to discover all the topology -information. The following picture shows the entire topology layout -when SMT is enabled: - - ppc64-full-with-smt.png - -Developers using the hwloc API or XML output for portable applications -should therefore be extremely careful to not make any assumptions about -the structure of data that is returned. For example, per the above -reported PPC topology, it is not safe to assume that PUs will always be -descendants of cores. - -Additionally, future hardware may insert new topology elements that are -not available in this version of hwloc. 
Long-lived applications that -are meant to span multiple different hardware platforms should also be -careful about making structure assumptions. For example, there may -someday be an element "lower" than a PU, or perhaps a new element may -exist between a core and a PU. - -API Example - -The following small C example (named ``hwloc-hello.c'') prints the -topology of the machine and bring the process to the first logical -processor of the second core of the machine. More examples are -available in the doc/examples/ directory of the source tree. - -hwloc provides a pkg-config executable to obtain relevant compiler and -linker flags. For example, it can be used thusly to compile -applications that utilize the hwloc library (assuming GNU Make): -CFLAGS += $(pkg-config --cflags hwloc) -LDLIBS += $(pkg-config --libs hwloc) -cc hwloc-hello.c $(CFLAGS) -o hwloc-hello $(LDLIBS) - -On a machine with 4GB of RAM and 2 processor packages -- each package -of which has two processing cores -- the output from running -hwloc-hello could be something like the following: -shell$ ./hwloc-hello -*** Objects at level 0 -Index 0: Machine(3938MB) -*** Objects at level 1 -Index 0: Package#0 -Index 1: Package#1 -*** Objects at level 2 -Index 0: Core#0 -Index 1: Core#1 -Index 2: Core#3 -Index 3: Core#2 -*** Objects at level 3 -Index 0: PU#0 -Index 1: PU#1 -Index 2: PU#2 -Index 3: PU#3 -*** Printing overall tree -Machine(3938MB) - Package#0 - Core#0 - PU#0 - Core#1 - PU#1 - Package#1 - Core#3 - PU#2 - Core#2 - PU#3 -*** 2 package(s) -shell$ - -Questions and Bugs - -Questions should be sent to the devel mailing list -(http://www.open-mpi.org/community/lists/hwloc.php). Bug reports should -be reported in the tracker (https://git.open-mpi.org/trac/hwloc/). - -If hwloc discovers an incorrect topology for your machine, the very -first thing you should check is to ensure that you have the most recent -updates installed for your operating system. 
Indeed, most of hwloc -topology discovery relies on hardware information retrieved through the -operation system (e.g., via the /sys virtual filesystem of the Linux -kernel). If upgrading your OS or Linux kernel does not solve your -problem, you may also want to ensure that you are running the most -recent version of the BIOS for your machine. - -If those things fail, contact us on the mailing list for additional -help. Please attach the output of lstopo after having given the ---enable-debug option to ./configure and rebuilt completely, to get -debugging output. Also attach the /proc + /sys tarball generated by the -installed script hwloc-gather-topology when submitting problems about -Linux, or send the output of kstat cpu_info in the Solaris case, or the -output of sysctl hw in the Darwin or BSD cases. - -History / Credits - -hwloc is the evolution and merger of the libtopology -(http://runtime.bordeaux.inria.fr/libtopology/) project and the -Portable Linux Processor Affinity (PLPA) -(http://www.open-mpi.org/projects/plpa/) project. Because of functional -and ideological overlap, these two code bases and ideas were merged and -released under the name "hwloc" as an Open MPI sub-project. - -libtopology was initially developed by the inria Runtime Team-Project -(http://runtime.bordeaux.inria.fr/) (headed by Raymond Namyst -(http://dept-info.labri.fr/~namyst/). PLPA was initially developed by -the Open MPI development team as a sub-project. Both are now deprecated -in favor of hwloc, which is distributed as an Open MPI sub-project. 
- -Further Reading - -The documentation chapters include - * Terms and Definitions - * Command-Line Tools - * Environment Variables - * CPU and Memory Binding Overview - * I/O Devices - * Miscellaneous objects - * Multi-node Topologies - * Object attributes - * Importing and exporting topologies from/to XML files - * Synthetic topologies - * Interoperability With Other Software - * Thread Safety - * Components and plugins - * Embedding hwloc in Other Software - * Frequently Asked Questions - -Make sure to have had a look at those too! - __________________________________________________________________ - - - Generated on 5 Jun 2015 for Hardware Locality (hwloc) by doxygen - 1.6.1 diff --git a/opal/mca/hwloc/hwloc1110/hwloc/VERSION b/opal/mca/hwloc/hwloc1110/hwloc/VERSION deleted file mode 100644 index fae47659b67..00000000000 --- a/opal/mca/hwloc/hwloc1110/hwloc/VERSION +++ /dev/null @@ -1,42 +0,0 @@ -# This is the VERSION file for hwloc, describing the precise version -# of hwloc in this distribution. The various components of the version -# number below are combined to form a single version number string. - -# major, minor, and release are generally combined in the form -# ... If release is zero, then it is omitted. - -major=1 -minor=11 -release=0 - -# greek is used for alpha or beta release tags. If it is non-empty, -# it will be appended to the version number. It does not have to be -# numeric. Common examples include a1 (alpha release 1), b1 (beta -# release 1), sc2005 (Super Computing 2005 release). The only -# requirement is that it must be entirely printable ASCII characters -# and have no white space. - -greek=rc2 - -# The date when this release was created - -date="Unreleased developer copy" - -# If snapshot=1, then use the value from snapshot_version as the -# entire hwloc version (i.e., ignore major, minor, release, and -# greek). This is only set to 1 when making snapshot tarballs. 
-snapshot=1 -snapshot_version=dev-450-g1cc3012 - -# The shared library version of hwloc's public library. This version -# is maintained in accordance with the "Library Interface Versions" -# chapter from the GNU Libtool documentation. Notes: - -# 1. Since version numbers are associated with *releases*, the version -# number maintained on the hwloc git master (and developer branches) -# is always 0:0:0. - -# 2. Version numbers are described in the Libtool current:revision:age -# format. - -libhwloc_so_version=11:6:6 diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc.h b/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc.h deleted file mode 100644 index 66e122089c0..00000000000 --- a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc.h +++ /dev/null @@ -1,2413 +0,0 @@ -/* - * Copyright © 2009 CNRS - * Copyright © 2009-2015 Inria. All rights reserved. - * Copyright © 2009-2012 Université Bordeaux - * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. - * See COPYING in top-level directory. - */ - -/*===================================================================== - * PLEASE GO READ THE DOCUMENTATION! - * ------------------------------------------------ - * $tarball_directory/doc/doxygen-doc/ - * or - * http://www.open-mpi.org/projects/hwloc/doc/ - *===================================================================== - * - * FAIR WARNING: Do NOT expect to be able to figure out all the - * subtleties of hwloc by simply reading function prototypes and - * constant descrptions here in this file. - * - * Hwloc has wonderful documentation in both PDF and HTML formats for - * your reading pleasure. The formal documentation explains a LOT of - * hwloc-specific concepts, provides definitions, and discusses the - * "big picture" for many of the things that you'll find here in this - * header file. - * - * The PDF/HTML documentation was generated via Doxygen; much of what - * you'll see in there is also here in this file. 
BUT THERE IS A LOT - * THAT IS IN THE PDF/HTML THAT IS ***NOT*** IN hwloc.h! - * - * There are entire paragraph-length descriptions, discussions, and - * pretty prictures to explain subtle corner cases, provide concrete - * examples, etc. - * - * Please, go read the documentation. :-) - * - * Moreover there are several examples of hwloc use under doc/examples - * in the source tree. - * - *=====================================================================*/ - -/** \file - * \brief The hwloc API. - * - * See hwloc/bitmap.h for bitmap specific macros. - * See hwloc/helper.h for high-level topology traversal helpers. - * See hwloc/inlines.h for the actual inline code of some functions below. - */ - -#ifndef HWLOC_H -#define HWLOC_H - -#include -#include -#include -#include -#include - -/* - * Symbol transforms - */ -#include - -/* - * Bitmap definitions - */ - -#include - - -#ifdef __cplusplus -extern "C" { -#endif - - -/** \defgroup hwlocality_api_version API version - * @{ - */ - -/** \brief Indicate at build time which hwloc API version is being used. */ -#define HWLOC_API_VERSION 0x00010b00 - -/** \brief Indicate at runtime which hwloc API version was used at build time. */ -HWLOC_DECLSPEC unsigned hwloc_get_api_version(void); - -/** \brief Current component and plugin ABI version (see hwloc/plugins.h) */ -#define HWLOC_COMPONENT_ABI 4 - -/** @} */ - - - -/** \defgroup hwlocality_object_sets Object Sets (hwloc_cpuset_t and hwloc_nodeset_t) - * - * Hwloc uses bitmaps to represent two distinct kinds of object sets: - * CPU sets (::hwloc_cpuset_t) and NUMA node sets (::hwloc_nodeset_t). - * These types are both typedefs to a common back end type - * (::hwloc_bitmap_t), and therefore all the hwloc bitmap functions - * are applicable to both ::hwloc_cpuset_t and ::hwloc_nodeset_t (see - * \ref hwlocality_bitmap). 
- * - * The rationale for having two different types is that even though - * the actions one wants to perform on these types are the same (e.g., - * enable and disable individual items in the set/mask), they're used - * in very different contexts: one for specifying which processors to - * use and one for specifying which NUMA nodes to use. Hence, the - * name difference is really just to reflect the intent of where the - * type is used. - * - * @{ - */ - -/** \brief A CPU set is a bitmap whose bits are set according to CPU - * physical OS indexes. - * - * It may be consulted and modified with the bitmap API as any - * ::hwloc_bitmap_t (see hwloc/bitmap.h). - * - * Each bit may be converted into a PU object using - * hwloc_get_pu_obj_by_os_index(). - */ -typedef hwloc_bitmap_t hwloc_cpuset_t; -/** \brief A non-modifiable ::hwloc_cpuset_t. */ -typedef hwloc_const_bitmap_t hwloc_const_cpuset_t; - -/** \brief A node set is a bitmap whose bits are set according to NUMA - * memory node physical OS indexes. - * - * It may be consulted and modified with the bitmap API as any - * ::hwloc_bitmap_t (see hwloc/bitmap.h). - * Each bit may be converted into a NUMA node object using - * hwloc_get_numanode_obj_by_os_index(). - * - * When binding memory on a system without any NUMA node - * (when the whole memory is considered as a single memory bank), - * the nodeset may be either empty (no memory selected) - * or full (whole system memory selected). - * - * See also \ref hwlocality_helper_nodeset_convert. - */ -typedef hwloc_bitmap_t hwloc_nodeset_t; -/** \brief A non-modifiable ::hwloc_nodeset_t. - */ -typedef hwloc_const_bitmap_t hwloc_const_nodeset_t; - -/** @} */ - - - -/** \defgroup hwlocality_object_types Object Types - * @{ - */ - -/** \brief Type of topology object. - * - * \note Do not rely on the ordering or completeness of the values as new ones - * may be defined in the future! If you need to compare types, use - * hwloc_compare_types() instead. 
- */ -typedef enum { - /* *************************************************************** - WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING - - If new enum values are added here, you MUST also go update the - obj_type_order[] and obj_order_type[] arrays in src/topology.c. - - WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING - *************************************************************** */ - - HWLOC_OBJ_SYSTEM, /**< \brief Whole system (may be a cluster of machines). - * The whole system that is accessible to hwloc. - * That may comprise several machines in SSI systems - * like Kerrighed. - */ - HWLOC_OBJ_MACHINE, /**< \brief Machine. - * The typical root object type. - * A set of processors and memory with cache - * coherency. - */ - HWLOC_OBJ_NUMANODE, /**< \brief NUMA node. - * A set of processors around memory which the - * processors can directly access. - */ - HWLOC_OBJ_PACKAGE, /**< \brief Physical package, what goes into a socket. - * In the physical meaning, i.e. that you can add - * or remove physically. - */ - HWLOC_OBJ_CACHE, /**< \brief Cache. - * Can be L1i, L1d, L2, L3, ... - */ - HWLOC_OBJ_CORE, /**< \brief Core. - * A computation unit (may be shared by several - * logical processors). - */ - HWLOC_OBJ_PU, /**< \brief Processing Unit, or (Logical) Processor. - * An execution unit (may share a core with some - * other logical processors, e.g. in the case of - * an SMT core). - * - * Objects of this kind are always reported and can - * thus be used as fallback when others are not. - */ - - HWLOC_OBJ_GROUP, /**< \brief Group objects. - * Objects which do not fit in the above but are - * detected by hwloc and are useful to take into - * account for affinity. For instance, some operating systems - * expose their arbitrary processors aggregation this - * way. And hwloc may insert such objects to group - * NUMA nodes according to their distances. - * - * These objects are ignored when they do not bring - * any structure. 
- */ - - HWLOC_OBJ_MISC, /**< \brief Miscellaneous objects. - * Objects without particular meaning, that can e.g. be - * added by the application for its own use, or by hwloc - * for miscellaneous objects such as MemoryDevice. - */ - - HWLOC_OBJ_BRIDGE, /**< \brief Bridge. - * Any bridge that connects the host or an I/O bus, - * to another I/O bus. - * Bridge objects have neither CPU sets nor node sets. - * They are not added to the topology unless I/O discovery - * is enabled with hwloc_topology_set_flags(). - */ - HWLOC_OBJ_PCI_DEVICE, /**< \brief PCI device. - * These objects have neither CPU sets nor node sets. - * They are not added to the topology unless I/O discovery - * is enabled with hwloc_topology_set_flags(). - */ - HWLOC_OBJ_OS_DEVICE, /**< \brief Operating system device. - * These objects have neither CPU sets nor node sets. - * They are not added to the topology unless I/O discovery - * is enabled with hwloc_topology_set_flags(). - */ - - HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */ - - /* *************************************************************** - WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING - - If new enum values are added here, you MUST also go update the - obj_type_order[] and obj_order_type[] arrays in src/topology.c. - - WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING - *************************************************************** */ -} hwloc_obj_type_t; - -/** \brief Cache type. */ -typedef enum hwloc_obj_cache_type_e { - HWLOC_OBJ_CACHE_UNIFIED, /**< \brief Unified cache. */ - HWLOC_OBJ_CACHE_DATA, /**< \brief Data cache. */ - HWLOC_OBJ_CACHE_INSTRUCTION /**< \brief Instruction cache. - * Only used when the HWLOC_TOPOLOGY_FLAG_ICACHES topology flag is set. */ -} hwloc_obj_cache_type_t; - -/** \brief Type of one side (upstream or downstream) of an I/O bridge. */ -typedef enum hwloc_obj_bridge_type_e { - HWLOC_OBJ_BRIDGE_HOST, /**< \brief Host-side of a bridge, only possible upstream. 
*/ - HWLOC_OBJ_BRIDGE_PCI /**< \brief PCI-side of a bridge. */ -} hwloc_obj_bridge_type_t; - -/** \brief Type of a OS device. */ -typedef enum hwloc_obj_osdev_type_e { - HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device. - * For instance "sda" on Linux. */ - HWLOC_OBJ_OSDEV_GPU, /**< \brief Operating system GPU device. - * For instance ":0.0" for a GL display, - * "card0" for a Linux DRM device. */ - HWLOC_OBJ_OSDEV_NETWORK, /**< \brief Operating system network device. - * For instance the "eth0" interface on Linux. */ - HWLOC_OBJ_OSDEV_OPENFABRICS, /**< \brief Operating system openfabrics device. - * For instance the "mlx4_0" InfiniBand HCA device on Linux. */ - HWLOC_OBJ_OSDEV_DMA, /**< \brief Operating system dma engine device. - * For instance the "dma0chan0" DMA channel on Linux. */ - HWLOC_OBJ_OSDEV_COPROC /**< \brief Operating system co-processor device. - * For instance "mic0" for a Xeon Phi (MIC) on Linux, - * "opencl0d0" for a OpenCL device, - * "cuda0" for a CUDA device. */ -} hwloc_obj_osdev_type_t; - -/** \brief Compare the depth of two object types - * - * Types shouldn't be compared as they are, since newer ones may be added in - * the future. This function returns less than, equal to, or greater than zero - * respectively if \p type1 objects usually include \p type2 objects, are the - * same as \p type2 objects, or are included in \p type2 objects. If the types - * can not be compared (because neither is usually contained in the other), - * HWLOC_TYPE_UNORDERED is returned. Object types containing CPUs can always - * be compared (usually, a system contains machines which contain nodes which - * contain packages which contain caches, which contain cores, which contain - * processors). - * - * \note HWLOC_OBJ_PU will always be the deepest. - * \note This does not mean that the actual topology will respect that order: - * e.g. as of today cores may also contain caches, and packages may also contain - * nodes. 
This is thus just to be seen as a fallback comparison method. - */ -HWLOC_DECLSPEC int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) __hwloc_attribute_const; - -enum hwloc_compare_types_e { - HWLOC_TYPE_UNORDERED = INT_MAX /**< \brief Value returned by hwloc_compare_types when types can not be compared. \hideinitializer */ -}; - -/** @} */ - - - -/** \defgroup hwlocality_objects Object Structure and Attributes - * @{ - */ - -union hwloc_obj_attr_u; - -/** \brief Object memory */ -struct hwloc_obj_memory_s { - hwloc_uint64_t total_memory; /**< \brief Total memory (in bytes) in this object and its children */ - hwloc_uint64_t local_memory; /**< \brief Local memory (in bytes) */ - - /** \brief Size of array \p page_types */ - unsigned page_types_len; - /** \brief Array of local memory page types, \c NULL if no local memory and \p page_types is 0. - * - * The array is sorted by increasing \p size fields. - * It contains \p page_types_len slots. - */ - struct hwloc_obj_memory_page_type_s { - hwloc_uint64_t size; /**< \brief Size of pages */ - hwloc_uint64_t count; /**< \brief Number of pages of this size */ - } * page_types; -}; - -/** \brief Structure of a topology object - * - * Applications must not modify any field except hwloc_obj.userdata. - */ -struct hwloc_obj { - /* physical information */ - hwloc_obj_type_t type; /**< \brief Type of object */ - unsigned os_index; /**< \brief OS-provided physical index number. - * It is not guaranteed unique across the entire machine, - * except for PUs and NUMA nodes. - */ - char *name; /**< \brief Object description if any */ - - struct hwloc_obj_memory_s memory; /**< \brief Memory attributes */ - - union hwloc_obj_attr_u *attr; /**< \brief Object type-specific Attributes, - * may be \c NULL if no attribute value was found */ - - /* global position */ - unsigned depth; /**< \brief Vertical index in the hierarchy. 
- * If the topology is symmetric, this is equal to the - * parent depth plus one, and also equal to the number - * of parent/child links from the root object to here. - */ - unsigned logical_index; /**< \brief Horizontal index in the whole list of similar objects, - * hence guaranteed unique across the entire machine. - * Could be a "cousin_rank" since it's the rank within the "cousin" list below - */ - signed os_level; /**< \brief OS-provided physical level, -1 if unknown or meaningless */ - - /* cousins are all objects of the same type (and depth) across the entire topology */ - struct hwloc_obj *next_cousin; /**< \brief Next object of same type and depth */ - struct hwloc_obj *prev_cousin; /**< \brief Previous object of same type and depth */ - - /* children of the same parent are siblings, even if they may have different type and depth */ - struct hwloc_obj *parent; /**< \brief Parent, \c NULL if root (system object) */ - unsigned sibling_rank; /**< \brief Index in parent's \c children[] array */ - struct hwloc_obj *next_sibling; /**< \brief Next object below the same parent */ - struct hwloc_obj *prev_sibling; /**< \brief Previous object below the same parent */ - - /* children array below this object */ - unsigned arity; /**< \brief Number of children */ - struct hwloc_obj **children; /**< \brief Children, \c children[0 .. arity -1] */ - struct hwloc_obj *first_child; /**< \brief First child */ - struct hwloc_obj *last_child; /**< \brief Last child */ - - /* misc */ - void *userdata; /**< \brief Application-given private data pointer, - * initialized to \c NULL, use it as you wish. - * See hwloc_topology_set_userdata_export_callback() - * if you wish to export this field to XML. */ - - /* cpusets and nodesets */ - hwloc_cpuset_t cpuset; /**< \brief CPUs covered by this object - * - * This is the set of CPUs for which there are PU objects in the topology - * under this object, i.e. 
which are known to be physically contained in this - * object and known how (the children path between this object and the PU - * objects). - * - * If the HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, some of - * these CPUs may be offline, or not allowed for binding, see online_cpuset - * and allowed_cpuset. - * - * \note Its value must not be changed, hwloc_bitmap_dup must be used instead. - */ - hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of logical processors of this object, - * - * This includes not only the same as the cpuset field, but also the CPUs for - * which topology information is unknown or incomplete, and the CPUs that are - * ignored when the HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set. - * Thus no corresponding PU object may be found in the topology, because the - * precise position is undefined. It is however known that it would be somewhere - * under this object. - * - * \note Its value must not be changed, hwloc_bitmap_dup must be used instead. - */ - hwloc_cpuset_t online_cpuset; /**< \brief The CPU set of online logical processors - * - * This includes the CPUs contained in this object that are online, i.e. draw - * power and can execute threads. It may however not be allowed to bind to - * them due to administration rules, see allowed_cpuset. - * - * \note Its value must not be changed, hwloc_bitmap_dup must be used instead. - */ - hwloc_cpuset_t allowed_cpuset; /**< \brief The CPU set of allowed logical processors - * - * This includes the CPUs contained in this object which are allowed for - * binding, i.e. passing them to the hwloc binding functions should not return - * permission errors. This is usually restricted by administration rules. - * Some of them may however be offline so binding to them may still not be - * possible, see online_cpuset. - * - * \note Its value must not be changed, hwloc_bitmap_dup must be used instead. 
- */ - - hwloc_nodeset_t nodeset; /**< \brief NUMA nodes covered by this object or containing this object - * - * This is the set of NUMA nodes for which there are NODE objects in the - * topology under or above this object, i.e. which are known to be physically - * contained in this object or containing it and known how (the children path - * between this object and the NODE objects). - * - * In the end, these nodes are those that are close to the current object. - * - * If the HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, some of - * these nodes may not be allowed for allocation, see allowed_nodeset. - * - * If there are no NUMA nodes in the machine, all the memory is close to this - * object, so \p nodeset is full. - * - * \note Its value must not be changed, hwloc_bitmap_dup must be used instead. - */ - hwloc_nodeset_t complete_nodeset; /**< \brief The complete NUMA node set of this object, - * - * This includes not only the same as the nodeset field, but also the NUMA - * nodes for which topology information is unknown or incomplete, and the nodes - * that are ignored when the HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set. - * Thus no corresponding NODE object may be found in the topology, because the - * precise position is undefined. It is however known that it would be - * somewhere under this object. - * - * If there are no NUMA nodes in the machine, all the memory is close to this - * object, so \p complete_nodeset is full. - * - * \note Its value must not be changed, hwloc_bitmap_dup must be used instead. - */ - hwloc_nodeset_t allowed_nodeset; /**< \brief The set of allowed NUMA memory nodes - * - * This includes the NUMA memory nodes contained in this object which are - * allowed for memory allocation, i.e. passing them to NUMA node-directed - * memory allocation should not return permission errors. This is usually - * restricted by administration rules. 
- * - * If there are no NUMA nodes in the machine, all the memory is close to this - * object, so \p allowed_nodeset is full. - * - * \note Its value must not be changed, hwloc_bitmap_dup must be used instead. - */ - - struct hwloc_distances_s **distances; /**< \brief Distances between all objects at same depth below this object */ - unsigned distances_count; - - struct hwloc_obj_info_s *infos; /**< \brief Array of stringified info type=name. */ - unsigned infos_count; /**< \brief Size of infos array. */ - - int symmetric_subtree; /**< \brief Set if the subtree of objects below this object is symmetric, - * which means all children and their children have identical subtrees. - * If set in the topology root object, lstopo may export the topology - * as a synthetic string. - */ -}; -/** - * \brief Convenience typedef; a pointer to a struct hwloc_obj. - */ -typedef struct hwloc_obj * hwloc_obj_t; - -/** \brief Object type-specific Attributes */ -union hwloc_obj_attr_u { - /** \brief Cache-specific Object Attributes */ - struct hwloc_cache_attr_s { - hwloc_uint64_t size; /**< \brief Size of cache in bytes */ - unsigned depth; /**< \brief Depth of cache (e.g., L1, L2, ...etc.) */ - unsigned linesize; /**< \brief Cache-line size in bytes. 
0 if unknown */ - int associativity; /**< \brief Ways of associativity, - * -1 if fully associative, 0 if unknown */ - hwloc_obj_cache_type_t type; /**< \brief Cache type */ - } cache; - /** \brief Group-specific Object Attributes */ - struct hwloc_group_attr_s { - unsigned depth; /**< \brief Depth of group object */ - } group; - /** \brief PCI Device specific Object Attributes */ - struct hwloc_pcidev_attr_s { - unsigned short domain; - unsigned char bus, dev, func; - unsigned short class_id; - unsigned short vendor_id, device_id, subvendor_id, subdevice_id; - unsigned char revision; - float linkspeed; /* in GB/s */ - } pcidev; - /** \brief Bridge specific Object Attribues */ - struct hwloc_bridge_attr_s { - union { - struct hwloc_pcidev_attr_s pci; - } upstream; - hwloc_obj_bridge_type_t upstream_type; - union { - struct { - unsigned short domain; - unsigned char secondary_bus, subordinate_bus; - } pci; - } downstream; - hwloc_obj_bridge_type_t downstream_type; - unsigned depth; - } bridge; - /** \brief OS Device specific Object Attributes */ - struct hwloc_osdev_attr_s { - hwloc_obj_osdev_type_t type; - } osdev; -}; - -/** \brief Distances between objects - * - * One object may contain a distance structure describing distances - * between all its descendants at a given relative depth. If the - * containing object is the root object of the topology, then the - * distances are available for all objects in the machine. - * - * If the \p latency pointer is not \c NULL, the pointed array contains - * memory latencies (non-zero values), see below. - * - * In the future, some other types of distances may be considered. - * In these cases, \p latency may be \c NULL. - */ -struct hwloc_distances_s { - unsigned relative_depth; /**< \brief Relative depth of the considered objects - * below the object containing this distance information. */ - unsigned nbobjs; /**< \brief Number of objects considered in the matrix. 
- * It is the number of descendant objects at \p relative_depth - * below the containing object. - * It corresponds to the result of hwloc_get_nbobjs_inside_cpuset_by_depth(). */ - - float *latency; /**< \brief Matrix of latencies between objects, stored as a one-dimension array. - * May be \c NULL if the distances considered here are not latencies. - * - * Unless defined by the user, this currently contains latencies - * between NUMA nodes (as reported in the System Locality Distance Information Table - * (SLIT) in the ACPI specification), which may or may not be accurate. - * It corresponds to the latency for accessing the memory of one node - * from a core in another node. - * - * Values are normalized to get 1.0 as the minimal value in the matrix. - * Latency from i-th to j-th object is stored in slot i*nbobjs+j. - */ - float latency_max; /**< \brief The maximal value in the latency matrix. */ - float latency_base; /**< \brief The multiplier that should be applied to latency matrix - * to retrieve the original OS-provided latencies. - * Usually 10 on Linux since ACPI SLIT uses 10 for local latency. - */ -}; - -/** \brief Object info */ -struct hwloc_obj_info_s { - char *name; /**< \brief Info name */ - char *value; /**< \brief Info value */ -}; - -/** @} */ - - - -/** \defgroup hwlocality_creation Topology Creation and Destruction - * @{ - */ - -struct hwloc_topology; -/** \brief Topology context - * - * To be initialized with hwloc_topology_init() and built with hwloc_topology_load(). - */ -typedef struct hwloc_topology * hwloc_topology_t; - -/** \brief Allocate a topology context. - * - * \param[out] topologyp is assigned a pointer to the new allocated context. - * - * \return 0 on success, -1 on error. - */ -HWLOC_DECLSPEC int hwloc_topology_init (hwloc_topology_t *topologyp); - -/** \brief Build the actual topology - * - * Build the actual topology once initialized with hwloc_topology_init() and - * tuned with \ref hwlocality_configuration routines. 
- * No other routine may be called earlier using this topology context. - * - * \param topology is the topology to be loaded with objects. - * - * \return 0 on success, -1 on error. - * - * \note On failure, the topology is reinitialized. It should be either - * destroyed with hwloc_topology_destroy() or configured and loaded again. - * - * \note This function may be called only once per topology. - * - * \sa hwlocality_configuration - */ -HWLOC_DECLSPEC int hwloc_topology_load(hwloc_topology_t topology); - -/** \brief Terminate and free a topology context - * - * \param topology is the topology to be freed - */ -HWLOC_DECLSPEC void hwloc_topology_destroy (hwloc_topology_t topology); - -/** \brief Duplicate a topology. - * - * The entire topology structure as well as its objects - * are duplicated into a new one. - * - * This is useful for keeping a backup while modifying a topology. - */ -HWLOC_DECLSPEC int hwloc_topology_dup(hwloc_topology_t *newtopology, hwloc_topology_t oldtopology); - -/** \brief Run internal checks on a topology structure - * - * The program aborts if an inconsistency is detected in the given topology. - * - * \param topology is the topology to be checked - * - * \note This routine is only useful to developers. - * - * \note The input topology should have been previously loaded with - * hwloc_topology_load(). - */ -HWLOC_DECLSPEC void hwloc_topology_check(hwloc_topology_t topology); - -/** @} */ - - - -/** \defgroup hwlocality_configuration Topology Detection Configuration and Query - * - * Several functions can optionally be called between hwloc_topology_init() and - * hwloc_topology_load() to configure how the detection should be performed, - * e.g. to ignore some objects types, define a synthetic topology, etc. - * - * If none of them is called, the default is to detect all the objects of the - * machine that the caller is allowed to access. 
- * - * This default behavior may also be modified through environment variables - * if the application did not modify it already. - * Setting HWLOC_XMLFILE in the environment enforces the discovery from a XML - * file as if hwloc_topology_set_xml() had been called. - * HWLOC_FSROOT switches to reading the topology from the specified Linux - * filesystem root as if hwloc_topology_set_fsroot() had been called. - * Finally, HWLOC_THISSYSTEM enforces the return value of - * hwloc_topology_is_thissystem(). - * - * @{ - */ - -/** \brief Ignore an object type. - * - * Ignore all objects from the given type. - * The bottom-level type HWLOC_OBJ_PU may not be ignored. - * The top-level object of the hierarchy will never be ignored, even if this function - * succeeds. - * Group objects are always ignored if they do not bring any structure - * since they are designed to add structure to the topology. - * I/O objects may not be ignored, topology flags should be used to configure - * their discovery instead. - */ -HWLOC_DECLSPEC int hwloc_topology_ignore_type(hwloc_topology_t topology, hwloc_obj_type_t type); - -/** \brief Ignore an object type if it does not bring any structure. - * - * Ignore all objects from the given type as long as they do not bring any structure: - * Each ignored object should have a single children or be the only child of its parent. - * The bottom-level type HWLOC_OBJ_PU may not be ignored. - * I/O objects may not be ignored, topology flags should be used to configure - * their discovery instead. - */ -HWLOC_DECLSPEC int hwloc_topology_ignore_type_keep_structure(hwloc_topology_t topology, hwloc_obj_type_t type); - -/** \brief Ignore all objects that do not bring any structure. - * - * Ignore all objects that do not bring any structure: - * Each ignored object should have a single children or be the only child of its parent. - * I/O objects may not be ignored, topology flags should be used to configure - * their discovery instead. 
- */ -HWLOC_DECLSPEC int hwloc_topology_ignore_all_keep_structure(hwloc_topology_t topology); - -/** \brief Flags to be set onto a topology context before load. - * - * Flags should be given to hwloc_topology_set_flags(). - * They may also be returned by hwloc_topology_get_flags(). - */ -enum hwloc_topology_flags_e { - /** \brief Detect the whole system, ignore reservations and offline settings. - * - * Gather all resources, even if some were disabled by the administrator. - * For instance, ignore Linux Cgroup/Cpusets and gather all processors and memory nodes, - * and ignore the fact that some resources may be offline. - * \hideinitializer - */ - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL<<0), - - /** \brief Assume that the selected backend provides the topology for the - * system on which we are running. - * - * This forces hwloc_topology_is_thissystem to return 1, i.e. makes hwloc assume that - * the selected backend provides the topology for the system on which we are running, - * even if it is not the OS-specific backend but the XML backend for instance. - * This means making the binding functions actually call the OS-specific - * system calls and really do binding, while the XML backend would otherwise - * provide empty hooks just returning success. - * - * Setting the environment variable HWLOC_THISSYSTEM may also result in the - * same behavior. - * - * This can be used for efficiency reasons to first detect the topology once, - * save it to an XML file, and quickly reload it later through the XML - * backend, but still having binding functions actually do bind. - * \hideinitializer - */ - HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM = (1UL<<1), - - /** \brief Detect PCI devices. - * - * By default, I/O devices are ignored. This flag enables I/O device - * detection using the pci backend. Only the common PCI devices (GPUs, - * NICs, block devices, ...) and host bridges (objects that connect the host - * objects to an I/O subsystem) will be added to the topology. 
- * Additionally it also enables MemoryDevice misc objects. - * Uncommon devices and other bridges (such as PCI-to-PCI bridges) will be - * ignored. - * \hideinitializer - */ - HWLOC_TOPOLOGY_FLAG_IO_DEVICES = (1UL<<2), - - /** \brief Detect PCI bridges. - * - * This flag should be combined with HWLOC_TOPOLOGY_FLAG_IO_DEVICES to enable - * the detection of both common devices and of all useful bridges (bridges that - * have at least one device behind them). - * \hideinitializer - */ - HWLOC_TOPOLOGY_FLAG_IO_BRIDGES = (1UL<<3), - - /** \brief Detect the whole PCI hierarchy. - * - * This flag enables detection of all I/O devices (even the uncommon ones) - * and bridges (even those that have no device behind them) using the pci - * backend. - * This implies HWLOC_TOPOLOGY_FLAG_IO_DEVICES. - * \hideinitializer - */ - HWLOC_TOPOLOGY_FLAG_WHOLE_IO = (1UL<<4), - - /** \brief Detect instruction caches. - * - * This flag enables detection of Instruction caches, - * instead of only Data and Unified caches. - * \hideinitializer - */ - HWLOC_TOPOLOGY_FLAG_ICACHES = (1UL<<5) -}; - -/** \brief Set OR'ed flags to non-yet-loaded topology. - * - * Set a OR'ed set of ::hwloc_topology_flags_e onto a topology that was not yet loaded. - * - * If this function is called multiple times, the last invokation will erase - * and replace the set of flags that was previously set. - * - * The flags set in a topology may be retrieved with hwloc_topology_get_flags() - */ -HWLOC_DECLSPEC int hwloc_topology_set_flags (hwloc_topology_t topology, unsigned long flags); - -/** \brief Get OR'ed flags of a topology. - * - * Get the OR'ed set of ::hwloc_topology_flags_e of a topology. - * - * \return the flags previously set with hwloc_topology_set_flags(). 
- */ -HWLOC_DECLSPEC unsigned long hwloc_topology_get_flags (hwloc_topology_t topology); - -/** \brief Change which pid the topology is viewed from - * - * On some systems, processes may have different views of the machine, for - * instance the set of allowed CPUs. By default, hwloc exposes the view from - * the current process. Calling hwloc_topology_set_pid() permits to make it - * expose the topology of the machine from the point of view of another - * process. - * - * \note \p hwloc_pid_t is \p pid_t on Unix platforms, - * and \p HANDLE on native Windows platforms. - * - * \note -1 is returned and errno is set to ENOSYS on platforms that do not - * support this feature. - */ -HWLOC_DECLSPEC int hwloc_topology_set_pid(hwloc_topology_t __hwloc_restrict topology, hwloc_pid_t pid); - -/** \brief Change the file-system root path when building the topology from sysfs/procfs. - * - * On Linux system, use sysfs and procfs files as if they were mounted on the given - * \p fsroot_path instead of the main file-system root. Setting the environment - * variable HWLOC_FSROOT may also result in this behavior. - * Not using the main file-system root causes hwloc_topology_is_thissystem() - * to return 0. - * - * Note that this function does not actually load topology - * information; it just tells hwloc where to load it from. You'll - * still need to invoke hwloc_topology_load() to actually load the - * topology information. - * - * \return -1 with errno set to ENOSYS on non-Linux and on Linux systems that - * do not support it. - * \return -1 with the appropriate errno if \p fsroot_path cannot be used. - * - * \note For convenience, this backend provides empty binding hooks which just - * return success. To have hwloc still actually call OS-specific hooks, the - * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded - * file is really the underlying system. 
- * - * \note On success, the Linux component replaces the previously enabled - * component (if any), but the topology is not actually modified until - * hwloc_topology_load(). - */ -HWLOC_DECLSPEC int hwloc_topology_set_fsroot(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict fsroot_path); - -/** \brief Enable synthetic topology. - * - * Gather topology information from the given \p description, - * a space-separated string of numbers describing - * the arity of each level. - * Each number may be prefixed with a type and a colon to enforce the type - * of a level. If only some level types are enforced, hwloc will try to - * choose the other types according to usual topologies, but it may fail - * and you may have to specify more level types manually. - * See also the \ref synthetic. - * - * If \p description was properly parsed and describes a valid topology - * configuration, this function returns 0. - * Otherwise -1 is returned and errno is set to EINVAL. - * - * Note that this function does not actually load topology - * information; it just tells hwloc where to load it from. You'll - * still need to invoke hwloc_topology_load() to actually load the - * topology information. - * - * \note For convenience, this backend provides empty binding hooks which just - * return success. - * - * \note On success, the synthetic component replaces the previously enabled - * component (if any), but the topology is not actually modified until - * hwloc_topology_load(). - */ -HWLOC_DECLSPEC int hwloc_topology_set_synthetic(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict description); - -/** \brief Enable XML-file based topology. - * - * Gather topology information from the XML file given at \p xmlpath. - * Setting the environment variable HWLOC_XMLFILE may also result in this behavior. - * This file may have been generated earlier with hwloc_topology_export_xml() - * or lstopo file.xml. 
- * - * Note that this function does not actually load topology - * information; it just tells hwloc where to load it from. You'll - * still need to invoke hwloc_topology_load() to actually load the - * topology information. - * - * \return -1 with errno set to EINVAL on failure to read the XML file. - * - * \note See also hwloc_topology_set_userdata_import_callback() - * for importing application-specific object userdata. - * - * \note For convenience, this backend provides empty binding hooks which just - * return success. To have hwloc still actually call OS-specific hooks, the - * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded - * file is really the underlying system. - * - * \note On success, the XML component replaces the previously enabled - * component (if any), but the topology is not actually modified until - * hwloc_topology_load(). - */ -HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict xmlpath); - -/** \brief Enable XML based topology using a memory buffer (instead of - * a file, as with hwloc_topology_set_xml()). - * - * Gather topology information from the XML memory buffer given at \p - * buffer and of length \p size. This buffer may have been filled - * earlier with hwloc_topology_export_xmlbuffer(). - * - * Note that this function does not actually load topology - * information; it just tells hwloc where to load it from. You'll - * still need to invoke hwloc_topology_load() to actually load the - * topology information. - * - * \return -1 with errno set to EINVAL on failure to read the XML buffer. - * - * \note See also hwloc_topology_set_userdata_import_callback() - * for importing application-specific object userdata. - * - * \note For convenience, this backend provides empty binding hooks which just - * return success. 
To have hwloc still actually call OS-specific hooks, the - * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded - * file is really the underlying system. - * - * \note On success, the XML component replaces the previously enabled - * component (if any), but the topology is not actually modified until - * hwloc_topology_load(). - */ -HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size); - -/** \brief Prepare the topology for custom assembly. - * - * The topology then contains a single root object. - * It must then be built by inserting other topologies with - * hwloc_custom_insert_topology() or single objects with - * hwloc_custom_insert_group_object_by_parent(). - * hwloc_topology_load() must be called to finalize the new - * topology as usual. - * - * \note If nothing is inserted in the topology, - * hwloc_topology_load() will fail with errno set to EINVAL. - * - * \note The cpuset and nodeset of the root object are NULL because - * these sets are meaningless when assembling multiple topologies. - * - * \note On success, the custom component replaces the previously enabled - * component (if any), but the topology is not actually modified until - * hwloc_topology_load(). - */ -HWLOC_DECLSPEC int hwloc_topology_set_custom(hwloc_topology_t topology); - -/** \brief Provide a distance matrix. - * - * Provide the matrix of distances between a set of objects of the given type. - * The set may or may not contain all the existing objects of this type. - * The objects are specified by their OS/physical index in the \p os_index - * array. The \p distances matrix follows the same order. - * The distance from object i to object j in the i*nbobjs+j. - * - * A single latency matrix may be defined for each type. 
- * If another distance matrix already exists for the given type, - * either because the user specified it or because the OS offers it, - * it will be replaced by the given one. - * If \p nbobjs is \c 0, \p os_index is \c NULL and \p distances is \c NULL, - * the existing distance matrix for the given type is removed. - * - * \note Distance matrices are ignored in multi-node topologies. - */ -HWLOC_DECLSPEC int hwloc_topology_set_distance_matrix(hwloc_topology_t __hwloc_restrict topology, - hwloc_obj_type_t type, unsigned nbobjs, - unsigned *os_index, float *distances); - -/** \brief Does the topology context come from this system? - * - * \return 1 if this topology context was built using the system - * running this program. - * \return 0 instead (for instance if using another file-system root, - * a XML topology file, or a synthetic topology). - */ -HWLOC_DECLSPEC int hwloc_topology_is_thissystem(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure; - -/** \brief Flags describing actual discovery support for this topology. */ -struct hwloc_topology_discovery_support { - /** \brief Detecting the number of PU objects is supported. */ - unsigned char pu; -}; - -/** \brief Flags describing actual PU binding support for this topology. */ -struct hwloc_topology_cpubind_support { - /** Binding the whole current process is supported. */ - unsigned char set_thisproc_cpubind; - /** Getting the binding of the whole current process is supported. */ - unsigned char get_thisproc_cpubind; - /** Binding a whole given process is supported. */ - unsigned char set_proc_cpubind; - /** Getting the binding of a whole given process is supported. */ - unsigned char get_proc_cpubind; - /** Binding the current thread only is supported. */ - unsigned char set_thisthread_cpubind; - /** Getting the binding of the current thread only is supported. */ - unsigned char get_thisthread_cpubind; - /** Binding a given thread only is supported. 
*/ - unsigned char set_thread_cpubind; - /** Getting the binding of a given thread only is supported. */ - unsigned char get_thread_cpubind; - /** Getting the last processors where the whole current process ran is supported */ - unsigned char get_thisproc_last_cpu_location; - /** Getting the last processors where a whole process ran is supported */ - unsigned char get_proc_last_cpu_location; - /** Getting the last processors where the current thread ran is supported */ - unsigned char get_thisthread_last_cpu_location; -}; - -/** \brief Flags describing actual memory binding support for this topology. */ -struct hwloc_topology_membind_support { - /** Binding the whole current process is supported. */ - unsigned char set_thisproc_membind; - /** Getting the binding of the whole current process is supported. */ - unsigned char get_thisproc_membind; - /** Binding a whole given process is supported. */ - unsigned char set_proc_membind; - /** Getting the binding of a whole given process is supported. */ - unsigned char get_proc_membind; - /** Binding the current thread only is supported. */ - unsigned char set_thisthread_membind; - /** Getting the binding of the current thread only is supported. */ - unsigned char get_thisthread_membind; - /** Binding a given memory area is supported. */ - unsigned char set_area_membind; - /** Getting the binding of a given memory area is supported. */ - unsigned char get_area_membind; - /** Allocating a bound memory area is supported. */ - unsigned char alloc_membind; - /** First-touch policy is supported. */ - unsigned char firsttouch_membind; - /** Bind policy is supported. */ - unsigned char bind_membind; - /** Interleave policy is supported. */ - unsigned char interleave_membind; - /** Replication policy is supported. */ - unsigned char replicate_membind; - /** Next-touch migration policy is supported. */ - unsigned char nexttouch_membind; - - /** Migration flags is supported. 
*/ - unsigned char migrate_membind; -}; - -/** \brief Set of flags describing actual support for this topology. - * - * This is retrieved with hwloc_topology_get_support() and will be valid until - * the topology object is destroyed. Note: the values are correct only after - * discovery. - */ -struct hwloc_topology_support { - struct hwloc_topology_discovery_support *discovery; - struct hwloc_topology_cpubind_support *cpubind; - struct hwloc_topology_membind_support *membind; -}; - -/** \brief Retrieve the topology support. */ -HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(hwloc_topology_t __hwloc_restrict topology); - -/** \brief Set the topology-specific userdata pointer. - * - * Each topology may store one application-given private data pointer. - * It is initialized to \c NULL. - * hwloc will never modify it. - * - * Use it as you wish, after hwloc_topology_init() and until hwloc_topolog_destroy(). - * - * This pointer is not exported to XML. - */ -HWLOC_DECLSPEC void hwloc_topology_set_userdata(hwloc_topology_t topology, const void *userdata); - -/** \brief Retrieve the topology-specific userdata pointer. - * - * Retrieve the application-given private data pointer that was - * previously set with hwloc_topology_set_userdata(). - */ -HWLOC_DECLSPEC void * hwloc_topology_get_userdata(hwloc_topology_t topology); - -/** @} */ - - - -/** \defgroup hwlocality_levels Object levels, depths and types - * @{ - * - * Be sure to see the figure in \ref termsanddefs that shows a - * complete topology tree, including depths, child/sibling/cousin - * relationships, and an example of an asymmetric topology where one - * package has fewer caches than its peers. - */ - -/** \brief Get the depth of the hierarchical tree of objects. - * - * This is the depth of HWLOC_OBJ_PU objects plus one. 
- */ -HWLOC_DECLSPEC unsigned hwloc_topology_get_depth(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure; - -/** \brief Returns the depth of objects of type \p type. - * - * If no object of this type is present on the underlying architecture, or if - * the OS doesn't provide this kind of information, the function returns - * HWLOC_TYPE_DEPTH_UNKNOWN. - * - * If type is absent but a similar type is acceptable, see also - * hwloc_get_type_or_below_depth() and hwloc_get_type_or_above_depth(). - * - * If some objects of the given type exist in different levels, - * for instance L1 and L2 caches, or L1i and L1d caches, - * the function returns HWLOC_TYPE_DEPTH_MULTIPLE. - * See hwloc_get_cache_type_depth() in hwloc/helper.h to better handle this - * case. - * - * If an I/O object type is given, the function returns a virtual value - * because I/O objects are stored in special levels that are not CPU-related. - * This virtual depth may be passed to other hwloc functions such as - * hwloc_get_obj_by_depth() but it should not be considered as an actual - * depth by the application. In particular, it should not be compared with - * any other object depth or with the entire topology depth. - */ -HWLOC_DECLSPEC int hwloc_get_type_depth (hwloc_topology_t topology, hwloc_obj_type_t type); - -enum hwloc_get_type_depth_e { - HWLOC_TYPE_DEPTH_UNKNOWN = -1, /**< \brief No object of given type exists in the topology. \hideinitializer */ - HWLOC_TYPE_DEPTH_MULTIPLE = -2, /**< \brief Objects of given type exist at different depth in the topology. \hideinitializer */ - HWLOC_TYPE_DEPTH_BRIDGE = -3, /**< \brief Virtual depth for bridge object level. \hideinitializer */ - HWLOC_TYPE_DEPTH_PCI_DEVICE = -4, /**< \brief Virtual depth for PCI device object level. \hideinitializer */ - HWLOC_TYPE_DEPTH_OS_DEVICE = -5 /**< \brief Virtual depth for software device object level. 
\hideinitializer */ -}; - -/** \brief Returns the depth of objects of type \p type or below - * - * If no object of this type is present on the underlying architecture, the - * function returns the depth of the first "present" object typically found - * inside \p type. - * - * If some objects of the given type exist in different levels, for instance - * L1 and L2 caches, the function returns HWLOC_TYPE_DEPTH_MULTIPLE. - */ -static __hwloc_inline int -hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure; - -/** \brief Returns the depth of objects of type \p type or above - * - * If no object of this type is present on the underlying architecture, the - * function returns the depth of the first "present" object typically - * containing \p type. - * - * If some objects of the given type exist in different levels, for instance - * L1 and L2 caches, the function returns HWLOC_TYPE_DEPTH_MULTIPLE. - */ -static __hwloc_inline int -hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure; - -/** \brief Returns the type of objects at depth \p depth. - * - * \return -1 if depth \p depth does not exist. - */ -HWLOC_DECLSPEC hwloc_obj_type_t hwloc_get_depth_type (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure; - -/** \brief Returns the width of level at depth \p depth. - */ -HWLOC_DECLSPEC unsigned hwloc_get_nbobjs_by_depth (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure; - -/** \brief Returns the width of level type \p type - * - * If no object for that type exists, 0 is returned. - * If there are several levels with objects of that type, -1 is returned. - */ -static __hwloc_inline int -hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure; - -/** \brief Returns the top-object of the topology-tree. 
- * - * Its type is typically ::HWLOC_OBJ_MACHINE but it could be different - * for complex topologies. - */ -static __hwloc_inline hwloc_obj_t -hwloc_get_root_obj (hwloc_topology_t topology) __hwloc_attribute_pure; - -/** \brief Returns the topology object at logical index \p idx from depth \p depth */ -HWLOC_DECLSPEC hwloc_obj_t hwloc_get_obj_by_depth (hwloc_topology_t topology, unsigned depth, unsigned idx) __hwloc_attribute_pure; - -/** \brief Returns the topology object at logical index \p idx with type \p type - * - * If no object for that type exists, \c NULL is returned. - * If there are several levels with objects of that type, \c NULL is returned - * and ther caller may fallback to hwloc_get_obj_by_depth(). - */ -static __hwloc_inline hwloc_obj_t -hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure; - -/** \brief Returns the next object at depth \p depth. - * - * If \p prev is \c NULL, return the first object at depth \p depth. - */ -static __hwloc_inline hwloc_obj_t -hwloc_get_next_obj_by_depth (hwloc_topology_t topology, unsigned depth, hwloc_obj_t prev); - -/** \brief Returns the next object of type \p type. - * - * If \p prev is \c NULL, return the first object at type \p type. If - * there are multiple or no depth for given type, return \c NULL and - * let the caller fallback to hwloc_get_next_obj_by_depth(). - */ -static __hwloc_inline hwloc_obj_t -hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, - hwloc_obj_t prev); - -/** @} */ - - - -/** \defgroup hwlocality_object_strings Manipulating Object Type, Sets and Attributes as Strings - * @{ - */ - -/** \brief Return a stringified topology object type */ -HWLOC_DECLSPEC const char * hwloc_obj_type_string (hwloc_obj_type_t type) __hwloc_attribute_const; - -/** \brief Return an object type and attributes from a type string. - * - * Convert strings such as "Package" or "Cache" into the corresponding types. 
- * Matching is case-insensitive, and only the first letters are actually - * required to match. - * - * Types that have specific attributes, for instance caches and groups, - * may be returned in \p depthattrp and \p typeattrp. They are ignored - * when these pointers are \c NULL. - * - * For instance "L2i" or "L2iCache" would return - * type HWLOC_OBJ_CACHE in \p typep, 2 in \p depthattrp, - * and HWLOC_OBJ_CACHE_TYPE_INSTRUCTION in \p typeattrp - * (this last pointer should point to a hwloc_obj_cache_type_t). - * "Group3" would return type HWLOC_OBJ_GROUP type and 3 in \p depthattrp. - * Attributes that are not specified in the string (for instance "Group" - * without a depth, or "L2Cache" without a cache type) are set to -1. - * - * \p typeattrd is only filled if the size specified in \p typeattrsize - * is large enough. It is currently only used for caches, and the required - * size is at least the size of hwloc_obj_cache_type_t. - * - * \return 0 if a type was correctly identified, otherwise -1. - * - * \note This is an extended version of the now deprecated hwloc_obj_type_of_string() - */ -HWLOC_DECLSPEC int hwloc_obj_type_sscanf(const char *string, - hwloc_obj_type_t *typep, - int *depthattrp, - void *typeattrp, size_t typeattrsize); - -/** \brief Stringify the type of a given topology object into a human-readable form. - * - * It differs from hwloc_obj_type_string() because it prints type attributes such - * as cache depth and type. - * - * If \p size is 0, \p string may safely be \c NULL. - * - * \return the number of character that were actually written if not truncating, - * or that would have been written (not including the ending \\0). - */ -HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, - int verbose); - -/** \brief Stringify the attributes of a given topology object into a human-readable form. - * - * Attribute values are separated by \p separator. 
- * - * Only the major attributes are printed in non-verbose mode. - * - * If \p size is 0, \p string may safely be \c NULL. - * - * \return the number of character that were actually written if not truncating, - * or that would have been written (not including the ending \\0). - */ -HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, const char * __hwloc_restrict separator, - int verbose); - -/** \brief Stringify the cpuset containing a set of objects. - * - * If \p size is 0, \p string may safely be \c NULL. - * - * \return the number of character that were actually written if not truncating, - * or that would have been written (not including the ending \\0). - */ -HWLOC_DECLSPEC int hwloc_obj_cpuset_snprintf(char * __hwloc_restrict str, size_t size, size_t nobj, const hwloc_obj_t * __hwloc_restrict objs); - -/** \brief Search the given key name in object infos and return the corresponding value. - * - * If multiple keys match the given name, only the first one is returned. - * - * \return \c NULL if no such key exists. - */ -static __hwloc_inline const char * -hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name) __hwloc_attribute_pure; - -/** \brief Add the given info name and value pair to the given object. - * - * The info is appended to the existing info array even if another key - * with the same name already exists. - * - * The input strings are copied before being added in the object infos. - * - * \note This function may be used to enforce object colors in the lstopo - * graphical output by using "lstopoStyle" as a name and "Background=#rrggbb" - * as a value. See CUSTOM COLORS in the lstopo(1) manpage for details. - * - * \note If \p value contains some non-printable characters, they will - * be dropped when exporting to XML, see hwloc_topology_export_xml(). 
- */ -HWLOC_DECLSPEC void hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value); - -/** @} */ - - - -/** \defgroup hwlocality_cpubinding CPU binding - * - * It is often useful to call hwloc_bitmap_singlify() first so that a single CPU - * remains in the set. This way, the process will not even migrate between - * different CPUs inside the given set. - * Some operating systems also only support that kind of binding. - * - * Some operating systems do not provide all hwloc-supported - * mechanisms to bind processes, threads, etc. - * hwloc_topology_get_support() may be used to query about the actual CPU - * binding support in the currently used operating system. - * - * When the requested binding operation is not available and the - * ::HWLOC_CPUBIND_STRICT flag was passed, the function returns -1. - * \p errno is set to \c ENOSYS when it is not possible to bind the requested kind of object - * processes/threads. errno is set to \c EXDEV when the requested cpuset - * can not be enforced (e.g. some systems only allow one CPU, and some - * other systems only allow one NUMA node). - * - * If ::HWLOC_CPUBIND_STRICT was not passed, the function may fail as well, - * or the operating system may use a slightly different operation - * (with side-effects, smaller binding set, etc.) - * when the requested operation is not exactly supported. - * - * The most portable version that should be preferred over the others, - * whenever possible, is the following one which just binds the current program, - * assuming it is single-threaded: - * - * \code - * hwloc_set_cpubind(topology, set, 0), - * \endcode - * - * If the program may be multithreaded, the following one should be preferred - * to only bind the current thread: - * - * \code - * hwloc_set_cpubind(topology, set, HWLOC_CPUBIND_THREAD), - * \endcode - * - * \sa Some example codes are available under doc/examples/ in the source tree. 
- * - * \note To unbind, just call the binding function with either a full cpuset or - * a cpuset equal to the system cpuset. - * - * \note On some operating systems, CPU binding may have effects on memory binding, see - * ::HWLOC_CPUBIND_NOMEMBIND - * - * \note Running lstopo --top or hwloc-ps can be a very convenient tool to check - * how binding actually happened. - * @{ - */ - -/** \brief Process/Thread binding flags. - * - * These bit flags can be used to refine the binding policy. - * - * The default (0) is to bind the current process, assumed to be - * single-threaded, in a non-strict way. This is the most portable - * way to bind as all operating systems usually provide it. - * - * \note Not all systems support all kinds of binding. See the - * "Detailed Description" section of \ref hwlocality_cpubinding for a - * description of errors that can occur. - */ -typedef enum { - /** \brief Bind all threads of the current (possibly) multithreaded process. - * \hideinitializer */ - HWLOC_CPUBIND_PROCESS = (1<<0), - - /** \brief Bind current thread of current process. - * \hideinitializer */ - HWLOC_CPUBIND_THREAD = (1<<1), - - /** \brief Request for strict binding from the OS. - * - * By default, when the designated CPUs are all busy while other - * CPUs are idle, operating systems may execute the thread/process - * on those other CPUs instead of the designated CPUs, to let them - * progress anyway. Strict binding means that the thread/process - * will _never_ execute on other cpus than the designated CPUs, even - * when those are busy with other tasks and other CPUs are idle. - * - * \note Depending on the operating system, strict binding may not - * be possible (e.g., the OS does not implement it) or not allowed - * (e.g., for an administrative reasons), and the function will fail - * in that case. - * - * When retrieving the binding of a process, this flag checks - * whether all its threads actually have the same binding. 
If the - * flag is not given, the binding of each thread will be - * accumulated. - * - * \note This flag is meaningless when retrieving the binding of a - * thread. - * \hideinitializer - */ - HWLOC_CPUBIND_STRICT = (1<<2), - - /** \brief Avoid any effect on memory binding - * - * On some operating systems, some CPU binding function would also - * bind the memory on the corresponding NUMA node. It is often not - * a problem for the application, but if it is, setting this flag - * will make hwloc avoid using OS functions that would also bind - * memory. This will however reduce the support of CPU bindings, - * i.e. potentially return -1 with errno set to ENOSYS in some - * cases. - * - * This flag is only meaningful when used with functions that set - * the CPU binding. It is ignored when used with functions that get - * CPU binding information. - * \hideinitializer - */ - HWLOC_CPUBIND_NOMEMBIND = (1<<3) -} hwloc_cpubind_flags_t; - -/** \brief Bind current process or thread on cpus given in physical bitmap \p set. - * - * \return -1 with errno set to ENOSYS if the action is not supported - * \return -1 with errno set to EXDEV if the binding cannot be enforced - */ -HWLOC_DECLSPEC int hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags); - -/** \brief Get current process or thread binding. - * - * Writes into \p set the physical cpuset which the process or thread (according to \e - * flags) was last bound to. - */ -HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); - -/** \brief Bind a process \p pid on cpus given in physical bitmap \p set. - * - * \note \p hwloc_pid_t is \p pid_t on Unix platforms, - * and \p HANDLE on native Windows platforms. - * - * \note As a special case on Linux, if a tid (thread ID) is supplied - * instead of a pid (process ID) and HWLOC_CPUBIND_THREAD is passed in flags, - * the binding is applied to that specific thread. 
- * - * \note On non-Linux systems, HWLOC_CPUBIND_THREAD can not be used in \p flags. - */ -HWLOC_DECLSPEC int hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags); - -/** \brief Get the current physical binding of process \p pid. - * - * \note \p hwloc_pid_t is \p pid_t on Unix platforms, - * and \p HANDLE on native Windows platforms. - * - * \note As a special case on Linux, if a tid (thread ID) is supplied - * instead of a pid (process ID) and HWLOC_CPUBIND_THREAD is passed in flags, - * the binding for that specific thread is returned. - * - * \note On non-Linux systems, HWLOC_CPUBIND_THREAD can not be used in \p flags. - */ -HWLOC_DECLSPEC int hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags); - -#ifdef hwloc_thread_t -/** \brief Bind a thread \p thread on cpus given in physical bitmap \p set. - * - * \note \p hwloc_thread_t is \p pthread_t on Unix platforms, - * and \p HANDLE on native Windows platforms. - * - * \note HWLOC_CPUBIND_PROCESS can not be used in \p flags. - */ -HWLOC_DECLSPEC int hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_const_cpuset_t set, int flags); -#endif - -#ifdef hwloc_thread_t -/** \brief Get the current physical binding of thread \p tid. - * - * \note \p hwloc_thread_t is \p pthread_t on Unix platforms, - * and \p HANDLE on native Windows platforms. - * - * \note HWLOC_CPUBIND_PROCESS can not be used in \p flags. - */ -HWLOC_DECLSPEC int hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_cpuset_t set, int flags); -#endif - -/** \brief Get the last physical CPU where the current process or thread ran. - * - * The operating system may move some tasks from one processor - * to another at any time according to their binding, - * so this function may return something that is already - * outdated. 
- * - * \p flags can include either HWLOC_CPUBIND_PROCESS or HWLOC_CPUBIND_THREAD to - * specify whether the query should be for the whole process (union of all CPUs - * on which all threads are running), or only the current thread. If the - * process is single-threaded, flags can be set to zero to let hwloc use - * whichever method is available on the underlying OS. - */ -HWLOC_DECLSPEC int hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); - -/** \brief Get the last physical CPU where a process ran. - * - * The operating system may move some tasks from one processor - * to another at any time according to their binding, - * so this function may return something that is already - * outdated. - * - * \note \p hwloc_pid_t is \p pid_t on Unix platforms, - * and \p HANDLE on native Windows platforms. - * - * \note As a special case on Linux, if a tid (thread ID) is supplied - * instead of a pid (process ID) and HWLOC_CPUBIND_THREAD is passed in flags, - * the last CPU location of that specific thread is returned. - * - * \note On non-Linux systems, HWLOC_CPUBIND_THREAD can not be used in \p flags. - */ -HWLOC_DECLSPEC int hwloc_get_proc_last_cpu_location(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags); - -/** @} */ - - - -/** \defgroup hwlocality_membinding Memory binding - * - * Memory binding can be done three ways: - * - * - explicit memory allocation thanks to hwloc_alloc_membind() and friends: - * the binding will have effect on the memory allocated by these functions. - * - implicit memory binding through binding policy: hwloc_set_membind() and - * friends only define the current policy of the process, which will be - * applied to the subsequent calls to malloc() and friends. - * - migration of existing memory ranges, thanks to hwloc_set_area_membind() - * and friends, which move already-allocated data. - * - * Not all operating systems support all three ways. 
- * hwloc_topology_get_support() may be used to query about the actual memory - * binding support in the currently used operating system. - * - * When the requested binding operation is not available and the - * ::HWLOC_MEMBIND_STRICT flag was passed, the function returns -1. - * \p errno will be set to \c ENOSYS when the system does support - * the specified action or policy - * (e.g., some systems only allow binding memory on a per-thread - * basis, whereas other systems only allow binding memory for all - * threads in a process). - * \p errno will be set to EXDEV when the requested cpuset can not be enforced - * (e.g., some systems only allow binding memory to a single NUMA node). - * - * If ::HWLOC_MEMBIND_STRICT was not passed, the function may fail as well, - * or the operating system may use a slightly different operation - * (with side-effects, smaller binding set, etc.) - * when the requested operation is not exactly supported. - * - * The most portable form that should be preferred over the others - * whenever possible is as follows. - * It allocates some memory hopefully bound to the specified set. - * To do so, hwloc will possibly have to change the current memory - * binding policy in order to actually get the memory bound, if the OS - * does not provide any other way to simply allocate bound memory - * without changing the policy for all allocations. That is the - * difference with hwloc_alloc_membind(), which will never change the - * current memory binding policy. - * - * \code - * hwloc_alloc_membind_policy(topology, size, set, - * HWLOC_MEMBIND_BIND, 0); - * \endcode - * - * Each hwloc memory binding function is available in two forms: one - * that takes a CPU set argument and another that takes a NUMA memory - * node set argument (see \ref hwlocality_object_sets and \ref - * hwlocality_bitmap for a discussion of CPU sets and NUMA memory node - * sets). The names of the latter form end with _nodeset. 
It is also - * possible to convert between CPU set and node set using - * hwloc_cpuset_to_nodeset() or hwloc_cpuset_from_nodeset(). - * - * \sa Some example codes are available under doc/examples/ in the source tree. - * - * \note On some operating systems, memory binding affects the CPU - * binding; see ::HWLOC_MEMBIND_NOCPUBIND - * @{ - */ - -/** \brief Memory binding policy. - * - * These constants can be used to choose the binding policy. Only one policy can - * be used at a time (i.e., the values cannot be OR'ed together). - * - * Not all systems support all kinds of binding. - * hwloc_topology_get_support() may be used to query about the actual memory - * binding policy support in the currently used operating system. - * See the "Detailed Description" section of \ref hwlocality_membinding - * for a description of errors that can occur. - */ -typedef enum { - /** \brief Reset the memory allocation policy to the system default. - * Depending on the operating system, this may correspond to - * HWLOC_MEMBIND_FIRSTTOUCH (Linux), - * or HWLOC_MEMBIND_BIND (AIX, HP-UX, OSF, Solaris, Windows). - * \hideinitializer */ - HWLOC_MEMBIND_DEFAULT = 0, - - /** \brief Allocate memory - * but do not immediately bind it to a specific locality. Instead, - * each page in the allocation is bound only when it is first - * touched. Pages are individually bound to the local NUMA node of - * the first thread that touches it. If there is not enough memory - * on the node, allocation may be done in the specified cpuset - * before allocating on other nodes. - * \hideinitializer */ - HWLOC_MEMBIND_FIRSTTOUCH = 1, - - /** \brief Allocate memory on the specified nodes. - * \hideinitializer */ - HWLOC_MEMBIND_BIND = 2, - - /** \brief Allocate memory on the given nodes in an interleaved - * / round-robin manner. The precise layout of the memory across - * multiple NUMA nodes is OS/system specific. 
Interleaving can be - * useful when threads distributed across the specified NUMA nodes - * will all be accessing the whole memory range concurrently, since - * the interleave will then balance the memory references. - * \hideinitializer */ - HWLOC_MEMBIND_INTERLEAVE = 3, - - /** \brief Replicate memory on the given nodes; reads from this - * memory will attempt to be serviced from the NUMA node local to - * the reading thread. Replicating can be useful when multiple - * threads from the specified NUMA nodes will be sharing the same - * read-only data. - * - * This policy can only be used with existing memory allocations - * (i.e., the hwloc_set_*membind*() functions); it cannot be used - * with functions that allocate new memory (i.e., the hwloc_alloc*() - * functions). - * \hideinitializer */ - HWLOC_MEMBIND_REPLICATE = 4, - - /** \brief For each page bound with this policy, by next time - * it is touched (and next time only), it is moved from its current - * location to the local NUMA node of the thread where the memory - * reference occurred (if it needs to be moved at all). - * \hideinitializer */ - HWLOC_MEMBIND_NEXTTOUCH = 5, - - /** \brief Returned by get_membind() functions when multiple - * threads or parts of a memory area have differing memory binding - * policies. - * \hideinitializer */ - HWLOC_MEMBIND_MIXED = -1 -} hwloc_membind_policy_t; - -/** \brief Memory binding flags. - * - * These flags can be used to refine the binding policy. - * All flags can be logically OR'ed together with the exception of - * ::HWLOC_MEMBIND_PROCESS and ::HWLOC_MEMBIND_THREAD; - * these two flags are mutually exclusive. - * - * Not all systems support all kinds of binding. - * hwloc_topology_get_support() may be used to query about the actual memory - * binding support in the currently used operating system. - * See the "Detailed Description" section of \ref hwlocality_membinding - * for a description of errors that can occur. 
- */ -typedef enum { - /** \brief Set policy for all threads of the specified (possibly - * multithreaded) process. This flag is mutually exclusive with - * ::HWLOC_MEMBIND_THREAD. - * \hideinitializer */ - HWLOC_MEMBIND_PROCESS = (1<<0), - - /** \brief Set policy for a specific thread of the current process. - * This flag is mutually exclusive with ::HWLOC_MEMBIND_PROCESS. - * \hideinitializer */ - HWLOC_MEMBIND_THREAD = (1<<1), - - /** Request strict binding from the OS. The function will fail if - * the binding can not be guaranteed / completely enforced. - * - * This flag has slightly different meanings depending on which - * function it is used with. - * \hideinitializer */ - HWLOC_MEMBIND_STRICT = (1<<2), - - /** \brief Migrate existing allocated memory. If the memory cannot - * be migrated and the ::HWLOC_MEMBIND_STRICT flag is passed, an error - * will be returned. - * \hideinitializer */ - HWLOC_MEMBIND_MIGRATE = (1<<3), - - /** \brief Avoid any effect on CPU binding. - * - * On some operating systems, some underlying memory binding - * functions also bind the application to the corresponding CPU(s). - * Using this flag will cause hwloc to avoid using OS functions that - * could potentially affect CPU bindings. Note, however, that using - * NOCPUBIND may reduce hwloc's overall memory binding - * support. Specifically: some of hwloc's memory binding functions - * may fail with errno set to ENOSYS when used with NOCPUBIND. - * \hideinitializer - */ - HWLOC_MEMBIND_NOCPUBIND = (1<<4) -} hwloc_membind_flags_t; - -/** \brief Set the default memory binding policy of the current - * process or thread to prefer the NUMA node(s) specified by physical \p nodeset - * - * If neither ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is - * specified, the current process is assumed to be single-threaded. - * This is the most portable form as it permits hwloc to use either - * process-based OS functions or thread-based OS functions, depending - * on which are available. 
- * - * \return -1 with errno set to ENOSYS if the action is not supported - * \return -1 with errno set to EXDEV if the binding cannot be enforced - */ -HWLOC_DECLSPEC int hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags); - -/** \brief Set the default memory binding policy of the current - * process or thread to prefer the NUMA node(s) near the specified physical \p - * cpuset - * - * If neither ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is - * specified, the current process is assumed to be single-threaded. - * This is the most portable form as it permits hwloc to use either - * process-based OS functions or thread-based OS functions, depending - * on which are available. - * - * \return -1 with errno set to ENOSYS if the action is not supported - * \return -1 with errno set to EXDEV if the binding cannot be enforced - */ -HWLOC_DECLSPEC int hwloc_set_membind(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags); - -/** \brief Query the default memory binding policy and physical locality of the - * current process or thread. - * - * This function has two output parameters: \p nodeset and \p policy. - * The values returned in these parameters depend on both the \p flags - * passed in and the current memory binding policies and nodesets in - * the queried target. - * - * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query - * target is the current policies and nodesets for all the threads in - * the current process. Passing ::HWLOC_MEMBIND_THREAD specifies that - * the query target is the current policy and nodeset for only the - * thread invoking this function. - * - * If neither of these flags are passed (which is the most portable - * method), the process is assumed to be single threaded. This allows - * hwloc to use either process-based OS functions or thread-based OS - * functions, depending on which are available. 
- * - * ::HWLOC_MEMBIND_STRICT is only meaningful when ::HWLOC_MEMBIND_PROCESS - * is also specified. In this case, hwloc will check the default - * memory policies and nodesets for all threads in the process. If - * they are not identical, -1 is returned and errno is set to EXDEV. - * If they are identical, the values are returned in \p nodeset and \p - * policy. - * - * Otherwise, if ::HWLOC_MEMBIND_PROCESS is specified (and - * ::HWLOC_MEMBIND_STRICT is \em not specified), \p nodeset is set to - * the logical OR of all threads' default nodeset. If all threads' - * default policies are the same, \p policy is set to that policy. If - * they are different, \p policy is set to ::HWLOC_MEMBIND_MIXED. - * - * In the ::HWLOC_MEMBIND_THREAD case (or when neither - * ::HWLOC_MEMBIND_PROCESS or ::HWLOC_MEMBIND_THREAD is specified), there - * is only one nodeset and policy; they are returned in \p nodeset and - * \p policy, respectively. - * - * If any other flags are specified, -1 is returned and errno is set - * to EINVAL. - */ -HWLOC_DECLSPEC int hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags); - -/** \brief Query the default memory binding policy and physical locality of the - * current process or thread (the locality is returned in \p cpuset as - * CPUs near the locality's actual NUMA node(s)). - * - * This function has two output parameters: \p cpuset and \p policy. - * The values returned in these parameters depend on both the \p flags - * passed in and the current memory binding policies and nodesets in - * the queried target. - * - * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query - * target is the current policies and nodesets for all the threads in - * the current process. Passing ::HWLOC_MEMBIND_THREAD specifies that - * the query target is the current policy and nodeset for only the - * thread invoking this function. 
- * - * If neither of these flags are passed (which is the most portable - * method), the process is assumed to be single threaded. This allows - * hwloc to use either process-based OS functions or thread-based OS - * functions, depending on which are available. - * - * ::HWLOC_MEMBIND_STRICT is only meaningful when ::HWLOC_MEMBIND_PROCESS - * is also specified. In this case, hwloc will check the default - * memory policies and nodesets for all threads in the process. If - * they are not identical, -1 is returned and errno is set to EXDEV. - * If they are identical, the policy is returned in \p policy. \p - * cpuset is set to the union of CPUs near the NUMA node(s) in the - * nodeset. - * - * Otherwise, if ::HWLOC_MEMBIND_PROCESS is specified (and - * ::HWLOC_MEMBIND_STRICT is \em not specified), the default nodeset - * from each thread is logically OR'ed together. \p cpuset is set to - * the union of CPUs near the NUMA node(s) in the resulting nodeset. - * If all threads' default policies are the same, \p policy is set to - * that policy. If they are different, \p policy is set to - * ::HWLOC_MEMBIND_MIXED. - * - * In the ::HWLOC_MEMBIND_THREAD case (or when neither - * ::HWLOC_MEMBIND_PROCESS or ::HWLOC_MEMBIND_THREAD is specified), there - * is only one nodeset and policy. The policy is returned in \p - * policy; \p cpuset is set to the union of CPUs near the NUMA node(s) - * in the \p nodeset. - * - * If any other flags are specified, -1 is returned and errno is set - * to EINVAL. 
- */ -HWLOC_DECLSPEC int hwloc_get_membind(hwloc_topology_t topology, hwloc_cpuset_t cpuset, hwloc_membind_policy_t * policy, int flags); - -/** \brief Set the default memory binding policy of the specified - * process to prefer the NUMA node(s) specified by physical \p nodeset - * - * \return -1 with errno set to ENOSYS if the action is not supported - * \return -1 with errno set to EXDEV if the binding cannot be enforced - * - * \note \p hwloc_pid_t is \p pid_t on Unix platforms, - * and \p HANDLE on native Windows platforms. - */ -HWLOC_DECLSPEC int hwloc_set_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags); - -/** \brief Set the default memory binding policy of the specified - * process to prefer the NUMA node(s) near the specified physical \p cpuset - * - * \return -1 with errno set to ENOSYS if the action is not supported - * \return -1 with errno set to EXDEV if the binding cannot be enforced - * - * \note \p hwloc_pid_t is \p pid_t on Unix platforms, - * and \p HANDLE on native Windows platforms. - */ -HWLOC_DECLSPEC int hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags); - -/** \brief Query the default memory binding policy and physical locality of the - * specified process. - * - * This function has two output parameters: \p nodeset and \p policy. - * The values returned in these parameters depend on both the \p flags - * passed in and the current memory binding policies and nodesets in - * the queried target. - * - * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query - * target is the current policies and nodesets for all the threads in - * the specified process. If ::HWLOC_MEMBIND_PROCESS is not specified - * (which is the most portable method), the process is assumed to be - * single threaded. 
This allows hwloc to use either process-based OS - * functions or thread-based OS functions, depending on which are - * available. - * - * Note that it does not make sense to pass ::HWLOC_MEMBIND_THREAD to - * this function. - * - * If ::HWLOC_MEMBIND_STRICT is specified, hwloc will check the default - * memory policies and nodesets for all threads in the specified - * process. If they are not identical, -1 is returned and errno is - * set to EXDEV. If they are identical, the values are returned in \p - * nodeset and \p policy. - * - * Otherwise, \p nodeset is set to the logical OR of all threads' - * default nodeset. If all threads' default policies are the same, \p - * policy is set to that policy. If they are different, \p policy is - * set to ::HWLOC_MEMBIND_MIXED. - * - * If any other flags are specified, -1 is returned and errno is set - * to EINVAL. - * - * \note \p hwloc_pid_t is \p pid_t on Unix platforms, - * and \p HANDLE on native Windows platforms. - */ -HWLOC_DECLSPEC int hwloc_get_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags); - -/** \brief Query the default memory binding policy and physical locality of the - * specified process (the locality is returned in \p cpuset as CPUs - * near the locality's actual NUMA node(s)). - * - * This function has two output parameters: \p cpuset and \p policy. - * The values returned in these parameters depend on both the \p flags - * passed in and the current memory binding policies and nodesets in - * the queried target. - * - * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query - * target is the current policies and nodesets for all the threads in - * the specified process. If ::HWLOC_MEMBIND_PROCESS is not specified - * (which is the most portable method), the process is assumed to be - * single threaded. 
This allows hwloc to use either process-based OS - * functions or thread-based OS functions, depending on which are - * available. - * - * Note that it does not make sense to pass ::HWLOC_MEMBIND_THREAD to - * this function. - * - * If ::HWLOC_MEMBIND_STRICT is specified, hwloc will check the default - * memory policies and nodesets for all threads in the specified - * process. If they are not identical, -1 is returned and errno is - * set to EXDEV. If they are identical, the policy is returned in \p - * policy. \p cpuset is set to the union of CPUs near the NUMA - * node(s) in the nodeset. - * - * Otherwise, the default nodeset from each thread is logically OR'ed - * together. \p cpuset is set to the union of CPUs near the NUMA - * node(s) in the resulting nodeset. If all threads' default policies - * are the same, \p policy is set to that policy. If they are - * different, \p policy is set to ::HWLOC_MEMBIND_MIXED. - * - * If any other flags are specified, -1 is returned and errno is set - * to EINVAL. - * - * \note \p hwloc_pid_t is \p pid_t on Unix platforms, - * and \p HANDLE on native Windows platforms. - */ -HWLOC_DECLSPEC int hwloc_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t cpuset, hwloc_membind_policy_t * policy, int flags); - -/** \brief Bind the already-allocated memory identified by (addr, len) - * to the NUMA node(s) in physical \p nodeset. - * - * \return -1 with errno set to ENOSYS if the action is not supported - * \return -1 with errno set to EXDEV if the binding cannot be enforced - */ -HWLOC_DECLSPEC int hwloc_set_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags); - -/** \brief Bind the already-allocated memory identified by (addr, len) - * to the NUMA node(s) near physical \p cpuset. 
- * - * \return -1 with errno set to ENOSYS if the action is not supported - * \return -1 with errno set to EXDEV if the binding cannot be enforced - */ -HWLOC_DECLSPEC int hwloc_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags); - -/** \brief Query the physical NUMA node(s) and binding policy of the memory - * identified by (\p addr, \p len ). - * - * This function has two output parameters: \p nodeset and \p policy. - * The values returned in these parameters depend on both the \p flags - * passed in and the memory binding policies and nodesets of the pages - * in the address range. - * - * If ::HWLOC_MEMBIND_STRICT is specified, the target pages are first - * checked to see if they all have the same memory binding policy and - * nodeset. If they do not, -1 is returned and errno is set to EXDEV. - * If they are identical across all pages, the nodeset and policy are - * returned in \p nodeset and \p policy, respectively. - * - * If ::HWLOC_MEMBIND_STRICT is not specified, \p nodeset is set to the - * union of all NUMA node(s) containing pages in the address range. - * If all pages in the target have the same policy, it is returned in - * \p policy. Otherwise, \p policy is set to ::HWLOC_MEMBIND_MIXED. - * - * If any other flags are specified, -1 is returned and errno is set - * to EINVAL. - */ -HWLOC_DECLSPEC int hwloc_get_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags); - -/** \brief Query the CPUs near the physical NUMA node(s) and binding policy of - * the memory identified by (\p addr, \p len ). - * - * This function has two output parameters: \p cpuset and \p policy. - * The values returned in these parameters depend on both the \p flags - * passed in and the memory binding policies and nodesets of the pages - * in the address range. 
- * - * If ::HWLOC_MEMBIND_STRICT is specified, the target pages are first - * checked to see if they all have the same memory binding policy and - * nodeset. If they do not, -1 is returned and errno is set to EXDEV. - * If they are identical across all pages, the policy is returned in - * \p policy. \p cpuset is set to the union of CPUs near the NUMA - * node(s) in the nodeset. - * - * If ::HWLOC_MEMBIND_STRICT is not specified, the union of all NUMA - * node(s) containing pages in the address range is calculated. \p - * cpuset is then set to the CPUs near the NUMA node(s) in this union. - * If all pages in the target have the same policy, it is returned in - * \p policy. Otherwise, \p policy is set to ::HWLOC_MEMBIND_MIXED. - * - * If any other flags are specified, -1 is returned and errno is set - * to EINVAL. - */ -HWLOC_DECLSPEC int hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_cpuset_t cpuset, hwloc_membind_policy_t * policy, int flags); - -/** \brief Allocate some memory - * - * This is equivalent to malloc(), except that it tries to allocate - * page-aligned memory from the OS. - * - * \note The allocated memory should be freed with hwloc_free(). - */ -HWLOC_DECLSPEC void *hwloc_alloc(hwloc_topology_t topology, size_t len); - -/** \brief Allocate some memory on the given physical nodeset \p nodeset - * - * \return NULL with errno set to ENOSYS if the action is not supported - * and ::HWLOC_MEMBIND_STRICT is given - * \return NULL with errno set to EXDEV if the binding cannot be enforced - * and ::HWLOC_MEMBIND_STRICT is given - * \return NULL with errno set to ENOMEM if the memory allocation failed - * even before trying to bind. - * - * \note The allocated memory should be freed with hwloc_free(). 
- */ -HWLOC_DECLSPEC void *hwloc_alloc_membind_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; - -/** \brief Allocate some memory on memory nodes near the given physical cpuset \p cpuset - * - * \return NULL with errno set to ENOSYS if the action is not supported - * and ::HWLOC_MEMBIND_STRICT is given - * \return NULL with errno set to EXDEV if the binding cannot be enforced - * and ::HWLOC_MEMBIND_STRICT is given - * \return NULL with errno set to ENOMEM if the memory allocation failed - * even before trying to bind. - * - * \note The allocated memory should be freed with hwloc_free(). - */ -HWLOC_DECLSPEC void *hwloc_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; - -/** \brief Allocate some memory on the given nodeset \p nodeset - * - * This is similar to hwloc_alloc_membind except that it is allowed to change - * the current memory binding policy, thus providing more binding support, at - * the expense of changing the current state. - */ -static __hwloc_inline void * -hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; - -/** \brief Allocate some memory on the memory nodes near given cpuset \p cpuset - * - * This is similar to hwloc_alloc_membind_policy_nodeset, but for a given cpuset. - */ -static __hwloc_inline void * -hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; - -/** \brief Free memory that was previously allocated by hwloc_alloc() - * or hwloc_alloc_membind(). 
- */ -HWLOC_DECLSPEC int hwloc_free(hwloc_topology_t topology, void *addr, size_t len); - -/** @} */ - - - -/** \defgroup hwlocality_tinker Modifying a loaded Topology - * @{ - */ - -/** \brief Add a MISC object to the topology - * - * A new MISC object will be created and inserted into the topology at the - * position given by bitmap \p cpuset. This offers a way to add new - * intermediate levels to the topology hierarchy. - * - * \p cpuset and \p name will be copied to setup the new object attributes. - * - * \return the newly-created object. - * \return \c NULL if the insertion conflicts with the existing topology tree. - * - * \note If \p name contains some non-printable characters, they will - * be dropped when exporting to XML, see hwloc_topology_export_xml(). - */ -HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object_by_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, const char *name); - -/** \brief Add a MISC object as a leaf of the topology - * - * A new MISC object will be created and inserted into the topology at the - * position given by parent. It is appended to the list of existing children, - * without ever adding any intermediate hierarchy level. This is useful for - * annotating the topology without actually changing the hierarchy. - * - * \p name will be copied to the setup the new object attributes. - * However, the new leaf object will not have any \p cpuset. - * - * \return the newly-created object - * - * \note If \p name contains some non-printable characters, they will - * be dropped when exporting to XML, see hwloc_topology_export_xml(). - */ -HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, const char *name); - -/** \brief Flags to be given to hwloc_topology_restrict(). */ -enum hwloc_restrict_flags_e { - /** \brief Adapt distance matrices according to objects being removed during restriction. 
- * If this flag is not set, distance matrices are removed. - * \hideinitializer - */ - HWLOC_RESTRICT_FLAG_ADAPT_DISTANCES = (1<<0), - - /** \brief Move Misc objects to ancestors if their parents are removed during restriction. - * If this flag is not set, Misc objects are removed when their parents are removed. - * \hideinitializer - */ - HWLOC_RESTRICT_FLAG_ADAPT_MISC = (1<<1), - - /** \brief Move I/O objects to ancestors if their parents are removed during restriction. - * If this flag is not set, I/O devices and bridges are removed when their parents are removed. - * \hideinitializer - */ - HWLOC_RESTRICT_FLAG_ADAPT_IO = (1<<2) -}; - -/** \brief Restrict the topology to the given CPU set. - * - * Topology \p topology is modified so as to remove all objects that - * are not included (or partially included) in the CPU set \p cpuset. - * All objects CPU and node sets are restricted accordingly. - * - * \p flags is a OR'ed set of ::hwloc_restrict_flags_e. - * - * \note This call may not be reverted by restricting back to a larger - * cpuset. Once dropped during restriction, objects may not be brought - * back, except by loading another topology with hwloc_topology_load(). - * - * \return 0 on success. - * - * \return -1 with errno set to EINVAL if the input cpuset is invalid. - * The topology is not modified in this case. - * - * \return -1 with errno set to ENOMEM on failure to allocate internal data. - * The topology is reinitialized in this case. It should be either - * destroyed with hwloc_topology_destroy() or configured and loaded again. - */ -HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, unsigned long flags); - -/** @} */ - - - -/** \defgroup hwlocality_custom Building Custom Topologies - * - * A custom topology may be initialized by calling hwloc_topology_set_custom() - * after hwloc_topology_init(). It may then be modified by inserting objects - * or entire topologies. 
Once done assembling, hwloc_topology_load() should - * be invoked as usual to finalize the topology. - * @{ - */ - -/** \brief Insert an existing topology inside a custom topology - * - * Duplicate the existing topology \p oldtopology inside a new - * custom topology \p newtopology as a leaf of object \p newparent. - * - * If \p oldroot is not \c NULL, duplicate \p oldroot and all its - * children instead of the entire \p oldtopology. Passing the root - * object of \p oldtopology in \p oldroot is equivalent to passing - * \c NULL. - * - * The custom topology \p newtopology must have been prepared with - * hwloc_topology_set_custom() and not loaded with hwloc_topology_load() - * yet. - * - * \p newparent may be either the root of \p newtopology or an object - * that was added through hwloc_custom_insert_group_object_by_parent(). - * - * \note The cpuset and nodeset of the \p newparent object are not - * modified based on the contents of \p oldtopology. - */ -HWLOC_DECLSPEC int hwloc_custom_insert_topology(hwloc_topology_t newtopology, hwloc_obj_t newparent, hwloc_topology_t oldtopology, hwloc_obj_t oldroot); - -/** \brief Insert a new group object inside a custom topology - * - * An object with type ::HWLOC_OBJ_GROUP is inserted as a new child - * of object \p parent. - * - * \p groupdepth is the depth attribute to be given to the new object. - * It may for instance be 0 for top-level groups, 1 for their children, - * and so on. - * - * The custom topology \p newtopology must have been prepared with - * hwloc_topology_set_custom() and not loaded with hwloc_topology_load() - * yet. - * - * \p parent may be either the root of \p topology or an object that - * was added earlier through hwloc_custom_insert_group_object_by_parent(). - * - * \note The cpuset and nodeset of the new group object are NULL because - * these sets are meaningless when assembling multiple topologies. - * - * \note The cpuset and nodeset of the \p parent object are not modified. 
- */ -HWLOC_DECLSPEC hwloc_obj_t hwloc_custom_insert_group_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, int groupdepth); - -/** @} */ - - - -/** \defgroup hwlocality_xmlexport Exporting Topologies to XML - * @{ - */ - -/** \brief Export the topology into an XML file. - * - * This file may be loaded later through hwloc_topology_set_xml(). - * - * \return -1 if a failure occured. - * - * \note See also hwloc_topology_set_userdata_export_callback() - * for exporting application-specific object userdata. - * - * \note The topology-specific userdata pointer is ignored when exporting to XML. - * - * \note Only printable characters may be exported to XML string attributes. - * Any other character, especially any non-ASCII character, will be silently - * dropped. - * - * \note If \p name is "-", the XML output is sent to the standard output. - */ -HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const char *xmlpath); - -/** \brief Export the topology into a newly-allocated XML memory buffer. - * - * \p xmlbuffer is allocated by the callee and should be freed with - * hwloc_free_xmlbuffer() later in the caller. - * - * This memory buffer may be loaded later through hwloc_topology_set_xmlbuffer(). - * - * \return -1 if a failure occured. - * - * \note See also hwloc_topology_set_userdata_export_callback() - * for exporting application-specific object userdata. - * - * \note The topology-specific userdata pointer is ignored when exporting to XML. - * - * \note Only printable characters may be exported to XML string attributes. - * Any other character, especially any non-ASCII character, will be silently - * dropped. 
- */ -HWLOC_DECLSPEC int hwloc_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlbuffer, int *buflen); - -/** \brief Free a buffer allocated by hwloc_topology_export_xmlbuffer() */ -HWLOC_DECLSPEC void hwloc_free_xmlbuffer(hwloc_topology_t topology, char *xmlbuffer); - -/** \brief Set the application-specific callback for exporting object userdata - * - * The object userdata pointer is not exported to XML by default because hwloc - * does not know what it contains. - * - * This function lets applications set \p export_cb to a callback function - * that converts this opaque userdata into an exportable string. - * - * \p export_cb is invoked during XML export for each object whose - * \p userdata pointer is not \c NULL. - * The callback should use hwloc_export_obj_userdata() or - * hwloc_export_obj_userdata_base64() to actually export - * something to XML (possibly multiple times per object). - * - * \p export_cb may be set to \c NULL if userdata should not be exported to XML. - * - * \note The topology-specific userdata pointer is ignored when exporting to XML. - */ -HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t topology, - void (*export_cb)(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj)); - -/** \brief Export some object userdata to XML - * - * This function may only be called from within the export() callback passed - * to hwloc_topology_set_userdata_export_callback(). - * It may be invoked one of multiple times to export some userdata to XML. - * The \p buffer content of length \p length is stored with optional name - * \p name. - * - * When importing this XML file, the import() callback (if set) will be - * called exactly as many times as hwloc_export_obj_userdata() was called - * during export(). It will receive the corresponding \p name, \p buffer - * and \p length arguments. - * - * \p reserved, \p topology and \p obj must be the first three parameters - * that were given to the export callback. 
- * - * Only printable characters may be exported to XML string attributes. - * If a non-printable character is passed in \p name or \p buffer, - * the function returns -1 with errno set to EINVAL. - * - * If exporting binary data, the application should first encode into - * printable characters only (or use hwloc_export_obj_userdata_base64()). - * It should also take care of portability issues if the export may - * be reimported on a different architecture. - */ -HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); - -/** \brief Encode and export some object userdata to XML - * - * This function is similar to hwloc_export_obj_userdata() but it encodes - * the input buffer into printable characters before exporting. - * On import, decoding is automatically performed before the data is given - * to the import() callback if any. - * - * This function may only be called from within the export() callback passed - * to hwloc_topology_set_userdata_export_callback(). - * - * The function does not take care of portability issues if the export - * may be reimported on a different architecture. - */ -HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); - -/** \brief Set the application-specific callback for importing userdata - * - * On XML import, userdata is ignored by default because hwloc does not know - * how to store it in memory. - * - * This function lets applications set \p import_cb to a callback function - * that will get the XML-stored userdata and store it in the object as expected - * by the application. - * - * \p import_cb is called during hwloc_topology_load() as many times as - * hwloc_export_obj_userdata() was called during export. The topology - * is not entirely setup yet. 
Object attributes are ready to consult, - * but links between objects are not. - * - * \p import_cb may be \c NULL if userdata should be ignored during import. - * - * \note \p buffer contains \p length characters followed by a null byte ('\0'). - * - * \note This function should be called before hwloc_topology_load(). - * - * \note The topology-specific userdata pointer is ignored when importing from XML. - */ -HWLOC_DECLSPEC void hwloc_topology_set_userdata_import_callback(hwloc_topology_t topology, - void (*import_cb)(hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length)); - -/** @} */ - - -/** \defgroup hwlocality_syntheticexport Exporting Topologies to Synthetic - * @{ - */ - -/** \brief Flags for exporting synthetic topologies. - * - * Flags to be given as a OR'ed set to hwloc_topology_export_synthetic(). - */ -enum hwloc_topology_export_synthetic_flags_e { - /** \brief Export extended types such as L2dcache as basic types such as Cache. - * - * This is required if loading the synthetic description with hwloc < 1.9. - * \hideinitializer - */ - HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES = (1UL<<0), - - /** \brief Do not export level attributes. - * - * Ignore level attributes such as memory/cache sizes or PU indexes. - * This is required if loading the synthetic description with hwloc < 1.10. - * \hideinitializer - */ - HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS = (1UL<<1) -}; - -/** \brief Export the topology as a synthetic string. - * - * At most \p buflen characters will be written in \p buffer, - * including the terminating \0. - * - * This exported string may be given back to hwloc_topology_set_synthetic(). - * - * \p flags is a OR'ed set of hwloc_topology_export_synthetic_flags_e. - * - * \return The number of characters that were written, - * not including the terminating \0. - * - * \return -1 if the topology could not be exported, - * for instance if it is not symmetric. 
- * - * \note A 1024-byte buffer should be large enough for exporting - * topologies in the vast majority of cases. - */ - HWLOC_DECLSPEC int hwloc_topology_export_synthetic(hwloc_topology_t topology, char *buffer, size_t buflen, unsigned long flags); - -/** @} */ - - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -/* high-level helpers */ -#include - -/* inline code of some functions above */ -#include - -/* topology diffs */ -#include - -/* deprecated headers */ -#include - -#endif /* HWLOC_H */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/rename.h b/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/rename.h deleted file mode 100644 index 7b4b8a3c225..00000000000 --- a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/rename.h +++ /dev/null @@ -1,651 +0,0 @@ -/* - * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright © 2010-2014 Inria. All rights reserved. - * See COPYING in top-level directory. - */ - -#ifndef HWLOC_RENAME_H -#define HWLOC_RENAME_H - -#include - - -#ifdef __cplusplus -extern "C" { -#endif - - -/* Only enact these defines if we're actually renaming the symbols - (i.e., avoid trying to have no-op defines if we're *not* - renaming). */ - -#if HWLOC_SYM_TRANSFORM - -/* Use a preprocessor two-step in order to get the prefixing right. - Make 2 macros: HWLOC_NAME and HWLOC_NAME_CAPS for renaming - things. */ - -#define HWLOC_MUNGE_NAME(a, b) HWLOC_MUNGE_NAME2(a, b) -#define HWLOC_MUNGE_NAME2(a, b) a ## b -#define HWLOC_NAME(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX, hwloc_ ## name) -#define HWLOC_NAME_CAPS(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX_CAPS, hwloc_ ## name) - -/* Now define all the "real" names to be the prefixed names. This - allows us to use the real names throughout the code base (i.e., - "hwloc_"); the preprocessor will adjust to have the prefixed - name under the covers. 
*/ - -/* Names from hwloc.h */ - -#define hwloc_get_api_version HWLOC_NAME(get_api_version) - -#define hwloc_topology HWLOC_NAME(topology) -#define hwloc_topology_t HWLOC_NAME(topology_t) - -#define hwloc_cpuset_t HWLOC_NAME(cpuset_t) -#define hwloc_const_cpuset_t HWLOC_NAME(const_cpuset_t) -#define hwloc_nodeset_t HWLOC_NAME(nodeset_t) -#define hwloc_const_nodeset_t HWLOC_NAME(const_nodeset_t) - -#define HWLOC_OBJ_SYSTEM HWLOC_NAME_CAPS(OBJ_SYSTEM) -#define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE) -#define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE) -#define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE) -#define HWLOC_OBJ_CACHE HWLOC_NAME_CAPS(OBJ_CACHE) -#define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE) -#define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU) -#define HWLOC_OBJ_MISC HWLOC_NAME_CAPS(OBJ_MISC) -#define HWLOC_OBJ_GROUP HWLOC_NAME_CAPS(OBJ_GROUP) -#define HWLOC_OBJ_BRIDGE HWLOC_NAME_CAPS(OBJ_BRIDGE) -#define HWLOC_OBJ_PCI_DEVICE HWLOC_NAME_CAPS(OBJ_PCI_DEVICE) -#define HWLOC_OBJ_OS_DEVICE HWLOC_NAME_CAPS(OBJ_OS_DEVICE) -#define HWLOC_OBJ_TYPE_MAX HWLOC_NAME_CAPS(OBJ_TYPE_MAX) -#define hwloc_obj_type_t HWLOC_NAME(obj_type_t) - -#define hwloc_obj_cache_type_e HWLOC_NAME(obj_cache_type_e) -#define hwloc_obj_cache_type_t HWLOC_NAME(obj_cache_type_t) -#define HWLOC_OBJ_CACHE_UNIFIED HWLOC_NAME_CAPS(OBJ_CACHE_UNIFIED) -#define HWLOC_OBJ_CACHE_DATA HWLOC_NAME_CAPS(OBJ_CACHE_DATA) -#define HWLOC_OBJ_CACHE_INSTRUCTION HWLOC_NAME_CAPS(OBJ_CACHE_INSTRUCTION) - -#define hwloc_obj_bridge_type_e HWLOC_NAME(obj_bridge_type_e) -#define hwloc_obj_bridge_type_t HWLOC_NAME(obj_bridge_type_t) -#define HWLOC_OBJ_BRIDGE_HOST HWLOC_NAME_CAPS(OBJ_BRIDGE_HOST) -#define HWLOC_OBJ_BRIDGE_PCI HWLOC_NAME_CAPS(OBJ_BRIDGE_PCI) - -#define hwloc_obj_osdev_type_e HWLOC_NAME(obj_osdev_type_e) -#define hwloc_obj_osdev_type_t HWLOC_NAME(obj_osdev_type_t) -#define HWLOC_OBJ_OSDEV_BLOCK HWLOC_NAME_CAPS(OBJ_OSDEV_BLOCK) -#define HWLOC_OBJ_OSDEV_GPU HWLOC_NAME_CAPS(OBJ_OSDEV_GPU) -#define 
HWLOC_OBJ_OSDEV_NETWORK HWLOC_NAME_CAPS(OBJ_OSDEV_NETWORK) -#define HWLOC_OBJ_OSDEV_OPENFABRICS HWLOC_NAME_CAPS(OBJ_OSDEV_OPENFABRICS) -#define HWLOC_OBJ_OSDEV_DMA HWLOC_NAME_CAPS(OBJ_OSDEV_DMA) -#define HWLOC_OBJ_OSDEV_COPROC HWLOC_NAME_CAPS(OBJ_OSDEV_COPROC) - -#define hwloc_compare_types HWLOC_NAME(compare_types) - -#define hwloc_compare_types_e HWLOC_NAME(compare_types_e) -#define HWLOC_TYPE_UNORDERED HWLOC_NAME_CAPS(TYPE_UNORDERED) - -#define hwloc_obj_memory_s HWLOC_NAME(obj_memory_s) -#define hwloc_obj_memory_page_type_s HWLOC_NAME(obj_memory_page_type_s) - -#define hwloc_obj HWLOC_NAME(obj) -#define hwloc_obj_t HWLOC_NAME(obj_t) - -#define hwloc_distances_s HWLOC_NAME(distances_s) -#define hwloc_obj_info_s HWLOC_NAME(obj_info_s) - -#define hwloc_obj_attr_u HWLOC_NAME(obj_attr_u) -#define hwloc_cache_attr_s HWLOC_NAME(cache_attr_s) -#define hwloc_group_attr_s HWLOC_NAME(group_attr_s) -#define hwloc_pcidev_attr_s HWLOC_NAME(pcidev_attr_s) -#define hwloc_bridge_attr_s HWLOC_NAME(bridge_attr_s) -#define hwloc_osdev_attr_s HWLOC_NAME(osdev_attr_s) - -#define hwloc_topology_init HWLOC_NAME(topology_init) -#define hwloc_topology_load HWLOC_NAME(topology_load) -#define hwloc_topology_destroy HWLOC_NAME(topology_destroy) -#define hwloc_topology_dup HWLOC_NAME(topology_dup) -#define hwloc_topology_check HWLOC_NAME(topology_check) -#define hwloc_topology_ignore_type HWLOC_NAME(topology_ignore_type) -#define hwloc_topology_ignore_type_keep_structure HWLOC_NAME(topology_ignore_type_keep_structure) -#define hwloc_topology_ignore_all_keep_structure HWLOC_NAME(topology_ignore_all_keep_structure) - -#define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e) - -#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM) -#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM) -#define HWLOC_TOPOLOGY_FLAG_IO_DEVICES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IO_DEVICES) -#define HWLOC_TOPOLOGY_FLAG_IO_BRIDGES 
HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IO_BRIDGES) -#define HWLOC_TOPOLOGY_FLAG_WHOLE_IO HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_IO) -#define HWLOC_TOPOLOGY_FLAG_ICACHES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_ICACHES) - -#define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags) -#define hwloc_topology_set_fsroot HWLOC_NAME(topology_set_fsroot) -#define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid) -#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) -#define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml) -#define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer) -#define hwloc_topology_set_custom HWLOC_NAME(topology_set_custom) -#define hwloc_topology_set_distance_matrix HWLOC_NAME(topology_set_distance_matrix) - -#define hwloc_topology_discovery_support HWLOC_NAME(topology_discovery_support) -#define hwloc_topology_cpubind_support HWLOC_NAME(topology_cpubind_support) -#define hwloc_topology_membind_support HWLOC_NAME(topology_membind_support) -#define hwloc_topology_support HWLOC_NAME(topology_support) -#define hwloc_topology_get_support HWLOC_NAME(topology_get_support) -#define hwloc_topology_set_userdata HWLOC_NAME(topology_set_userdata) -#define hwloc_topology_get_userdata HWLOC_NAME(topology_get_userdata) - -#define hwloc_topology_export_xml HWLOC_NAME(topology_export_xml) -#define hwloc_topology_export_xmlbuffer HWLOC_NAME(topology_export_xmlbuffer) -#define hwloc_free_xmlbuffer HWLOC_NAME(free_xmlbuffer) -#define hwloc_topology_set_userdata_export_callback HWLOC_NAME(topology_set_userdata_export_callback) -#define hwloc_export_obj_userdata HWLOC_NAME(export_obj_userdata) -#define hwloc_export_obj_userdata_base64 HWLOC_NAME(export_obj_userdata_base64) -#define hwloc_topology_set_userdata_import_callback HWLOC_NAME(topology_set_userdata_import_callback) - -#define hwloc_topology_export_synthetic_flags_e HWLOC_NAME(topology_export_synthetic_flags_e) -#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES 
HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) -#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS) -#define hwloc_topology_export_synthetic HWLOC_NAME(topology_export_synthetic) - -#define hwloc_topology_insert_misc_object_by_cpuset HWLOC_NAME(topology_insert_misc_object_by_cpuset) -#define hwloc_topology_insert_misc_object_by_parent HWLOC_NAME(topology_insert_misc_object_by_parent) - -#define hwloc_custom_insert_topology HWLOC_NAME(custom_insert_topology) -#define hwloc_custom_insert_group_object_by_parent HWLOC_NAME(custom_insert_group_object_by_parent) - -#define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e) -#define HWLOC_RESTRICT_FLAG_ADAPT_DISTANCES HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_DISTANCES) -#define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC) -#define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO) -#define hwloc_topology_restrict HWLOC_NAME(topology_restrict) - -#define hwloc_topology_get_depth HWLOC_NAME(topology_get_depth) -#define hwloc_get_type_depth HWLOC_NAME(get_type_depth) - -#define hwloc_get_type_depth_e HWLOC_NAME(get_type_depth_e) -#define HWLOC_TYPE_DEPTH_UNKNOWN HWLOC_NAME_CAPS(TYPE_DEPTH_UNKNOWN) -#define HWLOC_TYPE_DEPTH_MULTIPLE HWLOC_NAME_CAPS(TYPE_DEPTH_MULTIPLE) -#define HWLOC_TYPE_DEPTH_BRIDGE HWLOC_NAME_CAPS(TYPE_DEPTH_BRIDGE) -#define HWLOC_TYPE_DEPTH_PCI_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_PCI_DEVICE) -#define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE) - -#define hwloc_get_depth_type HWLOC_NAME(get_depth_type) -#define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth) -#define hwloc_get_nbobjs_by_type HWLOC_NAME(get_nbobjs_by_type) - -#define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem) -#define hwloc_topology_get_flags HWLOC_NAME(topology_get_flags) - -#define hwloc_get_obj_by_depth HWLOC_NAME(get_obj_by_depth ) -#define hwloc_get_obj_by_type 
HWLOC_NAME(get_obj_by_type ) - -#define hwloc_obj_type_string HWLOC_NAME(obj_type_string ) -#define hwloc_obj_type_sscanf HWLOC_NAME(obj_type_sscanf) -#define hwloc_obj_type_snprintf HWLOC_NAME(obj_type_snprintf ) -#define hwloc_obj_attr_snprintf HWLOC_NAME(obj_attr_snprintf ) -#define hwloc_obj_cpuset_snprintf HWLOC_NAME(obj_cpuset_snprintf) -#define hwloc_obj_get_info_by_name HWLOC_NAME(obj_get_info_by_name) -#define hwloc_obj_add_info HWLOC_NAME(obj_add_info) - -#define HWLOC_CPUBIND_PROCESS HWLOC_NAME_CAPS(CPUBIND_PROCESS) -#define HWLOC_CPUBIND_THREAD HWLOC_NAME_CAPS(CPUBIND_THREAD) -#define HWLOC_CPUBIND_STRICT HWLOC_NAME_CAPS(CPUBIND_STRICT) -#define HWLOC_CPUBIND_NOMEMBIND HWLOC_NAME_CAPS(CPUBIND_NOMEMBIND) - -#define hwloc_cpubind_flags_t HWLOC_NAME(cpubind_flags_t) - -#define hwloc_set_cpubind HWLOC_NAME(set_cpubind) -#define hwloc_get_cpubind HWLOC_NAME(get_cpubind) -#define hwloc_set_proc_cpubind HWLOC_NAME(set_proc_cpubind) -#define hwloc_get_proc_cpubind HWLOC_NAME(get_proc_cpubind) -#define hwloc_set_thread_cpubind HWLOC_NAME(set_thread_cpubind) -#define hwloc_get_thread_cpubind HWLOC_NAME(get_thread_cpubind) - -#define hwloc_get_last_cpu_location HWLOC_NAME(get_last_cpu_location) -#define hwloc_get_proc_last_cpu_location HWLOC_NAME(get_proc_last_cpu_location) - -#define HWLOC_MEMBIND_DEFAULT HWLOC_NAME_CAPS(MEMBIND_DEFAULT) -#define HWLOC_MEMBIND_FIRSTTOUCH HWLOC_NAME_CAPS(MEMBIND_FIRSTTOUCH) -#define HWLOC_MEMBIND_BIND HWLOC_NAME_CAPS(MEMBIND_BIND) -#define HWLOC_MEMBIND_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_INTERLEAVE) -#define HWLOC_MEMBIND_REPLICATE HWLOC_NAME_CAPS(MEMBIND_REPLICATE) -#define HWLOC_MEMBIND_NEXTTOUCH HWLOC_NAME_CAPS(MEMBIND_NEXTTOUCH) -#define HWLOC_MEMBIND_MIXED HWLOC_NAME_CAPS(MEMBIND_MIXED) - -#define hwloc_membind_policy_t HWLOC_NAME(membind_policy_t) - -#define HWLOC_MEMBIND_PROCESS HWLOC_NAME_CAPS(MEMBIND_PROCESS) -#define HWLOC_MEMBIND_THREAD HWLOC_NAME_CAPS(MEMBIND_THREAD) -#define HWLOC_MEMBIND_STRICT 
HWLOC_NAME_CAPS(MEMBIND_STRICT) -#define HWLOC_MEMBIND_MIGRATE HWLOC_NAME_CAPS(MEMBIND_MIGRATE) -#define HWLOC_MEMBIND_NOCPUBIND HWLOC_NAME_CAPS(MEMBIND_NOCPUBIND) - -#define hwloc_membind_flags_t HWLOC_NAME(membind_flags_t) - -#define hwloc_set_membind_nodeset HWLOC_NAME(set_membind_nodeset) -#define hwloc_set_membind HWLOC_NAME(set_membind) -#define hwloc_get_membind_nodeset HWLOC_NAME(get_membind_nodeset) -#define hwloc_get_membind HWLOC_NAME(get_membind) -#define hwloc_set_proc_membind_nodeset HWLOC_NAME(set_proc_membind_nodeset) -#define hwloc_set_proc_membind HWLOC_NAME(set_proc_membind) -#define hwloc_get_proc_membind_nodeset HWLOC_NAME(get_proc_membind_nodeset) -#define hwloc_get_proc_membind HWLOC_NAME(get_proc_membind) -#define hwloc_set_area_membind_nodeset HWLOC_NAME(set_area_membind_nodeset) -#define hwloc_set_area_membind HWLOC_NAME(set_area_membind) -#define hwloc_get_area_membind_nodeset HWLOC_NAME(get_area_membind_nodeset) -#define hwloc_get_area_membind HWLOC_NAME(get_area_membind) -#define hwloc_alloc_membind_nodeset HWLOC_NAME(alloc_membind_nodeset) -#define hwloc_alloc_membind HWLOC_NAME(alloc_membind) -#define hwloc_alloc HWLOC_NAME(alloc) -#define hwloc_free HWLOC_NAME(free) - -#define hwloc_get_non_io_ancestor_obj HWLOC_NAME(get_non_io_ancestor_obj) -#define hwloc_get_next_pcidev HWLOC_NAME(get_next_pcidev) -#define hwloc_get_pcidev_by_busid HWLOC_NAME(get_pcidev_by_busid) -#define hwloc_get_pcidev_by_busidstring HWLOC_NAME(get_pcidev_by_busidstring) -#define hwloc_get_next_osdev HWLOC_NAME(get_next_osdev) -#define hwloc_get_next_bridge HWLOC_NAME(get_next_bridge) -#define hwloc_bridge_covers_pcibus HWLOC_NAME(bridge_covers_pcibus) -#define hwloc_get_hostbridge_by_pcibus HWLOC_NAME(get_hostbridge_by_pcibus) - -/* hwloc/bitmap.h */ - -#define hwloc_bitmap_s HWLOC_NAME(bitmap_s) -#define hwloc_bitmap_t HWLOC_NAME(bitmap_t) -#define hwloc_const_bitmap_t HWLOC_NAME(const_bitmap_t) - -#define hwloc_bitmap_alloc HWLOC_NAME(bitmap_alloc) -#define 
hwloc_bitmap_alloc_full HWLOC_NAME(bitmap_alloc_full) -#define hwloc_bitmap_free HWLOC_NAME(bitmap_free) -#define hwloc_bitmap_dup HWLOC_NAME(bitmap_dup) -#define hwloc_bitmap_copy HWLOC_NAME(bitmap_copy) -#define hwloc_bitmap_snprintf HWLOC_NAME(bitmap_snprintf) -#define hwloc_bitmap_asprintf HWLOC_NAME(bitmap_asprintf) -#define hwloc_bitmap_sscanf HWLOC_NAME(bitmap_sscanf) -#define hwloc_bitmap_list_snprintf HWLOC_NAME(bitmap_list_snprintf) -#define hwloc_bitmap_list_asprintf HWLOC_NAME(bitmap_list_asprintf) -#define hwloc_bitmap_list_sscanf HWLOC_NAME(bitmap_list_sscanf) -#define hwloc_bitmap_taskset_snprintf HWLOC_NAME(bitmap_taskset_snprintf) -#define hwloc_bitmap_taskset_asprintf HWLOC_NAME(bitmap_taskset_asprintf) -#define hwloc_bitmap_taskset_sscanf HWLOC_NAME(bitmap_taskset_sscanf) -#define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero) -#define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill) -#define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong) - -#define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong) -#define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong) -#define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong) -#define hwloc_bitmap_only HWLOC_NAME(bitmap_only) -#define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut) -#define hwloc_bitmap_set HWLOC_NAME(bitmap_set) -#define hwloc_bitmap_set_range HWLOC_NAME(bitmap_set_range) -#define hwloc_bitmap_set_ith_ulong HWLOC_NAME(bitmap_set_ith_ulong) -#define hwloc_bitmap_clr HWLOC_NAME(bitmap_clr) -#define hwloc_bitmap_clr_range HWLOC_NAME(bitmap_clr_range) -#define hwloc_bitmap_isset HWLOC_NAME(bitmap_isset) -#define hwloc_bitmap_iszero HWLOC_NAME(bitmap_iszero) -#define hwloc_bitmap_isfull HWLOC_NAME(bitmap_isfull) -#define hwloc_bitmap_isequal HWLOC_NAME(bitmap_isequal) -#define hwloc_bitmap_intersects HWLOC_NAME(bitmap_intersects) -#define hwloc_bitmap_isincluded HWLOC_NAME(bitmap_isincluded) -#define hwloc_bitmap_or HWLOC_NAME(bitmap_or) -#define hwloc_bitmap_and 
HWLOC_NAME(bitmap_and) -#define hwloc_bitmap_andnot HWLOC_NAME(bitmap_andnot) -#define hwloc_bitmap_xor HWLOC_NAME(bitmap_xor) -#define hwloc_bitmap_not HWLOC_NAME(bitmap_not) -#define hwloc_bitmap_first HWLOC_NAME(bitmap_first) -#define hwloc_bitmap_last HWLOC_NAME(bitmap_last) -#define hwloc_bitmap_next HWLOC_NAME(bitmap_next) -#define hwloc_bitmap_singlify HWLOC_NAME(bitmap_singlify) -#define hwloc_bitmap_compare_first HWLOC_NAME(bitmap_compare_first) -#define hwloc_bitmap_compare HWLOC_NAME(bitmap_compare) -#define hwloc_bitmap_weight HWLOC_NAME(bitmap_weight) - -/* hwloc/helper.h */ - -#define hwloc_get_type_or_below_depth HWLOC_NAME(get_type_or_below_depth) -#define hwloc_get_type_or_above_depth HWLOC_NAME(get_type_or_above_depth) -#define hwloc_get_root_obj HWLOC_NAME(get_root_obj) -#define hwloc_get_ancestor_obj_by_depth HWLOC_NAME(get_ancestor_obj_by_depth) -#define hwloc_get_ancestor_obj_by_type HWLOC_NAME(get_ancestor_obj_by_type) -#define hwloc_get_next_obj_by_depth HWLOC_NAME(get_next_obj_by_depth) -#define hwloc_get_next_obj_by_type HWLOC_NAME(get_next_obj_by_type) -#define hwloc_get_pu_obj_by_os_index HWLOC_NAME(get_pu_obj_by_os_index) -#define hwloc_get_numanode_obj_by_os_index HWLOC_NAME(get_numanode_obj_by_os_index) -#define hwloc_get_next_child HWLOC_NAME(get_next_child) -#define hwloc_get_common_ancestor_obj HWLOC_NAME(get_common_ancestor_obj) -#define hwloc_obj_is_in_subtree HWLOC_NAME(obj_is_in_subtree) -#define hwloc_get_first_largest_obj_inside_cpuset HWLOC_NAME(get_first_largest_obj_inside_cpuset) -#define hwloc_get_largest_objs_inside_cpuset HWLOC_NAME(get_largest_objs_inside_cpuset) -#define hwloc_get_next_obj_inside_cpuset_by_depth HWLOC_NAME(get_next_obj_inside_cpuset_by_depth) -#define hwloc_get_next_obj_inside_cpuset_by_type HWLOC_NAME(get_next_obj_inside_cpuset_by_type) -#define hwloc_get_obj_inside_cpuset_by_depth HWLOC_NAME(get_obj_inside_cpuset_by_depth) -#define hwloc_get_obj_inside_cpuset_by_type 
HWLOC_NAME(get_obj_inside_cpuset_by_type) -#define hwloc_get_nbobjs_inside_cpuset_by_depth HWLOC_NAME(get_nbobjs_inside_cpuset_by_depth) -#define hwloc_get_nbobjs_inside_cpuset_by_type HWLOC_NAME(get_nbobjs_inside_cpuset_by_type) -#define hwloc_get_obj_index_inside_cpuset HWLOC_NAME(get_obj_index_inside_cpuset) -#define hwloc_get_child_covering_cpuset HWLOC_NAME(get_child_covering_cpuset) -#define hwloc_get_obj_covering_cpuset HWLOC_NAME(get_obj_covering_cpuset) -#define hwloc_get_next_obj_covering_cpuset_by_depth HWLOC_NAME(get_next_obj_covering_cpuset_by_depth) -#define hwloc_get_next_obj_covering_cpuset_by_type HWLOC_NAME(get_next_obj_covering_cpuset_by_type) -#define hwloc_get_cache_type_depth HWLOC_NAME(get_cache_type_depth) -#define hwloc_get_cache_covering_cpuset HWLOC_NAME(get_cache_covering_cpuset) -#define hwloc_get_shared_cache_covering_obj HWLOC_NAME(get_shared_cache_covering_obj) -#define hwloc_get_closest_objs HWLOC_NAME(get_closest_objs) -#define hwloc_get_obj_below_by_type HWLOC_NAME(get_obj_below_by_type) -#define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type) -#define hwloc_distrib_flags_e HWLOC_NAME(distrib_flags_e) -#define HWLOC_DISTRIB_FLAG_REVERSE HWLOC_NAME_CAPS(DISTRIB_FLAG_REVERSE) -#define hwloc_distrib HWLOC_NAME(distrib) -#define hwloc_alloc_membind_policy HWLOC_NAME(alloc_membind_policy) -#define hwloc_alloc_membind_policy_nodeset HWLOC_NAME(alloc_membind_policy_nodeset) -#define hwloc_topology_get_complete_cpuset HWLOC_NAME(topology_get_complete_cpuset) -#define hwloc_topology_get_topology_cpuset HWLOC_NAME(topology_get_topology_cpuset) -#define hwloc_topology_get_online_cpuset HWLOC_NAME(topology_get_online_cpuset) -#define hwloc_topology_get_allowed_cpuset HWLOC_NAME(topology_get_allowed_cpuset) -#define hwloc_topology_get_complete_nodeset HWLOC_NAME(topology_get_complete_nodeset) -#define hwloc_topology_get_topology_nodeset HWLOC_NAME(topology_get_topology_nodeset) -#define 
hwloc_topology_get_allowed_nodeset HWLOC_NAME(topology_get_allowed_nodeset) -#define hwloc_cpuset_to_nodeset HWLOC_NAME(cpuset_to_nodeset) -#define hwloc_cpuset_to_nodeset_strict HWLOC_NAME(cpuset_to_nodeset_strict) -#define hwloc_cpuset_from_nodeset HWLOC_NAME(cpuset_from_nodeset) -#define hwloc_cpuset_from_nodeset_strict HWLOC_NAME(cpuset_from_nodeset_strict) -#define hwloc_get_whole_distance_matrix_by_depth HWLOC_NAME(get_whole_distance_matrix_by_depth) -#define hwloc_get_whole_distance_matrix_by_type HWLOC_NAME(get_whole_distance_matrix_by_type) -#define hwloc_get_distance_matrix_covering_obj_by_depth HWLOC_NAME(get_distance_matrix_covering_obj_by_depth) -#define hwloc_get_latency HWLOC_NAME(get_latency) - -/* diff.h */ - -#define hwloc_topology_diff_obj_attr_type_e HWLOC_NAME(topology_diff_obj_attr_type_e) -#define hwloc_topology_diff_obj_attr_type_t HWLOC_NAME(topology_diff_obj_attr_type_t) -#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_SIZE) -#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_NAME) -#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_INFO) -#define hwloc_topology_diff_obj_attr_u HWLOC_NAME(topology_diff_obj_attr_u) -#define hwloc_topology_diff_obj_attr_generic_s HWLOC_NAME(topology_diff_obj_attr_generic_s) -#define hwloc_topology_diff_obj_attr_uint64_s HWLOC_NAME(topology_diff_obj_attr_uint64_s) -#define hwloc_topology_diff_obj_attr_string_s HWLOC_NAME(topology_diff_obj_attr_string_s) -#define hwloc_topology_diff_type_e HWLOC_NAME(topology_diff_type_e) -#define hwloc_topology_diff_type_t HWLOC_NAME(topology_diff_type_t) -#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR) -#define HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX HWLOC_NAME_CAPS(TOPOLOGY_DIFF_TOO_COMPLEX) -#define hwloc_topology_diff_u HWLOC_NAME(topology_diff_u) -#define hwloc_topology_diff_t HWLOC_NAME(topology_diff_t) -#define hwloc_topology_diff_generic_s 
HWLOC_NAME(topology_diff_generic_s) -#define hwloc_topology_diff_obj_attr_s HWLOC_NAME(topology_diff_obj_attr_s) -#define hwloc_topology_diff_too_complex_s HWLOC_NAME(topology_diff_too_complex_s) -#define hwloc_topology_diff_build HWLOC_NAME(topology_diff_build) -#define hwloc_topology_diff_apply_flags_e HWLOC_NAME(topology_diff_apply_flags_e) -#define HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_APPLY_REVERSE) -#define hwloc_topology_diff_apply HWLOC_NAME(topology_diff_apply) -#define hwloc_topology_diff_destroy HWLOC_NAME(topology_diff_destroy) -#define hwloc_topology_diff_load_xml HWLOC_NAME(topology_diff_load_xml) -#define hwloc_topology_diff_export_xml HWLOC_NAME(topology_diff_export_xml) -#define hwloc_topology_diff_load_xmlbuffer HWLOC_NAME(topology_diff_load_xmlbuffer) -#define hwloc_topology_diff_export_xmlbuffer HWLOC_NAME(topology_diff_export_xmlbuffer) - -/* glibc-sched.h */ - -#define hwloc_cpuset_to_glibc_sched_affinity HWLOC_NAME(cpuset_to_glibc_sched_affinity) -#define hwloc_cpuset_from_glibc_sched_affinity HWLOC_NAME(cpuset_from_glibc_sched_affinity) - -/* linux-libnuma.h */ - -#define hwloc_cpuset_to_linux_libnuma_ulongs HWLOC_NAME(cpuset_to_linux_libnuma_ulongs) -#define hwloc_nodeset_to_linux_libnuma_ulongs HWLOC_NAME(nodeset_to_linux_libnuma_ulongs) -#define hwloc_cpuset_from_linux_libnuma_ulongs HWLOC_NAME(cpuset_from_linux_libnuma_ulongs) -#define hwloc_nodeset_from_linux_libnuma_ulongs HWLOC_NAME(nodeset_from_linux_libnuma_ulongs) -#define hwloc_cpuset_to_linux_libnuma_bitmask HWLOC_NAME(cpuset_to_linux_libnuma_bitmask) -#define hwloc_nodeset_to_linux_libnuma_bitmask HWLOC_NAME(nodeset_to_linux_libnuma_bitmask) -#define hwloc_cpuset_from_linux_libnuma_bitmask HWLOC_NAME(cpuset_from_linux_libnuma_bitmask) -#define hwloc_nodeset_from_linux_libnuma_bitmask HWLOC_NAME(nodeset_from_linux_libnuma_bitmask) - -/* linux.h */ - -#define hwloc_linux_parse_cpumap_file HWLOC_NAME(linux_parse_cpumap_file) -#define 
hwloc_linux_set_tid_cpubind HWLOC_NAME(linux_set_tid_cpubind) -#define hwloc_linux_get_tid_cpubind HWLOC_NAME(linux_get_tid_cpubind) -#define hwloc_linux_get_tid_last_cpu_location HWLOC_NAME(linux_get_tid_last_cpu_location) - -/* openfabrics-verbs.h */ - -#define hwloc_ibv_get_device_cpuset HWLOC_NAME(ibv_get_device_cpuset) -#define hwloc_ibv_get_device_osdev HWLOC_NAME(ibv_get_device_osdev) -#define hwloc_ibv_get_device_osdev_by_name HWLOC_NAME(ibv_get_device_osdev_by_name) - -/* myriexpress.h */ - -#define hwloc_mx_board_get_device_cpuset HWLOC_NAME(mx_board_get_device_cpuset) -#define hwloc_mx_endpoint_get_device_cpuset HWLOC_NAME(mx_endpoint_get_device_cpuset) - -/* intel-mic.h */ - -#define hwloc_intel_mic_get_device_cpuset HWLOC_NAME(intel_mic_get_device_cpuset) -#define hwloc_intel_mic_get_device_osdev_by_index HWLOC_NAME(intel_mic_get_device_osdev_by_index) - -/* opencl.h */ - -#define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset) -#define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev) -#define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index) - -/* cuda.h */ - -#define hwloc_cuda_get_device_pci_ids HWLOC_NAME(cuda_get_device_pci_ids) -#define hwloc_cuda_get_device_cpuset HWLOC_NAME(cuda_get_device_cpuset) -#define hwloc_cuda_get_device_pcidev HWLOC_NAME(cuda_get_device_pcidev) -#define hwloc_cuda_get_device_osdev HWLOC_NAME(cuda_get_device_osdev) -#define hwloc_cuda_get_device_osdev_by_index HWLOC_NAME(cuda_get_device_osdev_by_index) - -/* cudart.h */ - -#define hwloc_cudart_get_device_pci_ids HWLOC_NAME(cudart_get_device_pci_ids) -#define hwloc_cudart_get_device_cpuset HWLOC_NAME(cudart_get_device_cpuset) -#define hwloc_cudart_get_device_pcidev HWLOC_NAME(cudart_get_device_pcidev) -#define hwloc_cudart_get_device_osdev_by_index HWLOC_NAME(cudart_get_device_osdev_by_index) - -/* nvml.h */ - -#define hwloc_nvml_get_device_cpuset HWLOC_NAME(nvml_get_device_cpuset) -#define 
hwloc_nvml_get_device_osdev HWLOC_NAME(nvml_get_device_osdev) -#define hwloc_nvml_get_device_osdev_by_index HWLOC_NAME(nvml_get_device_osdev_by_index) - -/* gl.h */ - -#define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device) -#define hwloc_gl_get_display_osdev_by_name HWLOC_NAME(gl_get_display_osdev_by_name) -#define hwloc_gl_get_display_by_osdev HWLOC_NAME(gl_get_display_by_osdev) - -/* hwloc/plugins.h */ - -#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e) -#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU) -#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL) -#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC) -#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t) -#define hwloc_disc_component HWLOC_NAME(disc_component) - -#define hwloc_backend HWLOC_NAME(backend) -#define hwloc_backend_flag_e HWLOC_NAME(backend_flag_e) -#define HWLOC_BACKEND_FLAG_NEED_LEVELS HWLOC_NAME_CAPS(BACKEND_FLAG_NEED_LEVELS) - -#define hwloc_backend_alloc HWLOC_NAME(backend_alloc) -#define hwloc_backend_enable HWLOC_NAME(backend_enable) -#define hwloc_backends_get_obj_cpuset HWLOC_NAME(backends_get_obj_cpuset) -#define hwloc_backends_notify_new_object HWLOC_NAME(backends_notify_new_object) - -#define hwloc_component_type_e HWLOC_NAME(component_type_e) -#define HWLOC_COMPONENT_TYPE_DISC HWLOC_NAME_CAPS(COMPONENT_TYPE_DISC) -#define HWLOC_COMPONENT_TYPE_XML HWLOC_NAME_CAPS(COMPONENT_TYPE_XML) -#define hwloc_component_type_t HWLOC_NAME(component_type_t) -#define hwloc_component HWLOC_NAME(component) - -#define hwloc_plugin_check_namespace HWLOC_NAME(plugin_check_namespace) - -#define hwloc_insert_object_by_cpuset HWLOC_NAME(insert_object_by_cpuset) -#define hwloc_report_error_t HWLOC_NAME(report_error_t) -#define hwloc_report_os_error HWLOC_NAME(report_os_error) -#define hwloc_hide_errors HWLOC_NAME(hide_errors) -#define 
hwloc__insert_object_by_cpuset HWLOC_NAME(_insert_object_by_cpuset) -#define hwloc_insert_object_by_parent HWLOC_NAME(insert_object_by_parent) -#define hwloc_alloc_setup_object HWLOC_NAME(alloc_setup_object) -#define hwloc_fill_object_sets HWLOC_NAME(fill_object_sets) - -#define hwloc_insert_pci_device_list HWLOC_NAME(insert_pci_device_list) -#define hwloc_pci_find_cap HWLOC_NAME(pci_find_cap) -#define hwloc_pci_find_linkspeed HWLOC_NAME(pci_find_linkspeed) -#define hwloc_pci_prepare_bridge HWLOC_NAME(pci_prepare_bridge) - -/* hwloc/deprecated.h */ - -#define hwloc_obj_type_of_string HWLOC_NAME(obj_type_of_string ) -#define hwloc_obj_snprintf HWLOC_NAME(obj_snprintf) -#define hwloc_distributev HWLOC_NAME(distributev) -#define hwloc_distribute HWLOC_NAME(distribute) - -/* private/debug.h */ - -#define hwloc_debug HWLOC_NAME(debug) - -/* private/misc.h */ - -#define hwloc_snprintf HWLOC_NAME(snprintf) -#define hwloc_namecoloncmp HWLOC_NAME(namecoloncmp) -#define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual) -#define hwloc_ffs32 HWLOC_NAME(ffs32) -#define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32) -#define hwloc_flsl_manual HWLOC_NAME(flsl_manual) -#define hwloc_fls32 HWLOC_NAME(fls32) -#define hwloc_flsl_from_fls32 HWLOC_NAME(flsl_from_fls32) -#define hwloc_weight_long HWLOC_NAME(weight_long) -#define hwloc_strncasecmp HWLOC_NAME(strncasecmp) - -/* private/cpuid-x86.h */ - -#define hwloc_have_x86_cpuid HWLOC_NAME(have_x86_cpuid) -#define hwloc_x86_cpuid HWLOC_NAME(x86_cpuid) - -/* private/xml.h */ - -#define hwloc__xml_verbose HWLOC_NAME(_xml_verbose) - -#define hwloc__xml_import_state_s HWLOC_NAME(_xml_import_state_s) -#define hwloc__xml_import_state_t HWLOC_NAME(_xml_import_state_t) -#define hwloc__xml_import_diff HWLOC_NAME(_xml_import_diff) -#define hwloc_xml_backend_data_s HWLOC_NAME(xml_backend_data_s) -#define hwloc__xml_export_state_s HWLOC_NAME(_xml_export_state_s) -#define hwloc__xml_export_state_t HWLOC_NAME(_xml_export_state_t) -#define 
hwloc__xml_export_object HWLOC_NAME(_xml_export_object) -#define hwloc__xml_export_diff HWLOC_NAME(_xml_export_diff) - -#define hwloc_xml_callbacks HWLOC_NAME(xml_callbacks) -#define hwloc_xml_component HWLOC_NAME(xml_component) -#define hwloc_xml_callbacks_register HWLOC_NAME(xml_callbacks_register) -#define hwloc_xml_callbacks_reset HWLOC_NAME(xml_callbacks_reset) - -/* private/components.h */ - -#define hwloc_disc_component_force_enable HWLOC_NAME(disc_component_force_enable) -#define hwloc_disc_components_enable_others HWLOC_NAME(disc_components_instantiate_others) - -#define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all) -#define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem) - -#define hwloc_components_init HWLOC_NAME(components_init) -#define hwloc_components_destroy_all HWLOC_NAME(components_destroy_all) - -/* private/private.h */ - -#define hwloc_ignore_type_e HWLOC_NAME(ignore_type_e) - -#define HWLOC_IGNORE_TYPE_NEVER HWLOC_NAME_CAPS(IGNORE_TYPE_NEVER) -#define HWLOC_IGNORE_TYPE_KEEP_STRUCTURE HWLOC_NAME_CAPS(IGNORE_TYPE_KEEP_STRUCTURE) -#define HWLOC_IGNORE_TYPE_ALWAYS HWLOC_NAME_CAPS(IGNORE_TYPE_ALWAYS) - -#define hwloc_os_distances_s HWLOC_NAME(os_distances_s) - -#define hwloc_xml_imported_distances_s HWLOC_NAME(xml_imported_distances_s) - -#define hwloc_alloc_obj_cpusets HWLOC_NAME(alloc_obj_cpusets) -#define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level) -#define hwloc_get_sysctlbyname HWLOC_NAME(get_sysctlbyname) -#define hwloc_get_sysctl HWLOC_NAME(get_sysctl) -#define hwloc_fallback_nbprocessors HWLOC_NAME(fallback_nbprocessors) -#define hwloc_connect_children HWLOC_NAME(connect_children) -#define hwloc_connect_levels HWLOC_NAME(connect_levels) - -#define hwloc__object_cpusets_compare_first HWLOC_NAME(_object_cpusets_compare_first) - -#define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults) -#define hwloc_topology_clear HWLOC_NAME(topology_clear) - -#define hwloc__add_info HWLOC_NAME(_add_info) 
-#define hwloc__find_info_slot HWLOC_NAME(_find_info_slot) -#define hwloc__move_infos HWLOC_NAME(_move_infos) -#define hwloc__free_infos HWLOC_NAME(_free_infos) - -#define hwloc_binding_hooks HWLOC_NAME(binding_hooks) -#define hwloc_set_native_binding_hooks HWLOC_NAME(set_native_binding_hooks) -#define hwloc_set_binding_hooks HWLOC_NAME(set_binding_hooks) - -#define hwloc_set_linuxfs_hooks HWLOC_NAME(set_linuxfs_hooks) -#define hwloc_set_bgq_hooks HWLOC_NAME(set_bgq_hooks) -#define hwloc_set_solaris_hooks HWLOC_NAME(set_solaris_hooks) -#define hwloc_set_aix_hooks HWLOC_NAME(set_aix_hooks) -#define hwloc_set_osf_hooks HWLOC_NAME(set_osf_hooks) -#define hwloc_set_windows_hooks HWLOC_NAME(set_windows_hooks) -#define hwloc_set_darwin_hooks HWLOC_NAME(set_darwin_hooks) -#define hwloc_set_freebsd_hooks HWLOC_NAME(set_freebsd_hooks) -#define hwloc_set_netbsd_hooks HWLOC_NAME(set_netbsd_hooks) -#define hwloc_set_hpux_hooks HWLOC_NAME(set_hpux_hooks) - -#define hwloc_add_uname_info HWLOC_NAME(add_uname_info) -#define hwloc_free_unlinked_object HWLOC_NAME(free_unlinked_object) -#define hwloc__duplicate_objects HWLOC_NAME(_duplicate_objects) - -#define hwloc_alloc_heap HWLOC_NAME(alloc_heap) -#define hwloc_alloc_mmap HWLOC_NAME(alloc_mmap) -#define hwloc_free_heap HWLOC_NAME(free_heap) -#define hwloc_free_mmap HWLOC_NAME(free_mmap) -#define hwloc_alloc_or_fail HWLOC_NAME(alloc_or_fail) - -#define hwloc_distances_init HWLOC_NAME(distances_init) -#define hwloc_distances_destroy HWLOC_NAME(distances_destroy) -#define hwloc_distances_set HWLOC_NAME(distances_set) -#define hwloc_distances_set_from_env HWLOC_NAME(distances_set_from_env) -#define hwloc_distances_restrict_os HWLOC_NAME(distances_restrict_os) -#define hwloc_distances_restrict HWLOC_NAME(distances_restrict) -#define hwloc_distances_finalize_os HWLOC_NAME(distances_finalize_os) -#define hwloc_distances_finalize_logical HWLOC_NAME(distances_finalize_logical) -#define hwloc_clear_object_distances 
HWLOC_NAME(clear_object_distances) -#define hwloc_clear_object_distances_one HWLOC_NAME(clear_object_distances_one) -#define hwloc_group_by_distances HWLOC_NAME(group_by_distances) - -#define hwloc_encode_to_base64 HWLOC_NAME(encode_to_base64) -#define hwloc_decode_from_base64 HWLOC_NAME(decode_from_base64) - -#define hwloc_obj_add_info_nodup HWLOC_NAME(obj_add_info_nodup) - -#define hwloc_progname HWLOC_NAME(progname) - -#define hwloc_bitmap_compare_inclusion HWLOC_NAME(bitmap_compare_inclusion) - -/* private/solaris-chiptype.h */ - -#define hwloc_solaris_get_chip_type HWLOC_NAME(solaris_get_chip_type) -#define hwloc_solaris_get_chip_model HWLOC_NAME(solaris_get_chip_model) - -#endif /* HWLOC_SYM_TRANSFORM */ - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* HWLOC_RENAME_H */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/private/debug.h b/opal/mca/hwloc/hwloc1110/hwloc/include/private/debug.h deleted file mode 100644 index 4de91bf8ae8..00000000000 --- a/opal/mca/hwloc/hwloc1110/hwloc/include/private/debug.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright © 2009 CNRS - * Copyright © 2009-2012 Inria. All rights reserved. - * Copyright © 2009, 2011 Université Bordeaux - * Copyright © 2011 Cisco Systems, Inc. All rights reserved. - * See COPYING in top-level directory. - */ - -/* The configuration file */ - -#ifndef HWLOC_DEBUG_H -#define HWLOC_DEBUG_H - -#include - -#ifdef HWLOC_DEBUG -#include -#include -#endif - -static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, ...) 
-{ -#ifdef HWLOC_DEBUG - va_list ap; - - va_start(ap, s); - vfprintf(stderr, s, ap); - va_end(ap); -#endif -} - -#ifdef HWLOC_DEBUG -#define hwloc_debug_bitmap(fmt, bitmap) do { \ - char *s; \ - hwloc_bitmap_asprintf(&s, bitmap); \ - fprintf(stderr, fmt, s); \ - free(s); \ -} while (0) -#define hwloc_debug_1arg_bitmap(fmt, arg1, bitmap) do { \ - char *s; \ - hwloc_bitmap_asprintf(&s, bitmap); \ - fprintf(stderr, fmt, arg1, s); \ - free(s); \ -} while (0) -#define hwloc_debug_2args_bitmap(fmt, arg1, arg2, bitmap) do { \ - char *s; \ - hwloc_bitmap_asprintf(&s, bitmap); \ - fprintf(stderr, fmt, arg1, arg2, s); \ - free(s); \ -} while (0) -#else -#define hwloc_debug_bitmap(s, bitmap) do { } while(0) -#define hwloc_debug_1arg_bitmap(s, arg1, bitmap) do { } while(0) -#define hwloc_debug_2args_bitmap(s, arg1, arg2, bitmap) do { } while(0) -#endif - -#endif /* HWLOC_DEBUG_H */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/Makefile.am b/opal/mca/hwloc/hwloc1110/hwloc/src/Makefile.am deleted file mode 100644 index 41aa35c14e9..00000000000 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/Makefile.am +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright © 2009-2014 Inria. All rights reserved. -# Copyright © 2009-2012 Université Bordeaux -# Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. -# Copyright © 2011-2012 Oracle and/or its affiliates. All rights reserved. -# See COPYING in top-level directory. - -AM_CFLAGS = $(HWLOC_CFLAGS) -AM_CPPFLAGS = $(HWLOC_CPPFLAGS) -DHWLOC_INSIDE_LIBHWLOC -AM_LDFLAGS = $(HWLOC_LDFLAGS) - -EXTRA_DIST = dolib.c - -# If we're in standalone mode, build the installable library. -# Otherwise, build the embedded library. 
- -if HWLOC_BUILD_STANDALONE -lib_LTLIBRARIES = libhwloc.la -else -noinst_LTLIBRARIES = libhwloc_embedded.la -endif - -pluginsdir = @HWLOC_PLUGINS_DIR@ -plugins_LTLIBRARIES = -plugins_ldflags = -module -avoid-version -lltdl -AM_CPPFLAGS += -DHWLOC_PLUGINS_PATH=\"$(HWLOC_PLUGINS_PATH)\" - -# Sources and ldflags - -sources = \ - topology.c \ - traversal.c \ - distances.c \ - components.c \ - bind.c \ - bitmap.c \ - pci-common.c \ - diff.c \ - misc.c \ - base64.c \ - topology-noos.c \ - topology-synthetic.c \ - topology-custom.c \ - topology-xml.c \ - topology-xml-nolibxml.c -ldflags = - -# Conditionally add to the sources and ldflags - -if HWLOC_HAVE_LIBXML2 -if HWLOC_XML_LIBXML_BUILD_STATIC -sources += topology-xml-libxml.c -else -plugins_LTLIBRARIES += hwloc_xml_libxml.la -hwloc_xml_libxml_la_SOURCES = topology-xml-libxml.c -hwloc_xml_libxml_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_LIBXML2_CFLAGS) -DHWLOC_INSIDE_PLUGIN -hwloc_xml_libxml_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_LIBXML2_LIBS) -endif -endif HWLOC_HAVE_LIBXML2 - -if HWLOC_HAVE_PCI -if HWLOC_PCI_BUILD_STATIC -sources += topology-pci.c -else -plugins_LTLIBRARIES += hwloc_pci.la -hwloc_pci_la_SOURCES = topology-pci.c -hwloc_pci_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_PCIACCESS_CFLAGS) -DHWLOC_INSIDE_PLUGIN -hwloc_pci_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_PCIACCESS_LIBS) -endif -endif HWLOC_HAVE_PCI - -if HWLOC_HAVE_OPENCL -if HWLOC_OPENCL_BUILD_STATIC -sources += topology-opencl.c -else -plugins_LTLIBRARIES += hwloc_opencl.la -hwloc_opencl_la_SOURCES = topology-opencl.c -hwloc_opencl_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_OPENCL_CFLAGS) -DHWLOC_INSIDE_PLUGIN -hwloc_opencl_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_OPENCL_LIBS) -endif -endif HWLOC_HAVE_OPENCL - -if HWLOC_HAVE_CUDART -if HWLOC_CUDA_BUILD_STATIC -sources += topology-cuda.c -else -plugins_LTLIBRARIES += hwloc_cuda.la -hwloc_cuda_la_SOURCES = topology-cuda.c -hwloc_cuda_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_CUDA_CFLAGS) -DHWLOC_INSIDE_PLUGIN -hwloc_cuda_la_LDFLAGS = 
$(plugins_ldflags) $(HWLOC_CUDA_LIBS) -endif -endif HWLOC_HAVE_CUDART - -if HWLOC_HAVE_NVML -if HWLOC_NVML_BUILD_STATIC -sources += topology-nvml.c -else -plugins_LTLIBRARIES += hwloc_nvml.la -hwloc_nvml_la_SOURCES = topology-nvml.c -hwloc_nvml_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_NVML_CFLAGS) -DHWLOC_INSIDE_PLUGIN -hwloc_nvml_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_NVML_LIBS) -endif -endif HWLOC_HAVE_NVML - -if HWLOC_HAVE_GL -if HWLOC_GL_BUILD_STATIC -sources += topology-gl.c -else -plugins_LTLIBRARIES += hwloc_gl.la -hwloc_gl_la_SOURCES = topology-gl.c -hwloc_gl_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_GL_CFLAGS) -DHWLOC_INSIDE_PLUGIN -hwloc_gl_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_GL_LIBS) -endif -endif HWLOC_HAVE_GL - -if HWLOC_HAVE_SOLARIS -sources += topology-solaris.c -sources += topology-solaris-chiptype.c -endif HWLOC_HAVE_SOLARIS - -if HWLOC_HAVE_LINUX -sources += topology-linux.c -endif HWLOC_HAVE_LINUX - -if HWLOC_HAVE_BGQ -sources += topology-bgq.c -endif HWLOC_HAVE_BGQ - -if HWLOC_HAVE_AIX -sources += topology-aix.c -ldflags += -lpthread -endif HWLOC_HAVE_AIX - -if HWLOC_HAVE_OSF -sources += topology-osf.c -ldflags += -lnuma -lpthread -endif HWLOC_HAVE_OSF - -if HWLOC_HAVE_HPUX -sources += topology-hpux.c -ldflags += -lpthread -endif HWLOC_HAVE_HPUX - -if HWLOC_HAVE_WINDOWS -sources += topology-windows.c -endif HWLOC_HAVE_WINDOWS - -if HWLOC_HAVE_DARWIN -sources += topology-darwin.c -endif HWLOC_HAVE_DARWIN - -if HWLOC_HAVE_FREEBSD -sources += topology-freebsd.c -endif HWLOC_HAVE_FREEBSD - -if HWLOC_HAVE_NETBSD -sources += topology-netbsd.c -ldflags += -lpthread -endif HWLOC_HAVE_NETBSD - -if HWLOC_HAVE_X86_CPUID -sources += topology-x86.c -endif HWLOC_HAVE_X86_CPUID - -if HWLOC_HAVE_GCC -ldflags += -no-undefined -endif HWLOC_HAVE_GCC - - -if HWLOC_HAVE_WINDOWS -# Windows specific rules - -LC_MESSAGES=C -export LC_MESSAGES -ldflags += -Xlinker --output-def -Xlinker .libs/libhwloc.def - -if HWLOC_HAVE_MS_LIB -dolib$(EXEEXT): dolib.c - $(CC_FOR_BUILD) $< -o $@ 
-.libs/libhwloc.lib: libhwloc.la dolib$(EXEEXT) - [ ! -r .libs/libhwloc.def ] || ./dolib$(EXEEXT) "$(HWLOC_MS_LIB)" $(HWLOC_MS_LIB_ARCH) .libs/libhwloc.def $(libhwloc_so_version) .libs/libhwloc.lib -all-local: .libs/libhwloc.lib -clean-local: - $(RM) dolib$(EXEEXT) -endif HWLOC_HAVE_MS_LIB - -install-exec-hook: - [ ! -r .libs/libhwloc.def ] || $(INSTALL) .libs/libhwloc.def $(DESTDIR)$(libdir) -if HWLOC_HAVE_MS_LIB - [ ! -r .libs/libhwloc.def ] || $(INSTALL) .libs/libhwloc.lib $(DESTDIR)$(libdir) - [ ! -r .libs/libhwloc.def ] || $(INSTALL) .libs/libhwloc.exp $(DESTDIR)$(libdir) -endif HWLOC_HAVE_MS_LIB - -uninstall-local: - rm -f $(DESTDIR)$(libdir)/libhwloc.def -if HWLOC_HAVE_MS_LIB - rm -f $(DESTDIR)$(libdir)/libhwloc.lib $(DESTDIR)$(libdir)/libhwloc.exp -endif HWLOC_HAVE_MS_LIB - -# End of Windows specific rules -endif HWLOC_HAVE_WINDOWS - - -# Installable library - -libhwloc_la_SOURCES = $(sources) -libhwloc_la_LDFLAGS = $(ldflags) -version-info $(libhwloc_so_version) $(HWLOC_LIBS) - -if HWLOC_HAVE_PLUGINS -AM_CPPFLAGS += $(LTDLINCL) -libhwloc_la_LDFLAGS += -export-dynamic -libhwloc_la_LIBADD = $(LIBLTDL) -endif - -# Embedded library (note the lack of a .so version number -- that -# intentionally only appears in the installable library). Also note -# the lack of _LDFLAGS -- all libs are added by the upper layer (via -# HWLOC_EMBEDDED_LIBS). 
- -libhwloc_embedded_la_SOURCES = $(sources) - -# XML data (only install if we're building in standalone mode) - -if HWLOC_BUILD_STANDALONE -xml_DATA = $(srcdir)/hwloc.dtd -xmldir = $(pkgdatadir) -EXTRA_DIST += hwloc.dtd -endif - -DISTCLEANFILES = static-components.h - -if HWLOC_HAVE_PLUGINS -check_LTLIBRARIES = hwloc_fake.la -hwloc_fake_la_SOURCES = topology-fake.c -hwloc_fake_la_LDFLAGS = $(plugins_ldflags) -rpath /nowhere # force libtool to build a shared-library even it's check-only -endif diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-windows.c b/opal/mca/hwloc/hwloc1110/hwloc/src/topology-windows.c deleted file mode 100644 index 371aaa1c8bf..00000000000 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-windows.c +++ /dev/null @@ -1,812 +0,0 @@ -/* - * Copyright © 2009 CNRS - * Copyright © 2009-2015 Inria. All rights reserved. - * Copyright © 2009-2012 Université Bordeaux - * Copyright © 2011 Cisco Systems, Inc. All rights reserved. - * See COPYING in top-level directory. - */ - -/* To try to get all declarations duplicated below. 
*/ -#define _WIN32_WINNT 0x0601 - -#include -#include -#include -#include - -#include - -#ifndef HAVE_KAFFINITY -typedef ULONG_PTR KAFFINITY, *PKAFFINITY; -#endif - -#ifndef HAVE_PROCESSOR_CACHE_TYPE -typedef enum _PROCESSOR_CACHE_TYPE { - CacheUnified, - CacheInstruction, - CacheData, - CacheTrace -} PROCESSOR_CACHE_TYPE; -#endif - -#ifndef CACHE_FULLY_ASSOCIATIVE -#define CACHE_FULLY_ASSOCIATIVE 0xFF -#endif - -#ifndef HAVE_CACHE_DESCRIPTOR -typedef struct _CACHE_DESCRIPTOR { - BYTE Level; - BYTE Associativity; - WORD LineSize; - DWORD Size; /* in bytes */ - PROCESSOR_CACHE_TYPE Type; -} CACHE_DESCRIPTOR, *PCACHE_DESCRIPTOR; -#endif - -#ifndef HAVE_LOGICAL_PROCESSOR_RELATIONSHIP -typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP { - RelationProcessorCore, - RelationNumaNode, - RelationCache, - RelationProcessorPackage, - RelationGroup, - RelationAll = 0xffff -} LOGICAL_PROCESSOR_RELATIONSHIP; -#else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */ -# ifndef HAVE_RELATIONPROCESSORPACKAGE -# define RelationProcessorPackage 3 -# define RelationGroup 4 -# define RelationAll 0xffff -# endif /* HAVE_RELATIONPROCESSORPACKAGE */ -#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */ - -#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION -typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION { - ULONG_PTR ProcessorMask; - LOGICAL_PROCESSOR_RELATIONSHIP Relationship; - _ANONYMOUS_UNION - union { - struct { - BYTE flags; - } ProcessorCore; - struct { - DWORD NodeNumber; - } NumaNode; - CACHE_DESCRIPTOR Cache; - ULONGLONG Reserved[2]; - } DUMMYUNIONNAME; -} SYSTEM_LOGICAL_PROCESSOR_INFORMATION, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION; -#endif - -/* Extended interface, for group support */ - -#ifndef HAVE_GROUP_AFFINITY -typedef struct _GROUP_AFFINITY { - KAFFINITY Mask; - WORD Group; - WORD Reserved[3]; -} GROUP_AFFINITY, *PGROUP_AFFINITY; -#endif - -#ifndef HAVE_PROCESSOR_RELATIONSHIP -typedef struct _PROCESSOR_RELATIONSHIP { - BYTE Flags; - BYTE Reserved[21]; - WORD GroupCount; - 
GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY]; -} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP; -#endif - -#ifndef HAVE_NUMA_NODE_RELATIONSHIP -typedef struct _NUMA_NODE_RELATIONSHIP { - DWORD NodeNumber; - BYTE Reserved[20]; - GROUP_AFFINITY GroupMask; -} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP; -#endif - -#ifndef HAVE_CACHE_RELATIONSHIP -typedef struct _CACHE_RELATIONSHIP { - BYTE Level; - BYTE Associativity; - WORD LineSize; - DWORD CacheSize; - PROCESSOR_CACHE_TYPE Type; - BYTE Reserved[20]; - GROUP_AFFINITY GroupMask; -} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP; -#endif - -#ifndef HAVE_PROCESSOR_GROUP_INFO -typedef struct _PROCESSOR_GROUP_INFO { - BYTE MaximumProcessorCount; - BYTE ActiveProcessorCount; - BYTE Reserved[38]; - KAFFINITY ActiveProcessorMask; -} PROCESSOR_GROUP_INFO, *PPROCESSOR_GROUP_INFO; -#endif - -#ifndef HAVE_GROUP_RELATIONSHIP -typedef struct _GROUP_RELATIONSHIP { - WORD MaximumGroupCount; - WORD ActiveGroupCount; - ULONGLONG Reserved[2]; - PROCESSOR_GROUP_INFO GroupInfo[ANYSIZE_ARRAY]; -} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP; -#endif - -#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX -typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX { - LOGICAL_PROCESSOR_RELATIONSHIP Relationship; - DWORD Size; - _ANONYMOUS_UNION - union { - PROCESSOR_RELATIONSHIP Processor; - NUMA_NODE_RELATIONSHIP NumaNode; - CACHE_RELATIONSHIP Cache; - GROUP_RELATIONSHIP Group; - /* Odd: no member to tell the cpu mask of the package... 
*/ - } DUMMYUNIONNAME; -} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX; -#endif - -#ifndef HAVE_PSAPI_WORKING_SET_EX_BLOCK -typedef union _PSAPI_WORKING_SET_EX_BLOCK { - ULONG_PTR Flags; - struct { - unsigned Valid :1; - unsigned ShareCount :3; - unsigned Win32Protection :11; - unsigned Shared :1; - unsigned Node :6; - unsigned Locked :1; - unsigned LargePage :1; - }; -} PSAPI_WORKING_SET_EX_BLOCK; -#endif - -#ifndef HAVE_PSAPI_WORKING_SET_EX_INFORMATION -typedef struct _PSAPI_WORKING_SET_EX_INFORMATION { - PVOID VirtualAddress; - PSAPI_WORKING_SET_EX_BLOCK VirtualAttributes; -} PSAPI_WORKING_SET_EX_INFORMATION; -#endif - -static void hwloc_bitmap_set_ith_ULONG_PTR(hwloc_bitmap_t set, unsigned i, ULONG_PTR mask) -{ - /* ULONG_PTR is 64/32bits depending on the arch - * while unsigned long is always 32bits */ -#if SIZEOF_VOID_P == 8 - hwloc_bitmap_set_ith_ulong(set, 2*i, mask & 0xffffffff); - hwloc_bitmap_set_ith_ulong(set, 2*i+1, mask >> 32); -#else - hwloc_bitmap_set_ith_ulong(set, i, mask); -#endif -} - -/* TODO: SetThreadIdealProcessor */ - -static int -hwloc_win_set_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t thread, hwloc_const_bitmap_t hwloc_set, int flags) -{ - DWORD mask; - - if (flags & HWLOC_CPUBIND_NOMEMBIND) { - errno = ENOSYS; - return -1; - } - /* TODO: groups SetThreadGroupAffinity */ - /* The resulting binding is always strict */ - mask = hwloc_bitmap_to_ulong(hwloc_set); - if (!SetThreadAffinityMask(thread, mask)) - return -1; - return 0; -} - -/* TODO: SetThreadGroupAffinity to get affinity */ - -static int -hwloc_win_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags) -{ - return hwloc_win_set_thread_cpubind(topology, GetCurrentThread(), hwloc_set, flags); -} - -static int -hwloc_win_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) -{ - int ret; - 
hwloc_cpuset_t cpuset; - - if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND) - || flags & HWLOC_MEMBIND_NOCPUBIND) { - errno = ENOSYS; - return -1; - } - - cpuset = hwloc_bitmap_alloc(); - hwloc_cpuset_from_nodeset(topology, cpuset, nodeset); - ret = hwloc_win_set_thisthread_cpubind(topology, cpuset, flags & HWLOC_MEMBIND_STRICT?HWLOC_CPUBIND_STRICT:0); - hwloc_bitmap_free(cpuset); - return ret; -} - -static int -hwloc_win_set_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_const_bitmap_t hwloc_set, int flags) -{ - DWORD mask; - if (flags & HWLOC_CPUBIND_NOMEMBIND) { - errno = ENOSYS; - return -1; - } - /* TODO: groups, hard: has to manually bind all threads into the other group, - * and the bind the process inside the group */ - /* The resulting binding is always strict */ - mask = hwloc_bitmap_to_ulong(hwloc_set); - if (!SetProcessAffinityMask(proc, mask)) - return -1; - return 0; -} - -static int -hwloc_win_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) -{ - int ret; - hwloc_cpuset_t cpuset; - - if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND) - || flags & HWLOC_MEMBIND_NOCPUBIND) { - errno = ENOSYS; - return -1; - } - - cpuset = hwloc_bitmap_alloc(); - hwloc_cpuset_from_nodeset(topology, cpuset, nodeset); - ret = hwloc_win_set_proc_cpubind(topology, pid, cpuset, flags & HWLOC_MEMBIND_STRICT?HWLOC_CPUBIND_STRICT:0); - hwloc_bitmap_free(cpuset); - return ret; -} - -static int -hwloc_win_get_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_bitmap_t hwloc_set, int flags) -{ - DWORD_PTR proc_mask, sys_mask; - if (flags & HWLOC_CPUBIND_NOMEMBIND) { - errno = ENOSYS; - return -1; - } - /* TODO: groups, GetProcessGroupAffinity, or merge SetThreadGroupAffinity for all threads */ - if (!GetProcessAffinityMask(proc, &proc_mask, &sys_mask)) - return -1; - 
hwloc_bitmap_from_ulong(hwloc_set, proc_mask); - return 0; -} - -static int -hwloc_win_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) -{ - int ret; - hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); - ret = hwloc_win_get_proc_cpubind(topology, pid, cpuset, flags & HWLOC_MEMBIND_STRICT?HWLOC_CPUBIND_STRICT:0); - if (!ret) { - *policy = HWLOC_MEMBIND_BIND; - hwloc_cpuset_to_nodeset(topology, cpuset, nodeset); - } - hwloc_bitmap_free(cpuset); - return ret; -} - -static int -hwloc_win_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags) -{ - return hwloc_win_set_proc_cpubind(topology, GetCurrentProcess(), hwloc_set, flags); -} - -static int -hwloc_win_set_thisproc_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) -{ - return hwloc_win_set_proc_membind(topology, GetCurrentProcess(), nodeset, policy, flags); -} - -static int -hwloc_win_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags) -{ - return hwloc_win_get_proc_cpubind(topology, GetCurrentProcess(), hwloc_cpuset, flags); -} - -static int -hwloc_win_get_thisproc_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) -{ - return hwloc_win_get_proc_membind(topology, GetCurrentProcess(), nodeset, policy, flags); -} - -static LPVOID (WINAPI *VirtualAllocExNumaProc)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect, DWORD nndPreferred); -static BOOL (WINAPI *VirtualFreeExProc)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD dwFreeType); -static BOOL (WINAPI *QueryWorkingSetExProc)(HANDLE hProcess, PVOID pv, DWORD cb); - -static int hwloc_win_get_VirtualAllocExNumaProc(void) { - if (VirtualAllocExNumaProc == NULL) { - FARPROC alloc_fun = NULL, free_fun = NULL; - HMODULE kernel32; - - kernel32 = 
LoadLibrary("kernel32.dll"); - if (kernel32) { - alloc_fun = GetProcAddress(kernel32, "VirtualAllocExNuma"); - free_fun = GetProcAddress(kernel32, "VirtualFreeEx"); - } - - if (!alloc_fun || !free_fun) { - VirtualAllocExNumaProc = (FARPROC) -1; - errno = ENOSYS; - return -1; - } - - VirtualAllocExNumaProc = alloc_fun; - VirtualFreeExProc = free_fun; - } else if ((FARPROC) VirtualAllocExNumaProc == (FARPROC)-1) { - errno = ENOSYS; - return -1; - } - - return 0; -} - -static void * -hwloc_win_alloc(hwloc_topology_t topology __hwloc_attribute_unused, size_t len) { - return VirtualAlloc(NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE); -} - -static void * -hwloc_win_alloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) { - int node; - - switch (policy) { - case HWLOC_MEMBIND_DEFAULT: - case HWLOC_MEMBIND_BIND: - break; - default: - errno = ENOSYS; - return hwloc_alloc_or_fail(topology, len, flags); - } - - if (flags & HWLOC_MEMBIND_STRICT) { - errno = ENOSYS; - return NULL; - } - - if (hwloc_bitmap_weight(nodeset) != 1) { - /* Not a single node, can't do this */ - errno = EXDEV; - return hwloc_alloc_or_fail(topology, len, flags); - } - - node = hwloc_bitmap_first(nodeset); - return VirtualAllocExNumaProc(GetCurrentProcess(), NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE, node); -} - -static int -hwloc_win_free_membind(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len __hwloc_attribute_unused) { - if (!addr) - return 0; - if (!VirtualFreeExProc(GetCurrentProcess(), addr, 0, MEM_RELEASE)) - return -1; - return 0; -} - -static int hwloc_win_get_QueryWorkingSetExProc(void) { - if (QueryWorkingSetExProc == NULL) { - FARPROC fun = NULL; - HMODULE kernel32, psapi; - - kernel32 = LoadLibrary("kernel32.dll"); - if (kernel32) - fun = GetProcAddress(kernel32, "K32QueryWorkingSetEx"); - if (!fun) { - psapi = LoadLibrary("psapi.dll"); - if 
(psapi) - fun = GetProcAddress(psapi, "QueryWorkingSetEx"); - } - - if (!fun) { - QueryWorkingSetExProc = (FARPROC) -1; - errno = ENOSYS; - return -1; - } - - QueryWorkingSetExProc = fun; - } else if ((FARPROC) QueryWorkingSetExProc == (FARPROC)-1) { - errno = ENOSYS; - return -1; - } - - return 0; -} - -static int -hwloc_win_get_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) -{ - SYSTEM_INFO SystemInfo; - DWORD page_size; - uintptr_t start; - unsigned nb; - - GetSystemInfo(&SystemInfo); - page_size = SystemInfo.dwPageSize; - - start = (((uintptr_t) addr) / page_size) * page_size; - nb = (((uintptr_t) addr + len - start) + page_size - 1) / page_size; - - if (!nb) - nb = 1; - - { - PSAPI_WORKING_SET_EX_INFORMATION *pv; - unsigned i; - - pv = calloc(nb, sizeof(*pv)); - - for (i = 0; i < nb; i++) - pv[i].VirtualAddress = (void*) (start + i * page_size); - if (!QueryWorkingSetExProc(GetCurrentProcess(), pv, nb * sizeof(*pv))) { - free(pv); - return -1; - } - *policy = HWLOC_MEMBIND_BIND; - if (flags & HWLOC_MEMBIND_STRICT) { - unsigned node = pv[0].VirtualAttributes.Node; - for (i = 1; i < nb; i++) { - if (pv[i].VirtualAttributes.Node != node) { - errno = EXDEV; - free(pv); - return -1; - } - } - hwloc_bitmap_only(nodeset, node); - free(pv); - return 0; - } - hwloc_bitmap_zero(nodeset); - for (i = 0; i < nb; i++) - hwloc_bitmap_set(nodeset, pv[i].VirtualAttributes.Node); - free(pv); - return 0; - } -} - -static int -hwloc_look_windows(struct hwloc_backend *backend) -{ - struct hwloc_topology *topology = backend->topology; - BOOL (WINAPI *GetLogicalProcessorInformationProc)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION Buffer, PDWORD ReturnLength); - BOOL (WINAPI *GetLogicalProcessorInformationExProc)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnLength); - BOOL (WINAPI 
*GetNumaAvailableMemoryNodeProc)(UCHAR Node, PULONGLONG AvailableBytes); - BOOL (WINAPI *GetNumaAvailableMemoryNodeExProc)(USHORT Node, PULONGLONG AvailableBytes); - SYSTEM_INFO SystemInfo; - - DWORD length; - - HMODULE kernel32; - - if (topology->levels[0][0]->cpuset) - /* somebody discovered things */ - return 0; - - hwloc_alloc_obj_cpusets(topology->levels[0][0]); - - GetSystemInfo(&SystemInfo); - - kernel32 = LoadLibrary("kernel32.dll"); - if (kernel32) { - GetLogicalProcessorInformationProc = GetProcAddress(kernel32, "GetLogicalProcessorInformation"); - GetNumaAvailableMemoryNodeProc = GetProcAddress(kernel32, "GetNumaAvailableMemoryNode"); - GetNumaAvailableMemoryNodeExProc = GetProcAddress(kernel32, "GetNumaAvailableMemoryNodeEx"); - GetLogicalProcessorInformationExProc = GetProcAddress(kernel32, "GetLogicalProcessorInformationEx"); - - if (!GetLogicalProcessorInformationExProc && GetLogicalProcessorInformationProc) { - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo; - unsigned id; - unsigned i; - struct hwloc_obj *obj; - hwloc_obj_type_t type; - - length = 0; - procInfo = NULL; - - while (1) { - if (GetLogicalProcessorInformationProc(procInfo, &length)) - break; - if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) - return -1; - procInfo = realloc(procInfo, length); - } - - assert(!length || procInfo); - - for (i = 0; i < length / sizeof(*procInfo); i++) { - - /* Ignore unknown caches */ - if (procInfo->Relationship == RelationCache - && procInfo->Cache.Type != CacheUnified - && procInfo->Cache.Type != CacheData - && procInfo->Cache.Type != CacheInstruction) - continue; - - id = -1; - switch (procInfo[i].Relationship) { - case RelationNumaNode: - type = HWLOC_OBJ_NUMANODE; - id = procInfo[i].NumaNode.NodeNumber; - break; - case RelationProcessorPackage: - type = HWLOC_OBJ_PACKAGE; - break; - case RelationCache: - type = HWLOC_OBJ_CACHE; - break; - case RelationProcessorCore: - type = HWLOC_OBJ_CORE; - break; - case RelationGroup: - default: - type = 
HWLOC_OBJ_GROUP; - break; - } - - obj = hwloc_alloc_setup_object(type, id); - obj->cpuset = hwloc_bitmap_alloc(); - hwloc_debug("%s#%u mask %lx\n", hwloc_obj_type_string(type), id, procInfo[i].ProcessorMask); - /* ProcessorMask is a ULONG_PTR */ - hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, 0, procInfo[i].ProcessorMask); - hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset); - - switch (type) { - case HWLOC_OBJ_NUMANODE: - { - ULONGLONG avail; - obj->nodeset = hwloc_bitmap_alloc(); - hwloc_bitmap_set(obj->nodeset, id); - if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail)) - || (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) - obj->memory.local_memory = avail; - obj->memory.page_types_len = 2; - obj->memory.page_types = malloc(2 * sizeof(*obj->memory.page_types)); - memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types)); - obj->memory.page_types_len = 1; - obj->memory.page_types[0].size = SystemInfo.dwPageSize; -#ifdef HAVE__SC_LARGE_PAGESIZE - obj->memory.page_types_len++; - obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); -#endif - break; - } - case HWLOC_OBJ_CACHE: - obj->attr->cache.size = procInfo[i].Cache.Size; - obj->attr->cache.associativity = procInfo[i].Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? 
-1 : procInfo[i].Cache.Associativity ; - obj->attr->cache.linesize = procInfo[i].Cache.LineSize; - obj->attr->cache.depth = procInfo[i].Cache.Level; - switch (procInfo->Cache.Type) { - case CacheUnified: - obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; - break; - case CacheData: - obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; - break; - case CacheInstruction: - obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; - break; - default: - hwloc_free_unlinked_object(obj); - continue; - } - break; - case HWLOC_OBJ_GROUP: - obj->attr->group.depth = procInfo[i].Relationship == RelationGroup; - break; - default: - break; - } - hwloc_insert_object_by_cpuset(topology, obj); - } - - free(procInfo); - } - - if (GetLogicalProcessorInformationExProc) { - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, procInfo; - - unsigned id; - struct hwloc_obj *obj; - hwloc_obj_type_t type; - - length = 0; - procInfoTotal = NULL; - - while (1) { - if (GetLogicalProcessorInformationExProc(RelationAll, procInfoTotal, &length)) - break; - if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) - return -1; - procInfoTotal = realloc(procInfoTotal, length); - } - - for (procInfo = procInfoTotal; - (void*) procInfo < (void*) ((uintptr_t) procInfoTotal + length); - procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) { - unsigned num, i; - GROUP_AFFINITY *GroupMask; - - /* Ignore unknown caches */ - if (procInfo->Relationship == RelationCache - && procInfo->Cache.Type != CacheUnified - && procInfo->Cache.Type != CacheData - && procInfo->Cache.Type != CacheInstruction) - continue; - - id = -1; - switch (procInfo->Relationship) { - case RelationNumaNode: - type = HWLOC_OBJ_NUMANODE; - num = 1; - GroupMask = &procInfo->NumaNode.GroupMask; - id = procInfo->NumaNode.NodeNumber; - break; - case RelationProcessorPackage: - type = HWLOC_OBJ_PACKAGE; - num = procInfo->Processor.GroupCount; - GroupMask = procInfo->Processor.GroupMask; - break; - case RelationCache: - type = HWLOC_OBJ_CACHE; - num = 
1; - GroupMask = &procInfo->Cache.GroupMask; - break; - case RelationProcessorCore: - type = HWLOC_OBJ_CORE; - num = procInfo->Processor.GroupCount; - GroupMask = procInfo->Processor.GroupMask; - break; - case RelationGroup: - /* So strange an interface... */ - for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) { - KAFFINITY mask; - obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, id); - obj->cpuset = hwloc_bitmap_alloc(); - mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask; - hwloc_debug("group %u %d cpus mask %lx\n", id, - procInfo->Group.GroupInfo[id].ActiveProcessorCount, mask); - /* KAFFINITY is ULONG_PTR */ - hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, id, mask); - hwloc_debug_2args_bitmap("group %u %d bitmap %s\n", id, procInfo->Group.GroupInfo[id].ActiveProcessorCount, obj->cpuset); - hwloc_insert_object_by_cpuset(topology, obj); - } - continue; - default: - /* Don't know how to get the mask. */ - hwloc_debug("unknown relation %d\n", procInfo->Relationship); - continue; - } - - obj = hwloc_alloc_setup_object(type, id); - obj->cpuset = hwloc_bitmap_alloc(); - for (i = 0; i < num; i++) { - hwloc_debug("%s#%u %d: mask %d:%lx\n", hwloc_obj_type_string(type), id, i, GroupMask[i].Group, GroupMask[i].Mask); - /* GROUP_AFFINITY.Mask is KAFFINITY, which is ULONG_PTR */ - hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, GroupMask[i].Group, GroupMask[i].Mask); - } - hwloc_debug("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset); - - switch (type) { - case HWLOC_OBJ_NUMANODE: - { - ULONGLONG avail; - obj->nodeset = hwloc_bitmap_alloc(); - hwloc_bitmap_set(obj->nodeset, id); - if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail)) - || (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) - obj->memory.local_memory = avail; - obj->memory.page_types = malloc(2 * sizeof(*obj->memory.page_types)); - memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types)); - 
obj->memory.page_types_len = 1; - obj->memory.page_types[0].size = SystemInfo.dwPageSize; -#ifdef HAVE__SC_LARGE_PAGESIZE - obj->memory.page_types_len++; - obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); -#endif - break; - } - case HWLOC_OBJ_CACHE: - obj->attr->cache.size = procInfo->Cache.CacheSize; - obj->attr->cache.associativity = procInfo->Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo->Cache.Associativity ; - obj->attr->cache.linesize = procInfo->Cache.LineSize; - obj->attr->cache.depth = procInfo->Cache.Level; - switch (procInfo->Cache.Type) { - case CacheUnified: - obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; - break; - case CacheData: - obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; - break; - case CacheInstruction: - obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; - break; - default: - hwloc_free_unlinked_object(obj); - continue; - } - break; - default: - break; - } - hwloc_insert_object_by_cpuset(topology, obj); - } - free(procInfoTotal); - } - } - - /* add PU objects */ - hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology)); - - hwloc_obj_add_info(topology->levels[0][0], "Backend", "Windows"); - if (topology->is_thissystem) - hwloc_add_uname_info(topology, NULL); - return 1; -} - -void -hwloc_set_windows_hooks(struct hwloc_binding_hooks *hooks, - struct hwloc_topology_support *support) -{ - hooks->set_proc_cpubind = hwloc_win_set_proc_cpubind; - hooks->get_proc_cpubind = hwloc_win_get_proc_cpubind; - hooks->set_thread_cpubind = hwloc_win_set_thread_cpubind; - hooks->set_thisproc_cpubind = hwloc_win_set_thisproc_cpubind; - hooks->get_thisproc_cpubind = hwloc_win_get_thisproc_cpubind; - hooks->set_thisthread_cpubind = hwloc_win_set_thisthread_cpubind; - /* TODO: get_last_cpu_location: use GetCurrentProcessorNumber */ - - hooks->set_proc_membind = hwloc_win_set_proc_membind; - hooks->get_proc_membind = hwloc_win_get_proc_membind; - hooks->set_thisproc_membind = hwloc_win_set_thisproc_membind; - 
hooks->get_thisproc_membind = hwloc_win_get_thisproc_membind; - hooks->set_thisthread_membind = hwloc_win_set_thisthread_membind; - - if (!hwloc_win_get_VirtualAllocExNumaProc()) { - hooks->alloc_membind = hwloc_win_alloc_membind; - hooks->alloc = hwloc_win_alloc; - hooks->free_membind = hwloc_win_free_membind; - support->membind->bind_membind = 1; - } - - if (!hwloc_win_get_QueryWorkingSetExProc()) - hooks->get_area_membind = hwloc_win_get_area_membind; -} - -static struct hwloc_backend * -hwloc_windows_component_instantiate(struct hwloc_disc_component *component, - const void *_data1 __hwloc_attribute_unused, - const void *_data2 __hwloc_attribute_unused, - const void *_data3 __hwloc_attribute_unused) -{ - struct hwloc_backend *backend; - backend = hwloc_backend_alloc(component); - if (!backend) - return NULL; - backend->discover = hwloc_look_windows; - return backend; -} - -static struct hwloc_disc_component hwloc_windows_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, - "windows", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, - hwloc_windows_component_instantiate, - 50, - NULL -}; - -const struct hwloc_component hwloc_windows_component = { - HWLOC_COMPONENT_ABI, - NULL, NULL, - HWLOC_COMPONENT_TYPE_DISC, - 0, - &hwloc_windows_disc_component -}; diff --git a/opal/mca/hwloc/hwloc1110/hwloc1110.h b/opal/mca/hwloc/hwloc1110/hwloc1110.h deleted file mode 100644 index 35215b45576..00000000000 --- a/opal/mca/hwloc/hwloc1110/hwloc1110.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * When this component is used, this file is included in the rest of - * the OPAL/ORTE/OMPI code base via opal/mca/hwloc/hwloc.h. As such, - * this header represents the public interface to this static component. 
- */ - -#ifndef MCA_OPAL_HWLOC_HWLOC1110_H -#define MCA_OPAL_HWLOC_HWLOC1110_H - -BEGIN_C_DECLS - -#include "hwloc/include/hwloc.h" - -/* If the including file requested it, also include the hwloc verbs - helper file. We can't just always include this file (even if we - know we have ) because there are some inline - functions in that file that invoke ibv_* functions. Some linkers - (e.g., Solaris Studio Compilers) will instantiate those static - inline functions even if we don't use them, and therefore we need - to be able to resolve the ibv_* symbols at link time. - - Since -libverbs is only specified in places where we use other - ibv_* functions (e.g., the OpenFabrics-based BTLs), that means that - linking random executables can/will fail (e.g., orterun). - */ -#if defined(OPAL_HWLOC_WANT_VERBS_HELPER) && OPAL_HWLOC_WANT_VERBS_HELPER -# if defined(HAVE_INFINIBAND_VERBS_H) -# include "hwloc/include/hwloc/openfabrics-verbs.h" -# else -# error Tried to include hwloc verbs helper file, but hwloc was compiled with no OpenFabrics support -# endif -#endif - -END_C_DECLS - -#endif /* MCA_OPAL_HWLOC_HWLOC1110_H */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc1110_component.c b/opal/mca/hwloc/hwloc1110/hwloc1110_component.c deleted file mode 100644 index 02322fca4b4..00000000000 --- a/opal/mca/hwloc/hwloc1110/hwloc1110_component.c +++ /dev/null @@ -1,55 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. 
Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. - */ - -#include "opal_config.h" -#include "opal/constants.h" - -#include "opal/mca/hwloc/hwloc.h" -#include "hwloc1110.h" - -/* - * Public string showing the sysinfo ompi_linux component version number - */ -const char *opal_hwloc_hwloc1110_component_version_string = - "OPAL hwloc1110 hwloc MCA component version " OPAL_VERSION; - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -const opal_hwloc_component_t mca_hwloc_hwloc1110_component = { - - /* First, the mca_component_t struct containing meta information - about the component itself */ - - .base_version = { - OPAL_HWLOC_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "hwloc1110", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - } -}; diff --git a/opal/mca/hwloc/hwloc1112/Makefile.am b/opal/mca/hwloc/hwloc1112/Makefile.am new file mode 100644 index 00000000000..5ab0b36f50f --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/Makefile.am @@ -0,0 +1,85 @@ +# +# Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2015 Intel, Inc. All right reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Due to what might be a bug in Automake, we need to remove stamp-h? +# files manually. See +# http://debbugs.gnu.org/cgi/bugreport.cgi?bug=19418. +DISTCLEANFILES = \ + hwloc/include/hwloc/autogen/stamp-h? \ + hwloc/include/private/autogen/stamp-h? 
+ +# Need to include these files so that these directories are carried in +# the tarball (in case someone invokes autogen.sh on a dist tarball). +EXTRA_DIST = \ + hwloc/doc/README.txt \ + hwloc/tests/README.txt \ + hwloc/utils/README.txt + +SUBDIRS = hwloc + +# Headers and sources +headers = hwloc1112.h +sources = hwloc1112_component.c + +# We only ever build this component statically +noinst_LTLIBRARIES = libmca_hwloc_hwloc1112.la +libmca_hwloc_hwloc1112_la_SOURCES = $(headers) $(sources) +nodist_libmca_hwloc_hwloc1112_la_SOURCES = $(nodist_headers) +libmca_hwloc_hwloc1112_la_LDFLAGS = -module -avoid-version $(opal_hwloc_hwloc1112_LDFLAGS) +libmca_hwloc_hwloc1112_la_LIBADD = $(opal_hwloc_hwloc1112_LIBS) +libmca_hwloc_hwloc1112_la_DEPENDENCIES = \ + $(HWLOC_top_builddir)/src/libhwloc_embedded.la + +# Since the rest of the code base includes the underlying hwloc.h, we +# also have to install the underlying header files when +# --with-devel-headers is specified. hwloc doesn't support this; the +# least gross way to make this happen is just to list all of hwloc's +# header files here. 
:-( +headers += \ + hwloc/include/hwloc.h \ + hwloc/include/hwloc/bitmap.h \ + hwloc/include/hwloc/cuda.h \ + hwloc/include/hwloc/cudart.h \ + hwloc/include/hwloc/deprecated.h \ + hwloc/include/hwloc/diff.h \ + hwloc/include/hwloc/gl.h \ + hwloc/include/hwloc/helper.h \ + hwloc/include/hwloc/inlines.h \ + hwloc/include/hwloc/intel-mic.h \ + hwloc/include/hwloc/myriexpress.h \ + hwloc/include/hwloc/nvml.h \ + hwloc/include/hwloc/opencl.h \ + hwloc/include/hwloc/openfabrics-verbs.h \ + hwloc/include/hwloc/plugins.h \ + hwloc/include/hwloc/rename.h \ + hwloc/include/private/private.h \ + hwloc/include/private/debug.h \ + hwloc/include/private/misc.h \ + hwloc/include/private/cpuid-x86.h +nodist_headers = hwloc/include/hwloc/autogen/config.h + +if HWLOC_HAVE_LINUX +headers += \ + hwloc/include/hwloc/linux.h \ + hwloc/include/hwloc/linux-libnuma.h +endif HWLOC_HAVE_LINUX + +if HWLOC_HAVE_SCHED_SETAFFINITY +headers += hwloc/include/hwloc/glibc-sched.h +endif HWLOC_HAVE_SCHED_SETAFFINITY + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +opaldir = $(opalincludedir)/$(subdir) +nobase_opal_HEADERS = $(headers) +nobase_nodist_opal_HEADERS = $(nodist_headers) +endif diff --git a/opal/mca/hwloc/hwloc1112/README-ompi.txt b/opal/mca/hwloc/hwloc1112/README-ompi.txt new file mode 100644 index 00000000000..60d73d103ad --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/README-ompi.txt @@ -0,0 +1,4 @@ +Applied the following patches from the upstream hwloc 1.11 branch after +the v1.11.1 release: + +...none yet... diff --git a/opal/mca/hwloc/hwloc1112/configure.m4 b/opal/mca/hwloc/hwloc1112/configure.m4 new file mode 100644 index 00000000000..e103918a0b5 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/configure.m4 @@ -0,0 +1,187 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
+# Copyright (c) 2015-2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# Priority +# +AC_DEFUN([MCA_opal_hwloc_hwloc1112_PRIORITY], [90]) + +# +# Force this component to compile in static-only mode +# +AC_DEFUN([MCA_opal_hwloc_hwloc1112_COMPILE_MODE], [ + AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) + $4="static" + AC_MSG_RESULT([$$4]) +]) + +# Include hwloc m4 files +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc.m4) +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_pkg.m4) +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_attributes.m4) +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_visibility.m4) +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_vendor.m4) +m4_include(opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_components.m4) + +# MCA_hwloc_hwloc1112_POST_CONFIG() +# --------------------------------- +AC_DEFUN([MCA_opal_hwloc_hwloc1112_POST_CONFIG],[ + OPAL_VAR_SCOPE_PUSH([opal_hwloc_hwloc1112_basedir]) + + # If we won, then do all the rest of the setup + AS_IF([test "$1" = "1" && test "$opal_hwloc_hwloc1112_support" = "yes"], + [ + # Set this variable so that the framework m4 knows what + # file to include in opal/mca/hwloc/hwloc.h + opal_hwloc_hwloc1112_basedir=opal/mca/hwloc/hwloc1112 + opal_hwloc_base_include="$opal_hwloc_hwloc1112_basedir/hwloc1112.h" + + # Add some stuff to CPPFLAGS so that the rest of the source + # tree can be built + file=$opal_hwloc_hwloc1112_basedir/hwloc + CPPFLAGS="$CPPFLAGS -I$OPAL_TOP_SRCDIR/$file/include" + AS_IF([test "$OPAL_TOP_BUILDDIR" != "$OPAL_TOP_SRCDIR"], + [CPPFLAGS="$CPPFLAGS -I$OPAL_TOP_BUILDDIR/$file/include"]) + unset file + ]) + OPAL_VAR_SCOPE_POP + + # This must be run unconditionally + HWLOC_DO_AM_CONDITIONALS +])dnl + + +# 
MCA_hwloc_hwloc1112_CONFIG([action-if-found], [action-if-not-found]) +# -------------------------------------------------------------------- +AC_DEFUN([MCA_opal_hwloc_hwloc1112_CONFIG],[ + # Hwloc needs to know if we have Verbs support + AC_REQUIRE([OPAL_CHECK_VERBS_DIR]) + + AC_CONFIG_FILES([opal/mca/hwloc/hwloc1112/Makefile]) + + OPAL_VAR_SCOPE_PUSH([HWLOC_VERSION opal_hwloc_hwloc1112_save_CPPFLAGS opal_hwloc_hwloc1112_save_LDFLAGS opal_hwloc_hwloc1112_save_LIBS opal_hwloc_hwloc1112_save_cairo opal_hwloc_hwloc1112_save_xml opal_hwloc_hwloc1112_basedir opal_hwloc_hwloc1112_file opal_hwloc_hwloc1112_save_cflags CPPFLAGS_save LIBS_save opal_hwloc_external]) + + # default to this component not providing support + opal_hwloc_hwloc1112_basedir=opal/mca/hwloc/hwloc1112 + opal_hwloc_hwloc1112_support=no + + AS_IF([test "$with_hwloc" = "internal" || test -z "$with_hwloc" || test "$with_hwloc" = "yes"], + [opal_hwloc_external="no"], + [opal_hwloc_external="yes"]) + + opal_hwloc_hwloc1112_save_CPPFLAGS=$CPPFLAGS + opal_hwloc_hwloc1112_save_LDFLAGS=$LDFLAGS + opal_hwloc_hwloc1112_save_LIBS=$LIBS + + # Run the hwloc configuration - if no external hwloc, then set the prefix + # to minimize the chance that someone will use the internal symbols + AS_IF([test "$opal_hwloc_external" = "no"], + [HWLOC_SET_SYMBOL_PREFIX([opal_hwloc1112_])]) + + # save XML or graphical options + opal_hwloc_hwloc1112_save_cairo=$enable_cairo + opal_hwloc_hwloc1112_save_xml=$enable_xml + opal_hwloc_hwloc1112_save_static=$enable_static + opal_hwloc_hwloc1112_save_shared=$enable_shared + opal_hwloc_hwloc1112_save_plugins=$enable_plugins + + # never enable hwloc's graphical option + enable_cairo=no + + # never enable hwloc's plugin system + enable_plugins=no + enable_static=yes + enable_shared=no + + # Override -- disable hwloc's libxml2 support, but enable the + # native hwloc XML support + enable_libxml2=no + enable_xml=yes + + # hwloc checks for compiler visibility, and it needs to do + # this without
"picky" flags. + opal_hwloc_hwloc1112_save_cflags=$CFLAGS + CFLAGS=$OPAL_CFLAGS_BEFORE_PICKY + HWLOC_SETUP_CORE([opal/mca/hwloc/hwloc1112/hwloc], + [AC_MSG_CHECKING([whether hwloc configure succeeded]) + AC_MSG_RESULT([yes]) + HWLOC_VERSION="internal v`$srcdir/$opal_hwloc_hwloc1112_basedir/hwloc/config/hwloc_get_version.sh $srcdir/$opal_hwloc_hwloc1112_basedir/hwloc/VERSION`" + + # Build flags for our Makefile.am + opal_hwloc_hwloc1112_LDFLAGS='$(HWLOC_EMBEDDED_LDFLAGS)' + opal_hwloc_hwloc1112_LIBS='$(OPAL_TOP_BUILDDIR)/'"$opal_hwloc_hwloc1112_basedir"'/hwloc/src/libhwloc_embedded.la $(HWLOC_EMBEDDED_LIBS)' + opal_hwloc_hwloc1112_support=yes + + AC_DEFINE_UNQUOTED([HWLOC_HWLOC1112_HWLOC_VERSION], + ["$HWLOC_VERSION"], + [Version of hwloc]) + + # Do we have verbs support? + CPPFLAGS_save=$CPPFLAGS + AS_IF([test "$opal_want_verbs" = "yes"], + [CPPFLAGS="-I$opal_verbs_dir/include $CPPFLAGS"]) + AC_CHECK_HEADERS([infiniband/verbs.h]) + CPPFLAGS=$CPPFLAGS_save + ], + [AC_MSG_CHECKING([whether hwloc configure succeeded]) + AC_MSG_RESULT([no]) + opal_hwloc_hwloc1112_support=no]) + CFLAGS=$opal_hwloc_hwloc1112_save_cflags + + # Restore some env variables, if necessary + AS_IF([test -n "$opal_hwloc_hwloc1112_save_cairo"], + [enable_cairo=$opal_hwloc_hwloc1112_save_cairo]) + AS_IF([test -n "$opal_hwloc_hwloc1112_save_xml"], + [enable_xml=$opal_hwloc_hwloc1112_save_xml]) + AS_IF([test -n "$opal_hwloc_hwloc1112_save_static"], + [enable_static=$opal_hwloc_hwloc1112_save_static]) + AS_IF([test -n "$opal_hwloc_hwloc1112_save_shared"], + [enable_shared=$opal_hwloc_hwloc1112_save_shared]) + AS_IF([test -n "$opal_hwloc_hwloc1112_save_plugins"], + [enable_plugins=$opal_hwloc_hwloc1112_save_plugins]) + + CPPFLAGS=$opal_hwloc_hwloc1112_save_CPPFLAGS + LDFLAGS=$opal_hwloc_hwloc1112_save_LDFLAGS + LIBS=$opal_hwloc_hwloc1112_save_LIBS + + AC_SUBST([opal_hwloc_hwloc1112_CFLAGS]) + AC_SUBST([opal_hwloc_hwloc1112_CPPFLAGS]) + AC_SUBST([opal_hwloc_hwloc1112_LDFLAGS]) +
AC_SUBST([opal_hwloc_hwloc1112_LIBS]) + + # Finally, add some flags to the wrapper compiler so that our + # headers can be found. + hwloc_hwloc1112_WRAPPER_EXTRA_LDFLAGS="$HWLOC_EMBEDDED_LDFLAGS" + hwloc_hwloc1112_WRAPPER_EXTRA_LIBS="$HWLOC_EMBEDDED_LIBS" + hwloc_hwloc1112_WRAPPER_EXTRA_CPPFLAGS='-I${pkgincludedir}/'"$opal_hwloc_hwloc1112_basedir/hwloc/include" + + # If we are not building the internal hwloc, then indicate that + # this component should not be built. NOTE: we still did all the + # above configury so that all the proper GNU Autotools + # infrastructure is set up properly (e.g., w.r.t. SUBDIRS=hwloc in + # this directory's Makefile.am, we still need the Autotools "make + # distclean" infrastructure to work properly). + AS_IF([test "$opal_hwloc_external" = "yes"], + [AC_MSG_WARN([using an external hwloc; disqualifying this component]) + opal_hwloc_hwloc1112_support=no]) + + # Done! + AS_IF([test "$opal_hwloc_hwloc1112_support" = "yes"], + [$1], + [$2]) + + OPAL_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/hwloc/hwloc1110/hwloc/AUTHORS b/opal/mca/hwloc/hwloc1112/hwloc/AUTHORS similarity index 77% rename from opal/mca/hwloc/hwloc1110/hwloc/AUTHORS rename to opal/mca/hwloc/hwloc1112/hwloc/AUTHORS index 837b27f2ca9..0e52215789f 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/AUTHORS +++ b/opal/mca/hwloc/hwloc1112/hwloc/AUTHORS @@ -1,8 +1,10 @@ Cédric Augonnet +Guillaume Beauchamp Jérôme Clet-Ortega Ludovic Courtès -Brice Goglin Nathalie Furmento -Samuel Thibault -Jeff Squyres +Brice Goglin Alexey Kardashevskiy +Antoine Rougier (University of Bordeaux intern) +Jeff Squyres +Samuel Thibault diff --git a/opal/mca/hwloc/hwloc1110/hwloc/COPYING b/opal/mca/hwloc/hwloc1112/hwloc/COPYING similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/COPYING rename to opal/mca/hwloc/hwloc1112/hwloc/COPYING diff --git a/opal/mca/hwloc/hwloc1110/hwloc/Makefile.am b/opal/mca/hwloc/hwloc1112/hwloc/Makefile.am similarity index 100% rename from
opal/mca/hwloc/hwloc1110/hwloc/Makefile.am rename to opal/mca/hwloc/hwloc1112/hwloc/Makefile.am diff --git a/opal/mca/hwloc/hwloc1112/hwloc/NEWS b/opal/mca/hwloc/hwloc1112/hwloc/NEWS new file mode 100644 index 00000000000..d81dfddf8a3 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/NEWS @@ -0,0 +1,1262 @@ +Copyright © 2009 CNRS +Copyright © 2009-2016 Inria. All rights reserved. +Copyright © 2009-2013 Université Bordeaux +Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +=========================================================================== + +This file contains the main features as well as overviews of specific +bug fixes (and other actions) for each version of hwloc since version +0.9 (as initially released as "libtopology", then re-branded to "hwloc" +in v0.9.1). + + +Version 1.11.4 +-------------- +* Fix Linux build with -m32 with respect to libudev. + Thanks to Paul Hargrove for reporting the issue. + + +Version 1.11.3 +-------------- +* Fix /proc/mounts parsing on Linux by using mntent.h. + Thanks to Nathan Hjelm for reporting the issue. + + +Version 1.11.2 +-------------- +* Improve support for Intel Knights Landing Xeon Phi on Linux: + + Group local NUMA nodes of normal memory (DDR) and high-bandwidth memory + (MCDRAM) together through "Cluster" groups so that the local MCDRAM is + easy to find. + - See "How do I find the local MCDRAM NUMA node on Intel Knights + Landing Xeon Phi?" in the documentation. + - For uniformity across all KNL configurations, always have a NUMA node + object even if the host is UMA. + + Fix the detection of the memory-side cache: + - Add the hwloc-dump-hwdata superuser utility to dump SMBIOS information + into /var/run/hwloc/ as root during boot, and load this dumped + information from the hwloc library at runtime. + - See "Why do I need hwloc-dump-hwdata for caches on Intel Knights + Landing Xeon Phi?" in the documentation. 
+ Thanks to Grzegorz Andrejczuk for the patches and for the help. +* The x86 and linux backends may now be combined for discovering CPUs + through x86 CPUID and memory from the Linux kernel. + This is useful for working around buggy CPU information reported by Linux + (for instance the AMD Bulldozer/Piledriver bug below). + Combination is enabled by passing HWLOC_COMPONENTS=x86 in the environment. +* Fix L3 cache sharing on AMD Opteron 63xx (Piledriver) and 62xx (Bulldozer) + in the x86 backend. Thanks to many users who helped. +* Fix the overzealous L3 cache sharing fix added to the x86 backend in 1.11.1 + for AMD Opteron 61xx (Magny-Cours) processors. +* The x86 backend may now add the info attribute Inclusive=0 or 1 to caches + it discovers, or to caches discovered by other backends earlier. + Thanks to Guillaume Beauchamp for the patch. +* Fix the management of alloc_membind() allocation failures on AIX, HP-UX + and OSF/Tru64. +* Fix spurious failures to load with ENOMEM on AIX in case of Misc objects + below PUs. +* lstopo improvements in X11 and Windows graphical mode: + + Add + - f 1 shortcuts to manually zoom-in, zoom-out, reset the scale, + or fit the entire window. + + Display all keyboard shortcuts in the console. +* Debug messages may be disabled at runtime by passing HWLOC_DEBUG_VERBOSE=0 + in the environment when --enable-debug was passed to configure. +* Add a FAQ entry "What are these Group objects in my topology?". + + +Version 1.11.1 +-------------- +* Detection fixes + + Hardwire the topology of Fujitsu K-computer, FX10, FX100 servers to + work around buggy Linux kernels. + Thanks to Takahiro Kawashima and Gilles Gouaillardet. + + Fix L3 cache information on AMD Opteron 61xx Magny-Cours processors + in the x86 backend. Thanks to Guillaume Beauchamp for the patch. + + Detect block devices directly attached to PCI without a controller, + for instance NVMe disks. Thanks to Barry M. Tannenbaum.
+ + Add the PCISlot attribute to all PCI functions instead of only the + first one. +* Miscellaneous internal fixes + + Ignore PCI bridges that could fail assertions by reporting buggy + secondary-subordinate bus numbers + Thanks to George Bosilca for reporting the issue. + + Fix an overzealous assertion when inserting an intermediate Group object + while Groups are totally ignored. + + Fix a memory leak on Linux on AMD processors with dual-core compute units. + Thanks to Bob Benner. + + Fix a memory leak on failure to load a xml diff file. + + Fix some segfaults when inputting an invalid synthetic description. + + Fix a segfault when plugins fail to find core symbols. + Thanks to Guy Streeter. +* Many fixes and improvements in the Windows backend: + + Fix the discovery of more than 32 processors and multiple processor + groups. Thanks to Barry M. Tannenbaum for the help. + + Add thread binding set support in case of multiple process groups. + + Add thread binding get support. + + Add get_last_cpu_location() support for the current thread. + + Disable the unsupported process binding in case of multiple processor + groups. + + Fix/update the Visual Studio support under contrib/windows. + Thanks to Eloi Gaudry for the help. +* Tools fixes + + Fix a segfault when displaying logical indexes in the graphical lstopo. + Thanks to Guillaume Mercier for reporting the issue. + + Fix lstopo linking with X11 libraries, for instance on Mac OS X. + Thanks to Scott Atchley and Pierre Ramet for reporting the issue. + + hwloc-annotate, hwloc-diff and hwloc-patch do not drop unavailable + resources from the output anymore and those may be annotated as well. + + Command-line tools may now import XML from the standard input with -i -.xml + + Add missing documentation for the hwloc-info --no-icaches option. + + +Version 1.11.0 +-------------- +* API + + Socket objects are renamed into Package to align with the terminology + used by processor vendors. 
The old HWLOC_OBJ_SOCKET type and "Socket" + name are still supported for backward compatibility. + + HWLOC_OBJ_NODE is replaced with HWLOC_OBJ_NUMANODE for clarification. + HWLOC_OBJ_NODE is still supported for backward compatibility. + "Node" and "NUMANode" strings are supported as in earlier releases. +* Detection improvements + + Add support for Intel Knights Landing Xeon Phi. + Thanks to Grzegorz Andrejczuk and Lukasz Anaczkowski. + + Add Vendor, Model, Revision, SerialNumber, Type and LinuxDeviceID + info attributes to Block OS devices on Linux. Thanks to Vineet Pedaballe + for the help. + - Add --disable-libudev to avoid dependency on the libudev library. + + Add "MemoryModule" Misc objects with information about DIMMs, on Linux + when privileged and when I/O is enabled. + Thanks to Vineet Pedaballe for the help. + + Add a PCISlot attribute to PCI devices on Linux when supported to + identify the physical PCI slot where the board is plugged. + + Add CPUStepping info attribute on x86 processors, + thanks to Thomas Röhl for the suggestion. + + Ignore the device-tree on non-Power architectures to avoid buggy + detection on ARM. Thanks to Orion Poplawski for reporting the issue. + + Work around buggy Xeon E5v3 BIOS reporting invalid PCI-NUMA affinity + for the PCI links on the second processor. + + Add support for CUDA compute capability 5.x, thanks to Benjamin Worpitz. + + Many fixes to the x86 backend + - Add L1i and fix L2/L3 type on old AMD processors without topoext support. + - Fix Intel CPU family and model numbers when basic family isn't 6 or 15. + - Fix package IDs on recent AMD processors. + - Fix misc issues due to incomplete APIC IDs on x2APIC processors. + - Avoid buggy discovery on old SGI Altix UVs with non-unique APIC IDs. + + Gather total machine memory on NetBSD. +* Tools + + lstopo + - Collapse identical PCI devices unless --no-collapse is given. + This avoids gigantic outputs when a PCI device contains dozens of + identical virtual functions.
+ - The ASCII art output is now called "ascii", for instance in + "lstopo -.ascii". + The former "txt" extension is retained for backward compatibility. + - Automatically scales graphical box width to the inner text in Cairo, + ASCII and Windows outputs. + - Add --rect to lstopo to force rectangular layout even for NUMA nodes. + - Add --restrict-flags to configure the behavior of --restrict. + - Objects may have a "Type" info attribute to specify a better type name + and display it in lstopo. + - Really export all verbose information to the given output file. + + hwloc-annotate + - May now operate on all types of objects, including I/O. + - May now insert Misc objects in the topology. + - Do not drop instruction caches and I/O devices from the output anymore. + + Fix lstopo path in hwloc-gather-topology after install. +* Misc + + Fix hwloc/cudart.h for machines with multiple PCI domains, + thanks to Imre Kerr for reporting the problem. + + Fix PCI Bridge-specific depth attribute. + + Fix hwloc_bitmap_intersect() for two infinite bitmaps. + + Fix some corner cases in the building of levels on large NUMA machines + with non-uniform NUMA groups and I/Os. + + Improve the performance of object insertion by cpuset for large + topologies. + + Prefix verbose XML import errors with the source name. + + Improve pkg-config checks and error messages. + + Fix excluding after a component with an argument in the HWLOC_COMPONENTS + environment variable. +* Documentation + + Fix the recommended way in documentation and examples to allocate memory + on some node, it should use HWLOC_MEMBIND_BIND. + Thanks to Nicolas Bouzat for reporting the issue. + + Add a "Miscellaneous objects" section in the documentation. + + Add a FAQ entry "What happens to my topology if I disable symmetric + multithreading, hyper-threading, etc. ?" to the documentation. + + +Version 1.10.1 +-------------- +* Actually remove disallowed NUMA nodes from nodesets when the whole-system + flag isn't enabled. 
+* Fix the gathering of PCI domains. Thanks to James Custer for reporting + the issue and providing a patch. +* Fix the merging of identical parent and child in presence of Misc objects. + Thanks to Dave Love for reporting the issue. +* Fix some misordering of children when merging with ignore_keep_structure() + in partially allowed topologies. +* Fix an overzealous assertion in the debug code when running on a single-PU + host with I/O. Thanks to Thomas Van Doren for reporting the issue. +* Don't forget to set up NUMA node object nodesets in x86 backend (for BSDs) + and OSF/Tru64 backend. +* Fix cpuid-x86 build error with gcc -O3 on x86-32. Thanks to Thomas Van Doren + for reporting the issue. +* Fix support for future very large caches in the x86 backend. +* Fix vendor/device names for SR-IOV PCI devices on Linux. +* Fix an unlikely crash in case of buggy hierarchical distance matrix. +* Fix PU os_index on some AIX releases. Thanks to Hendryk Bockelmann and + Erik Schnetter for helping with debugging. +* Fix hwloc_bitmap_isincluded() in case of infinite sets. +* Change hwloc-ls.desktop into a lstopo.desktop and only install it if + lstopo is built with Cairo/X11 support. It cannot work with a non-graphical + lstopo or hwloc-ls. +* Add support for the renaming of Socket into Package in future releases. +* Add support for the replacement of HWLOC_OBJ_NODE with HWLOC_OBJ_NUMANODE + in future releases. +* Clarify the documentation of distance matrices in hwloc.h and in the manpage + of hwloc-distances. Thanks to Dave Love for the suggestion. +* Improve some error messages by displaying more information about the + hwloc library in use. +* Document how to deal with the ABI break when upgrading to the upcoming 2.0. + See "How do I handle ABI breaks and API upgrades ?" in the FAQ. + + +Version 1.10.0 +-------------- +* API + + Add hwloc_topology_export_synthetic() to export a topology to a + synthetic string without using lstopo.
See the Synthetic topologies + section in the documentation. + + Add hwloc_topology_set/get_userdata() to let the application save + a private pointer in the topology whenever it needs a way to find + its own object corresponding to a topology. + + Add hwloc_get_numanode_obj_by_os_index() and document that this function + as well as hwloc_get_pu_obj_by_os_index() are good at converting + nodesets and cpusets into objects. + + hwloc_distrib() does not ignore any objects anymore when there are + too many of them. They get merged with others instead. + Thanks to Tim Creech for reporting the issue. +* Tools + + hwloc-bind --get now executes the command after displaying + the binding instead of ignoring the command entirely. + Thanks to John Donners for the suggestion. + + Clarify that memory sizes shown in lstopo are local by default + unless specified (total memory added in the root object). +* Synthetic topologies + + Synthetic topology descriptions may now specify attributes such as + memory sizes and OS indexes. See the Synthetic topologies section + in the documentation. + + lstopo now exports in this fully-detailed format by default. + The new option --export-synthetic-flags may be used to revert + back to the old format. +* Documentation + + Add the doc/examples/ subdirectory with several real-life examples, + including the already existing hwloc-hello.C for basics. + Thanks to Rob Aulwes for the suggestion. + + Improve the documentation of CPU and memory binding in the API. + + Add a FAQ entry about operating system errors, especially on AMD + platforms with buggy cache information. + + Add a FAQ entry about loading many topologies in a single program. +* Misc + + Work around buggy Linux kernels reporting 2 sockets instead of + 1 socket with 2 NUMA nodes for each Xeon E5 v3 (Haswell) processor. + + pciutils/libpci support is now removed since libpciaccess works + well and there's also a Linux-specific PCI backend.
For the record, + pciutils was GPL and therefore disabled by default since v1.6.2. + + Add --disable-cpuid configure flag to work around buggy processor + simulators reporting invalid CPUID information. + Thanks to Andrew Friedley for reporting the issue. + + Fix a racy use of libltdl when manipulating multiple topologies in + different threads. + Thanks to Andra Hugo for reporting the issue and testing patches. + + Fix some build failures in private/misc.h. + Thanks to Pavan Balaji and Ralph Castain for the reports. + + Fix failures to detect X11/Xutil.h on some Solaris platforms. + Thanks to Siegmar Gross for reporting the failure. + + The plugin ABI has changed, this release will not load plugins + built against previous hwloc releases. + + +Version 1.9.1 +------------- +* Fix a crash when the PCI locality is invalid. Attach to the root object + instead. Thanks to Nicolas Denoyelle for reporting the issue. +* Fix -f in lstopo manpage. Thanks to Jirka Hladky for reporting the issue. +* Fix hwloc_obj_type_sscanf() and others when strncasecmp() is not properly + available. Thanks to Nick Papior Andersen for reporting the problem. +* Mark Linux file descriptors as close-on-exec to avoid leaks on exec. +* Fix some minor memory leaks. + + +Version 1.9.0 +------------- +* API + + Add hwloc_obj_type_sscanf() to extend hwloc_obj_type_of_string() with + type-specific attributes such as Cache/Group depth and Cache type. + hwloc_obj_type_of_string() is moved to hwloc/deprecated.h. + + Add hwloc_linux_get_tid_last_cpu_location() for retrieving the + last CPU where a Linux thread given by TID ran. + + Add hwloc_distrib() to extend the old hwloc_distribute[v]() functions. + hwloc_distribute[v]() is moved to hwloc/deprecated.h. + + Don't mix total and local memory when displaying verbose object attributes + with hwloc_obj_attr_snprintf() or in lstopo.
+* Backends + + Add CPUVendor, CPUModelNumber and CPUFamilyNumber info attributes for + x86, ia64 and Xeon Phi sockets on Linux, to extend the x86-specific + support added in v1.8.1. Requested by Ralph Castain. + + Add many CPU- and Platform-related info attributes on ARM and POWER + platforms, in the Machine and Socket objects. + + Add CUDA info attributes describing the number of multiprocessors and + cores and the size of the global, shared and L2 cache memories in CUDA + OS devices. + + Add OpenCL info attributes describing the number of compute units and + the global memory size in OpenCL OS devices. + + The synthetic backend now accepts extended types such as L2Cache, L1i or + Group3. lstopo also exports synthetic strings using these extended types. +* Tools + + lstopo + - Do not overwrite output files by default anymore. + Pass -f or --force to enforce it. + - Display OpenCL, CUDA and Xeon Phi numbers of cores and memory sizes + in the graphical output. + - Fix export to stdout when specifying a Cairo-based output type + with --of. + + hwloc-ps + - Add -e or --get-last-cpu-location to report where processes/threads + run instead of where they are bound. + - Report locations as likely-more-useful objects such as Cores or Sockets + instead of Caches when possible. + + hwloc-bind + - Fix failure on Windows when not using --pid. + - Add -e as a synonym to --get-last-cpu-location. + + hwloc-distrib + - Add --reverse to distribute using last objects first and singlify + into last bits first. Thanks to Jirka Hladky for the suggestion. + + hwloc-info + - Report unified caches when looking for data or instruction cache + ancestor objects. +* Misc + + Add experimental Visual Studio support under contrib/windows. + Thanks to Eloi Gaudry for his help and for providing the first draft. + + Fix some overzealous assertions and warnings about the ordering of + objects on a level with respect to cpusets. 
The ordering is only + guaranteed for complete cpusets (based on the first bit in sets). + + Fix some memory leaks when importing xml diffs and when exporting a + "too complex" entry. + + +Version 1.8.1 +------------- +* Fix the cpuid code on Windows 64bits so that the x86 backend gets + enabled as expected and can populate CPU information. + Thanks to Robin Scher for reporting the problem. +* Add CPUVendor/CPUModelNumber/CPUFamilyNumber attributes when running + on x86 architecture. Thanks to Ralph Castain for the suggestion. +* Work around buggy BIOS reporting duplicate NUMA nodes on Linux. + Thanks to Jeff Becker for reporting the problem and testing the patch. +* Add a name to the lstopo graphical window. Thanks to Michael Prokop + for reporting the issue. + + +Version 1.8.0 +------------- +* New components + + Add the "linuxpci" component that always works on Linux even when + libpciaccess and libpci aren't available (and even with a modified + file-system root). By default the old "pci" component runs first + because "linuxpci" lacks device names (obj->name is always NULL). +* API + + Add the topology difference API in hwloc/diff.h for manipulating + many similar topologies. + + Add hwloc_topology_dup() for duplicating an entire topology. + + hwloc.h and hwloc/helper.h have been reorganized to clarify the + documentation sections. The actual inline code has moved out of hwloc.h + into the new hwloc/inlines.h. + + Deprecated functions are now in hwloc/deprecated.h, and not in the + official documentation anymore. +* Tools + + Add hwloc-diff and hwloc-patch tools together with the new diff API. + + Add hwloc-compress-dir to (de)compress an entire directory of XML files + using hwloc-diff and hwloc-patch. + + Object colors in the graphical output of lstopo may be changed by adding + a "lstopoStyle" info attribute. See CUSTOM COLORS in the lstopo(1) manpage + for details. Thanks to Jirka Hladky for discussing the idea. 
+ + hwloc-gather-topology may now gather I/O-related files on Linux when + --io is given. Only the linuxpci component supports discovering I/O + objects from these extended tarballs. + + hwloc-annotate now supports --ri to remove/replace info attributes with + a given name. + + hwloc-info supports "root" and "all" special locations for dumping + information about the root object. + + lstopo now supports --append-legend to append custom lines of text + to the legend in the graphical output. Thanks to Jirka Hladky for + discussing the idea. + + hwloc-calc and friends have a more robust parsing of locations given + on the command-line and they report useful error messages about it. + + Add --whole-system to hwloc-bind, hwloc-calc, hwloc-distances and + hwloc-distrib, and add --restrict to hwloc-bind for uniformity among + tools. +* Misc + + Calling hwloc_topology_load() or hwloc_topology_set_*() on an already + loaded topology now returns an error (deprecated since release 1.6.1). + + Fix the initialisation of cpusets and nodesets in Group objects added + when inserting PCI hostbridges. + + Never merge Group objects that were added explicitly by the user with + hwloc_custom_insert_group_object_by_parent(). + + Add a sanity check during dynamic plugin loading to prevent some + crashes when hwloc is dynamically loaded by another plugin mechanism. + + Add --with-hwloc-plugins-path to specify the install/load directories + of plugins. + + Add the MICSerialNumber info attribute to the root object when running + hwloc inside a Xeon Phi to match the same attribute in the MIC OS device + when running in the host. + + +Version 1.7.2 +------------- +* Do not create invalid block OS devices on very old Linux kernels such + as RHEL4 2.6.9. +* Fix PCI subvendor/device IDs. +* Fix the management of Misc objects inserted by parent. + Thanks to Jirka Hladky for reporting the problem. +* Add a PortState info attribute to OpenFabrics OS devices.
+* Add a MICSerialNumber info attribute to Xeon PHI/MIC OS devices. +* Improve verbose error messages when failing to load from XML. + + +Version 1.7.1 +------------- +* Fix a failed assertion in the distance grouping code when loading a XML + file that already contains some groups. + Thanks to Laercio Lima Pilla for reporting the problem. +* Remove unexpected Group objects when loading XML topologies with I/O + objects and NUMA distances. + Thanks to Elena Elkina for reporting the problem and testing patches. +* Fix PCI link speed discovery when using libpciaccess. +* Fix invalid libpciaccess virtual function device/vendor IDs when using + SR-IOV PCI devices on Linux. +* Fix GL component build with old NVCtrl releases. + Thanks to Jirka Hladky for reporting the problem. +* Fix embedding breakage caused by libltdl. + Thanks to Pavan Balaji for reporting the problem. +* Always use the system-wide libltdl instead of shipping one inside hwloc. +* Document issues when enabling plugins while embedding hwloc in another + project, in the documentation section Embedding hwloc in Other Software. +* Add a FAQ entry "How to get useful topology information on NetBSD?" + in the documentation. +* Some fixes in the renaming code for embedding. +* Miscellaneous minor build fixes. + + +Version 1.7.0 +------------- +* New operating system backends + + Add BlueGene/Q compute node kernel (CNK) support. See the FAQ in the + documentation for details. Thanks to Jeff Hammond, Christopher Samuel + and Erik Schnetter for their help. + + Add NetBSD support, thanks to Aleksej Saushev. +* New I/O device discovery + + Add co-processor OS devices such as "mic0" for Intel Xeon Phi (MIC) + on Linux. Thanks to Jerome Vienne for helping. + + Add co-processor OS devices such as "cuda0" for NVIDIA CUDA-capable GPUs. + + Add co-processor OS devices such as "opencl0d0" for OpenCL GPU devices + on the AMD OpenCL implementation. + + Add GPU OS devices such as ":0.0" for NVIDIA X11 displays.
+ + Add GPU OS devices such as "nvml0" for NVIDIA GPUs. + Thanks to Marwan Abdellah and Stefan Eilemann for helping. + These new OS devices have some string info attributes such as CoProcType, + GPUModel, etc. to better identify them. + See the I/O Devices and Attributes documentation sections for details. +* New components + + Add the "opencl", "cuda", "nvml" and "gl" components for I/O device + discovery. + + "nvml" also improves the discovery of NVIDIA GPU PCIe link speed. + All of these new components may be built as plugins. They may also be + disabled entirely by passing --disable-opencl/cuda/nvml/gl to configure. + See the I/O Devices, Components and Plugins, and FAQ documentation + sections for details. +* API + + Add hwloc_topology_get_flags(). + + Add hwloc/plugins.h for building external plugins. + See the Adding new discovery components and plugins section. +* Interoperability + + Add hwloc/opencl.h, hwloc/nvml.h, hwloc/gl.h and hwloc/intel-mic.h + to retrieve the locality of OS devices that correspond to AMD OpenCL + GPU devices or indexes, to NVML devices or indexes, to NVIDIA X11 + displays, or to Intel Xeon Phi (MIC) device indexes. + + Add new helpers in hwloc/cuda.h and hwloc/cudart.h to convert + between CUDA devices or indexes and hwloc OS devices. + + Add hwloc_ibv_get_device_osdev() and clarify the requirements + of the OpenFabrics Verbs helpers in hwloc/openfabrics-verbs.h. +* Tools + + hwloc-info is not only a synonym of lstopo -s anymore, it also + dumps information about objects given on the command-line. +* Documentation + + Add a section "Existing components and plugins". + + Add a list of common OS devices in section "Software devices". + + Add a new FAQ entry "Why is lstopo slow?" about lstopo slowness + issues because of GPUs. + + Clarify the documentation of inline helpers in hwloc/myriexpress.h + and hwloc/openfabrics-verbs.h. +* Misc + + Improve cache detection on AIX. 
+ + The HWLOC_COMPONENTS variable now excludes the components whose + names are prefixed with '-'. + + lstopo --ignore PU now works when displaying the topology in + graphical and textual mode (not when exporting to XML). + + Make sure I/O options always appear in lstopo usage, not only when + using pciutils/libpci. + + Remove some unneeded Linux specific includes from some interoperability + headers. + + Fix some inconsistencies in hwloc-distrib and hwloc-assembler-remote + manpages. Thanks to Guy Streeter for the report. + + Fix a memory leak on AIX when getting memory binding. + + Fix many small memory leaks on Linux. + + The `libpci' component is now called `pci' but the old name is still + accepted in the HWLOC_COMPONENTS variable for backward compatibility. + + +Version 1.6.2 +------------- +* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. + pciutils/libpci is only used if --enable-libpci is given to configure + because its GPL license may taint hwloc. See the Installation section + in the documentation for details. +* Fix get_cpubind on Solaris when bound to a single PU with + processor_bind(). Thanks to Eugene Loh for reporting the problem + and providing a patch. + + +Version 1.6.1 +------------- +* Fix some crash or buggy detection in the x86 backend when Linux + cgroups/cpusets restrict the available CPUs. +* Fix the pkg-config output with --libs --static. + Thanks to Erik Schnetter for reporting one of the problems. +* Fix the output of hwloc-calc -H --hierarchical when using logical + indexes in the output. +* Calling hwloc_topology_load() multiple times on the same topology + is officially deprecated. hwloc will warn in such cases. +* Add some documentation about existing plugins/components, package + dependencies, and I/O devices specification on the command-line. 
+ + +Version 1.6.0 +------------- +* Major changes + + Reorganize the backend infrastructure to support dynamic selection + of components and dynamic loading of plugins. For details, see the + new documentation section Components and plugins. + - The HWLOC_COMPONENTS variable lets one replace the default discovery + components. + - Dynamic loading of plugins may be enabled with --enable-plugins + (except on AIX and Windows). It will build libxml2 and libpci + support as separated modules. This helps reducing the dependencies + of the core hwloc library when distributed as a binary package. +* Backends + + Add CPUModel detection on Darwin and x86/FreeBSD. + Thanks to Robin Scher for providing ways to implement this. + + The x86 backend now adds CPUModel info attributes to socket objects + created by other backends that do not natively support this attribute. + + Fix detection on FreeBSD in case of cpuset restriction. Thanks to + Sebastian Kuzminsky for reporting the problem. +* XML + + Add hwloc_topology_set_userdata_import/export_callback(), + hwloc_export_obj_userdata() and _userdata_base64() to let + applications specify how to save/restore the custom data they placed + in the userdata private pointer field of hwloc objects. +* Tools + + Add hwloc-annotate program to add string info attributes to XML + topologies. + + Add --pid-cmd to hwloc-ps to append the output of a command to each + PID line. May be used for showing Open MPI process ranks, see the + hwloc-ps(1) manpage for details. + + hwloc-bind now exits with an error if binding fails; the executable + is not launched unless binding succeeded or --force was given. + + Add --quiet to hwloc-calc and hwloc-bind to hide non-fatal error + messages. + + Fix command-line pid support in windows tools. + + All programs accept --verbose as a synonym to -v. +* Misc + + Fix some DIR descriptor leaks on Linux. + + Fix I/O device lists when some were filtered out after a XML import.
+ + Fix the removal of I/O objects when importing a I/O-enabled XML topology + without any I/O topology flag. + + When merging objects with HWLOC_IGNORE_TYPE_KEEP_STRUCTURE or + lstopo --merge, compare object types before deciding which one of two + identical object to remove (e.g. keep sockets in favor of caches). + + Add some GUID- and LID-related info attributes to OpenFabrics + OS devices. + + Only add CPUType socket attributes on Solaris/Sparc. Other cases + don't report reliable information (Solaris/x86), and a replacement + is available as the Architecture string info in the Machine object. + + Add missing Backend string info on Solaris in most cases. + + Document object attributes and string infos in a new Attributes + section in the documentation. + + Add a section about Synthetic topologies in the documentation. + + +Version 1.5.2 (some of these changes are in v1.6.2 but not in v1.6) +------------- +* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. + pciutils/libpci is only used if --enable-libpci is given to configure + because its GPL license may taint hwloc. See the Installation section + in the documentation for details. +* Fix get_cpubind on Solaris when bound to a single PU with + processor_bind(). Thanks to Eugene Loh for reporting the problem + and providing a patch. +* Fix some DIR descriptor leaks on Linux. +* Fix I/O device lists when some were filtered out after a XML import. +* Add missing Backend string info on Solaris in most cases. +* Fix the removal of I/O objects when importing a I/O-enabled XML topology + without any I/O topology flag. +* Fix the output of hwloc-calc -H --hierarchical when using logical + indexes in the output. +* Fix the pkg-config output with --libs --static. + Thanks to Erik Schnetter for reporting one of the problems. + + +Version 1.5.1 +------------- +* Fix block OS device detection on Linux kernel 3.3 and later. + Thanks to Guy Streeter for reporting the problem and testing the fix. 
+* Fix the cpuid code in the x86 backend (for FreeBSD). Thanks to + Sebastian Kuzminsky for reporting problems and testing patches. +* Fix 64bit detection on FreeBSD. +* Fix some corner cases in the management of the thissystem flag with + respect to topology flags and environment variables. +* Fix some corner cases in command-line parsing checks in hwloc-distrib + and hwloc-distances. +* Make sure we do not miss some block OS devices on old Linux kernels + when a single PCI device has multiple IDE hosts/devices behind it. +* Do not disable I/O devices or instruction caches in hwloc-assembler output. + + +Version 1.5.0 +------------- +* Backends + + Do not limit the number of processors to 1024 on Solaris anymore. + + Gather total machine memory on FreeBSD. Thanks to Cyril Roelandt. + + XML topology files do not depend on the locale anymore. Float numbers + such as NUMA distances or PCI link speeds now always use a dot as a + decimal separator. + + Add instruction caches detection on Linux, AIX, Windows and Darwin. + + Add get_last_cpu_location() support for the current thread on AIX. + + Support binding on AIX when threads or processes were bound with + bindprocessor(). Thanks to Hendryk Bockelmann for reporting the issue + and testing patches, and to Farid Parpia for explaining the binding + interfaces. + + Improve AMD topology detection in the x86 backend (for FreeBSD) using + the topoext feature. +* API + + Increase HWLOC_API_VERSION to 0x00010500 so that API changes may be + detected at build-time. + + Add a cache type attribute describing Data, Instruction and Unified + caches. Caches with different types but same depth (for instance L1d + and L1i) are placed on different levels. + + Add hwloc_get_cache_type_depth() to retrieve the hwloc level depth of + the given cache depth and type, for instance L1i or L2. + It helps disambiguating the case where hwloc_get_type_depth() returns + HWLOC_TYPE_DEPTH_MULTIPLE.
+ + Instruction caches are ignored unless HWLOC_TOPOLOGY_FLAG_ICACHES is + passed to hwloc_topology_set_flags() before load. + + Add hwloc_ibv_get_device_osdev_by_name() OpenFabrics helper in + openfabrics-verbs.h to find the hwloc OS device object corresponding to + an OpenFabrics device. +* Tools + + Add lstopo-no-graphics, a lstopo built without graphical support to + avoid dependencies on external libraries such as Cairo and X11. When + supported, graphical outputs are only available in the original lstopo + program. + - Packagers splitting lstopo and lstopo-no-graphics into different + packages are advised to use the alternatives system so that lstopo + points to the best available binary. + + Instruction caches are enabled in lstopo by default. Use --no-icaches + to disable them. + + Add -t/--threads to show threads in hwloc-ps. +* Removal of obsolete components + + Remove the old cpuset interface (hwloc/cpuset.h) which is deprecated and + superseded by the bitmap API (hwloc/bitmap.h) since v1.1. + hwloc_cpuset and nodeset types are still defined, but all hwloc_cpuset_* + compatibility wrappers are now gone. + + Remove Linux libnuma conversion helpers for the deprecated and + broken nodemask_t interface. + + Remove support for "Proc" type name, it was superseded by "PU" in v1.0. + + Remove hwloc-mask symlinks, it was replaced by hwloc-calc in v1.0. +* Misc + + Fix PCIe 3.0 link speed computation. + + Non-printable characters are dropped from strings during XML export. + + Fix importing of escaped characters with the minimalistic XML backend. + + Assert hwloc_is_thissystem() in several I/O related helpers. + + Fix some memory leaks in the x86 backend for FreeBSD. + + Minor fixes to ease native builds on Windows. + + Limit the number of retries when operating on all threads within a + process on Linux if the list of threads is heavily getting modified. 
+ + +Version 1.4.3 +------------- +* This release is only meant to fix the pciutils license issue when upgrading + to hwloc v1.5 or later is not possible. It contains several other minor + fixes but ignores many of them that are only in v1.5 or later. +* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. + pciutils/libpci is only used if --enable-libpci is given to configure + because its GPL license may taint hwloc. See the Installation section + in the documentation for details. +* Fix PCIe 3.0 link speed computation. +* Fix importing of escaped characters with the minimalistic XML backend. +* Fix a memory leak in the x86 backend. + + +Version 1.4.2 +------------- +* Fix build on Solaris 9 and earlier when fabsf() is not a compiler + built-in. Thanks to Igor Galić for reporting the problem. +* Fix support for more than 32 processors on Windows. Thanks to Hartmut + Kaiser for reporting the problem. +* Fix process-wide binding and cpulocation routines on Linux when some + threads disappear in the meantime. Thanks to Vlad Roubtsov for reporting + the issue. +* Make installed scripts executable. Thanks to Jirka Hladky for reporting + the problem. +* Fix libtool revision management when building for Windows. This fix was + also released as hwloc v1.4.1.1 Windows builds. Thanks to Hartmut Kaiser + for reporting the problem. +* Fix the __hwloc_inline keyword in public headers when compiling with a + C++ compiler. +* Add Port info attribute to network OS devices inside OpenFabrics PCI + devices so as to identify which interface corresponds to which port. +* Document requirements for interoperability helpers: I/O devices discovery + is required for some of them; the topology must match the current host + for most of them. + + +Version 1.4.1 +------------- +* This release contains all changes from v1.3.2. +* Fix hwloc_alloc_membind, thanks Karl Napf for reporting the issue. +* Fix memory leaks in some get_membind() functions. 
+* Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h) + in case of out-of-order NUMA node ids. +* Fix some overzealous assertions in the distance grouping code. +* Workaround BIOS reporting empty I/O locality in CUDA and OpenFabrics + helpers on Linux. Thanks to Albert Solernou for reporting the problem. +* Install a valgrind suppressions file hwloc-valgrind.supp (see the FAQ). +* Fix memory binding documentation. Thanks to Karl Napf for reporting the + issues. + + +Version 1.4.0 (does not contain all v1.3.2 changes) +------------- +* Major features + + Add "custom" interface and "assembler" tools to build multi-node + topology. See the Multi-node Topologies section in the documentation + for details. +* Interface improvements + + Add symmetric_subtree object attribute to ease assumptions when consulting + regular symmetric topologies. + + Add a CPUModel and CPUType info attribute to Socket objects on Linux + and Solaris. + + Add hwloc_get_obj_index_inside_cpuset() to retrieve the "logical" index + of an object within a subtree of the topology. + + Add more NVIDIA CUDA helpers in cuda.h and cudart.h to find hwloc objects + corresponding to CUDA devices. +* Discovery improvements + + Add a group object above partial distance matrices to make sure + the matrices are available in the final topology, except when this + new object would contradict the existing hierarchy. + + Grouping by distances now also works when loading from XML. + + Fix some corner cases in object insertion, for instance when dealing + with NUMA nodes without any CPU. +* Backends + + Implement hwloc_get_area_membind() on Linux. + + Honor I/O topology flags when importing from XML. + + Further improve XML-related error checking and reporting. + + Hide synthetic topology error messages unless HWLOC_SYNTHETIC_VERBOSE=1. +* Tools + + Add synthetic exporting of symmetric topologies to lstopo. + + lstopo --horiz and --vert can now be applied to some specific object types. 
+ + lstopo -v -p now displays distance matrices with physical indexes. + + Add hwloc-distances utility to list distances. +* Documentation + + Fix and/or document the behavior of most inline functions in hwloc/helper.h + when the topology contains some I/O or Misc objects. + + Backend documentation enhancements. +* Bug fixes + + Fix missing last bit in hwloc_linux_get_thread_cpubind(). + Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue. + + Fix FreeBSD build without cpuid support. + + Fix several Windows build issues. + + Fix inline keyword definition in public headers. + + Fix dependencies in the embedded library. + + Improve visibility support detection. Thanks to Dave Love for providing + the patch. + + Remove references to internal symbols in the tools. + + +Version 1.3.3 +------------- +* This release is only meant to fix the pciutils license issue when upgrading + to hwloc v1.4 or later is not possible. It contains several other minor + fixes but ignores many of them that are only in v1.4 or later. +* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. + pciutils/libpci is only used if --enable-libpci is given to configure + because its GPL license may taint hwloc. See the Installation section + in the documentation for details. + + +Version 1.3.2 +------------- +* Fix missing last bit in hwloc_linux_get_thread_cpubind(). + Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue. +* Fix build with -mcmodel=medium. Thanks to Devendar Bureddy for reporting + the issue. +* Fix build with Solaris Studio 12 compiler when XML is disabled. + Thanks to Paul H. Hargrove for reporting the problem. +* Fix installation with old GNU sed, for instance on Red Hat 8. + Thanks to Paul H. Hargrove for reporting the problem. +* Fix PCI locality when Linux cgroups restrict the available CPUs. +* Fix floating point issue when grouping by distance on mips64 architecture. + Thanks to Paul H. Hargrove for reporting the problem. 
+* Fix conversion from/to Linux libnuma when some NUMA nodes have no memory. +* Fix support for gccfss compilers with broken ffs() support. Thanks to + Paul H. Hargrove for reporting the problem and providing a patch. +* Fix FreeBSD build without cpuid support. +* Fix several Windows build issues. +* Fix inline keyword definition in public headers. +* Fix dependencies in the embedded library. +* Detect when a compiler such as xlc may not report compile errors + properly, causing some configure checks to be wrong. Thanks to + Paul H. Hargrove for reporting the problem and providing a patch. +* Improve visibility support detection. Thanks to Dave Love for providing + the patch. +* Remove references to internal symbols in the tools. +* Fix installation on systems with limited command-line size. + Thanks to Paul H. Hargrove for reporting the problem. +* Further improve XML-related error checking and reporting. + + +Version 1.3.1 +------------- +* Fix pciutils detection with pkg-config when not installed in standard + directories. +* Fix visibility options detection with the Solaris Studio compiler. + Thanks to Igor Galić and Terry Dontje for reporting the problems. +* Fix support for old Linux sched.h headers such as those found + on Red Hat 8. Thanks to Paul H. Hargrove for reporting the problems. +* Fix inline and attribute support for Solaris compilers. Thanks to + Dave Love for reporting the problems. +* Print a short summary at the end of the configure output. Thanks to + Stefan Eilemann for the suggestion. +* Add --disable-libnuma configure option to disable libnuma-based + memory binding support on Linux. Thanks to Rayson Ho for the + suggestion. +* Make hwloc's configure script properly obey $PKG_CONFIG. Thanks to + Nathan Phillip Brink for raising the issue. +* Silence some harmless pciutils warnings, thanks to Paul H. Hargrove + for reporting the problem. 
+* Fix the documentation with respect to hwloc_pid_t and hwloc_thread_t + being either pid_t and pthread_t on Unix, or HANDLE on Windows. + + +Version 1.3.0 +------------- +* Major features + + Add I/O devices and bridges to the topology using the pciutils + library. Only enabled after setting the relevant flag with + hwloc_topology_set_flags() before hwloc_topology_load(). See the + I/O Devices section in the documentation for details. +* Discovery improvements + + Add associativity to the cache attributes. + + Add support for s390/z11 "books" on Linux. + + Add the HWLOC_GROUPING_ACCURACY environment variable to relax + distance-based grouping constraints. See the Environment Variables + section in the documentation for details about grouping behavior + and configuration. + + Allow user-given distance matrices to remove or replace those + discovered by the OS backend. +* XML improvements + + XML is now always supported: a minimalistic custom import/export + code is used when libxml2 is not available. It is only guaranteed + to read XML files generated by hwloc. + + hwloc_topology_export_xml() and export_xmlbuffer() now return an + integer. + + Add hwloc_free_xmlbuffer() to free the buffer allocated by + hwloc_topology_export_xmlbuffer(). + + Hide XML topology error messages unless HWLOC_XML_VERBOSE=1. +* Minor API updates + + Add hwloc_obj_add_info to customize object info attributes. +* Tools + + lstopo now displays I/O devices by default. Several options are + added to configure the I/O discovery. + + hwloc-calc and hwloc-bind now accept I/O devices as input. + + Add --restrict option to hwloc-calc and hwloc-distribute. + + Add --sep option to change the output field separator in hwloc-calc. + + Add --whole-system option to hwloc-ps. + + +Version 1.2.2 +------------- +* Fix build on AIX 5.2, thanks Utpal Kumar Ray for the report. +* Fix XML import of very large page sizes or counts on 32bits platform, + thanks to Karsten Hopp for the RedHat ticket. 
+* Fix crash when administrator limitations such as Linux cgroup require + to restrict distance matrices. Thanks to Ake Sandgren for reporting the + problem. +* Fix the removal of objects such as AMD Magny-Cours dual-node sockets + in case of administrator restrictions. +* Improve error reporting and messages in case of wrong synthetic topology + description. +* Several other minor internal fixes and documentation improvements. + + +Version 1.2.1 +------------- +* Improve support of AMD Bulldozer "Compute-Unit" modules by detecting + logical processors with different core IDs on Linux. +* Fix hwloc-ps crash when listing processes from another Linux cpuset. + Thanks to Carl Smith for reporting the problem. +* Fix build on AIX and Solaris. Thanks to Carl Smith and Andreas Kupries + for reporting the problems. +* Fix cache size detection on Darwin. Thanks to Erkcan Özcan for reporting + the problem. +* Make configure fail if --enable-xml or --enable-cairo is given and + proper support cannot be found. Thanks to Andreas Kupries for reporting + the XML problem. +* Fix spurious L1 cache detection on AIX. Thanks to Hendryk Bockelmann + for reporting the problem. +* Fix hwloc_get_last_cpu_location(THREAD) on Linux. Thanks to Gabriele + Fatigati for reporting the problem. +* Fix object distance detection on Solaris. +* Add pthread_self weak symbol to ease static linking. +* Minor documentation fixes. + + +Version 1.2.0 +------------- +* Major features + + Expose latency matrices in the API as an array of distance structures + within objects. Add several helpers to find distances. + + Add hwloc_topology_set_distance_matrix() and environment variables + to provide a matrix of distances between a given set of objects. + + Add hwloc_get_last_cpu_location() and hwloc_get_proc_last_cpu_location() + to retrieve the processors where a process or thread recently ran. + - Add the corresponding --get-last-cpu-location option to hwloc-bind. 
+ + Add hwloc_topology_restrict() to restrict an existing topology to a + given cpuset. + - Add the corresponding --restrict option to lstopo. +* Minor API updates + + Add hwloc_bitmap_list_sscanf/snprintf/asprintf to convert between bitmaps + and strings such as 4-5,7-9,12,15- + + hwloc_bitmap_set/clr_range() now support infinite ranges. + + Clarify the difference between inserting Misc objects by cpuset or by + parent. + + hwloc_insert_misc_object_by_cpuset() now returns NULL in case of error. +* Discovery improvements + + x86 backend (for freebsd): add x2APIC support + + Support standard device-tree phandle, to get better support on e.g. ARM + systems providing it. + + Detect cache size on AIX. Thanks Christopher and IBM. + + Improve grouping to support asymmetric topologies. +* Tools + + Command-line tools now support "all" and "root" special locations + consisting in the entire topology, as well as type names with depth + attributes such as L2 or Group4. + + hwloc-calc improvements: + - Add --number-of/-N option to report the number of objects of a given + type or depth. + - -I is now equivalent to --intersect for listing the indexes of + objects of a given type or depth that intersects the input. + - Add -H to report the output as a hierarchical combination of types + and depths. + + Add --thissystem to lstopo. + + Add lstopo-win, a console-less lstopo variant on Windows. +* Miscellaneous + + Remove C99 usage from code base. + + Rename hwloc-gather-topology.sh into hwloc-gather-topology + + Fix AMD cache discovery on freebsd when there is no L3 cache, thanks + Andriy Gapon for the fix. + + +Version 1.1.2 +------------- +* Fix a segfault in the distance-based grouping code when some objects + are not placed in any group. Thanks to Bernd Kallies for reporting + the problem and providing a patch. +* Fix the command-line parsing of hwloc-bind --mempolicy interleave. + Thanks to Guy Streeter for reporting the problem. 
+* Stop truncating the output in hwloc_obj_attr_snprintf() and in the + corresponding lstopo output. Thanks to Guy Streeter for reporting the + problem. +* Fix object levels ordering in synthetic topologies. +* Fix potential incoherency between device tree and kernel information, + when SMT is disabled on Power machines. +* Fix and document the behavior of hwloc_topology_set_synthetic() in case + of invalid argument. Thanks to Guy Streeter for reporting the problem. +* Add some verbose error message reporting when it looks like the OS + gives erroneous information. +* Do not include unistd.h and stdint.h in public headers on Windows. +* Move config.h files into their own subdirectories to avoid name + conflicts when AC_CONFIG_HEADERS adds -I's for them. +* Remove the use of declaring variables inside "for" loops. +* Some other minor fixes. +* Many minor documentation fixes. + + +Version 1.1.1 +------------- +* Add hwloc_get_api_version() which returns the version of hwloc used + at runtime. Thanks to Guy Streeter for the suggestion. +* Fix the number of hugepages reported for NUMA nodes on Linux. +* Fix hwloc_bitmap_to_ulong() right after allocating the bitmap. + Thanks to Bernd Kallies for reporting the problem. +* Fix hwloc_bitmap_from_ith_ulong() to properly zero the first ulong. + Thanks to Guy Streeter for reporting the problem. +* Fix hwloc_get_membind_nodeset() on Linux. + Thanks to Bernd Kallies for reporting the problem and providing a patch. +* Fix some file descriptor leaks in the Linux discovery. +* Fix the minimum width of NUMA nodes, caches and the legend in the graphical + lstopo output. Thanks to Jirka Hladky for reporting the problem. +* Various fixes to bitmap conversion from/to taskset-strings. +* Fix and document snprintf functions behavior when the buffer size is too + small or zero. Thanks to Guy Streeter for reporting the problem. +* Fix configure to avoid spurious enabling of the cpuid backend. 
+ Thanks to Tim Anderson for reporting the problem. +* Cleanup error management in hwloc-gather-topology.sh. + Thanks to Jirka Hladky for reporting the problem and providing a patch. +* Add a manpage and usage for hwloc-gather-topology.sh on Linux. + Thanks to Jirka Hladky for providing a patch. +* Memory binding documentation enhancements. + + +Version 1.1.0 +------------- + +* API + + Increase HWLOC_API_VERSION to 0x00010100 so that API changes may be + detected at build-time. + + Add a memory binding interface. + + The cpuset API (hwloc/cpuset.h) is now deprecated. It is replaced by + the bitmap API (hwloc/bitmap.h) which offers the same features with more + generic names since it applies to CPU sets, node sets and more. + Backward compatibility with the cpuset API and ABI is still provided but + it will be removed in a future release. + Old types (hwloc_cpuset_t, ...) are still available as a way to clarify + what kind of hwloc_bitmap_t each API function manipulates. + Upgrading to the new API only requires to replace hwloc_cpuset_ function + calls with the corresponding hwloc_bitmap_ calls, with the following + renaming exceptions: + - hwloc_cpuset_cpu -> hwloc_bitmap_only + - hwloc_cpuset_all_but_cpu -> hwloc_bitmap_allbut + - hwloc_cpuset_from_string -> hwloc_bitmap_sscanf + + Add an `infos' array in each object to store couples of info names and + values. It enables generic storage of things like the old dmi board infos + that were previously stored in machine specific attributes. + + Add linesize cache attribute. +* Features + + Bitmaps (and thus CPU sets and node sets) are dynamically (re-)allocated, + the maximal number of CPUs (HWLOC_NBMAXCPUS) has been removed. + + Improve the distance-based grouping code to better support irregular + distance matrices. + + Add support for device-tree to get cache information (useful on Power + architectures). 
+* Helpers + + Add NVIDIA CUDA helpers in cuda.h and cudart.h to ease interoperability + with CUDA Runtime and Driver APIs. + + Add Myrinet Express helper in myriexpress.h to ease interoperability. +* Tools + + lstopo now displays physical/OS indexes by default in graphical mode + (use -l to switch back to logical indexes). The textual output still uses + logical by default (use -p to switch to physical indexes). + + lstopo prefixes logical indexes with `L#' and physical indexes with `P#'. + Physical indexes are also printed as `P#N' instead of `phys=N' within + object attributes (in parentheses). + + Add a legend at the bottom of the lstopo graphical output, use --no-legend + to remove it. + + Add hwloc-ps to list process' bindings. + + Add --membind and --mempolicy options to hwloc-bind. + + Improve tools command-line options by adding a generic --input option + (and more) which replaces the old --xml, --synthetic and --fsys-root. + + Cleanup lstopo output configuration by adding --output-format. + + Add --intersect in hwloc-calc, and replace --objects with --largest. + + Add the ability to work on standard input in hwloc-calc. + + Add --from, --to and --at in hwloc-distrib. + + Add taskset-specific functions and command-line tools options to + manipulate CPU set strings in the format of the taskset program. + + Install hwloc-gather-topology.sh on Linux. + + +Version 1.0.3 +------------- + +* Fix support for Linux cpuset when emulated by a cgroup mount point. +* Remove unneeded runtime dependency on libibverbs.so in the library and + all utils programs. +* Fix hwloc_cpuset_to_linux_libnuma_ulongs in case of non-linear OS-indexes + for NUMA nodes. +* lstopo now displays physical/OS indexes by default in graphical mode + (use -l to switch back to logical indexes). The textual output still uses + logical by default (use -p to switch to physical indexes). + + +Version 1.0.2 +------------- + +* Public headers can now be included directly from C++ programs. 
+* Solaris fix for non-contiguous cpu numbers. Thanks to Rolf vandeVaart for + reporting the issue. +* Darwin 10.4 fix. Thanks to Olivier Cessenat for reporting the issue. +* Revert 1.0.1 patch that ignored sockets with unknown ID values since it + only slightly helped POWER7 machines with old Linux kernels while it + prevents recent kernels from getting the complete POWER7 topology. +* Fix hwloc_get_common_ancestor_obj(). +* Remove arch-specific bits in public headers. +* Some fixes in the lstopo graphical output. +* Various man page clarifications and minor updates. + + +Version 1.0.1 +------------- + +* Various Solaris fixes. Thanks to Yannick Martin for reporting the issue. +* Fix "non-native" builds on x86 platforms (e.g., when building 32 + bit executables with compilers that natively build 64 bit). +* Ignore sockets with unknown ID values (which fixes issues on POWER7 + machines). Thanks to Greg Bauer for reporting the issue. +* Various man page clarifications and minor updates. +* Fixed memory leaks in hwloc_setup_group_from_min_distance_clique(). +* Fix cache type filtering on MS Windows 7. Thanks to Αλέξανδρος + Παπαδογιαννάκ for reporting the issue. +* Fixed warnings when compiling with -DNDEBUG. + + +Version 1.0.0 +------------- + +* The ABI of the library has changed. +* Backend updates + + Add FreeBSD support. + + Add x86 cpuid based backend. + + Add Linux cgroup support to the Linux cpuset code. + + Support binding of entire multithreaded process on Linux. + + Fix and enable Group support in Windows. + + Cleanup XML export/import. +* Objects + + HWLOC_OBJ_PROC is renamed into HWLOC_OBJ_PU for "Processing Unit", + its stringified type name is now "PU". + + Use new HWLOC_OBJ_GROUP objects instead of MISC when grouping + objects according to NUMA distances or arbitrary OS aggregation. + + Rework memory attributes. + + Add different cpusets in each object to specify processors that + are offline, unavailable, ... 
+ + Cleanup the storage of object names and DMI infos. +* Features + + Add support for looking up specific PID topology information. + + Add hwloc_topology_export_xml() to export the topology in a XML file. + + Add hwloc_topology_get_support() to retrieve the supported features + for the current topology context. + + Support non-SYSTEM object as the root of the tree, use MACHINE in + most common cases. + + Add hwloc_get_*cpubind() routines to retrieve the current binding + of processes and threads. +* API + + Add HWLOC_API_VERSION to help detect the currently used API version. + + Add missing ending "e" to *compare* functions. + + Add several routines to emulate PLPA functions. + + Rename and rework the cpuset and/or/xor/not/clear operators to output + their result in a dedicated argument instead of modifying one input. + + Deprecate hwloc_obj_snprintf() in favor of hwloc_obj_type/attr_snprintf(). + + Clarify the use of parent and ancestor in the API, do not use father. + + Replace hwloc_get_system_obj() with hwloc_get_root_obj(). + + Return -1 instead of HWLOC_OBJ_TYPE_MAX in the API since the latter + isn't public. + + Relax constraints in hwloc_obj_type_of_string(). + + Improve displaying of memory sizes. + + Add 0x prefix to cpuset strings. +* Tools + + lstopo now displays logical indexes by default, use --physical to + revert back to OS/physical indexes. + + Add colors in the lstopo graphical outputs to distinguish between online, + offline, reserved, ... objects. + + Extend lstopo to show cpusets, filter objects by type, ... + + Renamed hwloc-mask into hwloc-calc which supports many new options. +* Documentation + + Add a hwloc(7) manpage containing general information. + + Add documentation about how to switch from PLPA to hwloc. + + Cleanup the distributed documentation files. +* Miscellaneous + + Many compilers warning fixes. + + Cleanup the ABI by using the visibility attribute. + + Add project embedding support. 
+ + +Version 0.9.4 (unreleased) +-------------------------- + +* Fix resetting colors to normal in lstopo -.txt output. +* Fix Linux pthread_t binding error report. + + +Version 0.9.3 +------------- + +* Fix autogen.sh to work with Autoconf 2.63. +* Fix various crashes in particular conditions: + - xml files with root attributes + - offline CPUs + - partial sysfs support + - unparseable /proc/cpuinfo + - ignoring NUMA level while Misc level have been generated +* Tweak documentation a bit +* Do not require the pthread library for binding the current thread on Linux +* Do not erroneously consider the sched_setaffinity prototype is the old version + when there is actually none. +* Fix _syscall3 compilation on archs for which we do not have the + sched_setaffinity system call number. +* Fix AIX binding. +* Fix libraries dependencies: now only lstopo depends on libtermcap, fix + binutils-gold link +* Have make check always build and run hwloc-hello.c +* Do not limit size of a cpuset. + + +Version 0.9.2 +------------- + +* Trivial documentation changes. + + +Version 0.9.1 +------------- + +* Re-branded to "hwloc" and moved to the Open MPI project, relicensed under the + BSD license. +* The prefix of all functions and tools is now hwloc, and some public + functions were also renamed for real. +* Group NUMA nodes into Misc objects according to their physical distance + that may be reported by the OS/BIOS. + May be ignored by setting HWLOC_IGNORE_DISTANCES=1 in the environment. +* Ignore offline CPUs on Solaris. +* Improved binding support on AIX. +* Add HP-UX support. +* CPU sets are now allocated/freed dynamically. +* Add command line options to tune the lstopo graphical output, add + semi-graphical textual output +* Extend topobind to support multiple cpusets or objects on the command + line as topomask does. +* Add an Infiniband-specific helper hwloc/openfabrics-verbs.h to retrieve + the physical location of IB devices.
+ + +Version 0.9 (libtopology) +------------------------- + +* First release. diff --git a/opal/mca/hwloc/hwloc1112/hwloc/README b/opal/mca/hwloc/hwloc1112/hwloc/README new file mode 100644 index 00000000000..592d459f842 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/README @@ -0,0 +1,612 @@ +Introduction + +hwloc provides command line tools and a C API to obtain the hierarchical map of +key computing elements, such as: NUMA memory nodes, shared caches, processor +packages, processor cores, processing units (logical processors or "threads") +and even I/O devices. hwloc also gathers various attributes such as cache and +memory information, and is portable across a variety of different operating +systems and platforms. Additionally it may assemble the topologies of multiple +machines into a single one so as to let applications consult the topology of an +entire fabric or cluster at once. + +hwloc primarily aims at helping high-performance computing (HPC) applications, +but is also applicable to any project seeking to exploit code and/or data +locality on modern computing platforms. + +Note that the hwloc project represents the merger of the libtopology project +from inria and the Portable Linux Processor Affinity (PLPA) sub-project from +Open MPI. Both of these prior projects are now deprecated. The first hwloc +release was essentially a "re-branding" of the libtopology code base, but with +both a few genuinely new features and a few PLPA-like features added in. Prior +releases of hwloc included documentation about switching from PLPA to hwloc; +this documentation has been dropped on the assumption that everyone who was +using PLPA has already switched to hwloc. 
+ +hwloc supports the following operating systems: + + * Linux (including old kernels not having sysfs topology information, with + knowledge of cpusets, offline CPUs, ScaleMP vSMP and Kerrighed support) on + all supported hardware, including Intel Xeon Phi (KNL and KNC, either + standalone or as a coprocessor) and NumaScale NumaConnect. + * Solaris + * AIX + * Darwin / OS X + * FreeBSD and its variants (such as kFreeBSD/GNU) + * NetBSD + * OSF/1 (a.k.a., Tru64) + * HP-UX + * Microsoft Windows + * IBM BlueGene/Q Compute Node Kernel (CNK) + +Since it uses standard Operating System information, hwloc's support is mostly +independent from the processor type (x86, powerpc, ...) and just relies on the +Operating System support. The only exception to this is kFreeBSD, which does +not support topology information, and hwloc thus uses an x86-only CPUID-based +backend (which can be used for other OSes too, see the Components and plugins +section). + +To check whether hwloc works on a particular machine, just try to build it and +run lstopo or lstopo-no-graphics. If some things do not look right (e.g. bogus +or missing cache information), see Questions and Bugs below. + +hwloc only reports the number of processors on unsupported operating systems; +no topology information is available. + +For development and debugging purposes, hwloc also offers the ability to work +on "fake" topologies: + + * Symmetrical tree of resources generated from a list of level arities + * Remote machine simulation through the gathering of Linux sysfs topology + files + +hwloc can display the topology in a human-readable format, either in graphical +mode (X11), or by exporting in one of several different formats, including: +plain text, PDF, PNG, and FIG (see CLI Examples below). Note that some of the +export formats require additional support libraries. + +hwloc offers a programming interface for manipulating topologies and objects.
+It also brings a powerful CPU bitmap API that is used to describe topology +objects location on physical/logical processors. See the Programming Interface +below. It may also be used to bind applications onto certain cores or memory +nodes. Several utility programs are also provided to ease command-line +manipulation of topology objects, binding of processes, and so on. + +Perl bindings are available from Bernd Kallies on CPAN. + +Python bindings are available from Guy Streeter: + + * Fedora RPM and tarball. + * git tree (html). + +Installation + +hwloc (http://www.open-mpi.org/projects/hwloc/) is available under the BSD +license. It is hosted as a sub-project of the overall Open MPI project (http:// +www.open-mpi.org/). Note that hwloc does not require any functionality from +Open MPI -- it is a wholly separate (and much smaller!) project and code base. +It just happens to be hosted as part of the overall Open MPI project. + +Nightly development snapshots are available on the web site. Additionally, the +code can be directly cloned from Git: + +shell$ git clone https://github.com/open-mpi/hwloc.git +shell$ cd hwloc +shell$ ./autogen.sh + +Note that GNU Autoconf >=2.63, Automake >=1.10 and Libtool >=2.2.6 are required +when building from a Git clone. + +Installation by itself is the fairly common GNU-based process: + +shell$ ./configure --prefix=... +shell$ make +shell$ make install + +The hwloc command-line tool "lstopo" produces human-readable topology maps, as +mentioned above. It can also export maps to the "fig" file format. Support for +PDF, Postscript, and PNG exporting is provided if the "Cairo" development +package (usually cairo-devel or libcairo2-dev) can be found in "lstopo" when +hwloc is configured and built. + +The hwloc core may also benefit from the following development packages: + + * libnuma for memory binding and migration support on Linux (numactl-devel or + libnuma-dev package).
+ * libpciaccess for full I/O device discovery (libpciaccess-devel or + libpciaccess-dev package). On Linux, PCI discovery may still be performed + (without vendor/device names) even if libpciaccess cannot be used. + + * the AMD OpenCL implementation for OpenCL device discovery. + * the NVIDIA CUDA Toolkit for CUDA device discovery. + * the NVIDIA Tesla Development Kit for NVML device discovery. + * the NV-CONTROL X extension library (NVCtrl) for NVIDIA display discovery. + * libxml2 for full XML import/export support (otherwise, the internal + minimalistic parser will only be able to import XML files that were + exported by the same hwloc release). See Importing and exporting topologies + from/to XML files for details. The relevant development package is usually + libxml2-devel or libxml2-dev. + * libudev on Linux for easier discovery of OS device information (otherwise + hwloc will try to manually parse udev raw files). The relevant development + package is usually libudev-devel or libudev-dev. + * libtool's ltdl library for dynamic plugin loading. The relevant development + package is usually libtool-ltdl-devel or libltdl-dev. + +PCI and XML support may be statically built inside the main hwloc library, or +as separate dynamically-loaded plugins (see the Components and plugins +section). + +Note that because of the possibility of GPL taint, the pciutils library libpci +will not be used (remember that hwloc is BSD-licensed). + +Also note that if you install supplemental libraries in non-standard locations, +hwloc's configure script may not be able to find them without some help. You +may need to specify additional CPPFLAGS, LDFLAGS, or PKG_CONFIG_PATH values on +the configure command line. + +For example, if libpciaccess was installed into /opt/pciaccess, hwloc's +configure script may not find it by default. Try adding PKG_CONFIG_PATH to the +./configure command line, like this: + +./configure PKG_CONFIG_PATH=/opt/pciaccess/lib/pkgconfig ...
+ +CLI Examples + +On a 4-package 2-core machine with hyper-threading, the lstopo tool may show +the following graphical output: + +dudley.png + +Here's the equivalent output in textual form: + +Machine (16GB) + Package L#0 + L3 L#0 (4096KB) + L2 L#0 (1024KB) + L1 L#0 (16KB) + Core L#0 + PU L#0 (P#0) + PU L#1 (P#8) + L2 L#1 (1024KB) + L1 L#1 (16KB) + Core L#1 + PU L#2 (P#4) + PU L#3 (P#12) + Package L#1 + L3 L#1 (4096KB) + L2 L#2 (1024KB) + L1 L#2 (16KB) + Core L#2 + PU L#4 (P#1) + PU L#5 (P#9) + L2 L#3 (1024KB) + L1 L#3 (16KB) + Core L#3 + PU L#6 (P#5) + PU L#7 (P#13) + Package L#2 + L3 L#2 (4096KB) + L2 L#4 (1024KB) + L1 L#4 (16KB) + Core L#4 + PU L#8 (P#2) + PU L#9 (P#10) + L2 L#5 (1024KB) + L1 L#5 (16KB) + Core L#5 + PU L#10 (P#6) + PU L#11 (P#14) + Package L#3 + L3 L#3 (4096KB) + L2 L#6 (1024KB) + L1 L#6 (16KB) + Core L#6 + PU L#12 (P#3) + PU L#13 (P#11) + L2 L#7 (1024KB) + L1 L#7 (16KB) + Core L#7 + PU L#14 (P#7) + PU L#15 (P#15) + +Note that there is also an equivalent output in XML that is meant for exporting +/importing topologies but it is hardly readable to human-beings (see Importing +and exporting topologies from/to XML files for details). 
+ +On a 4-package 2-core Opteron NUMA machine, the lstopo tool may show the +following graphical output: + +hagrid.png + +Here's the equivalent output in textual form: + +Machine (32GB) + NUMANode L#0 (P#0 8190MB) + Package L#0 + L2 L#0 (1024KB) + L1 L#0 (64KB) + Core L#0 + PU L#0 (P#0) + L2 L#1 (1024KB) + L1 L#1 (64KB) + Core L#1 + PU L#1 (P#1) + NUMANode L#1 (P#1 8192MB) + Package L#1 + L2 L#2 (1024KB) + L1 L#2 (64KB) + Core L#2 + PU L#2 (P#2) + L2 L#3 (1024KB) + L1 L#3 (64KB) + Core L#3 + PU L#3 (P#3) + NUMANode L#2 (P#2 8192MB) + Package L#2 + L2 L#4 (1024KB) + L1 L#4 (64KB) + Core L#4 + PU L#4 (P#4) + L2 L#5 (1024KB) + L1 L#5 (64KB) + Core L#5 + PU L#5 (P#5) + NUMANode L#3 (P#3 8192MB) + Package L#3 + L2 L#6 (1024KB) + L1 L#6 (64KB) + Core L#6 + PU L#6 (P#6) + L2 L#7 (1024KB) + L1 L#7 (64KB) + Core L#7 + PU L#7 (P#7) + +On a 2-package quad-core Xeon (pre-Nehalem, with 2 dual-core dies into each +package): + +emmett.png + +Here's the same output in textual form: + +Machine (16GB) + Package L#0 + L2 L#0 (4096KB) + L1 L#0 (32KB) + Core L#0 + PU L#0 (P#0) + L1 L#1 (32KB) + Core L#1 + PU L#1 (P#4) + L2 L#1 (4096KB) + L1 L#2 (32KB) + Core L#2 + PU L#2 (P#2) + L1 L#3 (32KB) + Core L#3 + PU L#3 (P#6) + Package L#1 + L2 L#2 (4096KB) + L1 L#4 (32KB) + Core L#4 + PU L#4 (P#1) + L1 L#5 (32KB) + Core L#5 + PU L#5 (P#5) + L2 L#3 (4096KB) + L1 L#6 (32KB) + Core L#6 + PU L#6 (P#3) + L1 L#7 (32KB) + Core L#7 + PU L#7 (P#7) + +Programming Interface + +The basic interface is available in hwloc.h. Some higher-level functions are +available in hwloc/helper.h to reduce the need to manually manipulate objects +and follow links between them. Documentation for all these is provided later in +this document. Developers may also want to look at hwloc/inlines.h which +contains the actual inline code of some hwloc.h routines, and at this document, +which provides good higher-level topology traversal examples. 
+ +To precisely define the vocabulary used by hwloc, a Terms and Definitions +section is available and should probably be read first. + +Each hwloc object contains a cpuset describing the list of processing units +that it contains. These bitmaps may be used for CPU binding and Memory binding. +hwloc offers an extensive bitmap manipulation interface in hwloc/bitmap.h. + +Moreover, hwloc also comes with additional helpers for interoperability with +several commonly used environments. See the Interoperability With Other +Software section for details. + +The complete API documentation is available in a full set of HTML pages, man +pages, and self-contained PDF files (formatted for both US letter and A4 +formats) in the source tarball in doc/doxygen-doc/. + +NOTE: If you are building the documentation from a Git clone, you will need to +have Doxygen and pdflatex installed -- the documentation will be built during +the normal "make" process. The documentation is installed during "make install" +to $prefix/share/doc/hwloc/ and your system's default man page tree (under +$prefix, of course). + +Portability + +As shown in CLI Examples, hwloc can obtain information on a wide variety of +hardware topologies. However, some platforms and/or operating system versions +will only report a subset of this information. For example, on a PPC64-based +system with 32 cores (each with 2 hardware threads) running a default +2.6.18-based kernel from RHEL 5.4, hwloc is only able to glean information +about NUMA nodes and processor units (PUs). No information about caches, +packages, or cores is available. + +Similarly, Operating Systems have varying support for CPU and memory binding, +e.g. while some Operating Systems provide interfaces for all kinds of CPU and +memory bindings, some others provide only interfaces for a limited number of +kinds of CPU and memory binding, and some do not provide any binding interface +at all.
Hwloc's binding functions would then simply return the ENOSYS error +(Function not implemented), meaning that the underlying Operating System does +not provide any interface for them. CPU binding and Memory binding provide more +information on which hwloc binding functions should be preferred because +interfaces for them are usually available on the supported Operating Systems. + +Here's the graphical output from lstopo on this platform when Simultaneous +Multi-Threading (SMT) is enabled: + +ppc64-with-smt.png + +And here's the graphical output from lstopo on this platform when SMT is +disabled: + +ppc64-without-smt.png + +Notice that hwloc only sees half the PUs when SMT is disabled. PU #15, for +example, seems to change location from NUMA node #0 to #1. In reality, no PUs +"moved" -- they were simply re-numbered when hwloc only saw half as many. +Hence, PU #15 in the SMT-disabled picture probably corresponds to PU #30 in the +SMT-enabled picture. + +This same "PUs have disappeared" effect can be seen on other platforms -- even +platforms / OSs that provide much more information than the above PPC64 system. +This is an unfortunate side-effect of how operating systems report information +to hwloc. + +Note that upgrading the Linux kernel on the same PPC64 system mentioned above +to 2.6.34, hwloc is able to discover all the topology information. The +following picture shows the entire topology layout when SMT is enabled: + +ppc64-full-with-smt.png + +Developers using the hwloc API or XML output for portable applications should +therefore be extremely careful to not make any assumptions about the structure +of data that is returned. For example, per the above reported PPC topology, it +is not safe to assume that PUs will always be descendants of cores. + +Additionally, future hardware may insert new topology elements that are not +available in this version of hwloc. 
Long-lived applications that are meant to +span multiple different hardware platforms should also be careful about making +structure assumptions. For example, there may someday be an element "lower" +than a PU, or perhaps a new element may exist between a core and a PU. + +API Example + +The following small C example (named ``hwloc-hello.c'') prints the topology of +the machine and binds the process to a single logical processor of the last +core of the machine. More examples are available in the doc/examples/ directory +of the source tree. + +/* Example hwloc API program. + * + * See other examples under doc/examples/ in the source tree + * for more details. + * + * Copyright (c) 2009-2015 Inria. All rights reserved. + * Copyright (c) 2009-2011 Université Bordeaux + * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + * + * hwloc-hello.c + */ + +#include <hwloc.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> + +static void print_children(hwloc_topology_t topology, hwloc_obj_t obj, + int depth) +{ + char type[32], attr[1024]; + unsigned i; + + hwloc_obj_type_snprintf(type, sizeof(type), obj, 0); + printf("%*s%s", 2*depth, "", type); + if (obj->os_index != (unsigned) -1) + printf("#%u", obj->os_index); + hwloc_obj_attr_snprintf(attr, sizeof(attr), obj, " ", 0); + if (*attr) + printf("(%s)", attr); + printf("\n"); + for (i = 0; i < obj->arity; i++) { + print_children(topology, obj->children[i], depth + 1); + } +} + +int main(void) +{ + int depth; + unsigned i, n; + unsigned long size; + int levels; + char string[128]; + int topodepth; + hwloc_topology_t topology; + hwloc_cpuset_t cpuset; + hwloc_obj_t obj; + + /* Allocate and initialize topology object. */ + hwloc_topology_init(&topology); + + /* ... Optionally, put detection configuration here to ignore + some objects types, define a synthetic topology, etc.... + + The default is to detect all the objects of the machine that + the caller is allowed to access.
See Configure Topology + Detection. */ + + /* Perform the topology detection. */ + hwloc_topology_load(topology); + + /* Optionally, get some additional topology information + in case we need the topology depth later. */ + topodepth = hwloc_topology_get_depth(topology); + + /***************************************************************** + * First example: + * Walk the topology with an array style, from level 0 (always + * the system level) to the lowest level (always the proc level). + *****************************************************************/ + for (depth = 0; depth < topodepth; depth++) { + printf("*** Objects at level %d\n", depth); + for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth); + i++) { + hwloc_obj_type_snprintf(string, sizeof(string), + hwloc_get_obj_by_depth +(topology, depth, i), 0); + printf("Index %u: %s\n", i, string); + } + } + + /***************************************************************** + * Second example: + * Walk the topology with a tree style. + *****************************************************************/ + printf("*** Printing overall tree\n"); + print_children(topology, hwloc_get_root_obj(topology), 0); + + /***************************************************************** + * Third example: + * Print the number of packages. + *****************************************************************/ + depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) { + printf("*** The number of packages is unknown\n"); + } else { + printf("*** %u package(s)\n", + hwloc_get_nbobjs_by_depth(topology, depth)); + } + + /***************************************************************** + * Fourth example: + * Compute the amount of cache that the first logical processor + * has above it. 
+ *****************************************************************/ + levels = 0; + size = 0; + for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0); + obj; + obj = obj->parent) + if (obj->type == HWLOC_OBJ_CACHE) { + levels++; + size += obj->attr->cache.size; + } + printf("*** Logical processor 0 has %d caches totaling %luKB\n", + levels, size / 1024); + + /***************************************************************** + * Fifth example: + * Bind to only one thread of the last core of the machine. + * + * First find out where cores are, or else smaller sets of CPUs if + * the OS doesn't have the notion of a "core". + *****************************************************************/ + depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE); + + /* Get last core. */ + obj = hwloc_get_obj_by_depth(topology, depth, + hwloc_get_nbobjs_by_depth(topology, depth) - 1); + if (obj) { + /* Get a copy of its cpuset that we may modify. */ + cpuset = hwloc_bitmap_dup(obj->cpuset); + + /* Get only one logical processor (in case the core is + SMT/hyper-threaded). */ + hwloc_bitmap_singlify(cpuset); + + /* And try to bind ourself there. */ + if (hwloc_set_cpubind(topology, cpuset, 0)) { + char *str; + int error = errno; + hwloc_bitmap_asprintf(&str, obj->cpuset); + printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error)); + free(str); + } + + /* Free our cpuset copy */ + hwloc_bitmap_free(cpuset); + } + + /***************************************************************** + * Sixth example: + * Allocate some memory on the last NUMA node, bind some existing + * memory to the last NUMA node. + *****************************************************************/ + /* Get last node. 
*/ + n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE); + if (n) { + void *m; + size = 1024*1024; + + obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1); + m = hwloc_alloc_membind_nodeset(topology, size, obj->nodeset, + HWLOC_MEMBIND_BIND, 0); + hwloc_free(topology, m, size); + + m = malloc(size); + hwloc_set_area_membind_nodeset(topology, m, size, obj->nodeset, + HWLOC_MEMBIND_BIND, 0); + free(m); + } + + /* Destroy topology object. */ + hwloc_topology_destroy(topology); + + return 0; +} + +hwloc provides a pkg-config executable to obtain relevant compiler and linker +flags. For example, it can be used thusly to compile applications that utilize +the hwloc library (assuming GNU Make): + +CFLAGS += $(shell pkg-config --cflags hwloc) +LDLIBS += $(shell pkg-config --libs hwloc) +cc hwloc-hello.c $(CFLAGS) -o hwloc-hello $(LDLIBS) + +On a machine with 4GB of RAM and 2 processor packages -- each package of which +has two processing cores -- the output from running hwloc-hello could be +something like the following: + +shell$ ./hwloc-hello +*** Objects at level 0 +Index 0: Machine(3938MB) +*** Objects at level 1 +Index 0: Package#0 +Index 1: Package#1 +*** Objects at level 2 +Index 0: Core#0 +Index 1: Core#1 +Index 2: Core#3 +Index 3: Core#2 +*** Objects at level 3 +Index 0: PU#0 +Index 1: PU#1 +Index 2: PU#2 +Index 3: PU#3 +*** Printing overall tree +Machine(3938MB) + Package#0 + Core#0 + PU#0 + Core#1 + PU#1 + Package#1 + Core#3 + PU#2 + Core#2 + PU#3 +*** 2 package(s) +shell$ + +Questions and Bugs + +Questions should be sent to the devel mailing list (http://www.open-mpi.org/ +community/lists/hwloc.php). Bug reports should be filed in the tracker ( +https://github.com/open-mpi/hwloc/issues). + +If hwloc discovers an incorrect topology for your machine, the very first thing +you should check is to ensure that you have the most recent updates installed +for your operating system.
Indeed, most of hwloc topology discovery relies on +hardware information retrieved through the operating system (e.g., via the /sys +virtual filesystem of the Linux kernel). If upgrading your OS or Linux kernel +does not solve your problem, you may also want to ensure that you are running +the most recent version of the BIOS for your machine. + +If those things fail, contact us on the mailing list for additional help. +Please attach the output of lstopo after having given the --enable-debug option +to ./configure and rebuilt completely, to get debugging output. Also attach the +/proc + /sys tarball generated by the installed script hwloc-gather-topology +when submitting problems about Linux, or send the output of kstat cpu_info in +the Solaris case, or the output of sysctl hw in the Darwin or BSD cases. + +History / Credits + +hwloc is the evolution and merger of the libtopology (http:// +runtime.bordeaux.inria.fr/libtopology/) project and the Portable Linux +Processor Affinity (PLPA) (http://www.open-mpi.org/projects/plpa/) project. +Because of functional and ideological overlap, these two code bases and ideas +were merged and released under the name "hwloc" as an Open MPI sub-project. + +libtopology was initially developed by the inria Runtime Team-Project (http:// +runtime.bordeaux.inria.fr/) (headed by Raymond Namyst (http:// +dept-info.labri.fr/~namyst/)). PLPA was initially developed by the Open MPI +development team as a sub-project. Both are now deprecated in favor of hwloc, +which is distributed as an Open MPI sub-project.
+ +Further Reading + +The documentation chapters include + + * Terms and Definitions + * Command-Line Tools + * Environment Variables + * CPU and Memory Binding Overview + * I/O Devices + * Miscellaneous objects + * Multi-node Topologies + * Object attributes + * Importing and exporting topologies from/to XML files + * Synthetic topologies + * Interoperability With Other Software + * Thread Safety + * Components and plugins + * Embedding hwloc in Other Software + * Frequently Asked Questions + +Make sure to have had a look at those too! + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/README-ompi.txt b/opal/mca/hwloc/hwloc1112/hwloc/README-ompi.txt new file mode 100644 index 00000000000..745631bf93c --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/README-ompi.txt @@ -0,0 +1,5 @@ +Cherry-picked commits after 1.11.2: + +open-mpi/hwloc@d2d07b9a2268699e13e1644b4f2ef7a53ef7396c +open-mpi/hwloc@9549fd59af04dca2e2340e17f0e685f8c552d818 +open-mpi/hwloc@0ab7af5e90fc2b58be30b2126cc2a73f9f7ecfe9 diff --git a/opal/mca/hwloc/hwloc1112/hwloc/VERSION b/opal/mca/hwloc/hwloc1112/hwloc/VERSION new file mode 100644 index 00000000000..c809f10a05a --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/VERSION @@ -0,0 +1,42 @@ +# This is the VERSION file for hwloc, describing the precise version +# of hwloc in this distribution. The various components of the version +# number below are combined to form a single version number string. + +# major, minor, and release are generally combined in the form +# ... If release is zero, then it is omitted. + +major=1 +minor=11 +release=2 + +# greek is used for alpha or beta release tags. If it is non-empty, +# it will be appended to the version number. It does not have to be +# numeric. Common examples include a1 (alpha release 1), b1 (beta +# release 1), sc2005 (Super Computing 2005 release). The only +# requirement is that it must be entirely printable ASCII characters +# and have no white space. 
+ +greek= + +# The date when this release was created + +date="Dec 17, 2015" + +# If snapshot=1, then use the value from snapshot_version as the +# entire hwloc version (i.e., ignore major, minor, release, and +# greek). This is only set to 1 when making snapshot tarballs. +snapshot=0 +snapshot_version=${major}.${minor}.${release}${greek}-git + +# The shared library version of hwloc's public library. This version +# is maintained in accordance with the "Library Interface Versions" +# chapter from the GNU Libtool documentation. Notes: + +# 1. Since version numbers are associated with *releases*, the version +# number maintained on the hwloc git master (and developer branches) +# is always 0:0:0. + +# 2. Version numbers are described in the Libtool current:revision:age +# format. + +libhwloc_so_version=11:8:6 diff --git a/opal/mca/hwloc/hwloc1110/hwloc/config/distscript.sh b/opal/mca/hwloc/hwloc1112/hwloc/config/distscript.sh similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/config/distscript.sh rename to opal/mca/hwloc/hwloc1112/hwloc/config/distscript.sh diff --git a/opal/mca/hwloc/hwloc1110/hwloc/config/hwloc.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc.m4 similarity index 98% rename from opal/mca/hwloc/hwloc1110/hwloc/config/hwloc.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc.m4 index 2f44504fd10..24e1beac1ba 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/config/hwloc.m4 +++ b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc.m4 @@ -1,7 +1,7 @@ dnl -*- Autoconf -*- dnl dnl Copyright © 2009-2015 Inria. All rights reserved. -dnl Copyright © 2009-2012 Université Bordeaux +dnl Copyright © 2009-2012, 2015 Université Bordeaux dnl Copyright © 2004-2005 The Trustees of Indiana University and Indiana dnl University Research and Technology dnl Corporation. All rights reserved. 
@@ -398,7 +398,8 @@ EOF]) GROUP_RELATIONSHIP, SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PSAPI_WORKING_SET_EX_BLOCK, - PSAPI_WORKING_SET_EX_INFORMATION], + PSAPI_WORKING_SET_EX_INFORMATION, + PROCESSOR_NUMBER], [],[],[[#include ]]) CPPFLAGS="$old_CPPFLAGS" AC_CHECK_LIB([gdi32], [main], @@ -573,15 +574,13 @@ EOF]) AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no])]) - AC_MSG_CHECKING([for working _syscall3]) + AC_MSG_CHECKING([for working syscall]) AC_LINK_IFELSE([ AC_LANG_PROGRAM([[ - #include - #include - #define __NR_hwloc_test 123 - _syscall3(int, hwloc_test, int, param1, int, param2, int, param3); - ]], [[ hwloc_test(1, 2, 3); ]])], - [AC_DEFINE([HWLOC_HAVE__SYSCALL3], [1], [Define to 1 if the _syscall3 macro works]) + #include + #include + ]], [[syscall(1, 2, 3);]])], + [AC_DEFINE([HWLOC_HAVE_SYSCALL], [1], [Define to 1 if function `syscall' is available]) AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no])]) @@ -715,7 +714,10 @@ EOF]) # Linux libudev support if test "x$enable_libudev" != xno; then AC_CHECK_HEADERS([libudev.h], [ - AC_CHECK_LIB([udev], [udev_device_new_from_subsystem_sysname], [HWLOC_LIBS="$HWLOC_LIBS -ludev"]) + AC_CHECK_LIB([udev], [udev_device_new_from_subsystem_sysname], [ + HWLOC_LIBS="$HWLOC_LIBS -ludev" + AC_DEFINE([HWLOC_HAVE_LIBUDEV], [1], [Define to 1 if you have libudev.]) + ]) ]) fi @@ -885,8 +887,11 @@ EOF]) # lstopo needs more AC_CHECK_HEADERS([X11/Xutil.h], [AC_CHECK_HEADERS([X11/keysym.h], - [AC_DEFINE([HWLOC_HAVE_X11_KEYSYM], [1], [Define to 1 if X11 headers including Xutil.h and keysym.h are available.])]) - AC_SUBST([HWLOC_X11_LIBS], ["-lX11"]) + [AC_DEFINE([HWLOC_HAVE_X11_KEYSYM], [1], [Define to 1 if X11 headers including Xutil.h and keysym.h are available.]) + HWLOC_X11_CPPFLAGS="$X_CFLAGS" + AC_SUBST([HWLOC_X11_CPPFLAGS]) + HWLOC_X11_LIBS="$X_PRE_LIBS $X_LIBS -lX11 $X_EXTRA_LIBS" + AC_SUBST([HWLOC_X11_LIBS])]) ], [], [#include ]) ]) ]) @@ -1211,6 +1216,7 @@ AC_DEFUN([HWLOC_DO_AM_CONDITIONALS],[ AM_CONDITIONAL([HWLOC_HAVE_WINDOWS], [test 
"x$hwloc_windows" = "xyes"]) AM_CONDITIONAL([HWLOC_HAVE_MINGW32], [test "x$target_os" = "xmingw32"]) + AM_CONDITIONAL([HWLOC_HAVE_X86], [test "x$hwloc_x86_32" = "xyes" -o "x$hwloc_x86_64" = "xyes"]) AM_CONDITIONAL([HWLOC_HAVE_X86_32], [test "x$hwloc_x86_32" = "xyes"]) AM_CONDITIONAL([HWLOC_HAVE_X86_64], [test "x$hwloc_x86_64" = "xyes"]) AM_CONDITIONAL([HWLOC_HAVE_X86_CPUID], [test "x$hwloc_have_x86_cpuid" = "xyes"]) diff --git a/opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_check_attributes.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_attributes.m4 similarity index 99% rename from opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_check_attributes.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_attributes.m4 index 765407168ba..96348e819ee 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_check_attributes.m4 +++ b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_attributes.m4 @@ -322,9 +322,10 @@ AC_DEFUN([_HWLOC_CHECK_ATTRIBUTES], [ # Attribute may_alias: No suitable cross-check available, that works for non-supporting compilers # Ignored by intel-9.1.045 -- turn off with -wd1292 # Ignored by PGI-6.2.5; ignore not detected due to missing cross-check + # The test case is chosen to match our only use in topology-xml-*.c, and reproduces an xlc-13.1.0 bug. 
# _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([may_alias], - [int * p_value __attribute__ ((__may_alias__));], + [struct { int i; } __attribute__ ((__may_alias__)) * p_value;], [], []) diff --git a/opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_check_vendor.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_vendor.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_check_vendor.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_vendor.m4 diff --git a/opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_check_visibility.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_visibility.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_check_visibility.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_check_visibility.m4 diff --git a/opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_components.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_components.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_components.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_components.m4 diff --git a/opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_get_version.sh b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_get_version.sh similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_get_version.sh rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_get_version.sh diff --git a/opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_internal.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_internal.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_internal.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_internal.m4 diff --git a/opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_pkg.m4 b/opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_pkg.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/config/hwloc_pkg.m4 rename to opal/mca/hwloc/hwloc1112/hwloc/config/hwloc_pkg.m4 diff --git a/opal/mca/hwloc/hwloc1110/hwloc/config/test-driver 
b/opal/mca/hwloc/hwloc1112/hwloc/config/test-driver similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/config/test-driver rename to opal/mca/hwloc/hwloc1112/hwloc/config/test-driver diff --git a/opal/mca/hwloc/hwloc1110/hwloc/configure.ac b/opal/mca/hwloc/hwloc1112/hwloc/configure.ac similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/configure.ac rename to opal/mca/hwloc/hwloc1112/hwloc/configure.ac diff --git a/opal/mca/hwloc/hwloc1110/hwloc/contrib/hwloc-valgrind.supp b/opal/mca/hwloc/hwloc1112/hwloc/contrib/hwloc-valgrind.supp similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/contrib/hwloc-valgrind.supp rename to opal/mca/hwloc/hwloc1112/hwloc/contrib/hwloc-valgrind.supp diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/README b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/README new file mode 100644 index 00000000000..c7d8d472bb8 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/README @@ -0,0 +1,36 @@ +This Visual Studio support is experimental. +Thanks to Eloi Gaudry for contributing the first draft of files. + + +No PlatformToolset is specified, so that the default target is always +used during build. +That means your build may not be compatible with older systems. +Enforce a specific toolset before building if needed. + + +hwloc-assembler-remote is not built because it requires a hardwired +path to hwloc-assembler and it uses ssh. + +hwloc-compress-dir not built because needs work. + +hwloc-gather-topology is Linux specific. + +hwloc-ps is not built because it does nothing on Windows anyway. 
+Possible code proposed by Eloi Gaudry: + // Get the process list snapshot + HANDLE hProcessSnapShot = CreateToolhelp32Snapshot(TH32CS_SNAPALL, 0); + // Initialize the process entry structure + PROCESSENTRY32 ProcessEntry = { 0 } ; + ProcessEntry.dwSize = sizeof(ProcessEntry); + // Get the first process info + if (Process32First( hProcessSnapShot, &ProcessEntry)) { + do { + HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, ProcessEntry.th32ProcessID) ; + if (hProcess) { + std::cerr << ProcessEntry.th32ProcessID << "(" << ProcessEntry.szExeFile << ")" << std::endl ; + } + // check the PROCESSENTRY32 for other members. + } while (Process32Next(hProcessSnapShot, &ProcessEntry)); + // Close the handle + CloseHandle( hProcessSnapShot ) ; + } diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-annotate.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-annotate.vcxproj new file mode 100644 index 00000000000..42e9ca0f1a3 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-annotate.vcxproj @@ -0,0 +1,90 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB8F} + hwloc-bind + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\utils\hwloc + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + 
libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-annotate.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-annotate.vcxproj.filters new file mode 100644 index 00000000000..8c95a9b8ea8 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-annotate.vcxproj.filters @@ -0,0 +1,69 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-assembler.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-assembler.vcxproj new file mode 100644 index 00000000000..aba819a5725 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-assembler.vcxproj @@ -0,0 +1,90 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB9A} + hwloc-bind + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + 
..\..\include;..\..\utils\hwloc + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-assembler.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-assembler.vcxproj.filters new file mode 100644 index 00000000000..2a1cb719c64 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-assembler.vcxproj.filters @@ -0,0 +1,69 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-bind.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-bind.vcxproj new file mode 100644 index 00000000000..a544967d80f --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-bind.vcxproj @@ -0,0 +1,91 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB91} + hwloc-bind + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 
+ + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\utils\hwloc + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-bind.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-bind.vcxproj.filters new file mode 100644 index 00000000000..dea4bf6ee0d --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-bind.vcxproj.filters @@ -0,0 +1,72 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-calc.vcxproj 
b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-calc.vcxproj new file mode 100644 index 00000000000..65e172d9bfb --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-calc.vcxproj @@ -0,0 +1,91 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB92} + hwloc-calc + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\utils\hwloc + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-calc.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-calc.vcxproj.filters new file mode 100644 index 00000000000..96caf5dbf6a --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-calc.vcxproj.filters @@ -0,0 +1,72 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + 
Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-diff.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-diff.vcxproj new file mode 100644 index 00000000000..c5d3750380b --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-diff.vcxproj @@ -0,0 +1,89 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB93} + hwloc-bind + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\utils\hwloc + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-diff.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-diff.vcxproj.filters new file mode 100644 index 00000000000..7b648e90e9b --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-diff.vcxproj.filters @@ -0,0 +1,66 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + 
cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distances.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distances.vcxproj new file mode 100644 index 00000000000..aadbb45d0eb --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distances.vcxproj @@ -0,0 +1,90 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB94} + hwloc-bind + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\utils\hwloc + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distances.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distances.vcxproj.filters new file mode 100644 index 00000000000..7a1af4833f1 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distances.vcxproj.filters @@ -0,0 +1,69 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distrib.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distrib.vcxproj new file mode 100644 index 00000000000..5407924ef36 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distrib.vcxproj @@ -0,0 +1,90 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB95} + hwloc-bind + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\utils\hwloc + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 
+ NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distrib.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distrib.vcxproj.filters new file mode 100644 index 00000000000..7b77f48587a --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-distrib.vcxproj.filters @@ -0,0 +1,69 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-info.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-info.vcxproj new file mode 100644 index 00000000000..ce01efe074c --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-info.vcxproj @@ -0,0 +1,91 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB96} + hwloc-info + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + 
PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\utils\hwloc + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-info.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-info.vcxproj.filters new file mode 100644 index 00000000000..9769af14a1d --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-info.vcxproj.filters @@ -0,0 +1,72 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-patch.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-patch.vcxproj new file mode 100644 index 00000000000..64faf426e4b --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-patch.vcxproj @@ -0,0 +1,89 @@ + + + + 
{9DE76C6D-4773-4766-9F93-69C56166CB97} + hwloc-bind + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\utils\hwloc + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-patch.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-patch.vcxproj.filters new file mode 100644 index 00000000000..de6769da896 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc-patch.vcxproj.filters @@ -0,0 +1,66 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git 
a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc.sln b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc.sln new file mode 100644 index 00000000000..bfb53e27dbf --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc.sln @@ -0,0 +1,121 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Express 2012 for Windows Desktop +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libhwloc", "libhwloc.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB8D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lstopo-no-graphics", "lstopo-no-graphics.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB8E}" + ProjectSection(ProjectDependencies) = postProject + {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-annotate", "hwloc-annotate.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB8F}" + ProjectSection(ProjectDependencies) = postProject + {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-assembler", "hwloc-assembler.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB90}" + ProjectSection(ProjectDependencies) = postProject + {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-bind", "hwloc-bind.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB91}" + ProjectSection(ProjectDependencies) = postProject + {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-calc", "hwloc-calc.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB92}" + ProjectSection(ProjectDependencies) = postProject + {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} + 
EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-diff", "hwloc-diff.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB93}" + ProjectSection(ProjectDependencies) = postProject + {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-distances", "hwloc-distances.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB94}" + ProjectSection(ProjectDependencies) = postProject + {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-distrib", "hwloc-distrib.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB95}" + ProjectSection(ProjectDependencies) = postProject + {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-info", "hwloc-info.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB96}" + ProjectSection(ProjectDependencies) = postProject + {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-patch", "hwloc-patch.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB97}" + ProjectSection(ProjectDependencies) = postProject + {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lstopo", "lstopo.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB98}" + ProjectSection(ProjectDependencies) = postProject + {9DE76C6D-4773-4766-9F93-69C56166CB8E} = {9DE76C6D-4773-4766-9F93-69C56166CB8E} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lstopo-win", "lstopo-win.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB99}" + ProjectSection(ProjectDependencies) = postProject + 
{9DE76C6D-4773-4766-9F93-69C56166CB98} = {9DE76C6D-4773-4766-9F93-69C56166CB98} + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Release|x64 = Release|x64 + ReleaseStatic|x64 = ReleaseStatic|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {9DE76C6D-4773-4766-9F93-69C56166CB8D}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB8D}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB8D}.ReleaseStatic|x64.ActiveCfg = ReleaseStatic|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB8D}.ReleaseStatic|x64.Build.0 = ReleaseStatic|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB8E}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB8E}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB8E}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB8F}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB8F}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB8F}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB90}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB90}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB90}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB91}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB91}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB91}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB92}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB92}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB92}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB93}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB93}.Release|x64.Build.0 = Release|x64 + 
{9DE76C6D-4773-4766-9F93-69C56166CB93}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB94}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB94}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB94}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB95}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB95}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB95}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB96}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB96}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB96}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB97}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB97}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB97}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB98}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB98}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB98}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB99}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB99}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB99}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB99}.ReleaseStatic|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB9A}.Release|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB9A}.Release|x64.Build.0 = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB9A}.ReleaseStatic|x64.ActiveCfg = Release|x64 + {9DE76C6D-4773-4766-9F93-69C56166CB9A}.ReleaseStatic|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + 
EndGlobalSection +EndGlobal diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc_config.h b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc_config.h new file mode 100644 index 00000000000..49237163b0a --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/hwloc_config.h @@ -0,0 +1,62 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2014 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/* The configuration file */ + +#ifndef HWLOC_CONFIG_H +#define HWLOC_CONFIG_H + +#define __hwloc_restrict +#define __hwloc_inline __inline + +#define __hwloc_attribute_unused +#define __hwloc_attribute_malloc +#define __hwloc_attribute_const +#define __hwloc_attribute_pure +#define __hwloc_attribute_deprecated +#define __hwloc_attribute_may_alias + +/* Defined to 1 if you have the `windows.h' header. */ +#define HWLOC_HAVE_WINDOWS_H 1 +#define hwloc_pid_t HANDLE +#define hwloc_thread_t HANDLE + +#include <windows.h> +#include <BaseTsd.h> +typedef DWORDLONG hwloc_uint64_t; +typedef SSIZE_T ssize_t; +#define snprintf _snprintf +#define strcasecmp _stricmp +#define strncasecmp _strnicmp +#define strdup _strdup +#define strtoull _strtoui64 +#define strtoll _strtoi64 +#define S_ISREG(m) ((m)&_S_IFREG) +#define S_ISDIR( m ) (((m) & S_IFMT) == S_IFDIR) +#define putenv _putenv + +#if defined( _USRDLL ) /* dynamic linkage */ +#if defined( DECLSPEC_EXPORTS ) +#define HWLOC_DECLSPEC __declspec(dllexport) +#else +#define HWLOC_DECLSPEC __declspec(dllimport) +#endif +#else /* static linkage */ +#define HWLOC_DECLSPEC +#endif + +/* Whether we need to re-define all the hwloc public symbols or not */ +#define HWLOC_SYM_TRANSFORM 0 + +/* The hwloc symbol prefix */ +#define HWLOC_SYM_PREFIX hwloc_ + +/* The hwloc symbol prefix in all caps */ +#define HWLOC_SYM_PREFIX_CAPS HWLOC_ + +#endif /* HWLOC_CONFIG_H */ diff --git
a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/libhwloc.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/libhwloc.vcxproj new file mode 100644 index 00000000000..e1475e6df93 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/libhwloc.vcxproj @@ -0,0 +1,238 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB8D} + libhwloc + Win32Proj + + + + DebugStatic + x64 + + + DebugDll + x64 + + + ReleaseStatic + x64 + + + Release + x64 + + + + + DynamicLibrary + true + MultiByte + v110 + + + StaticLibrary + true + MultiByte + v110 + + + DynamicLibrary + false + true + MultiByte + v110 + + + StaticLibrary + false + true + MultiByte + v110 + + + + + + + + + + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + copy /V /Y private_config.h ..\..\include\private\autogen\config.h && copy /V /Y hwloc_config.h ..\..\include\hwloc\autogen\config.h && copy /V /Y static-components.h ..\..\src\static-components.h + + + + + Level3 + Disabled + WIN32;WIN64;_DEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;%(AdditionalIncludeDirectories) + + + Console + true + + + + + copy /V /Y private_config.h ..\..\include\private\autogen\config.h && copy /V /Y hwloc_config.h ..\..\include\hwloc\autogen\config.h && copy /V /Y static-components.h ..\..\src\static-components.h + + + + + Level3 + Disabled + WIN32;WIN64;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;%(AdditionalIncludeDirectories) + + + Console + true + + + + + copy 
/V /Y private_config.h ..\..\include\private\autogen\config.h && copy /V /Y hwloc_config.h ..\..\include\hwloc\autogen\config.h && copy /V /Y static-components.h ..\..\src\static-components.h + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\src + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + $(OutDir)$(ProjectName).lib + true + $(OutDir)$(TargetName).dll + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + + + + + copy /V /Y private_config.h ..\..\include\private\autogen\config.h && copy /V /Y hwloc_config.h ..\..\include\hwloc\autogen\config.h && copy /V /Y static-components.h ..\..\src\static-components.h + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\src + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + $(OutDir)$(ProjectName).lib + true + $(OutDir)$(TargetName).dll + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/libhwloc.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/libhwloc.vcxproj.filters new file mode 100644 index 00000000000..77b4f779015 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/libhwloc.vcxproj.filters @@ -0,0 +1,123 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-no-graphics.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-no-graphics.vcxproj new file mode 100644 index 00000000000..48659e8049a --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-no-graphics.vcxproj @@ -0,0 +1,97 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB8E} + lstopo-no-graphics + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\utils\hwloc;..\..\utils\lstopo + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + 
libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-no-graphics.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-no-graphics.vcxproj.filters new file mode 100644 index 00000000000..d888f3a1836 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-no-graphics.vcxproj.filters @@ -0,0 +1,90 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-win.vcxproj b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-win.vcxproj new file mode 100644 index 00000000000..fdbefca8ccc --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-win.vcxproj @@ -0,0 +1,99 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB99} + lstopo-no-graphics + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + 
MultiThreadedDLL + true + true + LSTOPO_HAVE_GRAPHICS=1;PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\utils\hwloc;..\..\utils\lstopo + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Windows + true + MachineX64 + NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + mainCRTStartup + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-win.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-win.vcxproj.filters new file mode 100644 index 00000000000..4ce0ed87fbe --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo-win.vcxproj.filters @@ -0,0 +1,93 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo.vcxproj 
b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo.vcxproj new file mode 100644 index 00000000000..ba75246409a --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo.vcxproj @@ -0,0 +1,98 @@ + + + + {9DE76C6D-4773-4766-9F93-69C56166CB98} + lstopo-no-graphics + Win32Proj + + + + Release + x64 + + + + + Application + false + true + MultiByte + v110 + + + + + + + + + + false + true + $(ProjectName)-5 + $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + + + MaxSpeed + true + MultiThreadedDLL + true + true + LSTOPO_HAVE_GRAPHICS=1;PSAPI_VERSION=1;WIN32;NDEBUG;WIN64;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + ..\..\include;..\..\utils\hwloc;..\..\utils\lstopo + + + $(IntDir)$(TargetFileName).intermediate.manifest + false + false + false + libcmt.lib;%(IgnoreSpecificDefaultLibraries) + true + $(OutDir)$(TargetName).exe + $(OutDir)$(ProjectName).pdb + LinkVerboseLib + Console + true + MachineX64 + NoErrorReport + $(SolutionDir)$(Platform)\Release;$(AdditionalLibraryDirectories) + libhwloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo.vcxproj.filters b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo.vcxproj.filters new file mode 100644 index 00000000000..4ce0ed87fbe --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/lstopo.vcxproj.filters @@ -0,0 +1,93 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + 
+ Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/private_config.h b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/private_config.h new file mode 100644 index 00000000000..942aed787e6 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/contrib/windows/private_config.h @@ -0,0 +1,674 @@ +/* + * Copyright © 2009, 2011, 2012 CNRS. All rights reserved. + * Copyright © 2009-2015 Inria. All rights reserved. + * Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved. + * Copyright © 2009 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef HWLOC_CONFIGURE_H +#define HWLOC_CONFIGURE_H + +#define DECLSPEC_EXPORTS + +#define HWLOC_HAVE_MSVC_CPUIDEX 1 + +/* Define to 1 if the system has the type `CACHE_DESCRIPTOR'. */ +#define HAVE_CACHE_DESCRIPTOR 0 + +/* Define to 1 if the system has the type `CACHE_RELATIONSHIP'. */ +#define HAVE_CACHE_RELATIONSHIP 0 + +/* Define to 1 if you have the `clz' function. */ +/* #undef HAVE_CLZ */ + +/* Define to 1 if you have the `clzl' function. */ +/* #undef HAVE_CLZL */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_CL_CL_EXT_H */ + +/* Define to 1 if you have the `cpuset_setaffinity' function. */ +/* #undef HAVE_CPUSET_SETAFFINITY */ + +/* Define to 1 if you have the `cpuset_setid' function. */ +/* #undef HAVE_CPUSET_SETID */ + +/* Define to 1 if we have -lcuda */ +/* #undef HAVE_CUDA */ + +/* Define to 1 if you have the header file. 
*/ +/* #undef HAVE_CUDA_H */ + +/* Define to 1 if you have the <cuda_runtime_api.h> header file. */ +/* #undef HAVE_CUDA_RUNTIME_API_H */ + +/* Define to 1 if you have the declaration of `CL_DEVICE_TOPOLOGY_AMD', and to + 0 if you don't. */ +/* #undef HAVE_DECL_CL_DEVICE_TOPOLOGY_AMD */ + +/* Define to 1 if you have the declaration of `CTL_HW', and to 0 if you don't. + */ +/* #undef HAVE_DECL_CTL_HW */ + +/* Define to 1 if you have the declaration of `fabsf', and to 0 if you don't. + */ +#define HAVE_DECL_FABSF 1 + +/* Define to 1 if you have the declaration of `HW_NCPU', and to 0 if you + don't. */ +/* #undef HAVE_DECL_HW_NCPU */ + +/* Define to 1 if you have the declaration of + `nvmlDeviceGetMaxPcieLinkGeneration', and to 0 if you don't. */ +/* #undef HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION */ + +/* Define to 1 if you have the declaration of `pthread_getaffinity_np', and to + 0 if you don't. */ +#define HAVE_DECL_PTHREAD_GETAFFINITY_NP 0 + +/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to + 0 if you don't. */ +#define HAVE_DECL_PTHREAD_SETAFFINITY_NP 0 + +/* Define to 1 if you have the declaration of `strtoull', and to 0 if you + don't. */ +#define HAVE_DECL_STRTOULL 1 + +/* Define to 1 if you have the declaration of `_SC_LARGE_PAGESIZE', and to 0 + if you don't. */ +#define HAVE_DECL__SC_LARGE_PAGESIZE 0 + +/* Define to 1 if you have the declaration of `_SC_NPROCESSORS_CONF', and to 0 + if you don't. */ +#define HAVE_DECL__SC_NPROCESSORS_CONF 0 + +/* Define to 1 if you have the declaration of `_SC_NPROCESSORS_ONLN', and to 0 + if you don't. */ +#define HAVE_DECL__SC_NPROCESSORS_ONLN 0 + +/* Define to 1 if you have the declaration of `_SC_NPROC_CONF', and to 0 if + you don't. */ +#define HAVE_DECL__SC_NPROC_CONF 0 + +/* Define to 1 if you have the declaration of `_SC_NPROC_ONLN', and to 0 if + you don't. */ +#define HAVE_DECL__SC_NPROC_ONLN 0 + +/* Define to 1 if you have the declaration of `_SC_PAGESIZE', and to 0 if you + don't.
*/ +#define HAVE_DECL__SC_PAGESIZE 0 + +/* Define to 1 if you have the declaration of `_SC_PAGE_SIZE', and to 0 if you + don't. */ +#define HAVE_DECL__SC_PAGE_SIZE 0 + +/* Define to 1 if you have the <dirent.h> header file. */ +/* #define HAVE_DIRENT_H 1 */ +#undef HAVE_DIRENT_H + +/* Define to 1 if you have the <dlfcn.h> header file. */ +/* #undef HAVE_DLFCN_H */ + +/* Define to 1 if you have the `ffs' function. */ +/* #undef HAVE_FFS */ + +/* Define to 1 if you have the `ffsl' function. */ +/* #undef HAVE_FFSL */ + +/* Define to 1 if you have the `fls' function. */ +/* #undef HAVE_FLS */ + +/* Define to 1 if you have the `flsl' function. */ +/* #undef HAVE_FLSL */ + +/* Define to 1 if you have the `getpagesize' function. */ +#define HAVE_GETPAGESIZE 1 + +/* Define to 1 if the system has the type `GROUP_AFFINITY'. */ +#define HAVE_GROUP_AFFINITY 1 + +/* Define to 1 if the system has the type `GROUP_RELATIONSHIP'. */ +#define HAVE_GROUP_RELATIONSHIP 1 + +/* Define to 1 if you have the `host_info' function. */ +/* #undef HAVE_HOST_INFO */ + +/* Define to 1 if you have the <infiniband/verbs.h> header file. */ +/* #undef HAVE_INFINIBAND_VERBS_H */ + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if the system has the type `KAFFINITY'. */ +#define HAVE_KAFFINITY 1 + +/* Define to 1 if you have the <kstat.h> header file. */ +/* #undef HAVE_KSTAT_H */ + +/* Define to 1 if you have the <langinfo.h> header file. */ +/* #undef HAVE_LANGINFO_H */ + +/* Define to 1 if we have -lgdi32 */ +#define HAVE_LIBGDI32 1 + +/* Define to 1 if we have -libverbs */ +/* #undef HAVE_LIBIBVERBS */ + +/* Define to 1 if we have -lkstat */ +/* #undef HAVE_LIBKSTAT */ + +/* Define to 1 if we have -llgrp */ +/* #undef HAVE_LIBLGRP */ + +/* Define to 1 if you have the <locale.h> header file. */ +#define HAVE_LOCALE_H 1 + +/* Define to 1 if the system has the type `LOGICAL_PROCESSOR_RELATIONSHIP'. */ +#define HAVE_LOGICAL_PROCESSOR_RELATIONSHIP 1 + +/* Define to 1 if you have the <mach/mach_host.h> header file.
*/ +/* #undef HAVE_MACH_MACH_HOST_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MACH_MACH_INIT_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the `memalign' function. */ +/* #undef HAVE_MEMALIGN */ + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if we have -lmyriexpress */ +/* #undef HAVE_MYRIEXPRESS */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MYRIEXPRESS_H */ + +/* Define to 1 if you have the `nl_langinfo' function. */ +/* #undef HAVE_NL_LANGINFO */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NUMAIF_H */ + +/* Define to 1 if the system has the type `NUMA_NODE_RELATIONSHIP'. */ +#define HAVE_NUMA_NODE_RELATIONSHIP 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NVCTRL_NVCTRL_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NVML_H */ + +/* Define to 1 if you have the `openat' function. */ +/* #undef HAVE_OPENAT */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PICL_H */ + +/* Define to 1 if you have the `posix_memalign' function. */ +/* #undef HAVE_POSIX_MEMALIGN */ + +/* Define to 1 if the system has the type `PROCESSOR_CACHE_TYPE'. */ +#define HAVE_PROCESSOR_CACHE_TYPE 1 + +/* Define to 1 if the system has the type `PROCESSOR_GROUP_INFO'. */ +#define HAVE_PROCESSOR_GROUP_INFO 1 + +/* Define to 1 if the system has the type `PROCESSOR_RELATIONSHIP'. */ +#define HAVE_PROCESSOR_RELATIONSHIP 1 + +/* Define to 1 if the system has the type `PSAPI_WORKING_SET_EX_BLOCK'. */ +/* #undef HAVE_PSAPI_WORKING_SET_EX_BLOCK */ + +/* Define to 1 if the system has the type `PSAPI_WORKING_SET_EX_INFORMATION'. + */ +/* #undef HAVE_PSAPI_WORKING_SET_EX_INFORMATION */ + +/* Define to 1 if the system has the type `PROCESSOR_NUMBER'. */ +#define HAVE_PROCESSOR_NUMBER 1 + +/* Define to 1 if you have the header file. 
*/ +/* #undef HAVE_PTHREAD_NP_H */ + +/* Define to 1 if the system has the type `pthread_t'. */ +/* #undef HAVE_PTHREAD_T */ +#undef HAVE_PTHREAD_T + +/* Define to 1 if you have the `putwc' function. */ +#define HAVE_PUTWC 1 + +/* Define to 1 if the system has the type `RelationProcessorPackage'. */ +/* #undef HAVE_RELATIONPROCESSORPACKAGE */ + +/* Define to 1 if you have the `setlocale' function. */ +#define HAVE_SETLOCALE 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strftime' function. */ +#define HAVE_STRFTIME 1 + +/* Define to 1 if you have the header file. */ +/* #define HAVE_STRINGS_H 1*/ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strncasecmp' function. */ +#define HAVE_STRNCASECMP 1 + +/* Define to '1' if sysctl is present and usable */ +/* #undef HAVE_SYSCTL */ + +/* Define to '1' if sysctlbyname is present and usable */ +/* #undef HAVE_SYSCTLBYNAME */ + +/* Define to 1 if the system has the type + `SYSTEM_LOGICAL_PROCESSOR_INFORMATION'. */ +#define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION 1 + +/* Define to 1 if the system has the type + `SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX'. */ +#define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_CPUSET_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_LGRP_USER_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_MMAN_H */ + +/* Define to 1 if you have the header file. */ +/* #define HAVE_SYS_PARAM_H 1 */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_SYSCTL_H */ + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_UTSNAME_H */ + +/* Define to 1 if you have the `uname' function. */ +/* #undef HAVE_UNAME */ + +/* Define to 1 if you have the header file. */ +/* #define HAVE_UNISTD_H 1 */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the `uselocale' function. */ +/* #undef HAVE_USELOCALE */ + +/* Define to 1 if the system has the type `wchar_t'. */ +#define HAVE_WCHAR_T 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_X11_KEYSYM_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_X11_XLIB_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_X11_XUTIL_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_XLOCALE_H */ + +/* Define to 1 on AIX */ +/* #undef HWLOC_AIX_SYS */ + +/* Define to 1 on BlueGene/Q */ +/* #undef HWLOC_BGQ_SYS */ + +/* Whether C compiler supports symbol visibility or not */ +#define HWLOC_C_HAVE_VISIBILITY 0 + +/* Define to 1 on Darwin */ +/* #undef HWLOC_DARWIN_SYS */ + +/* Whether we are in debugging mode or not */ +/* #undef HWLOC_DEBUG */ + +/* Define to 1 on *FREEBSD */ +/* #undef HWLOC_FREEBSD_SYS */ + +/* Whether your compiler has __attribute__ or not */ +/* #define HWLOC_HAVE_ATTRIBUTE 1 */ +#undef HWLOC_HAVE_ATTRIBUTE + +/* Whether your compiler has __attribute__ aligned or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_ALIGNED 1 */ + +/* Whether your compiler has __attribute__ always_inline or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_ALWAYS_INLINE 1 */ + +/* Whether your compiler has __attribute__ cold or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_COLD 1 */ + +/* Whether your compiler has __attribute__ const or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_CONST 1 */ + +/* Whether your compiler has __attribute__ deprecated or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_DEPRECATED 1 */ + +/* Whether your compiler has __attribute__ format or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_FORMAT 1 */ + 
+/* Whether your compiler has __attribute__ hot or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_HOT 1 */ + +/* Whether your compiler has __attribute__ malloc or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_MALLOC 1 */ + +/* Whether your compiler has __attribute__ may_alias or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS 1 */ + +/* Whether your compiler has __attribute__ nonnull or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_NONNULL 1 */ + +/* Whether your compiler has __attribute__ noreturn or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_NORETURN 1 */ + +/* Whether your compiler has __attribute__ no_instrument_function or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION 1 */ + +/* Whether your compiler has __attribute__ packed or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_PACKED 1 */ + +/* Whether your compiler has __attribute__ pure or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_PURE 1 */ + +/* Whether your compiler has __attribute__ sentinel or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_SENTINEL 1 */ + +/* Whether your compiler has __attribute__ unused or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_UNUSED 1 */ + +/* Whether your compiler has __attribute__ warn unused result or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT 1 */ + +/* Whether your compiler has __attribute__ weak alias or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_WEAK_ALIAS 1 */ + +/* Define to 1 if your `ffs' function is known to be broken. */ +/* #undef HWLOC_HAVE_BROKEN_FFS */ + +/* Define to 1 if you have the `cairo' library. */ +/* #undef HWLOC_HAVE_CAIRO */ + +/* Define to 1 if you have the `clz' function. */ +/* #undef HWLOC_HAVE_CLZ */ + +/* Define to 1 if you have the `clzl' function. 
*/ +/* #undef HWLOC_HAVE_CLZL */ + +/* Define to 1 if you have cpuid */ +/* #undef HWLOC_HAVE_CPUID */ + +/* Define to 1 if the CPU_SET macro works */ +/* #undef HWLOC_HAVE_CPU_SET */ + +/* Define to 1 if the CPU_SET_S macro works */ +/* #undef HWLOC_HAVE_CPU_SET_S */ + +/* Define to 1 if you have the `cudart' SDK. */ +/* #undef HWLOC_HAVE_CUDART */ + +/* Define to 1 if function `clz' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_CLZ */ + +/* Define to 1 if function `clzl' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_CLZL */ + +/* Define to 1 if function `ffs' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_FFS */ + +/* Define to 1 if function `ffsl' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_FFSL */ + +/* Define to 1 if function `fls' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_FLS */ + +/* Define to 1 if function `flsl' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_FLSL */ + +/* Define to 1 if you have the `ffs' function. */ +/* #undef HWLOC_HAVE_FFS */ + +/* Define to 1 if you have the `ffsl' function. */ +/* #undef HWLOC_HAVE_FFSL */ + +/* Define to 1 if you have the `fls' function. */ +/* #undef HWLOC_HAVE_FLS */ + +/* Define to 1 if you have the `flsl' function. */ +/* #undef HWLOC_HAVE_FLSL */ + +/* Define to 1 if you have the GL module components. */ +/* #undef HWLOC_HAVE_GL */ + +/* Define to 1 if you have a library providing the termcap interface */ +/* #undef HWLOC_HAVE_LIBTERMCAP */ + +/* Define to 1 if you have the `libxml2' library. */ +/* #undef HWLOC_HAVE_LIBXML2 */ + +/* Define to 1 if building the Linux PCI component */ +/* #undef HWLOC_HAVE_LINUXPCI */ + +/* Define to 1 if mbind is available. */ +/* #undef HWLOC_HAVE_MBIND */ + +/* Define to 1 if migrate_pages is available. */ +/* #undef HWLOC_HAVE_MIGRATE_PAGES */ + +/* Define to 1 if you have the `NVML' library. 
*/ +/* #undef HWLOC_HAVE_NVML */ + +/* Define to 1 if glibc provides the old prototype (without length) of + sched_setaffinity() */ +/* #undef HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ + +/* Define to 1 if you have the `OpenCL' library. */ +/* #undef HWLOC_HAVE_OPENCL */ + +/* Define to 1 if the hwloc library should support dynamically-loaded plugins + */ +/* #undef HWLOC_HAVE_PLUGINS */ + +/* `Define to 1 if you have pthread_getthrds_np' */ +/* #undef HWLOC_HAVE_PTHREAD_GETTHRDS_NP */ + +/* Define to 1 if pthread mutexes are available */ +/* #undef HWLOC_HAVE_PTHREAD_MUTEX */ + +/* Define to 1 if glibc provides a prototype of sched_setaffinity() */ +#define HWLOC_HAVE_SCHED_SETAFFINITY 1 + +/* Define to 1 if set_mempolicy is available. */ +/* #undef HWLOC_HAVE_SET_MEMPOLICY */ + +/* Define to 1 if you have the header file. */ +#define HWLOC_HAVE_STDINT_H 1 + +/* Define to 1 if you have the `windows.h' header. */ +#define HWLOC_HAVE_WINDOWS_H 1 + +/* Define to 1 if X11 headers including Xutil.h and keysym.h are available. 
*/ +/* #undef HWLOC_HAVE_X11_KEYSYM */ + +/* Define to 1 if function `syscall' is available */ +/* #undef HWLOC_HAVE_SYSCALL */ + +/* Define to 1 on HP-UX */ +/* #undef HWLOC_HPUX_SYS */ + +/* Define to 1 on Irix */ +/* #undef HWLOC_IRIX_SYS */ + +/* Define to 1 on Linux */ +/* #undef HWLOC_LINUX_SYS */ + +/* Define to 1 on *NETBSD */ +/* #undef HWLOC_NETBSD_SYS */ + +/* Define to 1 on OSF */ +/* #undef HWLOC_OSF_SYS */ + +/* The size of `unsigned int', as computed by sizeof */ +#define HWLOC_SIZEOF_UNSIGNED_INT 4 + +/* The size of `unsigned long', as computed by sizeof */ +#define HWLOC_SIZEOF_UNSIGNED_LONG 4 + +/* Define to 1 on Solaris */ +/* #undef HWLOC_SOLARIS_SYS */ + +/* The hwloc symbol prefix */ +#define HWLOC_SYM_PREFIX hwloc_ + +/* The hwloc symbol prefix in all caps */ +#define HWLOC_SYM_PREFIX_CAPS HWLOC_ + +/* Whether we need to re-define all the hwloc public symbols or not */ +#define HWLOC_SYM_TRANSFORM 0 + +/* Define to 1 on unsupported systems */ +/* #undef HWLOC_UNSUPPORTED_SYS */ + +/* Define to 1 if ncurses works, preferred over curses */ +/* #undef HWLOC_USE_NCURSES */ + +/* Define to 1 on WINDOWS */ +#define HWLOC_WIN_SYS 1 + +/* Define to 1 on x86_32 */ +/* #undef HWLOC_X86_32_ARCH */ + +/* Define to 1 on x86_64 */ +#define HWLOC_X86_64_ARCH 1 + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "hwloc" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "http://www.open-mpi.org/projects/hwloc/" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "hwloc" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "hwloc" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "hwloc" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. 
*/ +#define PACKAGE_VERSION HWLOC_VERSION + +/* The size of `unsigned int', as computed by sizeof. */ +#define SIZEOF_UNSIGNED_INT 4 + +/* The size of `unsigned long', as computed by sizeof. */ +#define SIZEOF_UNSIGNED_LONG 4 + +/* The size of `void *', as computed by sizeof. */ +#define SIZEOF_VOID_P 8 + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Enable extensions on HP-UX. */ +#ifndef _HPUX_SOURCE +# define _HPUX_SOURCE 1 +#endif + + +/* Enable extensions on AIX 3, Interix. */ +/* +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +*/ + +/* Enable GNU extensions on systems that have them. */ +/* +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +*/ +/* Enable threading extensions on Solaris. */ +/* +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +*/ +/* Enable extensions on HP NonStop. */ +/* +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +*/ +/* Enable general extensions on Solaris. */ +/* +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif +*/ + + +/* Version number of package */ +#define HWLOC_VERSION "1.11.1" +#define VERSION HWLOC_VERSION + +/* Define to 1 if the X Window System is missing or not being used. */ +#define X_DISPLAY_MISSING 1 + +/* Define to 1 if on MINIX. */ +/* #undef _MINIX */ + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +/* #undef _POSIX_1_SOURCE */ + +/* Define to 1 if you need to in order for `stat' and other things to work. 
*/ +/* #undef _POSIX_SOURCE */ + +/* Define this to the process ID type */ +#define hwloc_pid_t HANDLE + +/* Define this to either strncasecmp or strncmp */ +#define hwloc_strncasecmp strncasecmp + +/* Define this to the thread ID type */ +#define hwloc_thread_t HANDLE + + +#endif /* HWLOC_CONFIGURE_H */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/doc/README.txt b/opal/mca/hwloc/hwloc1112/hwloc/doc/README.txt similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/doc/README.txt rename to opal/mca/hwloc/hwloc1112/hwloc/doc/README.txt diff --git a/opal/mca/hwloc/hwloc1110/hwloc/hwloc.pc.in b/opal/mca/hwloc/hwloc1112/hwloc/hwloc.pc.in similarity index 92% rename from opal/mca/hwloc/hwloc1110/hwloc/hwloc.pc.in rename to opal/mca/hwloc/hwloc1112/hwloc/hwloc.pc.in index 23327a8c9fc..266319bb383 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/hwloc.pc.in +++ b/opal/mca/hwloc/hwloc1112/hwloc/hwloc.pc.in @@ -5,7 +5,7 @@ includedir=@includedir@ Name: hwloc Description: Hardware locality detection and management library -Version: @VERSION@ +Version: @HWLOC_VERSION@ Requires.private: @HWLOC_REQUIRES@ Cflags: -I${includedir} Libs: -L${libdir} -lhwloc diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/Makefile.am b/opal/mca/hwloc/hwloc1112/hwloc/include/Makefile.am similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/Makefile.am rename to opal/mca/hwloc/hwloc1112/hwloc/include/Makefile.am diff --git a/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc.h new file mode 100644 index 00000000000..deb5141fc7e --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc.h @@ -0,0 +1,2422 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2015 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. 
+ */ + +/*===================================================================== + * PLEASE GO READ THE DOCUMENTATION! + * ------------------------------------------------ + * $tarball_directory/doc/doxygen-doc/ + * or + * http://www.open-mpi.org/projects/hwloc/doc/ + *===================================================================== + * + * FAIR WARNING: Do NOT expect to be able to figure out all the + * subtleties of hwloc by simply reading function prototypes and + * constant descriptions here in this file. + * + * Hwloc has wonderful documentation in both PDF and HTML formats for + * your reading pleasure. The formal documentation explains a LOT of + * hwloc-specific concepts, provides definitions, and discusses the + * "big picture" for many of the things that you'll find here in this + * header file. + * + * The PDF/HTML documentation was generated via Doxygen; much of what + * you'll see in there is also here in this file. BUT THERE IS A LOT + * THAT IS IN THE PDF/HTML THAT IS ***NOT*** IN hwloc.h! + * + * There are entire paragraph-length descriptions, discussions, and + * pretty pictures to explain subtle corner cases, provide concrete + * examples, etc. + * + * Please, go read the documentation. :-) + * + * Moreover there are several examples of hwloc use under doc/examples + * in the source tree. + * + *=====================================================================*/ + +/** \file + * \brief The hwloc API. + * + * See hwloc/bitmap.h for bitmap specific macros. + * See hwloc/helper.h for high-level topology traversal helpers. + * See hwloc/inlines.h for the actual inline code of some functions below.
+ */ + +#ifndef HWLOC_H +#define HWLOC_H + +#include +#include +#include +#include +#include + +/* + * Symbol transforms + */ +#include + +/* + * Bitmap definitions + */ + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_api_version API version + * @{ + */ + +/** \brief Indicate at build time which hwloc API version is being used. */ +#define HWLOC_API_VERSION 0x00010b00 + +/** \brief Indicate at runtime which hwloc API version was used at build time. + * + * Should be ::HWLOC_API_VERSION if running on the same version. + */ +HWLOC_DECLSPEC unsigned hwloc_get_api_version(void); + +/** \brief Current component and plugin ABI version (see hwloc/plugins.h) */ +#define HWLOC_COMPONENT_ABI 4 + +/** @} */ + + + +/** \defgroup hwlocality_object_sets Object Sets (hwloc_cpuset_t and hwloc_nodeset_t) + * + * Hwloc uses bitmaps to represent two distinct kinds of object sets: + * CPU sets (::hwloc_cpuset_t) and NUMA node sets (::hwloc_nodeset_t). + * These types are both typedefs to a common back end type + * (::hwloc_bitmap_t), and therefore all the hwloc bitmap functions + * are applicable to both ::hwloc_cpuset_t and ::hwloc_nodeset_t (see + * \ref hwlocality_bitmap). + * + * The rationale for having two different types is that even though + * the actions one wants to perform on these types are the same (e.g., + * enable and disable individual items in the set/mask), they're used + * in very different contexts: one for specifying which processors to + * use and one for specifying which NUMA nodes to use. Hence, the + * name difference is really just to reflect the intent of where the + * type is used. + * + * @{ + */ + +/** \brief A CPU set is a bitmap whose bits are set according to CPU + * physical OS indexes. + * + * It may be consulted and modified with the bitmap API as any + * ::hwloc_bitmap_t (see hwloc/bitmap.h). + * + * Each bit may be converted into a PU object using + * hwloc_get_pu_obj_by_os_index(). 
+ */ +typedef hwloc_bitmap_t hwloc_cpuset_t; +/** \brief A non-modifiable ::hwloc_cpuset_t. */ +typedef hwloc_const_bitmap_t hwloc_const_cpuset_t; + +/** \brief A node set is a bitmap whose bits are set according to NUMA + * memory node physical OS indexes. + * + * It may be consulted and modified with the bitmap API as any + * ::hwloc_bitmap_t (see hwloc/bitmap.h). + * Each bit may be converted into a NUMA node object using + * hwloc_get_numanode_obj_by_os_index(). + * + * When binding memory on a system without any NUMA node + * (when the whole memory is considered as a single memory bank), + * the nodeset may be either empty (no memory selected) + * or full (whole system memory selected). + * + * See also \ref hwlocality_helper_nodeset_convert. + */ +typedef hwloc_bitmap_t hwloc_nodeset_t; +/** \brief A non-modifiable ::hwloc_nodeset_t. + */ +typedef hwloc_const_bitmap_t hwloc_const_nodeset_t; + +/** @} */ + + + +/** \defgroup hwlocality_object_types Object Types + * @{ + */ + +/** \brief Type of topology object. + * + * \note Do not rely on the ordering or completeness of the values as new ones + * may be defined in the future! If you need to compare types, use + * hwloc_compare_types() instead. + */ +typedef enum { + /* *************************************************************** + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + + If new enum values are added here, you MUST also go update the + obj_type_order[] and obj_order_type[] arrays in src/topology.c. + + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + *************************************************************** */ + + HWLOC_OBJ_SYSTEM, /**< \brief Whole system (may be a cluster of machines). + * The whole system that is accessible to hwloc. + * That may comprise several machines in SSI systems + * like Kerrighed. + */ + HWLOC_OBJ_MACHINE, /**< \brief Machine. + * The typical root object type. + * A set of processors and memory with cache + * coherency. 
+ */ + HWLOC_OBJ_NUMANODE, /**< \brief NUMA node. + * A set of processors around memory which the + * processors can directly access. + */ + HWLOC_OBJ_PACKAGE, /**< \brief Physical package, what goes into a socket. + * In the physical meaning, i.e. that you can add + * or remove physically. + */ + HWLOC_OBJ_CACHE, /**< \brief Cache. + * Can be L1i, L1d, L2, L3, ... + */ + HWLOC_OBJ_CORE, /**< \brief Core. + * A computation unit (may be shared by several + * logical processors). + */ + HWLOC_OBJ_PU, /**< \brief Processing Unit, or (Logical) Processor. + * An execution unit (may share a core with some + * other logical processors, e.g. in the case of + * an SMT core). + * + * Objects of this kind are always reported and can + * thus be used as fallback when others are not. + */ + + HWLOC_OBJ_GROUP, /**< \brief Group objects. + * Objects which do not fit in the above but are + * detected by hwloc and are useful to take into + * account for affinity. For instance, some operating systems + * expose their arbitrary processors aggregation this + * way. And hwloc may insert such objects to group + * NUMA nodes according to their distances. + * See also \ref faq_groups. + * + * These objects are ignored when they do not bring + * any structure. + */ + + HWLOC_OBJ_MISC, /**< \brief Miscellaneous objects. + * Objects without particular meaning, that can e.g. be + * added by the application for its own use, or by hwloc + * for miscellaneous objects such as MemoryModule (DIMMs). + */ + + HWLOC_OBJ_BRIDGE, /**< \brief Bridge. + * Any bridge that connects the host or an I/O bus, + * to another I/O bus. + * Bridge objects have neither CPU sets nor node sets. + * They are not added to the topology unless I/O discovery + * is enabled with hwloc_topology_set_flags(). + */ + HWLOC_OBJ_PCI_DEVICE, /**< \brief PCI device. + * These objects have neither CPU sets nor node sets. + * They are not added to the topology unless I/O discovery + * is enabled with hwloc_topology_set_flags(). 
+ */ + HWLOC_OBJ_OS_DEVICE, /**< \brief Operating system device. + * These objects have neither CPU sets nor node sets. + * They are not added to the topology unless I/O discovery + * is enabled with hwloc_topology_set_flags(). + */ + + HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */ + + /* *************************************************************** + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + + If new enum values are added here, you MUST also go update the + obj_type_order[] and obj_order_type[] arrays in src/topology.c. + + WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + *************************************************************** */ +} hwloc_obj_type_t; + +/** \brief Cache type. */ +typedef enum hwloc_obj_cache_type_e { + HWLOC_OBJ_CACHE_UNIFIED, /**< \brief Unified cache. */ + HWLOC_OBJ_CACHE_DATA, /**< \brief Data cache. */ + HWLOC_OBJ_CACHE_INSTRUCTION /**< \brief Instruction cache. + * Only used when the ::HWLOC_TOPOLOGY_FLAG_ICACHES topology flag is set. */ +} hwloc_obj_cache_type_t; + +/** \brief Type of one side (upstream or downstream) of an I/O bridge. */ +typedef enum hwloc_obj_bridge_type_e { + HWLOC_OBJ_BRIDGE_HOST, /**< \brief Host-side of a bridge, only possible upstream. */ + HWLOC_OBJ_BRIDGE_PCI /**< \brief PCI-side of a bridge. */ +} hwloc_obj_bridge_type_t; + +/** \brief Type of a OS device. */ +typedef enum hwloc_obj_osdev_type_e { + HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device. + * For instance "sda" on Linux. */ + HWLOC_OBJ_OSDEV_GPU, /**< \brief Operating system GPU device. + * For instance ":0.0" for a GL display, + * "card0" for a Linux DRM device. */ + HWLOC_OBJ_OSDEV_NETWORK, /**< \brief Operating system network device. + * For instance the "eth0" interface on Linux. */ + HWLOC_OBJ_OSDEV_OPENFABRICS, /**< \brief Operating system openfabrics device. + * For instance the "mlx4_0" InfiniBand HCA device on Linux. 
*/ + HWLOC_OBJ_OSDEV_DMA, /**< \brief Operating system dma engine device. + * For instance the "dma0chan0" DMA channel on Linux. */ + HWLOC_OBJ_OSDEV_COPROC /**< \brief Operating system co-processor device. + * For instance "mic0" for a Xeon Phi (MIC) on Linux, + * "opencl0d0" for a OpenCL device, + * "cuda0" for a CUDA device. */ +} hwloc_obj_osdev_type_t; + +/** \brief Compare the depth of two object types + * + * Types shouldn't be compared as they are, since newer ones may be added in + * the future. This function returns less than, equal to, or greater than zero + * respectively if \p type1 objects usually include \p type2 objects, are the + * same as \p type2 objects, or are included in \p type2 objects. If the types + * can not be compared (because neither is usually contained in the other), + * ::HWLOC_TYPE_UNORDERED is returned. Object types containing CPUs can always + * be compared (usually, a system contains machines which contain nodes which + * contain packages which contain caches, which contain cores, which contain + * processors). + * + * \note ::HWLOC_OBJ_PU will always be the deepest. + * \note This does not mean that the actual topology will respect that order: + * e.g. as of today cores may also contain caches, and packages may also contain + * nodes. This is thus just to be seen as a fallback comparison method. + */ +HWLOC_DECLSPEC int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) __hwloc_attribute_const; + +enum hwloc_compare_types_e { + HWLOC_TYPE_UNORDERED = INT_MAX /**< \brief Value returned by hwloc_compare_types() when types can not be compared. 
\hideinitializer */ +}; + +/** @} */ + + + +/** \defgroup hwlocality_objects Object Structure and Attributes + * @{ + */ + +union hwloc_obj_attr_u; + +/** \brief Object memory */ +struct hwloc_obj_memory_s { + hwloc_uint64_t total_memory; /**< \brief Total memory (in bytes) in this object and its children */ + hwloc_uint64_t local_memory; /**< \brief Local memory (in bytes) */ + + /** \brief Size of array \p page_types */ + unsigned page_types_len; + /** \brief Array of local memory page types, \c NULL if no local memory and \p page_types is 0. + * + * The array is sorted by increasing \p size fields. + * It contains \p page_types_len slots. + */ + struct hwloc_obj_memory_page_type_s { + hwloc_uint64_t size; /**< \brief Size of pages */ + hwloc_uint64_t count; /**< \brief Number of pages of this size */ + } * page_types; +}; + +/** \brief Structure of a topology object + * + * Applications must not modify any field except hwloc_obj.userdata. + */ +struct hwloc_obj { + /* physical information */ + hwloc_obj_type_t type; /**< \brief Type of object */ + unsigned os_index; /**< \brief OS-provided physical index number. + * It is not guaranteed unique across the entire machine, + * except for PUs and NUMA nodes. + */ + char *name; /**< \brief Object description if any */ + + struct hwloc_obj_memory_s memory; /**< \brief Memory attributes */ + + union hwloc_obj_attr_u *attr; /**< \brief Object type-specific Attributes, + * may be \c NULL if no attribute value was found */ + + /* global position */ + unsigned depth; /**< \brief Vertical index in the hierarchy. + * If the topology is symmetric, this is equal to the + * parent depth plus one, and also equal to the number + * of parent/child links from the root object to here. + */ + unsigned logical_index; /**< \brief Horizontal index in the whole list of similar objects, + * hence guaranteed unique across the entire machine. 
+ * Could be a "cousin_rank" since it's the rank within the "cousin" list below + */ + signed os_level; /**< \brief OS-provided physical level, -1 if unknown or meaningless */ + + /* cousins are all objects of the same type (and depth) across the entire topology */ + struct hwloc_obj *next_cousin; /**< \brief Next object of same type and depth */ + struct hwloc_obj *prev_cousin; /**< \brief Previous object of same type and depth */ + + /* children of the same parent are siblings, even if they may have different type and depth */ + struct hwloc_obj *parent; /**< \brief Parent, \c NULL if root (system object) */ + unsigned sibling_rank; /**< \brief Index in parent's \c children[] array */ + struct hwloc_obj *next_sibling; /**< \brief Next object below the same parent */ + struct hwloc_obj *prev_sibling; /**< \brief Previous object below the same parent */ + + /* children array below this object */ + unsigned arity; /**< \brief Number of children */ + struct hwloc_obj **children; /**< \brief Children, \c children[0 .. arity -1] */ + struct hwloc_obj *first_child; /**< \brief First child */ + struct hwloc_obj *last_child; /**< \brief Last child */ + + /* misc */ + void *userdata; /**< \brief Application-given private data pointer, + * initialized to \c NULL, use it as you wish. + * See hwloc_topology_set_userdata_export_callback() + * if you wish to export this field to XML. */ + + /* cpusets and nodesets */ + hwloc_cpuset_t cpuset; /**< \brief CPUs covered by this object + * + * This is the set of CPUs for which there are PU objects in the topology + * under this object, i.e. which are known to be physically contained in this + * object and known how (the children path between this object and the PU + * objects). + * + * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, some of + * these CPUs may be offline, or not allowed for binding, see online_cpuset + * and allowed_cpuset. 
+ * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of logical processors of this object, + * + * This includes not only the same as the cpuset field, but also the CPUs for + * which topology information is unknown or incomplete, and the CPUs that are + * ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set. + * Thus no corresponding PU object may be found in the topology, because the + * precise position is undefined. It is however known that it would be somewhere + * under this object. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_cpuset_t online_cpuset; /**< \brief The CPU set of online logical processors + * + * This includes the CPUs contained in this object that are online, i.e. draw + * power and can execute threads. It may however not be allowed to bind to + * them due to administration rules, see allowed_cpuset. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_cpuset_t allowed_cpuset; /**< \brief The CPU set of allowed logical processors + * + * This includes the CPUs contained in this object which are allowed for + * binding, i.e. passing them to the hwloc binding functions should not return + * permission errors. This is usually restricted by administration rules. + * Some of them may however be offline so binding to them may still not be + * possible, see online_cpuset. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + + hwloc_nodeset_t nodeset; /**< \brief NUMA nodes covered by this object or containing this object + * + * This is the set of NUMA nodes for which there are NODE objects in the + * topology under or above this object, i.e. 
which are known to be physically + * contained in this object or containing it and known how (the children path + * between this object and the NODE objects). + * + * In the end, these nodes are those that are close to the current object. + * + * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, some of + * these nodes may not be allowed for allocation, see allowed_nodeset. + * + * If there are no NUMA nodes in the machine, all the memory is close to this + * object, so \p nodeset is full. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_nodeset_t complete_nodeset; /**< \brief The complete NUMA node set of this object, + * + * This includes not only the same as the nodeset field, but also the NUMA + * nodes for which topology information is unknown or incomplete, and the nodes + * that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set. + * Thus no corresponding NODE object may be found in the topology, because the + * precise position is undefined. It is however known that it would be + * somewhere under this object. + * + * If there are no NUMA nodes in the machine, all the memory is close to this + * object, so \p complete_nodeset is full. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_nodeset_t allowed_nodeset; /**< \brief The set of allowed NUMA memory nodes + * + * This includes the NUMA memory nodes contained in this object which are + * allowed for memory allocation, i.e. passing them to NUMA node-directed + * memory allocation should not return permission errors. This is usually + * restricted by administration rules. + * + * If there are no NUMA nodes in the machine, all the memory is close to this + * object, so \p allowed_nodeset is full. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. 
+ */ + + struct hwloc_distances_s **distances; /**< \brief Distances between all objects at same depth below this object */ + unsigned distances_count; + + struct hwloc_obj_info_s *infos; /**< \brief Array of stringified info type=name. */ + unsigned infos_count; /**< \brief Size of infos array. */ + + int symmetric_subtree; /**< \brief Set if the subtree of objects below this object is symmetric, + * which means all children and their children have identical subtrees. + * If set in the topology root object, lstopo may export the topology + * as a synthetic string. + */ +}; +/** + * \brief Convenience typedef; a pointer to a struct hwloc_obj. + */ +typedef struct hwloc_obj * hwloc_obj_t; + +/** \brief Object type-specific Attributes */ +union hwloc_obj_attr_u { + /** \brief Cache-specific Object Attributes */ + struct hwloc_cache_attr_s { + hwloc_uint64_t size; /**< \brief Size of cache in bytes */ + unsigned depth; /**< \brief Depth of cache (e.g., L1, L2, ...etc.) */ + unsigned linesize; /**< \brief Cache-line size in bytes. 
0 if unknown */ + int associativity; /**< \brief Ways of associativity, + * -1 if fully associative, 0 if unknown */ + hwloc_obj_cache_type_t type; /**< \brief Cache type */ + } cache; + /** \brief Group-specific Object Attributes */ + struct hwloc_group_attr_s { + unsigned depth; /**< \brief Depth of group object */ + } group; + /** \brief PCI Device specific Object Attributes */ + struct hwloc_pcidev_attr_s { + unsigned short domain; + unsigned char bus, dev, func; + unsigned short class_id; + unsigned short vendor_id, device_id, subvendor_id, subdevice_id; + unsigned char revision; + float linkspeed; /* in GB/s */ + } pcidev; + /** \brief Bridge specific Object Attributes */ + struct hwloc_bridge_attr_s { + union { + struct hwloc_pcidev_attr_s pci; + } upstream; + hwloc_obj_bridge_type_t upstream_type; + union { + struct { + unsigned short domain; + unsigned char secondary_bus, subordinate_bus; + } pci; + } downstream; + hwloc_obj_bridge_type_t downstream_type; + unsigned depth; + } bridge; + /** \brief OS Device specific Object Attributes */ + struct hwloc_osdev_attr_s { + hwloc_obj_osdev_type_t type; + } osdev; +}; + +/** \brief Distances between objects + * + * One object may contain a distance structure describing distances + * between all its descendants at a given relative depth. If the + * containing object is the root object of the topology, then the + * distances are available for all objects in the machine. + * + * If the \p latency pointer is not \c NULL, the pointed array contains + * memory latencies (non-zero values), see below. + * + * In the future, some other types of distances may be considered. + * In these cases, \p latency may be \c NULL. + */ +struct hwloc_distances_s { + unsigned relative_depth; /**< \brief Relative depth of the considered objects + * below the object containing this distance information. */ + unsigned nbobjs; /**< \brief Number of objects considered in the matrix.
+ * It is the number of descendant objects at \p relative_depth + * below the containing object. + * It corresponds to the result of hwloc_get_nbobjs_inside_cpuset_by_depth(). */ + + float *latency; /**< \brief Matrix of latencies between objects, stored as a one-dimension array. + * May be \c NULL if the distances considered here are not latencies. + * + * Unless defined by the user, this currently contains latencies + * between NUMA nodes (as reported in the System Locality Distance Information Table + * (SLIT) in the ACPI specification), which may or may not be accurate. + * It corresponds to the latency for accessing the memory of one node + * from a core in another node. + * + * Values are normalized to get 1.0 as the minimal value in the matrix. + * Latency from i-th to j-th object is stored in slot i*nbobjs+j. + */ + float latency_max; /**< \brief The maximal value in the latency matrix. */ + float latency_base; /**< \brief The multiplier that should be applied to latency matrix + * to retrieve the original OS-provided latencies. + * Usually 10 on Linux since ACPI SLIT uses 10 for local latency. + */ +}; + +/** \brief Object info */ +struct hwloc_obj_info_s { + char *name; /**< \brief Info name */ + char *value; /**< \brief Info value */ +}; + +/** @} */ + + + +/** \defgroup hwlocality_creation Topology Creation and Destruction + * @{ + */ + +struct hwloc_topology; +/** \brief Topology context + * + * To be initialized with hwloc_topology_init() and built with hwloc_topology_load(). + */ +typedef struct hwloc_topology * hwloc_topology_t; + +/** \brief Allocate a topology context. + * + * \param[out] topologyp is assigned a pointer to the new allocated context. + * + * \return 0 on success, -1 on error. + */ +HWLOC_DECLSPEC int hwloc_topology_init (hwloc_topology_t *topologyp); + +/** \brief Build the actual topology + * + * Build the actual topology once initialized with hwloc_topology_init() and + * tuned with \ref hwlocality_configuration routines. 
+ * No other routine may be called earlier using this topology context. + * + * \param topology is the topology to be loaded with objects. + * + * \return 0 on success, -1 on error. + * + * \note On failure, the topology is reinitialized. It should be either + * destroyed with hwloc_topology_destroy() or configured and loaded again. + * + * \note This function may be called only once per topology. + * + * \sa hwlocality_configuration + */ +HWLOC_DECLSPEC int hwloc_topology_load(hwloc_topology_t topology); + +/** \brief Terminate and free a topology context + * + * \param topology is the topology to be freed + */ +HWLOC_DECLSPEC void hwloc_topology_destroy (hwloc_topology_t topology); + +/** \brief Duplicate a topology. + * + * The entire topology structure as well as its objects + * are duplicated into a new one. + * + * This is useful for keeping a backup while modifying a topology. + */ +HWLOC_DECLSPEC int hwloc_topology_dup(hwloc_topology_t *newtopology, hwloc_topology_t oldtopology); + +/** \brief Run internal checks on a topology structure + * + * The program aborts if an inconsistency is detected in the given topology. + * + * \param topology is the topology to be checked + * + * \note This routine is only useful to developers. + * + * \note The input topology should have been previously loaded with + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC void hwloc_topology_check(hwloc_topology_t topology); + +/** @} */ + + + +/** \defgroup hwlocality_configuration Topology Detection Configuration and Query + * + * Several functions can optionally be called between hwloc_topology_init() and + * hwloc_topology_load() to configure how the detection should be performed, + * e.g. to ignore some objects types, define a synthetic topology, etc. + * + * If none of them is called, the default is to detect all the objects of the + * machine that the caller is allowed to access. 
+ * + * This default behavior may also be modified through environment variables + * if the application did not modify it already. + * Setting HWLOC_XMLFILE in the environment enforces the discovery from a XML + * file as if hwloc_topology_set_xml() had been called. + * HWLOC_FSROOT switches to reading the topology from the specified Linux + * filesystem root as if hwloc_topology_set_fsroot() had been called. + * Finally, HWLOC_THISSYSTEM enforces the return value of + * hwloc_topology_is_thissystem(). + * + * @{ + */ + +/** \brief Ignore an object type. + * + * Ignore all objects from the given type. + * The bottom-level type ::HWLOC_OBJ_PU may not be ignored. + * The top-level object of the hierarchy will never be ignored, even if this function + * succeeds. + * Group objects are always ignored if they do not bring any structure + * since they are designed to add structure to the topology. + * I/O objects may not be ignored, topology flags should be used to configure + * their discovery instead. + */ +HWLOC_DECLSPEC int hwloc_topology_ignore_type(hwloc_topology_t topology, hwloc_obj_type_t type); + +/** \brief Ignore an object type if it does not bring any structure. + * + * Ignore all objects from the given type as long as they do not bring any structure: + * Each ignored object should have a single child or be the only child of its parent. + * The bottom-level type ::HWLOC_OBJ_PU may not be ignored. + * I/O objects may not be ignored, topology flags should be used to configure + * their discovery instead. + */ +HWLOC_DECLSPEC int hwloc_topology_ignore_type_keep_structure(hwloc_topology_t topology, hwloc_obj_type_t type); + +/** \brief Ignore all objects that do not bring any structure. + * + * Ignore all objects that do not bring any structure: + * This is equivalent to calling hwloc_topology_ignore_type_keep_structure() + * for all object types.
+ */ +HWLOC_DECLSPEC int hwloc_topology_ignore_all_keep_structure(hwloc_topology_t topology); + +/** \brief Flags to be set onto a topology context before load. + * + * Flags should be given to hwloc_topology_set_flags(). + * They may also be returned by hwloc_topology_get_flags(). + */ +enum hwloc_topology_flags_e { + /** \brief Detect the whole system, ignore reservations and offline settings. + * + * Gather all resources, even if some were disabled by the administrator. + * For instance, ignore Linux Cgroup/Cpusets and gather all processors and memory nodes, + * and ignore the fact that some resources may be offline. + * + * When this flag is not set, PUs that are disallowed are not added to the topology. + * Parent objects (package, core, cache, etc.) are added only if some of their children are allowed. + * NUMA nodes are always added but their available memory is set to 0 when disallowed. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL<<0), + + /** \brief Assume that the selected backend provides the topology for the + * system on which we are running. + * + * This forces hwloc_topology_is_thissystem() to return 1, i.e. makes hwloc assume that + * the selected backend provides the topology for the system on which we are running, + * even if it is not the OS-specific backend but the XML backend for instance. + * This means making the binding functions actually call the OS-specific + * system calls and really do binding, while the XML backend would otherwise + * provide empty hooks just returning success. + * + * Setting the environment variable HWLOC_THISSYSTEM may also result in the + * same behavior. + * + * This can be used for efficiency reasons to first detect the topology once, + * save it to an XML file, and quickly reload it later through the XML + * backend, but still having binding functions actually do bind. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM = (1UL<<1), + + /** \brief Detect PCI devices. 
+ * + * By default, I/O devices are ignored. This flag enables I/O device + * detection using the pci backend. Only the common PCI devices (GPUs, + * NICs, block devices, ...) and host bridges (objects that connect the host + * objects to an I/O subsystem) will be added to the topology. + * Additionally it also enables MemoryModule misc objects. + * Uncommon devices and other bridges (such as PCI-to-PCI bridges) will be + * ignored. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_IO_DEVICES = (1UL<<2), + + /** \brief Detect PCI bridges. + * + * This flag should be combined with ::HWLOC_TOPOLOGY_FLAG_IO_DEVICES to enable + * the detection of both common devices and of all useful bridges (bridges that + * have at least one device behind them). + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_IO_BRIDGES = (1UL<<3), + + /** \brief Detect the whole PCI hierarchy. + * + * This flag enables detection of all I/O devices (even the uncommon ones + * such as DMA channels) and bridges (even those that have no device behind + * them) using the pci backend. + * This implies ::HWLOC_TOPOLOGY_FLAG_IO_DEVICES. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_WHOLE_IO = (1UL<<4), + + /** \brief Detect instruction caches. + * + * This flag enables detection of Instruction caches, + * instead of only Data and Unified caches. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_ICACHES = (1UL<<5) +}; + +/** \brief Set OR'ed flags to non-yet-loaded topology. + * + * Set a OR'ed set of ::hwloc_topology_flags_e onto a topology that was not yet loaded. + * + * If this function is called multiple times, the last invocation will erase + * and replace the set of flags that was previously set. + * + * The flags set in a topology may be retrieved with hwloc_topology_get_flags(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_flags (hwloc_topology_t topology, unsigned long flags); + +/** \brief Get OR'ed flags of a topology. + * + * Get the OR'ed set of ::hwloc_topology_flags_e of a topology.
+ * + * \return the flags previously set with hwloc_topology_set_flags(). + */ +HWLOC_DECLSPEC unsigned long hwloc_topology_get_flags (hwloc_topology_t topology); + +/** \brief Change which process the topology is viewed from + * + * On some systems, processes may have different views of the machine, for + * instance the set of allowed CPUs. By default, hwloc exposes the view from + * the current process. Calling hwloc_topology_set_pid() permits to make it + * expose the topology of the machine from the point of view of another + * process. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note -1 is returned and errno is set to ENOSYS on platforms that do not + * support this feature. + */ +HWLOC_DECLSPEC int hwloc_topology_set_pid(hwloc_topology_t __hwloc_restrict topology, hwloc_pid_t pid); + +/** \brief Change the file-system root path when building the topology from sysfs/procfs. + * + * On Linux system, use sysfs and procfs files as if they were mounted on the given + * \p fsroot_path instead of the main file-system root. Setting the environment + * variable HWLOC_FSROOT may also result in this behavior. + * Not using the main file-system root causes hwloc_topology_is_thissystem() + * to return 0. + * + * Note that this function does not actually load topology + * information; it just tells hwloc where to load it from. You'll + * still need to invoke hwloc_topology_load() to actually load the + * topology information. + * + * \return -1 with errno set to ENOSYS on non-Linux and on Linux systems that + * do not support it. + * \return -1 with the appropriate errno if \p fsroot_path cannot be used. + * + * \note For convenience, this backend provides empty binding hooks which just + * return success. To have hwloc still actually call OS-specific hooks, the + * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded + * file is really the underlying system. 
+ * + * \note On success, the Linux component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_fsroot(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict fsroot_path); + +/** \brief Enable synthetic topology. + * + * Gather topology information from the given \p description, + * a space-separated string of numbers describing + * the arity of each level. + * Each number may be prefixed with a type and a colon to enforce the type + * of a level. If only some level types are enforced, hwloc will try to + * choose the other types according to usual topologies, but it may fail + * and you may have to specify more level types manually. + * See also the \ref synthetic. + * + * If \p description was properly parsed and describes a valid topology + * configuration, this function returns 0. + * Otherwise -1 is returned and errno is set to EINVAL. + * + * Note that this function does not actually load topology + * information; it just tells hwloc where to load it from. You'll + * still need to invoke hwloc_topology_load() to actually load the + * topology information. + * + * \note For convenience, this backend provides empty binding hooks which just + * return success. + * + * \note On success, the synthetic component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_synthetic(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict description); + +/** \brief Enable XML-file based topology. + * + * Gather topology information from the XML file given at \p xmlpath. + * Setting the environment variable HWLOC_XMLFILE may also result in this behavior. + * This file may have been generated earlier with hwloc_topology_export_xml() + * or lstopo file.xml. 
+ * + * Note that this function does not actually load topology + * information; it just tells hwloc where to load it from. You'll + * still need to invoke hwloc_topology_load() to actually load the + * topology information. + * + * \return -1 with errno set to EINVAL on failure to read the XML file. + * + * \note See also hwloc_topology_set_userdata_import_callback() + * for importing application-specific object userdata. + * + * \note For convenience, this backend provides empty binding hooks which just + * return success. To have hwloc still actually call OS-specific hooks, the + * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded + * file is really the underlying system. + * + * \note On success, the XML component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict xmlpath); + +/** \brief Enable XML based topology using a memory buffer (instead of + * a file, as with hwloc_topology_set_xml()). + * + * Gather topology information from the XML memory buffer given at \p + * buffer and of length \p size. This buffer may have been filled + * earlier with hwloc_topology_export_xmlbuffer(). + * + * Note that this function does not actually load topology + * information; it just tells hwloc where to load it from. You'll + * still need to invoke hwloc_topology_load() to actually load the + * topology information. + * + * \return -1 with errno set to EINVAL on failure to read the XML buffer. + * + * \note See also hwloc_topology_set_userdata_import_callback() + * for importing application-specific object userdata. + * + * \note For convenience, this backend provides empty binding hooks which just + * return success. 
To have hwloc still actually call OS-specific hooks, the + * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded + * file is really the underlying system. + * + * \note On success, the XML component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size); + +/** \brief Prepare the topology for custom assembly. + * + * The topology then contains a single root object. + * It must then be built by inserting other topologies with + * hwloc_custom_insert_topology() or single objects with + * hwloc_custom_insert_group_object_by_parent(). + * hwloc_topology_load() must be called to finalize the new + * topology as usual. + * + * \note If nothing is inserted in the topology, + * hwloc_topology_load() will fail with errno set to EINVAL. + * + * \note The cpuset and nodeset of the root object are NULL because + * these sets are meaningless when assembling multiple topologies. + * + * \note On success, the custom component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_custom(hwloc_topology_t topology); + +/** \brief Provide a distance matrix. + * + * Provide the matrix of distances between a set of objects of the given type. + * The set may or may not contain all the existing objects of this type. + * The objects are specified by their OS/physical index in the \p os_index + * array. The \p distances matrix follows the same order. + * The distance from object i to object j is stored in slot i*nbobjs+j. + * + * A single latency matrix may be defined for each type.
+ * If another distance matrix already exists for the given type, + * either because the user specified it or because the OS offers it, + * it will be replaced by the given one. + * If \p nbobjs is \c 0, \p os_index is \c NULL and \p distances is \c NULL, + * the existing distance matrix for the given type is removed. + * + * \note Distance matrices are ignored in multi-node topologies. + */ +HWLOC_DECLSPEC int hwloc_topology_set_distance_matrix(hwloc_topology_t __hwloc_restrict topology, + hwloc_obj_type_t type, unsigned nbobjs, + unsigned *os_index, float *distances); + +/** \brief Does the topology context come from this system? + * + * \return 1 if this topology context was built using the system + * running this program. + * \return 0 instead (for instance if using another file-system root, + * a XML topology file, or a synthetic topology). + */ +HWLOC_DECLSPEC int hwloc_topology_is_thissystem(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure; + +/** \brief Flags describing actual discovery support for this topology. */ +struct hwloc_topology_discovery_support { + /** \brief Detecting the number of PU objects is supported. */ + unsigned char pu; +}; + +/** \brief Flags describing actual PU binding support for this topology. */ +struct hwloc_topology_cpubind_support { + /** Binding the whole current process is supported. */ + unsigned char set_thisproc_cpubind; + /** Getting the binding of the whole current process is supported. */ + unsigned char get_thisproc_cpubind; + /** Binding a whole given process is supported. */ + unsigned char set_proc_cpubind; + /** Getting the binding of a whole given process is supported. */ + unsigned char get_proc_cpubind; + /** Binding the current thread only is supported. */ + unsigned char set_thisthread_cpubind; + /** Getting the binding of the current thread only is supported. */ + unsigned char get_thisthread_cpubind; + /** Binding a given thread only is supported. 
*/ + unsigned char set_thread_cpubind; + /** Getting the binding of a given thread only is supported. */ + unsigned char get_thread_cpubind; + /** Getting the last processors where the whole current process ran is supported */ + unsigned char get_thisproc_last_cpu_location; + /** Getting the last processors where a whole process ran is supported */ + unsigned char get_proc_last_cpu_location; + /** Getting the last processors where the current thread ran is supported */ + unsigned char get_thisthread_last_cpu_location; +}; + +/** \brief Flags describing actual memory binding support for this topology. */ +struct hwloc_topology_membind_support { + /** Binding the whole current process is supported. */ + unsigned char set_thisproc_membind; + /** Getting the binding of the whole current process is supported. */ + unsigned char get_thisproc_membind; + /** Binding a whole given process is supported. */ + unsigned char set_proc_membind; + /** Getting the binding of a whole given process is supported. */ + unsigned char get_proc_membind; + /** Binding the current thread only is supported. */ + unsigned char set_thisthread_membind; + /** Getting the binding of the current thread only is supported. */ + unsigned char get_thisthread_membind; + /** Binding a given memory area is supported. */ + unsigned char set_area_membind; + /** Getting the binding of a given memory area is supported. */ + unsigned char get_area_membind; + /** Allocating a bound memory area is supported. */ + unsigned char alloc_membind; + /** First-touch policy is supported. */ + unsigned char firsttouch_membind; + /** Bind policy is supported. */ + unsigned char bind_membind; + /** Interleave policy is supported. */ + unsigned char interleave_membind; + /** Replication policy is supported. */ + unsigned char replicate_membind; + /** Next-touch migration policy is supported. */ + unsigned char nexttouch_membind; + + /** Migration flags is supported. 
*/ + unsigned char migrate_membind; +}; + +/** \brief Set of flags describing actual support for this topology. + * + * This is retrieved with hwloc_topology_get_support() and will be valid until + * the topology object is destroyed. Note: the values are correct only after + * discovery. + */ +struct hwloc_topology_support { + struct hwloc_topology_discovery_support *discovery; + struct hwloc_topology_cpubind_support *cpubind; + struct hwloc_topology_membind_support *membind; +}; + +/** \brief Retrieve the topology support. */ +HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(hwloc_topology_t __hwloc_restrict topology); + +/** \brief Set the topology-specific userdata pointer. + * + * Each topology may store one application-given private data pointer. + * It is initialized to \c NULL. + * hwloc will never modify it. + * + * Use it as you wish, after hwloc_topology_init() and until hwloc_topology_destroy(). + * + * This pointer is not exported to XML. + */ +HWLOC_DECLSPEC void hwloc_topology_set_userdata(hwloc_topology_t topology, const void *userdata); + +/** \brief Retrieve the topology-specific userdata pointer. + * + * Retrieve the application-given private data pointer that was + * previously set with hwloc_topology_set_userdata(). + */ +HWLOC_DECLSPEC void * hwloc_topology_get_userdata(hwloc_topology_t topology); + +/** @} */ + + + +/** \defgroup hwlocality_levels Object levels, depths and types + * @{ + * + * Be sure to see the figure in \ref termsanddefs that shows a + * complete topology tree, including depths, child/sibling/cousin + * relationships, and an example of an asymmetric topology where one + * package has fewer caches than its peers. + */ + +/** \brief Get the depth of the hierarchical tree of objects. + * + * This is the depth of ::HWLOC_OBJ_PU objects plus one.
+ */ +HWLOC_DECLSPEC unsigned hwloc_topology_get_depth(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure; + +/** \brief Returns the depth of objects of type \p type. + * + * If no object of this type is present on the underlying architecture, or if + * the OS doesn't provide this kind of information, the function returns + * ::HWLOC_TYPE_DEPTH_UNKNOWN. + * + * If type is absent but a similar type is acceptable, see also + * hwloc_get_type_or_below_depth() and hwloc_get_type_or_above_depth(). + * + * If some objects of the given type exist in different levels, + * for instance L1 and L2 caches, or L1i and L1d caches, + * the function returns ::HWLOC_TYPE_DEPTH_MULTIPLE. + * See hwloc_get_cache_type_depth() in hwloc/helper.h to better handle this + * case. + * + * If an I/O object type is given, the function returns a virtual value + * because I/O objects are stored in special levels that are not CPU-related. + * This virtual depth may be passed to other hwloc functions such as + * hwloc_get_obj_by_depth() but it should not be considered as an actual + * depth by the application. In particular, it should not be compared with + * any other object depth or with the entire topology depth. + */ +HWLOC_DECLSPEC int hwloc_get_type_depth (hwloc_topology_t topology, hwloc_obj_type_t type); + +enum hwloc_get_type_depth_e { + HWLOC_TYPE_DEPTH_UNKNOWN = -1, /**< \brief No object of given type exists in the topology. \hideinitializer */ + HWLOC_TYPE_DEPTH_MULTIPLE = -2, /**< \brief Objects of given type exist at different depth in the topology. \hideinitializer */ + HWLOC_TYPE_DEPTH_BRIDGE = -3, /**< \brief Virtual depth for bridge object level. \hideinitializer */ + HWLOC_TYPE_DEPTH_PCI_DEVICE = -4, /**< \brief Virtual depth for PCI device object level. \hideinitializer */ + HWLOC_TYPE_DEPTH_OS_DEVICE = -5 /**< \brief Virtual depth for software device object level. 
\hideinitializer */ +}; + +/** \brief Returns the depth of objects of type \p type or below + * + * If no object of this type is present on the underlying architecture, the + * function returns the depth of the first "present" object typically found + * inside \p type. + * + * If some objects of the given type exist in different levels, for instance + * L1 and L2 caches, the function returns ::HWLOC_TYPE_DEPTH_MULTIPLE. + */ +static __hwloc_inline int +hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure; + +/** \brief Returns the depth of objects of type \p type or above + * + * If no object of this type is present on the underlying architecture, the + * function returns the depth of the first "present" object typically + * containing \p type. + * + * If some objects of the given type exist in different levels, for instance + * L1 and L2 caches, the function returns ::HWLOC_TYPE_DEPTH_MULTIPLE. + */ +static __hwloc_inline int +hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure; + +/** \brief Returns the type of objects at depth \p depth. + * + * \p depth should be between 0 and hwloc_topology_get_depth()-1. + * + * \return -1 if depth \p depth does not exist. + */ +HWLOC_DECLSPEC hwloc_obj_type_t hwloc_get_depth_type (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure; + +/** \brief Returns the width of level at depth \p depth. + */ +HWLOC_DECLSPEC unsigned hwloc_get_nbobjs_by_depth (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure; + +/** \brief Returns the width of level type \p type + * + * If no object for that type exists, 0 is returned. + * If there are several levels with objects of that type, -1 is returned. + */ +static __hwloc_inline int +hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure; + +/** \brief Returns the top-object of the topology-tree.
+ * + * Its type is typically ::HWLOC_OBJ_MACHINE but it could be different + * for complex topologies. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_root_obj (hwloc_topology_t topology) __hwloc_attribute_pure; + +/** \brief Returns the topology object at logical index \p idx from depth \p depth */ +HWLOC_DECLSPEC hwloc_obj_t hwloc_get_obj_by_depth (hwloc_topology_t topology, unsigned depth, unsigned idx) __hwloc_attribute_pure; + +/** \brief Returns the topology object at logical index \p idx with type \p type + * + * If no object for that type exists, \c NULL is returned. + * If there are several levels with objects of that type, \c NULL is returned + * and the caller may fall back to hwloc_get_obj_by_depth(). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure; + +/** \brief Returns the next object at depth \p depth. + * + * If \p prev is \c NULL, return the first object at depth \p depth. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_by_depth (hwloc_topology_t topology, unsigned depth, hwloc_obj_t prev); + +/** \brief Returns the next object of type \p type. + * + * If \p prev is \c NULL, return the first object of type \p type. If + * there are multiple or no depth for given type, return \c NULL and + * let the caller fall back to hwloc_get_next_obj_by_depth(). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, + hwloc_obj_t prev); + +/** @} */ + + + +/** \defgroup hwlocality_object_strings Manipulating Object Type, Sets and Attributes as Strings + * @{ + */ + +/** \brief Return a stringified topology object type */ +HWLOC_DECLSPEC const char * hwloc_obj_type_string (hwloc_obj_type_t type) __hwloc_attribute_const; + +/** \brief Return an object type and attributes from a type string. + * + * Convert strings such as "Package" or "Cache" into the corresponding types.
+ * Matching is case-insensitive, and only the first letters are actually + * required to match. + * + * Types that have specific attributes, for instance caches and groups, + * may be returned in \p depthattrp and \p typeattrp. They are ignored + * when these pointers are \c NULL. + * + * For instance "L2i" or "L2iCache" would return + * type HWLOC_OBJ_CACHE in \p typep, 2 in \p depthattrp, + * and HWLOC_OBJ_CACHE_TYPE_INSTRUCTION in \p typeattrp + * (this last pointer should point to a hwloc_obj_cache_type_t). + * "Group3" would return type HWLOC_OBJ_GROUP and 3 in \p depthattrp. + * Attributes that are not specified in the string (for instance "Group" + * without a depth, or "L2Cache" without a cache type) are set to -1. + * + * \p typeattrp is only filled if the size specified in \p typeattrsize + * is large enough. It is currently only used for caches, and the required + * size is at least the size of hwloc_obj_cache_type_t. + * + * \return 0 if a type was correctly identified, otherwise -1. + * + * \note This is an extended version of the now deprecated hwloc_obj_type_of_string() + */ +HWLOC_DECLSPEC int hwloc_obj_type_sscanf(const char *string, + hwloc_obj_type_t *typep, + int *depthattrp, + void *typeattrp, size_t typeattrsize); + +/** \brief Stringify the type of a given topology object into a human-readable form. + * + * It differs from hwloc_obj_type_string() because it prints type attributes such + * as cache depth and type. + * + * If \p size is 0, \p string may safely be \c NULL. + * + * \return the number of characters that were actually written if not truncating, + * or that would have been written (not including the ending \\0). + */ +HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, + int verbose); + +/** \brief Stringify the attributes of a given topology object into a human-readable form. + * + * Attribute values are separated by \p separator.
+ * + * Only the major attributes are printed in non-verbose mode. + * + * If \p size is 0, \p string may safely be \c NULL. + * + * \return the number of characters that were actually written if not truncating, + * or that would have been written (not including the ending \\0). + */ +HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, const char * __hwloc_restrict separator, + int verbose); + +/** \brief Stringify the cpuset containing a set of objects. + * + * If \p size is 0, \p str may safely be \c NULL. + * + * \return the number of characters that were actually written if not truncating, + * or that would have been written (not including the ending \\0). + */ +HWLOC_DECLSPEC int hwloc_obj_cpuset_snprintf(char * __hwloc_restrict str, size_t size, size_t nobj, const hwloc_obj_t * __hwloc_restrict objs); + +/** \brief Search the given key name in object infos and return the corresponding value. + * + * If multiple keys match the given name, only the first one is returned. + * + * \return \c NULL if no such key exists. + */ +static __hwloc_inline const char * +hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name) __hwloc_attribute_pure; + +/** \brief Add the given info name and value pair to the given object. + * + * The info is appended to the existing info array even if another key + * with the same name already exists. + * + * The input strings are copied before being added in the object infos. + * + * \note This function may be used to enforce object colors in the lstopo + * graphical output by using "lstopoStyle" as a name and "Background=#rrggbb" + * as a value. See CUSTOM COLORS in the lstopo(1) manpage for details. + * + * \note If \p value contains some non-printable characters, they will + * be dropped when exporting to XML, see hwloc_topology_export_xml().
+ */ +HWLOC_DECLSPEC void hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value); + +/** @} */ + + + +/** \defgroup hwlocality_cpubinding CPU binding + * + * It is often useful to call hwloc_bitmap_singlify() first so that a single CPU + * remains in the set. This way, the process will not even migrate between + * different CPUs inside the given set. + * Some operating systems also only support that kind of binding. + * + * Some operating systems do not provide all hwloc-supported + * mechanisms to bind processes, threads, etc. + * hwloc_topology_get_support() may be used to query about the actual CPU + * binding support in the currently used operating system. + * + * When the requested binding operation is not available and the + * ::HWLOC_CPUBIND_STRICT flag was passed, the function returns -1. + * \p errno is set to \c ENOSYS when it is not possible to bind the requested kind of object + * processes/threads. errno is set to \c EXDEV when the requested cpuset + * can not be enforced (e.g. some systems only allow one CPU, and some + * other systems only allow one NUMA node). + * + * If ::HWLOC_CPUBIND_STRICT was not passed, the function may fail as well, + * or the operating system may use a slightly different operation + * (with side-effects, smaller binding set, etc.) + * when the requested operation is not exactly supported. + * + * The most portable version that should be preferred over the others, + * whenever possible, is the following one which just binds the current program, + * assuming it is single-threaded: + * + * \code + * hwloc_set_cpubind(topology, set, 0), + * \endcode + * + * If the program may be multithreaded, the following one should be preferred + * to only bind the current thread: + * + * \code + * hwloc_set_cpubind(topology, set, HWLOC_CPUBIND_THREAD), + * \endcode + * + * \sa Some example codes are available under doc/examples/ in the source tree. 
+ * + * \note To unbind, just call the binding function with either a full cpuset or + * a cpuset equal to the system cpuset. + * + * \note On some operating systems, CPU binding may have effects on memory binding, see + * ::HWLOC_CPUBIND_NOMEMBIND + * + * \note Running lstopo --top or hwloc-ps can be a very convenient tool to check + * how binding actually happened. + * @{ + */ + +/** \brief Process/Thread binding flags. + * + * These bit flags can be used to refine the binding policy. + * + * The default (0) is to bind the current process, assumed to be + * single-threaded, in a non-strict way. This is the most portable + * way to bind as all operating systems usually provide it. + * + * \note Not all systems support all kinds of binding. See the + * "Detailed Description" section of \ref hwlocality_cpubinding for a + * description of errors that can occur. + */ +typedef enum { + /** \brief Bind all threads of the current (possibly) multithreaded process. + * \hideinitializer */ + HWLOC_CPUBIND_PROCESS = (1<<0), + + /** \brief Bind current thread of current process. + * \hideinitializer */ + HWLOC_CPUBIND_THREAD = (1<<1), + + /** \brief Request for strict binding from the OS. + * + * By default, when the designated CPUs are all busy while other + * CPUs are idle, operating systems may execute the thread/process + * on those other CPUs instead of the designated CPUs, to let them + * progress anyway. Strict binding means that the thread/process + * will _never_ execute on other cpus than the designated CPUs, even + * when those are busy with other tasks and other CPUs are idle. + * + * \note Depending on the operating system, strict binding may not + * be possible (e.g., the OS does not implement it) or not allowed + * (e.g., for administrative reasons), and the function will fail + * in that case. + * + * When retrieving the binding of a process, this flag checks + * whether all its threads actually have the same binding.
If the + * flag is not given, the binding of each thread will be + * accumulated. + * + * \note This flag is meaningless when retrieving the binding of a + * thread. + * \hideinitializer + */ + HWLOC_CPUBIND_STRICT = (1<<2), + + /** \brief Avoid any effect on memory binding + * + * On some operating systems, some CPU binding function would also + * bind the memory on the corresponding NUMA node. It is often not + * a problem for the application, but if it is, setting this flag + * will make hwloc avoid using OS functions that would also bind + * memory. This will however reduce the support of CPU bindings, + * i.e. potentially return -1 with errno set to ENOSYS in some + * cases. + * + * This flag is only meaningful when used with functions that set + * the CPU binding. It is ignored when used with functions that get + * CPU binding information. + * \hideinitializer + */ + HWLOC_CPUBIND_NOMEMBIND = (1<<3) +} hwloc_cpubind_flags_t; + +/** \brief Bind current process or thread on cpus given in physical bitmap \p set. + * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + */ +HWLOC_DECLSPEC int hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags); + +/** \brief Get current process or thread binding. + * + * Writes into \p set the physical cpuset which the process or thread (according to \e + * flags) was last bound to. + */ +HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); + +/** \brief Bind a process \p pid on cpus given in physical bitmap \p set. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note As a special case on Linux, if a tid (thread ID) is supplied + * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags, + * the binding is applied to that specific thread. 
+ * + * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags); + +/** \brief Get the current physical binding of process \p pid. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note As a special case on Linux, if a tid (thread ID) is supplied + * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags, + * the binding for that specific thread is returned. + * + * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags); + +#ifdef hwloc_thread_t +/** \brief Bind a thread \p thread on cpus given in physical bitmap \p set. + * + * \note \p hwloc_thread_t is \p pthread_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note ::HWLOC_CPUBIND_PROCESS can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_const_cpuset_t set, int flags); +#endif + +#ifdef hwloc_thread_t +/** \brief Get the current physical binding of thread \p thread. + * + * \note \p hwloc_thread_t is \p pthread_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note ::HWLOC_CPUBIND_PROCESS can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_cpuset_t set, int flags); +#endif + +/** \brief Get the last physical CPU where the current process or thread ran. + * + * The operating system may move some tasks from one processor + * to another at any time according to their binding, + * so this function may return something that is already + * outdated.
+ * + * \p flags can include either ::HWLOC_CPUBIND_PROCESS or ::HWLOC_CPUBIND_THREAD to + * specify whether the query should be for the whole process (union of all CPUs + * on which all threads are running), or only the current thread. If the + * process is single-threaded, flags can be set to zero to let hwloc use + * whichever method is available on the underlying OS. + */ +HWLOC_DECLSPEC int hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); + +/** \brief Get the last physical CPU where a process ran. + * + * The operating system may move some tasks from one processor + * to another at any time according to their binding, + * so this function may return something that is already + * outdated. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note As a special case on Linux, if a tid (thread ID) is supplied + * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags, + * the last CPU location of that specific thread is returned. + * + * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_get_proc_last_cpu_location(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags); + +/** @} */ + + + +/** \defgroup hwlocality_membinding Memory binding + * + * Memory binding can be done three ways: + * + * - explicit memory allocation thanks to hwloc_alloc_membind() and friends: + * the binding will have effect on the memory allocated by these functions. + * - implicit memory binding through binding policy: hwloc_set_membind() and + * friends only define the current policy of the process, which will be + * applied to the subsequent calls to malloc() and friends. + * - migration of existing memory ranges, thanks to hwloc_set_area_membind() + * and friends, which move already-allocated data. + * + * Not all operating systems support all three ways. 
+ * hwloc_topology_get_support() may be used to query about the actual memory + * binding support in the currently used operating system. + * + * When the requested binding operation is not available and the + * ::HWLOC_MEMBIND_STRICT flag was passed, the function returns -1. + * \p errno will be set to \c ENOSYS when the system does not support + * the specified action or policy + * (e.g., some systems only allow binding memory on a per-thread + * basis, whereas other systems only allow binding memory for all + * threads in a process). + * \p errno will be set to EXDEV when the requested cpuset can not be enforced + * (e.g., some systems only allow binding memory to a single NUMA node). + * + * If ::HWLOC_MEMBIND_STRICT was not passed, the function may fail as well, + * or the operating system may use a slightly different operation + * (with side-effects, smaller binding set, etc.) + * when the requested operation is not exactly supported. + * + * The most portable form that should be preferred over the others + * whenever possible is as follows. + * It allocates some memory hopefully bound to the specified set. + * To do so, hwloc will possibly have to change the current memory + * binding policy in order to actually get the memory bound, if the OS + * does not provide any other way to simply allocate bound memory + * without changing the policy for all allocations. That is the + * difference with hwloc_alloc_membind(), which will never change the + * current memory binding policy. + * + * \code + * hwloc_alloc_membind_policy(topology, size, set, + * HWLOC_MEMBIND_BIND, 0); + * \endcode + * + * Each hwloc memory binding function is available in two forms: one + * that takes a CPU set argument and another that takes a NUMA memory + * node set argument (see \ref hwlocality_object_sets and \ref + * hwlocality_bitmap for a discussion of CPU sets and NUMA memory node + * sets). The names of the latter form end with _nodeset.
It is also + * possible to convert between CPU set and node set using + * hwloc_cpuset_to_nodeset() or hwloc_cpuset_from_nodeset(). + * + * \sa Some example codes are available under doc/examples/ in the source tree. + * + * \note On some operating systems, memory binding affects the CPU + * binding; see ::HWLOC_MEMBIND_NOCPUBIND + * @{ + */ + +/** \brief Memory binding policy. + * + * These constants can be used to choose the binding policy. Only one policy can + * be used at a time (i.e., the values cannot be OR'ed together). + * + * Not all systems support all kinds of binding. + * hwloc_topology_get_support() may be used to query about the actual memory + * binding policy support in the currently used operating system. + * See the "Detailed Description" section of \ref hwlocality_membinding + * for a description of errors that can occur. + */ +typedef enum { + /** \brief Reset the memory allocation policy to the system default. + * Depending on the operating system, this may correspond to + * ::HWLOC_MEMBIND_FIRSTTOUCH (Linux), + * or ::HWLOC_MEMBIND_BIND (AIX, HP-UX, OSF, Solaris, Windows). + * \hideinitializer */ + HWLOC_MEMBIND_DEFAULT = 0, + + /** \brief Allocate memory + * but do not immediately bind it to a specific locality. Instead, + * each page in the allocation is bound only when it is first + * touched. Pages are individually bound to the local NUMA node of + * the first thread that touches it. If there is not enough memory + * on the node, allocation may be done in the specified cpuset + * before allocating on other nodes. + * \hideinitializer */ + HWLOC_MEMBIND_FIRSTTOUCH = 1, + + /** \brief Allocate memory on the specified nodes. + * \hideinitializer */ + HWLOC_MEMBIND_BIND = 2, + + /** \brief Allocate memory on the given nodes in an interleaved + * / round-robin manner. The precise layout of the memory across + * multiple NUMA nodes is OS/system specific. 
Interleaving can be + * useful when threads distributed across the specified NUMA nodes + * will all be accessing the whole memory range concurrently, since + * the interleave will then balance the memory references. + * \hideinitializer */ + HWLOC_MEMBIND_INTERLEAVE = 3, + + /** \brief Replicate memory on the given nodes; reads from this + * memory will attempt to be serviced from the NUMA node local to + * the reading thread. Replicating can be useful when multiple + * threads from the specified NUMA nodes will be sharing the same + * read-only data. + * + * This policy can only be used with existing memory allocations + * (i.e., the hwloc_set_*membind*() functions); it cannot be used + * with functions that allocate new memory (i.e., the hwloc_alloc*() + * functions). + * \hideinitializer */ + HWLOC_MEMBIND_REPLICATE = 4, + + /** \brief For each page bound with this policy, by next time + * it is touched (and next time only), it is moved from its current + * location to the local NUMA node of the thread where the memory + * reference occurred (if it needs to be moved at all). + * \hideinitializer */ + HWLOC_MEMBIND_NEXTTOUCH = 5, + + /** \brief Returned by get_membind() functions when multiple + * threads or parts of a memory area have differing memory binding + * policies. + * \hideinitializer */ + HWLOC_MEMBIND_MIXED = -1 +} hwloc_membind_policy_t; + +/** \brief Memory binding flags. + * + * These flags can be used to refine the binding policy. + * All flags can be logically OR'ed together with the exception of + * ::HWLOC_MEMBIND_PROCESS and ::HWLOC_MEMBIND_THREAD; + * these two flags are mutually exclusive. + * + * Not all systems support all kinds of binding. + * hwloc_topology_get_support() may be used to query about the actual memory + * binding support in the currently used operating system. + * See the "Detailed Description" section of \ref hwlocality_membinding + * for a description of errors that can occur. 
+ */ +typedef enum { + /** \brief Set policy for all threads of the specified (possibly + * multithreaded) process. This flag is mutually exclusive with + * ::HWLOC_MEMBIND_THREAD. + * \hideinitializer */ + HWLOC_MEMBIND_PROCESS = (1<<0), + + /** \brief Set policy for a specific thread of the current process. + * This flag is mutually exclusive with ::HWLOC_MEMBIND_PROCESS. + * \hideinitializer */ + HWLOC_MEMBIND_THREAD = (1<<1), + + /** Request strict binding from the OS. The function will fail if + * the binding can not be guaranteed / completely enforced. + * + * This flag has slightly different meanings depending on which + * function it is used with. + * \hideinitializer */ + HWLOC_MEMBIND_STRICT = (1<<2), + + /** \brief Migrate existing allocated memory. If the memory cannot + * be migrated and the ::HWLOC_MEMBIND_STRICT flag is passed, an error + * will be returned. + * \hideinitializer */ + HWLOC_MEMBIND_MIGRATE = (1<<3), + + /** \brief Avoid any effect on CPU binding. + * + * On some operating systems, some underlying memory binding + * functions also bind the application to the corresponding CPU(s). + * Using this flag will cause hwloc to avoid using OS functions that + * could potentially affect CPU bindings. Note, however, that using + * NOCPUBIND may reduce hwloc's overall memory binding + * support. Specifically: some of hwloc's memory binding functions + * may fail with errno set to ENOSYS when used with NOCPUBIND. + * \hideinitializer + */ + HWLOC_MEMBIND_NOCPUBIND = (1<<4) +} hwloc_membind_flags_t; + +/** \brief Set the default memory binding policy of the current + * process or thread to prefer the NUMA node(s) specified by physical \p nodeset + * + * If neither ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is + * specified, the current process is assumed to be single-threaded. + * This is the most portable form as it permits hwloc to use either + * process-based OS functions or thread-based OS functions, depending + * on which are available. 
+ * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + */ +HWLOC_DECLSPEC int hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags); + +/** \brief Set the default memory binding policy of the current + * process or thread to prefer the NUMA node(s) near the specified physical \p + * cpuset + * + * If neither ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is + * specified, the current process is assumed to be single-threaded. + * This is the most portable form as it permits hwloc to use either + * process-based OS functions or thread-based OS functions, depending + * on which are available. + * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + */ +HWLOC_DECLSPEC int hwloc_set_membind(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags); + +/** \brief Query the default memory binding policy and physical locality of the + * current process or thread. + * + * This function has two output parameters: \p nodeset and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the current memory binding policies and nodesets in + * the queried target. + * + * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query + * target is the current policies and nodesets for all the threads in + * the current process. Passing ::HWLOC_MEMBIND_THREAD specifies that + * the query target is the current policy and nodeset for only the + * thread invoking this function. + * + * If neither of these flags are passed (which is the most portable + * method), the process is assumed to be single threaded. This allows + * hwloc to use either process-based OS functions or thread-based OS + * functions, depending on which are available. 
+ * + * ::HWLOC_MEMBIND_STRICT is only meaningful when ::HWLOC_MEMBIND_PROCESS + * is also specified. In this case, hwloc will check the default + * memory policies and nodesets for all threads in the process. If + * they are not identical, -1 is returned and errno is set to EXDEV. + * If they are identical, the values are returned in \p nodeset and \p + * policy. + * + * Otherwise, if ::HWLOC_MEMBIND_PROCESS is specified (and + * ::HWLOC_MEMBIND_STRICT is \em not specified), \p nodeset is set to + * the logical OR of all threads' default nodeset. If all threads' + * default policies are the same, \p policy is set to that policy. If + * they are different, \p policy is set to ::HWLOC_MEMBIND_MIXED. + * + * In the ::HWLOC_MEMBIND_THREAD case (or when neither + * ::HWLOC_MEMBIND_PROCESS or ::HWLOC_MEMBIND_THREAD is specified), there + * is only one nodeset and policy; they are returned in \p nodeset and + * \p policy, respectively. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + */ +HWLOC_DECLSPEC int hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags); + +/** \brief Query the default memory binding policy and physical locality of the + * current process or thread (the locality is returned in \p cpuset as + * CPUs near the locality's actual NUMA node(s)). + * + * This function has two output parameters: \p cpuset and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the current memory binding policies and nodesets in + * the queried target. + * + * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query + * target is the current policies and nodesets for all the threads in + * the current process. Passing ::HWLOC_MEMBIND_THREAD specifies that + * the query target is the current policy and nodeset for only the + * thread invoking this function. 
+ * + * If neither of these flags are passed (which is the most portable + * method), the process is assumed to be single threaded. This allows + * hwloc to use either process-based OS functions or thread-based OS + * functions, depending on which are available. + * + * ::HWLOC_MEMBIND_STRICT is only meaningful when ::HWLOC_MEMBIND_PROCESS + * is also specified. In this case, hwloc will check the default + * memory policies and nodesets for all threads in the process. If + * they are not identical, -1 is returned and errno is set to EXDEV. + * If they are identical, the policy is returned in \p policy. \p + * cpuset is set to the union of CPUs near the NUMA node(s) in the + * nodeset. + * + * Otherwise, if ::HWLOC_MEMBIND_PROCESS is specified (and + * ::HWLOC_MEMBIND_STRICT is \em not specified), the default nodeset + * from each thread is logically OR'ed together. \p cpuset is set to + * the union of CPUs near the NUMA node(s) in the resulting nodeset. + * If all threads' default policies are the same, \p policy is set to + * that policy. If they are different, \p policy is set to + * ::HWLOC_MEMBIND_MIXED. + * + * In the ::HWLOC_MEMBIND_THREAD case (or when neither + * ::HWLOC_MEMBIND_PROCESS or ::HWLOC_MEMBIND_THREAD is specified), there + * is only one nodeset and policy. The policy is returned in \p + * policy; \p cpuset is set to the union of CPUs near the NUMA node(s) + * in the \p nodeset. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. 
+ */ +HWLOC_DECLSPEC int hwloc_get_membind(hwloc_topology_t topology, hwloc_cpuset_t cpuset, hwloc_membind_policy_t * policy, int flags); + +/** \brief Set the default memory binding policy of the specified + * process to prefer the NUMA node(s) specified by physical \p nodeset + * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + */ +HWLOC_DECLSPEC int hwloc_set_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags); + +/** \brief Set the default memory binding policy of the specified + * process to prefer the NUMA node(s) near the specified physical \p cpuset + * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + */ +HWLOC_DECLSPEC int hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags); + +/** \brief Query the default memory binding policy and physical locality of the + * specified process. + * + * This function has two output parameters: \p nodeset and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the current memory binding policies and nodesets in + * the queried target. + * + * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query + * target is the current policies and nodesets for all the threads in + * the specified process. If ::HWLOC_MEMBIND_PROCESS is not specified + * (which is the most portable method), the process is assumed to be + * single threaded. 
This allows hwloc to use either process-based OS + * functions or thread-based OS functions, depending on which are + * available. + * + * Note that it does not make sense to pass ::HWLOC_MEMBIND_THREAD to + * this function. + * + * If ::HWLOC_MEMBIND_STRICT is specified, hwloc will check the default + * memory policies and nodesets for all threads in the specified + * process. If they are not identical, -1 is returned and errno is + * set to EXDEV. If they are identical, the values are returned in \p + * nodeset and \p policy. + * + * Otherwise, \p nodeset is set to the logical OR of all threads' + * default nodeset. If all threads' default policies are the same, \p + * policy is set to that policy. If they are different, \p policy is + * set to ::HWLOC_MEMBIND_MIXED. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + */ +HWLOC_DECLSPEC int hwloc_get_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags); + +/** \brief Query the default memory binding policy and physical locality of the + * specified process (the locality is returned in \p cpuset as CPUs + * near the locality's actual NUMA node(s)). + * + * This function has two output parameters: \p cpuset and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the current memory binding policies and nodesets in + * the queried target. + * + * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query + * target is the current policies and nodesets for all the threads in + * the specified process. If ::HWLOC_MEMBIND_PROCESS is not specified + * (which is the most portable method), the process is assumed to be + * single threaded. 
This allows hwloc to use either process-based OS + * functions or thread-based OS functions, depending on which are + * available. + * + * Note that it does not make sense to pass ::HWLOC_MEMBIND_THREAD to + * this function. + * + * If ::HWLOC_MEMBIND_STRICT is specified, hwloc will check the default + * memory policies and nodesets for all threads in the specified + * process. If they are not identical, -1 is returned and errno is + * set to EXDEV. If they are identical, the policy is returned in \p + * policy. \p cpuset is set to the union of CPUs near the NUMA + * node(s) in the nodeset. + * + * Otherwise, the default nodeset from each thread is logically OR'ed + * together. \p cpuset is set to the union of CPUs near the NUMA + * node(s) in the resulting nodeset. If all threads' default policies + * are the same, \p policy is set to that policy. If they are + * different, \p policy is set to ::HWLOC_MEMBIND_MIXED. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + */ +HWLOC_DECLSPEC int hwloc_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t cpuset, hwloc_membind_policy_t * policy, int flags); + +/** \brief Bind the already-allocated memory identified by (addr, len) + * to the NUMA node(s) in physical \p nodeset. + * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + */ +HWLOC_DECLSPEC int hwloc_set_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags); + +/** \brief Bind the already-allocated memory identified by (addr, len) + * to the NUMA node(s) near physical \p cpuset. 
+ * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + */ +HWLOC_DECLSPEC int hwloc_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags); + +/** \brief Query the physical NUMA node(s) and binding policy of the memory + * identified by (\p addr, \p len ). + * + * This function has two output parameters: \p nodeset and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the memory binding policies and nodesets of the pages + * in the address range. + * + * If ::HWLOC_MEMBIND_STRICT is specified, the target pages are first + * checked to see if they all have the same memory binding policy and + * nodeset. If they do not, -1 is returned and errno is set to EXDEV. + * If they are identical across all pages, the nodeset and policy are + * returned in \p nodeset and \p policy, respectively. + * + * If ::HWLOC_MEMBIND_STRICT is not specified, \p nodeset is set to the + * union of all NUMA node(s) containing pages in the address range. + * If all pages in the target have the same policy, it is returned in + * \p policy. Otherwise, \p policy is set to ::HWLOC_MEMBIND_MIXED. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + */ +HWLOC_DECLSPEC int hwloc_get_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags); + +/** \brief Query the CPUs near the physical NUMA node(s) and binding policy of + * the memory identified by (\p addr, \p len ). + * + * This function has two output parameters: \p cpuset and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the memory binding policies and nodesets of the pages + * in the address range. 
+ * + * If ::HWLOC_MEMBIND_STRICT is specified, the target pages are first + * checked to see if they all have the same memory binding policy and + * nodeset. If they do not, -1 is returned and errno is set to EXDEV. + * If they are identical across all pages, the policy is returned in + * \p policy. \p cpuset is set to the union of CPUs near the NUMA + * node(s) in the nodeset. + * + * If ::HWLOC_MEMBIND_STRICT is not specified, the union of all NUMA + * node(s) containing pages in the address range is calculated. \p + * cpuset is then set to the CPUs near the NUMA node(s) in this union. + * If all pages in the target have the same policy, it is returned in + * \p policy. Otherwise, \p policy is set to ::HWLOC_MEMBIND_MIXED. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + */ +HWLOC_DECLSPEC int hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_cpuset_t cpuset, hwloc_membind_policy_t * policy, int flags); + +/** \brief Allocate some memory + * + * This is equivalent to malloc(), except that it tries to allocate + * page-aligned memory from the OS. + * + * \note The allocated memory should be freed with hwloc_free(). + */ +HWLOC_DECLSPEC void *hwloc_alloc(hwloc_topology_t topology, size_t len); + +/** \brief Allocate some memory on the given physical nodeset \p nodeset + * + * \return NULL with errno set to ENOSYS if the action is not supported + * and ::HWLOC_MEMBIND_STRICT is given + * \return NULL with errno set to EXDEV if the binding cannot be enforced + * and ::HWLOC_MEMBIND_STRICT is given + * \return NULL with errno set to ENOMEM if the memory allocation failed + * even before trying to bind. + * + * \note The allocated memory should be freed with hwloc_free(). 
+ */ +HWLOC_DECLSPEC void *hwloc_alloc_membind_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; + +/** \brief Allocate some memory on memory nodes near the given physical cpuset \p cpuset + * + * \return NULL with errno set to ENOSYS if the action is not supported + * and ::HWLOC_MEMBIND_STRICT is given + * \return NULL with errno set to EXDEV if the binding cannot be enforced + * and ::HWLOC_MEMBIND_STRICT is given + * \return NULL with errno set to ENOMEM if the memory allocation failed + * even before trying to bind. + * + * \note The allocated memory should be freed with hwloc_free(). + */ +HWLOC_DECLSPEC void *hwloc_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; + +/** \brief Allocate some memory on the given nodeset \p nodeset + * + * This is similar to hwloc_alloc_membind() except that it is allowed to change + * the current memory binding policy, thus providing more binding support, at + * the expense of changing the current state. + */ +static __hwloc_inline void * +hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; + +/** \brief Allocate some memory on the memory nodes near given cpuset \p cpuset + * + * This is similar to hwloc_alloc_membind_policy_nodeset(), but for a given cpuset. + */ +static __hwloc_inline void * +hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; + +/** \brief Free memory that was previously allocated by hwloc_alloc() + * or hwloc_alloc_membind(). 
+ */ +HWLOC_DECLSPEC int hwloc_free(hwloc_topology_t topology, void *addr, size_t len); + +/** @} */ + + + +/** \defgroup hwlocality_tinker Modifying a loaded Topology + * @{ + */ + +/** \brief Add a MISC object to the topology + * + * A new MISC object will be created and inserted into the topology at the + * position given by bitmap \p cpuset. This offers a way to add new + * intermediate levels to the topology hierarchy. + * + * \p cpuset and \p name will be copied to set up the new object attributes. + * + * \return the newly-created object. + * \return \c NULL if the insertion conflicts with the existing topology tree. + * + * \note If \p name contains some non-printable characters, they will + * be dropped when exporting to XML, see hwloc_topology_export_xml(). + */ +HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object_by_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, const char *name); + +/** \brief Add a MISC object as a leaf of the topology + * + * A new MISC object will be created and inserted into the topology at the + * position given by parent. It is appended to the list of existing children, + * without ever adding any intermediate hierarchy level. This is useful for + * annotating the topology without actually changing the hierarchy. + * + * \p name will be copied to set up the new object attributes. + * However, the new leaf object will not have any \p cpuset. + * + * \return the newly-created object + * + * \note If \p name contains some non-printable characters, they will + * be dropped when exporting to XML, see hwloc_topology_export_xml(). + */ +HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, const char *name); + +/** \brief Flags to be given to hwloc_topology_restrict(). */ +enum hwloc_restrict_flags_e { + /** \brief Adapt distance matrices according to objects being removed during restriction.
+ * If this flag is not set, distance matrices are removed. + * \hideinitializer + */ + HWLOC_RESTRICT_FLAG_ADAPT_DISTANCES = (1<<0), + + /** \brief Move Misc objects to ancestors if their parents are removed during restriction. + * If this flag is not set, Misc objects are removed when their parents are removed. + * \hideinitializer + */ + HWLOC_RESTRICT_FLAG_ADAPT_MISC = (1<<1), + + /** \brief Move I/O objects to ancestors if their parents are removed during restriction. + * If this flag is not set, I/O devices and bridges are removed when their parents are removed. + * \hideinitializer + */ + HWLOC_RESTRICT_FLAG_ADAPT_IO = (1<<2) +}; + +/** \brief Restrict the topology to the given CPU set. + * + * Topology \p topology is modified so as to remove all objects that + * are not included (or partially included) in the CPU set \p cpuset. + * All objects CPU and node sets are restricted accordingly. + * + * \p flags is a OR'ed set of ::hwloc_restrict_flags_e. + * + * \note This call may not be reverted by restricting back to a larger + * cpuset. Once dropped during restriction, objects may not be brought + * back, except by loading another topology with hwloc_topology_load(). + * + * \return 0 on success. + * + * \return -1 with errno set to EINVAL if the input cpuset is invalid. + * The topology is not modified in this case. + * + * \return -1 with errno set to ENOMEM on failure to allocate internal data. + * The topology is reinitialized in this case. It should be either + * destroyed with hwloc_topology_destroy() or configured and loaded again. + */ +HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, unsigned long flags); + +/** @} */ + + + +/** \defgroup hwlocality_custom Building Custom Topologies + * + * A custom topology may be initialized by calling hwloc_topology_set_custom() + * after hwloc_topology_init(). It may then be modified by inserting objects + * or entire topologies. 
Once done assembling, hwloc_topology_load() should + * be invoked as usual to finalize the topology. + * @{ + */ + +/** \brief Insert an existing topology inside a custom topology + * + * Duplicate the existing topology \p oldtopology inside a new + * custom topology \p newtopology as a leaf of object \p newparent. + * + * If \p oldroot is not \c NULL, duplicate \p oldroot and all its + * children instead of the entire \p oldtopology. Passing the root + * object of \p oldtopology in \p oldroot is equivalent to passing + * \c NULL. + * + * The custom topology \p newtopology must have been prepared with + * hwloc_topology_set_custom() and not loaded with hwloc_topology_load() + * yet. + * + * \p newparent may be either the root of \p newtopology or an object + * that was added through hwloc_custom_insert_group_object_by_parent(). + * + * \note The cpuset and nodeset of the \p newparent object are not + * modified based on the contents of \p oldtopology. + */ +HWLOC_DECLSPEC int hwloc_custom_insert_topology(hwloc_topology_t newtopology, hwloc_obj_t newparent, hwloc_topology_t oldtopology, hwloc_obj_t oldroot); + +/** \brief Insert a new group object inside a custom topology + * + * An object with type ::HWLOC_OBJ_GROUP is inserted as a new child + * of object \p parent. + * + * \p groupdepth is the depth attribute to be given to the new object. + * It may for instance be 0 for top-level groups, 1 for their children, + * and so on. + * + * The custom topology \p topology must have been prepared with + * hwloc_topology_set_custom() and not loaded with hwloc_topology_load() + * yet. + * + * \p parent may be either the root of \p topology or an object that + * was added earlier through hwloc_custom_insert_group_object_by_parent(). + * + * \note The cpuset and nodeset of the new group object are NULL because + * these sets are meaningless when assembling multiple topologies. + * + * \note The cpuset and nodeset of the \p parent object are not modified.
+ */ +HWLOC_DECLSPEC hwloc_obj_t hwloc_custom_insert_group_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, int groupdepth); + +/** @} */ + + + +/** \defgroup hwlocality_xmlexport Exporting Topologies to XML + * @{ + */ + +/** \brief Export the topology into an XML file. + * + * This file may be loaded later through hwloc_topology_set_xml(). + * + * \return -1 if a failure occurred. + * + * \note See also hwloc_topology_set_userdata_export_callback() + * for exporting application-specific object userdata. + * + * \note The topology-specific userdata pointer is ignored when exporting to XML. + * + * \note Only printable characters may be exported to XML string attributes. + * Any other character, especially any non-ASCII character, will be silently + * dropped. + * + * \note If \p xmlpath is "-", the XML output is sent to the standard output. + */ +HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const char *xmlpath); + +/** \brief Export the topology into a newly-allocated XML memory buffer. + * + * \p xmlbuffer is allocated by the callee and should be freed with + * hwloc_free_xmlbuffer() later in the caller. + * + * This memory buffer may be loaded later through hwloc_topology_set_xmlbuffer(). + * + * \return -1 if a failure occurred. + * + * \note See also hwloc_topology_set_userdata_export_callback() + * for exporting application-specific object userdata. + * + * \note The topology-specific userdata pointer is ignored when exporting to XML. + * + * \note Only printable characters may be exported to XML string attributes. + * Any other character, especially any non-ASCII character, will be silently + * dropped.
+ */ +HWLOC_DECLSPEC int hwloc_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlbuffer, int *buflen); + +/** \brief Free a buffer allocated by hwloc_topology_export_xmlbuffer() */ +HWLOC_DECLSPEC void hwloc_free_xmlbuffer(hwloc_topology_t topology, char *xmlbuffer); + +/** \brief Set the application-specific callback for exporting object userdata + * + * The object userdata pointer is not exported to XML by default because hwloc + * does not know what it contains. + * + * This function lets applications set \p export_cb to a callback function + * that converts this opaque userdata into an exportable string. + * + * \p export_cb is invoked during XML export for each object whose + * \p userdata pointer is not \c NULL. + * The callback should use hwloc_export_obj_userdata() or + * hwloc_export_obj_userdata_base64() to actually export + * something to XML (possibly multiple times per object). + * + * \p export_cb may be set to \c NULL if userdata should not be exported to XML. + * + * \note The topology-specific userdata pointer is ignored when exporting to XML. + */ +HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t topology, + void (*export_cb)(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj)); + +/** \brief Export some object userdata to XML + * + * This function may only be called from within the export() callback passed + * to hwloc_topology_set_userdata_export_callback(). + * It may be invoked one or multiple times to export some userdata to XML. + * The \p buffer content of length \p length is stored with optional name + * \p name. + * + * When importing this XML file, the import() callback (if set) will be + * called exactly as many times as hwloc_export_obj_userdata() was called + * during export(). It will receive the corresponding \p name, \p buffer + * and \p length arguments. + * + * \p reserved, \p topology and \p obj must be the first three parameters + * that were given to the export callback.
+ * + * Only printable characters may be exported to XML string attributes. + * If a non-printable character is passed in \p name or \p buffer, + * the function returns -1 with errno set to EINVAL. + * + * If exporting binary data, the application should first encode into + * printable characters only (or use hwloc_export_obj_userdata_base64()). + * It should also take care of portability issues if the export may + * be reimported on a different architecture. + */ +HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); + +/** \brief Encode and export some object userdata to XML + * + * This function is similar to hwloc_export_obj_userdata() but it encodes + * the input buffer into printable characters before exporting. + * On import, decoding is automatically performed before the data is given + * to the import() callback if any. + * + * This function may only be called from within the export() callback passed + * to hwloc_topology_set_userdata_export_callback(). + * + * The function does not take care of portability issues if the export + * may be reimported on a different architecture. + */ +HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); + +/** \brief Set the application-specific callback for importing userdata + * + * On XML import, userdata is ignored by default because hwloc does not know + * how to store it in memory. + * + * This function lets applications set \p import_cb to a callback function + * that will get the XML-stored userdata and store it in the object as expected + * by the application. + * + * \p import_cb is called during hwloc_topology_load() as many times as + * hwloc_export_obj_userdata() was called during export. The topology + * is not entirely setup yet. 
Object attributes are ready to consult, + * but links between objects are not. + * + * \p import_cb may be \c NULL if userdata should be ignored during import. + * + * \note \p buffer contains \p length characters followed by a null byte ('\0'). + * + * \note This function should be called before hwloc_topology_load(). + * + * \note The topology-specific userdata pointer is ignored when importing from XML. + */ +HWLOC_DECLSPEC void hwloc_topology_set_userdata_import_callback(hwloc_topology_t topology, + void (*import_cb)(hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length)); + +/** @} */ + + +/** \defgroup hwlocality_syntheticexport Exporting Topologies to Synthetic + * @{ + */ + +/** \brief Flags for exporting synthetic topologies. + * + * Flags to be given as a OR'ed set to hwloc_topology_export_synthetic(). + */ +enum hwloc_topology_export_synthetic_flags_e { + /** \brief Export extended types such as L2dcache as basic types such as Cache. + * + * This is required if loading the synthetic description with hwloc < 1.9. + * \hideinitializer + */ + HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES = (1UL<<0), + + /** \brief Do not export level attributes. + * + * Ignore level attributes such as memory/cache sizes or PU indexes. + * This is required if loading the synthetic description with hwloc < 1.10. + * \hideinitializer + */ + HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS = (1UL<<1) +}; + +/** \brief Export the topology as a synthetic string. + * + * At most \p buflen characters will be written in \p buffer, + * including the terminating \0. + * + * This exported string may be given back to hwloc_topology_set_synthetic(). + * + * \p flags is a OR'ed set of hwloc_topology_export_synthetic_flags_e. + * + * \return The number of characters that were written, + * not including the terminating \0. + * + * \return -1 if the topology could not be exported, + * for instance if it is not symmetric. 
+ * + * \note A 1024-byte buffer should be large enough for exporting + * topologies in the vast majority of cases. + */ + HWLOC_DECLSPEC int hwloc_topology_export_synthetic(hwloc_topology_t topology, char *buffer, size_t buflen, unsigned long flags); + +/** @} */ + + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +/* high-level helpers */ +#include + +/* inline code of some functions above */ +#include + +/* topology diffs */ +#include + +/* deprecated headers */ +#include + +#endif /* HWLOC_H */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/autogen/config.h.in b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/autogen/config.h.in similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/autogen/config.h.in rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/autogen/config.h.in diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/bitmap.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/bitmap.h similarity index 96% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/bitmap.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/bitmap.h index bb18f650498..5626428ba61 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/bitmap.h +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/bitmap.h @@ -38,7 +38,7 @@ extern "C" { * * \note Several examples of using the bitmap API are available under the * doc/examples/ directory in the source tree. - * Regression tests such as tests/hwloc_bitmap*.c also make intensive use + * Regression tests such as tests/hwloc/hwloc_bitmap*.c also make intensive use * of this API. * @{ */ @@ -257,27 +257,35 @@ HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attrib HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; /** \brief Loop macro iterating on bitmap \p bitmap - * \hideinitializer + * + * The loop must start with hwloc_bitmap_foreach_begin() and end + * with hwloc_bitmap_foreach_end() followed by a terminating ';'. 
* * \p index is the loop variable; it should be an unsigned int. The * first iteration will set \p index to the lowest index in the bitmap. * Successive iterations will iterate through, in order, all remaining - * indexes that in the bitmap. To be specific: each iteration will return a + * indexes set in the bitmap. To be specific: each iteration will return a * value for \p index such that hwloc_bitmap_isset(bitmap, index) is true. * * The assert prevents the loop from being infinite if the bitmap is infinite. + * + * \hideinitializer */ #define hwloc_bitmap_foreach_begin(id, bitmap) \ do { \ assert(hwloc_bitmap_weight(bitmap) != -1); \ for (id = hwloc_bitmap_first(bitmap); \ (unsigned) id != (unsigned) -1; \ - id = hwloc_bitmap_next(bitmap, id)) { \ -/** \brief End of loop. Needs a terminating ';'. - * \hideinitializer + id = hwloc_bitmap_next(bitmap, id)) { + +/** \brief End of loop macro iterating on a bitmap. + * + * Needs a terminating ';'. * - * \sa hwloc_bitmap_foreach_begin */ -#define hwloc_bitmap_foreach_end() \ + * \sa hwloc_bitmap_foreach_begin() + * \hideinitializer + */ +#define hwloc_bitmap_foreach_end() \ } \ } while (0) diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/cuda.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/cuda.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/cuda.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/cuda.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/cudart.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/cudart.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/cudart.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/cudart.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/deprecated.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/deprecated.h similarity index 97% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/deprecated.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/deprecated.h index 
3d092034498..2a58120278b 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/deprecated.h +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/deprecated.h @@ -81,7 +81,7 @@ hwloc_distribute(hwloc_topology_t topology, hwloc_obj_t root, hwloc_cpuset_t *se /** \brief Distribute \p n items over the topology under \p roots * - * This is the same as hwloc_distribute, but takes an array of roots instead of + * This is the same as hwloc_distribute(), but takes an array of roots instead of * just one root. * * \note This function requires the \p roots objects to have a CPU set. diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/diff.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/diff.h similarity index 89% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/diff.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/diff.h index 3f1beb12641..8b2fe92c32d 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/diff.h +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/diff.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2014 Inria. All rights reserved. + * Copyright © 2013-2015 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -59,19 +59,19 @@ extern "C" { */ typedef enum hwloc_topology_diff_obj_attr_type_e { /** \brief The object local memory is modified. - * The union is a hwloc_topology_diff_obj_attr_uint64_s + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_uint64_s * (and the index field is ignored). */ HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE, /** \brief The object name is modified. - * The union is a hwloc_topology_diff_obj_attr_string_s + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_string_s * (and the name field is ignored). */ HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME, /** \brief the value of an info attribute is modified. - * The union is a hwloc_topology_diff_obj_attr_string_s. + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_string_s. 
*/ HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO } hwloc_topology_diff_obj_attr_type_t; @@ -107,17 +107,17 @@ union hwloc_topology_diff_obj_attr_u { /** \brief Type of one element of a difference list. */ typedef enum hwloc_topology_diff_type_e { - /*< \brief An object attribute was changed. - * The union is a hwloc_topology_diff_obj_attr_s. - */ + /** \brief An object attribute was changed. + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_s. + */ HWLOC_TOPOLOGY_DIFF_OBJ_ATTR, - /*< \brief The difference is too complex, + /** \brief The difference is too complex, * it cannot be represented. The difference below * this object has not been checked. * hwloc_topology_diff_build() will return 1. * - * The union is a hwloc_topology_diff_too_complex_s. + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_too_complex_s. */ HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX } hwloc_topology_diff_type_t; @@ -133,7 +133,7 @@ typedef union hwloc_topology_diff_u { /* A difference in an object attribute. */ struct hwloc_topology_diff_obj_attr_s { - hwloc_topology_diff_type_t type; /* must be HWLOC_TOPOLOGY_DIFF_OBJ_ATTR */ + hwloc_topology_diff_type_t type; /* must be ::HWLOC_TOPOLOGY_DIFF_OBJ_ATTR */ union hwloc_topology_diff_u * next; /* List of attribute differences for a single object */ unsigned obj_depth; @@ -143,7 +143,7 @@ typedef union hwloc_topology_diff_u { /* A difference that is too complex. */ struct hwloc_topology_diff_too_complex_s { - hwloc_topology_diff_type_t type; /* must be HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX */ + hwloc_topology_diff_type_t type; /* must be ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX */ union hwloc_topology_diff_u * next; /* Where we had to stop computing the diff in the first topology */ unsigned obj_depth; @@ -154,14 +154,14 @@ typedef union hwloc_topology_diff_u { /** \brief Compute the difference between 2 topologies. 
* - * The difference is stored as a list of hwloc_topology_diff_t entries + * The difference is stored as a list of ::hwloc_topology_diff_t entries * starting at \p diff. * It is computed by doing a depth-first traversal of both topology trees * simultaneously. * * If the difference between 2 objects is too complex to be represented * (for instance if some objects have different types, or different numbers - * of children), a special diff entry of type HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX + * of children), a special diff entry of type ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX * is queued. * The computation of the diff does not continue below these objects. * So each such diff entry means that the difference between two subtrees @@ -173,7 +173,7 @@ typedef union hwloc_topology_diff_u { * between the topologies. * * \return 1 if the difference is too complex (see above). Some entries in - * the list will be of type HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX. + * the list will be of type ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX. * * \return -1 on any other error. * @@ -183,7 +183,7 @@ typedef union hwloc_topology_diff_u { * * \note The output diff can only be exported to XML or passed to * hwloc_topology_diff_apply() if 0 was returned, i.e. if no entry of type - * HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX is listed. + * ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX is listed. * * \note The output diff may be modified by removing some entries from * the list. The removed entries should be freed by passing them to @@ -202,7 +202,7 @@ enum hwloc_topology_diff_apply_flags_e { /** \brief Apply a topology diff to an existing topology. * - * \p flags is an OR'ed set of hwloc_topology_diff_apply_flags_e. + * \p flags is an OR'ed set of ::hwloc_topology_diff_apply_flags_e. * * The new topology is modified in place. hwloc_topology_dup() * may be used to duplicate it before patching. 
diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/gl.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/gl.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/gl.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/gl.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/glibc-sched.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/glibc-sched.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/glibc-sched.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/glibc-sched.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/helper.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/helper.h similarity index 98% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/helper.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/helper.h index 70d7d170de8..029f2a37efc 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/helper.h +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/helper.h @@ -379,7 +379,7 @@ hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unus return ancestor; } -/** \brief Returns the common parent object to objects lvl1 and lvl2 */ +/** \brief Returns the common parent object to objects \p obj1 and \p obj2 */ static __hwloc_inline hwloc_obj_t hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) __hwloc_attribute_pure; static __hwloc_inline hwloc_obj_t @@ -442,19 +442,19 @@ hwloc_get_next_child (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_ * Return the depth of the topology level that contains cache objects * whose attributes match \p cachedepth and \p cachetype. This function * intends to disambiguate the case where hwloc_get_type_depth() returns - * \p HWLOC_TYPE_DEPTH_MULTIPLE. + * ::HWLOC_TYPE_DEPTH_MULTIPLE. * - * If no cache level matches, \p HWLOC_TYPE_DEPTH_UNKNOWN is returned. + * If no cache level matches, ::HWLOC_TYPE_DEPTH_UNKNOWN is returned. 
* - * If \p cachetype is \p HWLOC_OBJ_CACHE_UNIFIED, the depth of the + * If \p cachetype is ::HWLOC_OBJ_CACHE_UNIFIED, the depth of the * unique matching unified cache level is returned. * - * If \p cachetype is \p HWLOC_OBJ_CACHE_DATA or \p HWLOC_OBJ_CACHE_INSTRUCTION, + * If \p cachetype is ::HWLOC_OBJ_CACHE_DATA or ::HWLOC_OBJ_CACHE_INSTRUCTION, * either a matching cache, or a unified cache is returned. * * If \p cachetype is \c -1, it is ignored and multiple levels may * match. The function returns either the depth of a uniquely matching - * level or \p HWLOC_TYPE_DEPTH_MULTIPLE. + * level or ::HWLOC_TYPE_DEPTH_MULTIPLE. */ static __hwloc_inline int hwloc_get_cache_type_depth (hwloc_topology_t topology, @@ -941,7 +941,7 @@ hwloc_cpuset_to_nodeset(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, /** \brief Convert a CPU set into a NUMA node set without handling non-NUMA cases * - * This is the strict variant of ::hwloc_cpuset_to_nodeset. It does not fix + * This is the strict variant of hwloc_cpuset_to_nodeset(). It does not fix * non-NUMA cases. If the topology contains some NUMA nodes, behave exactly * the same. However, if the topology contains no NUMA nodes, return an empty * nodeset. @@ -993,7 +993,7 @@ hwloc_cpuset_from_nodeset(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwl /** \brief Convert a NUMA node set into a CPU set without handling non-NUMA cases * - * This is the strict variant of ::hwloc_cpuset_from_nodeset. It does not fix + * This is the strict variant of hwloc_cpuset_from_nodeset(). It does not fix * non-NUMA cases. If the topology contains some NUMA nodes, behave exactly * the same. However, if the topology contains no NUMA nodes, return an empty * cpuset. 
diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/inlines.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/inlines.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/inlines.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/inlines.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/intel-mic.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/intel-mic.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/intel-mic.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/intel-mic.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/linux-libnuma.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/linux-libnuma.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/linux-libnuma.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/linux-libnuma.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/linux.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/linux.h similarity index 94% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/linux.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/linux.h index 4ddc90090eb..565bfeec2ac 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/linux.h +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/linux.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2013 Inria. All rights reserved. + * Copyright © 2009-2015 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * See COPYING in top-level directory. */ @@ -55,14 +55,14 @@ HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t * but uses a hwloc cpuset. * * \note This is equivalent to calling hwloc_get_proc_cpubind() with - * HWLOC_CPUBIND_THREAD as flags. + * ::HWLOC_CPUBIND_THREAD as flags. */ HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_cpuset_t set); /** \brief Get the last physical CPU where thread \p tid ran. 
* * \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with - * HWLOC_CPUBIND_THREAD as flags. + * ::HWLOC_CPUBIND_THREAD as flags. */ HWLOC_DECLSPEC int hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topology, pid_t tid, hwloc_bitmap_t set); diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/myriexpress.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/myriexpress.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/myriexpress.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/myriexpress.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/nvml.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/nvml.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/nvml.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/nvml.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/opencl.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/opencl.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/opencl.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/opencl.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/openfabrics-verbs.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/openfabrics-verbs.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/openfabrics-verbs.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/openfabrics-verbs.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/plugins.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/plugins.h similarity index 97% rename from opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/plugins.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/plugins.h index 238a6895aa3..510157bcf51 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/include/hwloc/plugins.h +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/plugins.h @@ -56,7 +56,7 @@ struct hwloc_disc_component { */ const char *name; - /** \brief Component types to exclude, as an OR'ed set of 
HWLOC_DISC_COMPONENT_TYPE_*. + /** \brief Component types to exclude, as an OR'ed set of ::hwloc_disc_component_type_e. * * For a GLOBAL component, this usually includes all other types (~0). * @@ -121,7 +121,7 @@ struct hwloc_backend { /** \private Reserved for the core. Used internally to list backends topology->backends. */ struct hwloc_backend * next; - /** \brief Backend flags, as an OR'ed set of HWLOC_BACKEND_FLAG_* */ + /** \brief Backend flags, as an OR'ed set of ::hwloc_backend_flag_e */ unsigned long flags; /** \brief Backend-specific 'is_custom' property. @@ -143,7 +143,7 @@ struct hwloc_backend { /** \brief Main discovery callback. * returns > 0 if it modified the topology tree, -1 on error, 0 otherwise. - * May be NULL if type is HWLOC_DISC_COMPONENT_TYPE_MISC. */ + * May be NULL if type is ::HWLOC_DISC_COMPONENT_TYPE_MISC. */ int (*discover)(struct hwloc_backend *backend); /** \brief Callback used by the PCI backend to retrieve the locality of a PCI object from the OS/cpu backend. @@ -213,7 +213,7 @@ typedef enum hwloc_component_type_e { * or dynamically loaded as a plugin. */ struct hwloc_component { - /** \brief Component ABI version, set to HWLOC_COMPONENT_ABI */ + /** \brief Component ABI version, set to ::HWLOC_COMPONENT_ABI */ unsigned abi; /** \brief Process-wide component initialization callback. @@ -379,7 +379,7 @@ hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, co static int verboseenv_value = 0; if (!verboseenv_checked) { const char *verboseenv = getenv("HWLOC_PLUGINS_VERBOSE"); - verboseenv_value = atoi(verboseenv); + verboseenv_value = verboseenv ? atoi(verboseenv) : 0; verboseenv_checked = 1; } if (verboseenv_value) @@ -426,6 +426,8 @@ HWLOC_DECLSPEC int hwloc_pci_find_linkspeed(const unsigned char *config, unsigne /** \brief Modify the PCI device object into a bridge and fill its attribute if a bridge is found in the PCI config space. 
* * This function requires 64 bytes of common configuration header at the beginning of config. + * + * Returns -1 and destroys \p obj if bridge fields are invalid. */ HWLOC_DECLSPEC int hwloc_pci_prepare_bridge(hwloc_obj_t obj, const unsigned char *config); diff --git a/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/rename.h b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/rename.h new file mode 100644 index 00000000000..27a6f9d12c6 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/hwloc/rename.h @@ -0,0 +1,656 @@ +/* + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * Copyright © 2010-2015 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +#ifndef HWLOC_RENAME_H +#define HWLOC_RENAME_H + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Only enact these defines if we're actually renaming the symbols + (i.e., avoid trying to have no-op defines if we're *not* + renaming). */ + +#if HWLOC_SYM_TRANSFORM + +/* Use a preprocessor two-step in order to get the prefixing right. + Make 2 macros: HWLOC_NAME and HWLOC_NAME_CAPS for renaming + things. */ + +#define HWLOC_MUNGE_NAME(a, b) HWLOC_MUNGE_NAME2(a, b) +#define HWLOC_MUNGE_NAME2(a, b) a ## b +#define HWLOC_NAME(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX, hwloc_ ## name) +#define HWLOC_NAME_CAPS(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX_CAPS, hwloc_ ## name) + +/* Now define all the "real" names to be the prefixed names. This + allows us to use the real names throughout the code base (i.e., + "hwloc_"); the preprocessor will adjust to have the prefixed + name under the covers.
*/ + +/* Names from hwloc.h */ + +#define hwloc_get_api_version HWLOC_NAME(get_api_version) + +#define hwloc_topology HWLOC_NAME(topology) +#define hwloc_topology_t HWLOC_NAME(topology_t) + +#define hwloc_cpuset_t HWLOC_NAME(cpuset_t) +#define hwloc_const_cpuset_t HWLOC_NAME(const_cpuset_t) +#define hwloc_nodeset_t HWLOC_NAME(nodeset_t) +#define hwloc_const_nodeset_t HWLOC_NAME(const_nodeset_t) + +#define HWLOC_OBJ_SYSTEM HWLOC_NAME_CAPS(OBJ_SYSTEM) +#define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE) +#define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE) +#define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE) +#define HWLOC_OBJ_CACHE HWLOC_NAME_CAPS(OBJ_CACHE) +#define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE) +#define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU) +#define HWLOC_OBJ_MISC HWLOC_NAME_CAPS(OBJ_MISC) +#define HWLOC_OBJ_GROUP HWLOC_NAME_CAPS(OBJ_GROUP) +#define HWLOC_OBJ_BRIDGE HWLOC_NAME_CAPS(OBJ_BRIDGE) +#define HWLOC_OBJ_PCI_DEVICE HWLOC_NAME_CAPS(OBJ_PCI_DEVICE) +#define HWLOC_OBJ_OS_DEVICE HWLOC_NAME_CAPS(OBJ_OS_DEVICE) +#define HWLOC_OBJ_TYPE_MAX HWLOC_NAME_CAPS(OBJ_TYPE_MAX) +#define hwloc_obj_type_t HWLOC_NAME(obj_type_t) + +#define hwloc_obj_cache_type_e HWLOC_NAME(obj_cache_type_e) +#define hwloc_obj_cache_type_t HWLOC_NAME(obj_cache_type_t) +#define HWLOC_OBJ_CACHE_UNIFIED HWLOC_NAME_CAPS(OBJ_CACHE_UNIFIED) +#define HWLOC_OBJ_CACHE_DATA HWLOC_NAME_CAPS(OBJ_CACHE_DATA) +#define HWLOC_OBJ_CACHE_INSTRUCTION HWLOC_NAME_CAPS(OBJ_CACHE_INSTRUCTION) + +#define hwloc_obj_bridge_type_e HWLOC_NAME(obj_bridge_type_e) +#define hwloc_obj_bridge_type_t HWLOC_NAME(obj_bridge_type_t) +#define HWLOC_OBJ_BRIDGE_HOST HWLOC_NAME_CAPS(OBJ_BRIDGE_HOST) +#define HWLOC_OBJ_BRIDGE_PCI HWLOC_NAME_CAPS(OBJ_BRIDGE_PCI) + +#define hwloc_obj_osdev_type_e HWLOC_NAME(obj_osdev_type_e) +#define hwloc_obj_osdev_type_t HWLOC_NAME(obj_osdev_type_t) +#define HWLOC_OBJ_OSDEV_BLOCK HWLOC_NAME_CAPS(OBJ_OSDEV_BLOCK) +#define HWLOC_OBJ_OSDEV_GPU HWLOC_NAME_CAPS(OBJ_OSDEV_GPU) +#define 
HWLOC_OBJ_OSDEV_NETWORK HWLOC_NAME_CAPS(OBJ_OSDEV_NETWORK) +#define HWLOC_OBJ_OSDEV_OPENFABRICS HWLOC_NAME_CAPS(OBJ_OSDEV_OPENFABRICS) +#define HWLOC_OBJ_OSDEV_DMA HWLOC_NAME_CAPS(OBJ_OSDEV_DMA) +#define HWLOC_OBJ_OSDEV_COPROC HWLOC_NAME_CAPS(OBJ_OSDEV_COPROC) + +#define hwloc_compare_types HWLOC_NAME(compare_types) + +#define hwloc_compare_types_e HWLOC_NAME(compare_types_e) +#define HWLOC_TYPE_UNORDERED HWLOC_NAME_CAPS(TYPE_UNORDERED) + +#define hwloc_obj_memory_s HWLOC_NAME(obj_memory_s) +#define hwloc_obj_memory_page_type_s HWLOC_NAME(obj_memory_page_type_s) + +#define hwloc_obj HWLOC_NAME(obj) +#define hwloc_obj_t HWLOC_NAME(obj_t) + +#define hwloc_distances_s HWLOC_NAME(distances_s) +#define hwloc_obj_info_s HWLOC_NAME(obj_info_s) + +#define hwloc_obj_attr_u HWLOC_NAME(obj_attr_u) +#define hwloc_cache_attr_s HWLOC_NAME(cache_attr_s) +#define hwloc_group_attr_s HWLOC_NAME(group_attr_s) +#define hwloc_pcidev_attr_s HWLOC_NAME(pcidev_attr_s) +#define hwloc_bridge_attr_s HWLOC_NAME(bridge_attr_s) +#define hwloc_osdev_attr_s HWLOC_NAME(osdev_attr_s) + +#define hwloc_topology_init HWLOC_NAME(topology_init) +#define hwloc_topology_load HWLOC_NAME(topology_load) +#define hwloc_topology_destroy HWLOC_NAME(topology_destroy) +#define hwloc_topology_dup HWLOC_NAME(topology_dup) +#define hwloc_topology_check HWLOC_NAME(topology_check) +#define hwloc_topology_ignore_type HWLOC_NAME(topology_ignore_type) +#define hwloc_topology_ignore_type_keep_structure HWLOC_NAME(topology_ignore_type_keep_structure) +#define hwloc_topology_ignore_all_keep_structure HWLOC_NAME(topology_ignore_all_keep_structure) + +#define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e) + +#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM) +#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM) +#define HWLOC_TOPOLOGY_FLAG_IO_DEVICES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IO_DEVICES) +#define HWLOC_TOPOLOGY_FLAG_IO_BRIDGES 
HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IO_BRIDGES) +#define HWLOC_TOPOLOGY_FLAG_WHOLE_IO HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_IO) +#define HWLOC_TOPOLOGY_FLAG_ICACHES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_ICACHES) + +#define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags) +#define hwloc_topology_set_fsroot HWLOC_NAME(topology_set_fsroot) +#define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid) +#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) +#define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml) +#define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer) +#define hwloc_topology_set_custom HWLOC_NAME(topology_set_custom) +#define hwloc_topology_set_distance_matrix HWLOC_NAME(topology_set_distance_matrix) + +#define hwloc_topology_discovery_support HWLOC_NAME(topology_discovery_support) +#define hwloc_topology_cpubind_support HWLOC_NAME(topology_cpubind_support) +#define hwloc_topology_membind_support HWLOC_NAME(topology_membind_support) +#define hwloc_topology_support HWLOC_NAME(topology_support) +#define hwloc_topology_get_support HWLOC_NAME(topology_get_support) +#define hwloc_topology_set_userdata HWLOC_NAME(topology_set_userdata) +#define hwloc_topology_get_userdata HWLOC_NAME(topology_get_userdata) + +#define hwloc_topology_export_xml HWLOC_NAME(topology_export_xml) +#define hwloc_topology_export_xmlbuffer HWLOC_NAME(topology_export_xmlbuffer) +#define hwloc_free_xmlbuffer HWLOC_NAME(free_xmlbuffer) +#define hwloc_topology_set_userdata_export_callback HWLOC_NAME(topology_set_userdata_export_callback) +#define hwloc_export_obj_userdata HWLOC_NAME(export_obj_userdata) +#define hwloc_export_obj_userdata_base64 HWLOC_NAME(export_obj_userdata_base64) +#define hwloc_topology_set_userdata_import_callback HWLOC_NAME(topology_set_userdata_import_callback) + +#define hwloc_topology_export_synthetic_flags_e HWLOC_NAME(topology_export_synthetic_flags_e) +#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES 
HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) +#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS) +#define hwloc_topology_export_synthetic HWLOC_NAME(topology_export_synthetic) + +#define hwloc_topology_insert_misc_object_by_cpuset HWLOC_NAME(topology_insert_misc_object_by_cpuset) +#define hwloc_topology_insert_misc_object_by_parent HWLOC_NAME(topology_insert_misc_object_by_parent) + +#define hwloc_custom_insert_topology HWLOC_NAME(custom_insert_topology) +#define hwloc_custom_insert_group_object_by_parent HWLOC_NAME(custom_insert_group_object_by_parent) + +#define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e) +#define HWLOC_RESTRICT_FLAG_ADAPT_DISTANCES HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_DISTANCES) +#define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC) +#define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO) +#define hwloc_topology_restrict HWLOC_NAME(topology_restrict) + +#define hwloc_topology_get_depth HWLOC_NAME(topology_get_depth) +#define hwloc_get_type_depth HWLOC_NAME(get_type_depth) + +#define hwloc_get_type_depth_e HWLOC_NAME(get_type_depth_e) +#define HWLOC_TYPE_DEPTH_UNKNOWN HWLOC_NAME_CAPS(TYPE_DEPTH_UNKNOWN) +#define HWLOC_TYPE_DEPTH_MULTIPLE HWLOC_NAME_CAPS(TYPE_DEPTH_MULTIPLE) +#define HWLOC_TYPE_DEPTH_BRIDGE HWLOC_NAME_CAPS(TYPE_DEPTH_BRIDGE) +#define HWLOC_TYPE_DEPTH_PCI_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_PCI_DEVICE) +#define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE) + +#define hwloc_get_depth_type HWLOC_NAME(get_depth_type) +#define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth) +#define hwloc_get_nbobjs_by_type HWLOC_NAME(get_nbobjs_by_type) + +#define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem) +#define hwloc_topology_get_flags HWLOC_NAME(topology_get_flags) + +#define hwloc_get_obj_by_depth HWLOC_NAME(get_obj_by_depth ) +#define hwloc_get_obj_by_type 
HWLOC_NAME(get_obj_by_type ) + +#define hwloc_obj_type_string HWLOC_NAME(obj_type_string ) +#define hwloc_obj_type_sscanf HWLOC_NAME(obj_type_sscanf) +#define hwloc_obj_type_snprintf HWLOC_NAME(obj_type_snprintf ) +#define hwloc_obj_attr_snprintf HWLOC_NAME(obj_attr_snprintf ) +#define hwloc_obj_cpuset_snprintf HWLOC_NAME(obj_cpuset_snprintf) +#define hwloc_obj_get_info_by_name HWLOC_NAME(obj_get_info_by_name) +#define hwloc_obj_add_info HWLOC_NAME(obj_add_info) + +#define HWLOC_CPUBIND_PROCESS HWLOC_NAME_CAPS(CPUBIND_PROCESS) +#define HWLOC_CPUBIND_THREAD HWLOC_NAME_CAPS(CPUBIND_THREAD) +#define HWLOC_CPUBIND_STRICT HWLOC_NAME_CAPS(CPUBIND_STRICT) +#define HWLOC_CPUBIND_NOMEMBIND HWLOC_NAME_CAPS(CPUBIND_NOMEMBIND) + +#define hwloc_cpubind_flags_t HWLOC_NAME(cpubind_flags_t) + +#define hwloc_set_cpubind HWLOC_NAME(set_cpubind) +#define hwloc_get_cpubind HWLOC_NAME(get_cpubind) +#define hwloc_set_proc_cpubind HWLOC_NAME(set_proc_cpubind) +#define hwloc_get_proc_cpubind HWLOC_NAME(get_proc_cpubind) +#define hwloc_set_thread_cpubind HWLOC_NAME(set_thread_cpubind) +#define hwloc_get_thread_cpubind HWLOC_NAME(get_thread_cpubind) + +#define hwloc_get_last_cpu_location HWLOC_NAME(get_last_cpu_location) +#define hwloc_get_proc_last_cpu_location HWLOC_NAME(get_proc_last_cpu_location) + +#define HWLOC_MEMBIND_DEFAULT HWLOC_NAME_CAPS(MEMBIND_DEFAULT) +#define HWLOC_MEMBIND_FIRSTTOUCH HWLOC_NAME_CAPS(MEMBIND_FIRSTTOUCH) +#define HWLOC_MEMBIND_BIND HWLOC_NAME_CAPS(MEMBIND_BIND) +#define HWLOC_MEMBIND_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_INTERLEAVE) +#define HWLOC_MEMBIND_REPLICATE HWLOC_NAME_CAPS(MEMBIND_REPLICATE) +#define HWLOC_MEMBIND_NEXTTOUCH HWLOC_NAME_CAPS(MEMBIND_NEXTTOUCH) +#define HWLOC_MEMBIND_MIXED HWLOC_NAME_CAPS(MEMBIND_MIXED) + +#define hwloc_membind_policy_t HWLOC_NAME(membind_policy_t) + +#define HWLOC_MEMBIND_PROCESS HWLOC_NAME_CAPS(MEMBIND_PROCESS) +#define HWLOC_MEMBIND_THREAD HWLOC_NAME_CAPS(MEMBIND_THREAD) +#define HWLOC_MEMBIND_STRICT 
HWLOC_NAME_CAPS(MEMBIND_STRICT) +#define HWLOC_MEMBIND_MIGRATE HWLOC_NAME_CAPS(MEMBIND_MIGRATE) +#define HWLOC_MEMBIND_NOCPUBIND HWLOC_NAME_CAPS(MEMBIND_NOCPUBIND) + +#define hwloc_membind_flags_t HWLOC_NAME(membind_flags_t) + +#define hwloc_set_membind_nodeset HWLOC_NAME(set_membind_nodeset) +#define hwloc_set_membind HWLOC_NAME(set_membind) +#define hwloc_get_membind_nodeset HWLOC_NAME(get_membind_nodeset) +#define hwloc_get_membind HWLOC_NAME(get_membind) +#define hwloc_set_proc_membind_nodeset HWLOC_NAME(set_proc_membind_nodeset) +#define hwloc_set_proc_membind HWLOC_NAME(set_proc_membind) +#define hwloc_get_proc_membind_nodeset HWLOC_NAME(get_proc_membind_nodeset) +#define hwloc_get_proc_membind HWLOC_NAME(get_proc_membind) +#define hwloc_set_area_membind_nodeset HWLOC_NAME(set_area_membind_nodeset) +#define hwloc_set_area_membind HWLOC_NAME(set_area_membind) +#define hwloc_get_area_membind_nodeset HWLOC_NAME(get_area_membind_nodeset) +#define hwloc_get_area_membind HWLOC_NAME(get_area_membind) +#define hwloc_alloc_membind_nodeset HWLOC_NAME(alloc_membind_nodeset) +#define hwloc_alloc_membind HWLOC_NAME(alloc_membind) +#define hwloc_alloc HWLOC_NAME(alloc) +#define hwloc_free HWLOC_NAME(free) + +#define hwloc_get_non_io_ancestor_obj HWLOC_NAME(get_non_io_ancestor_obj) +#define hwloc_get_next_pcidev HWLOC_NAME(get_next_pcidev) +#define hwloc_get_pcidev_by_busid HWLOC_NAME(get_pcidev_by_busid) +#define hwloc_get_pcidev_by_busidstring HWLOC_NAME(get_pcidev_by_busidstring) +#define hwloc_get_next_osdev HWLOC_NAME(get_next_osdev) +#define hwloc_get_next_bridge HWLOC_NAME(get_next_bridge) +#define hwloc_bridge_covers_pcibus HWLOC_NAME(bridge_covers_pcibus) +#define hwloc_get_hostbridge_by_pcibus HWLOC_NAME(get_hostbridge_by_pcibus) + +/* hwloc/bitmap.h */ + +#define hwloc_bitmap_s HWLOC_NAME(bitmap_s) +#define hwloc_bitmap_t HWLOC_NAME(bitmap_t) +#define hwloc_const_bitmap_t HWLOC_NAME(const_bitmap_t) + +#define hwloc_bitmap_alloc HWLOC_NAME(bitmap_alloc) +#define 
hwloc_bitmap_alloc_full HWLOC_NAME(bitmap_alloc_full) +#define hwloc_bitmap_free HWLOC_NAME(bitmap_free) +#define hwloc_bitmap_dup HWLOC_NAME(bitmap_dup) +#define hwloc_bitmap_copy HWLOC_NAME(bitmap_copy) +#define hwloc_bitmap_snprintf HWLOC_NAME(bitmap_snprintf) +#define hwloc_bitmap_asprintf HWLOC_NAME(bitmap_asprintf) +#define hwloc_bitmap_sscanf HWLOC_NAME(bitmap_sscanf) +#define hwloc_bitmap_list_snprintf HWLOC_NAME(bitmap_list_snprintf) +#define hwloc_bitmap_list_asprintf HWLOC_NAME(bitmap_list_asprintf) +#define hwloc_bitmap_list_sscanf HWLOC_NAME(bitmap_list_sscanf) +#define hwloc_bitmap_taskset_snprintf HWLOC_NAME(bitmap_taskset_snprintf) +#define hwloc_bitmap_taskset_asprintf HWLOC_NAME(bitmap_taskset_asprintf) +#define hwloc_bitmap_taskset_sscanf HWLOC_NAME(bitmap_taskset_sscanf) +#define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero) +#define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill) +#define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong) + +#define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong) +#define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong) +#define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong) +#define hwloc_bitmap_only HWLOC_NAME(bitmap_only) +#define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut) +#define hwloc_bitmap_set HWLOC_NAME(bitmap_set) +#define hwloc_bitmap_set_range HWLOC_NAME(bitmap_set_range) +#define hwloc_bitmap_set_ith_ulong HWLOC_NAME(bitmap_set_ith_ulong) +#define hwloc_bitmap_clr HWLOC_NAME(bitmap_clr) +#define hwloc_bitmap_clr_range HWLOC_NAME(bitmap_clr_range) +#define hwloc_bitmap_isset HWLOC_NAME(bitmap_isset) +#define hwloc_bitmap_iszero HWLOC_NAME(bitmap_iszero) +#define hwloc_bitmap_isfull HWLOC_NAME(bitmap_isfull) +#define hwloc_bitmap_isequal HWLOC_NAME(bitmap_isequal) +#define hwloc_bitmap_intersects HWLOC_NAME(bitmap_intersects) +#define hwloc_bitmap_isincluded HWLOC_NAME(bitmap_isincluded) +#define hwloc_bitmap_or HWLOC_NAME(bitmap_or) +#define hwloc_bitmap_and 
HWLOC_NAME(bitmap_and) +#define hwloc_bitmap_andnot HWLOC_NAME(bitmap_andnot) +#define hwloc_bitmap_xor HWLOC_NAME(bitmap_xor) +#define hwloc_bitmap_not HWLOC_NAME(bitmap_not) +#define hwloc_bitmap_first HWLOC_NAME(bitmap_first) +#define hwloc_bitmap_last HWLOC_NAME(bitmap_last) +#define hwloc_bitmap_next HWLOC_NAME(bitmap_next) +#define hwloc_bitmap_singlify HWLOC_NAME(bitmap_singlify) +#define hwloc_bitmap_compare_first HWLOC_NAME(bitmap_compare_first) +#define hwloc_bitmap_compare HWLOC_NAME(bitmap_compare) +#define hwloc_bitmap_weight HWLOC_NAME(bitmap_weight) + +/* hwloc/helper.h */ + +#define hwloc_get_type_or_below_depth HWLOC_NAME(get_type_or_below_depth) +#define hwloc_get_type_or_above_depth HWLOC_NAME(get_type_or_above_depth) +#define hwloc_get_root_obj HWLOC_NAME(get_root_obj) +#define hwloc_get_ancestor_obj_by_depth HWLOC_NAME(get_ancestor_obj_by_depth) +#define hwloc_get_ancestor_obj_by_type HWLOC_NAME(get_ancestor_obj_by_type) +#define hwloc_get_next_obj_by_depth HWLOC_NAME(get_next_obj_by_depth) +#define hwloc_get_next_obj_by_type HWLOC_NAME(get_next_obj_by_type) +#define hwloc_get_pu_obj_by_os_index HWLOC_NAME(get_pu_obj_by_os_index) +#define hwloc_get_numanode_obj_by_os_index HWLOC_NAME(get_numanode_obj_by_os_index) +#define hwloc_get_next_child HWLOC_NAME(get_next_child) +#define hwloc_get_common_ancestor_obj HWLOC_NAME(get_common_ancestor_obj) +#define hwloc_obj_is_in_subtree HWLOC_NAME(obj_is_in_subtree) +#define hwloc_get_first_largest_obj_inside_cpuset HWLOC_NAME(get_first_largest_obj_inside_cpuset) +#define hwloc_get_largest_objs_inside_cpuset HWLOC_NAME(get_largest_objs_inside_cpuset) +#define hwloc_get_next_obj_inside_cpuset_by_depth HWLOC_NAME(get_next_obj_inside_cpuset_by_depth) +#define hwloc_get_next_obj_inside_cpuset_by_type HWLOC_NAME(get_next_obj_inside_cpuset_by_type) +#define hwloc_get_obj_inside_cpuset_by_depth HWLOC_NAME(get_obj_inside_cpuset_by_depth) +#define hwloc_get_obj_inside_cpuset_by_type 
HWLOC_NAME(get_obj_inside_cpuset_by_type) +#define hwloc_get_nbobjs_inside_cpuset_by_depth HWLOC_NAME(get_nbobjs_inside_cpuset_by_depth) +#define hwloc_get_nbobjs_inside_cpuset_by_type HWLOC_NAME(get_nbobjs_inside_cpuset_by_type) +#define hwloc_get_obj_index_inside_cpuset HWLOC_NAME(get_obj_index_inside_cpuset) +#define hwloc_get_child_covering_cpuset HWLOC_NAME(get_child_covering_cpuset) +#define hwloc_get_obj_covering_cpuset HWLOC_NAME(get_obj_covering_cpuset) +#define hwloc_get_next_obj_covering_cpuset_by_depth HWLOC_NAME(get_next_obj_covering_cpuset_by_depth) +#define hwloc_get_next_obj_covering_cpuset_by_type HWLOC_NAME(get_next_obj_covering_cpuset_by_type) +#define hwloc_get_cache_type_depth HWLOC_NAME(get_cache_type_depth) +#define hwloc_get_cache_covering_cpuset HWLOC_NAME(get_cache_covering_cpuset) +#define hwloc_get_shared_cache_covering_obj HWLOC_NAME(get_shared_cache_covering_obj) +#define hwloc_get_closest_objs HWLOC_NAME(get_closest_objs) +#define hwloc_get_obj_below_by_type HWLOC_NAME(get_obj_below_by_type) +#define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type) +#define hwloc_distrib_flags_e HWLOC_NAME(distrib_flags_e) +#define HWLOC_DISTRIB_FLAG_REVERSE HWLOC_NAME_CAPS(DISTRIB_FLAG_REVERSE) +#define hwloc_distrib HWLOC_NAME(distrib) +#define hwloc_alloc_membind_policy HWLOC_NAME(alloc_membind_policy) +#define hwloc_alloc_membind_policy_nodeset HWLOC_NAME(alloc_membind_policy_nodeset) +#define hwloc_topology_get_complete_cpuset HWLOC_NAME(topology_get_complete_cpuset) +#define hwloc_topology_get_topology_cpuset HWLOC_NAME(topology_get_topology_cpuset) +#define hwloc_topology_get_online_cpuset HWLOC_NAME(topology_get_online_cpuset) +#define hwloc_topology_get_allowed_cpuset HWLOC_NAME(topology_get_allowed_cpuset) +#define hwloc_topology_get_complete_nodeset HWLOC_NAME(topology_get_complete_nodeset) +#define hwloc_topology_get_topology_nodeset HWLOC_NAME(topology_get_topology_nodeset) +#define 
hwloc_topology_get_allowed_nodeset HWLOC_NAME(topology_get_allowed_nodeset) +#define hwloc_cpuset_to_nodeset HWLOC_NAME(cpuset_to_nodeset) +#define hwloc_cpuset_to_nodeset_strict HWLOC_NAME(cpuset_to_nodeset_strict) +#define hwloc_cpuset_from_nodeset HWLOC_NAME(cpuset_from_nodeset) +#define hwloc_cpuset_from_nodeset_strict HWLOC_NAME(cpuset_from_nodeset_strict) +#define hwloc_get_whole_distance_matrix_by_depth HWLOC_NAME(get_whole_distance_matrix_by_depth) +#define hwloc_get_whole_distance_matrix_by_type HWLOC_NAME(get_whole_distance_matrix_by_type) +#define hwloc_get_distance_matrix_covering_obj_by_depth HWLOC_NAME(get_distance_matrix_covering_obj_by_depth) +#define hwloc_get_latency HWLOC_NAME(get_latency) + +/* diff.h */ + +#define hwloc_topology_diff_obj_attr_type_e HWLOC_NAME(topology_diff_obj_attr_type_e) +#define hwloc_topology_diff_obj_attr_type_t HWLOC_NAME(topology_diff_obj_attr_type_t) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_SIZE) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_NAME) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_INFO) +#define hwloc_topology_diff_obj_attr_u HWLOC_NAME(topology_diff_obj_attr_u) +#define hwloc_topology_diff_obj_attr_generic_s HWLOC_NAME(topology_diff_obj_attr_generic_s) +#define hwloc_topology_diff_obj_attr_uint64_s HWLOC_NAME(topology_diff_obj_attr_uint64_s) +#define hwloc_topology_diff_obj_attr_string_s HWLOC_NAME(topology_diff_obj_attr_string_s) +#define hwloc_topology_diff_type_e HWLOC_NAME(topology_diff_type_e) +#define hwloc_topology_diff_type_t HWLOC_NAME(topology_diff_type_t) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR) +#define HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX HWLOC_NAME_CAPS(TOPOLOGY_DIFF_TOO_COMPLEX) +#define hwloc_topology_diff_u HWLOC_NAME(topology_diff_u) +#define hwloc_topology_diff_t HWLOC_NAME(topology_diff_t) +#define hwloc_topology_diff_generic_s 
HWLOC_NAME(topology_diff_generic_s) +#define hwloc_topology_diff_obj_attr_s HWLOC_NAME(topology_diff_obj_attr_s) +#define hwloc_topology_diff_too_complex_s HWLOC_NAME(topology_diff_too_complex_s) +#define hwloc_topology_diff_build HWLOC_NAME(topology_diff_build) +#define hwloc_topology_diff_apply_flags_e HWLOC_NAME(topology_diff_apply_flags_e) +#define HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_APPLY_REVERSE) +#define hwloc_topology_diff_apply HWLOC_NAME(topology_diff_apply) +#define hwloc_topology_diff_destroy HWLOC_NAME(topology_diff_destroy) +#define hwloc_topology_diff_load_xml HWLOC_NAME(topology_diff_load_xml) +#define hwloc_topology_diff_export_xml HWLOC_NAME(topology_diff_export_xml) +#define hwloc_topology_diff_load_xmlbuffer HWLOC_NAME(topology_diff_load_xmlbuffer) +#define hwloc_topology_diff_export_xmlbuffer HWLOC_NAME(topology_diff_export_xmlbuffer) + +/* glibc-sched.h */ + +#define hwloc_cpuset_to_glibc_sched_affinity HWLOC_NAME(cpuset_to_glibc_sched_affinity) +#define hwloc_cpuset_from_glibc_sched_affinity HWLOC_NAME(cpuset_from_glibc_sched_affinity) + +/* linux-libnuma.h */ + +#define hwloc_cpuset_to_linux_libnuma_ulongs HWLOC_NAME(cpuset_to_linux_libnuma_ulongs) +#define hwloc_nodeset_to_linux_libnuma_ulongs HWLOC_NAME(nodeset_to_linux_libnuma_ulongs) +#define hwloc_cpuset_from_linux_libnuma_ulongs HWLOC_NAME(cpuset_from_linux_libnuma_ulongs) +#define hwloc_nodeset_from_linux_libnuma_ulongs HWLOC_NAME(nodeset_from_linux_libnuma_ulongs) +#define hwloc_cpuset_to_linux_libnuma_bitmask HWLOC_NAME(cpuset_to_linux_libnuma_bitmask) +#define hwloc_nodeset_to_linux_libnuma_bitmask HWLOC_NAME(nodeset_to_linux_libnuma_bitmask) +#define hwloc_cpuset_from_linux_libnuma_bitmask HWLOC_NAME(cpuset_from_linux_libnuma_bitmask) +#define hwloc_nodeset_from_linux_libnuma_bitmask HWLOC_NAME(nodeset_from_linux_libnuma_bitmask) + +/* linux.h */ + +#define hwloc_linux_parse_cpumap_file HWLOC_NAME(linux_parse_cpumap_file) +#define 
hwloc_linux_set_tid_cpubind HWLOC_NAME(linux_set_tid_cpubind) +#define hwloc_linux_get_tid_cpubind HWLOC_NAME(linux_get_tid_cpubind) +#define hwloc_linux_get_tid_last_cpu_location HWLOC_NAME(linux_get_tid_last_cpu_location) + +/* openfabrics-verbs.h */ + +#define hwloc_ibv_get_device_cpuset HWLOC_NAME(ibv_get_device_cpuset) +#define hwloc_ibv_get_device_osdev HWLOC_NAME(ibv_get_device_osdev) +#define hwloc_ibv_get_device_osdev_by_name HWLOC_NAME(ibv_get_device_osdev_by_name) + +/* myriexpress.h */ + +#define hwloc_mx_board_get_device_cpuset HWLOC_NAME(mx_board_get_device_cpuset) +#define hwloc_mx_endpoint_get_device_cpuset HWLOC_NAME(mx_endpoint_get_device_cpuset) + +/* intel-mic.h */ + +#define hwloc_intel_mic_get_device_cpuset HWLOC_NAME(intel_mic_get_device_cpuset) +#define hwloc_intel_mic_get_device_osdev_by_index HWLOC_NAME(intel_mic_get_device_osdev_by_index) + +/* opencl.h */ + +#define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset) +#define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev) +#define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index) + +/* cuda.h */ + +#define hwloc_cuda_get_device_pci_ids HWLOC_NAME(cuda_get_device_pci_ids) +#define hwloc_cuda_get_device_cpuset HWLOC_NAME(cuda_get_device_cpuset) +#define hwloc_cuda_get_device_pcidev HWLOC_NAME(cuda_get_device_pcidev) +#define hwloc_cuda_get_device_osdev HWLOC_NAME(cuda_get_device_osdev) +#define hwloc_cuda_get_device_osdev_by_index HWLOC_NAME(cuda_get_device_osdev_by_index) + +/* cudart.h */ + +#define hwloc_cudart_get_device_pci_ids HWLOC_NAME(cudart_get_device_pci_ids) +#define hwloc_cudart_get_device_cpuset HWLOC_NAME(cudart_get_device_cpuset) +#define hwloc_cudart_get_device_pcidev HWLOC_NAME(cudart_get_device_pcidev) +#define hwloc_cudart_get_device_osdev_by_index HWLOC_NAME(cudart_get_device_osdev_by_index) + +/* nvml.h */ + +#define hwloc_nvml_get_device_cpuset HWLOC_NAME(nvml_get_device_cpuset) +#define 
hwloc_nvml_get_device_osdev HWLOC_NAME(nvml_get_device_osdev) +#define hwloc_nvml_get_device_osdev_by_index HWLOC_NAME(nvml_get_device_osdev_by_index) + +/* gl.h */ + +#define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device) +#define hwloc_gl_get_display_osdev_by_name HWLOC_NAME(gl_get_display_osdev_by_name) +#define hwloc_gl_get_display_by_osdev HWLOC_NAME(gl_get_display_by_osdev) + +/* hwloc/plugins.h */ + +#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e) +#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU) +#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL) +#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC) +#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t) +#define hwloc_disc_component HWLOC_NAME(disc_component) + +#define hwloc_backend HWLOC_NAME(backend) +#define hwloc_backend_flag_e HWLOC_NAME(backend_flag_e) +#define HWLOC_BACKEND_FLAG_NEED_LEVELS HWLOC_NAME_CAPS(BACKEND_FLAG_NEED_LEVELS) + +#define hwloc_backend_alloc HWLOC_NAME(backend_alloc) +#define hwloc_backend_enable HWLOC_NAME(backend_enable) +#define hwloc_backends_get_obj_cpuset HWLOC_NAME(backends_get_obj_cpuset) +#define hwloc_backends_notify_new_object HWLOC_NAME(backends_notify_new_object) + +#define hwloc_component_type_e HWLOC_NAME(component_type_e) +#define HWLOC_COMPONENT_TYPE_DISC HWLOC_NAME_CAPS(COMPONENT_TYPE_DISC) +#define HWLOC_COMPONENT_TYPE_XML HWLOC_NAME_CAPS(COMPONENT_TYPE_XML) +#define hwloc_component_type_t HWLOC_NAME(component_type_t) +#define hwloc_component HWLOC_NAME(component) + +#define hwloc_plugin_check_namespace HWLOC_NAME(plugin_check_namespace) + +#define hwloc_insert_object_by_cpuset HWLOC_NAME(insert_object_by_cpuset) +#define hwloc_report_error_t HWLOC_NAME(report_error_t) +#define hwloc_report_os_error HWLOC_NAME(report_os_error) +#define hwloc_hide_errors HWLOC_NAME(hide_errors) +#define 
hwloc__insert_object_by_cpuset HWLOC_NAME(_insert_object_by_cpuset) +#define hwloc_insert_object_by_parent HWLOC_NAME(insert_object_by_parent) +#define hwloc_alloc_setup_object HWLOC_NAME(alloc_setup_object) +#define hwloc_fill_object_sets HWLOC_NAME(fill_object_sets) + +#define hwloc_insert_pci_device_list HWLOC_NAME(insert_pci_device_list) +#define hwloc_pci_find_cap HWLOC_NAME(pci_find_cap) +#define hwloc_pci_find_linkspeed HWLOC_NAME(pci_find_linkspeed) +#define hwloc_pci_prepare_bridge HWLOC_NAME(pci_prepare_bridge) + +/* hwloc/deprecated.h */ + +#define hwloc_obj_type_of_string HWLOC_NAME(obj_type_of_string ) +#define hwloc_obj_snprintf HWLOC_NAME(obj_snprintf) +#define hwloc_distributev HWLOC_NAME(distributev) +#define hwloc_distribute HWLOC_NAME(distribute) + +/* private/debug.h */ + +#define hwloc_debug_enabled HWLOC_NAME(debug_enabled) +#define hwloc_debug HWLOC_NAME(debug) + +/* private/misc.h */ + +#define hwloc_snprintf HWLOC_NAME(snprintf) +#define hwloc_namecoloncmp HWLOC_NAME(namecoloncmp) +#define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual) +#define hwloc_ffs32 HWLOC_NAME(ffs32) +#define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32) +#define hwloc_flsl_manual HWLOC_NAME(flsl_manual) +#define hwloc_fls32 HWLOC_NAME(fls32) +#define hwloc_flsl_from_fls32 HWLOC_NAME(flsl_from_fls32) +#define hwloc_weight_long HWLOC_NAME(weight_long) +#define hwloc_strncasecmp HWLOC_NAME(strncasecmp) + +/* private/cpuid-x86.h */ + +#define hwloc_have_x86_cpuid HWLOC_NAME(have_x86_cpuid) +#define hwloc_x86_cpuid HWLOC_NAME(x86_cpuid) + +/* private/xml.h */ + +#define hwloc__xml_verbose HWLOC_NAME(_xml_verbose) + +#define hwloc__xml_import_state_s HWLOC_NAME(_xml_import_state_s) +#define hwloc__xml_import_state_t HWLOC_NAME(_xml_import_state_t) +#define hwloc__xml_import_diff HWLOC_NAME(_xml_import_diff) +#define hwloc_xml_backend_data_s HWLOC_NAME(xml_backend_data_s) +#define hwloc__xml_export_state_s HWLOC_NAME(_xml_export_state_s) +#define hwloc__xml_export_state_t 
HWLOC_NAME(_xml_export_state_t) +#define hwloc__xml_export_object HWLOC_NAME(_xml_export_object) +#define hwloc__xml_export_diff HWLOC_NAME(_xml_export_diff) + +#define hwloc_xml_callbacks HWLOC_NAME(xml_callbacks) +#define hwloc_xml_component HWLOC_NAME(xml_component) +#define hwloc_xml_callbacks_register HWLOC_NAME(xml_callbacks_register) +#define hwloc_xml_callbacks_reset HWLOC_NAME(xml_callbacks_reset) + +/* private/components.h */ + +#define hwloc_disc_component_force_enable HWLOC_NAME(disc_component_force_enable) +#define hwloc_disc_components_enable_others HWLOC_NAME(disc_components_instantiate_others) + +#define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all) +#define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem) + +#define hwloc_components_init HWLOC_NAME(components_init) +#define hwloc_components_destroy_all HWLOC_NAME(components_destroy_all) + +/* private/private.h */ + +#define hwloc_ignore_type_e HWLOC_NAME(ignore_type_e) + +#define HWLOC_IGNORE_TYPE_NEVER HWLOC_NAME_CAPS(IGNORE_TYPE_NEVER) +#define HWLOC_IGNORE_TYPE_KEEP_STRUCTURE HWLOC_NAME_CAPS(IGNORE_TYPE_KEEP_STRUCTURE) +#define HWLOC_IGNORE_TYPE_ALWAYS HWLOC_NAME_CAPS(IGNORE_TYPE_ALWAYS) + +#define hwloc_os_distances_s HWLOC_NAME(os_distances_s) + +#define hwloc_xml_imported_distances_s HWLOC_NAME(xml_imported_distances_s) + +#define hwloc_alloc_obj_cpusets HWLOC_NAME(alloc_obj_cpusets) +#define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level) +#define hwloc_get_sysctlbyname HWLOC_NAME(get_sysctlbyname) +#define hwloc_get_sysctl HWLOC_NAME(get_sysctl) +#define hwloc_fallback_nbprocessors HWLOC_NAME(fallback_nbprocessors) +#define hwloc_connect_children HWLOC_NAME(connect_children) +#define hwloc_connect_levels HWLOC_NAME(connect_levels) + +#define hwloc__object_cpusets_compare_first HWLOC_NAME(_object_cpusets_compare_first) + +#define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults) +#define hwloc_topology_clear HWLOC_NAME(topology_clear) + +#define 
hwloc__add_info HWLOC_NAME(_add_info) +#define hwloc__find_info_slot HWLOC_NAME(_find_info_slot) +#define hwloc__move_infos HWLOC_NAME(_move_infos) +#define hwloc__free_infos HWLOC_NAME(_free_infos) + +#define hwloc_binding_hooks HWLOC_NAME(binding_hooks) +#define hwloc_set_native_binding_hooks HWLOC_NAME(set_native_binding_hooks) +#define hwloc_set_binding_hooks HWLOC_NAME(set_binding_hooks) + +#define hwloc_set_linuxfs_hooks HWLOC_NAME(set_linuxfs_hooks) +#define hwloc_set_bgq_hooks HWLOC_NAME(set_bgq_hooks) +#define hwloc_set_solaris_hooks HWLOC_NAME(set_solaris_hooks) +#define hwloc_set_aix_hooks HWLOC_NAME(set_aix_hooks) +#define hwloc_set_osf_hooks HWLOC_NAME(set_osf_hooks) +#define hwloc_set_windows_hooks HWLOC_NAME(set_windows_hooks) +#define hwloc_set_darwin_hooks HWLOC_NAME(set_darwin_hooks) +#define hwloc_set_freebsd_hooks HWLOC_NAME(set_freebsd_hooks) +#define hwloc_set_netbsd_hooks HWLOC_NAME(set_netbsd_hooks) +#define hwloc_set_hpux_hooks HWLOC_NAME(set_hpux_hooks) + +#define hwloc_look_hardwired_fujitsu_k HWLOC_NAME(look_hardwired_fujitsu_k) +#define hwloc_look_hardwired_fujitsu_fx10 HWLOC_NAME(look_hardwired_fujitsu_fx10) +#define hwloc_look_hardwired_fujitsu_fx100 HWLOC_NAME(look_hardwired_fujitsu_fx100) + +#define hwloc_add_uname_info HWLOC_NAME(add_uname_info) +#define hwloc_free_unlinked_object HWLOC_NAME(free_unlinked_object) +#define hwloc__duplicate_objects HWLOC_NAME(_duplicate_objects) + +#define hwloc_alloc_heap HWLOC_NAME(alloc_heap) +#define hwloc_alloc_mmap HWLOC_NAME(alloc_mmap) +#define hwloc_free_heap HWLOC_NAME(free_heap) +#define hwloc_free_mmap HWLOC_NAME(free_mmap) +#define hwloc_alloc_or_fail HWLOC_NAME(alloc_or_fail) + +#define hwloc_distances_init HWLOC_NAME(distances_init) +#define hwloc_distances_destroy HWLOC_NAME(distances_destroy) +#define hwloc_distances_set HWLOC_NAME(distances_set) +#define hwloc_distances_set_from_env HWLOC_NAME(distances_set_from_env) +#define hwloc_distances_restrict_os 
HWLOC_NAME(distances_restrict_os) +#define hwloc_distances_restrict HWLOC_NAME(distances_restrict) +#define hwloc_distances_finalize_os HWLOC_NAME(distances_finalize_os) +#define hwloc_distances_finalize_logical HWLOC_NAME(distances_finalize_logical) +#define hwloc_clear_object_distances HWLOC_NAME(clear_object_distances) +#define hwloc_clear_object_distances_one HWLOC_NAME(clear_object_distances_one) +#define hwloc_group_by_distances HWLOC_NAME(group_by_distances) + +#define hwloc_encode_to_base64 HWLOC_NAME(encode_to_base64) +#define hwloc_decode_from_base64 HWLOC_NAME(decode_from_base64) + +#define hwloc_obj_add_info_nodup HWLOC_NAME(obj_add_info_nodup) + +#define hwloc_progname HWLOC_NAME(progname) + +#define hwloc_bitmap_compare_inclusion HWLOC_NAME(bitmap_compare_inclusion) + +/* private/solaris-chiptype.h */ + +#define hwloc_solaris_get_chip_type HWLOC_NAME(solaris_get_chip_type) +#define hwloc_solaris_get_chip_model HWLOC_NAME(solaris_get_chip_model) + +#endif /* HWLOC_SYM_TRANSFORM */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_RENAME_H */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/private/autogen/config.h.in b/opal/mca/hwloc/hwloc1112/hwloc/include/private/autogen/config.h.in similarity index 99% rename from opal/mca/hwloc/hwloc1110/hwloc/include/private/autogen/config.h.in rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/autogen/config.h.in index 9a3d7094fe5..d02c357936e 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/include/private/autogen/config.h.in +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/private/autogen/config.h.in @@ -240,6 +240,9 @@ /* Define to 1 if the system has the type `PROCESSOR_GROUP_INFO'. */ #undef HAVE_PROCESSOR_GROUP_INFO +/* Define to 1 if the system has the type `PROCESSOR_NUMBER'. */ +#undef HAVE_PROCESSOR_NUMBER + /* Define to 1 if the system has the type `PROCESSOR_RELATIONSHIP'. */ #undef HAVE_PROCESSOR_RELATIONSHIP @@ -527,6 +530,9 @@ /* Define to 1 if you have the header file. 
*/ #undef HWLOC_HAVE_STDINT_H +/* Define to 1 if function `syscall' is available */ +#undef HWLOC_HAVE_SYSCALL + /* Define to 1 if you have the `windows.h' header. */ #undef HWLOC_HAVE_WINDOWS_H @@ -536,9 +542,6 @@ /* Define to 1 if you have x86 cpuid */ #undef HWLOC_HAVE_X86_CPUID -/* Define to 1 if the _syscall3 macro works */ -#undef HWLOC_HAVE__SYSCALL3 - /* Define to 1 on HP-UX */ #undef HWLOC_HPUX_SYS diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/private/components.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/components.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/private/components.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/components.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/private/cpuid-x86.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/cpuid-x86.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/private/cpuid-x86.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/cpuid-x86.h diff --git a/opal/mca/hwloc/hwloc1112/hwloc/include/private/debug.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/debug.h new file mode 100644 index 00000000000..2038a4cfe5e --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/private/debug.h @@ -0,0 +1,78 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2015 Inria. All rights reserved. + * Copyright © 2009, 2011 Université Bordeaux + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. 
+ */ + +/* The configuration file */ + +#ifndef HWLOC_DEBUG_H +#define HWLOC_DEBUG_H + +#include + +#ifdef HWLOC_DEBUG +#include +#include +#endif + +#ifdef HWLOC_DEBUG +static __hwloc_inline int hwloc_debug_enabled(void) +{ + static int checked = 0; + static int enabled = 1; + if (!checked) { + const char *env = getenv("HWLOC_DEBUG_VERBOSE"); + if (env) + enabled = atoi(env); + if (enabled) + fprintf(stderr, "hwloc verbose debug enabled, may be disabled with HWLOC_DEBUG_VERBOSE=0 in the environment.\n"); + checked = 1; + } + return enabled; +} +#endif + +static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, ...) +{ +#ifdef HWLOC_DEBUG + if (hwloc_debug_enabled()) { + va_list ap; + va_start(ap, s); + vfprintf(stderr, s, ap); + va_end(ap); + } +#endif +} + +#ifdef HWLOC_DEBUG +#define hwloc_debug_bitmap(fmt, bitmap) do { \ +if (hwloc_debug_enabled()) { \ + char *s; \ + hwloc_bitmap_asprintf(&s, bitmap); \ + fprintf(stderr, fmt, s); \ + free(s); \ +} } while (0) +#define hwloc_debug_1arg_bitmap(fmt, arg1, bitmap) do { \ +if (hwloc_debug_enabled()) { \ + char *s; \ + hwloc_bitmap_asprintf(&s, bitmap); \ + fprintf(stderr, fmt, arg1, s); \ + free(s); \ +} } while (0) +#define hwloc_debug_2args_bitmap(fmt, arg1, arg2, bitmap) do { \ +if (hwloc_debug_enabled()) { \ + char *s; \ + hwloc_bitmap_asprintf(&s, bitmap); \ + fprintf(stderr, fmt, arg1, arg2, s); \ + free(s); \ +} } while (0) +#else +#define hwloc_debug_bitmap(s, bitmap) do { } while(0) +#define hwloc_debug_1arg_bitmap(s, arg1, bitmap) do { } while(0) +#define hwloc_debug_2args_bitmap(s, arg1, arg2, bitmap) do { } while(0) +#endif + +#endif /* HWLOC_DEBUG_H */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/private/misc.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/misc.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/private/misc.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/misc.h diff --git 
a/opal/mca/hwloc/hwloc1110/hwloc/include/private/private.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/private.h similarity index 97% rename from opal/mca/hwloc/hwloc1110/hwloc/include/private/private.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/private.h index 0d592986ddc..dafc26670a0 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/include/private/private.h +++ b/opal/mca/hwloc/hwloc1112/hwloc/include/private/private.h @@ -192,6 +192,10 @@ extern void hwloc_set_netbsd_hooks(struct hwloc_binding_hooks *binding_hooks, st extern void hwloc_set_hpux_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support); #endif /* HWLOC_HPUX_SYS */ +extern int hwloc_look_hardwired_fujitsu_k(struct hwloc_topology *topology); +extern int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology); +extern int hwloc_look_hardwired_fujitsu_fx100(struct hwloc_topology *topology); + /* Insert uname-specific names/values in the object infos array. * If cached_uname isn't NULL, it is used as a struct utsname instead of recalling uname. * Any field that starts with \0 is ignored. 
@@ -256,8 +260,13 @@ extern void hwloc_group_by_distances(struct hwloc_topology *topology); } \ } while(0) #else /* HAVE_USELOCALE */ +#if __HWLOC_HAVE_ATTRIBUTE_UNUSED #define hwloc_localeswitch_declare int __dummy_nolocale __hwloc_attribute_unused #define hwloc_localeswitch_init() +#else +#define hwloc_localeswitch_declare int __dummy_nolocale +#define hwloc_localeswitch_init() (void)__dummy_nolocale +#endif #define hwloc_localeswitch_fini() #endif /* HAVE_USELOCALE */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/private/solaris-chiptype.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/solaris-chiptype.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/private/solaris-chiptype.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/solaris-chiptype.h diff --git a/opal/mca/hwloc/hwloc1110/hwloc/include/private/xml.h b/opal/mca/hwloc/hwloc1112/hwloc/include/private/xml.h similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/include/private/xml.h rename to opal/mca/hwloc/hwloc1112/hwloc/include/private/xml.h diff --git a/opal/mca/hwloc/hwloc1112/hwloc/src/Makefile.am b/opal/mca/hwloc/hwloc1112/hwloc/src/Makefile.am new file mode 100644 index 00000000000..a241d703f39 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/Makefile.am @@ -0,0 +1,235 @@ +# Copyright © 2009-2014 Inria. All rights reserved. +# Copyright © 2009-2012 Université Bordeaux +# Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. +# Copyright © 2011-2012 Oracle and/or its affiliates. All rights reserved. +# See COPYING in top-level directory. + +AM_CFLAGS = $(HWLOC_CFLAGS) +AM_CPPFLAGS = $(HWLOC_CPPFLAGS) -DHWLOC_INSIDE_LIBHWLOC +AM_LDFLAGS = $(HWLOC_LDFLAGS) + +EXTRA_DIST = dolib.c + +# If we're in standalone mode, build the installable library. +# Otherwise, build the embedded library. 
+ +if HWLOC_BUILD_STANDALONE +lib_LTLIBRARIES = libhwloc.la +else +noinst_LTLIBRARIES = libhwloc_embedded.la +endif + +pluginsdir = @HWLOC_PLUGINS_DIR@ +plugins_LTLIBRARIES = +plugins_ldflags = -module -avoid-version -lltdl +AM_CPPFLAGS += -DHWLOC_PLUGINS_PATH=\"$(HWLOC_PLUGINS_PATH)\" + +# Sources and ldflags + +sources = \ + topology.c \ + traversal.c \ + distances.c \ + components.c \ + bind.c \ + bitmap.c \ + pci-common.c \ + diff.c \ + misc.c \ + base64.c \ + topology-noos.c \ + topology-synthetic.c \ + topology-custom.c \ + topology-xml.c \ + topology-xml-nolibxml.c +ldflags = + +# Conditionally add to the sources and ldflags + +if HWLOC_HAVE_LIBXML2 +if HWLOC_XML_LIBXML_BUILD_STATIC +sources += topology-xml-libxml.c +else +plugins_LTLIBRARIES += hwloc_xml_libxml.la +hwloc_xml_libxml_la_SOURCES = topology-xml-libxml.c +hwloc_xml_libxml_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_LIBXML2_CFLAGS) -DHWLOC_INSIDE_PLUGIN +hwloc_xml_libxml_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_LIBXML2_LIBS) +endif +endif HWLOC_HAVE_LIBXML2 + +if HWLOC_HAVE_PCI +if HWLOC_PCI_BUILD_STATIC +sources += topology-pci.c +else +plugins_LTLIBRARIES += hwloc_pci.la +hwloc_pci_la_SOURCES = topology-pci.c +hwloc_pci_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_PCIACCESS_CFLAGS) -DHWLOC_INSIDE_PLUGIN +hwloc_pci_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_PCIACCESS_LIBS) +endif +endif HWLOC_HAVE_PCI + +if HWLOC_HAVE_OPENCL +if HWLOC_OPENCL_BUILD_STATIC +sources += topology-opencl.c +else +plugins_LTLIBRARIES += hwloc_opencl.la +hwloc_opencl_la_SOURCES = topology-opencl.c +hwloc_opencl_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_OPENCL_CFLAGS) -DHWLOC_INSIDE_PLUGIN +hwloc_opencl_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_OPENCL_LIBS) +endif +endif HWLOC_HAVE_OPENCL + +if HWLOC_HAVE_CUDART +if HWLOC_CUDA_BUILD_STATIC +sources += topology-cuda.c +else +plugins_LTLIBRARIES += hwloc_cuda.la +hwloc_cuda_la_SOURCES = topology-cuda.c +hwloc_cuda_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_CUDA_CFLAGS) -DHWLOC_INSIDE_PLUGIN +hwloc_cuda_la_LDFLAGS = 
$(plugins_ldflags) $(HWLOC_CUDA_LIBS) +endif +endif HWLOC_HAVE_CUDART + +if HWLOC_HAVE_NVML +if HWLOC_NVML_BUILD_STATIC +sources += topology-nvml.c +else +plugins_LTLIBRARIES += hwloc_nvml.la +hwloc_nvml_la_SOURCES = topology-nvml.c +hwloc_nvml_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_NVML_CFLAGS) -DHWLOC_INSIDE_PLUGIN +hwloc_nvml_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_NVML_LIBS) +endif +endif HWLOC_HAVE_NVML + +if HWLOC_HAVE_GL +if HWLOC_GL_BUILD_STATIC +sources += topology-gl.c +else +plugins_LTLIBRARIES += hwloc_gl.la +hwloc_gl_la_SOURCES = topology-gl.c +hwloc_gl_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_GL_CFLAGS) -DHWLOC_INSIDE_PLUGIN +hwloc_gl_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_GL_LIBS) +endif +endif HWLOC_HAVE_GL + +if HWLOC_HAVE_SOLARIS +sources += topology-solaris.c +sources += topology-solaris-chiptype.c +endif HWLOC_HAVE_SOLARIS + +if HWLOC_HAVE_LINUX +sources += topology-linux.c topology-hardwired.c +endif HWLOC_HAVE_LINUX + +if HWLOC_HAVE_BGQ +sources += topology-bgq.c +endif HWLOC_HAVE_BGQ + +if HWLOC_HAVE_AIX +sources += topology-aix.c +ldflags += -lpthread +endif HWLOC_HAVE_AIX + +if HWLOC_HAVE_OSF +sources += topology-osf.c +ldflags += -lnuma -lpthread +endif HWLOC_HAVE_OSF + +if HWLOC_HAVE_HPUX +sources += topology-hpux.c +ldflags += -lpthread +endif HWLOC_HAVE_HPUX + +if HWLOC_HAVE_WINDOWS +sources += topology-windows.c +endif HWLOC_HAVE_WINDOWS + +if HWLOC_HAVE_DARWIN +sources += topology-darwin.c +endif HWLOC_HAVE_DARWIN + +if HWLOC_HAVE_FREEBSD +sources += topology-freebsd.c +endif HWLOC_HAVE_FREEBSD + +if HWLOC_HAVE_NETBSD +sources += topology-netbsd.c +ldflags += -lpthread +endif HWLOC_HAVE_NETBSD + +if HWLOC_HAVE_X86_CPUID +sources += topology-x86.c +endif HWLOC_HAVE_X86_CPUID + +if HWLOC_HAVE_GCC +ldflags += -no-undefined +endif HWLOC_HAVE_GCC + + +if HWLOC_HAVE_WINDOWS +# Windows specific rules + +LC_MESSAGES=C +export LC_MESSAGES +ldflags += -Xlinker --output-def -Xlinker .libs/libhwloc.def + +if HWLOC_HAVE_MS_LIB +dolib$(EXEEXT): dolib.c + 
$(CC_FOR_BUILD) $< -o $@ +.libs/libhwloc.lib: libhwloc.la dolib$(EXEEXT) + [ ! -r .libs/libhwloc.def ] || ./dolib$(EXEEXT) "$(HWLOC_MS_LIB)" $(HWLOC_MS_LIB_ARCH) .libs/libhwloc.def $(libhwloc_so_version) .libs/libhwloc.lib +all-local: .libs/libhwloc.lib +clean-local: + $(RM) dolib$(EXEEXT) +endif HWLOC_HAVE_MS_LIB + +install-exec-hook: + [ ! -r .libs/libhwloc.def ] || $(INSTALL) .libs/libhwloc.def $(DESTDIR)$(libdir) +if HWLOC_HAVE_MS_LIB + [ ! -r .libs/libhwloc.def ] || $(INSTALL) .libs/libhwloc.lib $(DESTDIR)$(libdir) + [ ! -r .libs/libhwloc.def ] || $(INSTALL) .libs/libhwloc.exp $(DESTDIR)$(libdir) +endif HWLOC_HAVE_MS_LIB + +uninstall-local: + rm -f $(DESTDIR)$(libdir)/libhwloc.def +if HWLOC_HAVE_MS_LIB + rm -f $(DESTDIR)$(libdir)/libhwloc.lib $(DESTDIR)$(libdir)/libhwloc.exp +endif HWLOC_HAVE_MS_LIB + +# End of Windows specific rules +endif HWLOC_HAVE_WINDOWS + + +# Installable library + +libhwloc_la_SOURCES = $(sources) +libhwloc_la_LDFLAGS = $(ldflags) -version-info $(libhwloc_so_version) $(HWLOC_LIBS) + +if HWLOC_HAVE_PLUGINS +AM_CPPFLAGS += $(LTDLINCL) +libhwloc_la_LDFLAGS += -export-dynamic +libhwloc_la_LIBADD = $(LIBLTDL) +endif + +# Embedded library (note the lack of a .so version number -- that +# intentionally only appears in the installable library). Also note +# the lack of _LDFLAGS -- all libs are added by the upper layer (via +# HWLOC_EMBEDDED_LIBS). 
+ +libhwloc_embedded_la_SOURCES = $(sources) + +# XML data (only install if we're building in standalone mode) + +if HWLOC_BUILD_STANDALONE +xml_DATA = $(srcdir)/hwloc.dtd +xmldir = $(pkgdatadir) +EXTRA_DIST += hwloc.dtd +endif + +DISTCLEANFILES = static-components.h + +if HWLOC_HAVE_PLUGINS +check_LTLIBRARIES = hwloc_fake.la +hwloc_fake_la_SOURCES = topology-fake.c +hwloc_fake_la_LDFLAGS = $(plugins_ldflags) -rpath /nowhere # force libtool to build a shared-library even it's check-only +endif diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/base64.c b/opal/mca/hwloc/hwloc1112/hwloc/src/base64.c similarity index 99% rename from opal/mca/hwloc/hwloc1110/hwloc/src/base64.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/base64.c index 7a3392fab8a..4e1976fde4b 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/base64.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/base64.c @@ -178,7 +178,7 @@ hwloc_encode_to_base64(const char *src, size_t srclength, char *target, size_t t if (datalength >= targsize) return (-1); target[datalength] = '\0'; /* Returned value doesn't count \0. */ - return (datalength); + return (int)(datalength); } /* skips all whitespace anywhere. @@ -213,7 +213,7 @@ hwloc_decode_from_base64(char const *src, char *target, size_t targsize) if (target) { if (tarindex >= targsize) return (-1); - target[tarindex] = (pos - Base64) << 2; + target[tarindex] = (char)(pos - Base64) << 2; } state = 1; break; diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/bind.c b/opal/mca/hwloc/hwloc1112/hwloc/src/bind.c similarity index 93% rename from opal/mca/hwloc/hwloc1110/hwloc/src/bind.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/bind.c index e2b5a063e42..afef5e8f0db 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/bind.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/bind.c @@ -2,7 +2,7 @@ * Copyright © 2009 CNRS * Copyright © 2009-2011 inria. All rights reserved. * Copyright © 2009-2010, 2012 Université Bordeaux - * Copyright © 2011 Cisco Systems, Inc. All rights reserved. 
+ * Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -74,9 +74,13 @@ hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t set, int flags if (topology->binding_hooks.set_thisthread_cpubind) return topology->binding_hooks.set_thisthread_cpubind(topology, set, flags); } else { - if (topology->binding_hooks.set_thisproc_cpubind) - return topology->binding_hooks.set_thisproc_cpubind(topology, set, flags); - else if (topology->binding_hooks.set_thisthread_cpubind) + if (topology->binding_hooks.set_thisproc_cpubind) { + int err = topology->binding_hooks.set_thisproc_cpubind(topology, set, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.set_thisthread_cpubind) return topology->binding_hooks.set_thisthread_cpubind(topology, set, flags); } @@ -94,9 +98,13 @@ hwloc_get_cpubind(hwloc_topology_t topology, hwloc_bitmap_t set, int flags) if (topology->binding_hooks.get_thisthread_cpubind) return topology->binding_hooks.get_thisthread_cpubind(topology, set, flags); } else { - if (topology->binding_hooks.get_thisproc_cpubind) - return topology->binding_hooks.get_thisproc_cpubind(topology, set, flags); - else if (topology->binding_hooks.get_thisthread_cpubind) + if (topology->binding_hooks.get_thisproc_cpubind) { + int err = topology->binding_hooks.get_thisproc_cpubind(topology, set, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.get_thisthread_cpubind) return topology->binding_hooks.get_thisthread_cpubind(topology, set, flags); } @@ -164,9 +172,13 @@ hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t set, int f if (topology->binding_hooks.get_thisthread_last_cpu_location) return topology->binding_hooks.get_thisthread_last_cpu_location(topology, set, flags); } else { - if (topology->binding_hooks.get_thisproc_last_cpu_location) - return 
topology->binding_hooks.get_thisproc_last_cpu_location(topology, set, flags); - else if (topology->binding_hooks.get_thisthread_last_cpu_location) + if (topology->binding_hooks.get_thisproc_last_cpu_location) { + int err = topology->binding_hooks.get_thisproc_last_cpu_location(topology, set, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.get_thisthread_last_cpu_location) return topology->binding_hooks.get_thisthread_last_cpu_location(topology, set, flags); } @@ -272,9 +284,13 @@ hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodes if (topology->binding_hooks.set_thisthread_membind) return topology->binding_hooks.set_thisthread_membind(topology, nodeset, policy, flags); } else { - if (topology->binding_hooks.set_thisproc_membind) - return topology->binding_hooks.set_thisproc_membind(topology, nodeset, policy, flags); - else if (topology->binding_hooks.set_thisthread_membind) + if (topology->binding_hooks.set_thisproc_membind) { + int err = topology->binding_hooks.set_thisproc_membind(topology, nodeset, policy, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.set_thisthread_membind) return topology->binding_hooks.set_thisthread_membind(topology, nodeset, policy, flags); } @@ -307,9 +323,13 @@ hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hw if (topology->binding_hooks.get_thisthread_membind) return topology->binding_hooks.get_thisthread_membind(topology, nodeset, policy, flags); } else { - if (topology->binding_hooks.get_thisproc_membind) - return topology->binding_hooks.get_thisproc_membind(topology, nodeset, policy, flags); - else if (topology->binding_hooks.get_thisthread_membind) + if (topology->binding_hooks.get_thisproc_membind) { + int err = topology->binding_hooks.get_thisproc_membind(topology, nodeset, policy, flags); + if (err >= 0 || errno != ENOSYS) + return err; 
+ /* ENOSYS, fallback */ + } + if (topology->binding_hooks.get_thisthread_membind) return topology->binding_hooks.get_thisthread_membind(topology, nodeset, policy, flags); } @@ -447,7 +467,7 @@ hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, void * hwloc_alloc_heap(hwloc_topology_t topology __hwloc_attribute_unused, size_t len) { - void *p; + void *p = NULL; #if defined(hwloc_getpagesize) && defined(HAVE_POSIX_MEMALIGN) errno = posix_memalign(&p, hwloc_getpagesize(), len); if (errno) diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/bitmap.c b/opal/mca/hwloc/hwloc1112/hwloc/src/bitmap.c similarity index 99% rename from opal/mca/hwloc/hwloc1110/hwloc/src/bitmap.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/bitmap.c index 1e78a96bf34..d6b5c5ec5f2 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/bitmap.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/bitmap.c @@ -241,7 +241,7 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru return -1; ret += res; if (res >= size) - res = size>0 ? size - 1 : 0; + res = size>0 ? (int)size - 1 : 0; tmp += res; size -= res; } @@ -291,7 +291,7 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru #endif if (res >= size) - res = size>0 ? size - 1 : 0; + res = size>0 ? (int)size - 1 : 0; tmp += res; size -= res; @@ -422,7 +422,7 @@ int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, const ret += res; if (res >= size) - res = size>0 ? size - 1 : 0; + res = size>0 ? (int)size - 1 : 0; tmp += res; size -= res; @@ -526,7 +526,7 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co return -1; ret += res; if (res >= size) - res = size>0 ? size - 1 : 0; + res = size>0 ? (int)size - 1 : 0; tmp += res; size -= res; } @@ -562,7 +562,7 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co return -1; ret += res; if (res >= size) - res = size>0 ? size - 1 : 0; + res = size>0 ? 
(int)size - 1 : 0; tmp += res; size -= res; } @@ -620,7 +620,7 @@ int hwloc_bitmap_taskset_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc } /* we know there are other characters now */ - chars = strlen(current); + chars = (int)strlen(current); count = (chars * 4 + HWLOC_BITS_PER_LONG - 1) / HWLOC_BITS_PER_LONG; hwloc_bitmap_reset_by_ulongs(set, count); diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/components.c b/opal/mca/hwloc/hwloc1112/hwloc/src/components.c similarity index 99% rename from opal/mca/hwloc/hwloc1110/hwloc/src/components.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/components.c index dc656e39853..aa0c8f93981 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/components.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/components.c @@ -520,11 +520,11 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) char c, d; /* replace libpci with pci for backward compatibility with v1.6 */ - if (!strncmp(curenv, "libpci", s)) { + if (!strncmp(curenv, "libpci", 6) && s == 6) { curenv[0] = curenv[1] = curenv[2] = *HWLOC_COMPONENT_SEPS; curenv += 3; s -= 3; - } else if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, "libpci", s-1)) { + } else if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, "libpci", 6) && s == 7 ) { curenv[3] = curenv[0]; curenv[0] = curenv[1] = curenv[2] = *HWLOC_COMPONENT_SEPS; curenv += 3; @@ -583,7 +583,7 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) char *curenv = env; while (*curenv) { size_t s = strcspn(curenv, HWLOC_COMPONENT_SEPS); - if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, comp->name, s-1)) { + if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, comp->name, s-1) && strlen(comp->name) == s-1) { if (hwloc_components_verbose) fprintf(stderr, "Excluding %s discovery component `%s' because of HWLOC_COMPONENTS environment variable\n", hwloc_disc_component_type_string(comp->type), comp->name); diff --git 
a/opal/mca/hwloc/hwloc1110/hwloc/src/diff.c b/opal/mca/hwloc/hwloc1112/hwloc/src/diff.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/diff.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/diff.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/distances.c b/opal/mca/hwloc/hwloc1112/hwloc/src/distances.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/distances.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/distances.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/dolib.c b/opal/mca/hwloc/hwloc1112/hwloc/src/dolib.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/dolib.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/dolib.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/hwloc.dtd b/opal/mca/hwloc/hwloc1112/hwloc/src/hwloc.dtd similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/hwloc.dtd rename to opal/mca/hwloc/hwloc1112/hwloc/src/hwloc.dtd diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/misc.c b/opal/mca/hwloc/hwloc1112/hwloc/src/misc.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/misc.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/misc.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/pci-common.c b/opal/mca/hwloc/hwloc1112/hwloc/src/pci-common.c similarity index 93% rename from opal/mca/hwloc/hwloc1110/hwloc/src/pci-common.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/pci-common.c index 01d4e79b664..367d3860440 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/pci-common.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/pci-common.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef HWLOC_DEBUG @@ -469,7 +470,7 @@ hwloc_pci_find_linkspeed(const unsigned char *config, * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane */ - lanespeed = speed <= 2 ? 
2.5 * speed * 0.8 : 8.0 * 128/130; /* Gbit/s per lane */ + lanespeed = speed <= 2 ? 2.5f * speed * 0.8f : 8.0f * 128/130; /* Gbit/s per lane */ *linkspeed = lanespeed * width / 8; /* GB/s */ return 0; } @@ -499,9 +500,14 @@ hwloc_pci_prepare_bridge(hwloc_obj_t obj, battr = &obj->attr->bridge; - if (config[HWLOC_PCI_PRIMARY_BUS] != pattr->bus) + if (config[HWLOC_PCI_PRIMARY_BUS] != pattr->bus) { + /* Sometimes the config space contains 00 instead of the actual primary bus number. + * Always trust the bus ID because it was built by the system which has more information + * to workaround such problems (e.g. ACPI information about PCI parent/children). + */ hwloc_debug(" %04x:%02x:%02x.%01x bridge with (ignored) invalid PCI_PRIMARY_BUS %02x\n", pattr->domain, pattr->bus, pattr->dev, pattr->func, config[HWLOC_PCI_PRIMARY_BUS]); + } obj->type = HWLOC_OBJ_BRIDGE; battr->upstream_type = HWLOC_OBJ_BRIDGE_PCI; @@ -510,5 +516,21 @@ hwloc_pci_prepare_bridge(hwloc_obj_t obj, battr->downstream.pci.secondary_bus = config[HWLOC_PCI_SECONDARY_BUS]; battr->downstream.pci.subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS]; + if (battr->downstream.pci.secondary_bus <= pattr->bus + || battr->downstream.pci.subordinate_bus <= pattr->bus + || battr->downstream.pci.secondary_bus > battr->downstream.pci.subordinate_bus) { + /* This should catch most cases of invalid bridge information + * (e.g. 00 for secondary and subordinate). + * Ideally we would also check that [secondary-subordinate] is included + * in the parent bridge [secondary+1:subordinate]. But that's hard to do + * because objects may be discovered out of order (especially in the fsroot case). 
+ */ + hwloc_debug(" %04x:%02x:%02x.%01x bridge has invalid secondary-subordinate buses [%02x-%02x]\n", + pattr->domain, pattr->bus, pattr->dev, pattr->func, + battr->downstream.pci.secondary_bus, battr->downstream.pci.subordinate_bus); + hwloc_free_unlinked_object(obj); + return -1; + } + return 0; } diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-aix.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-aix.c similarity index 99% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-aix.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-aix.c index 1b98ba6b92b..37812f3ab81 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-aix.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-aix.c @@ -585,7 +585,7 @@ hwloc_aix_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodes ret = ra_mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0, R_RSET, rsid, aix_policy); rs_free(rsid.at_rset); - return ret; + return ret == (void*)-1 ? NULL : ret; } #endif /* P_DEFAULT */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-bgq.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-bgq.c similarity index 98% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-bgq.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-bgq.c index 1258b654208..f3aec626074 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-bgq.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-bgq.c @@ -15,6 +15,8 @@ #include #include +#ifndef HWLOC_DISABLE_BGQ_PORT_TEST + static int hwloc_look_bgq(struct hwloc_backend *backend) { @@ -239,3 +241,5 @@ const struct hwloc_component hwloc_bgq_component = { 0, &hwloc_bgq_disc_component }; + +#endif /* !HWLOC_DISABLE_BGQ_PORT_TEST */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-cuda.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-cuda.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-cuda.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-cuda.c diff --git 
a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-custom.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-custom.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-custom.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-custom.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-darwin.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-darwin.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-darwin.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-darwin.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-fake.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-fake.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-fake.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-fake.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-freebsd.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-freebsd.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-freebsd.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-freebsd.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-gl.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-gl.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-gl.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-gl.c diff --git a/opal/mca/hwloc/hwloc1112/hwloc/src/topology-hardwired.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-hardwired.c new file mode 100644 index 00000000000..03ffc83408c --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-hardwired.c @@ -0,0 +1,191 @@ +/* + * Copyright © 2015 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +#include + +#include +#include + +int hwloc_look_hardwired_fujitsu_k(struct hwloc_topology *topology) +{ + /* FIXME: what if a broken core is disabled? 
*/ + unsigned i; + hwloc_obj_t obj; + hwloc_bitmap_t set; + + for(i=0; i<8; i++) { + set = hwloc_bitmap_alloc(); + hwloc_bitmap_set(set, i); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + obj->cpuset = hwloc_bitmap_dup(set); + obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; + obj->attr->cache.depth = 1; + obj->attr->cache.size = 32*1024; + obj->attr->cache.linesize = 128; + obj->attr->cache.associativity = 2; + hwloc_insert_object_by_cpuset(topology, obj); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + obj->cpuset = hwloc_bitmap_dup(set); + obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; + obj->attr->cache.depth = 1; + obj->attr->cache.size = 32*1024; + obj->attr->cache.linesize = 128; + obj->attr->cache.associativity = 2; + hwloc_insert_object_by_cpuset(topology, obj); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i); + obj->cpuset = set; + hwloc_insert_object_by_cpuset(topology, obj); + } + + set = hwloc_bitmap_alloc(); + hwloc_bitmap_set_range(set, 0, 7); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + obj->cpuset = hwloc_bitmap_dup(set); + obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + obj->attr->cache.depth = 2; + obj->attr->cache.size = 6*1024*1024; + obj->attr->cache.linesize = 128; + obj->attr->cache.associativity = 12; + hwloc_insert_object_by_cpuset(topology, obj); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0); + obj->cpuset = set; + hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu"); + hwloc_obj_add_info(obj, "CPUModel", "SPARC64 VIIIfx"); + hwloc_insert_object_by_cpuset(topology, obj); + + hwloc_setup_pu_level(topology, 8); + + return 0; +} + +int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology) +{ + /* FIXME: what if a broken core is disabled? 
*/ + unsigned i; + hwloc_obj_t obj; + hwloc_bitmap_t set; + + for(i=0; i<16; i++) { + set = hwloc_bitmap_alloc(); + hwloc_bitmap_set(set, i); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + obj->cpuset = hwloc_bitmap_dup(set); + obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; + obj->attr->cache.depth = 1; + obj->attr->cache.size = 32*1024; + obj->attr->cache.linesize = 128; + obj->attr->cache.associativity = 2; + hwloc_insert_object_by_cpuset(topology, obj); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + obj->cpuset = hwloc_bitmap_dup(set); + obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; + obj->attr->cache.depth = 1; + obj->attr->cache.size = 32*1024; + obj->attr->cache.linesize = 128; + obj->attr->cache.associativity = 2; + hwloc_insert_object_by_cpuset(topology, obj); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i); + obj->cpuset = set; + hwloc_insert_object_by_cpuset(topology, obj); + } + + set = hwloc_bitmap_alloc(); + hwloc_bitmap_set_range(set, 0, 15); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + obj->cpuset = hwloc_bitmap_dup(set); + obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + obj->attr->cache.depth = 2; + obj->attr->cache.size = 12*1024*1024; + obj->attr->cache.linesize = 128; + obj->attr->cache.associativity = 24; + hwloc_insert_object_by_cpuset(topology, obj); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0); + obj->cpuset = set; + hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu"); + hwloc_obj_add_info(obj, "CPUModel", "SPARC64 IXfx"); + hwloc_insert_object_by_cpuset(topology, obj); + + hwloc_setup_pu_level(topology, 16); + + return 0; +} + +int hwloc_look_hardwired_fujitsu_fx100(struct hwloc_topology *topology) +{ + /* FIXME: what if a broken core is disabled? 
*/ + unsigned i; + hwloc_obj_t obj; + hwloc_bitmap_t set; + + for(i=0; i<34; i++) { + set = hwloc_bitmap_alloc(); + hwloc_bitmap_set(set, i); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + obj->cpuset = hwloc_bitmap_dup(set); + obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; + obj->attr->cache.depth = 1; + obj->attr->cache.size = 64*1024; + obj->attr->cache.linesize = 256; + obj->attr->cache.associativity = 4; + hwloc_insert_object_by_cpuset(topology, obj); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + obj->cpuset = hwloc_bitmap_dup(set); + obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; + obj->attr->cache.depth = 1; + obj->attr->cache.size = 64*1024; + obj->attr->cache.linesize = 256; + obj->attr->cache.associativity = 4; + hwloc_insert_object_by_cpuset(topology, obj); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i); + obj->cpuset = set; + hwloc_insert_object_by_cpuset(topology, obj); + } + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + obj->cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_set_range(obj->cpuset, 0, 15); + hwloc_bitmap_set(obj->cpuset, 32); + obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + obj->attr->cache.depth = 2; + obj->attr->cache.size = 12*1024*1024; + obj->attr->cache.linesize = 256; + obj->attr->cache.associativity = 24; + hwloc_insert_object_by_cpuset(topology, obj); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + obj->cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_set_range(obj->cpuset, 16, 31); + hwloc_bitmap_set(obj->cpuset, 33); + obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + obj->attr->cache.depth = 2; + obj->attr->cache.size = 12*1024*1024; + obj->attr->cache.linesize = 256; + obj->attr->cache.associativity = 24; + hwloc_insert_object_by_cpuset(topology, obj); + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0); + obj->cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_set_range(obj->cpuset, 0, 33); + hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu"); + hwloc_obj_add_info(obj, 
"CPUModel", "SPARC64 XIfx"); + hwloc_insert_object_by_cpuset(topology, obj); + + hwloc_setup_pu_level(topology, 34); + + return 0; +} diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-hpux.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-hpux.c similarity index 98% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-hpux.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-hpux.c index 44a4a4c41aa..44258cc2a2a 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-hpux.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-hpux.c @@ -142,6 +142,7 @@ static void* hwloc_hpux_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) { int mmap_flags; + void *p; /* Can not give a set of nodes. */ if (!hwloc_bitmap_isequal(nodeset, hwloc_topology_get_complete_nodeset(topology))) { @@ -165,7 +166,8 @@ hwloc_hpux_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_node return NULL; } - return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | mmap_flags, -1, 0); + p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | mmap_flags, -1, 0); + return p == MAP_FAILED ? NULL : p; } #endif /* MAP_MEM_FIRST_TOUCH */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-linux.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-linux.c similarity index 91% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-linux.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-linux.c index 2d9d4da9cd4..8e3a80af640 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-linux.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-linux.c @@ -1,7 +1,7 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2015 Inria. All rights reserved. - * Copyright © 2009-2013 Université Bordeaux + * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2013, 2015 Université Bordeaux * Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. 
* Copyright © 2015 Intel, Inc. All rights reserved. * Copyright © 2010 IBM @@ -27,7 +27,7 @@ #ifdef HAVE_UNISTD_H #include #endif -#ifdef HAVE_LIBUDEV_H +#ifdef HWLOC_HAVE_LIBUDEV #include #endif #include @@ -36,18 +36,21 @@ #include #include #include +#include #if defined HWLOC_HAVE_SET_MEMPOLICY || defined HWLOC_HAVE_MBIND #define migratepages migrate_pages /* workaround broken migratepages prototype in numaif.h before libnuma 2.0.2 */ #include #endif struct hwloc_linux_backend_data_s { + char *root_path; /* NULL if unused */ int root_fd; /* The file descriptor for the file system root, used when browsing, e.g., Linux' sysfs and procfs. */ int is_real_fsroot; /* Boolean saying whether root_fd points to the real filesystem root of the system */ -#ifdef HAVE_LIBUDEV_H +#ifdef HWLOC_HAVE_LIBUDEV struct udev *udev; /* Global udev context */ #endif - + char *dumped_hwdata_dirname; + int is_knl; struct utsname utsname; /* fields contain \0 when unknown */ int deprecated_classlinks_model; /* -2 if never tried, -1 if unknown, 0 if new (device contains class/name), 1 if old (device contains class:name) */ @@ -61,8 +64,8 @@ struct hwloc_linux_backend_data_s { * Misc Abstraction layers * ***************************/ -#if !(defined HWLOC_HAVE_SCHED_SETAFFINITY) && (defined HWLOC_HAVE__SYSCALL3) -/* libc doesn't have support for sched_setaffinity, build system call +#if !(defined HWLOC_HAVE_SCHED_SETAFFINITY) && (defined HWLOC_HAVE_SYSCALL) +/* libc doesn't have support for sched_setaffinity, make system call * ourselves: */ # include # ifndef __NR_sched_setaffinity @@ -96,7 +99,7 @@ struct hwloc_linux_backend_data_s { # endif # endif # ifndef sched_setaffinity - _syscall3(int, sched_setaffinity, pid_t, pid, unsigned int, lg, const void *, mask) +# define sched_setaffinity(pid, lg, mask) syscall(__NR_sched_setaffinity, pid, lg, mask) # endif # ifndef __NR_sched_getaffinity # ifdef __i386__ @@ -129,7 +132,7 @@ struct hwloc_linux_backend_data_s { # endif # endif # ifndef 
sched_getaffinity - _syscall3(int, sched_getaffinity, pid_t, pid, unsigned int, lg, void *, mask) +# define sched_getaffinity(pid, lg, mask) (syscall(__NR_sched_getaffinity, pid, lg, mask) < 0 ? -1 : 0) # endif #endif @@ -347,7 +350,7 @@ hwloc_linux_set_tid_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, #else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ return sched_setaffinity(tid, sizeof(linux_set), &linux_set); #endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ -#elif defined(HWLOC_HAVE__SYSCALL3) +#elif defined(HWLOC_HAVE_SYSCALL) unsigned long mask = hwloc_bitmap_to_ulong(hwloc_set); #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY @@ -355,10 +358,10 @@ hwloc_linux_set_tid_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, #else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ return sched_setaffinity(tid, sizeof(mask), (void*) &mask); #endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ -#else /* !_SYSCALL3 */ +#else /* !SYSCALL */ errno = ENOSYS; return -1; -#endif /* !_SYSCALL3 */ +#endif /* !SYSCALL */ } #if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY) @@ -427,7 +430,7 @@ hwloc_linux_find_kernel_nr_cpus(hwloc_topology_t topology) /* start from scratch, the topology isn't ready yet (complete_cpuset is missing (-1) or empty (0))*/ nr_cpus = 1; - possible = fopen("/sys/devices/system/cpu/possible", "r"); + possible = fopen("/sys/devices/system/cpu/possible", "r"); /* binding only supported in real fsroot, no need for data->root_fd */ if (possible) { hwloc_bitmap_t possible_bitmap = hwloc_bitmap_alloc(); if (hwloc_linux_parse_cpuset_file(possible, possible_bitmap) == 0) { @@ -510,7 +513,7 @@ hwloc_linux_get_tid_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, for(cpu=0; cpumnt_type, "cpuset")) { + hwloc_debug("Found cpuset mount point on %s\n", mntent->mnt_dir); + *cpuset_mntpnt = strdup(mntent->mnt_dir); break; - - } else if (!strncmp(type, "cgroup ", 7)) { + } else if (!strcmp(mntent->mnt_type, "cgroup")) { /* found a cgroup mntpnt 
*/ - char *opt, *opts; + char *opt, *opts = mntent->mnt_opts; int cpuset_opt = 0; int noprefix_opt = 0; - - /* find options */ - tmp = strchr(type, ' '); - if (!tmp) - continue; - opts = tmp+1; - /* look at options */ while ((opt = strsep(&opts, ",")) != NULL) { if (!strcmp(opt, "cpuset")) @@ -1750,19 +1697,18 @@ hwloc_find_linux_cpuset_mntpnt(char **cgroup_mntpnt, char **cpuset_mntpnt, int f } if (!cpuset_opt) continue; - if (noprefix_opt) { - hwloc_debug("Found cgroup emulating a cpuset mount point on %s\n", path); - *cpuset_mntpnt = hwloc_strdup_mntpath(path, type-path); + hwloc_debug("Found cgroup emulating a cpuset mount point on %s\n", mntent->mnt_dir); + *cpuset_mntpnt = strdup(mntent->mnt_dir); } else { - hwloc_debug("Found cgroup/cpuset mount point on %s\n", path); - *cgroup_mntpnt = hwloc_strdup_mntpath(path, type-path); + hwloc_debug("Found cgroup/cpuset mount point on %s\n", mntent->mnt_dir); + *cgroup_mntpnt = strdup(mntent->mnt_dir); } break; } } - fclose(fd); + endmntent(fd); } /* @@ -2801,6 +2747,96 @@ look_powerpc_device_tree(struct hwloc_topology *topology, free(cpus.p); } +/* Try to add memory-side caches for KNL. 
+ * Returns 0 on success and -1 otherwise */ +static int hwloc_linux_try_add_knl_mcdram_caches(hwloc_topology_t topology, struct hwloc_linux_backend_data_s *data, hwloc_obj_t *nodes, unsigned nbnodes) +{ + char *knl_cache_file; + long long int cache_size = -1; + int associativity = -1; + int inclusiveness = -1; + int line_size = -1; + unsigned i; + FILE *f; + char buffer[512] = {0}; + char *data_beg = NULL; + char *data_end = NULL; + + if (asprintf(&knl_cache_file, "%s/knl_memoryside_cache", data->dumped_hwdata_dirname) < 0) + return -1; + + hwloc_debug("Reading knl cache data from: %s\n", knl_cache_file); + f = hwloc_fopen(knl_cache_file, "r", data->root_fd); + if (!f) { + hwloc_debug("Unable to open KNL data file `%s' (%s)\n", knl_cache_file, strerror(errno)); + free(knl_cache_file); + return -1; + } + free(knl_cache_file); + + data_beg = &buffer[0]; + data_end = data_beg + fread(buffer, 1, sizeof(buffer), f); + + /* file must start with version information, only 1 accepted for now */ + if (strncmp("version: 1\n", data_beg, strlen("version: 1\n"))) { + fprintf(stderr, "Invalid knl_memoryside_cache header, expected \"version: 1\".\n"); + fclose(f); + return -1; + } + data_beg += strlen("version: 1\n"); + + while (data_beg < data_end) { + char *line_end = strstr(data_beg, "\n"); + if (!line_end) + break; + if (!strncmp("cache_size:", data_beg, strlen("cache_size"))) { + sscanf(data_beg, "cache_size: %lld", &cache_size); + hwloc_debug("read cache_size=%lld\n", cache_size); + } else if (!strncmp("line_size:", data_beg, strlen("line_size:"))) { + sscanf(data_beg, "line_size: %d", &line_size); + hwloc_debug("read line_size=%d\n", line_size); + } else if (!strncmp("inclusiveness:", data_beg, strlen("inclusiveness:"))) { + sscanf(data_beg, "inclusiveness: %d", &inclusiveness); + hwloc_debug("read inclusiveness=%d\n", inclusiveness); + } else if (!strncmp("associativity:", data_beg, strlen("associativity:"))) { + sscanf(data_beg, "associativity: %d\n", &associativity); + 
hwloc_debug("read associativity=%d\n", associativity); + } + data_beg += line_end - data_beg +1; + } + + fclose(f); + + if (line_size == -1 || cache_size == -1 || associativity == -1 || inclusiveness == -1) { + hwloc_debug("Incorrect file format line_size=%d cache_size=%lld associativity=%d inclusiveness=%d\n", + line_size, cache_size, associativity, inclusiveness); + return -1; + } + + for(i=0; icpuset)) + /* one L3 per DDR, none for MCDRAM nodes */ + continue; + + cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + if (!cache) + return -1; + + cache->attr->cache.depth = 3; + cache->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + cache->attr->cache.associativity = associativity; + hwloc_obj_add_info(cache, "Inclusive", inclusiveness ? "1" : "0"); + cache->attr->cache.size = cache_size; + cache->attr->cache.linesize = line_size; + cache->cpuset = hwloc_bitmap_dup(nodes[i]->cpuset); + hwloc_obj_add_info(cache, "Type", "MemorySideCache"); + hwloc_insert_object_by_cpuset(topology, cache); + } + return 0; +} + /************************************** @@ -2838,18 +2874,17 @@ look_sysfsnode(struct hwloc_topology *topology, else return -1; - if (nbnodes <= 1) - { - hwloc_bitmap_free(nodeset); - return 0; - } + if (!nbnodes || (nbnodes == 1 && !data->is_knl)) { /* always keep NUMA for KNL, or configs might look too different */ + hwloc_bitmap_free(nodeset); + return 0; + } /* For convenience, put these declarations inside a block. 
*/ { hwloc_obj_t * nodes = calloc(nbnodes, sizeof(hwloc_obj_t)); unsigned *indexes = calloc(nbnodes, sizeof(unsigned)); - float * distances; + float * distances = NULL; int failednodes = 0; unsigned index_; @@ -2886,6 +2921,7 @@ look_sysfsnode(struct hwloc_topology *topology, char nodepath[SYSFS_NUMA_NODE_PATH_LEN]; hwloc_bitmap_t cpuset; hwloc_obj_t node, res_obj; + int annotate; osnode = indexes[index_]; @@ -2897,32 +2933,43 @@ look_sysfsnode(struct hwloc_topology *topology, continue; } - node = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, osnode); - node->cpuset = cpuset; - node->nodeset = hwloc_bitmap_alloc(); - hwloc_bitmap_set(node->nodeset, osnode); - + node = hwloc_get_numanode_obj_by_os_index(topology, osnode); + annotate = (node != NULL); + if (!annotate) { + /* create a new node */ + node = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, osnode); + node->cpuset = cpuset; + node->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(node->nodeset, osnode); + } hwloc_sysfs_node_meminfo_info(topology, data, path, osnode, &node->memory); hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n", osnode, node->cpuset); - res_obj = hwloc_insert_object_by_cpuset(topology, node); - if (node == res_obj) { + + if (annotate) { nodes[index_] = node; } else { - /* We got merged somehow, could be a buggy BIOS reporting wrong NUMA node cpuset. - * This object disappeared, we'll ignore distances */ - failednodes++; + res_obj = hwloc_insert_object_by_cpuset(topology, node); + if (node == res_obj) { + nodes[index_] = node; + } else { + /* We got merged somehow, could be a buggy BIOS reporting wrong NUMA node cpuset. + * This object disappeared, we'll ignore distances */ + failednodes++; + } } } + if (!failednodes && data->is_knl) + hwloc_linux_try_add_knl_mcdram_caches(topology, data, nodes, nbnodes); + if (failednodes) { /* failed to read/create some nodes, don't bother reading/fixing * a distance matrix that would likely be wrong anyway. 
*/ nbnodes -= failednodes; - distances = NULL; - } else { + } else if (nbnodes > 1) { distances = calloc(nbnodes*nbnodes, sizeof(float)); } @@ -2944,6 +2991,46 @@ look_sysfsnode(struct hwloc_topology *topology, hwloc_parse_node_distance(nodepath, nbnodes, distances+index_*nbnodes, data->root_fd); } + if (data->is_knl) { + char *env = getenv("HWLOC_KNL_NUMA_QUIRK"); + if (!(env && !atoi(env)) && nbnodes>=2) { /* SNC2 or SNC4, with 0 or 2/4 MCDRAM, and 0-4 DDR nodes */ + unsigned i, j, closest; + for(i=0; icpuset)) + /* nodes with CPU, that's DDR, skip it */ + continue; + hwloc_obj_add_info(nodes[i], "Type", "MCDRAM"); + + /* DDR is the closest node with CPUs */ + closest = (unsigned)-1; + for(j=0; jcpuset)) + /* nodes without CPU, that's another MCDRAM, skip it */ + continue; + if (closest == (unsigned)-1 || distances[i*nbnodes+j]cpuset = hwloc_bitmap_dup(nodes[i]->cpuset); + cluster->nodeset = hwloc_bitmap_dup(nodes[i]->nodeset); + hwloc_bitmap_or(cluster->cpuset, cluster->cpuset, nodes[closest]->cpuset); + hwloc_bitmap_or(cluster->nodeset, cluster->nodeset, nodes[closest]->nodeset); + hwloc_obj_add_info(cluster, "Type", "Cluster"); + hwloc_insert_object_by_cpuset(topology, cluster); + } + } + /* drop the distance matrix, it contradicts the above NUMA layout groups */ + free(distances); + free(nodes); + free(indexes); + goto out; + } + } + hwloc_distances_set(topology, HWLOC_OBJ_NUMANODE, nbnodes, indexes, nodes, distances, 0 /* OS cannot force */); } @@ -3134,11 +3221,11 @@ look_sysfscpu(struct hwloc_topology *topology, hwloc_bitmap_set(core->cpuset, i); } else { core->cpuset = coreset; + coreset = NULL; /* don't free it */ } hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n", - mycoreid, coreset); + mycoreid, core->cpuset); hwloc_insert_object_by_cpuset(topology, core); - coreset = NULL; /* don't free it */ } /* look at the books */ @@ -3224,6 +3311,11 @@ look_sysfscpu(struct hwloc_topology *topology, kB = atol(str2); /* in kB */ fclose(fd); } + /* KNL reports 
L3 with size=0 and full cpuset in cpuid. + * Let hwloc_linux_try_add_knl_mcdram_cache() detect it better. + */ + if (!kB && depth == 2 && data->is_knl) + continue; /* get the line size */ sprintf(mappath, "%s/cpu%d/cache/index%d/coherency_line_size", path, i, j); @@ -3459,6 +3551,7 @@ hwloc_linux_parse_cpuinfo_generic(const char *prefix, const char *value, return 0; } +/* Lprocs_p set to NULL unless returns > 0 */ static int hwloc_linux_parse_cpuinfo(struct hwloc_linux_backend_data_s *data, const char *path, @@ -3606,6 +3699,7 @@ hwloc_linux_parse_cpuinfo(struct hwloc_linux_backend_data_s *data, fclose(fd); free(str); free(Lprocs); + *Lprocs_p = NULL; return -1; } @@ -3625,18 +3719,13 @@ hwloc_linux_free_cpuinfo(struct hwloc_linux_cpuinfo_proc * Lprocs, unsigned nump static int look_cpuinfo(struct hwloc_topology *topology, - struct hwloc_linux_backend_data_s *data, - const char *path, hwloc_bitmap_t online_cpuset) + struct hwloc_linux_cpuinfo_proc * Lprocs, + unsigned numprocs, hwloc_bitmap_t online_cpuset) { - struct hwloc_linux_cpuinfo_proc * Lprocs = NULL; - struct hwloc_obj_info_s *global_infos = NULL; - unsigned global_infos_count = 0; /* P for physical/OS index, L for logical (e.g. 
in we order we get them, not in the final hwloc logical order) */ unsigned *Lcore_to_Pcore; unsigned *Lcore_to_Ppkg; /* needed because Lcore is equivalent to Pcore+Ppkg, not to Pcore alone */ unsigned *Lpkg_to_Ppkg; - int _numprocs; - unsigned numprocs; unsigned numpkgs=0; unsigned numcores=0; unsigned long Lproc; @@ -3645,20 +3734,6 @@ look_cpuinfo(struct hwloc_topology *topology, unsigned i,j; hwloc_bitmap_t cpuset; - /* parse the entire cpuinfo first, fill the Lprocs array and numprocs */ - _numprocs = hwloc_linux_parse_cpuinfo(data, path, &Lprocs, &global_infos, &global_infos_count); - - - /* setup root info */ - hwloc__move_infos(&hwloc_get_root_obj(topology)->infos, &hwloc_get_root_obj(topology)->infos_count, - &global_infos, &global_infos_count); - - - if (_numprocs <= 0) - /* found no processor */ - return -1; - numprocs = _numprocs; - /* initialize misc arrays, there can be at most numprocs entries */ Lcore_to_Pcore = malloc(numprocs * sizeof(*Lcore_to_Pcore)); Lcore_to_Ppkg = malloc(numprocs * sizeof(*Lcore_to_Ppkg)); @@ -3713,7 +3788,7 @@ look_cpuinfo(struct hwloc_topology *topology, * provide bogus information. We should rather drop it. */ missingpkg=0; for(j=0; jtopology; + struct hwloc_linux_backend_data_s *data = backend->private_data; + FILE *fd; + char line[128]; + + if (getenv("HWLOC_NO_HARDWIRED_TOPOLOGY")) + return -1; + + if (!strcmp(data->utsname.machine, "s64fx")) { + /* Fujistu K-computer, FX10, and FX100 use specific processors + * whose Linux topology support is broken until 4.1 (acc455cffa75070d55e74fc7802b49edbc080e92and) + * and existing machines will likely never be fixed by kernel upgrade. 
+ */ + + /* /proc/cpuinfo starts with one of these lines: + * "cpu : Fujitsu SPARC64 VIIIfx" + * "cpu : Fujitsu SPARC64 XIfx" + * "cpu : Fujitsu SPARC64 IXfx" + */ + fd = hwloc_fopen("/proc/cpuinfo", "r", data->root_fd); + if (!fd) + return -1; + + if (!fgets(line, sizeof(line), fd)) { + fclose(fd); + return -1; + } + fclose(fd); + + if (strncmp(line, "cpu ", 4)) + return -1; + + if (strstr(line, "Fujitsu SPARC64 VIIIfx")) + return hwloc_look_hardwired_fujitsu_k(topology); + else if (strstr(line, "Fujitsu SPARC64 IXfx")) + return hwloc_look_hardwired_fujitsu_fx10(topology); + else if (strstr(line, "FUJITSU SPARC64 XIfx")) + return hwloc_look_hardwired_fujitsu_fx100(topology); + } + return -1; +} + static int hwloc_look_linuxfs(struct hwloc_backend *backend) { @@ -3906,18 +4023,59 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) DIR *nodes_dir; unsigned nbnodes; char *cpuset_mntpnt, *cgroup_mntpnt, *cpuset_name = NULL; + struct hwloc_linux_cpuinfo_proc * Lprocs = NULL; + struct hwloc_obj_info_s *global_infos = NULL; + unsigned global_infos_count = 0; + int numprocs = 0; + int already_pus; int err; - if (topology->levels[0][0]->cpuset) - /* somebody discovered things */ - return 0; + already_pus = (topology->levels[0][0]->complete_cpuset != NULL + && !hwloc_bitmap_iszero(topology->levels[0][0]->complete_cpuset)); + /* if there are PUs, still look at memory information + * since x86 misses NUMA node information (unless the processor supports topoext) + * memory size. 
+ */ + /* allocate root sets in case not done yet */ + hwloc_alloc_obj_cpusets(topology->levels[0][0]); + + /********************************* + * Platform information for later + */ hwloc_gather_system_info(topology, data); - hwloc_alloc_obj_cpusets(topology->levels[0][0]); + /********************** + * /proc/cpuinfo + */ + numprocs = hwloc_linux_parse_cpuinfo(data, "/proc/cpuinfo", &Lprocs, &global_infos, &global_infos_count); - /* Gather the list of admin-disabled cpus and mems */ - hwloc_find_linux_cpuset_mntpnt(&cgroup_mntpnt, &cpuset_mntpnt, data->root_fd); + /* detect models for quirks */ + if (numprocs > 0) { + /* KNL */ + if (!strncmp(data->utsname.machine, "x86", 3)) { /* supports 32bits? */ + unsigned i; + const char *cpuvendor = NULL, *cpufamilynumber = NULL, *cpumodelnumber = NULL; + for(i=0; iis_knl = 1; + } + } + + /********************** + * Gather the list of admin-disabled cpus and mems + */ + hwloc_find_linux_cpuset_mntpnt(&cgroup_mntpnt, &cpuset_mntpnt, data->root_path); if (cgroup_mntpnt || cpuset_mntpnt) { cpuset_name = hwloc_read_linux_cpuset_name(data->root_fd, topology->pid); if (cpuset_name) { @@ -3936,6 +4094,10 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) hwloc_obj_t machine; hwloc_bitmap_t machine_online_set; + if (already_pus) + /* we don't support extending kerrighed topologies */ + return 0; + /* replace top-level object type with SYSTEM and add some MACHINE underneath */ topology->levels[0][0]->type = HWLOC_OBJ_SYSTEM; @@ -3944,13 +4106,19 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) /* No cpuset support for now. */ /* No sys support for now. 
*/ while ((dirent = readdir(nodes_dir)) != NULL) { + struct hwloc_linux_cpuinfo_proc * machine_Lprocs = NULL; + struct hwloc_obj_info_s *machine_global_infos = NULL; + unsigned machine_global_infos_count = 0; + int machine_numprocs = 0; unsigned long node; if (strncmp(dirent->d_name, "node", 4)) continue; machine_online_set = hwloc_bitmap_alloc(); node = strtoul(dirent->d_name+4, NULL, 0); snprintf(path, sizeof(path), "/proc/nodes/node%lu/cpuinfo", node); - err = look_cpuinfo(topology, data, path, machine_online_set); + machine_numprocs = hwloc_linux_parse_cpuinfo(data, path, &machine_Lprocs, &machine_global_infos, &machine_global_infos_count); + err = look_cpuinfo(topology, machine_Lprocs, machine_numprocs, machine_online_set); + hwloc_linux_free_cpuinfo(machine_Lprocs, machine_numprocs, machine_global_infos, machine_global_infos_count); if (err < 0) { hwloc_bitmap_free(machine_online_set); continue; @@ -3972,6 +4140,10 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) } closedir(nodes_dir); } else { + /********************* + * Memory information + */ + /* Get the machine memory attributes */ hwloc_get_procfs_meminfo_info(topology, data, &topology->levels[0][0]->memory); @@ -3988,7 +4160,23 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) topology->levels[0][0]->memory.page_types[i].count = 0; } + /********************** + * CPU information + */ + + /* Don't rediscover CPU resources if already done */ + if (already_pus) + goto done; + /* Gather the list of cpus now */ + err = hwloc_linux_try_hardwired_cpuinfo(backend); + if (!err) + goto done; + + /* setup root info */ + hwloc__move_infos(&hwloc_get_root_obj(topology)->infos, &hwloc_get_root_obj(topology)->infos_count, + &global_infos, &global_infos_count); + if (getenv("HWLOC_LINUX_USE_CPUINFO") || (hwloc_access("/sys/devices/system/cpu/cpu0/topology/core_siblings", R_OK, data->root_fd) < 0 && hwloc_access("/sys/devices/system/cpu/cpu0/topology/thread_siblings", R_OK, data->root_fd) < 0 @@ -3996,26 
+4184,28 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) && hwloc_access("/sys/bus/cpu/devices/cpu0/topology/core_siblings", R_OK, data->root_fd) < 0)) { /* revert to reading cpuinfo only if /sys/.../topology unavailable (before 2.6.16) * or not containing anything interesting */ - err = look_cpuinfo(topology, data, "/proc/cpuinfo", topology->levels[0][0]->online_cpuset); + if (numprocs > 0) + err = look_cpuinfo(topology, Lprocs, numprocs, topology->levels[0][0]->online_cpuset); + else + err = -1; if (err < 0) hwloc_linux_fallback_pu_level(topology); + look_powerpc_device_tree(topology, data); } else { - struct hwloc_linux_cpuinfo_proc * Lprocs = NULL; - struct hwloc_obj_info_s *global_infos = NULL; - unsigned global_infos_count = 0; - int numprocs = hwloc_linux_parse_cpuinfo(data, "/proc/cpuinfo", &Lprocs, &global_infos, &global_infos_count); - if (numprocs <= 0) - Lprocs = NULL; + /* sysfs */ if (look_sysfscpu(topology, data, "/sys/bus/cpu/devices", Lprocs, numprocs) < 0) if (look_sysfscpu(topology, data, "/sys/devices/system/cpu", Lprocs, numprocs) < 0) /* sysfs but we failed to read cpu topology, fallback */ hwloc_linux_fallback_pu_level(topology); - hwloc__move_infos(&hwloc_get_root_obj(topology)->infos, &hwloc_get_root_obj(topology)->infos_count, - &global_infos, &global_infos_count); - hwloc_linux_free_cpuinfo(Lprocs, numprocs, global_infos, global_infos_count); } + done: + + /********************** + * Misc + */ + /* Gather DMI info */ hwloc__get_dmi_id_info(data, topology->levels[0][0]); if (hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO)) @@ -4033,6 +4223,7 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) /* data->utsname was filled with real uname or \0, we can safely pass it */ hwloc_add_uname_info(topology, &data->utsname); + hwloc_linux_free_cpuinfo(Lprocs, numprocs, global_infos, global_infos_count); return 1; } @@ -4405,7 +4596,7 @@ hwloc_linux_block_class_fillinfos(struct hwloc_backend 
*backend, *tmp = '\0'; hwloc_obj_add_info(obj, "LinuxDeviceID", line); -#ifdef HAVE_LIBUDEV_H +#ifdef HWLOC_HAVE_LIBUDEV if (data->udev) { struct udev_device *dev; const char *prop; @@ -4413,20 +4604,30 @@ hwloc_linux_block_class_fillinfos(struct hwloc_backend *backend, if (!dev) return; prop = udev_device_get_property_value(dev, "ID_VENDOR"); - if (prop) - strcpy(vendor, prop); + if (prop) { + strncpy(vendor, prop, sizeof(vendor)); + vendor[sizeof(vendor)-1] = '\0'; + } prop = udev_device_get_property_value(dev, "ID_MODEL"); - if (prop) - strcpy(model, prop); + if (prop) { + strncpy(model, prop, sizeof(model)); + model[sizeof(model)-1] = '\0'; + } prop = udev_device_get_property_value(dev, "ID_REVISION"); - if (prop) - strcpy(revision, prop); + if (prop) { + strncpy(revision, prop, sizeof(revision)); + revision[sizeof(revision)-1] = '\0'; + } prop = udev_device_get_property_value(dev, "ID_SERIAL_SHORT"); - if (prop) - strcpy(serial, prop); + if (prop) { + strncpy(serial, prop, sizeof(serial)); + serial[sizeof(serial)-1] = '\0'; + } prop = udev_device_get_property_value(dev, "ID_TYPE"); - if (prop) - strcpy(blocktype, prop); + if (prop) { + strncpy(blocktype, prop, sizeof(blocktype)); + blocktype[sizeof(blocktype)-1] = '\0'; + } udev_device_unref(dev); } else @@ -4443,15 +4644,20 @@ hwloc_linux_block_class_fillinfos(struct hwloc_backend *backend, if (tmp) *tmp = '\0'; if (!strncmp(line, "E:ID_VENDOR=", strlen("E:ID_VENDOR="))) { - strcpy(vendor, line+strlen("E:ID_VENDOR=")); + strncpy(vendor, line+strlen("E:ID_VENDOR="), sizeof(vendor)); + vendor[sizeof(vendor)-1] = '\0'; } else if (!strncmp(line, "E:ID_MODEL=", strlen("E:ID_MODEL="))) { - strcpy(model, line+strlen("E:ID_MODEL=")); + strncpy(model, line+strlen("E:ID_MODEL="), sizeof(model)); + model[sizeof(model)-1] = '\0'; } else if (!strncmp(line, "E:ID_REVISION=", strlen("E:ID_REVISION="))) { - strcpy(revision, line+strlen("E:ID_REVISION=")); + strncpy(revision, line+strlen("E:ID_REVISION="), sizeof(revision)); 
+ revision[sizeof(revision)-1] = '\0'; } else if (!strncmp(line, "E:ID_SERIAL_SHORT=", strlen("E:ID_SERIAL_SHORT="))) { - strcpy(serial, line+strlen("E:ID_SERIAL_SHORT=")); + strncpy(serial, line+strlen("E:ID_SERIAL_SHORT="), sizeof(serial)); + serial[sizeof(serial)-1] = '\0'; } else if (!strncmp(line, "E:ID_TYPE=", strlen("E:ID_TYPE="))) { - strcpy(blocktype, line+strlen("E:ID_TYPE=")); + strncpy(blocktype, line+strlen("E:ID_TYPE="), sizeof(blocktype)); + blocktype[sizeof(blocktype)-1] = '\0'; } } fclose(fd); @@ -4592,6 +4798,14 @@ hwloc_linux_lookup_block_class(struct hwloc_backend *backend, strcpy(path, pcidevpath); pathlen = strlen(path); + /* look for a direct block device here (such as NVMe, something without controller subdirs in the middle) */ + res += hwloc_linux_class_readdir(backend, pcidev, path, + HWLOC_OBJ_OSDEV_BLOCK, "block", + hwloc_linux_block_class_fillinfos); + if (res) + return res; + /* otherwise try to find controller subdirectories */ + devicedir = hwloc_opendir(pcidevpath, root_fd); if (!devicedir) return 0; @@ -4772,7 +4986,7 @@ hwloc_linux_directlookup_mic_class(struct hwloc_backend *backend, /* read the entire class and find the max id of mic%u dirents */ dir = hwloc_opendir("/sys/devices/virtual/mic", root_fd); if (!dir) { - dir = opendir("/sys/class/mic"); + dir = hwloc_opendir("/sys/class/mic", root_fd); if (!dir) return 0; } @@ -4888,9 +5102,11 @@ hwloc_linux_backend_disable(struct hwloc_backend *backend) { struct hwloc_linux_backend_data_s *data = backend->private_data; #ifdef HAVE_OPENAT + if (data->root_path) + free(data->root_path); close(data->root_fd); #endif -#ifdef HAVE_LIBUDEV_H +#ifdef HWLOC_HAVE_LIBUDEV if (data->udev) udev_unref(data->udev); #endif @@ -4919,13 +5135,16 @@ hwloc_linux_component_instantiate(struct hwloc_disc_component *component, } backend->private_data = data; + backend->flags = HWLOC_BACKEND_FLAG_NEED_LEVELS; backend->discover = hwloc_look_linuxfs; backend->get_obj_cpuset = 
hwloc_linux_backend_get_obj_cpuset; backend->notify_new_object = hwloc_linux_backend_notify_new_object; backend->disable = hwloc_linux_backend_disable; /* default values */ + data->is_knl = 0; data->is_real_fsroot = 1; + data->root_path = NULL; if (!fsroot_path) fsroot_path = "/"; @@ -4937,6 +5156,7 @@ hwloc_linux_component_instantiate(struct hwloc_disc_component *component, if (strcmp(fsroot_path, "/")) { backend->is_thissystem = 0; data->is_real_fsroot = 0; + data->root_path = strdup(fsroot_path); } /* Since this fd stays open after hwloc returns, mark it as @@ -4957,13 +5177,17 @@ hwloc_linux_component_instantiate(struct hwloc_disc_component *component, #endif data->root_fd = root; -#ifdef HAVE_LIBUDEV_H +#ifdef HWLOC_HAVE_LIBUDEV data->udev = NULL; if (data->is_real_fsroot) { data->udev = udev_new(); } #endif + data->dumped_hwdata_dirname = getenv("HWLOC_DUMPED_HWDATA_DIR"); + if (!data->dumped_hwdata_dirname) + data->dumped_hwdata_dirname = "/var/run/hwloc/"; + data->deprecated_classlinks_model = -2; /* never tried */ data->mic_need_directlookup = -1; /* not initialized */ data->mic_directlookup_id_max = -1; /* not initialized */ @@ -4971,6 +5195,10 @@ hwloc_linux_component_instantiate(struct hwloc_disc_component *component, return backend; out_with_data: +#ifdef HAVE_OPENAT + if (data->root_path) + free(data->root_path); +#endif free(data); out_with_backend: free(backend); @@ -5134,7 +5362,8 @@ hwloc_look_linuxfs_pci(struct hwloc_backend *backend) fclose(file); /* is this a bridge? 
*/ - hwloc_pci_prepare_bridge(obj, config_space_cache); + if (hwloc_pci_prepare_bridge(obj, config_space_cache) < 0) + continue; /* get the revision */ attr->revision = config_space_cache[HWLOC_PCI_REVISION_ID]; @@ -5170,10 +5399,8 @@ hwloc_look_linuxfs_pci(struct hwloc_backend *backend) while (obj) { if (obj->attr->pcidev.domain == domain && obj->attr->pcidev.bus == bus - && obj->attr->pcidev.dev == dev - && obj->attr->pcidev.func == 0) { + && obj->attr->pcidev.dev == dev) { hwloc_obj_add_info(obj, "PCISlot", dirent->d_name); - break; } obj = obj->next_sibling; } diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-netbsd.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-netbsd.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-netbsd.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-netbsd.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-noos.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-noos.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-noos.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-noos.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-nvml.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-nvml.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-nvml.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-nvml.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-opencl.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-opencl.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-opencl.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-opencl.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-osf.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-osf.c similarity index 99% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-osf.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-osf.c index 57158883d67..b403d1343fc 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-osf.c +++ 
b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-osf.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2014 Inria. All rights reserved. + * Copyright © 2009-2015 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -234,7 +234,7 @@ hwloc_osf_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodes ptr = nmmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0, &mattr); radsetdestroy(&mattr.mattr_radset); - return ptr; + return ptr == MAP_FAILED ? NULL : ptr; } static int diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-pci.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-pci.c similarity index 95% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-pci.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-pci.c index 3a3ad6bb558..779bf17ba8f 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-pci.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-pci.c @@ -1,8 +1,10 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2014 Inria. All rights reserved. + * Copyright © 2009-2015 Inria. All rights reserved. * Copyright © 2009-2011, 2013 Université Bordeaux * Copyright © 2014 Cisco Systems, Inc. All rights reserved. + * Copyright © 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * See COPYING in top-level directory. 
*/ @@ -206,7 +208,8 @@ hwloc_look_pci(struct hwloc_backend *backend) if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE) hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed); - hwloc_pci_prepare_bridge(obj, config_space_cache); + if (hwloc_pci_prepare_bridge(obj, config_space_cache) < 0) + continue; if (obj->type == HWLOC_OBJ_PCI_DEVICE) { memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16)); @@ -273,10 +276,8 @@ hwloc_look_pci(struct hwloc_backend *backend) while (obj) { if (obj->attr->pcidev.domain == domain && obj->attr->pcidev.bus == bus - && obj->attr->pcidev.dev == dev - && obj->attr->pcidev.func == 0) { + && obj->attr->pcidev.dev == dev) { hwloc_obj_add_info(obj, "PCISlot", dirent->d_name); - break; } obj = obj->next_sibling; } @@ -305,7 +306,12 @@ hwloc_pci_component_instantiate(struct hwloc_disc_component *component, if (!backend) return NULL; backend->flags = HWLOC_BACKEND_FLAG_NEED_LEVELS; - backend->discover = hwloc_look_pci; +#ifdef HWLOC_SOLARIS_SYS + if ((uid_t)0 != geteuid()) + backend->discover = NULL; + else +#endif + backend->discover = hwloc_look_pci; return backend; } diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-solaris-chiptype.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-solaris-chiptype.c similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-solaris-chiptype.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-solaris-chiptype.c diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-solaris.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-solaris.c similarity index 99% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-solaris.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-solaris.c index cd0e5aa339f..255c5fca4d6 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-solaris.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-solaris.c @@ -732,7 +732,6 @@ 
hwloc_look_solaris(struct hwloc_backend *backend) hwloc_look_lgrp(topology); #endif /* HAVE_LIBLGRP */ #ifdef HAVE_LIBKSTAT - nbprocs = 0; if (hwloc_look_kstat(topology) > 0) alreadypus = 1; #endif /* HAVE_LIBKSTAT */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-synthetic.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-synthetic.c similarity index 95% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-synthetic.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-synthetic.c index db7087bb923..5e7a4260470 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-synthetic.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-synthetic.c @@ -60,7 +60,7 @@ hwloc_synthetic_process_level_indexes(struct hwloc_synthetic_backend_data_s *dat unsigned long length = curlevel->index_string_length; unsigned *array = NULL; struct hwloc_synthetic_intlv_loop_s * loops = NULL; - unsigned long i; + size_t i; if (!attr) return; @@ -198,7 +198,7 @@ hwloc_synthetic_process_level_indexes(struct hwloc_synthetic_backend_data_s *dat && cachetypeattr != (hwloc_obj_cache_type_t) -1 && cachetypeattr != data->level[i].cachetype) continue; - loops[cur_loop].level_depth = i; + loops[cur_loop].level_depth = (unsigned)i; break; } if (i == curleveldepth) { @@ -326,7 +326,7 @@ hwloc_synthetic_parse_level_attrs(const char *attrs, const char **next_posp, const char *next_pos; hwloc_uint64_t memorysize = 0; const char *index_string = NULL; - unsigned long index_string_length = 0; + size_t index_string_length = 0; next_pos = (const char *) strchr(attrs, ')'); if (!next_pos) { @@ -368,7 +368,7 @@ hwloc_synthetic_parse_level_attrs(const char *attrs, const char **next_posp, curlevel->memorysize = memorysize; curlevel->index_string = index_string; - curlevel->index_string_length = index_string_length; + curlevel->index_string_length = (unsigned long)index_string_length; *next_posp = next_pos+1; return 0; } @@ -428,7 +428,7 @@ hwloc_backend_synthetic_init(struct 
hwloc_synthetic_backend_data_s *data, errno = EINVAL; goto error; } - if (type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) { + if (type == HWLOC_OBJ_SYSTEM || type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) { if (verbose) fprintf(stderr, "Synthetic string with disallowed object type at '%s'\n", pos); errno = EINVAL; @@ -455,6 +455,12 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, errno = EINVAL; goto error; } + if (!item) { + if (verbose) + fprintf(stderr,"Synthetic string with disallow 0 number of objects at '%s'\n", pos); + errno = EINVAL; + goto error; + } data->level[count-1].arity = (unsigned)item; totalarity *= item; @@ -497,6 +503,19 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, type = curlevel->type; + if (i == count-1 && type != HWLOC_OBJ_TYPE_UNKNOWN && type != HWLOC_OBJ_PU) { + if (verbose) + fprintf(stderr, "Synthetic string cannot use non-PU type for last level\n"); + errno = EINVAL; + return -1; + } + if (i != count-1 && type == HWLOC_OBJ_PU) { + if (verbose) + fprintf(stderr, "Synthetic string cannot use PU type for non-last level\n"); + errno = EINVAL; + return -1; + } + if (type == HWLOC_OBJ_TYPE_UNKNOWN) { if (i == count-1) type = HWLOC_OBJ_PU; @@ -908,7 +927,7 @@ static int hwloc_topology_export_synthetic_indexes(struct hwloc_topology * topol } ret += res; if (res >= tmplen) - res = tmplen>0 ? tmplen - 1 : 0; + res = tmplen>0 ? (int)tmplen - 1 : 0; tmp += res; tmplen -= res; } @@ -931,7 +950,7 @@ static int hwloc_topology_export_synthetic_indexes(struct hwloc_topology * topol return -1; ret += res; if (res >= tmplen) - res = tmplen>0 ? tmplen - 1 : 0; + res = tmplen>0 ? 
(int)tmplen - 1 : 0; tmp += res; tmplen -= res; cur = cur->next_cousin; @@ -979,7 +998,7 @@ static int hwloc_topology_export_synthetic_obj_attr(struct hwloc_topology * topo return -1; ret += res; if (res >= tmplen) - res = tmplen>0 ? tmplen - 1 : 0; + res = tmplen>0 ? (int)tmplen - 1 : 0; tmp += res; tmplen -= res; @@ -989,7 +1008,7 @@ static int hwloc_topology_export_synthetic_obj_attr(struct hwloc_topology * topo return -1; ret += res; if (res >= tmplen) - res = tmplen>0 ? tmplen - 1 : 0; + res = tmplen>0 ? (int)tmplen - 1 : 0; tmp += res; tmplen -= res; @@ -998,7 +1017,7 @@ static int hwloc_topology_export_synthetic_obj_attr(struct hwloc_topology * topo return -1; ret += res; if (res >= tmplen) - res = tmplen>0 ? tmplen - 1 : 0; + res = tmplen>0 ? (int)tmplen - 1 : 0; tmp += res; tmplen -= res; } @@ -1053,7 +1072,7 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology, if (ret > 0) prefix = separator; if (res >= tmplen) - res = tmplen>0 ? tmplen - 1 : 0; + res = tmplen>0 ? (int)tmplen - 1 : 0; tmp += res; tmplen -= res; } @@ -1073,7 +1092,7 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology, return -1; ret += res; if (res >= tmplen) - res = tmplen>0 ? tmplen - 1 : 0; + res = tmplen>0 ? (int)tmplen - 1 : 0; tmp += res; tmplen -= res; @@ -1084,7 +1103,7 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology, return -1; ret += res; if (res >= tmplen) - res = tmplen>0 ? tmplen - 1 : 0; + res = tmplen>0 ? (int)tmplen - 1 : 0; tmp += res; tmplen -= res; } diff --git a/opal/mca/hwloc/hwloc1112/hwloc/src/topology-windows.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-windows.c new file mode 100644 index 00000000000..83b54921562 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-windows.c @@ -0,0 +1,1131 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2015 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. 
+ * See COPYING in top-level directory. + */ + +/* To try to get all declarations duplicated below. */ +#define _WIN32_WINNT 0x0601 + +#include +#include +#include +#include + +#include + +#ifndef HAVE_KAFFINITY +typedef ULONG_PTR KAFFINITY, *PKAFFINITY; +#endif + +#ifndef HAVE_PROCESSOR_CACHE_TYPE +typedef enum _PROCESSOR_CACHE_TYPE { + CacheUnified, + CacheInstruction, + CacheData, + CacheTrace +} PROCESSOR_CACHE_TYPE; +#endif + +#ifndef CACHE_FULLY_ASSOCIATIVE +#define CACHE_FULLY_ASSOCIATIVE 0xFF +#endif + +#ifndef MAXIMUM_PROC_PER_GROUP /* missing in MinGW */ +#define MAXIMUM_PROC_PER_GROUP 64 +#endif + +#ifndef HAVE_CACHE_DESCRIPTOR +typedef struct _CACHE_DESCRIPTOR { + BYTE Level; + BYTE Associativity; + WORD LineSize; + DWORD Size; /* in bytes */ + PROCESSOR_CACHE_TYPE Type; +} CACHE_DESCRIPTOR, *PCACHE_DESCRIPTOR; +#endif + +#ifndef HAVE_LOGICAL_PROCESSOR_RELATIONSHIP +typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP { + RelationProcessorCore, + RelationNumaNode, + RelationCache, + RelationProcessorPackage, + RelationGroup, + RelationAll = 0xffff +} LOGICAL_PROCESSOR_RELATIONSHIP; +#else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */ +# ifndef HAVE_RELATIONPROCESSORPACKAGE +# define RelationProcessorPackage 3 +# define RelationGroup 4 +# define RelationAll 0xffff +# endif /* HAVE_RELATIONPROCESSORPACKAGE */ +#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */ + +#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION +typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION { + ULONG_PTR ProcessorMask; + LOGICAL_PROCESSOR_RELATIONSHIP Relationship; + _ANONYMOUS_UNION + union { + struct { + BYTE flags; + } ProcessorCore; + struct { + DWORD NodeNumber; + } NumaNode; + CACHE_DESCRIPTOR Cache; + ULONGLONG Reserved[2]; + } DUMMYUNIONNAME; +} SYSTEM_LOGICAL_PROCESSOR_INFORMATION, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION; +#endif + +/* Extended interface, for group support */ + +#ifndef HAVE_GROUP_AFFINITY +typedef struct _GROUP_AFFINITY { + KAFFINITY Mask; + WORD Group; + WORD 
Reserved[3]; +} GROUP_AFFINITY, *PGROUP_AFFINITY; +#endif + +#ifndef HAVE_PROCESSOR_RELATIONSHIP +typedef struct _PROCESSOR_RELATIONSHIP { + BYTE Flags; + BYTE Reserved[21]; + WORD GroupCount; + GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY]; +} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP; +#endif + +#ifndef HAVE_NUMA_NODE_RELATIONSHIP +typedef struct _NUMA_NODE_RELATIONSHIP { + DWORD NodeNumber; + BYTE Reserved[20]; + GROUP_AFFINITY GroupMask; +} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP; +#endif + +#ifndef HAVE_CACHE_RELATIONSHIP +typedef struct _CACHE_RELATIONSHIP { + BYTE Level; + BYTE Associativity; + WORD LineSize; + DWORD CacheSize; + PROCESSOR_CACHE_TYPE Type; + BYTE Reserved[20]; + GROUP_AFFINITY GroupMask; +} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP; +#endif + +#ifndef HAVE_PROCESSOR_GROUP_INFO +typedef struct _PROCESSOR_GROUP_INFO { + BYTE MaximumProcessorCount; + BYTE ActiveProcessorCount; + BYTE Reserved[38]; + KAFFINITY ActiveProcessorMask; +} PROCESSOR_GROUP_INFO, *PPROCESSOR_GROUP_INFO; +#endif + +#ifndef HAVE_GROUP_RELATIONSHIP +typedef struct _GROUP_RELATIONSHIP { + WORD MaximumGroupCount; + WORD ActiveGroupCount; + ULONGLONG Reserved[2]; + PROCESSOR_GROUP_INFO GroupInfo[ANYSIZE_ARRAY]; +} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP; +#endif + +#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX +typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX { + LOGICAL_PROCESSOR_RELATIONSHIP Relationship; + DWORD Size; + _ANONYMOUS_UNION + union { + PROCESSOR_RELATIONSHIP Processor; + NUMA_NODE_RELATIONSHIP NumaNode; + CACHE_RELATIONSHIP Cache; + GROUP_RELATIONSHIP Group; + /* Odd: no member to tell the cpu mask of the package... 
*/ + } DUMMYUNIONNAME; +} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX; +#endif + +#ifndef HAVE_PSAPI_WORKING_SET_EX_BLOCK +typedef union _PSAPI_WORKING_SET_EX_BLOCK { + ULONG_PTR Flags; + struct { + unsigned Valid :1; + unsigned ShareCount :3; + unsigned Win32Protection :11; + unsigned Shared :1; + unsigned Node :6; + unsigned Locked :1; + unsigned LargePage :1; + }; +} PSAPI_WORKING_SET_EX_BLOCK; +#endif + +#ifndef HAVE_PSAPI_WORKING_SET_EX_INFORMATION +typedef struct _PSAPI_WORKING_SET_EX_INFORMATION { + PVOID VirtualAddress; + PSAPI_WORKING_SET_EX_BLOCK VirtualAttributes; +} PSAPI_WORKING_SET_EX_INFORMATION; +#endif + +#ifndef HAVE_PROCESSOR_NUMBER +typedef struct _PROCESSOR_NUMBER { + WORD Group; + BYTE Number; + BYTE Reserved; +} PROCESSOR_NUMBER, *PPROCESSOR_NUMBER; +#endif + +/* Function pointers */ + +typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORGROUPCOUNT)(void); +static PFN_GETACTIVEPROCESSORGROUPCOUNT GetActiveProcessorGroupCountProc; + +static unsigned long nr_processor_groups = 1; + +typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORCOUNT)(WORD); +static PFN_GETACTIVEPROCESSORCOUNT GetActiveProcessorCountProc; + +typedef DWORD (WINAPI *PFN_GETCURRENTPROCESSORNUMBER)(void); +static PFN_GETCURRENTPROCESSORNUMBER GetCurrentProcessorNumberProc; + +typedef VOID (WINAPI *PFN_GETCURRENTPROCESSORNUMBEREX)(PPROCESSOR_NUMBER); +static PFN_GETCURRENTPROCESSORNUMBEREX GetCurrentProcessorNumberExProc; + +typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATION)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION Buffer, PDWORD ReturnLength); +static PFN_GETLOGICALPROCESSORINFORMATION GetLogicalProcessorInformationProc; + +typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATIONEX)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnLength); +static PFN_GETLOGICALPROCESSORINFORMATIONEX GetLogicalProcessorInformationExProc; + +typedef BOOL (WINAPI *PFN_SETTHREADGROUPAFFINITY)(HANDLE hThread, 
const GROUP_AFFINITY *GroupAffinity, PGROUP_AFFINITY PreviousGroupAffinity); +static PFN_SETTHREADGROUPAFFINITY SetThreadGroupAffinityProc; + +typedef BOOL (WINAPI *PFN_GETTHREADGROUPAFFINITY)(HANDLE hThread, PGROUP_AFFINITY GroupAffinity); +static PFN_GETTHREADGROUPAFFINITY GetThreadGroupAffinityProc; + +typedef BOOL (WINAPI *PFN_GETNUMAAVAILABLEMEMORYNODE)(UCHAR Node, PULONGLONG AvailableBytes); +static PFN_GETNUMAAVAILABLEMEMORYNODE GetNumaAvailableMemoryNodeProc; + +typedef BOOL (WINAPI *PFN_GETNUMAAVAILABLEMEMORYNODEEX)(USHORT Node, PULONGLONG AvailableBytes); +static PFN_GETNUMAAVAILABLEMEMORYNODEEX GetNumaAvailableMemoryNodeExProc; + +typedef LPVOID (WINAPI *PFN_VIRTUALALLOCEXNUMA)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect, DWORD nndPreferred); +static PFN_VIRTUALALLOCEXNUMA VirtualAllocExNumaProc; + +typedef BOOL (WINAPI *PFN_VIRTUALFREEEX)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD dwFreeType); +static PFN_VIRTUALFREEEX VirtualFreeExProc; + +typedef BOOL (WINAPI *PFN_QUERYWORKINGSETEX)(HANDLE hProcess, PVOID pv, DWORD cb); +static PFN_QUERYWORKINGSETEX QueryWorkingSetExProc; + +static void hwloc_win_get_function_ptrs(void) +{ + static int done = 0; + if (!done) { + HMODULE kernel32; + + kernel32 = LoadLibrary("kernel32.dll"); + if (kernel32) { + GetActiveProcessorGroupCountProc = + (PFN_GETACTIVEPROCESSORGROUPCOUNT) GetProcAddress(kernel32, "GetActiveProcessorGroupCount"); + GetActiveProcessorCountProc = + (PFN_GETACTIVEPROCESSORCOUNT) GetProcAddress(kernel32, "GetActiveProcessorCount"); + GetLogicalProcessorInformationProc = + (PFN_GETLOGICALPROCESSORINFORMATION) GetProcAddress(kernel32, "GetLogicalProcessorInformation"); + GetCurrentProcessorNumberProc = + (PFN_GETCURRENTPROCESSORNUMBER) GetProcAddress(kernel32, "GetCurrentProcessorNumber"); + GetCurrentProcessorNumberExProc = + (PFN_GETCURRENTPROCESSORNUMBEREX) GetProcAddress(kernel32, "GetCurrentProcessorNumberEx"); + 
SetThreadGroupAffinityProc = + (PFN_SETTHREADGROUPAFFINITY) GetProcAddress(kernel32, "SetThreadGroupAffinity"); + GetThreadGroupAffinityProc = + (PFN_GETTHREADGROUPAFFINITY) GetProcAddress(kernel32, "GetThreadGroupAffinity"); + GetNumaAvailableMemoryNodeProc = + (PFN_GETNUMAAVAILABLEMEMORYNODE) GetProcAddress(kernel32, "GetNumaAvailableMemoryNode"); + GetNumaAvailableMemoryNodeExProc = + (PFN_GETNUMAAVAILABLEMEMORYNODEEX) GetProcAddress(kernel32, "GetNumaAvailableMemoryNodeEx"); + GetLogicalProcessorInformationExProc = + (PFN_GETLOGICALPROCESSORINFORMATIONEX)GetProcAddress(kernel32, "GetLogicalProcessorInformationEx"); + QueryWorkingSetExProc = + (PFN_QUERYWORKINGSETEX) GetProcAddress(kernel32, "K32QueryWorkingSetEx"); + VirtualAllocExNumaProc = + (PFN_VIRTUALALLOCEXNUMA) GetProcAddress(kernel32, "VirtualAllocExNuma"); + VirtualFreeExProc = + (PFN_VIRTUALFREEEX) GetProcAddress(kernel32, "VirtualFreeEx"); + } + + if (GetActiveProcessorGroupCountProc) + nr_processor_groups = GetActiveProcessorGroupCountProc(); + + if (!QueryWorkingSetExProc) { /* kernel32 lookup of K32QueryWorkingSetEx failed, try psapi.dll's QueryWorkingSetEx */ + HMODULE psapi = LoadLibrary("psapi.dll"); + if (psapi) + QueryWorkingSetExProc = (PFN_QUERYWORKINGSETEX) GetProcAddress(psapi, "QueryWorkingSetEx"); + } + + done = 1; + } +} + +/* + * ULONG_PTR and DWORD_PTR are 64/32bits depending on the arch + * while bitmaps use unsigned long (always 32bits) + */ + +static void hwloc_bitmap_from_ULONG_PTR(hwloc_bitmap_t set, ULONG_PTR mask) +{ +#if SIZEOF_VOID_P == 8 + hwloc_bitmap_from_ulong(set, mask & 0xffffffff); + hwloc_bitmap_set_ith_ulong(set, 1, mask >> 32); +#else + hwloc_bitmap_from_ulong(set, mask); +#endif +} + +static void hwloc_bitmap_from_ith_ULONG_PTR(hwloc_bitmap_t set, unsigned i, ULONG_PTR mask) +{ +#if SIZEOF_VOID_P == 8 + hwloc_bitmap_from_ith_ulong(set, 2*i, mask & 0xffffffff); + hwloc_bitmap_set_ith_ulong(set, 2*i+1, mask >> 32); +#else + hwloc_bitmap_from_ith_ulong(set, i, mask); +#endif +} + +static void hwloc_bitmap_set_ith_ULONG_PTR(hwloc_bitmap_t set, 
unsigned i, ULONG_PTR mask) +{ +#if SIZEOF_VOID_P == 8 + hwloc_bitmap_set_ith_ulong(set, 2*i, mask & 0xffffffff); + hwloc_bitmap_set_ith_ulong(set, 2*i+1, mask >> 32); +#else + hwloc_bitmap_set_ith_ulong(set, i, mask); +#endif +} + +static ULONG_PTR hwloc_bitmap_to_ULONG_PTR(hwloc_const_bitmap_t set) +{ +#if SIZEOF_VOID_P == 8 + ULONG_PTR up = hwloc_bitmap_to_ith_ulong(set, 1); + up <<= 32; + up |= hwloc_bitmap_to_ulong(set); + return up; +#else + return hwloc_bitmap_to_ulong(set); +#endif +} + +static ULONG_PTR hwloc_bitmap_to_ith_ULONG_PTR(hwloc_const_bitmap_t set, unsigned i) +{ +#if SIZEOF_VOID_P == 8 + ULONG_PTR up = hwloc_bitmap_to_ith_ulong(set, 2*i+1); + up <<= 32; + up |= hwloc_bitmap_to_ith_ulong(set, 2*i); + return up; +#else + return hwloc_bitmap_to_ith_ulong(set, i); +#endif +} + +/* convert set into index+mask if all set bits are in the same ULONG. + * otherwise return -1. + */ +static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned *index, ULONG_PTR *mask) +{ + unsigned first_ulp, last_ulp; + if (hwloc_bitmap_weight(set) == -1) + return -1; + first_ulp = hwloc_bitmap_first(set) / (sizeof(ULONG_PTR)*8); + last_ulp = hwloc_bitmap_last(set) / (sizeof(ULONG_PTR)*8); + if (first_ulp != last_ulp) + return -1; + *mask = hwloc_bitmap_to_ith_ULONG_PTR(set, first_ulp); + *index = first_ulp; + return 0; +} + +/************************************************************** + * hwloc PU numbering with respect to Windows processor groups + * + * Everywhere below we reserve 64 physical indexes per processor group because that's + * the maximum (MAXIMUM_PROC_PER_GROUP). Windows may actually use fewer bits than that + * in some groups (either to avoid splitting NUMA nodes across groups, or because of OS + * tweaks such as "bcdedit /set groupsize 8") but we keep some unused indexes for simplicity. + * That means PU physical indexes and cpusets may be non-contiguous. 
+ * That also means hwloc_fallback_nbprocessors() below must return the last PU index + 1 + * instead of the actual number of processors. + */ + +/******************** + * last_cpu_location + */ + +static int +hwloc_win_get_thisthread_last_cpu_location(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t set, int flags __hwloc_attribute_unused) +{ + assert(GetCurrentProcessorNumberExProc || (GetCurrentProcessorNumberProc && nr_processor_groups == 1)); + + if (nr_processor_groups > 1 || !GetCurrentProcessorNumberProc) { + PROCESSOR_NUMBER num; + GetCurrentProcessorNumberExProc(&num); + hwloc_bitmap_from_ith_ULONG_PTR(set, num.Group, ((ULONG_PTR)1) << num.Number); + return 0; + } + + hwloc_bitmap_from_ith_ULONG_PTR(set, 0, ((ULONG_PTR)1) << GetCurrentProcessorNumberProc()); + return 0; +} + +/* TODO: hwloc_win_get_thisproc_last_cpu_location() using + * CreateToolhelp32Snapshot(), Thread32First/Next() + * th.th32OwnerProcessID == GetCurrentProcessId() for filtering within process + * OpenThread(THREAD_SET_INFORMATION|THREAD_QUERY_INFORMATION, FALSE, te32.th32ThreadID) to get a handle. 
+ */ + + +/****************************** + * set cpu/membind for threads + */ + +/* TODO: SetThreadIdealProcessor{,Ex} */ + +static int +hwloc_win_set_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t thread, hwloc_const_bitmap_t hwloc_set, int flags) +{ + DWORD_PTR mask; + unsigned group; + + if (flags & HWLOC_CPUBIND_NOMEMBIND) { + errno = ENOSYS; + return -1; + } + + if (hwloc_bitmap_to_single_ULONG_PTR(hwloc_set, &group, &mask) < 0) { + errno = ENOSYS; + return -1; + } + + assert(nr_processor_groups == 1 || SetThreadGroupAffinityProc); + + if (nr_processor_groups > 1) { + GROUP_AFFINITY aff; + memset(&aff, 0, sizeof(aff)); /* we get Invalid Parameter error if Reserved field isn't cleared */ + aff.Group = group; + aff.Mask = mask; + if (!SetThreadGroupAffinityProc(thread, &aff, NULL)) + return -1; + + } else { + /* SetThreadAffinityMask() only changes the mask inside the current processor group */ + /* The resulting binding is always strict */ + if (!SetThreadAffinityMask(thread, mask)) + return -1; + } + return 0; +} + +/* TODO: SetThreadGroupAffinity to get affinity */ + +static int +hwloc_win_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags) +{ + return hwloc_win_set_thread_cpubind(topology, GetCurrentThread(), hwloc_set, flags); +} + +static int +hwloc_win_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + int ret; + hwloc_cpuset_t cpuset; + + if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND) + || flags & HWLOC_MEMBIND_NOCPUBIND) { + errno = ENOSYS; + return -1; + } + + cpuset = hwloc_bitmap_alloc(); + hwloc_cpuset_from_nodeset(topology, cpuset, nodeset); + ret = hwloc_win_set_thisthread_cpubind(topology, cpuset, flags & HWLOC_MEMBIND_STRICT?HWLOC_CPUBIND_STRICT:0); + hwloc_bitmap_free(cpuset); + return ret; +} + + +/****************************** + * get cpu/membind for threads + */ + + 
static int +hwloc_win_get_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t thread, hwloc_cpuset_t set, int flags __hwloc_attribute_unused) +{ + GROUP_AFFINITY aff; + + assert(GetThreadGroupAffinityProc); + + if (!GetThreadGroupAffinityProc(thread, &aff)) + return -1; + hwloc_bitmap_from_ith_ULONG_PTR(set, aff.Group, aff.Mask); + return 0; +} + +static int +hwloc_win_get_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t set, int flags __hwloc_attribute_unused) +{ + return hwloc_win_get_thread_cpubind(topology, GetCurrentThread(), set, flags); +} + +static int +hwloc_win_get_thisthread_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + int ret; + hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); + ret = hwloc_win_get_thread_cpubind(topology, GetCurrentThread(), cpuset, flags); + if (!ret) { + *policy = HWLOC_MEMBIND_BIND; + hwloc_cpuset_to_nodeset(topology, cpuset, nodeset); + } + hwloc_bitmap_free(cpuset); + return ret; +} + + +/******************************** + * set cpu/membind for processes + */ + +static int +hwloc_win_set_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_const_bitmap_t hwloc_set, int flags) +{ + DWORD_PTR mask; + + assert(nr_processor_groups == 1); + + if (flags & HWLOC_CPUBIND_NOMEMBIND) { + errno = ENOSYS; + return -1; + } + + /* TODO: SetThreadGroupAffinity() for all threads doesn't enforce the whole process affinity, + * maybe because of process-specific resource locality */ + /* TODO: if we are in a single group (check with GetProcessGroupAffinity()), + * SetProcessAffinityMask() changes the binding within that same group. + */ + /* TODO: NtSetInformationProcess() works very well for binding to any mask in a single group, + * but it's an internal routine. + */ + /* TODO: checks whether hwloc-bind.c needs to pass INHERIT_PARENT_AFFINITY to CreateProcess() instead of execvp(). 
*/ + + /* The resulting binding is always strict */ + mask = hwloc_bitmap_to_ULONG_PTR(hwloc_set); + if (!SetProcessAffinityMask(proc, mask)) + return -1; + return 0; +} + +static int +hwloc_win_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags) +{ + return hwloc_win_set_proc_cpubind(topology, GetCurrentProcess(), hwloc_set, flags); +} + +static int +hwloc_win_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + int ret; + hwloc_cpuset_t cpuset; + + if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND) + || flags & HWLOC_MEMBIND_NOCPUBIND) { + errno = ENOSYS; + return -1; + } + + cpuset = hwloc_bitmap_alloc(); + hwloc_cpuset_from_nodeset(topology, cpuset, nodeset); + ret = hwloc_win_set_proc_cpubind(topology, pid, cpuset, flags & HWLOC_MEMBIND_STRICT?HWLOC_CPUBIND_STRICT:0); + hwloc_bitmap_free(cpuset); + return ret; +} + +static int +hwloc_win_set_thisproc_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + return hwloc_win_set_proc_membind(topology, GetCurrentProcess(), nodeset, policy, flags); +} + + +/******************************** + * get cpu/membind for processes + */ + +static int +hwloc_win_get_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_bitmap_t hwloc_set, int flags) +{ + DWORD_PTR proc_mask, sys_mask; + + assert(nr_processor_groups == 1); + + if (flags & HWLOC_CPUBIND_NOMEMBIND) { + errno = ENOSYS; + return -1; + } + + /* TODO: if we are in a single group (check with GetProcessGroupAffinity()), + * GetProcessAffinityMask() gives the mask within that group. + */ + /* TODO: if we are in multiple groups, GetProcessGroupAffinity() gives their IDs, + * but we don't know their masks. 
+ */ + /* TODO: GetThreadGroupAffinity() for all threads can be smaller than the whole process affinity, + * maybe because of process-specific resource locality. + */ + + if (!GetProcessAffinityMask(proc, &proc_mask, &sys_mask)) + return -1; + hwloc_bitmap_from_ULONG_PTR(hwloc_set, proc_mask); + return 0; +} + +static int +hwloc_win_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + int ret; + hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); + ret = hwloc_win_get_proc_cpubind(topology, pid, cpuset, flags & HWLOC_MEMBIND_STRICT?HWLOC_CPUBIND_STRICT:0); + if (!ret) { + *policy = HWLOC_MEMBIND_BIND; + hwloc_cpuset_to_nodeset(topology, cpuset, nodeset); + } + hwloc_bitmap_free(cpuset); + return ret; +} + +static int +hwloc_win_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags) +{ + return hwloc_win_get_proc_cpubind(topology, GetCurrentProcess(), hwloc_cpuset, flags); +} + +static int +hwloc_win_get_thisproc_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + return hwloc_win_get_proc_membind(topology, GetCurrentProcess(), nodeset, policy, flags); +} + + +/************************ + * membind alloc/free + */ + +static void * +hwloc_win_alloc(hwloc_topology_t topology __hwloc_attribute_unused, size_t len) { + return VirtualAlloc(NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE); +} + +static void * +hwloc_win_alloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) { + int node; + + switch (policy) { + case HWLOC_MEMBIND_DEFAULT: + case HWLOC_MEMBIND_BIND: + break; + default: + errno = ENOSYS; + return hwloc_alloc_or_fail(topology, len, flags); + } + + if (flags & HWLOC_MEMBIND_STRICT) { + errno = ENOSYS; + return NULL; + } + + if (hwloc_bitmap_weight(nodeset) != 1) { + /* Not a single node, can't 
do this */ + errno = EXDEV; + return hwloc_alloc_or_fail(topology, len, flags); + } + + node = hwloc_bitmap_first(nodeset); + return VirtualAllocExNumaProc(GetCurrentProcess(), NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE, node); +} + +static int +hwloc_win_free_membind(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len __hwloc_attribute_unused) { + if (!addr) + return 0; + if (!VirtualFreeExProc(GetCurrentProcess(), addr, 0, MEM_RELEASE)) + return -1; + return 0; +} + + +/********************** + * membind for areas + */ + +static int +hwloc_win_get_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + SYSTEM_INFO SystemInfo; + DWORD page_size; + uintptr_t start; + unsigned nb; + + GetSystemInfo(&SystemInfo); + page_size = SystemInfo.dwPageSize; + + start = (((uintptr_t) addr) / page_size) * page_size; + nb = (unsigned)((((uintptr_t) addr + len - start) + page_size - 1) / page_size); + + if (!nb) + nb = 1; + + { + PSAPI_WORKING_SET_EX_INFORMATION *pv; + unsigned i; + + pv = calloc(nb, sizeof(*pv)); + + for (i = 0; i < nb; i++) + pv[i].VirtualAddress = (void*) (start + i * page_size); + if (!QueryWorkingSetExProc(GetCurrentProcess(), pv, nb * sizeof(*pv))) { + free(pv); + return -1; + } + *policy = HWLOC_MEMBIND_BIND; + if (flags & HWLOC_MEMBIND_STRICT) { + unsigned node = pv[0].VirtualAttributes.Node; + for (i = 1; i < nb; i++) { + if (pv[i].VirtualAttributes.Node != node) { + errno = EXDEV; + free(pv); + return -1; + } + } + hwloc_bitmap_only(nodeset, node); + free(pv); + return 0; + } + hwloc_bitmap_zero(nodeset); + for (i = 0; i < nb; i++) + hwloc_bitmap_set(nodeset, pv[i].VirtualAttributes.Node); + free(pv); + return 0; + } +} + + +/************************* + * discovery + */ + +static int +hwloc_look_windows(struct hwloc_backend *backend) +{ + struct hwloc_topology *topology = backend->topology; + 
hwloc_bitmap_t groups_pu_set = NULL; + SYSTEM_INFO SystemInfo; + DWORD length; + + hwloc_win_get_function_ptrs(); + + if (topology->levels[0][0]->cpuset) + /* somebody discovered things */ + return 0; + + hwloc_alloc_obj_cpusets(topology->levels[0][0]); + + GetSystemInfo(&SystemInfo); + + if (!GetLogicalProcessorInformationExProc && GetLogicalProcessorInformationProc) { + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo; + unsigned id; + unsigned i; + struct hwloc_obj *obj; + hwloc_obj_type_t type; + + length = 0; + procInfo = NULL; + + while (1) { + if (GetLogicalProcessorInformationProc(procInfo, &length)) + break; + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) + return -1; + procInfo = realloc(procInfo, length); + } + + assert(!length || procInfo); + + for (i = 0; i < length / sizeof(*procInfo); i++) { + + /* Ignore unknown caches */ + if (procInfo->Relationship == RelationCache + && procInfo->Cache.Type != CacheUnified + && procInfo->Cache.Type != CacheData + && procInfo->Cache.Type != CacheInstruction) + continue; + + id = -1; + switch (procInfo[i].Relationship) { + case RelationNumaNode: + type = HWLOC_OBJ_NUMANODE; + id = procInfo[i].NumaNode.NodeNumber; + break; + case RelationProcessorPackage: + type = HWLOC_OBJ_PACKAGE; + break; + case RelationCache: + type = HWLOC_OBJ_CACHE; + break; + case RelationProcessorCore: + type = HWLOC_OBJ_CORE; + break; + case RelationGroup: + default: + type = HWLOC_OBJ_GROUP; + break; + } + + obj = hwloc_alloc_setup_object(type, id); + obj->cpuset = hwloc_bitmap_alloc(); + hwloc_debug("%s#%u mask %lx\n", hwloc_obj_type_string(type), id, procInfo[i].ProcessorMask); + /* ProcessorMask is a ULONG_PTR */ + hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, 0, procInfo[i].ProcessorMask); + hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset); + + switch (type) { + case HWLOC_OBJ_NUMANODE: + { + ULONGLONG avail; + obj->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(obj->nodeset, id); + if 
((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail)) + || (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) + obj->memory.local_memory = avail; + obj->memory.page_types_len = 2; + obj->memory.page_types = malloc(2 * sizeof(*obj->memory.page_types)); + memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types)); + obj->memory.page_types_len = 1; + obj->memory.page_types[0].size = SystemInfo.dwPageSize; +#ifdef HAVE__SC_LARGE_PAGESIZE + obj->memory.page_types_len++; + obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); +#endif + break; + } + case HWLOC_OBJ_CACHE: + obj->attr->cache.size = procInfo[i].Cache.Size; + obj->attr->cache.associativity = procInfo[i].Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo[i].Cache.Associativity ; + obj->attr->cache.linesize = procInfo[i].Cache.LineSize; + obj->attr->cache.depth = procInfo[i].Cache.Level; + switch (procInfo->Cache.Type) { + case CacheUnified: + obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + break; + case CacheData: + obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; + break; + case CacheInstruction: + obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; + break; + default: + hwloc_free_unlinked_object(obj); + continue; + } + break; + case HWLOC_OBJ_GROUP: + obj->attr->group.depth = procInfo[i].Relationship == RelationGroup; + break; + default: + break; + } + hwloc_insert_object_by_cpuset(topology, obj); + } + + free(procInfo); + } + + if (GetLogicalProcessorInformationExProc) { + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, procInfo; + + unsigned id; + struct hwloc_obj *obj; + hwloc_obj_type_t type; + + length = 0; + procInfoTotal = NULL; + + while (1) { + if (GetLogicalProcessorInformationExProc(RelationAll, procInfoTotal, &length)) + break; + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) + return -1; + procInfoTotal = realloc(procInfoTotal, length); + } + + for (procInfo = procInfoTotal; + (void*) procInfo 
< (void*) ((uintptr_t) procInfoTotal + length); + procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) { + unsigned num, i; + GROUP_AFFINITY *GroupMask; + + /* Ignore unknown caches */ + if (procInfo->Relationship == RelationCache + && procInfo->Cache.Type != CacheUnified + && procInfo->Cache.Type != CacheData + && procInfo->Cache.Type != CacheInstruction) + continue; + + id = -1; + switch (procInfo->Relationship) { + case RelationNumaNode: + type = HWLOC_OBJ_NUMANODE; + num = 1; + GroupMask = &procInfo->NumaNode.GroupMask; + id = procInfo->NumaNode.NodeNumber; + break; + case RelationProcessorPackage: + type = HWLOC_OBJ_PACKAGE; + num = procInfo->Processor.GroupCount; + GroupMask = procInfo->Processor.GroupMask; + break; + case RelationCache: + type = HWLOC_OBJ_CACHE; + num = 1; + GroupMask = &procInfo->Cache.GroupMask; + break; + case RelationProcessorCore: + type = HWLOC_OBJ_CORE; + num = procInfo->Processor.GroupCount; + GroupMask = procInfo->Processor.GroupMask; + break; + case RelationGroup: + /* So strange an interface... */ + for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) { + KAFFINITY mask; + obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, id); + obj->cpuset = hwloc_bitmap_alloc(); + mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask; + hwloc_debug("group %u %d cpus mask %lx\n", id, + procInfo->Group.GroupInfo[id].ActiveProcessorCount, mask); + /* KAFFINITY is ULONG_PTR */ + hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, id, mask); + hwloc_debug_2args_bitmap("group %u %d bitmap %s\n", id, procInfo->Group.GroupInfo[id].ActiveProcessorCount, obj->cpuset); + + /* save the set of PUs so that we can create them at the end */ + if (!groups_pu_set) + groups_pu_set = hwloc_bitmap_alloc(); + hwloc_bitmap_or(groups_pu_set, groups_pu_set, obj->cpuset); + + hwloc_insert_object_by_cpuset(topology, obj); + } + continue; + default: + /* Don't know how to get the mask. 
*/ + hwloc_debug("unknown relation %d\n", procInfo->Relationship); + continue; + } + + obj = hwloc_alloc_setup_object(type, id); + obj->cpuset = hwloc_bitmap_alloc(); + for (i = 0; i < num; i++) { + hwloc_debug("%s#%u %d: mask %d:%lx\n", hwloc_obj_type_string(type), id, i, GroupMask[i].Group, GroupMask[i].Mask); + /* GROUP_AFFINITY.Mask is KAFFINITY, which is ULONG_PTR */ + hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, GroupMask[i].Group, GroupMask[i].Mask); + } + hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset); + + switch (type) { + case HWLOC_OBJ_NUMANODE: + { + ULONGLONG avail; + obj->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(obj->nodeset, id); + if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail)) + || (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) + obj->memory.local_memory = avail; + obj->memory.page_types = malloc(2 * sizeof(*obj->memory.page_types)); + memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types)); + obj->memory.page_types_len = 1; + obj->memory.page_types[0].size = SystemInfo.dwPageSize; +#ifdef HAVE__SC_LARGE_PAGESIZE + obj->memory.page_types_len++; + obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); +#endif + break; + } + case HWLOC_OBJ_CACHE: + obj->attr->cache.size = procInfo->Cache.CacheSize; + obj->attr->cache.associativity = procInfo->Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? 
-1 : procInfo->Cache.Associativity ; + obj->attr->cache.linesize = procInfo->Cache.LineSize; + obj->attr->cache.depth = procInfo->Cache.Level; + switch (procInfo->Cache.Type) { + case CacheUnified: + obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + break; + case CacheData: + obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; + break; + case CacheInstruction: + obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; + break; + default: + hwloc_free_unlinked_object(obj); + continue; + } + break; + default: + break; + } + hwloc_insert_object_by_cpuset(topology, obj); + } + free(procInfoTotal); + } + + if (groups_pu_set) { + /* the system supports multiple Groups. + * PU indexes may be discontiguous, especially if Groups contain less than 64 procs. + */ + hwloc_obj_t obj; + unsigned idx; + hwloc_bitmap_foreach_begin(idx, groups_pu_set) { + obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, idx); + obj->cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_only(obj->cpuset, idx); + hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n", + idx, obj->cpuset); + hwloc_insert_object_by_cpuset(topology, obj); + } hwloc_bitmap_foreach_end(); + hwloc_bitmap_free(groups_pu_set); + } else { + /* no processor groups */ + SYSTEM_INFO sysinfo; + hwloc_obj_t obj; + unsigned idx; + GetSystemInfo(&sysinfo); + for(idx=0; idx<32; idx++) + if (sysinfo.dwActiveProcessorMask & (((DWORD_PTR)1)<cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_only(obj->cpuset, idx); + hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n", + idx, obj->cpuset); + hwloc_insert_object_by_cpuset(topology, obj); + } + } + + hwloc_obj_add_info(topology->levels[0][0], "Backend", "Windows"); + if (topology->is_thissystem) + hwloc_add_uname_info(topology, NULL); + return 1; +} + +void +hwloc_set_windows_hooks(struct hwloc_binding_hooks *hooks, + struct hwloc_topology_support *support) +{ + hwloc_win_get_function_ptrs(); + + if (GetCurrentProcessorNumberExProc || (GetCurrentProcessorNumberProc && nr_processor_groups == 1)) + 
hooks->get_thisthread_last_cpu_location = hwloc_win_get_thisthread_last_cpu_location; + + if (nr_processor_groups == 1) { + hooks->set_proc_cpubind = hwloc_win_set_proc_cpubind; + hooks->get_proc_cpubind = hwloc_win_get_proc_cpubind; + hooks->set_thisproc_cpubind = hwloc_win_set_thisproc_cpubind; + hooks->get_thisproc_cpubind = hwloc_win_get_thisproc_cpubind; + hooks->set_proc_membind = hwloc_win_set_proc_membind; + hooks->get_proc_membind = hwloc_win_get_proc_membind; + hooks->set_thisproc_membind = hwloc_win_set_thisproc_membind; + hooks->get_thisproc_membind = hwloc_win_get_thisproc_membind; + } + if (nr_processor_groups == 1 || SetThreadGroupAffinityProc) { + hooks->set_thread_cpubind = hwloc_win_set_thread_cpubind; + hooks->set_thisthread_cpubind = hwloc_win_set_thisthread_cpubind; + hooks->set_thisthread_membind = hwloc_win_set_thisthread_membind; + } + if (GetThreadGroupAffinityProc) { + hooks->get_thread_cpubind = hwloc_win_get_thread_cpubind; + hooks->get_thisthread_cpubind = hwloc_win_get_thisthread_cpubind; + hooks->get_thisthread_membind = hwloc_win_get_thisthread_membind; + } + + if (VirtualAllocExNumaProc) { + hooks->alloc_membind = hwloc_win_alloc_membind; + hooks->alloc = hwloc_win_alloc; + hooks->free_membind = hwloc_win_free_membind; + support->membind->bind_membind = 1; + } + + if (QueryWorkingSetExProc) + hooks->get_area_membind = hwloc_win_get_area_membind; +} + +static struct hwloc_backend * +hwloc_windows_component_instantiate(struct hwloc_disc_component *component, + const void *_data1 __hwloc_attribute_unused, + const void *_data2 __hwloc_attribute_unused, + const void *_data3 __hwloc_attribute_unused) +{ + struct hwloc_backend *backend; + backend = hwloc_backend_alloc(component); + if (!backend) + return NULL; + backend->discover = hwloc_look_windows; + return backend; +} + +static struct hwloc_disc_component hwloc_windows_disc_component = { + HWLOC_DISC_COMPONENT_TYPE_CPU, + "windows", + HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + 
hwloc_windows_component_instantiate, + 50, + NULL +}; + +const struct hwloc_component hwloc_windows_component = { + HWLOC_COMPONENT_ABI, + NULL, NULL, + HWLOC_COMPONENT_TYPE_DISC, + 0, + &hwloc_windows_disc_component +}; + +unsigned +hwloc_fallback_nbprocessors(struct hwloc_topology *topology) { + int n; + SYSTEM_INFO sysinfo; + + /* by default, ignore groups (return only the number in the current group) */ + GetSystemInfo(&sysinfo); + n = sysinfo.dwNumberOfProcessors; /* FIXME could be non-contigous, rather return a mask from dwActiveProcessorMask? */ + + hwloc_win_get_function_ptrs(); + + if (nr_processor_groups > 1) { + /* assume n-1 groups are complete, since that's how we store things in cpusets */ + if (GetActiveProcessorCountProc) + n = MAXIMUM_PROC_PER_GROUP*(nr_processor_groups-1) + + GetActiveProcessorCountProc((WORD)nr_processor_groups-1); + else + n = MAXIMUM_PROC_PER_GROUP*nr_processor_groups; + } + + if (n >= 1) + topology->support.discovery->pu = 1; + else + n = 1; + return n; +} diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-x86.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-x86.c similarity index 87% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-x86.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-x86.c index db91a5ef6d5..ab6de7c9343 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-x86.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-x86.c @@ -26,6 +26,7 @@ struct hwloc_x86_backend_data_s { unsigned nbprocs; hwloc_bitmap_t apicid_set; int apicid_unique; + int is_knl; }; #define has_topoext(features) ((features)[6] & (1 << 22)) @@ -35,9 +36,11 @@ struct cacheinfo { unsigned type; unsigned level; unsigned nbthreads_sharing; + unsigned cacheid; unsigned linesize; unsigned linepart; + int inclusive; int ways; unsigned sets; unsigned long size; @@ -99,6 +102,8 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, int type, uns cache->nbthreads_sharing = infos->max_log_proc; cache->linesize = cpuid & 
0xff; cache->linepart = 0; + cache->inclusive = 0; /* old AMD (K8-K10) supposed to have exclusive caches */ + if (level == 1) { cache->ways = (cpuid >> 16) & 0xff; if (cache->ways == 0xff) @@ -169,6 +174,9 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns } infos->cpustepping = eax & 0xf; + if (cpuid_type == intel && infos->cpufamilynumber == 0x6 && infos->cpumodelnumber == 0x57) + data->is_knl = 1; + /* Get cpu vendor string from cpuid 0x00 */ memset(regs, 0, sizeof(regs)); regs[0] = 0; @@ -283,6 +291,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns cache->ways = ways; cache->sets = sets = ecx + 1; cache->size = linesize * linepart * ways * sets; + cache->inclusive = edx & 0x2; hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10); @@ -310,9 +319,6 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns fill_amd_cache(infos, 2, 3, ecx); /* L2u */ if (edx & 0xf000) fill_amd_cache(infos, 3, 3, edx); /* L3u */ - /* FIXME: AMD MagnyCours family 0x10 model 0x9 with 8 cores or more actually - * have the L3 split in two halves, and associativity is divided as well (48) - */ } } @@ -320,6 +326,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns * (not supported on AMD) */ if (cpuid_type != amd && highest_cpuid >= 0x04) { + unsigned level; for (cachenum = 0; ; cachenum++) { unsigned type; eax = 0x04; @@ -332,6 +339,10 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns if (type == 0) break; + level = (eax >> 5) & 0x7; + if (data->is_knl && level == 3) + /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ + break; infos->numcaches++; if (!cachenum) { @@ -358,9 +369,13 @@ static void 
look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns if (type == 0) break; + level = (eax >> 5) & 0x7; + if (data->is_knl && level == 3) + /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ + break; cache->type = type; - cache->level = (eax >> 5) & 0x7; + cache->level = level; cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; cache->linesize = linesize = (ebx & 0xfff) + 1; @@ -373,6 +388,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns cache->ways = ways; cache->sets = sets = ecx + 1; cache->size = linesize * linepart * ways * sets; + cache->inclusive = edx & 0x2; hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10); @@ -430,6 +446,48 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns } } + /* Now that we have all info, compute cacheids and apply quirks */ + for (cachenum = 0; cachenum < infos->numcaches; cachenum++) { + struct cacheinfo *cache = &infos->cache[cachenum]; + + /* default cacheid value */ + cache->cacheid = infos->apicid / cache->nbthreads_sharing; + + /* AMD quirk */ + if (cpuid_type == amd + && infos->cpufamilynumber== 0x10 && infos->cpumodelnumber == 0x9 + && cache->level == 3 + && (cache->ways == -1 || (cache->ways % 2 == 0)) && cache->nbthreads_sharing >= 8) { + /* Fix AMD family 0x10 model 0x9 (Magny-Cours) with 8 or 12 cores. + * The L3 (and its associativity) is actually split into two halves). + */ + if (cache->nbthreads_sharing == 16) + cache->nbthreads_sharing = 12; /* nbthreads_sharing is a power of 2 but the processor actually has 8 or 12 cores */ + cache->nbthreads_sharing /= 2; + cache->size /= 2; + if (cache->ways != -1) + cache->ways /= 2; + /* AMD Magny-Cours 12-cores processor reserve APIC ids as AAAAAABBBBBB.... 
+ * among first L3 (A), second L3 (B), and unexisting cores (.). + * On multi-socket servers, L3 in non-first sockets may have APIC id ranges + * such as [16-21] that are not aligned on multiple of nbthreads_sharing (6). + * That means, we can't just compare apicid/nbthreads_sharing to identify siblings. + */ + cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */ + + 2 * (infos->apicid / infos->max_log_proc); /* add 2 caches per previous package */ + + } else if (cpuid_type == amd + && infos->cpufamilynumber == 0x15 + && (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */) + && cache->level == 3 && cache->nbthreads_sharing == 6) { + /* AMD Bulldozer and Piledriver 12-core processors have same APIC ids as Magny-Cours above, + * but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here. + */ + cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */ + + 2 * (infos->apicid / infos->max_log_proc); /* add 2 cache per previous package */ + } + } + if (hwloc_bitmap_isset(data->apicid_set, infos->apicid)) data->apicid_unique = 0; else @@ -620,6 +678,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int } unit = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unitid); unit->cpuset = unit_cpuset; + hwloc_obj_add_info(unit, "Type", "ComputeUnit"); hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n", unitid, unit_cpuset); hwloc_insert_object_by_cpuset(topology, unit); @@ -696,6 +755,20 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_free(cores_cpuset); } + /* Look for PUs */ + if (fulldiscovery) { + unsigned i; + hwloc_debug("%s", "\n\n * CPU cpusets *\n\n"); + for (i=0; icpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_only(obj->cpuset, i); + hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", i, obj->cpuset); + 
hwloc_insert_object_by_cpuset(topology, obj); + } + } + /* Look for caches */ /* First find max level */ level = 0; @@ -703,18 +776,14 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int for (j = 0; j < infos[i].numcaches; j++) if (infos[i].cache[j].level > level) level = infos[i].cache[j].level; - - /* Look for known types */ - if (fulldiscovery) while (level > 0) { + while (level > 0) { for (type = 1; type <= 3; type++) { /* Look for caches of that type at level level */ { hwloc_bitmap_t caches_cpuset = hwloc_bitmap_dup(complete_cpuset); - hwloc_bitmap_t cache_cpuset; hwloc_obj_t cache; while ((i = hwloc_bitmap_first(caches_cpuset)) != (unsigned) -1) { - unsigned packageid = infos[i].packageid; for (l = 0; l < infos[i].numcaches; l++) { if (infos[i].cache[l].level == level && infos[i].cache[l].type == type) @@ -726,9 +795,12 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int continue; } - /* Found a matching cache, now look for others sharing it */ - { - unsigned cacheid = infos[i].apicid / infos[i].cache[l].nbthreads_sharing; + if (fulldiscovery) { + /* Add caches */ + hwloc_bitmap_t cache_cpuset; + unsigned packageid = infos[i].packageid; + unsigned cacheid = infos[i].cache[l].cacheid; + /* Found a matching cache, now look for others sharing it */ cache_cpuset = hwloc_bitmap_alloc(); for (j = i; j < nbprocs; j++) { @@ -742,7 +814,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_clr(caches_cpuset, j); continue; } - if (infos[j].packageid == packageid && infos[j].apicid / infos[j].cache[l2].nbthreads_sharing == cacheid) { + if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) { hwloc_bitmap_set(cache_cpuset, j); hwloc_bitmap_clr(caches_cpuset, j); } @@ -764,9 +836,31 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int break; } cache->cpuset = cache_cpuset; + hwloc_obj_add_info(cache, 
"Inclusive", infos[i].cache[l].inclusive ? "1" : "0"); hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n", level, cacheid, cache_cpuset); hwloc_insert_object_by_cpuset(topology, cache); + + } else { + /* Annotate existing caches */ + hwloc_bitmap_t set = hwloc_bitmap_alloc(); + hwloc_obj_t cache = NULL; + int depth; + hwloc_bitmap_set(set, i); + depth = hwloc_get_cache_type_depth(topology, level, + type == 1 ? HWLOC_OBJ_CACHE_DATA : type == 2 ? HWLOC_OBJ_CACHE_INSTRUCTION : HWLOC_OBJ_CACHE_UNIFIED); + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) + cache = hwloc_get_next_obj_covering_cpuset_by_depth(topology, set, depth, NULL); + hwloc_bitmap_free(set); + if (cache) { + /* Found cache above that PU, annotate if no such attribute yet */ + if (!hwloc_obj_get_info_by_name(cache, "Inclusive")) + hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0"); + hwloc_bitmap_andnot(caches_cpuset, caches_cpuset, cache->cpuset); + } else { + /* No cache above that PU?! */ + hwloc_bitmap_clr(caches_cpuset, i); + } } } hwloc_bitmap_free(caches_cpuset); @@ -948,6 +1042,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) if (highest_cpuid >= 0x7) { eax = 0x7; + ecx = 0; hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx); features[9] = ebx; } @@ -1021,11 +1116,11 @@ hwloc_x86_discover(struct hwloc_backend *backend) } fulldiscovery: - hwloc_look_x86(backend, 1); - /* if failed, just continue and create PUs */ - - if (!alreadypus) - hwloc_setup_pu_level(topology, data->nbprocs); + if (hwloc_look_x86(backend, 1) < 0) { + /* if failed, create PUs */ + if (!alreadypus) + hwloc_setup_pu_level(topology, data->nbprocs); + } hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86"); @@ -1075,6 +1170,7 @@ hwloc_x86_component_instantiate(struct hwloc_disc_component *component, backend->disable = hwloc_x86_backend_disable; /* default values */ + data->is_knl = 0; data->apicid_set = hwloc_bitmap_alloc(); data->apicid_unique = 1; diff --git 
a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-xml-libxml.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml-libxml.c similarity index 99% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-xml-libxml.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml-libxml.c index ce3250c2850..46fe4aec292 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-xml-libxml.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml-libxml.c @@ -257,7 +257,8 @@ hwloc_libxml_import_diff(struct hwloc__xml_import_state_s *state, const char *xm if (state->global->next_attr(state, &attrname, &attrvalue) < 0) break; if (!strcmp(attrname, "refname")) { - free(refname); + if (refname) + free(refname); refname = strdup(attrvalue); } else goto out_with_doc; @@ -266,13 +267,15 @@ hwloc_libxml_import_diff(struct hwloc__xml_import_state_s *state, const char *xm ret = hwloc__xml_import_diff(state, firstdiffp); if (refnamep && !ret) *refnamep = refname; - else + else if (refname) free(refname); xmlFreeDoc(doc); return ret; out_with_doc: + if (refname) + free(refname); xmlFreeDoc(doc); out: return -1; /* failed */ diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-xml-nolibxml.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml-nolibxml.c similarity index 97% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-xml-nolibxml.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml-nolibxml.c index a93d9d49557..ba522087fec 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-xml-nolibxml.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml-nolibxml.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2014 Inria. All rights reserved. + * Copyright © 2009-2015 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -48,7 +48,7 @@ static int hwloc__nolibxml_import_next_attr(hwloc__xml_import_state_t state, char **namep, char **valuep) { hwloc__nolibxml_import_state_data_t nstate = (void*) state->data; - int namelen; + size_t namelen; size_t len, escaped; char *buffer, *value, *end; @@ -116,7 +116,7 @@ hwloc__nolibxml_import_find_child(hwloc__xml_import_state_t state, hwloc__nolibxml_import_state_data_t nchildstate = (void*) childstate->data; char *buffer = nstate->tagbuffer; char *end; - int namelen; + size_t namelen; childstate->parent = state; childstate->global = state->global; @@ -502,7 +502,7 @@ hwloc__nolibxml_export_update_buffer(hwloc__nolibxml_export_state_data_t ndata, if (res >= 0) { ndata->written += res; if (res >= (int) ndata->remaining) - res = ndata->remaining>0 ? ndata->remaining-1 : 0; + res = ndata->remaining>0 ? (int)ndata->remaining-1 : 0; ndata->buffer += res; ndata->remaining -= res; } @@ -511,7 +511,7 @@ hwloc__nolibxml_export_update_buffer(hwloc__nolibxml_export_state_data_t ndata, static char * hwloc__nolibxml_export_escape_string(const char *src) { - int fulllen, sublen; + size_t fulllen, sublen; char *escaped, *dst; fulllen = strlen(src); @@ -675,15 +675,15 @@ hwloc_nolibxml_export_buffer(hwloc_topology_t topology, char **bufferp, int *buf bufferlen = 16384; /* random guess for large enough default */ buffer = malloc(bufferlen); - res = hwloc___nolibxml_prepare_export(topology, buffer, bufferlen); + res = hwloc___nolibxml_prepare_export(topology, buffer, (int)bufferlen); if (res > bufferlen) { buffer = realloc(buffer, res); - hwloc___nolibxml_prepare_export(topology, buffer, res); + hwloc___nolibxml_prepare_export(topology, buffer, (int)res); } *bufferp = buffer; - *buflenp = res; + *buflenp = (int)res; return 0; } @@ -709,7 +709,7 @@ hwloc_nolibxml_export_file(hwloc_topology_t topology, const char *filename) } } - ret = fwrite(buffer, 1, bufferlen-1 /* don't write the ending \0 */, file); + ret = (int)fwrite(buffer, 1, bufferlen-1 /* don't write 
the ending \0 */, file); if (ret == bufferlen-1) { ret = 0; } else { @@ -767,15 +767,15 @@ hwloc_nolibxml_export_diff_buffer(hwloc_topology_diff_t diff, const char *refnam bufferlen = 16384; /* random guess for large enough default */ buffer = malloc(bufferlen); - res = hwloc___nolibxml_prepare_export_diff(diff, refname, buffer, bufferlen); + res = hwloc___nolibxml_prepare_export_diff(diff, refname, buffer, (int)bufferlen); if (res > bufferlen) { buffer = realloc(buffer, res); - hwloc___nolibxml_prepare_export_diff(diff, refname, buffer, res); + hwloc___nolibxml_prepare_export_diff(diff, refname, buffer, (int)res); } *bufferp = buffer; - *buflenp = res; + *buflenp = (int)res; return 0; } @@ -801,7 +801,7 @@ hwloc_nolibxml_export_diff_file(hwloc_topology_diff_t diff, const char *refname, } } - ret = fwrite(buffer, 1, bufferlen-1 /* don't write the ending \0 */, file); + ret = (int)fwrite(buffer, 1, bufferlen-1 /* don't write the ending \0 */, file); if (ret == bufferlen-1) { ret = 0; } else { diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-xml.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml.c similarity index 99% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology-xml.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml.c index d9e2e464471..52b7ddc46b3 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology-xml.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-xml.c @@ -998,6 +998,7 @@ hwloc_topology_diff_load_xml(hwloc_topology_t topology __hwloc_attribute_unused, fakedata.msgprefix = strdup(basename); if (!hwloc_libxml_callbacks && !hwloc_nolibxml_callbacks) { + free(fakedata.msgprefix); errno = ENOSYS; return -1; } @@ -1037,9 +1038,10 @@ hwloc_topology_diff_load_xmlbuffer(hwloc_topology_t topology __hwloc_attribute_u int ret; state.global = &fakedata; - fakedata.msgprefix = "xmldiffbuffer"; + fakedata.msgprefix = strdup("xmldiffbuffer"); if (!hwloc_libxml_callbacks && !hwloc_nolibxml_callbacks) { + free(fakedata.msgprefix); errno = 
ENOSYS; return -1; } @@ -1061,6 +1063,8 @@ hwloc_topology_diff_load_xmlbuffer(hwloc_topology_t topology __hwloc_attribute_u } hwloc_localeswitch_fini(); + + free(fakedata.msgprefix); return ret; } diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/topology.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology.c similarity index 95% rename from opal/mca/hwloc/hwloc1110/hwloc/src/topology.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/topology.c index 01be27453b4..a6d6d921f27 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/topology.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology.c @@ -132,6 +132,7 @@ int hwloc_get_sysctl(int name[], unsigned namelen, int *ret) reading sysfs on Linux, this method is not virtualizable; thus it's only used as a fall-back method, allowing `hwloc_set_fsroot ()' to have the desired effect. */ +#ifndef HWLOC_WIN_SYS /* The windows implementation is in topology-windows.c */ unsigned hwloc_fallback_nbprocessors(struct hwloc_topology *topology) { int n; @@ -157,10 +158,6 @@ hwloc_fallback_nbprocessors(struct hwloc_topology *topology) { static int name[2] = {CTL_HW, HW_NPCU}; if (hwloc_get_sysctl(name, sizeof(name)/sizeof(*name)), &n) n = -1; -#elif defined(HWLOC_WIN_SYS) - SYSTEM_INFO sysinfo; - GetSystemInfo(&sysinfo); - n = sysinfo.dwNumberOfProcessors; #else #ifdef __GNUC__ #warning No known way to discover number of available processors on this system @@ -174,6 +171,7 @@ hwloc_fallback_nbprocessors(struct hwloc_topology *topology) { n = 1; return n; } +#endif /* !HWLOC_WIN_SYS */ /* * Use the given number of processors and the optional online cpuset if given @@ -351,9 +349,8 @@ void hwloc_obj_add_info_nodup(hwloc_obj_t obj, const char *name, const char *val /* Get pointer to next childect. */ \ child = *pchild) -/* Free an object and all its content. 
*/ -void -hwloc_free_unlinked_object(hwloc_obj_t obj) +static void +hwloc__free_object_contents(hwloc_obj_t obj) { switch (obj->type) { default: @@ -372,9 +369,34 @@ hwloc_free_unlinked_object(hwloc_obj_t obj) hwloc_bitmap_free(obj->nodeset); hwloc_bitmap_free(obj->complete_nodeset); hwloc_bitmap_free(obj->allowed_nodeset); +} + +/* Free an object and all its content. */ +void +hwloc_free_unlinked_object(hwloc_obj_t obj) +{ + hwloc__free_object_contents(obj); free(obj); } +/* Replace old with contents of new object, and make new freeable by the caller. + * Only updates next_sibling/first_child pointers, + * so may only be used during early discovery. + */ +static void +hwloc_replace_linked_object(hwloc_obj_t old, hwloc_obj_t new) +{ + /* drop old fields */ + hwloc__free_object_contents(old); + /* copy old tree pointers to new */ + new->next_sibling = old->next_sibling; + new->first_child = old->first_child; + /* copy new contents to old now that tree pointers are OK */ + memcpy(old, new, sizeof(*old)); + /* clear new to that we may free it */ + memset(new, 0,sizeof(*new)); +} + /* insert the (non-empty) list of sibling starting at firstnew as new children of newparent, * and return the address of the pointer to the next one */ @@ -501,7 +523,9 @@ hwloc_topology_dup(hwloc_topology_t *newp, return -1; } - hwloc_topology_init(&new); + if (0 != hwloc_topology_init(&new)) { + return -1; + } new->flags = old->flags; memcpy(new->ignored_types, old->ignored_types, sizeof(old->ignored_types)); @@ -576,20 +600,6 @@ hwloc_topology_dup(hwloc_topology_t *newp, return -1; } -/* - * How to compare objects based on types. - * - * Note that HIGHER/LOWER is only a (consistent) heuristic, used to sort - * objects with same cpuset consistently. - * Only EQUAL / not EQUAL can be relied upon. 
- */ - -enum hwloc_type_cmp_e { - HWLOC_TYPE_HIGHER, - HWLOC_TYPE_DEEPER, - HWLOC_TYPE_EQUAL -}; - /* WARNING: The indexes of this array MUST match the ordering that of the obj_order_type[] array, below. Specifically, the values must be laid out such that: @@ -702,7 +712,15 @@ int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) return order1 - order2; } -static enum hwloc_type_cmp_e +enum hwloc_obj_cmp_e { + HWLOC_OBJ_EQUAL = HWLOC_BITMAP_EQUAL, /**< \brief Equal */ + HWLOC_OBJ_INCLUDED = HWLOC_BITMAP_INCLUDED, /**< \brief Strictly included into */ + HWLOC_OBJ_CONTAINS = HWLOC_BITMAP_CONTAINS, /**< \brief Strictly contains */ + HWLOC_OBJ_INTERSECTS = HWLOC_BITMAP_INTERSECTS, /**< \brief Intersects, but no inclusion! */ + HWLOC_OBJ_DIFFERENT = HWLOC_BITMAP_DIFFERENT /**< \brief No intersection */ +}; + +static enum hwloc_obj_cmp_e hwloc_type_cmp(hwloc_obj_t obj1, hwloc_obj_t obj2) { hwloc_obj_type_t type1 = obj1->type; @@ -711,105 +729,106 @@ hwloc_type_cmp(hwloc_obj_t obj1, hwloc_obj_t obj2) compare = hwloc_compare_types(type1, type2); if (compare == HWLOC_TYPE_UNORDERED) - return HWLOC_TYPE_EQUAL; /* we cannot do better */ + return HWLOC_OBJ_DIFFERENT; /* we cannot do better */ if (compare > 0) - return HWLOC_TYPE_DEEPER; + return HWLOC_OBJ_INCLUDED; if (compare < 0) - return HWLOC_TYPE_HIGHER; + return HWLOC_OBJ_CONTAINS; /* Caches have the same types but can have different depths. 
*/ if (type1 == HWLOC_OBJ_CACHE) { if (obj1->attr->cache.depth < obj2->attr->cache.depth) - return HWLOC_TYPE_DEEPER; + return HWLOC_OBJ_INCLUDED; else if (obj1->attr->cache.depth > obj2->attr->cache.depth) - return HWLOC_TYPE_HIGHER; + return HWLOC_OBJ_CONTAINS; else if (obj1->attr->cache.type > obj2->attr->cache.type) /* consider icache deeper than dcache and dcache deeper than unified */ - return HWLOC_TYPE_DEEPER; + return HWLOC_OBJ_INCLUDED; else if (obj1->attr->cache.type < obj2->attr->cache.type) /* consider icache deeper than dcache and dcache deeper than unified */ - return HWLOC_TYPE_HIGHER; + return HWLOC_OBJ_CONTAINS; } /* Group objects have the same types but can have different depths. */ if (type1 == HWLOC_OBJ_GROUP) { if (obj1->attr->group.depth == (unsigned) -1 || obj2->attr->group.depth == (unsigned) -1) - return HWLOC_TYPE_EQUAL; + return HWLOC_OBJ_EQUAL; if (obj1->attr->group.depth < obj2->attr->group.depth) - return HWLOC_TYPE_DEEPER; + return HWLOC_OBJ_INCLUDED; else if (obj1->attr->group.depth > obj2->attr->group.depth) - return HWLOC_TYPE_HIGHER; + return HWLOC_OBJ_CONTAINS; } /* Bridges objects have the same types but can have different depths. */ if (type1 == HWLOC_OBJ_BRIDGE) { if (obj1->attr->bridge.depth < obj2->attr->bridge.depth) - return HWLOC_TYPE_DEEPER; + return HWLOC_OBJ_INCLUDED; else if (obj1->attr->bridge.depth > obj2->attr->bridge.depth) - return HWLOC_TYPE_HIGHER; + return HWLOC_OBJ_CONTAINS; } - return HWLOC_TYPE_EQUAL; + return HWLOC_OBJ_EQUAL; } /* * How to compare objects based on cpusets. */ -enum hwloc_obj_cmp_e { - HWLOC_OBJ_EQUAL = HWLOC_BITMAP_EQUAL, /**< \brief Equal */ - HWLOC_OBJ_INCLUDED = HWLOC_BITMAP_INCLUDED, /**< \brief Strictly included into */ - HWLOC_OBJ_CONTAINS = HWLOC_BITMAP_CONTAINS, /**< \brief Strictly contains */ - HWLOC_OBJ_INTERSECTS = HWLOC_BITMAP_INTERSECTS, /**< \brief Intersects, but no inclusion! 
*/ - HWLOC_OBJ_DIFFERENT = HWLOC_BITMAP_DIFFERENT /**< \brief No intersection */ -}; - static int hwloc_obj_cmp_sets(hwloc_obj_t obj1, hwloc_obj_t obj2) { hwloc_bitmap_t set1, set2; + int res = HWLOC_OBJ_DIFFERENT; - /* compare cpusets if possible, or fallback to nodeset, or return */ - if (obj1->cpuset && !hwloc_bitmap_iszero(obj1->cpuset) - && obj2->cpuset && !hwloc_bitmap_iszero(obj2->cpuset)) { + /* compare cpusets first */ + if (obj1->complete_cpuset && obj2->complete_cpuset) { + set1 = obj1->complete_cpuset; + set2 = obj2->complete_cpuset; + } else { set1 = obj1->cpuset; set2 = obj2->cpuset; - } else if (obj1->nodeset && !hwloc_bitmap_iszero(obj1->nodeset) - && obj2->nodeset && !hwloc_bitmap_iszero(obj2->nodeset)) { + } + if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) { + res = hwloc_bitmap_compare_inclusion(set1, set2); + if (res == HWLOC_OBJ_INTERSECTS) + return HWLOC_OBJ_INTERSECTS; + } + + /* then compare nodesets, and combine the results */ + if (obj1->complete_nodeset && obj2->complete_nodeset) { + set1 = obj1->complete_nodeset; + set2 = obj2->complete_nodeset; + } else { set1 = obj1->nodeset; set2 = obj2->nodeset; - } else { - return HWLOC_OBJ_DIFFERENT; } + if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) { + int noderes = hwloc_bitmap_compare_inclusion(set1, set2); + /* deal with conflicting cpusets/nodesets inclusions */ + if (noderes == HWLOC_OBJ_INCLUDED) { + if (res == HWLOC_OBJ_CONTAINS) + /* contradicting order for cpusets and nodesets */ + return HWLOC_OBJ_INTERSECTS; + res = HWLOC_OBJ_INCLUDED; - return hwloc_bitmap_compare_inclusion(set1, set2); -} + } else if (noderes == HWLOC_OBJ_CONTAINS) { + if (res == HWLOC_OBJ_INCLUDED) + /* contradicting order for cpusets and nodesets */ + return HWLOC_OBJ_INTERSECTS; + res = HWLOC_OBJ_CONTAINS; -static int -hwloc_obj_cmp_types(hwloc_obj_t obj1, hwloc_obj_t obj2) -{ - /* Same sets, subsort by type to have a consistent ordering. 
*/ - int typeres = hwloc_type_cmp(obj1, obj2); - if (typeres == HWLOC_TYPE_DEEPER) - return HWLOC_OBJ_INCLUDED; - if (typeres == HWLOC_TYPE_HIGHER) - return HWLOC_OBJ_CONTAINS; + } else if (noderes == HWLOC_OBJ_INTERSECTS) { + return HWLOC_OBJ_INTERSECTS; - /* HWLOC_TYPE_EQUAL */ + } else { + /* nodesets are different, keep the cpuset order */ + /* FIXME: with upcoming multiple levels of NUMA, we may have to report INCLUDED or CONTAINED here */ - if (obj1->type == HWLOC_OBJ_MISC) { - /* Misc objects may vary by name */ - int res = strcmp(obj1->name, obj2->name); - if (res < 0) - return HWLOC_OBJ_INCLUDED; - if (res > 0) - return HWLOC_OBJ_CONTAINS; - if (res == 0) - return HWLOC_OBJ_EQUAL; + } } - /* Same sets and types! Let's hope it's coherent. */ - return HWLOC_OBJ_EQUAL; + + return res; } /* Compare object cpusets based on complete_cpuset if defined (always correctly ordered), @@ -889,9 +908,7 @@ merge_insert_equal(hwloc_obj_t new, hwloc_obj_t old) &new->infos, &new->infos_count); } - if (new->name) { - if (old->name) - free(old->name); + if (new->name && !old->name) { old->name = new->name; new->name = NULL; } @@ -900,21 +917,17 @@ merge_insert_equal(hwloc_obj_t new, hwloc_obj_t old) switch(new->type) { case HWLOC_OBJ_NUMANODE: - /* Do not check these, it may change between calls */ - merge_sizes(new, old, memory.local_memory); - merge_sizes(new, old, memory.total_memory); - /* if both newects have a page_types array, just keep the biggest one for now */ - if (new->memory.page_types_len && old->memory.page_types_len) - hwloc_debug("%s", "merging page_types by keeping the biggest one only\n"); - if (new->memory.page_types_len < old->memory.page_types_len) { - free(new->memory.page_types); - } else { - free(old->memory.page_types); + if (new->memory.local_memory && !old->memory.local_memory) { + /* no memory in old, use new memory */ + old->memory.local_memory = new->memory.local_memory; + if (old->memory.page_types) + free(old->memory.page_types); 
old->memory.page_types_len = new->memory.page_types_len; old->memory.page_types = new->memory.page_types; new->memory.page_types = NULL; new->memory.page_types_len = 0; } + /* old->memory.total_memory will be updated by propagate_total_memory() */ break; case HWLOC_OBJ_CACHE: merge_sizes(new, old, attr->cache.size); @@ -960,15 +973,33 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur if (res == HWLOC_OBJ_EQUAL) { if (obj->type == HWLOC_OBJ_GROUP) { - /* Group are ignored keep_structure. ignored always are handled earlier. Non-ignored Groups isn't possible. */ - assert(topology->ignored_types[HWLOC_OBJ_GROUP] == HWLOC_IGNORE_TYPE_KEEP_STRUCTURE); + /* Groups are ignored keep_structure or always. Non-ignored Groups isn't possible. */ + assert(topology->ignored_types[HWLOC_OBJ_GROUP] != HWLOC_IGNORE_TYPE_NEVER); /* Remove the Group now. The normal ignore code path wouldn't tell us whether the Group was removed or not. * - * Keep EQUAL so that the Group gets merged. + * The Group doesn't contain anything to keep, just let the caller free it. + */ + return child; + + } else if (child->type == HWLOC_OBJ_GROUP) { + + /* Replace the Group with the new object contents + * and let the caller free the new object */ + hwloc_replace_linked_object(child, obj); + return child; + } else { /* otherwise compare actual types to decide of the inclusion */ - res = hwloc_obj_cmp_types(obj, child); + res = hwloc_type_cmp(obj, child); + if (res == HWLOC_OBJ_EQUAL && obj->type == HWLOC_OBJ_MISC) { + /* Misc objects may vary by name */ + int ret = strcmp(obj->name, child->name); + if (ret < 0) + res = HWLOC_OBJ_INCLUDED; + else if (ret > 0) + res = HWLOC_OBJ_CONTAINS; + } } } @@ -984,7 +1015,9 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur } return NULL; } - /* Can be two objects with same type. Or one Group and anything else. */ + /* Two objects with same type. + * Groups are handled above. 
+ */ if (obj->type == child->type && (obj->type == HWLOC_OBJ_PU || obj->type == HWLOC_OBJ_NUMANODE) && obj->os_index != child->os_index) { @@ -2056,18 +2089,23 @@ hwloc_connect_children(hwloc_obj_t parent) } /* - * Check whether there is an object below ROOT that has the same type as OBJ + * Check whether there is an object below ROOT that has the same type as OBJ. + * Only used for building levels. + * Stop at I/O or Misc since these don't go into levels, and we never have + * normal objects under them. */ static int find_same_type(hwloc_obj_t root, hwloc_obj_t obj) { hwloc_obj_t child; - if (hwloc_type_cmp(root, obj) == HWLOC_TYPE_EQUAL) + if (hwloc_type_cmp(root, obj) == HWLOC_OBJ_EQUAL) return 1; for (child = root->first_child; child; child = child->next_sibling) - if (find_same_type(child, obj)) + if (!hwloc_obj_type_is_io(child->type) + && child->type != HWLOC_OBJ_MISC + && find_same_type(child, obj)) return 1; return 0; @@ -2088,7 +2126,7 @@ hwloc_level_take_objects(hwloc_obj_t top_obj, unsigned i, j; for (i = 0; i < n_current_objs; i++) - if (hwloc_type_cmp(top_obj, current_objs[i]) == HWLOC_TYPE_EQUAL) { + if (hwloc_type_cmp(top_obj, current_objs[i]) == HWLOC_OBJ_EQUAL) { /* Take it, add children. 
*/ taken_objs[taken_i++] = current_objs[i]; for (j = 0; j < current_objs[i]->arity; j++) @@ -2158,6 +2196,12 @@ hwloc_level_filter_objects(hwloc_topology_t topology, /* count interesting objects and allocate the new array */ for(i=0, nnew=0; itype_depth[l] = HWLOC_TYPE_DEPTH_UNKNOWN; /* initialize root type depth */ topology->type_depth[topology->levels[0][0]->type] = 0; @@ -2234,17 +2278,14 @@ hwloc_connect_levels(hwloc_topology_t topology) topology->bridge_level = NULL; topology->bridge_nbobjects = 0; topology->first_bridge = topology->last_bridge = NULL; - topology->type_depth[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_DEPTH_BRIDGE; free(topology->pcidev_level); topology->pcidev_level = NULL; topology->pcidev_nbobjects = 0; topology->first_pcidev = topology->last_pcidev = NULL; - topology->type_depth[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_DEPTH_PCI_DEVICE; free(topology->osdev_level); topology->osdev_level = NULL; topology->osdev_nbobjects = 0; topology->first_osdev = topology->last_osdev = NULL; - topology->type_depth[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_DEPTH_OS_DEVICE; /* Start with children of the whole system. */ n_objs = topology->levels[0][0]->arity; @@ -2276,7 +2317,7 @@ hwloc_connect_levels(hwloc_topology_t topology) /* See if this is actually the topmost object */ for (i = 0; i < n_objs; i++) { - if (hwloc_type_cmp(top_obj, objs[i]) != HWLOC_TYPE_EQUAL) { + if (hwloc_type_cmp(top_obj, objs[i]) != HWLOC_OBJ_EQUAL) { if (find_same_type(objs[i], top_obj)) { /* OBJS[i] is strictly above an object of the same type as TOP_OBJ, so it * is above TOP_OBJ. 
*/ @@ -2292,7 +2333,7 @@ hwloc_connect_levels(hwloc_topology_t topology) n_taken_objs = 0; n_new_objs = 0; for (i = 0; i < n_objs; i++) - if (hwloc_type_cmp(top_obj, objs[i]) == HWLOC_TYPE_EQUAL) { + if (hwloc_type_cmp(top_obj, objs[i]) == HWLOC_OBJ_EQUAL) { n_taken_objs++; n_new_objs += objs[i]->arity; } @@ -2372,13 +2413,20 @@ hwloc_connect_levels(hwloc_topology_t topology) void hwloc_alloc_obj_cpusets(hwloc_obj_t obj) { - obj->cpuset = hwloc_bitmap_alloc_full(); - obj->complete_cpuset = hwloc_bitmap_alloc(); - obj->online_cpuset = hwloc_bitmap_alloc_full(); - obj->allowed_cpuset = hwloc_bitmap_alloc_full(); - obj->nodeset = hwloc_bitmap_alloc(); - obj->complete_nodeset = hwloc_bitmap_alloc(); - obj->allowed_nodeset = hwloc_bitmap_alloc_full(); + if (!obj->cpuset) + obj->cpuset = hwloc_bitmap_alloc_full(); + if (!obj->complete_cpuset) + obj->complete_cpuset = hwloc_bitmap_alloc(); + if (!obj->online_cpuset) + obj->online_cpuset = hwloc_bitmap_alloc_full(); + if (!obj->allowed_cpuset) + obj->allowed_cpuset = hwloc_bitmap_alloc_full(); + if (!obj->nodeset) + obj->nodeset = hwloc_bitmap_alloc(); + if (!obj->complete_nodeset) + obj->complete_nodeset = hwloc_bitmap_alloc(); + if (!obj->allowed_nodeset) + obj->allowed_nodeset = hwloc_bitmap_alloc_full(); } /* Main discovery loop */ @@ -2624,6 +2672,7 @@ void hwloc_topology_setup_defaults(struct hwloc_topology *topology) { struct hwloc_obj *root_obj; + unsigned l; /* reset support */ memset(&topology->binding_hooks, 0, sizeof(topology->binding_hooks)); @@ -2644,6 +2693,12 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology) topology->first_bridge = topology->last_bridge = NULL; topology->first_pcidev = topology->last_pcidev = NULL; topology->first_osdev = topology->last_osdev = NULL; + /* sane values to type_depth */ + for (l = HWLOC_OBJ_SYSTEM; l < HWLOC_OBJ_MISC; l++) + topology->type_depth[l] = HWLOC_TYPE_DEPTH_UNKNOWN; + topology->type_depth[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_DEPTH_BRIDGE; + 
topology->type_depth[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_DEPTH_PCI_DEVICE; + topology->type_depth[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_DEPTH_OS_DEVICE; /* Create the actual machine object, but don't touch its attributes yet * since the OS backend may still change the object into something else @@ -3075,6 +3130,7 @@ hwloc__check_children(struct hwloc_obj *parent) assert(prev_firstchild < firstchild); prev_firstchild = firstchild; } + (void)prev_firstchild; // silence compiler warning } /* checks for all children */ @@ -3150,7 +3206,7 @@ hwloc_topology_check(struct hwloc_topology *topology) assert(obj->logical_index == j); /* check that all objects in the level have the same type */ if (prev) { - assert(hwloc_type_cmp(obj, prev) == HWLOC_TYPE_EQUAL); + assert(hwloc_type_cmp(obj, prev) == HWLOC_OBJ_EQUAL); assert(prev->next_cousin == obj); assert(obj->prev_cousin == prev); } diff --git a/opal/mca/hwloc/hwloc1110/hwloc/src/traversal.c b/opal/mca/hwloc/hwloc1112/hwloc/src/traversal.c similarity index 97% rename from opal/mca/hwloc/hwloc1110/hwloc/src/traversal.c rename to opal/mca/hwloc/hwloc1112/hwloc/src/traversal.c index 97b3e5b3b81..e28bba1ace3 100644 --- a/opal/mca/hwloc/hwloc1110/hwloc/src/traversal.c +++ b/opal/mca/hwloc/hwloc1112/hwloc/src/traversal.c @@ -207,7 +207,7 @@ hwloc_obj_type_of_string (const char * string) if (!strcasecmp(string, "Cache")) return HWLOC_OBJ_CACHE; if (!strcasecmp(string, "Core")) return HWLOC_OBJ_CORE; if (!strcasecmp(string, "PU")) return HWLOC_OBJ_PU; - if (!strcasecmp(string, "Bridge")) return HWLOC_OBJ_BRIDGE; + if (!strcasecmp(string, "Bridge") || !strcasecmp(string, "HostBridge") || !strcasecmp(string, "PCIBridge")) return HWLOC_OBJ_BRIDGE; if (!strcasecmp(string, "PCIDev")) return HWLOC_OBJ_PCI_DEVICE; if (!strcasecmp(string, "OSDev")) return HWLOC_OBJ_OS_DEVICE; return (hwloc_obj_type_t) -1; @@ -238,7 +238,9 @@ hwloc_obj_type_sscanf(const char *string, hwloc_obj_type_t *typep, int *depthatt type = HWLOC_OBJ_PU; } else if 
(!hwloc_strncasecmp(string, "misc", 2)) { type = HWLOC_OBJ_MISC; - } else if (!hwloc_strncasecmp(string, "bridge", 2)) { + } else if (!hwloc_strncasecmp(string, "bridge", 2) + || !hwloc_strncasecmp(string, "hostbridge", 6) + || !hwloc_strncasecmp(string, "pcibridge", 5)) { type = HWLOC_OBJ_BRIDGE; } else if (!hwloc_strncasecmp(string, "pci", 2)) { type = HWLOC_OBJ_PCI_DEVICE; @@ -261,7 +263,7 @@ hwloc_obj_type_sscanf(const char *string, hwloc_obj_type_t *typep, int *depthatt } } else if (!hwloc_strncasecmp(string, "group", 2)) { - int length; + size_t length; type = HWLOC_OBJ_GROUP; length = strcspn(string, "0123456789"); if (length <= 5 && !hwloc_strncasecmp(string, "group", length) @@ -499,7 +501,8 @@ hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t case HWLOC_OBJ_OSDEV_GPU: return hwloc_snprintf(string, size, "GPU"); case HWLOC_OBJ_OSDEV_COPROC: return hwloc_snprintf(string, size, verbose ? "Co-Processor" : "CoProc"); default: - *string = '\0'; + if (size > 0) + *string = '\0'; return 0; } break; @@ -552,7 +555,7 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t if (ret > 0) prefix = separator; if (res >= tmplen) - res = tmplen>0 ? tmplen - 1 : 0; + res = tmplen>0 ? (int)tmplen - 1 : 0; tmp += res; tmplen -= res; @@ -626,7 +629,7 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t if (ret > 0) prefix = separator; if (res >= tmplen) - res = tmplen>0 ? tmplen - 1 : 0; + res = tmplen>0 ? (int)tmplen - 1 : 0; tmp += res; tmplen -= res; @@ -648,7 +651,7 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t return -1; ret += res; if (res >= tmplen) - res = tmplen>0 ? tmplen - 1 : 0; + res = tmplen>0 ? 
(int)tmplen - 1 : 0; tmp += res; tmplen -= res; if (ret > 0) diff --git a/opal/mca/hwloc/hwloc1110/hwloc/tests/README.txt b/opal/mca/hwloc/hwloc1112/hwloc/tests/README.txt similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/tests/README.txt rename to opal/mca/hwloc/hwloc1112/hwloc/tests/README.txt diff --git a/opal/mca/hwloc/hwloc1110/hwloc/utils/README.txt b/opal/mca/hwloc/hwloc1112/hwloc/utils/README.txt similarity index 100% rename from opal/mca/hwloc/hwloc1110/hwloc/utils/README.txt rename to opal/mca/hwloc/hwloc1112/hwloc/utils/README.txt diff --git a/opal/mca/hwloc/hwloc1112/hwloc1112.h b/opal/mca/hwloc/hwloc1112/hwloc1112.h new file mode 100644 index 00000000000..78946e55acb --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc1112.h @@ -0,0 +1,48 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * When this component is used, this file is included in the rest of + * the OPAL/ORTE/OMPI code base via opal/mca/hwloc/hwloc-internal.h. As such, + * this header represents the public interface to this static component. + */ + +#ifndef MCA_OPAL_HWLOC_HWLOC1112_H +#define MCA_OPAL_HWLOC_HWLOC1112_H + +BEGIN_C_DECLS + +#include "hwloc/include/hwloc.h" + +/* If the including file requested it, also include the hwloc verbs + helper file. We can't just always include this file (even if we + know we have ) because there are some inline + functions in that file that invoke ibv_* functions. Some linkers + (e.g., Solaris Studio Compilers) will instantiate those static + inline functions even if we don't use them, and therefore we need + to be able to resolve the ibv_* symbols at link time. 
+ + Since -libverbs is only specified in places where we use other + ibv_* functions (e.g., the OpenFabrics-based BTLs), that means that + linking random executables can/will fail (e.g., orterun). + */ +#if defined(OPAL_HWLOC_WANT_VERBS_HELPER) && OPAL_HWLOC_WANT_VERBS_HELPER +# if defined(HAVE_INFINIBAND_VERBS_H) +# include "hwloc/include/hwloc/openfabrics-verbs.h" +# else +# error Tried to include hwloc verbs helper file, but hwloc was compiled with no OpenFabrics support +# endif +#endif + +END_C_DECLS + +#endif /* MCA_OPAL_HWLOC_HWLOC1112_H */ diff --git a/opal/mca/hwloc/hwloc1112/hwloc1112_component.c b/opal/mca/hwloc/hwloc1112/hwloc1112_component.c new file mode 100644 index 00000000000..a06e5344be8 --- /dev/null +++ b/opal/mca/hwloc/hwloc1112/hwloc1112_component.c @@ -0,0 +1,55 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "opal_config.h" +#include "opal/constants.h" + +#include "opal/mca/hwloc/hwloc-internal.h" +#include "hwloc1112.h" + +/* + * Public string showing the sysinfo ompi_linux component version number + */ +const char *opal_hwloc_hwloc1112_component_version_string = + "OPAL hwloc1112 hwloc MCA component version " OPAL_VERSION; + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +const opal_hwloc_component_t mca_hwloc_hwloc1112_component = { + + /* First, the mca_component_t struct containing meta information + about the component itself */ + + .base_version = { + OPAL_HWLOC_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "hwloc1112", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } +}; diff --git a/opal/mca/hwloc/hwloc1110/owner.txt b/opal/mca/hwloc/hwloc1112/owner.txt similarity index 100% rename from opal/mca/hwloc/hwloc1110/owner.txt rename to opal/mca/hwloc/hwloc1112/owner.txt diff --git a/opal/mca/if/Makefile.am b/opal/mca/if/Makefile.am index 739a11e8464..fc9aad5c47d 100644 --- a/opal/mca/if/Makefile.am +++ b/opal/mca/if/Makefile.am @@ -1,9 +1,9 @@ # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/if/base/Makefile.am b/opal/mca/if/base/Makefile.am index f8451f65904..65ebd44cb14 100644 --- a/opal/mca/if/base/Makefile.am +++ b/opal/mca/if/base/Makefile.am @@ -1,9 +1,9 @@ # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/if/base/base.h b/opal/mca/if/base/base.h index 25c004e4837..24ae771c303 100644 --- a/opal/mca/if/base/base.h +++ b/opal/mca/if/base/base.h @@ -1,9 +1,9 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/opal/mca/if/base/if_base_components.c b/opal/mca/if/base/if_base_components.c index 6dde724ef9a..02da51a6c9e 100644 --- a/opal/mca/if/base/if_base_components.c +++ b/opal/mca/if/base/if_base_components.c @@ -4,9 +4,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -64,7 +64,7 @@ static int opal_if_base_open (mca_base_open_flag_t flags) return OPAL_SUCCESS; } frameopen = true; - + /* setup the global list */ OBJ_CONSTRUCT(&opal_if_list, opal_list_t); @@ -79,7 +79,7 @@ static int opal_if_base_close(void) if (!frameopen) { return OPAL_SUCCESS; } - + while (NULL != (item = opal_list_remove_first(&opal_if_list))) { OBJ_RELEASE(item); } diff --git a/opal/mca/if/bsdx_ipv4/Makefile.am b/opal/mca/if/bsdx_ipv4/Makefile.am index 349d61b75d9..ab259799665 100644 --- a/opal/mca/if/bsdx_ipv4/Makefile.am +++ b/opal/mca/if/bsdx_ipv4/Makefile.am @@ -1,9 +1,9 @@ # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/if/bsdx_ipv4/configure.m4 b/opal/mca/if/bsdx_ipv4/configure.m4 index 5e90b475139..d572cc44d70 100644 --- a/opal/mca/if/bsdx_ipv4/configure.m4 +++ b/opal/mca/if/bsdx_ipv4/configure.m4 @@ -1,10 +1,12 @@ # -*- shell-script -*- # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
+# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -14,7 +16,7 @@ AC_DEFUN([MCA_opal_if_bsdx_ipv4_COMPILE_MODE], [ AC_MSG_RESULT([$$4]) ]) -# MCA_if_config_CONFIG(action-if-can-compile, +# MCA_if_config_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_if_bsdx_ipv4_CONFIG], [ @@ -30,7 +32,10 @@ AC_DEFUN([MCA_opal_if_bsdx_ipv4_CONFIG], [ AS_IF([test "$opal_found_sockaddr" = "yes"], [AC_MSG_RESULT([yes (cached)]) AC_MSG_CHECKING([NetBSD, FreeBSD, OpenBSD, or DragonFly]) - AS_IF([test "$opal_found_netbsd" = "yes" -o "$opal_found_freebsd" = "yes" -o "$opal_found_openbsd" = "yes" -o "$opal_found_dragonfly" = "yes"], + AS_IF([test "$opal_found_netbsd" = "yes" || \ + test "$opal_found_freebsd" = "yes" || \ + test "$opal_found_openbsd" = "yes" || \ + test "$opal_found_dragonfly" = "yes"], [AC_MSG_RESULT([yes]) $1], [AC_MSG_RESULT([no]) diff --git a/opal/mca/if/bsdx_ipv4/if_bsdx.c b/opal/mca/if/bsdx_ipv4/if_bsdx.c index 0e2024909d1..e67f9b7f6bd 100644 --- a/opal/mca/if/bsdx_ipv4/if_bsdx.c +++ b/opal/mca/if/bsdx_ipv4/if_bsdx.c @@ -2,9 +2,9 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -72,7 +72,7 @@ static int if_bsdx_open(void) struct ifaddrs *cur_ifaddrs; struct sockaddr_in* sin_addr; - /* + /* * the manpage claims that getifaddrs() allocates the memory, * and freeifaddrs() is later used to release the allocated memory. 
* however, without this malloc the call to getifaddrs() segfaults @@ -86,7 +86,7 @@ static int if_bsdx_open(void) return OPAL_ERROR; } - for (cur_ifaddrs = *ifadd_list; NULL != cur_ifaddrs; + for (cur_ifaddrs = *ifadd_list; NULL != cur_ifaddrs; cur_ifaddrs = cur_ifaddrs->ifa_next) { opal_if_t *intf; struct in_addr a4; @@ -124,7 +124,7 @@ static int if_bsdx_open(void) /* fill values into the opal_if_t */ memcpy(&a4, &(sin_addr->sin_addr), sizeof(struct in_addr)); - + strncpy(intf->if_name, cur_ifaddrs->ifa_name, IF_NAMESIZE); intf->if_index = opal_list_get_size(&opal_if_list) + 1; ((struct sockaddr_in*) &intf->if_addr)->sin_addr = a4; @@ -134,7 +134,7 @@ static int if_bsdx_open(void) intf->if_mask = prefix( sin_addr->sin_addr.s_addr); intf->if_flags = cur_ifaddrs->ifa_flags; - intf->if_kernel_index = + intf->if_kernel_index = (uint16_t) if_nametoindex(cur_ifaddrs->ifa_name); opal_list_append(&opal_if_list, &(intf->super)); diff --git a/opal/mca/if/bsdx_ipv6/Makefile.am b/opal/mca/if/bsdx_ipv6/Makefile.am index b0ffd520679..1bcd974f668 100644 --- a/opal/mca/if/bsdx_ipv6/Makefile.am +++ b/opal/mca/if/bsdx_ipv6/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/if/bsdx_ipv6/configure.m4 b/opal/mca/if/bsdx_ipv6/configure.m4 index ffdbceb08ee..4b2122f7012 100644 --- a/opal/mca/if/bsdx_ipv6/configure.m4 +++ b/opal/mca/if/bsdx_ipv6/configure.m4 @@ -1,10 +1,12 @@ # -*- shell-script -*- # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -14,7 +16,7 @@ AC_DEFUN([MCA_opal_if_bsdx_ipv6_COMPILE_MODE], [ AC_MSG_RESULT([$$4]) ]) -# MCA_if_config_CONFIG(action-if-can-compile, +# MCA_if_config_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_if_bsdx_ipv6_CONFIG], [ @@ -25,12 +27,17 @@ AC_DEFUN([MCA_opal_if_bsdx_ipv6_CONFIG], [ # If we found struct sockaddr and we're on any of the BSDs, we're # happy. I.e., this: #if defined( __NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \ - # defined(__386BSD__) || defined(__bsdi__) || defined(__APPLE__) + # defined(__386BSD__) || defined(__bsdi__) || defined(__APPLE__) AC_MSG_CHECKING([struct sockaddr]) AS_IF([test "$opal_found_sockaddr" = "yes"], [AC_MSG_RESULT([yes (cached)]) AC_MSG_CHECKING([some flavor of BSD]) - AS_IF([test "$opal_found_netbsd" = "yes" -o "$opal_found_freebsd" = "yes" -o "$opal_found_openbsd" = "yes" -o "$opal_found_386bsd" = "yes" -o "$opal_found_bsdi" = "yes" -o "$opal_found_apple" = "yes"], + AS_IF([test "$opal_found_netbsd" = "yes" || \ + test "$opal_found_freebsd" = "yes" || \ + test "$opal_found_openbsd" = "yes" || \ + test "$opal_found_386bsd" = "yes" || \ + test "$opal_found_bsdi" = "yes" || + test "$opal_found_apple" = "yes"], [AC_MSG_RESULT([yes]) $1], [AC_MSG_RESULT([no]) diff --git a/opal/mca/if/bsdx_ipv6/if_bsdx_ipv6.c b/opal/mca/if/bsdx_ipv6/if_bsdx_ipv6.c index 2273da192e9..d6cf3d6861f 100644 --- a/opal/mca/if/bsdx_ipv6/if_bsdx_ipv6.c +++ b/opal/mca/if/bsdx_ipv6/if_bsdx_ipv6.c @@ -2,9 +2,9 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -108,7 +108,7 @@ static int if_bsdx_ipv6_open(void) opal_output_verbose(1, opal_if_base_framework.framework_output, "searching for IPv6 interfaces"); - /* + /* * the manpage claims that getifaddrs() allocates the memory, * and freeifaddrs() is later used to release the allocated memory. * however, without this malloc the call to getifaddrs() segfaults @@ -123,7 +123,7 @@ static int if_bsdx_ipv6_open(void) return OPAL_ERROR; } - for (cur_ifaddrs = *ifadd_list; NULL != cur_ifaddrs; + for (cur_ifaddrs = *ifadd_list; NULL != cur_ifaddrs; cur_ifaddrs = cur_ifaddrs->ifa_next) { opal_if_t *intf; struct in6_addr a6; @@ -160,21 +160,21 @@ static int if_bsdx_ipv6_open(void) sin_addr = (struct sockaddr_in6 *) cur_ifaddrs->ifa_addr; - /* + /* * skip IPv6 address starting with fe80:, as this is supposed to be * link-local scope. sockaddr_in6->sin6_scope_id doesn't always work - * TODO: test whether scope id is set to a sensible value on + * TODO: test whether scope id is set to a sensible value on * linux and/or bsd (including osx) * * MacOSX: fe80::... 
has a scope of 0, but ifconfig -a shows - * a scope of 4 on that particular machine, + * a scope of 4 on that particular machine, * so the scope returned by getifaddrs() isn't working properly */ if ((IN6_IS_ADDR_LINKLOCAL (&sin_addr->sin6_addr))) { opal_output_verbose(1, opal_if_base_framework.framework_output, "skipping link-local ipv6 address on interface " - "%s with scope %d.\n", + "%s with scope %d.\n", cur_ifaddrs->ifa_name, sin_addr->sin6_scope_id); continue; } @@ -182,14 +182,14 @@ static int if_bsdx_ipv6_open(void) if (0 < opal_output_get_verbosity(opal_if_base_framework.framework_output)) { char *addr_name = (char *) malloc(48*sizeof(char)); inet_ntop(AF_INET6, &sin_addr->sin6_addr, addr_name, 48*sizeof(char)); - opal_output(0, "ipv6 capable interface %s discovered, address %s.\n", + opal_output(0, "ipv6 capable interface %s discovered, address %s.\n", cur_ifaddrs->ifa_name, addr_name); free(addr_name); } /* fill values into the opal_if_t */ memcpy(&a6, &(sin_addr->sin6_addr), sizeof(struct in6_addr)); - + intf = OBJ_NEW(opal_if_t); if (NULL == intf) { opal_output(0, "opal_ifinit: unable to allocate %lu bytes\n", @@ -217,7 +217,7 @@ static int if_bsdx_ipv6_open(void) * (or create our own), getifaddrs() does not contain such * data */ - intf->if_kernel_index = + intf->if_kernel_index = (uint16_t) if_nametoindex(cur_ifaddrs->ifa_name); opal_list_append(&opal_if_list, &(intf->super)); } /* of for loop over ifaddrs list */ diff --git a/opal/mca/if/if.h b/opal/mca/if/if.h index c9da6a8e12a..5b3ae793c1b 100644 --- a/opal/mca/if/if.h +++ b/opal/mca/if/if.h @@ -4,9 +4,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/if/linux_ipv6/Makefile.am b/opal/mca/if/linux_ipv6/Makefile.am index 8c7505ced7b..7449b04b1d4 100644 --- a/opal/mca/if/linux_ipv6/Makefile.am +++ b/opal/mca/if/linux_ipv6/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/if/linux_ipv6/configure.m4 b/opal/mca/if/linux_ipv6/configure.m4 index 4d53edb51a6..583d59e93fe 100644 --- a/opal/mca/if/linux_ipv6/configure.m4 +++ b/opal/mca/if/linux_ipv6/configure.m4 @@ -1,10 +1,12 @@ # -*- shell-script -*- # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -14,7 +16,7 @@ AC_DEFUN([MCA_opal_if_linux_ipv6_COMPILE_MODE], [ AC_MSG_RESULT([$$4]) ]) -# MCA_if_config_CONFIG(action-if-can-compile, +# MCA_if_config_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_if_linux_ipv6_CONFIG], [ @@ -25,7 +27,7 @@ AC_DEFUN([MCA_opal_if_linux_ipv6_CONFIG], [ AC_MSG_CHECKING([if we are on Linux with TCP]) # If we have struct sockaddr and we're on Linux, then we're # happy. - AS_IF([test "$opal_found_sockaddr" = "yes" -a "$opal_found_linux" = "yes"], + AS_IF([test "$opal_found_sockaddr" = "yes" && test "$opal_found_linux" = "yes"], [AC_MSG_RESULT([yes]) $1], [AC_MSG_RESULT([no]) diff --git a/opal/mca/if/linux_ipv6/if_linux_ipv6.c b/opal/mca/if/linux_ipv6/if_linux_ipv6.c index 5e25a960c92..eb2a8fc0920 100644 --- a/opal/mca/if/linux_ipv6/if_linux_ipv6.c +++ b/opal/mca/if/linux_ipv6/if_linux_ipv6.c @@ -2,9 +2,9 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -155,7 +155,7 @@ static int if_linux_ipv6_open(void) } else { intf->if_flags = IFF_UP; } - + /* copy new interface information to heap and append to list */ opal_list_append(&opal_if_list, &(intf->super)); diff --git a/opal/mca/if/posix_ipv4/Makefile.am b/opal/mca/if/posix_ipv4/Makefile.am index 6e8a0842edd..0c77b658ef0 100644 --- a/opal/mca/if/posix_ipv4/Makefile.am +++ b/opal/mca/if/posix_ipv4/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/if/posix_ipv4/configure.m4 b/opal/mca/if/posix_ipv4/configure.m4 index 8ff48b5c889..73548efb985 100644 --- a/opal/mca/if/posix_ipv4/configure.m4 +++ b/opal/mca/if/posix_ipv4/configure.m4 @@ -1,10 +1,12 @@ # -*- shell-script -*- # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -14,7 +16,7 @@ AC_DEFUN([MCA_opal_if_posix_ipv4_COMPILE_MODE], [ AC_MSG_RESULT([$$4]) ]) -# MCA_if_config_CONFIG(action-if-can-compile, +# MCA_if_config_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_if_posix_ipv4_CONFIG], [ @@ -33,7 +35,7 @@ AC_DEFUN([MCA_opal_if_posix_ipv4_CONFIG], [ AS_IF([test "$opal_found_sockaddr" = "yes"], [AC_MSG_RESULT([yes (cached)]) AC_MSG_CHECKING([not NetBSD, FreeBSD, OpenBSD, or DragonFly]) - AS_IF([test "$opal_found_netbsd" = "no" -a "$opal_found_freebsd" = "no" -a "$opal_found_openbsd" = "no" -a "$opal_found_dragonfly" = "no"], + AS_IF([test "$opal_found_netbsd" = "no" && test "$opal_found_freebsd" = "no" && test "$opal_found_openbsd" = "no" && test "$opal_found_dragonfly" = "no"], [AC_MSG_RESULT([yes]) opal_if_posix_ipv4_happy=yes], [AC_MSG_RESULT([no])] diff --git a/opal/mca/if/posix_ipv4/if_posix.c b/opal/mca/if/posix_ipv4/if_posix.c index 6f755334366..c50ae35c2d2 100644 --- a/opal/mca/if/posix_ipv4/if_posix.c +++ b/opal/mca/if/posix_ipv4/if_posix.c @@ -6,9 +6,9 @@ * reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -81,19 +81,19 @@ static int if_posix_open(void) using AF_UNSPEC or AF_INET6 will cause everything to fail. */ if ((sd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { - opal_output(0, "opal_ifinit: socket() failed with errno=%d\n", + opal_output(0, "opal_ifinit: socket() failed with errno=%d\n", errno); return OPAL_ERROR; } /* - * Get Network Interface configuration + * Get Network Interface configuration * * Some notes on the behavior of ioctl(..., SIOCGIFCONF,...) * when not enough space is allocated for all the entries. 
* * - Solaris returns -1, errno EINVAL if there is not enough - * space + * space * - OS X returns 0, sets .ifc_len to the space used by the * by the entries that did fit. * - Linux returns 0, sets .ifc_len to the space required to @@ -112,20 +112,20 @@ static int if_posix_open(void) close(sd); return OPAL_ERROR; } - + /* initialize the memory so valgrind and purify won't * complain. Since this isn't performance critical, just * always memset. */ memset(ifconf.ifc_req, 0, ifconf.ifc_len); - + if (ioctl(sd, SIOCGIFCONF, &ifconf) < 0) { /* if we got an einval, we probably don't have enough space. so we'll fall down and try to expand our space */ if (errno != EINVAL && lastlen != 0) { opal_output(0, "opal_ifinit: ioctl(SIOCGIFCONF) \ - failed with errno=%d", + failed with errno=%d", errno); free(ifconf.ifc_req); close(sd); @@ -142,7 +142,7 @@ static int if_posix_open(void) } lastlen = ifconf.ifc_len; } - + /* Yes, we overflowed (or had an EINVAL on the ioctl). Loop back around and try again with a bigger buffer */ free(ifconf.ifc_req); @@ -153,9 +153,9 @@ static int if_posix_open(void) close(sd); return OPAL_ERR_FATAL; } - - /* - * Setup indexes + + /* + * Setup indexes */ ptr = (char*) ifconf.ifc_req; rem = ifconf.ifc_len; @@ -169,24 +169,24 @@ static int if_posix_open(void) /* compute offset for entries */ #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN length = sizeof(struct sockaddr); - + if (ifr->ifr_addr.sa_len > length) { length = ifr->ifr_addr.sa_len; } - + length += sizeof(ifr->ifr_name); #else length = sizeof(struct ifreq); #endif - + rem -= length; ptr += length; - + /* see if we like this entry */ if (AF_INET != ifr->ifr_addr.sa_family) { continue; } - + if (ioctl(sd, SIOCGIFFLAGS, ifr) < 0) { opal_output(0, "opal_ifinit: ioctl(SIOCGIFFLAGS) failed with errno=%d", errno); continue; @@ -206,7 +206,7 @@ static int if_posix_open(void) continue; } #endif - + intf = OBJ_NEW(opal_if_t); if (NULL == intf) { opal_output(0, "opal_ifinit: unable to allocated %lu bytes\n", 
(unsigned long)sizeof(opal_if_t)); @@ -220,13 +220,13 @@ static int if_posix_open(void) memset(intf->if_name, 0, sizeof(intf->if_name)); strncpy(intf->if_name, ifr->ifr_name, sizeof(intf->if_name) - 1); intf->if_flags = ifr->ifr_flags; - + /* every new address gets its own internal if_index */ intf->if_index = opal_list_get_size(&opal_if_list)+1; opal_output_verbose(1, opal_if_base_framework.framework_output, "found interface %s", intf->if_name); - + /* assign the kernel index to distinguish different NICs */ #ifndef SIOCGIFINDEX intf->if_kernel_index = intf->if_index; @@ -244,7 +244,7 @@ static int if_posix_open(void) intf->if_kernel_index = -1; #endif #endif /* SIOCGIFINDEX */ - + /* This call returns IPv4 addresses only. Use SIOCGLIFADDR instead */ if (ioctl(sd, SIOCGIFADDR, ifr) < 0) { @@ -256,19 +256,19 @@ static int if_posix_open(void) OBJ_RELEASE(intf); continue; } - + /* based on above, we know this is an IPv4 address... */ memcpy(&intf->if_addr, &ifr->ifr_addr, sizeof(struct sockaddr_in)); - + if (ioctl(sd, SIOCGIFNETMASK, ifr) < 0) { opal_output(0, "opal_ifinit: ioctl(SIOCGIFNETMASK) failed with errno=%d", errno); OBJ_RELEASE(intf); continue; } - + /* generate CIDR and assign to netmask */ intf->if_mask = prefix(((struct sockaddr_in*) &ifr->ifr_addr)->sin_addr.s_addr); - + #if defined(SIOCGIFHWADDR) && defined(HAVE_STRUCT_IFREQ_IFR_HWADDR) /* get the MAC address */ if (ioctl(sd, SIOCGIFHWADDR, ifr) < 0) { diff --git a/opal/mca/if/solaris_ipv6/Makefile.am b/opal/mca/if/solaris_ipv6/Makefile.am index 04e6c0a336f..b8e9606c327 100644 --- a/opal/mca/if/solaris_ipv6/Makefile.am +++ b/opal/mca/if/solaris_ipv6/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/if/solaris_ipv6/configure.m4 b/opal/mca/if/solaris_ipv6/configure.m4 index 92567455bd0..eecf6f435dd 100644 --- a/opal/mca/if/solaris_ipv6/configure.m4 +++ b/opal/mca/if/solaris_ipv6/configure.m4 @@ -1,10 +1,10 @@ # -*- shell-script -*- # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -14,7 +14,7 @@ AC_DEFUN([MCA_opal_if_solaris_ipv6_COMPILE_MODE], [ AC_MSG_RESULT([$$4]) ]) -# MCA_if_config_CONFIG(action-if-can-compile, +# MCA_if_config_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_if_solaris_ipv6_CONFIG], [ diff --git a/opal/mca/if/solaris_ipv6/if_solaris_ipv6.c b/opal/mca/if/solaris_ipv6/if_solaris_ipv6.c index 9bae55ad632..6f718065539 100644 --- a/opal/mca/if/solaris_ipv6/if_solaris_ipv6.c +++ b/opal/mca/if/solaris_ipv6/if_solaris_ipv6.c @@ -2,9 +2,9 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/if/solaris_ipv6/owner.txt b/opal/mca/if/solaris_ipv6/owner.txt index ef8355c226a..6b13fdc7a25 100644 --- a/opal/mca/if/solaris_ipv6/owner.txt +++ b/opal/mca/if/solaris_ipv6/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: ORACLE? +owner: nobody status: maintenance diff --git a/opal/mca/installdirs/Makefile.am b/opal/mca/installdirs/Makefile.am index b73f484030f..3f9fa35860e 100644 --- a/opal/mca/installdirs/Makefile.am +++ b/opal/mca/installdirs/Makefile.am @@ -1,11 +1,11 @@ # # Copyright (c) 2006 Los Alamos National Security, LLC. All rights -# reserved. 
+# reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/installdirs/base/Makefile.am b/opal/mca/installdirs/base/Makefile.am index 9923b7d325e..73cd71c9534 100644 --- a/opal/mca/installdirs/base/Makefile.am +++ b/opal/mca/installdirs/base/Makefile.am @@ -1,10 +1,10 @@ # # Copyright (c) 2006 Los Alamos National Security, LLC. All rights -# reserved. +# reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/installdirs/base/base.h b/opal/mca/installdirs/base/base.h index d6e0216a0e4..94ad6fbfed9 100644 --- a/opal/mca/installdirs/base/base.h +++ b/opal/mca/installdirs/base/base.h @@ -1,12 +1,12 @@ /* * Copyright (c) 2006-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Sandia National Laboratories. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/opal/mca/installdirs/base/installdirs_base_components.c b/opal/mca/installdirs/base/installdirs_base_components.c index 4bf9f83ef24..0c10e12fb6f 100644 --- a/opal/mca/installdirs/base/installdirs_base_components.c +++ b/opal/mca/installdirs/base/installdirs_base_components.c @@ -1,14 +1,14 @@ /* * Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -47,7 +47,7 @@ opal_installdirs_base_open(mca_base_open_flag_t flags) (const opal_installdirs_base_component_t *) component_item->cli_component; /* copy over the data, if something isn't already there */ - CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, + CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, prefix); CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, exec_prefix); @@ -61,62 +61,62 @@ opal_installdirs_base_open(mca_base_open_flag_t flags) datarootdir); CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, datadir); - CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, + CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, sysconfdir); - CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, + CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, sharedstatedir); - CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, + CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, localstatedir); - CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, + CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, libdir); - CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, + CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, includedir); - CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, + CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, infodir); - CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, + CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, mandir); CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, opaldatadir); - CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, + CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, opallibdir); - CONDITIONAL_COPY(opal_install_dirs, 
component->install_dirs_data, + CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, opalincludedir); } /* expand out all the fields */ - opal_install_dirs.prefix = + opal_install_dirs.prefix = opal_install_dirs_expand_setup(opal_install_dirs.prefix); - opal_install_dirs.exec_prefix = + opal_install_dirs.exec_prefix = opal_install_dirs_expand_setup(opal_install_dirs.exec_prefix); - opal_install_dirs.bindir = + opal_install_dirs.bindir = opal_install_dirs_expand_setup(opal_install_dirs.bindir); - opal_install_dirs.sbindir = + opal_install_dirs.sbindir = opal_install_dirs_expand_setup(opal_install_dirs.sbindir); - opal_install_dirs.libexecdir = + opal_install_dirs.libexecdir = opal_install_dirs_expand_setup(opal_install_dirs.libexecdir); - opal_install_dirs.datarootdir = + opal_install_dirs.datarootdir = opal_install_dirs_expand_setup(opal_install_dirs.datarootdir); - opal_install_dirs.datadir = + opal_install_dirs.datadir = opal_install_dirs_expand_setup(opal_install_dirs.datadir); - opal_install_dirs.sysconfdir = + opal_install_dirs.sysconfdir = opal_install_dirs_expand_setup(opal_install_dirs.sysconfdir); - opal_install_dirs.sharedstatedir = + opal_install_dirs.sharedstatedir = opal_install_dirs_expand_setup(opal_install_dirs.sharedstatedir); - opal_install_dirs.localstatedir = + opal_install_dirs.localstatedir = opal_install_dirs_expand_setup(opal_install_dirs.localstatedir); - opal_install_dirs.libdir = + opal_install_dirs.libdir = opal_install_dirs_expand_setup(opal_install_dirs.libdir); - opal_install_dirs.includedir = + opal_install_dirs.includedir = opal_install_dirs_expand_setup(opal_install_dirs.includedir); - opal_install_dirs.infodir = + opal_install_dirs.infodir = opal_install_dirs_expand_setup(opal_install_dirs.infodir); - opal_install_dirs.mandir = + opal_install_dirs.mandir = opal_install_dirs_expand_setup(opal_install_dirs.mandir); - opal_install_dirs.opaldatadir = + opal_install_dirs.opaldatadir = 
opal_install_dirs_expand_setup(opal_install_dirs.opaldatadir); - opal_install_dirs.opallibdir = + opal_install_dirs.opallibdir = opal_install_dirs_expand_setup(opal_install_dirs.opallibdir); - opal_install_dirs.opalincludedir = + opal_install_dirs.opalincludedir = opal_install_dirs_expand_setup(opal_install_dirs.opalincludedir); #if 0 diff --git a/opal/mca/installdirs/base/installdirs_base_expand.c b/opal/mca/installdirs/base/installdirs_base_expand.c index e609b2f578d..139ec229a02 100644 --- a/opal/mca/installdirs/base/installdirs_base_expand.c +++ b/opal/mca/installdirs/base/installdirs_base_expand.c @@ -1,13 +1,13 @@ /* * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystem, Inc. All rights reserved. * Copyright (c) 2010 Sandia National Laboratories. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -63,11 +63,11 @@ opal_install_dirs_expand_internal(const char* input, bool is_setup) char *destdir = NULL; size_t destdir_offset = 0; - /* This is subtle, and worth explaining. + /* This is subtle, and worth explaining. If we substitute in any ${FIELD} values, we need to prepend it with the value of the $OPAL_DESTDIR environment variable -- if - it is set. + it is set. We need to handle at least three cases properly (assume that configure was invoked with --prefix=/opt/openmpi and no other diff --git a/opal/mca/installdirs/config/Makefile.am b/opal/mca/installdirs/config/Makefile.am index e020e7230b2..2ec99d31234 100644 --- a/opal/mca/installdirs/config/Makefile.am +++ b/opal/mca/installdirs/config/Makefile.am @@ -1,13 +1,13 @@ # # Copyright (c) 2006 Los Alamos National Security, LLC. All rights -# reserved. +# reserved. # Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
-# Copyright (c) 2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/installdirs/config/configure.m4 b/opal/mca/installdirs/config/configure.m4 index 771d993c268..bc910fdcc5d 100644 --- a/opal/mca/installdirs/config/configure.m4 +++ b/opal/mca/installdirs/config/configure.m4 @@ -1,12 +1,12 @@ # -*- shell-script -*- # # Copyright (c) 2006 Los Alamos National Security, LLC. All rights -# reserved. +# reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -19,7 +19,7 @@ AC_DEFUN([MCA_opal_installdirs_config_COMPILE_MODE], [ ]) -# MCA_installdirs_config_CONFIG(action-if-can-compile, +# MCA_installdirs_config_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_installdirs_config_CONFIG],[ diff --git a/opal/mca/installdirs/config/install_dirs.h.in b/opal/mca/installdirs/config/install_dirs.h.in index e81785b73be..0cde2478fb4 100644 --- a/opal/mca/installdirs/config/install_dirs.h.in +++ b/opal/mca/installdirs/config/install_dirs.h.in @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. - * All rights reserved. + * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * This file should be included by any file that needs the @@ -67,7 +67,7 @@ #define OPAL_DATAROOTDIR "@datarootdir@" /* The directory for installing idiosyncratic read-only - architecture-independent data files for this program. + architecture-independent data files for this program. The definition of ‘datadir’ is the same for all packages, so you should install your data in a subdirectory thereof. Most packages diff --git a/opal/mca/installdirs/config/opal_installdirs_config.c b/opal/mca/installdirs/config/opal_installdirs_config.c index 9fcdf45baaa..d2f8fa8d1de 100644 --- a/opal/mca/installdirs/config/opal_installdirs_config.c +++ b/opal/mca/installdirs/config/opal_installdirs_config.c @@ -1,10 +1,10 @@ /* * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/installdirs/configure.m4 b/opal/mca/installdirs/configure.m4 index db0e907718c..081a3dcfc67 100644 --- a/opal/mca/installdirs/configure.m4 +++ b/opal/mca/installdirs/configure.m4 @@ -2,9 +2,9 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2006-2010 Sandia National Laboratories. All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/opal/mca/installdirs/env/Makefile.am b/opal/mca/installdirs/env/Makefile.am index 2cb6f739275..82f4603764e 100644 --- a/opal/mca/installdirs/env/Makefile.am +++ b/opal/mca/installdirs/env/Makefile.am @@ -1,12 +1,12 @@ # # Copyright (c) 2006 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2009 High Performance Computing Center Stuttgart, +# reserved. +# Copyright (c) 2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/installdirs/env/configure.m4 b/opal/mca/installdirs/env/configure.m4 index 74b98205338..201b3673368 100644 --- a/opal/mca/installdirs/env/configure.m4 +++ b/opal/mca/installdirs/env/configure.m4 @@ -1,12 +1,12 @@ # -*- shell-script -*- # # Copyright (c) 2006 Los Alamos National Security, LLC. All rights -# reserved. +# reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -18,7 +18,7 @@ AC_DEFUN([MCA_opal_installdirs_env_COMPILE_MODE], [ AC_MSG_RESULT([$$4]) ]) -# MCA_installdirs_config_CONFIG(action-if-can-compile, +# MCA_installdirs_config_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_installdirs_env_CONFIG], [ diff --git a/opal/mca/installdirs/env/opal_installdirs_env.c b/opal/mca/installdirs/env/opal_installdirs_env.c index 48978da12cc..340008e91d4 100644 --- a/opal/mca/installdirs/env/opal_installdirs_env.c +++ b/opal/mca/installdirs/env/opal_installdirs_env.c @@ -1,11 +1,11 @@ /* * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/installdirs/installdirs.h b/opal/mca/installdirs/installdirs.h index e3833d52491..7015adf31ea 100644 --- a/opal/mca/installdirs/installdirs.h +++ b/opal/mca/installdirs/installdirs.h @@ -1,11 +1,11 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/mca.h b/opal/mca/mca.h index 0bd945c1602..7a545743653 100644 --- a/opal/mca/mca.h +++ b/opal/mca/mca.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,13 +14,13 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** - * @file +/** + * @file * * Top-level interface for \em all MCA components. * @@ -43,7 +43,7 @@ * the change). If we need to add more space to the struct, we'll * increment the major version number. * - The MCA base component struct now has a version number in it - * (starting with Open MPI v1.3, it is 2.0.0). + * (starting with Open MPI v1.3, it is 2.0.0). * - As was an unstated assumption in prior versions of Open MPI, the * unversioned versions of struct names (both in the MCA base and in * individual framework bases) are intended for components who want @@ -53,7 +53,7 @@ * struct version name. Please note, however, the Open MPI * developers may not generally provide older versions of framework * interface structs unless they know if someone outside of the Open - * MPI community needs it. + * MPI community needs it. * * ***IF YOU NEED BACKWARDS SOURCE OR BINARY COMPATIBILITY, you must * let us know!*** @@ -124,7 +124,7 @@ typedef struct mca_base_module_2_0_0_t mca_base_module_2_0_0_t; * be unloaded from the process). * * All MCA components can have an "open" function that is invoked once - * per process, when the component is located and loaded. 
+ * per process, when the component is located and loaded. * * This function should avoid registering MCA parameters (use the * component "register" function for that; i.e., @@ -151,7 +151,7 @@ typedef struct mca_base_module_2_0_0_t mca_base_module_2_0_0_t; */ typedef int (*mca_base_open_component_1_0_0_fn_t)(void); -/** +/** * MCA component close function. * * @retval OPAL_SUCCESS The component successfully shut down. @@ -174,7 +174,7 @@ typedef int (*mca_base_open_component_1_0_0_fn_t)(void); */ typedef int (*mca_base_close_component_1_0_0_fn_t)(void); -/** +/** * MCA component query function. * * @retval OPAL_SUCCESS The component successfully queried. @@ -199,7 +199,7 @@ typedef int (*mca_base_query_component_2_0_0_fn_t)(mca_base_module_2_0_0_t **mod * @retval OPAL_SUCCESS This component successfully registered its * parameters and can be used in this process. * @retval OPAL_ERR_BAD_PARAM Indicates that the register function - * failed because an MCA parameter got an invalid/incorrect value. + * failed because an MCA parameter got an invalid/incorrect value. * * @retval anything_else The MCA will ignore this component for the * duration of the process. @@ -266,7 +266,7 @@ typedef int (*mca_base_register_component_params_2_0_0_fn_t)(void); */ struct mca_base_component_2_1_0_t { - int mca_major_version; + int mca_major_version; /**< Major number of the MCA. */ int mca_minor_version; /**< Minor number of the MCA. */ @@ -305,7 +305,7 @@ struct mca_base_component_2_1_0_t { /**< This component's minor version number. */ int mca_component_release_version; /**< This component's release version number. */ - + mca_base_open_component_1_0_0_fn_t mca_open_component; /**< Method for opening this component. 
*/ mca_base_close_component_1_0_0_fn_t mca_close_component; @@ -351,7 +351,7 @@ typedef struct mca_base_component_data_2_0_0_t mca_base_component_data_t; typedef struct mca_base_component_data_2_0_0_t mca_base_component_data_2_0_0_t; /** - * Macro for framework author convenience. + * Macro for framework author convenience. * * This macro is used by frameworks defining their component types, * indicating that they subscribe to the MCA version 2.0.0. See diff --git a/opal/mca/memchecker/Makefile.am b/opal/mca/memchecker/Makefile.am index 1fa8328b244..f8955b567e0 100644 --- a/opal/mca/memchecker/Makefile.am +++ b/opal/mca/memchecker/Makefile.am @@ -1,11 +1,11 @@ # -# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/memchecker/base/Makefile.am b/opal/mca/memchecker/base/Makefile.am index a5f930bfc5f..ff23d6f256a 100644 --- a/opal/mca/memchecker/base/Makefile.am +++ b/opal/mca/memchecker/base/Makefile.am @@ -1,11 +1,11 @@ # -# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/memchecker/base/base.h b/opal/mca/memchecker/base/base.h index fa488cb8446..a9796193413 100644 --- a/opal/mca/memchecker/base/base.h +++ b/opal/mca/memchecker/base/base.h @@ -1,10 +1,10 @@ /* - * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -245,7 +245,7 @@ OPAL_DECLSPEC int opal_memchecker_base_set_vbits(void * p, char * vbits, size_t #if OPAL_WANT_MEMCHECKER == 0 #define opal_memchecker_base_set_vbits(p, vbits, len) #endif - + END_C_DECLS #endif /* OPAL_MEMCHECKER_BASE_H */ diff --git a/opal/mca/memchecker/base/memchecker_base_open.c b/opal/mca/memchecker/base/memchecker_base_open.c index 40eccb51408..69eabb3370b 100644 --- a/opal/mca/memchecker/base/memchecker_base_open.c +++ b/opal/mca/memchecker/base/memchecker_base_open.c @@ -1,10 +1,10 @@ /* - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/memchecker/base/memchecker_base_select.c b/opal/mca/memchecker/base/memchecker_base_select.c index 37e0377ca96..3e701ed2edf 100644 --- a/opal/mca/memchecker/base/memchecker_base_select.c +++ b/opal/mca/memchecker/base/memchecker_base_select.c @@ -1,12 +1,15 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2008 The Trustees of Indiana University. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -40,7 +43,7 @@ int opal_memchecker_base_select(void) if( OPAL_SUCCESS != mca_base_select("memchecker", opal_memchecker_base_framework.framework_output, &opal_memchecker_base_framework.framework_components, (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { + (mca_base_component_t **) &best_component, NULL) ) { /* This will only happen if no component was selected */ exit_status = OPAL_ERR_NOT_FOUND; goto cleanup; diff --git a/opal/mca/memchecker/base/memchecker_base_wrappers.c b/opal/mca/memchecker/base/memchecker_base_wrappers.c index 4d6d23b60a0..babadb918fb 100644 --- a/opal/mca/memchecker/base/memchecker_base_wrappers.c +++ b/opal/mca/memchecker/base/memchecker_base_wrappers.c @@ -1,10 +1,10 @@ /* - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/memchecker/base/owner.txt b/opal/mca/memchecker/base/owner.txt index 6bf8abd5c25..e6967790514 100644 --- a/opal/mca/memchecker/base/owner.txt +++ b/opal/mca/memchecker/base/owner.txt @@ -4,4 +4,4 @@ # status: e.g. active, maintenance, unmaintained # owner: project -status:active +status: unmaintained diff --git a/opal/mca/memchecker/configure.m4 b/opal/mca/memchecker/configure.m4 index 20335a94fc1..3ccd3786adb 100644 --- a/opal/mca/memchecker/configure.m4 +++ b/opal/mca/memchecker/configure.m4 @@ -1,12 +1,14 @@ dnl -*- shell-script -*- dnl -dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. 
+dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl @@ -43,10 +45,10 @@ AC_DEFUN([MCA_opal_memchecker_CONFIG],[ # first, compile all the components MCA_CONFIGURE_FRAMEWORK($1, $2, 1) - AS_IF([test "$MCA_opal_memchecker_STATIC_COMPONENTS" != "" -o "$MCA_opal_memchecker_DSO_COMPONENTS" != ""], + AS_IF([test "$MCA_opal_memchecker_STATIC_COMPONENTS" != "" || test "$MCA_opal_memchecker_DSO_COMPONENTS" != ""], [memchecker_base_found=1], [memchecker_base_found=0]) - AS_IF([test $WANT_MEMCHECKER -eq 1 -a $memchecker_base_found -eq 0], + AS_IF([test $WANT_MEMCHECKER -eq 1 && test $memchecker_base_found -eq 0], [AC_MSG_WARN([Memchecker support requested, but no memchecker]) AC_MSG_WARN([components configured successfully. Did you]) AC_MSG_WARN([forget --with-valgrind?]) diff --git a/opal/mca/memchecker/memchecker.h b/opal/mca/memchecker/memchecker.h index 11b0d10d7a9..120b0a417c7 100644 --- a/opal/mca/memchecker/memchecker.h +++ b/opal/mca/memchecker/memchecker.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2008 The Trustees of Indiana University. * All rights reserved. @@ -8,9 +8,9 @@ * reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -162,10 +162,10 @@ struct opal_memchecker_base_module_1_0_0_t { /** Module function to check for any leaks */ opal_memchecker_base_module_leakcheck_fn_t leakcheck; - + /** Module function to get vbits */ opal_memchecker_base_module_get_vbits_fn_t get_vbits; - + /** Module function to set vbits */ opal_memchecker_base_module_set_vbits_fn_t set_vbits; }; diff --git a/opal/mca/memchecker/valgrind/Makefile.am b/opal/mca/memchecker/valgrind/Makefile.am index 6bcaa05a0ec..3fa127aff09 100644 --- a/opal/mca/memchecker/valgrind/Makefile.am +++ b/opal/mca/memchecker/valgrind/Makefile.am @@ -1,11 +1,11 @@ # -# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/memchecker/valgrind/configure.m4 b/opal/mca/memchecker/valgrind/configure.m4 index 6ea77fc3f08..28ddccbc533 100644 --- a/opal/mca/memchecker/valgrind/configure.m4 +++ b/opal/mca/memchecker/valgrind/configure.m4 @@ -1,12 +1,14 @@ # -*- shell-script -*- # -# Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -35,12 +37,12 @@ AC_DEFUN([MCA_opal_memchecker_valgrind_CONFIG],[ opal_memchecker_valgrind_save_CPPFLAGS="$CPPFLAGS" opal_memchecker_valgrind_happy=no AS_IF([test "$with_valgrind" != "no"], - [AS_IF([test ! 
-z "$with_valgrind" -a "$with_valgrind" != "yes"], + [AS_IF([test -n "$with_valgrind" && test "$with_valgrind" != "yes"], [opal_memchecker_valgrind_CPPFLAGS="-I$with_valgrind/include" # We need this -I to stay in CPPFLAGS when we're done CPPFLAGS="$CPPFLAGS -I$with_valgrind/include" opal_memchecker_valgrind_save_CPPFLAGS=$CPPFLAGS]) - AC_CHECK_HEADERS([valgrind/valgrind.h], + AC_CHECK_HEADERS([valgrind/valgrind.h], [AC_MSG_CHECKING([for VALGRIND_CHECK_MEM_IS_ADDRESSABLE]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ #include "valgrind/memcheck.h" @@ -60,7 +62,7 @@ AC_DEFUN([MCA_opal_memchecker_valgrind_CONFIG],[ CPPFLAGS="$opal_memchecker_valgrind_save_CPPFLAGS" # If we specifically requested this component and can't build it, error - AS_IF([test "$with_valgrind" != "no" -a "$with_valgrind" != "" -a "$opal_memchecker_valgrind_happy" != "yes"], + AS_IF([test "$with_valgrind" != "no" && test -n "$with_valgrind" && test "$opal_memchecker_valgrind_happy" != "yes"], [AC_MSG_ERROR([Cannot continue])]) AS_IF([test "$opal_memchecker_valgrind_happy" = "yes"], diff --git a/opal/mca/memchecker/valgrind/memchecker_valgrind.h b/opal/mca/memchecker/valgrind/memchecker_valgrind.h index 3957c13da06..ed504f05d39 100644 --- a/opal/mca/memchecker/valgrind/memchecker_valgrind.h +++ b/opal/mca/memchecker/valgrind/memchecker_valgrind.h @@ -1,12 +1,12 @@ /* - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2008 The Trustees of Indiana University. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/memchecker/valgrind/memchecker_valgrind_component.c b/opal/mca/memchecker/valgrind/memchecker_valgrind_component.c index b8a1eb25451..b06d882ec4a 100644 --- a/opal/mca/memchecker/valgrind/memchecker_valgrind_component.c +++ b/opal/mca/memchecker/valgrind/memchecker_valgrind_component.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2008 The Trustees of Indiana University. * All rights reserved. @@ -8,9 +8,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/memchecker/valgrind/owner.txt b/opal/mca/memchecker/valgrind/owner.txt index a0ac5fc377e..e7bcdd98ce4 100644 --- a/opal/mca/memchecker/valgrind/owner.txt +++ b/opal/mca/memchecker/valgrind/owner.txt @@ -4,4 +4,4 @@ # status: e.g. active, maintenance, unmaintained # owner: HLRS? -status: maintenance +status: unmaintained diff --git a/opal/mca/memcpy/Makefile.am b/opal/mca/memcpy/Makefile.am index b74bac9878b..51eac0bfdb7 100644 --- a/opal/mca/memcpy/Makefile.am +++ b/opal/mca/memcpy/Makefile.am @@ -4,9 +4,9 @@ # reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -16,7 +16,7 @@ libmca_memcpy_la_SOURCES = # local files headers = memcpy.h -nodist_headers = +nodist_headers = libmca_memcpy_la_SOURCES += $(headers) # Conditionally install the header files diff --git a/opal/mca/memcpy/base/Makefile.am b/opal/mca/memcpy/base/Makefile.am index 1dec1dd2fa2..19186536fac 100644 --- a/opal/mca/memcpy/base/Makefile.am +++ b/opal/mca/memcpy/base/Makefile.am @@ -3,9 +3,9 @@ # of Tennessee Research Foundation. All rights # reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/memcpy/base/base.h b/opal/mca/memcpy/base/base.h index 157ec59c33a..7bd44254d14 100644 --- a/opal/mca/memcpy/base/base.h +++ b/opal/mca/memcpy/base/base.h @@ -3,9 +3,9 @@ * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/opal/mca/memcpy/base/memcpy_base_default.h b/opal/mca/memcpy/base/memcpy_base_default.h index 07464466fb8..c567602ca43 100644 --- a/opal/mca/memcpy/base/memcpy_base_default.h +++ b/opal/mca/memcpy/base/memcpy_base_default.h @@ -3,9 +3,9 @@ * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/memcpy/base/memcpy_base_open.c b/opal/mca/memcpy/base/memcpy_base_open.c index 19f9f77d87d..9b737f71984 100644 --- a/opal/mca/memcpy/base/memcpy_base_open.c +++ b/opal/mca/memcpy/base/memcpy_base_open.c @@ -3,9 +3,9 @@ * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/memcpy/configure.m4 b/opal/mca/memcpy/configure.m4 index 1fc3814f32e..cc340de0982 100644 --- a/opal/mca/memcpy/configure.m4 +++ b/opal/mca/memcpy/configure.m4 @@ -6,9 +6,9 @@ dnl reserved. 
dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/opal/mca/memcpy/memcpy.h b/opal/mca/memcpy/memcpy.h index 9f7ea62910a..d8744844c4d 100644 --- a/opal/mca/memcpy/memcpy.h +++ b/opal/mca/memcpy/memcpy.h @@ -6,9 +6,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/memory/Makefile.am b/opal/mca/memory/Makefile.am index 016f224ab1b..bf6ed605612 100644 --- a/opal/mca/memory/Makefile.am +++ b/opal/mca/memory/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/memory/base/Makefile.am b/opal/mca/memory/base/Makefile.am index e3d73d0ee4d..d3239de4cd8 100644 --- a/opal/mca/memory/base/Makefile.am +++ b/opal/mca/memory/base/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/memory/base/base.h b/opal/mca/memory/base/base.h index 72070604d5b..63e5b568031 100644 --- a/opal/mca/memory/base/base.h +++ b/opal/mca/memory/base/base.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -32,5 +32,7 @@ BEGIN_C_DECLS */ OPAL_DECLSPEC extern mca_base_framework_t opal_memory_base_framework; +OPAL_DECLSPEC void opal_memory_base_malloc_init_hook (void); + END_C_DECLS #endif /* OPAL_BASE_MEMORY_H */ diff --git a/opal/mca/memory/base/empty.h b/opal/mca/memory/base/empty.h index 5067bf2f074..46cacc56e34 100644 --- a/opal/mca/memory/base/empty.h +++ b/opal/mca/memory/base/empty.h @@ -1,9 +1,11 @@ /* - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -38,7 +40,7 @@ BEGIN_C_DECLS * * See opal/mca/memory/memory.h for a description of the parameters. */ -OPAL_DECLSPEC int opal_memory_base_component_register_empty(void *start, +OPAL_DECLSPEC int opal_memory_base_component_register_empty(void *start, size_t len, uint64_t cookie); @@ -47,10 +49,19 @@ OPAL_DECLSPEC int opal_memory_base_component_register_empty(void *start, * * See opal/mca/memory/memory.h for a description of the parameters. 
*/ -OPAL_DECLSPEC int opal_memory_base_component_deregister_empty(void *start, +OPAL_DECLSPEC int opal_memory_base_component_deregister_empty(void *start, size_t len, uint64_t cookie); +/** + * Default (empty) implementation of the memoryc_set_alignment function + * + * See opal/mca/memory/memory.h for a description of the parameters. + */ +OPAL_DECLSPEC void opal_memory_base_component_set_alignment_empty(int use_memalign, + size_t memalign_threshold); + + END_C_DECLS #endif diff --git a/opal/mca/memory/base/memory_base_empty.c b/opal/mca/memory/base/memory_base_empty.c index 804116a47b0..c31db234873 100644 --- a/opal/mca/memory/base/memory_base_empty.c +++ b/opal/mca/memory/base/memory_base_empty.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,15 +24,21 @@ #include "opal/mca/memory/base/empty.h" -int opal_memory_base_component_register_empty(void *base, size_t len, +int opal_memory_base_component_register_empty(void *base, size_t len, uint64_t cookie) { return OPAL_SUCCESS; } -int opal_memory_base_component_deregister_empty(void *base, size_t len, +int opal_memory_base_component_deregister_empty(void *base, size_t len, uint64_t cookie) { return OPAL_SUCCESS; } + +void opal_memory_base_component_set_alignment_empty(int use_memalign, + size_t memalign_threshold) +{ +} + diff --git a/opal/mca/memory/base/memory_base_open.c b/opal/mca/memory/base/memory_base_open.c index 75d3b8379f3..a777848af4b 100644 --- a/opal/mca/memory/base/memory_base_open.c +++ b/opal/mca/memory/base/memory_base_open.c @@ -6,15 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -41,19 +45,22 @@ static int empty_process(void) return OPAL_SUCCESS; } +static int empty_query (int *priority) +{ + *priority = 0; + return OPAL_SUCCESS; +} /* * Local variables */ static opal_memory_base_component_2_0_0_t empty_component = { - /* Don't care about the version info */ - { 0, }, - /* Don't care about the data */ - { 0, }, /* Empty / safe functions to call if no memory componet is selected */ - empty_process, - opal_memory_base_component_register_empty, - opal_memory_base_component_deregister_empty, + .memoryc_query = empty_query, + .memoryc_process = empty_process, + .memoryc_register = opal_memory_base_component_register_empty, + .memoryc_deregister = opal_memory_base_component_deregister_empty, + .memoryc_set_alignment = opal_memory_base_component_set_alignment_empty, }; @@ -63,6 +70,12 @@ static opal_memory_base_component_2_0_0_t empty_component = { opal_memory_base_component_2_0_0_t *opal_memory = &empty_component; +void opal_memory_base_malloc_init_hook (void) +{ + if (opal_memory->memoryc_init_hook) { + opal_memory->memoryc_init_hook (); + } +} /* * Function for finding and opening either all MCA components, or the one @@ -70,23 +83,37 @@ opal_memory_base_component_2_0_0_t *opal_memory = &empty_component; */ static int opal_memory_base_open(mca_base_open_flag_t flags) { + mca_base_component_list_item_t *item, *next; + opal_memory_base_component_2_0_0_t *tmp; + int priority, highest_priority = 0; int ret; - /* Open up all available components */ + /* can only be zero or one */ + OPAL_LIST_FOREACH(item, &opal_memory_base_framework.framework_components, mca_base_component_list_item_t) { + tmp = (opal_memory_base_component_2_0_0_t *) item->cli_component; + + ret = tmp->memoryc_query (&priority); + if (OPAL_SUCCESS != ret || priority < highest_priority) { + continue; + } + + highest_priority = priority; + opal_memory = tmp; + } + + OPAL_LIST_FOREACH_SAFE(item, next, 
&opal_memory_base_framework.framework_components, mca_base_component_list_item_t) { + if ((void *) opal_memory != (void *) item->cli_component) { + mca_base_component_unload (item->cli_component, opal_memory_base_framework.framework_output); + opal_list_remove_item (&opal_memory_base_framework.framework_components, &item->super); + } + } + + /* open remaining component */ ret = mca_base_framework_components_open (&opal_memory_base_framework, flags); if (ret != OPAL_SUCCESS) { return ret; } - /* can only be zero or one */ - if (opal_list_get_size(&opal_memory_base_framework.framework_components) == 1) { - mca_base_component_list_item_t *item; - item = (mca_base_component_list_item_t*) - opal_list_get_first(&opal_memory_base_framework.framework_components); - opal_memory = (opal_memory_base_component_2_0_0_t*) - item->cli_component; - } - /* All done */ return OPAL_SUCCESS; } diff --git a/opal/mca/memory/configure.m4 b/opal/mca/memory/configure.m4 index 7962c3c2253..828a072d6e0 100644 --- a/opal/mca/memory/configure.m4 +++ b/opal/mca/memory/configure.m4 @@ -6,15 +6,15 @@ dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
dnl $COPYRIGHT$ -dnl +dnl dnl Additional copyrights may follow -dnl +dnl dnl $HEADER$ dnl diff --git a/opal/mca/memory/linux/COPYRIGHT-ptmalloc2.txt b/opal/mca/memory/linux/COPYRIGHT-ptmalloc2.txt deleted file mode 100644 index 4615c2637cc..00000000000 --- a/opal/mca/memory/linux/COPYRIGHT-ptmalloc2.txt +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2001-2004 Wolfram Gloger - -Permission to use, copy, modify, distribute, and sell this software -and its documentation for any purpose is hereby granted without fee, -provided that (i) the above copyright notices and this permission -notice appear in all copies of the software and related documentation, -and (ii) the name of Wolfram Gloger may not be used in any advertising -or publicity relating to the software. - -THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, -EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY -WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, -INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY -DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY -OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. diff --git a/opal/mca/memory/linux/ChangeLog-ptmalloc2.txt b/opal/mca/memory/linux/ChangeLog-ptmalloc2.txt deleted file mode 100644 index b4db8b65a64..00000000000 --- a/opal/mca/memory/linux/ChangeLog-ptmalloc2.txt +++ /dev/null @@ -1,181 +0,0 @@ -2004-11-05 Wolfram Gloger - - * malloc/hooks.c (malloc_starter, memalign_starter): Call - ptmalloc_init_minimal(). - -2004-11-04 Wolfram Gloger - - * malloc/malloc.c (USE_STARTER): New macro. - * malloc/hooks.c: Use USE_STARTER. - * malloc/arena.c: Use USE_STARTER. - -2004-08-13 Ulrich Drepper - - * malloc/malloc.c: Use strong_alias instead of weak_alias wherever - possible. 
- -2002-12-06 Roland McGrath - - * malloc/arena.c (ptmalloc_init_minimal): New function, broken out - of ptmalloc_init. - -2002-08-23 Roland McGrath - - * malloc/hooks.c (__malloc_initialize_hook, __free_hook, - __malloc_hook, __realloc_hook, __memalign_hook, - __after_morecore_hook): Variable definitions moved to ... - * malloc/malloc.c: ... here, so as to be before all references. - -2004-10-19 Wolfram Gloger - - * malloc/hooks.c (mem2chunk_check, top_check): Handle - non-contiguous arena. Reported by Michael Dalton - [BZ #457]. Add further checks for top - chunk. - -2004-08-08 Wolfram Gloger - - * include/malloc.h (mstate): Move type declaration from here... - * malloc/malloc.h: ...to here. - (struct malloc_arena_info, struct malloc_global_info): New types. - (_int_get_arena, _int_get_arena_info, _int_get_global_info): New - functions. - * malloc/malloc.c (mSTATS, public_mSTATs, mALLINFo): Remove. - (_int_get_arena_info, _int_get_global_info): New functions. - * malloc/arena.c (_int_get_arena): New function. - * malloc/malloc-stats.c: New file. - * malloc/tst-mstats.c: New file. - * malloc/Makefile (tests): Add tst-mstats. - (distribute): Remove no-longer existing thread-m.h. - (dist-routines): Add malloc-stats. - * malloc/Versions: Add _int_get_arena, _int_get_arena_info, - _int_get_global_info. - -2004-07-25 Wolfram Gloger - - * sysdeps/generic/thread-st.h: New file. - * sysdeps/pthread/thread-st.h: New file. - * sysdeps/sproc/thread-st.h: New file. - * sysdeps/solaris/thread-st.h: New file. - * thread-st.h: Removed. - -2004-03-18 Ulrich Drepper - - * malloc/malloc.c (__posix_memalign): Correct alignment check. - Reported by Don Heller . - -2003-12-17 Jakub Jelinek - - * malloc/malloc.c (__posix_memalign): If __memalign_hook != NULL, - call it directly instead of memalign_internal. - -2003-09-27 Wolfram Gloger - - * malloc/malloc.c: Include earlier instead of - "thread-m.h", so that default parameters can be overridden in a - system-specific malloc-machine.h. 
Remove extra ; from extern "C" - closing brace. - * sysdeps/generic/malloc-machine.h: New file. - * malloc/thread-m.h: Removed. - -2003-09-08 Wolfram Gloger - - * malloc/malloc.c (sYSMALLOc): Move foreign sbrk accounting into - contiguous case. Bug report from Prem Gopalan - . - -2003-08-18 Art Haas - - * malloc/malloc.h: Remove unneeded ';' where closing the C++ - extern block. - -2003-06-18 Ulrich Drepper - - * malloc/malloc.c (public_mALLINFo): Initialize malloc if it - hasn't happened yet. - -2003-05-28 Roland McGrath - - * malloc/malloc.h [! __GNUC__] (__const): Define if undefined. - -2003-05-04 H.J. Lu - - * malloc/arena.c (arena_get2): Add atomic_write_barrier. - * malloc/thread-m.h: Include . - (atomic_full_barrier): Provide default. - (atomic_read_barrier): Likewise. - (atomic_write_barrier): Likewise. - -2003-05-01 Ulrich Drepper - - * malloc/malloc.c (mSTATs): Call ptmalloc_init if necessary. - -2003-01-27 Wolfram Gloger - - * malloc/hooks.c (mem2chunk_check): Check alignment of mem - pointer, not of the computed chunk. Bug report from Carlos - O'Donell . - -2002-12-27 Jakub Jelinek - - * malloc/arena.c (ptmalloc_init): Don't call next_env_entry if - _environ is NULL. - -2002-12-17 Ulrich Drepper - - * malloc/malloc.c (mALLOPt): Make sure malloc is initialized. - -2002-12-06 Roland McGrath - - * malloc/hooks.c [_LIBC && (USE___THREAD || (USE_TLS && !SHARED))] - (malloc_starter, memalign_starter, free_starter): Don't define these. - - * malloc/hooks.c (memalign_starter): New function. - * malloc/malloc.c: Declare it. - * malloc/arena.c (save_memalign_hook): New variable. - (ptmalloc_init): Set __memalign_hook to memalign_starter. - -2002-11-18 Wolfram Gloger - - * malloc/arena.c - (ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2): Do - nothing if not initialized. Bug report from Marcus Brinkmann - . - -2002-10-07 Wolfram Gloger - - * malloc/malloc.c (sYSMALLOc): Only check for breakage due - to foreign sbrk()'s if arena is contiguous. 
Bug report from - Bruno Haible . - -2002-07-11 Wolfram Gloger - - * malloc/hooks.c: typo fix in NO_THREADS case, realloc_check - fix in HAVE_MREMAP case. - -2002-06-11 Wolfram Gloger - - * malloc/malloc.c: Fix error path when new_heap() returns NULL. - Reported by Michael Meissner . - -2002-03-29 Wolfram Gloger - - * malloc/malloc.c: Add short description and prototypes for - malloc_get_state, malloc_set_state and posix_memalign, for - consistency and to avoid warnings with -Wstrict-prototypes. - Reported by Andreas Jaeger . - -2002-03-13 Wolfram Gloger - - * malloc/malloc.c (sYSMALLOc): Don't change brk if mmap - failed. - -2002-01-18 Wolfram Gloger - - * malloc/malloc.c: Rewrite, adapted from Doug Lea's malloc-2.7.0.c. - * malloc/malloc.h: Likewise. - * malloc/arena.c: New file. - * malloc/hooks.c: New file. - * malloc/tst-mallocstate.c: New file. - * malloc/Makefile: Add new testcase tst-mallocstate. - Add arena.c and hooks.c to distribute. Fix commented CPPFLAGS. diff --git a/opal/mca/memory/linux/Makefile.am b/opal/mca/memory/linux/Makefile.am deleted file mode 100644 index 2d81d81d431..00000000000 --- a/opal/mca/memory/linux/Makefile.am +++ /dev/null @@ -1,92 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = -DMALLOC_DEBUG=0 - -AM_CPPFLAGS += \ - -D_GNU_SOURCE=1 \ - -DMALLOC_HOOKS=1 \ - -I$(srcdir)/sysdeps/pthread -# this must come *after* the threads -Is -AM_CPPFLAGS += -I$(srcdir)/sysdeps/generic - -# ptmalloc requires us to include the copyright notice in the -# software. So install it in the same place that we install ROMIO's -# copyright notices. - -docdir = $(opaldatadir)/doc -doc_DATA = COPYRIGHT-ptmalloc2.txt - -# Help file -dist_opaldata_DATA = help-opal-memory-linux.txt - -# This component is only ever built statically (i.e., slurped into -# libopen-pal) -- it is never built as a DSO. -noinst_LTLIBRARIES = libmca_memory_linux.la -libmca_memory_linux_la_SOURCES = \ - memory_linux.h \ - memory_linux_component.c -libmca_memory_linux_la_LDFLAGS = \ - -module -avoid-version $(memory_linux_LDFLAGS) -libmca_memory_linux_la_LIBADD = $(memory_linux_LIBS) - -# Do we have ptmalloc2 support? -if MEMORY_LINUX_PTMALLOC2 -libmca_memory_linux_la_SOURCES += \ - memory_linux_ptmalloc2.c \ - memory_linux_munmap.c \ - rename.h \ - malloc.c \ - malloc-stats.c \ - malloc.h -endif - -# Do we have ummunotify support? 
-if MEMORY_LINUX_UMMUNOTIFY -libmca_memory_linux_la_SOURCES += memory_linux_ummunotify.c public.h -endif - -# these are included directly and shouldn't be built solo -EXTRA_libmca_memory_linux_la_SOURCES = \ - arena.c \ - hooks.c - -EXTRA_DIST = \ - README-open-mpi.txt \ - README-ptmalloc2.txt \ - ChangeLog-ptmalloc2.txt \ - COPYRIGHT-ptmalloc2.txt \ - lran2.h \ - t-test.h \ - t-test1.c \ - t-test2.c \ - tst-mallocstate.c \ - tst-mstats.c \ - sysdeps/sproc/malloc-machine.h \ - sysdeps/sproc/thread-st.h \ - sysdeps/pthread/malloc-machine.h \ - sysdeps/pthread/thread-st.h \ - sysdeps/solaris/malloc-machine.h \ - sysdeps/solaris/thread-st.h \ - sysdeps/generic/malloc-machine.h \ - sysdeps/generic/thread-st.h \ - sysdeps/generic/atomic.h \ - $(doc_DATA) diff --git a/opal/mca/memory/linux/README-open-mpi.txt b/opal/mca/memory/linux/README-open-mpi.txt deleted file mode 100644 index 5f83d373914..00000000000 --- a/opal/mca/memory/linux/README-open-mpi.txt +++ /dev/null @@ -1,161 +0,0 @@ -30 March 2009 - -This file documents Open MPI's usage of ptmalloc2. This is perhaps -our 7,208,499th iteration of ptmalloc2 support, so let's document it -here so that some future developer might spend *slightly* less time -understanding what the heck is going on. - -See glibc documentation about malloc hooks before continuing. This is -pretty much required reading before reading the rest of this file / -having a hope of understanding what's going on here: - - http://www.gnu.org/software/libc/manual/html_mono/libc.html#Hooks-for-Malloc - -The overall goal is that we're using the Linux glibc hooks to wholly -replace the underlying allocator. We *used* to use horrid linker -tricks to interpose OMPI's ptmalloc2 symbols with the glibc ones -- -meaning that user apps would call our symbols and not the glibc ones. 
-But that scheme is fraught with problems, not the least of which is -that *all* MPI applications will be forced to use our overridden -allocator (not just the ones that need it, such as the ones running on -OpenFabrics-based networks). Instead, what we do here is, frankly, -quite similar to what is done in MX: we use the 4 glibc hooks to -assert our own malloc, realloc, free, and memalign functions. This -allows the decision as to whether to use this internal ptmalloc2 -allocate to be a run-time decision. This is quite important; using -this internal allocator has both benefits (allowing using -mpi_leave_pinned=1 behavior) and drawbacks (breaking some debuggers, -being unnecessary for non-OpenFabrics-based networks, etc.). - -Here's how it works... - -This component *must* be linked statically as part of libopen-pal; it -*cannot* be a DSO. Specifically, this library must be present during -pre-main() initialization phases so that its __malloc_initialize_hook -can be found and executed. Loading it as a DSO during MPI_INIT is far -too late. In configure.m4, we define the M4 macro -MCA_memory_ptmalloc2_COMPILE_MODE to always compile this component in -static mode. Yay flexible build system. - -This component provides an munmap() function that will intercept calls -to munmap() and do the Right Thing. That is fairly straightforward to -do. Intercepting the malloc/free/etc. allocator is much more -complicated. - -All the ptmalloc2 public symbols in this component have been name -shifted via the rename.h file. Hence, what used to be "malloc" is now -opal_memory_ptmalloc2_malloc. Since all the public symbols are -name-shifted, we can safely link this component in all MPI -applications. Specifically: just because this ptmalloc2 allocator is -present in all OMPI executables and user-level applications, it won't -necessarily be used -- it's a separate/run-time decision as to whether -it will be used. 
- -We set the __malloc_initialize_hook variable to point to -opal_memory_ptmalloc2_malloc_init_hook (in hooks.c). This function is -called by the underlying glibc allocator before any allocations occur -and before the memory allocation subsystem is setup. As such, this -function is *extremely* restricted in what it can do. It cannot call -any form of malloc, for example (which seems fairly obvious, but it's -worth mentioning :-) ). This function is one of the determining -steps as to whether we'll use the internal ptmalloc2 allocator or -not. Several checks are performed: - -- Was either the MCA params mpi_leave_pinned or - mpi_leave_pinned_pipeline set? -- Is a driver found to be active indicating that an OS-bypass network - is in effect (OpenFabrics, MX, Open-MX, ...etc.) -- Was an environment variable set indicating that we want to disable - this component? - -If the $OMPI_MCA_memory_ptmalloc2_disable or the $FAKEROOTKEY env -variables are set, we don't enable the memory hooks. - -We then use the following matrix to determine whether to enable the -memory hooks or not (explanation of the matrix is below): - - lp / lpp yes no runtime not found - yes yes yes yes yes - no yes no no no - runtime yes no runtime runtime - not found yes no runtime runtime - -lp = leave_pinned (the rows), lpp = leave_pinned_pipeline (the columns) -yes = found that variable to be set to "yes" (i.e., 1) -no = found that variable to be set to "no" (i.e., 0) -runtime = found that variable to be set to "determine at runtime" (i.e., -1) -not found = that variable was not set at all - -Hence, if we end up on a "yes" block in the matrix, we enable the -hooks. If we end up in a "no" block in the matrix, we disable the -hooks. If we end up in a "runtime" block in the matrix, then we -enable the hooks *if* we can find indications that an OS bypass -network is present and available for use (e.g., OpenFabrics, MX, -Open-MX, ...etc.). 
- -To be clear: sometime during process startup, this function will -definitely be called. It will either set the 4 hook functions to -point to our name-shifted ptmalloc2 functions, or it won't. If the 4 -hook functions are set, then the underlying glibc allocator will -always call our 4 functions in all the relevant places instead of -calling its own functions. Specifically: the process is calling the -underlying glibc allocator, but that underlying glibc allocator will -make function pointer callbacks to our name-shifted ptmalloc2 -functions to actually do the work. - -Note that because we know our ptmalloc will not be providing all 5 -hook variables (because we want to use the underlying glibc hook -variables), they are #if 0'ed out in our malloc.c. This has the -direct consequence that the *_hook_ini() in hooks.c are never used. -So to avoid compiler/linker warnings, I #if 0'ed those out as well. - -All the public functions in malloc.c that call hook functions were -modified to #if 0 the hook function invocations. After all, that's -something that we want the *underlying* glibc allocator to do -- but -we are putting these functions as the hooks, so we don't want to -invoke ourselves in an infinite loop! - -The next thing that happens in the startup sequence is that the -ptmalloc2 memory component's "open" function is called during -MPI_INIT. But we need to test to see if the glibc memory hooks have -been overridden before MPI_INIT was invoked. If so, we need to signal -that our allocator support may not be complete. - -Patrick Geoffray/MX suggests a simple test: malloc() 4MB and then free -it. Watch to see if our name-shifted ptmalloc2 free() function was -invoked. If it was, then all of our hooks are probably in place and -we can proceed. If not, then set flags indicating that this memory -allocator only supports MUNMAP (not FREE/CHUNK). - -We actually perform this test for malloc, realloc, and memalign. 
If -they all pass, then we say that the memory allocator supports -everything. If any of them fail, then we say that the memory -allocator does not support FREE/CHUNK. - -NOTE: we *used* to simply set the FREE/CHUNK support flags during our -ptmalloc2's internal ptmalloc_init() function. This is not a good -idea becaus even after our ptmalloc_init() function has been invoked, -someone may come in an override our memory hooks. Doing tests during -the ptmalloc2 memory component's open function seems to be the safest -way to test whether we *actually* support FREE/CHUNK (this is what MX -does, too). - -As stated above, we always intercept munmap() -- this is acceptable in -all environments. But we test that, too, just to be sure that the -munmap intercept is working. If we verify that it is working -properly, then we set that we have MUNMAP support. - -Much later in the init sequence during MPI_INIT, components indicate -whether they want to use mpi_leave_pinned[_pipeline] support or not. -For example, the openib BTL queries the opal_mem_hooks_support_level() -function to see if FREE and MUNMAP are supported. If they are, then -the openib BTL sets mpi_leave_pinned = 1. - -Finally, the mpool base does a final check. If -mpi_leave_pinned[_pipeline] is set to 1 and/or use_mem_hooks is set, -if FREE/MUNMAP are not set in the supported flags, then a warning is -printed. Otherwise, life continues (assumedly using -mpi_leave_pinned[_pipeline] support). - -Simple, right? 
- diff --git a/opal/mca/memory/linux/README-ptmalloc2.txt b/opal/mca/memory/linux/README-ptmalloc2.txt deleted file mode 100644 index fed69951346..00000000000 --- a/opal/mca/memory/linux/README-ptmalloc2.txt +++ /dev/null @@ -1,192 +0,0 @@ -ptmalloc2 - a multi-thread malloc implementation -================================================ - -Wolfram Gloger (wg@malloc.de) - -Nov 2004 - - -Introduction -============ - -This package is a modified version of Doug Lea's malloc-2.7.1pre -implementation (available seperately from ftp://g.oswego.edu/pub/misc) -that I adapted for multiple threads, while trying to avoid lock -contention as much as possible. Many thanks should go to Doug Lea -(dl@cs.oswego.edu) for the great original malloc implementation. - -As part of the GNU C library, the source files are available under the -GNU Library General Public License (see the comments in the files). -But as part of this stand-alone package, the code is also available -under the (probably less restrictive) conditions described in the file -'COPYRIGHT'. In any case, there is no warranty whatsoever for this -package. - -The current distribution should be available from: - -http://www.malloc.de/malloc/ptmalloc2.tar.gz - - -Compilation -=========== - -It should be possible to build ptmalloc2 on any UN*X-like system that -implements the sbrk(), mmap(), munmap() and mprotect() calls. If -mmap() is not available, it is only possible to produce a -non-threadsafe implementation. Since there are now several source -files, a library (libmalloc.a) is generated. See the Makefile for -examples of the compile-time options. - -Note that support for non-ANSI compilers is no longer a significant -goal. 
- -Several example targets are provided in the Makefile: - - o Posix threads (pthreads), compile with "make posix" - - o Posix threads with explicit initialization, compile with - "make posix-explicit" (known to be required on HPUX) - - o Posix threads without "tsd data hack" (see below), compile with - "make posix-with-tsd" - - o Solaris threads, compile with "make solaris" - - o SGI sproc() threads, compile with "make sproc" - - o no threads, compile with "make nothreads" - -For Linux: - - o make "linux-pthread" (almost the same as "make posix") - -Note that some compilers need special flags for multi-threaded code, -e.g. with Solaris cc with Posix threads, one should use: - -% make posix SYS_FLAGS='-mt' - -Some additional targets, ending in `-libc', are also provided in the -Makefile, to compare performance of the test programs to the case when -linking with the standard malloc implementation in libc. - -A potential problem remains: If any of the system-specific functions -for getting/setting thread-specific data or for locking a mutex call -one of the malloc-related functions internally, the implementation -cannot work at all due to infinite recursion. One example seems to be -Solaris 2.4. I would like to hear if this problem occurs on other -systems, and whether similar workarounds could be applied. - -For Posix threads, too, an optional hack like that has been integrated -(activated when defining USE_TSD_DATA_HACK) which depends on -`pthread_t' being convertible to an integral type (which is of course -not generally guaranteed). USE_TSD_DATA_HACK is now the default -because I haven't yet found a non-glibc pthreads system where this -hack is _not_ needed. - -*NEW* and _important_: In (currently) one place in the ptmalloc2 -source, a write memory barrier is needed, named -atomic_write_barrier(). This macro needs to be defined at the end of -malloc-machine.h. 
For gcc, a fallback in the form of a full memory -barrier is already defined, but you may need to add another definition -if you don't use gcc. - -Usage -===== - -Just link libmalloc.a into your application. - -Some wicked systems (e.g. HPUX apparently) won't let malloc call _any_ -thread-related functions before main(). On these systems, -USE_STARTER=2 must be defined during compilation (see "make -posix-explicit" above) and the global initialization function -ptmalloc_init() must be called explitly, preferably at the start of -main(). - -Otherwise, when using ptmalloc2, no special precautions are necessary. - -Link order is important -======================= - -On some systems, when overriding malloc and linking against shared -libraries, the link order becomes very important. E.g., when linking -C++ programs on Solaris, don't rely on libC being included by default, -but instead put `-lthread' behind `-lC' on the command line: - - CC ... libmalloc.a -lC -lthread - -This is because there are global constructors in libC that need -malloc/ptmalloc, which in turn needs to have the thread library to be -already initialized. - -Debugging hooks -=============== - -All calls to malloc(), realloc(), free() and memalign() are routed -through the global function pointers __malloc_hook, __realloc_hook, -__free_hook and __memalign_hook if they are not NULL (see the malloc.h -header file for declarations of these pointers). Therefore the malloc -implementation can be changed at runtime, if care is taken not to call -free() or realloc() on pointers obtained with a different -implementation than the one currently in effect. (The easiest way to -guarantee this is to set up the hooks before any malloc call, e.g. -with a function pointed to by the global variable -__malloc_initialize_hook). 
- -A useful application of the hooks is built-in into ptmalloc2: The -implementation is usually very unforgiving with respect to misuse, -such as free()ing a pointer twice or free()ing a pointer not obtained -with malloc() (these will typically crash the application -immediately). To debug in such situations, you can set the -environment variable `MALLOC_CHECK_' (note the trailing underscore). -Performance will suffer somewhat, but you will get more controlled -behaviour in the case of misuse. If MALLOC_CHECK_=0, wrong free()s -will be silently ignored, if MALLOC_CHECK_=1, diagnostics will be -printed on stderr, and if MALLOC_CHECK_=2, abort() will be called on -any error. - -You can now also tune other malloc parameters (normally adjused via -mallopt() calls from the application) with environment variables: - - MALLOC_TRIM_THRESHOLD_ for deciding to shrink the heap (in bytes) - - MALLOC_TOP_PAD_ how much extra memory to allocate on - each system call (in bytes) - - MALLOC_MMAP_THRESHOLD_ min. size for chunks allocated via - mmap() (in bytes) - - MALLOC_MMAP_MAX_ max. number of mmapped regions to use - -Tests -===== - -Two testing applications, t-test1 and t-test2, are included in this -source distribution. Both perform pseudo-random sequences of -allocations/frees, and can be given numeric arguments (all arguments -are optional): - -% t-test[12] - - n-total = total number of threads executed (default 10) - n-parallel = number of threads running in parallel (2) - n-allocs = number of malloc()'s / free()'s per thread (10000) - size-max = max. size requested with malloc() in bytes (10000) - bins = number of bins to maintain - -The first test `t-test1' maintains a completely seperate pool of -allocated bins for each thread, and should therefore show full -parallelism. On the other hand, `t-test2' creates only a single pool -of bins, and each thread randomly allocates/frees any bin. 
Some lock -contention is to be expected in this case, as the threads frequently -cross each others arena. - -Performance results from t-test1 should be quite repeatable, while the -behaviour of t-test2 depends on scheduling variations. - -Conclusion -========== - -I'm always interested in performance data and feedback, just send mail -to ptmalloc@malloc.de. - -Good luck! diff --git a/opal/mca/memory/linux/arena.c b/opal/mca/memory/linux/arena.c deleted file mode 100644 index ee8d652dd94..00000000000 --- a/opal/mca/memory/linux/arena.c +++ /dev/null @@ -1,805 +0,0 @@ -/* Malloc implementation for multiple threads without lock contention. - Copyright (C) 2001 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Wolfram Gloger , 2001. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* $Id: arena.c,v 1.9 2004/11/05 14:42:23 wg Exp $ */ - -/* Compile-time constants. */ - -#define HEAP_MIN_SIZE (32*1024) -#ifndef HEAP_MAX_SIZE -#define HEAP_MAX_SIZE (1024*1024) /* must be a power of two */ -#endif - -/* HEAP_MIN_SIZE and HEAP_MAX_SIZE limit the size of mmap()ed heaps - that are dynamically created for multi-threaded programs. 
The - maximum size must be a power of two, for fast determination of - which heap belongs to a chunk. It should be much larger than the - mmap threshold, so that requests with a size just below that - threshold can be fulfilled without creating too many heaps. */ - - -#ifndef THREAD_STATS -#define THREAD_STATS 0 -#endif - -/* If THREAD_STATS is non-zero, some statistics on mutex locking are - computed. */ - -/***************************************************************************/ - -#define top(ar_ptr) ((ar_ptr)->top) - -/* A heap is a single contiguous memory region holding (coalesceable) - malloc_chunks. It is allocated with mmap() and always starts at an - address aligned to HEAP_MAX_SIZE. Not used unless compiling with - USE_ARENAS. */ - -typedef struct _heap_info { - mstate ar_ptr; /* Arena for this heap. */ - struct _heap_info *prev; /* Previous heap. */ - size_t size; /* Current size in bytes. */ - size_t pad; /* Make sure the following data is properly aligned. */ -} heap_info; - -/* Thread specific data */ - -static tsd_key_t arena_key; -static mutex_t list_lock; - -#if THREAD_STATS -static int stat_n_heaps; -#define THREAD_STAT(x) x -#else -#define THREAD_STAT(x) do ; while(0) -#endif - -/* Mapped memory in non-main arenas (reliable only for NO_THREADS). */ -static unsigned long arena_mem; - -/* Already initialized? */ -int __malloc_initialized = -1; - -/**************************************************************************/ - -#if USE_ARENAS - -/* arena_get() acquires an arena and locks the corresponding mutex. - First, try the one last locked successfully by this thread. (This - is the common case and handled with a macro for speed.) Then, loop - once over the circularly linked list of arenas. If no arena is - readily available, create a new one. In this latter case, `size' - is just a hint as to how much memory will be required immediately - in the new arena. 
*/ - -#define arena_get(ptr, size) do { \ - Void_t *vptr = NULL; \ - ptr = (mstate)tsd_getspecific(arena_key, vptr); \ - if(ptr && !mutex_trylock(&ptr->mutex)) { \ - THREAD_STAT(++(ptr->stat_lock_direct)); \ - } else \ - ptr = arena_get2(ptr, (size)); \ -} while(0) - -/* find the heap and corresponding arena for a given ptr */ - -#define heap_for_ptr(ptr) \ - ((heap_info *)((unsigned long)(ptr) & ~(HEAP_MAX_SIZE-1))) -#define arena_for_chunk(ptr) \ - (chunk_non_main_arena(ptr) ? heap_for_ptr(ptr)->ar_ptr : &main_arena) - -#else /* !USE_ARENAS */ - -/* There is only one arena, main_arena. */ - -#if THREAD_STATS -#define arena_get(ar_ptr, sz) do { \ - ar_ptr = &main_arena; \ - if(!mutex_trylock(&ar_ptr->mutex)) \ - ++(ar_ptr->stat_lock_direct); \ - else { \ - (void)mutex_lock(&ar_ptr->mutex); \ - ++(ar_ptr->stat_lock_wait); \ - } \ -} while(0) -#else -#define arena_get(ar_ptr, sz) do { \ - ar_ptr = &main_arena; \ - (void)mutex_lock(&ar_ptr->mutex); \ -} while(0) -#endif -#define arena_for_chunk(ptr) (&main_arena) - -#endif /* USE_ARENAS */ - -/**************************************************************************/ - -#ifndef NO_THREADS - -/* atfork support. */ - -static __malloc_ptr_t (*save_malloc_hook) __MALLOC_P ((size_t __size, - __const __malloc_ptr_t)); -# if !defined _LIBC || !defined USE_TLS || (defined SHARED && !USE___THREAD) -static __malloc_ptr_t (*save_memalign_hook) __MALLOC_P ((size_t align, - size_t __size, - __const __malloc_ptr_t)); -# endif -static void (*save_free_hook) __MALLOC_P ((__malloc_ptr_t __ptr, - __const __malloc_ptr_t)); -static Void_t* save_arena; - -/* Magic value for the thread-specific arena pointer when - malloc_atfork() is in use. */ - -#define ATFORK_ARENA_PTR ((Void_t*)-1) - -/* The following hooks are used while the `atfork' handling mechanism - is active. 
*/ - -static Void_t* -malloc_atfork(size_t sz, const Void_t *caller) -{ - Void_t *vptr = NULL; - Void_t *victim; - - tsd_getspecific(arena_key, vptr); - if(vptr == ATFORK_ARENA_PTR) { - /* We are the only thread that may allocate at all. */ - if(save_malloc_hook != malloc_check) { - return _int_malloc(&main_arena, sz); - } else { - if(top_check()<0) - return 0; - victim = _int_malloc(&main_arena, sz+1); - return mem2mem_check(victim, sz); - } - } else { - /* Suspend the thread until the `atfork' handlers have completed. - By that time, the hooks will have been reset as well, so that - mALLOc() can be used again. */ - (void)mutex_lock(&list_lock); - (void)mutex_unlock(&list_lock); - return public_mALLOc(sz); - } -} - -static void -free_atfork(Void_t* mem, const Void_t *caller) -{ - Void_t *vptr = NULL; - mstate ar_ptr; - mchunkptr p; /* chunk corresponding to mem */ - - if (mem == 0) /* free(0) has no effect */ - return; - - p = mem2chunk(mem); /* do not bother to replicate free_check here */ - -#if HAVE_MMAP - if (chunk_is_mmapped(p)) /* release mmapped memory. */ - { - munmap_chunk(p); - return; - } -#endif - - ar_ptr = arena_for_chunk(p); - tsd_getspecific(arena_key, vptr); - if(vptr != ATFORK_ARENA_PTR) - (void)mutex_lock(&ar_ptr->mutex); - _int_free(ar_ptr, mem); - if(vptr != ATFORK_ARENA_PTR) - (void)mutex_unlock(&ar_ptr->mutex); -} - -/* The following two functions are registered via thread_atfork() to - make sure that the mutexes remain in a consistent state in the - fork()ed version of a thread. Also adapt the malloc and free hooks - temporarily, because the `atfork' handler mechanism may use - malloc/free internally (e.g. in LinuxThreads). 
*/ - -static void -ptmalloc_lock_all __MALLOC_P((void)) -{ - mstate ar_ptr; - - if(__malloc_initialized < 1) - return; - (void)mutex_lock(&list_lock); - for(ar_ptr = &main_arena;;) { - (void)mutex_lock(&ar_ptr->mutex); - ar_ptr = ar_ptr->next; - if(ar_ptr == &main_arena) break; - } - save_malloc_hook = __malloc_hook; - save_free_hook = __free_hook; - __malloc_hook = malloc_atfork; - __free_hook = free_atfork; - /* Only the current thread may perform malloc/free calls now. */ - tsd_getspecific(arena_key, save_arena); - tsd_setspecific(arena_key, ATFORK_ARENA_PTR); -} - -static void -ptmalloc_unlock_all __MALLOC_P((void)) -{ - mstate ar_ptr; - - if(__malloc_initialized < 1) - return; - tsd_setspecific(arena_key, save_arena); - __malloc_hook = save_malloc_hook; - __free_hook = save_free_hook; - for(ar_ptr = &main_arena;;) { - (void)mutex_unlock(&ar_ptr->mutex); - ar_ptr = ar_ptr->next; - if(ar_ptr == &main_arena) break; - } - (void)mutex_unlock(&list_lock); -} - -#ifdef __linux__ - -/* In LinuxThreads, unlocking a mutex in the child process after a - fork() is currently unsafe, whereas re-initializing it is safe and - does not leak resources. Therefore, a special atfork handler is - installed for the child. */ - -static void -ptmalloc_unlock_all2 __MALLOC_P((void)) -{ - mstate ar_ptr; - - if(__malloc_initialized < 1) - return; -#if defined _LIBC || defined MALLOC_HOOKS - tsd_setspecific(arena_key, save_arena); - __malloc_hook = save_malloc_hook; - __free_hook = save_free_hook; -#endif - for(ar_ptr = &main_arena;;) { - (void)mutex_init(&ar_ptr->mutex); - ar_ptr = ar_ptr->next; - if(ar_ptr == &main_arena) break; - } - (void)mutex_init(&list_lock); -} - -#else - -#define ptmalloc_unlock_all2 ptmalloc_unlock_all - -#endif - -#endif /* !defined NO_THREADS */ - -/* Initialization routine. 
*/ -#ifdef _LIBC -#include -extern char **_environ; - -static char * -internal_function -next_env_entry (char ***position) -{ - char **current = *position; - char *result = NULL; - - while (*current != NULL) - { - if (__builtin_expect ((*current)[0] == 'M', 0) - && (*current)[1] == 'A' - && (*current)[2] == 'L' - && (*current)[3] == 'L' - && (*current)[4] == 'O' - && (*current)[5] == 'C' - && (*current)[6] == '_') - { - result = &(*current)[7]; - - /* Save current position for next visit. */ - *position = ++current; - - break; - } - - ++current; - } - - return result; -} -#endif /* _LIBC */ - -/* Set up basic state so that _int_malloc et al can work. */ -static void -ptmalloc_init_minimal __MALLOC_P((void)) -{ -#if DEFAULT_TOP_PAD != 0 - mp_.top_pad = DEFAULT_TOP_PAD; -#endif - mp_.n_mmaps_max = DEFAULT_MMAP_MAX; - mp_.mmap_threshold = DEFAULT_MMAP_THRESHOLD; - mp_.trim_threshold = DEFAULT_TRIM_THRESHOLD; - mp_.pagesize = malloc_getpagesize; -} - - -#if !(USE_STARTER & 2) -static -#endif -void -ptmalloc_init __MALLOC_P((void)) -{ -#if __STD_C - const char* s; -#else - char* s; -#endif - int secure = 0; - - if(__malloc_initialized >= 0) return; - __malloc_initialized = 0; - - if (mp_.pagesize == 0) - ptmalloc_init_minimal(); - -#ifndef NO_THREADS -# if USE_STARTER & 1 - /* With some threads implementations, creating thread-specific data - or initializing a mutex may call malloc() itself. Provide a - simple starter version (realloc() won't work). */ - save_malloc_hook = __malloc_hook; - save_memalign_hook = __memalign_hook; - save_free_hook = __free_hook; - __malloc_hook = malloc_starter; - __memalign_hook = memalign_starter; - __free_hook = free_starter; -# ifdef _LIBC - /* Initialize the pthreads interface. 
*/ - if (__pthread_initialize != NULL) - __pthread_initialize(); -# endif /* !defined _LIBC */ -# endif /* USE_STARTER & 1 */ -#endif /* !defined NO_THREADS */ - mutex_init(&main_arena.mutex); - main_arena.next = &main_arena; - - mutex_init(&list_lock); - tsd_key_create(&arena_key, NULL); - tsd_setspecific(arena_key, (Void_t *)&main_arena); - thread_atfork(ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2); -#ifndef NO_THREADS -# if USE_STARTER & 1 - __malloc_hook = save_malloc_hook; - __memalign_hook = save_memalign_hook; - __free_hook = save_free_hook; -# endif -# if USE_STARTER & 2 - __malloc_hook = 0; - __memalign_hook = 0; - __free_hook = 0; -# endif -#endif -#ifdef _LIBC - secure = __libc_enable_secure; - s = NULL; - if (__builtin_expect (_environ != NULL, 1)) - { - char **runp = _environ; - char *envline; - - while (__builtin_expect ((envline = next_env_entry (&runp)) != NULL, - 0)) - { - size_t len = strcspn (envline, "="); - - if (envline[len] != '=') - /* This is a "MALLOC_" variable at the end of the string - without a '=' character. Ignore it since otherwise we - will access invalid memory below. */ - continue; - - switch (len) - { - case 6: - if (memcmp (envline, "CHECK_", 6) == 0) - s = &envline[7]; - break; - case 8: - if (! secure && memcmp (envline, "TOP_PAD_", 8) == 0) - mALLOPt(M_TOP_PAD, atoi(&envline[9])); - break; - case 9: - if (! secure && memcmp (envline, "MMAP_MAX_", 9) == 0) - mALLOPt(M_MMAP_MAX, atoi(&envline[10])); - break; - case 15: - if (! secure) - { - if (memcmp (envline, "TRIM_THRESHOLD_", 15) == 0) - mALLOPt(M_TRIM_THRESHOLD, atoi(&envline[16])); - else if (memcmp (envline, "MMAP_THRESHOLD_", 15) == 0) - mALLOPt(M_MMAP_THRESHOLD, atoi(&envline[16])); - } - break; - default: - break; - } - } - } -#else - if (! 
secure) - { - if((s = getenv("MALLOC_TRIM_THRESHOLD_"))) - mALLOPt(M_TRIM_THRESHOLD, atoi(s)); - if((s = getenv("MALLOC_TOP_PAD_"))) - mALLOPt(M_TOP_PAD, atoi(s)); - if((s = getenv("MALLOC_MMAP_THRESHOLD_"))) - mALLOPt(M_MMAP_THRESHOLD, atoi(s)); - if((s = getenv("MALLOC_MMAP_MAX_"))) - mALLOPt(M_MMAP_MAX, atoi(s)); - } - s = getenv("MALLOC_CHECK_"); -#endif - if(s) { - if(s[0]) mALLOPt(M_CHECK_ACTION, (int)(s[0] - '0')); - __malloc_check_init(); - } -#if 0 - /* OMPI Change: Don't call the initialize hook; it was us. */ - if(__malloc_initialize_hook != NULL) - (*__malloc_initialize_hook)(); -#endif - - __malloc_initialized = 1; -} - -/* There are platforms (e.g. Hurd) with a link-time hook mechanism. */ -#ifdef thread_atfork_static -thread_atfork_static(ptmalloc_lock_all, ptmalloc_unlock_all, \ - ptmalloc_unlock_all2) -#endif - - - -/* Managing heaps and arenas (for concurrent threads) */ - -#if USE_ARENAS - -#if MALLOC_DEBUG > 1 - -/* Print the complete contents of a single heap to stderr. */ - -static void -#if __STD_C -dump_heap(heap_info *heap) -#else -dump_heap(heap) heap_info *heap; -#endif -{ - char *ptr; - mchunkptr p; - - fprintf(stderr, "Heap %p, size %10lx:\n", heap, (long)heap->size); - ptr = (heap->ar_ptr != (mstate)(heap+1)) ? - (char*)(heap + 1) : (char*)(heap + 1) + sizeof(struct malloc_state); - p = (mchunkptr)(((unsigned long)ptr + MALLOC_ALIGN_MASK) & - ~MALLOC_ALIGN_MASK); - for(;;) { - fprintf(stderr, "chunk %p size %10lx", p, (long)p->size); - if(p == top(heap->ar_ptr)) { - fprintf(stderr, " (top)\n"); - break; - } else if(p->size == (0|PREV_INUSE)) { - fprintf(stderr, " (fence)\n"); - break; - } - fprintf(stderr, "\n"); - p = next_chunk(p); - } -} - -#endif /* MALLOC_DEBUG > 1 */ - -/* Create a new heap. size is automatically rounded up to a multiple - of the page size. 
*/ - -static heap_info * -internal_function -#if __STD_C -new_heap(size_t size, size_t top_pad) -#else -new_heap(size, top_pad) size_t size, top_pad; -#endif -{ - size_t page_mask = malloc_getpagesize - 1; - char *p1, *p2; - unsigned long ul; - heap_info *h; - - if(size+top_pad < HEAP_MIN_SIZE) - size = HEAP_MIN_SIZE; - else if(size+top_pad <= HEAP_MAX_SIZE) - size += top_pad; - else if(size > HEAP_MAX_SIZE) - return 0; - else - size = HEAP_MAX_SIZE; - size = (size + page_mask) & ~page_mask; - - /* A memory region aligned to a multiple of HEAP_MAX_SIZE is needed. - No swap space needs to be reserved for the following large - mapping (on Linux, this is the case for all non-writable mappings - anyway). */ - p1 = (char *)MMAP(0, HEAP_MAX_SIZE<<1, PROT_NONE, MAP_PRIVATE|MAP_NORESERVE); - if(p1 != MAP_FAILED) { - p2 = (char *)(((unsigned long)p1 + (HEAP_MAX_SIZE-1)) & ~(HEAP_MAX_SIZE-1)); - ul = p2 - p1; - munmap(p1, ul); - munmap(p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul); - } else { - /* Try to take the chance that an allocation of only HEAP_MAX_SIZE - is already aligned. */ - p2 = (char *)MMAP(0, HEAP_MAX_SIZE, PROT_NONE, MAP_PRIVATE|MAP_NORESERVE); - if(p2 == MAP_FAILED) - return 0; - if((unsigned long)p2 & (HEAP_MAX_SIZE-1)) { - munmap(p2, HEAP_MAX_SIZE); - return 0; - } - } - if(mprotect(p2, size, PROT_READ|PROT_WRITE) != 0) { - munmap(p2, HEAP_MAX_SIZE); - return 0; - } - h = (heap_info *)p2; - h->size = size; - THREAD_STAT(stat_n_heaps++); - return h; -} - -/* Grow or shrink a heap. size is automatically rounded up to a - multiple of the page size if it is positive. 
*/ - -static int -#if __STD_C -grow_heap(heap_info *h, long diff) -#else -grow_heap(h, diff) heap_info *h; long diff; -#endif -{ - size_t page_mask = malloc_getpagesize - 1; - long new_size; - - if(diff >= 0) { - diff = (diff + page_mask) & ~page_mask; - new_size = (long)h->size + diff; - if(new_size > HEAP_MAX_SIZE) - return -1; - if(mprotect((char *)h + h->size, diff, PROT_READ|PROT_WRITE) != 0) - return -2; - } else { - new_size = (long)h->size + diff; - if(new_size < (long)sizeof(*h)) - return -1; - - if(mprotect((char *)h + new_size, -diff, PROT_NONE) != 0) - return -2; - /*fprintf(stderr, "shrink %p %08lx\n", h, new_size);*/ - } - h->size = new_size; - return 0; -} - -/* Delete a heap. */ - -#define delete_heap(heap) munmap((char*)(heap), HEAP_MAX_SIZE) - -static int -internal_function -#if __STD_C -heap_trim(heap_info *heap, size_t pad) -#else -heap_trim(heap, pad) heap_info *heap; size_t pad; -#endif -{ - mstate ar_ptr = heap->ar_ptr; - unsigned long pagesz = mp_.pagesize; - mchunkptr top_chunk = top(ar_ptr), p, bck, fwd; - heap_info *prev_heap; - long new_size, top_size, extra; - - /* Can this heap go away completely? 
*/ - while(top_chunk == chunk_at_offset(heap, sizeof(*heap))) { - prev_heap = heap->prev; - p = chunk_at_offset(prev_heap, prev_heap->size - (MINSIZE-2*SIZE_SZ)); - assert(p->size == (0|PREV_INUSE)); /* must be fencepost */ - p = prev_chunk(p); - new_size = chunksize(p) + (MINSIZE-2*SIZE_SZ); - assert(new_size>0 && new_size<(long)(2*MINSIZE)); - if(!prev_inuse(p)) - new_size += p->prev_size; - assert(new_size>0 && new_sizesize) < pad + MINSIZE + pagesz) - break; - ar_ptr->system_mem -= heap->size; - arena_mem -= heap->size; - delete_heap(heap); - heap = prev_heap; - if(!prev_inuse(p)) { /* consolidate backward */ - p = prev_chunk(p); - unlink(p, bck, fwd); - } - assert(((unsigned long)((char*)p + new_size) & (pagesz-1)) == 0); - assert( ((char*)p + new_size) == ((char*)heap + heap->size) ); - top(ar_ptr) = top_chunk = p; - set_head(top_chunk, new_size | PREV_INUSE); - /*check_chunk(ar_ptr, top_chunk);*/ - } - top_size = chunksize(top_chunk); - extra = ((top_size - pad - MINSIZE + (pagesz-1))/pagesz - 1) * pagesz; - if(extra < (long)pagesz) - return 0; - /* Try to shrink. */ - if(grow_heap(heap, -extra) != 0) - return 0; - ar_ptr->system_mem -= extra; - arena_mem -= extra; - - /* Success. Adjust top accordingly. */ - set_head(top_chunk, (top_size - extra) | PREV_INUSE); - /*check_chunk(ar_ptr, top_chunk);*/ - return 1; -} - -static mstate -internal_function -#if __STD_C -arena_get2(mstate a_tsd, size_t size) -#else -arena_get2(a_tsd, size) mstate a_tsd; size_t size; -#endif -{ - mstate a; - int err; - - if(!a_tsd) - a = a_tsd = &main_arena; - else { - a = a_tsd->next; - if(!a) { - /* This can only happen while initializing the new arena. */ - (void)mutex_lock(&main_arena.mutex); - THREAD_STAT(++(main_arena.stat_lock_wait)); - return &main_arena; - } - } - - /* Check the global, circularly linked list for available arenas. 
*/ - repeat: - do { - if(!mutex_trylock(&a->mutex)) { - THREAD_STAT(++(a->stat_lock_loop)); - tsd_setspecific(arena_key, (Void_t *)a); - return a; - } - a = a->next; - } while(a != a_tsd); - - /* If not even the list_lock can be obtained, try again. This can - happen during `atfork', or for example on systems where thread - creation makes it temporarily impossible to obtain _any_ - locks. */ - if(mutex_trylock(&list_lock)) { - a = a_tsd; - goto repeat; - } - (void)mutex_unlock(&list_lock); - - /* Nothing immediately available, so generate a new arena. */ - a = _int_new_arena(size); - if(!a) - return 0; - - tsd_setspecific(arena_key, (Void_t *)a); - mutex_init(&a->mutex); - err = mutex_lock(&a->mutex); /* remember result */ - - /* Add the new arena to the global list. */ - (void)mutex_lock(&list_lock); - a->next = main_arena.next; -/* OMPI: use our barriers - atomic_write_barrier (); -*/ - opal_atomic_wmb(); - main_arena.next = a; - (void)mutex_unlock(&list_lock); - - if(err) /* locking failed; keep arena for further attempts later */ - return 0; - - THREAD_STAT(++(a->stat_lock_loop)); - return a; -} - -/* Create a new arena with initial size "size". */ - -mstate -_int_new_arena(size_t size) -{ - mstate a; - heap_info *h; - char *ptr; - unsigned long misalign; - - h = new_heap(size + (sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT), - mp_.top_pad); - if(!h) { - /* Maybe size is too large to fit in a single heap. So, just try - to create a minimally-sized arena and let _int_malloc() attempt - to deal with the large request via mmap_chunk(). 
*/ - h = new_heap(sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT, mp_.top_pad); - if(!h) - return 0; - } - a = h->ar_ptr = (mstate)(h+1); - malloc_init_state(a); - /*a->next = NULL;*/ - a->system_mem = a->max_system_mem = h->size; - arena_mem += h->size; -#ifdef NO_THREADS - if((unsigned long)(mp_.mmapped_mem + arena_mem + main_arena.system_mem) > - mp_.max_total_mem) - mp_.max_total_mem = mp_.mmapped_mem + arena_mem + main_arena.system_mem; -#endif - - /* Set up the top chunk, with proper alignment. */ - ptr = (char *)(a + 1); - misalign = (unsigned long)chunk2mem(ptr) & MALLOC_ALIGN_MASK; - if (misalign > 0) - ptr += MALLOC_ALIGNMENT - misalign; - top(a) = (mchunkptr)ptr; - set_head(top(a), (((char*)h + h->size) - ptr) | PREV_INUSE); - - return a; -} - -/* Obtain the arena number n. Needed in malloc_stats. */ - -mstate -_int_get_arena (int n) -{ - mstate a = &main_arena; - - while (n-- != 0) { - a = a->next; - if (a == &main_arena) - return 0; - } - return a; -} - -#endif /* USE_ARENAS */ - -/* - * Local variables: - * c-basic-offset: 2 - * End: - */ diff --git a/opal/mca/memory/linux/configure.m4 b/opal/mca/memory/linux/configure.m4 deleted file mode 100644 index ae6c5257e0b..00000000000 --- a/opal/mca/memory/linux/configure.m4 +++ /dev/null @@ -1,198 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -AC_DEFUN([MCA_opal_memory_linux_PRIORITY], [40]) - -AC_DEFUN([MCA_opal_memory_linux_COMPILE_MODE], [ - AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) - $4="static" - AC_MSG_RESULT([$$4]) -]) - - -# MCA_memory_linux_CONFIG(action-if-can-compile, -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_opal_memory_linux_CONFIG],[ - AC_CONFIG_FILES([opal/mca/memory/linux/Makefile]) - - OPAL_VAR_SCOPE_PUSH([memory_linux_ptmalloc2_happy memory_linux_ummu_happy memory_linux_requested icc_major_ver icc_minor_ver memory_linux_mmap memory_linux_munmap memory_linux_LIBS_SAVE]) - - # Only allow this component to build on Linux-based systems - - AC_MSG_CHECKING([operating system]) - case $host in - *linux*) - AC_MSG_RESULT([$host -- supported]) - memory_linux_ptmalloc2_happy=yes - memory_linux_ummu_happy=yes - ;; - *) - AC_MSG_RESULT([$host -- unsupported]) - memory_linux_ptmalloc2_happy=no - memory_linux_ummu_happy=no - ;; - esac - - AS_IF([test "$with_memory_manager" = "linux"], - [memory_linux_ptmalloc2_happy=yes - memory_linux_ummu_happy=yes - memory_linux_requested=1], - [memory_linux_requested=0 - AS_IF([test "$with_memory_manager" = "" -o "$with_memory_manager" = "yes"], - [memory_linux_ptmalloc2_happy=yes - memory_linux_ummu_happy=yes], - [memory_linux_ptmalloc2_happy=no - memory_linux_ummu_happy=no])]) - - ###################################################################### - # ptmalloc2 - ###################################################################### - - # Per ticket #227, Intel 9.0 v20051201 on ia64 with optimization - # of -O2 or higher will bork linux in strange in mysterious ways. - # Doh! So if the compiler vendor is intel and we're on an ia64 - # box, run "icc --version" and snarf the version string. If it's - # 9.0 and the version is <= 20051201, then disable ptmalloc2. 
- # Executive decision: ignore optimization levels (even though -O1 - # and -O0 seem to work). The upgrade to 9.1 is free, so that's a - # better path than trying to make a much more complicated test - # here. - - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [case $host in - ia64-*) - AS_IF([test "$opal_c_vendor" = "intel"], - [# check for v9.0 <= 20051201 - icc_major_ver="`$CC --version | head -n 1 | awk '{ print [$]3 }'`" - icc_minor_ver="`$CC --version | head -n 1 | awk '{ print [$]4 }'`" - AS_IF([test "$icc_major_ver" = "9.0" -a "`expr $icc_minor_ver \<= 20051201`" = "1"], - [memory_linux_ptmalloc2_happy=no - AC_MSG_WARN([*** Detected Intel C compiler v9.0 <= 20051201 on ia64]) - AC_MSG_WARN([*** This compiler/platform combination has known problems with ptmalloc2]) - AC_MSG_WARN([*** Disabling ptmalloc2])])]) - ;; - esac]) - - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [# check for malloc.h - AC_CHECK_HEADER([malloc.h], - [memory_linux_ptmalloc2_happy=yes], - [memory_linux_ptmalloc2_happy=no])]) - - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [# check for init hook symbol - AC_CHECK_DECL([__malloc_initialize_hook], - [memory_linux_ptmalloc2_happy=yes], - [memory_linux_ptmalloc2_happy=no], - [AC_INCLUDES_DEFAULT - #include ])]) - - # - # See if we have sbrk prototyped - # - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [AC_CHECK_DECLS([sbrk])]) - - # - # Figure out how we're going to call mmap/munmap for real - # - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [memory_linux_mmap=0 - memory_linux_munmap=1 - - # it's nearly impossible to call mmap from syscall(), so - # only go this route if we can't get at munmap any other - # way. 
- AC_CHECK_HEADER([syscall.h], - [AC_CHECK_FUNCS([syscall], [], [memory_linux_munmap=0])]) - - # Always look for __munmap and __mmap - AC_CHECK_FUNCS([__munmap], [memory_linux_mmap=1]) - AC_CHECK_FUNCS([__mmap]) - - # only allow dlsym (and therefore add -ldl) if we - # really need to - AS_IF([test "$memory_linux_mmap" = "0"], - [memory_linux_LIBS_SAVE="$LIBS" - AC_CHECK_LIB([dl], - [dlsym], - [LIBS="$LIBS -ldl" - memory_linux_LIBS="-ldl" - memory_linux_mmap=1]) - AC_CHECK_FUNCS([dlsym]) - LIBS="$memory_linux_LIBS_SAVE"]) - - AS_IF([test "$memory_linux_mmap" = "0" -a "$memory_linux_munmap" = "0"], - [memory_linux_ptmalloc2_happy=no])]) - - # If all is good, save the extra libs for the wrapper - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [value=1], - [value=0]) - AC_DEFINE_UNQUOTED([MEMORY_LINUX_PTMALLOC2], [$value], - [Whether ptmalloc2 is supported on this system or not]) - AM_CONDITIONAL([MEMORY_LINUX_PTMALLOC2], - [test "$memory_linux_ptmalloc2_happy" = yes]) - - ###################################################################### - # ummunotify - ###################################################################### - - # Check for the relevant header - AS_IF([test "$memory_linux_ummu_happy" = yes], - [# check for linux/ummunotify.h - AC_CHECK_HEADER([linux/ummunotify.h], - [memory_linux_ummu_happy=yes], - [memory_linux_ummu_happy=no])]) - - # has the Linux declaration for ioctl - AC_CHECK_HEADERS([stropts.h]) - - # If all is good, set the header file that we want the rest of the - # code base to use - AS_IF([test "$memory_linux_ummu_happy" = yes], - [memory_base_include="linux/public.h" - value=1], - [value=0]) - AC_DEFINE_UNQUOTED([MEMORY_LINUX_UMMUNOTIFY], [$value], - [Whether ummunotify is supported on this system or not]) - AM_CONDITIONAL([MEMORY_LINUX_UMMUNOTIFY], - [test "$memory_linux_ummu_happy" = yes]) - - ###################################################################### - # post processing - 
###################################################################### - - AS_IF([test "$memory_malloc_hooks_requested" = 1 -a \ - "$memory_linux_ptmalloc2_happy" = no -a \ - "$memory_linux_ummu_happy" = no], - [AC_MSG_ERROR([linux memory management requested but neither ptmalloc2 nor ummunotify are available. Aborting.])]) - AC_SUBST([memory_linux_LIBS]) - - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes -o \ - "$memory_linux_ummu_happy" = yes], - [memory_base_found=1 - $1], - [memory_base_found=0 - memory_base_include= - $2]) - - OPAL_VAR_SCOPE_POP -]) diff --git a/opal/mca/memory/linux/help-opal-memory-linux.txt b/opal/mca/memory/linux/help-opal-memory-linux.txt deleted file mode 100644 index 1625b070ad6..00000000000 --- a/opal/mca/memory/linux/help-opal-memory-linux.txt +++ /dev/null @@ -1,29 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI's memory/linux component. -# -[ummunotify eaccess] -Open MPI was unable to open the UMMU notification device. This is -likely a permissions problem on the device itself. UMMU notification -support is therefore disabled in this process; an alternate memory -hook manager *may* be used instead (if available). - - Local host: %s - UMMU device: %s -# -[ummunotify open error] -Open MPI was unable to open the UMMU notification device. UMMU -notification support is therefore disabled in this process; an -alternate memory hook manager *may* be used instead (if available). 
- - Local host: %s - UMMU device: %s - Error: %s (%d) diff --git a/opal/mca/memory/linux/hooks.c b/opal/mca/memory/linux/hooks.c deleted file mode 100644 index 03d9a095b15..00000000000 --- a/opal/mca/memory/linux/hooks.c +++ /dev/null @@ -1,891 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. - * - * Additional copyrights may follow. - */ -/* Malloc implementation for multiple threads without lock contention. - Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Wolfram Gloger , 2001. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. 
*/ - -/* $Id: hooks.c,v 1.12 2004/11/05 14:42:32 wg Exp $ */ - -#include "opal_config.h" - -#include "opal/mca/mca.h" -#include "opal/mca/memory/memory.h" -#include "opal/constants.h" - -#include "opal/mca/memory/linux/memory_linux.h" - -#ifndef DEFAULT_CHECK_ACTION -#define DEFAULT_CHECK_ACTION 1 -#endif - -#ifdef HAVE_SYS_STAT_H -#include /* for stat */ -#endif /* HAVE_SYS_STAT_H */ - -/* Defined in memory_linux_component.c */ -extern bool opal_memory_linux_disable; - -/* What to do if the standard debugging hooks are in place and a - corrupt pointer is detected: do nothing (0), print an error message - (1), or call abort() (2). */ - -/* Hooks for debugging versions. The initial hooks just call the - initialization routine, then do the normal work. */ - -#if !(USE_STARTER & 2) - -/* OMPI change: these aren't used (avoid a compiler warning by if - 0'ing them out */ -#if 0 -static Void_t* -#if __STD_C -malloc_hook_ini(size_t sz, const __malloc_ptr_t caller) -#else -malloc_hook_ini(sz, caller) - size_t sz; const __malloc_ptr_t caller; -#endif -{ - __malloc_hook = NULL; - ptmalloc_init(); - return public_mALLOc(sz); -} -#endif - -/* OMPI change: these aren't used (avoid a compiler warning by if - 0'ing them out */ -#if 0 -static Void_t* -#if __STD_C -realloc_hook_ini(Void_t* ptr, size_t sz, const __malloc_ptr_t caller) -#else -realloc_hook_ini(ptr, sz, caller) - Void_t* ptr; size_t sz; const __malloc_ptr_t caller; -#endif -{ - __malloc_hook = NULL; - __realloc_hook = NULL; - ptmalloc_init(); - return public_rEALLOc(ptr, sz); -} -#endif - -/* OMPI change: these aren't used (avoid a compiler warning by if - 0'ing them out */ -#if 0 -static Void_t* -#if __STD_C -memalign_hook_ini(size_t alignment, size_t sz, const __malloc_ptr_t caller) -#else -memalign_hook_ini(alignment, sz, caller) - size_t alignment; size_t sz; const __malloc_ptr_t caller; -#endif -{ - __memalign_hook = NULL; - ptmalloc_init(); - return public_mEMALIGn(alignment, sz); -} -#endif - -#endif /* 
!(USE_STARTER & 2) */ - -static int check_action = DEFAULT_CHECK_ACTION; - -/* Whether we are using malloc checking. */ -static int using_malloc_checking; - -/* A flag that is set by malloc_set_state, to signal that malloc checking - must not be enabled on the request from the user (via the MALLOC_CHECK_ - environment variable). It is reset by __malloc_check_init to tell - malloc_set_state that the user has requested malloc checking. - - The purpose of this flag is to make sure that malloc checking is not - enabled when the heap to be restored was constructed without malloc - checking, and thus does not contain the required magic bytes. - Otherwise the heap would be corrupted by calls to free and realloc. If - it turns out that the heap was created with malloc checking and the - user has requested it malloc_set_state just calls __malloc_check_init - again to enable it. On the other hand, reusing such a heap without - further malloc checking is safe. */ -static int disallow_malloc_check; - -/* Activate a standard set of debugging hooks. */ -void -__malloc_check_init() -{ - if (disallow_malloc_check) { - disallow_malloc_check = 0; - return; - } - using_malloc_checking = 1; - __malloc_hook = malloc_check; - __free_hook = free_check; - __realloc_hook = realloc_check; - __memalign_hook = memalign_check; - if(check_action & 1) - fprintf(stderr, "malloc: using debugging hooks\n"); -} - -/* A simple, standard set of debugging hooks. Overhead is `only' one - byte per chunk; still this will catch most cases of double frees or - overruns. The goal here is to avoid obscure crashes due to invalid - usage, unlike in the MALLOC_DEBUG code. */ - -#define MAGICBYTE(p) ( ( ((size_t)p >> 3) ^ ((size_t)p >> 11)) & 0xFF ) - -/* Instrument a chunk with overrun detector byte(s) and convert it - into a user pointer with requested size sz. 
*/ - -static Void_t* -internal_function -#if __STD_C -mem2mem_check(Void_t *ptr, size_t sz) -#else -mem2mem_check(ptr, sz) Void_t *ptr; size_t sz; -#endif -{ - mchunkptr p; - unsigned char* m_ptr = (unsigned char*)BOUNDED_N(ptr, sz); - size_t i; - - if (!ptr) - return ptr; - p = mem2chunk(ptr); - for(i = chunksize(p) - (chunk_is_mmapped(p) ? 2*SIZE_SZ+1 : SIZE_SZ+1); - i > sz; - i -= 0xFF) { - if(i-sz < 0x100) { - m_ptr[i] = (unsigned char)(i-sz); - break; - } - m_ptr[i] = 0xFF; - } - m_ptr[sz] = MAGICBYTE(p); - return (Void_t*)m_ptr; -} - -/* Convert a pointer to be free()d or realloc()ed to a valid chunk - pointer. If the provided pointer is not valid, return NULL. */ - -static mchunkptr -internal_function -#if __STD_C -mem2chunk_check(Void_t* mem) -#else -mem2chunk_check(mem) Void_t* mem; -#endif -{ - mchunkptr p; - INTERNAL_SIZE_T sz, c; - unsigned char magic; - - if(!aligned_OK(mem)) return NULL; - p = mem2chunk(mem); - if (!chunk_is_mmapped(p)) { - /* Must be a chunk in conventional heap memory. */ - int contig = contiguous(&main_arena); - sz = chunksize(p); - if((contig && - ((char*)p=(mp_.sbrk_base+main_arena.system_mem) )) || - szprev_size&MALLOC_ALIGN_MASK || - (contig && (char*)prev_chunk(p)size & PREV_INUSE) || - ( (((unsigned long)p - p->prev_size) & page_mask) != 0 ) || - ( (sz = chunksize(p)), ((p->prev_size + sz) & page_mask) != 0 ) ) - return NULL; - magic = MAGICBYTE(p); - for(sz -= 1; (c = ((unsigned char*)p)[sz]) != magic; sz -= c) { - if(c<=0 || sz<(c+2*SIZE_SZ)) return NULL; - } - ((unsigned char*)p)[sz] ^= 0xFF; - } - return p; -} - -/* Check for corruption of the top chunk, and try to recover if - necessary. 
*/ - -static int -internal_function -#if __STD_C -top_check(void) -#else -top_check() -#endif -{ - mchunkptr t = top(&main_arena); - char* brk, * new_brk; - INTERNAL_SIZE_T front_misalign, sbrk_size; - unsigned long pagesz = malloc_getpagesize; - - if (t == initial_top(&main_arena) || - (!chunk_is_mmapped(t) && - chunksize(t)>=MINSIZE && - prev_inuse(t) && - (!contiguous(&main_arena) || - (char*)t + chunksize(t) == mp_.sbrk_base + main_arena.system_mem))) - return 0; - - if(check_action & 1) - fprintf(stderr, "malloc: top chunk is corrupt\n"); - if(check_action & 2) - abort(); - - /* Try to set up a new top chunk. */ - brk = MORECORE(0); - front_misalign = (unsigned long)chunk2mem(brk) & MALLOC_ALIGN_MASK; - if (front_misalign > 0) - front_misalign = MALLOC_ALIGNMENT - front_misalign; - sbrk_size = front_misalign + mp_.top_pad + MINSIZE; - sbrk_size += pagesz - ((unsigned long)(brk + sbrk_size) & (pagesz - 1)); - new_brk = (char*)(MORECORE (sbrk_size)); - if (new_brk == (char*)(MORECORE_FAILURE)) return -1; - /* Call the `morecore' hook if necessary. */ - if (__after_morecore_hook) - (*__after_morecore_hook) (); - main_arena.system_mem = (new_brk - mp_.sbrk_base) + sbrk_size; - - top(&main_arena) = (mchunkptr)(brk + front_misalign); - set_head(top(&main_arena), (sbrk_size - front_misalign) | PREV_INUSE); - - return 0; -} - -static Void_t* -#if __STD_C -malloc_check(size_t sz, const Void_t *caller) -#else -malloc_check(sz, caller) size_t sz; const Void_t *caller; -#endif -{ - Void_t *victim; - - (void)mutex_lock(&main_arena.mutex); - victim = (top_check() >= 0) ? 
_int_malloc(&main_arena, sz+1) : NULL; - (void)mutex_unlock(&main_arena.mutex); - return mem2mem_check(victim, sz); -} - -static void -#if __STD_C -free_check(Void_t* mem, const Void_t *caller) -#else -free_check(mem, caller) Void_t* mem; const Void_t *caller; -#endif -{ - mchunkptr p; - - if(!mem) return; - (void)mutex_lock(&main_arena.mutex); - p = mem2chunk_check(mem); - if(!p) { - (void)mutex_unlock(&main_arena.mutex); - if(check_action & 1) - fprintf(stderr, "free(): invalid pointer %p!\n", mem); - if(check_action & 2) - abort(); - return; - } -#if HAVE_MMAP - if (chunk_is_mmapped(p)) { - (void)mutex_unlock(&main_arena.mutex); - munmap_chunk(p); - return; - } -#endif -#if 0 /* Erase freed memory. */ - memset(mem, 0, chunksize(p) - (SIZE_SZ+1)); -#endif - _int_free(&main_arena, mem); - (void)mutex_unlock(&main_arena.mutex); -} - -static Void_t* -#if __STD_C -realloc_check(Void_t* oldmem, size_t bytes, const Void_t *caller) -#else -realloc_check(oldmem, bytes, caller) - Void_t* oldmem; size_t bytes; const Void_t *caller; -#endif -{ - mchunkptr oldp; - INTERNAL_SIZE_T nb, oldsize; - Void_t* newmem = 0; - - if (oldmem == 0) return malloc_check(bytes, NULL); - (void)mutex_lock(&main_arena.mutex); - oldp = mem2chunk_check(oldmem); - (void)mutex_unlock(&main_arena.mutex); - if(!oldp) { - if(check_action & 1) - fprintf(stderr, "realloc(): invalid pointer %p!\n", oldmem); - if(check_action & 2) - abort(); - return malloc_check(bytes, NULL); - } - oldsize = chunksize(oldp); - - checked_request2size(bytes+1, nb); - (void)mutex_lock(&main_arena.mutex); - -#if HAVE_MMAP - if (chunk_is_mmapped(oldp)) { -#if HAVE_MREMAP - mchunkptr newp = mremap_chunk(oldp, nb); - if(newp) - newmem = chunk2mem(newp); - else -#endif - { - /* Note the extra SIZE_SZ overhead. */ - if(oldsize - SIZE_SZ >= nb) - newmem = oldmem; /* do nothing */ - else { - /* Must alloc, copy, free. 
*/ - if (top_check() >= 0) - newmem = _int_malloc(&main_arena, bytes+1); - if (newmem) { - MALLOC_COPY(BOUNDED_N(newmem, bytes+1), oldmem, oldsize - 2*SIZE_SZ); - munmap_chunk(oldp); - } - } - } - } else { -#endif /* HAVE_MMAP */ - if (top_check() >= 0) - newmem = _int_realloc(&main_arena, oldmem, bytes+1); -#if 0 /* Erase freed memory. */ - if(newmem) - newp = mem2chunk(newmem); - nb = chunksize(newp); - if(oldp=chunk_at_offset(newp, nb)) { - memset((char*)oldmem + 2*sizeof(mbinptr), 0, - oldsize - (2*sizeof(mbinptr)+2*SIZE_SZ+1)); - } else if(nb > oldsize+SIZE_SZ) { - memset((char*)BOUNDED_N(chunk2mem(newp), bytes) + oldsize, - 0, nb - (oldsize+SIZE_SZ)); - } -#endif -#if HAVE_MMAP - } -#endif - (void)mutex_unlock(&main_arena.mutex); - - return mem2mem_check(newmem, bytes); -} - -static Void_t* -#if __STD_C -memalign_check(size_t alignment, size_t bytes, const Void_t *caller) -#else -memalign_check(alignment, bytes, caller) - size_t alignment; size_t bytes; const Void_t *caller; -#endif -{ - INTERNAL_SIZE_T nb; - Void_t* mem; - - if (alignment <= MALLOC_ALIGNMENT) return malloc_check(bytes, NULL); - if (alignment < MINSIZE) alignment = MINSIZE; - - checked_request2size(bytes+1, nb); - (void)mutex_lock(&main_arena.mutex); - mem = (top_check() >= 0) ? _int_memalign(&main_arena, alignment, bytes+1) : - NULL; - (void)mutex_unlock(&main_arena.mutex); - return mem2mem_check(mem, bytes); -} - -#if !defined NO_THREADS && USE_STARTER - -/* The following hooks are used when the global initialization in - ptmalloc_init() hasn't completed yet. */ - -static Void_t* -#if __STD_C -malloc_starter(size_t sz, const Void_t *caller) -#else -malloc_starter(sz, caller) size_t sz; const Void_t *caller; -#endif -{ - Void_t* victim; - - ptmalloc_init_minimal(); - victim = _int_malloc(&main_arena, sz); - - return victim ? 
BOUNDED_N(victim, sz) : 0; -} - -static Void_t* -#if __STD_C -memalign_starter(size_t align, size_t sz, const Void_t *caller) -#else -memalign_starter(align, sz, caller) size_t align, sz; const Void_t *caller; -#endif -{ - Void_t* victim; - - ptmalloc_init_minimal(); - victim = _int_memalign(&main_arena, align, sz); - - return victim ? BOUNDED_N(victim, sz) : 0; -} - -static void -#if __STD_C -free_starter(Void_t* mem, const Void_t *caller) -#else -free_starter(mem, caller) Void_t* mem; const Void_t *caller; -#endif -{ - mchunkptr p; - - if(!mem) return; - p = mem2chunk(mem); -#if HAVE_MMAP - if (chunk_is_mmapped(p)) { - munmap_chunk(p); - return; - } -#endif - _int_free(&main_arena, mem); -} - -#endif /* !defined NO_THREADS && USE_STARTER */ - - -/* Get/set state: malloc_get_state() records the current state of all - malloc variables (_except_ for the actual heap contents and `hook' - function pointers) in a system dependent, opaque data structure. - This data structure is dynamically allocated and can be free()d - after use. malloc_set_state() restores the state of all malloc - variables to the previously obtained state. This is especially - useful when using this malloc as part of a shared library, and when - the heap contents are saved/restored via some other method. The - primary example for this is GNU Emacs with its `dumping' procedure. - `Hook' function pointers are never saved or restored by these - functions, with two exceptions: If malloc checking was in use when - malloc_get_state() was called, then malloc_set_state() calls - __malloc_check_init() if possible; if malloc checking was not in - use in the recorded state but the user requested malloc checking, - then the hooks are reset to 0. 
*/ - -#define MALLOC_STATE_MAGIC 0x444c4541l -#define MALLOC_STATE_VERSION (0*0x100l + 2l) /* major*0x100 + minor */ - -struct malloc_save_state { - long magic; - long version; - mbinptr av[NBINS * 2 + 2]; - char* sbrk_base; - int sbrked_mem_bytes; - unsigned long trim_threshold; - unsigned long top_pad; - unsigned int n_mmaps_max; - unsigned long mmap_threshold; - int check_action; - unsigned long max_sbrked_mem; - unsigned long max_total_mem; - unsigned int n_mmaps; - unsigned int max_n_mmaps; - unsigned long mmapped_mem; - unsigned long max_mmapped_mem; - int using_malloc_checking; -}; - -Void_t* -public_gET_STATe(void) -{ - struct malloc_save_state* ms; - int i; - mbinptr b; - - ms = (struct malloc_save_state*)public_mALLOc(sizeof(*ms)); - if (!ms) - return 0; - (void)mutex_lock(&main_arena.mutex); - malloc_consolidate(&main_arena); - ms->magic = MALLOC_STATE_MAGIC; - ms->version = MALLOC_STATE_VERSION; - ms->av[0] = 0; - ms->av[1] = 0; /* used to be binblocks, now no longer used */ - ms->av[2] = top(&main_arena); - ms->av[3] = 0; /* used to be undefined */ - for(i=1; iav[2*i+2] = ms->av[2*i+3] = 0; /* empty bin */ - else { - ms->av[2*i+2] = first(b); - ms->av[2*i+3] = last(b); - } - } - ms->sbrk_base = mp_.sbrk_base; - ms->sbrked_mem_bytes = main_arena.system_mem; - ms->trim_threshold = mp_.trim_threshold; - ms->top_pad = mp_.top_pad; - ms->n_mmaps_max = mp_.n_mmaps_max; - ms->mmap_threshold = mp_.mmap_threshold; - ms->check_action = check_action; - ms->max_sbrked_mem = main_arena.max_system_mem; -#ifdef NO_THREADS - ms->max_total_mem = mp_.max_total_mem; -#else - ms->max_total_mem = 0; -#endif - ms->n_mmaps = mp_.n_mmaps; - ms->max_n_mmaps = mp_.max_n_mmaps; - ms->mmapped_mem = mp_.mmapped_mem; - ms->max_mmapped_mem = mp_.max_mmapped_mem; - ms->using_malloc_checking = using_malloc_checking; - (void)mutex_unlock(&main_arena.mutex); - return (Void_t*)ms; -} - -int -public_sET_STATe(Void_t* msptr) -{ - struct malloc_save_state* ms = (struct 
malloc_save_state*)msptr; - int i; - mbinptr b; - - disallow_malloc_check = 1; - ptmalloc_init(); - if(ms->magic != MALLOC_STATE_MAGIC) return -1; - /* Must fail if the major version is too high. */ - if((ms->version & ~0xffl) > (MALLOC_STATE_VERSION & ~0xffl)) return -2; - (void)mutex_lock(&main_arena.mutex); - /* There are no fastchunks. */ - clear_fastchunks(&main_arena); - set_max_fast(&main_arena, DEFAULT_MXFAST); - for (i=0; i<(int)NFASTBINS; ++i) - main_arena.fastbins[i] = 0; - for (i=0; i<(int)BINMAPSIZE; ++i) - main_arena.binmap[i] = 0; - top(&main_arena) = ms->av[2]; - main_arena.last_remainder = 0; - for(i=1; iav[2*i+2] == 0) { - assert(ms->av[2*i+3] == 0); - first(b) = last(b) = b; - } else { - if(i<(int)NSMALLBINS || ((int)largebin_index(chunksize(ms->av[2*i+2]))==i && - (int)largebin_index(chunksize(ms->av[2*i+3]))==i)) { - first(b) = ms->av[2*i+2]; - last(b) = ms->av[2*i+3]; - /* Make sure the links to the bins within the heap are correct. */ - first(b)->bk = b; - last(b)->fd = b; - /* Set bit in binblocks. */ - mark_bin(&main_arena, i); - } else { - /* Oops, index computation from chunksize must have changed. - Link the whole list into unsorted_chunks. 
*/ - first(b) = last(b) = b; - b = unsorted_chunks(&main_arena); - ms->av[2*i+2]->bk = b; - ms->av[2*i+3]->fd = b->fd; - b->fd->bk = ms->av[2*i+3]; - b->fd = ms->av[2*i+2]; - } - } - } - mp_.sbrk_base = ms->sbrk_base; - main_arena.system_mem = ms->sbrked_mem_bytes; - mp_.trim_threshold = ms->trim_threshold; - mp_.top_pad = ms->top_pad; - mp_.n_mmaps_max = ms->n_mmaps_max; - mp_.mmap_threshold = ms->mmap_threshold; - check_action = ms->check_action; - main_arena.max_system_mem = ms->max_sbrked_mem; -#ifdef NO_THREADS - mp_.max_total_mem = ms->max_total_mem; -#endif - mp_.n_mmaps = ms->n_mmaps; - mp_.max_n_mmaps = ms->max_n_mmaps; - mp_.mmapped_mem = ms->mmapped_mem; - mp_.max_mmapped_mem = ms->max_mmapped_mem; - /* add version-dependent code here */ - if (ms->version >= 1) { - /* Check whether it is safe to enable malloc checking, or whether - it is necessary to disable it. */ - if (ms->using_malloc_checking && !using_malloc_checking && - !disallow_malloc_check) - __malloc_check_init (); - else if (!ms->using_malloc_checking && using_malloc_checking) { - __malloc_hook = 0; - __free_hook = 0; - __realloc_hook = 0; - __memalign_hook = 0; - using_malloc_checking = 0; - } - } - check_malloc_state(&main_arena); - - (void)mutex_unlock(&main_arena.mutex); - return 0; -} - - -/*------------------------------------------------------------------------- - OMPI change: Per - http://www.gnu.org/software/libc/manual/html_mono/libc.html#Hooks-for-Malloc, - we can define the __malloc_initialize_hook variable to be a - function that is invoked before the first allocation is ever - performed. We use this hook to wholly replace the underlying - allocator to our own allocator if a few conditions are met. - - Remember that this hook is called probably at the very very very - beginning of the process. MCA parameters haven't been setup yet -- - darn near nothing has been setup yet. Indeed, we're effectively in - signal context because we can't call anything that calls malloc. 
- So we can basically have some hard-coded tests for things to see if - we want to setup to use our internal ptmalloc2 or not. */ - -static void *opal_memory_linux_malloc_hook(size_t sz, - const __malloc_ptr_t caller) -{ - return public_mALLOc(sz); -} - -static void *opal_memory_linux_realloc_hook(Void_t* ptr, size_t sz, - const __malloc_ptr_t caller) -{ - return public_rEALLOc(ptr, sz); -} - -static void *opal_memory_linux_memalign_hook(size_t alignment, size_t sz, - const __malloc_ptr_t caller) -{ - return public_mEMALIGn(alignment, sz); -} - -static void opal_memory_linux_free_hook(__malloc_ptr_t __ptr, - const __malloc_ptr_t caller) -{ - public_fREe(__ptr); -} - -typedef enum { - RESULT_NO, - RESULT_YES, - RESULT_RUNTIME, - RESULT_NOT_FOUND -} check_result_t; - -static check_result_t check(const char *name) -{ - char *s = getenv(name); - if (NULL == s) { - return RESULT_NOT_FOUND; - } - - if ('0' == s[0] && '\0' == s[1]) { - /* A value of 0 means "don't use!" */ - return RESULT_NO; - } else if ('-' == s[0] && '1' == s[1] && '\0' == s[2]) { - /* A value of -1 means "use it if it would be advantageous */ - return RESULT_RUNTIME; - } else { - /* Any other value means "use the hooks, Luke!" */ - return RESULT_YES; - } -} - -/* OMPI's init function */ -void opal_memory_linux_malloc_init_hook(void) -{ - check_result_t r1, lp, lpp; - bool want_rcache = false, found_driver = false; - - /* First, check for a FAKEROOT environment. If we're in a - fakeroot, then access() (and likely others) have been replaced - and are not safe to call here in this pre-main environment. So - check for the environment markers that we're in a FAKEROOT. - And if so, return immediately. - - Note that this check was inspired by a problem with Debian's - "fakeroot" build environment that allocates memory during - stat() (see http://bugs.debian.org/531522). It may not be - necessary any more since we're using access(), not stat(). But - we'll leave the check, anyway. 
- - This is also an issue when using Gentoo's version of - 'fakeroot', sandbox v2.5. Sandbox environments can also be - detected fairly easily by looking for SANDBOX_ON. */ - if (getenv("FAKEROOTKEY") != NULL || - getenv("FAKED_MODE") != NULL || - getenv("SANDBOX_ON") != NULL ) { - return; - } - -#if MEMORY_LINUX_UMMUNOTIFY - /* Next, check if ummunotify is present on the system. If it is, - and if we were compile with ummunotify support, then we don't - need to do the following ptmalloc2 hacks. open/mmap on the - device may fail during init, but if /dev/ummunotify exists, we - assume that the user/administrator *wants* to use - ummunotify. */ - if (access("/dev/ummunotify", F_OK) == 0) { - return; - } -#endif - - /* Yes, checking for an MPI MCA parameter here is an abstraction - violation. Cope. Yes, even checking for *any* MCA parameter - here (without going through the MCA param API) is an - abstraction violation. Fricken' cope, will ya? - (unfortunately, there's really no good way to do this other - than this abstraction violation :-( ) */ - lp = check("OPAL_MCA_leave_pinned"); - if( RESULT_NOT_FOUND == lp ) lp = check(OPAL_MCA_PREFIX"mpi_leave_pinned"); - lpp = check("OPAL_MCA_leave_pinned_pipeline"); - if( RESULT_NOT_FOUND == lpp ) lpp = check(OPAL_MCA_PREFIX"mpi_leave_pinned_pipeline"); - - /* See if we want to disable this component. */ - r1 = check(OPAL_MCA_PREFIX"memory_linux_disable"); - if (RESULT_NOT_FOUND != r1 && RESULT_NO != r1) { - return; - } - - /* Look for sentinel files (directories) to see if various network - drivers are loaded (yes, I know, further abstraction - violations...). - - * All OpenFabrics devices will have files in - /sys/class/infiniband (even iWARP) - * Open-MX doesn't currently use a reg cache, but it might - someday. So be conservative and check for /dev/open-mx. - * MX will have one or more of /dev/myri[0-9]. Yuck. 
- */ - if (0 == access("/sys/class/infiniband", F_OK) || - 0 == access("/dev/open-mx", F_OK) || - 0 == access("/dev/myri0", F_OK) || - 0 == access("/dev/myri1", F_OK) || - 0 == access("/dev/myri2", F_OK) || - 0 == access("/dev/myri3", F_OK) || - 0 == access("/dev/myri4", F_OK) || - 0 == access("/dev/myri5", F_OK) || - 0 == access("/dev/myri6", F_OK) || - 0 == access("/dev/myri7", F_OK) || - 0 == access("/dev/myri8", F_OK) || - 0 == access("/dev/myri9", F_OK) || - 0 == access("/dev/ipath", F_OK) || - 0 == access("/dev/kgni0", F_OK) || - 0 == access("/dev/mic/scif", F_OK) || - 0 == access("/dev/scif", F_OK)) { - found_driver = true; - } - - /* Simple combination of the results of these two environment - variables (if both "yes" and "no" are specified, then be - conservative and assume "yes"): - - lp / lpp yes no runtime not found - yes yes yes yes yes - no yes no no no - runtime yes no runtime runtime - not found yes no runtime runtime - */ - if (RESULT_YES == lp || RESULT_YES == lpp) { - want_rcache = true; - } else if (RESULT_NO == lp || RESULT_NO == lpp) { - want_rcache = false; - } else { - want_rcache = found_driver; - } - - if (want_rcache) { - /* Initialize ptmalloc */ - ptmalloc_init(); - - /* Now set the hooks to point to our functions */ - __free_hook = opal_memory_linux_free_hook; - __malloc_hook = opal_memory_linux_malloc_hook; - __memalign_hook = opal_memory_linux_memalign_hook; - __realloc_hook = opal_memory_linux_realloc_hook; - } -} - - -/* OMPI change: prototype the function below, otherwise we'll get - warnings about it not being declared (at least in developer/debug - builds). This function is not DECLSPEC'ed because we don't want it - visible outside of this component (i.e., libopen-pal, since this - component is never built as a DSO; it's always slurped into - libopen-pal). This declaration is not in malloc.h because this - function only exists as a horrid workaround to force linkers to - pull in this .o file (see explanation below). 
*/ -void opal_memory_linux_hook_pull(bool *want_hooks); - -/* OMPI change: add a dummy function here that will be called by the - linux component open() function. This dummy function is - necessary for when OMPI is built as --disable-shared - --enable-static --disable-dlopen, because we won't use - -Wl,--export-dynamic when building OMPI. So we need to ensure that - not only that all the symbols in this file end up in libopen-pal.a, - but they also end up in the final exectuable (so that - __malloc_initialize_hook is there, overrides the weak symbol in - glibc, ....etc.). */ -void opal_memory_linux_hook_pull(bool *want_hooks) -{ - /* Make this slightly less than a dummy function -- register the - MCA parameter here (that way we keep the name of this MCA - parameter here within this one, single file). Register solely - so that it shows up in ompi_info -- by the time we register it, - the _malloc_init_hook() has almost certainly already fired, so - whatever value was set via normal MCA mechanisms likely won't - be see if it wasn't already see by the getenv() in the - _malloc_init_hook(). */ - *want_hooks = !opal_memory_linux_disable; -} - - - -/* - * Local variables: - * c-basic-offset: 4 - * End: - */ diff --git a/opal/mca/memory/linux/lran2.h b/opal/mca/memory/linux/lran2.h deleted file mode 100644 index cea9920282a..00000000000 --- a/opal/mca/memory/linux/lran2.h +++ /dev/null @@ -1,51 +0,0 @@ -/* lran2.h - * by Wolfram Gloger 1996. - * - * A small, portable pseudo-random number generator. - */ - -#ifndef _LRAN2_H -#define _LRAN2_H - -#define LRAN2_MAX 714025l /* constants for portable */ -#define IA 1366l /* random number generator */ -#define IC 150889l /* (see e.g. 
`Numerical Recipes') */ - -struct lran2_st { - long x, y, v[97]; -}; - -static void -lran2_init(struct lran2_st* d, long seed) -{ - long x; - int j; - - x = (IC - seed) % LRAN2_MAX; - if(x < 0) x = -x; - for(j=0; j<97; j++) { - x = (IA*x + IC) % LRAN2_MAX; - d->v[j] = x; - } - d->x = (IA*x + IC) % LRAN2_MAX; - d->y = d->x; -} - -#ifdef __GNUC__ -__inline__ -#endif -static long -lran2(struct lran2_st* d) -{ - int j = (d->y % 97); - - d->y = d->v[j]; - d->x = (IA*d->x + IC) % LRAN2_MAX; - d->v[j] = d->x; - return d->y; -} - -#undef IA -#undef IC - -#endif diff --git a/opal/mca/memory/linux/malloc-stats.c b/opal/mca/memory/linux/malloc-stats.c deleted file mode 100644 index cfb9fb23df4..00000000000 --- a/opal/mca/memory/linux/malloc-stats.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * - * Additional copyrights may follow. - */ -/* Malloc implementation for multiple threads; statistics printing. - Copyright (C) 2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Wolfram Gloger , 2004. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. 
*/ - -/* $Id: $ */ - -/* OMPI change: Name-shift all the internal symbols */ -#include "opal/mca/memory/linux/rename.h" - -#include /* needed for malloc_stats */ - -#include - -#include "malloc.h" - -/* - Define HAVE_MMAP as true to optionally make malloc() use mmap() to - allocate very large blocks. These will be returned to the - operating system immediately after a free(). Also, if mmap - is available, it is used as a backup strategy in cases where - MORECORE fails to provide space from system. - - This malloc is best tuned to work with mmap for large requests. - If you do not have mmap, operations involving very large chunks (1MB - or so) may be slower than you'd like. -*/ - -#ifndef HAVE_MMAP -#define HAVE_MMAP 1 -#endif - -#ifdef USE_DL_PREFIX - -#define public_mSTATs dlmalloc_stats - -#else /* USE_DL_PREFIX */ -#ifdef _LIBC - -#define public_mSTATs __malloc_stats - -#else /* !_LIBC */ - -#define public_mSTATs malloc_stats - -#endif /* _LIBC */ -#endif /* USE_DL_PREFIX */ - -/* - malloc_stats(); - Prints on stderr the amount of space obtained from the system (both - via sbrk and mmap), the maximum amount (which may be more than - current if malloc_trim and/or munmap got called), and the current - number of bytes allocated via malloc (or realloc, etc) but not yet - freed. Note that this is the number of bytes allocated, not the - number requested. It will be larger than the number requested - because of alignment and bookkeeping overhead. Because it includes - alignment wastage as being in use, this figure may be greater than - zero even when no user-level chunks are allocated. - - The reported current and maximum system memory can be inaccurate if - a program makes other calls to system memory allocation functions - (normally sbrk) outside of malloc. - - malloc_stats prints only the most commonly interesting statistics. - More information can be obtained by calling mallinfo. 
- -*/ -void public_mSTATs __MALLOC_P((void)); - -/* - ------------------------------ malloc_stats ------------------------------ -*/ - -void public_mSTATs() -{ - int i; - mstate ar_ptr; - struct malloc_global_info mgi; - struct malloc_arena_info mai; - unsigned long in_use_b, system_b, avail_b; -#if defined(THREAD_STATS) && THREAD_STATS - long stat_lock_direct = 0, stat_lock_loop = 0, stat_lock_wait = 0; -#endif - -#if 0 - if(__malloc_initialized < 0) - ptmalloc_init (); -#endif - _int_get_global_info(&mgi); - system_b = in_use_b = mgi.mmapped_mem; -#ifdef _LIBC - _IO_flockfile (stderr); - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; -#endif - for (i=0; (ar_ptr = _int_get_arena(i)); i++) { - _int_get_arena_info(ar_ptr, &mai); - avail_b = mai.fastavail + mai.binavail + mai.top_size; - fprintf(stderr, "Arena %d:\n", i); - fprintf(stderr, "system bytes = %10lu\n", - (unsigned long)mai.system_mem); - fprintf(stderr, "in use bytes = %10lu\n", - (unsigned long)(mai.system_mem - avail_b)); -#if MALLOC_DEBUG > 1 - if (i > 0) - dump_heap(heap_for_ptr(top(ar_ptr))); -#endif - system_b += mai.system_mem; - in_use_b += mai.system_mem - avail_b; -#if defined(THREAD_STATS) && THREAD_STATS - stat_lock_direct += mai.stat_lock_direct; - stat_lock_loop += mai.stat_lock_loop; - stat_lock_wait += mai.stat_lock_wait; -#endif - } -#if HAVE_MMAP - fprintf(stderr, "Total (incl. 
mmap):\n"); -#else - fprintf(stderr, "Total:\n"); -#endif - fprintf(stderr, "system bytes = %10lu\n", system_b); - fprintf(stderr, "in use bytes = %10lu\n", in_use_b); -#ifdef NO_THREADS - fprintf(stderr, "max system bytes = %10lu\n", - (unsigned long)mgi.max_total_mem); -#endif -#if HAVE_MMAP - fprintf(stderr, "max mmap regions = %10u\n", (unsigned int)mgi.max_n_mmaps); - fprintf(stderr, "max mmap bytes = %10lu\n", - (unsigned long)mgi.max_mmapped_mem); -#endif -#if defined(THREAD_STATS) && THREAD_STATS - fprintf(stderr, "heaps created = %10d\n", mgi.stat_n_heaps); - fprintf(stderr, "locked directly = %10ld\n", stat_lock_direct); - fprintf(stderr, "locked in loop = %10ld\n", stat_lock_loop); - fprintf(stderr, "locked waiting = %10ld\n", stat_lock_wait); - fprintf(stderr, "locked total = %10ld\n", - stat_lock_direct + stat_lock_loop + stat_lock_wait); -#endif -#ifdef _LIBC - ((_IO_FILE *) stderr)->_flags2 |= old_flags2; - _IO_funlockfile (stderr); -#endif -} - -#ifdef _LIBC -weak_alias (__malloc_stats, malloc_stats) -#endif diff --git a/opal/mca/memory/linux/malloc.c b/opal/mca/memory/linux/malloc.c deleted file mode 100644 index d02180956a6..00000000000 --- a/opal/mca/memory/linux/malloc.c +++ /dev/null @@ -1,5599 +0,0 @@ -/********************** BEGIN OMPI CHANGES *****************************/ -/* - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * - * Additional copyrights may follow. - */ - -#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1 -#include "opal_config.h" - -#include -#include - -#include "opal/sys/atomic.h" -#include "opal/memoryhooks/memory_internal.h" -#include "opal/mca/memory/linux/memory_linux.h" -/* Name-shift all the internal symbols */ -#include "opal/mca/memory/linux/rename.h" - -/* - * Not all systems have sbrk() declared, since it's technically not a - * POSIX function. 
- */ -#if !HAVE_DECL_SBRK -void *sbrk(); -#endif - - -static void *opal_memory_linux_free_ptmalloc2_sbrk(int inc) -{ - if (inc < 0) { - long oldp = (long) sbrk(0); - opal_mem_hooks_release_hook((void*) (oldp + inc), -inc, 1); - } - - return sbrk(inc); -} - -/* if we are trying to catch only allocations from and releases to the - operating system, intercept sbrk, mmap, and munmap. If we want to - intercept every call to malloc/realloc/free/etc., don't do this, as - we need to add something into each of those calls anyway. */ -#define MORECORE opal_memory_linux_free_ptmalloc2_sbrk -#define munmap(a,b) opal_memory_linux_free_ptmalloc2_munmap(a,b,1) - -/* make some non-GCC compilers happy */ -#ifndef __GNUC__ -#define __const const -#endif - -/********************* END OMPI CHANGES ******************************/ - - - - -/* Malloc implementation for multiple threads without lock contention. - Copyright (C) 1996-2002, 2003, 2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Wolfram Gloger - and Doug Lea , 2001. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public License as - published by the Free Software Foundation; either version 2.1 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* - This is a version (aka ptmalloc2) of malloc/free/realloc written by - Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger. 
- -* Version ptmalloc2-20011215 - $Id: malloc.c,v 1.20 2004/11/04 17:31:04 wg Exp $ - based on: - VERSION 2.7.1pre1 Sat May 12 07:41:21 2001 Doug Lea (dl at gee) - - Note: There may be an updated version of this malloc obtainable at - http://www.malloc.de/malloc/ptmalloc2.tar.gz - Check before installing! - -* Quickstart - - In order to compile this implementation, a Makefile is provided with - the ptmalloc2 distribution, which has pre-defined targets for some - popular systems (e.g. "make posix" for Posix threads). All that is - typically required with regard to compiler flags is the selection of - an appropriate malloc-machine.h include file via -I directives. - Many/most systems will additionally require USE_TSD_DATA_HACK to be - defined, so this is the default for "make posix". - -* Why use this malloc? - - This is not the fastest, most space-conserving, most portable, or - most tunable malloc ever written. However it is among the fastest - while also being among the most space-conserving, portable and tunable. - Consistent balance across these factors results in a good general-purpose - allocator for malloc-intensive programs. - - The main properties of the algorithms are: - * For large (>= 512 bytes) requests, it is a pure best-fit allocator, - with ties normally decided via FIFO (i.e. least recently used). - * For small (<= 64 bytes by default) requests, it is a caching - allocator, that maintains pools of quickly recycled chunks. - * In between, and for combinations of large and small requests, it does - the best it can trying to meet both goals at once. - * For very large requests (>= 128KB by default), it relies on system - memory mapping facilities, if supported. - - For a longer but slightly out of date high-level description, see - http://gee.cs.oswego.edu/dl/html/malloc.html - - You may already by default be using a C library containing a malloc - that is based on some version of this malloc (for example in - linux). 
You might still want to use the one in this file in order to - customize settings or to avoid overheads associated with library - versions. - -* Contents, described in more detail in "description of public routines" below. - - Standard (ANSI/SVID/...) functions: - malloc(size_t n); - calloc(size_t n_elements, size_t element_size); - free(Void_t* p); - realloc(Void_t* p, size_t n); - memalign(size_t alignment, size_t n); - valloc(size_t n); - mallinfo() - mallopt(int parameter_number, int parameter_value) - - Additional functions: - independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]); - independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); - pvalloc(size_t n); - cfree(Void_t* p); - malloc_trim(size_t pad); - malloc_usable_size(Void_t* p); - malloc_stats(); - -* Vital statistics: - - Supported pointer representation: 4 or 8 bytes - Supported size_t representation: 4 or 8 bytes - Note that size_t is allowed to be 4 bytes even if pointers are 8. - You can adjust this by defining INTERNAL_SIZE_T - - Alignment: 2 * sizeof(size_t) (default) - (i.e., 8 byte alignment with 4byte size_t). This suffices for - nearly all current machines and C compilers. However, you can - define MALLOC_ALIGNMENT to be wider than this if necessary. - - Minimum overhead per allocated chunk: 4 or 8 bytes - Each malloced chunk has a hidden word of overhead holding size - and status information. - - Minimum allocated size: 4-byte ptrs: 16 bytes (including 4 overhead) - 8-byte ptrs: 24/32 bytes (including, 4/8 overhead) - - When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte - ptrs but 4 byte size) or 24 (for 8/8) additional bytes are - needed; 4 (8) for a trailing size field and 8 (16) bytes for - free list pointers. Thus, the minimum allocatable size is - 16/24/32 bytes. - - Even a request for zero bytes (i.e., malloc(0)) returns a - pointer to something of the minimum allocatable size. 
- - The maximum overhead wastage (i.e., number of extra bytes - allocated than were requested in malloc) is less than or equal - to the minimum size, except for requests >= mmap_threshold that - are serviced via mmap(), where the worst case wastage is 2 * - sizeof(size_t) bytes plus the remainder from a system page (the - minimal mmap unit); typically 4096 or 8192 bytes. - - Maximum allocated size: 4-byte size_t: 2^32 minus about two pages - 8-byte size_t: 2^64 minus about two pages - - It is assumed that (possibly signed) size_t values suffice to - represent chunk sizes. `Possibly signed' is due to the fact - that `size_t' may be defined on a system as either a signed or - an unsigned type. The ISO C standard says that it must be - unsigned, but a few systems are known not to adhere to this. - Additionally, even when size_t is unsigned, sbrk (which is by - default used to obtain memory from system) accepts signed - arguments, and may not be able to handle size_t-wide arguments - with negative sign bit. Generally, values that would - appear as negative after accounting for overhead and alignment - are supported only via mmap(), which does not have this - limitation. - - Requests for sizes outside the allowed range will perform an optional - failure action and then return null. (Requests may also - also fail because a system is out of memory.) - - Thread-safety: thread-safe unless NO_THREADS is defined - - Compliance: I believe it is compliant with the 1997 Single Unix Specification - (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably - others as well. - -* Synopsis of compile-time options: - - People have reported using previous versions of this malloc on all - versions of Unix, sometimes by tweaking some of the defines - below. It has been tested most extensively on Solaris and - Linux. It is also reported to work on WIN32 platforms. - People also report using it in stand-alone embedded systems. 
- - The implementation is in straight, hand-tuned ANSI C. It is not - at all modular. (Sorry!) It uses a lot of macros. To be at all - usable, this code should be compiled using an optimizing compiler - (for example gcc -O3) that can simplify expressions and control - paths. (FAQ: some macros import variables as arguments rather than - declare locals because people reported that some debuggers - otherwise get confused.) - - OPTION DEFAULT VALUE - - Compilation Environment options: - - __STD_C derived from C compiler defines - WIN32 NOT defined - HAVE_MEMCPY defined - USE_MEMCPY 1 if HAVE_MEMCPY is defined - HAVE_MMAP defined as 1 - MMAP_CLEARS 1 - HAVE_MREMAP 0 unless linux defined - USE_ARENAS the same as HAVE_MMAP - USE_STARTER 1 - malloc_getpagesize derived from system #includes, or 4096 if not - HAVE_USR_INCLUDE_MALLOC_H NOT defined - LACKS_UNISTD_H NOT defined unless WIN32 - LACKS_SYS_PARAM_H NOT defined unless WIN32 - LACKS_SYS_MMAN_H NOT defined unless WIN32 - - Changing default word sizes: - - INTERNAL_SIZE_T size_t - MALLOC_ALIGNMENT 2 * sizeof(INTERNAL_SIZE_T) - - Configuration and functionality options: - - USE_DL_PREFIX NOT defined - USE_PUBLIC_MALLOC_WRAPPERS NOT defined - USE_MALLOC_LOCK NOT defined - MALLOC_DEBUG NOT defined - REALLOC_ZERO_BYTES_FREES 1 - MALLOC_FAILURE_ACTION errno = ENOMEM, if __STD_C defined, else no-op - TRIM_FASTBINS 0 - FIRST_SORTED_BIN_SIZE 512 - - Options for customizing MORECORE: - - MORECORE sbrk - MORECORE_FAILURE -1 - MORECORE_CONTIGUOUS 1 - MORECORE_CANNOT_TRIM NOT defined - MORECORE_CLEARS 1 - MMAP_AS_MORECORE_SIZE (1024 * 1024) - - Tuning options that are also dynamically changeable via mallopt: - - DEFAULT_MXFAST 64 - DEFAULT_TRIM_THRESHOLD 128 * 1024 - DEFAULT_TOP_PAD 0 - DEFAULT_MMAP_THRESHOLD 128 * 1024 - DEFAULT_MMAP_MAX 65536 - - There are several other #defined constants and macros that you - probably don't want to touch unless you are extending or adapting malloc. 
*/ - -/* - __STD_C should be nonzero if using ANSI-standard C compiler, a C++ - compiler, or a C compiler sufficiently close to ANSI to get away - with it. -*/ - -#ifndef __STD_C -#if defined(__STDC__) || defined(__cplusplus) -#define __STD_C 1 -#else -#define __STD_C 0 -#endif -#endif /*__STD_C*/ - - -/* - Void_t* is the pointer type that malloc should say it returns -*/ - -#ifndef Void_t -#if (__STD_C || defined(WIN32)) -#define Void_t void -#else -#define Void_t char -#endif -#endif /*Void_t*/ - -#if __STD_C -#include /* for size_t */ -#include /* for getenv(), abort() */ -#else -#include -#endif - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* define LACKS_UNISTD_H if your system does not have a . */ - -/* #define LACKS_UNISTD_H */ - -#ifndef LACKS_UNISTD_H -#include -#endif - -/* define LACKS_SYS_PARAM_H if your system does not have a . */ - -/* #define LACKS_SYS_PARAM_H */ - - -#include /* needed for malloc_stats */ -#include /* needed for optional MALLOC_FAILURE_ACTION */ - - -/* - Debugging: - - Because freed chunks may be overwritten with bookkeeping fields, this - malloc will often die when freed memory is overwritten by user - programs. This can be very effective (albeit in an annoying way) - in helping track down dangling pointers. - - If you compile with -DMALLOC_DEBUG, a number of assertion checks are - enabled that will catch more memory errors. You probably won't be - able to make much sense of the actual assertion errors, but they - should help you locate incorrectly overwritten memory. The checking - is fairly extensive, and will slow down execution - noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set - will attempt to check every non-mmapped allocated and free chunk in - the course of computing the summmaries. (By nature, mmapped regions - cannot be checked very much automatically.) - - Setting MALLOC_DEBUG may also be helpful if you are trying to modify - this code. 
The assertions in the check routines spell out in more - detail the assumptions and invariants underlying the algorithms. - - Setting MALLOC_DEBUG does NOT provide an automated mechanism for - checking that all accesses to malloced memory stay within their - bounds. However, there are several add-ons and adaptations of this - or other mallocs available that do this. -*/ - -#if MALLOC_DEBUG -#include -#else -#undef assert -#define assert(x) ((void)0) -#endif - - -/* - INTERNAL_SIZE_T is the word-size used for internal bookkeeping - of chunk sizes. - - The default version is the same as size_t. - - While not strictly necessary, it is best to define this as an - unsigned type, even if size_t is a signed type. This may avoid some - artificial size limitations on some systems. - - On a 64-bit machine, you may be able to reduce malloc overhead by - defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the - expense of not being able to handle more than 2^32 of malloced - space. If this limitation is acceptable, you are encouraged to set - this unless you are on a platform requiring 16byte alignments. In - this case the alignment requirements turn out to negate any - potential advantages of decreasing size_t word size. - - Implementors: Beware of the possible combinations of: - - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits, - and might be the same width as int or as long - - size_t might have different width and signedness as INTERNAL_SIZE_T - - int and long might be 32 or 64 bits, and might be the same width - To deal with this, most comparisons and difference computations - among INTERNAL_SIZE_Ts should cast them to unsigned long, being - aware of the fact that casting an unsigned int to a wider long does - not sign-extend. (This also makes checking for negative numbers - awkward.) Some of these casts result in harmless compiler warnings - on some systems. 
-*/ - -#ifndef INTERNAL_SIZE_T -#define INTERNAL_SIZE_T size_t -#endif - -/* The corresponding word size */ -#define SIZE_SZ (sizeof(INTERNAL_SIZE_T)) - - -/* - MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks. - It must be a power of two at least 2 * SIZE_SZ, even on machines - for which smaller alignments would suffice. It may be defined as - larger than this though. Note however that code and data structures - are optimized for the case of 8-byte alignment. -*/ - - -#ifndef MALLOC_ALIGNMENT -#define MALLOC_ALIGNMENT (2 * SIZE_SZ) -#endif - -/* The corresponding bit mask value */ -#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1) - - - -/* - REALLOC_ZERO_BYTES_FREES should be set if a call to - realloc with zero bytes should be the same as a call to free. - This is required by the C standard. Otherwise, since this malloc - returns a unique pointer for malloc(0), so does realloc(p, 0). -*/ - -#ifndef REALLOC_ZERO_BYTES_FREES -#define REALLOC_ZERO_BYTES_FREES 1 -#endif - -/* - TRIM_FASTBINS controls whether free() of a very small chunk can - immediately lead to trimming. Setting to true (1) can reduce memory - footprint, but will almost always slow down programs that use a lot - of small chunks. - - Define this only if you are willing to give up some speed to more - aggressively reduce system-level memory footprint when releasing - memory in programs that use many small chunks. You can get - essentially the same effect by setting MXFAST to 0, but this can - lead to even greater slowdowns in programs using many small chunks. - TRIM_FASTBINS is an in-between compile-time option, that disables - only those chunks bordering topmost memory from being placed in - fastbins. -*/ - -#ifndef TRIM_FASTBINS -#define TRIM_FASTBINS 0 -#endif - - -/* - USE_DL_PREFIX will prefix all public routines with the string 'dl'. - This is necessary when you only want to use this malloc in one part - of a program, using your regular system malloc elsewhere. 
-*/ - -/* #define USE_DL_PREFIX */ - - -/* - Two-phase name translation. - All of the actual routines are given mangled names. - When wrappers are used, they become the public callable versions. - When DL_PREFIX is used, the callable names are prefixed. -*/ - -#ifdef USE_DL_PREFIX -#define public_cALLOc dlcalloc -#define public_fREe dlfree -#define public_cFREe dlcfree -#define public_mALLOc dlmalloc -#define public_mEMALIGn dlmemalign -#define public_rEALLOc dlrealloc -#define public_vALLOc dlvalloc -#define public_pVALLOc dlpvalloc -#define public_mALLINFo dlmallinfo -#define public_mALLOPt dlmallopt -#define public_mTRIm dlmalloc_trim -#define public_mSTATs dlmalloc_stats -#define public_mUSABLe dlmalloc_usable_size -#define public_iCALLOc dlindependent_calloc -#define public_iCOMALLOc dlindependent_comalloc -#define public_gET_STATe dlget_state -#define public_sET_STATe dlset_state -#else /* USE_DL_PREFIX */ -#ifdef _LIBC - -/* Special defines for the GNU C library. */ -#define public_cALLOc __libc_calloc -#define public_fREe __libc_free -#define public_cFREe __libc_cfree -#define public_mALLOc __libc_malloc -#define public_mEMALIGn __libc_memalign -#define public_rEALLOc __libc_realloc -#define public_vALLOc __libc_valloc -#define public_pVALLOc __libc_pvalloc -#define public_mALLINFo __libc_mallinfo -#define public_mALLOPt __libc_mallopt -#define public_mTRIm __malloc_trim -#define public_mSTATs __malloc_stats -#define public_mUSABLe __malloc_usable_size -#define public_iCALLOc __libc_independent_calloc -#define public_iCOMALLOc __libc_independent_comalloc -#define public_gET_STATe __malloc_get_state -#define public_sET_STATe __malloc_set_state -#define malloc_getpagesize __getpagesize() -#define open __open -#define mmap __mmap -#define munmap __munmap -#define mremap __mremap -#define mprotect __mprotect -#define MORECORE (*__morecore) -#define MORECORE_FAILURE 0 - -Void_t * __default_morecore (ptrdiff_t); -Void_t *(*__morecore)(ptrdiff_t) = 
__default_morecore; - -#else /* !_LIBC */ - /* OMPI change: put "opal_memory_ptmalloc2_" in front of all of these */ -#define public_cALLOc opal_memory_ptmalloc2_calloc -#define public_fREe opal_memory_ptmalloc2_free -#define public_cFREe opal_memory_ptmalloc2_cfree -#define public_mALLOc opal_memory_ptmalloc2_malloc -#define public_mEMALIGn opal_memory_ptmalloc2_memalign -#define public_rEALLOc opal_memory_ptmalloc2_realloc -#define public_vALLOc opal_memory_ptmalloc2_valloc -#define public_pVALLOc opal_memory_ptmalloc2_pvalloc -#define public_mALLINFo opal_memory_ptmalloc2_mallinfo -#define public_mALLOPt opal_memory_ptmalloc2_mallopt -#define public_mTRIm opal_memory_ptmalloc2_malloc_trim -#define public_mSTATs opal_memory_ptmalloc2_malloc_stats -#define public_mUSABLe opal_memory_ptmalloc2_malloc_usable_size -#define public_iCALLOc opal_memory_ptmalloc2_independent_calloc -#define public_iCOMALLOc opal_memory_ptmalloc2_independent_comalloc -#define public_gET_STATe opal_memory_ptmalloc2_malloc_get_state -#define public_sET_STATe opal_memory_ptmalloc2_malloc_set_state -#endif /* _LIBC */ -#endif /* USE_DL_PREFIX */ - - -#if !defined _LIBC && (!defined __GNUC__ || __GNUC__<3) -#ifndef __builtin_expect -#define __builtin_expect(expr, val) (expr) -#endif -#endif - -/* - HAVE_MEMCPY should be defined if you are not otherwise using - ANSI STD C, but still have memcpy and memset in your C library - and want to use them in calloc and realloc. Otherwise simple - macro versions are defined below. - - USE_MEMCPY should be defined as 1 if you actually want to - have memset and memcpy called. People report that the macro - versions are faster than libc versions on some systems. - - Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks - (of <= 36 bytes) are manually unrolled in realloc and calloc. 
-*/ - -#define HAVE_MEMCPY - -#ifndef USE_MEMCPY -#ifdef HAVE_MEMCPY -#define USE_MEMCPY 1 -#else -#define USE_MEMCPY 0 -#endif -#endif - - -#if (__STD_C || defined(HAVE_MEMCPY)) - -#ifdef _LIBC -# include -#else -#ifdef WIN32 -/* On Win32 memset and memcpy are already declared in windows.h */ -#else -#if __STD_C -void* memset(void*, int, size_t); -void* memcpy(void*, const void*, size_t); -#else -Void_t* memset(); -Void_t* memcpy(); -#endif -#endif -#endif -#endif - -/* - MALLOC_FAILURE_ACTION is the action to take before "return 0" when - malloc fails to be able to return memory, either because memory is - exhausted or because of illegal arguments. - - By default, sets errno if running on STD_C platform, else does nothing. -*/ - -#ifndef MALLOC_FAILURE_ACTION -#if __STD_C -#define MALLOC_FAILURE_ACTION \ - errno = ENOMEM; - -#else -#define MALLOC_FAILURE_ACTION -#endif -#endif - -/* - MORECORE-related declarations. By default, rely on sbrk -*/ - - -#ifdef LACKS_UNISTD_H -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) -#if __STD_C -extern Void_t* sbrk(ptrdiff_t); -#else -extern Void_t* sbrk(); -#endif -#endif -#endif - -/* - MORECORE is the name of the routine to call to obtain more memory - from the system. See below for general guidance on writing - alternative MORECORE functions, as well as a version for WIN32 and a - sample version for pre-OSX macos. -*/ - -#ifndef MORECORE -#define MORECORE sbrk -#endif - -/* - MORECORE_FAILURE is the value returned upon failure of MORECORE - as well as mmap. Since it cannot be an otherwise valid memory address, - and must reflect values of standard sys calls, you probably ought not - try to redefine it. -*/ - -#ifndef MORECORE_FAILURE -#define MORECORE_FAILURE (-1) -#endif - -/* - If MORECORE_CONTIGUOUS is true, take advantage of fact that - consecutive calls to MORECORE with positive arguments always return - contiguous increasing addresses. This is true of unix sbrk. 
Even - if not defined, when regions happen to be contiguous, malloc will - permit allocations spanning regions obtained from different - calls. But defining this when applicable enables some stronger - consistency checks and space efficiencies. -*/ - -#ifndef MORECORE_CONTIGUOUS -#define MORECORE_CONTIGUOUS 1 -#endif - -/* - Define MORECORE_CANNOT_TRIM if your version of MORECORE - cannot release space back to the system when given negative - arguments. This is generally necessary only if you are using - a hand-crafted MORECORE function that cannot handle negative arguments. -*/ - -/* #define MORECORE_CANNOT_TRIM */ - -/* MORECORE_CLEARS (default 1) - The degree to which the routine mapped to MORECORE zeroes out - memory: never (0), only for newly allocated space (1) or always - (2). The distinction between (1) and (2) is necessary because on - some systems, if the application first decrements and then - increments the break value, the contents of the reallocated space - are unspecified. -*/ - -#ifndef MORECORE_CLEARS -#define MORECORE_CLEARS 1 -#endif - - -/* - Define HAVE_MMAP as true to optionally make malloc() use mmap() to - allocate very large blocks. These will be returned to the - operating system immediately after a free(). Also, if mmap - is available, it is used as a backup strategy in cases where - MORECORE fails to provide space from system. - - This malloc is best tuned to work with mmap for large requests. - If you do not have mmap, operations involving very large chunks (1MB - or so) may be slower than you'd like. -*/ - -#ifndef HAVE_MMAP -#define HAVE_MMAP 1 - -/* - Standard unix mmap using /dev/zero clears memory so calloc doesn't - need to. -*/ - -#ifndef MMAP_CLEARS -#define MMAP_CLEARS 1 -#endif - -#else /* no mmap */ -#ifndef MMAP_CLEARS -#define MMAP_CLEARS 0 -#endif -#endif - - -/* - MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if - sbrk fails, and mmap is used as a backup (which is done only if - HAVE_MMAP). 
The value must be a multiple of page size. This - backup strategy generally applies only when systems have "holes" in - address space, so sbrk cannot perform contiguous expansion, but - there is still space available on system. On systems for which - this is known to be useful (i.e. most linux kernels), this occurs - only when programs allocate huge amounts of memory. Between this, - and the fact that mmap regions tend to be limited, the size should - be large, to avoid too many mmap calls and thus avoid running out - of kernel resources. -*/ - -#ifndef MMAP_AS_MORECORE_SIZE -#define MMAP_AS_MORECORE_SIZE (1024 * 1024) -#endif - -/* - Define HAVE_MREMAP to make realloc() use mremap() to re-allocate - large blocks. This is currently only possible on Linux with - kernel versions newer than 1.3.77. -*/ - -#ifndef HAVE_MREMAP -#ifdef linux -#define HAVE_MREMAP 1 -#else -#define HAVE_MREMAP 0 -#endif - -#endif /* HAVE_MMAP */ - -/* Define USE_ARENAS to enable support for multiple `arenas'. These - are allocated using mmap(), are necessary for threads and - occasionally useful to overcome address space limitations affecting - sbrk(). */ - -#ifndef USE_ARENAS -#define USE_ARENAS HAVE_MMAP -#endif - -/* USE_STARTER determines if and when the special "starter" hook - functions are used: not at all (0), during ptmalloc_init (first bit - set), or from the beginning until an explicit call to ptmalloc_init - (second bit set). This is necessary if thread-related - initialization functions (e.g. pthread_key_create) require - malloc() calls (set USE_STARTER=1), or if those functions initially - cannot be used at all (set USE_STARTER=2 and perform an explicit - ptmalloc_init() when the thread library is ready, typically at the - start of main()). */ - -#ifndef USE_STARTER -# ifndef _LIBC -# define USE_STARTER 1 -# else -# if USE___THREAD || (defined USE_TLS && !defined SHARED) - /* These routines are never needed in this configuration. 
*/ -# define USE_STARTER 0 -# else -# define USE_STARTER (USE_TLS ? 4 : 1) -# endif -# endif -#endif - - -/* - The system page size. To the extent possible, this malloc manages - memory from the system in page-size units. Note that this value is - cached during initialization into a field of malloc_state. So even - if malloc_getpagesize is a function, it is only called once. - - The following mechanics for getpagesize were adapted from bsd/gnu - getpagesize.h. If none of the system-probes here apply, a value of - 4096 is used, which should be OK: If they don't apply, then using - the actual value probably doesn't impact performance. -*/ - - -#ifndef malloc_getpagesize - -#ifndef LACKS_UNISTD_H -# include -#endif - -# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ -# ifndef _SC_PAGE_SIZE -# define _SC_PAGE_SIZE _SC_PAGESIZE -# endif -# endif - -# ifdef _SC_PAGE_SIZE -# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) -# else -# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) - extern size_t getpagesize(); -# define malloc_getpagesize getpagesize() -# else -# ifdef WIN32 /* use supplied emulation of getpagesize */ -# define malloc_getpagesize getpagesize() -# else -# ifndef LACKS_SYS_PARAM_H -# include -# endif -# ifdef EXEC_PAGESIZE -# define malloc_getpagesize EXEC_PAGESIZE -# else -# ifdef NBPG -# ifndef CLSIZE -# define malloc_getpagesize NBPG -# else -# define malloc_getpagesize (NBPG * CLSIZE) -# endif -# else -# ifdef NBPC -# define malloc_getpagesize NBPC -# else -# ifdef PAGESIZE -# define malloc_getpagesize PAGESIZE -# else /* just guess */ -# define malloc_getpagesize (4096) -# endif -# endif -# endif -# endif -# endif -# endif -# endif -#endif - -/* - This version of malloc supports the standard SVID/XPG mallinfo - routine that returns a struct containing usage properties and - statistics. It should work on any SVID/XPG compliant system that has - a /usr/include/malloc.h defining struct mallinfo. 
(If you'd like to - install such a thing yourself, cut out the preliminary declarations - as described above and below and save them in a malloc.h file. But - there's no compelling reason to bother to do this.) - - The main declaration needed is the mallinfo struct that is returned - (by-copy) by mallinfo(). The SVID/XPG malloinfo struct contains a - bunch of fields that are not even meaningful in this version of - malloc. These fields are are instead filled by mallinfo() with - other numbers that might be of interest. - - HAVE_USR_INCLUDE_MALLOC_H should be set if you have a - /usr/include/malloc.h file that includes a declaration of struct - mallinfo. If so, it is included; else an SVID2/XPG2 compliant - version is declared below. These must be precisely the same for - mallinfo() to work. The original SVID version of this struct, - defined on most systems with mallinfo, declares all fields as - ints. But some others define as unsigned long. If your system - defines the fields using a type of different width than listed here, - you must #include your system version and #define - HAVE_USR_INCLUDE_MALLOC_H. -*/ - -/* #define HAVE_USR_INCLUDE_MALLOC_H */ - -#ifdef HAVE_USR_INCLUDE_MALLOC_H -#include "/usr/include/malloc.h" -#endif - - -/* ---------- description of public routines ------------ */ - -/* - malloc(size_t n) - Returns a pointer to a newly allocated chunk of at least n bytes, or null - if no space is available. Additionally, on failure, errno is - set to ENOMEM on ANSI C systems. - - If n is zero, malloc returns a minumum-sized chunk. (The minimum - size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit - systems.) On most systems, size_t is an unsigned type, so calls - with negative arguments are interpreted as requests for huge amounts - of space, which will often fail. The maximum supported value of n - differs across systems, but is in all cases less than the maximum - representable value of a size_t. 
-*/ -#if __STD_C -Void_t* public_mALLOc(size_t); -#else -Void_t* public_mALLOc(); -#endif -#ifdef libc_hidden_proto -libc_hidden_proto (public_mALLOc) -#endif - -/* - free(Void_t* p) - Releases the chunk of memory pointed to by p, that had been previously - allocated using malloc or a related routine such as realloc. - It has no effect if p is null. It can have arbitrary (i.e., bad!) - effects if p has already been freed. - - Unless disabled (using mallopt), freeing very large spaces will - when possible, automatically trigger operations that give - back unused memory to the system, thus reducing program footprint. -*/ -#if __STD_C -void public_fREe(Void_t*); -#else -void public_fREe(); -#endif -#ifdef libc_hidden_proto -libc_hidden_proto (public_fREe) -#endif - -/* - calloc(size_t n_elements, size_t element_size); - Returns a pointer to n_elements * element_size bytes, with all locations - set to zero. -*/ -#if __STD_C -Void_t* public_cALLOc(size_t, size_t); -#else -Void_t* public_cALLOc(); -#endif - -/* - realloc(Void_t* p, size_t n) - Returns a pointer to a chunk of size n that contains the same data - as does chunk p up to the minimum of (n, p's size) bytes, or null - if no space is available. - - The returned pointer may or may not be the same as p. The algorithm - prefers extending p when possible, otherwise it employs the - equivalent of a malloc-copy-free sequence. - - If p is null, realloc is equivalent to malloc. - - If space is not available, realloc returns null, errno is set (if on - ANSI) and p is NOT freed. - - if n is for fewer bytes than already held by p, the newly unused - space is lopped off and freed if possible. Unless the #define - REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of - zero (re)allocates a minimum-sized chunk. - - Large chunks that were internally obtained via mmap will always - be reallocated using malloc-copy-free sequences unless - the system supports MREMAP (currently only linux). 
- - The old unix realloc convention of allowing the last-free'd chunk - to be used as an argument to realloc is not supported. -*/ -#if __STD_C -Void_t* public_rEALLOc(Void_t*, size_t); -#else -Void_t* public_rEALLOc(); -#endif -#ifdef libc_hidden_proto -libc_hidden_proto (public_rEALLOc) -#endif - -/* - memalign(size_t alignment, size_t n); - Returns a pointer to a newly allocated chunk of n bytes, aligned - in accord with the alignment argument. - - The alignment argument should be a power of two. If the argument is - not a power of two, the nearest greater power is used. - 8-byte alignment is guaranteed by normal malloc calls, so don't - bother calling memalign with an argument of 8 or less. - - Overreliance on memalign is a sure way to fragment space. -*/ -#if __STD_C -Void_t* public_mEMALIGn(size_t, size_t); -#else -Void_t* public_mEMALIGn(); -#endif -#ifdef libc_hidden_proto -libc_hidden_proto (public_mEMALIGn) -#endif - -/* - valloc(size_t n); - Equivalent to memalign(pagesize, n), where pagesize is the page - size of the system. If the pagesize is unknown, 4096 is used. -*/ -#if __STD_C -Void_t* public_vALLOc(size_t); -#else -Void_t* public_vALLOc(); -#endif - - - -/* - mallopt(int parameter_number, int parameter_value) - Sets tunable parameters The format is to provide a - (parameter-number, parameter-value) pair. mallopt then sets the - corresponding parameter to the argument value if it can (i.e., so - long as the value is meaningful), and returns 1 if successful else - 0. SVID/XPG/ANSI defines four standard param numbers for mallopt, - normally defined in malloc.h. Only one of these (M_MXFAST) is used - in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply, - so setting them has no effect. But this malloc also supports four - other options in mallopt. See below for details. Briefly, supported - parameters are as follows (listed defaults are for "typical" - configurations). 
- - Symbol param # default allowed param values - M_MXFAST 1 64 0-80 (0 disables fastbins) - M_TRIM_THRESHOLD -1 128*1024 any (-1U disables trimming) - M_TOP_PAD -2 0 any - M_MMAP_THRESHOLD -3 128*1024 any (or 0 if no MMAP support) - M_MMAP_MAX -4 65536 any (0 disables use of mmap) -*/ -#if __STD_C -int public_mALLOPt(int, int); -#else -int public_mALLOPt(); -#endif - - -/* - mallinfo() - Returns (by copy) a struct containing various summary statistics: - - arena: current total non-mmapped bytes allocated from system - ordblks: the number of free chunks - smblks: the number of fastbin blocks (i.e., small chunks that - have been freed but not use resused or consolidated) - hblks: current number of mmapped regions - hblkhd: total bytes held in mmapped regions - usmblks: the maximum total allocated space. This will be greater - than current total if trimming has occurred. - fsmblks: total bytes held in fastbin blocks - uordblks: current total allocated space (normal or mmapped) - fordblks: total free space - keepcost: the maximum number of bytes that could ideally be released - back to system via malloc_trim. ("ideally" means that - it ignores page restrictions etc.) - - Because these fields are ints, but internal bookkeeping may - be kept as longs, the reported values may wrap around zero and - thus be inaccurate. -*/ -#if __STD_C -struct mallinfo public_mALLINFo(void); -#else -struct mallinfo public_mALLINFo(); -#endif - -/* - independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]); - - independent_calloc is similar to calloc, but instead of returning a - single cleared space, it returns an array of pointers to n_elements - independent elements that can hold contents of size elem_size, each - of which starts out cleared, and can be independently freed, - realloc'ed etc. 
The elements are guaranteed to be adjacently - allocated (this is not guaranteed to occur with multiple callocs or - mallocs), which may also improve cache locality in some - applications. - - The "chunks" argument is optional (i.e., may be null, which is - probably the most typical usage). If it is null, the returned array - is itself dynamically allocated and should also be freed when it is - no longer needed. Otherwise, the chunks array must be of at least - n_elements in length. It is filled in with the pointers to the - chunks. - - In either case, independent_calloc returns this pointer array, or - null if the allocation failed. If n_elements is zero and "chunks" - is null, it returns a chunk representing an array with zero elements - (which should be freed if not wanted). - - Each element must be individually freed when it is no longer - needed. If you'd like to instead be able to free all at once, you - should instead use regular calloc and assign pointers into this - space to represent elements. (In this case though, you cannot - independently free elements.) - - independent_calloc simplifies and speeds up implementations of many - kinds of pools. It may also be useful when constructing large data - structures that initially have a fixed number of fixed-sized nodes, - but the number is not known at compile time, and some of the nodes - may later need to be freed. For example: - - struct Node { int item; struct Node* next; }; - - struct Node* build_list() { - struct Node** pool; - int n = read_number_of_nodes_needed(); - if (n <= 0) return 0; - pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); - if (pool == 0) die(); - // organize into a linked list... 
- struct Node* first = pool[0]; - for (i = 0; i < n-1; ++i) - pool[i]->next = pool[i+1]; - free(pool); // Can now free the array (or not, if it is needed later) - return first; - } -*/ -#if __STD_C -Void_t** public_iCALLOc(size_t, size_t, Void_t**); -#else -Void_t** public_iCALLOc(); -#endif - -/* - independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); - - independent_comalloc allocates, all at once, a set of n_elements - chunks with sizes indicated in the "sizes" array. It returns - an array of pointers to these elements, each of which can be - independently freed, realloc'ed etc. The elements are guaranteed to - be adjacently allocated (this is not guaranteed to occur with - multiple callocs or mallocs), which may also improve cache locality - in some applications. - - The "chunks" argument is optional (i.e., may be null). If it is null - the returned array is itself dynamically allocated and should also - be freed when it is no longer needed. Otherwise, the chunks array - must be of at least n_elements in length. It is filled in with the - pointers to the chunks. - - In either case, independent_comalloc returns this pointer array, or - null if the allocation failed. If n_elements is zero and chunks is - null, it returns a chunk representing an array with zero elements - (which should be freed if not wanted). - - Each element must be individually freed when it is no longer - needed. If you'd like to instead be able to free all at once, you - should instead use a single regular malloc, and assign pointers at - particular offsets in the aggregate space. (In this case though, you - cannot independently free elements.) - - independent_comallac differs from independent_calloc in that each - element may have a different size, and also that it does not - automatically clear elements. - - independent_comalloc can be used to speed up allocation in cases - where several structs or objects must always be allocated at the - same time. 
For example: - - struct Head { ... } - struct Foot { ... } - - void send_message(char* msg) { - int msglen = strlen(msg); - size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; - void* chunks[3]; - if (independent_comalloc(3, sizes, chunks) == 0) - die(); - struct Head* head = (struct Head*)(chunks[0]); - char* body = (char*)(chunks[1]); - struct Foot* foot = (struct Foot*)(chunks[2]); - // ... - } - - In general though, independent_comalloc is worth using only for - larger values of n_elements. For small values, you probably won't - detect enough difference from series of malloc calls to bother. - - Overuse of independent_comalloc can increase overall memory usage, - since it cannot reuse existing noncontiguous small chunks that - might be available for some of the elements. -*/ -#if __STD_C -Void_t** public_iCOMALLOc(size_t, size_t*, Void_t**); -#else -Void_t** public_iCOMALLOc(); -#endif - - -/* - pvalloc(size_t n); - Equivalent to valloc(minimum-page-that-holds(n)), that is, - round up n to nearest pagesize. - */ -#if __STD_C -Void_t* public_pVALLOc(size_t); -#else -Void_t* public_pVALLOc(); -#endif - -/* - cfree(Void_t* p); - Equivalent to free(p). - - cfree is needed/defined on some systems that pair it with calloc, - for odd historical reasons (such as: cfree is used in example - code in the first edition of K&R). -*/ -#if __STD_C -void public_cFREe(Void_t*); -#else -void public_cFREe(); -#endif - -/* - malloc_trim(size_t pad); - - If possible, gives memory back to the system (via negative - arguments to sbrk) if there is unused memory at the `high' end of - the malloc pool. You can call this after freeing large blocks of - memory to potentially reduce the system-level memory requirements - of a program. However, it cannot guarantee to reduce memory. Under - some allocation patterns, some large free blocks of memory will be - locked between two used chunks, so they cannot be given back to - the system. 
- - The `pad' argument to malloc_trim represents the amount of free - trailing space to leave untrimmed. If this argument is zero, - only the minimum amount of memory to maintain internal data - structures will be left (one page or less). Non-zero arguments - can be supplied to maintain enough trailing space to service - future expected allocations without having to re-obtain memory - from the system. - - Malloc_trim returns 1 if it actually released any memory, else 0. - On systems that do not support "negative sbrks", it will always - rreturn 0. -*/ -#if __STD_C -int public_mTRIm(size_t); -#else -int public_mTRIm(); -#endif - -/* - malloc_usable_size(Void_t* p); - - Returns the number of bytes you can actually use in - an allocated chunk, which may be more than you requested (although - often not) due to alignment and minimum size constraints. - You can use this many bytes without worrying about - overwriting other allocated objects. This is not a particularly great - programming practice. malloc_usable_size can be more useful in - debugging and assertions, for example: - - p = malloc(n); - assert(malloc_usable_size(p) >= 256); - -*/ -#if __STD_C -size_t public_mUSABLe(Void_t*); -#else -size_t public_mUSABLe(); -#endif - -/* - malloc_stats(); - Prints on stderr the amount of space obtained from the system (both - via sbrk and mmap), the maximum amount (which may be more than - current if malloc_trim and/or munmap got called), and the current - number of bytes allocated via malloc (or realloc, etc) but not yet - freed. Note that this is the number of bytes allocated, not the - number requested. It will be larger than the number requested - because of alignment and bookkeeping overhead. Because it includes - alignment wastage as being in use, this figure may be greater than - zero even when no user-level chunks are allocated. 
- - The reported current and maximum system memory can be inaccurate if - a program makes other calls to system memory allocation functions - (normally sbrk) outside of malloc. - - malloc_stats prints only the most commonly interesting statistics. - More information can be obtained by calling mallinfo. - -*/ -#if __STD_C -void public_mSTATs(void); -#else -void public_mSTATs(); -#endif - -/* - malloc_get_state(void); - - Returns the state of all malloc variables in an opaque data - structure. -*/ -#if __STD_C -Void_t* public_gET_STATe(void); -#else -Void_t* public_gET_STATe(); -#endif - -/* - malloc_set_state(Void_t* state); - - Restore the state of all malloc variables from data obtained with - malloc_get_state(). -*/ -#if __STD_C -int public_sET_STATe(Void_t*); -#else -int public_sET_STATe(); -#endif - -#ifdef _LIBC -/* - posix_memalign(void **memptr, size_t alignment, size_t size); - - POSIX wrapper like memalign(), checking for validity of size. -*/ -int __posix_memalign(void **, size_t, size_t); -#endif - -/* mallopt tuning options */ - -/* - M_MXFAST is the maximum request size used for "fastbins", special bins - that hold returned chunks without consolidating their spaces. This - enables future requests for chunks of the same size to be handled - very quickly, but can increase fragmentation, and thus increase the - overall memory footprint of a program. - - This malloc manages fastbins very conservatively yet still - efficiently, so fragmentation is rarely a problem for values less - than or equal to the default. The maximum supported value of MXFAST - is 80. You wouldn't want it any higher than this anyway. Fastbins - are designed especially for use with many small structs, objects or - strings -- the default handles structs/objects/arrays with sizes up - to 8 4byte fields, or small strings representing words, tokens, - etc. Using fastbins for larger objects normally worsens - fragmentation without improving speed. - - M_MXFAST is set in REQUEST size units. 
It is internally used in - chunksize units, which adds padding and alignment. You can reduce - M_MXFAST to 0 to disable all use of fastbins. This causes the malloc - algorithm to be a closer approximation of fifo-best-fit in all cases, - not just for larger requests, but will generally cause it to be - slower. -*/ - - -/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */ -#ifndef M_MXFAST -#define M_MXFAST 1 -#endif - -#ifndef DEFAULT_MXFAST -#define DEFAULT_MXFAST 64 -#endif - - -/* - M_TRIM_THRESHOLD is the maximum amount of unused top-most memory - to keep before releasing via malloc_trim in free(). - - Automatic trimming is mainly useful in long-lived programs. - Because trimming via sbrk can be slow on some systems, and can - sometimes be wasteful (in cases where programs immediately - afterward allocate more large chunks) the value should be high - enough so that your overall system performance would improve by - releasing this much memory. - - The trim threshold and the mmap control parameters (see below) - can be traded off with one another. Trimming and mmapping are - two different ways of releasing unused memory back to the - system. Between these two, it is often possible to keep - system-level demands of a long-lived program down to a bare - minimum. For example, in one test suite of sessions measuring - the XF86 X server on Linux, using a trim threshold of 128K and a - mmap threshold of 192K led to near-minimal long term resource - consumption. - - If you are using this malloc in a long-lived program, it should - pay to experiment with these values. As a rough guide, you - might set to a value close to the average size of a process - (program) running on your system. Releasing this much memory - would allow such a process to run in memory. 
Generally, it's - worth it to tune for trimming rather tham memory mapping when a - program undergoes phases where several large chunks are - allocated and released in ways that can reuse each other's - storage, perhaps mixed with phases where there are no such - chunks at all. And in well-behaved long-lived programs, - controlling release of large blocks via trimming versus mapping - is usually faster. - - However, in most programs, these parameters serve mainly as - protection against the system-level effects of carrying around - massive amounts of unneeded memory. Since frequent calls to - sbrk, mmap, and munmap otherwise degrade performance, the default - parameters are set to relatively high values that serve only as - safeguards. - - The trim value It must be greater than page size to have any useful - effect. To disable trimming completely, you can set to - (unsigned long)(-1) - - Trim settings interact with fastbin (MXFAST) settings: Unless - TRIM_FASTBINS is defined, automatic trimming never takes place upon - freeing a chunk with size less than or equal to MXFAST. Trimming is - instead delayed until subsequent freeing of larger chunks. However, - you can still force an attempted trim by calling malloc_trim. - - Also, trimming is not generally possible in cases where - the main arena is obtained via mmap. - - Note that the trick some people use of mallocing a huge space and - then freeing it at program startup, in an attempt to reserve system - memory, doesn't have the intended effect under automatic trimming, - since that memory will immediately be returned to the system. -*/ - -#define M_TRIM_THRESHOLD -1 - -#ifndef DEFAULT_TRIM_THRESHOLD -#define DEFAULT_TRIM_THRESHOLD (128 * 1024) -#endif - -/* - M_TOP_PAD is the amount of extra `padding' space to allocate or - retain whenever sbrk is called. 
It is used in two ways internally: - - * When sbrk is called to extend the top of the arena to satisfy - a new malloc request, this much padding is added to the sbrk - request. - - * When malloc_trim is called automatically from free(), - it is used as the `pad' argument. - - In both cases, the actual amount of padding is rounded - so that the end of the arena is always a system page boundary. - - The main reason for using padding is to avoid calling sbrk so - often. Having even a small pad greatly reduces the likelihood - that nearly every malloc request during program start-up (or - after trimming) will invoke sbrk, which needlessly wastes - time. - - Automatic rounding-up to page-size units is normally sufficient - to avoid measurable overhead, so the default is 0. However, in - systems where sbrk is relatively slow, it can pay to increase - this value, at the expense of carrying around more memory than - the program needs. -*/ - -#define M_TOP_PAD -2 - -#ifndef DEFAULT_TOP_PAD -#define DEFAULT_TOP_PAD (0) -#endif - -/* - M_MMAP_THRESHOLD is the request size threshold for using mmap() - to service a request. Requests of at least this size that cannot - be allocated using already-existing space will be serviced via mmap. - (If enough normal freed space already exists it is used instead.) - - Using mmap segregates relatively large chunks of memory so that - they can be individually obtained and released from the host - system. A request serviced through mmap is never reused by any - other request (at least not directly; the system may just so - happen to remap successive requests to the same locations). - - Segregating space in this way has the benefits that: - - 1. Mmapped space can ALWAYS be individually released back - to the system, which helps keep the system level memory - demands of a long-lived program low. - 2. 
Mapped memory can never become `locked' between - other chunks, as can happen with normally allocated chunks, which - means that even trimming via malloc_trim would not release them. - 3. On some systems with "holes" in address spaces, mmap can obtain - memory that sbrk cannot. - - However, it has the disadvantages that: - - 1. The space cannot be reclaimed, consolidated, and then - used to service later requests, as happens with normal chunks. - 2. It can lead to more wastage because of mmap page alignment - requirements - 3. It causes malloc performance to be more dependent on host - system memory management support routines which may vary in - implementation quality and may impose arbitrary - limitations. Generally, servicing a request via normal - malloc steps is faster than going through a system's mmap. - - The advantages of mmap nearly always outweigh disadvantages for - "large" chunks, but the value of "large" varies across systems. The - default is an empirically derived value that works well in most - systems. -*/ - -#define M_MMAP_THRESHOLD -3 - -#ifndef DEFAULT_MMAP_THRESHOLD -#define DEFAULT_MMAP_THRESHOLD (128 * 1024) -#endif - -/* - M_MMAP_MAX is the maximum number of requests to simultaneously - service using mmap. This parameter exists because - some systems have a limited number of internal tables for - use by mmap, and using more than a few of them may degrade - performance. - - The default is set to a value that serves only as a safeguard. - Setting to 0 disables use of mmap for servicing large requests. If - HAVE_MMAP is not set, the default value is 0, and attempts to set it - to non-zero values in mallopt will fail. 
-*/ - -#define M_MMAP_MAX -4 - -#ifndef DEFAULT_MMAP_MAX -#if HAVE_MMAP -#define DEFAULT_MMAP_MAX (65536) -#else -#define DEFAULT_MMAP_MAX (0) -#endif -#endif - -#ifdef __cplusplus -} /* end of extern "C" */ -#endif - -#include "malloc.h" - -#ifndef BOUNDED_N -#define BOUNDED_N(ptr, sz) (ptr) -#endif -#ifndef RETURN_ADDRESS -#define RETURN_ADDRESS(X_) (NULL) -#endif - -/* On some platforms we can compile internal, not exported functions better. - Let the environment provide a macro and define it to be empty if it - is not available. */ -#ifndef internal_function -# define internal_function -#endif - -/* Forward declarations. */ -struct malloc_chunk; -typedef struct malloc_chunk* mchunkptr; - -/* Internal routines. */ - -#if __STD_C - -Void_t* _int_malloc(mstate, size_t); -void _int_free(mstate, Void_t*); -Void_t* _int_realloc(mstate, Void_t*, size_t); -Void_t* _int_memalign(mstate, size_t, size_t); -Void_t* _int_valloc(mstate, size_t); -static Void_t* _int_pvalloc(mstate, size_t); -/*static Void_t* cALLOc(size_t, size_t);*/ -static Void_t** _int_icalloc(mstate, size_t, size_t, Void_t**); -static Void_t** _int_icomalloc(mstate, size_t, size_t*, Void_t**); -static int mTRIm(size_t); -static size_t mUSABLe(Void_t*); -static int mALLOPt(int, int); - -static Void_t* internal_function mem2mem_check(Void_t *p, size_t sz); -static int internal_function top_check(void); -static void internal_function munmap_chunk(mchunkptr p); -#if HAVE_MREMAP -static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size); -#endif - -static Void_t* malloc_check(size_t sz, const Void_t *caller); -static void free_check(Void_t* mem, const Void_t *caller); -static Void_t* realloc_check(Void_t* oldmem, size_t bytes, - const Void_t *caller); -static Void_t* memalign_check(size_t alignment, size_t bytes, - const Void_t *caller); -#ifndef NO_THREADS -# if USE_STARTER -static Void_t* malloc_starter(size_t sz, const Void_t *caller); -static Void_t* memalign_starter(size_t aln, size_t 
sz, const Void_t *caller); -static void free_starter(Void_t* mem, const Void_t *caller); -# endif -static Void_t* malloc_atfork(size_t sz, const Void_t *caller); -static void free_atfork(Void_t* mem, const Void_t *caller); -#endif - -#else - -Void_t* _int_malloc(); -void _int_free(); -Void_t* _int_realloc(); -Void_t* _int_memalign(); -Void_t* _int_valloc(); -Void_t* _int_pvalloc(); -/*static Void_t* cALLOc();*/ -static Void_t** _int_icalloc(); -static Void_t** _int_icomalloc(); -static int mTRIm(); -static size_t mUSABLe(); -static int mALLOPt(); - -#endif - - - - -/* ------------- Optional versions of memcopy ---------------- */ - - -#if USE_MEMCPY - -/* - Note: memcpy is ONLY invoked with non-overlapping regions, - so the (usually slower) memmove is not needed. -*/ - -#define MALLOC_COPY(dest, src, nbytes) memcpy(dest, src, nbytes) -#define MALLOC_ZERO(dest, nbytes) memset(dest, 0, nbytes) - -#else /* !USE_MEMCPY */ - -/* Use Duff's device for good zeroing/copying performance. */ - -#define MALLOC_ZERO(charp, nbytes) \ -do { \ - INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp); \ - unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T); \ - long mcn; \ - if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \ - switch (mctmp) { \ - case 0: for(;;) { *mzp++ = 0; \ - case 7: *mzp++ = 0; \ - case 6: *mzp++ = 0; \ - case 5: *mzp++ = 0; \ - case 4: *mzp++ = 0; \ - case 3: *mzp++ = 0; \ - case 2: *mzp++ = 0; \ - case 1: *mzp++ = 0; if(mcn <= 0) break; mcn--; } \ - } \ -} while(0) - -#define MALLOC_COPY(dest,src,nbytes) \ -do { \ - INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src; \ - INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest; \ - unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T); \ - long mcn; \ - if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \ - switch (mctmp) { \ - case 0: for(;;) { *mcdst++ = *mcsrc++; \ - case 7: *mcdst++ = *mcsrc++; \ - case 6: *mcdst++ = *mcsrc++; \ - case 5: *mcdst++ = *mcsrc++; \ - case 4: *mcdst++ = 
*mcsrc++; \ - case 3: *mcdst++ = *mcsrc++; \ - case 2: *mcdst++ = *mcsrc++; \ - case 1: *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; } \ - } \ -} while(0) - -#endif - -/* ------------------ MMAP support ------------------ */ - - -#if HAVE_MMAP - -#include -#ifndef LACKS_SYS_MMAN_H -#include -#endif - -#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) -# define MAP_ANONYMOUS MAP_ANON -#endif -#if !defined(MAP_FAILED) -# define MAP_FAILED ((char*)-1) -#endif - -#ifndef MAP_NORESERVE -# ifdef MAP_AUTORESRV -# define MAP_NORESERVE MAP_AUTORESRV -# else -# define MAP_NORESERVE 0 -# endif -#endif - -/* - Nearly all versions of mmap support MAP_ANONYMOUS, - so the following is unlikely to be needed, but is - supplied just in case. -*/ - -#ifndef MAP_ANONYMOUS - -static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ - -#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? \ - (dev_zero_fd = open("/dev/zero", O_RDWR), \ - mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) : \ - mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) - -#else - -#define MMAP(addr, size, prot, flags) \ - (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0)) - -#endif - - -#endif /* HAVE_MMAP */ - - -/* - ----------------------- Chunk representations ----------------------- -*/ - - -/* - This struct declaration is misleading (but accurate and necessary). - It declares a "view" into memory allowing access to necessary - fields at known offsets from a given base. See explanation below. -*/ - -struct malloc_chunk { - - INTERNAL_SIZE_T prev_size; /* Size of previous chunk (if free). */ - INTERNAL_SIZE_T size; /* Size in bytes, including overhead. */ - - struct malloc_chunk* fd; /* double links -- used only if free. */ - struct malloc_chunk* bk; -}; - - -/* - malloc_chunk details: - - (The following includes lightly edited explanations by Colin Plumb.) - - Chunks of memory are maintained using a `boundary tag' method as - described in e.g., Knuth or Standish. 
(See the paper by Paul - Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a - survey of such techniques.) Sizes of free chunks are stored both - in the front of each chunk and at the end. This makes - consolidating fragmented chunks into bigger chunks very fast. The - size fields also hold bits representing whether chunks are free or - in use. - - An allocated chunk looks like this: - - - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of previous chunk, if allocated | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of chunk, in bytes |P| - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | User data starts here... . - . . - . (malloc_usable_space() bytes) . - . | -nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - Where "chunk" is the front of the chunk for the purpose of most of - the malloc code, but "mem" is the pointer that is returned to the - user. "Nextchunk" is the beginning of the next contiguous chunk. - - Chunks always begin on even word boundries, so the mem portion - (which is returned to the user) is also on an even word boundary, and - thus at least double-word aligned. - - Free chunks are stored in circular doubly-linked lists, and look like this: - - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of previous chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `head:' | Size of chunk, in bytes |P| - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Forward pointer to next chunk in list | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Back pointer to previous chunk in list | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Unused space (may be 0 bytes long) . - . . - . 
| -nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `foot:' | Size of chunk, in bytes | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - The P (PREV_INUSE) bit, stored in the unused low-order bit of the - chunk size (which is always a multiple of two words), is an in-use - bit for the *previous* chunk. If that bit is *clear*, then the - word before the current chunk size contains the previous chunk - size, and can be used to find the front of the previous chunk. - The very first chunk allocated always has this bit set, - preventing access to non-existent (or non-owned) memory. If - prev_inuse is set for any given chunk, then you CANNOT determine - the size of the previous chunk, and might even get a memory - addressing fault when trying to do so. - - Note that the `foot' of the current chunk is actually represented - as the prev_size of the NEXT chunk. This makes it easier to - deal with alignments etc but can be very confusing when trying - to extend or adapt this code. - - The two exceptions to all this are - - 1. The special chunk `top' doesn't bother using the - trailing size field since there is no next contiguous chunk - that would have to index off it. After initialization, `top' - is forced to always exist. If it would become less than - MINSIZE bytes long, it is replenished. - - 2. Chunks allocated via mmap, which have the second-lowest-order - bit (IS_MMAPPED) set in their size fields. Because they are - allocated one-by-one, each must contain its own trailing size field. 
- -*/ - -/* - ---------- Size and alignment checks and conversions ---------- -*/ - -/* conversion from malloc headers to user pointers, and back */ - -#define chunk2mem(p) ((Void_t*)((char*)(p) + 2*SIZE_SZ)) -#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ)) - -/* The smallest possible chunk */ -#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk)) - -/* The smallest size we can malloc is an aligned minimal chunk */ - -#define MINSIZE \ - (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)) - -/* Check if m has acceptable alignment */ - -#define aligned_OK(m) (((unsigned long)((m)) & (MALLOC_ALIGN_MASK)) == 0) - - -/* - Check if a request is so large that it would wrap around zero when - padded and aligned. To simplify some other code, the bound is made - low enough so that adding MINSIZE will also not wrap around zero. -*/ - -#define REQUEST_OUT_OF_RANGE(req) \ - ((unsigned long)(req) >= \ - (unsigned long)(INTERNAL_SIZE_T)(-2 * MINSIZE)) - -/* pad request bytes into a usable size -- internal version */ - -#define request2size(req) \ - (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \ - MINSIZE : \ - ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK) - -/* Same, except also perform argument check */ - -#define checked_request2size(req, sz) \ - if (REQUEST_OUT_OF_RANGE(req)) { \ - MALLOC_FAILURE_ACTION; \ - return 0; \ - } \ - (sz) = request2size(req); - -/* - --------------- Physical chunk operations --------------- -*/ - - -/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */ -#define PREV_INUSE 0x1 - -/* extract inuse bit of previous chunk */ -#define prev_inuse(p) ((p)->size & PREV_INUSE) - - -/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */ -#define IS_MMAPPED 0x2 - -/* check for mmap()'ed chunk */ -#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED) - - -/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained - from a non-main arena. 
This is only set immediately before handing - the chunk to the user, if necessary. */ -#define NON_MAIN_ARENA 0x4 - -/* check for chunk from non-main arena */ -#define chunk_non_main_arena(p) ((p)->size & NON_MAIN_ARENA) - - -/* - Bits to mask off when extracting size - - Note: IS_MMAPPED is intentionally not masked off from size field in - macros for which mmapped chunks should never be seen. This should - cause helpful core dumps to occur if it is tried by accident by - people extending or adapting this malloc. -*/ -#define SIZE_BITS (PREV_INUSE|IS_MMAPPED|NON_MAIN_ARENA) - -/* Get size, ignoring use bits */ -#define chunksize(p) ((p)->size & ~(SIZE_BITS)) - - -/* Ptr to next physical malloc_chunk. */ -#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~SIZE_BITS) )) - -/* Ptr to previous physical malloc_chunk */ -#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) )) - -/* Treat space at ptr + offset as a chunk */ -#define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) - -/* extract p's inuse bit */ -#define inuse(p)\ -((((mchunkptr)(((char*)(p))+((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE) - -/* set/clear chunk as being inuse without otherwise disturbing */ -#define set_inuse(p)\ -((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE - -#define clear_inuse(p)\ -((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE) - - -/* check/set/clear inuse bits in known places */ -#define inuse_bit_at_offset(p, s)\ - (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE) - -#define set_inuse_bit_at_offset(p, s)\ - (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE) - -#define clear_inuse_bit_at_offset(p, s)\ - (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE)) - - -/* Set size at head, without disturbing its use bit */ -#define set_head_size(p, s) ((p)->size = (((p)->size & SIZE_BITS) | (s))) - -/* Set size/use field */ -#define set_head(p, s) ((p)->size = (s)) - -/* Set size 
at footer (only when chunk is not in use) */ -#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s)) - - -/* - -------------------- Internal data structures -------------------- - - All internal state is held in an instance of malloc_state defined - below. There are no other static variables, except in two optional - cases: - * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above. - * If HAVE_MMAP is true, but mmap doesn't support - MAP_ANONYMOUS, a dummy file descriptor for mmap. - - Beware of lots of tricks that minimize the total bookkeeping space - requirements. The result is a little over 1K bytes (for 4byte - pointers and size_t.) -*/ - -/* - Bins - - An array of bin headers for free chunks. Each bin is doubly - linked. The bins are approximately proportionally (log) spaced. - There are a lot of these bins (128). This may look excessive, but - works very well in practice. Most bins hold sizes that are - unusual as malloc request sizes, but are more usual for fragments - and consolidated sets of chunks, which is what these bins hold, so - they can be found quickly. All procedures maintain the invariant - that no consolidated chunk physically borders another one, so each - chunk in a list is known to be preceeded and followed by either - inuse chunks or the ends of memory. - - Chunks in bins are kept in size order, with ties going to the - approximately least recently used chunk. Ordering isn't needed - for the small bins, which all contain the same-sized chunks, but - facilitates best-fit allocation for larger chunks. These lists - are just sequential. Keeping them in order almost never requires - enough traversal to warrant using fancier ordered data - structures. - - Chunks of the same size are linked with the most - recently freed at the front, and allocations are taken from the - back. 
This results in LRU (FIFO) allocation order, which tends - to give each chunk an equal opportunity to be consolidated with - adjacent freed chunks, resulting in larger free chunks and less - fragmentation. - - To simplify use in double-linked lists, each bin header acts - as a malloc_chunk. This avoids special-casing for headers. - But to conserve space and improve locality, we allocate - only the fd/bk pointers of bins, and then use repositioning tricks - to treat these as the fields of a malloc_chunk*. -*/ - -typedef struct malloc_chunk* mbinptr; - -/* addressing -- note that bin_at(0) does not exist */ -#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - (SIZE_SZ<<1))) - -/* analog of ++bin */ -#define next_bin(b) ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1))) - -/* Reminders about list directionality within bins */ -#define first(b) ((b)->fd) -#define last(b) ((b)->bk) - -/* Take a chunk off a bin list */ -#define unlink(P, BK, FD) { \ - FD = P->fd; \ - BK = P->bk; \ - FD->bk = BK; \ - BK->fd = FD; \ -} - -/* - Indexing - - Bins for sizes < 512 bytes contain chunks of all the same size, spaced - 8 bytes apart. Larger bins are approximately logarithmically spaced: - - 64 bins of size 8 - 32 bins of size 64 - 16 bins of size 512 - 8 bins of size 4096 - 4 bins of size 32768 - 2 bins of size 262144 - 1 bin of size what's left - - There is actually a little bit of slop in the numbers in bin_index - for the sake of speed. This makes no difference elsewhere. - - The bins top out around 1MB because we expect to service large - requests via mmap. -*/ - -#define NBINS 128 -#define NSMALLBINS 64 -#define SMALLBIN_WIDTH 8 -#define MIN_LARGE_SIZE 512 - -#define in_smallbin_range(sz) \ - ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE) - -#define smallbin_index(sz) (((unsigned)(sz)) >> 3) - -#define largebin_index(sz) \ -(((((unsigned long)(sz)) >> 6) <= 32)? 56 + (((unsigned long)(sz)) >> 6): \ - ((((unsigned long)(sz)) >> 9) <= 20)? 
91 + (((unsigned long)(sz)) >> 9): \ - ((((unsigned long)(sz)) >> 12) <= 10)? 110 + (((unsigned long)(sz)) >> 12): \ - ((((unsigned long)(sz)) >> 15) <= 4)? 119 + (((unsigned long)(sz)) >> 15): \ - ((((unsigned long)(sz)) >> 18) <= 2)? 124 + (((unsigned long)(sz)) >> 18): \ - 126) - -#define bin_index(sz) \ - ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz)) - -/* - FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the - first bin that is maintained in sorted order. This must - be the smallest size corresponding to a given bin. - - Normally, this should be MIN_LARGE_SIZE. But you can weaken - best fit guarantees to sometimes speed up malloc by increasing value. - Doing this means that malloc may choose a chunk that is - non-best-fitting by up to the width of the bin. - - Some useful cutoff values: - 512 - all bins sorted - 2560 - leaves bins <= 64 bytes wide unsorted - 12288 - leaves bins <= 512 bytes wide unsorted - 65536 - leaves bins <= 4096 bytes wide unsorted - 262144 - leaves bins <= 32768 bytes wide unsorted - -1 - no bins sorted (not recommended!) -*/ - -#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE -/* #define FIRST_SORTED_BIN_SIZE 65536 */ - -/* - Unsorted chunks - - All remainders from chunk splits, as well as all returned chunks, - are first placed in the "unsorted" bin. They are then placed - in regular bins after malloc gives them ONE chance to be used before - binning. So, basically, the unsorted_chunks list acts as a queue, - with chunks being placed on it in free (and malloc_consolidate), - and taken off (to be either used or placed in bins) in malloc. - - The NON_MAIN_ARENA flag is never set for unsorted chunks, so it - does not have to be taken into account in size comparisons. -*/ - -/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */ -#define unsorted_chunks(M) (bin_at(M, 1)) - -/* - Top - - The top-most available chunk (i.e., the one bordering the end of - available memory) is treated specially. 
It is never included in - any bin, is used only if no other chunk is available, and is - released back to the system if it is very large (see - M_TRIM_THRESHOLD). Because top initially - points to its own bin with initial zero size, thus forcing - extension on the first malloc request, we avoid having any special - code in malloc to check whether it even exists yet. But we still - need to do so when getting memory from system, so we make - initial_top treat the bin as a legal but unusable chunk during the - interval between initialization and the first call to - sYSMALLOc. (This is somewhat delicate, since it relies on - the 2 preceding words to be zero during this interval as well.) -*/ - -/* Conveniently, the unsorted bin can be used as dummy top on first call */ -#define initial_top(M) (unsorted_chunks(M)) - -/* - Binmap - - To help compensate for the large number of bins, a one-level index - structure is used for bin-by-bin searching. `binmap' is a - bitvector recording whether bins are definitely empty so they can - be skipped over during during traversals. The bits are NOT always - cleared as soon as bins are empty, but instead only - when they are noticed to be empty during traversal in malloc. -*/ - -/* Conservatively use 32 bits per map word, even if on 64bit system */ -#define BINMAPSHIFT 5 -#define BITSPERMAP (1U << BINMAPSHIFT) -#define BINMAPSIZE (NBINS / BITSPERMAP) - -#define idx2block(i) ((i) >> BINMAPSHIFT) -#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT)-1)))) - -#define mark_bin(m,i) ((m)->binmap[idx2block(i)] |= idx2bit(i)) -#define unmark_bin(m,i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i))) -#define get_binmap(m,i) ((m)->binmap[idx2block(i)] & idx2bit(i)) - -/* - Fastbins - - An array of lists holding recently freed small chunks. Fastbins - are not doubly linked. It is faster to single-link them, and - since chunks are never removed from the middles of these lists, - double linking is not necessary. 
Also, unlike regular bins, they - are not even processed in FIFO order (they use faster LIFO) since - ordering doesn't much matter in the transient contexts in which - fastbins are normally used. - - Chunks in fastbins keep their inuse bit set, so they cannot - be consolidated with other free chunks. malloc_consolidate - releases all chunks in fastbins and consolidates them with - other free chunks. -*/ - -typedef struct malloc_chunk* mfastbinptr; - -/* offset 2 to use otherwise unindexable first 2 bins */ -#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2) - -/* The maximum fastbin request size we support */ -#define MAX_FAST_SIZE 80 - -#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE))+1) - -/* - FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free() - that triggers automatic consolidation of possibly-surrounding - fastbin chunks. This is a heuristic, so the exact value should not - matter too much. It is defined at half the default trim threshold as a - compromise heuristic to only attempt consolidation if it is likely - to lead to trimming. However, it is not dynamically tunable, since - consolidation reduces fragmentation surrounding large chunks even - if trimming is not used. -*/ - -#define FASTBIN_CONSOLIDATION_THRESHOLD (65536UL) - -/* - Since the lowest 2 bits in max_fast don't matter in size comparisons, - they are used as flags. -*/ - -/* - FASTCHUNKS_BIT held in max_fast indicates that there are probably - some fastbin chunks. It is set true on entering a chunk into any - fastbin, and cleared only in malloc_consolidate. - - The truth value is inverted so that have_fastchunks will be true - upon startup (since statics are zero-filled), simplifying - initialization checks. 
-*/ - -#define FASTCHUNKS_BIT (1U) - -#define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT) == 0) -#define clear_fastchunks(M) ((M)->max_fast |= FASTCHUNKS_BIT) -#define set_fastchunks(M) ((M)->max_fast &= ~FASTCHUNKS_BIT) - -/* - NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous - regions. Otherwise, contiguity is exploited in merging together, - when possible, results from consecutive MORECORE calls. - - The initial value comes from MORECORE_CONTIGUOUS, but is - changed dynamically if mmap is ever used as an sbrk substitute. -*/ - -#define NONCONTIGUOUS_BIT (2U) - -#define contiguous(M) (((M)->max_fast & NONCONTIGUOUS_BIT) == 0) -#define noncontiguous(M) (((M)->max_fast & NONCONTIGUOUS_BIT) != 0) -#define set_noncontiguous(M) ((M)->max_fast |= NONCONTIGUOUS_BIT) -#define set_contiguous(M) ((M)->max_fast &= ~NONCONTIGUOUS_BIT) - -/* - Set value of max_fast. - Use impossibly small value if 0. - Precondition: there are no existing fastbin chunks. - Setting the value clears fastchunk bit but preserves noncontiguous bit. -*/ - -#define set_max_fast(M, s) \ - (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \ - FASTCHUNKS_BIT | \ - ((M)->max_fast & NONCONTIGUOUS_BIT) - - -/* - ----------- Internal state representation and initialization ----------- -*/ - -struct malloc_state { - /* Serialize access. */ - mutex_t mutex; - - /* Statistics for locking. Only used if THREAD_STATS is defined. 
*/ - long stat_lock_direct, stat_lock_loop, stat_lock_wait; - long pad0_[1]; /* try to give the mutex its own cacheline */ - - /* The maximum chunk size to be eligible for fastbin */ - INTERNAL_SIZE_T max_fast; /* low 2 bits used as flags */ - - /* Fastbins */ - mfastbinptr fastbins[NFASTBINS]; - - /* Base of the topmost chunk -- not otherwise kept in a bin */ - mchunkptr top; - - /* The remainder from the most recent split of a small request */ - mchunkptr last_remainder; - - /* Normal bins packed as described above */ - mchunkptr bins[NBINS * 2]; - - /* Bitmap of bins */ - unsigned int binmap[BINMAPSIZE]; - - /* Linked list */ - struct malloc_state *next; - - /* Memory allocated from the system in this arena. */ - INTERNAL_SIZE_T system_mem; - INTERNAL_SIZE_T max_system_mem; -}; - -struct malloc_par { - /* Tunable parameters */ - unsigned long trim_threshold; - INTERNAL_SIZE_T top_pad; - INTERNAL_SIZE_T mmap_threshold; - - /* Memory map support */ - int n_mmaps; - int n_mmaps_max; - int max_n_mmaps; - - /* Cache malloc_getpagesize */ - unsigned int pagesize; - - /* Statistics */ - INTERNAL_SIZE_T mmapped_mem; - /*INTERNAL_SIZE_T sbrked_mem;*/ - /*INTERNAL_SIZE_T max_sbrked_mem;*/ - INTERNAL_SIZE_T max_mmapped_mem; - INTERNAL_SIZE_T max_total_mem; /* only kept for NO_THREADS */ - - /* First address handed out by MORECORE/sbrk. */ - char* sbrk_base; -}; - -/* There are several instances of this struct ("arenas") in this - malloc. If you are adapting this malloc in a way that does NOT use - a static or mmapped malloc_state, you MUST explicitly zero-fill it - before using. This malloc relies on the property that malloc_state - is initialized to all zeroes (as is true of C statics). */ - -static struct malloc_state main_arena; - -/* There is only one instance of the malloc parameters. */ - -static struct malloc_par mp_; - -/* - Initialize a malloc_state struct. - - This is called only from within malloc_consolidate, which needs - be called in the same contexts anyway. 
It is never called directly - outside of malloc_consolidate because some optimizing compilers try - to inline it at all call points, which turns out not to be an - optimization at all. (Inlining it in malloc_consolidate is fine though.) -*/ - -#if __STD_C -static void malloc_init_state(mstate av) -#else -static void malloc_init_state(av) mstate av; -#endif -{ - int i; - mbinptr bin; - - /* Establish circular links for normal bins */ - for (i = 1; i < NBINS; ++i) { - bin = bin_at(av,i); - bin->fd = bin->bk = bin; - } - -#if MORECORE_CONTIGUOUS - if (av != &main_arena) -#endif - set_noncontiguous(av); - - set_max_fast(av, DEFAULT_MXFAST); - - av->top = initial_top(av); -} - -/* - Other internal utilities operating on mstates -*/ - -#if __STD_C -static Void_t* sYSMALLOc(INTERNAL_SIZE_T, mstate); -#ifndef MORECORE_CANNOT_TRIM -static int sYSTRIm(size_t, mstate); -#endif -static void malloc_consolidate(mstate); -static Void_t** iALLOc(mstate, size_t, size_t*, int, Void_t**); -#else -static Void_t* sYSMALLOc(); -#ifndef MORECORE_CANNOT_TRIM -static int sYSTRIm(); -#endif -static void malloc_consolidate(); -static Void_t** iALLOc(); -#endif - - -/* -------------- Early definitions for debugging hooks ---------------- */ - -/* Define and initialize the hook variables. These weak definitions must - appear before any use of the variables in a function (arena.c uses one). */ -#ifndef weak_variable -#ifndef _LIBC -#define weak_variable /**/ -#else -/* In GNU libc we want the hook variables to be weak definitions to - avoid a problem with Emacs. */ -#define weak_variable weak_function -#endif -#endif - -/* OMPI change: these aren't used */ -#if 0 -#if !(USE_STARTER & 2) -# define free_hook_ini NULL -/* Forward declarations. 
*/ -static Void_t* malloc_hook_ini __MALLOC_P ((size_t sz, - const __malloc_ptr_t caller)); -static Void_t* realloc_hook_ini __MALLOC_P ((Void_t* ptr, size_t sz, - const __malloc_ptr_t caller)); -static Void_t* memalign_hook_ini __MALLOC_P ((size_t alignment, size_t sz, - const __malloc_ptr_t caller)); -#else -# define free_hook_ini free_starter -# define malloc_hook_ini malloc_starter -# define realloc_hook_ini NULL -# define memalign_hook_ini memalign_starter -#endif -#endif - -/* OMPI change: we don't want any of these -- we want to use the - underlying allocator's symbols */ -#if 0 -void weak_variable (*__malloc_initialize_hook) __MALLOC_PMT ((void)) = NULL; -void weak_variable (*__free_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr, - const __malloc_ptr_t)) - = free_hook_ini; -__malloc_ptr_t weak_variable (*__malloc_hook) - __MALLOC_PMT ((size_t __size, const __malloc_ptr_t)) = malloc_hook_ini; -__malloc_ptr_t weak_variable (*__realloc_hook) - __MALLOC_PMT ((__malloc_ptr_t __ptr, size_t __size, const __malloc_ptr_t)) - = realloc_hook_ini; -__malloc_ptr_t weak_variable (*__memalign_hook) - __MALLOC_PMT ((size_t __alignment, size_t __size, const __malloc_ptr_t)) - = memalign_hook_ini; -void weak_variable (*__after_morecore_hook) __MALLOC_P ((void)) = NULL; -#endif - - -/* ------------------- Support for multiple arenas -------------------- */ -#include "arena.c" - -/* - Debugging support - - These routines make a number of assertions about the states - of data structures that should be true at all times. If any - are not true, it's very likely that a user program has somehow - trashed memory. (It's also possible that there is a coding error - in malloc. In which case, please report it!) -*/ - -#if ! 
MALLOC_DEBUG - -#define check_chunk(A,P) -#define check_free_chunk(A,P) -#define check_inuse_chunk(A,P) -#define check_remalloced_chunk(A,P,N) -#define check_malloced_chunk(A,P,N) -#define check_malloc_state(A) - -#else - -#define check_chunk(A,P) do_check_chunk(A,P) -#define check_free_chunk(A,P) do_check_free_chunk(A,P) -#define check_inuse_chunk(A,P) do_check_inuse_chunk(A,P) -#define check_remalloced_chunk(A,P,N) do_check_remalloced_chunk(A,P,N) -#define check_malloced_chunk(A,P,N) do_check_malloced_chunk(A,P,N) -#define check_malloc_state(A) do_check_malloc_state(A) - -/* - Properties of all chunks -*/ - -#if __STD_C -static void do_check_chunk(mstate av, mchunkptr p) -#else -static void do_check_chunk(av, p) mstate av; mchunkptr p; -#endif -{ - unsigned long sz = chunksize(p); - /* min and max possible addresses assuming contiguous allocation */ - char* max_address = (char*)(av->top) + chunksize(av->top); - char* min_address = max_address - av->system_mem; - - if (!chunk_is_mmapped(p)) { - - /* Has legal address ... 
*/ - if (p != av->top) { - if (contiguous(av)) { - assert(((char*)p) >= min_address); - assert(((char*)p + sz) <= ((char*)(av->top))); - } - } - else { - /* top size is always at least MINSIZE */ - assert((unsigned long)(sz) >= MINSIZE); - /* top predecessor always marked inuse */ - assert(prev_inuse(p)); - } - - } - else { -#if HAVE_MMAP - /* address is outside main heap */ - if (contiguous(av) && av->top != initial_top(av)) { - assert(((char*)p) < min_address || ((char*)p) > max_address); - } - /* chunk is page-aligned */ - assert(((p->prev_size + sz) & (mp_.pagesize-1)) == 0); - /* mem is aligned */ - assert(aligned_OK(chunk2mem(p))); -#else - /* force an appropriate assert violation if debug set */ - assert(!chunk_is_mmapped(p)); -#endif - } -} - -/* - Properties of free chunks -*/ - -#if __STD_C -static void do_check_free_chunk(mstate av, mchunkptr p) -#else -static void do_check_free_chunk(av, p) mstate av; mchunkptr p; -#endif -{ - INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE|NON_MAIN_ARENA); - mchunkptr next = chunk_at_offset(p, sz); - - do_check_chunk(av, p); - - /* Chunk must claim to be free ... */ - assert(!inuse(p)); - assert (!chunk_is_mmapped(p)); - - /* Unless a special marker, must have OK fields */ - if ((unsigned long)(sz) >= MINSIZE) - { - assert((sz & MALLOC_ALIGN_MASK) == 0); - assert(aligned_OK(chunk2mem(p))); - /* ... matching footer field */ - assert(next->prev_size == sz); - /* ... and is fully consolidated */ - assert(prev_inuse(p)); - assert (next == av->top || inuse(next)); - - /* ... 
and has minimally sane links */ - assert(p->fd->bk == p); - assert(p->bk->fd == p); - } - else /* markers are always of size SIZE_SZ */ - assert(sz == SIZE_SZ); -} - -/* - Properties of inuse chunks -*/ - -#if __STD_C -static void do_check_inuse_chunk(mstate av, mchunkptr p) -#else -static void do_check_inuse_chunk(av, p) mstate av; mchunkptr p; -#endif -{ - mchunkptr next; - - do_check_chunk(av, p); - - if (chunk_is_mmapped(p)) - return; /* mmapped chunks have no next/prev */ - - /* Check whether it claims to be in use ... */ - assert(inuse(p)); - - next = next_chunk(p); - - /* ... and is surrounded by OK chunks. - Since more things can be checked with free chunks than inuse ones, - if an inuse chunk borders them and debug is on, it's worth doing them. - */ - if (!prev_inuse(p)) { - /* Note that we cannot even look at prev unless it is not inuse */ - mchunkptr prv = prev_chunk(p); - assert(next_chunk(prv) == p); - do_check_free_chunk(av, prv); - } - - if (next == av->top) { - assert(prev_inuse(next)); - assert(chunksize(next) >= MINSIZE); - } - else if (!inuse(next)) - do_check_free_chunk(av, next); -} - -/* - Properties of chunks recycled from fastbins -*/ - -#if __STD_C -static void do_check_remalloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s) -#else -static void do_check_remalloced_chunk(av, p, s) -mstate av; mchunkptr p; INTERNAL_SIZE_T s; -#endif -{ - INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE|NON_MAIN_ARENA); - - if (!chunk_is_mmapped(p)) { - assert(av == arena_for_chunk(p)); - if (chunk_non_main_arena(p)) - assert(av != &main_arena); - else - assert(av == &main_arena); - } - - do_check_inuse_chunk(av, p); - - /* Legal size ... */ - assert((sz & MALLOC_ALIGN_MASK) == 0); - assert((unsigned long)(sz) >= MINSIZE); - /* ... 
and alignment */ - assert(aligned_OK(chunk2mem(p))); - /* chunk is less than MINSIZE more than request */ - assert((long)(sz) - (long)(s) >= 0); - assert((long)(sz) - (long)(s + MINSIZE) < 0); -} - -/* - Properties of nonrecycled chunks at the point they are malloced -*/ - -#if __STD_C -static void do_check_malloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s) -#else -static void do_check_malloced_chunk(av, p, s) -mstate av; mchunkptr p; INTERNAL_SIZE_T s; -#endif -{ - /* same as recycled case ... */ - do_check_remalloced_chunk(av, p, s); - - /* - ... plus, must obey implementation invariant that prev_inuse is - always true of any allocated chunk; i.e., that each allocated - chunk borders either a previously allocated and still in-use - chunk, or the base of its memory arena. This is ensured - by making all allocations from the the `lowest' part of any found - chunk. This does not necessarily hold however for chunks - recycled via fastbins. - */ - - assert(prev_inuse(p)); -} - - -/* - Properties of malloc_state. - - This may be useful for debugging malloc, as well as detecting user - programmer errors that somehow write into malloc_state. - - If you are extending or experimenting with this malloc, you can - probably figure out how to hack this routine to print out or - display chunk addresses, sizes, bins, and other instrumentation. 
-*/ - -static void do_check_malloc_state(mstate av) -{ - int i; - mchunkptr p; - mchunkptr q; - mbinptr b; - unsigned int binbit; - int empty; - unsigned int idx; - INTERNAL_SIZE_T size; - unsigned long total = 0; - int max_fast_bin; - - /* internal size_t must be no wider than pointer type */ - assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char*)); - - /* alignment is a power of 2 */ - assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-1)) == 0); - - /* cannot run remaining checks until fully initialized */ - if (av->top == 0 || av->top == initial_top(av)) - return; - - /* pagesize is a power of 2 */ - assert((mp_.pagesize & (mp_.pagesize-1)) == 0); - - /* A contiguous main_arena is consistent with sbrk_base. */ - if (av == &main_arena && contiguous(av)) - assert((char*)mp_.sbrk_base + av->system_mem == - (char*)av->top + chunksize(av->top)); - - /* properties of fastbins */ - - /* max_fast is in allowed range */ - assert((av->max_fast & ~1) <= request2size(MAX_FAST_SIZE)); - - max_fast_bin = fastbin_index(av->max_fast); - - for (i = 0; i < NFASTBINS; ++i) { - p = av->fastbins[i]; - - /* all bins past max_fast are empty */ - if (i > max_fast_bin) - assert(p == 0); - - while (p != 0) { - /* each chunk claims to be inuse */ - do_check_inuse_chunk(av, p); - total += chunksize(p); - /* chunk belongs in this bin */ - assert(fastbin_index(chunksize(p)) == i); - p = p->fd; - } - } - - if (total != 0) - assert(have_fastchunks(av)); - else if (!have_fastchunks(av)) - assert(total == 0); - - /* check normal bins */ - for (i = 1; i < NBINS; ++i) { - b = bin_at(av,i); - - /* binmap is accurate (except for bin 1 == unsorted_chunks) */ - if (i >= 2) { - binbit = get_binmap(av,i); - empty = last(b) == b; - if (!binbit) - assert(empty); - else if (!empty) - assert(binbit); - } - - for (p = last(b); p != b; p = p->bk) { - /* each chunk claims to be free */ - do_check_free_chunk(av, p); - size = chunksize(p); - total += size; - if (i >= 2) { - /* chunk belongs in bin */ - idx = 
bin_index(size); - assert(idx == i); - /* lists are sorted */ - if ((unsigned long) size >= (unsigned long)(FIRST_SORTED_BIN_SIZE)) { - assert(p->bk == b || - (unsigned long)chunksize(p->bk) >= - (unsigned long)chunksize(p)); - } - } - /* chunk is followed by a legal chain of inuse chunks */ - for (q = next_chunk(p); - (q != av->top && inuse(q) && - (unsigned long)(chunksize(q)) >= MINSIZE); - q = next_chunk(q)) - do_check_inuse_chunk(av, q); - } - } - - /* top chunk is OK */ - check_chunk(av, av->top); - - /* sanity checks for statistics */ - -#ifdef NO_THREADS - assert(total <= (unsigned long)(mp_.max_total_mem)); - assert(mp_.n_mmaps >= 0); -#endif - assert(mp_.n_mmaps <= mp_.n_mmaps_max); - assert(mp_.n_mmaps <= mp_.max_n_mmaps); - - assert((unsigned long)(av->system_mem) <= - (unsigned long)(av->max_system_mem)); - - assert((unsigned long)(mp_.mmapped_mem) <= - (unsigned long)(mp_.max_mmapped_mem)); - -#ifdef NO_THREADS - assert((unsigned long)(mp_.max_total_mem) >= - (unsigned long)(mp_.mmapped_mem) + (unsigned long)(av->system_mem)); -#endif -} -#endif - - -/* ----------------- Support for debugging hooks -------------------- */ -#include "hooks.c" - -/* ----------- Routines dealing with system allocation -------------- */ - -/* - sysmalloc handles malloc cases requiring more memory from the system. - On entry, it is assumed that av->top does not have enough - space to service request for nb bytes, thus requiring that av->top - be extended or replaced. 
-*/ - -#if __STD_C -static Void_t* sYSMALLOc(INTERNAL_SIZE_T nb, mstate av) -#else -static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av; -#endif -{ - mchunkptr old_top; /* incoming value of av->top */ - INTERNAL_SIZE_T old_size; /* its size */ - char* old_end; /* its end address */ - - long size; /* arg to first MORECORE or mmap call */ - char* brk; /* return value from MORECORE */ - - long correction; /* arg to 2nd MORECORE call */ - char* snd_brk; /* 2nd return val */ - - INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */ - INTERNAL_SIZE_T end_misalign; /* partial page left at end of new space */ - char* aligned_brk; /* aligned offset into brk */ - - mchunkptr p; /* the allocated/returned chunk */ - mchunkptr remainder; /* remainder from allocation */ - unsigned long remainder_size; /* its size */ - - unsigned long sum; /* for updating stats */ - - size_t pagemask = mp_.pagesize - 1; - - -#if HAVE_MMAP - - /* - If have mmap, and the request size meets the mmap threshold, and - the system supports mmap, and there are few enough currently - allocated mmapped regions, try to directly map this request - rather than expanding top. - */ - - if ((unsigned long)(nb) >= (unsigned long)(mp_.mmap_threshold) && - (mp_.n_mmaps < mp_.n_mmaps_max)) { - - char* mm; /* return value from mmap call*/ - - /* - Round up size to nearest page. For mmapped chunks, the overhead - is one SIZE_SZ unit larger than for normal chunks, because there - is no following chunk whose prev_size field could be used. - */ - size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask; - - /* Don't try if size wraps around 0 */ - if ((unsigned long)(size) > (unsigned long)(nb)) { - - mm = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE)); - - if (mm != MAP_FAILED) { - - /* - The offset to the start of the mmapped region is stored - in the prev_size field of the chunk. 
This allows us to adjust - returned start address to meet alignment requirements here - and in memalign(), and still be able to compute proper - address argument for later munmap in free() and realloc(). - */ - - front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK; - if (front_misalign > 0) { - correction = MALLOC_ALIGNMENT - front_misalign; - p = (mchunkptr)(mm + correction); - p->prev_size = correction; - set_head(p, (size - correction) |IS_MMAPPED); - } - else { - p = (mchunkptr)mm; - set_head(p, size|IS_MMAPPED); - } - - /* update statistics */ - - if (++mp_.n_mmaps > mp_.max_n_mmaps) - mp_.max_n_mmaps = mp_.n_mmaps; - - sum = mp_.mmapped_mem += size; - if (sum > (unsigned long)(mp_.max_mmapped_mem)) - mp_.max_mmapped_mem = sum; -#ifdef NO_THREADS - sum += av->system_mem; - if (sum > (unsigned long)(mp_.max_total_mem)) - mp_.max_total_mem = sum; -#endif - - check_chunk(av, p); - - return chunk2mem(p); - } - } - } -#endif - - /* Record incoming configuration of top */ - - old_top = av->top; - old_size = chunksize(old_top); - old_end = (char*)(chunk_at_offset(old_top, old_size)); - - brk = snd_brk = (char*)(MORECORE_FAILURE); - - /* - If not the first time through, we require old_size to be - at least MINSIZE and to have prev_inuse set. - */ - - assert((old_top == initial_top(av) && old_size == 0) || - ((unsigned long) (old_size) >= MINSIZE && - prev_inuse(old_top) && - ((unsigned long)old_end & pagemask) == 0)); - - /* Precondition: not enough current space to satisfy nb request */ - assert((unsigned long)(old_size) < (unsigned long)(nb + MINSIZE)); - - /* Precondition: all fastbins are consolidated */ - assert(!have_fastchunks(av)); - - - if (av != &main_arena) { - - heap_info *old_heap, *heap; - size_t old_heap_size; - - /* First try to extend the current heap. 
*/ - old_heap = heap_for_ptr(old_top); - old_heap_size = old_heap->size; - if (grow_heap(old_heap, MINSIZE + nb - old_size) == 0) { - av->system_mem += old_heap->size - old_heap_size; - arena_mem += old_heap->size - old_heap_size; -#if 0 - if(mmapped_mem + arena_mem + sbrked_mem > max_total_mem) - max_total_mem = mmapped_mem + arena_mem + sbrked_mem; -#endif - set_head(old_top, (((char *)old_heap + old_heap->size) - (char *)old_top) - | PREV_INUSE); - } - else if ((heap = new_heap(nb + (MINSIZE + sizeof(*heap)), mp_.top_pad))) { - /* Use a newly allocated heap. */ - heap->ar_ptr = av; - heap->prev = old_heap; - av->system_mem += heap->size; - arena_mem += heap->size; -#if 0 - if((unsigned long)(mmapped_mem + arena_mem + sbrked_mem) > max_total_mem) - max_total_mem = mmapped_mem + arena_mem + sbrked_mem; -#endif - /* Set up the new top. */ - top(av) = chunk_at_offset(heap, sizeof(*heap)); - set_head(top(av), (heap->size - sizeof(*heap)) | PREV_INUSE); - - /* Setup fencepost and free the old top chunk. */ - /* The fencepost takes at least MINSIZE bytes, because it might - become the top chunk again later. Note that a footer is set - up, too, although the chunk is marked in use. */ - old_size -= MINSIZE; - set_head(chunk_at_offset(old_top, old_size + 2*SIZE_SZ), 0|PREV_INUSE); - if (old_size >= MINSIZE) { - set_head(chunk_at_offset(old_top, old_size), (2*SIZE_SZ)|PREV_INUSE); - set_foot(chunk_at_offset(old_top, old_size), (2*SIZE_SZ)); - set_head(old_top, old_size|PREV_INUSE|NON_MAIN_ARENA); - _int_free(av, chunk2mem(old_top)); - } else { - set_head(old_top, (old_size + 2*SIZE_SZ)|PREV_INUSE); - set_foot(old_top, (old_size + 2*SIZE_SZ)); - } - } - - } else { /* av == main_arena */ - - - /* Request enough space for nb + pad + overhead */ - - size = nb + mp_.top_pad + MINSIZE; - - /* - If contiguous, we can subtract out existing space that we hope to - combine with new space. We add it back later only if - we don't actually get contiguous space. 
- */ - - if (contiguous(av)) - size -= old_size; - - /* - Round to a multiple of page size. - If MORECORE is not contiguous, this ensures that we only call it - with whole-page arguments. And if MORECORE is contiguous and - this is not first time through, this preserves page-alignment of - previous calls. Otherwise, we correct to page-align below. - */ - - size = (size + pagemask) & ~pagemask; - - /* - Don't try to call MORECORE if argument is so big as to appear - negative. Note that since mmap takes size_t arg, it may succeed - below even if we cannot call MORECORE. - */ - - if (size > 0) - brk = (char*)(MORECORE(size)); - - if (brk != (char*)(MORECORE_FAILURE)) { - /* Call the `morecore' hook if necessary. */ - if (__after_morecore_hook) - (*__after_morecore_hook) (); - } else { - /* - If have mmap, try using it as a backup when MORECORE fails or - cannot be used. This is worth doing on systems that have "holes" in - address space, so sbrk cannot extend to give contiguous space, but - space is available elsewhere. Note that we ignore mmap max count - and threshold limits, since the space will not be used as a - segregated mmap region. - */ - -#if HAVE_MMAP - /* Cannot merge with old top, so add its size back in */ - if (contiguous(av)) - size = (size + old_size + pagemask) & ~pagemask; - - /* If we are relying on mmap as backup, then use larger units */ - if ((unsigned long)(size) < (unsigned long)(MMAP_AS_MORECORE_SIZE)) - size = MMAP_AS_MORECORE_SIZE; - - /* Don't try if size wraps around 0 */ - if ((unsigned long)(size) > (unsigned long)(nb)) { - - char *mbrk = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE)); - - if (mbrk != MAP_FAILED) { - - /* We do not need, and cannot use, another sbrk call to find end */ - brk = mbrk; - snd_brk = brk + size; - - /* - Record that we no longer have a contiguous sbrk region. - After the first time mmap is used as backup, we do not - ever rely on contiguous space since this could incorrectly - bridge regions. 
- */ - set_noncontiguous(av); - } - } -#endif - } - - if (brk != (char*)(MORECORE_FAILURE)) { - if (mp_.sbrk_base == 0) - mp_.sbrk_base = brk; - av->system_mem += size; - - /* - If MORECORE extends previous space, we can likewise extend top size. - */ - - if (brk == old_end && snd_brk == (char*)(MORECORE_FAILURE)) - set_head(old_top, (size + old_size) | PREV_INUSE); - - else if (contiguous(av) && old_size && brk < old_end) { - /* Oops! Someone else killed our space.. Can't touch anything. */ - assert(0); - } - - /* - Otherwise, make adjustments: - - * If the first time through or noncontiguous, we need to call sbrk - just to find out where the end of memory lies. - - * We need to ensure that all returned chunks from malloc will meet - MALLOC_ALIGNMENT - - * If there was an intervening foreign sbrk, we need to adjust sbrk - request size to account for fact that we will not be able to - combine new space with existing space in old_top. - - * Almost all systems internally allocate whole pages at a time, in - which case we might as well use the whole last page of request. - So we allocate enough more memory to hit a page boundary now, - which in turn causes future contiguous calls to page-align. - */ - - else { - front_misalign = 0; - end_misalign = 0; - correction = 0; - aligned_brk = brk; - - /* handle contiguous cases */ - if (contiguous(av)) { - - /* Count foreign sbrk as system_mem. */ - if (old_size) - av->system_mem += brk - old_end; - - /* Guarantee alignment of first new chunk made from this space */ - - front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK; - if (front_misalign > 0) { - - /* - Skip over some bytes to arrive at an aligned position. - We don't need to specially mark these wasted front bytes. - They will never be accessed anyway because - prev_inuse of av->top (and any chunk created from its start) - is always true after initialization. 
- */ - - correction = MALLOC_ALIGNMENT - front_misalign; - aligned_brk += correction; - } - - /* - If this isn't adjacent to existing space, then we will not - be able to merge with old_top space, so must add to 2nd request. - */ - - correction += old_size; - - /* Extend the end address to hit a page boundary */ - end_misalign = (INTERNAL_SIZE_T)(brk + size + correction); - correction += ((end_misalign + pagemask) & ~pagemask) - end_misalign; - - assert(correction >= 0); - snd_brk = (char*)(MORECORE(correction)); - - /* - If can't allocate correction, try to at least find out current - brk. It might be enough to proceed without failing. - - Note that if second sbrk did NOT fail, we assume that space - is contiguous with first sbrk. This is a safe assumption unless - program is multithreaded but doesn't use locks and a foreign sbrk - occurred between our first and second calls. - */ - - if (snd_brk == (char*)(MORECORE_FAILURE)) { - correction = 0; - snd_brk = (char*)(MORECORE(0)); - } else - /* Call the `morecore' hook if necessary. */ - if (__after_morecore_hook) - (*__after_morecore_hook) (); - } - - /* handle non-contiguous cases */ - else { - /* MORECORE/mmap must correctly align */ - assert(((unsigned long)chunk2mem(brk) & MALLOC_ALIGN_MASK) == 0); - - /* Find out current end of memory */ - if (snd_brk == (char*)(MORECORE_FAILURE)) { - snd_brk = (char*)(MORECORE(0)); - } - } - - /* Adjust top based on results of second sbrk */ - if (snd_brk != (char*)(MORECORE_FAILURE)) { - av->top = (mchunkptr)aligned_brk; - set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE); - av->system_mem += correction; - - /* - If not the first time through, we either have a - gap due to foreign sbrk or a non-contiguous region. Insert a - double fencepost at old_top to prevent consolidation with space - we don't own. These fenceposts are artificial chunks that are - marked as inuse and are in any case too small to use. We need - two to make sizes and alignments work out. 
- */ - - if (old_size != 0) { - /* - Shrink old_top to insert fenceposts, keeping size a - multiple of MALLOC_ALIGNMENT. We know there is at least - enough space in old_top to do this. - */ - old_size = (old_size - 4*SIZE_SZ) & ~MALLOC_ALIGN_MASK; - set_head(old_top, old_size | PREV_INUSE); - - /* - Note that the following assignments completely overwrite - old_top when old_size was previously MINSIZE. This is - intentional. We need the fencepost, even if old_top otherwise gets - lost. - */ - chunk_at_offset(old_top, old_size )->size = - (2*SIZE_SZ)|PREV_INUSE; - - chunk_at_offset(old_top, old_size + 2*SIZE_SZ)->size = - (2*SIZE_SZ)|PREV_INUSE; - - /* If possible, release the rest. */ - if (old_size >= MINSIZE) { - _int_free(av, chunk2mem(old_top)); - } - - } - } - } - - /* Update statistics */ -#ifdef NO_THREADS - sum = av->system_mem + mp_.mmapped_mem; - if (sum > (unsigned long)(mp_.max_total_mem)) - mp_.max_total_mem = sum; -#endif - - } - - } /* if (av != &main_arena) */ - - if ((unsigned long)av->system_mem > (unsigned long)(av->max_system_mem)) - av->max_system_mem = av->system_mem; - check_malloc_state(av); - - /* finally, do the allocation */ - p = av->top; - size = chunksize(p); - - /* check that one of the above allocation paths succeeded */ - if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) { - remainder_size = size - nb; - remainder = chunk_at_offset(p, nb); - av->top = remainder; - set_head(p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE); - check_malloced_chunk(av, p, nb); - return chunk2mem(p); - } - - /* catch all failure paths */ - MALLOC_FAILURE_ACTION; - return 0; -} - - -#ifndef MORECORE_CANNOT_TRIM -/* - sYSTRIm is an inverse of sorts to sYSMALLOc. It gives memory back - to the system (via negative arguments to sbrk) if there is unused - memory at the `high' end of the malloc pool. It is called - automatically by free() when top space exceeds the trim - threshold. 
It is also called by the public malloc_trim routine. It - returns 1 if it actually released any memory, else 0. -*/ - -#if __STD_C -static int sYSTRIm(size_t pad, mstate av) -#else -static int sYSTRIm(pad, av) size_t pad; mstate av; -#endif -{ - long top_size; /* Amount of top-most memory */ - long extra; /* Amount to release */ - long released; /* Amount actually released */ - char* current_brk; /* address returned by pre-check sbrk call */ - char* new_brk; /* address returned by post-check sbrk call */ - size_t pagesz; - - pagesz = mp_.pagesize; - top_size = chunksize(av->top); - - /* Release in pagesize units, keeping at least one page */ - extra = ((top_size - pad - MINSIZE + (pagesz-1)) / pagesz - 1) * pagesz; - - if (extra > 0) { - - /* - Only proceed if end of memory is where we last set it. - This avoids problems if there were foreign sbrk calls. - */ - current_brk = (char*)(MORECORE(0)); - if (current_brk == (char*)(av->top) + top_size) { - - /* - Attempt to release memory. We ignore MORECORE return value, - and instead call again to find out where new end of memory is. - This avoids problems if first call releases less than we asked, - of if failure somehow altered brk value. (We could still - encounter problems if it altered brk in some very bad way, - but the only thing we can do is adjust anyway, which will cause - some downstream failure.) - */ - - MORECORE(-extra); - /* Call the `morecore' hook if necessary. */ - if (__after_morecore_hook) - (*__after_morecore_hook) (); - new_brk = (char*)(MORECORE(0)); - - if (new_brk != (char*)MORECORE_FAILURE) { - released = (long)(current_brk - new_brk); - - if (released != 0) { - /* Success. Adjust top. 
*/ - av->system_mem -= released; - set_head(av->top, (top_size - released) | PREV_INUSE); - check_malloc_state(av); - return 1; - } - } - } - } - return 0; -} -#endif - -#ifdef HAVE_MMAP - -static void -internal_function -#if __STD_C -munmap_chunk(mchunkptr p) -#else -munmap_chunk(p) mchunkptr p; -#endif -{ - INTERNAL_SIZE_T size = chunksize(p); - int ret; - - assert (chunk_is_mmapped(p)); -#if 0 - assert(! ((char*)p >= mp_.sbrk_base && (char*)p < mp_.sbrk_base + mp_.sbrked_mem)); - assert((mp_.n_mmaps > 0)); -#endif - assert(((p->prev_size + size) & (mp_.pagesize-1)) == 0); - - mp_.n_mmaps--; - mp_.mmapped_mem -= (size + p->prev_size); - - ret = munmap((char *)p - p->prev_size, size + p->prev_size); - - /* munmap returns non-zero on failure */ - assert(ret == 0); -} - -#if HAVE_MREMAP - -static mchunkptr -internal_function -#if __STD_C -mremap_chunk(mchunkptr p, size_t new_size) -#else -mremap_chunk(p, new_size) mchunkptr p; size_t new_size; -#endif -{ - size_t page_mask = mp_.pagesize - 1; - INTERNAL_SIZE_T offset = p->prev_size; - INTERNAL_SIZE_T size = chunksize(p); - char *cp; - - assert (chunk_is_mmapped(p)); -#if 0 - assert(! ((char*)p >= mp_.sbrk_base && (char*)p < mp_.sbrk_base + mp_.sbrked_mem)); - assert((mp_.n_mmaps > 0)); -#endif - assert(((size + offset) & (mp_.pagesize-1)) == 0); - - /* Note the extra SIZE_SZ overhead as in mmap_chunk(). 
*/ - new_size = (new_size + offset + SIZE_SZ + page_mask) & ~page_mask; - - /* OMPI change - take pessimistic approach and assume going to move */ - opal_mem_hooks_release_hook((char *)p - offset, size, 1); - cp = (char *)mremap((char *)p - offset, size + offset, new_size, - MREMAP_MAYMOVE); - - if (cp == MAP_FAILED) return 0; - - p = (mchunkptr)(cp + offset); - - assert(aligned_OK(chunk2mem(p))); - - assert((p->prev_size == offset)); - set_head(p, (new_size - offset)|IS_MMAPPED); - - mp_.mmapped_mem -= size + offset; - mp_.mmapped_mem += new_size; - if ((unsigned long)mp_.mmapped_mem > (unsigned long)mp_.max_mmapped_mem) - mp_.max_mmapped_mem = mp_.mmapped_mem; -#ifdef NO_THREADS - if ((unsigned long)(mp_.mmapped_mem + arena_mem + main_arena.system_mem) > - mp_.max_total_mem) - mp_.max_total_mem = mp_.mmapped_mem + arena_mem + main_arena.system_mem; -#endif - return p; -} - -#endif /* HAVE_MREMAP */ - -#endif /* HAVE_MMAP */ - -/*------------------------ Public wrappers. --------------------------------*/ - -Void_t* -public_mALLOc(size_t bytes) -{ - mstate ar_ptr; - Void_t *victim; - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - __malloc_ptr_t (*hook) __MALLOC_P ((size_t, __const __malloc_ptr_t)) = - __malloc_hook; - if (hook != NULL) - return (*hook)(bytes, RETURN_ADDRESS (0)); -#endif - - /* OMPI change: put in a flag so that we can know that this function - was invoked. This flag is checked in the memory/linux component - init to ensure that this ptmalloc is actually being used. Used a - simple "extern" here to get the flag symbol rather than putting - it in a new .h file that would only contain a small number of - symbols. */ - mca_memory_linux_component.malloc_invoked = true; - - arena_get(ar_ptr, bytes); - if(!ar_ptr) - return 0; - victim = _int_malloc(ar_ptr, bytes); - if(!victim) { - /* Maybe the failure is due to running out of mmapped areas. 
*/ - if(ar_ptr != &main_arena) { - (void)mutex_unlock(&ar_ptr->mutex); - (void)mutex_lock(&main_arena.mutex); - victim = _int_malloc(&main_arena, bytes); - (void)mutex_unlock(&main_arena.mutex); - } else { -#if USE_ARENAS - /* ... or sbrk() has failed and there is still a chance to mmap() */ - ar_ptr = arena_get2(ar_ptr->next ? ar_ptr : 0, bytes); - (void)mutex_unlock(&main_arena.mutex); - if(ar_ptr) { - victim = _int_malloc(ar_ptr, bytes); - (void)mutex_unlock(&ar_ptr->mutex); - } -#endif - } - } else - (void)mutex_unlock(&ar_ptr->mutex); - assert(!victim || chunk_is_mmapped(mem2chunk(victim)) || - ar_ptr == arena_for_chunk(mem2chunk(victim))); - - return victim; -} -#ifdef libc_hidden_def -libc_hidden_def(public_mALLOc) -#endif - -void -public_fREe(Void_t* mem) -{ - mstate ar_ptr; - mchunkptr p; /* chunk corresponding to mem */ - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - void (*hook) __MALLOC_P ((__malloc_ptr_t, __const __malloc_ptr_t)) = - __free_hook; - if (hook != NULL) { - (*hook)(mem, RETURN_ADDRESS (0)); - return; - } -#endif - - /* OMPI change: put in a flag so that we can know that this function - was invoked. This flag is checked in the memory/linux component - init to ensure that this ptmalloc is actually being used. Used a - simple "extern" here to get the flag symbol rather than putting - it in a new .h file that would only contain a small number of - symbols. */ - mca_memory_linux_component.free_invoked = true; - - if (mem == 0) /* free(0) has no effect */ - return; - - p = mem2chunk(mem); - -#if HAVE_MMAP - if (chunk_is_mmapped(p)) /* release mmapped memory. 
*/ - { - munmap_chunk(p); - return; - } -#endif - - ar_ptr = arena_for_chunk(p); -#if THREAD_STATS - if(!mutex_trylock(&ar_ptr->mutex)) - ++(ar_ptr->stat_lock_direct); - else { - (void)mutex_lock(&ar_ptr->mutex); - ++(ar_ptr->stat_lock_wait); - } -#else - (void)mutex_lock(&ar_ptr->mutex); -#endif - _int_free(ar_ptr, mem); - (void)mutex_unlock(&ar_ptr->mutex); -} -#ifdef libc_hidden_def -libc_hidden_def (public_fREe) -#endif - -Void_t* -public_rEALLOc(Void_t* oldmem, size_t bytes) -{ - mstate ar_ptr; - INTERNAL_SIZE_T nb; /* padded request size */ - - mchunkptr oldp; /* chunk corresponding to oldmem */ - INTERNAL_SIZE_T oldsize; /* its size */ - - Void_t* newp; /* chunk to return */ - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - __malloc_ptr_t (*hook) __MALLOC_P ((__malloc_ptr_t, size_t, - __const __malloc_ptr_t)) = - __realloc_hook; - if (hook != NULL) - return (*hook)(oldmem, bytes, RETURN_ADDRESS (0)); -#endif - - /* OMPI change: put in a flag so that we can know that this function - was invoked. This flag is checked in the memory/linux component - init to ensure that this ptmalloc is actually being used. Used a - simple "extern" here to get the flag symbol rather than putting - it in a new .h file that would only contain a small number of - symbols. */ - mca_memory_linux_component.realloc_invoked = true; - -#if REALLOC_ZERO_BYTES_FREES - if (bytes == 0 && oldmem != NULL) { public_fREe(oldmem); return 0; } -#endif - - /* realloc of null is supposed to be same as malloc */ - if (oldmem == 0) return public_mALLOc(bytes); - - oldp = mem2chunk(oldmem); - oldsize = chunksize(oldp); - - checked_request2size(bytes, nb); - -#if HAVE_MMAP - if (chunk_is_mmapped(oldp)) - { - Void_t* newmem; - -#if HAVE_MREMAP - newp = mremap_chunk(oldp, nb); - if(newp) { - return chunk2mem(newp); - } -#endif - /* Note the extra SIZE_SZ overhead. */ - if(oldsize - SIZE_SZ >= nb) { - return oldmem; /* do nothing */ - } - /* Must alloc, copy, free. 
*/ - newmem = public_mALLOc(bytes); - if (newmem == 0) return 0; /* propagate failure */ - MALLOC_COPY(newmem, oldmem, oldsize - 2*SIZE_SZ); - munmap_chunk(oldp); - return newmem; - } -#endif - - ar_ptr = arena_for_chunk(oldp); -#if THREAD_STATS - if(!mutex_trylock(&ar_ptr->mutex)) - ++(ar_ptr->stat_lock_direct); - else { - (void)mutex_lock(&ar_ptr->mutex); - ++(ar_ptr->stat_lock_wait); - } -#else - (void)mutex_lock(&ar_ptr->mutex); -#endif - -#ifndef NO_THREADS - /* As in malloc(), remember this arena for the next allocation. */ - tsd_setspecific(arena_key, (Void_t *)ar_ptr); -#endif - - newp = _int_realloc(ar_ptr, oldmem, bytes); - - (void)mutex_unlock(&ar_ptr->mutex); - assert(!newp || chunk_is_mmapped(mem2chunk(newp)) || - ar_ptr == arena_for_chunk(mem2chunk(newp))); - return newp; -} -#ifdef libc_hidden_def -libc_hidden_def (public_rEALLOc) -#endif - -Void_t* -public_mEMALIGn(size_t alignment, size_t bytes) -{ - mstate ar_ptr; - Void_t *p; - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - __malloc_ptr_t (*hook) __MALLOC_PMT ((size_t, size_t, - __const __malloc_ptr_t)) = - __memalign_hook; - if (hook != NULL) - return (*hook)(alignment, bytes, RETURN_ADDRESS (0)); -#endif - - /* OMPI change: put in a flag so that we can know that this function - was invoked. This flag is checked in the memory/linux component - init to ensure that this ptmalloc is actually being used. Used a - simple "extern" here to get the flag symbol rather than putting - it in a new .h file that would only contain a small number of - symbols. 
*/ - mca_memory_linux_component.memalign_invoked = true; - - /* If need less alignment than we give anyway, just relay to malloc */ - if (alignment <= MALLOC_ALIGNMENT) return public_mALLOc(bytes); - - /* Otherwise, ensure that it is at least a minimum chunk size */ - if (alignment < MINSIZE) alignment = MINSIZE; - - arena_get(ar_ptr, bytes + alignment + MINSIZE); - if(!ar_ptr) - return 0; - p = _int_memalign(ar_ptr, alignment, bytes); - (void)mutex_unlock(&ar_ptr->mutex); - if(!p) { - /* Maybe the failure is due to running out of mmapped areas. */ - if(ar_ptr != &main_arena) { - (void)mutex_lock(&main_arena.mutex); - p = _int_memalign(&main_arena, alignment, bytes); - (void)mutex_unlock(&main_arena.mutex); - } else { -#if USE_ARENAS - /* ... or sbrk() has failed and there is still a chance to mmap() */ - ar_ptr = arena_get2(ar_ptr->next ? ar_ptr : 0, bytes); - if(ar_ptr) { - p = _int_memalign(ar_ptr, alignment, bytes); - (void)mutex_unlock(&ar_ptr->mutex); - } -#endif - } - } - assert(!p || chunk_is_mmapped(mem2chunk(p)) || - ar_ptr == arena_for_chunk(mem2chunk(p))); - return p; -} -#ifdef libc_hidden_def -libc_hidden_def (public_mEMALIGn) -#endif - -Void_t* -public_vALLOc(size_t bytes) -{ - mstate ar_ptr; - Void_t *p; - - if(__malloc_initialized < 0) - ptmalloc_init (); - arena_get(ar_ptr, bytes + mp_.pagesize + MINSIZE); - if(!ar_ptr) - return 0; - p = _int_valloc(ar_ptr, bytes); - (void)mutex_unlock(&ar_ptr->mutex); - return p; -} - -Void_t* -public_pVALLOc(size_t bytes) -{ - mstate ar_ptr; - Void_t *p; - - if(__malloc_initialized < 0) - ptmalloc_init (); - arena_get(ar_ptr, bytes + 2*mp_.pagesize + MINSIZE); - p = _int_pvalloc(ar_ptr, bytes); - (void)mutex_unlock(&ar_ptr->mutex); - return p; -} - -Void_t* -public_cALLOc(size_t n, size_t elem_size) -{ - mstate av; - mchunkptr oldtop, p; - INTERNAL_SIZE_T bytes, sz, csz, oldtopsize; - Void_t* mem; - unsigned long clearsize; - unsigned long nclears; - INTERNAL_SIZE_T* d; - /* OMPI change: the hook is us -- don't 
call the hook */ -#if 0 - __malloc_ptr_t (*hook) __MALLOC_PMT ((size_t, __const __malloc_ptr_t)) = - __malloc_hook; -#endif - - /* size_t is unsigned so the behavior on overflow is defined. */ - bytes = n * elem_size; -#define HALF_INTERNAL_SIZE_T \ - (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2)) - if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0)) { - if (elem_size != 0 && bytes / elem_size != n) { - MALLOC_FAILURE_ACTION; - return 0; - } - } - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - if (hook != NULL) { - sz = bytes; - mem = (*hook)(sz, RETURN_ADDRESS (0)); - if(mem == 0) - return 0; -#ifdef HAVE_MEMCPY - return memset(mem, 0, sz); -#else - while(sz > 0) ((char*)mem)[--sz] = 0; /* rather inefficient */ - return mem; -#endif - } -#endif - - sz = bytes; - - arena_get(av, sz); - if(!av) - return 0; - - /* Check if we hand out the top chunk, in which case there may be no - need to clear. */ -#if MORECORE_CLEARS - oldtop = top(av); - oldtopsize = chunksize(top(av)); -#if MORECORE_CLEARS < 2 - /* Only newly allocated memory is guaranteed to be cleared. */ - if (av == &main_arena && - oldtopsize < (INTERNAL_SIZE_T)(mp_.sbrk_base + av->max_system_mem - (char *)oldtop)) - oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *)oldtop); -#endif -#endif - mem = _int_malloc(av, sz); - - /* Only clearing follows, so we can unlock early. */ - (void)mutex_unlock(&av->mutex); - - assert(!mem || chunk_is_mmapped(mem2chunk(mem)) || - av == arena_for_chunk(mem2chunk(mem))); - - if (mem == 0) { - /* Maybe the failure is due to running out of mmapped areas. */ - if(av != &main_arena) { - (void)mutex_lock(&main_arena.mutex); - mem = _int_malloc(&main_arena, sz); - (void)mutex_unlock(&main_arena.mutex); - } else { -#if USE_ARENAS - /* ... or sbrk() has failed and there is still a chance to mmap() */ - (void)mutex_lock(&main_arena.mutex); - av = arena_get2(av->next ? 
av : 0, sz); - (void)mutex_unlock(&main_arena.mutex); - if(av) { - mem = _int_malloc(av, sz); - (void)mutex_unlock(&av->mutex); - } -#endif - } - if (mem == 0) return 0; - } - p = mem2chunk(mem); - - /* Two optional cases in which clearing not necessary */ -#if HAVE_MMAP - if (chunk_is_mmapped(p)) { - return mem; - } -#endif - - csz = chunksize(p); - -#if MORECORE_CLEARS - if (p == oldtop && csz > oldtopsize) { - /* clear only the bytes from non-freshly-sbrked memory */ - csz = oldtopsize; - } -#endif - - /* Unroll clear of <= 36 bytes (72 if 8byte sizes). We know that - contents have an odd number of INTERNAL_SIZE_T-sized words; - minimally 3. */ - d = (INTERNAL_SIZE_T*)mem; - clearsize = csz - SIZE_SZ; - nclears = clearsize / sizeof(INTERNAL_SIZE_T); - assert(nclears >= 3); - - if (nclears > 9) - MALLOC_ZERO(d, clearsize); - - else { - *(d+0) = 0; - *(d+1) = 0; - *(d+2) = 0; - if (nclears > 4) { - *(d+3) = 0; - *(d+4) = 0; - if (nclears > 6) { - *(d+5) = 0; - *(d+6) = 0; - if (nclears > 8) { - *(d+7) = 0; - *(d+8) = 0; - } - } - } - } - - return mem; -} - -Void_t** -public_iCALLOc(size_t n, size_t elem_size, Void_t** chunks) -{ - mstate ar_ptr; - Void_t** m; - - arena_get(ar_ptr, n*elem_size); - if(!ar_ptr) - return 0; - - m = _int_icalloc(ar_ptr, n, elem_size, chunks); - (void)mutex_unlock(&ar_ptr->mutex); - return m; -} - -Void_t** -public_iCOMALLOc(size_t n, size_t sizes[], Void_t** chunks) -{ - mstate ar_ptr; - Void_t** m; - - arena_get(ar_ptr, 0); - if(!ar_ptr) - return 0; - - m = _int_icomalloc(ar_ptr, n, sizes, chunks); - (void)mutex_unlock(&ar_ptr->mutex); - return m; -} - -#ifndef _LIBC - -void -public_cFREe(Void_t* m) -{ - public_fREe(m); -} - -#endif /* _LIBC */ - -int -public_mTRIm(size_t s) -{ - int result; - - (void)mutex_lock(&main_arena.mutex); - result = mTRIm(s); - (void)mutex_unlock(&main_arena.mutex); - return result; -} - -size_t -public_mUSABLe(Void_t* m) -{ - size_t result; - - result = mUSABLe(m); - return result; -} - -/* This exists 
mainly for backward compatibility. Calling - _int_get_arena_info() directly is more useful. */ -struct mallinfo -public_mALLINFo() -{ - struct malloc_arena_info mai; - struct mallinfo m; - size_t avail; - - if(__malloc_initialized < 0) - ptmalloc_init (); - _int_get_arena_info(&main_arena, &mai); - /* Account for top */ - avail = mai.fastavail + mai.binavail + mai.top_size; - m.smblks = mai.nfastblocks; - m.ordblks = mai.nbinblocks + 1; - m.fordblks = avail; - m.uordblks = mai.system_mem - avail; - m.arena = mai.system_mem; - m.hblks = mp_.n_mmaps; - m.hblkhd = mp_.mmapped_mem; - m.fsmblks = mai.fastavail; - m.keepcost = mai.top_size; - m.usmblks = mp_.max_total_mem; - return m; -} - -int -public_mALLOPt(int p, int v) -{ - int result; - result = mALLOPt(p, v); - return result; -} - -/* - ------------------------------ malloc ------------------------------ -*/ - -/* With Intel Composer XE V12.1.0, release 2011.6.233, any launch */ -/* fails, even before main(), due to a bug in the vectorizer (see */ -/* https://svn.open-mpi.org/trac/ompi/changeset/25290). The fix is */ -/* to disable vectorization by reducing the optimization level to */ -/* -O1 for _int_malloc(). The only reliable method to identify */ -/* release 2011.6.233 is the predefined __INTEL_COMPILER_BUILD_DATE */ -/* macro, which will have the value 20110811 (Linux, Windows, and */ -/* Mac OS X). (The predefined __INTEL_COMPILER macro is nonsense, */ -/* 9999, and both the 2011.6.233 and 2011.7.256 releases identify */ -/* themselves as V12.1.0 from the -v command line option.) 
*/ - -#ifdef __INTEL_COMPILER_BUILD_DATE -# if __INTEL_COMPILER_BUILD_DATE == 20110811 -# pragma GCC optimization_level 1 -# endif -#endif - -Void_t* -_int_malloc(mstate av, size_t bytes) -{ - INTERNAL_SIZE_T nb; /* normalized request size */ - unsigned int idx; /* associated bin index */ - mbinptr bin; /* associated bin */ - mfastbinptr* fb; /* associated fastbin */ - - mchunkptr victim; /* inspected/selected chunk */ - INTERNAL_SIZE_T size; /* its size */ - int victim_index; /* its bin index */ - - mchunkptr remainder; /* remainder from a split */ - unsigned long remainder_size; /* its size */ - - unsigned int block; /* bit map traverser */ - unsigned int bit; /* bit map traverser */ - unsigned int map; /* current word of binmap */ - - mchunkptr fwd; /* misc temp for linking */ - mchunkptr bck; /* misc temp for linking */ - - /* - Convert request size to internal form by adding SIZE_SZ bytes - overhead plus possibly more to obtain necessary alignment and/or - to obtain a size of at least MINSIZE, the smallest allocatable - size. Also, checked_request2size traps (returning 0) request sizes - that are so large that they wrap around zero when padded and - aligned. - */ - - checked_request2size(bytes, nb); - - /* - If the size qualifies as a fastbin, first check corresponding bin. - This code is safe to execute even if av is not yet initialized, so we - can try it without checking, which saves some time on this fast path. - */ - - if ((unsigned long)(nb) <= (unsigned long)(av->max_fast)) { - fb = &(av->fastbins[(fastbin_index(nb))]); - if ( (victim = *fb) != 0) { - *fb = victim->fd; - check_remalloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - } - - /* - If a small request, check regular bin. Since these "smallbins" - hold one size each, no searching within bins is necessary. - (For a large request, we need to wait until unsorted chunks are - processed to find best fit. But for small ones, fits are exact - anyway, so we can check now, which is faster.) 
- */ - - if (in_smallbin_range(nb)) { - idx = smallbin_index(nb); - bin = bin_at(av,idx); - - if ( (victim = last(bin)) != bin) { - if (victim == 0) /* initialization check */ - malloc_consolidate(av); - else { - bck = victim->bk; - set_inuse_bit_at_offset(victim, nb); - bin->bk = bck; - bck->fd = bin; - - if (av != &main_arena) - victim->size |= NON_MAIN_ARENA; - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - } - } - - /* - If this is a large request, consolidate fastbins before continuing. - While it might look excessive to kill all fastbins before - even seeing if there is space available, this avoids - fragmentation problems normally associated with fastbins. - Also, in practice, programs tend to have runs of either small or - large requests, but less often mixtures, so consolidation is not - invoked all that often in most programs. And the programs that - it is called frequently in otherwise tend to fragment. - */ - - else { - idx = largebin_index(nb); - if (have_fastchunks(av)) - malloc_consolidate(av); - } - - /* - Process recently freed or remaindered chunks, taking one only if - it is exact fit, or, if this a small request, the chunk is remainder from - the most recent non-exact fit. Place other traversed chunks in - bins. Note that this step is the only place in any routine where - chunks are placed in bins. - - The outer loop here is needed because we might not realize until - near the end of malloc that we should have consolidated, so must - do so and retry. This happens at most once, and only when we would - otherwise need to expand memory to service a "small" request. - */ - - for(;;) { - - while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) { - bck = victim->bk; - size = chunksize(victim); - - /* - If a small request, try to use last remainder if it is the - only chunk in unsorted bin. This helps promote locality for - runs of consecutive small requests. 
This is the only - exception to best-fit, and applies only when there is - no exact fit for a small chunk. - */ - - if (in_smallbin_range(nb) && - bck == unsorted_chunks(av) && - victim == av->last_remainder && - (unsigned long)(size) > (unsigned long)(nb + MINSIZE)) { - - /* split and reattach remainder */ - remainder_size = size - nb; - remainder = chunk_at_offset(victim, nb); - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - av->last_remainder = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - - set_head(victim, nb | PREV_INUSE | - (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - - /* remove from unsorted list */ - unsorted_chunks(av)->bk = bck; - bck->fd = unsorted_chunks(av); - - /* Take now instead of binning if exact fit */ - - if (size == nb) { - set_inuse_bit_at_offset(victim, size); - if (av != &main_arena) - victim->size |= NON_MAIN_ARENA; - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - - /* place chunk in bin */ - - if (in_smallbin_range(size)) { - victim_index = smallbin_index(size); - bck = bin_at(av, victim_index); - fwd = bck->fd; - } - else { - victim_index = largebin_index(size); - bck = bin_at(av, victim_index); - fwd = bck->fd; - - if (fwd != bck) { - /* if smaller than smallest, place first */ - assert((bck->bk->size & NON_MAIN_ARENA) == 0); - if ((unsigned long)(size) < (unsigned long)(bck->bk->size)) { - fwd = bck; - bck = bck->bk; - } - else if ((unsigned long)(size) >= - (unsigned long)(FIRST_SORTED_BIN_SIZE)) { - - /* maintain large bins in sorted order */ - size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */ - assert((fwd->size & NON_MAIN_ARENA) == 0); - while ((unsigned long)(size) < (unsigned long)(fwd->size)) { - fwd = fwd->fd; - assert((fwd->size & NON_MAIN_ARENA) == 0); - } - bck = fwd->bk; - } - } - } 
- - mark_bin(av, victim_index); - victim->bk = bck; - victim->fd = fwd; - fwd->bk = victim; - bck->fd = victim; - } - - /* - If a large request, scan through the chunks of current bin in - sorted order to find smallest that fits. This is the only step - where an unbounded number of chunks might be scanned without doing - anything useful with them. However the lists tend to be short. - */ - - if (!in_smallbin_range(nb)) { - bin = bin_at(av, idx); - - for (victim = last(bin); victim != bin; victim = victim->bk) { - size = chunksize(victim); - - if ((unsigned long)(size) >= (unsigned long)(nb)) { - remainder_size = size - nb; - unlink(victim, bck, fwd); - - /* Exhaust */ - if (remainder_size < MINSIZE) { - set_inuse_bit_at_offset(victim, size); - if (av != &main_arena) - victim->size |= NON_MAIN_ARENA; - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - /* Split */ - else { - remainder = chunk_at_offset(victim, nb); - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - set_head(victim, nb | PREV_INUSE | - (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - } - } - } - - /* - Search for a chunk by scanning bins, starting with next largest - bin. This search is strictly by best-fit; i.e., the smallest - (with ties going to approximately the least recently used) chunk - that fits is selected. - - The bitmap avoids needing to check that most blocks are nonempty. - The particular case of skipping all bins during warm-up phases - when no chunks have been returned yet is faster than it might look. - */ - - ++idx; - bin = bin_at(av,idx); - block = idx2block(idx); - map = av->binmap[block]; - bit = idx2bit(idx); - - for (;;) { - - /* Skip rest of block if there are no more set bits in this block. 
*/ - if (bit > map || bit == 0) { - do { - if (++block >= BINMAPSIZE) /* out of bins */ - goto use_top; - } while ( (map = av->binmap[block]) == 0); - - bin = bin_at(av, (block << BINMAPSHIFT)); - bit = 1; - } - - /* Advance to bin with set bit. There must be one. */ - while ((bit & map) == 0) { - bin = next_bin(bin); - bit <<= 1; - assert(bit != 0); - } - - /* Inspect the bin. It is likely to be non-empty */ - victim = last(bin); - - /* If a false alarm (empty bin), clear the bit. */ - if (victim == bin) { - av->binmap[block] = map &= ~bit; /* Write through */ - bin = next_bin(bin); - bit <<= 1; - } - - else { - size = chunksize(victim); - - /* We know the first chunk in this bin is big enough to use. */ - assert((unsigned long)(size) >= (unsigned long)(nb)); - - remainder_size = size - nb; - - /* unlink */ - bck = victim->bk; - bin->bk = bck; - bck->fd = bin; - - /* Exhaust */ - if (remainder_size < MINSIZE) { - set_inuse_bit_at_offset(victim, size); - if (av != &main_arena) - victim->size |= NON_MAIN_ARENA; - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - - /* Split */ - else { - remainder = chunk_at_offset(victim, nb); - - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - /* advertise as last remainder */ - if (in_smallbin_range(nb)) - av->last_remainder = remainder; - - set_head(victim, nb | PREV_INUSE | - (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - } - } - - use_top: - /* - If large enough, split off the chunk bordering the end of memory - (held in av->top). Note that this is in accord with the best-fit - search rule. In effect, av->top is treated as larger (and thus - less well fitting) than any other available chunk since it can - be extended to be as large as necessary (up to system - limitations). 
- - We require that av->top always exists (i.e., has size >= - MINSIZE) after initialization, so if it would otherwise be - exhuasted by current request, it is replenished. (The main - reason for ensuring it exists is that we may need MINSIZE space - to put in fenceposts in sysmalloc.) - */ - - victim = av->top; - size = chunksize(victim); - - if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) { - remainder_size = size - nb; - remainder = chunk_at_offset(victim, nb); - av->top = remainder; - set_head(victim, nb | PREV_INUSE | - (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE); - - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - - /* - If there is space available in fastbins, consolidate and retry, - to possibly avoid expanding memory. This can occur only if nb is - in smallbin range so we didn't consolidate upon entry. - */ - - else if (have_fastchunks(av)) { - assert(in_smallbin_range(nb)); - malloc_consolidate(av); - idx = smallbin_index(nb); /* restore original bin index */ - } - - /* - Otherwise, relay to handle system-dependent cases - */ - else - return sYSMALLOc(nb, av); - } -} - -/* - ------------------------------ free ------------------------------ -*/ - -void -_int_free(mstate av, Void_t* mem) -{ - mchunkptr p; /* chunk corresponding to mem */ - INTERNAL_SIZE_T size; /* its size */ - mfastbinptr* fb; /* associated fastbin */ - mchunkptr nextchunk; /* next contiguous chunk */ - INTERNAL_SIZE_T nextsize; /* its size */ - int nextinuse; /* true if nextchunk is used */ - INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */ - mchunkptr bck; /* misc temp for linking */ - mchunkptr fwd; /* misc temp for linking */ - - - /* free(0) has no effect */ - if (mem != 0) { - p = mem2chunk(mem); - size = chunksize(p); - - check_inuse_chunk(av, p); - - /* - If eligible, place chunk on a fastbin so it can be found - and used quickly in malloc. 
- */ - - if ((unsigned long)(size) <= (unsigned long)(av->max_fast) - -#if TRIM_FASTBINS - /* - If TRIM_FASTBINS set, don't place chunks - bordering top into fastbins - */ - && (chunk_at_offset(p, size) != av->top) -#endif - ) { - - set_fastchunks(av); - fb = &(av->fastbins[fastbin_index(size)]); - p->fd = *fb; - *fb = p; - } - - /* - Consolidate other non-mmapped chunks as they arrive. - */ - - else if (!chunk_is_mmapped(p)) { - nextchunk = chunk_at_offset(p, size); - nextsize = chunksize(nextchunk); - assert(nextsize > 0); - - /* consolidate backward */ - if (!prev_inuse(p)) { - prevsize = p->prev_size; - size += prevsize; - p = chunk_at_offset(p, -((long) prevsize)); - unlink(p, bck, fwd); - } - - if (nextchunk != av->top) { - /* get and clear inuse bit */ - nextinuse = inuse_bit_at_offset(nextchunk, nextsize); - - /* consolidate forward */ - if (!nextinuse) { - unlink(nextchunk, bck, fwd); - size += nextsize; - } else - clear_inuse_bit_at_offset(nextchunk, 0); - - /* - Place the chunk in unsorted chunk list. Chunks are - not placed into regular bins until after they have - been given one chance to be used in malloc. - */ - - bck = unsorted_chunks(av); - fwd = bck->fd; - p->bk = bck; - p->fd = fwd; - bck->fd = p; - fwd->bk = p; - - set_head(p, size | PREV_INUSE); - set_foot(p, size); - - check_free_chunk(av, p); - } - - /* - If the chunk borders the current high end of memory, - consolidate into top - */ - - else { - size += nextsize; - set_head(p, size | PREV_INUSE); - av->top = p; - check_chunk(av, p); - } - - /* - If freeing a large space, consolidate possibly-surrounding - chunks. Then, if the total unused topmost memory exceeds trim - threshold, ask malloc_trim to reduce top. - - Unless max_fast is 0, we don't know if there are fastbins - bordering top, so we cannot tell for sure whether threshold - has been reached unless fastbins are consolidated. But we - don't want to consolidate on each free. 
As a compromise, - consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD - is reached. - */ - - if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { - if (have_fastchunks(av)) - malloc_consolidate(av); - - if (av == &main_arena) { -#ifndef MORECORE_CANNOT_TRIM - if ((unsigned long)(chunksize(av->top)) >= - (unsigned long)(mp_.trim_threshold)) - sYSTRIm(mp_.top_pad, av); -#endif - } else { - /* Always try heap_trim(), even if the top chunk is not - large, because the corresponding heap might go away. */ - heap_info *heap = heap_for_ptr(top(av)); - - assert(heap->ar_ptr == av); - heap_trim(heap, mp_.top_pad); - } - } - - } - /* - If the chunk was allocated via mmap, release via munmap(). Note - that if HAVE_MMAP is false but chunk_is_mmapped is true, then - user must have overwritten memory. There's nothing we can do to - catch this error unless MALLOC_DEBUG is set, in which case - check_inuse_chunk (above) will have triggered error. - */ - - else { -#if HAVE_MMAP - int ret; - INTERNAL_SIZE_T offset = p->prev_size; - mp_.n_mmaps--; - mp_.mmapped_mem -= (size + offset); - ret = munmap((char*)p - offset, size + offset); - /* munmap returns non-zero on failure */ - assert(ret == 0); -#endif - } - } -} - -/* - ------------------------- malloc_consolidate ------------------------- - - malloc_consolidate is a specialized version of free() that tears - down chunks held in fastbins. Free itself cannot be used for this - purpose since, among other things, it might place chunks back onto - fastbins. So, instead, we need to use a minor variant of the same - code. - - Also, because this routine needs to be called the first time through - malloc anyway, it turns out to be the perfect place to trigger - initialization code. 
-*/ - -#if __STD_C -static void malloc_consolidate(mstate av) -#else -static void malloc_consolidate(av) mstate av; -#endif -{ - mfastbinptr* fb; /* current fastbin being consolidated */ - mfastbinptr* maxfb; /* last fastbin (for loop control) */ - mchunkptr p; /* current chunk being consolidated */ - mchunkptr nextp; /* next chunk to consolidate */ - mchunkptr unsorted_bin; /* bin header */ - mchunkptr first_unsorted; /* chunk to link to */ - - /* These have same use as in free() */ - mchunkptr nextchunk; - INTERNAL_SIZE_T size; - INTERNAL_SIZE_T nextsize; - INTERNAL_SIZE_T prevsize; - int nextinuse; - mchunkptr bck; - mchunkptr fwd; - - /* - If max_fast is 0, we know that av hasn't - yet been initialized, in which case do so below - */ - - if (av->max_fast != 0) { - clear_fastchunks(av); - - unsorted_bin = unsorted_chunks(av); - - /* - Remove each chunk from fast bin and consolidate it, placing it - then in unsorted bin. Among other reasons for doing this, - placing in unsorted bin avoids needing to calculate actual bins - until malloc is sure that chunks aren't immediately going to be - reused anyway. 
- */ - - maxfb = &(av->fastbins[fastbin_index(av->max_fast)]); - fb = &(av->fastbins[0]); - do { - if ( (p = *fb) != 0) { - *fb = 0; - - do { - check_inuse_chunk(av, p); - nextp = p->fd; - - /* Slightly streamlined version of consolidation code in free() */ - size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA); - nextchunk = chunk_at_offset(p, size); - nextsize = chunksize(nextchunk); - - if (!prev_inuse(p)) { - prevsize = p->prev_size; - size += prevsize; - p = chunk_at_offset(p, -((long) prevsize)); - unlink(p, bck, fwd); - } - - if (nextchunk != av->top) { - nextinuse = inuse_bit_at_offset(nextchunk, nextsize); - - if (!nextinuse) { - size += nextsize; - unlink(nextchunk, bck, fwd); - } else - clear_inuse_bit_at_offset(nextchunk, 0); - - first_unsorted = unsorted_bin->fd; - unsorted_bin->fd = p; - first_unsorted->bk = p; - - set_head(p, size | PREV_INUSE); - p->bk = unsorted_bin; - p->fd = first_unsorted; - set_foot(p, size); - } - - else { - size += nextsize; - set_head(p, size | PREV_INUSE); - av->top = p; - } - - } while ( (p = nextp) != 0); - - } - } while (fb++ != maxfb); - } - else { - malloc_init_state(av); - check_malloc_state(av); - } -} - -/* - ------------------------------ realloc ------------------------------ -*/ - -Void_t* -_int_realloc(mstate av, Void_t* oldmem, size_t bytes) -{ - INTERNAL_SIZE_T nb; /* padded request size */ - - mchunkptr oldp; /* chunk corresponding to oldmem */ - INTERNAL_SIZE_T oldsize; /* its size */ - - mchunkptr newp; /* chunk to return */ - INTERNAL_SIZE_T newsize; /* its size */ - Void_t* newmem; /* corresponding user mem */ - - mchunkptr next; /* next contiguous chunk after oldp */ - - mchunkptr remainder; /* extra space at end of newp */ - unsigned long remainder_size; /* its size */ - - mchunkptr bck; /* misc temp for linking */ - mchunkptr fwd; /* misc temp for linking */ - - unsigned long copysize; /* bytes to copy */ - unsigned int ncopies; /* INTERNAL_SIZE_T words to copy */ - INTERNAL_SIZE_T* s; /* copy source */ - 
INTERNAL_SIZE_T* d; /* copy destination */ - - -#if REALLOC_ZERO_BYTES_FREES - if (bytes == 0) { - _int_free(av, oldmem); - return 0; - } -#endif - - /* realloc of null is supposed to be same as malloc */ - if (oldmem == 0) return _int_malloc(av, bytes); - - checked_request2size(bytes, nb); - - oldp = mem2chunk(oldmem); - oldsize = chunksize(oldp); - - check_inuse_chunk(av, oldp); - - if (!chunk_is_mmapped(oldp)) { - - if ((unsigned long)(oldsize) >= (unsigned long)(nb)) { - /* already big enough; split below */ - newp = oldp; - newsize = oldsize; - } - - else { - next = chunk_at_offset(oldp, oldsize); - - /* Try to expand forward into top */ - if (next == av->top && - (unsigned long)(newsize = oldsize + chunksize(next)) >= - (unsigned long)(nb + MINSIZE)) { - set_head_size(oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0)); - av->top = chunk_at_offset(oldp, nb); - set_head(av->top, (newsize - nb) | PREV_INUSE); - check_inuse_chunk(av, oldp); - return chunk2mem(oldp); - } - - /* Try to expand forward into next chunk; split off remainder below */ - else if (next != av->top && - !inuse(next) && - (unsigned long)(newsize = oldsize + chunksize(next)) >= - (unsigned long)(nb)) { - newp = oldp; - unlink(next, bck, fwd); - } - - /* allocate, copy, free */ - else { - newmem = _int_malloc(av, nb - MALLOC_ALIGN_MASK); - if (newmem == 0) - return 0; /* propagate failure */ - - newp = mem2chunk(newmem); - newsize = chunksize(newp); - - /* - Avoid copy if newp is next chunk after oldp. - */ - if (newp == next) { - newsize += oldsize; - newp = oldp; - } - else { - /* - Unroll copy of <= 36 bytes (72 if 8byte sizes) - We know that contents have an odd number of - INTERNAL_SIZE_T-sized words; minimally 3. 
- */ - - copysize = oldsize - SIZE_SZ; - s = (INTERNAL_SIZE_T*)(oldmem); - d = (INTERNAL_SIZE_T*)(newmem); - ncopies = copysize / sizeof(INTERNAL_SIZE_T); - assert(ncopies >= 3); - - if (ncopies > 9) - MALLOC_COPY(d, s, copysize); - - else { - *(d+0) = *(s+0); - *(d+1) = *(s+1); - *(d+2) = *(s+2); - if (ncopies > 4) { - *(d+3) = *(s+3); - *(d+4) = *(s+4); - if (ncopies > 6) { - *(d+5) = *(s+5); - *(d+6) = *(s+6); - if (ncopies > 8) { - *(d+7) = *(s+7); - *(d+8) = *(s+8); - } - } - } - } - - _int_free(av, oldmem); - check_inuse_chunk(av, newp); - return chunk2mem(newp); - } - } - } - - /* If possible, free extra space in old or extended chunk */ - - assert((unsigned long)(newsize) >= (unsigned long)(nb)); - - remainder_size = newsize - nb; - - if (remainder_size < MINSIZE) { /* not enough extra to split off */ - set_head_size(newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_inuse_bit_at_offset(newp, newsize); - } - else { /* split remainder */ - remainder = chunk_at_offset(newp, nb); - set_head_size(newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE | - (av != &main_arena ? 
NON_MAIN_ARENA : 0)); - /* Mark remainder as inuse so free() won't complain */ - set_inuse_bit_at_offset(remainder, remainder_size); - _int_free(av, chunk2mem(remainder)); - } - - check_inuse_chunk(av, newp); - return chunk2mem(newp); - } - - /* - Handle mmap cases - */ - - else { -#if HAVE_MMAP - -#if HAVE_MREMAP - INTERNAL_SIZE_T offset = oldp->prev_size; - size_t pagemask = mp_.pagesize - 1; - char *cp; - unsigned long sum; - - /* Note the extra SIZE_SZ overhead */ - newsize = (nb + offset + SIZE_SZ + pagemask) & ~pagemask; - - /* don't need to remap if still within same page */ - if (oldsize == newsize - offset) - return oldmem; - - /* OMPI change - take pessimistic approach and assume going to move */ - opal_mem_hooks_release_hook((char *)oldp - offset, oldsize + offset, 1); - cp = (char*)mremap((char*)oldp - offset, oldsize + offset, newsize, 1); - - if (cp != MAP_FAILED) { - - newp = (mchunkptr)(cp + offset); - set_head(newp, (newsize - offset)|IS_MMAPPED); - - assert(aligned_OK(chunk2mem(newp))); - assert((newp->prev_size == offset)); - - /* update statistics */ - sum = mp_.mmapped_mem += newsize - oldsize; - if (sum > (unsigned long)(mp_.max_mmapped_mem)) - mp_.max_mmapped_mem = sum; -#ifdef NO_THREADS - sum += main_arena.system_mem; - if (sum > (unsigned long)(mp_.max_total_mem)) - mp_.max_total_mem = sum; -#endif - - return chunk2mem(newp); - } -#endif - - /* Note the extra SIZE_SZ overhead. */ - if ((unsigned long)(oldsize) >= (unsigned long)(nb + SIZE_SZ)) - newmem = oldmem; /* do nothing */ - else { - /* Must alloc, copy, free. 
*/ - newmem = _int_malloc(av, nb - MALLOC_ALIGN_MASK); - if (newmem != 0) { - MALLOC_COPY(newmem, oldmem, oldsize - 2*SIZE_SZ); - _int_free(av, oldmem); - } - } - return newmem; - -#else - /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */ - check_malloc_state(av); - MALLOC_FAILURE_ACTION; - return 0; -#endif - } -} - -/* - ------------------------------ memalign ------------------------------ -*/ - -Void_t* -_int_memalign(mstate av, size_t alignment, size_t bytes) -{ - INTERNAL_SIZE_T nb; /* padded request size */ - char* m; /* memory returned by malloc call */ - mchunkptr p; /* corresponding chunk */ - char* brk; /* alignment point within p */ - mchunkptr newp; /* chunk to return */ - INTERNAL_SIZE_T newsize; /* its size */ - INTERNAL_SIZE_T leadsize; /* leading space before alignment point */ - mchunkptr remainder; /* spare room at end to split off */ - unsigned long remainder_size; /* its size */ - INTERNAL_SIZE_T size; - - /* If need less alignment than we give anyway, just relay to malloc */ - - if (alignment <= MALLOC_ALIGNMENT) return _int_malloc(av, bytes); - - /* Otherwise, ensure that it is at least a minimum chunk size */ - - if (alignment < MINSIZE) alignment = MINSIZE; - - /* Make sure alignment is power of 2 (in case MINSIZE is not). */ - if ((alignment & (alignment - 1)) != 0) { - size_t a = MALLOC_ALIGNMENT * 2; - while ((unsigned long)a < (unsigned long)alignment) a <<= 1; - alignment = a; - } - - checked_request2size(bytes, nb); - - /* - Strategy: find a spot within that chunk that meets the alignment - request, and then possibly free the leading and trailing space. - */ - - - /* Call malloc with worst case padding to hit alignment. */ - - m = (char*)(_int_malloc(av, nb + alignment + MINSIZE)); - - if (m == 0) return 0; /* propagate failure */ - - p = mem2chunk(m); - - if ((((unsigned long)(m)) % alignment) != 0) { /* misaligned */ - - /* - Find an aligned spot inside chunk. 
Since we need to give back - leading space in a chunk of at least MINSIZE, if the first - calculation places us at a spot with less than MINSIZE leader, - we can move to the next aligned spot -- we've allocated enough - total room so that this is always possible. - */ - - brk = (char*)mem2chunk(((unsigned long)(m + alignment - 1)) & - -((signed long) alignment)); - if ((unsigned long)(brk - (char*)(p)) < MINSIZE) - brk += alignment; - - newp = (mchunkptr)brk; - leadsize = brk - (char*)(p); - newsize = chunksize(p) - leadsize; - - /* For mmapped chunks, just adjust offset */ - if (chunk_is_mmapped(p)) { - newp->prev_size = p->prev_size + leadsize; - set_head(newp, newsize|IS_MMAPPED); - return chunk2mem(newp); - } - - /* Otherwise, give back leader, use the rest */ - set_head(newp, newsize | PREV_INUSE | - (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_inuse_bit_at_offset(newp, newsize); - set_head_size(p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0)); - _int_free(av, chunk2mem(p)); - p = newp; - - assert (newsize >= nb && - (((unsigned long)(chunk2mem(p))) % alignment) == 0); - } - - /* Also give back spare room at the end */ - if (!chunk_is_mmapped(p)) { - size = chunksize(p); - if ((unsigned long)(size) > (unsigned long)(nb + MINSIZE)) { - remainder_size = size - nb; - remainder = chunk_at_offset(p, nb); - set_head(remainder, remainder_size | PREV_INUSE | - (av != &main_arena ? 
NON_MAIN_ARENA : 0)); - set_head_size(p, nb); - _int_free(av, chunk2mem(remainder)); - } - } - - check_inuse_chunk(av, p); - return chunk2mem(p); -} - -#if 0 -/* - ------------------------------ calloc ------------------------------ -*/ - -#if __STD_C -Void_t* cALLOc(size_t n_elements, size_t elem_size) -#else -Void_t* cALLOc(n_elements, elem_size) size_t n_elements; size_t elem_size; -#endif -{ - mchunkptr p; - unsigned long clearsize; - unsigned long nclears; - INTERNAL_SIZE_T* d; - - Void_t* mem = mALLOc(n_elements * elem_size); - - if (mem != 0) { - p = mem2chunk(mem); - -#if MMAP_CLEARS - if (!chunk_is_mmapped(p)) /* don't need to clear mmapped space */ -#endif - { - /* - Unroll clear of <= 36 bytes (72 if 8byte sizes) - We know that contents have an odd number of - INTERNAL_SIZE_T-sized words; minimally 3. - */ - - d = (INTERNAL_SIZE_T*)mem; - clearsize = chunksize(p) - SIZE_SZ; - nclears = clearsize / sizeof(INTERNAL_SIZE_T); - assert(nclears >= 3); - - if (nclears > 9) - MALLOC_ZERO(d, clearsize); - - else { - *(d+0) = 0; - *(d+1) = 0; - *(d+2) = 0; - if (nclears > 4) { - *(d+3) = 0; - *(d+4) = 0; - if (nclears > 6) { - *(d+5) = 0; - *(d+6) = 0; - if (nclears > 8) { - *(d+7) = 0; - *(d+8) = 0; - } - } - } - } - } - } - return mem; -} -#endif /* 0 */ - -/* - ------------------------- independent_calloc ------------------------- -*/ - -Void_t** -#if __STD_C -_int_icalloc(mstate av, size_t n_elements, size_t elem_size, Void_t* chunks[]) -#else -_int_icalloc(av, n_elements, elem_size, chunks) -mstate av; size_t n_elements; size_t elem_size; Void_t* chunks[]; -#endif -{ - size_t sz[1] = {elem_size}; - /* opts arg of 3 means all elements are same size, and should be cleared */ - return iALLOc(av, n_elements, sz, 3, chunks); -} - -/* - ------------------------- independent_comalloc ------------------------- -*/ - -Void_t** -#if __STD_C -_int_icomalloc(mstate av, size_t n_elements, size_t sizes[], Void_t* chunks[]) -#else -_int_icomalloc(av, n_elements, sizes, 
chunks) -mstate av; size_t n_elements; size_t sizes[]; Void_t* chunks[]; -#endif -{ - return iALLOc(av, n_elements, sizes, 0, chunks); -} - - -/* - ------------------------------ ialloc ------------------------------ - ialloc provides common support for independent_X routines, handling all of - the combinations that can result. - - The opts arg has: - bit 0 set if all elements are same size (using sizes[0]) - bit 1 set if elements should be zeroed -*/ - - -static Void_t** -#if __STD_C -iALLOc(mstate av, size_t n_elements, size_t* sizes, int opts, Void_t* chunks[]) -#else -iALLOc(av, n_elements, sizes, opts, chunks) -mstate av; size_t n_elements; size_t* sizes; int opts; Void_t* chunks[]; -#endif -{ - INTERNAL_SIZE_T element_size; /* chunksize of each element, if all same */ - INTERNAL_SIZE_T contents_size; /* total size of elements */ - INTERNAL_SIZE_T array_size; /* request size of pointer array */ - Void_t* mem; /* malloced aggregate space */ - mchunkptr p; /* corresponding chunk */ - INTERNAL_SIZE_T remainder_size; /* remaining bytes while splitting */ - Void_t** marray; /* either "chunks" or malloced ptr array */ - mchunkptr array_chunk; /* chunk for malloced ptr array */ - int mmx; /* to disable mmap */ - INTERNAL_SIZE_T size; - INTERNAL_SIZE_T size_flags; - size_t i; - - /* Ensure initialization/consolidation */ - if (have_fastchunks(av)) malloc_consolidate(av); - - /* compute array length, if needed */ - if (chunks != 0) { - if (n_elements == 0) - return chunks; /* nothing to do */ - marray = chunks; - array_size = 0; - } - else { - /* if empty req, must still return chunk representing empty array */ - if (n_elements == 0) - return (Void_t**) _int_malloc(av, 0); - marray = 0; - array_size = request2size(n_elements * (sizeof(Void_t*))); - } - - /* compute total element size */ - if (opts & 0x1) { /* all-same-size */ - element_size = request2size(*sizes); - contents_size = n_elements * element_size; - } - else { /* add up all the sizes */ - element_size = 0; - 
contents_size = 0; - for (i = 0; i != n_elements; ++i) - contents_size += request2size(sizes[i]); - } - - /* subtract out alignment bytes from total to minimize overallocation */ - size = contents_size + array_size - MALLOC_ALIGN_MASK; - - /* - Allocate the aggregate chunk. - But first disable mmap so malloc won't use it, since - we would not be able to later free/realloc space internal - to a segregated mmap region. - */ - mmx = mp_.n_mmaps_max; /* disable mmap */ - mp_.n_mmaps_max = 0; - mem = _int_malloc(av, size); - mp_.n_mmaps_max = mmx; /* reset mmap */ - if (mem == 0) - return 0; - - p = mem2chunk(mem); - assert(!chunk_is_mmapped(p)); - remainder_size = chunksize(p); - - if (opts & 0x2) { /* optionally clear the elements */ - MALLOC_ZERO(mem, remainder_size - SIZE_SZ - array_size); - } - - size_flags = PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0); - - /* If not provided, allocate the pointer array as final part of chunk */ - if (marray == 0) { - array_chunk = chunk_at_offset(p, contents_size); - marray = (Void_t**) (chunk2mem(array_chunk)); - set_head(array_chunk, (remainder_size - contents_size) | size_flags); - remainder_size = contents_size; - } - - /* split out elements */ - for (i = 0; ; ++i) { - marray[i] = chunk2mem(p); - if (i != n_elements-1) { - if (element_size != 0) - size = element_size; - else - size = request2size(sizes[i]); - remainder_size -= size; - set_head(p, size | size_flags); - p = chunk_at_offset(p, size); - } - else { /* the final element absorbs any overallocation slop */ - set_head(p, remainder_size | size_flags); - break; - } - } - -#if MALLOC_DEBUG - if (marray != chunks) { - /* final element must have exactly exhausted chunk */ - if (element_size != 0) - assert(remainder_size == element_size); - else - assert(remainder_size == request2size(sizes[i])); - check_inuse_chunk(av, mem2chunk(marray)); - } - - for (i = 0; i != n_elements; ++i) - check_inuse_chunk(av, mem2chunk(marray[i])); -#endif - - return marray; -} - - -/* 
- ------------------------------ valloc ------------------------------ -*/ - -Void_t* -#if __STD_C -_int_valloc(mstate av, size_t bytes) -#else -_int_valloc(av, bytes) mstate av; size_t bytes; -#endif -{ - /* Ensure initialization/consolidation */ - if (have_fastchunks(av)) malloc_consolidate(av); - return _int_memalign(av, mp_.pagesize, bytes); -} - -/* - ------------------------------ pvalloc ------------------------------ -*/ - - -Void_t* -#if __STD_C -_int_pvalloc(mstate av, size_t bytes) -#else -_int_pvalloc(av, bytes) mstate av, size_t bytes; -#endif -{ - size_t pagesz; - - /* Ensure initialization/consolidation */ - if (have_fastchunks(av)) malloc_consolidate(av); - pagesz = mp_.pagesize; - return _int_memalign(av, pagesz, (bytes + pagesz - 1) & ~(pagesz - 1)); -} - - -/* - ------------------------------ malloc_trim ------------------------------ -*/ - -#if __STD_C -int mTRIm(size_t pad) -#else -int mTRIm(pad) size_t pad; -#endif -{ - mstate av = &main_arena; /* already locked */ - - /* Ensure initialization/consolidation */ - malloc_consolidate(av); - -#ifndef MORECORE_CANNOT_TRIM - return sYSTRIm(pad, av); -#else - return 0; -#endif -} - - -/* - ------------------------- malloc_usable_size ------------------------- -*/ - -#if __STD_C -size_t mUSABLe(Void_t* mem) -#else -size_t mUSABLe(mem) Void_t* mem; -#endif -{ - mchunkptr p; - if (mem != 0) { - p = mem2chunk(mem); - if (chunk_is_mmapped(p)) - return chunksize(p) - 2*SIZE_SZ; - else if (inuse(p)) - return chunksize(p) - SIZE_SZ; - } - return 0; -} - -/* - ---------------------- internal mallinfo ----------------------------- -*/ - -void _int_get_arena_info(mstate av, struct malloc_arena_info *mai) -{ - size_t i; - mbinptr b; - mchunkptr p; - size_t binavail = 0; - size_t fastavail = 0; - int nbinblocks = 0; - int nfastblocks = 0; - - (void)mutex_lock(&av->mutex); - - /* Ensure initialization */ - if (av->top == 0) malloc_consolidate(av); - - check_malloc_state(av); - - /* traverse fastbins */ - for (i = 
0; i < NFASTBINS; ++i) { - for (p = av->fastbins[i]; p != 0; p = p->fd) { - ++nfastblocks; - fastavail += chunksize(p); - } - } - - /* traverse regular bins */ - for (i = 1; i < NBINS; ++i) { - b = bin_at(av, i); - for (p = last(b); p != b; p = p->bk) { - ++nbinblocks; - binavail += chunksize(p); - } - } - - mai->nfastblocks = nfastblocks; - mai->nbinblocks = nbinblocks; - mai->fastavail = fastavail; - mai->binavail = binavail; - mai->top_size = chunksize(av->top); - mai->system_mem = av->system_mem; - mai->max_system_mem = av->max_system_mem; - mai->stat_lock_direct = av->stat_lock_direct; - mai->stat_lock_loop = av->stat_lock_loop; - mai->stat_lock_wait = av->stat_lock_wait; - - (void)mutex_unlock(&av->mutex); -} - -void -_int_get_global_info (struct malloc_global_info *mgi) -{ - mgi->n_mmaps = mp_.n_mmaps; - mgi->max_n_mmaps = mp_.max_n_mmaps; - mgi->mmapped_mem = mp_.mmapped_mem; - mgi->max_mmapped_mem = mp_.max_mmapped_mem; - mgi->max_total_mem = mp_.max_total_mem; -#if THREAD_STATS - mgi->stat_n_heaps = stat_n_heaps; -#else - mgi->stat_n_heaps = 0; -#endif -} - -/* - ------------------------------ malloc_stats ------------------------------ -*/ - -/* Now in separate file, malloc-stats.c. */ - -/* - ------------------------------ mallopt ------------------------------ -*/ - -#if __STD_C -int mALLOPt(int param_number, int value) -#else -int mALLOPt(param_number, value) int param_number; int value; -#endif -{ - mstate av = &main_arena; - int res = 1; - - if(__malloc_initialized < 0) - ptmalloc_init (); - (void)mutex_lock(&av->mutex); - /* Ensure initialization/consolidation */ - malloc_consolidate(av); - - switch(param_number) { - case M_MXFAST: - if (value >= 0 && value <= MAX_FAST_SIZE) { - set_max_fast(av, value); - } - else - res = 0; - break; - - case M_TRIM_THRESHOLD: - mp_.trim_threshold = value; - break; - - case M_TOP_PAD: - mp_.top_pad = value; - break; - - case M_MMAP_THRESHOLD: -#if USE_ARENAS - /* Forbid setting the threshold too high. 
*/ - if((unsigned long)value > HEAP_MAX_SIZE/2) - res = 0; - else -#endif - mp_.mmap_threshold = value; - break; - - case M_MMAP_MAX: -#if !HAVE_MMAP - if (value != 0) - res = 0; - else -#endif - mp_.n_mmaps_max = value; - break; - - case M_CHECK_ACTION: - check_action = value; - break; - } - (void)mutex_unlock(&av->mutex); - return res; -} - - -/* - -------------------- Alternative MORECORE functions -------------------- -*/ - - -/* - General Requirements for MORECORE. - - The MORECORE function must have the following properties: - - If MORECORE_CONTIGUOUS is false: - - * MORECORE must allocate in multiples of pagesize. It will - only be called with arguments that are multiples of pagesize. - - * MORECORE(0) must return an address that is at least - MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.) - - else (i.e. If MORECORE_CONTIGUOUS is true): - - * Consecutive calls to MORECORE with positive arguments - return increasing addresses, indicating that space has been - contiguously extended. - - * MORECORE need not allocate in multiples of pagesize. - Calls to MORECORE need not have args of multiples of pagesize. - - * MORECORE need not page-align. - - In either case: - - * MORECORE may allocate more memory than requested. (Or even less, - but this will generally result in a malloc failure.) - - * MORECORE must not allocate memory when given argument zero, but - instead return one past the end address of memory from previous - nonzero call. This malloc does NOT call MORECORE(0) - until at least one call with positive arguments is made, so - the initial value returned is not important. - - * Even though consecutive calls to MORECORE need not return contiguous - addresses, it must be OK for malloc'ed chunks to span multiple - regions in those cases where they do happen to be contiguous. - - * MORECORE need not handle negative arguments -- it may instead - just return MORECORE_FAILURE when given negative arguments. 
- Negative arguments are always multiples of pagesize. MORECORE - must not misinterpret negative args as large positive unsigned - args. You can suppress all such calls from even occurring by defining - MORECORE_CANNOT_TRIM, - - There is some variation across systems about the type of the - argument to sbrk/MORECORE. If size_t is unsigned, then it cannot - actually be size_t, because sbrk supports negative args, so it is - normally the signed type of the same width as size_t (sometimes - declared as "intptr_t", and sometimes "ptrdiff_t"). It doesn't much - matter though. Internally, we use "long" as arguments, which should - work across all reasonable possibilities. - - Additionally, if MORECORE ever returns failure for a positive - request, and HAVE_MMAP is true, then mmap is used as a noncontiguous - system allocator. This is a useful backup strategy for systems with - holes in address spaces -- in this case sbrk cannot contiguously - expand the heap, but mmap may be able to map noncontiguous space. - - If you'd like mmap to ALWAYS be used, you can define MORECORE to be - a function that always returns MORECORE_FAILURE. - - If you are using this malloc with something other than sbrk (or its - emulation) to supply memory regions, you probably want to set - MORECORE_CONTIGUOUS as false. As an example, here is a custom - allocator kindly contributed for pre-OSX macOS. It uses virtually - but not necessarily physically contiguous non-paged memory (locked - in, present and won't get swapped out). You can use it by - uncommenting this section, adding some #includes, and setting up the - appropriate defines above: - - #define MORECORE osMoreCore - #define MORECORE_CONTIGUOUS 0 - - There is also a shutdown routine that should somehow be called for - cleanup upon program exit. 
- - #define MAX_POOL_ENTRIES 100 - #define MINIMUM_MORECORE_SIZE (64 * 1024) - static int next_os_pool; - void *our_os_pools[MAX_POOL_ENTRIES]; - - void *osMoreCore(int size) - { - void *ptr = 0; - static void *sbrk_top = 0; - - if (size > 0) - { - if (size < MINIMUM_MORECORE_SIZE) - size = MINIMUM_MORECORE_SIZE; - if (CurrentExecutionLevel() == kTaskLevel) - ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); - if (ptr == 0) - { - return (void *) MORECORE_FAILURE; - } - // save ptrs so they can be freed during cleanup - our_os_pools[next_os_pool] = ptr; - next_os_pool++; - ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); - sbrk_top = (char *) ptr + size; - return ptr; - } - else if (size < 0) - { - // we don't currently support shrink behavior - return (void *) MORECORE_FAILURE; - } - else - { - return sbrk_top; - } - } - - // cleanup any allocated memory pools - // called as last thing before shutting down driver - - void osCleanupMem(void) - { - void **ptr; - - for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) - if (*ptr) - { - PoolDeallocate(*ptr); - *ptr = 0; - } - } - -*/ - -/* OMPI: Need to expose our own posix_memalign, or the wrong one will - be used */ -# include - -/* We need a wrapper function for one of the additions of POSIX. */ -int -posix_memalign (void **memptr, size_t alignment, size_t size) -{ - void *mem; - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - __malloc_ptr_t (*hook) __MALLOC_PMT ((size_t, size_t, - __const __malloc_ptr_t)) = - __memalign_hook; -#endif - - /* Test whether the SIZE argument is valid. It must be a power of - two multiple of sizeof (void *). */ - if (alignment % sizeof (void *) != 0 - || !powerof2 (alignment / sizeof (void *)) != 0 - || alignment == 0) - return EINVAL; - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - /* Call the hook here, so that caller is posix_memalign's caller - and not posix_memalign itself. 
*/ - if (hook != NULL) - mem = (*hook)(alignment, size, RETURN_ADDRESS (0)); - else - mem = public_mEMALIGn (alignment, size); -#else - mem = public_mEMALIGn (alignment, size); -#endif - - if (mem != NULL) { - *memptr = mem; - return 0; - } - - return ENOMEM; -} - -#ifdef _LIBC -weak_alias (__posix_memalign, posix_memalign) - -strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc) -strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree) -strong_alias (__libc_free, __free) strong_alias (__libc_free, free) -strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc) -strong_alias (__libc_memalign, __memalign) -weak_alias (__libc_memalign, memalign) -strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc) -strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc) -strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc) -strong_alias (__libc_mallinfo, __mallinfo) -weak_alias (__libc_mallinfo, mallinfo) -strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt) - -weak_alias (__malloc_usable_size, malloc_usable_size) -weak_alias (__malloc_trim, malloc_trim) -weak_alias (__malloc_get_state, malloc_get_state) -weak_alias (__malloc_set_state, malloc_set_state) - -#endif /* _LIBC */ - -/* ------------------------------------------------------------ -History: - -[see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc] - -*/ -/* - * Local variables: - * c-basic-offset: 2 - * End: - */ diff --git a/opal/mca/memory/linux/malloc.h b/opal/mca/memory/linux/malloc.h deleted file mode 100644 index bd0430e3b2f..00000000000 --- a/opal/mca/memory/linux/malloc.h +++ /dev/null @@ -1,300 +0,0 @@ -/* Prototypes and definition for malloc implementation. - Copyright (C) 1996,97,99,2000,2002,2003,2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#ifndef _MALLOC_H -#define _MALLOC_H 1 - -/* add the opal config header file for the OPAL_DECLSPEC */ -#include "opal_config.h" - - -#ifdef _LIBC -#include -#endif - -/* - $Id: malloc.h,v 1.7 2004/08/08 12:34:57 wg Exp $ - `ptmalloc2', a malloc implementation for multiple threads without - lock contention, by Wolfram Gloger . - - VERSION 2.7.0 - - This work is mainly derived from malloc-2.7.0 by Doug Lea - , which is available from: - - ftp://gee.cs.oswego.edu/pub/misc/malloc.c - - This trimmed-down header file only provides function prototypes and - the exported data structures. For more detailed function - descriptions and compile-time options, see the source file - `malloc.c'. -*/ - -#if defined(__STDC__) || defined (__cplusplus) -# include -# define __malloc_ptr_t void * -#else -# undef size_t -# define size_t unsigned int -# undef ptrdiff_t -# define ptrdiff_t int -# define __malloc_ptr_t char * -#endif - -#ifdef _LIBC -/* Used by GNU libc internals. */ -# define __malloc_size_t size_t -# define __malloc_ptrdiff_t ptrdiff_t -#elif !defined __attribute_malloc__ -# define __attribute_malloc__ -#endif - -#ifdef __GNUC__ - -/* GCC can always grok prototypes. For C++ programs we add throw() - to help it optimize the function calls. 
But this works only with - gcc 2.8.x and egcs. */ -# if defined __cplusplus && (__GNUC__ >= 3 || __GNUC_MINOR__ >= 8) -# define __THROW throw () -# else -#ifdef __THROW -# undef __THROW -#endif -# define __THROW -# endif -# define __MALLOC_P(args) args __THROW -/* This macro will be used for functions which might take C++ callback - functions. */ -# define __MALLOC_PMT(args) args - -#else /* Not GCC. */ - -# define __THROW - -# if (defined __STDC__ && __STDC__) || defined __cplusplus - -# define __MALLOC_P(args) args -# define __MALLOC_PMT(args) args - -# ifndef __const -# define __const const -# endif - -# else /* Not ANSI C or C++. */ - -# define __MALLOC_P(args) () /* No prototypes. */ -# define __MALLOC_PMT(args) () - -# ifndef __const -# define __const -# endif - -# endif /* ANSI C or C++. */ - -#endif /* GCC. */ - -#ifndef NULL -# ifdef __cplusplus -# define NULL 0 -# else -# define NULL ((__malloc_ptr_t) 0) -# endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* Nonzero if the malloc is already initialized. */ -#ifdef _LIBC -/* In the GNU libc we rename the global variable - `__malloc_initialized' to `__libc_malloc_initialized'. */ -# define __malloc_initialized __libc_malloc_initialized -#endif -extern int __malloc_initialized; - -/* Allocate SIZE bytes of memory. */ -OPAL_DECLSPEC extern __malloc_ptr_t malloc __MALLOC_P ((size_t __size)) __attribute_malloc__; - -/* Allocate NMEMB elements of SIZE bytes each, all initialized to 0. */ -OPAL_DECLSPEC extern __malloc_ptr_t calloc __MALLOC_P ((size_t __nmemb, size_t __size)) - __attribute_malloc__; - -/* Re-allocate the previously allocated block in __ptr, making the new - block SIZE bytes long. */ -OPAL_DECLSPEC extern __malloc_ptr_t realloc __MALLOC_P ((__malloc_ptr_t __ptr, - size_t __size)) - __attribute_malloc__; - -/* Free a block allocated by `malloc', `realloc' or `calloc'. */ -OPAL_DECLSPEC extern void free __MALLOC_P ((__malloc_ptr_t __ptr)); - -/* Free a block allocated by `calloc'. 
*/ -OPAL_DECLSPEC extern void cfree __MALLOC_P ((__malloc_ptr_t __ptr)); - -/* Allocate SIZE bytes allocated to ALIGNMENT bytes. */ -OPAL_DECLSPEC extern __malloc_ptr_t memalign __MALLOC_P ((size_t __alignment, size_t __size)); - -/* Allocate SIZE bytes on a page boundary. */ -OPAL_DECLSPEC extern __malloc_ptr_t valloc __MALLOC_P ((size_t __size)) __attribute_malloc__; - -/* Equivalent to valloc(minimum-page-that-holds(n)), that is, round up - __size to nearest pagesize. */ -OPAL_DECLSPEC extern __malloc_ptr_t pvalloc __MALLOC_P ((size_t __size)) - __attribute_malloc__; - -/* Underlying allocation function; successive calls should return - contiguous pieces of memory. */ -OPAL_DECLSPEC extern __malloc_ptr_t (*__morecore) __MALLOC_PMT ((ptrdiff_t __size)); - -/* Default value of `__morecore'. */ -OPAL_DECLSPEC extern __malloc_ptr_t __default_morecore __MALLOC_P ((ptrdiff_t __size)) - __attribute_malloc__; - -/* SVID2/XPG mallinfo structure */ - -struct mallinfo { - int arena; /* non-mmapped space allocated from system */ - int ordblks; /* number of free chunks */ - int smblks; /* number of fastbin blocks */ - int hblks; /* number of mmapped regions */ - int hblkhd; /* space in mmapped regions */ - int usmblks; /* maximum total allocated space */ - int fsmblks; /* space available in freed fastbin blocks */ - int uordblks; /* total allocated space */ - int fordblks; /* total free space */ - int keepcost; /* top-most, releasable (via malloc_trim) space */ -}; - -/* Returns a copy of the updated current mallinfo. 
*/ -OPAL_DECLSPEC extern struct mallinfo mallinfo __MALLOC_P ((void)); - -/* SVID2/XPG mallopt options */ -#ifndef M_MXFAST -# define M_MXFAST 1 /* maximum request size for "fastbins" */ -#endif -#ifndef M_NLBLKS -# define M_NLBLKS 2 /* UNUSED in this malloc */ -#endif -#ifndef M_GRAIN -# define M_GRAIN 3 /* UNUSED in this malloc */ -#endif -#ifndef M_KEEP -# define M_KEEP 4 /* UNUSED in this malloc */ -#endif - -/* mallopt options that actually do something */ -#define M_TRIM_THRESHOLD -1 -#define M_TOP_PAD -2 -#define M_MMAP_THRESHOLD -3 -#define M_MMAP_MAX -4 -#define M_CHECK_ACTION -5 - -/* General SVID/XPG interface to tunable parameters. */ -OPAL_DECLSPEC extern int mallopt __MALLOC_P ((int __param, int __val)); - -/* Release all but __pad bytes of freed top-most memory back to the - system. Return 1 if successful, else 0. */ -OPAL_DECLSPEC extern int malloc_trim __MALLOC_P ((size_t __pad)); - -/* Report the number of usable allocated bytes associated with allocated - chunk __ptr. */ -OPAL_DECLSPEC extern size_t malloc_usable_size __MALLOC_P ((__malloc_ptr_t __ptr)); - -/* Prints brief summary statistics on stderr. */ -OPAL_DECLSPEC extern void malloc_stats __MALLOC_P ((void)); - -/* Record the state of all malloc variables in an opaque data structure. */ -OPAL_DECLSPEC extern __malloc_ptr_t malloc_get_state __MALLOC_P ((void)); - -/* Restore the state of all malloc variables from data obtained with - malloc_get_state(). */ -OPAL_DECLSPEC extern int malloc_set_state __MALLOC_P ((__malloc_ptr_t __ptr)); - -/* Called once when malloc is initialized; redefining this variable in - the application provides the preferred way to set up the hook - pointers. */ -OPAL_DECLSPEC extern void (*__malloc_initialize_hook) __MALLOC_PMT ((void)); -/* Hooks for debugging and user-defined versions. 
*/ -OPAL_DECLSPEC extern void (*__free_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr, - __const __malloc_ptr_t)); -OPAL_DECLSPEC extern __malloc_ptr_t (*__malloc_hook) __MALLOC_PMT ((size_t __size, - __const __malloc_ptr_t)); -OPAL_DECLSPEC extern __malloc_ptr_t (*__realloc_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr, - size_t __size, - __const __malloc_ptr_t)); -OPAL_DECLSPEC extern __malloc_ptr_t (*__memalign_hook) __MALLOC_PMT ((size_t __alignment, - size_t __size, - __const __malloc_ptr_t)); -OPAL_DECLSPEC extern void (*__after_morecore_hook) __MALLOC_PMT ((void)); - -/* Activate a standard set of debugging hooks. */ -OPAL_DECLSPEC extern void __malloc_check_init __MALLOC_P ((void)); - -/* Internal routines, operating on "arenas". */ -struct malloc_state; -typedef struct malloc_state *mstate; - -OPAL_DECLSPEC extern mstate _int_new_arena __MALLOC_P ((size_t __ini_size)); -OPAL_DECLSPEC extern __malloc_ptr_t _int_malloc __MALLOC_P ((mstate __m, size_t __size)); -OPAL_DECLSPEC extern void _int_free __MALLOC_P ((mstate __m, __malloc_ptr_t __ptr)); -OPAL_DECLSPEC extern __malloc_ptr_t _int_realloc __MALLOC_P ((mstate __m, - __malloc_ptr_t __ptr, - size_t __size)); -OPAL_DECLSPEC extern __malloc_ptr_t _int_memalign __MALLOC_P ((mstate __m, size_t __alignment, - size_t __size)); -/* Return arena number __n, or 0 if out of bounds. Arena 0 is the - main arena. */ -OPAL_DECLSPEC extern mstate _int_get_arena __MALLOC_P ((int __n)); - -/* Implementation-specific mallinfo. More detailed than mallinfo, and - also works for size_t wider than int. */ -struct malloc_arena_info { - int nfastblocks; /* number of freed "fastchunks" */ - int nbinblocks; /* number of available chunks in bins */ - size_t fastavail; /* total space in freed "fastchunks" */ - size_t binavail; /* total space in binned chunks */ - size_t top_size; /* size of top chunk */ - size_t system_mem; /* bytes allocated from system in this arena */ - size_t max_system_mem; /* max. 
bytes allocated from system */ - /* Statistics for locking. Only kept if THREAD_STATS is defined - at compile time. */ - long stat_lock_direct, stat_lock_loop, stat_lock_wait; -}; - -struct malloc_global_info { - int n_mmaps; /* number of mmap'ed chunks */ - int max_n_mmaps; /* max. number of mmap'ed chunks reached */ - size_t mmapped_mem; /* total bytes allocated in mmap'ed chunks */ - size_t max_mmapped_mem; /* max. bytes allocated in mmap'ed chunks */ - size_t max_total_mem; /* only kept for NO_THREADS */ - int stat_n_heaps; /* only kept if THREAD_STATS is defined */ -}; - -OPAL_DECLSPEC extern void _int_get_arena_info __MALLOC_P ((mstate __m, - struct malloc_arena_info *__ma)); -OPAL_DECLSPEC extern void _int_get_global_info __MALLOC_P ((struct malloc_global_info *__m)); - -OPAL_DECLSPEC extern int posix_memalign (void **memptr, size_t alignment, size_t size); - -#ifdef __cplusplus -} /* end of extern "C" */ -#endif - -#endif /* malloc.h */ diff --git a/opal/mca/memory/linux/memory_linux.h b/opal/mca/memory/linux/memory_linux.h deleted file mode 100644 index fdf5312398e..00000000000 --- a/opal/mca/memory/linux/memory_linux.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_MEMORY_LINUX_H -#define OPAL_MEMORY_LINUX_H - -#include "opal_config.h" - -#include "opal/mca/memory/memory.h" - -BEGIN_C_DECLS - -/* Component structure */ - -typedef struct opal_memory_linux_component_t { - opal_memory_base_component_2_0_0_t super; - - /* Component data */ - int verbose_level; - int enable_ummunotify; - int enable_ptmalloc2; - -#if MEMORY_LINUX_UMMUNOTIFY - /* Ummunotify-specific data */ - int ummunotify_fd; -#endif - -#if MEMORY_LINUX_PTMALLOC2 - /* Ptmalloc2-specific data */ - bool free_invoked; - bool malloc_invoked; - bool realloc_invoked; - bool memalign_invoked; - bool munmap_invoked; -#endif -} opal_memory_linux_component_t; - -/* memory_linux_component.c */ - -extern opal_memory_linux_component_t mca_memory_linux_component; - - -#if MEMORY_LINUX_UMMUNOTIFY -/* memory_linux_ummunotify.c */ -int opal_memory_linux_ummunotify_open(void); -int opal_memory_linux_ummunotify_close(void); -#endif /* MEMORY_LINUX_UMMUNOTIFY */ - -#if MEMORY_LINUX_PTMALLOC2 -/* memory_linux_ptmalloc2.c */ -int opal_memory_linux_ptmalloc2_open(void); -int opal_memory_linux_ptmalloc2_close(void); -OPAL_DECLSPEC void opal_memory_linux_malloc_init_hook(void); - -/* memory_linux_munmap.c */ -OPAL_DECLSPEC int opal_memory_linux_free_ptmalloc2_munmap(void *start, size_t length, int from_alloc); -OPAL_DECLSPEC int munmap(void* addr, size_t len); -#endif /* !MEMORY_LINUX_PTMALLOC2 */ - -END_C_DECLS - -#endif diff --git a/opal/mca/memory/linux/memory_linux_component.c b/opal/mca/memory/linux/memory_linux_component.c deleted file mode 100644 index 03de3335442..00000000000 --- a/opal/mca/memory/linux/memory_linux_component.c +++ /dev/null @@ -1,251 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* This component basically fronts two different memory management - schemes: the Linux "ummunotify" kernel module and hooking in a - substitute ptmalloc2 allocator. Both of these mechanisms are - unified under a single component because the "memory" framework - both only allows one component to be selected, and that one - component must be compile-time linked into libopen-pal. Hence, if - we want to try to use either one of these mechanisms, we have to - have them both in a single component. - - When using ptmalloc2, the goal of this component is to wholly - replace the underlying allocator with our internal ptmalloc2 - allocator. See the file README-open-mpi.txt for details of how it - works. - - When using ummunotify, we can probe to find out when the MMU map - has been changed (i.e., memory has been released back to the OS). 
*/ - -#include "opal_config.h" - -#include "opal/constants.h" -#include "opal/mca/base/mca_base_var.h" -#include "opal/mca/memory/memory.h" -#include "opal/mca/memory/base/empty.h" -#include "opal/memoryhooks/memory.h" -#include "opal/util/output.h" - -#include "opal/mca/memory/linux/memory_linux.h" -#undef opal_memory_changed -#include "opal/mca/memory/linux/public.h" - -static int linux_open(void); -static int linux_close(void); -static int linux_register(void); - -#if MEMORY_LINUX_UMMUNOTIFY -static bool ummunotify_opened = false; -#endif -#if MEMORY_LINUX_PTMALLOC2 -static bool ptmalloc2_opened = false; -#endif - -bool opal_memory_linux_disable = false; - -opal_memory_linux_component_t mca_memory_linux_component = { - /* First, the opal_memory_base_component_2_0_0_t */ - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - .memoryc_version = { - OPAL_MEMORY_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "linux", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = linux_open, - .mca_close_component = linux_close, - .mca_register_component_params = linux_register, - }, - .memoryc_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - /* Memory framework functions. These function pointer values - are replaced by memory_linux_ummunotify.c at run time if we - end up using ummunotify support. 
*/ - .memoryc_register = opal_memory_base_component_register_empty, - .memoryc_deregister = opal_memory_base_component_deregister_empty, - }, - - /* Component-specific data, filled in later (compiler will 0/NULL - it out) */ -}; - -static bool ptmalloc2_available = MEMORY_LINUX_PTMALLOC2; -static bool ummunotify_available = MEMORY_LINUX_UMMUNOTIFY; - -/* - * Register MCA params - */ -static int linux_register(void) -{ - int ret; - /* Information only */ - ret = mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, - "ptmalloc2_available", - "Whether ptmalloc2 support is included in Open MPI or not (1 = yes, 0 = no)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_CONSTANT, - &ptmalloc2_available); - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, - "ummunotify_available", - "Whether ummunotify support is included in Open MPI or not (1 = yes, 0 = no)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_CONSTANT, - &ummunotify_available); - if (0 > ret) { - return ret; - } - - /* Allow user to manually enable/disable */ - mca_memory_linux_component.enable_ptmalloc2 = -1; - ret = mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, - "ptmalloc2_enable", - "Whether to enable ptmalloc2 support or not (negative = try to enable, but continue even if support is not available, 0 = do not enable support, positive = try to enable and fail if support is not available)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_memory_linux_component.enable_ptmalloc2); - if (0 > ret) { - return ret; - } - - mca_memory_linux_component.enable_ummunotify = -1; - ret = mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, - "ummunotify_enable", - "Whether to enable ummunotify support or not (negative 
= try to enable, but continue even if support is not available, 0 = do not enable support, positive = try to enable and fail if support is not available)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_memory_linux_component.enable_ummunotify); - if (0 > ret) { - return ret; - } - - opal_memory_linux_disable = false; - (void) mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, - "disable", - "If this MCA parameter is set to 1 **VIA ENVIRONMENT VARIABLE ONLY*** (this MCA parameter *CANNOT* be set in a file or on the mpirun command line!), this component will be disabled and will not attempt to use either ummunotify or memory hook support", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_ENVIRONMENT_ONLY, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_READONLY, - &opal_memory_linux_disable); - - return (0 > ret) ? ret : OPAL_SUCCESS; -} - - -static int linux_open(void) -{ - const int *verbose = NULL; - int i; - - i = mca_base_var_find("opal", "memory", NULL, "base_verbose"); - mca_base_var_get_value(i, &verbose, NULL, NULL); - mca_memory_linux_component.verbose_level = verbose ? verbose[0] : 0; - - /* Try initializing ummunotify first; if that fails, try - ptmalloc2. 
*/ -#if MEMORY_LINUX_UMMUNOTIFY - if (mca_memory_linux_component.enable_ummunotify) { - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: attempting to initialize ummunotify support"); - } - if (OPAL_SUCCESS == opal_memory_linux_ummunotify_open()) { - ummunotify_opened = true; - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: ummunotify successfully initialized; we'll use that"); - } - return OPAL_SUCCESS; - } - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: ummunotify failed to initialize"); - } - } -#endif - -#if MEMORY_LINUX_PTMALLOC2 - if (mca_memory_linux_component.enable_ptmalloc2) { - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: attempting to initialize ptmalloc2 support"); - } - if (OPAL_SUCCESS == opal_memory_linux_ptmalloc2_open()) { - ptmalloc2_opened = true; - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: ptmalloc2 successfully initialized; we'll use that"); - } - return OPAL_SUCCESS; - } - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: ptmalloc2 failed to initialize"); - } - } -#endif - - /* We can return OPAL_ERR_NOT_AVAILABLE if nothing is - available; that will make the MCA base silently disregard this - component. 
*/ - - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: no memory hooks available in this process"); - } - return OPAL_ERR_NOT_AVAILABLE; -} - -static int linux_close(void) -{ - int v = mca_memory_linux_component.verbose_level; - -#if MEMORY_LINUX_UMMUNOTIFY - if (ummunotify_opened) { - if (v >= 10) { - opal_output(0, "memory:linux: shutting down ummunotify support"); - } - opal_memory_linux_ummunotify_close(); - ummunotify_opened = false; - } -#endif -#if MEMORY_LINUX_PTMALLOC2 - if (ptmalloc2_opened) { - if (v >= 10) { - opal_output(0, "memory:linux: shutting down ptmalloc2 support"); - } - opal_memory_linux_ptmalloc2_close(); - ptmalloc2_opened = false; - } -#endif - - return OPAL_SUCCESS; -} diff --git a/opal/mca/memory/linux/memory_linux_munmap.c b/opal/mca/memory/linux/memory_linux_munmap.c deleted file mode 100644 index 118cce60102..00000000000 --- a/opal/mca/memory/linux/memory_linux_munmap.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#if defined(HAVE___MUNMAP) -/* here so we only include others if we absolutely have to */ -#elif defined(HAVE_SYSCALL) -#include -#include -#endif -#if defined(HAVE_DLSYM) -#ifndef __USE_GNU -#define __USE_GNU -#endif -#include -#endif - -#include "opal/memoryhooks/memory_internal.h" - -#include "memory_linux.h" - -/* - * munmap is always intercepted - */ -#if defined(HAVE___MUNMAP) -int __munmap(void* addr, size_t len); -#endif - - -/* intercept munmap, as the user can give back memory that way as well. */ -OPAL_DECLSPEC int munmap(void* addr, size_t len) -{ - return opal_memory_linux_free_ptmalloc2_munmap(addr, len, 0); -} - - -/* three ways to call munmap. Prefered is to just call syscall, so - that we can intercept both munmap and __munmap. If that isn't - possible, try calling __munmap from munmap and let __munmap go. If - that doesn't work, try dlsym */ -int opal_memory_linux_free_ptmalloc2_munmap(void *start, size_t length, - int from_alloc) -{ -#if !defined(HAVE___MUNMAP) && \ - !(defined(HAVE_SYSCALL) && defined(__NR_munmap)) && defined(HAVE_DLSYM) - static int (*realmunmap)(void*, size_t); -#endif - - mca_memory_linux_component.munmap_invoked = true; - - opal_mem_hooks_release_hook(start, length, from_alloc); - -#if defined(HAVE___MUNMAP) - return __munmap(start, length); -#elif defined(HAVE_SYSCALL) && defined(__NR_munmap) - return syscall(__NR_munmap, start, length); -#elif defined(HAVE_DLSYM) - if (NULL == realmunmap) { - union { - int (*munmap_fp)(void*, size_t); - void *munmap_p; - } tmp; - - tmp.munmap_p = dlsym(RTLD_NEXT, "munmap"); - realmunmap = tmp.munmap_fp; - } - - return realmunmap(start, length); -#else - #error "Can not determine how to call munmap" -#endif -} diff --git a/opal/mca/memory/linux/memory_linux_ptmalloc2.c b/opal/mca/memory/linux/memory_linux_ptmalloc2.c deleted file mode 100644 index 
3350e056f62..00000000000 --- a/opal/mca/memory/linux/memory_linux_ptmalloc2.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include - -#include "opal/constants.h" -#include "opal/util/output.h" -#include "opal/memoryhooks/memory.h" -#include "opal/memoryhooks/memory_internal.h" - -#include "opal/mca/memory/linux/memory_linux.h" - - -/* Need to call a function in hooks.c to ensure that all those symbols - get pulled in at link time (e.g., when building libmpi.a, so that - those symbols end up in the final executable -- especially if we - use --disable-dlopen and therefore -Wl,--export-dynamic isn't used - when we build OMPI). */ -extern void opal_memory_linux_hook_pull(bool *want_hooks); - - -/* - * Try to initialize ptmalloc2 - */ -int opal_memory_linux_ptmalloc2_open(void) -{ - int val = 0; - void *p; - bool want_hooks = true; - - /* Call a [somewhat] dummy function in hooks.c. ***Do not remove - this call!*** See comment at the beginning of this file - explaining why it is here. It will also check to see if an - environment variable has been set to disable this component - (note that OPAL_ERR_NOT_AVAILABLE is a special return value - that will silently fail the open component call; all others - will issue an error). 
*/ - opal_memory_linux_hook_pull(&want_hooks); - if (!want_hooks) { - return OPAL_ERR_NOT_AVAILABLE; - } - - /* We will also provide malloc/free support if we've been - activated. We don't exclusively rely on the - __malloc_initialize_hook() previously being called because it's - possible that our hook was called, but then someone else reset - the hooks to point to something else (i.e., before MPI_INIT). - So explicitly test here if our hooks are still in place. If - they are, then enable FREE|CHUNK_SUPPORT. If not, then don't - enable that support -- just leave it at MUNMAP_SUPPORT. - - (Look in hooks.c for the __malloc_initialize_hook setup) */ - - /* Do a simple set of tests to see if our hooks are still the ones - installed. Explicitly reset the flags indicating that our - functions were invoked */ - p = malloc(1024 * 1024 * 4); - if (NULL == p) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - p = realloc(p, 1024 * 1024 * 4 + 32); - if (NULL == p) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - free(p); - p = memalign(4, 1024 * 1024); - if (NULL == p) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - free(p); - -#if HAVE_POSIX_MEMALIGN - /* Double check for posix_memalign, too */ - if (mca_memory_linux_component.memalign_invoked) { - mca_memory_linux_component.memalign_invoked = false; - if (0 != posix_memalign(&p, sizeof(void*), 1024 * 1024)) { - return OPAL_ERR_IN_ERRNO; - } - free(p); - } -#endif - - if (mca_memory_linux_component.malloc_invoked && - mca_memory_linux_component.realloc_invoked && - mca_memory_linux_component.memalign_invoked && - mca_memory_linux_component.free_invoked) { - /* Happiness; our functions were invoked */ - val |= OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_CHUNK_SUPPORT; - } - - /* Check if our mmap layering is working */ - p = mmap(NULL, 4096, PROT_READ, (MAP_ANONYMOUS | MAP_PRIVATE), -1, 0); - if (MAP_FAILED == p) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - munmap(p, 4096); - if (mca_memory_linux_component.munmap_invoked) { - val |= 
OPAL_MEMORY_MUNMAP_SUPPORT; - } - - /* All done */ - if (val > 0) { - opal_mem_hooks_set_support(val); - return OPAL_SUCCESS; - } - return OPAL_ERR_NOT_AVAILABLE; -} - - -int opal_memory_linux_ptmalloc2_close(void) -{ - /* Nothing to do, really. This function exists just for - symmetry. */ - - return OPAL_SUCCESS; -} diff --git a/opal/mca/memory/linux/memory_linux_ummunotify.c b/opal/mca/memory/linux/memory_linux_ummunotify.c deleted file mode 100644 index 2ffdb4ca911..00000000000 --- a/opal/mca/memory/linux/memory_linux_ummunotify.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include -#include -#ifdef HAVE_STROPTS_H -#include -#endif -#include -#include -#include - -#include - -#include "opal_stdint.h" -#include "opal/constants.h" -#include "opal/util/output.h" -#include "opal/util/show_help.h" -#include "opal/mca/memory/memory.h" -#include "opal/memoryhooks/memory.h" -#include "opal/memoryhooks/memory_internal.h" - -#include "opal/mca/memory/linux/memory_linux.h" -#include "opal/mca/memory/linux/public.h" - -#define DEV_UMMUNOTIFY "/dev/ummunotify" - - -/* - * Local functions - */ -static int ummunotify_process(void); -static int ummunotify_register(void *start, size_t len, uint64_t cookie); -static int ummunotify_deregister(void *start, size_t len, uint64_t cookie); - - -/* - * Local variables - */ -static bool initialized = false; - - -/* - * Global variables (these need to be global variables rather than in - * the component struct because they are accessed in the - * opal_memory_changed() macro defined in public.h, and we don't want - * to have to include the component structure definition in public.h). - */ -uint64_t opal_memory_linux_ummunotify_counter_last_value = 0; -volatile uint64_t *opal_memory_linux_ummunotify_counter = - &opal_memory_linux_ummunotify_counter_last_value; - - -int opal_memory_linux_ummunotify_open(void) -{ - uint64_t *p; - - /* Just to be safe... */ - opal_memory_linux_ummunotify_counter_last_value = 0; - opal_memory_linux_ummunotify_counter = - &opal_memory_linux_ummunotify_counter_last_value; - - /* Open the device. Try to give a meaningful error message if - we're unable to open it. 
*/ - mca_memory_linux_component.ummunotify_fd = - open(DEV_UMMUNOTIFY, O_RDONLY | O_NONBLOCK); - if (mca_memory_linux_component.ummunotify_fd < 0) { - char hostname[HOST_NAME_MAX]; - gethostname(hostname, sizeof(hostname)); - - if (EACCES == errno) { - /* This will get a proper show_help when merged into the - linux component */ - opal_show_help("help-opal-memory-linux.txt", - "ummunotify eaccess", true, - hostname, DEV_UMMUNOTIFY); - } else if (ENOENT != errno) { - /* Don't print an error if DEV_UMMUNOTIFY simply doesn't exist */ - opal_show_help("help-opal-memory-linux.txt", - "ummunotify open error", true, - hostname, DEV_UMMUNOTIFY, - strerror(errno), errno); - } - return OPAL_ERR_NOT_SUPPORTED; - } - - p = mmap(NULL, sizeof(*opal_memory_linux_ummunotify_counter), - PROT_READ, MAP_SHARED, - mca_memory_linux_component.ummunotify_fd, 0); - if (MAP_FAILED == opal_memory_linux_ummunotify_counter) { - close(mca_memory_linux_component.ummunotify_fd); - mca_memory_linux_component.ummunotify_fd = -1; - return OPAL_ERR_NOT_SUPPORTED; - } - opal_memory_linux_ummunotify_counter = p; - - /* If everything went well, tell OMPI that we have full support - for the memory hooks and fill in the component function - pointers */ - opal_mem_hooks_set_support(OPAL_MEMORY_FREE_SUPPORT | - OPAL_MEMORY_CHUNK_SUPPORT | - OPAL_MEMORY_MUNMAP_SUPPORT); - mca_memory_linux_component.super.memoryc_process = ummunotify_process; - mca_memory_linux_component.super.memoryc_register = ummunotify_register; - mca_memory_linux_component.super.memoryc_deregister = ummunotify_deregister; - initialized = true; - - return OPAL_SUCCESS; -} - - -/* - * Called during opal_finalize (usually during MPI_FINALIZE) to tear - * down anything that can/should be torn down to disable this - * component. The application may continue for a while after - * MPI_FINALIZE, so we should do as much as possible to disable - * anything we enabled during ummunotify_open(). 
- */ -int opal_memory_linux_ummunotify_close(void) -{ - if (initialized && mca_memory_linux_component.ummunotify_fd >= 0) { - munmap((void*) opal_memory_linux_ummunotify_counter, - sizeof(*opal_memory_linux_ummunotify_counter)); - close(mca_memory_linux_component.ummunotify_fd); - mca_memory_linux_component.ummunotify_fd = -1; - opal_memory_linux_ummunotify_counter = - &opal_memory_linux_ummunotify_counter_last_value; - initialized = false; - } - - return OPAL_SUCCESS; -} - -/* - * Called when opal_memory_changed() returns 1 - */ -static int ummunotify_process(void) -{ - int n; - unsigned int i; - struct ummunotify_event events[128]; - - /* Loop reading from the ummunot fd until there's nothing left to - read. If we get a LAST event, re-record the counter. */ - while (initialized) { - n = read(mca_memory_linux_component.ummunotify_fd, - &events, sizeof(events)); - if (n <= 0) { - return (EAGAIN == errno) ? OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; - } - - for (i = 0; i < n / sizeof(events[0]); ++i) { - switch (events[i].type) { - case UMMUNOTIFY_EVENT_TYPE_INVAL: - /* 0 => this callback did not come from malloc */ - OPAL_OUTPUT((-1, "ummunot: invalidate start %p, end %p", - (void*) events[i].hint_start, - (void*) events[i].hint_end)); - opal_mem_hooks_release_hook((void *) (uintptr_t) events[i].hint_start, - events[i].hint_end - events[i].hint_start, - 0); - break; - - case UMMUNOTIFY_EVENT_TYPE_LAST: - opal_memory_linux_ummunotify_counter_last_value = - events[i].user_cookie_counter; - /* Are there more events to read? 
*/ - if (opal_memory_linux_ummunotify_counter_last_value == - *opal_memory_linux_ummunotify_counter) { - OPAL_OUTPUT((-1, "ummunot: LAST; done")); - return OPAL_SUCCESS; - } - OPAL_OUTPUT((-1, "ummunot: LAST; but looping around")); - break; - } - } - } - - /* Will only get here if this component has not been - initialized */ - return OPAL_SUCCESS; -} - -static int ummunotify_register(void *start, size_t len, uint64_t cookie) -{ - struct ummunotify_register_ioctl r; - r.reserved = 0; - r.start = (unsigned long) start; - r.end = (unsigned long) start + len; - r.user_cookie = cookie; - - OPAL_OUTPUT((-1, "ummunot: register %p - %p", - start, ((char*) start) + len)); - if (initialized && ioctl(mca_memory_linux_component.ummunotify_fd, - UMMUNOTIFY_REGISTER_REGION, &r)) { - OPAL_OUTPUT((-1, "Error in ioctl register!")); - return OPAL_ERR_IN_ERRNO; - } - - return OPAL_SUCCESS; -} - -static int ummunotify_deregister(void *start, size_t len, uint64_t cookie) -{ - OPAL_OUTPUT((-1, "ummunot: deregister %p - %p", - start, ((char*) start) + len)); - if (initialized && ioctl(mca_memory_linux_component.ummunotify_fd, - UMMUNOTIFY_UNREGISTER_REGION, &cookie)) { - OPAL_OUTPUT((-1, "Error in ioctl unregister!")); - return OPAL_ERR_IN_ERRNO; - } - - return OPAL_SUCCESS; -} diff --git a/opal/mca/memory/linux/owner.txt b/opal/mca/memory/linux/owner.txt deleted file mode 100644 index 2662d53d1d7..00000000000 --- a/opal/mca/memory/linux/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: MELLANOX,CISCO -status: maintenance diff --git a/opal/mca/memory/linux/public.h b/opal/mca/memory/linux/public.h deleted file mode 100644 index 4248a853b68..00000000000 --- a/opal/mca/memory/linux/public.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_MEMORY_LINUX_PUBLIC_H -#define OPAL_MEMORY_LINUX_PUBLIC_H - -#include "opal_config.h" - -#include - -OPAL_DECLSPEC extern volatile uint64_t *opal_memory_linux_ummunotify_counter; -OPAL_DECLSPEC extern uint64_t opal_memory_linux_ummunotify_counter_last_value; - -#define opal_memory_changed() \ - (opal_memory_linux_ummunotify_counter_last_value != \ - *opal_memory_linux_ummunotify_counter) - -#endif diff --git a/opal/mca/memory/linux/rename.h b/opal/mca/memory/linux/rename.h deleted file mode 100644 index 4acada394f1..00000000000 --- a/opal/mca/memory/linux/rename.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* Name-shift all the internal ptmalloc22 symbols to guarantee to not - conflict / confuse / override the internal glibc symbols. */ - -#define __default_morecore opal_memory_ptmalloc2_default_morecore - -#define _int_malloc opal_memory_ptmalloc2_int_malloc -#define _int_free opal_memory_ptmalloc2_int_free -#define _int_realloc opal_memory_ptmalloc2_int_realloc -#define _int_memalign opal_memory_ptmalloc2_int_memalign -#define _int_valloc opal_memory_ptmalloc2_int_valloc -#define _int_pvalloc opal_memory_ptmalloc2_int_pvalloc -#define _int_icalloc opal_memory_ptmalloc2_int_icalloc -#define _int_icomalloc opal_memory_ptmalloc2_int_icomalloc - -#define mTRIm opal_memory_ptmalloc2_mTRIm -#define mUSABLe opal_memory_ptmalloc2_mUSABLe -#define mALLOPt opal_memory_ptmalloc2_mALLOPt - -#define mem2mem_check opal_memory_ptmalloc2_mem2mem_check -#define top_check opal_memory_ptmalloc2_top_check -#define munmap_chunk opal_memory_ptmalloc2_munmap_chunk -#define mremap_chunk opal_memory_ptmalloc2_mremap_chunk - -#define malloc_check opal_memory_ptmalloc2_malloc_check -#define free_check opal_memory_ptmalloc2_free_check -#define realloc_check 
opal_memory_ptmalloc2_realloc_check -#define memalign_check opal_memory_ptmalloc2_memalign_check - -#define malloc_starter opal_memory_ptmalloc2_malloc_starter -#define memalign_starter opal_memory_ptmalloc2_memalign_starter -#define free_starter opal_memory_ptmalloc2_free_starter - -#define malloc_atfork opal_memory_ptmalloc2_malloc_atfork -#define free_atfork opal_memory_ptmalloc2_free_atfork - -#define _int_get_arena opal_memory_ptmalloc2_int_get_arena -#define _int_get_arena_info opal_memory_ptmalloc2_int_get_arena_info -#define _int_get_global_info opal_memory_ptmalloc2_int_get_global_info -#define _int_new_arena opal_memory_ptmalloc2_int_new_arena -#define __malloc_check_init opal_memory_ptmalloc2_malloc_check_init -#define malloc_stats opal_memory_ptmalloc2_malloc_stats - -#define posix_memalign opal_memory_ptmalloc2_posix_memalign diff --git a/opal/mca/memory/linux/sysdeps/generic/atomic.h b/opal/mca/memory/linux/sysdeps/generic/atomic.h deleted file mode 100644 index a3aeed13e89..00000000000 --- a/opal/mca/memory/linux/sysdeps/generic/atomic.h +++ /dev/null @@ -1 +0,0 @@ -/* Empty placeholder */ diff --git a/opal/mca/memory/linux/sysdeps/generic/malloc-machine.h b/opal/mca/memory/linux/sysdeps/generic/malloc-machine.h deleted file mode 100644 index 345137060d0..00000000000 --- a/opal/mca/memory/linux/sysdeps/generic/malloc-machine.h +++ /dev/null @@ -1,68 +0,0 @@ -/* Basic platform-independent macro definitions for mutexes, - thread-specific data and parameters for malloc. - Copyright (C) 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#ifndef _GENERIC_MALLOC_MACHINE_H -#define _GENERIC_MALLOC_MACHINE_H - -#include - -#ifndef mutex_init /* No threads, provide dummy macros */ - -# define NO_THREADS - -/* The mutex functions used to do absolutely nothing, i.e. lock, - trylock and unlock would always just return 0. However, even - without any concurrently active threads, a mutex can be used - legitimately as an `in use' flag. To make the code that is - protected by a mutex async-signal safe, these macros would have to - be based on atomic test-and-set operations, for example. */ -typedef int mutex_t; - -# define mutex_init(m) (*(m) = 0) -# define mutex_lock(m) ((*(m) = 1), 0) -# define mutex_trylock(m) (*(m) ? 
1 : ((*(m) = 1), 0)) -# define mutex_unlock(m) (*(m) = 0) - -typedef void *tsd_key_t; -# define tsd_key_create(key, destr) do {} while(0) -# define tsd_setspecific(key, data) ((key) = (data)) -# define tsd_getspecific(key, vptr) (vptr = (key)) - -# define thread_atfork(prepare, parent, child) do {} while(0) - -#endif /* !defined mutex_init */ - -#ifndef atomic_full_barrier -# define atomic_full_barrier() __asm ("" ::: "memory") -#endif - -#ifndef atomic_read_barrier -# define atomic_read_barrier() atomic_full_barrier () -#endif - -#ifndef atomic_write_barrier -# define atomic_write_barrier() atomic_full_barrier () -#endif - -#ifndef DEFAULT_TOP_PAD -# define DEFAULT_TOP_PAD 131072 -#endif - -#endif /* !defined(_GENERIC_MALLOC_MACHINE_H) */ diff --git a/opal/mca/memory/linux/sysdeps/generic/thread-st.h b/opal/mca/memory/linux/sysdeps/generic/thread-st.h deleted file mode 100644 index 0243774b7cf..00000000000 --- a/opal/mca/memory/linux/sysdeps/generic/thread-st.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * $Id:$ - * Generic version: no threads. - * by Wolfram Gloger 2004 - */ - -#include - -struct thread_st { - char *sp; /* stack pointer, can be 0 */ - void (*func)(struct thread_st* st); /* must be set by user */ - int id; - int flags; - struct user_data u; -}; - -static void -thread_init(void) -{ - printf("No threads.\n"); -} - -/* Create a thread. */ -static int -thread_create(struct thread_st *st) -{ - st->flags = 0; - st->id = 1; - st->func(st); - return 0; -} - -/* Wait for one of several subthreads to finish. */ -static void -wait_for_thread(struct thread_st st[], int n_thr, - int (*end_thr)(struct thread_st*)) -{ - int i; - for(i=0; i. 
- -Permission to use, copy, modify, distribute, and sell this software -and its documentation for any purpose is hereby granted without fee, -provided that (i) the above copyright notices and this permission -notice appear in all copies of the software and related documentation, -and (ii) the name of Wolfram Gloger may not be used in any advertising -or publicity relating to the software. - -THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, -EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY -WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, -INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY -DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY -OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. -*/ - -#ifndef _PTHREAD_MALLOC_MACHINE_H -#define _PTHREAD_MALLOC_MACHINE_H - -#include - -#undef thread_atfork_static - -/* Use fast inline spinlocks with gcc. 
*/ -#if (defined __i386__ || defined __x86_64__) && defined __GNUC__ && \ - !defined USE_NO_SPINLOCKS - -#include -#include - -typedef struct { - volatile unsigned int lock; - int pad0_; -} mutex_t; - -#define MUTEX_INITIALIZER { 0 } -#define mutex_init(m) ((m)->lock = 0) -static inline int mutex_lock(mutex_t *m) { - int cnt = 0, r; - struct timespec tm; - - for(;;) { - __asm__ __volatile__ - ("xchgl %0, %1" - : "=r"(r), "=m"(m->lock) - : "0"(1), "m"(m->lock) - : "memory"); - if(!r) - return 0; - if(cnt < 50) { - sched_yield(); - cnt++; - } else { - tm.tv_sec = 0; - tm.tv_nsec = 2000001; - nanosleep(&tm, NULL); - cnt = 0; - } - } -} -static inline int mutex_trylock(mutex_t *m) { - int r; - - __asm__ __volatile__ - ("xchgl %0, %1" - : "=r"(r), "=m"(m->lock) - : "0"(1), "m"(m->lock) - : "memory"); - return r; -} -static inline int mutex_unlock(mutex_t *m) { - m->lock = 0; - __asm __volatile__ ("" : : : "memory"); - return 0; -} - -#else - -/* Normal pthread mutex. */ -typedef pthread_mutex_t mutex_t; - -#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER -#define mutex_init(m) pthread_mutex_init(m, NULL) -#define mutex_lock(m) pthread_mutex_lock(m) -#define mutex_trylock(m) pthread_mutex_trylock(m) -#define mutex_unlock(m) pthread_mutex_unlock(m) - -#endif /* (__i386__ || __x86_64__) && __GNUC__ && !USE_NO_SPINLOCKS */ - -/* thread specific data */ -#if defined(__sgi) || defined(USE_TSD_DATA_HACK) - -/* Hack for thread-specific data, e.g. on Irix 6.x. We can't use - pthread_setspecific because that function calls malloc() itself. - The hack only works when pthread_t can be converted to an integral - type. 
*/ - -typedef void *tsd_key_t[256]; -#define tsd_key_create(key, destr) do { \ - int i; \ - for(i=0; i<256; i++) (*key)[i] = 0; \ -} while(0) -#define tsd_setspecific(key, data) \ - (key[(unsigned)pthread_self() % 256] = (data)) -#define tsd_getspecific(key, vptr) \ - (vptr = key[(unsigned)pthread_self() % 256]) - -#else - -typedef pthread_key_t tsd_key_t; - -#define tsd_key_create(key, destr) pthread_key_create(key, destr) -#define tsd_setspecific(key, data) pthread_setspecific(key, data) -#define tsd_getspecific(key, vptr) (vptr = pthread_getspecific(key)) - -#endif - -/* at fork */ -#define thread_atfork(prepare, parent, child) \ - pthread_atfork(prepare, parent, child) - -#include - -#endif /* !defined(_MALLOC_MACHINE_H) */ diff --git a/opal/mca/memory/linux/sysdeps/pthread/thread-st.h b/opal/mca/memory/linux/sysdeps/pthread/thread-st.h deleted file mode 100644 index f97a0a35527..00000000000 --- a/opal/mca/memory/linux/sysdeps/pthread/thread-st.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * $Id: thread-st.h$ - * pthread version - * by Wolfram Gloger 2004 - */ - -#include -#include - -pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER; -pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER; - -#ifndef USE_PTHREADS_STACKS -#define USE_PTHREADS_STACKS 0 -#endif - -#ifndef STACKSIZE -#define STACKSIZE 32768 -#endif - -struct thread_st { - char *sp; /* stack pointer, can be 0 */ - void (*func)(struct thread_st* st); /* must be set by user */ - pthread_t id; - int flags; - struct user_data u; -}; - -static void -thread_init(void) -{ - printf("Using posix threads.\n"); - pthread_cond_init(&finish_cond, NULL); - pthread_mutex_init(&finish_mutex, NULL); -} - -static void * -thread_wrapper(void *ptr) -{ - struct thread_st *st = (struct thread_st*)ptr; - - /*printf("begin %p\n", st->sp);*/ - st->func(st); - pthread_mutex_lock(&finish_mutex); - st->flags = 1; - pthread_mutex_unlock(&finish_mutex); - pthread_cond_signal(&finish_cond); - /*printf("end %p\n", st->sp);*/ - 
return NULL; -} - -/* Create a thread. */ -static int -thread_create(struct thread_st *st) -{ - st->flags = 0; - { - pthread_attr_t* attr_p = 0; -#if USE_PTHREADS_STACKS - pthread_attr_t attr; - - pthread_attr_init (&attr); - if(!st->sp) - st->sp = malloc(STACKSIZE+16); - if(!st->sp) - return -1; - if(pthread_attr_setstacksize(&attr, STACKSIZE)) - fprintf(stderr, "error setting stacksize"); - else - pthread_attr_setstackaddr(&attr, st->sp + STACKSIZE); - /*printf("create %p\n", st->sp);*/ - attr_p = &attr; -#endif - return pthread_create(&st->id, attr_p, thread_wrapper, st); - } - return 0; -} - -/* Wait for one of several subthreads to finish. */ -static void -wait_for_thread(struct thread_st st[], int n_thr, - int (*end_thr)(struct thread_st*)) -{ - int i; - - pthread_mutex_lock(&finish_mutex); - for(;;) { - int term = 0; - for(i=0; i 0) - break; - pthread_cond_wait(&finish_cond, &finish_mutex); - } - pthread_mutex_unlock(&finish_mutex); -} - -/* - * Local variables: - * tab-width: 4 - * End: - */ diff --git a/opal/mca/memory/linux/sysdeps/solaris/malloc-machine.h b/opal/mca/memory/linux/sysdeps/solaris/malloc-machine.h deleted file mode 100644 index 00e33b08014..00000000000 --- a/opal/mca/memory/linux/sysdeps/solaris/malloc-machine.h +++ /dev/null @@ -1,51 +0,0 @@ -/* Basic platform-independent macro definitions for mutexes, - thread-specific data and parameters for malloc. - Solaris threads version. - Copyright (C) 2004 Wolfram Gloger . - -Permission to use, copy, modify, distribute, and sell this software -and its documentation for any purpose is hereby granted without fee, -provided that (i) the above copyright notices and this permission -notice appear in all copies of the software and related documentation, -and (ii) the name of Wolfram Gloger may not be used in any advertising -or publicity relating to the software. 
- -THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, -EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY -WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, -INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY -DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY -OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. -*/ - -#ifndef _SOLARIS_MALLOC_MACHINE_H -#define _SOLARIS_MALLOC_MACHINE_H - -#include - -typedef thread_t thread_id; - -#define MUTEX_INITIALIZER { 0 } -#define mutex_init(m) mutex_init(m, USYNC_THREAD, NULL) - -/* - * Hack for thread-specific data on Solaris. We can't use thr_setspecific - * because that function calls malloc() itself. - */ -typedef void *tsd_key_t[256]; -#define tsd_key_create(key, destr) do { \ - int i; \ - for(i=0; i<256; i++) (*key)[i] = 0; \ -} while(0) -#define tsd_setspecific(key, data) (key[(unsigned)thr_self() % 256] = (data)) -#define tsd_getspecific(key, vptr) (vptr = key[(unsigned)thr_self() % 256]) - -#define thread_atfork(prepare, parent, child) do {} while(0) - -#include - -#endif /* !defined(_SOLARIS_MALLOC_MACHINE_H) */ diff --git a/opal/mca/memory/linux/sysdeps/solaris/thread-st.h b/opal/mca/memory/linux/sysdeps/solaris/thread-st.h deleted file mode 100644 index dbb4b097e8b..00000000000 --- a/opal/mca/memory/linux/sysdeps/solaris/thread-st.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * $Id:$ - * Solaris version - * by Wolfram Gloger 2004 - */ - -#include -#include - -#ifndef STACKSIZE -#define STACKSIZE 32768 -#endif - -struct thread_st { - char *sp; /* stack pointer, can be 0 */ - void (*func)(struct thread_st* st); /* must be set by user */ - thread_id id; - int flags; - struct user_data u; -}; - -static void -thread_init(void) -{ - printf("Using Solaris threads.\n"); -} - 
-static void * -thread_wrapper(void *ptr) -{ - struct thread_st *st = (struct thread_st*)ptr; - - /*printf("begin %p\n", st->sp);*/ - st->func(st); - /*printf("end %p\n", st->sp);*/ - return NULL; -} - -/* Create a thread. */ -static int -thread_create(struct thread_st *st) -{ - st->flags = 0; - if(!st->sp) - st->sp = malloc(STACKSIZE); - if(!st->sp) return -1; - thr_create(st->sp, STACKSIZE, thread_wrapper, st, THR_NEW_LWP, &st->id); - return 0; -} - -/* Wait for one of several subthreads to finish. */ -static void -wait_for_thread(struct thread_st st[], int n_thr, - int (*end_thr)(struct thread_st*)) -{ - int i; - thread_t id; - - thr_join(0, &id, NULL); - for(i=0; i. - -Permission to use, copy, modify, distribute, and sell this software -and its documentation for any purpose is hereby granted without fee, -provided that (i) the above copyright notices and this permission -notice appear in all copies of the software and related documentation, -and (ii) the name of Wolfram Gloger may not be used in any advertising -or publicity relating to the software. - -THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, -EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY -WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, -INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY -DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY -OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. 
-*/ - -#ifndef _SPROC_MALLOC_MACHINE_H -#define _SPROC_MALLOC_MACHINE_H - -#include -#include -#include -#include - -typedef abilock_t mutex_t; - -#define MUTEX_INITIALIZER { 0 } -#define mutex_init(m) init_lock(m) -#define mutex_lock(m) (spin_lock(m), 0) -#define mutex_trylock(m) acquire_lock(m) -#define mutex_unlock(m) release_lock(m) - -typedef int tsd_key_t; -int tsd_key_next; -#define tsd_key_create(key, destr) ((*key) = tsd_key_next++) -#define tsd_setspecific(key, data) (((void **)(&PRDA->usr_prda))[key] = data) -#define tsd_getspecific(key, vptr) (vptr = ((void **)(&PRDA->usr_prda))[key]) - -#define thread_atfork(prepare, parent, child) do {} while(0) - -#include - -#endif /* !defined(_SPROC_MALLOC_MACHINE_H) */ diff --git a/opal/mca/memory/linux/sysdeps/sproc/thread-st.h b/opal/mca/memory/linux/sysdeps/sproc/thread-st.h deleted file mode 100644 index a512f921306..00000000000 --- a/opal/mca/memory/linux/sysdeps/sproc/thread-st.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * $Id:$ - * sproc version - * by Wolfram Gloger 2001, 2004 - */ - -#include -#include -#include - -#ifndef STACKSIZE -#define STACKSIZE 32768 -#endif - -struct thread_st { - char *sp; /* stack pointer, can be 0 */ - void (*func)(struct thread_st* st); /* must be set by user */ - thread_id id; - int flags; - struct user_data u; -}; - -static void -thread_init(void) -{ - printf("Using sproc() threads.\n"); -} - -static void -thread_wrapper(void *ptr, size_t stack_len) -{ - struct thread_st *st = (struct thread_st*)ptr; - - /*printf("begin %p\n", st->sp);*/ - st->func(st); - /*printf("end %p\n", st->sp);*/ -} - -/* Create a thread. */ -static int -thread_create(struct thread_st *st) -{ - st->flags = 0; - if(!st->sp) - st->sp = malloc(STACKSIZE); - if(!st->sp) return -1; - st->id = sprocsp(thread_wrapper, PR_SALL, st, st->sp+STACKSIZE, STACKSIZE); - if(st->id < 0) { - return -1; - } - return 0; -} - -/* Wait for one of several subthreads to finish. 
*/ -static void -wait_for_thread(struct thread_st st[], int n_thr, - int (*end_thr)(struct thread_st*)) -{ - int i; - int id; - - int status = 0; - id = wait(&status); - if(status != 0) { - if(WIFSIGNALED(status)) - printf("thread %id terminated by signal %d\n", - id, WTERMSIG(status)); - else - printf("thread %id exited with status %d\n", - id, WEXITSTATUS(status)); - } - for(i=0; i 0 - -static void -mem_init(unsigned char *ptr, unsigned long size) -{ - unsigned long i, j; - - if(size == 0) return; - for(i=0; i>8)) & 0xFF); - } - j = (unsigned long)ptr ^ (size-1); - ptr[size-1] = ((j ^ (j>>8)) & 0xFF); -} - -static int -mem_check(unsigned char *ptr, unsigned long size) -{ - unsigned long i, j; - - if(size == 0) return 0; - for(i=0; i>8)) & 0xFF)) return 1; - } - j = (unsigned long)ptr ^ (size-1); - if(ptr[size-1] != ((j ^ (j>>8)) & 0xFF)) return 2; - return 0; -} - -static int -zero_check(unsigned* ptr, unsigned long size) -{ - unsigned char* ptr2; - - while(size >= sizeof(*ptr)) { - if(*ptr++ != 0) - return -1; - size -= sizeof(*ptr); - } - ptr2 = (unsigned char*)ptr; - while(size > 0) { - if(*ptr2++ != 0) - return -1; - --size; - } - return 0; -} - -#endif /* TEST > 0 */ - -/* Allocate a bin with malloc(), realloc() or memalign(). r must be a - random number >= 1024. 
*/ - -static void -bin_alloc(struct bin *m, unsigned long size, int r) -{ -#if TEST > 0 - if(mem_check(m->ptr, m->size)) { - printf("memory corrupt!\n"); - exit(1); - } -#endif - r %= 1024; - /*printf("%d ", r);*/ - if(r < 4) { /* memalign */ - if(m->size > 0) free(m->ptr); - m->ptr = (unsigned char *)memalign(sizeof(int) << r, size); - } else if(r < 20) { /* calloc */ - if(m->size > 0) free(m->ptr); - m->ptr = (unsigned char *)calloc(size, 1); -#if TEST > 0 - if(zero_check((unsigned*)m->ptr, size)) { - long i; - for(i=0; iptr[i] != 0) - break; - printf("calloc'ed memory non-zero (ptr=%p, i=%ld)!\n", m->ptr, i); - exit(1); - } -#endif - } else if(r < 100 && m->size < REALLOC_MAX) { /* realloc */ - if(m->size == 0) m->ptr = NULL; - m->ptr = realloc(m->ptr, size); - } else { /* plain malloc */ - if(m->size > 0) free(m->ptr); - m->ptr = (unsigned char *)malloc(size); - } - if(!m->ptr) { - printf("out of memory (r=%d, size=%ld)!\n", r, (long)size); - exit(1); - } - m->size = size; -#if TEST > 0 - mem_init(m->ptr, m->size); -#endif -} - -/* Free a bin. */ - -static void -bin_free(struct bin *m) -{ - if(m->size == 0) return; -#if TEST > 0 - if(mem_check(m->ptr, m->size)) { - printf("memory corrupt!\n"); - exit(1); - } -#endif - free(m->ptr); - m->size = 0; -} - -/* - * Local variables: - * tab-width: 4 - * End: - */ diff --git a/opal/mca/memory/linux/t-test1.c b/opal/mca/memory/linux/t-test1.c deleted file mode 100644 index 15dc7c6aaea..00000000000 --- a/opal/mca/memory/linux/t-test1.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - * $Id: t-test1.c,v 1.2 2004/11/04 14:58:45 wg Exp $ - * by Wolfram Gloger 1996-1999, 2001, 2004 - * A multi-thread test for malloc performance, maintaining one pool of - * allocated bins per thread. 
- */ - -#if (defined __STDC__ && __STDC__) || defined __cplusplus -# include -#endif -#include -#include -#include -#include -#include -#include -#include - -#if !USE_MALLOC -#include -#else -#include "malloc.h" -#endif - -#include "lran2.h" -#include "t-test.h" - -struct user_data { - int bins, max; - unsigned long size; - long seed; -}; -#include "thread-st.h" - -#define N_TOTAL 10 -#ifndef N_THREADS -#define N_THREADS 2 -#endif -#ifndef N_TOTAL_PRINT -#define N_TOTAL_PRINT 50 -#endif -#ifndef MEMORY -#define MEMORY 8000000l -#endif -#define SIZE 10000 -#define I_MAX 10000 -#define ACTIONS_MAX 30 -#ifndef TEST_FORK -#define TEST_FORK 0 -#endif - -#define RANDOM(d,s) (lran2(d) % (s)) - -struct bin_info { - struct bin *m; - unsigned long size, bins; -}; - -#if TEST > 0 - -void -bin_test(struct bin_info *p) -{ - int b; - - for(b=0; bbins; b++) { - if(mem_check(p->m[b].ptr, p->m[b].size)) { - printf("memory corrupt!\n"); - abort(); - } - } -} - -#endif - -void -malloc_test(struct thread_st *st) -{ - int b, i, j, actions, pid = 1; - struct bin_info p; - struct lran2_st ld; /* data for random number generator */ - - lran2_init(&ld, st->u.seed); -#if TEST_FORK>0 - if(RANDOM(&ld, TEST_FORK) == 0) { - int status; - -#if !USE_THR - pid = fork(); -#else - pid = fork1(); -#endif - if(pid > 0) { - /*printf("forked, waiting for %d...\n", pid);*/ - waitpid(pid, &status, 0); - printf("done with %d...\n", pid); - if(!WIFEXITED(status)) { - printf("child term with signal %d\n", WTERMSIG(status)); - exit(1); - } - return; - } - exit(0); - } -#endif - p.m = (struct bin *)malloc(st->u.bins*sizeof(*p.m)); - p.bins = st->u.bins; - p.size = st->u.size; - for(b=0; bu.max;) { -#if TEST > 1 - bin_test(&p); -#endif - actions = RANDOM(&ld, ACTIONS_MAX); -#if USE_MALLOC && MALLOC_DEBUG - if(actions < 2) { mallinfo(); } -#endif - for(j=0; j 2 - bin_test(&p); -#endif - } -#if 0 /* Test illegal free()s while setting MALLOC_CHECK_ */ - for(j=0; j<8; j++) { - b = RANDOM(&ld, p.bins); - 
if(p.m[b].ptr) { - int offset = (RANDOM(&ld, 11) - 5)*8; - char *rogue = (char*)(p.m[b].ptr) + offset; - /*printf("p=%p rogue=%p\n", p.m[b].ptr, rogue);*/ - free(rogue); - } - } -#endif - i += actions; - } - for(b=0; bid); -#endif - if(n_total >= n_total_max) { - n_running--; - } else if(st->u.seed++, thread_create(st)) { - printf("Creating thread #%d failed.\n", n_total); - } else { - n_total++; - if(n_total%N_TOTAL_PRINT == 0) - printf("n_total = %d\n", n_total); - } - return 0; -} - -#if 0 -/* Protect address space for allocation of n threads by LinuxThreads. */ -static void -protect_stack(int n) -{ - char buf[2048*1024]; - char* guard; - size_t guard_size = 2*2048*1024UL*(n+2); - - buf[0] = '\0'; - guard = (char*)(((unsigned long)buf - 4096)& ~4095UL) - guard_size; - printf("Setting up stack guard at %p\n", guard); - if(mmap(guard, guard_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, - -1, 0) - != guard) - printf("failed!\n"); -} -#endif - -int -main(int argc, char *argv[]) -{ - int i, bins; - int n_thr=N_THREADS; - int i_max=I_MAX; - unsigned long size=SIZE; - struct thread_st *st; - -#if USE_MALLOC && USE_STARTER==2 - ptmalloc_init(); - printf("ptmalloc_init\n"); -#endif - - if(argc > 1) n_total_max = atoi(argv[1]); - if(n_total_max < 1) n_thr = 1; - if(argc > 2) n_thr = atoi(argv[2]); - if(n_thr < 1) n_thr = 1; - if(n_thr > 100) n_thr = 100; - if(argc > 3) i_max = atoi(argv[3]); - - if(argc > 4) size = atol(argv[4]); - if(size < 2) size = 2; - - bins = MEMORY/(size*n_thr); - if(argc > 5) bins = atoi(argv[5]); - if(bins < 4) bins = 4; - - /*protect_stack(n_thr);*/ - - thread_init(); - printf("total=%d threads=%d i_max=%d size=%ld bins=%d\n", - n_total_max, n_thr, i_max, size, bins); - - st = (struct thread_st *)malloc(n_thr*sizeof(*st)); - if(!st) exit(-1); - -#if !defined NO_THREADS && (defined __sun__ || defined sun) - /* I know of no other way to achieve proper concurrency with Solaris. 
*/ - thr_setconcurrency(n_thr); -#endif - - /* Start all n_thr threads. */ - for(i=0; i0;) { - wait_for_thread(st, n_thr, my_end_thread); - } - for(i=0; i -#endif -#include -#include -#include -#include -#include -#include - -#if !USE_MALLOC -#include -#else -#include "malloc.h" -#endif - -#include "lran2.h" -#include "t-test.h" - -struct user_data { - int max; - unsigned long size; - long seed; -}; -#include "thread-st.h" -#include "malloc-machine.h" /* for mutex */ - -#define N_TOTAL 10 -#ifndef N_THREADS -#define N_THREADS 2 -#endif -#ifndef N_TOTAL_PRINT -#define N_TOTAL_PRINT 50 -#endif -#define STACKSIZE 32768 -#ifndef MEMORY -#define MEMORY 8000000l -#endif -#define SIZE 10000 -#define I_MAX 10000 -#define BINS_PER_BLOCK 20 - -#define RANDOM(d,s) (lran2(d) % (s)) - -struct block { - struct bin b[BINS_PER_BLOCK]; - mutex_t mutex; -} *blocks; - -int n_blocks; - -#if TEST > 0 - -void -bin_test(void) -{ - int b, i; - - for(b=0; bu.seed); - for(i=0; i<=st->u.max;) { -#if TEST > 1 - bin_test(); -#endif - bl = &blocks[RANDOM(&ld, n_blocks)]; - r = RANDOM(&ld, 1024); - if(r < 200) { /* free only */ - mutex_lock(&bl->mutex); - for(b=0; bb[b]); - mutex_unlock(&bl->mutex); - i += BINS_PER_BLOCK; - } else { /* alloc/realloc */ - /* Generate random numbers in advance. */ - for(b=0; bu.size) + 1; - rnum[b] = lran2(&ld); - } - mutex_lock(&bl->mutex); - for(b=0; bb[b], rsize[b], rnum[b]); - mutex_unlock(&bl->mutex); - i += BINS_PER_BLOCK; - } -#if TEST > 2 - bin_test(); -#endif - } -} - -int n_total=0, n_total_max=N_TOTAL, n_running; - -int -my_end_thread(struct thread_st *st) -{ - /* Thread st has finished. Start a new one. 
*/ -#if 0 - printf("Thread %lx terminated.\n", (long)st->id); -#endif - if(n_total >= n_total_max) { - n_running--; - } else if(st->u.seed++, thread_create(st)) { - printf("Creating thread #%d failed.\n", n_total); - } else { - n_total++; - if(n_total%N_TOTAL_PRINT == 0) - printf("n_total = %d\n", n_total); - } - return 0; -} - -int -main(int argc, char *argv[]) -{ - int i, j, bins; - int n_thr=N_THREADS; - int i_max=I_MAX; - unsigned long size=SIZE; - struct thread_st *st; - -#if USE_MALLOC && USE_STARTER==2 - ptmalloc_init(); - printf("ptmalloc_init\n"); -#endif - - if(argc > 1) n_total_max = atoi(argv[1]); - if(n_total_max < 1) n_thr = 1; - if(argc > 2) n_thr = atoi(argv[2]); - if(n_thr < 1) n_thr = 1; - if(n_thr > 100) n_thr = 100; - if(argc > 3) i_max = atoi(argv[3]); - - if(argc > 4) size = atol(argv[4]); - if(size < 2) size = 2; - - bins = MEMORY/size; - if(argc > 5) bins = atoi(argv[5]); - if(bins < BINS_PER_BLOCK) bins = BINS_PER_BLOCK; - - n_blocks = bins/BINS_PER_BLOCK; - blocks = (struct block *)malloc(n_blocks*sizeof(*blocks)); - if(!blocks) - exit(1); - - thread_init(); - printf("total=%d threads=%d i_max=%d size=%ld bins=%d\n", - n_total_max, n_thr, i_max, size, n_blocks*BINS_PER_BLOCK); - - for(i=0; i0;) { - wait_for_thread(st, n_thr, my_end_thread); - } - - for(i=0; i, 2001. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include -#include -#include "malloc.h" - -static int errors = 0; - -static void -merror (const char *msg) -{ - ++errors; - printf ("Error: %s\n", msg); -} - -int -main (void) -{ - void *p1, *p2; - void *save_state; - long i; - - errno = 0; - - p1 = malloc (10); - if (p1 == NULL) - merror ("malloc (10) failed."); - - p2 = malloc (20); - if (p2 == NULL) - merror ("malloc (20) failed."); - - free (malloc (10)); - - for (i=0; i<100; ++i) - { - save_state = malloc_get_state (); - if (save_state == NULL) - { - merror ("malloc_get_state () failed."); - break; - } - /*free (malloc (10)); This could change the top chunk! */ - malloc_set_state (save_state); - p1 = realloc (p1, i*4 + 4); - if (p1 == NULL) - merror ("realloc (i*4) failed."); - free (save_state); - } - - p1 = realloc (p1, 40); - free (p2); - p2 = malloc (10); - if (p2 == NULL) - merror ("malloc (10) failed."); - free (p1); - - return errors != 0; -} - -/* - * Local variables: - * c-basic-offset: 2 - * End: - */ diff --git a/opal/mca/memory/linux/tst-mstats.c b/opal/mca/memory/linux/tst-mstats.c deleted file mode 100644 index 5d843128413..00000000000 --- a/opal/mca/memory/linux/tst-mstats.c +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright (C) 2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Wolfram Gloger , 2004. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include -#include -#include "malloc.h" - -static int errors = 0; - -static void -merror (const char *msg) -{ - ++errors; - printf ("Error: %s\n", msg); -} - -int -main (void) -{ - void *p1, *p2; - long i; - mstate a; - struct malloc_arena_info mai; - int nfree; - unsigned long navail; - - errno = 0; - - malloc_stats(); /* check that it works even without initialization */ - a = _int_get_arena(0); - if (!a) { - merror ("Can't get main arena."); - return 1; - } - free (malloc (10)); - _int_get_arena_info(a, &mai); - printf("nfree = %d\navail = %lu\nfastavail = %lu\ntop_size = %lu\n", - mai.nbinblocks + mai.nfastblocks, - (unsigned long)mai.binavail, - (unsigned long)mai.fastavail, - (unsigned long)mai.top_size); - if (mai.nfastblocks+mai.nbinblocks < 1) - merror ("initial _int_get_arena_info() failed."); - nfree = mai.nbinblocks + mai.nfastblocks; - navail = mai.binavail + mai.fastavail; - - p1 = malloc (10); - if (p1 == NULL) - merror ("malloc (10) failed."); - p2 = malloc (30); - if (p2 == NULL) - merror ("malloc (30) failed."); - - free (malloc (10)); - - for (i=0; i<100; ++i) - { - p1 = realloc (p1, i*7 + 3); - if (p1 == NULL) - merror ("realloc (i*7 + 3) failed."); - } - free (p2); - - _int_get_arena_info(a, &mai); - printf("nfree = %d\navail = %lu\nfastavail = %lu\ntop_size = %lu\n", - mai.nbinblocks + mai.nfastblocks, - (unsigned long)mai.binavail, - (unsigned long)mai.fastavail, - (unsigned long)mai.top_size); - /* Assume that no memory is returned to the system from these small - 
chunks. */ - if (mai.nbinblocks+mai.nfastblocks < nfree || - mai.binavail+mai.fastavail < navail) - merror ("final _int_get_arena_info() failed."); - malloc_stats(); - - return errors != 0; -} - -/* - * Local variables: - * c-basic-offset: 2 - * End: - */ diff --git a/opal/mca/memory/malloc_solaris/Makefile.am b/opal/mca/memory/malloc_solaris/Makefile.am index 87040b5089e..e2d270efd36 100644 --- a/opal/mca/memory/malloc_solaris/Makefile.am +++ b/opal/mca/memory/malloc_solaris/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/memory/malloc_solaris/configure.m4 b/opal/mca/memory/malloc_solaris/configure.m4 index 158b6be180c..42d42333374 100644 --- a/opal/mca/memory/malloc_solaris/configure.m4 +++ b/opal/mca/memory/malloc_solaris/configure.m4 @@ -6,16 +6,18 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007-2011 Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). 
All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # AC_DEFUN([MCA_opal_memory_malloc_solaris_PRIORITY], [0]) @@ -26,7 +28,7 @@ AC_DEFUN([MCA_opal_memory_malloc_solaris_COMPILE_MODE], [ AC_MSG_RESULT([$$4]) ]) -# MCA_memory_malloc_solaris_CONFIG(action-if-can-compile, +# MCA_memory_malloc_solaris_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_memory_malloc_solaris_CONFIG],[ @@ -50,14 +52,14 @@ AC_DEFUN([MCA_opal_memory_malloc_solaris_CONFIG],[ [memory_malloc_solaris_happy="no"])]) AS_IF([test "$memory_malloc_solaris_happy" = "yes"], - [AC_CHECK_HEADER([malloc.h], [], + [AC_CHECK_HEADER([malloc.h], [], [memory_malloc_solaris_happy="no"])]) AS_IF([test "$memory_malloc_solaris_happy" = "yes"], [memory_malloc_solaris_munmap=0 - AC_CHECK_HEADER([sys/syscall.h], - [AC_CHECK_FUNCS([syscall], + AC_CHECK_HEADER([sys/syscall.h], + [AC_CHECK_FUNCS([syscall], [memory_malloc_solaris_munmap=1])]) AC_CHECK_FUNCS([__munmap], [memory_malloc_solaris_munmap=1]) @@ -76,7 +78,7 @@ AC_DEFUN([MCA_opal_memory_malloc_solaris_CONFIG],[ AS_IF([test "$memory_malloc_solaris_munmap" = "0"], [memory_malloc_solaris_happy="no"])]) - # There is a difference in the munmap prototypes for different + # There is a difference in the munmap prototypes for different # Solaris versions. So determine whether we are to use Legacy # S10 or later prototypes. memory_alloc_solaris_legacy=0 @@ -93,8 +95,8 @@ AC_DEFUN([MCA_opal_memory_malloc_solaris_CONFIG],[ [Whether to use the legacy Solaris munmap prototype or not]) ]) - AS_IF([test "$memory_malloc_solaris_happy" = "no" -a \ - "$memory_malloc_solaris_should_use" = "1"], + AS_IF([test "$memory_malloc_solaris_happy" = "no" && \ + test "$memory_malloc_solaris_should_use" = "1"], [AC_MSG_ERROR([malloc_solaris memory management requested but not available. 
Aborting.])]) AC_SUBST(memory_malloc_solaris_LIBS) @@ -106,7 +108,7 @@ AC_DEFUN([MCA_opal_memory_malloc_solaris_CONFIG],[ [memory_malloc_solaris_happy="no" memory_malloc_solaris_should_use=0]) ;; - esac + esac AS_IF([test "$memory_malloc_solaris_happy" = "yes"], [memory_base_found=1 diff --git a/opal/mca/memory/malloc_solaris/memory_malloc_solaris_component.c b/opal/mca/memory/malloc_solaris/memory_malloc_solaris_component.c index d5629492941..035d14ade41 100644 --- a/opal/mca/memory/malloc_solaris/memory_malloc_solaris_component.c +++ b/opal/mca/memory/malloc_solaris/memory_malloc_solaris_component.c @@ -6,18 +6,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2011 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -44,6 +46,7 @@ int __munmap(caddr_t addr, size_t len); #endif static int opal_memory_malloc_open(void); +static int opal_memory_malloc_query(int *); const opal_memory_base_component_2_0_0_t mca_memory_malloc_solaris_component = { /* First, the mca_component_t struct containing meta information @@ -66,8 +69,10 @@ const opal_memory_base_component_2_0_0_t mca_memory_malloc_solaris_component = { /* This component doesn't need these functions, but need to provide safe/empty register/deregister functions to call */ + .memoryc_query = opal_memory_malloc_query, .memoryc_register = opal_memory_base_component_register_empty, .memoryc_deregister = opal_memory_base_component_deregister_empty, + .memoryc_set_alignment = opal_memory_base_component_set_alignment_empty, }; /* @@ -90,6 +95,11 @@ opal_memory_malloc_open(void) return OPAL_SUCCESS; } +static int opal_memory_malloc_query (int *priority) +{ + *priority = 79; + return OPAL_SUCCESS; +} /* * Three ways to call munmap. Prefered is to call __munmap, which @@ -120,7 +130,7 @@ munmap(void *addr, size_t len) return syscall(SYS_munmap, addr, len); #elif defined(HAVE_DLSYM) if (NULL == realmunmap) { - union { + union { int (*munmap_fp)(void*, size_t); void *munmap_p; } tmp; diff --git a/opal/mca/memory/memory.h b/opal/mca/memory/memory.h index 6ca9d4b3aa3..b5968fd2640 100644 --- a/opal/mca/memory/memory.h +++ b/opal/mca/memory/memory.h @@ -6,17 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -76,6 +78,12 @@ BEGIN_C_DECLS */ typedef int (*opal_memory_base_component_process_fn_t)(void); +/** + * Prototype for a function that is invoked when the memory base is + * trying to select a component. This funtionality is required. + */ +typedef int (*opal_memory_base_component_query_fn_t)(int *priority); + /** * Prototype for a function that is invoked when Open MPI starts to * "care" about a specific memory region. That is, Open MPI declares @@ -86,13 +94,13 @@ typedef int (*opal_memory_base_component_process_fn_t)(void); * can use the value opal_memory_base_register_empty (an empty * implementation of this function). */ -typedef int (*opal_memory_base_component_register_fn_t)(void *base, +typedef int (*opal_memory_base_component_register_fn_t)(void *base, size_t len, uint64_t cookie); /** - * Prototype for a function that is the opposite of + * Prototype for a function that is the opposite of * opal_memory_base_component_register_fn_t: this function is invoked * when Open MPI stops to "caring" about a specific memory region. * That is, Open MPI declares that it no longer wants to be notified @@ -106,11 +114,22 @@ typedef int (*opal_memory_base_component_register_fn_t)(void *base, * can use the value opal_memory_base_deregister_empty (an empty * implementation of this function). 
*/ -typedef int (*opal_memory_base_component_deregister_fn_t)(void *base, +typedef int (*opal_memory_base_component_deregister_fn_t)(void *base, size_t len, uint64_t cookie); +/** + * Prototype for a function that set the memory alignment + */ +typedef void (*opal_memory_base_component_set_alignment_fn_t)(int use_memalign, + size_t memalign_threshold); + +/** + * Function to be called when initializing malloc hooks + */ +typedef void (*opal_memory_base_component_init_hook_fn_t)(void); + /** * Structure for memory components. */ @@ -120,6 +139,12 @@ typedef struct opal_memory_base_component_2_0_0_t { /** MCA base data */ mca_base_component_data_t memoryc_data; + opal_memory_base_component_query_fn_t memoryc_query; + + /** This function will be called when the malloc hooks are + * initialized. It may be NULL if no hooks are needed. */ + opal_memory_base_component_init_hook_fn_t memoryc_init_hook; + /** Function to call when something has changed, as indicated by opal_memory_changed(). Will be ignored if the component does not provide an opal_memory_changed() macro that returns @@ -132,6 +157,8 @@ typedef struct opal_memory_base_component_2_0_0_t { /** Function invoked when Open MPI stops "caring" about a specific memory region */ opal_memory_base_component_deregister_fn_t memoryc_deregister; + /** Function invoked in order to set malloc'ed memory alignment */ + opal_memory_base_component_set_alignment_fn_t memoryc_set_alignment; } opal_memory_base_component_2_0_0_t; OPAL_DECLSPEC extern opal_memory_base_component_2_0_0_t *opal_memory; diff --git a/opal/mca/memory/patcher/Makefile.am b/opal/mca/memory/patcher/Makefile.am new file mode 100644 index 00000000000..ce4172617f3 --- /dev/null +++ b/opal/mca/memory/patcher/Makefile.am @@ -0,0 +1,32 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This component is only ever built statically (i.e., slurped into +# libopen-pal) -- it is never built as a DSO. +noinst_LTLIBRARIES = libmca_memory_patcher.la +libmca_memory_patcher_la_SOURCES = \ + memory_patcher.h \ + memory_patcher_component.c +libmca_memory_patcher_la_LDFLAGS = \ + -module -avoid-version $(memory_patcher_LDFLAGS) +libmca_memory_patcher_la_LIBADD = $(memory_patcher_LIBS) diff --git a/opal/mca/memory/patcher/configure.m4 b/opal/mca/memory/patcher/configure.m4 new file mode 100644 index 00000000000..6881ec69366 --- /dev/null +++ b/opal/mca/memory/patcher/configure.m4 @@ -0,0 +1,52 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). 
All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +AC_DEFUN([MCA_opal_memory_patcher_PRIORITY], [41]) + +AC_DEFUN([MCA_opal_memory_patcher_COMPILE_MODE], [ + AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) + $4="static" + AC_MSG_RESULT([$$4]) +]) + + +# MCA_memory_patcher_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_memory_patcher_CONFIG],[ + AC_CONFIG_FILES([opal/mca/memory/patcher/Makefile]) + + AC_CHECK_FUNCS([__curbrk]) + + AC_CHECK_HEADERS([linux/mman.h sys/syscall.h]) + + AC_CHECK_DECLS([__mmap], [], [], [#include ]) + + AC_CHECK_FUNCS([__mmap]) + + AC_CHECK_DECLS([__syscall], [], [], [#include ]) + + AC_CHECK_FUNCS([__syscall]) + + [$1] +]) diff --git a/opal/mca/memory/patcher/memory_patcher.h b/opal/mca/memory/patcher/memory_patcher.h new file mode 100644 index 00000000000..1909443c549 --- /dev/null +++ b/opal/mca/memory/patcher/memory_patcher.h @@ -0,0 +1,27 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OPAL_MEMORY_PATCHER_H) +#define OPAL_MEMORY_PATCHER_H + +#include "opal_config.h" + +#include "opal/mca/memory/memory.h" +#include "opal/mca/patcher/patcher.h" + +typedef struct opal_memory_patcher_component_t { + opal_memory_base_component_2_0_0_t super; +} opal_memory_patcher_component_t; + +extern opal_memory_patcher_component_t mca_memory_patcher_component; + +#endif /* !defined(OPAL_MEMORY_PATCHER_H) */ diff --git a/opal/mca/memory/patcher/memory_patcher_component.c b/opal/mca/memory/patcher/memory_patcher_component.c new file mode 100644 index 00000000000..c49cb8ce51c --- /dev/null +++ b/opal/mca/memory/patcher/memory_patcher_component.c @@ -0,0 +1,531 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "memory_patcher.h" + +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/mca/memory/base/empty.h" +#include "opal/mca/memory/base/base.h" +#include "opal/memoryhooks/memory.h" +#include "opal/mca/patcher/base/base.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(HAVE_SYS_SYSCALL_H) +#include +#endif +#if defined(HAVE_LINUX_MMAN_H) +#include +#endif + +#include "memory_patcher.h" +#undef opal_memory_changed + +static int patcher_open(void); +static int patcher_close(void); +static int patcher_register(void); +static int patcher_query (int *); + +static int mca_memory_patcher_priority; + +opal_memory_patcher_component_t mca_memory_patcher_component = { + .super = { + .memoryc_version = { + OPAL_MEMORY_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "patcher", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + + /* Component open and close functions */ + .mca_open_component = patcher_open, + .mca_close_component = patcher_close, + .mca_register_component_params = patcher_register, + }, + .memoryc_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + /* Memory framework functions. */ + .memoryc_query = patcher_query, + .memoryc_register = opal_memory_base_component_register_empty, + .memoryc_deregister = opal_memory_base_component_deregister_empty, + .memoryc_set_alignment = opal_memory_base_component_set_alignment_empty, + }, + + /* Component-specific data, filled in later (compiler will 0/NULL + it out) */ +}; + +#if HAVE_DECL___SYSCALL && defined(HAVE___SYSCALL) +/* calling __syscall is preferred on some systems when some arguments may be 64-bit. 
it also + * has the benefit of having an off_t return type */ +#define memory_patcher_syscall __syscall +#else +#define memory_patcher_syscall syscall +#endif + +/* All the hooks in this file have two levels. The first level has the OPAL_PATCHER_* macros + * around the call to the second level. This was done because with xlc the compiler was + * generating an access to r2 before the OPAL_PATCHER_* assembly. This was loading invalid + * data. If this can be resolved the two levels can be joined. + */ + +/* + * The following block of code is #if 0'ed out because we do not need + * to intercept mmap() any more (mmap() only deals with memory + * protection; it does not invalidate any rcache entries for a given + * region). But if we do someday, this is the code that we'll need. + * It's a little non-trivial, so we might as well keep it (and #if 0 + * it out). + */ +#if 0 + +#if defined(HAVE___MMAP) && !HAVE_DECL___MMAP +/* prototype for Apple's internal mmap function */ +void *__mmap (void *start, size_t length, int prot, int flags, int fd, off_t offset); +#endif + +static void *(*original_mmap)(void *, size_t, int, int, int, off_t); + +static void *intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) +{ + OPAL_PATCHER_BEGIN; + void *result = 0; + + if (prot == PROT_NONE) { + opal_mem_hooks_release_hook (start, length, true); + } + + if (!original_mmap) { +#ifdef HAVE___MMAP + /* the darwin syscall returns an int not a long so call the underlying __mmap function */ + result = __mmap (start, length, prot, flags, fd, offset); +#else + result = (void*)(intptr_t) memory_patcher_syscall(SYS_mmap, start, length, prot, flags, fd, offset); +#endif + + // I thought we had some issue in the past with the above line for IA32, + // like maybe syscall() wouldn't handle that many arguments. But just now + // I used gcc -m32 and it worked on a recent system. 
But there's a possibility + // that older ia32 systems may need some other code to make the above syscall. + } else { + result = original_mmap (start, length, prot, flags, fd, offset); + } + + OPAL_PATCHER_END; + return result; +} + +#endif + +static int (*original_munmap) (void *, size_t); + +static int _intercept_munmap(void *start, size_t length) +{ + int result = 0; + + /* could be in a malloc implementation */ + opal_mem_hooks_release_hook (start, length, true); + + if (!original_munmap) { + result = memory_patcher_syscall(SYS_munmap, start, length); + } else { + result = original_munmap (start, length); + } + + return result; +} + +static int intercept_munmap(void *start, size_t length) +{ + OPAL_PATCHER_BEGIN; + int result = _intercept_munmap (start, length); + OPAL_PATCHER_END; + return result; +} + +#if defined (SYS_mremap) + +#if defined(__linux__) +/* on linux this function has an optional extra argument but ... can not be used here because it + * causes issues when intercepting a 4-argument mremap call */ +static void *(*original_mremap) (void *, size_t, size_t, int, void *); +#else +/* mremap has a different signature on BSD systems */ +static void *(*original_mremap) (void *, size_t, void *, size_t, int); +#endif + +#if defined(__linux__) +static void *_intercept_mremap (void *start, size_t oldlen, size_t newlen, int flags, void *new_address) +#else +static void *_intercept_mremap (void *start, size_t oldlen, void *new_address, size_t newlen, int flags) +#endif +{ + void *result = MAP_FAILED; + + if (MAP_FAILED != start && oldlen > 0) { + opal_mem_hooks_release_hook (start, oldlen, true); + } + +#if defined(MREMAP_FIXED) + if (!(flags & MREMAP_FIXED)) { + new_address = NULL; + } +#endif + +#if defined(__linux__) + if (!original_mremap) { + result = (void *)(intptr_t) memory_patcher_syscall (SYS_mremap, start, oldlen, newlen, flags, new_address); + } else { + result = original_mremap (start, oldlen, newlen, flags, new_address); + } +#else + if 
(!original_mremap) { + result = (void *)(intptr_t) memory_patcher_syscall (SYS_mremap, start, oldlen, new_address, newlen, flags); + } else { + result = original_mremap (start, oldlen, new_address, newlen, flags); + } +#endif + + return result; +} + +#if defined(__linux__) +static void *intercept_mremap (void *start, size_t oldlen, size_t newlen, int flags, void *new_address) +{ + OPAL_PATCHER_BEGIN; + void *result = _intercept_mremap (start, oldlen, newlen, flags, new_address); + OPAL_PATCHER_END; + return result; +} +#else +static void *intercept_mremap (void *start, size_t oldlen, void *new_address, size_t newlen, int flags) +{ + OPAL_PATCHER_BEGIN; + void *result = _intercept_mremap (start, oldlen, new_address, newlen, flags); + OPAL_PATCHER_END; + return result; +} +#endif + +#endif + +#if defined (SYS_madvise) + +static int (*original_madvise) (void *, size_t, int); + +static int _intercept_madvise (void *start, size_t length, int advice) +{ + int result = 0; + + if (advice == MADV_DONTNEED || +#ifdef MADV_REMOVE + advice == MADV_REMOVE || +#endif + advice == POSIX_MADV_DONTNEED) + { + opal_mem_hooks_release_hook (start, length, false); + } + + if (!original_madvise) { + result = memory_patcher_syscall(SYS_madvise, start, length, advice); + } else { + result = original_madvise (start, length, advice); + } + + return result; +} +static int intercept_madvise (void *start, size_t length, int advice) +{ + OPAL_PATCHER_BEGIN; + int result = _intercept_madvise (start, length, advice); + OPAL_PATCHER_END; + return result; +} + +#endif + +#if defined SYS_brk + +#ifdef HAVE___CURBRK +extern void *__curbrk; /* in libc */ +#endif + +static int (*original_brk) (void *); + +static int _intercept_brk (void *addr) +{ + int result = 0; + void *old_addr, *new_addr; + +#ifdef HAVE___CURBRK + old_addr = __curbrk; +#else + old_addr = sbrk (0); +#endif + + if (!original_brk) { + /* get the current_addr */ + new_addr = (void *) (intptr_t) memory_patcher_syscall(SYS_brk, addr); + 
+#ifdef HAVE___CURBRK + /* + * Note: if we were using glibc brk/sbrk, their __curbrk would get + * updated, but since we're going straight to the syscall, we have + * to update __curbrk or else glibc won't see it. + */ + __curbrk = new_addr; +#endif + } else { + result = original_brk (addr); +#ifdef HAVE___CURBRK + new_addr = __curbrk; +#else + new_addr = sbrk (0); +#endif + } + + if (new_addr < addr) { + errno = ENOMEM; + result = -1; + } else if (new_addr < old_addr) { + opal_mem_hooks_release_hook (new_addr, (intptr_t) old_addr - (intptr_t) new_addr, true); + } + return result; +} + +static int intercept_brk (void *addr) +{ + OPAL_PATCHER_BEGIN; + int result = _intercept_brk (addr); + OPAL_PATCHER_END; + return result; +} + +#endif + +#if defined(SYS_shmdt) && defined(__linux__) + +#include +#include +#include + +static size_t memory_patcher_get_shm_seg_size (const void *shmaddr) +{ + unsigned long start_addr, end_addr; + char *ptr, *newline; + char buffer[1024]; + size_t seg_size = 0; + int fd; + + seg_size = 0; + + fd = open ("/proc/self/maps", O_RDONLY); + if (fd < 0) { + return 0; + } + + for (size_t read_offset = 0 ; ; ) { + ssize_t nread = read(fd, buffer + read_offset, sizeof(buffer) - 1 - read_offset); + if (nread <= 0) { + if (errno == EINTR) { + continue; + } + + break; + } else { + buffer[nread + read_offset] = '\0'; + } + + ptr = buffer; + while ( (newline = strchr(ptr, '\n')) != NULL ) { + /* 00400000-0040b000 r-xp ... 
\n */ + int ret = sscanf(ptr, "%lx-%lx ", &start_addr, &end_addr); + if (ret != 2) { + continue; + } + + if (start_addr == (uintptr_t)shmaddr) { + seg_size = end_addr - start_addr; + goto out_close; + } + + newline = strchr(ptr, '\n'); + if (newline == NULL) { + break; + } + + ptr = newline + 1; + } + + read_offset = strlen(ptr); + memmove(buffer, ptr, read_offset); + } + + out_close: + close(fd); + return seg_size; +} + +static int (*original_shmdt) (const void *); + +static int _intercept_shmdt (const void *shmaddr) +{ + int result; + + /* opal_mem_hooks_release_hook should probably be updated to take a const void *. + * for now just cast away the const */ + opal_mem_hooks_release_hook ((void *) shmaddr, memory_patcher_get_shm_seg_size (shmaddr), false); + + if (original_shmdt) { + result = original_shmdt (shmaddr); + } else { + result = memory_patcher_syscall (SYS_shmdt, shmaddr); + } + + return result; +} + +static int intercept_shmdt (const void *shmaddr) +{ + OPAL_PATCHER_BEGIN; + int result = _intercept_shmdt (shmaddr); + OPAL_PATCHER_END; + return result; +} +#endif + +static int patcher_register (void) +{ + mca_memory_patcher_priority = 80; + mca_base_component_var_register (&mca_memory_patcher_component.super.memoryc_version, + "priority", "Priority of the patcher memory hook component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_CONSTANT, &mca_memory_patcher_priority); + + return OPAL_SUCCESS; +} + +static int patcher_query (int *priority) +{ + int rc; + + rc = mca_base_framework_open (&opal_patcher_base_framework, 0); + if (OPAL_SUCCESS != rc) { + *priority = -1; + return OPAL_SUCCESS; + } + + *priority = mca_memory_patcher_priority; + + return OPAL_SUCCESS; +} + +static int patcher_open (void) +{ + static int was_executed_already = 0; + int rc; + + if (was_executed_already) { + return OPAL_SUCCESS; + } + + was_executed_already = 1; + + rc = opal_patcher_base_select (); + if (OPAL_SUCCESS != rc) { + 
mca_base_framework_close (&opal_patcher_base_framework); + return OPAL_ERR_NOT_AVAILABLE; + } + + /* set memory hooks support level */ + opal_mem_hooks_set_support (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT); + +#if 0 + /* See above block to see why mmap() functionality is #if 0'ed + out */ + rc = opal_patcher->patch_symbol ("mmap", (uintptr_t) intercept_mmap, (uintptr_t *) &original_mmap); + if (OPAL_SUCCESS != rc) { + return rc; + } +#endif + + rc = opal_patcher->patch_symbol ("munmap", (uintptr_t)intercept_munmap, (uintptr_t *) &original_munmap); + if (OPAL_SUCCESS != rc) { + return rc; + } + +#if defined (SYS_mremap) + rc = opal_patcher->patch_symbol ("mremap",(uintptr_t)intercept_mremap, (uintptr_t *) &original_mremap); + if (OPAL_SUCCESS != rc) { + return rc; + } +#endif + + /* NTH: we can't currently allow madvise to be intercepted due to a deadlock when running with glibc. in + * the future we may re-enable this hook if the deadlock can be resolved. */ +#if 0 +#if defined (SYS_madvise) + rc = opal_patcher->patch_symbol ("madvise", (uintptr_t)intercept_madvise, (uintptr_t *) &original_madvise); + if (OPAL_SUCCESS != rc) { + return rc; + } +#endif +#endif + +#if defined(SYS_shmdt) && defined(__linux__) + rc = opal_patcher->patch_symbol ("shmdt", (uintptr_t) intercept_shmdt, (uintptr_t *) &original_shmdt); + if (OPAL_SUCCESS != rc) { + return rc; + } +#endif + +#if defined (SYS_brk) + rc = opal_patcher->patch_symbol ("brk", (uintptr_t)intercept_brk, (uintptr_t *) &original_brk); +#endif + + return rc; +} + +static int patcher_close(void) +{ + mca_base_framework_close (&opal_patcher_base_framework); + + /* Note that we don't need to unpatch any symbols here; the + patcher framework will take care of all of that for us. 
*/ + return OPAL_SUCCESS; +} diff --git a/opal/mca/mpool/Makefile.am b/opal/mca/mpool/Makefile.am index 773d96e1a92..27501f794fc 100644 --- a/opal/mca/mpool/Makefile.am +++ b/opal/mca/mpool/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/mpool/base/Makefile.am b/opal/mca/mpool/base/Makefile.am index 64c3e025584..646444e231d 100644 --- a/opal/mca/mpool/base/Makefile.am +++ b/opal/mca/mpool/base/Makefile.am @@ -5,29 +5,30 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. All rights +# reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # headers += \ base/base.h \ - base/mpool_base_mem_cb.h \ base/mpool_base_tree.h + libmca_mpool_la_SOURCES += \ base/mpool_base_frame.c \ - base/mpool_base_init.c \ base/mpool_base_lookup.c \ base/mpool_base_alloc.c \ - base/mpool_base_mem_cb.c \ - base/mpool_base_tree.c + base/mpool_base_tree.c \ + base/mpool_base_default.c dist_opaldata_DATA += \ base/help-mpool-base.txt diff --git a/opal/mca/mpool/base/base.h b/opal/mca/mpool/base/base.h index a699e84981b..88a99cad01a 100644 --- a/opal/mca/mpool/base/base.h +++ b/opal/mca/mpool/base/base.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,15 +6,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -30,28 +33,10 @@ BEGIN_C_DECLS -static inline unsigned int my_log2(unsigned long val) { - unsigned int count = 0; - while(val > 0) { - val = val >> 1; - count++; - } - return count > 0 ? 
count-1: 0; -} -static inline void *down_align_addr(void* addr, unsigned int shift) { - return (void*) (((intptr_t) addr) & (~(intptr_t) 0) << shift); -} - -static inline void *up_align_addr(void*addr, unsigned int shift) { - return (void*) ((((intptr_t) addr) | ~((~(intptr_t) 0) << shift))); -} - struct mca_mpool_base_selected_module_t { opal_list_item_t super; mca_mpool_base_component_t *mpool_component; mca_mpool_base_module_t *mpool_module; - void* user_data; - struct mca_mpool_base_resources_t *mpool_resources; }; typedef struct mca_mpool_base_selected_module_t mca_mpool_base_selected_module_t; @@ -65,27 +50,19 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_base_selected_module_t); * Global functions for MCA: overall mpool open and close */ -OPAL_DECLSPEC int mca_mpool_base_init(bool enable_progress_threads, bool enable_mpi_threads); OPAL_DECLSPEC mca_mpool_base_component_t* mca_mpool_base_component_lookup(const char* name); -OPAL_DECLSPEC mca_mpool_base_module_t* mca_mpool_base_module_create( - const char* name, - void* user_data, - struct mca_mpool_base_resources_t* mpool_resources); OPAL_DECLSPEC mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name); -OPAL_DECLSPEC int mca_mpool_base_module_destroy(mca_mpool_base_module_t *module); - + /* * Globals */ extern opal_list_t mca_mpool_base_modules; -OPAL_DECLSPEC extern uint32_t mca_mpool_base_page_size; -OPAL_DECLSPEC extern uint32_t mca_mpool_base_page_size_log; +extern mca_mpool_base_module_t *mca_mpool_base_default_module; +extern int mca_mpool_base_default_priority; -/* only used within base -- no need to DECLSPEC */ -extern int mca_mpool_base_used_mem_hooks; OPAL_DECLSPEC extern mca_base_framework_t opal_mpool_base_framework; - + END_C_DECLS #endif /* MCA_MEM_BASE_H */ diff --git a/opal/mca/mpool/base/help-mpool-base.txt b/opal/mca/mpool/base/help-mpool-base.txt index 42a48c5e7b0..17e01111fdd 100644 --- a/opal/mca/mpool/base/help-mpool-base.txt +++ 
b/opal/mca/mpool/base/help-mpool-base.txt @@ -2,9 +2,9 @@ # # Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # [all mem leaks] @@ -30,31 +30,3 @@ PID: %d %d additional leak%s recorded but %s not displayed here. Set the MCA parameter mpi_show_mpi_alloc_mem_leaks to a larger number to see that many leaks, or set it to a negative number to see all leaks. -# -[leave pinned failed] -A process attempted to use the "leave pinned" MPI feature, but no -memory registration hooks were found on the system at run time. This -may be the result of running on a system that does not support memory -hooks or having some other software subvert Open MPI's use of the -memory hooks. You can disable Open MPI's use of memory hooks by -setting both the mpi_leave_pinned and mpi_leave_pinned_pipeline MCA -parameters to 0. - -Open MPI will disable any transports that are attempting to use the -leave pinned functionality; your job may still run, but may fall back -to a slower network transport (such as TCP). - - Mpool name: %s - Process: %s - Local host: %s -# -[cannot deregister in-use memory] -Open MPI intercepted a call to free memory that is still being used by -an ongoing MPI communication. This usually reflects an error in the -MPI application; it may signify memory corruption. Open MPI will now -abort your job. 
- - Mpool name: %s - Local host: %s - Buffer address: %p - Buffer size: %lu diff --git a/opal/mca/mpool/base/mpool_base_alloc.c b/opal/mca/mpool/base/mpool_base_alloc.c index 6a253d2d87a..605ffbdf280 100644 --- a/opal/mca/mpool/base/mpool_base_alloc.c +++ b/opal/mca/mpool/base/mpool_base_alloc.c @@ -22,16 +22,11 @@ */ #include "opal_config.h" -#ifdef HAVE_STDINT_H #include -#endif -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/mca/mpool/mpool.h" #include "base.h" #include "mpool_base_tree.h" -#include "mpool_base_mem_cb.h" #include "opal/threads/mutex.h" struct opal_info_t { @@ -48,63 +43,24 @@ struct opal_info_t { }; typedef struct opal_info_t opal_info_t; -/** - * Memory Pool Registration - */ - -static void mca_mpool_base_registration_constructor( mca_mpool_base_registration_t * reg ) -{ - reg->mpool = NULL; - reg->base = NULL; - reg->bound = NULL; - reg->alloc_base = NULL; - reg->ref_count = 0; - reg->flags = 0; -} - -static void mca_mpool_base_registration_destructor( mca_mpool_base_registration_t * reg ) -{ - -} - -OBJ_CLASS_INSTANCE( - mca_mpool_base_registration_t, - opal_free_list_item_t, - mca_mpool_base_registration_constructor, - mca_mpool_base_registration_destructor); static void unregister_tree_item(mca_mpool_base_tree_item_t *mpool_tree_item) { mca_mpool_base_module_t *mpool; - mca_mpool_base_registration_t *reg; - int i; - for(i = 1; i < mpool_tree_item->count; i++) { - mpool = mpool_tree_item->mpools[i]; - reg = mpool_tree_item->regs[i]; - if(mpool && mpool->mpool_deregister) { - mpool->mpool_deregister(mpool, reg); - } - } - - mpool = mpool_tree_item->mpools[0]; - reg = mpool_tree_item->regs[0]; - mpool->mpool_free(mpool, mpool_tree_item->key, reg); + mpool = mpool_tree_item->mpool; + mpool->mpool_free(mpool, mpool_tree_item->key); } /** * Function to allocate special memory according to what the user requests in * the info object. 
* - * If the user passes in a valid info structure then the function will - * try to allocate the memory and register it with every mpool that there is a - * key for it in the info struct. If it fails at registering the memory with - * one of the requested mpools, an error will be returned. Also, if there is a - * key in info that does not match any mpool, an error will be returned. - * * If the info parameter is MPI_INFO_NULL, then this function will try to allocate - * the memory and register it with as many mpools as possible. However, - * if any of the registratons fail the mpool will simply be ignored. + * the memory with the optionally named mpool or malloc and try to register the + * pointer with as many registration caches as possible. Registration caches that + * fail to register the region will be ignored. The mpool name can optionally be + * specified in the info object. * * @param size the size of the memory area to allocate * @param info an info object which tells us what kind of memory to allocate @@ -112,176 +68,37 @@ static void unregister_tree_item(mca_mpool_base_tree_item_t *mpool_tree_item) * @retval pointer to the allocated memory * @retval NULL on failure */ -void *mca_mpool_base_alloc(size_t size, opal_info_t *info) +void *mca_mpool_base_alloc(size_t size, opal_info_t *info, const char *hints) { - opal_list_item_t * item; - int num_modules = opal_list_get_size(&mca_mpool_base_modules); - int reg_module_num = 0, i; - mca_mpool_base_selected_module_t * current; - mca_mpool_base_selected_module_t * no_reg_function = NULL; - mca_mpool_base_selected_module_t ** has_reg_function = NULL; - mca_mpool_base_registration_t * registration; - mca_mpool_base_tree_item_t* mpool_tree_item = NULL; + mca_mpool_base_tree_item_t *mpool_tree_item = NULL; mca_mpool_base_module_t *mpool; - void * mem = NULL; + void *mem = NULL; #if defined(TODO_BTL_GB) int flag = 0; - bool match_found = false; #endif /* defined(TODO_BTL_GB) */ - bool mpool_requested = false; - 
if(num_modules > 0) { - has_reg_function = (mca_mpool_base_selected_module_t **) - malloc(num_modules * sizeof(mca_mpool_base_module_t *)); - if(!has_reg_function) - goto out; + mpool_tree_item = mca_mpool_base_tree_item_get (); + if (!mpool_tree_item) { + return NULL; } - mpool_tree_item = mca_mpool_base_tree_item_get(); - - if(!mpool_tree_item) - goto out; - mpool_tree_item->num_bytes = size; mpool_tree_item->count = 0; -#if defined(TODO_BTL_GB) - if(&ompi_mpi_info_null.info == info) -#endif /* defined(TODO_BTL_GB) */ - { - for(item = opal_list_get_first(&mca_mpool_base_modules); - item != opal_list_get_end(&mca_mpool_base_modules); - item = opal_list_get_next(item)) { - current = ((mca_mpool_base_selected_module_t *) item); - if(current->mpool_module->flags & MCA_MPOOL_FLAGS_MPI_ALLOC_MEM) { - if(NULL == current->mpool_module->mpool_register){ - no_reg_function = current; - } - else { - has_reg_function[reg_module_num++] = current; - } - } - } - } -#if defined(TODO_BTL_GB) - else - { - int num_keys; - char key[MPI_MAX_INFO_KEY + 1]; - char value[MPI_MAX_INFO_VAL + 1]; - - ompi_info_get_nkeys(info, &num_keys); - for(i = 0; i < num_keys; i++) - { - ompi_info_get_nthkey(info, i, key); - if ( 0 != strcmp(key, "mpool") ) { - continue; - } - mpool_requested = true; - ompi_info_get(info, key, MPI_MAX_INFO_VAL, value, &flag); - if ( !flag ) { - continue; - } - match_found = false; - for(item = opal_list_get_first(&mca_mpool_base_modules); - item != opal_list_get_end(&mca_mpool_base_modules); - item = opal_list_get_next(item)) - { - current = ((mca_mpool_base_selected_module_t *)item); - if(0 == strcmp(value, - current->mpool_module->mpool_component->mpool_version.mca_component_name)) - { - match_found = true; - if(NULL == current->mpool_module->mpool_register) - { - if(NULL != no_reg_function) - { - /* there was more than one requested mpool that lacks - * a registration function, so return failure */ - goto out; - } - no_reg_function = current; - } - else - { - 
has_reg_function[reg_module_num++] = current; - } - } - } - if(!match_found) - { - /* one of the keys given to us by the user did not match any - * mpools, so return an error */ - goto out; - } - } + mpool = mca_mpool_base_module_lookup (hints); + if (NULL != mpool) { + mem = mpool->mpool_alloc (mpool, size, 0, 0); } -#endif /* defined(TODO_BTL_GB) */ - - if(NULL == no_reg_function && 0 == reg_module_num) - { - if(!mpool_requested) - { - /* if the info argument was NULL and there were no useable mpools - * or there user provided info object but did not specifiy a "mpool" key, - * just malloc the memory and return it */ - mem = malloc(size); - goto out; - } - - /* the user passed info but we were not able to use any of the mpools - * specified */ - goto out; - } - - for(i = -1; i < reg_module_num; i++) { - if(-1 == i) { - if(NULL != no_reg_function) - mpool = no_reg_function->mpool_module; - else - continue; - } else { - mpool = has_reg_function[i]->mpool_module; - } - if(NULL == mem) { - mem = mpool->mpool_alloc(mpool, size, 0, MCA_MPOOL_FLAGS_PERSIST, - ®istration); - if(NULL == mem) { - if(mpool_requested) - goto out; - continue; - } - mpool_tree_item->key = mem; - mpool_tree_item->mpools[mpool_tree_item->count] = mpool; - mpool_tree_item->regs[mpool_tree_item->count++] = registration; - } else { - if(mpool->mpool_register(mpool, mem, size, MCA_MPOOL_FLAGS_PERSIST, - ®istration) != OPAL_SUCCESS) { - if(mpool_requested) { - unregister_tree_item(mpool_tree_item); - goto out; - } - continue; - } - mpool_tree_item->mpools[mpool_tree_item->count] = mpool; - mpool_tree_item->regs[mpool_tree_item->count++] = registration; - } - } - - if(NULL == mem) { + if (NULL == mem) { + /* fall back on malloc */ mem = malloc(size); - goto out; - } - - mca_mpool_base_tree_insert(mpool_tree_item); - mpool_tree_item = NULL; /* prevent it to be deleted below */ -out: - if(mpool_tree_item) - mca_mpool_base_tree_item_put(mpool_tree_item); - if(has_reg_function) - free(has_reg_function); + 
mca_mpool_base_tree_item_put (mpool_tree_item); + } else { + mpool_tree_item->mpool = mpool; + mca_mpool_base_tree_insert (mpool_tree_item); + } return mem; } @@ -305,7 +122,7 @@ int mca_mpool_base_free(void *base) mpool_tree_item = mca_mpool_base_tree_find(base); - if(!mpool_tree_item) { + if(!mpool_tree_item) { /* nothing in the tree this was just plain old malloc'd memory */ free(base); return OPAL_SUCCESS; @@ -316,6 +133,6 @@ int mca_mpool_base_free(void *base) unregister_tree_item(mpool_tree_item); mca_mpool_base_tree_item_put(mpool_tree_item); } - + return rc; } diff --git a/opal/mca/mpool/base/mpool_base_default.c b/opal/mca/mpool/base/mpool_base_default.c new file mode 100644 index 00000000000..45d592b03e4 --- /dev/null +++ b/opal/mca/mpool/base/mpool_base_default.c @@ -0,0 +1,87 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/align.h" + +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/mpool/base/base.h" +#include "opal/constants.h" +#include "opal/util/sys_limits.h" + +static void *mca_mpool_default_alloc (mca_mpool_base_module_t *mpool, size_t size, + size_t align, uint32_t flags) +{ +#if HAVE_POSIX_MEMALIGN + void *addr = NULL; + + (void) posix_memalign (&addr, align, size); + return addr; +#else + void *addr, *ret; + + addr = malloc (size + align + sizeof (void *)); + ret = OPAL_ALIGN_PTR((intptr_t) addr + 8, align, void *); + *((void **) ret - 1) = addr; + return ret; +#endif +} + +static void *mca_mpool_default_realloc (mca_mpool_base_module_t *mpool, void *addr, size_t size) +{ +#if HAVE_POSIX_MEMALIGN + return realloc (addr, size); +#else + if (NULL != addr) { + void *base = *((void **) addr - 1); + void *ptr = realloc (base, size + (intptr_t) addr - (intptr_t) - size); + void *ret = (void *)((intptr_t) ptr + (intptr_t) addr - (intptr_t) - size); + *((void **) ret - 1) = ptr; + return ret; + } else { + return mca_mpool_default_alloc (mpool, size, 8, 0); + } +#endif +} + +static void mca_mpool_default_free (mca_mpool_base_module_t *mpool, void *addr) +{ +#if HAVE_POSIX_MEMALIGN + free (addr); +#else + if (NULL != addr) { + void *base = *((void **) addr - 1); + free (base); + } +#endif +} + +static void mca_mpool_default_finalize (struct mca_mpool_base_module_t *mpool) +{ +} + +static mca_mpool_base_module_t mca_mpool_malloc_module = { + .mpool_alloc = mca_mpool_default_alloc, + .mpool_realloc = mca_mpool_default_realloc, + .mpool_free = mca_mpool_default_free, + .mpool_finalize = mca_mpool_default_finalize, + .flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM, +}; + +mca_mpool_base_module_t *mca_mpool_base_default_module = &mca_mpool_malloc_module; diff --git 
a/opal/mca/mpool/base/mpool_base_frame.c b/opal/mca/mpool/base/mpool_base_frame.c index ea9cd6ca805..1c3002d587d 100644 --- a/opal/mca/mpool/base/mpool_base_frame.c +++ b/opal/mca/mpool/base/mpool_base_frame.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,10 +14,12 @@ * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,14 +28,12 @@ #include #include #ifdef HAVE_UNISTD_H -#include +#include #endif /* HAVE_UNISTD_H */ #include "opal/mca/mca.h" #include "opal/mca/base/base.h" -#include "opal/memoryhooks/memory.h" #include "opal/mca/mpool/base/base.h" -#include "mpool_base_mem_cb.h" #include "opal/constants.h" #include "opal/util/sys_limits.h" @@ -48,13 +49,33 @@ * Global variables */ -/* whether we actually used the mem hooks or not */ -int mca_mpool_base_used_mem_hooks = 0; +opal_list_t mca_mpool_base_modules = {{0}}; +static char *mca_mpool_base_default_hints; + +int mca_mpool_base_default_priority = 50; -uint32_t mca_mpool_base_page_size = 0; -uint32_t mca_mpool_base_page_size_log = 0; +OBJ_CLASS_INSTANCE(mca_mpool_base_selected_module_t, opal_list_item_t, NULL, NULL); -opal_list_t mca_mpool_base_modules = {{0}}; +static int mca_mpool_base_register (mca_base_register_flag_t flags) +{ + mca_mpool_base_default_hints = NULL; + (void) mca_base_var_register ("opal", "mpool", "base", "default_hints", + "Hints to use when selecting the default memory pool", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, + MCA_BASE_VAR_FLAG_INTERNAL, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, + &mca_mpool_base_default_hints); + + mca_mpool_base_default_priority = 50; + (void) mca_base_var_register ("opal", "mpool", "base", "default_priority", + "Priority of the default mpool module", + MCA_BASE_VAR_TYPE_INT, NULL, 0, + MCA_BASE_VAR_FLAG_INTERNAL, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, + &mca_mpool_base_default_priority); + + return OPAL_SUCCESS; +} /** * Function for finding and opening either all MCA components, or the one @@ -64,23 +85,22 @@ static int mca_mpool_base_open(mca_base_open_flag_t flags) { /* Open up all available components - and populate the opal_mpool_base_framework.framework_components list */ - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != mca_base_framework_components_open(&opal_mpool_base_framework, 
flags)) { return OPAL_ERROR; } - + + if (mca_mpool_base_default_hints) { + mca_mpool_base_default_module = mca_mpool_base_module_lookup (mca_mpool_base_default_hints); + } + /* Initialize the list so that in mca_mpool_base_close(), we can iterate over it (even if it's empty, as in the case of opal_info) */ - OBJ_CONSTRUCT(&mca_mpool_base_modules, opal_list_t); - - /* get the page size for this architecture*/ - mca_mpool_base_page_size = opal_getpagesize(); - mca_mpool_base_page_size_log = my_log2(mca_mpool_base_page_size); - /* setup tree for tracking MPI_Alloc_mem */ + /* setup tree for tracking MPI_Alloc_mem */ mca_mpool_base_tree_init(); - + return OPAL_SUCCESS; } @@ -88,12 +108,6 @@ static int mca_mpool_base_close(void) { opal_list_item_t *item; mca_mpool_base_selected_module_t *sm; - int32_t modules_length; - - /* Need the initial length in order to know if some of the initializations - * are done in the open function. - */ - modules_length = opal_list_get_size(&mca_mpool_base_modules); /* Finalize all the mpool components and free their list items */ @@ -115,15 +129,8 @@ static int mca_mpool_base_close(void) OMPI RTE program, or [possibly] multiple if this is opal_info) */ (void) mca_base_framework_components_close(&opal_mpool_base_framework, NULL); - /* deregister memory free callback */ - if( (modules_length > 0) && mca_mpool_base_used_mem_hooks && - 0 != (OPAL_MEMORY_FREE_SUPPORT & opal_mem_hooks_support_level())) { - opal_mem_hooks_unregister_release(mca_mpool_base_mem_cb); - } - /* All done */ - return OPAL_SUCCESS; } -MCA_BASE_FRAMEWORK_DECLARE(opal, mpool, NULL, NULL, mca_mpool_base_open, +MCA_BASE_FRAMEWORK_DECLARE(opal, mpool, "Memory pools", mca_mpool_base_register, mca_mpool_base_open, mca_mpool_base_close, mca_mpool_base_static_components, 0); diff --git a/opal/mca/mpool/base/mpool_base_init.c b/opal/mca/mpool/base/mpool_base_init.c deleted file mode 100644 index c462d85714e..00000000000 --- a/opal/mca/mpool/base/mpool_base_init.c +++ /dev/null @@ 
-1,43 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/mpool/base/base.h" - -OBJ_CLASS_INSTANCE(mca_mpool_base_selected_module_t, opal_list_item_t, NULL, NULL); -static bool mca_mpool_enable_progress_threads = true; -static bool mca_mpool_enable_mpi_thread_multiple = true; - -/** - * Function for weeding out mpool modules that don't want to run. - * - * Call the init function on all available components to find out if they - * want to run. Select all components that don't fail. Failing modules - * will be closed and unloaded. The selected modules will be returned - * to the caller in a opal_list_t. 
- */ -int mca_mpool_base_init(bool enable_progress_threads, bool enable_mpi_thread_multiple) -{ - mca_mpool_enable_progress_threads = enable_progress_threads; - mca_mpool_enable_mpi_thread_multiple = enable_mpi_thread_multiple; - return OPAL_SUCCESS; -} - diff --git a/opal/mca/mpool/base/mpool_base_lookup.c b/opal/mca/mpool/base/mpool_base_lookup.c index 88f91ef3656..fa0e0ce34af 100644 --- a/opal/mca/mpool/base/mpool_base_lookup.c +++ b/opal/mca/mpool/base/mpool_base_lookup.c @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -6,17 +6,17 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved. * Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. + * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,105 +30,19 @@ #include "opal/mca/base/base.h" #include "opal/util/show_help.h" #include "opal/util/proc.h" -#include "opal/runtime/opal_params.h" #include "opal/mca/mpool/mpool.h" #include "opal/mca/mpool/base/base.h" -#include "opal/memoryhooks/memory.h" -#include "mpool_base_mem_cb.h" -mca_mpool_base_component_t* mca_mpool_base_component_lookup(const char* name) +mca_mpool_base_component_t* mca_mpool_base_component_lookup(const char *name) { - /* Traverse the list of available modules; call their init functions. */ - opal_list_item_t* item; - for (item = opal_list_get_first(&opal_mpool_base_framework.framework_components); - item != opal_list_get_end(&opal_mpool_base_framework.framework_components); - item = opal_list_get_next(item)) { - mca_base_component_list_item_t *cli = - (mca_base_component_list_item_t *) item; - mca_mpool_base_component_t* component = - (mca_mpool_base_component_t *) cli->cli_component; - if(strcmp(component->mpool_version.mca_component_name, name) == 0) { - return component; - } - } - return NULL; -} - - -mca_mpool_base_module_t* mca_mpool_base_module_create( - const char* name, - void* user_data, - struct mca_mpool_base_resources_t* resources) -{ - mca_mpool_base_component_t* component = NULL; - mca_mpool_base_module_t* module = NULL; mca_base_component_list_item_t *cli; - mca_mpool_base_selected_module_t *sm; + /* Traverse the list of available modules; call their init functions. 
*/ OPAL_LIST_FOREACH(cli, &opal_mpool_base_framework.framework_components, mca_base_component_list_item_t) { - component = (mca_mpool_base_component_t *) cli->cli_component; - if(0 == strcmp(component->mpool_version.mca_component_name, name)) { - module = component->mpool_init(resources); - break; - } - } - - if ( NULL == module ) { - return NULL; - } - sm = OBJ_NEW(mca_mpool_base_selected_module_t); - sm->mpool_component = component; - sm->mpool_module = module; - sm->user_data = user_data; - sm->mpool_resources = resources; - opal_list_append(&mca_mpool_base_modules, (opal_list_item_t*) sm); - /* on the very first creation of a module we init the memory - callback */ - if (opal_list_get_size(&mca_mpool_base_modules) == 1) { - /* Default to not using memory hooks */ - int use_mem_hooks = 0; - - /* Use the memory hooks if leave_pinned or - leave_pinned_pipeline is enabled (note that either of these - leave_pinned variables may have been set by a user MCA - param or elsewhere in the code base). Yes, we could have - coded this more succinctly, but this is more clear. Do not - check memory hooks if the mpool explicity asked us not to. 
*/ - if ((opal_leave_pinned > 0 || opal_leave_pinned_pipeline) && - !(module->flags & MCA_MPOOL_FLAGS_NO_HOOKS)) { - use_mem_hooks = 1; - } - - if (use_mem_hooks) { - if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == - ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & - opal_mem_hooks_support_level())) { - opal_mem_hooks_register_release(mca_mpool_base_mem_cb, NULL); - } else { - opal_show_help("help-mpool-base.txt", "leave pinned failed", - true, name, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - opal_proc_local_get()->proc_hostname); - return NULL; - } - - /* Set this to true so that mpool_base_close knows to - cleanup */ - mca_mpool_base_used_mem_hooks = 1; - } - } - return module; -} - - -mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name) -{ - mca_mpool_base_selected_module_t *mli; - - OPAL_LIST_FOREACH(mli, &mca_mpool_base_modules, mca_mpool_base_selected_module_t) { - if(0 == strcmp(mli->mpool_component->mpool_version.mca_component_name, - name)) { - return mli->mpool_module; + mca_mpool_base_component_t* component = (mca_mpool_base_component_t *) cli->cli_component; + if (strcmp(component->mpool_version.mca_component_name, name) == 0) { + return component; } } @@ -136,20 +50,27 @@ mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name) } -int mca_mpool_base_module_destroy(mca_mpool_base_module_t *module) + +mca_mpool_base_module_t *mca_mpool_base_module_lookup (const char *hints) { - mca_mpool_base_selected_module_t *sm, *next; + mca_mpool_base_module_t *best_module = mca_mpool_base_default_module; + mca_base_component_list_item_t *cli; + int best_priority = mca_mpool_base_default_priority; + int rc; - OPAL_LIST_FOREACH_SAFE(sm, next, &mca_mpool_base_modules, mca_mpool_base_selected_module_t) { - if (module == sm->mpool_module) { - opal_list_remove_item(&mca_mpool_base_modules, (opal_list_item_t*)sm); - if (NULL != sm->mpool_module->mpool_finalize) { - sm->mpool_module->mpool_finalize(sm->mpool_module); - } 
- OBJ_RELEASE(sm); - return OPAL_SUCCESS; - } + OPAL_LIST_FOREACH(cli, &opal_mpool_base_framework.framework_components, mca_base_component_list_item_t) { + mca_mpool_base_component_t *component = (mca_mpool_base_component_t *) cli->cli_component; + mca_mpool_base_module_t *module; + int priority; + + rc = component->mpool_query (hints, &priority, &module); + if (OPAL_SUCCESS == rc) { + if (priority > best_priority) { + best_priority = priority; + best_module = module; + } + } } - return OPAL_ERR_NOT_FOUND; + return best_module; } diff --git a/opal/mca/mpool/base/mpool_base_mem_cb.c b/opal/mca/mpool/base/mpool_base_mem_cb.c deleted file mode 100644 index 4292bbf3991..00000000000 --- a/opal/mca/mpool/base/mpool_base_mem_cb.c +++ /dev/null @@ -1,101 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#include "opal_config.h" - -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "opal/util/show_help.h" -#include "opal/util/proc.h" -#include "opal/runtime/opal_params.h" - -#include "opal/mca/mpool/base/mpool_base_mem_cb.h" -#include "opal/mca/mpool/base/base.h" - -#include "opal/mca/mca.h" -#include "opal/memoryhooks/memory.h" - -static char msg[512]; - - -/* - * memory hook callback, called when memory is free'd out from under - * us. Be wary of the from_alloc flag -- if you're called with - * from_alloc==true, then you cannot call malloc (or any of its - * friends)! - */ -void mca_mpool_base_mem_cb(void* base, size_t size, void* cbdata, - bool from_alloc) -{ - mca_mpool_base_selected_module_t* current; - int rc; - opal_list_item_t* item; - - /* Only do anything meaningful if the OPAL layer is up and running - and size != 0 */ - if ((from_alloc && (!opal_initialized)) || - size == 0) { - return; - } - - for(item = opal_list_get_first(&mca_mpool_base_modules); - item != opal_list_get_end(&mca_mpool_base_modules); - item = opal_list_get_next(item)) { - - current = (mca_mpool_base_selected_module_t*) item; - - if(current->mpool_module->mpool_release_memory != NULL) { - rc = current->mpool_module->mpool_release_memory(current->mpool_module, - base, size); - - if (rc != OPAL_SUCCESS) { - if (from_alloc) { - int len; - len = snprintf(msg, sizeof(msg), "[%s:%d] Attempt to free memory that is still in use by an ongoing MPI communication (buffer %p, size %lu). 
MPI job will now abort.\n", - opal_proc_local_get()->proc_hostname, - getpid(), - base, (unsigned long) size); - msg[sizeof(msg) - 1] = '\0'; - write(2, msg, len); - } else { - opal_show_help("help-mpool-base.txt", - "cannot deregister in-use memory", true, - current->mpool_component->mpool_version.mca_component_name, - opal_proc_local_get()->proc_hostname, - base, (unsigned long) size); - } - - /* We're in a callback from somewhere; we can't do - anything meaningful to pass an error back up. :-( - So just exit. Call _exit() so that we don't try to - call anything on the way out -- just exit! - (remember that we're in a callback, and state may - be very undefined at this point...) */ - _exit(1); - } - } - } -} diff --git a/opal/mca/mpool/base/mpool_base_mem_cb.h b/opal/mca/mpool/base/mpool_base_mem_cb.h deleted file mode 100644 index 5abf51935a7..00000000000 --- a/opal/mca/mpool/base/mpool_base_mem_cb.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_MPOOL_BASE_MEM_CB_H -#define MCA_MPOOL_BASE_MEM_CB_H - -#include "opal_config.h" - -BEGIN_C_DECLS - -/* - * memory hook callback, called when memory is free'd out from under us - */ -void mca_mpool_base_mem_cb(void* base, size_t size, void* cbdata, - bool from_alloc); - -END_C_DECLS - -#endif /* MCA_MPOOL_BASE_MEM_CB_H */ - - - diff --git a/opal/mca/mpool/base/mpool_base_tree.c b/opal/mca/mpool/base/mpool_base_tree.c index 8fc3a47c9ea..5f306200625 100644 --- a/opal/mca/mpool/base/mpool_base_tree.c +++ b/opal/mca/mpool/base/mpool_base_tree.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved.5A @@ -18,15 +18,11 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** - * @file - * Description of the Registration Cache framework - */ #include "opal_config.h" @@ -47,15 +43,15 @@ static void action(void *key, void *value); OBJ_CLASS_INSTANCE(mca_mpool_base_tree_item_t, opal_free_list_item_t, NULL, NULL); -/* - * use globals for the tree and the tree_item free list.. +/* + * use globals for the tree and the tree_item free list.. */ -opal_rb_tree_t mca_mpool_base_tree = {{0}}; +opal_rb_tree_t mca_mpool_base_tree = {{0}}; opal_free_list_t mca_mpool_base_tree_item_free_list = {{{0}}}; static opal_mutex_t tree_lock; /* - * simple minded compare function... + * simple minded compare function... 
*/ int mca_mpool_base_tree_node_compare(void * key1, void * key2) { @@ -76,18 +72,18 @@ int mca_mpool_base_tree_node_compare(void * key1, void * key2) /* * initialize the rb tree */ -int mca_mpool_base_tree_init(void) { +int mca_mpool_base_tree_init(void) { int rc; - OBJ_CONSTRUCT(&mca_mpool_base_tree, opal_rb_tree_t); + OBJ_CONSTRUCT(&mca_mpool_base_tree, opal_rb_tree_t); OBJ_CONSTRUCT(&mca_mpool_base_tree_item_free_list, opal_free_list_t); OBJ_CONSTRUCT(&tree_lock, opal_mutex_t); rc = opal_free_list_init (&mca_mpool_base_tree_item_free_list, - sizeof(mca_mpool_base_tree_item_t), + sizeof(mca_mpool_base_tree_item_t), opal_cache_line_size, - OBJ_CLASS(mca_mpool_base_tree_item_t), + OBJ_CLASS(mca_mpool_base_tree_item_t), 0,opal_cache_line_size, 0, -1 , 4, NULL, 0, NULL, NULL, NULL); - if(OPAL_SUCCESS == rc) { + if(OPAL_SUCCESS == rc) { rc = opal_rb_tree_init(&mca_mpool_base_tree, mca_mpool_base_tree_node_compare); } return rc; @@ -104,10 +100,10 @@ int mca_mpool_base_tree_fini(void) return OPAL_SUCCESS; } -/* - * insert an item in the rb tree - */ -int mca_mpool_base_tree_insert(mca_mpool_base_tree_item_t* item) { +/* + * insert an item in the rb tree + */ +int mca_mpool_base_tree_insert(mca_mpool_base_tree_item_t* item) { int rc; OPAL_THREAD_LOCK(&tree_lock); @@ -117,17 +113,17 @@ int mca_mpool_base_tree_insert(mca_mpool_base_tree_item_t* item) { return rc; } -/* - * remove an item from the rb tree +/* + * remove an item from the rb tree * Does not put the item back onto the free list. That * must be done separately by calling mca_mpool_base_tree_item_put. - * This allows a caller to remove an item from the tree + * This allows a caller to remove an item from the tree * before safely cleaning up the item and only then returning it * to the free list. 
If the item is returned to the free list too soon * race conditions can occur * */ -int mca_mpool_base_tree_delete(mca_mpool_base_tree_item_t* item) { +int mca_mpool_base_tree_delete(mca_mpool_base_tree_item_t* item) { int rc; OPAL_THREAD_LOCK(&tree_lock); @@ -138,7 +134,7 @@ int mca_mpool_base_tree_delete(mca_mpool_base_tree_item_t* item) { } /** - * find the item in the rb tree + * find the item in the rb tree */ mca_mpool_base_tree_item_t* mca_mpool_base_tree_find(void* base) { mca_mpool_base_tree_item_t* item; @@ -150,11 +146,11 @@ mca_mpool_base_tree_item_t* mca_mpool_base_tree_find(void* base) { return item; } - -/* - * get a tree item from the free list + +/* + * get a tree item from the free list */ -mca_mpool_base_tree_item_t* mca_mpool_base_tree_item_get(void) { +mca_mpool_base_tree_item_t* mca_mpool_base_tree_item_get(void) { return (mca_mpool_base_tree_item_t *) opal_free_list_get (&mca_mpool_base_tree_item_free_list); } @@ -162,7 +158,7 @@ mca_mpool_base_tree_item_t* mca_mpool_base_tree_item_get(void) { /* * put an item back into the free list */ -void mca_mpool_base_tree_item_put(mca_mpool_base_tree_item_t* item) { +void mca_mpool_base_tree_item_put(mca_mpool_base_tree_item_t* item) { opal_free_list_return (&mca_mpool_base_tree_item_free_list, &item->super); } @@ -182,6 +178,9 @@ void mca_mpool_base_tree_print(int show_up_to_mem_leaks) num_leaks = 0; max_mem_leaks = show_up_to_mem_leaks; opal_rb_tree_traverse(&mca_mpool_base_tree, condition, action); + if (0 == num_leaks) { + return; + } if (num_leaks <= show_up_to_mem_leaks || show_up_to_mem_leaks < 0) { diff --git a/opal/mca/mpool/base/mpool_base_tree.h b/opal/mca/mpool/base/mpool_base_tree.h index 0742f448679..2a31175a77f 100644 --- a/opal/mca/mpool/base/mpool_base_tree.h +++ b/opal/mca/mpool/base/mpool_base_tree.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -28,12 +28,13 @@ #define MCA_MPOOL_BASE_TREE_MAX 8 #include "opal/mca/mca.h" #include "opal/mca/mpool/mpool.h" +#include "opal/mca/rcache/rcache.h" BEGIN_C_DECLS /* * Data structures for the tree of allocated memory - * used for MPI_Alloc_mem and MPI_Free_mem + * used for MPI_Alloc_mem and MPI_Free_mem */ /** @@ -46,8 +47,9 @@ struct mca_mpool_base_tree_item_t size_t num_bytes; /**< the number of bytes in this alloc, only for debugging reporting with mpi_show_mpi_alloc_mem_leaks */ - mca_mpool_base_module_t* mpools[MCA_MPOOL_BASE_TREE_MAX]; /**< the mpools */ - mca_mpool_base_registration_t* regs[MCA_MPOOL_BASE_TREE_MAX]; /**< the registrations */ + mca_mpool_base_module_t *mpool; + mca_rcache_base_module_t *rcaches[MCA_MPOOL_BASE_TREE_MAX]; /**< the registration caches */ + mca_rcache_base_registration_t *regs[MCA_MPOOL_BASE_TREE_MAX]; /**< the registrations */ uint8_t count; /**< length of the mpools/regs array */ }; typedef struct mca_mpool_base_tree_item_t mca_mpool_base_tree_item_t; @@ -57,34 +59,34 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_base_tree_item_t); /* * initialize/finalize the rb tree */ -int mca_mpool_base_tree_init(void); +int mca_mpool_base_tree_init(void); int mca_mpool_base_tree_fini(void); -/* - * insert an item in the rb tree - */ -int mca_mpool_base_tree_insert(mca_mpool_base_tree_item_t* item); +/* + * insert an item in the rb tree + */ +int mca_mpool_base_tree_insert(mca_mpool_base_tree_item_t* item); -/* - * remove an item from the rb tree +/* + * remove an item from the rb 
tree */ -int mca_mpool_base_tree_delete(mca_mpool_base_tree_item_t* item); +int mca_mpool_base_tree_delete(mca_mpool_base_tree_item_t* item); /** - * find the item in the rb tree + * find the item in the rb tree */ mca_mpool_base_tree_item_t* mca_mpool_base_tree_find(void* base); -/* - * get a tree item from the free list +/* + * get a tree item from the free list */ -mca_mpool_base_tree_item_t* mca_mpool_base_tree_item_get(void); +mca_mpool_base_tree_item_t* mca_mpool_base_tree_item_get(void); -/* - * put tree item back into the free list +/* + * put tree item back into the free list */ -void mca_mpool_base_tree_item_put(mca_mpool_base_tree_item_t* item); +void mca_mpool_base_tree_item_put(mca_mpool_base_tree_item_t* item); /* * For debugging, print a show_help kind of message if there are items diff --git a/opal/mca/mpool/gpusm/Makefile.am b/opal/mca/mpool/gpusm/Makefile.am deleted file mode 100644 index ce0f64f390b..00000000000 --- a/opal/mca/mpool/gpusm/Makefile.am +++ /dev/null @@ -1,57 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012 NVIDIA Corporation. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(mpool_gpusm_CPPFLAGS) - -sources = \ - mpool_gpusm_module.c \ - mpool_gpusm_component.c - -if WANT_INSTALL_HEADERS -opaldir = $(opalincludedir)/$(subdir) -opal_HEADERS = mpool_gpusm.h -endif - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_mpool_gpusm_DSO -component_noinst = -component_install = mca_mpool_gpusm.la -else -component_noinst = libmca_mpool_gpusm.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_mpool_gpusm_la_SOURCES = $(sources) -mca_mpool_gpusm_la_LDFLAGS = -module -avoid-version -mca_mpool_gpusm_la_LIBADD = $(mpool_gpusm_LIBS) -if OPAL_cuda_support -mca_mpool_gpusm_la_LIBADD += \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/cuda/lib@OPAL_LIB_PREFIX@mca_common_cuda.la -endif - -noinst_LTLIBRARIES = $(component_noinst) -libmca_mpool_gpusm_la_SOURCES = $(sources) -libmca_mpool_gpusm_la_LDFLAGS = -module -avoid-version -libmca_mpool_gpusm_la_LIBADD = $(mpool_gpusm_LIBS) diff --git a/opal/mca/mpool/gpusm/configure.m4 b/opal/mca/mpool/gpusm/configure.m4 deleted file mode 100644 index fd2871a853d..00000000000 --- a/opal/mca/mpool/gpusm/configure.m4 +++ /dev/null @@ -1,25 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2012 NVIDIA Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# -# If CUDA support was requested, then build the CUDA memory pools. -# This code checks the variable CUDA_SUPPORT which was set earlier in -# the configure sequence by the opal_configure_options.m4 code. -# - -AC_DEFUN([MCA_opal_mpool_gpusm_CONFIG],[ - AC_CONFIG_FILES([opal/mca/mpool/gpusm/Makefile]) - - # Use CUDA_SUPPORT which was filled in by the opal configure code. 
- AS_IF([test "x$CUDA_SUPPORT_41" = "x1"], - [$1], - [$2]) - -])dnl diff --git a/opal/mca/mpool/gpusm/mpool_gpusm.h b/opal/mca/mpool/gpusm/mpool_gpusm.h deleted file mode 100644 index e22441212a9..00000000000 --- a/opal/mca/mpool/gpusm/mpool_gpusm.h +++ /dev/null @@ -1,105 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_MPOOL_GPUSM_H -#define MCA_MPOOL_GPUSM_H - -#include "opal_config.h" -#include "opal/class/opal_list.h" -#include "opal/mca/mpool/mpool.h" - -BEGIN_C_DECLS - -#define MEMHANDLE_SIZE 8 -#define EVTHANDLE_SIZE 8 -struct mca_mpool_gpusm_registration_t { - mca_mpool_base_registration_t base; - uint64_t memHandle[MEMHANDLE_SIZE]; /* CUipcMemHandle */ - uint64_t evtHandle[EVTHANDLE_SIZE]; /* CUipcEventHandle */ - uintptr_t event; /* CUevent */ -}; -typedef struct mca_mpool_gpusm_registration_t mca_mpool_gpusm_registration_t; -OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_gpusm_registration_t); - -struct mca_mpool_gpusm_component_t { - mca_mpool_base_component_t super; -}; -typedef struct mca_mpool_gpusm_component_t mca_mpool_gpusm_component_t; - -OPAL_DECLSPEC extern mca_mpool_gpusm_component_t mca_mpool_gpusm_component; - -struct mca_mpool_base_resources_t { - void *reg_data; - size_t sizeof_reg; - int (*register_mem)(void *base, size_t size, mca_mpool_base_registration_t *newreg, - mca_mpool_base_registration_t *hdrreg); - int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg); -}; -typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t; - -struct mca_mpool_gpusm_module_t { - mca_mpool_base_module_t super; - struct mca_mpool_base_resources_t resources; - opal_free_list_t reg_list; -}; typedef struct mca_mpool_gpusm_module_t mca_mpool_gpusm_module_t; - -/* - * Initializes the mpool module. 
- */ -void mca_mpool_gpusm_module_init(mca_mpool_gpusm_module_t *mpool); - -/** - * register block of memory - */ -int mca_mpool_gpusm_register(mca_mpool_base_module_t* mpool, void *addr, - size_t size, uint32_t flags, mca_mpool_base_registration_t **reg); - -/** - * deregister memory - */ -int mca_mpool_gpusm_deregister(mca_mpool_base_module_t *mpool, - mca_mpool_base_registration_t *reg); - -/** - * find registration for a given block of memory - */ -int mca_mpool_gpusm_find(struct mca_mpool_base_module_t* mpool, void* addr, - size_t size, mca_mpool_base_registration_t **reg); - -/** - * finalize mpool - */ -void mca_mpool_gpusm_finalize(struct mca_mpool_base_module_t *mpool); - -/** - * Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OPAL_SUCCESS or failure status - */ -int mca_mpool_gpusm_ft_event(int state); - -END_C_DECLS -#endif diff --git a/opal/mca/mpool/gpusm/mpool_gpusm_component.c b/opal/mca/mpool/gpusm/mpool_gpusm_component.c deleted file mode 100644 index 9a444e1466d..00000000000 --- a/opal/mca/mpool/gpusm/mpool_gpusm_component.c +++ /dev/null @@ -1,103 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1 -#include "opal_config.h" -#include "opal/mca/base/base.h" -#include "mpool_gpusm.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_MALLOC_H -#include -#endif - -/* - * Local functions - */ -static int gpusm_open(void); -static int gpusm_close(void); -static int gpusm_register(void); -static mca_mpool_base_module_t* gpusm_init(struct mca_mpool_base_resources_t* resources); - -mca_mpool_gpusm_component_t mca_mpool_gpusm_component = { - { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .mpool_version = { - MCA_MPOOL_BASE_VERSION_2_0_0, - - .mca_component_name = "gpusm", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - .mca_open_component = gpusm_open, - .mca_close_component = gpusm_close, - .mca_register_component_params = gpusm_register, - }, - .mpool_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .mpool_init = gpusm_init, - } -}; - -/** - * Component open/close/init/register functions. Most do not do anything, - * but keep around for placeholders. 
- */ -static int gpusm_open(void) -{ - return OPAL_SUCCESS; -} - - -static int gpusm_register(void) -{ - return OPAL_SUCCESS; -} - - -static int gpusm_close(void) -{ - return OPAL_SUCCESS; -} - - -static mca_mpool_base_module_t* gpusm_init(struct mca_mpool_base_resources_t *resources) -{ - mca_mpool_gpusm_module_t* mpool_module; - - mpool_module = - (mca_mpool_gpusm_module_t*)malloc(sizeof(mca_mpool_gpusm_module_t)); - - mpool_module->resources = *resources; - - mca_mpool_gpusm_module_init(mpool_module); - - return &mpool_module->super; -} diff --git a/opal/mca/mpool/gpusm/mpool_gpusm_module.c b/opal/mca/mpool/gpusm/mpool_gpusm_module.c deleted file mode 100644 index e3a47aa4f80..00000000000 --- a/opal/mca/mpool/gpusm/mpool_gpusm_module.c +++ /dev/null @@ -1,199 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. - * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file: - * - * This file implements a simple memory pool that is used by the GPU - * buffer on the sending side. 
It just gets a memory handle and event - * handle that can be sent to the remote side which can then use the - * handles to get access to the memory and the event to determine when - * it can start accessing the memory. There is no caching of the - * memory handles as getting new ones is fast. The event handles are - * cached by the cuda_common code. - */ - -#include "opal_config.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/mpool/gpusm/mpool_gpusm.h" -#include "opal/mca/common/cuda/common_cuda.h" - -/** - * Called when the registration free list is created. An event is created - * for each entry. - */ -static void mca_mpool_gpusm_registration_constructor( mca_mpool_gpusm_registration_t *item ) -{ - mca_common_cuda_construct_event_and_handle(&item->event, - (void *)&item->evtHandle); -} - -/** - * Called when the program is exiting. This destroys the events. - */ -static void mca_mpool_gpusm_registration_destructor( mca_mpool_gpusm_registration_t *item ) -{ - mca_common_cuda_destruct_event(item->event); -} - -OBJ_CLASS_INSTANCE(mca_mpool_gpusm_registration_t, mca_mpool_base_registration_t, - mca_mpool_gpusm_registration_constructor, - mca_mpool_gpusm_registration_destructor); - -/* - * Initializes the mpool module. 
- */ -void mca_mpool_gpusm_module_init(mca_mpool_gpusm_module_t* mpool) -{ - mpool->super.mpool_component = &mca_mpool_gpusm_component.super; - mpool->super.mpool_base = NULL; - mpool->super.mpool_alloc = NULL; - mpool->super.mpool_realloc = NULL; - mpool->super.mpool_free = NULL; - mpool->super.mpool_register = mca_mpool_gpusm_register; - mpool->super.mpool_find = mca_mpool_gpusm_find; - mpool->super.mpool_deregister = mca_mpool_gpusm_deregister; - mpool->super.mpool_release_memory = NULL; - mpool->super.mpool_finalize = mca_mpool_gpusm_finalize; - mpool->super.mpool_ft_event = mca_mpool_gpusm_ft_event; - mpool->super.rcache = NULL; - mpool->super.flags = 0; - - mpool->resources.reg_data = NULL; - mpool->resources.sizeof_reg = sizeof(struct mca_mpool_common_cuda_reg_t); - mpool->resources.register_mem = cuda_getmemhandle; - mpool->resources.deregister_mem = cuda_ungetmemhandle; - - OBJ_CONSTRUCT(&mpool->reg_list, opal_free_list_t); - - /* Start with 0 entries in the free list since CUDA may not have - * been initialized when this free list is created and there is - * some CUDA specific activities that need to be done. */ - opal_free_list_init (&mpool->reg_list, mpool->resources.sizeof_reg, - opal_cache_line_size, - OBJ_CLASS(mca_mpool_gpusm_registration_t), - 0,opal_cache_line_size, - 0, -1, 64, NULL, 0, NULL, NULL, NULL); - -} - -/** - * Just go ahead and get a new registration. The find and register - * functions are the same thing for this memory pool. - */ -int mca_mpool_gpusm_find(mca_mpool_base_module_t *mpool, void *addr, - size_t size, - mca_mpool_base_registration_t **reg) -{ - return mca_mpool_gpusm_register(mpool, addr, size, 0, reg); -} - -/* - * This is the one function that does all the work. It will call into - * the register function to get the memory handle for the sending - * buffer. There is no need to deregister the memory handle so the - * deregister function is a no-op. 
- */ -int mca_mpool_gpusm_register(mca_mpool_base_module_t *mpool, void *addr, - size_t size, uint32_t flags, - mca_mpool_base_registration_t **reg) -{ - mca_mpool_gpusm_module_t *mpool_gpusm = (mca_mpool_gpusm_module_t*)mpool; - mca_mpool_base_registration_t *gpusm_reg; - opal_free_list_item_t *item; - unsigned char *base, *bound; - int rc; - - /* In spite of the fact we return an error code, the existing code - * checks the registration for a NULL value rather than looking at - * the return code. So, initialize the registration to NULL in - * case we run into a failure. */ - *reg = NULL; - - base = addr; - bound = (unsigned char *)addr + size - 1; - - item = opal_free_list_get (&mpool_gpusm->reg_list); - if(NULL == item) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - gpusm_reg = (mca_mpool_base_registration_t*)item; - - gpusm_reg->mpool = mpool; - gpusm_reg->base = base; - gpusm_reg->bound = bound; - gpusm_reg->flags = flags; - - rc = mpool_gpusm->resources.register_mem(base, size, gpusm_reg, NULL); - - if(rc != OPAL_SUCCESS) { - opal_free_list_return (&mpool_gpusm->reg_list, item); - return rc; - } - - *reg = gpusm_reg; - (*reg)->ref_count++; - return OPAL_SUCCESS; - -} - -/* - * Return the registration to the free list. - */ -int mca_mpool_gpusm_deregister(struct mca_mpool_base_module_t *mpool, - mca_mpool_base_registration_t *reg) -{ - int rc; - mca_mpool_gpusm_module_t *mpool_gpusm = (mca_mpool_gpusm_module_t *)mpool; - - rc = mpool_gpusm->resources.deregister_mem(mpool, reg); - opal_free_list_return (&mpool_gpusm->reg_list, (opal_free_list_item_t *) reg); - return OPAL_SUCCESS; -} - -/** - * Free up the resources. - */ -void mca_mpool_gpusm_finalize(struct mca_mpool_base_module_t *mpool) -{ - opal_free_list_item_t *item; - mca_mpool_gpusm_module_t *mpool_gpusm = (mca_mpool_gpusm_module_t *)mpool; - - /* Need to run the destructor on each item in the free list explicitly. 
- * The destruction of the free list only runs the destructor on the - * main free list, not each item. */ - while (NULL != (item = (opal_free_list_item_t *)opal_lifo_pop(&(mpool_gpusm->reg_list.super)))) { - OBJ_DESTRUCT(item); - } - - OBJ_DESTRUCT(&mpool_gpusm->reg_list); - return; -} - -int mca_mpool_gpusm_ft_event(int state) { - return OPAL_SUCCESS; -} diff --git a/opal/mca/mpool/grdma/Makefile.am b/opal/mca/mpool/grdma/Makefile.am deleted file mode 100644 index 7f29b2eab74..00000000000 --- a/opal/mca/mpool/grdma/Makefile.am +++ /dev/null @@ -1,58 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(mpool_grdma_CPPFLAGS) - -sources = \ - mpool_grdma_module.c \ - mpool_grdma_component.c - -if WANT_INSTALL_HEADERS -opaldir = $(opalincludedir)/$(subdir) -opal_HEADERS = mpool_grdma.h -endif - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_opal_mpool_grdma_DSO -component_noinst = -component_install = mca_mpool_grdma.la -else -component_noinst = libmca_mpool_grdma.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_mpool_grdma_la_SOURCES = $(sources) -mca_mpool_grdma_la_LDFLAGS = -module -avoid-version -mca_mpool_grdma_la_LIBADD = $(mpool_grdma_LIBS) -if OPAL_cuda_support -mca_mpool_grdma_la_LIBADD += \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/cuda/lib@OPAL_LIB_PREFIX@mca_common_cuda.la -endif - -noinst_LTLIBRARIES = $(component_noinst) -libmca_mpool_grdma_la_SOURCES = $(sources) -libmca_mpool_grdma_la_LDFLAGS = -module -avoid-version -libmca_mpool_grdma_la_LIBADD = $(mpool_grdma_LIBS) diff --git a/opal/mca/mpool/grdma/mpool_grdma.h b/opal/mca/mpool/grdma/mpool_grdma.h deleted file mode 100644 index 4f5362149b0..00000000000 --- a/opal/mca/mpool/grdma/mpool_grdma.h +++ /dev/null @@ -1,160 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_MPOOL_OPENIB_H -#define MCA_MPOOL_OPENIB_H - -#include "opal_config.h" -#include "opal/class/opal_list.h" -#include "opal/mca/event/event.h" -#include "opal/mca/mpool/mpool.h" -#if HAVE_SYS_MMAN_H -#include -#endif - -BEGIN_C_DECLS - -#define MCA_MPOOL_GRDMA_NAME_MAX 256 - -struct mca_mpool_grdma_pool_t { - opal_list_item_t super; - char *pool_name; - opal_list_t lru_list; - opal_list_t gc_list; - struct mca_rcache_base_module_t *rcache; -}; -typedef struct mca_mpool_grdma_pool_t mca_mpool_grdma_pool_t; - -OBJ_CLASS_DECLARATION(mca_mpool_grdma_pool_t); - -struct mca_mpool_grdma_component_t { - mca_mpool_base_component_t super; - opal_list_t pools; - char *rcache_name; - bool print_stats; - int leave_pinned; -}; -typedef struct mca_mpool_grdma_component_t mca_mpool_grdma_component_t; - -OPAL_DECLSPEC extern mca_mpool_grdma_component_t mca_mpool_grdma_component; - -struct mca_mpool_grdma_module_t; - -struct mca_mpool_base_resources_t { - char *pool_name; - void *reg_data; - size_t sizeof_reg; - int (*register_mem)(void *reg_data, void *base, size_t size, - mca_mpool_base_registration_t *reg); - int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg); -}; -typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t; - -struct mca_mpool_grdma_module_t { - mca_mpool_base_module_t super; - struct mca_mpool_base_resources_t resources; - mca_mpool_grdma_pool_t *pool; - opal_free_list_t reg_list; - uint32_t stat_cache_hit; - uint32_t stat_cache_miss; - uint32_t stat_evicted; - uint32_t stat_cache_found; - uint32_t stat_cache_notfound; -}; -typedef struct mca_mpool_grdma_module_t mca_mpool_grdma_module_t; - -/* - * Initializes the mpool module. - */ -void mca_mpool_grdma_module_init(mca_mpool_grdma_module_t *mpool, mca_mpool_grdma_pool_t *pool); - -/* - * Returns base address of shared memory mapping. 
- */ -void *mca_mpool_grdma_base(mca_mpool_base_module_t *mpool); - -/** - * Allocate block of registered memory. - */ -void* mca_mpool_grdma_alloc(mca_mpool_base_module_t *mpool, size_t size, - size_t align, uint32_t flags, - mca_mpool_base_registration_t** registration); - -/** - * realloc block of registered memory - */ -void* mca_mpool_grdma_realloc( mca_mpool_base_module_t *mpool, void* addr, - size_t size, mca_mpool_base_registration_t** registration); - -/** - * register block of memory - */ -int mca_mpool_grdma_register(mca_mpool_base_module_t* mpool, void *addr, - size_t size, uint32_t flags, mca_mpool_base_registration_t **reg); - -/** - * deregister memory - */ -int mca_mpool_grdma_deregister(mca_mpool_base_module_t *mpool, - mca_mpool_base_registration_t *reg); - -/** - * free memory allocated by alloc function - */ -void mca_mpool_grdma_free(mca_mpool_base_module_t *mpool, void * addr, - mca_mpool_base_registration_t *reg); - -/** - * find registration for a given block of memory - */ -int mca_mpool_grdma_find(struct mca_mpool_base_module_t* mpool, void* addr, - size_t size, mca_mpool_base_registration_t **reg); - -/** - * unregister all registration covering the block of memory - */ -int mca_mpool_grdma_release_memory(mca_mpool_base_module_t* mpool, void *base, - size_t size); - -/** - * finalize mpool - */ -void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool); - -/** - * Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OPAL_SUCCESS or failure status - */ -int mca_mpool_grdma_ft_event(int state); - -/** - * evict one unused registration from the mpool's lru. 
- * @return true on success, false on failure - */ -bool mca_mpool_grdma_evict (struct mca_mpool_base_module_t *mpool); - -END_C_DECLS -#endif diff --git a/opal/mca/mpool/grdma/mpool_grdma_component.c b/opal/mca/mpool/grdma/mpool_grdma_component.c deleted file mode 100644 index 2247768bf13..00000000000 --- a/opal/mca/mpool/grdma/mpool_grdma_component.c +++ /dev/null @@ -1,157 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1 -#include "opal_config.h" -#include "opal/mca/base/base.h" -#include "opal/runtime/opal_params.h" -#include "mpool_grdma.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#include - -/* - * Local functions - */ -static int grdma_open(void); -static int grdma_close(void); -static int grdma_register(void); -static mca_mpool_base_module_t* grdma_init( - struct mca_mpool_base_resources_t* resources); - -mca_mpool_grdma_component_t mca_mpool_grdma_component = { - { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .mpool_version = { - MCA_MPOOL_BASE_VERSION_2_0_0, - - .mca_component_name = "grdma", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - .mca_open_component = grdma_open, - .mca_close_component = grdma_close, - .mca_register_component_params = grdma_register, - }, - .mpool_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .mpool_init = grdma_init, - } -}; - -/** - * component open/close/init function - */ -static int grdma_open(void) -{ - OBJ_CONSTRUCT(&mca_mpool_grdma_component.pools, opal_list_t); - - return OPAL_SUCCESS; -} - - -static int grdma_register(void) -{ - mca_mpool_grdma_component.rcache_name = "vma"; - (void) mca_base_component_var_register(&mca_mpool_grdma_component.super.mpool_version, - "rcache_name", - "The name of the registration cache the mpool should use", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_mpool_grdma_component.rcache_name); - - mca_mpool_grdma_component.print_stats = false; - (void) mca_base_component_var_register(&mca_mpool_grdma_component.super.mpool_version, - "print_stats", "print pool usage statistics at the end of the run", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - 
MCA_BASE_VAR_SCOPE_READONLY, - &mca_mpool_grdma_component.print_stats); - - return OPAL_SUCCESS; -} - - -static int grdma_close(void) -{ - OBJ_DESTRUCT(&mca_mpool_grdma_component.pools); - - return OPAL_SUCCESS; -} - - -static mca_mpool_base_module_t * -grdma_init(struct mca_mpool_base_resources_t *resources) -{ - mca_mpool_grdma_module_t* mpool_module; - mca_mpool_grdma_pool_t *pool = NULL; - opal_list_item_t *item; - - /* Set this here (vs in component.c) because - opal_leave_pinned* may have been set after MCA params were - read (e.g., by the openib btl) */ - mca_mpool_grdma_component.leave_pinned = (int) - (1 == opal_leave_pinned || opal_leave_pinned_pipeline); - - /* find the specified pool */ - for (item = opal_list_get_first (&mca_mpool_grdma_component.pools) ; - item != opal_list_get_end (&mca_mpool_grdma_component.pools) ; - item = opal_list_get_next (item)) { - pool = (mca_mpool_grdma_pool_t *) item; - - if (0 == strcmp (pool->pool_name, resources->pool_name)) { - break; - } - - pool = NULL; - } - - if (NULL == pool) { - /* create new pool */ - pool = OBJ_NEW(mca_mpool_grdma_pool_t); - if (NULL == pool) { - return NULL; - } - - pool->pool_name = strdup (resources->pool_name); - - opal_list_append (&mca_mpool_grdma_component.pools, &pool->super); - } - - mpool_module = - (mca_mpool_grdma_module_t *) malloc (sizeof (mca_mpool_grdma_module_t)); - - mpool_module->resources = *resources; - - mca_mpool_grdma_module_init(mpool_module, pool); - - return &mpool_module->super; -} diff --git a/opal/mca/mpool/grdma/mpool_grdma_module.c b/opal/mca/mpool/grdma/mpool_grdma_module.c deleted file mode 100644 index d474dc1c377..00000000000 --- a/opal/mca/mpool/grdma/mpool_grdma_module.c +++ /dev/null @@ -1,568 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. - * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1 -#include "opal_config.h" - -#include -#include -#include - -#include "opal/align.h" - -#include "opal/util/proc.h" -#if OPAL_CUDA_GDR_SUPPORT -#include "opal/mca/common/cuda/common_cuda.h" -#endif /* OPAL_CUDA_GDR_SUPPORT */ -#include "opal/mca/rcache/rcache.h" -#include "opal/mca/rcache/base/base.h" - -#include "opal/mca/mpool/base/base.h" -#include "mpool_grdma.h" - -#if OPAL_CUDA_GDR_SUPPORT -static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size); -#endif /* OPAL_CUDA_GDR_SUPPORT */ -static void mca_mpool_grdma_pool_contructor (mca_mpool_grdma_pool_t *pool) -{ - memset ((void *)((uintptr_t)pool + sizeof (pool->super)), 0, sizeof (*pool) - sizeof (pool->super)); - - OBJ_CONSTRUCT(&pool->lru_list, opal_list_t); - OBJ_CONSTRUCT(&pool->gc_list, opal_list_t); - - pool->rcache = mca_rcache_base_module_create(mca_mpool_grdma_component.rcache_name); -} - -static void mca_mpool_grdma_pool_destructor (mca_mpool_grdma_pool_t *pool) -{ - OBJ_DESTRUCT(&pool->lru_list); - OBJ_DESTRUCT(&pool->gc_list); - - free (pool->pool_name); -} - 
-OBJ_CLASS_INSTANCE(mca_mpool_grdma_pool_t, opal_list_item_t, - mca_mpool_grdma_pool_contructor, - mca_mpool_grdma_pool_destructor); - -/* - * Initializes the mpool module. - */ -void mca_mpool_grdma_module_init(mca_mpool_grdma_module_t* mpool, mca_mpool_grdma_pool_t *pool) -{ - OBJ_RETAIN(pool); - mpool->pool = pool; - - mpool->super.mpool_component = &mca_mpool_grdma_component.super; - mpool->super.mpool_base = NULL; /* no base .. */ - mpool->super.mpool_alloc = mca_mpool_grdma_alloc; - mpool->super.mpool_realloc = mca_mpool_grdma_realloc; - mpool->super.mpool_free = mca_mpool_grdma_free; - mpool->super.mpool_register = mca_mpool_grdma_register; - mpool->super.mpool_find = mca_mpool_grdma_find; - mpool->super.mpool_deregister = mca_mpool_grdma_deregister; - mpool->super.mpool_release_memory = mca_mpool_grdma_release_memory; - mpool->super.mpool_finalize = mca_mpool_grdma_finalize; - mpool->super.mpool_ft_event = mca_mpool_grdma_ft_event; - mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM; - mpool->super.rcache = pool->rcache; - - mpool->stat_cache_hit = mpool->stat_cache_miss = mpool->stat_evicted = 0; - mpool->stat_cache_found = mpool->stat_cache_notfound = 0; - - OBJ_CONSTRUCT(&mpool->reg_list, opal_free_list_t); - opal_free_list_init (&mpool->reg_list, mpool->resources.sizeof_reg, - opal_cache_line_size, - OBJ_CLASS(mca_mpool_base_registration_t), - 0, opal_cache_line_size, 0, -1, 32, NULL, 0, - NULL, NULL, NULL); -} - -static inline int dereg_mem(mca_mpool_base_registration_t *reg) -{ - mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) reg->mpool; - int rc; - - if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) - reg->mpool->rcache->rcache_delete(reg->mpool->rcache, reg); - - /* Drop the rcache lock before deregistring the memory */ - OPAL_THREAD_UNLOCK(®->mpool->rcache->lock); - rc = mpool_grdma->resources.deregister_mem(mpool_grdma->resources.reg_data, - reg); - OPAL_THREAD_LOCK(®->mpool->rcache->lock); - - if (OPAL_LIKELY(OPAL_SUCCESS == 
rc)) { - opal_free_list_return (&mpool_grdma->reg_list, - (opal_free_list_item_t *) reg); - } - - return rc; -} - -/** - * allocate function - */ -void* mca_mpool_grdma_alloc(mca_mpool_base_module_t *mpool, size_t size, - size_t align, uint32_t flags, mca_mpool_base_registration_t **reg) -{ - void *base_addr, *addr; - - if(0 == align) - align = mca_mpool_base_page_size; - -#if OPAL_CUDA_SUPPORT - /* CUDA cannot handle registering overlapping regions, so make - * sure each region is page sized and page aligned. */ - align = mca_mpool_base_page_size; - size = OPAL_ALIGN(size, mca_mpool_base_page_size, size_t); -#endif - -#ifdef HAVE_POSIX_MEMALIGN - if((errno = posix_memalign(&base_addr, align, size)) != 0) - return NULL; - - addr = base_addr; -#else - base_addr = malloc(size + align); - if(NULL == base_addr) - return NULL; - - addr = (void*)OPAL_ALIGN((uintptr_t)base_addr, align, uintptr_t); -#endif - - if(OPAL_SUCCESS != mca_mpool_grdma_register(mpool, addr, size, flags, reg)) { - free(base_addr); - return NULL; - } - (*reg)->alloc_base = (unsigned char *) base_addr; - - return addr; -} - -/* This function must be called with the rcache lock held */ -static inline void do_unregistration_gc(struct mca_mpool_base_module_t *mpool) -{ - mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool; - opal_list_item_t *item; - - /* Remove registration from garbage collection list - before deregistering it */ - while (NULL != - (item = opal_list_remove_first(&mpool_grdma->pool->gc_list))) { - dereg_mem((mca_mpool_base_registration_t *) item); - } -} - -static inline bool mca_mpool_grdma_evict_lru_local (mca_mpool_grdma_pool_t *pool) -{ - mca_mpool_grdma_module_t *mpool_grdma; - mca_mpool_base_registration_t *old_reg; - - old_reg = (mca_mpool_base_registration_t *) - opal_list_remove_first (&pool->lru_list); - if (NULL == old_reg) { - return false; - } - - mpool_grdma = (mca_mpool_grdma_module_t *) old_reg->mpool; - - (void) dereg_mem (old_reg); - - 
mpool_grdma->stat_evicted++; - - return true; -} - -enum { - MCA_MPOOL_GRDMA_MSG_EMPTY = 0, - MCA_MPOOL_GRDMA_MSG_NEED_DEREG = 1, - MCA_MPOOL_GRDMA_MSG_BUSY = 2, - MCA_MPOOL_GRDMA_MSG_COMPLETE = 3 -}; - -bool mca_mpool_grdma_evict (struct mca_mpool_base_module_t *mpool) -{ - return mca_mpool_grdma_evict_lru_local (((mca_mpool_grdma_module_t *) mpool)->pool); -} - -/* - * register memory - */ -int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr, - size_t size, uint32_t flags, - mca_mpool_base_registration_t **reg) -{ - mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool; - const bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS); - const bool persist = !!(flags & MCA_MPOOL_FLAGS_PERSIST); - mca_mpool_base_registration_t *grdma_reg; - opal_free_list_item_t *item; - unsigned char *base, *bound; - int rc; - - OPAL_THREAD_LOCK(&mpool->rcache->lock); - - /* if cache bypass is requested don't use the cache */ - base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log); - bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1), - mca_mpool_base_page_size_log); - if (!opal_list_is_empty (&mpool_grdma->pool->gc_list)) - do_unregistration_gc(mpool); - -#if OPAL_CUDA_GDR_SUPPORT - if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) { - size_t psize; - mca_common_cuda_get_address_range(&base, &psize, addr); - bound = base + psize - 1; - /* Check to see if this memory is in the cache and if it has been freed. If so, - * this call will boot it out of the cache. */ - check_for_cuda_freed_memory(mpool, base, psize); - } -#endif /* OPAL_CUDA_GDR_SUPPORT */ - - /* look through existing regs if not persistent registration requested. 
- * Persistent registration are always registered and placed in the cache */ - if(!(bypass_cache || persist)) { - /* check to see if memory is registered */ - mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, reg); - if (*reg && !(flags & MCA_MPOOL_FLAGS_INVALID)) { - if (0 == (*reg)->ref_count) { - /* Leave pinned must be set for this to still be in the rcache. */ - opal_list_remove_item(&mpool_grdma->pool->lru_list, - (opal_list_item_t *)(*reg)); - } - - /* This segment fits fully within an existing segment. */ - mpool_grdma->stat_cache_hit++; - (*reg)->ref_count++; - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - return OPAL_SUCCESS; - } - - mpool_grdma->stat_cache_miss++; - *reg = NULL; /* in case previous find found something */ - - /* Unless explicitly requested by the caller always store the - * registration in the rcache. This will speed up the case where - * no leave pinned protocol is in use but the same segment is in - * use in multiple simultaneous transactions. We used to set bypass_cache - * here is !mca_mpool_grdma_component.leave_pinned. 
*/ - } - - item = opal_free_list_get (&mpool_grdma->reg_list); - if(NULL == item) { - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - return OPAL_ERR_OUT_OF_RESOURCE; - } - grdma_reg = (mca_mpool_base_registration_t*)item; - - grdma_reg->mpool = mpool; - grdma_reg->base = base; - grdma_reg->bound = bound; - grdma_reg->flags = flags; -#if OPAL_CUDA_GDR_SUPPORT - if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) { - mca_common_cuda_get_buffer_id(grdma_reg); - } -#endif /* OPAL_CUDA_GDR_SUPPORT */ - - if (false == bypass_cache) { - rc = mpool->rcache->rcache_insert(mpool->rcache, grdma_reg, 0); - - if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) { - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - opal_free_list_return (&mpool_grdma->reg_list, item); - return rc; - } - } - - while (OPAL_ERR_OUT_OF_RESOURCE == - (rc = mpool_grdma->resources.register_mem(mpool_grdma->resources.reg_data, - base, bound - base + 1, grdma_reg))) { - /* try to remove one unused reg and retry */ - if (!mca_mpool_grdma_evict (mpool)) { - break; - } - } - - if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) { - if (false == bypass_cache) { - mpool->rcache->rcache_delete(mpool->rcache, grdma_reg); - } - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - opal_free_list_return (&mpool_grdma->reg_list, item); - return rc; - } - - *reg = grdma_reg; - (*reg)->ref_count++; - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - - /* Cleanup any vmas that we have deferred deletion on */ - mpool->rcache->rcache_clean(mpool->rcache); - return OPAL_SUCCESS; -} - - -/** - * realloc function - */ -void* mca_mpool_grdma_realloc(mca_mpool_base_module_t *mpool, void *addr, - size_t size, mca_mpool_base_registration_t **reg) -{ - mca_mpool_base_registration_t *old_reg = *reg; - void *new_mem = mca_mpool_grdma_alloc(mpool, size, 0, old_reg->flags, reg); - memcpy(new_mem, addr, old_reg->bound - old_reg->base + 1); - mca_mpool_grdma_free(mpool, addr, old_reg); - - return new_mem; -} - -/** - * free function - */ -void mca_mpool_grdma_free(mca_mpool_base_module_t 
*mpool, void *addr, - mca_mpool_base_registration_t *registration) -{ - void *alloc_base = registration->alloc_base; - mca_mpool_grdma_deregister(mpool, registration); - free(alloc_base); -} - -int mca_mpool_grdma_find(struct mca_mpool_base_module_t *mpool, void *addr, - size_t size, mca_mpool_base_registration_t **reg) -{ - mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool; - unsigned char *base, *bound; - int rc; - - base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log); - bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1), - mca_mpool_base_page_size_log); - - OPAL_THREAD_LOCK(&mpool->rcache->lock); - - rc = mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, reg); - if(NULL != *reg && - (mca_mpool_grdma_component.leave_pinned || - ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) || - ((*reg)->base == base && (*reg)->bound == bound))) { - assert(((void*)(*reg)->bound) >= addr); - if(0 == (*reg)->ref_count && - mca_mpool_grdma_component.leave_pinned) { - opal_list_remove_item(&mpool_grdma->pool->lru_list, - (opal_list_item_t*)(*reg)); - } - mpool_grdma->stat_cache_found++; - (*reg)->ref_count++; - } else { - mpool_grdma->stat_cache_notfound++; - } - - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - - return rc; -} - -static inline bool registration_is_cacheable(mca_mpool_base_registration_t *reg) -{ - return (mca_mpool_grdma_component.leave_pinned && - !(reg->flags & - (MCA_MPOOL_FLAGS_CACHE_BYPASS | - MCA_MPOOL_FLAGS_PERSIST | - MCA_MPOOL_FLAGS_INVALID))); -} - -int mca_mpool_grdma_deregister(struct mca_mpool_base_module_t *mpool, - mca_mpool_base_registration_t *reg) -{ - mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool; - int rc = OPAL_SUCCESS; - assert(reg->ref_count > 0); - - OPAL_THREAD_LOCK(&mpool->rcache->lock); - reg->ref_count--; - if(reg->ref_count > 0) { - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - return OPAL_SUCCESS; - } - - 
if(registration_is_cacheable(reg)) { - opal_list_append(&mpool_grdma->pool->lru_list, (opal_list_item_t *) reg); - } else { - rc = dereg_mem (reg); - } - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - - /* Cleanup any vmas that we have deferred deletion on */ - mpool->rcache->rcache_clean(mpool->rcache); - - return rc; -} - -#define GRDMA_MPOOL_NREGS 100 - -int mca_mpool_grdma_release_memory(struct mca_mpool_base_module_t *mpool, - void *base, size_t size) -{ - mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool; - mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS]; - int reg_cnt, i, rc = OPAL_SUCCESS; - - OPAL_THREAD_LOCK(&mpool->rcache->lock); - do { - reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, base, size, - regs, GRDMA_MPOOL_NREGS); - - for(i = 0 ; i < reg_cnt ; ++i) { - regs[i]->flags |= MCA_MPOOL_FLAGS_INVALID; - if (regs[i]->ref_count) { - /* memory is being freed, but there are registration in use that - * covers the memory. This can happen even in a correct program, - * but may also be an user error. We can't tell. Mark the - * registration as invalid. It will not be used any more and - * will be unregistered when ref_count will become zero */ - rc = OPAL_ERROR; /* tell caller that something was wrong */ - } else { - opal_list_remove_item(&mpool_grdma->pool->lru_list,(opal_list_item_t *) regs[i]); - opal_list_append(&mpool_grdma->pool->gc_list, (opal_list_item_t *) regs[i]); - } - } - } while(reg_cnt == GRDMA_MPOOL_NREGS); - - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - - return rc; -} - -/* Make sure this registration request is not stale. In other words, ensure - * that we do not have a cuMemAlloc, cuMemFree, cuMemAlloc state. If we do - * kick out the regisrations and deregister. This function needs to be called - * with the mpool->rcache->lock held. 
*/ -#if OPAL_CUDA_GDR_SUPPORT -static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size) -{ - mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool; - mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS]; - int reg_cnt, i, rc = OPAL_SUCCESS; - mca_mpool_base_registration_t *reg; - - mpool->rcache->rcache_find(mpool->rcache, addr, size, ®); - if (NULL == reg) { - return OPAL_SUCCESS; - } - - /* If not previously freed memory, just return 0 */ - if (!(mca_common_cuda_previously_freed_memory(reg))) { - return OPAL_SUCCESS; - } - - /* mpool->rcache->rcache_dump_range(mpool->rcache, 0, (size_t)-1, "Before free"); */ - - /* This memory has been freed. Find all registrations and delete */ - do { - reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, reg->base, reg->bound - reg->base + 1, - regs, GRDMA_MPOOL_NREGS); - for(i = 0 ; i < reg_cnt ; ++i) { - regs[i]->flags |= MCA_MPOOL_FLAGS_INVALID; - if (regs[i]->ref_count) { - opal_output(0, "Release FAILED: ref_count=%d, base=%p, bound=%p, size=%d", - regs[i]->ref_count, regs[i]->base, regs[i]->bound, - (int) (regs[i]->bound - regs[i]->base + 1)); - /* memory is being freed, but there are registration in use that - * covers the memory. This can happen even in a correct program, - * but may also be an user error. We can't tell. Mark the - * registration as invalid. It will not be used any more and - * will be unregistered when ref_count will become zero */ - rc = OPAL_ERROR; /* tell caller that something was wrong */ - } else { - opal_list_remove_item(&mpool_grdma->pool->lru_list,(opal_list_item_t *) regs[i]); - /* Now deregister. Do not use gc_list as we need to kick this out now. 
*/ - dereg_mem(regs[i]); - } - } - } while(reg_cnt == GRDMA_MPOOL_NREGS); - - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - /* mpool->rcache->rcache_dump_range(mpool->rcache, 0, (size_t)-1, "After free");*/ - - return rc; -} -#endif /* OPAL_CUDA_GDR_SUPPORT */ - -void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool) -{ - mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool; - mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS]; - int reg_cnt, i; - - /* Statistic */ - if (true == mca_mpool_grdma_component.print_stats) { - opal_output(0, "%s grdma: stats " - "(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mpool_grdma->stat_cache_hit, mpool_grdma->stat_cache_miss, - mpool_grdma->stat_cache_found, mpool_grdma->stat_cache_notfound, - mpool_grdma->stat_evicted); - } - - OPAL_THREAD_LOCK(&mpool->rcache->lock); - - do_unregistration_gc(mpool); - - do { - reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1, - regs, GRDMA_MPOOL_NREGS); - - for (i = 0 ; i < reg_cnt ; ++i) { - if (regs[i]->ref_count) { - regs[i]->ref_count = 0; /* otherwise dereg will fail on assert */ - } else if (mca_mpool_grdma_component.leave_pinned) { - opal_list_remove_item(&mpool_grdma->pool->lru_list, - (opal_list_item_t *) regs[i]); - } - - (void) dereg_mem(regs[i]); - } - } while (reg_cnt == GRDMA_MPOOL_NREGS); - - OBJ_RELEASE(mpool_grdma->pool); - - OBJ_DESTRUCT(&mpool_grdma->reg_list); - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - - /* Cleanup any vmas that we have deferred deletion on */ - mpool->rcache->rcache_clean(mpool->rcache); - - /* this mpool was allocated by grdma_init in mpool_grdma_component.c */ - free(mpool); -} - -int mca_mpool_grdma_ft_event(int state) { - return OPAL_SUCCESS; -} diff --git a/opal/mca/mpool/hugepage/Makefile.am b/opal/mca/mpool/hugepage/Makefile.am new file mode 100644 index 00000000000..621574b1cbd --- /dev/null +++ b/opal/mca/mpool/hugepage/Makefile.am @@ -0,0 
+1,52 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(mpool_hugepage_CPPFLAGS) + +sources = mpool_hugepage_module.c mpool_hugepage_component.c + +if WANT_INSTALL_HEADERS +opaldir = $(opalincludedir)/$(subdir) +opal_HEADERS = mpool_hugepage.h +endif + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_opal_mpool_hugepage_DSO +component_noinst = +component_install = mca_mpool_hugepage.la +else +component_noinst = libmca_mpool_hugepage.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_mpool_hugepage_la_SOURCES = $(sources) +mca_mpool_hugepage_la_LDFLAGS = -module -avoid-version +mca_mpool_hugepage_la_LIBADD = $(mpool_hugepage_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_mpool_hugepage_la_SOURCES = $(sources) +libmca_mpool_hugepage_la_LDFLAGS = -module -avoid-version +libmca_mpool_hugepage_la_LIBADD = $(mpool_hugepage_LIBS) diff --git a/opal/mca/mpool/hugepage/mpool_hugepage.h b/opal/mca/mpool/hugepage/mpool_hugepage.h new file mode 100644 index 00000000000..cd97711168e --- /dev/null +++ b/opal/mca/mpool/hugepage/mpool_hugepage.h @@ -0,0 +1,90 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2013 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Voltaire. All rights reserved. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + */ +#ifndef MCA_MPOOL_HUGEPAGE_H +#define MCA_MPOOL_HUGEPAGE_H + +#include "opal_config.h" +#include "opal/class/opal_list.h" +#include "opal/class/opal_free_list.h" +#include "opal/mca/event/event.h" +#include "opal/mca/mpool/mpool.h" +#include "opal/util/proc.h" +#include "opal/mca/allocator/allocator.h" +#include "opal/util/sys_limits.h" + +BEGIN_C_DECLS +struct mca_mpool_hugepage_module_t; +typedef struct mca_mpool_hugepage_module_t mca_mpool_hugepage_module_t; + +struct mca_mpool_hugepage_component_t { + mca_mpool_base_component_t super; + bool print_stats; + opal_list_t huge_pages; + mca_mpool_hugepage_module_t *modules; + int module_count; + unsigned long bytes_allocated; +}; +typedef struct mca_mpool_hugepage_component_t mca_mpool_hugepage_component_t; + +OPAL_DECLSPEC extern mca_mpool_hugepage_component_t mca_mpool_hugepage_component; + +struct mca_mpool_hugepage_module_t; + +struct mca_mpool_hugepage_hugepage_t { + /** opal list item superclass */ + opal_list_item_t super; + /** page size in bytes */ + unsigned long page_size; + /** path for mmapped files */ + char *path; + /** counter to help ensure unique file names for mmaped files */ + volatile int32_t count; + /** some platforms allow allocation of hugepages through mmap flags */ + int mmap_flags; +}; +typedef struct mca_mpool_hugepage_hugepage_t mca_mpool_hugepage_hugepage_t; + +OBJ_CLASS_DECLARATION(mca_mpool_hugepage_hugepage_t); + +struct mca_mpool_hugepage_module_t { + mca_mpool_base_module_t super; + mca_mpool_hugepage_hugepage_t *huge_page; + mca_allocator_base_module_t *allocator; + opal_mutex_t lock; + opal_rb_tree_t allocation_tree; +}; + +/* + * Initializes the mpool module. 
+ */ +int mca_mpool_hugepage_module_init (mca_mpool_hugepage_module_t *mpool, + mca_mpool_hugepage_hugepage_t *huge_page); + +void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep); +void mca_mpool_hugepage_seg_free (void *ctx, void *addr); + +END_C_DECLS +#endif diff --git a/opal/mca/mpool/hugepage/mpool_hugepage_component.c b/opal/mca/mpool/hugepage/mpool_hugepage_component.c new file mode 100644 index 00000000000..0460f5a8563 --- /dev/null +++ b/opal/mca/mpool/hugepage/mpool_hugepage_component.c @@ -0,0 +1,394 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2013 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Voltaire. All rights reserved. + * Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1 +#include "opal_config.h" +#include "opal/mca/base/base.h" +#include "opal/runtime/opal_params.h" +#include "opal/mca/base/mca_base_pvar.h" +#include "opal/mca/mpool/base/base.h" +#include "opal/mca/allocator/base/base.h" + +#include "opal/util/argv.h" + +#include "mpool_hugepage.h" + +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_MALLOC_H +#include +#endif +#ifdef HAVE_SYS_VFS_H +#include +#endif +#ifdef HAVE_SYS_MOUNT_H +#include +#endif +#ifdef HAVE_SYS_PARAM_H +#include +#endif +#ifdef HAVE_SYS_MMAN_H +#include +#endif +#ifdef HAVE_MNTENT_H +#include +#endif + +#include + +/* + * Note that some OS's (e.g., NetBSD and Solaris) have statfs(), but + * no struct statfs (!). So check to make sure we have struct statfs + * before allowing the use of statfs(). + */ +#if defined(HAVE_STATFS) && (defined(HAVE_STRUCT_STATFS_F_FSTYPENAME) || \ + defined(HAVE_STRUCT_STATFS_F_TYPE)) +#define USE_STATFS 1 +#endif + + +/* + * Local functions + */ +static int mca_mpool_hugepage_open (void); +static int mca_mpool_hugepage_close (void); +static int mca_mpool_hugepage_register (void); +static int mca_mpool_hugepage_query (const char *hints, int *priority, + mca_mpool_base_module_t **module); +static void mca_mpool_hugepage_find_hugepages (void); + +static int mca_mpool_hugepage_priority; +static unsigned long mca_mpool_hugepage_page_size; + +mca_mpool_hugepage_component_t mca_mpool_hugepage_component = { + { + /* First, the mca_base_component_t struct containing meta + information about the component itself */ + + .mpool_version ={ + MCA_MPOOL_BASE_VERSION_3_0_0, + + .mca_component_name = "hugepage", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + .mca_open_component = mca_mpool_hugepage_open, + .mca_close_component = mca_mpool_hugepage_close, + .mca_register_component_params = 
mca_mpool_hugepage_register, + }, + .mpool_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + .mpool_query = mca_mpool_hugepage_query, + }, +}; + +/** + * component open/close/init function + */ + +static int mca_mpool_hugepage_register(void) +{ + mca_mpool_hugepage_priority = 50; + (void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version, + "priority", "Default priority of the hugepage mpool component " + "(default: 50)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, + &mca_mpool_hugepage_priority); + + mca_mpool_hugepage_page_size = 1 << 21; + (void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version, + "page_size", "Default huge page size of the hugepage mpool component " + "(default: 2M)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, + &mca_mpool_hugepage_page_size); + + mca_mpool_hugepage_component.bytes_allocated = 0; + (void) mca_base_component_pvar_register (&mca_mpool_hugepage_component.super.mpool_version, + "bytes_allocated", "Number of bytes currently allocated in the mpool " + "hugepage component", OPAL_INFO_LVL_3, MCA_BASE_PVAR_CLASS_SIZE, + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, + MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, + NULL, NULL, NULL, &mca_mpool_hugepage_component.bytes_allocated); + + return OPAL_SUCCESS; +} + +static int mca_mpool_hugepage_open (void) +{ + mca_mpool_hugepage_module_t *hugepage_module; + mca_mpool_hugepage_hugepage_t *hp; + int module_index, rc; + + OBJ_CONSTRUCT(&mca_mpool_hugepage_component.huge_pages, opal_list_t); + mca_mpool_hugepage_find_hugepages (); + + if (0 == opal_list_get_size (&mca_mpool_hugepage_component.huge_pages)) { + return OPAL_SUCCESS; + } + + mca_mpool_hugepage_component.modules = (mca_mpool_hugepage_module_t *) + calloc (opal_list_get_size 
(&mca_mpool_hugepage_component.huge_pages), + sizeof (mca_mpool_hugepage_module_t)); + if (NULL == mca_mpool_hugepage_component.modules) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + module_index = 0; + OPAL_LIST_FOREACH(hp, &mca_mpool_hugepage_component.huge_pages, mca_mpool_hugepage_hugepage_t) { + hugepage_module = mca_mpool_hugepage_component.modules + module_index; + rc = mca_mpool_hugepage_module_init (hugepage_module, hp); + if (OPAL_SUCCESS != rc) { + continue; + } + module_index++; + } + + mca_mpool_hugepage_component.module_count = module_index; + + return OPAL_SUCCESS; +} + +static int mca_mpool_hugepage_close (void) +{ + OPAL_LIST_DESTRUCT(&mca_mpool_hugepage_component.huge_pages); + + for (int i = 0 ; i < mca_mpool_hugepage_component.module_count ; ++i) { + mca_mpool_hugepage_module_t *module = mca_mpool_hugepage_component.modules + i; + module->super.mpool_finalize (&module->super); + } + + free (mca_mpool_hugepage_component.modules); + mca_mpool_hugepage_component.modules = NULL; + + return OPAL_SUCCESS; +} + +static int page_compare (opal_list_item_t **a, opal_list_item_t **b) { + mca_mpool_hugepage_hugepage_t *pagea = (mca_mpool_hugepage_hugepage_t *) *a; + mca_mpool_hugepage_hugepage_t *pageb = (mca_mpool_hugepage_hugepage_t *) *b; + if (pagea->page_size > pageb->page_size) { + return 1; + } else if (pagea->page_size < pageb->page_size) { + return -1; + } + + return 0; +} + +static void mca_mpool_hugepage_find_hugepages (void) { +#ifdef HAVE_MNTENT_H + mca_mpool_hugepage_hugepage_t *hp; + FILE *fh; + struct mntent *mntent; + char *opts, *tok, *ctx; + + fh = setmntent ("/proc/mounts", "r"); + if (NULL == fh) { + return; + } + + while (NULL != (mntent = getmntent(fh))) { + unsigned long page_size = 0; + + if (0 != strcmp(mntent->mnt_type, "hugetlbfs")) { + continue; + } + + opts = strdup(mntent->mnt_opts); + if (NULL == opts) { + break; + } + + tok = strtok_r (opts, ",", &ctx); + + do { + if (0 == strncmp (tok, "pagesize", 8)) { + break; + } + tok = 
strtok_r (NULL, ",", &ctx); + } while (tok); + + if (!tok) { +#if defined(USE_STATFS) + struct statfs info; + + statfs (mntent->mnt_dir, &info); +#elif defined(HAVE_STATVFS) + struct statvfs info; + statvfs (mntent->mnt_dir, &info); +#endif + page_size = info.f_bsize; + } else { + (void) sscanf (tok, "pagesize=%lu", &page_size); + } + free(opts); + + if (0 == page_size) { + /* could not get page size */ + continue; + } + + hp = OBJ_NEW(mca_mpool_hugepage_hugepage_t); + if (NULL == hp) { + break; + } + + hp->path = strdup (mntent->mnt_dir); + hp->page_size = page_size; + + OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output, + "found huge page with size = %lu, path = %s, mmap flags = 0x%x", + hp->page_size, hp->path, hp->mmap_flags)); + + opal_list_append (&mca_mpool_hugepage_component.huge_pages, &hp->super); + } + + opal_list_sort (&mca_mpool_hugepage_component.huge_pages, page_compare); + + endmntent (fh); +#endif +} + +static int mca_mpool_hugepage_query (const char *hints, int *priority_out, + mca_mpool_base_module_t **module) +{ + unsigned long page_size = 0; + char **hints_array; + int my_priority = mca_mpool_hugepage_priority; + char *tmp; + bool found = false; + + if (0 == mca_mpool_hugepage_component.module_count) { + return OPAL_ERR_NOT_AVAILABLE; + } + + if (hints) { + hints_array = opal_argv_split (hints, ','); + if (NULL == hints_array) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + for (int i = 0 ; hints_array[i] ; ++i) { + char *key = hints_array[i]; + char *value = NULL; + + if (NULL != (tmp = strchr (key, '='))) { + value = tmp + 1; + *tmp = '\0'; + } + + if (0 == strcasecmp ("mpool", key)) { + if (value && 0 == strcasecmp ("hugepage", value)) { + /* this mpool was requested by name */ + my_priority = 100; + opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output, + "hugepage mpool matches hint: %s=%s", key, value); + } else { + /* different mpool requested */ + my_priority = 0; + 
opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output, + "hugepage mpool does not match hint: %s=%s", key, value); + opal_argv_free (hints_array); + return OPAL_ERR_NOT_FOUND; + } + } + + if (0 == strcasecmp ("page_size", key) && value) { + page_size = strtoul (value, &tmp, 0); + if (*tmp) { + switch (*tmp) { + case 'g': + case 'G': + page_size *= 1024; + /* fall through */ + case 'm': + case 'M': + page_size *= 1024; + /* fall through */ + case 'k': + case 'K': + page_size *= 1024; + break; + default: + page_size = -1; + } + } + opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output, + "hugepage mpool requested page size: %lu", page_size); + } + } + + opal_argv_free (hints_array); + } + + if (0 == page_size) { + /* use default huge page size */ + page_size = mca_mpool_hugepage_page_size; + if (my_priority < 100) { + /* take a priority hit if this mpool was not asked for by name */ + my_priority = 0; + } + opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output, + "hugepage mpool did not match any hints: %s", hints); + } + + for (int i = 0 ; i < mca_mpool_hugepage_component.module_count ; ++i) { + mca_mpool_hugepage_module_t *hugepage_module = mca_mpool_hugepage_component.modules + i; + + if (hugepage_module->huge_page->page_size != page_size) { + continue; + } + + my_priority = (my_priority < 80) ? my_priority + 20 : 100; + + if (module) { + *module = &hugepage_module->super; + } + + opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output, + "matches page size hint. 
page size: %lu, path: %s, mmap flags: " + "0x%x", page_size, hugepage_module->huge_page->path, + hugepage_module->huge_page->mmap_flags); + found = true; + break; + } + + if (!found) { + opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output, + "could not find page matching page request: %lu", page_size); + return OPAL_ERR_NOT_FOUND; + } + + if (priority_out) { + *priority_out = my_priority; + } + + return OPAL_SUCCESS; +} diff --git a/opal/mca/mpool/hugepage/mpool_hugepage_module.c b/opal/mca/mpool/hugepage/mpool_hugepage_module.c new file mode 100644 index 00000000000..87dbf8a4185 --- /dev/null +++ b/opal/mca/mpool/hugepage/mpool_hugepage_module.c @@ -0,0 +1,258 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2013 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006 Voltaire. All rights reserved. + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1 +#include "opal_config.h" +#include "opal/align.h" +#include "mpool_hugepage.h" +#include +#include +#ifdef HAVE_MALLOC_H +#include +#endif +#include "opal/mca/mpool/base/base.h" +#include "opal/runtime/opal_params.h" +#include "opal/include/opal_stdint.h" +#include "opal/mca/allocator/base/base.h" + +#include +#include + + +static void *mca_mpool_hugepage_alloc (mca_mpool_base_module_t *mpool, size_t size, size_t align, + uint32_t flags); +static void *mca_mpool_hugepage_realloc (mca_mpool_base_module_t *mpool, void *addr, size_t size); +static void mca_mpool_hugepage_free (mca_mpool_base_module_t *mpool, void *addr); +static void mca_mpool_hugepage_finalize (mca_mpool_base_module_t *mpool); +static int mca_mpool_hugepage_ft_event (int state); + +static void mca_mpool_hugepage_hugepage_constructor (mca_mpool_hugepage_hugepage_t *huge_page) +{ + memset ((char *)huge_page + sizeof(huge_page->super), 0, sizeof (*huge_page) - sizeof (huge_page->super)); +} + +static void mca_mpool_hugepage_hugepage_destructor (mca_mpool_hugepage_hugepage_t *huge_page) +{ + free (huge_page->path); +} + +OBJ_CLASS_INSTANCE(mca_mpool_hugepage_hugepage_t, opal_list_item_t, + mca_mpool_hugepage_hugepage_constructor, + mca_mpool_hugepage_hugepage_destructor); + +static int mca_mpool_rb_hugepage_compare (void *key1, void *key2) +{ + if (key1 == key2) { + return 0; + } + + return (key1 < key2) ? -1 : 1; +} + +/* + * Initializes the mpool module. + */ +int mca_mpool_hugepage_module_init(mca_mpool_hugepage_module_t *mpool, + mca_mpool_hugepage_hugepage_t *huge_page) +{ + mca_allocator_base_component_t *allocator_component; + int rc; + + mpool->super.mpool_component = &mca_mpool_hugepage_component.super; + mpool->super.mpool_base = NULL; /* no base .. 
*/
+    mpool->super.mpool_alloc = mca_mpool_hugepage_alloc;
+    mpool->super.mpool_realloc = mca_mpool_hugepage_realloc;
+    mpool->super.mpool_free = mca_mpool_hugepage_free;
+    mpool->super.mpool_finalize = mca_mpool_hugepage_finalize;
+    mpool->super.mpool_ft_event = mca_mpool_hugepage_ft_event;
+    mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
+
+    OBJ_CONSTRUCT(&mpool->lock, opal_mutex_t);
+
+    mpool->huge_page = huge_page;
+
+    /* use an allocator component to reduce waste when making small allocations */
+    allocator_component = mca_allocator_component_lookup ("bucket");
+    if (NULL == allocator_component) {
+        return OPAL_ERR_NOT_AVAILABLE;
+    }
+
+    mpool->allocator = allocator_component->allocator_init (true, mca_mpool_hugepage_seg_alloc,
+                                                            mca_mpool_hugepage_seg_free, mpool);
+
+    OBJ_CONSTRUCT(&mpool->allocation_tree, opal_rb_tree_t);
+    rc = opal_rb_tree_init (&mpool->allocation_tree, mca_mpool_rb_hugepage_compare);
+    if (OPAL_SUCCESS != rc) {
+        OBJ_DESTRUCT(&mpool->allocation_tree);
+        return OPAL_ERR_NOT_AVAILABLE;
+    }
+
+    return OPAL_SUCCESS;
+}
+
+/**
+ * Segment allocation callback given to the allocator in module init.
+ *
+ * @param[in]     ctx    the mca_mpool_hugepage_module_t that owns the segment
+ * @param[in,out] sizep  requested size on entry; size actually mapped (rounded
+ *                       up to the huge page size) on successful return
+ *
+ * @returns base address of the new mapping, or NULL on failure
+ *
+ * The mapping is first attempted with the huge page mmap flags (backed by a
+ * temporary file when the huge page description has a path). If that fails a
+ * private anonymous mapping of standard pages is used instead. Every mapping
+ * handed out is recorded in the module's allocation tree so that
+ * mca_mpool_hugepage_seg_free can look up its size.
+ */
+void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep)
+{
+    mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) ctx;
+    mca_mpool_hugepage_hugepage_t *huge_page = hugepage_module->huge_page;
+    size_t size = *sizep;
+    void *base = NULL;
+    char *path = NULL;
+    int flags = MAP_PRIVATE;
+    int anon_flags = MAP_PRIVATE;
+    int fd = -1;
+    int rc;
+
+    /* flags for a mapping without a backing file; needed by the standard page
+     * fallback even when the huge page attempt is file backed */
+#if defined(MAP_ANONYMOUS)
+    anon_flags |= MAP_ANONYMOUS;
+#elif defined(MAP_ANON)
+    /* older versions of OS X do not define MAP_ANONYMOUS (10.9.x and older) */
+    anon_flags |= MAP_ANON;
+#endif
+
+    size = OPAL_ALIGN(size, huge_page->page_size, size_t);
+
+    if (huge_page->path) {
+        int32_t count;
+
+        count = opal_atomic_add_32 (&huge_page->count, 1);
+
+        rc = asprintf (&path, "%s/hugepage.openmpi.%d.%d", huge_page->path,
+                       getpid (), count);
+        if (0 > rc) {
+            return NULL;
+        }
+
+        fd = open (path, O_RDWR | O_CREAT, 0600);
+        if (-1 == fd) {
+            free (path);
+            return NULL;
+        }
+
+        if (0 != ftruncate (fd, size)) {
+            close (fd);
+            unlink (path);
+            free (path);
+            return NULL;
+        }
+    } else {
+        flags = anon_flags;
+    }
+
+    base = mmap (NULL, size, PROT_READ | PROT_WRITE, flags | huge_page->mmap_flags, fd, 0);
+    /* the file is only needed to create the mapping */
+    if (path) {
+        unlink (path);
+        free (path);
+    }
+
+    if (fd >= 0) {
+        close (fd);
+    }
+
+    if (MAP_FAILED == base) {
+        opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_verbose,
+                             "could not allocate huge page(s). falling back on standard pages");
+        /* fall back on regular pages. there is no file descriptor here so the
+         * mapping has to be anonymous regardless of how the first attempt was made */
+        base = mmap (NULL, size, PROT_READ | PROT_WRITE, anon_flags, -1, 0);
+    }
+
+    if (MAP_FAILED == base) {
+        return NULL;
+    }
+
+    opal_mutex_lock (&hugepage_module->lock);
+    rc = opal_rb_tree_insert (&hugepage_module->allocation_tree, base, (void *) (intptr_t) size);
+    if (OPAL_SUCCESS == rc) {
+        opal_atomic_add (&mca_mpool_hugepage_component.bytes_allocated, (int64_t) size);
+    }
+    opal_mutex_unlock (&hugepage_module->lock);
+
+    if (OPAL_SUCCESS != rc) {
+        /* a segment that is not in the tree could never be unmapped by seg_free */
+        munmap (base, size);
+        return NULL;
+    }
+
+    OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose,
+                         "allocated segment %p of size %lu bytes", base, (unsigned long) size));
+
+    *sizep = size;
+
+    return base;
+}
+
+/**
+ * Segment release callback given to the allocator in module init.
+ *
+ * @param[in] ctx   the mca_mpool_hugepage_module_t that owns the segment
+ * @param[in] addr  base address previously returned by mca_mpool_hugepage_seg_alloc
+ *
+ * Addresses that are not found in the allocation tree are ignored.
+ */
+void mca_mpool_hugepage_seg_free (void *ctx, void *addr)
+{
+    mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) ctx;
+    size_t size;
+
+    opal_mutex_lock (&hugepage_module->lock);
+
+    /* the tree maps a segment base address to the size of its mapping */
+    size = (size_t) (intptr_t) opal_rb_tree_find (&hugepage_module->allocation_tree, addr);
+    if (size > 0) {
+        opal_rb_tree_delete (&hugepage_module->allocation_tree, addr);
+        OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose,
+                             "freeing segment %p of size %lu bytes", addr, (unsigned long) size));
+        munmap (addr, size);
+        opal_atomic_add (&mca_mpool_hugepage_component.bytes_allocated, -(int64_t) size);
+    }
+
+    opal_mutex_unlock (&hugepage_module->lock);
+}
+
+/**
+ * allocate function: forwards to the module's allocator (flags is not used)
+ */
+static void *mca_mpool_hugepage_alloc (mca_mpool_base_module_t *mpool, size_t size,
+                                       size_t align, uint32_t flags)
+{
+    mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
+    return hugepage_module->allocator->alc_alloc (hugepage_module->allocator, size, align);
+}
+
+/**
+ * reallocate function: forwards to the module's allocator
+ */
+static void *mca_mpool_hugepage_realloc (mca_mpool_base_module_t *mpool, void *addr, size_t size)
+{
+    mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
+
+    return hugepage_module->allocator->alc_realloc (hugepage_module->allocator, addr, size);
+}
+
+/**
+ * free function: forwards to the module's allocator
+ */
+static void mca_mpool_hugepage_free (mca_mpool_base_module_t *mpool, void *addr)
+{
+    mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
+
+    hugepage_module->allocator->alc_free (hugepage_module->allocator, addr);
+}
+
+/**
+ * finalize function: releases the allocator, the allocation tree and the lock
+ */
+static void mca_mpool_hugepage_finalize (struct mca_mpool_base_module_t *mpool)
+{
+    mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
+
+    /* finalize the allocator first: it was given mca_mpool_hugepage_seg_free as
+     * its release callback, and that callback takes the lock and walks the tree */
+    if (hugepage_module->allocator) {
+        (void) hugepage_module->allocator->alc_finalize (hugepage_module->allocator);
+        hugepage_module->allocator = NULL;
+    }
+
+    OBJ_DESTRUCT(&hugepage_module->allocation_tree);
+    OBJ_DESTRUCT(&hugepage_module->lock);
+}
+
+/**
+ * fault tolerance event function: nothing to do for this mpool
+ */
+static int mca_mpool_hugepage_ft_event (int state) {
+    return OPAL_SUCCESS;
+}
diff --git a/opal/mca/mpool/memkind/Makefile.am b/opal/mca/mpool/memkind/Makefile.am
new file mode 100644
index 00000000000..b945e067270
--- /dev/null
+++ b/opal/mca/mpool/memkind/Makefile.am
@@ -0,0 +1,41 @@
+# -*- indent-tabs-mode:nil -*-
+#
+# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
+#                         reserved.
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+AM_CPPFLAGS = $(mpool_memkind_CPPFLAGS)
+
+if MCA_BUILD_opal_mpool_memkind_DSO
+component_noinst =
+component_install = mca_mpool_memkind.la
+else
+component_noinst = libmca_mpool_memkind.la
+component_install =
+endif
+
+memkind_SOURCES = \
+    mpool_memkind_component.c \
+    mpool_memkind_module.c \
+    mpool_memkind.h
+
+mcacomponentdir = $(opallibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_mpool_memkind_la_SOURCES = $(memkind_SOURCES)
+nodist_mca_mpool_memkind_la_SOURCES = $(memkind_nodist_SOURCES)
+mca_mpool_memkind_la_LIBADD = $(mpool_memkind_LIBS)
+mca_mpool_memkind_la_LDFLAGS = -module -avoid-version $(mpool_memkind_LDFLAGS)
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_mpool_memkind_la_SOURCES = $(memkind_SOURCES)
+nodist_libmca_mpool_memkind_la_SOURCES = $(memkind_nodist_SOURCES)
+libmca_mpool_memkind_la_LIBADD = $(mpool_memkind_LIBS)
+libmca_mpool_memkind_la_LDFLAGS = -module -avoid-version $(mpool_memkind_LDFLAGS)
diff --git a/opal/mca/mpool/memkind/configure.m4 b/opal/mca/mpool/memkind/configure.m4
new file mode 100644
index 00000000000..bc6e5a32863
--- /dev/null
+++ b/opal/mca/mpool/memkind/configure.m4
@@ -0,0 +1,45 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
+#                         reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# MCA_opal_mpool_memkind_CONFIG([action-if-can-compile], [action-if-cant-compile])
+# Looks for memkind.h / libmemkind (optionally under --with-memkind=DIR) and
+# runs $1 when they are usable, $2 otherwise.
+AC_DEFUN([MCA_opal_mpool_memkind_CONFIG],[
+    OPAL_VAR_SCOPE_PUSH([opal_mpool_memkind_happy])
+    AC_CONFIG_FILES([opal/mca/mpool/memkind/Makefile])
+
+    AC_ARG_WITH([memkind], [AC_HELP_STRING([--with-memkind(=DIR)],
+                [Build with MEMKIND, searching for headers in DIR])])
+    OPAL_CHECK_WITHDIR([memkind], [$with_memkind], [include/memkind.h])
+
+    opal_mpool_memkind_happy="no"
+
+    if test "$with_memkind" != "no" ; then
+        if test -n "$with_memkind" -a "$with_memkind" != "yes" ; then
+            opal_check_memkind_dir=$with_memkind
+        fi
+
+        OPAL_CHECK_PACKAGE([mpool_memkind], [memkind.h], [memkind], [memkind_malloc], [ -lnuma],
+                           [$opal_check_memkind_dir], [], [opal_mpool_memkind_happy="yes"], [])
+
+        if test "$opal_mpool_memkind_happy" != "yes" -a -n "$with_memkind" ; then
+            AC_MSG_ERROR([MEMKIND support requested but not found.  Aborting])
+        fi
+    fi
+
+    AS_IF([test "$opal_mpool_memkind_happy" = "yes"], [$1], [$2])
+
+    # substitute in the things needed to build memkind
+    AC_SUBST([mpool_memkind_CPPFLAGS])
+    AC_SUBST([mpool_memkind_LDFLAGS])
+    AC_SUBST([mpool_memkind_LIBS])
+    OPAL_VAR_SCOPE_POP
+])dnl
diff --git a/opal/mca/mpool/memkind/mpool_memkind.h b/opal/mca/mpool/memkind/mpool_memkind.h
new file mode 100644
index 00000000000..afc0b6d8b78
--- /dev/null
+++ b/opal/mca/mpool/memkind/mpool_memkind.h
@@ -0,0 +1,94 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007      Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2009      Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/**
+ * @file
+ */
+#ifndef MCA_MPOOL_MEMKIND_H
+#define MCA_MPOOL_MEMKIND_H
+
+#include "opal_config.h"
+
+#include "opal/mca/event/event.h"
+#include "opal/mca/mpool/mpool.h"
+
+#include "opal/mca/allocator/allocator.h"
+#include <memkind.h>
+
+BEGIN_C_DECLS
+
+static const int mca_mpool_memkind_default_pagesize = 4096;
+
+struct mca_mpool_memkind_module_t {
+    mca_mpool_base_module_t super;
+    memkind_t kind;      /**< memkind kind every allocation of this module is made from */
+    int page_size;       /**< alignment used by the alloc function when the caller passes 0 */
+};
+typedef struct mca_mpool_memkind_module_t mca_mpool_memkind_module_t;
+
+struct mca_mpool_memkind_component_t {
+    mca_mpool_base_component_t super;
+    int hbw;
+    int pagesize;
+    int bind;
+    int default_partition;   /**< partition used when no "partition" hint is given */
+    int priority;            /**< component priority when nothing is specifically requested */
+    char *memkind_file;
+    int output;              /**< opal_output stream id, -1 when verbose is 0 */
+    mca_mpool_memkind_module_t modules[MEMKIND_NUM_BASE_KIND];   /**< one module per partition */
+};
+typedef struct mca_mpool_memkind_component_t mca_mpool_memkind_component_t;
+OPAL_MODULE_DECLSPEC extern mca_mpool_memkind_component_t mca_mpool_memkind_component;
+
+/*
+ * Initializes one mpool module for the given memkind partition.
+*/
+
+void mca_mpool_memkind_module_init(mca_mpool_memkind_module_t *mpool, int partition);
+
+/**
+ * Allocate a block from the memkind kind of this module.
+ */
+void* mca_mpool_memkind_alloc(
+    mca_mpool_base_module_t* mpool,
+    size_t size,
+    size_t align,
+    uint32_t flags);
+
+/**
+ * Reallocate a block using the memkind kind of this module.
+ */
+void* mca_mpool_memkind_realloc(
+    mca_mpool_base_module_t* mpool,
+    void* addr,
+    size_t size);
+
+/**
+ * Free a block using the memkind kind of this module.
+ */
+void mca_mpool_memkind_free(
+    mca_mpool_base_module_t* mpool,
+    void * addr);
+
+END_C_DECLS
+
+#endif
diff --git a/opal/mca/mpool/memkind/mpool_memkind_component.c b/opal/mca/mpool/memkind/mpool_memkind_component.c
new file mode 100644
index 00000000000..9ac5690f6f7
--- /dev/null
+++ b/opal/mca/mpool/memkind/mpool_memkind_component.c
@@ -0,0 +1,297 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007-2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2008-2009 Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2010-2016 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * Copyright (c) 2014      NVIDIA Corporation.  All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif  /* HAVE_UNISTD_H*/
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif  /* HAVE_STDLIB_H */
+#include <errno.h>
+#include <memkind.h>
+#include "opal/mca/base/base.h"
+#include "opal/mca/allocator/base/base.h"
+#include "mpool_memkind.h"
+
+
+/*
+ * Local functions
+ */
+
+static int
+mca_mpool_memkind_register(void);
+
+static int
+mca_mpool_memkind_open(void);
+
+static int
+mca_mpool_memkind_close(void);
+
+static int mca_mpool_memkind_query (const char *hints, int *priority,
+                                    mca_mpool_base_module_t **module);
+
+mca_mpool_memkind_component_t mca_mpool_memkind_component = {
+    {
+        /* First, the mca_base_component_t struct containing meta
+           information about the component itself */
+        .mpool_version = {
+            MCA_MPOOL_BASE_VERSION_3_0_0,
+            "memkind", /* MCA component name */
+            MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
+                                  OPAL_RELEASE_VERSION),
+            .mca_open_component = mca_mpool_memkind_open,
+            .mca_close_component = mca_mpool_memkind_close,
+            .mca_register_component_params = mca_mpool_memkind_register
+        },
+        .mpool_data = {
+            /* The component is checkpoint ready */
+            MCA_BASE_METADATA_PARAM_CHECKPOINT
+        },
+
+        .mpool_query = mca_mpool_memkind_query,
+    }
+};
+
+/* Names accepted by the "default_partition" MCA variable and by the "partition"
+ * query hint, each mapped to the memkind partition id it selects. */
+static mca_base_var_enum_value_t memory_kinds[] = {
+    {.value = MEMKIND_PARTITION_DEFAULT, .string = "memkind_default"},
+    {.value = MEMKIND_PARTITION_HBW, .string = "memkind_hbw"},
+    {.value = MEMKIND_PARTITION_HBW_HUGETLB, .string = "memkind_hbw_hugetlb"},
+    {.value = MEMKIND_PARTITION_HBW_PREFERRED, .string = "memkind_hbw_preferred"},
+    {.value = MEMKIND_PARTITION_HBW_PREFERRED_HUGETLB, .string = "memkind_hbw_preferred_hugetlb"},
+    {.value = MEMKIND_PARTITION_HUGETLB, .string = "memkind_hugetlb"},
+    {.value = MEMKIND_PARTITION_HBW_GBTLB, .string = "memkind_hbw_gbtlb"},
+    {.value = MEMKIND_PARTITION_HBW_PREFERRED_GBTLB, .string = "memkind_hbw_preferred_gbtlb"},
+    {.value = MEMKIND_PARTITION_GBTLB, .string = "memkind_gbtlb"},
+    {.value = MEMKIND_PARTITION_HBW_INTERLEAVE, .string = "memkind_hbw_interleave"},
+    {.value = MEMKIND_PARTITION_INTERLEAVE, .string = "memkind_interleave"},
+    {.string = NULL},
+};
+
+static mca_base_var_enum_t *mca_mpool_memkind_enum = NULL;
+
+static int opal_mpool_memkind_verbose;
+/**
+ * component register function: default_partition, priority and verbose variables
+ */
+static int mca_mpool_memkind_register(void)
+{
+    int rc;
+
+    /* register MEMKIND component parameters */
+    mca_mpool_memkind_component.default_partition = memory_kinds[0].value;
+
+    rc = mca_base_var_enum_create ("memkind partition types", memory_kinds, &mca_mpool_memkind_enum);
+    if (OPAL_SUCCESS != rc) {
+        return rc;
+    }
+
+    (void) mca_base_component_var_register(&mca_mpool_memkind_component.super.mpool_version,
+                                           "default_partition", "Default memkind partition to use",
+                                           MCA_BASE_VAR_TYPE_INT, mca_mpool_memkind_enum, 0, 0,
+                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
+                                           &mca_mpool_memkind_component.default_partition);
+
+    mca_mpool_memkind_component.priority = 10;
+    (void) mca_base_component_var_register(&mca_mpool_memkind_component.super.mpool_version,
+                                           "priority", "Default priority of the memkind component",
+                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                           OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
+                                           &mca_mpool_memkind_component.priority);
+
+    opal_mpool_memkind_verbose = 0;
+    (void) mca_base_component_var_register(&mca_mpool_memkind_component.super.mpool_version,
+                                           "verbose", "Verbosity of the memkind mpool component",
+                                           MCA_BASE_VAR_TYPE_INT, &mca_base_var_enum_verbose, 0, 0,
+                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
+                                           &opal_mpool_memkind_verbose);
+
+    return OPAL_SUCCESS;
+}
+
+/**
+ * component open function: checks the default partition and sets up all modules
+ */
+static int mca_mpool_memkind_open (void)
+{
+    memkind_t default_kind;
+    int rc;
+
+    if (opal_mpool_memkind_verbose != 0) {
+        mca_mpool_memkind_component.output = opal_output_open(NULL);
+        opal_output_set_verbosity (mca_mpool_memkind_component.output, opal_mpool_memkind_verbose);
+    } else {
+        mca_mpool_memkind_component.output = -1;
+    }
+
+    rc = memkind_get_kind_by_partition (mca_mpool_memkind_component.default_partition,
+                                        &default_kind);
+    if (0 != rc) {
+        return OPAL_ERR_NOT_AVAILABLE;
+    }
+
+    if (memkind_check_available (default_kind)) {
+        char *kind_string;
+
+        mca_mpool_memkind_enum->string_from_value (mca_mpool_memkind_enum,
+                                                   mca_mpool_memkind_component.default_partition,
+                                                   &kind_string);
+        opal_output_verbose (MCA_BASE_VERBOSE_WARN, mca_mpool_memkind_component.output,
+                             "default kind %s not available", kind_string);
+        free (kind_string);
+        return OPAL_ERR_NOT_AVAILABLE;
+    }
+
+    for (int i = 0 ; i < MEMKIND_NUM_BASE_KIND ; ++i) {
+        mca_mpool_memkind_module_init (mca_mpool_memkind_component.modules + i, i);
+    }
+
+    return OPAL_SUCCESS;
+}
+
+/**
+ * component close function: closes the output stream and releases the enum
+ */
+static int mca_mpool_memkind_close(void)
+{
+    opal_output_close (mca_mpool_memkind_component.output);
+    mca_mpool_memkind_component.output = -1;
+
+    if (mca_mpool_memkind_enum) {
+        OBJ_RELEASE(mca_mpool_memkind_enum);
+        mca_mpool_memkind_enum = NULL;
+    }
+
+    return OPAL_SUCCESS;
+}
+
+/**
+ * component query function: score this component against a hint string
+ *
+ * @param[in]  hints         "key=value" hints separated by ','; may be NULL
+ * @param[out] priority_out  priority of this component for the hints; may be NULL
+ * @param[out] module        module of the selected partition; may be NULL
+ *
+ * Recognized keys are "mpool" (priority 100 when the value is "memkind",
+ * priority 0 for any other value) and "partition" (a name from memory_kinds).
+ * Hints without a '=' carry no value and are skipped.
+ *
+ * @returns OPAL_SUCCESS in all cases
+ */
+static int mca_mpool_memkind_query (const char *hints, int *priority_out,
+                                    mca_mpool_base_module_t **module)
+{
+    int my_priority = mca_mpool_memkind_component.priority;
+    char **hint_array, *partition_name = NULL;
+    int partition = -1, rc;
+
+    if (module) {
+        *module = &mca_mpool_memkind_component.modules[mca_mpool_memkind_component.default_partition].super;
+    }
+
+    if (NULL == hints) {
+        if (priority_out) {
+            *priority_out = my_priority;
+        }
+        return OPAL_SUCCESS;
+    }
+
+    hint_array = opal_argv_split (hints, ',');
+    if (NULL == hint_array) {
+        if (priority_out) {
+            *priority_out = my_priority;
+        }
+        return OPAL_SUCCESS;
+    }
+
+    for (int i = 0 ; hint_array[i] ; ++i) {
+        char *tmp, *key, *value;
+        int requested;
+
+        key = hint_array[i];
+        tmp = strchr (key, '=');
+        if (NULL == tmp) {
+            /* no value: both keys handled below need one */
+            continue;
+        }
+
+        *tmp = '\0';
+        value = tmp + 1;
+
+        if (0 == strcasecmp (key, "mpool")) {
+            if (0 == strcasecmp (value, "memkind")) {
+                /* specifically selected */
+                my_priority = 100;
+            } else {
+                /* another mpool was requested: release the split hints before returning */
+                opal_argv_free (hint_array);
+                if (priority_out) {
+                    *priority_out = 0;
+                }
+                return OPAL_SUCCESS;
+            }
+        } else if (0 == strcasecmp (key, "partition")) {
+            rc = mca_mpool_memkind_enum->value_from_string (mca_mpool_memkind_enum,
+                                                            value, &requested);
+            if (OPAL_SUCCESS != rc) {
+                opal_output_verbose (MCA_BASE_VERBOSE_WARN, mca_mpool_memkind_component.output,
+                                     "invalid partition %s specified", value);
+                continue;
+            }
+
+            partition = requested;
+            partition_name = value;
+        }
+    }
+
+    if (-1 != partition) {
+        memkind_t kind;
+
+        my_priority = 0;
+
+        if (!memkind_get_kind_by_partition (partition, &kind)) {
+            if (memkind_check_available (kind)) {
+                opal_output_verbose (MCA_BASE_VERBOSE_WARN, mca_mpool_memkind_component.output,
+                                     "kind %s not available", partition_name);
+            } else {
+                my_priority = 100;
+            }
+        }
+
+        /* modules[] only has MEMKIND_NUM_BASE_KIND slots */
+        if (module && partition >= 0 && partition < MEMKIND_NUM_BASE_KIND) {
+            *module = &mca_mpool_memkind_component.modules[partition].super;
+        }
+    }
+
+    opal_argv_free (hint_array);
+
+    if (priority_out) {
+        *priority_out = my_priority;
+    }
+
+    return OPAL_SUCCESS;
+}
diff --git a/opal/mca/mpool/memkind/mpool_memkind_module.c b/opal/mca/mpool/memkind/mpool_memkind_module.c
new file mode 100644
index 00000000000..f870e3b59ab
--- /dev/null
+++ b/opal/mca/mpool/memkind/mpool_memkind_module.c
@@ -0,0 +1,83 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2009-2012 Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * Copyright (c) 2011-2014 NVIDIA Corporation.  All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+#include <errno.h>
+#include "opal/mca/mpool/memkind/mpool_memkind.h"
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include "opal/mca/mpool/base/base.h"
+/* page size in bytes of each memkind partition, indexed by partition id */
+size_t partition_page_sizes[MEMKIND_NUM_BASE_KIND] = {
+    4096, 4096, 2097152, 4096, 2097152, 2097152,
+    1073741824, 1073741824, 1073741824, 4096, 4096,
+};
+
+/*
+ * Initializes the mpool module for one memkind partition.
+ */
+void mca_mpool_memkind_module_init(mca_mpool_memkind_module_t *mpool, int partition)
+{
+    mpool->super.mpool_component = &mca_mpool_memkind_component.super;
+    mpool->super.mpool_alloc = mca_mpool_memkind_alloc;
+    mpool->super.mpool_realloc = mca_mpool_memkind_realloc;
+    mpool->super.mpool_free = mca_mpool_memkind_free;
+    mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
+    memkind_get_kind_by_partition (partition, &mpool->kind);
+    mpool->page_size = partition_page_sizes[partition];
+}
+
+/* allocate from the module's kind; align 0 selects the partition page size */
+void* mca_mpool_memkind_alloc(
+    mca_mpool_base_module_t* mpool,
+    size_t size,
+    size_t align,
+    uint32_t flags)
+{
+    mca_mpool_memkind_module_t *memkind_module = (mca_mpool_memkind_module_t *) mpool;
+    void *addr;
+
+    if (0 == align) {
+        align = memkind_module->page_size;
+    }
+
+    if ((errno = memkind_posix_memalign(memkind_module->kind, &addr, align, size))!= 0){
+        return NULL;
+    }
+
+    return addr;
+}
+
+void* mca_mpool_memkind_realloc(mca_mpool_base_module_t *mpool, void *addr,
+                                size_t size)
+{
+    mca_mpool_memkind_module_t *memkind_module = (mca_mpool_memkind_module_t *) mpool;
+    return memkind_realloc (memkind_module->kind, addr, size);
+}
+
+void mca_mpool_memkind_free(mca_mpool_base_module_t *mpool, void *addr)
+{
+    mca_mpool_memkind_module_t *memkind_module = (mca_mpool_memkind_module_t *) mpool;
+    memkind_free(memkind_module->kind, addr);
+}
diff --git a/opal/mca/mpool/mpool.h b/opal/mca/mpool/mpool.h
index fbfb5ba7846..12de8619e88 100644
--- a/opal/mca/mpool/mpool.h
+++ b/opal/mca/mpool/mpool.h
@@ -29,128 +29,67 @@
 #include
"opal_config.h" #include "opal/mca/mca.h" #include "opal/class/opal_free_list.h" +#include "opal/mca/rcache/base/rcache_base_vma.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" +#define MCA_MPOOL_ALLOC_FLAG_DEFAULT 0x00 +#define MCA_MPOOL_ALLOC_FLAG_USER 0x01 -#define MCA_MPOOL_FLAGS_CACHE_BYPASS 0x01 -#define MCA_MPOOL_FLAGS_PERSIST 0x02 -#define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x04 -#define MCA_MPOOL_FLAGS_INVALID 0x08 -#define MCA_MPOOL_FLAGS_SO_MEM 0x10 -#define MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM 0x20 +#define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x80 struct opal_info_t; - -#define MCA_MPOOL_FLAGS_CUDA_GPU_MEM 0x40 - -/* Only valid in mpool flags. Used to indicate that no external memory - * hooks (ptmalloc2, etc) are required. */ -#define MCA_MPOOL_FLAGS_NO_HOOKS 0x80 - - -struct mca_mpool_base_resources_t; - -struct mca_mpool_base_registration_t { - opal_free_list_item_t super; - struct mca_mpool_base_module_t *mpool; - unsigned char* base; - unsigned char* bound; - unsigned char* alloc_base; - int32_t ref_count; - uint32_t flags; - void *mpool_context; -#if OPAL_CUDA_GDR_SUPPORT - unsigned long long gpu_bufID; -#endif /* OPAL_CUDA_GDR_SUPPORT */ -}; - -typedef struct mca_mpool_base_registration_t mca_mpool_base_registration_t; - -OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_base_registration_t); +struct mca_mpool_base_module_t; +typedef struct mca_mpool_base_module_t mca_mpool_base_module_t; /** - * component initialize + * component query function + * + * @param[in] hints memory pool hints in order of priority. this should + * be replaced by opal_info_t when the work to move + * info down to opal is complete. + * @param[out] priority relative priority of this memory pool component + * @param[out] module best match module + * + * This function should parse the provided hints and return a relative priority + * of the component based on the number of hints matched. 
For example, if the + * hints are "page_size=2M,high-bandwidth" and a pool matches the page_size but + * not the high-bandwidth hint then the component should return a lower priority + * than if both matched but a higher priority than if a pool matches only the + * high-bandwidth hint. + * + * Memory pools should try to support at a minimum name=value but can define + * any additional keys. */ -typedef struct mca_mpool_base_module_t* (*mca_mpool_base_component_init_fn_t)( - struct mca_mpool_base_resources_t*); +typedef int (*mca_mpool_base_component_query_fn_t) (const char *hints, int *priority, + mca_mpool_base_module_t **module); /** * allocate function typedef */ -typedef void* (*mca_mpool_base_module_alloc_fn_t)( - struct mca_mpool_base_module_t* mpool, - size_t size, - size_t align, - uint32_t flags, - mca_mpool_base_registration_t** registration); - +typedef void *(*mca_mpool_base_module_alloc_fn_t) (mca_mpool_base_module_t *mpool, + size_t size, size_t align, + uint32_t flags); + /** - * realloc function typedef + * reallocate function typedef */ -typedef void* (*mca_mpool_base_module_realloc_fn_t)( - struct mca_mpool_base_module_t* mpool, - void* addr, - size_t size, - mca_mpool_base_registration_t** registration); - +typedef void *(*mca_mpool_base_module_realloc_fn_t) (mca_mpool_base_module_t *mpool, + void *addr, size_t size); + /** * free function typedef */ -typedef void (*mca_mpool_base_module_free_fn_t)( - struct mca_mpool_base_module_t* mpool, - void *addr, - mca_mpool_base_registration_t* registration); - -/** - * register memory - */ -typedef int (*mca_mpool_base_module_register_fn_t)( - struct mca_mpool_base_module_t* mpool, - void * addr, - size_t size, - uint32_t flags, - mca_mpool_base_registration_t** registration); - -/** - * deregister memory - */ -typedef int (*mca_mpool_base_module_deregister_fn_t)( - struct mca_mpool_base_module_t* mpool, - mca_mpool_base_registration_t* registration); - -/** - * find registration in this memory pool - */ -
-typedef int (*mca_mpool_base_module_find_fn_t) ( - struct mca_mpool_base_module_t* mpool, void* addr, size_t size, - mca_mpool_base_registration_t **reg); - -/** - * release registration - */ - -typedef int (*mca_mpool_base_module_release_fn_t) ( - struct mca_mpool_base_module_t* mpool, - mca_mpool_base_registration_t* registration); - - -/** - * release memory region - */ -typedef int (*mca_mpool_base_module_release_memory_fn_t) ( - struct mca_mpool_base_module_t* mpool, void *base, size_t size); +typedef void (*mca_mpool_base_module_free_fn_t) (mca_mpool_base_module_t *mpool, + void *addr); /** * if appropriate - returns base address of memory pool */ -typedef void* (*mca_mpool_base_module_address_fn_t)(struct mca_mpool_base_module_t* mpool); +typedef void* (*mca_mpool_base_module_address_fn_t) (mca_mpool_base_module_t *mpool); /** * finalize */ -typedef void (*mca_mpool_base_module_finalize_fn_t)(struct mca_mpool_base_module_t*); +typedef void (*mca_mpool_base_module_finalize_fn_t)(mca_mpool_base_module_t *mpool); /** @@ -166,10 +105,10 @@ typedef int (*mca_mpool_base_module_ft_event_fn_t)(int state); * and open/close/init functions. */ struct mca_mpool_base_component_2_0_0_t { - mca_base_component_t mpool_version; /**< version */ - mca_base_component_data_t mpool_data;/**< metadata */ + mca_base_component_t mpool_version; /**< version */ + mca_base_component_data_t mpool_data;/**< metadata */ - mca_mpool_base_component_init_fn_t mpool_init; /**< init function */ + mca_mpool_base_component_query_fn_t mpool_query; /**< query for matching pools */ }; /** * Convenience typedef. @@ -186,25 +125,19 @@ typedef struct mca_mpool_base_component_2_0_0_t mca_mpool_base_component_t; * details. 
*/ struct mca_mpool_base_module_t { - mca_mpool_base_component_t *mpool_component; /**< component stuct */ + mca_mpool_base_component_t *mpool_component; /**< component struct */ mca_mpool_base_module_address_fn_t mpool_base; /**< returns the base address */ mca_mpool_base_module_alloc_fn_t mpool_alloc; /**< allocate function */ mca_mpool_base_module_realloc_fn_t mpool_realloc; /**< reallocate function */ mca_mpool_base_module_free_fn_t mpool_free; /**< free function */ - mca_mpool_base_module_register_fn_t mpool_register; /**< register memory */ - mca_mpool_base_module_deregister_fn_t mpool_deregister; /**< deregister memory */ - mca_mpool_base_module_find_fn_t mpool_find; /**< find regisrations in the cache */ - mca_mpool_base_module_release_fn_t mpool_release; /**< release a registration from the cache */ - mca_mpool_base_module_release_memory_fn_t mpool_release_memory; /**< release memor region from the cache */ + mca_mpool_base_module_finalize_fn_t mpool_finalize; /**< finalize */ mca_mpool_base_module_ft_event_fn_t mpool_ft_event; /**< ft_event */ - struct mca_rcache_base_module_t *rcache; /* the rcache associated with this mpool */ uint32_t flags; /**< mpool flags */ + + size_t mpool_allocation_unit; /**< allocation unit used by this mpool */ + char *mpool_name; /**< name of this pool module */ }; -/** - * Convenience typedef - */ -typedef struct mca_mpool_base_module_t mca_mpool_base_module_t; /** @@ -227,7 +160,7 @@ typedef struct mca_mpool_base_module_t mca_mpool_base_module_t; * @retval pointer to the allocated memory * @retval NULL on failure */ -OPAL_DECLSPEC void * mca_mpool_base_alloc(size_t size, struct opal_info_t * info); +OPAL_DECLSPEC void * mca_mpool_base_alloc(size_t size, struct opal_info_t * info, const char *hints); /** * Function to free memory previously allocated by mca_mpool_base_alloc @@ -237,7 +170,7 @@ OPAL_DECLSPEC void * mca_mpool_base_alloc(size_t size, struct opal_info_t * info * @retval OPAL_SUCCESS * @retval OPAL_ERR_BAD_PARAM
if the passed base pointer was invalid */ -OPAL_DECLSPEC int mca_mpool_base_free(void * base); +OPAL_DECLSPEC int mca_mpool_base_free(void * base); /** * Function for the red black tree to compare 2 keys @@ -251,21 +184,11 @@ OPAL_DECLSPEC int mca_mpool_base_free(void * base); */ OPAL_DECLSPEC int mca_mpool_base_tree_node_compare(void * key1, void * key2); - -OPAL_DECLSPEC int mca_mpool_base_insert( - void * addr, - size_t size, - mca_mpool_base_module_t* mpool, - void* user_in, - mca_mpool_base_registration_t* registration); - -OPAL_DECLSPEC int mca_mpool_base_remove(void * base); - /** * Macro for use in components that are of type mpool */ -#define MCA_MPOOL_BASE_VERSION_2_0_0 \ - OPAL_MCA_BASE_VERSION_2_1_0("mpool", 2, 0, 0) +#define MCA_MPOOL_BASE_VERSION_3_0_0 \ + OPAL_MCA_BASE_VERSION_2_1_0("mpool", 3, 0, 0) #endif /* MCA_MPOOL_H */ diff --git a/opal/mca/mpool/rgpusm/Makefile.am b/opal/mca/mpool/rgpusm/Makefile.am deleted file mode 100644 index eecc5e941ed..00000000000 --- a/opal/mca/mpool/rgpusm/Makefile.am +++ /dev/null @@ -1,57 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012 NVIDIA Corporation. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(mpool_rgpusm_CPPFLAGS) - -sources = \ - mpool_rgpusm_module.c \ - mpool_rgpusm_component.c - -if WANT_INSTALL_HEADERS -opaldir = $(opalincludedir)/$(subdir) -opal_HEADERS = mpool_rgpusm.h -endif - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_mpool_rgpusm_DSO -component_noinst = -component_install = mca_mpool_rgpusm.la -else -component_noinst = libmca_mpool_rgpusm.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_mpool_rgpusm_la_SOURCES = $(sources) -mca_mpool_rgpusm_la_LDFLAGS = -module -avoid-version -mca_mpool_rgpusm_la_LIBADD = $(mpool_rgpusm_LIBS) -if OPAL_cuda_support -mca_mpool_rgpusm_la_LIBADD += \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/cuda/lib@OPAL_LIB_PREFIX@mca_common_cuda.la -endif - -noinst_LTLIBRARIES = $(component_noinst) -libmca_mpool_rgpusm_la_SOURCES = $(sources) -libmca_mpool_rgpusm_la_LDFLAGS = -module -avoid-version -libmca_mpool_rgpusm_la_LIBADD = $(mpool_rgpusm_LIBS) diff --git a/opal/mca/mpool/rgpusm/configure.m4 b/opal/mca/mpool/rgpusm/configure.m4 deleted file mode 100644 index 3d887ec0bb2..00000000000 --- a/opal/mca/mpool/rgpusm/configure.m4 +++ /dev/null @@ -1,25 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2012 NVIDIA Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# -# If CUDA support was requested, then build the CUDA memory pools. -# This code checks the variable CUDA_SUPPORT which was set earlier in -# the configure sequence by the opal_configure_options.m4 code. -# - -AC_DEFUN([MCA_opal_mpool_rgpusm_CONFIG],[ - AC_CONFIG_FILES([opal/mca/mpool/rgpusm/Makefile]) - - # Use CUDA_SUPPORT which was filled in by the opal configure code. 
- AS_IF([test "x$CUDA_SUPPORT_41" = "x1"], - [$1], - [$2]) - -])dnl diff --git a/opal/mca/mpool/rgpusm/mpool_rgpusm.h b/opal/mca/mpool/rgpusm/mpool_rgpusm.h deleted file mode 100644 index bcca8942ad6..00000000000 --- a/opal/mca/mpool/rgpusm/mpool_rgpusm.h +++ /dev/null @@ -1,120 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2012 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_MPOOL_RGPUSM_H -#define MCA_MPOOL_RGPUSM_H - -#include "opal_config.h" -#include "opal/class/opal_list.h" -#include "opal/class/opal_free_list.h" -#include "opal/mca/mpool/mpool.h" - -BEGIN_C_DECLS - -struct mca_mpool_rgpusm_component_t { - mca_mpool_base_component_t super; - char* rcache_name; - unsigned long long rcache_size_limit; - bool print_stats; - int leave_pinned; - int output; -}; -typedef struct mca_mpool_rgpusm_component_t mca_mpool_rgpusm_component_t; - -OPAL_DECLSPEC extern mca_mpool_rgpusm_component_t mca_mpool_rgpusm_component; - -struct mca_mpool_base_resources_t { - void *reg_data; - size_t sizeof_reg; - int (*register_mem)(void *base, size_t size, mca_mpool_base_registration_t *newreg, - mca_mpool_base_registration_t *hdrreg); - int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg); -}; -typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t; - -struct mca_mpool_rgpusm_module_t { - mca_mpool_base_module_t super; - struct mca_mpool_base_resources_t resources; - opal_free_list_t reg_list; - opal_list_t lru_list; - uint32_t stat_cache_hit; - uint32_t stat_cache_valid; - uint32_t stat_cache_invalid; - uint32_t stat_cache_miss; - uint32_t stat_evicted; - uint32_t stat_cache_found; - uint32_t stat_cache_notfound; -}; typedef struct mca_mpool_rgpusm_module_t mca_mpool_rgpusm_module_t; - -/* - * Initializes the mpool module. 
- */ -void mca_mpool_rgpusm_module_init(mca_mpool_rgpusm_module_t *mpool); - -/** - * register block of memory - */ -int mca_mpool_rgpusm_register(mca_mpool_base_module_t* mpool, void *addr, - size_t size, uint32_t flags, mca_mpool_base_registration_t **reg); - -/** - * deregister memory - */ -int mca_mpool_rgpusm_deregister(mca_mpool_base_module_t *mpool, - mca_mpool_base_registration_t *reg); - -/** - * free memory allocated by alloc function - */ -void mca_mpool_rgpusm_free(mca_mpool_base_module_t *mpool, void * addr, - mca_mpool_base_registration_t *reg); - -/** - * find registration for a given block of memory - */ -int mca_mpool_rgpusm_find(struct mca_mpool_base_module_t* mpool, void* addr, - size_t size, mca_mpool_base_registration_t **reg); - -/** - * unregister all registration covering the block of memory - */ -int mca_mpool_rgpusm_release_memory(mca_mpool_base_module_t* mpool, void *base, - size_t size); - -/** - * finalize mpool - */ -void mca_mpool_rgpusm_finalize(struct mca_mpool_base_module_t *mpool); - -/** - * Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OPAL_SUCCESS or failure status - */ -int mca_mpool_rgpusm_ft_event(int state); - -END_C_DECLS -#endif diff --git a/opal/mca/mpool/rgpusm/mpool_rgpusm_component.c b/opal/mca/mpool/rgpusm/mpool_rgpusm_component.c deleted file mode 100644 index 20179db4635..00000000000 --- a/opal/mca/mpool/rgpusm/mpool_rgpusm_component.c +++ /dev/null @@ -1,153 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1 -#include "opal_config.h" -#include "opal/mca/base/base.h" -#include "mpool_rgpusm.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_MALLOC_H -#include -#endif - -/* - * Local functions - */ -static int rgpusm_open(void); -static int rgpusm_close(void); -static int rgpusm_register(void); -static mca_mpool_base_module_t* rgpusm_init(struct mca_mpool_base_resources_t* resources); - -static int opal_mpool_rgpusm_verbose = 0; - -mca_mpool_rgpusm_component_t mca_mpool_rgpusm_component = { - { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .mpool_version = { - MCA_MPOOL_BASE_VERSION_2_0_0, - - .mca_component_name = "rgpusm", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - .mca_open_component = rgpusm_open, - .mca_close_component = rgpusm_close, - .mca_register_component_params = rgpusm_register, - }, - .mpool_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .mpool_init = rgpusm_init - } -}; - -/** - * component open/close/init function - */ -static int rgpusm_open(void) -{ - mca_mpool_rgpusm_component.output = opal_output_open(NULL); - opal_output_set_verbosity(mca_mpool_rgpusm_component.output, opal_mpool_rgpusm_verbose); - - return OPAL_SUCCESS; -} - - -static int rgpusm_register(void) -{ - mca_mpool_rgpusm_component.rcache_name = "vma"; - (void) 
mca_base_component_var_register(&mca_mpool_rgpusm_component.super.mpool_version, - "rcache_name", - "The name of the registration cache the mpool should use", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_mpool_rgpusm_component.rcache_name); - mca_mpool_rgpusm_component.rcache_size_limit = 0; - (void) mca_base_component_var_register(&mca_mpool_rgpusm_component.super.mpool_version, - "rcache_size_limit", - "the maximum size of registration cache in bytes. " - "0 is unlimited (default 0)", - MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_mpool_rgpusm_component.rcache_size_limit); - - mca_mpool_rgpusm_component.leave_pinned = 1; - (void) mca_base_component_var_register(&mca_mpool_rgpusm_component.super.mpool_version, - "leave_pinned", - "Whether to keep memory handles around or release them when done. ", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_mpool_rgpusm_component.leave_pinned); - - mca_mpool_rgpusm_component.print_stats = false; - (void) mca_base_component_var_register(&mca_mpool_rgpusm_component.super.mpool_version, - "print_stats", - "print pool usage statistics at the end of the run", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_mpool_rgpusm_component.print_stats); - - /* Set different levels of verbosity in the rgpusm related code. 
*/ - opal_mpool_rgpusm_verbose = 0; - (void) mca_base_component_var_register(&mca_mpool_rgpusm_component.super.mpool_version, - "verbose", "Set level of mpool rgpusm verbosity", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &opal_mpool_rgpusm_verbose); - - return OPAL_SUCCESS; -} - - -static int rgpusm_close(void) -{ - return OPAL_SUCCESS; -} - - -static mca_mpool_base_module_t* rgpusm_init( - struct mca_mpool_base_resources_t *resources) -{ - mca_mpool_rgpusm_module_t* mpool_module; - - mpool_module = - (mca_mpool_rgpusm_module_t*)malloc(sizeof(mca_mpool_rgpusm_module_t)); - - mpool_module->resources = *resources; - - mca_mpool_rgpusm_module_init(mpool_module); - - return &mpool_module->super; -} diff --git a/opal/mca/mpool/rgpusm/mpool_rgpusm_module.c b/opal/mca/mpool/rgpusm/mpool_rgpusm_module.c deleted file mode 100644 index 59c7296b42a..00000000000 --- a/opal/mca/mpool/rgpusm/mpool_rgpusm_module.c +++ /dev/null @@ -1,637 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. - * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2012-2014 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file: - * - * This memory pool is used for getting the memory handle of remote - * GPU memory when using CUDA. Hence, the name is "rgpusm" for "remote - * CUDA" GPU memory. There is a cache that can be used to store the - * remote handles in case they are reused to save on the registration - * cost as that can be expensive, on the order of 100 usecs. The - * cache can also be used just to track how many handles are in use at - * a time. It is best to look at this with the three different - * scenarios that are possible. - * 1. mpool_rgpusm_leave_pinned=0, cache_size=unlimited - * 2. mpool_rgpusm_leave_pinned=0, cache_size=limited - * 3. mpool_rgpusm_leave_pinned=1, cache_size=unlimited (default) - * 4. mpool_rgpusm_leave_pinned=1, cache_size=limited. - * - * Case 1: The cache is unused and remote memory is registered and - * unregistered for each transaction. The amount of outstanding - * registered memory is unlimited. - * Case 2: The cache keeps track of how much memory is registered at a - * time. Since leave pinned is 0, any memory that is registered is in - * use. If the amount to register exceeds the amount, we will error - * out. This could be handled more gracefully, but this is not a - * common way to run, so we will leave as is. - * Case 3: The cache is needed to track current and past transactions. - * However, there is no limit on the number that can be stored. - * Therefore, once memory enters the cache, and gets registered, it - * stays that way forever. - * Case 4: The cache is needed to track current and past transactions. - * In addition, a list of most recently used (but no longer in use) - * registrations is stored so that it can be used to evict - * registrations from the cache. In addition, these registrations are - * deregistered. 
- * - * I also want to capture how we can run into the case where we do not - * find something in the cache, but when we try to register it, we get - * an error back from the CUDA library saying the memory is in use. - * This can happen in the following scenario. The application mallocs - * a buffer of size 32K. The library loads this in the cache and - * registers it. The application then frees the buffer. It then - * mallocs a buffer of size 64K. This malloc returns the same base - * address as the first 32K allocation. The library searches the - * cache, but since the size is larger than the original allocation it - * does not find the registration. It then attempts to register this. - * The CUDA library returns an error saying it is already mapped. To - * handle this, we return an error of OPAL_ERR_WOULD_BLOCK to the - * memory pool. The memory pool then looks for the registration based - * on the base address and a size of 4. We use the small size to make - * sure that we find the registration. This registration is evicted, - * and we try to register again. 
- */ - -#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1 -#include "opal_config.h" -#include "opal/align.h" -#include "opal/mca/mpool/rgpusm/mpool_rgpusm.h" -#include -#include -#ifdef HAVE_MALLOC_H -#include -#endif -#include "opal/util/proc.h" -#include "opal/mca/rcache/rcache.h" -#include "opal/mca/rcache/base/base.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/common/cuda/common_cuda.h" - - -static int mca_mpool_rgpusm_deregister_no_lock(struct mca_mpool_base_module_t *, - mca_mpool_base_registration_t *); -static inline bool mca_mpool_rgpusm_deregister_lru (mca_mpool_base_module_t *mpool) { - mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t *) mpool; - mca_mpool_base_registration_t *old_reg; - int rc; - - /* Remove the registration from the cache and list before - deregistering the memory */ - old_reg = (mca_mpool_base_registration_t*) - opal_list_remove_first (&mpool_rgpusm->lru_list); - if (NULL == old_reg) { - return false; - } - - mpool->rcache->rcache_delete(mpool->rcache, old_reg); - - /* Drop the rcache lock while we deregister the memory */ - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - assert(old_reg->ref_count == 0); - rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data, - old_reg); - OPAL_THREAD_LOCK(&mpool->rcache->lock); - - /* This introduces a potential leak of registrations if - the deregistration fails to occur as we no longer have - a reference to it. Is this possible? */ - if (OPAL_SUCCESS != rc) { - return false; - } - - opal_free_list_return (&mpool_rgpusm->reg_list, - (opal_free_list_item_t*)old_reg); - mpool_rgpusm->stat_evicted++; - - return true; -} - - -/* - * Initializes the mpool module. - */ -void mca_mpool_rgpusm_module_init(mca_mpool_rgpusm_module_t* mpool) -{ - mpool->super.mpool_component = &mca_mpool_rgpusm_component.super; - mpool->super.mpool_base = NULL; /* no base .. 
*/ - mpool->super.mpool_alloc = NULL; - mpool->super.mpool_realloc = NULL; - mpool->super.mpool_free = mca_mpool_rgpusm_free; - mpool->super.mpool_register = mca_mpool_rgpusm_register; - mpool->super.mpool_find = mca_mpool_rgpusm_find; - mpool->super.mpool_deregister = mca_mpool_rgpusm_deregister; - mpool->super.mpool_release_memory = NULL; - mpool->super.mpool_finalize = mca_mpool_rgpusm_finalize; - mpool->super.mpool_ft_event = mca_mpool_rgpusm_ft_event; - mpool->super.rcache = - mca_rcache_base_module_create(mca_mpool_rgpusm_component.rcache_name); - mpool->super.flags = 0; - - mpool->resources.reg_data = NULL; - mpool->resources.sizeof_reg = sizeof(struct mca_mpool_common_cuda_reg_t); - mpool->resources.register_mem = cuda_openmemhandle; - mpool->resources.deregister_mem = cuda_closememhandle; - - OBJ_CONSTRUCT(&mpool->reg_list, opal_free_list_t); - opal_free_list_init (&mpool->reg_list, mpool->resources.sizeof_reg, - opal_cache_line_size, - OBJ_CLASS(mca_mpool_base_registration_t), - 0,opal_cache_line_size, - 0, -1, 32, NULL, 0, NULL, NULL, NULL); - OBJ_CONSTRUCT(&mpool->lru_list, opal_list_t); - mpool->stat_cache_hit = mpool->stat_cache_miss = mpool->stat_evicted = 0; - mpool->stat_cache_found = mpool->stat_cache_notfound = 0; - mpool->stat_cache_valid = mpool->stat_cache_invalid = 0; - -} - -/* - * This function opens and handle using the handle that was received - * from the remote memory. It uses the addr and size of the remote - * memory for caching the registration. 
- */ -int mca_mpool_rgpusm_register(mca_mpool_base_module_t *mpool, void *addr, - size_t size, uint32_t flags, - mca_mpool_base_registration_t **reg) -{ - mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool; - mca_mpool_common_cuda_reg_t *rgpusm_reg; - mca_mpool_common_cuda_reg_t *rget_reg; - opal_free_list_item_t *item; - int rc; - int mypeer; /* just for debugging */ - - /* In order to preserve the signature of the mca_mpool_rgpusm_register - * function, we are using the **reg variable to not only get back the - * registration information, but to hand in the memory handle received - * from the remote side. */ - rget_reg = (mca_mpool_common_cuda_reg_t *)*reg; - - mypeer = flags; - flags = 0; - /* No need to support MCA_MPOOL_FLAGS_CACHE_BYPASS in here. It is not used. */ - assert(0 == (flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)); - - /* This chunk of code handles the case where leave pinned is not - * set and we do not use the cache. This is not typically how we - * will be running. This means that one can have an unlimited - * number of registrations occuring at the same time. Since we - * are not leaving the registrations pinned, the number of - * registrations is unlimited and there is no need for a cache. */ - if(!mca_mpool_rgpusm_component.leave_pinned && 0 == mca_mpool_rgpusm_component.rcache_size_limit) { - item = opal_free_list_get (&mpool_rgpusm->reg_list); - if(NULL == item) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - rgpusm_reg = (mca_mpool_common_cuda_reg_t*)item; - rgpusm_reg->base.mpool = mpool; - rgpusm_reg->base.base = addr; - rgpusm_reg->base.bound = (unsigned char *)addr + size - 1;; - rgpusm_reg->base.flags = flags; - - /* Copy the memory handle received into the registration */ - memcpy(rgpusm_reg->data.memHandle, rget_reg->data.memHandle, sizeof(rget_reg->data.memHandle)); - - /* The rget_reg registration is holding the memory handle needed - * to register the remote memory. This was received from the remote - * process. 
A pointer to the memory is returned in the alloc_base field. */ - rc = mpool_rgpusm->resources.register_mem(addr, size, - (mca_mpool_base_registration_t *)rgpusm_reg, - (mca_mpool_base_registration_t *)rget_reg); - - /* This error should not happen with no cache in use. */ - assert(OPAL_ERR_WOULD_BLOCK != rc); - - if(rc != OPAL_SUCCESS) { - opal_free_list_return (&mpool_rgpusm->reg_list, item); - return rc; - } - rgpusm_reg->base.ref_count++; - *reg = (mca_mpool_base_registration_t *)rgpusm_reg; - return OPAL_SUCCESS; - } - - /* Check to see if memory is registered and stored in the cache. */ - OPAL_THREAD_LOCK(&mpool->rcache->lock); - mpool->rcache->rcache_find(mpool->rcache, addr, size, reg); - - /* If *reg is not NULL, we have a registration. Let us see if the - * memory handle matches the one we were looking for. If not, the - * registration is invalid and needs to be removed. This happens - * if memory was allocated, freed, and allocated again and ends up - * with the same virtual address and within the limits of the - * previous registration. The memory handle check will catch that - * scenario as the handles have unique serial numbers. */ - if (*reg != NULL) { - mpool_rgpusm->stat_cache_hit++; - opal_output_verbose(10, mca_mpool_rgpusm_component.output, - "RGPUSM: Found addr=%p,size=%d (base=%p,size=%d) in cache", - addr, (int)size, (*reg)->base, - (int)((*reg)->bound - (*reg)->base)); - - if (mca_common_cuda_memhandle_matches((mca_mpool_common_cuda_reg_t *)*reg, rget_reg)) { - /* Registration matches what was requested. All is good. */ - mpool_rgpusm->stat_cache_valid++; - } else { - /* This is an old registration. Need to boot it. */ - opal_output_verbose(10, mca_mpool_rgpusm_component.output, - "RGPUSM: Mismatched Handle: Evicting/unregistering " - "addr=%p,size=%d (base=%p,size=%d) from cache", - addr, (int)size, (*reg)->base, - (int)((*reg)->bound - (*reg)->base)); - - /* The ref_count has to be zero as this memory cannot possibly - * be in use. 
Assert on that just to make sure. */ - assert(0 == (*reg)->ref_count); - if (mca_mpool_rgpusm_component.leave_pinned) { - opal_list_remove_item(&mpool_rgpusm->lru_list, - (opal_list_item_t*)(*reg)); - } - - /* Bump the reference count to keep things copacetic in deregister */ - (*reg)->ref_count++; - /* Invalidate the registration so it will get booted out. */ - (*reg)->flags |= MCA_MPOOL_FLAGS_INVALID; - mca_mpool_rgpusm_deregister_no_lock(mpool, *reg); - *reg = NULL; - mpool_rgpusm->stat_cache_invalid++; - } - } else { - /* Nothing was found in the cache. */ - mpool_rgpusm->stat_cache_miss++; - } - - /* If we have a registration here, then we know it is valid. */ - if (*reg != NULL) { - opal_output_verbose(10, mca_mpool_rgpusm_component.output, - "RGPUSM: CACHE HIT is good: ep=%d, addr=%p, size=%d in cache", - mypeer, addr, (int)size); - - /* When using leave pinned, we keep an LRU list. */ - if ((0 == (*reg)->ref_count) && mca_mpool_rgpusm_component.leave_pinned) { - opal_output_verbose(20, mca_mpool_rgpusm_component.output, - "RGPUSM: POP OFF LRU: ep=%d, addr=%p, size=%d in cache", - mypeer, addr, (int)size); - opal_list_remove_item(&mpool_rgpusm->lru_list, - (opal_list_item_t*)(*reg)); - } - (*reg)->ref_count++; - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - opal_output(-1, "reg->ref_count=%d", (int)(*reg)->ref_count); - opal_output_verbose(80, mca_mpool_rgpusm_component.output, - "RGPUSM: Found entry in cache addr=%p, size=%d", addr, (int)size); - return OPAL_SUCCESS; - } - - /* If we are here, then we did not find a registration, or it was invalid, - * so this is a new one, and we are going to use the cache. */ - assert(NULL == *reg); - opal_output_verbose(10, mca_mpool_rgpusm_component.output, - "RGPUSM: New registration ep=%d, addr=%p, size=%d. 
Need to register and insert in cache", - mypeer, addr, (int)size); - - item = opal_free_list_get (&mpool_rgpusm->reg_list); - if(NULL == item) { - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - return OPAL_ERR_OUT_OF_RESOURCE; - } - rgpusm_reg = (mca_mpool_common_cuda_reg_t*)item; - - rgpusm_reg->base.mpool = mpool; - rgpusm_reg->base.base = addr; - rgpusm_reg->base.bound = (unsigned char *)addr + size - 1; - rgpusm_reg->base.flags = flags; - - /* Need the memory handle saved in the registration */ - memcpy(rgpusm_reg->data.memHandle, rget_reg->data.memHandle, sizeof(rget_reg->data.memHandle)); - - /* Actually register the memory, which opens the memory handle. - * Need to do this prior to putting in the cache as the base and - * bound values may be changed by the registration. The memory - * associated with the handle comes back in the alloc_base - * value. */ - rc = mpool_rgpusm->resources.register_mem(addr, size, (mca_mpool_base_registration_t *)rgpusm_reg, - (mca_mpool_base_registration_t *)rget_reg); - /* There is a chance we can get the OPAL_ERR_WOULD_BLOCK from the - * CUDA codes attempt to register the memory. The case that this - * can happen is as follows. A block of memory is registered. - * Then the sending side frees the memory. The sending side then - * cuMemAllocs memory again and gets the same base - * address. However, it cuMemAllocs a block that is larger than - * the one in the cache. The cache will return that memory is not - * registered and call into CUDA to register it. However, that - * will fail with CUDA_ERROR_ALREADY_MAPPED. Therefore we need to - * boot that previous allocation out and deregister it first. - */ - if (OPAL_ERR_WOULD_BLOCK == rc) { - mca_mpool_base_registration_t *oldreg; - - /* Need to make sure it is at least 4 bytes in size This will - * ensure we get the hit in the cache. */ - mpool->rcache->rcache_find(mpool->rcache, addr, 4, &oldreg); - - /* For most cases, we will find a registration that overlaps. 
- * Removal of it should allow the registration we are - * attempting to succeed. */ - if (NULL != oldreg) { - /* The ref_count has to be zero as this memory cannot - * possibly be in use. Assert on that just to make sure. */ - assert(0 == oldreg->ref_count); - if (mca_mpool_rgpusm_component.leave_pinned) { - opal_list_remove_item(&mpool_rgpusm->lru_list, - (opal_list_item_t*)oldreg); - } - - /* Bump the reference count to keep things copacetic in deregister */ - oldreg->ref_count++; - /* Invalidate the registration so it will get booted out. */ - oldreg->flags |= MCA_MPOOL_FLAGS_INVALID; - mca_mpool_rgpusm_deregister_no_lock(mpool, oldreg); - mpool_rgpusm->stat_evicted++; - - /* And try again. This one usually works. */ - rc = mpool_rgpusm->resources.register_mem(addr, size, (mca_mpool_base_registration_t *)rgpusm_reg, - (mca_mpool_base_registration_t *)rget_reg); - } - - /* There is a chance that another registration is blocking our - * ability to register. Check the rc to see if we still need - * to try and clear out registrations. */ - while (OPAL_SUCCESS != rc) { - if (true != mca_mpool_rgpusm_deregister_lru(mpool)) { - rc = OPAL_ERROR; - break; - } - /* Clear out one registration. 
*/ - rc = mpool_rgpusm->resources.register_mem(addr, size, (mca_mpool_base_registration_t *)rgpusm_reg, - (mca_mpool_base_registration_t *)rget_reg); - } - } - - if(rc != OPAL_SUCCESS) { - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - opal_free_list_return (&mpool_rgpusm->reg_list, item); - return rc; - } - - opal_output_verbose(80, mca_mpool_rgpusm_component.output, - "RGPUSM: About to insert in rgpusm cache addr=%p, size=%d", addr, (int)size); - while((rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg, - mca_mpool_rgpusm_component.rcache_size_limit)) == - OPAL_ERR_TEMP_OUT_OF_RESOURCE) { - opal_output(-1, "No room in the cache - boot one out"); - if (!mca_mpool_rgpusm_deregister_lru(mpool)) { - break; - } - } - - if(rc != OPAL_SUCCESS) { - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - opal_free_list_return (&mpool_rgpusm->reg_list, item); - /* We cannot recover from this. We can be here if the size of - * the cache is smaller than the amount of memory we are - * trying to register in a single transfer. In that case, rc - * is MPI_ERR_OUT_OF_RESOURCES, but everything is stuck at - * that point. Therefore, just error out completely. 
- */ - return OPAL_ERROR; - } - - rgpusm_reg->base.ref_count++; - *reg = (mca_mpool_base_registration_t *)rgpusm_reg; - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - - /* Cleanup any vmas that we have deferred deletion on */ - mpool->rcache->rcache_clean(mpool->rcache); - return OPAL_SUCCESS; -} - - -/** - * free function - */ -void mca_mpool_rgpusm_free(mca_mpool_base_module_t *mpool, void *addr, - mca_mpool_base_registration_t *registration) -{ - void *alloc_base = registration->alloc_base; - mca_mpool_rgpusm_deregister(mpool, registration); - free(alloc_base); -} - -int mca_mpool_rgpusm_find(struct mca_mpool_base_module_t *mpool, void *addr, - size_t size, mca_mpool_base_registration_t **reg) -{ - mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool; - int rc; - unsigned char *base, *bound; - - base = addr; - bound = base + size - 1; /* To keep cache hits working correctly */ - - OPAL_THREAD_LOCK(&mpool->rcache->lock); - opal_output(-1, "Looking for addr=%p, size=%d", addr, (int)size); - rc = mpool->rcache->rcache_find(mpool->rcache, addr, size, reg); - if(*reg != NULL && mca_mpool_rgpusm_component.leave_pinned) { - if(0 == (*reg)->ref_count && mca_mpool_rgpusm_component.leave_pinned) { - opal_list_remove_item(&mpool_rgpusm->lru_list, (opal_list_item_t*)(*reg)); - } - mpool_rgpusm->stat_cache_found++; - (*reg)->ref_count++; - } else { - mpool_rgpusm->stat_cache_notfound++; - } - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - - return rc; -} - -static inline bool registration_is_cachebale(mca_mpool_base_registration_t *reg) -{ - return !(reg->flags & - (MCA_MPOOL_FLAGS_CACHE_BYPASS | - MCA_MPOOL_FLAGS_INVALID)); -} - -int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool, - mca_mpool_base_registration_t *reg) -{ - mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool; - int rc = OPAL_SUCCESS; - assert(reg->ref_count > 0); - - OPAL_THREAD_LOCK(&mpool->rcache->lock); - reg->ref_count--; - opal_output(-1, 
"Deregister: reg->ref_count=%d", (int)reg->ref_count); - if(reg->ref_count > 0) { - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - return OPAL_SUCCESS; - } - if(mca_mpool_rgpusm_component.leave_pinned && registration_is_cachebale(reg)) - { - /* if leave_pinned is set don't deregister memory, but put it - * on LRU list for future use */ - opal_list_prepend(&mpool_rgpusm->lru_list, (opal_list_item_t*)reg); - } else { - /* Remove from rcache first */ - if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) - mpool->rcache->rcache_delete(mpool->rcache, reg); - - /* Drop the rcache lock before deregistring the memory */ - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - - { - mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t *)mpool; - - assert(reg->ref_count == 0); - rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data, - reg); - } - - OPAL_THREAD_LOCK(&mpool->rcache->lock); - - if(OPAL_SUCCESS == rc) { - opal_free_list_return (&mpool_rgpusm->reg_list, - (opal_free_list_item_t*)reg); - } - } - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - - /* Cleanup any vmas that we have deferred deletion on */ - mpool->rcache->rcache_clean(mpool->rcache); - - return rc; -} - -int mca_mpool_rgpusm_deregister_no_lock(struct mca_mpool_base_module_t *mpool, - mca_mpool_base_registration_t *reg) -{ - mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool; - int rc = OPAL_SUCCESS; - assert(reg->ref_count > 0); - - reg->ref_count--; - opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count); - if(reg->ref_count > 0) { - return OPAL_SUCCESS; - } - if(mca_mpool_rgpusm_component.leave_pinned && registration_is_cachebale(reg)) - { - /* if leave_pinned is set don't deregister memory, but put it - * on LRU list for future use */ - opal_list_prepend(&mpool_rgpusm->lru_list, (opal_list_item_t*)reg); - } else { - /* Remove from rcache first */ - if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) - mpool->rcache->rcache_delete(mpool->rcache, 
reg); - - { - mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t *)mpool; - - assert(reg->ref_count == 0); - rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data, - reg); - } - - if(OPAL_SUCCESS == rc) { - opal_free_list_return (&mpool_rgpusm->reg_list, - (opal_free_list_item_t*)reg); - } - } - - return rc; -} - -#define RGPUSM_MPOOL_NREGS 100 - -void mca_mpool_rgpusm_finalize(struct mca_mpool_base_module_t *mpool) -{ - mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool; - mca_mpool_base_registration_t *reg; - mca_mpool_base_registration_t *regs[RGPUSM_MPOOL_NREGS]; - int reg_cnt, i; - int rc; - - /* Statistic */ - if(true == mca_mpool_rgpusm_component.print_stats) { - opal_output(0, "%s rgpusm: stats " - "(hit/valid/invalid/miss/evicted): %d/%d/%d/%d/%d\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mpool_rgpusm->stat_cache_hit, mpool_rgpusm->stat_cache_valid, - mpool_rgpusm->stat_cache_invalid, mpool_rgpusm->stat_cache_miss, - mpool_rgpusm->stat_evicted); - } - - OPAL_THREAD_LOCK(&mpool->rcache->lock); - do { - reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1, - regs, RGPUSM_MPOOL_NREGS); - opal_output(-1, "Registration size at finalize = %d", reg_cnt); - - for(i = 0; i < reg_cnt; i++) { - reg = regs[i]; - - if(reg->ref_count) { - reg->ref_count = 0; /* otherway dereg will fail on assert */ - } else if (mca_mpool_rgpusm_component.leave_pinned) { - opal_list_remove_item(&mpool_rgpusm->lru_list, - (opal_list_item_t*)reg); - } - - /* Remove from rcache first */ - mpool->rcache->rcache_delete(mpool->rcache, reg); - - /* Drop lock before deregistering memory */ - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - assert(reg->ref_count == 0); - rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data, - reg); - OPAL_THREAD_LOCK(&mpool->rcache->lock); - - if(rc != OPAL_SUCCESS) { - /* Potentially lose track of registrations - do we have to put it back? 
*/ - continue; - } - - opal_free_list_return (&mpool_rgpusm->reg_list, - (opal_free_list_item_t *) reg); - } - } while(reg_cnt == RGPUSM_MPOOL_NREGS); - - OBJ_DESTRUCT(&mpool_rgpusm->lru_list); - OBJ_DESTRUCT(&mpool_rgpusm->reg_list); - OPAL_THREAD_UNLOCK(&mpool->rcache->lock); - - /* Cleanup any vmas that we have deferred deletion on */ - mpool->rcache->rcache_clean(mpool->rcache); - -} - -int mca_mpool_rgpusm_ft_event(int state) { - return OPAL_SUCCESS; -} diff --git a/opal/mca/mpool/sm/Makefile.am b/opal/mca/mpool/sm/Makefile.am deleted file mode 100644 index 528242e46ef..00000000000 --- a/opal/mca/mpool/sm/Makefile.am +++ /dev/null @@ -1,54 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2013 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011 NVIDIA Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - mpool_sm.h \ - mpool_sm_module.c \ - mpool_sm_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_mpool_sm_DSO -component_noinst = -component_install = mca_mpool_sm.la -else -component_noinst = libmca_mpool_sm.la -component_install = -endif - -# See opal/mca/common/sm/Makefile.am for an explanation of -# libmca_common_sm.la. 
- -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_mpool_sm_la_SOURCES = $(sources) -mca_mpool_sm_la_LDFLAGS = -module -avoid-version -mca_mpool_sm_la_LIBADD = \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/sm/lib@OPAL_LIB_PREFIX@mca_common_sm.la -if OPAL_cuda_support -mca_mpool_sm_la_LIBADD += \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/cuda/lib@OPAL_LIB_PREFIX@mca_common_cuda.la -endif - -noinst_LTLIBRARIES = $(component_noinst) -libmca_mpool_sm_la_SOURCES = $(sources) -libmca_mpool_sm_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/mpool/sm/mpool_sm.h b/opal/mca/mpool/sm/mpool_sm.h deleted file mode 100644 index d48415c56da..00000000000 --- a/opal/mca/mpool/sm/mpool_sm.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_MPOOL_SM_H -#define MCA_MPOOL_SM_H - -#include "opal_config.h" - -#include "opal/mca/event/event.h" -#include "opal/mca/shmem/shmem.h" - -#include "opal/mca/common/sm/common_sm.h" -#include "opal/mca/mpool/mpool.h" -#include "opal/mca/allocator/allocator.h" - -BEGIN_C_DECLS - -struct mca_mpool_sm_component_t { - mca_mpool_base_component_t super; - /* mca_allocator_base_module_t* sm_allocator; */ - char *sm_allocator_name; - int verbose; - /* struct mca_mpool_sm_mmap_t *sm_mmap; */ -}; -typedef struct mca_mpool_sm_component_t mca_mpool_sm_component_t; - -typedef struct mca_mpool_base_resources_t { - size_t size; - int32_t mem_node; - /* backing store metadata */ - opal_shmem_ds_t bs_meta_buf; -} mca_mpool_base_resources_t; - -OPAL_MODULE_DECLSPEC extern mca_mpool_sm_component_t mca_mpool_sm_component; - -typedef struct mca_mpool_sm_module_t { - mca_mpool_base_module_t super; - long sm_size; - mca_allocator_base_module_t *sm_allocator; - struct mca_mpool_sm_mmap_t *sm_mmap; - mca_common_sm_module_t *sm_common_module; - int32_t mem_node; -} mca_mpool_sm_module_t; - -/* - * Initializes the mpool module. - */ -void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool); - - -/* - * Returns base address of shared memory mapping. - */ -void* mca_mpool_sm_base(mca_mpool_base_module_t*); - -/** - * Allocate block of shared memory. 
- */ -void* mca_mpool_sm_alloc( - mca_mpool_base_module_t* mpool, - size_t size, - size_t align, - uint32_t flags, - mca_mpool_base_registration_t** registration); - -/** - * realloc function typedef - */ -void* mca_mpool_sm_realloc( - mca_mpool_base_module_t* mpool, - void* addr, - size_t size, - mca_mpool_base_registration_t** registration); - -/** - * free function typedef - */ -void mca_mpool_sm_free( - mca_mpool_base_module_t* mpool, - void * addr, - mca_mpool_base_registration_t* registration); - -/** - * Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OPAL_SUCCESS or failure status - */ -int mca_mpool_sm_ft_event(int state); - -END_C_DECLS - -#endif diff --git a/opal/mca/mpool/sm/mpool_sm_component.c b/opal/mca/mpool/sm/mpool_sm_component.c deleted file mode 100644 index 644389a449f..00000000000 --- a/opal/mca/mpool/sm/mpool_sm_component.c +++ /dev/null @@ -1,212 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H*/ -#ifdef HAVE_STDLIB_H -#include -#endif /* HAVE_STDLIB_H */ -#include -#include "opal/mca/base/base.h" - -#include "opal/mca/allocator/base/base.h" -#include "mpool_sm.h" -#include "opal/mca/common/sm/common_sm.h" - -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/runtime/opal_cr.h" -#endif - -/* - * Local functions - */ -static int -mca_mpool_sm_register(void); - -static int -mca_mpool_sm_open(void); - -static int -mca_mpool_sm_close(void); - -static mca_mpool_base_module_t * -mca_mpool_sm_init(struct mca_mpool_base_resources_t* resources); - -mca_mpool_sm_component_t mca_mpool_sm_component = { - { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .mpool_version = { - MCA_MPOOL_BASE_VERSION_2_0_0, - - .mca_component_name = "sm", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - .mca_open_component = mca_mpool_sm_open, - .mca_close_component = mca_mpool_sm_close, - .mca_register_component_params = mca_mpool_sm_register, - }, - .mpool_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .mpool_init = mca_mpool_sm_init, - } -}; - -static long default_min = 134217728; -static unsigned long long opal_mpool_sm_min_size; -static int opal_mpool_sm_verbose; - -static int mca_mpool_sm_register(void) -{ - /* register SM component parameters */ - (void) mca_base_var_group_component_register(&mca_mpool_sm_component.super.mpool_version, - "Shared memory pool"); - - mca_mpool_sm_component.sm_allocator_name = "bucket"; - (void) mca_base_component_var_register(&mca_mpool_sm_component.super.mpool_version, - "allocator", "Name of allocator component " - "to use with sm mpool", MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - 
&mca_mpool_sm_component.sm_allocator_name); - - /* register as an unsigned long long to get up to 64 bits for the size */ - opal_mpool_sm_min_size = default_min; - (void) mca_base_component_var_register(&mca_mpool_sm_component.super.mpool_version, - "min_size", "Minimum size of the sm mpool shared memory file", - MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &opal_mpool_sm_min_size); - - opal_mpool_sm_verbose = 0; - (void) mca_base_component_var_register(&mca_mpool_sm_component.super.mpool_version, - "verbose", "Enable verbose output for mpool sm component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &opal_mpool_sm_verbose); - - return OPAL_SUCCESS; -} - -/** - * component open/close/init function - */ -static int mca_mpool_sm_open(void) -{ - if (opal_mpool_sm_verbose != 0) { - mca_mpool_sm_component.verbose = opal_output_open(NULL); - } else { - mca_mpool_sm_component.verbose = -1; - } - - return OPAL_SUCCESS; -} - -static int mca_mpool_sm_close( void ) -{ - return OPAL_SUCCESS; -} - -static mca_mpool_base_module_t * -mca_mpool_sm_init(struct mca_mpool_base_resources_t *resources) -{ - mca_mpool_sm_module_t *mpool_module; - mca_allocator_base_component_t* allocator_component; - - /* Make a new mpool module */ - mpool_module = - (mca_mpool_sm_module_t *)malloc(sizeof(mca_mpool_sm_module_t)); - mca_mpool_sm_module_init(mpool_module); - - /* set sm_size */ - mpool_module->sm_size = resources->size; - - /* clip at the min size */ - if (mpool_module->sm_size < (long) opal_mpool_sm_min_size) { - mpool_module->sm_size = (long) opal_mpool_sm_min_size; - } - - allocator_component = mca_allocator_component_lookup( - mca_mpool_sm_component.sm_allocator_name); - - /* if specified allocator cannot be loaded - look for an alternative */ - if (NULL == allocator_component) { - if (opal_list_get_size(&opal_allocator_base_framework.framework_components) == 0) { - 
mca_base_component_list_item_t *item = - (mca_base_component_list_item_t *) - opal_list_get_first(&opal_allocator_base_framework.framework_components); - allocator_component = - (mca_allocator_base_component_t *)item->cli_component; - opal_output( - 0, "mca_mpool_sm_init: " - "unable to locate allocator: %s - using %s\n", - mca_mpool_sm_component.sm_allocator_name, - allocator_component->allocator_version.mca_component_name); - } else { - opal_output(0, "mca_mpool_sm_init: " - "unable to locate allocator: %s\n", - mca_mpool_sm_component.sm_allocator_name); - free(mpool_module); - return NULL; - } - } - - mpool_module->mem_node = resources->mem_node; - - opal_output(mca_mpool_sm_component.verbose, - "mca_mpool_sm_init: shared memory size used: (%ld)", - mpool_module->sm_size); - - if (NULL == (mpool_module->sm_common_module = - mca_common_sm_module_attach(&resources->bs_meta_buf, - sizeof(mca_common_sm_module_t), 8))) { - opal_output(mca_mpool_sm_component.verbose, "mca_mpool_sm_init: " - "unable to create shared memory mapping (%s)", - resources->bs_meta_buf.seg_name); - free(mpool_module); - return NULL; - } - - /* setup allocator */ - mpool_module->sm_allocator = - allocator_component->allocator_init(true, - mca_common_sm_seg_alloc, - NULL, &(mpool_module->super)); - if (NULL == mpool_module->sm_allocator) { - opal_output(0, "mca_mpool_sm_init: unable to initialize allocator"); - free(mpool_module); - return NULL; - } - - return &mpool_module->super; -} - diff --git a/opal/mca/mpool/sm/mpool_sm_module.c b/opal/mca/mpool/sm/mpool_sm_module.c deleted file mode 100644 index b725270b7e8..00000000000 --- a/opal/mca/mpool/sm/mpool_sm_module.c +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" -#include -#include "opal/mca/mpool/sm/mpool_sm.h" -#include "opal/mca/common/sm/common_sm.h" -#include "opal/mca/common/cuda/common_cuda.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include "opal/mca/hwloc/base/base.h" - -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/sstore/sstore.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/runtime/ompi_cr.h" /* TODO */ -#endif - -static void sm_module_finalize(mca_mpool_base_module_t* module); - -/* - * Initializes the mpool module. 
- */ -void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool) -{ - mpool->super.mpool_component = &mca_mpool_sm_component.super; - mpool->super.mpool_base = mca_mpool_sm_base; - mpool->super.mpool_alloc = mca_mpool_sm_alloc; - mpool->super.mpool_realloc = mca_mpool_sm_realloc; - mpool->super.mpool_free = mca_mpool_sm_free; - mpool->super.mpool_find = NULL; - mpool->super.mpool_register = NULL; - mpool->super.mpool_deregister = NULL; - mpool->super.mpool_release_memory = NULL; - mpool->super.mpool_finalize = sm_module_finalize; - mpool->super.mpool_ft_event = mca_mpool_sm_ft_event; - mpool->super.flags = 0; - - mpool->sm_size = 0; - mpool->sm_allocator = NULL; - mpool->sm_mmap = NULL; - mpool->sm_common_module = NULL; - mpool->mem_node = -1; -} - -/* - * base address of shared memory mapping - */ -void* mca_mpool_sm_base(mca_mpool_base_module_t* mpool) -{ - mca_mpool_sm_module_t *sm_mpool = (mca_mpool_sm_module_t*) mpool; - return (NULL != sm_mpool->sm_common_module) ? - sm_mpool->sm_common_module->module_seg_addr : NULL; -} - -/** - * allocate function - */ -void* mca_mpool_sm_alloc( - mca_mpool_base_module_t* mpool, - size_t size, - size_t align, - uint32_t flags, - mca_mpool_base_registration_t** registration) -{ - mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool; - opal_hwloc_base_memory_segment_t mseg; - - mseg.mbs_start_addr = - mpool_sm->sm_allocator->alc_alloc(mpool_sm->sm_allocator, size, align, registration); - - if(mpool_sm->mem_node >= 0) { - mseg.mbs_len = size; -#if OPAL_HAVE_HWLOC - opal_hwloc_base_membind(&mseg, 1, mpool_sm->mem_node); -#endif - } - - return mseg.mbs_start_addr; -} - -/** - * realloc function - */ -void* mca_mpool_sm_realloc( - mca_mpool_base_module_t* mpool, - void* addr, - size_t size, - mca_mpool_base_registration_t** registration) -{ - mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool; - opal_hwloc_base_memory_segment_t mseg; - - mseg.mbs_start_addr = - 
mpool_sm->sm_allocator->alc_realloc(mpool_sm->sm_allocator, addr, size, - registration); - if(mpool_sm->mem_node >= 0) { - mseg.mbs_len = size; -#if OPAL_HAVE_HWLOC - opal_hwloc_base_membind(&mseg, 1, mpool_sm->mem_node); -#endif - } - - return mseg.mbs_start_addr; -} - -/** - * free function - */ -void mca_mpool_sm_free(mca_mpool_base_module_t* mpool, void * addr, - mca_mpool_base_registration_t* registration) -{ - mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool; - mpool_sm->sm_allocator->alc_free(mpool_sm->sm_allocator, addr); -} - -static void sm_module_finalize(mca_mpool_base_module_t* module) -{ - mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t*) module; - - if (NULL != sm_module->sm_common_module) { - if (OPAL_SUCCESS == - mca_common_sm_fini(sm_module->sm_common_module)) { -#if OPAL_ENABLE_FT_CR == 1 - /* Only unlink the file if we are *not* restarting. If we - are restarting the file will be unlinked at a later - time. */ - if (OPAL_CR_STATUS_RESTART_PRE != opal_cr_checkpointing_state && - OPAL_CR_STATUS_RESTART_POST != opal_cr_checkpointing_state ) { - unlink(sm_module->sm_common_module->shmem_ds.seg_name); - } -#else - unlink(sm_module->sm_common_module->shmem_ds.seg_name); -#endif - } - OBJ_RELEASE(sm_module->sm_common_module); - sm_module->sm_common_module = NULL; - } -} - -#if OPAL_ENABLE_FT_CR == 0 -int mca_mpool_sm_ft_event(int state) { - return OPAL_SUCCESS; -} -#else -int mca_mpool_sm_ft_event(int state) { - mca_mpool_base_module_t *self_module = NULL; - mca_mpool_sm_module_t *self_sm_module = NULL; - char * file_name = NULL; - - if(OPAL_CRS_CHECKPOINT == state) { - /* Record the shared memory filename */ - asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s", - opal_process_info.job_session_dir, - opal_proc_local_get()->proc_hostname ); - /* Disabled to get FT code compiled again - * TODO: FIXIT soon - orte_sstore.set_attr(orte_sstore_handle_current, SSTORE_METADATA_LOCAL_TOUCH, file_name); - */ - free(file_name); - 
file_name = NULL; - } - else if(OPAL_CRS_CONTINUE == state) { - if (opal_cr_continue_like_restart) { - /* Find the sm module */ - self_module = mca_mpool_base_module_lookup("sm"); - self_sm_module = (mca_mpool_sm_module_t*) self_module; - - /* Mark the old sm file for eventual removal via CRS */ - if (NULL != self_sm_module->sm_common_module) { - opal_crs_base_cleanup_append(self_sm_module->sm_common_module->shmem_ds.seg_name, false); - } - - /* Remove self from the list of all modules */ - mca_mpool_base_module_destroy(self_module); - } - } - else if(OPAL_CRS_RESTART == state || - OPAL_CRS_RESTART_PRE == state) { - /* Find the sm module */ - self_module = mca_mpool_base_module_lookup("sm"); - self_sm_module = (mca_mpool_sm_module_t*) self_module; - - /* Mark the old sm file for eventual removal via CRS */ - if (NULL != self_sm_module->sm_common_module) { - opal_crs_base_cleanup_append(self_sm_module->sm_common_module->shmem_ds.seg_name, false); - } - - /* Remove self from the list of all modules */ - mca_mpool_base_module_destroy(self_module); - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OPAL_SUCCESS; -} -#endif /* OPAL_ENABLE_FT_CR */ diff --git a/opal/mca/mpool/udreg/Makefile.am b/opal/mca/mpool/udreg/Makefile.am deleted file mode 100644 index dae4519606c..00000000000 --- a/opal/mca/mpool/udreg/Makefile.am +++ /dev/null @@ -1,52 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2013 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
-# Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(mpool_udreg_CPPFLAGS) - -sources = mpool_udreg_module.c mpool_udreg_component.c - -if WANT_INSTALL_HEADERS -opaldir = $(opalincludedir)/$(subdir) -opal_HEADERS = mpool_udreg.h -endif - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_mpool_udreg_DSO -component_noinst = -component_install = mca_mpool_udreg.la -else -component_noinst = libmca_mpool_udreg.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_mpool_udreg_la_SOURCES = $(sources) -mca_mpool_udreg_la_LDFLAGS = -module -avoid-version $(mpool_udreg_LDFLAGS) -mca_mpool_udreg_la_LIBADD = $(mpool_udreg_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_mpool_udreg_la_SOURCES = $(sources) -libmca_mpool_udreg_la_LIBADD = $(mpool_udreg_LIBS) -libmca_mpool_udreg_la_LDFLAGS = -module -avoid-version $(mpool_udreg_LDFLAGS) diff --git a/opal/mca/mpool/udreg/configure.m4 b/opal/mca/mpool/udreg/configure.m4 deleted file mode 100644 index 9a1db3ec5a8..00000000000 --- a/opal/mca/mpool/udreg/configure.m4 +++ /dev/null @@ -1,48 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2013 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006 QLogic Corp. All rights reserved. -# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
-# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AC_DEFUN([MCA_opal_mpool_udreg_CONFIG],[ - AC_CONFIG_FILES([opal/mca/mpool/udreg/Makefile]) - - AC_ARG_WITH([udreg], [AC_HELP_STRING([--with-udreg], - [Build support for Cray udreg support. Set PKG_CONFIG_PATH env. variable to specify alternate path.])]) - - mpool_udreg_happy="no" - - AS_IF([test "$with_udreg" = "no"], - [mpool_udreg_happy="no"], - [PKG_CHECK_MODULES([CRAY_UDREG], [cray-udreg], - [mpool_udreg_LDFLAGS="$CRAY_UDREG_LIBS" - mpool_udreg_CPPFLAGS="$CRAY_UDREG_CFLAGS" - mpool_udreg_happy="yes"], - [AC_MSG_RESULT([no]) - mpool_udreg_happ="no"])]) - - AS_IF([test "$mpool_udreg_happy" = "yes"], [$1], [$2]) - - - # substitute in the things needed to build udreg/mpool - AC_SUBST([mpool_udreg_CPPFLAGS]) - AC_SUBST([mpool_udreg_LDFLAGS]) - AC_SUBST([mpool_udreg_LIBS]) -])dnl diff --git a/opal/mca/mpool/udreg/mpool_udreg.h b/opal/mca/mpool/udreg/mpool_udreg.h deleted file mode 100644 index e1fb2240601..00000000000 --- a/opal/mca/mpool/udreg/mpool_udreg.h +++ /dev/null @@ -1,171 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_MPOOL_UDREG_H -#define MCA_MPOOL_UDREG_H - -#include "opal_config.h" -#include "opal/class/opal_list.h" -#include "opal/class/opal_free_list.h" -#include "opal/mca/event/event.h" -#include "opal/mca/mpool/mpool.h" -#include "opal/util/proc.h" -#if HAVE_SYS_MMAN_H -#include -#endif - -BEGIN_C_DECLS - -struct mca_mpool_udreg_component_t { - mca_mpool_base_component_t super; - bool print_stats; - int leave_pinned; - opal_list_t huge_pages; - bool use_huge_pages; -}; -typedef struct mca_mpool_udreg_component_t mca_mpool_udreg_component_t; - -OPAL_DECLSPEC extern mca_mpool_udreg_component_t mca_mpool_udreg_component; - -struct mca_mpool_udreg_module_t; - -struct mca_mpool_base_resources_t { - /* the start of this mpool should match grdma */ - char *pool_name; - void *reg_data; - size_t sizeof_reg; - int (*register_mem)(void *reg_data, void *base, size_t size, - mca_mpool_base_registration_t *reg); - int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg); - - /* udreg specific resources */ - bool use_kernel_cache; - bool use_evict_w_unreg; - int max_entries; - size_t page_size; -}; -typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t; - -struct mca_mpool_udreg_hugepage_t { - opal_list_item_t super; - unsigned long page_size; - char *path; - opal_list_t allocations; - int cnt; -}; -typedef struct mca_mpool_udreg_hugepage_t mca_mpool_udreg_hugepage_t; - -OBJ_CLASS_DECLARATION(mca_mpool_udreg_hugepage_t); - -struct mca_mpool_udreg_hugepage_alloc_t { - opal_list_item_t super; - int fd; - char *path; - void *ptr; - size_t size; - mca_mpool_udreg_hugepage_t *huge_table; -}; -typedef struct mca_mpool_udreg_hugepage_alloc_t mca_mpool_udreg_hugepage_alloc_t; - -OBJ_CLASS_DECLARATION(mca_mpool_udreg_hugepage_pool_item_t); - -struct mca_mpool_udreg_module_t { - mca_mpool_base_module_t super; - struct mca_mpool_base_resources_t 
resources; - opal_free_list_t reg_list; - mca_mpool_udreg_hugepage_t *huge_page; - opal_mutex_t lock; - void *udreg_handle; -}; -typedef struct mca_mpool_udreg_module_t mca_mpool_udreg_module_t; - - -/* - * Initializes the mpool module. - */ -int mca_mpool_udreg_module_init(mca_mpool_udreg_module_t *mpool); - -/* - * Returns base address of shared memory mapping. - */ -void *mca_mpool_udreg_base(mca_mpool_base_module_t *mpool); - -/** - * Allocate block of registered memory. - */ -void* mca_mpool_udreg_alloc(mca_mpool_base_module_t *mpool, size_t size, - size_t align, uint32_t flags, - mca_mpool_base_registration_t** registration); - -/** - * realloc block of registered memory - */ -void* mca_mpool_udreg_realloc( mca_mpool_base_module_t *mpool, void* addr, - size_t size, mca_mpool_base_registration_t** registration); - -/** - * register block of memory - */ -int mca_mpool_udreg_register(mca_mpool_base_module_t* mpool, void *addr, - size_t size, uint32_t flags, mca_mpool_base_registration_t **reg); - -/** - * deregister memory - */ -int mca_mpool_udreg_deregister(mca_mpool_base_module_t *mpool, - mca_mpool_base_registration_t *reg); - -/** - * free memory allocated by alloc function - */ -void mca_mpool_udreg_free(mca_mpool_base_module_t *mpool, void * addr, - mca_mpool_base_registration_t *reg); - -/** - * find registration for a given block of memory - */ -int mca_mpool_udreg_find(struct mca_mpool_base_module_t* mpool, void* addr, - size_t size, mca_mpool_base_registration_t **reg); - -/** - * finalize mpool - */ -void mca_mpool_udreg_finalize(struct mca_mpool_base_module_t *mpool); - -/** - * Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OPAL_SUCCESS or failure status - */ -int mca_mpool_udreg_ft_event(int state); - -/** - * evict one unused registration from the mpool's lru. 
- * @return true on success, false on failure - */ -bool mca_mpool_udreg_evict (struct mca_mpool_base_module_t *mpool); - -END_C_DECLS -#endif diff --git a/opal/mca/mpool/udreg/mpool_udreg_component.c b/opal/mca/mpool/udreg/mpool_udreg_component.c deleted file mode 100644 index 64e7da92e0d..00000000000 --- a/opal/mca/mpool/udreg/mpool_udreg_component.c +++ /dev/null @@ -1,206 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1 -#include "opal_config.h" -#include "opal/mca/base/base.h" -#include "opal/runtime/opal_params.h" -#include "mpool_udreg.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_MALLOC_H -#include -#endif - -#include - -/* - * Local functions - */ -static int udreg_open(void); -static int udreg_close(void); -static int udreg_register(void); -static mca_mpool_base_module_t* udreg_init( - struct mca_mpool_base_resources_t* resources); - -mca_mpool_udreg_component_t mca_mpool_udreg_component = { - { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .mpool_version ={ - MCA_MPOOL_BASE_VERSION_2_0_0, - - .mca_component_name = "udreg", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - .mca_open_component = udreg_open, - .mca_close_component = udreg_close, - .mca_register_component_params = udreg_register, - }, - .mpool_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .mpool_init = udreg_init - } -}; - -/** - * component open/close/init function - */ -static int udreg_open(void) -{ - OBJ_CONSTRUCT(&mca_mpool_udreg_component.huge_pages, opal_list_t); - - return OPAL_SUCCESS; -} - - -static int udreg_register(void) -{ - mca_mpool_udreg_component.print_stats = false; - (void) mca_base_component_var_register(&mca_mpool_udreg_component.super.mpool_version, - "print_stats", "print pool usage statistics at the end of the run", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_mpool_udreg_component.print_stats); - - return OPAL_SUCCESS; -} - - -static int udreg_close(void) -{ - opal_list_item_t *item; - - while (NULL != (item = opal_list_remove_first (&mca_mpool_udreg_component.huge_pages))) { - OBJ_RELEASE(item); - } - - 
OBJ_DESTRUCT(&mca_mpool_udreg_component.huge_pages); - - return OPAL_SUCCESS; -} - -static int page_compare (opal_list_item_t **a, - opal_list_item_t **b) { - mca_mpool_udreg_hugepage_t *pagea = (mca_mpool_udreg_hugepage_t *) *a; - mca_mpool_udreg_hugepage_t *pageb = (mca_mpool_udreg_hugepage_t *) *b; - if (pagea->page_size > pageb->page_size) { - return 1; - } else if (pagea->page_size < pageb->page_size) { - return -1; - } - - return 0; -} - -static void udreg_find_hugepages (void) { - FILE *fh; - char *path; - char buffer[1024]; - char *ctx, *tok; - - fh = fopen ("/proc/mounts", "r"); - if (NULL == fh) { - return; - } - - while (fgets (buffer, 1024, fh)) { - mca_mpool_udreg_hugepage_t *pool; - - (void) strtok_r (buffer, " ", &ctx); - path = strtok_r (NULL, " ", &ctx); - tok = strtok_r (NULL, " ", &ctx); - - if (0 != strcmp (tok, "hugetlbfs")) { - continue; - } - - pool = OBJ_NEW(mca_mpool_udreg_hugepage_t); - if (NULL == pool) { - break; - } - - pool->path = strdup (path); - - tok = strtok_r (NULL, " ", &ctx); - tok = strtok_r (tok, ",", &ctx); - - do { - if (0 == strncmp (tok, "pagesize", 8)) { - break; - } - tok = strtok_r (NULL, ",", &ctx); - } while (tok); - sscanf (tok, "pagesize=%lu", &pool->page_size); - - opal_list_append (&mca_mpool_udreg_component.huge_pages, &pool->super); - } - - fclose (fh); - - opal_list_sort (&mca_mpool_udreg_component.huge_pages, page_compare); - - mca_mpool_udreg_component.use_huge_pages = - !!(opal_list_get_size (&mca_mpool_udreg_component.huge_pages)); -} - - - -static mca_mpool_base_module_t * -udreg_init(struct mca_mpool_base_resources_t *resources) -{ - mca_mpool_udreg_module_t* mpool_module; - static int inited = false; - int rc; - - /* Set this here (vs in component.c) because - opal_leave_pinned* may have been set after MCA params were - read (e.g., by the openib btl) */ - mca_mpool_udreg_component.leave_pinned = (int) - (1 == opal_leave_pinned || opal_leave_pinned_pipeline); - - if (!inited) { - inited = true; - 
udreg_find_hugepages (); - } - - mpool_module = - (mca_mpool_udreg_module_t *) malloc (sizeof (mca_mpool_udreg_module_t)); - - memmove (&mpool_module->resources, resources, sizeof (*resources)); - - rc = mca_mpool_udreg_module_init(mpool_module); - if (OPAL_SUCCESS != rc) { - free (mpool_module); - return NULL; - } - - return &mpool_module->super; -} diff --git a/opal/mca/mpool/udreg/mpool_udreg_module.c b/opal/mca/mpool/udreg/mpool_udreg_module.c deleted file mode 100644 index 1c10829a3ea..00000000000 --- a/opal/mca/mpool/udreg/mpool_udreg_module.c +++ /dev/null @@ -1,499 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006 Voltaire. All rights reserved. - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. - * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1 -#include "opal_config.h" -#include "opal/align.h" -#include "mpool_udreg.h" -#include -#include -#ifdef HAVE_MALLOC_H -#include -#endif -#include "opal/mca/mpool/base/base.h" -#include "opal/runtime/opal_params.h" -#include "opal/include/opal_stdint.h" - -#include - -#include - -#include - -static void *mca_mpool_udreg_reg_func (void *addr, uint64_t len, void *reg_context); -static uint32_t mca_mpool_udreg_dereg_func (void *device_data, void *dreg_context); - -static void mca_mpool_udreg_hugepage_constructor (mca_mpool_udreg_hugepage_t *huge_page) -{ - memset ((char *)huge_page + sizeof(huge_page->super), 0, sizeof (*huge_page) - sizeof (huge_page->super)); - OBJ_CONSTRUCT(&huge_page->allocations, opal_list_t); -} - -static void mca_mpool_udreg_hugepage_destructor (mca_mpool_udreg_hugepage_t *huge_page) -{ - opal_list_item_t *item; - - if (huge_page->path) { - free (huge_page->path); - } - - while (NULL != (item = opal_list_remove_first (&huge_page->allocations))) { - OBJ_RELEASE(item); - } - - OBJ_DESTRUCT(&huge_page->allocations); -} - -OBJ_CLASS_INSTANCE(mca_mpool_udreg_hugepage_t, opal_list_item_t, - mca_mpool_udreg_hugepage_constructor, - mca_mpool_udreg_hugepage_destructor); - -static void mca_mpool_udreg_hugepage_alloc_constructor (mca_mpool_udreg_hugepage_alloc_t *alloc) -{ - memset ((char *)alloc + sizeof(alloc->super), 0, sizeof (*alloc) - sizeof (alloc->super)); - alloc->fd = -1; -} - -static void mca_mpool_udreg_hugepage_alloc_destructor (mca_mpool_udreg_hugepage_alloc_t *alloc) -{ - if (NULL != alloc->ptr) { - munmap (alloc->ptr, alloc->size); - } - - if (NULL == alloc->path) { - return; - } - - free (alloc->path); -} - -OBJ_CLASS_INSTANCE(mca_mpool_udreg_hugepage_alloc_t, opal_list_item_t, - mca_mpool_udreg_hugepage_alloc_constructor, - mca_mpool_udreg_hugepage_alloc_destructor); - - -static mca_mpool_udreg_hugepage_t 
*udreg_find_matching_pagesize (size_t size) { - mca_mpool_udreg_hugepage_t *huge_table; - opal_list_item_t *item; - - for (item = opal_list_get_first (&mca_mpool_udreg_component.huge_pages) ; - item != opal_list_get_end (&mca_mpool_udreg_component.huge_pages) ; - item = opal_list_get_next (item)) { - huge_table = (mca_mpool_udreg_hugepage_t *) item; - - if (huge_table->page_size == size) { - return huge_table; - } - } - - return NULL; -} - - -/* - * Initializes the mpool module. - */ -int mca_mpool_udreg_module_init(mca_mpool_udreg_module_t* mpool) -{ - struct udreg_cache_attr cache_attr; - int urc; - - mpool->super.mpool_component = &mca_mpool_udreg_component.super; - mpool->super.mpool_base = NULL; /* no base .. */ - mpool->super.mpool_alloc = mca_mpool_udreg_alloc; - mpool->super.mpool_realloc = mca_mpool_udreg_realloc; - mpool->super.mpool_free = mca_mpool_udreg_free; - mpool->super.mpool_register = mca_mpool_udreg_register; - mpool->super.mpool_find = mca_mpool_udreg_find; - mpool->super.mpool_deregister = mca_mpool_udreg_deregister; - /* This module relies on udreg for notification of memory release */ - mpool->super.mpool_release_memory = NULL; - mpool->super.mpool_finalize = mca_mpool_udreg_finalize; - mpool->super.mpool_ft_event = mca_mpool_udreg_ft_event; - mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM | MCA_MPOOL_FLAGS_NO_HOOKS; - - if (4096 < mpool->resources.page_size) { - mpool->huge_page = udreg_find_matching_pagesize (mpool->resources.page_size); - } else { - mpool->huge_page = NULL; - } - - cache_attr.modes = 0; - - /* Create udreg cache */ - if (mpool->resources.use_kernel_cache) { - cache_attr.modes |= UDREG_CC_MODE_USE_KERNEL_CACHE; - } - - if (mpool->resources.use_evict_w_unreg) { - cache_attr.modes |= UDREG_CC_MODE_USE_EVICT_W_UNREG; - } - - if (mca_mpool_udreg_component.leave_pinned) { - cache_attr.modes |= UDREG_CC_MODE_USE_LAZY_DEREG; - } - - OBJ_CONSTRUCT(&mpool->lock,opal_mutex_t); - - strncpy (cache_attr.cache_name, 
mpool->resources.pool_name, UDREG_MAX_CACHENAME_LEN); - cache_attr.max_entries = mpool->resources.max_entries; - cache_attr.debug_mode = 0; - cache_attr.debug_rank = 0; - cache_attr.reg_context = mpool; - cache_attr.dreg_context = mpool; - cache_attr.destructor_context = mpool; - cache_attr.device_reg_func = mca_mpool_udreg_reg_func; - cache_attr.device_dereg_func = mca_mpool_udreg_dereg_func; - cache_attr.destructor_callback = NULL; - - /* attempt to create the udreg cache. this will fail if one already exists */ - (void) UDREG_CacheCreate (&cache_attr); - - urc = UDREG_CacheAccess (mpool->resources.pool_name, (udreg_cache_handle_t *) &mpool->udreg_handle); - if (UDREG_RC_SUCCESS != urc) { - return OPAL_ERROR; - } - - OBJ_CONSTRUCT(&mpool->reg_list, opal_free_list_t); - opal_free_list_init (&mpool->reg_list, mpool->resources.sizeof_reg, - opal_cache_line_size, - OBJ_CLASS(mca_mpool_base_registration_t), - 0, opal_cache_line_size, 0, -1, 32, NULL, 0, - NULL, NULL, NULL); - - return OPAL_SUCCESS; -} - -/* udreg callback functions */ -static void *mca_mpool_udreg_reg_func (void *addr, uint64_t len, void *reg_context) -{ - mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) reg_context; - mca_mpool_base_registration_t *udreg_reg; - opal_free_list_item_t *item; - int rc; - - item = opal_free_list_get (&mpool_udreg->reg_list); - if (NULL == item) { - return NULL; - } - udreg_reg = (mca_mpool_base_registration_t *) item; - - udreg_reg->mpool = reg_context; - udreg_reg->base = addr; - udreg_reg->bound = (void *)((uintptr_t) addr + len); - - rc = mpool_udreg->resources.register_mem(mpool_udreg->resources.reg_data, - addr, len, udreg_reg); - if (OPAL_SUCCESS != rc) { - opal_free_list_return (&mpool_udreg->reg_list, item); - udreg_reg = NULL; - } - - return udreg_reg; -} - -static uint32_t mca_mpool_udreg_dereg_func (void *device_data, void *dreg_context) -{ - mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) dreg_context; - 
mca_mpool_base_registration_t *udreg_reg = (mca_mpool_base_registration_t *) device_data; - int rc; - - rc = mpool_udreg->resources.deregister_mem(mpool_udreg->resources.reg_data, udreg_reg); - - if (OPAL_LIKELY(OPAL_SUCCESS == rc)) { - opal_free_list_return (&mpool_udreg->reg_list, - (opal_free_list_item_t *) udreg_reg); - } - /* might be worth printing out a warning if an error occurs here */ - - return 0; -} - -/* */ - -static int mca_mpool_udreg_alloc_huge (mca_mpool_udreg_module_t *mpool, size_t size, - void **addr, void **base_addr) { - mca_mpool_udreg_hugepage_alloc_t *alloc; - int rc; - - alloc = OBJ_NEW(mca_mpool_udreg_hugepage_alloc_t); - alloc->size = size; - - rc = asprintf (&alloc->path, "%s/hugepage.openmpi.%d.%d", mpool->huge_page->path, - getpid (), mpool->huge_page->cnt++); - if (0 > rc) { - OBJ_RELEASE(alloc); - return -1; - } - - alloc->fd = open (alloc->path, O_RDWR | O_CREAT, 0600); - if (-1 == alloc->fd) { - OBJ_RELEASE(alloc); - return -1; - } - - if (0 != ftruncate (alloc->fd, size)) { - close (alloc->fd); - unlink (alloc->path); - OBJ_RELEASE(alloc); - return -1; - } - - alloc->ptr = mmap (NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, - alloc->fd, 0); - if (NULL == alloc->ptr) { - OBJ_RELEASE(alloc); - return -1; - } - - close (alloc->fd); - unlink (alloc->path); - - alloc->huge_table = mpool->huge_page; - - opal_list_append (&mpool->huge_page->allocations, &alloc->super); - - *addr = alloc->ptr; - *base_addr = alloc; - - return 0; -} - - -static void mca_mpool_udreg_free_huge (mca_mpool_udreg_hugepage_alloc_t *alloc) { - opal_list_remove_item (&alloc->huge_table->allocations, &alloc->super); - OBJ_RELEASE(alloc); -} - -/** - * allocate function - */ -void* mca_mpool_udreg_alloc(mca_mpool_base_module_t *mpool, size_t size, - size_t align, uint32_t flags, mca_mpool_base_registration_t **reg) -{ - mca_mpool_udreg_module_t *udreg_module = (mca_mpool_udreg_module_t *) mpool; - void *base_addr, *addr; - - if(0 == align) - align = 
mca_mpool_base_page_size; - -#if OPAL_CUDA_SUPPORT - /* CUDA cannot handle registering overlapping regions, so make - * sure each region is page sized and page aligned. */ - align = mca_mpool_base_page_size; - size = OPAL_ALIGN(size, mca_mpool_base_page_size, size_t); -#endif - - addr = base_addr = NULL; - - if (NULL != udreg_module->huge_page) { - size = OPAL_ALIGN(size, udreg_module->huge_page->page_size, size_t); - mca_mpool_udreg_alloc_huge (udreg_module, size, &addr, &base_addr); - } else { -#ifdef HAVE_POSIX_MEMALIGN - if((errno = posix_memalign(&base_addr, align, size)) != 0) - return NULL; - - addr = base_addr; -#else - base_addr = malloc(size + align); - if(NULL == base_addr) - return NULL; - - addr = (void*)OPAL_ALIGN((uintptr_t)base_addr, align, uintptr_t); -#endif - } - - if (OPAL_SUCCESS != mca_mpool_udreg_register(mpool, addr, size, flags, reg)) { - if (udreg_module->huge_page) { - mca_mpool_udreg_free_huge ((mca_mpool_udreg_hugepage_alloc_t *) base_addr); - } else { - free(base_addr); - } - - return NULL; - } - - (*reg)->alloc_base = (unsigned char *) base_addr; - - return addr; -} - -bool mca_mpool_udreg_evict (struct mca_mpool_base_module_t *mpool) -{ - mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) mpool; - udreg_return_t urc; - - urc = UDREG_Evict (mpool_udreg->udreg_handle); - return (UDREG_RC_SUCCESS == urc); -} - -/* - * register memory - */ -int mca_mpool_udreg_register(mca_mpool_base_module_t *mpool, void *addr, - size_t size, uint32_t flags, - mca_mpool_base_registration_t **reg) -{ - mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) mpool; - mca_mpool_base_registration_t *udreg_reg; - bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS); - udreg_entry_t *udreg_entry; - udreg_return_t urc; - - if (false == bypass_cache) { - /* Get a udreg entry for this region */ - OPAL_THREAD_LOCK(&mpool_udreg->lock); - while (UDREG_RC_SUCCESS != - (urc = UDREG_Register (mpool_udreg->udreg_handle, addr, 
size, &udreg_entry))) { - /* try to remove one unused reg and retry */ - if (!mca_mpool_udreg_evict (mpool)) { - *reg = NULL; - OPAL_THREAD_UNLOCK(&mpool_udreg->lock); - return OPAL_ERR_OUT_OF_RESOURCE; - } - } - OPAL_THREAD_UNLOCK(&mpool_udreg->lock); - - udreg_reg = (mca_mpool_base_registration_t *) udreg_entry->device_data; - udreg_reg->mpool_context = udreg_entry; - } else { - /* if cache bypass is requested don't use the udreg cache */ - while (NULL == (udreg_reg = mca_mpool_udreg_reg_func (addr, size, mpool))) { - /* try to remove one unused reg and retry */ - if (!mca_mpool_udreg_evict (mpool)) { - *reg = NULL; - return OPAL_ERR_OUT_OF_RESOURCE; - } - } - udreg_reg->mpool_context = NULL; - } - - udreg_reg->flags = flags; - - *reg = udreg_reg; - (*reg)->ref_count++; - - return OPAL_SUCCESS; -} - - -/** - * realloc function - */ -void* mca_mpool_udreg_realloc(mca_mpool_base_module_t *mpool, void *addr, - size_t size, mca_mpool_base_registration_t **reg) -{ - mca_mpool_base_registration_t *old_reg = *reg; - void *new_mem = mca_mpool_udreg_alloc(mpool, size, 0, old_reg->flags, reg); - memcpy(new_mem, addr, old_reg->bound - old_reg->base + 1); - mca_mpool_udreg_free(mpool, addr, old_reg); - - return new_mem; -} - -/** - * free function - */ -void mca_mpool_udreg_free(mca_mpool_base_module_t *mpool, void *addr, - mca_mpool_base_registration_t *registration) -{ - mca_mpool_udreg_module_t *udreg_module = (mca_mpool_udreg_module_t *) mpool; - mca_mpool_udreg_deregister(mpool, registration); - - if (udreg_module->huge_page) { - mca_mpool_udreg_free_huge ((mca_mpool_udreg_hugepage_alloc_t *) registration->alloc_base); - } else { - free (registration->alloc_base); - } -} - -int mca_mpool_udreg_find(struct mca_mpool_base_module_t *mpool, void *addr, - size_t size, mca_mpool_base_registration_t **reg) -{ - *reg = NULL; - return OPAL_ERR_NOT_FOUND; -} - -int mca_mpool_udreg_deregister(struct mca_mpool_base_module_t *mpool, - mca_mpool_base_registration_t *reg) -{ - 
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) mpool; - - assert(reg->ref_count > 0); - - reg->ref_count--; - - if (0 == reg->ref_count && reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) { - mca_mpool_udreg_dereg_func (reg, mpool); - } else if (!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) { - OPAL_THREAD_LOCK(&mpool_udreg->lock); - UDREG_DecrRefcount (mpool_udreg->udreg_handle, reg->mpool_context); - OPAL_THREAD_UNLOCK(&mpool_udreg->lock); - } - - return OPAL_SUCCESS; -} - -void mca_mpool_udreg_finalize(struct mca_mpool_base_module_t *mpool) -{ - mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t*)mpool; - - /* Statistic */ - if (true == mca_mpool_udreg_component.print_stats) { - uint64_t hit = 0, miss = 0, evicted = 0; - - (void) UDREG_GetStat (mpool_udreg->udreg_handle, - UDREG_STAT_CACHE_HIT, &hit); - - (void) UDREG_GetStat (mpool_udreg->udreg_handle, - UDREG_STAT_CACHE_MISS, &miss); - - (void) UDREG_GetStat (mpool_udreg->udreg_handle, - UDREG_STAT_CACHE_EVICTED, &evicted); - - opal_output(0, "%s udreg: stats (hit/miss/evicted): %" PRIu64 "/%" PRIu64 "/%" PRIu64 "\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), hit, miss, evicted); - } - - UDREG_CacheRelease (mpool_udreg->udreg_handle); - OBJ_DESTRUCT(&mpool_udreg->reg_list); - OBJ_DESTRUCT(&mpool_udreg->lock); -} - -int mca_mpool_udreg_ft_event(int state) { - return OPAL_SUCCESS; -} - - - - - - - - - - diff --git a/opal/mca/mpool/udreg/owner.txt b/opal/mca/mpool/udreg/owner.txt deleted file mode 100644 index 52961b5d12f..00000000000 --- a/opal/mca/mpool/udreg/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: LANL -status: maintenance diff --git a/opal/mca/patcher/Makefile.am b/opal/mca/patcher/Makefile.am new file mode 100644 index 00000000000..664683bad89 --- /dev/null +++ b/opal/mca/patcher/Makefile.am @@ -0,0 +1,39 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# main library setup +noinst_LTLIBRARIES = libmca_patcher.la +libmca_patcher_la_SOURCES = + +# local files +headers = patcher.h +libmca_patcher_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +opaldir = $(opalincludedir)/$(subdir) +nobase_opal_HEADERS = $(headers) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h diff --git a/opal/mca/patcher/base/Makefile.am b/opal/mca/patcher/base/Makefile.am new file mode 100644 index 00000000000..441e1c645d2 --- /dev/null +++ b/opal/mca/patcher/base/Makefile.am @@ -0,0 +1,25 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. 
+# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += base/base.h + +libmca_patcher_la_SOURCES += base/patcher_base_frame.c \ + base/patcher_base_patch.c diff --git a/opal/mca/patcher/base/base.h b/opal/mca/patcher/base/base.h new file mode 100644 index 00000000000..0be3cf1dd7c --- /dev/null +++ b/opal/mca/patcher/base/base.h @@ -0,0 +1,83 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#ifndef OPAL_PATCHER_BASE_H +#define OPAL_PATCHER_BASE_H + +#include "opal_config.h" +#include "opal/mca/base/mca_base_framework.h" +#include "opal/mca/patcher/patcher.h" + + +BEGIN_C_DECLS + +#define MCA_BASE_PATCHER_MAX_PATCH 32 + +struct mca_patcher_base_patch_t; + +typedef void (*mca_patcher_base_restore_fn_t) (struct mca_patcher_base_patch_t *); + +struct mca_patcher_base_patch_t { + /** patches are list items */ + opal_list_item_t super; + /** name symbol to patch */ + char *patch_symbol; + /** address of function to call instead */ + uintptr_t patch_value; + /** original address of function */ + uintptr_t patch_orig; + /** patch data */ + unsigned char patch_data[MCA_BASE_PATCHER_MAX_PATCH]; + /** original data */ + unsigned char patch_orig_data[MCA_BASE_PATCHER_MAX_PATCH]; + /** size of patch data */ + unsigned patch_data_size; + /** function to undo the patch */ + mca_patcher_base_restore_fn_t patch_restore; +}; + +typedef struct mca_patcher_base_patch_t mca_patcher_base_patch_t; + +OBJ_CLASS_DECLARATION(mca_patcher_base_patch_t); + +/** + * Framework struct declaration for this framework + */ +OPAL_DECLSPEC extern mca_base_framework_t opal_patcher_base_framework; +OPAL_DECLSPEC int opal_patcher_base_select (void); +OPAL_DECLSPEC int mca_patcher_base_patch_hook (mca_patcher_base_module_t *module, uintptr_t hook); +OPAL_DECLSPEC void mca_base_patcher_patch_apply_binary (mca_patcher_base_patch_t *patch); + +static inline uintptr_t mca_patcher_base_addr_text (uintptr_t addr) { +#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) && (!defined (_CALL_ELF) || (_CALL_ELF != 2)) + struct odp_t { + uintptr_t text; + uintptr_t toc; + } *odp = (struct odp_t *) addr; + return (odp)?odp->text:0; +#else + return addr; +#endif +} + +END_C_DECLS +#endif /* OPAL_BASE_PATCHER_H */ diff --git a/opal/mca/patcher/base/patcher_base_frame.c b/opal/mca/patcher/base/patcher_base_frame.c new file mode 
100644 index 00000000000..8d685d3fa43 --- /dev/null +++ b/opal/mca/patcher/base/patcher_base_frame.c @@ -0,0 +1,81 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/mca/patcher/patcher.h" +#include "opal/mca/patcher/base/base.h" +#include "opal/mca/patcher/base/static-components.h" + +/* + * Local variables + */ +static mca_patcher_base_module_t empty_module; + +/* + * Globals + */ +mca_patcher_base_module_t *opal_patcher = &empty_module; + +int opal_patcher_base_select (void) +{ + mca_patcher_base_module_t *best_module; + mca_patcher_base_component_t *best_component; + int rc, priority; + + rc = mca_base_select ("patcher", opal_patcher_base_framework.framework_output, + &opal_patcher_base_framework.framework_components, + (mca_base_module_t **) &best_module, (mca_base_component_t **) &best_component, + &priority); + if (OPAL_SUCCESS != rc) { + return rc; + } + + OBJ_CONSTRUCT(&best_module->patch_list, opal_list_t); + OBJ_CONSTRUCT(&best_module->patch_list_mutex, opal_mutex_t); + + if (best_module->patch_init) { + rc = best_module->patch_init (); + if (OPAL_SUCCESS != rc) { + return rc; + } + } + + opal_patcher = best_module; + + return OPAL_SUCCESS; +} + +static int opal_patcher_base_close (void) +{ + if (opal_patcher == &empty_module) { + return OPAL_SUCCESS; + } + + mca_patcher_base_patch_t *patch; + OPAL_LIST_FOREACH_REV(patch, &opal_patcher->patch_list, mca_patcher_base_patch_t) { + patch->patch_restore (patch); + } + + OPAL_LIST_DESTRUCT(&opal_patcher->patch_list); + OBJ_DESTRUCT(&opal_patcher->patch_list_mutex); + + if (opal_patcher->patch_fini) { + return opal_patcher->patch_fini (); + } + + return OPAL_SUCCESS; +} + +/* Use default register/open functions */ +MCA_BASE_FRAMEWORK_DECLARE(opal, patcher, "runtime code patching", NULL, 
NULL, + opal_patcher_base_close, mca_patcher_base_static_components, + 0); diff --git a/opal/mca/patcher/base/patcher_base_patch.c b/opal/mca/patcher/base/patcher_base_patch.c new file mode 100644 index 00000000000..8f731f8afb6 --- /dev/null +++ b/opal/mca/patcher/base/patcher_base_patch.c @@ -0,0 +1,193 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/mca/patcher/patcher.h" +#include "opal/mca/patcher/base/base.h" +#include "opal/util/sys_limits.h" +#include "opal/prefetch.h" +#include + +static void mca_patcher_base_patch_construct (mca_patcher_base_patch_t *patch) +{ + patch->patch_symbol = NULL; + patch->patch_data_size = 0; +} + +static void mca_patcher_base_patch_destruct (mca_patcher_base_patch_t *patch) +{ + free (patch->patch_symbol); +} + +OBJ_CLASS_INSTANCE(mca_patcher_base_patch_t, opal_list_item_t, + mca_patcher_base_patch_construct, + mca_patcher_base_patch_destruct); + +#if defined(__PPC__) + +// PowerPC instructions used in patching +// Reference: "PowerPC User Instruction Set Architecture" +static unsigned int addis(unsigned int RT, unsigned int RS, unsigned int UI) { + return (15<<26) + (RT<<21) + (RS<<16) + (UI&0xffff); +} +static unsigned int ori(unsigned int RT, unsigned int RS, unsigned int UI) { + return (24<<26) + (RS<<21) + (RT<<16) + (UI&0xffff); +} +static unsigned int oris(unsigned int RT, unsigned int RS, unsigned int UI) { + return (25<<26) + (RS<<21) + (RT<<16) + (UI&0xffff); +} +static unsigned int mtspr(unsigned int SPR, unsigned int RS) { + return (31<<26) + (RS<<21) + ((SPR&0x1f)<<16) + ((SPR>>5)<<11) + (467<<1); +} +static unsigned int bcctr(unsigned int BO, unsigned int BI, unsigned int 
BH) { + return (19<<26) + (BO<<21) + (BI<<16) + (BH<<11) + (528<<1); +} +static unsigned int rldicr(unsigned int RT, unsigned int RS, unsigned int SH, unsigned int MB) +{ + return (30<<26) + (RS<<21) + (RT<<16) + ((SH&0x1f)<<11) + ((SH>>5)<<1) + + ((MB&0x1f)<<6) + ((MB>>5)<<5) + (1<<2); +} + +static int PatchLoadImm (uintptr_t addr, unsigned int reg, size_t value) +{ +#if defined(__PPC64__) + *(unsigned int *) (addr + 0) = addis ( reg, 0, (value >> 48)); + *(unsigned int *) (addr + 4) = ori ( reg, reg, (value >> 32)); + *(unsigned int *) (addr + 8) = rldicr( reg, reg, 32, 31); + *(unsigned int *) (addr +12) = oris ( reg, reg, (value >> 16)); + *(unsigned int *) (addr +16) = ori ( reg, reg, (value >> 0)); + return 20; +#else + *(unsigned int *) (addr + 0) = addis ( reg, 0, (value >> 16)); + *(unsigned int *) (addr + 4) = ori ( reg, reg, (value >> 0)); + return 8; +#endif +} + +#endif + +static void flush_and_invalidate_cache (unsigned long a) +{ +#if OPAL_ASSEMBLY_ARCH == OPAL_IA32 + static int have_clflush = -1; + + if (OPAL_UNLIKELY(-1 == have_clflush)) { + int32_t cpuid1, cpuid2, tmp; + const int32_t level = 1; + + /* cpuid clobbers ebx but it must be restored for -fPIC so save + * then restore ebx */ + __asm__ volatile ("xchgl %%ebx, %2\n" + "cpuid\n" + "xchgl %%ebx, %2\n": + "=a" (cpuid1), "=d" (cpuid2), "=r" (tmp) : + "a" (level) : + "ecx"); + /* clflush is in edx bit 19 */ + have_clflush = !!(cpuid2 & (1 << 19)); + } + + if (have_clflush) { + /* does not work with AMD processors */ + __asm__ volatile("mfence;clflush %0;mfence" : :"m" (*(char*)a)); + } +#elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 + __asm__ volatile("mfence;clflush %0;mfence" : :"m" (*(char*)a)); +#elif OPAL_ASSEMBLY_ARCH == OPAL_IA64 + __asm__ volatile ("fc %0;; sync.i;; srlz.i;;" : : "r"(a) : "memory"); +#endif +} + +// modify protection of memory range +static void ModifyMemoryProtection (uintptr_t addr, size_t length, int prot) +{ + long page_size = opal_getpagesize (); + uintptr_t base = (addr 
& ~(page_size-1)); + uintptr_t bound = ((addr + length + page_size-1) & ~(page_size-1)); + + length = bound - base; + +#if defined(__PPC__) + /* NTH: is a loop necessary here? */ + do { + if (mprotect((void *)base, page_size, prot)) + perror("MemHook: mprotect failed"); + base += page_size; + } while (base < bound); +#else + if (mprotect((void *) base, length, prot)) { + perror("MemHook: mprotect failed"); + } +#endif +} + +static inline void apply_patch (unsigned char *patch_data, uintptr_t address, size_t data_size) +{ + ModifyMemoryProtection (address, data_size, PROT_EXEC|PROT_READ|PROT_WRITE); + memcpy ((void *) address, patch_data, data_size); + for (size_t i = 0 ; i < data_size ; i += 16) { + flush_and_invalidate_cache (address + i); + } + + ModifyMemoryProtection (address, data_size, PROT_EXEC|PROT_READ); +} + +static void mca_base_patcher_patch_unapply_binary (mca_patcher_base_patch_t *patch) +{ + apply_patch (patch->patch_orig_data, patch->patch_orig, patch->patch_data_size); +} + +void mca_base_patcher_patch_apply_binary (mca_patcher_base_patch_t *patch) +{ + memcpy (patch->patch_orig_data, (void *) patch->patch_orig, patch->patch_data_size); + apply_patch (patch->patch_data, patch->patch_orig, patch->patch_data_size); + patch->patch_restore = mca_base_patcher_patch_unapply_binary; +} + + +int mca_patcher_base_patch_hook (mca_patcher_base_module_t *module, uintptr_t hook_addr) +{ +#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) + mca_patcher_base_patch_t *hook_patch; + const unsigned int nop = 0x60000000; + + hook_patch = OBJ_NEW(mca_patcher_base_patch_t); + if (OPAL_UNLIKELY(NULL == hook_patch)) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + // locate reserved code space in hook function + for (unsigned int *nop_addr = (unsigned int *)hook_addr ; ; nop_addr++) { + if (nop_addr[0] == nop && nop_addr[1] == nop && nop_addr[2] == nop + && nop_addr[3] == nop && nop_addr[4] == nop) { + hook_patch->patch_orig = (uintptr_t) nop_addr; + break; + } + } + + // generate 
code to restore TOC + unsigned long toc; + + asm volatile ("std 2, %0" : "=m" (toc)); + + hook_patch->patch_data_size = PatchLoadImm((uintptr_t)hook_patch->patch_data, 2, toc); + + /* put the hook patch on the patch list so it will be undone on finalize */ + opal_list_append (&module->patch_list, &hook_patch->super); + + mca_base_patcher_patch_apply_binary (hook_patch); +#endif + + return OPAL_SUCCESS; +} diff --git a/opal/mca/patcher/overwrite/Makefile.am b/opal/mca/patcher/overwrite/Makefile.am new file mode 100644 index 00000000000..e9e4a317181 --- /dev/null +++ b/opal/mca/patcher/overwrite/Makefile.am @@ -0,0 +1,47 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +if MCA_BUILD_opal_patcher_overwrite_DSO +component_noinst = +component_install = mca_patcher_overwrite.la +else +component_noinst = libmca_patcher_overwrite.la +component_install = +endif + +overwrite_SOURCES = \ + patcher_overwrite.h \ + patcher_overwrite_module.c \ + patcher_overwrite_component.c + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_patcher_overwrite_la_SOURCES = $(overwrite_SOURCES) +nodist_mca_patcher_overwrite_la_SOURCES = $(overwrite_nodist_SOURCES) +mca_patcher_overwrite_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_patcher_overwrite_la_SOURCES = $(overwrite_SOURCES) +nodist_libmca_patcher_overwrite_la_SOURCES = $(overwrite_nodist_SOURCES) +libmca_patcher_overwrite_la_LIBADD = $(patcher_overwrite_LIBS) +libmca_patcher_overwrite_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/patcher/overwrite/configure.m4 b/opal/mca/patcher/overwrite/configure.m4 new file mode 100644 index 00000000000..24494726851 --- /dev/null +++ b/opal/mca/patcher/overwrite/configure.m4 @@ -0,0 +1,42 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_patcher_overwrite_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_patcher_overwrite_CONFIG],[ + AC_CONFIG_FILES([opal/mca/patcher/overwrite/Makefile]) + + opal_patcher_overwrite_happy=no + if test $OPAL_ENABLE_DLOPEN_SUPPORT = 1; then +# Disable ia64 for now. We can revive it later if anyone cares + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__PPC__) +#error "platform not supported" +#endif +]],[])],[opal_patcher_overwrite_happy=yes],[]) + fi + + AS_IF([test $opal_patcher_overwrite_happy = yes], [$1], [$2]) +]) diff --git a/opal/mca/patcher/overwrite/patcher_overwrite.h b/opal/mca/patcher/overwrite/patcher_overwrite.h new file mode 100644 index 00000000000..9c2ad58dfd5 --- /dev/null +++ b/opal/mca/patcher/overwrite/patcher_overwrite.h @@ -0,0 +1,32 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file pather_overwrite.h + * + * This component works by overwritting the first couple instructions in + * the target function with a jump instruction to the hook function. The + * hook function will be expected to implement the functionality of the + * hooked function when using this module. + * + * Note: This component only supports x86, x86_64, ia64, and powerpc/power. 
+ */ + +#if !defined(OPAL_PATCHER_OVERWRITE_H) +#define OPAL_PATCHER_OVERWRITE_H + +#include "opal_config.h" +#include "opal/mca/patcher/patcher.h" +#include "opal/class/opal_list.h" + +extern mca_patcher_base_module_t mca_patcher_overwrite_module; +extern mca_patcher_base_component_t mca_patcher_overwrite_component; + +#endif /* !defined(OPAL_PATCHER_OVERWRITE_H) */ diff --git a/opal/mca/patcher/overwrite/patcher_overwrite_component.c b/opal/mca/patcher/overwrite/patcher_overwrite_component.c new file mode 100644 index 00000000000..5211d4deaef --- /dev/null +++ b/opal/mca/patcher/overwrite/patcher_overwrite_component.c @@ -0,0 +1,45 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "patcher_overwrite.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +static int mca_patcher_overwrite_priority; + +static int mca_patcher_overwrite_register (void) +{ + mca_patcher_overwrite_priority = 37; + mca_base_component_var_register (&mca_patcher_overwrite_component.patcherc_version, + "priority", "Priority of the overwrite binary patcher component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_CONSTANT, &mca_patcher_overwrite_priority); + + return OPAL_SUCCESS; +} + +static int mca_patcher_overwrite_query (mca_base_module_t **module, int *priority) +{ + *module = &mca_patcher_overwrite_module.super; + *priority = mca_patcher_overwrite_priority; + return OPAL_SUCCESS; +} + +mca_patcher_base_component_t mca_patcher_overwrite_component = { + .patcherc_version = { + OPAL_PATCHER_BASE_VERSION_1_0_0, + .mca_component_name = "overwrite", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + .mca_query_component = mca_patcher_overwrite_query, + .mca_register_component_params = 
mca_patcher_overwrite_register, + }, +}; diff --git a/opal/mca/patcher/overwrite/patcher_overwrite_module.c b/opal/mca/patcher/overwrite/patcher_overwrite_module.c new file mode 100644 index 00000000000..b8a290d0430 --- /dev/null +++ b/opal/mca/patcher/overwrite/patcher_overwrite_module.c @@ -0,0 +1,309 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "patcher_overwrite.h" + +#include "opal/mca/patcher/base/base.h" + +#include "opal/constants.h" +#include "opal/util/sys_limits.h" +#include "opal/util/output.h" +#include "opal/prefetch.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_IA64) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) + +#if (OPAL_ASSEMBLY_ARCH == OPAL_IA64) + +#define INSERT_BIT(d,p,v) do { \ + unsigned char c=*(d); \ + assert(((p) < 8) && ((p) >= 0)); \ + c&= ~(1<<(p)); \ + c|= ((v)<<(p)); \ + *(d) = c; \ + } while (0) + +static inline void +copy_instr_slot(unsigned char **dst, int *dst_bitpos, unsigned long instr_slot) +{ + for (int i = 40 ; i >= 0 ; --i) { + INSERT_BIT(*dst, *dst_bitpos, (instr_slot>>i)&1); + if (*dst_bitpos == 0) { + ++*dst; + *dst_bitpos = 7; + } else { + --*dst_bitpos; + } + } +} + +static void make_ia64_bundle (unsigned char *dst, + unsigned long i2, + unsigned long i1, + unsigned long i0, + unsigned template) +{ +/* + * each instr is 41 bits, template is 5 bits + * + * generate the bit concatenation of i2:i1:i0:t, all in all 128 bits + * + */ + + int dst_bitpos = 7; + + copy_instr_slot(&dst, &dst_bitpos, i2); + copy_instr_slot(&dst, &dst_bitpos, i1); + 
copy_instr_slot(&dst, &dst_bitpos, i0); + + assert(dst_bitpos == 4); + + for (int i = 4 ; i >= 0 ; --i) { + INSERT_BIT(dst, dst_bitpos, (template>>i)&1); + --dst_bitpos; + } +} +#endif /* defined(__ia64__) */ + +static int mca_patcher_overwrite_apply_patch (mca_patcher_base_patch_t *patch) +{ + uintptr_t func_new_addr = patch->patch_value; + + { +#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) + patch->patch_data_size = 5; + *(unsigned char *)(patch->patch_data+0) = 0xe9; + *(unsigned int *) (patch->patch_data+1) = (unsigned int)(func_new_addr - patch->patch_orig - 5); +#elif (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) + patch->patch_data_size = 13; + *(unsigned short*)(patch->patch_data + 0) = 0xbb49; + *(unsigned long* )(patch->patch_data + 2) = (unsigned long) func_new_addr; + *(unsigned char*) (patch->patch_data +10) = 0x41; + *(unsigned char*) (patch->patch_data +11) = 0xff; + *(unsigned char*) (patch->patch_data +12) = 0xe3; +#elif (OPAL_ASSEMBLY_ARCH == OPAL_IA64) + { +/* + * target64 = IP + ((i << 59 | imm39 << 20 | imm20) << 4) + * imm64 = i << 63 | imm41 << 22 | ic << 21 | imm5c << 16 | imm9d << 7 | imm7b + */ + unsigned char buf[16]; + unsigned long long imm64 = func_new_addr - patch->patch_orig - 16; + register unsigned long long glb_ptr __asm__("r1"); + unsigned long long nop = + (0x0ULL<<37) | /* O */ + (0x0ULL<<36) | /* i */ + (0x0ULL<<33) | /* x3 */ + (0x1ULL<<27) | /* x6 */ + (0x0ULL<< 6) | /* imm20 */ + (0x0ULL<< 0); /* qp */ + unsigned long long brl = + (0xcULL << 37) | + (((imm64>>63)&0x1ULL) << 36) | + (0x0ULL << 35) | + (0x0ULL << 33) | + (((imm64>>4)&0xFFFFFULL) << 13) | + (0x0ULL << 6) | + (0x0ULL << 0); + unsigned long long movl = + (0x6ULL << 37) | + (((glb_ptr>>63)&0x1ULL) << 36) | + (((glb_ptr>> 7)&0x1FFULL) << 27) | + (((glb_ptr>>16)&0x1FULL) << 22) | + (((glb_ptr>>21)&0x1ULL) << 21) | + (0ULL << 20) | + (((glb_ptr>> 0)&0x7FULL) << 13) | + (1ULL << 6) | + (0x0ULL << 0); + + patch->patch_data_size = 32; + + make_ia64_bundle(buf, movl, 
(glb_ptr>>22)&0x1FFFFFFFFFFULL, nop, 5); + for (int i = 0 ; i < 16 ; ++i) { + patch->patch_data[16-i-1] = buf[i]; + } + + make_ia64_bundle(buf, brl, ((imm64>>24)&0x7FFFFFFFFFULL)<<2, nop, 5); + for (int i = 0 ; i < 16 ; ++i) { + patch->patch_data[32-i-1] = buf[i]; + } + } +#endif + } + + mca_base_patcher_patch_apply_binary (patch); + + return OPAL_SUCCESS; +} + +/* end of #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) */ +// ------------------------------------------------- PPC equivalent: +#elif (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32) || (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) + +// PowerPC instructions used in patching +// Reference: "PowerPC User Instruction Set Architecture" +static unsigned int addis(unsigned int RT, unsigned int RS, unsigned int UI) { + return (15<<26) + (RT<<21) + (RS<<16) + (UI&0xffff); +} +static unsigned int ori(unsigned int RT, unsigned int RS, unsigned int UI) { + return (24<<26) + (RS<<21) + (RT<<16) + (UI&0xffff); +} +static unsigned int oris(unsigned int RT, unsigned int RS, unsigned int UI) { + return (25<<26) + (RS<<21) + (RT<<16) + (UI&0xffff); +} +static unsigned int mtspr(unsigned int SPR, unsigned int RS) { + return (31<<26) + (RS<<21) + ((SPR&0x1f)<<16) + ((SPR>>5)<<11) + (467<<1); +} +static unsigned int bcctr(unsigned int BO, unsigned int BI, unsigned int BH) { + return (19<<26) + (BO<<21) + (BI<<16) + (BH<<11) + (528<<1); +} +static unsigned int rldicr(unsigned int RT, unsigned int RS, unsigned int SH, unsigned int MB) +{ + return (30<<26) + (RS<<21) + (RT<<16) + ((SH&0x1f)<<11) + ((SH>>5)<<1) + + ((MB&0x1f)<<6) + ((MB>>5)<<5) + (1<<2); +} + +static int +PatchLoadImm(uintptr_t addr, unsigned int reg, size_t value) +{ +#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) + *(unsigned int *) (addr + 0) = addis ( reg, 0, (value >> 48)); + *(unsigned int *) (addr + 4) = ori ( reg, reg, (value >> 32)); + *(unsigned int *) (addr + 8) = rldicr( reg, reg, 32, 31); + *(unsigned int *) (addr +12) = oris ( reg, reg, (value >> 
16)); + *(unsigned int *) (addr +16) = ori ( reg, reg, (value >> 0)); + return 20; +#else + *(unsigned int *) (addr + 0) = addis ( reg, 0, (value >> 16)); + *(unsigned int *) (addr + 4) = ori ( reg, reg, (value >> 0)); + return 8; +#endif +} + + +static int mca_patcher_overwrite_apply_patch (mca_patcher_base_patch_t *patch) +{ + uintptr_t sys_addr, hook_addr; + int offset, rc; + + // get system function address + sys_addr = mca_patcher_base_addr_text(patch->patch_orig); + hook_addr = mca_patcher_base_addr_text(patch->patch_value); + +// Patch for hook function: +#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) + rc = mca_patcher_base_patch_hook (&mca_patcher_overwrite_module, hook_addr); + if (OPAL_SUCCESS != rc) { + return rc; + } + +#if defined(_CALL_ELF) && (_CALL_ELF == 2) + sys_addr += 8; + hook_addr += 8; +#endif /* _CALL_ELF == 2*/ +#endif + + // Patch for system function: + // generate patch code + // r11 is a volatile register according to PowerPC EABI + const unsigned int gr = 11; + offset = PatchLoadImm ((uintptr_t) patch->patch_data, gr, hook_addr); + *(unsigned int *) (patch->patch_data + offset + 0) = mtspr (9, gr); // 9 = CTR + *(unsigned int *) (patch->patch_data + offset + 4) = bcctr (20, 0, 0);// 20 = always + patch->patch_data_size = offset + 8; + patch->patch_orig = sys_addr; + + mca_base_patcher_patch_apply_binary (patch); + + return OPAL_SUCCESS; +} + +#endif + +static int mca_patcher_overwrite_patch_address (uintptr_t sys_addr, uintptr_t hook_addr) +{ + mca_patcher_base_patch_t *patch; + int rc; + + patch = OBJ_NEW(mca_patcher_base_patch_t); + if (OPAL_UNLIKELY(NULL == patch)) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + patch->patch_orig = sys_addr; + patch->patch_value = hook_addr; + + opal_mutex_lock (&mca_patcher_overwrite_module.patch_list_mutex); + do { + rc = mca_patcher_overwrite_apply_patch (patch); + if (OPAL_SUCCESS != rc) { + break; + } + + opal_list_append (&mca_patcher_overwrite_module.patch_list, &patch->super); + } while (0); + + 
opal_mutex_unlock (&mca_patcher_overwrite_module.patch_list_mutex); + + return OPAL_SUCCESS; +} + +static int mca_patcher_overwrite_patch_symbol (const char *func_symbol_name, uintptr_t func_new_addr, + uintptr_t *func_old_addr) +{ + void *sym_addr; + char *error; + uintptr_t old_addr; + + /* NTH: might want to update opal/mca/dl to handle lookups in the default + * handle. */ + sym_addr = dlsym (RTLD_NEXT, func_symbol_name); + if (NULL == sym_addr) { + sym_addr = dlsym(RTLD_DEFAULT, func_symbol_name); + if ( (sym_addr == NULL) && ((error = dlerror()) != NULL) ) { + opal_output(-1, "error locating symbol %s to patch. %s", func_symbol_name, + error); + return OPAL_ERR_NOT_FOUND; + } + } + + old_addr = (unsigned long)sym_addr; + +#if (OPAL_ASSEMBLY_ARCH == OPAL_IA64) + /* On IA64 addresses are all indirect */ + func_new_addr = *(unsigned long *)func_new_addr; + old_addr = *(unsigned long *) old_addr; +#endif + + if (func_old_addr) { + /* we will be overwritting part of the original function. do not return + * its address */ + *func_old_addr = 0; + } + + return mca_patcher_overwrite_patch_address (old_addr, func_new_addr); +} + +mca_patcher_base_module_t mca_patcher_overwrite_module = { + .patch_symbol = mca_patcher_overwrite_patch_symbol, + .patch_address = mca_patcher_overwrite_patch_address, +}; diff --git a/opal/mca/patcher/patcher.h b/opal/mca/patcher/patcher.h new file mode 100644 index 00000000000..8e8d13a1c93 --- /dev/null +++ b/opal/mca/patcher/patcher.h @@ -0,0 +1,121 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MCA_PATCHER_PATCHER_H +#define OPAL_MCA_PATCHER_PATCHER_H + +#include "opal_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/class/opal_list.h" + +/* Any function being patched in as a hook must use OPAL_PATCHER_BEGIN at the top, + * and OPAL_PATCHER_END before it returns (this is just for PPC). */ + +#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) + +/* special processing for ppc64 to save and restore TOC (r2) + * Reference: "64-bit PowerPC ELF Application Binary Interface Supplement 1.9" */ +#define OPAL_PATCHER_BEGIN \ + unsigned long toc_save; \ + asm volatile ("std 2, %0" : "=m" (toc_save)); \ + asm volatile ("nop; nop; nop; nop; nop"); +#define OPAL_PATCHER_END \ + asm volatile ("ld 2, %0" : : "m" (toc_save)); + +#else /* !__PPC64__ */ + +#define OPAL_PATCHER_BEGIN +#define OPAL_PATCHER_END + +#endif + +/** + * Make any calls to the named function redirect to a new function + * + * @param[in] func_symbol_name function to hook + * @param[in] func_new_addr function pointer of hook + * @param[out] func_old_addr address of func_symbol_name + * + * This function redirects all calls to the function func_symbol_name to + * the function pointer func_new_addr. If it is possible for the hook + * function to call the original function the patcher module will return + * the old function's address in func_old_addr. + */ +typedef int (*mca_patcher_base_patch_symbol_fn_t)(const char *func_symbol_name, uintptr_t func_new_addr, + uintptr_t *func_old_addr); + +/** + * Make any calls to a function redirect to a new function + * + * @param[in] func_addr address of the function to hook + * @param[in] func_new_addr function pointer of hook + * + * This function redirects all calls to the function at func_addr to + * the function pointer func_new_addr. 
+ */ +typedef int (*mca_patcher_base_patch_address_fn_t)(uintptr_t func_addr, uintptr_t func_new_addr); + +/** + * Set up the patcher module + */ +typedef int (*mca_patcher_base_init_fn_t) (void); + +/** + * Finalize the patcher module + */ +typedef int (*mca_patcher_base_fini_fn_t) (void); + +/** + * Structure for patcher modules. + */ +typedef struct mca_patcher_base_module_t { + mca_base_module_t super; + /** list of patches */ + opal_list_t patch_list; + /** lock for patch list */ + opal_mutex_t patch_list_mutex; + /** function to call if the patcher module is used. can + * be NULL. */ + mca_patcher_base_init_fn_t patch_init; + /** function to call when patcher is unloaded. this function + * MUST clean up all active patches. can be NULL. */ + mca_patcher_base_fini_fn_t patch_fini; + /** hook a symbol. may be NULL */ + mca_patcher_base_patch_symbol_fn_t patch_symbol; + /** hook a function pointer. may be NULL */ + mca_patcher_base_patch_address_fn_t patch_address; +} mca_patcher_base_module_t; + + +OPAL_DECLSPEC extern mca_patcher_base_module_t *opal_patcher; + +/** + * Structure for patcher components. + */ +typedef struct mca_patcher_base_component_1_0_0_t { + /** MCA base component */ + mca_base_component_t patcherc_version; + /** MCA base data */ + mca_base_component_data_t patcherc_data; +} mca_patcher_base_component_1_0_0_t; + +typedef mca_patcher_base_component_1_0_0_t mca_patcher_base_component_t; + +/* + * Macro for use in components that are of type patcher + */ +#define OPAL_PATCHER_BASE_VERSION_1_0_0 \ + OPAL_MCA_BASE_VERSION_2_1_0("patcher", 1, 0, 0) + +#endif /* OPAL_MCA_PATCHER_PATCHER_H */ diff --git a/opal/mca/pmix/Makefile.am b/opal/mca/pmix/Makefile.am index 9726f810a88..0d71f702cdb 100644 --- a/opal/mca/pmix/Makefile.am +++ b/opal/mca/pmix/Makefile.am @@ -1,9 +1,9 @@ # -# Copyright (c) 2014 Intel, Inc. All rights reserved. +# Copyright (c) 2014 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -17,7 +17,7 @@ libmca_pmix_la_SOURCES = dist_opaldata_DATA = # local files -headers = pmix.h +headers = pmix.h pmix_types.h pmix_server.h libmca_pmix_la_SOURCES += $(headers) # Conditionally install the header files diff --git a/opal/mca/pmix/base/Makefile.am b/opal/mca/pmix/base/Makefile.am index 51aeb5e83f8..3af899e2b02 100644 --- a/opal/mca/pmix/base/Makefile.am +++ b/opal/mca/pmix/base/Makefile.am @@ -1,11 +1,11 @@ # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -13,9 +13,11 @@ dist_opaldata_DATA += base/help-pmix-base.txt headers += \ base/base.h \ - base/pmix_base_fns.h + base/pmix_base_fns.h \ + base/pmix_base_hash.h libmca_pmix_la_SOURCES += \ base/pmix_base_frame.c \ base/pmix_base_select.c \ - base/pmix_base_fns.c + base/pmix_base_fns.c \ + base/pmix_base_hash.c diff --git a/opal/mca/pmix/base/base.h b/opal/mca/pmix/base/base.h index 65fa670e6dc..c1aec6e4e3a 100644 --- a/opal/mca/pmix/base/base.h +++ b/opal/mca/pmix/base/base.h @@ -1,9 +1,9 @@ /* - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: @@ -18,6 +18,7 @@ #include "opal/mca/mca.h" #include "opal/mca/base/mca_base_framework.h" +#include "opal/mca/pmix/pmix_types.h" #include "opal/mca/pmix/pmix.h" BEGIN_C_DECLS @@ -31,10 +32,28 @@ OPAL_DECLSPEC int opal_pmix_base_select(void); OPAL_DECLSPEC extern bool opal_pmix_base_allow_delayed_server; -OPAL_DECLSPEC void opal_pmix_base_register_handler(opal_pmix_errhandler_fn_t err); -OPAL_DECLSPEC void opal_pmix_base_deregister_handler(void); -OPAL_DECLSPEC void opal_pmix_base_errhandler(int error); - +OPAL_DECLSPEC void opal_pmix_base_register_handler(opal_list_t *info, + opal_pmix_notification_fn_t errhandler, + opal_pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata); +OPAL_DECLSPEC void opal_pmix_base_deregister_handler(int errhandler, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_DECLSPEC void opal_pmix_base_errhandler(int status, + opal_list_t *procs, + opal_list_t *info, + opal_pmix_release_cbfunc_t cbfunc, void *cbdata); +OPAL_DECLSPEC int opal_pmix_base_exchange(opal_value_t *info, + opal_pmix_pdata_t *pdat, + int timeout); + +OPAL_DECLSPEC void opal_pmix_base_set_evbase(opal_event_base_t *evbase); + +typedef struct { + opal_event_base_t *evbase; +} opal_pmix_base_t; + +extern opal_pmix_base_t opal_pmix_base; END_C_DECLS diff --git a/opal/mca/pmix/base/help-pmix-base.txt b/opal/mca/pmix/base/help-pmix-base.txt index f603f2eaf31..0b8ce5860eb 100644 --- a/opal/mca/pmix/base/help-pmix-base.txt +++ b/opal/mca/pmix/base/help-pmix-base.txt @@ -1,6 +1,8 @@ -*- text -*- # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. # # $COPYRIGHT$ # @@ -8,12 +10,11 @@ # # $HEADER$ # -# This is the US/English general help file for OPAL Errmgr HNP module. +# This is the US/English general help file for OPAL PMIx base. 
# -[errmgr-hnp:unknown-job-error] -An error has occurred in an unknown job. This generally should not happen -except due to an internal OPAL error. +[pmix2-init-failed] +PMI2_Init failed to initialize. Return code: %d -Job state: %s - -This information should probably be repopald to the OMPI developers. +[pmix2-init-returned-bad-values] +PMI2_Init was initialized but negative values for job size and/or +rank were returned. diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index 30d091d181a..03521b8ebef 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -1,14 +1,16 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -21,6 +23,9 @@ #include #include +#ifdef HAVE_UNISTD_H +#include +#endif #include "opal_stdint.h" #include "opal/class/opal_pointer_array.h" @@ -28,31 +33,181 @@ #include "opal/util/output.h" #include "opal/util/proc.h" #include "opal/util/show_help.h" +#include "opal/errhandler/opal_errhandler.h" #include "opal/mca/pmix/base/base.h" #include "opal/mca/pmix/base/pmix_base_fns.h" +#include "opal/mca/pmix/base/pmix_base_hash.h" #define OPAL_PMI_PAD 10 -static opal_pmix_errhandler_fn_t errhandler = NULL; +void opal_pmix_base_set_evbase(opal_event_base_t *evbase) +{ + opal_pmix_base.evbase = evbase; +} + +/******** ERRHANDLER SUPPORT FOR COMPONENTS THAT + ******** DO NOT NATIVELY SUPPORT IT + ********/ +static opal_pmix_notification_fn_t errhandler = NULL; -void opal_pmix_base_register_handler(opal_pmix_errhandler_fn_t err) +void opal_pmix_base_register_handler(opal_list_t *info, + opal_pmix_notification_fn_t err, + opal_pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata) { errhandler = err; + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, 0, cbdata); + } } -void opal_pmix_base_errhandler(int error) +void opal_pmix_base_errhandler(int status, + opal_list_t *procs, + opal_list_t *info, + opal_pmix_release_cbfunc_t cbfunc, void *cbdata) { if (NULL != errhandler) { - errhandler(error); + errhandler(status, procs, info, cbfunc, cbdata); + } else { + opal_invoke_errhandler(OPAL_ERROR, NULL); } } -void opal_pmix_base_deregister_handler(void) +void opal_pmix_base_deregister_handler(int errid, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { errhandler = NULL; + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); + } +} + +struct lookup_caddy_t { + volatile bool active; + int status; + opal_pmix_pdata_t *pdat; +}; + +/******** DATA EXCHANGE ********/ +static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata) +{ + struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata; + 
cd->status = status; + if (OPAL_SUCCESS == status && NULL != data) { + opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data); + if (NULL != p) { + cd->pdat->proc = p->proc; + if (p->value.type == cd->pdat->value.type) { + (void)opal_value_xfer(&cd->pdat->value, &p->value); + } + } + } + cd->active = false; +} + +int opal_pmix_base_exchange(opal_value_t *indat, + opal_pmix_pdata_t *outdat, + int timeout) +{ + int rc; + opal_list_t ilist, mlist; + opal_value_t *info; + opal_pmix_pdata_t *pdat; + struct lookup_caddy_t caddy; + char **keys; + + /* protect the incoming value */ + opal_dss.copy((void**)&info, indat, OPAL_VALUE); + OBJ_CONSTRUCT(&ilist, opal_list_t); + opal_list_append(&ilist, &info->super); + /* tell the server to delete upon read */ + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_PERSISTENCE); + info->type = OPAL_INT; + info->data.integer = OPAL_PMIX_PERSIST_FIRST_READ; + opal_list_append(&ilist, &info->super); + + /* publish it with "session" scope */ + rc = opal_pmix.publish(&ilist); + OPAL_LIST_DESTRUCT(&ilist); + if (OPAL_SUCCESS != rc) { + OPAL_ERROR_LOG(rc); + return rc; + } + + /* lookup the other side's info - if a non-blocking form + * of lookup isn't available, then we use the blocking + * form and trust that the underlying system will WAIT + * until the other side publishes its data */ + OBJ_CONSTRUCT(&ilist, opal_list_t); + pdat = OBJ_NEW(opal_pmix_pdata_t); + pdat->value.key = strdup(outdat->value.key); + pdat->value.type = outdat->value.type; + opal_list_append(&ilist, &pdat->super); + /* setup the constraints */ + OBJ_CONSTRUCT(&mlist, opal_list_t); + /* tell it to wait for the data to arrive */ + info = OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_WAIT); + info->type = OPAL_BOOL; + info->data.flag = true; + opal_list_append(&mlist, &info->super); + /* give it a decent timeout as we don't know when + * the other side will publish - it doesn't + * have to be simultaneous */ + info = 
OBJ_NEW(opal_value_t); + info->key = strdup(OPAL_PMIX_TIMEOUT); + info->type = OPAL_INT; + info->data.integer = timeout; + opal_list_append(&mlist, &info->super); + + /* if a non-blocking version of lookup isn't + * available, then use the blocking version */ + if (NULL == opal_pmix.lookup_nb) { + rc = opal_pmix.lookup(&ilist, &mlist); + OPAL_LIST_DESTRUCT(&mlist); + if (OPAL_SUCCESS != rc) { + OPAL_ERROR_LOG(rc); + OPAL_LIST_DESTRUCT(&ilist); + return rc; + } + } else { + caddy.active = true; + caddy.pdat = pdat; + keys = NULL; + opal_argv_append_nosize(&keys, pdat->value.key); + rc = opal_pmix.lookup_nb(keys, &mlist, lookup_cbfunc, &caddy); + if (OPAL_SUCCESS != rc) { + OPAL_ERROR_LOG(rc); + OPAL_LIST_DESTRUCT(&ilist); + OPAL_LIST_DESTRUCT(&mlist); + opal_argv_free(keys); + return rc; + } + while (caddy.active) { + usleep(10); + } + opal_argv_free(keys); + OPAL_LIST_DESTRUCT(&mlist); + if (OPAL_SUCCESS != caddy.status) { + OPAL_ERROR_LOG(caddy.status); + OPAL_LIST_DESTRUCT(&ilist); + return caddy.status; + } + } + + /* pass back the result */ + outdat->proc = pdat->proc; + rc = opal_value_xfer(&outdat->value, &pdat->value); + OPAL_LIST_DESTRUCT(&ilist); + return rc; } + +/******** DATA CONSOLIDATION ********/ + static char* setup_key(const opal_process_name_t* name, const char *key, int pmix_keylen_max); static char *pmi_encode(const void *val, size_t vallen); static uint8_t *pmi_decode (const char *data, size_t *retlen); @@ -380,7 +535,7 @@ int opal_pmix_base_cache_keys_locally(const opal_process_name_t* id, const char* /* first try to fetch data from data storage */ OBJ_CONSTRUCT(&values, opal_list_t); - rc = opal_dstore.fetch(opal_dstore_internal, id, key, &values); + rc = opal_pmix_base_fetch(id, key, &values); if (OPAL_SUCCESS == rc) { kv = (opal_value_t*)opal_list_get_first(&values); /* create the copy */ @@ -404,7 +559,7 @@ int opal_pmix_base_cache_keys_locally(const opal_process_name_t* id, const char* } /* search for each key in the decoded data */ - for 
(offset = 0 ; offset < len && '\0' != tmp_val[offset] ; ) { + for (offset = 0 ; offset < len ; ) { /* type */ tmp = tmp_val + offset + strlen (tmp_val + offset) + 1; /* size */ @@ -475,10 +630,9 @@ int opal_pmix_base_cache_keys_locally(const opal_process_name_t* id, const char* return OPAL_ERROR; } /* store data in local hash table */ - if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, id, kv))) { + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(id, kv))) { OPAL_ERROR_LOG(rc); } - /* keep going and cache everything locally */ offset = (size_t) (tmp3 - tmp_val) + size; if (0 == strcmp(kv->key, key)) { @@ -518,11 +672,11 @@ static inline unsigned char pmi_base64_encsym (unsigned char value) { assert (value < 64); if (value < 26) { - return 'A' + value; + return 'A' + value; } else if (value < 52) { - return 'a' + (value - 26); + return 'a' + (value - 26); } else if (value < 62) { - return '0' + (value - 52); + return '0' + (value - 52); } return (62 == value) ? '+' : '/'; @@ -530,19 +684,18 @@ static inline unsigned char pmi_base64_encsym (unsigned char value) { static inline unsigned char pmi_base64_decsym (unsigned char value) { if ('+' == value) { - return 62; + return 62; } else if ('/' == value) { - return 63; + return 63; } else if (' ' == value) { - return 64; + return 64; } else if (value <= '9') { - return (value - '0') + 52; + return (value - '0') + 52; } else if (value <= 'Z') { - return (value - 'A'); + return (value - 'A'); } else if (value <= 'z') { - return (value - 'a') + 26; + return (value - 'a') + 26; } - return 64; } @@ -564,12 +717,12 @@ static inline int pmi_base64_decode_block (const char in[4], unsigned char out[3 out[0] = in_dec[0] << 2 | in_dec[1] >> 4; if (64 == in_dec[2]) { - return 1; + return 1; } out[1] = in_dec[1] << 4 | in_dec[2] >> 2; if (64 == in_dec[3]) { - return 2; + return 2; } out[2] = ((in_dec[2] << 6) & 0xc0) | in_dec[3]; @@ -578,14 +731,14 @@ static inline int pmi_base64_decode_block (const char in[4], 
unsigned char out[3 /* PMI only supports strings. For now, do a simple base64. */ -static char *pmi_encode(const void *val, size_t vallen) +static char *pmi_encode(const void *val, size_t vallen) { char *outdata, *tmp; size_t i; outdata = calloc (((2 + vallen) * 4) / 3 + 2, 1); if (NULL == outdata) { - return NULL; + return NULL; } for (i = 0, tmp = outdata ; i < vallen ; i += 3, tmp += 4) { @@ -597,9 +750,9 @@ static char *pmi_encode(const void *val, size_t vallen) return outdata; } -static uint8_t *pmi_decode (const char *data, size_t *retlen) +static uint8_t *pmi_decode (const char *data, size_t *retlen) { - size_t input_len = (strlen (data) - 1) / 4; + size_t input_len = strlen (data) / 4; unsigned char *ret; int out_len; size_t i; @@ -607,16 +760,13 @@ static uint8_t *pmi_decode (const char *data, size_t *retlen) /* default */ *retlen = 0; - ret = calloc (1, 3 * input_len + 1); + ret = calloc (1, 3 * input_len); if (NULL == ret) { return ret; } - for (i = 0, out_len = 0 ; i < input_len ; i++, data += 4) { - out_len += pmi_base64_decode_block(data, ret + 3 * i); + out_len += pmi_base64_decode_block(data, ret + 3 * i); } - - ret[out_len] = '\0'; *retlen = out_len; return ret; } diff --git a/opal/mca/pmix/base/pmix_base_fns.h b/opal/mca/pmix/base/pmix_base_fns.h index a00bc65d068..e46bee0e3df 100644 --- a/opal/mca/pmix/base/pmix_base_fns.h +++ b/opal/mca/pmix/base/pmix_base_fns.h @@ -3,9 +3,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/pmix/base/pmix_base_frame.c b/opal/mca/pmix/base/pmix_base_frame.c index bd97c870ccf..9eacc571763 100644 --- a/opal/mca/pmix/base/pmix_base_frame.c +++ b/opal/mca/pmix/base/pmix_base_frame.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,6 +13,7 @@ #include "opal/constants.h" #include "opal/mca/mca.h" +#include "opal/util/argv.h" #include "opal/util/output.h" #include "opal/mca/base/base.h" @@ -31,18 +32,28 @@ /* Note that this initializer is important -- do not remove it! See https://github.com/open-mpi/ompi/issues/375 for details. */ opal_pmix_base_module_t opal_pmix = { 0 }; -bool opal_pmix_use_collective = false; -bool opal_pmix_base_allow_delayed_server = false; +bool opal_pmix_collect_all_data = true; +int opal_pmix_verbose_output = -1; +bool opal_pmix_base_async_modex = false; +opal_pmix_base_t opal_pmix_base = {0}; static int opal_pmix_base_frame_register(mca_base_register_flag_t flags) { + opal_pmix_base_async_modex = false; + (void) mca_base_var_register("opal", "pmix", "base", "async_modex", "Use asynchronous modex mode", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &opal_pmix_base_async_modex); + opal_pmix_collect_all_data = true; + (void) mca_base_var_register("opal", "pmix", "base", "collect_data", "Collect all data during modex", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &opal_pmix_collect_all_data); return OPAL_SUCCESS; } static int opal_pmix_base_frame_close(void) { int rc; - + rc = mca_base_framework_components_close(&opal_pmix_base_framework, NULL); /* reset the opal_pmix function pointers to NULL */ memset(&opal_pmix, 0, sizeof(opal_pmix)); @@ -52,11 +63,13 @@ static int opal_pmix_base_frame_close(void) static int opal_pmix_base_frame_open(mca_base_open_flag_t flags) { int rc; - + /* Open up all available components */ rc = mca_base_framework_components_open(&opal_pmix_base_framework, flags); /* ensure the function pointers are NULL */ memset(&opal_pmix, 0, sizeof(opal_pmix)); + /* pass across the verbosity */ + 
opal_pmix_verbose_output = opal_pmix_base_framework.framework_output; return rc; } @@ -66,7 +79,60 @@ MCA_BASE_FRAMEWORK_DECLARE(opal, pmix, "OPAL PMI Client Framework", opal_pmix_base_frame_close, mca_pmix_base_static_components, 0); -OBJ_CLASS_INSTANCE(pmix_info_t, +/**** PMIX FRAMEWORK OBJECTS ****/ +static void lkcon(opal_pmix_pdata_t *p) +{ + p->proc.jobid = OPAL_JOBID_INVALID; + p->proc.vpid = OPAL_VPID_INVALID; + OBJ_CONSTRUCT(&p->value, opal_value_t); +} +static void lkdes(opal_pmix_pdata_t *p) +{ + OBJ_DESTRUCT(&p->value); +} +OBJ_CLASS_INSTANCE(opal_pmix_pdata_t, opal_list_item_t, - NULL, NULL); + lkcon, lkdes); +static void mdcon(opal_pmix_modex_data_t *p) +{ + p->proc.jobid = OPAL_JOBID_INVALID; + p->proc.vpid = OPAL_VPID_INVALID; + p->blob = NULL; + p->size = 0; +} +static void mddes(opal_pmix_modex_data_t *p) +{ + if (NULL != p->blob) { + free(p->blob); + } +} +OBJ_CLASS_INSTANCE(opal_pmix_modex_data_t, + opal_list_item_t, + mdcon, mddes); + +static void apcon(opal_pmix_app_t *p) +{ + p->cmd = NULL; + p->argc = 0; + p->argv = NULL; + p->env = NULL; + p->maxprocs = 0; + OBJ_CONSTRUCT(&p->info, opal_list_t); +} +static void apdes(opal_pmix_app_t *p) +{ + if (NULL != p->cmd) { + free(p->cmd); + } + if (NULL != p->argv) { + opal_argv_free(p->argv); + } + if (NULL != p->env) { + opal_argv_free(p->env); + } + OPAL_LIST_DESTRUCT(&p->info); +} +OBJ_CLASS_INSTANCE(opal_pmix_app_t, + opal_list_item_t, + apcon, apdes); diff --git a/opal/mca/pmix/base/pmix_base_hash.c b/opal/mca/pmix/base/pmix_base_hash.c new file mode 100644 index 00000000000..fff708e7bba --- /dev/null +++ b/opal/mca/pmix/base/pmix_base_hash.c @@ -0,0 +1,299 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2011-2014 Los Alamos National Security, LLC. 
All rights + * reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "opal_config.h" +#include "opal/constants.h" + +#include +#include + +#include "opal_stdint.h" +#include "opal/class/opal_hash_table.h" +#include "opal/class/opal_pointer_array.h" +#include "opal/dss/dss_types.h" +#include "opal/util/error.h" +#include "opal/util/output.h" +#include "opal/util/proc.h" +#include "opal/util/show_help.h" + +#include "opal/mca/pmix/base/base.h" +#include "opal/mca/pmix/base/pmix_base_hash.h" + +/** + * Data for a particular opal process + * The name association is maintained in the + * proc_data hash table. + */ +typedef struct { + /** Structure can be put on lists (including in hash tables) */ + opal_list_item_t super; + bool loaded; + /* List of opal_value_t structures containing all data + received from this process, sorted by key. */ + opal_list_t data; +} opal_pmix_proc_data_t; +static void proc_data_construct(opal_pmix_proc_data_t *ptr) +{ + ptr->loaded = false; + OBJ_CONSTRUCT(&ptr->data, opal_list_t); +} + +static void proc_data_destruct(opal_pmix_proc_data_t *ptr) +{ + OPAL_LIST_DESTRUCT(&ptr->data); +} +OBJ_CLASS_INSTANCE(opal_pmix_proc_data_t, + opal_list_item_t, + proc_data_construct, + proc_data_destruct); + +/** + * Find data for a given key in a given proc_data_t + * container. + */ +static opal_value_t* lookup_keyval(opal_pmix_proc_data_t *proc_data, + const char *key) +{ + opal_value_t *kv; + + OPAL_LIST_FOREACH(kv, &proc_data->data, opal_value_t) { + if (0 == strcmp(key, kv->key)) { + return kv; + } + } + return NULL; +} + +/** + * Find proc_data_t container associated with given + * opal_process_name_t. 
+ */ +static opal_pmix_proc_data_t* lookup_proc(opal_proc_table_t *ptable, + opal_process_name_t id, bool create) +{ + opal_pmix_proc_data_t *proc_data = NULL; + + opal_proc_table_get_value(ptable, id, (void**)&proc_data); + if (NULL == proc_data && create) { + proc_data = OBJ_NEW(opal_pmix_proc_data_t); + if (NULL == proc_data) { + opal_output(0, "pmix:hash:lookup_proc: unable to allocate proc_data_t\n"); + return NULL; + } + opal_proc_table_set_value(ptable, id, proc_data); + } + + return proc_data; +} + + +static opal_proc_table_t ptable; + +/* Initialize our hash table */ +void opal_pmix_base_hash_init(void) +{ + OBJ_CONSTRUCT(&ptable, opal_proc_table_t); + opal_proc_table_init(&ptable, 16, 256); +} + +void opal_pmix_base_hash_finalize(void) +{ + opal_pmix_proc_data_t *proc_data; + opal_process_name_t key; + void *node1, *node2; + + /* to assist in getting a clean valgrind, cycle thru the hash table + * and release all data stored in it + */ + if (OPAL_SUCCESS == opal_proc_table_get_first_key(&ptable, &key, + (void**)&proc_data, + &node1, &node2)) { + if (NULL != proc_data) { + OBJ_RELEASE(proc_data); + } + while (OPAL_SUCCESS == opal_proc_table_get_next_key(&ptable, &key, + (void**)&proc_data, + node1, &node1, + node2, &node2)) { + if (NULL != proc_data) { + OBJ_RELEASE(proc_data); + } + } + } + OBJ_DESTRUCT(&ptable); +} + + + +int opal_pmix_base_store(const opal_process_name_t *id, + opal_value_t *val) +{ + opal_pmix_proc_data_t *proc_data; + opal_value_t *kv; + int rc; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "%s pmix:hash:store storing data for proc %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(*id)); + + /* lookup the proc data object for this proc */ + if (NULL == (proc_data = lookup_proc(&ptable, *id, true))) { + /* unrecoverable error */ + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, + "%s pmix:hash:store: storing data for proc %s unrecoverably failed", + 
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(*id))); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + /* see if we already have this key in the data - means we are updating + * a pre-existing value + */ + kv = lookup_keyval(proc_data, val->key); +#if OPAL_ENABLE_DEBUG + char *_data_type = opal_dss.lookup_data_type(val->type); + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, + "%s pmix:hash:store: %s key %s[%s] for proc %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + (NULL == kv ? "storing" : "updating"), + val->key, _data_type, OPAL_NAME_PRINT(*id))); + free (_data_type); +#endif + + if (NULL != kv) { + opal_list_remove_item(&proc_data->data, &kv->super); + OBJ_RELEASE(kv); + } + /* create the copy */ + if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&kv, val, OPAL_VALUE))) { + OPAL_ERROR_LOG(rc); + return rc; + } + opal_list_append(&proc_data->data, &kv->super); + + return OPAL_SUCCESS; +} + +int opal_pmix_base_fetch(const opal_process_name_t *id, + const char *key, opal_list_t *kvs) +{ + opal_pmix_proc_data_t *proc_data; + opal_value_t *kv, *knew; + int rc; + + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, + "%s pmix:hash:fetch: searching for key %s on proc %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + (NULL == key) ? 
"NULL" : key, OPAL_NAME_PRINT(*id))); + + /* lookup the proc data object for this proc */ + if (NULL == (proc_data = lookup_proc(&ptable, *id, true))) { + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, + "%s pmix_hash:fetch data for proc %s not found", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(*id))); + return OPAL_ERR_NOT_FOUND; + } + + /* if the key is NULL, that we want everything */ + if (NULL == key) { + /* must provide an output list or this makes no sense */ + if (NULL == kvs) { + OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM); + return OPAL_ERR_BAD_PARAM; + } + OPAL_LIST_FOREACH(kv, &proc_data->data, opal_value_t) { + /* copy the value */ + if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&knew, kv, OPAL_VALUE))) { + OPAL_ERROR_LOG(rc); + return rc; + } + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, + "%s pmix:hash:fetch: adding data for key %s on proc %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + (NULL == kv->key) ? "NULL" : kv->key, + OPAL_NAME_PRINT(*id))); + + /* add it to the output list */ + opal_list_append(kvs, &knew->super); + } + return OPAL_SUCCESS; + } + + /* find the value */ + if (NULL == (kv = lookup_keyval(proc_data, key))) { + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, + "%s pmix_hash:fetch key %s for proc %s not found", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + (NULL == key) ? 
"NULL" : key, + OPAL_NAME_PRINT(*id))); + return OPAL_ERR_NOT_FOUND; + } + + /* if the user provided a NULL list object, then they + * just wanted to know if the key was present */ + if (NULL == kvs) { + return OPAL_SUCCESS; + } + + /* create the copy */ + if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&knew, kv, OPAL_VALUE))) { + OPAL_ERROR_LOG(rc); + return rc; + } + /* add it to the output list */ + opal_list_append(kvs, &knew->super); + + return OPAL_SUCCESS; +} + +int opal_pmix_base_remove(const opal_process_name_t *id, const char *key) +{ + opal_pmix_proc_data_t *proc_data; + opal_value_t *kv; + + /* lookup the specified proc */ + if (NULL == (proc_data = lookup_proc(&ptable, *id, false))) { + /* no data for this proc */ + return OPAL_SUCCESS; + } + + /* if key is NULL, remove all data for this proc */ + if (NULL == key) { + while (NULL != (kv = (opal_value_t *) opal_list_remove_first(&proc_data->data))) { + OBJ_RELEASE(kv); + } + /* remove the proc_data object itself from the jtable */ + opal_proc_table_remove_value(&ptable, *id); + /* cleanup */ + OBJ_RELEASE(proc_data); + return OPAL_SUCCESS; + } + + /* remove this item */ + OPAL_LIST_FOREACH(kv, &proc_data->data, opal_value_t) { + if (0 == strcmp(key, kv->key)) { + opal_list_remove_item(&proc_data->data, &kv->super); + OBJ_RELEASE(kv); + break; + } + } + + return OPAL_SUCCESS; +} + diff --git a/opal/mca/pmix/base/pmix_base_hash.h b/opal/mca/pmix/base/pmix_base_hash.h new file mode 100644 index 00000000000..5ab3e0ffa00 --- /dev/null +++ b/opal/mca/pmix/base/pmix_base_hash.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_PMIX_HASH_H +#define OPAL_PMIX_HASH_H + +#include "opal/class/opal_list.h" +#include "opal/class/opal_hash_table.h" +#include "opal/dss/dss.h" +#include "opal/util/proc.h" + +BEGIN_C_DECLS + +OPAL_DECLSPEC void opal_pmix_base_hash_init(void); +OPAL_DECLSPEC void opal_pmix_base_hash_finalize(void); + +OPAL_DECLSPEC int opal_pmix_base_store(const opal_process_name_t *id, + opal_value_t *val); + +OPAL_DECLSPEC int opal_pmix_base_fetch(const opal_process_name_t *id, + const char *key, opal_list_t *kvs); + +OPAL_DECLSPEC int opal_pmix_base_remove(const opal_process_name_t *id, const char *key); + +END_C_DECLS + +#endif /* OPAL_DSTORE_HASH_H */ diff --git a/opal/mca/pmix/base/pmix_base_select.c b/opal/mca/pmix/base/pmix_base_select.c index 3ad484323f7..0a4009b95c7 100644 --- a/opal/mca/pmix/base/pmix_base_select.c +++ b/opal/mca/pmix/base/pmix_base_select.c @@ -1,9 +1,12 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +25,6 @@ int opal_pmix_base_select(void) { - int ret, exit_status = OPAL_SUCCESS; opal_pmix_base_component_t *best_component = NULL; opal_pmix_base_module_t *best_module = NULL; @@ -32,7 +34,7 @@ int opal_pmix_base_select(void) if( OPAL_SUCCESS != mca_base_select("pmix", opal_pmix_base_framework.framework_output, &opal_pmix_base_framework.framework_components, (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { + (mca_base_component_t **) &best_component, NULL) ) { /* notify caller that no available component found */ return OPAL_ERR_NOT_FOUND; } @@ -40,16 +42,9 @@ int opal_pmix_base_select(void) /* Save the winner */ opal_pmix = *best_module; - /* Initialize the winner */ - if (OPAL_SUCCESS != (ret = opal_pmix.init()) ) { - /* connection not available is okay - just means - * that a server hasn't already been defined */ - if (OPAL_ERR_SERVER_NOT_AVAIL == ret) { - exit_status = OPAL_SUCCESS; - } else { - exit_status = ret; - } - } - - return exit_status; + /* do not initialize the module here as the type + * of process determines which init (client or server) + * should be done */ + + return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/cray/Makefile.am b/opal/mca/pmix/cray/Makefile.am index bc86578028e..c1324d1a302 100644 --- a/opal/mca/pmix/cray/Makefile.am +++ b/opal/mca/pmix/cray/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/mca/pmix/cray/configure.m4 b/opal/mca/pmix/cray/configure.m4 index 9b41c15ff9d..bfadbb47720 100644 --- a/opal/mca/pmix/cray/configure.m4 +++ b/opal/mca/pmix/cray/configure.m4 @@ -2,9 +2,9 @@ # # Copyright (c) 2014 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -17,7 +17,7 @@ AC_DEFUN([MCA_opal_pmix_cray_CONFIG], [ # check specifically for Cray PMI here # OPAL_CHECK_CRAY_PMI([pmix_cray], [pmix_cray_good=1], [pmix_cray_good=0]) - + # Evaluate succeed / fail AS_IF([test "$pmix_cray_good" = 1], [$1 diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index 30b4f1dfcba..5984273129e 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -6,7 +6,7 @@ * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All * rights reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -21,6 +21,7 @@ #include "opal_stdint.h" #include "opal/mca/hwloc/base/base.h" +#include "opal/util/argv.h" #include "opal/util/opal_environ.h" #include "opal/util/output.h" #include "opal/util/proc.h" @@ -32,61 +33,104 @@ #include #include "opal/mca/pmix/base/base.h" +#include "opal/mca/pmix/base/pmix_base_hash.h" #include "pmix_cray.h" +static char cray_pmi_version[128]; + static int cray_init(void); static int cray_fini(void); -static bool cray_initialized(void); -static int cray_abort(int flag, const char msg[]); -static int cray_spawn(int count, const char * cmds[], - int argcs[], const char ** argvs[], - const int maxprocs[], - opal_list_t *info_keyval_vector, - opal_list_t *preput_keyval_vector, - char jobId[], int jobIdSize, - int errors[]); -static int cray_job_connect(const char jobId[]); -static int cray_job_disconnect(const char jobId[]); +static int cray_initialized(void); +static int cray_abort(int flat, const char *msg, + opal_list_t *procs); +static int cray_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t *jobid); +static int cray_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps, + 
opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata); +static int cray_job_connect(opal_list_t *procs); +static int cray_job_disconnect(opal_list_t *procs); +static int cray_job_disconnect_nb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +static int cray_resolve_peers(const char *nodename, + opal_jobid_t jobid, + opal_list_t *procs); +static int cray_resolve_nodes(opal_jobid_t jobid, char **nodelist); static int cray_put(opal_pmix_scope_t scope, opal_value_t *kv); -static int cray_fence(opal_process_name_t *procs, size_t nprocs); +static int cray_fencenb(opal_list_t *procs, int collect_data, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +static int cray_commit(void); static int cray_get(const opal_process_name_t *id, - const char *key, + const char *key, opal_list_t *info, opal_value_t **kv); -static int cray_publish(const char service_name[], - opal_list_t *info, - const char port[]); -static int cray_lookup(const char service_name[], +static int cray_get_nb(const opal_process_name_t *id, const char *key, opal_list_t *info, - char port[], int portLen); -static int cray_unpublish(const char service_name[], - opal_list_t *info); + opal_pmix_value_cbfunc_t cbfunc, void *cbdata); +static int cray_publish(opal_list_t *info); +static int cray_publish_nb(opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +static int cray_lookup(opal_list_t *data, opal_list_t *info); +static int cray_lookup_nb(char **keys, opal_list_t *info, + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); +static int cray_unpublish(char **keys, opal_list_t *info); +static int cray_unpublish_nb(char **keys, opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +static const char *cray_get_version(void); +static int cray_store_local(const opal_process_name_t *proc, + opal_value_t *val); +static const char *cray_get_nspace(opal_jobid_t jobid); +static void cray_register_jobid(opal_jobid_t jobid, const char *nspace); + +#if 0 static bool cray_get_attr(const 
char *attr, opal_value_t **kv); +#endif const opal_pmix_base_module_t opal_pmix_cray_module = { - cray_init, - cray_fini, - cray_initialized, - cray_abort, - cray_fence, - NULL, - cray_put, - cray_get, - NULL, - cray_publish, - cray_lookup, - cray_unpublish, - cray_get_attr, - NULL, - cray_spawn, - cray_job_connect, - cray_job_disconnect, - NULL, - NULL + .init = cray_init, + .finalize = cray_fini, + .initialized = cray_initialized, + .abort = cray_abort, + .commit = cray_commit, + .fence = NULL, + .fence_nb = cray_fencenb, + .put = cray_put, + .get = cray_get, + .get_nb = cray_get_nb, + .publish = cray_publish, + .publish_nb = cray_publish_nb, + .lookup = cray_lookup, + .lookup_nb = cray_lookup_nb, + .unpublish = cray_unpublish, + .unpublish_nb = cray_unpublish_nb, + .spawn = cray_spawn, + .spawn_nb = cray_spawn_nb, + .connect = cray_job_connect, + .disconnect = cray_job_disconnect, + .disconnect_nb = cray_job_disconnect_nb, + .resolve_peers = cray_resolve_peers, + .resolve_nodes = cray_resolve_nodes, + .get_version = cray_get_version, + .register_errhandler = opal_pmix_base_register_handler, + .deregister_errhandler = opal_pmix_base_deregister_handler, + .store_local = cray_store_local, + .get_nspace = cray_get_nspace, + .register_jobid = cray_register_jobid }; // usage accounting static int pmix_init_count = 0; +// local object +typedef struct { + opal_object_t super; + opal_event_t ev; + opal_pmix_op_cbfunc_t opcbfunc; + void *cbdata; +} pmi_opcaddy_t; +static OBJ_CLASS_INSTANCE(pmi_opcaddy_t, + opal_object_t, + NULL, NULL); + // PMI constant values: static int pmix_kvslen_max = 0; static int pmix_keylen_max = 0; @@ -121,12 +165,20 @@ static int cray_init(void) char *pmapping = NULL; char buf[PMI2_MAX_ATTRVALUE]; int found; + int major, minor, revision; uint32_t jobfam; + opal_value_t kv; + opal_process_name_t ldr; + char nmtmp[64]; + char *str, **localranks = NULL; ++pmix_init_count; /* if we can't startup PMI, we can't be used */ if ( PMI2_Initialized () ) { 
+ opal_output_verbose(10, opal_pmix_base_framework.framework_output, + "%s pmix:cray: pmi already initialized", + OPAL_NAME_PRINT(pmix_pname)); return OPAL_SUCCESS; } size = -1; @@ -151,12 +203,16 @@ static int cray_init(void) pmix_vallen_threshold = PMI2_MAX_VALLEN * 3; pmix_vallen_threshold >>= 2; - rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found); - if( PMI_SUCCESS != rc ) { - OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); - goto err_exit; + /* + * get the version info + */ + + if (PMI_SUCCESS != PMI_Get_version_info(&major,&minor,&revision)) { + return OPAL_ERROR; } - pmix_usize = atoi(buf); + + snprintf(cray_pmi_version, sizeof(cray_pmi_version), + "%d.%d.%d", major, minor, revision); pmix_kvs_name = (char*)malloc(pmix_kvslen_max); if( pmix_kvs_name == NULL ){ @@ -164,6 +220,7 @@ static int cray_init(void) ret = OPAL_ERR_OUT_OF_RESOURCE; goto err_exit; } + rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max); if( PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); @@ -172,7 +229,9 @@ static int cray_init(void) rc = sscanf(pmix_kvs_name,"kvs_%u",&jobfam); if (rc != 1) { - OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); + opal_output_verbose(10, opal_pmix_base_framework.framework_output, + "%s pmix:cray: pmix_kvs_name %s", + OPAL_NAME_PRINT(pmix_pname), pmix_kvs_name); rc = OPAL_ERROR; goto err_exit; } @@ -212,15 +271,137 @@ static int cray_init(void) free(pmapping); - /* find ourselves */ + // setup hash table + opal_pmix_base_hash_init(); + + /* save the job size */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_JOB_SIZE); + kv.type = OPAL_UINT32; + kv.data.uint32 = pmix_size; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + /* save the appnum */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_APPNUM); + kv.type = OPAL_UINT32; + kv.data.uint32 = pmix_appnum; + if (OPAL_SUCCESS != (ret = 
opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found); + if( PMI_SUCCESS != rc ) { + OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); + goto err_exit; + } + + pmix_usize = atoi(buf); + + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_UNIV_SIZE); + kv.type = OPAL_UINT32; + kv.data.uint32 = pmix_usize; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_JOBID); + kv.type = OPAL_UINT32; + kv.data.uint32 = pmix_jobid; + if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + /* save the local size */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCAL_SIZE); + kv.type = OPAL_UINT32; + kv.data.uint32 = pmix_nlranks; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + ldr.vpid = pmix_lranks[0]; + ldr.jobid = pmix_pname.jobid; + + /* find ourselves and build up a string for local peer info */ + memset(nmtmp, 0, 64); for (i=0; i < pmix_nlranks; i++) { + ret = snprintf(nmtmp, 64, "%d", pmix_lranks[i]); + opal_argv_append_nosize(&localranks, nmtmp); if (pmix_rank == pmix_lranks[i]) { pmix_lrank = i; pmix_nrank = i; - break; } } + str = opal_argv_join(localranks, ','); + opal_argv_free(localranks); + + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCAL_PEERS); + kv.type = OPAL_STRING; + kv.data.string = str; + if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + /* save the local 
leader */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCALLDR); + kv.type = OPAL_UINT64; + kv.data.uint64 = *(uint64_t*)&ldr; + if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + + /* save our local rank */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCAL_RANK); + kv.type = OPAL_UINT16; + kv.data.uint16 = pmix_lrank; + if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + + /* and our node rank */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_NODE_RANK); + kv.type = OPAL_UINT16; + kv.data.uint16 = pmix_nrank; + if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + return OPAL_SUCCESS; err_exit: PMI2_Finalize(); @@ -250,36 +431,58 @@ static int cray_fini(void) { return OPAL_SUCCESS; } -static bool cray_initialized(void) +static int cray_initialized(void) { if (0 < pmix_init_count) { - return true; + return 1; } - return false; + return 0; } -static int cray_abort(int status, const char *msg) +static int cray_abort(int flag, const char *msg, + opal_list_t *procs) { - return PMI2_Abort(status, msg); + PMI2_Abort(flag, msg); + return OPAL_SUCCESS; } -static int cray_spawn(int count, const char * cmds[], - int argcs[], const char ** argvs[], - const int maxprocs[], - opal_list_t *info_keyval_vector, - opal_list_t *preput_keyval_vector, - char jobId[], int jobIdSize, - int errors[]) +static int cray_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t *jobid) { - return OPAL_ERR_NOT_IMPLEMENTED; + return OPAL_ERR_NOT_SUPPORTED; +} + +static int cray_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps, + opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int 
cray_job_connect(opal_list_t *procs) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int cray_job_disconnect(opal_list_t *procs) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int cray_job_disconnect_nb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + return OPAL_ERR_NOT_SUPPORTED; } -static int cray_job_connect(const char jobId[]) +static int cray_resolve_peers(const char *nodename, + opal_jobid_t jobid, + opal_list_t *procs) { return OPAL_ERR_NOT_IMPLEMENTED; } -static int cray_job_disconnect(const char jobId[]) +static int cray_resolve_nodes(opal_jobid_t jobid, char **nodelist) { return OPAL_ERR_NOT_IMPLEMENTED; } @@ -292,6 +495,11 @@ static int cray_put(opal_pmix_scope_t scope, opal_output_verbose(10, opal_pmix_base_framework.framework_output, "%s pmix:cray cray_put key %s scope %d\n", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); + + if (!pmix_init_count) { + return OPAL_ERROR; + } + /* * for now just always just global cache */ @@ -311,8 +519,14 @@ static int cray_put(opal_pmix_scope_t scope, return rc; } -static int cray_fence(opal_process_name_t *procs, size_t nprocs) +static int cray_commit(void) { + return OPAL_SUCCESS; +} + +static void fencenb(int sd, short args, void *cbdata) +{ + pmi_opcaddy_t *op = (pmi_opcaddy_t*)cbdata; int rc, cnt; int32_t i; int *all_lens = NULL; @@ -335,11 +549,12 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) char *cpuset = NULL; opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:cray executing fence on %u procs cache_global %p cache_local %p", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (unsigned int)nprocs, + "%s pmix:cray executing fence cache_global %p cache_local %p", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (void *)mca_pmix_cray_component.cache_global, (void *)mca_pmix_cray_component.cache_local); + /* * "unload" the cache_local/cache_global buffers, first copy * it so we can continue to use the local buffers if further @@ -348,7 +563,8 @@ static int 
cray_fence(opal_process_name_t *procs, size_t nprocs) send_buffer = OBJ_NEW(opal_buffer_t); if (NULL == send_buffer) { - return OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto fn_exit; } opal_dss.copy_payload(send_buffer, mca_pmix_cray_component.cache_global); @@ -429,10 +645,9 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) cnt = 1; while (OPAL_SUCCESS == (rc = opal_dss.unpack(buf, &kp, &cnt, OPAL_VALUE))) { opal_output_verbose(20, opal_pmix_base_framework.framework_output, - "%s pmix:cray unpacked kp with key %s type(%d) for id %s", + "%s pmix:cray unpacked kp with key %s type(%d) for id %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key, kp->type, OPAL_NAME_PRINT(id)); - if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, - &id, kp))) { + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&id, kp))) { OPAL_ERROR_LOG(rc); goto fn_exit; } @@ -451,21 +666,25 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) "%s pmix:cray kvs_fence complete", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); -#if OPAL_HAVE_HWLOC /* fetch my cpuset */ OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS == (rc = opal_dstore.fetch(opal_dstore_internal, &pmix_pname, - OPAL_DSTORE_CPUSET, &vals))) { + if (OPAL_SUCCESS == (rc = opal_pmix_base_fetch(&pmix_pname, + OPAL_PMIX_CPUSET, &vals))) { kp = (opal_value_t*)opal_list_get_first(&vals); cpuset = strdup(kp->data.string); } else { cpuset = NULL; } OPAL_LIST_DESTRUCT(&vals); -#endif - /* we only need to set locality for each local rank as "not found" - * equates to "non-local" */ + /* Get the modex data from each local process and set the + * localities to avoid having the MPI layer fetch data + * for every process in the job. 
+ * + * we only need to set locality for each local rank as "not found" + * equates to "non-local" + */ + for (i=0; i < pmix_nlranks; i++) { id.vpid = pmix_lranks[i]; id.jobid = pmix_jobid; @@ -474,10 +693,9 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(id)); /* fetch cpuset for this vpid */ -#if OPAL_HAVE_HWLOC OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal, &pmix_pname, - OPAL_DSTORE_CPUSET, &vals))) { + if (OPAL_SUCCESS != (rc = opal_pmix_base_fetch(&id, + OPAL_PMIX_CPUSET, &vals))) { opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s cpuset for local proc %s not found", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), @@ -501,10 +719,6 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) } OPAL_LIST_DESTRUCT(&vals); } -#else - /* all we know is we share a node */ - locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; -#endif OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output, "%s pmix:cray proc %s locality %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), @@ -512,19 +726,17 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) opal_hwloc_base_print_locality(locality))); OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_LOCALITY); + kvn.key = strdup(OPAL_PMIX_LOCALITY); kvn.type = OPAL_UINT16; kvn.data.uint16 = locality; - (void)opal_dstore.store(opal_dstore_internal, &id, &kvn); + opal_pmix_base_store(&id, &kvn); OBJ_DESTRUCT(&kvn); } fn_exit: -#if OPAL_HAVE_HWLOC if (NULL != cpuset) { free(cpuset); } -#endif if (all_lens != NULL) { free(all_lens); } @@ -534,10 +746,30 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) if (r_bytes_and_ranks != NULL) { free(r_bytes_and_ranks); } - return rc; + if (NULL != op->opcbfunc) { + op->opcbfunc(rc, op->cbdata); + } + OBJ_RELEASE(op); + return; } -static int cray_get(const opal_process_name_t *id, const 
char *key, opal_value_t **kv) +static int cray_fencenb(opal_list_t *procs, int collect_data, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmi_opcaddy_t *op; + + /* thread-shift this so we don't block in Cray's barrier */ + op = OBJ_NEW(pmi_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + event_assign(&op->ev, opal_pmix_base.evbase, -1, + EV_WRITE, fencenb, op); + event_active(&op->ev, EV_WRITE, 1); + + return OPAL_SUCCESS; +} + +static int cray_get(const opal_process_name_t *id, const char *key, opal_list_t *info, opal_value_t **kv) { int rc; opal_list_t vals; @@ -548,7 +780,7 @@ static int cray_get(const opal_process_name_t *id, const char *key, opal_value_t OPAL_NAME_PRINT(*id), key); OBJ_CONSTRUCT(&vals, opal_list_t); - rc = opal_dstore.fetch(opal_dstore_internal, id, key, &vals); + rc = opal_pmix_base_fetch(id, key, &vals); if (OPAL_SUCCESS == rc) { *kv = (opal_value_t*)opal_list_remove_first(&vals); return OPAL_SUCCESS; @@ -560,146 +792,69 @@ static int cray_get(const opal_process_name_t *id, const char *key, opal_value_t OPAL_LIST_DESTRUCT(&vals); return rc; + } -static int cray_publish(const char service_name[], - opal_list_t *info, - const char port[]) +static int cray_get_nb(const opal_process_name_t *id, const char *key, + opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { - int rc; - - if (PMI_SUCCESS != (rc = PMI2_Nameserv_publish(service_name, NULL, port))) { - OPAL_PMI_ERROR(rc, "PMI2_Nameserv_publish"); - return OPAL_ERROR; - } - return OPAL_SUCCESS; + return OPAL_ERR_NOT_IMPLEMENTED; } -static int cray_lookup(const char service_name[], - opal_list_t *info, - char port[], int portLen) +static int cray_publish(opal_list_t *info) { - int rc; - - if (PMI_SUCCESS != (rc = PMI2_Nameserv_lookup(service_name, NULL, port, portLen))) { - OPAL_PMI_ERROR(rc, "PMI2_Nameserv_lookup"); - return OPAL_ERROR; - } + return OPAL_ERR_NOT_SUPPORTED; +} - return OPAL_SUCCESS; +static int cray_publish_nb(opal_list_t *info, + 
opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + return OPAL_ERR_NOT_SUPPORTED; } -static int cray_unpublish(const char service_name[], - opal_list_t *info) +static int cray_lookup(opal_list_t *data, opal_list_t *info) { - int rc; + return OPAL_ERR_NOT_SUPPORTED; +} - if (PMI_SUCCESS != (rc = PMI2_Nameserv_unpublish(service_name, NULL))) { - OPAL_PMI_ERROR(rc, "PMI2_Nameserv_unpublish"); - return OPAL_ERROR; - } - return OPAL_SUCCESS;; +static int cray_lookup_nb(char **keys, opal_list_t *info, + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) +{ + return OPAL_ERR_NOT_SUPPORTED; } -static bool cray_get_attr(const char *attr, opal_value_t **kv) +static int cray_unpublish(char **keys, opal_list_t *info) { - int rc, i; - opal_value_t *kp; - - if (0 == strcmp(PMIX_JOBID, attr)) { - kp = OBJ_NEW(opal_value_t); - kp->key = strdup(attr); - kp->type = OPAL_UINT32; - kp->data.uint32 = pmix_jobid; - *kv = kp; - return true; - } - - if (0 == strcmp(PMIX_RANK, attr)) { - rc = PMI_Get_rank(&i); - if( PMI_SUCCESS != rc ) { - OPAL_PMI_ERROR(rc, "PMI_Get_rank"); - return false; - } - kp = OBJ_NEW(opal_value_t); - kp->key = strdup(attr); - kp->type = OPAL_UINT32; - kp->data.uint32 = i; - *kv = kp; - return true; - } - - if (0 == strcmp(PMIX_UNIV_SIZE, attr)) { - rc = PMI_Get_universe_size(&i); - if( PMI_SUCCESS != rc ) { - OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); - return false; - } - kp = OBJ_NEW(opal_value_t); - kp->key = strdup(attr); - kp->type = OPAL_UINT32; - kp->data.uint32 = i; - *kv = kp; - return true; - } - - if (0 == strcmp(PMIX_JOB_SIZE, attr)) { - rc = PMI_Get_size(&i); - if( PMI_SUCCESS != rc ) { - OPAL_PMI_ERROR(rc, "PMI_Get_size"); - return false; - } - kp = OBJ_NEW(opal_value_t); - kp->key = strdup(attr); - kp->type = OPAL_UINT32; - kp->data.uint32 = i; - *kv = kp; - return true; - } + return OPAL_ERR_NOT_SUPPORTED; +} +static int cray_unpublish_nb(char **keys, opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + return OPAL_ERR_NOT_SUPPORTED; 
+} - if (0 == strcmp(PMIX_APPNUM, attr)) { - rc = PMI_Get_appnum(&i); - if( PMI_SUCCESS != rc ) { - OPAL_PMI_ERROR(rc, "PMI_Get_appnum"); - return false; - } - kp = OBJ_NEW(opal_value_t); - kp->key = strdup(attr); - kp->type = OPAL_UINT32; - kp->data.uint32 = i; - *kv = kp; - return true; - } +static const char *cray_get_version(void) +{ + return cray_pmi_version; +} - if (0 == strcmp(PMIX_LOCAL_RANK, attr)) { - kp = OBJ_NEW(opal_value_t); - kp->key = strdup(attr); - kp->type = OPAL_UINT32; - kp->data.uint32 = pmix_lrank; - *kv = kp; - return true; - } +static int cray_store_local(const opal_process_name_t *proc, + opal_value_t *val) +{ + opal_pmix_base_store(proc, val); - if (0 == strcmp(PMIX_NODE_RANK, attr)) { - kp = OBJ_NEW(opal_value_t); - kp->key = strdup(attr); - kp->type = OPAL_UINT32; - kp->data.uint32 = pmix_nrank; - *kv = kp; - return true; - } + return OPAL_SUCCESS; +} - if (0 == strcmp(PMIX_LOCAL_SIZE, attr)) { - kp = OBJ_NEW(opal_value_t); - kp->key = strdup(attr); - kp->type = OPAL_UINT32; - kp->data.uint32 = pmix_nlranks; - *kv = kp; - return true; - } +static const char *cray_get_nspace(opal_jobid_t jobid) +{ + return "N/A"; +} - return OPAL_ERR_NOT_IMPLEMENTED; +static void cray_register_jobid(opal_jobid_t jobid, const char *nspace) +{ + return; } static char* pmix_error(int pmix_err) diff --git a/opal/mca/pmix/cray/pmix_cray.h b/opal/mca/pmix/cray/pmix_cray.h index 5a14c645740..58af6a23b8f 100644 --- a/opal/mca/pmix/cray/pmix_cray.h +++ b/opal/mca/pmix/cray/pmix_cray.h @@ -1,9 +1,9 @@ /* * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/mca/pmix/cray/pmix_cray_component.c b/opal/mca/pmix/cray/pmix_cray_component.c index 75e6dc67e6e..20d13ea1137 100644 --- a/opal/mca/pmix/cray/pmix_cray_component.c +++ b/opal/mca/pmix/cray/pmix_cray_component.c @@ -4,9 +4,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -21,7 +21,7 @@ #include "opal/constants.h" #include "opal/mca/pmix/pmix.h" #include "pmix_cray.h" -#include +#include #include /* @@ -51,7 +51,7 @@ opal_pmix_cray_component_t mca_pmix_cray_component = { .base_version = { /* Indicate that we are a pmix v1.1.0 component (which also implies a specific MCA version) */ - + OPAL_PMIX_BASE_VERSION_2_0_0, /* Component name and version */ @@ -88,9 +88,10 @@ static int pmix_cray_component_query(mca_base_module_t **module, int *priority) FILE *fd = NULL, *fd_task_is_app = NULL; char task_is_app_fname[PATH_MAX]; - /* disqualify ourselves if not running in a Cray PAGG container */ + /* disqualify ourselves if not running in a Cray PAGG container, or we + were launched by the orte/mpirun launcher */ fd = fopen(proc_job_file, "r"); - if (fd == NULL) { + if ((fd == NULL) || (getenv("OMPI_NO_USE_CRAY_PMI") != NULL)) { *priority = 0; *module = NULL; rc = OPAL_ERROR; @@ -98,7 +99,7 @@ static int pmix_cray_component_query(mca_base_module_t **module, int *priority) snprintf(task_is_app_fname,sizeof(task_is_app_fname), "/proc/self/task/%ld/task_is_app",syscall(SYS_gettid)); fd_task_is_app = fopen(task_is_app_fname, "r"); - if (fd_task_is_app != NULL) { /* okay we're in a PAGG container, + if (fd_task_is_app != NULL) { /* okay we're in a PAGG container, and we are an app task (not just a process running on a mom node, for example), so we should give cray pmi a shot. */ diff --git a/opal/mca/pmix/cray/pmix_cray_pmap_parser.c b/opal/mca/pmix/cray/pmix_cray_pmap_parser.c index e4a5d2c0177..e09c2c07422 100644 --- a/opal/mca/pmix/cray/pmix_cray_pmap_parser.c +++ b/opal/mca/pmix/cray/pmix_cray_pmap_parser.c @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* +/* * * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. 
@@ -14,7 +14,7 @@ #include "pmix_cray_pmap_parser.h" /** - pmi2 process mapping is returned as a + pmi2 process mapping is returned as a comma separated list of tuples: ex: (vector,(0,4,4),(0,4,1)) slurm cyclic distro of 4 ranks over 2 nodes: @@ -26,7 +26,7 @@ slurm block distro of 4 ranks over 2 nodes: Format of each tuple is (base, H, L), where H is number of nodes spawned by tuple, L is number of ranks per node, - base is offset from node 0. + base is offset from node 0. Tuple can be visualized as a rectangle on two dimensional (Hosts, Local Ranks) plane: @@ -38,7 +38,7 @@ slurm block distro of 4 ranks over 2 nodes: | | | L | +--------+ Local Ranks - V + V Note that ranks increase by column. Tuple (0,2,3) looks like: 0 3 @@ -120,13 +120,13 @@ static int *find_lrs(char *map, int my_node, int *nlrs) /** * @param pmap process map as returned by PMI_process_mapping * attribute - * @param my_rank + * @param my_rank * @param node set to my node id * @param nlrs set to the number of local ranks returned * * @return array that contains ranks local to my_rank or NULL - * on failure. Array must be freed by the caller. - */ + * on failure. Array must be freed by the caller. + */ int *pmix_cray_parse_pmap(char *pmap, int my_rank, int *node, int *nlrs) { @@ -202,7 +202,7 @@ int main(int argc, char **argv) assert(memcmp(lrs, a2, 2) == 0); free(lrs); - + /* cyclic distro which skips node 0 */ pmap = "(vector,(1,2,1),(1,2,1))"; me = 0; diff --git a/opal/mca/pmix/cray/pmix_cray_pmap_parser.h b/opal/mca/pmix/cray/pmix_cray_pmap_parser.h index 335231d7953..433bf5adac8 100644 --- a/opal/mca/pmix/cray/pmix_cray_pmap_parser.h +++ b/opal/mca/pmix/cray/pmix_cray_pmap_parser.h @@ -1,5 +1,5 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* +/* * * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. 
diff --git a/opal/mca/pmix/external/Makefile.am b/opal/mca/pmix/external/Makefile.am new file mode 100644 index 00000000000..2b56cc619fc --- /dev/null +++ b/opal/mca/pmix/external/Makefile.am @@ -0,0 +1,48 @@ +# +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2015 Mellanox Technologies, Inc. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + pmix_ext.h \ + pmix_ext_component.c \ + pmix_ext.c \ + pmix_ext_client.c \ + pmix_ext_server_south.c \ + pmix_ext_server_north.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if MCA_BUILD_opal_pmix_external_DSO +component_noinst = +component_install = mca_pmix_external.la +else +component_noinst = libmca_pmix_external.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pmix_external_la_SOURCES = $(sources) +mca_pmix_external_la_CFLAGS = +mca_pmix_external_la_CPPFLAGS = $(opal_pmix_external_CPPFLAGS) +mca_pmix_external_la_LDFLAGS = -module -avoid-version $(opal_pmix_external_LDFLAGS) +mca_pmix_external_la_LIBADD = $(opal_pmix_external_LIBS) \ + $(OPAL_TOP_BUILDDIR)/opal/mca/event/lib@OPAL_LIB_PREFIX@mca_event.la \ + $(OPAL_TOP_BUILDDIR)/opal/mca/hwloc/lib@OPAL_LIB_PREFIX@mca_hwloc.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pmix_external_la_SOURCES =$(sources) +libmca_pmix_external_la_CFLAGS = +libmca_pmix_external_la_CPPFLAGS = $(opal_pmix_external_CPPFLAGS) +libmca_pmix_external_la_LDFLAGS = -module -avoid-version $(opal_pmix_external_LDFLAGS) +libmca_pmix_external_la_LIBADD = $(opal_pmix_external_LIBS) diff --git a/opal/mca/pmix/external/configure.m4 b/opal/mca/pmix/external/configure.m4 new file mode 100644 index 00000000000..df3b022a51c --- /dev/null +++ b/opal/mca/pmix/external/configure.m4 @@ -0,0 
+1,71 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011-2013 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2014-2015 Mellanox Technologies, Inc. +# All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_pmix_external_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_opal_pmix_external_CONFIG],[ + AC_CONFIG_FILES([opal/mca/pmix/external/Makefile]) + + AS_IF([test "$opal_external_pmix_happy" = "yes"], + [AS_IF([test "$opal_event_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL LIBEVENT]) + AC_MSG_WARN([LIBRARY. 
THIS LIBRARY MUST POINT TO THE SAME ONE USED]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) + AS_IF([test "$opal_hwloc_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL HWLOC]) + AC_MSG_WARN([LIBRARY THIS LIBRARY MUST POINT TO THE SAME ONE USED ]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) + + # check for the 1.1.4 version + AC_MSG_CHECKING([if external component is version 1.1.4 or compatible]) + AS_IF([test "$opal_external_pmix_version" = "11" || + test "$opal_external_pmix_version" = "114" || + test "$opal_external_pmix_version" = "1X"], + [AC_MSG_RESULT([yes]) + opal_pmix_external_11_happy=yes], + [AC_MSG_RESULT([no]) + opal_pmix_external_11_happy=no]) + + AS_IF([test "$opal_pmix_external_11_happy" = "yes"], + [$1 + # need to set the wrapper flags for static builds + pmix_external_WRAPPER_EXTRA_LDFLAGS=$opal_external_pmix_LDFLAGS + pmix_external_WRAPPER_EXTRA_LIBS=$opal_external_pmix_LIBS], + [$2])], + [$2]) + + opal_pmix_external_CPPFLAGS=$opal_external_pmix_CPPFLAGS + opal_pmix_external_LDFLAGS=$opal_external_pmix_LDFLAGS + opal_pmix_external_LIBS=$opal_external_pmix_LIBS + + AC_SUBST([opal_pmix_external_CPPFLAGS]) + AC_SUBST([opal_pmix_external_LDFLAGS]) + AC_SUBST([opal_pmix_external_LIBS]) +])dnl diff --git a/opal/mca/pmix/external/pmix_ext.c b/opal/mca/pmix/external/pmix_ext.c new file mode 100644 index 00000000000..bb8db3aafd1 --- /dev/null +++ b/opal/mca/pmix/external/pmix_ext.c @@ -0,0 +1,542 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014 Mellanox Technologies, Inc. 
+ * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/types.h" + +#ifdef HAVE_STRING_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/dss/dss.h" +#include "opal/mca/event/event.h" +#include "opal/mca/hwloc/base/base.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_progress_threads.h" +#include "opal/util/argv.h" +#include "opal/util/error.h" +#include "opal/util/output.h" +#include "opal/util/proc.h" +#include "opal/util/show_help.h" + +#include "pmix_ext.h" +#include "opal/mca/pmix/base/base.h" + +#include + +/**** C.O.M.M.O.N I.N.T.E.R.F.A.C.E.S ****/ + +/* These are functions used by both client and server to + * access common functions in the embedded PMIx library */ + +static const char *pmix_ext_get_nspace(opal_jobid_t jobid); +static void pmix_ext_register_jobid(opal_jobid_t jobid, const char *nspace); + +const opal_pmix_base_module_t opal_pmix_external_module = { + /* client APIs */ + .init = pmix_ext_client_init, + .finalize = pmix_ext_client_finalize, + .initialized = pmix_ext_initialized, + .abort = pmix_ext_abort, + .commit = pmix_ext_commit, + .fence = pmix_ext_fence, + .fence_nb = pmix_ext_fencenb, + .put = pmix_ext_put, + .get = pmix_ext_get, + .get_nb = pmix_ext_getnb, + .publish = pmix_ext_publish, + .publish_nb = pmix_ext_publishnb, + .lookup = pmix_ext_lookup, + .lookup_nb = pmix_ext_lookupnb, + .unpublish = pmix_ext_unpublish, + .unpublish_nb = pmix_ext_unpublishnb, + .spawn = pmix_ext_spawn, + .spawn_nb = pmix_ext_spawnnb, + .connect = pmix_ext_connect, + .connect_nb = pmix_ext_connectnb, + .disconnect = pmix_ext_disconnect, + .disconnect_nb = pmix_ext_disconnectnb, + .resolve_peers = pmix_ext_resolve_peers, + .resolve_nodes = pmix_ext_resolve_nodes, + /* server APIs */ + .server_init = pmix_ext_server_init, + .server_finalize = pmix_ext_server_finalize, + .generate_regex = 
pmix_ext_server_gen_regex, + .generate_ppn = pmix_ext_server_gen_ppn, + .server_register_nspace = pmix_ext_server_register_nspace, + .server_deregister_nspace = pmix_ext_server_deregister_nspace, + .server_register_client = pmix_ext_server_register_client, + .server_deregister_client = pmix_ext_server_deregister_client, + .server_setup_fork = pmix_ext_server_setup_fork, + .server_dmodex_request = pmix_ext_server_dmodex, + .server_notify_error = pmix_ext_server_notify_error, + /* utility APIs */ + .get_version = PMIx_Get_version, + .register_errhandler = opal_pmix_base_register_handler, + .deregister_errhandler = opal_pmix_base_deregister_handler, + .store_local = pmix_ext_store_local, + .get_nspace = pmix_ext_get_nspace, + .register_jobid = pmix_ext_register_jobid +}; + +static const char *pmix_ext_get_nspace(opal_jobid_t jobid) +{ + opal_pmix_ext_jobid_trkr_t *jptr; + + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == jobid) { + return jptr->nspace; + } + } + return NULL; +} + +static void pmix_ext_register_jobid(opal_jobid_t jobid, const char *nspace) +{ + opal_pmix_ext_jobid_trkr_t *jptr; + + /* if we don't already have it, add this to our jobid tracker */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == jobid) { + return; + } + } + jptr = OBJ_NEW(opal_pmix_ext_jobid_trkr_t); + (void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN); + jptr->jobid = jobid; + opal_list_append(&mca_pmix_external_component.jobids, &jptr->super); +} + +pmix_status_t pmix_ext_convert_opalrc(int rc) +{ + switch (rc) { + case OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER: + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + case OPAL_ERR_COMM_FAILURE: + return PMIX_ERR_COMM_FAILURE; + case OPAL_ERR_NOT_IMPLEMENTED: + return PMIX_ERR_NOT_IMPLEMENTED; + case OPAL_ERR_NOT_SUPPORTED: + return PMIX_ERR_NOT_SUPPORTED; + case OPAL_ERR_NOT_FOUND: + return PMIX_ERR_NOT_FOUND; + 
case OPAL_ERR_SERVER_NOT_AVAIL: + return PMIX_ERR_SERVER_NOT_AVAIL; + + case OPAL_ERR_BAD_PARAM: + return PMIX_ERR_BAD_PARAM; + case OPAL_ERR_OUT_OF_RESOURCE: + return PMIX_ERR_NOMEM; + + case OPAL_ERR_DATA_VALUE_NOT_FOUND: + return PMIX_ERR_DATA_VALUE_NOT_FOUND; + case OPAL_ERR_IN_ERRNO: + return PMIX_ERR_IN_ERRNO; + case OPAL_ERR_UNREACH: + return PMIX_ERR_UNREACH; + case OPAL_ERR_TIMEOUT: + return PMIX_ERR_TIMEOUT; + case OPAL_ERR_PERM: + return PMIX_ERR_NO_PERMISSIONS; + case OPAL_ERR_PACK_MISMATCH: + return PMIX_ERR_PACK_MISMATCH; + case OPAL_ERR_PACK_FAILURE: + return PMIX_ERR_PACK_FAILURE; + + case OPAL_ERR_UNPACK_FAILURE: + return PMIX_ERR_UNPACK_FAILURE; + case OPAL_ERR_UNPACK_INADEQUATE_SPACE: + return PMIX_ERR_UNPACK_INADEQUATE_SPACE; + case OPAL_ERR_TYPE_MISMATCH: + return PMIX_ERR_TYPE_MISMATCH; + case OPAL_ERR_PROC_ENTRY_NOT_FOUND: + return PMIX_ERR_PROC_ENTRY_NOT_FOUND; + case OPAL_ERR_UNKNOWN_DATA_TYPE: + return PMIX_ERR_UNKNOWN_DATA_TYPE; + case OPAL_ERR_WOULD_BLOCK: + return PMIX_ERR_WOULD_BLOCK; + case OPAL_EXISTS: + return PMIX_EXISTS; + + case OPAL_ERR_SILENT: + return PMIX_ERR_SILENT; + case OPAL_ERROR: + return PMIX_ERROR; + case OPAL_SUCCESS: + return PMIX_SUCCESS; + default: + return PMIX_ERROR; + } +} + +int pmix_ext_convert_rc(pmix_status_t rc) +{ + switch (rc) { + case PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER: + return OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + case PMIX_ERR_COMM_FAILURE: + return OPAL_ERR_COMM_FAILURE; + case PMIX_ERR_NOT_IMPLEMENTED: + return OPAL_ERR_NOT_IMPLEMENTED; + case PMIX_ERR_NOT_SUPPORTED: + return OPAL_ERR_NOT_SUPPORTED; + case PMIX_ERR_NOT_FOUND: + return OPAL_ERR_NOT_FOUND; + case PMIX_ERR_SERVER_NOT_AVAIL: + return OPAL_ERR_SERVER_NOT_AVAIL; + + case PMIX_ERR_INVALID_NAMESPACE: + case PMIX_ERR_INVALID_SIZE: + case PMIX_ERR_INVALID_KEYVALP: + case PMIX_ERR_INVALID_NUM_PARSED: + case PMIX_ERR_INVALID_ARGS: + case PMIX_ERR_INVALID_NUM_ARGS: + case PMIX_ERR_INVALID_LENGTH: + case PMIX_ERR_INVALID_VAL_LENGTH: + 
case PMIX_ERR_INVALID_VAL: + case PMIX_ERR_INVALID_KEY_LENGTH: + case PMIX_ERR_INVALID_KEY: + case PMIX_ERR_INVALID_ARG: + return OPAL_ERR_BAD_PARAM; + case PMIX_ERR_NOMEM: + return OPAL_ERR_OUT_OF_RESOURCE; + case PMIX_ERR_INIT: + return OPAL_ERROR; + + case PMIX_ERR_DATA_VALUE_NOT_FOUND: + return OPAL_ERR_DATA_VALUE_NOT_FOUND; + case PMIX_ERR_OUT_OF_RESOURCE: + return OPAL_ERR_OUT_OF_RESOURCE; + case PMIX_ERR_RESOURCE_BUSY: + return OPAL_ERR_TEMP_OUT_OF_RESOURCE; + case PMIX_ERR_BAD_PARAM: + return OPAL_ERR_BAD_PARAM; + case PMIX_ERR_IN_ERRNO: + return OPAL_ERR_IN_ERRNO; + case PMIX_ERR_UNREACH: + return OPAL_ERR_UNREACH; + case PMIX_ERR_TIMEOUT: + return OPAL_ERR_TIMEOUT; + case PMIX_ERR_NO_PERMISSIONS: + return OPAL_ERR_PERM; + case PMIX_ERR_PACK_MISMATCH: + return OPAL_ERR_PACK_MISMATCH; + case PMIX_ERR_PACK_FAILURE: + return OPAL_ERR_PACK_FAILURE; + + case PMIX_ERR_UNPACK_FAILURE: + return OPAL_ERR_UNPACK_FAILURE; + case PMIX_ERR_UNPACK_INADEQUATE_SPACE: + return OPAL_ERR_UNPACK_INADEQUATE_SPACE; + case PMIX_ERR_TYPE_MISMATCH: + return OPAL_ERR_TYPE_MISMATCH; + case PMIX_ERR_PROC_ENTRY_NOT_FOUND: + return OPAL_ERR_PROC_ENTRY_NOT_FOUND; + case PMIX_ERR_UNKNOWN_DATA_TYPE: + return OPAL_ERR_UNKNOWN_DATA_TYPE; + case PMIX_ERR_WOULD_BLOCK: + return OPAL_ERR_WOULD_BLOCK; + case PMIX_ERR_READY_FOR_HANDSHAKE: + case PMIX_ERR_HANDSHAKE_FAILED: + case PMIX_ERR_INVALID_CRED: + return OPAL_ERR_COMM_FAILURE; + case PMIX_EXISTS: + return OPAL_EXISTS; + + case PMIX_ERR_SILENT: + return OPAL_ERR_SILENT; + case PMIX_ERROR: + return OPAL_ERROR; + case PMIX_SUCCESS: + return OPAL_SUCCESS; + default: + return OPAL_ERROR; + } +} + +void pmix_ext_value_load(pmix_value_t *v, + opal_value_t *kv) +{ + switch(kv->type) { + case OPAL_UNDEF: + v->type = PMIX_UNDEF; + opal_output(0, "TYPE WAS UNDEF"); + break; + case OPAL_BOOL: + v->type = PMIX_BOOL; + memcpy(&(v->data.flag), &kv->data.flag, 1); + break; + case OPAL_BYTE: + v->type = PMIX_BYTE; + memcpy(&(v->data.byte), &kv->data.byte, 
1); + break; + case OPAL_STRING: + v->type = PMIX_STRING; + if (NULL != kv->data.string) { + v->data.string = strdup(kv->data.string); + } else { + v->data.string = NULL; + } + break; + case OPAL_SIZE: + v->type = PMIX_SIZE; + v->data.size = (size_t)kv->data.size; + break; + case OPAL_PID: + v->type = PMIX_PID; + memcpy(&(v->data.pid), &kv->data.pid, sizeof(pid_t)); + break; + case OPAL_INT: + v->type = PMIX_INT; + memcpy(&(v->data.integer), &kv->data.integer, sizeof(int)); + break; + case OPAL_INT8: + v->type = PMIX_INT8; + memcpy(&(v->data.int8), &kv->data.int8, 1); + break; + case OPAL_INT16: + v->type = PMIX_INT16; + memcpy(&(v->data.int16), &kv->data.int16, 2); + break; + case OPAL_INT32: + v->type = PMIX_INT32; + memcpy(&(v->data.int32), &kv->data.int32, 4); + break; + case OPAL_INT64: + v->type = PMIX_INT64; + memcpy(&(v->data.int64), &kv->data.int64, 8); + break; + case OPAL_UINT: + v->type = PMIX_UINT; + memcpy(&(v->data.uint), &kv->data.uint, sizeof(int)); + break; + case OPAL_UINT8: + v->type = PMIX_UINT8; + memcpy(&(v->data.uint8), &kv->data.uint8, 1); + break; + case OPAL_UINT16: + v->type = PMIX_UINT16; + memcpy(&(v->data.uint16), &kv->data.uint16, 2); + break; + case OPAL_UINT32: + v->type = PMIX_UINT32; + memcpy(&(v->data.uint32), &kv->data.uint32, 4); + break; + case OPAL_UINT64: + v->type = PMIX_UINT64; + memcpy(&(v->data.uint64), &kv->data.uint64, 8); + break; + case OPAL_FLOAT: + v->type = PMIX_FLOAT; + memcpy(&(v->data.fval), &kv->data.fval, sizeof(float)); + break; + case OPAL_DOUBLE: + v->type = PMIX_DOUBLE; + memcpy(&(v->data.dval), &kv->data.dval, sizeof(double)); + break; + case OPAL_TIMEVAL: + v->type = PMIX_TIMEVAL; + memcpy(&(v->data.tv), &kv->data.tv, sizeof(struct timeval)); + break; + case OPAL_BYTE_OBJECT: + v->type = PMIX_BYTE_OBJECT; + if (NULL != kv->data.bo.bytes) { + v->data.bo.bytes = (char*)malloc(kv->data.bo.size); + memcpy(v->data.bo.bytes, kv->data.bo.bytes, kv->data.bo.size); + v->data.bo.size = (size_t)kv->data.bo.size; 
+ } else { + v->data.bo.bytes = NULL; + v->data.bo.size = 0; + } + break; + default: + /* silence warnings */ + break; + } +} + +int pmix_ext_value_unload(opal_value_t *kv, + const pmix_value_t *v) +{ + int rc=OPAL_SUCCESS; + + + switch(v->type) { + case PMIX_UNDEF: + rc = OPAL_ERR_UNKNOWN_DATA_TYPE; + break; + case PMIX_BOOL: + kv->type = OPAL_BOOL; + memcpy(&kv->data.flag, &(v->data.flag), 1); + break; + case PMIX_BYTE: + kv->type = OPAL_BYTE; + memcpy(&kv->data.byte, &(v->data.byte), 1); + break; + case PMIX_STRING: + kv->type = OPAL_STRING; + if (NULL != v->data.string) { + kv->data.string = strdup(v->data.string); + } + break; + case PMIX_SIZE: + kv->type = OPAL_SIZE; + kv->data.size = (int)v->data.size; + break; + case PMIX_PID: + kv->type = OPAL_PID; + memcpy(&kv->data.pid, &(v->data.pid), sizeof(pid_t)); + break; + case PMIX_INT: + kv->type = OPAL_INT; + memcpy(&kv->data.integer, &(v->data.integer), sizeof(int)); + break; + case PMIX_INT8: + kv->type = OPAL_INT8; + memcpy(&kv->data.int8, &(v->data.int8), 1); + break; + case PMIX_INT16: + kv->type = OPAL_INT16; + memcpy(&kv->data.int16, &(v->data.int16), 2); + break; + case PMIX_INT32: + kv->type = OPAL_INT32; + memcpy(&kv->data.int32, &(v->data.int32), 4); + break; + case PMIX_INT64: + kv->type = OPAL_INT64; + memcpy(&kv->data, &(v->data.int64), 8); + break; + case PMIX_UINT: + kv->type = OPAL_UINT; + memcpy(&kv->data, &(v->data.uint), sizeof(int)); + break; + case PMIX_UINT8: + kv->type = OPAL_UINT8; + memcpy(&kv->data, &(v->data.uint8), 1); + break; + case PMIX_UINT16: + kv->type = OPAL_UINT16; + memcpy(&kv->data, &(v->data.uint16), 2); + break; + case PMIX_UINT32: + kv->type = OPAL_UINT32; + memcpy(&kv->data, &(v->data.uint32), 4); + break; + case PMIX_UINT64: + kv->type = OPAL_UINT64; + memcpy(&kv->data, &(v->data.uint64), 8); + break; + case PMIX_FLOAT: + kv->type = OPAL_FLOAT; + memcpy(&kv->data, &(v->data.fval), sizeof(float)); + break; + case PMIX_DOUBLE: + kv->type = OPAL_DOUBLE; + 
memcpy(&kv->data, &(v->data.dval), sizeof(double)); + break; + case PMIX_TIMEVAL: + kv->type = OPAL_TIMEVAL; + memcpy(&kv->data, &(v->data.tv), sizeof(struct timeval)); + break; + case PMIX_BYTE_OBJECT: + kv->type = OPAL_BYTE_OBJECT; + if (NULL != v->data.bo.bytes && 0 < v->data.bo.size) { + kv->data.bo.bytes = (uint8_t*)malloc(v->data.bo.size); + memcpy(kv->data.bo.bytes, v->data.bo.bytes, v->data.bo.size); + kv->data.bo.size = (int)v->data.bo.size; + } else { + kv->data.bo.bytes = NULL; + kv->data.bo.size = 0; + } + break; + default: + /* silence warnings */ + rc = OPAL_ERROR; + break; + } + return rc; +} + + +/**** INSTANTIATE INTERNAL CLASSES ****/ +OBJ_CLASS_INSTANCE(opal_pmix_ext_jobid_trkr_t, + opal_list_item_t, + NULL, NULL); + +static void opcon(pmix_ext_opcaddy_t *p) +{ + memset(&p->p, 0, sizeof(pmix_proc_t)); + p->procs = NULL; + p->nprocs = 0; + p->error_procs = NULL; + p->nerror_procs = 0; + p->info = NULL; + p->ninfo = 0; + p->apps = NULL; + p->sz = 0; + p->active = false; + p->opcbfunc = NULL; + p->mdxcbfunc = NULL; + p->valcbfunc = NULL; + p->lkcbfunc = NULL; + p->spcbfunc = NULL; + p->cbdata = NULL; +} +static void opdes(pmix_ext_opcaddy_t *p) +{ + if (NULL != p->procs) { + PMIX_PROC_FREE(p->procs, p->nprocs); + } + if (NULL != p->error_procs) { + PMIX_PROC_FREE(p->error_procs, p->nerror_procs); + } + if (NULL != p->info) { + PMIX_INFO_FREE(p->info, p->sz); + } + if (NULL != p->apps) { + PMIX_APP_FREE(p->apps, p->sz); + } +} +OBJ_CLASS_INSTANCE(pmix_ext_opcaddy_t, + opal_object_t, + opcon, opdes); + +static void ocadcon(pmix_ext_opalcaddy_t *p) +{ + OBJ_CONSTRUCT(&p->procs, opal_list_t); + OBJ_CONSTRUCT(&p->info, opal_list_t); + OBJ_CONSTRUCT(&p->apps, opal_list_t); + p->opcbfunc = NULL; + p->dmdxfunc = NULL; + p->mdxcbfunc = NULL; + p->lkupcbfunc = NULL; + p->spwncbfunc = NULL; + p->cbdata = NULL; + p->odmdxfunc = NULL; + p->ocbdata = NULL; +} +static void ocaddes(pmix_ext_opalcaddy_t *p) +{ + OPAL_LIST_DESTRUCT(&p->procs); + 
OPAL_LIST_DESTRUCT(&p->info); + OPAL_LIST_DESTRUCT(&p->apps); +} +OBJ_CLASS_INSTANCE(pmix_ext_opalcaddy_t, + opal_object_t, + ocadcon, ocaddes); diff --git a/opal/mca/pmix/external/pmix_ext.h b/opal/mca/pmix/external/pmix_ext.h new file mode 100644 index 00000000000..f5bd04900ad --- /dev/null +++ b/opal/mca/pmix/external/pmix_ext.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_PMIX_EXTERNAL_H +#define MCA_PMIX_EXTERNAL_H + +#include "opal_config.h" + +#ifdef HAVE_SYS_SOCKET_H +#include +#endif +#ifdef HAVE_SYS_UN_H +#include +#endif + +#include "opal/mca/mca.h" +#include "opal/mca/event/event.h" +#include "opal/util/proc.h" + +#include "opal/mca/pmix/pmix.h" +#include "pmix_server.h" +#include "pmix_server.h" +#include "pmix/pmix_common.h" + +BEGIN_C_DECLS + +typedef struct { + opal_pmix_base_component_t super; + opal_list_t jobids; + bool native_launch; +} mca_pmix_external_component_t; + +OPAL_DECLSPEC extern mca_pmix_external_component_t mca_pmix_external_component; + +OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_external_module; + +/**** INTERNAL OBJECTS ****/ +typedef struct { + opal_list_item_t super; + opal_jobid_t jobid; + char nspace[PMIX_MAX_NSLEN + 1]; +} opal_pmix_ext_jobid_trkr_t; +OBJ_CLASS_DECLARATION(opal_pmix_ext_jobid_trkr_t); + +typedef struct { + opal_object_t super; + pmix_proc_t p; + pmix_proc_t *procs; + size_t nprocs; + pmix_proc_t *error_procs; + size_t nerror_procs; + pmix_info_t *info; + size_t ninfo; + pmix_app_t *apps; + size_t sz; + volatile bool active; + opal_pmix_op_cbfunc_t opcbfunc; + opal_pmix_modex_cbfunc_t mdxcbfunc; + opal_pmix_value_cbfunc_t valcbfunc; + opal_pmix_lookup_cbfunc_t 
lkcbfunc; + opal_pmix_spawn_cbfunc_t spcbfunc; + void *cbdata; +} pmix_ext_opcaddy_t; +OBJ_CLASS_DECLARATION(pmix_ext_opcaddy_t); + +typedef struct { + opal_object_t super; + opal_list_t procs; + opal_list_t info; + opal_list_t apps; + pmix_op_cbfunc_t opcbfunc; + pmix_dmodex_response_fn_t dmdxfunc; + pmix_modex_cbfunc_t mdxcbfunc; + pmix_lookup_cbfunc_t lkupcbfunc; + pmix_spawn_cbfunc_t spwncbfunc; + void *cbdata; + opal_pmix_release_cbfunc_t odmdxfunc; + void *ocbdata; +} pmix_ext_opalcaddy_t; +OBJ_CLASS_DECLARATION(pmix_ext_opalcaddy_t); + + +/**** CLIENT FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC int pmix_ext_client_init(void); +OPAL_MODULE_DECLSPEC int pmix_ext_client_finalize(void); +OPAL_MODULE_DECLSPEC int pmix_ext_initialized(void); +OPAL_MODULE_DECLSPEC int pmix_ext_abort(int flag, const char *msg, + opal_list_t *procs); +OPAL_MODULE_DECLSPEC int pmix_ext_commit(void); +OPAL_MODULE_DECLSPEC int pmix_ext_fence(opal_list_t *procs, int collect_data); +OPAL_MODULE_DECLSPEC int pmix_ext_fencenb(opal_list_t *procs, int collect_data, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix_ext_put(opal_pmix_scope_t scope, + opal_value_t *val); +OPAL_MODULE_DECLSPEC int pmix_ext_get(const opal_process_name_t *proc, const char *key, + opal_list_t *info, opal_value_t **val); +OPAL_MODULE_DECLSPEC int pmix_ext_getnb(const opal_process_name_t *proc, const char *key, + opal_list_t *info, + opal_pmix_value_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix_ext_publish(opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix_ext_publishnb(opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix_ext_lookup(opal_list_t *data, opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix_ext_lookupnb(char **keys, opal_list_t *info, + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix_ext_unpublish(char **keys, opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix_ext_unpublishnb(char **keys, 
opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix_ext_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid); +OPAL_MODULE_DECLSPEC int pmix_ext_spawnnb(opal_list_t *job_info, opal_list_t *apps, + opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix_ext_connect(opal_list_t *procs); +OPAL_MODULE_DECLSPEC int pmix_ext_connectnb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC int pmix_ext_disconnect(opal_list_t *procs); +OPAL_MODULE_DECLSPEC int pmix_ext_disconnectnb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC int pmix_ext_resolve_peers(const char *nodename, opal_jobid_t jobid, + opal_list_t *procs); +OPAL_MODULE_DECLSPEC int pmix_ext_resolve_nodes(opal_jobid_t jobid, char **nodelist); + +/**** COMMON FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC int pmix_ext_store_local(const opal_process_name_t *proc, + opal_value_t *val); + +/**** SERVER SOUTHBOUND FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC int pmix_ext_server_init(opal_pmix_server_module_t *module, + opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix_ext_server_finalize(void); +OPAL_MODULE_DECLSPEC int pmix_ext_server_gen_regex(const char *input, char **regex); +OPAL_MODULE_DECLSPEC int pmix_ext_server_gen_ppn(const char *input, char **ppn); +OPAL_MODULE_DECLSPEC int pmix_ext_server_register_nspace(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC void pmix_ext_server_deregister_nspace(opal_jobid_t jobid); +OPAL_MODULE_DECLSPEC int pmix_ext_server_register_client(const opal_process_name_t *proc, + uid_t uid, gid_t gid, + void *server_object, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC void pmix_ext_server_deregister_client(const opal_process_name_t *proc); +OPAL_MODULE_DECLSPEC int pmix_ext_server_setup_fork(const opal_process_name_t *proc, char 
***env); +OPAL_MODULE_DECLSPEC int pmix_ext_server_dmodex(const opal_process_name_t *proc, + opal_pmix_modex_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix_ext_server_notify_error(int status, + opal_list_t *procs, + opal_list_t *error_procs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + + +/**** COMPONENT UTILITY FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC pmix_status_t pmix_ext_convert_opalrc(int rc); +OPAL_MODULE_DECLSPEC int pmix_ext_convert_rc(pmix_status_t rc); +OPAL_MODULE_DECLSPEC void pmix_ext_value_load(pmix_value_t *v, + opal_value_t *kv); +OPAL_MODULE_DECLSPEC int pmix_ext_value_unload(opal_value_t *kv, + const pmix_value_t *v); + +END_C_DECLS + +#endif /* MCA_PMIX_EXTERNAL_H */ diff --git a/opal/mca/pmix/external/pmix_ext_client.c b/opal/mca/pmix/external/pmix_ext_client.c new file mode 100644 index 00000000000..87cbb96579e --- /dev/null +++ b/opal/mca/pmix/external/pmix_ext_client.c @@ -0,0 +1,1311 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/types.h" + +#ifdef HAVE_STRING_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/hash_string.h" +#include "opal/util/argv.h" +#include "opal/util/proc.h" + +#include "opal/mca/pmix/base/base.h" +#include "pmix_ext.h" +#include "pmix.h" + +static pmix_proc_t my_proc; +static char *dbgvalue=NULL; +static int errhdler_ref = 0; + +static void release_cbfunc(void *cbdata) +{ + pmix_ext_opalcaddy_t *cd = (pmix_ext_opalcaddy_t*)cbdata; + OBJ_RELEASE(cd); +} +static void myerr(pmix_status_t status, + pmix_proc_t procs[], size_t nprocs, + pmix_info_t info[], size_t ninfo) +{ + int rc; + opal_namelist_t *nm; + opal_value_t *iptr; + size_t n; + pmix_ext_opalcaddy_t *cd; + + /* convert the incoming status */ + rc = pmix_ext_convert_rc(status); + + /* setup the caddy */ + cd = OBJ_NEW(pmix_ext_opalcaddy_t); + + /* convert the array of procs */ + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { + OPAL_ERROR_LOG(rc); + OBJ_RELEASE(cd); + return; + } + nm->name.vpid = procs[n].rank; + opal_list_append(&cd->procs, &nm->super); + } + + /* convert the array of info */ + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + iptr->key = strdup(info[n].key); + pmix_ext_value_unload(iptr, &info[n].value); + opal_list_append(&cd->info, &iptr->super); + } + + /* call the base errhandler */ + opal_pmix_base_errhandler(rc, &cd->procs, &cd->info, release_cbfunc, cd); +} + +static void errreg_cbfunc (pmix_status_t status, + int errhandler_ref, + void *cbdata) +{ + errhdler_ref = errhandler_ref; + opal_output_verbose(5, opal_pmix_base_framework.framework_output, + "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%d", + status, errhandler_ref); +} + +int 
pmix_ext_client_init(void) +{ + opal_process_name_t pname; + pmix_status_t rc; + int dbg; + opal_pmix_ext_jobid_trkr_t *job; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client init"); + + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } + rc = PMIx_Init(&my_proc); + if (PMIX_SUCCESS != rc) { + return pmix_ext_convert_rc(rc); + } + + /* store our jobid and rank */ + if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + mca_pmix_external_component.native_launch = true; + opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(my_proc.nspace, pname.jobid); + /* keep it from being negative */ + pname.jobid &= ~(0x8000); + } + /* insert this into our list of jobids - it will be the + * first, and so we'll check it first */ + job = OBJ_NEW(opal_pmix_ext_jobid_trkr_t); + (void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN); + job->jobid = pname.jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); + + pname.vpid = my_proc.rank; + opal_proc_set_name(&pname); + + /* register the errhandler */ + PMIx_Register_errhandler(NULL, 0, myerr, errreg_cbfunc, NULL ); + return OPAL_SUCCESS; + +} + +int pmix_ext_client_finalize(void) +{ + pmix_status_t rc; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client finalize"); + + /* deregister the errhandler */ + PMIx_Deregister_errhandler(errhdler_ref, NULL, NULL); + + rc = PMIx_Finalize(); + + return pmix_ext_convert_rc(rc); +} + +int pmix_ext_initialized(void) +{ + pmix_status_t rc; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client initialized"); + + rc = PMIx_Initialized(); + return 
pmix_ext_convert_rc(rc); +} + +int pmix_ext_abort(int flag, const char *msg, + opal_list_t *procs) +{ + pmix_status_t rc; + pmix_proc_t *parray=NULL; + size_t n, cnt=0; + opal_namelist_t *ptr; + opal_pmix_ext_jobid_trkr_t *job, *jptr; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client abort"); + + /* convert the list of procs to an array + * of pmix_proc_t */ + if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { + PMIX_PROC_CREATE(parray, cnt); + n=0; + OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == ptr->name.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + parray[n].rank = ptr->name.vpid; + ++n; + } + } + + /* call the library abort */ + rc = PMIx_Abort(flag, msg, parray, cnt); + + /* release the array */ + PMIX_PROC_FREE(parray, cnt); + + return pmix_ext_convert_rc(rc); +} + +int pmix_ext_store_local(const opal_process_name_t *proc, opal_value_t *val) +{ + pmix_value_t kv; + pmix_status_t rc; + pmix_proc_t p; + opal_pmix_ext_jobid_trkr_t *job, *jptr; + + if (NULL != proc) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == proc->jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); + p.rank = proc->vpid; + } else { + /* use our name */ + (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + p.rank = OPAL_PROC_MY_NAME.vpid; + } + + PMIX_VALUE_CONSTRUCT(&kv); + pmix_ext_value_load(&kv, val); + + rc = PMIx_Store_internal(&p, 
val->key, &kv); + PMIX_VALUE_DESTRUCT(&kv); + + return pmix_ext_convert_rc(rc); +} + +int pmix_ext_commit(void) +{ + pmix_status_t rc; + + rc = PMIx_Commit(); + return pmix_ext_convert_rc(rc); +} + +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix_ext_opcaddy_t *op = (pmix_ext_opcaddy_t*)cbdata; + + if (NULL != op->opcbfunc) { + op->opcbfunc(pmix_ext_convert_rc(status), op->cbdata); + } + OBJ_RELEASE(op); +} + +int pmix_ext_fence(opal_list_t *procs, int collect_data) +{ + pmix_status_t rc; + pmix_proc_t *parray=NULL; + size_t n, cnt=0; + opal_namelist_t *ptr; + pmix_info_t info, *iptr; + opal_pmix_ext_jobid_trkr_t *job, *jptr; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client fence"); + + /* convert the list of procs to an array + * of pmix_proc_t */ + if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { + PMIX_PROC_CREATE(parray, cnt); + n=0; + OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == ptr->name.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + parray[n].rank = ptr->name.vpid; + ++n; + } + } + if (collect_data) { + PMIX_INFO_CONSTRUCT(&info); + (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); + info.value.type = PMIX_BOOL; + info.value.data.flag = true; + iptr = &info; + n = 1; + } else { + iptr = NULL; + n = 0; + } + + /* call the library function */ + rc = PMIx_Fence(parray, cnt, iptr, n); + + /* release the array */ + PMIX_PROC_FREE(parray, cnt); + if (NULL != iptr) { + PMIX_INFO_DESTRUCT(&info); + } + + return pmix_ext_convert_rc(rc); + +} + +int pmix_ext_fencenb(opal_list_t *procs, int collect_data, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t rc; + 
pmix_proc_t *parray=NULL; + size_t n, cnt=0; + opal_namelist_t *ptr; + pmix_ext_opcaddy_t *op; + pmix_info_t info, *iptr; + opal_pmix_ext_jobid_trkr_t *job, *jptr; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client fence_nb"); + + /* convert the list of procs to an array + * of pmix_proc_t */ + if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { + PMIX_PROC_CREATE(parray, cnt); + n=0; + OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == ptr->name.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + parray[n].rank = ptr->name.vpid; + ++n; + } + } + + if (collect_data) { + PMIX_INFO_CONSTRUCT(&info); + (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); + info.value.type = PMIX_BOOL; + info.value.data.flag = true; + iptr = &info; + n = 1; + } else { + iptr = NULL; + n = 0; + } + + /* create the caddy */ + op = OBJ_NEW(pmix_ext_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + op->procs = parray; + op->nprocs = cnt; + + /* call the library function */ + rc = PMIx_Fence_nb(parray, cnt, iptr, n, opcbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + + return pmix_ext_convert_rc(rc); + +} + +int pmix_ext_put(opal_pmix_scope_t scope, + opal_value_t *val) +{ + pmix_value_t kv; + pmix_status_t rc; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client put"); + + PMIX_VALUE_CONSTRUCT(&kv); + pmix_ext_value_load(&kv, val); + + rc = PMIx_Put(scope, val->key, &kv); + PMIX_VALUE_DESTRUCT(&kv); + return pmix_ext_convert_rc(rc); +} + +int pmix_ext_get(const opal_process_name_t *proc, const char *key, + opal_list_t *info, opal_value_t **val) +{ + int ret; + pmix_value_t 
*kv; + pmix_status_t rc; + pmix_proc_t p, *pptr; + size_t ninfo, n; + pmix_info_t *pinfo; + opal_value_t *ival; + opal_pmix_ext_jobid_trkr_t *job, *jptr; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "%s PMIx_client get on proc %s key %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key); + + /* prep default response */ + *val = NULL; + if (NULL != proc) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == proc->jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); + p.rank = proc->vpid; + pptr = &p; + } else { + /* if they are asking for our jobid, then return it */ + if (0 == strcmp(key, OPAL_PMIX_JOBID)) { + (*val) = OBJ_NEW(opal_value_t); + (*val)->type = OPAL_UINT32; + (*val)->data.uint32 = OPAL_PROC_MY_NAME.jobid; + return OPAL_SUCCESS; + } else if (0 == strcmp(key, OPAL_PMIX_RANK)) { + (*val) = OBJ_NEW(opal_value_t); + (*val)->type = OPAL_INT; + (*val)->data.integer = my_proc.rank; + return OPAL_SUCCESS; + } + pptr = NULL; + } + + if (NULL != info) { + ninfo = opal_list_get_size(info); + if (0 < ninfo) { + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(ival, info, opal_value_t) { + (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&pinfo[n].value, ival); + } + } else { + pinfo = NULL; + } + } else { + pinfo = NULL; + ninfo = 0; + } + + /* pass the request down */ + rc = PMIx_Get(pptr, key, pinfo, ninfo, &kv); + if (PMIX_SUCCESS == rc) { + if (NULL == kv) { + ret = OPAL_SUCCESS; + } else { + *val = OBJ_NEW(opal_value_t); + ret = pmix_ext_value_unload(*val, kv); + PMIX_VALUE_FREE(kv, 1); + } + } else { + ret = pmix_ext_convert_rc(rc); + } + PMIX_INFO_FREE(pinfo, ninfo); + return ret; +} + +static void 
val_cbfunc(pmix_status_t status, + pmix_value_t *kv, void *cbdata) +{ + pmix_ext_opcaddy_t *op = (pmix_ext_opcaddy_t*)cbdata; + int rc; + opal_value_t val, *v=NULL; + + rc = pmix_ext_convert_opalrc(status); + if (PMIX_SUCCESS == status && NULL != kv) { + rc = pmix_ext_value_unload(&val, kv); + v = &val; + } + + if (NULL != op->valcbfunc) { + op->valcbfunc(rc, v, op->cbdata); + } + OBJ_RELEASE(op); +} + +int pmix_ext_getnb(const opal_process_name_t *proc, const char *key, + opal_list_t *info, + opal_pmix_value_cbfunc_t cbfunc, void *cbdata) +{ + pmix_ext_opcaddy_t *op; + pmix_status_t rc; + size_t n; + opal_value_t *ival; + opal_pmix_ext_jobid_trkr_t *job, *jptr; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "%s PMIx_client get_nb on proc %s key %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key); + + /* create the caddy */ + op = OBJ_NEW(pmix_ext_opcaddy_t); + op->valcbfunc = cbfunc; + op->cbdata = cbdata; + + if (NULL != proc) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == proc->jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(op->p.nspace, job->nspace, PMIX_MAX_NSLEN); + op->p.rank = proc->vpid; + } else { + (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + op->p.rank = PMIX_RANK_WILDCARD; + } + + if (NULL != info) { + op->sz = opal_list_get_size(info); + if (0 < op->sz) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(ival, info, opal_value_t) { + (void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&op->info[n].value, ival); + } + } + } + + /* call the library function */ + rc = PMIx_Get_nb(&op->p, key, op->info, op->sz, val_cbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + + return 
pmix_ext_convert_rc(rc); +} + +int pmix_ext_publish(opal_list_t *info) +{ + pmix_info_t *pinfo; + pmix_status_t ret; + opal_value_t *iptr; + size_t sz, n; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client publish"); + + if (NULL == info) { + return OPAL_ERR_BAD_PARAM; + } + + sz = opal_list_get_size(info); + if (0 < sz) { + PMIX_INFO_CREATE(pinfo, sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&pinfo[n].value, iptr); + ++n; + } + } else { + pinfo = NULL; + } + + ret = PMIx_Publish(pinfo, sz); + + return pmix_ext_convert_rc(ret); +} + +int pmix_ext_publishnb(opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t ret; + opal_value_t *iptr; + size_t n; + pmix_ext_opcaddy_t *op; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client publish_nb"); + + if (NULL == info) { + return OPAL_ERR_BAD_PARAM; + } + + /* create the caddy */ + op = OBJ_NEW(pmix_ext_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + op->sz = opal_list_get_size(info); + if (0 < op->sz) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&op->info[n].value, iptr); + ++n; + } + } + + ret = PMIx_Publish_nb(op->info, op->sz, opcbfunc, op); + + return pmix_ext_convert_rc(ret); +} + +int pmix_ext_lookup(opal_list_t *data, opal_list_t *info) +{ + pmix_pdata_t *pdata; + pmix_info_t *pinfo; + size_t sz, ninfo, n; + int rc; + pmix_status_t ret; + opal_pmix_pdata_t *d; + opal_value_t *iptr; + opal_pmix_ext_jobid_trkr_t *job, *jptr; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client lookup"); + + if (NULL == data) { + return OPAL_ERR_BAD_PARAM; + } + + sz = opal_list_get_size(data); + PMIX_PDATA_CREATE(pdata, sz); + n=0; + OPAL_LIST_FOREACH(d, data, 
opal_pmix_pdata_t) { + (void)strncpy(pdata[n++].key, d->value.key, PMIX_MAX_KEYLEN); + } + + if (NULL != info) { + ninfo = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n++].key, iptr->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&pinfo[n].value, iptr); + ++n; + } + } else { + pinfo = NULL; + ninfo = 0; + } + + ret = PMIx_Lookup(pdata, sz, pinfo, ninfo); + PMIX_INFO_FREE(pinfo, ninfo); + + if (PMIX_SUCCESS == ret) { + /* transfer the data back */ + n=0; + OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { + if (mca_pmix_external_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(&d->proc.jobid, pdata[n].proc.nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(pdata[n].proc.nspace, d->proc.jobid); + } + /* if we don't already have it, add this to our jobid tracker */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == d->proc.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + job = OBJ_NEW(opal_pmix_ext_jobid_trkr_t); + (void)strncpy(job->nspace, pdata[n].proc.nspace, PMIX_MAX_NSLEN); + job->jobid = d->proc.jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); + } + if (PMIX_RANK_WILDCARD == pdata[n].proc.rank) { + d->proc.vpid = OPAL_VPID_WILDCARD; + } else { + d->proc.vpid = pdata[n].proc.rank; + } + rc = pmix_ext_value_unload(&d->value, &pdata[n].value); + if (OPAL_SUCCESS != rc) { + OPAL_ERROR_LOG(rc); + PMIX_PDATA_FREE(pdata, sz); + return OPAL_ERR_BAD_PARAM; + } + ++n; + } + } + + return pmix_ext_convert_rc(ret); +} + +static void lk_cbfunc(pmix_status_t status, + pmix_pdata_t data[], size_t ndata, + void *cbdata) +{ + pmix_ext_opcaddy_t *op = (pmix_ext_opcaddy_t*)cbdata; + 
opal_pmix_pdata_t *d; + opal_list_t results, *r = NULL; + int rc; + size_t n; + opal_pmix_ext_jobid_trkr_t *job, *jptr; + + if (NULL == op->lkcbfunc) { + OBJ_RELEASE(op); + return; + } + + rc = pmix_ext_convert_rc(status); + if (OPAL_SUCCESS == rc) { + OBJ_CONSTRUCT(&results, opal_list_t); + for (n=0; n < ndata; n++) { + d = OBJ_NEW(opal_pmix_pdata_t); + opal_list_append(&results, &d->super); + if (mca_pmix_external_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(&d->proc.jobid, data[n].proc.nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(data[n].proc.nspace, d->proc.jobid); + } + /* if we don't already have it, add this to our jobid tracker */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == d->proc.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + job = OBJ_NEW(opal_pmix_ext_jobid_trkr_t); + (void)strncpy(job->nspace, data[n].proc.nspace, PMIX_MAX_NSLEN); + job->jobid = d->proc.jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); + } + if (PMIX_RANK_WILDCARD == data[n].proc.rank) { + d->proc.vpid = OPAL_VPID_WILDCARD; + } else { + d->proc.vpid = data[n].proc.rank; + } + d->value.key = strdup(data[n].key); + rc = pmix_ext_value_unload(&d->value, &data[n].value); + if (OPAL_SUCCESS != rc) { + rc = OPAL_ERR_BAD_PARAM; + OPAL_ERROR_LOG(rc); + goto release; + } + } + r = &results; + } + release: + /* execute the callback */ + op->lkcbfunc(rc, r, op->cbdata); + + if (NULL != r) { + OPAL_LIST_DESTRUCT(&results); + } + OBJ_RELEASE(op); +} + +int pmix_ext_lookupnb(char **keys, opal_list_t *info, + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t ret; + pmix_ext_opcaddy_t *op; + opal_value_t *iptr; + size_t n; + + + opal_output_verbose(1, 
opal_pmix_base_framework.framework_output, + "PMIx_client lookup_nb"); + + /* create the caddy */ + op = OBJ_NEW(pmix_ext_opcaddy_t); + op->lkcbfunc = cbfunc; + op->cbdata = cbdata; + + if (NULL != info) { + op->sz = opal_list_get_size(info); + if (0 < op->sz) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&op->info[n].value, iptr); + ++n; + } + } + } + + ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op); + + return pmix_ext_convert_rc(ret); +} + +int pmix_ext_unpublish(char **keys, opal_list_t *info) +{ + pmix_status_t ret; + size_t ninfo, n; + pmix_info_t *pinfo; + opal_value_t *iptr; + + if (NULL != info) { + ninfo = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n++].key, iptr->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&pinfo[n].value, iptr); + ++n; + } + } else { + pinfo = NULL; + ninfo = 0; + } + + ret = PMIx_Unpublish(keys, pinfo, ninfo); + PMIX_INFO_FREE(pinfo, ninfo); + + return pmix_ext_convert_rc(ret); +} + +int pmix_ext_unpublishnb(char **keys, opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t ret; + pmix_ext_opcaddy_t *op; + opal_value_t *iptr; + size_t n; + + /* create the caddy */ + op = OBJ_NEW(pmix_ext_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + if (NULL != info) { + op->sz = opal_list_get_size(info); + if (0 < op->sz) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&op->info[n].value, iptr); + ++n; + } + } + } + + ret = PMIx_Unpublish_nb(keys, op->info, op->sz, opcbfunc, op); + + return pmix_ext_convert_rc(ret); +} + +int pmix_ext_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) +{ + pmix_status_t ret; + pmix_info_t 
*pinfo = NULL; + pmix_app_t *papps; + size_t napps, n, m, ninfo = 0; + char nspace[PMIX_MAX_NSLEN+1]; + opal_value_t *info; + opal_pmix_app_t *app; + opal_pmix_ext_jobid_trkr_t *job; + + if (NULL != job_info && 0 < (ninfo = opal_list_get_size(job_info))) { + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(info, job_info, opal_value_t) { + (void)strncpy(pinfo[n].key, info->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&pinfo[n].value, info); + ++n; + } + } + + napps = opal_list_get_size(apps); + PMIX_APP_CREATE(papps, napps); + n=0; + OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { + papps[n].cmd = strdup(app->cmd); + papps[n].argc = app->argc; + papps[n].argv = opal_argv_copy(app->argv); + papps[n].env = opal_argv_copy(app->env); + papps[n].maxprocs = app->maxprocs; + if (0 < (papps[n].ninfo = opal_list_get_size(&app->info))) { + PMIX_INFO_CREATE(papps[n].info, papps[n].ninfo); + m=0; + OPAL_LIST_FOREACH(info, &app->info, opal_value_t) { + (void)strncpy(papps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&papps[n].info[m].value, info); + ++m; + } + } + ++n; + } + + ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace); + if (PMIX_SUCCESS == ret) { + if (mca_pmix_external_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(jobid, nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(nspace, *jobid); + } + /* add this to our jobid tracker */ + job = OBJ_NEW(opal_pmix_ext_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = *jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); + } + PMIX_APP_FREE(papps, napps); + + return pmix_ext_convert_rc(ret); +} + +static void spcbfunc(pmix_status_t status, + char *nspace, void *cbdata) +{ + pmix_ext_opcaddy_t *op = (pmix_ext_opcaddy_t*)cbdata; + int rc; + 
opal_jobid_t jobid=OPAL_JOBID_INVALID; + opal_pmix_ext_jobid_trkr_t *job; + + rc = pmix_ext_convert_rc(status); + if (PMIX_SUCCESS == status) { + if (mca_pmix_external_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(&jobid, nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(nspace, jobid); + } + /* add this to our jobid tracker */ + job = OBJ_NEW(opal_pmix_ext_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); + } + + op->spcbfunc(rc, jobid, op->cbdata); + OBJ_RELEASE(op); +} + +int pmix_ext_spawnnb(opal_list_t *job_info, opal_list_t *apps, + opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t ret; + pmix_ext_opcaddy_t *op; + size_t n, m; + opal_value_t *info; + opal_pmix_app_t *app; + + /* create the caddy */ + op = OBJ_NEW(pmix_ext_opcaddy_t); + op->spcbfunc = cbfunc; + op->cbdata = cbdata; + + if (NULL != job_info && 0 < (op->ninfo = opal_list_get_size(job_info))) { + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(info, job_info, opal_value_t) { + (void)strncpy(op->info[n].key, info->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&op->info[n].value, info); + ++n; + } + } + + op->sz = opal_list_get_size(apps); + PMIX_APP_CREATE(op->apps, op->sz); + n=0; + OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { + op->apps[n].cmd = strdup(app->cmd); + op->apps[n].argc = app->argc; + op->apps[n].argv = opal_argv_copy(app->argv); + op->apps[n].env = opal_argv_copy(app->env); + op->apps[n].maxprocs = app->maxprocs; + if (0 < (op->apps[n].ninfo = opal_list_get_size(&app->info))) { + PMIX_INFO_CREATE(op->apps[n].info, op->apps[n].ninfo); + m=0; + OPAL_LIST_FOREACH(info, &app->info, opal_value_t) { + (void)strncpy(op->apps[n].info[m].key, info->key, 
PMIX_MAX_KEYLEN); + pmix_ext_value_load(&op->apps[n].info[m].value, info); + ++m; + } + } + ++n; + } + + ret = PMIx_Spawn_nb(op->info, op->ninfo, op->apps, op->sz, spcbfunc, op); + + return pmix_ext_convert_rc(ret); +} + +int pmix_ext_connect(opal_list_t *procs) +{ + pmix_status_t ret; + pmix_proc_t *parray=NULL; + size_t n, cnt=0; + opal_namelist_t *ptr; + opal_pmix_ext_jobid_trkr_t *job, *jptr; + + /* protect against bozo error */ + if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + return OPAL_ERR_BAD_PARAM; + } + + /* convert the list of procs to an array + * of pmix_proc_t */ + PMIX_PROC_CREATE(parray, cnt); + n=0; + OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == ptr->name.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + if (OPAL_VPID_WILDCARD == ptr->name.vpid) { + parray[n].rank = PMIX_RANK_WILDCARD; + } else { + parray[n].rank = ptr->name.vpid; + } + ++n; + } + + ret = PMIx_Connect(parray, cnt, NULL, 0); + PMIX_PROC_FREE(parray, cnt); + + return pmix_ext_convert_rc(ret); +} + +int pmix_ext_connectnb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + pmix_status_t ret; + size_t n, cnt=0; + opal_namelist_t *ptr; + pmix_ext_opcaddy_t *op; + opal_pmix_ext_jobid_trkr_t *job; + + /* protect against bozo error */ + if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + return OPAL_ERR_BAD_PARAM; + } + + /* create the caddy */ + op = OBJ_NEW(pmix_ext_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + op->nprocs = cnt; + + /* convert the list of procs to an array + * of pmix_proc_t */ + PMIX_PROC_CREATE(op->procs, op->nprocs); + n=0; + OPAL_LIST_FOREACH(ptr, 
procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + OPAL_LIST_FOREACH(job, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (job->jobid == ptr->name.jobid) { + (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); + break; + } + } + if (OPAL_VPID_WILDCARD == ptr->name.vpid) { + op->procs[n].rank = PMIX_RANK_WILDCARD; + } else { + op->procs[n].rank = ptr->name.vpid; + } + ++n; + } + + ret = PMIx_Connect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); + + return pmix_ext_convert_rc(ret); +} + +int pmix_ext_disconnect(opal_list_t *procs) +{ + pmix_status_t ret; + pmix_proc_t *parray=NULL; + size_t n, cnt=0; + opal_namelist_t *ptr; + opal_pmix_ext_jobid_trkr_t *job; + + /* protect against bozo error */ + if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + return OPAL_ERR_BAD_PARAM; + } + + /* convert the list of procs to an array + * of pmix_proc_t */ + PMIX_PROC_CREATE(parray, cnt); + n=0; + OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + OPAL_LIST_FOREACH(job, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (job->jobid == ptr->name.jobid) { + (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + break; + } + } + if (OPAL_VPID_WILDCARD == ptr->name.vpid) { + parray[n].rank = PMIX_RANK_WILDCARD; + } else { + parray[n].rank = ptr->name.vpid; + } + ++n; + } + + ret = PMIx_Disconnect(parray, cnt, NULL, 0); + PMIX_PROC_FREE(parray, cnt); + + return pmix_ext_convert_rc(ret); +} + +int pmix_ext_disconnectnb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + pmix_status_t ret; + size_t n, cnt=0; + opal_namelist_t *ptr; + pmix_ext_opcaddy_t *op; + opal_pmix_ext_jobid_trkr_t *job; + + /* protect against bozo error */ + if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + return OPAL_ERR_BAD_PARAM; + } + + /* create the caddy */ + op = 
OBJ_NEW(pmix_ext_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + op->nprocs = cnt; + + /* convert the list of procs to an array + * of pmix_proc_t */ + PMIX_PROC_CREATE(op->procs, op->nprocs); + n=0; + OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { + /* look thru our list of jobids and find the + * corresponding nspace */ + OPAL_LIST_FOREACH(job, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (job->jobid == ptr->name.jobid) { + (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); + break; + } + } + if (OPAL_VPID_WILDCARD == ptr->name.vpid) { + op->procs[n].rank = PMIX_RANK_WILDCARD; + } else { + op->procs[n].rank = ptr->name.vpid; + } + ++n; + } + + ret = PMIx_Disconnect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); + + return pmix_ext_convert_rc(ret); +} + + +int pmix_ext_resolve_peers(const char *nodename, opal_jobid_t jobid, + opal_list_t *procs) +{ + char *nspace; + pmix_proc_t *array=NULL; + size_t nprocs, n; + opal_namelist_t *nm; + int rc; + pmix_status_t ret; + opal_pmix_ext_jobid_trkr_t *job, *jptr; + + if (OPAL_JOBID_WILDCARD == jobid) { + nspace = NULL; + } else { + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + nspace = job->nspace; + } + + ret = PMIx_Resolve_peers(nodename, nspace, &array, &nprocs); + rc = pmix_ext_convert_rc(ret); + + if (NULL != array && 0 < nprocs) { + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(procs, &nm->super); + if (mca_pmix_external_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(&nm->name.jobid, array[n].nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(array[n].nspace, 
nm->name.jobid); + } + /* if we don't already have it, add this to our jobid tracker */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == nm->name.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + job = OBJ_NEW(opal_pmix_ext_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); + } + nm->name.vpid = array[n].rank; + } + } + PMIX_PROC_FREE(array, nprocs); + + return rc; +} + +int pmix_ext_resolve_nodes(opal_jobid_t jobid, char **nodelist) +{ + pmix_status_t ret; + char *nspace=NULL; + opal_pmix_ext_jobid_trkr_t *job, *jptr; + + if (OPAL_JOBID_WILDCARD != jobid) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + nspace = job->nspace; + } + + ret = PMIx_Resolve_nodes(nspace, nodelist); + + return pmix_ext_convert_rc(ret);; +} diff --git a/opal/mca/pmix/external/pmix_ext_component.c b/opal/mca/pmix/external/pmix_ext_component.c new file mode 100644 index 00000000000..c250fe0703f --- /dev/null +++ b/opal/mca/pmix/external/pmix_ext_component.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. 
Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/class/opal_list.h" +#include "opal/util/proc.h" +#include "opal/mca/pmix/pmix.h" +#include "pmix_ext.h" + +/* + * Public string showing the pmix external component version number + */ +const char *opal_pmix_external_component_version_string = + "OPAL external pmix MCA component version " OPAL_VERSION; + +/* + * Local function + */ +static int external_open(void); +static int external_close(void); +static int external_component_query(mca_base_module_t **module, int *priority); + + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +mca_pmix_external_component_t mca_pmix_external_component = { + { + /* First, the mca_component_t struct containing meta information + about the component itself */ + + .base_version = { + /* Indicate that we are a pmix v1.1.0 component (which also + implies a specific MCA version) */ + + OPAL_PMIX_BASE_VERSION_2_0_0, + + /* Component name and version */ + + .mca_component_name = "external", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + + /* Component open and close functions */ + + .mca_open_component = external_open, + .mca_close_component = external_close, + .mca_query_component = external_component_query, + }, + /* Next the MCA v1.0.0 component meta data */ + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } + }, + .native_launch = false +}; + +static int external_open(void) +{ + OBJ_CONSTRUCT(&mca_pmix_external_component.jobids, opal_list_t); + return OPAL_SUCCESS; +} + +static int external_close(void) +{ + 
OPAL_LIST_DESTRUCT(&mca_pmix_external_component.jobids); + return OPAL_SUCCESS; +} + + +static int external_component_query(mca_base_module_t **module, int *priority) +{ + char *t, *id; + + /* see if a PMIx server is present */ + if (NULL != (t = getenv("PMIX_SERVER_URI")) || + NULL != (id = getenv("PMIX_ID"))) { + /* if PMIx is present, then we are a client and need to use it */ + *priority = 100; + } else { + /* we could be a server, so we still need to be considered */ + *priority = 5; + } + *module = (mca_base_module_t *)&opal_pmix_external_module; + return OPAL_SUCCESS; +} diff --git a/opal/mca/pmix/external/pmix_ext_server_north.c b/opal/mca/pmix/external/pmix_ext_server_north.c new file mode 100644 index 00000000000..ce049610d62 --- /dev/null +++ b/opal/mca/pmix/external/pmix_ext_server_north.c @@ -0,0 +1,780 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/types.h" + +#ifdef HAVE_STRING_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/dss/dss.h" +#include "opal/mca/event/event.h" +#include "opal/mca/hwloc/base/base.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_progress_threads.h" +#include "opal/util/argv.h" +#include "opal/util/error.h" +#include "opal/util/output.h" +#include "opal/util/proc.h" +#include "opal/util/show_help.h" +#include "opal/mca/pmix/base/base.h" +#include "pmix_ext.h" + +#include "pmix.h" +#include "pmix_server.h" + +/**** N.O.R.T.H.B.O.U.N.D I.N.T.E.R.F.A.C.E.S ****/ + +/* These are the interfaces used by the embedded PMIx server + * to call up into ORTE for service requests */ + +static pmix_status_t server_client_connected_fn(const pmix_proc_t *proc, void* server_object); +static pmix_status_t server_client_finalized_fn(const pmix_proc_t *proc, void* server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_abort_fn(const pmix_proc_t *proc, void *server_object, + int status, const char msg[], + pmix_proc_t procs[], size_t nprocs, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + char *data, size_t ndata, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_publish_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_lookup_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t 
server_unpublish_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_spawn_fn(const pmix_proc_t *proc, + const pmix_info_t job_info[], size_t ninfo, + const pmix_app_t apps[], size_t napps, + pmix_spawn_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_register_events(const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_deregister_events(const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_listener_fn(int listening_sd, + pmix_connection_cbfunc_t cbfunc); + +pmix_server_module_t pmix_external_module = { + server_client_connected_fn, + server_client_finalized_fn, + server_abort_fn, + server_fencenb_fn, + server_dmodex_req_fn, + server_publish_fn, + server_lookup_fn, + server_unpublish_fn, + server_spawn_fn, + server_connect_fn, + server_disconnect_fn, + server_register_events, + server_deregister_events, + server_listener_fn +}; + +opal_pmix_server_module_t *pmix_external_host_module = NULL; + + +static void opal_opcbfunc(int status, void *cbdata) +{ + pmix_ext_opalcaddy_t *opalcaddy = (pmix_ext_opalcaddy_t*)cbdata; + + if (NULL != opalcaddy->opcbfunc) { + opalcaddy->opcbfunc(pmix_ext_convert_opalrc(status), opalcaddy->cbdata); + } + OBJ_RELEASE(opalcaddy); +} + +static pmix_status_t server_client_connected_fn(const pmix_proc_t *p, void *server_object) +{ + int rc; + opal_process_name_t proc; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->client_connected) { + return PMIX_SUCCESS; 
+ } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix_ext_convert_opalrc(rc); + } + proc.vpid = p->rank; + + /* pass it up */ + rc = pmix_external_host_module->client_connected(&proc, server_object); + return pmix_ext_convert_opalrc(rc); +} + +static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + pmix_ext_opalcaddy_t *opalcaddy; + opal_process_name_t proc; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->client_finalized) { + return PMIX_SUCCESS; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix_ext_convert_opalrc(rc); + } + proc.vpid = p->rank; + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix_ext_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* pass it up */ + rc = pmix_external_host_module->client_finalized(&proc, server_object, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + return pmix_ext_convert_opalrc(rc); +} + +static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, + int status, const char msg[], + pmix_proc_t procs[], size_t nprocs, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + size_t n; + opal_namelist_t *nm; + opal_process_name_t proc; + int rc; + pmix_ext_opalcaddy_t *opalcaddy; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->abort) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix_ext_convert_opalrc(rc); + } + proc.vpid = p->rank; + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix_ext_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + 
opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_proc_t to the list of procs */ + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(&opalcaddy->procs, &nm->super); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == procs[n].rank) { + nm->name.vpid = OPAL_VPID_WILDCARD; + } else { + nm->name.vpid = procs[n].rank; + } + } + + /* pass it up */ + rc = pmix_external_host_module->abort(&proc, server_object, status, msg, + &opalcaddy->procs, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + return pmix_ext_convert_opalrc(rc); +} + +static void _data_release(void *cbdata) +{ + pmix_ext_opalcaddy_t *opalcaddy = (pmix_ext_opalcaddy_t*)cbdata; + + if (NULL != opalcaddy->odmdxfunc) { + opalcaddy->odmdxfunc(opalcaddy->ocbdata); + } + OBJ_RELEASE(opalcaddy); +} + +static void opmdx_response(int status, const char *data, size_t sz, void *cbdata, + opal_pmix_release_cbfunc_t relcbfunc, void *relcbdata) +{ + pmix_status_t rc; + pmix_ext_opalcaddy_t *opalcaddy = (pmix_ext_opalcaddy_t*)cbdata; + + rc = pmix_ext_convert_rc(status); + if (NULL != opalcaddy->mdxcbfunc) { + opalcaddy->odmdxfunc = relcbfunc; + opalcaddy->ocbdata = relcbdata; + opalcaddy->mdxcbfunc(rc, data, sz, opalcaddy->cbdata, + _data_release, opalcaddy); + } else { + OBJ_RELEASE(opalcaddy); + } +} + +static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + char *data, size_t ndata, + pmix_modex_cbfunc_t cbfunc, void *cbdata) +{ + pmix_ext_opalcaddy_t *opalcaddy; + size_t n; + opal_namelist_t *nm; + opal_value_t *iptr; + int rc; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->fence_nb) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix_ext_opalcaddy_t); + 
opalcaddy->mdxcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_proc_t to the list of procs */ + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(&opalcaddy->procs, &nm->super); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == procs[n].rank) { + nm->name.vpid = OPAL_VPID_WILDCARD; + } else { + nm->name.vpid = procs[n].rank; + } + } + + /* convert the array of pmix_info_t to the list of info */ + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &iptr->super); + iptr->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix_ext_value_unload(iptr, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = pmix_external_host_module->fence_nb(&opalcaddy->procs, &opalcaddy->info, + data, ndata, opmdx_response, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + return pmix_ext_convert_opalrc(rc); +} + +static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, + const pmix_info_t info[], size_t ninfo, + pmix_modex_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + pmix_ext_opalcaddy_t *opalcaddy; + opal_process_name_t proc; + opal_value_t *iptr; + size_t n; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->direct_modex) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix_ext_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == p->rank) { + proc.vpid = OPAL_VPID_WILDCARD; + } else { + proc.vpid = p->rank; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix_ext_opalcaddy_t); + opalcaddy->mdxcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of 
pmix_info_t to the list of info */ + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &iptr->super); + iptr->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix_ext_value_unload(iptr, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = pmix_external_host_module->direct_modex(&proc, &opalcaddy->info, opmdx_response, opalcaddy); + if (OPAL_SUCCESS != rc && OPAL_ERR_IN_PROCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + if (OPAL_ERR_IN_PROCESS == rc) { + rc = OPAL_SUCCESS; + } + return pmix_ext_convert_opalrc(rc); +} + +static pmix_status_t server_publish_fn(const pmix_proc_t *p, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + size_t n; + pmix_ext_opalcaddy_t *opalcaddy; + opal_process_name_t proc; + opal_value_t *oinfo; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->publish) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix_ext_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == p->rank) { + proc.vpid = OPAL_VPID_WILDCARD; + } else { + proc.vpid = p->rank; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix_ext_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the info array */ + for (n=0; n < ninfo; n++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + oinfo->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix_ext_value_unload(oinfo, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = pmix_external_host_module->publish(&proc, &opalcaddy->info, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return 
pmix_ext_convert_opalrc(rc); +} + +static void opal_lkupcbfunc(int status, + opal_list_t *data, + void *cbdata) +{ + pmix_ext_opalcaddy_t *opalcaddy = (pmix_ext_opalcaddy_t*)cbdata; + pmix_status_t rc; + pmix_pdata_t *d=NULL; + size_t nd=0, n; + opal_pmix_pdata_t *p; + + if (NULL != opalcaddy->lkupcbfunc) { + rc = pmix_ext_convert_opalrc(status); + /* convert any returned data */ + if (NULL != data) { + nd = opal_list_get_size(data); + PMIX_PDATA_CREATE(d, nd); + n=0; + OPAL_LIST_FOREACH(p, data, opal_pmix_pdata_t) { + /* convert the jobid */ + (void)opal_snprintf_jobid(d[n].proc.nspace, PMIX_MAX_NSLEN, p->proc.jobid); + d[n].proc.rank = p->proc.vpid; + (void)strncpy(d[n].key, p->value.key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&d[n].value, &p->value); + } + } + opalcaddy->lkupcbfunc(rc, d, nd, opalcaddy->cbdata); + } + OBJ_RELEASE(opalcaddy); +} + +static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_lookup_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + pmix_ext_opalcaddy_t *opalcaddy; + opal_process_name_t proc; + opal_value_t *iptr; + size_t n; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->lookup) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix_ext_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == p->rank) { + proc.vpid = OPAL_VPID_WILDCARD; + } else { + proc.vpid = p->rank; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix_ext_opalcaddy_t); + opalcaddy->lkupcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_info_t to the list of info */ + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &iptr->super); + iptr->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix_ext_value_unload(iptr, &info[n].value))) { + 
OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = pmix_external_host_module->lookup(&proc, keys, &opalcaddy->info, opal_lkupcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return pmix_ext_convert_opalrc(rc); +} + + +static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + pmix_ext_opalcaddy_t *opalcaddy; + opal_process_name_t proc; + opal_value_t *iptr; + size_t n; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->unpublish) { + return PMIX_SUCCESS; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix_ext_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == p->rank) { + proc.vpid = OPAL_VPID_WILDCARD; + } else { + proc.vpid = p->rank; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix_ext_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_info_t to the list of info */ + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &iptr->super); + iptr->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix_ext_value_unload(iptr, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = pmix_external_host_module->unpublish(&proc, keys, &opalcaddy->info, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return pmix_ext_convert_opalrc(rc); +} + +static void opal_spncbfunc(int status, opal_jobid_t jobid, void *cbdata) +{ + pmix_ext_opalcaddy_t *opalcaddy = (pmix_ext_opalcaddy_t*)cbdata; + pmix_status_t rc; + char nspace[PMIX_MAX_NSLEN]; + + if (NULL != opalcaddy->spwncbfunc) { + rc = pmix_ext_convert_opalrc(status); + /* convert the 
jobid */ + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); + opalcaddy->spwncbfunc(rc, nspace, opalcaddy->cbdata); + } + OBJ_RELEASE(opalcaddy); +} + +static pmix_status_t server_spawn_fn(const pmix_proc_t *p, + const pmix_info_t job_info[], size_t ninfo, + const pmix_app_t apps[], size_t napps, + pmix_spawn_cbfunc_t cbfunc, void *cbdata) +{ + pmix_ext_opalcaddy_t *opalcaddy; + opal_process_name_t proc; + opal_pmix_app_t *app; + opal_value_t *oinfo; + size_t k, n; + int rc; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->spawn) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix_ext_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == p->rank) { + proc.vpid = OPAL_VPID_WILDCARD; + } else { + proc.vpid = p->rank; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix_ext_opalcaddy_t); + opalcaddy->spwncbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the job info */ + for (k=0; k < ninfo; k++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + oinfo->key = strdup(job_info[k].key); + if (OPAL_SUCCESS != (rc = pmix_ext_value_unload(oinfo, &job_info[k].value))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + } + + /* convert the apps */ + for (n=0; n < napps; n++) { + app = OBJ_NEW(opal_pmix_app_t); + opal_list_append(&opalcaddy->apps, &app->super); + if (NULL != apps[n].cmd) { + app->cmd = strdup(apps[n].cmd); + } + app->argc = apps[n].argc; + if (NULL != apps[n].argv) { + app->argv = opal_argv_copy(apps[n].argv); + } + if (NULL != apps[n].env) { + app->env = opal_argv_copy(apps[n].env); + } + app->maxprocs = apps[n].maxprocs; + for (k=0; k < apps[n].ninfo; k++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&app->info, &oinfo->super); + oinfo->key = strdup(apps[n].info[k].key); + if (OPAL_SUCCESS != 
(rc = pmix_ext_value_unload(oinfo, &apps[n].info[k].value))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + } + } + + /* pass it up */ + rc = pmix_external_host_module->spawn(&proc, &opalcaddy->info, &opalcaddy->apps, opal_spncbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OPAL_ERROR_LOG(rc); + OBJ_RELEASE(opalcaddy); + } + + return pmix_ext_convert_opalrc(rc); +} + + +static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + pmix_ext_opalcaddy_t *opalcaddy; + opal_namelist_t *nm; + size_t n; + opal_value_t *oinfo; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->connect) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix_ext_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_proc_t to the list of procs */ + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(&opalcaddy->procs, &nm->super); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == procs[n].rank) { + nm->name.vpid = OPAL_VPID_WILDCARD; + } else { + nm->name.vpid = procs[n].rank; + } + } + + /* convert the info */ + for (n=0; n < ninfo; n++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + oinfo->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix_ext_value_unload(oinfo, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = pmix_external_host_module->connect(&opalcaddy->procs, &opalcaddy->info, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return pmix_ext_convert_opalrc(rc); +} + + +static pmix_status_t 
server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + pmix_ext_opalcaddy_t *opalcaddy; + opal_namelist_t *nm; + size_t n; + opal_value_t *oinfo; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->disconnect) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix_ext_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_proc_t to the list of procs */ + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(&opalcaddy->procs, &nm->super); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == procs[n].rank) { + nm->name.vpid = OPAL_VPID_WILDCARD; + } else { + nm->name.vpid = procs[n].rank; + } + } + + /* convert the info */ + for (n=0; n < ninfo; n++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + oinfo->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix_ext_value_unload(oinfo, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = pmix_external_host_module->disconnect(&opalcaddy->procs, &opalcaddy->info, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return pmix_ext_convert_opalrc(rc); +} + +static pmix_status_t server_register_events(const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_ext_opalcaddy_t *opalcaddy; + size_t n; + opal_value_t *oinfo; + int rc; + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix_ext_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the info */ + for (n=0; n < ninfo; n++) { + oinfo = OBJ_NEW(opal_value_t); + 
opal_list_append(&opalcaddy->info, &oinfo->super); + oinfo->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix_ext_value_unload(oinfo, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix_ext_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = pmix_external_host_module->register_events(&opalcaddy->info, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return pmix_ext_convert_opalrc(rc); +} + +static pmix_status_t server_deregister_events(const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + return PMIX_ERR_NOT_IMPLEMENTED; +} +static pmix_status_t server_listener_fn(int listening_sd, + pmix_connection_cbfunc_t cbfunc) +{ + int rc; + + if (NULL == pmix_external_host_module || NULL == pmix_external_host_module->listener) { + return PMIX_ERR_NOT_SUPPORTED; + } + + rc = pmix_external_host_module->listener(listening_sd, cbfunc); + return pmix_ext_convert_opalrc(rc); +} diff --git a/opal/mca/pmix/external/pmix_ext_server_south.c b/opal/mca/pmix/external/pmix_ext_server_south.c new file mode 100644 index 00000000000..bb8e9997878 --- /dev/null +++ b/opal/mca/pmix/external/pmix_ext_server_south.c @@ -0,0 +1,453 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2017 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/types.h" + +#ifdef HAVE_STRING_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/dss/dss.h" +#include "opal/mca/event/event.h" +#include "opal/mca/hwloc/base/base.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_progress_threads.h" +#include "opal/util/argv.h" +#include "opal/util/error.h" +#include "opal/util/output.h" +#include "opal/util/proc.h" +#include "opal/util/show_help.h" +#include "opal/mca/pmix/base/base.h" +#include "pmix_ext.h" + +#include "pmix.h" +#include "pmix_server.h" + +/**** S.O.U.T.H.B.O.U.N.D I.N.T.E.R.F.A.C.E.S ****/ + +/* These are the interfaces used by the OMPI/ORTE/OPAL layer to call + * down into the embedded PMIx server. */ + +extern pmix_server_module_t pmix_external_module; +extern opal_pmix_server_module_t *pmix_external_host_module; +static char *dbgvalue=NULL; +static int errhdler_ref = 0; + +static void release_cbfunc(void *cbdata) +{ + pmix_ext_opalcaddy_t *cd = (pmix_ext_opalcaddy_t*)cbdata; + OBJ_RELEASE(cd); +} + +#define PMIX_WAIT_FOR_COMPLETION(a) \ + do { \ + while ((a)) { \ + usleep(10); \ + } \ + } while (0); + +static void myerr(pmix_status_t status, + pmix_proc_t procs[], size_t nprocs, + pmix_info_t info[], size_t ninfo) +{ + int rc; + opal_namelist_t *nm; + opal_value_t *iptr; + size_t n; + pmix_ext_opalcaddy_t *cd; + + /* convert the incoming status */ + rc = pmix_ext_convert_rc(status); + + /* setup the caddy */ + cd = OBJ_NEW(pmix_ext_opalcaddy_t); + + /* convert the array of procs */ + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + nm->name.jobid = strtoul(procs[n].nspace, NULL, 10); + nm->name.vpid = procs[n].rank; + opal_list_append(&cd->procs, &nm->super); + } + + /* convert the array of info */ + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + iptr->key = strdup(info[n].key); + 
pmix_ext_value_unload(iptr, &info[n].value); + opal_list_append(&cd->info, &iptr->super); + } + + /* call the base errhandler */ + opal_pmix_base_errhandler(rc, &cd->procs, &cd->info, release_cbfunc, cd); +} + +static void errreg_cbfunc(pmix_status_t status, + int errhandler_ref, + void *cbdata) +{ + errhdler_ref = errhandler_ref; + opal_output_verbose(5, opal_pmix_base_framework.framework_output, + "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%d", + status, errhandler_ref); +} + +int pmix_ext_server_init(opal_pmix_server_module_t *module, + opal_list_t *info) +{ + pmix_status_t rc; + int dbg; + opal_value_t *kv; + pmix_info_t *pinfo; + size_t sz, n; + + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } + + /* convert the list to an array of pmix_info_t */ + if (NULL != info) { + sz = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&pinfo[n].value, kv); + ++n; + } + } else { + sz = 0; + pinfo = NULL; + } + + if (PMIX_SUCCESS != (rc = PMIx_server_init(&pmix_external_module, pinfo, sz))) { + PMIX_INFO_FREE(pinfo, sz); + return pmix_ext_convert_rc(rc); + } + PMIX_INFO_FREE(pinfo, sz); + + /* record the host module */ + pmix_external_host_module = module; + + /* register the errhandler */ + PMIx_Register_errhandler(NULL, 0, myerr, errreg_cbfunc, NULL); + return OPAL_SUCCESS; +} + +int pmix_ext_server_finalize(void) +{ + pmix_status_t rc; + + /* deregister the errhandler */ + PMIx_Deregister_errhandler(errhdler_ref, NULL, NULL); + + rc = PMIx_server_finalize(); + return pmix_ext_convert_rc(rc); +} + +int pmix_ext_server_gen_regex(const char *input, char **regex) +{ + pmix_status_t rc; + + rc = PMIx_generate_regex(input, regex); + return pmix_ext_convert_rc(rc); +} + + +int 
pmix_ext_server_gen_ppn(const char *input, char **ppn) +{ + pmix_status_t rc; + + rc = PMIx_generate_ppn(input, ppn); + return pmix_ext_convert_rc(rc); +} + +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix_ext_opcaddy_t *op = (pmix_ext_opcaddy_t*)cbdata; + + if (NULL != op->opcbfunc) { + op->opcbfunc(pmix_ext_convert_rc(status), op->cbdata); + } + if (op->active) { + op->active = false; + } else { + OBJ_RELEASE(op); + } +} + +int pmix_ext_server_register_nspace(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + opal_value_t *kv, *k2; + pmix_info_t *pinfo, *pmap; + size_t sz, szmap, m, n; + char nspace[PMIX_MAX_NSLEN]; + pmix_status_t rc; + pmix_ext_opcaddy_t op; + opal_list_t *pmapinfo; + opal_pmix_ext_jobid_trkr_t *job; + + /* convert the jobid */ + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); + + /* store this job in our list of known nspaces */ + job = OBJ_NEW(opal_pmix_ext_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = jobid; + opal_list_append(&mca_pmix_external_component.jobids, &job->super); + + /* convert the list to an array of pmix_info_t */ + if (NULL != info) { + sz = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + if (0 == strcmp(kv->key, OPAL_PMIX_PROC_DATA)) { + pinfo[n].value.type = PMIX_INFO_ARRAY; + /* the value contains a list of values - convert + * that list to another array */ + pmapinfo = (opal_list_t*)kv->data.ptr; + szmap = opal_list_get_size(pmapinfo); + PMIX_INFO_CREATE(pmap, szmap); + pinfo[n].value.data.array.array = (struct pmix_info*)pmap; + pinfo[n].value.data.array.size = szmap; + m = 0; + OPAL_LIST_FOREACH(k2, pmapinfo, opal_value_t) { + (void)strncpy(pmap[m].key, k2->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&pmap[m].value, k2); + ++m; + } + } else { + 
pmix_ext_value_load(&pinfo[n].value, kv); + } + ++n; + } + } else { + sz = 0; + pinfo = NULL; + } + + /* setup the caddy */ + OBJ_CONSTRUCT(&op, pmix_ext_opcaddy_t); + op.info = pinfo; + op.sz = sz; + op.opcbfunc = cbfunc; + op.cbdata = cbdata; + op.active = true; + rc = PMIx_server_register_nspace(nspace, nlocalprocs, pinfo, sz, + opcbfunc, &op); + if (PMIX_SUCCESS == rc) { + PMIX_WAIT_FOR_COMPLETION(op.active); + } + return pmix_ext_convert_rc(rc); +} + +void pmix_ext_server_deregister_nspace(opal_jobid_t jobid) +{ + opal_pmix_ext_jobid_trkr_t *jptr; + + /* if we don't already have it, we can ignore this */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == jobid) { + /* found it - tell the server to deregister */ + PMIx_server_deregister_nspace(jptr->nspace); + /* now get rid of it from our list */ + opal_list_remove_item(&mca_pmix_external_component.jobids, &jptr->super); + OBJ_RELEASE(jptr); + return; + } + } +} + +int pmix_ext_server_register_client(const opal_process_name_t *proc, + uid_t uid, gid_t gid, + void *server_object, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + pmix_status_t rc; + pmix_ext_opcaddy_t *op; + + /* setup the caddy */ + op = OBJ_NEW(pmix_ext_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + /* convert the jobid */ + (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, proc->jobid); + op->p.rank = proc->vpid; + + rc = PMIx_server_register_client(&op->p, uid, gid, server_object, + opcbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix_ext_convert_rc(rc); +} + +void pmix_ext_server_deregister_client(const opal_process_name_t *proc) +{ + opal_pmix_ext_jobid_trkr_t *jptr; + pmix_proc_t p; + + /* if we don't already have it, we can ignore this */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix_ext_jobid_trkr_t) { + if (jptr->jobid == proc->jobid) { + /* found it - tell the server to deregister */ + 
(void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); + p.rank = proc->vpid; + PMIx_server_deregister_client(&p); + return; + } + } +} + + +int pmix_ext_server_setup_fork(const opal_process_name_t *proc, char ***env) +{ + pmix_status_t rc; + pmix_proc_t p; + + /* convert the jobid */ + (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); + p.rank = proc->vpid; + + rc = PMIx_server_setup_fork(&p, env); + return pmix_ext_convert_rc(rc); +} + +/* this is the call back up from the embedded PMIx server that + * will contain the returned data. Note that the embedded server + * "owns" the data and will free it upon return from this function */ +static void dmdx_response(pmix_status_t status, char *data, size_t sz, void *cbdata) +{ + int rc; + pmix_ext_opcaddy_t *op = (pmix_ext_opcaddy_t*)cbdata; + + rc = pmix_ext_convert_rc(status); + if (NULL != op->mdxcbfunc) { + op->mdxcbfunc(rc, data, sz, op->cbdata, NULL, NULL); + } + OBJ_RELEASE(op); +} + +int pmix_ext_server_dmodex(const opal_process_name_t *proc, + opal_pmix_modex_cbfunc_t cbfunc, void *cbdata) +{ + pmix_ext_opcaddy_t *op; + pmix_status_t rc; + + /* setup the caddy */ + op = OBJ_NEW(pmix_ext_opcaddy_t); + op->mdxcbfunc = cbfunc; + op->cbdata = cbdata; + + /* convert the jobid */ + (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, proc->jobid); + op->p.rank = proc->vpid; + + /* find the internally-cached data for this proc */ + rc = PMIx_server_dmodex_request(&op->p, dmdx_response, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix_ext_convert_rc(rc); +} + +int pmix_ext_server_notify_error(int status, + opal_list_t *procs, + opal_list_t *error_procs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + opal_value_t *kv; + pmix_info_t *pinfo; + size_t sz, psz, esz, n; + pmix_proc_t *ps, *eps; + pmix_status_t rc; + pmix_ext_opcaddy_t *op; + opal_namelist_t *nm; + + /* convert the list of procs */ + if (NULL != procs) { + psz = opal_list_get_size(procs); + 
PMIX_PROC_CREATE(ps, psz); + n = 0; + OPAL_LIST_FOREACH(nm, procs, opal_namelist_t) { + (void)opal_snprintf_jobid(ps[n].nspace, PMIX_MAX_NSLEN, nm->name.jobid); + ps[n].rank = (int)nm->name.vpid; + ++n; + } + } else { + psz = 0; + ps = NULL; + } + if (NULL != error_procs) { + esz = opal_list_get_size(error_procs); + PMIX_PROC_CREATE(eps, esz); + n = 0; + OPAL_LIST_FOREACH(nm, error_procs, opal_namelist_t) { + (void)opal_snprintf_jobid(eps[n].nspace, PMIX_MAX_NSLEN, nm->name.jobid); + eps[n].rank = (int)nm->name.vpid; + ++n; + } + } else { + esz = 0; + eps = NULL; + } + + /* convert the list to an array of pmix_info_t */ + if (NULL != info) { + sz = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix_ext_value_load(&pinfo[n].value, kv); + } + } else { + sz = 0; + pinfo = NULL; + } + + /* setup the caddy */ + op = OBJ_NEW(pmix_ext_opcaddy_t); + op->procs = ps; + op->nprocs = psz; + op->error_procs = eps; + op->nerror_procs = esz; + op->info = pinfo; + op->sz = sz; + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + rc = pmix_ext_convert_opalrc(status); + rc = PMIx_Notify_error(rc, ps, psz, eps, esz, + pinfo, sz, opcbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix_ext_convert_rc(rc); +} diff --git a/opal/mca/pmix/native/Makefile.am b/opal/mca/pmix/native/Makefile.am deleted file mode 100644 index d491b538daa..00000000000 --- a/opal/mca/pmix/native/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - pmix_native.h \ - pmix_native_component.c \ - pmix_native.c \ - usock.c \ - usock_sendrecv.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_opal_pmix_native_DSO -component_noinst = -component_install = mca_pmix_native.la -else -component_noinst = libmca_pmix_native.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_pmix_native_la_SOURCES = $(sources) -mca_pmix_native_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pmix_native_la_SOURCES =$(sources) -libmca_pmix_native_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/pmix/native/configure.m4 b/opal/mca/pmix/native/configure.m4 deleted file mode 100644 index 1bedaa61dc6..00000000000 --- a/opal/mca/pmix/native/configure.m4 +++ /dev/null @@ -1,42 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_pmix_native_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_opal_pmix_native_CONFIG],[ - AC_CONFIG_FILES([opal/mca/pmix/native/Makefile]) - - # check for sockaddr_un (a good sign we have Unix domain sockets) - AC_CHECK_TYPES([struct sockaddr_un], - [pmix_native_happy="yes"], - [pmix_native_happy="no"], - [AC_INCLUDES_DEFAULT -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_SYS_UN_H -#include -#endif]) - - AS_IF([test "$pmix_native_happy" = "yes"], [$1], [$2]) -])dnl diff --git a/opal/mca/pmix/native/pmix_native.c b/opal/mca/pmix/native/pmix_native.c deleted file mode 100644 index 4ad1861ceca..00000000000 --- a/opal/mca/pmix/native/pmix_native.c +++ /dev/null @@ -1,1338 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" -#include "opal/constants.h" -#include "opal/types.h" - -#ifdef HAVE_STRING_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "opal/dss/dss.h" -#include "opal/mca/event/event.h" -#include "opal/mca/hwloc/base/base.h" -#include "opal/runtime/opal.h" -#include "opal/runtime/opal_progress_threads.h" -#include "opal/util/argv.h" -#include "opal/util/error.h" -#include "opal/util/output.h" -#include "opal/util/proc.h" -#include "opal/util/show_help.h" - -#include "opal/mca/pmix/base/base.h" -#include "pmix_native.h" -#include "opal/mca/dstore/base/base.h" - -static int native_init(void); -static int native_fini(void); -static bool native_initialized(void); -static int native_abort(int flag, const char msg[]); -static int native_fence(opal_process_name_t *procs, size_t nprocs); -static int native_fence_nb(opal_process_name_t *procs, size_t nprocs, - opal_pmix_cbfunc_t cbfunc, void *cbdata); -static int native_put(opal_pmix_scope_t scope, - opal_value_t *kv); -static int native_get(const opal_process_name_t *id, - const char *key, - opal_value_t **kv); -static void native_get_nb(const opal_process_name_t *id, - const char *key, - opal_pmix_cbfunc_t cbfunc, - void *cbdata); -static int native_publish(const char service_name[], - opal_list_t *info, - const char port[]); -static int native_lookup(const char service_name[], - opal_list_t *info, - char port[], int portLen); -static int native_unpublish(const char service_name[], - opal_list_t *info); -static bool native_get_attr(const char *attr, opal_value_t **kv); -static int native_get_attr_nb(const char *attr, - opal_pmix_cbfunc_t cbfunc, - void *cbdata); -static int native_spawn(int count, const char * cmds[], - int argcs[], const char ** argvs[], - const int maxprocs[], - opal_list_t *info_keyval_vector, - opal_list_t *preput_keyval_vector, - char jobId[], int jobIdSize, - int errors[]); 
-static int native_job_connect(const char jobId[]); -static int native_job_disconnect(const char jobId[]); - -const opal_pmix_base_module_t opal_pmix_native_module = { - native_init, - native_fini, - native_initialized, - native_abort, - native_fence, - native_fence_nb, - native_put, - native_get, - native_get_nb, - native_publish, - native_lookup, - native_unpublish, - native_get_attr, - native_get_attr_nb, - native_spawn, - native_job_connect, - native_job_disconnect, - opal_pmix_base_register_handler, - opal_pmix_base_deregister_handler -}; - -// local variables -static int init_cntr = 0; -static opal_process_name_t native_pname = {0}; - -/* callback for wait completion */ -static void wait_cbfunc(opal_buffer_t *buf, void *cbdata) -{ - pmix_cb_t *cb = (pmix_cb_t*)cbdata; - int status=OPAL_SUCCESS; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native recv callback activated with %d bytes", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == buf) ? -1 : (int)buf->bytes_used); - - if (NULL != buf) { - /* transfer the data to the cb */ - opal_dss.copy_payload(&cb->data, buf); - } - if (NULL != cb->cbfunc) { - cb->cbfunc(status, NULL, cb->cbdata); - } - cb->active = false; -} - -static int native_init(void) -{ - char **uri, *srv; - - ++init_cntr; - if (1 < init_cntr) { - return OPAL_SUCCESS; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native init called", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* if we don't have a path to the daemon rendezvous point, - * then we need to return an error UNLESS we have been directed - * to allow init prior to having an identified server. 
This is - * needed for singletons as they will start without a server - * to support them, but may have one assigned at a later time */ - if (NULL == mca_pmix_native_component.uri) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native NULL uri", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - if (NULL != (srv = getenv("PMIX_SERVER_URI"))) { - mca_pmix_native_component.uri = strdup(srv); - mca_pmix_native_component.id = OPAL_PROC_MY_NAME; - } else if (opal_pmix_base_allow_delayed_server) { - /* not ready yet, so decrement our init_cntr so we can come thru - * here again */ - --init_cntr; - /* let the caller know that the server isn't available yet */ - return OPAL_ERR_SERVER_NOT_AVAIL; - } else { - /* not ready yet, so decrement our init_cntr so we can come thru - * here again */ - --init_cntr; - return OPAL_ERROR; - } - } - - /* if we have it, setup the path to the daemon rendezvous point */ - if (NULL != mca_pmix_native_component.uri) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native constructing component fields with server %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.uri); - - memset(&mca_pmix_native_component.address, 0, sizeof(struct sockaddr_un)); - mca_pmix_native_component.address.sun_family = AF_UNIX; - uri = opal_argv_split(mca_pmix_native_component.uri, ':'); - if (2 != opal_argv_count(uri)) { - opal_argv_free(uri); - return OPAL_ERROR; - } - /* if the rendezvous file doesn't exist, that's an error */ - if (0 != access(uri[1], R_OK)) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native rendezvous file %s does not exist", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), uri[1]); - opal_argv_free(uri); - return OPAL_ERR_NOT_FOUND; - } - opal_convert_string_to_process_name(&mca_pmix_native_component.server, uri[0]); - snprintf(mca_pmix_native_component.address.sun_path, - sizeof(mca_pmix_native_component.address.sun_path)-1, - "%s", uri[1]); - 
opal_argv_free(uri); - - /* create an event base and progress thread for us */ - if (NULL == (mca_pmix_native_component.evbase = opal_start_progress_thread("pmix_native", true))) { - return OPAL_ERROR; - } - } - - /* we will connect on first send */ - - return OPAL_SUCCESS; -} - -static int native_fini(void) -{ - opal_buffer_t *msg; - pmix_cb_t *cb; - pmix_cmd_t cmd = PMIX_FINALIZE_CMD; - int rc; - - if (1 != init_cntr) { - --init_cntr; - return OPAL_SUCCESS; - } - init_cntr = 0; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native finalize called", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - if (NULL == mca_pmix_native_component.uri) { - /* nothing was setup, so return */ - return OPAL_SUCCESS; - } - - if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) { - /* setup a cmd message to notify the PMIx - * server that we are normally terminating */ - msg = OBJ_NEW(opal_buffer_t); - /* pack the cmd */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - - /* create a callback object as we need to pass it to the - * recv routine so we know which callback to use when - * the return message is recvd */ - cb = OBJ_NEW(pmix_cb_t); - cb->active = true; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native sending finalize sync to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* push the message into our event base to send to the server */ - PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb); - - /* wait for the ack to return */ - PMIX_WAIT_FOR_COMPLETION(cb->active); - OBJ_RELEASE(cb); - } - - if (NULL != mca_pmix_native_component.evbase) { - opal_stop_progress_thread("pmix_native", true); - mca_pmix_native_component.evbase = NULL; - } - - if (0 <= mca_pmix_native_component.sd) { - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - } - - return OPAL_SUCCESS; -} - -static bool native_initialized(void) -{ - if (0 < 
init_cntr) { - return true; - } - return false; -} - -static void timeout(int sd, short args, void *cbdata) -{ - pmix_cb_t *cb = (pmix_cb_t*)cbdata; - cb->active = false; -} - -static int native_abort(int flag, const char msg[]) -{ - opal_buffer_t *bfr; - pmix_cmd_t cmd = PMIX_ABORT_CMD; - int rc; - pmix_cb_t *cb; - opal_event_t ev; - struct timeval tv = {1, 0}; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native abort called", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - if (NULL == mca_pmix_native_component.uri) { - /* no server available, so just return */ - return OPAL_SUCCESS; - } - - if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) { - /* create a buffer to hold the message */ - bfr = OBJ_NEW(opal_buffer_t); - /* pack the cmd */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &cmd, 1, PMIX_CMD_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(bfr); - return rc; - } - /* pack the status flag */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &flag, 1, OPAL_INT))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(bfr); - return rc; - } - /* pack the string message - a NULL is okay */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &msg, 1, OPAL_STRING))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(bfr); - return rc; - } - - /* create a callback object as we need to pass it to the - * recv routine so we know which callback to use when - * the return message is recvd */ - cb = OBJ_NEW(pmix_cb_t); - cb->active = true; - - /* push a timeout event to wake us up just in case this - * message cannot get thru - e.g., someone else may have - * detected the failure of the server and ordered an abort */ - opal_event_evtimer_set(mca_pmix_native_component.evbase, - &ev, timeout, cb); - opal_event_evtimer_add(&ev, &tv); - - /* push the message into our event base to send to the server */ - PMIX_ACTIVATE_SEND_RECV(bfr, wait_cbfunc, cb); - - /* wait for the release */ - PMIX_WAIT_FOR_COMPLETION(cb->active); - OBJ_RELEASE(cb); - } - return OPAL_SUCCESS; -} - 
-static int native_spawn(int count, const char * cmds[], - int argcs[], const char ** argvs[], - const int maxprocs[], - opal_list_t *info_keyval_vector, - opal_list_t *preput_keyval_vector, - char jobId[], int jobIdSize, - int errors[]) -{ - return OPAL_ERR_NOT_SUPPORTED; -} - -static int native_put(opal_pmix_scope_t scope, - opal_value_t *kv) -{ - int rc; - - /* pack the cache that matches the scope */ - if (PMIX_LOCAL == scope) { - if (NULL == mca_pmix_native_component.cache_local) { - mca_pmix_native_component.cache_local = OBJ_NEW(opal_buffer_t); - } - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native put local data for key %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key); - if (OPAL_SUCCESS != (rc = opal_dss.pack(mca_pmix_native_component.cache_local, &kv, 1, OPAL_VALUE))) { - OPAL_ERROR_LOG(rc); - } - } else if (PMIX_REMOTE == scope) { - if (NULL == mca_pmix_native_component.cache_remote) { - mca_pmix_native_component.cache_remote = OBJ_NEW(opal_buffer_t); - } - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native put remote data for key %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key); - if (OPAL_SUCCESS != (rc = opal_dss.pack(mca_pmix_native_component.cache_remote, &kv, 1, OPAL_VALUE))) { - OPAL_ERROR_LOG(rc); - } - } else { - /* must be global */ - if (NULL == mca_pmix_native_component.cache_global) { - mca_pmix_native_component.cache_global = OBJ_NEW(opal_buffer_t); - } - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native put global data for key %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key); - if (OPAL_SUCCESS != (rc = opal_dss.pack(mca_pmix_native_component.cache_global, &kv, 1, OPAL_VALUE))) { - OPAL_ERROR_LOG(rc); - } - } - - /* have to save a copy locally as some of our components will - * look for it */ - (void)opal_dstore.store(opal_dstore_internal, &OPAL_PROC_MY_NAME, kv); - return rc; -} - - -static int native_fence(opal_process_name_t 
*procs, size_t nprocs) -{ - opal_buffer_t *msg, *bptr; - pmix_cmd_t cmd = PMIX_FENCE_CMD; - pmix_cb_t *cb; - int rc, ret; - opal_pmix_scope_t scope; - int32_t cnt; - opal_value_t *kp; - opal_process_name_t id; - size_t i; - uint64_t np; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native executing fence on %u procs", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (unsigned int)nprocs); - - if (NULL == mca_pmix_native_component.uri) { - /* no server available, so just return */ - return OPAL_SUCCESS; - } - - msg = OBJ_NEW(opal_buffer_t); - /* pack the fence cmd */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - /* pack the number of procs */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &nprocs, 1, OPAL_SIZE))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - if (0 < nprocs) { - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, procs, nprocs, OPAL_NAME))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - } - - /* if we haven't already done it, ensure we have committed our values */ - if (NULL != mca_pmix_native_component.cache_local) { - scope = PMIX_LOCAL; - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &mca_pmix_native_component.cache_local, 1, OPAL_BUFFER))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - OBJ_RELEASE(mca_pmix_native_component.cache_local); - } - if (NULL != mca_pmix_native_component.cache_remote) { - scope = PMIX_REMOTE; - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &mca_pmix_native_component.cache_remote, 1, OPAL_BUFFER))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - 
OBJ_RELEASE(mca_pmix_native_component.cache_remote); - } - if (NULL != mca_pmix_native_component.cache_global) { - scope = PMIX_GLOBAL; - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &mca_pmix_native_component.cache_global, 1, OPAL_BUFFER))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - OBJ_RELEASE(mca_pmix_native_component.cache_global); - } - - /* create a callback object as we need to pass it to the - * recv routine so we know which callback to use when - * the return message is recvd */ - cb = OBJ_NEW(pmix_cb_t); - cb->active = true; - - /* push the message into our event base to send to the server */ - PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb); - - /* wait for the fence to complete */ - PMIX_WAIT_FOR_COMPLETION(cb->active); - - /* get the number of contributors */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &np, &cnt, OPAL_UINT64))) { - OPAL_ERROR_LOG(rc); - return rc; - } - - /* if data was returned, unpack and store it */ - for (i=0; i < np; i++) { - /* get the buffer that contains the data for the next proc */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &msg, &cnt, OPAL_BUFFER))) { - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) { - break; - } - OPAL_ERROR_LOG(rc); - return rc; - } - /* extract the id of the contributor from the blob */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(msg, &id, &cnt, OPAL_NAME))) { - OPAL_ERROR_LOG(rc); - return rc; - } - /* extract all blobs from this proc, starting with the scope */ - cnt = 1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(msg, &scope, &cnt, PMIX_SCOPE_T))) { - /* extract the blob for this scope */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(msg, &bptr, &cnt, OPAL_BUFFER))) { - OPAL_ERROR_LOG(rc); - return rc; - } - /* now unpack and store the values - everything goes into our internal 
store */ - cnt = 1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(bptr, &kp, &cnt, OPAL_VALUE))) { - if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, &id, kp))) { - OPAL_ERROR_LOG(ret); - } - OBJ_RELEASE(kp); - cnt = 1; - } - OBJ_RELEASE(bptr); - cnt = 1; - } - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - OPAL_ERROR_LOG(rc); - } - OBJ_RELEASE(msg); - if (OPAL_SUCCESS != rc && OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - OPAL_ERROR_LOG(rc); - } else { - rc = OPAL_SUCCESS; - } - } - - OBJ_RELEASE(cb); - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native fence released", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - return OPAL_SUCCESS; -} - -static void fencenb_cbfunc(opal_buffer_t *buf, void *cbdata) -{ - pmix_cb_t *cb = (pmix_cb_t*)cbdata; - opal_buffer_t *msg, *bptr; - int rc, ret; - opal_pmix_scope_t scope; - int32_t cnt; - opal_value_t *kp; - opal_process_name_t id; - size_t i; - uint64_t np; - - /* get the number of contributors */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &np, &cnt, OPAL_UINT64))) { - OPAL_ERROR_LOG(rc); - return; - } - /* if data was returned, unpack and store it */ - for (i=0; i < np; i++) { - /* get the buffer that contains the data for the next proc */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &msg, &cnt, OPAL_BUFFER))) { - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) { - break; - } - OPAL_ERROR_LOG(rc); - return; - } - /* extract the id of the contributor from the blob */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(msg, &id, &cnt, OPAL_NAME))) { - OPAL_ERROR_LOG(rc); - return; - } - /* extract all blobs from this proc, starting with the scope */ - cnt = 1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(msg, &scope, &cnt, PMIX_SCOPE_T))) { - /* extract the blob for this scope */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(msg, &bptr, &cnt, OPAL_BUFFER))) { - OPAL_ERROR_LOG(rc); - return; - } - /* now unpack and 
store the values - everything goes into our internal store */ - cnt = 1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(bptr, &kp, &cnt, OPAL_VALUE))) { - if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, &id, kp))) { - OPAL_ERROR_LOG(ret); - } - OBJ_RELEASE(kp); - cnt = 1; - } - OBJ_RELEASE(bptr); - cnt = 1; - } - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - OPAL_ERROR_LOG(rc); - } - OBJ_RELEASE(msg); - } - if (OPAL_SUCCESS != rc && OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - OPAL_ERROR_LOG(rc); - } - - /* if a callback was provided, execute it */ - if (NULL != cb) { - if (NULL != cb->cbfunc) { - cb->cbfunc(rc, NULL, cb->cbdata); - } - OBJ_RELEASE(cb); - } -} - -static int native_fence_nb(opal_process_name_t *procs, size_t nprocs, - opal_pmix_cbfunc_t cbfunc, void *cbdata) -{ - opal_buffer_t *msg; - pmix_cmd_t cmd = PMIX_FENCENB_CMD; - int rc; - pmix_cb_t *cb; - opal_pmix_scope_t scope; - - if (NULL == mca_pmix_native_component.uri) { - /* no server available, so just execute the callback */ - if (NULL != cbfunc) { - cbfunc(OPAL_SUCCESS, NULL, cbdata); - } - return OPAL_SUCCESS; - } - - msg = OBJ_NEW(opal_buffer_t); - /* pack the fence cmd */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - /* pack the number of procs */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &nprocs, 1, OPAL_SIZE))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - if (0 < nprocs) { - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, procs, nprocs, OPAL_NAME))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - } - - /* if we haven't already done it, ensure we have committed our values */ - if (NULL != mca_pmix_native_component.cache_local) { - scope = PMIX_LOCAL; - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, 
&mca_pmix_native_component.cache_local, 1, OPAL_BUFFER))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - OBJ_RELEASE(mca_pmix_native_component.cache_local); - } - if (NULL != mca_pmix_native_component.cache_remote) { - scope = PMIX_REMOTE; - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &mca_pmix_native_component.cache_remote, 1, OPAL_BUFFER))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - OBJ_RELEASE(mca_pmix_native_component.cache_remote); - } - if (NULL != mca_pmix_native_component.cache_global) { - scope = PMIX_GLOBAL; - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &mca_pmix_native_component.cache_global, 1, OPAL_BUFFER))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - OBJ_RELEASE(mca_pmix_native_component.cache_global); - } - - /* create a callback object as we need to pass it to the - * recv routine so we know which callback to use when - * the return message is recvd */ - cb = OBJ_NEW(pmix_cb_t); - cb->cbfunc = cbfunc; - cb->cbdata = cbdata; - - /* push the message into our event base to send to the server */ - PMIX_ACTIVATE_SEND_RECV(msg, fencenb_cbfunc, cb); - - return OPAL_SUCCESS; -} - -static int native_get(const opal_process_name_t *id, - const char *key, - opal_value_t **kv) -{ - opal_buffer_t *msg, *bptr; - pmix_cmd_t cmd = PMIX_GET_CMD; - pmix_cb_t *cb; - int rc, ret; - int32_t cnt; - opal_list_t vals; - opal_value_t *kp; - bool found; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native getting value for proc %s key %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(*id), key); - - /* first see if we already have the info in our dstore */ - OBJ_CONSTRUCT(&vals, opal_list_t); - if 
(OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, id, - key, &vals)) { - *kv = (opal_value_t*)opal_list_remove_first(&vals); - OPAL_LIST_DESTRUCT(&vals); - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native value retrieved from dstore", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - return OPAL_SUCCESS; - } - - if (NULL == mca_pmix_native_component.uri) { - /* no server available, so just return */ - return OPAL_ERR_NOT_FOUND; - } - - /* nope - see if we can get it */ - msg = OBJ_NEW(opal_buffer_t); - /* pack the get cmd */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - /* pack the request information - we'll get the entire blob - * for this proc, so we don't need to pass the key */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, id, 1, OPAL_NAME))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return rc; - } - /* create a callback object as we need to pass it to the - * recv routine so we know which callback to use when - * the return message is recvd */ - cb = OBJ_NEW(pmix_cb_t); - cb->active = true; - - /* push the message into our event base to send to the server */ - PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb); - - /* wait for the data to return */ - PMIX_WAIT_FOR_COMPLETION(cb->active); - - /* we have received the entire data blob for this process - unpack - * and cache all values, keeping the one we requested to return - * to the caller */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &ret, &cnt, OPAL_INT))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(cb); - return rc; - } - found = false; - cnt = 1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(&cb->data, &bptr, &cnt, OPAL_BUFFER))) { - while (OPAL_SUCCESS == (rc = opal_dss.unpack(bptr, &kp, &cnt, OPAL_VALUE))) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native retrieved %s (%s) from server for proc %s", - 
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key, - (OPAL_STRING == kp->type) ? kp->data.string : "NS", - OPAL_NAME_PRINT(*id)); - if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, id, kp))) { - OPAL_ERROR_LOG(ret); - } - if (0 == strcmp(key, kp->key)) { - *kv = kp; - found = true; - } else { - OBJ_RELEASE(kp); - } - } - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - OPAL_ERROR_LOG(rc); - } - OBJ_RELEASE(bptr); - cnt = 1; - } - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - OPAL_ERROR_LOG(rc); - } else { - rc = OPAL_SUCCESS; - } - OBJ_RELEASE(cb); - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native get completed", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - if (found) { - return OPAL_SUCCESS; - } - /* we didn't find the requested data - pass back a - * status that indicates the source of the problem, - * either during the data fetch, message unpacking, - * or not found */ - *kv = NULL; - if (OPAL_SUCCESS == rc) { - if (OPAL_SUCCESS == ret) { - rc = OPAL_ERR_NOT_FOUND; - } else { - rc = ret; - } - } - return rc; -} - -static void native_get_nb(const opal_process_name_t *id, - const char *key, - opal_pmix_cbfunc_t cbfunc, - void *cbdata) -{ - return; -} - -static int native_publish(const char service_name[], - opal_list_t *info, - const char port[]) -{ - return OPAL_SUCCESS; -} - -static int native_lookup(const char service_name[], - opal_list_t *info, - char port[], int portLen) -{ - return OPAL_ERR_NOT_IMPLEMENTED; -} - -static int native_unpublish(const char service_name[], - opal_list_t *info) -{ - return OPAL_SUCCESS;; -} - -static bool native_get_attr(const char *attr, opal_value_t **kv) -{ - opal_buffer_t *msg, *bptr; - opal_list_t vals; - opal_value_t *kp, *lclpeers=NULL, kvn; - pmix_cmd_t cmd = PMIX_GETATTR_CMD; - char **ranks; - int rc, ret; - int32_t cnt; - bool found=false; - opal_hwloc_locality_t locality; - pmix_cb_t *cb; - uint32_t i, myrank; - opal_process_name_t id; - char *cpuset; - 
opal_buffer_t buf, buf2; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:native get_attr called", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* try to retrieve the requested value from the dstore */ - OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME, attr, &vals)) { - *kv = (opal_value_t*)opal_list_remove_first(&vals); - OPAL_LIST_DESTRUCT(&vals); - return true; - } - - if (NULL == mca_pmix_native_component.uri) { - /* no server available, so just return */ - return false; - } - - /* if the value isn't yet available, then we should try to retrieve - * all the available attributes and store them for future use */ - msg = OBJ_NEW(opal_buffer_t); - /* pack the cmd */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(msg); - return false; - } - - /* create a callback object as we need to pass it to the - * recv routine so we know which callback to use when - * the return message is recvd */ - cb = OBJ_NEW(pmix_cb_t); - cb->active = true; - - /* push the message into our event base to send to the server */ - PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb); - - /* wait for the data to return */ - PMIX_WAIT_FOR_COMPLETION(cb->active); - - /* we have received the entire data blob for this process - unpack - * and cache all values, keeping the one we requested to return - * to the caller */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &ret, &cnt, OPAL_INT))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(cb); - return false; - } - if (OPAL_SUCCESS == ret) { - /* unpack the buffer containing the values */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &bptr, &cnt, OPAL_BUFFER))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(cb); - return false; - } - cnt = 1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(bptr, &kp, &cnt, OPAL_VALUE))) { - opal_output_verbose(2, 
opal_pmix_base_framework.framework_output, - "%s unpacked attr %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key); - /* if this is the local topology, we need to save it in a special way */ -#if OPAL_HAVE_HWLOC - { - hwloc_topology_t topo; - if (0 == strcmp(PMIX_LOCAL_TOPO, kp->key)) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s saving topology", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* transfer the byte object for unpacking */ - OBJ_CONSTRUCT(&buf, opal_buffer_t); - opal_dss.load(&buf, kp->data.bo.bytes, kp->data.bo.size); - kp->data.bo.bytes = NULL; // protect the data region - kp->data.bo.size = 0; - OBJ_RELEASE(kp); - /* extract the topology */ - cnt=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &topo, &cnt, OPAL_HWLOC_TOPO))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - continue; - } - OBJ_DESTRUCT(&buf); - if (NULL == opal_hwloc_topology) { - opal_hwloc_topology = topo; - } else { - hwloc_topology_destroy(topo); - } - cnt = 1; - continue; - } - } -#endif - /* if this is the local cpuset blob, then unpack and store its contents */ - if (0 == strcmp(PMIX_LOCAL_CPUSETS, kp->key)) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s received local cpusets", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* transfer the byte object for unpacking */ - OBJ_CONSTRUCT(&buf, opal_buffer_t); - opal_dss.load(&buf, kp->data.bo.bytes, kp->data.bo.size); - kp->data.bo.bytes = NULL; // protect the data region - kp->data.bo.size = 0; - OBJ_RELEASE(kp); - cnt=1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(&buf, &id, &cnt, OPAL_NAME))) { - cnt=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &cpuset, &cnt, OPAL_STRING))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - cnt = 1; - continue; - } - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s saving cpuset %s for local peer %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == cpuset) ? 
"NULL" : cpuset, - OPAL_NAME_PRINT(id)); - OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_CPUSET); - kvn.type = OPAL_STRING; - kvn.data.string = cpuset; - if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, &id, &kvn))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kvn); - cnt = 1; - continue; - } - OBJ_DESTRUCT(&kvn); - } - OBJ_DESTRUCT(&buf); - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - OPAL_ERROR_LOG(rc); - return false; - } - cnt=1; - continue; - } else if (0 == strcmp(PMIX_PROC_MAP, kp->key)) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s received proc map", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* transfer the byte object for unpacking */ - OBJ_CONSTRUCT(&buf, opal_buffer_t); - opal_dss.load(&buf, kp->data.bo.bytes, kp->data.bo.size); - kp->data.bo.bytes = NULL; // protect the data region - kp->data.bo.size = 0; - OBJ_RELEASE(kp); - /* get the jobid */ - cnt=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &kp, &cnt, OPAL_VALUE))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - cnt = 1; - return false; - } - if (0 != strcmp(PMIX_JOBID, kp->key)) { - OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM); - OBJ_DESTRUCT(&buf); - OBJ_RELEASE(kp); - cnt = 1; - return false; - } - id.jobid = kp->data.uint32; - OBJ_RELEASE(kp); - /* unpack the data for each rank */ - cnt=1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(&buf, &kp, &cnt, OPAL_VALUE))) { - if (0 != strcmp(PMIX_RANK, kp->key)) { - OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM); - OBJ_DESTRUCT(&buf); - OBJ_RELEASE(kp); - cnt = 1; - return false; - } - id.vpid = kp->data.uint32; - /* unpack the blob for this rank */ - cnt=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &kp, &cnt, OPAL_VALUE))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - cnt = 1; - return false; - } - if (0 != strcmp(PMIX_PROC_MAP, kp->key)) { - OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM); - OBJ_DESTRUCT(&buf); - OBJ_RELEASE(kp); - cnt = 1; - return false; - } - /* transfer the byte object for 
unpacking */ - OBJ_CONSTRUCT(&buf2, opal_buffer_t); - opal_dss.load(&buf2, kp->data.bo.bytes, kp->data.bo.size); - kp->data.bo.bytes = NULL; // protect the data region - kp->data.bo.size = 0; - OBJ_RELEASE(kp); - /* unpack and store the map */ - cnt=1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(&buf2, &kp, &cnt, OPAL_VALUE))) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s storing key %s for peer %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - kp->key, OPAL_NAME_PRINT(id)); - if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, &id, kp))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(kp); - OBJ_DESTRUCT(&buf2); - return false; - } - } - OBJ_DESTRUCT(&buf2); - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - OPAL_ERROR_LOG(rc); - return false; - } - cnt=1; - } - OBJ_DESTRUCT(&buf); - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - OPAL_ERROR_LOG(rc); - return false; - } - cnt=1; - continue; - } - /* otherwise, it is a single piece of info, so store it */ - if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, &OPAL_PROC_MY_NAME, kp))) { - OPAL_ERROR_LOG(rc); - OBJ_RELEASE(kp); - cnt = 1; - continue; - } - /* save the list of local peers */ - if (0 == strcmp(PMIX_LOCAL_PEERS, kp->key)) { - OBJ_RETAIN(kp); - lclpeers = kp; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s saving local peers %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), lclpeers->data.string); - } else if (0 == strcmp(PMIX_JOBID, kp->key)) { - native_pname.jobid = kp->data.uint32; - } else if (0 == strcmp(PMIX_RANK, kp->key)) { - native_pname.vpid = kp->data.uint32; - } - if (0 == strcmp(attr, kp->key)) { - OBJ_RETAIN(kp); - *kv = kp; - found = true; - } - OBJ_RELEASE(kp); - cnt = 1; - } - OBJ_RELEASE(bptr); - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - OPAL_ERROR_LOG(rc); - return false; - } - } else { - OPAL_ERROR_LOG(ret); - OBJ_RELEASE(cb); - return false; - } - OBJ_RELEASE(cb); - 
opal_proc_set_name(&native_pname); - - /* if the list of local peers wasn't included, then we are done */ - if (NULL == lclpeers) { - opal_output_verbose(0, opal_pmix_base_framework.framework_output, - "%s no local peers reported", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - return found; - } - - /* baseline all the procs as nonlocal */ - myrank = native_pname.vpid; - id.jobid = native_pname.jobid; - -#if OPAL_HAVE_HWLOC - /* fetch my cpuset */ - OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS == (rc = opal_dstore.fetch(opal_dstore_internal, &native_pname, - OPAL_DSTORE_CPUSET, &vals))) { - kp = (opal_value_t*)opal_list_get_first(&vals); - cpuset = strdup(kp->data.string); - } else { - cpuset = NULL; - } - OPAL_LIST_DESTRUCT(&vals); -#endif - - /* we only need to set locality for each local rank as "not found" - * equates to "non local" */ - ranks = opal_argv_split(lclpeers->data.string, ','); - for (i=0; NULL != ranks[i]; i++) { - uint32_t vid = strtoul(ranks[i], NULL, 10); - if (myrank == vid) { - continue; - } - id.vpid = vid; -#if OPAL_HAVE_HWLOC - OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal, &id, - OPAL_DSTORE_CPUSET, &vals))) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s cpuset for local proc %s not found", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(id)); - OPAL_LIST_DESTRUCT(&vals); - /* even though the cpuset wasn't found, we at least know it is - * on the same node with us */ - locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; - } else { - kp = (opal_value_t*)opal_list_get_first(&vals); - if (NULL == kp->data.string) { - /* if we share a node, but we don't know anything more, then - * mark us as on the node as this is all we know - */ - locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; - } else { - /* determine relative location on our node */ - locality = 
opal_hwloc_base_get_relative_locality(opal_hwloc_topology, - cpuset, - kp->data.string); - } - OPAL_LIST_DESTRUCT(&vals); - } -#else - /* all we know is we share a node */ - locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; -#endif - OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output, - "%s pmix:native proc %s locality %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(id), - opal_hwloc_base_print_locality(locality))); - - OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_LOCALITY); - kvn.type = OPAL_UINT16; - kvn.data.uint16 = locality; - (void)opal_dstore.store(opal_dstore_internal, &id, &kvn); - OBJ_DESTRUCT(&kvn); - } -#if OPAL_HAVE_HWLOC - if (NULL != cpuset) { - free(cpuset); - } -#endif - opal_argv_free(ranks); - - return found; -} - -static int native_get_attr_nb(const char *attr, - opal_pmix_cbfunc_t cbfunc, - void *cbdata) -{ - return OPAL_ERR_NOT_IMPLEMENTED; -} - -static int native_job_connect(const char jobId[]) -{ - return OPAL_ERR_NOT_IMPLEMENTED; -} - -static int native_job_disconnect(const char jobId[]) -{ - return OPAL_ERR_NOT_IMPLEMENTED; -} - -/*** INSTANTIATE INTERNAL CLASSES ***/ -static void scon(pmix_usock_send_t *p) -{ - p->hdr.type = 0; - p->hdr.tag = UINT32_MAX; - p->hdr.nbytes = 0; - p->data = NULL; - p->hdr_sent = false; - p->sdptr = NULL; - p->sdbytes = 0; -} -OBJ_CLASS_INSTANCE(pmix_usock_send_t, - opal_list_item_t, - scon, NULL); - -static void rcon(pmix_usock_recv_t *p) -{ - p->hdr.type = 0; - p->hdr.tag = UINT32_MAX; - p->hdr.nbytes = 0; - p->data = NULL; - p->hdr_recvd = false; - p->rdptr = NULL; - p->rdbytes = 0; -} -OBJ_CLASS_INSTANCE(pmix_usock_recv_t, - opal_list_item_t, - rcon, NULL); - -static void prcon(pmix_usock_posted_recv_t *p) -{ - p->tag = UINT32_MAX; - p->cbfunc = NULL; - p->cbdata = NULL; -} -OBJ_CLASS_INSTANCE(pmix_usock_posted_recv_t, - opal_list_item_t, - prcon, NULL); - -static void cbcon(pmix_cb_t *p) -{ - p->active = false; - OBJ_CONSTRUCT(&p->data, 
opal_buffer_t); - p->cbfunc = NULL; - p->cbdata = NULL; -} -static void cbdes(pmix_cb_t *p) -{ - OBJ_DESTRUCT(&p->data); -} -OBJ_CLASS_INSTANCE(pmix_cb_t, - opal_object_t, - cbcon, cbdes); - - -static void srcon(pmix_usock_sr_t *p) -{ - p->bfr = NULL; - p->cbfunc = NULL; - p->cbdata = NULL; -} -OBJ_CLASS_INSTANCE(pmix_usock_sr_t, - opal_object_t, - srcon, NULL); diff --git a/opal/mca/pmix/native/pmix_native.h b/opal/mca/pmix/native/pmix_native.h deleted file mode 100644 index 1e0127303bd..00000000000 --- a/opal/mca/pmix/native/pmix_native.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_PMIX_NATIVE_H -#define MCA_PMIX_NATIVE_H - -#include "opal_config.h" - -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_SYS_UN_H -#include -#endif - -#include "opal/mca/mca.h" -#include "opal/mca/event/event.h" -#include "opal/util/proc.h" - -#include "opal/mca/pmix/base/base.h" - -BEGIN_C_DECLS - -/** - * the state of the connection to the server - */ -typedef enum { - PMIX_USOCK_UNCONNECTED, - PMIX_USOCK_CLOSED, - PMIX_USOCK_RESOLVE, - PMIX_USOCK_CONNECTING, - PMIX_USOCK_CONNECT_ACK, - PMIX_USOCK_CONNECTED, - PMIX_USOCK_FAILED, - PMIX_USOCK_ACCEPTING -} pmix_usock_state_t; - -/* define a macro for abnormal termination */ -#define PMIX_NATIVE_ABNORMAL_TERM \ - do { \ - mca_pmix_native_component.state = PMIX_USOCK_FAILED; \ - opal_pmix_base_errhandler(OPAL_ERR_COMM_FAILURE); \ - } while(0); - -/* define a command type for communicating to the - * pmix server */ -typedef uint8_t pmix_cmd_t; -#define PMIX_CMD_T OPAL_UINT8 - -/* define some commands */ -#define PMIX_ABORT_CMD 1 -#define PMIX_FENCE_CMD 2 -#define PMIX_FENCENB_CMD 3 -#define PMIX_PUT_CMD 4 -#define PMIX_GET_CMD 5 -#define PMIX_GETNB_CMD 6 -#define PMIX_FINALIZE_CMD 7 -#define PMIX_GETATTR_CMD 8 - -/* define some message types */ -#define PMIX_USOCK_IDENT 1 -#define 
PMIX_USOCK_USER 2 - -/* internally used cbfunc */ -typedef void (*pmix_usock_cbfunc_t)(opal_buffer_t *buf, void *cbdata); - -/* header for messages */ -typedef struct { - opal_process_name_t id; - uint8_t type; - uint32_t tag; - size_t nbytes; -} pmix_usock_hdr_t; - -/* usock structure for sending a message */ -typedef struct { - opal_list_item_t super; - opal_event_t ev; - pmix_usock_hdr_t hdr; - char *data; - bool hdr_sent; - char *sdptr; - size_t sdbytes; -} pmix_usock_send_t; -OBJ_CLASS_DECLARATION(pmix_usock_send_t); - -/* usock structure for recving a message */ -typedef struct { - opal_list_item_t super; - opal_event_t ev; - pmix_usock_hdr_t hdr; - char *data; - bool hdr_recvd; - char *rdptr; - size_t rdbytes; -} pmix_usock_recv_t; -OBJ_CLASS_DECLARATION(pmix_usock_recv_t); - -/* usock struct for posting send/recv request */ -typedef struct { - opal_object_t super; - opal_event_t ev; - opal_buffer_t *bfr; - pmix_usock_cbfunc_t cbfunc; - void *cbdata; -} pmix_usock_sr_t; -OBJ_CLASS_DECLARATION(pmix_usock_sr_t); - -/* usock structure for tracking posted recvs */ -typedef struct { - opal_list_item_t super; - opal_event_t ev; - uint32_t tag; - pmix_usock_cbfunc_t cbfunc; - void *cbdata; -} pmix_usock_posted_recv_t; -OBJ_CLASS_DECLARATION(pmix_usock_posted_recv_t); - - -/* usock struct for tracking ops */ -typedef struct { - opal_object_t super; - opal_event_t ev; - volatile bool active; - opal_buffer_t data; - opal_pmix_cbfunc_t cbfunc; - void *cbdata; -} pmix_cb_t; -OBJ_CLASS_DECLARATION(pmix_cb_t); - - -typedef struct { - opal_pmix_base_component_t super; - opal_buffer_t *cache_local; - opal_buffer_t *cache_remote; - opal_buffer_t *cache_global; - opal_event_base_t *evbase; - opal_process_name_t id; - opal_process_name_t server; - char *uri; - struct sockaddr_un address; - int sd; - int max_retries; - int retries; // number of times we have tried to connect to this address - pmix_usock_state_t state; - opal_event_t op_event; // used for connecting and 
operations other than read/write - uint32_t tag; // current tag - opal_event_t send_event; // registration with event thread for send events - bool send_ev_active; - opal_event_t recv_event; // registration with event thread for recv events - bool recv_ev_active; - opal_event_t timer_event; // timer for retrying connection failures - bool timer_ev_active; - opal_list_t send_queue; // list of pmix_usock_sent_t to be sent - pmix_usock_send_t *send_msg; // current send in progress - pmix_usock_recv_t *recv_msg; // current recv in progress - opal_list_t posted_recvs; // list of pmix_usock_posted_recv_t -} opal_pmix_native_component_t; - -OPAL_DECLSPEC extern opal_pmix_native_component_t mca_pmix_native_component; - -OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_native_module; - - -/* module-level shared functions */ -OPAL_MODULE_DECLSPEC void pmix_usock_process_msg(int fd, short flags, void *cbdata); -OPAL_MODULE_DECLSPEC void pmix_usock_send_recv(int fd, short args, void *cbdata); -OPAL_MODULE_DECLSPEC void pmix_usock_send_handler(int sd, short flags, void *cbdata); -OPAL_MODULE_DECLSPEC void pmix_usock_recv_handler(int sd, short flags, void *cbdata); -OPAL_MODULE_DECLSPEC char* pmix_usock_state_print(pmix_usock_state_t state); -OPAL_MODULE_DECLSPEC void pmix_usock_dump(const char* msg); -OPAL_MODULE_DECLSPEC int usock_send_connect_ack(void); - - -/* internal convenience macros */ -#define PMIX_ACTIVATE_SEND_RECV(b, cb, d) \ - do { \ - pmix_usock_sr_t *ms; \ - opal_output_verbose(5, opal_pmix_base_framework.framework_output, \ - "%s [%s:%d] post send to server", \ - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ - __FILE__, __LINE__); \ - ms = OBJ_NEW(pmix_usock_sr_t); \ - ms->bfr = (b); \ - ms->cbfunc = (cb); \ - ms->cbdata = (d); \ - opal_event_set(mca_pmix_native_component.evbase, &((ms)->ev), -1, \ - OPAL_EV_WRITE, pmix_usock_send_recv, (ms)); \ - opal_event_set_priority(&((ms)->ev), OPAL_EV_MSG_LO_PRI); \ - opal_event_active(&((ms)->ev), OPAL_EV_WRITE, 1); 
\ - } while(0); - -#define PMIX_ACTIVATE_POST_MSG(ms) \ - do { \ - opal_output_verbose(5, opal_pmix_base_framework.framework_output, \ - "%s [%s:%d] post msg", \ - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ - __FILE__, __LINE__); \ - opal_event_set(mca_pmix_native_component.evbase, &ms->ev, -1, \ - OPAL_EV_WRITE, \ - pmix_usock_process_msg, ms); \ - opal_event_set_priority(&ms->ev, OPAL_EV_MSG_LO_PRI); \ - opal_event_active(&ms->ev, OPAL_EV_WRITE, 1); \ - } while(0); - -#define CLOSE_THE_SOCKET(socket) \ - do { \ - if (0 <= socket) { \ - shutdown(socket, 2); \ - close(socket); \ - socket = -1; \ - } \ - } while(0) - - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - } while (0); - -END_C_DECLS - -#endif /* MCA_PMIX_NATIVE_H */ diff --git a/opal/mca/pmix/native/pmix_native_component.c b/opal/mca/pmix/native/pmix_native_component.c deleted file mode 100644 index b048be8871d..00000000000 --- a/opal/mca/pmix/native/pmix_native_component.c +++ /dev/null @@ -1,139 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. 
- */ - -#include "opal_config.h" - -#include "opal/constants.h" -#include "opal/util/proc.h" -#include "opal/mca/pmix/pmix.h" -#include "pmix_native.h" - -/* - * Public string showing the pmix native component version number - */ -const char *opal_pmix_native_component_version_string = - "OPAL native pmix MCA component version " OPAL_VERSION; - -/* - * Local function - */ -static int pmix_native_open(void); -static int pmix_native_close(void); -static int pmix_native_component_query(mca_base_module_t **module, int *priority); - - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -opal_pmix_native_component_t mca_pmix_native_component = { - { - - /* First, the mca_component_t struct containing meta information - about the component itself */ - - .base_version = { - /* Indicate that we are a pmix v1.1.0 component (which also - implies a specific MCA version) */ - - OPAL_PMIX_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "native", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = pmix_native_open, - .mca_close_component = pmix_native_close, - .mca_query_component = pmix_native_component_query, - }, - /* Next the MCA v1.0.0 component meta data */ - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - } - } -}; - -static int pmix_native_open(void) -{ - /* construct the component fields */ - mca_pmix_native_component.cache_local = NULL; - mca_pmix_native_component.cache_remote = NULL; - mca_pmix_native_component.cache_global = NULL; - mca_pmix_native_component.evbase = NULL; - mca_pmix_native_component.id = opal_name_invalid; - mca_pmix_native_component.server = opal_name_invalid; - mca_pmix_native_component.uri = NULL; - memset(&mca_pmix_native_component.address, 0, sizeof(struct sockaddr_un)); - 
mca_pmix_native_component.sd = -1; - mca_pmix_native_component.max_retries = 10; - mca_pmix_native_component.state = PMIX_USOCK_UNCONNECTED; - mca_pmix_native_component.tag = 0; - mca_pmix_native_component.send_ev_active = false; - mca_pmix_native_component.recv_ev_active = false; - mca_pmix_native_component.timer_ev_active = false; - OBJ_CONSTRUCT(&mca_pmix_native_component.send_queue, opal_list_t); - mca_pmix_native_component.send_msg = NULL; - mca_pmix_native_component.recv_msg = NULL; - OBJ_CONSTRUCT(&mca_pmix_native_component.posted_recvs, opal_list_t); - - return OPAL_SUCCESS; -} - -static int pmix_native_close(void) -{ - if (NULL != mca_pmix_native_component.uri) { - free(mca_pmix_native_component.uri); - } - OPAL_LIST_DESTRUCT(&mca_pmix_native_component.send_queue); - OPAL_LIST_DESTRUCT(&mca_pmix_native_component.posted_recvs); - return OPAL_SUCCESS; -} - - -static int pmix_native_component_query(mca_base_module_t **module, int *priority) -{ - char *t, *id; - - /* see if a PMIx server is present */ - if (NULL == (t = getenv("PMIX_SERVER_URI")) || - NULL == (id = getenv("PMIX_ID"))) { - /* we still have to be considered because this might - * be a singleton, and even a singleton requires some - * degree of support. 
So set us at a very low priority - * so the other components can be selected it they - * are in a better position to run */ - *priority = 1; - mca_pmix_native_component.uri = NULL; - } else { - /* if PMIx is present, then we need to use it */ - opal_convert_string_to_process_name(&mca_pmix_native_component.id, id); - mca_pmix_native_component.uri = strdup(t); - opal_proc_set_name(&mca_pmix_native_component.id); - *priority = 100; - } - *module = (mca_base_module_t *)&opal_pmix_native_module; - return OPAL_SUCCESS; -} diff --git a/opal/mca/pmix/native/usock.c b/opal/mca/pmix/native/usock.c deleted file mode 100644 index 6e2273cc468..00000000000 --- a/opal/mca/pmix/native/usock.c +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "opal_config.h" -#include "opal/types.h" - -#include -#ifdef HAVE_SYS_UIO_H -#include -#endif -#ifdef HAVE_NET_UIO_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_STAT_H -#include -#endif -#ifdef HAVE_FCNTL_H -#include -#endif - -#include "opal_stdint.h" -#include "opal/opal_socket_errno.h" -#include "opal/dss/dss.h" -#include "opal/mca/dstore/dstore.h" -#include "opal/mca/sec/sec.h" -#include "opal/runtime/opal.h" -#include "opal/util/show_help.h" -#include "opal/util/error.h" -#include "opal/util/output.h" -#include "opal/util/proc.h" - -#include "opal/mca/pmix/base/base.h" -#include "pmix_native.h" - -static int usock_send_blocking(char *ptr, size_t size); -static void pmix_usock_try_connect(int fd, short args, void *cbdata); - -/* State machine for internal operations */ -typedef struct { - opal_object_t super; - opal_event_t ev; -} pmix_usock_op_t; -static OBJ_CLASS_INSTANCE(pmix_usock_op_t, - opal_object_t, - NULL, NULL); - -#define PMIX_ACTIVATE_USOCK_STATE(cbfunc) \ - do { \ - pmix_usock_op_t *op; \ - op = OBJ_NEW(pmix_usock_op_t); \ - opal_event_set(mca_pmix_native_component.evbase, &op->ev, -1, \ - OPAL_EV_WRITE, (cbfunc), op); \ - opal_event_set_priority(&op->ev, OPAL_EV_MSG_LO_PRI); \ - opal_event_active(&op->ev, OPAL_EV_WRITE, 1); \ - } while(0); - -void pmix_usock_send_recv(int fd, short args, void *cbdata) -{ - pmix_usock_sr_t *ms = (pmix_usock_sr_t*)cbdata; - pmix_usock_posted_recv_t *req; - pmix_usock_send_t *snd; - uint32_t tag = UINT32_MAX; - - if (NULL != ms->cbfunc) { - /* if a callback msg is expected, setup a recv for it */ - req = OBJ_NEW(pmix_usock_posted_recv_t); - /* take the next tag in the sequence */ - if (UINT32_MAX == mca_pmix_native_component.tag) { - mca_pmix_native_component.tag = 0; - } - req->tag = mca_pmix_native_component.tag++; - tag = req->tag; - req->cbfunc = ms->cbfunc; - req->cbdata = ms->cbdata; 
- opal_output_verbose(5, opal_pmix_base_framework.framework_output, - "%s posting recv on tag %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), req->tag); - /* add it to the list of recvs - we cannot have unexpected messages - * in this subsystem as the server never sends us something that - * we didn't previously request */ - opal_list_append(&mca_pmix_native_component.posted_recvs, &req->super); - } - - snd = OBJ_NEW(pmix_usock_send_t); - snd->hdr.id = mca_pmix_native_component.id; - snd->hdr.type = PMIX_USOCK_USER; - snd->hdr.tag = tag; - snd->hdr.nbytes = ms->bfr->bytes_used; - snd->data = ms->bfr->base_ptr; - /* always start with the header */ - snd->sdptr = (char*)&snd->hdr; - snd->sdbytes = sizeof(pmix_usock_hdr_t); - - /* add the msg to the send queue if we are already connected*/ - if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_nb: already connected to server - queueing for send", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* if there is no message on-deck, put this one there */ - if (NULL == mca_pmix_native_component.send_msg) { - mca_pmix_native_component.send_msg = snd; - } else { - /* add it to the queue */ - opal_list_append(&mca_pmix_native_component.send_queue, &snd->super); - } - /* ensure the send event is active */ - if (!mca_pmix_native_component.send_ev_active) { - opal_event_add(&mca_pmix_native_component.send_event, 0); - mca_pmix_native_component.send_ev_active = true; - } - return; - } - - /* add the message to the queue for sending after the - * connection is formed - */ - opal_list_append(&mca_pmix_native_component.send_queue, &snd->super); - - if (PMIX_USOCK_CONNECTING != mca_pmix_native_component.state && - PMIX_USOCK_CONNECT_ACK != mca_pmix_native_component.state) { - /* we have to initiate the connection - again, we do not - * want to block while the connection is created. 
- * So throw us into an event that will create - * the connection via a mini-state-machine :-) - */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_nb: initiating connection to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - mca_pmix_native_component.state = PMIX_USOCK_CONNECTING; - PMIX_ACTIVATE_USOCK_STATE(pmix_usock_try_connect); - } -} - -void pmix_usock_process_msg(int fd, short flags, void *cbdata) -{ - pmix_usock_recv_t *msg = (pmix_usock_recv_t*)cbdata; - pmix_usock_posted_recv_t *rcv; - opal_buffer_t buf; - - OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, - "%s message received %d bytes for tag %u", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (int)msg->hdr.nbytes, msg->hdr.tag)); - - /* see if we have a waiting recv for this message */ - OPAL_LIST_FOREACH(rcv, &mca_pmix_native_component.posted_recvs, pmix_usock_posted_recv_t) { - opal_output_verbose(5, opal_pmix_base_framework.framework_output, - "%s checking msg on tag %u for tag %u", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - msg->hdr.tag, rcv->tag); - - if (msg->hdr.tag == rcv->tag) { - if (NULL != rcv->cbfunc) { - /* construct and load the buffer */ - OBJ_CONSTRUCT(&buf, opal_buffer_t); - if (NULL != msg->data) { - opal_dss.load(&buf, msg->data, msg->hdr.nbytes); - } - msg->data = NULL; // protect the data region - if (NULL != rcv->cbfunc) { - rcv->cbfunc(&buf, rcv->cbdata); - } - OBJ_DESTRUCT(&buf); // free's the msg data - /* also done with the recv */ - opal_list_remove_item(&mca_pmix_native_component.posted_recvs, &rcv->super); - OBJ_RELEASE(rcv); - OBJ_RELEASE(msg); - return; - } - } - } - - /* we get here if no matching recv was found - this is an error */ - opal_output(0, "%s UNEXPECTED MESSAGE", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - OBJ_RELEASE(msg); -} - -/* - * Try connecting to a peer - */ -static void pmix_usock_try_connect(int fd, short args, void *cbdata) -{ - int rc, flags; - 
opal_socklen_t addrlen = 0; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_peer_try_connect: attempting to connect to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - addrlen = sizeof(struct sockaddr_un); - - while (mca_pmix_native_component.retries < mca_pmix_native_component.max_retries) { - mca_pmix_native_component.retries++; - /* Create the new socket */ - mca_pmix_native_component.sd = socket(PF_UNIX, SOCK_STREAM, 0); - if (mca_pmix_native_component.sd < 0) { - opal_output(0, "pmix:create_socket: socket() failed: %s (%d)\n", - strerror(opal_socket_errno), - opal_socket_errno); - continue; - } - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "usock_peer_try_connect: attempting to connect to server on socket %d", - mca_pmix_native_component.sd); - /* try to connect */ - if (connect(mca_pmix_native_component.sd, (struct sockaddr*)&mca_pmix_native_component.address, addrlen) < 0) { - if (opal_socket_errno == ETIMEDOUT) { - /* The server may be too busy to accept new connections, - * so cycle around and let it try again */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "timeout connecting to server"); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - continue; - } - - /* Some kernels (Linux 2.6) will automatically software - abort a connection that was ECONNREFUSED on the last - attempt, without even trying to establish the - connection. 
Handle that case in a semi-rational - way by trying again before giving up */ - if (ECONNABORTED == opal_socket_errno) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "connection to server aborted by OS - retrying"); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - continue; - } - } - /* otherwise, the connect succeeded - so break out of the loop */ - break; - } - - if (mca_pmix_native_component.retries == mca_pmix_native_component.max_retries || - mca_pmix_native_component.sd < 0){ - /* We were unsuccessful in establishing this connection, and are - * not likely to suddenly become successful */ - opal_output(0, "pmix:create_socket: connection to server failed"); - if (0 <= mca_pmix_native_component.sd) { - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - } - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - - /* connection succeeded */ - mca_pmix_native_component.retries = 0; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s sock_peer_try_connect: Connection across to server succeeded", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* setup event callbacks */ - opal_event_set(mca_pmix_native_component.evbase, - &mca_pmix_native_component.recv_event, - mca_pmix_native_component.sd, - OPAL_EV_READ|OPAL_EV_PERSIST, - pmix_usock_recv_handler, NULL); - opal_event_set_priority(&mca_pmix_native_component.recv_event, OPAL_EV_MSG_LO_PRI); - mca_pmix_native_component.recv_ev_active = false; - - opal_event_set(mca_pmix_native_component.evbase, - &mca_pmix_native_component.send_event, - mca_pmix_native_component.sd, - OPAL_EV_WRITE|OPAL_EV_PERSIST, - pmix_usock_send_handler, NULL); - opal_event_set_priority(&mca_pmix_native_component.send_event, OPAL_EV_MSG_LO_PRI); - mca_pmix_native_component.send_ev_active = false; - - /* setup the socket as non-blocking */ - if ((flags = fcntl(mca_pmix_native_component.sd, F_GETFL, 0)) < 0) { - opal_output(0, "usock_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n", - 
strerror(opal_socket_errno), - opal_socket_errno); - } else { - flags |= O_NONBLOCK; - if (fcntl(mca_pmix_native_component.sd, F_SETFL, flags) < 0) - opal_output(0, "usock_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n", - strerror(opal_socket_errno), - opal_socket_errno); - } - - /* setup our recv to catch the return ack call */ - if (!mca_pmix_native_component.recv_ev_active) { - opal_event_add(&mca_pmix_native_component.recv_event, 0); - mca_pmix_native_component.recv_ev_active = true; - } - - /* send our globally unique process identifier to the server */ - if (OPAL_SUCCESS == (rc = usock_send_connect_ack())) { - mca_pmix_native_component.state = PMIX_USOCK_CONNECT_ACK; - } else { - opal_output(0, - "%s usock_peer_try_connect: " - "usock_send_connect_ack to server failed: %s (%d)", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - opal_strerror(rc), rc); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } -} - -int usock_send_connect_ack(void) -{ - char *msg; - pmix_usock_hdr_t hdr; - int rc; - size_t sdsize; - char *cred; - size_t credsize; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s SEND CONNECT ACK", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* setup the header */ - hdr.id = OPAL_PROC_MY_NAME; - hdr.tag = UINT32_MAX; - hdr.type = PMIX_USOCK_IDENT; - - /* get our security credential */ - if (OPAL_SUCCESS != (rc = opal_sec.get_my_credential(NULL, opal_dstore_internal, &OPAL_PROC_MY_NAME, &cred, &credsize))) { - return rc; - } - - /* set the number of bytes to be read beyond the header */ - hdr.nbytes = strlen(opal_version_string) + 1 + credsize; - - /* create a space for our message */ - sdsize = (sizeof(hdr) + strlen(opal_version_string) + 1 + credsize); - if (NULL == (msg = (char*)malloc(sdsize))) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - memset(msg, 0, sdsize); - - /* load the message */ - memcpy(msg, &hdr, sizeof(hdr)); - memcpy(msg+sizeof(hdr), 
opal_version_string, strlen(opal_version_string)); - memcpy(msg+sizeof(hdr)+strlen(opal_version_string)+1, cred, credsize); - if (NULL != cred) { - free(cred); - } - - if (OPAL_SUCCESS != usock_send_blocking(msg, sdsize)) { - free(msg); - return OPAL_ERR_UNREACH; - } - free(msg); - return OPAL_SUCCESS; -} - -/* - * A blocking send on a non-blocking socket. Used to send the small amount of connection - * information that identifies the peers endpoint. - */ -static int usock_send_blocking(char *ptr, size_t size) -{ - size_t cnt = 0; - int retval; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s send blocking of %"PRIsize_t" bytes to socket %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - size, mca_pmix_native_component.sd); - - while (cnt < size) { - retval = send(mca_pmix_native_component.sd, (char*)ptr+cnt, size-cnt, 0); - if (retval < 0) { - if (opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) { - opal_output(0, "%s usock_peer_send_blocking: send() to socket %d failed: %s (%d)\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd, - strerror(opal_socket_errno), - opal_socket_errno); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return OPAL_ERR_UNREACH; - } - continue; - } - cnt += retval; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s blocking send complete to socket %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - - return OPAL_SUCCESS; -} - -/* - * Routine for debugging to print the connection state and socket options - */ -void pmix_usock_dump(const char* msg) -{ - char buff[255]; - int nodelay,flags; - - if ((flags = fcntl(mca_pmix_native_component.sd, F_GETFL, 0)) < 0) { - opal_output(0, "%s usock_peer_dump: fcntl(F_GETFL) failed: %s (%d)\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - opal_socket_errno); - } - -#if 
defined(USOCK_NODELAY) - optlen = sizeof(nodelay); - if (getsockopt(mca_pmix_native_component.sd, IPPROTO_USOCK, USOCK_NODELAY, (char *)&nodelay, &optlen) < 0) { - opal_output(0, "%s usock_peer_dump: USOCK_NODELAY option: %s (%d)\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - opal_socket_errno); - } -#else - nodelay = 0; -#endif - - snprintf(buff, sizeof(buff), "%s %s: nodelay %d flags %08x\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - msg, nodelay, flags); - opal_output(0, "%s", buff); -} - -char* pmix_usock_state_print(pmix_usock_state_t state) -{ - switch (state) { - case PMIX_USOCK_UNCONNECTED: - return "UNCONNECTED"; - case PMIX_USOCK_CLOSED: - return "CLOSED"; - case PMIX_USOCK_RESOLVE: - return "RESOLVE"; - case PMIX_USOCK_CONNECTING: - return "CONNECTING"; - case PMIX_USOCK_CONNECT_ACK: - return "ACK"; - case PMIX_USOCK_CONNECTED: - return "CONNECTED"; - case PMIX_USOCK_FAILED: - return "FAILED"; - default: - return "UNKNOWN"; - } -} - diff --git a/opal/mca/pmix/native/usock_sendrecv.c b/opal/mca/pmix/native/usock_sendrecv.c deleted file mode 100644 index 8b2d21776bd..00000000000 --- a/opal/mca/pmix/native/usock_sendrecv.c +++ /dev/null @@ -1,758 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. 
All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_UIO_H -#include -#endif -#ifdef HAVE_NET_UIO_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#ifdef HAVE_ARPA_INET_H -#include -#endif -#ifdef HAVE_NETINET_TCP_H -#include -#endif - -#include "opal_stdint.h" -#include "opal/types.h" -#include "opal/runtime/opal.h" -#include "opal/opal_socket_errno.h" -#include "opal/mca/backtrace/backtrace.h" -#include "opal/util/output.h" -#include "opal/util/net.h" -#include "opal/util/error.h" -#include "opal/class/opal_hash_table.h" -#include "opal/mca/event/event.h" -#include "opal/mca/sec/sec.h" - -#include "opal/mca/pmix/base/base.h" -#include "pmix_native.h" - -static void usock_complete_connect(void); -static int usock_recv_connect_ack(void); - -static int send_bytes(pmix_usock_send_t *msg) -{ - int rc; - - while (0 < msg->sdbytes) { - rc = write(mca_pmix_native_component.sd, msg->sdptr, msg->sdbytes); - if (rc < 0) { - if (opal_socket_errno == EINTR) { - continue; - } else if (opal_socket_errno == EAGAIN) { - /* tell the caller to keep this message on active, - * but let the event lib cycle so other messages - * can progress while this socket is busy - */ - return OPAL_ERR_RESOURCE_BUSY; - } else if (opal_socket_errno == EWOULDBLOCK) { - /* tell the caller to keep this message on active, - * but let the event lib cycle so other messages - * can progress while this socket is busy - */ - return OPAL_ERR_WOULD_BLOCK; - } - /* we hit an error and cannot progress this message */ - opal_output(0, "%s pmix_usock_msg_send_bytes: write failed: %s (%d) [sd = %d]", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - opal_socket_errno, - 
mca_pmix_native_component.sd); - return OPAL_ERR_COMM_FAILURE; - } - /* update location */ - msg->sdbytes -= rc; - msg->sdptr += rc; - } - /* we sent the full data block */ - return OPAL_SUCCESS; -} - -/* - * A file descriptor is available/ready for send. Check the state - * of the socket and take the appropriate action. - */ -void pmix_usock_send_handler(int sd, short flags, void *cbdata) -{ - pmix_usock_send_t *msg = mca_pmix_native_component.send_msg; - int rc; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler called to send to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - switch (mca_pmix_native_component.state) { - case PMIX_USOCK_CONNECTING: - case PMIX_USOCK_CLOSED: - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "usock:send_handler %s", - pmix_usock_state_print(mca_pmix_native_component.state)); - usock_complete_connect(); - /* de-activate the send event until the connection - * handshake completes - */ - if (mca_pmix_native_component.send_ev_active) { - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - } - break; - case PMIX_USOCK_CONNECTED: - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler SENDING TO SERVER with %s msg", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == msg) ? 
"NULL" : "NON-NULL"); - if (NULL != msg) { - if (!msg->hdr_sent) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler SENDING HEADER", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - if (OPAL_SUCCESS == (rc = send_bytes(msg))) { - /* header is completely sent */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler HEADER SENT", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - msg->hdr_sent = true; - /* setup to send the data */ - if (NULL == msg->data) { - /* this was a zero-byte msg - nothing more to do */ - OBJ_RELEASE(msg); - mca_pmix_native_component.send_msg = NULL; - goto next; - } else { - /* send the data as a single block */ - msg->sdptr = msg->data; - msg->sdbytes = msg->hdr.nbytes; - } - /* fall thru and let the send progress */ - } else if (OPAL_ERR_RESOURCE_BUSY == rc || - OPAL_ERR_WOULD_BLOCK == rc) { - /* exit this event and let the event lib progress */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler RES BUSY OR WOULD BLOCK", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - return; - } else { - // report the error - opal_output(0, "%s pmix_usock_peer_send_handler: unable to send message ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - OBJ_RELEASE(msg); - mca_pmix_native_component.send_msg = NULL; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - } - - if (msg->hdr_sent) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler SENDING BODY OF MSG", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - if (OPAL_SUCCESS == (rc = send_bytes(msg))) { - // message is complete - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler BODY SENT", - 
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - OBJ_RELEASE(msg); - mca_pmix_native_component.send_msg = NULL; - goto next; - } else if (OPAL_ERR_RESOURCE_BUSY == rc || - OPAL_ERR_WOULD_BLOCK == rc) { - /* exit this event and let the event lib progress */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler RES BUSY OR WOULD BLOCK", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - return; - } else { - // report the error - opal_output(0, "%s pmix_usock_peer_send_handler: unable to send message ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - OBJ_RELEASE(msg); - mca_pmix_native_component.send_msg = NULL; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - } - - next: - /* if current message completed - progress any pending sends by - * moving the next in the queue into the "on-deck" position. Note - * that this doesn't mean we send the message right now - we will - * wait for another send_event to fire before doing so. This gives - * us a chance to service any pending recvs. 
- */ - mca_pmix_native_component.send_msg = (pmix_usock_send_t*) - opal_list_remove_first(&mca_pmix_native_component.send_queue); - } - - /* if nothing else to do unregister for send event notifications */ - if (NULL == mca_pmix_native_component.send_msg && - mca_pmix_native_component.send_ev_active) { - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - } - break; - - default: - opal_output(0, "%s pmix_usock_peer_send_handler: invalid connection state (%d) on socket %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.state, mca_pmix_native_component.sd); - if (mca_pmix_native_component.send_ev_active) { - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - } - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - break; - } -} - -static int read_bytes(pmix_usock_recv_t* recv) -{ - int rc; - - /* read until all bytes recvd or error */ - while (0 < recv->rdbytes) { - rc = read(mca_pmix_native_component.sd, recv->rdptr, recv->rdbytes); - if (rc < 0) { - if(opal_socket_errno == EINTR) { - continue; - } else if (opal_socket_errno == EAGAIN) { - /* tell the caller to keep this message on active, - * but let the event lib cycle so other messages - * can progress while this socket is busy - */ - return OPAL_ERR_RESOURCE_BUSY; - } else if (opal_socket_errno == EWOULDBLOCK) { - /* tell the caller to keep this message on active, - * but let the event lib cycle so other messages - * can progress while this socket is busy - */ - return OPAL_ERR_WOULD_BLOCK; - } - /* we hit an error and cannot progress this message - report - * the error back to the RML and let the caller know - * to abort this message - */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix_usock_msg_recv: readv failed: %s (%d)", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - 
opal_socket_errno); - return OPAL_ERR_COMM_FAILURE; - } else if (rc == 0) { - /* the remote peer closed the connection - report that condition - * and let the caller know - */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix_usock_msg_recv: peer closed connection", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* stop all events */ - if (mca_pmix_native_component.recv_ev_active) { - opal_event_del(&mca_pmix_native_component.recv_event); - mca_pmix_native_component.recv_ev_active = false; - } - if (mca_pmix_native_component.timer_ev_active) { - opal_event_del(&mca_pmix_native_component.timer_event); - mca_pmix_native_component.timer_ev_active = false; - } - if (mca_pmix_native_component.send_ev_active) { - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - } - if (NULL != mca_pmix_native_component.recv_msg) { - OBJ_RELEASE(mca_pmix_native_component.recv_msg); - mca_pmix_native_component.recv_msg = NULL; - } - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return OPAL_ERR_WOULD_BLOCK; - } - /* we were able to read something, so adjust counters and location */ - recv->rdbytes -= rc; - recv->rdptr += rc; - } - - /* we read the full data block */ - return OPAL_SUCCESS; -} - -/* - * Dispatch to the appropriate action routine based on the state - * of the connection with the peer. - */ - -void pmix_usock_recv_handler(int sd, short flags, void *cbdata) -{ - int rc; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler called", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - switch (mca_pmix_native_component.state) { - case PMIX_USOCK_CONNECT_ACK: - if (OPAL_SUCCESS == (rc = usock_recv_connect_ack())) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler starting send/recv events", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* we connected! 
Start the send/recv events */ - if (!mca_pmix_native_component.recv_ev_active) { - opal_event_add(&mca_pmix_native_component.recv_event, 0); - mca_pmix_native_component.recv_ev_active = true; - } - if (mca_pmix_native_component.timer_ev_active) { - opal_event_del(&mca_pmix_native_component.timer_event); - mca_pmix_native_component.timer_ev_active = false; - } - /* if there is a message waiting to be sent, queue it */ - if (NULL == mca_pmix_native_component.send_msg) { - mca_pmix_native_component.send_msg = (pmix_usock_send_t*)opal_list_remove_first(&mca_pmix_native_component.send_queue); - } - if (NULL != mca_pmix_native_component.send_msg && !mca_pmix_native_component.send_ev_active) { - opal_event_add(&mca_pmix_native_component.send_event, 0); - mca_pmix_native_component.send_ev_active = true; - } - /* update our state */ - mca_pmix_native_component.state = PMIX_USOCK_CONNECTED; - } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s UNABLE TO COMPLETE CONNECT ACK WITH SERVER", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - opal_event_del(&mca_pmix_native_component.recv_event); - mca_pmix_native_component.recv_ev_active = false; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - break; - case PMIX_USOCK_CONNECTED: - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler CONNECTED", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* allocate a new message and setup for recv */ - if (NULL == mca_pmix_native_component.recv_msg) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler allocate new recv msg", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - mca_pmix_native_component.recv_msg = OBJ_NEW(pmix_usock_recv_t); - if (NULL == mca_pmix_native_component.recv_msg) { - opal_output(0, "%s usock_recv_handler: unable to allocate recv message\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - 
CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - /* start by reading the header */ - mca_pmix_native_component.recv_msg->rdptr = (char*)&mca_pmix_native_component.recv_msg->hdr; - mca_pmix_native_component.recv_msg->rdbytes = sizeof(pmix_usock_hdr_t); - } - /* if the header hasn't been completely read, read it */ - if (!mca_pmix_native_component.recv_msg->hdr_recvd) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "usock:recv:handler read hdr"); - if (OPAL_SUCCESS == (rc = read_bytes(mca_pmix_native_component.recv_msg))) { - /* completed reading the header */ - mca_pmix_native_component.recv_msg->hdr_recvd = true; - /* if this is a zero-byte message, then we are done */ - if (0 == mca_pmix_native_component.recv_msg->hdr.nbytes) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s RECVD ZERO-BYTE MESSAGE FROM SERVER for tag %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.recv_msg->hdr.tag); - mca_pmix_native_component.recv_msg->data = NULL; // make sure - mca_pmix_native_component.recv_msg->rdptr = NULL; - mca_pmix_native_component.recv_msg->rdbytes = 0; - } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler allocate data region of size %lu", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (unsigned long)mca_pmix_native_component.recv_msg->hdr.nbytes); - /* allocate the data region */ - mca_pmix_native_component.recv_msg->data = (char*)malloc(mca_pmix_native_component.recv_msg->hdr.nbytes); - /* point to it */ - mca_pmix_native_component.recv_msg->rdptr = mca_pmix_native_component.recv_msg->data; - mca_pmix_native_component.recv_msg->rdbytes = mca_pmix_native_component.recv_msg->hdr.nbytes; - } - /* fall thru and attempt to read the data */ - } else if (OPAL_ERR_RESOURCE_BUSY == rc || - OPAL_ERR_WOULD_BLOCK == rc) { - /* exit this event and let the event lib progress */ - return; - } 
else { - /* close the connection */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler error reading bytes - closing connection", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - } - - if (mca_pmix_native_component.recv_msg->hdr_recvd) { - /* continue to read the data block - we start from - * wherever we left off, which could be at the - * beginning or somewhere in the message - */ - if (OPAL_SUCCESS == (rc = read_bytes(mca_pmix_native_component.recv_msg))) { - /* we recvd all of the message */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (int)mca_pmix_native_component.recv_msg->hdr.nbytes, - mca_pmix_native_component.recv_msg->hdr.tag); - /* post it for delivery */ - PMIX_ACTIVATE_POST_MSG(mca_pmix_native_component.recv_msg); - mca_pmix_native_component.recv_msg = NULL; - } else if (OPAL_ERR_RESOURCE_BUSY == rc || - OPAL_ERR_WOULD_BLOCK == rc) { - /* exit this event and let the event lib progress */ - return; - } else { - // report the error - opal_output(0, "%s usock_peer_recv_handler: unable to recv message", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* turn off the recv event */ - opal_event_del(&mca_pmix_native_component.recv_event); - mca_pmix_native_component.recv_ev_active = false; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - } - break; - default: - opal_output(0, "%s usock_peer_recv_handler: invalid socket state(%d)", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.state); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - break; - } -} - -/* - * A blocking recv on a non-blocking socket. 
Used to receive the small amount of connection - * information that identifies the peers endpoint. - */ -static bool usock_recv_blocking(char *data, size_t size) -{ - size_t cnt = 0; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s waiting for connect ack from server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - while (cnt < size) { - int retval = recv(mca_pmix_native_component.sd, (char *)data+cnt, size-cnt, 0); - - /* remote closed connection */ - if (retval == 0) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_recv_blocking: server closed connection: state %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.state); - mca_pmix_native_component.state = PMIX_USOCK_CLOSED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return false; - } - - /* socket is non-blocking so handle errors */ - if (retval < 0) { - if (opal_socket_errno != EINTR && - opal_socket_errno != EAGAIN && - opal_socket_errno != EWOULDBLOCK) { - if (mca_pmix_native_component.state == PMIX_USOCK_CONNECT_ACK) { - /* If we overflow the listen backlog, it's - possible that even though we finished the three - way handshake, the remote host was unable to - transition the connection from half connected - (received the initial SYN) to fully connected - (in the listen backlog). We likely won't see - the failure until we try to receive, due to - timing and the like. The first thing we'll get - in that case is a RST packet, which receive - will turn into a connection reset by peer - errno. 
In that case, leave the socket in - CONNECT_ACK and propogate the error up to - recv_connect_ack, who will try to establish the - connection again */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s connect ack received error %s from server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno)); - return false; - } else { - opal_output(0, - "%s usock_recv_blocking: " - "recv() failed for server: %s (%d)\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - opal_socket_errno); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return false; - } - } - continue; - } - cnt += retval; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s connect ack received from server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - return true; -} - - -/* - * Receive the peers globally unique process identification from a newly - * connected socket and verify the expected response. If so, move the - * socket to a connected state. 
- */ -static int usock_recv_connect_ack(void) -{ - char *msg; - char *version; - int rc; - char *cred; - size_t credsize; - pmix_usock_hdr_t hdr; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s RECV CONNECT ACK FROM SERVER ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - - /* ensure all is zero'd */ - memset(&hdr, 0, sizeof(pmix_usock_hdr_t)); - - if (usock_recv_blocking((char*)&hdr, sizeof(pmix_usock_hdr_t))) { - /* If the state is CONNECT_ACK, then we were waiting for - * the connection to be ack'd - */ - if (mca_pmix_native_component.state != PMIX_USOCK_CONNECT_ACK) { - /* handshake broke down - abort this connection */ - opal_output(0, "%s RECV CONNECT BAD HANDSHAKE FROM SERVER ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return OPAL_ERR_UNREACH; - } - } else { - /* unable to complete the recv */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s unable to complete recv of connect-ack from server ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - return OPAL_ERR_UNREACH; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s connect-ack recvd from server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* compare the servers name to the expected value */ - if (0 != opal_compare_proc(hdr.id, mca_pmix_native_component.server)) { - opal_output(0, "usock_peer_recv_connect_ack: " - "%s received unexpected process identifier (%s) from server: expected (%s)", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(hdr.id), - OPAL_NAME_PRINT(mca_pmix_native_component.server)); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return OPAL_ERR_UNREACH; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, 
- "%s connect-ack header from server is okay", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* get the authentication and version payload */ - if (NULL == (msg = (char*)malloc(hdr.nbytes))) { - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return OPAL_ERR_OUT_OF_RESOURCE; - } - if (!usock_recv_blocking(msg, hdr.nbytes)) { - /* unable to complete the recv */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s unable to complete recv of connect-ack from server ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - free(msg); - return OPAL_ERR_UNREACH; - } - - /* check that this is from a matching version */ - version = (char*)(msg); - if (0 != strcmp(version, opal_version_string)) { - opal_output(0, "usock_peer_recv_connect_ack: " - "%s received different version from server: %s instead of %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - version, opal_version_string); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - free(msg); - return OPAL_ERR_UNREACH; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s connect-ack version from server matches ours", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* check security token */ - cred = (char*)(msg + strlen(version) + 1); - credsize = hdr.nbytes - strlen(version) - 1; - if (OPAL_SUCCESS != (rc = opal_sec.authenticate(cred, credsize, NULL))) { - OPAL_ERROR_LOG(rc); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - free(msg); - return OPAL_ERR_UNREACH; - } - free(msg); - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s connect-ack from server authenticated", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* connected */ - mca_pmix_native_component.state = PMIX_USOCK_CONNECTED; - /* initiate send of first message on queue */ - if (NULL == 
mca_pmix_native_component.send_msg) { - mca_pmix_native_component.send_msg = (pmix_usock_send_t*) - opal_list_remove_first(&mca_pmix_native_component.send_queue); - } - if (NULL != mca_pmix_native_component.send_msg && !mca_pmix_native_component.send_ev_active) { - opal_event_add(&mca_pmix_native_component.send_event, 0); - mca_pmix_native_component.send_ev_active = true; - } - if (2 <= opal_output_get_verbosity(opal_pmix_base_framework.framework_output)) { - pmix_usock_dump("connected"); - } - return OPAL_SUCCESS; -} - - -/* - * Check the status of the connection. If the connection failed, will retry - * later. Otherwise, send this process' identifier to the server on the - * newly connected socket. - */ -static void usock_complete_connect(void) -{ - int so_error = 0; - opal_socklen_t so_length = sizeof(so_error); - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:complete_connect called for server on socket %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - - /* check connect completion status */ - if (getsockopt(mca_pmix_native_component.sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) { - opal_output(0, "%s usock_peer_complete_connect: getsockopt() to server failed: %s (%d)\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - opal_socket_errno); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; - return; - } - - if (so_error == EINPROGRESS) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send:handler still in progress", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - return; - } else if (so_error == ECONNREFUSED || so_error == ETIMEDOUT) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_peer_complete_connect: connection to server failed: %s (%d)", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(so_error), - so_error); - 
CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } else if (so_error != 0) { - /* No need to worry about the return code here - we return regardless - at this point, and if an error did occur a message has already been - printed for the user */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_peer_complete_connect: " - "connection to server failed with error %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - so_error); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_peer_complete_connect: sending ack to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - if (usock_send_connect_ack() == OPAL_SUCCESS) { - mca_pmix_native_component.state = PMIX_USOCK_CONNECT_ACK; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_peer_complete_connect: setting read event on connection to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - if (!mca_pmix_native_component.recv_ev_active) { - opal_event_add(&mca_pmix_native_component.recv_event, 0); - mca_pmix_native_component.recv_ev_active = true; - } - } else { - opal_output(0, "%s usock_complete_connect: unable to send connect ack to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - } -} - diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index 873870af741..b9cf9cbdcb7 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -16,131 +16,57 @@ #include "opal_config.h" #include "opal/types.h" +#ifdef HAVE_SYS_UN_H +#include +#endif + #include "opal/mca/mca.h" #include "opal/mca/event/event.h" #include "opal/dss/dss.h" #include "opal/runtime/opal.h" -#include "opal/mca/dstore/dstore.h" #include "opal/dss/dss.h" #include "opal/util/error.h" #include "opal/util/proc.h" +#include "opal/mca/pmix/pmix_types.h" +#include "opal/mca/pmix/pmix_server.h" + BEGIN_C_DECLS -/* define some maximum sizes */ -#define PMIX_MAX_VALLEN 1024 -#define PMIX_MAX_INFO_KEY 255 -#define PMIX_MAX_INFO_VAL 1024 - -/* define an INFO object corresponding to - * the MPI_Info structure */ -typedef struct { - opal_list_item_t super; - char key[PMIX_MAX_INFO_KEY]; - char value[PMIX_MAX_INFO_VAL]; -} pmix_info_t; -OBJ_CLASS_DECLARATION(pmix_info_t); - -/* define a scope for data "put" by PMI per the following: - * - * PMI_LOCAL - the data is intended only for other application - * processes on the same node. Data marked in this way - * will not be included in data packages sent to remote requestors - * PMI_REMOTE - the data is intended solely for applications processes on - * remote nodes. 
Data marked in this way will not be shared with - * other processes on the same node - * PMI_GLOBAL - the data is to be shared with all other requesting processes, - * regardless of location - */ -typedef uint8_t opal_pmix_scope_t; -#define PMIX_SCOPE_T OPAL_UINT8 -#define PMIX_SCOPE_UNDEF 0 -#define PMIX_INTERNAL 1 // data used internally only -#define PMIX_LOCAL 2 // share to procs also on this node -#define PMIX_REMOTE 3 // share with procs not on this node -#define PMIX_GLOBAL 4 // share with all procs (local + remote) - -/* callback function for non-blocking operations */ -typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); - -/* flags to indicate if the modex value being pushed into - * the PMIx server comes from an element that is ready to - * support async modex operations, or from one that requires - * synchronous modex (i.e., blocking modex operation) */ -#define PMIX_SYNC_REQD true -#define PMIX_ASYNC_RDY false - -/* define a set of "standard" PMIx attributes that can - * be queried. Implementations (and users) are free to extend as - * desired, so the get_attr functions need to be capable - * of handling the "not found" condition. 
Note that these - * are attributes of the system and the job as opposed to - * values the application (or underlying MPI library) - * might choose to expose - i.e., they are values provided - * by the resource manager as opposed to the application */ -#define PMIX_ATTR_UNDEF NULL - -#define PMIX_CPUSET "pmix.cpuset" // (char*) hwloc bitmap applied to proc upon launch -#define PMIX_CREDENTIAL "pmix.cred" // (opal_byte_object*) security credential assigned to proc -#define PMIX_HOSTNAME "pmix.hname" // (char*) name of the host this proc is on -/* scratch directory locations for use by applications */ -#define PMIX_TMPDIR "pmix.tmpdir" // (char*) top-level tmp dir assigned to session -/* information about relative ranks as assigned */ -#define PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler -#define PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job -#define PMIX_RANK "pmix.rank" // (uint32_t) process rank within the job -#define PMIX_GLOBAL_RANK "pmix.grank" // (uint32_t) rank spanning across all jobs in this session -#define PMIX_APP_RANK "pmix.apprank" // (uint32_t) rank within this app -#define PMIX_NPROC_OFFSET "pmix.offset" // (uint32_t) starting global rank of this job -#define PMIX_LOCAL_RANK "pmix.lrank" // (uint16_t) rank on this node within this job -#define PMIX_NODE_RANK "pmix.nrank" // (uint16_t) rank on this node spanning all jobs -#define PMIX_LOCALLDR "pmix.lldr" // (uint64_t) opal_identifier of lowest rank on this node within this job -#define PMIX_APPLDR "pmix.aldr" // (uint32_t) lowest rank in this app within this job -#define PMIX_NODE_ID "pmix.nodeid" // (uint32_t) vpid of daemon hosting specified proc - -/* proc location-related info */ -#define PMIX_PROC_MAP "pmix.map" // (byte_object) packed map of proc locations within this job -#define PMIX_LOCAL_PEERS "pmix.lpeers" // (char*) comma-delimited string of ranks on this node within this job -#define PMIX_LOCAL_CPUSETS "pmix.lcpus" // (byte_object) packed names and 
cpusets of local peers -/* size info */ -#define PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this namespace -#define PMIX_JOB_SIZE "pmix.job.size" // (uint32_t) #procs in this job -#define PMIX_LOCAL_SIZE "pmix.local.size" // (uint32_t) #procs in this job on this node -#define PMIX_NODE_SIZE "pmix.node.size" // (uint32_t) #procs across all jobs on this node -#define PMIX_MAX_PROCS "pmix.max.size" // (uint32_t) max #procs for this job -/* topology info */ -#define PMIX_NET_TOPO "pmix.ntopo" // (byte_object) network topology -#define PMIX_LOCAL_TOPO "pmix.ltopo" // (hwloc topo) local node topology +/* provide access to the framework verbose output without + * exposing the entire base */ +extern int opal_pmix_verbose_output; +extern bool opal_pmix_collect_all_data; +extern bool opal_pmix_base_async_modex; +extern int opal_pmix_base_exchange(opal_value_t *info, + opal_pmix_pdata_t *pdat, + int timeout); /** * Provide a simplified macro for sending data via modex * to other processes. 
The macro requires four arguments: * * r - the integer return status from the modex op - * f - whether this modex requires sync or is async ready * sc - the PMIX scope of the data * s - the key to tag the data being posted * d - pointer to the data object being posted * t - the type of the data */ -#define OPAL_MODEX_SEND_VALUE(r, f, sc, s, d, t) \ - do { \ - opal_value_t kv; \ - if (PMIX_SYNC_REQD == (f)) { \ - opal_pmix_use_collective = true; \ - } \ - OBJ_CONSTRUCT(&kv, opal_value_t); \ - kv.key = (s); \ - if (OPAL_SUCCESS != ((r) = opal_value_load(&kv, (d), (t)))) { \ - OPAL_ERROR_LOG((r)); \ - } else { \ - if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &kv))) { \ - OPAL_ERROR_LOG((r)); \ - } \ - } \ - /* do not destruct the keyval as we don't own */ \ - /* the data - the caller will take care of the */ \ - /* key and value storage, and the kv itself has none */ \ +#define OPAL_MODEX_SEND_VALUE(r, sc, s, d, t) \ + do { \ + opal_value_t _kv; \ + OBJ_CONSTRUCT(&(_kv), opal_value_t); \ + _kv.key = (s); \ + if (OPAL_SUCCESS != ((r) = opal_value_load(&(_kv), (d), (t)))) { \ + OPAL_ERROR_LOG((r)); \ + } else { \ + if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &(_kv)))) { \ + OPAL_ERROR_LOG((r)); \ + } \ + } \ + /* opal_value_load makes a copy of the data, so release it */ \ + _kv.key = NULL; \ + OBJ_DESTRUCT(&(_kv)); \ } while(0); /** @@ -148,29 +74,25 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * to other processes. 
The macro requires four arguments: * * r - the integer return status from the modex op - * f - whether this modex requires sync or is async ready * sc - the PMIX scope of the data * s - the key to tag the data being posted * d - the data object being posted * sz - the number of bytes in the data object */ -#define OPAL_MODEX_SEND_STRING(r, f, sc, s, d, sz) \ - do { \ - opal_value_t kv; \ - if (PMIX_SYNC_REQD == (f)) { \ - opal_pmix_use_collective = true; \ - } \ - OBJ_CONSTRUCT(&kv, opal_value_t); \ - kv.key = (s); \ - kv.type = OPAL_BYTE_OBJECT; \ - kv.data.bo.bytes = (uint8_t*)(d); \ - kv.data.bo.size = (sz); \ - if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &kv))) { \ - OPAL_ERROR_LOG((r)); \ - } \ - kv.data.bo.bytes = NULL; /* protect the data */ \ - kv.key = NULL; /* protect the key */ \ - OBJ_DESTRUCT(&kv); \ +#define OPAL_MODEX_SEND_STRING(r, sc, s, d, sz) \ + do { \ + opal_value_t _kv; \ + OBJ_CONSTRUCT(&(_kv), opal_value_t); \ + _kv.key = (s); \ + _kv.type = OPAL_BYTE_OBJECT; \ + _kv.data.bo.bytes = (uint8_t*)(d); \ + _kv.data.bo.size = (sz); \ + if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &(_kv)))) { \ + OPAL_ERROR_LOG((r)); \ + } \ + _kv.data.bo.bytes = NULL; /* protect the data */ \ + _kv.key = NULL; /* protect the key */ \ + OBJ_DESTRUCT(&(_kv)); \ } while(0); /** @@ -178,21 +100,56 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * to other processes. 
The macro requires four arguments: * * r - the integer return status from the modex op - * f - whether this modex requires sync or is async ready * sc - the PMIX scope of the data * s - the MCA component that is posting the data * d - the data object being posted * sz - the number of bytes in the data object */ -#define OPAL_MODEX_SEND(r, f, sc, s, d, sz) \ +#define OPAL_MODEX_SEND(r, sc, s, d, sz) \ do { \ - char *key; \ - if (PMIX_SYNC_REQD == (f)) { \ - opal_pmix_use_collective = true; \ - } \ - key = mca_base_component_to_string((s)); \ - OPAL_MODEX_SEND_STRING((r), (f), (sc), key, (d), (sz)); \ - free(key); \ + char *_key; \ + _key = mca_base_component_to_string((s)); \ + OPAL_MODEX_SEND_STRING((r), (sc), _key, (d), (sz)); \ + free(_key); \ + } while(0); + +/** + * Provide a simplified macro for retrieving modex data + * from another process when we don't want the PMIx module + * to request it from the server if not found: + * + * r - the integer return status from the modex op (int) + * s - string key (char*) + * p - pointer to the opal_process_name_t of the proc that posted + * the data (opal_process_name_t*) + * d - pointer to a location wherein the data object + * is to be returned + * t - the expected data type + */ +#define OPAL_MODEX_RECV_VALUE_OPTIONAL(r, s, p, d, t) \ + do { \ + opal_value_t *_kv, *_info; \ + opal_list_t _ilist; \ + OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ + "%s[%s:%d] MODEX RECV VALUE OPTIONAL FOR PROC %s KEY %s", \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ + __FILE__, __LINE__, \ + OPAL_NAME_PRINT(*(p)), (s))); \ + OBJ_CONSTRUCT(&(_ilist), opal_list_t); \ + _info = OBJ_NEW(opal_value_t); \ + _info->key = strdup(OPAL_PMIX_OPTIONAL); \ + _info->type = OPAL_BOOL; \ + _info->data.flag = true; \ + opal_list_append(&(_ilist), &(_info)->super); \ + if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), &(_ilist), &(_kv)))) { \ + if (NULL == _kv) { \ + (r) = OPAL_ERR_NOT_FOUND; \ + } else { \ + (r) = opal_value_unload(_kv, (void**)(d), 
(t)); \ + OBJ_RELEASE(_kv); \ + } \ + } \ + OPAL_LIST_DESTRUCT(&(_ilist)); \ } while(0); /** @@ -201,22 +158,28 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * * r - the integer return status from the modex op (int) * s - string key (char*) - * p - pointer to the opal_proc_t of the proc that posted - * the data (opal_proc_t*) + * p - pointer to the opal_process_name_t of the proc that posted + * the data (opal_process_name_t*) * d - pointer to a location wherein the data object * is to be returned * t - the expected data type */ -#define OPAL_MODEX_RECV_VALUE(r, s, p, d, t) \ - do { \ - opal_value_t *kv; \ - if (OPAL_SUCCESS != ((r) = opal_pmix.get(&(p)->proc_name, \ - (s), &kv))) { \ - *(d) = NULL; \ - } else { \ - (r) = opal_value_unload(kv, (void**)(d), (t)); \ - OBJ_RELEASE(kv); \ - } \ +#define OPAL_MODEX_RECV_VALUE(r, s, p, d, t) \ + do { \ + opal_value_t *_kv; \ + OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ + "%s[%s:%d] MODEX RECV VALUE FOR PROC %s KEY %s", \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ + __FILE__, __LINE__, \ + OPAL_NAME_PRINT(*(p)), (s))); \ + if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), NULL, &(_kv)))) { \ + if (NULL == _kv) { \ + (r) = OPAL_ERR_NOT_FOUND; \ + } else { \ + (r) = opal_value_unload(_kv, (void**)(d), (t)); \ + OBJ_RELEASE(_kv); \ + } \ + } \ } while(0); /** @@ -225,27 +188,35 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * * r - the integer return status from the modex op (int) * s - string key (char*) - * p - pointer to the opal_proc_t of the proc that posted - * the data (opal_proc_t*) + * p - pointer to the opal_process_name_t of the proc that posted + * the data (opal_process_name_t*) * d - pointer to a location wherein the data object * it to be returned (char**) * sz - pointer to a location wherein the number of bytes * in the data object can be returned (size_t) */ -#define OPAL_MODEX_RECV_STRING(r, s, p, d, sz) \ - do { \ - 
opal_value_t *kv; \ - if (OPAL_SUCCESS == ((r) = opal_pmix.get(&(p)->proc_name, \ - (s), &kv)) && \ - NULL != kv) { \ - *(d) = kv->data.bo.bytes; \ - *(sz) = kv->data.bo.size; \ - kv->data.bo.bytes = NULL; /* protect the data */ \ - OBJ_RELEASE(kv); \ - } else { \ - *(d) = NULL; \ - *(sz) = 0; \ - } \ +#define OPAL_MODEX_RECV_STRING(r, s, p, d, sz) \ + do { \ + opal_value_t *_kv; \ + OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ + "%s[%s:%d] MODEX RECV STRING FOR PROC %s KEY %s", \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ + __FILE__, __LINE__, \ + OPAL_NAME_PRINT(*(p)), (s))); \ + if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), NULL, &(_kv)))) { \ + if (NULL == _kv) { \ + *(sz) = 0; \ + (r) = OPAL_ERR_NOT_FOUND; \ + } else { \ + *(d) = _kv->data.bo.bytes; \ + *(sz) = _kv->data.bo.size; \ + _kv->data.bo.bytes = NULL; /* protect the data */ \ + OBJ_RELEASE(_kv); \ + } \ + } else { \ + *(sz) = 0; \ + (r) = OPAL_ERR_NOT_FOUND; \ + } \ } while(0); /** @@ -254,8 +225,8 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * * r - the integer return status from the modex op (int) * s - the MCA component that posted the data (mca_base_component_t*) - * p - pointer to the opal_proc_t of the proc that posted - * the data (opal_proc_t*) + * p - pointer to the opal_process_name_t of the proc that posted + * the data (opal_process_name_t*) * d - pointer to a location wherein the data object * it to be returned (char**) * sz - pointer to a location wherein the number of bytes @@ -263,183 +234,611 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); */ #define OPAL_MODEX_RECV(r, s, p, d, sz) \ do { \ - char *key; \ - key = mca_base_component_to_string((s)); \ - if (NULL == key) { \ + char *_key; \ + _key = mca_base_component_to_string((s)); \ + OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ + "%s[%s:%d] MODEX RECV FOR PROC %s KEY %s", \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ + __FILE__, __LINE__, \ + 
OPAL_NAME_PRINT(*(p)), _key)); \ + if (NULL == _key) { \ OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); \ (r) = OPAL_ERR_OUT_OF_RESOURCE; \ } else { \ - OPAL_MODEX_RECV_STRING((r), key, (p), (d), (sz)); \ - free(key); \ + OPAL_MODEX_RECV_STRING((r), _key, (p), (d), (sz)); \ + free(_key); \ } \ } while(0); - /** - * Provide a simplified macro for calling the fence function - * that takes into account directives and availability of - * non-blocking operations - */ -#define OPAL_FENCE(p, s, cf, cd) \ - do { \ - if (opal_pmix_use_collective || NULL == opal_pmix.fence_nb) { \ - opal_pmix.fence((p), (s)); \ - } else { \ - opal_pmix.fence_nb((p), (s), (cf), (cd)); \ - } \ + * Provide a macro for accessing a base function that exchanges + * data values between two procs using the PMIx Publish/Lookup + * APIs */ + #define OPAL_PMIX_EXCHANGE(r, i, p, t) \ + do { \ + OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ + "%s[%s:%d] EXCHANGE %s WITH %s", \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ + __FILE__, __LINE__, \ + (i)->key, (p)->value.key)); \ + (r) = opal_pmix_base_exchange((i), (p), (t)); \ } while(0); -/* callback handler for errors */ -typedef void (*opal_pmix_errhandler_fn_t)(int error); - -/**** DEFINE THE PUBLIC API'S **** - **** NOTE THAT WE DO NOT HAVE A 1:1 MAPPING OF APIs **** - **** HERE TO THOSE CURRENTLY DEFINED BY PMI AS WE **** - **** DON'T USE SOME OF THOSE FUNCTIONS AND THIS ISN'T **** - **** A GENERAL LIBRARY ****/ -/***** APIs CURRENTLY USED IN THE OMPI/ORTE CODE BASE ****/ -/* NOTE: calls to these APIs must be thread-protected as there - * currently is NO internal thread safety. */ +/************************************************************ + * CLIENT APIs * + ************************************************************/ -/* Init */ +/* Initialize the PMIx client + * When called the client will check for the required connection + * information of the local server and will establish the connection. 
+ * If the information is not found, or the server connection fails, then + * an appropriate error constant will be returned. + */ typedef int (*opal_pmix_base_module_init_fn_t)(void); -/* Finalize */ +/* Finalize the PMIx client, closing the connection to the local server. + * An error code will be returned if, for some reason, the connection + * cannot be closed. */ typedef int (*opal_pmix_base_module_fini_fn_t)(void); -/* Initialized */ -typedef bool (*opal_pmix_base_module_initialized_fn_t)(void); - -/* Abort */ -typedef int (*opal_pmix_base_module_abort_fn_t)(int flag, const char msg[]); +/* Returns _true_ if the PMIx client has been successfully initialized, + * returns _false_ otherwise. Note that the function only reports the + * internal state of the PMIx client - it does not verify an active + * connection with the server, nor that the server is functional. */ +typedef int (*opal_pmix_base_module_initialized_fn_t)(void); -/* Fence - note that this call is required to commit any - * data "put" to the system since the last call to "fence" - * prior to (or as part of) executing the barrier. Serves both PMI2 - * and PMI1 "barrier" purposes */ -typedef int (*opal_pmix_base_module_fence_fn_t)(opal_process_name_t *procs, size_t nprocs); +/* Request that the provided list of opal_namelist_t procs be aborted, returning the + * provided _status_ and printing the provided message. A _NULL_ + * for the proc list indicates that all processes in the caller's + * nspace are to be aborted. + * + * The response to this request is somewhat dependent on the specific resource + * manager and its configuration (e.g., some resource managers will + * not abort the application if the provided _status_ is zero unless + * specifically configured to do so), and thus lies outside the control + * of PMIx itself. However, the client will inform the RM of + * the request that the application be aborted, regardless of the + * value of the provided _status_. 
+ * + * Passing a _NULL_ msg parameter is allowed. Note that race conditions + * caused by multiple processes calling PMIx_Abort are left to the + * server implementation to resolve with regard to which status is + * returned and what messages (if any) are printed. + */ +typedef int (*opal_pmix_base_module_abort_fn_t)(int status, const char *msg, + opal_list_t *procs); + +/* Push all previously _PMIx_Put_ values to the local PMIx server. + * This is an asynchronous operation - the library will immediately + * return to the caller while the data is transmitted to the local + * server in the background */ +typedef int (*opal_pmix_base_module_commit_fn_t)(void); + +/* Execute a blocking barrier across the processes identified in the + * specified list of opal_namelist_t. Passing a _NULL_ pointer + * indicates that the barrier is to span all processes in the client's + * namespace. Each provided opal_namelist_t can pass PMIX_RANK_WILDCARD to + * indicate that all processes in the given jobid are + * participating. + * + * The _collect_data_ parameter is passed to the server to indicate whether + * or not the barrier operation is to return the _put_ data from all + * participating processes. A value of _false_ indicates that the callback + * is just used as a release and no data is to be returned at that time. A + * value of _true_ indicates that all _put_ data is to be collected by the + * barrier. Returned data is locally cached so that subsequent calls to _PMIx_Get_ + * can be serviced without communicating to/from the server, but at the cost + * of increased memory footprint + */ +typedef int (*opal_pmix_base_module_fence_fn_t)(opal_list_t *procs, int collect_data); + +/* Fence_nb */ +/* Non-blocking version of PMIx_Fence. Note that the function will return + * an error if a _NULL_ callback function is given. 
*/ +typedef int (*opal_pmix_base_module_fence_nb_fn_t)(opal_list_t *procs, int collect_data, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + +/* Push a value into the client's namespace. The client library will cache + * the information locally until _PMIx_Commit_ is called. The provided scope + * value is passed to the local PMIx server, which will distribute the data + * as directed. */ +typedef int (*opal_pmix_base_module_put_fn_t)(opal_pmix_scope_t scope, + opal_value_t *val); -/* Fence_nb - not included in the current PMI standard. This is a non-blocking - * version of the standard "fence" call. All subsequent "get" calls will block - * pending completion of this operation. Non-blocking "get" calls will still - * complete as data becomes available */ -typedef int (*opal_pmix_base_module_fence_nb_fn_t)(opal_process_name_t *procs, size_t nprocs, - opal_pmix_cbfunc_t cbfunc, void *cbdata); +/* Retrieve information for the specified _key_ as published by the rank + * and jobid in the provided opal_process_name_t, and subject to any provided + * constraints, returning a pointer to the value in the given address. + * + * This is a blocking operation - the caller will block until + * the specified data has been _PMIx_Put_ by the specified rank. The caller is + * responsible for freeing all memory associated with the returned value when + * no longer required. */ +typedef int (*opal_pmix_base_module_get_fn_t)(const opal_process_name_t *proc, + const char *key, opal_list_t *info, + opal_value_t **val); + +/* Retrieve information for the specified _key_ as published by the given rank + * and jobid in the opal_process_name_t, and subject to any provided + * constraints. This is a non-blocking operation - the + * callback function will be executed once the specified data has been _PMIx_Put_ + * by the specified proc and retrieved by the local server. 
*/ +typedef int (*opal_pmix_base_module_get_nb_fn_t)(const opal_process_name_t *proc, + const char *key, opal_list_t *info, + opal_pmix_value_cbfunc_t cbfunc, void *cbdata); + +/* Publish the given data to the "universal" nspace + * for lookup by others subject to the provided scope. + * Note that the keys must be unique within the specified + * scope or else an error will be returned (first published + * wins). Attempts to access the data by procs outside of + * the provided scope will be rejected. + * + * Note: Some host environments may support user/group level + * access controls on the information in addition to the scope. + * These can be specified in the info array using the appropriately + * defined keys. + * + * The persistence parameter instructs the server as to how long + * the data is to be retained, within the context of the scope. + * For example, data published within _PMIX_NAMESPACE_ will be + * deleted along with the namespace regardless of the persistence. + * However, data published within PMIX_USER would be retained if + * the persistence was set to _PMIX_PERSIST_SESSION_ until the + * allocation terminates. + * + * The blocking form will block until the server confirms that the + * data has been posted and is available. The non-blocking form will + * return immediately, executing the callback when the server confirms + * availability of the data */ +typedef int (*opal_pmix_base_module_publish_fn_t)(opal_list_t *info); +typedef int (*opal_pmix_base_module_publish_nb_fn_t)(opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + +/* Lookup information published by another process within the + * specified scope. A scope of _PMIX_SCOPE_UNDEF_ requests that + * the search be conducted across _all_ namespaces. The "data" + * parameter consists of an array of pmix_pdata_t struct with the + * keys specifying the requested information. 
Data will be returned + * for each key in the associated info struct - any key that cannot + * be found will return with a data type of "PMIX_UNDEF". The function + * will return SUCCESS if _any_ values can be found, so the caller + * must check each data element to ensure it was returned. + * + * The proc field in each pmix_pdata_t struct will contain the + * nspace/rank of the process that published the data. + * + * Note: although this is a blocking function, it will _not_ wait + * for the requested data to be published. Instead, it will block + * for the time required by the server to lookup its current data + * and return any found items. Thus, the caller is responsible for + * ensuring that data is published prior to executing a lookup, or + * for retrying until the requested data is found */ +typedef int (*opal_pmix_base_module_lookup_fn_t)(opal_list_t *data, + opal_list_t *info); + +/* Non-blocking form of the _PMIx_Lookup_ function. Data for + * the provided NULL-terminated keys array will be returned + * in the provided callback function. The _wait_ parameter + * is used to indicate if the caller wishes the callback to + * wait for _all_ requested data before executing the callback + * (_true_), or to callback once the server returns whatever + * data is immediately available (_false_) */ +typedef int (*opal_pmix_base_module_lookup_nb_fn_t)(char **keys, opal_list_t *info, + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); + +/* Unpublish data posted by this process using the given keys + * within the specified scope. The function will block until + * the data has been removed by the server. A value of _NULL_ + * for the keys parameter instructs the server to remove + * _all_ data published by this process within the given scope */ +typedef int (*opal_pmix_base_module_unpublish_fn_t)(char **keys, opal_list_t *info); + +/* Non-blocking form of the _PMIx_Unpublish_ function. 
The + * callback function will be executed once the server confirms + * removal of the specified data. A value of _NULL_ + * for the keys parameter instructs the server to remove + * _all_ data published by this process within the given scope */ +typedef int (*opal_pmix_base_module_unpublish_nb_fn_t)(char **keys, opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + +/* Spawn a new job. The spawned applications are automatically + * connected to the calling process, and their assigned namespace + * is returned in the nspace parameter - a _NULL_ value in that + * location indicates that the caller doesn't wish to have the + * namespace returned. Behavior of individual resource managers + * may differ, but it is expected that failure of any application + * process to start will result in termination/cleanup of _all_ + * processes in the newly spawned job and return of an error + * code to the caller */ +typedef int (*opal_pmix_base_module_spawn_fn_t)(opal_list_t *job_info, + opal_list_t *apps, + opal_jobid_t *jobid); + +/* Non-blocking form of the _PMIx_Spawn_ function. The callback + * will be executed upon launch of the specified applications, + * or upon failure to launch any of them. */ +typedef int (*opal_pmix_base_module_spawn_nb_fn_t)(opal_list_t *job_info, + opal_list_t *apps, + opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata); + +/* Record the specified processes as "connected". Both blocking and non-blocking + * versions are provided. This means that the resource manager should treat the + * failure of any process in the specified group as a reportable event, and take + * appropriate action. Note that different resource managers may respond to + * failures in different manners. + * + * The list is to be provided as opal_namelist_t objects + * + * The callback function is to be called once all participating processes have + * called connect. 
The server is required to return any job-level info for the + * connecting processes that they might not already have - i.e., if the connect + * request involves procs from different nspaces, then each proc shall receive + * the job-level info from those nspaces other than their own. + * + * Note: a process can only engage in _one_ connect operation involving the identical + * set of ranges at a time. However, a process _can_ be simultaneously engaged + * in multiple connect operations, each involving a different set of ranges */ +typedef int (*opal_pmix_base_module_connect_fn_t)(opal_list_t *procs); + +typedef int (*opal_pmix_base_module_connect_nb_fn_t)(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); + +/* Disconnect a previously connected set of processes. An error will be returned + * if the specified set of procs was not previously "connected". As above, a process + * may be involved in multiple simultaneous disconnect operations. However, a process + * is not allowed to reconnect to a set of procs that has not fully completed + * disconnect - i.e., you have to fully disconnect before you can reconnect to the + * _same_ group of processes. */ +typedef int (*opal_pmix_base_module_disconnect_fn_t)(opal_list_t *procs); + +typedef int (*opal_pmix_base_module_disconnect_nb_fn_t)(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); + +/* Given a node name, return an array of processes within the specified jobid + * on that node. If the jobid is OPAL_JOBID_WILDCARD, then all processes on the node will + * be returned. If the specified node does not currently host any processes, + * then the returned list will be empty. + */ +typedef int (*opal_pmix_base_module_resolve_peers_fn_t)(const char *nodename, + opal_jobid_t jobid, + opal_list_t *procs); -/* Put - note that this API has been modified from the current PMI standard to - * reflect the proposed PMIx extensions. 
*/ -typedef int (*opal_pmix_base_module_put_fn_t)(opal_pmix_scope_t scope, - opal_value_t *kv); - -/* Get - note that this API has been modified from the current PMI standard to - * reflect the proposed PMIx extensions, and to include the process identifier so - * we can form the PMI key within the active component instead of sprinkling that - * code all over the code base. */ -typedef int (*opal_pmix_base_module_get_fn_t)(const opal_process_name_t *id, - const char *key, - opal_value_t **kv); - -/* Get_nb - not included in the current PMI standard. This is a non-blocking - * version of the standard "get" call. Retrieved value will be provided as - * opal_value_t object in the callback. We include the process identifier so - * we can form the PMI key within the active component instead of sprinkling that - * code all over the code base. */ -typedef void (*opal_pmix_base_module_get_nb_fn_t)(const opal_process_name_t *id, - const char *key, - opal_pmix_cbfunc_t cbfunc, - void *cbdata); - -/* Publish - the "info" parameter - * consists of a list of pmix_info_t objects */ -typedef int (*opal_pmix_base_module_publish_fn_t)(const char service_name[], - opal_list_t *info, - const char port[]); - -/* Lookup - the "info" parameter - * consists of a list of pmix_info_t objects */ -typedef int (*opal_pmix_base_module_lookup_fn_t)(const char service_name[], - opal_list_t *info, - char port[], int portLen); - -/* Unpublish - the "info" parameter - * consists of a list of pmix_info_t objects */ -typedef int (*opal_pmix_base_module_unpublish_fn_t)(const char service_name[], - opal_list_t *info); - -/* Get attribute - * Query the server for the specified attribute, returning it in the - * provided opal_value_t. The function will return "true" if the attribute - * is found, and "false" if not. - * Attributes are provided by the PMIx server, so there is no corresponding - * "put" function. 
*/ -typedef bool (*opal_pmix_base_module_get_attr_fn_t)(const char *attr, opal_value_t **kv); - -/* Get attribute (non-blocking) - * Query the server for the specified attribute.. - * Attributes are provided by the PMIx server, so there is no corresponding "put" - * function. The call will be executed as non-blocking, returning immediately, - * with data resulting from the call returned in the callback function. A returned - * NULL opal_value_t* indicates that the attribute was not found. The returned - * pointer is "owned" by the PMIx module and must not be released by the - * callback function */ -typedef int (*opal_pmix_base_module_get_attr_nb_fn_t)(const char *attr, - opal_pmix_cbfunc_t cbfunc, - void *cbdata); +/* Given a jobid, return the list of nodes hosting processes within + * that jobid. The returned string will contain a comma-delimited list + * of nodenames. The caller is responsible for releasing the string + * when done with it */ +typedef int (*opal_pmix_base_module_resolve_nodes_fn_t)(opal_jobid_t jobid, char **nodelist); -/**** APIs NOT CURRENTLY USED IN THE OMPI/ORTE CODE BASE, BUT THAT **** - **** MAY BE IMPLEMENTED IN THE NEAR FUTURE. 
COMPONENTS ARE FREE TO **** - **** JUST HAVE THEM RETURN "OPAL_ERR_NOT_IMPLEMENTED" ****/ -/* PMI2_Job_Spawn */ -typedef int (*opal_pmix_base_module_spawn_fn_t)(int count, const char * cmds[], - int argcs[], const char ** argvs[], - const int maxprocs[], - opal_list_t *info_keyval_vector, - opal_list_t *preput_keyval_vector, - char jobId[], int jobIdSize, - int errors[]); +/************************************************************ + * SERVER APIs * + ************************************************************/ -/* PMI2_Job_Connect */ -typedef int (*opal_pmix_base_module_job_connect_fn_t)(const char jobId[]); +/* Initialize the server support library - must pass the callback + * module for the server to use, plus any attributes we want to + * pass down to it */ +typedef int (*opal_pmix_base_module_server_init_fn_t)(opal_pmix_server_module_t *module, + opal_list_t *info); -/* PMI2_Job_Disconnect */ -typedef int (*opal_pmix_base_module_job_disconnect_fn_t)(const char jobId[]); +/* Finalize the server support library */ +typedef int (*opal_pmix_base_module_server_finalize_fn_t)(void); +/* given a semicolon-separated list of input values, generate + * a regex that can be passed down to the client for parsing. + * The caller is responsible for free'ing the resulting + * string + * + * If values have leading zero's, then that is preserved. You + * have to add back any prefix/suffix for node names + * odin[009-015,017-023,076-086] + * + * "pmix:odin[009-015,017-023,076-086]" + * + * Note that the "pmix" at the beginning of each regex indicates + * that the PMIx native parser is to be used by the client for + * parsing the provided regex. Other parsers may be supported - see + * the pmix_client.h header for a list. 
+ */ +typedef int (*opal_pmix_base_module_generate_regex_fn_t)(const char *input, char **regex); -/* register an errhandler to report loss of connection to the server */ -typedef void (*opal_pmix_base_module_register_fn_t)(opal_pmix_errhandler_fn_t errhandler); +/* The input is expected to consist of a semicolon-separated list + * of ranges. Thus, an input of: + * "1-4;2-5;8,10,11,12;6,7,9" + * would generate a regex of + * "[pmix:2x(3);8,10-12;6-7,9]" + * + * Note that the "pmix" at the beginning of each regex indicates + * that the PMIx native parser is to be used by the client for + * parsing the provided regex. Other parsers may be supported - see + * the pmix_client.h header for a list. + */ +typedef int (*opal_pmix_base_module_generate_ppn_fn_t)(const char *input, char **ppn); + +/* Setup the data about a particular nspace so it can + * be passed to any child process upon startup. The PMIx + * connection procedure provides an opportunity for the + * host PMIx server to pass job-related info down to a + * child process. This might include the number of + * processes in the job, relative local ranks of the + * processes within the job, and other information of + * use to the process. The server is free to determine + * which, if any, of the supported elements it will + * provide - defined values are provided in pmix_common.h. + * + * NOTE: the server must register ALL nspaces that will + * participate in collective operations with local processes. + * This means that the server must register an nspace even + * if it will not host any local procs from within that + * nspace IF any local proc might at some point perform + * a collective operation involving one or more procs from + * that nspace. This is necessary so that the collective + * operation can know when it is locally complete. + * + * The caller must also provide the number of local procs + * that will be launched within this nspace.
This is required + * for the PMIx server library to correctly handle collectives + * as a collective operation call can occur before all the + * procs have been started */ +typedef int (*opal_pmix_base_module_server_register_nspace_fn_t)(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); + +/* Deregister an nspace. Instruct the PMIx server to purge + * all info relating to the provided jobid so that memory + * can be freed. Note that the server will automatically + * purge all info relating to any clients it has from + * this nspace */ +typedef void (*opal_pmix_base_module_server_deregister_nspace_fn_t)(opal_jobid_t jobid); + +/* Register a client process with the PMIx server library. The + * expected user ID and group ID of the child process helps the + * server library to properly authenticate clients as they connect + * by requiring the two values to match. + * + * The host server can also, if it desires, provide an object + * it wishes to be returned when a server function is called + * that relates to a specific process. For example, the host + * server may have an object that tracks the specific client. + * Passing the object to the library allows the library to + * return that object when the client calls "finalize", thus + * allowing the host server to access the object without + * performing a lookup. */ +typedef int (*opal_pmix_base_module_server_register_client_fn_t)(const opal_process_name_t *proc, + uid_t uid, gid_t gid, + void *server_object, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); + +/* Deregister a client. Instruct the PMIx server to purge + * all info relating to the provided client so that memory + * can be freed. As per above note, the server will automatically + * free all client-related data when the nspace is deregistered, + * so there is no need to call this function during normal + * finalize operations. 
Instead, this is provided for use + * during exception operations */ +typedef void (*opal_pmix_base_module_server_deregister_client_fn_t)(const opal_process_name_t *proc); + +/* Setup the environment of a child process to be forked + * by the host so it can correctly interact with the PMIx + * server. The PMIx client needs some setup information + * so it can properly connect back to the server. This function + * will set appropriate environmental variables for this purpose. */ +typedef int (*opal_pmix_base_module_server_setup_fork_fn_t)(const opal_process_name_t *proc, char ***env); + +/* Define a function by which the host server can request modex data + * from the local PMIx server. This is used to support the direct modex + * operation - i.e., where data is cached locally on each PMIx + * server for its own local clients, and is obtained on-demand + * for remote requests. Upon receiving a request from a remote + * server, the host server will call this function to pass the + * request into the PMIx server. The PMIx server will return a blob + * (once it becomes available) via the cbfunc - the host + * server shall send the blob back to the original requestor */ +typedef int (*opal_pmix_base_module_server_dmodex_request_fn_t)(const opal_process_name_t *proc, + opal_pmix_modex_cbfunc_t cbfunc, + void *cbdata); + +/* Report an error to a process for notification via any + * registered errhandler. The errhandler registration can be + * called by both the server and the client application. On the + * server side, the errhandler is used to report errors detected + * by PMIx to the host server for handling. On the client side, + * the errhandler is used to notify the process of errors + * reported by the server - e.g., the failure of another process. + * + * This function allows the host server to direct the server + * convenience library to notify all indicated local procs of + * an error. The error can be local, or anywhere in the cluster. 
+ * The status indicates the error being reported. + * + * The first array of procs informs the server library as to which + * processes should be alerted - e.g., the processes that are in + * a directly-affected job or are connected to one that is affected. + * Passing a NULL for this array will indicate that all local procs + * are to be notified. + * + * The second array identifies the processes that will be impacted + * by the error. This could consist of a single process, or a number + * of processes. + * + * The info array contains any further info the RM can and/or chooses + * to provide. + * + * The callback function will be called upon completion of the + * notify_error function's actions. Note that any messages will + * have been queued, but may not have been transmitted by this + * time. Note that the caller is required to maintain the input + * data until the callback function has been executed! */ +typedef int (*opal_pmix_base_module_server_notify_error_fn_t)(int status, + opal_list_t *procs, + opal_list_t *error_procs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + + +/************************************************************ + * UTILITY APIs * + ************************************************************/ + +/* get the version of the embedded library */ +typedef const char* (*opal_pmix_base_module_get_version_fn_t)(void); + +/* Register an errhandler to report errors. Three types of errors + * can be reported: + * + * (a) those that occur within the client library, but are not + * reportable via the API itself (e.g., loss of connection to + * the server). These errors typically occur during behind-the-scenes + * non-blocking operations. + * + * (b) job-related errors such as the failure of another process in + * the job or in any connected job, impending failure of hardware + * within the job's usage footprint, etc. 
+ * + * (c) system notifications that are made available by the local + * administrators + * + * By default, only errors that directly affect the process and/or + * any process to which it is connected (via the PMIx_Connect call) + * will be reported. Options to modify that behavior can be provided + * in the info array + * + * Both the client application and the resource manager can register + * err handlers for specific errors. PMIx client/server calls the registered + * err handler upon receiving an error notification (via PMIx_Notify_error) + * from the other end (Resource Manager/Client application). + * + * Multiple err handlers can be registered for different errors. PMIx returns + * an integer reference to each registered handler in the callback fn. The caller + * must retain the reference in order to deregister the errhandler. + * Modification of the notification behavior can be accomplished by + * deregistering the current errhandler, and then registering it + * using a new set of info values. + * + * See pmix_types.h for a description of the notification function */ +typedef void (*opal_pmix_base_module_register_fn_t)(opal_list_t *info, + opal_pmix_notification_fn_t errhandler, + opal_pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata); + +/* deregister the errhandler + * errhandler_ref is the reference returned by PMIx for the errhandler + * to pmix_errhandler_reg_cbfunc_t */ +typedef void (*opal_pmix_base_module_deregister_fn_t)(int errhandler, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); -/* deregister the errhandler */ -typedef void (*opal_pmix_base_module_deregister_fn_t)(void); +/* Report an error to a process for notification via any + * registered errhandler. The errhandler registration can be + * called by both the server and the client application. On the + * server side, the errhandler is used to report errors detected + * by PMIx to the host server for handling.
On the client side, + * the errhandler is used to notify the process of errors + * reported by the server - e.g., the failure of another process. + * + * This function allows the host server to direct the server + * convenience library to notify all indicated local procs of + * an error. The error can be local, or anywhere in the cluster. + * The status indicates the error being reported. + * + * The client application can also call this function to notify the + * resource manager of an error it encountered. It can request the host + * server to notify the indicated processes about the error. + * + * The first array of procs informs the server library as to which + * processes should be alerted - e.g., the processes that are in + * a directly-affected job or are connected to one that is affected. + * Passing a NULL for this array will indicate that all local procs + * are to be notified. + * + * The second array identifies the processes that will be impacted + * by the error. This could consist of a single process, or a number + * of processes. + * + * The info array contains any further info the RM can and/or chooses + * to provide. + * + * The callback function will be called upon completion of the + * notify_error function's actions. Note that any messages will + * have been queued, but may not have been transmitted by this + * time. Note that the caller is required to maintain the input + * data until the callback function has been executed! 
+*/ +typedef int (*opal_pmix_base_module_notify_error_fn_t)(int status, + opal_list_t *procs, + opal_list_t *error_procs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + +/* store data internally, but don't push it out to be shared - this is + * intended solely for storage of info on other procs that comes thru + * a non-PMIx channel (e.g., may be computed locally) but is desired + * to be available via a PMIx_Get call */ +typedef int (*opal_pmix_base_module_store_fn_t)(const opal_process_name_t *proc, + opal_value_t *val); + +/* retrieve the nspace corresponding to a given jobid */ +typedef const char* (*opal_pmix_base_module_get_nspace_fn_t)(opal_jobid_t jobid); + +/* register a jobid-to-nspace pair */ +typedef void (*opal_pmix_base_module_register_jobid_fn_t)(opal_jobid_t jobid, const char *nspace); /* * the standard public API data structure */ typedef struct { - /* currently used APIs */ - opal_pmix_base_module_init_fn_t init; - opal_pmix_base_module_fini_fn_t finalize; - opal_pmix_base_module_initialized_fn_t initialized; - opal_pmix_base_module_abort_fn_t abort; - opal_pmix_base_module_fence_fn_t fence; - opal_pmix_base_module_fence_nb_fn_t fence_nb; - opal_pmix_base_module_put_fn_t put; - opal_pmix_base_module_get_fn_t get; - opal_pmix_base_module_get_nb_fn_t get_nb; - opal_pmix_base_module_publish_fn_t publish; - opal_pmix_base_module_lookup_fn_t lookup; - opal_pmix_base_module_unpublish_fn_t unpublish; - opal_pmix_base_module_get_attr_fn_t get_attr; - opal_pmix_base_module_get_attr_nb_fn_t get_attr_nb; - /* currently unused APIs */ - opal_pmix_base_module_spawn_fn_t spawn; - opal_pmix_base_module_job_connect_fn_t job_connect; - opal_pmix_base_module_job_disconnect_fn_t job_disconnect; - /* register the errhandler */ - opal_pmix_base_module_register_fn_t register_errhandler; - opal_pmix_base_module_deregister_fn_t deregister_errhandler; + /* client APIs */ + opal_pmix_base_module_init_fn_t init; + opal_pmix_base_module_fini_fn_t 
finalize; + opal_pmix_base_module_initialized_fn_t initialized; + opal_pmix_base_module_abort_fn_t abort; + opal_pmix_base_module_commit_fn_t commit; + opal_pmix_base_module_fence_fn_t fence; + opal_pmix_base_module_fence_nb_fn_t fence_nb; + opal_pmix_base_module_put_fn_t put; + opal_pmix_base_module_get_fn_t get; + opal_pmix_base_module_get_nb_fn_t get_nb; + opal_pmix_base_module_publish_fn_t publish; + opal_pmix_base_module_publish_nb_fn_t publish_nb; + opal_pmix_base_module_lookup_fn_t lookup; + opal_pmix_base_module_lookup_nb_fn_t lookup_nb; + opal_pmix_base_module_unpublish_fn_t unpublish; + opal_pmix_base_module_unpublish_nb_fn_t unpublish_nb; + opal_pmix_base_module_spawn_fn_t spawn; + opal_pmix_base_module_spawn_nb_fn_t spawn_nb; + opal_pmix_base_module_connect_fn_t connect; + opal_pmix_base_module_connect_nb_fn_t connect_nb; + opal_pmix_base_module_disconnect_fn_t disconnect; + opal_pmix_base_module_disconnect_nb_fn_t disconnect_nb; + opal_pmix_base_module_resolve_peers_fn_t resolve_peers; + opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes; + /* server APIs */ + opal_pmix_base_module_server_init_fn_t server_init; + opal_pmix_base_module_server_finalize_fn_t server_finalize; + opal_pmix_base_module_generate_regex_fn_t generate_regex; + opal_pmix_base_module_generate_ppn_fn_t generate_ppn; + opal_pmix_base_module_server_register_nspace_fn_t server_register_nspace; + opal_pmix_base_module_server_deregister_nspace_fn_t server_deregister_nspace; + opal_pmix_base_module_server_register_client_fn_t server_register_client; + opal_pmix_base_module_server_deregister_client_fn_t server_deregister_client; + opal_pmix_base_module_server_setup_fork_fn_t server_setup_fork; + opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request; + opal_pmix_base_module_server_notify_error_fn_t server_notify_error; + /* Utility APIs */ + opal_pmix_base_module_get_version_fn_t get_version; + opal_pmix_base_module_register_fn_t register_errhandler; + 
opal_pmix_base_module_deregister_fn_t deregister_errhandler; + opal_pmix_base_module_notify_error_fn_t notify_error; + opal_pmix_base_module_store_fn_t store_local; + opal_pmix_base_module_get_nspace_fn_t get_nspace; + opal_pmix_base_module_register_jobid_fn_t register_jobid; } opal_pmix_base_module_t; typedef struct { @@ -457,9 +856,6 @@ typedef struct { /* Global structure for accessing store functions */ OPAL_DECLSPEC extern opal_pmix_base_module_t opal_pmix; /* holds base function pointers */ -/* flag to indicate collective vs direct fence operations */ -OPAL_DECLSPEC extern bool opal_pmix_use_collective; - END_C_DECLS #endif diff --git a/opal/mca/pmix/pmix112/Makefile.am b/opal/mca/pmix/pmix112/Makefile.am new file mode 100644 index 00000000000..84ed3f35ed6 --- /dev/null +++ b/opal/mca/pmix/pmix112/Makefile.am @@ -0,0 +1,53 @@ +# +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015-2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST = autogen.subdirs + +SUBDIRS = pmix + +sources = \ + pmix1.h \ + pmix_pmix1_component.c \ + pmix_pmix1.c \ + pmix1_client.c \ + pmix1_server_south.c \ + pmix1_server_north.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+ +if MCA_BUILD_opal_pmix_pmix112_DSO +component_noinst = +component_install = mca_pmix_pmix112.la +else +component_noinst = libmca_pmix_pmix112.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pmix_pmix112_la_SOURCES = $(sources) +mca_pmix_pmix112_la_CFLAGS = $(opal_pmix_pmix112_CFLAGS) +mca_pmix_pmix112_la_CPPFLAGS = \ + -I$(srcdir)/pmix/include $(opal_pmix_pmix112_CPPFLAGS) +mca_pmix_pmix112_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix112_LDFLAGS) +mca_pmix_pmix112_la_LIBADD = $(opal_pmix_pmix112_LIBS) +mca_pmix_pmix112_la_DEPENDENCIES = $(opal_pmix_pmix112_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pmix_pmix112_la_SOURCES =$(sources) +libmca_pmix_pmix112_la_CFLAGS = $(opal_pmix_pmix112_CFLAGS) +libmca_pmix_pmix112_la_CPPFLAGS = -I$(srcdir)/pmix/include $(opal_pmix_pmix112_CPPFLAGS) +libmca_pmix_pmix112_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix112_LDFLAGS) +libmca_pmix_pmix112_la_LIBADD = $(opal_pmix_pmix112_LIBS) +libmca_pmix_pmix112_la_DEPENDENCIES = $(opal_pmix_pmix112_LIBS) diff --git a/opal/mca/pmix/pmix112/README b/opal/mca/pmix/pmix112/README new file mode 100644 index 00000000000..472cd8294b9 --- /dev/null +++ b/opal/mca/pmix/pmix112/README @@ -0,0 +1,17 @@ +Copyright (c) 2017 IBM Corporation. All rights reserved. +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +=========================================================================== +This internal component includes PMIx v1.2.1 plus the following commits +cherry-picked from the PMIx v1.2 branch (which will be included in the +eventual PMIx v1.2.2 release). Newer patches at top.
+ + * https://github.com/pmix/pmix/commit/1b86a6e7ee99fc5969a0789a9905a4a2159a6dc0 + * https://github.com/pmix/pmix/commit/14f865c4b631827fb99779d42eaf0567f117a76f + * https://github.com/pmix/pmix/commit/4269e8484bd883523ae485bf2e1b7bbc0719c494 + * https://github.com/pmix/pmix/commit/a2d431cbec162b01e15920cc75df1af9ad244f06 + * https://github.com/pmix/pmix/commit/8587f278a17301633ccf6f0d7cb086e3be8f4793 diff --git a/opal/mca/pmix/pmix112/autogen.subdirs b/opal/mca/pmix/pmix112/autogen.subdirs new file mode 100644 index 00000000000..f4fd6e846e1 --- /dev/null +++ b/opal/mca/pmix/pmix112/autogen.subdirs @@ -0,0 +1 @@ +pmix diff --git a/opal/mca/pmix/pmix112/configure.m4 b/opal/mca/pmix/pmix112/configure.m4 new file mode 100644 index 00000000000..394bc440c82 --- /dev/null +++ b/opal/mca/pmix/pmix112/configure.m4 @@ -0,0 +1,94 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011-2013 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_opal_pmix_pmix112_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_opal_pmix_pmix112_CONFIG],[ + AC_CONFIG_FILES([opal/mca/pmix/pmix112/Makefile]) + + OPAL_VAR_SCOPE_PUSH([PMIX_VERSION opal_pmix_pmix112_save_CPPFLAGS opal_pmix_pmix112_save_LDFLAGS opal_pmix_pmix112_save_LIBS opal_pmix_pmix112_basedir opal_pmix_pmix112_save_cflags]) + + PMIX_VERSION= + opal_pmix_pmix112_basedir=opal/mca/pmix/pmix112 + + opal_pmix_pmix112_save_CFLAGS=$CFLAGS + opal_pmix_pmix112_save_CPPFLAGS=$CPPFLAGS + opal_pmix_pmix112_save_LDFLAGS=$LDFLAGS + opal_pmix_pmix112_save_LIBS=$LIBS + + AC_ARG_ENABLE([pmix-dstore], + [AC_HELP_STRING([--enable-pmix-dstore], + [Enable PMIx shared memory data store (default: enabled)])]) + AC_MSG_CHECKING([if PMIx shared memory data store is enabled]) + if test "$enable_pmix_dstore" != "no"; then + AC_MSG_RESULT([yes]) + opal_pmix_pmix_sm_flag=--enable-dstore + else + AC_MSG_RESULT([no (disabled)]) + opal_pmix_pmix_sm_flag=--disable-dstore + fi + + opal_pmix_pmix112_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix112_ $opal_pmix_pmix_sm_flag --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\"" + AS_IF([test "$enable_debug" = "yes"], + [opal_pmix_pmix112_args="--enable-debug $opal_pmix_pmix112_args" + CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], + [opal_pmix_pmix112_args="--disable-debug $opal_pmix_pmix112_args" + CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS"]) + CPPFLAGS="-I$OPAL_TOP_SRCDIR -I$OPAL_TOP_BUILDDIR -I$OPAL_TOP_SRCDIR/opal/include -I$OPAL_TOP_BUILDDIR/opal/include $CPPFLAGS" + + OPAL_CONFIG_SUBDIR([$opal_pmix_pmix112_basedir/pmix], + [$opal_pmix_pmix112_args $opal_subdir_args 'CFLAGS=$CFLAGS' 'CPPFLAGS=$CPPFLAGS'], + [opal_pmix_pmix112_happy=1],
[opal_pmix_pmix112_happy=0]) + + AS_IF([test $opal_pmix_pmix112_happy -eq 1], + [PMIX_VERSION="internal v`$srcdir/$opal_pmix_pmix112_basedir/pmix/config/pmix_get_version.sh $srcdir/$opal_pmix_pmix112_basedir/pmix/VERSION`" + # Build flags for our Makefile.am + opal_pmix_pmix112_LIBS='$(OPAL_TOP_BUILDDIR)/'"$opal_pmix_pmix112_basedir"'/pmix/libpmix.la' + opal_pmix_pmix112_CPPFLAGS='-I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix112/pmix/include/pmix -I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix112/pmix/include -I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix112/pmix -I$(OPAL_TOP_SRCDIR)/opal/mca/pmix/pmix112/pmix' + AC_SUBST([opal_pmix_pmix112_LIBS]) + AC_SUBST([opal_pmix_pmix112_CPPFLAGS])]) + + CFLAGS=$opal_pmix_pmix112_save_CFLAGS + CPPFLAGS=$opal_pmix_pmix112_save_CPPFLAGS + LDFLAGS=$opal_pmix_pmix112_save_LDFLAGS + LIBS=$opal_pmix_pmix112_save_LIBS + + # If we are not building the internal pmix, then indicate that + # this component should not be built. NOTE: we still did all the + # above configury so that all the proper GNU Autotools + # infrastructure is setup properly (e.g., w.r.t. SUBDIRS=pmix in + # this directory's Makefile.am, we still need the Autotools "make + # distclean" infrastructure to work properly). + AS_IF([test "$opal_external_pmix_happy" = "yes"], + [AC_MSG_WARN([using an external pmix; disqualifying this component]) + opal_pmix_pmix112_happy=0]) + + AS_IF([test $opal_pmix_pmix112_happy -eq 1], + [$1], + [$2]) + + OPAL_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/pmix/pmix112/pmix/INSTALL b/opal/mca/pmix/pmix112/pmix/INSTALL new file mode 100644 index 00000000000..005301463ff --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/INSTALL @@ -0,0 +1,88 @@ +Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + University Research and Technology + Corporation. All rights reserved. +Copyright (c) 2004-2005 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. 
+Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + University of Stuttgart. All rights reserved. +Copyright (c) 2004-2005 The Regents of the University of California. + All rights reserved. +Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + + +For More Information +==================== + +This file is a *very* short overview of building and installing +the PMIx library. Much more information is available on the +PMIx web site (e.g., see the FAQ section): + + http://pmix.github.io/pmix/master + + +Developer Builds +================ + +If you have checked out a DEVELOPER'S COPY of PMIx (i.e., you checked +out from Git), you should read the HACKING file before attempting to +build PMIx. You must then run: + +shell$ ./autogen.sh + +You will need very recent versions of GNU Autoconf, Automake, and +Libtool. If autogen.sh fails, read the HACKING file. If anything +else fails, read the HACKING file. Finally, we suggest reading the +HACKING file. + +*** NOTE: Developer's copies of PMIx typically include a large +performance penalty at run-time because of extra debugging overhead. + + +User Builds +=========== + +Building PMIx is typically a combination of running "configure" +and "make". Execute the following commands to install the PMIx +system from within the directory at the top of the tree: + +shell$ ./configure --prefix=/where/to/install +[...lots of output...] +shell$ make all install + +If you need special access to install, then you can execute "make +all" as a user with write permissions in the build tree, and a +separate "make install" as a user with write permissions to the +install tree. + +Compiling support for specific compilers and environments may +require additional command line flags when running configure. See the +README file for more details. Note that VPATH builds are fully +supported. 
For example: + +shell$ gtar zxf pmix-X.Y.Z.tar.gz +shell$ cd pmix-X.Y.Z +shell$ mkdir build +shell$ cd build +shell$ ../configure ...your options... +[...lots of output...] +shell$ make all install + +Parallel builds are also supported (although some versions of "make", +such as GNU make, will only use the first target listed on the command +line when executing parallel builds). For example (assume GNU make): + +shell$ make -j 4 all +[...lots of output...] +shell$ make install + +Parallel make is generally only helpful in the build phase; the +installation process is mostly serial and does not benefit much from +parallel make. + diff --git a/opal/mca/pmix/pmix112/pmix/LICENSE b/opal/mca/pmix/pmix112/pmix/LICENSE new file mode 100644 index 00000000000..f9e6f047910 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/LICENSE @@ -0,0 +1,93 @@ +Most files in this release are marked with the copyrights of the +organizations who have edited them. The copyrights below are in no +particular order and generally reflect members of the Open MPI core +team who have contributed code that may or may not have been ported +to PMIx. Per the terms of that LICENSE, we include the list here. +The copyrights for code used under license from other parties +are included in the corresponding files. + +Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + University Research and Technology + Corporation. All rights reserved. +Copyright (c) 2004-2010 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. +Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, + University of Stuttgart. All rights reserved. +Copyright (c) 2004-2008 The Regents of the University of California. + All rights reserved. +Copyright (c) 2006-2010 Los Alamos National Security, LLC. All rights + reserved. +Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2010 Voltaire, Inc. All rights reserved.
+Copyright (c) 2006-2011 Sandia National Laboratories. All rights reserved. +Copyright (c) 2006-2010 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. +Copyright (c) 2006-2010 The University of Houston. All rights reserved. +Copyright (c) 2006-2009 Myricom, Inc. All rights reserved. +Copyright (c) 2007-2008 UT-Battelle, LLC. All rights reserved. +Copyright (c) 2007-2010 IBM Corporation. All rights reserved. +Copyright (c) 1998-2005 Forschungszentrum Juelich, Juelich Supercomputing + Centre, Federal Republic of Germany +Copyright (c) 2005-2008 ZIH, TU Dresden, Federal Republic of Germany +Copyright (c) 2007 Evergrid, Inc. All rights reserved. +Copyright (c) 2008 Chelsio, Inc. All rights reserved. +Copyright (c) 2008-2009 Institut National de Recherche en + Informatique. All rights reserved. +Copyright (c) 2007 Lawrence Livermore National Security, LLC. + All rights reserved. +Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved. +Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. +Copyright (c) 2008-2010 Oak Ridge National Labs. All rights reserved. +Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved. +Copyright (c) 2009 Bull SAS. All rights reserved. +Copyright (c) 2010 ARM ltd. All rights reserved. +Copyright (c) 2010-2011 Alex Brick . All rights reserved. +Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights + reserved. +Copyright (c) 2013-2014 Intel, Inc. All rights reserved. +Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. + +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +The following LICENSE pertains to both PMIx and any code ported +from Open MPI. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer listed + in this license in the documentation and/or other materials + provided with the distribution. + +- Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +The copyright holders provide no reassurances that the source code +provided does not infringe any patent, copyright, or any other +intellectual property rights of third parties. The copyright holders +disclaim any liability to any recipient for claims brought against +recipient by any third party for infringement of that parties +intellectual property rights. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/opal/mca/pmix/pmix112/pmix/Makefile.am b/opal/mca/pmix/pmix112/pmix/Makefile.am new file mode 100644 index 00000000000..500240b5916 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/Makefile.am @@ -0,0 +1,107 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Note that the -I directory must *exactly* match what was specified +# via AC_CONFIG_MACRO_DIR in configure.ac. +ACLOCAL_AMFLAGS = -I ./config + +headers = +sources = +nodist_headers = +EXTRA_DIST = + +# Only install the valgrind suppressions file if we're building in +# standalone mode +dist_pmixdata_DATA = +if ! PMIX_EMBEDDED_MODE +dist_pmixdata_DATA += contrib/pmix-valgrind.supp + + +man_MANS = \ + man/man3/pmix_init.3 \ + man/man3/pmix_finalize.3 \ + man/man3/pmix_initialized.3 \ + man/man3/pmix_abort.3 \ + man/man3/pmix_put.3 \ + man/man3/pmix_commit.3 \ + man/man3/pmix_fence.3 \ + man/man3/pmix_get.3 \ + man/man7/pmix.7 \ + man/man7/pmix_constants.7 +endif + +include config/Makefile.am +include include/Makefile.am +include src/class/Makefile.am +include src/include/Makefile.am +include src/buffer_ops/Makefile.am +include src/util/Makefile.am +include src/usock/Makefile.am +include src/client/Makefile.am +include src/server/Makefile.am +include src/sec/Makefile.am +include src/common/Makefile.am + +if WANT_DSTORE +include src/sm/Makefile.am +include src/dstore/Makefile.am +endif + +if PMIX_EMBEDDED_MODE +noinst_LTLIBRARIES = libpmix.la +libpmix_la_SOURCES = $(headers) $(sources) +libpmix_la_LDFLAGS = + +else + +lib_LTLIBRARIES = libpmix.la +libpmix_la_SOURCES = $(headers) $(sources) +libpmix_la_LDFLAGS = -version-info 
$(libpmix_so_version) +SUBDIRS = . test examples +endif + +if WANT_INSTALL_HEADERS +pmixdir = $(pmixincludedir)/$(subdir) +nobase_pmix_HEADERS = $(headers) + +else + +noinst_HEADERS = $(headers) +endif + +nroff: + @for file in $(man_MANS); do \ + source=`echo $$file | sed -e 's@/man[0-9]@@'`; \ + contrib/md2nroff.pl --source=$$source.md; \ + done + +EXTRA_DIST += README INSTALL VERSION LICENSE autogen.sh \ + config/pmix_get_version.sh $(man_MANS) \ + contrib/platform/optimized \ + test/test_common.h test/cli_stages.h \ + test/server_callbacks.h test/test_fence.h \ + test/test_publish.h test/test_resolve_peers.h \ + test/test_spawn.h test/utils.h test/test_cd.h + + +dist-hook: + env LS_COLORS= sh "$(top_srcdir)/config/distscript.sh" "$(top_srcdir)" "$(distdir)" "$(PMIX_VERSION)" "$(PMIX_REPO_REV)" + diff --git a/opal/mca/pmix/pmix112/pmix/NEWS b/opal/mca/pmix/pmix112/pmix/NEWS new file mode 100644 index 00000000000..1acf9297d0e --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/NEWS @@ -0,0 +1,122 @@ +Copyright (c) 2015-2016 Intel, Inc. All rights reserved. +Copyright (c) 2016-2017 IBM Corporation. All rights reserved. +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +=========================================================================== + +This file contains the main features as well as overviews of specific +bug fixes (and other actions) for each version of PMIx since +version 1.0. + +As more fully described in the "Software Version Number" section in +the README file, PMIx typically maintains two separate version +series simultaneously - the current release and one that is locked +to only bug fixes. Since these series are semi-independent of each +other, a single NEWS-worthy item might apply to different series. For +example, a bug might be fixed in the master, and then moved to the +current release as well as the "stable" bug fix release branch. 
+ +1.2.1 +----- +- dstore: Fix data corruption bug in key overwrite cases +- dstore: Performance and scalability fixes +- sm: Use posix_fallocate() before mmap +- pmi1/pmi2: Restore support +- dstore: Fix extension slot size allocation (Issue #280) + + +1.2.0 +----- +- Add shared memory data storage (dstore) option. Default: enabled + Configure option: --disable-dstore +- PMIx_Commit performance improvements +- Disable errhandler support +- Keep job info in the shared memory dstore +- PMIx_Get performance and memory improvements + + + +1.1.5 +----- +- Add pmix_version.h to support direct detection of PMIx library version +- Fix support for Solaris 10 by using abstract version of strnlen +- Fix native security module for Solaris by using getpeerucred in + that environment +- Ensure man pages don't get installed in embedded builds +- Pass temporary directory locations in info keys instead of + the environment + + +1.1.4 +----- +- Properly increment the reference count for PMIx_Init +- Fix examples so all run properly +- Fix/complete PMI2 backward compatibility support to handle + keys that are not associated with a specific rank +- Do a better job of hiding non-API symbols +- Correct handling of semi-colon terminations on macros. + Thanks to Ashley Pittman for the patch +- Add more man pages +- Improve error checking and messages for connection + attempts from client to server +- If the tmpdir name is too long, provide an appropriate + help message to the user (particularly relevant on + Mac OSX). Thanks to Rainer Keller for the patch. +- Fix some C++ compatibility issues +- Fix/complete PMI-1 backward compatibility support +- Do not install internal headers unless specifically + requested to do so +- Add support for multiple calls to Put/Commit +- Silence some "return code unchecked" warnings. 
Thanks + to Jim Garlick for pointing them out +- Resolve a race condition during register_clients + + +1.1.3 +----- +- Update the symbol hiding file to cover all symbols +- Fix examples and test directory Makefile.am's so + the Makefiles are automatically built and the + code compiled, but not installed +- Do not install the pmix library in embedded use-cases + + +1.1.2 +----- +- Provide a check for hwloc support - if not found, then + don't pass any topology info down to the client as it + won't know how to unpack it anyway. +- Fix a few places where thread safety wasn't provided +- Fix several issues identified by Paul Hargrove: + * PMIx_Init(NULL) is supported + * Incomplete PMIx_constants man page had some lingering cruft + * Missing prototype for pmix_value_load +- Fix race condition in PMIx_Get/PMIx_Get_nb +- Fix double-free error in pmix_server_commit. +- Fix PMIX_LOAD_BUFFER to be safe. + + +1.1.1 +----- +- Fix an issue where the example and test programs + were incorrectly being installed. Thanks to Orion + Poplawski for reporting it + + +1.1.0 +----- +- major update of APIs to reflect comments received from 1.0.0 + non-production release +- fixed thread-safety issues +- fixed a range of pack/unpack issues +- added unit tests for all APIs + + +1.0.0 +------ +Initial public release of draft APIs for comment - not production +intended diff --git a/opal/mca/pmix/pmix112/pmix/README b/opal/mca/pmix/pmix112/pmix/README new file mode 100644 index 00000000000..55b7c61f5e3 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/README @@ -0,0 +1,315 @@ +Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + University Research and Technology + Corporation. All rights reserved. +Copyright (c) 2004-2007 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. +Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + University of Stuttgart. All rights reserved. 
+Copyright (c) 2004-2007 The Regents of the University of California. + All rights reserved. +Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2011 Mellanox Technologies. All rights reserved. +Copyright (c) 2006-2012 Oracle and/or its affiliates. All rights reserved. +Copyright (c) 2007 Myricom, Inc. All rights reserved. +Copyright (c) 2008 IBM Corporation. All rights reserved. +Copyright (c) 2010 Oak Ridge National Labs. All rights reserved. +Copyright (c) 2011 University of Houston. All rights reserved. +Copyright (c) 2013-2015 Intel, Inc. All rights reserved +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +=========================================================================== + +When submitting questions and problems, be sure to include as much +extra information as possible. This web page details all the +information that we request in order to provide assistance: + + http://pmix.github.io/master/community/help/ + +The best way to report bugs, send comments, or ask questions is to +sign up on the PMIx mailing list, which is hosted by GoogleGroups: + + pmix@googlegroups.com + +Because of spam, only subscribers are allowed to post to this list +(ensure that you subscribe with and post from exactly the same e-mail +address -- joe@example.com is considered different than +joe@mycomputer.example.com!). You can subscribe to the list here: + + https://groups.google.com/d/forum/pmix + +Thanks for your time. + +=========================================================================== + +More information is available in the PMIx FAQ: + + http://pmix.github.io/master/faq/ + +We are in early days, so please be patient - info will grow as questions +are addressed. 
+ +=========================================================================== + +The following abbreviated list of release notes applies to this code +base as of this writing (12 November 2015): + +General notes +------------- + +- The majority of PMIx's documentation is here in this file, the + included man pages, and on the web site FAQ + (http://pmix.github.io/master/faq). This will eventually be + supplemented with cohesive installation and user documentation files. + +- Systems that have been tested are: + - Linux (various flavors/distros), 32 bit, with gcc + - Linux (various flavors/distros), 64 bit (x86), with gcc, Intel, + and Portland (*) + - OS X (10.7 and above), 32 and 64 bit (x86_64), with gcc (*) + +(*) Compiler Notes +-------------- + +- The Portland Group compilers prior to version 7.0 require the + "-Msignextend" compiler flag to extend the sign bit when converting + from a shorter to longer integer. This is different than other + compilers (such as GNU). When compiling PMIx with the Portland + compiler suite, the following flags should be passed to PMIx's + configure script: + + shell$ ./configure CFLAGS=-Msignextend ... + + This will compile PMIx with the proper compile flags + +- Running on nodes with different endian and/or different datatype + sizes within a single parallel job is supported in this release. + However, PMIx does not resize data when datatypes differ in size + (for example, sending a 4 byte double and receiving an 8 byte + double will fail). + + +=========================================================================== + +Building PMIx +----------------- + +PMIx uses a traditional configure script paired with "make" to +build. Typical installs can be of the pattern: + +--------------------------------------------------------------------------- +shell$ ./configure [...options...]
+shell$ make all install +--------------------------------------------------------------------------- + +There are many available configure options (see "./configure --help" +for a full list); a summary of the more commonly used ones follows: + +INSTALLATION OPTIONS + +--prefix=<directory> + Install PMIx into the base directory named <directory>. Hence, + PMIx will place its executables in <directory>/bin, its header + files in <directory>/include, its libraries in <directory>/lib, etc. + +--disable-shared + By default, libpmix is built as a shared library. This switch disables + this default; it is really only useful when used with + --enable-static. Specifically, this option does *not* imply + --enable-static; enabling static libraries and disabling shared + libraries are two independent options. + +--enable-static + Build libpmix as a static library. Note that this option does *not* imply + --disable-shared; enabling static libraries and disabling shared + libraries are two independent options. + +--with-platform=FILE + Load configure options for the build from FILE. Options on the + command line that are not in FILE are also used. Options on the + command line and in FILE are replaced by what is in FILE. + +Once PMIx has been built and installed, it is safe to run "make +clean" and/or remove the entire build tree. + +VPATH and parallel builds are fully supported. + +Generally speaking, the only thing that users need to do to use PMIx +is ensure that <prefix>/lib is in their LD_LIBRARY_PATH. Users may +need to ensure to set LD_LIBRARY_PATH in their shell setup files (e.g., +.bashrc, .cshrc) so that non-interactive rsh/ssh-based logins will +be able to find the PMIx library.
+ +=========================================================================== + +PMIx Version Numbers and Binary Compatibility +------------------------------------------------- + +PMIx has two sets of version numbers that are likely of interest +to end users / system administrators: + + * Software version number + * Shared library version numbers + +Both are described below, followed by a discussion of application +binary interface (ABI) compatibility implications. + +Software Version Number +----------------------- + +PMIx's version numbers are the union of several different values: +major, minor, release, and an optional quantifier. + + * Major: The major number is the first integer in the version string + (e.g., v1.2.3). Changes in the major number typically indicate a + significant change in the code base and/or end-user + functionality. The major number is always included in the version + number. + + * Minor: The minor number is the second integer in the version + string (e.g., v1.2.3). Changes in the minor number typically + indicate an incremental change in the code base and/or end-user + functionality. The minor number is always included in the version + number. + + * Release: The release number is the third integer in the version + string (e.g., v1.2.3). Changes in the release number typically + indicate a bug fix in the code base and/or end-user + functionality. + + * Quantifier: PMIx version numbers sometimes have an arbitrary + string affixed to the end of the version number. Common strings + include: + + o aX: Indicates an alpha release. X is an integer indicating + the number of the alpha release (e.g., v1.2.3a5 indicates the + 5th alpha release of version 1.2.3). + o bX: Indicates a beta release. X is an integer indicating + the number of the beta release (e.g., v1.2.3b3 indicates the 3rd + beta release of version 1.2.3). + o rcX: Indicates a release candidate.
X is an integer + indicating the number of the release candidate (e.g., v1.2.3rc4 + indicates the 4th release candidate of version 1.2.3). + +Although the major, minor, and release values (and optional +quantifiers) are reported in PMIx nightly snapshot tarballs, the +filenames of these snapshot tarballs follow a slightly different +convention. + +Specifically, the snapshot tarball filename contains three distinct +values: + + * Most recent Git tag name on the branch from which the tarball was + created. + + * An integer indicating how many Git commits have occurred since + that Git tag. + + * The Git hash of the tip of the branch. + +For example, a snapshot tarball filename of +"pmix-v1.0.2-57-gb9f1fd9.tar.bz2" indicates that this tarball was +created from the v1.0 branch, 57 Git commits after the "v1.0.2" tag, +specifically at Git hash gb9f1fd9. + +PMIx's Git master branch contains a single "dev" tag. For example, +"pmix-dev-8-gf21c349.tar.bz2" represents a snapshot tarball created +from the master branch, 8 Git commits after the "dev" tag, +specifically at Git hash gf21c349. + +The exact value of the "number of Git commits past a tag" integer is +fairly meaningless; its sole purpose is to provide an easy, +human-recognizable ordering for snapshot tarballs. + +Shared Library Version Number +----------------------------- + +PMIx uses the GNU Libtool shared library versioning scheme. + +NOTE: Only official releases of PMIx adhere to this versioning + scheme. "Beta" releases, release candidates, and nightly + tarballs, developer snapshots, and Git snapshot tarballs likely + will all have arbitrary/meaningless shared library version + numbers. + +The GNU Libtool official documentation details how the versioning +scheme works. The quick version is that the shared library versions +are a triple of integers: (current,revision,age), or "c:r:a". This +triple is not related to the PMIx software version number. 
There +are six simple rules for updating the values (taken almost verbatim +from the Libtool docs): + + 1. Start with version information of "0:0:0" for each shared library. + + 2. Update the version information only immediately before a public + release of your software. More frequent updates are unnecessary, + and only guarantee that the current interface number gets larger + faster. + + 3. If the library source code has changed at all since the last + update, then increment revision ("c:r:a" becomes "c:r+1:a"). + + 4. If any interfaces have been added, removed, or changed since the + last update, increment current, and set revision to 0. + + 5. If any interfaces have been added since the last public release, + then increment age. + + 6. If any interfaces have been removed since the last public release, + then set age to 0. + +Application Binary Interface (ABI) Compatibility +------------------------------------------------ + +PMIx provides forward ABI compatibility in all versions of a given +feature release series and its corresponding +super stable series. For example, on a single platform, a pmix +application linked against PMIx v1.3.2 shared libraries can be +updated to point to the shared libraries in any successive v1.3.x or +v1.4 release and still work properly (e.g., via the LD_LIBRARY_PATH +environment variable or other operating system mechanism). + +PMIx reserves the right to break ABI compatibility at new feature +release series. For example, the same pmix application from above +(linked against PMIx v1.3.2 shared libraries) will *not* work with +PMIx v1.5 shared libraries. + +=========================================================================== + +Common Questions +---------------- + +Many common questions about building and using PMIx are answered +on the FAQ: + + http://pmix.github.io/master/faq/ + +=========================================================================== + +Got more questions? +------------------- + +Found a bug?
Got a question? Want to make a suggestion? Want to +contribute to PMIx? Please let us know! + +When submitting questions and problems, be sure to include as much +extra information as possible. This web page details all the +information that we request in order to provide assistance: + + http://pmix.github.io/master/community/help/ + +Questions and comments should generally be sent to the PMIx mailing +list (pmix@googlegroups.com). Because of spam, only +subscribers are allowed to post to this list (ensure that you +subscribe with and post from *exactly* the same e-mail address -- +joe@example.com is considered different than +joe@mycomputer.example.com!). Visit this page to subscribe to the +user's list: + + https://groups.google.com/d/forum/pmix + +Make today a PMIx day! diff --git a/opal/mca/pmix/pmix112/pmix/VERSION b/opal/mca/pmix/pmix112/pmix/VERSION new file mode 100644 index 00000000000..8f8cad68461 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/VERSION @@ -0,0 +1,79 @@ +# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2013 Mellanox Technologies, Inc. +# All rights reserved. +# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + +# This is the VERSION file for PMIx, describing the precise +# version of PMIx in this distribution. The various components of +# the version number below are combined to form a single version +# number string. + +# major, minor, and release are generally combined in the form +# <major>.<minor>.<release> + +major=1 +minor=2 +release=1 + +# greek is used for alpha or beta release tags. If it is non-empty, +# it will be appended to the version number. It does not have to be +# numeric. Common examples include a1 (alpha release 1) or b1 (beta release 1).
+# The only requirement is that it must be entirely printable ASCII +# characters and have no white space. + +greek= + +# If repo_rev is empty, then the repository version number will be +# obtained during "make dist" via the "git describe --tags --always" +# command, or with the date (if "git describe" fails) in the form of +# "date<date>". + +repo_rev=gitef61cf0a + +# If tarball_version is not empty, it is used as the version string in +# the tarball filename, regardless of all other versions listed in +# this file. For example, if tarball_version is empty, the tarball +# filename will be of the form +# pmix-<major>.<minor>.<release><greek>.tar.*. However, if +# tarball_version is not empty, the tarball filename will be of the +# form pmix-<tarball_version>.tar.*. + +tarball_version= + +# The date when this release was created + +date="Feb 21, 2017" + +# The shared library version of each of PMIx's public libraries. +# These versions are maintained in accordance with the "Library +# Interface Versions" chapter from the GNU Libtool documentation: +# +# - If the library source code has changed at all since the last +# update, then increment revision (`c:r:a' becomes `c:r+1:a'). +# +# - If any interfaces have been added, removed, or changed since +# the last update, increment current, and set revision to 0. +# +# - If any interfaces have been added since the last public release, +# then increment age. +# +# - If any interfaces have been removed since the last public release, +# then set age to 0. +# +# All changes in these version numbers are dictated by the PMIx +# release managers (not individual developers). Notes: + +# 1. Since these version numbers are associated with *releases*, the +# version numbers maintained on the PMIx Github trunk (and developer +# branches) are always 0:0:0 for all libraries. + +# 2. The version number of libpmix refers to the public pmix interfaces. +# It does not refer to any internal interfaces. + +# Version numbers are described in the Libtool current:revision:age +# format.
+ +libpmix_so_version=2:3:0 diff --git a/opal/mca/pmix/pmix112/pmix/autogen.sh b/opal/mca/pmix/pmix112/pmix/autogen.sh new file mode 100755 index 00000000000..b5b509eac8e --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/autogen.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +# Run all the rest of the Autotools +echo "==> Running autoreconf"; +autoreconf ${autoreconf_args:-"-ivf"} diff --git a/opal/mca/pmix/pmix112/pmix/config/Makefile.am b/opal/mca/pmix/pmix112/pmix/config/Makefile.am new file mode 100644 index 00000000000..1c2bb55f7e7 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/Makefile.am @@ -0,0 +1,53 @@ +# PMIx copyrights: +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved +# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# +######################### +# +# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Oracle and/or its affiliates. All rights +# reserved. 
+######################### +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST += \ + config/c_get_alignment.m4 \ + config/pmix_get_version.sh \ + config/distscript.sh \ + config/pmix_check_attributes.m4 \ + config/pmix_check_broken_qsort.m4 \ + config/pmix_check_compiler_version.m4 \ + config/pmix_check_icc.m4 \ + config/pmix_check_ident.m4 \ + config/pmix_check_munge.m4 \ + config/pmix_check_package.m4 \ + config/pmix_check_sasl.m4 \ + config/pmix_check_vendor.m4 \ + config/pmix_check_visibility.m4 \ + config/pmix_check_lock.m4 \ + config/pmix_ensure_contains_optflags.m4 \ + config/pmix_functions.m4 \ + config/pmix.m4 \ + config/pmix_search_libs.m4 \ + config/pmix_setup_cc.m4 \ + config/pmix_setup_hwloc.m4 \ + config/pmix_setup_libevent.m4 + +maintainer-clean-local: + rm -f config/pmix_get_version.sh diff --git a/opal/mca/pmix/pmix112/pmix/config/c_get_alignment.m4 b/opal/mca/pmix/pmix112/pmix/config/c_get_alignment.m4 new file mode 100644 index 00000000000..db379100994 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/c_get_alignment.m4 @@ -0,0 +1,72 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. 
+dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +# PMIX_C_GET_ALIGNMENT(type, config_var) +# -------------------------------------- +# Determine datatype alignment. +# First arg is type, 2nd arg is config var to define. +# Now that we require C99 compilers, we include stdbool.h +# in the alignment test so that we can find the definition +# of "bool" when we test for its alignment. We might be able +# to avoid this if we test for alignment of _Bool, but +# since we use "bool" in the code, let's be safe and check +# what we use. Yes, they should be the same - but "should" and +# "are" frequently differ +AC_DEFUN([PMIX_C_GET_ALIGNMENT],[ + AC_CACHE_CHECK([alignment of $1], + [AS_TR_SH([pmix_cv_c_align_$1])], + [AC_RUN_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT + #include <stdbool.h> ], +[[ + struct foo { char c; $1 x; }; + struct foo *p = (struct foo *) malloc(sizeof(struct foo)); + int diff; + FILE *f=fopen("conftestval", "w"); + if (!f) exit(1); + diff = ((char *)&p->x) - ((char *)&p->c); + fprintf(f, "%d\n", (diff >= 0) ? diff : -diff); +]])], [AS_TR_SH([pmix_cv_c_align_$1])=`cat conftestval`], + [AC_MSG_WARN([*** Problem running configure test!]) + AC_MSG_WARN([*** See config.log for details.]) + AC_MSG_ERROR([*** Cannot continue.])], + [ # cross compile - do a non-executable test. Trick + # taken from the Autoconf 2.59c. Switch to using + # AC_CHECK_ALIGNOF when we can require Autoconf 2.60.
+ _AC_COMPUTE_INT([(long int) offsetof (pmix__type_alignof_, y)], + [AS_TR_SH([pmix_cv_c_align_$1])], + [AC_INCLUDES_DEFAULT +#include <stdbool.h> + +#ifndef offsetof +# define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0) +#endif +typedef struct { char x; $1 y; } pmix__type_alignof_; +], + [AC_MSG_WARN([*** Problem running configure test!]) + AC_MSG_WARN([*** See config.log for details.]) + AC_MSG_ERROR([*** Cannot continue.])])])]) + +AC_DEFINE_UNQUOTED([$2], [$AS_TR_SH([pmix_cv_c_align_$1])], [Alignment of type $1]) +eval "$2=$AS_TR_SH([pmix_cv_c_align_$1])" + +rm -rf conftest* ]) dnl diff --git a/opal/mca/pmix/pmix112/pmix/config/distscript.sh b/opal/mca/pmix/pmix112/pmix/config/distscript.sh new file mode 100755 index 00000000000..fbb37a78716 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/distscript.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# Arguments (as passed by the dist-hook rule): $1 top_srcdir, $2 relative distdir, $3 PMIX_VERSION (unused), $4 PMIX_REPO_REV +srcdir=$1 +builddir=$PWD +distdir=$builddir/$2 +PMIX_REPO_REV=$4 + +if test x"$2" = x ; then + echo "*** ERROR: Must supply relative distdir as argv[2] -- aborting" + exit 1 +elif test !
-d "$distdir" ; then + echo "*** ERROR: dist dir does not exist" + echo "*** ERROR: $distdir" + exit 1 +fi + +# We can catch some hard (but possible) to do mistakes by looking at +# our repo's revision, but only if we are in the source tree. +# Otherwise, use what configure told us, at the cost of allowing one +# or two corner cases in (but otherwise VPATH builds won't work). +repo_rev=$PMIX_REPO_REV +if test -d .git ; then + repo_rev=$(config/pmix_get_version.sh VERSION --repo-rev) +fi + +# +# Update VERSION:repo_rev with the best value we have. +# +perl -pi -e 's/^repo_rev=.*/repo_rev='"$repo_rev"'/' -- "${distdir}/VERSION" +# need to reset the timestamp to not annoy AM dependencies +touch -r "${srcdir}/VERSION" "${distdir}/VERSION" + +echo "*** Updated VERSION file with repo rev: $repo_rev" +echo "*** (via dist-hook / config/distscript.sh)" diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix.m4 new file mode 100644 index 00000000000..44d9210143b --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix.m4 @@ -0,0 +1,866 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2009 IBM Corporation. All rights reserved. +dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights +dnl reserved. +dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved.
+dnl Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. +dnl Copyright (c) 2013-2016 Intel, Inc. All rights reserved +dnl Copyright (c) 2015-2017 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2016 Mellanox Technologies, Inc. +dnl All rights reserved. +dnl Copyright (c) 2016 IBM Corporation. All rights reserved. +dnl +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +AC_DEFUN([PMIX_SETUP_CORE],[ + + AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS]) + AC_REQUIRE([AC_CANONICAL_TARGET]) + AC_REQUIRE([AC_PROG_CC]) + + # If no prefix was defined, set a good value + m4_ifval([$1], + [m4_define([pmix_config_prefix],[$1/])], + [m4_define([pmix_config_prefix], [])]) + + # Get pmix's absolute top builddir (which may not be the same as + # the real $top_builddir) + PMIX_startdir=`pwd` + if test x"pmix_config_prefix" != "x" && test ! -d "pmix_config_prefix"; then + mkdir -p "pmix_config_prefix" + fi + if test x"pmix_config_prefix" != "x"; then + cd "pmix_config_prefix" + fi + PMIX_top_builddir=`pwd` + AC_SUBST(PMIX_top_builddir) + + # Get pmix's absolute top srcdir (which may not be the same as the + # real $top_srcdir. First, go back to the startdir incase the + # $srcdir is relative. + + cd "$PMIX_startdir" + cd "$srcdir"/pmix_config_prefix + PMIX_top_srcdir="`pwd`" + AC_SUBST(PMIX_top_srcdir) + + # Go back to where we started + cd "$PMIX_startdir" + + AC_MSG_NOTICE([pmix builddir: $PMIX_top_builddir]) + AC_MSG_NOTICE([pmix srcdir: $PMIX_top_srcdir]) + if test "$PMIX_top_builddir" != "$PMIX_top_srcdir"; then + AC_MSG_NOTICE([Detected VPATH build]) + fi + + # Get the version of pmix that we are installing + AC_MSG_CHECKING([for pmix version]) + PMIX_VERSION="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION`" + if test "$?" 
!= "0"; then + AC_MSG_ERROR([Cannot continue]) + fi + PMIX_RELEASE_DATE="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION --release-date`" + AC_SUBST(PMIX_VERSION) + AC_DEFINE_UNQUOTED([PMIX_VERSION], ["$PMIX_VERSION"], + [The library version is always available, contrary to VERSION]) + AC_SUBST(PMIX_RELEASE_DATE) + AC_MSG_RESULT([$PMIX_VERSION]) + + # Save the breakdown the version information + PMIX_MAJOR_VERSION="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION --major`" + if test "$?" != "0"; then + AC_MSG_ERROR([Cannot continue]) + fi + AC_SUBST(PMIX_MAJOR_VERSION) + AC_DEFINE_UNQUOTED([PMIX_MAJOR_VERSION], ["$PMIX_MAJOR_VERSION"], + [The library major version is always available, contrary to VERSION]) + + PMIX_MINOR_VERSION="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION --minor`" + if test "$?" != "0"; then + AC_MSG_ERROR([Cannot continue]) + fi + AC_SUBST(PMIX_MINOR_VERSION) + AC_DEFINE_UNQUOTED([PMIX_MINOR_VERSION], ["$PMIX_MINOR_VERSION"], + [The library minor version is always available, contrary to VERSION]) + + pmixmajor=${PMIX_MAJOR_VERSION}L + pmixminor=${PMIX_MINOR_VERSION}L + AC_SUBST(pmixmajor) + AC_SUBST(pmixminor) + AC_CONFIG_FILES(pmix_config_prefix[include/pmix_version.h]) + + PMIX_RELEASE_VERSION="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION --release`" + if test "$?" != "0"; then + AC_MSG_ERROR([Cannot continue]) + fi + AC_SUBST(PMIX_RELEASE_VERSION) + AC_DEFINE_UNQUOTED([PMIX_RELEASE_VERSION], ["$PMIX_RELEASE_VERSION"], + [The library release version is always available, contrary to VERSION]) + + # Debug mode? + AC_MSG_CHECKING([if want pmix maintainer support]) + pmix_debug= + AS_IF([test "$pmix_debug" = "" && test "$enable_debug" = "yes"], + [pmix_debug=1 + pmix_debug_msg="enabled"]) + AS_IF([test "$pmix_debug" = ""], + [pmix_debug=0 + pmix_debug_msg="disabled"]) + # Grr; we use #ifndef for PMIX_DEBUG! 
:-( + AH_TEMPLATE(PMIX_ENABLE_DEBUG, [Whether we are in debugging mode or not]) + AS_IF([test "$pmix_debug" = "1"], [AC_DEFINE([PMIX_ENABLE_DEBUG])]) + AC_MSG_RESULT([$pmix_debug_msg]) + + AC_MSG_CHECKING([for pmix directory prefix]) + AC_MSG_RESULT(m4_ifval([$1], pmix_config_prefix, [(none)])) + + AC_CONFIG_HEADERS(pmix_config_prefix[include/pmix/autogen/config.h]) + + # What prefix are we using? + AC_MSG_CHECKING([for pmix symbol prefix]) + AS_IF([test "$pmix_symbol_prefix_value" = ""], + [AS_IF([test "$with_pmix_symbol_prefix" = ""], + [pmix_symbol_prefix_value=pmix_], + [pmix_symbol_prefix_value=$with_pmix_symbol_prefix])]) + AC_DEFINE_UNQUOTED(PMIX_SYM_PREFIX, [$pmix_symbol_prefix_value], + [The pmix symbol prefix]) + # Ensure to [] escape the whole next line so that we can get the + # proper tr tokens + [pmix_symbol_prefix_value_caps="`echo $pmix_symbol_prefix_value | tr '[:lower:]' '[:upper:]'`"] + AC_DEFINE_UNQUOTED(PMIX_SYM_PREFIX_CAPS, [$pmix_symbol_prefix_value_caps], + [The pmix symbol prefix in all caps]) + AC_MSG_RESULT([$pmix_symbol_prefix_value]) + + # Give an easy #define to know if we need to transform all the + # pmix names + AH_TEMPLATE([PMIX_SYM_TRANSFORM], [Whether we need to re-define all the pmix public symbols or not]) + AS_IF([test "$pmix_symbol_prefix_value" = "pmix_"], + [AC_DEFINE([PMIX_SYM_TRANSFORM], [0])], + [AC_DEFINE([PMIX_SYM_TRANSFORM], [1])]) + + # GCC specifics. 
+ if test "x$GCC" = "xyes"; then + PMIX_GCC_CFLAGS="-Wall -Wmissing-prototypes -Wundef" + PMIX_GCC_CFLAGS="$PMIX_GCC_CFLAGS -Wpointer-arith -Wcast-align" + fi + + ############################################################################ + # Check for compilers and preprocessors + ############################################################################ + pmix_show_title "Compiler and preprocessor tests" + + # + # Check for some types + # + + AC_CHECK_TYPES(int8_t) + AC_CHECK_TYPES(uint8_t) + AC_CHECK_TYPES(int16_t) + AC_CHECK_TYPES(uint16_t) + AC_CHECK_TYPES(int32_t) + AC_CHECK_TYPES(uint32_t) + AC_CHECK_TYPES(int64_t) + AC_CHECK_TYPES(uint64_t) + AC_CHECK_TYPES(long long) + + AC_CHECK_TYPES(intptr_t) + AC_CHECK_TYPES(uintptr_t) + AC_CHECK_TYPES(ptrdiff_t) + + # + # Check for type sizes + # + + AC_CHECK_SIZEOF(_Bool) + AC_CHECK_SIZEOF(char) + AC_CHECK_SIZEOF(short) + AC_CHECK_SIZEOF(int) + AC_CHECK_SIZEOF(long) + if test "$ac_cv_type_long_long" = yes; then + AC_CHECK_SIZEOF(long long) + fi + AC_CHECK_SIZEOF(float) + AC_CHECK_SIZEOF(double) + + AC_CHECK_SIZEOF(void *) + AC_CHECK_SIZEOF(size_t) + if test "$ac_cv_type_ssize_t" = yes ; then + AC_CHECK_SIZEOF(ssize_t) + fi + if test "$ac_cv_type_ptrdiff_t" = yes; then + AC_CHECK_SIZEOF(ptrdiff_t) + fi + AC_CHECK_SIZEOF(wchar_t) + + AC_CHECK_SIZEOF(pid_t) + + # + # Check for type alignments + # + + PMIX_C_GET_ALIGNMENT(bool, PMIX_ALIGNMENT_BOOL) + PMIX_C_GET_ALIGNMENT(int8_t, PMIX_ALIGNMENT_INT8) + PMIX_C_GET_ALIGNMENT(int16_t, PMIX_ALIGNMENT_INT16) + PMIX_C_GET_ALIGNMENT(int32_t, PMIX_ALIGNMENT_INT32) + PMIX_C_GET_ALIGNMENT(int64_t, PMIX_ALIGNMENT_INT64) + PMIX_C_GET_ALIGNMENT(char, PMIX_ALIGNMENT_CHAR) + PMIX_C_GET_ALIGNMENT(short, PMIX_ALIGNMENT_SHORT) + PMIX_C_GET_ALIGNMENT(wchar_t, PMIX_ALIGNMENT_WCHAR) + PMIX_C_GET_ALIGNMENT(int, PMIX_ALIGNMENT_INT) + PMIX_C_GET_ALIGNMENT(long, PMIX_ALIGNMENT_LONG) + if test "$ac_cv_type_long_long" = yes; then + PMIX_C_GET_ALIGNMENT(long long, PMIX_ALIGNMENT_LONG_LONG) + fi + 
PMIX_C_GET_ALIGNMENT(float, PMIX_ALIGNMENT_FLOAT) + PMIX_C_GET_ALIGNMENT(double, PMIX_ALIGNMENT_DOUBLE) + if test "$ac_cv_type_long_double" = yes; then + PMIX_C_GET_ALIGNMENT(long double, PMIX_ALIGNMENT_LONG_DOUBLE) + fi + PMIX_C_GET_ALIGNMENT(void *, PMIX_ALIGNMENT_VOID_P) + PMIX_C_GET_ALIGNMENT(size_t, PMIX_ALIGNMENT_SIZE_T) + + + # + # Does the C compiler natively support "bool"? (i.e., without + # <stdbool.h> or any other help) + # + + PMIX_VAR_SCOPE_PUSH([MSG]) + AC_MSG_CHECKING(for C bool type) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + AC_INCLUDES_DEFAULT], + [[bool bar, foo = true; bar = foo;]])], + [PMIX_NEED_C_BOOL=0 MSG=yes],[PMIX_NEED_C_BOOL=1 MSG=no]) + AC_DEFINE_UNQUOTED(PMIX_NEED_C_BOOL, $PMIX_NEED_C_BOOL, + [Whether the C compiler supports "bool" without any other help (such as <stdbool.h>)]) + AC_MSG_RESULT([$MSG]) + AC_CHECK_SIZEOF(_Bool) + PMIX_VAR_SCOPE_POP + + # + # Check for other compiler characteristics + # + + PMIX_VAR_SCOPE_PUSH([PMIX_CFLAGS_save]) + if test "$GCC" = "yes"; then + + # gcc 2.96 will emit oodles of warnings if you use "inline" with + # -pedantic (which we do in developer builds). However, + # "__inline__" is ok. So we have to force gcc to select the + # right one. If you use -pedantic, the AC_C_INLINE test will fail + # (because it names a function foo() -- without the (void)). So + # we turn off all the picky flags, turn on -ansi mode (which is + # implied by -pedantic), and set warnings to be errors. Hence, + # this does the following (for 2.96): + # + # - causes the check for "inline" to emit a warning, which then + # fails + # - checks for __inline__, which then emits no error, and works + # + # This also works nicely for gcc 3.x because "inline" will work on + # the first check, and all is fine.
:-) + + PMIX_CFLAGS_save=$CFLAGS + CFLAGS="$PMIX_CFLAGS_BEFORE_PICKY -Werror -ansi" + fi + AC_C_INLINE + if test "$GCC" = "yes"; then + CFLAGS=$PMIX_CFLAGS_save + fi + PMIX_VAR_SCOPE_POP + + if test "x$CC" = "xicc"; then + PMIX_CHECK_ICC_VARARGS + fi + + + ################################## + # Only after setting up + # C do we check compiler attributes. + ################################## + + pmix_show_subtitle "Compiler characteristics" + + PMIX_CHECK_ATTRIBUTES + PMIX_CHECK_COMPILER_VERSION_ID + + ################################## + # Header files + ################################## + + pmix_show_title "Header file tests" + + AC_CHECK_HEADERS([arpa/inet.h \ + fcntl.h inttypes.h libgen.h \ + netinet/in.h \ + stdint.h stddef.h \ + stdlib.h string.h strings.h \ + sys/param.h \ + sys/select.h sys/socket.h \ + stdarg.h sys/stat.h sys/time.h \ + sys/types.h sys/un.h sys/uio.h net/uio.h \ + sys/wait.h syslog.h \ + time.h unistd.h \ + crt_externs.h signal.h \ + ioLib.h sockLib.h hostLib.h limits.h \ + sys/mount.h sys/statfs.h sys/statvfs.h sys/vfs.h \ + netdb.h ucred.h]) + + # Note that sometimes we have <stdbool.h>, but it doesn't work (e.g., + # have both Portland and GNU installed; using pgcc will find GNU's + # <stdbool.h>, which all it does -- by standard -- is define "bool" to + # "_Bool" [see + # http://www.opengroup.org/onlinepubs/009695399/basedefs/stdbool.h.html], + # and Portland has no idea what to do with _Bool). + + # So first figure out if we have <stdbool.h> (i.e., check the value of + # the macro HAVE_STDBOOL_H from the result of AC_CHECK_HEADERS, + # above). If we do have it, then check to see if it actually works. + # Define PMIX_USE_STDBOOL_H as appropriate.
+ AC_CHECK_HEADERS([stdbool.h], [have_stdbool_h=1], [have_stdbool_h=0]) + AC_MSG_CHECKING([if <stdbool.h> works]) + if test "$have_stdbool_h" = "1"; then + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT[ + #if HAVE_STDBOOL_H + #include <stdbool.h> + #endif + ]], + [[bool bar, foo = true; bar = foo;]])], + [PMIX_USE_STDBOOL_H=1 MSG=yes],[PMIX_USE_STDBOOL_H=0 MSG=no]) + else + PMIX_USE_STDBOOL_H=0 + MSG="no (don't have <stdbool.h>)" + fi + AC_DEFINE_UNQUOTED(PMIX_USE_STDBOOL_H, $PMIX_USE_STDBOOL_H, + [Whether to use <stdbool.h> or not]) + AC_MSG_RESULT([$MSG]) + + # checkpoint results + AC_CACHE_SAVE + + ################################## + # Types + ################################## + + pmix_show_title "Type tests" + + AC_CHECK_TYPES([socklen_t, struct sockaddr_in, struct sockaddr_un, + struct sockaddr_in6, struct sockaddr_storage], + [], [], [AC_INCLUDES_DEFAULT + #if HAVE_SYS_SOCKET_H + #include <sys/socket.h> + #endif + #if HAVE_SYS_UN_H + #include <sys/un.h> + #endif + #ifdef HAVE_NETINET_IN_H + #include <netinet/in.h> + #endif + ]) + + AC_CHECK_DECLS([AF_UNSPEC, PF_UNSPEC, AF_INET6, PF_INET6], + [], [], [AC_INCLUDES_DEFAULT + #if HAVE_SYS_SOCKET_H + #include <sys/socket.h> + #endif + #ifdef HAVE_NETINET_IN_H + #include <netinet/in.h> + #endif + ]) + + # SA_RESTART in signal.h + PMIX_VAR_SCOPE_PUSH([MSG2]) + AC_MSG_CHECKING([if SA_RESTART defined in signal.h]) + AC_EGREP_CPP(yes, [ + #include <signal.h> + #ifdef SA_RESTART + yes + #endif + ], [MSG2=yes VALUE=1], [MSG2=no VALUE=0]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_SA_RESTART, $VALUE, + [Whether we have SA_RESTART in <signal.h> or not]) + AC_MSG_RESULT([$MSG2]) + PMIX_VAR_SCOPE_POP + + AC_CHECK_MEMBERS([struct sockaddr.sa_len], [], [], [ + #include <sys/types.h> + #if HAVE_SYS_SOCKET_H + #include <sys/socket.h> + #endif + ]) + + AC_CHECK_MEMBERS([struct dirent.d_type], [], [], [ + #include <sys/types.h> + #include <dirent.h>]) + + AC_CHECK_MEMBERS([siginfo_t.si_fd],,,[#include <signal.h>]) + AC_CHECK_MEMBERS([siginfo_t.si_band],,,[#include <signal.h>]) + + # + # Checks for struct member names in struct statfs + # + AC_CHECK_MEMBERS([struct statfs.f_type], [], [], [ + AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_VFS_H +
#include <sys/vfs.h> + #endif + #ifdef HAVE_SYS_STATFS_H + #include <sys/statfs.h> + #endif + ]) + + AC_CHECK_MEMBERS([struct statfs.f_fstypename], [], [], [ + AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_PARAM_H + #include <sys/param.h> + #endif + #ifdef HAVE_SYS_MOUNT_H + #include <sys/mount.h> + #endif + #ifdef HAVE_SYS_VFS_H + #include <sys/vfs.h> + #endif + #ifdef HAVE_SYS_STATFS_H + #include <sys/statfs.h> + #endif + ]) + + # + # Checks for struct member names in struct statvfs + # + AC_CHECK_MEMBERS([struct statvfs.f_basetype], [], [], [ + AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_STATVFS_H + #include <sys/statvfs.h> + #endif + ]) + + AC_CHECK_MEMBERS([struct statvfs.f_fstypename], [], [], [ + AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_STATVFS_H + #include <sys/statvfs.h> + #endif + ]) + + AC_CHECK_MEMBERS([struct ucred.uid, struct ucred.cr_uid, struct sockpeercred.uid], + [], [], + [#include <sys/types.h> + #include <sys/socket.h> ]) + + # Check for ptrdiff type. Yes, there are platforms where + # sizeof(void*) != sizeof(long) (64 bit Windows, apparently). + # + AC_MSG_CHECKING([for pointer diff type]) + if test "$ac_cv_type_ptrdiff_t" = yes ; then + pmix_ptrdiff_t="ptrdiff_t" + pmix_ptrdiff_size=$ac_cv_sizeof_ptrdiff_t + elif test "$ac_cv_sizeof_void_p" -eq "$ac_cv_sizeof_long" ; then + pmix_ptrdiff_t="long" + pmix_ptrdiff_size=$ac_cv_sizeof_long + elif test "$ac_cv_type_long_long" = yes && test "$ac_cv_sizeof_void_p" -eq "$ac_cv_sizeof_long_long" ; then + pmix_ptrdiff_t="long long" + pmix_ptrdiff_size=$ac_cv_sizeof_long_long + #else + # AC_MSG_ERROR([Could not find datatype to emulate ptrdiff_t.
Cannot continue]) + fi + AC_DEFINE_UNQUOTED([PMIX_PTRDIFF_TYPE], [$pmix_ptrdiff_t], + [type to use for ptrdiff_t]) + AC_MSG_RESULT([$pmix_ptrdiff_t (size: $pmix_ptrdiff_size)]) + + ################################## + # Libraries + ################################## + + pmix_show_title "Library and Function tests" + + PMIX_SEARCH_LIBS_CORE([socket], [socket]) + + # IRIX and CentOS have dirname in -lgen, usually in libc + PMIX_SEARCH_LIBS_CORE([dirname], [gen]) + + # Darwin doesn't need -lm, as it's a symlink to libSystem.dylib + PMIX_SEARCH_LIBS_CORE([ceil], [m]) + + AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate]) + + # On some hosts, htonl is a define, so the AC_CHECK_FUNC will get + # confused. On others, it's in the standard library, but stubbed with + # the magic glibc foo as not implemented. And on other systems, it's + # just not there. This covers all cases. + AC_CACHE_CHECK([for htonl define], + [ompi_cv_htonl_define], + [AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #ifdef HAVE_SYS_TYPES_H + #include <sys/types.h> + #endif + #ifdef HAVE_NETINET_IN_H + #include <netinet/in.h> + #endif + #ifdef HAVE_ARPA_INET_H + #include <arpa/inet.h> + #endif],[ + #ifndef htonl + #error "htonl not defined" + #endif + ])], [ompi_cv_htonl_define=yes], [ompi_cv_htonl_define=no])]) + AC_CHECK_FUNC([htonl], [ompi_have_htonl=yes], [ompi_have_htonl=no]) + AS_IF([test "$ompi_cv_htonl_define" = "yes" || test "$ompi_have_htonl" = "yes"], + [AC_DEFINE_UNQUOTED([HAVE_UNIX_BYTESWAP], [1], + [whether unix byteswap routines -- htonl, htons, ntohl, ntohs -- are available])]) + + # + # Make sure we can copy va_lists (need check declared, not linkable) + # + + AC_CHECK_DECL(va_copy, PMIX_HAVE_VA_COPY=1, PMIX_HAVE_VA_COPY=0, + [#include <stdarg.h>]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_VA_COPY, $PMIX_HAVE_VA_COPY, + [Whether we have va_copy or not]) + + AC_CHECK_DECL(__va_copy, PMIX_HAVE_UNDERSCORE_VA_COPY=1, + PMIX_HAVE_UNDERSCORE_VA_COPY=0,
[#include <stdarg.h>]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_UNDERSCORE_VA_COPY, $PMIX_HAVE_UNDERSCORE_VA_COPY, + [Whether we have __va_copy or not]) + + AC_CHECK_DECLS(__func__) + + # checkpoint results + AC_CACHE_SAVE + + ################################## + # System-specific tests + ################################## + + pmix_show_title "System-specific tests" + + AC_C_BIGENDIAN + PMIX_CHECK_BROKEN_QSORT + + ################################## + # Visibility + ################################## + + # Check the visibility declspec at the end to avoid problem with + # the previous tests that are not necessarily prepared for + # the visibility feature. + pmix_show_title "Symbol visibility feature" + + PMIX_CHECK_VISIBILITY + + ################################## + # Libevent + ################################## + pmix_show_title "Libevent" + + PMIX_LIBEVENT_CONFIG + + ################################## + # HWLOC + ################################## + pmix_show_title "HWLOC" + + PMIX_HWLOC_CONFIG + + ################################## + # SASL + ################################## + pmix_show_title "SASL" + + PMIX_SASL_CONFIG + + ################################## + # Munge + ################################## + pmix_show_title "Munge" + + PMIX_MUNGE_CONFIG + + ################################## + # Dstore Locking + ################################## + + pmix_show_title "Dstore Locking" + + PMIX_CHECK_DSTOR_LOCK + + ############################################################################ + # final compiler config + ############################################################################ + + pmix_show_subtitle "Set path-related compiler flags" + + # + # This is needed for VPATH builds, so that it will -I the appropriate + # include directory. We delayed doing it until now just so that + # '-I$(top_srcdir)' doesn't show up in any of the configure output -- + # purely aesthetic.
+ # + # Because pmix_config.h is created by AC_CONFIG_HEADERS, we + # don't need to -I the builddir for pmix/include. However, if we + # are VPATH building, we do need to include the source directories. + # + if test "$PMIX_top_builddir" != "$PMIX_top_srcdir"; then + # Note the embedded m4 directives here -- we must embed them + # rather than have successive assignments to these shell + # variables, lest the $(foo) names try to get evaluated here. + # Yuck! + CPPFLAGS='-I$(PMIX_top_builddir) -I$(PMIX_top_srcdir) -I$(PMIX_top_srcdir)/src -I$(PMIX_top_builddir)/include -I$(PMIX_top_srcdir)/include'" $CPPFLAGS" + else + CPPFLAGS='-I$(PMIX_top_srcdir) -I$(PMIX_top_srcdir)/src -I$(PMIX_top_srcdir)/include'" $CPPFLAGS" + fi + + # pmixdatadir, pmixlibdir, and pmixinclude are essentially the same as + # pkg*dir, but will always be */pmix. + pmixdatadir='${datadir}/pmix' + pmixlibdir='${libdir}/pmix' + pmixincludedir='${includedir}/pmix' + AC_SUBST(pmixdatadir) + AC_SUBST(pmixlibdir) + AC_SUBST(pmixincludedir) + + ############################################################################ + # final output + ############################################################################ + + pmix_show_subtitle "Final output" + + AC_CONFIG_FILES(pmix_config_prefix[Makefile]) + + # Success + $2 +])dnl + +AC_DEFUN([PMIX_DEFINE_ARGS],[ + # Embedded mode, or standalone? + AC_MSG_CHECKING([if embedded mode is enabled]) + AC_ARG_ENABLE([embedded-mode], + [AC_HELP_STRING([--enable-embedded-mode], + [Using --enable-embedded-mode causes PMIx to skip a few configure checks and install nothing. It should only be used when building PMIx within the scope of a larger package.])]) + AS_IF([test ! -z "$enable_embedded_mode" && test "$enable_embedded_mode" = "yes"], + [pmix_mode=embedded + AC_MSG_RESULT([yes])], + [pmix_mode=standalone + AC_MSG_RESULT([no])]) + + # Change the symbol prefix? 
+ AC_ARG_WITH([pmix-symbol-prefix], + AC_HELP_STRING([--with-pmix-symbol-prefix=STRING], + [STRING can be any valid C symbol name. It will be prefixed to all public PMIx symbols. Default: "pmix_"])) + +# +# Is this a developer copy? +# + +if test -d .git; then + PMIX_DEVEL=1 +else + PMIX_DEVEL=0 +fi + + +# +# Developer picky compiler options +# + +AC_MSG_CHECKING([if want developer-level compiler pickyness]) +AC_ARG_ENABLE(picky, + AC_HELP_STRING([--enable-picky], + [enable developer-level compiler pickyness when building PMIx (default: disabled)])) +if test "$enable_picky" = "yes"; then + AC_MSG_RESULT([yes]) + WANT_PICKY_COMPILER=1 +else + AC_MSG_RESULT([no]) + WANT_PICKY_COMPILER=0 +fi +#################### Early development override #################### +if test "$WANT_PICKY_COMPILER" = "0" && test -z "$enable_picky" && test "$PMIX_DEVEL" = "1"; then + WANT_PICKY_COMPILER=1 + echo "--> developer override: enable picky compiler by default" +fi +#################### Early development override #################### + +# +# Developer debugging +# + +AC_MSG_CHECKING([if want developer-level debugging code]) +AC_ARG_ENABLE(debug, + AC_HELP_STRING([--enable-debug], + [enable developer-level debugging code (not for general PMIx users!) 
(default: disabled)])) +if test "$enable_debug" = "yes"; then + AC_MSG_RESULT([yes]) + WANT_DEBUG=1 +else + AC_MSG_RESULT([no]) + WANT_DEBUG=0 +fi +#################### Early development override #################### +if test "$WANT_DEBUG" = "0" && test -z "$enable_debug" && test "$PMIX_DEVEL" = "1"; then + WANT_DEBUG=1 + echo "--> developer override: enable debugging code by default" +fi +#################### Early development override #################### +if test "$WANT_DEBUG" = "0"; then + CFLAGS="-DNDEBUG $CFLAGS" + CXXFLAGS="-DNDEBUG $CXXFLAGS" +fi +AC_DEFINE_UNQUOTED(PMIX_ENABLE_DEBUG, $WANT_DEBUG, + [Whether we want developer-level debugging code or not]) + +AC_ARG_ENABLE(debug-symbols, + AC_HELP_STRING([--disable-debug-symbols], + [Disable adding compiler flags to enable debugging symbols if --enable-debug is specified. For non-debugging builds, this flag has no effect.])) + +# +# Do we want to install the internal devel headers? +# +AC_MSG_CHECKING([if want to install project-internal header files]) +AC_ARG_WITH(devel-headers, + AC_HELP_STRING([--with-devel-headers], + [normal PMIx users/applications do not need this (pmix.h and friends are ALWAYS installed). Developer headers are only necessary for authors doing deeper integration (default: disabled).])) +if test "$with_devel_headers" = "yes"; then + AC_MSG_RESULT([yes]) + WANT_INSTALL_HEADERS=1 +else + AC_MSG_RESULT([no]) + WANT_INSTALL_HEADERS=0 +fi +AM_CONDITIONAL(WANT_INSTALL_HEADERS, test "$WANT_INSTALL_HEADERS" = 1) + +# +# Do we want the pretty-print stack trace feature? 
+# + +AC_MSG_CHECKING([if want pretty-print stacktrace]) +AC_ARG_ENABLE([pretty-print-stacktrace], + [AC_HELP_STRING([--enable-pretty-print-stacktrace], + [Pretty print stacktrace on process signal (default: enabled)])]) +if test "$enable_pretty_print_stacktrace" = "no" ; then + AC_MSG_RESULT([no]) + WANT_PRETTY_PRINT_STACKTRACE=0 +else + AC_MSG_RESULT([yes]) + WANT_PRETTY_PRINT_STACKTRACE=1 +fi +AC_DEFINE_UNQUOTED([PMIX_WANT_PRETTY_PRINT_STACKTRACE], + [$WANT_PRETTY_PRINT_STACKTRACE], + [if want pretty-print stack trace feature]) + +# +# Do we want the shared memory datastore usage? +# + +AC_MSG_CHECKING([if want shared memory datastore]) +AC_ARG_ENABLE([dstore], + [AC_HELP_STRING([--disable-dstore], + [Disable the shared memory datastore (default: enabled)])]) +if test "$enable_dstore" = "no" ; then + AC_MSG_RESULT([no]) + WANT_DSTORE=0 +else + AC_MSG_RESULT([yes]) + WANT_DSTORE=1 +fi +AC_DEFINE_UNQUOTED([PMIX_ENABLE_DSTORE], + [$WANT_DSTORE], + [if want shared memory dstore feature]) +AM_CONDITIONAL([WANT_DSTORE],[test "x$enable_dstore" != "xno"]) + +# +# Use pthread-based locking +# +DSTORE_PTHREAD_LOCK="1" +AC_MSG_CHECKING([if want dstore pthread-based locking]) +AC_ARG_ENABLE([dstore-pthlck], + [AC_HELP_STRING([--disable-dstore-pthlck], + [Disable pthread-based locking in dstore (default: enabled)])]) +if test "$enable_dstore_pthlck" = "no" ; then + AC_MSG_RESULT([no]) + DSTORE_PTHREAD_LOCK="0" +else + AC_MSG_RESULT([yes]) + DSTORE_PTHREAD_LOCK="1" +fi + +# +# Ident string +# +AC_MSG_CHECKING([if want ident string]) +AC_ARG_WITH([ident-string], + [AC_HELP_STRING([--with-ident-string=STRING], + [Embed an ident string into PMIx object files])]) +if test "$with_ident_string" = "" || test "$with_ident_string" = "no"; then + with_ident_string="%VERSION%" +fi +# This is complicated, because $PMIX_VERSION may have spaces in it. +# So put the whole sed expr in single quotes -- i.e., directly +# substitute %VERSION% for (not expanded) $PMIX_VERSION.
+with_ident_string="`echo $with_ident_string | sed -e 's/%VERSION%/$PMIX_VERSION/'`" + +# Now eval an echo of that so that the "$PMIX_VERSION" token is +# replaced with its value. Enclose the whole thing in "" so that it +# ends up as 1 token. +with_ident_string="`eval echo $with_ident_string`" + +AC_DEFINE_UNQUOTED([PMIX_IDENT_STRING], ["$with_ident_string"], + [ident string for PMIX]) +AC_MSG_RESULT([$with_ident_string]) + +# +# Timing support +# +AC_MSG_CHECKING([if want developer-level timing support]) +AC_ARG_ENABLE(timing, + AC_HELP_STRING([--enable-timing], + [enable developer-level timing code (default: disabled)])) +if test "$enable_timing" = "yes"; then + AC_MSG_RESULT([yes]) + WANT_TIMING=1 +else + AC_MSG_RESULT([no]) + WANT_TIMING=0 +fi + +AC_DEFINE_UNQUOTED([PMIX_ENABLE_TIMING], [$WANT_TIMING], + [Whether we want developer-level timing support or not]) + +])dnl + +# Specify the symbol prefix +AC_DEFUN([PMIX_SET_SYMBOL_PREFIX],[ + pmix_symbol_prefix_value=$1 +])dnl + +# This must be a standalone routine so that it can be called both by +# PMIX_INIT and an external caller (if PMIX_INIT is not invoked). +AC_DEFUN([PMIX_DO_AM_CONDITIONALS],[ + AS_IF([test "$pmix_did_am_conditionals" != "yes"],[ + AM_CONDITIONAL([PMIX_EMBEDDED_MODE], [test "x$pmix_mode" = "xembedded"]) + AM_CONDITIONAL([PMIX_COMPILE_TIMING], [test "$WANT_TIMING" = "1"]) + AM_CONDITIONAL([PMIX_WANT_MUNGE], [test "$pmix_munge_support" = "1"]) + AM_CONDITIONAL([PMIX_WANT_SASL], [test "$pmix_sasl_support" = "1"]) + AM_CONDITIONAL([WANT_DSTORE],[test "x$enable_dstore" != "xno"]) + ]) + pmix_did_am_conditionals=yes +])dnl + diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_attributes.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_attributes.m4 new file mode 100644 index 00000000000..bbafcc2b8b5 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_attributes.m4 @@ -0,0 +1,539 @@ +# -*- shell-script -*- +# PMIx copyrights: +# Copyright (c) 2013 Intel, Inc. 
All rights reserved +# +######################### +# +# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. +# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013 Mellanox Technologies, Inc. +# All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. +######################### +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# Search the generated warnings for +# keywords regarding skipping or ignoring certain attributes +# Intel: ignore +# Sun C++: skip +# +AC_DEFUN([_PMIX_ATTRIBUTE_FAIL_SEARCH],[ + AC_REQUIRE([AC_PROG_GREP]) + if test -s conftest.err ; then + # icc uses 'invalid attribute' and 'attribute "__XXX__" ignored' + # Sun 12.1 emits 'warning: attribute parameter "__printf__" is undefined' + for i in invalid ignore skip undefined ; do + $GREP -iq $i conftest.err + if test "$?" = "0" ; then + pmix_cv___attribute__[$1]=0 + break; + fi + done + fi +]) + +# +# Check for one specific attribute by compiling with C +# +# The last argument is for specific CFLAGS, that need to be set +# for the compiler to generate a warning on the cross-check. +# This may need adaption for future compilers / CFLAG-settings. 
+# +AC_DEFUN([_PMIX_CHECK_SPECIFIC_ATTRIBUTE], [ + AC_MSG_CHECKING([for __attribute__([$1])]) + AC_CACHE_VAL(pmix_cv___attribute__[$1], [ + # + # Try to compile using the C compiler + # + AC_TRY_COMPILE([$2],[], + [ + # + # In case we did succeed: Fine, but was this due to the + # attribute being ignored/skipped? Grep for IgNoRe/skip in conftest.err + # and if found, reset the pmix_cv__attribute__var=0 + # + pmix_cv___attribute__[$1]=1 + _PMIX_ATTRIBUTE_FAIL_SEARCH([$1]) + ], + [pmix_cv___attribute__[$1]=0]) + ]) + + if test "$pmix_cv___attribute__[$1]" = "1" ; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi +]) + + +# +# Test the availability of __attribute__ and with the help +# of _PMIX_CHECK_SPECIFIC_ATTRIBUTE for the support of +# particular attributes. Compilers, that do not support an +# attribute most often fail with a warning (when the warning +# level is set). +# The compilers output is parsed in _PMIX_ATTRIBUTE_FAIL_SEARCH +# +# To add a new attributes __NAME__ add the +# pmix_cv___attribute__NAME +# add a new check with _PMIX_CHECK_SPECIFIC_ATTRIBUTE (possibly with a cross-check) +# _PMIX_CHECK_SPECIFIC_ATTRIBUTE([name], [int foo (int arg) __attribute__ ((__name__));], [], []) +# and define the corresponding +# AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NAME, [$pmix_cv___attribute__NAME], +# [Whether your compiler has __attribute__ NAME or not]) +# and decide on a correct macro (in pmix/include/pmix_config_bottom.h): +# # define __pmix_attribute_NAME(x) __attribute__(__NAME__) +# +# Please use the "__"-notation of the attribute in order not to +# clash with predefined names or macros (e.g. const, which some compilers +# do not like..) 
+# + + +AC_DEFUN([PMIX_CHECK_ATTRIBUTES], [ + AC_LANG(C) + AC_MSG_CHECKING(for __attribute__) + + AC_CACHE_VAL(pmix_cv___attribute__, [ + AC_TRY_COMPILE( + [#include + /* Check for the longest available __attribute__ (since gcc-2.3) */ + struct foo { + char a; + int x[2] __attribute__ ((__packed__)); + }; + ], + [], + [pmix_cv___attribute__=1], + [pmix_cv___attribute__=0], + ) + + if test "$pmix_cv___attribute__" = "1" ; then + AC_TRY_COMPILE( + [#include + /* Check for the longest available __attribute__ (since gcc-2.3) */ + struct foo { + char a; + int x[2] __attribute__ ((__packed__)); + }; + ], + [], + [pmix_cv___attribute__=1], + [pmix_cv___attribute__=0], + ) + fi + ]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE, [$pmix_cv___attribute__], + [Whether your compiler has __attribute__ or not]) + +# +# Now that we know the compiler support __attribute__ let's check which kind of +# attributed are supported. +# + if test "$pmix_cv___attribute__" = "0" ; then + AC_MSG_RESULT([no]) + pmix_cv___attribute__aligned=0 + pmix_cv___attribute__always_inline=0 + pmix_cv___attribute__cold=0 + pmix_cv___attribute__const=0 + pmix_cv___attribute__deprecated=0 + pmix_cv___attribute__deprecated_argument=0 + pmix_cv___attribute__format=0 + pmix_cv___attribute__format_funcptr=0 + pmix_cv___attribute__hot=0 + pmix_cv___attribute__malloc=0 + pmix_cv___attribute__may_alias=0 + pmix_cv___attribute__no_instrument_function=0 + pmix_cv___attribute__nonnull=0 + pmix_cv___attribute__noreturn=0 + pmix_cv___attribute__noreturn_funcptr=0 + pmix_cv___attribute__packed=0 + pmix_cv___attribute__pure=0 + pmix_cv___attribute__sentinel=0 + pmix_cv___attribute__unused=0 + pmix_cv___attribute__visibility=0 + pmix_cv___attribute__warn_unused_result=0 + pmix_cv___attribute__destructor=0 + else + AC_MSG_RESULT([yes]) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([aligned], + [struct foo { char text[4]; } __attribute__ ((__aligned__(8)));], + [], + []) + + # + # Ignored by PGI-6.2.5; -- recognized by output-parser + # + 
_PMIX_CHECK_SPECIFIC_ATTRIBUTE([always_inline], + [int foo (int arg) __attribute__ ((__always_inline__));], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([cold], + [ + int foo(int arg1, int arg2) __attribute__ ((__cold__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([const], + [ + int foo(int arg1, int arg2) __attribute__ ((__const__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([deprecated], + [ + int foo(int arg1, int arg2) __attribute__ ((__deprecated__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([deprecated_argument], + [ + int foo(int arg1, int arg2) __attribute__ ((__deprecated__("compiler allows argument"))); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + ATTRIBUTE_CFLAGS= + case "$pmix_c_vendor" in + gnu) + ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we want specifically the warning on format string conversion + ATTRIBUTE_CFLAGS="-we181" + ;; + esac + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([format], + [ + int this_printf (void *my_object, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3))); + ], + [ + static int usage (int * argument); + extern int this_printf (int arg1, const char *my_format, ...) 
__attribute__ ((__format__ (__printf__, 2, 3))); + + static int usage (int * argument) { + return this_printf (*argument, "%d", argument); /* This should produce a format warning */ + } + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$ATTRIBUTE_CFLAGS]) + + ATTRIBUTE_CFLAGS= + case "$pmix_c_vendor" in + gnu) + ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we want specifically the warning on format string conversion + ATTRIBUTE_CFLAGS="-we181" + ;; + esac + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([format_funcptr], + [ + int (*this_printf)(void *my_object, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3))); + ], + [ + static int usage (int * argument); + extern int (*this_printf) (int arg1, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3))); + + static int usage (int * argument) { + return (*this_printf) (*argument, "%d", argument); /* This should produce a format warning */ + } + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$ATTRIBUTE_CFLAGS]) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([hot], + [ + int foo(int arg1, int arg2) __attribute__ ((__hot__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([malloc], + [ +#ifdef HAVE_STDLIB_H +# include +#endif + int * foo(int arg1) __attribute__ ((__malloc__)); + int * foo(int arg1) { return (int*) malloc(arg1); } + ], + [], + []) + + + # + # Attribute may_alias: No suitable cross-check available, that works for non-supporting compilers + # Ignored by intel-9.1.045 -- turn off with -wd1292 + # Ignored by PGI-6.2.5; ignore not detected due to missing cross-check + # + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([may_alias], + [int * p_value __attribute__ ((__may_alias__));], + [], + []) + + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([no_instrument_function], + [int * foo(int arg1) __attribute__ 
((__no_instrument_function__));], + [], + []) + + + # + # Attribute nonnull: + # Ignored by intel-compiler 9.1.045 -- recognized by cross-check + # Ignored by PGI-6.2.5 (pgCC) -- recognized by cross-check + # + ATTRIBUTE_CFLAGS= + case "$pmix_c_vendor" in + gnu) + ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we do not want to get ignored attributes warnings, but rather real warnings + ATTRIBUTE_CFLAGS="-wd1292" + ;; + esac + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([nonnull], + [ + int square(int *arg) __attribute__ ((__nonnull__)); + int square(int *arg) { return *arg; } + ], + [ + static int usage(int * argument); + int square(int * argument) __attribute__ ((__nonnull__)); + int square(int * argument) { return (*argument) * (*argument); } + + static int usage(int * argument) { + return square( ((void*)0) ); /* This should produce an argument must be nonnull warning */ + } + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$ATTRIBUTE_CFLAGS]) + + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([noreturn], + [ +#ifdef HAVE_UNISTD_H +# include +#endif +#ifdef HAVE_STDLIB_H +# include +#endif + void fatal(int arg1) __attribute__ ((__noreturn__)); + void fatal(int arg1) { exit(arg1); } + ], + [], + []) + + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([noreturn_funcptr], + [ +#ifdef HAVE_UNISTD_H +# include +#endif +#ifdef HAVE_STDLIB_H +# include +#endif + extern void (*fatal_exit)(int arg1) __attribute__ ((__noreturn__)); + void fatal(int arg1) { fatal_exit (arg1); } + ], + [], + [$ATTRIBUTE_CFLAGS]) + + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([packed], + [ + struct foo { + char a; + int x[2] __attribute__ ((__packed__)); + }; + ], + [], + []) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([pure], + [ + int square(int arg) __attribute__ ((__pure__)); + int square(int arg) { return arg * arg; } + ], + [], + []) + + # + # Attribute sentinel: + # Ignored by the intel-9.1.045 -- recognized by cross-check + # intel-10.0beta works fine + # Ignored by PGI-6.2.5 
(pgCC) -- recognized by output-parser and cross-check + # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore) + # + ATTRIBUTE_CFLAGS= + case "$pmix_c_vendor" in + gnu) + ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we do not want to get ignored attributes warnings + ATTRIBUTE_CFLAGS="-wd1292" + ;; + esac + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([sentinel], + [ + int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__)); + ], + [ + static int usage(int * argument); + int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__)); + + static int usage(int * argument) { + void * last_arg_should_be_null = argument; + return my_execlp ("lala", "/home/there", last_arg_should_be_null); /* This should produce a warning */ + } + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$ATTRIBUTE_CFLAGS]) + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([unused], + [ + int square(int arg1 __attribute__ ((__unused__)), int arg2); + int square(int arg1, int arg2) { return arg2; } + ], + [], + []) + + + # + # Ignored by PGI-6.2.5 (pgCC) -- recognized by the output-parser + # + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([visibility], + [ + int square(int arg1) __attribute__ ((__visibility__("hidden"))); + ], + [], + []) + + + # + # Attribute warn_unused_result: + # Ignored by the intel-compiler 9.1.045 -- recognized by cross-check + # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore) + # + ATTRIBUTE_CFLAGS= + case "$pmix_c_vendor" in + gnu) + ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we do not want to get ignored attributes warnings + ATTRIBUTE_CFLAGS="-wd1292" + ;; + esac + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([warn_unused_result], + [ + int foo(int arg) __attribute__ ((__warn_unused_result__)); + int foo(int arg) { return arg + 3; } + ], + [ + static int usage(int * argument); + int foo(int arg) __attribute__ ((__warn_unused_result__)); + + int foo(int arg) { 
return arg + 3; } + static int usage(int * argument) { + foo (*argument); /* Should produce an unused result warning */ + return 0; + } + + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$ATTRIBUTE_CFLAGS]) + + + _PMIX_CHECK_SPECIFIC_ATTRIBUTE([destructor], + [ + void foo(void) __attribute__ ((__destructor__)); + void foo(void) { return ; } + ], + [], + []) + fi + + # Now that all the values are set, define them + + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_ALIGNED, [$pmix_cv___attribute__aligned], + [Whether your compiler has __attribute__ aligned or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_ALWAYS_INLINE, [$pmix_cv___attribute__always_inline], + [Whether your compiler has __attribute__ always_inline or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_COLD, [$pmix_cv___attribute__cold], + [Whether your compiler has __attribute__ cold or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_CONST, [$pmix_cv___attribute__const], + [Whether your compiler has __attribute__ const or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_DEPRECATED, [$pmix_cv___attribute__deprecated], + [Whether your compiler has __attribute__ deprecated or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_DEPRECATED_ARGUMENT, [$pmix_cv___attribute__deprecated_argument], + [Whether your compiler has __attribute__ deprecated with optional argument]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_FORMAT, [$pmix_cv___attribute__format], + [Whether your compiler has __attribute__ format or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_FORMAT_FUNCPTR, [$pmix_cv___attribute__format_funcptr], + [Whether your compiler has __attribute__ format and it works on function pointers]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_HOT, [$pmix_cv___attribute__hot], + [Whether your compiler has __attribute__ hot or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_MALLOC, [$pmix_cv___attribute__malloc], + [Whether your compiler has __attribute__ malloc or not]) + 
AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_MAY_ALIAS, [$pmix_cv___attribute__may_alias], + [Whether your compiler has __attribute__ may_alias or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION, [$pmix_cv___attribute__no_instrument_function], + [Whether your compiler has __attribute__ no_instrument_function or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NONNULL, [$pmix_cv___attribute__nonnull], + [Whether your compiler has __attribute__ nonnull or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NORETURN, [$pmix_cv___attribute__noreturn], + [Whether your compiler has __attribute__ noreturn or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NORETURN_FUNCPTR, [$pmix_cv___attribute__noreturn_funcptr], + [Whether your compiler has __attribute__ noreturn and it works on function pointers]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_PACKED, [$pmix_cv___attribute__packed], + [Whether your compiler has __attribute__ packed or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_PURE, [$pmix_cv___attribute__pure], + [Whether your compiler has __attribute__ pure or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_SENTINEL, [$pmix_cv___attribute__sentinel], + [Whether your compiler has __attribute__ sentinel or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_UNUSED, [$pmix_cv___attribute__unused], + [Whether your compiler has __attribute__ unused or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_VISIBILITY, [$pmix_cv___attribute__visibility], + [Whether your compiler has __attribute__ visibility or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT, [$pmix_cv___attribute__warn_unused_result], + [Whether your compiler has __attribute__ warn unused result or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_WEAK_ALIAS, [$pmix_cv___attribute__weak_alias], + [Whether your compiler has __attribute__ weak alias or not]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_DESTRUCTOR, [$pmix_cv___attribute__destructor], + [Whether your compiler has __attribute__ destructor or 
not]) +]) diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_broken_qsort.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_broken_qsort.m4 new file mode 100644 index 00000000000..da4d14047e3 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_broken_qsort.m4 @@ -0,0 +1,55 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl +dnl There was some mentioning of broken qsort happened for Solaris that could +dnl cause qsort to return a bad pointer which could cause some badness. +dnl The problem should have been corrected with these patches from SunSolve. +dnl Solaris 10 should be free from this problem. +dnl +dnl 5.8_sparc #108827-27 or later +dnl 5.8_x86 #108828-28 or later +dnl 5.9_sparc #112874-20 or later +dnl 5.9_x86 #114432-07 or later +dnl +dnl For users who could not patch their systems or are convinced that their +dnl native qsort is broken, they could specify this configure flag to use +dnl the pmix_qsort instead. 
+ +# check for broken qsort: defines PMIX_HAVE_BROKEN_QSORT to 1 when $with_broken_qsort is "yes", 0 otherwise +# PMIX_CHECK_BROKEN_QSORT() -- the macro body references no arguments and probes nothing itself +# -------------------------------------------------------- +AC_DEFUN([PMIX_CHECK_BROKEN_QSORT],[ + AC_ARG_WITH([broken-qsort], + [AC_HELP_STRING([--with-broken-qsort], + [Build with FreeBSD qsort instead of native qsort (default: no)])]) + AC_MSG_CHECKING([for broken qsort]) + + if test "$with_broken_qsort" = "yes"; then + result="yes" + define_result=1 + else + result="no" + define_result=0 + fi + AC_MSG_RESULT([$result]) + AC_DEFINE_UNQUOTED([PMIX_HAVE_BROKEN_QSORT], [$define_result], + [whether qsort is broken or not]) +]) diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_compiler_version.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_compiler_version.m4 new file mode 100644 index 00000000000..78343b9126d --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_compiler_version.m4 @@ -0,0 +1,90 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. +dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +dnl +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + + +# PMIX_CHECK_COMPILER_VERSION_ID() +# ---------------------------------------------------- +# Try to figure out the compiler's name and version to detect cases, +# where users compile PMIx with one version and compile the application +# with a different compiler.
+# +AC_DEFUN([PMIX_CHECK_COMPILER_VERSION_ID], +[ + PMIX_CHECK_COMPILER(FAMILYID) + PMIX_CHECK_COMPILER_STRINGIFY(FAMILYNAME) + PMIX_CHECK_COMPILER(VERSION) + PMIX_CHECK_COMPILER_STRINGIFY(VERSION_STR) +])dnl + + +AC_DEFUN([PMIX_CHECK_COMPILER], [ + lower=m4_tolower($1) + AC_CACHE_CHECK([for compiler $lower], pmix_cv_compiler_[$1], + [ + CPPFLAGS_orig=$CPPFLAGS + CPPFLAGS="-I${top_pmix_srcdir}/src/include $CPPFLAGS" + AC_TRY_RUN([ +#include +#include + +int main (int argc, char * argv[]) +{ + FILE * f; + f=fopen("conftestval", "w"); + if (!f) exit(1); + fprintf (f, "%d", PLATFORM_COMPILER_$1); + return 0; +} + ], [ + eval pmix_cv_compiler_$1=`cat conftestval`; + ], [ + eval pmix_cv_compiler_$1=0 + ], [ + eval pmix_cv_compiler_$1=0 + ]) + CPPFLAGS=$CPPFLAGS_orig + ]) + AC_DEFINE_UNQUOTED([PMIX_BUILD_PLATFORM_COMPILER_$1], $pmix_cv_compiler_[$1], + [The compiler $lower which PMIx was built with]) +])dnl + + +AC_DEFUN([PMIX_CHECK_COMPILER_STRINGIFY], [ + lower=m4_tolower($1) + AC_CACHE_CHECK([for compiler $lower], pmix_cv_compiler_[$1], + [ + CPPFLAGS_orig=$CPPFLAGS + CPPFLAGS="-I${top_pmix_srcdir}/src/include $CPPFLAGS" + AC_TRY_RUN([ +#include +#include + +int main (int argc, char * argv[]) +{ + FILE * f; + f=fopen("conftestval", "w"); + if (!f) exit(1); + fprintf (f, "%s", _STRINGIFY(PLATFORM_COMPILER_$1)); + return 0; +} + ], [ + eval pmix_cv_compiler_$1=`cat conftestval`; + ], [ + eval pmix_cv_compiler_$1=UNKNOWN + ], [ + eval pmix_cv_compiler_$1=UNKNOWN + ]) + CPPFLAGS=$CPPFLAGS_orig + ]) + AC_DEFINE_UNQUOTED([PMIX_BUILD_PLATFORM_COMPILER_$1], $pmix_cv_compiler_[$1], + [The compiler $lower which PMIX was built with]) +])dnl diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_icc.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_icc.m4 new file mode 100644 index 00000000000..e8a06b25148 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_icc.m4 @@ -0,0 +1,62 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana 
University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2014 Intel, Inc. All rights reserved. +dnl Copyright (c) 2016 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +AC_DEFUN([PMIX_CHECK_ICC_VARARGS],[ +dnl +dnl On EM64T, icc-8.1 before version 8.1.027 segfaulted, since +dnl va_start was miscompiled... +dnl +AC_MSG_CHECKING([whether icc-8.1 for EM64T works with variable arguments]) +AC_TRY_RUN([ +#include +#include +#include + +void func (int c, char * f, ...) 
+{ + va_list arglist; + va_start (arglist, f); + /* vprintf (f, arglist); */ + va_end (arglist); +} + +int main () +{ + FILE *f; + func (4711, "Help %d [%s]\n", 10, "ten"); + f=fopen ("conftestval", "w"); + if (!f) exit (1); + return 0; +} + +],[pmix_ac_icc_varargs=`test -f conftestval`],[pmix_ac_icc_varargs=1],[pmix_ac_icc_varargs=1]) + +if test "$pmix_ac_icc_varargs" = "1"; then + AC_MSG_WARN([*** Problem running configure test!]) + AC_MSG_WARN([*** Your icc-8.1 compiler seems to miscompile va_start!]) + AC_MSG_WARN([*** Please upgrade compiler to at least version 8.1.027]) + AC_MSG_ERROR([*** Cannot continue.]) +fi + +AC_MSG_RESULT([yes]) + +rm -rf conftest*])dnl diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_ident.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_ident.m4 new file mode 100644 index 00000000000..de2fa573bc9 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_ident.m4 @@ -0,0 +1,103 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2015 Intel, Inc. 
All rights reserved +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl +dnl defines: +dnl PMIX_$1_USE_PRAGMA_IDENT +dnl PMIX_$1_USE_IDENT, PMIX_$1_USE_PRAGMA_COMMENT +dnl PMIX_$1_USE_CONST_CHAR_IDENT +dnl + +# PMIX_CHECK_IDENT(compiler-env, compiler-flags, +# file-suffix, lang) Try, in order, #pragma ident, #ident and +# #pragma comment(exestr, ...) through _PMIX_CHECK_IDENT; if none of them +# succeeds fall back to static const char[]; one 0/1 define is emitted per style +# ----------------------------------------------------------- +AC_DEFUN([PMIX_CHECK_IDENT], [ + AC_MSG_CHECKING([for $4 ident string support]) + pmix_pragma_comment_happy=0 # initialized so its define below never expands to an empty value + pmix_pragma_ident_happy=0 + pmix_ident_happy=0 + pmix_static_const_char_happy=0 + _PMIX_CHECK_IDENT( + [$1], [$2], [$3], + [[#]pragma ident], [], + [pmix_pragma_ident_happy=1 + pmix_message="[#]pragma ident"], + _PMIX_CHECK_IDENT( + [$1], [$2], [$3], + [[#]ident], [], + [pmix_ident_happy=1 + pmix_message="[#]ident"], + _PMIX_CHECK_IDENT( + [$1], [$2], [$3], + [[#]pragma comment(exestr, ], [)], + [pmix_pragma_comment_happy=1 + pmix_message="[#]pragma comment"], + [pmix_static_const_char_happy=1 + pmix_message="static const char[[]]"]))) + + AC_DEFINE_UNQUOTED([PMIX_$1_USE_PRAGMA_IDENT], + [$pmix_pragma_ident_happy], [Use #pragma ident strings for $4 files]) + AC_DEFINE_UNQUOTED([PMIX_$1_USE_IDENT], + [$pmix_ident_happy], [Use #ident strings for $4 files]) + AC_DEFINE_UNQUOTED([PMIX_$1_USE_PRAGMA_COMMENT], + [$pmix_pragma_comment_happy], [Use #pragma comment for $4 files]) + AC_DEFINE_UNQUOTED([PMIX_$1_USE_CONST_CHAR_IDENT], + [$pmix_static_const_char_happy], [Use static const char[] strings for $4 files]) + + AC_MSG_RESULT([$pmix_message]) + + unset pmix_pragma_ident_happy pmix_ident_happy pmix_pragma_comment_happy pmix_static_const_char_happy pmix_message +]) + +# _PMIX_CHECK_IDENT(compiler-env, compiler-flags, +# file-suffix, header_prefix, header_suffix, action-if-success, action-if-fail) +# Try to compile a source file containing a #-style ident, +# and determine whether the ident was inserted into the +# resulting object file
+# ----------------------------------------------------------- +AC_DEFUN([_PMIX_CHECK_IDENT], [ + eval pmix_compiler="\$$1" + eval pmix_flags="\$$2" + + pmix_ident="string_not_coincidentally_inserted_by_the_compiler" + cat > conftest.$3 <&5 + pmix_output=`$pmix_compiler $pmix_flags -c conftest.$3 -o conftest.${OBJEXT} 2>&1 1>/dev/null` + pmix_status=$? + AS_IF([test $pmix_status = 0], + [test -z "$pmix_output" + pmix_status=$?]) + PMIX_LOG_MSG([\$? = $pmix_status], 1) + AS_IF([test $pmix_status = 0 && test -f conftest.${OBJEXT}], + [pmix_output="`strings -a conftest.${OBJEXT} | grep $pmix_ident`" + grep $pmix_ident conftest.${OBJEXT} 2>&1 1>/dev/null + pmix_status=$? + AS_IF([test "$pmix_output" != "" || test "$pmix_status" = "0"], + [$6], + [$7])], + [PMIX_LOG_MSG([the failed program was:]) + PMIX_LOG_FILE([conftest.$3]) + $7]) + + unset pmix_compiler pmix_flags pmix_output pmix_status + rm -rf conftest.* conftest${EXEEXT} +])dnl diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_lock.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_lock.m4 new file mode 100644 index 00000000000..7655b12979c --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_lock.m4 @@ -0,0 +1,60 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2017 Mellanox Technologies, Inc. +dnl All rights reserved. +dnl Copyright (c) 2017 IBM Corporation. All rights reserved. +dnl Copyright (c) 2017 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. 
+dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl +# PMIX_CHECK_DSTOR_LOCK(): detect fcntl locking (struct flock) and, when DSTORE_PTHREAD_LOCK is 1, process-shared pthread rwlocks; configure aborts if no locking mechanism is available +AC_DEFUN([PMIX_CHECK_DSTOR_LOCK],[ + orig_libs=$LIBS + LIBS="-lpthread $LIBS" + + _x_ac_pthread_lock_found="0" + _x_ac_fcntl_lock_found="0" + + AC_CHECK_MEMBERS([struct flock.l_type], + [ + AC_DEFINE([HAVE_FCNTL_FLOCK], [1], + [Define to 1 if you have the locking by fcntl.]) + _x_ac_fcntl_lock_found="1" + ], [], [#include <fcntl.h>]) + # comparisons below use = because == is not accepted by every POSIX test implementation + if test "$DSTORE_PTHREAD_LOCK" = "1"; then + AC_CHECK_FUNC([pthread_rwlockattr_setkind_np], + [AC_EGREP_HEADER([PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP], + [pthread.h],[ + AC_DEFINE([HAVE_PTHREAD_SETKIND], [1], + [Define to 1 if you have the `pthread_rwlockattr_setkind_np` function.])])]) + + AC_CHECK_FUNC([pthread_rwlockattr_setpshared], + [AC_EGREP_HEADER([PTHREAD_PROCESS_SHARED], + [pthread.h],[ + AC_DEFINE([HAVE_PTHREAD_SHARED], [1], + [Define to 1 if you have the `PTHREAD_PROCESS_SHARED` definition. + ]) + _x_ac_pthread_lock_found="1" + ]) + ]) + + if test "$_x_ac_pthread_lock_found" = "0"; then + if test "$_x_ac_fcntl_lock_found" = "1"; then + AC_MSG_WARN([dstore: pthread-based locking not found, will use fcntl-based locking.]) + else + AC_MSG_ERROR([dstore: no available locking mechanisms was found. Can not continue. Try disabling dstore]) + fi + fi + else + if test "$_x_ac_fcntl_lock_found" = "0"; then + AC_MSG_ERROR([dstore: no available locking mechanisms was found. Can not continue. Try disabling dstore]) + fi + LIBS="$orig_libs" + fi + +]) diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_munge.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_munge.m4 new file mode 100644 index 00000000000..19b1ab4b1db --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_munge.m4 @@ -0,0 +1,83 @@ +# -*- shell-script -*- +# +# Copyright (c) 2015-2016 Intel, Inc. All rights reserved +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# PMIX_MUNGE_CONFIG() -- handle --with-munge / --with-munge-libdir, probe munge.h and -lmunge via PMIX_CHECK_PACKAGE, and define PMIX_WANT_MUNGE to 0 or 1 (the body references no arguments) +# -------------------------------------------------------------------- +AC_DEFUN([PMIX_MUNGE_CONFIG],[ + + PMIX_VAR_SCOPE_PUSH([pmix_munge_dir pmix_munge_libdir]) + + AC_ARG_WITH([munge], + [AC_HELP_STRING([--with-munge=DIR], + [Search for munge headers and libraries in DIR ])]) + + AC_ARG_WITH([munge-libdir], + [AC_HELP_STRING([--with-munge-libdir=DIR], + [Search for munge libraries in DIR ])]) + + pmix_munge_support=0 + if test ! -z "$with_munge" && test "$with_munge" != "no"; then + AC_MSG_CHECKING([for munge in]) + if test "$with_munge" != "yes"; then + if test -d $with_munge/include/munge; then + pmix_munge_dir=$with_munge/include/munge + else + pmix_munge_dir=$with_munge + fi + if test -d $with_munge/lib; then + pmix_munge_libdir=$with_munge/lib + elif test -d $with_munge/lib64; then + pmix_munge_libdir=$with_munge/lib64 + else + AC_MSG_RESULT([Could not find $with_munge/lib or $with_munge/lib64]) + AC_MSG_ERROR([Can not continue]) + fi + AC_MSG_RESULT([$pmix_munge_dir and $pmix_munge_libdir]) + else + AC_MSG_RESULT([(default search paths)]) + pmix_munge_dir= + fi + AS_IF([test ! -z "$with_munge_libdir" && test "$with_munge_libdir" != "yes"], + [pmix_munge_libdir="$with_munge_libdir"]) + + PMIX_CHECK_PACKAGE([pmix_munge], + [munge.h], + [munge], + [munge_encode], + [-lmunge], + [$pmix_munge_dir], + [$pmix_munge_libdir], + [pmix_munge_support=1], + [pmix_munge_support=0]) + if test "$pmix_munge_support" = "1"; then + CPPFLAGS="$pmix_munge_CPPFLAGS $CPPFLAGS" + LIBS="$LIBS -lmunge" + LDFLAGS="$pmix_munge_LDFLAGS $LDFLAGS" + fi + fi + + if test !
-z "$with_munge" && test "$with_munge" != "no" && test "$pmix_munge_support" != "1"; then + AC_MSG_WARN([MUNGE SUPPORT REQUESTED AND NOT FOUND.]) + AC_MSG_ERROR([CANNOT CONTINUE]) + fi + + AC_MSG_CHECKING([will munge support be built]) + if test "$pmix_munge_support" != "1"; then + AC_MSG_RESULT([no]) + else + AC_MSG_RESULT([yes]) + fi + + AC_DEFINE_UNQUOTED([PMIX_WANT_MUNGE], [$pmix_munge_support], + [Whether we want munge support or not]) + + PMIX_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_package.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_package.m4 new file mode 100644 index 00000000000..f4833c3b0ae --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_package.m4 @@ -0,0 +1,176 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2014 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# _PMIX_CHECK_PACKAGE_HEADER(prefix, header, dir-prefix, +# [action-if-found], [action-if-not-found], +# includes) +# -------------------------------------------------------------------- +AC_DEFUN([_PMIX_CHECK_PACKAGE_HEADER], [ + # This is stolen from autoconf to peek under the covers to get the + # cache variable for the library check.
one should not copy this + # code into other places unless you want much pain and suffering + AS_VAR_PUSHDEF([pmix_Header], [ac_cv_header_$2]) + + # so this sucks, but there's no way to get through the progression + # of header includes without killing off the cache variable and trying + # again... + unset pmix_Header + + pmix_check_package_header_happy="no" + AS_IF([test "$3" = "/usr" || test "$3" = "/usr/local"], + [ # try as is... + AC_VERBOSE([looking for header without includes]) + AC_CHECK_HEADERS([$2], [pmix_check_package_header_happy="yes"], []) + AS_IF([test "$pmix_check_package_header_happy" = "no"], + [# no go on the as is - reset the cache and try again + unset pmix_Header])]) + + AS_IF([test "$pmix_check_package_header_happy" = "no"], + [AS_IF([test "$3" != ""], + [$1_CPPFLAGS="$$1_CPPFLAGS -I$3/include" + CPPFLAGS="$CPPFLAGS -I$3/include"]) + AC_CHECK_HEADERS([$2], [pmix_check_package_header_happy="yes"], [], [$6]) + AS_IF([test "$pmix_check_package_header_happy" = "yes"], [$4], [$5])], + [$4]) + unset pmix_check_package_header_happy + + AS_VAR_POPDEF([pmix_Header])dnl +]) + + +# _PMIX_CHECK_PACKAGE_LIB(prefix, library, function, extra-libraries, +# dir-prefix, libdir, +# [action-if-found], [action-if-not-found]]) +# -------------------------------------------------------------------- +AC_DEFUN([_PMIX_CHECK_PACKAGE_LIB], [ + # This is stolen from autoconf to peek under the covers to get the + # cache variable for the library check. 
one should not copy this + # code into other places unless you want much pain and suffering + AS_LITERAL_IF([$2], + [AS_VAR_PUSHDEF([pmix_Lib], [ac_cv_lib_$2_$3])], + [AS_VAR_PUSHDEF([pmix_Lib], [ac_cv_lib_$2''_$3])])dnl + + # see comment above + unset pmix_Lib + pmix_check_package_lib_happy="no" + AS_IF([test "$6" != ""], + [ # libdir was specified - search only there + $1_LDFLAGS="$$1_LDFLAGS -L$6" + LDFLAGS="$LDFLAGS -L$6" + AC_CHECK_LIB([$2], [$3], + [pmix_check_package_lib_happy="yes"], + [pmix_check_package_lib_happy="no"], [$4]) + AS_IF([test "$pmix_check_package_lib_happy" = "no"], + [LDFLAGS="$pmix_check_package_$1_save_LDFLAGS" + $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS" + unset pmix_Lib])], + [ # libdir was not specified - go through search path + pmix_check_package_libdir="$5" + AS_IF([test "$pmix_check_package_libdir" = "" || test "$pmix_check_package_libdir" = "/usr" || test "$pmix_check_package_libdir" = "/usr/local"], + [ # try as is... + AC_VERBOSE([looking for library without search path]) + AC_CHECK_LIB([$2], [$3], + [pmix_check_package_lib_happy="yes"], + [pmix_check_package_lib_happy="no"], [$4]) + AS_IF([test "$pmix_check_package_lib_happy" = "no"], + [ # no go on the as is.. see what happens later... + LDFLAGS="$pmix_check_package_$1_save_LDFLAGS" + $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS" + unset pmix_Lib])]) + + AS_IF([test "$pmix_check_package_lib_happy" = "no"], + [AS_IF([test "$pmix_check_package_libdir" != ""], + [$1_LDFLAGS="$$1_LDFLAGS -L$pmix_check_package_libdir/lib" + LDFLAGS="$LDFLAGS -L$pmix_check_package_libdir/lib" + AC_VERBOSE([looking for library in lib]) + AC_CHECK_LIB([$2], [$3], + [pmix_check_package_lib_happy="yes"], + [pmix_check_package_lib_happy="no"], [$4]) + AS_IF([test "$pmix_check_package_lib_happy" = "no"], + [ # no go on the as is.. see what happens later... 
+ LDFLAGS="$pmix_check_package_$1_save_LDFLAGS" + $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS" + unset pmix_Lib])])]) + + AS_IF([test "$pmix_check_package_lib_happy" = "no"], + [AS_IF([test "$pmix_check_package_libdir" != ""], + [$1_LDFLAGS="$$1_LDFLAGS -L$pmix_check_package_libdir/lib64" + LDFLAGS="$LDFLAGS -L$pmix_check_package_libdir/lib64" + AC_VERBOSE([looking for library in lib64]) + AC_CHECK_LIB([$2], [$3], + [pmix_check_package_lib_happy="yes"], + [pmix_check_package_lib_happy="no"], [$4]) + AS_IF([test "$pmix_check_package_lib_happy" = "no"], + [ # no go on the as is.. see what happens later... + LDFLAGS="$pmix_check_package_$1_save_LDFLAGS" + $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS" + unset pmix_Lib])])])]) + + AS_IF([test "$pmix_check_package_lib_happy" = "yes"], + [$1_LIBS="-l$2 $4" + $7], [$8]) + + AS_VAR_POPDEF([pmix_Lib])dnl +]) + + +# PMIX_CHECK_PACKAGE(prefix, +# header, +# library, +# function, +# extra-libraries, +# dir-prefix, +# libdir-prefix, +# [action-if-found], [action-if-not-found], +# includes) +# ----------------------------------------------------------- +# check for package defined by header and libs, and probably +# located in dir-prefix, possibly with libs in libdir-prefix. +# Both dir-prefix and libdir-prefix can be empty. 
Will set +# prefix_{CPPFLAGS, LDFLAGS, LIBS} as needed +AC_DEFUN([PMIX_CHECK_PACKAGE],[ + pmix_check_package_$1_save_CPPFLAGS="$CPPFLAGS" + pmix_check_package_$1_save_LDFLAGS="$LDFLAGS" + pmix_check_package_$1_save_LIBS="$LIBS" + + pmix_check_package_$1_orig_CPPFLAGS="$$1_CPPFLAGS" + pmix_check_package_$1_orig_LDFLAGS="$$1_LDFLAGS" + pmix_check_package_$1_orig_LIBS="$$1_LIBS" + + _PMIX_CHECK_PACKAGE_HEADER([$1], [$2], [$6], + [_PMIX_CHECK_PACKAGE_LIB([$1], [$3], [$4], [$5], [$6], [$7], + [pmix_check_package_happy="yes"], + [pmix_check_package_happy="no"])], + [pmix_check_package_happy="no"], + [$10]) + + AS_IF([test "$pmix_check_package_happy" = "yes"], + [$8], + [$1_CPPFLAGS="$pmix_check_package_$1_orig_CPPFLAGS" + $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS" + $1_LIBS="$pmix_check_package_$1_orig_LIBS" + $9]) + + CPPFLAGS="$pmix_check_package_$1_save_CPPFLAGS" + LDFLAGS="$pmix_check_package_$1_save_LDFLAGS" + LIBS="$pmix_check_package_$1_save_LIBS" +]) diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_sasl.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_sasl.m4 new file mode 100644 index 00000000000..2010637c6a6 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_sasl.m4 @@ -0,0 +1,82 @@ +# -*- shell-script -*- +# +# Copyright (c) 2015 Intel, Inc. All rights reserved +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# PMIX_SASL_CONFIG() -- handle --with-sasl (default: no) / --with-sasl-libdir, probe sasl/sasl.h and -lsasl2 via PMIX_CHECK_PACKAGE, and define PMIX_HAVE_SASL to 0 or 1 (the body references no arguments) +# -------------------------------------------------------------------- +AC_DEFUN([PMIX_SASL_CONFIG],[ + + PMIX_VAR_SCOPE_PUSH([pmix_sasl_dir pmix_sasl_libdir]) + + AC_ARG_WITH([sasl], + [AC_HELP_STRING([--with-sasl=DIR], + [Search for sasl headers and libraries in DIR ])], + [], [with_sasl=no]) + + AC_ARG_WITH([sasl-libdir], + [AC_HELP_STRING([--with-sasl-libdir=DIR], + [Search for sasl libraries in DIR ])]) + + pmix_sasl_support=0 + if test ! -z "$with_sasl" && test "$with_sasl" != "no"; then + AC_MSG_CHECKING([for sasl in]) + if test "$with_sasl" != "yes"; then + pmix_sasl_dir=$with_sasl/include/sasl + if test -d $with_sasl/lib; then + pmix_sasl_libdir=$with_sasl/lib + elif test -d $with_sasl/lib64; then + pmix_sasl_libdir=$with_sasl/lib64 + else + AC_MSG_RESULT([Could not find $with_sasl/lib or $with_sasl/lib64]) + AC_MSG_ERROR([Can not continue]) + fi + AC_MSG_RESULT([$pmix_sasl_dir and $pmix_sasl_libdir]) + else + AC_MSG_RESULT([(default search paths)]) + pmix_sasl_dir= + fi + AS_IF([test ! -z "$with_sasl_libdir" && test "$with_sasl_libdir" != "yes"], + [pmix_sasl_libdir="$with_sasl_libdir"]) + + PMIX_CHECK_PACKAGE([pmix_sasl], + [sasl/sasl.h], + [sasl2], + [sasl_server_init], + [-lsasl2], + [$pmix_sasl_dir], + [$pmix_sasl_libdir], + [pmix_sasl_support=1], + [pmix_sasl_support=0]) + if test "$pmix_sasl_support" = "1"; then + CPPFLAGS="$pmix_sasl_CPPFLAGS $CPPFLAGS" + LIBS="$LIBS -lsasl2" + LDFLAGS="$pmix_sasl_LDFLAGS $LDFLAGS" + fi + fi + + if test !
-z "$with_sasl" && test "$with_sasl" != "no" && test "$pmix_sasl_support" != "1"; then + AC_MSG_WARN([SASL SUPPORT REQUESTED AND NOT FOUND.]) + AC_MSG_ERROR([CANNOT CONTINUE]) + fi + + AC_MSG_CHECKING([will sasl support be built]) + if test "$pmix_sasl_support" != "1"; then + AC_MSG_RESULT([no]) + else + AC_MSG_RESULT([yes]) + fi + + AC_DEFINE_UNQUOTED(PMIX_HAVE_SASL, [$pmix_sasl_support], + [Whether we have sasl support or not]) + + PMIX_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_vendor.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_vendor.m4 new file mode 100644 index 00000000000..ba3f1a5a8d8 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_vendor.m4 @@ -0,0 +1,252 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. +dnl Copyright (c) 2013 Intel, Inc. All rights reserved +dnl Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + + +# PMIX_C_COMPILER_VENDOR(VENDOR_VARIABLE) +# --------------------------------------- +# Set shell variable VENDOR_VARIABLE to the name of the compiler +# vendor for the current C compiler. +# +# See comment for _PMIX_CHECK_COMPILER_VENDOR for a complete +# list of currently detected compilers.
+AC_DEFUN([PMIX_C_COMPILER_VENDOR], [ + AC_REQUIRE([AC_PROG_CC]) + + AC_CACHE_CHECK([for the C compiler vendor], + [pmix_cv_c_compiler_vendor], + [AC_LANG_PUSH(C) + _PMIX_CHECK_COMPILER_VENDOR([pmix_cv_c_compiler_vendor]) + AC_LANG_POP(C)]) + + $1="$pmix_cv_c_compiler_vendor" +]) + + +# workaround to avoid syntax error with Autoconf < 2.68: +m4_ifndef([AC_LANG_DEFINES_PROVIDED], + [m4_define([AC_LANG_DEFINES_PROVIDED])]) + +# PMIX_IFDEF_IFELSE(symbol, [action-if-defined], +# [action-if-not-defined]) +# ---------------------------------------------- +# Run compiler to determine if preprocessor symbol "symbol" is +# defined by the compiler. +AC_DEFUN([PMIX_IFDEF_IFELSE], [ + AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED +#ifndef $1 +#error "symbol $1 not defined" +choke me +#endif], [$2], [$3])]) + + +# PMIX_IF_IFELSE(condition, [action-if-true], +# [action-if-false]) +# ---------------------------------------------- +# Run compiler to determine if the preprocessor expression +# "condition" evaluates to true in an #if directive. +AC_DEFUN([PMIX_IF_IFELSE], [ + AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED +#if !( $1 ) +#error "condition $1 not met" +choke me +#endif], [$2], [$3])]) + + +# _PMIX_CHECK_COMPILER_VENDOR(VENDOR_VARIABLE) +# -------------------------------------------- +# Set shell variable VENDOR_VARIABLE to the name of the compiler +# vendor for the compiler for the current language. Language must be +# one of C, OBJC, or C++. +# +# thanks to http://predef.sourceforge.net/precomp.html for the list +# of defines to check. +AC_DEFUN([_PMIX_CHECK_COMPILER_VENDOR], [ + pmix_check_compiler_vendor_result="unknown" + + # GNU is probably the most common, so check that one as soon as + # possible. Intel pretends to be GNU, so need to check Intel + # before checking for GNU. 
+ + # Intel + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IF_IFELSE([defined(__INTEL_COMPILER) || defined(__ICC)], + [pmix_check_compiler_vendor_result="intel"])]) + + # Fujitsu + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IF_IFELSE([defined(__FUJITSU)], + [pmix_check_compiler_vendor_result="fujitsu"])]) + + # GNU + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__GNUC__], + [pmix_check_compiler_vendor_result="gnu" + + # We do not support gccfss as a compiler so die if + # someone tries to use said compiler. gccfss (gcc + # for SPARC Systems) is a compiler that is no longer + # supported by Oracle and it has some major flaws + # that prevents it from actually compiling PMIX code. + # So if we detect it we automatically bail. + + if ($CC --version | grep gccfss) >/dev/null 2>&1; then + AC_MSG_RESULT([gccfss]) + AC_MSG_WARN([Detected gccfss being used to compile PMIx.]) + AC_MSG_WARN([Because of several issues PMIx does not support]) + AC_MSG_WARN([the gccfss compiler. 
Please use a different compiler.]) + AC_MSG_WARN([If you didn't think you used gccfss you may want to]) + AC_MSG_WARN([check to see if the compiler you think you used is]) + AC_MSG_WARN([actually a link to gccfss.]) + AC_MSG_ERROR([Cannot continue]) + fi])]) + + # Borland Turbo C + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__TURBOC__], + [pmix_check_compiler_vendor_result="borland"])]) + + # Borland C++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__BORLANDC__], + [pmix_check_compiler_vendor_result="borland"])]) + + # Comeau C++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__COMO__], + [pmix_check_compiler_vendor_result="comeau"])]) + + # Compaq C/C++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IF_IFELSE([defined(__DECC) || defined(VAXC) || defined(__VAXC)], + [pmix_check_compiler_vendor_result="compaq"], + [PMIX_IF_IFELSE([defined(__osf__) && defined(__LANGUAGE_C__)], + [pmix_check_compiler_vendor_result="compaq"], + [PMIX_IFDEF_IFELSE([__DECCXX], + [pmix_check_compiler_vendor_result="compaq"])])])]) + + # Cray C/C++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([_CRAYC], + [pmix_check_compiler_vendor_result="cray"])]) + + # Diab C/C++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__DCC__], + [pmix_check_compiler_vendor_result="diab"])]) + + # Digital Mars + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IF_IFELSE([defined(__DMC__) || defined(__SC__) || defined(__ZTC__)], + [pmix_check_compiler_vendor_result="digital mars"])]) + + # HP ANSI C / aC++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IF_IFELSE([defined(__HP_cc) || defined(__HP_aCC)], + [pmix_check_compiler_vendor_result="hp"])]) + + # IBM XL C/C++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + 
[PMIX_IF_IFELSE([defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__)], + [pmix_check_compiler_vendor_result="ibm"], + [PMIX_IF_IFELSE([defined(_AIX) && !defined(__GNUC__)], + [pmix_check_compiler_vendor_result="ibm"])])]) + + # KAI C++ (rest in peace) + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__KCC], + [pmix_check_compiler_vendor_result="kai"])]) + + # LCC + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__LCC__], + [pmix_check_compiler_vendor_result="lcc"])]) + + # MetaWare High C/C++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__HIGHC__], + [pmix_check_compiler_vendor_result="metaware high"])]) + + # Metrowerks Codewarrior + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__MWERKS__], + [pmix_check_compiler_vendor_result="metrowerks"])]) + + # MIPSpro (SGI) + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IF_IFELSE([defined(sgi) || defined(__sgi)], + [pmix_check_compiler_vendor_result="sgi"])]) + + # MPW C++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IF_IFELSE([defined(__MRC__) || defined(MPW_C) || defined(MPW_CPLUS)], + [pmix_check_compiler_vendor_result="mpw"])]) + + # Norcroft C + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__CC_NORCROFT], + [pmix_check_compiler_vendor_result="norcroft"])]) + + # Pelles C + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__POCC__], + [pmix_check_compiler_vendor_result="pelles"])]) + + # Portland Group + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__PGI], + [pmix_check_compiler_vendor_result="portland group"])]) + + # SAS/C + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IF_IFELSE([defined(SASC) || defined(__SASC) || defined(__SASC__)], + 
[pmix_check_compiler_vendor_result="sas"])]) + + # Sun Workshop C/C++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IF_IFELSE([defined(__SUNPRO_C) || defined(__SUNPRO_CC)], + [pmix_check_compiler_vendor_result="sun"])]) + + # TenDRA C/C++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__TenDRA__], + [pmix_check_compiler_vendor_result="tendra"])]) + + # Tiny C + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__TINYC__], + [pmix_check_compiler_vendor_result="tiny"])]) + + # USL C + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__USLC__], + [pmix_check_compiler_vendor_result="usl"])]) + + # Watcom C++ + AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"], + [PMIX_IFDEF_IFELSE([__WATCOMC__], + [pmix_check_compiler_vendor_result="watcom"])]) + + $1="$pmix_check_compiler_vendor_result" + unset pmix_check_compiler_vendor_result +]) diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_visibility.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_visibility.m4 new file mode 100644 index 00000000000..1a4c45cc44f --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_visibility.m4 @@ -0,0 +1,92 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# PMIX_CHECK_VISIBILITY +# -------------------------------------------------------- +AC_DEFUN([PMIX_CHECK_VISIBILITY],[ + AC_REQUIRE([AC_PROG_GREP]) + + # Check if the compiler has support for visibility, like some + # versions of gcc, icc, and Sun Studio cc. + AC_ARG_ENABLE(visibility, + AC_HELP_STRING([--enable-visibility], + [enable visibility feature of certain compilers/linkers (default: enabled)])) + + WANT_VISIBILITY=0 + pmix_msg="whether to enable symbol visibility" + + if test "$enable_visibility" = "no"; then + AC_MSG_CHECKING([$pmix_msg]) + AC_MSG_RESULT([no (disabled)]) + else + CFLAGS_orig=$CFLAGS + + pmix_add= + case "$pmix_c_vendor" in + sun) + # Check using Sun Studio -xldscope=hidden flag + pmix_add=-xldscope=hidden + CFLAGS="$PMIX_CFLAGS_BEFORE_PICKY $pmix_add -errwarn=%all" + ;; + + *) + # Check using -fvisibility=hidden + pmix_add=-fvisibility=hidden + CFLAGS="$PMIX_CFLAGS_BEFORE_PICKY $pmix_add -Werror" + ;; + esac + + AC_MSG_CHECKING([if $CC supports $pmix_add]) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ + #include <stdio.h> + __attribute__((visibility("default"))) int foo; + ]],[[fprintf(stderr, "Hello, world\n");]])], + [AS_IF([test -s conftest.err], + [$GREP -iq visibility conftest.err + # If we find "visibility" in the stderr, then + # assume it doesn't work + AS_IF([test "$?" = "0"], [pmix_add=])]) + ], [pmix_add=]) + AS_IF([test "$pmix_add" = ""], + [AC_MSG_RESULT([no])], + [AC_MSG_RESULT([yes])]) + + CFLAGS=$CFLAGS_orig + PMIX_VISIBILITY_CFLAGS=$pmix_add + + if test "$pmix_add" != "" ; then + WANT_VISIBILITY=1 + CFLAGS="$CFLAGS $PMIX_VISIBILITY_CFLAGS" + AC_MSG_CHECKING([$pmix_msg]) + AC_MSG_RESULT([yes (via $pmix_add)]) + elif test "$enable_visibility" = "yes"; then + AC_MSG_ERROR([Symbol visibility support requested but compiler does not seem to support it. 
Aborting]) + else + AC_MSG_CHECKING([$pmix_msg]) + AC_MSG_RESULT([no (unsupported)]) + fi + unset pmix_add + fi + + AC_DEFINE_UNQUOTED([PMIX_C_HAVE_VISIBILITY], [$WANT_VISIBILITY], + [Whether C compiler supports symbol visibility or not]) + AM_CONDITIONAL([WANT_HIDDEN],[test "$WANT_VISIBILITY" = "1"]) +]) diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_ensure_contains_optflags.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_ensure_contains_optflags.m4 new file mode 100644 index 00000000000..68bf36090ba --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_ensure_contains_optflags.m4 @@ -0,0 +1,67 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2013 Intel, Inc. All rights reserved +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +AC_DEFUN([PMIX_ENSURE_CONTAINS_OPTFLAGS],[ + +# Modularize this setup so that sub-configure.in scripts can use this +# same setup code. 
+ +################################## +# Optimization flags +################################## + +# If the user did not specify optimization flags, add some (the value +# from $OPTFLAGS) + +co_arg="$1" +co_found=0 +for co_word in $co_arg; do + # See http://www.gnu.org/software/autoconf/manual/html_node/Quadrigraphs.html#Quadrigraphs + # for an explanation of @<:@ and @:>@ -- they m4 expand to [ and ] + case $co_word in + -g) co_found=1 ;; + -g@<:@1-3@:>@) co_found=1 ;; + +K@<:@0-5@:>@) co_found=1 ;; + -O) co_found=1 ;; + -O@<:@0-9@:>@) co_found=1 ;; + -xO) co_found=1 ;; + -xO@<:@0-9@:>@) co_found=1 ;; + -fast) co_found=1 ;; + + # The below Sun Studio flags require or + # trigger -xO optimization + -xvector*) co_found=1 ;; + -xdepend=yes) co_found=1 ;; + + esac +done + +if test "$co_found" = "0"; then + co_result="$OPTFLAGS $co_arg" +else + co_result="$co_arg" +fi + +# Clean up + +unset co_found co_word co_arg +]) diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_functions.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_functions.m4 new file mode 100644 index 00000000000..5fb6d7a58cd --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_functions.m4 @@ -0,0 +1,533 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. +dnl Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2013 Intel, Inc. 
All rights reserved +dnl +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl +dnl Portions of this file derived from GASNet v1.12 (see "GASNet" +dnl comments, below) +dnl Copyright 2004, Dan Bonachea +dnl +dnl IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR +dnl DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT +dnl OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF +dnl CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +dnl +dnl THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, +dnl INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +dnl AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS +dnl ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO +dnl PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. +dnl + +AC_DEFUN([PMIX_CONFIGURE_SETUP],[ + +# Some helper script functions. Unfortunately, we cannot use $1 kinds +# of arugments here because of the m4 substitution. So we have to set +# special variable names before invoking the function. :-\ + +pmix_show_title() { + cat <@:*) + echo installing to directory \"$prefix\" + ;; + *) + AC_MSG_ERROR(prefix "$prefix" must be an absolute directory path) + ;; +esac + +# BEGIN: Derived from GASNet + +# Suggestion from Paul Hargrove to disable --program-prefix and +# friends. Heavily influenced by GASNet 1.12 acinclude.m4 +# functionality to do the same thing (copyright listed at top of this +# file). 
+ +# echo program_prefix=$program_prefix program_suffix=$program_suffix program_transform_name=$program_transform_name +# undo prefix autoconf automatically adds during cross-compilation +if test "$cross_compiling" = yes && test "$program_prefix" = "${target_alias}-" ; then + program_prefix=NONE +fi +# normalize empty prefix/suffix +if test -z "$program_prefix" ; then + program_prefix=NONE +fi +if test -z "$program_suffix" ; then + program_suffix=NONE +fi +# undo transforms caused by empty prefix/suffix +if expr "$program_transform_name" : 's.^..$' >/dev/null || \ + expr "$program_transform_name" : 's.$$..$' >/dev/null || \ + expr "$program_transform_name" : 's.$$..;s.^..$' >/dev/null ; then + program_transform_name="s,x,x," +fi +if test "$program_prefix$program_suffix$program_transform_name" != "NONENONEs,x,x," ; then + AC_MSG_WARN([*** The PMIx configure script does not support --program-prefix, --program-suffix or --program-transform-name. Users are recommended to instead use --prefix with a unique directory and make symbolic links as desired for renaming.]) + AC_MSG_ERROR([*** Cannot continue]) +fi + +# END: Derived from GASNet +])dnl + +dnl ####################################################################### +dnl ####################################################################### +dnl ####################################################################### + +AC_DEFUN([PMIX_LOG_MSG],[ +# 1 is the message +# 2 is whether to put a prefix or not +if test -n "$2"; then + echo "configure:__oline__: $1" >&5 +else + echo $1 >&5 +fi])dnl + +dnl ####################################################################### +dnl ####################################################################### +dnl ####################################################################### + +AC_DEFUN([PMIX_LOG_FILE],[ +# 1 is the filename +if test -n "$1" && test -f "$1"; then + cat $1 >&5 +fi])dnl + +dnl ####################################################################### +dnl 
####################################################################### +dnl ####################################################################### + +AC_DEFUN([PMIX_LOG_COMMAND],[ +# 1 is the command +# 2 is actions to do if success +# 3 is actions to do if fail +echo "configure:__oline__: $1" >&5 +$1 1>&5 2>&1 +pmix_status=$? +PMIX_LOG_MSG([\$? = $pmix_status], 1) +if test "$pmix_status" = "0"; then + unset pmix_status + $2 +else + unset pmix_status + $3 +fi])dnl + +dnl ####################################################################### +dnl ####################################################################### +dnl ####################################################################### + +AC_DEFUN([PMIX_UNIQ],[ +# 1 is the variable name to be uniq-ized +pmix_name=$1 + +# Go through each item in the variable and only keep the unique ones + +pmix_count=0 +for val in ${$1}; do + pmix_done=0 + pmix_i=1 + pmix_found=0 + + # Loop over every token we've seen so far + + pmix_done="`expr $pmix_i \> $pmix_count`" + while test "$pmix_found" = "0" && test "$pmix_done" = "0"; do + + # Have we seen this token already? Prefix the comparison with + # "x" so that "-Lfoo" values won't be cause an error. + + pmix_eval="expr x$val = x\$pmix_array_$pmix_i" + pmix_found=`eval $pmix_eval` + + # Check the ending condition + + pmix_done="`expr $pmix_i \>= $pmix_count`" + + # Increment the counter + + pmix_i="`expr $pmix_i + 1`" + done + + # Check for special cases where we do want to allow repeated + # arguments (per + # http://www.open-mpi.org/community/lists/devel/2012/08/11362.php). 
+ + case $val in + -Xclang) + pmix_found=0 + pmix_i=`expr $pmix_count + 1` + ;; + esac + + # If we didn't find the token, add it to the "array" + + if test "$pmix_found" = "0"; then + pmix_eval="pmix_array_$pmix_i=$val" + eval $pmix_eval + pmix_count="`expr $pmix_count + 1`" + else + pmix_i="`expr $pmix_i - 1`" + fi +done + +# Take all the items in the "array" and assemble them back into a +# single variable + +pmix_i=1 +pmix_done="`expr $pmix_i \> $pmix_count`" +pmix_newval= +while test "$pmix_done" = "0"; do + pmix_eval="pmix_newval=\"$pmix_newval \$pmix_array_$pmix_i\"" + eval $pmix_eval + + pmix_eval="unset pmix_array_$pmix_i" + eval $pmix_eval + + pmix_done="`expr $pmix_i \>= $pmix_count`" + pmix_i="`expr $pmix_i + 1`" +done + +# Done; do the assignment + +pmix_newval="`echo $pmix_newval`" +pmix_eval="$pmix_name=\"$pmix_newval\"" +eval $pmix_eval + +# Clean up + +unset pmix_name pmix_i pmix_done pmix_newval pmix_eval pmix_count])dnl + +dnl ####################################################################### +dnl ####################################################################### +dnl ####################################################################### + +# PMIX_APPEND_UNIQ(variable, new_argument) +# ---------------------------------------- +# Append new_argument to variable if not already in variable. This assumes a +# space separated list. +# +# This could probably be made more efficient :(. 
+AC_DEFUN([PMIX_APPEND_UNIQ], [ +for arg in $2; do + pmix_found=0; + for val in ${$1}; do + if test "x$val" = "x$arg" ; then + pmix_found=1 + break + fi + done + if test "$pmix_found" = "0" ; then + if test -z "$$1"; then + $1="$arg" + else + $1="$$1 $arg" + fi + fi +done +unset pmix_found +]) + +dnl ####################################################################### +dnl ####################################################################### +dnl ####################################################################### + +# Macro that serves as an alternative to using `which <prog>`. It is +# preferable to simply using `which <prog>` because backticks (`) (aka +# backquotes) invoke a sub-shell which may source a "noisy" +# ~/.whatever file (and we do not want the error messages to be part +# of the assignment in foo=`which <prog>`). This macro ensures that we +# get a sane executable value. +AC_DEFUN([PMIX_WHICH],[ +# 1 is the program name to do "which" on +# 2 is the variable name to assign the return value to + +PMIX_VAR_SCOPE_PUSH([pmix_prog pmix_file pmix_dir pmix_sentinel]) + +pmix_prog=$1 + +IFS_SAVE=$IFS +IFS="$PATH_SEPARATOR" +for pmix_dir in $PATH; do + if test -x "$pmix_dir/$pmix_prog"; then + $2="$pmix_dir/$pmix_prog" + break + fi +done +IFS=$IFS_SAVE + +PMIX_VAR_SCOPE_POP +])dnl + +dnl ####################################################################### +dnl ####################################################################### +dnl ####################################################################### + +# Declare some variables; use PMIX_VAR_SCOPE_POP to ensure that they +# are cleaned up / undefined. +AC_DEFUN([PMIX_VAR_SCOPE_PUSH],[ + + # Is the private index set? If not, set it. + if test "x$pmix_scope_index" = "x"; then + pmix_scope_index=1 + fi + + # First, check to see if any of these variables are already set. + # This is a simple sanity check to ensure we're not already + # overwriting pre-existing variables (that have a non-empty + # value). 
It's not a perfect check, but at least it's something. + for pmix_var in $1; do + pmix_str="pmix_str=\"\$$pmix_var\"" + eval $pmix_str + + if test "x$pmix_str" != "x"; then + AC_MSG_WARN([Found configure shell variable clash!]) + AC_MSG_WARN([[PMIX_VAR_SCOPE_PUSH] called on "$pmix_var",]) + AC_MSG_WARN([but it is already defined with value "$pmix_str"]) + AC_MSG_WARN([This usually indicates an error in configure.]) + AC_MSG_ERROR([Cannot continue]) + fi + done + + # Ok, we passed the simple sanity check. Save all these names so + # that we can unset them at the end of the scope. + pmix_str="pmix_scope_$pmix_scope_index=\"$1\"" + eval $pmix_str + unset pmix_str + + env | grep pmix_scope + pmix_scope_index=`expr $pmix_scope_index + 1` +])dnl + +# Unset a bunch of variables that were previously set +AC_DEFUN([PMIX_VAR_SCOPE_POP],[ + # Unwind the index + pmix_scope_index=`expr $pmix_scope_index - 1` + pmix_scope_test=`expr $pmix_scope_index \> 0` + if test "$pmix_scope_test" = "0"; then + AC_MSG_WARN([[PMIX_VAR_SCOPE_POP] popped too many PMIX configure scopes.]) + AC_MSG_WARN([This usually indicates an error in configure.]) + AC_MSG_ERROR([Cannot continue]) + fi + + # Get the variable names from that index + pmix_str="pmix_str=\"\$pmix_scope_$pmix_scope_index\"" + eval $pmix_str + + # Iterate over all the variables and unset them all + for pmix_var in $pmix_str; do + unset $pmix_var + done +])dnl + + +dnl ####################################################################### +dnl ####################################################################### +dnl ####################################################################### + +# +# PMIX_WITH_OPTION_MIN_MAX_VALUE(NAME,DEFAULT_VALUE,LOWER_BOUND,UPPER_BOUND) +# Defines a variable PMIX_MAX_xxx, with "xxx" being specified as parameter $1 as "variable_name". +# If not set at configure-time using --with-max-xxx, the default-value ($2) is assumed. 
+# If set, value is checked against lower (value >= $3) and upper bound (value <= $4) +# +AC_DEFUN([PMIX_WITH_OPTION_MIN_MAX_VALUE], [ + max_value=[$2] + AC_MSG_CHECKING([maximum length of ]m4_translit($1, [_], [ ])) + AC_ARG_WITH([max-]m4_translit($1, [_], [-]), + AC_HELP_STRING([--with-max-]m4_translit($1, [_], [-])[=VALUE], + [maximum length of ]m4_translit($1, [_], [ ])[s. VALUE argument has to be specified (default: [$2]).])) + if test ! -z "$with_max_[$1]" && test "$with_max_[$1]" != "no" ; then + # Ensure it's a number (hopefully an integer!), and >0 + expr $with_max_[$1] + 1 > /dev/null 2> /dev/null + AS_IF([test "$?" != "0"], [happy=0], + [AS_IF([test $with_max_[$1] -ge $3 && test $with_max_[$1] -le $4], + [happy=1], [happy=0])]) + + # If badness in the above tests, bail + AS_IF([test "$happy" = "0"], + [AC_MSG_RESULT([bad value ($with_max_[$1])]) + AC_MSG_WARN([--with-max-]m4_translit($1, [_], [-])[s value must be >= $3 and <= $4]) + AC_MSG_ERROR([Cannot continue])]) + max_value=$with_max_[$1] + fi + AC_MSG_RESULT([$max_value]) + AC_DEFINE_UNQUOTED([PMIX_MAX_]m4_toupper($1), $max_value, + [Maximum length of ]m4_translit($1, [_], [ ])[s (default is $2)]) + [PMIX_MAX_]m4_toupper($1)=$max_value + AC_SUBST([PMIX_MAX_]m4_toupper($1)) +])dnl + +dnl ####################################################################### +dnl ####################################################################### +dnl ####################################################################### + +# Usage: PMIX_COMPUTE_MAX_VALUE(number_bytes, variable_to_set, action if overflow) +# Compute maximum value of datatype of +# number_bytes, setting the result in the second argument. Assumes a +# signed datatype. +AC_DEFUN([PMIX_COMPUTE_MAX_VALUE], [ + # This is more complicated than it really should be. But some + # expr implementations (OpenBSD) have an expr with a max value of + # 2^31 - 1, and we sometimes want to compute the max value of a + # type as big or bigger than that... 
+ pmix_num_bits=`expr $1 \* 8 - 1` + newval=1 + value=1 + overflow=0 + + while test $pmix_num_bits -ne 0 ; do + newval=`expr $value \* 2` + if test 0 -eq `expr $newval \< 0` ; then + # if the new value is not negative, next iteration... + value=$newval + pmix_num_bits=`expr $pmix_num_bits - 1` + # if this was the last iteration, subtract 1 (as signed + # max positive is 2^num_bits - 1). Do this here instead + # of outside of the while loop because we might have + # already subtracted 1 by then if we're trying to find the + # max value of the same datatype expr uses as its + # internal representation (ie, if we hit the else + # below...) + if test 0 -eq $pmix_num_bits ; then + value=`expr $value - 1` + fi + else + # if the new value is negative, we've overflowed. First, + # try adding value - 1 instead of value (see if we can get + # to positive max of expr) + newval=`expr $value - 1 + $value` + if test 0 -eq `expr $newval \< 0` ; then + value=$newval + # Still positive, this is as high as we can go. If + # pmix_num_bits is 1, we didn't actually overflow. + # Otherwise, we overflowed. + if test 1 -ne $pmix_num_bits ; then + overflow=1 + fi + else + # still negative. Time to give up. + overflow=1 + fi + pmix_num_bits=0 + fi + done + + AS_VAR_SET([$2], [$value]) + AS_IF([test $overflow -ne 0], [$3]) +])dnl diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_get_version.sh b/opal/mca/pmix/pmix112/pmix/config/pmix_get_version.sh new file mode 100755 index 00000000000..6106af60c38 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_get_version.sh @@ -0,0 +1,161 @@ +#!/bin/sh +# +# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. 
All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + + +# PMIX_GET_VERSION(version_file, variable_prefix) +# ----------------------------------------------- +# parse version_file for version information, setting +# the following shell variables: +# +# prefix_VERSION +# prefix_BASE_VERSION +# prefix_MAJOR_VERSION +# prefix_MINOR_VERSION +# prefix_RELEASE_VERSION +# prefix_GREEK_VERSION +# prefix_REPO_REV +# prefix_TARBALL_VERSION +# prefix_RELEASE_DATE + + + +srcfile="$1" +option="$2" + +if test -z "$srcfile"; then + option="--help" +else + + if test -f "$srcfile"; then + srcdir=`dirname $srcfile` + pmix_vers=`sed -n " + t clear + : clear + s/^major/PMIX_MAJOR_VERSION/ + s/^minor/PMIX_MINOR_VERSION/ + s/^release/PMIX_RELEASE_VERSION/ + s/^greek/PMIX_GREEK_VERSION/ + s/^repo_rev/PMIX_REPO_REV/ + s/^tarball_version/PMIX_TARBALL_VERSION/ + s/^date/PMIX_RELEASE_DATE/ + t print + b + : print + p" < "$srcfile"` + eval "$pmix_vers" + + PMIX_VERSION="$PMIX_MAJOR_VERSION.$PMIX_MINOR_VERSION.$PMIX_RELEASE_VERSION" + PMIX_VERSION="${PMIX_VERSION}${PMIX_GREEK_VERSION}" + + if test "$PMIX_TARBALL_VERSION" = ""; then + PMIX_TARBALL_VERSION=$PMIX_VERSION + fi + + # If repo_rev was not set in the VERSION file, then get it now + if test "$PMIX_REPO_REV" = ""; then + # See if we can find the "git" command. + git_happy=0 + git --version > /dev/null 2>&1 + if test $? 
-eq 0; then + git_happy=1 + fi + + # If we're in a git repo and we found the git command, use + # git describe to get the repo rev + if test -d "$srcdir/.git" && test $git_happy -eq 1; then + if test "$srcdir" != "`pwd`"; then + git_save_dir=`pwd` + cd $srcdir + PMIX_REPO_REV=`git describe --tags --always` + cd $git_save_dir + unset git_save_dir + else + PMIX_REPO_REV=`git describe --tags --always` + fi + else + PMIX_REPO_REV="date`date '+%Y-%m-%d'`" + fi + fi + + + fi + + + if test "$option" = ""; then + option="--full" + fi +fi + +case "$option" in + --full|-v|--version) + echo $PMIX_VERSION + ;; + --major) + echo $PMIX_MAJOR_VERSION + ;; + --minor) + echo $PMIX_MINOR_VERSION + ;; + --release) + echo $PMIX_RELEASE_VERSION + ;; + --greek) + echo $PMIX_GREEK_VERSION + ;; + --repo-rev) + echo $PMIX_REPO_REV + ;; + --tarball) + echo $PMIX_TARBALL_VERSION + ;; + --release-date) + echo $PMIX_RELEASE_DATE + ;; + --all) + echo ${PMIX_VERSION} : ${PMIX_MAJOR_VERSION} : ${PMIX_MINOR_VERSION} : ${PMIX_RELEASE_VERSION} : ${PMIX_GREEK_VERSION} : ${PMIX_REPO_REV} : ${PMIX_TARBALL_VERSION} + ;; + -h|--help) + cat <

  • OPAL_TIMER_CYCLE_NATIVE
    Whether * opal_timer_base_get_cycle() is implemented directly or computed * from some other data (such as a high res timer)
  • - *
  • OPAL_TIMER_CYCLE_SUPPORTED
    Whether + *
  • OPAL_TIMER_CYCLE_SUPPORTED
    Whether * opal_timer_base_get_cycle() is supported on the current * platform.
  • *
  • OPAL_TIMER_USEC_SUPPORTED
    Whether diff --git a/opal/memoryhooks/Makefile.am b/opal/memoryhooks/Makefile.am index ee523eedaf5..8b11687375e 100644 --- a/opal/memoryhooks/Makefile.am +++ b/opal/memoryhooks/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/memoryhooks/memory.c b/opal/memoryhooks/memory.c index 81ac6d5a735..00e7404f9f9 100644 --- a/opal/memoryhooks/memory.c +++ b/opal/memoryhooks/memory.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,7 +32,7 @@ #include "opal/class/opal_object.h" #include "opal/sys/atomic.h" -/* +/* * local types */ struct callback_list_item_t { @@ -106,7 +106,7 @@ opal_mem_hooks_set_support(int support) void opal_mem_hooks_release_hook(void *buf, size_t length, bool from_alloc) { - opal_list_item_t *item; + callback_list_item_t *cbitem, *next; if (!release_run_callbacks) return; @@ -121,12 +121,7 @@ opal_mem_hooks_release_hook(void *buf, size_t length, bool from_alloc) */ opal_atomic_lock(&release_lock); - item = opal_list_get_first(&release_cb_list); - while(item != opal_list_get_end(&release_cb_list)) { - opal_list_item_t* next = opal_list_get_next(item); - callback_list_item_t *cbitem = (callback_list_item_t*) item; - item = next; - + OPAL_LIST_FOREACH_SAFE(cbitem, next, &release_cb_list, callback_list_item_t) { opal_atomic_unlock(&release_lock); cbitem->cbfunc(buf, length, cbitem->cbdata, (bool) from_alloc); opal_atomic_lock(&release_lock); diff --git a/opal/memoryhooks/memory.h b/opal/memoryhooks/memory.h index c3120791ead..a62ed58bea5 100644 --- a/opal/memoryhooks/memory.h +++ b/opal/memoryhooks/memory.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -49,7 +49,7 @@ BEGIN_C_DECLS * * Initialize the memory hooks subsystem. This is generally called * during opal_init() and should be called before any other function - * in the interface is called. + * in the interface is called. 
* * \note Note that some back-end functionality is activated pre-main, * so not calling this function does not prevent the memory hooks from @@ -87,14 +87,14 @@ OPAL_DECLSPEC int opal_mem_hooks_finalize(void); * if support is provided. * * @retval OPAL_MEMORY_FREE_SUPPORT Memory hooks subsytem can trigger - * callback events when memory is going + * callback events when memory is going * to be released by the process, either * by the user calling an allocator * function or munmap. Implies * OPAL_MEMORY_MUNMAP_SUPPORT. * @retval OPAL_MEMORY_MUNMAP_SUPPORT Subsystem can trigger callback events * by the user calling munmap directly. - * @retval OPAL_MEMORY_CHUNK_SUPPORT Memory hooks subsystem will only + * @retval OPAL_MEMORY_CHUNK_SUPPORT Memory hooks subsystem will only * trigger callback events when the * process is giving memory back to the * operating system, not at ever call @@ -113,7 +113,7 @@ OPAL_DECLSPEC int opal_mem_hooks_support_level(void); * the note in opal_mem_hooks_register_alloc() or * opal_mem_hooks_register_release(). * - * @param buf Pointer to the start of the allocation + * @param buf Pointer to the start of the allocation * @param lentgh Length of the allocation * @param cbdata Data passed to memory hooks when callback * was registered @@ -121,7 +121,7 @@ OPAL_DECLSPEC int opal_mem_hooks_support_level(void); * general allocation routines (malloc, calloc, free, * etc.) or directly from the user (mmap, munmap, etc.) */ -typedef void (opal_mem_hooks_callback_fn_t)(void *buf, size_t length, +typedef void (opal_mem_hooks_callback_fn_t)(void *buf, size_t length, void *cbdata, bool from_alloc); @@ -138,10 +138,10 @@ typedef void (opal_mem_hooks_callback_fn_t)(void *buf, size_t length, * @retval OPAL_SUCCESS The registration completed successfully. * @retval OPAL_EXISTS The function is already registered and will not * be registered again. 
- * @retval OPAL_ERR_NOT_SUPPORTED There are no hooks available for + * @retval OPAL_ERR_NOT_SUPPORTED There are no hooks available for * receiving callbacks when memory is to be released */ -OPAL_DECLSPEC int opal_mem_hooks_register_release(opal_mem_hooks_callback_fn_t *func, +OPAL_DECLSPEC int opal_mem_hooks_register_release(opal_mem_hooks_callback_fn_t *func, void *cbdata); /** diff --git a/opal/memoryhooks/memory_internal.h b/opal/memoryhooks/memory_internal.h index bd0d6c7895b..b812275cea7 100644 --- a/opal/memoryhooks/memory_internal.h +++ b/opal/memoryhooks/memory_internal.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/runtime/Makefile.am b/opal/runtime/Makefile.am index 06ea5a577bc..5868063c190 100644 --- a/opal/runtime/Makefile.am +++ b/opal/runtime/Makefile.am @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,10 +14,12 @@ # All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. 
+# All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -33,7 +35,6 @@ dist_opaldata_DATA += runtime/help-opal-runtime.txt \ headers += \ runtime/opal_progress.h \ runtime/opal.h \ - runtime/opal_cr.h \ runtime/opal_info_support.h \ runtime/opal_params.h \ runtime/opal_progress_threads.h @@ -43,6 +44,5 @@ lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \ runtime/opal_finalize.c \ runtime/opal_init.c \ runtime/opal_params.c \ - runtime/opal_cr.c \ runtime/opal_info_support.c \ runtime/opal_progress_threads.c diff --git a/opal/runtime/help-opal-runtime.txt b/opal/runtime/help-opal-runtime.txt index 481f9597a0a..95fd280c169 100644 --- a/opal/runtime/help-opal-runtime.txt +++ b/opal/runtime/help-opal-runtime.txt @@ -6,16 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for Open MPI. @@ -51,7 +51,7 @@ A process has executed an operation involving a call to the operating in a condition that could result in memory corruption or other system errors; your job may hang, crash, or produce silent data corruption. The use of fork() (or system() or other calls that -create child processes) is strongly discouraged. +create child processes) is strongly discouraged. 
The process that invoked fork was: diff --git a/opal/runtime/help-opal_info.txt b/opal/runtime/help-opal_info.txt index ec69b7884c3..5bbf85c2501 100644 --- a/opal/runtime/help-opal_info.txt +++ b/opal/runtime/help-opal_info.txt @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI ompi_info error diff --git a/opal/runtime/opal.h b/opal/runtime/opal.h index 4309333b8a7..872203c5d64 100644 --- a/opal/runtime/opal.h +++ b/opal/runtime/opal.h @@ -5,17 +5,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -55,7 +55,7 @@ OPAL_DECLSPEC extern bool opal_warn_on_fork; OPAL_DECLSPEC int opal_init(int* pargc, char*** pargv); /** - * Finalize the OPAL layer, including the MCA system. 
+ * Finalize the OPAL layer, including the MCA system. * * @retval OPAL_SUCCESS Upon success. * @retval OPAL_ERROR Upon failure. @@ -77,7 +77,14 @@ OPAL_DECLSPEC int opal_finalize(void); OPAL_DECLSPEC int opal_init_util(int* pargc, char*** pargv); /** - * Finalize the OPAL layer, excluding the MCA system. + * Disable PSM/PSM2 signal hijacking. + * + * See comment in the function for more detail. + */ +OPAL_DECLSPEC int opal_init_psm(void); + +/** + * Finalize the OPAL layer, excluding the MCA system. * * @retval OPAL_SUCCESS Upon success. * @retval OPAL_ERROR Upon failure. diff --git a/opal/runtime/opal_cr.c b/opal/runtime/opal_cr.c deleted file mode 100644 index 534b98d670a..00000000000 --- a/opal/runtime/opal_cr.c +++ /dev/null @@ -1,1453 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** @file - * - * OPAL Layer Checkpoint/Restart Runtime functions - * - */ - -#include "opal_config.h" - -#ifdef HAVE_STRING_H -#include -#endif -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_STAT_H -#include /* for mkfifo */ -#endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_SIGNAL_H -#include -#endif - -#include "opal/class/opal_object.h" -#include "opal/util/opal_environ.h" -#include "opal/util/show_help.h" -#include "opal/util/output.h" -#include "opal/util/malloc.h" -#include "opal/util/keyval_parse.h" -#include "opal/util/opal_environ.h" -#include "opal/util/argv.h" -#include "opal/memoryhooks/memory.h" - -#include "opal/mca/base/base.h" -#include "opal/runtime/opal_cr.h" -#include "opal/runtime/opal.h" -#include "opal/constants.h" - -#include "opal/mca/if/base/base.h" -#include "opal/mca/memcpy/base/base.h" -#include "opal/mca/memory/base/base.h" -#include "opal/mca/timer/base/base.h" - -#include "opal/threads/mutex.h" -#include "opal/threads/threads.h" -#include "opal/mca/crs/base/base.h" - -/****************** - * Global Var Decls - ******************/ -#if OPAL_ENABLE_CRDEBUG == 1 -static opal_thread_t **opal_cr_debug_free_threads = NULL; -static int opal_cr_debug_num_free_threads = 0; -static int opal_cr_debug_threads_already_waiting = false; - -int MPIR_debug_with_checkpoint = 0; -static volatile int MPIR_checkpoint_debug_gate = 0; - -int opal_cr_debug_signal = 0; -#endif - -bool opal_cr_stall_check = false; -bool opal_cr_currently_stalled = false; -int opal_cr_output = -1; -int opal_cr_verbose = 0; -int opal_cr_initalized = 0; - -static double opal_cr_get_time(void); -static void display_indv_timer_core(double diff, char *str); -static double timer_start[OPAL_CR_TIMER_MAX]; -bool opal_cr_timing_barrier_enabled = false; 
-bool opal_cr_timing_enabled = false; -int opal_cr_timing_my_rank = 0; -int opal_cr_timing_target_rank = 0; - -/****************** - * Local Functions & Var Decls - ******************/ -static int extract_env_vars(int prev_pid, char * file_name); - -static void opal_cr_sigpipe_debug_signal_handler (int signo); - -static opal_cr_user_inc_callback_fn_t cur_user_coord_callback[OPAL_CR_INC_MAX] = {NULL}; -static opal_cr_coord_callback_fn_t cur_coord_callback = NULL; -static opal_cr_notify_callback_fn_t cur_notify_callback = NULL; - -static int core_prev_pid = 0; - -/****************** - * Interface Functions & Vars - ******************/ -char * opal_cr_pipe_dir = NULL; -int opal_cr_entry_point_signal = 0; -bool opal_cr_is_enabled = true; -bool opal_cr_is_tool = false; - -/* Current checkpoint state */ -int opal_cr_checkpointing_state = OPAL_CR_STATUS_NONE; - -/* Current checkpoint request channel state */ -int opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE; - -static bool opal_cr_debug_sigpipe = false; - -bool opal_cr_continue_like_restart = false; - -#if OPAL_ENABLE_FT_THREAD == 1 -/***************** - * Threading Functions and Variables - *****************/ -static void* opal_cr_thread_fn(opal_object_t *obj); -bool opal_cr_thread_is_done = false; -bool opal_cr_thread_is_active = false; -bool opal_cr_thread_in_library = false; -bool opal_cr_thread_use_if_avail = true; -int32_t opal_cr_thread_num_in_library = 0; -int opal_cr_thread_sleep_check = 0; -int opal_cr_thread_sleep_wait = 0; -opal_thread_t opal_cr_thread; -opal_mutex_t opal_cr_thread_lock; -#if 0 -#define OPAL_CR_LOCK() opal_cr_thread_in_library = true; opal_mutex_lock(&opal_cr_thread_lock); -#define OPAL_CR_UNLOCK() opal_cr_thread_in_library = false; opal_mutex_unlock(&opal_cr_thread_lock); -#define OPAL_CR_THREAD_LOCK() opal_mutex_lock(&opal_cr_thread_lock); -#define OPAL_CR_THREAD_UNLOCK() opal_mutex_unlock(&opal_cr_thread_lock); -#else -/* This technique will potentially starve the thread, but that is OK 
since - * it is only there as support for when the process is not in the MPI library - */ -static const uint32_t ThreadFlag = 0x1; -static const uint32_t ProcInc = 0x2; - -#define OPAL_CR_LOCK() \ - { \ - opal_cr_thread_in_library = true; \ - OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, ProcInc); \ - while( (opal_cr_thread_num_in_library & ThreadFlag ) != 0 ) { \ - sched_yield(); \ - } \ - } -#define OPAL_CR_UNLOCK() \ - { \ - OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, -ProcInc); \ - if( opal_cr_thread_num_in_library <= 0 ) { \ - opal_cr_thread_in_library = false; \ - } \ - } -#define OPAL_CR_THREAD_LOCK() \ - { \ - while(!OPAL_ATOMIC_CMPSET_32(&opal_cr_thread_num_in_library, 0, ThreadFlag)) { \ - if( !opal_cr_thread_is_active && opal_cr_thread_is_done) { \ - break; \ - } \ - sched_yield(); \ - usleep(opal_cr_thread_sleep_check); \ - } \ - } -#define OPAL_CR_THREAD_UNLOCK() \ - { \ - OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, -ThreadFlag); \ - } -#endif - -#endif /* OPAL_ENABLE_FT_THREAD == 1 */ - -int opal_cr_set_enabled(bool en) -{ - opal_cr_is_enabled = en; - return OPAL_SUCCESS; -} - -static int opal_cr_register (void) -{ - int ret; -#if OPAL_ENABLE_CRDEBUG == 1 - int t; -#endif - - /* - * Some startup MCA parameters - */ - ret = mca_base_var_register ("opal", "opal", "cr", "verbose", - "Verbose output level for the runtime OPAL Checkpoint/Restart functionality", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL, - &opal_cr_verbose); - if (0 > ret) { - return ret; - } - - opal_cr_is_enabled = false; - (void) mca_base_var_register("opal", "ft", "cr", "enabled", - "Enable fault tolerance for this program", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_is_enabled); - - opal_cr_timing_enabled = false; - (void) mca_base_var_register ("opal", "opal", "cr", "enable_timer", - "Enable Checkpoint timer (Default: Disabled)", 
- MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_timing_enabled); - - opal_cr_timing_barrier_enabled = false; - (void) mca_base_var_register ("opal", "opal", "cr", "enable_timer_barrier", - "Enable Checkpoint timer Barrier. Must have opal_cr_enable_timer set. (Default: Disabled)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, opal_cr_timing_enabled ? MCA_BASE_VAR_FLAG_SETTABLE : 0, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_timing_barrier_enabled); - opal_cr_timing_barrier_enabled = opal_cr_timing_barrier_enabled && opal_cr_timing_enabled; - - (void) mca_base_var_register ("opal", "opal", "cr", "timer_target_rank", - "Target Rank for the timer (Default: 0)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_timing_target_rank); - -#if OPAL_ENABLE_FT_THREAD == 1 - opal_cr_thread_use_if_avail = false; - (void) mca_base_var_register ("opal", "opal", "cr", "use_thread", - "Use an async thread to checkpoint this program (Default: Disabled)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_thread_use_if_avail); - - opal_cr_thread_sleep_check = 0; - (void) mca_base_var_register ("opal", "opal", "cr", "thread_sleep_check", - "Time to sleep between checking for a checkpoint (Default: 0)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_thread_sleep_check); - - opal_cr_thread_sleep_wait = 100; - (void) mca_base_var_register ("opal", "opal", "cr", "thread_sleep_wait", - "Time to sleep waiting for process to exit MPI library (Default: 1000)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_thread_sleep_wait); -#endif - - opal_cr_is_tool = false; - (void) mca_base_var_register ("opal", "opal", "cr", "is_tool", - "Is this a tool 
program, meaning does it require a fully operational OPAL or just enough to exec.", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_is_tool); - -#ifndef __WINDOWS__ - opal_cr_entry_point_signal = SIGUSR1; - (void) mca_base_var_register ("opal", "opal", "cr", "signal", - "Checkpoint/Restart signal used to initialize an OPAL Only checkpoint of a program", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_entry_point_signal); - - opal_cr_debug_sigpipe = false; - (void) mca_base_var_register ("opal", "opal", "cr", "debug_sigpipe", - "Activate a signal handler for debugging SIGPIPE Errors that can happen on restart. (Default: Disabled)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_debug_sigpipe); -#else - opal_cr_is_tool = true; /* no support for CR on Windows yet */ -#endif /* __WINDOWS__ */ - -#if OPAL_ENABLE_CRDEBUG == 1 - MPIR_debug_with_checkpoint = 0; - (void) mca_base_var_register ("opal", "opal", "cr", "enable_crdebug", - "Enable checkpoint/restart debugging", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &MPIR_debug_with_checkpoint); - - opal_cr_debug_num_free_threads = 3; - opal_cr_debug_free_threads = (opal_thread_t **)malloc(sizeof(opal_thread_t *) * opal_cr_debug_num_free_threads ); - for(t = 0; t < opal_cr_debug_num_free_threads; ++t ) { - opal_cr_debug_free_threads[t] = NULL; - } - - opal_cr_debug_signal = SIGTSTP; - (void) mca_base_var_register ("opal", "opal", "cr", "crdebug_signal", - "Checkpoint/Restart signal used to hold threads when debugging", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_debug_signal); -#endif - - opal_cr_pipe_dir = (char *) opal_tmp_directory(); - (void) mca_base_var_register 
("opal", "opal", "cr", "tmp_dir", - "Temporary directory to place rendezvous files for a checkpoint", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_cr_pipe_dir); - - return OPAL_SUCCESS; -} - - -int opal_cr_init(void ) -{ - int ret, exit_status = OPAL_SUCCESS; - opal_cr_coord_callback_fn_t prev_coord_func; - - if( ++opal_cr_initalized != 1 ) { - if( opal_cr_initalized < 1 ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - exit_status = OPAL_SUCCESS; - goto cleanup; - } - - ret = opal_cr_register (); - if (OPAL_SUCCESS != ret) { - return ret; - } - - if(0 != opal_cr_verbose) { - opal_cr_output = opal_output_open(NULL); - opal_output_set_verbosity(opal_cr_output, opal_cr_verbose); - } - - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: Verbose Level: %d", - opal_cr_verbose); - - - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: FT Enabled: %s", - opal_cr_is_enabled ? "true" : "false"); - - - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: Is a tool program: %s", - opal_cr_is_tool ? "true" : "false"); - - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: Debug SIGPIPE: %d (%s)", - opal_cr_verbose, (opal_cr_debug_sigpipe ? "True" : "False")); - - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: Checkpoint Signal: %d", - opal_cr_entry_point_signal); - -#if OPAL_ENABLE_FT_THREAD == 1 - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: FT Use thread: %s", - opal_cr_thread_use_if_avail ? "true" : "false"); - - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: FT thread sleep: check = %d, wait = %d", - opal_cr_thread_sleep_check, opal_cr_thread_sleep_wait); - - /* If we have a thread, then attach the SIGPIPE signal handler there since - * it is most likely to be the one that needs it. 
- */ - if( opal_cr_debug_sigpipe && !opal_cr_thread_use_if_avail ) { - if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) { - ; - } - } -#else - if( opal_cr_debug_sigpipe ) { - if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) { - ; - } - } -#endif - -#if OPAL_ENABLE_CRDEBUG == 1 - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: C/R Debugging Enabled [%s]\n", - (MPIR_debug_with_checkpoint ? "True": "False")); - - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: Checkpoint Signal (Debug): %d", - opal_cr_debug_signal); - - if( SIG_ERR == signal(opal_cr_debug_signal, MPIR_checkpoint_debugger_signal_handler) ) { - opal_output(opal_cr_output, - "opal_cr: init: Failed to register C/R debug signal (%d)", - opal_cr_debug_signal); - } -#endif - - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: Temp Directory: %s", - opal_cr_pipe_dir); - - if( !opal_cr_is_tool ) { - /* Register the OPAL interlevel coordination callback */ - opal_cr_reg_coord_callback(opal_cr_coord, &prev_coord_func); - - opal_cr_stall_check = false; - opal_cr_currently_stalled = false; - - } /* End opal_cr_is_tool = true */ - - /* - * If fault tolerance was not compiled in then - * we need to make sure that the listener thread is active to tell - * the tools that this is not a checkpointable job. - * We don't need the CRS framework to be initalized. 
- */ -#if OPAL_ENABLE_FT_CR == 1 - /* - * Open the checkpoint / restart service components - */ - if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_crs_base_framework, 0))) { - opal_show_help( "help-opal-runtime.txt", - "opal_cr_init:no-crs", true, - "opal_crs_base_open", ret ); - exit_status = ret; - goto cleanup; - } - - if (OPAL_SUCCESS != (ret = opal_crs_base_select())) { - opal_show_help( "help-opal-runtime.txt", - "opal_cr_init:no-crs", true, - "opal_crs_base_select", ret ); - exit_status = ret; - goto cleanup; - } -#endif - -#if OPAL_ENABLE_FT_THREAD == 1 - if( !opal_cr_is_tool && opal_cr_thread_use_if_avail) { - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: starting the thread\n"); - - /* JJH: We really do need this line below since it enables - * actual locks for threads. However currently the - * upper layers will deadlock if it is enabled. - * So hack around the problem for now, while working - * on a complete solution. See ticket #2741 for more - * details. 
- * opal_set_using_threads(true); - */ - - /* - * Start the thread - */ - OBJ_CONSTRUCT(&opal_cr_thread, opal_thread_t); - OBJ_CONSTRUCT(&opal_cr_thread_lock, opal_mutex_t); - - opal_cr_thread_is_done = false; - opal_cr_thread_is_active = false; - opal_cr_thread_in_library = false; - opal_cr_thread_num_in_library = 0; - - opal_cr_thread.t_run = opal_cr_thread_fn; - opal_cr_thread.t_arg = NULL; - opal_thread_start(&opal_cr_thread); - - } /* End opal_cr_is_tool = true */ - else { - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: *Not* Using C/R thread\n"); - } -#endif /* OPAL_ENABLE_FT_THREAD == 1 */ - - cleanup: - return exit_status; -} - -int opal_cr_finalize(void) -{ - int exit_status = OPAL_SUCCESS; - - if( --opal_cr_initalized != 0 ) { - if( opal_cr_initalized < 0 ) { - return OPAL_ERROR; - } - return OPAL_SUCCESS; - } - - if( !opal_cr_is_tool ) { -#if OPAL_ENABLE_FT_THREAD == 1 - if( opal_cr_thread_use_if_avail ) { - void *data; - /* - * Stop the thread - */ - opal_cr_thread_is_done = true; - opal_cr_thread_is_active = false; - opal_cr_thread_in_library = true; - - opal_thread_join(&opal_cr_thread, &data); - OBJ_DESTRUCT(&opal_cr_thread); - OBJ_DESTRUCT(&opal_cr_thread_lock); - } -#endif /* OPAL_ENABLE_FT_THREAD == 1 */ - - /* Nothing to do for just process notifications */ - opal_cr_checkpointing_state = OPAL_CR_STATUS_TERM; - opal_cr_checkpoint_request = OPAL_CR_STATUS_TERM; - } - -#if OPAL_ENABLE_CRDEBUG == 1 - if( NULL != opal_cr_debug_free_threads ) { - free( opal_cr_debug_free_threads ); - opal_cr_debug_free_threads = NULL; - } - opal_cr_debug_num_free_threads = 0; -#endif - - if (NULL != opal_cr_pipe_dir) { - free(opal_cr_pipe_dir); - opal_cr_pipe_dir = NULL; - } - -#if OPAL_ENABLE_FT_CR == 1 - /* - * Close the checkpoint / restart service components - */ - (void) mca_base_framework_close(&opal_crs_base_framework); -#endif - - return exit_status; -} - -/* - * Check if a checkpoint request needs to be operated upon - */ -void 
opal_cr_test_if_checkpoint_ready(void) -{ - int ret; - - if( opal_cr_currently_stalled) { - opal_output_verbose(20, opal_cr_output, - "opal_cr:opal_test_if_ready: JUMPING to Post Stall stage"); - goto STAGE_1; - } - - /* - * If there is no checkpoint request to act on - * then just return - */ - if(OPAL_CR_STATUS_REQUESTED != opal_cr_checkpoint_request ) { - return; - } - - /* - * If we are currently checkpointing: - * - If a request is pending then cancel it - * - o.w., skip it. - */ - if(OPAL_CR_STATUS_RUNNING == opal_cr_checkpointing_state ) { - if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_IN_PROGRESS) ) ) { - opal_output(opal_cr_output, - "Error: opal_cr: test_if_checkpoint_ready: Respond [In Progress] Failed. (%d)", - ret); - } - opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE; - return; - } - - /* - * If no CRS module is loaded return an error - */ - if (NULL == opal_crs.crs_checkpoint ) { - if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_NULL) ) ) { - opal_output(opal_cr_output, - "Error: opal_cr: test_if_checkpoint_ready: Respond [Not Able/NULL] Failed. (%d)", - ret); - } - opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE; - return; - } - - /* - * Start the checkpoint - */ - opal_cr_checkpointing_state = OPAL_CR_STATUS_RUNNING; - opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE; - - STAGE_1: - if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_START) ) ) { - opal_output(opal_cr_output, - "Error: opal_cr: test_if_checkpoint_ready: Respond [Start Ckpt] Failed. 
(%d)", - ret); - } - - return; -} - -/******************************* - * Notification Routines - *******************************/ -int opal_cr_inc_core_prep(void) -{ - int ret; - - /* - * Call User Level INC - */ - if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_PRE_CRS_PRE_MPI, - OPAL_CR_INC_STATE_PREPARE)) ) { - return ret; - } - - /* - * Use the registered coordination routine - */ - if(OPAL_SUCCESS != (ret = cur_coord_callback(OPAL_CRS_CHECKPOINT)) ) { - if ( OPAL_EXISTS != ret ) { - opal_output(opal_cr_output, - "opal_cr: inc_core: Error: cur_coord_callback(%d) failed! %d\n", - OPAL_CRS_CHECKPOINT, ret); - } - return ret; - } - - /* - * Call User Level INC - */ - if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_PRE_CRS_POST_MPI, - OPAL_CR_INC_STATE_PREPARE)) ) { - return ret; - } - - core_prev_pid = getpid(); - - return OPAL_SUCCESS; -} - -int opal_cr_inc_core_ckpt(pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - int *state) -{ - int ret, exit_status = OPAL_SUCCESS; - - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE0); - if(OPAL_SUCCESS != (ret = opal_crs.crs_checkpoint(pid, - snapshot, - options, - (opal_crs_state_type_t *)state))) { - opal_output(opal_cr_output, - "opal_cr: inc_core: Error: The checkpoint failed. %d\n", ret); - exit_status = ret; - } - - if(*state == OPAL_CRS_CONTINUE) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE1); - - if(options->term) { - *state = OPAL_CRS_TERM; - opal_cr_checkpointing_state = OPAL_CR_STATUS_TERM; - } else { - opal_cr_checkpointing_state = OPAL_CR_STATUS_CONTINUE; - } - } - else { - options->term = false; - } - - /* - * If restarting read environment stuff that opal-restart left us. 
- */ - if(*state == OPAL_CRS_RESTART) { - opal_cr_refresh_environ(core_prev_pid); - opal_cr_checkpointing_state = OPAL_CR_STATUS_RESTART_PRE; - } - - return exit_status; -} - -int opal_cr_inc_core_recover(int state) -{ - int ret; - opal_cr_user_inc_callback_state_t cb_state; - - if( opal_cr_checkpointing_state != OPAL_CR_STATUS_TERM && - opal_cr_checkpointing_state != OPAL_CR_STATUS_CONTINUE && - opal_cr_checkpointing_state != OPAL_CR_STATUS_RESTART_PRE && - opal_cr_checkpointing_state != OPAL_CR_STATUS_RESTART_POST ) { - - if(state == OPAL_CRS_CONTINUE) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE1); - opal_cr_checkpointing_state = OPAL_CR_STATUS_CONTINUE; - } - /* - * If restarting read environment stuff that opal-restart left us. - */ - else if(state == OPAL_CRS_RESTART) { - opal_cr_refresh_environ(core_prev_pid); - opal_cr_checkpointing_state = OPAL_CR_STATUS_RESTART_PRE; - } - } - - /* - * Call User Level INC - */ - if( OPAL_CRS_CONTINUE == state ) { - cb_state = OPAL_CR_INC_STATE_CONTINUE; - } - else if( OPAL_CRS_RESTART == state ) { - cb_state = OPAL_CR_INC_STATE_RESTART; - } - else { - cb_state = OPAL_CR_INC_STATE_ERROR; - } - - if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_POST_CRS_PRE_MPI, - cb_state)) ) { - return ret; - } - - /* - * Use the registered coordination routine - */ - if(OPAL_SUCCESS != (ret = cur_coord_callback(state)) ) { - if ( OPAL_EXISTS != ret ) { - opal_output(opal_cr_output, - "opal_cr: inc_core: Error: cur_coord_callback(%d) failed! 
%d\n", - state, ret); - } - return ret; - } - - if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_POST_CRS_POST_MPI, - cb_state)) ) { - return ret; - } - -#if OPAL_ENABLE_CRDEBUG == 1 - opal_cr_debug_clear_current_ckpt_thread(); -#endif - - return OPAL_SUCCESS; -} - -int opal_cr_inc_core(pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - int *state) -{ - int ret, exit_status = OPAL_SUCCESS; - - /* - * INC: Prepare stack using the registered coordination routine - */ - if(OPAL_SUCCESS != (ret = opal_cr_inc_core_prep() ) ) { - return ret; - } - - /* - * INC: Take the checkpoint - */ - if(OPAL_SUCCESS != (ret = opal_cr_inc_core_ckpt(pid, snapshot, options, state) ) ) { - exit_status = ret; - /* Don't return here since we want to restart the OPAL level stuff */ - } - - /* - * INC: Recover stack using the registered coordination routine - */ - if(OPAL_SUCCESS != (ret = opal_cr_inc_core_recover(*state) ) ) { - return ret; - } - - return exit_status; -} - -/******************************* - * Coordination Routines - *******************************/ -/** - * Current Coordination callback routines - */ -int opal_cr_coord(int state) -{ - if(OPAL_CRS_CHECKPOINT == state) { - /* Do Checkpoint Phase work */ - } - else if (OPAL_CRS_CONTINUE == state ) { - /* Do Continue Phase work */ - } - else if (OPAL_CRS_RESTART == state ) { - /* Do Restart Phase work */ - - /* - * Re-initialize the event engine - * Otherwise it may/will use stale file descriptors which will disrupt - * the intended users of the soon-to-be newly assigned file descriptors. - */ - opal_event_reinit(opal_event_base); - - /* - * Flush if() functionality, since it caches system specific info. - */ - (void) mca_base_framework_close(&opal_if_base_framework); - /* Since opal_ifinit() is not exposed, the necessary - * functions will call it when needed. Just make sure we - * finalized this code so we don't get old socket addrs. 
- */ - opal_output_reopen_all(); - } - else if (OPAL_CRS_TERM == state ) { - /* Do Continue Phase work in prep to terminate the application */ - } - else { - /* We must have been in an error state from the checkpoint - * recreate everything, as in the Continue Phase - */ - } - - /* - * Here we are returning to either: - * - [orte | ompi]_notify() - */ - opal_cr_checkpointing_state = OPAL_CR_STATUS_RESTART_POST; - - return OPAL_SUCCESS; -} - -int opal_cr_reg_notify_callback(opal_cr_notify_callback_fn_t new_func, - opal_cr_notify_callback_fn_t *prev_func) -{ - /* - * Preserve the previous callback - */ - if( NULL != cur_notify_callback) { - *prev_func = cur_notify_callback; - } - else { - *prev_func = NULL; - } - - /* - * Update the callbacks - */ - cur_notify_callback = new_func; - - return OPAL_SUCCESS; -} - -int opal_cr_user_inc_register_callback(opal_cr_user_inc_callback_event_t event, - opal_cr_user_inc_callback_fn_t function, - opal_cr_user_inc_callback_fn_t *prev_function) -{ - if (event >= OPAL_CR_INC_MAX) { - return OPAL_ERROR; - } - - if( NULL != cur_user_coord_callback[event] ) { - *prev_function = cur_user_coord_callback[event]; - } else { - *prev_function = NULL; - } - - cur_user_coord_callback[event] = function; - - return OPAL_SUCCESS; -} - -int trigger_user_inc_callback(opal_cr_user_inc_callback_event_t event, - opal_cr_user_inc_callback_state_t state) -{ - if( NULL == cur_user_coord_callback[event] ) { - return OPAL_SUCCESS; - } - - if (event >= OPAL_CR_INC_MAX) { - return OPAL_ERROR; - } - - return ((cur_user_coord_callback[event])(event, state)); -} - -int opal_cr_reg_coord_callback(opal_cr_coord_callback_fn_t new_func, - opal_cr_coord_callback_fn_t *prev_func) -{ - /* - * Preserve the previous callback - */ - if( NULL != cur_coord_callback) { - *prev_func = cur_coord_callback; - } - else { - *prev_func = NULL; - } - - /* - * Update the callbacks - */ - cur_coord_callback = new_func; - - return OPAL_SUCCESS; -} - -int opal_cr_refresh_environ(int 
prev_pid) { - char *file_name; -#if OPAL_ENABLE_CRDEBUG == 1 - char *tmp; -#endif - struct stat file_status; - - if( 0 >= prev_pid ) { - prev_pid = getpid(); - } - - /* - * Make sure the file exists. If it doesn't then this means 2 things: - * 1) We have already executed this function, and - * 2) The file has been deleted on the previous round. - */ - asprintf(&file_name, "%s/%s-%d", opal_tmp_directory(), OPAL_CR_BASE_ENV_NAME, prev_pid); - if (NULL == file_name) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - if(0 != stat(file_name, &file_status) ){ - free(file_name); - return OPAL_SUCCESS; - } - -#if OPAL_ENABLE_CRDEBUG == 1 - mca_base_var_env_name ("opal_cr_enable_crdebug", &tmp); - opal_unsetenv(tmp, &environ); - free (tmp); -#endif - - extract_env_vars(prev_pid, file_name); - -#if OPAL_ENABLE_CRDEBUG == 1 - MPIR_debug_with_checkpoint = 0; - (void) mca_base_var_register ("opal", "opal", "cr", "enable_crdebug", - "Enable checkpoint/restart debugging", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ, - &MPIR_debug_with_checkpoint); - - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: C/R Debugging Enabled [%s] (refresh)\n", - (MPIR_debug_with_checkpoint ? "True": "False")); -#endif - - free(file_name); - - return OPAL_SUCCESS; -} - -/* - * Extract environment variables from a saved file - * and place them in the environment. 
- */ -static int extract_env_vars(int prev_pid, char * file_name) -{ - int exit_status = OPAL_SUCCESS; - FILE *env_data = NULL; - int len = OPAL_PATH_MAX; - char * tmp_str = NULL; - - if( 0 >= prev_pid ) { - opal_output(opal_cr_output, - "opal_cr: extract_env_vars: Invalid PID (%d)\n", - prev_pid); - exit_status = OPAL_ERROR; - goto cleanup; - } - - if (NULL == (env_data = fopen(file_name, "r")) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - tmp_str = (char *) malloc(sizeof(char) * OPAL_PATH_MAX); - if( NULL == tmp_str) { - exit_status = OPAL_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - /* Extract an env var */ - while(!feof(env_data) ) { - char **t_set = NULL; - - if( NULL == fgets(tmp_str, OPAL_PATH_MAX, env_data) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - len = strlen(tmp_str); - if(tmp_str[len - 1] == '\n') { - tmp_str[len - 1] = '\0'; - } else { - opal_output(opal_cr_output, - "opal_cr: extract_env_vars: Error: Parameter too long (%s)\n", - tmp_str); - continue; - } - - if( NULL == (t_set = opal_argv_split(tmp_str, '=')) ) { - break; - } - - opal_setenv(t_set[0], t_set[1], true, &environ); - - opal_argv_free(t_set); - } - - cleanup: - if( NULL != env_data ) { - fclose(env_data); - } - unlink(file_name); - - if( NULL != tmp_str ){ - free(tmp_str); - } - - return exit_status; -} - -/***************************************** - * OPAL CR Entry Point Functionality -*****************************************/ -/* - * Used only for debugging SIGPIPE problems - */ -static void opal_cr_sigpipe_debug_signal_handler (int signo) -{ - int sleeper = 1; - - if( !opal_cr_debug_sigpipe ) { - opal_output_verbose(10, opal_cr_output, - "opal_cr: sigpipe_debug: Debug SIGPIPE Not enabled :(\n"); - return; - } - - opal_output(0, - "opal_cr: sigpipe_debug: Debug SIGPIPE [%d]: PID (%d)\n", - signo, getpid()); - while(sleeper == 1 ) { - sleep(1); - } -} - -#if OPAL_ENABLE_FT_THREAD == 1 -static void* opal_cr_thread_fn(opal_object_t *obj) -{ - /* Sanity Check */ - if( 
!opal_cr_thread_use_if_avail ) { - return NULL; - } - - if( opal_cr_debug_sigpipe ) { - if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) { - ; - } - } - - /* - * Register this thread with the OPAL CRS - */ - if( NULL != opal_crs.crs_reg_thread ) { - if( OPAL_SUCCESS != opal_crs.crs_reg_thread() ) { - opal_output(0, "Error: Thread registration failed\n"); - return NULL; - } - } - -#if OPAL_ENABLE_CRDEBUG == 1 - opal_cr_debug_free_threads[1] = opal_thread_get_self(); -#endif - - /* - * Wait to become active - */ - while( !opal_cr_thread_is_active && !opal_cr_thread_is_done) { - sched_yield(); - } - - if( opal_cr_thread_is_done ) { - return NULL; - } - - /* - * While active - */ - while( opal_cr_thread_is_active && !opal_cr_thread_is_done) { - /* - * While no threads are in the MPI library then try to process - * checkpoint requests. - */ - OPAL_CR_THREAD_LOCK(); - - while ( !opal_cr_thread_in_library ) { - sched_yield(); - usleep(opal_cr_thread_sleep_check); - - OPAL_CR_TEST_CHECKPOINT_READY(); - /* Sanity check */ - if( OPAL_UNLIKELY(opal_cr_currently_stalled) ) { - OPAL_CR_TEST_CHECKPOINT_READY(); - } - } - - /* - * While they are in the MPI library yield - */ - OPAL_CR_THREAD_UNLOCK(); - - while ( opal_cr_thread_in_library && opal_cr_thread_is_active ) { - usleep(opal_cr_thread_sleep_wait); - } - } - - return NULL; -} - -void opal_cr_thread_init_library(void) -{ - if( !opal_cr_thread_use_if_avail ) { - OPAL_CR_TEST_CHECKPOINT_READY(); - } else { - /* Activate the CR Thread */ - opal_cr_thread_in_library = false; - opal_cr_thread_is_done = false; - opal_cr_thread_is_active = true; - } -} - -void opal_cr_thread_finalize_library(void) -{ - if( !opal_cr_thread_use_if_avail ) { - OPAL_CR_TEST_CHECKPOINT_READY(); - } else { - /* Deactivate the CR Thread */ - opal_cr_thread_is_done = true; - opal_cr_thread_is_active = false; - OPAL_CR_LOCK(); - opal_cr_thread_in_library = true; - } -} - -void opal_cr_thread_abort_library(void) -{ - if( 
!opal_cr_thread_use_if_avail ) { - OPAL_CR_TEST_CHECKPOINT_READY(); - } else { - /* Deactivate the CR Thread */ - opal_cr_thread_is_done = true; - opal_cr_thread_is_active = false; - OPAL_CR_LOCK(); - opal_cr_thread_in_library = true; - } -} - -void opal_cr_thread_enter_library(void) -{ - if( !opal_cr_thread_use_if_avail ) { - OPAL_CR_TEST_CHECKPOINT_READY(); - } else { - /* Lock out the CR Thread */ - OPAL_CR_LOCK(); - } -} - -void opal_cr_thread_exit_library(void) -{ - if( !opal_cr_thread_use_if_avail ) { - OPAL_CR_TEST_CHECKPOINT_READY(); - } else { - /* Allow CR Thread to continue */ - OPAL_CR_UNLOCK(); - } -} - -void opal_cr_thread_noop_progress(void) -{ - if( !opal_cr_thread_use_if_avail ) { - OPAL_CR_TEST_CHECKPOINT_READY(); - } -} - -#endif /* OPAL_ENABLE_FT_THREAD == 1 */ - -static double opal_cr_get_time() { - double wtime; - -#if OPAL_TIMER_USEC_NATIVE - wtime = (double)opal_timer_base_get_usec() / 1000000.0; -#else - struct timeval tv; - gettimeofday(&tv, NULL); - wtime = tv.tv_sec; - wtime += (double)tv.tv_usec / 1000000.0; -#endif - - return wtime; -} - -void opal_cr_set_time(int idx) -{ - if(idx < OPAL_CR_TIMER_MAX ) { - if( timer_start[idx] <= 0.0 ) { - timer_start[idx] = opal_cr_get_time(); - } - } -} - -void opal_cr_clear_timers(void) -{ - int i; - for(i = 0; i < OPAL_CR_TIMER_MAX; ++i) { - timer_start[i] = 0.0; - } -} - -static void display_indv_timer_core(double diff, char *str) { - double total = 0; - double perc = 0; - - total = timer_start[OPAL_CR_TIMER_MAX-1] - timer_start[OPAL_CR_TIMER_ENTRY0]; - perc = (diff/total) * 100; - - opal_output(0, - "opal_cr: timing: %-20s = %10.2f s\t%10.2f s\t%6.2f\n", - str, - diff, - total, - perc); - return; -} - -void opal_cr_display_all_timers(void) -{ - double diff = 0.0; - char * label = NULL; - - if( opal_cr_timing_target_rank != opal_cr_timing_my_rank ) { - return; - } - - opal_output(0, "OPAL CR Timing: ******************** Summary Begin\n"); - - /********** Entry into the system **********/ - label = 
strdup("Start Entry Point"); - if( opal_cr_timing_barrier_enabled ) { - diff = timer_start[OPAL_CR_TIMER_CRCPBR0] - timer_start[OPAL_CR_TIMER_ENTRY0]; - } else { - diff = timer_start[OPAL_CR_TIMER_CRCP0] - timer_start[OPAL_CR_TIMER_ENTRY0]; - } - display_indv_timer_core(diff, label); - free(label); - - /********** CRCP Protocol **********/ - label = strdup("CRCP Protocol"); - if( opal_cr_timing_barrier_enabled ) { - diff = timer_start[OPAL_CR_TIMER_CRCPBR1] - timer_start[OPAL_CR_TIMER_CRCP0]; - } else { - diff = timer_start[OPAL_CR_TIMER_P2P0] - timer_start[OPAL_CR_TIMER_CRCP0]; - } - display_indv_timer_core(diff, label); - free(label); - - /********** P2P Suspend **********/ - label = strdup("P2P Suspend"); - if( opal_cr_timing_barrier_enabled ) { - diff = timer_start[OPAL_CR_TIMER_P2PBR0] - timer_start[OPAL_CR_TIMER_P2P0]; - } else { - diff = timer_start[OPAL_CR_TIMER_CORE0] - timer_start[OPAL_CR_TIMER_P2P0]; - } - display_indv_timer_core(diff, label); - free(label); - - /********** Checkpoint to Disk **********/ - label = strdup("Checkpoint"); - diff = timer_start[OPAL_CR_TIMER_CORE1] - timer_start[OPAL_CR_TIMER_CORE0]; - display_indv_timer_core(diff, label); - free(label); - - /********** P2P Reactivation **********/ - label = strdup("P2P Reactivation"); - if( opal_cr_timing_barrier_enabled ) { - diff = timer_start[OPAL_CR_TIMER_P2PBR2] - timer_start[OPAL_CR_TIMER_CORE1]; - } else { - diff = timer_start[OPAL_CR_TIMER_CRCP1] - timer_start[OPAL_CR_TIMER_CORE1]; - } - display_indv_timer_core(diff, label); - free(label); - - /********** CRCP Protocol Finalize **********/ - label = strdup("CRCP Cleanup"); - if( opal_cr_timing_barrier_enabled ) { - diff = timer_start[OPAL_CR_TIMER_COREBR1] - timer_start[OPAL_CR_TIMER_CRCP1]; - } else { - diff = timer_start[OPAL_CR_TIMER_CORE2] - timer_start[OPAL_CR_TIMER_CRCP1]; - } - display_indv_timer_core(diff, label); - free(label); - - /********** Exit the system **********/ - label = strdup("Finish Entry Point"); - diff = 
timer_start[OPAL_CR_TIMER_ENTRY4] - timer_start[OPAL_CR_TIMER_CORE2]; - display_indv_timer_core(diff, label); - free(label); - - opal_output(0, "OPAL CR Timing: ******************** Summary End\n"); -} - -#if OPAL_ENABLE_CRDEBUG == 1 -int opal_cr_debug_set_current_ckpt_thread_self(void) -{ - int t; - - if( NULL == opal_cr_debug_free_threads ) { - opal_cr_debug_num_free_threads = 3; - opal_cr_debug_free_threads = (opal_thread_t **)malloc(sizeof(opal_thread_t *) * opal_cr_debug_num_free_threads ); - for(t = 0; t < opal_cr_debug_num_free_threads; ++t ) { - opal_cr_debug_free_threads[t] = NULL; - } - } - - opal_cr_debug_free_threads[0] = opal_thread_get_self(); - - return OPAL_SUCCESS; -} - -int opal_cr_debug_clear_current_ckpt_thread(void) -{ - opal_cr_debug_free_threads[0] = NULL; - - return OPAL_SUCCESS; -} - -int MPIR_checkpoint_debugger_detach(void) { - /* This function is meant to be a noop function for checkpoint/restart - * enabled debugging functionality */ -#if 0 - /* Once the debugger can successfully force threads into the function below, - * then we can uncomment this line */ - if( MPIR_debug_with_checkpoint ) { - opal_cr_debug_threads_already_waiting = true; - } -#endif - return OPAL_SUCCESS; -} - -void MPIR_checkpoint_debugger_signal_handler(int signo) -{ - opal_output_verbose(1, opal_cr_output, - "crs: MPIR_checkpoint_debugger_signal_handler(): Enter Debug signal handler..."); - - MPIR_checkpoint_debugger_waitpoint(); - - opal_output_verbose(1, opal_cr_output, - "crs: MPIR_checkpoint_debugger_signal_handler(): Leave Debug signal handler..."); -} - -void *MPIR_checkpoint_debugger_waitpoint(void) -{ - int t; - opal_thread_t *thr = NULL; - - thr = opal_thread_get_self(); - - /* - * Sanity check, if the debugger is not going to attach, then do not wait - * Make sure to open the debug gate, so that threads can get out - */ - if( !MPIR_debug_with_checkpoint ) { - opal_output_verbose(1, opal_cr_output, - "crs: MPIR_checkpoint_debugger_waitpoint(): Debugger is 
not attaching... (%d)", - (int)thr->t_handle); - MPIR_checkpoint_debug_gate = 1; - return NULL; - } - else { - opal_output_verbose(1, opal_cr_output, - "crs: MPIR_checkpoint_debugger_waitpoint(): Waiting for the Debugger to attach... (%d)", - (int)thr->t_handle); - MPIR_checkpoint_debug_gate = 0; - } - - /* - * Let special threads escape without waiting, they will wait later - */ - for(t = 0; t < opal_cr_debug_num_free_threads; ++t) { - if( opal_cr_debug_free_threads[t] != NULL && - opal_thread_self_compare(opal_cr_debug_free_threads[t]) ) { - opal_output_verbose(1, opal_cr_output, - "crs: MPIR_checkpoint_debugger_waitpoint(): Checkpointing thread does not wait here... (%d)", - (int)thr->t_handle); - return NULL; - } - } - - /* - * Force all other threads into the waiting function, - * unless they are already in there, then just return so we do not nest - * calls into this wait function and potentially confuse the debugger. - */ - if( opal_cr_debug_threads_already_waiting ) { - opal_output_verbose(1, opal_cr_output, - "crs: MPIR_checkpoint_debugger_waitpoint(): Threads are already waiting from debugger detach, do not wait here... (%d)", - (int)thr->t_handle); - return NULL; - } else { - opal_output_verbose(1, opal_cr_output, - "crs: MPIR_checkpoint_debugger_waitpoint(): Wait... (%d)", - (int)thr->t_handle); - return MPIR_checkpoint_debugger_breakpoint(); - } -} - -/* - * A tight loop to wait for debugger to release this process from the - * breakpoint. 
- */ -void *MPIR_checkpoint_debugger_breakpoint(void) -{ - /* spin until debugger attaches and releases us */ - while (MPIR_checkpoint_debug_gate == 0) { -#if defined(HAVE_USLEEP) - usleep(100000); /* microseconds */ -#else - sleep(1); /* seconds */ -#endif - } - opal_cr_debug_threads_already_waiting = false; - return NULL; -} -#endif diff --git a/opal/runtime/opal_cr.h b/opal/runtime/opal_cr.h deleted file mode 100644 index 20f16378e71..00000000000 --- a/opal/runtime/opal_cr.h +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * Checkpoint functionality for Open MPI - */ - -#include "opal_config.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/event/event.h" -#include "opal/util/output.h" -#include "opal/prefetch.h" - -#ifndef OPAL_CR_H -#define OPAL_CR_H - - -BEGIN_C_DECLS - -/* - * Some defines shared with opal-[checkpoint|restart] commands - */ -#define OPAL_CR_DONE ((char) 0) -#define OPAL_CR_ACK ((char) 1) -#define OPAL_CR_CHECKPOINT ((char) 2) -#define OPAL_CR_NAMED_PROG_R ("opal_cr_prog_read") -#define OPAL_CR_NAMED_PROG_W ("opal_cr_prog_write") -#define OPAL_CR_BASE_ENV_NAME ("opal_cr_restart-env") - -/* - * Possible responses to a checkpoint request from opal-checkpoint - */ -enum opal_cr_ckpt_cmd_state_t { - OPAL_CHECKPOINT_CMD_START, /* Checkpoint is starting on this request */ - OPAL_CHECKPOINT_CMD_IN_PROGRESS, /* Checkpoint is currently running */ - OPAL_CHECKPOINT_CMD_NULL, /* Checkpoint cannot be started because it is not supported */ - OPAL_CHECKPOINT_CMD_ERROR, /* An error occurred such that the checkpoint cannot be completed */ - /* State of the checkpoint operation */ - OPAL_CR_STATUS_NONE, /* No checkpoint in progress */ - OPAL_CR_STATUS_REQUESTED, /* Checkpoint has been requested */ - OPAL_CR_STATUS_RUNNING, /* Checkpoint is currently running */ - OPAL_CR_STATUS_TERM, /* Checkpoint is running and will terminate process upon completion */ - /* State of the continue operation */ - OPAL_CR_STATUS_CONTINUE, - /* State of the restart operation */ - OPAL_CR_STATUS_RESTART_PRE, - OPAL_CR_STATUS_RESTART_POST -}; -typedef enum opal_cr_ckpt_cmd_state_t opal_cr_ckpt_cmd_state_t; - - /* An output handle to be used by the cr runtime - * functionality as an argument to opal_output() */ - OPAL_DECLSPEC extern int opal_cr_output; - - /* Directory containing the named pipes for communication - * with the opal-checkpoint tool */ - OPAL_DECLSPEC extern char * opal_cr_pipe_dir; - - 
/* Signal that opal-checkpoint uses to contact the - * application process */ - OPAL_DECLSPEC extern int opal_cr_entry_point_signal; - - /* If Checkpointing is enabled in this application */ - OPAL_DECLSPEC extern bool opal_cr_is_enabled; - - /* If the application running is a tool - * (e.g., opal-checkpoint, orted, ...) */ - OPAL_DECLSPEC extern bool opal_cr_is_tool; - - /* If a checkpoint has been requested */ - OPAL_DECLSPEC extern int opal_cr_checkpoint_request; - - /* The current state of a checkpoint operation */ - OPAL_DECLSPEC extern int opal_cr_checkpointing_state; - - /* - * If one of the BTLs that shutdown require a full, clean rebuild of the - * point-to-point stack on 'continue' as well as 'restart'. - */ - OPAL_DECLSPEC extern bool opal_cr_continue_like_restart; - -#if OPAL_ENABLE_CRDEBUG == 1 - /* Whether or not C/R Debugging is enabled for this process */ - OPAL_DECLSPEC extern int MPIR_debug_with_checkpoint; - - /* - * Set/clear the current thread id for the checkpointing thread - */ - OPAL_DECLSPEC int opal_cr_debug_set_current_ckpt_thread_self(void); - OPAL_DECLSPEC int opal_cr_debug_clear_current_ckpt_thread(void); - - /* - * This MPI Debugger function needs to be accessed here and have a specific - * name. Thus we are breaking the traditional naming conventions to provide this functionality. - */ - OPAL_DECLSPEC int MPIR_checkpoint_debugger_detach(void); - - /** - * A tight loop to wait for debugger to release this process from the - * breakpoint. 
- */ - OPAL_DECLSPEC void *MPIR_checkpoint_debugger_breakpoint(void); - - /** - * A function for the debugger or CRS to force all threads into - */ - OPAL_DECLSPEC void *MPIR_checkpoint_debugger_waitpoint(void); - - /** - * A signal handler to force all threads to wait when debugger detaches - */ - OPAL_DECLSPEC void MPIR_checkpoint_debugger_signal_handler(int signo); -#endif - - /* - * Refresh environment variables after a restart - */ - OPAL_DECLSPEC int opal_cr_refresh_environ(int prev_pid); - - /* - * If this is an application that doesn't want to have - * a notification callback installed, set this to false. - * To see the effect, this must be called before opal_cr_init(). - * Default: Enabled - */ - OPAL_DECLSPEC int opal_cr_set_enabled(bool); - - /** - * Initialize the notification and coordination - * elements. - */ - OPAL_DECLSPEC int opal_cr_init(void); - - /** - * Finalize the notification and coordination - * elements. - */ - OPAL_DECLSPEC int opal_cr_finalize(void); - - /************************************************* - * Check to see if a checkpoint has been requested - * - * When the checkpoint thread is disabled: - * This will be checked whenever the MPI Library - * is entered by the application. It will stop - * the application for the duration of the entire - * checkpoint. - * When the checkpoint thread is enabled: - * The request is handled in the thread parallel - * with the execution of the program regardless - * of where the program is in exection. 
- * The problem with this method is that it - * requires the support of progress threads - * which is currently not working properly :/ - * - *************************************************/ - OPAL_DECLSPEC void opal_cr_test_if_checkpoint_ready(void); - - /* If the checkpoint operation should be stalled to - * wait for another sevice to complete before - * continuing with the checkpoint */ - OPAL_DECLSPEC extern bool opal_cr_stall_check; - OPAL_DECLSPEC extern bool opal_cr_currently_stalled; - -#if OPAL_ENABLE_FT_THREAD == 1 - /* Some thread functions */ - OPAL_DECLSPEC void opal_cr_thread_init_library(void); - OPAL_DECLSPEC void opal_cr_thread_finalize_library(void); - OPAL_DECLSPEC void opal_cr_thread_abort_library(void); - OPAL_DECLSPEC void opal_cr_thread_enter_library(void); - OPAL_DECLSPEC void opal_cr_thread_exit_library(void); - OPAL_DECLSPEC void opal_cr_thread_noop_progress(void); -#endif /* OPAL_ENABLE_FT_THREAD == 1 */ - - /* - * If not using FT then make the #defines noops - */ -#if OPAL_ENABLE_FT == 0 || OPAL_ENABLE_FT_CR == 0 -#define OPAL_CR_TEST_CHECKPOINT_READY() ; -#define OPAL_CR_TEST_CHECKPOINT_READY_STALL() ; -#define OPAL_CR_INIT_LIBRARY() ; -#define OPAL_CR_FINALIZE_LIBRARY() ; -#define OPAL_CR_ABORT_LIBRARY() ; -#define OPAL_CR_ENTER_LIBRARY() ; -#define OPAL_CR_EXIT_LIBRARY() ; -#define OPAL_CR_NOOP_PROGRESS() ; -#endif /* #if OPAL_ENABLE_FT == 0 || OPAL_ENABLE_FT_CR == 0 */ - - /* - * If using FT - */ -#if OPAL_ENABLE_FT_CR == 1 -#define OPAL_CR_TEST_CHECKPOINT_READY() \ - { \ - if(OPAL_UNLIKELY(opal_cr_is_enabled) ) { \ - opal_cr_test_if_checkpoint_ready(); \ - } \ - } - -#define OPAL_CR_TEST_CHECKPOINT_READY_STALL() \ - { \ - if(OPAL_UNLIKELY(opal_cr_is_enabled && !opal_cr_stall_check)) { \ - opal_cr_test_if_checkpoint_ready(); \ - } \ - } - -/* If *not* using FT thread */ -#if OPAL_ENABLE_FT_THREAD == 0 -#define OPAL_CR_INIT_LIBRARY() OPAL_CR_TEST_CHECKPOINT_READY(); -#define OPAL_CR_FINALIZE_LIBRARY() 
OPAL_CR_TEST_CHECKPOINT_READY(); -#define OPAL_CR_ABORT_LIBRARY() OPAL_CR_TEST_CHECKPOINT_READY(); -#define OPAL_CR_ENTER_LIBRARY() OPAL_CR_TEST_CHECKPOINT_READY(); -#define OPAL_CR_EXIT_LIBRARY() OPAL_CR_TEST_CHECKPOINT_READY(); -#define OPAL_CR_NOOP_PROGRESS() OPAL_CR_TEST_CHECKPOINT_READY(); -#endif /* OPAL_ENABLE_FT_THREAD == 0 */ - -/* If using FT thread */ -#if OPAL_ENABLE_FT_THREAD == 1 -#define OPAL_CR_INIT_LIBRARY() \ - { \ - opal_cr_thread_init_library(); \ - } -#define OPAL_CR_FINALIZE_LIBRARY() \ - { \ - opal_cr_thread_finalize_library(); \ - } -#define OPAL_CR_ABORT_LIBRARY() \ - { \ - opal_cr_thread_abort_library(); \ - } -#define OPAL_CR_ENTER_LIBRARY() \ - { \ - opal_cr_thread_enter_library(); \ - } -#define OPAL_CR_EXIT_LIBRARY() \ - { \ - opal_cr_thread_exit_library(); \ - } -#define OPAL_CR_NOOP_PROGRESS() \ - { \ - opal_cr_thread_noop_progress(); \ - } -#endif /* OPAL_ENABLE_FT_THREAD == 1 */ - -#endif /* OPAL_ENABLE_FT_CR == 1 */ - - /******************************* - * Notification Routines - *******************************/ - /******************************* - * Notification Routines - *******************************/ - /** - * A function to respond to the async checkpoint request - * this is useful when figuring out who should respond - * when stalling. 
- */ - typedef int (*opal_cr_notify_callback_fn_t) (opal_cr_ckpt_cmd_state_t); - - OPAL_DECLSPEC int opal_cr_reg_notify_callback - (opal_cr_notify_callback_fn_t new_func, - opal_cr_notify_callback_fn_t *prev_func); - - /** - * Function to go through the INC - * - Call Registered INC_Coord(CHECKPOINT) - * - Call the CRS.checkpoint() - * - Call Registered INC_Coord(state) - */ - OPAL_DECLSPEC int opal_cr_inc_core(pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - int *state); - - OPAL_DECLSPEC int opal_cr_inc_core_prep(void); - OPAL_DECLSPEC int opal_cr_inc_core_ckpt(pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - int *state); - OPAL_DECLSPEC int opal_cr_inc_core_recover(int state); - - - /******************************* - * User Coordination Routines - *******************************/ - typedef enum { - OPAL_CR_INC_PRE_CRS_PRE_MPI = 0, - OPAL_CR_INC_PRE_CRS_POST_MPI = 1, - OPAL_CR_INC_CRS_PRE_CKPT = 2, - OPAL_CR_INC_CRS_POST_CKPT = 3, - OPAL_CR_INC_POST_CRS_PRE_MPI = 4, - OPAL_CR_INC_POST_CRS_POST_MPI = 5, - OPAL_CR_INC_MAX = 6 - } opal_cr_user_inc_callback_event_t; - - typedef enum { - OPAL_CR_INC_STATE_PREPARE = 0, - OPAL_CR_INC_STATE_CONTINUE = 1, - OPAL_CR_INC_STATE_RESTART = 2, - OPAL_CR_INC_STATE_ERROR = 3 - } opal_cr_user_inc_callback_state_t; - - /** - * User coordination callback routine - */ - typedef int (*opal_cr_user_inc_callback_fn_t)(opal_cr_user_inc_callback_event_t event, - opal_cr_user_inc_callback_state_t state); - - OPAL_DECLSPEC int opal_cr_user_inc_register_callback - (opal_cr_user_inc_callback_event_t event, - opal_cr_user_inc_callback_fn_t function, - opal_cr_user_inc_callback_fn_t *prev_function); - - OPAL_DECLSPEC int trigger_user_inc_callback(opal_cr_user_inc_callback_event_t event, - opal_cr_user_inc_callback_state_t state); - - - /******************************* - * Coordination Routines - *******************************/ - /** - * Coordination callback 
routine signature - */ - typedef int (*opal_cr_coord_callback_fn_t) (int); - - /** - * Register a checkpoint coodination routine - * for a higher level. - */ - OPAL_DECLSPEC int opal_cr_reg_coord_callback - (opal_cr_coord_callback_fn_t new_func, - opal_cr_coord_callback_fn_t *prev_func); - - /** - * OPAL Checkpoint Coordination Routine - */ - OPAL_DECLSPEC int opal_cr_coord(int state); - - /** - * Checkpoint life-cycle timing - */ - OPAL_DECLSPEC void opal_cr_set_time(int idx); - OPAL_DECLSPEC void opal_cr_display_all_timers(void); - OPAL_DECLSPEC void opal_cr_clear_timers(void); - - OPAL_DECLSPEC extern bool opal_cr_timing_enabled; - OPAL_DECLSPEC extern bool opal_cr_timing_barrier_enabled; - OPAL_DECLSPEC extern int opal_cr_timing_my_rank; - OPAL_DECLSPEC extern int opal_cr_timing_target_rank; - - -#define OPAL_CR_TIMER_ENTRY0 0 -#define OPAL_CR_TIMER_ENTRY1 1 -#define OPAL_CR_TIMER_ENTRY2 2 -#define OPAL_CR_TIMER_CRCPBR0 3 -#define OPAL_CR_TIMER_CRCP0 4 -#define OPAL_CR_TIMER_CRCPBR1 5 -#define OPAL_CR_TIMER_P2P0 6 -#define OPAL_CR_TIMER_P2P1 7 -#define OPAL_CR_TIMER_P2PBR0 8 -#define OPAL_CR_TIMER_CORE0 9 -#define OPAL_CR_TIMER_CORE1 10 -#define OPAL_CR_TIMER_COREBR0 11 -#define OPAL_CR_TIMER_P2P2 12 -#define OPAL_CR_TIMER_P2PBR1 13 -#define OPAL_CR_TIMER_P2P3 14 -#define OPAL_CR_TIMER_P2PBR2 15 -#define OPAL_CR_TIMER_CRCP1 16 -#define OPAL_CR_TIMER_COREBR1 17 -#define OPAL_CR_TIMER_CORE2 18 -#define OPAL_CR_TIMER_ENTRY3 19 -#define OPAL_CR_TIMER_ENTRY4 20 -#define OPAL_CR_TIMER_MAX 21 - - -#define OPAL_CR_CLEAR_TIMERS() \ - { \ - if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \ - opal_cr_clear_timers(); \ - } \ - } - -#define OPAL_CR_SET_TIMER(idx) \ - { \ - if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \ - opal_cr_set_time(idx); \ - } \ - } - -#define OPAL_CR_DISPLAY_ALL_TIMERS() \ - { \ - if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \ - opal_cr_display_all_timers(); \ - } \ - } - -END_C_DECLS - -#endif /* OPAL_CR_H */ - diff --git 
a/opal/runtime/opal_finalize.c b/opal/runtime/opal_finalize.c index 0105d660697..099b77246b1 100644 --- a/opal/runtime/opal_finalize.c +++ b/opal/runtime/opal_finalize.c @@ -40,7 +40,7 @@ #include "opal/mca/installdirs/base/base.h" #include "opal/mca/memchecker/base/base.h" #include "opal/mca/memcpy/base/base.h" -#include "opal/mca/memory/base/base.h" +#include "opal/mca/patcher/base/base.h" #include "opal/mca/backtrace/base/base.h" #include "opal/mca/sec/base/base.h" #include "opal/mca/timer/base/base.h" @@ -48,12 +48,6 @@ #include "opal/mca/event/base/base.h" #include "opal/runtime/opal_progress.h" #include "opal/mca/shmem/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/mca/compress/base/base.h" -#endif - -#include "opal/runtime/opal_cr.h" -#include "opal/mca/crs/base/base.h" extern int opal_initialized; extern int opal_util_initialized; @@ -107,7 +101,7 @@ opal_finalize_util(void) the malloc code turning off doesn't affect opal_output that much */ opal_output_finalize(); - + /* close the dss */ opal_dss_close(); @@ -136,16 +130,9 @@ opal_finalize(void) opal_progress_finalize(); - /* close the checkpoint and restart service */ - opal_cr_finalize(); - /* close the security framework */ (void) mca_base_framework_close(&opal_sec_base_framework); -#if OPAL_ENABLE_FT_CR == 1 - (void) mca_base_framework_close(&opal_compress_base_framework); -#endif - (void) mca_base_framework_close(&opal_event_base_framework); /* close high resolution timers */ @@ -153,13 +140,7 @@ opal_finalize(void) (void) mca_base_framework_close(&opal_backtrace_base_framework); (void) mca_base_framework_close(&opal_memchecker_base_framework); - - /* close the memory manager components. Registered hooks can - still be fired any time between now and the call to - opal_mem_free_finalize(), and callbacks from the memory manager - hooks to the bowels of the mem_free code can still occur any - time between now and end of application (even post main()!) 
*/ - (void) mca_base_framework_close(&opal_memory_base_framework); + (void) mca_base_framework_close(&opal_patcher_base_framework); /* close the memcpy framework */ (void) mca_base_framework_close(&opal_memcpy_base_framework); @@ -201,7 +182,7 @@ void opal_finalize_test(void) the malloc code turning off doesn't affect opal_output that much */ opal_output_finalize(); - + /* close the dss */ opal_dss_close(); diff --git a/opal/runtime/opal_info_support.c b/opal/runtime/opal_info_support.c index 189fc9a314f..3f4694ce27e 100644 --- a/opal/runtime/opal_info_support.c +++ b/opal/runtime/opal_info_support.c @@ -429,6 +429,7 @@ void opal_info_do_path(bool want_all, opal_cmd_line_t *cmd_line) void opal_info_do_params(bool want_all_in, bool want_internal, opal_pointer_array_t *mca_types, + opal_pointer_array_t *component_map, opal_cmd_line_t *opal_info_cmd_line) { mca_base_var_info_lvl_t max_level = OPAL_INFO_LVL_1; @@ -482,6 +483,9 @@ void opal_info_do_params(bool want_all_in, bool want_internal, /* Show the params */ if (want_all) { + opal_info_show_component_version(mca_types, component_map, opal_info_type_all, + opal_info_component_all, opal_info_ver_full, + opal_info_ver_all); for (i = 0; i < mca_types->size; ++i) { if (NULL == (type = (char *)opal_pointer_array_get_item(mca_types, i))) { continue; @@ -510,6 +514,9 @@ void opal_info_do_params(bool want_all_in, bool want_internal, exit(1); } + opal_info_show_component_version(mca_types, component_map, type, + component, opal_info_ver_full, + opal_info_ver_all); opal_info_show_mca_params(type, component, max_level, want_internal); } } @@ -639,6 +646,10 @@ static void opal_info_show_mca_group_params(const mca_base_var_group_t *group, m } } + const mca_base_var_group_t *curr_group = NULL; + char *component_msg = NULL; + asprintf(&component_msg, " %s", group_component); + for (i = 0 ; i < count ; ++i) { ret = mca_base_var_get(variables[i], &var); if (OPAL_SUCCESS != ret || ((var->mbv_flags & MCA_BASE_VAR_FLAG_INTERNAL) && @@ 
-647,6 +658,15 @@ static void opal_info_show_mca_group_params(const mca_base_var_group_t *group, m continue; } + if (opal_info_pretty && curr_group != group) { + asprintf(&message, "MCA%s %s%s", requested ? "" : " (disabled)", + group->group_framework, + component_msg ? component_msg : ""); + opal_info_out(message, message, "---------------------------------------------------"); + free(message); + curr_group = group; + } + ret = mca_base_var_dump(variables[i], &strings, !opal_info_pretty ? MCA_BASE_VAR_DUMP_PARSABLE : MCA_BASE_VAR_DUMP_READABLE); if (OPAL_SUCCESS != ret) { continue; @@ -654,7 +674,9 @@ static void opal_info_show_mca_group_params(const mca_base_var_group_t *group, m for (j = 0 ; strings[j] ; ++j) { if (0 == j && opal_info_pretty) { - asprintf (&message, "MCA%s %s", requested ? "" : " (disabled)", group->group_framework); + asprintf (&message, "MCA%s %s%s", requested ? "" : " (disabled)", + group->group_framework, + component_msg ? component_msg : ""); opal_info_out(message, message, strings[j]); free(message); } else { @@ -682,6 +704,15 @@ static void opal_info_show_mca_group_params(const mca_base_var_group_t *group, m continue; } + if (opal_info_pretty && curr_group != group) { + asprintf(&message, "MCA%s %s%s", requested ? "" : " (disabled)", + group->group_framework, + component_msg ? component_msg : ""); + opal_info_out(message, message, "---------------------------------------------------"); + free(message); + curr_group = group; + } + ret = mca_base_pvar_dump (variables[i], &strings, !opal_info_pretty ? MCA_BASE_VAR_DUMP_PARSABLE : MCA_BASE_VAR_DUMP_READABLE); if (OPAL_SUCCESS != ret) { continue; @@ -689,7 +720,9 @@ static void opal_info_show_mca_group_params(const mca_base_var_group_t *group, m for (j = 0 ; strings[j] ; ++j) { if (0 == j && opal_info_pretty) { - asprintf (&message, "MCA%s %s", requested ? "" : " (disabled)", group->group_framework); + asprintf (&message, "MCA%s %s%s", requested ? 
"" : " (disabled)", + group->group_framework, + component_msg ? component_msg : ""); opal_info_out(message, message, strings[j]); free(message); } else { @@ -718,6 +751,7 @@ static void opal_info_show_mca_group_params(const mca_base_var_group_t *group, m } opal_info_show_mca_group_params(group, max_level, want_internal); } + free(component_msg); } void opal_info_show_mca_params(const char *type, const char *component, @@ -805,7 +839,7 @@ static int centerpoint = 24; static int screen_width = 78; /* - * Prints the passed integer in a pretty or parsable format. + * Prints the passed message in a pretty or parsable format. */ void opal_info_out(const char *pretty_message, const char *plain_message, const char *value) { @@ -949,6 +983,9 @@ void opal_info_out(const char *pretty_message, const char *plain_message, const } } +/* + * Prints the passed integer in a pretty or parsable format. + */ void opal_info_out_int(const char *pretty_message, const char *plain_message, int value) diff --git a/opal/runtime/opal_info_support.h b/opal/runtime/opal_info_support.h index 243c8d908eb..61283971436 100644 --- a/opal/runtime/opal_info_support.h +++ b/opal/runtime/opal_info_support.h @@ -3,9 +3,9 @@ * All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -68,6 +68,7 @@ OPAL_DECLSPEC void opal_info_err_params(opal_pointer_array_t *component_map); OPAL_DECLSPEC void opal_info_do_params(bool want_all_in, bool want_internal, opal_pointer_array_t *mca_type, + opal_pointer_array_t *component_map, opal_cmd_line_t *opal_info_cmd_line); OPAL_DECLSPEC void opal_info_show_path(const char *type, const char *value); @@ -75,7 +76,7 @@ OPAL_DECLSPEC void opal_info_show_path(const char *type, const char *value); OPAL_DECLSPEC void opal_info_do_path(bool want_all, opal_cmd_line_t *cmd_line); OPAL_DECLSPEC void opal_info_show_mca_params(const char *type, - const char *component, + const char *component, mca_base_var_info_lvl_t max_level, bool want_internal); @@ -84,7 +85,7 @@ OPAL_DECLSPEC void opal_info_show_mca_version(const mca_base_component_t *compon OPAL_DECLSPEC void opal_info_show_component_version(opal_pointer_array_t *mca_types, opal_pointer_array_t *component_map, - const char *type_name, + const char *type_name, const char *component_name, const char *scope, const char *ver_type); @@ -103,8 +104,8 @@ OPAL_DECLSPEC void opal_info_do_type(opal_cmd_line_t *opal_info_cmd_line); OPAL_DECLSPEC void opal_info_out(const char *pretty_message, const char *plain_message, const char *value); -OPAL_DECLSPEC void opal_info_out_int(const char *pretty_message, - const char *plain_message, +OPAL_DECLSPEC void opal_info_out_int(const char *pretty_message, + const char *plain_message, int value); OPAL_DECLSPEC int opal_info_register_project_frameworks (const char *project_name, diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index a2b927e711b..b5fc64e0ef8 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -10,12 +10,12 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2012 Cisco Systems, Inc. 
All rights reserved. + * Copyright (c) 2007-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -27,10 +27,15 @@ /** @file **/ +#ifdef HAVE_UNISTD_H +#include +#endif + #include "opal_config.h" #include "opal/util/malloc.h" #include "opal/util/arch.h" +#include "opal/util/opal_environ.h" #include "opal/util/output.h" #include "opal/util/show_help.h" #include "opal/util/proc.h" @@ -41,6 +46,7 @@ #include "opal/datatype/opal_datatype.h" #include "opal/mca/installdirs/base/base.h" #include "opal/mca/memory/base/base.h" +#include "opal/mca/patcher/base/base.h" #include "opal/mca/memcpy/base/base.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/sec/base/base.h" @@ -48,14 +54,8 @@ #include "opal/mca/memchecker/base/base.h" #include "opal/dss/dss.h" #include "opal/mca/shmem/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/mca/compress/base/base.h" -#endif #include "opal/threads/threads.h" -#include "opal/runtime/opal_cr.h" -#include "opal/mca/crs/base/base.h" - #include "opal/runtime/opal_progress.h" #include "opal/mca/event/base/base.h" #include "opal/mca/backtrace/base/base.h" @@ -253,12 +253,40 @@ opal_err2str(int errnum, const char **errmsg) } +int opal_init_psm(void) +{ + /* Very early in the init sequence -- before *ANY* MCA components + are opened -- we need to disable some behavior from the PSM and + PSM2 libraries (by default): at least some old versions of + these libraries hijack signal handlers during their library + constructors and then do not un-hijack them when the libraries + are 
unloaded. + + It is a bit of an abstraction break that we have to put + vendor/transport-specific code in the OPAL core, but we're + out of options, unfortunately. + + NOTE: We only disable this behavior if the corresponding + environment variables are not already set (i.e., if the + user/environment has indicated a preference for this behavior, + we won't override it). */ + if (NULL == getenv("IPATH_NO_BACKTRACE")) { + opal_setenv("IPATH_NO_BACKTRACE", "1", true, &environ); + } + if (NULL == getenv("HFI_NO_BACKTRACE")) { + opal_setenv("HFI_NO_BACKTRACE", "1", true, &environ); + } + + return OPAL_SUCCESS; +} + + int opal_init_util(int* pargc, char*** pargv) { int ret; char *error = NULL; - char hostname[512]; + char hostname[OPAL_MAXHOSTNAMELEN]; if( ++opal_util_initialized != 1 ) { if( opal_util_initialized < 1 ) { @@ -283,7 +311,7 @@ opal_init_util(int* pargc, char*** pargv) * that we don't bother with fqdn and prefix issues here - we let * the RTE later replace this with a modified name if the user * requests it */ - gethostname(hostname, 512); + gethostname(hostname, sizeof(hostname)); opal_process_info.nodename = strdup(hostname); /* initialize the memory allocator */ @@ -298,12 +326,12 @@ opal_init_util(int* pargc, char*** pargv) __FILE__, __LINE__, ret); return ret; } - + /* initialize the help system */ opal_show_help_init(); /* register handler for errnum -> string converstion */ - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != (ret = opal_error_register("OPAL", OPAL_ERR_BASE, OPAL_ERR_MAX, opal_err2str))) { error = "opal_error_register"; @@ -316,6 +344,10 @@ opal_init_util(int* pargc, char*** pargv) goto return_error; } + // Disable PSM signal hijacking (see comment in function for more + // details) + opal_init_psm(); + /* Setup the parameter system */ if (OPAL_SUCCESS != (ret = mca_base_var_init())) { error = "mca_base_var_init"; @@ -420,12 +452,8 @@ opal_init(int* pargc, char*** pargv) goto return_error; } - /* open the memory manager components. 
Memory hooks may be - triggered before this (any time after mem_free_init(), - actually). This is a hook available for memory manager hooks - without good initialization routine support */ - if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_memory_base_framework, 0))) { - error = "opal_memory_base_open"; + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_patcher_base_framework, 0))) { + error = "opal_patcher_base_open"; goto return_error; } @@ -459,8 +487,6 @@ opal_init(int* pargc, char*** pargv) /* * Need to start the event and progress engines if none else is. - * opal_cr_init uses the progress engine, so it is lumped together - * into this set as well. */ /* * Initialize the event library @@ -469,7 +495,7 @@ opal_init(int* pargc, char*** pargv) error = "opal_event_base_open"; goto return_error; } - + /* * Initialize the general progress engine */ @@ -491,34 +517,6 @@ opal_init(int* pargc, char*** pargv) goto return_error; } -#if OPAL_ENABLE_FT_CR == 1 - /* - * Initialize the compression framework - * Note: Currently only used in C/R so it has been marked to only - * initialize when C/R is enabled. If other places in the code - * wish to use this framework, it is safe to remove the protection. - */ - if( OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_compress_base_framework, 0)) ) { - error = "opal_compress_base_open"; - goto return_error; - } - if( OPAL_SUCCESS != (ret = opal_compress_base_select()) ) { - error = "opal_compress_base_select"; - goto return_error; - } -#endif - - /* - * Initalize the checkpoint/restart functionality - * Note: Always do this so we can detect if the user - * attempts to checkpoint a non checkpointable job, - * otherwise the tools may hang or not clean up properly. 
- */ - if (OPAL_SUCCESS != (ret = opal_cr_init() ) ) { - error = "opal_cr_init"; - goto return_error; - } - /* initialize the security framework */ if( OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_sec_base_framework, 0)) ) { error = "opal_sec_base_open"; @@ -555,12 +553,12 @@ int opal_init_test(void) __FILE__, __LINE__, ret); return ret; } - + /* initialize the help system */ opal_show_help_init(); /* register handler for errnum -> string converstion */ - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != (ret = opal_error_register("OPAL", OPAL_ERR_BASE, OPAL_ERR_MAX, opal_err2str))) { error = "opal_error_register"; diff --git a/opal/runtime/opal_params.c b/opal/runtime/opal_params.c index 18372d6e43b..854b4b3d6a5 100644 --- a/opal/runtime/opal_params.c +++ b/opal/runtime/opal_params.c @@ -6,32 +6,33 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2010-2014 Los Alamos National Security, LLC. + * Copyright (c) 2010-2016 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" #include -#ifdef HAVE_SIGNAL_H #include -#endif #include "opal/constants.h" #include "opal/runtime/opal.h" @@ -45,8 +46,10 @@ #include "opal/dss/dss.h" #include "opal/util/show_help.h" #include "opal/util/timings.h" +#include "opal/util/bit_ops.h" char *opal_signal_string = NULL; +char *opal_stacktrace_output_filename = NULL; char *opal_net_private_ipv4 = NULL; char *opal_set_max_sys_limits = NULL; @@ -58,21 +61,22 @@ bool opal_timing_overhead = true; bool opal_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT); bool opal_cuda_support = false; -#if OPAL_ENABLE_FT_CR == 1 -bool opal_base_distill_checkpoint_ready = false; -#endif /** * Globals imported from the OMPI layer. */ int opal_leave_pinned = -1; bool opal_leave_pinned_pipeline = false; +bool opal_abort_print_stack = false; +int opal_abort_delay = 0; +unsigned int opal_progress_lp_call_ratio = 8; static bool opal_register_done = false; int opal_register_params(void) { int ret; + char *string = NULL; if (opal_register_done) { return OPAL_SUCCESS; @@ -84,7 +88,6 @@ int opal_register_params(void) * This string is going to be used in opal/util/stacktrace.c */ { - char *string = NULL; int j; int signals[] = { #ifdef SIGABRT @@ -124,6 +127,28 @@ int opal_register_params(void) } } + /* + * Where should the stack trace output be directed + * This string is going to be used in opal/util/stacktrace.c + */ + string = strdup("stderr"); + opal_stacktrace_output_filename = string; + ret = mca_base_var_register ("opal", "opal", NULL, "stacktrace_output", + "Specifies where the stack trace output stream goes. " + "Accepts one of the following: none (disabled), stderr (default), stdout, file[:filename]. " + "If 'filename' is not specified, a default filename of 'stacktrace' is used. " + "The 'filename' is appended with either '.PID' or '.RANK.PID', if RANK is available. 
" + "The 'filename' can be an absolute path or a relative path to the current working directory.", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &opal_stacktrace_output_filename); + free (string); + if (0 > ret) { + return ret; + } + + #if defined(HAVE_SCHED_YIELD) opal_progress_yield_when_idle = false; ret = mca_base_var_register ("opal", "opal", "progress", "yield_when_idle", @@ -156,24 +181,11 @@ int opal_register_params(void) } #endif -#if OPAL_ENABLE_FT_CR == 1 - opal_base_distill_checkpoint_ready = false; - ret = mca_base_var_register("opal", "opal", "base", "distill_checkpoint_ready", - "Distill only those components that are Checkpoint Ready", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL, - &opal_base_distill_checkpoint_ready); - - if (0 > ret) { - return ret; - } -#endif - /* RFC1918 defines - 10.0.0./8 - 172.16.0.0/12 - 192.168.0.0/16 - + RFC3330 also mentions - 169.254.0.0/16 for DHCP onlink iff there's no DHCP server */ @@ -282,6 +294,57 @@ int opal_register_params(void) MCA_BASE_VAR_SCOPE_READONLY, &opal_warn_on_fork); + opal_abort_delay = 0; + ret = mca_base_var_register("opal", "opal", NULL, "abort_delay", + "If nonzero, print out an identifying message when abort operation is invoked (hostname, PID of the process that called abort) and delay for that many seconds before exiting (a negative delay value means to never abort). This allows attaching of a debugger before quitting the job.", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &opal_abort_delay); + if (0 > ret) { + return ret; + } + + opal_progress_lp_call_ratio = 8; + ret = mca_base_var_register("opal", "opal", NULL, "progress_lp_call_ratio", + "Ratio of calls to high-priority to low-priority progress " + "functions. Higher numbers decrease the frequency of the callback " + "rate. 
Must be a power of two (default: 8)", + MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &opal_progress_lp_call_ratio); + if (0 > ret) { + return ret; + } + + if (opal_progress_lp_call_ratio & (opal_progress_lp_call_ratio - 1)) { + opal_output(0, "MCA variable progress_lp_call_ratio must be a power of two. value = %u", + opal_progress_lp_call_ratio); + return OPAL_ERR_BAD_PARAM; + } + + opal_abort_print_stack = false; + ret = mca_base_var_register("opal", "opal", NULL, "abort_print_stack", + "If nonzero, print out a stack trace when abort is invoked", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, + /* If we do not have stack trace + capability, make this a constant + MCA variable */ +#if OPAL_WANT_PRETTY_PRINT_STACKTRACE + 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, +#else + MCA_BASE_VAR_FLAG_DEFAULT_ONLY, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_CONSTANT, +#endif + &opal_abort_print_stack); + if (0 > ret) { + return ret; + } + /* The ddt engine has a few parameters */ ret = opal_datatype_register_params(); if (OPAL_SUCCESS != ret) { @@ -290,8 +353,8 @@ int opal_register_params(void) /* dss has parameters */ ret = opal_dss_register_vars (); - if (OPAL_SUCCESS != ret) { - return ret; + if (OPAL_SUCCESS != ret) { + return ret; } return OPAL_SUCCESS; diff --git a/opal/runtime/opal_params.h b/opal/runtime/opal_params.h index a8ee60cd0e3..687edba81bc 100644 --- a/opal/runtime/opal_params.h +++ b/opal/runtime/opal_params.h @@ -5,21 +5,24 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Los Alamos National Security, LLC. 
All rights - * reserved. + * reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2010-2013 Los Alamos National Security, LLC. + * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,6 +30,7 @@ #define OPAL_PARAMS_H extern char *opal_signal_string; +extern char *opal_stacktrace_output_filename; extern char *opal_net_private_ipv4; extern char *opal_set_max_sys_limits; @@ -55,12 +59,30 @@ OPAL_DECLSPEC extern int opal_leave_pinned; */ OPAL_DECLSPEC extern bool opal_leave_pinned_pipeline; +/** + * Whether an abort operation should print out a stack trace or not. + */ +OPAL_DECLSPEC extern bool opal_abort_print_stack; + +/** + * Whether abort operation should print out an identifying message + * (e.g., hostname and PID) and loop waiting for a debugger to + * attach. The value of the integer is how many seconds to wait: + * + * 0 = do not print the message and do not loop + * negative value = print the message and loop forever + * positive value = print the message and delay for that many seconds + */ +OPAL_DECLSPEC extern int opal_abort_delay; + +/** + * Ratio of calls to high-priority to low-priority progress functions. + * Must be a power of two. 
+ */ +OPAL_DECLSPEC extern unsigned int opal_progress_lp_call_ratio; + #if OPAL_ENABLE_DEBUG extern bool opal_progress_debug; #endif -#if OPAL_ENABLE_FT_CR == 1 -extern bool opal_base_distill_checkpoint_ready; -#endif - #endif diff --git a/opal/runtime/opal_progress.c b/opal/runtime/opal_progress.c index b4713906682..d78402adc46 100644 --- a/opal/runtime/opal_progress.c +++ b/opal/runtime/opal_progress.c @@ -6,19 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -42,8 +42,8 @@ bool opal_progress_debug = false; #endif -/* - * default parameters +/* + * default parameters */ static int opal_progress_event_flag = OPAL_EVLOOP_ONCE | OPAL_EVLOOP_NONBLOCK; int opal_progress_spin_count = 10000; @@ -55,10 +55,15 @@ int opal_progress_spin_count = 10000; static opal_atomic_lock_t progress_lock; /* callbacks to progress */ -static opal_progress_callback_t *callbacks = NULL; +static volatile opal_progress_callback_t *callbacks = NULL; static size_t callbacks_len = 0; static size_t callbacks_size = 0; +static volatile opal_progress_callback_t *callbacks_lp = NULL; +static size_t callbacks_lp_len = 0; +static size_t callbacks_lp_size = 0; +static uint64_t callbacks_lp_mask = 0x7; + /* do we want to call sched_yield() if nothing happened */ bool opal_progress_yield_when_idle = false; @@ -71,7 +76,7 @@ static int32_t event_progress_counter = 0; /* reset value for counter when it hits 0 */ static int32_t event_progress_delta = 0; #endif -/* users of the event library from MPI cause the tick rate to +/* users of the event library from MPI cause the tick rate to be every time */ static int32_t num_event_users = 0; @@ -89,6 +94,9 @@ static int debug_output = -1; */ static int fake_cb(void) { return 0; } +static int _opal_progress_unregister (opal_progress_callback_t cb, volatile opal_progress_callback_t *callback_array, + size_t *callback_array_len); + /* init the progress engine - called from orte_init */ int opal_progress_init(void) @@ -105,8 +113,32 @@ opal_progress_init(void) } #endif + + callbacks_lp_mask = opal_progress_lp_call_ratio - 1; + + callbacks_size = callbacks_lp_size = 8; + + callbacks = malloc (callbacks_size * sizeof (callbacks[0])); + callbacks_lp = malloc (callbacks_lp_size * sizeof (callbacks_lp[0])); + + if (NULL == callbacks || NULL == callbacks_lp) { + free ((void *) callbacks); + free ((void *) callbacks_lp); + callbacks_size = 
callbacks_lp_size = 0; + callbacks = callbacks_lp = NULL; + return OPAL_ERR_OUT_OF_RESOURCE; + } + + for (size_t i = 0 ; i < callbacks_size ; ++i) { + callbacks[i] = fake_cb; + } + + for (size_t i = 0 ; i < callbacks_lp_size ; ++i) { + callbacks_lp[i] = fake_cb; + } + OPAL_OUTPUT((debug_output, "progress: initialized event flag to: %x", - opal_progress_event_flag)); + opal_progress_event_flag)); OPAL_OUTPUT((debug_output, "progress: initialized yield_when_idle to: %s", opal_progress_yield_when_idle ? "true" : "false")); OPAL_OUTPUT((debug_output, "progress: initialized num users to: %d", @@ -126,10 +158,13 @@ opal_progress_finalize(void) callbacks_len = 0; callbacks_size = 0; - if (NULL != callbacks) { - free(callbacks); - callbacks = NULL; - } + free ((void *) callbacks); + callbacks = NULL; + + callbacks_lp_len = 0; + callbacks_lp_size = 0; + free ((void *) callbacks_lp); + callbacks_lp = NULL; opal_atomic_unlock(&progress_lock); @@ -138,7 +173,7 @@ opal_progress_finalize(void) /* - * Progress the event library and any functions that have registered to + * Progress the event library and any functions that have registered to * be called. We don't propogate errors from the progress functions, * so no action is taken if they return failures. The functions are * expected to return the number of events progressed, to determine @@ -151,6 +186,7 @@ opal_progress_finalize(void) void opal_progress(void) { + static volatile uint32_t num_calls = 0; size_t i; int events = 0; @@ -165,19 +201,19 @@ opal_progress(void) /* trip the event library if we've reached our tick rate and we are enabled */ if (now - event_progress_last_time > event_progress_delta ) { - event_progress_last_time = (num_event_users > 0) ? + event_progress_last_time = (num_event_users > 0) ? 
now - event_progress_delta : now; - events += opal_event_loop(opal_event_base, opal_progress_event_flag); + events += opal_event_loop(opal_sync_event_base, opal_progress_event_flag); } #else /* OPAL_PROGRESS_USE_TIMERS */ /* trip the event library if we've reached our tick rate and we are enabled */ if (OPAL_THREAD_ADD32(&event_progress_counter, -1) <= 0 ) { - event_progress_counter = + event_progress_counter = (num_event_users > 0) ? 0 : event_progress_delta; - events += opal_event_loop(opal_event_base, opal_progress_event_flag); + events += opal_event_loop(opal_sync_event_base, opal_progress_event_flag); } #endif /* OPAL_PROGRESS_USE_TIMERS */ @@ -189,6 +225,13 @@ opal_progress(void) events += (callbacks[i])(); } + if (callbacks_lp_len > 0 && (OPAL_THREAD_ADD32((volatile int32_t *) &num_calls, 1) & callbacks_lp_mask) == 0) { + /* run low priority callbacks once every 8 calls to opal_progress() */ + for (i = 0 ; i < callbacks_lp_len ; ++i) { + events += (callbacks_lp[i])(); + } + } + #if OPAL_HAVE_SCHED_YIELD if (opal_progress_yield_when_idle && events <= 0) { /* If there is nothing to do - yield the processor - otherwise @@ -217,10 +260,14 @@ opal_progress_set_event_flag(int flag) void opal_progress_event_users_increment(void) { +#if OPAL_ENABLE_DEBUG int32_t val; val = opal_atomic_add_32(&num_event_users, 1); OPAL_OUTPUT((debug_output, "progress: event_users_increment setting count to %d", val)); +#else + (void)opal_atomic_add_32(&num_event_users, 1); +#endif #if OPAL_PROGRESS_USE_TIMERS /* force an update next round (we'll be past the delta) */ @@ -235,10 +282,14 @@ opal_progress_event_users_increment(void) void opal_progress_event_users_decrement(void) { +#if OPAL_ENABLE_DEBUG || ! 
OPAL_PROGRESS_USE_TIMERS int32_t val; val = opal_atomic_sub_32(&num_event_users, 1); OPAL_OUTPUT((debug_output, "progress: event_users_decrement setting count to %d", val)); +#else + (void)opal_atomic_sub_32(&num_event_users, 1); +#endif #if !OPAL_PROGRESS_USE_TIMERS /* start now in delaying if it's easy */ @@ -302,71 +353,130 @@ opal_progress_set_event_poll_rate(int polltime) #endif } +static int opal_progress_find_cb (opal_progress_callback_t cb, volatile opal_progress_callback_t *cbs, + size_t cbs_len) +{ + for (size_t i = 0 ; i < cbs_len ; ++i) { + if (cbs[i] == cb) { + return (int) i; + } + } -int -opal_progress_register(opal_progress_callback_t cb) + return OPAL_ERR_NOT_FOUND; +} + +static int _opal_progress_register (opal_progress_callback_t cb, volatile opal_progress_callback_t **cbs, + size_t *cbs_size, size_t *cbs_len) { int ret = OPAL_SUCCESS; - size_t index; - opal_atomic_lock(&progress_lock); + if (OPAL_ERR_NOT_FOUND != opal_progress_find_cb (cb, *cbs, *cbs_len)) { + return OPAL_SUCCESS; + } /* see if we need to allocate more space */ - if (callbacks_len + 1 > callbacks_size) { - opal_progress_callback_t *tmp; - tmp = (opal_progress_callback_t*)realloc(callbacks, sizeof(opal_progress_callback_t) * (callbacks_size + 4)); + if (*cbs_len + 1 > *cbs_size) { + opal_progress_callback_t *tmp, *old; + + tmp = (opal_progress_callback_t *) malloc (sizeof (tmp[0]) * 2 * *cbs_size); if (tmp == NULL) { - ret = OPAL_ERR_TEMP_OUT_OF_RESOURCE; - goto cleanup; + return OPAL_ERR_TEMP_OUT_OF_RESOURCE; + } + + if (*cbs) { + /* copy old callbacks */ + memcpy (tmp, (void *) *cbs, sizeof(tmp[0]) * *cbs_size); } - /* registering fake callbacks to fill callbacks[] */ - for( index = callbacks_len + 1 ; index < callbacks_size + 4 ; index++) { - tmp[index] = &fake_cb; + + for (size_t i = *cbs_len ; i < 2 * *cbs_size ; ++i) { + tmp[i] = fake_cb; } - callbacks = tmp; - callbacks_size += 4; + opal_atomic_wmb (); + + /* swap out callback array */ + old = opal_atomic_swap_ptr (cbs, 
tmp); + + opal_atomic_wmb (); + + free (old); + *cbs_size *= 2; } - callbacks[callbacks_len++] = cb; + cbs[0][*cbs_len] = cb; + ++*cbs_len; + + opal_atomic_wmb (); + + return ret; +} - cleanup: +int opal_progress_register (opal_progress_callback_t cb) +{ + int ret; + + opal_atomic_lock(&progress_lock); + + (void) _opal_progress_unregister (cb, callbacks_lp, &callbacks_lp_len); + + ret = _opal_progress_register (cb, &callbacks, &callbacks_size, &callbacks_len); opal_atomic_unlock(&progress_lock); return ret; } -int -opal_progress_unregister(opal_progress_callback_t cb) +int opal_progress_register_lp (opal_progress_callback_t cb) { - size_t i; - int ret = OPAL_ERR_NOT_FOUND; + int ret; opal_atomic_lock(&progress_lock); - for (i = 0 ; i < callbacks_len ; ++i) { - if (cb == callbacks[i]) { - callbacks[i] = &fake_cb; - ret = OPAL_SUCCESS; - break; - } + (void) _opal_progress_unregister (cb, callbacks, &callbacks_len); + + ret = _opal_progress_register (cb, &callbacks_lp, &callbacks_lp_size, &callbacks_lp_len); + + opal_atomic_unlock(&progress_lock); + + return ret; +} + +static int _opal_progress_unregister (opal_progress_callback_t cb, volatile opal_progress_callback_t *callback_array, + size_t *callback_array_len) +{ + int ret = opal_progress_find_cb (cb, callback_array, *callback_array_len); + if (OPAL_ERR_NOT_FOUND == ret) { + return ret; } - + /* If we found the function we're unregistering: If callbacks_len is 0, we're not goig to do anything interesting anyway, so skip. If callbacks_len is 1, it will soon be 0, so no need to - do any repacking. size_t can be unsigned, so 0 - 1 is bad for - a loop condition :). */ - if (OPAL_SUCCESS == ret) { - if (callbacks_len > 1 ) { - /* now tightly pack the array */ - for ( ; i < callbacks_len - 1 ; ++i) { - callbacks[i] = callbacks[i + 1]; - } - } - callbacks[callbacks_len - 1] = &fake_cb; - callbacks_len--; + do any repacking. 
*/ + for (size_t i = (size_t) ret ; i < *callback_array_len - 1 ; ++i) { + /* copy callbacks atomically since another thread may be in + * opal_progress(). */ + (void) opal_atomic_swap_ptr (callback_array + i, callback_array[i+1]); + } + + callback_array[*callback_array_len] = fake_cb; + --*callback_array_len; + + return OPAL_SUCCESS; +} + +int opal_progress_unregister (opal_progress_callback_t cb) +{ + int ret; + + opal_atomic_lock(&progress_lock); + + ret = _opal_progress_unregister (cb, callbacks, &callbacks_len); + + if (OPAL_SUCCESS != ret) { + /* if not in the high-priority array try to remove from the lp array. + * a callback will never be in both. */ + ret = _opal_progress_unregister (cb, callbacks_lp, &callbacks_lp_len); } opal_atomic_unlock(&progress_lock); diff --git a/opal/runtime/opal_progress.h b/opal/runtime/opal_progress.h index a3b30bfcca0..5badbd5a459 100644 --- a/opal/runtime/opal_progress.h +++ b/opal/runtime/opal_progress.h @@ -5,17 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -36,7 +36,7 @@ BEGIN_C_DECLS /** * Initialize the progress engine * - * Initialize the progress engine, including constructing the + * Initialize the progress engine, including constructing the * proper locks and allocating space for the progress registration * functions. At this point, any function in the progress engine * interface may be called. 
@@ -44,7 +44,7 @@ BEGIN_C_DECLS OPAL_DECLSPEC int opal_progress_init(void); -/** +/** * Shut down the progress engine * * Shut down the progress engine. This includes deregistering all @@ -76,7 +76,7 @@ OPAL_DECLSPEC void opal_progress(void); * meaning that the call to opal_event_loop() will block pending * events, but may block for a period of time. * - * @param flags One of the valid vlags argument to + * @param flags One of the valid vlags argument to * opal_event_loop(). * @return Previous value of flags used to call * opal_event_loop(). @@ -142,7 +142,7 @@ OPAL_DECLSPEC void opal_progress_set_event_poll_rate(int microseconds); /** * Progress callback function typedef - * + * * Prototype for the a progress function callback. Progress function * callbacks can be registered with opal_progress_register() and * deregistered with opal_progress_deregister(). It should be noted @@ -163,6 +163,8 @@ typedef int (*opal_progress_callback_t)(void); */ OPAL_DECLSPEC int opal_progress_register(opal_progress_callback_t cb); +OPAL_DECLSPEC int opal_progress_register_lp (opal_progress_callback_t cb); + /** * Deregister previously registered event diff --git a/opal/runtime/opal_progress_threads.c b/opal/runtime/opal_progress_threads.c index 5524d60f3b9..4c53fa35b67 100644 --- a/opal/runtime/opal_progress_threads.c +++ b/opal/runtime/opal_progress_threads.c @@ -1,9 +1,10 @@ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,64 +23,79 @@ #include "opal/runtime/opal_progress_threads.h" + /* create a tracking object for progress threads */ typedef struct { opal_list_item_t super; + + int refcount; char *name; + opal_event_base_t *ev_base; + + /* This will be set to false when it is time for the progress + thread to exit */ volatile bool ev_active; - bool block_active; + + /* This event will always be set on the ev_base (so that the + ev_base is not empty!) */ opal_event_t block; - bool engine_defined; + + bool engine_constructed; opal_thread_t engine; - int pipe[2]; } opal_progress_tracker_t; -static void trkcon(opal_progress_tracker_t *p) + +static void tracker_constructor(opal_progress_tracker_t *p) { + p->refcount = 1; // start at one since someone created it p->name = NULL; p->ev_base = NULL; - p->ev_active = true; - p->block_active = false; - p->engine_defined = false; - p->pipe[0] = -1; - p->pipe[1] = -1; + p->ev_active = false; + p->engine_constructed = false; } -static void trkdes(opal_progress_tracker_t *p) + +static void tracker_destructor(opal_progress_tracker_t *p) { + opal_event_del(&p->block); + if (NULL != p->name) { free(p->name); } - if (p->block_active) { - opal_event_del(&p->block); - } if (NULL != p->ev_base) { opal_event_base_free(p->ev_base); } - if (0 <= p->pipe[0]) { - close(p->pipe[0]); - } - if (0 <= p->pipe[1]) { - close(p->pipe[1]); - } - if (p->engine_defined) { + if (p->engine_constructed) { OBJ_DESTRUCT(&p->engine); } } + static OBJ_CLASS_INSTANCE(opal_progress_tracker_t, opal_list_item_t, - trkcon, trkdes); + tracker_constructor, + tracker_destructor); -static opal_list_t tracking; static bool inited = false; -static void wakeup(int fd, short args, void *cbdata) +static opal_list_t tracking; +static struct timeval long_timeout = { + .tv_sec = 3600, + .tv_usec = 0 +}; +static const char *shared_thread_name = "OPAL-wide async progress thread"; + +/* + * If this event is 
fired, just restart it so that this event base + * continues to have something to block on. + */ +static void dummy_timeout_cb(int fd, short args, void *cbdata) { opal_progress_tracker_t *trk = (opal_progress_tracker_t*)cbdata; - /* if this event fired, then the blocker event will - * be deleted from the event base by libevent, so flag - * it so we don't try to delete it again */ - trk->block_active = false; + opal_event_add(&trk->block, &long_timeout); } + +/* + * Main for the progress thread + */ static void* progress_engine(opal_object_t *obj) { opal_thread_t *t = (opal_thread_t*)obj; @@ -88,135 +104,190 @@ static void* progress_engine(opal_object_t *obj) while (trk->ev_active) { opal_event_loop(trk->ev_base, OPAL_EVLOOP_ONCE); } + return OPAL_THREAD_CANCELLED; } -opal_event_base_t *opal_start_progress_thread(char *name, - bool create_block) +static void stop_progress_engine(opal_progress_tracker_t *trk) +{ + assert(trk->ev_active); + trk->ev_active = false; + + /* break the event loop - this will cause the loop to exit upon + completion of any current event */ + opal_event_base_loopbreak(trk->ev_base); + + opal_thread_join(&trk->engine, NULL); +} + +static int start_progress_engine(opal_progress_tracker_t *trk) +{ + assert(!trk->ev_active); + trk->ev_active = true; + + /* fork off a thread to progress it */ + trk->engine.t_run = progress_engine; + trk->engine.t_arg = trk; + + int rc = opal_thread_start(&trk->engine); + if (OPAL_SUCCESS != rc) { + OPAL_ERROR_LOG(rc); + } + + return rc; +} + +opal_event_base_t *opal_progress_thread_init(const char *name) { opal_progress_tracker_t *trk; int rc; + if (!inited) { + OBJ_CONSTRUCT(&tracking, opal_list_t); + inited = true; + } + + if (NULL == name) { + name = shared_thread_name; + } + + /* check if we already have this thread */ + OPAL_LIST_FOREACH(trk, &tracking, opal_progress_tracker_t) { + if (0 == strcmp(name, trk->name)) { + /* we do, so up the refcount on it */ + ++trk->refcount; + /* return the existing base */ 
+ return trk->ev_base; + } + } + trk = OBJ_NEW(opal_progress_tracker_t); + if (NULL == trk) { + OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); + return NULL; + } + trk->name = strdup(name); - if (NULL == (trk->ev_base = opal_event_base_create())) { + if (NULL == trk->name) { OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); OBJ_RELEASE(trk); return NULL; } - if (create_block) { - /* add an event it can block on */ - if (0 > pipe(trk->pipe)) { - OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO); - OBJ_RELEASE(trk); - return NULL; - } - /* Make sure the pipe FDs are set to close-on-exec so that - they don't leak into children */ - if (opal_fd_set_cloexec(trk->pipe[0]) != OPAL_SUCCESS || - opal_fd_set_cloexec(trk->pipe[1]) != OPAL_SUCCESS) { - OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO); - OBJ_RELEASE(trk); - return NULL; - } - opal_event_set(trk->ev_base, &trk->block, trk->pipe[0], OPAL_EV_READ, wakeup, trk); - opal_event_add(&trk->block, 0); - trk->block_active = true; + if (NULL == (trk->ev_base = opal_event_base_create())) { + OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); + OBJ_RELEASE(trk); + return NULL; } + /* add an event to the new event base (if there are no events, + opal_event_loop() will return immediately) */ + opal_event_set(trk->ev_base, &trk->block, -1, OPAL_EV_PERSIST, + dummy_timeout_cb, trk); + opal_event_add(&trk->block, &long_timeout); + /* construct the thread object */ OBJ_CONSTRUCT(&trk->engine, opal_thread_t); - trk->engine_defined = true; - /* fork off a thread to progress it */ - trk->engine.t_run = progress_engine; - trk->engine.t_arg = trk; - if (OPAL_SUCCESS != (rc = opal_thread_start(&trk->engine))) { + trk->engine_constructed = true; + if (OPAL_SUCCESS != (rc = start_progress_engine(trk))) { OPAL_ERROR_LOG(rc); OBJ_RELEASE(trk); return NULL; } - if (!inited) { - OBJ_CONSTRUCT(&tracking, opal_list_t); - inited = true; - } opal_list_append(&tracking, &trk->super); + return trk->ev_base; } -void opal_stop_progress_thread(char *name, bool cleanup) +int 
opal_progress_thread_finalize(const char *name) { opal_progress_tracker_t *trk; - int i; if (!inited) { /* nothing we can do */ - return; + return OPAL_ERR_NOT_FOUND; + } + + if (NULL == name) { + name = shared_thread_name; } /* find the specified engine */ OPAL_LIST_FOREACH(trk, &tracking, opal_progress_tracker_t) { if (0 == strcmp(name, trk->name)) { - /* if it is already inactive, then just cleanup if that - * is the request */ - if (!trk->ev_active) { - if (cleanup) { - opal_list_remove_item(&tracking, &trk->super); - OBJ_RELEASE(trk); - } - return; - } - /* mark it as inactive */ - trk->ev_active = false; - /* break the event loop - this will cause the loop to exit - * upon completion of any current event */ - opal_event_base_loopbreak(trk->ev_base); - /* if present, use the block to break it loose just in - * case the thread is blocked in a call to select for - * a long time */ - if (trk->block_active) { - i=1; - write(trk->pipe[1], &i, sizeof(int)); + /* decrement the refcount */ + --trk->refcount; + + /* If the refcount is still above 0, we're done here */ + if (trk->refcount > 0) { + return OPAL_SUCCESS; } - /* wait for thread to exit */ - opal_thread_join(&trk->engine, NULL); - /* cleanup, if they indicated they are done with this event base */ - if (cleanup) { - opal_list_remove_item(&tracking, &trk->super); - OBJ_RELEASE(trk); + + /* If the progress thread is active, stop it */ + if (trk->ev_active) { + stop_progress_engine(trk); } - return; + + opal_list_remove_item(&tracking, &trk->super); + OBJ_RELEASE(trk); + return OPAL_SUCCESS; } } + + return OPAL_ERR_NOT_FOUND; } -int opal_restart_progress_thread(char *name) +/* + * Stop the progress thread, but don't delete the tracker (or event base) + */ +int opal_progress_thread_pause(const char *name) { opal_progress_tracker_t *trk; - int rc; if (!inited) { /* nothing we can do */ - return OPAL_ERROR; + return OPAL_ERR_NOT_FOUND; + } + + if (NULL == name) { + name = shared_thread_name; } /* find the 
specified engine */ OPAL_LIST_FOREACH(trk, &tracking, opal_progress_tracker_t) { if (0 == strcmp(name, trk->name)) { - if (!trk->engine_defined) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED); - return OPAL_ERR_NOT_SUPPORTED; - } - /* ensure the block is set, if requested */ - if (0 <= trk->pipe[0] && !trk->block_active) { - opal_event_add(&trk->block, 0); - trk->block_active = true; + if (trk->ev_active) { + stop_progress_engine(trk); } - /* start the thread again */ - if (OPAL_SUCCESS != (rc = opal_thread_start(&trk->engine))) { - OPAL_ERROR_LOG(rc); - return rc; + + return OPAL_SUCCESS; + } + } + + return OPAL_ERR_NOT_FOUND; +} + +int opal_progress_thread_resume(const char *name) +{ + opal_progress_tracker_t *trk; + + if (!inited) { + /* nothing we can do */ + return OPAL_ERR_NOT_FOUND; + } + + if (NULL == name) { + name = shared_thread_name; + } + + /* find the specified engine */ + OPAL_LIST_FOREACH(trk, &tracking, opal_progress_tracker_t) { + if (0 == strcmp(name, trk->name)) { + if (trk->ev_active) { + return OPAL_ERR_RESOURCE_BUSY; } + + return start_progress_engine(trk); } } diff --git a/opal/runtime/opal_progress_threads.h b/opal/runtime/opal_progress_threads.h index b66097b3769..f8b654cbdff 100644 --- a/opal/runtime/opal_progress_threads.h +++ b/opal/runtime/opal_progress_threads.h @@ -1,9 +1,10 @@ /* - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -14,20 +15,56 @@ #include "opal/mca/event/event.h" -/* start a progress thread, assigning it the provided name for - * tracking purposes. 
If create_block is true, then this function - * will also create a pipe so that libevent has something to block - * against, thus keeping the thread from free-running + +/** + * Initialize a progress thread name; if a progress thread is not + * already associated with that name, start a progress thread. + * + * If you have general events that need to run in *a* progress thread + * (but not necessarily a your own, dedicated progress thread), pass + * NULL the "name" argument to the opal_progress_thead_init() function + * to glom on to the general OPAL-wide progress thread. + * + * If a name is passed that was already used in a prior call to + * opal_progress_thread_init(), the event base associated with that + * already-running progress thread will be returned (i.e., no new + * progress thread will be started). */ -OPAL_DECLSPEC opal_event_base_t *opal_start_progress_thread(char *name, - bool create_block); +OPAL_DECLSPEC opal_event_base_t *opal_progress_thread_init(const char *name); -/* stop the progress thread of the provided name. This function will - * also cleanup the blocking pipes and release the event base if - * the cleanup param is true */ -OPAL_DECLSPEC void opal_stop_progress_thread(char *name, bool cleanup); +/** + * Finalize a progress thread name (reference counted). + * + * Once this function is invoked as many times as + * opal_progress_thread_init() was invoked on this name (or NULL), the + * progress function is shut down and the event base associated with + * it is destroyed. + * + * Will return OPAL_ERR_NOT_FOUND if the progress thread name does not + * exist; OPAL_SUCCESS otherwise. + */ +OPAL_DECLSPEC int opal_progress_thread_finalize(const char *name); -/* restart the progress thread of the provided name */ -OPAL_DECLSPEC int opal_restart_progress_thread(char *name); +/** + * Temporarily pause the progress thread associated with this name. 
+ * + * This function does not destroy the event base associated with this + * progress thread name, but it does stop processing all events on + * that event base until opal_progress_thread_resume() is invoked on + * that name. + * + * Will return OPAL_ERR_NOT_FOUND if the progress thread name does not + * exist; OPAL_SUCCESS otherwise. + */ +OPAL_DECLSPEC int opal_progress_thread_pause(const char *name); + +/** + * Restart a previously-paused progress thread associated with this + * name. + * + * Will return OPAL_ERR_NOT_FOUND if the progress thread name does not + * exist; OPAL_SUCCESS otherwise. + */ +OPAL_DECLSPEC int opal_progress_thread_resume(const char *name); #endif diff --git a/opal/threads/Makefile.am b/opal/threads/Makefile.am index 661f22fc474..a4a084038ca 100644 --- a/opal/threads/Makefile.am +++ b/opal/threads/Makefile.am @@ -3,10 +3,10 @@ # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University +# Copyright (c) 2004-2016 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -28,9 +28,12 @@ headers += \ threads/mutex.h \ threads/mutex_unix.h \ threads/threads.h \ - threads/tsd.h + threads/tsd.h \ + threads/wait_sync.h \ + threads/thread_usage.h lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \ threads/condition.c \ threads/mutex.c \ - threads/thread.c + threads/thread.c \ + threads/wait_sync.c diff --git a/opal/threads/condition.c b/opal/threads/condition.c index 36941908992..7745d316544 100644 --- a/opal/threads/condition.c +++ b/opal/threads/condition.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/threads/condition.h b/opal/threads/condition.h index 40ca3571b8a..71bddff0cd0 100644 --- a/opal/threads/condition.h +++ b/opal/threads/condition.h @@ -1,22 +1,22 @@ -/* +/* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. + * Copyright (c) 2007-2015 Los Alamos National Security, LLC. 
All rights + * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #ifndef OPAL_CONDITION_SPINLOCK_H @@ -26,16 +26,12 @@ #ifdef HAVE_SYS_TIME_H #include #endif -#ifdef HAVE_TIME_H #include -#endif #include #include "opal/threads/mutex.h" #include "opal/runtime/opal_progress.h" -#include "opal/runtime/opal_cr.h" - BEGIN_C_DECLS /* @@ -63,20 +59,17 @@ static inline int opal_condition_wait(opal_condition_t *c, opal_mutex_t *m) c->c_waiting--; opal_mutex_unlock(m); opal_progress(); - OPAL_CR_TEST_CHECKPOINT_READY_STALL(); opal_mutex_lock(m); return 0; } while (c->c_signaled == 0) { opal_mutex_unlock(m); opal_progress(); - OPAL_CR_TEST_CHECKPOINT_READY_STALL(); opal_mutex_lock(m); } } else { while (c->c_signaled == 0) { opal_progress(); - OPAL_CR_TEST_CHECKPOINT_READY_STALL(); } } @@ -96,7 +89,7 @@ static inline int opal_condition_timedwait(opal_condition_t *c, c->c_waiting++; if (opal_using_threads()) { absolute.tv_sec = abstime->tv_sec; - absolute.tv_usec = abstime->tv_nsec * 1000; + absolute.tv_usec = abstime->tv_nsec / 1000; gettimeofday(&tv,NULL); if (c->c_signaled == 0) { do { @@ -104,19 +97,19 @@ static inline int opal_condition_timedwait(opal_condition_t *c, opal_progress(); gettimeofday(&tv,NULL); opal_mutex_lock(m); - } while (c->c_signaled == 0 && + } while (c->c_signaled == 0 && (tv.tv_sec <= absolute.tv_sec || (tv.tv_sec == absolute.tv_sec && tv.tv_usec < absolute.tv_usec))); } } else { absolute.tv_sec = abstime->tv_sec; - absolute.tv_usec = abstime->tv_nsec * 1000; + absolute.tv_usec = abstime->tv_nsec / 1000; gettimeofday(&tv,NULL); if (c->c_signaled == 0) { do { opal_progress(); gettimeofday(&tv,NULL); - } while (c->c_signaled == 0 && + } while (c->c_signaled == 0 && (tv.tv_sec <= absolute.tv_sec || (tv.tv_sec == absolute.tv_sec && tv.tv_usec < absolute.tv_usec))); } diff --git a/opal/threads/mutex.c 
b/opal/threads/mutex.c index 70b384ffcd6..23153ea843e 100644 --- a/opal/threads/mutex.c +++ b/opal/threads/mutex.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -72,3 +72,29 @@ OBJ_CLASS_INSTANCE(opal_mutex_t, opal_object_t, opal_mutex_construct, opal_mutex_destruct); + +static void opal_recursive_mutex_construct(opal_recursive_mutex_t *m) +{ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + +#if OPAL_ENABLE_DEBUG + m->m_lock_debug = 0; + m->m_lock_file = NULL; + m->m_lock_line = 0; +#endif + + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + + pthread_mutex_init(&m->m_lock_pthread, &attr); + pthread_mutexattr_destroy(&attr); + +#if OPAL_HAVE_ATOMIC_SPINLOCKS + opal_atomic_init( &m->m_lock_atomic, OPAL_ATOMIC_UNLOCKED ); +#endif +} + +OBJ_CLASS_INSTANCE(opal_recursive_mutex_t, + opal_object_t, + opal_recursive_mutex_construct, + opal_mutex_destruct); diff --git a/opal/threads/mutex.h b/opal/threads/mutex.h index 231b74236b0..6015ab6775c 100644 --- a/opal/threads/mutex.h +++ b/opal/threads/mutex.h @@ -1,24 +1,25 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights - * reserved. + * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2007 Voltaire. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,7 +28,7 @@ #include "opal_config.h" -#include "opal/sys/atomic.h" +#include "opal/threads/thread_usage.h" BEGIN_C_DECLS @@ -39,18 +40,11 @@ BEGIN_C_DECLS * Functions for locking of critical sections. */ -#if OMPI_ENABLE_THREAD_MULTIPLE -/* - * declaring this here so that CL does not complain - */ -OPAL_DECLSPEC extern bool opal_uses_threads; -#endif - /** * Opaque mutex object */ typedef struct opal_mutex_t opal_mutex_t; - +typedef struct opal_mutex_t opal_recursive_mutex_t; /** * Try to acquire a mutex. @@ -107,73 +101,6 @@ END_C_DECLS BEGIN_C_DECLS -/** - * Check and see if the process is using multiple threads. - * - * @retval true If the process may have more than one thread. - * @retval false If the process only has a single thread. 
- * - * The value that this function returns is influenced by: - * - * - how MPI_INIT or MPI_INIT_THREAD was invoked, - * - what the final MPI thread level was determined to be, - * - whether the OMPI or MPI libraries are multi-threaded (Jan 2003: - * they're not), - * - whether configure determined if we have thread support or not - * - * MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup - * functions) invoke opal_set_using_threads() to influence the value of - * this function, depending on their situation. Some examples: - * - * - if configure determined that we do not have threads, then this - * value will always be false. - * - * - if MPI_INIT is invoked, and the ompi libraries are [still] - * single-threaded, this value will be false. - * - * - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have - * thread support, and the final thread level is determined to be - * MPI_THREAD_MULTIPLE, this value will be true. - * - * - if the process is a single-threaded OMPI executable (e.g., mpicc), - * this value will be false. - * - * Hence, this function will return false if there is guaranteed to - * only be one thread in the process. If there is even the - * possibility that we may have multiple threads, true will be - * returned. - */ -#if OMPI_ENABLE_THREAD_MULTIPLE -#define opal_using_threads() opal_uses_threads -#else -#define opal_using_threads() 0 -#endif - -/** - * Set whether the process is using multiple threads or not. - * - * @param have Boolean indicating whether the process is using - * multiple threads or not. - * - * @retval opal_using_threads The new return value from - * opal_using_threads(). - * - * This function is used to influence the return value of - * opal_using_threads(). If configure detected that we have thread - * support, the return value of future invocations of - * opal_using_threads() will be the parameter's value. 
If configure - * detected that we have no thread support, then the retuen from - * opal_using_threads() will always be false. - */ -static inline bool opal_set_using_threads(bool have) -{ -#if OMPI_ENABLE_THREAD_MULTIPLE - opal_uses_threads = have; -#endif - return opal_using_threads(); -} - - /** * Lock a mutex if opal_using_threads() says that multiple threads may * be active in the process. @@ -253,78 +180,13 @@ static inline bool opal_set_using_threads(bool have) do { \ if(opal_using_threads()) { \ opal_mutex_lock(mutex); \ - (action); \ + action; \ opal_mutex_unlock(mutex); \ } else { \ - (action); \ + action; \ } \ } while (0) -/** - * Use an atomic operation for increment/decrement if opal_using_threads() - * indicates that threads are in use by the application or library. - */ - -static inline int32_t -OPAL_THREAD_ADD32(volatile int32_t *addr, int delta) -{ - int32_t ret; - - if (opal_using_threads()) { - ret = opal_atomic_add_32(addr, delta); - } else { - ret = (*addr += delta); - } - - return ret; -} - -#if OPAL_HAVE_ATOMIC_MATH_64 -static inline int64_t -OPAL_THREAD_ADD64(volatile int64_t *addr, int delta) -{ - int64_t ret; - - if (opal_using_threads()) { - ret = opal_atomic_add_64(addr, delta); - } else { - ret = (*addr += delta); - } - - return ret; -} -#endif - -static inline size_t -OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta) -{ - size_t ret; - - if (opal_using_threads()) { - ret = opal_atomic_add_size_t(addr, delta); - } else { - ret = (*addr += delta); - } - - return ret; -} - -/* BWB: FIX ME: remove if possible */ -#define OPAL_CMPSET(x, y, z) ((*(x) == (y)) ? ((*(x) = (z)), 1) : 0) - -#if OPAL_HAVE_ATOMIC_CMPSET_32 -#define OPAL_ATOMIC_CMPSET_32(x, y, z) \ - (opal_using_threads() ? opal_atomic_cmpset_32(x, y, z) : OPAL_CMPSET(x, y, z)) -#endif -#if OPAL_HAVE_ATOMIC_CMPSET_64 -#define OPAL_ATOMIC_CMPSET_64(x, y, z) \ - (opal_using_threads() ? 
opal_atomic_cmpset_64(x, y, z) : OPAL_CMPSET(x, y, z)) -#endif -#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64 -#define OPAL_ATOMIC_CMPSET(x, y, z) \ - (opal_using_threads() ? opal_atomic_cmpset(x, y, z) : OPAL_CMPSET(x, y, z)) -#endif - END_C_DECLS #endif /* OPAL_MUTEX_H */ diff --git a/opal/threads/mutex_unix.h b/opal/threads/mutex_unix.h index 80f6793dc24..a2de8259282 100644 --- a/opal/threads/mutex_unix.h +++ b/opal/threads/mutex_unix.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,18 +6,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. + * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -61,6 +62,55 @@ struct opal_mutex_t { opal_atomic_lock_t m_lock_atomic; }; OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_mutex_t); +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_recursive_mutex_t); + +#if defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP) +#define OPAL_PTHREAD_RECURSIVE_MUTEX_INITIALIZER PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP +#elif defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER) +#define OPAL_PTHREAD_RECURSIVE_MUTEX_INITIALIZER PTHREAD_RECURSIVE_MUTEX_INITIALIZER +#endif + +#if OPAL_ENABLE_DEBUG +#define OPAL_MUTEX_STATIC_INIT \ + { \ + .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), \ + .m_lock_pthread = PTHREAD_MUTEX_INITIALIZER, \ + .m_lock_debug = 0, \ + .m_lock_file = NULL, \ + .m_lock_line = 0, \ + .m_lock_atomic = { .u = { .lock = OPAL_ATOMIC_UNLOCKED } }, \ + } +#else +#define OPAL_MUTEX_STATIC_INIT \ + { \ + .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), \ + .m_lock_pthread = PTHREAD_MUTEX_INITIALIZER, \ + .m_lock_atomic = { .u = { .lock = OPAL_ATOMIC_UNLOCKED } }, \ + } +#endif + +#if defined(OPAL_PTHREAD_RECURSIVE_MUTEX_INITIALIZER) + +#if OPAL_ENABLE_DEBUG +#define OPAL_RECURSIVE_MUTEX_STATIC_INIT \ + { \ + .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), \ + .m_lock_pthread = OPAL_PTHREAD_RECURSIVE_MUTEX_INITIALIZER, \ + .m_lock_debug = 0, \ + .m_lock_file = NULL, \ + .m_lock_line = 0, \ + .m_lock_atomic = { .u = { .lock = OPAL_ATOMIC_UNLOCKED } }, \ + } +#else +#define OPAL_RECURSIVE_MUTEX_STATIC_INIT \ + { \ + .super = OPAL_OBJ_STATIC_INIT(opal_mutex_t), \ + .m_lock_pthread = OPAL_PTHREAD_RECURSIVE_MUTEX_INITIALIZER, \ + .m_lock_atomic = { .u = { .lock = OPAL_ATOMIC_UNLOCKED } }, \ + } +#endif + +#endif /************************************************************************ * diff --git a/opal/threads/thread.c b/opal/threads/thread.c index 4488c55b0ff..f74efaa9757 100644 --- a/opal/threads/thread.c +++ b/opal/threads/thread.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The 
University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/threads/thread_usage.h b/opal/threads/thread_usage.h new file mode 100644 index 00000000000..e904c51d2d6 --- /dev/null +++ b/opal/threads/thread_usage.h @@ -0,0 +1,186 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OPAL_THREAD_USAGE_H) +#define OPAL_THREAD_USAGE_H + +#include "opal/sys/atomic.h" +#include "opal/prefetch.h" +#include "opal_config.h" + +/** + * Check and see if the process is using multiple threads. 
+ * + * @retval true If the process may have more than one thread. + * @retval false If the process only has a single thread. + * + * The value that this function returns is influenced by: + * + * - how MPI_INIT or MPI_INIT_THREAD was invoked, + * - what the final MPI thread level was determined to be, + * - whether the OMPI or MPI libraries are multi-threaded + * + * MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup + * functions) invoke opal_set_using_threads() to influence the value of + * this function, depending on their situation. Some examples: + * + * - if MPI_INIT is invoked, and the ompi components in use are + * single-threaded, this value will be false. + * + * - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have + * thread support, and the final thread level is determined to be + * MPI_THREAD_MULTIPLE, this value will be true. + * + * - if the process is a single-threaded OMPI executable (e.g., mpicc), + * this value will be false. + * + * Hence, this function will return false if there is guaranteed to + * only be one thread in the process. If there is even the + * possibility that we may have multiple threads, true will be + * returned. + */ +#if OMPI_ENABLE_THREAD_MULTIPLE + +OPAL_DECLSPEC extern bool opal_uses_threads; +#define opal_using_threads() opal_uses_threads + +#else + +#define opal_using_threads() false + +#endif /* OMPI_ENABLE_THREAD_MULTIPLE */ + +/** + * Set whether the process is using multiple threads or not. + * + * @param have Boolean indicating whether the process is using + * multiple threads or not. + * + * @retval opal_using_threads The new return value from + * opal_using_threads(). + * + * This function is used to influence the return value of + * opal_using_threads(). If configure detected that we have thread + * support, the return value of future invocations of + * opal_using_threads() will be the parameter's value.
If configure + * detected that we have no thread support, then the return value of + * opal_using_threads() will always be false. + */ +static inline bool opal_set_using_threads(bool have) +{ +#if OMPI_ENABLE_THREAD_MULTIPLE + opal_uses_threads = have; +#else + (void) have; /* unused without thread support; silences the unused-parameter warning */ +#endif + return opal_using_threads(); +} + + +/** + * Use an atomic operation for increment/decrement if opal_using_threads() + * indicates that threads are in use by the application or library. + */ +#define OPAL_THREAD_DEFINE_ATOMIC_ADD(type, suffix) \ +static inline type opal_thread_add_ ## suffix (volatile type *addr, type delta) \ +{ \ + if (OPAL_UNLIKELY(opal_using_threads())) { \ + return opal_atomic_add_ ## suffix (addr, delta); \ + } \ + \ + return (*addr += delta); \ +} +/* Compare-and-set: atomic if opal_using_threads(), otherwise a plain compare and store that returns true only when the store happened. */ +#define OPAL_THREAD_DEFINE_ATOMIC_CMPSET(type, addr_type, suffix) \ +static inline bool opal_thread_cmpset_bool_ ## suffix (volatile addr_type *addr, type compare, type value) \ +{ \ + if (OPAL_UNLIKELY(opal_using_threads())) { \ + return opal_atomic_cmpset_ ## suffix ((volatile type *) addr, compare, value); \ + } \ + \ + if ((type) *addr == compare) { \ + ((type *) addr)[0] = value; \ + return true; \ + } \ + \ + return false; \ +} +/* Swap: atomic if opal_using_threads(), otherwise a plain load and store; the non-threaded path returns the previous value. */ +#define OPAL_THREAD_DEFINE_ATOMIC_SWAP(type, addr_type, suffix) \ +static inline type opal_thread_swap_ ## suffix (volatile addr_type *ptr, type newvalue) \ +{ \ + if (OPAL_UNLIKELY(opal_using_threads())) { \ + return opal_atomic_swap_ ## suffix ((volatile type *) ptr, newvalue); \ + } \ + \ + type old = ((type *) ptr)[0]; \ + ((type *) ptr)[0] = newvalue; \ + \ + return old; \ +} + +OPAL_THREAD_DEFINE_ATOMIC_ADD(int32_t, 32) +OPAL_THREAD_DEFINE_ATOMIC_ADD(size_t, size_t) +OPAL_THREAD_DEFINE_ATOMIC_CMPSET(int32_t, int32_t, 32) +OPAL_THREAD_DEFINE_ATOMIC_CMPSET(void *, intptr_t, ptr) +OPAL_THREAD_DEFINE_ATOMIC_SWAP(int32_t, int32_t, 32) +OPAL_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr) + +#define OPAL_THREAD_ADD32 opal_thread_add_32 +#define OPAL_ATOMIC_ADD32
opal_thread_add_32 + +#define OPAL_THREAD_ADD_SIZE_T opal_thread_add_size_t +#define OPAL_ATOMIC_ADD_SIZE_T opal_thread_add_size_t + +#define OPAL_THREAD_CMPSET_32 opal_thread_cmpset_bool_32 +#define OPAL_ATOMIC_CMPSET_32 opal_thread_cmpset_bool_32 + +#define OPAL_THREAD_CMPSET_PTR(x, y, z) opal_thread_cmpset_bool_ptr ((volatile intptr_t *) (x), (void *) (y), (void *) (z)) +#define OPAL_ATOMIC_CMPSET_PTR OPAL_THREAD_CMPSET_PTR + +#define OPAL_THREAD_SWAP_32 opal_thread_swap_32 +#define OPAL_ATOMIC_SWAP_32 opal_thread_swap_32 + +#define OPAL_THREAD_SWAP_PTR(x, y) opal_thread_swap_ptr ((volatile intptr_t *) (x), (void *) (y)) +#define OPAL_ATOMIC_SWAP_PTR OPAL_THREAD_SWAP_PTR + +/* define the 64-bit wrappers and macros only if 64-bit atomic math is available */ +#if OPAL_HAVE_ATOMIC_MATH_64 + +OPAL_THREAD_DEFINE_ATOMIC_ADD(int64_t, 64) +OPAL_THREAD_DEFINE_ATOMIC_CMPSET(int64_t, int64_t, 64) +OPAL_THREAD_DEFINE_ATOMIC_SWAP(int64_t, int64_t, 64) + +#define OPAL_THREAD_ADD64 opal_thread_add_64 +#define OPAL_ATOMIC_ADD64 opal_thread_add_64 + +#define OPAL_THREAD_CMPSET_64 opal_thread_cmpset_bool_64 +#define OPAL_ATOMIC_CMPSET_64 opal_thread_cmpset_bool_64 + +#define OPAL_THREAD_SWAP_64 opal_thread_swap_64 +#define OPAL_ATOMIC_SWAP_64 opal_thread_swap_64 + +#endif /* OPAL_HAVE_ATOMIC_MATH_64 */ + +#endif /* !defined(OPAL_THREAD_USAGE_H) */ diff --git a/opal/threads/threads.h b/opal/threads/threads.h index c73bef7f942..367315cb50f 100644 --- a/opal/threads/threads.h +++ b/opal/threads/threads.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST).
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,9 +26,7 @@ #include "opal_config.h" #include <pthread.h> -#ifdef HAVE_SIGNAL_H #include <signal.h> -#endif #include "opal/class/opal_object.h" #if OPAL_ENABLE_DEBUG diff --git a/opal/threads/tsd.h b/opal/threads/tsd.h index 97b026017db..e2e8eb1bcc4 100644 --- a/opal/threads/tsd.h +++ b/opal/threads/tsd.h @@ -1,13 +1,13 @@ -/* +/* * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -67,11 +67,11 @@ typedef void* opal_tsd_key_t; * @param destructor[in] Cleanup function to call when a thread exits * * @retval OPAL_SUCCESS Success - * @retval EAGAIN The system lacked the necessary resource to + * @retval EAGAIN The system lacked the necessary resource to * create another thread specific data key * @retval ENOMEM Insufficient memory exists to create the key */ -OPAL_DECLSPEC int opal_tsd_key_create(opal_tsd_key_t *key, +OPAL_DECLSPEC int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor); diff --git a/opal/threads/wait_sync.c b/opal/threads/wait_sync.c new file mode 100644 index 00000000000..31361c6964c --- /dev/null +++ b/opal/threads/wait_sync.c @@ -0,0 +1,102 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "wait_sync.h" +/* wait_sync_lock guards wait_sync_list, a circular doubly-linked list of pending syncs; the sync at its head is the one whose thread drives opal_progress(). */ +static opal_mutex_t wait_sync_lock = OPAL_MUTEX_STATIC_INIT; +static ompi_wait_sync_t* wait_sync_list = NULL; +/* Signal the condition variable of `who` while holding its lock, so that its (sleeping) thread wakes up and re-checks its role. */ +#define WAIT_SYNC_PASS_OWNERSHIP(who) \ + do { \ + pthread_mutex_lock( &(who)->lock); \ + pthread_cond_signal( &(who)->condition ); \ + pthread_mutex_unlock( &(who)->lock); \ + } while(0) +/* Block until `sync` completes: the head of wait_sync_list calls opal_progress(), every other waiter sleeps on its own condition variable. Returns OPAL_SUCCESS if sync->status is 0, OPAL_ERROR otherwise. */ +int sync_wait_mt(ompi_wait_sync_t *sync) +{ + /* Don't block if the synchronization is already completed. We avoid the + * race condition around the release of the synchronization using the + * signaling field. + */ + if(sync->count <= 0) + return (0 == sync->status) ? OPAL_SUCCESS : OPAL_ERROR; + + /* lock so nobody can signal us while the list is being updated */ + pthread_mutex_lock(&sync->lock); + + /* Now that we hold the lock make sure another thread has not already + * called cond_signal. + */ + if(sync->count <= 0) { + pthread_mutex_unlock(&sync->lock); + return (0 == sync->status) ? OPAL_SUCCESS : OPAL_ERROR; + } + + /* Insert sync at the tail of the list of pending synchronization constructs */ + OPAL_THREAD_LOCK(&wait_sync_lock); + if( NULL == wait_sync_list ) { + sync->next = sync->prev = sync; + wait_sync_list = sync; + } else { + sync->prev = wait_sync_list->prev; + sync->prev->next = sync; + sync->next = wait_sync_list; + wait_sync_list->prev = sync; + } + OPAL_THREAD_UNLOCK(&wait_sync_lock); + + /** + * If we are not responsible for progressing, go silent until something worth noticing happens: + * - this thread has been promoted to take care of the progress + * - our sync has been triggered. + */ + check_status: + if( sync != wait_sync_list ) { + pthread_cond_wait(&sync->condition, &sync->lock); + + /** + * At this point either the sync was completed, in which case + * we should remove it from the wait list, and/or I was + * promoted to progress manager. + */ + + if( sync->count <= 0 ) { /* Completed?
*/ + pthread_mutex_unlock(&sync->lock); + goto i_am_done; + } + /* either promoted, or spurious wakeup: re-evaluate our role */ + goto check_status; + } + + pthread_mutex_unlock(&sync->lock); + while(sync->count > 0) { /* progress till completion */ + opal_progress(); /* don't progress with the sync lock locked or you'll deadlock */ + } + assert(sync == wait_sync_list); + + i_am_done: + /* My sync is now complete. Trim the list: remove self, wake next */ + OPAL_THREAD_LOCK(&wait_sync_lock); + sync->prev->next = sync->next; + sync->next->prev = sync->prev; + /* In case I am the progress manager, pass the duties on to the next pending sync (if any) */ + if( sync == wait_sync_list ) { + wait_sync_list = (sync == sync->next) ? NULL : sync->next; + if( NULL != wait_sync_list ) + WAIT_SYNC_PASS_OWNERSHIP(wait_sync_list); + } + OPAL_THREAD_UNLOCK(&wait_sync_lock); + + return (0 == sync->status) ? OPAL_SUCCESS : OPAL_ERROR; +} diff --git a/opal/threads/wait_sync.h b/opal/threads/wait_sync.h new file mode 100644 index 00000000000..1fcbb7cd9e6 --- /dev/null +++ b/opal/threads/wait_sync.h @@ -0,0 +1,121 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "opal/sys/atomic.h" +#include "opal/threads/condition.h" +#include "opal/constants.h" +#include "opal/prefetch.h" +#include + +BEGIN_C_DECLS + +typedef struct ompi_wait_sync_t { + int32_t count; + int32_t status; + pthread_cond_t condition; + pthread_mutex_t lock; + struct ompi_wait_sync_t *next; + struct ompi_wait_sync_t *prev; + volatile bool signaling; +} ompi_wait_sync_t; + +#define REQUEST_PENDING (void*)0L +#define REQUEST_COMPLETED (void*)1L + +#define SYNC_WAIT(sync) (opal_using_threads() ? sync_wait_mt (sync) : sync_wait_st (sync)) + +/* The loop in release handles a race condition between the signaling + * thread and the destruction of the condition variable. The signaling + * member will be set to false after the final signaling thread has + * finished opertating on the sync object. This is done to avoid + * extra atomics in the singalling function and keep it as fast + * as possible. Note that the race window is small so spinning here + * is more optimal than sleeping since this macro is called in + * the critical path. 
*/ +#define WAIT_SYNC_RELEASE(sync) \ + if (opal_using_threads()) { \ + while ((sync)->signaling) { \ + continue; \ + } \ + pthread_cond_destroy(&(sync)->condition); \ + pthread_mutex_destroy(&(sync)->lock); \ + } + +#define WAIT_SYNC_RELEASE_NOWAIT(sync) \ + if (opal_using_threads()) { \ + pthread_cond_destroy(&(sync)->condition); \ + pthread_mutex_destroy(&(sync)->lock); \ + } + + +#define WAIT_SYNC_SIGNAL(sync) \ + if (opal_using_threads()) { \ + pthread_mutex_lock(&(sync->lock)); \ + pthread_cond_signal(&sync->condition); \ + pthread_mutex_unlock(&(sync->lock)); \ + sync->signaling = false; \ + } + +#define WAIT_SYNC_SIGNALLED(sync){ \ + (sync)->signaling = false; \ +} + +OPAL_DECLSPEC int sync_wait_mt(ompi_wait_sync_t *sync); +static inline int sync_wait_st (ompi_wait_sync_t *sync) +{ + while (sync->count > 0) { + opal_progress(); + } + + return sync->status; +} + + +#define WAIT_SYNC_INIT(sync,c) \ + do { \ + (sync)->count = (c); \ + (sync)->next = NULL; \ + (sync)->prev = NULL; \ + (sync)->status = 0; \ + (sync)->signaling = (0 != (c)); \ + if (opal_using_threads()) { \ + pthread_cond_init (&(sync)->condition, NULL); \ + pthread_mutex_init (&(sync)->lock, NULL); \ + } \ + } while(0) + +/** + * Update the status of the synchronization primitive. If an error is + * reported the synchronization is completed and the signal + * triggered. The status of the synchronization will be reported to + * the waiting threads. 
+ */ +static inline void wait_sync_update(ompi_wait_sync_t *sync, int updates, int status) +{ + if( OPAL_LIKELY(OPAL_SUCCESS == status) ) { + if( 0 != (OPAL_THREAD_ADD32(&sync->count, -updates)) ) { + return; /* count has not reached zero yet: nothing to signal */ + } + } else { + /* error path (not performance critical): record the error, then force count to 0 with a real atomic swap; the write barrier in between is presumably there so that a waiter seeing count == 0 also sees the error status */ + sync->status = OPAL_ERROR; + opal_atomic_wmb (); + opal_atomic_swap_32 (&sync->count, 0); + } + WAIT_SYNC_SIGNAL(sync); /* reached only once the sync is complete (count hit zero or an error was reported) */ +} + +END_C_DECLS diff --git a/opal/tools/Makefile.am b/opal/tools/Makefile.am index d965b4244f9..9519157e34a 100644 --- a/opal/tools/Makefile.am +++ b/opal/tools/Makefile.am @@ -6,14 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. +# All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -24,12 +26,5 @@ SUBDIRS += \ tools/wrappers DIST_SUBDIRS += \ - tools/wrappers \ - tools/opal-checkpoint \ - tools/opal-restart + tools/wrappers -if WANT_FT -SUBDIRS += \ - tools/opal-checkpoint \ - tools/opal-restart -endif diff --git a/opal/tools/opal-checkpoint/Makefile.am b/opal/tools/opal-checkpoint/Makefile.am deleted file mode 100644 index 38c8522d67f..00000000000 --- a/opal/tools/opal-checkpoint/Makefile.am +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved.
-# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -man_pages = opal-checkpoint.1 -EXTRA_DIST = $(man_pages:.1=.1in) - -if WANT_FT_CR -if OPAL_INSTALL_BINARIES - -bin_PROGRAMS = opal-checkpoint - -nodist_man_MANS = $(man_pages) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -dist_opaldata_DATA = help-opal-checkpoint.txt - -endif # OPAL_INSTALL_BINARIES - -opal_checkpoint_SOURCES = opal-checkpoint.c -opal_checkpoint_LDADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -endif # WANT_FT_CR - -distclean-local: - rm -f $(man_pages) diff --git a/opal/tools/opal-checkpoint/help-opal-checkpoint.txt b/opal/tools/opal-checkpoint/help-opal-checkpoint.txt deleted file mode 100644 index e8b54779011..00000000000 --- a/opal/tools/opal-checkpoint/help-opal-checkpoint.txt +++ /dev/null @@ -1,63 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. 
-# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI checkpoint tool -# -[usage] -opal-checkpoint PID - Open PAL Single Process Checkpoint Tool - -%s -# -[invalid_pid] -Error: The PID (%d) is invalid because either you have not provided a PID - or provided an invalid PID. - Please see --help for usage. -# -[ckpt_failure] -Error: The application (PID = %d) failed to checkpoint properly. - Returned %d, state %d. -# -[pid_does_not_exist] -Error: The process with PID %d is not checkpointable. - This could be due to one of the following: - - An application with this PID doesn't currently exist - - The application with this PID isn't checkpointable - - The application with this PID isn't an OPAL application. - We were looking for the named files: - %s - %s -# -[ckpt:in_progress] -The process with PID %d is currently not checkpointable. -This is because it is already checkpointing itself. -Wait until the checkpoint completes then try again. -# -[ckpt:req_error] -The process with PID %d is currently not checkpointable. -This is due to an error during the checkpointing process. -# -[ckpt:req_null] -The process with PID %d is not checkpointable. -This can be due to one of the following reasons: - - The process was compiled without checkpoint support - - The process has elected not to be checkpointable by - using unsupported functions. - - The process has elected not to be checkpointable due - to the lack of a checkpoint/restart system. 
diff --git a/opal/tools/opal-checkpoint/opal-checkpoint.1in b/opal/tools/opal-checkpoint/opal-checkpoint.1in deleted file mode 100644 index fb4c84f78d2..00000000000 --- a/opal/tools/opal-checkpoint/opal-checkpoint.1in +++ /dev/null @@ -1,104 +0,0 @@ -.\" -.\" Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -.\" University Research and Technology -.\" Corporation. All rights reserved. -.\" Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -.\" -.\" Man page for OPAL's opal-checkpoint command -.\" -.\" .TH name section center-footer left-footer center-header -.TH OPAL-CHECKPOINT 1 "#OPAL_DATE#" "#PACKAGE_NAME#" "#PACKAGE_VERSION#" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -opal-checkpoint \- Checkpoint a running sequential process using the Open PAL -Checkpoint/Restart Service (CRS). -. -.P -\fBNote\fR: This should only be used by the user if the application being -checkpointed is an OPAL-only application. If it is an Open RTE or Open MPI -program their respective tools should be used. -. -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. -.B opal-checkpoint -.B [ options ] -.B -. -.\" ************************** -.\" Options Section -.\" ************************** -.SH Options -. -\fIopal-checkpoint\fR will attempt to notify a running process that it has been -requested that the process checkpoint itself. A snapshot handle reference is -presented to the user, which is used in \fIopal_restart\fP to restart the -process. -. -.TP 10 -.B -Process ID of the running target process. -. -. -.TP -.B -h | --help -Display help for this command -. -. -.TP -.B --term -After checkpointing the running process, terminate it. -. -. -.TP -.B -v | --verbose -Enable verbose output for debugging. -. -. -.TP -.B -n | --name -Request a specific name for the local snapshot reference. -. -. 
-.TP -.B -w | --where -Request that the local snapshot reference be placed in a specific location. -. -. -.TP -.B -gmca | --gmca \fR \fP -Pass global MCA parameters that are applicable to all contexts. \fI\fP is -the parameter name; \fI\fP is the parameter value. -. -. -.TP -.B -mca | --mca -Send arguments to various MCA modules. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -.PP -\fIopal-checkpoint\fR can be invoked multiple, non-overlapping times. This -allows the user to take involuntary checkpoints of a running sequential -process. See opal_crs(7) for more information about the CRS framework and -components. It is convenient to note that the user does not need to spectify -the checkpointer to be used here, as that is determined completely by the -running process being checkpointed. -. -. -.\" ************************** -.\" See Also Section -.\" ************************** -. -.SH SEE ALSO - opal-restart(1), opal_crs(7) -.\", orte_crs(7), ompi_crs(7) diff --git a/opal/tools/opal-checkpoint/opal-checkpoint.c b/opal/tools/opal-checkpoint/opal-checkpoint.c deleted file mode 100644 index 7d3c8926dc5..00000000000 --- a/opal/tools/opal-checkpoint/opal-checkpoint.c +++ /dev/null @@ -1,638 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2011-2012 Cisco Systems, Inc. 
All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * OPAL Checkpoint command - * - * This command will initiate the checkpoint of a single - * process that has been compiled with OPAL support. - */ -#include "opal_config.h" - -#include -#include -#ifdef HAVE_STDLIB_H -#include -#endif /* HAVE_STDLIB_H */ -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_STAT_H -#include /* for mkfifo */ -#endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_SYS_WAIT_H -#include -#endif -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ -#ifdef HAVE_SIGNAL_H -#include -#endif - -#include "opal/constants.h" - -#include "opal/util/cmd_line.h" -#include "opal/util/argv.h" -#include "opal/util/show_help.h" -#include "opal/util/opal_environ.h" -#include "opal/util/error.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - -#include "opal/runtime/opal.h" -#include "opal/runtime/opal_cr.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -/****************** - * Global Vars - ******************/ - -/****************** - * Local Functions - ******************/ -static int initialize(int argc, char *argv[]); -static int finalize(void); -static int parse_args(int argc, char *argv[]); -static int notify_process_for_checkpoint(pid_t pid, char **fname, int term, - opal_crs_state_type_t *state); - -/***************************************** - * Global Vars for Command line Arguments - *****************************************/ -typedef struct { - bool help; - int pid; - bool term; - bool verbose; - bool quiet; - char *snapshot_name; - char *snapshot_loc; - int output; -} opal_checkpoint_globals_t; - -opal_checkpoint_globals_t opal_checkpoint_globals; - -opal_cmd_line_init_t cmd_line_opts[] = { - { NULL, - 'h', NULL, "help", - 0, - 
&opal_checkpoint_globals.help, OPAL_CMD_LINE_TYPE_BOOL, - "This help message" }, - - { NULL, - 'v', NULL, "verbose", - 0, - &opal_checkpoint_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL, - "Be Verbose" }, - - { NULL, - 'q', NULL, "quiet", - 0, - &opal_checkpoint_globals.quiet, OPAL_CMD_LINE_TYPE_BOOL, - "Be Super Quiet" }, - - { NULL, - '\0', NULL, "term", - 0, - &opal_checkpoint_globals.term, OPAL_CMD_LINE_TYPE_BOOL, - "Terminate the application after checkpoint" }, - - { NULL, - 'n', NULL, "name", - 1, - &opal_checkpoint_globals.snapshot_name, OPAL_CMD_LINE_TYPE_STRING, - "Request a specific snapshot reference." }, - - { "crs_base_snapshot_dir", - 'w', NULL, "where", - 1, - &opal_checkpoint_globals.snapshot_loc, OPAL_CMD_LINE_TYPE_STRING, - "Where to place the checkpoint files. Note: You must remember this " - "location to pass into opal-restart, as it may not be able to find " - "the desired directory." }, - - /* End of list */ - { NULL, '\0', NULL, NULL, 0, - NULL, OPAL_CMD_LINE_TYPE_NULL, - NULL } -}; - -int -main(int argc, char *argv[]) -{ - int ret, exit_status = OPAL_SUCCESS; - char *fname = NULL; - opal_crs_state_type_t cr_state; - - /*************** - * Initialize - ***************/ - if (OPAL_SUCCESS != (ret = initialize(argc, argv))) { - exit_status = ret; - goto cleanup; - } - - /******************************* - * Checkpoint the requested PID - *******************************/ - opal_output_verbose(10, opal_checkpoint_globals.output, - "opal_checkpoint: Checkpointing PID %d", - opal_checkpoint_globals.pid); - if( opal_checkpoint_globals.term ) { - opal_output_verbose(10, opal_checkpoint_globals.output, - "\tTerminating application after checkpoint"); - } - - ret = notify_process_for_checkpoint(opal_checkpoint_globals.pid, - &fname, - opal_checkpoint_globals.term, - &cr_state); - if (OPAL_SUCCESS != ret || - cr_state == OPAL_CRS_ERROR) { - opal_show_help("help-opal-checkpoint.txt", "ckpt_failure", true, - opal_checkpoint_globals.pid, ret, cr_state); - 
exit_status = ret; - goto cleanup; - } - - if( !opal_checkpoint_globals.quiet ) { - opal_output(opal_checkpoint_globals.output, - "Local Snapshot Reference = %s\n", - fname); - } - - cleanup: - /*************** - * Cleanup - ***************/ - if (OPAL_SUCCESS != (ret = finalize())) { - return ret; - } - - return exit_status; -} - -static int initialize(int argc, char *argv[]) { - int ret, exit_status = OPAL_SUCCESS; - char * tmp_env_var = NULL; - - /* - * Make sure to init util before parse_args - * to ensure installdirs is setup properly - * before calling mca_base_open(); - */ - if( OPAL_SUCCESS != (ret = opal_init_util(&argc, &argv)) ) { - return ret; - } - - /* - * Parse Command Line Arguments - */ - if (OPAL_SUCCESS != (ret = parse_args(argc, argv))) { - exit_status = ret; - goto cleanup; - } - - /* - * Setup OPAL Output handle from the verbose argument - */ - if( opal_checkpoint_globals.verbose ) { - opal_checkpoint_globals.quiet = false; /* Automaticly turn off quiet if it is set */ - opal_checkpoint_globals.output = opal_output_open(NULL); - opal_output_set_verbosity(opal_checkpoint_globals.output, 10); - } else { - opal_checkpoint_globals.output = 0; /* Default=STDOUT */ - } - - /* - * Disable the checkpoint notification routine for this - * tool. As we will never need to checkpoint this tool. - * Note: This must happen before opal_init(). 
- */ - opal_cr_set_enabled(false); - - /* - * Select the 'none' CRS component, - * since we don't actually use a checkpointer - */ - (void) mca_base_var_env_name("crs", &tmp_env_var); - opal_setenv(tmp_env_var, - "none", - true, &environ); - free(tmp_env_var); - tmp_env_var = NULL; - - /* - * Initialize OPAL - */ - if (OPAL_SUCCESS != (ret = opal_init(&argc, &argv))) { - exit_status = ret; - goto cleanup; - } - - cleanup: - return exit_status; -} - -static int finalize(void) { - int ret = OPAL_SUCCESS; - - if (OPAL_SUCCESS != (ret = opal_finalize())) { - return ret; - } - - return OPAL_SUCCESS; -} - -static int parse_args(int argc, char *argv[]) { - int i, ret, len; - opal_cmd_line_t cmd_line; - char **app_env = NULL, **global_env = NULL; - char * tmp_env_var = NULL; - char *argv0 = NULL; - - memset(&opal_checkpoint_globals, 0, sizeof(opal_checkpoint_globals_t)); - - opal_checkpoint_globals.snapshot_name = NULL; - opal_checkpoint_globals.snapshot_loc = NULL; - - /* Parse the command line options */ - opal_cmd_line_create(&cmd_line, cmd_line_opts); - mca_base_open(); - mca_base_cmd_line_setup(&cmd_line); - ret = opal_cmd_line_parse(&cmd_line, true, argc, argv); - - if (OPAL_SUCCESS != ret) { - if (OPAL_ERR_SILENT != ret) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(ret)); - } - return 1; - } - if (opal_checkpoint_globals.help) { - char *str, *args = NULL; - args = opal_cmd_line_get_usage_msg(&cmd_line); - str = opal_show_help_string("help-opal-checkpoint.txt", "usage", true, - args); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - /* If we show the help message, that should be all we do */ - exit(0); - } - - /** - * Put all of the MCA arguments in the environment - */ - mca_base_cmd_line_process_args(&cmd_line, &app_env, &global_env); - - len = opal_argv_count(app_env); - for(i = 0; i < len; ++i) { - putenv(app_env[i]); - } - - len = opal_argv_count(global_env); - for(i = 0; i < len; ++i) { - 
putenv(global_env[i]); - } - - (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var); - opal_setenv(tmp_env_var, - "1", - true, &environ); - free(tmp_env_var); - tmp_env_var = NULL; - - /** - * Now start parsing our specific arguments - */ - - if( NULL == opal_checkpoint_globals.snapshot_name ) - opal_checkpoint_globals.snapshot_name = strdup(""); - if( NULL == opal_checkpoint_globals.snapshot_loc ) { - opal_checkpoint_globals.snapshot_loc = strdup(""); - } - - /* get the remaining bits */ - argv0 = strdup(argv[0]); - opal_cmd_line_get_tail(&cmd_line, &argc, &argv); - - if (0 == argc) { - fprintf(stderr, "%s: Nothing to do\n", argv0); - fprintf(stderr, "Type '%s --help' for usage.\n", argv0); - free(argv0); - return OPAL_ERROR; - } - free(argv0); - - opal_checkpoint_globals.pid = atoi(argv[0]); - if ( 0 >= opal_checkpoint_globals.pid ) { - opal_show_help("help-opal-checkpoint.txt", "invalid_pid", true, - opal_checkpoint_globals.pid); - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -} - -static int -notify_process_for_checkpoint(pid_t pid, char **fname, int term, opal_crs_state_type_t *cr_state) -{ - char *prog_named_pipe_r = NULL, *prog_named_pipe_w = NULL; - int prog_named_read_pipe_fd = -1, prog_named_write_pipe_fd = -1; - char *loc_fname = NULL, *tmp_pid = NULL; - unsigned char cmd; - int len, ret; - int exit_status = OPAL_SUCCESS; - int s, max_wait_time = 20; /* wait time before giving up on the checkpoint */ - ssize_t tmp_size = 0; - int value; - - /* A string copy of the pid */ - asprintf(&tmp_pid, "%d", pid); - - /* Flip the read/write files for bi-directionality */ - asprintf(&prog_named_pipe_w, "%s/%s.%s", opal_cr_pipe_dir, OPAL_CR_NAMED_PROG_R, tmp_pid); - asprintf(&prog_named_pipe_r, "%s/%s.%s", opal_cr_pipe_dir, OPAL_CR_NAMED_PROG_W, tmp_pid); - - /* - * Signal the application telling it that we wish to checkpoint - */ - if( 0 != (ret = kill(pid, opal_cr_entry_point_signal) ) ) { - exit_status = ret; - goto cleanup; - } - - 
opal_output_verbose(10, opal_checkpoint_globals.output, - "opal_checkpoint: Looking for Named Pipes (%s) (%s)\n", - prog_named_pipe_r, prog_named_pipe_w); - - for( s = 0; s < max_wait_time; ++s) { - /* - * See if the named pipe exists yet for the PID in question - */ - if( 0 > (ret = access(prog_named_pipe_r, F_OK) )) { - /* File doesn't exist yet, keep waiting */ - if( !opal_checkpoint_globals.quiet && - s >= max_wait_time - 5 ) { - opal_output(0, "opal-checkpoint: File does not exist yet: <%s> rtn = %d (waited %d/%d sec)\n", - prog_named_pipe_r, ret, s, max_wait_time); - } - sleep(1); - continue; - } - else if( 0 > (ret = access(prog_named_pipe_w, F_OK) )) { - /* File doesn't exist yet, keep waiting */ - if( !opal_checkpoint_globals.quiet && - s >= max_wait_time - 5 ) { - opal_output(0, "opal-checkpoint: File does not exist yet: <%s> rtn = %d (waited %d/%d sec)\n", - prog_named_pipe_w, ret, s, max_wait_time); - } - sleep(1); - continue; - } - else { - break; - } - } - if( s == max_wait_time ) { - /* The file doesn't exist, - * This means that the process didn't open up a named pipe for us - * to access their checkpoint notification routine. Therefore, - * the application either: - * - Doesn't exist - * - Isn't checkpointable - * In either case there is nothing we can do. - */ - opal_show_help("help-opal-checkpoint.txt", "pid_does_not_exist", true, - opal_checkpoint_globals.pid, prog_named_pipe_r, prog_named_pipe_w); - - *cr_state = OPAL_CRS_ERROR; - - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* The file does exist, so let's use it */ - - /* - * Open - * - prog_named_write_pipe: - * prog makes this file and opens Read Only - * this app. opens it Write Only - * - prog_named_read_pipe: - * prog makes this file and opens Write Only - * this app. 
opens it Read Only - */ - prog_named_write_pipe_fd = open(prog_named_pipe_w, O_WRONLY); - if(prog_named_write_pipe_fd < 0) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to open name pipe (%s). %d\n", - prog_named_pipe_w, prog_named_write_pipe_fd); - exit_status = OPAL_ERROR; - goto cleanup; - } - - prog_named_read_pipe_fd = open(prog_named_pipe_r, O_RDWR); - if(prog_named_read_pipe_fd < 0) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to open name pipe (%s). %d\n", - prog_named_pipe_r, prog_named_read_pipe_fd); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * Start the handshake - */ - len = 0; - if( sizeof(int) != (ret = write(prog_named_write_pipe_fd, &len, sizeof(int))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to write handshake to named pipe (%s). %d\n", - prog_named_pipe_w, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - - if( sizeof(int) != (ret = read(prog_named_read_pipe_fd, &value, sizeof(int))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to read length from named pipe (%s). 
%d\n", - prog_named_pipe_r, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* Check the response to make sure we can checkpoint this process */ - if( OPAL_CHECKPOINT_CMD_IN_PROGRESS == value ) { - opal_show_help("help-opal-checkpoint.txt", - "ckpt:in_progress", - true, - opal_checkpoint_globals.pid); - exit_status = OPAL_ERROR; - goto cleanup; - } - else if( OPAL_CHECKPOINT_CMD_NULL == value ) { - opal_show_help("help-opal-checkpoint.txt", - "ckpt:req_null", - true, - opal_checkpoint_globals.pid); - exit_status = OPAL_ERROR; - goto cleanup; - } - else if ( OPAL_CHECKPOINT_CMD_ERROR == value ) { - opal_show_help("help-opal-checkpoint.txt", - "ckpt:req_error", - true, - opal_checkpoint_globals.pid); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * Write the checkpoint request and information to the - * pipe - */ - cmd = OPAL_CR_CHECKPOINT; - /* Send the command */ - if( sizeof(cmd) != (ret = write(prog_named_write_pipe_fd, &cmd, sizeof(cmd))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to write CHECKPOINT Command to named pipe (%s). %d\n", - prog_named_pipe_w, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* Send the arguments: {pid, term} */ - if( sizeof(int) != (ret = write(prog_named_write_pipe_fd, &pid, sizeof(int))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to write pid (%d) to named pipe (%s). 
%d\n", - pid, prog_named_pipe_w, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - - if( sizeof(int) != (ret = write(prog_named_write_pipe_fd, &term, sizeof(int))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to write term (%d) to named pipe (%s), %d\n", - term, prog_named_pipe_w, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* Send the snapshot_name argument */ - len = strlen(opal_checkpoint_globals.snapshot_name) + 1; - if( sizeof(int) != (ret = write(prog_named_write_pipe_fd, &len, sizeof(int))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to write snapshot name len (%d) to named pipe (%s). %d\n", - len, prog_named_pipe_w, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - - tmp_size = sizeof(char) * len; - if( tmp_size != (ret = write(prog_named_write_pipe_fd, (opal_checkpoint_globals.snapshot_name), (sizeof(char) * len))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to write snapshot name (%s) to named pipe (%s). %d\n", - opal_checkpoint_globals.snapshot_name, prog_named_pipe_w, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* Send the snashot location argument */ - len = strlen(opal_checkpoint_globals.snapshot_loc) + 1; - if( sizeof(int) != (ret = write(prog_named_write_pipe_fd, &len, sizeof(int))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to write snapshot location len (%d) to named pipe (%s). %d\n", - len, prog_named_pipe_w, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - - tmp_size = sizeof(char) * len; - if( tmp_size != (ret = write(prog_named_write_pipe_fd, (opal_checkpoint_globals.snapshot_loc), (sizeof(char) * len))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to write snapshot location (%s) to named pipe (%s). 
%d\n", - opal_checkpoint_globals.snapshot_loc, prog_named_pipe_w, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * Get the response from the notification routine on the other - * machine. - */ - if( sizeof(int) != (ret = read(prog_named_read_pipe_fd, &len, sizeof(int))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to read length from named pipe (%s). %d\n", - prog_named_pipe_r, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - - if(len > 0) { - loc_fname = (char *) malloc(sizeof(char) * len); - if( (ssize_t)(sizeof(char) * len) != (ret = read(prog_named_read_pipe_fd, loc_fname, (sizeof(char) * len))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to read filename from named pipe (%s). %d\n", - prog_named_pipe_w, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - - *fname = strdup(loc_fname); - if( sizeof(int) != (ret = read(prog_named_read_pipe_fd, &cr_state, sizeof(int))) ) { - opal_output(opal_checkpoint_globals.output, - "opal_checkpoint: Error: Unable to read state from named pipe (%s). %d\n", - prog_named_pipe_r, ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - - cleanup: - /* - * Close the pipes now that we are done with it - */ - close(prog_named_write_pipe_fd); - close(prog_named_read_pipe_fd); - - if( NULL != tmp_pid) - free(tmp_pid); - if( NULL != prog_named_pipe_r) - free(prog_named_pipe_r); - if( NULL != prog_named_pipe_w) - free(prog_named_pipe_w); - - return exit_status; -} diff --git a/opal/tools/opal-restart/Makefile.am b/opal/tools/opal-restart/Makefile.am deleted file mode 100644 index 38083b01adc..00000000000 --- a/opal/tools/opal-restart/Makefile.am +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. 
-# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -man_pages = opal-restart.1 -EXTRA_DIST = $(man_pages:.1=.1in) - -if WANT_FT_CR -if OPAL_INSTALL_BINARIES - -bin_PROGRAMS = opal-restart - -nodist_man_MANS = $(man_pages) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -dist_opaldata_DATA = help-opal-restart.txt - -endif # OPAL_INSTALL_BINARIES - -opal_restart_SOURCES = opal-restart.c -opal_restart_LDADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -endif # WANT_FT_CR - -distclean-local: - rm -f $(man_pages) diff --git a/opal/tools/opal-restart/help-opal-restart.txt b/opal/tools/opal-restart/help-opal-restart.txt deleted file mode 100644 index f9717b594bf..00000000000 --- a/opal/tools/opal-restart/help-opal-restart.txt +++ /dev/null @@ -1,69 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2007 Evergrid, Inc. All rights reserved. -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. -# -# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI checkpoint tool -# -[usage] -opal-restart -r FILENAME - Open PAL Single Process Restart Tool - -%s -# -[invalid_filename] -Error: The filename is invalid because either you have not provided a filename - or provided an invalid filename. - -Filename: %s - -Please see --help for usage. -# -[invalid_metadata] -Error: The local checkpoint contains invalid or incomplete metadata. - This usually indicates that the original checkpoint was invalid. - Check the metadata file (%s) in the following directory: - %s -# -[restart_cmd_failure] -Error: Unable to obtain the proper restart command to restart from the - checkpoint file (%s). Returned %d. - Check the installation of the %s checkpoint/restart service - on all of the machines in your system. -# -[comp_select_failure] -Error: Unable to select the %s component needed to restart this - application. (Returned %d) - This likely indicates that the checkpointer needed is not - available on this machine. You should move to a machine that - has this checkpointer enabled. -# -[comp_select_mismatch] -Error: For an unknown reason the selected and requested components do - not match. - -Expected Component: %s -Selected Component: %s -# -[cache_not_avail] -Warning: Recommended cache directory could not be accessed. Falling back - to the snapshot location. 
-Cache Dir : %s -Snapshot Dir: %s diff --git a/opal/tools/opal-restart/opal-restart.1in b/opal/tools/opal-restart/opal-restart.1in deleted file mode 100644 index 764614f63e7..00000000000 --- a/opal/tools/opal-restart/opal-restart.1in +++ /dev/null @@ -1,130 +0,0 @@ -.\" -.\" Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -.\" University Research and Technology -.\" Corporation. All rights reserved. -.\" Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -.\" -.\" Man page for OPAL's opal-restart command -.\" -.\" .TH name section center-footer left-footer center-header -.TH OPAL-RESTART 1 "#OPAL_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -opal-restart \- Restart a previously checkpointed sequential process using the -Open PAL Checkpoint/Restart Service (CRS) -. -.P -\fBNote\fR: This should only be used by the user if the application being -restarted is an OPAL-only application. If it is an Open RTE or Open MPI -program their respective tools should be used. -. -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. -.B opal-restart -.B [ options ] -.B -. -.\" ************************** -.\" Options Section -.\" ************************** -.SH Options -. -\fIopal-restart\fR will attempt to restart a previously checkpointed squential -process from the snapshot handle reference returned by \fIopal_checkpoint\fP. -. -.TP 10 -.B -The snapshot handle reference returned by \fIopal_checkpoint\fP, used to -restart the process. This is required to be the last argument to this command. -. -. -.TP -.B -h | --help -Display help for this command -. -. -.TP -.B --fork -Fork off a new process, which is the restarted process. By default, the -restarted process will replace \fIopal-restart\fR process. -. -. -.TP -.B -w | --where -The location of the local snapshot reference. -. -. 
-.TP -.B -s | --self -Restart this process using the \fIself\fR CRS component. This component is a -special case, all other CRS components are automatically detected. -. -. -.TP -.B -v | --verbose -Enable verbose output for debugging. -. -. -.TP -.B -gmca | --gmca \fR \fP -Pass global MCA parameters that are applicable to all contexts. \fI\fP is -the parameter name; \fI\fP is the parameter value. -. -. -.TP -.B -mca | --mca -Send arguments to various MCA modules. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -.PP -\fIopal-restart\fR can be invoked multiple, non-overlapping times. This -allows the user to restart a previously running sequential -process. See opal_crs(7) for more information about the CRS framework and -components. -. -.PP -When using the \fIself\fR CRS component, the \fB\fR argument is -replaced by the name of the program to be restarted followed by any arguments -that need to be passed to the program. For example, if under normal execution -we would start our program "foo" as: - - \fBshell$\fP setenv OPAL_MCA_crs=self - \fBshell$\fP setenv OPAL_MCA_crs_self_prefix=my_callback_prefix - \fBshell$\fP ./foo arg1 arg2 - -To restart this process, we may only need to call: - - \fBshell$\fP opal-restart --self - -mca crs_self_prefix my_callback_prefix \\ - ./foo arg1 arg2 - -This will cause the "my_callback_prefix-restart" function to be called as soon -as the program "foo" calls \fIOPAL_INIT\fP. You do not have to call your -program with the same argument set as before. There for we could have just as -correctly called: - - \fBshell$\fP opal-restart --self \\ - -mca crs_self_prefix my_callback_prefix \\ - ./foo arg3 - -This depends upon the behavior of the program "foo". -. -. -.\" ************************** -.\" See Also Section -.\" ************************** -. 
-.SH SEE ALSO - opal-checkpoint(1), opal_crs(7) -.\", orte_crs(7), ompi_crs(7) diff --git a/opal/tools/opal-restart/opal-restart.c b/opal/tools/opal-restart/opal-restart.c deleted file mode 100644 index cbbb2bc3bcf..00000000000 --- a/opal/tools/opal-restart/opal-restart.c +++ /dev/null @@ -1,742 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * OPAL Restart command - * - * This command will restart a single process from - * the checkpoint generated by the opal-checkpoint - * command. 
- */ -#include "opal_config.h" - -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STDLIB_H -#include -#endif /* HAVE_STDLIB_H */ -#ifdef HAVE_SYS_STAT_H -#include -#endif -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_WAIT_H -#include -#endif -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ - -#include "opal/constants.h" - -#include "opal/util/cmd_line.h" -#include "opal/util/argv.h" -#include "opal/util/show_help.h" -#include "opal/util/output.h" -#include "opal/util/opal_environ.h" -#include "opal/util/error.h" -#include "opal/util/basename.h" -#include "opal/mca/base/base.h" - -#include "opal/runtime/opal.h" -#include "opal/runtime/opal_cr.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" - -/****************** - * Local Functions - ******************/ -static int initialize(int argc, char *argv[]); -static int finalize(void); -static int parse_args(int argc, char *argv[]); -static int check_file(void); -static int post_env_vars(int prev_pid, opal_crs_base_snapshot_t *snapshot); - -/***************************************** - * Global Vars for Command line Arguments - *****************************************/ -static char *expected_crs_comp = NULL; - -typedef struct { - bool help; - bool verbose; - char *snapshot_ref; - char *snapshot_loc; - char *snapshot_metadata; - char *snapshot_cache; - char *snapshot_compress; - char *snapshot_compress_postfix; - int output; -} opal_restart_globals_t; - -opal_restart_globals_t opal_restart_globals; - -opal_cmd_line_init_t cmd_line_opts[] = { - { NULL, - 'h', NULL, "help", - 0, - &opal_restart_globals.help, OPAL_CMD_LINE_TYPE_BOOL, - "This help message" }, - - { NULL, - 'v', NULL, "verbose", - 0, - &opal_restart_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL, - "Be Verbose" }, - - { NULL, - 'l', 
NULL, "location", - 1, - &opal_restart_globals.snapshot_loc, OPAL_CMD_LINE_TYPE_STRING, - "Full path to the location of the local snapshot."}, - - { NULL, - 'm', NULL, "metadata", - 1, - &opal_restart_globals.snapshot_metadata, OPAL_CMD_LINE_TYPE_STRING, - "Relative path (with respect to --location) to the metadata file."}, - - { NULL, - 'r', NULL, "reference", - 1, - &opal_restart_globals.snapshot_ref, OPAL_CMD_LINE_TYPE_STRING, - "Local snapshot reference."}, - - { NULL, - 'c', NULL, "cache", - 1, - &opal_restart_globals.snapshot_cache, OPAL_CMD_LINE_TYPE_STRING, - "Possible local cache of the snapshot reference."}, - - { NULL, - 'd', NULL, "decompress", - 1, - &opal_restart_globals.snapshot_compress, OPAL_CMD_LINE_TYPE_STRING, - "Decompression component to use."}, - - { NULL, - 'p', NULL, "decompress_postfix", - 1, - &opal_restart_globals.snapshot_compress_postfix, OPAL_CMD_LINE_TYPE_STRING, - "Decompression component postfix."}, - - /* End of list */ - { NULL, - '\0', NULL, NULL, - 0, - NULL, OPAL_CMD_LINE_TYPE_NULL, - NULL } -}; - -int -main(int argc, char *argv[]) -{ - int ret, exit_status = OPAL_SUCCESS; - int child_pid; - int prev_pid = 0; - int idx; - opal_crs_base_snapshot_t *snapshot = NULL; - char * tmp_env_var = NULL; - bool select = false; - - /*************** - * Initialize - ***************/ - if (OPAL_SUCCESS != (ret = initialize(argc, argv))) { - exit_status = ret; - goto cleanup; - } - - /* - * Check for existence of the file, or program in the case of self - */ - if( OPAL_SUCCESS != (ret = check_file() )) { - opal_show_help("help-opal-restart.txt", "invalid_filename", true, - opal_restart_globals.snapshot_ref); - exit_status = ret; - goto cleanup; - } - - /* Re-enable the selection of the CRS component, so we can choose the right one */ - idx = mca_base_var_find(NULL, "crs", "base", "do_not_select"); - - if (0 > idx) { - opal_output(opal_restart_globals.output, - "MCA variable opal_crs_base_do_not_select not found\n"); - exit_status = 
OPAL_ERROR; - goto cleanup; - } - - ret = mca_base_var_set_value(idx, &select, 0, MCA_BASE_VAR_SOURCE_DEFAULT, NULL); - if (OPAL_SUCCESS != ret) { - exit_status = ret; - goto cleanup; - } - - /* - * Make sure we are using the correct checkpointer - */ - if(NULL == expected_crs_comp) { - char * full_metadata_path = NULL; - FILE * metadata = NULL; - - asprintf(&full_metadata_path, "%s/%s/%s", - opal_restart_globals.snapshot_loc, - opal_restart_globals.snapshot_ref, - opal_restart_globals.snapshot_metadata); - if( NULL == (metadata = fopen(full_metadata_path, "r")) ) { - opal_show_help("help-opal-restart.txt", "invalid_metadata", true, - opal_restart_globals.snapshot_metadata, - full_metadata_path); - exit_status = OPAL_ERROR; - goto cleanup; - } - if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(metadata, - &expected_crs_comp, - &prev_pid)) ) { - opal_show_help("help-opal-restart.txt", "invalid_metadata", true, - opal_restart_globals.snapshot_metadata, - full_metadata_path); - exit_status = ret; - goto cleanup; - } - - free(full_metadata_path); - full_metadata_path = NULL; - - fclose(metadata); - metadata = NULL; - } - - opal_output_verbose(10, opal_restart_globals.output, - "Restart Expects checkpointer: (%s)", - expected_crs_comp); - - (void) mca_base_var_env_name("crs", &tmp_env_var); - opal_setenv(tmp_env_var, - expected_crs_comp, - true, &environ); - free(tmp_env_var); - tmp_env_var = NULL; - - /* Select this component or don't continue. - * If the selection of this component fails, then we can't - * restart on this node because it doesn't have the proper checkpointer - * available. 
- */ - if( OPAL_SUCCESS != (ret = opal_crs_base_open(MCA_BASE_OPEN_DEFAULT)) ) { - opal_show_help("help-opal-restart.txt", "comp_select_failure", true, - "crs", ret); - exit_status = ret; - goto cleanup; - } - - if( OPAL_SUCCESS != (ret = opal_crs_base_select()) ) { - opal_show_help("help-opal-restart.txt", "comp_select_failure", true, - expected_crs_comp, ret); - exit_status = ret; - goto cleanup; - } - - /* - * Make sure we have selected the proper component - */ - if(NULL == expected_crs_comp || - 0 != strncmp(expected_crs_comp, - opal_crs_base_selected_component.base_version.mca_component_name, - strlen(expected_crs_comp)) ) { - opal_show_help("help-opal-restart.txt", "comp_select_mismatch", - true, - expected_crs_comp, - opal_crs_base_selected_component.base_version.mca_component_name, - ret); - exit_status = ret; - goto cleanup; - } - - /****************************** - * Restart in this process - ******************************/ - opal_output_verbose(10, opal_restart_globals.output, - "Restarting from file (%s)\n", - opal_restart_globals.snapshot_ref); - - snapshot = OBJ_NEW(opal_crs_base_snapshot_t); - snapshot->cold_start = true; - asprintf(&(snapshot->snapshot_directory), "%s/%s", - opal_restart_globals.snapshot_loc, - opal_restart_globals.snapshot_ref); - asprintf(&(snapshot->metadata_filename), "%s/%s", - snapshot->snapshot_directory, - opal_restart_globals.snapshot_metadata); - - /* Since some checkpoint/restart systems don't pass along env vars to the - * restarted app, we need to take care of that. - * - * Included here is the creation of any files or directories that need to be - * created before the process is restarted. 
- */ - if(OPAL_SUCCESS != (ret = post_env_vars(prev_pid, snapshot) ) ) { - exit_status = ret; - goto cleanup; - } - - /* - * Do the actual restart - */ - ret = opal_crs.crs_restart(snapshot, - false, - &child_pid); - - if (OPAL_SUCCESS != ret) { - opal_show_help("help-opal-restart.txt", "restart_cmd_failure", true, - opal_restart_globals.snapshot_ref, - ret, - opal_crs_base_selected_component.base_version.mca_component_name); - exit_status = ret; - goto cleanup; - } - /* Should never get here, since crs_restart calls exec */ - - /*************** - * Cleanup - ***************/ - cleanup: - if (OPAL_SUCCESS != (ret = finalize())) { - return ret; - } - - if(NULL != snapshot ) - OBJ_DESTRUCT(snapshot); - - return exit_status; -} - -static int initialize(int argc, char *argv[]) -{ - int ret, exit_status = OPAL_SUCCESS; - char * tmp_env_var = NULL; - - /* - * Make sure to init util before parse_args - * to ensure installdirs is setup properly - * before calling mca_base_open(); - */ - if( OPAL_SUCCESS != (ret = opal_init_util(&argc, &argv)) ) { - return ret; - } - - /* - * Parse Command line arguments - */ - if (OPAL_SUCCESS != (ret = parse_args(argc, argv))) { - exit_status = ret; - goto cleanup; - } - - /* - * Setup OPAL Output handle from the verbose argument - */ - if( opal_restart_globals.verbose ) { - opal_restart_globals.output = opal_output_open(NULL); - opal_output_set_verbosity(opal_restart_globals.output, 10); - } else { - opal_restart_globals.output = 0; /* Default=STDOUT */ - } - - /* - * Turn off the selection of the CRS component, - * we need to do that later - */ - (void) mca_base_var_env_name("crs_base_do_not_select", &tmp_env_var); - opal_setenv(tmp_env_var, - "1", /* turn off the selection */ - true, &environ); - free(tmp_env_var); - tmp_env_var = NULL; - - /* - * Make sure we select the proper compress component. 
- */ - if( NULL != opal_restart_globals.snapshot_compress ) { - (void) mca_base_var_env_name("compress", &tmp_env_var); - opal_setenv(tmp_env_var, - opal_restart_globals.snapshot_compress, - true, &environ); - free(tmp_env_var); - tmp_env_var = NULL; - } - - /* - * Initialize the OPAL layer - */ - if (OPAL_SUCCESS != (ret = opal_init(&argc, &argv))) { - exit_status = ret; - goto cleanup; - } - - /* - * If the checkpoint was compressed, then decompress it before continuing - */ - if( NULL != opal_restart_globals.snapshot_compress ) { - char * zip_dir = NULL; - char * tmp_str = NULL; - - /* Make sure to clear the selection for the restart, - * this way the user can swich compression mechanism - * across restart - */ - (void) mca_base_var_env_name("compress", &tmp_env_var); - opal_unsetenv(tmp_env_var, &environ); - free(tmp_env_var); - tmp_env_var = NULL; - - asprintf(&zip_dir, "%s/%s%s", - opal_restart_globals.snapshot_loc, - opal_restart_globals.snapshot_ref, - opal_restart_globals.snapshot_compress_postfix); - - if (0 > (ret = access(zip_dir, F_OK)) ) { - opal_output(opal_restart_globals.output, - "Error: Unable to access the file [%s]!", - zip_dir); - exit_status = OPAL_ERROR; - goto cleanup; - } - - opal_output_verbose(10, opal_restart_globals.output, - "Decompressing (%s)", - zip_dir); - - opal_compress.decompress(zip_dir, &tmp_str); - - if( NULL != zip_dir ) { - free(zip_dir); - zip_dir = NULL; - } - if( NULL != tmp_str ) { - free(tmp_str); - tmp_str = NULL; - } - } - - /* - * If a cache directory has been suggested, see if it exists - */ - if( NULL != opal_restart_globals.snapshot_cache ) { - if(0 == (ret = access(opal_restart_globals.snapshot_cache, F_OK)) ) { - opal_output_verbose(10, opal_restart_globals.output, - "Using the cached snapshot (%s) instead of (%s)", - opal_restart_globals.snapshot_cache, - opal_restart_globals.snapshot_loc); - if( NULL != opal_restart_globals.snapshot_loc ) { - free(opal_restart_globals.snapshot_loc); - 
opal_restart_globals.snapshot_loc = NULL; - } - opal_restart_globals.snapshot_loc = opal_dirname(opal_restart_globals.snapshot_cache); - } else { - opal_show_help("help-opal-restart.txt", "cache_not_avail", true, - opal_restart_globals.snapshot_cache, - opal_restart_globals.snapshot_loc); - } - } - - /* - * Mark this process as a tool - */ - opal_cr_is_tool = true; - - cleanup: - return exit_status; -} - -static int finalize(void) -{ -#if 0 - int ret; - - /* - * JJH: Comment this out for now. It should only be called - * when exec fails, and opal-restart is shutting down. - * Currently BLCR is calling opal_even_fini() in the restart - * functionality, so calling it twice is causing a segv. - * Since we do not really need to do this, just comment it out - * for now. - */ - if (OPAL_SUCCESS != (ret = opal_finalize())) { - return ret; - } -#endif - - return OPAL_SUCCESS; -} - -static int parse_args(int argc, char *argv[]) -{ - int i, ret, len; - opal_cmd_line_t cmd_line; - char **app_env = NULL, **global_env = NULL; - - opal_restart_globals.help = false; - opal_restart_globals.verbose = false; - opal_restart_globals.snapshot_ref = NULL; - opal_restart_globals.snapshot_loc = NULL; - opal_restart_globals.snapshot_metadata = NULL; - opal_restart_globals.snapshot_cache = NULL; - opal_restart_globals.snapshot_compress = NULL; - opal_restart_globals.snapshot_compress_postfix = NULL; - opal_restart_globals.output = 0; - - /* Parse the command line options */ - opal_cmd_line_create(&cmd_line, cmd_line_opts); - - mca_base_open(); - mca_base_cmd_line_setup(&cmd_line); - ret = opal_cmd_line_parse(&cmd_line, false, argc, argv); - if (OPAL_SUCCESS != ret) { - if (OPAL_ERR_SILENT != ret) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(ret)); - } - return 1; - } - if (opal_restart_globals.help ) { - char *str, *args = NULL; - args = opal_cmd_line_get_usage_msg(&cmd_line); - str = opal_show_help_string("help-opal-restart.txt", "usage", true, - args); - 
if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - /* If we show the help message, that should be all we do */ - exit(0); - } - - /** - * Put all of the MCA arguments in the environment - */ - mca_base_cmd_line_process_args(&cmd_line, &app_env, &global_env); - - len = opal_argv_count(app_env); - for(i = 0; i < len; ++i) { - putenv(app_env[i]); - } - - len = opal_argv_count(global_env); - for(i = 0; i < len; ++i) { - putenv(global_env[i]); - } - - /** - * Now start parsing our specific arguments - */ - /* get the remaining bits */ - opal_cmd_line_get_tail(&cmd_line, &argc, &argv); - - if ( NULL == opal_restart_globals.snapshot_ref || - 0 >= strlen(opal_restart_globals.snapshot_ref) ) { - opal_show_help("help-opal-restart.txt", "invalid_filename", true, - ""); - return OPAL_ERROR; - } - - /* If we have arguments after the command, then assume they - * need to be grouped together. - * Useful in the 'mca crs self' instance. - */ - if(argc > 0) { - opal_restart_globals.snapshot_ref = strdup(opal_argv_join(argv, ' ')); - } - - return OPAL_SUCCESS; -} - -static int check_file(void) -{ - int exit_status = OPAL_SUCCESS; - int ret; - char * path_to_check = NULL; - - if(NULL == opal_restart_globals.snapshot_ref) { - opal_output(opal_restart_globals.output, - "Error: No filename provided!"); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * Check for the existance of the snapshot handle in the snapshot directory - */ - asprintf(&path_to_check, "%s/%s", - opal_restart_globals.snapshot_loc, - opal_restart_globals.snapshot_ref); - - opal_output_verbose(10, opal_restart_globals.output, - "Checking for the existence of (%s)", - path_to_check); - - if (0 > (ret = access(path_to_check, F_OK)) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - cleanup: - if( NULL != path_to_check) { - free(path_to_check); - path_to_check = NULL; - } - - return exit_status; -} - -static int post_env_vars(int prev_pid, opal_crs_base_snapshot_t *snapshot) -{ - int ret, 
exit_status = OPAL_SUCCESS; - char *command = NULL; - char *proc_file = NULL; - char **loc_touch = NULL; - char **loc_mkdir = NULL; - int argc, i; - - if( 0 > prev_pid ) { - opal_output(opal_restart_globals.output, - "Invalid PID (%d)\n", - prev_pid); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * This is needed so we can pass the previous environment to the restarted - * application process. - */ - asprintf(&proc_file, "%s/%s-%d", opal_tmp_directory(), OPAL_CR_BASE_ENV_NAME, prev_pid); - asprintf(&command, "env | grep OMPI_ > %s", proc_file); - - opal_output_verbose(5, opal_restart_globals.output, - "post_env_vars: Execute: <%s>", command); - - ret = system(command); - if( 0 > ret) { - exit_status = ret; - goto cleanup; - } - - /* - * Any directories that need to be created - */ - if( NULL == (snapshot->metadata = fopen(snapshot->metadata_filename, "r")) ) { - opal_show_help("help-opal-restart.txt", "invalid_metadata", true, - opal_restart_globals.snapshot_metadata, - snapshot->metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - opal_crs_base_metadata_read_token(snapshot->metadata, CRS_METADATA_MKDIR, &loc_mkdir); - argc = opal_argv_count(loc_mkdir); - for( i = 0; i < argc; ++i ) { - if( NULL != command ) { - free(command); - command = NULL; - } - asprintf(&command, "mkdir -p %s", loc_mkdir[i]); - - opal_output_verbose(5, opal_restart_globals.output, - "post_env_vars: Execute: <%s>", command); - - ret = system(command); - if( 0 > ret) { - exit_status = ret; - goto cleanup; - } - } - if( 0 < argc ) { - system("sync ; sync"); - } - - /* - * Any files that need to exist - */ - opal_crs_base_metadata_read_token(snapshot->metadata, CRS_METADATA_TOUCH, &loc_touch); - argc = opal_argv_count(loc_touch); - for( i = 0; i < argc; ++i ) { - if( NULL != command ) { - free(command); - command = NULL; - } - asprintf(&command, "touch %s", loc_touch[i]); - - opal_output_verbose(5, opal_restart_globals.output, - "post_env_vars: Execute: <%s>", 
command); - - ret = system(command); - if( 0 > ret) { - exit_status = ret; - goto cleanup; - } - } - if( 0 < argc ) { - system("sync ; sync"); - } - - cleanup: - if( NULL != command) { - free(command); - command = NULL; - } - if( NULL != proc_file) { - free(proc_file); - proc_file = NULL; - } - if( NULL != loc_mkdir ) { - opal_argv_free(loc_mkdir); - loc_mkdir = NULL; - } - if( NULL != loc_touch ) { - opal_argv_free(loc_touch); - loc_touch = NULL; - } - - if( NULL != snapshot->metadata ) { - fclose(snapshot->metadata); - snapshot->metadata = NULL; - } - - return exit_status; -} diff --git a/opal/tools/wrappers/Makefile.am b/opal/tools/wrappers/Makefile.am index 60d1feb24aa..53850b28bb7 100644 --- a/opal/tools/wrappers/Makefile.am +++ b/opal/tools/wrappers/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2006 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2014 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/opal/tools/wrappers/generic_wrapper.1in b/opal/tools/wrappers/generic_wrapper.1in index 4815d9b4ceb..27adaa4a10f 100644 --- a/opal/tools/wrappers/generic_wrapper.1in +++ b/opal/tools/wrappers/generic_wrapper.1in @@ -10,11 +10,11 @@ . .SH OPTIONS .TP ---showme +--showme This option comes in several different variants (see below). None of the variants invokes the underlying compiler; they all provide information on how the underlying compiler would have been invoked had -.I --showme +.I --showme not been used. 
The basic .I --showme @@ -132,7 +132,7 @@ underlying Fortran compiler, .IR -qfixed may be necessary to compile fixed-format Fortran source files. . -.PP +.PP Finally, note that .I mpifort will be inoperative and will return an error on use if Fortran support @@ -210,11 +210,11 @@ line. . . .SH ENVIRONMENT VARIABLES -.PP +.PP By default, the wrappers use the compilers that were selected when #PROJECT# was configured. These compilers were either found automatically by Open MPI's "configure" script, or were selected by -the user in the CC, CXX, F77, and/or FC environment variables +the user in the CC, CXX, F77, and/or FC environment variables before "configure" was invoked. Additionally, other arguments specific to the compiler may have been selected by configure. . diff --git a/opal/tools/wrappers/help-opal-wrapper.txt b/opal/tools/wrappers/help-opal-wrapper.txt index ab2302f8287..3302f14c099 100644 --- a/opal/tools/wrappers/help-opal-wrapper.txt +++ b/opal/tools/wrappers/help-opal-wrapper.txt @@ -6,16 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI wrapper compiler error diff --git a/opal/tools/wrappers/opal.pc.in b/opal/tools/wrappers/opal.pc.in index 878fdc1e474..ed1b3adb5cd 100644 --- a/opal/tools/wrappers/opal.pc.in +++ b/opal/tools/wrappers/opal.pc.in @@ -9,12 +9,13 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ includedir=@includedir@ libdir=@libdir@ +pkgincludedir=@opalincludedir@ # # Note that the EXTRA_LIBS are only necessary when static linking # (they're pulled in via libopen-pal.so's implicit dependencies), so # list them in Libs.private. # -Libs: -L${libdir} @OPAL_WRAPPER_EXTRA_LDFLAGS@ -lopen-pal +Libs: -L${libdir} @OPAL_PKG_CONFIG_LDFLAGS@ -lopen-pal Libs.private: @OPAL_WRAPPER_EXTRA_LIBS@ # # It is safe to hard-wire the -I before the EXTRA_INCLUDES because we diff --git a/opal/tools/wrappers/opal_wrapper.1in b/opal/tools/wrappers/opal_wrapper.1in index fcfdd929919..2ae351a0ec9 100644 --- a/opal/tools/wrappers/opal_wrapper.1in +++ b/opal/tools/wrappers/opal_wrapper.1in @@ -15,7 +15,7 @@ invoked as the back-end by the Open MPI wrapper commands such as: .BR mpiCC , .BR mpic++ , and -.BR mpifort +.BR mpifort (and its legacy/deprecated names .BR mpif77 and @@ -26,9 +26,9 @@ and/or have renamed the wrapper compilers listed above to avoid executable name conflicts with other MPI implementations. Hence, you may also have wrapper compilers installed including the following names: -.BR mpifort.openmpi +.BR mpifort.openmpi (and the legacy/deprecated names -.BR mpif90.openmpi +.BR mpif90.openmpi and .BR mpif77.openmpi ), .BR mpicxx.openmpi , @@ -68,7 +68,7 @@ and the website at .\" Authors Section .\" ************************** .SH AUTHORS -The Open MPI maintainers -- see +The Open MPI maintainers -- see .I http://www.openmpi.org/ or the file .IR AUTHORS . 
diff --git a/opal/tools/wrappers/opal_wrapper.c b/opal/tools/wrappers/opal_wrapper.c index 6ce11e1b284..4c4374cd61e 100644 --- a/opal/tools/wrappers/opal_wrapper.c +++ b/opal/tools/wrappers/opal_wrapper.c @@ -5,18 +5,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,9 +26,7 @@ #include #include -#ifdef HAVE_STDLIB_H #include -#endif /* HAVE_STDLIB_H */ #ifdef HAVE_SYS_STAT_H #include #endif /* HAVE_SYS_STAT_H */ @@ -39,9 +39,7 @@ #ifdef HAVE_SYS_WAIT_H #include #endif /* HAVE_SYS_WAIT_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/mca/installdirs/installdirs.h" #include "opal/runtime/opal.h" @@ -79,6 +77,8 @@ struct options_data_t { char *req_file; char *path_includedir; char *path_libdir; + char *path_opalincludedir; + char *path_opallibdir; }; static struct options_data_t *options_data = NULL; @@ -128,6 +128,8 @@ options_data_init(struct options_data_t *data) data->req_file = NULL; data->path_includedir = NULL; data->path_libdir = NULL; + data->path_opalincludedir = NULL; + data->path_opallibdir = NULL; } static void @@ -154,6 +156,8 @@ options_data_free(struct options_data_t *data) if (NULL != data->req_file) free(data->req_file); if (NULL != data->path_includedir) free(data->path_includedir); if (NULL != data->path_libdir) free(data->path_libdir); + if (NULL != data->path_opalincludedir) free(data->path_opalincludedir); + if (NULL != data->path_opallibdir) free(data->path_opallibdir); } static void @@ -259,7 +263,7 @@ data_callback(const char *key, const char *value) if (NULL != value) options_data[parse_options_idx].version = strdup(value); } else if (0 == strcmp(key, "preprocessor_flags")) { char **values = opal_argv_split(value, ' '); - opal_argv_insert(&options_data[parse_options_idx].preproc_flags, + opal_argv_insert(&options_data[parse_options_idx].preproc_flags, opal_argv_count(options_data[parse_options_idx].preproc_flags), values); expand_flags(options_data[parse_options_idx].preproc_flags); @@ -316,34 +320,58 @@ data_callback(const char *key, const char *value) if (0 != strcmp(options_data[parse_options_idx].path_includedir, "/usr/include") || 0 == strncmp(options_data[parse_options_idx].language, 
"Fortran", strlen("Fortran"))) { char *line; - asprintf(&line, OPAL_INCLUDE_FLAG"%s", + asprintf(&line, OPAL_INCLUDE_FLAG"%s", options_data[parse_options_idx].path_includedir); opal_argv_append_nosize(&options_data[parse_options_idx].preproc_flags, line); free(line); } } } else if (0 == strcmp(key, "libdir")) { - if (NULL != value) options_data[parse_options_idx].path_libdir = + if (NULL != value) options_data[parse_options_idx].path_libdir = opal_install_dirs_expand(value); if (0 != strcmp(options_data[parse_options_idx].path_libdir, "/usr/lib")) { char *line; - asprintf(&line, OPAL_LIBDIR_FLAG"%s", + asprintf(&line, OPAL_LIBDIR_FLAG"%s", options_data[parse_options_idx].path_libdir); opal_argv_append_nosize(&options_data[parse_options_idx].link_flags, line); free(line); } + } else if (0 == strcmp(key, "opalincludedir")) { + printf("EXPANDING!\n"); + if (NULL != value) { + options_data[parse_options_idx].path_opalincludedir = + opal_install_dirs_expand(value); + if (0 != strcmp(options_data[parse_options_idx].path_opalincludedir, "/usr/include") || + 0 == strncmp(options_data[parse_options_idx].language, "Fortran", strlen("Fortran"))) { + char *line; + asprintf(&line, OPAL_INCLUDE_FLAG"%s", + options_data[parse_options_idx].path_opalincludedir); + opal_argv_append_nosize(&options_data[parse_options_idx].preproc_flags, line); + free(line); + } + } + } else if (0 == strcmp(key, "opallibdir")) { + if (NULL != value) options_data[parse_options_idx].path_opallibdir = + opal_install_dirs_expand(value); + if (0 != strcmp(options_data[parse_options_idx].path_opallibdir, "/usr/lib")) { + char *line; + asprintf(&line, OPAL_LIBDIR_FLAG"%s", + options_data[parse_options_idx].path_opallibdir); + opal_argv_append_nosize(&options_data[parse_options_idx].link_flags, line); + free(line); + } } } static int -data_init(const char *appname) +data_init(const char *appname) { int ret; char *datafile; /* now load the data */ - asprintf(&datafile, "%s%s%s-wrapper-data.txt", + 
asprintf(&datafile, "%s%s%s-wrapper-data.txt", opal_install_dirs.opaldatadir, OPAL_PATH_SEP, appname); if (NULL == datafile) return OPAL_ERR_TEMP_OUT_OF_RESOURCE; @@ -405,7 +433,7 @@ load_env_data(const char *project, const char *flag, char **data) free(envname); return; } - } + } free(envname); if (NULL != *data) free(*data); @@ -429,7 +457,7 @@ load_env_data_argv(const char *project, const char *flag, char ***data) free(envname); return; } - } + } free(envname); if (NULL != *data) opal_argv_free(*data); @@ -518,7 +546,7 @@ main(int argc, char *argv[]) * Sanity Checks * ****************************************************/ - + if (NULL != options_data[user_data_idx].req_file) { /* make sure the language is supported */ if (0 == strcmp(options_data[user_data_idx].req_file, "not supported")) { @@ -602,9 +630,9 @@ main(int argc, char *argv[]) } else if (0 == strncmp(user_argv[i], "-showme:help", strlen("-showme:help")) || 0 == strncmp(user_argv[i], "--showme:help", strlen("--showme:help"))) { char *str; - str = opal_show_help_string("help-opal-wrapper.txt", "usage", + str = opal_show_help_string("help-opal-wrapper.txt", "usage", false, argv[0], - options_data[user_data_idx].project, + options_data[user_data_idx].project, NULL); if (NULL != str) { printf("%s", str); @@ -636,7 +664,7 @@ main(int argc, char *argv[]) } else if (0 == strcmp(user_argv[i], "-c")) { flags &= ~COMP_WANT_LINK; real_flag = true; - } else if (0 == strcmp(user_argv[i], "-E") || + } else if (0 == strcmp(user_argv[i], "-E") || 0 == strcmp(user_argv[i], "-M")) { flags &= ~(COMP_WANT_COMPILE | COMP_WANT_LINK); real_flag = true; @@ -690,7 +718,7 @@ main(int argc, char *argv[]) disable_flags = false; flags |= COMP_SHOW_ERROR; real_flag = true; - } else { + } else { /* if the option flag is one that we use to determine which set of compiler data to use, don't count it as a real option */ @@ -718,15 +746,6 @@ main(int argc, char *argv[]) flags &= ~(COMP_WANT_PREPROC|COMP_WANT_COMPILE|COMP_WANT_LINK); } 
-#if !OMPI_ENABLE_MPI_PROFILING - /* sanity check */ - if (flags & COMP_WANT_PMPI) { - opal_show_help("help-opal-wrapper.txt", "no-profiling-support", true, - argv[0], NULL); - } -#endif - - /**************************************************** * * Assemble the command line @@ -743,7 +762,7 @@ main(int argc, char *argv[]) exec_argc = 0; } - /* This error would normally not happen unless the user edits the + /* This error would normally not happen unless the user edits the wrapper data files manually */ if (NULL == exec_argv) { opal_show_help("help-opal-wrapper.txt", "no-compiler-specified", true); diff --git a/opal/util/Makefile.am b/opal/util/Makefile.am index cedf860bf7b..5c4cb2945ed 100644 --- a/opal/util/Makefile.am +++ b/opal/util/Makefile.am @@ -12,6 +12,8 @@ # Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 NVIDIA Corporation. All rights reserved. # Copyright (c) 2013 Intel, Inc. All rights reserved +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -42,9 +44,8 @@ headers = \ crc.h \ daemon_init.h \ error.h \ - few.h \ fd.h \ - fd.c \ + few.h \ if.h \ keyval_parse.h \ malloc.h \ @@ -78,6 +79,7 @@ libopalutil_la_SOURCES = \ crc.c \ daemon_init.c \ error.c \ + fd.c \ few.c \ if.c \ keyval_parse.c \ diff --git a/opal/util/alfg.c b/opal/util/alfg.c index 75dc489c4f9..2affae2cecf 100644 --- a/opal/util/alfg.c +++ b/opal/util/alfg.c @@ -1,46 +1,50 @@ /* * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" +#include + #include "alfg.h" -/* Mask corresponding to the primitive polynomial - *--------------------------------------------------- - * +/* Mask corresponding to the primitive polynomial + *--------------------------------------------------- + * * p(x) = 1 + x^25 + x^27 + x^29 + x^30 + x^31 + x^32 * - *--------------------------------------------------- - */ + *--------------------------------------------------- + */ #define MASK 0x80000057U /* Additive lagged Fibonacci parameters: - *--------------------------------------------------- + *--------------------------------------------------- * * x_n = (x_(n - TAP1) + x_(n - TAP2) ) mod M * - *--------------------------------------------------- - */ + *--------------------------------------------------- + */ #define TAP1 127 #define TAP2 97 #define CBIT 21 /* Canonical bit */ -/** +/** * @brief Galois shift register: Used to seed the ALFG's * canonical rectangle - * - * @param[in] unsigned int *seed: used to seed the Galois register - * @param[out] uint32_t lsb: least significant bit of the Galois - * register after shift - */ + * + * @param[in] unsigned int *seed: used to seed the Galois register + * @param[out] uint32_t lsb: least significant bit of the Galois + * register after shift + */ static uint32_t galois(unsigned int *seed){ uint32_t lsb; @@ -52,9 +56,12 @@ static uint32_t galois(unsigned int *seed){ return lsb; } -/** +/* OPAL global rng buffer */ +static opal_rng_buff_t alfg_buffer; + +/** * @brief Routine to seed the ALFG register - * + * * @param[in] uint32_t seed * @param[out] opal_rng_buff_t *buff: handle to ALFG buffer state */ @@ -80,17 +87,19 @@ int opal_srand(opal_rng_buff_t *buff, uint32_t seed) { buff->alfg[j] = buff->alfg[j] ^ ((galois(&seed_cpy))< -#endif /* HAVE_STDLIB_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/util/argv.h" #include "opal/constants.h" 
@@ -41,21 +37,21 @@ int opal_argv_append(int *argc, char ***argv, const char *arg) { int rc; - + /* add the new element */ if (OPAL_SUCCESS != (rc = opal_argv_append_nosize(argv, arg))) { return rc; } - + *argc = opal_argv_count(*argv); - + return OPAL_SUCCESS; } int opal_argv_append_nosize(char ***argv, const char *arg) { int argc; - + /* Create new argv. */ if (NULL == *argv) { @@ -72,7 +68,7 @@ int opal_argv_append_nosize(char ***argv, const char *arg) else { /* count how many entries currently exist */ argc = opal_argv_count(*argv); - + *argv = (char**) realloc(*argv, (argc + 2) * sizeof(char *)); if (NULL == *argv) { return OPAL_ERR_OUT_OF_RESOURCE; @@ -109,7 +105,7 @@ int opal_argv_prepend_nosize(char ***argv, const char *arg) } else { /* count how many entries currently exist */ argc = opal_argv_count(*argv); - + *argv = (char**) realloc(*argv, (argc + 2) * sizeof(char *)); if (NULL == *argv) { return OPAL_ERR_OUT_OF_RESOURCE; @@ -129,14 +125,14 @@ int opal_argv_prepend_nosize(char ***argv, const char *arg) int opal_argv_append_unique_nosize(char ***argv, const char *arg, bool overwrite) { int i; - + /* if the provided array is NULL, then the arg cannot be present, * so just go ahead and append */ if (NULL == *argv) { return opal_argv_append_nosize(argv, arg); } - + /* see if this arg is already present in the array */ for (i=0; NULL != (*argv)[i]; i++) { if (0 == strcmp(arg, (*argv)[i])) { @@ -343,37 +339,37 @@ char *opal_argv_join_range(char **argv, size_t start, size_t end, int delimiter) char *str; size_t str_len = 0; size_t i; - + /* Bozo case */ - + if (NULL == argv || NULL == argv[0] || (int)start > opal_argv_count(argv)) { return strdup(""); } - + /* Find the total string length in argv including delimiters. The last delimiter is replaced by the NULL character. */ - + for (p = &argv[start], i=start; *p && i < end; ++p, ++i) { str_len += strlen(*p) + 1; } - + /* Allocate the string. 
*/ - + if (NULL == (str = (char*) malloc(str_len))) return NULL; - + /* Loop filling in the string. */ - + str[--str_len] = '\0'; p = &argv[start]; pp = *p; - + for (i = 0; i < str_len; ++i) { if ('\0' == *pp) { - + /* End of a string, fill in a delimiter and go to the next string. */ - + str[i] = (char) delimiter; ++p; pp = *p; @@ -381,9 +377,9 @@ char *opal_argv_join_range(char **argv, size_t start, size_t end, int delimiter) str[i] = *pp++; } } - + /* All done */ - + return str; } @@ -499,7 +495,7 @@ int opal_argv_insert(char ***target, int start, char **source) int suffix_count; /* Check for the bozo cases */ - + if (NULL == target || NULL == *target || start < 0) { return OPAL_ERR_BAD_PARAM; } else if (NULL == source) { @@ -522,7 +518,7 @@ int opal_argv_insert(char ***target, int start, char **source) /* Alloc out new space */ - *target = (char**) realloc(*target, + *target = (char**) realloc(*target, sizeof(char *) * (target_count + source_count + 1)); /* Move suffix items down to the end */ @@ -550,26 +546,26 @@ int opal_argv_insert_element(char ***target, int location, char *source) { int i, target_count; int suffix_count; - + /* Check for the bozo cases */ - + if (NULL == target || NULL == *target || location < 0) { return OPAL_ERR_BAD_PARAM; } else if (NULL == source) { return OPAL_SUCCESS; } - + /* Easy case: appending to the end */ target_count = opal_argv_count(*target); if (location > target_count) { opal_argv_append(&target_count, target, source); return OPAL_SUCCESS; } - + /* Alloc out new space */ - *target = (char**) realloc(*target, + *target = (char**) realloc(*target, sizeof(char*) * (target_count + 2)); - + /* Move suffix items down to the end */ suffix_count = target_count - location; for (i = suffix_count - 1; i >= 0; --i) { @@ -577,10 +573,10 @@ int opal_argv_insert_element(char ***target, int location, char *source) (*target)[location + i]; } (*target)[location + suffix_count + 1] = NULL; - + /* Strdup in the source */ (*target)[location] 
= strdup(source); - + /* All done */ return OPAL_SUCCESS; } diff --git a/opal/util/argv.h b/opal/util/argv.h index 8c6973447a1..3dd9b4e202b 100644 --- a/opal/util/argv.h +++ b/opal/util/argv.h @@ -5,19 +5,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. - * All rights reserved. + * All rights reserved. * Copyright (c) 2007 Voltaire. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -128,9 +128,9 @@ OPAL_DECLSPEC int opal_argv_append_unique_nosize(char ***argv, const char *arg, * array. */ OPAL_DECLSPEC void opal_argv_free(char **argv); - + /** - * Split a string into a NULL-terminated argv array. Do not include empty + * Split a string into a NULL-terminated argv array. Do not include empty * strings in result array. * * @param src_string Input string. @@ -147,7 +147,7 @@ OPAL_DECLSPEC void opal_argv_free(char **argv); OPAL_DECLSPEC char **opal_argv_split(const char *src_string, int delimiter) __opal_attribute_malloc__ __opal_attribute_warn_unused_result__; /** - * Split a string into a NULL-terminated argv array. Include empty + * Split a string into a NULL-terminated argv array. Include empty * strings in result array. * * @param src_string Input string. @@ -245,7 +245,7 @@ OPAL_DECLSPEC char **opal_argv_copy(char **argv) __opal_attribute_malloc__ __op * free()ed (it is assumed that the argv "owns" the memory that * the pointer points to). 
*/ -OPAL_DECLSPEC int opal_argv_delete(int *argc, char ***argv, +OPAL_DECLSPEC int opal_argv_delete(int *argc, char ***argv, int start, int num_to_delete); /** diff --git a/opal/util/basename.c b/opal/util/basename.c index f293162b2a6..1558f4554f4 100644 --- a/opal/util/basename.c +++ b/opal/util/basename.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,18 +14,16 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" #include -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #ifdef HAVE_LIBGEN_H #include #endif /* HAVE_LIBGEN_H */ diff --git a/opal/util/basename.h b/opal/util/basename.h index 23476fe97fa..5d0cf04b6e5 100644 --- a/opal/util/basename.h +++ b/opal/util/basename.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** +/** * @file * * Returns an OS-independant basename() of a given filename. @@ -47,7 +47,7 @@ BEGIN_C_DECLS * Windows-based operating systems. 
For example: * * foo.txt returns "foo.txt" - * + * * /foo/bar/baz returns "baz" * * /yow.c returns "yow.c" @@ -86,7 +86,7 @@ OPAL_DECLSPEC char *opal_basename(const char* filename) __opal_attribute_malloc_ * Windows-based operating systems. For example: * * foo.txt returns "foo.txt" - * + * * /foo/bar/baz returns "/foo/bar" * * /yow.c returns "/" diff --git a/opal/util/bit_ops.h b/opal/util/bit_ops.h index e08a91fe0d1..34ea4418abf 100644 --- a/opal/util/bit_ops.h +++ b/opal/util/bit_ops.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -62,7 +62,7 @@ static inline int opal_hibit(int value, int start) } } #endif - + return start; } @@ -82,7 +82,7 @@ static inline int opal_hibit(int value, int start) * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). */ -static inline int opal_cube_dim(int value) +static inline int opal_cube_dim(int value) { int dim, size; @@ -91,7 +91,7 @@ static inline int opal_cube_dim(int value) return 0; } size = 8 * sizeof(int); - dim = size - __builtin_clz(value-1); + dim = size - __builtin_clz(value-1); #else for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */; #endif diff --git a/opal/util/cmd_line.c b/opal/util/cmd_line.c index 91c7e490cf9..cc1e99e0f88 100644 --- a/opal/util/cmd_line.c +++ b/opal/util/cmd_line.c @@ -5,28 +5,26 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. + * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" #include -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include #include "opal/class/opal_object.h" @@ -132,10 +130,10 @@ static char special_empty_token[] = { static int make_opt(opal_cmd_line_t *cmd, opal_cmd_line_init_t *e); static void free_parse_results(opal_cmd_line_t *cmd); static int split_shorts(opal_cmd_line_t *cmd, - char *token, char **args, - int *output_argc, char ***output_argv, + char *token, char **args, + int *output_argc, char ***output_argv, int *num_args_used, bool ignore_unknown); -static cmd_line_option_t *find_option(opal_cmd_line_t *cmd, +static cmd_line_option_t *find_option(opal_cmd_line_t *cmd, const char *option_name) __opal_attribute_nonnull__(1) __opal_attribute_nonnull__(2); static int set_dest(cmd_line_option_t *option, char *sval); static void fill(const cmd_line_option_t *a, char result[3][BUFSIZ]); @@ -206,8 +204,8 @@ int opal_cmd_line_make_opt_mca(opal_cmd_line_t *cmd, /* * Create a command line option, --long-name and/or -s (short name). 
*/ -int opal_cmd_line_make_opt3(opal_cmd_line_t *cmd, char short_name, - const char *sd_name, const char *long_name, +int opal_cmd_line_make_opt3(opal_cmd_line_t *cmd, char short_name, + const char *sd_name, const char *long_name, int num_params, const char *desc) { opal_cmd_line_init_t e; @@ -292,7 +290,7 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, if (0 == strcmp(cmd->lcl_argv[i], "--")) { ++i; while (i < cmd->lcl_argc) { - opal_argv_append(&cmd->lcl_tail_argc, &cmd->lcl_tail_argv, + opal_argv_append(&cmd->lcl_tail_argc, &cmd->lcl_tail_argv, cmd->lcl_argv[i]); ++i; } @@ -330,9 +328,9 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, if (NULL == option) { shortsv = NULL; shortsc = 0; - ret = split_shorts(cmd, cmd->lcl_argv[i] + 1, + ret = split_shorts(cmd, cmd->lcl_argv[i] + 1, &(cmd->lcl_argv[i + 1]), - &shortsc, &shortsv, + &shortsc, &shortsv, &num_args_used, ignore_unknown); if (OPAL_SUCCESS == ret) { option = find_option(cmd, shortsv[0] + 1); @@ -403,7 +401,7 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, printed_error = true; goto error; } else { - if (0 == strcmp(cmd->lcl_argv[i], + if (0 == strcmp(cmd->lcl_argv[i], special_empty_token)) { fprintf(stderr, "%s: Error: option \"%s\" did not " "have enough parameters (%d)\n", @@ -420,7 +418,7 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, OBJ_RELEASE(param); printed_error = true; goto error; - } + } /* Otherwise, save this parameter */ @@ -428,7 +426,7 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, /* Save in the argv on the param entry */ opal_argv_append(¶m->clp_argc, - ¶m->clp_argv, + ¶m->clp_argv, cmd->lcl_argv[i]); /* If it's the first, save it in the @@ -471,7 +469,7 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, an error and return. 
*/ if (is_unknown_option || is_unknown_token) { if (!ignore_unknown || is_unknown_option) { - fprintf(stderr, "%s: Error: unknown option \"%s\"\n", + fprintf(stderr, "%s: Error: unknown option \"%s\"\n", cmd->lcl_argv[0], cmd->lcl_argv[i]); printed_error = true; if (have_help_option) { @@ -481,7 +479,7 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, } error: while (i < cmd->lcl_argc) { - opal_argv_append(&cmd->lcl_tail_argc, &cmd->lcl_tail_argv, + opal_argv_append(&cmd->lcl_tail_argc, &cmd->lcl_tail_argv, cmd->lcl_argv[i]); ++i; } @@ -529,13 +527,13 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) /* First, take the original list and sort it */ - sorted = (cmd_line_option_t**)malloc(sizeof(cmd_line_option_t *) * + sorted = (cmd_line_option_t**)malloc(sizeof(cmd_line_option_t *) * opal_list_get_size(&cmd->lcl_options)); if (NULL == sorted) { opal_mutex_unlock(&cmd->lcl_mutex); return NULL; } - for (i = 0, item = opal_list_get_first(&cmd->lcl_options); + for (i = 0, item = opal_list_get_first(&cmd->lcl_options); opal_list_get_end(&cmd->lcl_options) != item; ++i, item = opal_list_get_next(item)) { sorted[i] = (cmd_line_option_t *) item; @@ -548,7 +546,7 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) option = sorted[j]; if (NULL != option->clo_description) { bool filled = false; - + /* Build up the output line */ memset(line, 0, sizeof(line)); @@ -647,7 +645,7 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) line's worth and add it to the array. Then reset and loop around to get the next line's worth. */ - for (ptr = start + (MAX_WIDTH - PARAM_WIDTH); + for (ptr = start + (MAX_WIDTH - PARAM_WIDTH); ptr > start; --ptr) { if (isspace(*ptr)) { *ptr = '\0'; @@ -666,14 +664,14 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) and break there. 
*/ if (ptr == start) { - for (ptr = start + (MAX_WIDTH - PARAM_WIDTH); + for (ptr = start + (MAX_WIDTH - PARAM_WIDTH); ptr < start + len; ++ptr) { if (isspace(*ptr)) { *ptr = '\0'; strncat(line, start, sizeof(line) - 1); opal_argv_append(&argc, &argv, line); - + start = ptr + 1; memset(line, ' ', PARAM_WIDTH); line[PARAM_WIDTH] = '\0'; @@ -739,7 +737,7 @@ int opal_cmd_line_get_ninsts(opal_cmd_line_t *cmd, const char *opt) ret = 0; option = find_option(cmd, opt); if (NULL != option) { - for (item = opal_list_get_first(&cmd->lcl_params); + for (item = opal_list_get_first(&cmd->lcl_params); opal_list_get_end(&cmd->lcl_params) != item; item = opal_list_get_next(item)) { param = (cmd_line_param_t *) item; @@ -763,7 +761,7 @@ int opal_cmd_line_get_ninsts(opal_cmd_line_t *cmd, const char *opt) * Return a specific parameter for a specific instance of a option * from the parsed command line. */ -char *opal_cmd_line_get_param(opal_cmd_line_t *cmd, const char *opt, int inst, +char *opal_cmd_line_get_param(opal_cmd_line_t *cmd, const char *opt, int inst, int idx) { int num_found; @@ -786,7 +784,7 @@ char *opal_cmd_line_get_param(opal_cmd_line_t *cmd, const char *opt, int inst, parameter index greater than we will have */ if (idx < option->clo_num_params) { - for (item = opal_list_get_first(&cmd->lcl_params); + for (item = opal_list_get_first(&cmd->lcl_params); opal_list_get_end(&cmd->lcl_params) != item; item = opal_list_get_next(item)) { param = (cmd_line_param_t *) item; @@ -800,11 +798,11 @@ char *opal_cmd_line_get_param(opal_cmd_line_t *cmd, const char *opt, int inst, } } } - + /* Thread serialization */ - + opal_mutex_unlock(&cmd->lcl_mutex); - + /* All done */ return NULL; @@ -959,8 +957,8 @@ static int make_opt(opal_cmd_line_t *cmd, opal_cmd_line_init_t *e) if (NULL == cmd) { return OPAL_ERR_BAD_PARAM; - } else if ('\0' == e->ocl_cmd_short_name && - NULL == e->ocl_cmd_single_dash_name && + } else if ('\0' == e->ocl_cmd_short_name && + NULL == e->ocl_cmd_single_dash_name 
&& NULL == e->ocl_cmd_long_name) { return OPAL_ERR_BAD_PARAM; } else if (e->ocl_num_params < 0) { @@ -1013,7 +1011,7 @@ static void free_parse_results(opal_cmd_line_t *cmd) itself; it was not allocated from the heap) */ for (item = opal_list_remove_first(&cmd->lcl_params); - NULL != item; + NULL != item; item = opal_list_remove_first(&cmd->lcl_params)) { OBJ_RELEASE(item); } @@ -1040,8 +1038,8 @@ static void free_parse_results(opal_cmd_line_t *cmd) * short name). Ensure to differentiate the resulting options from * "single dash" names. */ -static int split_shorts(opal_cmd_line_t *cmd, char *token, char **args, - int *output_argc, char ***output_argv, +static int split_shorts(opal_cmd_line_t *cmd, char *token, char **args, + int *output_argc, char ***output_argv, int *num_args_used, bool ignore_unknown) { int i, j, len; @@ -1078,7 +1076,7 @@ static int split_shorts(opal_cmd_line_t *cmd, char *token, char **args, } else { opal_argv_append(output_argc, output_argv, fake_token); } - } + } /* If we do find the option, copy it and all of its parameters to the output args. If we run out of paramters (i.e., no @@ -1093,7 +1091,7 @@ static int split_shorts(opal_cmd_line_t *cmd, char *token, char **args, args[*num_args_used]); ++(*num_args_used); } else { - opal_argv_append(output_argc, output_argv, + opal_argv_append(output_argc, output_argv, special_empty_token); } } @@ -1106,7 +1104,7 @@ static int split_shorts(opal_cmd_line_t *cmd, char *token, char **args, } -static cmd_line_option_t *find_option(opal_cmd_line_t *cmd, +static cmd_line_option_t *find_option(opal_cmd_line_t *cmd, const char *option_name) { opal_list_item_t *item; @@ -1131,7 +1129,7 @@ static cmd_line_option_t *find_option(opal_cmd_line_t *cmd, } /* Not found */ - + return NULL; } @@ -1152,7 +1150,7 @@ static int set_dest(cmd_line_option_t *option, char *sval) registered table alone and set an environment variable with the desired value. 
The environment variable will get picked up during a nromal parameter lookup, and all will be well. */ - + if (NULL != option->clo_mca_param_env_var) { switch(option->clo_type) { case OPAL_CMD_LINE_TYPE_STRING: diff --git a/opal/util/cmd_line.h b/opal/util/cmd_line.h index 0a47c88e0cf..449c5d6faab 100644 --- a/opal/util/cmd_line.h +++ b/opal/util/cmd_line.h @@ -5,20 +5,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2017 Cisco Systems, Inc. All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** - * @file + * @file * * General command line parsing facility for use throughout Open MPI. * @@ -41,7 +41,7 @@ * sparingly. * * The "long" name is a multi-character name that is found after a - * pair of dashes. For example, "--some-option-name". + * pair of dashes. For example, "--some-option-name". * * A command line option is a combination of 1 or more of a short * name, single dash name, and a long name. 
Any of the names may be @@ -126,21 +126,21 @@ BEGIN_C_DECLS struct opal_cmd_line_t { /** Make this an OBJ handle */ opal_object_t super; - + /** Thread safety */ opal_mutex_t lcl_mutex; - + /** List of cmd_line_option_t's (defined internally) */ opal_list_t lcl_options; - + /** Duplicate of argc from opal_cmd_line_parse() */ int lcl_argc; /** Duplicate of argv from opal_cmd_line_parse() */ char **lcl_argv; - + /** Parsed output; list of cmd_line_param_t's (defined internally) */ opal_list_t lcl_params; - + /** List of tail (unprocessed) arguments */ int lcl_tail_argc; /** List of tail (unprocessed) arguments */ @@ -210,7 +210,7 @@ BEGIN_C_DECLS * Convenience typedef */ typedef struct opal_cmd_line_init_t opal_cmd_line_init_t; - + /** * Top-level command line handle. * @@ -251,7 +251,7 @@ BEGIN_C_DECLS * * \code * opal_cmd_line_init_t cmd_line_init[] = { - * { NULL, NULL, NULL, 'h', NULL, "help", 0, + * { NULL, NULL, NULL, 'h', NULL, "help", 0, * &orterun_globals.help, OPAL_CMD_LINE_TYPE_BOOL, * "This help message" }, * @@ -307,11 +307,11 @@ BEGIN_C_DECLS * used to generate the output from opal_cmd_line_get_usage_msg(). * */ - OPAL_DECLSPEC int opal_cmd_line_make_opt3(opal_cmd_line_t *cmd, - char short_name, + OPAL_DECLSPEC int opal_cmd_line_make_opt3(opal_cmd_line_t *cmd, + char short_name, const char *sd_name, - const char *long_name, - int num_params, + const char *long_name, + int num_params, const char *desc); /** @@ -352,7 +352,7 @@ BEGIN_C_DECLS * example, if "--fo" is specified, and no "fo" option is * registered (e.g., perhaps the user meant to type "--foo"), an * error message is always printed, UNLESS this unknown token - * happens after a "--" token (see below). + * happens after a "--" token (see below). * * The contents of argc and argv are not changed during parsing. 
* argv[0] is assumed to be the executable name, and is ignored during @@ -401,7 +401,7 @@ BEGIN_C_DECLS * different sets of argv tokens is safe, but will erase any * previous parsing results. */ - OPAL_DECLSPEC int opal_cmd_line_parse(opal_cmd_line_t *cmd, + OPAL_DECLSPEC int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, int argc, char **argv); @@ -441,13 +441,13 @@ BEGIN_C_DECLS * opal_cmd_line_parse(), or opal_cmd_line_parse() was not invoked on * this handle. * - * This function should only be called after opal_cmd_line_parse(). + * This function should only be called after opal_cmd_line_parse(). * * The function will return true if the option matching opt was found * (either by its short or long name) during token parsing. * Otherwise, it will return false. */ - OPAL_DECLSPEC bool opal_cmd_line_is_taken(opal_cmd_line_t *cmd, + OPAL_DECLSPEC bool opal_cmd_line_is_taken(opal_cmd_line_t *cmd, const char *opt) __opal_attribute_nonnull__(1) __opal_attribute_nonnull__(2); /** @@ -480,7 +480,7 @@ BEGIN_C_DECLS * What is returned is a pointer to the actual string that is on * the handle; it should not be modified or freed. */ - OPAL_DECLSPEC char *opal_cmd_line_get_argv(opal_cmd_line_t *cmd, + OPAL_DECLSPEC char *opal_cmd_line_get_argv(opal_cmd_line_t *cmd, int index); /** @@ -503,7 +503,7 @@ BEGIN_C_DECLS * either the option was not specified as part of the OPAL command line * handle, or opal_cmd_line_parse() was not invoked on this handle. */ - OPAL_DECLSPEC int opal_cmd_line_get_ninsts(opal_cmd_line_t *cmd, + OPAL_DECLSPEC int opal_cmd_line_get_ninsts(opal_cmd_line_t *cmd, const char *opt) __opal_attribute_nonnull__(1) __opal_attribute_nonnull__(2); /** @@ -518,7 +518,7 @@ BEGIN_C_DECLS * @retval param String of the parameter. * @retval NULL If any of the input values are invalid. * - * This function should only be called after opal_cmd_line_parse(). + * This function should only be called after opal_cmd_line_parse(). 
* * This function returns the Nth parameter for the Ith instance of a * given option on the parsed command line (both N and I are @@ -533,8 +533,8 @@ BEGIN_C_DECLS * The returned string should \em not be modified or freed by the * caller. */ - OPAL_DECLSPEC char *opal_cmd_line_get_param(opal_cmd_line_t *cmd, - const char *opt, + OPAL_DECLSPEC char *opal_cmd_line_get_param(opal_cmd_line_t *cmd, + const char *opt, int instance_num, int param_num); @@ -565,8 +565,8 @@ BEGIN_C_DECLS * of the tail parameters, and must be freed (likely with a call * to opal_argv_free()) by the caller. */ - OPAL_DECLSPEC int opal_cmd_line_get_tail(opal_cmd_line_t *cmd, int *tailc, - char ***tailv) __opal_attribute_nonnull__(1) __opal_attribute_nonnull__(2); + OPAL_DECLSPEC int opal_cmd_line_get_tail(opal_cmd_line_t *cmd, int *tailc, + char ***tailv); END_C_DECLS diff --git a/opal/util/crc.c b/opal/util/crc.c index cd697cbc66f..d74d434fe89 100644 --- a/opal/util/crc.c +++ b/opal/util/crc.c @@ -22,15 +22,11 @@ #ifdef HAVE_STDIO_H #include #endif /* HAVE_STDIO_H */ -#ifdef HAVE_STDLIB_H #include -#endif /* HAVE_STDLIB_H */ #ifdef HAVE_STRINGS_H #include #endif /* HAVE_STRINGS_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ @@ -313,7 +309,7 @@ opal_bcopy_csum_partial ( memcpy(((char *)&temp + *lastPartialLength), src, (sizeof(unsigned long) - *lastPartialLength)); /* avoid unsigned arithmetic overflow by subtracting the old partial - * word from the new one before adding to the checksum... + * word from the new one before adding to the checksum... 
*/ csum += (temp - *lastPartialLong); copylen -= sizeof(unsigned long) - *lastPartialLength; @@ -1098,10 +1094,10 @@ void opal_initialize_crc_table(void) } unsigned int opal_bcopy_uicrc_partial( - const void * source, + const void * source, void * destination, - size_t copylen, - size_t crclen, + size_t copylen, + size_t crclen, unsigned int partial_crc) { size_t crclenresidue = (crclen > copylen) ? (crclen - copylen) : 0; @@ -1163,7 +1159,7 @@ unsigned int opal_bcopy_uicrc_partial( unsigned int opal_uicrc_partial( - const void * source, size_t crclen, unsigned int partial_crc) + const void * source, size_t crclen, unsigned int partial_crc) { register int i, j; register unsigned char * t; @@ -1172,7 +1168,7 @@ unsigned int opal_uicrc_partial( if (!_opal_crc_table_initialized) { opal_initialize_crc_table(); } - + if (INTALIGNED(source)) { register unsigned int * src = (unsigned int *)source; while (crclen >= sizeof(unsigned int)) { @@ -1197,7 +1193,7 @@ unsigned int opal_uicrc_partial( partial_crc = (partial_crc << 8) ^ _opal_crc_table[i]; } } - + return partial_crc; } diff --git a/opal/util/crc.h b/opal/util/crc.h index 55cdd2c08c7..22228d1a789 100644 --- a/opal/util/crc.h +++ b/opal/util/crc.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2009 IBM Corporation. All rights reserved. * Copyright (c) 2009 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -62,8 +62,8 @@ opal_bcopy_csum ( size_t plength = 0; return opal_bcopy_csum_partial(source, destination, copylen, csumlen, &plong, &plength); } - -OPAL_DECLSPEC unsigned int + +OPAL_DECLSPEC unsigned int opal_bcopy_uicsum_partial ( const void * source, void * destination, @@ -85,8 +85,8 @@ opal_bcopy_uicsum ( size_t plength = 0; return opal_bcopy_uicsum_partial(source, destination, copylen, csumlen, &pint, &plength); } - -OPAL_DECLSPEC unsigned long + +OPAL_DECLSPEC unsigned long opal_csum_partial ( const void * source, size_t csumlen, @@ -95,7 +95,7 @@ opal_csum_partial ( ); -static inline unsigned long +static inline unsigned long opal_csum(const void * source, size_t csumlen) { unsigned long lastPartialLong = 0; @@ -115,13 +115,13 @@ opal_csum16 (const void * source, size_t csumlen) csum += *src++; csumlen -= 2; } - /* Add leftover byte, if any */ + /* Add leftover byte, if any */ if(csumlen > 0) csum += *((unsigned char*)src); /* Fold 32-bit checksum to 16 bits */ while(csum >> 16) { - csum = (csum & 0xFFFF) + (csum >> 16); - } + csum = (csum & 0xFFFF) + (csum >> 16); + } return csum; } @@ -133,21 +133,21 @@ opal_uicsum_partial ( size_t* lastPartialLength ); -static inline unsigned int +static inline unsigned int opal_uicsum(const void * source, size_t csumlen) { unsigned int lastPartialInt = 0; size_t lastPartialLength = 0; return opal_uicsum_partial(source, csumlen, &lastPartialInt, &lastPartialLength); } - + /* * CRC Support */ void opal_initialize_crc_table(void); -OPAL_DECLSPEC unsigned int +OPAL_DECLSPEC unsigned int opal_bcopy_uicrc_partial( const void * source, void * destination, @@ -155,29 +155,29 @@ opal_bcopy_uicrc_partial( size_t crclen, unsigned int partial_crc); -static inline unsigned int +static inline unsigned int opal_bcopy_uicrc( - const void * source, + const void * source, void * destination, - size_t copylen, + size_t copylen, size_t crclen) { return opal_bcopy_uicrc_partial(source, 
destination, copylen, crclen, CRC_INITIAL_REGISTER); } -OPAL_DECLSPEC unsigned int +OPAL_DECLSPEC unsigned int opal_uicrc_partial( - const void * source, - size_t crclen, + const void * source, + size_t crclen, unsigned int partial_crc); -static inline unsigned int +static inline unsigned int opal_uicrc(const void * source, size_t crclen) { return opal_uicrc_partial(source, crclen, CRC_INITIAL_REGISTER); } - + END_C_DECLS #endif diff --git a/opal/util/daemon_init.c b/opal/util/daemon_init.c index a36d26e790e..6caae124224 100644 --- a/opal/util/daemon_init.c +++ b/opal/util/daemon_init.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -45,7 +45,7 @@ int opal_daemon_init(char *working_dir) } else if (pid != 0) { exit(0); /* parent goes bye-bye */ } - + /* child continues */ #if defined(HAVE_SETSID) setsid(); /* become session leader */ diff --git a/opal/util/daemon_init.h b/opal/util/daemon_init.h index 4055999de44..ebcf46074d8 100644 --- a/opal/util/daemon_init.h +++ b/opal/util/daemon_init.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file **/ diff --git a/opal/util/error.c b/opal/util/error.c index f49393ff8fa..677423ee3de 100644 --- a/opal/util/error.c +++ b/opal/util/error.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,22 +15,18 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" -#ifdef HAVE_STRING_H #include -#endif #include #include -#ifdef HAVE_STDLIB_H #include -#endif #include "opal/util/error.h" #include "opal/constants.h" @@ -78,10 +74,10 @@ opal_strerror_unknown(int errnum, char **str) for (i = 0 ; i < MAX_CONVERTERS ; ++i) { if (0 != converters[i].init) { - if (errnum < converters[i].err_base && + if (errnum < converters[i].err_base && errnum > converters[i].err_max) { asprintf(str, "Unknown error: %d (%s error %d)", - errnum, converters[i].project, + errnum, converters[i].project, errnum - converters[i].err_base); return OPAL_SUCCESS; } diff --git a/opal/util/error.h b/opal/util/error.h index 88e1093e747..19268190e3f 100644 --- a/opal/util/error.h +++ b/opal/util/error.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -62,7 +62,7 @@ OPAL_DECLSPEC const char *opal_strerror(int errnum); /** * Return string for given error message - * + * * Similar to opal_strerror, but a buffer is passed in which is filled * with a string (up to buflen - 1 characters long) containing the * error message corresponding to \c errnum. Unlike opal_strerror(), diff --git a/opal/util/fd.c b/opal/util/fd.c index 7c9a7d408e3..63558107a2a 100644 --- a/opal/util/fd.c +++ b/opal/util/fd.c @@ -3,9 +3,9 @@ * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/fd.h b/opal/util/fd.h index a5db9223504..d32c3a98107 100644 --- a/opal/util/fd.h +++ b/opal/util/fd.h @@ -3,9 +3,9 @@ * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/few.c b/opal/util/few.c index c239d4ef1d4..45e827e8f50 100644 --- a/opal/util/few.c +++ b/opal/util/few.c @@ -1,18 +1,18 @@ -/* +/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -45,20 +45,20 @@ int opal_few(char *argv[], int *status) /* Child execs. If it fails to exec, exit. */ - else if (0 == pid) { + else if (0 == pid) { execvp(argv[0], argv); exit(errno); } /* Parent loops waiting for the child to die. */ - else { + else { do { /* If the child exited, return */ if (pid == (ret = waitpid(pid, status, 0))) { break; - } + } /* If waitpid was interrupted, loop around again */ diff --git a/opal/util/few.h b/opal/util/few.h index 2d282a8c6ac..a680ae90572 100644 --- a/opal/util/few.h +++ b/opal/util/few.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +22,7 @@ #include "opal_config.h" BEGIN_C_DECLS - + /** * Forks, execs, and waits for a subordinate program * diff --git a/opal/util/if.c b/opal/util/if.c index 1986b2381e1..472dfe9aeb8 100644 --- a/opal/util/if.c +++ b/opal/util/if.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -88,9 +88,9 @@ #ifndef MIN # define MIN(a,b) ((a) < (b) ? (a) : (b)) #endif - + /* - * Look for interface by name and returns its address + * Look for interface by name and returns its address * as a dotted decimal formatted string. */ @@ -115,7 +115,7 @@ int opal_ifnametoaddr(const char* if_name, struct sockaddr* addr, int length) /* - * Look for interface by name and returns its + * Look for interface by name and returns its * corresponding opal_list index. */ @@ -139,7 +139,7 @@ int opal_ifnametoindex(const char* if_name) /* - * Look for interface by name and returns its + * Look for interface by name and returns its * corresponding kernel index. */ @@ -163,7 +163,7 @@ int16_t opal_ifnametokindex(const char* if_name) /* - * Look for interface by opal_list index and returns its + * Look for interface by opal_list index and returns its * corresponding kernel index. */ @@ -204,7 +204,7 @@ int opal_ifaddrtoname(const char* if_addr, char* if_name, int length) */ return OPAL_ERR_NOT_FOUND; } - + if (OPAL_SUCCESS != mca_base_framework_open(&opal_if_base_framework, 0)) { return OPAL_ERROR; } @@ -225,14 +225,14 @@ int opal_ifaddrtoname(const char* if_addr, char* if_name, int length) for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); intf != (opal_if_t*)opal_list_get_end(&opal_if_list); intf = (opal_if_t*)opal_list_get_next(intf)) { - + if (AF_INET == r->ai_family) { struct sockaddr_in ipv4; struct sockaddr_in *inaddr; inaddr = (struct sockaddr_in*) &intf->if_addr; memcpy (&ipv4, r->ai_addr, r->ai_addrlen); - + if (inaddr->sin_addr.s_addr == ipv4.sin_addr.s_addr) { strncpy(if_name, intf->if_name, length); freeaddrinfo (res); @@ -388,8 +388,8 @@ int opal_ifnext(int if_index) } -/* - * Lookup the interface by opal_list index and return the +/* + * Lookup the interface by opal_list index and return the * primary address assigned to the interface. 
*/ @@ -413,8 +413,8 @@ int opal_ifindextoaddr(int if_index, struct sockaddr* if_addr, unsigned int leng } -/* - * Lookup the interface by opal_list kindex and return the +/* + * Lookup the interface by opal_list kindex and return the * primary address assigned to the interface. */ int opal_ifkindextoaddr(int if_kindex, struct sockaddr* if_addr, unsigned int length) @@ -437,8 +437,8 @@ int opal_ifkindextoaddr(int if_kindex, struct sockaddr* if_addr, unsigned int le } -/* - * Lookup the interface by opal_list index and return the +/* + * Lookup the interface by opal_list index and return the * network mask assigned to the interface. */ @@ -461,8 +461,8 @@ int opal_ifindextomask(int if_index, uint32_t* if_mask, int length) return OPAL_ERROR; } -/* - * Lookup the interface by opal_list index and return the +/* + * Lookup the interface by opal_list index and return the * MAC assigned to the interface. */ @@ -481,8 +481,8 @@ int opal_ifindextomac(int if_index, uint8_t mac[6]) return OPAL_ERROR; } -/* - * Lookup the interface by opal_list index and return the +/* + * Lookup the interface by opal_list index and return the * MTU assigned to the interface. */ @@ -501,8 +501,8 @@ int opal_ifindextomtu(int if_index, int *mtu) return OPAL_ERROR; } -/* - * Lookup the interface by opal_list index and return the +/* + * Lookup the interface by opal_list index and return the * flags assigned to the interface. */ @@ -527,7 +527,7 @@ int opal_ifindextoflags(int if_index, uint32_t* if_flags) -/* +/* * Lookup the interface by opal_list index and return * the associated name. */ @@ -552,7 +552,7 @@ int opal_ifindextoname(int if_index, char* if_name, int length) } -/* +/* * Lookup the interface by kernel index and return * the associated name. */ @@ -630,12 +630,12 @@ opal_iftupletoaddr(const char *inaddr, uint32_t *net, uint32_t *mask) { int pval, dots, rc = OPAL_SUCCESS; const char *ptr; - + /* if a mask was desired... 
*/ if (NULL != mask) { /* set default */ *mask = 0xFFFFFFFF; - + /* if entry includes mask, split that off */ if (NULL != (ptr = strchr(inaddr, '/'))) { ptr = ptr + 1; /* skip the / */ @@ -679,24 +679,24 @@ opal_iftupletoaddr(const char *inaddr, uint32_t *net, uint32_t *mask) } } } - + /* if network addr is desired... */ if (NULL != net) { /* now assemble the address */ rc = parse_ipv4_dots(inaddr, net, &dots); } - + return rc; } -/* +/* * Determine if the specified interface is loopback */ bool opal_ifisloopback(int if_index) { opal_if_t* intf; - + if (OPAL_SUCCESS != mca_base_framework_open(&opal_if_base_framework, 0)) { return OPAL_ERROR; } @@ -800,7 +800,7 @@ void opal_ifgetaliases(char ***aliases) opal_argv_append_nosize(aliases, ipv6); } #endif - } + } } #else /* HAVE_STRUCT_SOCKADDR_IN */ @@ -809,14 +809,14 @@ void opal_ifgetaliases(char ***aliases) ethernet devices. Just make everything a no-op error call */ int -opal_ifnametoaddr(const char* if_name, +opal_ifnametoaddr(const char* if_name, struct sockaddr* if_addr, int size) { return OPAL_ERR_NOT_SUPPORTED; } int -opal_ifaddrtoname(const char* if_addr, +opal_ifaddrtoname(const char* if_addr, char* if_name, int size) { return OPAL_ERR_NOT_SUPPORTED; diff --git a/opal/util/if.h b/opal/util/if.h index 3c5156338cb..c84e9f65fbd 100644 --- a/opal/util/if.h +++ b/opal/util/if.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -51,32 +51,32 @@ BEGIN_C_DECLS (((n1) << 24) & 0xFF000000) | \ (((n2) << 16) & 0x00FF0000) | \ (((n3) << 8) & 0x0000FF00) | \ - ( (n4) & 0x000000FF) - + ( (n4) & 0x000000FF) + /** * Lookup an interface by name and return its primary address. - * + * * @param if_name (IN) Interface name * @param if_addr (OUT) Interface address buffer * @param size (IN) Interface address buffer size */ -OPAL_DECLSPEC int opal_ifnametoaddr(const char* if_name, +OPAL_DECLSPEC int opal_ifnametoaddr(const char* if_name, struct sockaddr* if_addr, int size); /** * Lookup an interface by address and return its name. - * + * * @param if_addr (IN) Interface address (hostname or dotted-quad) * @param if_name (OUT) Interface name buffer * @param size (IN) Interface name buffer size */ -OPAL_DECLSPEC int opal_ifaddrtoname(const char* if_addr, +OPAL_DECLSPEC int opal_ifaddrtoname(const char* if_addr, char* if_name, int size); /** * Lookup an interface by name and return its opal_list index. - * + * * @param if_name (IN) Interface name * @return Interface opal_list index */ @@ -84,7 +84,7 @@ OPAL_DECLSPEC int opal_ifnametoindex(const char* if_name); /** * Lookup an interface by name and return its kernel index. - * + * * @param if_name (IN) Interface name * @return Interface kernel index */ @@ -99,7 +99,7 @@ OPAL_DECLSPEC int16_t opal_ifaddrtokindex(const char* if_addr); /** * Lookup an interface by opal_list index and return its kernel index. - * + * * @param if_name (IN) Interface opal_list index * @return Interface kernel index */ @@ -113,13 +113,13 @@ OPAL_DECLSPEC int opal_ifcount(void); /** * Returns the index of the first available interface. */ -OPAL_DECLSPEC int opal_ifbegin(void); +OPAL_DECLSPEC int opal_ifbegin(void); /** * Lookup the current position in the interface list by * index and return the next available index (if it exists). 
* - * @param if_index Returns the next available index from the + * @param if_index Returns the next available index from the * current position. */ OPAL_DECLSPEC int opal_ifnext(int if_index); diff --git a/opal/util/keyval/keyval_lex.h b/opal/util/keyval/keyval_lex.h index a4b6fb9e149..95174ad9b39 100644 --- a/opal/util/keyval/keyval_lex.h +++ b/opal/util/keyval/keyval_lex.h @@ -6,14 +6,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/keyval/keyval_lex.l b/opal/util/keyval/keyval_lex.l index b1ce818e1c5..e0cb6ec120d 100644 --- a/opal/util/keyval/keyval_lex.l +++ b/opal/util/keyval/keyval_lex.l @@ -10,16 +10,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -69,7 +69,7 @@ CHAR [A-Za-z0-9_\-\.] 
[^*\n]* ; /* Eat up non '*'s */ "*"+[^*/\n]* ; /* Eat '*'s not followed by a '/' */ \n { opal_util_keyval_yynewlines++; - return OPAL_UTIL_KEYVAL_PARSE_NEWLINE; } + return OPAL_UTIL_KEYVAL_PARSE_NEWLINE; } "*"+"/" { BEGIN(INITIAL); /* Done with Block Comment */ return OPAL_UTIL_KEYVAL_PARSE_NEWLINE; } @@ -102,7 +102,7 @@ CHAR [A-Za-z0-9_\-\.] int opal_util_keyval_yylex_destroy(void) { if (NULL != YY_CURRENT_BUFFER) { - yy_delete_buffer(YY_CURRENT_BUFFER); + yy_delete_buffer(YY_CURRENT_BUFFER); #if defined(YY_CURRENT_BUFFER_LVALUE) YY_CURRENT_BUFFER_LVALUE = NULL; #else diff --git a/opal/util/keyval_parse.c b/opal/util/keyval_parse.c index 8561ecb7c6d..aef56d8c37c 100644 --- a/opal/util/keyval_parse.c +++ b/opal/util/keyval_parse.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -26,9 +26,8 @@ #include "opal/util/keyval/keyval_lex.h" #include "opal/util/output.h" #include "opal/threads/mutex.h" -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ +#include int opal_util_keyval_parse_lineno = 0; @@ -57,6 +56,8 @@ int opal_util_keyval_parse_finalize(void) { if (NULL != key_buffer) free(key_buffer); + key_buffer = NULL; + key_buffer_len = 0; OBJ_DESTRUCT(&keyval_mutex); @@ -200,53 +201,58 @@ int opal_util_keyval_save_internal_envars(opal_keyval_parse_fn_t callback) return OPAL_SUCCESS; } -static int trim_name(char **buffer, const char* prefix, const char* suffix) +static void trim_name(char *buffer, const char* prefix, const char* suffix) { - char *pchr, *echr, *tmp; - int size; - if (NULL == *buffer) { - return 1; + char *pchr, *echr; + size_t buffer_len; + + if (NULL == buffer) { + return; } - pchr = *buffer; + + buffer_len = strlen (buffer); + + pchr = buffer; if (NULL != prefix) { - pchr = strstr(*buffer, prefix); - if (NULL != pchr) { - pchr += strlen(prefix); - } else { - pchr = *buffer; + size_t prefix_len = strlen (prefix); + + if (0 == strncmp (buffer, prefix, prefix_len)) { + pchr += prefix_len; } } + /* trim spaces at the beginning */ - while (' ' == *pchr || '\t' == *pchr) { + while (isspace (*pchr)) { pchr++; } + /* trim spaces at the end */ - echr = *buffer+strlen(*buffer)-1; - while (' ' == *echr || '\t' == *echr || '\n' == *echr) { + echr = buffer + buffer_len; + while (echr > buffer && isspace (*(echr - 1))) { echr--; } - echr++; - *echr = '\0'; - if (NULL != suffix) { - if (!strncmp(echr-strlen(suffix), suffix, strlen(suffix))) { - echr -= strlen(suffix)+1; - while (' ' == *echr || '\t' == *echr) { + echr[0] = '\0'; + + if (NULL != suffix && (uintptr_t) (echr - buffer) > strlen (suffix)) { + size_t suffix_len = strlen (suffix); + + echr -= suffix_len; + + if (0 == strncmp (echr, suffix, strlen(suffix))) { + do { echr--; - } - echr++; - *echr = '\0'; + } while (isspace (*echr)); + echr[1] = '\0'; } } 
- size = strlen(pchr)+1; - tmp = malloc(size); - strncpy(tmp, pchr, size); - *buffer = realloc(*buffer, size); - strncpy(*buffer, tmp, size); - free(tmp); - return 0; + + if (buffer != pchr) { + /* move the trimmed string to the beginning of the buffer */ + memmove (buffer, pchr, strlen (pchr) + 1); + } } -static int save_param_name(const char* prefix, const char* suffix) +static int save_param_name (void) { if (key_buffer_len < strlen(opal_util_keyval_yytext) + 1) { char *tmp; @@ -261,8 +267,8 @@ static int save_param_name(const char* prefix, const char* suffix) key_buffer = tmp; } - strncpy(key_buffer, opal_util_keyval_yytext, key_buffer_len); - trim_name(&key_buffer, prefix, suffix); + strncpy (key_buffer, opal_util_keyval_yytext, key_buffer_len); + return OPAL_SUCCESS; } @@ -309,18 +315,26 @@ static int parse_line_new(opal_keyval_parse_state_t first_val) { opal_keyval_parse_state_t val; char *tmp; + int rc; val = first_val; while (OPAL_UTIL_KEYVAL_PARSE_NEWLINE != val && OPAL_UTIL_KEYVAL_PARSE_DONE != val) { + rc = save_param_name (); + if (OPAL_SUCCESS != rc) { + return rc; + } + if (OPAL_UTIL_KEYVAL_PARSE_MCAVAR == val) { - save_param_name("-mca", NULL); + trim_name (key_buffer, "-mca", NULL); + trim_name (key_buffer, "--mca", NULL); + val = opal_util_keyval_yylex(); if (OPAL_UTIL_KEYVAL_PARSE_VALUE == val) { if (NULL != opal_util_keyval_yytext) { tmp = strdup(opal_util_keyval_yytext); if ('\'' == tmp[0] || '\"' == tmp[0]) { - trim_name(&tmp, "\'", "\'"); - trim_name(&tmp, "\"", "\""); + trim_name (tmp, "\'", "\'"); + trim_name (tmp, "\"", "\""); } keyval_callback(key_buffer, tmp); free(tmp); @@ -330,7 +344,9 @@ static int parse_line_new(opal_keyval_parse_state_t first_val) return OPAL_ERROR; } } else if (OPAL_UTIL_KEYVAL_PARSE_ENVEQL == val) { - save_param_name("-x", "="); + trim_name (key_buffer, "-x", "="); + trim_name (key_buffer, "--x", NULL); + val = opal_util_keyval_yylex(); if (OPAL_UTIL_KEYVAL_PARSE_VALUE == val) { add_to_env_str(key_buffer, 
opal_util_keyval_yytext); @@ -339,7 +355,8 @@ static int parse_line_new(opal_keyval_parse_state_t first_val) return OPAL_ERROR; } } else if (OPAL_UTIL_KEYVAL_PARSE_ENVVAR == val) { - save_param_name("-x", "="); + trim_name (key_buffer, "-x", "="); + trim_name (key_buffer, "--x", NULL); add_to_env_str(key_buffer, NULL); } else { /* we got something unexpected. Bonk! */ diff --git a/opal/util/keyval_parse.h b/opal/util/keyval_parse.h index 10219e229df..f0abe56db7e 100644 --- a/opal/util/keyval_parse.h +++ b/opal/util/keyval_parse.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -45,7 +45,7 @@ typedef void (*opal_keyval_parse_fn_t)(const char *key, const char *value); * called exactly once. In a multithreaded context, calls to * opal_util_keyval_parse() will serialize multiple calls. */ -OPAL_DECLSPEC int opal_util_keyval_parse(const char *filename, +OPAL_DECLSPEC int opal_util_keyval_parse(const char *filename, opal_keyval_parse_fn_t callback); OPAL_DECLSPEC int opal_util_keyval_parse_init(void); diff --git a/opal/util/malloc.c b/opal/util/malloc.c index fd05ec45dba..11d612e7391 100644 --- a/opal/util/malloc.c +++ b/opal/util/malloc.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -92,7 +92,7 @@ void *opal_malloc(size_t size, const char *file, int line) #if OPAL_ENABLE_DEBUG if (opal_malloc_debug_level > 1) { if (size <= 0) { - opal_output(opal_malloc_output, "Request for %ld bytes (%s, %d)", + opal_output(opal_malloc_output, "Request for %ld bytes (%s, %d)", (long) size, file, line); } } @@ -103,7 +103,7 @@ void *opal_malloc(size_t size, const char *file, int line) #if OPAL_ENABLE_DEBUG if (opal_malloc_debug_level > 0) { if (NULL == addr) { - opal_output(opal_malloc_output, + opal_output(opal_malloc_output, "Request for %ld bytes failed (%s, %d)", (long) size, file, line); } @@ -123,7 +123,7 @@ void *opal_calloc(size_t nmembers, size_t size, const char *file, int line) if (opal_malloc_debug_level > 1) { if (size <= 0) { opal_output(opal_malloc_output, - "Request for %ld zeroed elements of size %ld (%s, %d)", + "Request for %ld zeroed elements of size %ld (%s, %d)", (long) nmembers, (long) size, file, line); } } @@ -132,7 +132,7 @@ void *opal_calloc(size_t nmembers, size_t size, const char *file, int line) #if OPAL_ENABLE_DEBUG if (opal_malloc_debug_level > 0) { if (NULL == addr) { - opal_output(opal_malloc_output, + opal_output(opal_malloc_output, "Request for %ld zeroed elements of size %ld failed (%s, %d)", (long) nmembers, (long) size, file, line); } @@ -152,11 +152,11 @@ void *opal_realloc(void *ptr, size_t size, const char *file, int line) if (opal_malloc_debug_level > 1) { if (size <= 0) { if (NULL == ptr) { - opal_output(opal_malloc_output, - "Realloc NULL for %ld bytes (%s, %d)", + opal_output(opal_malloc_output, + "Realloc NULL for %ld bytes (%s, %d)", (long) size, file, line); } else { - opal_output(opal_malloc_output, "Realloc %p for %ld bytes (%s, %d)", + opal_output(opal_malloc_output, "Realloc %p for %ld bytes (%s, %d)", ptr, (long) size, file, line); } 
} @@ -166,7 +166,7 @@ void *opal_realloc(void *ptr, size_t size, const char *file, int line) #if OPAL_ENABLE_DEBUG if (opal_malloc_debug_level > 0) { if (NULL == addr) { - opal_output(opal_malloc_output, + opal_output(opal_malloc_output, "Realloc %p for %ld bytes failed (%s, %d)", ptr, (long) size, file, line); } diff --git a/opal/util/malloc.h b/opal/util/malloc.h index efeaf981b24..1ea177f6286 100644 --- a/opal/util/malloc.h +++ b/opal/util/malloc.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/net.c b/opal/util/net.c index c519fb99fa7..190336ca7d3 100644 --- a/opal/util/net.c +++ b/opal/util/net.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -162,12 +162,12 @@ opal_net_init(void) for( i = 0; i < count; i++ ) { arg = args[i]; - sscanf( arg, "%u.%u.%u.%u/%u", &a, &b, &c, &d, &bits ); + (void)sscanf( arg, "%u.%u.%u.%u/%u", &a, &b, &c, &d, &bits ); if( (a > 255) || (b > 255) || (c > 255) || (d > 255) || (bits > 32) ) { if (0 == found_bad) { - opal_show_help("help-opal-util.txt", + opal_show_help("help-opal-util.txt", "malformed net_private_ipv4", true, args[i]); found_bad = 1; @@ -247,7 +247,7 @@ opal_net_islocalhost(const struct sockaddr *addr) bool -opal_net_samenetwork(const struct sockaddr *addr1, +opal_net_samenetwork(const struct sockaddr *addr1, const struct sockaddr *addr2, uint32_t plen) { @@ -256,7 +256,7 @@ opal_net_samenetwork(const struct sockaddr *addr1, if(addr1->sa_family != addr2->sa_family) { return false; /* address families must be equal */ } - + switch (addr1->sa_family) { case AF_INET: { @@ -336,17 +336,17 @@ opal_net_addr_isipv4public(const struct sockaddr *addr) { const struct sockaddr_in *inaddr = (struct sockaddr_in*) addr; int i; - + if( NULL == private_ipv4 ) { return true; } - + for( i = 0; private_ipv4[i].addr != 0; i++ ) { if( private_ipv4[i].addr == (inaddr->sin_addr.s_addr & opal_net_prefix2netmask(private_ipv4[i].netmask_bits)) ) return false; } - + } return true; default: @@ -354,7 +354,7 @@ opal_net_addr_isipv4public(const struct sockaddr *addr) "unhandled sa_family %d passed to opal_net_addr_isipv4public\n", addr->sa_family); } - + return false; } @@ -379,13 +379,12 @@ opal_net_get_hostname(const struct sockaddr *addr) addrlen = sizeof (struct sockaddr_in); break; case AF_INET6: -#if defined( __NetBSD__) +#if defined( __NetBSD__) /* hotfix for netbsd: on my netbsd machine, getnameinfo returns an unkown error code. 
*/ if(NULL == inet_ntop(AF_INET6, &((struct sockaddr_in6*) addr)->sin6_addr, name, NI_MAXHOST)) { opal_output(0, "opal_sockaddr2str failed with error code %d", errno); - free(name); return NULL; } return name; @@ -394,7 +393,6 @@ opal_net_get_hostname(const struct sockaddr *addr) #endif break; default: - free(name); return NULL; } @@ -405,7 +403,6 @@ opal_net_get_hostname(const struct sockaddr *addr) int err = errno; opal_output (0, "opal_sockaddr2str failed:%s (return code %i)\n", gai_strerror(err), error); - free (name); return NULL; } /* strip any trailing % data as it isn't pertinent */ @@ -468,7 +465,7 @@ opal_net_islocalhost(const struct sockaddr *addr) bool -opal_net_samenetwork(const struct sockaddr *addr1, +opal_net_samenetwork(const struct sockaddr *addr1, const struct sockaddr *addr2, uint32_t prefixlen) { diff --git a/opal/util/net.h b/opal/util/net.h index a1680003c19..27dad966625 100644 --- a/opal/util/net.h +++ b/opal/util/net.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/numtostr.c b/opal/util/numtostr.c index 8dc37e95b1c..349378b286a 100644 --- a/opal/util/numtostr.c +++ b/opal/util/numtostr.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,13 +33,13 @@ opal_ltostr(long num) buf = (char*) malloc(sizeof(char) * buflen); if (NULL == buf) return NULL; - + ret = snprintf(buf, buflen, "%ld", num); if (ret < 0) { free(buf); return NULL; } - + return buf; } @@ -51,15 +51,15 @@ opal_dtostr(double num) int buflen = sizeof(long) * 8; char *buf = NULL; int ret = 0; - + buf = (char*) malloc(sizeof(char) * buflen); if (NULL == buf) return NULL; - + ret = snprintf(buf, buflen, "%f", num); if (ret < 0) { free(buf); return NULL; } - + return buf; } diff --git a/opal/util/numtostr.h b/opal/util/numtostr.h index c3ce12c45f2..c2e517f9fd0 100644 --- a/opal/util/numtostr.h +++ b/opal/util/numtostr.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -26,7 +26,7 @@ /** * Convert a long integer to a char* string. The returned buffer is * allocated by calling malloc() and must be freed by the caller. 
- * + * * @param num (IN) Input number * @return String containing number (NULL on failure) */ diff --git a/opal/util/opal_environ.c b/opal/util/opal_environ.c index 98f3f833bc4..f1df671b53d 100644 --- a/opal/util/opal_environ.c +++ b/opal/util/opal_environ.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,7 +31,7 @@ #include "opal/util/opal_environ.h" #include "opal/constants.h" -#define OPAL_DEFAULT_TMPDIR "/tmp" +#define OPAL_DEFAULT_TMPDIR "/tmp" /* * Merge two environ-like char arrays, ensuring that there are no diff --git a/opal/util/opal_environ.h b/opal/util/opal_environ.h index c35dc1bae5c..094266c40d8 100644 --- a/opal/util/opal_environ.h +++ b/opal/util/opal_environ.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/opal_getcwd.c b/opal/util/opal_getcwd.c index 3f0308fcbb0..e529c008e76 100644 --- a/opal/util/opal_getcwd.c +++ b/opal/util/opal_getcwd.c @@ -1,9 +1,9 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -20,9 +20,7 @@ #ifdef HAVE_UNISTD_H #include #endif -#ifdef HAVE_STRING_H #include -#endif #include "opal/util/basename.h" #include "opal/util/opal_getcwd.h" @@ -66,7 +64,7 @@ int opal_getcwd(char *buf, size_t size) /* If we can't stat() what getcwd() gave us, give up */ if (0 != stat(cwd, &a)) { return OPAL_ERR_IN_ERRNO; - } + } /* If we can't stat() $PWD, then $PWD could just be stale -- so ignore it. */ else if (0 != stat(pwd, &b)) { diff --git a/opal/util/opal_getcwd.h b/opal/util/opal_getcwd.h index 12d2cbe9dbd..1d5533190a5 100644 --- a/opal/util/opal_getcwd.h +++ b/opal/util/opal_getcwd.h @@ -1,9 +1,9 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/opal_pty.c b/opal/util/opal_pty.c index 0a29bd068f4..014eacb8c72 100644 --- a/opal/util/opal_pty.c +++ b/opal/util/opal_pty.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /*- @@ -71,9 +71,7 @@ # include #endif #include -#ifdef HAVE_STRING_H # include -#endif #ifdef HAVE_GRP_H #include #endif @@ -110,7 +108,7 @@ int opal_openpty(int *amaster, int *aslave, char *name, #elif defined(HAVE_OPENPTY) -int opal_openpty(int *amaster, int *aslave, char *name, +int opal_openpty(int *amaster, int *aslave, char *name, struct termios *termp, struct winsize *winp) { return openpty(amaster, aslave, name, termp, winp); diff --git a/opal/util/opal_pty.h b/opal/util/opal_pty.h index a06ca137b81..4bcb7a97f9d 100644 --- a/opal/util/opal_pty.h +++ b/opal/util/opal_pty.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,9 +37,9 @@ BEGIN_C_DECLS -#if OPAL_ENABLE_PTY_SUPPORT +#if OPAL_ENABLE_PTY_SUPPORT -OPAL_DECLSPEC int opal_openpty(int *amaster, int *aslave, char *name, +OPAL_DECLSPEC int opal_openpty(int *amaster, int *aslave, char *name, struct termios *termp, struct winsize *winp); #else diff --git a/opal/util/os_dirpath.c b/opal/util/os_dirpath.c index 768c79a7e44..881eb424c8b 100644 --- a/opal/util/os_dirpath.c +++ b/opal/util/os_dirpath.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -115,7 +115,7 @@ int opal_os_dirpath_create(const char *path, const mode_t mode) /* Now that we finally have the name to check, check it. Create it if it doesn't exist. */ ret = mkdir(tmp, mode); - if ((0 > ret && EEXIST != errno) || 0 != stat(tmp, &buf)) { + if ((0 > ret && EEXIST != errno) || 0 != stat(tmp, &buf)) { opal_output(0, "opal_os_dirpath_create: " "Error: Unable to create the sub-directory (%s) of (%s), mkdir failed [%d]\n", @@ -133,7 +133,7 @@ int opal_os_dirpath_create(const char *path, const mode_t mode) return OPAL_SUCCESS; } -/** +/** * This function attempts to remove a directory along with all the * files in it. If the recursive variable is non-zero, then it will * try to recursively remove all directories. If provided, the @@ -150,7 +150,7 @@ int opal_os_dirpath_destroy(const char *path, DIR *dp; struct dirent *ep; char *filenm; -#ifndef HAVE_STRUCT_DIRENT_D_TYPE +#ifndef HAVE_STRUCT_DIRENT_D_TYPE struct stat buf; #endif @@ -180,13 +180,13 @@ int opal_os_dirpath_destroy(const char *path, (0 == strcmp(ep->d_name, "..")) ) { continue; } - + /* Check to see if it is a directory */ is_dir = false; /* Create a pathname. This is not always needed, but it makes * for cleaner code just to create it here. Note that we are - * allocating memory here, so we need to free it later on. + * allocating memory here, so we need to free it later on. 
*/ filenm = opal_os_path(false, path, ep->d_name, NULL); #ifdef HAVE_STRUCT_DIRENT_D_TYPE @@ -198,7 +198,7 @@ int opal_os_dirpath_destroy(const char *path, if (rc < 0 || S_ISDIR(buf.st_mode)) { is_dir = true; } -#endif /* have dirent.d_type */ +#endif /* have dirent.d_type */ /* * If not recursively decending, then if we find a directory then fail @@ -242,12 +242,12 @@ int opal_os_dirpath_destroy(const char *path, free(filenm); } } - + /* Done with this directory */ closedir(dp); - + cleanup: - + /* * If the directory is empty, them remove it */ @@ -261,7 +261,7 @@ int opal_os_dirpath_destroy(const char *path, bool opal_os_dirpath_is_empty(const char *path ) { DIR *dp; struct dirent *ep; - + if (NULL != path) { /* protect against error */ dp = opendir(path); if (NULL != dp) { diff --git a/opal/util/os_dirpath.h b/opal/util/os_dirpath.h index 64fc8f02d4a..5564a77370a 100644 --- a/opal/util/os_dirpath.h +++ b/opal/util/os_dirpath.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,7 +24,7 @@ * access permissions. Existing directories within the tree are left * untouched - however, if they do not permit the user to create a directory * within them, the function will return an error condition. - * + * * If the specified full path name already exists, the * opal_os_dirpath_create() function will check to ensure that * the final directory in the tree has at least the specified access permission. 
In other @@ -50,7 +50,7 @@ BEGIN_C_DECLS /** * @param path A pointer to a string that contains the path name to be built. * @param mode A mode_t bit mask that specifies the access permissions for the - * directories being constructed. + * directories being constructed. * @retval OPAL_SUCCESS If the directory tree has been successfully created with * the specified access permissions. * @retval OPAL_ERROR If the directory tree could not be created with the @@ -75,7 +75,7 @@ OPAL_DECLSPEC bool opal_os_dirpath_is_empty(const char *path); * @param path A pointer to a string that contains the path name to be checked. * @param mode A mode_t bit mask that specifies the access permissions for the * directory to be accessed. - * + * * @retval OPAL_SUCCESS If directory exists, and permissions match * @retval OPAL_ERR_NOT_FOUND If directory does not exist * @retval OPAL_ERROR If directory exists, and permissions do not match @@ -88,7 +88,7 @@ OPAL_DECLSPEC int opal_os_dirpath_access(const char *path, const mode_t mode ); * * @param root A pointer to a string that contains the base path name (e.g., /tmp/foo from /tmp/foo/bar) * @param path A pointer to a string that contains the file or directory (e.g., bar from /tmp/foo/bar) - * + * * @retval true Allow the program to remove the file/directory * @retval false Do not allow the program to remove the file/directory */ @@ -100,7 +100,7 @@ typedef bool (*opal_os_dirpath_destroy_callback_fn_t)(const char *root, const ch * @param path A pointer to a string that contains the path name to be destroyed * @param recursive Recursively desend the directory removing all files and directories. * if set to 'false' then the directory must be empty to succeed. - * @param cbfunc A function that will be called before removing a file or directory. + * @param cbfunc A function that will be called before removing a file or directory. * If NULL, then assume all remove. 
* * @retval OPAL_SUCCESS If the directory was successfully removed or removed to the @@ -109,8 +109,8 @@ typedef bool (*opal_os_dirpath_destroy_callback_fn_t)(const char *root, const ch * @retval OPAL_ERROR If the directory cannnot be removed, accessed properly, or contains * directories that could not be removed.. */ -OPAL_DECLSPEC int opal_os_dirpath_destroy(const char *path, - bool recursive, +OPAL_DECLSPEC int opal_os_dirpath_destroy(const char *path, + bool recursive, opal_os_dirpath_destroy_callback_fn_t cbfunc); END_C_DECLS diff --git a/opal/util/os_path.c b/opal/util/os_path.c index deee807d021..251a6107fcf 100644 --- a/opal/util/os_path.c +++ b/opal/util/os_path.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -70,7 +70,7 @@ char *opal_os_path(bool relative, ...) if(relative) { total_length++; } - + if (total_length > OPAL_PATH_MAX) { /* path length is too long - reject it */ return(NULL); } diff --git a/opal/util/os_path.h b/opal/util/os_path.h index 89be7a450e3..4c2db908f4a 100644 --- a/opal/util/os_path.h +++ b/opal/util/os_path.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -48,11 +48,11 @@ BEGIN_C_DECLS -/** +/** * @param relative A boolean that specifies if the path name is to be constructed * relative to the current directory or as an absolute path. If no path * elements are included in the function call, then the function returns - * "." for a relative path name and "" - + * "." for a relative path name and "" - * the top of the directory tree - for an absolute path name. * @param elem1,elem2,... A variable number of (char *)path_elements * can be provided to the function, terminated by a NULL value. These diff --git a/opal/util/output.c b/opal/util/output.c index e964d9d039a..55ce9229f07 100644 --- a/opal/util/output.c +++ b/opal/util/output.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. @@ -15,10 +15,11 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -71,7 +72,7 @@ typedef struct { char *ldi_suffix; int ldi_suffix_len; - + bool ldi_stdout; bool ldi_stderr; @@ -89,7 +90,7 @@ static void construct(opal_object_t *stream); static int do_open(int output_id, opal_output_stream_t * lds); static int open_file(int i); static void free_descriptor(int output_id); -static int make_string(char **no_newline_string, output_desc_t *ldi, +static int make_string(char **no_newline_string, output_desc_t *ldi, const char *format, va_list arglist); static int output(int output_id, const char *format, va_list arglist); @@ -127,7 +128,7 @@ OBJ_CLASS_INSTANCE(opal_output_stream_t, opal_object_t, construct, NULL); bool opal_output_init(void) { int i; - char hostname[32]; + char hostname[OPAL_MAXHOSTNAMELEN]; char *str; if (initialized) { @@ -174,10 +175,15 @@ bool opal_output_init(void) verbose.lds_want_stderr = false; verbose.lds_want_stdout = false; } else { - verbose.lds_want_stderr = true; + str = getenv("OPAL_OUTPUT_INTERNAL_TO_STDOUT"); + if (NULL != str && str[0] == '1') { + verbose.lds_want_stdout = true; + } + else { + verbose.lds_want_stderr = true; + } } gethostname(hostname, sizeof(hostname)); - hostname[sizeof(hostname)-1] = '\0'; asprintf(&verbose.lds_prefix, "[%s:%05d] ", hostname, getpid()); for (i = 0; i < OPAL_OUTPUT_MAX_STREAMS; ++i) { @@ -254,7 +260,7 @@ bool opal_output_switch(int output_id, bool enable) void opal_output_reopen_all(void) { char *str; - char hostname[32]; + char hostname[OPAL_MAXHOSTNAMELEN]; str = getenv("OPAL_OUTPUT_STDERR_FD"); if (NULL != str) { @@ -281,7 +287,7 @@ void opal_output_reopen_all(void) break; } - /* + /* * set this to zero to ensure that opal_output_open will * return this same index as the output stream id */ @@ -303,7 +309,7 @@ void opal_output_reopen_all(void) lds.lds_want_file_append = true; lds.lds_file_suffix = info[i].ldi_file_suffix; - /* + /* * call opal_output_open to open the stream. 
The return value * is guaranteed to be i. So we can ignore it. */ @@ -335,7 +341,7 @@ void opal_output_close(int output_id) free_descriptor(output_id); /* If no one has the syslog open, we should close it */ - + for (i = 0; i < OPAL_OUTPUT_MAX_STREAMS; ++i) { if (info[i].ldi_used && info[i].ldi_syslog) { break; @@ -385,7 +391,7 @@ void opal_output_verbose(int level, int output_id, const char *format, ...) /* * Send a message to a stream if the verbose level is high enough */ -void opal_output_vverbose(int level, int output_id, const char *format, +void opal_output_vverbose(int level, int output_id, const char *format, va_list arglist) { if (output_id >= 0 && output_id < OPAL_OUTPUT_MAX_STREAMS && @@ -421,7 +427,7 @@ char *opal_output_string(int level, int output_id, const char *format, ...) /* * Send a message to a string if the verbose level is high enough */ -char *opal_output_vstring(int level, int output_id, const char *format, +char *opal_output_vstring(int level, int output_id, const char *format, va_list arglist) { int rc; @@ -649,7 +655,7 @@ static int do_open(int output_id, opal_output_stream_t * lds) info[i].ldi_suffix = NULL; info[i].ldi_suffix_len = 0; } - + if (opal_output_redirected_to_syslog) { /* since all is redirected to syslog, ensure * we don't duplicate the output to the std places @@ -683,6 +689,18 @@ static int do_open(int output_id, opal_output_stream_t * lds) info[i].ldi_file_num_lines_lost = 0; } + /* Special case: output_id == 0 == verbose_stream + * This is the verbose stream, so update the internal 'verbose_stream' + * to match the parameters set in the info[i] + */ + if( verbose_stream == i ) { + verbose.lds_want_syslog = info[i].ldi_syslog; + verbose.lds_syslog_priority = info[i].ldi_syslog_priority; + verbose.lds_syslog_ident = info[i].ldi_syslog_ident; + verbose.lds_want_stdout = info[i].ldi_stdout; + verbose.lds_want_stderr = info[i].ldi_stderr; + } + /* Don't open a file in the session directory now -- do that lazily * so that if 
there's no output, we don't have an empty file */ @@ -804,7 +822,7 @@ static void free_descriptor(int output_id) free(ldi->ldi_suffix); } ldi->ldi_suffix = NULL; - + if (NULL != ldi->ldi_file_suffix) { free(ldi->ldi_file_suffix); } @@ -818,7 +836,7 @@ static void free_descriptor(int output_id) } -static int make_string(char **no_newline_string, output_desc_t *ldi, +static int make_string(char **no_newline_string, output_desc_t *ldi, const char *format, va_list arglist) { size_t len, total_len; @@ -862,7 +880,7 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, snprintf(temp_str, temp_str_len, "%s%s%s\n", ldi->ldi_prefix, *no_newline_string, ldi->ldi_suffix); } else { - snprintf(temp_str, temp_str_len, "%s%s%s", ldi->ldi_prefix, + snprintf(temp_str, temp_str_len, "%s%s%s", ldi->ldi_prefix, *no_newline_string, ldi->ldi_suffix); } } else if (NULL != ldi->ldi_prefix) { @@ -870,7 +888,7 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, snprintf(temp_str, temp_str_len, "%s%s\n", ldi->ldi_prefix, *no_newline_string); } else { - snprintf(temp_str, temp_str_len, "%s%s", ldi->ldi_prefix, + snprintf(temp_str, temp_str_len, "%s%s", ldi->ldi_prefix, *no_newline_string); } } else if (NULL != ldi->ldi_suffix) { @@ -878,7 +896,7 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, snprintf(temp_str, temp_str_len, "%s%s\n", *no_newline_string, ldi->ldi_suffix); } else { - snprintf(temp_str, temp_str_len, "%s%s", + snprintf(temp_str, temp_str_len, "%s%s", *no_newline_string, ldi->ldi_suffix); } } else { @@ -888,10 +906,10 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, snprintf(temp_str, temp_str_len, "%s", *no_newline_string); } } - + return OPAL_SUCCESS; } - + /* * Do the actual output. Take a va_list so that we can be called from * multiple different places, even functions that took "..." 
as input @@ -936,15 +954,15 @@ static int output(int output_id, const char *format, va_list arglist) /* stdout output */ if (ldi->ldi_stdout) { - write(fileno(stdout), out, (int)strlen(out)); + write(fileno(stdout), out, (int)strlen(out)); fflush(stdout); } /* stderr output */ if (ldi->ldi_stderr) { - write((-1 == default_stderr_fd) ? + write((-1 == default_stderr_fd) ? fileno(stderr) : default_stderr_fd, - out, (int)strlen(out)); + out, (int)strlen(out)); fflush(stderr); } diff --git a/opal/util/output.h b/opal/util/output.h index c946cef40e7..b8f7cd372fe 100644 --- a/opal/util/output.h +++ b/opal/util/output.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -65,9 +65,7 @@ #include "opal_config.h" -#ifdef HAVE_STDARG_H #include -#endif #include "opal/class/opal_object.h" @@ -96,7 +94,7 @@ OPAL_DECLSPEC extern bool opal_output_redirected_to_syslog; OPAL_DECLSPEC extern int opal_output_redirected_syslog_pri; /** - * \class opal_output_stream_t + * \class opal_output_stream_t * * Structure used to request the opening of a OPAL output stream. A * pointer to this structure is passed to opal_output_open() to tell @@ -126,7 +124,7 @@ struct opal_output_stream_t { * more output and diagnostics should be displayed. 
*/ int lds_verbose_level; - + /** * When opal_output_stream_t::lds_want_syslog is true, this field is * examined to see what priority output from the stream should be @@ -140,7 +138,7 @@ struct opal_output_stream_t { /** * When opal_output_stream_t::lds_want_syslog is true, this field is * examined to see what ident value should be passed to openlog(3). - * + * * If a NULL value is given, the string "opal" is used. */ #if !defined(__WINDOWS__) @@ -150,7 +148,7 @@ struct opal_output_stream_t { #else HANDLE lds_syslog_ident; #endif /* !defined(__WINDOWS__) */ - + /** * String prefix added to all output on the stream. * @@ -160,7 +158,7 @@ struct opal_output_stream_t { * to an internal structure in the call to opal_output_open()! */ char *lds_prefix; - + /** * String suffix added to all output on the stream. * @@ -208,7 +206,7 @@ struct opal_output_stream_t { * If this field is true, stream output is sent to stderr. */ bool lds_want_stderr; - + /** * Whether to send stream output to a file or not. * @@ -246,7 +244,7 @@ struct opal_output_stream_t { /** * Convenience typedef - */ + */ typedef struct opal_output_stream_t opal_output_stream_t; /** @@ -265,7 +263,7 @@ struct opal_output_stream_t { * and has a verbose level of 0. */ OPAL_DECLSPEC bool opal_output_init(void); - + /** * Shut down the output stream system. * @@ -311,7 +309,7 @@ struct opal_output_stream_t { * new stream with a specific stream handle. */ OPAL_DECLSPEC int opal_output_reopen(int output_id, opal_output_stream_t *lds); - + /** * Enables and disables output streams. * @@ -380,7 +378,7 @@ struct opal_output_stream_t { * writing to it. */ OPAL_DECLSPEC void opal_output(int output_id, const char *format, ...) __opal_attribute_format__(__printf__, 2, 3); - + /** * Send output to a stream only if the passed verbosity level is * high enough. 
@@ -409,16 +407,16 @@ struct opal_output_stream_t { * * @see opal_output_set_verbosity() */ - OPAL_DECLSPEC void opal_output_verbose(int verbose_level, int output_id, + OPAL_DECLSPEC void opal_output_verbose(int verbose_level, int output_id, const char *format, ...) __opal_attribute_format__(__printf__, 3, 4); /** * Same as opal_output_verbose(), but takes a va_list form of varargs. */ - OPAL_DECLSPEC void opal_output_vverbose(int verbose_level, int output_id, + OPAL_DECLSPEC void opal_output_vverbose(int verbose_level, int output_id, const char *format, va_list ap) __opal_attribute_format__(__printf__, 3, 0); - /** + /** * Send output to a string if the verbosity level is high enough. * * @param output_id Stream id returned from opal_output_open(). @@ -432,13 +430,13 @@ struct opal_output_stream_t { * level is not high enough, NULL is returned. The caller is * responsible for free()'ing the returned string. */ - OPAL_DECLSPEC char *opal_output_string(int verbose_level, int output_id, + OPAL_DECLSPEC char *opal_output_string(int verbose_level, int output_id, const char *format, ...) __opal_attribute_format__(__printf__, 3, 4); /** * Same as opal_output_string, but accepts a va_list form of varargs. */ - OPAL_DECLSPEC char *opal_output_vstring(int verbose_level, int output_id, + OPAL_DECLSPEC char *opal_output_vstring(int verbose_level, int output_id, const char *format, va_list ap) __opal_attribute_format__(__printf__, 3, 0); /** @@ -504,7 +502,7 @@ struct opal_output_stream_t { const char *prefix, char **olddir, char **oldprefix); - + #if OPAL_ENABLE_DEBUG /** * Main macro for use in sending debugging output to output streams; @@ -514,8 +512,8 @@ struct opal_output_stream_t { * @see opal_output() */ #define OPAL_OUTPUT(a) opal_output a - - /** + + /** * Macro for use in sending debugging output to the output * streams. Will be "compiled out" when OPAL is configured * without --enable-debug. 
@@ -532,8 +530,8 @@ struct opal_output_stream_t { * @see opal_output() */ #define OPAL_OUTPUT(a) - - /** + + /** * Macro for use in sending debugging output to the output * streams. Will be "compiled out" when OPAL is configured * without --enable-debug. diff --git a/opal/util/path.c b/opal/util/path.c index 0bbe3eaa482..2b4bfa22ea8 100644 --- a/opal/util/path.c +++ b/opal/util/path.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,13 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2016 University of Houston. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -108,10 +111,10 @@ bool opal_path_is_absolute( const char *path ) */ char *opal_path_find(char *fname, char **pathv, int mode, char **envv) { - char *fullpath; - char *delimit; - char *env; - char *pfix; + char *fullpath; + char *delimit; + char *env; + char *pfix; int i; /* If absolute path is given, return it without searching. 
*/ @@ -167,10 +170,10 @@ char *opal_path_find(char *fname, char **pathv, int mode, char **envv) */ char *opal_path_findv(char *fname, int mode, char **envv, char *wrkdir) { - char **dirv; - char *fullpath; - char *path; - int dirc; + char **dirv; + char *fullpath; + char *path; + int dirc; int i; bool found_dot = false; @@ -185,7 +188,7 @@ char *opal_path_findv(char *fname, int mode, char **envv, char *wrkdir) /* Replace the "." path by the working directory. */ - if (NULL != wrkdir) { + if (NULL != wrkdir) { for (i = 0; i < dirc; ++i) { if (0 == strcmp(dirv[i], ".")) { found_dot = true; @@ -216,10 +219,10 @@ char *opal_path_findv(char *fname, int mode, char **envv, char *wrkdir) /** * Forms a complete pathname and checks it for existance and * permissions - * + * * Accepts: * -fname File name - * -path Path prefix + * -path Path prefix * -mode Target permissions which must be satisfied * * Returns: @@ -230,16 +233,18 @@ char *opal_path_access(char *fname, char *path, int mode) { char *fullpath = NULL; struct stat buf; - + bool relative; + /* Allocate space for the full pathname. */ if (NULL == path) { fullpath = opal_os_path(false, fname, NULL); } else { - fullpath = opal_os_path(false, path, fname, NULL); + relative = !opal_path_is_absolute(path); + fullpath = opal_os_path(relative, path, fname, NULL); } - if (NULL == fullpath) + if (NULL == fullpath) { return NULL; - + } /* first check to see - is this a file or a directory? 
We * only want files */ @@ -250,16 +255,16 @@ char *opal_path_access(char *fname, char *path, int mode) free(fullpath); return NULL; } - + if (!(S_IFREG & buf.st_mode) && !(S_IFLNK & buf.st_mode)) { /* this isn't a regular file or a symbolic link, so - * ignore it + * ignore it */ free(fullpath); return NULL; } - + /* check the permissions */ if ((X_OK & mode) && !(S_IXUSR & buf.st_mode)) { /* if they asked us to check executable permission, @@ -282,7 +287,7 @@ char *opal_path_access(char *fname, char *path, int mode) free(fullpath); return NULL; } - + /* must have met all criteria! */ return fullpath; } @@ -395,7 +400,7 @@ char* opal_find_absolute_path( char* app_name ) /* Otherwise try to search for the application in the PATH ... */ abs_app_name = opal_path_findv( app_name, X_OK, NULL, NULL ); } - + if( NULL != abs_app_name ) { char* resolved_path = (char*)malloc(OPAL_PATH_MAX); realpath( abs_app_name, resolved_path ); @@ -444,7 +449,8 @@ static char *opal_check_mtab(char *dev_path) * If the file is not created, the parent directory is checked. * This allows checking for NFS prior to opening the file. 
* - * @param[in] fname File name to check + * @fname[in] File name to check + * @fstype[out] File system type if retval is true * * @retval true If fname is on NFS, Lustre, Panasas or GPFS * @retval false otherwise @@ -493,11 +499,14 @@ static char *opal_check_mtab(char *dev_path) #ifndef AUTOFS_SUPER_MAGIC #define AUTOFS_SUPER_MAGIC 0x0187 #endif +#ifndef PVFS2_SUPER_MAGIC +#define PVFS2_SUPER_MAGIC 0x20030528 +#endif #define MASK2 0xffff #define MASK4 0xffffffff -bool opal_path_nfs(char *fname) +bool opal_path_nfs(char *fname, char **ret_fstype) { int i; int fsrc = -1; @@ -511,7 +520,7 @@ bool opal_path_nfs(char *fname) struct statvfs vfsbuf; #endif /* - * Be sure to update the test (test/util/opal_path_nfs.c) + * Be sure to update the test (test/util/opal_path_nfs.c) * while adding a new Network/Cluster Filesystem here */ static struct fs_types_t { @@ -523,7 +532,8 @@ bool opal_path_nfs(char *fname) {NFS_SUPER_MAGIC, MASK2, "nfs"}, {AUTOFS_SUPER_MAGIC, MASK2, "autofs"}, {PAN_KERNEL_FS_CLIENT_SUPER_MAGIC, MASK4, "panfs"}, - {GPFS_SUPER_MAGIC, MASK4, "gpfs"} + {GPFS_SUPER_MAGIC, MASK4, "gpfs"}, + {PVFS2_SUPER_MAGIC, MASK4, "pvfs2"} }; #define FS_TYPES_NUM (int)(sizeof (fs_types)/sizeof (fs_types[0])) @@ -555,14 +565,20 @@ bool opal_path_nfs(char *fname) fname, errno, file)); if (EPERM == errno) { free(file); + if ( NULL != ret_fstype ) { + *ret_fstype = NULL; + } return false; } last_sep = strrchr(file, OPAL_PATH_SEP[0]); /* Stop the search, when we have searched past root '/' */ - if (NULL == last_sep || (1 == strlen(last_sep) && + if (NULL == last_sep || (1 == strlen(last_sep) && OPAL_PATH_SEP[0] == *last_sep)) { - free (file); + free (file); + if ( NULL != ret_fstype ) { + *ret_fstype=NULL; + } return false; } *last_sep = '\0'; @@ -576,7 +592,7 @@ bool opal_path_nfs(char *fname) /* These are uses of struct statfs */ # if defined(HAVE_STRUCT_STATFS_F_FSTYPENAME) if (0 == fsrc && - 0 == strncasecmp(fs_types[i].f_fsname, fsbuf.f_fstypename, + 0 == 
strncasecmp(fs_types[i].f_fsname, fsbuf.f_fstypename, sizeof(fsbuf.f_fstypename))) { goto found; } @@ -609,6 +625,9 @@ bool opal_path_nfs(char *fname) } free (file); + if ( NULL != ret_fstype ) { + *ret_fstype=NULL; + } return false; found: @@ -625,16 +644,25 @@ bool opal_path_nfs(char *fname) if (0 == strcasecmp(fs_types[x].f_fsname, fs_type)) { OPAL_OUTPUT_VERBOSE((10, 0, "opal_path_nfs: file:%s on fs:%s\n", fname, fs_type)); free(fs_type); + if ( NULL != ret_fstype ) { + *ret_fstype = strdup(fs_types[x].f_fsname); + } return true; } } free(fs_type); + if ( NULL != ret_fstype ) { + *ret_fstype=NULL; + } return false; } } OPAL_OUTPUT_VERBOSE((10, 0, "opal_path_nfs: file:%s on fs:%s\n", fname, fs_types[i].f_fsname)); + if ( NULL != ret_fstype ) { + *ret_fstype = strdup (fs_types[i].f_fsname); + } return true; #undef FS_TYPES_NUM @@ -676,7 +704,7 @@ opal_path_df(const char *path, /* now set the amount of free space available on path */ /* sometimes buf.f_bavail is negative */ - *out_avail = buf.f_bsize * ((int)buf.f_bavail < 0 ? 0 : buf.f_bavail); + *out_avail = (uint64_t)buf.f_bsize * (uint64_t)(buf.f_bavail < 0 ? 0 : buf.f_bavail); OPAL_OUTPUT_VERBOSE((10, 2, "opal_path_df: stat(v)fs states " "path: %s has %"PRIu64 " B of free space.", diff --git a/opal/util/path.h b/opal/util/path.h index e026f4dbd0c..d723545159b 100644 --- a/opal/util/path.h +++ b/opal/util/path.h @@ -5,16 +5,17 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2016 University of Houston. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * @file @@ -35,7 +36,7 @@ BEGIN_C_DECLS /** * Locates a file with certain permissions - * + * * @param fname File name * @param pathv Array of search directories * @param mode Permissions which must be satisfied (see access(2)) @@ -73,7 +74,7 @@ OPAL_DECLSPEC char *opal_path_find(char *fname, char **pathv, int mode, * * The caller is responsible for freeing the returned string. */ -OPAL_DECLSPEC char *opal_path_findv(char *fname, int mode, +OPAL_DECLSPEC char *opal_path_findv(char *fname, int mode, char **envv, char *wrkdir) __opal_attribute_malloc__ __opal_attribute_warn_unused_result__; /** * Detect if the requested path is absolute or relative. @@ -126,19 +127,22 @@ OPAL_DECLSPEC char *opal_path_access(char *fname, char *path, int mode) __opal_a /** * @brief Figure out, whether fname is on network file system + * and return fstype if known * * Try to figure out, whether the file name specified through fname is - * on any network file system (currently NFS, Lustre and Panasas). + * on any network file system (currently NFS, Lustre, GPFS, Panasas + * and PVFS2 ). * * If the file is not created, the parent directory is checked. * This allows checking for NFS prior to opening the file. * - * @param[in] fname File name to check + * @fname[in] File name to check + * @fstype[out] File system type if retval is true * * @retval true If fname is on NFS, Lustre or Panasas * @retval false otherwise */ -OPAL_DECLSPEC bool opal_path_nfs(char *fname) __opal_attribute_warn_unused_result__; +OPAL_DECLSPEC bool opal_path_nfs(char *fname, char **fstype) __opal_attribute_warn_unused_result__; /** * @brief Returns the disk usage of path. diff --git a/opal/util/printf.c b/opal/util/printf.c index 0c5e1ebfdd1..794ad566f9f 100644 --- a/opal/util/printf.c +++ b/opal/util/printf.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2013 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/printf.h b/opal/util/printf.h index 739ae3034ac..4f69aada65f 100644 --- a/opal/util/printf.h +++ b/opal/util/printf.h @@ -5,19 +5,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file - * + * * Buffer safe printf functions for portability to archaic platforms. */ diff --git a/opal/util/proc.c b/opal/util/proc.c index f1c7e2aa597..bf76399c40a 100644 --- a/opal/util/proc.c +++ b/opal/util/proc.c @@ -1,26 +1,28 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2013 Inria. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "proc.h" #include "opal/util/proc.h" #include "opal/util/arch.h" -#include "opal/mca/dstore/dstore.h" #include "opal/mca/pmix/pmix.h" opal_process_name_t opal_name_wildcard = {OPAL_JOBID_WILDCARD, OPAL_VPID_WILDCARD}; -opal_process_name_t opal_name_invalid = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID}; +opal_process_name_t opal_name_invalid = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID}; opal_process_info_t opal_process_info = { .nodename = NULL, @@ -28,9 +30,7 @@ opal_process_info_t opal_process_info = { .proc_session_dir = NULL, .num_local_peers = 0, /* there is nobody else but me */ .my_local_rank = 0, /* I'm the only process around here */ -#if OPAL_HAVE_HWLOC .cpuset = NULL, -#endif }; static opal_proc_t opal_local_proc = { @@ -63,6 +63,9 @@ static void opal_proc_destruct(opal_proc_t* proc) OBJ_CLASS_INSTANCE(opal_proc_t, opal_list_item_t, opal_proc_construct, opal_proc_destruct); +OBJ_CLASS_INSTANCE(opal_namelist_t, opal_list_item_t, + NULL, NULL); + static int opal_compare_opal_procs(const opal_process_name_t p1, const opal_process_name_t p2) @@ -145,10 +148,36 @@ static int opal_convert_string_to_process_name_should_never_be_called(opal_proce return OPAL_ERR_NOT_SUPPORTED; } +static int opal_convert_process_name_to_string_should_never_be_called(char** name_string, + const opal_process_name_t *name) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static int opal_snprintf_jobid_should_never_be_called(char* name_string, size_t size, opal_jobid_t jobid) +{ + (void)strncpy(name_string, "My JOBID", size); + return OPAL_SUCCESS; +} + +static int opal_convert_string_to_jobid_should_never_be_called(opal_jobid_t *jobid, const char *jobid_string) +{ + return OPAL_ERR_NOT_SUPPORTED; +} + +static struct opal_proc_t *opal_proc_for_name_should_never_be_called (opal_process_name_t name) +{ + return NULL; +} + char* (*opal_process_name_print)(const opal_process_name_t) = 
opal_process_name_print_should_never_be_called; char* (*opal_vpid_print)(const opal_vpid_t) = opal_vpid_print_should_never_be_called; char* (*opal_jobid_print)(const opal_jobid_t) = opal_jobid_print_should_never_be_called; int (*opal_convert_string_to_process_name)(opal_process_name_t *name, const char* name_string) = opal_convert_string_to_process_name_should_never_be_called; +int (*opal_convert_process_name_to_string)(char** name_string, const opal_process_name_t *name) = opal_convert_process_name_to_string_should_never_be_called; +int (*opal_snprintf_jobid)(char* name_string, size_t size, opal_jobid_t jobid) = opal_snprintf_jobid_should_never_be_called; +int (*opal_convert_string_to_jobid)(opal_jobid_t *jobid, const char *jobid_string) = opal_convert_string_to_jobid_should_never_be_called; +struct opal_proc_t *(*opal_proc_for_name) (const opal_process_name_t name) = opal_proc_for_name_should_never_be_called; char* opal_get_proc_hostname(const opal_proc_t *proc) { @@ -171,7 +200,7 @@ char* opal_get_proc_hostname(const opal_proc_t *proc) } /* if we don't already have it, then try to get it */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_HOSTNAME, proc, + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->proc_name, (char**)&(proc->proc_hostname), OPAL_STRING); if (OPAL_SUCCESS != ret) { OPAL_ERROR_LOG(ret); diff --git a/opal/util/proc.h b/opal/util/proc.h index a0cf0d6d886..90cee96d196 100644 --- a/opal/util/proc.h +++ b/opal/util/proc.h @@ -3,13 +3,14 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2013 Inria. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Cisco Systems, Inc. 
All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -18,7 +19,7 @@ #include "opal_config.h" #include "opal/class/opal_list.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "opal/types.h" #include "opal/dss/dss.h" @@ -97,15 +98,19 @@ typedef struct opal_proc_t { } opal_proc_t; OBJ_CLASS_DECLARATION(opal_proc_t); +typedef struct { + opal_list_item_t super; + opal_process_name_t name; +} opal_namelist_t; +OBJ_CLASS_DECLARATION(opal_namelist_t); + typedef struct opal_process_info_t { char *nodename; /**< string name for this node */ char *job_session_dir; /**< Session directory for job */ char *proc_session_dir; /**< Session directory for the process */ int32_t num_local_peers; /**< number of procs from my job that share my node with me */ int32_t my_local_rank; /**< local rank on this node within my job */ -#if OPAL_HAVE_HWLOC char *cpuset; /**< String-representation of bitmap where we are bound */ -#endif } opal_process_info_t; OPAL_DECLSPEC extern opal_process_info_t opal_process_info; @@ -125,10 +130,23 @@ OPAL_DECLSPEC extern opal_compare_proc_fct_t opal_compare_proc; OPAL_DECLSPEC extern char* (*opal_process_name_print)(const opal_process_name_t); OPAL_DECLSPEC extern int (*opal_convert_string_to_process_name)(opal_process_name_t *name, const char* name_string); +OPAL_DECLSPEC extern int (*opal_convert_process_name_to_string)(char** name_string, + const opal_process_name_t *name); OPAL_DECLSPEC extern char* (*opal_vpid_print)(const opal_vpid_t); OPAL_DECLSPEC extern char* (*opal_jobid_print)(const opal_jobid_t); +OPAL_DECLSPEC extern int (*opal_snprintf_jobid)(char* name_string, size_t size, opal_jobid_t jobid); +OPAL_DECLSPEC extern int (*opal_convert_string_to_jobid)(opal_jobid_t *jobid, const char *jobid_string); + +/** + * Lookup an opal_proc_t by name + * + * @param name (IN) name to lookup + */ +OPAL_DECLSPEC extern struct opal_proc_t *(*opal_proc_for_name) (const 
opal_process_name_t name); #define OPAL_NAME_PRINT(OPAL_PN) opal_process_name_print(OPAL_PN) +#define OPAL_JOBID_PRINT(OPAL_PN) opal_jobid_print(OPAL_PN) +#define OPAL_VPID_PRINT(OPAL_PN) opal_vpid_print(OPAL_PN) /* provide a safe way to retrieve the hostname of a proc, including * our own. This is to be used by all BTLs so we don't retrieve hostnames diff --git a/opal/util/qsort.c b/opal/util/qsort.c index d79656927f4..ce58ff49cce 100644 --- a/opal/util/qsort.c +++ b/opal/util/qsort.c @@ -104,7 +104,7 @@ loop: SWAPINIT(a, es); swap_cnt = 0; if (n < 7) { for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) - for (pl = pm; + for (pl = pm; pl > (char *)a && CMP(thunk, pl - es, pl) > 0; pl -= es) swap(pl, pl - es); @@ -152,7 +152,7 @@ loop: SWAPINIT(a, es); } if (swap_cnt == 0) { /* Switch to insertion sort */ for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) - for (pl = pm; + for (pl = pm; pl > (char *)a && CMP(thunk, pl - es, pl) > 0; pl -= es) swap(pl, pl - es); diff --git a/opal/util/qsort.h b/opal/util/qsort.h index 5e52cf7ae95..cd6b77c0813 100644 --- a/opal/util/qsort.h +++ b/opal/util/qsort.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/show_help.c b/opal/util/show_help.c index b37ac7bbd44..18c82ccbffe 100644 --- a/opal/util/show_help.c +++ b/opal/util/show_help.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -47,9 +47,9 @@ static char **search_dirs = NULL; /* * Local functions */ -static int opal_show_vhelp_internal(const char *filename, const char *topic, +static int opal_show_vhelp_internal(const char *filename, const char *topic, bool want_error_header, va_list arglist); -static int opal_show_help_internal(const char *filename, const char *topic, +static int opal_show_help_internal(const char *filename, const char *topic, bool want_error_header, ...); opal_show_help_fn_t opal_show_help = opal_show_help_internal; @@ -59,13 +59,13 @@ opal_show_vhelp_fn_t opal_show_vhelp = opal_show_vhelp_internal; int opal_show_help_init(void) { opal_output_stream_t lds; - + OBJ_CONSTRUCT(&lds, opal_output_stream_t); lds.lds_want_stderr = true; output_stream = opal_output_open(&lds); - + opal_argv_append_nosize(&search_dirs, opal_install_dirs.opaldatadir); - + return OPAL_SUCCESS; } @@ -73,13 +73,13 @@ int opal_show_help_finalize(void) { opal_output_close(output_stream); output_stream = -1; - + /* destruct the search list */ if (NULL != search_dirs) { opal_argv_free(search_dirs); search_dirs = NULL; }; - + return OPAL_SUCCESS; } @@ -142,13 +142,13 @@ static int open_file(const char *base, const char *topic) char *err_msg = NULL; size_t base_len; int i; - + /* If no filename was supplied, use the default */ if (NULL == base) { base = default_filename; } - + /* if this is called prior to someone initializing the system, * then don't try to look */ @@ -276,7 +276,7 @@ static int load_array(char 
***array, const char *filename, const char *topic) if (OPAL_SUCCESS != (ret = open_file(filename, topic))) { return ret; } - + ret = find_topic(filename, topic); if (OPAL_SUCCESS == ret) { ret = read_topic(array); @@ -292,7 +292,7 @@ static int load_array(char ***array, const char *filename, const char *topic) return ret; } -char *opal_show_help_vstring(const char *filename, const char *topic, +char *opal_show_help_vstring(const char *filename, const char *topic, bool want_error_header, va_list arglist) { int rc; @@ -316,21 +316,21 @@ char *opal_show_help_vstring(const char *filename, const char *topic, return (OPAL_SUCCESS == rc) ? output : NULL; } -char *opal_show_help_string(const char *filename, const char *topic, +char *opal_show_help_string(const char *filename, const char *topic, bool want_error_handler, ...) { char *output; va_list arglist; va_start(arglist, want_error_handler); - output = opal_show_help_vstring(filename, topic, want_error_handler, + output = opal_show_help_vstring(filename, topic, want_error_handler, arglist); va_end(arglist); return output; } -static int opal_show_vhelp_internal(const char *filename, const char *topic, +static int opal_show_vhelp_internal(const char *filename, const char *topic, bool want_error_header, va_list arglist) { char *output; @@ -348,7 +348,7 @@ static int opal_show_vhelp_internal(const char *filename, const char *topic, return (NULL == output) ? OPAL_ERROR : OPAL_SUCCESS; } -static int opal_show_help_internal(const char *filename, const char *topic, +static int opal_show_help_internal(const char *filename, const char *topic, bool want_error_header, ...) { va_list arglist; diff --git a/opal/util/show_help.h b/opal/util/show_help.h index 7d96d4305f1..8806f059060 100644 --- a/opal/util/show_help.h +++ b/opal/util/show_help.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -33,7 +33,7 @@ * display it. Its important parameters are a filename, message name, * and printf()-style varargs parameters used to substitute into the * message. - * + * * It was originally intended that this system would support a very * simple version of i18n-like support, but we got (strong) feedback * that i18n support was not desired. So it never happened. @@ -55,7 +55,7 @@ * ignored. It is not possible to escape a #. * - Message names are on a line by themselves and marked with []. * Names can be any ASCII string within the [] (excluding the - * characters newline, linefeed, [, ], and #). + * characters newline, linefeed, [, ], and #). * - Messages are any characters between message names and/or the end * of the file. * @@ -128,7 +128,7 @@ OPAL_DECLSPEC int opal_show_help_finalize(void); * based on the topic, and displays it. If want_error_header is * true, a header and footer of asterisks are also displayed. */ -typedef int (*opal_show_help_fn_t)(const char *filename, const char *topic, +typedef int (*opal_show_help_fn_t)(const char *filename, const char *topic, bool want_error_header, ...); OPAL_DECLSPEC extern opal_show_help_fn_t opal_show_help; @@ -136,7 +136,7 @@ OPAL_DECLSPEC extern opal_show_help_fn_t opal_show_help; * This function does the same thing as opal_show_help(), but accepts * a va_list form of varargs. 
*/ -typedef int (*opal_show_vhelp_fn_t)(const char *filename, const char *topic, +typedef int (*opal_show_vhelp_fn_t)(const char *filename, const char *topic, bool want_error_header, va_list ap); OPAL_DECLSPEC extern opal_show_vhelp_fn_t opal_show_vhelp; @@ -144,16 +144,16 @@ OPAL_DECLSPEC extern opal_show_vhelp_fn_t opal_show_vhelp; * This function does the same thing as opal_show_help(), but returns * its output in a string (that must be freed by the caller). */ -OPAL_DECLSPEC char* opal_show_help_string(const char *filename, - const char *topic, +OPAL_DECLSPEC char* opal_show_help_string(const char *filename, + const char *topic, bool want_error_header, ...); /** * This function does the same thing as opal_show_help_string(), but * accepts a va_list form of varargs. */ -OPAL_DECLSPEC char* opal_show_help_vstring(const char *filename, - const char *topic, +OPAL_DECLSPEC char* opal_show_help_vstring(const char *filename, + const char *topic, bool want_error_header, va_list ap); /** diff --git a/opal/util/show_help_lex.h b/opal/util/show_help_lex.h index 0be537de9fc..864888039b9 100644 --- a/opal/util/show_help_lex.h +++ b/opal/util/show_help_lex.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/show_help_lex.l b/opal/util/show_help_lex.l index 7f1aa1f8421..2de61e830ec 100644 --- a/opal/util/show_help_lex.l +++ b/opal/util/show_help_lex.l @@ -9,14 +9,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -77,7 +77,7 @@ CHAR [A-Za-z0-9_\-\.] int opal_show_help_yylex_destroy(void) { if (NULL != YY_CURRENT_BUFFER) { - yy_delete_buffer(YY_CURRENT_BUFFER); + yy_delete_buffer(YY_CURRENT_BUFFER); #if defined(YY_CURRENT_BUFFER_LVALUE) YY_CURRENT_BUFFER_LVALUE = NULL; #else diff --git a/opal/util/stacktrace.c b/opal/util/stacktrace.c index c4f58314082..58f3c924b42 100644 --- a/opal/util/stacktrace.c +++ b/opal/util/stacktrace.c @@ -5,16 +5,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,14 +25,18 @@ #ifdef HAVE_UNISTD_H #include #endif - -#ifdef HAVE_STRING_H -#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_SYS_FCNTL_H +#include #endif -#ifdef HAVE_SIGNAL_H +#include #include -#endif #include "opal/util/stacktrace.h" #include "opal/mca/backtrace/backtrace.h" @@ -39,6 +44,7 @@ #include "opal/util/output.h" #include "opal/util/show_help.h" #include "opal/util/argv.h" +#include "opal/util/proc.h" #include "opal/runtime/opal_params.h" #ifndef _NSIG @@ -47,9 +53,35 @@ #define HOSTFORMAT "[%s:%05d] " -static char stacktrace_hostname[64]; +int opal_stacktrace_output_fileno = -1; +static char *opal_stacktrace_output_filename_base = NULL; +static size_t opal_stacktrace_output_filename_max_len = 0; +static char stacktrace_hostname[OPAL_MAXHOSTNAMELEN]; static char *unable_to_print_msg = "Unable to print stack trace!\n"; +/* + * Set the stacktrace filename: + * stacktrace.PID + * -or, if VPID is available- + * stacktrace.VPID.PID + */ +static void set_stacktrace_filename(void) { + opal_proc_t *my_proc = opal_proc_local_get(); + + if( NULL == my_proc ) { + snprintf(opal_stacktrace_output_filename, opal_stacktrace_output_filename_max_len, + "%s.%lu", + opal_stacktrace_output_filename_base, (unsigned long)getpid()); + } + else { + snprintf(opal_stacktrace_output_filename, opal_stacktrace_output_filename_max_len, + "%s.%lu.%lu", + opal_stacktrace_output_filename_base, (unsigned long)my_proc->proc_name.vpid, (unsigned long)getpid()); + } + + return; +} + /** * This function is being called as a signal-handler in response * to a user-specified signal (e.g. SIGFPE or SIGSEGV). @@ -58,36 +90,61 @@ static char *unable_to_print_msg = "Unable to print stack trace!\n"; * Where available, the BSD libexecinfo is used to provide Linux/Glibc * compatible backtrace and backtrace_symbols_fd functions. 
* - * @param signo with the signal number raised + * @param signo with the signal number raised * @param info with information regarding the reason/send of the signal - * @param p + * @param p * * FIXME: Should distinguish for systems, which don't have siginfo... */ #if OPAL_WANT_PRETTY_PRINT_STACKTRACE static void show_stackframe (int signo, siginfo_t * info, void * p) -{ +{ char print_buffer[1024]; char * tmp = print_buffer; int size = sizeof (print_buffer); int ret; char *si_code_str = ""; + /* Do not print the stack trace */ + if( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) { + /* Raise the signal again, so we don't accidentally mask critical signals. + * For critical signals, it is preferred that we call 'raise' instead of + * 'exit' or 'abort' so that the return status is set properly for this + * process. + */ + signal(signo, SIG_DFL); + raise(signo); + + return; + } + + /* Update the file name with the RANK, if available */ + if( 0 < opal_stacktrace_output_filename_max_len ) { + set_stacktrace_filename(); + opal_stacktrace_output_fileno = open(opal_stacktrace_output_filename, + O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR|S_IWUSR); + if( 0 > opal_stacktrace_output_fileno ) { + opal_output(0, "Error: Failed to open the stacktrace output file. 
Default: stderr\n\tFilename: %s\n\tErrno: %s", + opal_stacktrace_output_filename, strerror(errno)); + opal_stacktrace_output_fileno = fileno(stderr); + } + } + /* write out the footer information */ memset (print_buffer, 0, sizeof (print_buffer)); ret = snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT "*** Process received signal ***\n", stacktrace_hostname, getpid()); - write(fileno(stderr), print_buffer, ret); + write(opal_stacktrace_output_fileno, print_buffer, ret); memset (print_buffer, 0, sizeof (print_buffer)); #ifdef HAVE_STRSIGNAL - ret = snprintf (tmp, size, HOSTFORMAT "Signal: %s (%d)\n", + ret = snprintf (tmp, size, HOSTFORMAT "Signal: %s (%d)\n", stacktrace_hostname, getpid(), strsignal(signo), signo); #else - ret = snprintf (tmp, size, HOSTFORMAT "Signal: %d\n", + ret = snprintf (tmp, size, HOSTFORMAT "Signal: %d\n", stacktrace_hostname, getpid(), signo); #endif size -= ret; @@ -267,14 +324,14 @@ static void show_stackframe (int signo, siginfo_t * info, void * p) /* print signal errno information */ if (0 != info->si_errno) { ret = snprintf(tmp, size, HOSTFORMAT "Associated errno: %s (%d)\n", - stacktrace_hostname, getpid(), + stacktrace_hostname, getpid(), strerror (info->si_errno), info->si_errno); size -= ret; tmp += ret; } ret = snprintf(tmp, size, HOSTFORMAT "Signal code: %s (%d)\n", - stacktrace_hostname, getpid(), + stacktrace_hostname, getpid(), si_code_str, info->si_code); size -= ret; tmp += ret; @@ -282,8 +339,8 @@ static void show_stackframe (int signo, siginfo_t * info, void * p) switch (signo) { case SIGILL: - case SIGFPE: - case SIGSEGV: + case SIGFPE: + case SIGSEGV: case SIGBUS: { ret = snprintf(tmp, size, HOSTFORMAT "Failing at address: %p\n", @@ -292,10 +349,10 @@ static void show_stackframe (int signo, siginfo_t * info, void * p) tmp += ret; break; } - case SIGCHLD: + case SIGCHLD: { ret = snprintf(tmp, size, HOSTFORMAT "Sending PID: %d, Sending UID: %d, Status: %d\n", - stacktrace_hostname, getpid(), + stacktrace_hostname, 
getpid(), info->si_pid, info->si_uid, info->si_status); size -= ret; tmp += ret; @@ -328,26 +385,40 @@ static void show_stackframe (int signo, siginfo_t * info, void * p) } /* write out the signal information generated above */ - write(fileno(stderr), print_buffer, sizeof(print_buffer)-size); + write(opal_stacktrace_output_fileno, print_buffer, sizeof(print_buffer)-size); /* print out the stack trace */ snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT, stacktrace_hostname, getpid()); - ret = opal_backtrace_print(stderr, print_buffer, 2); + ret = opal_backtrace_print(NULL, print_buffer, 2); if (OPAL_SUCCESS != ret) { - write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg)); + write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg)); } /* write out the footer information */ memset (print_buffer, 0, sizeof (print_buffer)); - ret = snprintf(print_buffer, sizeof(print_buffer), - HOSTFORMAT "*** End of error message ***\n", + ret = snprintf(print_buffer, sizeof(print_buffer), + HOSTFORMAT "*** End of error message ***\n", stacktrace_hostname, getpid()); if (ret > 0) { - write(fileno(stderr), print_buffer, ret); + write(opal_stacktrace_output_fileno, print_buffer, ret); } else { - write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg)); + write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg)); } + + if( fileno(stdout) != opal_stacktrace_output_fileno && + fileno(stderr) != opal_stacktrace_output_fileno ) { + close(opal_stacktrace_output_fileno); + opal_stacktrace_output_fileno = -1; + } + + /* Raise the signal again, so we don't accidentally mask critical signals. + * For critical signals, it is preferred that we call 'raise' instead of + * 'exit' or 'abort' so that the return status is set properly for this + * process. 
+ */ + signal(signo, SIG_DFL); + raise(signo); } #endif /* OPAL_WANT_PRETTY_PRINT_STACKTRACE */ @@ -355,7 +426,7 @@ static void show_stackframe (int signo, siginfo_t * info, void * p) #if OPAL_WANT_PRETTY_PRINT_STACKTRACE void opal_stackframe_output(int stream) -{ +{ int traces_size; char **traces; @@ -369,7 +440,30 @@ void opal_stackframe_output(int stream) opal_output(stream, "%s", traces[i]); } } else { - opal_backtrace_print(stderr, NULL, 2); + /* Do not print the stack trace */ + if( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) { + return; + } + + /* Update the file name with the RANK, if available */ + if( 0 < opal_stacktrace_output_filename_max_len ) { + set_stacktrace_filename(); + opal_stacktrace_output_fileno = open(opal_stacktrace_output_filename, + O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR|S_IWUSR); + if( 0 > opal_stacktrace_output_fileno ) { + opal_output(0, "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s", + opal_stacktrace_output_filename, strerror(errno)); + opal_stacktrace_output_fileno = fileno(stderr); + } + } + + opal_backtrace_print(NULL, NULL, 2); + + if( fileno(stdout) != opal_stacktrace_output_fileno && + fileno(stderr) != opal_stacktrace_output_fileno ) { + close(opal_stacktrace_output_fileno); + opal_stacktrace_output_fileno = -1; + } } } @@ -378,12 +472,12 @@ char *opal_stackframe_output_string(void) int traces_size, i; size_t len; char *output, **traces; - + len = 0; if (OPAL_SUCCESS != opal_backtrace_buffer(&traces, &traces_size)) { return NULL; } - + /* Calculate the space needed for the string */ for (i = 3; i < traces_size; i++) { if (NULL == traces[i]) { @@ -391,12 +485,12 @@ char *opal_stackframe_output_string(void) } len += strlen(traces[i]) + 1; } - + output = (char *) malloc(len + 1); if (NULL == output) { return NULL; } - + *output = '\0'; for (i = 3; i < traces_size; i++) { if (NULL == traces[i]) { @@ -420,7 +514,7 @@ char 
*opal_stackframe_output_string(void) * @returnvalue OPAL_SUCCESS * @returnvalue OPAL_ERR_BAD_PARAM if the value in the signal-list * is not a valid signal-number - * + * */ int opal_util_register_stackhandlers (void) { @@ -432,7 +526,6 @@ int opal_util_register_stackhandlers (void) bool complain, showed_help = false; gethostname(stacktrace_hostname, sizeof(stacktrace_hostname)); - stacktrace_hostname[sizeof(stacktrace_hostname) - 1] = '\0'; /* to keep these somewhat readable, only print the machine name */ for (i = 0 ; i < (int)strlen(stacktrace_hostname) ; ++i) { if (stacktrace_hostname[i] == '.') { @@ -441,6 +534,50 @@ int opal_util_register_stackhandlers (void) } } + /* Setup the output stream to use */ + if( NULL == opal_stacktrace_output_filename || + 0 == strcasecmp(opal_stacktrace_output_filename, "none") ) { + opal_stacktrace_output_fileno = -1; + } + else if( 0 == strcasecmp(opal_stacktrace_output_filename, "stdout") ) { + opal_stacktrace_output_fileno = fileno(stdout); + } + else if( 0 == strcasecmp(opal_stacktrace_output_filename, "stderr") ) { + opal_stacktrace_output_fileno = fileno(stdout); + } + else if( 0 == strcasecmp(opal_stacktrace_output_filename, "file" ) || + 0 == strcasecmp(opal_stacktrace_output_filename, "file:") ) { + opal_stacktrace_output_filename_base = strdup("stacktrace"); + + free(opal_stacktrace_output_filename); + // Magic number: 8 = space for .PID and .RANK (allow 7 digits each) + opal_stacktrace_output_filename_max_len = strlen("stacktrace") + 8 + 8; + opal_stacktrace_output_filename = (char*)malloc(sizeof(char) * opal_stacktrace_output_filename_max_len); + set_stacktrace_filename(); + opal_stacktrace_output_fileno = -1; + } + else if( 0 == strncasecmp(opal_stacktrace_output_filename, "file:", 5) ) { + char *filename_cpy = NULL; + next = strchr(opal_stacktrace_output_filename, ':'); + next++; // move past the ':' to the filename specified + + opal_stacktrace_output_filename_base = strdup(next); + + 
free(opal_stacktrace_output_filename); + // Magic number: 8 = space for .PID and .RANK (allow 7 digits each) + opal_stacktrace_output_filename_max_len = strlen(opal_stacktrace_output_filename_base) + 8 + 8; + opal_stacktrace_output_filename = (char*)malloc(sizeof(char) * opal_stacktrace_output_filename_max_len); + set_stacktrace_filename(); + opal_stacktrace_output_fileno = -1; + + free(filename_cpy); + } + else { + opal_stacktrace_output_fileno = fileno(stderr); + } + + + /* Setup the signals to catch */ memset(&act, 0, sizeof(act)); act.sa_sigaction = show_stackframe; act.sa_flags = SA_SIGINFO; @@ -450,8 +587,8 @@ int opal_util_register_stackhandlers (void) act.sa_flags |= SA_RESETHAND; #endif - for (tmp = next = opal_signal_string ; - next != NULL && *next != '\0'; + for (tmp = next = opal_signal_string ; + next != NULL && *next != '\0'; tmp = next + 1) { int sig; diff --git a/opal/util/stacktrace.h b/opal/util/stacktrace.h index 7c230010a00..c4484ae91aa 100644 --- a/opal/util/stacktrace.h +++ b/opal/util/stacktrace.h @@ -5,15 +5,16 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * @file @@ -24,6 +25,12 @@ #include "opal_config.h" +/* + * File descriptor to be used by the backtrace framework if opal_backtrace_print + * is passed NULL for it's FILE file pointer. 
+ */ +extern int opal_stacktrace_output_fileno; + /** * Output the current stack trace (not including the call to this * function) to the stream indicated. @@ -44,7 +51,7 @@ OPAL_DECLSPEC char *opal_stackframe_output_string(void); * @returnvalue OPAL_SUCCESS * @returnvalue OPAL_ERR_BAD_PARAM if the value in the signal-list * is not a valid signal-number - * + * */ OPAL_DECLSPEC int opal_util_register_stackhandlers (void); diff --git a/opal/util/strncpy.c b/opal/util/strncpy.c index 9cfcfead3e0..4781536200b 100644 --- a/opal/util/strncpy.c +++ b/opal/util/strncpy.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * This file is only here because some platforms have a broken strncpy diff --git a/opal/util/strncpy.h b/opal/util/strncpy.h index c389c4cdc08..8a217560ba1 100644 --- a/opal/util/strncpy.h +++ b/opal/util/strncpy.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/sys_limits.c b/opal/util/sys_limits.c index 1a6ea4d0e91..9be0a6120fb 100644 --- a/opal/util/sys_limits.c +++ b/opal/util/sys_limits.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -17,17 +17,15 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "opal_config.h" -#ifdef HAVE_STRING_H #include -#endif #include #ifdef HAVE_SYS_TYPES_H diff --git a/opal/util/sys_limits.h b/opal/util/sys_limits.h index 28217521fca..728f2d4b72d 100644 --- a/opal/util/sys_limits.h +++ b/opal/util/sys_limits.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -44,7 +44,7 @@ typedef struct opal_sys_limits_t { * values in the following locations - provide access here */ OPAL_DECLSPEC extern opal_sys_limits_t opal_sys_limits; - + /* Get the system resource limits and, if requested, set * them to the specified limit */ diff --git a/opal/util/timings.c b/opal/util/timings.c index 6c0590acd76..8c5888b0354 100644 --- a/opal/util/timings.c +++ b/opal/util/timings.c @@ -2,9 +2,9 @@ * Copyright (C) 2014 Artem Polyakov * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -15,9 +15,7 @@ #include #include -#ifdef HAVE_STRING_H #include -#endif #include #ifdef HAVE_SYS_TYPES_H @@ -77,8 +75,8 @@ int opal_timing_clocksync_read(char *fname) bool found = false; char *ptr = NULL; - char hname[1024] = "NA"; - if( gethostname(hname, 1024) ){ + char hname[OPAL_MAXHOSTNAMELEN] = "NA"; + if( gethostname(hname, sizeof(hname)) ){ opal_output(0, "opal_timing_clocksync_read(%s): Cannot gethostname", fname); } @@ -201,10 +199,10 @@ static get_ts_t _init_timestamping(opal_timer_type_t type) } } -static opal_timing_event_t *opal_timing_event_alloc(opal_timing_t *t) +opal_timing_event_t *opal_timing_event_alloc(opal_timing_t *t) { if( t->buffer_offset >= t->buffer_size ){ - // notch timings overhead + // notch timings overhead double alloc_begin = t->get_ts(); t->buffer = malloc(sizeof(opal_timing_event_t)*t->buffer_size); @@ -218,7 +216,7 @@ static opal_timing_event_t *opal_timing_event_alloc(opal_timing_t *t) t->buffer_offset = 0; t->buffer[0].fib = 1; t->buffer[0].ts_ovh = alloc_end - alloc_begin; - } + } int tmp = t->buffer_offset; (t->buffer_offset)++; return t->buffer + tmp; @@ -462,7 +460,7 @@ int opal_timing_report(opal_timing_t *t, char *fname) } _prepare_descriptions(t, &descr); - + buf = malloc(OPAL_TIMING_OUTBUF_SIZE+1); if( buf == NULL ){ rc = OPAL_ERR_OUT_OF_RESOURCE; diff 
--git a/opal/util/timings.h b/opal/util/timings.h index 9ca762d5d56..7e6a803cade 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -2,9 +2,9 @@ * Copyright (C) 2014 Artem Polyakov * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/uri.c b/opal/util/uri.c index b01f30fa70c..f679b9a3c96 100644 --- a/opal/util/uri.c +++ b/opal/util/uri.c @@ -4,9 +4,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/util/uri.h b/opal/util/uri.h index 7f578f13c94..87cdec8849c 100644 --- a/opal/util/uri.h +++ b/opal/util/uri.h @@ -2,9 +2,9 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * @file diff --git a/opal/win32/opal_inet.h b/opal/win32/opal_inet.h index 8b8e6e6f893..a3e6a71e88d 100644 --- a/opal/win32/opal_inet.h +++ b/opal/win32/opal_inet.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/win32/opal_misc.h b/opal/win32/opal_misc.h index d5e843c2eff..545f20ed523 100644 --- a/opal/win32/opal_misc.h +++ b/opal/win32/opal_misc.h @@ -64,10 +64,10 @@ static __inline unsigned int sleep(unsigned int seconds) { return 0; } -/* this function can currently ONLY return the page size. 
for it to +/* this function can currently ONLY return the page size. for it to do the entire sysconf range it needs to be extended */ static __inline size_t sysconf(int option) { - + SYSTEM_INFO sys_info; if( _SC_OPEN_MAX == option ) { @@ -88,7 +88,7 @@ static __inline size_t sysconf(int option) { #define F_SETFL 1 #define O_NONBLOCK 0 /* - * this function is currently defined only for setting the socket to be + * this function is currently defined only for setting the socket to be * in the non-blocking mode. Else this function returns error not implemented. * This calls ioctlsocket in the winsock library */ diff --git a/opal/win32/opal_process.c b/opal/win32/opal_process.c index 1bc21197754..92d0b32a4a2 100644 --- a/opal/win32/opal_process.c +++ b/opal/win32/opal_process.c @@ -27,14 +27,14 @@ int kill(pid_t pid, int sig) { /* XXX fill this in */ /* Need to connect to the child process Then raise the signal since Windows doesn;t - have the ability to 'send a signal' to a + have the ability to 'send a signal' to a process, a la the kill command in UNIX - + MSVC functions to look at: - OpenProcess - TerminateProcess - raise */ - + return 0; } diff --git a/opal/win32/opal_socket.h b/opal/win32/opal_socket.h index f05855139d6..dfd0f28e997 100644 --- a/opal/win32/opal_socket.h +++ b/opal/win32/opal_socket.h @@ -1,10 +1,10 @@ /* - * Copyright (c) 2010 High Performance Computing Center Stuttgart, + * Copyright (c) 2010 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/win32/opal_time.c b/opal/win32/opal_time.c index 0d2213faa49..6eb4f8b943d 100644 --- a/opal/win32/opal_time.c +++ b/opal/win32/opal_time.c @@ -28,13 +28,13 @@ int gettimeofday(struct timeval *tv, struct timezone *tz) FILETIME file_time; LARGE_INTEGER place_holder; __int64 time; - + /* returns 64 bit value which is the number of 100 nanosecond intervals since 1601(UTC) */ GetSystemTimeAsFileTime (&file_time); - /* Windows recommends that we should copy the FILETIME returned + /* Windows recommends that we should copy the FILETIME returned into a ULARGE_INTEGER and then perform the arithmetic on that */ place_holder.LowPart = file_time.dwLowDateTime; place_holder.HighPart = file_time.dwHighDateTime; @@ -46,7 +46,7 @@ int gettimeofday(struct timeval *tv, struct timezone *tz) /* convert 100 nanoseconds intervals into microseconds .. divide by 10 */ time /= 10; - + tv->tv_sec = (long)(time / 1000000); tv->tv_usec = (long)(time % 1000000); diff --git a/opal/win32/opal_time.h b/opal/win32/opal_time.h index a81cbc2e22f..daa2aff5806 100644 --- a/opal/win32/opal_time.h +++ b/opal/win32/opal_time.h @@ -70,7 +70,7 @@ } \ } while (0) #endif - + #ifndef timersub #define timersub(tvp, uvp, vvp) \ do { \ @@ -167,7 +167,7 @@ struct timespec /* NOTE: The use of timezone is obsolete even in linux and my gettimeofday -function is not going to support it either. So, please be aware of the +function is not going to support it either. 
So, please be aware of the fact that if you expect to pass anything here, then you are DEAD :-D */ struct timezone { diff --git a/opal/win32/opal_uio.c b/opal/win32/opal_uio.c index fd7ac9f3530..221a2297417 100644 --- a/opal/win32/opal_uio.c +++ b/opal/win32/opal_uio.c @@ -37,7 +37,7 @@ int writev( int fd, struct iovec * iov, int cnt ) return err; } return (int) sendlen; -} +} int readv( int fd, struct iovec * iov, int cnt ) @@ -52,5 +52,5 @@ int readv( int fd, struct iovec * iov, int cnt ) return err; } return (int) recvlen; -} +} diff --git a/opal/win32/opal_uio.h b/opal/win32/opal_uio.h index 6f07d221d04..b97be357a7d 100644 --- a/opal/win32/opal_uio.h +++ b/opal/win32/opal_uio.h @@ -42,7 +42,7 @@ BEGIN_C_DECLS */ OPAL_DECLSPEC int writev (int fd, struct iovec *iov, int cnt); -/* +/* readv reads data from file descriptor fd, and puts the result in the buffers described by iov. The number of buffers is specified by cnt. The buffers are filled in the order specified. Operates just diff --git a/opal/win32/opal_utsname.c b/opal/win32/opal_utsname.c index 68ff943c078..693e0ad5e67 100644 --- a/opal/win32/opal_utsname.c +++ b/opal/win32/opal_utsname.c @@ -5,14 +5,14 @@ Copyright (c) 2004-2014 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. - Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. $COPYRIGHT$ - + Additional copyrights may follow - + $HEADER$ */ @@ -22,13 +22,13 @@ /* This has to fill in the following information - 1. sysname: name of the operating system -- + 1. sysname: name of the operating system -- 2. nodename: GetComputerName 3. release: GetVersionEx 4. version: GetVersionEx 5. 
machine: GetSystemInfo */ - + int uname( struct utsname *un ) { TCHAR env_variable[] = "OS=%OS%"; @@ -37,7 +37,7 @@ int uname( struct utsname *un ) SYSTEM_INFO sys_info; TCHAR info_buf[OPAL_UTSNAME_LEN]; - info_buf_count = ExpandEnvironmentStrings( env_variable, info_buf, OPAL_UTSNAME_LEN); + info_buf_count = ExpandEnvironmentStrings( env_variable, info_buf, OPAL_UTSNAME_LEN); if (0 == info_buf_count) { snprintf( un->sysname, OPAL_UTSNAME_LEN, "Unknown" ); } else { @@ -46,13 +46,13 @@ int uname( struct utsname *un ) } info_buf_count = OPAL_UTSNAME_LEN; if (!GetComputerName( un->nodename, &info_buf_count)) { - snprintf(un->nodename, OPAL_UTSNAME_LEN, "undefined"); + snprintf(un->nodename, OPAL_UTSNAME_LEN, "undefined"); } - + version_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); if (!GetVersionEx(&version_info)) { - snprintf(un->release, OPAL_UTSNAME_LEN, "undefined"); - snprintf(un->version, OPAL_UTSNAME_LEN, "undefined"); + snprintf(un->release, OPAL_UTSNAME_LEN, "undefined"); + snprintf(un->version, OPAL_UTSNAME_LEN, "undefined"); } else { /* fill in both release and version information */ snprintf( un->release, OPAL_UTSNAME_LEN, "%d.%d.%d", diff --git a/opal/win32/opal_utsname.h b/opal/win32/opal_utsname.h index 18b611ef1a5..11bd45c52fd 100644 --- a/opal/win32/opal_utsname.h +++ b/opal/win32/opal_utsname.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/opal/win32/win_compat.h b/opal/win32/win_compat.h index 528519fa97d..c31d3bb74d8 100644 --- a/opal/win32/win_compat.h +++ b/opal/win32/win_compat.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -47,7 +47,7 @@ */ #define _CRT_RAND_S -/* It is always better to include windows.h with the lean and mean option. +/* It is always better to include windows.h with the lean and mean option. So, include it with that option and then include some which are required. Note: this file is included only on windows */ @@ -59,7 +59,7 @@ #endif /* VC_EXTRALEAN */ #include -/* FD_SETSIZE determines how many sockets windows can select() on. If not defined +/* FD_SETSIZE determines how many sockets windows can select() on. If not defined before including winsock2.h, it is defined to be 64. We are going to go ahead and make it 1024 for now. 
PLEASE CHECK IF THIS IS RIGHT */ #define FD_SETSIZE 1024 @@ -72,9 +72,7 @@ #include #include #include -#ifdef HAVE_STDINT_H #include -#endif /** * For all file io operations @@ -138,10 +136,10 @@ typedef unsigned int uint; #define unlink _unlink #define dup2 _dup2 #define dup _dup -#define write _write -#define read _read -#define fileno _fileno -#define isatty _isatty +#define write _write +#define read _read +#define fileno _fileno +#define isatty _isatty #define execvp _execvp #define S_ISDIR(STAT_MODE) ((STAT_MODE) & _S_IFDIR) #define S_ISREG(STAT_MODE) ((STAT_MODE) & _S_IFREG) @@ -178,6 +176,7 @@ typedef unsigned int uint; #define MAXPATHLEN _MAX_PATH #define MAXHOSTNAMELEN _MAX_PATH +#define OPAL_MAXHOSTNAMELEN (MAXHOSTNAMELEN + 1) #define PATH_MAX _MAX_PATH #define WTERMSIG(EXIT_CODE) (1) #define WIFEXITED(EXIT_CODE) (1) @@ -266,7 +265,7 @@ typedef unsigned int uint; #define SIGKILL 9 #define SIGUSR1 10 /* 11 is used for SIGSEGV on windows */ -#define SIGUSR2 12 +#define SIGUSR2 12 #define SIGPIPE 13 #define SIGALRM 14 /* 15 is used for SIGTERM on windows */ @@ -287,7 +286,7 @@ typedef unsigned int uint; #define SIGWINCH 30 #define SIGIO 31 -/* Note: +/* Note: * The two defines below are likely to break the orte_wait * functionality. The proper method of replacing these bits * of functionality is left for further investigated. @@ -298,8 +297,8 @@ typedef unsigned int uint; #define sigset_t int #define in_addr_t uint32_t -/* Need to define _Bool here for different version of VS. - The definition in opal_config_bottom.h won't help, +/* Need to define _Bool here for different version of VS. + The definition in opal_config_bottom.h won't help, as long as we have a mixed C and C++ projects in one solution. 
*/ #if defined(_MSC_VER) && _MSC_VER < 1600 #define _Bool BOOL diff --git a/opal/win_makefile b/opal/win_makefile index c1f2fe6c840..c86ff8e7555 100644 --- a/opal/win_makefile +++ b/opal/win_makefile @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is a simple makefile for windows which makes all the components @@ -85,13 +85,13 @@ C_SUBDIRS = \ mca/schema/base \ win32/generated_source \ win \ - win32 + win32 -EXTRA_DIRS = +EXTRA_DIRS = # # Files included by the dist that we don't want to compile -# +# EXCLUDE_FILES = dt_arch.c \ epoll.c \ epoll_sub.c \ @@ -187,7 +187,7 @@ ADDLIBS = \ oleaut32.lib \ uuid.lib \ odbc32.lib \ - odbccp32.lib + odbccp32.lib .c.obj: $(CXX) $(CXXFLAGS) $*.c @@ -231,9 +231,9 @@ install: @install -p ${topdir}/src/libmpi.dll ${installdir}/lib/libmpi.dll @install -d ${topdir}/include ${installdir}/include/ @install -p ${topdir}/include/ompi_config_bottom.h \ - ${installdir}/include/ompi_config_bottom.h + ${installdir}/include/ompi_config_bottom.h @install -p ${topdir}/src/win32/generated_include/opal_config.h \ - ${installdir}/include/opal_config.h + ${installdir}/include/opal_config.h @install -p ${topdir}/include/mpi.h ${installdir}/include/mpi.h @install -p ${topdir}/include/ompi_stdint.h ${installdir}/include/ompi_stdint.h @install -d ${topdir}/src/win32 ${installdir}/include/win32 diff --git a/orte/Doxyfile b/orte/Doxyfile index 6ac44c8263e..f992c6096cf 100644 --- a/orte/Doxyfile +++ b/orte/Doxyfile @@ -401,7 +401,7 @@ RECURSIVE = YES # excluded from the INPUT source files. 
This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. -EXCLUDE = +EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used select whether or not files or # directories that are symbolic links (a Unix filesystem feature) are diff --git a/orte/Makefile.am b/orte/Makefile.am index 2bcde23d682..6af81a22e39 100644 --- a/orte/Makefile.am +++ b/orte/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -13,9 +13,9 @@ # Copyright (c) 2015 Los Alamos National Security, LLC. All rights # reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/etc/Makefile.am b/orte/etc/Makefile.am index e657449169e..7bfaa3d648e 100644 --- a/orte/etc/Makefile.am +++ b/orte/etc/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -46,7 +46,7 @@ install-data-local: done # Only remove if exactly the same as what in our tree -# NOTE TO READER: Bourne shell if ... fi evaluates the body if +# NOTE TO READER: Bourne shell if ... 
fi evaluates the body if # the return of the evaluted command is 0 (as opposed to non-zero # as used by everyone else) uninstall-local: diff --git a/orte/etc/openmpi-default-hostfile b/orte/etc/openmpi-default-hostfile index 0051d155e40..ad5a0f79392 100644 --- a/orte/etc/openmpi-default-hostfile +++ b/orte/etc/openmpi-default-hostfile @@ -5,21 +5,21 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the default hostfile for Open MPI. Notice that it does not # contain any hosts (not even localhost). This file should only # contain hosts if a system administrator wants users to always have # the same set of default hosts, and is not using a batch scheduler -# (such as SLURM, PBS, etc.). +# (such as SLURM, PBS, etc.). # # Note that this file is *not* used when running in "managed" # environments (e.g., running in a job under a job scheduler, such as @@ -32,5 +32,5 @@ # components were able to find any hosts to run on (this behavior can # be disabled by excluding the localhost RAS component by specifying # the value "^localhost" [without the quotes] to the "ras" MCA -# parameter). +# parameter). diff --git a/orte/include/Makefile.am b/orte/include/Makefile.am index 1ce844d8628..1145edd6383 100644 --- a/orte/include/Makefile.am +++ b/orte/include/Makefile.am @@ -6,27 +6,27 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # headers = orte_config.h -nodist_headers = +nodist_headers = include orte/Makefile.am EXTRA_DIST = $(headers) -if WANT_INSTALL_HEADERS +if WANT_INSTALL_HEADERS ortedir = $(orteincludedir) nobase_dist_orte_HEADERS = $(headers) nobase_nodist_orte_HEADERS = $(nodist_headers) diff --git a/orte/include/orte/Makefile.am b/orte/include/orte/Makefile.am index 021c02f419d..26d995f64d4 100644 --- a/orte/include/orte/Makefile.am +++ b/orte/include/orte/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/include/orte/constants.h b/orte/include/orte/constants.h index eb9e7ac7eea..9d0fa2b77e8 100644 --- a/orte/include/orte/constants.h +++ b/orte/include/orte/constants.h @@ -11,7 +11,6 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -134,19 +133,7 @@ enum { ORTE_ERR_SENSOR_LIMIT_EXCEEDED = (ORTE_ERR_BASE - 42), ORTE_ERR_ALLOCATION_PENDING = (ORTE_ERR_BASE - 43), ORTE_ERR_NO_PATH_TO_TARGET = (ORTE_ERR_BASE - 44), - ORTE_ERR_OP_IN_PROGRESS = (ORTE_ERR_BASE - 45), - ORTE_ERR_OPEN_CHANNEL_PEER_FAIL = (ORTE_ERR_BASE - 46), - ORTE_ERR_OPEN_CHANNEL_PEER_REJECT = (ORTE_ERR_BASE - 47), - ORTE_ERR_QOS_TYPE_UNSUPPORTED = (ORTE_ERR_BASE - 48), - ORTE_ERR_QOS_ACK_WINDOW_FULL = (ORTE_ERR_BASE - 49), - ORTE_ERR_ACK_TIMEOUT_SENDER = (ORTE_ERR_BASE - 50), - ORTE_ERR_ACK_TIMEOUT_RECEIVER = (ORTE_ERR_BASE - 51), - ORTE_ERR_LOST_MSG_IN_WINDOW = (ORTE_ERR_BASE - 52), - ORTE_ERR_CHANNEL_BUSY = (ORTE_ERR_BASE - 53), - ORTE_ERR_DUPLICATE_MSG = (ORTE_ERR_BASE - 54), - ORTE_ERR_OUT_OF_ORDER_MSG = (ORTE_ERR_BASE - 55), - ORTE_ERR_OPEN_CHANNEL_DUPLICATE = (ORTE_ERR_BASE - 56), - ORTE_ERR_FORCE_SELECT = (ORTE_ERR_BASE - 57) + ORTE_ERR_OP_IN_PROGRESS = (ORTE_ERR_BASE - 45) }; #define ORTE_ERR_MAX (ORTE_ERR_BASE - 100) diff --git a/orte/include/orte/types.h b/orte/include/orte/types.h index e3fcb0d9d32..2f9306c33b1 100644 --- a/orte/include/orte/types.h +++ b/orte/include/orte/types.h @@ -143,6 +143,7 @@ typedef struct { void *ptr; // never packed or passed anywhere orte_vpid_t vpid; orte_jobid_t jobid; + opal_process_name_t name; } data; } orte_attribute_t; OPAL_DECLSPEC OBJ_CLASS_DECLARATION(orte_attribute_t); diff --git a/orte/include/orte/version.h.in b/orte/include/orte/version.h.in index b9b373e7371..09e489121cd 100644 --- a/orte/include/orte/version.h.in +++ b/orte/include/orte/version.h.in @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * This file should be included by any file that needs full diff --git a/orte/mca/Makefile.am b/orte/mca/Makefile.am index 8d3fca3fb4f..89dcb0f652b 100644 --- a/orte/mca/Makefile.am +++ b/orte/mca/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/common/Makefile.am b/orte/mca/common/Makefile.am index 33bbb5f2a33..4567c654307 100644 --- a/orte/mca/common/Makefile.am +++ b/orte/mca/common/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/common/alps/Makefile.am b/orte/mca/common/alps/Makefile.am index f7e8be173d6..e01aa93436a 100644 --- a/orte/mca/common/alps/Makefile.am +++ b/orte/mca/common/alps/Makefile.am @@ -1,11 +1,11 @@ # # Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -15,7 +15,7 @@ headers = \ common_alps.h sources = \ - common_alps.c + common_alps.c lib_LTLIBRARIES = @@ -32,7 +32,7 @@ endif lib@ORTE_LIB_PREFIX@mca_common_alps_la_SOURCES = $(headers) $(sources) lib@ORTE_LIB_PREFIX@mca_common_alps_la_CPPFLAGS = $(common_alps_CPPFLAGS) lib@ORTE_LIB_PREFIX@mca_common_alps_la_LDFLAGS = \ - -version-info $(libmca_common_alps_so_version) \ + -version-info $(libmca_orte_common_alps_so_version) \ $(common_alps_LDFLAGS) lib@ORTE_LIB_PREFIX@mca_common_alps_la_LIBADD = $(common_alps_LIBS) lib@ORTE_LIB_PREFIX@mca_common_alps_noinst_la_SOURCES = $(headers) $(sources) diff --git a/orte/mca/common/alps/common_alps.c b/orte/mca/common/alps/common_alps.c index 6fd77ad0545..9c34da803a4 100644 --- a/orte/mca/common/alps/common_alps.c +++ b/orte/mca/common/alps/common_alps.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -57,7 +57,7 @@ int orte_common_alps_proc_in_pagg(bool *flag) snprintf(task_is_app_fname,sizeof(task_is_app_fname), "/proc/self/task/%ld/task_is_app",syscall(SYS_gettid)); fd_task_is_app = fopen(task_is_app_fname, "r"); - if (fd_task_is_app != NULL) { /* okay we're in a PAGG container, + if (fd_task_is_app != NULL) { /* okay we're in a PAGG container, and we are an app task (not just a process running on a mom node, for example), */ *flag = 1; diff --git a/orte/mca/common/alps/common_alps.h b/orte/mca/common/alps/common_alps.h index 21333f733d7..701dc165b11 100644 --- a/orte/mca/common/alps/common_alps.h +++ b/orte/mca/common/alps/common_alps.h @@ -7,9 +7,9 @@ * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/common/alps/configure.m4 b/orte/mca/common/alps/configure.m4 index 519dd23b0dd..64f83199426 100644 --- a/orte/mca/common/alps/configure.m4 +++ b/orte/mca/common/alps/configure.m4 @@ -22,7 +22,7 @@ # $HEADER$ # -# MCA_orte_common_alps_CONFIG([action-if-can-compile], +# MCA_orte_common_alps_CONFIG([action-if-can-compile], # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_orte_common_alps_CONFIG],[ diff --git a/orte/mca/dfs/Makefile.am b/orte/mca/dfs/Makefile.am index 754c339692a..c374dfcff8a 100644 --- a/orte/mca/dfs/Makefile.am +++ b/orte/mca/dfs/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/dfs/app/Makefile.am b/orte/mca/dfs/app/Makefile.am index 9e35fa68091..c146d483fe9 100644 --- a/orte/mca/dfs/app/Makefile.am +++ b/orte/mca/dfs/app/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/dfs/app/dfs_app.c b/orte/mca/dfs/app/dfs_app.c index 3bd69303b0d..81a1f4be272 100644 --- a/orte/mca/dfs/app/dfs_app.c +++ b/orte/mca/dfs/app/dfs_app.c @@ -5,9 +5,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -17,9 +17,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_FCNTL_H #include #endif @@ -29,7 +27,7 @@ #include "opal/util/output.h" #include "opal/util/uri.h" #include "opal/dss/dss.h" -#include "opal/mca/dstore/dstore.h" +#include "opal/mca/pmix/pmix.h" #include "orte/util/error_strings.h" #include "orte/util/name_fns.h" @@ -506,9 +504,9 @@ static void process_opens(int fd, short args, void *cbdata) opal_buffer_t *buffer; char *scheme, *host, *filename; orte_process_name_t daemon; - opal_list_t myvals; - opal_value_t *kv; - + opal_list_t lt; + opal_namelist_t *nm; + /* get the scheme to determine if we can process locally or not */ if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); @@ -555,17 +553,15 @@ static void process_opens(int fd, short args, void *cbdata) opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s looking for daemon on host %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host); - OBJ_CONSTRUCT(&myvals, opal_list_t); - if (ORTE_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal, - ORTE_NAME_WILDCARD, - host, &myvals))) { + OBJ_CONSTRUCT(<, 
opal_list_t); + if (ORTE_SUCCESS != (rc = opal_pmix.resolve_peers(host, daemon.jobid, <))) { ORTE_ERROR_LOG(rc); - OPAL_LIST_DESTRUCT(&myvals); + OBJ_DESTRUCT(<); goto complete; } - kv = (opal_value_t*)opal_list_get_first(&myvals); - daemon.vpid = kv->data.uint32; - OPAL_LIST_DESTRUCT(&myvals); + nm = (opal_namelist_t*)opal_list_get_first(<); + daemon.vpid = nm->name.vpid; + OPAL_LIST_DESTRUCT(<); opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s file %s on host %s daemon %s", @@ -612,7 +608,7 @@ static void process_opens(int fd, short args, void *cbdata) opal_list_remove_item(&requests, &dfs->super); goto complete; } - + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending open file request to %s file %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -718,7 +714,7 @@ static void process_close(int fd, short args, void *cbdata) ORTE_ERROR_LOG(rc); goto complete; } - + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending close file request to %s for fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1106,7 +1102,7 @@ static void process_reads(int fd, short args, void *cbdata) ORTE_ERROR_LOG(rc); goto complete; } - + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending read file request to %s for fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/dfs/app/dfs_app.h b/orte/mca/dfs/app/dfs_app.h index 027ddb9c268..fef69fdf582 100644 --- a/orte/mca/dfs/app/dfs_app.h +++ b/orte/mca/dfs/app/dfs_app.h @@ -2,15 +2,15 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** * @file - * + * */ #ifndef MCA_dfs_app_EXPORT_H diff --git a/orte/mca/dfs/app/dfs_app_component.c b/orte/mca/dfs/app/dfs_app_component.c index 405288954e4..395c98da022 100644 --- a/orte/mca/dfs/app/dfs_app_component.c +++ b/orte/mca/dfs/app/dfs_app_component.c @@ -4,9 +4,9 @@ * reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +22,7 @@ /* * Public string for version number */ -const char *orte_dfs_app_component_version_string = +const char *orte_dfs_app_component_version_string = "ORTE DFS app MCA component version " ORTE_VERSION; /* @@ -38,7 +38,7 @@ static int dfs_app_component_query(mca_base_module_t **module, int *priority); */ orte_dfs_base_component_t mca_dfs_app_component = { - /* Handle the general mca_component_t struct containing + /* Handle the general mca_component_t struct containing * meta information about the component */ .base_version = { @@ -59,7 +59,7 @@ orte_dfs_base_component_t mca_dfs_app_component = }, }; -static int dfs_app_open(void) +static int dfs_app_open(void) { return ORTE_SUCCESS; } @@ -75,9 +75,9 @@ static int dfs_app_component_query(mca_base_module_t **module, int *priority) /* set our priority high as we are the default for apps */ *priority = 1000; *module = (mca_base_module_t *)&orte_dfs_app_module; - return ORTE_SUCCESS; + return ORTE_SUCCESS; } - + *priority = -1; *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/dfs/app/owner.txt b/orte/mca/dfs/app/owner.txt index 55663d3bb8a..4ad6f408ca3 100644 --- a/orte/mca/dfs/app/owner.txt +++ b/orte/mca/dfs/app/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: ? -status: ? +owner: INTEL +status: maintenance diff --git a/orte/mca/dfs/base/Makefile.am b/orte/mca/dfs/base/Makefile.am index 6d44178d3cf..eb036387189 100644 --- a/orte/mca/dfs/base/Makefile.am +++ b/orte/mca/dfs/base/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/dfs/base/base.h b/orte/mca/dfs/base/base.h index 991f3e3cd06..cca2e8909d3 100644 --- a/orte/mca/dfs/base/base.h +++ b/orte/mca/dfs/base/base.h @@ -1,9 +1,9 @@ /* - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: diff --git a/orte/mca/dfs/base/dfs_base_frame.c b/orte/mca/dfs/base/dfs_base_frame.c index 8b88fb3c9d0..77ce6171423 100644 --- a/orte/mca/dfs/base/dfs_base_frame.c +++ b/orte/mca/dfs/base/dfs_base_frame.c @@ -1,12 +1,12 @@ /* - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -14,9 +14,7 @@ #include "orte_config.h" #include "orte/constants.h" -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_UNISTD_H #include #endif diff --git a/orte/mca/dfs/base/dfs_base_select.c b/orte/mca/dfs/base/dfs_base_select.c index 16484b65350..bf0a7c2d678 100644 --- a/orte/mca/dfs/base/dfs_base_select.c +++ b/orte/mca/dfs/base/dfs_base_select.c @@ -1,9 +1,11 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -11,9 +13,7 @@ #include "orte_config.h" #include "orte/constants.h" -#ifdef HAVE_STRING_H #include -#endif #include "orte/mca/mca.h" #include "opal/mca/base/base.h" @@ -33,7 +33,7 @@ int orte_dfs_base_select(void) if (OPAL_SUCCESS != mca_base_select("dfs", orte_dfs_base_framework.framework_output, &orte_dfs_base_framework.framework_components, (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component)) { + (mca_base_component_t **) &best_component, NULL)) { /* This will only happen if no component was selected, which * is okay - we don't have to select anything */ diff --git a/orte/mca/dfs/base/owner.txt b/orte/mca/dfs/base/owner.txt index 55663d3bb8a..4ad6f408ca3 100644 --- a/orte/mca/dfs/base/owner.txt +++ b/orte/mca/dfs/base/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: ? -status: ? +owner: INTEL +status: maintenance diff --git a/orte/mca/dfs/dfs.h b/orte/mca/dfs/dfs.h index 7f2b6320a23..136c0d76b67 100644 --- a/orte/mca/dfs/dfs.h +++ b/orte/mca/dfs/dfs.h @@ -114,7 +114,7 @@ typedef void (*orte_dfs_base_module_post_file_map_fn_t)(opal_buffer_t *buf, void *cbdata); /* Get the file map for a process - * + * * Returns the file map associated with the specified process name. If * NULL is provided, then all known process maps will be returned in the * byte object. It is the responsibility of the caller to unpack it, so diff --git a/orte/mca/dfs/orted/Makefile.am b/orte/mca/dfs/orted/Makefile.am index 8f539db62b2..e33445e9f4b 100644 --- a/orte/mca/dfs/orted/Makefile.am +++ b/orte/mca/dfs/orted/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/dfs/orted/dfs_orted.c b/orte/mca/dfs/orted/dfs_orted.c index c49ffe415b5..574db2fa005 100644 --- a/orte/mca/dfs/orted/dfs_orted.c +++ b/orte/mca/dfs/orted/dfs_orted.c @@ -6,9 +6,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -18,9 +18,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_FCNTL_H #include #endif @@ -406,7 +404,7 @@ static void process_opens(int fd, short args, void *cbdata) opal_list_remove_item(&requests, &dfs->super); goto complete; } - + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending open file request to %s file %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -520,7 +518,7 @@ static void process_close(int fd, short args, void *cbdata) ORTE_ERROR_LOG(rc); goto complete; } - + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending close file request to %s for fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -883,7 +881,7 @@ static void process_reads(int fd, short args, void *cbdata) ORTE_ERROR_LOG(rc); goto complete; } - + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending read file request to %s for fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/dfs/orted/dfs_orted.h b/orte/mca/dfs/orted/dfs_orted.h index f90e87b14ee..b2b2f440964 100644 --- a/orte/mca/dfs/orted/dfs_orted.h +++ b/orte/mca/dfs/orted/dfs_orted.h @@ -3,15 +3,15 @@ * All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** * @file - * + * */ #ifndef MCA_dfs_orted_EXPORT_H diff --git a/orte/mca/dfs/orted/dfs_orted_component.c b/orte/mca/dfs/orted/dfs_orted_component.c index f9ac8b178c3..f102b898b15 100644 --- a/orte/mca/dfs/orted/dfs_orted_component.c +++ b/orte/mca/dfs/orted/dfs_orted_component.c @@ -4,9 +4,9 @@ * reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +22,7 @@ /* * Public string for version number */ -const char *orte_dfs_orted_component_version_string = +const char *orte_dfs_orted_component_version_string = "ORTE DFS orted MCA component version " ORTE_VERSION; int orte_dfs_orted_num_worker_threads = 0; @@ -41,7 +41,7 @@ static int dfs_orted_component_query(mca_base_module_t **module, int *priority); */ orte_dfs_base_component_t mca_dfs_orted_component = { - /* Handle the general mca_component_t struct containing + /* Handle the general mca_component_t struct containing * meta information about the component itdefault_orted */ .base_version = { @@ -75,7 +75,7 @@ static int dfs_orted_register(void) return ORTE_SUCCESS; } -static int dfs_orted_open(void) +static int dfs_orted_open(void) { return ORTE_SUCCESS; } @@ -91,9 +91,9 @@ static int dfs_orted_component_query(mca_base_module_t **module, int *priority) /* we are the default component for daemons and HNP */ *priority = 1000; *module = (mca_base_module_t *)&orte_dfs_orted_module; - return ORTE_SUCCESS; + return ORTE_SUCCESS; } - + *priority = -1; *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/dfs/orted/owner.txt b/orte/mca/dfs/orted/owner.txt index 55663d3bb8a..4ad6f408ca3 100644 --- a/orte/mca/dfs/orted/owner.txt +++ b/orte/mca/dfs/orted/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: ? -status: ? 
+owner: INTEL +status: maintenance diff --git a/orte/mca/dfs/test/Makefile.am b/orte/mca/dfs/test/Makefile.am index b73680ec297..965483fa862 100644 --- a/orte/mca/dfs/test/Makefile.am +++ b/orte/mca/dfs/test/Makefile.am @@ -1,9 +1,9 @@ # # Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/dfs/test/dfs_test.c b/orte/mca/dfs/test/dfs_test.c index f699a4a3741..0ebab3f56cf 100644 --- a/orte/mca/dfs/test/dfs_test.c +++ b/orte/mca/dfs/test/dfs_test.c @@ -5,9 +5,9 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -17,9 +17,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_FCNTL_H #include #endif @@ -29,7 +27,7 @@ #include "opal/util/output.h" #include "opal/util/uri.h" #include "opal/dss/dss.h" -#include "opal/mca/dstore/dstore.h" +#include "opal/mca/pmix/pmix.h" #include "orte/util/error_strings.h" #include "orte/util/name_fns.h" @@ -448,14 +446,12 @@ static void process_opens(int fd, short args, void *cbdata) opal_buffer_t *buffer; char *scheme, *host=NULL, *filename=NULL; orte_process_name_t daemon; - bool found; - orte_vpid_t v; - opal_list_t myvals; - opal_value_t *kv; - + opal_list_t lt; + opal_namelist_t *nm; + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s PROCESSING OPEN", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - + /* get the scheme to determine if we can process locally or not */ if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); @@ -485,33 +481,16 @@ static void process_opens(int fd, short args, void *cbdata) /* ident the daemon on that host */ daemon.jobid = ORTE_PROC_MY_DAEMON->jobid; - found = false; - for (v=0; v < orte_process_info.num_daemons; v++) { - char 
*hostname; - daemon.vpid = v; - /* fetch the hostname where this daemon is located */ - OBJ_CONSTRUCT(&myvals, opal_list_t); - if (ORTE_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal, - &daemon, - OPAL_DSTORE_HOSTNAME, &myvals))) { - ORTE_ERROR_LOG(rc); - OPAL_LIST_DESTRUCT(&myvals); - goto complete; - } - kv = (opal_value_t*)opal_list_get_first(&myvals); - hostname = kv->data.string; - OPAL_LIST_DESTRUCT(&myvals); - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s GOT HOST %s HOSTNAME %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host, hostname); - if (0 == strcmp(host, hostname)) { - found = true; - break; - } - } - if (!found) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + OBJ_CONSTRUCT(<, opal_list_t); + if (ORTE_SUCCESS != (rc = opal_pmix.resolve_peers(host, daemon.jobid, <))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(<); goto complete; } + nm = (opal_namelist_t*)opal_list_get_first(<); + daemon.vpid = nm->name.vpid; + OPAL_LIST_DESTRUCT(<); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s file %s on host %s daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -543,7 +522,7 @@ static void process_opens(int fd, short args, void *cbdata) opal_list_remove_item(&requests, &dfs->super); goto complete; } - + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending open file request to %s file %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -651,7 +630,7 @@ static void process_close(int fd, short args, void *cbdata) ORTE_ERROR_LOG(rc); goto complete; } - + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending close file request to %s for fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -957,7 +936,7 @@ static void process_reads(int fd, short args, void *cbdata) ORTE_ERROR_LOG(rc); goto complete; } - + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending read file request to %s for fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/dfs/test/dfs_test.h 
b/orte/mca/dfs/test/dfs_test.h index 5b6d4235303..d9ef7b301bb 100644 --- a/orte/mca/dfs/test/dfs_test.h +++ b/orte/mca/dfs/test/dfs_test.h @@ -2,15 +2,15 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** * @file - * + * */ #ifndef MCA_dfs_test_EXPORT_H diff --git a/orte/mca/dfs/test/dfs_test_component.c b/orte/mca/dfs/test/dfs_test_component.c index fda025e504e..11ec09ced4e 100644 --- a/orte/mca/dfs/test/dfs_test_component.c +++ b/orte/mca/dfs/test/dfs_test_component.c @@ -4,9 +4,9 @@ * reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +22,7 @@ /* * Public string for version number */ -const char *orte_dfs_test_component_version_string = +const char *orte_dfs_test_component_version_string = "ORTE DFS test MCA component version " ORTE_VERSION; /* @@ -39,7 +39,7 @@ static int dfs_test_component_query(mca_base_module_t **module, int *priority); */ orte_dfs_base_component_t mca_dfs_test_component = { - /* Handle the general mca_component_t struct containing + /* Handle the general mca_component_t struct containing * meta information about the component */ .base_version = { @@ -74,8 +74,8 @@ static int dfs_test_register(void) return ORTE_SUCCESS; } - -static int dfs_test_open(void) + +static int dfs_test_open(void) { return ORTE_SUCCESS; } @@ -91,9 +91,9 @@ static int dfs_test_component_query(mca_base_module_t **module, int *priority) /* set our priority high so apps use us */ *priority = 10000; *module = (mca_base_module_t *)&orte_dfs_test_module; - return ORTE_SUCCESS; + return ORTE_SUCCESS; } - + *priority = -1; *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/errmgr/Makefile.am b/orte/mca/errmgr/Makefile.am index c13982069e0..499b67d20a2 100644 --- a/orte/mca/errmgr/Makefile.am +++ b/orte/mca/errmgr/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The 
University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/errmgr/base/Makefile.am b/orte/mca/errmgr/base/Makefile.am index 0ace6905bda..4d3d5f8e1c6 100644 --- a/orte/mca/errmgr/base/Makefile.am +++ b/orte/mca/errmgr/base/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,5 +26,4 @@ headers += \ libmca_errmgr_la_SOURCES += \ base/errmgr_base_select.c \ base/errmgr_base_frame.c \ - base/errmgr_base_fns.c \ - base/errmgr_base_tool.c + base/errmgr_base_fns.c diff --git a/orte/mca/errmgr/base/base.h b/orte/mca/errmgr/base/base.h index 1b36e03d257..6f537d77dbe 100644 --- a/orte/mca/errmgr/base/base.h +++ b/orte/mca/errmgr/base/base.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: @@ -31,7 +31,6 @@ #include "opal/class/opal_list.h" #include "orte/mca/mca.h" -#include "orte/mca/snapc/base/base.h" #include "orte/mca/errmgr/errmgr.h" @@ -44,55 +43,6 @@ ORTE_DECLSPEC extern mca_base_framework_t orte_errmgr_base_framework; /* select a component */ ORTE_DECLSPEC int orte_errmgr_base_select(void); -/** - * Interfaces for orte-migrate tool - */ -#if OPAL_ENABLE_FT_CR -/** - * Migrating States - */ -#define ORTE_ERRMGR_MIGRATE_STATE_ERROR (ORTE_SNAPC_CKPT_MAX + 1) -#define ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS (ORTE_SNAPC_CKPT_MAX + 2) -#define ORTE_ERRMGR_MIGRATE_STATE_NONE (ORTE_SNAPC_CKPT_MAX + 3) -#define ORTE_ERRMGR_MIGRATE_STATE_REQUEST (ORTE_SNAPC_CKPT_MAX + 4) -#define ORTE_ERRMGR_MIGRATE_STATE_RUNNING (ORTE_SNAPC_CKPT_MAX + 5) -#define ORTE_ERRMGR_MIGRATE_STATE_RUN_CKPT (ORTE_SNAPC_CKPT_MAX + 6) -#define ORTE_ERRMGR_MIGRATE_STATE_STARTUP (ORTE_SNAPC_CKPT_MAX + 7) -#define ORTE_ERRMGR_MIGRATE_STATE_FINISH (ORTE_SNAPC_CKPT_MAX + 8) -#define ORTE_ERRMGR_MIGRATE_MAX (ORTE_SNAPC_CKPT_MAX + 9) - -/* - * Commands for command line tool and ErrMgr interaction - */ -typedef uint8_t orte_errmgr_tool_cmd_flag_t; -#define ORTE_ERRMGR_MIGRATE_TOOL_CMD OPAL_UINT8 -#define ORTE_ERRMGR_MIGRATE_TOOL_INIT_CMD 1 -#define ORTE_ERRMGR_MIGRATE_TOOL_UPDATE_CMD 2 - -/* Initialize/Finalize the orte-migrate communication functionality */ -ORTE_DECLSPEC int orte_errmgr_base_tool_init(void); -ORTE_DECLSPEC int 
orte_errmgr_base_tool_finalize(void); - -ORTE_DECLSPEC void orte_errmgr_base_migrate_state_notify(int state); -ORTE_DECLSPEC int orte_errmgr_base_migrate_state_str(char ** state_str, int state); - -ORTE_DECLSPEC int orte_errmgr_base_migrate_update(int status); - -/* - * Interfaces for C/R related recovery - */ -ORTE_DECLSPEC int orte_errmgr_base_update_app_context_for_cr_recovery(orte_job_t *jobdata, - orte_proc_t *proc, - opal_list_t *local_snapshots); - -ORTE_DECLSPEC int orte_errmgr_base_restart_job(orte_jobid_t jobid, char * global_handle, int seq_num); -ORTE_DECLSPEC int orte_errmgr_base_migrate_job(orte_jobid_t jobid, orte_snapc_base_request_op_t *datum); - -/* Interface to report process state to the notifier */ -ORTE_DECLSPEC void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_name_t *proc); - -#endif /* OPAL_ENABLE_FT_CR */ - END_C_DECLS #endif diff --git a/orte/mca/errmgr/base/errmgr_base_fns.c b/orte/mca/errmgr/base/errmgr_base_fns.c index e965d5756c0..902d34f6a2d 100644 --- a/orte/mca/errmgr/base/errmgr_base_fns.c +++ b/orte/mca/errmgr/base/errmgr_base_fns.c @@ -5,21 +5,21 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. 
All rights reserved * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,9 +27,7 @@ #include "orte_config.h" #include "orte/constants.h" -#ifdef HAVE_STRING_H #include -#endif #if HAVE_SYS_TYPES_H #include #endif /* HAVE_SYS_TYPES_H */ @@ -56,8 +54,6 @@ #include "opal/util/output.h" #include "opal/util/basename.h" #include "opal/util/argv.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" #include "orte/util/name_fns.h" #include "orte/util/session_dir.h" @@ -75,10 +71,6 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" #include "orte/mca/routed/routed.h" -#include "orte/mca/snapc/snapc.h" -#include "orte/mca/snapc/base/base.h" -#include "orte/mca/sstore/sstore.h" -#include "orte/mca/sstore/base/base.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/base/base.h" @@ -190,7 +182,7 @@ void orte_errmgr_base_log(int error_code, char *filename, int line) /* if the error is silent, say nothing */ return; } - + opal_output(0, "%s ORTE_ERROR_LOG: %s in file %s at line %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errstring, filename, line); @@ -199,7 +191,7 @@ void orte_errmgr_base_log(int error_code, char *filename, int line) void orte_errmgr_base_abort(int error_code, char *fmt, ...) { va_list arglist; - + /* If there was a message, output it */ va_start(arglist, fmt); if( NULL != fmt ) { @@ -209,7 +201,7 @@ void orte_errmgr_base_abort(int error_code, char *fmt, ...) free( buffer ); } va_end(arglist); - + /* if I am a daemon or the HNP... 
*/ if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { /* whack my local procs */ @@ -339,403 +331,6 @@ void orte_errmgr_base_execute_error_callbacks(opal_pointer_array_t *errors) /******************** * Utility functions ********************/ -#if OPAL_ENABLE_FT_CR - -void orte_errmgr_base_migrate_state_notify(int state) -{ - switch(state) { - case ORTE_ERRMGR_MIGRATE_STATE_ERROR: - case ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS: - opal_output(0, "%d: Migration failed for process %s.", - orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); - break; - case ORTE_ERRMGR_MIGRATE_STATE_FINISH: - opal_output(0, "%d: Migration successful for process %s.", - orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); - break; - - case ORTE_ERRMGR_MIGRATE_STATE_NONE: - case ORTE_ERRMGR_MIGRATE_STATE_REQUEST: - case ORTE_ERRMGR_MIGRATE_STATE_RUNNING: - case ORTE_ERRMGR_MIGRATE_STATE_RUN_CKPT: - case ORTE_ERRMGR_MIGRATE_STATE_STARTUP: - case ORTE_ERRMGR_MIGRATE_MAX: - default: - break; - } -} - -void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_name_t *proc) -{ - if (NULL != proc) { - switch(state) { - case ORTE_PROC_STATE_ABORTED: - case ORTE_PROC_STATE_ABORTED_BY_SIG: - case ORTE_PROC_STATE_TERM_WO_SYNC: - case ORTE_PROC_STATE_TERMINATED: - case ORTE_PROC_STATE_KILLED_BY_CMD: - case ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED: - opal_output(0, "%d: Process %s is dead.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - break; - - case ORTE_PROC_STATE_HEARTBEAT_FAILED: - opal_output(0, "%d: Process %s is unreachable.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - - case ORTE_PROC_STATE_COMM_FAILED: - opal_output(0, "%d: Failed to communicate with process %s.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - break; - - case ORTE_PROC_STATE_CALLED_ABORT: - case ORTE_PROC_STATE_FAILED_TO_START: - opal_output(0, "%d: Process %s has called abort.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - 
break; - case ORTE_PROC_STATE_MIGRATING: - default: - break; - } - } -} - -int orte_errmgr_base_migrate_state_str(char ** state_str, int state) -{ - switch(state) { - case ORTE_ERRMGR_MIGRATE_STATE_NONE: - *state_str = strdup(" -- "); - break; - case ORTE_ERRMGR_MIGRATE_STATE_REQUEST: - *state_str = strdup("Requested"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_RUNNING: - *state_str = strdup("Running"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_RUN_CKPT: - *state_str = strdup("Checkpointing"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_STARTUP: - *state_str = strdup("Restarting"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_FINISH: - *state_str = strdup("Finished"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_ERROR: - *state_str = strdup("Error"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS: - *state_str = strdup("Error: Migration in progress"); - break; - default: - asprintf(state_str, "Unknown %d", state); - break; - } - - return ORTE_SUCCESS; -} -#endif - -#if OPAL_ENABLE_FT_CR -int orte_errmgr_base_update_app_context_for_cr_recovery(orte_job_t *jobdata, - orte_proc_t *proc, - opal_list_t *local_snapshots) -{ - int exit_status = ORTE_SUCCESS; - opal_list_item_t *item = NULL; - orte_std_cntr_t i_app; - int argc = 0; - orte_app_context_t *cur_app_context = NULL; - orte_app_context_t *new_app_context = NULL; - orte_sstore_base_local_snapshot_info_t *vpid_snapshot = NULL; - char *reference_fmt_str = NULL; - char *location_str = NULL; - char *cache_location_str = NULL; - char *ref_location_fmt_str = NULL; - char *tmp_str = NULL; - char *global_snapshot_ref = NULL; - char *global_snapshot_seq = NULL; - char *sload; - - /* - * Get the snapshot restart command for this process - * JJH CLEANUP: Pass in the vpid_snapshot, so we don't have to look it up every time? 
- */ - for(item = opal_list_get_first(local_snapshots); - item != opal_list_get_end(local_snapshots); - item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_sstore_base_local_snapshot_info_t*)item; - if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, - &vpid_snapshot->process_name, - &proc->name) ) { - break; - } - else { - vpid_snapshot = NULL; - } - } - - if( NULL == vpid_snapshot ) { - ORTE_ERROR_LOG(ORTE_ERROR); - exit_status = ORTE_ERROR; - goto cleanup; - } - - orte_sstore.get_attr(vpid_snapshot->ss_handle, - SSTORE_METADATA_LOCAL_SNAP_REF_FMT, - &reference_fmt_str); - orte_sstore.get_attr(vpid_snapshot->ss_handle, - SSTORE_METADATA_LOCAL_SNAP_LOC, - &location_str); - orte_sstore.get_attr(vpid_snapshot->ss_handle, - SSTORE_METADATA_LOCAL_SNAP_REF_LOC_FMT, - &ref_location_fmt_str); - orte_sstore.get_attr(vpid_snapshot->ss_handle, - SSTORE_METADATA_GLOBAL_SNAP_REF, - &global_snapshot_ref); - orte_sstore.get_attr(vpid_snapshot->ss_handle, - SSTORE_METADATA_GLOBAL_SNAP_SEQ, - &global_snapshot_seq); - - /* - * Find current app_context - */ - cur_app_context = NULL; - for(i_app = 0; i_app < opal_pointer_array_get_size(jobdata->apps); ++i_app) { - cur_app_context = (orte_app_context_t *)opal_pointer_array_get_item(jobdata->apps, - i_app); - if( NULL == cur_app_context ) { - continue; - } - if(proc->app_idx == cur_app_context->idx) { - break; - } - } - - if( NULL == cur_app_context ) { - ORTE_ERROR_LOG(ORTE_ERROR); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * if > 1 processes in this app context - * Create a new app_context - * Copy over attributes - * Add it to the job_t data structure - * Associate it with this process in the job - * else - * Reuse this app_context - */ - if( cur_app_context->num_procs > 1 ) { - - /* Create a new app_context */ - opal_dss.copy((void**)&new_app_context, cur_app_context, ORTE_APP_CONTEXT); - - /* clear unused attributes */ - new_app_context->idx = cur_app_context->idx; - free(new_app_context->app); - 
new_app_context->app = NULL; - new_app_context->num_procs = 1; - opal_argv_free(new_app_context->argv); - new_app_context->argv = NULL; - - orte_remove_attribute(&new_app_context->attributes, ORTE_APP_PRELOAD_BIN); - - asprintf(&tmp_str, reference_fmt_str, vpid_snapshot->process_name.vpid); - asprintf(&sload, - "%s:%s:%s:%s:%s:%s", - location_str, - global_snapshot_ref, - tmp_str, - (vpid_snapshot->compress_comp == NULL ? "" : vpid_snapshot->compress_comp), - (vpid_snapshot->compress_postfix == NULL ? "" : vpid_snapshot->compress_postfix), - global_snapshot_seq); - orte_set_attribute(&new_app_context->attributes, ORTE_APP_SSTORE_LOAD, ORTE_ATTR_LOCAL, sload, OPAL_STRING); - free(sload); - - /* Add it to the job_t data structure */ - /*current_global_jobdata->num_apps++; */ - new_app_context->idx = (jobdata->num_apps); - proc->app_idx = new_app_context->idx; - - opal_pointer_array_add(jobdata->apps, new_app_context); - ++(jobdata->num_apps); - - /* Remove association with the old app_context */ - --(cur_app_context->num_procs); - } - else { - new_app_context = cur_app_context; - - /* Cleanout old stuff */ - free(new_app_context->app); - new_app_context->app = NULL; - - opal_argv_free(new_app_context->argv); - new_app_context->argv = NULL; - - asprintf(&tmp_str, reference_fmt_str, vpid_snapshot->process_name.vpid); - asprintf(&sload, - "%s:%s:%s:%s:%s:%s", - location_str, - global_snapshot_ref, - tmp_str, - (vpid_snapshot->compress_comp == NULL ? "" : vpid_snapshot->compress_comp), - (vpid_snapshot->compress_postfix == NULL ? 
"" : vpid_snapshot->compress_postfix), - global_snapshot_seq); - orte_set_attribute(&new_app_context->attributes, ORTE_APP_SSTORE_LOAD, ORTE_ATTR_LOCAL, sload, OPAL_STRING); - free(sload); - } - - /* - * Update the app_context with the restart informaiton - */ - new_app_context->app = strdup("opal-restart"); - opal_argv_append(&argc, &(new_app_context->argv), new_app_context->app); - opal_argv_append(&argc, &(new_app_context->argv), "-l"); - opal_argv_append(&argc, &(new_app_context->argv), location_str); - opal_argv_append(&argc, &(new_app_context->argv), "-m"); - opal_argv_append(&argc, &(new_app_context->argv), orte_sstore_base_local_metadata_filename); - opal_argv_append(&argc, &(new_app_context->argv), "-r"); - if( NULL != tmp_str ) { - free(tmp_str); - tmp_str = NULL; - } - asprintf(&tmp_str, reference_fmt_str, vpid_snapshot->process_name.vpid); - opal_argv_append(&argc, &(new_app_context->argv), tmp_str); - - cleanup: - if( NULL != tmp_str) { - free(tmp_str); - tmp_str = NULL; - } - if( NULL != location_str ) { - free(location_str); - location_str = NULL; - } - if( NULL != cache_location_str ) { - free(cache_location_str); - cache_location_str = NULL; - } - if( NULL != reference_fmt_str ) { - free(reference_fmt_str); - reference_fmt_str = NULL; - } - if( NULL != ref_location_fmt_str ) { - free(ref_location_fmt_str); - ref_location_fmt_str = NULL; - } - - return exit_status; -} -#endif - -#if OPAL_ENABLE_FT_CR -int orte_errmgr_base_restart_job(orte_jobid_t jobid, char * global_handle, int seq_num) -{ - int ret, exit_status = ORTE_SUCCESS; - orte_process_name_t loc_proc; - orte_job_t *jdata; - orte_sstore_base_handle_t prev_sstore_handle = ORTE_SSTORE_HANDLE_INVALID; - - /* JJH First determine if we can recover this way */ - - /* - * Find the corresponding sstore handle - */ - prev_sstore_handle = orte_sstore_handle_last_stable; - if( ORTE_SUCCESS != (ret = orte_sstore.request_restart_handle(&orte_sstore_handle_last_stable, - NULL, - global_handle, - seq_num, 
- NULL)) ) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - - /* get the job object */ - if (NULL == (jdata = orte_get_job_data_object(jobid))) { - exit_status = ORTE_ERR_NOT_FOUND; - ORTE_ERROR_LOG(exit_status); - goto cleanup; - } - - /* - * Start the recovery - */ - orte_snapc_base_has_recovered = false; - loc_proc.jobid = jobid; - loc_proc.vpid = 0; - ORTE_ACTIVATE_PROC_STATE(&loc_proc, ORTE_PROC_STATE_KILLED_BY_CMD); - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FT_RESTART); - while( !orte_snapc_base_has_recovered ) { - opal_progress(); - } - orte_sstore_handle_last_stable = prev_sstore_handle; - - cleanup: - return exit_status; -} - -int orte_errmgr_base_migrate_job(orte_jobid_t jobid, orte_snapc_base_request_op_t *datum) -{ - int ret, exit_status = ORTE_SUCCESS; - int i; - opal_list_t *proc_list = NULL; - opal_list_t *node_list = NULL; - opal_list_t *suggested_map_list = NULL; - orte_errmgr_predicted_map_t *onto_map = NULL; -#if 0 - orte_errmgr_predicted_proc_t *off_proc = NULL; - orte_errmgr_predicted_node_t *off_node = NULL; -#endif - - proc_list = OBJ_NEW(opal_list_t); - node_list = OBJ_NEW(opal_list_t); - suggested_map_list = OBJ_NEW(opal_list_t); - - for( i = 0; i < datum->mig_num; ++i ) { - /* - * List all processes that are included in the migration. - * We will sort them out in the component. 
- */ - onto_map = OBJ_NEW(orte_errmgr_predicted_map_t); - - if( (datum->mig_off_node)[i] ) { - onto_map->off_current_node = true; - } else { - onto_map->off_current_node = false; - } - - /* Who to migrate */ - onto_map->proc_name.jobid = jobid; - onto_map->proc_name.vpid = (datum->mig_vpids)[i]; - - /* Destination */ - onto_map->map_proc_name.jobid = jobid; - onto_map->map_proc_name.vpid = (datum->mig_vpid_pref)[i]; - - if( ((datum->mig_host_pref)[i])[0] == '\0') { - onto_map->map_node_name = NULL; - } else { - onto_map->map_node_name = strdup((datum->mig_host_pref)[i]); - } - - opal_list_append(suggested_map_list, &(onto_map->super)); - } - - if( ORTE_SUCCESS != (ret = orte_errmgr.predicted_fault(proc_list, node_list, suggested_map_list)) ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - cleanup: - return exit_status; -} - -#endif /******************** * Local Functions diff --git a/orte/mca/errmgr/base/errmgr_base_frame.c b/orte/mca/errmgr/base/errmgr_base_frame.c index 264e4ec8253..455779cdc13 100644 --- a/orte/mca/errmgr/base/errmgr_base_frame.c +++ b/orte/mca/errmgr/base/errmgr_base_frame.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -16,9 +16,9 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,9 +26,7 @@ #include "orte_config.h" #include "orte/constants.h" -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_UNISTD_H #include #endif diff --git a/orte/mca/errmgr/base/errmgr_base_select.c b/orte/mca/errmgr/base/errmgr_base_select.c index 7a21caa1431..cf6e4c12829 100644 --- a/orte/mca/errmgr/base/errmgr_base_select.c +++ b/orte/mca/errmgr/base/errmgr_base_select.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,16 +6,17 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,9 +24,7 @@ #include "orte_config.h" #include "orte/constants.h" -#ifdef HAVE_STRING_H #include -#endif #include "orte/mca/mca.h" #include "opal/mca/base/base.h" @@ -45,7 +45,7 @@ int orte_errmgr_base_select(void) if( OPAL_SUCCESS != mca_base_select("errmgr", orte_errmgr_base_framework.framework_output, &orte_errmgr_base_framework.framework_components, (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { + (mca_base_component_t **) &best_component, NULL) ) { /* This will only happen if no component was selected */ exit_status = ORTE_ERROR; goto cleanup; diff --git a/orte/mca/errmgr/base/errmgr_base_tool.c b/orte/mca/errmgr/base/errmgr_base_tool.c deleted file mode 100644 index ef19b18e835..00000000000 --- a/orte/mca/errmgr/base/errmgr_base_tool.c +++ /dev/null @@ -1,443 +0,0 @@ -/* - * Copyright (c) 2009-2010 The Trustees of Indiana University. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#ifdef HAVE_STRING_H -#include -#endif -#if HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#if HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#if HAVE_SYS_STAT_H -#include -#endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_DIRENT_H -#include -#endif /* HAVE_DIRENT_H */ -#include - -#include "opal/dss/dss.h" - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "opal/util/os_dirpath.h" -#include "opal/util/output.h" -#include "opal/util/basename.h" -#include "opal/util/argv.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/mca/snapc/snapc.h" -#include "orte/runtime/orte_globals.h" -#include "orte/util/name_fns.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -/** - * This file contains function for the HNP to communicate with the - * orte-migrate command. 
- */ -#if OPAL_ENABLE_FT_CR - -/****************** - * Local Functions - ******************/ -static int errmgr_base_tool_start_cmdline_listener(void); -static int errmgr_base_tool_stop_cmdline_listener(void); - -static void errmgr_base_tool_cmdline_recv(int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata); - -/****************** - * Object stuff - ******************/ -static orte_process_name_t errmgr_cmdline_sender = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}; -static bool errmgr_cmdline_recv_issued = false; -static int errmgr_tool_initialized = false; - -/******************** - * Module Functions - ********************/ -int orte_errmgr_base_tool_init(void) -{ - int ret; - - if( (++errmgr_tool_initialized) != 1 ) { - if( errmgr_tool_initialized < 1 ) { - return OPAL_ERROR; - } - return OPAL_SUCCESS; - } - - /* Only HNP communicates with tools */ - if (! ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - /* - * Setup command line migrate tool request listener - */ - if( ORTE_SUCCESS != (ret = errmgr_base_tool_start_cmdline_listener()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - return ORTE_SUCCESS; -} - -int orte_errmgr_base_tool_finalize(void) -{ - int ret; - - if( (--errmgr_tool_initialized) != 0 ) { - if( errmgr_tool_initialized < 0 ) { - return OPAL_ERROR; - } - return OPAL_SUCCESS; - } - - /* Only HNP communicates with tools */ - if (! ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - /* - * Clean up listeners - */ - if( ORTE_SUCCESS != (ret = errmgr_base_tool_stop_cmdline_listener()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - return ORTE_SUCCESS; -} - -int orte_errmgr_base_migrate_update(int status) -{ - int ret, exit_status = ORTE_SUCCESS; - opal_buffer_t *loc_buffer = NULL; - orte_errmgr_tool_cmd_flag_t command = ORTE_ERRMGR_MIGRATE_TOOL_UPDATE_CMD; - - /* Only HNP communicates with tools */ - if (! 
ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - /* - * If this is an invalid state, then return an error - */ - if( ORTE_ERRMGR_MIGRATE_MAX < status ) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: Invalid state %d < (Max %d)", - status, ORTE_ERRMGR_MIGRATE_MAX); - return ORTE_ERR_BAD_PARAM; - } - - /* - * Report the status over the notifier interface - */ - orte_errmgr_base_migrate_state_notify(status); - - /* - * If the caller is indicating that they are finished and ready for another - * command, then repost the RML listener. - */ - if( ORTE_ERRMGR_MIGRATE_STATE_NONE == status ) { - if( ORTE_SUCCESS != (ret = errmgr_base_tool_start_cmdline_listener()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - return ORTE_SUCCESS; - } - - /* - * Noop if invalid peer, or peer not specified - */ - if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, &errmgr_cmdline_sender) ) { - return ORTE_SUCCESS; - } - - /* - * Do not send to self, as that is silly. 
- */ - if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, &errmgr_cmdline_sender) ) { - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Warning: Do not send to self!\n")); - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Sending update command \n", - status)); - - /******************** - * Send over the status of the checkpoint - * - migration state - ********************/ - if (NULL == (loc_buffer = OBJ_NEW(opal_buffer_t))) { - exit_status = ORTE_ERROR; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(loc_buffer, &command, 1, ORTE_ERRMGR_MIGRATE_TOOL_CMD)) ) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: DSS Pack (cmd) Failure (ret = %d)\n", - ret); - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(loc_buffer, &status, 1, OPAL_INT))) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: DSS Pack (status) Failure (ret = %d)\n", - ret); - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(&errmgr_cmdline_sender, - loc_buffer, ORTE_RML_TAG_MIGRATE, - orte_rml_send_callback, NULL))) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: Send (status) Failure (ret = %d)\n", - ret); - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - cleanup: - if(NULL != loc_buffer) { - OBJ_RELEASE(loc_buffer); - loc_buffer = NULL; - } - - return exit_status; -} - -/******************** - * Utility functions - ********************/ - -/******************** - * Local Functions - ********************/ -static int errmgr_base_tool_start_cmdline_listener(void) -{ - if (errmgr_cmdline_recv_issued && ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - 
} - - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool: Startup Command Line Channel")); - - /* - * Coordinator command listener - */ - errmgr_cmdline_sender.jobid = ORTE_JOBID_INVALID; - errmgr_cmdline_sender.vpid = ORTE_VPID_INVALID; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MIGRATE, - 0, errmgr_base_tool_cmdline_recv, NULL); - - errmgr_cmdline_recv_issued = true; - - return ORTE_SUCCESS; -} - - -static int errmgr_base_tool_stop_cmdline_listener(void) -{ - int exit_status = ORTE_SUCCESS; - - if (!errmgr_cmdline_recv_issued && ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool: Shutdown Command Line Channel")); - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MIGRATE); - - errmgr_cmdline_recv_issued = false; - - return exit_status; -} - -/***************** - * Listener Callbacks - *****************/ -static void errmgr_base_tool_cmdline_recv(int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - int ret; - orte_process_name_t swap_dest; - orte_errmgr_tool_cmd_flag_t command; - orte_std_cntr_t count = 1; - char *off_nodes = NULL; - char *off_procs = NULL; - char *onto_nodes = NULL; - char **split_off_nodes = NULL; - char **split_off_procs = NULL; - char **split_onto_nodes = NULL; - opal_list_t *proc_list = NULL; - opal_list_t *node_list = NULL; - opal_list_t *suggested_map_list = NULL; - orte_errmgr_predicted_proc_t *off_proc = NULL; - orte_errmgr_predicted_node_t *off_node = NULL; - orte_errmgr_predicted_map_t *onto_map = NULL; - int cnt = 0, i; - - - if( ORTE_RML_TAG_MIGRATE != tag ) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Error: Unknown tag: Received a command message from %s (tag = %d).", - ORTE_NAME_PRINT(sender), tag); - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return; - } - - OPAL_OUTPUT_VERBOSE((10, 
orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Command Line: Start a migration operation [Sender = %s]", - ORTE_NAME_PRINT(sender))); - - errmgr_cmdline_recv_issued = false; /* Not a persistent RML message */ - - /* - * If we are already interacting with a command line tool then reject this - * request. Since we only allow the processing of one tool command at a - * time. - */ - if( OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, &errmgr_cmdline_sender) ) { - swap_dest.jobid = errmgr_cmdline_sender.jobid; - swap_dest.vpid = errmgr_cmdline_sender.vpid; - - errmgr_cmdline_sender = *sender; - orte_errmgr_base_migrate_update(ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS); - - errmgr_cmdline_sender.jobid = swap_dest.jobid; - errmgr_cmdline_sender.vpid = swap_dest.vpid; - - return; - } - - errmgr_cmdline_sender = *sender; - - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &count, ORTE_ERRMGR_MIGRATE_TOOL_CMD))) { - ORTE_ERROR_LOG(ret); - return; - } - - /* - * orte-migrate has requested that a checkpoint be taken - */ - if (ORTE_ERRMGR_MIGRATE_TOOL_INIT_CMD == command) { - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Command line requested process migration [command %d]\n", - command)); - - /* - * Unpack the buffer from the orte-migrate command - */ - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(off_procs), &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - return; - } - - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(off_nodes), &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - return; - } - - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(onto_nodes), &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - return; - } - - /* - * Parse the comma separated list - */ - proc_list = OBJ_NEW(opal_list_t); - node_list = OBJ_NEW(opal_list_t); - suggested_map_list = OBJ_NEW(opal_list_t); - - split_off_procs = 
opal_argv_split(off_procs, ','); - cnt = opal_argv_count(split_off_procs); - if( cnt > 0 ) { - for(i = 0; i < cnt; ++i) { - off_proc = OBJ_NEW(orte_errmgr_predicted_proc_t); - off_proc->proc_name.vpid = atoi(split_off_procs[i]); - opal_list_append(proc_list, &(off_proc->super)); - } - } - - split_off_nodes = opal_argv_split(off_nodes, ','); - cnt = opal_argv_count(split_off_nodes); - if( cnt > 0 ) { - for(i = 0; i < cnt; ++i) { - off_node = OBJ_NEW(orte_errmgr_predicted_node_t); - off_node->node_name = strdup(split_off_nodes[i]); - opal_list_append(node_list, &(off_node->super)); - } - } - - split_onto_nodes = opal_argv_split(onto_nodes, ','); - cnt = opal_argv_count(split_onto_nodes); - if( cnt > 0 ) { - for(i = 0; i < cnt; ++i) { - onto_map = OBJ_NEW(orte_errmgr_predicted_map_t); - onto_map->map_node_name = strdup(split_onto_nodes[i]); - opal_list_append(suggested_map_list, &(onto_map->super)); - } - } - - /* - * Pass to the predicted fault function to see how they would like to progress - */ - orte_errmgr.predicted_fault(proc_list, node_list, suggested_map_list); - } - /* - * Unknown command - */ - else { - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Command line sent an unknown command (command %d)\n", - command)); - ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); - } - - return; -} -#endif diff --git a/orte/mca/errmgr/base/errmgr_private.h b/orte/mca/errmgr/base/errmgr_private.h index 033b91b3078..b49bb57478a 100644 --- a/orte/mca/errmgr/base/errmgr_private.h +++ b/orte/mca/errmgr/base/errmgr_private.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. @@ -13,9 +13,9 @@ * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: diff --git a/orte/mca/errmgr/base/help-errmgr-base.txt b/orte/mca/errmgr/base/help-errmgr-base.txt index ca6211fbd66..4aec50c04d4 100644 --- a/orte/mca/errmgr/base/help-errmgr-base.txt +++ b/orte/mca/errmgr/base/help-errmgr-base.txt @@ -6,15 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # @@ -61,9 +62,10 @@ route found between them. Please check network connectivity (including firewalls and network routing requirements). # [node-died] -ORTE has lost communication with its daemon located on node: +ORTE has lost communication with a remote daemon. - hostname: %s + HNP daemon : %s on node %s + Remote daemon: %s on node %s This is usually due to either a failure of the TCP network connection to the node, or possibly an internal failure of diff --git a/orte/mca/errmgr/default_app/Makefile.am b/orte/mca/errmgr/default_app/Makefile.am index e12806170ea..587d65b780f 100644 --- a/orte/mca/errmgr/default_app/Makefile.am +++ b/orte/mca/errmgr/default_app/Makefile.am @@ -1,9 +1,9 @@ # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/errmgr/default_app/errmgr_default_app.c b/orte/mca/errmgr/default_app/errmgr_default_app.c index d1768f0d9cc..c1ee58ee992 100644 --- a/orte/mca/errmgr/default_app/errmgr_default_app.c +++ b/orte/mca/errmgr/default_app/errmgr_default_app.c @@ -2,17 +2,18 @@ * Copyright (c) 2009-2011 The Trustees of Indiana University. * All rights reserved. * - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,12 +23,11 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif #include "opal/util/output.h" #include "opal/dss/dss.h" +#include "opal/errhandler/opal_errhandler.h" #include "opal/mca/pmix/pmix.h" #include "orte/util/error_strings.h" @@ -45,17 +45,17 @@ /* * Module functions: Global */ -static int init(void); -static int finalize(void); + static int init(void); + static int finalize(void); -static int abort_peers(orte_process_name_t *procs, - orte_std_cntr_t num_procs, - int error_code); + static int abort_peers(orte_process_name_t *procs, + orte_std_cntr_t num_procs, + int error_code); /****************** * HNP module ******************/ -orte_errmgr_base_module_t orte_errmgr_default_app_module = { + orte_errmgr_base_module_t orte_errmgr_default_app_module = { init, finalize, orte_errmgr_base_log, @@ -70,8 +70,35 @@ orte_errmgr_base_module_t orte_errmgr_default_app_module = { }; static void proc_errors(int fd, short args, void *cbdata); -static void pmix_error(int error) +static void pmix_error(int error, 
opal_proc_t *proc, void *cbdata) +{ + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, + "%s errmgr:default_app: errhandler called", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* push it into our event base */ + ORTE_ACTIVATE_PROC_STATE(ORTE_PROC_MY_NAME, ORTE_PROC_STATE_COMM_FAILED); +} + +static int myerrhandle = -1; + +static void register_cbfunc(int status, int errhndler, void *cbdata) { + myerrhandle = errhndler; +} + +static void notify_cbfunc(int status, + opal_list_t *procs, + opal_list_t *info, + opal_pmix_release_cbfunc_t cbfunc, + void *cbdata) +{ + if (NULL != cbfunc) { + cbfunc(cbdata); + } + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, + "%s errmgr:default_app: pmix errhandler called", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* push it into our event base */ ORTE_ACTIVATE_PROC_STATE(ORTE_PROC_MY_NAME, ORTE_PROC_STATE_COMM_FAILED); } @@ -79,26 +106,24 @@ static void pmix_error(int error) /************************ * API Definitions ************************/ -static int init(void) -{ + static int init(void) + { /* setup state machine to trap proc errors */ orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI); - /* register an errhandler with the PMIx framework so - * we can know of loss of connection to the server */ - if (NULL != opal_pmix.register_errhandler) { - opal_pmix.register_errhandler(pmix_error); - } + /* register an errhandler */ + opal_register_errhandler(pmix_error, NULL); + + /* tie the default PMIx errhandler back to us */ + opal_pmix.register_errhandler(NULL, notify_cbfunc, register_cbfunc, NULL); return ORTE_SUCCESS; } static int finalize(void) { - if (NULL != opal_pmix.deregister_errhandler) { - opal_pmix.deregister_errhandler(); - } - + opal_deregister_errhandler(); + opal_pmix.deregister_errhandler(myerrhandle, NULL, NULL); return ORTE_SUCCESS; } @@ -110,15 +135,15 @@ static void proc_errors(int fd, short args, void *cbdata) opal_pointer_array_t errors; 
OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, - "%s errmgr:default_app: proc %s state %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&caddy->name), - orte_proc_state_to_str(caddy->proc_state))); - + "%s errmgr:default_app: proc %s state %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&caddy->name), + orte_proc_state_to_str(caddy->proc_state))); + /* * if orte is trying to shutdown, just let it */ - if (orte_finalizing) { + if (orte_finalizing) { OBJ_RELEASE(caddy); return; } @@ -143,8 +168,8 @@ static void proc_errors(int fd, short args, void *cbdata) /* flag that we must abnormally terminate as far as the * RTE is concerned */ - orte_abnormal_term_ordered = true; - } else if (ORTE_PROC_STATE_LIFELINE_LOST == caddy->proc_state) { + orte_abnormal_term_ordered = true; + } else if (ORTE_PROC_STATE_LIFELINE_LOST == caddy->proc_state) { /* we need to die, so mark us so */ orte_abnormal_term_ordered = true; } diff --git a/orte/mca/errmgr/default_app/errmgr_default_app.h b/orte/mca/errmgr/default_app/errmgr_default_app.h index 8c93ea4e7d1..19c4d5ada83 100644 --- a/orte/mca/errmgr/default_app/errmgr_default_app.h +++ b/orte/mca/errmgr/default_app/errmgr_default_app.h @@ -2,15 +2,15 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** * @file - * + * */ #ifndef MCA_ERRMGR_default_app_EXPORT_H diff --git a/orte/mca/errmgr/default_app/errmgr_default_app_component.c b/orte/mca/errmgr/default_app/errmgr_default_app_component.c index c56fbbf2931..2a81af6a413 100644 --- a/orte/mca/errmgr/default_app/errmgr_default_app_component.c +++ b/orte/mca/errmgr/default_app/errmgr_default_app_component.c @@ -5,9 +5,9 @@ * reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +21,7 @@ /* * Public string for version number */ -const char *orte_errmgr_default_app_component_version_string = +const char *orte_errmgr_default_app_component_version_string = "ORTE ERRMGR default_app MCA component version " ORTE_VERSION; /* @@ -38,7 +38,7 @@ static int errmgr_default_app_component_query(mca_base_module_t **module, int *p */ orte_errmgr_base_component_t mca_errmgr_default_app_component = { - /* Handle the general mca_component_t struct containing + /* Handle the general mca_component_t struct containing * meta information about the component */ .base_version = { @@ -75,7 +75,7 @@ static int errmgr_default_app_register(void) return ORTE_SUCCESS; } -static int errmgr_default_app_open(void) +static int errmgr_default_app_open(void) { return ORTE_SUCCESS; } @@ -91,9 +91,9 @@ static int errmgr_default_app_component_query(mca_base_module_t **module, int *p /* set our priority high as we are the default for apps */ *priority = my_priority; *module = (mca_base_module_t *)&orte_errmgr_default_app_module; - return ORTE_SUCCESS; + return ORTE_SUCCESS; } - + *priority = -1; *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/errmgr/default_hnp/Makefile.am b/orte/mca/errmgr/default_hnp/Makefile.am index b6b12cf9677..65d2dd3d18f 100644 --- a/orte/mca/errmgr/default_hnp/Makefile.am +++ b/orte/mca/errmgr/default_hnp/Makefile.am @@ -1,9 +1,9 @@ # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c index bfdf8dafd18..fcdbe3acc30 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c @@ -1,19 +1,20 @@ /* * Copyright (c) 2009-2011 The Trustees of Indiana University. * All rights reserved. - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2011 Oracle and/or all its affiliates. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. + * Copyright (c) 2011 Oracle and/or all its affiliates. All rights reserved. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,9 +24,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_SYS_WAIT_H #include #endif @@ -73,8 +72,6 @@ static int suggest_map_targets(orte_proc_t *proc, orte_node_t *oldnode, opal_list_t *node_list); -static int ft_event(int state); - /****************** * default_hnp module @@ -87,7 +84,7 @@ orte_errmgr_base_module_t orte_errmgr_default_hnp_module = { orte_errmgr_base_abort_peers, predicted_fault, suggest_map_targets, - ft_event, + NULL, orte_errmgr_base_register_migration_warning, NULL, orte_errmgr_base_execute_error_callbacks @@ -134,6 +131,7 @@ static void job_errors(int fd, short args, void *cbdata) orte_proc_t *aborted_proc; opal_buffer_t *answer; int32_t rc, ret; + int room, *rmptr; /* * if orte is trying to shutdown, just let it @@ -194,13 +192,23 @@ static void job_errors(int fd, short args, void *cbdata) OBJ_RELEASE(caddy); return; } - OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + /* pack the room number */ + rmptr = &room; + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, (void**)&rmptr, OPAL_INT)) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &room, 1, OPAL_INT))) { + ORTE_ERROR_LOG(ret); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + } + OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, "%s errmgr:hnp sending dyn error release of job %s to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid), ORTE_NAME_PRINT(&jdata->originator))); if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer, - ORTE_RML_TAG_PLM_PROXY, + ORTE_RML_TAG_LAUNCH_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -262,7 +270,7 @@ static void job_errors(int fd, short args, void *cbdata) jdata->num_procs != jdata->num_reported) { orte_show_help("help-errmgr-base.txt", 
"failed-daemon", true); } - + /* abort the job */ ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_FORCED_EXIT); /* set the global abnormal exit flag */ @@ -285,7 +293,7 @@ static void proc_errors(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc), orte_proc_state_to_str(state))); - + /* * if orte is trying to shutdown, just let it */ @@ -362,7 +370,11 @@ static void proc_errors(int fd, short args, void *cbdata) /* record the first one to fail */ if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { /* output an error message so the user knows what happened */ - orte_show_help("help-errmgr-base.txt", "node-died", true, pptr->node->name); + orte_show_help("help-errmgr-base.txt", "node-died", true, + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + orte_process_info.nodename, + ORTE_NAME_PRINT(proc), + pptr->node->name); /* mark the daemon job as failed */ jdata->state = ORTE_JOB_STATE_COMM_FAILED; /* point to the lowest rank to cause the problem */ @@ -650,11 +662,6 @@ static int suggest_map_targets(orte_proc_t *proc, return ORTE_ERR_NOT_IMPLEMENTED; } -static int ft_event(int state) -{ - return ORTE_SUCCESS; -} - /***************** * Local Functions *****************/ @@ -676,7 +683,7 @@ static void default_hnp_abort(orte_job_t *jdata) "%s errmgr:default_hnp: abort called on job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); - + /* set control params to indicate we are terminating */ orte_job_term_ordered = true; orte_enable_recovery = false; @@ -706,7 +713,7 @@ static void default_hnp_abort(orte_job_t *jdata) OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, "%s errmgr:default_hnp: ordering orted termination", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - + /* tell the plm to terminate the orteds - they will automatically * kill their local procs */ diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.h b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.h index 226e424a84b..c8bad995abe 
100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.h +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.h @@ -5,15 +5,15 @@ * reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** * @file - * + * */ #ifndef MCA_ERRMGR_default_hnp_EXPORT_H diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp_component.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp_component.c index b2a725e384f..640baa3b33a 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp_component.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp_component.c @@ -5,9 +5,9 @@ * reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +22,7 @@ /* * Public string for version number */ -const char *orte_errmgr_default_hnp_component_version_string = +const char *orte_errmgr_default_hnp_component_version_string = "ORTE ERRMGR default_hnp MCA component version " ORTE_VERSION; /* @@ -38,7 +38,7 @@ static int default_hnp_component_query(mca_base_module_t **module, int *priority * and pointer to our public functions in it */ orte_errmgr_base_component_t mca_errmgr_default_hnp_component = { - /* Handle the general mca_component_t struct containing + /* Handle the general mca_component_t struct containing * meta information about the component default_hnp */ .base_version = { @@ -76,7 +76,7 @@ static int default_hnp_register(void) return ORTE_SUCCESS; } -static int default_hnp_open(void) +static int default_hnp_open(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/errmgr/default_orted/Makefile.am b/orte/mca/errmgr/default_orted/Makefile.am index d8c03e751c8..dd1ee34d359 100644 --- a/orte/mca/errmgr/default_orted/Makefile.am +++ b/orte/mca/errmgr/default_orted/Makefile.am @@ -1,9 +1,9 @@ # -# Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted.c b/orte/mca/errmgr/default_orted/errmgr_default_orted.c index c83f38372ba..dafa9fd1af3 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted.c +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted.c @@ -6,13 +6,13 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,9 +22,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif #include "opal/util/output.h" #include "opal/dss/dss.h" @@ -69,8 +67,6 @@ static int suggest_map_targets(orte_proc_t *proc, orte_node_t *oldnode, opal_list_t *node_list); -static int ft_event(int state); - /****************** * default_orted module @@ -83,7 +79,7 @@ orte_errmgr_base_module_t orte_errmgr_default_orted_module = { orte_errmgr_base_abort_peers, predicted_fault, suggest_map_targets, - ft_event, + NULL, orte_errmgr_base_register_migration_warning, NULL, orte_errmgr_base_execute_error_callbacks @@ -402,7 +398,7 @@ static void proc_errors(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_proc_state_to_str(state), ORTE_NAME_PRINT(proc))); - + if (ORTE_PROC_STATE_TERM_NON_ZERO == state) { /* update the state */ child->state = state; @@ -570,7 +566,7 @@ static void proc_errors(int fd, short args, void *cbdata) "%s errmgr:default_orted reporting all procs in %s terminated", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); - + /* remove all of this job's children from the global list */ for (i=0; i < orte_local_children->size; i++) { if 
(NULL == (ptr = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { @@ -616,11 +612,6 @@ static int suggest_map_targets(orte_proc_t *proc, return ORTE_ERR_NOT_IMPLEMENTED; } -static int ft_event(int state) -{ - return ORTE_SUCCESS; -} - /***************** * Local Functions @@ -744,7 +735,7 @@ static void killprocs(orte_jobid_t job, orte_vpid_t vpid) orte_proc_t proc; int rc; - if (ORTE_JOBID_WILDCARD == job + if (ORTE_JOBID_WILDCARD == job && ORTE_VPID_WILDCARD == vpid) { if (ORTE_SUCCESS != (rc = orte_odls.kill_local_procs(NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted.h b/orte/mca/errmgr/default_orted/errmgr_default_orted.h index 9f4d4da7258..13f6968065a 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted.h +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted.h @@ -2,15 +2,15 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** * @file - * + * */ #ifndef MCA_ERRMGR_default_orted_EXPORT_H diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted_component.c b/orte/mca/errmgr/default_orted/errmgr_default_orted_component.c index 3ed11d8c464..1f519fb9d16 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted_component.c +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted_component.c @@ -5,9 +5,9 @@ * reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -21,7 +21,7 @@ /* * Public string for version number */ -const char *orte_errmgr_default_orted_component_version_string = +const char *orte_errmgr_default_orted_component_version_string = "ORTE ERRMGR default_orted MCA component version " ORTE_VERSION; /* @@ -38,7 +38,7 @@ static int errmgr_default_orted_component_query(mca_base_module_t **module, int */ orte_errmgr_base_component_t mca_errmgr_default_orted_component = { - /* Handle the general mca_component_t struct containing + /* Handle the general mca_component_t struct containing * meta information about the component itdefault_orted */ .base_version = { @@ -76,7 +76,7 @@ static int errmgr_default_orted_register(void) return ORTE_SUCCESS; } -static int errmgr_default_orted_open(void) +static int errmgr_default_orted_open(void) { return ORTE_SUCCESS; } @@ -92,9 +92,9 @@ static int errmgr_default_orted_component_query(mca_base_module_t **module, int /* we are the default component for daemons */ *priority = my_priority; *module = (mca_base_module_t *)&orte_errmgr_default_orted_module; - return ORTE_SUCCESS; + return ORTE_SUCCESS; } - + *priority = -1; *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/errmgr/default_tool/Makefile.am b/orte/mca/errmgr/default_tool/Makefile.am index 99afded9ec9..e67912bfae5 100644 --- a/orte/mca/errmgr/default_tool/Makefile.am +++ b/orte/mca/errmgr/default_tool/Makefile.am @@ -1,9 +1,9 @@ # -# Copyright (c) 2013 Intel, Inc. All rights reserved. +# Copyright (c) 2013 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/errmgr/default_tool/errmgr_default_tool.c b/orte/mca/errmgr/default_tool/errmgr_default_tool.c index 0c4bc315bba..98f039c0f3c 100644 --- a/orte/mca/errmgr/default_tool/errmgr_default_tool.c +++ b/orte/mca/errmgr/default_tool/errmgr_default_tool.c @@ -2,7 +2,7 @@ * Copyright (c) 2009-2011 The Trustees of Indiana University. * All rights reserved. * - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights @@ -11,9 +11,9 @@ * All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,9 +23,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif #include "opal/util/output.h" #include "opal/dss/dss.h" @@ -96,7 +94,7 @@ static void proc_errors(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&caddy->name), orte_proc_state_to_str(caddy->proc_state))); - + /* * if orte is trying to shutdown, just let it */ diff --git a/orte/mca/errmgr/default_tool/errmgr_default_tool.h b/orte/mca/errmgr/default_tool/errmgr_default_tool.h index 9a68403daf6..53db4f45aac 100644 --- a/orte/mca/errmgr/default_tool/errmgr_default_tool.h +++ b/orte/mca/errmgr/default_tool/errmgr_default_tool.h @@ -2,15 +2,15 @@ * Copyright (c) 2013 Intel, Inc. All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** * @file - * + * */ #ifndef MCA_ERRMGR_default_tool_EXPORT_H diff --git a/orte/mca/errmgr/default_tool/errmgr_default_tool_component.c b/orte/mca/errmgr/default_tool/errmgr_default_tool_component.c index 58ca229e08f..15164921c40 100644 --- a/orte/mca/errmgr/default_tool/errmgr_default_tool_component.c +++ b/orte/mca/errmgr/default_tool/errmgr_default_tool_component.c @@ -2,9 +2,9 @@ * Copyright (c) 2013 Intel, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -18,7 +18,7 @@ /* * Public string for version number */ -const char *orte_errmgr_default_tool_component_version_string = +const char *orte_errmgr_default_tool_component_version_string = "ORTE ERRMGR default_tool MCA component version " ORTE_VERSION; /* @@ -35,7 +35,7 @@ static int errmgr_default_tool_component_query(mca_base_module_t **module, int * */ orte_errmgr_base_component_t mca_errmgr_default_tool_component = { - /* Handle the general mca_component_t struct containing + /* Handle the general mca_component_t struct containing * meta information about the component */ .base_version = { @@ -72,7 +72,7 @@ static int errmgr_default_tool_register(void) return ORTE_SUCCESS; } -static int errmgr_default_tool_open(void) +static int errmgr_default_tool_open(void) { return ORTE_SUCCESS; } @@ -88,9 +88,9 @@ static int errmgr_default_tool_component_query(mca_base_module_t **module, int * /* set our priority high as we are the default for tools */ *priority = my_priority; *module = (mca_base_module_t *)&orte_errmgr_default_tool_module; - return ORTE_SUCCESS; + return ORTE_SUCCESS; } - + *priority = -1; *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/errmgr/errmgr.h b/orte/mca/errmgr/errmgr.h index df8226a2cc1..de27a379195 100644 --- a/orte/mca/errmgr/errmgr.h +++ b/orte/mca/errmgr/errmgr.h @@ -237,7 +237,7 @@ typedef enum { ORTE_ERRMGR_CALLBACK_APPEND } 
orte_errmgr_error_order_t; -/** +/** * Register a callback function for faults. * * This callback function will be used anytime (other than during finalize) the diff --git a/orte/mca/ess/Makefile.am b/orte/mca/ess/Makefile.am index 11d64051270..21354432311 100644 --- a/orte/mca/ess/Makefile.am +++ b/orte/mca/ess/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/ess/alps/Makefile.am b/orte/mca/ess/alps/Makefile.am index 9a7720500e4..c9b02d29feb 100644 --- a/orte/mca/ess/alps/Makefile.am +++ b/orte/mca/ess/alps/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/ess/alps/configure.m4 b/orte/mca/ess/alps/configure.m4 index 6ad187ae3bf..23ec4fdcc1b 100644 --- a/orte/mca/ess/alps/configure.m4 +++ b/orte/mca/ess/alps/configure.m4 @@ -31,7 +31,7 @@ AC_DEFUN([MCA_orte_ess_alps_CONFIG],[ [$1 AC_SUBST([ess_alps_CPPFLAGS]) AC_SUBST([ess_alps_LDFLAGS]) - AC_SUBST([ess_alps_LIBS])], + AC_SUBST([ess_alps_LIBS])], [$2]) ])dnl diff --git a/orte/mca/ess/alps/ess_alps.h b/orte/mca/ess/alps/ess_alps.h index 92f74d5a8e6..30f58b755a5 100644 --- a/orte/mca/ess/alps/ess_alps.h +++ b/orte/mca/ess/alps/ess_alps.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/ess/alps/ess_alps_module.c b/orte/mca/ess/alps/ess_alps_module.c index 637d0a6d079..4f0f47b501c 100644 --- a/orte/mca/ess/alps/ess_alps_module.c +++ b/orte/mca/ess/alps/ess_alps_module.c @@ -56,7 +56,7 @@ static orte_vpid_t starting_vpid = 0; static int rte_init(void) { - int ret, i; + int ret; char *error = NULL; char **hosts = NULL; diff --git a/orte/mca/ess/alps/ess_alps_utils.c b/orte/mca/ess/alps/ess_alps_utils.c index cc7054638b4..3e59e9fdc26 100644 --- a/orte/mca/ess/alps/ess_alps_utils.c +++ b/orte/mca/ess/alps/ess_alps_utils.c @@ -40,10 +40,10 @@ /* * use the Alps placement file to obtain * the global rank of the "first" local rank - * on the node. + * on the node. 
*/ -int +int orte_ess_alps_get_first_rank_on_node(int *first_rank) { int alps_status = 0; diff --git a/orte/mca/ess/base/Makefile.am b/orte/mca/ess/base/Makefile.am index 5a2f8684c42..9e2d31367a5 100644 --- a/orte/mca/ess/base/Makefile.am +++ b/orte/mca/ess/base/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. # Copyright (c) 2015 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/ess/base/base.h b/orte/mca/ess/base/base.h index 0b7382a3ee7..4387a5e98d8 100644 --- a/orte/mca/ess/base/base.h +++ b/orte/mca/ess/base/base.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: @@ -70,7 +70,7 @@ ORTE_DECLSPEC int orte_ess_base_tool_finalize(void); ORTE_DECLSPEC int orte_ess_base_orted_setup(char **hosts); ORTE_DECLSPEC int orte_ess_base_orted_finalize(void); -/* Detect whether or not this proc is bound - if not, +/* Detect whether or not this proc is bound - if not, * see if it should bind itself */ ORTE_DECLSPEC int orte_ess_base_proc_binding(void); diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c index 4bf2cd9026f..ab121720431 100644 --- a/orte/mca/ess/base/ess_base_fns.c +++ b/orte/mca/ess/base/ess_base_fns.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -32,7 +32,7 @@ #include #include "opal/util/output.h" -#include "opal/mca/dstore/dstore.h" +#include "opal/mca/pmix/pmix.h" #include "opal/mca/hwloc/base/base.h" #include "orte/mca/errmgr/errmgr.h" @@ -46,7 +46,6 @@ int orte_ess_base_proc_binding(void) { -#if OPAL_HAVE_HWLOC hwloc_obj_t node, obj; hwloc_cpuset_t cpus, nodeset; hwloc_obj_type_t target; @@ -56,7 +55,6 @@ int orte_ess_base_proc_binding(void) int ret; char *error=NULL; hwloc_cpuset_t mycpus; - opal_value_t kv; /* Determine if we were pre-bound or not */ if (NULL != getenv(OPAL_MCA_PREFIX"orte_bound_at_launch")) { @@ -263,8 +261,8 @@ int orte_ess_base_proc_binding(void) /* get the cpus we are bound to */ mycpus = hwloc_bitmap_alloc(); - if (hwloc_get_cpubind(opal_hwloc_topology, - mycpus, + if (hwloc_get_cpubind(opal_hwloc_topology, + mycpus, HWLOC_CPUBIND_PROCESS) < 0) { if (NULL != orte_process_info.cpuset) { free(orte_process_info.cpuset); @@ -296,18 +294,8 @@ int orte_ess_base_proc_binding(void) hwloc_bitmap_free(mycpus); /* push our cpuset so others can calculate our locality */ if (NULL != orte_process_info.cpuset) { - OBJ_CONSTRUCT(&kv, opal_value_t); - kv.key = strdup(OPAL_DSTORE_CPUSET); - kv.type = OPAL_STRING; - kv.data.string = strdup(orte_process_info.cpuset); - if (OPAL_SUCCESS != (ret = opal_pmix.put(PMIX_GLOBAL, &kv))) { - ORTE_ERROR_LOG(ret); - OBJ_DESTRUCT(&kv); - goto error; - } - /* and store a copy locally */ - (void)opal_dstore.store(opal_dstore_internal, ORTE_PROC_MY_NAME, &kv); - OBJ_DESTRUCT(&kv); + OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_CPUSET, + orte_process_info.cpuset, OPAL_STRING); } return ORTE_SUCCESS; @@ -319,8 +307,4 @@ int orte_ess_base_proc_binding(void) } return ORTE_ERR_SILENT; - -#else - return ORTE_SUCCESS; -#endif } diff --git a/orte/mca/ess/base/ess_base_frame.c b/orte/mca/ess/base/ess_base_frame.c index 40dfc1c3da1..c05f6b7d6ff 100644 --- 
a/orte/mca/ess/base/ess_base_frame.c +++ b/orte/mca/ess/base/ess_base_frame.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/ess/base/ess_base_get.c b/orte/mca/ess/base/ess_base_get.c index 3a045ddd399..4e1b1ae09f6 100644 --- a/orte/mca/ess/base/ess_base_get.c +++ b/orte/mca/ess/base/ess_base_get.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/ess/base/ess_base_select.c b/orte/mca/ess/base/ess_base_select.c index 352911ee2c2..662ba6890b9 100644 --- a/orte/mca/ess/base/ess_base_select.c +++ b/orte/mca/ess/base/ess_base_select.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,16 +6,16 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,7 +28,7 @@ #include "orte/mca/ess/base/base.h" -int +int orte_ess_base_select(void) { orte_ess_base_component_t *best_component = NULL; @@ -39,7 +40,7 @@ orte_ess_base_select(void) if( OPAL_SUCCESS != mca_base_select("ess", orte_ess_base_framework.framework_output, &orte_ess_base_framework.framework_components, (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { + (mca_base_component_t **) &best_component, NULL) ) { /* error message emitted by fn above */ return ORTE_ERR_SILENT; } diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index b6b4068e8d9..d2a24bcb373 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -10,11 +10,12 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -38,13 +39,12 @@ #endif #include "opal/mca/event/event.h" -#include "opal/mca/dstore/base/base.h" +#include "opal/mca/pmix/pmix.h" #include "opal/util/arch.h" #include "opal/util/os_path.h" #include "opal/util/output.h" #include "opal/util/proc.h" #include "opal/runtime/opal.h" -#include "opal/runtime/opal_cr.h" #include "opal/runtime/opal_progress_threads.h" #include "orte/mca/rml/base/base.h" @@ -54,29 +54,20 @@ #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/qos/base/base.h" #include "orte/mca/odls/odls_types.h" -#include "orte/mca/plm/plm.h" #include "orte/mca/filem/base/base.h" #include "orte/mca/errmgr/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#include "orte/mca/sstore/base/base.h" -#endif #include "orte/mca/state/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" -#include "orte/runtime/orte_cr.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/ess/base/base.h" -static bool progress_thread_running = false; - int orte_ess_base_app_setup(bool db_restrict_local) { int ret; @@ -108,15 +99,12 @@ int orte_ess_base_app_setup(bool db_restrict_local) * do so here */ if (ORTE_PROC_NON_MPI) { orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME; - orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename); + orte_process_info.super.proc_hostname = orte_process_info.nodename; orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL; orte_process_info.super.proc_arch = opal_local_arch; opal_proc_local_set(&orte_process_info.super); } - /* get a separate orte event base */ - orte_event_base = opal_start_progress_thread("orte", true); - progress_thread_running = true; /* open and setup the state machine */ if (ORTE_SUCCESS != 
(ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -143,6 +131,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base, orte_process_info.nodename)); + if (ORTE_SUCCESS != (ret = orte_session_dir(true, orte_process_info.tmpdir_base, orte_process_info.nodename, NULL, @@ -151,35 +140,32 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_session_dir"; goto error; } + /* Once the session directory location has been established, set the opal_output env file location to be in the proc-specific session directory. */ opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); - /* store the session directory location in the database */ + /* store the session directory location */ OBJ_CONSTRUCT(&kv, opal_value_t); - kv.key = strdup(OPAL_DSTORE_JOB_SDIR); + kv.key = strdup(OPAL_PMIX_NSDIR); kv.type = OPAL_STRING; kv.data.string = strdup(orte_process_info.job_session_dir); - if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, - ORTE_PROC_MY_NAME, - &kv))) { + if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); - error = "opal dstore store"; + error = "opal pmix put job sessiondir"; goto error; } OBJ_DESTRUCT(&kv); OBJ_CONSTRUCT(&kv, opal_value_t); - kv.key = strdup(OPAL_DSTORE_MY_SDIR); + kv.key = strdup(OPAL_PMIX_PROCDIR); kv.type = OPAL_STRING; kv.data.string = strdup(orte_process_info.proc_session_dir); - if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, - ORTE_PROC_MY_NAME, - &kv))) { + if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&kv); - error = "opal dstore store"; + error = "opal pmix put proc sessiondir"; goto error; } OBJ_DESTRUCT(&kv); @@ -209,17 +195,6 @@ int orte_ess_base_app_setup(bool 
db_restrict_local) error = "orte_rml_base_select"; goto error; } - /* Messaging QoS Layer */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_select"; - goto error; - } /* setup the errmgr */ if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { ORTE_ERROR_LOG(ret); @@ -250,14 +225,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_grpcomm_base_select"; goto error; } - /* non-daemon/HNP apps can only have the default proxy PLM - * module open - provide a chance for it to initialize - */ - if (ORTE_SUCCESS != (ret = orte_plm.init())) { - ORTE_ERROR_LOG(ret); - error = "orte_plm_init"; - goto error; - } /* enable communication via the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); @@ -270,44 +237,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_routed.init_routes"; goto error; } -#if OPAL_ENABLE_FT_CR == 1 - /* - * Setup the SnapC - */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_select"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_select"; - goto error; - } - /* apps need the OPAL CR stuff */ - opal_cr_set_enabled(true); -#else - opal_cr_set_enabled(false); -#endif - /* Initalize the CR setup - * Note: Always do this, even in non-FT builds. 
- * If we don't some user level tools may hang. - */ - if (ORTE_SUCCESS != (ret = orte_cr_init())) { - ORTE_ERROR_LOG(ret); - error = "orte_cr_init"; - goto error; - } /* open the distributed file system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -319,35 +248,19 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_dfs_base_select"; goto error; } + return ORTE_SUCCESS; + error: - if (!progress_thread_running) { - /* can't send the help message, so ensure it - * comes out locally - */ - orte_show_help_finalize(); - } orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); + return ret; } int orte_ess_base_app_finalize(void) { - orte_cr_finalize(); - - /* release the event base so we stop all potential - * race conditions in the messaging teardown */ - if (progress_thread_running) { - opal_stop_progress_thread("orte", false); - progress_thread_running = false; - } - -#if OPAL_ENABLE_FT_CR == 1 - (void) mca_base_framework_close(&orte_snapc_base_framework); - (void) mca_base_framework_close(&orte_sstore_base_framework); -#endif /* close frameworks */ (void) mca_base_framework_close(&orte_filem_base_framework); @@ -355,7 +268,6 @@ int orte_ess_base_app_finalize(void) /* now can close the rml and its friendly group comm */ (void) mca_base_framework_close(&orte_grpcomm_base_framework); - (void) mca_base_framework_close(&opal_dstore_base_framework); (void) mca_base_framework_close(&orte_dfs_base_framework); (void) mca_base_framework_close(&orte_routed_base_framework); @@ -365,8 +277,6 @@ int orte_ess_base_app_finalize(void) orte_session_dir_finalize(ORTE_PROC_MY_NAME); - /* free the event base to cleanup memory */ - opal_stop_progress_thread("orte", true); return ORTE_SUCCESS; } @@ -403,8 +313,7 @@ void orte_ess_base_app_abort(int status, bool report) * clean environment. 
Taken from orte_finalize(): * - Assume errmgr cleans up child processes before we exit. */ - /* CRS cleanup since it may have a named pipe and thread active */ - orte_cr_finalize(); + /* If we were asked to report this termination, do so. * Since singletons don't start an HNP unless necessary, and * direct-launched procs don't have daemons at all, only send @@ -420,9 +329,11 @@ void orte_ess_base_app_abort(int status, bool report) * have a chance to be sent */ nanosleep(&tp, NULL); } + /* - Clean out the global structures * (not really necessary, but good practice) */ orte_proc_info_finalize(); + /* Now Exit */ _exit(status); } diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 526c7732d19..11ac9f19c92 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -12,9 +12,10 @@ * Copyright (c) 2009 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -35,11 +36,10 @@ #endif #include "opal/dss/dss.h" -#include "opal/mca/dstore/base/base.h" #include "opal/mca/event/event.h" #include "opal/runtime/opal.h" -#include "opal/runtime/opal_cr.h" #include "opal/mca/hwloc/base/base.h" +#include "opal/mca/pmix/base/base.h" #include "opal/mca/pstat/base/base.h" #include "opal/util/arch.h" #include "opal/util/os_path.h" @@ -50,7 +50,6 @@ #include "orte/mca/routed/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/qos/base/base.h" #include "orte/mca/dfs/base/base.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/grpcomm/base/base.h" @@ -58,10 +57,6 @@ #include "orte/mca/plm/base/base.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/errmgr/errmgr.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#include "orte/mca/sstore/base/base.h" -#endif #include "orte/mca/schizo/base/base.h" #include "orte/mca/filem/base/base.h" #include "orte/util/proc_info.h" @@ -73,7 +68,6 @@ #include "orte/mca/errmgr/base/base.h" #include "orte/mca/state/base/base.h" #include "orte/mca/state/state.h" -#include "orte/runtime/orte_cr.h" #include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_quit.h" @@ -115,6 +109,8 @@ int orte_ess_base_orted_setup(char **hosts) orte_app_context_t *app; orte_node_t *node; char *param; + hwloc_obj_t obj; + unsigned i, j; /* my name is set, xfer it to the OPAL layer */ orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME; @@ -124,6 +120,7 @@ int orte_ess_base_orted_setup(char **hosts) opal_proc_local_set(&orte_process_info.super); plm_in_use = false; + /* setup callback for SIGPIPE */ setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback); /* Set signal handlers to catch kill signals so we can properly clean up @@ -131,53 +128,51 @@ int orte_ess_base_orted_setup(char **hosts) */ 
setup_sighandler(SIGTERM, &term_handler, shutdown_signal); setup_sighandler(SIGINT, &int_handler, shutdown_signal); + /** setup callbacks for signals we should ignore */ setup_sighandler(SIGUSR1, &sigusr1_handler, signal_callback); setup_sighandler(SIGUSR2, &sigusr2_handler, signal_callback); + signals_set = true; -#if OPAL_HAVE_HWLOC - { - hwloc_obj_t obj; - unsigned i, j; - /* get the local topology */ - if (NULL == opal_hwloc_topology) { - if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { - error = "topology discovery"; - goto error; - } + + /* get the local topology */ + if (NULL == opal_hwloc_topology) { + if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { + error = "topology discovery"; + goto error; } - /* generate the signature */ - orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology); - /* remove the hostname from the topology. Unfortunately, hwloc - * decided to add the source hostname to the "topology", thus - * rendering it unusable as a pure topological description. So - * we remove that information here. - */ - obj = hwloc_get_root_obj(opal_hwloc_topology); - for (i=0; i < obj->infos_count; i++) { - if (NULL == obj->infos[i].name || - NULL == obj->infos[i].value) { - continue; - } - if (0 == strncmp(obj->infos[i].name, "HostName", strlen("HostName"))) { - free(obj->infos[i].name); - free(obj->infos[i].value); - /* left justify the array */ - for (j=i; j < obj->infos_count-1; j++) { - obj->infos[j] = obj->infos[j+1]; - } - obj->infos[obj->infos_count-1].name = NULL; - obj->infos[obj->infos_count-1].value = NULL; - obj->infos_count--; - break; - } + } + /* generate the signature */ + orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology); + /* remove the hostname from the topology. Unfortunately, hwloc + * decided to add the source hostname to the "topology", thus + * rendering it unusable as a pure topological description. So + * we remove that information here. 
+ */ + obj = hwloc_get_root_obj(opal_hwloc_topology); + for (i=0; i < obj->infos_count; i++) { + if (NULL == obj->infos[i].name || + NULL == obj->infos[i].value) { + continue; } - if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { - opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); + if (0 == strncmp(obj->infos[i].name, "HostName", strlen("HostName"))) { + free(obj->infos[i].name); + free(obj->infos[i].value); + /* left justify the array */ + for (j=i; j < obj->infos_count-1; j++) { + obj->infos[j] = obj->infos[j+1]; + } + obj->infos[obj->infos_count-1].name = NULL; + obj->infos[obj->infos_count-1].value = NULL; + obj->infos_count--; + break; } } -#endif + if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { + opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); + } + /* open and setup the opal_pstat framework so we can provide * process stats if requested */ @@ -191,6 +186,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "opal_pstat_base_select"; goto error; } + /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -202,12 +198,14 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_state_base_select"; goto error; } + /* open the errmgr */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_open"; goto error; } + /* some environments allow remote launches - e.g., ssh - so * open and select something -only- if we are given * a specific module to use @@ -230,6 +228,7 @@ int orte_ess_base_orted_setup(char **hosts) goto error; } } + /* setup my session directory here as the OOB may need it */ if (orte_create_session_dirs) { OPAL_OUTPUT_VERBOSE((2, 
orte_ess_base_framework.framework_output, @@ -253,6 +252,7 @@ int orte_ess_base_orted_setup(char **hosts) * stale directories laying around */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); + /* now actually create the directory tree */ if (ORTE_SUCCESS != (ret = orte_session_dir(true, orte_process_info.tmpdir_base, @@ -262,15 +262,18 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_session_dir"; goto error; } + /* set the opal_output env file location to be in the * proc-specific session directory. */ opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); + /* setup stdout/stderr */ if (orte_debug_daemons_file_flag) { /* if we are debugging to a file, then send stdout/stderr to * the orted log file */ + /* get my jobid */ if (ORTE_SUCCESS != (ret = orte_util_convert_jobid_to_string(&jobidstring, ORTE_PROC_MY_NAME->jobid))) { @@ -278,6 +281,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "convert_jobid"; goto error; } + /* define a log file name in the session directory */ snprintf(log_file, PATH_MAX, "output-orted-%s-%s.log", jobidstring, orte_process_info.nodename); @@ -302,8 +306,79 @@ int orte_ess_base_orted_setup(char **hosts) } } } + + /* setup the global job and node arrays */ + orte_job_data = OBJ_NEW(opal_pointer_array_t); + if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data, + 1, + ORTE_GLOBAL_ARRAY_MAX_SIZE, + 1))) { + ORTE_ERROR_LOG(ret); + error = "setup job array"; + goto error; + } + orte_node_pool = OBJ_NEW(opal_pointer_array_t); + if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool, + ORTE_GLOBAL_ARRAY_BLOCK_SIZE, + ORTE_GLOBAL_ARRAY_MAX_SIZE, + ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { + ORTE_ERROR_LOG(ret); + error = "setup node array"; + goto error; + } + orte_node_topologies = OBJ_NEW(opal_pointer_array_t); + if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies, + ORTE_GLOBAL_ARRAY_BLOCK_SIZE, + ORTE_GLOBAL_ARRAY_MAX_SIZE, + 
ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { + ORTE_ERROR_LOG(ret); + error = "setup node topologies array"; + goto error; + } + /* Setup the job data object for the daemons */ + /* create and store the job data object */ + jdata = OBJ_NEW(orte_job_t); + jdata->jobid = ORTE_PROC_MY_NAME->jobid; + opal_pointer_array_set_item(orte_job_data, 0, jdata); + /* every job requires at least one app */ + app = OBJ_NEW(orte_app_context_t); + opal_pointer_array_set_item(jdata->apps, 0, app); + jdata->num_apps++; + /* create and store a node object where we are */ + node = OBJ_NEW(orte_node_t); + node->name = strdup(orte_process_info.nodename); + node->index = opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node); + /* point our topology to the one detected locally */ + node->topology = opal_hwloc_topology; + + /* create and store a proc object for us */ + proc = OBJ_NEW(orte_proc_t); + proc->name.jobid = ORTE_PROC_MY_NAME->jobid; + proc->name.vpid = ORTE_PROC_MY_NAME->vpid; + proc->pid = orte_process_info.pid; + proc->state = ORTE_PROC_STATE_RUNNING; + opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc); + /* record that the daemon (i.e., us) is on this node + * NOTE: we do not add the proc object to the node's + * proc array because we are not an application proc. 
+ * Instead, we record it in the daemon field of the + * node object + */ + OBJ_RETAIN(proc); /* keep accounting straight */ + node->daemon = proc; + ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); + node->state = ORTE_NODE_STATE_UP; + /* now point our proc node field to the node */ + OBJ_RETAIN(node); /* keep accounting straight */ + proc->node = node; + /* record that the daemon job is running */ + jdata->num_procs = 1; + jdata->state = ORTE_JOB_STATE_RUNNING; + /* obviously, we have "reported" */ + jdata->num_reported = 1; + /* Setup the communication infrastructure */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_oob_base_open"; goto error; @@ -323,23 +398,16 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_rml_base_select"; goto error; } - /* Messaging QoS Layer */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_select"; - goto error; - } + /* add our contact info */ + proc->rml_uri = orte_rml.get_contact_info(); + /* select the errmgr */ if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_select"; goto error; } + /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -351,6 +419,15 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_routed_base_select"; goto error; } + /* setup the routed info - the selected routed component + * will know what to do. 
+ */ + if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { + ORTE_ERROR_LOG(ret); + error = "orte_routed.init_routes"; + goto error; + } + /* * Group communications */ @@ -364,6 +441,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_grpcomm_base_select"; goto error; } + /* Open/select the odls */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -375,6 +453,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_odls_base_select"; goto error; } + /* Open/select the rtc */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rtc_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -386,12 +465,14 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_rtc_base_select"; goto error; } + /* enable communication with the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } + #if ORTE_ENABLE_STATIC_PORTS /* if we are using static ports, then we need to setup * the daemon info so the RML can function properly @@ -404,6 +485,7 @@ int orte_ess_base_orted_setup(char **hosts) * if we are trying to setup common or static ports */ orte_routed.update_routing_plan(); + /* extract the node info from the environment and * build a nidmap from it */ @@ -419,6 +501,7 @@ int orte_ess_base_orted_setup(char **hosts) * need to do it anyway just to initialize things */ orte_routed.update_routing_plan(); + /* Now provide a chance for the PLM * to perform any module-specific init functions. 
This * needs to occur AFTER the communications are setup @@ -433,83 +516,28 @@ int orte_ess_base_orted_setup(char **hosts) goto error; } } - /* setup the global job and node arrays */ - orte_job_data = OBJ_NEW(opal_pointer_array_t); - if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data, - 1, - ORTE_GLOBAL_ARRAY_MAX_SIZE, - 1))) { - ORTE_ERROR_LOG(ret); - error = "setup job array"; - goto error; - } - orte_node_pool = OBJ_NEW(opal_pointer_array_t); - if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool, - ORTE_GLOBAL_ARRAY_BLOCK_SIZE, - ORTE_GLOBAL_ARRAY_MAX_SIZE, - ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { + + /* setup the PMIx framework - ensure it skips all non-PMIx components */ + putenv("OMPI_MCA_pmix=^s1,s2,cray"); + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { ORTE_ERROR_LOG(ret); - error = "setup node array"; + error = "orte_pmix_base_open"; goto error; } - orte_node_topologies = OBJ_NEW(opal_pointer_array_t); - if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies, - ORTE_GLOBAL_ARRAY_BLOCK_SIZE, - ORTE_GLOBAL_ARRAY_MAX_SIZE, - ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { + if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) { ORTE_ERROR_LOG(ret); - error = "setup node topologies array"; + error = "opal_pmix_base_select"; goto error; } - /* Setup the job data object for the daemons */ - /* create and store the job data object */ - jdata = OBJ_NEW(orte_job_t); - jdata->jobid = ORTE_PROC_MY_NAME->jobid; - opal_pointer_array_set_item(orte_job_data, 0, jdata); - /* every job requires at least one app */ - app = OBJ_NEW(orte_app_context_t); - opal_pointer_array_set_item(jdata->apps, 0, app); - jdata->num_apps++; - /* create and store a node object where we are */ - node = OBJ_NEW(orte_node_t); - node->name = strdup(orte_process_info.nodename); - node->index = opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node); -#if OPAL_HAVE_HWLOC - /* point our topology to the one detected 
locally */ - node->topology = opal_hwloc_topology; -#endif - /* create and store a proc object for us */ - proc = OBJ_NEW(orte_proc_t); - proc->name.jobid = ORTE_PROC_MY_NAME->jobid; - proc->name.vpid = ORTE_PROC_MY_NAME->vpid; - proc->pid = orte_process_info.pid; - proc->rml_uri = orte_rml.get_contact_info(); - proc->state = ORTE_PROC_STATE_RUNNING; - opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc); - /* record that the daemon (i.e., us) is on this node - * NOTE: we do not add the proc object to the node's - * proc array because we are not an application proc. - * Instead, we record it in the daemon field of the - * node object - */ - OBJ_RETAIN(proc); /* keep accounting straight */ - node->daemon = proc; - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); - node->state = ORTE_NODE_STATE_UP; - /* now point our proc node field to the node */ - OBJ_RETAIN(node); /* keep accounting straight */ - proc->node = node; - /* record that the daemon job is running */ - jdata->num_procs = 1; - jdata->state = ORTE_JOB_STATE_RUNNING; - /* obviously, we have "reported" */ - jdata->num_reported = 1; + /* set the event base */ + opal_pmix_base_set_evbase(orte_event_base); /* setup the PMIx server */ if (ORTE_SUCCESS != (ret = pmix_server_init())) { ORTE_ERROR_LOG(ret); error = "pmix server init"; goto error; } + /* setup the routed info - the selected routed component * will know what to do. 
*/ @@ -518,6 +546,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_routed.init_routes"; goto error; } + /* setup I/O forwarding system - must come after we init routes */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -529,6 +558,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_iof_base_select"; goto error; } + /* setup the FileM */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -541,46 +571,6 @@ int orte_ess_base_orted_setup(char **hosts) goto error; } -#if OPAL_ENABLE_FT_CR == 1 - /* - * Setup the SnapC - */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_snapc_base_select(!ORTE_PROC_IS_HNP, ORTE_PROC_IS_DAEMON))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_select"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_select"; - goto error; - } - - /* For daemons, ORTE doesn't need the OPAL CR stuff */ - opal_cr_set_enabled(false); -#else - opal_cr_set_enabled(false); -#endif - /* - * Initalize the CR setup - * Note: Always do this, even in non-FT builds. - * If we don't some user level tools may hang. 
- */ - if (ORTE_SUCCESS != (ret = orte_cr_init())) { - ORTE_ERROR_LOG(ret); - error = "orte_cr_init"; - goto error; - } /* setup the DFS framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -592,6 +582,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_dfs_select"; goto error; } + /* setup the SCHIZO framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -603,11 +594,18 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_schizo_select"; goto error; } + return ORTE_SUCCESS; + error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); + + /* remove our use of the session directory tree */ + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + /* ensure we scrub the session directory tree */ + orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); return ORTE_ERR_SILENT; } @@ -621,12 +619,16 @@ int orte_ess_base_orted_finalize(void) opal_event_signal_del(&sigusr1_handler); opal_event_signal_del(&sigusr2_handler); } + /* cleanup */ if (NULL != log_path) { unlink(log_path); } + /* shutdown the pmix server */ pmix_server_finalize(); + (void) mca_base_framework_close(&opal_pmix_base_framework); + /* close frameworks */ (void) mca_base_framework_close(&orte_schizo_base_framework); (void) mca_base_framework_close(&orte_filem_base_framework); @@ -634,8 +636,10 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&orte_iof_base_framework); (void) mca_base_framework_close(&orte_errmgr_base_framework); (void) mca_base_framework_close(&orte_plm_base_framework); + /* close the dfs so its threads can exit */ (void) mca_base_framework_close(&orte_dfs_base_framework); + /* make sure our local procs are dead */ orte_odls.kill_local_procs(NULL); (void) mca_base_framework_close(&orte_rtc_base_framework); @@ -644,9 +648,12 @@ int 
orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&orte_rml_base_framework); (void) mca_base_framework_close(&orte_oob_base_framework); (void) mca_base_framework_close(&orte_state_base_framework); - (void) mca_base_framework_close(&opal_dstore_base_framework); - /* cleanup any lingering session directories */ + + /* remove our use of the session directory tree */ + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + /* ensure we scrub the session directory tree */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); + return ORTE_SUCCESS; } diff --git a/orte/mca/ess/base/ess_base_std_prolog.c b/orte/mca/ess/base/ess_base_std_prolog.c index 77a85ad0169..42e76a6267d 100644 --- a/orte/mca/ess/base/ess_base_std_prolog.c +++ b/orte/mca/ess/base/ess_base_std_prolog.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,13 +39,13 @@ int orte_ess_base_std_prolog(void) { int ret; char *error = NULL; - + /* Initialize the ORTE data type support */ if (ORTE_SUCCESS != (ret = orte_dt_init())) { error = "orte_dt_init"; goto error; } - + if (!ORTE_PROC_IS_APP) { /* * Setup the waitpid/sigchld system @@ -56,9 +56,9 @@ int orte_ess_base_std_prolog(void) goto error; } } - + return ORTE_SUCCESS; - + error: orte_show_help("help-orte-runtime", "orte_init:startup:internal-failure", diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index 8c8cefa7bee..c48f85a2b72 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -9,11 +9,12 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. * + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -35,7 +36,6 @@ #include "opal/mca/event/event.h" #include "opal/runtime/opal.h" -#include "opal/runtime/opal_cr.h" #include "opal/runtime/opal_progress_threads.h" #include "opal/util/arch.h" #include "opal/util/proc.h" @@ -43,28 +43,20 @@ #include "orte/mca/oob/base/base.h" #include "orte/mca/plm/base/base.h" #include "orte/mca/rml/base/base.h" -#include "orte/mca/qos/base/base.h" #include "orte/mca/routed/base/base.h" #include "orte/mca/errmgr/base/base.h" #include "orte/mca/iof/base/base.h" #include "orte/mca/state/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#include "orte/mca/sstore/base/base.h" -#endif #include "orte/mca/schizo/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/show_help.h" -#include "orte/runtime/orte_cr.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/ess/base/base.h" -static bool progress_thread_running = false; - int orte_ess_base_tool_setup(void) { int ret; @@ -77,18 +69,6 @@ int orte_ess_base_tool_setup(void) orte_process_info.super.proc_arch = opal_local_arch; opal_proc_local_set(&orte_process_info.super); - if (NULL != orte_process_info.my_hnp_uri) { - /* if we were given an HNP, then we want - * to look like an application as well as being a tool. - * Need to do this before opening the routed framework - * so it will do the right things. 
- */ - orte_process_info.proc_type |= ORTE_PROC_NON_MPI; - /* get a separate orte event base */ - orte_event_base = opal_start_progress_thread("orte", true); - progress_thread_running = true; - orte_event_base_active = true; - } /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -100,6 +80,7 @@ int orte_ess_base_tool_setup(void) error = "orte_state_base_select"; goto error; } + /* open and setup the error manager */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -111,6 +92,7 @@ int orte_ess_base_tool_setup(void) error = "orte_errmgr_base_select"; goto error; } + /* Setup the communication infrastructure */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -122,6 +104,7 @@ int orte_ess_base_tool_setup(void) error = "orte_oob_base_select"; goto error; } + /* Runtime Messaging Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -133,17 +116,6 @@ int orte_ess_base_tool_setup(void) error = "orte_rml_base_select"; goto error; } - /* Messaging QoS Layer */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_select"; - goto error; - } /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -155,17 +127,20 @@ int orte_ess_base_tool_setup(void) error = "orte_routed_base_select"; goto error; } + /* since I am a tool, then all I really want to do is communicate. 
* So setup communications and be done - finding the HNP * to which I want to communicate and setting up a route for * that link is my responsibility */ + /* enable communication via the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } + /* we -may- need to know the name of the head * of our session directory tree, particularly the * tmp base where any other session directories on @@ -213,36 +188,6 @@ int orte_ess_base_tool_setup(void) * base proxy functions */ } -#if OPAL_ENABLE_FT_CR == 1 - /* - * Setup the SnapC - */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_open"; - goto error; - } - - if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_select"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_select"; - goto error; - } - - /* Tools do not need all the OPAL CR stuff */ - opal_cr_set_enabled(false); -#endif - /* setup schizo in case we are parsing cmd lines */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -269,11 +214,6 @@ int orte_ess_base_tool_finalize(void) { orte_wait_finalize(); -#if OPAL_ENABLE_FT_CR == 1 - mca_base_framework_close(&orte_snapc_base_framework); - mca_base_framework_close(&orte_sstore_base_framework); -#endif - /* if I am a tool, then all I will have done is * a very small subset of orte_init - ensure that * I only back those elements out @@ -286,10 +226,5 @@ int orte_ess_base_tool_finalize(void) (void) mca_base_framework_close(&orte_schizo_base_framework); (void) 
mca_base_framework_close(&orte_errmgr_base_framework); - /* release the event base */ - if (progress_thread_running) { - opal_stop_progress_thread("orte", true); - progress_thread_running = false; - } return ORTE_SUCCESS; } diff --git a/orte/mca/ess/base/help-ess-base.txt b/orte/mca/ess/base/help-ess-base.txt index 5e315c74ecb..257a64a7279 100644 --- a/orte/mca/ess/base/help-ess-base.txt +++ b/orte/mca/ess/base/help-ess-base.txt @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for the SDS base. diff --git a/orte/mca/ess/env/Makefile.am b/orte/mca/ess/env/Makefile.am index 8f9276413f2..eda412e700c 100644 --- a/orte/mca/ess/env/Makefile.am +++ b/orte/mca/ess/env/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/ess/env/ess_env.h b/orte/mca/ess/env/ess_env.h index 56285f8abda..6ed32b79fb3 100644 --- a/orte/mca/ess/env/ess_env.h +++ b/orte/mca/ess/env/ess_env.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/ess/env/ess_env_component.c b/orte/mca/ess/env/ess_env_component.c index 3b6f8ddd7aa..e9a4a154385 100644 --- a/orte/mca/ess/env/ess_env_component.c +++ b/orte/mca/ess/env/ess_env_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -76,7 +76,7 @@ int orte_ess_env_component_query(mca_base_module_t **module, int *priority) *module = (mca_base_module_t *)&orte_ess_env_module; return ORTE_SUCCESS; } - + /* if not, then return NULL - we cannot be selected */ *priority = -1; *module = NULL; diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c index dc548c38d10..2458961ace4 100644 --- a/orte/mca/ess/env/ess_env_module.c +++ b/orte/mca/ess/env/ess_env_module.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -56,9 +56,6 @@ #include "orte/mca/plm/base/base.h" #include "orte/mca/rmaps/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#endif #include "orte/mca/filem/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" @@ -69,7 +66,6 @@ #include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_cr.h" #include "orte/mca/ess/ess.h" #include "orte/mca/ess/base/base.h" #include "orte/mca/ess/env/ess_env.h" @@ -79,19 +75,11 @@ static int env_set_name(void); static int rte_init(void); static int rte_finalize(void); -#if OPAL_ENABLE_FT_CR == 1 -static int rte_ft_event(int state); -#endif - orte_ess_base_module_t orte_ess_env_module = { rte_init, rte_finalize, orte_ess_base_app_abort, -#if OPAL_ENABLE_FT_CR == 1 - rte_ft_event -#else NULL -#endif }; static int rte_init(void) @@ -105,7 +93,7 @@ static int rte_init(void) error = "orte_ess_base_std_prolog"; goto error; } - + /* Start by getting a unique name from the enviro */ env_set_name(); @@ -152,7 +140,7 @@ static int env_set_name(void) int rc; orte_jobid_t jobid; orte_vpid_t vpid; - + if (NULL == orte_ess_base_jobid) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; @@ -173,10 +161,10 @@ static int env_set_name(void) ORTE_PROC_MY_NAME->jobid = jobid; ORTE_PROC_MY_NAME->vpid = vpid; - + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:env set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - + /* get the non-name common environmental variables */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); @@ -186,211 +174,3 @@ static int env_set_name(void) return ORTE_SUCCESS; } -#if OPAL_ENABLE_FT_CR == 1 -static int rte_ft_event(int state) -{ - int ret, exit_status = ORTE_SUCCESS; - orte_proc_type_t svtype; - - /******** Checkpoint Prep ********/ - if(OPAL_CRS_CHECKPOINT == state) { - /* - * 
Notify SnapC - */ - if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_CHECKPOINT))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Notify Routed - */ - if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_CHECKPOINT))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Notify RML -> OOB - */ - if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_CHECKPOINT))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - /******** Continue Recovery ********/ - else if (OPAL_CRS_CONTINUE == state ) { - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, - "ess:env ft_event(%2d) - %s is Continuing", - state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* - * Notify RML -> OOB - */ - if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_CONTINUE))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Notify Routed - */ - if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_CONTINUE))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Notify SnapC - */ - if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_CONTINUE))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if (opal_cr_continue_like_restart) { - /* - * Barrier to make all processes have been successfully restarted before - * we try to remove some restart only files. 
- */ - opal_pmix.fence(NULL, 0); - - if( orte_cr_flush_restart_files ) { - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, - "ess:env ft_event(%2d): %s " - "Cleanup restart files...", - state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - opal_crs_base_cleanup_flush(); - } - } - } - /******** Restart Recovery ********/ - else if (OPAL_CRS_RESTART == state ) { - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, - "ess:env ft_event(%2d) - %s is Restarting", - state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* - * This should follow the ess init() function - */ - - /* - * - Reset Contact information - */ - if( ORTE_SUCCESS != (ret = env_set_name() ) ) { - exit_status = ret; - } - - /* - * Notify RML -> OOB - */ - if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_RESTART))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Restart the routed framework - * JJH: Lie to the finalize function so it does not try to contact the daemon. - */ - svtype = orte_process_info.proc_type; - orte_process_info.proc_type = ORTE_PROC_TOOL; - if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - orte_process_info.proc_type = svtype; - if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* RHC: you can't pass NULL as the identifier - what you'll need to do is - * close all open dstore handles, and then open the ones you need - */ -#if 0 - if (OPAL_SUCCESS != (ret = opal_dstore.remove(NULL, NULL))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } -#endif - - /* - * Restart the PLM - Does nothing at the moment, but included for completeness - */ - if (ORTE_SUCCESS != (ret = orte_plm.finalize())) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if (ORTE_SUCCESS != (ret = orte_plm.init())) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * RML - Enable communications - */ - if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { - 
ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Notify Routed - */ - if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_RESTART))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Barrier to make all processes have been successfully restarted before - * we try to remove some restart only files. - */ - opal_pmix.fence(NULL, 0); - - if( orte_cr_flush_restart_files ) { - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, - "ess:env ft_event(%2d): %s " - "Cleanup restart files...", - state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - opal_crs_base_cleanup_flush(); - } - - /* - * Session directory re-init - */ - if (orte_create_session_dirs) { - if (ORTE_SUCCESS != (ret = orte_session_dir(true, - orte_process_info.tmpdir_base, - orte_process_info.nodename, - NULL, /* Batch ID -- Not used */ - ORTE_PROC_MY_NAME))) { - exit_status = ret; - } - - opal_output_set_output_file_info(orte_process_info.proc_session_dir, - "output-", NULL, NULL); - } - - /* - * Notify SnapC - */ - if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_RESTART))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - else if (OPAL_CRS_TERM == state ) { - /* Nothing */ - } - else { - /* Error state = Nothing */ - } - - return exit_status; -} -#endif diff --git a/orte/mca/ess/ess.h b/orte/mca/ess/ess.h index 458e66e28ec..34c2dd6324c 100644 --- a/orte/mca/ess/ess.h +++ b/orte/mca/ess/ess.h @@ -6,17 +6,17 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: diff --git a/orte/mca/ess/hnp/Makefile.am b/orte/mca/ess/hnp/Makefile.am index 7bdfba406c2..88a92ed56fc 100644 --- a/orte/mca/ess/hnp/Makefile.am +++ b/orte/mca/ess/hnp/Makefile.am @@ -5,18 +5,23 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Los Alamos National Security, LLC. All rights +# reseved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # +dist_ortedata_DATA = help-ess-hnp.txt + sources = \ ess_hnp.h \ ess_hnp_component.c \ diff --git a/orte/mca/ess/hnp/ess_hnp.h b/orte/mca/ess/hnp/ess_hnp.h index 5f9400d7c1b..0c177210ef5 100644 --- a/orte/mca/ess/hnp/ess_hnp.h +++ b/orte/mca/ess/hnp/ess_hnp.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,14 +6,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Intel, Inc. 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -24,12 +28,19 @@ BEGIN_C_DECLS /* * Module open / close */ -int orte_ess_hnp_component_open(void); -int orte_ess_hnp_component_close(void); -int orte_ess_hnp_component_query(mca_base_module_t **module, int *priority); +typedef struct { + opal_list_item_t super; + char *signame; + int signal; +} ess_hnp_signal_t; +OBJ_CLASS_DECLARATION(ess_hnp_signal_t); +typedef struct { + orte_ess_base_component_t base; + opal_list_t signals; +} orte_ess_hnp_component_t; -ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_hnp_component; +ORTE_MODULE_DECLSPEC extern orte_ess_hnp_component_t mca_ess_hnp_component; END_C_DECLS diff --git a/orte/mca/ess/hnp/ess_hnp_component.c b/orte/mca/ess/hnp/ess_hnp_component.c index be5d1922892..84d8d4da191 100644 --- a/orte/mca/ess/hnp/ess_hnp_component.c +++ b/orte/mca/ess/hnp/ess_hnp_component.c @@ -6,16 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -25,49 +28,206 @@ * entire components just to query their version and parameters. 
*/ +#include "opal/util/argv.h" + #include "orte_config.h" #include "orte/constants.h" #include "orte/util/proc_info.h" +#include "orte/util/show_help.h" #include "orte/mca/ess/ess.h" #include "orte/mca/ess/hnp/ess_hnp.h" +#include "orte/runtime/orte_globals.h" extern orte_ess_base_module_t orte_ess_hnp_module; +static int hnp_component_register (void); +static int hnp_component_open(void); +static int hnp_component_close(void); +static int hnp_component_query(mca_base_module_t **module, int *priority); + +struct known_signal { + /** signal number */ + int signal; + /** signal name */ + char *signame; + /** can this signal be forwarded */ + bool can_forward; +}; + +static struct known_signal known_signals[] = { + {SIGTERM, "SIGTERM", false}, + {SIGHUP, "SIGHUP", false}, + {SIGINT, "SIGINT", false}, + {SIGKILL, "SIGKILL", false}, +#ifdef SIGSYS + {SIGSYS, "SIGSYS", true}, +#endif +#ifdef SIGXCPU + {SIGXCPU, "SIGXCPU", true}, +#endif + {SIGXFSZ, "SIGXFSZ", true}, +#ifdef SIGVTALRM + {SIGVTALRM, "SIGVTALRM", true}, +#endif +#ifdef SIGPROF + {SIGPROF, "SIGPROF", true}, +#endif +#ifdef SIGINFO + {SIGINFO, "SIGINFO", true}, +#endif +#ifdef SIGPWR + {SIGPWR, "SIGPWR", true}, +#endif + {0, NULL}, +}; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_ess_base_component_t mca_ess_hnp_component = { - .base_version = { - ORTE_ESS_BASE_VERSION_3_0_0, - - /* Component name and version */ - .mca_component_name = "hnp", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = orte_ess_hnp_component_open, - .mca_close_component = orte_ess_hnp_component_close, - .mca_query_component = orte_ess_hnp_component_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, +orte_ess_hnp_component_t mca_ess_hnp_component = { + .base = { + .base_version = { 
+ ORTE_ESS_BASE_VERSION_3_0_0, + + /* Component name and version */ + .mca_component_name = "hnp", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + + /* Component open and close functions */ + .mca_open_component = hnp_component_open, + .mca_close_component = hnp_component_close, + .mca_query_component = hnp_component_query, + .mca_register_component_params = hnp_component_register, + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } + } }; +static char *additional_signals; + +static int hnp_component_register (void) +{ + additional_signals = NULL; + (void) mca_base_component_var_register (&mca_ess_hnp_component.base.base_version, + "forward_signals", "Comma-delimited list " + "of additional signals (names or integers) to forward to " + "application processes [\"none\" => forward nothing]", MCA_BASE_VAR_TYPE_STRING, + NULL, 0, 0, OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_READONLY, + &additional_signals); + + return ORTE_SUCCESS; +} + +#define ESS_ADDSIGNAL(x, s) \ + do { \ + ess_hnp_signal_t *_sig; \ + _sig = OBJ_NEW(ess_hnp_signal_t); \ + _sig->signal = (x); \ + _sig->signame = strdup((s)); \ + opal_list_append(&mca_ess_hnp_component.signals, &_sig->super); \ + } while(0) -int -orte_ess_hnp_component_open(void) +static int hnp_component_open(void) { + int i, sval; + char **signals, *tmp; + ess_hnp_signal_t *sig; + bool ignore, found; + + OBJ_CONSTRUCT(&mca_ess_hnp_component.signals, opal_list_t); + + /* we know that some signals are (nearly) always defined, regardless + * of environment, so add them here */ + ESS_ADDSIGNAL(SIGTSTP, "SIGTSTP"); + ESS_ADDSIGNAL(SIGUSR1, "SIGUSR1"); + ESS_ADDSIGNAL(SIGUSR2, "SIGUSR2"); + ESS_ADDSIGNAL(SIGABRT, "SIGABRT"); + ESS_ADDSIGNAL(SIGALRM, "SIGALRM"); + ESS_ADDSIGNAL(SIGCONT, "SIGCONT"); +#ifdef SIGURG + ESS_ADDSIGNAL(SIGURG, "SIGURG"); +#endif + + /* see if they asked for anything beyond those - note that they may + * have 
asked for some we already cover, and so we ignore any duplicates */ + if (NULL != additional_signals) { + /* if they told us "none", then dump the list */ + if (0 == strcmp(additional_signals, "none")) { + OPAL_LIST_DESTRUCT(&mca_ess_hnp_component.signals); + /* need to reconstruct it for when we close */ + OBJ_CONSTRUCT(&mca_ess_hnp_component.signals, opal_list_t); + return ORTE_SUCCESS; + } + signals = opal_argv_split(additional_signals, ','); + for (i=0; NULL != signals[i]; i++) { + sval = 0; + if (0 != strncmp(signals[i], "SIG", 3)) { + /* treat it like a number */ + errno = 0; + sval = strtoul(signals[i], &tmp, 10); + if (0 != errno || '\0' != *tmp) { + orte_show_help("help-ess-hnp.txt", "ess-hnp:unknown-signal", + true, signals[i], additional_signals); + opal_argv_free(signals); + return OPAL_ERR_SILENT; + } + } + + /* see if it is one we already covered */ + ignore = false; + OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) { + if (0 == strcasecmp(signals[i], sig->signame) || sval == sig->signal) { + /* got it - we will ignore */ + ignore = true; + break; + } + } + + if (ignore) { + continue; + } + + /* see if they gave us a signal name */ + found = false; + for (int j = 0 ; known_signals[j].signame ; ++j) { + if (0 == strcasecmp (signals[i], known_signals[j].signame) || sval == known_signals[j].signal) { + if (!known_signals[j].can_forward) { + orte_show_help("help-ess-hnp.txt", "ess-hnp:cannot-forward", + true, known_signals[j].signame, additional_signals); + opal_argv_free(signals); + return OPAL_ERR_SILENT; + } + found = true; + ESS_ADDSIGNAL(known_signals[j].signal, known_signals[j].signame); + break; + } + } + + if (!found) { + if (0 == strncmp(signals[i], "SIG", 3)) { + orte_show_help("help-ess-hnp.txt", "ess-hnp:unknown-signal", + true, signals[i], additional_signals); + opal_argv_free(signals); + return OPAL_ERR_SILENT; + } + + ESS_ADDSIGNAL(sval, signals[i]); + } + } + opal_argv_free (signals); + } + return ORTE_SUCCESS; } 
-int orte_ess_hnp_component_query(mca_base_module_t **module, int *priority) +static int hnp_component_query(mca_base_module_t **module, int *priority) { /* we are the hnp module - we need to be selected @@ -78,7 +238,7 @@ int orte_ess_hnp_component_query(mca_base_module_t **module, int *priority) *module = (mca_base_module_t *)&orte_ess_hnp_module; return ORTE_SUCCESS; } - + /* else, we are not */ *priority = -1; *module = NULL; @@ -86,9 +246,22 @@ int orte_ess_hnp_component_query(mca_base_module_t **module, int *priority) } -int -orte_ess_hnp_component_close(void) +static int hnp_component_close(void) { return ORTE_SUCCESS; } +/* instantiate the class */ +static void scon(ess_hnp_signal_t *t) +{ + t->signame = NULL; +} +static void sdes(ess_hnp_signal_t *t) +{ + if (NULL != t->signame) { + free(t->signame); + } +} +OBJ_CLASS_INSTANCE(ess_hnp_signal_t, + opal_list_item_t, + scon, sdes); diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 5914490974f..c80f5a30180 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,9 +12,12 @@ * All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -37,10 +41,8 @@ #include "opal/hash_string.h" #include "opal/class/opal_hash_table.h" #include "opal/class/opal_list.h" -#include "opal/mca/dstore/base/base.h" #include "opal/mca/event/event.h" #include "opal/runtime/opal.h" -#include "opal/runtime/opal_cr.h" #include "opal/util/arch.h" #include "opal/util/argv.h" @@ -50,12 +52,12 @@ #include "opal/util/malloc.h" #include "opal/util/basename.h" #include "opal/util/fd.h" +#include "opal/mca/pmix/base/base.h" #include "opal/mca/pstat/base/base.h" #include "opal/mca/hwloc/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/base/base.h" -#include "orte/mca/qos/base/base.h" #include "orte/mca/rml/rml_types.h" #include "orte/mca/routed/base/base.h" #include "orte/mca/routed/routed.h" @@ -69,10 +71,6 @@ #include "orte/mca/plm/plm.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/rmaps/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#include "orte/mca/sstore/base/base.h" -#endif #include "orte/mca/filem/base/base.h" #include "orte/mca/schizo/base/base.h" #include "orte/mca/state/base/base.h" @@ -92,7 +90,6 @@ #include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_quit.h" -#include "orte/runtime/orte_cr.h" #include "orte/runtime/orte_locks.h" #include "orte/runtime/orte_data_server.h" @@ -117,10 +114,7 @@ static bool forcibly_die=false; static opal_event_t term_handler; static opal_event_t epipe_handler; static int term_pipe[2]; -static opal_event_t sigusr1_handler; -static opal_event_t sigusr2_handler; -static opal_event_t sigtstp_handler; -static opal_event_t sigcont_handler; +static opal_event_t *forward_signals_events = NULL; static void abort_signal_callback(int signal); static void clean_abort(int fd, short flags, void *arg); @@ -145,6 +139,11 @@ static int rte_init(void) orte_proc_t *proc; orte_app_context_t *app; char **aliases, *aptr; + char 
*coprocessors, **sns; + uint32_t h; + int idx; + orte_topology_t *t; + ess_hnp_signal_t *sig; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -187,31 +186,37 @@ static int rte_init(void) signal(SIGINT, abort_signal_callback); signal(SIGHUP, abort_signal_callback); - /** setup callbacks for signals we should foward */ - setup_sighandler(SIGUSR1, &sigusr1_handler, signal_forward_callback); - setup_sighandler(SIGUSR2, &sigusr2_handler, signal_forward_callback); - setup_sighandler(SIGTSTP, &sigtstp_handler, signal_forward_callback); - setup_sighandler(SIGCONT, &sigcont_handler, signal_forward_callback); + /** setup callbacks for signals we should forward */ + if (0 < (idx = opal_list_get_size(&mca_ess_hnp_component.signals))) { + forward_signals_events = (opal_event_t*)malloc(sizeof(opal_event_t) * idx); + if (NULL == forward_signals_events) { + ret = ORTE_ERR_OUT_OF_RESOURCE; + error = "unable to malloc"; + goto error; + } + idx = 0; + OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) { + setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback); + ++idx; + } + } signals_set = true; -#if OPAL_HAVE_HWLOC - { - /* get the local topology */ - if (NULL == opal_hwloc_topology) { - if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { - error = "topology discovery"; - goto error; - } + /* get the local topology */ + if (NULL == opal_hwloc_topology) { + if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { + error = "topology discovery"; + goto error; } - /* generate the signature */ - orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology); + } + /* generate the signature */ + orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology); - if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { - opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_dss.dump(0, opal_hwloc_topology, 
OPAL_HWLOC_TOPO); - } + if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { + opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); } -#endif + /* if we are using xml for output, put an mpirun start tag */ if (orte_xml_output) { @@ -223,30 +228,26 @@ static int rte_init(void) * process stats if requested */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_pstat_base_framework, 0))) { - ORTE_ERROR_LOG(ret); error = "opal_pstat_base_open"; goto error; } if (ORTE_SUCCESS != (ret = opal_pstat_base_select())) { - ORTE_ERROR_LOG(ret); error = "opal_pstat_base_select"; goto error; } + /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { - ORTE_ERROR_LOG(ret); error = "orte_state_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_state_base_select())) { - ORTE_ERROR_LOG(ret); error = "orte_state_base_select"; goto error; } /* open the errmgr */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { - ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_open"; goto error; } @@ -257,26 +258,27 @@ static int rte_init(void) * first and select that component. 
*/ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) { - ORTE_ERROR_LOG(ret); error = "orte_plm_base_open"; goto error; } + if (ORTE_SUCCESS != (ret = orte_plm_base_select())) { - ORTE_ERROR_LOG(ret); error = "orte_plm_base_select"; + if (ORTE_ERR_FATAL == ret) { + /* we already output a show_help - so keep down the verbage */ + ret = ORTE_ERR_SILENT; + } goto error; } /* if we were spawned by a singleton, our jobid was given to us */ if (NULL != orte_ess_base_jobid) { if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&ORTE_PROC_MY_NAME->jobid, orte_ess_base_jobid))) { - ORTE_ERROR_LOG(ret); error = "convert_string_to_jobid"; goto error; } ORTE_PROC_MY_NAME->vpid = 0; } else { if (ORTE_SUCCESS != (ret = orte_plm.set_hnp_name())) { - ORTE_ERROR_LOG(ret); error = "orte_plm_set_hnp_name"; goto error; } @@ -295,6 +297,7 @@ static int rte_init(void) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base, orte_process_info.nodename)); + /* take a pass thru the session directory code to fillin the * tmpdir names - don't create anything yet */ @@ -302,7 +305,6 @@ static int rte_init(void) orte_process_info.tmpdir_base, orte_process_info.nodename, NULL, ORTE_PROC_MY_NAME))) { - ORTE_ERROR_LOG(ret); error = "orte_session_dir define"; goto error; } @@ -316,23 +318,21 @@ static int rte_init(void) orte_process_info.tmpdir_base, orte_process_info.nodename, NULL, ORTE_PROC_MY_NAME))) { - ORTE_ERROR_LOG(ret); error = "orte_session_dir"; goto error; } } /* Setup the communication infrastructure */ + /* * OOB Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { - ORTE_ERROR_LOG(ret); error = "orte_oob_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_oob_base_select())) { - ORTE_ERROR_LOG(ret); error = "orte_oob_base_select"; goto error; } @@ -341,33 +341,19 @@ static int rte_init(void) * Runtime Messaging Layer */ if (ORTE_SUCCESS 
!= (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { - ORTE_ERROR_LOG(ret); error = "orte_rml_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { - ORTE_ERROR_LOG(ret); error = "orte_rml_base_select"; goto error; } - /* Messaging QoS Layer */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_qos_base_select"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { - ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_select"; goto error; } + /* setup the global job and node arrays */ orte_job_data = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data, @@ -378,6 +364,7 @@ static int rte_init(void) error = "setup job array"; goto error; } + orte_node_pool = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool, ORTE_GLOBAL_ARRAY_BLOCK_SIZE, @@ -396,6 +383,7 @@ static int rte_init(void) error = "setup node topologies array"; goto error; } + /* Setup the job data object for the daemons */ /* create and store the job data object */ jdata = OBJ_NEW(orte_job_t); @@ -411,30 +399,30 @@ static int rte_init(void) app = OBJ_NEW(orte_app_context_t); opal_pointer_array_set_item(jdata->apps, 0, app); jdata->num_apps++; + /* create and store a node object where we are */ node = OBJ_NEW(orte_node_t); node->name = strdup(orte_process_info.nodename); node->index = opal_pointer_array_set_item(orte_node_pool, 0, node); -#if OPAL_HAVE_HWLOC - { - orte_topology_t *t; - /* add it to the array of known topologies */ - t = OBJ_NEW(orte_topology_t); - t->topo = opal_hwloc_topology; - t->sig = strdup(orte_topo_signature); - opal_pointer_array_add(orte_node_topologies, t); - } -#endif + + /* add it to the array of known topologies */ + t = 
OBJ_NEW(orte_topology_t); + t->topo = opal_hwloc_topology; + t->sig = strdup(orte_topo_signature); + opal_pointer_array_add(orte_node_topologies, t); + /* create and store a proc object for us */ proc = OBJ_NEW(orte_proc_t); proc->name.jobid = ORTE_PROC_MY_NAME->jobid; proc->name.vpid = ORTE_PROC_MY_NAME->vpid; + proc->pid = orte_process_info.pid; proc->rml_uri = orte_rml.get_contact_info(); proc->state = ORTE_PROC_STATE_RUNNING; OBJ_RETAIN(node); /* keep accounting straight */ proc->node = node; opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc); + /* record that the daemon (i.e., us) is on this node * NOTE: we do not add the proc object to the node's * proc array because we are not an application proc. @@ -445,6 +433,7 @@ static int rte_init(void) node->daemon = proc; ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); node->state = ORTE_NODE_STATE_UP; + /* if we are to retain aliases, get ours */ if (orte_retain_aliases) { aliases = NULL; @@ -456,11 +445,13 @@ static int rte_init(void) orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING); free(aptr); } + /* record that the daemon job is running */ jdata->num_procs = 1; jdata->state = ORTE_JOB_STATE_RUNNING; /* obviously, we have "reported" */ jdata->num_reported = 1; + /* * Routed system */ @@ -474,6 +465,8 @@ static int rte_init(void) error = "orte_routed_base_select"; goto error; } + + /* * Group communications */ @@ -487,6 +480,7 @@ static int rte_init(void) error = "orte_grpcomm_base_select"; goto error; } + /* Now provide a chance for the PLM * to perform any module-specific init functions. 
This * needs to occur AFTER the communications are setup @@ -497,6 +491,7 @@ static int rte_init(void) error = "orte_plm_init"; goto error; } + /* * Setup the remaining resource * management and errmgr frameworks - application procs @@ -513,6 +508,7 @@ static int rte_init(void) error = "orte_ras_base_find_available"; goto error; } + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rmaps_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_rmaps_base_open"; @@ -523,53 +519,47 @@ static int rte_init(void) error = "orte_rmaps_base_find_available"; goto error; } -#if OPAL_HAVE_HWLOC - { - char *coprocessors, **sns; - uint32_t h; - int idx; - /* if a topology file was given, then the rmaps framework open - * will have reset our topology. Ensure we always get the right - * one by setting our node topology afterwards + /* if a topology file was given, then the rmaps framework open + * will have reset our topology. Ensure we always get the right + * one by setting our node topology afterwards + */ + node->topology = opal_hwloc_topology; + + /* init the hash table, if necessary */ + if (NULL == orte_coprocessors) { + orte_coprocessors = OBJ_NEW(opal_hash_table_t); + opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs); + } + /* detect and add any coprocessors */ + coprocessors = opal_hwloc_base_find_coprocessors(opal_hwloc_topology); + if (NULL != coprocessors) { + /* separate the serial numbers of the coprocessors + * on this host */ - node->topology = opal_hwloc_topology; - - /* init the hash table, if necessary */ - if (NULL == orte_coprocessors) { - orte_coprocessors = OBJ_NEW(opal_hash_table_t); - opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs); - } - /* detect and add any coprocessors */ - coprocessors = opal_hwloc_base_find_coprocessors(opal_hwloc_topology); - if (NULL != coprocessors) { - /* separate the serial numbers of the coprocessors - * on this host - */ - sns = opal_argv_split(coprocessors, ','); - for (idx=0; 
NULL != sns[idx]; idx++) { - /* compute the hash */ - OPAL_HASH_STR(sns[idx], h); - /* mark that this coprocessor is hosted by this node */ - opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&(ORTE_PROC_MY_NAME->vpid)); - } - opal_argv_free(sns); - free(coprocessors); - orte_coprocessors_detected = true; - } - /* see if I am on a coprocessor */ - coprocessors = opal_hwloc_base_check_on_coprocessor(); - if (NULL != coprocessors) { + sns = opal_argv_split(coprocessors, ','); + for (idx=0; NULL != sns[idx]; idx++) { /* compute the hash */ - OPAL_HASH_STR(coprocessors, h); - /* mark that I am on this coprocessor */ + OPAL_HASH_STR(sns[idx], h); + /* mark that this coprocessor is hosted by this node */ opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&(ORTE_PROC_MY_NAME->vpid)); - orte_set_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, ORTE_ATTR_LOCAL, coprocessors, OPAL_STRING); - free(coprocessors); - orte_coprocessors_detected = true; } + opal_argv_free(sns); + free(coprocessors); + orte_coprocessors_detected = true; + } + /* see if I am on a coprocessor */ + coprocessors = opal_hwloc_base_check_on_coprocessor(); + if (NULL != coprocessors) { + /* compute the hash */ + OPAL_HASH_STR(coprocessors, h); + /* mark that I am on this coprocessor */ + opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&(ORTE_PROC_MY_NAME->vpid)); + orte_set_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, ORTE_ATTR_LOCAL, coprocessors, OPAL_STRING); + free(coprocessors); + orte_coprocessors_detected = true; } -#endif + /* Open/select the odls */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -581,6 +571,7 @@ static int rte_init(void) error = "orte_odls_base_select"; goto error; } + /* Open/select the rtc */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rtc_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -592,31 +583,43 @@ static int rte_init(void) error = 
"orte_rtc_base_select"; goto error; } + /* enable communication with the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } + /* we are an hnp, so update the contact info field for later use */ orte_process_info.my_hnp_uri = orte_rml.get_contact_info(); proc->rml_uri = strdup(orte_process_info.my_hnp_uri); /* we are also officially a daemon, so better update that field too */ orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri); + /* setup the orte_show_help system to recv remote output */ orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SHOW_HELP, ORTE_RML_PERSISTENT, orte_show_help_recv, NULL); + /* setup the data server */ + if (ORTE_SUCCESS != (ret = orte_data_server_init())) { + ORTE_ERROR_LOG(ret); + error = "orte_data_server_init"; + goto error; + } + if (orte_create_session_dirs) { /* set the opal_output hnp file location to be in the * proc-specific session directory. */ opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); + /* save my contact info in a file for others to find */ jobfam_dir = opal_dirname(orte_process_info.job_session_dir); contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL); free(jobfam_dir); + OPAL_OUTPUT_VERBOSE((2, orte_debug_output, "%s writing contact file %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -634,12 +637,22 @@ static int rte_init(void) } free(contact_path); } - /* setup the PMIx server */ - if (ORTE_SUCCESS != (ret = pmix_server_init())) { + + /* setup the PMIx framework - ensure it skips all non-PMIx components */ + putenv("OMPI_MCA_pmix=^s1,s2,cray"); + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { ORTE_ERROR_LOG(ret); - error = "pmix server init"; + error = "orte_pmix_base_open"; goto error; } + if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) { + ORTE_ERROR_LOG(ret); + error = "opal_pmix_base_select"; + goto error; + } + /* 
set the event base */ + opal_pmix_base_set_evbase(orte_event_base); + /* setup the routed info - the selected routed component * will know what to do. */ @@ -648,6 +661,14 @@ static int rte_init(void) error = "orte_routed.init_routes"; goto error; } + + /* setup the PMIx server */ + if (ORTE_SUCCESS != (ret = pmix_server_init())) { + ORTE_ERROR_LOG(ret); + error = "pmix server init"; + goto error; + } + /* setup I/O forwarding system - must come after we init routes */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -659,6 +680,7 @@ static int rte_init(void) error = "orte_iof_base_select"; goto error; } + /* setup the FileM */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -670,46 +692,7 @@ static int rte_init(void) error = "orte_filem_base_select"; goto error; } -#if OPAL_ENABLE_FT_CR == 1 - /* - * Setup the SnapC - */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_select"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_select"; - goto error; - } - /* For HNP, ORTE doesn't need the OPAL CR stuff */ - opal_cr_set_enabled(false); -#else - opal_cr_set_enabled(false); -#endif - /* - * Initalize the CR setup - * Note: Always do this, even in non-FT builds. - * If we don't some user level tools may hang. 
- */ - if (ORTE_SUCCESS != (ret = orte_cr_init())) { - ORTE_ERROR_LOG(ret); - error = "orte_cr_init"; - goto error; - } /* setup the dfs framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -721,6 +704,7 @@ static int rte_init(void) error = "orte_dfs_select"; goto error; } + /* setup the schizo framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -732,6 +716,7 @@ static int rte_init(void) error = "orte_schizo_select"; goto error; } + /* if a tool has launched us and is requesting event reports, * then set its contact info into the comm system */ @@ -741,12 +726,14 @@ static int rte_init(void) goto error; } } + /* We actually do *not* want an HNP to voluntarily yield() the processor more than necessary. Orterun already blocks when it is doing nothing, so it doesn't use any more CPU cycles than it should; but when it *is* doing something, we do not want it to be unnecessarily delayed because it voluntarily yielded the processor in the middle of its work. + For example: when a message arrives at orterun, we want the OS to wake us up in a timely fashion (which most OS's seem good about doing) and then we want orterun to process @@ -759,6 +746,7 @@ static int rte_init(void) problematic in some scenarios (e.g., COMM_SPAWN, BTL's that require OOB messages for wireup, etc.). 
*/ opal_progress_set_yield_when_idle(false); + return ORTE_SUCCESS; error: @@ -767,6 +755,19 @@ static int rte_init(void) "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } + + /* remove my contact info file, if we have session directories */ + if (NULL != orte_process_info.job_session_dir) { + jobfam_dir = opal_dirname(orte_process_info.job_session_dir); + contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL); + free(jobfam_dir); + unlink(contact_path); + free(contact_path); + } + /* remove our use of the session directory tree */ + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + /* ensure we scrub the session directory tree */ + orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); return ORTE_ERR_SILENT; } @@ -774,6 +775,8 @@ static int rte_finalize(void) { char *contact_path; char *jobfam_dir; + ess_hnp_signal_t *sig; + unsigned int i; if (signals_set) { /* Remove the epipe handler */ @@ -781,17 +784,21 @@ static int rte_finalize(void) /* remove the term handler */ opal_event_del(&term_handler); /** Remove the USR signal handlers */ - opal_event_signal_del(&sigusr1_handler); - opal_event_signal_del(&sigusr2_handler); - if (orte_forward_job_control) { - opal_event_signal_del(&sigtstp_handler); - opal_event_signal_del(&sigcont_handler); + i = 0; + OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) { + opal_event_signal_del(forward_signals_events + i); + ++i; } + free (forward_signals_events); + forward_signals_events = NULL; signals_set = false; } /* shutdown the pmix server */ pmix_server_finalize(); + (void) mca_base_framework_close(&opal_pmix_base_framework); + /* cleanup our data server */ + orte_data_server_finalize(); (void) mca_base_framework_close(&orte_schizo_base_framework); (void) mca_base_framework_close(&orte_dfs_base_framework); @@ -805,7 +812,6 @@ static int rte_finalize(void) (void) mca_base_framework_close(&orte_rmaps_base_framework); (void) 
mca_base_framework_close(&orte_ras_base_framework); (void) mca_base_framework_close(&orte_grpcomm_base_framework); - (void) mca_base_framework_close(&opal_dstore_base_framework); (void) mca_base_framework_close(&orte_routed_base_framework); (void) mca_base_framework_close(&orte_plm_base_framework); (void) mca_base_framework_close(&orte_errmgr_base_framework); @@ -827,6 +833,8 @@ static int rte_finalize(void) (void) mca_base_framework_close(&orte_rml_base_framework); (void) mca_base_framework_close(&orte_oob_base_framework); + /* remove our use of the session directory tree */ + orte_session_dir_finalize(ORTE_PROC_MY_NAME); /* ensure we scrub the session directory tree */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); @@ -838,6 +846,7 @@ static int rte_finalize(void) fclose(orte_xml_fp); } } + return ORTE_SUCCESS; } @@ -852,14 +861,14 @@ static void rte_abort(int status, bool report) * - Assume errmgr cleans up child processes before we exit. */ - /* CRS cleanup since it may have a named pipe and thread active */ - orte_cr_finalize(); /* ensure we scrub the session directory tree */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); + /* - Clean out the global structures * (not really necessary, but good practice) */ orte_proc_info_finalize(); + /* just exit */ exit(status); } @@ -873,10 +882,13 @@ static void clean_abort(int fd, short flags, void *arg) if (forcibly_die) { /* kill any local procs */ orte_odls.kill_local_procs(NULL); + /* whack any lingering session directory files from our jobs */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); + /* cleanup our data server */ orte_data_server_finalize(); + /* exit with a non-zero status */ exit(ORTE_ERROR_DEFAULT_EXIT_CODE); } @@ -891,14 +903,17 @@ static void clean_abort(int fd, short flags, void *arg) /* ensure that the forwarding of stdin stops */ orte_job_term_ordered = true; + /* tell us to be quiet - hey, the user killed us with a ctrl-c, * so need to tell them that! 
*/ orte_execute_quiet = true; + if (!orte_never_launched) { /* cleanup our data server */ orte_data_server_finalize(); } + /* We are in an event handler; the job completed procedure will delete the signal handler that is currently running (which is a Bad Thing), so we can't call it directly. diff --git a/orte/mca/ess/hnp/help-ess-hnp.txt b/orte/mca/ess/hnp/help-ess-hnp.txt new file mode 100644 index 00000000000..7bce2ccbb62 --- /dev/null +++ b/orte/mca/ess/hnp/help-ess-hnp.txt @@ -0,0 +1,27 @@ +# -*- text -*- +# +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English general help file for the SDS base. +# +[ess-hnp:cannot-forward] +The system does not support trapping and forwarding of the +specified signal: + + signal: %s + param: %s + +Please remove that signal from the ess_hnp_forward_signals MCA parameter. +[ess-hnp:unknown-signal] +The following signal was included in the ess_hnp_forward_signals +MCA parameter: + + signal: %s + param: %s + +This is not a recognized signal value. Please fix or remove it. diff --git a/orte/mca/ess/lsf/Makefile.am b/orte/mca/ess/lsf/Makefile.am index 2d8c9f36341..0d4971843dc 100644 --- a/orte/mca/ess/lsf/Makefile.am +++ b/orte/mca/ess/lsf/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/ess/lsf/configure.m4 b/orte/mca/ess/lsf/configure.m4 index b37c775e5a9..b29cad6e661 100644 --- a/orte/mca/ess/lsf/configure.m4 +++ b/orte/mca/ess/lsf/configure.m4 @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2011 Los Alamos National Security, LLC. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,8 +26,8 @@ AC_DEFUN([MCA_orte_ess_lsf_CONFIG],[ AC_CONFIG_FILES([orte/mca/ess/lsf/Makefile]) ORTE_CHECK_LSF([ess_lsf], [ess_lsf_good=1], [ess_lsf_good=0]) - - # if check worked, set wrapper flags if so. + + # if check worked, set wrapper flags if so. # Evaluate succeed / fail AS_IF([test "$ess_lsf_good" = "1"], [$1], diff --git a/orte/mca/ess/lsf/ess_lsf.h b/orte/mca/ess/lsf/ess_lsf.h index cd05c691d55..1d317b006c0 100644 --- a/orte/mca/ess/lsf/ess_lsf.h +++ b/orte/mca/ess/lsf/ess_lsf.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +23,7 @@ BEGIN_C_DECLS ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_lsf_component; - + /* * Module open / close */ diff --git a/orte/mca/ess/lsf/ess_lsf_component.c b/orte/mca/ess/lsf/ess_lsf_component.c index e8dbe517db8..4db3d7df0fb 100644 --- a/orte/mca/ess/lsf/ess_lsf_component.c +++ b/orte/mca/ess/lsf/ess_lsf_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -70,14 +70,14 @@ int orte_ess_lsf_component_query(mca_base_module_t **module, int *priority) * answer to both is "yes", then we were launched * by mpirun in an LSF world */ - + if (NULL != getenv("LSB_JOBID") && NULL != orte_process_info.my_hnp_uri) { *priority = 40; *module = (mca_base_module_t *)&orte_ess_lsf_module; return ORTE_SUCCESS; } - + /* nope, not here */ *priority = -1; *module = NULL; diff --git a/orte/mca/ess/lsf/ess_lsf_module.c b/orte/mca/ess/lsf/ess_lsf_module.c index cd24c716d12..f9aef64269c 100644 --- a/orte/mca/ess/lsf/ess_lsf_module.c +++ b/orte/mca/ess/lsf/ess_lsf_module.c @@ -5,16 +5,18 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -25,14 +27,13 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include #include #include "opal/util/opal_environ.h" +#include "opal/util/argv.h" #include "orte/util/show_help.h" #include "orte/util/name_fns.h" @@ -74,10 +75,10 @@ static int rte_init(void) error = "orte_ess_base_std_prolog"; goto error; } - + /* Start by getting a unique name */ lsf_set_name(); - + /* if I am a daemon, complete my setup using the * default procedure */ @@ -97,7 +98,7 @@ static int rte_init(void) opal_argv_free(hosts); return ORTE_SUCCESS; } - + if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { @@ -106,9 +107,9 @@ static int rte_init(void) goto error; } return ORTE_SUCCESS; - + } - + /* otherwise, I must be an application process - use * the default procedure to finish my setup */ @@ -117,9 +118,9 @@ static int rte_init(void) error = "orte_ess_base_app_setup"; goto error; } - + return ORTE_SUCCESS; - + error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", @@ -165,7 +166,7 @@ static int lsf_set_name(void) int lsf_nodeid; orte_jobid_t jobid; orte_vpid_t vpid; - + if (NULL ==orte_ess_base_jobid) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; @@ -196,6 +197,6 @@ static int lsf_set_name(void) ORTE_ERROR_LOG(rc); return rc; } - + return ORTE_SUCCESS; } diff --git a/orte/mca/ess/pmi/Makefile.am b/orte/mca/ess/pmi/Makefile.am index 928b60b1aed..4ded8b8d62b 
100644 --- a/orte/mca/ess/pmi/Makefile.am +++ b/orte/mca/ess/pmi/Makefile.am @@ -4,9 +4,9 @@ # All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/ess/pmi/ess_pmi.h b/orte/mca/ess/pmi/ess_pmi.h index 1141d604238..c6738aa9392 100644 --- a/orte/mca/ess/pmi/ess_pmi.h +++ b/orte/mca/ess/pmi/ess_pmi.h @@ -1,9 +1,9 @@ /* * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/ess/pmi/ess_pmi_component.c b/orte/mca/ess/pmi/ess_pmi_component.c index 2864c9de2b0..b877ed36439 100644 --- a/orte/mca/ess/pmi/ess_pmi_component.c +++ b/orte/mca/ess/pmi/ess_pmi_component.c @@ -5,9 +5,9 @@ * rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -68,11 +68,11 @@ static int pmi_component_open(void) static int pmi_component_query(mca_base_module_t **module, int *priority) { int ret; - + /* all APPS must use pmix */ if (ORTE_PROC_IS_APP) { - /* open and setup pmix */ if (NULL == opal_pmix.initialized) { + /* open and setup pmix */ if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { ORTE_ERROR_LOG(ret); *priority = -1; @@ -87,11 +87,8 @@ static int pmi_component_query(mca_base_module_t **module, int *priority) return ret; } } - if (!opal_pmix.initialized()) { - /* we may have everything setup, but we are not - * in a PMIx environment and so we need to disqualify - * ourselves - we are likely a singleton and will - * pick things up from there */ + if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { + /* we cannot be in a PMI environment */ *priority = -1; *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/ess/pmi/ess_pmi_module.c 
b/orte/mca/ess/pmi/ess_pmi_module.c index a9050cabab1..0737868672d 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -27,9 +27,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include #ifdef HAVE_NETDB_H #include @@ -41,10 +39,12 @@ #include "opal/util/opal_environ.h" #include "opal/util/output.h" #include "opal/util/argv.h" +#include "opal/runtime/opal_progress_threads.h" #include "opal/class/opal_pointer_array.h" -#include "opal/mca/dstore/dstore.h" #include "opal/mca/hwloc/base/base.h" #include "opal/util/printf.h" +#include "opal/util/proc.h" +#include "opal/mca/pmix/pmix.h" #include "opal/mca/pmix/base/base.h" #include "orte/mca/errmgr/errmgr.h" @@ -76,6 +76,7 @@ orte_ess_base_module_t orte_ess_pmi_module = { static bool added_transport_keys=false; static bool added_num_procs = false; static bool added_app_ctx = false; +static bool progress_thread_running = false; /**** MODULE FUNCTIONS ****/ @@ -87,61 +88,90 @@ static int rte_init(void) uint64_t unique_key[2]; char *string_key; char *rmluri; - opal_value_t *kv, kvn; - opal_list_t vals; + 
opal_value_t *kv; + char *val; + int u32, *u32ptr; + uint16_t u16, *u16ptr; + char **peers=NULL, *mycpuset, **cpusets=NULL; + opal_process_name_t name; + size_t i; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } - - /* we don't have to call pmix.init because the pmix select did it */ - /**** THE FOLLOWING ARE REQUIRED VALUES ***/ - /* get our jobid from PMI */ - if (!opal_pmix.get_attr(PMIX_JOBID, &kv)) { - error = "getting jobid"; - ret = ORTE_ERR_NOT_FOUND; + /* get an async event base - we use the opal_async one so + * we don't startup extra threads if not needed */ + orte_event_base = opal_progress_thread_init(NULL); + progress_thread_running = true; + + /* open and setup pmix */ + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + /* we cannot run */ + error = "pmix init"; goto error; } - ORTE_PROC_MY_NAME->jobid = kv->data.uint32; - OBJ_RELEASE(kv); - - /* get our global rank from PMI */ - if (!opal_pmix.get_attr(PMIX_RANK, &kv)) { - error = "getting rank"; - ret = ORTE_ERR_NOT_FOUND; + if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { + /* we cannot run */ + error = "pmix init"; goto error; } - ORTE_PROC_MY_NAME->vpid = kv->data.uint32; - OBJ_RELEASE(kv); + /* set the event base */ + opal_pmix_base_set_evbase(orte_event_base); + /* initialize the selected module */ + if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { + /* we cannot run */ + error = "pmix init"; + goto error; + } + u32ptr = &u32; + u16ptr = &u16; + + /**** THE FOLLOWING ARE REQUIRED VALUES ***/ + /* pmix.init set our process name down in the OPAL layer, + * so carry it forward here */ + ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid; + ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid; /* get our local rank from PMI */ - if (!opal_pmix.get_attr(PMIX_LOCAL_RANK, &kv)) { + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK, + 
ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); + if (OPAL_SUCCESS != ret) { error = "getting local rank"; - ret = ORTE_ERR_NOT_FOUND; goto error; } - orte_process_info.my_local_rank = (orte_local_rank_t)kv->data.uint16; - OBJ_RELEASE(kv); + orte_process_info.my_local_rank = u16; /* get our node rank from PMI */ - if (!opal_pmix.get_attr(PMIX_NODE_RANK, &kv)) { + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK, + ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); + if (OPAL_SUCCESS != ret) { error = "getting node rank"; - ret = ORTE_ERR_NOT_FOUND; goto error; } - orte_process_info.my_node_rank = (orte_local_rank_t)kv->data.uint16; + orte_process_info.my_node_rank = u16; /* get universe size */ - if (!opal_pmix.get_attr(PMIX_UNIV_SIZE, &kv)) { + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_UNIV_SIZE, + ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); + if (OPAL_SUCCESS != ret) { error = "getting univ size"; - ret = ORTE_ERR_NOT_FOUND; goto error; } - orte_process_info.num_procs = kv->data.uint32; - OBJ_RELEASE(kv); + orte_process_info.max_procs = u32; + + /* get job size */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_JOB_SIZE, + ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); + if (OPAL_SUCCESS != ret) { + error = "getting job size"; + goto error; + } + orte_process_info.num_procs = u32; + /* push into the environ for pickup in MPI layer for * MPI-3 required info key */ @@ -158,18 +188,20 @@ static int rte_init(void) /* get our app number from PMI - ok if not found */ - if (opal_pmix.get_attr(PMIX_APPNUM, &kv)) { - orte_process_info.app_num = kv->data.uint32; - OBJ_RELEASE(kv); + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_APPNUM, + ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); + if (OPAL_SUCCESS == ret) { + orte_process_info.app_num = u32; } else { orte_process_info.app_num = 0; } /* get the number of local peers - required for wireup of * shared memory BTL */ - if (opal_pmix.get_attr(PMIX_LOCAL_SIZE, &kv)) { - orte_process_info.num_local_peers = kv->data.uint32 - 1; // want number besides ourselves - 
OBJ_RELEASE(kv); + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_SIZE, + ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); + if (OPAL_SUCCESS == ret) { + orte_process_info.num_local_peers = u32 - 1; // want number besides ourselves } else { orte_process_info.num_local_peers = 0; } @@ -185,6 +217,9 @@ static int rte_init(void) ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } + opal_output_verbose(2, orte_ess_base_framework.framework_output, + "%s transport key %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), string_key); asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key); putenv(envar); added_transport_keys = true; @@ -192,20 +227,148 @@ static int rte_init(void) free(string_key); } -#if OPAL_HAVE_HWLOC - /* if it wasn't passed down to us, get the topology */ - if (NULL == opal_hwloc_topology) { + /* retrieve our topology */ + val = NULL; + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO, + ORTE_PROC_MY_NAME, &val, OPAL_STRING); + if (OPAL_SUCCESS == ret && NULL != val) { + /* load the topology */ + if (0 != hwloc_topology_init(&opal_hwloc_topology)) { + ret = OPAL_ERROR; + free(val); + error = "setting topology"; + goto error; + } + if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) { + ret = OPAL_ERROR; + free(val); + hwloc_topology_destroy(opal_hwloc_topology); + error = "setting topology"; + goto error; + } + /* since we are loading this from an external source, we have to + * explicitly set a flag so hwloc sets things up correctly + */ + if (0 != hwloc_topology_set_flags(opal_hwloc_topology, + (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | + HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | + HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { + ret = OPAL_ERROR; + hwloc_topology_destroy(opal_hwloc_topology); + free(val); + error = "setting topology"; + goto error; + } + /* now load the topology */ + if (0 != hwloc_topology_load(opal_hwloc_topology)) { + ret = OPAL_ERROR; + hwloc_topology_destroy(opal_hwloc_topology); + 
free(val); + error = "setting topology"; + goto error; + } + free(val); + /* filter the cpus thru any default cpu set */ + if (OPAL_SUCCESS != (ret = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) { + error = "filtering topology"; + goto error; + } + } else { + /* it wasn't passed down to us, so go get it */ if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { error = "topology discovery"; goto error; } + /* push it into the PMIx database in case someone + * tries to retrieve it so we avoid an attempt to + * get it again */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); + kv->type = OPAL_STRING; + if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) { + error = "topology export"; + goto error; + } + if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) { + error = "topology store"; + goto error; + } + OBJ_RELEASE(kv); } -#endif - /* we don't need to force the routed system to pick the - * "direct" component as that should happen automatically - * in those cases where we are direct launched (i.e., no - * HNP is defined in the environment */ + /* get our local peers */ + if (0 < orte_process_info.num_local_peers) { + /* if my local rank if too high, then that's an error */ + if (orte_process_info.num_local_peers < orte_process_info.my_local_rank) { + ret = ORTE_ERR_BAD_PARAM; + error = "num local peers"; + goto error; + } + /* retrieve the local peers */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS, + ORTE_PROC_MY_NAME, &val, OPAL_STRING); + if (OPAL_SUCCESS == ret && NULL != val) { + peers = opal_argv_split(val, ','); + free(val); + /* and their cpusets, if available */ + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_CPUSETS, ORTE_PROC_MY_NAME, &val, OPAL_STRING); + if (OPAL_SUCCESS == ret && NULL != val) { + cpusets = opal_argv_split(val, ':'); + free(val); + } else { + cpusets = NULL; + } + } else { + peers = NULL; + cpusets = NULL; + } + } else { + 
peers = NULL; + cpusets = NULL; + } + + /* set the locality */ + if (NULL != peers) { + /* indentify our cpuset */ + if (NULL != cpusets) { + mycpuset = cpusets[orte_process_info.my_local_rank]; + } else { + mycpuset = NULL; + } + name.jobid = ORTE_PROC_MY_NAME->jobid; + for (i=0; NULL != peers[i]; i++) { + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_LOCALITY); + kv->type = OPAL_UINT16; + name.vpid = strtoul(peers[i], NULL, 10); + if (name.vpid == ORTE_PROC_MY_NAME->vpid) { + /* we are fully local to ourselves */ + u16 = OPAL_PROC_ALL_LOCAL; + } else if (NULL == mycpuset || NULL == cpusets[i] || + 0 == strcmp(cpusets[i], "UNBOUND")) { + /* all we can say is that it shares our node */ + u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; + } else { + /* we have it, so compute the locality */ + u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, mycpuset, cpusets[i]); + } + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, + "%s ess:pmi:locality: proc %s locality %x", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&name), u16)); + kv->data.uint16 = u16; + ret = opal_pmix.store_local(&name, kv); + if (OPAL_SUCCESS != ret) { + error = "local store of locality"; + opal_argv_free(peers); + opal_argv_free(cpusets); + goto error; + } + OBJ_RELEASE(kv); + } + opal_argv_free(peers); + opal_argv_free(cpusets); + } /* now that we have all required info, complete the setup */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) { @@ -233,73 +396,21 @@ static int rte_init(void) /*** PUSH DATA FOR OTHERS TO FIND ***/ /* push our RML URI in case others need to talk directly to us */ - OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS != opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME, - OPAL_DSTORE_URI, &vals)) { - /* not already recorded, so construct the RTE string */ - rmluri = orte_rml.get_contact_info(); - /* push it out for others to use - this will also put it in the dstore */ - 
OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_URI); - kvn.type = OPAL_STRING; - kvn.data.string = strdup(rmluri); - if (ORTE_SUCCESS != (ret = opal_pmix.put(PMIX_GLOBAL, &kvn))) { - error = "db store uri"; - OBJ_DESTRUCT(&kvn); - goto error; - } - OBJ_DESTRUCT(&kvn); - free(rmluri); + rmluri = orte_rml.get_contact_info(); + /* push it out for others to use */ + OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING); + if (ORTE_SUCCESS != ret) { + error = "pmix put uri"; + goto error; } - OPAL_LIST_DESTRUCT(&vals); - + free(rmluri); + /* push our hostname so others can find us, if they need to */ - OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_HOSTNAME); - kvn.type = OPAL_STRING; - kvn.data.string = strdup(orte_process_info.nodename); - if (ORTE_SUCCESS != (ret = opal_pmix.put(PMIX_GLOBAL, &kvn))) { + OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING); + if (ORTE_SUCCESS != ret) { error = "db store hostname"; - OBJ_DESTRUCT(&kvn); goto error; } - OBJ_DESTRUCT(&kvn); - - /* if our local rank was not provided by the system, then - * push our local rank so others can access it */ - OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS != opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME, - OPAL_DSTORE_LOCALRANK, &vals)) { - OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_LOCALRANK); - kvn.type = OPAL_UINT16; - kvn.data.uint16 = orte_process_info.my_local_rank; - if (ORTE_SUCCESS != (ret = opal_pmix.put(PMIX_GLOBAL, &kvn))) { - error = "db store local rank"; - OBJ_DESTRUCT(&kvn); - goto error; - } - OBJ_DESTRUCT(&kvn); - } - OPAL_LIST_DESTRUCT(&vals); - - /* if our node rank was not provided by the system, then - * push our node rank so others can access it */ - OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS != opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME, - OPAL_DSTORE_NODERANK, &vals)) { - 
OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_NODERANK); - kvn.type = OPAL_UINT16; - kvn.data.uint16 = orte_process_info.my_node_rank; - if (ORTE_SUCCESS != (ret = opal_pmix.put(PMIX_GLOBAL, &kvn))) { - error = "db store node rank"; - OBJ_DESTRUCT(&kvn); - goto error; - } - OBJ_DESTRUCT(&kvn); - } - OPAL_LIST_DESTRUCT(&vals); /* if we are an ORTE app - and not an MPI app - then * we need to exchange our connection info here. @@ -318,6 +429,12 @@ static int rte_init(void) return ORTE_SUCCESS; error: + if (!progress_thread_running) { + /* can't send the help message, so ensure it + * comes out locally + */ + orte_show_help_finalize(); + } if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", @@ -342,18 +459,24 @@ static int rte_finalize(void) if (added_app_ctx) { unsetenv("OMPI_APP_CTX_NUM_PROCS"); } + /* use the default app procedure to finish */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { ORTE_ERROR_LOG(ret); return ret; } - + /* mark us as finalized */ if (NULL != opal_pmix.finalize) { opal_pmix.finalize(); (void) mca_base_framework_close(&opal_pmix_base_framework); } - + + /* release the event base */ + if (progress_thread_running) { + opal_progress_thread_finalize(NULL); + progress_thread_running = false; + } return ORTE_SUCCESS; } @@ -369,7 +492,7 @@ static void rte_abort(int status, bool report) /* PMI doesn't like NULL messages, but our interface * doesn't provide one - so rig one up here */ - opal_pmix.abort(status, "N/A"); + opal_pmix.abort(status, "N/A", NULL); /* provide a little delay for the PMIx thread to * get the info out */ diff --git a/orte/mca/ess/singleton/Makefile.am b/orte/mca/ess/singleton/Makefile.am index ef313f219ee..e9ec2b39081 100644 --- a/orte/mca/ess/singleton/Makefile.am +++ b/orte/mca/ess/singleton/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee 
Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/ess/singleton/ess_singleton.h b/orte/mca/ess/singleton/ess_singleton.h index 90871a95383..916c36f5602 100644 --- a/orte/mca/ess/singleton/ess_singleton.h +++ b/orte/mca/ess/singleton/ess_singleton.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/ess/singleton/ess_singleton_component.c b/orte/mca/ess/singleton/ess_singleton_component.c index a2c02d4205c..05322bbd3a1 100644 --- a/orte/mca/ess/singleton/ess_singleton_component.c +++ b/orte/mca/ess/singleton/ess_singleton_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -98,7 +98,7 @@ orte_ess_singleton_component_open(void) int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority) { int ret; - + /* if we are an HNP, daemon, or tool, then we * are definitely not a singleton! */ @@ -108,7 +108,7 @@ int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority *module = NULL; return ORTE_ERROR; } - + /* okay, we still could be a singleton or * an application process. If we have been * given an HNP URI, then we are definitely @@ -118,7 +118,7 @@ int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority *module = NULL; return ORTE_ERROR; } - + /* open and setup pmix */ if (NULL == opal_pmix.initialized) { if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 378c01fc93f..597023faf88 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -6,19 +6,21 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -26,9 +28,7 @@ #include "orte_config.h" #include "orte/constants.h" -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_SYS_TYPES_H #include #endif @@ -40,18 +40,21 @@ #include "opal/hash_string.h" #include "opal/util/argv.h" +#include "opal/util/opal_environ.h" #include "opal/util/path.h" #include "opal/mca/installdirs/installdirs.h" #include "opal/mca/pmix/base/base.h" +#include "opal/mca/pmix/pmix.h" +#include "opal/runtime/opal_progress_threads.h" #include "orte/util/show_help.h" #include "orte/util/proc_info.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/routed/routed.h" +#include "orte/mca/plm/base/base.h" #include "orte/util/name_fns.h" #include "orte/runtime/orte_globals.h" #include "orte/util/session_dir.h" +#include "orte/util/pre_condition_transports.h" #include "orte/mca/ess/ess.h" #include "orte/mca/ess/base/base.h" @@ -61,8 +64,6 @@ static int rte_init(void); static int rte_finalize(void); -extern char *orte_ess_singleton_server_uri; - orte_ess_base_module_t orte_ess_singleton_module = { rte_init, rte_finalize, @@ -70,20 +71,34 @@ orte_ess_base_module_t orte_ess_singleton_module = { NULL /* ft_event */ }; +extern char *orte_ess_singleton_server_uri; +static bool added_transport_keys=false; +static bool added_num_procs = false; +static bool added_app_ctx = false; +static bool added_pmix_envs = false; +static bool progress_thread_running = false; + +static int fork_hnp(void); + static int rte_init(void) { - int rc; - char *param; - uint16_t jobfam; - uint32_t hash32; - uint32_t bias; - opal_value_t kvn; + int rc, ret; + char *error = NULL; + uint64_t unique_key[2]; + char *string_key; + char *envar; + opal_value_t *kv; + char *val; + int 
u32, *u32ptr; + uint16_t u16, *u16ptr; /* run the prolog */ if (ORTE_SUCCESS != (rc = orte_ess_base_std_prolog())) { ORTE_ERROR_LOG(rc); return rc; } + u32ptr = &u32; + u16ptr = &u16; if (NULL != orte_ess_singleton_server_uri) { /* we are going to connect to a server HNP */ @@ -91,7 +106,7 @@ static int rte_init(void) 0 == strncmp(orte_ess_singleton_server_uri, "FILE", strlen("FILE"))) { char input[1024], *filename; FILE *fp; - + /* it is a file - get the filename */ filename = strchr(orte_ess_singleton_server_uri, ':'); if (NULL == filename) { @@ -101,14 +116,14 @@ static int rte_init(void) return ORTE_ERROR; } ++filename; /* space past the : */ - + if (0 >= strlen(filename)) { /* they forgot to give us the name! */ orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true, "singleton", orte_ess_singleton_server_uri); return ORTE_ERROR; } - + /* open the file and extract the uri */ fp = fopen(filename, "r"); if (NULL == fp) { /* can't find or read file! */ @@ -132,178 +147,490 @@ static int rte_init(void) } /* save the daemon uri - we will process it later */ orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri); + /* construct our name - we are in their job family, so we know that + * much. However, we cannot know how many other singletons and jobs + * this HNP is running. Oh well - if someone really wants to use this + * option, they can try to figure it out. 
For now, we'll just assume + * we are the only ones */ + ORTE_PROC_MY_NAME->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_PROC_MY_HNP->jobid, 1); + /* obviously, we are vpid=0 for this job */ + ORTE_PROC_MY_NAME->vpid = 0; + /* for convenience, push the pubsub version of this param into the environ */ opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, 1, &environ); + } else { + /* we want to use PMIX_NAMESPACE that will be sent by the hnp as a jobid */ + opal_setenv(OPAL_MCA_PREFIX"orte_launch", "1", true, &environ); + /* spawn our very own HNP to support us */ + if (ORTE_SUCCESS != (rc = fork_hnp())) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* our name was given to us by the HNP */ } - /* indicate we are a singleton so orte_init knows what to do */ - orte_process_info.proc_type |= ORTE_PROC_SINGLETON; - /* we were not started by a daemon */ - orte_standalone_operation = true; - - /* now define my own name */ - /* hash the nodename */ - OPAL_HASH_STR(orte_process_info.nodename, hash32); - - bias = (uint32_t)orte_process_info.pid; - - OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, - "ess:singleton: initial bias %ld nodename hash %lu", - (long)bias, (unsigned long)hash32)); - - /* fold in the bias */ - hash32 = hash32 ^ bias; - - /* now compress to 16-bits */ - jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32)); - - OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, - "ess:singleton:: final jobfam %lu", - (unsigned long)jobfam)); - - /* set the name - if we eventually spawn an HNP, it will use - * local jobid 0, so offset us by 1 - */ - ORTE_PROC_MY_NAME->jobid = (0xffff0000 & ((uint32_t)jobfam << 16)) + 1; - ORTE_PROC_MY_NAME->vpid = 0; + /* get an async event base - we use the opal_async one so + * we don't startup extra threads if not needed */ + orte_event_base = opal_progress_thread_init(NULL); + progress_thread_running = true; + + /* open and setup pmix */ + if 
(NULL == opal_pmix.initialized) { + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { + error = "opening pmix"; + goto error; + } + if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { + error = "select pmix"; + goto error; + } + } + /* initialize the selected module */ + if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { + error = "init pmix"; + goto error; + } + + /* pmix.init set our process name down in the OPAL layer, + * so carry it forward here */ + ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid; + ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid; - orte_process_info.num_procs = 1; - if (orte_process_info.max_procs < orte_process_info.num_procs) { - orte_process_info.max_procs = orte_process_info.num_procs; + /* get our local rank from PMI */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK, + ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); + if (OPAL_SUCCESS != ret) { + error = "getting local rank"; + goto error; } - - /* flag that we are not routing since we have no HNP */ - orte_routing_is_enabled = false; + orte_process_info.my_local_rank = u16; - /* take a pass thru the session directory code to fillin the - * tmpdir names - don't create anything yet - */ - if (ORTE_SUCCESS != (rc = orte_session_dir(false, - orte_process_info.tmpdir_base, - orte_process_info.nodename, NULL, - ORTE_PROC_MY_NAME))) { - ORTE_ERROR_LOG(rc); - return rc; + /* get our node rank from PMI */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK, + ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); + if (OPAL_SUCCESS != ret) { + error = "getting node rank"; + goto error; } - /* clear the session directory just in case there are - * stale directories laying around + orte_process_info.my_node_rank = u16; + + /* get universe size */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_UNIV_SIZE, + ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); + if (OPAL_SUCCESS != ret) { + error = "getting univ size"; + goto error; + } + orte_process_info.num_procs = 
u32; + /* push into the environ for pickup in MPI layer for + * MPI-3 required info key */ - orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); + if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) { + char * num_procs; + asprintf(&num_procs, "%d", orte_process_info.num_procs); + opal_setenv(OPAL_MCA_PREFIX"orte_ess_num_procs", num_procs, true, &environ); + free(num_procs); + added_num_procs = true; + } + if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) { + char * num_procs; + asprintf(&num_procs, "%d", orte_process_info.num_procs); + opal_setenv("OMPI_APP_CTX_NUM_PROCS", num_procs, true, &environ); + free(num_procs); + added_app_ctx = true; + } - /* use the std app init to complete the procedure */ - if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup(true))) { - ORTE_ERROR_LOG(rc); - return rc; + + /* get our app number from PMI - ok if not found */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_APPNUM, + ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); + if (OPAL_SUCCESS == ret) { + orte_process_info.app_num = u32; + } else { + orte_process_info.app_num = 0; } + /* set some other standard values */ + orte_process_info.num_local_peers = 0; - /* check and ensure pmix was initialized */ - if (NULL == opal_pmix.initialized || !opal_pmix.initialized()) { - opal_setenv("OMPI_MCA_pmix", "native", 1, &environ); - /* tell the pmix framework to allow delayed connection to a server - * in case we need one */ - opal_pmix_base_allow_delayed_server = true; - if (OPAL_SUCCESS != (rc = mca_base_framework_open(&opal_pmix_base_framework, 0))) { - /* if PMIx is not available even with a delayed - * connection to the server, then we are hosed */ - ORTE_ERROR_LOG(rc); - return rc; - } - if (OPAL_SUCCESS != (rc = opal_pmix_base_select()) && - OPAL_ERR_SERVER_NOT_AVAIL != rc) { - /* if PMIx is not available even with a delayed - * connection to the server, then we are hosed */ - ORTE_ERROR_LOG(rc); - return rc; + /* setup transport keys in case the MPI layer needs them - + * we can use the jobfam and 
stepid as unique keys + * because they are unique values assigned by the RM + */ + if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) { + unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid); + unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid); + if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; } + asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key); + putenv(envar); + added_transport_keys = true; + /* cannot free the envar as that messes up our environ */ + free(string_key); } - /* to the best of our knowledge, we are alone */ - orte_process_info.my_node_rank = 0; - orte_process_info.my_local_rank = 0; - - /* set some envars */ - opal_setenv("OMPI_NUM_APP_CTX", "1", 1, &environ); - opal_setenv("OMPI_FIRST_RANKS", "0", 1, &environ); - opal_setenv("OMPI_APP_CTX_NUM_PROCS", "1", 1, &environ); - opal_setenv(OPAL_MCA_PREFIX"orte_ess_num_procs", "1", 1, &environ); - - /* push some required info to our local datastore */ - OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_HOSTNAME); - kvn.type = OPAL_STRING; - kvn.data.string = strdup(orte_process_info.nodename); - if (ORTE_SUCCESS != (rc = opal_pmix.put(PMIX_GLOBAL, &kvn))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&kvn); - return rc; - } - OBJ_DESTRUCT(&kvn); - - /* construct the RTE string, if we have one */ - param = orte_rml.get_contact_info(); - if (NULL != param) { - /* push it out for others to use */ - OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_URI); - kvn.type = OPAL_STRING; - kvn.data.string = strdup(param); - free(param); - if (ORTE_SUCCESS != (rc = opal_pmix.put(PMIX_GLOBAL, &kvn))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&kvn); - return rc; + /* retrieve our topology */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO, + ORTE_PROC_MY_NAME, &val, OPAL_STRING); + if (OPAL_SUCCESS == ret && NULL != val) { + /* 
load the topology */ + if (0 != hwloc_topology_init(&opal_hwloc_topology)) { + ret = OPAL_ERROR; + free(val); + error = "setting topology"; + goto error; + } + if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) { + ret = OPAL_ERROR; + free(val); + hwloc_topology_destroy(opal_hwloc_topology); + error = "setting topology"; + goto error; + } + /* since we are loading this from an external source, we have to + * explicitly set a flag so hwloc sets things up correctly + */ + if (0 != hwloc_topology_set_flags(opal_hwloc_topology, + (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | + HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | + HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { + ret = OPAL_ERROR; + hwloc_topology_destroy(opal_hwloc_topology); + free(val); + error = "setting topology"; + goto error; + } + /* now load the topology */ + if (0 != hwloc_topology_load(opal_hwloc_topology)) { + ret = OPAL_ERROR; + hwloc_topology_destroy(opal_hwloc_topology); + free(val); + error = "setting topology"; + goto error; + } + free(val); + } else { + /* it wasn't passed down to us, so go get it */ + if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { + error = "topology discovery"; + goto error; + } + /* push it into the PMIx database in case someone + * tries to retrieve it so we avoid an attempt to + * get it again */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); + kv->type = OPAL_STRING; + if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) { + error = "topology export"; + goto error; + } + if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) { + error = "topology store"; + goto error; } - OBJ_DESTRUCT(&kvn); + OBJ_RELEASE(kv); } - - /* push our local rank */ - OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_LOCALRANK); - kvn.type = OPAL_UINT16; - kvn.data.uint16 = orte_process_info.my_local_rank; - if (ORTE_SUCCESS != (rc = opal_pmix.put(PMIX_GLOBAL, &kvn))) { + + /* use 
the std app init to complete the procedure */ + if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup(true))) { ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&kvn); return rc; } - OBJ_DESTRUCT(&kvn); - - /* push our node rank */ - OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_NODERANK); - kvn.type = OPAL_UINT16; - kvn.data.uint16 = orte_process_info.my_node_rank; - if (ORTE_SUCCESS != (rc = opal_pmix.put(PMIX_GLOBAL, &kvn))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&kvn); - return rc; + + /* push our hostname so others can find us, if they need to */ + OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING); + if (ORTE_SUCCESS != ret) { + error = "db store hostname"; + goto error; } - OBJ_DESTRUCT(&kvn); return ORTE_SUCCESS; + + error: + if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { + orte_show_help("help-orte-runtime.txt", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + } + return ret; } static int rte_finalize(void) { int ret; - - /* mark us as finalized */ - if (NULL != opal_pmix.finalize) { - opal_pmix.finalize(); - (void) mca_base_framework_close(&opal_pmix_base_framework); + + /* remove the envars that we pushed into environ + * so we leave that structure intact + */ + if (added_transport_keys) { + unsetenv(OPAL_MCA_PREFIX"orte_precondition_transports"); + } + if (added_num_procs) { + unsetenv(OPAL_MCA_PREFIX"orte_ess_num_procs"); + } + if (added_app_ctx) { + unsetenv("OMPI_APP_CTX_NUM_PROCS"); } - + if (added_pmix_envs) { + unsetenv("PMIX_NAMESPACE"); + unsetenv("PMIX_RANK"); + unsetenv("PMIX_SERVER_URI"); + unsetenv("PMIX_SECURITY_MODE"); + } + /* use the default procedure to finish */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { ORTE_ERROR_LOG(ret); } - /* cleanup the environment */ - opal_unsetenv("OMPI_NUM_APP_CTX", &environ); - opal_unsetenv("OMPI_FIRST_RANKS", &environ); - opal_unsetenv("OMPI_APP_CTX_NUM_PROCS", &environ); - 
opal_unsetenv(OPAL_MCA_PREFIX"orte_ess_num_procs", &environ); - opal_unsetenv(OPAL_MCA_PREFIX"pubsub_orte_server", &environ); // just in case it is there + /* mark us as finalized */ + if (NULL != opal_pmix.finalize) { + opal_pmix.finalize(); + (void) mca_base_framework_close(&opal_pmix_base_framework); + } + + /* release the event base */ + if (progress_thread_running) { + opal_progress_thread_finalize(NULL); + progress_thread_running = false; + } return ret; } + +#define ORTE_URI_MSG_LGTH 256 + +static void set_handler_default(int sig) +{ + struct sigaction act; + + act.sa_handler = SIG_DFL; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + + sigaction(sig, &act, (struct sigaction *)0); +} + +static int fork_hnp(void) +{ + int p[2], death_pipe[2]; + char *cmd; + char **argv = NULL; + int argc; + char *param, *cptr; + sigset_t sigs; + int buffer_length, num_chars_read, chunk; + char *orted_uri; + int rc, i; + + /* A pipe is used to communicate between the parent and child to + indicate whether the exec ultimately succeeded or failed. The + child sets the pipe to be close-on-exec; the child only ever + writes anything to the pipe if there is an error (e.g., + executable not found, exec() fails, etc.). The parent does a + blocking read on the pipe; if the pipe closed with no data, + then the exec() succeeded. If the parent reads something from + the pipe, then the child was letting us know that it failed. + */ + if (pipe(p) < 0) { + ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES); + return ORTE_ERR_SYS_LIMITS_PIPES; + } + + /* we also have to give the HNP a pipe it can watch to know when + * we terminated. 
Since the HNP is going to be a child of us, it + * can't just use waitpid to see when we leave - so it will watch + * the pipe instead + */ + if (pipe(death_pipe) < 0) { + ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES); + return ORTE_ERR_SYS_LIMITS_PIPES; + } + + /* find the orted binary using the install_dirs support - this also + * checks to ensure that we can see this executable and it *is* executable by us + */ + cmd = opal_path_access("orted", opal_install_dirs.bindir, X_OK); + if (NULL == cmd) { + /* guess we couldn't do it - best to abort */ + ORTE_ERROR_LOG(ORTE_ERR_FILE_NOT_EXECUTABLE); + close(p[0]); + close(p[1]); + return ORTE_ERR_FILE_NOT_EXECUTABLE; + } + + /* okay, setup an appropriate argv */ + opal_argv_append(&argc, &argv, "orted"); + + /* tell the daemon it is to be the HNP */ + opal_argv_append(&argc, &argv, "--hnp"); + + /* tell the daemon to get out of our process group */ + opal_argv_append(&argc, &argv, "--set-sid"); + + /* tell the daemon to report back its uri so we can connect to it */ + opal_argv_append(&argc, &argv, "--report-uri"); + asprintf(&param, "%d", p[1]); + opal_argv_append(&argc, &argv, param); + free(param); + + /* give the daemon a pipe it can watch to tell when we have died */ + opal_argv_append(&argc, &argv, "--singleton-died-pipe"); + asprintf(&param, "%d", death_pipe[0]); + opal_argv_append(&argc, &argv, param); + free(param); + + /* add any debug flags */ + if (orte_debug_flag) { + opal_argv_append(&argc, &argv, "--debug"); + } + + if (orte_debug_daemons_flag) { + opal_argv_append(&argc, &argv, "--debug-daemons"); + } + + if (orte_debug_daemons_file_flag) { + if (!orte_debug_daemons_flag) { + opal_argv_append(&argc, &argv, "--debug-daemons"); + } + opal_argv_append(&argc, &argv, "--debug-daemons-file"); + } + + /* indicate that it must use the novm state machine */ + opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(&argc, &argv, "state_novm_select"); + opal_argv_append(&argc, &argv, "1"); + + /* Fork off
the child */ + orte_process_info.hnp_pid = fork(); + if(orte_process_info.hnp_pid < 0) { + ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN); + close(p[0]); + close(p[1]); + close(death_pipe[0]); + close(death_pipe[1]); + free(cmd); + opal_argv_free(argv); + return ORTE_ERR_SYS_LIMITS_CHILDREN; + } + + if (orte_process_info.hnp_pid == 0) { + close(p[0]); + close(death_pipe[1]); + /* I am the child - exec me */ + + /* Set signal handlers back to the default. Do this close + to the execve() because the event library may (and likely + will) reset them. If we don't do this, the event + library may have left some set that, at least on some + OS's, don't get reset via fork() or exec(). Hence, the + orted could be unkillable (for example). */ + set_handler_default(SIGTERM); + set_handler_default(SIGINT); + set_handler_default(SIGHUP); + set_handler_default(SIGPIPE); + set_handler_default(SIGCHLD); + + /* Unblock all signals, for many of the same reasons that + we set the default handlers, above. This is noticeable + on Linux where the event library blocks SIGTERM, but we + don't want that blocked by the orted (or, more + specifically, we don't want it to be blocked by the + orted and then inherited by the ORTE processes that it + forks, making them unkillable by SIGTERM). */ + sigprocmask(0, 0, &sigs); + sigprocmask(SIG_UNBLOCK, &sigs, 0); + + execv(cmd, argv); + + /* if I get here, the execv failed!
*/ + orte_show_help("help-ess-base.txt", "ess-base:execv-error", + true, cmd, strerror(errno)); + exit(1); + + } else { + int count; + + free(cmd); + /* I am the parent - wait to hear something back and + * report results + */ + close(p[1]); /* parent closes the write - orted will write its contact info to it*/ + close(death_pipe[0]); /* parent closes the death_pipe's read */ + opal_argv_free(argv); + + /* setup the buffer to read the HNP's uri */ + buffer_length = ORTE_URI_MSG_LGTH; + chunk = ORTE_URI_MSG_LGTH-1; + num_chars_read = 0; + orted_uri = (char*)malloc(buffer_length); + memset(orted_uri, 0, buffer_length); + + while (chunk == (rc = read(p[0], &orted_uri[num_chars_read], chunk))) { + /* we read an entire buffer - better get more */ + num_chars_read += chunk; + orted_uri = realloc((void*)orted_uri, buffer_length+ORTE_URI_MSG_LGTH); + memset(&orted_uri[buffer_length], 0, ORTE_URI_MSG_LGTH); + buffer_length += ORTE_URI_MSG_LGTH; + } + num_chars_read += rc; + + if (num_chars_read <= 0) { + /* we didn't get anything back - this is bad */ + ORTE_ERROR_LOG(ORTE_ERR_HNP_COULD_NOT_START); + free(orted_uri); + return ORTE_ERR_HNP_COULD_NOT_START; + } + + /* parse the sysinfo from the returned info - must + * start from the end of the string as the uri itself + * can contain brackets */ + if (NULL == (param = strrchr(orted_uri, '['))) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + free(orted_uri); + return ORTE_ERR_COMM_FAILURE; + } + *param = '\0'; /* terminate the uri string */ + ++param; /* point to the start of the sysinfo */ + + /* find the end of the sysinfo */ + if (NULL == (cptr = strchr(param, ']'))) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + free(orted_uri); + return ORTE_ERR_COMM_FAILURE; + } + *cptr = '\0'; /* terminate the sysinfo string */ + ++cptr; /* point to the start of the pmix uri */ + + /* convert the sysinfo string */ + if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_sysinfo(&orte_local_cpu_type, + &orte_local_cpu_model, param))) { + 
ORTE_ERROR_LOG(rc); + free(orted_uri); + return rc; + } + + /* save the daemon uri - we will process it later */ + orte_process_info.my_daemon_uri = strdup(orted_uri); + /* likewise, since this is also the HNP, set that uri too */ + orte_process_info.my_hnp_uri = orted_uri; + + /* split the pmix_uri into its parts */ + argv = opal_argv_split(cptr, ','); + count = opal_argv_count(argv); + /* push each piece into the environment */ + for (i=0; i < count; i++) { + char *c = strchr(argv[i], '='); + assert(NULL != c); + *c++ = '\0'; + opal_setenv(argv[i], c, true, &environ); + } + opal_argv_free(argv); + added_pmix_envs = true; + + /* all done - report success */ + return ORTE_SUCCESS; + } +} diff --git a/orte/mca/ess/slurm/Makefile.am b/orte/mca/ess/slurm/Makefile.am index 7783e8a147e..43d3d6537da 100644 --- a/orte/mca/ess/slurm/Makefile.am +++ b/orte/mca/ess/slurm/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/ess/slurm/configure.m4 b/orte/mca/ess/slurm/configure.m4 index 487f870d197..861cc0e7641 100644 --- a/orte/mca/ess/slurm/configure.m4 +++ b/orte/mca/ess/slurm/configure.m4 @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. 
# All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2011 Los Alamos National Security, LLC. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,8 +26,8 @@ AC_DEFUN([MCA_orte_ess_slurm_CONFIG],[ AC_CONFIG_FILES([orte/mca/ess/slurm/Makefile]) ORTE_CHECK_SLURM([ess_slurm], [ess_slurm_good=1], [ess_slurm_good=0]) - - # if check worked, set wrapper flags if so. + + # if check worked, set wrapper flags if so. # Evaluate succeed / fail AS_IF([test "$ess_slurm_good" = "1"], [$1], diff --git a/orte/mca/ess/slurm/ess_slurm.h b/orte/mca/ess/slurm/ess_slurm.h index 87c85d4d710..c82ae4ff464 100644 --- a/orte/mca/ess/slurm/ess_slurm.h +++ b/orte/mca/ess/slurm/ess_slurm.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/ess/slurm/ess_slurm_component.c b/orte/mca/ess/slurm/ess_slurm_component.c index 9790daa3f0a..ded948c922c 100644 --- a/orte/mca/ess/slurm/ess_slurm_component.c +++ b/orte/mca/ess/slurm/ess_slurm_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -74,7 +74,7 @@ int orte_ess_slurm_component_query(mca_base_module_t **module, int *priority) * answer to both is "yes", then we were launched * by mpirun in a slurm world, so make ourselves available */ - + if (ORTE_PROC_IS_DAEMON && NULL != getenv("SLURM_JOBID") && NULL != orte_process_info.my_hnp_uri) { @@ -82,7 +82,7 @@ int orte_ess_slurm_component_query(mca_base_module_t **module, int *priority) *module = (mca_base_module_t *)&orte_ess_slurm_module; return ORTE_SUCCESS; } - + /* Sadly, no */ *priority = -1; *module = NULL; diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c index 4ed455baac1..472b6aa9ee1 100644 --- a/orte/mca/ess/slurm/ess_slurm_module.c +++ b/orte/mca/ess/slurm/ess_slurm_module.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -25,9 +25,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include @@ -71,10 +69,10 @@ static int rte_init(void) error = "orte_ess_base_std_prolog"; goto error; } - + /* Start by getting a unique name */ slurm_set_name(); - + /* if I am a daemon, complete my setup using the * default procedure */ @@ -98,7 +96,7 @@ static int rte_init(void) } return ORTE_SUCCESS; } - + if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { @@ -107,9 +105,9 @@ static int rte_init(void) goto error; } return ORTE_SUCCESS; - + } - + /* no other options are supported! */ error = "ess_error"; ret = ORTE_ERROR; @@ -127,7 +125,7 @@ static int rte_init(void) static int rte_finalize(void) { int ret; - + /* if I am a daemon, finalize using the default procedure */ if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { @@ -149,7 +147,7 @@ static int rte_finalize(void) return ret; } } - + return ORTE_SUCCESS; } @@ -160,10 +158,10 @@ static int slurm_set_name(void) orte_jobid_t jobid; orte_vpid_t vpid; char *tmp; - + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:slurm setting name")); - + if (NULL == orte_ess_base_jobid) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; @@ -181,16 +179,16 @@ static int slurm_set_name(void) ORTE_ERROR_LOG(rc); return(rc); } - + ORTE_PROC_MY_NAME->jobid = jobid; - + /* fix up the vpid and make it the "real" vpid */ slurm_nodeid = atoi(getenv("SLURM_NODEID")); ORTE_PROC_MY_NAME->vpid = vpid + slurm_nodeid; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - + /* fix up the system info nodename to match exactly what slurm returned */ if (NULL != orte_process_info.nodename) { 
free(orte_process_info.nodename); @@ -201,16 +199,16 @@ static int slurm_set_name(void) } orte_process_info.nodename = strdup(tmp); - + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:slurm set nodename to %s", (NULL == orte_process_info.nodename) ? "NULL" : orte_process_info.nodename)); - + /* get the non-name common environmental variables */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } - + return ORTE_SUCCESS; } diff --git a/orte/mca/ess/tm/Makefile.am b/orte/mca/ess/tm/Makefile.am index ef61c23e273..3e087c88d5f 100644 --- a/orte/mca/ess/tm/Makefile.am +++ b/orte/mca/ess/tm/Makefile.am @@ -5,14 +5,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/ess/tm/configure.m4 b/orte/mca/ess/tm/configure.m4 index d0987cd3566..0e7935d0c0a 100644 --- a/orte/mca/ess/tm/configure.m4 +++ b/orte/mca/ess/tm/configure.m4 @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2011 Los Alamos National Security, LLC. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,8 +26,8 @@ AC_DEFUN([MCA_orte_ess_tm_CONFIG],[ AC_CONFIG_FILES([orte/mca/ess/tm/Makefile]) ORTE_CHECK_TM([ess_tm], [ess_tm_good=1], [ess_tm_good=0]) - - # if check worked, set wrapper flags if so. + + # if check worked, set wrapper flags if so. # Evaluate succeed / fail AS_IF([test "$ess_tm_good" = "1"], [$1], diff --git a/orte/mca/ess/tm/ess_tm.h b/orte/mca/ess/tm/ess_tm.h index 4047e3bea37..ed3ccde007b 100644 --- a/orte/mca/ess/tm/ess_tm.h +++ b/orte/mca/ess/tm/ess_tm.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/ess/tm/ess_tm_component.c b/orte/mca/ess/tm/ess_tm_component.c index fd2fa5eaedb..8656cccf02e 100644 --- a/orte/mca/ess/tm/ess_tm_component.c +++ b/orte/mca/ess/tm/ess_tm_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -74,14 +74,14 @@ int orte_ess_tm_component_query(mca_base_module_t **module, int *priority) * answer to both is "yes", then we were launched * by mpirun in a tm world */ - + if (NULL != getenv("PBS_JOBID") && NULL != orte_process_info.my_hnp_uri) { *priority = 30; *module = (mca_base_module_t *)&orte_ess_tm_module; return ORTE_SUCCESS; } - + /* Sadly, no */ *priority = -1; *module = NULL; diff --git a/orte/mca/ess/tm/ess_tm_module.c b/orte/mca/ess/tm/ess_tm_module.c index 3fdb4195db7..38e86416272 100644 --- a/orte/mca/ess/tm/ess_tm_module.c +++ b/orte/mca/ess/tm/ess_tm_module.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -24,9 +24,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include @@ -75,17 +73,17 @@ static int rte_init(void) error = "orte_ess_base_std_prolog"; goto error; } - + /* Start by getting a unique name */ tm_set_name(); - + /* if I am a daemon, complete my setup using the * default procedure */ if (ORTE_PROC_IS_DAEMON) { /* get the list of nodes used for this job */ nodelist = getenv(OPAL_MCA_PREFIX"orte_nodelist"); - + if (NULL != nodelist) { /* split the node list into an argv array */ hosts = opal_argv_split(nodelist, ','); @@ -98,7 +96,7 @@ static int rte_init(void) opal_argv_free(hosts); return ORTE_SUCCESS; } - + if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { @@ -107,13 +105,13 @@ static int rte_init(void) goto error; } return ORTE_SUCCESS; - + } - + /* no other options are supported! 
*/ error = "ess_error"; ret = ORTE_ERROR; - + error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", @@ -127,7 +125,7 @@ static int rte_init(void) static int rte_finalize(void) { int ret; - + /* if I am a daemon, finalize using the default procedure */ if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { @@ -149,7 +147,7 @@ static int rte_finalize(void) return ret; } } - + return ORTE_SUCCESS; } @@ -158,7 +156,7 @@ static int tm_set_name(void) int rc; orte_jobid_t jobid; orte_vpid_t vpid; - + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:tm setting name")); if (NULL == orte_ess_base_jobid) { @@ -169,7 +167,7 @@ static int tm_set_name(void) ORTE_ERROR_LOG(rc); return(rc); } - + if (NULL == orte_ess_base_vpid) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; diff --git a/orte/mca/ess/tool/Makefile.am b/orte/mca/ess/tool/Makefile.am index c5074ed194c..30e4a01cfdf 100644 --- a/orte/mca/ess/tool/Makefile.am +++ b/orte/mca/ess/tool/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/ess/tool/ess_tool.h b/orte/mca/ess/tool/ess_tool.h index 21e2ad03217..62c2655cce4 100644 --- a/orte/mca/ess/tool/ess_tool.h +++ b/orte/mca/ess/tool/ess_tool.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/ess/tool/ess_tool_component.c b/orte/mca/ess/tool/ess_tool_component.c index 19ae6c6dfb2..ad81fbe72a8 100644 --- a/orte/mca/ess/tool/ess_tool_component.c +++ b/orte/mca/ess/tool/ess_tool_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -79,7 +79,7 @@ int orte_ess_tool_component_query(mca_base_module_t **module, int *priority) *module = (mca_base_module_t *)&orte_ess_tool_module; return ORTE_SUCCESS; } - + /* else, don't */ *priority = -1; *module = NULL; diff --git a/orte/mca/ess/tool/ess_tool_module.c b/orte/mca/ess/tool/ess_tool_module.c index 5d28e3d6f38..b0108ec46ab 100644 --- a/orte/mca/ess/tool/ess_tool_module.c +++ b/orte/mca/ess/tool/ess_tool_module.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -37,7 +37,6 @@ #include "orte/mca/plm/plm.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/proc_info.h" -#include "orte/runtime/orte_cr.h" #include "orte/mca/ess/ess.h" #include "orte/mca/ess/base/base.h" @@ -68,7 +67,7 @@ static int rte_init(void) goto error; } - + if (NULL != orte_ess_base_jobid && NULL != orte_ess_base_vpid) { opal_output_verbose(2, orte_ess_base_framework.framework_output, @@ -86,7 +85,7 @@ static int rte_init(void) uint16_t jobfam; uint32_t hash32; uint32_t bias; - + opal_output_verbose(2, orte_ess_base_framework.framework_output, "ess:tool:computing name"); /* hash the nodename */ @@ -94,15 +93,15 @@ static int rte_init(void) bias = (uint32_t)orte_process_info.pid; /* fold in the bias */ hash32 = hash32 ^ bias; - + /* now compress to 16-bits */ jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32)); - + /* set the name */ ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16); ORTE_PROC_MY_NAME->vpid = 0; } - + /* do the rest of the standard tool init */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); @@ -110,7 +109,7 @@ static int rte_init(void) goto error; } - return ORTE_SUCCESS; + return ORTE_SUCCESS; error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { @@ -131,21 +130,18 @@ static void rte_abort(int status, bool report) { /* do NOT do a normal finalize as this will very likely * hang the process. 
We are aborting due to an abnormal condition - * that precludes normal cleanup + * that precludes normal cleanup * - * We do need to do the following bits to make sure we leave a + * We do need to do the following bits to make sure we leave a * clean environment. Taken from orte_finalize(): * - Assume errmgr cleans up child processes before we exit. */ - - /* CRS cleanup since it may have a named pipe and thread active */ - orte_cr_finalize(); - - /* - Clean out the global structures + + /* - Clean out the global structures * (not really necessary, but good practice) */ orte_proc_info_finalize(); - + /* Now just exit */ exit(status); } diff --git a/orte/mca/filem/Makefile.am b/orte/mca/filem/Makefile.am index 8cf50c99d17..cbce1d48bfa 100644 --- a/orte/mca/filem/Makefile.am +++ b/orte/mca/filem/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/filem/base/Makefile.am b/orte/mca/filem/base/Makefile.am index 091adbc5b05..c2082681408 100644 --- a/orte/mca/filem/base/Makefile.am +++ b/orte/mca/filem/base/Makefile.am @@ -5,14 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -24,5 +25,3 @@ libmca_filem_la_SOURCES += \ base/filem_base_select.c \ base/filem_base_receive.c \ base/filem_base_fns.c - -dist_ortedata_DATA = base/help-orte-filem-base.txt diff --git a/orte/mca/filem/base/base.h b/orte/mca/filem/base/base.h index a7369fc1666..c990abec30e 100644 --- a/orte/mca/filem/base/base.h +++ b/orte/mca/filem/base/base.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #ifndef ORTE_FILEM_BASE_H diff --git a/orte/mca/filem/base/filem_base_fns.c b/orte/mca/filem/base/filem_base_fns.c index 870e0d09b81..61a52d94536 100644 --- a/orte/mca/filem/base/filem_base_fns.c +++ b/orte/mca/filem/base/filem_base_fns.c @@ -3,24 +3,22 @@ * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. 
* All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "orte_config.h" -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_SYS_TYPES_H #include #endif diff --git a/orte/mca/filem/base/filem_base_frame.c b/orte/mca/filem/base/filem_base_frame.c index 1fa2d4ea212..b4a8479ee8a 100644 --- a/orte/mca/filem/base/filem_base_frame.c +++ b/orte/mca/filem/base/filem_base_frame.c @@ -3,16 +3,16 @@ * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/filem/base/filem_base_receive.c b/orte/mca/filem/base/filem_base_receive.c index 9970e546f88..581fe096138 100644 --- a/orte/mca/filem/base/filem_base_receive.c +++ b/orte/mca/filem/base/filem_base_receive.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -27,9 +27,7 @@ */ #include "orte_config.h" -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_SYS_TYPES_H #include #endif @@ -77,7 +75,7 @@ int orte_filem_base_comm_start(void) if ( recv_issued ) { return ORTE_SUCCESS; } - + OPAL_OUTPUT_VERBOSE((5, orte_filem_base_framework.framework_output, "%s filem:base: Receive: Start command recv", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -89,7 +87,7 @@ int orte_filem_base_comm_start(void) NULL); recv_issued = true; - + return ORTE_SUCCESS; } @@ -103,14 +101,14 @@ int orte_filem_base_comm_stop(void) if ( recv_issued ) { return ORTE_SUCCESS; } - + OPAL_OUTPUT_VERBOSE((5, orte_filem_base_framework.framework_output, "%s filem:base:receive stop comm", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_FILEM_BASE); recv_issued = false; - + return ORTE_SUCCESS; } @@ -137,7 +135,7 @@ void orte_filem_base_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(rc); return; } - + switch (command) { case ORTE_FILEM_GET_PROC_NODE_NAME_CMD: OPAL_OUTPUT_VERBOSE((10, orte_filem_base_framework.framework_output, @@ -154,7 +152,7 @@ void orte_filem_base_recv(int status, orte_process_name_t* sender, filem_base_process_get_remote_path_cmd(sender, buffer); break; - + default: ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); } diff --git a/orte/mca/filem/base/filem_base_select.c b/orte/mca/filem/base/filem_base_select.c index c7385273170..3488956af63 100644 --- a/orte/mca/filem/base/filem_base_select.c +++ b/orte/mca/filem/base/filem_base_select.c @@ -1,26 +1,25 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2008 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. * All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "orte_config.h" -#ifdef HAVE_STRING_H #include -#endif #include "orte/constants.h" @@ -44,7 +43,7 @@ int orte_filem_base_select(void) if( OPAL_SUCCESS != mca_base_select("filem", orte_filem_base_framework.framework_output, &orte_filem_base_framework.framework_components, (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { + (mca_base_component_t **) &best_component, NULL) ) { /* It is okay to not select anything - we'll just retain * the default none module */ diff --git a/orte/mca/filem/base/help-orte-filem-base.txt b/orte/mca/filem/base/help-orte-filem-base.txt deleted file mode 100644 index 034bf218520..00000000000 --- a/orte/mca/filem/base/help-orte-filem-base.txt +++ /dev/null @@ -1,20 +0,0 @@ - -*- text -*- -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for ORTE FileM framework. 
-# diff --git a/orte/mca/filem/filem.h b/orte/mca/filem/filem.h index ff5996df65b..ad6f172d7aa 100644 --- a/orte/mca/filem/filem.h +++ b/orte/mca/filem/filem.h @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -94,7 +94,7 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_filem_base_process_set_t); * Local: Local file reference * Remove: Remote file reference * - * Note: If multiple process sinks are used it is assumed that the + * Note: If multiple process sinks are used it is assumed that the * file reference is the same for each of the sinks. If this is not * true then more than one filem request needs to be created. */ @@ -133,7 +133,7 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_filem_base_file_set_t); * in a single call of the API function. Allowing the implementation * to optimize the sending/receiving of data. * Used for the following: - * + * */ struct orte_filem_base_request_1_0_0_t { /** This is an object, so must have a super */ @@ -214,9 +214,9 @@ typedef int (*orte_filem_base_module_finalize_fn_t) * component will negotiate the correct absolute path for that file/directory * for the remote machine. * - * @param request FileM request describing the files/directories to send, + * @param request FileM request describing the files/directories to send, * the remote files/directories to use, and the processes to see the change. 
- * + * * @return ORTE_SUCCESS on successful file transer * @return ORTE_ERROR on failed file transfer */ @@ -230,9 +230,9 @@ typedef int (*orte_filem_base_put_fn_t) * component will negotiate the correct absolute path for that file/directory * for the remote machine. * - * @param request FileM request describing the files/directories to send, + * @param request FileM request describing the files/directories to send, * the remote files/directories to use, and the processes to see the change. - * + * * @return ORTE_SUCCESS on successful file transer * @return ORTE_ERROR on failed file transfer */ @@ -246,9 +246,9 @@ typedef int (*orte_filem_base_put_nb_fn_t) * component will negotiate the correct absolute path for that file/directory * for the remote machine. * - * @param request FileM request describing the files/directories to receive, + * @param request FileM request describing the files/directories to receive, * the remote files/directories to use, and the processes to see the change. - * + * * @return ORTE_SUCCESS on successful file transer * @return ORTE_ERROR on failed file transfer */ @@ -262,9 +262,9 @@ typedef int (*orte_filem_base_get_fn_t) * component will negotiate the correct absolute path for that file/directory * for the remote machine. * - * @param request FileM request describing the files/directories to receive, + * @param request FileM request describing the files/directories to receive, * the remote files/directories to use, and the processes to see the change. - * + * * @return ORTE_SUCCESS on successful file transer * @return ORTE_ERROR on failed file transfer */ @@ -273,12 +273,12 @@ typedef int (*orte_filem_base_get_nb_fn_t) /** * Remove a file from the remote machine - * + * * Note: By using a relative path for the remote file/directory, the filem * component will negotiate the correct absolute path for that file/directory * for the remote machine. 
* - * @param request FileM request describing the remote files/directories to remove, + * @param request FileM request describing the remote files/directories to remove, * the processes to see the change. * * @return ORTE_SUCCESS on success @@ -289,12 +289,12 @@ typedef int (*orte_filem_base_rm_fn_t) /** * Remove a file from the remote machine (Async) - * + * * Note: By using a relative path for the remote file/directory, the filem * component will negotiate the correct absolute path for that file/directory * for the remote machine. * - * @param request FileM request describing the remote files/directories to remove, + * @param request FileM request describing the remote files/directories to remove, * the processes to see the change. * * @return ORTE_SUCCESS on success diff --git a/orte/mca/filem/orte_filem.7in b/orte/mca/filem/orte_filem.7in index 81e808df24d..a87b46b21d4 100644 --- a/orte/mca/filem/orte_filem.7in +++ b/orte/mca/filem/orte_filem.7in @@ -5,7 +5,7 @@ .\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. .\" .\" Man page for ORTE's FileM Functionality -.\" +.\" .\" .TH name section center-footer left-footer center-header .TH ORTE_FILEM 7 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .\" ************************** @@ -75,7 +75,7 @@ Generally will be rsh or ssh. .SS none FileM Component .PP The \fInone\fP component simply selects no FileM component. All of the FileM -function calls return immediately with ORTE_SUCCESS. +function calls return immediately with ORTE_SUCCESS. . .PP This component is the last component to be selected by default. This means that if diff --git a/orte/mca/filem/raw/Makefile.am b/orte/mca/filem/raw/Makefile.am index 2e154c5908c..c0c9f89cb30 100644 --- a/orte/mca/filem/raw/Makefile.am +++ b/orte/mca/filem/raw/Makefile.am @@ -3,7 +3,7 @@ # All rights reserved. # Copyright (c) 2004-2005 The Trustees of the University of Tennessee. # All rights reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -11,9 +11,9 @@ # Copyright (c) 2012 Los Alamos National Security, LLC. # All rights reserved # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/filem/raw/filem_raw.h b/orte/mca/filem/raw/filem_raw.h index 4a989c1b1ab..295ab8e9370 100644 --- a/orte/mca/filem/raw/filem_raw.h +++ b/orte/mca/filem/raw/filem_raw.h @@ -2,9 +2,9 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/filem/raw/filem_raw_component.c b/orte/mca/filem/raw/filem_raw_component.c index 17fdf72774e..0631989cd89 100644 --- a/orte/mca/filem/raw/filem_raw_component.c +++ b/orte/mca/filem/raw/filem_raw_component.c @@ -3,9 +3,9 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,7 +22,7 @@ /* * Public string for version number */ -const char *orte_filem_raw_component_version_string = +const char *orte_filem_raw_component_version_string = "ORTE FILEM raw MCA component version " ORTE_VERSION; /* @@ -70,7 +70,7 @@ static int filem_raw_register(void) return ORTE_SUCCESS; } -static int filem_raw_open(void) +static int filem_raw_open(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/filem/raw/filem_raw_module.c b/orte/mca/filem/raw/filem_raw_module.c index 6d7ce41f199..aeedef2eaed 100644 --- a/orte/mca/filem/raw/filem_raw_module.c +++ b/orte/mca/filem/raw/filem_raw_module.c @@ -6,9 +6,9 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -19,9 +19,7 @@ #include "orte_config.h" #include "orte/constants.h" -#ifdef HAVE_STRING_H #include -#endif #include #include #include @@ -522,15 +520,15 @@ static int raw_preposition_files(orte_job_t *jdata, } /* set the flags to non-blocking */ if ((flags = fcntl(fd, F_GETFL, 0)) < 0) { - opal_output(orte_filem_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + opal_output(orte_filem_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } else { flags |= O_NONBLOCK; if (fcntl(fd, F_SETFL, flags) < 0) { - opal_output(orte_filem_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + opal_output(orte_filem_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } - } + } OPAL_OUTPUT_VERBOSE((1, orte_filem_base_framework.framework_output, "%s filem:raw: setting up to position file %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fs->local_target)); @@ -672,7 +670,7 @@ static int raw_link_local_files(orte_job_t *jdata, * local process in the job */ my_dir = opal_dirname(orte_process_info.job_session_dir); - + /* setup */ if (NULL != orte_process_info.tmpdir_base) { prefix = strdup(orte_process_info.tmpdir_base); @@ -825,12 +823,12 @@ static void send_chunk(int fd, short argc, void *cbdata) if (numbytes < 0) { /* either we have a connection error or it was a non-blocking read */ - + /* non-blocking, retry */ if (EAGAIN == errno || EINTR == errno) { opal_event_add(&rev->ev, 0); return; - } + } OPAL_OUTPUT_VERBOSE((1, orte_filem_base_framework.framework_output, "%s filem:raw:read error on file %s", @@ -842,7 +840,7 @@ static void send_chunk(int fd, short argc, void *cbdata) */ numbytes = 0; } - + /* if job termination has been ordered, just ignore the * data and delete the read event */ diff --git 
a/orte/mca/grpcomm/Makefile.am b/orte/mca/grpcomm/Makefile.am index dbcf2b71f02..09147163b74 100644 --- a/orte/mca/grpcomm/Makefile.am +++ b/orte/mca/grpcomm/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/grpcomm/base/base.h b/orte/mca/grpcomm/base/base.h index db1b3264b13..7bec78e049e 100644 --- a/orte/mca/grpcomm/base/base.h +++ b/orte/mca/grpcomm/base/base.h @@ -11,7 +11,8 @@ * All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,7 +34,7 @@ #include "opal/class/opal_hash_table.h" #include "opal/dss/dss_types.h" #include "orte/mca/mca.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "orte/mca/odls/odls_types.h" diff --git a/orte/mca/grpcomm/base/grpcomm_base_frame.c b/orte/mca/grpcomm/base/grpcomm_base_frame.c index 814afb11585..242e4410f0a 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_frame.c +++ b/orte/mca/grpcomm/base/grpcomm_base_frame.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -9,9 +10,9 @@ * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -118,10 +119,11 @@ static void ccon(orte_grpcomm_coll_t *p) { p->sig = NULL; OBJ_CONSTRUCT(&p->bucket, opal_buffer_t); + OBJ_CONSTRUCT(&p->distance_mask_recv, opal_bitmap_t); p->dmns = NULL; p->ndmns = 0; + p->nexpected = 0; p->nreported = 0; - p->distance_mask_recv = NULL; p->cbfunc = NULL; p->cbdata = NULL; p->buffers = NULL; @@ -132,13 +134,9 @@ static void cdes(orte_grpcomm_coll_t *p) OBJ_RELEASE(p->sig); } OBJ_DESTRUCT(&p->bucket); - if (NULL != p->dmns) { - free(p->dmns); - } + OBJ_DESTRUCT(&p->distance_mask_recv); + free(p->dmns); free(p->buffers); - if (NULL != p->distance_mask_recv) { - free(p->distance_mask_recv); - } } OBJ_CLASS_INSTANCE(orte_grpcomm_coll_t, opal_list_item_t, diff --git a/orte/mca/grpcomm/base/grpcomm_base_select.c b/orte/mca/grpcomm/base/grpcomm_base_select.c index 66d1cf223c7..95e87f650a7 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_select.c +++ b/orte/mca/grpcomm/base/grpcomm_base_select.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,9 +13,9 @@ * All rights reserved. * Copyright (c) 2014 Intel, Inc. 
All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/grpcomm/base/grpcomm_base_stubs.c b/orte/mca/grpcomm/base/grpcomm_base_stubs.c index 97e78193637..7a258055689 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_stubs.c +++ b/orte/mca/grpcomm/base/grpcomm_base_stubs.c @@ -1,5 +1,5 @@ -/* -*- C -*- - * +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. @@ -10,8 +10,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -148,7 +148,7 @@ static void allgather_stub(int fd, short args, void *cbdata) cd->sig->seq_num = *((uint32_t *)(seq_number)) + 1; } else { OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output, - "%s rpcomm:base:allgather can't not get signature from hash table", + "%s rpcomm:base:allgather cannot get signature from hash table", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_ERROR_LOG(ret); OBJ_RELEASE(cd); @@ -157,7 +157,7 @@ static void allgather_stub(int fd, short args, void *cbdata) ret = opal_hash_table_set_value_ptr(&orte_grpcomm_base.sig_table, (void *)cd->sig->signature, cd->sig->sz * sizeof(orte_process_name_t), (void *)&cd->sig->seq_num); if (OPAL_SUCCESS != ret) { OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output, - "%s rpcomm:base:allgather can't not add new signature to hash table", + "%s rpcomm:base:allgather cannot add new signature to hash table", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_ERROR_LOG(ret); OBJ_RELEASE(cd); @@ -208,6 +208,9 @@ orte_grpcomm_coll_t* 
orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig { orte_grpcomm_coll_t *coll; int rc; + orte_namelist_t *nm; + opal_list_t children; + size_t n; /* search the existing tracker list to see if this already exists */ OPAL_LIST_FOREACH(coll, &orte_grpcomm_base.ongoing, orte_grpcomm_coll_t) { @@ -254,6 +257,30 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig ORTE_ERROR_LOG(rc); return NULL; } + /* cycle thru the array of daemons and compare them to our + * children in the routing tree, counting the ones that match + * so we know how many daemons we should receive contributions from */ + OBJ_CONSTRUCT(&children, opal_list_t); + orte_routed.get_routing_list(&children); + while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&children))) { + for (n=0; n < coll->ndmns; n++) { + if (nm->name.vpid == coll->dmns[n]) { + coll->nexpected++; + break; + } + } + OBJ_RELEASE(nm); + } + OPAL_LIST_DESTRUCT(&children); + /* see if I am in the array of participants - note that I may + * be in the rollup tree even though I'm not participating + * in the collective itself */ + for (n=0; n < coll->ndmns; n++) { + if (coll->dmns[n] == ORTE_PROC_MY_NAME->vpid) { + coll->nexpected++; + break; + } + } return coll; } @@ -292,6 +319,9 @@ static int create_dmns(orte_grpcomm_signature_t *sig, /* all daemons hosting this jobid are participating */ if (NULL == (jdata = orte_get_job_data_object(sig->signature[0].jobid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); + *ndmns = 0; + *dmns = NULL; return ORTE_ERR_NOT_FOUND; } if (NULL == jdata->map) { @@ -321,7 +351,10 @@ static int create_dmns(orte_grpcomm_signature_t *sig, /* should never happen */ ORTE_ERROR_LOG(ORTE_ERROR); free(dns); - return ORTE_ERROR; + ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); + *ndmns = 0; + *dmns = NULL; + return ORTE_ERR_NOT_FOUND; } OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, "%s grpcomm:base:create_dmns 
adding daemon %s to array", @@ -338,6 +371,9 @@ static int create_dmns(orte_grpcomm_signature_t *sig, if (NULL == (jdata = orte_get_job_data_object(sig->signature[n].jobid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); OPAL_LIST_DESTRUCT(&ds); + ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); + *ndmns = 0; + *dmns = NULL; return ORTE_ERR_NOT_FOUND; } opal_output_verbose(5, orte_grpcomm_base_framework.framework_output, @@ -347,12 +383,17 @@ static int create_dmns(orte_grpcomm_signature_t *sig, if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, sig->signature[n].vpid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); OPAL_LIST_DESTRUCT(&ds); + ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); + *ndmns = 0; + *dmns = NULL; return ORTE_ERR_NOT_FOUND; } if (NULL == proc->node || NULL == proc->node->daemon) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); OPAL_LIST_DESTRUCT(&ds); ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); + *ndmns = 0; + *dmns = NULL; return ORTE_ERR_NOT_FOUND; } vpid = proc->node->daemon->name.vpid; @@ -372,7 +413,10 @@ static int create_dmns(orte_grpcomm_signature_t *sig, if (0 == opal_list_get_size(&ds)) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); OPAL_LIST_DESTRUCT(&ds); - return ORTE_ERR_BAD_PARAM; + ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); + *ndmns = 0; + *dmns = NULL; + return ORTE_ERR_NOT_FOUND; } dns = (orte_vpid_t*)malloc(opal_list_get_size(&ds) * sizeof(orte_vpid_t)); nds = 0; @@ -422,27 +466,12 @@ static int pack_xcast(orte_grpcomm_signature_t *sig, return ORTE_SUCCESS; } -void orte_grpcomm_base_mark_distance_recv(orte_grpcomm_coll_t *coll, - uint32_t distance) { - uint32_t maskNumber = distance / 32; - uint32_t bitNumber = distance % 32; - - coll->distance_mask_recv[maskNumber] |= (1 << bitNumber); - - return; +void orte_grpcomm_base_mark_distance_recv (orte_grpcomm_coll_t *coll, + uint32_t distance) { + opal_bitmap_set_bit (&coll->distance_mask_recv, distance); } -unsigned int orte_grpcomm_base_check_distance_recv(orte_grpcomm_coll_t *coll, - uint32_t 
distance) { - uint32_t maskNumber = distance / 32; - uint32_t bitNumber = distance % 32; - - if (NULL == coll->distance_mask_recv) { - return 0; - } else { - if (0 == distance) { - return 1; - } - return (((coll->distance_mask_recv[maskNumber] & (1 << bitNumber)) != 0) ? 1 : 0); - } +unsigned int orte_grpcomm_base_check_distance_recv (orte_grpcomm_coll_t *coll, + uint32_t distance) { + return opal_bitmap_is_set_bit (&coll->distance_mask_recv, distance); } diff --git a/ompi/mca/bcol/iboffload/.opal_ignore b/orte/mca/grpcomm/brks/.opal_ignore similarity index 100% rename from ompi/mca/bcol/iboffload/.opal_ignore rename to orte/mca/grpcomm/brks/.opal_ignore diff --git a/orte/mca/grpcomm/brks/Makefile.am b/orte/mca/grpcomm/brks/Makefile.am deleted file mode 100644 index 321e39ddeea..00000000000 --- a/orte/mca/grpcomm/brks/Makefile.am +++ /dev/null @@ -1,39 +0,0 @@ -# -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(grpcomm_brks_CPPFLAGS) - -sources = \ - grpcomm_brks.h \ - grpcomm_brks.c \ - grpcomm_brks_component.c - -# Make the output library in this brksory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_grpcomm_brks_DSO -component_noinst = -component_install = mca_grpcomm_brks.la -else -component_noinst = libmca_grpcomm_brks.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_grpcomm_brks_la_SOURCES = $(sources) -mca_grpcomm_brks_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_grpcomm_brks_la_SOURCES =$(sources) -libmca_grpcomm_brks_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/grpcomm/brks/grpcomm_brks.c b/orte/mca/grpcomm/brks/grpcomm_brks.c deleted file mode 100644 index 902a6833cb0..00000000000 --- a/orte/mca/grpcomm/brks/grpcomm_brks.c +++ /dev/null @@ -1,353 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Mellanox Technologies, Inc. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" -#include "orte/runtime/orte_wait.h" - -#include -#include - -#include "opal/dss/dss.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/util/name_fns.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/grpcomm/base/base.h" -#include "grpcomm_brks.h" - - -/* Static API's */ -static int init(void); -static void finalize(void); -static int allgather(orte_grpcomm_coll_t *coll, - opal_buffer_t *buf); -static void brks_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance); -static int brks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance); -static void brks_allgather_recv_dist(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); -static int brks_finalize_coll(orte_grpcomm_coll_t *coll, int ret); - -/* Module def */ -orte_grpcomm_base_module_t orte_grpcomm_brks_module = { - init, - finalize, - NULL, - allgather -}; - -/** - * Initialize the module - */ -static int init(void) -{ - /* setup recv for distance data */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_ALLGATHER_BRKS, - ORTE_RML_PERSISTENT, - brks_allgather_recv_dist, NULL); - return OPAL_SUCCESS; -} - -/** - * Finalize the module - */ -static void finalize(void) -{ - /* cancel the recv */ - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_BRKS); - - return; -} - -static int allgather(orte_grpcomm_coll_t *coll, - opal_buffer_t *sendbuf) -{ - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:bruck algo employed for %d processes", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns)); - - /* record that we contributed */ - coll->nreported = 1; - - /* mark local data received */ - coll->distance_mask_recv = (uint32_t 
*)calloc(sizeof(uint32_t), (coll->ndmns - 1)); - - /* start by seeding the collection with our own data */ - opal_dss.copy_payload(&coll->bucket, sendbuf); - - /* process data */ - brks_allgather_process_data(coll, 1); - - return ORTE_SUCCESS; -} - -static int brks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance) { - opal_buffer_t *send_buf; - int rc; - - send_buf = OBJ_NEW(opal_buffer_t); - - /* pack the signature */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &coll->sig, 1, ORTE_SIGNATURE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - } - /* pack the current distance */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &distance, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - } - /* pack the data */ - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(send_buf, &coll->bucket))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - } - - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brks SENDING TO %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer))); - - - if (0 > (rc = orte_rml.send_buffer_nb(peer, send_buf, - ORTE_RML_TAG_ALLGATHER_BRKS, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - }; - - return ORTE_SUCCESS; -} - -static void brks_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance) { - /* Communication step: - At every step i, rank r: - - doubles the distance - - sends message containing all data collected so far to rank r - distance - - receives message containing all data collected so far from rank (r + distance) - */ - orte_process_name_t peer; - orte_vpid_t nv, rank; - int rc; - - peer.jobid = ORTE_PROC_MY_NAME->jobid; - - /* get my own rank */ - rank = ORTE_VPID_INVALID; - for (orte_vpid_t nv = 0; nv < coll->ndmns; nv++) { - if (coll->dmns[nv] == ORTE_PROC_MY_NAME->vpid) { - rank = nv; - break; - } - } - /* check 
for bozo case */ - if (ORTE_VPID_INVALID == rank) { - OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output, - "Peer not found")); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - brks_finalize_coll(coll, ORTE_ERR_NOT_FOUND); - return; - } - - while (distance < coll->ndmns) { - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brks process distance %u)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - - /* first send my current contents */ - nv = (coll->ndmns + rank - distance) % coll->ndmns; - peer.vpid = coll->dmns[nv]; - - brks_allgather_send_dist(coll, &peer, distance); - - /* check whether data for next distance is available*/ - if ((NULL != coll->buffers) && (coll->buffers[distance - 1] != NULL)) { - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brks %u distance data found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, coll->buffers[distance - 1]))) { - ORTE_ERROR_LOG(rc); - brks_finalize_coll(coll, rc); - return; - } - coll->nreported += distance; - orte_grpcomm_base_mark_distance_recv(coll, distance); - OBJ_RELEASE(coll->buffers[distance - 1]); - coll->buffers[distance - 1] = NULL; - distance = distance << 1; - continue; - } - break; - } - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brks reported %lu process from %lu", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)coll->nreported, - (unsigned long)coll->ndmns)); - - /* if we are done, then complete things */ - if (coll->nreported >= coll->ndmns){ - brks_finalize_coll(coll, ORTE_SUCCESS); - } - return; -} - -static void brks_allgather_recv_dist(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - int32_t cnt; - int rc; - orte_grpcomm_signature_t *sig; - orte_grpcomm_coll_t *coll; - uint32_t distance; - - OPAL_OUTPUT_VERBOSE((5, 
orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brks RECEIVING FROM %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(sender))); - - /* unpack the signature */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &cnt, ORTE_SIGNATURE))) { - ORTE_ERROR_LOG(rc); - return; - } - - /* check for the tracker and create it if not found */ - if (NULL == (coll = orte_grpcomm_base_get_tracker(sig, true))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(sig); - return; - } - /* unpack the distance */ - distance = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &distance, &cnt, OPAL_INT32))) { - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - brks_finalize_coll(coll, rc); - return; - } - assert(0 == orte_grpcomm_base_check_distance_recv(coll, distance)); - - /* Check whether we can process next distance */ - if (orte_grpcomm_base_check_distance_recv(coll, (distance >> 1))) { - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brks data from %d distance received, " - "Process the next distance.", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - /* capture any provided content */ - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) { - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - brks_finalize_coll(coll, rc); - return; - } - coll->nreported += distance; - orte_grpcomm_base_mark_distance_recv(coll, distance); - brks_allgather_process_data(coll, (uint32_t)(distance << 1)); - } else { - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brks data from %d distance received, " - "still waiting for data.", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - if (NULL == coll->buffers) { - if (NULL == (coll->buffers = (opal_buffer_t **)calloc(sizeof(opal_buffer_t *), coll->ndmns - 1))) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - brks_finalize_coll(coll, rc); - return; - } - } - if (NULL == 
(coll->buffers[distance - 1] = OBJ_NEW(opal_buffer_t))) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - brks_finalize_coll(coll, rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(coll->buffers[distance - 1], buffer))) { - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - brks_finalize_coll(coll, rc); - return; - } - } - - OBJ_RELEASE(sig); - - return; -} - -static int brks_finalize_coll(orte_grpcomm_coll_t *coll, int ret) { - opal_buffer_t *reply; - int rc; - orte_job_t *jdata; - uint64_t nprocs; - - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brks declared collective complete", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* pack the number of procs involved in the collective - * so the recipients can unpack any collected data */ - if (1 == coll->sig->sz) { - /* get the job object for this entry */ - if (NULL == (jdata = orte_get_job_data_object(coll->sig->signature[0].jobid))) { - ORTE_ERROR_LOG(ORTE_ERROR); - return ORTE_ERROR; - } - nprocs = jdata->num_procs; - } else { - nprocs = coll->sig->sz; - } - - reply = OBJ_NEW(opal_buffer_t); - if (NULL == reply) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &nprocs, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(reply); - return rc; - } - - /* transfer the collected bucket */ - opal_dss.copy_payload(reply, &coll->bucket); - - /* execute the callback */ - if (NULL != coll->cbfunc) { - coll->cbfunc(ret, reply, coll->cbdata); - } - - opal_list_remove_item(&orte_grpcomm_base.ongoing, &coll->super); - - OBJ_RELEASE(reply); - - return ORTE_SUCCESS; -} diff --git a/orte/mca/grpcomm/brks/grpcomm_brks.h b/orte/mca/grpcomm/brks/grpcomm_brks.h deleted file mode 100644 index de2582fa255..00000000000 --- a/orte/mca/grpcomm/brks/grpcomm_brks.h +++ /dev/null @@ -1,31 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2011 Cisco Systems, Inc. 
All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef GRPCOMM_BRKS_H -#define GRPCOMM_BRKS_H - -#include "orte_config.h" - - -#include "orte/mca/grpcomm/grpcomm.h" - -BEGIN_C_DECLS - -/* - * Grpcomm interfaces - */ - -ORTE_MODULE_DECLSPEC extern orte_grpcomm_base_component_t mca_grpcomm_brks_component; -extern orte_grpcomm_base_module_t orte_grpcomm_brks_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/grpcomm/brks/grpcomm_brks_component.c b/orte/mca/grpcomm/brks/grpcomm_brks_component.c deleted file mode 100644 index 5407229ecd3..00000000000 --- a/orte/mca/grpcomm/brks/grpcomm_brks_component.c +++ /dev/null @@ -1,84 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "orte/mca/mca.h" -#include "opal/runtime/opal_params.h" - -#include "orte/util/proc_info.h" - -#include "grpcomm_brks.h" - -static int my_priority=5; -static int brks_open(void); -static int brks_close(void); -static int brks_query(mca_base_module_t **module, int *priority); -static int brks_register(void); - -/* - * Struct of function pointers that need to be initialized - */ -orte_grpcomm_base_component_t mca_grpcomm_brks_component = { - .base_version = { - ORTE_GRPCOMM_BASE_VERSION_3_0_0, - - .mca_component_name = "brks", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_open_component = brks_open, - .mca_close_component = brks_close, - .mca_query_component = brks_query, - .mca_register_component_params = brks_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int brks_register(void) -{ - mca_base_component_t *c = &mca_grpcomm_brks_component.base_version; - - /* make the priority adjustable so users can select - * brks for use by apps without affecting daemons - */ - my_priority = 50; - (void) mca_base_component_var_register(c, "priority", - "Priority of the grpcomm brks component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &my_priority); - return ORTE_SUCCESS; -} - -/* Open the component */ -static int brks_open(void) -{ - return ORTE_SUCCESS; -} - -static int brks_close(void) -{ - return ORTE_SUCCESS; -} - -static int brks_query(mca_base_module_t **module, int *priority) -{ - *priority = my_priority; - *module = (mca_base_module_t *)&orte_grpcomm_brks_module; - return ORTE_SUCCESS; -} diff --git a/orte/mca/grpcomm/brks/owner.txt b/orte/mca/grpcomm/brks/owner.txt deleted file mode 100644 index 4ad6f408ca3..00000000000 --- 
a/orte/mca/grpcomm/brks/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/ompi/mca/coll/ml/.opal_ignore b/orte/mca/grpcomm/brucks/.opal_ignore similarity index 100% rename from ompi/mca/coll/ml/.opal_ignore rename to orte/mca/grpcomm/brucks/.opal_ignore diff --git a/orte/mca/grpcomm/brucks/Makefile.am b/orte/mca/grpcomm/brucks/Makefile.am new file mode 100644 index 00000000000..5519da1690f --- /dev/null +++ b/orte/mca/grpcomm/brucks/Makefile.am @@ -0,0 +1,39 @@ +# +# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2014 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(grpcomm_brucks_CPPFLAGS) + +sources = \ + grpcomm_brucks.h \ + grpcomm_brucks_module.c \ + grpcomm_brucks_component.c + +# Make the output library in this brucksory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if MCA_BUILD_orte_grpcomm_brucks_DSO +component_noinst = +component_install = mca_grpcomm_brucks.la +else +component_noinst = libmca_grpcomm_brucks.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_grpcomm_brucks_la_SOURCES = $(sources) +mca_grpcomm_brucks_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_grpcomm_brucks_la_SOURCES =$(sources) +libmca_grpcomm_brucks_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/grpcomm/brucks/grpcomm_brucks.h b/orte/mca/grpcomm/brucks/grpcomm_brucks.h new file mode 100644 index 00000000000..063c81c3f87 --- /dev/null +++ b/orte/mca/grpcomm/brucks/grpcomm_brucks.h @@ -0,0 +1,31 @@ +/* -*- C -*- + * + * Copyright (c) 2011 Cisco Systems, Inc. 
All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ +#ifndef GRPCOMM_BRUCKS_H +#define GRPCOMM_BRUCKS_H + +#include "orte_config.h" + + +#include "orte/mca/grpcomm/grpcomm.h" + +BEGIN_C_DECLS + +/* + * Grpcomm interfaces + */ + +ORTE_MODULE_DECLSPEC extern orte_grpcomm_base_component_t mca_grpcomm_brucks_component; +extern orte_grpcomm_base_module_t orte_grpcomm_brucks_module; + +END_C_DECLS + +#endif diff --git a/orte/mca/grpcomm/brucks/grpcomm_brucks_component.c b/orte/mca/grpcomm/brucks/grpcomm_brucks_component.c new file mode 100644 index 00000000000..705ea3d3c9b --- /dev/null +++ b/orte/mca/grpcomm/brucks/grpcomm_brucks_component.c @@ -0,0 +1,84 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include "orte/mca/mca.h" +#include "opal/runtime/opal_params.h" + +#include "orte/util/proc_info.h" + +#include "grpcomm_brucks.h" + +static int my_priority=5; +static int brucks_open(void); +static int brucks_close(void); +static int brucks_query(mca_base_module_t **module, int *priority); +static int brucks_register(void); + +/* + * Struct of function pointers that need to be initialized + */ +orte_grpcomm_base_component_t mca_grpcomm_brucks_component = { + .base_version = { + ORTE_GRPCOMM_BASE_VERSION_3_0_0, + + .mca_component_name = "brucks", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + .mca_open_component = brucks_open, + .mca_close_component = brucks_close, + .mca_query_component = brucks_query, + .mca_register_component_params = brucks_register, + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, +}; + +static int brucks_register(void) +{ + mca_base_component_t *c = &mca_grpcomm_brucks_component.base_version; + + /* make the priority adjustable so users can select + * brucks for use by apps without affecting daemons + */ + my_priority = 50; + (void) mca_base_component_var_register(c, "priority", + "Priority of the grpcomm brucks component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &my_priority); + return ORTE_SUCCESS; +} + +/* Open the component */ +static int brucks_open(void) +{ + return ORTE_SUCCESS; +} + +static int brucks_close(void) +{ + return ORTE_SUCCESS; +} + +static int brucks_query(mca_base_module_t **module, int *priority) +{ + *priority = my_priority; + *module = (mca_base_module_t *)&orte_grpcomm_brucks_module; + return ORTE_SUCCESS; +} diff --git a/orte/mca/grpcomm/brucks/grpcomm_brucks_module.c b/orte/mca/grpcomm/brucks/grpcomm_brucks_module.c new 
file mode 100644 index 00000000000..f473cbc1be6 --- /dev/null +++ b/orte/mca/grpcomm/brucks/grpcomm_brucks_module.c @@ -0,0 +1,388 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2007 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" +#include "orte/types.h" +#include "orte/runtime/orte_wait.h" + +#include +#include + +#include "opal/dss/dss.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" +#include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" + +#include "orte/mca/grpcomm/base/base.h" +#include "grpcomm_brucks.h" + + +/* Static API's */ +static int init(void); +static void finalize(void); +static int allgather(orte_grpcomm_coll_t *coll, + opal_buffer_t *buf); +static void brucks_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance); +static int brucks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance); +static void brucks_allgather_recv_dist(int status, orte_process_name_t* sender, + opal_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata); +static int brucks_finalize_coll(orte_grpcomm_coll_t *coll, int ret); + +/* Module def */ +orte_grpcomm_base_module_t orte_grpcomm_brucks_module = { + init, + finalize, + NULL, + allgather +}; + +/** + * Initialize the module + */ +static int init(void) +{ + /* setup recv for distance data */ + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, + ORTE_RML_TAG_ALLGATHER_BRUCKS, + ORTE_RML_PERSISTENT, + brucks_allgather_recv_dist, NULL); + return OPAL_SUCCESS; +} + +/** 
+ * Finalize the module + */ +static void finalize(void) +{ + /* cancel the recv */ + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_BRUCKS); +} + +static int allgather(orte_grpcomm_coll_t *coll, + opal_buffer_t *sendbuf) +{ + OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:coll:brucks algo employed for %d processes", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns)); + /* get my own rank */ + coll->my_rank = ORTE_VPID_INVALID; + for (orte_vpid_t nv = 0; nv < coll->ndmns; nv++) { + if (coll->dmns[nv] == ORTE_PROC_MY_NAME->vpid) { + coll->my_rank = nv; + break; + } + } + + /* check for bozo case */ + if (ORTE_VPID_INVALID == coll->my_rank) { + OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output, + "Peer not found")); + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + brucks_finalize_coll(coll, ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + + /* record that we contributed */ + coll->nreported = 1; + + /* mark local data received */ + if (coll->ndmns > 1) { + opal_bitmap_init (&coll->distance_mask_recv, (uint32_t) log2 (coll->ndmns) + 1); + } + + /* start by seeding the collection with our own data */ + opal_dss.copy_payload(&coll->bucket, sendbuf); + + /* process data */ + brucks_allgather_process_data (coll, 0); + + return ORTE_SUCCESS; +} + +static int brucks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance) { + opal_buffer_t *send_buf; + int rc; + + send_buf = OBJ_NEW(opal_buffer_t); + + /* pack the signature */ + if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &coll->sig, 1, ORTE_SIGNATURE))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(send_buf); + return rc; + } + /* pack the current distance */ + if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &distance, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(send_buf); + return rc; + } + /* pack the number of daemons included in the payload */ + if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, 
&coll->nreported, 1, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(send_buf); + return rc; + } + /* pack the data */ + if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(send_buf, &coll->bucket))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(send_buf); + return rc; + } + + OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:coll:brucks SENDING TO %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer))); + + + if (0 > (rc = orte_rml.send_buffer_nb(peer, send_buf, + ORTE_RML_TAG_ALLGATHER_BRUCKS, + orte_rml_send_callback, NULL))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(send_buf); + return rc; + }; + + return ORTE_SUCCESS; +} + +static int brucks_allgather_process_buffered (orte_grpcomm_coll_t *coll, uint32_t distance) { + opal_buffer_t *buffer; + size_t nreceived; + int32_t cnt = 1; + int rc; + + /* check whether data for next distance is available*/ + if (NULL == coll->buffers || NULL == coll->buffers[distance]) { + return 0; + } + + buffer = coll->buffers[distance]; + coll->buffers[distance] = NULL; + + OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:coll:brucks %u distance data found", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); + rc = opal_dss.unpack (buffer, &nreceived, &cnt, OPAL_SIZE); + if (OPAL_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + brucks_finalize_coll(coll, rc); + return rc; + } + + if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) { + ORTE_ERROR_LOG(rc); + brucks_finalize_coll(coll, rc); + return rc; + } + + coll->nreported += nreceived; + orte_grpcomm_base_mark_distance_recv (coll, distance); + OBJ_RELEASE(buffer); + + return 1; +} + +static void brucks_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance) { + /* Communication step: + At every step i, rank r: + - doubles the distance + - sends message containing all data collected so far to rank r - distance + - receives message containing all data collected so far from rank (r + 
distance) + */ + uint32_t log2ndmns = (uint32_t) log2 (coll->ndmns); + uint32_t last_round, remainder; + orte_process_name_t peer; + orte_vpid_t nv; + int rc; + + /* NTH: calculate in which round we should send the final data. this is the first + * round in which we have data from at least (coll->ndmns - (1 << log2ndmns)) + * daemons. alternatively we could just send when distance reaches log2ndmns but + * that could end up sending more data than needed */ + last_round = (uint32_t) ceil (log2 ((double) (coll->ndmns - (1 << log2ndmns)))); + + peer.jobid = ORTE_PROC_MY_NAME->jobid; + + while (distance < log2ndmns) { + OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:coll:brucks process distance %u)", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); + + /* first send my current contents */ + nv = (coll->ndmns + coll->my_rank - (1 << distance)) % coll->ndmns; + peer.vpid = coll->dmns[nv]; + + brucks_allgather_send_dist(coll, &peer, distance); + + if (distance == last_round) { + /* have enough data to send the final round now */ + nv = (coll->ndmns + coll->my_rank - (1 << log2ndmns)) % coll->ndmns; + peer.vpid = coll->dmns[nv]; + brucks_allgather_send_dist(coll, &peer, log2ndmns); + } + + rc = brucks_allgather_process_buffered (coll, distance); + if (!rc) { + break; + } else if (rc < 0) { + return; + } + + ++distance; + } + + if (distance == log2ndmns) { + if (distance == last_round) { + /* need to send the final round now */ + nv = (coll->ndmns + coll->my_rank - (1 << log2ndmns)) % coll->ndmns; + peer.vpid = coll->dmns[nv]; + brucks_allgather_send_dist(coll, &peer, log2ndmns); + } + + /* check if the final message is already queued */ + rc = brucks_allgather_process_buffered (coll, distance); + if (rc < 0) { + return; + } + } + + OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:coll:brucks reported %lu process from %lu", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)coll->nreported, + 
(unsigned long)coll->ndmns)); + + /* if we are done, then complete things. we may get data from more daemons than expected */ + if (coll->nreported >= coll->ndmns){ + brucks_finalize_coll(coll, ORTE_SUCCESS); + } +} + +static void brucks_allgather_recv_dist(int status, orte_process_name_t* sender, + opal_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata) +{ + int32_t cnt; + int rc; + orte_grpcomm_signature_t *sig; + orte_grpcomm_coll_t *coll; + uint32_t distance; + + OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:coll:brucks RECEIVING FROM %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(sender))); + + /* unpack the signature */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &cnt, ORTE_SIGNATURE))) { + ORTE_ERROR_LOG(rc); + return; + } + + /* check for the tracker and create it if not found */ + if (NULL == (coll = orte_grpcomm_base_get_tracker(sig, true))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + OBJ_RELEASE(sig); + return; + } + /* unpack the distance */ + distance = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &distance, &cnt, OPAL_INT32))) { + OBJ_RELEASE(sig); + ORTE_ERROR_LOG(rc); + brucks_finalize_coll(coll, rc); + return; + } + assert(0 == orte_grpcomm_base_check_distance_recv(coll, distance)); + + /* Check whether we can process next distance */ + if (coll->nreported && (!distance || orte_grpcomm_base_check_distance_recv(coll, distance - 1))) { + size_t nreceived; + OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:coll:brucks data from %d distance received, " + "Process the next distance.", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); + /* capture any provided content */ + rc = opal_dss.unpack (buffer, &nreceived, &cnt, OPAL_SIZE); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(sig); + ORTE_ERROR_LOG(rc); + brucks_finalize_coll(coll, rc); + return; + } + if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) { + 
OBJ_RELEASE(sig); + ORTE_ERROR_LOG(rc); + brucks_finalize_coll(coll, rc); + return; + } + coll->nreported += nreceived; + orte_grpcomm_base_mark_distance_recv(coll, distance); + brucks_allgather_process_data(coll, distance + 1); + } else { + OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:coll:brucks data from %d distance received, " + "still waiting for data.", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); + if (NULL == coll->buffers) { + if (NULL == (coll->buffers = (opal_buffer_t **) calloc ((uint32_t) log2 (coll->ndmns) + 1, sizeof(opal_buffer_t *)))) { + rc = OPAL_ERR_OUT_OF_RESOURCE; + OBJ_RELEASE(sig); + ORTE_ERROR_LOG(rc); + brucks_finalize_coll(coll, rc); + return; + } + } + if (NULL == (coll->buffers[distance] = OBJ_NEW(opal_buffer_t))) { + rc = OPAL_ERR_OUT_OF_RESOURCE; + OBJ_RELEASE(sig); + ORTE_ERROR_LOG(rc); + brucks_finalize_coll(coll, rc); + return; + } + if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(coll->buffers[distance], buffer))) { + OBJ_RELEASE(sig); + ORTE_ERROR_LOG(rc); + brucks_finalize_coll(coll, rc); + return; + } + } + + OBJ_RELEASE(sig); +} + +static int brucks_finalize_coll(orte_grpcomm_coll_t *coll, int ret) +{ + OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:coll:brucks declared collective complete", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* execute the callback */ + if (NULL != coll->cbfunc) { + coll->cbfunc(ret, &coll->bucket, coll->cbdata); + } + + opal_list_remove_item(&orte_grpcomm_base.ongoing, &coll->super); + + return ORTE_SUCCESS; +} diff --git a/ompi/mca/dpm/base/owner.txt b/orte/mca/grpcomm/brucks/owner.txt similarity index 100% rename from ompi/mca/dpm/base/owner.txt rename to orte/mca/grpcomm/brucks/owner.txt diff --git a/orte/mca/grpcomm/direct/Makefile.am b/orte/mca/grpcomm/direct/Makefile.am index e9e89f8f9bf..6e1733ef768 100644 --- a/orte/mca/grpcomm/direct/Makefile.am +++ b/orte/mca/grpcomm/direct/Makefile.am @@ -1,12 +1,12 @@ # # 
Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights -# reserved. +# reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index da30940ee5b..4fc737865c2 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -5,7 +5,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All * rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -124,10 +124,8 @@ static int xcast(orte_vpid_t *vpids, static int allgather(orte_grpcomm_coll_t *coll, opal_buffer_t *buf) { - int rc, ret; + int rc; opal_buffer_t *relay; - orte_job_t *jdata; - uint64_t nprocs; OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, "%s grpcomm:direct: allgather", @@ -145,53 +143,16 @@ static int allgather(orte_grpcomm_coll_t *coll, return rc; } - /* if we are the HNP and nobody else is participating, - * then just execute the xcast */ - if (ORTE_PROC_IS_HNP && 1 == coll->ndmns) { - /* pack the status - success since the allgather completed. 
This - * would be an error if we timeout instead */ - ret = ORTE_SUCCESS; - if (OPAL_SUCCESS != (rc = opal_dss.pack(relay, &ret, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(relay); - return rc; - } - /* pack the number of procs involved in the collective - * so the recipients can unpack any collected data */ - if (1 == coll->sig->sz) { - /* get the job object for this entry */ - if (NULL == (jdata = orte_get_job_data_object(coll->sig->signature[0].jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(relay); - return ORTE_ERR_NOT_FOUND; - } - nprocs = jdata->num_procs; - } else { - nprocs = coll->sig->sz; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(relay, &nprocs, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(relay); - return rc; - } - /* pass along the payload */ - opal_dss.copy_payload(relay, buf); - orte_grpcomm.xcast(coll->sig, ORTE_RML_TAG_COLL_RELEASE, relay); - OBJ_RELEASE(relay); - return ORTE_SUCCESS; - } - /* pass along the payload */ opal_dss.copy_payload(relay, buf); - /* otherwise, we need to send this to the HNP for - * processing */ + /* send this to ourselves for processing */ OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:direct:allgather sending to HNP", + "%s grpcomm:direct:allgather sending to ourself", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* send the info to the HNP for tracking */ - rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, relay, + /* send the info to ourselves for tracking */ + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay, ORTE_RML_TAG_ALLGATHER_DIRECT, orte_rml_send_callback, NULL); return rc; @@ -206,8 +167,6 @@ static void allgather_recv(int status, orte_process_name_t* sender, orte_grpcomm_signature_t *sig; opal_buffer_t *reply; orte_grpcomm_coll_t *coll; - orte_job_t *jdata; - uint64_t nprocs; OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, "%s grpcomm:direct allgather recvd from %s", @@ -234,55 +193,60 @@ static void 
allgather_recv(int status, orte_process_name_t* sender, opal_dss.copy_payload(&coll->bucket, buffer); OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:direct allgather recv ndmns %d nrep %d", + "%s grpcomm:direct allgather recv nexpected %d nrep %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int)coll->ndmns, (int)coll->nreported)); - - /* if all participating daemons have reported */ - if (coll->ndmns == coll->nreported) { - reply = OBJ_NEW(opal_buffer_t); - /* pack the signature */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &sig, 1, ORTE_SIGNATURE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(reply); - OBJ_RELEASE(sig); - return; - } - /* pack the status - success since the allgather completed. This - * would be an error if we timeout instead */ - ret = ORTE_SUCCESS; - if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &ret, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(reply); - OBJ_RELEASE(sig); - return; - } - /* pack the number of procs involved in the collective - * so the recipients can unpack any collected data */ - if (1 == sig->sz) { - /* get the job object for this entry */ - if (NULL == (jdata = orte_get_job_data_object(sig->signature[0].jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + (int)coll->nexpected, (int)coll->nreported)); + + /* see if everyone has reported */ + if (coll->nreported == coll->nexpected) { + if (ORTE_PROC_IS_HNP) { + OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:direct allgather HNP reports complete", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + /* the allgather is complete - send the xcast */ + reply = OBJ_NEW(opal_buffer_t); + /* pack the signature */ + if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &sig, 1, ORTE_SIGNATURE))) { + ORTE_ERROR_LOG(rc); OBJ_RELEASE(reply); OBJ_RELEASE(sig); return; } - nprocs = jdata->num_procs; - } else { - nprocs = sig->sz; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &nprocs, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); 
+ /* pack the status - success since the allgather completed. This + * would be an error if we timeout instead */ + ret = ORTE_SUCCESS; + if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &ret, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(reply); + OBJ_RELEASE(sig); + return; + } + /* transfer the collected bucket */ + opal_dss.copy_payload(reply, &coll->bucket); + /* send the release via xcast */ + (void)orte_grpcomm.xcast(sig, ORTE_RML_TAG_COLL_RELEASE, reply); OBJ_RELEASE(reply); - OBJ_RELEASE(sig); - return; + } else { + OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:direct allgather rollup complete - sending to %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(ORTE_PROC_MY_PARENT))); + /* relay the bucket upward */ + reply = OBJ_NEW(opal_buffer_t); + /* pack the signature */ + if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &sig, 1, ORTE_SIGNATURE))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(reply); + OBJ_RELEASE(sig); + return; + } + /* transfer the collected bucket */ + opal_dss.copy_payload(reply, &coll->bucket); + /* send the info to our parent */ + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, reply, + ORTE_RML_TAG_ALLGATHER_DIRECT, + orte_rml_send_callback, NULL); } - /* transfer the collected bucket */ - opal_dss.copy_payload(reply, &coll->bucket); - - /* send the release via xcast */ - (void)orte_grpcomm.xcast(sig, ORTE_RML_TAG_COLL_RELEASE, reply); - OBJ_RELEASE(reply); } OBJ_RELEASE(sig); } @@ -358,7 +322,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(ret); goto relay; } - + /* update our local nidmap, if required - the decode function * knows what to do - it will also free the bytes in the byte object */ @@ -374,7 +338,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, "%s grpcomm:direct:xcast updating daemon nidmap", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - + if (ORTE_SUCCESS 
!= (ret = orte_util_decode_daemon_nodemap(bo))) { ORTE_ERROR_LOG(ret); goto relay; @@ -383,7 +347,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, /* update the routing plan */ orte_routed.update_routing_plan(); - + /* see if we have wiring info as well */ cnt=1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &flag, &cnt, OPAL_INT8))) { @@ -441,7 +405,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, OBJ_RELEASE(rly); goto CLEANUP; } - + /* send the message to each recipient on list, deconstructing it as we go */ while (NULL != (item = opal_list_remove_first(&coll))) { nm = (orte_namelist_t*)item; @@ -502,8 +466,8 @@ static void barrier_release(int status, orte_process_name_t* sender, orte_grpcomm_coll_t *coll; OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:direct: barrier release called", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + "%s grpcomm:direct: barrier release called with %d bytes", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)buffer->bytes_used)); /* unpack the signature */ cnt = 1; diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.h b/orte/mca/grpcomm/direct/grpcomm_direct.h index b9b385976cd..d04224b5574 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.h +++ b/orte/mca/grpcomm/direct/grpcomm_direct.h @@ -1,11 +1,11 @@ /* -*- C -*- - * + * * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/orte/mca/grpcomm/direct/grpcomm_direct_component.c b/orte/mca/grpcomm/direct/grpcomm_direct_component.c index ac4b6e693f3..3c6cad000d4 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct_component.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct_component.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ @@ -55,7 +55,7 @@ static int direct_register(void) /* make the priority adjustable so users can select * direct for use by apps without affecting daemons */ - my_priority = 1; + my_priority = 85; (void) mca_base_component_var_register(c, "priority", "Priority of the grpcomm direct component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, diff --git a/orte/mca/grpcomm/grpcomm.h b/orte/mca/grpcomm/grpcomm.h index c464259caa6..00ddccacc42 100644 --- a/orte/mca/grpcomm/grpcomm.h +++ b/orte/mca/grpcomm/grpcomm.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -42,6 +42,7 @@ #include "orte/mca/mca.h" #include "opal/class/opal_list.h" +#include "opal/class/opal_bitmap.h" #include "opal/dss/dss_types.h" #include "orte/mca/rml/rml_types.h" @@ -72,11 +73,16 @@ typedef struct { opal_buffer_t bucket; /* participating daemons */ orte_vpid_t *dmns; + /** number of participating daemons */ size_t ndmns; + /** my index in the dmns array */ + unsigned long my_rank; + /* number of buckets expected */ + size_t nexpected; /* number reported in */ size_t nreported; /* distance masks for receive */ - uint32_t *distance_mask_recv; + opal_bitmap_t distance_mask_recv; /* received buckets */ opal_buffer_t ** buffers; /* callback function */ diff --git a/ompi/mca/op/x86/.opal_ignore b/orte/mca/grpcomm/rcd/.opal_ignore similarity index 100% rename from ompi/mca/op/x86/.opal_ignore rename to orte/mca/grpcomm/rcd/.opal_ignore diff --git a/orte/mca/grpcomm/rcd/Makefile.am b/orte/mca/grpcomm/rcd/Makefile.am index abd5661a998..250700cf568 100644 --- a/orte/mca/grpcomm/rcd/Makefile.am +++ b/orte/mca/grpcomm/rcd/Makefile.am @@ -1,12 +1,12 @@ # # Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights -# reserved. +# reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/grpcomm/rcd/grpcomm_rcd.c b/orte/mca/grpcomm/rcd/grpcomm_rcd.c index 7dbf6b25509..61e2dfd2583 100644 --- a/orte/mca/grpcomm/rcd/grpcomm_rcd.c +++ b/orte/mca/grpcomm/rcd/grpcomm_rcd.c @@ -1,11 +1,11 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2007 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All + * Copyright (c) 2011-2016 Los Alamos National Security, LLC. 
All * rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science @@ -76,35 +76,55 @@ static void finalize(void) { /* cancel the recv */ orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_RCD); - return; } static int allgather(orte_grpcomm_coll_t *coll, opal_buffer_t *sendbuf) { + uint32_t log2ndmns; + /* check the number of involved daemons - if it is not a power of two, * then we cannot do it */ if (0 == ((coll->ndmns != 0) && !(coll->ndmns & (coll->ndmns - 1)))) { return ORTE_ERR_TAKE_NEXT_OPTION; } + log2ndmns = log2 (coll->ndmns); + OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, "%s grpcomm:coll:recdub algo employed for %d daemons", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns)); - /* record that we contributed */ - coll->nreported = 1; - /* mark local data received */ - if (coll->ndmns > 1) { - coll->distance_mask_recv = (uint32_t *)calloc(sizeof(uint32_t), log2(coll->ndmns)); + if (log2ndmns) { + opal_bitmap_init (&coll->distance_mask_recv, log2ndmns); + } + + /* get my own rank */ + coll->my_rank = ORTE_VPID_INVALID; + for (orte_vpid_t nv = 0 ; nv < coll->ndmns ; ++nv) { + if (coll->dmns[nv] == ORTE_PROC_MY_NAME->vpid) { + coll->my_rank = nv; + break; + } + } + + /* check for bozo case */ + if (ORTE_VPID_INVALID == coll->my_rank) { + OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output, + "My peer not found in daemons array")); + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + rcd_finalize_coll(coll, ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; } /* start by seeding the collection with our own data */ opal_dss.copy_payload(&coll->bucket, sendbuf); + coll->nreported = 1; + /* process data */ - rcd_allgather_process_data(coll, 1); + rcd_allgather_process_data (coll, 0); return ORTE_SUCCESS; } @@ -154,71 
+174,54 @@ static void rcd_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t dista At every step i, rank r: - exchanges message containing all data collected so far with rank peer = (r ^ 2^i). */ + uint32_t log2ndmns = log2(coll->ndmns); orte_process_name_t peer; - orte_vpid_t nv, rank; - uint32_t distance_index; + orte_vpid_t nv; int rc; peer.jobid = ORTE_PROC_MY_NAME->jobid; - /* get my own rank */ - rank = ORTE_VPID_INVALID; - for (orte_vpid_t nv = 0; nv < coll->ndmns; nv++) { - if (coll->dmns[nv] == ORTE_PROC_MY_NAME->vpid) { - rank = nv; - break; - } - } - /* check for bozo case */ - if (ORTE_VPID_INVALID == rank) { - OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output, - "Peer not found")); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rcd_finalize_coll(coll, ORTE_ERR_NOT_FOUND); - return; - } - - while(distance < coll->ndmns) { + while (distance < log2ndmns) { OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, "%s grpcomm:coll:recdub process distance %u", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); /* first send my current contents */ - nv = rank ^ distance; + nv = coll->my_rank ^ (1 << distance); + assert (nv < coll->ndmns); peer.vpid = coll->dmns[nv]; rcd_allgather_send_dist(coll, &peer, distance); - /* check whether data for next distance is available*/ - distance_index = log2(distance); - if ((NULL != coll->buffers) && (NULL != coll->buffers[distance_index])) { - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:recdub %u distance data found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, coll->buffers[distance_index]))) { - ORTE_ERROR_LOG(rc); - rcd_finalize_coll(coll, rc); - return; - } - coll->nreported += distance; - orte_grpcomm_base_mark_distance_recv(coll, distance); - OBJ_RELEASE(coll->buffers[distance_index]); - coll->buffers[distance_index] = NULL; - distance = distance << 1; - continue; + /* check 
whether data for next distance is available */ + if (NULL == coll->buffers || NULL == coll->buffers[distance]) { + break; + } + + OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, + "%s grpcomm:coll:recdub %u distance data found", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); + if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, coll->buffers[distance]))) { + ORTE_ERROR_LOG(rc); + rcd_finalize_coll(coll, rc); + return; } - break; + coll->nreported += 1 << distance; + orte_grpcomm_base_mark_distance_recv(coll, distance); + OBJ_RELEASE(coll->buffers[distance]); + coll->buffers[distance] = NULL; + ++distance; } + OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, "%s grpcomm:coll:recdub reported %lu process from %lu", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)coll->nreported, (unsigned long)coll->ndmns)); /* if we are done, then complete things */ - if (coll->nreported >= coll->ndmns){ + if (coll->nreported == coll->ndmns) { rcd_finalize_coll(coll, ORTE_SUCCESS); } - return; } static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender, @@ -226,7 +229,7 @@ static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender, void* cbdata) { int32_t cnt; - uint32_t distance, distance_index; + uint32_t distance; int rc; orte_grpcomm_signature_t *sig; orte_grpcomm_coll_t *coll; @@ -250,17 +253,17 @@ static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender, return; } /* unpack the distance */ - distance = 0; + distance = -1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &distance, &cnt, OPAL_UINT32))) { OBJ_RELEASE(sig); ORTE_ERROR_LOG(rc); rcd_finalize_coll(coll, rc); return; } - assert(0 == orte_grpcomm_base_check_distance_recv(coll, distance)); + assert(distance >= 0 && 0 == orte_grpcomm_base_check_distance_recv(coll, distance)); /* Check whether we can process next distance */ - if (orte_grpcomm_base_check_distance_recv(coll, (distance >> 1))) { + if 
(coll->nreported && (!distance || orte_grpcomm_base_check_distance_recv(coll, (distance - 1)))) { OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, "%s grpcomm:coll:recdub data from %d distance received, " "Process the next distance.", @@ -272,32 +275,30 @@ static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender, rcd_finalize_coll(coll, rc); return; } - coll->nreported += distance; - orte_grpcomm_base_mark_distance_recv(coll, distance); - rcd_allgather_process_data(coll, (uint32_t)(distance << 1)); + coll->nreported += (1 << distance); + orte_grpcomm_base_mark_distance_recv (coll, distance); + rcd_allgather_process_data (coll, distance + 1); } else { OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, "%s grpcomm:coll:recdub data from %d distance received, " "still waiting for data.", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); if (NULL == coll->buffers) { - if (NULL == (coll->buffers = (opal_buffer_t **)calloc(sizeof(opal_buffer_t *), log2(coll->ndmns)))) { - rc = OPAL_ERR_OUT_OF_RESOURCE; + coll->buffers = (opal_buffer_t **) calloc (log2 (coll->ndmns), sizeof (coll->buffers[0])); + if (NULL == coll->buffers) { OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - rcd_finalize_coll(coll, rc); + ORTE_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); + rcd_finalize_coll(coll, OPAL_ERR_OUT_OF_RESOURCE); return; } } - distance_index = log2(distance); - if (NULL == (coll->buffers[distance_index] = OBJ_NEW(opal_buffer_t))) { - rc = OPAL_ERR_OUT_OF_RESOURCE; + if (NULL == (coll->buffers[distance] = OBJ_NEW(opal_buffer_t))) { OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - rcd_finalize_coll(coll, rc); + ORTE_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); + rcd_finalize_coll(coll, OPAL_ERR_OUT_OF_RESOURCE); return; } - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(coll->buffers[distance_index], buffer))) { + if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(coll->buffers[distance], buffer))) { OBJ_RELEASE(sig); ORTE_ERROR_LOG(rc); 
rcd_finalize_coll(coll, rc); @@ -306,57 +307,21 @@ static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender, } OBJ_RELEASE(sig); - - return; } -static int rcd_finalize_coll(orte_grpcomm_coll_t *coll, int ret) { - opal_buffer_t *reply; - int rc; - orte_job_t *jdata; - uint64_t nprocs; - - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, +static int rcd_finalize_coll(orte_grpcomm_coll_t *coll, int ret) +{ + OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, "%s grpcomm:coll:recdub declared collective complete", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* pack the number of procs involved in the collective - * so the recipients can unpack any collected data */ - if (1 == coll->sig->sz) { - /* get the job object for this entry */ - if (NULL == (jdata = orte_get_job_data_object(coll->sig->signature[0].jobid))) { - ORTE_ERROR_LOG(ORTE_ERROR); - return ORTE_ERROR; - } - nprocs = jdata->num_procs; - } else { - nprocs = coll->sig->sz; - } - - reply = OBJ_NEW(opal_buffer_t); - if (NULL == reply) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &nprocs, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(reply); - return rc; - } - - /* transfer the collected bucket */ - opal_dss.copy_payload(reply, &coll->bucket); - /* execute the callback */ if (NULL != coll->cbfunc) { - coll->cbfunc(ret, reply, coll->cbdata); + coll->cbfunc(ret, &coll->bucket, coll->cbdata); } opal_list_remove_item(&orte_grpcomm_base.ongoing, &coll->super); - OBJ_RELEASE(reply); - OBJ_RELEASE(coll); return ORTE_SUCCESS; diff --git a/orte/mca/grpcomm/rcd/grpcomm_rcd.h b/orte/mca/grpcomm/rcd/grpcomm_rcd.h index 0d11cb22c37..98da4d99a0b 100644 --- a/orte/mca/grpcomm/rcd/grpcomm_rcd.h +++ b/orte/mca/grpcomm/rcd/grpcomm_rcd.h @@ -1,11 +1,11 @@ /* -*- C -*- - * + * * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. 
All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/orte/mca/iof/Makefile.am b/orte/mca/iof/Makefile.am index e1025433dce..2c31d1a7d10 100644 --- a/orte/mca/iof/Makefile.am +++ b/orte/mca/iof/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/iof/README.txt b/orte/mca/iof/README.txt index 1fcd8292b76..85ae4d1cfe6 100644 --- a/orte/mca/iof/README.txt +++ b/orte/mca/iof/README.txt @@ -26,7 +26,7 @@ After the job starts up, this is how things look on the HNP side of things. ENDPOINTS (orte_iof_base_endpoint_t) mode origin tag fd seq ack src_frags sink_frags notes -============================================================================================= +============================================================================================= 1 0,0,0 1 1 0 0 0 0 pull() call from rmgr<- ============================================================================================= | 1 0,0,0 2 2 0 0 0 0 pull() call from rmgr<-|--- @@ -69,9 +69,9 @@ struct orte_iof_svc_fwd_t { }; -Note: This first subscriber says that it will receive from any process +Note: This first subscriber says that it will receive from any process in the job. Note that the jobid=1 and the mask=2. So, we expect this -to collect the stdout from any of the ranks. Obviously the second +to collect the stdout from any of the ranks. 
Obviously the second subscriber says the same thing but for stderr. The third subscriber is for receving data from stdin and sending it out to rank 0 of the job. Notice the mask=ff which means compare jobid,vpid @@ -88,18 +88,18 @@ tied to the subscription. Hmmm, this I do not really understand. APPENDIX A -These are the defines that go with the mask. +These are the defines that go with the mask. #define ORTE_NS_CMP_NONE 0x00 #define ORTE_NS_CMP_JOBID 0x02 #define ORTE_NS_CMP_VPID 0x04 #define ORTE_NS_CMP_ALL 0Xff -When we get a HDR_MSG, we call orte_iof_svc_proxy_msg() +When we get a HDR_MSG, we call orte_iof_svc_proxy_msg() APPENDIX B -There are two dbx files that help get to where we want to get -for seeing how things work. +There are two dbx files that help get to where we want to get +for seeing how things work. start.x : Run this first to get initial breakpoint. Needs this so we can set additional breakpoints. This also has some very helpful aliases for looking at the structures shown above. diff --git a/orte/mca/iof/base/Makefile.am b/orte/mca/iof/base/Makefile.am index e06d883babc..0d08004c774 100644 --- a/orte/mca/iof/base/Makefile.am +++ b/orte/mca/iof/base/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/iof/base/base.h b/orte/mca/iof/base/base.h index 013900a2a33..676a57a2592 100644 --- a/orte/mca/iof/base/base.h +++ b/orte/mca/iof/base/base.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -24,7 +24,7 @@ * * I/O Forwarding Service */ - + #ifndef MCA_IOF_BASE_H #define MCA_IOF_BASE_H @@ -41,9 +41,7 @@ #ifdef HAVE_UNISTD_H #include #endif -#ifdef HAVE_SIGNAL_H #include -#endif #include "opal/class/opal_list.h" #include "opal/class/opal_bitmap.h" @@ -73,7 +71,7 @@ typedef struct { ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_job_t); /* - * Maximum size of single msg + * Maximum size of single msg */ #define ORTE_IOF_BASE_MSG_MAX 4096 #define ORTE_IOF_BASE_TAG_MAX 50 @@ -133,6 +131,7 @@ struct orte_iof_base_t { char *input_files; orte_iof_sink_t *iof_write_stdout; orte_iof_sink_t *iof_write_stderr; + bool redirect_app_stderr_to_stdout; }; typedef struct orte_iof_base_t orte_iof_base_t; diff --git a/orte/mca/iof/base/iof_base_frame.c b/orte/mca/iof/base/iof_base_frame.c index 51e8b3a52af..439c7a451d3 100644 --- a/orte/mca/iof/base/iof_base_frame.c +++ b/orte/mca/iof/base/iof_base_frame.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,11 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,9 +26,7 @@ #include "orte_config.h" #include "orte/constants.h" -#ifdef HAVE_STRING_H #include -#endif #include #include "orte/mca/mca.h" @@ -162,7 +161,7 @@ static void orte_iof_base_write_event_destruct(orte_iof_write_event_t* wev) return; } } - + if (2 < wev->fd) { OPAL_OUTPUT_VERBOSE((20, orte_iof_base_framework.framework_output, "%s iof: closing fd %d for write event", @@ -206,6 +205,15 @@ static int orte_iof_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_SCOPE_READONLY, &orte_iof_base.input_files); + /* Redirect application stderr to stdout (at source) */ + orte_iof_base.redirect_app_stderr_to_stdout = false; + (void) mca_base_var_register("orte", "iof","base", "redirect_app_stderr_to_stdout", + "Redirect application stderr to stdout at source (default: false)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &orte_iof_base.redirect_app_stderr_to_stdout); + return ORTE_SUCCESS; } @@ -275,7 +283,7 @@ static int orte_iof_base_open(mca_base_open_flag_t flags) ORTE_IOF_SINK_DEFINE(&orte_iof_base.iof_write_stderr, ORTE_PROC_MY_NAME, 2, ORTE_IOF_STDERR, orte_iof_base_write_handler, NULL); } - + /* do NOT set these file descriptors to non-blocking. 
If we do so, * we set the file descriptor to non-blocking for everyone that has * that file descriptor, which includes everyone else in our shell @@ -284,8 +292,8 @@ static int orte_iof_base_open(mca_base_open_flag_t flags) * This causes things like "mpirun -np 1 big_app | cat" to lose * output, because cat's stdout is then ALSO non-blocking and cat * isn't built to deal with that case (same with almost all other - * unix text utils). - */ + * unix text utils). + */ } /* Open up all available components */ diff --git a/orte/mca/iof/base/iof_base_output.c b/orte/mca/iof/base/iof_base_output.c index 8d1b9eddcaf..26ed8450201 100644 --- a/orte/mca/iof/base/iof_base_output.c +++ b/orte/mca/iof/base/iof_base_output.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -26,16 +26,12 @@ #include "orte_config.h" #include "orte/constants.h" -#ifdef HAVE_STRING_H #include -#endif #include #ifdef HAVE_UNISTD_H #include #endif -#ifdef HAVE_TIME_H #include -#endif #include #include "opal/util/output.h" @@ -61,12 +57,12 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream, "%s write:output setting up to write %d bytes to %s for %s on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, (ORTE_IOF_STDIN & stream) ? "stdin" : ((ORTE_IOF_STDOUT & stream) ? "stdout" : ((ORTE_IOF_STDERR & stream) ? 
"stderr" : "stddiag")), - ORTE_NAME_PRINT(name), + ORTE_NAME_PRINT(name), (NULL == channel) ? -1 : channel->fd)); /* setup output object */ output = OBJ_NEW(orte_iof_write_output_t); - + /* write output data to the corresponding tag */ if (ORTE_IOF_STDIN & stream) { /* copy over the data to be written */ @@ -104,7 +100,7 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream, snprintf(endtag, ORTE_IOF_BASE_TAG_MAX, "", suffix); goto construct; } - + /* if we are to timestamp output, start the tag with that */ if (orte_timestamp_output) { time_t mytime; @@ -113,7 +109,7 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream, time(&mytime); cptr = ctime(&mytime); cptr[strlen(cptr)-1] = '\0'; /* remove trailing newline */ - + if (orte_tag_output) { /* if we want it tagged as well, use both */ snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "%s[%s,%s]<%s>:", @@ -127,7 +123,7 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream, memset(endtag, '\0', ORTE_IOF_BASE_TAG_MAX); goto construct; } - + if (orte_tag_output) { snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "[%s,%s]<%s>:", ORTE_LOCAL_JOBID_PRINT(name->jobid), @@ -136,7 +132,7 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream, memset(endtag, '\0', ORTE_IOF_BASE_TAG_MAX); goto construct; } - + /* if we get here, then the data is not to be tagged - just copy it * and move on to processing */ @@ -157,7 +153,7 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream, /* start with the tag */ for (j=0, k=0; j < starttaglen && k < ORTE_IOF_BASE_TAGGED_OUT_MAX; j++) { output->data[k++] = starttag[j]; - } + } /* cycle through the data looking for * and replace those with the tag */ @@ -251,14 +247,14 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream, output->data[k] = '\n'; } output->numbytes = k; - + process: /* add this data to the write list for this fd */ 
opal_list_append(&channel->outputs, &output->super); /* record how big the buffer is */ num_buffered = opal_list_get_size(&channel->outputs); - + /* is the write event issued? */ if (!channel->pending) { /* issue it */ @@ -268,7 +264,7 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream, opal_event_add(channel->ev, 0); channel->pending = true; } - + return num_buffered; } @@ -279,7 +275,7 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) opal_list_item_t *item; orte_iof_write_output_t *output; int num_written; - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/iof/base/iof_base_select.c b/orte/mca/iof/base/iof_base_select.c index f33e423da99..3cfbc883d05 100644 --- a/orte/mca/iof/base/iof_base_select.c +++ b/orte/mca/iof/base/iof_base_select.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,14 +6,16 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -30,7 +33,7 @@ /** * Call the query function on all available components to find out if - * they want to run. Select the single component with the highest + * they want to run. Select the single component with the highest * priority. 
*/ int orte_iof_base_select(void) @@ -38,14 +41,14 @@ int orte_iof_base_select(void) orte_iof_base_component_t *best_component = NULL; orte_iof_base_module_t *best_module = NULL; int rc; - + /* * Select the best component */ if( OPAL_SUCCESS != mca_base_select("iof", orte_iof_base_framework.framework_output, &orte_iof_base_framework.framework_components, (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { + (mca_base_component_t **) &best_component, NULL) ) { /* it is okay to not find a module if we are a CM process */ if (ORTE_PROC_IS_CM) { return ORTE_SUCCESS; @@ -53,7 +56,7 @@ int orte_iof_base_select(void) /* otherwise, this is a problem */ return ORTE_ERR_NOT_FOUND; } - + /* Save the winner */ orte_iof = *best_module; /* init it */ diff --git a/orte/mca/iof/base/iof_base_setup.c b/orte/mca/iof/base/iof_base_setup.c index d12410dceb6..88b4cd87299 100644 --- a/orte/mca/iof/base/iof_base_setup.c +++ b/orte/mca/iof/base/iof_base_setup.c @@ -5,15 +5,16 @@ * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -58,12 +59,14 @@ #include "opal/util/opal_pty.h" #include "opal/util/opal_environ.h" #include "opal/util/output.h" +#include "opal/util/argv.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/iof/iof.h" +#include "orte/mca/iof/base/base.h" #include "orte/mca/iof/base/iof_base_setup.h" int @@ -141,7 +144,7 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env) #ifdef OCRNL /* OS X 10.3 does not have this value defined */ - OCRNL | + OCRNL | #endif ONLCR); if (tcsetattr(opts->p_stdout[1], TCSANOW, &term_attrs) == -1) { @@ -149,19 +152,27 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env) } ret = dup2(opts->p_stdout[1], fileno(stdout)); if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; + if( orte_iof_base.redirect_app_stderr_to_stdout ) { + ret = dup2(opts->p_stdout[1], fileno(stderr)); + if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; + } close(opts->p_stdout[1]); } else { if(opts->p_stdout[1] != fileno(stdout)) { ret = dup2(opts->p_stdout[1], fileno(stdout)); if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; - close(opts->p_stdout[1]); + if( orte_iof_base.redirect_app_stderr_to_stdout ) { + ret = dup2(opts->p_stdout[1], fileno(stderr)); + if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; + } + close(opts->p_stdout[1]); } } if (opts->connect_stdin) { if(opts->p_stdin[0] != fileno(stdin)) { ret = dup2(opts->p_stdin[0], fileno(stdin)); if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; - close(opts->p_stdin[0]); + close(opts->p_stdin[0]); } } else { int fd; @@ -174,13 +185,16 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env) close(fd); } } + if(opts->p_stderr[1] != fileno(stderr)) { - ret = dup2(opts->p_stderr[1], fileno(stderr)); - if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; + if( 
!orte_iof_base.redirect_app_stderr_to_stdout ) { + ret = dup2(opts->p_stderr[1], fileno(stderr)); + if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; + } close(opts->p_stderr[1]); } - if (!orte_map_stddiag_to_stderr) { + if (!orte_map_stddiag_to_stderr && !orte_map_stddiag_to_stdout ) { /* Set an environment variable that the new child process can use to get the fd of the pipe connected to the INTERNAL IOF tag. */ asprintf(&str, "%d", opts->p_internal[1]); @@ -189,6 +203,9 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env) free(str); } } + else if( orte_map_stddiag_to_stdout ) { + opal_setenv("OPAL_OUTPUT_INTERNAL_TO_STDOUT", "1", true, env); + } return ORTE_SUCCESS; } diff --git a/orte/mca/iof/base/iof_base_setup.h b/orte/mca/iof/base/iof_base_setup.h index 44982a4a10b..bb74bf1e802 100644 --- a/orte/mca/iof/base/iof_base_setup.h +++ b/orte/mca/iof/base/iof_base_setup.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/orte/mca/iof/hnp/Makefile.am b/orte/mca/iof/hnp/Makefile.am index 0b50b751c70..219ecb7a1f0 100644 --- a/orte/mca/iof/hnp/Makefile.am +++ b/orte/mca/iof/hnp/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. 
All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/iof/hnp/iof_hnp.c b/orte/mca/iof/hnp/iof_hnp.c index 4d771e13c03..512fcdbe271 100644 --- a/orte/mca/iof/hnp/iof_hnp.c +++ b/orte/mca/iof/hnp/iof_hnp.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "orte_config.h" @@ -29,9 +29,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #ifdef HAVE_FCNTL_H #include @@ -73,8 +71,6 @@ static int hnp_close(const orte_process_name_t* peer, static int finalize(void); -static int hnp_ft_event(int state); - /* The API's in this module are solely used to support LOCAL * procs - i.e., procs that are co-located to the HNP. 
Remote * procs interact with the HNP's IOF via the HNP's receive function, @@ -88,33 +84,33 @@ orte_iof_base_module_t orte_iof_hnp_module = { hnp_close, NULL, finalize, - hnp_ft_event + NULL }; /* Initialize the module */ static int init(void) { int rc; - + /* post non-blocking recv to catch forwarded IO from * the orteds - */ + */ orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP, ORTE_RML_PERSISTENT, orte_iof_hnp_recv, NULL); - + if (ORTE_SUCCESS != (rc = orte_rml.add_exception_handler(orte_iof_hnp_exception_handler))) { ORTE_ERROR_LOG(rc); orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP); return rc; } - + OBJ_CONSTRUCT(&mca_iof_hnp_component.sinks, opal_list_t); OBJ_CONSTRUCT(&mca_iof_hnp_component.procs, opal_list_t); mca_iof_hnp_component.stdinev = NULL; - + return ORTE_SUCCESS; } @@ -151,18 +147,18 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, if (ORTE_VPID_INVALID == dst_name->vpid || fd < 0) { return ORTE_SUCCESS; } - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:hnp pushing fd %d for process %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd, ORTE_NAME_PRINT(dst_name))); - + if (!(src_tag & ORTE_IOF_STDIN)) { /* set the file descriptor to non-blocking - do this before we setup * and activate the read event in case it fires right away */ if((flags = fcntl(fd, F_GETFL, 0)) < 0) { - opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } else { flags |= O_NONBLOCK; @@ -215,7 +211,7 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, orte_iof_base_write_handler, &mca_iof_hnp_component.sinks); } - + SETUP: /* define a read event and activate it */ if (src_tag & ORTE_IOF_STDOUT) { @@ -273,7 +269,7 @@ static int hnp_push(const orte_process_name_t* dst_name, 
orte_iof_tag_t src_tag, sink->daemon.vpid = proc->node->daemon->name.vpid; } } - + /* now setup the read - but check to only do this once */ if (NULL == mca_iof_hnp_component.stdinev) { /* Since we are the HNP, we don't want to set nonblocking on our @@ -284,16 +280,16 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, * This causes things like "mpirun -np 1 big_app | cat" to lose * output, because cat's stdout is then ALSO non-blocking and cat * isn't built to deal with that case (same with almost all other - * unix text utils). + * unix text utils). */ if (0 != fd) { if((flags = fcntl(fd, F_GETFL, 0)) < 0) { - opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } else { flags |= O_NONBLOCK; fcntl(fd, F_SETFL, flags); - } + } } if (isatty(fd)) { /* We should avoid trying to read from stdin if we @@ -306,7 +302,7 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, opal_event_signal_set(orte_event_base, &mca_iof_hnp_component.stdinsig, SIGCONT, orte_iof_hnp_stdin_cb, NULL); - + /* setup a read event to read stdin, but don't activate it yet. The * dst_name indicates who should receive the stdin. 
If that recipient * doesn't do a corresponding pull, however, then the stdin will @@ -315,7 +311,7 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, ORTE_IOF_READ_EVENT(&mca_iof_hnp_component.stdinev, dst_name, fd, ORTE_IOF_STDIN, orte_iof_hnp_read_local_handler, false); - + /* check to see if we want the stdin read event to be * active - we will always at least define the event, * but may delay its activation @@ -348,28 +344,28 @@ static int hnp_pull(const orte_process_name_t* dst_name, { orte_iof_sink_t *sink; int flags; - + /* this is a local call - only stdin is supported */ if (ORTE_IOF_STDIN != src_tag) { return ORTE_ERR_NOT_SUPPORTED; } - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:hnp pulling fd %d for process %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd, ORTE_NAME_PRINT(dst_name))); - + /* set the file descriptor to non-blocking - do this before we setup * the sink in case it fires right away */ if((flags = fcntl(fd, F_GETFL, 0)) < 0) { - opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } else { flags |= O_NONBLOCK; fcntl(fd, F_SETFL, flags); } - + ORTE_IOF_SINK_DEFINE(&sink, dst_name, fd, ORTE_IOF_STDIN, stdin_write_handler, &mca_iof_hnp_component.sinks); @@ -389,7 +385,7 @@ static int hnp_close(const orte_process_name_t* peer, opal_list_item_t *item, *next_item; orte_iof_sink_t* sink; orte_ns_cmp_bitmask_t mask; - + for(item = opal_list_get_first(&mca_iof_hnp_component.sinks); item != opal_list_get_end(&mca_iof_hnp_component.sinks); item = next_item ) { @@ -397,10 +393,10 @@ static int hnp_close(const orte_process_name_t* peer, next_item = opal_list_get_next(item); mask = ORTE_NS_CMP_ALL; - + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, peer) && (source_tag & sink->tag)) { - + /* No 
need to delete the event or close the file * descriptor - the destructor will automatically * do it for us. @@ -420,7 +416,7 @@ static int finalize(void) orte_iof_write_event_t *wev; int num_written; bool dump; - + /* check if anything is still trying to be written out */ wev = orte_iof_base.iof_write_stdout->wev; if (!opal_list_is_empty(&wev->outputs)) { @@ -457,20 +453,12 @@ static int finalize(void) } } } - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP); - return ORTE_SUCCESS; -} + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP); -int hnp_ft_event(int state) { - /* - * Replica doesn't need to do anything for a checkpoint - */ return ORTE_SUCCESS; } - /* this function is called by the event library and thus * can access information global to the state machine */ @@ -481,14 +469,14 @@ static void stdin_write_handler(int fd, short event, void *cbdata) opal_list_item_t *item; orte_iof_write_output_t *output; int num_written; - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s hnp:stdin:write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); - + wev->pending = false; - + while (NULL != (item = opal_list_remove_first(&wev->outputs))) { output = (orte_iof_write_output_t*)item; /* if an abnormal termination has occurred, just dump @@ -527,7 +515,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) wev->pending = true; opal_event_add(wev->ev, 0); goto CHECK; - } + } /* otherwise, something bad happened so all we can do is declare an * error and abort */ @@ -547,7 +535,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again - * when the fd is ready. + * when the fd is ready. 
*/ wev->pending = true; opal_event_add(wev->ev, 0); @@ -591,10 +579,10 @@ orte_iof_hnp_exception_handler(orte_process_name_t* peer, orte_rml_exception_t r { #if 0 orte_iof_base_endpoint_t *endpoint; - opal_output_verbose(1, orte_iof_base_framework.framework_output, + opal_output_verbose(1, orte_iof_base_framework.framework_output, "iof svc exception handler! %s\n", ORTE_NAME_PRINT((orte_process_name_t*)peer)); - + /* If we detect an exception on the RML connection to a peer, delete all of its subscriptions and publications. Note that exceptions can be detected during a normal RML shutdown; they @@ -602,13 +590,13 @@ orte_iof_hnp_exception_handler(orte_process_name_t* peer, orte_rml_exception_t r orte_iof_hnp_sub_delete_all(peer); orte_iof_hnp_pub_delete_all(peer); opal_output_verbose(1, orte_iof_base_framework.framework_output, "deleted all pubs and subs\n"); - + /* Find any streams on any endpoints for this peer and close them */ - while (NULL != + while (NULL != (endpoint = orte_iof_base_endpoint_match(peer, ORTE_NS_CMP_ALL, ORTE_IOF_ANY))) { orte_iof_base_endpoint_closed(endpoint); - + /* Delete the endpoint that we just matched */ orte_iof_base_endpoint_delete(peer, ORTE_NS_CMP_ALL, ORTE_IOF_ANY); } diff --git a/orte/mca/iof/hnp/iof_hnp.h b/orte/mca/iof/hnp/iof_hnp.h index a35f32d9d65..86ed651808c 100644 --- a/orte/mca/iof/hnp/iof_hnp.h +++ b/orte/mca/iof/hnp/iof_hnp.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -56,9 +56,9 @@ BEGIN_C_DECLS /** - * IOF HNP Component + * IOF HNP Component */ -struct orte_iof_hnp_component_t { +struct orte_iof_hnp_component_t { orte_iof_base_component_t super; opal_list_t sinks; opal_list_t procs; @@ -84,5 +84,5 @@ int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, unsigned char *data, int numbytes); END_C_DECLS - + #endif diff --git a/orte/mca/iof/hnp/iof_hnp_component.c b/orte/mca/iof/hnp/iof_hnp_component.c index 71f4592200c..84787f7d3e3 100644 --- a/orte/mca/iof/hnp/iof_hnp_component.c +++ b/orte/mca/iof/hnp/iof_hnp_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -48,7 +48,7 @@ orte_iof_hnp_component_t mca_iof_hnp_component = { { /* First, the mca_base_component_t struct containing meta information about the component itself */ - + .iof_version = { ORTE_IOF_BASE_VERSION_2_0_0, @@ -95,9 +95,9 @@ static int orte_iof_hnp_query(mca_base_module_t **module, int *priority) *module = NULL; return ORTE_ERROR; } - + *priority = 100; *module = (mca_base_module_t *) &orte_iof_hnp_module; - + return ORTE_SUCCESS; } diff --git a/orte/mca/iof/hnp/iof_hnp_read.c b/orte/mca/iof/hnp/iof_hnp_read.c index 2660a5640f6..9027d72ab60 100644 --- a/orte/mca/iof/hnp/iof_hnp_read.c +++ b/orte/mca/iof/hnp/iof_hnp_read.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2014-2015 Intel Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,9 +27,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/dss/dss.h" @@ -77,7 +75,7 @@ bool orte_iof_hnp_stdin_check(int fd) void orte_iof_hnp_stdin_cb(int fd, short event, void *cbdata) { bool should_process = orte_iof_hnp_stdin_check(0); - + if (should_process) { mca_iof_hnp_component.stdinev->active = true; opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); @@ -100,18 +98,18 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) int rc; orte_ns_cmp_bitmask_t mask; bool exclusive; - + /* read up to the fragment size */ numbytes = read(fd, data, sizeof(data)); - + if (numbytes < 0) { /* either we have a connection error or it was a non-blocking read */ - + /* non-blocking, retry */ if (EAGAIN == errno || EINTR == errno) { opal_event_add(rev->ev, 0); return; - } + } OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:hnp:read handler %s Error on connection:%d", @@ -123,13 +121,13 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) */ numbytes = 0; } - + /* is this read from our stdin? 
*/ if (ORTE_IOF_STDIN & rev->tag) { /* The event has fired, so it's no longer active until we re-add it */ mca_iof_hnp_component.stdinev->active = false; - + /* if job termination has been ordered, just ignore the * data and delete the read event */ @@ -142,12 +140,12 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) item != opal_list_get_end(&mca_iof_hnp_component.sinks); item = opal_list_get_next(item)) { orte_iof_sink_t* sink = (orte_iof_sink_t*)item; - + /* only look at stdin sinks */ if (!(ORTE_IOF_STDIN & sink->tag)) { continue; } - + mask = ORTE_NS_CMP_ALL; /* if the daemon is me, then this is a local sink */ @@ -174,7 +172,7 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) "%s sending %d bytes from stdin to daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&sink->daemon))); - + /* send the data to the daemon so it can * write it to the proc's fd - in this case, * we pass sink->name to indicate who is to @@ -212,7 +210,7 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) /* nothing more to do */ return; } - + /* this must be output from one of my local procs - see * if anyone else has requested a copy of this info */ @@ -250,7 +248,7 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? 
"stderr" : "stddiag"), ORTE_NAME_PRINT(&rev->name))); - + if (0 == numbytes) { /* if we read 0 bytes from the stdout/err/diag, there is * nothing to output - find this proc on our list and @@ -323,7 +321,7 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) } } } - + /* re-add the event */ opal_event_add(rev->ev, 0); diff --git a/orte/mca/iof/hnp/iof_hnp_receive.c b/orte/mca/iof/hnp/iof_hnp_receive.c index 6e93c0a9d2b..a250cb994cf 100644 --- a/orte/mca/iof/hnp/iof_hnp_receive.c +++ b/orte/mca/iof/hnp/iof_hnp_receive.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2014-2015 Intel Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,9 +27,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #ifdef HAVE_FCNTL_H #include #else @@ -63,7 +61,7 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, opal_list_item_t *item, *next; int rc; bool exclusive; - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s received IOF from proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -94,14 +92,14 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, } goto CLEAN_RETURN; } - + /* get name of the process whose io we are discussing */ count = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &origin, &count, ORTE_NAME))) { ORTE_ERROR_LOG(rc); goto CLEAN_RETURN; } - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s received IOF cmd from sender %s for source %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -116,7 +114,7 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(rc); goto CLEAN_RETURN; } - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s received pull cmd from remote tool %s for proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -154,7 +152,7 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, } goto CLEAN_RETURN; } - + if (ORTE_IOF_CLOSE & stream) { OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s received close cmd from remote tool %s for proc %s", @@ -189,7 +187,7 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, } goto CLEAN_RETURN; } - + /* this must have come from a daemon forwarding output - unpack the data */ numbytes=ORTE_IOF_BASE_MSG_MAX; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) { @@ -197,12 +195,12 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, goto CLEAN_RETURN; } /* numbytes will contain the actual #bytes that were 
sent */ - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s unpacked %d bytes from remote proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&origin))); - + /* cycle through the endpoints to see if someone else wants a copy */ exclusive = false; for (item = opal_list_get_first(&mca_iof_hnp_component.sinks); @@ -225,7 +223,7 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, } } } - + /* output this to our local output unless one of the sinks was exclusive */ if (!exclusive) { if (ORTE_IOF_STDOUT & stream || orte_xml_output) { @@ -234,7 +232,7 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stderr->wev); } } - + CLEAN_RETURN: return; } diff --git a/orte/mca/iof/hnp/iof_hnp_send.c b/orte/mca/iof/hnp/iof_hnp_send.c index 4ecee93c0dc..eafaefce6de 100644 --- a/orte/mca/iof/hnp/iof_hnp_send.c +++ b/orte/mca/iof/hnp/iof_hnp_send.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * All rights reserved * Copyright (c) 2014 Intel Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,9 +27,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/dss/dss.h" @@ -65,7 +63,7 @@ int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, } buf = OBJ_NEW(opal_buffer_t); - + /* pack the tag - we do this first so that flow control messages can * consist solely of the tag */ @@ -83,7 +81,7 @@ int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, OBJ_RELEASE(buf); return rc; } - + /* if data is NULL, then we are done */ if (NULL != data) { /* pack the data - if numbytes is zero, we will pack zero bytes */ @@ -93,7 +91,7 @@ int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, return rc; } } - + /* if the target is wildcard, then this needs to go to everyone - xcast it */ if (ORTE_PROC_MY_NAME->jobid == host->jobid && ORTE_VPID_WILDCARD == host->vpid) { @@ -107,7 +105,7 @@ int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, OBJ_RELEASE(sig); return ORTE_SUCCESS; } - + /* send the buffer to the host - this is either a daemon or * a tool that requested IOF */ diff --git a/orte/mca/iof/iof.h b/orte/mca/iof/iof.h index a9e86b96799..ac253da5603 100644 --- a/orte/mca/iof/iof.h +++ b/orte/mca/iof/iof.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -120,9 +120,6 @@ #include "orte/mca/mca.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - #include "orte/runtime/orte_globals.h" #include "iof_types.h" diff --git a/orte/mca/iof/iof_types.h b/orte/mca/iof/iof_types.h index 86eb524ecb3..db74fd8c3d5 100644 --- a/orte/mca/iof/iof_types.h +++ b/orte/mca/iof/iof_types.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** diff --git a/orte/mca/iof/mr_hnp/Makefile.am b/orte/mca/iof/mr_hnp/Makefile.am index a26b6e1443a..35e39c117b1 100644 --- a/orte/mca/iof/mr_hnp/Makefile.am +++ b/orte/mca/iof/mr_hnp/Makefile.am @@ -2,9 +2,9 @@ # Copyright (c) 2012 Los Alamos National Security, LLC. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/iof/mr_hnp/iof_mrhnp.c b/orte/mca/iof/mr_hnp/iof_mrhnp.c index e31eafd24dc..00fdf31b511 100644 --- a/orte/mca/iof/mr_hnp/iof_mrhnp.c +++ b/orte/mca/iof/mr_hnp/iof_mrhnp.c @@ -1,11 +1,11 @@ /* - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "orte_config.h" @@ -16,9 +16,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #ifdef HAVE_FCNTL_H #include @@ -61,8 +59,6 @@ static void mrhnp_complete(const orte_job_t *jdata); static int finalize(void); -static int mrhnp_ft_event(int state); - /* The API's in this module are solely used to support LOCAL * procs - i.e., procs that are co-located to the HNP. Remote * procs interact with the HNP's IOF via the HNP's receive function, @@ -76,7 +72,7 @@ orte_iof_base_module_t orte_iof_mrhnp_module = { mrhnp_close, mrhnp_complete, finalize, - mrhnp_ft_event + NULL }; /* Initialize the module */ @@ -84,7 +80,7 @@ static int init(void) { /* post non-blocking recv to catch forwarded IO from * the orteds - */ + */ orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP, ORTE_RML_PERSISTENT, @@ -99,7 +95,7 @@ static int init(void) return ORTE_SUCCESS; } -/* Setup to read from stdin. +/* Setup to read from stdin. 
*/ static int mrhnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd) { @@ -120,12 +116,12 @@ static int mrhnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta if (ORTE_VPID_INVALID == dst_name->vpid || fd < 0) { return ORTE_SUCCESS; } - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:mrhnp pushing fd %d for process %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd, ORTE_NAME_PRINT(dst_name))); - + /* we get a push for stdout, stderr, and stddiag on every LOCAL process, so * setup to read those streams and forward them to the next app_context */ @@ -134,7 +130,7 @@ static int mrhnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta * and activate the read event in case it fires right away */ if((flags = fcntl(fd, F_GETFL, 0)) < 0) { - opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } else { flags |= O_NONBLOCK; @@ -187,7 +183,7 @@ static int mrhnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta orte_iof_base_write_handler, &mca_iof_mr_hnp_component.sinks); } - + SETUP: /* define a read event but don't activate it */ if (src_tag & ORTE_IOF_STDOUT) { @@ -254,16 +250,16 @@ static int mrhnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta * This causes things like "mpirun -np 1 big_app | cat" to lose * output, because cat's stdout is then ALSO non-blocking and cat * isn't built to deal with that case (same with almost all other - * unix text utils). + * unix text utils). 
*/ if (0 != fd) { if((flags = fcntl(fd, F_GETFL, 0)) < 0) { - opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } else { flags |= O_NONBLOCK; fcntl(fd, F_SETFL, flags); - } + } } if (isatty(fd)) { /* We should avoid trying to read from stdin if we @@ -276,7 +272,7 @@ static int mrhnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta opal_event_signal_set(orte_event_base, &mca_iof_mr_hnp_component.stdinsig, SIGCONT, orte_iof_mrhnp_stdin_cb, NULL); - + /* setup a read event to read stdin, but don't activate it yet. The * dst_name indicates who should receive the stdin. If that recipient * doesn't do a corresponding pull, however, then the stdin will @@ -285,7 +281,7 @@ static int mrhnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta ORTE_IOF_READ_EVENT(&mca_iof_mr_hnp_component.stdinev, dst_name, fd, ORTE_IOF_STDIN, orte_iof_mrhnp_read_local_handler, false); - + /* check to see if we want the stdin read event to be * active - we will always at least define the event, * but may delay its activation @@ -328,12 +324,12 @@ static int mrhnp_pull(const orte_process_name_t* dst_name, if (ORTE_IOF_STDIN != src_tag) { return ORTE_ERR_NOT_SUPPORTED; } - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:mrhnp pulling fd %d for process %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd, ORTE_NAME_PRINT(dst_name))); - + /* get the job object for this proc and check to see if it * is a mapper - if so, add it to the jobs that receive * our stdin @@ -364,13 +360,13 @@ static int mrhnp_pull(const orte_process_name_t* dst_name, * the sink in case it fires right away */ if((flags = fcntl(fd, F_GETFL, 0)) < 0) { - opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + 
opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } else { flags |= O_NONBLOCK; fcntl(fd, F_SETFL, flags); } - + ORTE_IOF_SINK_DEFINE(&sink, dst_name, fd, ORTE_IOF_STDIN, stdin_write_handler, NULL); sink->daemon.jobid = ORTE_PROC_MY_NAME->jobid; @@ -418,10 +414,10 @@ static int mrhnp_close(const orte_process_name_t* peer, next_item = opal_list_get_next(item); mask = ORTE_NS_CMP_ALL; - + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, peer) && (source_tag & sink->tag)) { - + /* No need to delete the event or close the file * descriptor - the destructor will automatically * do it for us. @@ -515,7 +511,7 @@ static void mrhnp_complete(const orte_job_t *jdata) "%s sending close stdin to daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&daemon->name))); - + /* need to send a 0-byte message to clear the stream and close it */ send_data(&daemon->name, ORTE_IOF_STDIN, jptr->jobid, data, 0); } @@ -568,7 +564,7 @@ static int finalize(void) } } } - + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP); /* clear our stdin job array */ @@ -583,14 +579,6 @@ static int finalize(void) return ORTE_SUCCESS; } -int mrhnp_ft_event(int state) { - /* - * Replica doesn't need to do anything for a checkpoint - */ - return ORTE_SUCCESS; -} - - static void stdin_write_handler(int fd, short event, void *cbdata) { orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata; @@ -598,14 +586,14 @@ static void stdin_write_handler(int fd, short event, void *cbdata) opal_list_item_t *item; orte_iof_write_output_t *output; int num_written; - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s mrhnp:stdin:write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); - + wev->pending = false; - + while (NULL != (item = opal_list_remove_first(&wev->outputs))) { output = (orte_iof_write_output_t*)item; /* if an abnormal termination has occurred, 
just dump @@ -644,7 +632,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) wev->pending = true; opal_event_add(wev->ev, 0); goto CHECK; - } + } /* otherwise, something bad happened so all we can do is declare an * error and abort */ @@ -664,7 +652,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again - * when the fd is ready. + * when the fd is ready. */ wev->pending = true; opal_event_add(wev->ev, 0); diff --git a/orte/mca/iof/mr_hnp/iof_mrhnp.h b/orte/mca/iof/mr_hnp/iof_mrhnp.h index eb65edf0884..2611ae7b0ae 100644 --- a/orte/mca/iof/mr_hnp/iof_mrhnp.h +++ b/orte/mca/iof/mr_hnp/iof_mrhnp.h @@ -3,9 +3,9 @@ * All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -31,9 +31,9 @@ BEGIN_C_DECLS /** - * IOF HNP Component + * IOF HNP Component */ -typedef struct { +typedef struct { orte_iof_base_component_t super; opal_list_t sinks; opal_list_t procs; @@ -60,5 +60,5 @@ int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, unsigned char *data, int numbytes); END_C_DECLS - + #endif diff --git a/orte/mca/iof/mr_hnp/iof_mrhnp_component.c b/orte/mca/iof/mr_hnp/iof_mrhnp_component.c index e0d2335e447..87dfced7a37 100644 --- a/orte/mca/iof/mr_hnp/iof_mrhnp_component.c +++ b/orte/mca/iof/mr_hnp/iof_mrhnp_component.c @@ -4,9 +4,9 @@ * reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -37,7 +37,7 @@ orte_iof_mrhnp_component_t mca_iof_mr_hnp_component = { { /* First, the mca_base_component_t struct containing meta information about the component itself */ - + .iof_version = { ORTE_IOF_BASE_VERSION_2_0_0, @@ -88,7 +88,7 @@ static int mrhnp_query(mca_base_module_t **module, int *priority) } return ORTE_SUCCESS; } - + *priority = -1; *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/iof/mr_hnp/iof_mrhnp_read.c b/orte/mca/iof/mr_hnp/iof_mrhnp_read.c index 4cc7123edd6..e5cbb6d8abd 100644 --- a/orte/mca/iof/mr_hnp/iof_mrhnp_read.c +++ b/orte/mca/iof/mr_hnp/iof_mrhnp_read.c @@ -3,9 +3,9 @@ * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -16,9 +16,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/dss/dss.h" @@ -72,7 +70,7 @@ bool orte_iof_mrhnp_stdin_check(int fd) void orte_iof_mrhnp_stdin_cb(int fd, short event, void *cbdata) { bool should_process = orte_iof_mrhnp_stdin_check(0); - + if (should_process) { mca_iof_mr_hnp_component.stdinev->active = true; opal_event_add(mca_iof_mr_hnp_component.stdinev->ev, 0); @@ -111,12 +109,12 @@ void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata) if (numbytes < 0) { /* either we have a connection error or it was a non-blocking read */ - + /* non-blocking, retry */ if (EAGAIN == errno || EINTR == errno) { opal_event_add(rev->ev, 0); return; - } + } OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:mrhnp:read handler %s Error on connection:%d", @@ -128,7 +126,7 @@ void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata) */ numbytes = 0; } - + /* if job termination has been ordered, just ignore the * data and delete the stdin read event, if that is what fired */ @@ -143,7 +141,7 @@ void 
orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata) /* The event has fired, so it's no longer active until we * re-add it */ - mca_iof_mr_hnp_component.stdinev->active = false; + mca_iof_mr_hnp_component.stdinev->active = false; /* if this was read from my stdin, I need to send this input to all * daemons who host mapper procs */ @@ -201,7 +199,7 @@ void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata) "%s sending %d bytes from stdin to daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&daemon->name))); - + /* send the data to the daemon so it can * write it to all local procs from this job. * If the connection closed, @@ -278,7 +276,7 @@ void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&rev->name), ORTE_NAME_PRINT(&daemon->name))); - + /* send the data to the daemon so it can * write it to all local procs from this job */ @@ -286,14 +284,14 @@ void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata) } } } - + PROCESS: OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s read %d bytes from %s of %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? 
"stderr" : "stddiag"), ORTE_NAME_PRINT(&rev->name))); - + if (0 == numbytes) { /* if we read 0 bytes from the stdout/err/diag, find this proc * on our list and @@ -339,7 +337,7 @@ void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata) orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stderr->wev); } } - + /* re-add the event */ opal_event_add(rev->ev, 0); diff --git a/orte/mca/iof/mr_hnp/iof_mrhnp_receive.c b/orte/mca/iof/mr_hnp/iof_mrhnp_receive.c index b2e599914c8..93750d2480c 100644 --- a/orte/mca/iof/mr_hnp/iof_mrhnp_receive.c +++ b/orte/mca/iof/mr_hnp/iof_mrhnp_receive.c @@ -1,11 +1,11 @@ /* * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2014 Intel Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -16,9 +16,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #ifdef HAVE_FCNTL_H #include #else @@ -50,7 +48,7 @@ void orte_iof_mrhnp_recv(int status, orte_process_name_t* sender, int32_t count, numbytes; int rc; - + /* unpack the stream first as this may be flow control info */ count = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) { @@ -76,14 +74,14 @@ void orte_iof_mrhnp_recv(int status, orte_process_name_t* sender, } goto CLEAN_RETURN; } - + /* get name of the process whose io we are discussing */ count = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &origin, &count, ORTE_NAME))) { ORTE_ERROR_LOG(rc); goto CLEAN_RETURN; } - + /* this must have come from a daemon forwarding output - unpack the data */ numbytes=ORTE_IOF_BASE_MSG_MAX; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) { @@ -91,19 +89,19 @@ void orte_iof_mrhnp_recv(int status, orte_process_name_t* sender, goto CLEAN_RETURN; } /* numbytes will contain the actual #bytes that 
were sent */ - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s unpacked %d bytes from remote proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&origin))); - + /* output this to our local output */ if (ORTE_IOF_STDOUT & stream || orte_xml_output) { orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stdout->wev); } else { orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stderr->wev); } - + CLEAN_RETURN: return; } diff --git a/orte/mca/iof/mr_orted/Makefile.am b/orte/mca/iof/mr_orted/Makefile.am index 06f5cefd9b8..bc2c46e8c68 100644 --- a/orte/mca/iof/mr_orted/Makefile.am +++ b/orte/mca/iof/mr_orted/Makefile.am @@ -2,9 +2,9 @@ # Copyright (c) 2012 Los Alamos National Security, LLC. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/iof/mr_orted/iof_mrorted.c b/orte/mca/iof/mr_orted/iof_mrorted.c index a211ae90021..1988c545339 100644 --- a/orte/mca/iof/mr_orted/iof_mrorted.c +++ b/orte/mca/iof/mr_orted/iof_mrorted.c @@ -1,12 +1,12 @@ /* - * Copyright (c) 2012 Los Alamos National Security, LLC. + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -18,9 +18,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #ifdef HAVE_FCNTL_H #include @@ -62,8 +60,6 @@ static void mrorted_complete(const orte_job_t *jdata); static int finalize(void); -static int mrorted_ft_event(int state); - /* The API's in this module are solely used to support LOCAL * procs - i.e., procs that are co-located to the daemon. 
Output * from local procs is automatically sent to the HNP for output @@ -79,7 +75,7 @@ orte_iof_base_module_t orte_iof_mrorted_module = { mrorted_close, mrorted_complete, finalize, - mrorted_ft_event + NULL }; static int init(void) @@ -91,11 +87,11 @@ static int init(void) ORTE_RML_PERSISTENT, orte_iof_mrorted_recv, NULL); - + /* setup the local global variables */ OBJ_CONSTRUCT(&mca_iof_mr_orted_component.sinks, opal_list_t); OBJ_CONSTRUCT(&mca_iof_mr_orted_component.procs, opal_list_t); - + return ORTE_SUCCESS; } @@ -120,12 +116,12 @@ static int mrorted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ "%s iof:mrorted pushing fd %d for process %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd, ORTE_NAME_PRINT(dst_name))); - + /* set the file descriptor to non-blocking - do this before we setup * and activate the read event in case it fires right away */ if ((flags = fcntl(fd, F_GETFL, 0)) < 0) { - opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } else { flags |= O_NONBLOCK; @@ -179,7 +175,7 @@ static int mrorted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ orte_iof_base_write_handler, &mca_iof_mr_orted_component.sinks); } - + SETUP: /* define a read event but don't activate it */ if (src_tag & ORTE_IOF_STDOUT) { @@ -231,17 +227,17 @@ static int mrorted_pull(const orte_process_name_t* dst_name, if (ORTE_IOF_STDIN != src_tag) { return ORTE_ERR_NOT_SUPPORTED; } - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:mrorted pulling fd %d for process %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd, ORTE_NAME_PRINT(dst_name))); - + /* set the file descriptor to non-blocking - do this before we setup * the sink in case it fires right away */ if((flags = fcntl(fd, F_GETFL, 0)) < 0) { - opal_output(orte_iof_base_framework.framework_output, 
"[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } else { flags |= O_NONBLOCK; @@ -250,7 +246,7 @@ static int mrorted_pull(const orte_process_name_t* dst_name, ORTE_IOF_SINK_DEFINE(&sink, dst_name, fd, ORTE_IOF_STDIN, stdin_write_handler, NULL); - + sink->daemon.jobid = ORTE_PROC_MY_NAME->jobid; sink->daemon.vpid = ORTE_PROC_MY_NAME->vpid; @@ -296,7 +292,7 @@ static int mrorted_close(const orte_process_name_t* peer, item = next_item ) { sink = (orte_iof_sink_t*)item; next_item = opal_list_get_next(item); - + mask = ORTE_NS_CMP_ALL; if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, peer) && @@ -325,7 +321,9 @@ static void mrorted_complete(const orte_job_t *jdata) /* get the stdout target */ stdout_target = ORTE_JOBID_INVALID; jbptr = &stdout_target; - orte_get_attribute(&((orte_job_t*)jdata)->attributes, ORTE_JOB_STDOUT_TARGET, (void**)&jbptr, ORTE_JOBID); + if (!orte_get_attribute(&((orte_job_t*)jdata)->attributes, ORTE_JOB_STDOUT_TARGET, (void**)&jbptr, ORTE_JOBID)) { + return; + } /* the job is complete - close out the stdin * of any procs it was feeding @@ -351,7 +349,7 @@ static void mrorted_complete(const orte_job_t *jdata) static int finalize(void) { opal_list_item_t *item; - + while ((item = opal_list_remove_first(&mca_iof_mr_orted_component.sinks)) != NULL) { OBJ_RELEASE(item); } @@ -365,15 +363,6 @@ static int finalize(void) return ORTE_SUCCESS; } -/* - * FT event - */ - -static int mrorted_ft_event(int state) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - static void stdin_write_handler(int fd, short event, void *cbdata) { orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata; @@ -381,14 +370,14 @@ static void stdin_write_handler(int fd, short event, void *cbdata) opal_list_item_t *item; orte_iof_write_output_t *output; int num_written; - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s 
mrorted:stdin:write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); - + wev->pending = false; - + while (NULL != (item = opal_list_remove_first(&wev->outputs))) { output = (orte_iof_write_output_t*)item; if (0 == output->numbytes) { @@ -417,7 +406,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) wev->pending = true; opal_event_add(wev->ev, 0); goto CHECK; - } + } /* otherwise, something bad happened so all we can do is declare an error */ OBJ_RELEASE(output); OPAL_OUTPUT_VERBOSE((20, orte_iof_base_framework.framework_output, @@ -435,7 +424,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again - * when the fd is ready. + * when the fd is ready. */ wev->pending = true; opal_event_add(wev->ev, 0); @@ -443,7 +432,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) } OBJ_RELEASE(output); } - + CHECK: if (sink->xoff) { /* if we have told the HNP to stop reading stdin, see if diff --git a/orte/mca/iof/mr_orted/iof_mrorted.h b/orte/mca/iof/mr_orted/iof_mrorted.h index f0532a1e4e4..26ee9422d52 100644 --- a/orte/mca/iof/mr_orted/iof_mrorted.h +++ b/orte/mca/iof/mr_orted/iof_mrorted.h @@ -3,9 +3,9 @@ * All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #ifndef ORTE_IOF_MR_ORTED_H @@ -22,9 +22,9 @@ BEGIN_C_DECLS /** - * IOF MR_ORTED Component + * IOF MR_ORTED Component */ -typedef struct { +typedef struct { orte_iof_base_component_t super; opal_list_t sinks; opal_list_t procs; diff --git a/orte/mca/iof/mr_orted/iof_mrorted_component.c b/orte/mca/iof/mr_orted/iof_mrorted_component.c index 41ce83d9f6e..5ee4844ba36 100644 --- a/orte/mca/iof/mr_orted/iof_mrorted_component.c +++ b/orte/mca/iof/mr_orted/iof_mrorted_component.c @@ -4,9 +4,9 @@ * reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -76,7 +76,7 @@ static int mr_orted_query(mca_base_module_t **module, int *priority) *module = (mca_base_module_t *) &orte_iof_mrorted_module; return ORTE_SUCCESS; } - + *priority = -1; *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/iof/mr_orted/iof_mrorted_read.c b/orte/mca/iof/mr_orted/iof_mrorted_read.c index 67c3538d142..b39a1aae65a 100644 --- a/orte/mca/iof/mr_orted/iof_mrorted_read.c +++ b/orte/mca/iof/mr_orted/iof_mrorted_read.c @@ -4,9 +4,9 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -17,9 +17,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/dss/dss.h" @@ -60,12 +58,12 @@ void orte_iof_mrorted_read_handler(int fd, short event, void *cbdata) /* read up to the fragment size */ numbytes = read(fd, data, sizeof(data)); - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:mrorted:read handler read %d bytes from %s, fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&rev->name), fd)); - + if (numbytes <= 0) { if (0 > numbytes) { /* either we have a connection error or it was a non-blocking read */ @@ -73,7 +71,7 @@ void orte_iof_mrorted_read_handler(int fd, short event, void *cbdata) /* non-blocking, retry */ opal_event_add(rev->ev, 0); return; - } + } OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:mrorted:read handler %s Error on connection:%d", @@ -83,7 +81,7 @@ void orte_iof_mrorted_read_handler(int fd, short event, void *cbdata) /* numbytes must have been zero, so go down and close the fd etc */ goto CLEAN_RETURN; } - + /* see if the user wanted the output directed to files */ if (NULL != orte_output_filename) { /* find the sink for this rank */ @@ -111,7 +109,7 @@ void orte_iof_mrorted_read_handler(int fd, short 
event, void *cbdata) } } } - + if (ORTE_IOF_STDOUT & rev->tag) { /* see if we need to forward this output */ stdout_target = ORTE_JOBID_INVALID; @@ -152,7 +150,7 @@ void orte_iof_mrorted_read_handler(int fd, short event, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&rev->name), ORTE_NAME_PRINT(&daemon->name))); - + /* send the data to the daemon so it can * write it to all local procs from this job */ @@ -160,12 +158,12 @@ void orte_iof_mrorted_read_handler(int fd, short event, void *cbdata) } } } - + PROCESS: if (write_out) { /* prep the buffer */ buf = OBJ_NEW(opal_buffer_t); - + /* pack the stream first - we do this so that flow control messages can * consist solely of the tag */ @@ -173,13 +171,13 @@ void orte_iof_mrorted_read_handler(int fd, short event, void *cbdata) ORTE_ERROR_LOG(rc); goto CLEAN_RETURN; } - + /* pack name of process that gave us this data */ if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->name, 1, ORTE_NAME))) { ORTE_ERROR_LOG(rc); goto CLEAN_RETURN; } - + /* pack the data - only pack the #bytes we read! 
*/ if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &data, numbytes, OPAL_BYTE))) { ORTE_ERROR_LOG(rc); @@ -190,16 +188,16 @@ void orte_iof_mrorted_read_handler(int fd, short event, void *cbdata) OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:mrorted:read handler sending %d bytes to HNP", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes)); - + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, orte_rml_send_callback, NULL); } - + /* re-add the event */ opal_event_add(rev->ev, 0); return; - + CLEAN_RETURN: /* must be an error, or zero bytes were read indicating that the * proc terminated this IOF channel - either way, find this proc diff --git a/orte/mca/iof/mr_orted/iof_mrorted_receive.c b/orte/mca/iof/mr_orted/iof_mrorted_receive.c index a137535156c..32d3c6a0d90 100644 --- a/orte/mca/iof/mr_orted/iof_mrorted_receive.c +++ b/orte/mca/iof/mr_orted/iof_mrorted_receive.c @@ -4,9 +4,9 @@ * Copyright (c) 2014 Intel Corporation. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -17,9 +17,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/dss/dss.h" @@ -46,9 +44,9 @@ void orte_iof_mrorted_send_xonxoff(orte_process_name_t *name, orte_iof_tag_t tag { opal_buffer_t *buf; int rc; - + buf = OBJ_NEW(opal_buffer_t); - + /* pack the tag - we do this first so that flow control messages can * consist solely of the tag */ @@ -94,20 +92,20 @@ void orte_iof_mrorted_recv(int status, orte_process_name_t* sender, orte_jobid_t jobid; opal_list_item_t *item; int rc; - + /* see what stream generated this data */ count = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) { ORTE_ERROR_LOG(rc); goto CLEAN_RETURN; } - + /* if this isn't stdin, then we have an error */ if (ORTE_IOF_STDIN != stream) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); goto CLEAN_RETURN; } - + /* unpack the intended 
target */ count = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &jobid, &count, ORTE_JOBID))) { @@ -122,18 +120,18 @@ void orte_iof_mrorted_recv(int status, orte_process_name_t* sender, goto CLEAN_RETURN; } /* numbytes will contain the actual #bytes that were sent */ - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s unpacked %d bytes for local job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_JOBID_PRINT(jobid))); - + /* cycle through our list of procs */ for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs); item != opal_list_get_end(&mca_iof_mr_orted_component.procs); item = opal_list_get_next(item)) { orte_iof_proc_t* sink = (orte_iof_proc_t*)item; - + /* is this intended for this jobid? */ if (jobid == sink->name.jobid) { OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, diff --git a/orte/mca/iof/orted/Makefile.am b/orte/mca/iof/orted/Makefile.am index cca493d7e5c..81ba139a4d9 100644 --- a/orte/mca/iof/orted/Makefile.am +++ b/orte/mca/iof/orted/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/iof/orted/iof_orted.c b/orte/mca/iof/orted/iof_orted.c index 400f62719c8..6813289f51d 100644 --- a/orte/mca/iof/orted/iof_orted.c +++ b/orte/mca/iof/orted/iof_orted.c @@ -5,17 +5,17 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,9 +27,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #ifdef HAVE_FCNTL_H #include @@ -69,8 +67,6 @@ static int orted_close(const orte_process_name_t* peer, static int finalize(void); -static int orted_ft_event(int state); - /* The API's in this module are solely used to support LOCAL * procs - i.e., procs that are co-located to the daemon. 
Output * from local procs is automatically sent to the HNP for output @@ -86,7 +82,7 @@ orte_iof_base_module_t orte_iof_orted_module = { orted_close, NULL, finalize, - orted_ft_event + NULL }; static int init(void) @@ -98,12 +94,12 @@ static int init(void) ORTE_RML_PERSISTENT, orte_iof_orted_recv, NULL); - + /* setup the local global variables */ OBJ_CONSTRUCT(&mca_iof_orted_component.sinks, opal_list_t); OBJ_CONSTRUCT(&mca_iof_orted_component.procs, opal_list_t); mca_iof_orted_component.xoff = false; - + return ORTE_SUCCESS; } @@ -128,12 +124,12 @@ static int orted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta "%s iof:orted pushing fd %d for process %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd, ORTE_NAME_PRINT(dst_name))); - + /* set the file descriptor to non-blocking - do this before we setup * and activate the read event in case it fires right away */ if((flags = fcntl(fd, F_GETFL, 0)) < 0) { - opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } else { flags |= O_NONBLOCK; @@ -145,7 +141,7 @@ static int orted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta item != opal_list_get_end(&mca_iof_orted_component.procs); item = opal_list_get_next(item)) { proct = (orte_iof_proc_t*)item; - + mask = ORTE_NS_CMP_ALL; if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, dst_name)) { @@ -189,7 +185,7 @@ static int orted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta orte_iof_base_write_handler, &mca_iof_orted_component.sinks); } - + SETUP: /* define a read event and activate it */ if (src_tag & ORTE_IOF_STDOUT) { @@ -234,22 +230,22 @@ static int orted_pull(const orte_process_name_t* dst_name, { orte_iof_sink_t *sink; int flags; - + /* this is a local call - only stdin is supported */ if (ORTE_IOF_STDIN != src_tag) { return 
ORTE_ERR_NOT_SUPPORTED; } - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:orted pulling fd %d for process %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd, ORTE_NAME_PRINT(dst_name))); - + /* set the file descriptor to non-blocking - do this before we setup * the sink in case it fires right away */ if((flags = fcntl(fd, F_GETFL, 0)) < 0) { - opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", + opal_output(orte_iof_base_framework.framework_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", __FILE__, __LINE__, errno); } else { flags |= O_NONBLOCK; @@ -259,7 +255,7 @@ static int orted_pull(const orte_process_name_t* dst_name, ORTE_IOF_SINK_DEFINE(&sink, dst_name, fd, ORTE_IOF_STDIN, stdin_write_handler, &mca_iof_orted_component.sinks); - + return ORTE_SUCCESS; } @@ -275,13 +271,13 @@ static int orted_close(const orte_process_name_t* peer, opal_list_item_t *item, *next_item; orte_iof_sink_t* sink; orte_ns_cmp_bitmask_t mask; - + for (item = opal_list_get_first(&mca_iof_orted_component.sinks); item != opal_list_get_end(&mca_iof_orted_component.sinks); item = next_item ) { sink = (orte_iof_sink_t*)item; next_item = opal_list_get_next(item); - + mask = ORTE_NS_CMP_ALL; if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, peer) && @@ -303,7 +299,7 @@ static int orted_close(const orte_process_name_t* peer, static int finalize(void) { opal_list_item_t *item; - + while ((item = opal_list_remove_first(&mca_iof_orted_component.sinks)) != NULL) { OBJ_RELEASE(item); } @@ -317,15 +313,6 @@ static int finalize(void) return ORTE_SUCCESS; } -/* - * FT event - */ - -static int orted_ft_event(int state) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - static void stdin_write_handler(int fd, short event, void *cbdata) { orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata; @@ -333,14 +320,14 @@ static void stdin_write_handler(int fd, short event, void *cbdata) opal_list_item_t *item; 
orte_iof_write_output_t *output; int num_written; - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s orted:stdin:write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); - + wev->pending = false; - + while (NULL != (item = opal_list_remove_first(&wev->outputs))) { output = (orte_iof_write_output_t*)item; if (0 == output->numbytes) { @@ -369,7 +356,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) wev->pending = true; opal_event_add(wev->ev, 0); goto CHECK; - } + } /* otherwise, something bad happened so all we can do is declare an * error and abort */ @@ -394,7 +381,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* leave the write event running so it will call us again - * when the fd is ready. + * when the fd is ready. */ wev->pending = true; opal_event_add(wev->ev, 0); @@ -402,7 +389,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) } OBJ_RELEASE(output); } - + CHECK: if (mca_iof_orted_component.xoff) { /* if we have told the HNP to stop reading stdin, see if diff --git a/orte/mca/iof/orted/iof_orted.h b/orte/mca/iof/orted/iof_orted.h index 62261c392df..d534c755ea6 100644 --- a/orte/mca/iof/orted/iof_orted.h +++ b/orte/mca/iof/orted/iof_orted.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -32,7 +32,7 @@ * * Much of the intelligence of this component is actually contained in * iof_base_endpoint.c (reading and writing to local file descriptors, - * setting up events based on file descriptors, etc.). + * setting up events based on file descriptors, etc.). * * A non-blocking OOB receive is posted at the initialization of this * component to receive all messages from the HNP (e.g., data @@ -57,9 +57,9 @@ BEGIN_C_DECLS /** - * IOF ORTED Component + * IOF ORTED Component */ -struct orte_iof_orted_component_t { +struct orte_iof_orted_component_t { orte_iof_base_component_t super; opal_list_t sinks; opal_list_t procs; diff --git a/orte/mca/iof/orted/iof_orted_component.c b/orte/mca/iof/orted/iof_orted_component.c index 9cf9bde989d..f9b040a3c7b 100644 --- a/orte/mca/iof/orted/iof_orted_component.c +++ b/orte/mca/iof/orted/iof_orted_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -90,7 +90,7 @@ static int orte_iof_orted_query(mca_base_module_t **module, int *priority) *priority = 100; *module = (mca_base_module_t *) &orte_iof_orted_module; - + return ORTE_SUCCESS; } diff --git a/orte/mca/iof/orted/iof_orted_read.c b/orte/mca/iof/orted/iof_orted_read.c index 4d7d9157dc1..2f64b8e6bad 100644 --- a/orte/mca/iof/orted/iof_orted_read.c +++ b/orte/mca/iof/orted/iof_orted_read.c @@ -5,17 +5,17 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,9 +26,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/dss/dss.h" @@ -66,7 +64,7 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) opal_list_item_t *item; orte_iof_proc_t *proct; orte_ns_cmp_bitmask_t mask; - + /* read up to the fragment size */ #if !defined(__WINDOWS__) numbytes = read(fd, data, sizeof(data)); @@ -78,12 +76,12 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) numbytes = (int)readed; } #endif /* !defined(__WINDOWS__) */ - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:orted:read handler read %d bytes from %s, fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&rev->name), fd)); - + if (numbytes <= 0) { if (0 > numbytes) { /* either we have a connection error or it was a non-blocking read */ @@ -91,7 +89,7 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) /* non-blocking, retry */ opal_event_add(rev->ev, 0); return; - } + } OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:orted:read handler %s Error on connection:%d", @@ -101,7 +99,7 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) /* numbytes must have been zero, so go down and close the fd etc */ goto CLEAN_RETURN; } - + /* see if the user wanted the output directed to files */ if (NULL != orte_output_filename) { /* find the sink for this rank */ @@ -130,10 +128,10 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) } goto RESTART; } - + /* prep the buffer */ buf = OBJ_NEW(opal_buffer_t); - + /* pack the stream first - we do this so that flow control messages can * consist solely of the tag */ @@ -141,13 +139,13 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) ORTE_ERROR_LOG(rc); goto CLEAN_RETURN; } - + /* pack name of process that 
gave us this data */ if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->name, 1, ORTE_NAME))) { ORTE_ERROR_LOG(rc); goto CLEAN_RETURN; } - + /* pack the data - only pack the #bytes we read! */ if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &data, numbytes, OPAL_BYTE))) { ORTE_ERROR_LOG(rc); @@ -158,16 +156,16 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s iof:orted:read handler sending %d bytes to HNP", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes)); - + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, send_cb, NULL); - + RESTART: /* re-add the event */ opal_event_add(rev->ev, 0); return; - + CLEAN_RETURN: /* must be an error, or zero bytes were read indicating that the * proc terminated this IOF channel - either way, find this proc diff --git a/orte/mca/iof/orted/iof_orted_receive.c b/orte/mca/iof/orted/iof_orted_receive.c index 56c7e5fc02d..efb3b6a67ee 100644 --- a/orte/mca/iof/orted/iof_orted_receive.c +++ b/orte/mca/iof/orted/iof_orted_receive.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2014 Intel Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,9 +27,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/dss/dss.h" @@ -56,9 +54,9 @@ void orte_iof_orted_send_xonxoff(orte_iof_tag_t tag) { opal_buffer_t *buf; int rc; - + buf = OBJ_NEW(opal_buffer_t); - + /* pack the tag - we do this first so that flow control messages can * consist solely of the tag */ @@ -98,20 +96,20 @@ void orte_iof_orted_recv(int status, orte_process_name_t* sender, orte_process_name_t target; opal_list_item_t *item; int rc; - + /* see what stream generated this data */ count = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) { ORTE_ERROR_LOG(rc); goto CLEAN_RETURN; } - + /* if this isn't stdin, then we have an error */ if (ORTE_IOF_STDIN != stream) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); goto CLEAN_RETURN; } - + /* unpack the intended target */ count = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &target, &count, ORTE_NAME))) { @@ -126,18 +124,18 @@ void orte_iof_orted_recv(int status, orte_process_name_t* sender, goto CLEAN_RETURN; } /* numbytes will contain the actual #bytes that were sent */ - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s unpacked %d bytes for local proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&target))); - + /* cycle through our list of sinks */ for (item = opal_list_get_first(&mca_iof_orted_component.sinks); item != opal_list_get_end(&mca_iof_orted_component.sinks); item = opal_list_get_next(item)) { orte_iof_sink_t* sink = (orte_iof_sink_t*)item; - + /* is this intended for this jobid? */ if (target.jobid == sink->name.jobid) { /* yes - is this intended for all vpids or this vpid? 
*/ diff --git a/orte/mca/iof/tool/Makefile.am b/orte/mca/iof/tool/Makefile.am index 7f0d5dfa163..4dceacc0636 100644 --- a/orte/mca/iof/tool/Makefile.am +++ b/orte/mca/iof/tool/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/iof/tool/iof_tool.c b/orte/mca/iof/tool/iof_tool.c index aed40c2c83f..e43db364d7f 100644 --- a/orte/mca/iof/tool/iof_tool.c +++ b/orte/mca/iof/tool/iof_tool.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,9 +27,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" @@ -56,8 +54,6 @@ static int tool_close(const orte_process_name_t* peer, static int finalize(void); -static int tool_ft_event(int state); - orte_iof_base_module_t orte_iof_tool_module = { init, tool_push, @@ -65,12 +61,12 @@ orte_iof_base_module_t orte_iof_tool_module = { tool_close, NULL, finalize, - tool_ft_event + NULL }; static int init(void) -{ +{ /* post a non-blocking RML receive to get messages from the HNP IOF component */ orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, @@ -78,9 +74,9 @@ static int init(void) ORTE_RML_PERSISTENT, orte_iof_tool_recv, NULL); - + mca_iof_tool_component.closed = false; - + return ORTE_SUCCESS; } @@ -96,7 +92,7 @@ static int tool_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag * stdin is being read/used, and the impossibility of resolving * potential interleaving of the data */ - + return ORTE_ERR_NOT_SUPPORTED; } @@ -129,22 +125,22 @@ static int tool_pull(const orte_process_name_t* src_name, * close any or all of those streams, so the success of this call * will depend upon how the user executed the application */ - + opal_buffer_t *buf; orte_iof_tag_t tag; orte_process_name_t hnp; int rc; - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s pulling output for proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(src_name))); buf = OBJ_NEW(opal_buffer_t); - + /* setup the tag to pull from HNP */ tag = src_tag | ORTE_IOF_PULL; - + /* pack the tag - we do this first so that flow control messages can * consist solely of the tag */ @@ -170,7 +166,7 @@ static int tool_pull(const orte_process_name_t* src_name, ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid); orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP, 
send_cb, NULL); - + return ORTE_SUCCESS; } @@ -181,22 +177,22 @@ static int tool_close(const orte_process_name_t* src_name, /* if we are a tool, then we need to request the HNP to stop * forwarding data from this process/stream */ - + opal_buffer_t *buf; orte_iof_tag_t tag; orte_process_name_t hnp; int rc; - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s closing output for proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(src_name))); - + buf = OBJ_NEW(opal_buffer_t); - + /* setup the tag to stop the copy */ tag = src_tag | ORTE_IOF_CLOSE; - + /* pack the tag - we do this first so that flow control messages can * consist solely of the tag */ @@ -211,7 +207,7 @@ static int tool_close(const orte_process_name_t* src_name, OBJ_RELEASE(buf); return rc; } - + /* flag that the close is incomplete */ mca_iof_tool_component.closed = false; @@ -219,7 +215,7 @@ static int tool_close(const orte_process_name_t* src_name, ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid); orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP, send_cb, NULL); - + return ORTE_SUCCESS; } @@ -230,7 +226,7 @@ static int finalize(void) orte_iof_write_event_t *wev; int num_written; bool dump; - + /* check if anything is still trying to be written out */ wev = orte_iof_base.iof_write_stdout->wev; if (!opal_list_is_empty(&wev->outputs)) { @@ -269,18 +265,10 @@ static int finalize(void) } OBJ_RELEASE(orte_iof_base.iof_write_stderr); } - + /* Cancel the RML receive */ orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_PROXY); - + return ORTE_SUCCESS; } -/* - * FT event - */ - -static int tool_ft_event(int state) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} diff --git a/orte/mca/iof/tool/iof_tool.h b/orte/mca/iof/tool/iof_tool.h index 6805c2a1529..fef45d8dbcf 100644 --- a/orte/mca/iof/tool/iof_tool.h +++ b/orte/mca/iof/tool/iof_tool.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -39,7 +39,7 @@ BEGIN_C_DECLS -struct orte_iof_tool_component_t { +struct orte_iof_tool_component_t { orte_iof_base_component_t super; bool closed; }; diff --git a/orte/mca/iof/tool/iof_tool_component.c b/orte/mca/iof/tool/iof_tool_component.c index 73a26376e05..bead06b8c5d 100644 --- a/orte/mca/iof/tool/iof_tool_component.c +++ b/orte/mca/iof/tool/iof_tool_component.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -51,7 +51,7 @@ orte_iof_tool_component_t mca_iof_tool_component = { { .iof_version = { ORTE_IOF_BASE_VERSION_2_0_0, - + .mca_component_name = "tool", MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, ORTE_RELEASE_VERSION), @@ -94,7 +94,7 @@ static int orte_iof_tool_query(mca_base_module_t **module, int *priority) *priority = 100; *module = (mca_base_module_t *) &orte_iof_tool_module; - + return ORTE_SUCCESS; } diff --git a/orte/mca/iof/tool/iof_tool_receive.c b/orte/mca/iof/tool/iof_tool_receive.c index 8ff709b5faa..5aa99d569ba 100644 --- a/orte/mca/iof/tool/iof_tool_receive.c +++ b/orte/mca/iof/tool/iof_tool_receive.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2014 Intel Corporation. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,9 +27,7 @@ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H #include -#endif /* HAVE_STRING_H */ #include "opal/dss/dss.h" @@ -54,15 +52,15 @@ void orte_iof_tool_recv(int status, orte_process_name_t* sender, orte_iof_tag_t stream; int32_t count, numbytes; int rc; - - + + /* unpack the stream first as this may be flow control info */ count = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) { ORTE_ERROR_LOG(rc); goto CLEAN_RETURN; } - + /* if this is a CLOSE tag, then ignore the rest - this is just the * tail end of a handshake to indicate we have closed a stream */ @@ -74,14 +72,14 @@ void orte_iof_tool_recv(int status, orte_process_name_t* sender, mca_iof_tool_component.closed = true; goto CLEAN_RETURN; } - + /* get name of the process whose io we are receiving */ count = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &origin, &count, ORTE_NAME))) { ORTE_ERROR_LOG(rc); goto CLEAN_RETURN; } - + /* unpack the data */ numbytes=ORTE_IOF_BASE_MSG_MAX; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) { @@ -89,12 +87,12 @@ void orte_iof_tool_recv(int status, orte_process_name_t* sender, goto CLEAN_RETURN; } /* numbytes will contain the actual #bytes that were sent */ - + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s unpacked %d bytes from remote proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes, ORTE_NAME_PRINT(&origin))); - + /* if numbytes is zero, it means that the channel was closed on the far end - for * now, we just ignore this condition */ @@ -106,7 +104,7 @@ void orte_iof_tool_recv(int status, orte_process_name_t* sender, orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stderr->wev); } } - + CLEAN_RETURN: return; } diff --git a/orte/mca/mca.h b/orte/mca/mca.h index dddc9f90e8c..a6615491a5d 100644 --- 
a/orte/mca/mca.h +++ b/orte/mca/mca.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,13 +14,13 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -/** - * @file +/** + * @file * * Top-level interface for \em all orte MCA components. */ diff --git a/orte/mca/notifier/Makefile.am b/orte/mca/notifier/Makefile.am index 303a6bfd91f..52444ea1251 100644 --- a/orte/mca/notifier/Makefile.am +++ b/orte/mca/notifier/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -23,7 +23,7 @@ noinst_LTLIBRARIES = libmca_notifier.la libmca_notifier_la_SOURCES = # local files -headers = notifier.h +headers = notifier.h libmca_notifier_la_SOURCES += $(headers) diff --git a/orte/mca/notifier/base/Makefile.am b/orte/mca/notifier/base/Makefile.am index 877953d0829..89171605f59 100644 --- a/orte/mca/notifier/base/Makefile.am +++ b/orte/mca/notifier/base/Makefile.am @@ -19,7 +19,7 @@ # headers += \ - base/base.h + base/base.h libmca_notifier_la_SOURCES += \ base/notifier_base_frame.c \ diff --git a/orte/mca/notifier/base/notifier_base_fns.c b/orte/mca/notifier/base/notifier_base_fns.c index 58bb087ec06..61e139807ff 100644 --- a/orte/mca/notifier/base/notifier_base_fns.c +++ b/orte/mca/notifier/base/notifier_base_fns.c @@ -28,7 +28,7 @@ #include "orte/mca/notifier/base/base.h" -static void orte_notifier_base_identify_modules(char ***modules, +static void orte_notifier_base_identify_modules(char ***modules, orte_notifier_request_t *req); void orte_notifier_base_log(int sd, short args, void *cbdata) @@ -37,7 +37,7 @@ void orte_notifier_base_log(int sd, short args, void *cbdata) char **modules = NULL; orte_notifier_active_module_t *imod; int i; - + /* if no modules are active, then there is nothing to do */ if (0 == opal_list_get_size(&orte_notifier_base.modules)) { return; @@ -73,7 +73,7 @@ void orte_notifier_base_event(int sd, short args, void *cbdata) char **modules = NULL; orte_notifier_active_module_t *imod; int i; - + /* if no modules are active, then there is nothing to do */ if (0 == opal_list_get_size(&orte_notifier_base.modules)) { return; @@ -109,7 +109,7 @@ void orte_notifier_base_report(int sd, short args, void *cbdata) char **modules = NULL; orte_notifier_active_module_t *imod; int i; - + /* if no modules are active, then there is nothing to do */ if (0 == opal_list_get_size(&orte_notifier_base.modules)) { return; @@ -155,34 +155,34 @@ const char* 
orte_notifier_base_sev2str(orte_notifier_severity_t severity) } } -static void orte_notifier_base_identify_modules(char ***modules, +static void orte_notifier_base_identify_modules(char ***modules, orte_notifier_request_t *req) { if (NULL != req->action) { *modules = opal_argv_split(req->action, ','); } else { - if (ORTE_NOTIFIER_EMERG == req->severity && + if (ORTE_NOTIFIER_EMERG == req->severity && (NULL != orte_notifier_base.emerg_actions)) { *modules = opal_argv_split(orte_notifier_base.emerg_actions, ','); - } else if (ORTE_NOTIFIER_ALERT == req->severity && + } else if (ORTE_NOTIFIER_ALERT == req->severity && (NULL != orte_notifier_base.alert_actions)) { *modules = opal_argv_split(orte_notifier_base.alert_actions, ','); - } else if (ORTE_NOTIFIER_CRIT == req->severity && + } else if (ORTE_NOTIFIER_CRIT == req->severity && (NULL != orte_notifier_base.crit_actions)) { *modules = opal_argv_split(orte_notifier_base.crit_actions, ','); - } else if (ORTE_NOTIFIER_WARN == req->severity && + } else if (ORTE_NOTIFIER_WARN == req->severity && (NULL != orte_notifier_base.warn_actions)) { *modules = opal_argv_split(orte_notifier_base.warn_actions, ','); - } else if (ORTE_NOTIFIER_NOTICE == req->severity && + } else if (ORTE_NOTIFIER_NOTICE == req->severity && (NULL != orte_notifier_base.notice_actions)) { *modules = opal_argv_split(orte_notifier_base.notice_actions, ','); - } else if (ORTE_NOTIFIER_INFO == req->severity && + } else if (ORTE_NOTIFIER_INFO == req->severity && (NULL != orte_notifier_base.info_actions)) { *modules = opal_argv_split(orte_notifier_base.info_actions, ','); - } else if (ORTE_NOTIFIER_DEBUG == req->severity && + } else if (ORTE_NOTIFIER_DEBUG == req->severity && (NULL != orte_notifier_base.debug_actions)) { - *modules = opal_argv_split(orte_notifier_base.debug_actions, ','); - } else if (ORTE_NOTIFIER_ERROR == req->severity && + *modules = opal_argv_split(orte_notifier_base.debug_actions, ','); + } else if (ORTE_NOTIFIER_ERROR == req->severity && 
(NULL != orte_notifier_base.error_actions)) { *modules = opal_argv_split(orte_notifier_base.error_actions, ','); } else if (NULL != orte_notifier_base.default_actions) { diff --git a/orte/mca/notifier/base/notifier_base_frame.c b/orte/mca/notifier/base/notifier_base_frame.c index 23a459f86a7..207998c4738 100644 --- a/orte/mca/notifier/base/notifier_base_frame.c +++ b/orte/mca/notifier/base/notifier_base_frame.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -24,9 +24,7 @@ #include "orte_config.h" #include "orte/constants.h" -#ifdef HAVE_STRING_H #include -#endif #include "orte/mca/mca.h" #include "opal/util/argv.h" @@ -114,7 +112,7 @@ static int orte_notifier_base_register(mca_base_register_flag_t flags) &orte_notifier_base.default_actions); if (NULL == orte_notifier_base.default_actions) { - orte_notifier_base.default_actions = strdup(ORTE_NOTIFIER_DEFAULT_MODULE); + orte_notifier_base.default_actions = strdup(ORTE_NOTIFIER_DEFAULT_MODULE); } /* let the user define a action for emergency events */ orte_notifier_base.emerg_actions = NULL; @@ -189,7 +187,7 @@ static int orte_notifier_base_register(mca_base_register_flag_t flags) &orte_notifier_base.error_actions); return ORTE_SUCCESS; -} +} static int orte_notifier_base_close(void) { @@ -197,7 +195,7 @@ static int orte_notifier_base_close(void) if (orte_notifier_base.ev_base_active) { orte_notifier_base.ev_base_active = false; - opal_stop_progress_thread("notifier", true); + opal_progress_thread_finalize("notifier"); } OPAL_LIST_FOREACH(i_module, &orte_notifier_base.modules, orte_notifier_active_module_t) 
{ @@ -206,7 +204,7 @@ static int orte_notifier_base_close(void) } } OPAL_LIST_DESTRUCT(&orte_notifier_base.modules); - + /* close all remaining available components */ return mca_base_framework_components_close(&orte_notifier_base_framework, NULL); } @@ -218,15 +216,15 @@ static int orte_notifier_base_close(void) static int orte_notifier_base_open(mca_base_open_flag_t flags) { int rc; - + /* construct the array of modules */ OBJ_CONSTRUCT(&orte_notifier_base.modules, opal_list_t); /* if requested, create our own event base */ if (use_progress_thread) { orte_notifier_base.ev_base_active = true; - if (NULL == (orte_notifier_base.ev_base = - opal_start_progress_thread("notifier", true))) { + if (NULL == (orte_notifier_base.ev_base = + opal_progress_thread_init("notifier"))) { orte_notifier_base.ev_base_active = false; return ORTE_ERROR; } diff --git a/orte/mca/notifier/base/notifier_base_select.c b/orte/mca/notifier/base/notifier_base_select.c index a0b1ef7addb..cdd9142ff2e 100644 --- a/orte/mca/notifier/base/notifier_base_select.c +++ b/orte/mca/notifier/base/notifier_base_select.c @@ -5,25 +5,23 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ #include "orte_config.h" -#ifdef HAVE_STRING_H #include -#endif #include "orte/mca/mca.h" #include "opal/mca/base/base.h" @@ -40,12 +38,12 @@ */ static bool orte_notifier_base_selected = false; -/** +/** * Function for weeding out notifier components that don't want to run. * - * Call the init function on all available compoenent to find out if - * they want to run. Select all components that don't fail. Failing - * Components will be closed and unloaded. The selected modules will + * Call the init function on all available compoenent to find out if + * they want to run. Select all components that don't fail. Failing + * Components will be closed and unloaded. The selected modules will * be returned to the called in a opal_list_t. */ @@ -102,7 +100,7 @@ int orte_notifier_base_select(void) continue; } bmod = (orte_notifier_base_module_t*)module; - + /* see if it can be init'd */ if (NULL != bmod->init) { opal_output_verbose(5, orte_notifier_base_framework.framework_output, @@ -116,7 +114,7 @@ int orte_notifier_base_select(void) * Append them to the list */ opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:base:select adding component [%s]", + "notifier:base:select adding component [%s]", component->base_version.mca_component_name); tmp_module = OBJ_NEW(orte_notifier_active_module_t); tmp_module->component = component; diff --git a/orte/mca/notifier/notifier.h b/orte/mca/notifier/notifier.h index 0d448f2f2f6..cc40297c574 100644 --- a/orte/mca/notifier/notifier.h +++ b/orte/mca/notifier/notifier.h @@ -39,12 +39,8 @@ #include "orte_config.h" -#ifdef HAVE_STDARG_H #include -#endif -#ifdef HAVE_LIMITS_H #include -#endif #ifdef HAVE_SYSLOG_H #include #endif @@ -100,7 +96,7 @@ OBJ_CLASS_DECLARATION(orte_notifier_request_t); /* initialize the selected module */ typedef int (*orte_notifier_base_module_init_fn_t)(void); - + /* finalize the selected module */ typedef 
void (*orte_notifier_base_module_finalize_fn_t)(void); diff --git a/orte/mca/notifier/smtp/Makefile.am b/orte/mca/notifier/smtp/Makefile.am index 336fd4603cc..a25ff64b55e 100644 --- a/orte/mca/notifier/smtp/Makefile.am +++ b/orte/mca/notifier/smtp/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/notifier/smtp/configure.m4 b/orte/mca/notifier/smtp/configure.m4 index 0ee0f956fb3..a4a4771050a 100644 --- a/orte/mca/notifier/smtp/configure.m4 +++ b/orte/mca/notifier/smtp/configure.m4 @@ -6,16 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/notifier/smtp/help-orte-notifier-smtp.txt b/orte/mca/notifier/smtp/help-orte-notifier-smtp.txt index b03438387cc..58b06bc81bb 100644 --- a/orte/mca/notifier/smtp/help-orte-notifier-smtp.txt +++ b/orte/mca/notifier/smtp/help-orte-notifier-smtp.txt @@ -2,9 +2,9 @@ # # Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English help file for Open MPI's SMTP notifier support diff --git a/orte/mca/notifier/smtp/notifier_smtp.h b/orte/mca/notifier/smtp/notifier_smtp.h index 0d381450dc8..56732541864 100644 --- a/orte/mca/notifier/smtp/notifier_smtp.h +++ b/orte/mca/notifier/smtp/notifier_smtp.h @@ -1,21 +1,21 @@ /* -*- C -*- - * + * * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -59,7 +59,7 @@ typedef struct { /* * Notifier interfaces */ -ORTE_MODULE_DECLSPEC extern orte_notifier_smtp_component_t +ORTE_MODULE_DECLSPEC extern orte_notifier_smtp_component_t mca_notifier_smtp_component; extern orte_notifier_base_module_t orte_notifier_smtp_module; diff --git a/orte/mca/notifier/smtp/notifier_smtp_component.c b/orte/mca/notifier/smtp/notifier_smtp_component.c index 6a021847d1c..ee37eda593a 100644 --- a/orte/mca/notifier/smtp/notifier_smtp_component.c +++ b/orte/mca/notifier/smtp/notifier_smtp_component.c @@ -155,7 +155,7 @@ static int smtp_close(void) return ORTE_SUCCESS; } -static int smtp_component_query(mca_base_module_t **module, +static int smtp_component_query(mca_base_module_t **module, int *priority) { *priority = 0; @@ -166,7 +166,7 @@ static int smtp_component_query(mca_base_module_t **module, '\0' == mca_notifier_smtp_component.to[0] || NULL == mca_notifier_smtp_component.from_addr || '\0' == mca_notifier_smtp_component.from_addr[0]) { - orte_show_help("help-orte-notifier-smtp.txt", + orte_show_help("help-orte-notifier-smtp.txt", "to/from not specified", true); return ORTE_ERR_NOT_FOUND; } @@ -174,7 +174,7 @@ static int smtp_component_query(mca_base_module_t **module, /* Sanity checks */ if (NULL == mca_notifier_smtp_component.server || '\0' == mca_notifier_smtp_component.server[0]) { - orte_show_help("help-orte-notifier-smtp.txt", + orte_show_help("help-orte-notifier-smtp.txt", "server not specified", true); return ORTE_ERR_NOT_FOUND; } @@ -185,7 +185,7 @@ static int smtp_component_query(mca_base_module_t **module, mca_notifier_smtp_component.server_hostent = gethostbyname(mca_notifier_smtp_component.server); if (NULL == mca_notifier_smtp_component.server_hostent) { - orte_show_help("help-orte-notifier-smtp.txt", + orte_show_help("help-orte-notifier-smtp.txt", "unable to resolve server", true, mca_notifier_smtp_component.server); return 
ORTE_ERR_NOT_FOUND; @@ -193,5 +193,5 @@ static int smtp_component_query(mca_base_module_t **module, *priority = 10; *module = (mca_base_module_t *)&orte_notifier_smtp_module; - return ORTE_SUCCESS; + return ORTE_SUCCESS; } diff --git a/orte/mca/notifier/smtp/notifier_smtp_module.c b/orte/mca/notifier/smtp/notifier_smtp_module.c index 1e1a821b18f..53a035fe881 100644 --- a/orte/mca/notifier/smtp/notifier_smtp_module.c +++ b/orte/mca/notifier/smtp/notifier_smtp_module.c @@ -27,15 +27,11 @@ #include #include -#ifdef HAVE_STDARG_H #include -#endif #ifdef HAVE_UNISTD_H #include #endif -#ifdef HAVE_SIGNAL_H #include -#endif #include "opal/util/show_help.h" #include "opal/util/argv.h" @@ -52,12 +48,12 @@ /* Static API's */ -static void mylog(orte_notifier_base_severity_t severity, int errcode, +static void mylog(orte_notifier_base_severity_t severity, int errcode, const char *msg, va_list ap); -static void myhelplog(orte_notifier_base_severity_t severity, int errcode, - const char *filename, +static void myhelplog(orte_notifier_base_severity_t severity, int errcode, + const char *filename, const char *topic, va_list ap); -static void mypeerlog(orte_notifier_base_severity_t severity, int errcode, +static void mypeerlog(orte_notifier_base_severity_t severity, int errcode, orte_process_name_t *peer_proc, const char *msg, va_list ap); @@ -205,7 +201,7 @@ static int send_email(char *msg) sig.sa_handler = SIG_IGN; sigemptyset(&sig.sa_mask); sig.sa_flags = 0; - sigaction(SIGPIPE, &sig, &oldsig); + sigaction(SIGPIPE, &sig, &oldsig); set_oldsig = true; /* Try to get a libesmtp session. If so, assume that libesmtp is @@ -250,8 +246,8 @@ static int send_email(char *msg) if (0 == smtp_set_header(message, "Subject", c->subject) || 0 == smtp_set_header_option(message, "Subject", Hdr_OVERRIDE, 1) || 0 == smtp_set_header(message, "To", NULL, NULL) || - 0 == smtp_set_header(message, "From", - (NULL != c->from_name ? + 0 == smtp_set_header(message, "From", + (NULL != c->from_name ? 
c->from_name : c->from_addr), c->from_addr) || 0 == smtp_set_header(message, "X-Mailer", str) || @@ -305,15 +301,15 @@ static int send_email(char *msg) e = smtp_errno(); smtp_strerror(e, em, sizeof(em)); - orte_show_help("help-orte-notifier-smtp.txt", + orte_show_help("help-orte-notifier-smtp.txt", "send_email failed", - true, "libesmtp library call failed", + true, "libesmtp library call failed", errmsg, em, e, msg); } return err; } -static void mylog(orte_notifier_base_severity_t severity, int errcode, +static void mylog(orte_notifier_base_severity_t severity, int errcode, const char *msg, va_list ap) { char *output; diff --git a/orte/mca/notifier/syslog/Makefile.am b/orte/mca/notifier/syslog/Makefile.am index f495ed5fc92..da3d62be059 100644 --- a/orte/mca/notifier/syslog/Makefile.am +++ b/orte/mca/notifier/syslog/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/notifier/syslog/notifier_syslog.h b/orte/mca/notifier/syslog/notifier_syslog.h index a83da1d0354..a78bb915b78 100644 --- a/orte/mca/notifier/syslog/notifier_syslog.h +++ b/orte/mca/notifier/syslog/notifier_syslog.h @@ -1,21 +1,21 @@ /* -*- C -*- - * + * * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ diff --git a/orte/mca/notifier/syslog/notifier_syslog_component.c b/orte/mca/notifier/syslog/notifier_syslog_component.c index 5b52c3493ca..99085338166 100644 --- a/orte/mca/notifier/syslog/notifier_syslog_component.c +++ b/orte/mca/notifier/syslog/notifier_syslog_component.c @@ -30,7 +30,7 @@ #include "notifier_syslog.h" -static int orte_notifier_syslog_component_query(mca_base_module_t **module, +static int orte_notifier_syslog_component_query(mca_base_module_t **module, int *priority); /* @@ -51,10 +51,10 @@ orte_notifier_base_component_t mca_notifier_syslog_component = { }, }; -static int orte_notifier_syslog_component_query(mca_base_module_t **module, +static int orte_notifier_syslog_component_query(mca_base_module_t **module, int *priority) { *priority = 1; *module = (mca_base_module_t *)&orte_notifier_syslog_module; - return ORTE_SUCCESS; + return ORTE_SUCCESS; } diff --git a/orte/mca/notifier/syslog/notifier_syslog_module.c b/orte/mca/notifier/syslog/notifier_syslog_module.c index 5bbd1917dae..a8121685a33 100644 --- a/orte/mca/notifier/syslog/notifier_syslog_module.c +++ b/orte/mca/notifier/syslog/notifier_syslog_module.c @@ -28,9 +28,7 @@ #ifdef HAVE_SYSLOG_H #include #endif -#ifdef HAVE_STDARG_H #include -#endif #include "opal/util/show_help.h" @@ -58,17 +56,17 @@ orte_notifier_base_module_t orte_notifier_syslog_module = { }; -static int init(void) +static int init(void) { int opts; - + opts = LOG_CONS | LOG_PID; openlog("OpenRTE Error Report:", opts, 
LOG_USER); - + return ORTE_SUCCESS; } -static void finalize(void) +static void finalize(void) { closelog(); } diff --git a/orte/mca/odls/Makefile.am b/orte/mca/odls/Makefile.am index 2a6376b44a6..df37a515ba4 100644 --- a/orte/mca/odls/Makefile.am +++ b/orte/mca/odls/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/odls/alps/Makefile.am b/orte/mca/odls/alps/Makefile.am index 145f5a384ea..6087e915168 100644 --- a/orte/mca/odls/alps/Makefile.am +++ b/orte/mca/odls/alps/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -13,9 +13,9 @@ # Copyright (c) 2014 Los Alamos National Security, LLC. All rights # reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -25,7 +25,7 @@ sources = \ odls_alps.h \ odls_alps_component.c \ odls_alps_module.c \ - odls_alps_utils.c + odls_alps_utils.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la @@ -42,7 +42,7 @@ endif mcacomponentdir = $(ortelibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_odls_alps_la_SOURCES = $(sources) -mca_odls_alps_la_CPPFLAGS = $(odls_alps_CPPFLAGS) +mca_odls_alps_la_CPPFLAGS = $(odls_alps_CPPFLAGS) mca_odls_alps_la_LDFLAGS = -module -avoid-version $(odls_alps_LDFLAGS) mca_odls_alps_la_LIBADD = $(odls_alps_LIBS) \ $(ORTE_TOP_BUILDDIR)/orte/mca/common/alps/lib@ORTE_LIB_PREFIX@mca_common_alps.la diff --git a/orte/mca/odls/alps/configure.m4 b/orte/mca/odls/alps/configure.m4 index b8b7bad20f0..7febd842e3b 100644 --- a/orte/mca/odls/alps/configure.m4 +++ b/orte/mca/odls/alps/configure.m4 @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -27,11 +27,11 @@ AC_DEFUN([MCA_orte_odls_alps_CONFIG],[ ORTE_CHECK_ALPS([odls_alps], [odls_alps_happy="yes"], [odls_alps_happy="no"]) - AS_IF([test "$odls_alps_happy" = "yes"], + AS_IF([test "$odls_alps_happy" = "yes"], [$1 AC_SUBST([odls_alps_CPPFLAGS]) AC_SUBST([odls_alps_LDFLAGS]) - AC_SUBST([odls_alps_LIBS])], + AC_SUBST([odls_alps_LIBS])], [$2]) ])dnl diff --git a/orte/mca/odls/alps/help-orte-odls-alps.txt b/orte/mca/odls/alps/help-orte-odls-alps.txt index ae82e0f4464..a248ab22800 100644 --- a/orte/mca/odls/alps/help-orte-odls-alps.txt +++ b/orte/mca/odls/alps/help-orte-odls-alps.txt @@ -6,16 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is a US/English help file. @@ -144,4 +144,9 @@ WARNING: Open MPI call to Alps alps_app_lli_pipes function failed. Application name: %s Location: %s:%d Return Value: %d - +# +[close fds] +WARNING: Closing fds in child failed. + Local host: %s + Application name: %s + Location: %s:%d diff --git a/orte/mca/odls/alps/odls_alps_component.c b/orte/mca/odls/alps/odls_alps_component.c index ee187c9704a..d9988e05d0b 100644 --- a/orte/mca/odls/alps/odls_alps_component.c +++ b/orte/mca/odls/alps/odls_alps_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker diff --git a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index 83611fc5e17..83f7a52dfc9 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -11,10 +11,10 @@ * All rights reserved. * Copyright (c) 2007-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * * $COPYRIGHT$ @@ -49,7 +49,7 @@ * - the child tries to set affinity and do other housekeeping in * preparation of exec'ing the target executable * - if the child fails anywhere along the way, it sends a message up - * the pipe to the parent indicating what happened -- including a + * the pipe to the parent indicating what happened -- including a * rendered error message detailing the problem (i.e., human-readable). 
* - it is important that the child renders the error message: there * are so many errors that are possible that the child is really the @@ -68,9 +68,7 @@ #include "orte/constants.h" #include "orte/types.h" -#ifdef HAVE_STRING_H #include -#endif #include #ifdef HAVE_UNISTD_H #include @@ -95,20 +93,20 @@ #ifdef HAVE_NETDB_H #include #endif -#ifdef HAVE_STDLIB_H #include -#endif #ifdef HAVE_SYS_STAT_H #include #endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_STDARG_H #include -#endif #ifdef HAVE_SYS_SELECT_H #include #endif +#ifdef HAVE_DIRENT_H +#include +#endif + -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/hwloc/base/base.h" #include "opal/class/opal_pointer_array.h" #include "opal/util/opal_environ.h" @@ -143,7 +141,7 @@ static int orte_odls_alps_restart_proc(orte_proc_t *child); * Explicitly declared functions so that we can get the noreturn * attribute registered with the compiler. */ -static void send_error_show_help(int fd, int exit_status, +static void send_error_show_help(int fd, int exit_status, const char *file, const char *topic, ...) __opal_attribute_noreturn__; static int do_child(orte_app_context_t* context, @@ -171,7 +169,7 @@ static bool odls_alps_child_died(orte_proc_t *child) { time_t end; pid_t ret; - + /* Because of rounding in time (which returns whole seconds) we * have to add 1 to our wait number: this means that we wait * somewhere between (target) and (target)+1 seconds. 
Otherwise, @@ -216,7 +214,7 @@ static bool odls_alps_child_died(orte_proc_t *child) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid))); return true; } - + /* Bogus delay for 1 msec - let's actually give the CPU some time * to quit the other process (sched_yield() -- even if we have it * -- changed behavior in 2.6.3x Linux flavors to be undesirable) @@ -265,7 +263,7 @@ static int odls_alps_kill_local(pid_t pid, int signum) int orte_odls_alps_kill_local_procs(opal_pointer_array_t *procs) { int rc; - + if (ORTE_SUCCESS != (rc = orte_odls_base_default_kill_local_procs(procs, odls_alps_kill_local, odls_alps_child_died))) { ORTE_ERROR_LOG(rc); @@ -356,6 +354,59 @@ static void send_error_show_help(int fd, int exit_status, exit(exit_status); } +static int close_open_file_descriptors(int write_fd, orte_iof_base_io_conf_t opts) +{ + int rc, fd; + DIR *dir = NULL; + struct dirent *files; + int app_alps_filedes[2], alps_app_filedes[2]; + + dir = opendir("/proc/self/fd"); + if (NULL == dir) { + return ORTE_ERR_FILE_OPEN_FAILURE; + } + + /* close all file descriptors w/ exception of stdin/stdout/stderr, + the pipe used for the IOF INTERNAL messages, and the pipe up to + the parent. Be careful to retain all of the pipe fd's set up + by the apshephered. These are needed for obtaining RDMA credentials, + synchronizing with aprun, etc. 
*/ + + rc = alps_app_lli_pipes(app_alps_filedes,alps_app_filedes); + if (0 != rc) { + closedir(dir); + return ORTE_ERR_FILE_OPEN_FAILURE; + } + + while ((files = readdir(dir)) != NULL) { + if(!strncmp(files->d_name,".",1) || !strncmp(files->d_name,"..",2)) continue; + + fd = strtoul(files->d_name, NULL, 10); + if (EINVAL == errno || ERANGE == errno) { + closedir(dir); + return ORTE_ERR_TYPE_MISMATCH; + } + + /* + * skip over the pipes we have open to apshepherd or slurmd + */ + + if (fd == XTAPI_FD_IDENTITY) continue; + if (fd == XTAPI_FD_RESILIENCY) continue; + if ((fd == app_alps_filedes[0]) || + (fd == app_alps_filedes[1]) || + (fd == alps_app_filedes[0]) || + (fd == alps_app_filedes[1])) continue; + + if (fd >=3 && fd != opts.p_internal[1] && fd != write_fd) { + close(fd); + } + } + + closedir(dir); + return ORTE_SUCCESS; +} + static int do_child(orte_app_context_t* context, orte_proc_t *child, char **environ_copy, @@ -363,9 +414,7 @@ static int do_child(orte_app_context_t* context, orte_iof_base_io_conf_t opts) { int i, rc; - int app_alps_filedes[2],alps_app_filedes[2]; sigset_t sigs; - long fd, fdmax = sysconf(_SC_OPEN_MAX); char *param, *msg; if (orte_forward_job_control) { @@ -374,14 +423,14 @@ static int do_child(orte_app_context_t* context, orted. */ setpgid(0, 0); } - + /* Setup the pipe to be close-on-exec */ opal_fd_set_cloexec(write_fd); if (NULL != child) { /* setup stdout/stderr so that any error messages that we may print out will get displayed back at orterun. - + NOTE: Definitely do this AFTER we check contexts so that any error message from those two functions doesn't come out to the user. 
IF we didn't do it in this order, @@ -392,11 +441,11 @@ static int do_child(orte_app_context_t* context, always outputs a nice, single message indicating what happened */ - if (ORTE_SUCCESS != (i = orte_iof_base_setup_child(&opts, + if (ORTE_SUCCESS != (i = orte_iof_base_setup_child(&opts, &environ_copy))) { ORTE_ERROR_LOG(i); - send_error_show_help(write_fd, 1, - "help-orte-odls-alps.txt", + send_error_show_help(write_fd, 1, + "help-orte-odls-alps.txt", "iof setup failed", orte_process_info.nodename, context->app); /* Does not return */ @@ -426,7 +475,7 @@ static int do_child(orte_app_context_t* context, if (OPAL_SUCCESS != (rc = opal_util_init_sys_limits(&msg))) { send_error_show_help(write_fd, 1, "help-orte-odls-alps.txt", "set limit", - orte_process_info.nodename, context->app, + orte_process_info.nodename, context->app, __FILE__, __LINE__, msg); } /* ensure we only do this once */ @@ -434,61 +483,42 @@ static int do_child(orte_app_context_t* context, opal_unsetenv(param, &environ_copy); free(param); - /* close all file descriptors w/ exception of stdin/stdout/stderr, - the pipe used for the IOF INTERNAL messages, and the pipe up to - the parent. Be careful to retain all of the pipe fd's set up - by the apshephered. These are needed for obtaining RDMA credentials, - synchronizing with aprun, etc. */ - - rc = alps_app_lli_pipes(app_alps_filedes,alps_app_filedes); - if (0 != rc) { + if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, opts)) { send_error_show_help(write_fd, 1, "help-orte-odls-alps.txt", - "alps_app_lli_pipes", + "close fds", orte_process_info.nodename, context->app, - __FILE__, __LINE__, rc); + __FILE__, __LINE__); } - for(fd=3; fdargv == NULL) { context->argv = malloc(sizeof(char*)*2); context->argv[0] = strdup(context->app); context->argv[1] = NULL; } - + /* Set signal handlers back to the default. Do this close to the exev() because the event library may (and likely will) reset them. 
If we don't do this, the event library may have left some set that, at least on some OS's, don't get reset via fork() or exec(). Hence, the launched process could be unkillable (for example). */ - + set_handler_alps(SIGTERM); set_handler_alps(SIGINT); set_handler_alps(SIGHUP); set_handler_alps(SIGPIPE); set_handler_alps(SIGCHLD); - + /* Unblock all signals, for many of the same reasons that we set the default handlers, above. This is noticable on Linux where the event library blocks SIGTERM, but we don't want that blocked by the launched process. */ sigprocmask(0, 0, &sigs); sigprocmask(SIG_UNBLOCK, &sigs, 0); - + /* Exec the new executable */ - + if (10 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) { int jout; opal_output(0, "%s STARTING %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), context->app); @@ -499,9 +529,9 @@ static int do_child(orte_app_context_t* context, opal_output(0, "%s\tENVIRON[%d]: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), jout, environ_copy[jout]); } } - + execve(context->app, context->argv, environ_copy); - send_error_show_help(write_fd, 1, + send_error_show_help(write_fd, 1, "help-orte-odls-alps.txt", "execve error", orte_process_info.nodename, context->app, strerror(errno)); /* Does not return */ @@ -540,12 +570,12 @@ static int do_parent(orte_app_context_t* context, if (OPAL_ERR_TIMEOUT == rc) { break; } - + /* If Something Bad happened in the read, error out */ if (OPAL_SUCCESS != rc) { ORTE_ERROR_LOG(rc); close(read_fd); - + if (NULL != child) { child->state = ORTE_PROC_STATE_UNDEF; } @@ -565,7 +595,7 @@ static int do_parent(orte_app_context_t* context, if (msg.file_str_len > 0) { rc = opal_fd_read(read_fd, msg.file_str_len, file); if (OPAL_SUCCESS != rc) { - orte_show_help("help-orte-odls-alps.txt", "syscall fail", + orte_show_help("help-orte-odls-alps.txt", "syscall fail", true, orte_process_info.nodename, context->app, "opal_fd_read", __FILE__, __LINE__); @@ -579,7 +609,7 @@ static int do_parent(orte_app_context_t* 
context, if (msg.topic_str_len > 0) { rc = opal_fd_read(read_fd, msg.topic_str_len, topic); if (OPAL_SUCCESS != rc) { - orte_show_help("help-orte-odls-alps.txt", "syscall fail", + orte_show_help("help-orte-odls-alps.txt", "syscall fail", true, orte_process_info.nodename, context->app, "opal_fd_read", __FILE__, __LINE__); @@ -593,7 +623,7 @@ static int do_parent(orte_app_context_t* context, if (msg.msg_str_len > 0) { str = calloc(1, msg.msg_str_len + 1); if (NULL == str) { - orte_show_help("help-orte-odls-alps.txt", "syscall fail", + orte_show_help("help-orte-odls-alps.txt", "syscall fail", true, orte_process_info.nodename, context->app, "opal_fd_read", __FILE__, __LINE__); @@ -636,7 +666,7 @@ static int do_parent(orte_app_context_t* context, ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE); } close(read_fd); - + return ORTE_SUCCESS; } @@ -652,12 +682,12 @@ static int odls_alps_fork_local_proc(orte_app_context_t* context, orte_iof_base_io_conf_t opts; int rc, p[2]; pid_t pid; - + if (NULL != child) { /* should pull this information from MPIRUN instead of going with default */ opts.usepty = OPAL_ENABLE_PTY_SUPPORT; - + /* do we want to setup stdin? 
*/ if (NULL != child && (jobdat->stdin_target == ORTE_VPID_WILDCARD || @@ -666,7 +696,7 @@ static int odls_alps_fork_local_proc(orte_app_context_t* context, } else { opts.connect_stdin = false; } - + if (ORTE_SUCCESS != (rc = orte_iof_base_setup_prefork(&opts))) { ORTE_ERROR_LOG(rc); if (NULL != child) { @@ -693,13 +723,13 @@ static int odls_alps_fork_local_proc(orte_app_context_t* context, } return ORTE_ERR_SYS_LIMITS_PIPES; } - + /* Fork off the child */ pid = fork(); if (NULL != child) { child->pid = pid; } - + if (pid < 0) { ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN); if (NULL != child) { @@ -708,7 +738,7 @@ static int odls_alps_fork_local_proc(orte_app_context_t* context, } return ORTE_ERR_SYS_LIMITS_CHILDREN; } - + if (pid == 0) { close(p[0]); #if HAVE_SETPGID @@ -716,7 +746,7 @@ static int odls_alps_fork_local_proc(orte_app_context_t* context, #endif do_child(context, child, environ_copy, jobdat, p[1], opts); /* Does not return */ - } + } close(p[1]); return do_parent(context, child, environ_copy, jobdat, p[0], opts); @@ -735,11 +765,11 @@ int orte_odls_alps_launch_local_procs(opal_buffer_t *data) /* construct the list of children we are to launch */ if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job))) { OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output, - "%s odls:default:launch:local failed to construct child list on error %s", + "%s odls:alps:launch:local failed to construct child list on error %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); return rc; } - + /* get the RDMA credentials and push them into the launch environment */ if (ORTE_SUCCESS != (rc = orte_odls_alps_get_rdma_creds())) {; @@ -751,19 +781,19 @@ int orte_odls_alps_launch_local_procs(opal_buffer_t *data) /* launch the local procs */ ORTE_ACTIVATE_LOCAL_LAUNCH(job, odls_alps_fork_local_proc); - + return ORTE_SUCCESS; } /** * Send a signal to a pid. 
Note that if we get an error, we set the - * return value and let the upper layer print out the message. + * return value and let the upper layer print out the message. */ static int send_signal(pid_t pid, int signal) { int rc = ORTE_SUCCESS; - + OPAL_OUTPUT_VERBOSE((1, orte_odls_base_framework.framework_output, "%s sending signal %d to pid %ld", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -782,8 +812,8 @@ static int send_signal(pid_t pid, int signal) case ESRCH: /* This case can occur when we deliver a signal to a process that is no longer there. This can happen if - we deliver a signal while the job is shutting down. - This does not indicate a real problem, so just + we deliver a signal while the job is shutting down. + This does not indicate a real problem, so just ignore the error. */ break; case EPERM: @@ -793,14 +823,14 @@ static int send_signal(pid_t pid, int signal) rc = ORTE_ERROR; } } - + return rc; } static int orte_odls_alps_signal_local_procs(const orte_process_name_t *proc, int32_t signal) { int rc; - + if (ORTE_SUCCESS != (rc = orte_odls_base_default_signal_local_procs(proc, signal, send_signal))) { ORTE_ERROR_LOG(rc); return rc; @@ -811,11 +841,11 @@ static int orte_odls_alps_signal_local_procs(const orte_process_name_t *proc, in static int orte_odls_alps_restart_proc(orte_proc_t *child) { int rc; - + /* restart the local proc */ if (ORTE_SUCCESS != (rc = orte_odls_base_default_restart_proc(child, odls_alps_fork_local_proc))) { OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output, - "%s odls:default:restart_proc failed to launch on error %s", + "%s odls:alps:restart_proc failed to launch on error %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); } return rc; diff --git a/orte/mca/odls/alps/odls_alps_utils.c b/orte/mca/odls/alps/odls_alps_utils.c index 3571399d03a..2cdb2373ae6 100644 --- a/orte/mca/odls/alps/odls_alps_utils.c +++ b/orte/mca/odls/alps/odls_alps_utils.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2006 The University of 
Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -109,7 +109,7 @@ int orte_odls_alps_get_rdma_creds(void) /* * now get the GNI rdma credentials info - */ + */ ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_GNI, NULL, 0); if (ALPS_APP_LLI_ALPS_STAT_OK != ret) { @@ -152,7 +152,7 @@ int orte_odls_alps_get_rdma_creds(void) rdmacred_buf = (alpsAppGni_t *)(rdmacred_rsp->u.buf); /* - * now set up the env. variables - + * now set up the env. variables - * The cray pmi sets up 4 environment variables: * PMI_GNI_DEV_ID - format (id0:id1....idX) * PMI_GNI_LOC_ADDR - format (locaddr0:locaddr1:....locaddrX) @@ -242,7 +242,7 @@ int orte_odls_alps_get_rdma_creds(void) goto fn_exit; } - } + } fn_exit: if (ORTE_SUCCESS == ret) already_got_creds = 1; diff --git a/orte/mca/odls/base/Makefile.am b/orte/mca/odls/base/Makefile.am index 726b417e65d..8a692fb93bc 100644 --- a/orte/mca/odls/base/Makefile.am +++ b/orte/mca/odls/base/Makefile.am @@ -5,16 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# Copyright (c) 2012-2013 Los Alamos National Security, LLC. # All rights reserved # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/odls/base/base.h b/orte/mca/odls/base/base.h index 6e83bf7e9c9..83e382b2c55 100644 --- a/orte/mca/odls/base/base.h +++ b/orte/mca/odls/base/base.h @@ -5,16 +5,16 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: diff --git a/orte/mca/odls/base/help-orte-odls-base.txt b/orte/mca/odls/base/help-orte-odls-base.txt index e0b55157251..cde63e5cfd6 100644 --- a/orte/mca/odls/base/help-orte-odls-base.txt +++ b/orte/mca/odls/base/help-orte-odls-base.txt @@ -7,9 +7,9 @@ # Copyright (c) 2014 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for Open RTE's ODLS Framework diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index ec0ac9b8298..5c05d80b737 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -5,22 +5,23 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2011 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007-2011 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,9 +40,7 @@ #ifdef HAVE_SYS_PARAM_H #include #endif -#ifdef HAVE_TIME_H #include -#endif #include @@ -53,9 +52,10 @@ #include "opal/util/path.h" #include "opal/util/sys_limits.h" #include "opal/dss/dss.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/shmem/base/base.h" #include "opal/mca/pstat/pstat.h" +#include "opal/mca/pmix/pmix.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" @@ -82,15 +82,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/orted/orted.h" - -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/snapc.h" -#include "orte/mca/snapc/base/base.h" -#include "orte/mca/sstore/sstore.h" -#include "orte/mca/sstore/base/base.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#endif +#include "orte/orted/pmix/pmix_server.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/odls/base/odls_private.h" @@ -114,20 +106,20 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } - + /* get a pointer to the job map */ map = jdata->map; /* if there is no map, just return */ if (NULL == map) { return ORTE_SUCCESS; } - + /* construct a nodemap - only want updated items */ if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(&bo, true))) { ORTE_ERROR_LOG(rc); return rc; } - + /* store it */ boptr = &bo; if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &boptr, 1, OPAL_BYTE_OBJECT))) { @@ -136,7 +128,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, } /* release the data since it has now been copied into our buffer */ free(bo.bytes); - + /* if we are not using static ports, we need to send the wireup info */ if (!orte_static_ports) { /* pack a flag indicating wiring info is provided */ @@ -175,7 +167,8 @@ int 
orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, * copy of all active jobs so the grpcomm collectives can * properly work should a proc from one of the other jobs * interact with this one */ - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL)) { + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL) || + ORTE_JOBID_INVALID != jdata->originator.jobid) { OBJ_CONSTRUCT(&jobdata, opal_buffer_t); numjobs = 0; for (i=0; i < orte_job_data->size; i++) { @@ -226,7 +219,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, ORTE_ERROR_LOG(rc); return rc; } - + return ORTE_SUCCESS; } @@ -249,6 +242,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, orte_app_context_t *app; bool found; orte_node_t *node; + bool newmap = false; OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:constructing child list", @@ -313,7 +307,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, /* release the buffer */ OBJ_RELEASE(bptr); } - + /* unpack the job we are to launch */ cnt=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jdata, &cnt, ORTE_JOB))) { @@ -331,7 +325,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:construct_child_list unpacking data to launch job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job))); - + /* if we are the HNP, we don't need to unpack this buffer - we already * have all the required info in our local job array. 
So just build the * array of local children @@ -394,10 +388,11 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, goto REPORT_ERROR; } opal_pointer_array_set_item(orte_job_data, ORTE_LOCAL_JOBID(jdata->jobid), jdata); - + /* ensure the map object is present */ if (NULL == jdata->map) { jdata->map = OBJ_NEW(orte_job_map_t); + newmap = true; } /* if we have a file map, then we need to load it */ @@ -455,7 +450,9 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, if (!found) { OBJ_RETAIN(dmn->node); opal_pointer_array_add(jdata->map->nodes, dmn->node); - jdata->map->num_nodes++; + if (newmap) { + jdata->map->num_nodes++; + } } /* see if it belongs to us */ @@ -486,6 +483,13 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, } COMPLETE: + /* register this job with the PMIx server - need to wait until after we + * have computed the #local_procs before calling the function */ + if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } + return ORTE_SUCCESS; REPORT_ERROR: @@ -496,7 +500,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, * deal with the hang! */ ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_NEVER_LAUNCHED); - + return rc; } @@ -517,7 +521,7 @@ static int setup_path(orte_app_context_t *app) /* do not ERROR_LOG - it will be reported elsewhere */ goto CLEANUP; } - + /* The prior function will have done a chdir() to jump us to * wherever the app is to be executed. This could be either where * the user specified (via -wdir), or to the user's home directory @@ -538,7 +542,7 @@ static int setup_path(orte_app_context_t *app) } /* Search for the OMPI_exec_path and PATH settings in the environment. 
*/ - for (argvptr = app->env; *argvptr != NULL; argvptr++) { + for (argvptr = app->env; *argvptr != NULL; argvptr++) { if (0 == strncmp("OMPI_exec_path=", *argvptr, 15)) { mpiexec_pathenv = *argvptr + 15; } @@ -546,7 +550,7 @@ static int setup_path(orte_app_context_t *app) pathenv = *argvptr + 5; } } - + /* If OMPI_exec_path is set (meaning --path was used), then create a temporary environment to be used in the search for the executable. The PATH setting in this temporary environment is a combination of @@ -564,7 +568,7 @@ static int setup_path(orte_app_context_t *app) } else { argvptr = app->env; } - + rc = orte_util_check_context_app(app, argvptr); /* do not ERROR_LOG - it will be reported elsewhere */ if (NULL != mpiexec_pathenv) { @@ -650,7 +654,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FAILED_TO_LAUNCH); goto ERROR_OUT; } - + /* do we have any local procs to launch? */ if (0 == jobdat->num_local_procs) { /* indicate that we are done trying to launch them */ @@ -659,26 +663,6 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); goto GETOUT; } - -#if OPAL_ENABLE_FT_CR == 1 - /* - * Notify the local SnapC component regarding new job - */ - if( ORTE_SUCCESS != (rc = orte_snapc.setup_job(job) ) ) { - /* Silent Failure :/ JJH */ - ORTE_ERROR_LOG(rc); - } -#endif - -#if OPAL_ENABLE_FT_CR == 1 - for (j=0; j < jobdat->apps->size; j++) { - if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, j))) { - continue; - } - orte_sstore.fetch_app_deps(app); - } - orte_sstore.wait_all_deps(); -#endif /* track if we are indexing argvs so we don't check every time */ index_argv = orte_get_attribute(&jobdat->attributes, ORTE_JOB_INDEX_ARGV, NULL, OPAL_BOOL); @@ -690,7 +674,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) if (NULL == (app = 
(orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, j))) { continue; } - + /* if this app isn't being used on our node, skip it */ if (!ORTE_FLAG_TEST(app, ORTE_APP_FLAG_USED_ON_NODE)) { opal_output_verbose(5, orte_odls_base_framework.framework_output, @@ -698,7 +682,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j); continue; } - + /* check the system limits - if we are at our max allowed children, then * we won't be allowed to do this anyway, so we may as well abort now. * According to the documentation, num_procs = 0 is equivalent to @@ -723,15 +707,15 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) return; } } - + /* setup the environment for this app */ if (ORTE_SUCCESS != (rc = orte_schizo.setup_fork(jobdat, app))) { - + OPAL_OUTPUT_VERBOSE((10, orte_odls_base_framework.framework_output, "%s odls:launch:setup_fork failed with error %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); - + /* do not ERROR_LOG this failure - it will be reported * elsewhere. The launch is going to fail. Since we could have * multiple app_contexts, we need to ensure that we flag only @@ -752,7 +736,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) } goto GETOUT; } - + /* setup the working directory for this app - will jump us * to that directory */ @@ -803,23 +787,23 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) { continue; } - + /* does this child belong to this app? */ if (j != (int)child->app_idx) { continue; } - + /* is this child already alive? This can happen if * we are asked to launch additional processes. 
* If it has been launched, then do nothing */ if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_ALIVE)) { - + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:launch child %s has already been launched", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name))); - + continue; } /* is this child a candidate to start? it may not be alive @@ -834,21 +818,29 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) * the dss.compare function to check for equality. */ if (OPAL_EQUAL != opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID)) { - + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:launch child %s is not in job %s being launched", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name), ORTE_JOBID_PRINT(job))); - + continue; } - + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:launch working child %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name))); + /* setup the pmix environment */ + if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_fork(&child->name, &app->env))) { + ORTE_ERROR_LOG(rc); + continue; + } + /* tell the child that it is being launched via ORTE */ + opal_setenv(OPAL_MCA_PREFIX"orte_launch", "1", true, &app->env); + /* ensure we clear any prior info regarding state or exit status in * case this is a restart */ @@ -867,7 +859,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) free(child->rml_uri); child->rml_uri = NULL; } - + /* check to see if we have enough available file descriptors * to launch another child - if not, then let's wait a little * while to see if some come free. This can happen if we are @@ -892,7 +884,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) return; } } - + /* did the user request we display output in xterms? 
*/ if (NULL != orte_xterm) { opal_list_item_t *nmitem; @@ -939,7 +931,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); continue; } - + } } else if (NULL != orte_fork_agent) { /* we were given a fork agent - use it */ @@ -978,29 +970,6 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) continue; } -#if OPAL_ENABLE_FT_CR == 1 - /* - * OPAL CRS components need the opportunity to take action before a process - * is forked. - * Needs access to: - * - Environment - * - Rank/ORTE Name - * - Binary to exec - */ - if( NULL != opal_crs.crs_prelaunch ) { - if( OPAL_SUCCESS != (rc = opal_crs.crs_prelaunch(child->name.vpid, - orte_sstore_base_prelaunch_location, - &(app->app), - &(app->cwd), - &(app->argv), - &(app->env) ) ) ) { - ORTE_ERROR_LOG(rc); - child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); - continue; - } - } -#endif /* if we are indexing the argv by rank, do so now */ if (index_argv) { char *param; @@ -1010,22 +979,21 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) } if (5 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) { - opal_output(orte_odls_base_framework.framework_output, "%s odls:launch: spawning child %s", + opal_output(orte_odls_base_framework.framework_output, "%s odls:launch spawning child %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name)); - + /* dump what is going to be exec'd */ if (7 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) { opal_dss.dump(orte_odls_base_framework.framework_output, app, ORTE_APP_CONTEXT); } } - - orte_wait_cb(child, odls_base_default_wait_local_proc, NULL); + if (ORTE_SUCCESS != (rc = fork_local(app, child, app->env, jobdat))) { - orte_wait_cb_cancel(child); child->exit_code = ORTE_ERR_SILENT; /* error message already output */ 
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); } + orte_wait_cb(child, odls_base_default_wait_local_proc, NULL); /* if we indexed the argv, we need to restore it to * its original form */ @@ -1097,7 +1065,7 @@ int orte_odls_base_default_deliver_message(orte_jobid_t job, opal_buffer_t *buff if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { continue; } - + /* do we have a child from the specified job. Because the * job could be given as a WILDCARD value, we must use * the dss.compare function to check for equality. @@ -1106,12 +1074,12 @@ int orte_odls_base_default_deliver_message(orte_jobid_t job, opal_buffer_t *buff OPAL_EQUAL != opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID)) { continue; } - + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls: sending message to tag %lu on child %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)tag, ORTE_NAME_PRINT(&child->name))); - + /* if so, send the message */ relay = OBJ_NEW(opal_buffer_t); opal_dss.copy_payload(relay, buffer); @@ -1146,12 +1114,12 @@ int orte_odls_base_default_signal_local_procs(const orte_process_name_t *proc, i { int rc, i; orte_proc_t *child; - + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls: signaling proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == proc) ? 
"NULL" : ORTE_NAME_PRINT(proc))); - + /* if procs is NULL, then we want to signal all * of the local procs, so just do that case */ @@ -1167,7 +1135,7 @@ int orte_odls_base_default_signal_local_procs(const orte_process_name_t *proc, i } return rc; } - + /* we want it sent to some specified process, so find it */ for (i=0; i < orte_local_children->size; i++) { if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { @@ -1180,7 +1148,7 @@ int orte_odls_base_default_signal_local_procs(const orte_process_name_t *proc, i return rc; } } - + /* only way to get here is if we couldn't find the specified proc. * report that as an error and return it */ @@ -1203,7 +1171,7 @@ void odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata) "%s odls:wait_local_proc child process %s pid %ld terminated", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc->name), (long)proc->pid); - + /* if the child was previously flagged as dead, then just * update its exit status and * ensure that its exit state gets reported to avoid hanging @@ -1216,6 +1184,9 @@ void odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata) ORTE_NAME_PRINT(&proc->name),proc->exit_code)); if (WIFEXITED(proc->exit_code)) { proc->exit_code = WEXITSTATUS(proc->exit_code); + if (0 != proc->exit_code) { + state = ORTE_PROC_STATE_TERM_NON_ZERO; + } } else { if (WIFSIGNALED(proc->exit_code)) { state = ORTE_PROC_STATE_ABORTED_BY_SIG; @@ -1268,7 +1239,7 @@ void odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata) ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_WAITPID); goto MOVEON; } - + /* determine the state of this process */ if (WIFEXITED(proc->exit_code)) { @@ -1389,28 +1360,29 @@ void odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata) ORTE_NAME_PRINT(&proc->name) )); /* Do not decrement the number of local procs here. 
That is handled in the errmgr */ } - + MOVEON: ORTE_ACTIVATE_PROC_STATE(&proc->name, state); } typedef struct { + opal_list_item_t super; orte_proc_t *child; - orte_odls_base_kill_local_fn_t kill_local; -} odls_kill_caddy_t; - -static void kill_cbfunc(int fd, short args, void *cbdata) +} orte_odls_quick_caddy_t; +static void qcdcon(orte_odls_quick_caddy_t *p) { - odls_kill_caddy_t *cd = (odls_kill_caddy_t*)cbdata; - - if (!ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_ALIVE) || 0 == cd->child->pid) { - free(cd); - return; + p->child = NULL; +} +static void qcddes(orte_odls_quick_caddy_t *p) +{ + if (NULL != p->child) { + OBJ_RELEASE(p->child); } - cd->kill_local(cd->child->pid, SIGKILL); - free(cd); } - +OBJ_CLASS_INSTANCE(orte_odls_quick_caddy_t, + opal_list_item_t, + qcdcon, qcddes); + int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, orte_odls_base_kill_local_fn_t kill_local, orte_odls_base_child_died_fn_t child_died) @@ -1421,7 +1393,8 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, int i, j; opal_pointer_array_t procarray, *procptr; bool do_cleanup; - + orte_odls_quick_caddy_t *cd; + OBJ_CONSTRUCT(&procs_killed, opal_list_t); /* if the pointer array is NULL, then just kill everything */ @@ -1444,7 +1417,7 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, procptr = procs; do_cleanup = false; } - + /* cycle through the provided array of processes to kill */ for (i=0; i < procptr->size; i++) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(procptr, i))) { @@ -1459,14 +1432,14 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, "%s odls:kill_local_proc checking child process %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name))); - + /* do we have a child from the specified job? Because the * job could be given as a WILDCARD value, we must * check for that as well as for equality. 
*/ if (ORTE_JOBID_WILDCARD != proc->name.jobid && proc->name.jobid != child->name.jobid) { - + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:kill_local_proc child %s is not part of job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1474,13 +1447,13 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, ORTE_JOBID_PRINT(proc->name.jobid))); continue; } - + /* see if this is the specified proc - could be a WILDCARD again, so check * appropriately */ if (ORTE_VPID_WILDCARD != proc->name.vpid && proc->name.vpid != child->name.vpid) { - + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:kill_local_proc child %s is not covered by rank %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1488,17 +1461,17 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, ORTE_VPID_PRINT(proc->name.vpid))); continue; } - + /* is this process alive? if not, then nothing for us * to do to it */ if (!ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_ALIVE) || 0 == child->pid) { - + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:kill_local_proc child %s is not alive", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name))); - + /* ensure, though, that the state is terminated so we don't lockup if * the proc never started */ @@ -1531,60 +1504,27 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, if (NULL != orte_iof.close) { orte_iof.close(&child->name, ORTE_IOF_STDIN); } - + /* cancel the waitpid callback as this induces unmanageable race * conditions when we are deliberately killing the process */ orte_wait_cb_cancel(child); - - if (!do_cleanup) { - odls_kill_caddy_t *cd; - - /* if we are killing only selected procs, then do so in a gentle - fashion. First send a SIGCONT in case the process is in stopped state. - If it is in a stopped state and we do not first change it to - running, then SIGTERM will not get delivered. Ignore return - value. 
*/ - OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, - "%s SENDING SIGCONT TO %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&child->name))); - kill_local(child->pid, SIGCONT); - /* Send a sigterm to the process before sigkill to be nice */ - OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, - "%s SENDING SIGTERM TO %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&child->name))); - kill_local(child->pid, SIGTERM); - /* provide a polite delay so the proc has a chance to react */ - cd = (odls_kill_caddy_t*)malloc(sizeof(odls_kill_caddy_t)); - OBJ_RETAIN(child); // protect against race conditions - cd->child = child; - cd->kill_local = kill_local; - ORTE_TIMER_EVENT(1, 0, kill_cbfunc, ORTE_SYS_PRI); - continue; - } - - /* Force the SIGKILL just to make sure things are dead - * This fixes an issue that, if the application is masking - * SIGTERM, then the child_died() - * may return 'true' even though waipid returns with 0. - * It does this to avoid a race condition, per documentation - * in odls_default_module.c. - */ + /* First send a SIGCONT in case the process is in stopped state. + If it is in a stopped state and we do not first change it to + running, then SIGTERM will not get delivered. Ignore return + value. 
*/ OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, - "%s SENDING FORCE SIGKILL TO %s pid %lu", + "%s SENDING SIGCONT TO %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&child->name), (unsigned long)child->pid)); - kill_local(child->pid, SIGKILL); - - /* indicate the waitpid fired as this is effectively what - * has happened - */ - ORTE_FLAG_SET(child, ORTE_PROC_FLAG_WAITPID); - child->pid = 0; - + ORTE_NAME_PRINT(&child->name))); + cd = OBJ_NEW(orte_odls_quick_caddy_t); + OBJ_RETAIN(child); + cd->child = child; + opal_list_append(&procs_killed, &cd->super); + kill_local(child->pid, SIGCONT); + continue; + CLEANUP: /* ensure the child's session directory is cleaned up */ orte_session_dir_finalize(&child->name); @@ -1597,13 +1537,56 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, } } } - - /* cleanup, if required */ + + /* if we are issuing signals, then we need to wait a little + * and send the next in sequence */ + if (0 < opal_list_get_size(&procs_killed)) { + sleep(orte_odls_globals.timeout_before_sigkill); + /* issue a SIGTERM to all */ + OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) { + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, + "%s SENDING SIGTERM TO %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&cd->child->name))); + kill_local(cd->child->pid, SIGTERM); + } + /* wait a little again */ + sleep(orte_odls_globals.timeout_before_sigkill); + /* issue a SIGKILL to all */ + OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) { + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, + "%s SENDING SIGKILL TO %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&cd->child->name))); + kill_local(cd->child->pid, SIGKILL); + /* indicate the waitpid fired as this is effectively what + * has happened + */ + ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_WAITPID); + cd->child->pid = 0; + + /* mark the child as "killed" */ + 
cd->child->state = ORTE_PROC_STATE_KILLED_BY_CMD; /* we ordered it to die */ + + /* ensure the child's session directory is cleaned up */ + orte_session_dir_finalize(&cd->child->name); + /* check for everything complete - this will remove + * the child object from our local list + */ + if (ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_IOF_COMPLETE) && + ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_WAITPID)) { + ORTE_ACTIVATE_PROC_STATE(&cd->child->name, cd->child->state); + } + } + } + OPAL_LIST_DESTRUCT(&procs_killed); + + /* cleanup arrays, if required */ if (do_cleanup) { OBJ_DESTRUCT(&procarray); OBJ_DESTRUCT(&proctmp); } - + return ORTE_SUCCESS; } @@ -1614,18 +1597,18 @@ int orte_odls_base_get_proc_stats(opal_buffer_t *answer, orte_proc_t *child; opal_pstats_t stats, *statsptr; int i, j; - + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:get_proc_stats for proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); - + /* find this child */ for (i=0; i < orte_local_children->size; i++) { if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { continue; } - + if (proc->jobid == child->name.jobid && (proc->vpid == child->name.vpid || ORTE_VPID_WILDCARD == proc->vpid)) { /* found it */ @@ -1675,7 +1658,7 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, "%s odls:restart_proc for proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name))); - + /* establish our baseline working directory - we will be potentially * bouncing around as we execute this app, but we will always return * to this place as our default directory @@ -1688,7 +1671,7 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } - + child->state = ORTE_PROC_STATE_FAILED_TO_START; child->exit_code = 0; ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_WAITPID); @@ -1700,7 +1683,7 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, } app 
= (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, child->app_idx); - /* reset envars to match this child */ + /* reset envars to match this child */ if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app))) { ORTE_ERROR_LOG(rc); goto CLEANUP; @@ -1722,7 +1705,7 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, child->exit_code = ORTE_ERR_SILENT; /* error message already output */ ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); } - + CLEANUP: OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:restart of proc %s %s", diff --git a/orte/mca/odls/base/odls_base_frame.c b/orte/mca/odls/base/odls_base_frame.c index c3cd38936d1..593a1470286 100644 --- a/orte/mca/odls/base/odls_base_frame.c +++ b/orte/mca/odls/base/odls_base_frame.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,14 +26,12 @@ #include "orte_config.h" #include "orte/constants.h" -#ifdef HAVE_STRING_H #include -#endif #include "opal/class/opal_ring_buffer.h" #include "orte/mca/mca.h" #include "opal/mca/base/base.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "opal/util/output.h" #include "opal/util/path.h" #include "opal/util/argv.h" @@ -92,7 +90,7 @@ static int orte_odls_base_close(void) OBJ_RELEASE(item); } OBJ_DESTRUCT(&orte_odls_globals.xterm_ranks); - + /* cleanup the global list of local children and job data */ for (i=0; i < orte_local_children->size; i++) { if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { @@ -128,7 +126,7 @@ static int orte_odls_base_open(mca_base_open_flag_t flags) /* initialize ODLS globals */ OBJ_CONSTRUCT(&orte_odls_globals.xterm_ranks, opal_list_t); orte_odls_globals.xtermcmd = NULL; - + /* check if the user requested that we display output in xterms */ if (NULL != orte_xterm) { /* construct a list of ranks to be displayed */ @@ -175,7 +173,7 @@ static int orte_odls_base_open(mca_base_open_flag_t flags) } opal_argv_append_nosize(&orte_odls_globals.xtermcmd, "-e"); } - + /* Open up all available components */ return mca_base_framework_components_open(&orte_odls_base_framework, flags); } diff --git a/orte/mca/odls/base/odls_base_select.c b/orte/mca/odls/base/odls_base_select.c index ef88ed254a6..55f75082b30 100644 --- a/orte/mca/odls/base/odls_base_select.c +++ b/orte/mca/odls/base/odls_base_select.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology @@ -5,14 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -42,7 +45,7 @@ int orte_odls_base_select(void) if( OPAL_SUCCESS != mca_base_select("odls", orte_odls_base_framework.framework_output, &orte_odls_base_framework.framework_components, (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component) ) { + (mca_base_component_t **) &best_component, NULL) ) { /* This will only happen if no component was selected */ return ORTE_ERR_NOT_FOUND; } diff --git a/orte/mca/odls/base/odls_private.h b/orte/mca/odls/base/odls_private.h index e2782f83776..2556b9dfc17 100644 --- a/orte/mca/odls/base/odls_private.h +++ b/orte/mca/odls/base/odls_private.h @@ -5,17 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: diff --git a/orte/mca/odls/default/Makefile.am b/orte/mca/odls/default/Makefile.am index b2248eb1026..c69fac6b768 100644 --- a/orte/mca/odls/default/Makefile.am +++ b/orte/mca/odls/default/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/odls/default/configure.m4 b/orte/mca/odls/default/configure.m4 index 5f35f6cd2f0..2bc2cbb7976 100644 --- a/orte/mca/odls/default/configure.m4 +++ b/orte/mca/odls/default/configure.m4 @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/odls/default/help-orte-odls-default.txt b/orte/mca/odls/default/help-orte-odls-default.txt index ae65215d9fd..0e5d526e13f 100644 --- a/orte/mca/odls/default/help-orte-odls-default.txt +++ b/orte/mca/odls/default/help-orte-odls-default.txt @@ -6,16 +6,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is a US/English help file. diff --git a/orte/mca/odls/default/odls_default_component.c b/orte/mca/odls/default/odls_default_component.c index e695101e38f..663e674acd8 100644 --- a/orte/mca/odls/default/odls_default_component.c +++ b/orte/mca/odls/default/odls_default_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -79,7 +79,7 @@ int orte_odls_default_component_query(mca_base_module_t **module, int *priority) /* the base open/select logic protects us against operation when * we are NOT in a daemon, so we don't have to check that here */ - + /* we have built some logic into the configure.m4 file that checks * to see if we have "fork" support and only builds this component * if we do. Hence, we only get here if we CAN build - in which diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index f79fe63eba0..cc6c3146272 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -11,11 +11,11 @@ * All rights reserved. * Copyright (c) 2007-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -49,7 +49,7 @@ * - the child tries to set affinity and do other housekeeping in * preparation of exec'ing the target executable * - if the child fails anywhere along the way, it sends a message up - * the pipe to the parent indicating what happened -- including a + * the pipe to the parent indicating what happened -- including a * rendered error message detailing the problem (i.e., human-readable). 
* - it is important that the child renders the error message: there * are so many errors that are possible that the child is really the @@ -68,9 +68,7 @@ #include "orte/constants.h" #include "orte/types.h" -#ifdef HAVE_STRING_H #include -#endif #include #ifdef HAVE_UNISTD_H #include @@ -95,20 +93,20 @@ #ifdef HAVE_NETDB_H #include #endif -#ifdef HAVE_STDLIB_H #include -#endif #ifdef HAVE_SYS_STAT_H #include #endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_STDARG_H #include -#endif #ifdef HAVE_SYS_SELECT_H #include #endif +#ifdef HAVE_DIRENT_H +#include +#endif +#include -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/hwloc/base/base.h" #include "opal/class/opal_pointer_array.h" #include "opal/util/opal_environ.h" @@ -143,7 +141,7 @@ static int orte_odls_default_restart_proc(orte_proc_t *child); * Explicitly declared functions so that we can get the noreturn * attribute registered with the compiler. */ -static void send_error_show_help(int fd, int exit_status, +static void send_error_show_help(int fd, int exit_status, const char *file, const char *topic, ...) __opal_attribute_noreturn__; static int do_child(orte_app_context_t* context, @@ -171,7 +169,7 @@ static bool odls_default_child_died(orte_proc_t *child) { time_t end; pid_t ret; - + /* Because of rounding in time (which returns whole seconds) we * have to add 1 to our wait number: this means that we wait * somewhere between (target) and (target)+1 seconds. Otherwise, @@ -196,18 +194,18 @@ static bool odls_default_child_died(orte_proc_t *child) * that occasionally causes us to incorrectly report a proc * as refusing to die. Unfortunately, errno may not be reset * by waitpid in this case, so we cannot check it. - * - * (note the previous fix to this, to return 'process dead' - * here, fixes the race condition at the cost of reporting - * all live processes have immediately died! 
Better to - * occasionally report a dead process as still living - - * which will occasionally trip the timeout for cases that - * are right on the edge.) + * + * (note the previous fix to this, to return 'process dead' + * here, fixes the race condition at the cost of reporting + * all live processes have immediately died! Better to + * occasionally report a dead process as still living - + * which will occasionally trip the timeout for cases that + * are right on the edge.) */ OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output, "%s odls:default:WAITPID INDICATES PID %d MAY HAVE ALREADY EXITED", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid))); - /* Do nothing, process still alive */ + /* Do nothing, process still alive */ } else if (-1 == ret && ECHILD == errno) { /* The pid no longer exists, so we'll call this "good enough for government work" */ @@ -216,7 +214,7 @@ static bool odls_default_child_died(orte_proc_t *child) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid))); return true; } - + /* Bogus delay for 1 msec - let's actually give the CPU some time * to quit the other process (sched_yield() -- even if we have it * -- changed behavior in 2.6.3x Linux flavors to be undesirable) @@ -265,7 +263,7 @@ static int odls_default_kill_local(pid_t pid, int signum) int orte_odls_default_kill_local_procs(opal_pointer_array_t *procs) { int rc; - + if (ORTE_SUCCESS != (rc = orte_odls_base_default_kill_local_procs(procs, odls_default_kill_local, odls_default_child_died))) { ORTE_ERROR_LOG(rc); @@ -356,6 +354,33 @@ static void send_error_show_help(int fd, int exit_status, exit(exit_status); } +/* close all open file descriptors w/ exception of stdin/stdout/stderr, + the pipe used for the IOF INTERNAL messages, and the pipe up to + the parent. 
*/ +static int close_open_file_descriptors(int write_fd, + orte_iof_base_io_conf_t opts) { + DIR *dir = opendir("/proc/self/fd"); + if (NULL == dir) { + return ORTE_ERR_FILE_OPEN_FAILURE; + } + struct dirent *files; + while (NULL != (files = readdir(dir))) { + if (!isdigit(files->d_name[0])) { + continue; + } + int fd = strtol(files->d_name, NULL, 10); + if (errno == EINVAL || errno == ERANGE) { + closedir(dir); + return ORTE_ERR_TYPE_MISMATCH; + } + if (fd >=3 && fd != opts.p_internal[1] && fd != write_fd) { + close(fd); + } + } + closedir(dir); + return ORTE_SUCCESS; +} + static int do_child(orte_app_context_t* context, orte_proc_t *child, char **environ_copy, @@ -367,20 +392,20 @@ static int do_child(orte_app_context_t* context, long fd, fdmax = sysconf(_SC_OPEN_MAX); char *param, *msg; - if (orte_forward_job_control) { - /* Set a new process group for this child, so that a - SIGSTOP can be sent to it without being sent to the - orted. */ - setpgid(0, 0); - } - +#if HAVE_SETPGID + /* Set a new process group for this child, so that a + SIGSTOP can be sent to it without being sent to the + orted. */ + setpgid(0, 0); +#endif + /* Setup the pipe to be close-on-exec */ opal_fd_set_cloexec(write_fd); if (NULL != child) { /* setup stdout/stderr so that any error messages that we may print out will get displayed back at orterun. - + NOTE: Definitely do this AFTER we check contexts so that any error message from those two functions doesn't come out to the user. 
IF we didn't do it in this order, @@ -391,14 +416,16 @@ static int do_child(orte_app_context_t* context, always outputs a nice, single message indicating what happened */ - if (ORTE_SUCCESS != (i = orte_iof_base_setup_child(&opts, - &environ_copy))) { - ORTE_ERROR_LOG(i); - send_error_show_help(write_fd, 1, - "help-orte-odls-default.txt", - "iof setup failed", - orte_process_info.nodename, context->app); - /* Does not return */ + if (ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { + if (ORTE_SUCCESS != (i = orte_iof_base_setup_child(&opts, + &environ_copy))) { + ORTE_ERROR_LOG(i); + send_error_show_help(write_fd, 1, + "help-orte-odls-default.txt", + "iof setup failed", + orte_process_info.nodename, context->app); + /* Does not return */ + } } /* now set any child-level controls such as binding */ @@ -425,7 +452,7 @@ static int do_child(orte_app_context_t* context, if (OPAL_SUCCESS != (rc = opal_util_init_sys_limits(&msg))) { send_error_show_help(write_fd, 1, "help-orte-odls-default.txt", "set limit", - orte_process_info.nodename, context->app, + orte_process_info.nodename, context->app, __FILE__, __LINE__, msg); } /* ensure we only do this once */ @@ -433,56 +460,48 @@ static int do_child(orte_app_context_t* context, opal_unsetenv(param, &environ_copy); free(param); - /* close all file descriptors w/ exception of stdin/stdout/stderr, + /* close all open file descriptors w/ exception of stdin/stdout/stderr, the pipe used for the IOF INTERNAL messages, and the pipe up to the parent. */ - for(fd=3; fdargv == NULL) { context->argv = malloc(sizeof(char*)*2); context->argv[0] = strdup(context->app); context->argv[1] = NULL; } - + /* Set signal handlers back to the default. Do this close to the exev() because the event library may (and likely will) reset them. If we don't do this, the event library may have left some set that, at least on some OS's, don't get reset via fork() or exec(). Hence, the launched process could be unkillable (for example). 
*/ - + set_handler_default(SIGTERM); set_handler_default(SIGINT); set_handler_default(SIGHUP); set_handler_default(SIGPIPE); set_handler_default(SIGCHLD); - + /* Unblock all signals, for many of the same reasons that we set the default handlers, above. This is noticable on Linux where the event library blocks SIGTERM, but we don't want that blocked by the launched process. */ sigprocmask(0, 0, &sigs); sigprocmask(SIG_UNBLOCK, &sigs, 0); - + /* Exec the new executable */ - - if (10 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) { - int jout; - opal_output(0, "%s STARTING %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), context->app); - for (jout=0; NULL != context->argv[jout]; jout++) { - opal_output(0, "%s\tARGV[%d]: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), jout, context->argv[jout]); - } - for (jout=0; NULL != environ_copy[jout]; jout++) { - opal_output(0, "%s\tENVIRON[%d]: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), jout, environ_copy[jout]); - } - } - + execve(context->app, context->argv, environ_copy); - send_error_show_help(write_fd, 1, + send_error_show_help(write_fd, 1, "help-orte-odls-default.txt", "execve error", orte_process_info.nodename, context->app, strerror(errno)); /* Does not return */ @@ -521,12 +540,12 @@ static int do_parent(orte_app_context_t* context, if (OPAL_ERR_TIMEOUT == rc) { break; } - + /* If Something Bad happened in the read, error out */ if (OPAL_SUCCESS != rc) { ORTE_ERROR_LOG(rc); close(read_fd); - + if (NULL != child) { child->state = ORTE_PROC_STATE_UNDEF; } @@ -546,7 +565,7 @@ static int do_parent(orte_app_context_t* context, if (msg.file_str_len > 0) { rc = opal_fd_read(read_fd, msg.file_str_len, file); if (OPAL_SUCCESS != rc) { - orte_show_help("help-orte-odls-default.txt", "syscall fail", + orte_show_help("help-orte-odls-default.txt", "syscall fail", true, orte_process_info.nodename, context->app, "opal_fd_read", __FILE__, __LINE__); @@ -560,7 +579,7 @@ static int do_parent(orte_app_context_t* context, if 
(msg.topic_str_len > 0) { rc = opal_fd_read(read_fd, msg.topic_str_len, topic); if (OPAL_SUCCESS != rc) { - orte_show_help("help-orte-odls-default.txt", "syscall fail", + orte_show_help("help-orte-odls-default.txt", "syscall fail", true, orte_process_info.nodename, context->app, "opal_fd_read", __FILE__, __LINE__); @@ -574,7 +593,7 @@ static int do_parent(orte_app_context_t* context, if (msg.msg_str_len > 0) { str = calloc(1, msg.msg_str_len + 1); if (NULL == str) { - orte_show_help("help-orte-odls-default.txt", "syscall fail", + orte_show_help("help-orte-odls-default.txt", "syscall fail", true, orte_process_info.nodename, context->app, "opal_fd_read", __FILE__, __LINE__); @@ -617,7 +636,7 @@ static int do_parent(orte_app_context_t* context, ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE); } close(read_fd); - + return ORTE_SUCCESS; } @@ -633,12 +652,12 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, orte_iof_base_io_conf_t opts; int rc, p[2]; pid_t pid; - + if (NULL != child) { /* should pull this information from MPIRUN instead of going with default */ opts.usepty = OPAL_ENABLE_PTY_SUPPORT; - + /* do we want to setup stdin? 
*/ if (NULL != child && (jobdat->stdin_target == ORTE_VPID_WILDCARD || @@ -647,7 +666,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, } else { opts.connect_stdin = false; } - + if (ORTE_SUCCESS != (rc = orte_iof_base_setup_prefork(&opts))) { ORTE_ERROR_LOG(rc); if (NULL != child) { @@ -674,13 +693,13 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, } return ORTE_ERR_SYS_LIMITS_PIPES; } - + /* Fork off the child */ pid = fork(); if (NULL != child) { child->pid = pid; } - + if (pid < 0) { ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN); if (NULL != child) { @@ -689,15 +708,15 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, } return ORTE_ERR_SYS_LIMITS_CHILDREN; } - + if (pid == 0) { - close(p[0]); + close(p[0]); #if HAVE_SETPGID setpgid(0, 0); #endif do_child(context, child, environ_copy, jobdat, p[1], opts); /* Does not return */ - } + } close(p[1]); return do_parent(context, child, environ_copy, jobdat, p[0], opts); @@ -720,32 +739,33 @@ int orte_odls_default_launch_local_procs(opal_buffer_t *data) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); return rc; } - + /* launch the local procs */ ORTE_ACTIVATE_LOCAL_LAUNCH(job, odls_default_fork_local_proc); - + return ORTE_SUCCESS; } /** * Send a signal to a pid. Note that if we get an error, we set the - * return value and let the upper layer print out the message. + * return value and let the upper layer print out the message. */ static int send_signal(pid_t pid, int signal) { int rc = ORTE_SUCCESS; - + OPAL_OUTPUT_VERBOSE((1, orte_odls_base_framework.framework_output, "%s sending signal %d to pid %ld", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), signal, (long)pid)); - if (orte_forward_job_control) { - /* Send the signal to the process group rather than the - process. The child is the leader of its process group. */ - pid = -pid; - } +#if HAVE_SETPGID + /* Send the signal to the process group rather than the + process. 
The child is the leader of its process group. */ + pid = -pid; +#endif + if (kill(pid, signal) != 0) { switch(errno) { case EINVAL: @@ -754,8 +774,8 @@ static int send_signal(pid_t pid, int signal) case ESRCH: /* This case can occur when we deliver a signal to a process that is no longer there. This can happen if - we deliver a signal while the job is shutting down. - This does not indicate a real problem, so just + we deliver a signal while the job is shutting down. + This does not indicate a real problem, so just ignore the error. */ break; case EPERM: @@ -765,14 +785,14 @@ static int send_signal(pid_t pid, int signal) rc = ORTE_ERROR; } } - + return rc; } static int orte_odls_default_signal_local_procs(const orte_process_name_t *proc, int32_t signal) { int rc; - + if (ORTE_SUCCESS != (rc = orte_odls_base_default_signal_local_procs(proc, signal, send_signal))) { ORTE_ERROR_LOG(rc); return rc; @@ -783,7 +803,7 @@ static int orte_odls_default_signal_local_procs(const orte_process_name_t *proc, static int orte_odls_default_restart_proc(orte_proc_t *child) { int rc; - + /* restart the local proc */ if (ORTE_SUCCESS != (rc = orte_odls_base_default_restart_proc(child, odls_default_fork_local_proc))) { OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output, @@ -792,4 +812,3 @@ static int orte_odls_default_restart_proc(orte_proc_t *child) } return rc; } - diff --git a/orte/mca/odls/odls.h b/orte/mca/odls/odls.h index 88b69c71bf7..d28a964f770 100644 --- a/orte/mca/odls/odls.h +++ b/orte/mca/odls/odls.h @@ -45,7 +45,7 @@ BEGIN_C_DECLS /* * odls module functions */ - + /* * Construct a buffer for use in adding local processes * In order to reuse daemons, we need a way for the HNP to construct a buffer that diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h index b20a4d686e5..4ac72b629b0 100644 --- a/orte/mca/odls/odls_types.h +++ b/orte/mca/odls/odls_types.h @@ -4,19 +4,19 @@ * Copyright (c) 2004-2011 The University of Tennessee and The 
University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: @@ -36,7 +36,7 @@ BEGIN_C_DECLS typedef uint8_t orte_daemon_cmd_flag_t; #define ORTE_DAEMON_CMD_T OPAL_UINT8 - + /* * Definitions needed for communication */ @@ -59,7 +59,7 @@ typedef uint8_t orte_daemon_cmd_flag_t; #define ORTE_DAEMON_TERMINATE_JOB_CMD (orte_daemon_cmd_flag_t) 18 #define ORTE_DAEMON_HALT_VM_CMD (orte_daemon_cmd_flag_t) 19 #define ORTE_DAEMON_HALT_DVM_CMD (orte_daemon_cmd_flag_t) 20 - + /* request proc resource usage */ #define ORTE_DAEMON_TOP_CMD (orte_daemon_cmd_flag_t) 22 @@ -78,6 +78,9 @@ typedef uint8_t orte_daemon_cmd_flag_t; #define ORTE_DAEMON_NEW_COLL_ID (orte_daemon_cmd_flag_t) 29 +/* for debug purposes, get stack traces from all application procs */ +#define ORTE_DAEMON_GET_STACK_TRACES (orte_daemon_cmd_flag_t) 31 + /* * Struct written up the pipe from the child to the parent. 
*/ @@ -95,7 +98,7 @@ typedef struct { int msg_str_len; } orte_odls_pipe_err_msg_t; -/* +/* * Max length of strings from the orte_odls_pipe_err_msg_t */ #define ORTE_ODLS_MAX_FILE_LEN 511 diff --git a/orte/mca/oob/Makefile.am b/orte/mca/oob/Makefile.am index d1a4fc3f921..a88e3877e28 100644 --- a/orte/mca/oob/Makefile.am +++ b/orte/mca/oob/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/oob/alps/Makefile.am b/orte/mca/oob/alps/Makefile.am index da6e649210e..380677e1325 100644 --- a/orte/mca/oob/alps/Makefile.am +++ b/orte/mca/oob/alps/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # All rights reserved # Copyright (c) 2014 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/oob/alps/configure.m4 b/orte/mca/oob/alps/configure.m4 index d8ec90bf7ff..428a71f1fb9 100644 --- a/orte/mca/oob/alps/configure.m4 +++ b/orte/mca/oob/alps/configure.m4 @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -27,11 +27,11 @@ AC_DEFUN([MCA_orte_oob_alps_CONFIG],[ ORTE_CHECK_ALPS([oob_alps], [oob_alps_happy="yes"], [oob_alps_happy="no"]) - AS_IF([test "$oob_alps_happy" = "yes"], + AS_IF([test "$oob_alps_happy" = "yes"], [$1 AC_SUBST([oob_alps_CPPFLAGS]) AC_SUBST([oob_alps_LDFLAGS]) - AC_SUBST([oob_alps_LIBS])], + AC_SUBST([oob_alps_LIBS])], [$2]) ])dnl diff --git a/orte/mca/oob/alps/oob_alps_component.c b/orte/mca/oob/alps/oob_alps_component.c index 05ddd071eb1..49d8dcf367a 100644 --- a/orte/mca/oob/alps/oob_alps_component.c +++ b/orte/mca/oob/alps/oob_alps_component.c @@ -25,7 +25,7 @@ * In windows, many of the socket functions return an EWOULDBLOCK * instead of things like EAGAIN, EINPROGRESS, etc. 
It has been * verified that this will not conflict with other error codes that - * are returned by these functions under UNIX/Linux environments + * are returned by these functions under UNIX/Linux environments */ #include "orte_config.h" @@ -74,7 +74,6 @@ static int alps_component_open(void); static int alps_component_close(void); -static int alps_component_register(void); static int component_available(void); static int component_startup(void); static void component_shutdown(void); @@ -82,9 +81,6 @@ static int component_send(orte_rml_send_t *msg); static char* component_get_addr(void); static int component_set_addr(orte_process_name_t *peer, char **uris); static bool component_is_reachable(orte_process_name_t *peer); -#if OPAL_ENABLE_FT_CR == 1 -static int component_ft_event(int state); -#endif /* * Struct of function pointers and all that to let us be initialized @@ -109,10 +105,7 @@ mca_oob_base_component_t mca_oob_alps_component = { .send_nb = component_send, .get_addr = component_get_addr, .set_addr = component_set_addr, - .is_reachable = component_is_reachable, -#if OPAL_ENABLE_FT_CR == 1 - .ft_event = component_ft_event, -#endif + .is_reachable = component_is_reachable }; /* @@ -195,15 +188,14 @@ static int component_send(orte_rml_send_t *msg) static char* component_get_addr(void) { int len; - char hn[MAXHOSTNAMELEN], *cptr; + char hn[OPAL_MAXHOSTNAMELEN], *cptr; /* * TODO: for aries want to plug in GNI addr here instead to * eventually be able to support connect/accept using aprun. 
*/ - len = gethostname(hn, MAXHOSTNAMELEN - 1); - hn[len]='\0'; + len = gethostname(hn, sizeof(hn)); asprintf(&cptr, "gni://%s:%d", hn, getpid()); @@ -230,12 +222,3 @@ static bool component_is_reachable(orte_process_name_t *peer) return false; } -#if OPAL_ENABLE_FT_CR == 1 -static int component_ft_event(int state) -{ - opal_output_verbose(2, orte_oob_base_framework.framework_output, - "%s ALPS EVENT", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - return ORTE_ERR_NOT_SUPPORTED; -} -#endif diff --git a/orte/mca/oob/base/Makefile.am b/orte/mca/oob/base/Makefile.am index 5b34cfb9ad9..ae7f927814f 100644 --- a/orte/mca/oob/base/Makefile.am +++ b/orte/mca/oob/base/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -13,9 +13,9 @@ # reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/oob/base/base.h b/orte/mca/oob/base/base.h index 2c6bde8fb27..c589e8e9166 100644 --- a/orte/mca/oob/base/base.h +++ b/orte/mca/oob/base/base.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -78,21 +78,21 @@ OBJ_CLASS_DECLARATION(orte_oob_base_peer_t); ORTE_DECLSPEC extern mca_base_framework_t orte_oob_base_framework; ORTE_DECLSPEC int orte_oob_base_select(void); -/* Access the OOB internal functions via set of event-based macros - * for inserting messages and other commands into the - * OOB event base. This ensures that all OOB operations occur - * asynchronously in a thread-safe environment. - * Note that this doesn't mean that messages will be *sent* - * in order as that depends on the specific transport being - * used, when that module's event base indicates the transport - * is available, etc. - */ -typedef struct { - opal_object_t super; - opal_event_t ev; - orte_rml_send_t *msg; -} orte_oob_send_t; -OBJ_CLASS_DECLARATION(orte_oob_send_t); +/* Access the OOB internal functions via set of event-based macros + * for inserting messages and other commands into the + * OOB event base. This ensures that all OOB operations occur + * asynchronously in a thread-safe environment. + * Note that this doesn't mean that messages will be *sent* + * in order as that depends on the specific transport being + * used, when that module's event base indicates the transport + * is available, etc. + */ +typedef struct { + opal_object_t super; + opal_event_t ev; + orte_rml_send_t *msg; +} orte_oob_send_t; +OBJ_CLASS_DECLARATION(orte_oob_send_t); /* All OOB sends are based on iovec's and are async as the RML * acts as the initial interface to prepare all communications. 
@@ -108,7 +108,7 @@ typedef void (*mca_oob_send_callback_fn_t)(int status, struct iovec *iov, int count, void *cbdata); -ORTE_DECLSPEC void orte_oob_base_send_nb(int fd, short args, void *cbdata); +ORTE_DECLSPEC void orte_oob_base_send_nb(int fd, short args, void *cbdata); #define ORTE_OOB_SEND(m) \ do { \ orte_oob_send_t *cd; \ @@ -161,7 +161,7 @@ ORTE_DECLSPEC void orte_oob_base_get_addr(char **uri); * across all oob components/modules, letting each look at the uri and extract * info from it if it can. */ -typedef struct { +typedef struct { opal_object_t super; opal_event_t ev; char *uri; @@ -181,10 +181,6 @@ OBJ_CLASS_DECLARATION(mca_oob_uri_req_t); }while(0); ORTE_DECLSPEC void orte_oob_base_set_addr(int fd, short args, void *cbdata); -#if OPAL_ENABLE_FT_CR == 1 -ORTE_DECLSPEC void orte_oob_base_ft_event(int fd, short args, void *cbdata); -#endif - END_C_DECLS #endif diff --git a/orte/mca/oob/base/help-oob-base.txt b/orte/mca/oob/base/help-oob-base.txt index f6ed2154e9a..5e233475028 100644 --- a/orte/mca/oob/base/help-oob-base.txt +++ b/orte/mca/oob/base/help-oob-base.txt @@ -6,15 +6,15 @@ # Copyright (c) 2004-2006 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # diff --git a/orte/mca/oob/base/oob_base_frame.c b/orte/mca/oob/base/oob_base_frame.c index 19111f89915..3e5760d5621 100644 --- a/orte/mca/oob/base/oob_base_frame.c +++ b/orte/mca/oob/base/oob_base_frame.c @@ -6,19 +6,19 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,10 +34,6 @@ #include "orte/mca/rml/base/base.h" #include "orte/mca/oob/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/state/state.h" -#endif - /* * The following file was created by configure. It contains extern * statements and the definition of an array of pointers to each @@ -126,13 +122,6 @@ static int orte_oob_base_open(mca_base_open_flag_t flags) opal_hash_table_init(&orte_oob_base.peers, 128); OBJ_CONSTRUCT(&orte_oob_base.actives, opal_list_t); -#if OPAL_ENABLE_FT_CR == 1 - /* register the FT events callback */ - orte_state.add_job_state(ORTE_JOB_STATE_FT_CHECKPOINT, orte_oob_base_ft_event, ORTE_ERROR_PRI); - orte_state.add_job_state(ORTE_JOB_STATE_FT_CONTINUE, orte_oob_base_ft_event, ORTE_ERROR_PRI); - orte_state.add_job_state(ORTE_JOB_STATE_FT_RESTART, orte_oob_base_ft_event, ORTE_ERROR_PRI); -#endif - OPAL_TIMING_INIT(&tm_oob); /* Open up all available components */ diff --git a/orte/mca/oob/base/oob_base_select.c b/orte/mca/oob/base/oob_base_select.c index 6aedb7c431a..f09e9e2f0e2 100644 --- a/orte/mca/oob/base/oob_base_select.c +++ b/orte/mca/oob/base/oob_base_select.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -72,11 +72,11 @@ int orte_oob_base_select(void) component->oob_base.mca_component_name); rc = component->available(); - + /* If the component is not available, then skip it as * it has no available interfaces */ - if (ORTE_SUCCESS != rc && ORTE_ERR_FORCE_SELECT != rc) { + if (ORTE_SUCCESS != rc) { opal_output_verbose(5, orte_oob_base_framework.framework_output, "mca:oob:select: Skipping component [%s] - no available interfaces", component->oob_base.mca_component_name ); @@ -91,22 +91,6 @@ int orte_oob_base_select(void) continue; } - if (ORTE_ERR_FORCE_SELECT == rc) { - /* this component shall be the *only* component allowed - * for use, so shutdown and remove any prior ones */ - while (NULL != (cmp = (mca_base_component_list_item_t*)opal_list_remove_first(&orte_oob_base.actives))) { - c3 = (mca_oob_base_component_t *) cmp->cli_component; - if (NULL != c3->shutdown) { - c3->shutdown(); - } - OBJ_RELEASE(cmp); - } - c2 = OBJ_NEW(mca_base_component_list_item_t); - c2->cli_component = (mca_base_component_t*)component; - opal_list_append(&orte_oob_base.actives, &c2->super); - break; - } - /* record it, but maintain priority order */ added = false; OPAL_LIST_FOREACH(cmp, &orte_oob_base.actives, mca_base_component_list_item_t) { diff --git a/orte/mca/oob/base/oob_base_stubs.c b/orte/mca/oob/base/oob_base_stubs.c index df4a69ba0df..3032451c38f 100644 --- a/orte/mca/oob/base/oob_base_stubs.c +++ b/orte/mca/oob/base/oob_base_stubs.c @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * 
Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -15,7 +15,7 @@ #include "orte/constants.h" #include "opal/util/output.h" -#include "opal/mca/dstore/dstore.h" +#include "opal/mca/pmix/pmix.h" #include "opal/util/argv.h" #include "orte/mca/errmgr/errmgr.h" @@ -23,15 +23,12 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/oob/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/state/base/base.h" -#endif static void process_uri(char *uri); void orte_oob_base_send_nb(int fd, short args, void *cbdata) { - orte_oob_send_t *cd = (orte_oob_send_t*)cbdata; + orte_oob_send_t *cd = (orte_oob_send_t*)cbdata; orte_rml_send_t *msg = cd->msg; mca_base_component_list_item_t *cli; orte_oob_base_peer_t *pr; @@ -40,8 +37,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata) bool msg_sent; mca_oob_base_component_t *component; bool reachable; - opal_list_t myvals; - opal_value_t *kv; + char *uri; /* done with this. release it now */ OBJ_RELEASE(cd); @@ -62,14 +58,14 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata) ORTE_NAME_PRINT(&msg->dst)); /* for direct launched procs, the URI might be in the database, * so check there next - if it is, the peer object will be added - * to our hash table + * to our hash table. 
However, we don't want to chase up to the + * server after it, so indicate it is optional */ - OBJ_CONSTRUCT(&myvals, opal_list_t); - if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, &msg->dst, - OPAL_DSTORE_URI, &myvals)) { - kv = (opal_value_t*)opal_list_get_first(&myvals); - if (NULL != kv) { - process_uri(kv->data.string); + OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_PROC_URI, &msg->dst, + (char**)&uri, OPAL_STRING); + if (OPAL_SUCCESS == rc ) { + if (NULL != uri) { + process_uri(uri); if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers, ui64, (void**)&pr) || NULL == pr) { @@ -77,17 +73,14 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata) ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN); msg->status = ORTE_ERR_ADDRESSEE_UNKNOWN; ORTE_RML_SEND_COMPLETE(msg); - OPAL_LIST_DESTRUCT(&myvals); return; } } else { ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN); msg->status = ORTE_ERR_ADDRESSEE_UNKNOWN; ORTE_RML_SEND_COMPLETE(msg); - OPAL_LIST_DESTRUCT(&myvals); return; } - OPAL_LIST_DESTRUCT(&myvals); } else { /* even though we don't know about this peer yet, we still might * be able to get to it via routing, so ask each component if @@ -278,19 +271,19 @@ void orte_oob_base_get_addr(char **uri) * info from it if it can. An error is to be returned if NO component * can successfully extract a contact. 
*/ -static void req_cons(mca_oob_uri_req_t *ptr) -{ - ptr->uri = NULL; -} -static void req_des(mca_oob_uri_req_t *ptr) -{ - if (NULL != ptr->uri) { - free(ptr->uri); - } -} -OBJ_CLASS_INSTANCE(mca_oob_uri_req_t, - opal_object_t, - req_cons, req_des); +static void req_cons(mca_oob_uri_req_t *ptr) +{ + ptr->uri = NULL; +} +static void req_des(mca_oob_uri_req_t *ptr) +{ + if (NULL != ptr->uri) { + free(ptr->uri); + } +} +OBJ_CLASS_INSTANCE(mca_oob_uri_req_t, + opal_object_t, + req_cons, req_des); void orte_oob_base_set_addr(int fd, short args, void *cbdata) { @@ -404,35 +397,3 @@ static void process_uri(char *uri) opal_argv_free(uris); } -#if OPAL_ENABLE_FT_CR == 1 -void orte_oob_base_ft_event(int sd, short argc, void *cbdata) -{ - int rc; - mca_base_component_list_item_t *cli; - mca_oob_base_component_t *component; - orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; - - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "%s oob:base:ft_event %s(%d)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_job_state_to_str(state->job_state), - state->job_state); - - /* loop across all available modules in priority order - * and call each one's ft_event handler - */ - OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) { - component = (mca_oob_base_component_t*)cli->cli_component; - if (NULL == component->ft_event) { - /* doesn't support this ability */ - continue; - } - - if (ORTE_SUCCESS != (rc = component->ft_event(state->job_state))) { - ORTE_ERROR_LOG(rc); - } - } - OBJ_RELEASE(state); -} - -#endif diff --git a/orte/mca/oob/oob.h b/orte/mca/oob/oob.h index e0c386600ff..fe09d1998d8 100644 --- a/orte/mca/oob/oob.h +++ b/orte/mca/oob/oob.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: @@ -45,7 +45,7 @@ #include "orte/mca/mca.h" #include "orte/mca/rml/base/base.h" -#include "orte/mca/qos/base/base.h" + BEGIN_C_DECLS typedef int (*mca_oob_base_component_avail_fn_t)(void); @@ -57,9 +57,6 @@ typedef int (*mca_oob_base_component_set_addr_fn_t)(orte_process_name_t *peer, char **uris); typedef bool (*mca_oob_base_component_is_reachable_fn_t)(orte_process_name_t *peer); typedef void (*mca_oob_ping_callback_fn_t)(int status, void *cbdata); -#if OPAL_ENABLE_FT_CR == 1 -typedef int (*mca_oob_base_component_ft_event_fn_t)(int state); -#endif typedef struct { mca_base_component_t oob_base; @@ -73,9 +70,6 @@ typedef struct { mca_oob_base_component_get_addr_fn_t get_addr; mca_oob_base_component_set_addr_fn_t set_addr; mca_oob_base_component_is_reachable_fn_t is_reachable; -#if OPAL_ENABLE_FT_CR == 1 - mca_oob_base_component_ft_event_fn_t ft_event; -#endif } mca_oob_base_component_t; /** diff --git a/orte/mca/oob/tcp/Makefile.am b/orte/mca/oob/tcp/Makefile.am index 73d983b2bee..383af7e719b 100644 --- a/orte/mca/oob/tcp/Makefile.am +++ b/orte/mca/oob/tcp/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
@@ -14,9 +14,9 @@ # All rights reserved # Copyright (c) 2014 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/oob/tcp/configure.m4 b/orte/mca/oob/tcp/configure.m4 index 56b9ca8e68a..1a65263cc41 100644 --- a/orte/mca/oob/tcp/configure.m4 +++ b/orte/mca/oob/tcp/configure.m4 @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,9 +26,9 @@ AC_DEFUN([MCA_orte_oob_tcp_CONFIG],[ AC_CONFIG_FILES([orte/mca/oob/tcp/Makefile]) # check for sockaddr_in (a good sign we have TCP) - AC_CHECK_TYPES([struct sockaddr_in], + AC_CHECK_TYPES([struct sockaddr_in], [oob_tcp_happy="yes"], - [oob_tcp_happy="no"], + [oob_tcp_happy="no"], [AC_INCLUDES_DEFAULT #ifdef HAVE_NETINET_IN_H #include diff --git a/orte/mca/oob/tcp/help-oob-tcp.txt b/orte/mca/oob/tcp/help-oob-tcp.txt index f577247bc13..d20ea17e1b1 100644 --- a/orte/mca/oob/tcp/help-oob-tcp.txt +++ b/orte/mca/oob/tcp/help-oob-tcp.txt @@ -6,16 +6,16 @@ # Copyright (c) 2004-2006 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2014-2015 Intel, Inc. 
All rights reserved. # Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # [static-and-dynamic] diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index e1f5032a3fc..4cc8b702f87 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -9,11 +9,11 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -79,7 +79,6 @@ static void set_peer(const orte_process_name_t* name, static void ping(const orte_process_name_t *proc); static void send_nb(orte_rml_send_t *msg); static void resend(struct mca_oob_tcp_msg_error_t *mop); -static void ft_event(int state); mca_oob_tcp_module_t mca_oob_tcp_module = { { @@ -89,8 +88,7 @@ mca_oob_tcp_module_t mca_oob_tcp_module = { set_peer, ping, send_nb, - resend, - ft_event + resend } }; @@ -250,6 +248,8 @@ static int parse_uri(const uint16_t af_family, else { return ORTE_ERR_NOT_SUPPORTED; } + + return ORTE_SUCCESS; } @@ -272,7 +272,7 @@ static void process_set_peer(int fd, short args, void *cbdata) if (AF_INET != pop->af_family) { opal_output_verbose(20, orte_oob_base_framework.framework_output, - "%s NOT AF_INET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + "%s NOT AF_INET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); goto cleanup; } @@ -399,10 +399,10 @@ static void process_send(int fd, short args, void *cbdata) orte_process_name_t hop; opal_output_verbose(2, 
orte_oob_base_framework.framework_output, - "%s:[%s:%d] processing send to peer %s:%d to channel =%d seq_num = %d", + "%s:[%s:%d] processing send to peer %s:%d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, - ORTE_NAME_PRINT(&op->msg->dst), op->msg->tag, op->msg->dst_channel, op->msg->seq_num); + ORTE_NAME_PRINT(&op->msg->dst), op->msg->tag); /* do we have a route to this peer (could be direct)? */ hop = orte_routed.get_route(&op->msg->dst); @@ -581,6 +581,7 @@ static void recv_handler(int sd, short flg, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno); } } + /* is the peer instance willing to accept this connection */ peer->sd = sd; if (mca_oob_tcp_peer_accept(peer) == false) { @@ -603,72 +604,3 @@ static void recv_handler(int sd, short flg, void *cbdata) OBJ_RELEASE(op); } -/* Dummy function for when we are not using FT. */ -#if OPAL_ENABLE_FT_CR == 0 -static void ft_event(int state) -{ - return; -} - -#else -static void ft_event(int state) { -#if 0 - opal_list_item_t *item; -#endif - - if(OPAL_CRS_CHECKPOINT == state) { -#if 0 - /* - * Disable event processing while we are working - */ - opal_event_disable(); -#endif - } - else if(OPAL_CRS_CONTINUE == state) { -#if 0 - /* - * Resume event processing - */ - opal_event_enable(); - } - else if(OPAL_CRS_RESTART == state) { - /* - * Clean out cached connection information - * Select pieces of finalize/init - */ - for (item = opal_list_remove_first(&mca_oob_tcp_module.peer_list); - item != NULL; - item = opal_list_remove_first(&mca_oob_tcp_module.peer_list)) { - mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t*)item; - /* JJH: Use the below command for debugging restarts with invalid sockets - * mca_oob_tcp_peer_dump(peer, "RESTART CLEAN") - */ - MCA_OOB_TCP_PEER_RETURN(peer); - } - - OBJ_DESTRUCT(&mca_oob_tcp_module.peer_free); - OBJ_DESTRUCT(&mca_oob_tcp_module.peer_names); - OBJ_DESTRUCT(&mca_oob_tcp_module.peers); - 
OBJ_DESTRUCT(&mca_oob_tcp_module.peer_list); - - OBJ_CONSTRUCT(&mca_oob_tcp_module.peer_list, opal_list_t); - OBJ_CONSTRUCT(&mca_oob_tcp_module.peers, opal_hash_table_t); - OBJ_CONSTRUCT(&mca_oob_tcp_module.peer_names, opal_hash_table_t); - OBJ_CONSTRUCT(&mca_oob_tcp_module.peer_free, opal_free_list_t); - - /* - * Resume event processing - */ - opal_event_enable(); -#endif - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return; -} -#endif diff --git a/orte/mca/oob/tcp/oob_tcp.h b/orte/mca/oob/tcp/oob_tcp.h index 135ffb2ee3a..1915567bd36 100644 --- a/orte/mca/oob/tcp/oob_tcp.h +++ b/orte/mca/oob/tcp/oob_tcp.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -65,7 +65,6 @@ typedef void (*mca_oob_tcp_module_set_peer_fn_t)(const orte_process_name_t* name typedef void (*mca_oob_tcp_module_ping_fn_t)(const orte_process_name_t *proc); typedef void (*mca_oob_tcp_module_send_nb_fn_t)(orte_rml_send_t *msg); typedef void (*mca_oob_tcp_module_resend_nb_fn_t)(struct mca_oob_tcp_msg_error_t *mop); -typedef void (*mca_oob_tcp_module_ft_event_fn_t)(int state); typedef struct { mca_oob_tcp_module_init_fn_t init; @@ -75,7 +74,6 @@ typedef struct { mca_oob_tcp_module_ping_fn_t ping; mca_oob_tcp_module_send_nb_fn_t send_nb; mca_oob_tcp_module_resend_nb_fn_t resend; - mca_oob_tcp_module_ft_event_fn_t ft_event; } mca_oob_tcp_module_api_t; typedef struct { mca_oob_tcp_module_api_t api; diff --git a/orte/mca/oob/tcp/oob_tcp_common.c b/orte/mca/oob/tcp/oob_tcp_common.c index fffca84be59..a4ee262cecf 100644 --- a/orte/mca/oob/tcp/oob_tcp_common.c +++ b/orte/mca/oob/tcp/oob_tcp_common.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. @@ -25,7 +25,7 @@ * In windows, many of the socket functions return an EWOULDBLOCK * instead of things like EAGAIN, EINPROGRESS, etc. 
It has been * verified that this will not conflict with other error codes that - * are returned by these functions under UNIX/Linux environments + * are returned by these functions under UNIX/Linux environments */ #include "orte_config.h" @@ -77,7 +77,7 @@ static void set_keepalive(int sd) #if defined(SO_KEEPALIVE) int option; socklen_t optlen; - + /* see if the keepalive option is available */ optlen = sizeof(option); if (getsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, &optlen) < 0) { @@ -89,8 +89,8 @@ static void set_keepalive(int sd) option = 1; if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, optlen) < 0) { opal_output_verbose(5, orte_oob_base_framework.framework_output, - "[%s:%d] setsockopt(SO_KEEPALIVE) failed: %s (%d)", - __FILE__, __LINE__, + "[%s:%d] setsockopt(SO_KEEPALIVE) failed: %s (%d)", + __FILE__, __LINE__, strerror(opal_socket_errno), opal_socket_errno); return; @@ -101,8 +101,8 @@ static void set_keepalive(int sd) &mca_oob_tcp_component.keepalive_time, sizeof(mca_oob_tcp_component.keepalive_time)) < 0) { opal_output_verbose(5, orte_oob_base_framework.framework_output, - "[%s:%d] setsockopt(TCP_KEEPALIVE) failed: %s (%d)", - __FILE__, __LINE__, + "[%s:%d] setsockopt(TCP_KEEPALIVE) failed: %s (%d)", + __FILE__, __LINE__, strerror(opal_socket_errno), opal_socket_errno); return; @@ -113,8 +113,8 @@ static void set_keepalive(int sd) &mca_oob_tcp_component.keepalive_time, sizeof(mca_oob_tcp_component.keepalive_time)) < 0) { opal_output_verbose(5, orte_oob_base_framework.framework_output, - "[%s:%d] setsockopt(TCP_KEEPIDLE) failed: %s (%d)", - __FILE__, __LINE__, + "[%s:%d] setsockopt(TCP_KEEPIDLE) failed: %s (%d)", + __FILE__, __LINE__, strerror(opal_socket_errno), opal_socket_errno); return; @@ -126,8 +126,8 @@ static void set_keepalive(int sd) &mca_oob_tcp_component.keepalive_intvl, sizeof(mca_oob_tcp_component.keepalive_intvl)) < 0) { opal_output_verbose(5, orte_oob_base_framework.framework_output, - "[%s:%d] setsockopt(TCP_KEEPINTVL) failed: 
%s (%d)", - __FILE__, __LINE__, + "[%s:%d] setsockopt(TCP_KEEPINTVL) failed: %s (%d)", + __FILE__, __LINE__, strerror(opal_socket_errno), opal_socket_errno); return; @@ -139,8 +139,8 @@ static void set_keepalive(int sd) &mca_oob_tcp_component.keepalive_probes, sizeof(mca_oob_tcp_component.keepalive_probes)) < 0) { opal_output_verbose(5, orte_oob_base_framework.framework_output, - "[%s:%d] setsockopt(TCP_KEEPCNT) failed: %s (%d)", - __FILE__, __LINE__, + "[%s:%d] setsockopt(TCP_KEEPCNT) failed: %s (%d)", + __FILE__, __LINE__, strerror(opal_socket_errno), opal_socket_errno); } @@ -156,8 +156,8 @@ void orte_oob_tcp_set_socket_options(int sd) if (setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) { opal_backtrace_print(stderr, NULL, 1); opal_output_verbose(5, orte_oob_base_framework.framework_output, - "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)", - __FILE__, __LINE__, + "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)", + __FILE__, __LINE__, strerror(opal_socket_errno), opal_socket_errno); } @@ -166,8 +166,8 @@ void orte_oob_tcp_set_socket_options(int sd) if (mca_oob_tcp_component.tcp_sndbuf > 0 && setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_oob_tcp_component.tcp_sndbuf, sizeof(int)) < 0) { opal_output_verbose(5, orte_oob_base_framework.framework_output, - "[%s:%d] setsockopt(SO_SNDBUF) failed: %s (%d)", - __FILE__, __LINE__, + "[%s:%d] setsockopt(SO_SNDBUF) failed: %s (%d)", + __FILE__, __LINE__, strerror(opal_socket_errno), opal_socket_errno); } @@ -176,8 +176,8 @@ void orte_oob_tcp_set_socket_options(int sd) if (mca_oob_tcp_component.tcp_rcvbuf > 0 && setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_oob_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) { opal_output_verbose(5, orte_oob_base_framework.framework_output, - "[%s:%d] setsockopt(SO_RCVBUF) failed: %s (%d)", - __FILE__, __LINE__, + "[%s:%d] setsockopt(SO_RCVBUF) failed: %s (%d)", + __FILE__, __LINE__, strerror(opal_socket_errno), opal_socket_errno); } diff --git 
a/orte/mca/oob/tcp/oob_tcp_common.h b/orte/mca/oob/tcp/oob_tcp_common.h index f23ee29c2a5..f999c1bd32a 100644 --- a/orte/mca/oob/tcp/oob_tcp_common.h +++ b/orte/mca/oob/tcp/oob_tcp_common.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index a6a8bc1ac6a..85cad6c83d6 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -14,8 +14,9 @@ * reserved. * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -91,9 +92,6 @@ static char* component_get_addr(void); static int component_set_addr(orte_process_name_t *peer, char **uris); static bool component_is_reachable(orte_process_name_t *peer); -#if OPAL_ENABLE_FT_CR == 1 -static int component_ft_event(int state); -#endif /* * Struct of function pointers and all that to let us be initialized @@ -121,9 +119,6 @@ mca_oob_tcp_component_t mca_oob_tcp_component = { .get_addr = component_get_addr, .set_addr = component_set_addr, .is_reachable = component_is_reachable, -#if OPAL_ENABLE_FT_CR == 1 - .ft_event = component_ft_event, -#endif }, }; @@ -162,6 +157,7 @@ static int tcp_component_open(void) "open" failing is not printed */ return ORTE_ERR_NOT_AVAILABLE; } + return ORTE_SUCCESS; } @@ -319,6 +315,7 @@ static int tcp_component_register(void) } #endif // OPAL_ENABLE_IPV6 #endif // OPAL_ENABLE_STATIC_PORTS + dyn_port_string = NULL; (void)mca_base_component_var_register(component, "dynamic_ipv4_ports", "Range of ports to be dynamically used by daemons and procs (IPv4)", @@ -404,8 +401,8 @@ static int tcp_component_register(void) &mca_oob_tcp_component.disable_ipv6_family); #endif // OPAL_ENABLE_IPV6 - // Default to keepalives every 60 seconds - mca_oob_tcp_component.keepalive_time = 60; + // Wait for this amount of time before sending the first keepalive probe + mca_oob_tcp_component.keepalive_time = 300; (void)mca_base_component_var_register(component, "keepalive_time", "Idle time in seconds before starting to send keepalives (keepalive_time <= 0 disables keepalive functionality)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -413,8 +410,8 @@ static int tcp_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_oob_tcp_component.keepalive_time); - // Default to keepalive retry interval time of 5 seconds - mca_oob_tcp_component.keepalive_intvl = 5; + // Resend keepalive probe every INT seconds + mca_oob_tcp_component.keepalive_intvl = 20; 
(void)mca_base_component_var_register(component, "keepalive_intvl", "Time between successive keepalive pings when peer has not responded, in seconds (ignored if keepalive_time <= 0)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -422,16 +419,15 @@ static int tcp_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_oob_tcp_component.keepalive_intvl); - // Default to retrying a keepalive 3 times before declaring the - // peer kaput - mca_oob_tcp_component.keepalive_probes = 3; + // After sending PR probes every INT seconds consider the connection dead + mca_oob_tcp_component.keepalive_probes = 9; (void)mca_base_component_var_register(component, "keepalive_probes", "Number of keepalives that can be missed before declaring error (ignored if keepalive_time <= 0)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, &mca_oob_tcp_component.keepalive_probes); - + mca_oob_tcp_component.retry_delay = 0; (void)mca_base_component_var_register(component, "retry_delay", "Time (in sec) to wait before trying to connect to peer again", @@ -491,6 +487,7 @@ static int component_available(void) i, opal_ifindextokindex(i)); continue; } + /* ignore non-ip4/6 interfaces */ if (AF_INET != my_ss.ss_family #if OPAL_ENABLE_IPV6 @@ -499,6 +496,7 @@ static int component_available(void) ) { continue; } + kindex = opal_ifindextokindex(i); if (kindex <= 0) { continue; @@ -612,7 +610,7 @@ static int component_available(void) /* Start all modules */ static int component_startup(void) { - int rc; + int rc = ORTE_SUCCESS; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP STARTUP", @@ -623,16 +621,25 @@ static int component_startup(void) mca_oob_tcp_module.api.init(); } - /* start the listening thread/event */ - if (ORTE_SUCCESS != (rc = orte_oob_tcp_start_listening())) { - ORTE_ERROR_LOG(rc); + /* if we are a daemon/HNP, or we are a standalone app, + * then it is possible that someone else may initiate a + * connection to us. 
In these cases, we need to start the + * listening thread/event. Otherwise, we will be the one + * initiating communication, and there is no need for + * a listener */ + if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON || + orte_standalone_operation) { + if (ORTE_SUCCESS != (rc = orte_oob_tcp_start_listening())) { + ORTE_ERROR_LOG(rc); + } } + return rc; } static void component_shutdown(void) { - int i = 0; + int i=0; opal_list_item_t *item; opal_output_verbose(2, orte_oob_base_framework.framework_output, @@ -659,9 +666,9 @@ static void component_shutdown(void) static int component_send(orte_rml_send_t *msg) { opal_output_verbose(5, orte_oob_base_framework.framework_output, - "%s oob:tcp:send_nb to peer %s:%d to channel=%d seq = %d", + "%s oob:tcp:send_nb to peer %s:%d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&msg->dst), msg->tag,msg->dst_channel, msg->seq_num ); + ORTE_NAME_PRINT(&msg->dst), msg->tag); /* the module is potentially running on its own event * base, so all it can do is push our send request @@ -742,8 +749,8 @@ static int component_set_addr(orte_process_name_t *peer, tcpuri = strdup(uris[i]); if (NULL == tcpuri) { opal_output_verbose(2, orte_oob_base_framework.framework_output, - "%s oob:tcp: out of memory", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + "%s oob:tcp: out of memory", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); continue; } if (0 == strncmp(uris[i], "tcp:", 4)) { @@ -775,6 +782,7 @@ static int component_set_addr(orte_process_name_t *peer, "%s oob:tcp: working peer %s address %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer), uris[i]); + /* separate the ports from the network addrs */ ports = strrchr(tcpuri, ':'); *ports = '\0'; @@ -867,20 +875,6 @@ static bool component_is_reachable(orte_process_name_t *peer) return true; } -#if OPAL_ENABLE_FT_CR == 1 -static int component_ft_event(int state) -{ - opal_output_verbose(2, orte_oob_base_framework.framework_output, - "%s TCP FT EVENT", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* 
pass it into the module */ - if (NULL != mca_oob_tcp_module.api.ft_event) { - mca_oob_tcp_module.api.ft_event(state); - } - - return ORTE_SUCCESS; -} -#endif // OPAL_ENABLE_FT_CR void mca_oob_tcp_component_set_module(int fd, short args, void *cbdata) { @@ -1053,8 +1047,6 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata) snd->dst = mop->snd->hdr.dst; snd->origin = mop->snd->hdr.origin; snd->tag = mop->snd->hdr.tag; - snd->dst_channel = mop->snd->hdr.channel; - snd->seq_num = mop->snd->hdr.seq_num; snd->data = mop->snd->data; snd->count = mop->snd->hdr.nbytes; snd->cbfunc.iov = NULL; @@ -1163,7 +1155,7 @@ static char **split_and_resolve(char **orig_str, char *name) /* Go through all interfaces and see if we can find a match */ for (if_index = opal_ifbegin(); if_index >= 0; - if_index = opal_ifnext(if_index)) { + if_index = opal_ifnext(if_index)) { opal_ifindextoaddr(if_index, (struct sockaddr*) &if_inaddr, sizeof(if_inaddr)); @@ -1173,6 +1165,7 @@ static char **split_and_resolve(char **orig_str, char *name) break; } } + /* If we didn't find a match, keep trying */ if (if_index < 0) { orte_show_help("help-oob-tcp.txt", "invalid if_inexclude", diff --git a/orte/mca/oob/tcp/oob_tcp_component.h b/orte/mca/oob/tcp/oob_tcp_component.h index 120dac3d617..fb35b86478f 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.h +++ b/orte/mca/oob/tcp/oob_tcp_component.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. 
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/oob/tcp/oob_tcp_connection.c b/orte/mca/oob/tcp/oob_tcp_connection.c index fd0b53bc1d5..3ce503feb7f 100644 --- a/orte/mca/oob/tcp/oob_tcp_connection.c +++ b/orte/mca/oob/tcp/oob_tcp_connection.c @@ -5,21 +5,22 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -53,7 +54,6 @@ #include "opal_stdint.h" #include "opal/mca/backtrace/backtrace.h" #include "opal/mca/base/mca_base_var.h" -#include "opal/mca/dstore/dstore.h" #include "opal/mca/sec/sec.h" #include "opal/util/output.h" #include "opal/util/net.h" @@ -79,6 +79,7 @@ static void tcp_peer_event_init(mca_oob_tcp_peer_t* peer); static int tcp_peer_send_connect_ack(mca_oob_tcp_peer_t* peer); +static int tcp_peer_send_connect_nack(int sd, orte_process_name_t name); static int tcp_peer_send_blocking(int sd, void* data, size_t size); static bool tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd, void* data, size_t size); @@ -96,7 +97,7 @@ static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer) "%s oob:tcp:peer creating socket to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)))); - + peer->sd = socket(AF_INET, SOCK_STREAM, 0); if (peer->sd < 0) { opal_output(0, "%s-%s tcp_peer_create_socket: socket() failed: %s (%d)\n", @@ -125,7 +126,7 @@ static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer) /* setup the socket as non-blocking */ if (peer->sd >= 0) { if((flags = fcntl(peer->sd, F_GETFL, 0)) < 0) { - opal_output(0, "%s-%s tcp_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n", + opal_output(0, "%s-%s tcp_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), strerror(opal_socket_errno), @@ -133,7 +134,7 @@ static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer) } else { flags |= O_NONBLOCK; if(fcntl(peer->sd, F_SETFL, flags) < 0) - opal_output(0, "%s-%s tcp_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n", + opal_output(0, "%s-%s tcp_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), strerror(opal_socket_errno), @@ -147,7 +148,7 @@ static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer) /* * Try connecting to a peer - cycle 
across all known addresses - * until one succeeds. + * until one succeeds. */ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) { @@ -326,7 +327,7 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) "Connection to proc %s succeeded", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); - + /* setup our recv to catch the return ack call */ if (!peer->recv_ev_active) { opal_event_add(&peer->recv_event, 0); @@ -347,10 +348,10 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) ORTE_ACTIVATE_TCP_CONN_STATE(peer, mca_oob_tcp_peer_try_connect); } else { peer->state = MCA_OOB_TCP_UNCONNECTED; - } + } return; } else { - opal_output(0, + opal_output(0, "%s orte_tcp_peer_try_connect: " "tcp_peer_send_connect_ack to proc %s on %s:%d failed: %s (%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -374,11 +375,12 @@ static int tcp_peer_send_connect_ack(mca_oob_tcp_peer_t* peer) { char *msg; mca_oob_tcp_hdr_t hdr; + uint16_t ack_flag = htons(1); int rc; - size_t sdsize; + size_t sdsize, offset = 0; char *cred; size_t credsize; - + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s SEND CONNECT ACK", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -390,7 +392,6 @@ static int tcp_peer_send_connect_ack(mca_oob_tcp_peer_t* peer) /* get our security credential*/ if (OPAL_SUCCESS != (rc = opal_sec.get_my_credential(peer->auth_method, - opal_dstore_internal, ORTE_PROC_MY_NAME, &cred, &credsize))) { ORTE_ERROR_LOG(rc); @@ -400,27 +401,32 @@ static int tcp_peer_send_connect_ack(mca_oob_tcp_peer_t* peer) "%s SENDING CREDENTIAL OF SIZE %lu", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)credsize); - - /* set the number of bytes to be read beyond the header */ - hdr.nbytes = strlen(orte_version_string) + 1 + credsize; + + /* payload size */ + sdsize = sizeof(ack_flag) + strlen(orte_version_string) + 1 + credsize; + hdr.nbytes = sdsize; MCA_OOB_TCP_HDR_HTON(&hdr); /* create a space for our 
message */ - sdsize = sizeof(hdr) + strlen(orte_version_string) + 1 + credsize; + sdsize += sizeof(hdr); if (NULL == (msg = (char*)malloc(sdsize))) { return ORTE_ERR_OUT_OF_RESOURCE; } memset(msg, 0, sdsize); /* load the message */ - memcpy(msg, &hdr, sizeof(hdr)); - memcpy(msg+sizeof(hdr), orte_version_string, strlen(orte_version_string)); - memcpy(msg+sizeof(hdr)+strlen(orte_version_string)+1, cred, credsize); + memcpy(msg + offset, &hdr, sizeof(hdr)); + offset += sizeof(hdr); + memcpy(msg + offset, &ack_flag, sizeof(ack_flag)); + offset += sizeof(ack_flag); + memcpy(msg + offset, orte_version_string, strlen(orte_version_string)); + offset += strlen(orte_version_string)+1; + memcpy(msg + offset, cred, credsize); /* clear the memory */ if (NULL != cred) { free(cred); } - + /* send it */ if (ORTE_SUCCESS != tcp_peer_send_blocking(peer->sd, msg, sdsize)) { free(msg); @@ -433,6 +439,53 @@ static int tcp_peer_send_connect_ack(mca_oob_tcp_peer_t* peer) return ORTE_SUCCESS; } +/* Respond with refuse to the connection request */ +static int tcp_peer_send_connect_nack(int sd, orte_process_name_t name) +{ + char *msg; + mca_oob_tcp_hdr_t hdr; + uint16_t ack_flag = htons(0); + int rc = ORTE_SUCCESS; + size_t sdsize, offset = 0; + + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, + "%s SEND CONNECT NACK", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + + /* load the header */ + hdr.origin = *ORTE_PROC_MY_NAME; + hdr.dst = name; + hdr.type = MCA_OOB_TCP_IDENT; + hdr.tag = 0; + + /* payload size */ + sdsize = sizeof(ack_flag); + hdr.nbytes = sdsize; + MCA_OOB_TCP_HDR_HTON(&hdr); + + /* create a space for our message */ + sdsize += sizeof(hdr); + if (NULL == (msg = (char*)malloc(sdsize))) { + return ORTE_ERR_OUT_OF_RESOURCE; + } + memset(msg, 0, sdsize); + + /* load the message */ + memcpy(msg + offset, &hdr, sizeof(hdr)); + offset += sizeof(hdr); + memcpy(msg + offset, &ack_flag, sizeof(ack_flag)); + offset += sizeof(ack_flag); + + /* send it */ + 
if (ORTE_SUCCESS != tcp_peer_send_blocking(sd, msg, sdsize)) { + /* it's ok if it fails - remote side may already + * identifiet the collision and closed the connection + */ + rc = ORTE_SUCCESS; + } + free(msg); + return rc; +} + /* * Initialize events to be used by the peer instance for TCP select/poll callbacks. */ @@ -451,7 +504,7 @@ static void tcp_peer_event_init(mca_oob_tcp_peer_t* peer) opal_event_del(&peer->recv_event); peer->recv_ev_active = false; } - + opal_event_set(mca_oob_tcp_module.ev_base, &peer->send_event, peer->sd, @@ -483,7 +536,7 @@ void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t *peer) /* check connect completion status */ if (getsockopt(peer->sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) { - opal_output(0, "%s tcp_peer_complete_connect: getsockopt() to %s failed: %s (%d)\n", + opal_output(0, "%s tcp_peer_complete_connect: getsockopt() to %s failed: %s (%d)\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), strerror(opal_socket_errno), @@ -533,7 +586,7 @@ void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t *peer) "setting read event on connection to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name))); - + if (!peer->recv_ev_active) { opal_event_add(&peer->recv_event, 0); peer->recv_ev_active = true; @@ -636,6 +689,7 @@ static bool retry(mca_oob_tcp_peer_t* peer, int sd, bool fatal) return false; } else { /* The connection will be retried */ + tcp_peer_send_connect_nack(sd, peer->name); CLOSE_THE_SOCKET(sd); return true; } @@ -649,10 +703,12 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr, char *version; int rc; char *cred; - size_t credsize; + size_t credsize, offset = 0; mca_oob_tcp_hdr_t hdr; mca_oob_tcp_peer_t *peer; uint64_t *ui64; + uint16_t ack_flag; + bool is_new = (NULL == pr); opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s RECV CONNECT ACK FROM %s ON SOCKET %d", @@ -681,19 +737,6 @@ int 
mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr, "%s unable to complete recv of connect-ack from %s ON SOCKET %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&peer->name), sd); - /* check for a race condition - if I was in the process of - * creating a connection to the peer, or have already established - * such a connection, then we need to reject this connection. We will - * let the higher ranked process retry - if I'm the lower ranked - * process, I'll simply defer until I receive the request - */ - if (NULL != peer && - (MCA_OOB_TCP_CONNECTED == peer->state || - MCA_OOB_TCP_CONNECTING == peer->state || - MCA_OOB_TCP_CONNECT_ACK == peer->state || - MCA_OOB_TCP_CLOSED == peer->state)) { - retry(peer, sd, false); - } return ORTE_ERR_UNREACH; } @@ -721,7 +764,7 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr, } if (hdr.type != MCA_OOB_TCP_IDENT) { - opal_output(0, "tcp_peer_recv_connect_ack: invalid header type: %d\n", + opal_output(0, "tcp_peer_recv_connect_ack: invalid header type: %d\n", hdr.type); if (NULL != peer) { peer->state = MCA_OOB_TCP_FAILED; @@ -748,23 +791,8 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr, CLOSE_THE_SOCKET(sd); return ORTE_ERR_OUT_OF_RESOURCE; } - } else { - /* check for a race condition - if I was in the process of - * creating a connection to the peer, or have already established - * such a connection, then we need to reject this connection. 
We will - * let the higher ranked process retry - if I'm the lower ranked - * process, I'll simply defer until I receive the request - */ - if (MCA_OOB_TCP_CONNECTED == peer->state || - MCA_OOB_TCP_CONNECTING == peer->state || - MCA_OOB_TCP_CONNECT_ACK == peer->state) { - if (retry(peer, sd, false)) { - return ORTE_ERR_UNREACH; - } - } } } else { - /* compare the peers name to the expected value */ if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->name, &hdr.origin)) { opal_output(0, "%s tcp_peer_recv_connect_ack: " @@ -795,23 +823,68 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr, "%s unable to complete recv of connect-ack from %s ON SOCKET %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name), peer->sd); - /* check for a race condition - if I was in the process of - * creating a connection to the peer, or have already established - * such a connection, then we need to reject this connection. We will - * let the higher ranked process retry - if I'm the lower ranked - * process, I'll simply defer until I receive the request - */ - if (MCA_OOB_TCP_CONNECTED == peer->state || - MCA_OOB_TCP_CONNECTING == peer->state || - MCA_OOB_TCP_CONNECT_ACK == peer->state) { - retry(peer, sd, true); + free(msg); + return ORTE_ERR_UNREACH; + } + + /* Check the type of acknowledgement */ + memcpy(&ack_flag, msg + offset, sizeof(ack_flag)); + offset += sizeof(ack_flag); + + ack_flag = ntohs(ack_flag); + if( !ack_flag ){ + if (MCA_OOB_TCP_CONNECT_ACK == peer->state) { + /* We got nack from the remote side which means that + * it will be the initiator of the connection. 
+ */ + + /* release the socket */ + CLOSE_THE_SOCKET(peer->sd); + peer->sd = -1; + + /* unregister active events */ + if (peer->recv_ev_active) { + opal_event_del(&peer->recv_event); + peer->recv_ev_active = false; + } + if (peer->send_ev_active) { + opal_event_del(&peer->send_event); + peer->send_ev_active = false; + } + + /* change the state so we'll accept the remote + * connection when it'll appear + */ + peer->state = MCA_OOB_TCP_UNCONNECTED; + } else { + /* FIXME: this shouldn't happen. We need to force next address + * to be tried. + */ + mca_oob_tcp_peer_close(peer); } free(msg); return ORTE_ERR_UNREACH; } + /* check for a race condition - if I was in the process of + * creating a connection to the peer, or have already established + * such a connection, then we need to reject this connection. We will + * let the higher ranked process retry - if I'm the lower ranked + * process, I'll simply defer until I receive the request + */ + if (is_new && + ( MCA_OOB_TCP_CONNECTED == peer->state || + MCA_OOB_TCP_CONNECTING == peer->state || + MCA_OOB_TCP_CONNECT_ACK == peer->state ) ) { + if (retry(peer, sd, false)) { + free(msg); + return ORTE_ERR_UNREACH; + } + } + /* check that this is from a matching version */ - version = (char*)(msg); + version = (char*)((char*)msg + offset); + offset += strlen(version) + 1; if (0 != strcmp(version, orte_version_string)) { opal_output(0, "%s tcp_peer_recv_connect_ack: " "received different version from %s: %s instead of %s\n", @@ -830,8 +903,8 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr, ORTE_NAME_PRINT(&peer->name)); /* check security token */ - cred = (char*)(msg + strlen(version) + 1); - credsize = hdr.nbytes - strlen(version) - 1; + cred = (char*)((char*)msg + offset); + credsize = hdr.nbytes - offset; if (OPAL_SUCCESS != (rc = opal_sec.authenticate(cred, credsize, &peer->auth_method))) { char *hostname; hostname = orte_get_proc_hostname(&peer->name); @@ -911,8 +984,6 @@ static void 
tcp_peer_connected(mca_oob_tcp_peer_t* peer) */ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t *peer) { - mca_oob_tcp_send_t *snd; - opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp_peer_close for %s sd %d state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -964,10 +1035,12 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t *peer) * handle these recycled messages. This prevents us from unintentionally * attempting to send the message again across the now-failed interface */ + /* if (NULL != peer->send_msg) { } while (NULL != (snd = (mca_oob_tcp_send_t*)opal_list_remove_first(&peer->send_queue))) { } + */ } /* @@ -1006,10 +1079,14 @@ static bool tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd, /* socket is non-blocking so handle errors */ if (retval < 0) { - if (opal_socket_errno != EINTR && - opal_socket_errno != EAGAIN && + if (opal_socket_errno != EINTR && + opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) { - if (peer->state == MCA_OOB_TCP_CONNECT_ACK) { + if (NULL == peer) { + /* protect against things like port scanners */ + CLOSE_THE_SOCKET(sd); + return false; + } else if (peer->state == MCA_OOB_TCP_CONNECT_ACK) { /* If we overflow the listen backlog, it's possible that even though we finished the three way handshake, the remote host was unable to @@ -1028,22 +1105,18 @@ static bool tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd, "%s connect ack received error %s from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), - (NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name))); + ORTE_NAME_PRINT(&(peer->name))); return false; } else { - opal_output(0, + opal_output(0, "%s tcp_peer_recv_blocking: " "recv() failed for %s: %s (%d)\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == peer) ? 
"UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)), + ORTE_NAME_PRINT(&(peer->name)), strerror(opal_socket_errno), opal_socket_errno); - if (NULL != peer) { - peer->state = MCA_OOB_TCP_FAILED; - mca_oob_tcp_peer_close(peer); - } else { - CLOSE_THE_SOCKET(sd); - } + peer->state = MCA_OOB_TCP_FAILED; + mca_oob_tcp_peer_close(peer); return false; } } @@ -1071,16 +1144,16 @@ void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg) struct sockaddr_storage inaddr; opal_socklen_t addrlen = sizeof(struct sockaddr_storage); opal_socklen_t optlen; - + if (getsockname(peer->sd, (struct sockaddr*)&inaddr, &addrlen) < 0) { - opal_output(0, "tcp_peer_dump: getsockname: %s (%d)\n", + opal_output(0, "tcp_peer_dump: getsockname: %s (%d)\n", strerror(opal_socket_errno), opal_socket_errno); } else { snprintf(src, sizeof(src), "%s", opal_net_get_hostname((struct sockaddr*) &inaddr)); } if (getpeername(peer->sd, (struct sockaddr*)&inaddr, &addrlen) < 0) { - opal_output(0, "tcp_peer_dump: getpeername: %s (%d)\n", + opal_output(0, "tcp_peer_dump: getpeername: %s (%d)\n", strerror(opal_socket_errno), opal_socket_errno); } else { @@ -1092,11 +1165,11 @@ void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg) strerror(opal_socket_errno), opal_socket_errno); } - + #if defined(SO_SNDBUF) optlen = sizeof(sndbuf); if(getsockopt(peer->sd, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &optlen) < 0) { - opal_output(0, "tcp_peer_dump: SO_SNDBUF option: %s (%d)\n", + opal_output(0, "tcp_peer_dump: SO_SNDBUF option: %s (%d)\n", strerror(opal_socket_errno), opal_socket_errno); } @@ -1106,7 +1179,7 @@ void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg) #if defined(SO_RCVBUF) optlen = sizeof(rcvbuf); if (getsockopt(peer->sd, SOL_SOCKET, SO_RCVBUF, (char *)&rcvbuf, &optlen) < 0) { - opal_output(0, "tcp_peer_dump: SO_RCVBUF option: %s (%d)\n", + opal_output(0, "tcp_peer_dump: SO_RCVBUF option: %s (%d)\n", strerror(opal_socket_errno), opal_socket_errno); } @@ -1116,7 +1189,7 
@@ void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg) #if defined(TCP_NODELAY) optlen = sizeof(nodelay); if (getsockopt(peer->sd, IPPROTO_TCP, TCP_NODELAY, (char *)&nodelay, &optlen) < 0) { - opal_output(0, "tcp_peer_dump: TCP_NODELAY option: %s (%d)\n", + opal_output(0, "tcp_peer_dump: TCP_NODELAY option: %s (%d)\n", strerror(opal_socket_errno), opal_socket_errno); } diff --git a/orte/mca/oob/tcp/oob_tcp_connection.h b/orte/mca/oob/tcp/oob_tcp_connection.h index a7a097d3e20..4aaa4470abf 100644 --- a/orte/mca/oob/tcp/oob_tcp_connection.h +++ b/orte/mca/oob/tcp/oob_tcp_connection.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/oob/tcp/oob_tcp_hdr.h b/orte/mca/oob/tcp/oob_tcp_hdr.h index 057ec2cb686..54d79c5d9ea 100644 --- a/orte/mca/oob/tcp/oob_tcp_hdr.h +++ b/orte/mca/oob/tcp/oob_tcp_hdr.h @@ -12,8 +12,6 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 -2015 Intel, Inc. All rights reserved. 
- * * $COPYRIGHT$ * * Additional copyrights may follow @@ -56,10 +54,6 @@ typedef struct { mca_oob_tcp_msg_type_t type; /* the rml tag where this message is headed */ orte_rml_tag_t tag; - /* the rml channel where this message is headed */ - orte_rml_channel_num_t channel; - /* the seq number of this message */ - uint32_t seq_num; /* number of bytes in message */ uint32_t nbytes; } mca_oob_tcp_hdr_t; diff --git a/orte/mca/oob/tcp/oob_tcp_listener.c b/orte/mca/oob/tcp/oob_tcp_listener.c index 79760618528..a926dd3cdd2 100644 --- a/orte/mca/oob/tcp/oob_tcp_listener.c +++ b/orte/mca/oob/tcp/oob_tcp_listener.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. @@ -25,7 +25,7 @@ * In windows, many of the socket functions return an EWOULDBLOCK * instead of things like EAGAIN, EINPROGRESS, etc. It has been * verified that this will not conflict with other error codes that - * are returned by these functions under UNIX/Linux environments + * are returned by these functions under UNIX/Linux environments */ #include "orte_config.h" @@ -169,8 +169,8 @@ int orte_oob_tcp_start_listening(void) } /* - * Create an IPv4 listen socket and bind to all interfaces. - * + * Create an IPv4 listen socket and bind to all interfaces. + * * At one time, this also registered a callback with the event library * for when connections were received on the listen socket. This is * no longer the case -- the caller must register any events required. 
@@ -273,7 +273,7 @@ static int create_listen(void) if (NULL == ports) { return ORTE_ERROR; } - + /* get the address info for this interface */ ((struct sockaddr_in*) &inaddr)->sin_family = AF_INET; ((struct sockaddr_in*) &inaddr)->sin_addr.s_addr = INADDR_ANY; @@ -295,12 +295,12 @@ static int create_listen(void) port = htons(port); ((struct sockaddr_in*) &inaddr)->sin_port = port; - + /* create a listen socket for incoming connections on this port */ sd = socket(AF_INET, SOCK_STREAM, 0); if (sd < 0) { if (EAFNOSUPPORT != opal_socket_errno) { - opal_output(0,"mca_oob_tcp_component_init: socket() failed: %s (%d)", + opal_output(0,"mca_oob_tcp_component_init: socket() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno); } opal_argv_free(ports); @@ -321,7 +321,7 @@ static int create_listen(void) opal_argv_free(ports); return ORTE_ERROR; } - + /* Set the socket to close-on-exec so that no children inherit this FD */ if (opal_fd_set_cloexec(sd) != OPAL_SUCCESS) { @@ -348,25 +348,25 @@ static int create_listen(void) } /* resolve assigned port */ if (getsockname(sd, (struct sockaddr*)&inaddr, &addrlen) < 0) { - opal_output(0, "mca_oob_tcp_create_listen: getsockname(): %s (%d)", + opal_output(0, "mca_oob_tcp_create_listen: getsockname(): %s (%d)", strerror(opal_socket_errno), opal_socket_errno); CLOSE_THE_SOCKET(sd); opal_argv_free(ports); return ORTE_ERROR; } - + /* setup listen backlog to maximum allowed by kernel */ if (listen(sd, SOMAXCONN) < 0) { - opal_output(0, "mca_oob_tcp_component_init: listen(): %s (%d)", + opal_output(0, "mca_oob_tcp_component_init: listen(): %s (%d)", strerror(opal_socket_errno), opal_socket_errno); CLOSE_THE_SOCKET(sd); opal_argv_free(ports); return ORTE_ERROR; } - + /* set socket up to be non-blocking, otherwise accept could block */ if ((flags = fcntl(sd, F_GETFL, 0)) < 0) { - opal_output(0, "mca_oob_tcp_component_init: fcntl(F_GETFL) failed: %s (%d)", + opal_output(0, "mca_oob_tcp_component_init: fcntl(F_GETFL) failed: %s 
(%d)", strerror(opal_socket_errno), opal_socket_errno); CLOSE_THE_SOCKET(sd); opal_argv_free(ports); @@ -374,7 +374,7 @@ static int create_listen(void) } flags |= O_NONBLOCK; if (fcntl(sd, F_SETFL, flags) < 0) { - opal_output(0, "mca_oob_tcp_component_init: fcntl(F_SETFL) failed: %s (%d)", + opal_output(0, "mca_oob_tcp_component_init: fcntl(F_SETFL) failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno); CLOSE_THE_SOCKET(sd); opal_argv_free(ports); @@ -402,7 +402,7 @@ static int create_listen(void) } /* done with this, so release it */ opal_argv_free(ports); - + if (0 == opal_list_get_size(&mca_oob_tcp_component.listeners)) { /* cleanup */ if (0 <= sd) { @@ -416,8 +416,8 @@ static int create_listen(void) #if OPAL_ENABLE_IPV6 /* - * Create an IPv6 listen socket and bind to all interfaces. - * + * Create an IPv6 listen socket and bind to all interfaces. + * * At one time, this also registered a callback with the event library * for when connections were received on the listen socket. This is * no longer the case -- the caller must register any events required. 
@@ -520,7 +520,7 @@ static int create_listen6(void) if (NULL == ports) { return ORTE_ERROR; } - + /* get the address info for this interface */ ((struct sockaddr_in6*) &inaddr)->sin6_family = AF_INET6; ((struct sockaddr_in6*) &inaddr)->sin6_addr = in6addr_any; @@ -542,12 +542,12 @@ static int create_listen6(void) port = htons(port); ((struct sockaddr_in6*) &inaddr)->sin6_port = port; - + /* create a listen socket for incoming connections on this port */ sd = socket(AF_INET6, SOCK_STREAM, 0); if (sd < 0) { if (EAFNOSUPPORT != opal_socket_errno) { - opal_output(0,"mca_oob_tcp_component_init: socket() failed: %s (%d)", + opal_output(0,"mca_oob_tcp_component_init: socket() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno); } return ORTE_ERR_IN_ERRNO; @@ -577,7 +577,7 @@ static int create_listen6(void) opal_argv_free(ports); return ORTE_ERROR; } - + if (bind(sd, (struct sockaddr*)&inaddr, addrlen) < 0) { if( (EADDRINUSE == opal_socket_errno) || (EADDRNOTAVAIL == opal_socket_errno) ) { continue; @@ -593,28 +593,28 @@ static int create_listen6(void) } /* resolve assigned port */ if (getsockname(sd, (struct sockaddr*)&inaddr, &addrlen) < 0) { - opal_output(0, "mca_oob_tcp_create_listen: getsockname(): %s (%d)", + opal_output(0, "mca_oob_tcp_create_listen: getsockname(): %s (%d)", strerror(opal_socket_errno), opal_socket_errno); CLOSE_THE_SOCKET(sd); return ORTE_ERROR; } - + /* setup listen backlog to maximum allowed by kernel */ if (listen(sd, SOMAXCONN) < 0) { - opal_output(0, "mca_oob_tcp_component_init: listen(): %s (%d)", + opal_output(0, "mca_oob_tcp_component_init: listen(): %s (%d)", strerror(opal_socket_errno), opal_socket_errno); return ORTE_ERROR; } - + /* set socket up to be non-blocking, otherwise accept could block */ if ((flags = fcntl(sd, F_GETFL, 0)) < 0) { - opal_output(0, "mca_oob_tcp_component_init: fcntl(F_GETFL) failed: %s (%d)", + opal_output(0, "mca_oob_tcp_component_init: fcntl(F_GETFL) failed: %s (%d)", strerror(opal_socket_errno), 
opal_socket_errno); return ORTE_ERROR; } flags |= O_NONBLOCK; if (fcntl(sd, F_SETFL, flags) < 0) { - opal_output(0, "mca_oob_tcp_component_init: fcntl(F_SETFL) failed: %s (%d)", + opal_output(0, "mca_oob_tcp_component_init: fcntl(F_SETFL) failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno); return ORTE_ERROR; } @@ -647,7 +647,7 @@ static int create_listen6(void) /* done with this, so release it */ opal_argv_free(ports); - + return ORTE_SUCCESS; } #endif diff --git a/orte/mca/oob/tcp/oob_tcp_listener.h b/orte/mca/oob/tcp/oob_tcp_listener.h index 182e083d591..fe039e57214 100644 --- a/orte/mca/oob/tcp/oob_tcp_listener.h +++ b/orte/mca/oob/tcp/oob_tcp_listener.h @@ -5,17 +5,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/oob/tcp/oob_tcp_peer.h b/orte/mca/oob/tcp/oob_tcp_peer.h index c4c13977550..6201903c937 100644 --- a/orte/mca/oob/tcp/oob_tcp_peer.h +++ b/orte/mca/oob/tcp/oob_tcp_peer.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -90,7 +90,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_peer_op_t); opal_event_set_priority(&pop->ev, ORTE_MSG_PRI); \ opal_event_active(&pop->ev, OPAL_EV_WRITE, 1); \ } while(0); - + #define ORTE_ACTIVATE_TCP_CMP_OP(p, cbfunc) \ do { \ mca_oob_tcp_peer_op_t *pop; \ diff --git a/orte/mca/oob/tcp/oob_tcp_ping.h b/orte/mca/oob/tcp/oob_tcp_ping.h index 9ea7433c59b..350f8ac022e 100644 --- a/orte/mca/oob/tcp/oob_tcp_ping.h +++ b/orte/mca/oob/tcp/oob_tcp_ping.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index 35e72a702eb..e5ae0ebad06 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -196,12 +196,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name))); opal_event_del(&peer->send_event); msg->msg->status = rc; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; goto next; @@ -228,12 +223,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), (int)ntohl(msg->hdr.nbytes), peer->sd); msg->msg->status = ORTE_SUCCESS; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; } else if (NULL != msg->msg->data) { @@ -268,12 +258,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), (int)ntohl(msg->hdr.nbytes), peer->sd); msg->msg->status = ORTE_SUCCESS; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; } @@ -290,12 +275,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), peer->sd); opal_event_del(&peer->send_event); msg->msg->status = rc; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; ORTE_FORCED_TERMINATE(1); @@ -570,12 +550,9 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) 
peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) { /* yes - post it to the RML for delivery */ opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, - "%s DELIVERING TO RML tag = %d channel = %d seq_num = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - peer->recv_msg->hdr.tag, peer->recv_msg->hdr.channel, - peer->recv_msg->hdr.seq_num); + "%s DELIVERING TO RML", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag, - peer->recv_msg->hdr.channel, peer->recv_msg->hdr.seq_num, peer->recv_msg->data, peer->recv_msg->hdr.nbytes); OBJ_RELEASE(peer->recv_msg); @@ -591,8 +568,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) snd->origin = peer->recv_msg->hdr.origin; snd->tag = peer->recv_msg->hdr.tag; snd->data = peer->recv_msg->data; - snd->dst_channel = peer->recv_msg->hdr.channel; - snd->seq_num = peer->recv_msg->hdr.seq_num; snd->count = peer->recv_msg->hdr.nbytes; snd->cbfunc.iov = NULL; snd->cbdata = NULL; diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.h b/orte/mca/oob/tcp/oob_tcp_sendrecv.h index d8ac555b966..6e42ffb0e6f 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.h +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -107,19 +107,16 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); mca_oob_tcp_send_t *msg; \ int i; \ opal_output_verbose(5, orte_oob_base_framework.framework_output, \ - "%s:[%s:%d] queue send to %s channel =%d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__, \ - ORTE_NAME_PRINT(&((m)->dst)), \ - (m)->dst_channel); \ + "%s:[%s:%d] queue send to %s", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + __FILE__, __LINE__, \ + ORTE_NAME_PRINT(&((m)->dst))); \ msg = OBJ_NEW(mca_oob_tcp_send_t); \ /* setup the header */ \ msg->hdr.origin = (m)->origin; \ msg->hdr.dst = (m)->dst; \ msg->hdr.type = MCA_OOB_TCP_USER; \ msg->hdr.tag = (m)->tag; \ - msg->hdr.channel = (m)->dst_channel; \ - msg->hdr.seq_num = (m)->seq_num; \ /* point to the actual message */ \ msg->msg = (m); \ /* set the total number of bytes to be sent */ \ @@ -163,8 +160,6 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); msg->hdr.dst = (m)->dst; \ msg->hdr.type = MCA_OOB_TCP_USER; \ msg->hdr.tag = (m)->tag; \ - msg->hdr.channel = (m)->dst_channel; \ - msg->hdr.seq_num = (m)->seq_num; \ /* point to the actual message */ \ msg->msg = (m); \ /* set the total number of bytes to be sent */ \ diff --git a/orte/mca/oob/ud/Makefile.am b/orte/mca/oob/ud/Makefile.am index 0924501c020..e3004aec8e2 100644 --- a/orte/mca/oob/ud/Makefile.am +++ b/orte/mca/oob/ud/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/oob/ud/configure.m4 b/orte/mca/oob/ud/configure.m4 index 246592eb7e6..70b4edbc24a 100644 --- a/orte/mca/oob/ud/configure.m4 +++ b/orte/mca/oob/ud/configure.m4 @@ -6,15 +6,15 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/oob/ud/oob_ud_component.c b/orte/mca/oob/ud/oob_ud_component.c index ee759cb05f1..90b925146bf 100644 --- a/orte/mca/oob/ud/oob_ud_component.c +++ b/orte/mca/oob/ud/oob_ud_component.c @@ -19,6 +19,8 @@ #include "orte_config.h" #include "orte/types.h" #include "opal/types.h" +#include "opal/align.h" +#include "opal/util/sys_limits.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/runtime/orte_globals.h" @@ -40,9 +42,6 @@ static void mca_oob_ud_component_shutdown(void); static char* mca_oob_ud_component_get_addr(void); static int mca_oob_ud_component_set_addr(orte_process_name_t *peer, char **uris); static bool mca_oob_ud_component_is_reachable(orte_process_name_t *peer); -#if OPAL_ENABLE_FT_CR == 1 -static int mca_oob_ud_component_ft_event(int state); -#endif // OPAL_ENABLE_FT_CR static int mca_oob_ud_listen_create (mca_oob_ud_port_t *port); static int mca_oob_ud_listen_destroy (mca_oob_ud_port_t *port); @@ -83,10 +82,7 @@ mca_oob_ud_component_t mca_oob_ud_component = { .send_nb = mca_oob_ud_component_send_nb, //send_nb .get_addr = mca_oob_ud_component_get_addr, .set_addr = mca_oob_ud_component_set_addr, - .is_reachable = 
mca_oob_ud_component_is_reachable, //is_reachable -#if OPAL_ENABLE_FT_CR == 1 - .ft_event = mca_oob_ud_component_ft_event, -#endif // OPAL_ENABLE_FT_CR + .is_reachable = mca_oob_ud_component_is_reachable //is_reachable }, }; @@ -552,13 +548,6 @@ static int mca_oob_ud_component_set_addr(orte_process_name_t *peer, char **uris) return ORTE_SUCCESS; } -#if OPAL_ENABLE_FT_CR == 1 -static int mca_oob_ud_component_ft_event(int state) { - (void) state; - return ORTE_SUCCESS; -} -#endif // OPAL_ENABLE_FT_CR - static int mca_oob_ud_port_alloc_buffers (mca_oob_ud_port_t *port) { int total_buffer_count = mca_oob_ud_component.ud_recv_buffer_count + mca_oob_ud_component.ud_send_buffer_count; @@ -678,15 +667,21 @@ static inline int mca_oob_ud_port_recv_start (mca_oob_ud_port_t *port) static inline int mca_oob_ud_alloc_reg_mem (struct ibv_pd *pd, mca_oob_ud_reg_mem_t *reg_mem, const int buffer_len) { + size_t buffer_len_aligned, page_size; reg_mem->len = buffer_len; reg_mem->ptr = NULL; reg_mem->mr = NULL; - + /* The allocated buffer should be a multiple of page size. + If ibv_fork_init() has been invoked the pages are marked MADV_DONTFORK. 
+ If we only partially use a page, any data allocated on the remainder of + the page will be inaccessible to the child process */ + page_size = opal_getpagesize(); + buffer_len_aligned = OPAL_ALIGN(buffer_len, page_size, size_t); opal_output_verbose(5, orte_oob_base_framework.framework_output, "%s oob:ud:alloc_reg_mem allocing and registering %d bytes of memory with pd %p", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), buffer_len, (void *) pd); - posix_memalign ((void **)&reg_mem->ptr, sysconf(_SC_PAGESIZE), buffer_len); + posix_memalign ((void **)&reg_mem->ptr, page_size, buffer_len_aligned); if (NULL == reg_mem->ptr) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; diff --git a/orte/mca/oob/ud/oob_ud_event.c b/orte/mca/oob/ud/oob_ud_event.c index d7434013f23..a7374f84138 100644 --- a/orte/mca/oob/ud/oob_ud_event.c +++ b/orte/mca/oob/ud/oob_ud_event.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights * reserved. * 2014 Mellanox Technologies, Inc. * All rights reserved.
@@ -53,9 +53,6 @@ static bool event_completed_set = false; void mca_oob_ud_event_start_monitor (mca_oob_ud_device_t *device) { if (!event_started) { -#if !OPAL_ENABLE_PROGRESS_THREADS - opal_progress_event_users_increment (); -#endif opal_event_set (orte_event_base, &device->event, device->ib_channel->fd, OPAL_EV_READ, mca_oob_ud_event_dispatch, (void *) device); opal_event_add (&device->event, NULL); @@ -66,9 +63,6 @@ void mca_oob_ud_event_start_monitor (mca_oob_ud_device_t *device) void mca_oob_ud_event_stop_monitor (mca_oob_ud_device_t *device) { if (event_started) { -#if !OPAL_ENABLE_PROGRESS_THREADS - opal_progress_event_users_decrement (); -#endif opal_event_del (&device->event); mca_oob_ud_stop_events (device); event_started = false; diff --git a/orte/mca/oob/ud/oob_ud_qp.c b/orte/mca/oob/ud/oob_ud_qp.c index 4718a3b5692..1fde08c4fa1 100644 --- a/orte/mca/oob/ud/oob_ud_qp.c +++ b/orte/mca/oob/ud/oob_ud_qp.c @@ -40,6 +40,7 @@ int mca_oob_ud_qp_init (mca_oob_ud_qp_t *qp, struct mca_oob_ud_port_t *port, struct ibv_comp_channel *send_channel, bool onecq) { struct ibv_qp_init_attr init_attr; + int max_cqe = min(port->device->attr.max_cqe, 16384); opal_output_verbose(10, orte_oob_base_framework.framework_output, "%s oob:ud:qp_init creating UD QP on port %d", @@ -50,20 +51,19 @@ int mca_oob_ud_qp_init (mca_oob_ud_qp_t *qp, struct mca_oob_ud_port_t *port, init_attr.qp_type = IBV_QPT_UD; - int cqe = 16384; - qp->ib_recv_cq = ibv_create_cq (port->device->ib_context, cqe, + qp->ib_recv_cq = ibv_create_cq (port->device->ib_context, max_cqe, port, recv_channel, 0); if (NULL == qp->ib_recv_cq) { orte_show_help("help-oob-ud.txt", "create-cq-failed", true, - orte_process_info.nodename, cqe, strerror(errno)); + orte_process_info.nodename, max_cqe, strerror(errno)); return ORTE_ERROR; } if (false == onecq) { - qp->ib_send_cq = ibv_create_cq (port->device->ib_context, cqe, + qp->ib_send_cq = ibv_create_cq (port->device->ib_context, max_cqe, port, send_channel, 0); if (NULL == 
qp->ib_send_cq) { orte_show_help("help-oob-ud.txt", "create-cq-failed", true, - orte_process_info.nodename, cqe, strerror(errno)); + orte_process_info.nodename, max_cqe, strerror(errno)); return ORTE_ERROR; } } else { diff --git a/orte/mca/oob/ud/oob_ud_recv.c b/orte/mca/oob/ud/oob_ud_recv.c index fb1e4ef4910..02b76c920fc 100644 --- a/orte/mca/oob/ud/oob_ud_recv.c +++ b/orte/mca/oob/ud/oob_ud_recv.c @@ -4,7 +4,6 @@ * reserved. * 2014 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -83,8 +82,7 @@ int mca_oob_ud_get_recv_req (const orte_process_name_t name, const int tag, req->req_origin = name; req->req_tag = tag; - req->req_channel = ORTE_RML_INVALID_CHANNEL_NUM; - req->req_seq_num = 0; + /* this receive was not expected */ req->type = MCA_OOB_UD_REQ_RECV; @@ -492,8 +490,6 @@ int mca_oob_ud_recv_match_send (mca_oob_ud_port_t *port, mca_oob_ud_peer_t *peer req->req_origin = msg_hdr->msg_origin; req->req_target = msg_hdr->msg_target; req->req_rem_data_len = msg_hdr->msg_data.req.data_len; - req->req_channel = msg_hdr->msg_channel; - req->req_seq_num = msg_hdr->msg_seq_num; do { rc = mca_oob_ud_recv_alloc (req); diff --git a/orte/mca/oob/ud/oob_ud_req.c b/orte/mca/oob/ud/oob_ud_req.c index 3018fc75ba0..9c510240735 100644 --- a/orte/mca/oob/ud/oob_ud_req.c +++ b/orte/mca/oob/ud/oob_ud_req.c @@ -4,7 +4,6 @@ * reserved. * 2014 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -287,11 +286,7 @@ void mca_oob_ud_req_complete (mca_oob_ud_req_t *req, int rc) case MCA_OOB_UD_REQ_SEND: if (req->req_data_type != MCA_OOB_UD_REQ_TR) { req->rml_msg->status = rc; - if( NULL == req->rml_msg->channel) { - ORTE_RML_SEND_COMPLETE(req->rml_msg); - } else { - ORTE_QOS_SEND_COMPLETE(req->rml_msg); - } + ORTE_RML_SEND_COMPLETE(req->rml_msg); } break; case MCA_OOB_UD_REQ_RECV: @@ -307,11 +302,11 @@ void mca_oob_ud_req_complete (mca_oob_ud_req_t *req, int rc) memcpy (&data[datalen], req->req_data.iov.uiov[i].iov_base, req->req_data.iov.uiov[i].iov_len); datalen += req->req_data.iov.uiov[i].iov_len; } - ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, req->req_channel, req->req_seq_num, data, datalen); + ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, data, datalen); free(data); } else { - ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, req->req_channel, req->req_seq_num, - req->req_data.buf.p, req->req_data.buf.size); + ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, + req->req_data.buf.p, req->req_data.buf.size); } } else { opal_output_verbose(1, orte_oob_base_framework.framework_output, @@ -323,8 +318,7 @@ void mca_oob_ud_req_complete (mca_oob_ud_req_t *req, int rc) snd->dst = req->req_target; snd->origin = req->req_origin; snd->tag = req->req_tag; - snd->dst_channel = req->req_channel; - snd->seq_num = req->req_seq_num; + if (MCA_OOB_UD_REQ_IOV == req->req_data_type) { char *data = (char *)calloc(req->req_data.iov.count, sizeof(struct iovec)); int datalen = 0; diff --git a/orte/mca/oob/ud/oob_ud_req.h b/orte/mca/oob/ud/oob_ud_req.h index 67644017822..b718ed758ee 100644 --- a/orte/mca/oob/ud/oob_ud_req.h +++ b/orte/mca/oob/ud/oob_ud_req.h @@ -4,7 +4,6 @@ * reserved. * 2014 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -71,8 +70,6 @@ struct mca_oob_ud_msg_hdr_t { orte_process_name_t msg_origin; orte_process_name_t msg_target; - int msg_channel; - int msg_seq_num; uint64_t msg_id; @@ -157,8 +154,6 @@ struct mca_oob_ud_req_t { }req_data; int req_tag; - int req_channel; - int req_seq_num; int req_rc; void *req_cbdata; diff --git a/orte/mca/oob/ud/oob_ud_send.c b/orte/mca/oob/ud/oob_ud_send.c index 7c800e76512..13f4a6bfb0a 100644 --- a/orte/mca/oob/ud/oob_ud_send.c +++ b/orte/mca/oob/ud/oob_ud_send.c @@ -4,7 +4,6 @@ * reserved. * 2014 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -107,11 +106,7 @@ static int mca_oob_ud_send_self (orte_rml_send_t *msg) req->rml_msg->status = ORTE_SUCCESS; - if( NULL == req->rml_msg->channel) { - ORTE_RML_SEND_COMPLETE(req->rml_msg); - } else { - ORTE_QOS_SEND_COMPLETE(req->rml_msg); - } + ORTE_RML_SEND_COMPLETE(req->rml_msg); return size; } @@ -166,8 +161,6 @@ int mca_oob_ud_process_send_nb(int fd, short args, void *cbdata) send_req->req_target = op->msg->dst; send_req->req_origin = op->msg->origin; send_req->req_tag = op->msg->tag; - send_req->req_channel = op->msg->dst_channel; - send_req->req_seq_num = op->msg->seq_num; if (op->msg->data != NULL) { size = op->msg->count; @@ -234,8 +227,6 @@ int mca_oob_ud_process_send_nb(int fd, short args, void *cbdata) req_msg->hdr->msg_origin = op->msg->origin; req_msg->hdr->msg_target = op->msg->dst; - req_msg->hdr->msg_channel = op->msg->dst_channel; - req_msg->hdr->msg_seq_num = op->msg->seq_num; req_msg->hdr->msg_data.req.data_len = size; req_msg->hdr->msg_data.req.mtu = port->mtu; diff --git a/orte/mca/oob/usock/Makefile.am b/orte/mca/oob/usock/Makefile.am index 307a61693ec..b44934e8b6a 100644 --- a/orte/mca/oob/usock/Makefile.am +++ b/orte/mca/oob/usock/Makefile.am @@ -5,7 +5,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and 
The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # All rights reserved # Copyright (c) 2013-2015 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/oob/usock/configure.m4 b/orte/mca/oob/usock/configure.m4 index 645333022f8..c9a1b59f50a 100644 --- a/orte/mca/oob/usock/configure.m4 +++ b/orte/mca/oob/usock/configure.m4 @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -15,9 +15,9 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -27,9 +27,9 @@ AC_DEFUN([MCA_orte_oob_usock_CONFIG],[ AC_CONFIG_FILES([orte/mca/oob/usock/Makefile]) # check for sockaddr_un (a good sign we have Unix domain sockets) - AC_CHECK_TYPES([struct sockaddr_un], + AC_CHECK_TYPES([struct sockaddr_un], [oob_usock_happy="yes"], - [oob_usock_happy="no"], + [oob_usock_happy="no"], [AC_INCLUDES_DEFAULT #ifdef HAVE_SYS_SOCKET_H #include <sys/socket.h> diff --git a/orte/mca/oob/usock/help-oob-usock.txt b/orte/mca/oob/usock/help-oob-usock.txt index 452fef60803..6eb8ac0542a 100644 --- a/orte/mca/oob/usock/help-oob-usock.txt +++ b/orte/mca/oob/usock/help-oob-usock.txt @@ -6,14 +6,14 @@ # Copyright (c) 2004-2006 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # [static-and-dynamic] diff --git a/orte/mca/oob/usock/oob_usock.c b/orte/mca/oob/usock/oob_usock.c index 7814ae0d7cf..ecf459fb515 100644 --- a/orte/mca/oob/usock/oob_usock.c +++ b/orte/mca/oob/usock/oob_usock.c @@ -9,11 +9,13 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -72,7 +74,6 @@ static void accept_connection(const int accepted_fd, const struct sockaddr *addr); static void ping(const orte_process_name_t *proc); static void send_nb(orte_rml_send_t *msg); -static void ft_event(int state); mca_oob_usock_module_t mca_oob_usock_module = { { @@ -81,7 +82,7 @@ mca_oob_usock_module_t mca_oob_usock_module = { accept_connection, ping, send_nb, - ft_event + NULL } }; @@ -217,7 +218,7 @@ static void process_ping(int fd, short args, void *cbdata) } /* if we are already connecting, there is nothing to do */ - if (MCA_OOB_USOCK_CONNECTING == peer->state && + if (MCA_OOB_USOCK_CONNECTING == peer->state || MCA_OOB_USOCK_CONNECT_ACK == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] already connecting to peer %s", @@ -251,6 +252,7 @@ static void process_send(int fd, short args, void *cbdata) { mca_oob_usock_msg_op_t *op = (mca_oob_usock_msg_op_t*)cbdata; mca_oob_usock_peer_t *peer; + struct timeval tv; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] processing send to peer %s", @@ -266,7 +268,7 @@ static void process_send(int fd, short args, void *cbdata) /* we don't know how to talk to our daemon, * which is strange since we already got here. 
* likely means we lost a race condition, so - * + * */ ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, op->msg, ORTE_PROC_MY_DAEMON, @@ -278,14 +280,24 @@ static void process_send(int fd, short args, void *cbdata) * message to send is if the proc is local to me */ if (NULL == (peer = mca_oob_usock_peer_lookup(&op->msg->dst))) { + /* try this again after a delay for N times */ + op->reps++; + if (20 < op->reps) { /* we don't know how to talk to this proc, - * so send this back up to the OOB base so it - * can try another transport - */ - ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, op->msg, - &op->msg->dst, - mca_oob_usock_component_cannot_send); - goto cleanup; + * so send this back up to the OOB base so it + * can try another transport + */ + ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, op->msg, + &op->msg->dst, + mca_oob_usock_component_cannot_send); + goto cleanup; + } + opal_event_evtimer_set(orte_event_base, &op->ev, process_send, op); + opal_event_set_priority(&op->ev, ORTE_ERROR_PRI); + tv.tv_sec = 1; + tv.tv_usec = 0; + opal_event_evtimer_add(&op->ev, &tv); + return; } } else { /* otherwise, this message can't be handled by me, so @@ -305,6 +317,10 @@ static void process_send(int fd, short args, void *cbdata) goto cleanup; } + if (MCA_OOB_USOCK_CLOSED == peer->state) { + /* the peer has gone, it will never come back */ + goto cleanup; + } /* add the message to the queue for sending after the * connection is formed */ @@ -345,14 +361,14 @@ static void send_nb(orte_rml_send_t *msg) * socket to recv. This is called for the listen sockets to accept an * incoming connection, on new sockets trying to complete the software * connection process, and for probes. Data on an established - * connection is handled elsewhere. + * connection is handled elsewhere. 
*/ static void recv_handler(int sd, short flags, void *cbdata) { mca_oob_usock_conn_op_t *op = (mca_oob_usock_conn_op_t*)cbdata; mca_oob_usock_hdr_t hdr; mca_oob_usock_peer_t *peer; - uint64_t *ui64; + uint64_t ui64; opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:usock:recv:handler called", @@ -392,8 +408,8 @@ static void recv_handler(int sd, short flags, void *cbdata) peer->state); } CLOSE_THE_SOCKET(sd); - ui64 = (uint64_t*)(&peer->name); - opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, (*ui64), NULL); + memcpy(&ui64, &peer->name, sizeof(uint64_t)); + opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, NULL); OBJ_RELEASE(peer); } } @@ -401,73 +417,3 @@ static void recv_handler(int sd, short flags, void *cbdata) cleanup: OBJ_RELEASE(op); } - -/* Dummy function for when we are not using FT. */ -#if OPAL_ENABLE_FT_CR == 0 -static void ft_event(int state) -{ - return; -} - -#else -static void ft_event(int state) { -#if 0 - opal_list_item_t *item; -#endif - - if(OPAL_CRS_CHECKPOINT == state) { -#if 0 - /* - * Disable event processing while we are working - */ - opal_event_disable(); -#endif - } - else if(OPAL_CRS_CONTINUE == state) { -#if 0 - /* - * Resume event processing - */ - opal_event_enable(); - } - else if(OPAL_CRS_RESTART == state) { - /* - * Clean out cached connection information - * Select pieces of finalize/init - */ - for (item = opal_list_remove_first(&mod->peer_list); - item != NULL; - item = opal_list_remove_first(&mod->peer_list)) { - mca_oob_usock_peer_t* peer = (mca_oob_usock_peer_t*)item; - /* JJH: Use the below command for debugging restarts with invalid sockets - * mca_oob_usock_peer_dump(peer, "RESTART CLEAN") - */ - MCA_OOB_USOCK_PEER_RETURN(peer); - } - - OBJ_DESTRUCT(&mod->peer_free); - OBJ_DESTRUCT(&mod->peer_names); - OBJ_DESTRUCT(&mod->peers); - OBJ_DESTRUCT(&mod->peer_list); - - OBJ_CONSTRUCT(&mod->peer_list, opal_list_t); - OBJ_CONSTRUCT(&mod->peers, 
opal_hash_table_t); - OBJ_CONSTRUCT(&mod->peer_names, opal_hash_table_t); - OBJ_CONSTRUCT(&mod->peer_free, opal_free_list_t); - - /* - * Resume event processing - */ - opal_event_enable(); -#endif - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return; -} -#endif diff --git a/orte/mca/oob/usock/oob_usock.h b/orte/mca/oob/usock/oob_usock.h index 0c692428d7c..f6fcbc56808 100644 --- a/orte/mca/oob/usock/oob_usock.h +++ b/orte/mca/oob/usock/oob_usock.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/oob/usock/oob_usock_component.c b/orte/mca/oob/usock/oob_usock_component.c index d3a364d07aa..336d018ee10 100644 --- a/orte/mca/oob/usock/oob_usock_component.c +++ b/orte/mca/oob/usock/oob_usock_component.c @@ -15,6 +15,8 @@ * Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -177,8 +179,6 @@ static int component_available(void) /* direct-launched apps cannot use it */ return ORTE_ERR_NOT_AVAILABLE; } - /* apps launched by daemons *must* use it */ - return ORTE_ERR_FORCE_SELECT; } /* otherwise, we are available */ @@ -205,6 +205,7 @@ static void connection_event_handler(int incoming_sd, short flags, void* cbdata) static int component_startup(void) { int rc=ORTE_SUCCESS; + char *usock_path; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s USOCK STARTUP", @@ -213,11 +214,29 @@ static int component_startup(void) /* setup the path to the daemon rendezvous point */ memset(&mca_oob_usock_component.address, 0, sizeof(struct sockaddr_un)); mca_oob_usock_component.address.sun_family = AF_UNIX; - snprintf(mca_oob_usock_component.address.sun_path, - sizeof(mca_oob_usock_component.address.sun_path)-1, + asprintf(&usock_path, "%s/%s/%s/0/%s", orte_process_info.tmpdir_base, orte_process_info.top_session_dir, ORTE_JOB_FAMILY_PRINT(ORTE_PROC_MY_NAME->jobid), "usock"); + if (NULL == usock_path) { + rc = ORTE_ERR_OUT_OF_RESOURCE; + ORTE_ERROR_LOG(rc); + } + + /* If usock_path is too long, just fail, so the caller + * may provide the user with a proper help... *Cough*, *Cough* OSX... 
*/ + if ((strlen(usock_path) + 1) > sizeof(mca_oob_usock_component.address.sun_path)) { + opal_output_verbose(2, orte_oob_base_framework.framework_output, + "usock path too long: strlen(%s) > %d\nyou might want to check you $TMPDIR or $TMP environment variable", + usock_path, (int)sizeof(mca_oob_usock_component.address.sun_path)-1); + free(usock_path); + return ORTE_ERR_NOT_SUPPORTED; + + } + + strncpy(mca_oob_usock_component.address.sun_path, usock_path, sizeof(mca_oob_usock_component.address.sun_path)-1); + free(usock_path); + opal_output_verbose(2, orte_oob_base_framework.framework_output, "SUNPATH: %s", mca_oob_usock_component.address.sun_path); @@ -265,9 +284,9 @@ static int component_send(orte_rml_send_t *msg) orte_proc_t *proc; opal_output_verbose(5, orte_oob_base_framework.framework_output, - "%s oob:usock:send_nb to peer %s:%d to channel=%d seq_num =%d", + "%s oob:usock:send_nb to peer %s:%d ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&msg->dst), msg->tag, msg->dst_channel, msg->seq_num); + ORTE_NAME_PRINT(&msg->dst), msg->tag); if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) { /* daemons can only reach local procs */ @@ -307,8 +326,9 @@ static int component_set_addr(orte_process_name_t *peer, { orte_proc_t *proc; mca_oob_usock_peer_t *pr; - uint64_t *ui64; + uint64_t ui64; + memcpy(&ui64, peer, sizeof(uint64_t)); /* if I am an application, then everything is addressable * by me via my daemon */ @@ -316,12 +336,11 @@ static int component_set_addr(orte_process_name_t *peer, /* if this is my daemon, then take it - otherwise, ignore */ if (ORTE_PROC_MY_DAEMON->jobid == peer->jobid && ORTE_PROC_MY_DAEMON->vpid == peer->vpid) { - ui64 = (uint64_t*)peer; if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers, - (*ui64), (void**)&pr) || NULL == pr) { + ui64, (void**)&pr) || NULL == pr) { pr = OBJ_NEW(mca_oob_usock_peer_t); pr->name = *peer; - opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, (*ui64), pr); + 
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, pr); } /* we have to initiate the connection because otherwise the * daemon has no way to communicate to us via this component @@ -346,12 +365,11 @@ static int component_set_addr(orte_process_name_t *peer, return ORTE_ERR_TAKE_NEXT_OPTION; } /* indicate that this peer is addressable by this component */ - ui64 = (uint64_t*)peer; if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers, - (*ui64), (void**)&pr) || NULL == pr) { + ui64, (void**)&pr) || NULL == pr) { pr = OBJ_NEW(mca_oob_usock_peer_t); pr->name = *peer; - opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, (*ui64), pr); + opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, pr); } return ORTE_SUCCESS; } @@ -579,9 +597,13 @@ OBJ_CLASS_INSTANCE(mca_oob_usock_peer_op_t, opal_object_t, NULL, NULL); +static void mopcon(mca_oob_usock_msg_op_t *p) +{ + p->reps = 0; +} OBJ_CLASS_INSTANCE(mca_oob_usock_msg_op_t, opal_object_t, - NULL, NULL); + mopcon, NULL); OBJ_CLASS_INSTANCE(mca_oob_usock_conn_op_t, opal_object_t, diff --git a/orte/mca/oob/usock/oob_usock_component.h b/orte/mca/oob/usock/oob_usock_component.h index f193bb159c6..a0bc004e793 100644 --- a/orte/mca/oob/usock/oob_usock_component.h +++ b/orte/mca/oob/usock/oob_usock_component.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/oob/usock/oob_usock_connection.c b/orte/mca/oob/usock/oob_usock_connection.c index 3f247c9bd25..0ac2b34c09b 100644 --- a/orte/mca/oob/usock/oob_usock_connection.c +++ b/orte/mca/oob/usock/oob_usock_connection.c @@ -14,8 +14,9 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -53,7 +54,6 @@ #include "opal_stdint.h" #include "opal/mca/backtrace/backtrace.h" #include "opal/mca/base/mca_base_var.h" -#include "opal/mca/dstore/dstore.h" #include "opal/mca/sec/sec.h" #include "opal/util/output.h" #include "opal/util/net.h" @@ -86,7 +86,7 @@ static int usock_peer_create_socket(mca_oob_usock_peer_t* peer) { int flags; - if (peer->sd > 0) { + if (peer->sd >=0) { return ORTE_SUCCESS; } @@ -288,12 +288,9 @@ static int usock_peer_send_connect_ack(mca_oob_usock_peer_t* peer) hdr.dst = peer->name; hdr.type = MCA_OOB_USOCK_IDENT; hdr.tag = 0; - hdr.channel = 0xffffffff; - hdr.seq_num = 0; /* get our security credential*/ if (OPAL_SUCCESS != (rc = opal_sec.get_my_credential(peer->auth_method, - opal_dstore_internal, ORTE_PROC_MY_NAME, &cred, &credsize))) { ORTE_ERROR_LOG(rc); return rc; @@ -362,8 +359,11 @@ static void usock_peer_event_init(mca_oob_usock_peer_t* peer) */ void mca_oob_usock_peer_complete_connect(mca_oob_usock_peer_t *peer) { - int so_error = 0; + int so_error = 0, rc; opal_socklen_t so_length = sizeof(so_error); + orte_oob_base_peer_t *bpr; + uint64_t ui64; + 
mca_oob_usock_peer_t *pr; opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:usock:complete_connect called for peer %s on socket %d", @@ -434,6 +434,29 @@ void mca_oob_usock_peer_complete_connect(mca_oob_usock_peer_t *peer) peer->state = MCA_OOB_USOCK_FAILED; mca_oob_usock_peer_close(peer); } + + /* make sure the OOB knows that we are handling this peer - we + * are in the same event base as the OOB base, so we can + * directly access its storage + */ + memcpy(&ui64, (char*)&(peer->name), sizeof(uint64_t)); + if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers, + ui64, (void**)&bpr) || NULL == bpr) { + bpr = OBJ_NEW(orte_oob_base_peer_t); + } + opal_bitmap_set_bit(&bpr->addressable, mca_oob_usock_component.super.idx); + bpr->component = &mca_oob_usock_component.super; + if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers, + ui64, bpr))) { + ORTE_ERROR_LOG(rc); + } + /* record it locally too */ + if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers, + ui64, (void**)&pr) || NULL == pr) { + pr = OBJ_NEW(mca_oob_usock_peer_t); + pr->name = peer->name; + opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, pr); + } } /* @@ -491,7 +514,7 @@ int mca_oob_usock_peer_recv_connect_ack(mca_oob_usock_peer_t* pr, int sd, size_t credsize; mca_oob_usock_peer_t *peer; mca_oob_usock_hdr_t hdr; - uint64_t *ui64; + uint64_t ui64; opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s RECV CONNECT ACK FROM %s ON SOCKET %d", @@ -566,8 +589,8 @@ int mca_oob_usock_peer_recv_connect_ack(mca_oob_usock_peer_t* pr, int sd, peer->name = hdr.origin; peer->state = MCA_OOB_USOCK_ACCEPTING; peer->sd = sd; - ui64 = (uint64_t*)(&peer->name); - if (OPAL_SUCCESS != opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, (*ui64), peer)) { + memcpy(&ui64, &peer->name, sizeof(uint64_t)); + if (OPAL_SUCCESS != 
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, ui64, peer)) { OBJ_RELEASE(peer); CLOSE_THE_SOCKET(sd); return ORTE_ERR_UNREACH; @@ -737,6 +760,10 @@ void mca_oob_usock_peer_close(mca_oob_usock_peer_t *peer) { mca_oob_usock_send_t *snd; + if( NULL == peer ) { + return; + } + opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s usock_peer_close for %s sd %d state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -747,6 +774,7 @@ void mca_oob_usock_peer_close(mca_oob_usock_peer_t *peer) /* release the socket */ close(peer->sd); + peer->sd = -1; /* inform the component-level that we have lost a connection so * it can decide what to do about it. diff --git a/orte/mca/oob/usock/oob_usock_connection.h b/orte/mca/oob/usock/oob_usock_connection.h index 2a009d61834..fe98f6e09c9 100644 --- a/orte/mca/oob/usock/oob_usock_connection.h +++ b/orte/mca/oob/usock/oob_usock_connection.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/oob/usock/oob_usock_hdr.h b/orte/mca/oob/usock/oob_usock_hdr.h index c7cad2d998b..765600ff819 100644 --- a/orte/mca/oob/usock/oob_usock_hdr.h +++ b/orte/mca/oob/usock/oob_usock_hdr.h @@ -48,10 +48,6 @@ typedef struct { mca_oob_usock_msg_type_t type; /* the rml tag where this message is headed */ orte_rml_tag_t tag; - /* the rml channel to which this message is headed */ - orte_rml_channel_num_t channel; - /* msg seq number on the src channel */ - uint32_t seq_num; /* number of bytes in message */ uint32_t nbytes; } mca_oob_usock_hdr_t; diff --git a/orte/mca/oob/usock/oob_usock_peer.h b/orte/mca/oob/usock/oob_usock_peer.h index bd7c768f94e..cc715d4fdc8 100644 --- a/orte/mca/oob/usock/oob_usock_peer.h +++ b/orte/mca/oob/usock/oob_usock_peer.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -69,7 +69,7 @@ OBJ_CLASS_DECLARATION(mca_oob_usock_peer_op_t); opal_event_set_priority(&op->ev, ORTE_MSG_PRI); \ opal_event_active(&op->ev, OPAL_EV_WRITE, 1); \ } while(0); - + #define ORTE_ACTIVATE_USOCK_CMP_OP(p, cbfunc) \ do { \ mca_oob_usock_peer_op_t *pop; \ diff --git a/orte/mca/oob/usock/oob_usock_ping.h b/orte/mca/oob/usock/oob_usock_ping.h index ecf920f8f90..67badb8f05e 100644 --- a/orte/mca/oob/usock/oob_usock_ping.h +++ b/orte/mca/oob/usock/oob_usock_ping.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/mca/oob/usock/oob_usock_sendrecv.c b/orte/mca/oob/usock/oob_usock_sendrecv.c index b07e42956a3..34e285a4498 100644 --- a/orte/mca/oob/usock/oob_usock_sendrecv.c +++ b/orte/mca/oob/usock/oob_usock_sendrecv.c @@ -187,12 +187,7 @@ void mca_oob_usock_send_handler(int sd, short flags, void *cbdata) opal_event_del(&peer->send_event); peer->send_ev_active = false; msg->msg->status = rc; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; goto next; @@ -210,12 +205,7 @@ void mca_oob_usock_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), msg->hdr.nbytes, peer->sd); msg->msg->status = ORTE_SUCCESS; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; } else if (NULL != msg->msg->data) { @@ -246,12 +236,7 @@ void mca_oob_usock_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), msg->hdr.nbytes, peer->sd); msg->msg->status = ORTE_SUCCESS; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; } @@ -269,12 +254,7 @@ void mca_oob_usock_send_handler(int sd, short flags, void *cbdata) opal_event_del(&peer->send_event); peer->send_ev_active = false; msg->msg->status = rc; - if( NULL == msg->msg->channel) { - ORTE_RML_SEND_COMPLETE(msg->msg); - } - else { - ORTE_QOS_SEND_COMPLETE(msg->msg); - } + ORTE_RML_SEND_COMPLETE(msg->msg); OBJ_RELEASE(msg); peer->send_msg = NULL; ORTE_FORCED_TERMINATE(1); @@ -529,7 +509,6 @@ void mca_oob_usock_recv_handler(int sd, short flags, void *cbdata) "%s DELIVERING 
TO RML", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag, - peer->recv_msg->hdr.channel, peer->recv_msg->hdr.seq_num, peer->recv_msg->data, peer->recv_msg->hdr.nbytes); OBJ_RELEASE(peer->recv_msg); @@ -545,8 +524,6 @@ void mca_oob_usock_recv_handler(int sd, short flags, void *cbdata) snd->origin = peer->recv_msg->hdr.origin; snd->tag = peer->recv_msg->hdr.tag; snd->data = peer->recv_msg->data; - snd->dst_channel = peer->recv_msg->hdr.channel; - snd->seq_num = peer->recv_msg->hdr.seq_num; snd->count = peer->recv_msg->hdr.nbytes; snd->cbfunc.iov = NULL; snd->cbdata = NULL; diff --git a/orte/mca/oob/usock/oob_usock_sendrecv.h b/orte/mca/oob/usock/oob_usock_sendrecv.h index 65658da08c7..162de1db580 100644 --- a/orte/mca/oob/usock/oob_usock_sendrecv.h +++ b/orte/mca/oob/usock/oob_usock_sendrecv.h @@ -123,8 +123,6 @@ OBJ_CLASS_DECLARATION(mca_oob_usock_recv_t); msg->hdr.dst = (m)->dst; \ msg->hdr.type = MCA_OOB_USOCK_USER; \ msg->hdr.tag = (m)->tag; \ - msg->hdr.channel = (m)->dst_channel; \ - msg->hdr.seq_num = (m)->seq_num; \ /* point to the actual message */ \ msg->msg = (m); \ /* set the total number of bytes to be sent */ \ @@ -166,8 +164,6 @@ OBJ_CLASS_DECLARATION(mca_oob_usock_recv_t); msg->hdr.dst = (m)->dst; \ msg->hdr.type = MCA_OOB_USOCK_USER; \ msg->hdr.tag = (m)->tag; \ - msg->hdr.channel = (m)->dst_channel; \ - msg->hdr.seq_num = (m)->seq_num; \ /* point to the actual message */ \ msg->msg = (m); \ /* set the total number of bytes to be sent */ \ @@ -192,6 +188,7 @@ OBJ_CLASS_DECLARATION(mca_oob_usock_recv_t); typedef struct { opal_object_t super; opal_event_t ev; + int reps; orte_rml_send_t *msg; } mca_oob_usock_msg_op_t; OBJ_CLASS_DECLARATION(mca_oob_usock_msg_op_t); diff --git a/orte/mca/plm/Makefile.am b/orte/mca/plm/Makefile.am index da9498c799a..eaad41787ed 100644 --- a/orte/mca/plm/Makefile.am +++ b/orte/mca/plm/Makefile.am @@ -5,15 +5,15 @@ # Copyright (c) 2004-2005 The University of 
Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # diff --git a/orte/mca/plm/alps/Makefile.am b/orte/mca/plm/alps/Makefile.am index 9c400373ae4..94ca3b9ed2d 100644 --- a/orte/mca/plm/alps/Makefile.am +++ b/orte/mca/plm/alps/Makefile.am @@ -5,15 +5,17 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. All rights +# reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -39,8 +41,13 @@ endif mcacomponentdir = $(ortelibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_plm_alps_la_SOURCES = $(sources) -mca_plm_alps_la_LDFLAGS = -module -avoid-version +mca_plm_alps_la_CPPFLAGS = $(plm_alps_CPPFLAGS) +mca_plm_alps_la_LDFLAGS = -module -avoid-version $(plm_alps_LDFLAGS) +mca_plm_alps_la_LIBADD = $(plm_alps_LIBS) \ + $(ORTE_TOP_BUILDDIR)/orte/mca/common/alps/lib@ORTE_LIB_PREFIX@mca_common_alps.la noinst_LTLIBRARIES = $(component_noinst) libmca_plm_alps_la_SOURCES =$(sources) -libmca_plm_alps_la_LDFLAGS = -module -avoid-version +libmca_plm_alps_la_CPPFLAGS = $(plm_alps_CPPFLAGS) +libmca_plm_alps_la_LDFLAGS = -module -avoid-version $(plm_alps_LDFLAGS) +libmca_plm_alps_la_LIBADD = $(plm_alps_LIBS) diff --git a/orte/mca/plm/alps/configure.m4 b/orte/mca/plm/alps/configure.m4 index 5afa755998e..acef949581e 100644 --- a/orte/mca/plm/alps/configure.m4 +++ b/orte/mca/plm/alps/configure.m4 @@ -6,7 +6,7 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. @@ -14,9 +14,9 @@ # Copyright (c) 2011-2013 Los Alamos National Security, LLC. # All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -27,5 +27,11 @@ AC_DEFUN([MCA_orte_plm_alps_CONFIG],[ ORTE_CHECK_ALPS([plm_alps], [plm_alps_happy="yes"], [plm_alps_happy="no"]) - AS_IF([test "$plm_alps_happy" = "yes"], [$1], [$2]) + AS_IF([test "$plm_alps_happy" = "yes"], + [$1 + AC_SUBST([plm_alps_CPPFLAGS]) + AC_SUBST([plm_alps_LDFLAGS]) + AC_SUBST([plm_alps_LIBS])], + [$2]) + ])dnl diff --git a/orte/mca/plm/alps/help-plm-alps.txt b/orte/mca/plm/alps/help-plm-alps.txt index bb170841e18..f109299a862 100644 --- a/orte/mca/plm/alps/help-plm-alps.txt +++ b/orte/mca/plm/alps/help-plm-alps.txt @@ -6,14 +6,14 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # [multiple-prefixes] @@ -39,3 +39,7 @@ the map for this application. This can be caused by a lack of an allocation, or by an error in the Open MPI code. Please check to ensure you have a ALPS allocation. If you do, then please pass the error to the Open MPI user's mailing list for assistance. +# +[slurm-not-supported] +mpirun is not a supported launcher on Cray XC using Native SLURM. +srun must be used to launch jobs on these systems. diff --git a/orte/mca/plm/alps/plm_alps.h b/orte/mca/plm/alps/plm_alps.h index d837045aa98..d15ae07ffa0 100644 --- a/orte/mca/plm/alps/plm_alps.h +++ b/orte/mca/plm/alps/plm_alps.h @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,6 +23,9 @@ #include "orte/mca/mca.h" #include "orte/mca/plm/plm.h" +#if CRAY_WLM_DETECT +#include "wlm_detect.h" +#endif BEGIN_C_DECLS @@ -39,10 +42,11 @@ typedef struct orte_plm_alps_component_t orte_plm_alps_component_t; * Globally exported variable */ -ORTE_MODULE_DECLSPEC extern orte_plm_alps_component_t +ORTE_MODULE_DECLSPEC extern orte_plm_alps_component_t mca_plm_alps_component; ORTE_DECLSPEC extern orte_plm_base_module_t orte_plm_alps_module; +extern bool mca_plm_alps_using_aprun; END_C_DECLS diff --git a/orte/mca/plm/alps/plm_alps_component.c b/orte/mca/plm/alps/plm_alps_component.c index 23c0e6ca410..e474cd59130 100644 --- a/orte/mca/plm/alps/plm_alps_component.c +++ b/orte/mca/plm/alps/plm_alps_component.c @@ -6,16 +6,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -43,6 +43,7 @@ */ const char *mca_plm_alps_component_version_string = "Open MPI alps plm MCA component version " ORTE_VERSION; +bool mca_plm_alps_using_aprun = {true}; /* @@ -67,12 +68,12 @@ orte_plm_alps_component_t mca_plm_alps_component = { .base_version = { ORTE_PLM_BASE_VERSION_2_0_0, - + /* Component name and version */ .mca_component_name = "alps", MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, ORTE_RELEASE_VERSION), - + /* Component open and close functions */ .mca_open_component = plm_alps_open, .mca_close_component = plm_alps_close, @@ -136,8 +137,37 @@ static int plm_alps_open(void) static int orte_plm_alps_component_query(mca_base_module_t **module, int *priority) { +#if CRAY_WLM_DETECT + char slurm[]="SLURM"; + char *wlm_detected = NULL; + + wlm_detected = wlm_detect_get_active(); + + /* + * The content of wlm_detected.h indicates wlm_detect_get_active + * may return NULL upon failure. Resort to the suggested plan + * B in that event. 
+ */ + + if (NULL == wlm_detected) { + wlm_detected = (char *)wlm_detect_get_default(); + OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output, + "%s plm:alps: wlm_detect_get_active returned NULL, using %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wlm_detected)); + + } + + if((NULL != wlm_detected) && !strcmp(slurm, wlm_detected)) { + mca_plm_alps_using_aprun = false; + } +#endif + *priority = mca_plm_alps_component.priority; *module = (mca_base_module_t *) &orte_plm_alps_module; + OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, + "%s plm:alps: available for selection", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + return ORTE_SUCCESS; } diff --git a/orte/mca/plm/alps/plm_alps_module.c b/orte/mca/plm/alps/plm_alps_module.c index 15b67884550..8cf9c287fe8 100644 --- a/orte/mca/plm/alps/plm_alps_module.c +++ b/orte/mca/plm/alps/plm_alps_module.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2014 Intel Corporation. All rights reserved. * $COPYRIGHT$ * @@ -36,12 +36,8 @@ #include #endif #include -#ifdef HAVE_STDLIB_H #include -#endif -#ifdef HAVE_STRING_H #include -#endif #ifdef HAVE_SYS_TYPES_H #include #endif @@ -119,12 +115,29 @@ static void launch_daemons(int fd, short args, void *cbdata); static int plm_alps_init(void) { int rc; - + if (ORTE_SUCCESS != (rc = orte_plm_base_comm_start())) { ORTE_ERROR_LOG(rc); return rc; } + /* + * owing to way the SLURM PLM component works, we can't use + * it on Cray XC systems as currently designed. The problem + * is the MPI processes launched on the head node (where the + * ORTE_PROC_IS_HNP evalues to true) get launched by a daemon + * (mpirun) which is not a child of a slurmd daemon. This + * means that any RDMA credentials obtained via the odls/alps + * local launcher are incorrect. 
+ * + * So for now, we just don't support mpirun launched jobs + * on Cray XC systems using Native SLURM. + */ + if (false == mca_plm_alps_using_aprun) { + orte_show_help("help-plm-alps.txt", "slurm-not-supported", true); + exit(-1); + } + if (orte_do_not_launch) { /* must map daemons since we won't be launching them */ orte_plm_globals.daemon_nodes_assigned_at_launch = true; @@ -155,20 +168,6 @@ static int plm_alps_init(void) */ static int plm_alps_launch_job(orte_job_t *jdata) { - orte_app_context_t *app; - - for (int i = 0 ; i < jdata->apps->size ; ++i) { - int env_count; - - if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { - continue; - } - - for (env_count = 0 ; app->env && app->env[env_count] ; ++env_count); - /* disable PMI for the application. this will prevent the pmi library from printing useless warnings */ - opal_argv_append (&env_count, &app->env, "PMI_NO_FORK=1"); - opal_argv_append (&env_count, &app->env, "PMI_NO_PREINITIALIZE=1"); - } if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) { /* this is a restart situation - skip to the mapping stage */ @@ -256,7 +255,7 @@ static void launch_daemons(int fd, short args, void *cbdata) OBJ_RELEASE(state); return; } - + /* need integer value for command line parameter */ orte_util_convert_jobid_to_string(&jobid_string, daemons->jobid); @@ -292,6 +291,19 @@ static void launch_daemons(int fd, short args, void *cbdata) opal_argv_append(&argc, &argv, "1"); opal_argv_append(&argc, &argv, "-cc"); opal_argv_append(&argc, &argv, "none"); + /* + * stuff below is necessary in the event that we've sadly configured Open MPI with --disable-dlopen, + * which results in the orted's being linked against all kinds of unnecessary cray libraries, including + * the cray pmi, which has a ctor that cause bad things if run when using mpirun/orted based launch. + * + * Code below adds env. variables for aprun to forward which suppresses the action of the Cray PMI ctor. 
+ */ + opal_argv_append(&argc, &argv, "-e"); + opal_argv_append(&argc, &argv, "PMI_NO_PREINITIALIZE=1"); + opal_argv_append(&argc, &argv, "-e"); + opal_argv_append(&argc, &argv, "PMI_NO_FORK=1"); + opal_argv_append(&argc, &argv, "-e"); + opal_argv_append(&argc, &argv, "OMPI_NO_USE_CRAY_PMI=1"); /* create nodelist */ nodelist_argv = NULL; @@ -308,7 +320,7 @@ static void launch_daemons(int fd, short args, void *cbdata) if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) { continue; } - + /* otherwise, add it to the list of nodes upon which * we need to launch a daemon */ @@ -337,7 +349,7 @@ static void launch_daemons(int fd, short args, void *cbdata) /* add the daemon command (as specified by user) */ orte_plm_base_setup_orted_cmd(&argc, &argv); - + /* Add basic orted command line options, including debug flags */ orte_plm_base_orted_append_basic_args(&argc, &argv, NULL, @@ -408,7 +420,7 @@ static void launch_daemons(int fd, short args, void *cbdata) /* setup environment */ env = opal_argv_copy(orte_launch_environ); - + if (0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) { param = opal_argv_join(argv, ' '); OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, @@ -417,13 +429,13 @@ static void launch_daemons(int fd, short args, void *cbdata) (NULL == param) ? 
"NULL" : param)); if (NULL != param) free(param); } - + /* exec the daemon(s) */ if (ORTE_SUCCESS != (rc = plm_alps_start_proc(argc, argv, env, cur_prefix))) { ORTE_ERROR_LOG(rc); goto cleanup; } - + /* indicate that the daemons for this job were launched */ state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; daemons->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED; @@ -438,11 +450,11 @@ static void launch_daemons(int fd, short args, void *cbdata) if (NULL != env) { opal_argv_free(env); } - + if(NULL != jobid_string) { free(jobid_string); } - + /* cleanup the caddy */ OBJ_RELEASE(state); @@ -461,7 +473,7 @@ static int plm_alps_terminate_orteds(void) { int rc; orte_job_t *jdata; - + OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output, "%s plm:alps: terminating orteds", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -479,7 +491,7 @@ static int plm_alps_terminate_orteds(void) if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) { ORTE_ERROR_LOG(rc); } - + jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED); @@ -505,7 +517,7 @@ static int plm_alps_signal_job(orte_jobid_t jobid, int32_t signal) static int plm_alps_finalize(void) { int rc; - + if (NULL != alpsrun) { OBJ_RELEASE(alpsrun); } @@ -514,7 +526,7 @@ static int plm_alps_finalize(void) if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) { ORTE_ERROR_LOG(rc); } - + return ORTE_SUCCESS; } @@ -527,7 +539,7 @@ static void alps_wait_cb(orte_proc_t *proc, void* cbdata){ necessarily mean that alps failed - it could be that an orted returned a non-zero exit status. Of course, that means the orted failed(!), so the end result is the same - the job didn't start. 
- + As a result, we really can't do much with the exit status itself - it could be something in errno (if alps itself failed), or it could be something returned by an orted, or it could be something returned by @@ -538,13 +550,13 @@ static void alps_wait_cb(orte_proc_t *proc, void* cbdata){ wakes up - otherwise, do nothing! */ jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - + if (0 != proc->exit_code) { if (failed_launch) { /* report that the daemon has failed so we break out of the daemon * callback receive and exit */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_START); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_START); } else { /* an orted must have died unexpectedly after launch - report * that the daemon has failed so we exit @@ -571,7 +583,7 @@ static int plm_alps_start_proc(int argc, char **argv, char **env, ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN); return ORTE_ERR_SYS_LIMITS_CHILDREN; } - + alpsrun = OBJ_NEW(orte_proc_t); alpsrun->pid = alps_pid; /* be sure to mark it as alive so we don't instantly fire */ @@ -649,8 +661,8 @@ static int plm_alps_start_proc(int argc, char **argv, char **env, signals sent from the shell (like those resulting from cntl-c) don't get sent to alps */ setpgid(0, 0); - - + + execve(exec_argv, argv, env); opal_output(0, "plm:alps:start_proc: exec failed"); @@ -662,7 +674,7 @@ static int plm_alps_start_proc(int argc, char **argv, char **env, process group any more. Stevens says always do this on both sides of the fork... */ setpgid(alps_pid, alps_pid); - + free(exec_argv); } diff --git a/orte/mca/plm/base/Makefile.am b/orte/mca/plm/base/Makefile.am index 5077f1e87bd..70cba71ac0a 100644 --- a/orte/mca/plm/base/Makefile.am +++ b/orte/mca/plm/base/Makefile.am @@ -5,15 +5,16 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
-# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -25,9 +26,8 @@ libmca_plm_la_SOURCES += \ base/plm_base_frame.c \ base/plm_base_select.c \ base/plm_base_receive.c \ - base/plm_base_launch_support.c \ - base/plm_base_jobid.c \ - base/plm_base_proxy.c \ - base/plm_base_orted_cmds.c + base/plm_base_launch_support.c \ + base/plm_base_jobid.c \ + base/plm_base_orted_cmds.c dist_ortedata_DATA += base/help-plm-base.txt diff --git a/orte/mca/plm/base/base.h b/orte/mca/plm/base/base.h index de7f81e9af0..0e98d5fdb90 100644 --- a/orte/mca/plm/base/base.h +++ b/orte/mca/plm/base/base.h @@ -5,15 +5,16 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: @@ -63,7 +64,6 @@ ORTE_DECLSPEC void orte_plm_base_mapping_complete(int fd, short args, void *cbda ORTE_DECLSPEC void orte_plm_base_launch_apps(int fd, short args, void *cbdata); ORTE_DECLSPEC void orte_plm_base_post_launch(int fd, short args, void *cbdata); ORTE_DECLSPEC void orte_plm_base_registered(int fd, short args, void *cbdata); -ORTE_DECLSPEC int orte_plm_base_fork_hnp(void); END_C_DECLS diff --git a/orte/mca/plm/base/help-plm-base.txt b/orte/mca/plm/base/help-plm-base.txt index 1e44d27564f..8e13f92b364 100644 --- a/orte/mca/plm/base/help-plm-base.txt +++ b/orte/mca/plm/base/help-plm-base.txt @@ -6,15 +6,15 @@ # Copyright (c) 2004-2006 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2015 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # [no-available-pls] diff --git a/orte/mca/plm/base/plm_base_frame.c b/orte/mca/plm/base/plm_base_frame.c index 8d2aebd0f04..9fc9752b41b 100644 --- a/orte/mca/plm/base/plm_base_frame.c +++ b/orte/mca/plm/base/plm_base_frame.c @@ -5,18 +5,18 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. 
- * All rights reserved. + * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -49,17 +49,7 @@ orte_plm_globals_t orte_plm_globals = {0}; /* * The default module */ -orte_plm_base_module_t orte_plm = { - orte_plm_proxy_init, - NULL, /* cannot set hnp name in a proxy */ - orte_plm_proxy_spawn, - NULL, /* cannot remotely spawn by default */ - NULL, /* cannot terminate job from a proxy */ - NULL, /* cannot terminate orteds from a proxy */ - NULL, /* cannot terminate procs from a proxy */ - NULL, /* cannot signal job from a proxy */ - orte_plm_proxy_finalize -}; +orte_plm_base_module_t orte_plm = {0}; static int orte_plm_base_close(void) @@ -90,7 +80,7 @@ static int orte_plm_base_open(mca_base_open_flag_t flags) { /* init the next jobid */ orte_plm_globals.next_jobid = 1; - + /* default to assigning daemons to nodes at launch */ orte_plm_globals.daemon_nodes_assigned_at_launch = true; diff --git a/orte/mca/plm/base/plm_base_jobid.c b/orte/mca/plm/base/plm_base_jobid.c index bfcd5a64c89..6964b6a6889 100644 --- a/orte/mca/plm/base/plm_base_jobid.c +++ b/orte/mca/plm/base/plm_base_jobid.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -39,34 +39,34 @@ int orte_plm_base_set_hnp_name(void) uint16_t jobfam; uint32_t hash32; uint32_t bias; - + /* hash the nodename */ OPAL_HASH_STR(orte_process_info.nodename, hash32); - + bias = (uint32_t)orte_process_info.pid; - + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, "plm:base:set_hnp_name: initial bias %ld nodename hash %lu", (long)bias, (unsigned long)hash32)); /* fold in the bias */ hash32 = hash32 ^ bias; - + /* now compress to 16-bits */ jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32)); - + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, "plm:base:set_hnp_name: final jobfam %lu", (unsigned long)jobfam)); - + /* set the name */ ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16); ORTE_PROC_MY_NAME->vpid = 0; - + /* copy it to the HNP field */ ORTE_PROC_MY_HNP->jobid = ORTE_PROC_MY_NAME->jobid; ORTE_PROC_MY_HNP->vpid = ORTE_PROC_MY_NAME->vpid; - + /* done */ return ORTE_SUCCESS; } @@ -78,12 +78,12 @@ int orte_plm_base_create_jobid(orte_job_t *jdata) { #if 0 int32_t j; - + /* RHC: WHILE ORTE CAN NOW HANDLE RECYCLING OF JOBID'S, * THE MPI LAYER CANNOT SINCE THERE IS NO WAY TO * UPDATE THE OMPI_PROC_T LIST AND/OR THE BTL'S */ - + /* see if there is a prior * jobid that has completed and can be re-used. 
It can * never be 0 as that belongs to the HNP and its daemons @@ -103,14 +103,14 @@ int orte_plm_base_create_jobid(orte_job_t *jdata) */ return ORTE_SUCCESS; } - + if (UINT16_MAX == orte_plm_globals.next_jobid) { /* if we get here, then no local jobids are available */ ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); jdata->jobid = ORTE_JOBID_INVALID; return ORTE_ERR_OUT_OF_RESOURCE; } - + /* take the next jobid */ jdata->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid, orte_plm_globals.next_jobid); orte_plm_globals.next_jobid++; diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 176c9388df1..a435ba14270 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -13,9 +13,10 @@ * Copyright (c) 2009 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -39,7 +40,7 @@ #include "opal/util/argv.h" #include "opal/class/opal_pointer_array.h" #include "opal/dss/dss.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/hwloc-internal.h" #include "orte/util/dash_host/dash_host.h" #include "orte/util/session_dir.h" @@ -55,9 +56,6 @@ #include "orte/mca/routed/routed.h" #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/odls/odls.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#endif #include "orte/mca/filem/filem.h" #include "orte/mca/filem/base/base.h" #include "orte/mca/grpcomm/base/base.h" @@ -82,107 +80,105 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - -#if OPAL_HAVE_HWLOC - { - hwloc_topology_t t; - orte_job_t *jdata; - orte_node_t *node; - orte_proc_t *dmn1; - int i; + hwloc_topology_t t; + orte_job_t *jdata; + orte_node_t *node; + orte_proc_t *dmn1; + int i; + /* if we are not launching, then we just assume that all + * daemons share our topology */ + if (orte_do_not_launch) { + node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); + t = node->topology; + for (i=1; i < orte_node_pool->size; i++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + if (NULL == node->topology) { + node->topology = t; + } + } + } else if (1 < orte_process_info.num_procs) { /* if we got back topology info from the first node, then we use * it as the "standard" for all other nodes unless they sent * back their own topology */ - if (1 < orte_process_info.num_procs) { - /* find daemon.vpid = 1 */ - jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - if (NULL == (dmn1 = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, 1))) { - /* something is wrong */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(caddy); - return; + + /* find 
daemon.vpid = 1 */ + jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); + if (NULL == (dmn1 = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, 1))) { + /* something is wrong */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); + OBJ_RELEASE(caddy); + return; + } + if (NULL == (node = dmn1->node) || + NULL == (t = node->topology)) { + /* something is wrong */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); + OBJ_RELEASE(caddy); + return; + } + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s plm:base:setting topo to that from node %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name)); + for (i=1; i < orte_node_pool->size; i++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; } - if (NULL == (node = dmn1->node) || - NULL == (t = node->topology)) { - /* something is wrong */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(caddy); - return; + if (NULL == node->topology) { + node->topology = t; } - OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s plm:base:setting topo to that from node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name)); - for (i=1; i < orte_node_pool->size; i++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } - if (NULL == node->topology) { - node->topology = t; - } - } - } else if (orte_do_not_launch) { - node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); - t = node->topology; - for (i=1; i < orte_node_pool->size; i++) { + } + } + + /* if this is an unmanaged allocation, then set the default + * slots on each node as directed or using default + */ + if (!orte_managed_allocation) { + if (NULL != orte_set_slots && + 0 != strncmp(orte_set_slots, "none", strlen(orte_set_slots))) { + for (i=0; i < orte_node_pool->size; i++) { if (NULL == (node = 
(orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { continue; } - if (NULL == node->topology) { - node->topology = t; - } - } - } - - /* if this is an unmanaged allocation, then set the default - * slots on each node as directed or using default - */ - if (!orte_managed_allocation) { - if (NULL != orte_set_slots && - 0 != strncmp(orte_set_slots, "none", strlen(orte_set_slots))) { - for (i=0; i < orte_node_pool->size; i++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } - if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) { - OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s plm:base:setting slots for node %s by %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, orte_set_slots)); - if (0 == strncmp(orte_set_slots, "cores", strlen(orte_set_slots))) { - node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, - HWLOC_OBJ_CORE, 0, - OPAL_HWLOC_LOGICAL); - } else if (0 == strncmp(orte_set_slots, "sockets", strlen(orte_set_slots))) { - if (0 == (node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, - HWLOC_OBJ_SOCKET, 0, - OPAL_HWLOC_LOGICAL))) { - /* some systems don't report sockets - in this case, - * use numanodes - */ - node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, - HWLOC_OBJ_NODE, 0, - OPAL_HWLOC_LOGICAL); - } - } else if (0 == strncmp(orte_set_slots, "numas", strlen(orte_set_slots))) { + if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) { + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s plm:base:setting slots for node %s by %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, orte_set_slots)); + if (0 == strncmp(orte_set_slots, "cores", strlen(orte_set_slots))) { + node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, + HWLOC_OBJ_CORE, 0, + OPAL_HWLOC_LOGICAL); + } else if (0 == strncmp(orte_set_slots, "sockets", strlen(orte_set_slots))) { + if (0 == (node->slots = 
opal_hwloc_base_get_nbobjs_by_type(node->topology, + HWLOC_OBJ_SOCKET, 0, + OPAL_HWLOC_LOGICAL))) { + /* some systems don't report sockets - in this case, + * use numanodes + */ node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_LOGICAL); - } else if (0 == strncmp(orte_set_slots, "hwthreads", strlen(orte_set_slots))) { - node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, - HWLOC_OBJ_PU, 0, - OPAL_HWLOC_LOGICAL); - } else { - /* must be a number */ - node->slots = strtol(orte_set_slots, NULL, 10); } + } else if (0 == strncmp(orte_set_slots, "numas", strlen(orte_set_slots))) { + node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, + HWLOC_OBJ_NODE, 0, + OPAL_HWLOC_LOGICAL); + } else if (0 == strncmp(orte_set_slots, "hwthreads", strlen(orte_set_slots))) { + node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, + HWLOC_OBJ_PU, 0, + OPAL_HWLOC_LOGICAL); + } else { + /* must be a number */ + node->slots = strtol(orte_set_slots, NULL, 10); } } } } } -#endif if (orte_display_allocation) { orte_ras_base_display_alloc(); @@ -295,7 +291,7 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata) */ opal_pointer_array_set_item(orte_job_data, ORTE_LOCAL_JOBID(caddy->jdata->jobid), caddy->jdata); } - + /* if job recovery is not enabled, set it to default */ if (!ORTE_FLAG_TEST(caddy->jdata, ORTE_JOB_FLAG_RECOVERABLE) && orte_enable_recovery) { @@ -332,6 +328,11 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata) { orte_job_t *jdata, *jdatorted; orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + orte_node_t *node; + uint32_t h; + orte_vpid_t *vptr; + int i, rc; + char *serial_number; opal_output_verbose(5, orte_plm_base_framework.framework_output, "%s complete_setup on job %s", @@ -390,7 +391,7 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata) /* ensure our routing plan is up-to-date */ orte_routed.update_routing_plan(); - + /* If this job is 
being started by me, then there is nothing * further we need to do as any user directives (e.g., to tie * off IO to /dev/null) will have been included in the launch @@ -405,66 +406,43 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata) /* the tool will PUSH its stdin, so nothing we need to do here * about stdin */ } - -#if OPAL_ENABLE_FT_CR == 1 - /* - * Notify the Global SnapC component regarding new job (even if it was restarted) - */ - { - int rc; - if( ORTE_SUCCESS != (rc = orte_snapc.setup_job(jdata->jobid) ) ) { - /* Silent Failure :/ JJH */ - ORTE_ERROR_LOG(rc); - } - } -#endif -#if OPAL_HAVE_HWLOC - { - orte_node_t *node; - uint32_t h; - orte_vpid_t *vptr; - int i, rc; - char *serial_number; - - /* if coprocessors were detected, now is the time to - * identify who is attached to what host - this info - * will be shipped to the daemons in the nidmap. Someday, - * there may be a direct way for daemons on coprocessors - * to detect their hosts - but not today. - */ - if (orte_coprocessors_detected) { - /* cycle thru the nodes looking for coprocessors */ - for (i=0; i < orte_node_pool->size; i++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } - /* if we don't have a serial number, then we are not a coprocessor */ - serial_number = NULL; - if (!orte_get_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, (void**)&serial_number, OPAL_STRING)) { - continue; - } - if (NULL != serial_number) { - /* if we have a serial number, then we are a coprocessor - so - * compute our hash and lookup our hostid - */ - OPAL_HASH_STR(serial_number, h); - free(serial_number); - if (OPAL_SUCCESS != (rc = opal_hash_table_get_value_uint32(orte_coprocessors, h, - (void**)&vptr))) { - ORTE_ERROR_LOG(rc); - break; - } - orte_set_attribute(&node->attributes, ORTE_NODE_HOSTID, ORTE_ATTR_LOCAL, vptr, ORTE_VPID); + /* if coprocessors were detected, now is the time to + * identify who is attached to what host - 
this info + * will be shipped to the daemons in the nidmap. Someday, + * there may be a direct way for daemons on coprocessors + * to detect their hosts - but not today. + */ + if (orte_coprocessors_detected) { + /* cycle thru the nodes looking for coprocessors */ + for (i=0; i < orte_node_pool->size; i++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + /* if we don't have a serial number, then we are not a coprocessor */ + serial_number = NULL; + if (!orte_get_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, (void**)&serial_number, OPAL_STRING)) { + continue; + } + if (NULL != serial_number) { + /* if we have a serial number, then we are a coprocessor - so + * compute our hash and lookup our hostid + */ + OPAL_HASH_STR(serial_number, h); + free(serial_number); + if (OPAL_SUCCESS != (rc = opal_hash_table_get_value_uint32(orte_coprocessors, h, + (void**)&vptr))) { + ORTE_ERROR_LOG(rc); + break; } + orte_set_attribute(&node->attributes, ORTE_NODE_HOSTID, ORTE_ATTR_LOCAL, vptr, ORTE_VPID); } } - /* done with the coprocessor mapping at this time */ - if (NULL != orte_coprocessors) { - OBJ_RELEASE(orte_coprocessors); - } } -#endif + /* done with the coprocessor mapping at this time */ + if (NULL != orte_coprocessors) { + OBJ_RELEASE(orte_coprocessors); + } /* load any controls into the system */ orte_rtc.assign(jdata); @@ -539,7 +517,7 @@ void orte_plm_base_launch_apps(int fd, short args, void *cbdata) OBJ_RELEASE(caddy); return; } - + /* goes to all daemons */ sig = OBJ_NEW(orte_grpcomm_signature_t); sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); @@ -592,6 +570,9 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata) orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; orte_process_name_t name; orte_timer_t *timer=NULL; + int ret; + opal_buffer_t *answer; + int room, *rmptr; /* convenience */ jdata = caddy->jdata; @@ -620,11 +601,11 @@ void 
orte_plm_base_post_launch(int fd, short args, void *cbdata) "%s plm:base:launch wiring up iof for job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); - + /* push stdin - the IOF will know what to do with the specified target */ name.jobid = jdata->jobid; name.vpid = jdata->stdin_target; - + if (ORTE_SUCCESS != (rc = orte_iof.push(&name, ORTE_IOF_STDIN, 0))) { ORTE_ERROR_LOG(rc); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); @@ -632,13 +613,75 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata) return; } + /* if this isn't a dynamic spawn, just cleanup */ + if (ORTE_JOBID_INVALID == jdata->originator.jobid) { + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s plm:base:launch job %s is not a dynamic spawn", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(jdata->jobid))); + goto cleanup; + } + /* if it was a dynamic spawn, and it isn't an MPI job, then + * it won't register and we need to send the response now. 
+ * Otherwise, it is an MPI job and we should wait for it + * to register */ + if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_NON_ORTE_JOB, NULL, OPAL_BOOL)) { + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s plm:base:launch job %s is MPI", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(jdata->jobid))); + goto cleanup; + } + /* prep the response */ + rc = ORTE_SUCCESS; + answer = OBJ_NEW(opal_buffer_t); + /* pack the status */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &rc, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(ret); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + /* pack the jobid */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &jdata->jobid, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(ret); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + /* pack the room number */ + rmptr = &room; + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, (void**)&rmptr, OPAL_INT)) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &room, 1, OPAL_INT))) { + ORTE_ERROR_LOG(ret); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + } + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s plm:base:launch sending dyn release of job %s to %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(jdata->jobid), + ORTE_NAME_PRINT(&jdata->originator))); + if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer, + ORTE_RML_TAG_LAUNCH_RESP, + orte_rml_send_callback, NULL))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(answer); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + + cleanup: /* cleanup */ OBJ_RELEASE(caddy); } void orte_plm_base_registered(int fd, short args, void *cbdata) { - int ret; + int ret, room, *rmptr; int32_t rc; orte_job_t *jdata; opal_buffer_t *answer; @@ -688,13 +731,23 @@ void orte_plm_base_registered(int fd, short 
args, void *cbdata) OBJ_RELEASE(caddy); return; } + /* pack the room number */ + rmptr = &room; + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, (void**)&rmptr, OPAL_INT)) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &room, 1, OPAL_INT))) { + ORTE_ERROR_LOG(ret); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + } OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, "%s plm:base:launch sending dyn release of job %s to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid), ORTE_NAME_PRINT(&jdata->originator))); if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer, - ORTE_RML_TAG_PLM_PROXY, + ORTE_RML_TAG_LAUNCH_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -726,7 +779,14 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, orte_job_t *jdata; orte_process_name_t dname; opal_buffer_t *relay; - + char *coprocessors, **sns, *sig; + uint32_t h; + hwloc_topology_t topo; + orte_topology_t *t; + int i; + bool found; + uint8_t tflag; + /* get the daemon job, if necessary */ if (NULL == jdatorted) { jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); @@ -742,7 +802,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, orted_failed_launch = true; goto CLEANUP; } - + /* set the contact info into the hash table */ orte_rml.set_contact_info(rml_uri); @@ -750,7 +810,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, "%s plm:base:orted_report_launch from daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&dname))); - + /* update state and record for this daemon contact info */ if (NULL == (daemon = (orte_proc_t*)opal_pointer_array_get_item(jdatorted->procs, dname.vpid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); @@ -778,12 +838,12 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, nodename = ptr; } } - + 
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, "%s plm:base:orted_report_launch from daemon %s on node %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&daemon->name), nodename)); - + /* look this node up, if necessary */ if (!orte_plm_globals.daemon_nodes_assigned_at_launch) { OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, @@ -837,7 +897,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, orted_failed_launch = true; goto CLEANUP; } - + /* mark the daemon as launched */ ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); @@ -875,47 +935,68 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, free(alias); } -#if OPAL_HAVE_HWLOC - { - char *coprocessors, **sns, *sig; - uint32_t h; - hwloc_topology_t topo; - orte_topology_t *t; - int i; - bool found; - uint8_t tflag; - - /* store the local resources for that node */ + /* store the local resources for that node */ + idx=1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &tflag, &idx, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + if (1 == tflag) { idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &tflag, &idx, OPAL_UINT8))) { + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &idx, OPAL_STRING))) { ORTE_ERROR_LOG(rc); orted_failed_launch = true; goto CLEANUP; } - if (1 == tflag) { - idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &idx, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - orted_failed_launch = true; - goto CLEANUP; - } - idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) { - ORTE_ERROR_LOG(rc); - orted_failed_launch = true; - goto CLEANUP; - } + idx=1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s RECEIVED TOPOLOGY 
FROM NODE %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodename)); + if (10 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) { + opal_dss.dump(0, topo, OPAL_HWLOC_TOPO); + } + if (1 == dname.vpid || orte_hetero_nodes) { + /* the user has told us that something is different, so just store it */ OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s RECEIVED TOPOLOGY FROM NODE %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodename)); - if (10 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) { - opal_dss.dump(0, topo, OPAL_HWLOC_TOPO); + "%s ADDING TOPOLOGY PER USER REQUEST TO NODE %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name)); + t = OBJ_NEW(orte_topology_t); + /* filter the topology as we'll need it that way later */ + opal_hwloc_base_filter_cpus(topo); + t->topo = topo; + t->sig = sig; + opal_pointer_array_add(orte_node_topologies, t); + node->topology = topo; + } else { + /* do we already have this topology from some other node? 
*/ + found = false; + for (i=0; i < orte_node_topologies->size; i++) { + if (NULL == (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, i))) { + continue; + } + /* just check the signature */ + if (0 == strcmp(sig, t->sig)) { + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s TOPOLOGY ALREADY RECORDED", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + found = true; + node->topology = t->topo; + hwloc_topology_destroy(topo); + free(sig); + break; + } } - if (1 == dname.vpid || orte_hetero_nodes) { - /* the user has told us that something is different, so just store it */ + if (!found) { + /* nope - add it */ OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s ADDING TOPOLOGY PER USER REQUEST TO NODE %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name)); + "%s NEW TOPOLOGY - ADDING", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); t = OBJ_NEW(orte_topology_t); /* filter the topology as we'll need it that way later */ opal_hwloc_base_filter_cpus(topo); @@ -923,91 +1004,58 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, t->sig = sig; opal_pointer_array_add(orte_node_topologies, t); node->topology = topo; - } else { - /* do we already have this topology from some other node? 
*/ - found = false; - for (i=0; i < orte_node_topologies->size; i++) { - if (NULL == (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, i))) { - continue; - } - /* just check the signature */ - if (0 == strcmp(sig, t->sig)) { - OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s TOPOLOGY ALREADY RECORDED", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - found = true; - node->topology = t->topo; - hwloc_topology_destroy(topo); - free(sig); - break; - } - } - if (!found) { - /* nope - add it */ - OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s NEW TOPOLOGY - ADDING", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - t = OBJ_NEW(orte_topology_t); - /* filter the topology as we'll need it that way later */ - opal_hwloc_base_filter_cpus(topo); - t->topo = topo; - t->sig = sig; - opal_pointer_array_add(orte_node_topologies, t); - node->topology = topo; - } } } - - /* unpack any coprocessors */ - idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - orted_failed_launch = true; - goto CLEANUP; + } + + /* unpack any coprocessors */ + idx=1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + if (NULL != coprocessors) { + /* init the hash table, if necessary */ + if (NULL == orte_coprocessors) { + orte_coprocessors = OBJ_NEW(opal_hash_table_t); + opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs); } - if (NULL != coprocessors) { - /* init the hash table, if necessary */ - if (NULL == orte_coprocessors) { - orte_coprocessors = OBJ_NEW(opal_hash_table_t); - opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs); - } - /* separate the serial numbers of the coprocessors - * on this host - */ - sns = opal_argv_split(coprocessors, ','); - for (idx=0; NULL != sns[idx]; idx++) { - /* compute the hash */ - 
OPAL_HASH_STR(sns[idx], h); - /* mark that this coprocessor is hosted by this node */ - opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&node->daemon->name.vpid); - } - opal_argv_free(sns); - free(coprocessors); - orte_coprocessors_detected = true; + /* separate the serial numbers of the coprocessors + * on this host + */ + sns = opal_argv_split(coprocessors, ','); + for (idx=0; NULL != sns[idx]; idx++) { + /* compute the hash */ + OPAL_HASH_STR(sns[idx], h); + /* mark that this coprocessor is hosted by this node */ + opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&node->daemon->name.vpid); } - /* see if this daemon is on a coprocessor */ - idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); + opal_argv_free(sns); + free(coprocessors); + orte_coprocessors_detected = true; + } + /* see if this daemon is on a coprocessor */ + idx=1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + if (NULL != coprocessors) { + if (orte_get_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, NULL, OPAL_STRING)) { + /* this is not allowed - a coprocessor cannot be host + * to another coprocessor at this time + */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); orted_failed_launch = true; - goto CLEANUP; - } - if (NULL != coprocessors) { - if (orte_get_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, NULL, OPAL_STRING)) { - /* this is not allowed - a coprocessor cannot be host - * to another coprocessor at this time - */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); - orted_failed_launch = true; - free(coprocessors); - goto CLEANUP; - } - orte_set_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, ORTE_ATTR_LOCAL, coprocessors, OPAL_STRING); free(coprocessors); - orte_coprocessors_detected = true; + goto CLEANUP; } + orte_set_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, 
ORTE_ATTR_LOCAL, coprocessors, OPAL_STRING); + free(coprocessors); + orte_coprocessors_detected = true; } -#endif CLEANUP: OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, @@ -1016,7 +1064,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, orted_failed_launch ? "failed" : "completed", ORTE_NAME_PRINT(&dname), (NULL == daemon) ? "UNKNOWN" : daemon->rml_uri)); - + if (orted_failed_launch) { ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START); return; @@ -1119,7 +1167,7 @@ int orte_plm_base_setup_orted_cmd(int *argc, char ***argv) { int i, loc; char **tmpv; - + /* set default location to be 0, indicating that * only a single word is in the cmd */ @@ -1133,7 +1181,7 @@ int orte_plm_base_setup_orted_cmd(int *argc, char ***argv) opal_argv_append(argc, argv, tmpv[i]); } opal_argv_free(tmpv); - + return loc; } @@ -1181,22 +1229,23 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, if (orted_spin_flag) { opal_argv_append(argc, argv, "--spin"); } -#if OPAL_HAVE_HWLOC + if (opal_hwloc_report_bindings) { opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); opal_argv_append(argc, argv, "orte_report_bindings"); opal_argv_append(argc, argv, "1"); } - /* pass our topology signature */ - opal_argv_append(argc, argv, "--hnp-topo-sig"); - opal_argv_append(argc, argv, orte_topo_signature); + if (!ORTE_PROC_IS_CM) { + /* pass our topology signature */ + opal_argv_append(argc, argv, "--hnp-topo-sig"); + opal_argv_append(argc, argv, orte_topo_signature); + } if (orte_hetero_nodes) { opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); opal_argv_append(argc, argv, "orte_hetero_nodes"); opal_argv_append(argc, argv, "1"); } -#endif - + if (orte_map_reduce) { opal_argv_append(argc, argv, "--mapreduce"); } @@ -1205,46 +1254,42 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, opal_argv_append(argc, argv, "orte_map_stddiag_to_stderr"); opal_argv_append(argc, argv, "1"); } - - /* the 
following two are not mca params */ - if ((int)ORTE_VPID_INVALID != orted_debug_failure) { - opal_argv_append(argc, argv, "--debug-failure"); - asprintf(&param, "%d", orted_debug_failure); - opal_argv_append(argc, argv, param); - free(param); + else if (orte_map_stddiag_to_stdout) { + opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(argc, argv, "orte_map_stddiag_to_stdout"); + opal_argv_append(argc, argv, "1"); } - if (0 < orted_debug_failure_delay) { - opal_argv_append(argc, argv, "--debug-failure-delay"); - asprintf(&param, "%d", orted_debug_failure_delay); - opal_argv_append(argc, argv, param); - free(param); + + /* the following is not an mca param */ + if (NULL != getenv("ORTE_TEST_ORTED_SUICIDE")) { + opal_argv_append(argc, argv, "--test-suicide"); } - + /* tell the orted what ESS component to use */ if (NULL != ess) { opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); opal_argv_append(argc, argv, "ess"); opal_argv_append(argc, argv, ess); } - + /* pass the daemon jobid */ opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_ess_jobid"); + opal_argv_append(argc, argv, "ess_base_jobid"); if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&param, ORTE_PROC_MY_NAME->jobid))) { ORTE_ERROR_LOG(rc); return rc; } opal_argv_append(argc, argv, param); free(param); - + /* setup to pass the vpid */ if (NULL != proc_vpid_index) { opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_ess_vpid"); + opal_argv_append(argc, argv, "ess_base_vpid"); *proc_vpid_index = *argc; opal_argv_append(argc, argv, "